From b12cba1df9deec1f8489638cd2b3d7c0ca17b195 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 27 Mar 2026 00:38:22 -0400 Subject: [PATCH 01/58] initial commit --- .../ReadMe.md | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md new file mode 100644 index 000000000..81906ebef --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md @@ -0,0 +1,74 @@ +# Ansible + +## Description +- Ansible is an open-source automation tool used for application deployment, + configuration management, and task automation. +- It uses a simple, human-readable YAML syntax to define automation tasks, + making it accessible for users without extensive programming knowledge. +- Ansible operates in an agentless manner, meaning it does not require any + software to be installed on the target machines, allowing for easier + management of systems. +- It supports a wide range of modules for various tasks, including cloud + provisioning, orchestration, and security compliance, enabling extensive + automation capabilities. +- Ansible is designed to be idempotent, which means that running the same + playbook multiple times will not change the system beyond the initial + application, ensuring stability and predictability. + +## Project Objective +The goal of the project is to automate the deployment of a machine learning +model using Ansible. Students will create a playbook that provisions a virtual +machine, installs necessary dependencies, and deploys a pre-trained model to +serve predictions via a REST API. 
The project will optimize the deployment +process to ensure it is efficient and reproducible. + +## Dataset Suggestions +1. **Kaggle House Prices Dataset** + - **Source Name**: Kaggle + - **URL**: + [Kaggle House Prices](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data) + - **Data Contains**: Various features of houses in Ames, Iowa, including sale + prices, which can be used for regression tasks. + - **Access Requirements**: Free account on Kaggle. + +2. **UCI Machine Learning Repository: Wine Quality Dataset** + - **Source Name**: UCI Machine Learning Repository + - **URL**: + [Wine Quality Dataset](https://archive.ics.uci.edu/ml/datasets/wine+quality) + - **Data Contains**: Chemical properties of wine samples along with quality + ratings, suitable for classification tasks. + - **Access Requirements**: No authentication required. + +3. **Open Government Data: NYC Taxi Trip Data** + - **Source Name**: NYC Open Data + - **URL**: [NYC Taxi Trip Data](https://opendata.cityofnewyork.us/) + - **Data Contains**: Trip records including pickup and drop-off locations, + times, and fares, which can be used for regression or clustering tasks. + - **Access Requirements**: Publicly available without authentication. + +## Tasks +- **Set Up Virtual Environment**: Create a virtual machine using Ansible to host + the machine learning model. +- **Install Dependencies**: Write Ansible tasks to install necessary libraries + and frameworks (e.g., Flask, scikit-learn) for serving the model. +- **Deploy Model**: Use Ansible to copy the pre-trained model files to the + virtual machine and configure the application to serve predictions. +- **Create REST API**: Implement a simple REST API using Flask to handle + incoming prediction requests and return results. +- **Testing and Validation**: Write Ansible tasks to test the deployment and + validate that the API is returning the expected outputs. 
+ +## Bonus Ideas +- **Monitoring and Logging**: Extend the project by integrating monitoring tools + (e.g., Prometheus) to keep track of API performance and logs. +- **Scaling Deployment**: Explore how to scale the deployment across multiple + servers using Ansible's orchestration capabilities. +- **CI/CD Pipeline**: Implement a continuous integration/continuous deployment + (CI/CD) pipeline to automate updates to the model and application. + +## Useful Resources +- [Ansible Documentation](https://docs.ansible.com/ansible/latest/index.html) +- [Ansible GitHub Repository](https://github.com/ansible/ansible) +- [Kaggle Datasets](https://www.kaggle.com/datasets) +- [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/index.php) +- [Flask Documentation](https://flask.palletsprojects.com/) \ No newline at end of file From 3bf5d336254795f1da2a3d435a41e075d2c9b720 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 8 Apr 2026 19:09:18 -0400 Subject: [PATCH 02/58] adding workflow --- .github/workflows/ansible.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/ansible.yml diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml new file mode 100644 index 000000000..aacfafd8c --- /dev/null +++ b/.github/workflows/ansible.yml @@ -0,0 +1,14 @@ +# .github/workflows/hello.yml + +name: Hello World + +on: + push: + workflow_dispatch: + +jobs: + hello: + runs-on: ubuntu-latest + steps: + - name: Echo hello world + run: echo "Hello, World!" 
\ No newline at end of file From 3c720f0cf6d1a31be92920e659bf367b35d4a45a Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 8 Apr 2026 23:30:22 -0400 Subject: [PATCH 03/58] adding workflow --- .github/workflows/ansible.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index aacfafd8c..b4e7bf541 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -10,5 +10,8 @@ jobs: hello: runs-on: ubuntu-latest steps: + - name: Install Ansible & lint + run: pip install ansible ansible-lint + - name: Echo hello world run: echo "Hello, World!" \ No newline at end of file From 011e5fe1f78fba13c02e21006ea87deb89b5630a Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 8 Apr 2026 23:33:06 -0400 Subject: [PATCH 04/58] adding workflow --- .github/workflows/ansible.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index b4e7bf541..4298fe668 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -8,10 +8,13 @@ on: jobs: hello: - runs-on: ubuntu-latest + runs-on: alpine/ansible:2.20.0 steps: - - name: Install Ansible & lint - run: pip install ansible ansible-lint - + # - name: Install Ansible & lint + # run: pip install ansible ansible-lint + + - name: Run ansible-lint + run: ansible-lint playbook.yml + - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file From 1b7c6249f8e83e076b6110b417b7273b8b7c157c Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 8 Apr 2026 23:36:14 -0400 Subject: [PATCH 05/58] adding workflow --- .github/workflows/ansible.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml index 4298fe668..07cff571b 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible.yml @@ -8,10 +8,10 @@ on: jobs: hello: - runs-on: alpine/ansible:2.20.0 + runs-on: ubuntu-latest steps: - # - name: Install Ansible & lint - # run: pip install ansible ansible-lint + - name: Install Ansible & lint + run: pip install ansible ansible-lint - name: Run ansible-lint run: ansible-lint playbook.yml From 1080fb98723f136768f0f1a37aa2651eb06a5596 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:17:46 -0400 Subject: [PATCH 06/58] adding workflow --- .github/workflows/{ansible.yml => ansible-workflow.yml} | 2 +- .../ansible.yaml | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{ansible.yml => ansible-workflow.yml} (70%) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible-workflow.yml similarity index 70% rename from .github/workflows/ansible.yml rename to .github/workflows/ansible-workflow.yml index 07cff571b..4c1dd817c 100644 --- a/.github/workflows/ansible.yml +++ b/.github/workflows/ansible-workflow.yml @@ -14,7 +14,7 @@ jobs: run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint playbook.yml + run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml new file mode 100644 index 000000000..e69de29bb From 16bc5b6c2883d16a3c1f4f85b23626995ac7ceb2 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:21:23 -0400 Subject: [PATCH 07/58] adding workflow --- .github/workflows/ansible-workflow.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 4c1dd817c..a4d365591 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -13,6 +13,9 @@ jobs: - name: Install Ansible & lint run: pip install ansible ansible-lint + - name: Checking ls + run: ls -la + - name: Run ansible-lint run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml From 4e3186e60de766914e5c9e79fcab7f20c343a232 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:22:44 -0400 Subject: [PATCH 08/58] adding workflow --- .github/workflows/ansible-workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index a4d365591..ebbbeefe2 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -14,10 +14,10 @@ jobs: run: pip install ansible ansible-lint - name: Checking ls - run: ls -la + run: tree - - name: Run ansible-lint - run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml + # - name: Run ansible-lint + # run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml - name: Echo hello world run: echo 
"Hello, World!" \ No newline at end of file From 36e29f501b681b25f141aca5ae5cf7d457dbde18 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:37:26 -0400 Subject: [PATCH 09/58] adding workflow --- .github/workflows/ansible-workflow.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index ebbbeefe2..e4fe1c090 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -10,14 +10,13 @@ jobs: hello: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 + - name: Install Ansible & lint run: pip install ansible ansible-lint - - name: Checking ls - run: tree - - # - name: Run ansible-lint - # run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml + - name: Run ansible-lint + run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file From ced114313620bf205c9c4334be2ae85572da00ac Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:39:36 -0400 Subject: [PATCH 10/58] adding workflow --- .github/workflows/ansible-workflow.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index e4fe1c090..56ef873ba 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -10,7 +10,10 @@ jobs: hello: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4z + + - name: Checking ls + run: tree - name: Install Ansible & lint run: pip install ansible ansible-lint From af3e4b2155e513756a4e0d4e4a8812aa4cce3c6f Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:41:21 -0400 Subject: [PATCH 11/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 56ef873ba..800346c2c 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -10,7 +10,7 @@ jobs: hello: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4z + - uses: actions/checkout@v4 - name: Checking ls run: tree From 3db03be31a64bdf17a374e07011cadae07331d73 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 00:49:54 -0400 Subject: [PATCH 12/58] adding workflow --- .../.github}/workflows/ansible-workflow.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {.github => class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github}/workflows/ansible-workflow.yml (100%) diff --git a/.github/workflows/ansible-workflow.yml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github/workflows/ansible-workflow.yml 
similarity index 100% rename from .github/workflows/ansible-workflow.yml rename to class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github/workflows/ansible-workflow.yml From 2d8ba77c5490dad6ba9549d88939879a459ca798 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:07:56 -0400 Subject: [PATCH 13/58] adding workflow --- .../ansible.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index e69de29bb..73b314ff7 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -0,0 +1 @@ +--- \ No newline at end of file From e3ad61899ea615890bab346b748f3c95185385e1 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:08:09 -0400 Subject: [PATCH 14/58] adding workflow --- .../.github => .github}/workflows/ansible-workflow.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github => .github}/workflows/ansible-workflow.yml (100%) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml similarity index 100% rename from class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/.github/workflows/ansible-workflow.yml rename to .github/workflows/ansible-workflow.yml From 1d3e60dfa598c1d08bc8b670635258e746ce96d1 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:10:53 -0400 Subject: [PATCH 15/58] adding workflow --- .../ansible.yaml 
| 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index 73b314ff7..f20258f28 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -1 +1,4 @@ ---- \ No newline at end of file +--- +- name: Create a simple HTML webpage + hosts: localhost + connection: local \ No newline at end of file From eb0a5f05c32fff92729dbc31815d44c06853716f Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:13:15 -0400 Subject: [PATCH 16/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 800346c2c..382e85251 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -19,7 +19,7 @@ jobs: run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml + run: ansible-lint ./class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file From 9c0041bb4b454d9b0ad00bbe1c83705b12bb7f0b Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:18:55 -0400 Subject: [PATCH 17/58] adding workflow --- .github/workflows/ansible-workflow.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 382e85251..e18b8b636 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -12,9 +12,12 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Checking ls + - name: Checking tree run: tree + - name: Checking ls + run: ls -la + - name: Install Ansible & lint run: pip install ansible ansible-lint From bf92e4678c06141af0d8cd5eeaa2359b15df3717 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:23:09 -0400 Subject: [PATCH 18/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index e18b8b636..ea8fe3c92 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -16,7 +16,7 @@ jobs: run: tree - name: Checking ls - run: ls -la + run: ls class_project - name: Install Ansible & lint run: pip install ansible ansible-lint From 2b57153fc384480a091f4efd42451008fe7bc15d Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:25:40 -0400 Subject: [PATCH 19/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index ea8fe3c92..ca9f48b07 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -16,7 +16,7 @@ jobs: run: tree - name: Checking ls - run: ls class_project + run: ls 
class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/ - name: Install Ansible & lint run: pip install ansible ansible-lint From fb1446c397172f1e674dd8426c754e735296fe19 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:28:16 -0400 Subject: [PATCH 20/58] adding workflow --- .github/workflows/ansible-workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index ca9f48b07..4868f414f 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -16,13 +16,13 @@ jobs: run: tree - name: Checking ls - run: ls class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/ + run: ls class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ - name: Install Ansible & lint run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint ./class_project/data_605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml + run: ansible-lint ./class_project/data605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file From 6452237644ed572b1fa4bbf537e7287cb6ffb276 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:32:06 -0400 Subject: [PATCH 21/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 4868f414f..3c08adc09 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -22,7 +22,7 @@ jobs: run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint ./class_project/data605/Spring2026/projects/UmdTask405_-Data605_Spring2026_Ansible_Deployment/playbook.yml + run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml - name: Echo hello world run: echo "Hello, World!" \ No newline at end of file From 6fbc053b119dd1a8f49f835956361605ab704f6f Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:33:49 -0400 Subject: [PATCH 22/58] adding workflow --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 3c08adc09..7bbc5bc1b 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -22,7 +22,7 @@ jobs: run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml + run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml - name: Echo hello world run: echo "Hello, World!" 
\ No newline at end of file From 6f73ba9a56bb999a7874835f4388b9d7e17db458 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 01:36:03 -0400 Subject: [PATCH 23/58] adding workflow --- .../ansible.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index f20258f28..ba15bfc1f 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -1,4 +1,4 @@ --- - name: Create a simple HTML webpage hosts: localhost - connection: local \ No newline at end of file + connection: local From 20dd3498188bee0bee2d4d9027acc1dac654baa8 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 15:28:52 -0400 Subject: [PATCH 24/58] adding workflow --- .../UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile | 0 .../requirements.txt | 0 .../template.API.ipynb | 0 .../template.API.py | 0 .../template.example.ipynb | 0 .../template.example.py | 0 .../template_utils.py | 0 7 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb create 
mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py new file mode 100644 index 000000000..e69de29bb From b8d15d92567b4197c52b6832e7fcc61b34f6b6c1 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 15:33:09 -0400 Subject: [PATCH 25/58] adding workflow --- .../Dockerfile | 30 +++ .../requirements.txt | 4 + .../template.API.ipynb | 215 ++++++++++++++++++ .../template.API.py | 129 +++++++++++ .../template.example.ipynb | 198 ++++++++++++++++ .../template.example.py | 125 ++++++++++ .../template_utils.py | 72 ++++++ 7 files changed, 773 insertions(+) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile index e69de29bb..a7c11a930 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Dockerfile @@ -0,0 +1,30 @@ +# Use Python 3.12 slim (already has Python and pip). +FROM python:3.12-slim + +# Avoid interactive prompts during apt operations. +ENV DEBIAN_FRONTEND=noninteractive + +# Install CA certificates (needed for HTTPS). +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install project specific packages. +RUN mkdir -p /install +COPY requirements.txt /install/requirements.txt +RUN pip install --upgrade pip && \ + pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt + +# Config. +COPY etc_sudoers /install/ +COPY etc_sudoers /etc/sudoers +COPY bashrc /root/.bashrc + +# Report package versions. 
+COPY version.sh /install/ +RUN /install/version.sh 2>&1 | tee version.log + +# Jupyter. +EXPOSE 8888 + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt index e69de29bb..1ffc8c3f3 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +numpy +pandas +seaborn \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb index e69de29bb..3afca937c 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "183c2248-ea3d-43ba-b87e-d821bba1bbc6", + "metadata": {}, + "source": [ + "# Template API Notebook\n", + "\n", + "This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a neo4j tutorial the heading should be `Neo4j API`.\n", + "\n", + "- Add description of what the notebook does.\n", + "- Point to references, e.g. 
(neo4j.API.md)\n", + "- Add citations.\n", + "- Keep the notebook flow clear.\n", + "- Comments should be imperative and have a period at the end.\n", + "- Your code should be well commented.\n", + "\n", + "The name of this notebook should in the following format:\n", + "- if the notebook is exploring `pycaret API`, then it is `pycaret.API.ipynb`\n", + "\n", + "Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "265e0d58-a7cd-4edf-a0b4-96b60220e801", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "d3b2f997-5c9b-4238-b6d5-e5f2cea43809", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d1480ee9-d6a6-437d-b927-da6cbb05bdf5", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "# Import libraries in this section.\n", + "# Avoid imports like import *, from ... import ..., from ... 
import *, etc.\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hnotebook as hnotebo" + ] + }, + { + "cell_type": "markdown", + "id": "f9208cc9-837d-4fec-a312-9c4aa5b7648d", + "metadata": {}, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9a2d7a9c-c6c5-48c9-8445-11c97045d00b", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-085a2ce7-6161-4c8a-92d5-492051832f3c.json'\n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "hnotebo.config_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "79c37ba3-bd5d-4a44-87df-645eee54977a", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "## Make the notebook flow clear\n", + "Each notebook needs to follow a clear and logical flow, e.g:\n", + "- Load data\n", + "- Compute stats\n", + "- Clean data\n", + "- Compute stats\n", + "- Do analysis\n", + "- Show results\n", + "\n", + "\n", + "\n", + "\n", + "#############################################################################\n", + "Template\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a8a109cd-fc8e-4b9e-9dc0-4fc8d4126ad8", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Template:\n", + " \"\"\"\n", + " Brief imperative description of what the class does in one line, if needed.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def method1(self, arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the method does in one line.\n", + "\n", + " You can elaborate more in 
the method docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every method/function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code blocks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass\n", + "\n", + "\n", + "def template_function(arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the function does in one line.\n", + "\n", + " You can elaborate more in the function docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code blocks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "00926523-ae59-497d-bba8-b22e58333849", + "metadata": {}, + "source": [ + "## The flow should be highlighted using headings in markdown\n", + "```\n", + "# Level 1\n", + "## Level 2\n", + "### Level 3\n", + "```" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py index e69de29bb..465093a52 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py @@ -0,0 +1,129 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template API Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a neo4j tutorial the heading should be `Neo4j API`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.API.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should be in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.API.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% [markdown] +# ## Imports + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. 
+ +import helpers.hdbg as hdbg +import helpers.hnotebook as hnotebo + +# %% [markdown] +# ## Configuration + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +hnotebo.config_notebook() + + +# %% [markdown] +# ## Make the notebook flow clear +# Each notebook needs to follow a clear and logical flow, e.g: +# - Load data +# - Compute stats +# - Clean data +# - Compute stats +# - Do analysis +# - Show results +# +# +# +# + + +# ############################################################################# +# Template +# ############################################################################# + + +# %% +class Template: + """ + Brief imperative description of what the class does in one line, if needed. + """ + + def __init__(self): + pass + + def method1(self, arg1: int) -> None: + """ + Brief imperative description of what the method does in one line. + + You can elaborate more in the method docstring in this section, for e.g. explaining + the formula/algorithm. Every method/function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code blocks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +def template_function(arg1: int) -> None: + """ + Brief imperative description of what the function does in one line. + + You can elaborate more in the function docstring in this section, for e.g. explaining + the formula/algorithm. Every function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code blocks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. 
+ pass + + +# %% [markdown] +# ## The flow should be highlighted using headings in markdown +# ``` +# # Level 1 +# ## Level 2 +# ### Level 3 +# ``` \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb index e69de29bb..a2e9aedd7 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50f78f7e-2dee-45d6-9d37-7a55eeaae283", + "metadata": {}, + "source": [ + "# Template Example Notebook\n", + "\n", + "This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`.\n", + "\n", + "- Add description of what the notebook does.\n", + "- Point to references, e.g. 
(neo4j.example.md)\n", + "- Add citations.\n", + "- Keep the notebook flow clear.\n", + "- Comments should be imperative and have a period at the end.\n", + "- Your code should be well commented.\n", + "\n", + "The name of this notebook should be in the following format:\n", + "- if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`\n", + "\n", + "Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6226667e-cab5-479c-be6a-6b7d6f580a97", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8020901a-4bc7-4b73-95e8-aaa462b4fc19", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "# Import libraries in this section.\n", + "# Avoid imports like import *, from ... import ..., from ... 
import *, etc.\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hnotebook as hnotebo" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ecb72b2-b21d-4fb0-ac92-e7174da390e6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-783e0930-1631-4d64-8bb4-f3a98bb74fcd.json'\n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "hnotebo.config_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "1ede6422-bff2-4f0a-8d28-29a01d4786b2", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "## Make the notebook flow clear\n", + "Each notebook needs to follow a clear and logical flow, e.g:\n", + "- Load data\n", + "- Compute stats\n", + "- Clean data\n", + "- Compute stats\n", + "- Do analysis\n", + "- Show results\n", + "\n", + "\n", + "\n", + "\n", + "#############################################################################\n", + "Template\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8bbd660d-d22f-44fa-bf53-dd622dee0f53", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Template:\n", + " \"\"\"\n", + " Brief imperative description of what the class does in one line, if needed.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def method1(self, arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the method does in one line.\n", + "\n", + " You can elaborate more in the method docstring in this section, for e.g. explaining\n", + " the formula/algorithm. 
Every method/function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code blocks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass\n", + "\n", + "\n", + "def template_function(arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the function does in one line.\n", + "\n", + " You can elaborate more in the function docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code blocks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "103f6e36-54cf-442c-b137-8091d48805a7", + "metadata": {}, + "source": [ + "## The flow should be highlighted using headings in markdown\n", + "```\n", + "# Level 1\n", + "## Level 2\n", + "### Level 3\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d05d52af-67ba-4a4f-a561-af453e43854f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py index e69de29bb..30fa7957f 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py @@ -0,0 +1,125 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template Example Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.example.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should be in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. 
+ +import helpers.hdbg as hdbg +import helpers.hnotebook as hnotebo + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +hnotebo.config_notebook() + + +# %% [markdown] +# ## Make the notebook flow clear +# Each notebook needs to follow a clear and logical flow, e.g: +# - Load data +# - Compute stats +# - Clean data +# - Compute stats +# - Do analysis +# - Show results +# +# +# +# + + +# ############################################################################# +# Template +# ############################################################################# + + +# %% +class Template: + """ + Brief imperative description of what the class does in one line, if needed. + """ + + def __init__(self): + pass + + def method1(self, arg1: int) -> None: + """ + Brief imperative description of what the method does in one line. + + You can elaborate more in the method docstring in this section, for e.g. explaining + the formula/algorithm. Every method/function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code blocks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +def template_function(arg1: int) -> None: + """ + Brief imperative description of what the function does in one line. + + You can elaborate more in the function docstring in this section, for e.g. explaining + the formula/algorithm. Every function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code blocks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. 
+ pass + + +# %% [markdown] +# ## The flow should be highlighted using headings in markdown +# ``` +# # Level 1 +# ## Level 2 +# ### Level 3 +# ``` + +# %% \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py index e69de29bb..a4cbeed04 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py @@ -0,0 +1,72 @@ +""" +template_utils.py + +This file contains utility functions that support the tutorial notebooks. + +- Notebooks should call these functions instead of writing raw logic inline. +- This helps keep the notebooks clean, modular, and easier to debug. +- Students should implement functions here for data preprocessing, + model setup, evaluation, or any reusable logic. + +Import as: + +import class_project.project_template.template_utils as cpptteut +""" + +import pandas as pd +import logging +from sklearn.model_selection import train_test_split +from pycaret.classification import compare_models + +# ----------------------------------------------------------------------------- +# Logging +# ----------------------------------------------------------------------------- + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# ----------------------------------------------------------------------------- +# Example 1: Split the dataset into train and test sets +# ----------------------------------------------------------------------------- + + +def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): + """ + Split the dataset into training and testing sets. 
+ + :param df: full dataset + :param target_column: name of the target column + :param test_size: proportion of test data (default = 0.2) + + :return: X_train, X_test, y_train, y_test + """ + logger.info("Splitting data into train and test sets") + X = df.drop(columns=[target_column]) + y = df[target_column] + return train_test_split(X, y, test_size=test_size, random_state=42) + + +# ----------------------------------------------------------------------------- +# Example 2: PyCaret classification pipeline +# ----------------------------------------------------------------------------- + + +def run_pycaret_classification( + df: pd.DataFrame, target_column: str +) -> pd.DataFrame: + """ + Run a basic PyCaret classification experiment. + + :param df: dataset containing features and target + :param target_column: name of the target column + + :return: comparison of top-performing models + """ + logger.info("Initializing PyCaret classification setup") + ... + + logger.info("Comparing models") + results = compare_models() + ... + + return results \ No newline at end of file From e3ed5c6c9c63fd40ac33db2ec771bbf2139ec03e Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 9 Apr 2026 23:46:57 -0400 Subject: [PATCH 26/58] adding workflow --- .github/workflows/ansible-workflow.yml | 37 ++++++++++++++++++- .../ansible.yaml | 32 ++++++++++++++++ index.html | 25 +++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 index.html diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 7bbc5bc1b..43b4dc6f6 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -25,4 +25,39 @@ jobs: run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml - name: Echo hello world - run: echo "Hello, World!" \ No newline at end of file + run: echo "Hello, World!" 
+ + - name: Create the HTML file + copy: + content: | + + + + + + {{ page_title }} + + + +

{{ page_title }}

+

Created by {{ author }}

+

Generated on {{ ansible_date_time.date }}

+ + + dest: "{{ output_path }}" + mode: "0644" + + - name: Confirm file was created + debug: + msg: "Webpage saved to {{ output_path }}" \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index ba15bfc1f..55bcbdf1c 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -2,3 +2,35 @@ - name: Create a simple HTML webpage hosts: localhost connection: local + + tasks: + - name: Create the HTML file + copy: + content: | + + + + + + {{ page_title }} + + + +

{{ page_title }}

+

Created by {{ author }}

+

Generated on {{ ansible_date_time.date }}

+ + + dest: "{{ output_path }}" + mode: "0644" \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 000000000..f3bbb7ba5 --- /dev/null +++ b/index.html @@ -0,0 +1,25 @@ + + + + + + {{ page_title }} + + + +

{{ page_title }}

+

Created by {{ author }}

+

Generated on {{ ansible_date_time.date }}

+ + \ No newline at end of file From a7bea19ebd1ad0a37ecd5a206ea3316572330b17 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:15:39 -0400 Subject: [PATCH 27/58] adding workflow --- .github/workflows/ansible-workflow.yml | 31 -------------------------- 1 file changed, 31 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 43b4dc6f6..0c7ee8f48 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -27,37 +27,6 @@ jobs: - name: Echo hello world run: echo "Hello, World!" - - name: Create the HTML file - copy: - content: | - - - - - - {{ page_title }} - - - -

{{ page_title }}

-

Created by {{ author }}

-

Generated on {{ ansible_date_time.date }}

- - - dest: "{{ output_path }}" - mode: "0644" - - name: Confirm file was created debug: msg: "Webpage saved to {{ output_path }}" \ No newline at end of file From 3f0ef965a2403b397f32b4187d1c7b353d497b71 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:16:55 -0400 Subject: [PATCH 28/58] adding workflow --- .github/workflows/ansible-workflow.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 0c7ee8f48..7bbc5bc1b 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -25,8 +25,4 @@ jobs: run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml - name: Echo hello world - run: echo "Hello, World!" - - - name: Confirm file was created - debug: - msg: "Webpage saved to {{ output_path }}" \ No newline at end of file + run: echo "Hello, World!" \ No newline at end of file From 6c2212f683bcf7cf80dfdb55738dc364f819d178 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:19:42 -0400 Subject: [PATCH 29/58] adding workflow --- .../ansible.yaml | 2 +- index.html | 25 ------------------- 2 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 index.html diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index 55bcbdf1c..6b6bbf5c1 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -5,7 +5,7 @@ tasks: - name: Create the HTML file - copy: + ansible.builtin.copy: content: | diff --git a/index.html b/index.html deleted file mode 100644 index 
f3bbb7ba5..000000000 --- a/index.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - {{ page_title }} - - - -

{{ page_title }}

-

Created by {{ author }}

-

Generated on {{ ansible_date_time.date }}

- - \ No newline at end of file From 434032a50df07bdee90721bbbadccefd4a2ed8b2 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:21:29 -0400 Subject: [PATCH 30/58] adding workflow --- .../ansible.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index 6b6bbf5c1..05fcee592 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -33,4 +33,4 @@ dest: "{{ output_path }}" - mode: "0644" \ No newline at end of file + mode: "0644" From 72d2c6b49f22120e1d9effa217314c8b84ecd448 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:26:05 -0400 Subject: [PATCH 31/58] adding workflow --- .github/workflows/ansible-workflow.yml | 5 ++++- .../ansible.yaml | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 7bbc5bc1b..87e6fb36b 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -25,4 +25,7 @@ jobs: run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml - name: Echo hello world - run: echo "Hello, World!" \ No newline at end of file + run: echo "Hello, World!" 
+ + - name: Checking tree + run: tree diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml index 05fcee592..48734708a 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml @@ -3,6 +3,10 @@ hosts: localhost connection: local + vars: + page_title: "My Simple Webpage" + output_path: "simple_webpage.html" + tasks: - name: Create the HTML file ansible.builtin.copy: From e4807661a8590919c0cade8724df9db4f29fd24c Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:31:32 -0400 Subject: [PATCH 32/58] adding workflow --- .github/workflows/ansible-workflow.yml | 5 ++++- .../{ansible.yaml => playbook.yaml} | 0 2 files changed, 4 insertions(+), 1 deletion(-) rename class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/{ansible.yaml => playbook.yaml} (100%) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 87e6fb36b..1f1075858 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -22,7 +22,10 @@ jobs: run: pip install ansible ansible-lint - name: Run ansible-lint - run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml + run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml + + - name: Run ansible-lint + run: ansible-playbook class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml - name: Echo hello world run: echo "Hello, World!" 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml similarity index 100% rename from class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ansible.yaml rename to class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml From aaeb1174878f3caf312846bf41c79f69c6b3705a Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:34:09 -0400 Subject: [PATCH 33/58] adding workflow --- .../playbook.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 48734708a..ab49f3d2f 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -6,6 +6,7 @@ vars: page_title: "My Simple Webpage" output_path: "simple_webpage.html" + author: "Likhon Gomes" tasks: - name: Create the HTML file From b1e5d8f9e8d733cc56c9d3ac8e0798289e1bfbf2 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Fri, 10 Apr 2026 00:40:55 -0400 Subject: [PATCH 34/58] adding workflow --- .github/workflows/ansible-workflow.yml | 6 ++++++ .../playbook.yaml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 1f1075858..32a3316bc 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -27,6 +27,12 @@ jobs: - name: Run ansible-lint run: ansible-playbook 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml + - name: Upload HTML as artifact + uses: actions/upload-artifact@v4 + with: + name: webpage + path: index.html + - name: Echo hello world run: echo "Hello, World!" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index ab49f3d2f..0d9aee886 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -5,7 +5,7 @@ vars: page_title: "My Simple Webpage" - output_path: "simple_webpage.html" + output_path: "index.html" author: "Likhon Gomes" tasks: From 2693ecd2961ba80f06bf4803434a5e90232f65bd Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Mon, 20 Apr 2026 23:30:50 -0400 Subject: [PATCH 35/58] adding s3 bucket cf template --- .../cf_templates/s3_bucket.yaml | 301 ++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml new file mode 100644 index 000000000..bfa387558 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml @@ -0,0 +1,301 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: S3 bucket with versioning, encryption, lifecycle, and access controls + +# ───────────────────────────────────────── +# PARAMETERS +# 
───────────────────────────────────────── +Parameters: + Environment: + Type: String + Default: dev + AllowedValues: [dev, staging, prod] + Description: Deployment environment + + BucketName: + Type: String + Default: "" + Description: Optional custom bucket name (leave blank to auto-generate) + + EnableVersioning: + Type: String + Default: "true" + AllowedValues: ["true", "false"] + + EnableReplication: + Type: String + Default: "false" + AllowedValues: ["true", "false"] + + ReplicationDestinationBucketArn: + Type: String + Default: "" + Description: ARN of destination bucket for replication (if enabled) + + LogRetentionDays: + Type: Number + Default: 90 + Description: Days to retain access logs + + NoncurrentVersionExpirationDays: + Type: Number + Default: 30 + Description: Days before noncurrent versions are deleted + +# ───────────────────────────────────────── +# CONDITIONS +# ───────────────────────────────────────── +Conditions: + HasCustomBucketName: !Not [!Equals [!Ref BucketName, ""]] + VersioningEnabled: !Equals [!Ref EnableVersioning, "true"] + ReplicationEnabled: !And + - !Equals [!Ref EnableReplication, "true"] + - !Not [!Equals [!Ref ReplicationDestinationBucketArn, ""]] + IsProd: !Equals [!Ref Environment, "prod"] + +# ───────────────────────────────────────── +# RESOURCES +# ───────────────────────────────────────── +Resources: + + # --- Access Logs Bucket --- + AccessLogsBucket: + Type: AWS::S3::Bucket + DeletionPolicy: Retain + Properties: + BucketName: !If + - HasCustomBucketName + - !Sub "${BucketName}-access-logs" + - !Ref AWS::NoValue + LifecycleConfiguration: + Rules: + - Id: ExpireLogs + Status: Enabled + ExpirationInDays: !Ref LogRetentionDays + PublicAccessBlockConfiguration: + BlockPublicAcls: true + BlockPublicPolicy: true + IgnorePublicAcls: true + RestrictPublicBuckets: true + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: AES256 + + AccessLogsBucketPolicy: + Type: 
AWS::S3::BucketPolicy + Properties: + Bucket: !Ref AccessLogsBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: S3ServerAccessLogsPolicy + Effect: Allow + Principal: + Service: logging.s3.amazonaws.com + Action: s3:PutObject + Resource: !Sub "${AccessLogsBucket.Arn}/logs/*" + Condition: + ArnLike: + aws:SourceArn: !GetAtt MainBucket.Arn + + # --- KMS Key for encryption (prod only) --- + BucketKMSKey: + Type: AWS::KMS::Key + Condition: IsProd + Properties: + Description: !Sub "KMS key for ${Environment} S3 bucket" + EnableKeyRotation: true + KeyPolicy: + Version: "2012-10-17" + Statement: + - Sid: AllowRootAccess + Effect: Allow + Principal: + AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" + Action: "kms:*" + Resource: "*" + + BucketKMSKeyAlias: + Type: AWS::KMS::Alias + Condition: IsProd + Properties: + AliasName: !Sub "alias/${Environment}-s3-bucket-key" + TargetKeyId: !Ref BucketKMSKey + + # --- Replication IAM Role --- + ReplicationRole: + Type: AWS::IAM::Role + Condition: ReplicationEnabled + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: s3.amazonaws.com + Action: sts:AssumeRole + Policies: + - PolicyName: S3ReplicationPolicy + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - s3:GetReplicationConfiguration + - s3:ListBucket + Resource: !GetAtt MainBucket.Arn + - Effect: Allow + Action: + - s3:GetObjectVersionForReplication + - s3:GetObjectVersionAcl + - s3:GetObjectVersionTagging + Resource: !Sub "${MainBucket.Arn}/*" + - Effect: Allow + Action: + - s3:ReplicateObject + - s3:ReplicateDelete + - s3:ReplicateTags + Resource: !Sub "${ReplicationDestinationBucketArn}/*" + + # --- Main Bucket --- + MainBucket: + Type: AWS::S3::Bucket + DeletionPolicy: !If [IsProd, Retain, Delete] + Properties: + BucketName: !If + - HasCustomBucketName + - !Ref BucketName + - !Ref AWS::NoValue + + # Versioning + VersioningConfiguration: + Status: !If 
[VersioningEnabled, Enabled, Suspended] + + # Encryption (KMS for prod, AES256 otherwise) + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: !If [IsProd, aws:kms, AES256] + KMSMasterKeyID: !If [IsProd, !Ref BucketKMSKey, !Ref AWS::NoValue] + BucketKeyEnabled: !If [IsProd, true, false] + + # Block all public access + PublicAccessBlockConfiguration: + BlockPublicAcls: true + BlockPublicPolicy: true + IgnorePublicAcls: true + RestrictPublicBuckets: true + + # Server access logging + LoggingConfiguration: + DestinationBucketName: !Ref AccessLogsBucket + LogFilePrefix: logs/ + + # Lifecycle rules + LifecycleConfiguration: + Rules: + - Id: TransitionToIA + Status: Enabled + Transitions: + - TransitionInDays: 30 + StorageClass: STANDARD_IA + - TransitionInDays: 90 + StorageClass: GLACIER + NoncurrentVersionExpiration: + NoncurrentDays: !Ref NoncurrentVersionExpirationDays + + - Id: AbortIncompleteMultipartUploads + Status: Enabled + AbortIncompleteMultipartUpload: + DaysAfterInitiation: 7 + + # Cross-region replication + ReplicationConfiguration: !If + - ReplicationEnabled + - Role: !GetAtt ReplicationRole.Arn + Rules: + - Id: ReplicateAll + Status: Enabled + Destination: + Bucket: !Ref ReplicationDestinationBucketArn + StorageClass: STANDARD + Filter: + Prefix: "" + - !Ref AWS::NoValue + + # CORS (adjust origins as needed) + CorsConfiguration: + CorsRules: + - AllowedHeaders: ["*"] + AllowedMethods: [GET, PUT, POST, DELETE, HEAD] + AllowedOrigins: ["*"] # Lock this down per environment + MaxAge: 3600 + + Tags: + - Key: Environment + Value: !Ref Environment + - Key: ManagedBy + Value: CloudFormation + + # --- Bucket Policy --- + MainBucketPolicy: + Type: AWS::S3::BucketPolicy + Properties: + Bucket: !Ref MainBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + # Deny non-HTTPS requests + - Sid: DenyNonHTTPS + Effect: Deny + Principal: "*" + Action: s3:* + Resource: + - !GetAtt MainBucket.Arn + - !Sub 
"${MainBucket.Arn}/*" + Condition: + Bool: + aws:SecureTransport: false + + # Deny unencrypted object uploads (prod only) + - Sid: DenyUnencryptedUploads + Effect: Deny + Principal: "*" + Action: s3:PutObject + Resource: !Sub "${MainBucket.Arn}/*" + Condition: + StringNotEquals: + s3:x-amz-server-side-encryption: !If [IsProd, aws:kms, AES256] + +# ───────────────────────────────────────── +# OUTPUTS +# ───────────────────────────────────────── +Outputs: + BucketName: + Description: Main S3 bucket name + Value: !Ref MainBucket + Export: + Name: !Sub "${AWS::StackName}-BucketName" + + BucketArn: + Description: Main S3 bucket ARN + Value: !GetAtt MainBucket.Arn + Export: + Name: !Sub "${AWS::StackName}-BucketArn" + + BucketDomainName: + Description: Bucket regional domain name + Value: !GetAtt MainBucket.RegionalDomainName + Export: + Name: !Sub "${AWS::StackName}-BucketDomainName" + + AccessLogsBucketName: + Description: Access logs bucket name + Value: !Ref AccessLogsBucket + + KMSKeyArn: + Condition: IsProd + Description: KMS key ARN used for bucket encryption + Value: !GetAtt BucketKMSKey.Arn + Export: + Name: !Sub "${AWS::StackName}-KMSKeyArn" \ No newline at end of file From 156d8c464268ca239d3b13b3c17deac8f0124094 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:14:40 -0400 Subject: [PATCH 36/58] adding s3 bucket cf template --- .../playbook.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 0d9aee886..9b57d199d 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -39,3 +39,21 @@ dest: "{{ output_path }}" mode: "0644" + 
+ - name: create a stack, pass in the template via an URL + amazon.aws.cloudformation: + stack_name: "umd-stack-ansible-cloudformation" + state: present + region: us-east-1 + disable_rollback: true + template_url: cf_templates/s3_bucket.yaml + template_parameters: + Environment: dev + BucketName: umdtask405-bucket + EnableVersioning: "false" + EnableReplication: "false" + EnableReplication: "false" + LogRetentionDays: 90 + NoncurrentVersionExpirationDays: 30 + tags: + Stack: ansible-cloudformation From 4fb4972bfa4f607d527fcb8b387ac2cd1036c27e Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:23:41 -0400 Subject: [PATCH 37/58] adding s3 bucket cf template --- .github/workflows/ansible-workflow.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 32a3316bc..16e69b818 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -21,6 +21,9 @@ jobs: - name: Install Ansible & lint run: pip install ansible ansible-lint + - name: Install Ansible AWS collection + run: ansible-galaxy collection install amazon.aws + - name: Run ansible-lint run: ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml From f54f2058c1b77c3cc88c56a1210692d4d7b159f9 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:26:17 -0400 Subject: [PATCH 38/58] adding s3 bucket cf template --- .../playbook.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 9b57d199d..413fc2eda 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -52,7 +52,6 @@ BucketName: umdtask405-bucket EnableVersioning: "false" EnableReplication: "false" - EnableReplication: "false" LogRetentionDays: 90 NoncurrentVersionExpirationDays: 30 tags: From 4fb6f5b3d4d2b92c4cb183bf55b031e17c6bc1f8 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:29:04 -0400 Subject: [PATCH 39/58] adding s3 bucket cf template --- .../playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 413fc2eda..2b0d1b59b 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -46,7 +46,7 @@ state: present region: us-east-1 disable_rollback: true - template_url: cf_templates/s3_bucket.yaml + template: cf_templates/s3_bucket.yaml template_parameters: Environment: dev BucketName: umdtask405-bucket From d8c3d65bc2e5f754d945802cc611e51d68c08797 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:41:51 -0400 Subject: [PATCH 40/58] adding s3 bucket cf template --- .../playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 2b0d1b59b..9d7bb7900 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -46,7 +46,7 @@ state: present region: us-east-1 disable_rollback: true - template: cf_templates/s3_bucket.yaml + template_body: cf_templates/s3_bucket.yaml template_parameters: Environment: dev BucketName: umdtask405-bucket From c8a5b08c689028ac070f106429ef544f7ca71501 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:42:08 -0400 Subject: [PATCH 41/58] adding s3 bucket cf template --- .../playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 9d7bb7900..2b0d1b59b 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -46,7 +46,7 @@ state: present region: us-east-1 disable_rollback: true - template_body: cf_templates/s3_bucket.yaml + template: cf_templates/s3_bucket.yaml template_parameters: Environment: dev BucketName: umdtask405-bucket From 439a50193e12a52f9359dfec6ffa96fbf1618cf8 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 00:58:50 -0400 Subject: [PATCH 42/58] adding s3 bucket cf template --- .../playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 2b0d1b59b..0148f6bdd 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -46,7 +46,7 @@ state: present region: us-east-1 disable_rollback: true - template: cf_templates/s3_bucket.yaml + template_body: "{{ lookup('template', 'cf_templates/s3_bucket.yaml') }}" template_parameters: Environment: dev BucketName: umdtask405-bucket From 4e162198373c0e0e144d20642b78043beb4ec673 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 01:04:17 -0400 Subject: [PATCH 43/58] adding s3 bucket cf template --- .../playbook.yaml | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 0148f6bdd..0a973433e 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -43,10 +43,9 @@ - name: create a stack, pass in the template via an URL amazon.aws.cloudformation: stack_name: "umd-stack-ansible-cloudformation" - state: present - region: us-east-1 - disable_rollback: true - template_body: "{{ lookup('template', 'cf_templates/s3_bucket.yaml') }}" + state: "present" + region: "us-east-1" + template: "cf_templates/s3_bucket.yaml" template_parameters: Environment: dev BucketName: umdtask405-bucket @@ -55,4 +54,20 @@ LogRetentionDays: 90 NoncurrentVersionExpirationDays: 30 tags: - Stack: ansible-cloudformation + Stack: "ansible-cloudformation" + + # amazon.aws.cloudformation: + # stack_name: "umd-stack-ansible-cloudformation" + # state: present + # region: us-east-1 + # disable_rollback: true + # template_body: "{{ lookup('template', 'cf_templates/s3_bucket.yaml') }}" + # template_parameters: + # Environment: dev + # 
BucketName: umdtask405-bucket + # EnableVersioning: "false" + # EnableReplication: "false" + # LogRetentionDays: 90 + # NoncurrentVersionExpirationDays: 30 + # tags: + # Stack: ansible-cloudformation From 8f30f862b75bda9d5f6411e6f3bd7cbce3c3692a Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Tue, 21 Apr 2026 20:32:13 -0400 Subject: [PATCH 44/58] adding s3 bucket cf template --- .../playbook.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 0a973433e..c292b3256 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -40,9 +40,9 @@ dest: "{{ output_path }}" mode: "0644" - - name: create a stack, pass in the template via an URL + - name: Create a stack, pass in the template via an URL amazon.aws.cloudformation: - stack_name: "umd-stack-ansible-cloudformation" + stack_name: "Umd-stack-ansible-cloudformation" state: "present" region: "us-east-1" template: "cf_templates/s3_bucket.yaml" From 78758b0843306643bd3e7d5ca6ba23098573d9a7 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 30 Apr 2026 20:35:04 -0400 Subject: [PATCH 45/58] adding playbook --- .../playbook.yaml | 41 +++++-------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index c292b3256..393f6cfbc 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -9,6 +9,16 @@ author: "Likhon Gomes" tasks: + - name: Fetch current time from a more stable API + ansible.builtin.uri: + url: "http://ip-api.com" + method: GET + register: time_response + + - name: Debug API response + ansible.builtin.debug: + var: time_response + - name: Create the HTML file ansible.builtin.copy: content: | @@ -40,34 +50,3 @@ dest: "{{ output_path }}" mode: "0644" - - name: Create a stack, pass in the template via an URL - amazon.aws.cloudformation: - stack_name: "Umd-stack-ansible-cloudformation" - state: "present" - region: "us-east-1" - template: "cf_templates/s3_bucket.yaml" - template_parameters: - Environment: dev - BucketName: umdtask405-bucket - EnableVersioning: "false" - EnableReplication: "false" - LogRetentionDays: 90 - NoncurrentVersionExpirationDays: 30 - tags: - Stack: "ansible-cloudformation" - - # amazon.aws.cloudformation: - # stack_name: "umd-stack-ansible-cloudformation" - # state: present - # region: us-east-1 - # disable_rollback: true - # template_body: "{{ lookup('template', 'cf_templates/s3_bucket.yaml') }}" - # template_parameters: - # Environment: dev - # BucketName: umdtask405-bucket - # EnableVersioning: "false" - # EnableReplication: "false" - # LogRetentionDays: 90 - # NoncurrentVersionExpirationDays: 30 - # tags: - # Stack: ansible-cloudformation From ac6d3ca2c72153815d01d6f40fe501e2c42bc219 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Thu, 30 Apr 2026 23:27:12 -0400 Subject: [PATCH 46/58] adding the ML files --- .github/workflows/static.yaml | 43 +++ .../cf_templates/s3_bucket.yaml | 301 ------------------ .../index.html | 25 ++ .../ml_model | 1 + .../playbook.yaml | 61 ++-- .../requirements.txt | 8 +- .../results/leaderboard.csv | 4 + .../results/metrics.csv | 4 + .../results/plots/accuracy_by_activation.png | Bin 0 -> 16500 bytes .../plots/accuracy_by_grad_clipping.png | Bin 0 -> 17404 bytes 
.../results/plots/accuracy_by_model.png | Bin 0 -> 16115 bytes .../results/plots/accuracy_by_optimizer.png | Bin 0 -> 17334 bytes .../results/plots/accuracy_vs_seq_length.png | Bin 0 -> 19941 bytes .../results/plots/best_loss.png | Bin 0 -> 26093 bytes .../results/plots/f1_by_activation.png | Bin 0 -> 14468 bytes .../results/plots/f1_by_grad_clipping.png | Bin 0 -> 15098 bytes .../results/plots/f1_by_model.png | Bin 0 -> 13683 bytes .../results/plots/f1_by_optimizer.png | Bin 0 -> 14784 bytes .../results/plots/f1_vs_seq_length.png | Bin 0 -> 17654 bytes .../plots/loss_rnn_relu_adam_L50_clip1.png | Bin 0 -> 26093 bytes .../results/plots/worst_loss.png | Bin 0 -> 26093 bytes results/leaderboard.csv | 2 + results/metrics.csv | 2 + results/plots/accuracy_by_activation.png | Bin 0 -> 16500 bytes results/plots/accuracy_by_grad_clipping.png | Bin 0 -> 17404 bytes results/plots/accuracy_by_model.png | Bin 0 -> 16115 bytes results/plots/accuracy_by_optimizer.png | Bin 0 -> 17334 bytes results/plots/accuracy_vs_seq_length.png | Bin 0 -> 19941 bytes results/plots/best_loss.png | Bin 0 -> 26093 bytes results/plots/f1_by_activation.png | Bin 0 -> 14468 bytes results/plots/f1_by_grad_clipping.png | Bin 0 -> 15098 bytes results/plots/f1_by_model.png | Bin 0 -> 13683 bytes results/plots/f1_by_optimizer.png | Bin 0 -> 14784 bytes results/plots/f1_vs_seq_length.png | Bin 0 -> 17654 bytes .../plots/loss_rnn_relu_adam_L50_clip1.png | Bin 0 -> 26093 bytes results/plots/worst_loss.png | Bin 0 -> 26093 bytes 36 files changed, 110 insertions(+), 341 deletions(-) create mode 100644 .github/workflows/static.yaml delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html create mode 160000 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model 
create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_grad_clipping.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_optimizer.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_vs_seq_length.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_grad_clipping.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_vs_seq_length.png create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/loss_rnn_relu_adam_L50_clip1.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png create mode 100644 results/leaderboard.csv create mode 100644 results/metrics.csv create mode 100644 results/plots/accuracy_by_activation.png create mode 100644 results/plots/accuracy_by_grad_clipping.png create mode 100644 results/plots/accuracy_by_model.png create mode 100644 results/plots/accuracy_by_optimizer.png create mode 100644 results/plots/accuracy_vs_seq_length.png create mode 100644 results/plots/best_loss.png create mode 100644 results/plots/f1_by_activation.png create mode 100644 results/plots/f1_by_grad_clipping.png create mode 100644 results/plots/f1_by_model.png create mode 100644 results/plots/f1_by_optimizer.png create mode 100644 results/plots/f1_vs_seq_length.png create mode 100644 results/plots/loss_rnn_relu_adam_L50_clip1.png create mode 100644 results/plots/worst_loss.png diff --git a/.github/workflows/static.yaml b/.github/workflows/static.yaml new file mode 100644 index 000000000..e90d14d57 --- /dev/null +++ b/.github/workflows/static.yaml @@ -0,0 +1,43 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy static content to Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["pages"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: '.' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v5 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml deleted file mode 100644 index bfa387558..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/cf_templates/s3_bucket.yaml +++ /dev/null @@ -1,301 +0,0 @@ -AWSTemplateFormatVersion: "2010-09-09" -Description: S3 bucket with versioning, encryption, lifecycle, and access controls - -# ───────────────────────────────────────── -# PARAMETERS -# ───────────────────────────────────────── -Parameters: - Environment: - Type: String - Default: dev - AllowedValues: [dev, staging, prod] - Description: Deployment environment - - BucketName: - Type: String - Default: "" - Description: Optional custom bucket name (leave blank to auto-generate) - - EnableVersioning: - Type: String - Default: "true" - AllowedValues: ["true", "false"] - - EnableReplication: - Type: String - Default: "false" - AllowedValues: ["true", "false"] - - ReplicationDestinationBucketArn: - Type: String - Default: "" - Description: ARN of destination bucket for replication (if enabled) - - LogRetentionDays: - Type: Number - Default: 90 - Description: Days to retain access logs - - NoncurrentVersionExpirationDays: - Type: Number - Default: 30 
- Description: Days before noncurrent versions are deleted - -# ───────────────────────────────────────── -# CONDITIONS -# ───────────────────────────────────────── -Conditions: - HasCustomBucketName: !Not [!Equals [!Ref BucketName, ""]] - VersioningEnabled: !Equals [!Ref EnableVersioning, "true"] - ReplicationEnabled: !And - - !Equals [!Ref EnableReplication, "true"] - - !Not [!Equals [!Ref ReplicationDestinationBucketArn, ""]] - IsProd: !Equals [!Ref Environment, "prod"] - -# ───────────────────────────────────────── -# RESOURCES -# ───────────────────────────────────────── -Resources: - - # --- Access Logs Bucket --- - AccessLogsBucket: - Type: AWS::S3::Bucket - DeletionPolicy: Retain - Properties: - BucketName: !If - - HasCustomBucketName - - !Sub "${BucketName}-access-logs" - - !Ref AWS::NoValue - LifecycleConfiguration: - Rules: - - Id: ExpireLogs - Status: Enabled - ExpirationInDays: !Ref LogRetentionDays - PublicAccessBlockConfiguration: - BlockPublicAcls: true - BlockPublicPolicy: true - IgnorePublicAcls: true - RestrictPublicBuckets: true - BucketEncryption: - ServerSideEncryptionConfiguration: - - ServerSideEncryptionByDefault: - SSEAlgorithm: AES256 - - AccessLogsBucketPolicy: - Type: AWS::S3::BucketPolicy - Properties: - Bucket: !Ref AccessLogsBucket - PolicyDocument: - Version: "2012-10-17" - Statement: - - Sid: S3ServerAccessLogsPolicy - Effect: Allow - Principal: - Service: logging.s3.amazonaws.com - Action: s3:PutObject - Resource: !Sub "${AccessLogsBucket.Arn}/logs/*" - Condition: - ArnLike: - aws:SourceArn: !GetAtt MainBucket.Arn - - # --- KMS Key for encryption (prod only) --- - BucketKMSKey: - Type: AWS::KMS::Key - Condition: IsProd - Properties: - Description: !Sub "KMS key for ${Environment} S3 bucket" - EnableKeyRotation: true - KeyPolicy: - Version: "2012-10-17" - Statement: - - Sid: AllowRootAccess - Effect: Allow - Principal: - AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" - Action: "kms:*" - Resource: "*" - - BucketKMSKeyAlias: - Type: 
AWS::KMS::Alias - Condition: IsProd - Properties: - AliasName: !Sub "alias/${Environment}-s3-bucket-key" - TargetKeyId: !Ref BucketKMSKey - - # --- Replication IAM Role --- - ReplicationRole: - Type: AWS::IAM::Role - Condition: ReplicationEnabled - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Principal: - Service: s3.amazonaws.com - Action: sts:AssumeRole - Policies: - - PolicyName: S3ReplicationPolicy - PolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Action: - - s3:GetReplicationConfiguration - - s3:ListBucket - Resource: !GetAtt MainBucket.Arn - - Effect: Allow - Action: - - s3:GetObjectVersionForReplication - - s3:GetObjectVersionAcl - - s3:GetObjectVersionTagging - Resource: !Sub "${MainBucket.Arn}/*" - - Effect: Allow - Action: - - s3:ReplicateObject - - s3:ReplicateDelete - - s3:ReplicateTags - Resource: !Sub "${ReplicationDestinationBucketArn}/*" - - # --- Main Bucket --- - MainBucket: - Type: AWS::S3::Bucket - DeletionPolicy: !If [IsProd, Retain, Delete] - Properties: - BucketName: !If - - HasCustomBucketName - - !Ref BucketName - - !Ref AWS::NoValue - - # Versioning - VersioningConfiguration: - Status: !If [VersioningEnabled, Enabled, Suspended] - - # Encryption (KMS for prod, AES256 otherwise) - BucketEncryption: - ServerSideEncryptionConfiguration: - - ServerSideEncryptionByDefault: - SSEAlgorithm: !If [IsProd, aws:kms, AES256] - KMSMasterKeyID: !If [IsProd, !Ref BucketKMSKey, !Ref AWS::NoValue] - BucketKeyEnabled: !If [IsProd, true, false] - - # Block all public access - PublicAccessBlockConfiguration: - BlockPublicAcls: true - BlockPublicPolicy: true - IgnorePublicAcls: true - RestrictPublicBuckets: true - - # Server access logging - LoggingConfiguration: - DestinationBucketName: !Ref AccessLogsBucket - LogFilePrefix: logs/ - - # Lifecycle rules - LifecycleConfiguration: - Rules: - - Id: TransitionToIA - Status: Enabled - Transitions: - - TransitionInDays: 30 - StorageClass: 
STANDARD_IA - - TransitionInDays: 90 - StorageClass: GLACIER - NoncurrentVersionExpiration: - NoncurrentDays: !Ref NoncurrentVersionExpirationDays - - - Id: AbortIncompleteMultipartUploads - Status: Enabled - AbortIncompleteMultipartUpload: - DaysAfterInitiation: 7 - - # Cross-region replication - ReplicationConfiguration: !If - - ReplicationEnabled - - Role: !GetAtt ReplicationRole.Arn - Rules: - - Id: ReplicateAll - Status: Enabled - Destination: - Bucket: !Ref ReplicationDestinationBucketArn - StorageClass: STANDARD - Filter: - Prefix: "" - - !Ref AWS::NoValue - - # CORS (adjust origins as needed) - CorsConfiguration: - CorsRules: - - AllowedHeaders: ["*"] - AllowedMethods: [GET, PUT, POST, DELETE, HEAD] - AllowedOrigins: ["*"] # Lock this down per environment - MaxAge: 3600 - - Tags: - - Key: Environment - Value: !Ref Environment - - Key: ManagedBy - Value: CloudFormation - - # --- Bucket Policy --- - MainBucketPolicy: - Type: AWS::S3::BucketPolicy - Properties: - Bucket: !Ref MainBucket - PolicyDocument: - Version: "2012-10-17" - Statement: - # Deny non-HTTPS requests - - Sid: DenyNonHTTPS - Effect: Deny - Principal: "*" - Action: s3:* - Resource: - - !GetAtt MainBucket.Arn - - !Sub "${MainBucket.Arn}/*" - Condition: - Bool: - aws:SecureTransport: false - - # Deny unencrypted object uploads (prod only) - - Sid: DenyUnencryptedUploads - Effect: Deny - Principal: "*" - Action: s3:PutObject - Resource: !Sub "${MainBucket.Arn}/*" - Condition: - StringNotEquals: - s3:x-amz-server-side-encryption: !If [IsProd, aws:kms, AES256] - -# ───────────────────────────────────────── -# OUTPUTS -# ───────────────────────────────────────── -Outputs: - BucketName: - Description: Main S3 bucket name - Value: !Ref MainBucket - Export: - Name: !Sub "${AWS::StackName}-BucketName" - - BucketArn: - Description: Main S3 bucket ARN - Value: !GetAtt MainBucket.Arn - Export: - Name: !Sub "${AWS::StackName}-BucketArn" - - BucketDomainName: - Description: Bucket regional domain name - 
Value: !GetAtt MainBucket.RegionalDomainName - Export: - Name: !Sub "${AWS::StackName}-BucketDomainName" - - AccessLogsBucketName: - Description: Access logs bucket name - Value: !Ref AccessLogsBucket - - KMSKeyArn: - Condition: IsProd - Description: KMS key ARN used for bucket encryption - Value: !GetAtt BucketKMSKey.Arn - Export: - Name: !Sub "${AWS::StackName}-KMSKeyArn" \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html new file mode 100644 index 000000000..56edd194a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html @@ -0,0 +1,25 @@ + + + + + + My Simple Webpage + + + +

My Simple Webpage

+

Created by Likhon Gomes

+

Generated on 2026-04-30

+ + diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model new file mode 160000 index 000000000..141f2778a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model @@ -0,0 +1 @@ +Subproject commit 141f2778a863a41d77628688cade4a05d5ca48a1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 393f6cfbc..142195013 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -1,52 +1,35 @@ --- -- name: Create a simple HTML webpage +- name: Run Python Page hosts: localhost connection: local vars: - page_title: "My Simple Webpage" + page_title: "ML Model Training and Evaluation" output_path: "index.html" author: "Likhon Gomes" tasks: - - name: Fetch current time from a more stable API - ansible.builtin.uri: - url: "http://ip-api.com" - method: GET - register: time_response + - name: Clone ML Model from Github + ansible.builtin.git: + repo: https://github.com/likhongomes/Comparative-Analysis-of-RNN-Architectures-for-Sentiment-Classification.git + dest: ml_model - - name: Debug API response + # - name: Install Python dependencies + # ansible.builtin.pip: + # requirements: ml_model/requirements.txt + + - name: Run Python Code to train model + ansible.builtin.command: python ml_model/train.py --architecture rnn --activation relu --optimizer adam --seq_len 50 --epochs 5 --grad_clip_enable --data_dir ml_model/data + register: python_train_output + + - name: Python train output ansible.builtin.debug: - var: 
time_response + msg: "{{ python_train_output }}" - - name: Create the HTML file - ansible.builtin.copy: - content: | - - - - - - {{ page_title }} - - - -

{{ page_title }}

-

Created by {{ author }}

-

Generated on {{ ansible_date_time.date }}

- - - dest: "{{ output_path }}" - mode: "0644" + - name: Run Python Code to evaluate model + ansible.builtin.command: python ml_model/evaluate.py + register: python_evaluate_output + - name: Python evaluate output + ansible.builtin.debug: + msg: "{{ python_evaluate_output }}" \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt index 1ffc8c3f3..83c75f98a 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt @@ -1,4 +1,10 @@ matplotlib numpy pandas -seaborn \ No newline at end of file +seaborn +tqdm +boto3 +requests +regex +sentencepiece +sacremoses \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv new file mode 100644 index 000000000..442d29f5a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv @@ -0,0 +1,4 @@ +Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s) +RNN,relu,adam,50,Yes,0.7541,0.7536,6.39 +RNN,relu,adam,50,Yes,0.7541,0.7536,6.36 +RNN,relu,adam,50,Yes,0.7541,0.7536,6.28 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv new file mode 100644 index 000000000..b0f2ad24e --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv @@ -0,0 +1,4 @@ +Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s),Epochs,Hardware +RNN,relu,adam,50,Yes,0.7541,0.7536,6.39,5,CPU +RNN,relu,adam,50,Yes,0.7541,0.7536,6.36,5,CPU +RNN,relu,adam,50,Yes,0.7541,0.7536,6.28,5,CPU diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png new file mode 100644 index 0000000000000000000000000000000000000000..f0a0df7bbce4c69ba5c06d3180e8098884dada64 GIT binary patch literal 16500 zcmdse2UJwqx+dl}w*dnZ+7?kki$=*&ZAAeIQiWttOF(kY$u_ovpahYO0VqH~C1)cj zK_m+lNhAwM&M@D$``vrr>-+A^teIJFy}9dlnTo1&_WAe!e`()TQ97}9727Hn7M8Vg zCy%MJuq+l~VOdo1!*cx1y{7w5@Qaw;aSc1FrJFtD*T zwzN1z5u+U3|I-hgF@;d7}_hmRlEaq=qvxNc`a6vyGV`MEaVdZjDHdqtnRNGF-< zs@s<5)y_q1x7Tax(=#dT(z?GYu-SJ!DvVr_3MH)P{TVH_U+Ys8 z+&>tm7~>7B;`38)ii#5K`)YY9lyQx6i!L8_mZh4VAbTJm-2h2C)= zhsry;D;~KFWOZ%k;z~BDne3WT9qX?6RfK7ECBki3PC4kZO<~JAnu~11x22%KV|M7oiOBXMU(?FRa@oVhd>vg~vD0IH366u!4H=e^ zj~BuV>oct%RXtnJCA}XH;M(e~;tjIi+a81nnf4|m=c}GN<(-uIc@?=ozkraC_?&@k zh1)ws?iCaigos!sJ);-#?%7imJ>Whw-qY+lsCM$?@3XVB)sN*it=%DfYT2@7;%<{q zbMx{7@7!s9-qKkXYLHStI*>D{_f2YcZlZ;C>(|(0Ip=CX^k6g+MWBrZlZEbDQ5ivcV zo;|{C4LZ#Zm+<$+gj%T1O^>NPl3lB7TP8#vm|bu6z7vraP1(jZ@oM5WUw)^5O)28* zPpz)g_e>fM3=4bm^2&;w(uUG7F)D|I11((Ke)G0%jPd?PJ~1&(qXo)yi9vq>Svfg5le(nv z#fulyavaRZ23z!3tmm|ANY>7UOjSC+5( z;Rlt3Gl_x*#hd4FSCsaG@di_qiig+8GH2VH=FN>avI(5O8X|1oJIq-cBBY?M@1BN# zq$TS~k##Ea;jry>#EmOPNNPTK@E~3^IRv`=-%(k%SFr^QU!!es(%?88tQ!HD}Au!-ho5D($fsh0*^d_!uzO< zZE}XMudh`qlCkT)bN9lQ{Ti(ocSyTs;s*p2LxqJ_u3Tx@lx_e0hYPnIavad{zjNp8 znI{_Ogv}c+G&{Agtx7l7u6k~uK-JLb7A;KD%{f&TBBY4TUdET4d%iYN+o;*4@4%t6 zFIfB|BQ3oJI+H!;5{xS!-xPD7PNgbNFl~K}@j%7wd#lA-3jFFeZP{WtkljZ~vFVT> 
zsnxa&#uEvB@#3&;>x)HnZvhlCkDy>R)3L=}X(m$KzF5%g(?_Olxm4F&nD@Sda$bJ^ z^8Ff5Uz|MXIn&ruil<+O{}g3quj77us$xS^7J6J3<|b})`-g<|3VvwLbG^+y^4W`D zyn#n4>|*DKaMRi+n)KRaeM>#3wq={x+0A5q*n7Isn_BKubhGbclN5bw%&>Hnb7AXa3B)HpFbj_Ry6|ofdD@D2KM(jZG$SEla$Hm3*2np3lj#X26e){PJ>Urd0bL`Au zLCfUiLY7+;1)z7>G?bxs8;?ppEhD-(NaA)3PAEL)h$> zP+{|fh1*7|smj4y^_*Tl(Ml^uPp`;D2jcnp=T~-v%}vI$sMDI02eG0VQ>|ClmwtZh zrHK1D<>ZvPa{YS8Q9fSYgBeW@p>d}lT@MzsjgeDS6iP0b&o-`R+%;~>PMboNa=71E z&t+jI9O^tfR8)ptt9btWL-K}|Yt{(9zOmlXdBJ6@*8n}ivw4C`ZFSr7z=ns%y}Y?c ziusba(Y?j2KVOa0Oo{1fNK2p=poq6~adD*>l}Ffe@<~hQS9>zcJ1sKrUg#{3^lYwE z(a|x(of`PCi=F2Vo@nuG8DbvDV4g?6Hh7+r!mm-~IX}X6Lfg`1w8^I%G! zqy#ezWp?%I)%)l#5@Vr~&d-O&#)QPg#G-vp(`XmSg={&Xbs;x5w^UhX?u%2Ba-_79 z{u84adggY)i+6@bMh^A#^eD}d1&CKqw(#MSvGAU1F{;RMw2bvo2%-r2g)OETlVaKR zexFX*p#9sTqR?#nK5^-Z=lRO0H4(qkk#d>AYW`ijUN2p`^!|JQJrWHT`BR3w-apaK z7|0b0oaOkGYV3dag2P~QKSKx`#O~{-inxWlyH0rf`ubKGJ-zULG@M(;+PbYbWn5RO8%V-H_%EBa_hjWvcsI7>NR*BoaXAj>`KOMfZ ze61gM%@a*3*{|tl^$!R0XFgO6zFsdwn;2+94^Ob=>?40d;n<-u-!r{U>PbP{!6E%T zozA*t3y-CxC1%K8Nl6{QRL4O*Iip%0vxZc;CCk^Sv+R6fj>TRuLu*Vpb~H(xJ38O1H8SY9w%sgT_nVro~VD57iS zJv{q_+1B5mxbxTR7pI0hic3oTxmj6Rd$4<}bsd|c$*NArL`V00kQk()v2}EG>_#z= z5qbRh@#*nKOMa8tvA(*56TY0QRW|MqPjaaJI7YFn z?P&MudNbi=D^>()y-v(%j*p7^sim5MRyE{9@(edc3 z*REA-OgGn6s;^Db)y`nrcqu4i0#&!Zyu!-H*30O@_`Nq;P_SfXVo?3enS0CEY->%m z^yg8mj#k(jn~wWFoMuu}$>GRsqp9aO^lv6Vl<=i|)P16#Zp4&Pso9u>4|J)Ae)o50J3GH;Vimph{fgf(~j`YgGhu1_MSrRmk@(_=ZX{`1DHpewCG_kuv&)+W@Owqnl$pZi_U3e4?51rK2MnO`_)p zmxt)!P;q7K7p|ffxK_Z8&hE9u~xIl<*lv&^50Y4%)jPt;72 zMK#$>$+T7mOc-?}VP5xL-(m6b^0L)}4qr76XgocPy-)uDD-E?MvQ}14v2Vso_zM`ax7mEgv1v|7OG{5)%JPU* z?UD7Zt!HZzwbe14t$X<7)OD<{RSBd#e-4bct7s%vInrTe$Zn76o;^j|o-Q}EP}xT| zJ(g7^#jq^(-0S5*=D^*kqv>z2*YB1?zo|r-d~mIiOV-O91$i;Sw|9M|+SnJOAbYCw z&%eEOuugB-caE!Ish<0oT5Qp7^#ge`136u5ly}e0&5gZSvc8*qKE@oTUDrW#Pic&H z>uj6OcXxL+q??oe-C51g%PU_|QQ?BBOLH1Je|ww2s~)dZLDnJ7KzreKvt1{As$(NP z;uEzqOgf4K*#Nw%_Mb40^b&}vPPjm-SJJ_kQ;J=`(?~n@;#~|6J&T}3eN277$4geM 
z6HRM06LxlWmB_1Y@t9G>Lp6*m%KA8CH`AX{vh5MJiGBCSmj*d#j}#44&4qQTX7#E7 zo?6*bU9sMu5wgyN}&So}PFW2c=)jVcX4KR~J;sm-=JP>l+ zp_7be)KxHtF;tpY1X-GF2It`WPumFXHqI@`I&Z(={_J!Zp*)g0mLp zXLOV9#bdYwx7^draY%GVA5zrO32QgmNB7PetJPNQZ%8XcV>LueG>vfjywbKtO+7F> zDib9{NyG360bdOM_RVSp4LX#dpBtPm?@}*TyfP3ld0(TduIC&bEMgf!V6Eo!3pcZ{ zuQ=2d0w>W`yfZQ~?q1OIoEtls=j)lP75~K~fIv#Vq|8@txDd)r!2X}p>&-gR&hlEf z;n5IuPk@(QPvu*)PfuUE%};gl96D6BtsPhN25nC9@Zn3wAc=P`j1RU9j?NMc2@ug! zn;5E@=aQzG@0Qh5pAy@iBIUZl`TaI9j?OA&8KFqe`KNsiFqnJg8MbjyKTo}bwD$t$v??q<$H?TEqrjX9I3RcM$Zc3Zu{ zXtG9KKUiWQ^YzkY%eG^bkmnTi)F!U2a2p-Nuh%9mP@oXwqXnC}l3W;QRwXw#_x&$l zzNq5%w|Yd%`;fJuf^Kj6@yD|e5Dd}mI!C)Yb;yU{QVhi%2h0c*1@AC|T0_sD{OqNw z-+QckIToo-k4AZnmfe_=YP{SanVOcn-s*{5gpGT=Pa zt%}N79>tIg6Yix54xx}aH9s?`khf~JT4>q*l+O4?G_Csj`c>=K*X{C^ zeApG?n)IqrGs`A=Aa_J5Q7cUz^fl?Jb!)ys$vxxN7rzm8pvq-qf|$o_rdI032l?(Z zCe69dtm3r`b1e(F&g0@%tuMrqV~*DRnz@KK3(WWk0aP*zQ_6|@1$t3`=hCxVIfzQZ=pMy`NpPJQK+j78$#@9v zbOZual@EtRb&O)@svR?vL+wPy85{Im$T_vT>$LsTsBMfdXfyVxF2xqNk=tAN^>ofH z6iHh!MVlZE#p~vz-`lTw^l98x;95P`ejy<+*MkMC2A~9ilbA9yt$Wcm3Vpef@7(#( z9R20!mD}7L;&vyYN!6eM^VzZMc$6|B?J1n={JXSQ9BEDHV^Vc3Ef{3kz*S z;kIy5>!X<6v^0~h$k3^(YoXBYvS(j#u&^-L4jw#6nVhln%(kKq&+LMpMUip(frW)P zc6Qe7+O=z9`9wve&l@d^T$`0y=7r}q51=4)c@PBxl+0;DitA0 znjJkGS!j1&myZtFtz=xipS1{D?a|Etn%B=mR{jA2kAMIDhpCwvuwZ#0PBEjF4ZCCk z=(Efqu4H+$E@8P;T>7YuXZP;on|MxrOf{|wxOXoG-GWSOd1Yn(w1qG++Y1?u9NG{e zdaL6exkCRssJ;%%&F6cah&Q)6rL-Vuo+j#&>$n*nfbjq`D|&qWs#U6h&Gg1+^cw_c zLqr-5qi^24d9RF2i}4Z`mfDVIR?uA-;S!F#JUoSQQan7e;1+yU0)GDfL?7dol$;H7 z-?(uje(1}mSFc`C;_xt4vYhub(JmMa#>t&KcPjPcrMS2_yLH|nM;F`Y0a}7>vj@sf zO;0a^Uh?i;=Hu)|EQdQf`+F9nc&q4iZv}1REfgJwR+>qiWlMfne^*!69vPVeg?$@P zeLFcrvsSKKcP}U?sARt)C}4@%2d?dOLC<-&wH)F!^j=m?9#T}2TP)n%-GSAjtE*|T z<9Im^nbffb(o+v8Kpg}}zURkq#mk&WQ)z$<*Vl2xuj3GB*miw@>KRd4RmHkv2VYOA zOEA#R0j<<`#sy!#T(JN8>Ga*ZcO&-hz{Q;BTq^>wM6<9+`rO`bkkQDxeR~`Tjzp3N z03l@}_kBqT1gsK4qYv!L0c-GB#6krMow9!~*(W2Tf8@v!kVq=XP;ALdgO7H_yvji= zQJ2|0Vtiv2UKo7-Fp*K@>4Ex;lH7o$y%hc9{}-=Ll1%R1yZ8P4^HI>2Teogi-aDZE 
zp|Kz;>M)c&D!MwrY4sD$LuHXN3kTI!W6>lmmDff1`8`~@iD&P=eR3Fm6_Bl=ak?0n zM@X3fe#gzM+4jp}!je#(HM!2VV90$CV1lx2@Psw%y#)%XR~+%^Al1|{vurucF%49q zt`W()(i7B+4RXplPDzHieXmaS{i&&`RoEdYQK-TE*$y-g#n>mqb}LzU-?bS{J7q7` z;TdrAbp4u2J9qAZ!vHhx{{3yS<9LC{kXY>w5%U2`sXMDH0;BSF^~2+5HpFOFYY}2_)$N=uNpq4 zI@7v+RpM5B=~_;y1H==69pbJqGcn~?mXMT;U_wqs*#9O`zrSiX*(0Z|0XR*6Lh-@${b3F^sbP)FSF z@kc4CriubUe0fF18iEPP4u=}V_RHBz_}h-SAj~}8?Y_-jg_|bU@dJgKNI1#N^B`u4? zg7`+T#V{wDE1@KK$X(gP!(%eqUBPz9&>NjSx<18-Z~y*NXtq*=9#F*~<}m0Z!^7E+ z#nI~QGdV1X8Uci;1bvE(szM)*gmUSb(*ECI$)wqKDZx7tO)Gly<|4*YSJOmxUlJ8k zV)dgF>&-tsZOe6Ey=qlJWaMBf>b%cJf_Y3|`a zsu~tov0WJG3+BZ>sNQi>+}y_?aYPSPLHH>P6;XzsP&M39LP#v}$)K%EcZtHnAiKou z+_`fGp3hK^#aZ3U-ZnQkPxt9B#78Rx_0}6j*8H|)#poxxZyYu-ILQ644z9sI^pyHA z&K-1}26;l9!4e9fUnhSEm*7K#bDtiqD3~8hw&nE1P{)k0H3N<^@VxuSH&~_6i`Ky= zQ(DU=NJIio*RKK`qE?4N%hVxD!{JC88Q@}P-_Os_AEX)b^{ZJ>eb`7(b(}w(f*f1v zL7+mo9}Wb;P7QNx&?C_~s289+0gJ+EBAq-_5$Y{h#c1 zTPb5>W0Q|jzvb`)HfRK7$Gx`5`5Bf+B%;*F!{Ob(zgpWeFPZ@M`sW4%`52;S zPopkJ!WZ+l_4OsRy}qIUwB^NbO9Def6W}G*_d6Vyl{EqP1q%q`#vfx{n>TOS()+B4 zlZw@WIR}`%<4-c+Fkyf_?)7{--CIHe-p*kN0M$|x%=YSe_?NxtPzR`tp~rx(Fo*W> z^ZSm8cy9b>qaY`*vy+&$(5Mt#A^42>dd|LXdX3r(;LCVjh8~8+Y^&ttIsoE@#eAaN4{9~T3Qc#+$?J=7(?qiTpqeTO-1D=IbzJNV;608qU_ld zJIjl^%lyG2Ig1pv^!y|}&7w(*?3%}N{-dZ?W9akPIn?9T_#73|#sG!GC7o6M1nGrq z$Zycl!1S`PbUW0+oGyhj*NMPaWBiT=$WnZ*v8@wwebV3lWQfhuRki=;{|L54j7S2rdgqTD~bV>9d-#UKzb7rQf)a2_ScDP|Enn0Jf& zMEK)F1$)#|3>D=np>zFKwIMl)Ex=IWcUQ$zt5<6bi%asz5rayK+;W-7%2;vn8=;E%^`7y zlxx?AaDGwQe?+2K)YQ~e)j`{;IO-mVrU|7*`_Ozbl2|uwYOm(pw25ls#QW^*%f)2Y zRK+UuSN*&Bmqgf^l*Nr5NSJKeqm-H2)zJ|L8dzHN{{4G^jJrpV9t~@NMH3JhsMae@ zZa?N!@WZ!WTT2>q%uFAkfc^$fJJ~=9YuX($iDwIn8Fu*;J^;TVGdo-NQkkKxi-Fpi<#obGI&nVQ=7%P$}Q;IHG+t>sb^VHVjvJZYG*x;YMk+~)Pqj|1Cl z!N3akg#rJMBa&TC98R&r*G~@saY&{3J^uXp{B}A#uS5u=;sZHY^7&k5L>6(T^Jj+@ z)`D|DZX!Ph6)XlFaNog$&r6Ho;i^IOQKeGD8>mz&LLdE=GQa$y0(CtQqG87;3_JoP zu*wS1{p=>9u>n`xxOU3=q0AVbv+Z0;=2=-;A<(+D3)XM~D?j-4Mm(TcFEc9z3Uln} zsNLJbLV58NJ*dZ27^A8{*gCJMm^>YWgM()nD70FK(`_+V1!PU@lGG4PBAyHsmu%L5 
ziI?@iML>dKF-pNi%#q-bcGDmT{nfSAff&v(sZHSx^)~0_z>(&C_`wETAQ{di3iDa! zr1Ha_0JR9pqQlF?W>0Y`2VE+86FxY2Fga`A-n}3Fb}JBQM)(A=nl^6Qq?V{91d&xO zquIqYdOg_Ci|JxFt#~jG6Rhv`HQOIz%PW(x4tJC5U>@rkc5Km{Vo6&njmC|x6Ixcd z23?PlTFh<%017*I(E-ip$6!Ka1LllHF3iSO?-;;5b&`j6i@*;ix-?Pk$YUT76mmP^ zh%VDTv4IF|)Ryz(&S^_CEI;p&10$^ZbF>nufa#x&430ux$}hOi4m-Jtthh6YArQr* zu{#k-qPrl)QIbF*oy+F*A(3d(Nk4!7T-;|xnrk$Z@Q1?&hyNlseIBn-MY8}ydw}^L zi^dIVBq3D*b=!uIOc`BlOyaAp-b$fGO1p2~zWqJNEo_hI&kmW;{v1>7dQ?I4a(?c` zt!8p#D3bESu;6>R1y`ehJ{l}&?1!}5F8ZnC)u>@BIi~c1H2c0Y1Y8hJ2EnIYa4}=M zg823IEZ&mODe1SOCY(7iH8UT9pFJ?cBpQ~qk4<682xGr=iA5JiqD$Mg ztxV2M-&w{4>ICA27RFjtgp@1vwkKHD1W<4bu04O~n<}cRewg+T5g8)vvJ!725yPN) zq|817lRhEw2=aUUpb!k?8pi2g*Tb87n~VE9DkAK}2{EZZ*e2C~PY==1K} z`QTJhcU^L%Nj2jEyqAoMoZ=-E%;@R)-kWkYePE-a?eCeaPnQQI4=PJqzCn z{caypXmL_PLTa$E4WxYp3XK)fASZ2e7z!_=55@XJ3icL2yf>dgfAjXOareguy&oT3 zfw%O;d889)NcY*ZXMP{TRFU+By5_|#$6qk}JVem=A%sD#%HWyiSupTQ2tGE?zSB}~ z?Y0;wj3q(*b+c0=r1>>@%=EkTLsm^{Q$bG7_V*Sc44zm%lIn@T@%c(_>AVO zQlC|@3Un-Llt%IIzc@<^j1 zOEj<~jIWYx7OJYMg6Cc>g*u=J>4C&&$Q&LW9aWvNY|hmLyCDybNJRF7&AKG}T3K01 zvF*T;Jts^*cKP6c+9obm#oQ)(B7EpUgI<*OpYR5-^4r5+*_z6Ks?r-;VDSA%P0B!A z7E=T>>H#$6uoU9${$2K6ErO6ASiLGDK~z_~K#(f7^G5bZkK|zBQ(JwI{s(y}hz3~3 zU`pmkeS(=QC0v-98fXY}m^2Km2Mu_^c05$vK7M$3m>q6V;qE$OH}G33uVw%DFgHv_ zUQVt8>g*U%IFN#sveZY&0;pKmCJ#$w^6gV8acLhue26S~Z*>F$;oaTcwgVZ+`5|R; z6kI)hdi2~*=&x3t&w_Wz{V$0+;+xC}Mn+3x!2I#ijKq+@+;jeF51df^`ca!9{Mh2M zGBr21T%ft1ckN1Z88boaRPe(etE4PRR0fd&t@To)cSu_P#0+$YG^@5(CA!jBnyZp7 zkvo)IrA0iYnO)1DQU+{Cx=!yNAUcT3hW{6~;(zUDN^)BKg=}_xHO*)gLY|S(9pomf zK&C1B5AaOozg^5kr?0?Y2qKae9UUD9R)9?s1eZ&pUtdKf3R}pS1mGLf`wfx+9BIqo zZ9(~AlX6MLM0$i6fiZ|O>bwg6i9&x%sxg%$;k%uLcOSF!=AaEStqa%r@!f z2+E6pD}LQof7EPX;}9BU2kI~sF)rlrvPvs27#Zc1b^^-;1QxyeYbjq`ueJ;`Ropk? 
z-oL0^O@9ABC4ICOk&C_BxHme+Tr6lT~uqjL5 zg;`L$!_Ghdw{7}=9asN!3}h@oc-hR#x*K8&L9s3!f$9`0h;kI~zb8oC2;haU7Bo*%j>N!2Rz$o3 zOg+tY!onQyJVo#9>f+nG_av%p)tWWAqg#KIPyT1mXt0Fl@Ac}{(SBwYl^S!(+Qj$f z%?i}Ne3KD^9*Brqo6MBx&p(8lj8Kp~qCJ!unCOhCs24+`{QSz5!6GX?*S%d<;F7(R zh4rdu$%3T3bl>yR_fG%JTIaQoeDdUp>I}G{7G?@j;!DWnAgYLhtA@s>-L&;Aj5U`H zU9%VKSnN>>!*2cPJcG{)cv^2pq&a`hFp{bb8coT!VhPkcd-kWc; z)g|dBKt&A5Zbna;opKsg{RR-5hm$HHNDqw%6lifvahY$k zV^L-)A}wb%$$>Ov=29(Nq?Lmfw;nv-x%`p=O11|hKZ3I4g+bxv^DJu3w^;dz0fq3$ zSfv7g9QZqD{HjZ_R$8kL2dXEAjBuxx8B#&R>`D$~bVE~Chqoq4+s>l=)A!EvU2uN7 zxt7Vo@FOw@Q$#@E*UlFavIMR*{hnKRN-l)NA_=|tp70j8 zl@q-r!}V(IC0>5uZYh^h6-ZgtIJ*GbMy~R{09_Cs!g}=;tu}zVjA3w&z0fIO6cfbf zGY-6N0-=WU$(l=hkdZnZnBmZ`MN-}5m>qRSO#g)BN{*$R~oQcHw70u`lt%(;-% z)ih{5G*_7(D7*Tj;673eQo748$zN7@xrpYBY3TwZ1tQh= zfC}iJQu)a1;1`iN>vA}70%v3rpgsCM1~RMd5%Un;G&%D3=YvSsUt2EzhEe@z$VP2a z6bEeXl^IqcS;XtMpM}-SYDpB6To~A2c7J*!vmqJhjEEXZZN%2mr3~iNcFB6HAU_e_ zfaDK~2*DT4B>JMvS*&Mq6DZc$yIsB)p$ULD6+EUx=wLwq>7dnTp%nKC3euXJne(d| zz2xmR;BF6zS&HKf)rd$WkpqfS1?S(c0?|5xkjSca>&^h+a?9ZiKovNZAkqTRFN(@0 zK0wHV6oNk>*iqhdAArTAAvF%v5r!EGo)wH0K$8F>+s;z!(Qv?Jan54DHiBt%;eviTG5XSUVh)%| zg!MoImq;rhNBod@0PzJXsmZjaAJEOFxsHFW!0|grsi`miBwEegL!2eZO@y_I72fpm zaoA2YCd{f@ltFS+j-x5e3V9_Z$Pp=F(|JRMJD{juCyEbjOB=JKaI!3+qB%pz32VUpodrT+_uaHrazG z{89}LNVrF6JQ^Q~sd!AbtvJyLrL8YjAKw%Z9_}u{zxJ@$xeoSAHvTvO3d17-(zFlY z8jf&`;>;AzF17jRC3w&P$D0d|sY*j3Fg#MXVVm>3$e?xot8thv-h8h}n1CONuMCB7pWC`K$DPnDoaqSfIF z$-(FFk!_fpDDv!9jfp15NKn93_ZbH`GcN?F+;RvLl4HY#w|B&n1O;T(I`m2=XDogc zs#WN&k0*^N)&;nvuva@S72m*q*Kzyyas@FDg!JWJ zz@{dbzC;qda2^1Mg~6FaPUFHtQ|CMLL@vcC)I#DlAE7E;D z7z>yHG43{!mE;^K)7_Ls$57aIJd6*(oVBPFI@bSDTqs~+{Q{qYO!NbBUcd_8Gh5EMi}q}u>DNbg9AqV(Q7b|WGPf=E~CUFjVgpj7G7 zMS7Rs;mzeaCpYhud+!4@GgUdhLUbVC`Ff-+1 z=VL#9<)zB}_Pm3tsWyQfi1?`WKf{M(tOmCi5D0+q#EgLINj8_RlZvZowF`-Y|+ z^D3oV+9szRg}f^E=zLG1{I*RilpVMAlzW>gg>o(Or+?$syeMyBFg~Tp*svzzUbN(4 z72)~r9Zkk3@@LzfRJ!d2>_+QeU0FbMTRf>zzDe+W)@!NIjFyWDYz0{_uR;wz$hN}F+ zFy{&CVdb$w3A(;By;Wh!gZ1%j92`|TJiaQKt(NyqvwL=xEEKpbcH=t91q-ECqR&)s 
z?bqP6>Z|l)bXgq9T=;UILr`Oj`*EX&BSr7t_21_x`1tYJ+3C)^42DJSwEIpNKPgzA zxtOkBeP(fXY}PtJ+Jkejqp)zaV9}Iz*Dk|}?hgxdU0&0@L1uAksk-})U-?~LUf#W} zz+tj?VQI3eKF?wDc;ci4UGzz2nb10}D#7sRFjsY2Hsz`FTa@JsU0sCcKHuCY?3@)X zd0OsA^L5ViJ^o@o1I_92-dE^d=Pt?kD<*WfE>_esom4Ay&G){ti$hBzs-!8+;L-TR z#Bs7tAF8T2QmUSd-Y>5X7w5vd$aOul{yt_^Ma^bArNJ}O!{;@M`$uJ4>z9PIh%vH7X z_GM1Xn{1aWgZX?SHtsmY!p<%|RCbOHHTYZe&S-Vz?yEDU02V>_tSI^$v`aw6;GJ{+t92oq%n5I$rPK=MsSz4x)*@n3;#2TLn!n!$E&wlk1S^4<;wu3u& z?TUK!>QuT}ht|NrfM;We{u=6U;Kp{Efd!8?%t027nja1 zcQLH8q{yhMJ~1tr@4Y-LSJ&6m>Of*|@YBoi8#it!<~!O5t;}_?hg}S+h$>-l(@52= zkY8Sy?!iUH9_$-vv+FoIZc(th;?!nQLKi3d?8ck7Z+R@c&*Mo2Ox(KPZ)`MB$saaM@_r7NC`hyzI2%|Va<`|bcF}|cy{jG84)EFG}z;> zZJXoEYy9chx^?Rg^YS)cE!?j{9u33)2Bak znYNxxb(}U#z4Bo@mvz5vs%aa)PWdXsfGRSmc7=vsP@ge?x8442^~}e!G<&tRwG}c= zH6H~B8#g3r8Yw4hnRsyO7JKYH#K+fkYLI}T2IjWw1LxP%4sM9E4S67$LXx^l> z|MKm-cY_dr0k=2r4!m0SIG{2_AlD;9v;QQvt+!)!C37B?uR>&XUPdRiDOl{ebnwYb zNY4v?tV-om1GN<>#uZ;X-%4G%5`kc32?z)f^Py6y77iwJUpvYA_J(^2A0KK+-jZ4XNGWwDy{>-7BA@8=d40uhgV&7|IU_4Vm}E4mjA z4dcA8oG|_*JItAqlH$QQ`D!gqR-SEd`ChAqDT9)UzI_s1%SKHpk+DkgkLQ3ipDANNtU@%u$s&5>KB{hQl$7-1<{y8wURdjZOVy&b?VzPq zdfs(JJ?;I#NmAAgK0dv!QC7yK6t8TvFzk%dE#?!zZQxT|z{SNSh5T?BjE(l- z)O)LrzLDJ*>J+@-GSj3dChy(Zo8G9!<#S+_^0xkZnLLY?Rd2Z*K0t5(c@0(m_9oi8 zaB&)w)-1UWm)Vf;jyR?Gx@TuMh>DBv&P~R*#iCW^jAXP(q!%xa=2k@>6%e?Hz}`ed zQ{8@Mun%P1cmQz)~8?M`D#cwC(p^v}Vh+|ir}N6Ff6HKaY)&Wvj5Sj7~J zwn}Pzf|HXI+b_SI?r>Yp_dLL7`C8|b@d;a@3ukp`m!RUfP{I^*ah&JYAO;XB&Gfg3|5A zw8UHXo8>**byWRi=rxUe$8^*}(xpTAtcqWL|NZwdE{ijw!iR17!a@K9W1VJ4rTyjD z6k|S5xVp}D-UXf$f}0h()iu7Dj|0EN{gk*5GNT_MfL8X zuRp%oQd)&GcpxUvxMx;(8yI;79a>JCMpsahG4Slf~2-L2Zxg{+}K zqa-C5dO%)ix=uko*ETW3v@P!F#f>!-RO z$rz%OHs{(U1Cb`8Qc00shiWaMs3*6Ke_JojS(G@Uq-@{5og2`SEKFZzNOP7&I6ZED z^JuPmy5T00OE(=W1;+BiadCMRDK^WNGMCj&n>XJV%Dj){^JEl1_mWG zfcJqbOQEaT`HU`Ab`?C0C)Wv3CuV0G8QhjSRYhEvvJ~w3qhm6j2Rjv3iAaly-D%D+ ziAV7;L?0kuUa&MFDgJ{5S`P+jcg=3L12y_I5`YCqkU=sCH^zydn|s` z5ZaS{I^Zx1%f}$If>_7tK?49p_Okch2d`$f(8>I2Jy0V$)R=6yZ$?<#pItM}tV1~Q zVrJ4O;XcYMMndgP+>jz0HcWt+CiCu 
zK%a=WQZP!CgiqN4QPJgK4^C-+p49}m)fEEV(=2-=@`1*3NZk)D$ia_2U0vZQOl&7k zWW4XW+&Vvu%^xgWT~3w`;qTEGvG{UJcC;^Do`I) zJCIAyli;Zb7%K1O4*iIMbDknM9TDV3$AJqf_T66lvd;beEYyOl2P)~)X`FKyX-JmTD< zS#8vVD4DP%H0v`6lg5Kyg3kfP%gV|mZF4mxA0P2%^e_8C)S_?Msq^;kx@;>wo54CN z5SZfW)7KA42P1HPE-7^9=te=vwH<3u)C+S_zL;sMG}fL^OHZE^9DKlKVIVqC$SEV# zZN(w;Mg-&h{Ja5rA3rt?F!ve0)d}t9yL(B4H;kW*Apv@EXw{tBZXE zoLm>L!a*9tLYJ2aw@&eo%UCYaQD$LbEktPp*3Z|^ug97y!M{jz-=eG70Mh}#`R8l= zcB2WUer!X)=XDsT&<7&=kkL2Nb0(Yi@|p(c*p9@tm=$WGj45F;R;H8P$Pg?Y7IMO* zxgWqlZgpij-K6CNzC{`tmC`bTn0gd=!X!?d!9~%JO$F^ZoEueU%RcV&LXK0iy1G$- z@!6L-BvGkFd0WkIvZ^FGzn*+U+Z!toaLJ`D*Nzu+>~3fG{_7j)j|mG4dooKM=i(xx z)CEkb1hE3*9w%VSaiPy&+m#bH$AcZ?L{o^uKoDirn50`3D)N}&}sn;^$SjEwI9 zWJ@?Z8|GysF;3E|wIpfgUWk?sxqu;As;AUXiVQ$vVq)HpULk19ruwVjDkiGekN;p% z=(=1^T3AZQMbqpe#aQ{GU_plzMzip)!$Fztj%9-L-H)PB^olz=I?_y98jM>q65pFl zOin()*gpUyea~}TPr!Eg7}A^9+uNJG57^=f0{zjGC;FJ`hUzlY+6&1PGn8D|KHe_% z~Zl>#V77cMiAI?VImjbXP zYf%B*q^F#uF);DHoW;V-h!J{IcTIuwyk%{I8rP1We)=sbe@1u=$*_ z(|jG^=eZ4A4}2_KndWeCa4_iq`0T1>FfiNzCZm0PmJy~|o$GA|Y9cRq-efaF3iwoN zW$?Kyq#>!IRkU4{Fm)WRb`g5{^5tMtYP6Pp=Q@8U`O?(uYd6FxL`#F4i^3?xja?mS z&%cPj9IfpN<+Bn4L00#KQzwhjhW6K1ipncQM-tLE*0e2$5RTZ0=Hq;oq&zn1g~bA> zI8IjZN=^^_V&8ye30Ki_5JfeVcXchmgMFj+R%hK-Rlc}f2;`=csRF&~YJngZ2Zv0! 
z*n=7XQ%2AY7OrSHJO26xGsgOWXb zYyW?|x@GTo-@JMA^jjK{n9D`(t;QQEr}!>;kZ%DcWmA2RZPl`~7qhV}xP|C4P`Um; zb8$^RuVq;3c~g;H!hp$0lKX)mh4MA8wN(QXzQ6vyn}r%#e0GOfPo8{Yz82S0@VO|F z^=uD?qPnRQg!lj9S9gF#N~wBVHJhO3ch`Yq`O@xK=T(?KxoujGIDzy2ynZ;IUA zxp!~uwQCem!1@@}i)KauY(N^8)nr+8v8i%WC|Q9H+^rE&QRvf`_6gV~)YR72XP8_> zNQ&;=yH|83jNc}1*fv+^LwUI(2oMaAUThkC_?$XEcOg_zZS97wbx%*<+(}25cvvBd zMNm)!>>Nvq+xhdqV_-N?2FhTnjH56H_(dhll0q?0pHR)>vl;ZwFzZN~o3oFe!HY?I zUiP`gCx8^Dxg#gg{&%)8i->4rdY9fzp}b)Cx*de!E*3!S*|TTTBX|L3&gq{)8z%4s z-_QPugO5)QB%3Fbpx{!;bCL{>fp(}I^1@n8q*^!Dx|jz4~>n19b7F-+D)PO z($MeE9-o|4xNyOvrluyixwfVT5Yq0o35dt1V}+|)Sehec+t(oUx?g(lr|#ptd<$hU z{?jMbbRh0jy((o?P>|J(A^8#3Q~h!kVImy{xqMc=F(~LnZeT7YkHP76CkU_v*j)pA z_^8cP<hxDI5_klwB%&Ny#yhmoY1yfINFDoPCIX*tV zOa_o|&tK6hw)%rmgm`SAefbl%ul{YMaa(qpjI0G3q2PC&#;AD<~-NfHsbdh#-?fAdk`G z9P5GSsMoOo)1s!esd_R5ViNERO3*hXB#}$6QlqnU4dqpRsjdfSD8D^g2hlmFH&RO9 zKijA&r1@RaPj)M=PQxi6W~0+w7Hx}4N(9nNN=l5tm5G|B8aEv;0@epV(g5NoeqnmZ zs4hls0X;LB#KF@8Uq4+XtjOOb4y$DlLQ@~a7uM$E;#gra-ed6o;V%myuI}fL9YX41 zp**KsTU%=cHezk+$LF>bod~KhXxr0OV=E0#fhZ40RG$W!=1>Vkc1R{P0bPVvz!7wF z!o!C@m29V{*Bb0YSS8WZZ{`JbeFSE#j717lQJH=vA7K2B$tcq@HaViFjh9 zq8OoMdagiMkX2U?47LBV5xWzI21Z(Uu@}QwtOC|UWv^GIRoBLHtI5W2y z8P2{zyOwe-KA@$u7oS)%SyN@^RGHn)@NHD!#Y_E1UjHo3KICSDmGbk zrUfetMs4<84+thh-})xtw&m21tVIKqXvs8FadysAaKg(8vv%j#CT4l#V)Y4X3efB7 zkairOc>mn4GT%y}L~~<2r4w>g#Cz`mq*()D#UpG2F#@<^?At!l_vxh$D_8*VX$nqw z!JFJ$qcFOXiSY3ewdawN{&C>JiJ&IDJXL$y>(9wG@n{`|(o@OQAv+8L0Hcxk{ar_E zkPPW=tF92kB#;ZrmoGmbwspEULzIkfZLp79|Klkl76kH|B@rPEMH1z$9(=*r-6tQHmzVp5F;XZjcIql<1sKhHv zwYRskhjTYIHL-{D?XCg2fPuiSs9+S87KlC?x2x*k(y0q=nNYr8zI-9+tG}`|&+k0< z0{KCt#&m;PG3bOfz`i!Gmx0<3gAz6{FpwU>i~IfiY!*TgHVN*{jb4LpRa(-vMYsIP zvo&ki6Nmwdku60AsGM7`GWh91vJXGi*Y>4>=%h;C6av79vVbLTXsjCSD9rGYJ-WXx^Owu+WqB>W`!|Hl$? 
z_)q@$1QApEk(DHzm1Jd~-nj7{m^Nv`L$0L}l-sH>rx@2wb z>7hoB3UplxdkfR;AW$ayO(|DP-@pHeMi;CVBL9Pd7k%;IZ=}#8> z+1a~SJmk;>%btrNfwx;N8pMLBBg4i*$LdNnATI$#nAHqB3i8VmittS;cz)g4(KgRy z)vx2@)c|NokhJt$vn&EnT3n9Z{R4p0R%EWM>_bf8OjM8sHE0KP$;k19DONOqtnW=b ztZGSILvAswi}5Bb2Nc{vaF^9Jk&P4 zu~@wGg-G1+oLrRyHhy*0N}x4w?#q3IfG^?snnrAa`uNfND52dmTZ%B?KV~G72B=t1 zTOAKHZg58u$tZjj-fTKkC^Fi+Z19WkCOkZdva;v+qHFSVD{LvF*aQ* z>#@zs?s2SNy*pN1EW2v$ty6|L1xXlBC z_Oe9vi%9KYR(f`39ks@;!zCKoot>WipY&Ta1KfrjQouUX8FkA7YXp_h0IA15qU}6> z8`Yw(p@6_YP!iycPrG}1HlUuuHIsyA&@j)1RvM4}HEhdCzgU7SYUs1gqRPl{ScV3H zqBKAmVQR1XJAAu)Z)l}M*8^TCC$2(p^)T^EgNm@UvI^OSo6t19yC0SkQ1AXT7=1uK z`r3=&ItpMKVXg{kK7018pP!#n_X+aEgc)A9dDr`r=qs~ln3jAPMBVPCu@^FZ4{A#RGa=tRajeMA} z%M|qHSjcHU5nB0`NZg_hK9f=P)cvKJes4* zgKZ_eH=VGvDw}eGEaDzAOXaV6yHMt`eAtab_j7P?5Pn88$GRHCTLhTo__RS%!5ICy zEfsvhe-hjinJz#|m%Je%yFMG9%EAgiKJ%y7FEh3Bpnw4cL%}4+gYkg#k3YWv9HBfL zX*S5SAnb)9Y;1!t?geriy!jEG^mE)7+bjn_JGcjbCDyI0ax|n7SuBR`hfxka&QFxI zAODIGb1n>ZQX%|G#ZOXVHsGTvSuSdZ7(CVCSRw)iky9EQ8waKVbswvLTV@n1#W+Rk zY^#-wS{`cPD{{xG%&@Sg7OR;mF9y0kjE4X=&;I zc$*w#!OeM&27rTaPPCb@3>%)lo4!G}-LXtk12h*;W?V{G8_yv5z z{n`hkcQsrztDop8W1!ddIb!VqVJrvlkqwYJwT39jXlH(ge_Poj zKt-?=<&sAT4NY2Fa;D0IWJUHz%5(gLU8KWujCYk7A$c8P5ehEYCdkh2*>rCWP(MUgf>`eSxL5vf(6X~PHab)u2(2DMMaW5uGf-1%LG6)Qj2H<`cR%#;c@7i^ z!Hxw}!G2!TBN<}=Lv>M7`(Ywbx_e^%1gz9<;8!TF4?n8 zu%^?HsR!WeavaSj)8Ny~i@h*h9@^(yc}RqGSEjAJ2slS1xXtHRYqkKLkeDKjRW2o{ zqBqUd!@@wI1DR{{<=sXi#-ST1LvRF}rgWtP{gbpxe@$jgj|>t^X^N03!61jaE*L;5 z*XZo*By`Ob#$&nqh{ zD0TkeUemC(WOyS(D*TuL+no*9m;Zq?^0OBy-VoRR2dP z>K>esi}E5|Joa#&0h^>`Y@7fSUSd`t^-f^PgA-9fd<|x9FU5^;Gh93`8ufbqzc4}T;YbcE@9I5jK`!fqI`Ax!hze|_rg?5qfjIl;p>H_+HFqG!Ddln|_{ zAQH}K?pPe?N#fsz{VJTaa~qdPPY2T#N6<UmD%WJ*5PU+i*f3cj@Q7;5_+pNk*SYoft1?)*imgYZi5Maz0OL}8U&jQhK#9Hny`oGCfLYiBXuKhDSBD9hQuH%#ed;}DVmy|Jo$r0Pn zG7etE2#jW(h0&J?7NHKkdxhZaQBS$@fi~#?LMPF!p_1FoOiCIyrM!0QaVC$=Ya#x^UT-dwf@dx!1gYr zumgej&ouS-Cc>*rR{kpl@`IF!s*RTMS4#vzx%3rkw-=p26lmVtckb|~-@S8(m5nX_ zy$KN^hTVu)RKWHEGSs^14>=HA3srB&&nx>jP0Bz!rd++%(c?zf=i0!O{ka?2=vkp_ 
zrOsQNm8O~CCO6WD6dQt|XlFn}Lt~ezNO`4n=!$zZ+bh4vXj2a#KHNMruA23C#E4!c z4DoC!h47n`(%pSRr`V$!RK$THoG2jjq!MV^&39NRJu=_`a9+qSAmD?L&9$OEI=;9n z9H*LT$t7E4*f)D!0@H1 zHBLz~1VF#F|J4ol6PK+##TnKyK)NT#c8C`^*KVuqaUst}FDNjgrS-**VE^b#+IMwloLivHw%>Bmu1$im5fgq^;ptFMV&eHH)|F!Y- zx3T`q_}F8NTr5(5w$}Xdb58@IydN>IuG-xt0%}@48 z`VeDzI${+XmVBmZ8?gn_sBCzp0251TB90qMAkh67i6a_z`+D3gQBm@7N|HWl>$PY| zn7$Wr;z<#L2UEy$XtvW>KCG=;Kn8wW8H9&B2PyV2;GD{)rKODoAyZgdUS0DRx8mXzN zL3AN5`n9>M8809Z^XA>_Q~ zxAUGzjcH=#0$ml#PJ3;Gc@-A051J5hmZNv_8~oJ++cPqfNKhX!Y|}dpb3(MZknxR#M_{YytGjgsc)p@Lrib z0t2eL5fn4RTVTI>x0_Oyc*cV$r?RJ-jgk$Z%#focQ_vY@;7XnS?RN(OQFQDq=f9dkuEk5hZCQuICh{Fs6G? z#EBp&P$IGzv35cO1#xp!EAP|z!n}XvU(Zk{YUV1#*_hKCXhh8S*gXCbJNTW!(cEEG zPKbf_Z2&thIf=?{-%gS6G++Cw2<%k4ZpA*KnNK=+9@Z2u(skUR$kiua+M3|d$9c$CHMk!cp~-SCc&fd1g20!P{U9bD!mz=&tcKFKvd-uttdWGjC#^zF z;r#jY_o^nfR~P(=Jm)f9uR=KEJ2VVQ#D^lhI#(iM4S`{RsGOjTiIW=clEoW~L@*@7 z6*lKXNr^Z-B<69egoPsFJX|~&8pVl8&Z2@M@rv@wA;Y-oGGhQ3Q2ulk7>B=yEf*}) zWSxQxUo%qx31sU+P#c_JWWsdv!L#YUxwX-A8qGWi=cau(ZM(w5L>w7rfTB zIn&GxK|ZL8lUK(e0@^mC`@F_^iXxnpW@KXGn>K(7Ku-SQd>_=IkLQm=SjX{H2!dve z0kBbgs&+#O$MHRKlCDJr>n*)#3`@L0Lqd~i)y7jFj#;jKs@msOY222h1edZb8E3=7 z#-`Lkos`YM7^R3G;#87}&e{*Nj{;0LjE6ygA#os!*iOxkj~})aantp%h#it&p8wR> zP{rVPm>M>z>9#!Wxtau(pYbpbq4}iYz*U=dt(5d~mG%a62Q(rL=x*eQH3(4Y5lr17 zuwx<0`co?A{ga_VMvxx*>e?D2MA#00dIlPJ2*K{JbP)$kIQFr#i&JmZ7A-8z>keAE zzizH$P-@UN7uU_UKrX_3F2msb^?I36F>zG*W)+JtEVeqLDYcDuc@iB4ZAuyu zVv^gvyvAHKYmICKXlNWC6GKK|%C3iRjGQwghdr88^}L`Sab&rGW+ONbWx@yt(iTcY zR-WOARZVOa${f$7H{X+EmE_$h56^K%efq}Y2*@_L6-ab4F*6(C3yIr;D8+5IEhgaM zUG7zlr&bBq!lao%Diwf-dY=7D5<7KzI7tJIHECv~CHk+27wW-@YJJfO9=pdJ`Z1W# zY6|+&VD4B!EvU$JkX`b*b?dJ=m{N9zqZJX>9p}s=095*MrUys5RN#t$VVVUeEV^&B zFf%a`0Z`)nd5F3SIEqJB7{|d%tM$+@(A5bCke8Ro{o5-_n^l$2?gK6_n~WX7rE%j7 zD?wwR+sWa?m6^=K!w@O~be-;1S>VIiid=H4LMcI24nW4T3Fj!o8gZlXznr~8&YNuB zbwn07BjM~ZN&a62ym(0L!nSb MrOzdu`R&I40;*>nEdT%j literal 0 HcmV?d00001 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..8afa714d04b52320b661311940fb8a368b5f3e43 GIT binary patch literal 16115 zcmdse2UL~Uy6rZ`9yKZoA|+8!5Cu_+bfW^&dy%RjNKrZ>9ivF3NazXziqe}>q=OMb z1d$HXMSAbOy}7*S-g`>UJ@<|G#(3ktjFGLv-v3_zT3?ypob!JmFDt%tJMDG~g|buP z(nUoI0Y&Djjl~Ufn;XV@Kix90vNkq1 zJHyV$e)_~uMm9DU*8ChCH~-@e?B-V2IewPi*o=#8v$%BGnnKxqmHfXUP9)ZtLJ__q zaq$=B+fPQ@9koCH18j^jZ?_6JU={}fhvG^{d4PGq%y&{^O6?c2B4(wlAi!|j3xa&Gd3 zML3S}h0III$OKgy*8RF^hi8LpU5w1s~X($FFWta_Ea$+T8E+d*cq&tjf11Az4s~I;kGwZug%&M!UgOS z9OuXUm3u<2+fNRs7UkxBpXKS}(~xFR5f~8AT`g?hAt)*;8h^F;zFES^P}1devDPf} zrhRgPU1N&u2~n$~B_$=S+}xS<;`_9?0tX@L^-Cd69OHYgt`g-R!_~ z-{WV`s&{mBBB#k@;gmet!otGsJ9gAf zbQRlveYY((*L9(5%j{sC_?pRZPt)io&BYEkPMay`4m2d!# zBX7LWvAJquq&eNw*H=wm{`ZxYmHKBFwnV>p@wrns|MsgJJYj_kUw6bhACn1vAsKXh zaG>USh>%>Qurrf{R0w-!mcjMwrp;*vw5P9K@>_l)Xs>ug%v&Do?in3TF7-KN`u5(o z^2$ovfslZJ=i&_Z7c)&7$Sv7c3C$0@XLLO=^$4e+(pl)Nl4Yj7xUj$?D5#ZMw6+74DX6^hQt0W%r_M4rB+Cq+V%09H* zHmkEuQvB;8n;quIk`Xf>s;cDhV@2GhXMXr=`)NZ2!iT0NwVkwF)n}>%pXXTjmm8+z z@EZm-o<~I~M@#NU5UObB+E^|eI-YUmkMFo|w$!iXaA;)2l~r+_Iwg;AYeWcnIx#l! 
zyxn;M2kKH+8!d&MU=zJ~(WvbApZT4aGU})u92{~rGEJiAJ6&}rCnq%u95Q2M!eZRq zD61>eRsJvPV&#S(3XE0b#JF!yTuIi-QA{_iR17#O6X@q>DLUKR+dEjeJXWV|*B-rF zc;Or}Dxb-CeQnj#)3f&D<1?AJMm2<%N79#u5>sv7)7uT;i?YUxtovngFp|11i_u4M z&<%-dY(M|}OHNKsc}0cNb1@&APUm@J+(3C%mEGBS)6{~PrP<-(wdL`m8{w|Yt(`Vs z-fXB07f`eA|I|?PT-+APKlJ9#M*mN~+23#4#J9-}zc8K~Y0h0=TUIG@S*?h8k7L31 zXLO1Me%eC6d-v{=(o+6e5y!bC{#zq)qFx7Bg@tt(ooD>;MX_o+_M*E5C)^4mImLt= zW~#rIcyT+-7!}}ZCUr4Bxwl4#Y(8q|-HJ!FERE%j&!oDpN-?@DJj=BkPw_h<-m>nj zPK3b*S}s0`(9h?b&`bvq(KmWb*%gB7$~ zIB#N-5Xy5i!MHAFu-jLpv4W#0$=mw}Y;*a~7oDdod579^(p^?()lO<<%i|1d=SEv+ zzj|@-hxbQ^=#bc}Pr7`jVEU7Q2J%ogQaJh6Xp3=EiuRn#jsqvEzx;7`KLbOGK}GQ3 zxa)eJ&h4&E$tW;oRdb&N#_RhdT~kmhv{x7@O-DDXe~uFM@@6_?+IT|Wn=@7}k_Y?5 zjT^Ea;JsG%qf)#gbGm6$iWh@v?F$Jpe+gOHLZir?hj?ft3{gIIjc(b^z~%pOWpR4+ z=Eg0%xcsH{^u98uK70D~%?d)Gulm^qG5^IccXyABx7#*p$2iOm9X)eqI*ps0&)n;o z8Jq6jUV{kNwMD6jJd4f)Cp8}Z`s;U&j@Y1h<#_*tJU1Q{-2D3PL1}Ai>k3Ns-0K%F zn2vn7(Uv{w-~U*|wb1Wc;o6evGGEd1m;i1dStIi_D$MCpn!0>@>pGOHWfyGVY5$J` z<(j8_^j(o^RxbIrkaheo8@vJhDV{V z$XfQ~>@4>&xd^UOs#SB1z|_K}@=K@9i&p2Dm-nv;n73uuTskdaH zdiwiYTUC=a4459HywK`${Ce@o2SFrW zNYV!y{ZHWn*FQbs(@#*2|2nZKG*f*c2DPqsqD #GK>S}#@bECv#=+&BcBzzy1J-^8qseXUP$a~cb;!&*66;w zTX>*8`0?ZT?Ci6fwrr`WsE`Ueu97BqvAw`C>&3J+6OUP2oKAs5JSr=-&>9jTUehe6 zO?7dpi&0;?_cu5PSFS8?{z=$S9J6NF4KjFW|Cr`+=Mh zxMDK9_U+rL_U(KAT-JIWWC09Z94LB)#dv9SSi+(`Kj4iTO$$cK5ty-n69p_ zOoZTNnNTi;-tr*Zg^8;|6K^)otunoz5-RmIC8ItdgIn*MkOSTYD(hG4Bv*5vgF#e%44Xa+LY-EX?v zqE;uAh2jK9-UqO+W!>tcV;Ec)t`lk8bm`t9b#*ipXUH3Iaes&(Uu$*0+V$aBWIejy>>pbgjZkY6 zbP96oj3g|7pyj$$CA^$uSQ)C)L-+7JR$i;7>&&e0U3~p9vKj5SS$sckCmMfaWn-&& z*0&=y5m0ApW=0w4fMaR#-u?TzHYeS=f#GVOIj=6w@;l7L39rs5EgSi~INa|`)yU<% zWE0MBD`{RZ8*F85P4GwT^a_qnA&lpyXHxImFP2@My0>n<7G9aSAoJv`0uWkyR)1d~ ze}itK>Pe3ebMEXb(mRI9ma#9=ICT|Ol&m#+Dyr8piu+O?Y%6&ghoVW^VUwBi|O@>+h|8ENPAJ=W<=NOLboq{9CP&FrG*H^MpXa&<(C$p*~k*$f`$;NTeTELxAUl~BZxg-aB8ttIb|wm9#I4#{q> zoxM57`t5Fc4b>DaW2VQbk~wAbzKkMfPEJnJz6?U%oTYSpR_{=;s*n#&(qTN6Nt#)o zPH4XVC^%Ji#BhGBy_81Z%F0T5y020O=(V!vLqH``v~mlh)0MB?-IaiAqwlfhc``uI 
z9}k)5Uu{g$HZ6TfZ)%p^b^q`&Dz$jUlK9v8`GVF!-C^io(~X+ukC6nA-ZMCU( zXFN5Gc$dc2We`D?Px!1q;;NO{BN?C~X<6Ba#g@Kwe0nG+05x`FZFolrS2mn|BJwI&VL=r}YNUG?3VVVL{g?@=mPOvMg)B;U6n` zJ8xjjFpQB6H$f+@eD`3VOtfT>be38B$SM$^%<|l*kFT$<{a|#+M{=v=4Om#XaNT&( zdIz9mahuEXsNvemB7sYWC7$~Y5gai{-#M3(aQPeFoG6$)&Py|vMgo>!@5L)Tzn{|^ ztod=Ox1zExg=XNMZj0LAkHuVu`) zzS8#~_y$RmE+Hv-noTw76dLE5pMU;Y%%9I@;P--YDUO0`*w2p`BP)UG%}L@11O{Fc z^JN%46>mS*mLulBYCej+;<|eE>d~`jv&*)X$O%r~1LO`a{{Gq<8vU)jFJCYSInbD! zk4p)Q*5}!o6WE871O?HD>l$LnIm*IPrktSsp*~f&b7rWaqH8cR(siv_aIq&K20bW7 zF!t3({DGGzl$KQE3~S zjK^pg7b3sj55u9Bk~oTwIjQ;jQCu9067URJ&Y2sZPo)C|crn zj$xP;AH92b@r=cnbFmgm^72>CpFjU3;_Y`RBZeOtOV=sts)mL!_~FdEeR2aqYC5c$ z(@7Xvv5-jNrGXF#~9F(6ud!~_Lq~JF^t1cle?X9S!#F2YWPvUZxSvV#x zLI4T0MQmhxV6joX4I>?yI9IzF*Q-$)W|7qiDAUX?5vZ7hm`5C@J{(iQRE{CPpln-R zQ^G6#_a?}R7G%Qewr8H~7%sEgZ-pV>Y^Qo8PF_yCSP{a}ly5)HzO>*XG5Gn_a}08H z!3>n`lNnT3EXiE^LLvZXbOi5xq5P{4lMfB&dBkoG7X1vf;NBbT6Z1vu+JtJr437)9 zYs)?;tEy@OfJ;Oz_+XeGe>vTd40BkrT0;NoVt-@f{T7C~)2|MKO^^87-ymUUOmglb?>kx18Oire;>vr3YW z@U-=|pY9t3#h`lWQkm#76B855Pd~jW#IUBT(oUf~b+XpX=0lr7=h_b>Tg^a2 z6Nm8AzO@eaAz7zD1FOlqRy5iDo=rfYwUA!eX(3?<^mlJqNZyW@%{O+X`f$qL6utcN z${ir<6jXGrv`AD5AcT~di`bGo**AM)z(oK$+R#RS`1sL=wle2;PO%3M9t?FB73CKc zNTo~TeSc)1Y(tS?JAGOaeC4iDynq`m>?tI)EoLz;n}O=u6MHBW<#T2QC-*Zlrh>~& z1ffBwDm4AT;NTccp%VT56)$3AO>3e=cP&?j@yaD@W~pAhcu%05Ec%gVu?#vSU^|cl zlfkuXEQb%DPgG4#6Ilfz)|JsG13Zdtivb#Lpl9AlakKh_el*4&b)vVY2RnDJxw(02 ze%{WGk7c7zsdLZRUP_>UJ!^Zr_IKsKv2}KutuG=S-TW_RZ=g_u&;Nh%`gP9YjDG14 z?)UGPgVgl%^TV`J$Gi-rZUVY?`?hURBou zab5s&Zor(^02B@yNeT`2gjFPHfx*~Fms(U&ssjgNK7N!&3rz85+k$fns-)3xvz=;8 z)=UJq7s_WHQ*aU-E(Txu^XGq6T1S76vm5W=xA@|Q-fS=0|M9~Ig>&cJ($dp=$H!eu zGM|4n;urxu6+1 zt|LnmKylFu-hPRMkH&;4O@PAv9}XS{yJV-0^|Oe#<$n6p6jZ!)={F|UBS$WvQqHY` zaa6IgN{hJtbzA0*WD2)z7MndxC`Mj6%%4>J{aSMSF+ zJmvb)3>g^x|I=%>P}@hO5ik#U4wY-yUVuumsVRgs6b~k)ZmxK@NZ{2H+`FJPUmG2^ z|D9|^VBiad=VHdlP)P|11x$wZ$iC?4Xl<(^pfi>uM~VwcfH14>qfp{LYwh9%P#*%2 z>2R8s3CBAUWQG4(KbrPmCJGd&p1I1py6J==NQI4ER@u!Ue$4VldB_-!U(yOYWv>C` z(Ng2y$-Dd=)Nu$syhw$nk 
zAPF~?H(-xkr26s4E1kexUaqgOX>W7BZYzP*MVP@^(BT7MW*gC5xD7wjKp3&5J$1$H z^y$-{wsilQw4mu7di=!eqeORL3O6qGJ~-4-khiv2<*GBY(EZ+54SmE}z-hr^l4CDg z)>$<=Ga@7IIU+{rP;yuJ$n(|Zc~gw!l5%oQ7)AS~#&q*;-2@ZG?^Aot(2(D!PfHS9 zNd_vNSK)Yf_YqfDR}UZ{^^l4#GBf55#)J{R6vUkg`W+a;-;c^gBvn_-gR4*l@zDs8 zfWw~7+}xay6d3lN3Gwk=uBoXhNjb*MEDD5cJ$o9-B5I$LTMW5}8}vTm7;t!^bH<-v zJ_9jq*Q{Ui{(YkB`kF~|T69!Y6c^@A#1ShyyKL>ErsUw@ATX^TlQFsik!8v8;J6DG z`@%F{-*}8>+Bal?0P(WoYttG!ApbK~vy<-ZMA|Co!Jwd^Xw+Jg_Bd>id_h3}OcLO5 zcA0h%ID@dq?u0}}Mw09L3NKy4B>I;{H)C!Hq94_R>I`} z-FM#whKEBmu+-~=+85YZe3GyaJl zfU_(L>>gpDcv!i(>f?o&n0nGg!iXN;f~dq4(m?p5`a;NHz{`mjJyDUe74eCV0!P!H zGQXCWSMLH(1tBhHhnt2$!YN~DmwGOSt=6+%G>-oeaBLW32jPVB^FNL2fG*y!aq}rs zzOutBx}a=mN=QgJbh%S!;5$IP;-JmaL<({CUu5w;=Lxk55l=Gh04|w$OSG%6%H~|qih^Zk}S9f`SJgdkKa*cl?hJ`QNvjzJBY*^YN;BYYbn za3=Ovge2lu(F#W>lyk4}U7%PJz;0afCmJkTlNdIIh}Y2j$MAXz41 zE12S~ddovO^~K_;u$30fTA|k6gM;zx+PMSRk7(sx_(+4L{rO2kaf9m!6g!DiRjcf^ zn1+;Qf?!*kDBcZ({_M#UI+AH`!nGpgoghA6@+bQT9oR9CQw;4IXTq#TkL~qBM4))F z`0(ROlJ{`;2%BgHG~06zEcn`-=9;gEI2BCUi+==e4LL>yLQN6kvYmE@f7v!d3-Y*`K#`a>{3LU9qGWanT|Y36n9>f2pZxEgA(l z$i^8LX1C;P`{Cf}i1?&JOZ`?L8>-3TAlR?gk_5;y3_E=@}-JW;~ zMT~85Wn6;j_x>o%I?*vPxi(2qAi-Zs?mH?iFDdDXdJ>zWop=1?$x;7h;&{L^NUkUL zqO^FSMHBvA^Ysl&lXd9oTt?MDg*@c`FPiO6Mqw>PkczT$pj1CD(b#X5jZ|Q@3P*Qn zF?#=yzC>77M<)Wi9s^m!WA4$TM~91*o-I!In}V$mJ!`4=>Qg{iSmIxzn)HuS!}P(K z!8+Md3^b7GIp!>7pPYS$T-vmGGk?aG&6|M*^hL+gaf?L10X+2!QJd^Lf9yX8Na{Qk zH2KMFIx}*^_wIe~@lyaw9{$p9`JSGg`bv5;iNPl=+pB7_%yos*^723FdxMqCfN5blSHQIg9TR35HFh#IUSaob<1%fO?sS^= z0T&ld#tmU-jg6assD_K?>2JUNrpNdmbr<7}q^#^MBRN@FRd8-fXt}P=7y}3u#OCs3o7tmwoOpBPu?mas1bg{ zig9Qpji}}^n2#YF{}QtV7XsD(_HCn;g~?p-_7%=Au?dZ8BdvU3f1*|CNSE>yvO;)pd~<)5Zbflr$?~K(1F)JQLd#%nGzgAb zNSAT&m=u+5n#b+5MSt!p#mtEldfAw&n@Yr9EQ6NM>IF3MSZZA-1<(ZSt1H;IbH*0- zdqgCNF3gQF0bT17@q^$7W@ANGC@qN@+1UL))W&oS-=i3#D|Y9+mVys(j=zp?pf zkdKD~7MT~U~S6r7SUnfPfySs{BB(Z(APsQT_sm} z+v^twb=DMOcYC3=VgyaC?NhIXHRe%c$YlO6U<8CsFG3L=asQ!16-&gUII88MVFJ<{lKt;Yu-e5# z+ic0VPai9o*TFx7Zr{O|q@W|;zJ0r_xO;uG6R85g;z35=I^2-wfyq`_9zc-Bf>Do- 
zjg|68WF&z?3NiZk90sHR9glc~0o<9w-Qdve!%OF~ zN06UCwBf-9id!oV29~DhfaC(1%gf8dKqSlx7{1=@M@N8!CWqQ*ZPJM~!{w)SSd>I0 zYe;b=w2;}J06BDuLVtMh;Frq0_F=Ej!82m;&awE=;(eY3!JsRu5P}40Zrk_qAaoub zaCgM%Hw)!6PwS2M!825}w8x>2V;X=FN``tg4~zww6)<1>p6lSWmgkfNY{TE&p%V=pm z>2O)TNq^Sj9Mm2KnAwz3BNN%*7^d^(^OT;=)HcFN@WZ)KxH2V0S}mEw0j>O;arM-N zh1<)+F1AP(N&cd-N|E(NIibmS`_vo`lJL;l&I?>eW_TQCCW3W!P|mfM*nx<> zhJk@SbxYjm&*}yS2F5@Z=!5d0dQqgJArwm}Vv8^jjjZn2zMTt@gI@5qJQ?#zhcGCo z0;+23wMLOQJwC_Ji(IOP_bU1R&O;_BdP7aAkr2XET4saJHs<8az5KNEiguE<^(ae_ ztr<#7G~Ra{asVNxp{{w2WH~KbH6`HGS$=`q2w0Ila-;<%XRNRGunYd|4IfT-!(Nwj z{$nTkpY@Y}qJ=C@^%9FBa4w-Kh&2H;B6~4f&^i15MhmFJf6hnkZ(av4ql8+HB6OTk zv*2**0U#0s0|QT|06GM!@#uS7HhqBhkqkGWQQybM-&oGsftm~3wrwln7lZzSMv!zvM1i{_?Z%Oi}LdpJNnsp2$Gw^H1!D&Z58h{c2 z|LyVtIyxs6TPO%qm81JSVQeop)%RA?R0w9*PCy3A0>GoQj)^Y&FpB5^gwKtlI;v@( zK?D`$x7mJ07FNLCWgG}|WSucr|9O0Xmft3>!+BntxbH$nhQHz4$ix(C;M*U}4&^|+ zRs}hT=1boa5D>7Rp5D1;m3$w>&OwYait}UAObGagZ9M4y@Cy?G1|&^98eZk&GvZ)g zJ-xi3{{<^Tlf+gt4LZF%2=+_Ic~&LUxUN)qsJp*E7I&AWji$^Z(nf3cuSlGa5kh2g zt<(1viXo1T4q>RjC)Hm*O{%~pdVkk!%CX$?HyIZ9<8T`iCUEorthD&=toF}ShZpP@ z8Sl^sdI3$r+6HMqL#aP+VVVv9MQxnIbFQmzzlSv_4tn)aOJ*Xfn&(I)20DJ*AtNxn zx$uXSX9G3L@5$zFgI_=eBt*l@tHo6C{9;mOMTbChJ(1=LbM_371R0FCZ{5n$P0~~J z%WI22%Y|3dJ$MY@*s=Fl5i^awuPvl0oT*dnty^zqHLs7OPJGKCh!n6IRHWJ9He?9_*)AFw{WrMe zpG{M(qjF?i3}pK61j_%E_xx!6=-~Tj;@D)(_LPok@TPT(N z2YyU(RTy82&+6aMGbHga`sTt!$A8BwtH+qxc`V5Vzf!7DQqbc#p_~ zR{tT{o3rkuIrXMP1d@WsE_y|fexR9?VHF|*zJ4j1KQo3_tI}laf7x%~OC`AN+jCq? 
z!>Nfbt4pE6&bbUu6Td(?bh83172kJE)+TrV&Mz%r8zCNowhe7{bwO@EW!i&({wwJ) zd)4F2hA)zK+lw{$LLe4NBThG4XzQ3)6a^fn+ z7lN$TW@1I+!PmBAnMV>r{MVfpI}f)};;J!G#sUO`k= zsIoeF%nB$eC_FqF=`%#Y0Fty_`v8Sk|I<#l?G?~_VQtBUDUhSt9FwSl@*cM-x7cWW zmTVV>tn#15zU|I8nL>Sg7WoyR+6gQ7uOe{4XuvpsQ04olf7X3uPPL_C<$zTZ*YQUXZM;`JB*5Y;=wuvhTi4;y;P@OKzGOp{( zoy3S+Ug5RDy>E@aai8Q7qXCqW)?rG*ekCMwMS zg8_2F3MeYy?za0i)xX`Xi;D{pD8Url)VLfGe|RDd~b7>7Bd^iu`@`XEbiixOHiKOj%w1QJ&NW`Oo}?ltoara`zg00FJHpM}58PTZc- zcFzK_OU&oR!$_r^_`Q_gZMYJ$m6*&yIuU7_gmU*offxgEB;JV;H`kz19@{3%+&mN6P(H^P!n2O!7jcakEiC@jR!N0b`)qr zAMvX+O#fEp&sO0*8SqQPdte6 t>5sp*Jw?97{X~<4e)WIkFPmDYM7_Lo=c5Wgp>+PePg^k-gtM6WJfof{r~@3Uzy*WbLFn0ywv6mEE^~k z%4X>c=aeaw)j|}?s;=+X;T;oi4rl!5r2Y9z_9|A!_D%-2Mie;%d+Tdf_SZ}=|Kw<7 zYiDX@$;W+?oA>BXSMBYs?Sy%FEdKrhZYx_8p8fJ`TJa@6SYObzqfi(O$iJ)NCE`pe zl(Pq<&z)Ai9y;3TrlV@KyfQKS{c4U|w>Fr4AD?hU{a(y3#%9mrA3u@F?=mkdi1~f& z@NjP7+h=XQ$(yWNX#Digsjk@-DU@S(S!%d(Uy(m=wWLsPCj9t+@T$mF`1OPL z^GioJY~JkP$lhGx#p-^ajQ> z;2RPU5FN;=(U#3B=AydOJEqD_BZ>EQ^7jb|3ARp7^bEuDP+gg9KF5(uYuB#T)L;2r z%8;m$7T$HecXM3xZx!eBPG_gzn6s5`2-bH`P!OMc^|m?kC{NK8wZL&SvA5iRxYZ=0 zc|GfmNRQ>oh>Dt;meJhq)U_LT>P){2-XP?=Td1X+)gyU%X+i&NU!X*PHt(jh85tQa z?|)stgNfMNovu8h>1mBi9m3KODb}%l&Y-M?=MMAJLh|A9Q zeDF-8g2YMMcX98ETyrlQ8aAitOT@&+nl;5M74-Uv@Y{a8fWIc{m$*OSHM-|G-lh9= zz`@B$BbZyaDczuyL##h=adEP?prdxtqO(A4VP>@PfWN41rb;cvlxz&93il~Q&htK0S7@3(9 z3LJ*9)_T^>b0*SK_zZvNZkI@{jri2_qn;bx1F83 z)lUQ(ai40JF2zSl`ByYItNrl9502$-Q}t2^53LgSB}R!-efPQZHa0e2W@j~0w6dFu zT%BU#;KIA-gA0-dS7N{THZW3;PB<6w0TKn!a+E7bM5`I%`VI)&$sA{W$>)Vc2b_S@ChKFsqI<*X@g{k0K1 zcoM6Ur54K$DONXCOG`_^?n(ynucwM!rm4Zf!DjT-lvG`sS$mF!V0obXYZ4@NPtU5A zxVvRvt$#Vs+~L&iwqe5tWi0EC9Xp~I#!E`7DotsQqsrH>=ZT7nHf5Qrh|Rn+)GDw~ z=p4|R*DEb8Rg06~>+Ir^s1PA8*xl3ZUYw$x8$_eg&YU@uW!9X?E1@DGAu&DHsqMX! 
zi^EPLQRwC7#HYI90%S7KeAqA#n2-at)Q=ECjpu&@+ut0J|ulI4OAA3kIm zRqm{gkkIey>YDHIVm&C0Pc@PpEc4!F-jQeHvM`#9TmL!RtXbyd$&(st`btU<)@|7n zA1>xPR4vrq6nfI`pmev}Y}m`v(roLVGh~Tf7AMN5-?Bw~nrXM9<3eU)?#RS~=@#v5 zr*XaNFcA&yoNG#F&fMCtaif~5YBaWT^PUrmxPN6En~duGT}LlP9+rD@_4E5bG?FLB zJyvMEyu4ZVgUVdG1*&ZFp><(ST?sxuo1~TP?Xwn_mZrZ{^0e=9>Up`AORwOLCsv2GPiY z2oF7#IQizt)3=*SJUoh)=ZA+{GvoHTO(|KlWeq+z3=Ge;Z^z}H2wHHUuFie^dV%UV z^6n{uWN4%{Q!DH0gRUEsa-Fu7T=)F^xF@M?UnhFiGL7X@kZZdIuPGwSDn*^J=Hg%9 zubKW-UuxMc)O{n$eQDmb^|?`7fkP5XS+-@TaHrdBXN&j&DZhY_kPzL?Wo2b9)6E(V zvz<;!r*E&LMr-K1sXch`;LDdU4V-C(&k*Zx%gd$Q=BF|?o}ltydGllTwN~X}dRngM zZzv2T7oJFX6nD9fi-}EE@m0KiTaQpO+x~3Jo)eK>MXtkf5$@l<=)Ct%)9zw7moIPG zMr$UUR1^l67sfS^0OY2~Qy`#JaxFXUKfgb1UV>I)hN#-`!w>bZeRofLtjv12e7MV& z<+d=xE72}8UZ_HI8gD^4tnBbNtD+>PBMF(QKDY=ox*)!aPgG3I*Rr#qb!s>ASP3iD z`~HvH!F$ZdI`Taa9yoNUkLrd6Hn_k`UK`y%zS(+~nsK>ogL(U-u`Q+_p!g9 zh3c~~MjL;geNIYhH#NDp78`p1%Ed6@=Gg8h0_M1*q_ko)3*>58Jqj)Dc;&nk``+37 zf$+>h*GjHjCaRBIh@fV>`_i;VltHQI>-+o9a*Om^>E>Dgkv91#H>~*Ebe5yl)c2j+FyWQK5Z7ThW8n~I2m9-a7lf#^w zm-jAju0O=L-I7+_rasRsvHZ1HBY6e2^!422fUmH_5ED<~=Udrs@6rtH#271pBs6*OnrKK z|NebhziiYuQoEcw9W?H=Tpur*ZOiWK>tjEBSaQ{>RkjWeq>rU+JY6zB#3{Xteq^L0 zU!~NGp=4yKmU9N6(&7rX|C2I z=-?7C$nzYoOX;UZ+jHrAojR0KCw2TLR_FnXIcVjL5zAL5UA}(q!DX`(M$I-c9edI5 zaN)x5XnW6|JxjJoO-(gxeI9ex=_j$p&!sNN-E8+I7l)B|O4KRTWE7;cEwi>LbHgnF zFMS_Bl71-I8!%nOE<4!wLeiI|b+q!$o4Vh(9{4yu&a-RRuI6M-l}?&d@Knk8 z7oU$usbtsC(8w<+sAz0d!EQ&%$%#0Psn&*zrDRG{Rih*~Z)O$mFfN?!a#h&Hty2e- zVKMaO^Jm4`vCfp>`wXn&$=liFb-vK_f*NZ1MMP*-OYy}zX&q~={m@a>S>2`{j1|r5 zAimTod`>1z)L9w3l~l)qY~kYJ;R(sX?VgvE96}bl zjAWEI6}!1MC#XmrI&^5LA;#N%dCo+7mz5M3H+S*nPAb3imn(9iCzMdc)hGL_r_(%^ zWRWj{B_$sDLr0S$N=iy*#=EWDMh6DsK7G1MS_z+VRUHAYO--u!>(}RoLRaffWt-Dm z9vqP3ZXd@ZChF31rg>{(%cB@`v$eYUu@G};VEi1 zy0fIiP*eVhE7S4IzvWo8$CmGNy_nnOl2lk&h>&hV9ECi=O>+T5(0V<()wSrT^IFDo z0%lF=er&q)wB-c>(S{ZA#_pE2c0YAQR%jL9$lp|2=ec=zMPXD=poU&O?(gG=56|$< z5V}|J-ZW7+nH9s?g|aXsuJ3|IO!glf^yVq91BlNC3?p&Yw!E-5Ok7;t*2X6PvRp%% z1QQ7{J@+{yhrx#P`T6~he8hBC_cHKP3r 
zxG!E0af``hn3$NrvR-&xzT$q|TC+Rsd4QiAQX*sqc|45Zn#Wj^k|{u~(2)RUdZrX5 zIasz!zX^S68t6p(&Ye3fLYASO&U!9`hwub3Ra)+>qV6mEzD!O|*NBW?ZAsRAa@^1} zyC^-qF4bLMUq7U5ImUfOmn+2_7&o|RbMe%IZNj*Vri}Ug=*ZTpUn`a4srS6Srz<$q z;?extG7P;s1u-EY=dU7ssNBmEjeA8L-^nelsH>_56a#q3a&reiOn8OAN&TigF9%CZ}Y^wFtUDRI0*`h!p$!{NAcR0`R8?Mj$BT1NUQAp z=CAKE4BIkI{KpC>*k?9p(D-2BxqMvWR_GeAg%rtHt#T(g{g%gsa z-6i^H5FB=h_c66F+LZDzP_jv*Ue~y0f=%!1L1bV(&^ig8J6>K%XooFJxM4MbZlH=k zWQ*4e3FTI{m}OkL!sMwdCmYP2y`X200j@pQDtB%yI3VC%=g~S~I~1=W+ITVTif%)h z$87)*{cqK1?KHYl(a6G?>C&priP2ozIiE;D>3?i8vP_XxzIru&CjhBYWsoVFYKw^P zSYGdzB@{>vz+w4k0`BxwT>+2f8)dIwlS|M`86=VcY}onuRCr1j zpNr3Z+>ZI6q$-;Mw@1Zn=Q2B%f}be+i_JW;ES@tC$=*e}oq1YO?8lE+$OyY{-#)YM z;-a$=e>4l*e*7JEA-2CdgiI4{G^e!cJNa-=`Rj z7~hwslRFaJ=egTb5v7_;4RYb4t!?`-PGd6WMEAx6k!Fx>T+I?9>s!(c^n>8w+`s=c zYNXclD|f5ifsBf#8f01cjD2cCPewgG>y?Pi6?Xm-jXIwOJYv@K@=kC{b7iHhQ-_V$ z@`S%fJg_q6vxb^55iBxqGvHwCc-3RRX)H|=X6+{@?P4)=#ULj&>C*vvmgK`7`E~+! z{TGd{T#2{pE(U{8T)eXEx-=lMqJ+5SH>ugv9Itd%L3HA`!}6h5mr^6dT-7s8YKuSi zwHLY0R?z9Q1#ZFCdrw-h^I0?BUbmTGNp9C&^+5C{c?AV5sD}hCsjCMaN5LxX?(UA) z%#6kMZ8rp%5Oi7rSYUV^z^>whjblcZ^0Nq-U6~s1?mmIxKU+aw-tV~mC&kk8^779! zGlG(SdrWVDn=tC)!e|-BYc6vAb7Vcu$tnuCT5@TA+7~dB`SIh&0E{SHj&5F?cN>45 zpJ&0iOP1XSyTwwn_{F<3&#B8fuBW%xa1oP!-yBnllPpSf+apou=k@6ZXG=RfJ3qCg zgiL^n339$@VDPkOY|L^ByQ?$sHN{_I`5hDbe9yqZx2N=F5#aUQ#C#cbxjfZsaIcW5 zsp;!Zr*4buE-ox6CIW=T2=U-6&AVPMd*w>BQm&=$S?Y96aAhagbUV#@Q*XnH9AP01E;sNqM)Oi0Hg>5qmKgv!Op2+ zB2Fwz3$sGtDSQCwSx}$^=4Z#PlJ!fv+6I&(G7OkrPxO}ix{epo;}IR?&Z7`1uXpL+ zF!*lWIu^s%J{f}{Po9|Bk9D+KJ2?pfnE940Pp7>`*)_btTN5s3-Z<+k)O8HF*5JCc zGc#Bs$zOhnB-`41n?XE%Fc;Ht@9W)G{A3PMPtjto4CWC6b7AO|t12yhcPkUqRi|A$ ztNysm6zqKT@J)^bJUj}&{`#xS#|Kg)`$RP!bh$5BkQscucp()TreQkpSj;sCb59iV z>R%f>ns@=@ zf5TWOtr_E5(U%I&zT%Me^zxOZ?v(_`(KfJ*>LB4VD-(VsQ!=Lez>pA55IcZJX46B> zEGKQR#DvcRQ4#1`SrogenISlaP_B;*Ew@uB?^h0VfQMC?D^1B~XJ@a!yPZuHJKC~? 
zLQ#}s@_Y(%ECJ(-hK}lr*G3`G700SCi&7}dSD!zBp87T9hDV;Y%KR&Oa`I6ctLS$W zikiyl)4#4>y}Dvr(a{Iju3ukST6z}fLq<-n=t?(gPQ~Sn zcNR!TnjhmD%FV>8gr37EPile1x^XOceRm)`dw6(QO)<)GtUh-di zmp-g>l0u133}vz0wtc&R!OQQ+FtC6Be!_EL`LnEhUh*!uy1G`Pz!tcnKoUZXACO!& zJSsh15&X8`ZY&H>+?^2c%rP-B?7X}Uv0}Wu%C0jb>~tpJvPNB}&X^Z3UdYZ!U$}4| z)TP75@4wHBEm%c4)!d(DZ8y}!&bVt=Wo>P3Y-eq)0t&l^s5l#&6zH&eKn|XE9&vF! zFtsgQoSef?hOqH!AxW7V(=QzrOxs+VZq8X*5y+{QrYE+Uh5ys@rH;IBQWY<5M4&gzy0^bSHa-1VOmy_S zthngtBR~KA;dwc7NTu6-A?5JJM~aWXedU{k;27XtezT?{7$T%m4oPF><>O=AzWu=9 z2baeqBiB$6D*E#PjuH^}L|%}DcoMn6&Mfb)pI-|ww}8`_4q)pMety*uA*-COrGSSI z4QH!D78h?=bmS?KjpCCbPa0YVmZ9~$jsjL19I?8)dohNTP~9!5wDMmV_Oe?`O${|5wG_eOXZZ6skd(5?gNicb|^MhFUJF&)3_!y z0ay`OItkbVGuWI%;!X-h)giS;)Y;bdIRT!KY6$r}@u3XEVZ$G{*JJw6*QI7ajwUr2a-kCWE=0u&lGkXi3f?V)ed5r|7zV&hYlWmH{A_}5rmI8 zzzq=TKw^!iqT)|hR#trj1C^DP!-?in(_8&edt4p_7u zIB*~vLIAJ7Gl=%41Qj+y-J7*#C1_=v#UMzA!K4;{bS8YTqT(-^sCARJZAK6<6-!pb5r+2x+_OPkb$=6@^*`R%vwsvq;ky&NVF_pou- z4(+2mL+G9c@3I3q*x4Td^`b}Ew_D^UK$9G6$! zD<6_?S{M1Nj*iZVu_+>J6y-lfFIiT@ z=t~5f&}l&mA)fjZZ|JQc(?eu&^Y8)8jmLOp%H#jH;@-Xzl@&j5_e? zfz&=Fph5DrJ*8geU4_myE*o)6_iTpjEJJ@C=vbWi9J>2O;q{toR;ldkHRukfjDGso z2>1UFdbM%Mla`TDwzf`(8na{P&e*1=CfS)K0Boa-OCvepi*(EHp^B>YTj!mGWOye` z^j|cg$>-&J#cy1KXwotYV3wTz1S>X;UjuO;L-@Wna=2IIDE z&%mE@LaBh>RXAdU_@DxURfNF*b zQ>22bqhk&s-8wpSI^7pW!Ci%+Q9`DBYwPR#PH4^e^gu+LL?=ICFv=S3=JisFqwB2IFR{z3i#d>10Yc@`CX>2qE7^v z@9{3XJlS?^6D>y>tWFEjSpXL~L`8L^rKN|uOG=hOw(8j~e*JnKRY*HGIXU@Di8R31 zsE$M493~L~(}sg2Q}5or`$XKWpmkZ6UAgnG6$vzAYHW3Q3 zJE>b>Uk|F+?(GtQHFEtqde}|plY0nKXa=cx{@gi237}X7666YTi2cNgOW+`yKa_cA z1IkzW{c}rw)-fF&7e|uRuNc|#i*l;pzH}&LM2iD+_Yl0}V>eWzSTs*vn`ERt_F4uL zBhdmeV?0B8mc4p)q2RhSyNHN(l6ngDAd%`$pFU0Y1~qC zpx=$Qty!}s?Q7-_Xz78~J=Tqw)b^gRev#@1fm58!#|7=MWzpeE^y&-L|x?o=jY0$?K~9iIFGJ<-DG=yOoEjH|I1*1}$;zTju6 z4vMO=|$b0b^SrJ2e6V^oS3KzbTK9cyd8uJW9Oe;c;0BH&? 
zAUrsukKsv@daUzWb1XuLGKwjQBtDW#*kvkyQ>kLP-(JorNqoxI?yF@2!WJz=PXETl z9h8_Xn_fvHBclY@*)c~2E+Cn_E?ipQZ8~t8ZKsZ|?(}4>L?sA!fU9N&aWCV}oifxX{HD(^wbc}iqpvb*Fyrpktglzg!MBj*I%}D2(XNi+l%8eUI5jsA zVcr-k2X0)}sOfuTjO7-ukXNtHg@NOz^1IGlBY%JZPDF@skL9@yJU`;wwr;r-f^5NE z4#tL9QV$Xy8krcV%!D5{TVJjA1>__w4mb^UWngRy{dLdUGK~2el9G}}O}Js}YCII& zAJK&F25BILs>pHVNIKGt68aV&hW;0Lkj$Suz02AxnLMG0fd1r6MZu%2M;=1VQ(|_& zUsS*MDwV@n#u>GR1Q3>e4&9HNw&Eu;mN#@6pGXZvjDU2_GG7>N+ucHXlZh?Xoz?y8 zAI88ejV&qKMD8D2n(a>WVL^;nRQEjIjao&^fv{N#W(sSorlqBIX9xDfT3}CjetrNU zw;{GL3hW{4YjeH7pYW^z<%&q7aQAuB&Kr}}jOu?b$%&2*2)gi7@NO!NEdn10yykp| z@y~DDA|&OvC&O76f(c|RMZg$Fb&QTLnr%;XfDBNV+>1MAe2GQD!(w7ZRRbON5GEqrW;FB5+5ZvO zR&G*=A#D>mC=YgwgYLX-+qRFRqZXZHrq?}ynbRH7ztj6X*xO8V%zhdmi}TA*It&FM z#?it|8pj|xfwPVr#X z;G_vT$SxqDhV6te!ZpAAKZ4$agWxykQSJl7!&5?2zFY_~`EdU^;yY{-gJQKbp!A8Ec~g z0a366oqdX!f7GsaCqx;@9r;gxZd5siB?sYB`P6#*f7UB&7$cm~VLxl{gq$wLKuUWM zkA5oDBn~uZ1Wz6O1%%+w&&yMl3}cLqj64NXO+{_3iOmG-xjG*w46Zk2Y^Mg7(IM$~ z4|tSR7Lh4j5vmJD`l1~`emuor&j7U$t_(_w16|iiZSEIbwJNP6I0hb)L_$%0eDk0! 
z-QYIV10LxDxNSI7&`T*&fv)|r=!J;0o!)ntSy0`L0!2DLLL`RktO=xO-}rbMC`b+X z16&?6bl_{PgKWKaLk(IQRPbm3xjBsn!HKbk6dokT6mp&8(4n%Y5&F15{H84uuN0`T ztULw&8H7kg1ZrX=TeW6gz}yuB)Z^j7C*gJ~sBDCX;oy*AQwVPwYEI%zgmLj^lC2t_ zkdf)T^rzqyILrtDNK!T+b9*GmKMxW%DB0*bDLVPegj&SV3eU#18DOoVm-ooM1L_z< z%h1ZVjRD!329r~`;wHe%9bTo?M2tXilNm2i3G>(s0h{a9q~Sxw)@07c)9CQwW27g( z+{p9hm`_DK3dj2=CvIp00qGLC4Ss`XAPW<~77H#2S+uDdl%Z9es6gi>{L4PkJYF-r zk!Oq+%E+m%`e)g`<@FVBs-QneQU$NQBj&Teg9R+G#{N6_vCd{M6HC1t6X%H_kI+_!8us;cR;(nzkQ`)u zqH)Ml!Tv;IN|i@`m`+GcO5hsXqd5RjB;Q}C=c0KKO?di3aTFu^MX zSB;nSU6K2`2_*nFinO|KsI5g!>=h4c4BSboz_!mbGY?5aO~L)qII_M4FP>D;vLjyH zZ9X1|OGZIK1DwaV%HuMW5XVt?ibOaDALvMN;6DcSSLTDOd2UIaJNMZhP5-?;b^+HL z1`0?wpM}Q>(k$n*!z9GW#MP0PQM@#xO`8E^PeAlCD*vtHicZy?C4Nt$bHN}OcEaY2 zc}sFVx+tcI=!*EQh|Ug_Z;yrcZbFK{f^~MFHiDQto`U%O=Cg!kHe$UBkZ8k}EkP(V zFJHb4c=E*MZ87onA)NX!Qv*vT0QR{o&kgJqXCpMCSA^XGX@ZFewFA#uBJe4sz(Anv z(%yq?Y@C9E>SW+)%TC(Kqn8{Xe`K%db>auh9M%W+BMy-Q05Vnlnb08&VpI2FgN=f@ z8>fAAA@19qzzYYC0vlC$M1FKKGe3-CSk&8qA{H`54-n6%CBd9R`6eJcX6@6EI zo!)wg5`R!CVr+b5n|0WUFSE1`J|U0{JQ^in6@!zA7qFqBfi$d;nKy4PjKce-V~C;0 z9O3jt$g1nm0!C=9h_VB^i`Li+XB4DlBs7Ev;$^wBI6sR}Q`ONq3im;XBVM5Q>&?8| zZJBqPln&G&f=a>DPEg&pZ`%ebEz~>}U_q6kZ;KEZ5f@mP0w6E|mz$$3!u6TozZ%%F z+Pw7GNSix}IkH)Rb$^ph(XlXf_4f8+bTDz#RaA_;dGn@uDlosQTwk#^B3v5S)CG1} zk`81HM_AEOHl5V?PW}_U3YIzv(3RMc`5lH-tHZ?>sBTcb;4m`!lf?R@4n4=I(_>{x zMq0X_aB``-B80EP8W0lbESsa9oyw(4kD&8jYg18H-m!J7(wNPr(sWG~RJrAMsYB}4 zgCF`1dY7T+qVw9S8NMnjGXO1bL>tOvWiaKEG4^7g( zUlW!T8oK9>r>7Z4stubq1so)NE+1cGTM1$o-U*`eVpeH}crAv((B;!puRY=@9LM5p zB?gQeqNT|`H6?5Ek%C7oifL(9Ms$rH!X+`(=C7*q2EAmXT486olS?a_sLzzl2`u@;~tMsfm`VJg51(92|e3aJ;RLTM(grr^DvAxa8PY1@jH53p<#ZO&j;Y zs17X^^#)!oD5dhS{REF7;{Od}H@>X?=Q0pv?n7n}Vl;yz(EY7AZV0MCjwSR!x+;Pz zzAdyk@*OmxLcvX?sVPSRqne=|6Z-{72HofSH*OTdf0F_Qs6~KIkO8-bNmw2Pez=Y@ zLSo)lR%V7&&lSP6Aou9lX;@zYw>lkKi$MY5XhTWA3qjlOvmWmj5)y*LhaE^LR_xHB z^INxW4HmUT&1J!mu?!zbl847mD%z|zDB}mw{}mp`{`fneePUWf`zWvE7>WzQFjWd1(`+H(dZT?(u z&q<-1&J&>_5`JbAp~#S%6{r3i#Kiv&f%3opogb#GA42{B_2PW}`Vp}1IG++c5(17J 
zZcQ@h5J(M;Bu)P*8oHLXS)R1Ky!y3k$)x$>XaYV#9Agz#RYx<4+(HfrJmxbQ!fa4n z(+qiS=mq4)e?`;ccN7)x>s{|z>x*3l>hBYl!k0jiz6K`#7d`onM3|J3l_d(foGV^1 zo2nQL{5z2_K0O1w)(ma#FQ`^V$MAngc5c{Bx|LXJ4fl(`MgOQ)9*qX(qUhyo6hgI- zMq^7B{;NO107}z*QdlTTYXxm_^NX!kp3OgXo1W-I#GJ!GQ>8NIQi2=d-HrbI*_u$h zC?ogo-hE(iQ*@EA!zo%uV*Q!5;tvlJriip?lL)uN$PchgEzDNP0oCKENdK~N*iSrl zEAi-;10kKfA_A|{%JOUpBNG!ji=tQCK;rgWNpjpuC!#nqA}}HXMo%7`>KXp&ugFFD zl{s{VoA=KB4+`nO4Cc`ixU^A*-a-|JhC_!XIaLL&gh=84G?-U_@`=fl1o+_K;5>{j zsc#>$nS(W^=i6dPRMXs4dMwo83tDZGVY#1Hp7qmj(iS*!;!)FGAqQPxCk!ISEOYIP z7k!BM0P_xUsvbE)L~c}B03L@?tJP|He>Laorm4s|g02Y&3W{asGk%vPB`IkJglM#H z5}tBSqN1nZa0_O$$He6h)v|Hy5s@f>9rXtW24oAq?(gw`8vU8=n@{0$_ZJDj29Mz{ zRc}vlb%Y8#$iT*dd&o4Mot#v`l=Qe9zWbn7;zdU&2f&(n&FLbZ1#T8Y`o153wBFpj zhLX5?e6A*yLT}Hv8vtA5)+&xHjrluvOl>Mjo>2z20Y#Hp%nh3d$^pbN*Vak!ImqlC zAdg_-;1*eP*pSH+s}~0|q)tX!<4d`Jlov!)D!RK%s+M8y29M1($pkzI8Mi$Pgw^=a z#`TJQ=;}Dqx4odjM~*$NlM5L5KZx_sstf`2M(0Z~?F03xJRY{dRL1JBVzt)a+q)5T zZZ^)?x#7IW6JdwMzw7gdV(O!$v=_+HG)(JwkVc{>5Ps{=>q; zT5$>yEueY=Q8Y136U+dNN{t5e3o+^XZvA?5pg;2c#HNek=p4w3XT-5>QnZ$$_B(v0 z0B%N3>?|)>y;UOYP=^%jLzgF^{7@M0!p%atc;{cdmJOD0t)VF&ciQ`XN zZI$)TaJw~;TEwq@`SNA#tnuvaJg^+h%*^u1TW<2<9Hgo_KmqEM5?a1l(|Z{a3TWT> ze)su7HjJol-|eQTFVZQevISxKNP#J<)rb;*ChzFlNG(oo?iLs`r@*okAfE_dUG*{; zaW=JeFEsPCH=N3zAxBz(w^Tq(9eQhd`uI(Wea7UHkVxqGwKPZ!!n7R}weQTyy8sVl zV#}t%V0K92BGRQKxytyW9{cR&=2V7SGJRV_F2Iyiax#7#Fd(<7Nq zWvdfd$P9UYO5r=Z^w^jOfQ)ExzYmEZ} zViyzB&Fypyx#8wUJXn|?O@i0I_~8;+OsD|9*sU0xKhsBLXofYrxzO283}-lq*9vym z_aoa@>6lbu8C(7pw=i7QKsoV4&$?6D{eQ(Ryn4m|xg2F#F=ZOfj#ya_n!fz)2NFTV zLJEgH7UJF8=2h0~$ZoZO-XX-I2Hji8BUL+Bo(#{h)DWu)8VsAp_wQZ)^j!hZKulb- zeoFvtTu!J{^==>~{vH-eaTG(*SwsfRvzIe;a;$7@CL9tsoKpMa)@4d_4GJL{6)}EX zMe)O^t`4UKY(oSz!*GN{Zf785+B+stLT^6aO=)f;=3USTNg$B~M`2xQG4vOU&C25c z78~SxDQ=kN@kKf23|qFeV73cW+)6RQO~$tn3kBT%OE{R;l7mu)imd>vk$f_C*;MF4`^-#}kT5Yee{RK~hcyI(P z@baS$^p0QyR`smA$-W2709rL17tc*uPgqAjB9&*dRY-JT z-Pc|N1hJU~xHH^)3kG5mhQq|6k1B@a6Krl@d%SA68E$U>cBd`S?TCCT)M@_^4SAZ} 
zIMHPhdF0G0h6xecISQSb54(~L#tugnWeS+=;TPO5jbX6~Zh|mP77Cj|_2!#qTyO!c z-?E38dYkj@Oo$5v#-v8-Az5e=cHaW@TV7(|o}I$fVfNXQd0TgQELRv?E@~mf;a7N|r zHxp5yB40x1B5`Wg5PcVvJWd(M<4naT5JFpg4RHntQvSOvxd?Gxu#&Xcd2sy1Nl{l< zXU?+zR5OY9r#P%gEG}+vr)bd}QwZ}!j(rds6YLkP$aZqQt}ZUb`1|CwwT;aK==(al zoCEtu(a69+Bm`uv(qTqo-66*yen#|@LzP2aMKq!oBaz)c|FNE&mBz?-NE&i5IYP&O z`PKL2lqj5sE`u?_$-UpLT`Nsp$FT1xgAr3sE?5dnQvjK?ox48!=fe?&0A@||6JM3Hj{0jCEn8;wH~7pSBnLT?(*rt3BSx@JAhfXB!or|uh*z79SS z?(@=^Ey;;nVqS${@M%cm0yIp>i_bxno~2H};ToJTar}7w{VO+CmS&0bwm#QNKV%6& zR}vQXX$MV15ObB}KwL^1<}gDDc`Vq>mK=zUyJ2?7EH1Y0xdnsGUPBCyaC5&#Ioc#G z3FcQ9q=+RytbD#WWdv=Zx815l06649SeQj)8a(Hc=xPQ`p5zcC?giRN;L;zIywtC1 z))cz$#EQu^8IMV@0v*O4h%TH_9{-$&YpD3Ltc3hN16ZVabmYT_pK<5}AWhdX6w?lw zQi!?VvSi6ox}n%@0p}(_8W}a_;(UtnC^@-gSul9m%Y41!1lh=osKcj69ifoGK)B6$T6VuabU^s|@4W}hqjrA=pQ;FXfJ;MC^cQ`X37XhQZ7CryM zuCuZjQex;!l{_V%!d=IdK#t%H%hk*@s7xH(+yp!TN1A5hn1zOm1!a#ENl0>h14Vdx zun}N(>H#7p%VAiZ9GM0s+x$>L+;o%%shm_J;$|ktiOu1IC%mu&ne2!l3Retq0R^g0 z61NSIUlIO~3gj#S>KFXONk|cL&d>v={D_l<5NptR>pfN$^vMC*Hk7xR=I`DQk&+(2 z+p16rahE{E6g(v9Jvc=kNnAld)|Wt$v=k%PRA8SYN7&iIub-(En+=7?ZrHifVOI+- z;yfS^AyH$R3o#ggkRy!&XKOPY+HrO#+S}P_U&n28=N<#NYqV5-9!<%Qm2_Q(mYd3jT|)%dFeV8l_-SBOy-a&vqEW*CJiJqWKNVJ^AMU1h0IfB zo-$^9uWxIg^X~IG@80|D{l|IV_pHxa>sc$0=lT7<-|v0j*L_{r{kw8p^$_DKu2mEY zh4HYmqB?~_El;6PmDADUPq>(rit)17`Jj%ohW#05H{)~Wlw-!u4z~8rwpJ!QuIA^Q ztnAN<3-1-)Dad2#?CjtqB_d+??{5&cKW8Drt4h;|4_WD;e9DPJVKOHFP$kI4TTv*- z)DJ7}*F67dsMYqq}0`06Z2F!O>3YjbAWZh>1J5i z)y5YV>rcn4CyrLEx8L_=#&dc&Ff?@Cr^hmL$Gf+m(9~S(*Lne{69WC)YxoF&&$lp>hC+>y=io8Eb-R#@ii6}7AlYTaW2jG z$eqyAI@5oye*--hC$s$m%?gKaZ&)V!KCf6>oG!X`_ijK(hmpp8^XlOB5=N$`ru{or z`UldAl(e+iT!)$h&$i~ituL>rh|bS%Q4Bp{H{I&i-dX+RfKHK5LE_OwO}n{`%o{fb zzjEp^`TX?Q`R}3HD*CDsrJOt-mWK}?rua>_c``*vxs+aFT<_3b!C2tq?(WVZ=d&-r zpx}vCa_9H&f&2YwJLi9mt49iGKQnrFcZX$5j#x@c%JCu}?}_2o^}dV0wo#0#9?Kk0 z)cpFwBE)u3+~SjboI3B8EeEZyUA=lSB4VS@(t=w+U|>mG8@EXQC^dygMrPx}?Bs#6 zON>uyEURByY2?rLC+D0Wi#BK_pJ(9n%RDD9uk78sRl!`lOW)kud_q_EDn7Tkw3K?~ 
z%9S3?3lpE!@P#w818I!h+4Stf@~K*;71udw+co0Ujd27h!}GO11F6{^Jv}Rem1x-6 z*_SO}-uYgnXqTkx$*WhdQYeofKc;O6fB4o{=#iAmso}zu6ky3|w6(Pr4;?a{n;P3_cv$RrJGi+_IJ^ma@X3veS2|oTH#f!xI(H{SydI2 z(~l1&L(SQY+%Hz7J9aAt1Oyb<*R#30y7o4{*tU9;tl)sgn7yQc0DXby^d&4O_D0P7 z-P%(H6o=uKy$oEt=%%Ko9L}999qlX|Yql?X>m!UyjIC0QQ$PIvp}2*a>B$~Rt_e7+8(kBNli&|FmNJ>k`9KN#^%O2AF z+_*`Jlp3XTwKhqsz(p?d3^5KD4*Xt{+3Us_jjvP@;)=EWoNDT@J zVQ=?cD)^f3{^G$dtJ_~QZSFHG1hwY5(Nj#Ohg&noUc7j*oRV%`$De5TCE-MAZLK

Ot)>^`TIA^*$snu4_pemeR~aW0Q17!G&>IVnr!>_dv&|cj~XG)wjh#E_Kgj+ z7Zr_8O|eNusMaq|HQUo;6NC-&w+AqH)oB~9!w#5qmIjiGH9@xlMG-6XTB6-+bHQa^yn*$`jT_M}1N9~!!*+LF-sHoq@l=K7%$YMLQ-h7_MHeqp zpFkMbMD3>`_k1fXjQzh-8FPQ)nn6=+*pCeSf5h8h?a`RKp2X;7WA z-zp|{LQ+cVD!$U-?*02my{YJMw>9aehsJP~SH67FtjV;|Ha0fqU0j^APc(FqRZG

%f5n2eD5{gW~pW`<|#LzJ4HP`ufI3 z>DM?0ipt7C&oeXE;g0UiV&#q`)LD0qjEKn2j~*E74w1WxQ@$$n$QGpA&69bqLt&Op zFB2Uc9a$p1rykBM&W)6RdwVA^GBR?ohfhG@C_+Y=!dx`}^V-XneD}`@-ImH$R*BEE zve@j47P8uM-P*miy>Rfm+LsoyNmA3vvVAx@KF->j>vCuIbdJM*d_N02`?lt(siEf5 z_wSD~aEQuV>*cwoA}OKNDcCk<9JFmO)X$&kx>nlKa+;h=^`cWR&a9-AR#qO$AO9rH zCnKY8i#?~~ecfQ_D|F`5qaB66eyqQB>lW`G+pjzd2Mz>=hll4lbY9$5aclFT0Hqif zyQY^c>-X6HK6%ImY=wn|)P8S1ecChYq@QUWO5vHE9N3m+-!5D@(?wr{wA$C+TF~aZ z`(y21%PyS4<1^E_A~Cg#eX;lgO3msvDcv0fpjCg|vW;g!4r=X)aD z0*=J3L7C0)k@1)~-$A*1^Cr)S4xrecQG}IyyQ*2q2AmpUM7{ot>S$ zml!ztGEdI0U+hm#GxG*&5DJs97cN*Fd{MQGa_io`r`aOYi>Fl{i0;>%S}LrQxp8TJ zx@ZY&gAAWEs694*4_iukv-pt_hh&%S*W*)i%Mue4sr|kddgn2BsfxPdpnA_ug$5^o z?o&ioIf!E(mvxo03x$E-W=+?ps@6=K29m&(RaEXatzhC7w6(J{f1(`!`3i#L9rbY+ zIqeA5ZTb26iYQX%yo(A?mRm1s`X;0_mAT!R>SGJ1;sl z+5=f%(Oz;ZlkoTBEIdZYQ$9;ZAuO?j!*NC6_Ag`*r8n|l= zi(dNa)9Uy4gbo-PMLFvZ{rDLE^_A1M+369c_3PKS6ng9FdQ1-ohfCOZA&!<)2HOhO zr|M=ZU?I&%#zh+h2fwD%P*G7?Hav?MEbyEfv>M%Ii34n8Z!c2d>gIL`NF+{!t8QVR z+u+v=$igLv=cz*MJjL(&_3JCzwsm~!@tm8oyS`zs`kq3inEl9yZ9a*b{1ZdX+V%BM zkF9S^47+}v;cIhqjCPtqhL1?md`8V=>xiV*Ryny%6oQlrT;kOtD+JQ=sj&iN`Jxyko#lUAzka=fQ+W~4cr>AtXjX~KyZv&yvm{0#*j zlchM|Pu;G5sUGpOX?WHd;nubvs1A9Sik_YxHK!8^AR`PLGKw5qhMH(CE-qenllOpG z_sMA|8TX%m{2U)2dnb?vfOfcfZe}J5n@<~JS1$JP{d@78$J`7e7oV=U7aAHE=`*{M zX2t3NW@_zcr>Q6eWFR>!oI97{Z1*9Mp1m5Uh`%z=p_3ZbVnW&GWqSGre}Dg!R9q;o z=S@{r<#kT+>RcC_uT4z=ryHvFjbC3Yj3mSmq3YJ2OJ#)e%pq}hgX-gbS1N+no0eZ) z$Fa+jdEdT$NiClM^NakG|(`&r`sPPt59^I>ou*{#eeJyE)rF zZEB7yxhpJt^+qX<)4n6Sbx@G2Eq7(?0d7Po(eTjM)LiD~=2no-kl9+baKqG`zrI~> z0O4Ex%E`iV7uSafX?8W4$+fn&wl~AWuTjy^zNuhjV!9k3zYRMd2v~Ny(2Emiw=DMX z9iy{nS9dIhv`6jt=TH4!8+Q~nFw|guZrf0(wn53K2x-I+1Hp8wSFc9l>21x+PI=nT z-Q4f?H;4ep*w9`i7Uvl16bfI{?E0)Jkw3W2Zbt3Y{GtFT8s7 z%iD%}cz&lha`I@SMP%ngi^u!-@2A?T^1z{e$WMByVXkDwCL;iS4k_pDS$55r9z5Xa zW0$4iRG55vwD*Cy#p+Lyav~pTClO4!^7Fe`h^o=yrT4BbE`gbunG->Qo~^CfQb}{}xw*N~$HMnK zkg$JTGHGC7pj43dWQ@uP(GoJKHRdtdZ@jd);4yURhVQY&uW2YOZ=2lEPR-AIp(q>* zo224dVEWw3zgR#{8$zH2k%cCNKX4+i-D0e({QQrH77X?0>?L1KPs=+xin$_ppu8LR 
ze@RsL1eh>b04%)InsB1C%~Y?oDwEGhYr1DSHo|(a$UTXRhFOL=LiI9AFG0;Zug=A_ z%_0|e#DvHyARiidjU5cQeA#rUDO1w@XKdR>zICN#WnxZ0R0f+e>8s?H@=IjbF|CyL zm`Jd_WQNmH_pG4n{*%zq(3FMoD!KawSy@?1mX_NK7iVoggiEBoGnC~DrtPi_sf>^G zxr$ozL_g1!K>6g8Aw@-U7ybPS{_Ay?DlnXxK8Ku$X86X2y%*JFf4%N)_Na=KW5VGm zcu0$DM#du9v#Z{J$6m#z_g{#AhA9-Dxu!6_ne|D*N5t%YG4!( zCcSyItPVfF9c~o!87pTO^Ii1%lB`|TF?c&2dj+&|P5IcdgLk$b>lhrIuO9JCdo1Hw ziX`B>w2)n*AODhL=!iOPZQyZ{^ET`X;7;?S*X8 zhO6`fYhn(WmRwvu^YgvPt=qRR-n_~D(y~TDcL_DS^zGY~l_5CKm-i%T^3(B7SnIwz zx5Zg1B1H(_g2J5Phx78x&CS9^t^viS40*@U=hqe!Q`5L^r>VhHgU#7hM#Zw;v%4r! zC~_4ef+w$11#v55-$-^vN*ZX*WuTzs%%Dpp9Xz|C7t7e#nDWu12awT6(|i}`Q}wbd zLIt!((XR9E*~5lYCaH?vSmNEhnMR~=hA-D;fEINDHF+aC zUZO z#Wj(_Anb8*VUiBfkFNwT=6 zh6OEJ*}G5y$HA{+A0F-1SS5Ys$`x`&BqQceHQ9_#PL?AOkaq*o1)BmvQ2dZ3{@AuH zMlDkIfwY@&OKWTT*;Xn47aFJv9AYNR415<{7&b^UBkz}ATuwjn>^(lS1e;Y8ca)tF zD?rNo-QD+|J9lohj7g5r7D{sVNm7v(US9iZKYy-C)Yzu-NRkD;a3%h6{PBSkc+sRW z!+csMSW8{q#6j!J$B&Gt(153hPM_YO8Yvr$+*5-4@2QHcTwIuoYm_y;&b52hCf|86 zrRy9Qz_5_D#U7VUxBD{5`{<^k?*hTn?z;(CLR3Pc$i6uB1~6f?V|T^OWP_oYW0zuL z0m2EJspRau>&(Y66rQ6gdfBT1fyE?MudxX+uUT^u-RHL-KTLl8{GKrh*hZ?NQZQE_ za+Yy*)cyp+B3a?nIewcD-RLL{q(^gV6`Cm}DH(;IlbYDk-@gjInVFdx&yF1{5w#hG zj*gCip3(;AJD0yMFGu`v0bSjBCY~gjg=nptJ^k^+htc7@5fe~bz3oMEi(?_a9oWGK zGM=t+@z;G9Cbbd0YO{mAEMh~uziR3oR8*wP)Ybpqx(kV(6#y|hDoUP?SMluGXB^^Z zuA>Su-@AA3cb~+$Q}L0UuR+s~pWok`)W#l;MvF<>>@R7Ch9JET&Hw6&RNsVVk(QC` zkJG152gJn0V8;W1Lj6keih+9>>s{w(91-A5#z1h7A|lFha1W}g-k|Vc1>+jrMmtKP zu~#5hu3??7BqVI_tMANn8EQJ2lamv)UgE4~kLZ!B*RGL@LE+I#(Rq^XgMj1PzFiqC z20bcyO^&0fnEOw&MxnuV+4XJS4E)X0J2ajKMMd#+{`j#KeN6y}0`$b|fWqnqA|oP% z5H*`YZ14yQ36ZvqL=H~wv$h;GD9l{JO604-0bK=v(;5+e{`2R#bfii0MPR@bj~~BD zxQI4=BZZb%1)j4LeS}TG9URU6ruKN>sadxzuOssAqpC(}=RJ}Blr-*cfG!kA_{7Yw zUn_BHI=i~~sA*^po<993$8|_Iv{GW%F3sG$;kNI?3(dJM$w<`U4h{~?T3TA90VN%< z>v}iyrAq?CXLI$idK{uge$QXLUVqw}|=*{=XABtbUex*Zw5Ifr<>DXKSL=Wv{&6ladJS?m`51zo|Tn$8&=fO;Q(2&ReGH= zQ%HPq5O*ySg2U{DmDgy=3Q>S?z&REDH9M+NfLC>V+7XNIGe)Y@d1=Xl^f9JW%H+^P 
zJ%+r8dd=j&f~h|5Q%cs=;NbGlYI2PB_V%Z9omURjrylU|$ema_AGXVF_%s0arOtR- zu;?SimI7@8=4RHmu5D4~{wvB6Wyvu`FHd{}4ix?JOudY6xzc@R98$MF5 zVFV&SR?@}P=XOv~&>K1iLhQ#%{^cc~uE`Ha{98{?SzB9MhxQI;7M7Yjhv+TW$ykG>JDKKi17{bp(;(u`~4U-QwnEy{X*? zLyajrqq<(+_xGov!RMV2(ujDkeV%0(h=d)h$)Ei9zub6BQ+<2O(LYIM|Kei#K|t`@ z!V@P>0FHYcMaL8?3q+)VsuQ-$iaJ=yA9c4YZ=}$3L!JT_EHmIRO*I0H1@b^nj^qYe zFOCRlH>LCEB~iH;rqona0<@At?}0!<{xCvfQ;)>1b&~?yResG8c@Rx`jPKH-q}R-7 zcasoVvC_l<=2A_2Zq$2V0MowOIQ4K~JpAb9&6@z_xSp$MJ9r>e$jZu6C?X;vr~<~N z0V~yuf`aHs7l~@0pryZM%N7S`XGMV94fw1U0er&F<)({1;kL91DJcw~A#ms)q{s*g zGJthRuIB*1(A!p!x7LRe_44IfB)$47)E2p=d8fGUyZ7!z$HppVk&SCL&!9MubsYrW z1k3-vskOEB#PQ>}G(<2Eo+#4DDNiU0=?SG69cyYTIs89ET8 z}ToI7>u6o<5{ZJhUC$Z+(Prl7xj8`9c0Cx(;jkfYZqbhot?ybB_(upbk*y}i7#gV#W8;Hoq zr(sMXFXjJZU&P!gx*29GQ4pePESWhtg0M1LO%nY4tN25Y*jv>lg{HJ`i}bokS_d3| z*(uAmSBv;*o0c(nL0_5jCU zi+p`UUmSIVmUST}W;3Fq6Giu%0a0mpaEwnkn~BSto106XALT*iGaDiedbYh`W`**R zBMQ(W;znsGRWj2XR&(zQg1A9LA$S=F3q`c2W}O;2En$I)Z8Jz()qTzmZf^H8LCnTP zMRgQ5+hpp-#>E+fh0B;SF1v(wg`RW=JORb%2F99f(#Wlc-Z%*b5a6ND*Cz zz$E@~xeW;7_!73?{nuCCtk?OOm+*r_?RkD^c!1W#owgC0@}lJq(BxBR)`ywDq= zH*Zo79Xez+hN2hc&CACZ-Bg0xd-m)ZFnUAy&<;VtFX8KQ?=w@sefu`(`t<|^hGThWnRWdT- zLBC->)YsPsOsEmsda}q<7fWZx|M5#6TkwB*#lW` zDdd)pA3rV?de51(<-6}Ll*7JiN_&07bs-?Vy&infH zYaU_Y)t8wzDWOGX;pV>a=+OrM1yn3#W>S^d0hmG3KmTHZ{wzb*cYbWehmRl4y|L2D zjhYH{#z_)Pt5++b_@+bKP5jv2z6k*fX)3me;38zy@CGHiI# zZY6c~HMwph239kPz2dfCpYxZgSy}CXxN1g^FRjzxN-xhYVi2Gj?~lk%K}p}ZeEIU} zR)gd^J|Q7SNV3TgG-?@Euv#$s9N^!4!wW> zGU$;vSx1p09UMfdsj2xlO5;|rTnW$#f`@6y!pZ*EYjWs~IVc>R5M3*XXcaTb3NSTf zR@+07Nt_|oH@YcaYaa>3pf{Qvq%12t{*Qf8;r6>Hc#2w1P7bX4GFn=bk+y=+sbCmR zj>OgK>1JAQ8F2glZXIMfD);e6BA?6P1Vvp=j4RO`XYkisQ0^f@2WvMJlg9-8@|V`^zEH(=_ci?(9Cu? 
z4zb}QWjaYnexOAQ2GWB8qINZ?^2;9MsH89L+quEVgf`7DEa0sdVDXqcTjVmJ1>Km^ z*w~0Jm5Ks6mSL0EDAnTpxEdO7+2j8afpc3jkmQ|z{nwn`1Vi=R9U7xU+3kc(B3=nd z#(pS|G1mnDPVfo`ZlVycl7fPQsB!5sfbY#bJQQ@hmsR7%bCpOGhF@PTZ(sOpE2-q@ zLZd-&K9F?e+b1If4c5K`&Z0W*p1)JZK^bLC0WzARBBIoT)-4AJ7^^w7$rv@`&oT}Z zL?h{bTpDN$X}&-$M4{nBHG6U9V=21&nkNTVytHd(L-|b@ez+U5zNb(?wT<{6Ted6* ze}!XxGxPra`!OI&bqc-q0%D8VeicOz^H%jms{k)A74lkb*U*suU($m|2gP}yUdU^* zPWyac+_T z8%PqwP+(aT9ry*hIDIS1{jE#>cEu6ZihldnY2JP=zaFx7y_pTKAZn9ox zZtw-nTr%(8zc)VPx~(0giQ(wP#25Ob>)^sNGB-c@-E$3Ts*EQXjsxsC^ncj_0JxU0 zt}uLTr9kfU`=dMVCmkP&(YX$yVvkri;~Z904czo5ALZ7;>l+&mB`AJSZ= zF1Q=-!-z|k8dD2rN`Fnv<%yO@tBC4Tv^Xg;(Oc7fiw$5}QIqxhT2>m3V(!j>~3|u~u3U1^%)Ffl1gHC@Iu1LtTd^2@&!qjqjWk`L6B> zdG;ZqgFzw>$t7K5bY<6tG+kMrdD8w*6dDcyw|VqNlZ0+vOCz%~7sq417miAZc;dYoEg2K2F;e=7pJRKMOnbH z1mVaI*2U<|8Crl67!&i%b*1P9|c76Rmfu$Kj;n+ zZGTxNZzc(Y3Lm$8*)npk#2HAWBoLeM$B>Q{p2cTZxJd8Gy(5OE8YhJSXG$>y$FZ(! zBGFL2Q3|;sd(6C{|9u-@G^4xrJiF}BSGxsd;8C}Wb*}$`hfvX9jkP=JFXmemoJ-pa zhU5A3=fxo5d2QSr9TUYnF9SO3zbY2-W$$eMuy6jS8;kRIriCS6Ma`p}E~`F3R08Ya zRnNpyC)K|2r2NrP1OtTVBeLb6G^;(=Ru zHL_UUl_|3NZ7CCF?b`O&HijEY-Fz*NX>zhWyjsJ)Uj$Midi&9zKQCg9h(%+7X`|G{ zo2#4oJalr;UD(?kQda#|m`Z-R(Wj~I-kMsPA7}Q3s>QCwalsOCZTAmKI5SvP7%MUE zBta#w=fyhrLWDQ=a6zRu2g}2|HB^@9`FEdK!Ez#ad%mvwjQdR=MNJ6~Z$@U86L#wD z2IIJQbRqKfYRw&*Qf9gwuui7~19K5{w2bVyQbi6ZRjHyI-l##q#iG z4OMTrec`QwLYDzHBI7}ZF@-7{k|N3%eW;{GCGeED9=YTF%iky*FJF51vA_8?yFBZT z51ZZB?4qO?7ICA75)aVa_DkYBWLJhI40nPL=$8deB$q*%fcOyuz*ajQPZWy1>wbKcE;FNKQ5cv95m)$<^}M{&YbK{6HlTZ9s=WQ1OW^$2noCTNXou6sE~| zcR%8HVuoXZ=k>{CtxjlO^)^dzkETNkKBT@DZ#{8rrxXQ#I!^}|mpYp?$SOaoBA1k} z;*j$t9mW>H@K{<^rIyx;78+DcNbGj}Bs$E@nUaJz-{6)>YVd(|_e*kR^HCi6-nYml zTzhOewyHiN7Q+MapZCXqCY&#pI3_6W(E$VxS12QZ+EBqX^GVv{)?+j!&GIq2W2h9p&N^P%=9}y41ACsoH zN_I|A7Q7oYmkzi91%{y#9Th1J-!WubO z()g~cfKrG3D52kADwjW9}$aC)owx(#sUyKFl==tF#8e{)JZ8V1qEujOykO% zoAa9H$cf1esiE2q$`0~Hjm=11GP}=g-?kX~oH?7_e$wqiuJ+-)u%W_x3p^8$vHX7^iDXSc1NP$j{DHx?`qnny#tuveE1L$A8(OqgIpe+`>~fZia2wj|IEyd6pg_|L_;AOGd#@% 
z&?yD4`{x>-iHb)sco3)jE}MD=Byu8m>=YLtgH?nxFPZG&A{f6-7F?n)A4{5 zY@Bv*$%Nq92aos+3UoDGsitnBSf~Dk6+Wwv2NEAUjGUmZN&$JIKKF5-wA)(NLkx*^ zvK8|SduF+!R>4GCik<9$0Ulh+TEc$9n$PVlNn*aTB0<9l0PDb!BdcJiAvKMGbLTQJ z>!o1*f^#Wh__jB9lVoVVfDRx+3JeE?v4`)?@cRoF{50sXX5C||+0M(GL?Htc8JuZy z9S0Uf@u}7jXB57#tG_N8IES>3vI`R$aGgHcO16nFiKT9}JTEV=PP)nRm%2g*-jgTM zk5d6Z$wTr6+vEuANm$CC1>8NMxdRH~6>x^ckd7tVAHps39H;-#smQ~ld(Pu_ntIY( zXqhNP42PIbJmp@$zTamOCt0z^biA5LA?2FnpGa$87AN6#qML%K$W5%h{QvAr?;CLtXPws{N) zA>3a*b@RuXSaaT?jec<%kb~rI=o4O2_q7?--&0zoy}Z24hF~mlaB?a`v#{w@0f{I#od^BF zA_n<%5vnU-MLPvPMtaq%Rl|7&VESNEsZTQ$v019}UG&GH0C?5PyNAvZ0?m;$P!&l9 zLcarHYZ0Apocdb>dftg91}Hlh;O`xz?ty@3@;ZoxoW0Zf`pNnS{yJqg6VN_nn!)*Z zMUJ8{M z=s<^H+=?SK&BQ=M+P~2>ac;qCNbIA?TJM{PYJuzI6BA3XJ#K1lzFa^cc|{8=2^~Ag zG87&dZeS!K!G(#0dm@H}c!Q8elOryE=RdPHW@@-SLeMsEV`$tHlj~~e^ zD3p3taGZ_&udX9`9lXYnHADL z-ULpQ+`AW9M!`O!pg@MyWC(?Xqe1AELqFl)XoEN;_In~foBo;FSRhKg`a4Xku6@D7 zhnx^rS5aWM-blnMxR;!Vn(8{v|H-;~Jm2^`wN-*X{r8Z*pBWDhgg~&f;kRyGNJ$aA zzEL_$cVtU|yp>fSO~VnSHJ58|&-}g6b+YFFKM5iaZ{GBSZD*HN?J@{hgpL9THFFC? zRNnfhbdYf@^?8sljwp{hdU`Pjud%_lC?X2`Em!*{l3O;a%x;Lso(b3Z% zFfeHQm#h%9b1)=;bm&IAQ4Tj_@=5K(hgYH>0LWxmvEp+75Ccxl!@Hm!j_sk*)Y7^H zf|O5CP^F%mjqMV$!AYCzsHnKDf?DXI5@%bA;qvV0>Qc9gBn9^F-T!S3z)08MD$l6@ z+U7;Xs)?9}pTaZF`=1d@EdFA9O*enfX1Ocu$lPBGCPTA?wVO)+$ASq224(`sF{ee0 zpPG9gV^kJ|MypG2K}x}TvAakE* zg7*(2M(}hVadWZC)Ge@}Mv^2Z3rx`LlKcVB&T;G?7Wi$G#mBHfOa?c+@gZq)$mIWT zIYm%Kzxa+a9TjE!jvZo-TL>Q1U+0@Guy{i|5auetB5Dfe@Q$4(G8E?kdJI@;QZho0 zXl~oOLk0Z5IM! zbf*hE*1<0=DxN&czv&Z&6MW(&kQ-sS*SFok?{xX*aIg5wFLXHic%4rf$!Y(fw%lOZ zFx|66h>n;=Wkz`xLSwWW2kidwynAM~>s)-v7J(Q2$%9^3Ls` z|AFYm#KhE_(O46sLlo#Fj6Gd~2kM@%zI86&0OdgCFHAHCK`Xj{pB)~#YWNLdG*}te zKmk+_2Rwo)%Ln6%mAa;OZjef_KP3?Fp$(T|iK&PfN2DDX+rBh_V}S!4MwiIMcd2#! 
z()AOToKBCfUAqSU9afWxnN=y~9Dd3fm`2zjPc>X z)Zcch^DLU9QfN3=(fF5uzm!{;U?nDc%z$$}665opmi`2XHfdTaZfvB-v^;dK7>qj7 zF)>Ntq8eH&lVL)_E$+vV#&eHJ>wV)N#3>YLbInS0Pa2e_wd>Mv1PAXnHg`Gd5;Bgu zCkjmtvW+Qv@Tc1|>y0RUlw#8ra;Gg01I{T;zbo5_n|X3HrHTr>IS2TSfPNN zUyErmjeZRsodgL1#frQ+9gUA-&X4R{a|1DjO{|aLce)0vyhqn^P*I2xejcV`kYRW4 z0W30XlHGuShJ2X=Ui`y{Z%bzIU7Rp5I2)a06FIlh0t8)R8hlcaPl(*MLDGQ&`;xfv z_e?#Q1b}*};P-`xJsW-6yU z?}UiUFQYuq&ORhB@0aos4*?)UL`|5C9z(m%3OhB4eI+GYazH>aghhYTO_Zwq1vdLq zc=&ou;ssy?W+S$U(2;}D`bkZC_631xvx&_f)48u9yqr*1zli@kT%{i-=PL^|dwg`0$1Z{n`xYE48xw`kQ%ZS}5@{;1ggDa6hiOU`3xBSp1?>``A9@944=rH1_ zs-m(MeC1l4J#vL6#Q6U5 zHvIDsANk$MhpvLRX15EcVkRw+Jyp)Qc~n~AEI$^184RH3&!7K2jR;(-qTj^YeVC3D z6-5chO+}aHgnsb|^=1SDW}qD*Nv9YTa6uxgfL=$Q5`YDN`y~$g;kicuS*Kr}TMm1v zIGNvue$9xaL1uyoi;N&qUVuqU%xC^Q1}8RwOE{05<_PtnQD>xJnj2XHMLY!F6%%+_ zGB~e2)6e7Ex>YF)MRyr_Hbetr=sao+8U2R@$tNkvfyvlg_3@aq1kr(0Bm>^c*uvs6 zCMotO*S)Ja1}z*(AA<989PWI$0hETWrITq@UteEXFo?qza>lLt2pA+#5%scc_k$3{ z^D~I&x&PzyRGoBcUR&h`s@2-{7M zQ&$G+wxh3a2R&8YZZ!RP=*m(FFp9%O?+FI(eH>)47Wp$)mOyrB=0QLXT|_vAnTK$D zRtp%t6hp-!jz1hWY245y?1Pr4;q^>-K!|-Bjk6@?p&=QC5C$D_o8bd<;CVcP1VYM{ z_{Scr$N=iu_CiL=4B_5-do4+RMxG(|4baBh?4-eTt32Gf9MaE4t>i$hwKag*fn zAQ<5#S4$qaQg)fChk^Tqxyqb=isD4<*Oyjzpm(u>qk?bgFa*nEWcW_E2nPx#P97Lf zcQikPu1?0X*cIr=+!h>4<^up{2eGtq4XANs5+_3zIgAw}0-!y z2l90RGfVb^2AFYtxd@h$+7Anb$60)m@R`OG$es#78iD{OJdo8gy4~IEU>~_}V3_B6 z3`{NN>$Un8nRjqn_yy4wN7(oQpQhV2@0l2EWW|Zy5BL$MAqX$}Yi!!_Gy@r{3Oo>D zHOe?qFsLY)R+GoWBWznd4cn{mtP9WroX~i zhunR?Vk_TX0$10-?hou01-j|MUZ<_->n;P|^1>(#00axiUHxksPrA55`!OEk&#X}I zij2@#g#1PfBWNv&pRK;?(W+t}IZn=^)38xC_b4LI3f1%Qw`mY2;wGe) zXZ5llAIq$-si`5;bik%iP)`2N5>UrarFti-xVr8EMP_b?X1W~eD;eE`)t_hIz76DI z84ev3g5@v>%Cph7T6AgcG^3oc2AC`hMkD6iogiftM$kZ-Q3Iiahjc#faXC}1%zwDJTGfBIVG562qZ~e{ Ls`yO7=;HqZQMqKs literal 0 HcmV?d00001 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b7ae2425ffaaff8826b86a13d55d39f00f3537dc GIT binary patch literal 26093 zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k 
zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# 
z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z 
zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w 
z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr 
zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png new file mode 100644 index 0000000000000000000000000000000000000000..91dbf117d63f3a125f330ca17f2dc05f09f9e712 GIT binary patch literal 14468 zcmdsecU)BWzU?T9MvV{)g47r_D##!pRRJpsNRcj8Md=J3q>ix{N~Dc+l-^Z(R|z1( zh;)GgM5H(A(%;&i+;h)6H}AZAKkxl@@(EkY%$~h}zi(MIPs(fbiMvjdn5^1yi z*;8sH(pqs6X-(Y^>+qZ4j>oC^N7C`Mj-$GbiKELU`>Q0SOOCcyHjY;2mw$G?YVTlf zLlGfMl7$ccZ06`_>mVg4X#MvWkZtTu1$QYkwc;cjY|mbBAdy%u5npQ(WfROvB=HIP zQ^z%~g^zYn^@C=XS10a%zG1)z=)EgWUA|eNz$eI4a)ObC!V1@L zGFhSVi{}n=T3GcZ9?ty3+nnQ0ys)EnDE-`}jUlpYv)A69n`4m;SE*#$xREE%gJF|9 z?A-2ZG(PNBny#2L#aLXhv#MaOIyMv9kN`fOlVHM{iE-noYdAH*c+RWVEHawk~MwP=FLqXo*P?Qk5tPmDQO-% zc3nA2W;BOmDCNj_k-AH#YeLTy&B*J3prGLL@;KGS==`%=cgD!1M9W|Sz4&%YAw6ld zg44!vso$l`Q@cIaMmv|%aV57sC?di$O))h%FmStkeO17|Y!f<1d3m{}J8u_twK$Tx z;wo>yAnYIe>gIaO_8jYt8#fjV>GB6eMyB81#HPh=+vz!HFu$5)W`|lN0R9XyNI^t4O55~pD>YvZc54?Vz^!<0=H4ez6#OUQxD4dHv z;+^mBZ`*hH((CVXn`|YCYvmB9jEo5feh=^RxY|XOoRiIg`tE&^=JM_P+_MO6l4J^%1 zg^F2+-r39<@53oR^rgFuyj-$4c0_u4x}jh&J`8Kv^x~@GW{$&WjXWl9GjmIQ{z0%- zcD1NS&0sz*`k|nrQiPPIjEoE)AD{isePpuoZ@>K}L=LMKgYBb}^7oJVv7^z&5x!^y0{e%f_clNbzrA1+BX0ll2S4vO~T?5L>An0`=D zkmSPB^w#bo*QxD0cEtax6uMtZN*B9ZI+7n0d-~xH^EbcmD(Jq$8Q#W5)=xY#mUogk zgP)(Dl+#cw;-%G&(IlcBF?<3gkEK zIQSl6txtZ%VH_A9o|b7Q)t zUY@u64+{&cjkf2ud@6s)ug{`!f6Ya2A(Pe@SLOU$OFRoDv3?C|A^Re)z1HY2l{2C) zXFs}f~3YyNZfRKu}QcKzo{=ctfF!4b5|UpsoyGF$z7| zmM!nT`~Lfoss7rq(&bSbIsXt5Q(yI{r%!)rcy4^Ar>Dnw>$YulY<*3#W=c)q0reV$ zb&a3wikZyPmy^4zym@6it=eJQng@sr2O*Nz=;}TeD`3$;7AfzA_fs!vXtL#QbaixWCO{ 
zMc~BAll0=bPRr)h3&zA3cKJ%EsHF)4wooy$y{PEu?y5ce_7$dvA*3&4nLV2SQpQ3j zZYm|C#A0cAnVx6Y!{wRm5J}{Tab|0cv7fYw%-4@hnQk){+qk){Jv=;WN>^qC&y)%o zR}ibajf2C&-`~FmnMACVw@}4F-K;lz_U;YA@`fVX9*cKgD{~yIFTZv7ZkWIS)=HV? z3?mzk!)6!9il&sO#tO$nbGy9Sr-z!VZ{4|5u#~nsNh>`Z85!9(m=Gn#>-F`C>l9s_ zZk^jG<*AV!B3JGr6g>YL}~PgHQyS!9qsxpn(?2%XqOX<+YIXNaIqT>+(#C&z?-1t`bF_^{P8jY1z=8n%+em zi3q;4*>ZL^>r&aBs`mEwiG1^hcn;w!_hjZjZwm?zR`BO>{k$dHsn{D(Znsgay{C|>C>kr z1w_+u`>Z)Zfq?@xAtL^JP>3sesroi0^YKL6@?0FvbsJ90B|2y8=&a({Dk8-Vzii4h zRY8v(C>$?oKt?10kQj-D(YJW)8Em&H)kMLtnEpz%^43vj0izOq|16hr!%;@QdQXLq z!?dJBpZCJ_U`(yd;yJ;hFE_+&I#VN#xLC{#HTlOPhr1tC$k=^4bz6lJBs<~!>5mNw zv&52ml4WygoTVPiDkqa2yS<`hJQoJuJld`M?BZ|2A|lxi{mN&~E)RO8*hiMk_i@Ur zAzVKX4i09;#>M3`L>lt!jA;p#Z{NP%&dHh5l%hQxVj5Y8LX>S>$y`}{x>qzp%1NPu z{rh$63b@A0t-Cy?d}7ASovgb`I?ij)&!>;rl>E}$KdrF3juaWZmJyBrUW;Kv(DnYP z6T|fuPuDBz7C0smb(q-ciHQkX$LiZ4{uy8$OH^Z`kyS4ckSakrLM_vTzWt}4qF0yt zWwl+7v6CzpYX>JOYtyO1ddWY9F!loDa`tMhp0=nKE(@PtXbPS^ej z1yd=nW@1O}^#lBa@pSu=4MbXIjtPNaTxs zeqTbeRM-R&E)4+3K|qkDOTB^xt1F8Uk$ilsTT8w^oXBo5EKHtQoEy(5cAqOK`XPPk zK&IF7g62FsIc~||tdZwTkh=Tn%B$XW=(2|0USyX@0(|mP<_>Nyt9~qZ$P(Z zK&zorPRaH4^^RShbLJJ^JBL~`6Ob_Lty^E5mR+5%ZOSl`CDs=pzzs#av6M;`wdvHy zUb3!~j@bi2S~SG_0Q$w=xUn`tQK0^|I~hGc{_EFk^hjwvR2|*tmv7IGwD!Enm=t{5 zwLIp+Az>Sh*3pQHm#}!^EdkDxl9D)%UV8?-!@70rQ>WqP1Co*#&`LE5okz*;%qdT` zGYl;n6CdT+bfp_DePZ*SUBk5QO?9=R-_gl?M1LSc1_AsCeM|G(#sG^1J${^cA=6|K zCAMI$)4eJ6f`})2Ik6bP5qwL9^GRSPO5$A}ndq~-jJ-p}ZPMqus750}v@+@xg4*MQ5kcw(Qpz&O|FPT*XCH zX@RmcnSlO$Gbv{CgX%|E4^U6^yOb_QxUS6iW0kDEX*Akkb6Qx>qmBmvE1|Qa?b?G9 z@yZbvdqO%2U9{U0mhL|t8L>jM+@~9VF644sj+(fy&%hPoI= zoaTA^sB)!lU4vS0r2Cj;Wz2Y7TpW2g^TmsEj*eO8jJ)R?e)!?gwqMU30&DC-o3&~c zK2!SX(`6Cd%eJ3>x`@nfxW*NZaMEmV3dcj<-Z<1${_MkSu}7}kNAe{ zl;pp!j4Wo+xWD1a$@R2ZEOQru%C5P)hc&0^g&D0bw3wjN0<>T!Q#>Pqx9Opx@kXUy z6)i0-z^~i3ZoN!^_Nm;`WqO?{z)MACWy0&xxv>t*bw9FS1WmFaz$TB|z^;I>Fum^Q z1ni7QnpIR+S7%wbHy5Ft_9Z1m3gg5Mt>g7^ib@%V#onMJ<_N9)`MxkZfH@r`fy)yq zbxuvqe*-hKn4zIzJa(lIHCLR#c&@rb1iZ29sUWLdTGMn%q~ft9X97v1Qg&s6#c8nq 
z^d>fPW2}4t$e-8cmUM%mx|q9&+p)(6nNM+^B+Hg`i;jY9nT3HEU2gupdrzy!pF3}8 z$Ri@2clj1)wC}=DN(Pbf(sK-lw{&{bZ%pflnk=U?*zD}=q}*pytECUjw0@oF5xw}5 z1iqRS9ewN}n_x4DHu^Jf(*Qpc*?DEjl|UQD72b8J7cwoZxpa8Nw=+34V(axbvmZKU z`YyZT|98>+p(p?oq|PkoE|uja*_2~+;uJKdO;<7Ohbao zFqo{y&2_ZMu>7&tbd+CKE-sK|66qx?a=370yxYK~J01wlpBx_kkgCs2A~hNL`pPGwpZMMyCIPjcQK%u{C_^04~(t7-nK)^IHFLIq^ihV zeE37h-e~5_eg9)&=6@>W{N?K(w3jK_+rN1I`ZdwK-@G|{)x<=r&1K}nbM#WOB0xaK zrE(G}n(SbiC$Fg3(3qr-zH<;1IuYARlv*Hj{*ls)uhybE9SH^rBkDH`ijinK7ZzHR zvRQnpkKRB;6cRhzrppk(UmLI-5J}?_RaRCONbv3Rk+y6_w5)niv~;^(d}6x2UfzKN z2hKY?=YXF_v>6*4LlMx7^Nfv+9RTpqI(znPoacjw4>dJ3K4)R0hVceT&~%hiR(AG* zMjm$d#2YtmR8yj&qB3TNTP$DS+CUqj1qSZHXI5_I;_}dRVnPtnN5!|dTZtvqRb@lw!Ov&h4q)+yMrD*>TZmLzVP_*V?|2g)#@GG z61K;X3K{gRIJNZWoDfcVg|lbhC!UWX8vMY3nZSh?r}FdjANcvv9J~GNAMHL97#cd1 zWuTz&z-?wI?%us0Tmmq#W1AUeA-L-ZQDLu0YK3!2)Sx@It(&D>HN^78WJ zWm)Ji(o0{;q^^C5259g)ZM=h(wW_vO<@W8{lQT2LpW4n-Z@ts$AKOZ5^gk)M+1X`7 zo?PBMV*R0cm+1q5}v z1Ox`g_D4}mDu#)h3uw35pVMlk9OWZKw(dpEz5>PZa{0!YkyiDa>o(Uz%>#s=R4}zM zc^97a`t^xQe;Q4@rxF=F#GFfKeEp~7>O~GW7C2cVNG03Ab_I>S0XbI(n)Pg)S}VR~ zT8q^LV)LE-UuN3I+`*2*TwfmPWF)ghTglR-A_0KWM3S1^u${{vo2%6}Y}l~S5VaDE zO}6MN@gy`+)HPOKUctfFd*jejjx6}G3!T7Ly+?r}$f4(C)xL$93~Cxd=Y2Rt?OP!D zUzhqLZ9M<}!-owJJOY6aL3lvk_e)C~0O0z^;>X^;KB)z92#-L4Za4=k5*?M$3{nOG z?GQvD(4TFTf9{JcRDh7DEFY%3p-VRqJ{iz5_$4HC9_|n*`0NY? 
zPXlF<>*3wIm7>Y)=vPjZP2&GGhsgQXm?&)DzCCtjd2zVMPqySr7oaLOPf`H8QNY+i z`OCaP>}Fl5+6w(nAw@ew>)N%v1<*egnfcgyJLrm^`}_NKpzK;sOOQy%I_lqdeE;D; zI*~mn+C~-@7Y|=wUo8M6FQDXM-y>s*wqX!*`F8DkHIbi~c<(w|*zJ7lzKX6cBOV?e za^_+5Y=;nfejost!RpFfDZhbDZhP`Lu$_!VmW;C;%W!}F`hXj|-z`#H=&=@KE&-c(lV|}@g z0KYmxLjApvc@1_%NX*#V&SguB8miRN(vpUb&YNZNmizB_4l@kaUe~oBh+=zZI70P_FqF|Oh@y~K0Tu|Y|^sc{USl780bHX-?P%Xs9)JEQd3p61ZX zHa>CU_1+dLG$(BXMBvM&Wc$1eP<#rUNBNA5x;C~%Njk(=JV3S0Q`2M?&+T+0rb+SW z>{@wVARFjFsn1^%r#}^hQi6Wu?CcyjY%Lb%EhMV89cK(mo~)rm&(qY`*B@LU8x$m_ zrl!jM{fvmUWV#eiU~wu)P*K33VA@{<6@%n-JgAH=NS#+io=M4cUF-3YavV^#Zu|}x zUY^WFA(veE3gW>q#?lROdFi3u>nH@3`Or-QFyosRL4X>l_9n!63lVbVZes8uhkZg# ze>XpXMj)!U3ojAXkYpUx%Sja*#z!V@!5xAmB>nArzWdjxI>|W?LnBQ<8#!TLS6af8^08t?!7ve@`mZ#MTT?CyhZT2ydfyqnzujL<5k2jm2>?OE9 zW^n040_EoB=7a))^3sI)Nbzi&C4pII$2wASFefW80JxL$<4Gg3I?Sdd|I@@8()F1S zHRYxETz5i*K#(^@AHSLI_QkN+Es2=lKm>?M15e|Sa!f=9^L31M-{oyi%kAW2VPPSJ ztU{OZ^eh`YyXSzW+))o7JlM{~m5Me8eqlHx6g{>n;`zIdy`;pwcMUm4#FHG*xIS5!!4S5{Q;@$;wDj}r6FjFJTfi1O+65cAGhdj$o@lZU`Wob6#2)#cp<#g7*}-C6I4@sI%@}s^ehXr9hTXf%4jjCC<;noqx5ME0@Nfz~nvo;`JqHR#J;uc`&z?QwNJ1HCv}s}%7d^0R z*KrIW)ig8$pT@*wqZp$D^U25<*56*d>dPhhw6Rfxzyn*hY*B8{wQ5xZ3M7w`NFNuR zPIvc3xOCQ`o5Z2?L35T!TU_i~HSAsfqr7|&Gbx30=QO;$N)T#+5d1Jckypcy01g?k zr;t?<$+Hb=)URc{hbasR3T0;-}8rzon5#)a%M zB?=`Sz$L^Y@0G_2)I->{%t2weZjtPqXWoOCAWf4nogfd5jrnG)q6Q z;MOfPk1!ZcEe39@%X53>K~P$<%o`3v55Vla>CK`IQJv(nqk?r`T)7W`w?`hzu|;Q5 zeh#H0dDqE15atUU(RUaiBsI6+$0;mM;uOg2h|XX1-?g-8LxMC=`m^3~Q_z|n z2?co;xOON$jFSB5DREyKcn|t}I!eg^M*XOLgrJ)Z0IL)wV-WDGQnC*R&Yhgv=66GO zsz!wG;6aU2ujOgHX9(T-cd!LiQBl{)&uxiY&;j2fBlirJvl%r)DItI(NNm!{yUZ9y z%PX~Dom8#0w^Q~cLGMAUO|H@B;(zA~VA*GKV)?#9fcQtACo_Q4z5D)|kY#QaMHD0<}0=yyW z@zvJ)!?WK)z|d7qHHM|doeK>JtrM7lxJIXu59e{KYG=>hgFi!E?^|HCvM%%G?P;|V z!~Z*tHMF#}0z`?;d~R_&3|J^lKVOXyjDc}vR+k)>C;X@y6bgAfAFpvb5oEJJ^02x8 z%0f$NvUPh7G3EO9J*INm7ohMQ%hp8Wfbxz%YU=1b&arBJN`SKuHW;#qYT-JMIpG!F zJkq*iVQFHPIBA|mliPh>{K3GbTtyMTXgvKlt?0jhq_Bwpm*!&Lg@8zQ8Sk{HdwQDy 
zB!q~Ik~<`tih9)Oq`5MGyY2Vj;P@o<1U{4p{y`X7-O#Fz>mgoUOIuiiVmu{BOU|B?l2wpBj6RQ9>2Cx(#8HHVBeGzi@S&HHUj z%iMz@x#g|@yN2|UBkA2-ZaY~Kg=+tiBYKd|l}B+vf+p9?*xTC&1_i|dz-YB4?NxBHvkQe`c^{l6$E zo6z9*I1M$*6JxBtnvfXGkz!w$AS|5=i%lL7Eh)0ygaUbG`_7$7-rheZ%;He~#~&K5 zvK8#CY-}x#u1JVT`{9_6AajHi^YrP{?cCgHdliHCi;HWcMER5T)rd-?rmntM9xAE^ zCOl5<*17TpUpm9O0F?o(WW6W z7Y_e~$mH&6o@3-OjpygDe2L_IS#YhEJ`-bD)Q{VV7W;P+QT}!>e<7uq#Cc-2l7?~0 z{-1w728HQ?uWvJ)6zP|;u?Fe?T2)-F+81T^^7P~7rAh$AjdqJ&PF@}zLx?A#pf?%li92kDr;>zg=`gHebkdMbxUbGw>5 zFsZHjTgoga4}T9Hck{1Prl12@Kokb3VhklXjy~qB!hML~#?Pu@+$ZP55to>_|MZzN zmR0_HF(T0P@Fe~Y4H|6 zw~0u!iR*302rv|U;@{}_Y1TFX=CNnb4#6YF4`*0NNJ!8uy|%XH-j?0ld3e$RqRgN* zwxp|~TMOoiGGGuO#7B&yf>GBS6I6INb4jQ|CuW@$gqjEYB!*jYV21iVgkb6UvDsF{ zrt_!|udE^FZK{O7j=JLU{rdHmbx%*T&a%LL30?TG+q4-(HcfNye^NHU>;nN!KYY+h zn8o4h)vHY({*ii!<^U0@&gK1O9{Bi(IP{$(I45QUMI7w}OL6BYC?ti^`=(pjd+MD< zN1O{soZWp!0X-Y7DPj2&{}kkCmdS6eyD?`|eDvti+c$4)Eo$GqQ9XCgJ1iN9cP8Rr zs}-d+f6C?`Ou(=GV<7_$KpSa-nTY_=@HFQ2{8_ARx3RG$U@p90Na#G4;=#Rps%~xt zCr+H$V{py9t9-oZ+jvyg)YNnslCTEg5#H@6phyI5AkoeZ{>aw&$41@K(fg6(hks^# z8vTpyqq3#AFV%p5a~*HWe;K6eDF#aj?qR^}dUzf)&c~tn%h>+Qk<#i+i;?B~=;MUM zhbW(BIMfbex=LtEe-4OV>@@Z+TsPj7LMs6029o@y4`)0%d6)0NfvUJw!l?^>NeH^W zIWP>2wq~9|%TK9)4D&OZq1 zIL!keA18~#{z&&ge}7`|TGi0daFqbU03%0a*mmqtD6sXn_1w?D()o?@ZAuFHUuHX_6(K?mUih`YR+9XCF6FPVX4Ar;Fm2tsl`EJOB@~4Z{qJ zw!-T<{Rn7`uc!;aN1w`A*nbUuW@S1-)*NFqh_X?H1oofuqB*Q+zAHd%QXXHQ;ZCw_ zx?~_^XyIBJI$dEsjmK{Y z3kWeahxIX0cBz}i?c;Cj388)HC-}!&GffNiAFQ+WBs6!Vq(x{u)eAl5zc=g3TfQr@ z=A#8nP=@x)DqW7uFBh)e=e_Qh0cVJERBgnO1W+!F_zf*!cuo#3dcP zm!Ywf=o9bk&Oo7!FL)h8R62+IK-NZ@RpfG{o_G^L4ia`}QR$EL(}`323j4 zPXEsrzZ7QXl8D~SDK>DI*Xwu3?nkzy7GB~FZVnRL6-8fscd07H~{)kHUJe0IzO`4zBCtoy(+|&NIB8u{eDqUa34vh@^yD zGCrAt{@aOb;OYpnTYp}`+%w41~ zefsoCpJ%*87fM_c9>$1hgD5~~1o!UUxH(EL^X4ESIiP9|S+64a6EKU| zef#cR%U3tP_bqYgtEuC(DSCq2sug+%xr4vxU^WKo#E=nn3iz8S*qFGE>rne6Tp*A| z){E30jlnBZP}V|4OqJ;o=SbSOutdaX96;g2yoE#3K9*2eCtYr*>*XrLaNGc2Q&2!a z44ha1{0jbA@DFC2H^}`tWry5XT>>irD@Z9Tzm`OHDS@bkF5iNA3520BUWv1Krdf}e 
z3wuq4bqx>?kYI`S$`f@AdRV&qcyWu6GN4Sg&KX~>I%v&=;g{%C7*MG}3pc#h{e6O$ zP0_?1n98PGO``@89$F}PkZLSG>#xl_v5sUxV4)CMY6&db=tMiIRBFJZM{sukDkar+ z6h@#R3McGMgfTxM=W2EEPQn1uk3Yf zKb}5ZYZ@fnY(Eq(Q4@Ry;f&+)`g%!T4GW7dnSg1lf*UU2_hI;f7oZ^am<==X zGaH-{0vNh~2;$Ly6sU*Em5FKF%wD!0Rx~UL4qX;0xB5<38IoSlA{SIvjhQ&A~~yM5KvK( zBpHN55G7~HnKu{Rw_o?|y8Z4Q@4i2Gyc(lUX~Efh?Y+J*zd7f+BY$3smSz_Xg+if~ zK66TuLRl?Hp{%O-c^&@Gw-SxB_>Yjy=?gYW7Wy{!y4HG>bGkN`rWQ7)MwfrH)3d&2 zWMR(BCB${~&~Jt|HkQ|fxw*~$_6uAV)&|^r&aY|0MK)NTQNKo^(Cd2N&8sHE4SQRs-qhE6~I z?u-{!rs)iIC9>U%&GFTWhj_~ww^TosE*taYyn6X&z-o`l`68D2^sc=e*_URW#Fz(u zSh`NzCr_#eiw;e@G-|Y)@op;mg+jT0TA3#g>$>i9Dti@$;(zkL@OeY;kWPW7K=9#q zy@i<{Q*O&M#o~N6z495x^)g!zs`^#v9~?V!K2(@3xFV)-X)-n1ipDWS#8I(QeEIe2 z)vJ|rto3qoa!T6Ub#ASvZ%EO~vq{~*qtewvE>u`0YH7oU4ZC^Iqx()pXV|=<|BIJVoTz8hW&f?{GO7MBJ4VsZp81W zR^&w=P*_;#y4Vx$u8L(%EX|Jut8OXXW{|d#BGddzudFUfo!2Wf*LI+JIbW}A+YmKN z&a64zJ25eF^Oh|mws%gCy%b7oPI?@h=sDM3)E$pG^X2+XrIF~6+dLEZC{$oa><(f z@uNCG$1UEIcDKLrgX#WbYmaP_@b>ne`_8h=AucYiWce9%|E5*A$%!a=$@nxr#{y5l_Hj@4M`3#=* zfy|QwJBOMxcx!5F$A{8P8o9n#MM!uI_u%4eigLrw&SRY=C+CxO3fmrEy}ygU^ixSm z$+(lGq~}Z5dAs@C@bK^>re9ucj(;iAlxY^cr(~u(*rJ>(`UKl#6DMtvCoS*7!UAKn zqLLD=fq}u5`UI7329Kj2Ws2D&%5j#{Eb}2DjQLZ|CJ8!)1uZQt3=9lWa$>XZmz++X zylE;B+>oU1Yd}S)e9}+rOH3^|V7Y8jFdKtNi23s6f}?{&C$;BgF|*P@L-NpoQtbIB z75cSrZ>^uKQ<95!n(UwQzO!aMUGH?^az4VLYj~K;-QAs*@fc6L?B4mQAtf(5rq1r} z7;LWbxA%8)-4>m^+XDJ(V-xXRr-9_GvHTXb3}Y1!5090bE)&aSD;jk&?73@rtMOohEDUY+=8e*2HJqUq>yFv$C=> zZcGUc5-{tpDll&q#zIPpi=~E#hiQKPx$5?YEsR`U6`{w~1cJ}CyDz(jEf!4F$XZ)j zReNmQK4_K`>CDW`oS+=JNhL*7T_r(TAhDjuew~th;pN`;={h_{L_~x} zwxw=C>i4CS**Chb8W;>9;OS(ggT>to#Fl2Rkfj||OHuZq*;*GVMejP>72p%~km-2B zxnKc}7ngo7pk|sho=5Z@v|Rr27JA&i zZP=1+b-F+VhsHd+7P?~lY)ahHZvom708I=_ioQ1`_F z9U5BNdX$fe;k*&!j&~ly4x`Vsorf|+iX9gx8%eUyHacje(eu> zVq=@Gk#;Ma9B3SG?LN;fTc3Qfai%LkXK-le&YhAEcJbdwR(8j;b8?onWx6lBp#Wi7 z+ng8$1qHu*OEH(!hJ}WjX%@P*7A#C{ruuOzZIb?oly;(yXIp*uwCXC|8fo2C`th~n z_~1}SM~7!#UY>VQP=8gHdYXP#{vo_wF$td|y6I69$ewyW36Rq7>CQ~7S<_#v054nC_H9*T)%urQAI_C+fy?X3{Td5 
zTwI?cbk3f=zo!IsStg=qq0)V+_VLvut-Q=CHz6URk9gV#*wWoT_t7Y*IwUX^Qnf|B z3f+gJQ|QGPs(9`P2Rt#Zi_@JLZl&rLHZ?U#;?fViSlo_P=CoHB^z_As3mi~OF+)BotEqCXxz5@g-U;yO`nR=K9(<0slf2Kd=re_|@*HiI6YeER~OLN`>Mv{bwa zm3-&v26VKP!9i2$nm0GrsN~t1c~u8zwYyzJlT&nXm|i_h?Je_i}Oc6N4^qzek6!nQuF zUWK)P{JL+Vh+fXNL(2M@}srD%G`#>Q%D zX-$^%RvLZ|5egD@&P>rMOhFJG6c*M1>hKsYbX%O8ZL8%m_{^9;R(jA!`K#Vx7M5@k zvs>$EYuLkGo})u2YUSCDjgCI9&{smOo-E@o{4yUM9lh1TJWHw2%~e{EMTeUlVf8CMqA8_(G!z3 zGHUQ>HfRf*3rF_z^Q+K{j9zzLL`TBADBB`QlepYaVE?YHOjgjmrRu}I?M$oBE41yC zp7MThv3*`nd{LR~x?5OG*;^Ky8a&G^P;R0&m6)1+-&nsKBrNFAfuvUO`TnwHVvmzTMK zuUhrSX-!vmNQPBEw{LSScH3}R4a=P@VGlp%BCTg&VA7gvds?`mv&46j>>9~x`Y1ftM4F>l2KrnL|uW^I6unijzl5 zkc{`zT~D02&e}Kg<2&6@&f9Mdg|*L5-y3VKB#AhXT4;N<; zsysQMQiD((YYA3(92@SY;ma=9aDON7M1^3xVO^ZUl=Q+9z=v`5^V2l+^xP9xqc=_4 zUaO-{lCIsT8tl>LVo9=a0VI zSo-Oc{H06J3Kqs>=l7Ln1sgxp&UbPeeY>7?M=$4?lP6Cq=R4V+K6T2l)Q>aa!prBX z$r?}CHz z&ai#Enql&X*}=0xe9|m#GZ6%9N&D_EX>HZoxScEA-OjpwJ23URa4{`Z6M|mr@}2Ef zQnj_jmu4cTM%q&WYD}8bjS}P|Zt9l@MpQ>i4IpakSe9p`NLm4f9UeW}SKvC|+0_+| z)oyjt&ZZA^7Uz|>zAJo3M^{dz)%;{mORQ;|!*|*%xRNAb@}}+Ew?|@=>3nNjOWWJq zfByMrjwvC;Q;WlJPS5?AbHTd+l07k`b@cQYhybI1^gn!&F>G&%>$k2fxx~)74lkDb zON;Zn5&MD(s>#MSBWEC?{R09vvO!!K;1HlxDCSP-x2SgQLcQBS)%xTl2CMQ0*d-S|8DaAF#MD zRCi>%xw%QEYUkfa2=6+qkdU6ckP$Y0Yc=nT}52fJ(yEx&)Q79imR@R-<3viez+` zF6L$DvK-zzh-vt5of8>iXd(aM+7v51+TIlAr8g-dpvU5dIoud_;N1@%9<@-5!uF32`( zPcx|81q7Vs%Gg4KXMBKq9WZBQH{4>R94G(wnA3z|Jj+HsmVoH%iq;D=R-t098r+2o zuUuSQ7~Ou`+(ku7+e{6ruP_P+l&cU&d}!06bG?yk#bOpNBEM7sxPglKMG*yTM@{V1 ze_>)u2Au!Y*r>LhTO02zk%5IJ74Y0|?xn%!ka+KXQr)v!LM`Dx3!dP7dgO{{oAZb$ zLCUCI(ML_V%M~yaCTr!XB9rO}*U*u{!7J25K=evG6?IlEC@6@&FqH2b8k$T>MSOhx zt0q$d{>Y_hnMJadAI+mqCnEZH@#>$AXI|gsRlzgn7zKvqS;(u87dR-CH_K9gN3~eD zx5|eA*kB5ZM@%@z>{?Hu==tjIKzDf6-;nG~W#2TCXL$`HN14b|e0#CG{3&mOTB>%0 z>u{c>5|%72*dFrytQI|mQXb&#y+=n!XMWm27eDlM<}58Lx@EbBazaSS@1jh0#10DO z0!;^~t^elFyJ<~pDbW?~?^M%dDe^MsEBf{9HgMe9k@H_=mi}*l|FTo|od*w8fVv0) z@aD}4Hda=KFZMH|9oQ&G|80jgT8i$jqMT6IH_leiG7sImZy&!=4J#WPTRdhc0y=$s 
zeaDByE!sq)qoTe9AQa*-1UaE-j#zbJ6O)eex4fS14#v>tC;5mu8Iv;^hY=WHsLHYm z0TqF8ujtS*9(xw@G^ zEK-}DoqZyC2e)9f78rQ5ol8^R&MxzHem+xket!OwpdcsekdmU}M70!)5r$Kbsj~8N zWdIjf(S7^Af2P5ncHQg~WT2V~c-a>5jHP!OU~WbQ44Ru7x@cG!7`Ov0Me}wYo%TG= z?G%ciRg6b4k_^q2vy!7?cD!f&es@bL&nAE(WeLws-uLdwM!2${4~d(Yu$i|;(B-8t z;cHqk9=lPb*RET4KuoMHLR3sl8|<8ic^-PI>+<3>-bm;;XB7nzJlYQPYDI z1Sk}aqO-Ju81{hlYA5icb?P)d&l z>+bMuI^80Rnmsl#0YXMNAZ~ce(w591t<@+t#;V+DvTsBcLq;rW!~Iv|CepWeQz(}> z|0jLMcm;z;<2S52VQy|tvuTqu07S$hFE1|%b_7~UeZ2|+PBqc8BMKQLtgi`-f5n8W zgra)+$`yi3(8SJ&I!#oeY${{Camm^VQr2zZhZKQMm?divGq&G9K5*6)!b?mv_0-*= z&-5Cd2K$rz?c29OvsJ5B^;5G_FJHbqHn9x8baK9ZSsNiGk8cn62L5e>asknXv+ev? z{&bVLWcBp)IFB4r(ACwY8^T93xRe5_3(ye_4NZ;C(%5!Uv^_%Re0;=`s^c~r{pR~s zef%{}qa2!8=v8}C*ZU`ST=gw8w2XA3NZ-_~ZaC!(Un~ryDAME6n zMDI8>S`dtcL0l!jA6Ti!|BzY88E+7hXfP9_Mf8aXU#Ia3!3@Cp)lwGw|1M=Vi{s1k zc7X!cN@@D%fM~mRMhm$wxr9}RXaWy9eZ8^1v#-y{V&l(0Ki1truuC0gG7)NaZf9`vlQR$u7FMyq0jr2X=F;HAcdVPS}1p1mpc>i{o~Kfd`c1o`K+eVw{f+ zb>W$OrHrH_{hXvUMtv1!Wj|E6t-t=dr>~nWIzqy$d7ge3-x+Of?OjJM-+ss_^c)1e zCgGj=tvk=;rgqV)# z026&EE*{9~2~(1nfBgFOYd$a^6(K^kNlMTe^zO%xw*=mBG#?f(zqaPW_VJb$@zM_> z5L+nGH_)OehdllVeSVm*+_HOj;yT)0pD>i)xN$?~%o&asBN#*XgT*pH1F;y^{eYKy zh?l=5>0;J-XaGTcM&LMo>Rw)vA_F@Jc@OjHp{(`;Y-}G2_{>{om{mxD6ILp!2(~DQ zSBR7}n&_=2j4@c2u7QEX?Ck85SzsA5S2|EgNXW1PxkVE1;g+laoI(-QOru&!gdveI z?1q|E4BN}g&+1*hdh8HjM(){-g8$ruf<^?$9P`@YCE-mgf&~*XSEial(=>|M?JB`Zf(~Xos`PLOMGcqQjwf3gEFD5dB`*O*8 zdd%wi^T^1d0aS&b7!eKS>E8d;pBTD9I9A)YyQ+O10EP#dVtD}ymhIDR=78G_J9fm) z%s6;W;R8f3gQK{#v@`}#&b~1U5RhKKUVLw_A??nir+)wacTnXGz~WCrLX!UY<7a6> zZq88PyMwApk6B4SUs_tigy-=Jgf9qN2t(4r0%k|Y`Ea3~myNSG(9^4FXao_($Ip); z!?Z~eJ;@iM2`eWyOZRhdQqrLZEaKWI4Nt&#YNQ)RBwx%*z+d%4Nz1viQZ2VzapQFp zfvc~tuf_dZ_-Ocx`oYZ9!0hrPeKxCbwdZ$yr;fo=J3&ync&?_inh_G;ORiisFP#za>##i&LLM`RNf^voPts|Ws z9o{1YAjvpiJ|`DY_AWYyrmz(>2Lzj*`8kvsY|Mqr9tek)JA2!&ZNWCZfB$~9PF9oY z(M^%3?&5`HE1hP}dCF_>@F*|uMwfh`dxA^6|8_+TYcgqc*y_y+on?w(mk!%>VkOTG zaad>1LONRkDNf)@K-Ao{TZwpxvb+4tqSGo!EkoA zrnMB@u3r7c5-WOv6=5*z1G({fU`-=591w_(l187y#lsEwq2Ye66Af|VUXy|CG 
zzOjW6AD0$K+>fV!`1r9OY)w73z8-w_=6JjsKZ_4^f^3{1oPo}vr#%KYk>T*;G8;Sl z!M%G=g8!EJD~?uH`MJXNBKxRyVQ*MiF`pd7Q7R94DC|Mg`J6>%I(VV}Z{vM=K2$bxLKg;5D2z$z=EEpj&$90_ znUj7AMUoI~=t`yK<$gBfS>f95i<6tV;l_A&48Nlfk^%~E|A_l?UJ>_AFZ3`IyUVD0 zHS&X&<3Iw!<$FWQpd3P9K5!?9l-mn0iyl0B^r&EYX9d13EqQfuQ8xCUX4 zlBmYDAiCt5Oa(%Z+n-NVO|Aj)%Ma47Chk|<`P1D+jv0_Pq=rJ-R4-hbBRyFGL2C?m zhZA;AHc}F+*n7|}+dHaJ+xZ-!1Thx)fwusUn+QIeO$#&BiI<2m?o&Ou1|s91W_q$_3gIgL&kXY zk3`Jg(dsLg-a4x=ikSved2e)pb`6Mj4lo_Gkf>0^3|J8gemg$|^?bx+gLEWNnE+bk zyZ}c5U1WE5mLb2NVb=58kt-j`I*GRdvOxrLp|q?lke&su zk+x(Ii#=2eR5UhK0Gkt5c+yQM&jP{8e1CaGAA;V4ym^+C_dPZm@27O{J6aSThX4F$ zZP7WX3{cm8gD6Wz^F4ba-V~4!CQgX1E;f66dw!FKv+ncM_F>(XdGFv@CAXg^8ZGD1 zqlzzIz9cL^URD&ds8jDJ?|pbSx3DF=vWB!uu|zLsr!(ZwXc`j`!Q!Cj&V?_}vTXT` z<@)gp6!C0IqiZX0b;eMj3Uk9$CTNk#A*$MuBS%g*OJRB>mY@9THs{m*;h?C15u;;c z5y{^l_Djxw6W2{_Vxw@d#p+KU&HJfk--p>HfuUE*s;d65;%wcvt-FdDQa`lMZAL6e z9?XBK!nXa&m%{&#cvYh}6I_We_~M&4Z}QmIc!Puy_U_W{6t{g@n|9{qLQ@Zn+i{N&@oYe?*}mCwy6vP8?9J+r-heFb>y2yXYDmOoDU(WkKDk`>8>> zFi0HvkAcFK4|WFjbT1W&2wt~6QdzMWsfjrbKic&(is&1~|4E-`=>?GzB!7u_33&DP z?c1y8%^~xkE@15U2Vp@r1g5d}B4a`)8vIK-aT&VZL5Nux5|l1qevYX<2~rq5M}5p{ zXb`0b2xq0f-lDxAA07eBamSo5LZJh5mlzlrxOXV>^u28mn;M~fYeX&mBX1IC?#KP7 zyosbarWe1!z_=GLc)X_YF$Ce_%sh-<)qs;KfM=`c|A8ja$jDQ3s_V*TIYBtvbg{fxNw zV~X+o*Q^I)_iOcZLvQnz>j( z4+Dg(JrbS}+t0jx`Lop7|}T5r3kj+(8{BO9h@oemI%A6~4_AkR(E=(jI;3)wblZ=c^ z4Xi&&uyJVv{pS8v9Ma$6soU@$%;WpzmWh9h#bxxyLuOH9i+g|kQPol8H3^sAw!VKi zlrOQ$chAawF}B-4ktvB+&oAd%HGRgapzz%8oRZQ~MQk=4K#a{r@7}?j+=f|ttc-hn zVx%ECm?%~-!Ubp~1B)(L{;389{tuZkJ5>m0AD3qKZ^Yt6B~65vx3(ZGER3s9fA#yt zJ?!H_D}ZhxYhlTIK8ve#Eqm>|b6mj{-7n1Hb;+boR;G%o!Jr30-#@msu`w$2! 
zeCa$osW1v&F@OB?aSa$Nu|o-PF8*ySR|r@k+NDmN>V<>_YM}vxa)wQxB9T$P7e5jl z04sH3=aqiZKUcc`4W=C{cZ7Fs4H?Z96cko4Uqf;te8{qk^V5I32>5h)ACPFo5Q5Iz zox@1`b?NsNz4`K-g)F8VKh{5+E3R7)z?NQ}VX{0;)g@LPP|6usis`l_uD&Gjj`q5B zqFlI`1_mTn_yJ^%0aTy}MvA%4aezAN?56~Rynoh0jH<$RgB;S$AQ~WD+RjZF!B>3J zB7f4qq<$68#*TF-a%_j1`BmU$tf^>!N{pAJEuj6L!d(S&gem%aS|}V@kP)M%+ue1r zf7LJtV+hzUE$qG|A|A4#r5I4NbVYy1>mx3I!a_h*l+6{o3)Aav-^4YSwz;{jI+5ca z6qG>tS@Twntp{2bmQCiTM*_gIpg`+e+fvF;WUrDRCaR^d+d}rr%$t{2#bb;t&5i34&m$+jDpHCGIv=C?e5g!vHffc-F5Yq#bCpUp%Iilv1R9rf>D7h zS~i32dv51@JTQ_D5%(~6B4(~yU!Cmbyb@QFVJ7_*t|R{?=2ha&X^ zoI;b{YMu>bfzfb4k)BH&#$-gquH!($^`kej<(kA34s$ibFTea|U{L(F7c5AS@>UA3 zl{yW!8v9SUb7;cy;bN|1kXWbLmt-l=H0K}%rVye}E6=VTiW3VSqioQJ;`b|lr|Ued zQ5oh}fO+K4W2ES9BC$h^7sMd#sK0+5<<4#>`PV=2NPtxM^J)3|Q9UUC&>$`D zjI5j|xO%lyYh*eL3>7(=f=fT?bz9ZKgaBV19TQ^=Yd9X%8=$`vT;uXw>T=CHkBuZ? zYx|FGk^oty3VMmSX8}&mK}85UZf}0W)(nFIuYNg4wXln;>t97`|CI}O=d601in%Hd zw<20R_#RtgX(3icV8(Qm0r;)eV52=%K-DgE)54L6LmACPDTlZ+@I+0!8e=6vLQG;V z7vK?jQdsC-uumK>l;ejm_DnhUeCXzO?(2C)pJ=bU5cv`eGMQZoNrfSt+zj=h5$Kv& z(4BSg*o2-yHYaH2ydsPfyu{FBWk{!kRDm-I36A64LX7*si~>ZHRYS^~_;!!G-%iN_ z2S&6BgMBWgi84KvMSDUg45gI@>?PXISDJN1%CPD(Ln70ALsr>z{#IDh^j zPQSz=#0S7|I8D~8P2v$uV3D0NBh?H9m2rs=TXUMhBQEXNafdHvGH6gS?bSiGn(V7( zZx6!Jkb5p+Kwu5T@{CX?$4hW*fKa#+;l%d`!?m!>%!_sO%t<%atncrwj`SF|uZfZl zg0_JMiySd{z$jD`u|#Mzi`PFq!EmU+{EL^eHAkOv`V!^MVbnW=Ge-q1+6JqJySnt0 zQ#706uvb5@1O;YNM-*%Y8WMo8SR0G_OlktL#N1yD<;v91pCA zcc{!JUtZ|a&G(^g9x-n@wsN$m8eTpxvv);BphKL#G_vgkr9kHj=Y_>WpPV~f={)Dd zM2yZHg&K)J-$#z_^tli6uzB91HMjop)djq_sUbK}2VPVHhVCE`eMo2($1X{83PW#0 z^;3l00L~i`2@TDNNqZv=O=;R9a%n3L0%>Fz$AcqWDFI-jr4?~z2E;Q4YSWRC!NHIb zH@7mL`y#fj@$JmV?H3yqo<5(*smB|?x}1f&#M+0H_=}$#PW=}W%NHB}lENZr8esKe zaFXa_#3D{_;rz{m&f(!OgW!wYIwW$@IClwIN0CrLLK?mQC8Q~ylUrssGil2<<7uSST|w2oX~wT6M2IR#&W*dw`~ zjbe0p%!EV z$nYKY@k&ytY>MD%@44K+xY$AOUIhfj=eFRm2uC$Jk?2X!5@x`+{!M7wPR+r!mMmka ze88Cz;w6{Tns8FcfAg%+AMb8^dM2R35D7qL^4Ocjo8G%A*VMg~SGv5$ YSulvYd{7+pGle33`uwSslb3G(FY3L}e*gdg literal 0 HcmV?d00001 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..3e8a73d0228380c795d220a1c8de772cd9a20e26 GIT binary patch literal 13683 zcmdse2UL_zKr^f-G-`zG1as)#xH1ROlUiEf|K)#Hy%82be`|qkX9aS3lT4~r)FGb zxVyjhT&tK28I`!6+cFzwR}ff~CZLEl-7+#E;kFJA8ZWgCTZVdHBy3gw5@?bw$sH?5b6`1JQ*mEd})6KM7^HhU(2VTQF;!Ztc3 zr*HrC^z{37>zY|Ew~7A8MeUWZZ>|acyk4)xW7bk@puWDIK9N1$72IhpUXyLtEidiV zT^=gxI@V*?z-+fR9c;=T@5{C8R+b1=&YvC3?Y2)@Q{R+r*StVg36+c~5iZob|D9l3 z>b|xk*9L?rKa`DhmefX_;$UPx+QA3Ru1pI7mm@hzAeQS9vKh_`AjIIKBSU7%J(Ywzx2Fnb<$K@w@N(Qa$IBxLtd8 z@N1>`;k_$)t_|Pl=9F@NzH{eJKY#yD$H7hfnx-}Js&s>r_r7~Y?YhpKJaww1wbc*{ zs*YChweNVhj^Pof99kXaEt5Dg(3s@Fs>{LojW5g&F?CKUDBRt1=1D|}Q0|$9g@t<$ zAJ$#=mX1wKOytSC)o{hwIMID}Vk_>a>to@J)!i(Y*F&2~xq zceUN*ak(d~=S2I;)OWo1nh8*<*HKra!$xs(dkgaqiUJ zyLZWJ8|W5s3ch>Hif*xCe_Uq=7|k;a^Fy9YK}J(#M8x-WYO~J#>}2YA(BZ3}A}mt# zH4z$BRaFix>&GW2wQOzE0)+G%B9C#Fp2%9VRolUVj>l~ZcsqeRnSXifrITU$*uc4c zIo-VWw1R>{O82Q1Yqvxg(vC!}E(bs;jFrF1=aZd~LKWF+SdUJb$QQ zHp9C8m=~9XT57>e;$YsWG8>PKUaQ9}t?_{3>C^Vj4$NLJv6jmI`t)%|;k*)-r|#;S zgHS41G2c}F2^XbiwXg8D&O{RJ=H=aY=#aWq!E~%~MHtOory3-hzAcaL}Jq+Pjd;68p%-)(??T3I<|Tsszr z^r50cr9yTw&%mW~6H=`AQ$?%9_={Zw6t7y;VVb8P4V`e zsn7T6#${Ut&p($F&L2~wF@+SwTAZUM2Ah+xj=9^OtK`(vC?fYdO9FL$%=ML#z$_Wx zJxXa?M=Go*2b%+UlP(uOtV81I4Rm&PYX11+9Z}QD@4a1DuU@TZIaIhP}`(j57^&eUhg~yTj>*N=jbjuq=jJ@^P11Ekg%O zMMcH*rOTGlcAa{d)sSh~XsnPH=F-KfrR&Wtos5`tnOm^R9XOnH{5HELBH2iLaWNOA zxkK9m>qcCs?dRvGQJZtJ=jS}W-ZwnYe!FO(`=WM$=#_VWQuG-oj~`&9UF^-nICkt< za-&sYGtCg!aVQgIo@eSig(lHd!ety5Xtfhgx$d)$ia!2NpHAOkE)m%+PrWXmiNJRs zezlT%Kh^4sOs?#;DeL9me3MltTrhQsB(H0CsFU6<0jE}X#_QLwo8+y?+UXWd<@fL3 zkIQJ;+hPl_{dtmN1) zDtd0+x^;+A|M2ke$jC_EP!Hwq2MFxGU+-@C@cz9G9a*7=A~D%y=gn(Y{BZlskmn*R z=j~9P2aCo?%*y&?*>Tc`WT$r!_H^vm4Jth!_*4A~3k$EgyLS!^r4BSQa%(fpKJIz* 
z_H7@2W#71n9Cg%f&2dO#=aETPjgrr<)lgTD;#r(3oR~*~&4N;i8U~zJq^5hFKKa*7!gplOE1H-e0dy3S!2!Y1?Mc#redPl)5#-c@akj zvyP+or{u(Zv@&vc+J8O0w&mIJmEJODW@b8h&M#tB!c%b7J}mznl19X1&Y2N(T2XP+ z_U!{NJe{4h8fTv7^v!HPUEN-E8>xx?Q}1rou`HiEQ>eT7!phfdl}r8o{AwDya02&* za^BTWyj;d%x-d8W;lqb&w8L}f&c)Q!)T{`%ni_5o5;5~3B?~Y^1sg-3us{tXr&%V} znzfv(Buzt2?Xzon_nGUc)~e{3dh__L-mC5hm6d1pQPo}Nz7|_ZcRLxTr+VW@3URV8 z3t6KTvP^7sB=C{fR?!c&1t>f^-iM4!w)?crtNZd5E6gYQ>q&x+Vbf~rhXQn|RS0{x{NdMYf+QUh zliUXy=#n0DS=8*#?-b&Ar2M0CN_;{><+VdGfd(Val-bNisydaLB2+kMtMA$y!hrL`nfe|?YsBv$;osYYSBl#|A-Cz zyzb1_*X`EMH5nI+Rt&Tj8X{C3=xki#XV+{NszKpvqAJm})8AUQl2+xTC{%@?nY(|Q zTI=cQi8A3pYLqcRiBE@(#adJ9FWVxX4m9U7$G1*A<@ls!;BvCMx88+ir&v;4d0_EyA_`MX}V3%-nEM-^Vy_-rDH^Mfd2K7-ZEzvO9r{JC5Hnof+%h zFD$GDm^a=V7j9l2BGFfy6sV@A#v5AT;53v!XA4%@Esl+rI0H@e4tt;!epy*VM9pwk7oU3UK7d%Doggu* zu-8RJH1 z3vQj0M;G@NbsoMTJ@RfBsRTnFlSk-gAKzZ?06qd%pPriPxwVZt^@_DL$gf#NWLd2E z=-Uz?+q0=2bir14#rVS}EdevlLmnp56Q3^E83pPaPtVTk=DUukwK#wAlAL5UWv6f2 zut9yuVtTMq&17_*5hGBNrW_)EQ7K%e`QE*IX+ZYL?As;Jb4U$eA4kaNj3>p%Kj(Jo zSjR6QPzo^md}d<6f-nwE4Gq8Iigv)!7dV`&0MEJ}9v%cSY}&lpq%26ZMAEVUNm6*< z*GxYn?YcCh5&<3_9v1Kob)YfRG99sTmGF%^q=Y%pn!cv0s%mI4*&6-ATJKzPniU9&P;}VUn<&0Jj3qYyz)oa$I>oBO6`kwRSbzNn_ zG?Yi3Wh>e3S+j^I-5V>|(v~h=n*8X|pDdcPZ0hc9;s*jD*;D8AF;=Or^4W13en;-n zSCil?YTt9#yLB1}*qGF>0JN)3oSbH5;j*nkhfI`dR*Gy}et7vsP~asyd&#BD-7l|% zd(0%HWOr>9G{`%@VXtyVE?27cNQc5ndHKG_r?{)edOr>b#VL>)PiP5BHF{s`mG1g% zySN+&RxChT(Pi5c5!vggl;ZC&aKkrLqzR|~ocsd>EXRE7mpNZ+CMoMYg}eTapO3$KWr#>&07{rB4q9k4ee7AFw6V`1RU1|Ah&ESUxFd19-;HB_45KxFD-8 zp?nGD`s51%#Z%tg=kRFKdrIdD)(ACmiCfD7Y&ZsjT$jHpaG&+C)*Qjl^$;UI%8S+o zvSmjolwneH?%la#?fx*lZ24wtdDA^W)1Bbrm9J~MAFt=$2jcx7a`XR;%HK zjXq#=45UQd?SCp;Gq}F(=9cXLTm<;fZuD2p;9tBv^LFrq2WydQ{NH~sXJcd2kmq6- z>W-Zhyj|4(Vhm)5lOG$+2hZ>SS7%ti6BVzRK%MmihVlMWf5;%*u|}sP7jN zaul4M7*I#T4g#_pr*Kgy&iuMXWxgsh2{vEen$1mpy?6I63iJJq>()IhEiKiVUAb~) z5>Qo2XAy^N4N@=Zz~#3hcAYqK-TLpS0L;LNslWbuf-!+lmWWw{;`5LbR5IyB_ zdK5}q=d}Sz8&lJGG;}F>Gj?vNo&!On?&u4s*#IjelasO*hH)SH;y${xj4!2J4|tXN 
zcpKWun>TOxgoP_;(!#=F8>5X=w*(`nj4a(&K*HcO$WcyPu;_=icekFfd4IYrEj@@6Wey z-@8HE%C(XepDsA9q6EDQn?61uw4^M0M|f*yq2O|4#FKxnOZ|%&`y(0af5OY}4Ycps zwd*+a2%?GT>4hA&YPnXofC?ETY7v0I?y#41nC=YZ=jTs)nuzCbDQA}Zc6W4`e(>Y_ z@b2AN0zaDUDsb)!Xh7;$u3VwbKG<^Ln4#x9og5JWR$tqz8>n^WT_12v4BPVoM)eLO zIJ$TMt`DW9_VhQu{%Q*C;IxX$WKW^Z7y0>q6WNb=xGg-TF{mTzuKD3!BCrC1GOU zQ+brAdDwv@!99EA1B`l;QVU4mkeaZO47HUE_0pBXn|1K!so7b6At7a;JL@rYu9G54nncr{K6qDxTE8W ze?WkFOP&jgq2|@A$v~BerriqUTUFJ^NsA{p>qqD1NlA}Y9>3@9twpYS>DO=PCm4lv z)_Pnf5SQ6@KDVeR+M!>C>mxR8_0ACuf0{$Deg^$!=Uqp_CfwW=YuhJW5PT+QiL0SjWxHo#Nx; zqchoC`RoQ%%|vJ`09g)wE%~{Gw&jnOA8`oVD=n@6>eZ{1-8*+is;1)coHrZhuCW&9 zqOt;vTrxtd^4**qhFY5GVUHd~pfQ?(x8$_n+GgHt(pCBtD9?@-7#O(08WVvhmb=8B zJ$p9t_3PIyF%r(h5yhtE^=k^f+P7pr<;yj)E8 zpxa95PoXAPpXYuBe@5Z(G~0Vk#tow8i|65{L!=uE(* z___f=Qj3g8%y!Dp)&^kwcD_Ha~xo@B;yDd zyLLeBzLVq^Nv_^abL||SXR(E$jj3P-wKX*&r&V#y?!=b%SOkiaHcGHuRQ#kR_BJ{F zA@D23`G)K5V4|@=)k_%%)-bOKlU7pQfK^H)Ib}^R6ukz}>*U0gsm)=E%7y zw~Kb%E>i85#D0p!Pd2Kd>fYl|v0feoJtaXa)xf;=gEY@}nQK`#{NGS0rq^*3sJ~)H z+i^>;$d-0v%s~FUoVK4X{o@4duP0$5Tco!_kOaTl zi0vo~7Gqizdd{;9p&Qnsr8TzMApz}bXU`2E;`W%jXk=ny(Np>Co|l(&s`foQTU!yM z4;ziHT#5HwTyP!C?j|r5g`4l-!L;&uv{DhMKVf5@1Cw}>J z&u(E-hL|pISL7;EDo;$5@|ZiVGTj5p0sSzA+3j@0N)e1Y$%2{Et=Ghf=z_wp1yx?x z4WBYGKI!%-2^40QhaM3%DGzZC{hI>vIj@f(bK0x+F2V@aRitB9uunOQ|4~t@FL2K_ zE`9P0veA1c!^5e@xI82Qn2?!SM&4pNRK#lOvZD+z`!`jm_<>ZsB1E_{f8KtU@r5ZrZxPjd67LmJ@N@_uz;&r zfEUqV0n12_J$CG8QjQ}dcgDoVo|c@?EB!N!CPSqcFJ4e*Z{NJh_x<-TqY%J)S>=39 zc_SbAp`ir=AVq)paGGJs<36UE@9i5Bk__<{;iv*Mk7zpw!HsCx3iuXNGs(d9ODHdw zC%&>$l%5~^NCr>nHK@W7m|rL(0BZrJ5Q{Z!OZO;;#7nzP#79OR185@!Uav7NJ$-zl z(TbTpgsE$VWHF|`S;HdYO+b~Vz11+8aG0&?2L6z(=FW>InCzG^NsZD*z%dPRRek&9E1qJV*ptpy9};9lc{tveI9F-QZ>LJ$VgwWDant z)@QfkSe)$q`+;q*W}%ix)ZfJ#gO1%OD%)&$t^M%*Va}sRk3y9D@p28Slyx~Gis-LU zHk>o8>eEh=vhMeLRmOXWj6Yn&!D?MxU|@Pmv<0M!8N~Gl2iuQi8qBAqrIlxI!Lg2>8?yEd4F%}f zhpGB~t3{-{K~hWm9!yaz>XNGvP9l!|=SeHUVqPtiLVJ=!m#luo~LSlvel|+6Bcdth0twQ zhx^Q!Dn^Q!-1VAcu2l8>Vv%21m_h30wU{-NcVLKNn7Z(Lf6zprVLDr$K+O>NL+C?Q 
z#4u_+SP=dKM&3AumNOF+4OqLWf`P7|iwRUVpzsYGsF0p@-foY~wXabEFAFz@!$g+< zy@Thk_~$<}{QOUQ`6aik_y2&;>YYG!h&hEgLSXzL(-YJi)3@eC_JBIo@w;k5gca0v zkqa*=yLazKY{u6u5I#-jI-m%h-QBF%U}`m5N(v(X&k`ab1NwW1Fb#5@9Hc?9*3vz5 z=D{ttZLwoxW3JnyKp)_csDoC?sx8dR(*wZ*F;PEzwo;?$Uu7$f-q?Bw42g^S@%Ikf9Rr-z=LiL|L~0N;Vl_*-LiX zOIG9>ppqW^blCO_fI(K4ILbygy}PqB!LZQ7VUU5ld2vhYzyT0>i@ozGWQw+OWR(H} z{oiIxul;jQ*vi6O`}oLcS+s8hMv15gjVV~AGsnNlnA|d#Q3zuRiHWC4L4o;6ZvZX~ zHK;#!fgISEN3_wVPd~X(g`^;EJh_iAxCAeS0YhAmmHTg);r^%P#y=YQu5yTB+zT_8 zCNY$K{+!`5+9f&r-R8{%LaV|c1A&ZR>FL3k>D5G$`Wf7L@7`1Dv1h81wbKc9T(WG% zKIn@mdGv`?)N)K}hz_D}S0-AfCkXzv>MEOkr%RQozs}9xymD^470kpJCst24#&8NT|A3_w?-K5c7jSzy9nspe~0Q(Ijt*8_}d!usJSzO(?0CQtHJdYk;l@bw;S}A1}PkuDYW#|g}L?rhA{DuQTG2r z8vFzJG^#yo)sUfpu@Mo_F)+-y^4`ZdaU)pRH`wWr5H3Xa$#ml~wWCLWBF;Tb#c^u2 z4W0=69Pmt0^Z`Ny0}JP3$Uq*N+2P-(KN5gT{gN(lveiItF)}vJX~1lenD{WjaT~+W za}3-DL@`v)X;4#FABVQw)9lnrV@i~ilx*VSN(Q-w)iiYRpXlP-Y8zjcMf@MQ_=W}dECKOaunBdS|Rov^=*>Kk`qvRbO z-kHlUXLkf02&r1pP>il~>o23Jf!j08I*+$#8PJ%!bW#%%s^G<`h4&&oY^7X@I_5Zc z?tBY7k#}SH^@{=})o}XNLipzCx6sX6#m46E?d@H>uMCJ?THd7Gw6(yUHt+84PA;G` z`Ma7Tue+a>|yC3G8*U4$HNx&H;|0fp(Iap^F(4Hgx-h~M6*@|3H%5}@Gn@(z3bme zh<8y^!M_PjeRF>^F&Vnf^u{^BMAP7~3L+%DSs@a3Qq+^nvYvmEtUxoCyPQ7H+Dxd$ zhQFAp|3&}E^t9OnkKpm6IX&!U{@Pu^A^mQ zFZ?n$ca4o(>O7;_F&K`BBoI~N$^K?r+fsOADWkCk&1lz##X{fXou(NTOn(kYnKTm&Q_d%Oc*G*CzTyRI7n2C4FMHC(1i0 zVJD^KIwOXXao(=0^aZq41M(O!onxwxv3Gms#_Qh^M3(4q>+=w(*bKN))tOC8hF5@m z3D!Z3)9ua#NTM0!>mWVKwEjd%eR)WX@<0uKFPsbOYcUTQV0nxNxAvIG&OU86T4G9w z+uQSWbAMuu1xf7DDxC2jEL`-Mo;fJTre`{(#S!tQG0P^3C|1q6P8Oiy1hf2RP*`(a z3Dr&~d9u~hkPuvu-#AQb=CQR}n9Jh&&pCg4ARtZcxt1&`u9_z85SDg z@EY(;0Di>qNyGzec@^9%148Srv;Fo==r*L_z*|xcZeK&U&{7JM(k8b1ToKA)coIxs z5DSIVkby)0Csm&s!uA#yW+6qDOf znNKvH2QJLoU2Vy_jPi#gluy4{BQmy%n)@}*K$@{o-%ZI|RY7=5)Nm=X=70a?!QUp! 
z4=wPg!XoetX4o3HaBT3Qj|VD`VQW88$(_WxFtZ;2bQlpS!Nm>~nlrs-S>;ar-uWh~ z6zpU2dzC}1>0}!qQfWd4@bX_`YKCpdI|hObb4>5y;9)3_#3}%DZFU0`HW8PREAa6_ ztdoODGGAt@Z5iWoXIEDr^y#xWYof}aT^E13rput#vWYcCynHEl^27;K96juF&c6CQ z*Sn5qEol=awoHiDxpgQ~L_=Z7c+1Sh!x5JR6F(9|Mva*2aZb>=&--LpwaO4PIHSci zNoHZ{G8rquS7?3Ytx33hEb*hkNl2Xb{;){k1c|VDb>{vzl^x`HcK>~{j^Nn0csW#(h}LNr!PV!#K(ssthZoF4oELR5~^Pznh-?h2Lw zjM&5*8++zyB)r4~>61D`RBP;InuIW?+=jN?7>GRZMY3&|)Zo`lhIm74(-61y8=VH5 zP>+=pAdROSmX(zyem%@R{diG_6QKp;dg?ZmL*T6U+-GpX@Hf65c|8n4z#gpUsar^!X72;e zLMrg_UIj#DErJytG#0ez7@CDGKLnRQEQXhMHds;f*-AP2c@PO&<3t23sn3DNb!IED~4W#0v)PkWe0ObM6X2hOj87;57Qn0$5x;Pj$20q0?ZEF S#c+c{Q8=l5BIVeHpZ^#CITcv| literal 0 HcmV?d00001 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png new file mode 100644 index 0000000000000000000000000000000000000000..2cbd7fe2e68c8088f8ac6af4483e324a3457a602 GIT binary patch literal 14784 zcmeHu2UJvPx@}=>QKS_xAVRBT1Ox;G2_{gIl94PZAUT6%6Q~HH2qGW?lCxSM8B_$6 zoRKUdIVn-{+o!wlym>Qo=id9)4QsvidUaQKTUDq2^Z#Gi-`@M&Q#^l`ijtm^L?Thi zoI9gTBCQi9ktn)<*@QnaxzFi>e~CCqU35^fHg<3}urnel7&zEiSUXsl8vf>FWM^+` zZ6&}X!oz>~x2p~gHuj>tyq16Y0FSku3GadP>)UXVEjH&g?MWnR1L7ZwM9BnGT$o$t z%t_U2&&Iplbkx*V*M7|J5ZTOon=9e zSFhi2 zIqKy4+3^kyCim~}1pSv!qB&v;9-A&CqnMq;P(a znaM~~5|5WPk4{e16K?G%KLs`hsVR`Rl9I!WDxV}-cD&)S^xr4!v^t+ZS(FlS+&S5P zu*U8fgRHD<^4Lx}qpmzhdj2bSwKA_p7q5(4n>EJ!pMQ2Nn)t4DPm$}_du%gPmAv}R zpREgLpUjPSBy6E!rEQyE7>rCYZH#Zd=usNrlE!-Bd1`l|8`t5(Rb$G@;?B;_;?q?E zVaIG=_-xzsePo_eU(YZaRl7RRHTH}q~Z)8SV89@o*deo;Lse;1bBJUS1n zja90{O4n#|z1BPn=eyk&zn4&RWu?D9<~;FCOiZleDEVsq9Y)u4e0+Sk%SX>W7fs2P zCk4xmIHLS0Z&hcpv$NN2Q9k9;aAIUOAxy*LyT@N6mDvV%3eIzE_7W0#22!c_n3 zHjDNwbB%j_Yb%pptr^9u$qUni`5VGdkxgpV3teYt-f!hS)HN_U+@Nsku4z z2vNtQm)?@%;^Re^RwkT_8xal;Geb%ta?aj7#kWAJRuD*;gGdY>u_&{V3a~mhWWQKQJ)z<=riM5xZEeY^yZvT#C$N 
zR{A5Ct`{u~gdZGSo9nQqV`NOn8>40WteZZ+x}0TD;&U)Eg?VLAa;;w1xr^n(g$vE; zy4vEdGwgHXmY-jq6z*{+Gi<-JZvA?>0%r5<4!XU2lS=RJzCUSSmW6Z;`BYomivQPr zGd0(4o-tfPE!otWYoBFUO4*#Kbb7~*9W}H2#NFmpvF`iJ2YJNio@iUub7yxdfBkvy z-o2^mX)fEc7^m@eN^0uXB9En@(9q%6yquh}f@V!N=_w0HgxUO_mGhEo%VcH_9-i}9 znPQ0#)hx2%cy&#Ub7Z=m!WVOXn{k7!mZdW<4#m2~{41Gz*ktUhRg6Ri7)J*AZ zWE1n%MCXD55~i;9Z0-dJcpvZdL%vnft)x8vAnrNEgVbacb?(Tlnen8&iXYU=Y zmzQqf29m70@`jfti=7$6A3ciJD|99PN5kE{;0dpOx`0W|)y^DSsTwj4#rw{k%Em^O zwdFxcB8*(z+)^$?O7Kt&#so+-c+7Vt?6b-_iJ)vq)g)&kJq>&E9XO6LiMy&U&5d*Y zAa9l}C0*L%V{lmeP1QryF{j@5-i85tk2|F*Jt>yHL(L?_yceZUs3zk0phfMcPkaT1 zh5o35`RBwxefm`MhPQaR;o+6#>BzN$uP&$*e%h#Uz8=rrr({kC{wUq8pYzPRF6t!D zip0{G0RCioy7qX-;_Rr&h7B8NkEb*DwV6b6Pf0Mkjq!zeuHU@F}E|{2T z%gXxV!;)S_EAJ!NZaes`$$&4thE7hJ3VSXn86jI@-0 zcC5Z_^#jd?JwoBtKaq{Pi}S;cOO=C*JBrfoLEA-IskNYkgG z_N)|gns&Ao%WCTG`S}49ARp#K5)!MzJ)3P7UlgW_42~BW1qh;CqMsGnE8=a>yfo7!M)Uy%P%WO zC%W>ve*5hNlh^VO!H5j6wN;*sTQ_eq%d9NUay59)4Am@ZOPsoUSc>q&Ltf7h1`R8IuLqk^= zmX=ZwV2E#x6DO|6%8fcu`qK-_Vlx{@no}YyE1z&bF6pg%c;Ga%%w}rFCg&cH!R6_k zW$`OluH;k|1kEt==oKXH)^}GLZ_j$%Wu;LaCTNu1;rQh(MM&5itFGiPUoK^tx7PIk zL?`;#Kl@<+afihjQz|Bj&bauSH`jI9*NUSOhIlUY?_NQf!GCMqlN>GOSe6gie_Vac zGb;0<;x-*tcJ?9Pt-h6~QFQKGR#sLvqCWKZ_p{t7iN-yvq7$_sR~&lZZ%or9OQYkV z(`zskYm$vc-G6?kTVfEgI}`YAa#BCJ{q^hDo_)Hyq1oBlmFD?fdWK8bBRRnVUplUt znZ^sFp(f9jR8@n>;#^#EyLt7Ra7=OqA7+b3FEJ-6#Yo4A4(8eqGJopd{l>v`zuL8H zIS-g5(+LPsQ+Z|M#*KNiE!q*~GRnwQ$4c((t8K51T)#a!8x$FtLC2*&RJ1f<@5#t^ z{@Fu^;q=rs589KOJ*Va6{dOI`7{wOpu8hW~j8j&Nk*4xz6mO4rv{%&4QQPD-=^2x6JY~oh|jdtZv47BQdsTmr+xOwZ={pA-N zNpIBCw1NlH)WSY|I737Pup0YopLoxeMLT=@#=7X!f=*+f4Figoce|KQa+=je-H4Sd ze!E6=k4)!DJyg!CKfm|Z%mwaZ4PX#azFg*4zq+y%92S=B=eNzDLFDyE7sJb!9oJUo zcORDy7qax67EitS>J-km@jh?SP=n`46GwB3M(sj-d3pI>oz>QUNosG$>PX4r4}!X$ zo}M+wYh)K)IM4T*XkFX>#cYQai6&J$`#cbn-D>1WsDMeP=Zfp>SeuHKl~s)=?x0~k zwOCY6&M`!d>D%iRH5u&28hpHe5Cy@PL50=xia(kYhT}ehNXdU>pY!q zbmu!LLCN(oXODNF11GIH7n;;EuB^@%w?;U36HRz`n?Blpmu>le+4JYs%OC8IAfM?! 
zfb2!7Y5=;^Y2SLQw4ZEte-}5W)3f*5k?1}_*audHFm^Gq&fH>!a3Qm+(-fG6<;Vr6BOM(DT> z$AttveylR4E7n&Yppk9W#hmmZU!JG+wUJyQ0F5iU8xh6qtgK}VzZS>Ig^b*ymZG6Hte6riT6=GD2TI~DvhQ>xgqY7F<(}puRYTLQty69UiY1#=te_r#xeLGRwpPtT> zBSZ20`3H-0<0)EMW~LYv+6 _2pgKA{;-4^%f=W_QIu7k3*Wpl_J4HPJMOfTgsG zJqzDhwArZ*W_OHJZr#fD+Bg9v->mI*+;i6vj=eqytgWrn^o#X?WlA5YWax=g(H}jx zWy=<#rwh7FeLLu^&#jet4mfC}F(E+OZ%;Hfe`sd7vAx)Nt8itmqgc~=ZFMF1 z;lrpmmL18L%O7A;AR4f*7_II5+gphIL7hG7vFNa{urQpK-Q9p<$Tqwtxh@)wKMakC zz&MQw7e=YQe%=Byo$U*dGy~jR1j2}jUdSDM-Y7ob8H3$7Z-|vaFKCtHEsVu@LmRpf z7>hKlsIFE3lv%Pa;{D=VrB}aAmCoDw*%&&H>WXg%uI^cSdF$j8;?VqnT=yby1abKF zx%MWP8{-vN7+VWm><_WCCss`kcUTt-;u}^+E!&%HoSlUU2;er6TmJc_!RdRG=Gg5r zB(XojXfM%HaVq8v$86rlO%69E_)9MLQcI%}Z40j+up7KDo1;B4RXdBWFUY)Ca`k77 z+|u{n5`U~bCv#=;sduD-jCQZCxVOhye|n>%rVSs6AG9j?Asyp3Kf&YdN!QIii#{7N zA|SY}#rOSqUC#4hR7tPibr|&xTiLII^;gtB)$3t4Vc5!G* z6|+;_{eZ)<$QyGUrxfFZUxMgD;;wU!;G{%dXA+4C4v735YVnbL=4B>oYHEal0)DYM z$F|ScgVMo~2v0$)&Wi~5I<&~y)uqW%tg2OiZLyV-N+@s!SZ*{uf8yZhD4k!eaA_hc z31k};^&61{$-4P(+E{wM#IP;FO}Or8(@_2ez9|j~385FV80sV@Cxfbly+Qri?b{oG94=rmDKISl{Q07hiOKQonVA`Lj@|gMtzfmN z?X68z-v$QMuRIw*E0Z#|=P}vi^Mpq?8ST2lDqH_mMoI6(lHQyFSKPX+Alq~2 z?(U?a`JFpxE_9pcV$63ORR_1AsHCJsr+E-N+xCMO8tkpj9<~ExjN7 zFE{jnN9nD$?p9nh5tjG;D6h<_{2Lul$LTFIikqO$VjRw3Qr`@q5Iw*)%(7Yw}^JR3K zR2sJPS8+~e&B?*UxCG>^Zr4wI|1~Jnr;bx-`$=*kJjes*Hvm(nU*6sbVXqCzH zXEdYx_miN%Dmprrl50Z$`iQ2J&Ckx(FDFqMhmfLfGK99(j_p*GshC>o$gEUGWCM37 zW2}&K!wW#R(iq^E^)woIBG_#O zH)qE0aq?BQvAv7s9_dGKPT{`5|G7?A3pZmlBRE>37MwbD>K<7b#lE7Z#>r}Uc({L+ z5y91L&|anRx4A`0v@ERp5h?|lo1OifkHHW+Y6fO;-EVvDlpMHIqEuA6o^+kFGBb1^ z#+HW4vhDA3o_Nc|@BPMGHPWyZ;FF%X=qzdXlVr51}{Kp3x zdO@=w9I#PK2S{(VyGgaV_x$`+)zzQGUx-XcJ>0l?vw2^cKWf!OW}_=t*txl@+lq1U zvhwmxmjlG(>*^F2e*V1r{mVPo)rG-o*R9w>U%pupVdF0%!mUit?bl!2U7MThsd|5q zAklKfm$%P$|mo@W;voLf(>E^6B%vHjznljpbRI&hf=ePcT4h{~P-rioGalEv@x?D~d zFy5I<#!1KKPvprFqzc{Az`y_}8(X;%@JS0Am5Ad=5HhBJaB#3;VsJ2_s;bH`z!XH% zE?&J<05}Y2j)X)okvGaiFgYyGP0IZCPEAeOS)mj5FG>*6uiQAcj$n?>OOE(v(h(mA zYuV?%f7h<~nHdLSipvAUx-ZTuB&3djt!YKtZeRdTs>%!v-$X_KvAR0# 
zolh7slF`J1a(a4gKBBV!%UWMy2->nm3B$Ua8(#7-ZjaaoK7I_UH)e{58vCXaulL0F z-o1L@Mxt!Ie0hXhui&Dosj2US2TvS^0~@d@neGdAG7YF8%@`W8tb6oOd9+>`dbgzO zrr?Id7SNGT&>5QL{L_5M5u0Y^<-PEj?LrH#shXnBOkN`p!U{A}ntJ0Q2GD74#^&d;&l(X;OV4%y z*Nft0cg$I6j9iT!8?zW3E%Vj~rBchWTinO6%UX)EL z@vk<13`XIHATFq;IFcW?t)vd^3%q56Mu%HeVP;nD(3Z!4kM^UV& zLMo}G8lxW9>+RSuyLUGYfzmi`qtpDOSEKF1=sQ}N?|ioHeSx&s23?*|sjhBoYfHIp zThN_5chEO7Q67}{&tsTO!Yq^r%DB?~JOTdVis%aJFdPsQIA|u0W5<4mJy$Mpoi$T< zdgLUoF;X9MXBW3t?A^P+#Kp#fTsCH542g(H!-frGq-nTE!DC_UJyReV` z=+&tH%8(T3d9CjMTDQJeF8=e&ErNjQ?ChNBcAGSR>3} zWgmR?S$J{rnqV_JL-HH`JK@laOuGwQIQwp2*pYBPLfO-^XnA>Ar+sC48On_27~kgY z40S3O#N!|fsR)`snef%Jut-H-MQFRaF8<4PGIHCBJn{*K-Q)Ya&1OACg`Lo1=Q6aj z)8+egi;T;U_!+kT_%=9b{_)`fbki`ayy1A|fJvKU-)xRms3NWsiz*nbKm#X$Qr4Zm zx9{lH--Ly;50a0vU3h+&Km3r4NzJo*P}RZDo~7FMmB|EBQc_;o<3mUxK&kr8U0OH{ zEnK5wwBy~lQ$9{V=;_m;olk(H3Hgm~cyf3+$+~EvERGv63|mu=p+4sA+qZT}=%cFE zh^T%JFjWwK)vjJmeEXQoH;_oJN&G$Bvm+@Pzl)l) zvubvBuR*jCvMzuf=fQ(7sy6!V6_7srfI$hN5*Qfh03qY}Jh;!H#}_0xetb?+NznVT z7bEDGMU?>?n%%n-z!#2Ywd#Q>Z`EI&+6|mD0(MBl$;nBoiAX;)+%oF)NOxgAa}p?6 zQTqWuX49d%XpjU(3+gBfiqiN9A!GuhFmwF&+lk}O6G6=TCES08F^%DUn>1|rM8WiV zf)ER&rK5{OUL-qDcKiDI9S#}nL0aqF$BkuSCsBCL1p*}XkB_H~cVwFryr^~#%3hW7 zfQd1r?Ee%6nIy}D)zftj`s&@bsL+g9*t+N4M%O(B$-sPj0 z#V639gsI*}BEUz`-OusfTTEPh7OH>9Sp}jvo@3Ae3Y*0nt4c%>6SQe*aj?B4piWo*johPcYy$HCAV$X^= zhR&yKW|jmCmYN&J$Tcn#yM?@^qL&4ag52bwkV~0W+zY=dW z>&|w81R|;J=*5*TQvSPNeTRElQ88}ZxIw7#n9MQqsT=kX!GLx?_2UN-bxm(9wO3$S z$oODJpihmF4E@Ak{$xOCk&BGtu2<_{ND&MO1_D$G^;j>$w_w$Ij8KN5T-PJKW?>9S zhC)qZs)30nNWU{w0;=tgSAoC7j<+Fz9h3k^krHHzGaT@b91flKBAaK$~oz zMIHVi0i+8QWefk%b;rveF zp1lIbkUuMP!Iy=|{r!|ziJq*0+-pH93OY~dr0L|o{!sMSl= zXq<@4)RjkV&nlr#$p(r#3`G&+GAtCXL(zA9$4CEYBMDb=%>AK4_LpH=*tzosf;40f zFOCSeroKHs(P+B=f?2yM8EI%@K*4YxJ*sM8V4ziS?G+{i+Wq^vhZ|Vpz#W;wjS@&$ zJ&5LShA|^nuC@?{B8{2PmjgoQejh>qEyViA1Q!)`0=EZk@Fmf$;6*WmMCOXp@&6~d zzf5q*Vig<{p;1>rpuve?oKay>M{7$0u@5Lrqr%(Z`|zD4EO|Zm`EklWh_VSf0c13#ddU+L-P(ZD&YC zs0Nw&A@R8Q`IT$Mrsb0s2fuwo^vc0J0H;tva-LG*Q7^MwZ7ih3-_9D(3FL234Sc=4 
zDr7l8z7X@+4|+7()z+llR?Lw82o(T0#TPq!q>uw8`Vfq6Q? zQc}lPcYYps0-ejw$5$UG&c~Yd zvVX!P7F|jqLSQX$nRdYUMNK@we6Zl=8oN=~5=BAuK6>=1qN1V!(pWG&ED-N4J9CsM zC@2DBF`0*$bRUL{Mu_6jMIiuRG$Di_0CvT;exjp&Y#Qu*dSkFt$Lm@R)g@iA!GT33 zZcrfDgdP7CO#J`sHRY*=|8BC`xnz9h%2fbrV){30N{j}@sHzu@uAy~AurpP!P>Y6D zzAlJEg@8oRkANai0q>R0)mB$G!?u*?vm ziJtu%E**%Mg@uI!B6FO$n3&FSr!n^IY0!SB@#&iCo}S{)GPGT_0Mp(9INu0!N@QfD zTs~g-1q3R`#s0bh2!RerbcOVB{V&|0dG8-J-ck(bn}r(R_&2=)Nbq_CKleWfWZ@)E ze5WhaRg?4I0q-Nq$J)qe1A%8WSKtL}Cz}Ld%7RtIq)eMY4}M9#tSAx33=wut0vy4pSaBF4ld6(X$&qd3`-$>kV?gLA z0}=vnR`YBQj#^OJSH(kdVxtH@IkJrs6j6_)0*Vnh_J7-9&El(>)2(Yo8 zMeu@kev*(dm{q(*)ulSJ{gTrb66eQ9p({A7;Ml{(#pdeD3Q~U(ffvMQgZ_$2kMvs2 z{aucN$`ZpO_hbl=2ks|cJ9#2`?Cm2m$Ye5$?&3om5B85M&4j}D>neA#C+(_ci1GQd zOdER`Q$Q0|!j2QyRa<)@G&Ho7@iE47i!}%YgoPAouk`5hpQJjS>HPeBvCjwvN$p0f z4w>tO(Q%A)nEBH`muK3jz^$5FAha#!T!-~6Z+f93=6FLx!)F=T5W*(~YFX!p7%4gO z53Q3+Ts)$(=6We`6RZ)57*mSdS~G3_Ht#}Dp(GrB;{`8Pe_x-<7$4x^_B$nUX%NFT z^p9QVtZi(3Y_tAioptqRTK;h>j(!zCv-w*pQnkVjV;O5jtnmxDtl;hDiF2`(NANpm2J+)=L9 zdm0#6;cQZfl+fL~cQ57k?F3Yy`}Nn~WFURnxC|&nrL3v%`9QuBIwdCE8Z#Z95}6#l9E`}H^1Em`l~stxVRW| zGu^&@DfEIl4co5^3X_s^^}SZqs3m^>_HNU4nl*Y-V#|vcFJ@qT2-n?oo%433GATI= zXq|APf_GE7vwi$Dt~ojVI{dg%Rs0=~WD16tCi0e}e8*4uxVG)Ofw=!CK+`Z6vi{LgGlO z=8$6=fN#I`^~C^G?bcK*u<8#^9`W(`5RZ|4>k7dgS^V=xX52nbMC zQK8$h!@$XfB>$FJq!ze>S#0!3^c}_qD&fv3Aj4$L?5+c$CPa`EE=J>!U!nr}h-=Ep z$*Igwbg{?~_9+mW%ranqnk$02st;3LmqFc2&VrYm6Xv`?+FySq+-tId+nFQ|v&P>T z#krrzavF?qHiLyOFg7ufUE$d=CH$QFl`F9z*ss_*lB)h#6F?!@Dpm48T(~4`4Kc*% zsFtGs$B7H)o+1VN+W@y5_J(Ye=v0J=SDqh7#B5Yw7sY8voCv{>v{?wjeH32pR9b`~ zELfH9dh$&e50WEF?oe|H3aY`2+k!qthq)NmGf3`S*d~Am!ny>a-P|b&dsKyJOGA0Y zlMc!7;fyvE!7~d#p%)>Y%LG##*(yNXHFz><47^h9>({Rj4h%FrB}J-|Ce9oZ-7vwU zz{VJ1iP?-0q7cwm42?)t?~OP}&F#<)lPK12O2pKI0XL9¥S=5luX%0Yb~n&YQ%! 
zkJ!5T(TrlVQeV17_+&ujVdV)>%?N+Camc2(8xZ+V9tH-!$jm$n7hP^<4;=BVY8&1j z7W+VbBI^mPA|MKf!>x-r=GuhC1~=ubzKq&)B1;Ht42HxJ@Z^NO1W}j(l7JZU2#u(% zpW+C`?~w|KT!%<)dUg3Mw1{Cm4WJ65Bh#pI7t});j2QnrNKkz>&_4iZMF~SI^TrJu zh7iTVnt}m(~&S-GX5`R^M|$ z3ffaK;Qkd?+>$2GAOCsP`dY=yA(ub4hm zxd>wpbOU#V%Y^+D8xDPdBP5KZ%8i}u=Mfjzg>`@^Z9p@CBuBAN(Eo!?8SwN4p695I zBc7%qp2h`nw~x;VEK+sI56ACcPIBwyC^C7>JOWdS z%o;YQC!|W;Ol~|M25mfKak@EGlaJaI%}e9t$&=oGR9~`xzZAq97#|2*7;ZZRTu3}v z_TQgJz&nIpL`E6!&34+?ngZN{{z|78wn|0B9WMt}oy%+rzc|>onpW&HJ*+3hBuBV{ zVc|&D$aoI#cRbM@@3B392;VK(=41#Pq5-+fubWWNu8`Y&Exda;pNE7b6fgJln!$U8 zP8x&7=eqY@J9X+NQKoQ4`h?3Ed>8DPl`G_pilW3u4iHAZOw-2@ricu_RCU z_F<+?+st_F#C)fHz;=dXUl{uEC(R&>XBI~@M&Zp1&d8W-_IgN4_opEBUqL(YVeZ32 z5!_B|&qVCE$&{h*`CY$G>U-`sVcW*EcdzN0GiQ$J?-p($*GiZZe)I)b*lVT1C&KAI zX#u~Oe?8gc!r&-&nI6m)pL)8EDV`8ukZ}L}@Cq{<+wLS=?43_Z?-4B4n0QC*THVVw zi!z+A&SIBitFAP#2d45@7H#~e1788XeU;op3ih?$xLMKEZ!P>4$8t>q*xry85CLJw zovlL4EukrxHT_(psVD3@_r1G1JBc=f0awrV*|%?3h{0s*uETY(b+XDVf=EYAfylrb zQiLkSWA25p$GKb_Xr~C0d{1@Vt}$7S3l$YC=2ciJq~^SkiOKrKMbL2+zwXhPHVQ$% zlqzFSAD6xFE+8P_h(~oep$#L8YUW@j$5jdG7mi<^@}@NH)`cGQkgxYeZ*bm3f3EOH zZ*rZVNJAw?*QhblH#UwZs9`B-Y0=YfF(BwJj&wCt+fLKqqZ+=PDibA2gLH?|W6^${VP%daoTsW&} zA2ihFbX7^|`|@bjFSjGqFFt#G$k+d+y##}VW z#95)=9CpiFCyHNLUrxL0))Kc_TO#*~pXlOVTz?++%GGVgzPNM68U>&1jucvRRh_ES zP)5_WbzOBW^Ml%4#k*HfC|55D%i7_-zMeZw!C%V`{u8g^l^-iBpI7K6SO%Wbd6d?! 
zW>ygO?VU%$d5EXCeTdqw2r}hJRdHL-O49=CUp?K1{siB17w!F~s%%tMhRy7}?v0snMh9q`% z-QqF*^0`jqt=SpMx!`TPcEu!Xmv7!x2u(ul^tK%#g=87btLa#Q`VpCZwMK_oZ-4SsnR?&CGc9FO~S6- zAx2(7VRB)-YJkIS_Hj(!-41uj>)BmFKZ$18U_p3HS`&e!jYo@YIWzOcOROEiE2{+6#xpz3U5I=GFUyv#rw{Ck<`- zYAV?9Y z?;G13Xu%scY>>Zt^@W%heUz{7R%-4#I>!3mnur}+w;pnHbKA_ocO}nu$hShbZlbR) zcu08W>s{YStN`su8=c6^o2aCKa&4uaiolp3Kg=hGTAQMTPM=me-Qk=TaN3MLuPwQZ zqe$JZ!zF8Kq+=v8?dFFK&BnZ0{CqXZxMEDGYya{2uBWmS^Bv2Y2M-=hZ5_%TD`lJP zPe{pSu={>$DsQAPK`ufpPU7iN!_Tk6OG``Bt!Oe{4E*v*YFWzf9&Bz%ROP~2T?%!X z@^76j!ZRH?dNlsVy9b7~kq?V~_NQcZxaO7SBz&4@l`tz>%oMO2K8|Jo@cw;oUrcDS zrHqTSGq-i`m5NYNEoZ9zK0SAVo8R8v7xPs`bnrV)#Fi-sk9L*>wk*7m4H0VQ)~|l^ z?Af#IXH~(1O_xrEy3WOM7QgWH+|=3KtrIOBsN3$g>_YZ%yqv4*1p>0kQ9aLYgtxuD zz2^~I^2x`$IC!0=2IDf5-+enNCntAQK%hlvd11V#V7i^lM`SK`baIl1g@vWkv1noJ z@k1{!p0j7q_B88-@{Ht`K%}YPtdBT$^&HURry|(rOr$GG43pRTH>o4R|v$-!{ zzMUB9=$M(E_20v#`{K(N_3<;({aAl4J#UARg6ZVg?cC z&(_0VnxkZNb)(mB*l?;z%dH?TF;Qq}!FxC7Tm1Im%t}d)f?LMCffAQ5pDLqroHi?* zjn3(d4)ml_spOJDlVrV^{CQ1UUWhX~yzlDi3E;Qt*^S-dLikg2a(e=dJljl9`-m=O zJ50CPRe0{$VmvoFwuv8$giIjsw4)!lX+$R7x6M$orv#$e5 zuP#b*7j0m9fhJm9Qc^N8*j#s9Beyc2$UQv_)re^u@SkNZ4vS0R!?Q{!12f4-n_PXZBd~`OLGb9*RSU! 
z!JcMZA1j9<(CR$fO>L-@7883Ar;-xVjE_mqlEq>24+;u0^bzLtu3HoK&Vw%Sx8Hu| z(yu;19eea>Lmg^j9d<#l`5|Qj*&(+c(c$FPW@`7~HNv?S&)HL9wYTikijTXsLmc~A z4u2YKFKA6N;|VT_Uzq9i5%YCe7%h=iOG!(w8oseDdT;52C^5x!?2W0l)qB z8|(7o=hxceej^PS*nd#z2AMhvR_KHPH7=G`aH_y&dUR!h_MTDfv1Nwn?VLT)qf_E+Zmm=#PuzjOC4 z4_SCCyWuaY-@bh_r7|K%Chk0OjZdp}$X3JR!;ZP7`H^6q1m&bZuKfA)=em&7?CdhZ zf_Aq2W+>9}`CW2qq;lV0wT}Dd`=6*&Y;0@|MQ&~;U$Se4<5?5cP;D~hy4Y775RK(?3 zNJuh*%xA-vEsaQbv8bO4C>ctb7T+1Yzkk1Z|Ni})Av!ub(S$;YTJnaw>*z{LN@(dc zk??rgk27;~{p8FQwr81zqVBpDj}L#z8TZr{QDYwF$sNjUjZd=r`ep@gh+ivIfEOuN z!qPh_JNqiIipr%+ze*Oww?E~I`y`~?{eJ1`a;#*@h?b<3RDFoB)5MQ=bbmnC@o@zm zov7s8epae&YnE9`$KtTvwQTe7V()zk2-*{x`BzEVw=!n#F)Mw>!tN8n*Bmz@r!<`x z6&>Bbe|g4Z3Hda^Zlv9?KK4m}LqZIvc5(JZ9PWaXlhd>$R3MATMazUYkd=)sVWBN{ zc)I(2P)MKfn-5-;)j{9i#|WjSr2*g^FPNy6ynOkxe1w=+kWGJ6N=kjbBKZ{vYe&?T z32M>8#l^Wvg|laG1N6)U&es&WIJH@n>~rk#*PEy~-4UynWx8SWW+fZ|TrZK;hkumM zz1VB75%7fR{NqQDzLXBy=Cy9l_nqILR9$@q@HM+dcKe#utAi}0VhFHso%!Zb*SH?n zASa_>^fWhjdLu2Ve?~9VMOj6|M#;~{aIZT3Y12d;y}mcMSFsEwrLlvbOHww~QPvnASAuC`4r*@_qNsd^Y|2!|}+dD1#qge^<(^ ziI(0=ZBlaiZiHh+FXEi7TNN@SU+A{%N-bH%4UpZvc6OUgaBDzeintG>A1KIJLEP}7 z0B20mq#j+!MaoK0-xMAZ)08%CwR9n-s z@fbu#qPx30tBP{rTtAz{@&W1bduK5(W&(F#Nh=}levBScBLcvu%<)z4kbm!V+Rk07k3H zLES0&V~^C@6jXdF{d#-#k;%?lG>+fR;V>S05oxhjV$Aq)Qa$#D)YFBh;$OvI|^# zm5QdO$&M4f;iN)<+|)cdEIxrs&%(?sUL`yogba}k2r#g`G_uU?JY$Y^ktA?hM#h^| znyhhVX6AueT4(h!Leg#ww`4r89KZ0_ffOYlnch_YA9wT1kvHStt2EO34 z(MXL3V_l#6KfjJZ^!tT|rT|5#&b8g)h zhH8_nwrJos!;V51(5XO_<ek21E75`RS$x6|JOojP_@uqM{!8_%s0?CIWOxSh}R2 z_1LnHNy6XXpPuvPz5IfL23#ZUnw+3N$W^Z0NHUUQ-m@%|R%OD-&(jGrX?vbKcaCsi z*3+l6N^MaxZOTq$9TOB(BV-Bm>kDJW4{%STH|{HtkU~L=>%mL*@tL1NrfdQ`ngkIH zeqBT&09lL>aht)`d z{Cv}li_1ifIH{@NSY%<`Sy@>p-fxrrSXEVJvPRmsF-2=6f6z;yyw2{%Dju8f+n8cp z>0@&pl}rwcdB)b{;O$b1iqD?hs;#SQn_8YsapMtooH!XC9!?N$x_-@DPe$PuX9Rww z0CJ+m{Cim?FvD`_6BSrVleh2QNh7IBpE=_$0hB{<6Tj`?3&};7`6+*#vyS)g-|xmb z*tT`+P2d@6N5|Y*9HHPzq{?+x14&u*Ad}mE{`tCT!K8AUX`4Wn?Cm|;<*0x9H?_5a z?M_2p;6ilR%x{0eSt3fm2rwEb6D+VNugw$)RT^Ztf}mB~!O!0kckSC}vUUqYFK=VK 
zh=uZ%E7!qZb#RMbSy@>RJw3Sz>6;xb*&l4bt`A`Kci*{*s5G zIZs7D*`S&dma>h3!5BqGAKHfzw!nIU4K1!c}zcn>A zO?{MSkjeeFL#r=m6vLst%;xZBF>^TUNL2sSV4@o;P@kJfb6x)dbkBfkEQ2#k)nrW65KTY_3hWKyD5}c3?ZvGQz*whD69YS z8W@&v1-g>3)6}s82f|9lGgndEANz2M;n2zB-dk4*%3h?zzr|Vu;&%6$QYh|eP>#%t zIj$Y0D1Cc>xOd}*4POekKzvjWx-2QVW5*7qh!o$0)TO% z3gtTg>I47#Um5An0Ffw3NqGVY5qe@f)M7Xg9U1viH!a6xs5Q&3k!T~jl|gYxlAlbo z60il=3LGcz?}kqW%ucKQYhIUn>{&;6_{l&UAjVL z*Wfm57t&KTHckY+e)i;vMRBJ};?;0e7o+LnHdIiEcVc3!_~k(6c8$P`zR*&4a%ieY zxPnQs9y@jkC}_i`O^MyH*a7DHo{_8)Wp}WF41>C(kdShpF&{l@2$_?|@Gu*jq|4ld z#e@A4C{9m7L*tsl&aPUVE?Qn33WgF+y*C#kFuJrjuLSw@)vJ@#F}z^kf=BH=-i0eA zC>sD6v3DbV)&=of)hDQM5Pg;iaZ8IMZV4}6zHG^avZbi3+~cfq?b@}n8r)Lu`_9v6 zTa9_CO~t#jQsZyw>9GN%Nk~YrccXIQ&N7IC2+dFW#mTc!5J65=GE8)C-8y#c*vFiU z7cT}kT%}OXw0(U|*-lT-%E7^r)eutPG@YBYA-HsR#F~vekMZ-Xiin7?&!Y;zgs|ER z;zOw3B=Y^Ijt(uL@`g<6^XJi!1!+jK=2NRyQ7F5FebyYnul-O`!gl)fpX5GN6276= z4h{}2IaV=ZK8zYB`=I>kRs{4R#;#==`vVp97!J(M&9R<5S*?SsP}zOjRxwF{kzbaP zQEp1sXee;9BN`ki3Se_kWG5RJNY6Ept**U*4SwqdF4MH`MXp>Ob*(XCBdiEe9#E2XX_(1f0oVB2l zaGX$qw-GD!0mKQ21FFc-Dcd?WQZhYJQVkTwLAed_ij9VSrEEw?6)?7won; zpyk*b?!gTOkI)=!Eur(ZwYBW^uPBr^7b|r=IYR^-&YioHYu)z}XpX-O_`vVkvq(=* zPcFtH2*{fmg|0$VRkpTHXEJzuZ+%OlOCFJ3^$G~w+skXrH`HJIiKNc$+aqyy(Ozk^ zRK&%%8IDi4YO|f`?(S|$H+Z^r+cu5TZQHhGn6~B8XFT!qixP2}<9p}Bq+wOTL*Bq^ z{P}FX1&w z9KYicf$N|@TB;2-=h>yS##hIAauRsy`}8Rq7@esj-#qp!TUw?Td+s?b(GLk~%YKnW zlsJ+wC;M~sM4%9$Ewohp%L-iZEiQ;e0TitQVj=19Cw5`cqCGw~_J|XLGAla zO9+>@JfEx^nZ^?B`o$-SA|Y>R7?0oEkYS`^Gt#aBiBiSYwGechxo;*a4wMV<;bV7p zV&d^5Oiahv*(;MOAkrD2^paM9-*3N#L1^m74L6RsEjuB}X#8e@yLucbzN)%9{Jau4 zDiAXyAADAZpP#={)`p*9OP1bB92))RRGrfD^2F4NIE9!}-86&#`Z#0_Mf6CZ z8OZlpN1@bjW^~z7{oxS}ZN4kh^$*Eciv0#)2+Gwh|HEIsOIB6mtTV&~4nP01B|j$BvKT8dQ7)gMvvg zkaX{Om8hu6K60C+r)LF4D8BgsiP5hAI7TB42X5);u;BawFbJBc85kJgrY%gN^YQcx zqH+ZGqjvn4F{z*vtmQg;k;oZ9RSw04(DX{1nzDL=u^%=8z$Q>Gx9{Jdf{qS#3@?+t zHJ#}tJ>A^~Q26PE9j!BLG+ic@{QUg-lJkZIm>1We%5M2r0x-EHGc7e0U9Lz3X=oEC z$tfjB^}W^ML}r5iV&($UpbZXE`LiCA8mE^-8& 
ztTI|&0T|L%L!)RU+HWD?RW|sKukJBFqnf&R|IdB37ot=I3#tGpp>at>1{8~gq(EQa zLxB}NnHgps?NRN3q&%(*_MjdgO|#f={*j_+PRA&eFz+Aw6jJ<-N(V$vOjw&Vr(OhW zsCQqn>RD*0md+xWBoY}xUSa5tY~FL~y=n}a$EeE}IH817j}6_vd$+x^3ASis!rzZNAFhQ*s zAm_DTR1?(@z^e|*K2Wioi6G=aT-gR@k2g_e+AH*zmloq=W3Qm|Ea`w32$7Zc`R~3} z$15dp>z41PjzLrF1L0MH{6p$5IQ$}13-o)_;!W5NA3l%FP`2@;j|UGx1IkP?^dL5xCY*~oRjmK}oi)Iz&!G>M75@6`PYzB_ct@G) z&b@n4{SWz3sd)`lrhluM5B`MS|Dl>mG%8fLc=V}MrywOBJ9Ow}=?~<&#F7Z=8{%jn z^mgOU3pNZGV%HS$&sDAzN8e^s48Y#a$; z@{5UFg>qT<{^7Ps8i@Y?#j+{mLaaIL&mLWl*qleFKN5b}kp8+#){CDV8qyqc)hI$= zgVu)Kxw8`WK^jZPN3B@-#w>Hr%0oAQHj3ehc#zI7XKkhT*2^q*9uoS@MF}yN;kZq- zIu2MOa*Vf9T+_F`fD{d0NxbU?o8m{@djB6ADa`M z(tg*Ns$@YAmnwo+ zx?6on2$8N^oAK0IXfSB-{eZ#U7K>Hf*EusboPkc<2(L;|3iOP42uGtqb{i;_QA-xT z1lXzUy1y<*vu6Wcs!_X!26FuMv%CtA18qsK2Vp`yIKJC|{y7Z7Z(nK|yP(;=iF>uR z@~Dz=k&zeBA_y7Y;%;@1`RpocX3N52Pr^yc)dwoVd-vO{I}rDr`UYl#Rx4NgVLDV) zBkaR1Pr1vNA33|Y9FY+BW~f2eu!}~c$(zlNb<>hPK-6eglU7#$`paQ;%?|WLY3W?} z*{*Z=8O%^sDx&xs`wOc%lOOoT=JtOIlPdSVmThmzG8;*J8XT;l6MR^FCS*T_Vjctl z3R?tem_)_I7?nLg(chdFLloSFg$33DJzZUZQobyl#QXQ|>3{s~Ap7LXRjV}mk<^=q zoU8B@e3dAx0JfQEk4%czoi15E$5MXj7kZz49S1Uc|yL))4CjM2!}MINGP|-J;72X)hxp8qjuV zv#ygSs*1tivKhvWXXd9zNVa|Z_N{v9ySHx@6&0{H+QJA}`U*AZv87(GDt*oq6HaC9-wMM-(1wF(zYzV;k zQ%9I#&^T36QMb4cX}|eDCNv#ucK(+dMof}gil#6cl0||;8-7bkNg3~JgceXsd%ffD zX%#NT?nNy#pS2ZrY2^T4`=cZKnbBRM` zZ+yY2y&dIVFT_C%Wf5#f({J+bojcnZ8Hc}!%yw-f{R(-?wi_Uc{y zV13E1&4x-4`R*RDMd@y&_A;@;y7_a8am$}_jB(6Qtl0;n!m^i zra9d^^@yZEG{QQ@Kq@r`h$y$Mvkpx%Vnk)vfwtTL&_XbY2{Fj|rKF6gdP8%s=fS0X zp9KWO!$V~=)S}duXV!x9Xuao1Hq68 z53ssiB_2yLZ-zKDvHM|E%(?Oo@wAISkh9pnzYB)5{@SR==&;g`Klbucl9%^|)vwZi z;n5B@Zu=kChuRB>f2g5x0PjM_FczJwbg{qXI9DUIv2S_}f4>C~$Gfmwdjb;q0Nh2& zXluut{EfiS0O@FYq$33xfH-!ZdECDbBU!I!i6Fxk#KL9L(rs0dmwyRq9BxV21vSaS zqIsxhqK4RmrCMxg9k#Te=|7`0LmPkksT!@R42w=N(zi!s&3(N~2TFu61K+7k3JVL% zd`A$?uMiE+oj(t?vpcgT&2B_9ODSAok9ImN#YdQ#%h2{UaYM_-8cSuT(&y` zStxCo4Pr~Wz44k$zF;tZK|zV|d&k45KqwgWBiI)BBuY4>q2e3DhQeNgY{hc$;8_a` z3sV!_Teo73nvxA0;{CwF{GL9IAkphwi*UO7&nbRz6%N4v+yJ9v7#!cRK%K-Y2Zy%f 
zzq6tL52gLTFX~qcaOD5ELvR8naTwai0_UKS zuo;0G#QA^D3WHuYKr+9Ii>L`8s zGro;a8izxvzniMnh@_~K)tZn*h0q@sc7V1Mz1+?vG=y_Yv{xMC=7x%?X__F6(6CY4 zA>hX;2D*9$vOJ`|_{4Dl3M04{2?K9{HedJGy|^oRzawV}e9&I;v7|(Nbab?|qQZuy zs-i;4hf!F@$rwgt+u<*9cx;uN<+zxT+6W(!czA>o5Ll^zl{4dnd zj9K!KM3qOIqaOBU+J>j-u)?WPwG8zC=HfqxbsoL7p0|_hhT|av0?=@dKfQCNY9#; z^)e}un>I~Y+p(*bkKg}YNNYE~yi;HPi@%`1L2yw+B$1*N4oih$dwxz#o<+_2O`Fme zIGXOi`O^pRIll5q3%zTz{I>(c&`bHV1ZyrgBN9hJHQvvvdej3kMZX##IOa^g;3Kc0ewt5;i=ix?CrCyia5m}=&QguLC>up1}l!T zmh|fl!>Vn9TYJ`4Vue=E4tt8EYYnaVw{5P2mMjV$pWS^n%kVX@wiZo6!bS+3H zkTmQG(b^6n4AB&KX85DdCHwS@W7W9 zfv16EB1#wC=36`JlQjf+%-Yq#IQlz^iZHW~V3eHGvvNA!RIp8mxcq<*QC{jWK@kat zCcHG$iBN<2_Qx5sfoM<$^d~Qo=^L_uxSu#!YtaW$mXmwJ#B%(2dG`yrJ!m5JU;29J zX)-1~vk+-Wi+jRDbm2CKxbWt}&5XX0M2`&OlLJpmF6)2yX=y5}$N=j?q!j2flSrif z04qwE&hTRsGq;O&n3|eugi6EhIH3>u=wn$K@u1Y8IUrQsnmf=nOS-m1nSeOc2X9xs zl8VL=2@*FYyP<%a>&65Qz!yz7HCQW;eS=wttSi`=AzJ<+y@tVODR-D4i%YklK@43y zSOs6I%k4g-b*H~@{+G<_P4c!{GkqP)lznUZj6kVu7kr-mKe_3WDFWobX(LV(!5cMxFZY0k_lzF0nnYMiBllDJ# z5sXP$BeX{_(RI(FYG6 zH^uG0i?-*{BS$`55EG+^#}Y+XdugtzC;+vm%4TeeGg zRsN0L?G?Wqf7(*W^l0KN*I$E=(n#AD(gfweoBOoYNu(B!Z?9=unj2p^-cUPA_QVn71B&=zyHK>22DQAEZF$W;I{g+?9 zZCz<-*!A(rTyKQW1ctGCk!l*C1=PdyJoNeXJ5?IN@%Uj9Rnt*ozw1mmp^`eK+i6Sa zk{FOGglRmH`9ZEE^&EM;*TJhqUX4&4c$3K32(cSM*=u}9Cz`lR%ef{>6S`oul>bX4 zm43`-RzqBfL-a`aip=#M#tecCvD)Jq8?wx_K()&p27%Hh->jmGMGF?)GW$3< zOoU&IVh@lz_B!1jiz4~@&N{`Cw2luOzTOPEn8%JR+ zoOMK|U>|p|?1+{L?ss0qJ?{;y&_x+i8nUObX|MUoJ+gC}Lf|W#N-}1x7AMe^A`XyB z$>YTEUM0Gm2xEbuiYLedF@LyV?R+C4J|y5V*@r^r=A?juj%#}+XMf*79mnVtf7M|L zNUDlhBRtNbLnL8Jl$iClr|J3NKs{isl6(5Pz8HNeMKsHS{_@hNKWqEsWHg~QK@|z@ zO<{7uh-N9yVqpbD>u=3UVRY#F#cL0j7!Irg z+DiaQJ8(KJIV#sP56i_pbe!z}w7XN@7DMGUH}saFQb^*ld{Igyg<&D2xj8vGWE5eZ6o9rPu{dg0Vnl*h6{g%WeiB4YF!E#9ryygsI~p-+*AnYm@{*GH7q zVb=hCtXv24uwFnR@_+~pKmn`+B|y*k3u*eM8RHvv8&tXPmT8t~*oZ7ISYcGd#y1k^ zM;>*Y)N~Mh2j#^r7)u9SC235sRajNJSbi@W^M;Ly{%|}-;h+Y?uY&x5;;LRsWNc~^ zfD#(>d;?Y#K5qmy1XFrA)Nz2L$Z$N;SE%#YLjrNzoX0DCBSF++v2RFwwcBKc!jaqc 
z;q@*3^hpj{9Kb+4fT|%p_N0>n+(i!q;BqbEFT9LBuiPOSKvfeOmLtwU1N_yC9m|WM zn9=Y3;`U*OiayR9d16wR`jHdr;jx_zU0x6;lO%|=s}Y>AxoZH-D`2;XzZm~;iKyY2 z)-R;@iV}cNun{4wQOaJlU`gJKLDwQsU-DjJl_3y;0KfYBdiu#(BuE9Qdfa-I3}mJp zc6PUP!v@EJWaa07 z`C?Tie3XglJ*<-Tw{G3a{<7NTWhLxUH(_2iAOi|0`Z?Xt)ymNrH1y1uekEi$K((Q+7&l3?09jTA^G?N5RU^}H@&cmWHTHUVoZM z?~o}tD@gpu``y_BIWw}~Y@%dvyUbZ9U48x5q+vDBIFgba8PHad9<8s(NT#mS{yQ?=hF3z$y=jt tnY|iL!{Z+74uVJjQ~#R|qMXkz@44*Xv*~tf8$bs|;-buj5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu 
zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk 
z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 
zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) 
zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! 
z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ 
z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s 
zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x 
zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V 
zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= 
zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b7ae2425ffaaff8826b86a13d55d39f00f3537dc GIT binary patch literal 26093 zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp 
z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y 
zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U 
zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 
z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ 
z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX 
z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 diff --git a/results/leaderboard.csv b/results/leaderboard.csv new file mode 100644 index 000000000..ad7d3cbf5 --- /dev/null +++ b/results/leaderboard.csv @@ -0,0 +1,2 @@ +Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s) +RNN,relu,adam,50,Yes,0.7541,0.7536,7.07 diff --git a/results/metrics.csv b/results/metrics.csv new file mode 100644 index 000000000..c1e9b9962 --- /dev/null +++ b/results/metrics.csv @@ -0,0 +1,2 @@ +Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s),Epochs,Hardware +RNN,relu,adam,50,Yes,0.7541,0.7536,7.07,5,CPU diff --git a/results/plots/accuracy_by_activation.png b/results/plots/accuracy_by_activation.png new file mode 100644 index 0000000000000000000000000000000000000000..f0a0df7bbce4c69ba5c06d3180e8098884dada64 GIT binary patch literal 16500 zcmdse2UJwqx+dl}w*dnZ+7?kki$=*&ZAAeIQiWttOF(kY$u_ovpahYO0VqH~C1)cj zK_m+lNhAwM&M@D$``vrr>-+A^teIJFy}9dlnTo1&_WAe!e`()TQ97}9727Hn7M8Vg zCy%MJuq+l~VOdo1!*cx1y{7w5@Qaw;aSc1FrJFtD*T zwzN1z5u+U3|I-hgF@;d7}_hmRlEaq=qvxNc`a6vyGV`MEaVdZjDHdqtnRNGF-< zs@s<5)y_q1x7Tax(=#dT(z?GYu-SJ!DvVr_3MH)P{TVH_U+Ys8 z+&>tm7~>7B;`38)ii#5K`)YY9lyQx6i!L8_mZh4VAbTJm-2h2C)= zhsry;D;~KFWOZ%k;z~BDne3WT9qX?6RfK7ECBki3PC4kZO<~JAnu~11x22%KV|M7oiOBXMU(?FRa@oVhd>vg~vD0IH366u!4H=e^ zj~BuV>oct%RXtnJCA}XH;M(e~;tjIi+a81nnf4|m=c}GN<(-uIc@?=ozkraC_?&@k zh1)ws?iCaigos!sJ);-#?%7imJ>Whw-qY+lsCM$?@3XVB)sN*it=%DfYT2@7;%<{q zbMx{7@7!s9-qKkXYLHStI*>D{_f2YcZlZ;C>(|(0Ip=CX^k6g+MWBrZlZEbDQ5ivcV zo;|{C4LZ#Zm+<$+gj%T1O^>NPl3lB7TP8#vm|bu6z7vraP1(jZ@oM5WUw)^5O)28* zPpz)g_e>fM3=4bm^2&;w(uUG7F)D|I11((Ke)G0%jPd?PJ~1&(qXo)yi9vq>Svfg5le(nv z#fulyavaRZ23z!3tmm|ANY>7UOjSC+5( z;Rlt3Gl_x*#hd4FSCsaG@di_qiig+8GH2VH=FN>avI(5O8X|1oJIq-cBBY?M@1BN# 
zq$TS~k##Ea;jry>#EmOPNNPTK@E~3^IRv`=-%(k%SFr^QU!!es(%?88tQ!HD}Au!-ho5D($fsh0*^d_!uzO< zZE}XMudh`qlCkT)bN9lQ{Ti(ocSyTs;s*p2LxqJ_u3Tx@lx_e0hYPnIavad{zjNp8 znI{_Ogv}c+G&{Agtx7l7u6k~uK-JLb7A;KD%{f&TBBY4TUdET4d%iYN+o;*4@4%t6 zFIfB|BQ3oJI+H!;5{xS!-xPD7PNgbNFl~K}@j%7wd#lA-3jFFeZP{WtkljZ~vFVT> zsnxa&#uEvB@#3&;>x)HnZvhlCkDy>R)3L=}X(m$KzF5%g(?_Olxm4F&nD@Sda$bJ^ z^8Ff5Uz|MXIn&ruil<+O{}g3quj77us$xS^7J6J3<|b})`-g<|3VvwLbG^+y^4W`D zyn#n4>|*DKaMRi+n)KRaeM>#3wq={x+0A5q*n7Isn_BKubhGbclN5bw%&>Hnb7AXa3B)HpFbj_Ry6|ofdD@D2KM(jZG$SEla$Hm3*2np3lj#X26e){PJ>Urd0bL`Au zLCfUiLY7+;1)z7>G?bxs8;?ppEhD-(NaA)3PAEL)h$> zP+{|fh1*7|smj4y^_*Tl(Ml^uPp`;D2jcnp=T~-v%}vI$sMDI02eG0VQ>|ClmwtZh zrHK1D<>ZvPa{YS8Q9fSYgBeW@p>d}lT@MzsjgeDS6iP0b&o-`R+%;~>PMboNa=71E z&t+jI9O^tfR8)ptt9btWL-K}|Yt{(9zOmlXdBJ6@*8n}ivw4C`ZFSr7z=ns%y}Y?c ziusba(Y?j2KVOa0Oo{1fNK2p=poq6~adD*>l}Ffe@<~hQS9>zcJ1sKrUg#{3^lYwE z(a|x(of`PCi=F2Vo@nuG8DbvDV4g?6Hh7+r!mm-~IX}X6Lfg`1w8^I%G! zqy#ezWp?%I)%)l#5@Vr~&d-O&#)QPg#G-vp(`XmSg={&Xbs;x5w^UhX?u%2Ba-_79 z{u84adggY)i+6@bMh^A#^eD}d1&CKqw(#MSvGAU1F{;RMw2bvo2%-r2g)OETlVaKR zexFX*p#9sTqR?#nK5^-Z=lRO0H4(qkk#d>AYW`ijUN2p`^!|JQJrWHT`BR3w-apaK z7|0b0oaOkGYV3dag2P~QKSKx`#O~{-inxWlyH0rf`ubKGJ-zULG@M(;+PbYbWn5RO8%V-H_%EBa_hjWvcsI7>NR*BoaXAj>`KOMfZ ze61gM%@a*3*{|tl^$!R0XFgO6zFsdwn;2+94^Ob=>?40d;n<-u-!r{U>PbP{!6E%T zozA*t3y-CxC1%K8Nl6{QRL4O*Iip%0vxZc;CCk^Sv+R6fj>TRuLu*Vpb~H(xJ38O1H8SY9w%sgT_nVro~VD57iS zJv{q_+1B5mxbxTR7pI0hic3oTxmj6Rd$4<}bsd|c$*NArL`V00kQk()v2}EG>_#z= z5qbRh@#*nKOMa8tvA(*56TY0QRW|MqPjaaJI7YFn z?P&MudNbi=D^>()y-v(%j*p7^sim5MRyE{9@(edc3 z*REA-OgGn6s;^Db)y`nrcqu4i0#&!Zyu!-H*30O@_`Nq;P_SfXVo?3enS0CEY->%m z^yg8mj#k(jn~wWFoMuu}$>GRsqp9aO^lv6Vl<=i|)P16#Zp4&Pso9u>4|J)Ae)o50J3GH;Vimph{fgf(~j`YgGhu1_MSrRmk@(_=ZX{`1DHpewCG_kuv&)+W@Owqnl$pZi_U3e4?51rK2MnO`_)p zmxt)!P;q7K7p|ffxK_Z8&hE9u~xIl<*lv&^50Y4%)jPt;72 zMK#$>$+T7mOc-?}VP5xL-(m6b^0L)}4qr76XgocPy-)uDD-E?MvQ}14v2Vso_zM`ax7mEgv1v|7OG{5)%JPU* z?UD7Zt!HZzwbe14t$X<7)OD<{RSBd#e-4bct7s%vInrTe$Zn76o;^j|o-Q}EP}xT| 
zJ(g7^#jq^(-0S5*=D^*kqv>z2*YB1?zo|r-d~mIiOV-O91$i;Sw|9M|+SnJOAbYCw z&%eEOuugB-caE!Ish<0oT5Qp7^#ge`136u5ly}e0&5gZSvc8*qKE@oTUDrW#Pic&H z>uj6OcXxL+q??oe-C51g%PU_|QQ?BBOLH1Je|ww2s~)dZLDnJ7KzreKvt1{As$(NP z;uEzqOgf4K*#Nw%_Mb40^b&}vPPjm-SJJ_kQ;J=`(?~n@;#~|6J&T}3eN277$4geM z6HRM06LxlWmB_1Y@t9G>Lp6*m%KA8CH`AX{vh5MJiGBCSmj*d#j}#44&4qQTX7#E7 zo?6*bU9sMu5wgyN}&So}PFW2c=)jVcX4KR~J;sm-=JP>l+ zp_7be)KxHtF;tpY1X-GF2It`WPumFXHqI@`I&Z(={_J!Zp*)g0mLp zXLOV9#bdYwx7^draY%GVA5zrO32QgmNB7PetJPNQZ%8XcV>LueG>vfjywbKtO+7F> zDib9{NyG360bdOM_RVSp4LX#dpBtPm?@}*TyfP3ld0(TduIC&bEMgf!V6Eo!3pcZ{ zuQ=2d0w>W`yfZQ~?q1OIoEtls=j)lP75~K~fIv#Vq|8@txDd)r!2X}p>&-gR&hlEf z;n5IuPk@(QPvu*)PfuUE%};gl96D6BtsPhN25nC9@Zn3wAc=P`j1RU9j?NMc2@ug! zn;5E@=aQzG@0Qh5pAy@iBIUZl`TaI9j?OA&8KFqe`KNsiFqnJg8MbjyKTo}bwD$t$v??q<$H?TEqrjX9I3RcM$Zc3Zu{ zXtG9KKUiWQ^YzkY%eG^bkmnTi)F!U2a2p-Nuh%9mP@oXwqXnC}l3W;QRwXw#_x&$l zzNq5%w|Yd%`;fJuf^Kj6@yD|e5Dd}mI!C)Yb;yU{QVhi%2h0c*1@AC|T0_sD{OqNw z-+QckIToo-k4AZnmfe_=YP{SanVOcn-s*{5gpGT=Pa zt%}N79>tIg6Yix54xx}aH9s?`khf~JT4>q*l+O4?G_Csj`c>=K*X{C^ zeApG?n)IqrGs`A=Aa_J5Q7cUz^fl?Jb!)ys$vxxN7rzm8pvq-qf|$o_rdI032l?(Z zCe69dtm3r`b1e(F&g0@%tuMrqV~*DRnz@KK3(WWk0aP*zQ_6|@1$t3`=hCxVIfzQZ=pMy`NpPJQK+j78$#@9v zbOZual@EtRb&O)@svR?vL+wPy85{Im$T_vT>$LsTsBMfdXfyVxF2xqNk=tAN^>ofH z6iHh!MVlZE#p~vz-`lTw^l98x;95P`ejy<+*MkMC2A~9ilbA9yt$Wcm3Vpef@7(#( z9R20!mD}7L;&vyYN!6eM^VzZMc$6|B?J1n={JXSQ9BEDHV^Vc3Ef{3kz*S z;kIy5>!X<6v^0~h$k3^(YoXBYvS(j#u&^-L4jw#6nVhln%(kKq&+LMpMUip(frW)P zc6Qe7+O=z9`9wve&l@d^T$`0y=7r}q51=4)c@PBxl+0;DitA0 znjJkGS!j1&myZtFtz=xipS1{D?a|Etn%B=mR{jA2kAMIDhpCwvuwZ#0PBEjF4ZCCk z=(Efqu4H+$E@8P;T>7YuXZP;on|MxrOf{|wxOXoG-GWSOd1Yn(w1qG++Y1?u9NG{e zdaL6exkCRssJ;%%&F6cah&Q)6rL-Vuo+j#&>$n*nfbjq`D|&qWs#U6h&Gg1+^cw_c zLqr-5qi^24d9RF2i}4Z`mfDVIR?uA-;S!F#JUoSQQan7e;1+yU0)GDfL?7dol$;H7 z-?(uje(1}mSFc`C;_xt4vYhub(JmMa#>t&KcPjPcrMS2_yLH|nM;F`Y0a}7>vj@sf zO;0a^Uh?i;=Hu)|EQdQf`+F9nc&q4iZv}1REfgJwR+>qiWlMfne^*!69vPVeg?$@P zeLFcrvsSKKcP}U?sARt)C}4@%2d?dOLC<-&wH)F!^j=m?9#T}2TP)n%-GSAjtE*|T 
z<9Im^nbffb(o+v8Kpg}}zURkq#mk&WQ)z$<*Vl2xuj3GB*miw@>KRd4RmHkv2VYOA zOEA#R0j<<`#sy!#T(JN8>Ga*ZcO&-hz{Q;BTq^>wM6<9+`rO`bkkQDxeR~`Tjzp3N z03l@}_kBqT1gsK4qYv!L0c-GB#6krMow9!~*(W2Tf8@v!kVq=XP;ALdgO7H_yvji= zQJ2|0Vtiv2UKo7-Fp*K@>4Ex;lH7o$y%hc9{}-=Ll1%R1yZ8P4^HI>2Teogi-aDZE zp|Kz;>M)c&D!MwrY4sD$LuHXN3kTI!W6>lmmDff1`8`~@iD&P=eR3Fm6_Bl=ak?0n zM@X3fe#gzM+4jp}!je#(HM!2VV90$CV1lx2@Psw%y#)%XR~+%^Al1|{vurucF%49q zt`W()(i7B+4RXplPDzHieXmaS{i&&`RoEdYQK-TE*$y-g#n>mqb}LzU-?bS{J7q7` z;TdrAbp4u2J9qAZ!vHhx{{3yS<9LC{kXY>w5%U2`sXMDH0;BSF^~2+5HpFOFYY}2_)$N=uNpq4 zI@7v+RpM5B=~_;y1H==69pbJqGcn~?mXMT;U_wqs*#9O`zrSiX*(0Z|0XR*6Lh-@${b3F^sbP)FSF z@kc4CriubUe0fF18iEPP4u=}V_RHBz_}h-SAj~}8?Y_-jg_|bU@dJgKNI1#N^B`u4? zg7`+T#V{wDE1@KK$X(gP!(%eqUBPz9&>NjSx<18-Z~y*NXtq*=9#F*~<}m0Z!^7E+ z#nI~QGdV1X8Uci;1bvE(szM)*gmUSb(*ECI$)wqKDZx7tO)Gly<|4*YSJOmxUlJ8k zV)dgF>&-tsZOe6Ey=qlJWaMBf>b%cJf_Y3|`a zsu~tov0WJG3+BZ>sNQi>+}y_?aYPSPLHH>P6;XzsP&M39LP#v}$)K%EcZtHnAiKou z+_`fGp3hK^#aZ3U-ZnQkPxt9B#78Rx_0}6j*8H|)#poxxZyYu-ILQ644z9sI^pyHA z&K-1}26;l9!4e9fUnhSEm*7K#bDtiqD3~8hw&nE1P{)k0H3N<^@VxuSH&~_6i`Ky= zQ(DU=NJIio*RKK`qE?4N%hVxD!{JC88Q@}P-_Os_AEX)b^{ZJ>eb`7(b(}w(f*f1v zL7+mo9}Wb;P7QNx&?C_~s289+0gJ+EBAq-_5$Y{h#c1 zTPb5>W0Q|jzvb`)HfRK7$Gx`5`5Bf+B%;*F!{Ob(zgpWeFPZ@M`sW4%`52;S zPopkJ!WZ+l_4OsRy}qIUwB^NbO9Def6W}G*_d6Vyl{EqP1q%q`#vfx{n>TOS()+B4 zlZw@WIR}`%<4-c+Fkyf_?)7{--CIHe-p*kN0M$|x%=YSe_?NxtPzR`tp~rx(Fo*W> z^ZSm8cy9b>qaY`*vy+&$(5Mt#A^42>dd|LXdX3r(;LCVjh8~8+Y^&ttIsoE@#eAaN4{9~T3Qc#+$?J=7(?qiTpqeTO-1D=IbzJNV;608qU_ld zJIjl^%lyG2Ig1pv^!y|}&7w(*?3%}N{-dZ?W9akPIn?9T_#73|#sG!GC7o6M1nGrq z$Zycl!1S`PbUW0+oGyhj*NMPaWBiT=$WnZ*v8@wwebV3lWQfhuRki=;{|L54j7S2rdgqTD~bV>9d-#UKzb7rQf)a2_ScDP|Enn0Jf& zMEK)F1$)#|3>D=np>zFKwIMl)Ex=IWcUQ$zt5<6bi%asz5rayK+;W-7%2;vn8=;E%^`7y zlxx?AaDGwQe?+2K)YQ~e)j`{;IO-mVrU|7*`_Ozbl2|uwYOm(pw25ls#QW^*%f)2Y zRK+UuSN*&Bmqgf^l*Nr5NSJKeqm-H2)zJ|L8dzHN{{4G^jJrpV9t~@NMH3JhsMae@ zZa?N!@WZ!WTT2>q%uFAkfc^$fJJ~=9YuX($iDwIn8Fu*;J^;TVGdo-NQkkKxi-Fpi<#obGI&nVQ=7%P$}Q;IHG+t>sb^VHVjvJZYG*x;YMk+~)Pqj|1Cl 
z!N3akg#rJMBa&TC98R&r*G~@saY&{3J^uXp{B}A#uS5u=;sZHY^7&k5L>6(T^Jj+@ z)`D|DZX!Ph6)XlFaNog$&r6Ho;i^IOQKeGD8>mz&LLdE=GQa$y0(CtQqG87;3_JoP zu*wS1{p=>9u>n`xxOU3=q0AVbv+Z0;=2=-;A<(+D3)XM~D?j-4Mm(TcFEc9z3Uln} zsNLJbLV58NJ*dZ27^A8{*gCJMm^>YWgM()nD70FK(`_+V1!PU@lGG4PBAyHsmu%L5 ziI?@iML>dKF-pNi%#q-bcGDmT{nfSAff&v(sZHSx^)~0_z>(&C_`wETAQ{di3iDa! zr1Ha_0JR9pqQlF?W>0Y`2VE+86FxY2Fga`A-n}3Fb}JBQM)(A=nl^6Qq?V{91d&xO zquIqYdOg_Ci|JxFt#~jG6Rhv`HQOIz%PW(x4tJC5U>@rkc5Km{Vo6&njmC|x6Ixcd z23?PlTFh<%017*I(E-ip$6!Ka1LllHF3iSO?-;;5b&`j6i@*;ix-?Pk$YUT76mmP^ zh%VDTv4IF|)Ryz(&S^_CEI;p&10$^ZbF>nufa#x&430ux$}hOi4m-Jtthh6YArQr* zu{#k-qPrl)QIbF*oy+F*A(3d(Nk4!7T-;|xnrk$Z@Q1?&hyNlseIBn-MY8}ydw}^L zi^dIVBq3D*b=!uIOc`BlOyaAp-b$fGO1p2~zWqJNEo_hI&kmW;{v1>7dQ?I4a(?c` zt!8p#D3bESu;6>R1y`ehJ{l}&?1!}5F8ZnC)u>@BIi~c1H2c0Y1Y8hJ2EnIYa4}=M zg823IEZ&mODe1SOCY(7iH8UT9pFJ?cBpQ~qk4<682xGr=iA5JiqD$Mg ztxV2M-&w{4>ICA27RFjtgp@1vwkKHD1W<4bu04O~n<}cRewg+T5g8)vvJ!725yPN) zq|817lRhEw2=aUUpb!k?8pi2g*Tb87n~VE9DkAK}2{EZZ*e2C~PY==1K} z`QTJhcU^L%Nj2jEyqAoMoZ=-E%;@R)-kWkYePE-a?eCeaPnQQI4=PJqzCn z{caypXmL_PLTa$E4WxYp3XK)fASZ2e7z!_=55@XJ3icL2yf>dgfAjXOareguy&oT3 zfw%O;d889)NcY*ZXMP{TRFU+By5_|#$6qk}JVem=A%sD#%HWyiSupTQ2tGE?zSB}~ z?Y0;wj3q(*b+c0=r1>>@%=EkTLsm^{Q$bG7_V*Sc44zm%lIn@T@%c(_>AVO zQlC|@3Un-Llt%IIzc@<^j1 zOEj<~jIWYx7OJYMg6Cc>g*u=J>4C&&$Q&LW9aWvNY|hmLyCDybNJRF7&AKG}T3K01 zvF*T;Jts^*cKP6c+9obm#oQ)(B7EpUgI<*OpYR5-^4r5+*_z6Ks?r-;VDSA%P0B!A z7E=T>>H#$6uoU9${$2K6ErO6ASiLGDK~z_~K#(f7^G5bZkK|zBQ(JwI{s(y}hz3~3 zU`pmkeS(=QC0v-98fXY}m^2Km2Mu_^c05$vK7M$3m>q6V;qE$OH}G33uVw%DFgHv_ zUQVt8>g*U%IFN#sveZY&0;pKmCJ#$w^6gV8acLhue26S~Z*>F$;oaTcwgVZ+`5|R; z6kI)hdi2~*=&x3t&w_Wz{V$0+;+xC}Mn+3x!2I#ijKq+@+;jeF51df^`ca!9{Mh2M zGBr21T%ft1ckN1Z88boaRPe(etE4PRR0fd&t@To)cSu_P#0+$YG^@5(CA!jBnyZp7 zkvo)IrA0iYnO)1DQU+{Cx=!yNAUcT3hW{6~;(zUDN^)BKg=}_xHO*)gLY|S(9pomf zK&C1B5AaOozg^5kr?0?Y2qKae9UUD9R)9?s1eZ&pUtdKf3R}pS1mGLf`wfx+9BIqo zZ9(~AlX6MLM0$i6fiZ|O>bwg6i9&x%sxg%$;k%uLcOSF!=AaEStqa%r@!f z2+E6pD}LQof7EPX;}9BU2kI~sF)rlrvPvs27#Zc1b^^-;1QxyeYbjq`ueJ;`Ropk? 
z-oL0^O@9ABC4ICOk&C_BxHme+Tr6lT~uqjL5 zg;`L$!_Ghdw{7}=9asN!3}h@oc-hR#x*K8&L9s3!f$9`0h;kI~zb8oC2;haU7Bo*%j>N!2Rz$o3 zOg+tY!onQyJVo#9>f+nG_av%p)tWWAqg#KIPyT1mXt0Fl@Ac}{(SBwYl^S!(+Qj$f z%?i}Ne3KD^9*Brqo6MBx&p(8lj8Kp~qCJ!unCOhCs24+`{QSz5!6GX?*S%d<;F7(R zh4rdu$%3T3bl>yR_fG%JTIaQoeDdUp>I}G{7G?@j;!DWnAgYLhtA@s>-L&;Aj5U`H zU9%VKSnN>>!*2cPJcG{)cv^2pq&a`hFp{bb8coT!VhPkcd-kWc; z)g|dBKt&A5Zbna;opKsg{RR-5hm$HHNDqw%6lifvahY$k zV^L-)A}wb%$$>Ov=29(Nq?Lmfw;nv-x%`p=O11|hKZ3I4g+bxv^DJu3w^;dz0fq3$ zSfv7g9QZqD{HjZ_R$8kL2dXEAjBuxx8B#&R>`D$~bVE~Chqoq4+s>l=)A!EvU2uN7 zxt7Vo@FOw@Q$#@E*UlFavIMR*{hnKRN-l)NA_=|tp70j8 zl@q-r!}V(IC0>5uZYh^h6-ZgtIJ*GbMy~R{09_Cs!g}=;tu}zVjA3w&z0fIO6cfbf zGY-6N0-=WU$(l=hkdZnZnBmZ`MN-}5m>qRSO#g)BN{*$R~oQcHw70u`lt%(;-% z)ih{5G*_7(D7*Tj;673eQo748$zN7@xrpYBY3TwZ1tQh= zfC}iJQu)a1;1`iN>vA}70%v3rpgsCM1~RMd5%Un;G&%D3=YvSsUt2EzhEe@z$VP2a z6bEeXl^IqcS;XtMpM}-SYDpB6To~A2c7J*!vmqJhjEEXZZN%2mr3~iNcFB6HAU_e_ zfaDK~2*DT4B>JMvS*&Mq6DZc$yIsB)p$ULD6+EUx=wLwq>7dnTp%nKC3euXJne(d| zz2xmR;BF6zS&HKf)rd$WkpqfS1?S(c0?|5xkjSca>&^h+a?9ZiKovNZAkqTRFN(@0 zK0wHV6oNk>*iqhdAArTAAvF%v5r!EGo)wH0K$8F>+s;z!(Qv?Jan54DHiBt%;eviTG5XSUVh)%| zg!MoImq;rhNBod@0PzJXsmZjaAJEOFxsHFW!0|grsi`miBwEegL!2eZO@y_I72fpm zaoA2YCd{f@ltFS+j-x5e3V9_Z$Pp=F(|JRMJD{juCyEbjOB=JKaI!3+qB%pz32VUpodrT+_uaHrazG z{89}LNVrF6JQ^Q~sd!AbtvJyLrL8YjAKw%Z9_}u{zxJ@$xeoSAHvTvO3d17-(zFlY z8jf&`;>;AzF17jRC3w&P$D0d|sY*j3Fg#MXVVm>3$e?xot8thv-h8h}n1CONuMCB7pWC`K$DPnDoaqSfIF z$-(FFk!_fpDDv!9jfp15NKn93_ZbH`GcN?F+;RvLl4HY#w|B&n1O;T(I`m2=XDogc zs#WN&k0*^N)&;nvuva@S72m*q*Kzyyas@FDg!JWJ zz@{dbzC;qda2^1Mg~6FaPUFHtQ|CMLL@vcC)I#DlAE7E;D z7z>yHG43{!mE;^K)7_Ls$57aIJd6*(oVBPFI@bSDTqs~+{Q{qYO!NbBUcd_8Gh5EMi}q}u>DNbg9AqV(Q7b|WGPf=E~CUFjVgpj7G7 zMS7Rs;mzeaCpYhud+!4@GgUdhLUbVC`Ff-+1 z=VL#9<)zB}_Pm3tsWyQfi1?`WKf{M(tOmCi5D0+q#EgLINj8_RlZvZowF`-Y|+ z^D3oV+9szRg}f^E=zLG1{I*RilpVMAlzW>gg>o(Or+?$syeMyBFg~Tp*svzzUbN(4 z72)~r9Zkk3@@LzfRJ!d2>_+QeU0FbMTRf>zzDe+W)@!NIjFyWDYz0{_uR;wz$hN}F+ zFy{&CVdb$w3A(;By;Wh!gZ1%j92`|TJiaQKt(NyqvwL=xEEKpbcH=t91q-ECqR&)s 
z?bqP6>Z|l)bXgq9T=;UILr`Oj`*EX&BSr7t_21_x`1tYJ+3C)^42DJSwEIpNKPgzA zxtOkBeP(fXY}PtJ+Jkejqp)zaV9}Iz*Dk|}?hgxdU0&0@L1uAksk-})U-?~LUf#W} zz+tj?VQI3eKF?wDc;ci4UGzz2nb10}D#7sRFjsY2Hsz`FTa@JsU0sCcKHuCY?3@)X zd0OsA^L5ViJ^o@o1I_92-dE^d=Pt?kD<*WfE>_esom4Ay&G){ti$hBzs-!8+;L-TR z#Bs7tAF8T2QmUSd-Y>5X7w5vd$aOul{yt_^Ma^bArNJ}O!{;@M`$uJ4>z9PIh%vH7X z_GM1Xn{1aWgZX?SHtsmY!p<%|RCbOHHTYZe&S-Vz?yEDU02V>_tSI^$v`aw6;GJ{+t92oq%n5I$rPK=MsSz4x)*@n3;#2TLn!n!$E&wlk1S^4<;wu3u& z?TUK!>QuT}ht|NrfM;We{u=6U;Kp{Efd!8?%t027nja1 zcQLH8q{yhMJ~1tr@4Y-LSJ&6m>Of*|@YBoi8#it!<~!O5t;}_?hg}S+h$>-l(@52= zkY8Sy?!iUH9_$-vv+FoIZc(th;?!nQLKi3d?8ck7Z+R@c&*Mo2Ox(KPZ)`MB$saaM@_r7NC`hyzI2%|Va<`|bcF}|cy{jG84)EFG}z;> zZJXoEYy9chx^?Rg^YS)cE!?j{9u33)2Bak znYNxxb(}U#z4Bo@mvz5vs%aa)PWdXsfGRSmc7=vsP@ge?x8442^~}e!G<&tRwG}c= zH6H~B8#g3r8Yw4hnRsyO7JKYH#K+fkYLI}T2IjWw1LxP%4sM9E4S67$LXx^l> z|MKm-cY_dr0k=2r4!m0SIG{2_AlD;9v;QQvt+!)!C37B?uR>&XUPdRiDOl{ebnwYb zNY4v?tV-om1GN<>#uZ;X-%4G%5`kc32?z)f^Py6y77iwJUpvYA_J(^2A0KK+-jZ4XNGWwDy{>-7BA@8=d40uhgV&7|IU_4Vm}E4mjA z4dcA8oG|_*JItAqlH$QQ`D!gqR-SEd`ChAqDT9)UzI_s1%SKHpk+DkgkLQ3ipDANNtU@%u$s&5>KB{hQl$7-1<{y8wURdjZOVy&b?VzPq zdfs(JJ?;I#NmAAgK0dv!QC7yK6t8TvFzk%dE#?!zZQxT|z{SNSh5T?BjE(l- z)O)LrzLDJ*>J+@-GSj3dChy(Zo8G9!<#S+_^0xkZnLLY?Rd2Z*K0t5(c@0(m_9oi8 zaB&)w)-1UWm)Vf;jyR?Gx@TuMh>DBv&P~R*#iCW^jAXP(q!%xa=2k@>6%e?Hz}`ed zQ{8@Mun%P1cmQz)~8?M`D#cwC(p^v}Vh+|ir}N6Ff6HKaY)&Wvj5Sj7~J zwn}Pzf|HXI+b_SI?r>Yp_dLL7`C8|b@d;a@3ukp`m!RUfP{I^*ah&JYAO;XB&Gfg3|5A zw8UHXo8>**byWRi=rxUe$8^*}(xpTAtcqWL|NZwdE{ijw!iR17!a@K9W1VJ4rTyjD z6k|S5xVp}D-UXf$f}0h()iu7Dj|0EN{gk*5GNT_MfL8X zuRp%oQd)&GcpxUvxMx;(8yI;79a>JCMpsahG4Slf~2-L2Zxg{+}K zqa-C5dO%)ix=uko*ETW3v@P!F#f>!-RO z$rz%OHs{(U1Cb`8Qc00shiWaMs3*6Ke_JojS(G@Uq-@{5og2`SEKFZzNOP7&I6ZED z^JuPmy5T00OE(=W1;+BiadCMRDK^WNGMCj&n>XJV%Dj){^JEl1_mWG zfcJqbOQEaT`HU`Ab`?C0C)Wv3CuV0G8QhjSRYhEvvJ~w3qhm6j2Rjv3iAaly-D%D+ ziAV7;L?0kuUa&MFDgJ{5S`P+jcg=3L12y_I5`YCqkU=sCH^zydn|s` z5ZaS{I^Zx1%f}$If>_7tK?49p_Okch2d`$f(8>I2Jy0V$)R=6yZ$?<#pItM}tV1~Q zVrJ4O;XcYMMndgP+>jz0HcWt+CiCu 
zK%a=WQZP!CgiqN4QPJgK4^C-+p49}m)fEEV(=2-=@`1*3NZk)D$ia_2U0vZQOl&7k zWW4XW+&Vvu%^xgWT~3w`;qTEGvG{UJcC;^Do`I) zJCIAyli;Zb7%K1O4*iIMbDknM9TDV3$AJqf_T66lvd;beEYyOl2P)~)X`FKyX-JmTD< zS#8vVD4DP%H0v`6lg5Kyg3kfP%gV|mZF4mxA0P2%^e_8C)S_?Msq^;kx@;>wo54CN z5SZfW)7KA42P1HPE-7^9=te=vwH<3u)C+S_zL;sMG}fL^OHZE^9DKlKVIVqC$SEV# zZN(w;Mg-&h{Ja5rA3rt?F!ve0)d}t9yL(B4H;kW*Apv@EXw{tBZXE zoLm>L!a*9tLYJ2aw@&eo%UCYaQD$LbEktPp*3Z|^ug97y!M{jz-=eG70Mh}#`R8l= zcB2WUer!X)=XDsT&<7&=kkL2Nb0(Yi@|p(c*p9@tm=$WGj45F;R;H8P$Pg?Y7IMO* zxgWqlZgpij-K6CNzC{`tmC`bTn0gd=!X!?d!9~%JO$F^ZoEueU%RcV&LXK0iy1G$- z@!6L-BvGkFd0WkIvZ^FGzn*+U+Z!toaLJ`D*Nzu+>~3fG{_7j)j|mG4dooKM=i(xx z)CEkb1hE3*9w%VSaiPy&+m#bH$AcZ?L{o^uKoDirn50`3D)N}&}sn;^$SjEwI9 zWJ@?Z8|GysF;3E|wIpfgUWk?sxqu;As;AUXiVQ$vVq)HpULk19ruwVjDkiGekN;p% z=(=1^T3AZQMbqpe#aQ{GU_plzMzip)!$Fztj%9-L-H)PB^olz=I?_y98jM>q65pFl zOin()*gpUyea~}TPr!Eg7}A^9+uNJG57^=f0{zjGC;FJ`hUzlY+6&1PGn8D|KHe_% z~Zl>#V77cMiAI?VImjbXP zYf%B*q^F#uF);DHoW;V-h!J{IcTIuwyk%{I8rP1We)=sbe@1u=$*_ z(|jG^=eZ4A4}2_KndWeCa4_iq`0T1>FfiNzCZm0PmJy~|o$GA|Y9cRq-efaF3iwoN zW$?Kyq#>!IRkU4{Fm)WRb`g5{^5tMtYP6Pp=Q@8U`O?(uYd6FxL`#F4i^3?xja?mS z&%cPj9IfpN<+Bn4L00#KQzwhjhW6K1ipncQM-tLE*0e2$5RTZ0=Hq;oq&zn1g~bA> zI8IjZN=^^_V&8ye30Ki_5JfeVcXchmgMFj+R%hK-Rlc}f2;`=csRF&~YJngZ2Zv0! 
z*n=7XQ%2AY7OrSHJO26xGsgOWXb zYyW?|x@GTo-@JMA^jjK{n9D`(t;QQEr}!>;kZ%DcWmA2RZPl`~7qhV}xP|C4P`Um; zb8$^RuVq;3c~g;H!hp$0lKX)mh4MA8wN(QXzQ6vyn}r%#e0GOfPo8{Yz82S0@VO|F z^=uD?qPnRQg!lj9S9gF#N~wBVHJhO3ch`Yq`O@xK=T(?KxoujGIDzy2ynZ;IUA zxp!~uwQCem!1@@}i)KauY(N^8)nr+8v8i%WC|Q9H+^rE&QRvf`_6gV~)YR72XP8_> zNQ&;=yH|83jNc}1*fv+^LwUI(2oMaAUThkC_?$XEcOg_zZS97wbx%*<+(}25cvvBd zMNm)!>>Nvq+xhdqV_-N?2FhTnjH56H_(dhll0q?0pHR)>vl;ZwFzZN~o3oFe!HY?I zUiP`gCx8^Dxg#gg{&%)8i->4rdY9fzp}b)Cx*de!E*3!S*|TTTBX|L3&gq{)8z%4s z-_QPugO5)QB%3Fbpx{!;bCL{>fp(}I^1@n8q*^!Dx|jz4~>n19b7F-+D)PO z($MeE9-o|4xNyOvrluyixwfVT5Yq0o35dt1V}+|)Sehec+t(oUx?g(lr|#ptd<$hU z{?jMbbRh0jy((o?P>|J(A^8#3Q~h!kVImy{xqMc=F(~LnZeT7YkHP76CkU_v*j)pA z_^8cP<hxDI5_klwB%&Ny#yhmoY1yfINFDoPCIX*tV zOa_o|&tK6hw)%rmgm`SAefbl%ul{YMaa(qpjI0G3q2PC&#;AD<~-NfHsbdh#-?fAdk`G z9P5GSsMoOo)1s!esd_R5ViNERO3*hXB#}$6QlqnU4dqpRsjdfSD8D^g2hlmFH&RO9 zKijA&r1@RaPj)M=PQxi6W~0+w7Hx}4N(9nNN=l5tm5G|B8aEv;0@epV(g5NoeqnmZ zs4hls0X;LB#KF@8Uq4+XtjOOb4y$DlLQ@~a7uM$E;#gra-ed6o;V%myuI}fL9YX41 zp**KsTU%=cHezk+$LF>bod~KhXxr0OV=E0#fhZ40RG$W!=1>Vkc1R{P0bPVvz!7wF z!o!C@m29V{*Bb0YSS8WZZ{`JbeFSE#j717lQJH=vA7K2B$tcq@HaViFjh9 zq8OoMdagiMkX2U?47LBV5xWzI21Z(Uu@}QwtOC|UWv^GIRoBLHtI5W2y z8P2{zyOwe-KA@$u7oS)%SyN@^RGHn)@NHD!#Y_E1UjHo3KICSDmGbk zrUfetMs4<84+thh-})xtw&m21tVIKqXvs8FadysAaKg(8vv%j#CT4l#V)Y4X3efB7 zkairOc>mn4GT%y}L~~<2r4w>g#Cz`mq*()D#UpG2F#@<^?At!l_vxh$D_8*VX$nqw z!JFJ$qcFOXiSY3ewdawN{&C>JiJ&IDJXL$y>(9wG@n{`|(o@OQAv+8L0Hcxk{ar_E zkPPW=tF92kB#;ZrmoGmbwspEULzIkfZLp79|Klkl76kH|B@rPEMH1z$9(=*r-6tQHmzVp5F;XZjcIql<1sKhHv zwYRskhjTYIHL-{D?XCg2fPuiSs9+S87KlC?x2x*k(y0q=nNYr8zI-9+tG}`|&+k0< z0{KCt#&m;PG3bOfz`i!Gmx0<3gAz6{FpwU>i~IfiY!*TgHVN*{jb4LpRa(-vMYsIP zvo&ki6Nmwdku60AsGM7`GWh91vJXGi*Y>4>=%h;C6av79vVbLTXsjCSD9rGYJ-WXx^Owu+WqB>W`!|Hl$? 
z_)q@$1QApEk(DHzm1Jd~-nj7{m^Nv`L$0L}l-sH>rx@2wb z>7hoB3UplxdkfR;AW$ayO(|DP-@pHeMi;CVBL9Pd7k%;IZ=}#8> z+1a~SJmk;>%btrNfwx;N8pMLBBg4i*$LdNnATI$#nAHqB3i8VmittS;cz)g4(KgRy z)vx2@)c|NokhJt$vn&EnT3n9Z{R4p0R%EWM>_bf8OjM8sHE0KP$;k19DONOqtnW=b ztZGSILvAswi}5Bb2Nc{vaF^9Jk&P4 zu~@wGg-G1+oLrRyHhy*0N}x4w?#q3IfG^?snnrAa`uNfND52dmTZ%B?KV~G72B=t1 zTOAKHZg58u$tZjj-fTKkC^Fi+Z19WkCOkZdva;v+qHFSVD{LvF*aQ* z>#@zs?s2SNy*pN1EW2v$ty6|L1xXlBC z_Oe9vi%9KYR(f`39ks@;!zCKoot>WipY&Ta1KfrjQouUX8FkA7YXp_h0IA15qU}6> z8`Yw(p@6_YP!iycPrG}1HlUuuHIsyA&@j)1RvM4}HEhdCzgU7SYUs1gqRPl{ScV3H zqBKAmVQR1XJAAu)Z)l}M*8^TCC$2(p^)T^EgNm@UvI^OSo6t19yC0SkQ1AXT7=1uK z`r3=&ItpMKVXg{kK7018pP!#n_X+aEgc)A9dDr`r=qs~ln3jAPMBVPCu@^FZ4{A#RGa=tRajeMA} z%M|qHSjcHU5nB0`NZg_hK9f=P)cvKJes4* zgKZ_eH=VGvDw}eGEaDzAOXaV6yHMt`eAtab_j7P?5Pn88$GRHCTLhTo__RS%!5ICy zEfsvhe-hjinJz#|m%Je%yFMG9%EAgiKJ%y7FEh3Bpnw4cL%}4+gYkg#k3YWv9HBfL zX*S5SAnb)9Y;1!t?geriy!jEG^mE)7+bjn_JGcjbCDyI0ax|n7SuBR`hfxka&QFxI zAODIGb1n>ZQX%|G#ZOXVHsGTvSuSdZ7(CVCSRw)iky9EQ8waKVbswvLTV@n1#W+Rk zY^#-wS{`cPD{{xG%&@Sg7OR;mF9y0kjE4X=&;I zc$*w#!OeM&27rTaPPCb@3>%)lo4!G}-LXtk12h*;W?V{G8_yv5z z{n`hkcQsrztDop8W1!ddIb!VqVJrvlkqwYJwT39jXlH(ge_Poj zKt-?=<&sAT4NY2Fa;D0IWJUHz%5(gLU8KWujCYk7A$c8P5ehEYCdkh2*>rCWP(MUgf>`eSxL5vf(6X~PHab)u2(2DMMaW5uGf-1%LG6)Qj2H<`cR%#;c@7i^ z!Hxw}!G2!TBN<}=Lv>M7`(Ywbx_e^%1gz9<;8!TF4?n8 zu%^?HsR!WeavaSj)8Ny~i@h*h9@^(yc}RqGSEjAJ2slS1xXtHRYqkKLkeDKjRW2o{ zqBqUd!@@wI1DR{{<=sXi#-ST1LvRF}rgWtP{gbpxe@$jgj|>t^X^N03!61jaE*L;5 z*XZo*By`Ob#$&nqh{ zD0TkeUemC(WOyS(D*TuL+no*9m;Zq?^0OBy-VoRR2dP z>K>esi}E5|Joa#&0h^>`Y@7fSUSd`t^-f^PgA-9fd<|x9FU5^;Gh93`8ufbqzc4}T;YbcE@9I5jK`!fqI`Ax!hze|_rg?5qfjIl;p>H_+HFqG!Ddln|_{ zAQH}K?pPe?N#fsz{VJTaa~qdPPY2T#N6<UmD%WJ*5PU+i*f3cj@Q7;5_+pNk*SYoft1?)*imgYZi5Maz0OL}8U&jQhK#9Hny`oGCfLYiBXuKhDSBD9hQuH%#ed;}DVmy|Jo$r0Pn zG7etE2#jW(h0&J?7NHKkdxhZaQBS$@fi~#?LMPF!p_1FoOiCIyrM!0QaVC$=Ya#x^UT-dwf@dx!1gYr zumgej&ouS-Cc>*rR{kpl@`IF!s*RTMS4#vzx%3rkw-=p26lmVtckb|~-@S8(m5nX_ zy$KN^hTVu)RKWHEGSs^14>=HA3srB&&nx>jP0Bz!rd++%(c?zf=i0!O{ka?2=vkp_ 
zrOsQNm8O~CCO6WD6dQt|XlFn}Lt~ezNO`4n=!$zZ+bh4vXj2a#KHNMruA23C#E4!c z4DoC!h47n`(%pSRr`V$!RK$THoG2jjq!MV^&39NRJu=_`a9+qSAmD?L&9$OEI=;9n z9H*LT$t7E4*f)D!0@H1 zHBLz~1VF#F|J4ol6PK+##TnKyK)NT#c8C`^*KVuqaUst}FDNjgrS-**VE^b#+IMwloLivHw%>Bmu1$im5fgq^;ptFMV&eHH)|F!Y- zx3T`q_}F8NTr5(5w$}Xdb58@IydN>IuG-xt0%}@48 z`VeDzI${+XmVBmZ8?gn_sBCzp0251TB90qMAkh67i6a_z`+D3gQBm@7N|HWl>$PY| zn7$Wr;z<#L2UEy$XtvW>KCG=;Kn8wW8H9&B2PyV2;GD{)rKODoAyZgdUS0DRx8mXzN zL3AN5`n9>M8809Z^XA>_Q~ zxAUGzjcH=#0$ml#PJ3;Gc@-A051J5hmZNv_8~oJ++cPqfNKhX!Y|}dpb3(MZknxR#M_{YytGjgsc)p@Lrib z0t2eL5fn4RTVTI>x0_Oyc*cV$r?RJ-jgk$Z%#focQ_vY@;7XnS?RN(OQFQDq=f9dkuEk5hZCQuICh{Fs6G? z#EBp&P$IGzv35cO1#xp!EAP|z!n}XvU(Zk{YUV1#*_hKCXhh8S*gXCbJNTW!(cEEG zPKbf_Z2&thIf=?{-%gS6G++Cw2<%k4ZpA*KnNK=+9@Z2u(skUR$kiua+M3|d$9c$CHMk!cp~-SCc&fd1g20!P{U9bD!mz=&tcKFKvd-uttdWGjC#^zF z;r#jY_o^nfR~P(=Jm)f9uR=KEJ2VVQ#D^lhI#(iM4S`{RsGOjTiIW=clEoW~L@*@7 z6*lKXNr^Z-B<69egoPsFJX|~&8pVl8&Z2@M@rv@wA;Y-oGGhQ3Q2ulk7>B=yEf*}) zWSxQxUo%qx31sU+P#c_JWWsdv!L#YUxwX-A8qGWi=cau(ZM(w5L>w7rfTB zIn&GxK|ZL8lUK(e0@^mC`@F_^iXxnpW@KXGn>K(7Ku-SQd>_=IkLQm=SjX{H2!dve z0kBbgs&+#O$MHRKlCDJr>n*)#3`@L0Lqd~i)y7jFj#;jKs@msOY222h1edZb8E3=7 z#-`Lkos`YM7^R3G;#87}&e{*Nj{;0LjE6ygA#os!*iOxkj~})aantp%h#it&p8wR> zP{rVPm>M>z>9#!Wxtau(pYbpbq4}iYz*U=dt(5d~mG%a62Q(rL=x*eQH3(4Y5lr17 zuwx<0`co?A{ga_VMvxx*>e?D2MA#00dIlPJ2*K{JbP)$kIQFr#i&JmZ7A-8z>keAE zzizH$P-@UN7uU_UKrX_3F2msb^?I36F>zG*W)+JtEVeqLDYcDuc@iB4ZAuyu zVv^gvyvAHKYmICKXlNWC6GKK|%C3iRjGQwghdr88^}L`Sab&rGW+ONbWx@yt(iTcY zR-WOARZVOa${f$7H{X+EmE_$h56^K%efq}Y2*@_L6-ab4F*6(C3yIr;D8+5IEhgaM zUG7zlr&bBq!lao%Diwf-dY=7D5<7KzI7tJIHECv~CHk+27wW-@YJJfO9=pdJ`Z1W# zY6|+&VD4B!EvU$JkX`b*b?dJ=m{N9zqZJX>9p}s=095*MrUys5RN#t$VVVUeEV^&B zFf%a`0Z`)nd5F3SIEqJB7{|d%tM$+@(A5bCke8Ro{o5-_n^l$2?gK6_n~WX7rE%j7 zD?wwR+sWa?m6^=K!w@O~be-;1S>VIiid=H4LMcI24nW4T3Fj!o8gZlXznr~8&YNuB zbwn07BjM~ZN&a62ym(0L!nSb MrOzdu`R&I40;*>nEdT%j literal 0 HcmV?d00001 diff --git a/results/plots/accuracy_by_model.png b/results/plots/accuracy_by_model.png new file mode 100644 index 
0000000000000000000000000000000000000000..8afa714d04b52320b661311940fb8a368b5f3e43 GIT binary patch literal 16115 zcmdse2UL~Uy6rZ`9yKZoA|+8!5Cu_+bfW^&dy%RjNKrZ>9ivF3NazXziqe}>q=OMb z1d$HXMSAbOy}7*S-g`>UJ@<|G#(3ktjFGLv-v3_zT3?ypob!JmFDt%tJMDG~g|buP z(nUoI0Y&Djjl~Ufn;XV@Kix90vNkq1 zJHyV$e)_~uMm9DU*8ChCH~-@e?B-V2IewPi*o=#8v$%BGnnKxqmHfXUP9)ZtLJ__q zaq$=B+fPQ@9koCH18j^jZ?_6JU={}fhvG^{d4PGq%y&{^O6?c2B4(wlAi!|j3xa&Gd3 zML3S}h0III$OKgy*8RF^hi8LpU5w1s~X($FFWta_Ea$+T8E+d*cq&tjf11Az4s~I;kGwZug%&M!UgOS z9OuXUm3u<2+fNRs7UkxBpXKS}(~xFR5f~8AT`g?hAt)*;8h^F;zFES^P}1devDPf} zrhRgPU1N&u2~n$~B_$=S+}xS<;`_9?0tX@L^-Cd69OHYgt`g-R!_~ z-{WV`s&{mBBB#k@;gmet!otGsJ9gAf zbQRlveYY((*L9(5%j{sC_?pRZPt)io&BYEkPMay`4m2d!# zBX7LWvAJquq&eNw*H=wm{`ZxYmHKBFwnV>p@wrns|MsgJJYj_kUw6bhACn1vAsKXh zaG>USh>%>Qurrf{R0w-!mcjMwrp;*vw5P9K@>_l)Xs>ug%v&Do?in3TF7-KN`u5(o z^2$ovfslZJ=i&_Z7c)&7$Sv7c3C$0@XLLO=^$4e+(pl)Nl4Yj7xUj$?D5#ZMw6+74DX6^hQt0W%r_M4rB+Cq+V%09H* zHmkEuQvB;8n;quIk`Xf>s;cDhV@2GhXMXr=`)NZ2!iT0NwVkwF)n}>%pXXTjmm8+z z@EZm-o<~I~M@#NU5UObB+E^|eI-YUmkMFo|w$!iXaA;)2l~r+_Iwg;AYeWcnIx#l! 
zyxn;M2kKH+8!d&MU=zJ~(WvbApZT4aGU})u92{~rGEJiAJ6&}rCnq%u95Q2M!eZRq zD61>eRsJvPV&#S(3XE0b#JF!yTuIi-QA{_iR17#O6X@q>DLUKR+dEjeJXWV|*B-rF zc;Or}Dxb-CeQnj#)3f&D<1?AJMm2<%N79#u5>sv7)7uT;i?YUxtovngFp|11i_u4M z&<%-dY(M|}OHNKsc}0cNb1@&APUm@J+(3C%mEGBS)6{~PrP<-(wdL`m8{w|Yt(`Vs z-fXB07f`eA|I|?PT-+APKlJ9#M*mN~+23#4#J9-}zc8K~Y0h0=TUIG@S*?h8k7L31 zXLO1Me%eC6d-v{=(o+6e5y!bC{#zq)qFx7Bg@tt(ooD>;MX_o+_M*E5C)^4mImLt= zW~#rIcyT+-7!}}ZCUr4Bxwl4#Y(8q|-HJ!FERE%j&!oDpN-?@DJj=BkPw_h<-m>nj zPK3b*S}s0`(9h?b&`bvq(KmWb*%gB7$~ zIB#N-5Xy5i!MHAFu-jLpv4W#0$=mw}Y;*a~7oDdod579^(p^?()lO<<%i|1d=SEv+ zzj|@-hxbQ^=#bc}Pr7`jVEU7Q2J%ogQaJh6Xp3=EiuRn#jsqvEzx;7`KLbOGK}GQ3 zxa)eJ&h4&E$tW;oRdb&N#_RhdT~kmhv{x7@O-DDXe~uFM@@6_?+IT|Wn=@7}k_Y?5 zjT^Ea;JsG%qf)#gbGm6$iWh@v?F$Jpe+gOHLZir?hj?ft3{gIIjc(b^z~%pOWpR4+ z=Eg0%xcsH{^u98uK70D~%?d)Gulm^qG5^IccXyABx7#*p$2iOm9X)eqI*ps0&)n;o z8Jq6jUV{kNwMD6jJd4f)Cp8}Z`s;U&j@Y1h<#_*tJU1Q{-2D3PL1}Ai>k3Ns-0K%F zn2vn7(Uv{w-~U*|wb1Wc;o6evGGEd1m;i1dStIi_D$MCpn!0>@>pGOHWfyGVY5$J` z<(j8_^j(o^RxbIrkaheo8@vJhDV{V z$XfQ~>@4>&xd^UOs#SB1z|_K}@=K@9i&p2Dm-nv;n73uuTskdaH zdiwiYTUC=a4459HywK`${Ce@o2SFrW zNYV!y{ZHWn*FQbs(@#*2|2nZKG*f*c2DPqsqD #GK>S}#@bECv#=+&BcBzzy1J-^8qseXUP$a~cb;!&*66;w zTX>*8`0?ZT?Ci6fwrr`WsE`Ueu97BqvAw`C>&3J+6OUP2oKAs5JSr=-&>9jTUehe6 zO?7dpi&0;?_cu5PSFS8?{z=$S9J6NF4KjFW|Cr`+=Mh zxMDK9_U+rL_U(KAT-JIWWC09Z94LB)#dv9SSi+(`Kj4iTO$$cK5ty-n69p_ zOoZTNnNTi;-tr*Zg^8;|6K^)otunoz5-RmIC8ItdgIn*MkOSTYD(hG4Bv*5vgF#e%44Xa+LY-EX?v zqE;uAh2jK9-UqO+W!>tcV;Ec)t`lk8bm`t9b#*ipXUH3Iaes&(Uu$*0+V$aBWIejy>>pbgjZkY6 zbP96oj3g|7pyj$$CA^$uSQ)C)L-+7JR$i;7>&&e0U3~p9vKj5SS$sckCmMfaWn-&& z*0&=y5m0ApW=0w4fMaR#-u?TzHYeS=f#GVOIj=6w@;l7L39rs5EgSi~INa|`)yU<% zWE0MBD`{RZ8*F85P4GwT^a_qnA&lpyXHxImFP2@My0>n<7G9aSAoJv`0uWkyR)1d~ ze}itK>Pe3ebMEXb(mRI9ma#9=ICT|Ol&m#+Dyr8piu+O?Y%6&ghoVW^VUwBi|O@>+h|8ENPAJ=W<=NOLboq{9CP&FrG*H^MpXa&<(C$p*~k*$f`$;NTeTELxAUl~BZxg-aB8ttIb|wm9#I4#{q> zoxM57`t5Fc4b>DaW2VQbk~wAbzKkMfPEJnJz6?U%oTYSpR_{=;s*n#&(qTN6Nt#)o zPH4XVC^%Ji#BhGBy_81Z%F0T5y020O=(V!vLqH``v~mlh)0MB?-IaiAqwlfhc``uI 
z9}k)5Uu{g$HZ6TfZ)%p^b^q`&Dz$jUlK9v8`GVF!-C^io(~X+ukC6nA-ZMCU( zXFN5Gc$dc2We`D?Px!1q;;NO{BN?C~X<6Ba#g@Kwe0nG+05x`FZFolrS2mn|BJwI&VL=r}YNUG?3VVVL{g?@=mPOvMg)B;U6n` zJ8xjjFpQB6H$f+@eD`3VOtfT>be38B$SM$^%<|l*kFT$<{a|#+M{=v=4Om#XaNT&( zdIz9mahuEXsNvemB7sYWC7$~Y5gai{-#M3(aQPeFoG6$)&Py|vMgo>!@5L)Tzn{|^ ztod=Ox1zExg=XNMZj0LAkHuVu`) zzS8#~_y$RmE+Hv-noTw76dLE5pMU;Y%%9I@;P--YDUO0`*w2p`BP)UG%}L@11O{Fc z^JN%46>mS*mLulBYCej+;<|eE>d~`jv&*)X$O%r~1LO`a{{Gq<8vU)jFJCYSInbD! zk4p)Q*5}!o6WE871O?HD>l$LnIm*IPrktSsp*~f&b7rWaqH8cR(siv_aIq&K20bW7 zF!t3({DGGzl$KQE3~S zjK^pg7b3sj55u9Bk~oTwIjQ;jQCu9067URJ&Y2sZPo)C|crn zj$xP;AH92b@r=cnbFmgm^72>CpFjU3;_Y`RBZeOtOV=sts)mL!_~FdEeR2aqYC5c$ z(@7Xvv5-jNrGXF#~9F(6ud!~_Lq~JF^t1cle?X9S!#F2YWPvUZxSvV#x zLI4T0MQmhxV6joX4I>?yI9IzF*Q-$)W|7qiDAUX?5vZ7hm`5C@J{(iQRE{CPpln-R zQ^G6#_a?}R7G%Qewr8H~7%sEgZ-pV>Y^Qo8PF_yCSP{a}ly5)HzO>*XG5Gn_a}08H z!3>n`lNnT3EXiE^LLvZXbOi5xq5P{4lMfB&dBkoG7X1vf;NBbT6Z1vu+JtJr437)9 zYs)?;tEy@OfJ;Oz_+XeGe>vTd40BkrT0;NoVt-@f{T7C~)2|MKO^^87-ymUUOmglb?>kx18Oire;>vr3YW z@U-=|pY9t3#h`lWQkm#76B855Pd~jW#IUBT(oUf~b+XpX=0lr7=h_b>Tg^a2 z6Nm8AzO@eaAz7zD1FOlqRy5iDo=rfYwUA!eX(3?<^mlJqNZyW@%{O+X`f$qL6utcN z${ir<6jXGrv`AD5AcT~di`bGo**AM)z(oK$+R#RS`1sL=wle2;PO%3M9t?FB73CKc zNTo~TeSc)1Y(tS?JAGOaeC4iDynq`m>?tI)EoLz;n}O=u6MHBW<#T2QC-*Zlrh>~& z1ffBwDm4AT;NTccp%VT56)$3AO>3e=cP&?j@yaD@W~pAhcu%05Ec%gVu?#vSU^|cl zlfkuXEQb%DPgG4#6Ilfz)|JsG13Zdtivb#Lpl9AlakKh_el*4&b)vVY2RnDJxw(02 ze%{WGk7c7zsdLZRUP_>UJ!^Zr_IKsKv2}KutuG=S-TW_RZ=g_u&;Nh%`gP9YjDG14 z?)UGPgVgl%^TV`J$Gi-rZUVY?`?hURBou zab5s&Zor(^02B@yNeT`2gjFPHfx*~Fms(U&ssjgNK7N!&3rz85+k$fns-)3xvz=;8 z)=UJq7s_WHQ*aU-E(Txu^XGq6T1S76vm5W=xA@|Q-fS=0|M9~Ig>&cJ($dp=$H!eu zGM|4n;urxu6+1 zt|LnmKylFu-hPRMkH&;4O@PAv9}XS{yJV-0^|Oe#<$n6p6jZ!)={F|UBS$WvQqHY` zaa6IgN{hJtbzA0*WD2)z7MndxC`Mj6%%4>J{aSMSF+ zJmvb)3>g^x|I=%>P}@hO5ik#U4wY-yUVuumsVRgs6b~k)ZmxK@NZ{2H+`FJPUmG2^ z|D9|^VBiad=VHdlP)P|11x$wZ$iC?4Xl<(^pfi>uM~VwcfH14>qfp{LYwh9%P#*%2 z>2R8s3CBAUWQG4(KbrPmCJGd&p1I1py6J==NQI4ER@u!Ue$4VldB_-!U(yOYWv>C` z(Ng2y$-Dd=)Nu$syhw$nk 
zAPF~?H(-xkr26s4E1kexUaqgOX>W7BZYzP*MVP@^(BT7MW*gC5xD7wjKp3&5J$1$H z^y$-{wsilQw4mu7di=!eqeORL3O6qGJ~-4-khiv2<*GBY(EZ+54SmE}z-hr^l4CDg z)>$<=Ga@7IIU+{rP;yuJ$n(|Zc~gw!l5%oQ7)AS~#&q*;-2@ZG?^Aot(2(D!PfHS9 zNd_vNSK)Yf_YqfDR}UZ{^^l4#GBf55#)J{R6vUkg`W+a;-;c^gBvn_-gR4*l@zDs8 zfWw~7+}xay6d3lN3Gwk=uBoXhNjb*MEDD5cJ$o9-B5I$LTMW5}8}vTm7;t!^bH<-v zJ_9jq*Q{Ui{(YkB`kF~|T69!Y6c^@A#1ShyyKL>ErsUw@ATX^TlQFsik!8v8;J6DG z`@%F{-*}8>+Bal?0P(WoYttG!ApbK~vy<-ZMA|Co!Jwd^Xw+Jg_Bd>id_h3}OcLO5 zcA0h%ID@dq?u0}}Mw09L3NKy4B>I;{H)C!Hq94_R>I`} z-FM#whKEBmu+-~=+85YZe3GyaJl zfU_(L>>gpDcv!i(>f?o&n0nGg!iXN;f~dq4(m?p5`a;NHz{`mjJyDUe74eCV0!P!H zGQXCWSMLH(1tBhHhnt2$!YN~DmwGOSt=6+%G>-oeaBLW32jPVB^FNL2fG*y!aq}rs zzOutBx}a=mN=QgJbh%S!;5$IP;-JmaL<({CUu5w;=Lxk55l=Gh04|w$OSG%6%H~|qih^Zk}S9f`SJgdkKa*cl?hJ`QNvjzJBY*^YN;BYYbn za3=Ovge2lu(F#W>lyk4}U7%PJz;0afCmJkTlNdIIh}Y2j$MAXz41 zE12S~ddovO^~K_;u$30fTA|k6gM;zx+PMSRk7(sx_(+4L{rO2kaf9m!6g!DiRjcf^ zn1+;Qf?!*kDBcZ({_M#UI+AH`!nGpgoghA6@+bQT9oR9CQw;4IXTq#TkL~qBM4))F z`0(ROlJ{`;2%BgHG~06zEcn`-=9;gEI2BCUi+==e4LL>yLQN6kvYmE@f7v!d3-Y*`K#`a>{3LU9qGWanT|Y36n9>f2pZxEgA(l z$i^8LX1C;P`{Cf}i1?&JOZ`?L8>-3TAlR?gk_5;y3_E=@}-JW;~ zMT~85Wn6;j_x>o%I?*vPxi(2qAi-Zs?mH?iFDdDXdJ>zWop=1?$x;7h;&{L^NUkUL zqO^FSMHBvA^Ysl&lXd9oTt?MDg*@c`FPiO6Mqw>PkczT$pj1CD(b#X5jZ|Q@3P*Qn zF?#=yzC>77M<)Wi9s^m!WA4$TM~91*o-I!In}V$mJ!`4=>Qg{iSmIxzn)HuS!}P(K z!8+Md3^b7GIp!>7pPYS$T-vmGGk?aG&6|M*^hL+gaf?L10X+2!QJd^Lf9yX8Na{Qk zH2KMFIx}*^_wIe~@lyaw9{$p9`JSGg`bv5;iNPl=+pB7_%yos*^723FdxMqCfN5blSHQIg9TR35HFh#IUSaob<1%fO?sS^= z0T&ld#tmU-jg6assD_K?>2JUNrpNdmbr<7}q^#^MBRN@FRd8-fXt}P=7y}3u#OCs3o7tmwoOpBPu?mas1bg{ zig9Qpji}}^n2#YF{}QtV7XsD(_HCn;g~?p-_7%=Au?dZ8BdvU3f1*|CNSE>yvO;)pd~<)5Zbflr$?~K(1F)JQLd#%nGzgAb zNSAT&m=u+5n#b+5MSt!p#mtEldfAw&n@Yr9EQ6NM>IF3MSZZA-1<(ZSt1H;IbH*0- zdqgCNF3gQF0bT17@q^$7W@ANGC@qN@+1UL))W&oS-=i3#D|Y9+mVys(j=zp?pf zkdKD~7MT~U~S6r7SUnfPfySs{BB(Z(APsQT_sm} z+v^twb=DMOcYC3=VgyaC?NhIXHRe%c$YlO6U<8CsFG3L=asQ!16-&gUII88MVFJ<{lKt;Yu-e5# z+ic0VPai9o*TFx7Zr{O|q@W|;zJ0r_xO;uG6R85g;z35=I^2-wfyq`_9zc-Bf>Do- 
zjg|68WF&z?3NiZk90sHR9glc~0o<9w-Qdve!%OF~ zN06UCwBf-9id!oV29~DhfaC(1%gf8dKqSlx7{1=@M@N8!CWqQ*ZPJM~!{w)SSd>I0 zYe;b=w2;}J06BDuLVtMh;Frq0_F=Ej!82m;&awE=;(eY3!JsRu5P}40Zrk_qAaoub zaCgM%Hw)!6PwS2M!825}w8x>2V;X=FN``tg4~zww6)<1>p6lSWmgkfNY{TE&p%V=pm z>2O)TNq^Sj9Mm2KnAwz3BNN%*7^d^(^OT;=)HcFN@WZ)KxH2V0S}mEw0j>O;arM-N zh1<)+F1AP(N&cd-N|E(NIibmS`_vo`lJL;l&I?>eW_TQCCW3W!P|mfM*nx<> zhJk@SbxYjm&*}yS2F5@Z=!5d0dQqgJArwm}Vv8^jjjZn2zMTt@gI@5qJQ?#zhcGCo z0;+23wMLOQJwC_Ji(IOP_bU1R&O;_BdP7aAkr2XET4saJHs<8az5KNEiguE<^(ae_ ztr<#7G~Ra{asVNxp{{w2WH~KbH6`HGS$=`q2w0Ila-;<%XRNRGunYd|4IfT-!(Nwj z{$nTkpY@Y}qJ=C@^%9FBa4w-Kh&2H;B6~4f&^i15MhmFJf6hnkZ(av4ql8+HB6OTk zv*2**0U#0s0|QT|06GM!@#uS7HhqBhkqkGWQQybM-&oGsftm~3wrwln7lZzSMv!zvM1i{_?Z%Oi}LdpJNnsp2$Gw^H1!D&Z58h{c2 z|LyVtIyxs6TPO%qm81JSVQeop)%RA?R0w9*PCy3A0>GoQj)^Y&FpB5^gwKtlI;v@( zK?D`$x7mJ07FNLCWgG}|WSucr|9O0Xmft3>!+BntxbH$nhQHz4$ix(C;M*U}4&^|+ zRs}hT=1boa5D>7Rp5D1;m3$w>&OwYait}UAObGagZ9M4y@Cy?G1|&^98eZk&GvZ)g zJ-xi3{{<^Tlf+gt4LZF%2=+_Ic~&LUxUN)qsJp*E7I&AWji$^Z(nf3cuSlGa5kh2g zt<(1viXo1T4q>RjC)Hm*O{%~pdVkk!%CX$?HyIZ9<8T`iCUEorthD&=toF}ShZpP@ z8Sl^sdI3$r+6HMqL#aP+VVVv9MQxnIbFQmzzlSv_4tn)aOJ*Xfn&(I)20DJ*AtNxn zx$uXSX9G3L@5$zFgI_=eBt*l@tHo6C{9;mOMTbChJ(1=LbM_371R0FCZ{5n$P0~~J z%WI22%Y|3dJ$MY@*s=Fl5i^awuPvl0oT*dnty^zqHLs7OPJGKCh!n6IRHWJ9He?9_*)AFw{WrMe zpG{M(qjF?i3}pK61j_%E_xx!6=-~Tj;@D)(_LPok@TPT(N z2YyU(RTy82&+6aMGbHga`sTt!$A8BwtH+qxc`V5Vzf!7DQqbc#p_~ zR{tT{o3rkuIrXMP1d@WsE_y|fexR9?VHF|*zJ4j1KQo3_tI}laf7x%~OC`AN+jCq? 
z!>Nfbt4pE6&bbUu6Td(?bh83172kJE)+TrV&Mz%r8zCNowhe7{bwO@EW!i&({wwJ) zd)4F2hA)zK+lw{$LLe4NBThG4XzQ3)6a^fn+ z7lN$TW@1I+!PmBAnMV>r{MVfpI}f)};;J!G#sUO`k= zsIoeF%nB$eC_FqF=`%#Y0Fty_`v8Sk|I<#l?G?~_VQtBUDUhSt9FwSl@*cM-x7cWW zmTVV>tn#15zU|I8nL>Sg7WoyR+6gQ7uOe{4XuvpsQ04olf7X3uPPL_C<$zTZ*YQUXZM;`JB*5Y;=wuvhTi4;y;P@OKzGOp{( zoy3S+Ug5RDy>E@aai8Q7qXCqW)?rG*ekCMwMS zg8_2F3MeYy?za0i)xX`Xi;D{pD8Url)VLfGe|RDd~b7>7Bd^iu`@`XEbiixOHiKOj%w1QJ&NW`Oo}?ltoara`zg00FJHpM}58PTZc- zcFzK_OU&oR!$_r^_`Q_gZMYJ$m6*&yIuU7_gmU*offxgEB;JV;H`kz19@{3%+&mN6P(H^P!n2O!7jcakEiC@jR!N0b`)qr zAMvX+O#fEp&sO0*8SqQPdte6 t>5sp*Jw?97{X~<4e)WIkFPmDYM7_Lo=c5Wgp>+PePg^k-gtM6WJfof{r~@3Uzy*WbLFn0ywv6mEE^~k z%4X>c=aeaw)j|}?s;=+X;T;oi4rl!5r2Y9z_9|A!_D%-2Mie;%d+Tdf_SZ}=|Kw<7 zYiDX@$;W+?oA>BXSMBYs?Sy%FEdKrhZYx_8p8fJ`TJa@6SYObzqfi(O$iJ)NCE`pe zl(Pq<&z)Ai9y;3TrlV@KyfQKS{c4U|w>Fr4AD?hU{a(y3#%9mrA3u@F?=mkdi1~f& z@NjP7+h=XQ$(yWNX#Digsjk@-DU@S(S!%d(Uy(m=wWLsPCj9t+@T$mF`1OPL z^GioJY~JkP$lhGx#p-^ajQ> z;2RPU5FN;=(U#3B=AydOJEqD_BZ>EQ^7jb|3ARp7^bEuDP+gg9KF5(uYuB#T)L;2r z%8;m$7T$HecXM3xZx!eBPG_gzn6s5`2-bH`P!OMc^|m?kC{NK8wZL&SvA5iRxYZ=0 zc|GfmNRQ>oh>Dt;meJhq)U_LT>P){2-XP?=Td1X+)gyU%X+i&NU!X*PHt(jh85tQa z?|)stgNfMNovu8h>1mBi9m3KODb}%l&Y-M?=MMAJLh|A9Q zeDF-8g2YMMcX98ETyrlQ8aAitOT@&+nl;5M74-Uv@Y{a8fWIc{m$*OSHM-|G-lh9= zz`@B$BbZyaDczuyL##h=adEP?prdxtqO(A4VP>@PfWN41rb;cvlxz&93il~Q&htK0S7@3(9 z3LJ*9)_T^>b0*SK_zZvNZkI@{jri2_qn;bx1F83 z)lUQ(ai40JF2zSl`ByYItNrl9502$-Q}t2^53LgSB}R!-efPQZHa0e2W@j~0w6dFu zT%BU#;KIA-gA0-dS7N{THZW3;PB<6w0TKn!a+E7bM5`I%`VI)&$sA{W$>)Vc2b_S@ChKFsqI<*X@g{k0K1 zcoM6Ur54K$DONXCOG`_^?n(ynucwM!rm4Zf!DjT-lvG`sS$mF!V0obXYZ4@NPtU5A zxVvRvt$#Vs+~L&iwqe5tWi0EC9Xp~I#!E`7DotsQqsrH>=ZT7nHf5Qrh|Rn+)GDw~ z=p4|R*DEb8Rg06~>+Ir^s1PA8*xl3ZUYw$x8$_eg&YU@uW!9X?E1@DGAu&DHsqMX! 
zi^EPLQRwC7#HYI90%S7KeAqA#n2-at)Q=ECjpu&@+ut0J|ulI4OAA3kIm zRqm{gkkIey>YDHIVm&C0Pc@PpEc4!F-jQeHvM`#9TmL!RtXbyd$&(st`btU<)@|7n zA1>xPR4vrq6nfI`pmev}Y}m`v(roLVGh~Tf7AMN5-?Bw~nrXM9<3eU)?#RS~=@#v5 zr*XaNFcA&yoNG#F&fMCtaif~5YBaWT^PUrmxPN6En~duGT}LlP9+rD@_4E5bG?FLB zJyvMEyu4ZVgUVdG1*&ZFp><(ST?sxuo1~TP?Xwn_mZrZ{^0e=9>Up`AORwOLCsv2GPiY z2oF7#IQizt)3=*SJUoh)=ZA+{GvoHTO(|KlWeq+z3=Ge;Z^z}H2wHHUuFie^dV%UV z^6n{uWN4%{Q!DH0gRUEsa-Fu7T=)F^xF@M?UnhFiGL7X@kZZdIuPGwSDn*^J=Hg%9 zubKW-UuxMc)O{n$eQDmb^|?`7fkP5XS+-@TaHrdBXN&j&DZhY_kPzL?Wo2b9)6E(V zvz<;!r*E&LMr-K1sXch`;LDdU4V-C(&k*Zx%gd$Q=BF|?o}ltydGllTwN~X}dRngM zZzv2T7oJFX6nD9fi-}EE@m0KiTaQpO+x~3Jo)eK>MXtkf5$@l<=)Ct%)9zw7moIPG zMr$UUR1^l67sfS^0OY2~Qy`#JaxFXUKfgb1UV>I)hN#-`!w>bZeRofLtjv12e7MV& z<+d=xE72}8UZ_HI8gD^4tnBbNtD+>PBMF(QKDY=ox*)!aPgG3I*Rr#qb!s>ASP3iD z`~HvH!F$ZdI`Taa9yoNUkLrd6Hn_k`UK`y%zS(+~nsK>ogL(U-u`Q+_p!g9 zh3c~~MjL;geNIYhH#NDp78`p1%Ed6@=Gg8h0_M1*q_ko)3*>58Jqj)Dc;&nk``+37 zf$+>h*GjHjCaRBIh@fV>`_i;VltHQI>-+o9a*Om^>E>Dgkv91#H>~*Ebe5yl)c2j+FyWQK5Z7ThW8n~I2m9-a7lf#^w zm-jAju0O=L-I7+_rasRsvHZ1HBY6e2^!422fUmH_5ED<~=Udrs@6rtH#271pBs6*OnrKK z|NebhziiYuQoEcw9W?H=Tpur*ZOiWK>tjEBSaQ{>RkjWeq>rU+JY6zB#3{Xteq^L0 zU!~NGp=4yKmU9N6(&7rX|C2I z=-?7C$nzYoOX;UZ+jHrAojR0KCw2TLR_FnXIcVjL5zAL5UA}(q!DX`(M$I-c9edI5 zaN)x5XnW6|JxjJoO-(gxeI9ex=_j$p&!sNN-E8+I7l)B|O4KRTWE7;cEwi>LbHgnF zFMS_Bl71-I8!%nOE<4!wLeiI|b+q!$o4Vh(9{4yu&a-RRuI6M-l}?&d@Knk8 z7oU$usbtsC(8w<+sAz0d!EQ&%$%#0Psn&*zrDRG{Rih*~Z)O$mFfN?!a#h&Hty2e- zVKMaO^Jm4`vCfp>`wXn&$=liFb-vK_f*NZ1MMP*-OYy}zX&q~={m@a>S>2`{j1|r5 zAimTod`>1z)L9w3l~l)qY~kYJ;R(sX?VgvE96}bl zjAWEI6}!1MC#XmrI&^5LA;#N%dCo+7mz5M3H+S*nPAb3imn(9iCzMdc)hGL_r_(%^ zWRWj{B_$sDLr0S$N=iy*#=EWDMh6DsK7G1MS_z+VRUHAYO--u!>(}RoLRaffWt-Dm z9vqP3ZXd@ZChF31rg>{(%cB@`v$eYUu@G};VEi1 zy0fIiP*eVhE7S4IzvWo8$CmGNy_nnOl2lk&h>&hV9ECi=O>+T5(0V<()wSrT^IFDo z0%lF=er&q)wB-c>(S{ZA#_pE2c0YAQR%jL9$lp|2=ec=zMPXD=poU&O?(gG=56|$< z5V}|J-ZW7+nH9s?g|aXsuJ3|IO!glf^yVq91BlNC3?p&Yw!E-5Ok7;t*2X6PvRp%% z1QQ7{J@+{yhrx#P`T6~he8hBC_cHKP3r 
zxG!E0af``hn3$NrvR-&xzT$q|TC+Rsd4QiAQX*sqc|45Zn#Wj^k|{u~(2)RUdZrX5 zIasz!zX^S68t6p(&Ye3fLYASO&U!9`hwub3Ra)+>qV6mEzD!O|*NBW?ZAsRAa@^1} zyC^-qF4bLMUq7U5ImUfOmn+2_7&o|RbMe%IZNj*Vri}Ug=*ZTpUn`a4srS6Srz<$q z;?extG7P;s1u-EY=dU7ssNBmEjeA8L-^nelsH>_56a#q3a&reiOn8OAN&TigF9%CZ}Y^wFtUDRI0*`h!p$!{NAcR0`R8?Mj$BT1NUQAp z=CAKE4BIkI{KpC>*k?9p(D-2BxqMvWR_GeAg%rtHt#T(g{g%gsa z-6i^H5FB=h_c66F+LZDzP_jv*Ue~y0f=%!1L1bV(&^ig8J6>K%XooFJxM4MbZlH=k zWQ*4e3FTI{m}OkL!sMwdCmYP2y`X200j@pQDtB%yI3VC%=g~S~I~1=W+ITVTif%)h z$87)*{cqK1?KHYl(a6G?>C&priP2ozIiE;D>3?i8vP_XxzIru&CjhBYWsoVFYKw^P zSYGdzB@{>vz+w4k0`BxwT>+2f8)dIwlS|M`86=VcY}onuRCr1j zpNr3Z+>ZI6q$-;Mw@1Zn=Q2B%f}be+i_JW;ES@tC$=*e}oq1YO?8lE+$OyY{-#)YM z;-a$=e>4l*e*7JEA-2CdgiI4{G^e!cJNa-=`Rj z7~hwslRFaJ=egTb5v7_;4RYb4t!?`-PGd6WMEAx6k!Fx>T+I?9>s!(c^n>8w+`s=c zYNXclD|f5ifsBf#8f01cjD2cCPewgG>y?Pi6?Xm-jXIwOJYv@K@=kC{b7iHhQ-_V$ z@`S%fJg_q6vxb^55iBxqGvHwCc-3RRX)H|=X6+{@?P4)=#ULj&>C*vvmgK`7`E~+! z{TGd{T#2{pE(U{8T)eXEx-=lMqJ+5SH>ugv9Itd%L3HA`!}6h5mr^6dT-7s8YKuSi zwHLY0R?z9Q1#ZFCdrw-h^I0?BUbmTGNp9C&^+5C{c?AV5sD}hCsjCMaN5LxX?(UA) z%#6kMZ8rp%5Oi7rSYUV^z^>whjblcZ^0Nq-U6~s1?mmIxKU+aw-tV~mC&kk8^779! zGlG(SdrWVDn=tC)!e|-BYc6vAb7Vcu$tnuCT5@TA+7~dB`SIh&0E{SHj&5F?cN>45 zpJ&0iOP1XSyTwwn_{F<3&#B8fuBW%xa1oP!-yBnllPpSf+apou=k@6ZXG=RfJ3qCg zgiL^n339$@VDPkOY|L^ByQ?$sHN{_I`5hDbe9yqZx2N=F5#aUQ#C#cbxjfZsaIcW5 zsp;!Zr*4buE-ox6CIW=T2=U-6&AVPMd*w>BQm&=$S?Y96aAhagbUV#@Q*XnH9AP01E;sNqM)Oi0Hg>5qmKgv!Op2+ zB2Fwz3$sGtDSQCwSx}$^=4Z#PlJ!fv+6I&(G7OkrPxO}ix{epo;}IR?&Z7`1uXpL+ zF!*lWIu^s%J{f}{Po9|Bk9D+KJ2?pfnE940Pp7>`*)_btTN5s3-Z<+k)O8HF*5JCc zGc#Bs$zOhnB-`41n?XE%Fc;Ht@9W)G{A3PMPtjto4CWC6b7AO|t12yhcPkUqRi|A$ ztNysm6zqKT@J)^bJUj}&{`#xS#|Kg)`$RP!bh$5BkQscucp()TreQkpSj;sCb59iV z>R%f>ns@=@ zf5TWOtr_E5(U%I&zT%Me^zxOZ?v(_`(KfJ*>LB4VD-(VsQ!=Lez>pA55IcZJX46B> zEGKQR#DvcRQ4#1`SrogenISlaP_B;*Ew@uB?^h0VfQMC?D^1B~XJ@a!yPZuHJKC~? 
zLQ#}s@_Y(%ECJ(-hK}lr*G3`G700SCi&7}dSD!zBp87T9hDV;Y%KR&Oa`I6ctLS$W zikiyl)4#4>y}Dvr(a{Iju3ukST6z}fLq<-n=t?(gPQ~Sn zcNR!TnjhmD%FV>8gr37EPile1x^XOceRm)`dw6(QO)<)GtUh-di zmp-g>l0u133}vz0wtc&R!OQQ+FtC6Be!_EL`LnEhUh*!uy1G`Pz!tcnKoUZXACO!& zJSsh15&X8`ZY&H>+?^2c%rP-B?7X}Uv0}Wu%C0jb>~tpJvPNB}&X^Z3UdYZ!U$}4| z)TP75@4wHBEm%c4)!d(DZ8y}!&bVt=Wo>P3Y-eq)0t&l^s5l#&6zH&eKn|XE9&vF! zFtsgQoSef?hOqH!AxW7V(=QzrOxs+VZq8X*5y+{QrYE+Uh5ys@rH;IBQWY<5M4&gzy0^bSHa-1VOmy_S zthngtBR~KA;dwc7NTu6-A?5JJM~aWXedU{k;27XtezT?{7$T%m4oPF><>O=AzWu=9 z2baeqBiB$6D*E#PjuH^}L|%}DcoMn6&Mfb)pI-|ww}8`_4q)pMety*uA*-COrGSSI z4QH!D78h?=bmS?KjpCCbPa0YVmZ9~$jsjL19I?8)dohNTP~9!5wDMmV_Oe?`O${|5wG_eOXZZ6skd(5?gNicb|^MhFUJF&)3_!y z0ay`OItkbVGuWI%;!X-h)giS;)Y;bdIRT!KY6$r}@u3XEVZ$G{*JJw6*QI7ajwUr2a-kCWE=0u&lGkXi3f?V)ed5r|7zV&hYlWmH{A_}5rmI8 zzzq=TKw^!iqT)|hR#trj1C^DP!-?in(_8&edt4p_7u zIB*~vLIAJ7Gl=%41Qj+y-J7*#C1_=v#UMzA!K4;{bS8YTqT(-^sCARJZAK6<6-!pb5r+2x+_OPkb$=6@^*`R%vwsvq;ky&NVF_pou- z4(+2mL+G9c@3I3q*x4Td^`b}Ew_D^UK$9G6$! zD<6_?S{M1Nj*iZVu_+>J6y-lfFIiT@ z=t~5f&}l&mA)fjZZ|JQc(?eu&^Y8)8jmLOp%H#jH;@-Xzl@&j5_e? zfz&=Fph5DrJ*8geU4_myE*o)6_iTpjEJJ@C=vbWi9J>2O;q{toR;ldkHRukfjDGso z2>1UFdbM%Mla`TDwzf`(8na{P&e*1=CfS)K0Boa-OCvepi*(EHp^B>YTj!mGWOye` z^j|cg$>-&J#cy1KXwotYV3wTz1S>X;UjuO;L-@Wna=2IIDE z&%mE@LaBh>RXAdU_@DxURfNF*b zQ>22bqhk&s-8wpSI^7pW!Ci%+Q9`DBYwPR#PH4^e^gu+LL?=ICFv=S3=JisFqwB2IFR{z3i#d>10Yc@`CX>2qE7^v z@9{3XJlS?^6D>y>tWFEjSpXL~L`8L^rKN|uOG=hOw(8j~e*JnKRY*HGIXU@Di8R31 zsE$M493~L~(}sg2Q}5or`$XKWpmkZ6UAgnG6$vzAYHW3Q3 zJE>b>Uk|F+?(GtQHFEtqde}|plY0nKXa=cx{@gi237}X7666YTi2cNgOW+`yKa_cA z1IkzW{c}rw)-fF&7e|uRuNc|#i*l;pzH}&LM2iD+_Yl0}V>eWzSTs*vn`ERt_F4uL zBhdmeV?0B8mc4p)q2RhSyNHN(l6ngDAd%`$pFU0Y1~qC zpx=$Qty!}s?Q7-_Xz78~J=Tqw)b^gRev#@1fm58!#|7=MWzpeE^y&-L|x?o=jY0$?K~9iIFGJ<-DG=yOoEjH|I1*1}$;zTju6 z4vMO=|$b0b^SrJ2e6V^oS3KzbTK9cyd8uJW9Oe;c;0BH&? 
zAUrsukKsv@daUzWb1XuLGKwjQBtDW#*kvkyQ>kLP-(JorNqoxI?yF@2!WJz=PXETl z9h8_Xn_fvHBclY@*)c~2E+Cn_E?ipQZ8~t8ZKsZ|?(}4>L?sA!fU9N&aWCV}oifxX{HD(^wbc}iqpvb*Fyrpktglzg!MBj*I%}D2(XNi+l%8eUI5jsA zVcr-k2X0)}sOfuTjO7-ukXNtHg@NOz^1IGlBY%JZPDF@skL9@yJU`;wwr;r-f^5NE z4#tL9QV$Xy8krcV%!D5{TVJjA1>__w4mb^UWngRy{dLdUGK~2el9G}}O}Js}YCII& zAJK&F25BILs>pHVNIKGt68aV&hW;0Lkj$Suz02AxnLMG0fd1r6MZu%2M;=1VQ(|_& zUsS*MDwV@n#u>GR1Q3>e4&9HNw&Eu;mN#@6pGXZvjDU2_GG7>N+ucHXlZh?Xoz?y8 zAI88ejV&qKMD8D2n(a>WVL^;nRQEjIjao&^fv{N#W(sSorlqBIX9xDfT3}CjetrNU zw;{GL3hW{4YjeH7pYW^z<%&q7aQAuB&Kr}}jOu?b$%&2*2)gi7@NO!NEdn10yykp| z@y~DDA|&OvC&O76f(c|RMZg$Fb&QTLnr%;XfDBNV+>1MAe2GQD!(w7ZRRbON5GEqrW;FB5+5ZvO zR&G*=A#D>mC=YgwgYLX-+qRFRqZXZHrq?}ynbRH7ztj6X*xO8V%zhdmi}TA*It&FM z#?it|8pj|xfwPVr#X z;G_vT$SxqDhV6te!ZpAAKZ4$agWxykQSJl7!&5?2zFY_~`EdU^;yY{-gJQKbp!A8Ec~g z0a366oqdX!f7GsaCqx;@9r;gxZd5siB?sYB`P6#*f7UB&7$cm~VLxl{gq$wLKuUWM zkA5oDBn~uZ1Wz6O1%%+w&&yMl3}cLqj64NXO+{_3iOmG-xjG*w46Zk2Y^Mg7(IM$~ z4|tSR7Lh4j5vmJD`l1~`emuor&j7U$t_(_w16|iiZSEIbwJNP6I0hb)L_$%0eDk0! 
z-QYIV10LxDxNSI7&`T*&fv)|r=!J;0o!)ntSy0`L0!2DLLL`RktO=xO-}rbMC`b+X z16&?6bl_{PgKWKaLk(IQRPbm3xjBsn!HKbk6dokT6mp&8(4n%Y5&F15{H84uuN0`T ztULw&8H7kg1ZrX=TeW6gz}yuB)Z^j7C*gJ~sBDCX;oy*AQwVPwYEI%zgmLj^lC2t_ zkdf)T^rzqyILrtDNK!T+b9*GmKMxW%DB0*bDLVPegj&SV3eU#18DOoVm-ooM1L_z< z%h1ZVjRD!329r~`;wHe%9bTo?M2tXilNm2i3G>(s0h{a9q~Sxw)@07c)9CQwW27g( z+{p9hm`_DK3dj2=CvIp00qGLC4Ss`XAPW<~77H#2S+uDdl%Z9es6gi>{L4PkJYF-r zk!Oq+%E+m%`e)g`<@FVBs-QneQU$NQBj&Teg9R+G#{N6_vCd{M6HC1t6X%H_kI+_!8us;cR;(nzkQ`)u zqH)Ml!Tv;IN|i@`m`+GcO5hsXqd5RjB;Q}C=c0KKO?di3aTFu^MX zSB;nSU6K2`2_*nFinO|KsI5g!>=h4c4BSboz_!mbGY?5aO~L)qII_M4FP>D;vLjyH zZ9X1|OGZIK1DwaV%HuMW5XVt?ibOaDALvMN;6DcSSLTDOd2UIaJNMZhP5-?;b^+HL z1`0?wpM}Q>(k$n*!z9GW#MP0PQM@#xO`8E^PeAlCD*vtHicZy?C4Nt$bHN}OcEaY2 zc}sFVx+tcI=!*EQh|Ug_Z;yrcZbFK{f^~MFHiDQto`U%O=Cg!kHe$UBkZ8k}EkP(V zFJHb4c=E*MZ87onA)NX!Qv*vT0QR{o&kgJqXCpMCSA^XGX@ZFewFA#uBJe4sz(Anv z(%yq?Y@C9E>SW+)%TC(Kqn8{Xe`K%db>auh9M%W+BMy-Q05Vnlnb08&VpI2FgN=f@ z8>fAAA@19qzzYYC0vlC$M1FKKGe3-CSk&8qA{H`54-n6%CBd9R`6eJcX6@6EI zo!)wg5`R!CVr+b5n|0WUFSE1`J|U0{JQ^in6@!zA7qFqBfi$d;nKy4PjKce-V~C;0 z9O3jt$g1nm0!C=9h_VB^i`Li+XB4DlBs7Ev;$^wBI6sR}Q`ONq3im;XBVM5Q>&?8| zZJBqPln&G&f=a>DPEg&pZ`%ebEz~>}U_q6kZ;KEZ5f@mP0w6E|mz$$3!u6TozZ%%F z+Pw7GNSix}IkH)Rb$^ph(XlXf_4f8+bTDz#RaA_;dGn@uDlosQTwk#^B3v5S)CG1} zk`81HM_AEOHl5V?PW}_U3YIzv(3RMc`5lH-tHZ?>sBTcb;4m`!lf?R@4n4=I(_>{x zMq0X_aB``-B80EP8W0lbESsa9oyw(4kD&8jYg18H-m!J7(wNPr(sWG~RJrAMsYB}4 zgCF`1dY7T+qVw9S8NMnjGXO1bL>tOvWiaKEG4^7g( zUlW!T8oK9>r>7Z4stubq1so)NE+1cGTM1$o-U*`eVpeH}crAv((B;!puRY=@9LM5p zB?gQeqNT|`H6?5Ek%C7oifL(9Ms$rH!X+`(=C7*q2EAmXT486olS?a_sLzzl2`u@;~tMsfm`VJg51(92|e3aJ;RLTM(grr^DvAxa8PY1@jH53p<#ZO&j;Y zs17X^^#)!oD5dhS{REF7;{Od}H@>X?=Q0pv?n7n}Vl;yz(EY7AZV0MCjwSR!x+;Pz zzAdyk@*OmxLcvX?sVPSRqne=|6Z-{72HofSH*OTdf0F_Qs6~KIkO8-bNmw2Pez=Y@ zLSo)lR%V7&&lSP6Aou9lX;@zYw>lkKi$MY5XhTWA3qjlOvmWmj5)y*LhaE^LR_xHB z^INxW4HmUT&1J!mu?!zbl847mD%z|zDB}mw{}mp`{`fneePUWf`zWvE7>WzQFjWd1(`+H(dZT?(u z&q<-1&J&>_5`JbAp~#S%6{r3i#Kiv&f%3opogb#GA42{B_2PW}`Vp}1IG++c5(17J 
zZcQ@h5J(M;Bu)P*8oHLXS)R1Ky!y3k$)x$>XaYV#9Agz#RYx<4+(HfrJmxbQ!fa4n z(+qiS=mq4)e?`;ccN7)x>s{|z>x*3l>hBYl!k0jiz6K`#7d`onM3|J3l_d(foGV^1 zo2nQL{5z2_K0O1w)(ma#FQ`^V$MAngc5c{Bx|LXJ4fl(`MgOQ)9*qX(qUhyo6hgI- zMq^7B{;NO107}z*QdlTTYXxm_^NX!kp3OgXo1W-I#GJ!GQ>8NIQi2=d-HrbI*_u$h zC?ogo-hE(iQ*@EA!zo%uV*Q!5;tvlJriip?lL)uN$PchgEzDNP0oCKENdK~N*iSrl zEAi-;10kKfA_A|{%JOUpBNG!ji=tQCK;rgWNpjpuC!#nqA}}HXMo%7`>KXp&ugFFD zl{s{VoA=KB4+`nO4Cc`ixU^A*-a-|JhC_!XIaLL&gh=84G?-U_@`=fl1o+_K;5>{j zsc#>$nS(W^=i6dPRMXs4dMwo83tDZGVY#1Hp7qmj(iS*!;!)FGAqQPxCk!ISEOYIP z7k!BM0P_xUsvbE)L~c}B03L@?tJP|He>Laorm4s|g02Y&3W{asGk%vPB`IkJglM#H z5}tBSqN1nZa0_O$$He6h)v|Hy5s@f>9rXtW24oAq?(gw`8vU8=n@{0$_ZJDj29Mz{ zRc}vlb%Y8#$iT*dd&o4Mot#v`l=Qe9zWbn7;zdU&2f&(n&FLbZ1#T8Y`o153wBFpj zhLX5?e6A*yLT}Hv8vtA5)+&xHjrluvOl>Mjo>2z20Y#Hp%nh3d$^pbN*Vak!ImqlC zAdg_-;1*eP*pSH+s}~0|q)tX!<4d`Jlov!)D!RK%s+M8y29M1($pkzI8Mi$Pgw^=a z#`TJQ=;}Dqx4odjM~*$NlM5L5KZx_sstf`2M(0Z~?F03xJRY{dRL1JBVzt)a+q)5T zZZ^)?x#7IW6JdwMzw7gdV(O!$v=_+HG)(JwkVc{>5Ps{=>q; zT5$>yEueY=Q8Y136U+dNN{t5e3o+^XZvA?5pg;2c#HNek=p4w3XT-5>QnZ$$_B(v0 z0B%N3>?|)>y;UOYP=^%jLzgF^{7@M0!p%atc;{cdmJOD0t)VF&ciQ`XN zZI$)TaJw~;TEwq@`SNA#tnuvaJg^+h%*^u1TW<2<9Hgo_KmqEM5?a1l(|Z{a3TWT> ze)su7HjJol-|eQTFVZQevISxKNP#J<)rb;*ChzFlNG(oo?iLs`r@*okAfE_dUG*{; zaW=JeFEsPCH=N3zAxBz(w^Tq(9eQhd`uI(Wea7UHkVxqGwKPZ!!n7R}weQTyy8sVl zV#}t%V0K92BGRQKxytyW9{cR&=2V7SGJRV_F2Iyiax#7#Fd(<7Nq zWvdfd$P9UYO5r=Z^w^jOfQ)ExzYmEZ} zViyzB&Fypyx#8wUJXn|?O@i0I_~8;+OsD|9*sU0xKhsBLXofYrxzO283}-lq*9vym z_aoa@>6lbu8C(7pw=i7QKsoV4&$?6D{eQ(Ryn4m|xg2F#F=ZOfj#ya_n!fz)2NFTV zLJEgH7UJF8=2h0~$ZoZO-XX-I2Hji8BUL+Bo(#{h)DWu)8VsAp_wQZ)^j!hZKulb- zeoFvtTu!J{^==>~{vH-eaTG(*SwsfRvzIe;a;$7@CL9tsoKpMa)@4d_4GJL{6)}EX zMe)O^t`4UKY(oSz!*GN{Zf785+B+stLT^6aO=)f;=3USTNg$B~M`2xQG4vOU&C25c z78~SxDQ=kN@kKf23|qFeV73cW+)6RQO~$tn3kBT%OE{R;l7mu)imd>vk$f_C*;MF4`^-#}kT5Yee{RK~hcyI(P z@baS$^p0QyR`smA$-W2709rL17tc*uPgqAjB9&*dRY-JT z-Pc|N1hJU~xHH^)3kG5mhQq|6k1B@a6Krl@d%SA68E$U>cBd`S?TCCT)M@_^4SAZ} 
zIMHPhdF0G0h6xecISQSb54(~L#tugnWeS+=;TPO5jbX6~Zh|mP77Cj|_2!#qTyO!c z-?E38dYkj@Oo$5v#-v8-Az5e=cHaW@TV7(|o}I$fVfNXQd0TgQELRv?E@~mf;a7N|r zHxp5yB40x1B5`Wg5PcVvJWd(M<4naT5JFpg4RHntQvSOvxd?Gxu#&Xcd2sy1Nl{l< zXU?+zR5OY9r#P%gEG}+vr)bd}QwZ}!j(rds6YLkP$aZqQt}ZUb`1|CwwT;aK==(al zoCEtu(a69+Bm`uv(qTqo-66*yen#|@LzP2aMKq!oBaz)c|FNE&mBz?-NE&i5IYP&O z`PKL2lqj5sE`u?_$-UpLT`Nsp$FT1xgAr3sE?5dnQvjK?ox48!=fe?&0A@||6JM3Hj{0jCEn8;wH~7pSBnLT?(*rt3BSx@JAhfXB!or|uh*z79SS z?(@=^Ey;;nVqS${@M%cm0yIp>i_bxno~2H};ToJTar}7w{VO+CmS&0bwm#QNKV%6& zR}vQXX$MV15ObB}KwL^1<}gDDc`Vq>mK=zUyJ2?7EH1Y0xdnsGUPBCyaC5&#Ioc#G z3FcQ9q=+RytbD#WWdv=Zx815l06649SeQj)8a(Hc=xPQ`p5zcC?giRN;L;zIywtC1 z))cz$#EQu^8IMV@0v*O4h%TH_9{-$&YpD3Ltc3hN16ZVabmYT_pK<5}AWhdX6w?lw zQi!?VvSi6ox}n%@0p}(_8W}a_;(UtnC^@-gSul9m%Y41!1lh=osKcj69ifoGK)B6$T6VuabU^s|@4W}hqjrA=pQ;FXfJ;MC^cQ`X37XhQZ7CryM zuCuZjQex;!l{_V%!d=IdK#t%H%hk*@s7xH(+yp!TN1A5hn1zOm1!a#ENl0>h14Vdx zun}N(>H#7p%VAiZ9GM0s+x$>L+;o%%shm_J;$|ktiOu1IC%mu&ne2!l3Retq0R^g0 z61NSIUlIO~3gj#S>KFXONk|cL&d>v={D_l<5NptR>pfN$^vMC*Hk7xR=I`DQk&+(2 z+p16rahE{E6g(v9Jvc=kNnAld)|Wt$v=k%PRA8SYN7&iIub-(En+=7?ZrHifVOI+- z;yfS^AyH$R3o#ggkRy!&XKOPY+HrO#+S}P_U&n28=N<#NYqV5-9!<%Qm2_Q(mYd3jT|)%dFeV8l_-SBOy-a&vqEW*CJiJqWKNVJ^AMU1h0IfB zo-$^9uWxIg^X~IG@80|D{l|IV_pHxa>sc$0=lT7<-|v0j*L_{r{kw8p^$_DKu2mEY zh4HYmqB?~_El;6PmDADUPq>(rit)17`Jj%ohW#05H{)~Wlw-!u4z~8rwpJ!QuIA^Q ztnAN<3-1-)Dad2#?CjtqB_d+??{5&cKW8Drt4h;|4_WD;e9DPJVKOHFP$kI4TTv*- z)DJ7}*F67dsMYqq}0`06Z2F!O>3YjbAWZh>1J5i z)y5YV>rcn4CyrLEx8L_=#&dc&Ff?@Cr^hmL$Gf+m(9~S(*Lne{69WC)YxoF&&$lp>hC+>y=io8Eb-R#@ii6}7AlYTaW2jG z$eqyAI@5oye*--hC$s$m%?gKaZ&)V!KCf6>oG!X`_ijK(hmpp8^XlOB5=N$`ru{or z`UldAl(e+iT!)$h&$i~ituL>rh|bS%Q4Bp{H{I&i-dX+RfKHK5LE_OwO}n{`%o{fb zzjEp^`TX?Q`R}3HD*CDsrJOt-mWK}?rua>_c``*vxs+aFT<_3b!C2tq?(WVZ=d&-r zpx}vCa_9H&f&2YwJLi9mt49iGKQnrFcZX$5j#x@c%JCu}?}_2o^}dV0wo#0#9?Kk0 z)cpFwBE)u3+~SjboI3B8EeEZyUA=lSB4VS@(t=w+U|>mG8@EXQC^dygMrPx}?Bs#6 zON>uyEURByY2?rLC+D0Wi#BK_pJ(9n%RDD9uk78sRl!`lOW)kud_q_EDn7Tkw3K?~ 
z%9S3?3lpE!@P#w818I!h+4Stf@~K*;71udw+co0Ujd27h!}GO11F6{^Jv}Rem1x-6 z*_SO}-uYgnXqTkx$*WhdQYeofKc;O6fB4o{=#iAmso}zu6ky3|w6(Pr4;?a{n;P3_cv$RrJGi+_IJ^ma@X3veS2|oTH#f!xI(H{SydI2 z(~l1&L(SQY+%Hz7J9aAt1Oyb<*R#30y7o4{*tU9;tl)sgn7yQc0DXby^d&4O_D0P7 z-P%(H6o=uKy$oEt=%%Ko9L}999qlX|Yql?X>m!UyjIC0QQ$PIvp}2*a>B$~Rt_e7+8(kBNli&|FmNJ>k`9KN#^%O2AF z+_*`Jlp3XTwKhqsz(p?d3^5KD4*Xt{+3Us_jjvP@;)=EWoNDT@J zVQ=?cD)^f3{^G$dtJ_~QZSFHG1hwY5(Nj#Ohg&noUc7j*oRV%`$De5TCE-MAZLK

Ot)>^`TIA^*$snu4_pemeR~aW0Q17!G&>IVnr!>_dv&|cj~XG)wjh#E_Kgj+ z7Zr_8O|eNusMaq|HQUo;6NC-&w+AqH)oB~9!w#5qmIjiGH9@xlMG-6XTB6-+bHQa^yn*$`jT_M}1N9~!!*+LF-sHoq@l=K7%$YMLQ-h7_MHeqp zpFkMbMD3>`_k1fXjQzh-8FPQ)nn6=+*pCeSf5h8h?a`RKp2X;7WA z-zp|{LQ+cVD!$U-?*02my{YJMw>9aehsJP~SH67FtjV;|Ha0fqU0j^APc(FqRZG

%f5n2eD5{gW~pW`<|#LzJ4HP`ufI3 z>DM?0ipt7C&oeXE;g0UiV&#q`)LD0qjEKn2j~*E74w1WxQ@$$n$QGpA&69bqLt&Op zFB2Uc9a$p1rykBM&W)6RdwVA^GBR?ohfhG@C_+Y=!dx`}^V-XneD}`@-ImH$R*BEE zve@j47P8uM-P*miy>Rfm+LsoyNmA3vvVAx@KF->j>vCuIbdJM*d_N02`?lt(siEf5 z_wSD~aEQuV>*cwoA}OKNDcCk<9JFmO)X$&kx>nlKa+;h=^`cWR&a9-AR#qO$AO9rH zCnKY8i#?~~ecfQ_D|F`5qaB66eyqQB>lW`G+pjzd2Mz>=hll4lbY9$5aclFT0Hqif zyQY^c>-X6HK6%ImY=wn|)P8S1ecChYq@QUWO5vHE9N3m+-!5D@(?wr{wA$C+TF~aZ z`(y21%PyS4<1^E_A~Cg#eX;lgO3msvDcv0fpjCg|vW;g!4r=X)aD z0*=J3L7C0)k@1)~-$A*1^Cr)S4xrecQG}IyyQ*2q2AmpUM7{ot>S$ zml!ztGEdI0U+hm#GxG*&5DJs97cN*Fd{MQGa_io`r`aOYi>Fl{i0;>%S}LrQxp8TJ zx@ZY&gAAWEs694*4_iukv-pt_hh&%S*W*)i%Mue4sr|kddgn2BsfxPdpnA_ug$5^o z?o&ioIf!E(mvxo03x$E-W=+?ps@6=K29m&(RaEXatzhC7w6(J{f1(`!`3i#L9rbY+ zIqeA5ZTb26iYQX%yo(A?mRm1s`X;0_mAT!R>SGJ1;sl z+5=f%(Oz;ZlkoTBEIdZYQ$9;ZAuO?j!*NC6_Ag`*r8n|l= zi(dNa)9Uy4gbo-PMLFvZ{rDLE^_A1M+369c_3PKS6ng9FdQ1-ohfCOZA&!<)2HOhO zr|M=ZU?I&%#zh+h2fwD%P*G7?Hav?MEbyEfv>M%Ii34n8Z!c2d>gIL`NF+{!t8QVR z+u+v=$igLv=cz*MJjL(&_3JCzwsm~!@tm8oyS`zs`kq3inEl9yZ9a*b{1ZdX+V%BM zkF9S^47+}v;cIhqjCPtqhL1?md`8V=>xiV*Ryny%6oQlrT;kOtD+JQ=sj&iN`Jxyko#lUAzka=fQ+W~4cr>AtXjX~KyZv&yvm{0#*j zlchM|Pu;G5sUGpOX?WHd;nubvs1A9Sik_YxHK!8^AR`PLGKw5qhMH(CE-qenllOpG z_sMA|8TX%m{2U)2dnb?vfOfcfZe}J5n@<~JS1$JP{d@78$J`7e7oV=U7aAHE=`*{M zX2t3NW@_zcr>Q6eWFR>!oI97{Z1*9Mp1m5Uh`%z=p_3ZbVnW&GWqSGre}Dg!R9q;o z=S@{r<#kT+>RcC_uT4z=ryHvFjbC3Yj3mSmq3YJ2OJ#)e%pq}hgX-gbS1N+no0eZ) z$Fa+jdEdT$NiClM^NakG|(`&r`sPPt59^I>ou*{#eeJyE)rF zZEB7yxhpJt^+qX<)4n6Sbx@G2Eq7(?0d7Po(eTjM)LiD~=2no-kl9+baKqG`zrI~> z0O4Ex%E`iV7uSafX?8W4$+fn&wl~AWuTjy^zNuhjV!9k3zYRMd2v~Ny(2Emiw=DMX z9iy{nS9dIhv`6jt=TH4!8+Q~nFw|guZrf0(wn53K2x-I+1Hp8wSFc9l>21x+PI=nT z-Q4f?H;4ep*w9`i7Uvl16bfI{?E0)Jkw3W2Zbt3Y{GtFT8s7 z%iD%}cz&lha`I@SMP%ngi^u!-@2A?T^1z{e$WMByVXkDwCL;iS4k_pDS$55r9z5Xa zW0$4iRG55vwD*Cy#p+Lyav~pTClO4!^7Fe`h^o=yrT4BbE`gbunG->Qo~^CfQb}{}xw*N~$HMnK zkg$JTGHGC7pj43dWQ@uP(GoJKHRdtdZ@jd);4yURhVQY&uW2YOZ=2lEPR-AIp(q>* zo224dVEWw3zgR#{8$zH2k%cCNKX4+i-D0e({QQrH77X?0>?L1KPs=+xin$_ppu8LR 
ze@RsL1eh>b04%)InsB1C%~Y?oDwEGhYr1DSHo|(a$UTXRhFOL=LiI9AFG0;Zug=A_ z%_0|e#DvHyARiidjU5cQeA#rUDO1w@XKdR>zICN#WnxZ0R0f+e>8s?H@=IjbF|CyL zm`Jd_WQNmH_pG4n{*%zq(3FMoD!KawSy@?1mX_NK7iVoggiEBoGnC~DrtPi_sf>^G zxr$ozL_g1!K>6g8Aw@-U7ybPS{_Ay?DlnXxK8Ku$X86X2y%*JFf4%N)_Na=KW5VGm zcu0$DM#du9v#Z{J$6m#z_g{#AhA9-Dxu!6_ne|D*N5t%YG4!( zCcSyItPVfF9c~o!87pTO^Ii1%lB`|TF?c&2dj+&|P5IcdgLk$b>lhrIuO9JCdo1Hw ziX`B>w2)n*AODhL=!iOPZQyZ{^ET`X;7;?S*X8 zhO6`fYhn(WmRwvu^YgvPt=qRR-n_~D(y~TDcL_DS^zGY~l_5CKm-i%T^3(B7SnIwz zx5Zg1B1H(_g2J5Phx78x&CS9^t^viS40*@U=hqe!Q`5L^r>VhHgU#7hM#Zw;v%4r! zC~_4ef+w$11#v55-$-^vN*ZX*WuTzs%%Dpp9Xz|C7t7e#nDWu12awT6(|i}`Q}wbd zLIt!((XR9E*~5lYCaH?vSmNEhnMR~=hA-D;fEINDHF+aC zUZO z#Wj(_Anb8*VUiBfkFNwT=6 zh6OEJ*}G5y$HA{+A0F-1SS5Ys$`x`&BqQceHQ9_#PL?AOkaq*o1)BmvQ2dZ3{@AuH zMlDkIfwY@&OKWTT*;Xn47aFJv9AYNR415<{7&b^UBkz}ATuwjn>^(lS1e;Y8ca)tF zD?rNo-QD+|J9lohj7g5r7D{sVNm7v(US9iZKYy-C)Yzu-NRkD;a3%h6{PBSkc+sRW z!+csMSW8{q#6j!J$B&Gt(153hPM_YO8Yvr$+*5-4@2QHcTwIuoYm_y;&b52hCf|86 zrRy9Qz_5_D#U7VUxBD{5`{<^k?*hTn?z;(CLR3Pc$i6uB1~6f?V|T^OWP_oYW0zuL z0m2EJspRau>&(Y66rQ6gdfBT1fyE?MudxX+uUT^u-RHL-KTLl8{GKrh*hZ?NQZQE_ za+Yy*)cyp+B3a?nIewcD-RLL{q(^gV6`Cm}DH(;IlbYDk-@gjInVFdx&yF1{5w#hG zj*gCip3(;AJD0yMFGu`v0bSjBCY~gjg=nptJ^k^+htc7@5fe~bz3oMEi(?_a9oWGK zGM=t+@z;G9Cbbd0YO{mAEMh~uziR3oR8*wP)Ybpqx(kV(6#y|hDoUP?SMluGXB^^Z zuA>Su-@AA3cb~+$Q}L0UuR+s~pWok`)W#l;MvF<>>@R7Ch9JET&Hw6&RNsVVk(QC` zkJG152gJn0V8;W1Lj6keih+9>>s{w(91-A5#z1h7A|lFha1W}g-k|Vc1>+jrMmtKP zu~#5hu3??7BqVI_tMANn8EQJ2lamv)UgE4~kLZ!B*RGL@LE+I#(Rq^XgMj1PzFiqC z20bcyO^&0fnEOw&MxnuV+4XJS4E)X0J2ajKMMd#+{`j#KeN6y}0`$b|fWqnqA|oP% z5H*`YZ14yQ36ZvqL=H~wv$h;GD9l{JO604-0bK=v(;5+e{`2R#bfii0MPR@bj~~BD zxQI4=BZZb%1)j4LeS}TG9URU6ruKN>sadxzuOssAqpC(}=RJ}Blr-*cfG!kA_{7Yw zUn_BHI=i~~sA*^po<993$8|_Iv{GW%F3sG$;kNI?3(dJM$w<`U4h{~?T3TA90VN%< z>v}iyrAq?CXLI$idK{uge$QXLUVqw}|=*{=XABtbUex*Zw5Ifr<>DXKSL=Wv{&6ladJS?m`51zo|Tn$8&=fO;Q(2&ReGH= zQ%HPq5O*ySg2U{DmDgy=3Q>S?z&REDH9M+NfLC>V+7XNIGe)Y@d1=Xl^f9JW%H+^P 
zJ%+r8dd=j&f~h|5Q%cs=;NbGlYI2PB_V%Z9omURjrylU|$ema_AGXVF_%s0arOtR- zu;?SimI7@8=4RHmu5D4~{wvB6Wyvu`FHd{}4ix?JOudY6xzc@R98$MF5 zVFV&SR?@}P=XOv~&>K1iLhQ#%{^cc~uE`Ha{98{?SzB9MhxQI;7M7Yjhv+TW$ykG>JDKKi17{bp(;(u`~4U-QwnEy{X*? zLyajrqq<(+_xGov!RMV2(ujDkeV%0(h=d)h$)Ei9zub6BQ+<2O(LYIM|Kei#K|t`@ z!V@P>0FHYcMaL8?3q+)VsuQ-$iaJ=yA9c4YZ=}$3L!JT_EHmIRO*I0H1@b^nj^qYe zFOCRlH>LCEB~iH;rqona0<@At?}0!<{xCvfQ;)>1b&~?yResG8c@Rx`jPKH-q}R-7 zcasoVvC_l<=2A_2Zq$2V0MowOIQ4K~JpAb9&6@z_xSp$MJ9r>e$jZu6C?X;vr~<~N z0V~yuf`aHs7l~@0pryZM%N7S`XGMV94fw1U0er&F<)({1;kL91DJcw~A#ms)q{s*g zGJthRuIB*1(A!p!x7LRe_44IfB)$47)E2p=d8fGUyZ7!z$HppVk&SCL&!9MubsYrW z1k3-vskOEB#PQ>}G(<2Eo+#4DDNiU0=?SG69cyYTIs89ET8 z}ToI7>u6o<5{ZJhUC$Z+(Prl7xj8`9c0Cx(;jkfYZqbhot?ybB_(upbk*y}i7#gV#W8;Hoq zr(sMXFXjJZU&P!gx*29GQ4pePESWhtg0M1LO%nY4tN25Y*jv>lg{HJ`i}bokS_d3| z*(uAmSBv;*o0c(nL0_5jCU zi+p`UUmSIVmUST}W;3Fq6Giu%0a0mpaEwnkn~BSto106XALT*iGaDiedbYh`W`**R zBMQ(W;znsGRWj2XR&(zQg1A9LA$S=F3q`c2W}O;2En$I)Z8Jz()qTzmZf^H8LCnTP zMRgQ5+hpp-#>E+fh0B;SF1v(wg`RW=JORb%2F99f(#Wlc-Z%*b5a6ND*Cz zz$E@~xeW;7_!73?{nuCCtk?OOm+*r_?RkD^c!1W#owgC0@}lJq(BxBR)`ywDq= zH*Zo79Xez+hN2hc&CACZ-Bg0xd-m)ZFnUAy&<;VtFX8KQ?=w@sefu`(`t<|^hGThWnRWdT- zLBC->)YsPsOsEmsda}q<7fWZx|M5#6TkwB*#lW` zDdd)pA3rV?de51(<-6}Ll*7JiN_&07bs-?Vy&infH zYaU_Y)t8wzDWOGX;pV>a=+OrM1yn3#W>S^d0hmG3KmTHZ{wzb*cYbWehmRl4y|L2D zjhYH{#z_)Pt5++b_@+bKP5jv2z6k*fX)3me;38zy@CGHiI# zZY6c~HMwph239kPz2dfCpYxZgSy}CXxN1g^FRjzxN-xhYVi2Gj?~lk%K}p}ZeEIU} zR)gd^J|Q7SNV3TgG-?@Euv#$s9N^!4!wW> zGU$;vSx1p09UMfdsj2xlO5;|rTnW$#f`@6y!pZ*EYjWs~IVc>R5M3*XXcaTb3NSTf zR@+07Nt_|oH@YcaYaa>3pf{Qvq%12t{*Qf8;r6>Hc#2w1P7bX4GFn=bk+y=+sbCmR zj>OgK>1JAQ8F2glZXIMfD);e6BA?6P1Vvp=j4RO`XYkisQ0^f@2WvMJlg9-8@|V`^zEH(=_ci?(9Cu? 
z4zb}QWjaYnexOAQ2GWB8qINZ?^2;9MsH89L+quEVgf`7DEa0sdVDXqcTjVmJ1>Km^ z*w~0Jm5Ks6mSL0EDAnTpxEdO7+2j8afpc3jkmQ|z{nwn`1Vi=R9U7xU+3kc(B3=nd z#(pS|G1mnDPVfo`ZlVycl7fPQsB!5sfbY#bJQQ@hmsR7%bCpOGhF@PTZ(sOpE2-q@ zLZd-&K9F?e+b1If4c5K`&Z0W*p1)JZK^bLC0WzARBBIoT)-4AJ7^^w7$rv@`&oT}Z zL?h{bTpDN$X}&-$M4{nBHG6U9V=21&nkNTVytHd(L-|b@ez+U5zNb(?wT<{6Ted6* ze}!XxGxPra`!OI&bqc-q0%D8VeicOz^H%jms{k)A74lkb*U*suU($m|2gP}yUdU^* zPWyac+_T z8%PqwP+(aT9ry*hIDIS1{jE#>cEu6ZihldnY2JP=zaFx7y_pTKAZn9ox zZtw-nTr%(8zc)VPx~(0giQ(wP#25Ob>)^sNGB-c@-E$3Ts*EQXjsxsC^ncj_0JxU0 zt}uLTr9kfU`=dMVCmkP&(YX$yVvkri;~Z904czo5ALZ7;>l+&mB`AJSZ= zF1Q=-!-z|k8dD2rN`Fnv<%yO@tBC4Tv^Xg;(Oc7fiw$5}QIqxhT2>m3V(!j>~3|u~u3U1^%)Ffl1gHC@Iu1LtTd^2@&!qjqjWk`L6B> zdG;ZqgFzw>$t7K5bY<6tG+kMrdD8w*6dDcyw|VqNlZ0+vOCz%~7sq417miAZc;dYoEg2K2F;e=7pJRKMOnbH z1mVaI*2U<|8Crl67!&i%b*1P9|c76Rmfu$Kj;n+ zZGTxNZzc(Y3Lm$8*)npk#2HAWBoLeM$B>Q{p2cTZxJd8Gy(5OE8YhJSXG$>y$FZ(! zBGFL2Q3|;sd(6C{|9u-@G^4xrJiF}BSGxsd;8C}Wb*}$`hfvX9jkP=JFXmemoJ-pa zhU5A3=fxo5d2QSr9TUYnF9SO3zbY2-W$$eMuy6jS8;kRIriCS6Ma`p}E~`F3R08Ya zRnNpyC)K|2r2NrP1OtTVBeLb6G^;(=Ru zHL_UUl_|3NZ7CCF?b`O&HijEY-Fz*NX>zhWyjsJ)Uj$Midi&9zKQCg9h(%+7X`|G{ zo2#4oJalr;UD(?kQda#|m`Z-R(Wj~I-kMsPA7}Q3s>QCwalsOCZTAmKI5SvP7%MUE zBta#w=fyhrLWDQ=a6zRu2g}2|HB^@9`FEdK!Ez#ad%mvwjQdR=MNJ6~Z$@U86L#wD z2IIJQbRqKfYRw&*Qf9gwuui7~19K5{w2bVyQbi6ZRjHyI-l##q#iG z4OMTrec`QwLYDzHBI7}ZF@-7{k|N3%eW;{GCGeED9=YTF%iky*FJF51vA_8?yFBZT z51ZZB?4qO?7ICA75)aVa_DkYBWLJhI40nPL=$8deB$q*%fcOyuz*ajQPZWy1>wbKcE;FNKQ5cv95m)$<^}M{&YbK{6HlTZ9s=WQ1OW^$2noCTNXou6sE~| zcR%8HVuoXZ=k>{CtxjlO^)^dzkETNkKBT@DZ#{8rrxXQ#I!^}|mpYp?$SOaoBA1k} z;*j$t9mW>H@K{<^rIyx;78+DcNbGj}Bs$E@nUaJz-{6)>YVd(|_e*kR^HCi6-nYml zTzhOewyHiN7Q+MapZCXqCY&#pI3_6W(E$VxS12QZ+EBqX^GVv{)?+j!&GIq2W2h9p&N^P%=9}y41ACsoH zN_I|A7Q7oYmkzi91%{y#9Th1J-!WubO z()g~cfKrG3D52kADwjW9}$aC)owx(#sUyKFl==tF#8e{)JZ8V1qEujOykO% zoAa9H$cf1esiE2q$`0~Hjm=11GP}=g-?kX~oH?7_e$wqiuJ+-)u%W_x3p^8$vHX7^iDXSc1NP$j{DHx?`qnny#tuveE1L$A8(OqgIpe+`>~fZia2wj|IEyd6pg_|L_;AOGd#@% 
z&?yD4`{x>-iHb)sco3)jE}MD=Byu8m>=YLtgH?nxFPZG&A{f6-7F?n)A4{5 zY@Bv*$%Nq92aos+3UoDGsitnBSf~Dk6+Wwv2NEAUjGUmZN&$JIKKF5-wA)(NLkx*^ zvK8|SduF+!R>4GCik<9$0Ulh+TEc$9n$PVlNn*aTB0<9l0PDb!BdcJiAvKMGbLTQJ z>!o1*f^#Wh__jB9lVoVVfDRx+3JeE?v4`)?@cRoF{50sXX5C||+0M(GL?Htc8JuZy z9S0Uf@u}7jXB57#tG_N8IES>3vI`R$aGgHcO16nFiKT9}JTEV=PP)nRm%2g*-jgTM zk5d6Z$wTr6+vEuANm$CC1>8NMxdRH~6>x^ckd7tVAHps39H;-#smQ~ld(Pu_ntIY( zXqhNP42PIbJmp@$zTamOCt0z^biA5LA?2FnpGa$87AN6#qML%K$W5%h{QvAr?;CLtXPws{N) zA>3a*b@RuXSaaT?jec<%kb~rI=o4O2_q7?--&0zoy}Z24hF~mlaB?a`v#{w@0f{I#od^BF zA_n<%5vnU-MLPvPMtaq%Rl|7&VESNEsZTQ$v019}UG&GH0C?5PyNAvZ0?m;$P!&l9 zLcarHYZ0Apocdb>dftg91}Hlh;O`xz?ty@3@;ZoxoW0Zf`pNnS{yJqg6VN_nn!)*Z zMUJ8{M z=s<^H+=?SK&BQ=M+P~2>ac;qCNbIA?TJM{PYJuzI6BA3XJ#K1lzFa^cc|{8=2^~Ag zG87&dZeS!K!G(#0dm@H}c!Q8elOryE=RdPHW@@-SLeMsEV`$tHlj~~e^ zD3p3taGZ_&udX9`9lXYnHADL z-ULpQ+`AW9M!`O!pg@MyWC(?Xqe1AELqFl)XoEN;_In~foBo;FSRhKg`a4Xku6@D7 zhnx^rS5aWM-blnMxR;!Vn(8{v|H-;~Jm2^`wN-*X{r8Z*pBWDhgg~&f;kRyGNJ$aA zzEL_$cVtU|yp>fSO~VnSHJ58|&-}g6b+YFFKM5iaZ{GBSZD*HN?J@{hgpL9THFFC? zRNnfhbdYf@^?8sljwp{hdU`Pjud%_lC?X2`Em!*{l3O;a%x;Lso(b3Z% zFfeHQm#h%9b1)=;bm&IAQ4Tj_@=5K(hgYH>0LWxmvEp+75Ccxl!@Hm!j_sk*)Y7^H zf|O5CP^F%mjqMV$!AYCzsHnKDf?DXI5@%bA;qvV0>Qc9gBn9^F-T!S3z)08MD$l6@ z+U7;Xs)?9}pTaZF`=1d@EdFA9O*enfX1Ocu$lPBGCPTA?wVO)+$ASq224(`sF{ee0 zpPG9gV^kJ|MypG2K}x}TvAakE* zg7*(2M(}hVadWZC)Ge@}Mv^2Z3rx`LlKcVB&T;G?7Wi$G#mBHfOa?c+@gZq)$mIWT zIYm%Kzxa+a9TjE!jvZo-TL>Q1U+0@Guy{i|5auetB5Dfe@Q$4(G8E?kdJI@;QZho0 zXl~oOLk0Z5IM! zbf*hE*1<0=DxN&czv&Z&6MW(&kQ-sS*SFok?{xX*aIg5wFLXHic%4rf$!Y(fw%lOZ zFx|66h>n;=Wkz`xLSwWW2kidwynAM~>s)-v7J(Q2$%9^3Ls` z|AFYm#KhE_(O46sLlo#Fj6Gd~2kM@%zI86&0OdgCFHAHCK`Xj{pB)~#YWNLdG*}te zKmk+_2Rwo)%Ln6%mAa;OZjef_KP3?Fp$(T|iK&PfN2DDX+rBh_V}S!4MwiIMcd2#! 
z()AOToKBCfUAqSU9afWxnN=y~9Dd3fm`2zjPc>X z)Zcch^DLU9QfN3=(fF5uzm!{;U?nDc%z$$}665opmi`2XHfdTaZfvB-v^;dK7>qj7 zF)>Ntq8eH&lVL)_E$+vV#&eHJ>wV)N#3>YLbInS0Pa2e_wd>Mv1PAXnHg`Gd5;Bgu zCkjmtvW+Qv@Tc1|>y0RUlw#8ra;Gg01I{T;zbo5_n|X3HrHTr>IS2TSfPNN zUyErmjeZRsodgL1#frQ+9gUA-&X4R{a|1DjO{|aLce)0vyhqn^P*I2xejcV`kYRW4 z0W30XlHGuShJ2X=Ui`y{Z%bzIU7Rp5I2)a06FIlh0t8)R8hlcaPl(*MLDGQ&`;xfv z_e?#Q1b}*};P-`xJsW-6yU z?}UiUFQYuq&ORhB@0aos4*?)UL`|5C9z(m%3OhB4eI+GYazH>aghhYTO_Zwq1vdLq zc=&ou;ssy?W+S$U(2;}D`bkZC_631xvx&_f)48u9yqr*1zli@kT%{i-=PL^|dwg`0$1Z{n`xYE48xw`kQ%ZS}5@{;1ggDa6hiOU`3xBSp1?>``A9@944=rH1_ zs-m(MeC1l4J#vL6#Q6U5 zHvIDsANk$MhpvLRX15EcVkRw+Jyp)Qc~n~AEI$^184RH3&!7K2jR;(-qTj^YeVC3D z6-5chO+}aHgnsb|^=1SDW}qD*Nv9YTa6uxgfL=$Q5`YDN`y~$g;kicuS*Kr}TMm1v zIGNvue$9xaL1uyoi;N&qUVuqU%xC^Q1}8RwOE{05<_PtnQD>xJnj2XHMLY!F6%%+_ zGB~e2)6e7Ex>YF)MRyr_Hbetr=sao+8U2R@$tNkvfyvlg_3@aq1kr(0Bm>^c*uvs6 zCMotO*S)Ja1}z*(AA<989PWI$0hETWrITq@UteEXFo?qza>lLt2pA+#5%scc_k$3{ z^D~I&x&PzyRGoBcUR&h`s@2-{7M zQ&$G+wxh3a2R&8YZZ!RP=*m(FFp9%O?+FI(eH>)47Wp$)mOyrB=0QLXT|_vAnTK$D zRtp%t6hp-!jz1hWY245y?1Pr4;q^>-K!|-Bjk6@?p&=QC5C$D_o8bd<;CVcP1VYM{ z_{Scr$N=iu_CiL=4B_5-do4+RMxG(|4baBh?4-eTt32Gf9MaE4t>i$hwKag*fn zAQ<5#S4$qaQg)fChk^Tqxyqb=isD4<*Oyjzpm(u>qk?bgFa*nEWcW_E2nPx#P97Lf zcQikPu1?0X*cIr=+!h>4<^up{2eGtq4XANs5+_3zIgAw}0-!y z2l90RGfVb^2AFYtxd@h$+7Anb$60)m@R`OG$es#78iD{OJdo8gy4~IEU>~_}V3_B6 z3`{NN>$Un8nRjqn_yy4wN7(oQpQhV2@0l2EWW|Zy5BL$MAqX$}Yi!!_Gy@r{3Oo>D zHOe?qFsLY)R+GoWBWznd4cn{mtP9WroX~i zhunR?Vk_TX0$10-?hou01-j|MUZ<_->n;P|^1>(#00axiUHxksPrA55`!OEk&#X}I zij2@#g#1PfBWNv&pRK;?(W+t}IZn=^)38xC_b4LI3f1%Qw`mY2;wGe) zXZ5llAIq$-si`5;bik%iP)`2N5>UrarFti-xVr8EMP_b?X1W~eD;eE`)t_hIz76DI z84ev3g5@v>%Cph7T6AgcG^3oc2AC`hMkD6iogiftM$kZ-Q3Iiahjc#faXC}1%zwDJTGfBIVG562qZ~e{ Ls`yO7=;HqZQMqKs literal 0 HcmV?d00001 diff --git a/results/plots/best_loss.png b/results/plots/best_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b7ae2425ffaaff8826b86a13d55d39f00f3537dc GIT binary patch literal 26093 
zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| 
z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% 
zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ 
zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP 
zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ 
zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 diff --git a/results/plots/f1_by_activation.png b/results/plots/f1_by_activation.png new file mode 100644 index 0000000000000000000000000000000000000000..91dbf117d63f3a125f330ca17f2dc05f09f9e712 GIT binary patch literal 14468 zcmdsecU)BWzU?T9MvV{)g47r_D##!pRRJpsNRcj8Md=J3q>ix{N~Dc+l-^Z(R|z1( zh;)GgM5H(A(%;&i+;h)6H}AZAKkxl@@(EkY%$~h}zi(MIPs(fbiMvjdn5^1yi z*;8sH(pqs6X-(Y^>+qZ4j>oC^N7C`Mj-$GbiKELU`>Q0SOOCcyHjY;2mw$G?YVTlf zLlGfMl7$ccZ06`_>mVg4X#MvWkZtTu1$QYkwc;cjY|mbBAdy%u5npQ(WfROvB=HIP zQ^z%~g^zYn^@C=XS10a%zG1)z=)EgWUA|eNz$eI4a)ObC!V1@L zGFhSVi{}n=T3GcZ9?ty3+nnQ0ys)EnDE-`}jUlpYv)A69n`4m;SE*#$xREE%gJF|9 z?A-2ZG(PNBny#2L#aLXhv#MaOIyMv9kN`fOlVHM{iE-noYdAH*c+RWVEHawk~MwP=FLqXo*P?Qk5tPmDQO-% zc3nA2W;BOmDCNj_k-AH#YeLTy&B*J3prGLL@;KGS==`%=cgD!1M9W|Sz4&%YAw6ld zg44!vso$l`Q@cIaMmv|%aV57sC?di$O))h%FmStkeO17|Y!f<1d3m{}J8u_twK$Tx z;wo>yAnYIe>gIaO_8jYt8#fjV>GB6eMyB81#HPh=+vz!HFu$5)W`|lN0R9XyNI^t4O55~pD>YvZc54?Vz^!<0=H4ez6#OUQxD4dHv z;+^mBZ`*hH((CVXn`|YCYvmB9jEo5feh=^RxY|XOoRiIg`tE&^=JM_P+_MO6l4J^%1 zg^F2+-r39<@53oR^rgFuyj-$4c0_u4x}jh&J`8Kv^x~@GW{$&WjXWl9GjmIQ{z0%- zcD1NS&0sz*`k|nrQiPPIjEoE)AD{isePpuoZ@>K}L=LMKgYBb}^7oJVv7^z&5x!^y0{e%f_clNbzrA1+BX0ll2S4vO~T?5L>An0`=D zkmSPB^w#bo*QxD0cEtax6uMtZN*B9ZI+7n0d-~xH^EbcmD(Jq$8Q#W5)=xY#mUogk zgP)(Dl+#cw;-%G&(IlcBF?<3gkEK zIQSl6txtZ%VH_A9o|b7Q)t zUY@u64+{&cjkf2ud@6s)ug{`!f6Ya2A(Pe@SLOU$OFRoDv3?C|A^Re)z1HY2l{2C) zXFs}f~3YyNZfRKu}QcKzo{=ctfF!4b5|UpsoyGF$z7| zmM!nT`~Lfoss7rq(&bSbIsXt5Q(yI{r%!)rcy4^Ar>Dnw>$YulY<*3#W=c)q0reV$ zb&a3wikZyPmy^4zym@6it=eJQng@sr2O*Nz=;}TeD`3$;7AfzA_fs!vXtL#QbaixWCO{ zMc~BAll0=bPRr)h3&zA3cKJ%EsHF)4wooy$y{PEu?y5ce_7$dvA*3&4nLV2SQpQ3j zZYm|C#A0cAnVx6Y!{wRm5J}{Tab|0cv7fYw%-4@hnQk){+qk){Jv=;WN>^qC&y)%o 
zR}ibajf2C&-`~FmnMACVw@}4F-K;lz_U;YA@`fVX9*cKgD{~yIFTZv7ZkWIS)=HV? z3?mzk!)6!9il&sO#tO$nbGy9Sr-z!VZ{4|5u#~nsNh>`Z85!9(m=Gn#>-F`C>l9s_ zZk^jG<*AV!B3JGr6g>YL}~PgHQyS!9qsxpn(?2%XqOX<+YIXNaIqT>+(#C&z?-1t`bF_^{P8jY1z=8n%+em zi3q;4*>ZL^>r&aBs`mEwiG1^hcn;w!_hjZjZwm?zR`BO>{k$dHsn{D(Znsgay{C|>C>kr z1w_+u`>Z)Zfq?@xAtL^JP>3sesroi0^YKL6@?0FvbsJ90B|2y8=&a({Dk8-Vzii4h zRY8v(C>$?oKt?10kQj-D(YJW)8Em&H)kMLtnEpz%^43vj0izOq|16hr!%;@QdQXLq z!?dJBpZCJ_U`(yd;yJ;hFE_+&I#VN#xLC{#HTlOPhr1tC$k=^4bz6lJBs<~!>5mNw zv&52ml4WygoTVPiDkqa2yS<`hJQoJuJld`M?BZ|2A|lxi{mN&~E)RO8*hiMk_i@Ur zAzVKX4i09;#>M3`L>lt!jA;p#Z{NP%&dHh5l%hQxVj5Y8LX>S>$y`}{x>qzp%1NPu z{rh$63b@A0t-Cy?d}7ASovgb`I?ij)&!>;rl>E}$KdrF3juaWZmJyBrUW;Kv(DnYP z6T|fuPuDBz7C0smb(q-ciHQkX$LiZ4{uy8$OH^Z`kyS4ckSakrLM_vTzWt}4qF0yt zWwl+7v6CzpYX>JOYtyO1ddWY9F!loDa`tMhp0=nKE(@PtXbPS^ej z1yd=nW@1O}^#lBa@pSu=4MbXIjtPNaTxs zeqTbeRM-R&E)4+3K|qkDOTB^xt1F8Uk$ilsTT8w^oXBo5EKHtQoEy(5cAqOK`XPPk zK&IF7g62FsIc~||tdZwTkh=Tn%B$XW=(2|0USyX@0(|mP<_>Nyt9~qZ$P(Z zK&zorPRaH4^^RShbLJJ^JBL~`6Ob_Lty^E5mR+5%ZOSl`CDs=pzzs#av6M;`wdvHy zUb3!~j@bi2S~SG_0Q$w=xUn`tQK0^|I~hGc{_EFk^hjwvR2|*tmv7IGwD!Enm=t{5 zwLIp+Az>Sh*3pQHm#}!^EdkDxl9D)%UV8?-!@70rQ>WqP1Co*#&`LE5okz*;%qdT` zGYl;n6CdT+bfp_DePZ*SUBk5QO?9=R-_gl?M1LSc1_AsCeM|G(#sG^1J${^cA=6|K zCAMI$)4eJ6f`})2Ik6bP5qwL9^GRSPO5$A}ndq~-jJ-p}ZPMqus750}v@+@xg4*MQ5kcw(Qpz&O|FPT*XCH zX@RmcnSlO$Gbv{CgX%|E4^U6^yOb_QxUS6iW0kDEX*Akkb6Qx>qmBmvE1|Qa?b?G9 z@yZbvdqO%2U9{U0mhL|t8L>jM+@~9VF644sj+(fy&%hPoI= zoaTA^sB)!lU4vS0r2Cj;Wz2Y7TpW2g^TmsEj*eO8jJ)R?e)!?gwqMU30&DC-o3&~c zK2!SX(`6Cd%eJ3>x`@nfxW*NZaMEmV3dcj<-Z<1${_MkSu}7}kNAe{ zl;pp!j4Wo+xWD1a$@R2ZEOQru%C5P)hc&0^g&D0bw3wjN0<>T!Q#>Pqx9Opx@kXUy z6)i0-z^~i3ZoN!^_Nm;`WqO?{z)MACWy0&xxv>t*bw9FS1WmFaz$TB|z^;I>Fum^Q z1ni7QnpIR+S7%wbHy5Ft_9Z1m3gg5Mt>g7^ib@%V#onMJ<_N9)`MxkZfH@r`fy)yq zbxuvqe*-hKn4zIzJa(lIHCLR#c&@rb1iZ29sUWLdTGMn%q~ft9X97v1Qg&s6#c8nq z^d>fPW2}4t$e-8cmUM%mx|q9&+p)(6nNM+^B+Hg`i;jY9nT3HEU2gupdrzy!pF3}8 z$Ri@2clj1)wC}=DN(Pbf(sK-lw{&{bZ%pflnk=U?*zD}=q}*pytECUjw0@oF5xw}5 
z1iqRS9ewN}n_x4DHu^Jf(*Qpc*?DEjl|UQD72b8J7cwoZxpa8Nw=+34V(axbvmZKU z`YyZT|98>+p(p?oq|PkoE|uja*_2~+;uJKdO;<7Ohbao zFqo{y&2_ZMu>7&tbd+CKE-sK|66qx?a=370yxYK~J01wlpBx_kkgCs2A~hNL`pPGwpZMMyCIPjcQK%u{C_^04~(t7-nK)^IHFLIq^ihV zeE37h-e~5_eg9)&=6@>W{N?K(w3jK_+rN1I`ZdwK-@G|{)x<=r&1K}nbM#WOB0xaK zrE(G}n(SbiC$Fg3(3qr-zH<;1IuYARlv*Hj{*ls)uhybE9SH^rBkDH`ijinK7ZzHR zvRQnpkKRB;6cRhzrppk(UmLI-5J}?_RaRCONbv3Rk+y6_w5)niv~;^(d}6x2UfzKN z2hKY?=YXF_v>6*4LlMx7^Nfv+9RTpqI(znPoacjw4>dJ3K4)R0hVceT&~%hiR(AG* zMjm$d#2YtmR8yj&qB3TNTP$DS+CUqj1qSZHXI5_I;_}dRVnPtnN5!|dTZtvqRb@lw!Ov&h4q)+yMrD*>TZmLzVP_*V?|2g)#@GG z61K;X3K{gRIJNZWoDfcVg|lbhC!UWX8vMY3nZSh?r}FdjANcvv9J~GNAMHL97#cd1 zWuTz&z-?wI?%us0Tmmq#W1AUeA-L-ZQDLu0YK3!2)Sx@It(&D>HN^78WJ zWm)Ji(o0{;q^^C5259g)ZM=h(wW_vO<@W8{lQT2LpW4n-Z@ts$AKOZ5^gk)M+1X`7 zo?PBMV*R0cm+1q5}v z1Ox`g_D4}mDu#)h3uw35pVMlk9OWZKw(dpEz5>PZa{0!YkyiDa>o(Uz%>#s=R4}zM zc^97a`t^xQe;Q4@rxF=F#GFfKeEp~7>O~GW7C2cVNG03Ab_I>S0XbI(n)Pg)S}VR~ zT8q^LV)LE-UuN3I+`*2*TwfmPWF)ghTglR-A_0KWM3S1^u${{vo2%6}Y}l~S5VaDE zO}6MN@gy`+)HPOKUctfFd*jejjx6}G3!T7Ly+?r}$f4(C)xL$93~Cxd=Y2Rt?OP!D zUzhqLZ9M<}!-owJJOY6aL3lvk_e)C~0O0z^;>X^;KB)z92#-L4Za4=k5*?M$3{nOG z?GQvD(4TFTf9{JcRDh7DEFY%3p-VRqJ{iz5_$4HC9_|n*`0NY? 
zPXlF<>*3wIm7>Y)=vPjZP2&GGhsgQXm?&)DzCCtjd2zVMPqySr7oaLOPf`H8QNY+i z`OCaP>}Fl5+6w(nAw@ew>)N%v1<*egnfcgyJLrm^`}_NKpzK;sOOQy%I_lqdeE;D; zI*~mn+C~-@7Y|=wUo8M6FQDXM-y>s*wqX!*`F8DkHIbi~c<(w|*zJ7lzKX6cBOV?e za^_+5Y=;nfejost!RpFfDZhbDZhP`Lu$_!VmW;C;%W!}F`hXj|-z`#H=&=@KE&-c(lV|}@g z0KYmxLjApvc@1_%NX*#V&SguB8miRN(vpUb&YNZNmizB_4l@kaUe~oBh+=zZI70P_FqF|Oh@y~K0Tu|Y|^sc{USl780bHX-?P%Xs9)JEQd3p61ZX zHa>CU_1+dLG$(BXMBvM&Wc$1eP<#rUNBNA5x;C~%Njk(=JV3S0Q`2M?&+T+0rb+SW z>{@wVARFjFsn1^%r#}^hQi6Wu?CcyjY%Lb%EhMV89cK(mo~)rm&(qY`*B@LU8x$m_ zrl!jM{fvmUWV#eiU~wu)P*K33VA@{<6@%n-JgAH=NS#+io=M4cUF-3YavV^#Zu|}x zUY^WFA(veE3gW>q#?lROdFi3u>nH@3`Or-QFyosRL4X>l_9n!63lVbVZes8uhkZg# ze>XpXMj)!U3ojAXkYpUx%Sja*#z!V@!5xAmB>nArzWdjxI>|W?LnBQ<8#!TLS6af8^08t?!7ve@`mZ#MTT?CyhZT2ydfyqnzujL<5k2jm2>?OE9 zW^n040_EoB=7a))^3sI)Nbzi&C4pII$2wASFefW80JxL$<4Gg3I?Sdd|I@@8()F1S zHRYxETz5i*K#(^@AHSLI_QkN+Es2=lKm>?M15e|Sa!f=9^L31M-{oyi%kAW2VPPSJ ztU{OZ^eh`YyXSzW+))o7JlM{~m5Me8eqlHx6g{>n;`zIdy`;pwcMUm4#FHG*xIS5!!4S5{Q;@$;wDj}r6FjFJTfi1O+65cAGhdj$o@lZU`Wob6#2)#cp<#g7*}-C6I4@sI%@}s^ehXr9hTXf%4jjCC<;noqx5ME0@Nfz~nvo;`JqHR#J;uc`&z?QwNJ1HCv}s}%7d^0R z*KrIW)ig8$pT@*wqZp$D^U25<*56*d>dPhhw6Rfxzyn*hY*B8{wQ5xZ3M7w`NFNuR zPIvc3xOCQ`o5Z2?L35T!TU_i~HSAsfqr7|&Gbx30=QO;$N)T#+5d1Jckypcy01g?k zr;t?<$+Hb=)URc{hbasR3T0;-}8rzon5#)a%M zB?=`Sz$L^Y@0G_2)I->{%t2weZjtPqXWoOCAWf4nogfd5jrnG)q6Q z;MOfPk1!ZcEe39@%X53>K~P$<%o`3v55Vla>CK`IQJv(nqk?r`T)7W`w?`hzu|;Q5 zeh#H0dDqE15atUU(RUaiBsI6+$0;mM;uOg2h|XX1-?g-8LxMC=`m^3~Q_z|n z2?co;xOON$jFSB5DREyKcn|t}I!eg^M*XOLgrJ)Z0IL)wV-WDGQnC*R&Yhgv=66GO zsz!wG;6aU2ujOgHX9(T-cd!LiQBl{)&uxiY&;j2fBlirJvl%r)DItI(NNm!{yUZ9y z%PX~Dom8#0w^Q~cLGMAUO|H@B;(zA~VA*GKV)?#9fcQtACo_Q4z5D)|kY#QaMHD0<}0=yyW z@zvJ)!?WK)z|d7qHHM|doeK>JtrM7lxJIXu59e{KYG=>hgFi!E?^|HCvM%%G?P;|V z!~Z*tHMF#}0z`?;d~R_&3|J^lKVOXyjDc}vR+k)>C;X@y6bgAfAFpvb5oEJJ^02x8 z%0f$NvUPh7G3EO9J*INm7ohMQ%hp8Wfbxz%YU=1b&arBJN`SKuHW;#qYT-JMIpG!F zJkq*iVQFHPIBA|mliPh>{K3GbTtyMTXgvKlt?0jhq_Bwpm*!&Lg@8zQ8Sk{HdwQDy 
zB!q~Ik~<`tih9)Oq`5MGyY2Vj;P@o<1U{4p{y`X7-O#Fz>mgoUOIuiiVmu{BOU|B?l2wpBj6RQ9>2Cx(#8HHVBeGzi@S&HHUj z%iMz@x#g|@yN2|UBkA2-ZaY~Kg=+tiBYKd|l}B+vf+p9?*xTC&1_i|dz-YB4?NxBHvkQe`c^{l6$E zo6z9*I1M$*6JxBtnvfXGkz!w$AS|5=i%lL7Eh)0ygaUbG`_7$7-rheZ%;He~#~&K5 zvK8#CY-}x#u1JVT`{9_6AajHi^YrP{?cCgHdliHCi;HWcMER5T)rd-?rmntM9xAE^ zCOl5<*17TpUpm9O0F?o(WW6W z7Y_e~$mH&6o@3-OjpygDe2L_IS#YhEJ`-bD)Q{VV7W;P+QT}!>e<7uq#Cc-2l7?~0 z{-1w728HQ?uWvJ)6zP|;u?Fe?T2)-F+81T^^7P~7rAh$AjdqJ&PF@}zLx?A#pf?%li92kDr;>zg=`gHebkdMbxUbGw>5 zFsZHjTgoga4}T9Hck{1Prl12@Kokb3VhklXjy~qB!hML~#?Pu@+$ZP55to>_|MZzN zmR0_HF(T0P@Fe~Y4H|6 zw~0u!iR*302rv|U;@{}_Y1TFX=CNnb4#6YF4`*0NNJ!8uy|%XH-j?0ld3e$RqRgN* zwxp|~TMOoiGGGuO#7B&yf>GBS6I6INb4jQ|CuW@$gqjEYB!*jYV21iVgkb6UvDsF{ zrt_!|udE^FZK{O7j=JLU{rdHmbx%*T&a%LL30?TG+q4-(HcfNye^NHU>;nN!KYY+h zn8o4h)vHY({*ii!<^U0@&gK1O9{Bi(IP{$(I45QUMI7w}OL6BYC?ti^`=(pjd+MD< zN1O{soZWp!0X-Y7DPj2&{}kkCmdS6eyD?`|eDvti+c$4)Eo$GqQ9XCgJ1iN9cP8Rr zs}-d+f6C?`Ou(=GV<7_$KpSa-nTY_=@HFQ2{8_ARx3RG$U@p90Na#G4;=#Rps%~xt zCr+H$V{py9t9-oZ+jvyg)YNnslCTEg5#H@6phyI5AkoeZ{>aw&$41@K(fg6(hks^# z8vTpyqq3#AFV%p5a~*HWe;K6eDF#aj?qR^}dUzf)&c~tn%h>+Qk<#i+i;?B~=;MUM zhbW(BIMfbex=LtEe-4OV>@@Z+TsPj7LMs6029o@y4`)0%d6)0NfvUJw!l?^>NeH^W zIWP>2wq~9|%TK9)4D&OZq1 zIL!keA18~#{z&&ge}7`|TGi0daFqbU03%0a*mmqtD6sXn_1w?D()o?@ZAuFHUuHX_6(K?mUih`YR+9XCF6FPVX4Ar;Fm2tsl`EJOB@~4Z{qJ zw!-T<{Rn7`uc!;aN1w`A*nbUuW@S1-)*NFqh_X?H1oofuqB*Q+zAHd%QXXHQ;ZCw_ zx?~_^XyIBJI$dEsjmK{Y z3kWeahxIX0cBz}i?c;Cj388)HC-}!&GffNiAFQ+WBs6!Vq(x{u)eAl5zc=g3TfQr@ z=A#8nP=@x)DqW7uFBh)e=e_Qh0cVJERBgnO1W+!F_zf*!cuo#3dcP zm!Ywf=o9bk&Oo7!FL)h8R62+IK-NZ@RpfG{o_G^L4ia`}QR$EL(}`323j4 zPXEsrzZ7QXl8D~SDK>DI*Xwu3?nkzy7GB~FZVnRL6-8fscd07H~{)kHUJe0IzO`4zBCtoy(+|&NIB8u{eDqUa34vh@^yD zGCrAt{@aOb;OYpnTYp}`+%w41~ zefsoCpJ%*87fM_c9>$1hgD5~~1o!UUxH(EL^X4ESIiP9|S+64a6EKU| zef#cR%U3tP_bqYgtEuC(DSCq2sug+%xr4vxU^WKo#E=nn3iz8S*qFGE>rne6Tp*A| z){E30jlnBZP}V|4OqJ;o=SbSOutdaX96;g2yoE#3K9*2eCtYr*>*XrLaNGc2Q&2!a z44ha1{0jbA@DFC2H^}`tWry5XT>>irD@Z9Tzm`OHDS@bkF5iNA3520BUWv1Krdf}e 
z3wuq4bqx>?kYI`S$`f@AdRV&qcyWu6GN4Sg&KX~>I%v&=;g{%C7*MG}3pc#h{e6O$ zP0_?1n98PGO``@89$F}PkZLSG>#xl_v5sUxV4)CMY6&db=tMiIRBFJZM{sukDkar+ z6h@#R3McGMgfTxM=W2EEPQn1uk3Yf zKb}5ZYZ@fnY(Eq(Q4@Ry;f&+)`g%!T4GW7dnSg1lf*UU2_hI;f7oZ^am<==X zGaH-{0vNh~2;$Ly6sU*Em5FKF%wD!0Rx~UL4qX;0xB5<38IoSlA{SIvjhQ&A~~yM5KvK( zBpHN55G7~HnKu{Rw_o?|y8Z4Q@4i2Gyc(lUX~Efh?Y+J*zd7f+BY$3smSz_Xg+if~ zK66TuLRl?Hp{%O-c^&@Gw-SxB_>Yjy=?gYW7Wy{!y4HG>bGkN`rWQ7)MwfrH)3d&2 zWMR(BCB${~&~Jt|HkQ|fxw*~$_6uAV)&|^r&aY|0MK)NTQNKo^(Cd2N&8sHE4SQRs-qhE6~I z?u-{!rs)iIC9>U%&GFTWhj_~ww^TosE*taYyn6X&z-o`l`68D2^sc=e*_URW#Fz(u zSh`NzCr_#eiw;e@G-|Y)@op;mg+jT0TA3#g>$>i9Dti@$;(zkL@OeY;kWPW7K=9#q zy@i<{Q*O&M#o~N6z495x^)g!zs`^#v9~?V!K2(@3xFV)-X)-n1ipDWS#8I(QeEIe2 z)vJ|rto3qoa!T6Ub#ASvZ%EO~vq{~*qtewvE>u`0YH7oU4ZC^Iqx()pXV|=<|BIJVoTz8hW&f?{GO7MBJ4VsZp81W zR^&w=P*_;#y4Vx$u8L(%EX|Jut8OXXW{|d#BGddzudFUfo!2Wf*LI+JIbW}A+YmKN z&a64zJ25eF^Oh|mws%gCy%b7oPI?@h=sDM3)E$pG^X2+XrIF~6+dLEZC{$oa><(f z@uNCG$1UEIcDKLrgX#WbYmaP_@b>ne`_8h=AucYiWce9%|E5*A$%!a=$@nxr#{y5l_Hj@4M`3#=* zfy|QwJBOMxcx!5F$A{8P8o9n#MM!uI_u%4eigLrw&SRY=C+CxO3fmrEy}ygU^ixSm z$+(lGq~}Z5dAs@C@bK^>re9ucj(;iAlxY^cr(~u(*rJ>(`UKl#6DMtvCoS*7!UAKn zqLLD=fq}u5`UI7329Kj2Ws2D&%5j#{Eb}2DjQLZ|CJ8!)1uZQt3=9lWa$>XZmz++X zylE;B+>oU1Yd}S)e9}+rOH3^|V7Y8jFdKtNi23s6f}?{&C$;BgF|*P@L-NpoQtbIB z75cSrZ>^uKQ<95!n(UwQzO!aMUGH?^az4VLYj~K;-QAs*@fc6L?B4mQAtf(5rq1r} z7;LWbxA%8)-4>m^+XDJ(V-xXRr-9_GvHTXb3}Y1!5090bE)&aSD;jk&?73@rtMOohEDUY+=8e*2HJqUq>yFv$C=> zZcGUc5-{tpDll&q#zIPpi=~E#hiQKPx$5?YEsR`U6`{w~1cJ}CyDz(jEf!4F$XZ)j zReNmQK4_K`>CDW`oS+=JNhL*7T_r(TAhDjuew~th;pN`;={h_{L_~x} zwxw=C>i4CS**Chb8W;>9;OS(ggT>to#Fl2Rkfj||OHuZq*;*GVMejP>72p%~km-2B zxnKc}7ngo7pk|sho=5Z@v|Rr27JA&i zZP=1+b-F+VhsHd+7P?~lY)ahHZvom708I=_ioQ1`_F z9U5BNdX$fe;k*&!j&~ly4x`Vsorf|+iX9gx8%eUyHacje(eu> zVq=@Gk#;Ma9B3SG?LN;fTc3Qfai%LkXK-le&YhAEcJbdwR(8j;b8?onWx6lBp#Wi7 z+ng8$1qHu*OEH(!hJ}WjX%@P*7A#C{ruuOzZIb?oly;(yXIp*uwCXC|8fo2C`th~n z_~1}SM~7!#UY>VQP=8gHdYXP#{vo_wF$td|y6I69$ewyW36Rq7>CQ~7S<_#v054nC_H9*T)%urQAI_C+fy?X3{Td5 
zTwI?cbk3f=zo!IsStg=qq0)V+_VLvut-Q=CHz6URk9gV#*wWoT_t7Y*IwUX^Qnf|B z3f+gJQ|QGPs(9`P2Rt#Zi_@JLZl&rLHZ?U#;?fViSlo_P=CoHB^z_As3mi~OF+)BotEqCXxz5@g-U;yO`nR=K9(<0slf2Kd=re_|@*HiI6YeER~OLN`>Mv{bwa zm3-&v26VKP!9i2$nm0GrsN~t1c~u8zwYyzJlT&nXm|i_h?Je_i}Oc6N4^qzek6!nQuF zUWK)P{JL+Vh+fXNL(2M@}srD%G`#>Q%D zX-$^%RvLZ|5egD@&P>rMOhFJG6c*M1>hKsYbX%O8ZL8%m_{^9;R(jA!`K#Vx7M5@k zvs>$EYuLkGo})u2YUSCDjgCI9&{smOo-E@o{4yUM9lh1TJWHw2%~e{EMTeUlVf8CMqA8_(G!z3 zGHUQ>HfRf*3rF_z^Q+K{j9zzLL`TBADBB`QlepYaVE?YHOjgjmrRu}I?M$oBE41yC zp7MThv3*`nd{LR~x?5OG*;^Ky8a&G^P;R0&m6)1+-&nsKBrNFAfuvUO`TnwHVvmzTMK zuUhrSX-!vmNQPBEw{LSScH3}R4a=P@VGlp%BCTg&VA7gvds?`mv&46j>>9~x`Y1ftM4F>l2KrnL|uW^I6unijzl5 zkc{`zT~D02&e}Kg<2&6@&f9Mdg|*L5-y3VKB#AhXT4;N<; zsysQMQiD((YYA3(92@SY;ma=9aDON7M1^3xVO^ZUl=Q+9z=v`5^V2l+^xP9xqc=_4 zUaO-{lCIsT8tl>LVo9=a0VI zSo-Oc{H06J3Kqs>=l7Ln1sgxp&UbPeeY>7?M=$4?lP6Cq=R4V+K6T2l)Q>aa!prBX z$r?}CHz z&ai#Enql&X*}=0xe9|m#GZ6%9N&D_EX>HZoxScEA-OjpwJ23URa4{`Z6M|mr@}2Ef zQnj_jmu4cTM%q&WYD}8bjS}P|Zt9l@MpQ>i4IpakSe9p`NLm4f9UeW}SKvC|+0_+| z)oyjt&ZZA^7Uz|>zAJo3M^{dz)%;{mORQ;|!*|*%xRNAb@}}+Ew?|@=>3nNjOWWJq zfByMrjwvC;Q;WlJPS5?AbHTd+l07k`b@cQYhybI1^gn!&F>G&%>$k2fxx~)74lkDb zON;Zn5&MD(s>#MSBWEC?{R09vvO!!K;1HlxDCSP-x2SgQLcQBS)%xTl2CMQ0*d-S|8DaAF#MD zRCi>%xw%QEYUkfa2=6+qkdU6ckP$Y0Yc=nT}52fJ(yEx&)Q79imR@R-<3viez+` zF6L$DvK-zzh-vt5of8>iXd(aM+7v51+TIlAr8g-dpvU5dIoud_;N1@%9<@-5!uF32`( zPcx|81q7Vs%Gg4KXMBKq9WZBQH{4>R94G(wnA3z|Jj+HsmVoH%iq;D=R-t098r+2o zuUuSQ7~Ou`+(ku7+e{6ruP_P+l&cU&d}!06bG?yk#bOpNBEM7sxPglKMG*yTM@{V1 ze_>)u2Au!Y*r>LhTO02zk%5IJ74Y0|?xn%!ka+KXQr)v!LM`Dx3!dP7dgO{{oAZb$ zLCUCI(ML_V%M~yaCTr!XB9rO}*U*u{!7J25K=evG6?IlEC@6@&FqH2b8k$T>MSOhx zt0q$d{>Y_hnMJadAI+mqCnEZH@#>$AXI|gsRlzgn7zKvqS;(u87dR-CH_K9gN3~eD zx5|eA*kB5ZM@%@z>{?Hu==tjIKzDf6-;nG~W#2TCXL$`HN14b|e0#CG{3&mOTB>%0 z>u{c>5|%72*dFrytQI|mQXb&#y+=n!XMWm27eDlM<}58Lx@EbBazaSS@1jh0#10DO z0!;^~t^elFyJ<~pDbW?~?^M%dDe^MsEBf{9HgMe9k@H_=mi}*l|FTo|od*w8fVv0) z@aD}4Hda=KFZMH|9oQ&G|80jgT8i$jqMT6IH_leiG7sImZy&!=4J#WPTRdhc0y=$s 
zeaDByE!sq)qoTe9AQa*-1UaE-j#zbJ6O)eex4fS14#v>tC;5mu8Iv;^hY=WHsLHYm z0TqF8ujtS*9(xw@G^ zEK-}DoqZyC2e)9f78rQ5ol8^R&MxzHem+xket!OwpdcsekdmU}M70!)5r$Kbsj~8N zWdIjf(S7^Af2P5ncHQg~WT2V~c-a>5jHP!OU~WbQ44Ru7x@cG!7`Ov0Me}wYo%TG= z?G%ciRg6b4k_^q2vy!7?cD!f&es@bL&nAE(WeLws-uLdwM!2${4~d(Yu$i|;(B-8t z;cHqk9=lPb*RET4KuoMHLR3sl8|<8ic^-PI>+<3>-bm;;XB7nzJlYQPYDI z1Sk}aqO-Ju81{hlYA5icb?P)d&l z>+bMuI^80Rnmsl#0YXMNAZ~ce(w591t<@+t#;V+DvTsBcLq;rW!~Iv|CepWeQz(}> z|0jLMcm;z;<2S52VQy|tvuTqu07S$hFE1|%b_7~UeZ2|+PBqc8BMKQLtgi`-f5n8W zgra)+$`yi3(8SJ&I!#oeY${{Camm^VQr2zZhZKQMm?divGq&G9K5*6)!b?mv_0-*= z&-5Cd2K$rz?c29OvsJ5B^;5G_FJHbqHn9x8baK9ZSsNiGk8cn62L5e>asknXv+ev? z{&bVLWcBp)IFB4r(ACwY8^T93xRe5_3(ye_4NZ;C(%5!Uv^_%Re0;=`s^c~r{pR~s zef%{}qa2!8=v8}C*ZU`ST=gw8w2XA3NZ-_~ZaC!(Un~ryDAME6n zMDI8>S`dtcL0l!jA6Ti!|BzY88E+7hXfP9_Mf8aXU#Ia3!3@Cp)lwGw|1M=Vi{s1k zc7X!cN@@D%fM~mRMhm$wxr9}RXaWy9eZ8^1v#-y{V&l(0Ki1truuC0gG7)NaZf9`vlQR$u7FMyq0jr2X=F;HAcdVPS}1p1mpc>i{o~Kfd`c1o`K+eVw{f+ zb>W$OrHrH_{hXvUMtv1!Wj|E6t-t=dr>~nWIzqy$d7ge3-x+Of?OjJM-+ss_^c)1e zCgGj=tvk=;rgqV)# z026&EE*{9~2~(1nfBgFOYd$a^6(K^kNlMTe^zO%xw*=mBG#?f(zqaPW_VJb$@zM_> z5L+nGH_)OehdllVeSVm*+_HOj;yT)0pD>i)xN$?~%o&asBN#*XgT*pH1F;y^{eYKy zh?l=5>0;J-XaGTcM&LMo>Rw)vA_F@Jc@OjHp{(`;Y-}G2_{>{om{mxD6ILp!2(~DQ zSBR7}n&_=2j4@c2u7QEX?Ck85SzsA5S2|EgNXW1PxkVE1;g+laoI(-QOru&!gdveI z?1q|E4BN}g&+1*hdh8HjM(){-g8$ruf<^?$9P`@YCE-mgf&~*XSEial(=>|M?JB`Zf(~Xos`PLOMGcqQjwf3gEFD5dB`*O*8 zdd%wi^T^1d0aS&b7!eKS>E8d;pBTD9I9A)YyQ+O10EP#dVtD}ymhIDR=78G_J9fm) z%s6;W;R8f3gQK{#v@`}#&b~1U5RhKKUVLw_A??nir+)wacTnXGz~WCrLX!UY<7a6> zZq88PyMwApk6B4SUs_tigy-=Jgf9qN2t(4r0%k|Y`Ea3~myNSG(9^4FXao_($Ip); z!?Z~eJ;@iM2`eWyOZRhdQqrLZEaKWI4Nt&#YNQ)RBwx%*z+d%4Nz1viQZ2VzapQFp zfvc~tuf_dZ_-Ocx`oYZ9!0hrPeKxCbwdZ$yr;fo=J3&ync&?_inh_G;ORiisFP#za>##i&LLM`RNf^voPts|Ws z9o{1YAjvpiJ|`DY_AWYyrmz(>2Lzj*`8kvsY|Mqr9tek)JA2!&ZNWCZfB$~9PF9oY z(M^%3?&5`HE1hP}dCF_>@F*|uMwfh`dxA^6|8_+TYcgqc*y_y+on?w(mk!%>VkOTG zaad>1LONRkDNf)@K-Ao{TZwpxvb+4tqSGo!EkoA zrnMB@u3r7c5-WOv6=5*z1G({fU`-=591w_(l187y#lsEwq2Ye66Af|VUXy|CG 
zzOjW6AD0$K+>fV!`1r9OY)w73z8-w_=6JjsKZ_4^f^3{1oPo}vr#%KYk>T*;G8;Sl z!M%G=g8!EJD~?uH`MJXNBKxRyVQ*MiF`pd7Q7R94DC|Mg`J6>%I(VV}Z{vM=K2$bxLKg;5D2z$z=EEpj&$90_ znUj7AMUoI~=t`yK<$gBfS>f95i<6tV;l_A&48Nlfk^%~E|A_l?UJ>_AFZ3`IyUVD0 zHS&X&<3Iw!<$FWQpd3P9K5!?9l-mn0iyl0B^r&EYX9d13EqQfuQ8xCUX4 zlBmYDAiCt5Oa(%Z+n-NVO|Aj)%Ma47Chk|<`P1D+jv0_Pq=rJ-R4-hbBRyFGL2C?m zhZA;AHc}F+*n7|}+dHaJ+xZ-!1Thx)fwusUn+QIeO$#&BiI<2m?o&Ou1|s91W_q$_3gIgL&kXY zk3`Jg(dsLg-a4x=ikSved2e)pb`6Mj4lo_Gkf>0^3|J8gemg$|^?bx+gLEWNnE+bk zyZ}c5U1WE5mLb2NVb=58kt-j`I*GRdvOxrLp|q?lke&su zk+x(Ii#=2eR5UhK0Gkt5c+yQM&jP{8e1CaGAA;V4ym^+C_dPZm@27O{J6aSThX4F$ zZP7WX3{cm8gD6Wz^F4ba-V~4!CQgX1E;f66dw!FKv+ncM_F>(XdGFv@CAXg^8ZGD1 zqlzzIz9cL^URD&ds8jDJ?|pbSx3DF=vWB!uu|zLsr!(ZwXc`j`!Q!Cj&V?_}vTXT` z<@)gp6!C0IqiZX0b;eMj3Uk9$CTNk#A*$MuBS%g*OJRB>mY@9THs{m*;h?C15u;;c z5y{^l_Djxw6W2{_Vxw@d#p+KU&HJfk--p>HfuUE*s;d65;%wcvt-FdDQa`lMZAL6e z9?XBK!nXa&m%{&#cvYh}6I_We_~M&4Z}QmIc!Puy_U_W{6t{g@n|9{qLQ@Zn+i{N&@oYe?*}mCwy6vP8?9J+r-heFb>y2yXYDmOoDU(WkKDk`>8>> zFi0HvkAcFK4|WFjbT1W&2wt~6QdzMWsfjrbKic&(is&1~|4E-`=>?GzB!7u_33&DP z?c1y8%^~xkE@15U2Vp@r1g5d}B4a`)8vIK-aT&VZL5Nux5|l1qevYX<2~rq5M}5p{ zXb`0b2xq0f-lDxAA07eBamSo5LZJh5mlzlrxOXV>^u28mn;M~fYeX&mBX1IC?#KP7 zyosbarWe1!z_=GLc)X_YF$Ce_%sh-<)qs;KfM=`c|A8ja$jDQ3s_V*TIYBtvbg{fxNw zV~X+o*Q^I)_iOcZLvQnz>j( z4+Dg(JrbS}+t0jx`Lop7|}T5r3kj+(8{BO9h@oemI%A6~4_AkR(E=(jI;3)wblZ=c^ z4Xi&&uyJVv{pS8v9Ma$6soU@$%;WpzmWh9h#bxxyLuOH9i+g|kQPol8H3^sAw!VKi zlrOQ$chAawF}B-4ktvB+&oAd%HGRgapzz%8oRZQ~MQk=4K#a{r@7}?j+=f|ttc-hn zVx%ECm?%~-!Ubp~1B)(L{;389{tuZkJ5>m0AD3qKZ^Yt6B~65vx3(ZGER3s9fA#yt zJ?!H_D}ZhxYhlTIK8ve#Eqm>|b6mj{-7n1Hb;+boR;G%o!Jr30-#@msu`w$2! 
zeCa$osW1v&F@OB?aSa$Nu|o-PF8*ySR|r@k+NDmN>V<>_YM}vxa)wQxB9T$P7e5jl z04sH3=aqiZKUcc`4W=C{cZ7Fs4H?Z96cko4Uqf;te8{qk^V5I32>5h)ACPFo5Q5Iz zox@1`b?NsNz4`K-g)F8VKh{5+E3R7)z?NQ}VX{0;)g@LPP|6usis`l_uD&Gjj`q5B zqFlI`1_mTn_yJ^%0aTy}MvA%4aezAN?56~Rynoh0jH<$RgB;S$AQ~WD+RjZF!B>3J zB7f4qq<$68#*TF-a%_j1`BmU$tf^>!N{pAJEuj6L!d(S&gem%aS|}V@kP)M%+ue1r zf7LJtV+hzUE$qG|A|A4#r5I4NbVYy1>mx3I!a_h*l+6{o3)Aav-^4YSwz;{jI+5ca z6qG>tS@Twntp{2bmQCiTM*_gIpg`+e+fvF;WUrDRCaR^d+d}rr%$t{2#bb;t&5i34&m$+jDpHCGIv=C?e5g!vHffc-F5Yq#bCpUp%Iilv1R9rf>D7h zS~i32dv51@JTQ_D5%(~6B4(~yU!Cmbyb@QFVJ7_*t|R{?=2ha&X^ zoI;b{YMu>bfzfb4k)BH&#$-gquH!($^`kej<(kA34s$ibFTea|U{L(F7c5AS@>UA3 zl{yW!8v9SUb7;cy;bN|1kXWbLmt-l=H0K}%rVye}E6=VTiW3VSqioQJ;`b|lr|Ued zQ5oh}fO+K4W2ES9BC$h^7sMd#sK0+5<<4#>`PV=2NPtxM^J)3|Q9UUC&>$`D zjI5j|xO%lyYh*eL3>7(=f=fT?bz9ZKgaBV19TQ^=Yd9X%8=$`vT;uXw>T=CHkBuZ? zYx|FGk^oty3VMmSX8}&mK}85UZf}0W)(nFIuYNg4wXln;>t97`|CI}O=d601in%Hd zw<20R_#RtgX(3icV8(Qm0r;)eV52=%K-DgE)54L6LmACPDTlZ+@I+0!8e=6vLQG;V z7vK?jQdsC-uumK>l;ejm_DnhUeCXzO?(2C)pJ=bU5cv`eGMQZoNrfSt+zj=h5$Kv& z(4BSg*o2-yHYaH2ydsPfyu{FBWk{!kRDm-I36A64LX7*si~>ZHRYS^~_;!!G-%iN_ z2S&6BgMBWgi84KvMSDUg45gI@>?PXISDJN1%CPD(Ln70ALsr>z{#IDh^j zPQSz=#0S7|I8D~8P2v$uV3D0NBh?H9m2rs=TXUMhBQEXNafdHvGH6gS?bSiGn(V7( zZx6!Jkb5p+Kwu5T@{CX?$4hW*fKa#+;l%d`!?m!>%!_sO%t<%atncrwj`SF|uZfZl zg0_JMiySd{z$jD`u|#Mzi`PFq!EmU+{EL^eHAkOv`V!^MVbnW=Ge-q1+6JqJySnt0 zQ#706uvb5@1O;YNM-*%Y8WMo8SR0G_OlktL#N1yD<;v91pCA zcc{!JUtZ|a&G(^g9x-n@wsN$m8eTpxvv);BphKL#G_vgkr9kHj=Y_>WpPV~f={)Dd zM2yZHg&K)J-$#z_^tli6uzB91HMjop)djq_sUbK}2VPVHhVCE`eMo2($1X{83PW#0 z^;3l00L~i`2@TDNNqZv=O=;R9a%n3L0%>Fz$AcqWDFI-jr4?~z2E;Q4YSWRC!NHIb zH@7mL`y#fj@$JmV?H3yqo<5(*smB|?x}1f&#M+0H_=}$#PW=}W%NHB}lENZr8esKe zaFXa_#3D{_;rz{m&f(!OgW!wYIwW$@IClwIN0CrLLK?mQC8Q~ylUrssGil2<<7uSST|w2oX~wT6M2IR#&W*dw`~ zjbe0p%!EV z$nYKY@k&ytY>MD%@44K+xY$AOUIhfj=eFRm2uC$Jk?2X!5@x`+{!M7wPR+r!mMmka ze88Cz;w6{Tns8FcfAg%+AMb8^dM2R35D7qL^4Ocjo8G%A*VMg~SGv5$ YSulvYd{7+pGle33`uwSslb3G(FY3L}e*gdg literal 0 HcmV?d00001 diff --git 
a/results/plots/f1_by_model.png b/results/plots/f1_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..3e8a73d0228380c795d220a1c8de772cd9a20e26 GIT binary patch literal 13683 zcmdse2UL_zKr^f-G-`zG1as)#xH1ROlUiEf|K)#Hy%82be`|qkX9aS3lT4~r)FGb zxVyjhT&tK28I`!6+cFzwR}ff~CZLEl-7+#E;kFJA8ZWgCTZVdHBy3gw5@?bw$sH?5b6`1JQ*mEd})6KM7^HhU(2VTQF;!Ztc3 zr*HrC^z{37>zY|Ew~7A8MeUWZZ>|acyk4)xW7bk@puWDIK9N1$72IhpUXyLtEidiV zT^=gxI@V*?z-+fR9c;=T@5{C8R+b1=&YvC3?Y2)@Q{R+r*StVg36+c~5iZob|D9l3 z>b|xk*9L?rKa`DhmefX_;$UPx+QA3Ru1pI7mm@hzAeQS9vKh_`AjIIKBSU7%J(Ywzx2Fnb<$K@w@N(Qa$IBxLtd8 z@N1>`;k_$)t_|Pl=9F@NzH{eJKY#yD$H7hfnx-}Js&s>r_r7~Y?YhpKJaww1wbc*{ zs*YChweNVhj^Pof99kXaEt5Dg(3s@Fs>{LojW5g&F?CKUDBRt1=1D|}Q0|$9g@t<$ zAJ$#=mX1wKOytSC)o{hwIMID}Vk_>a>to@J)!i(Y*F&2~xq zceUN*ak(d~=S2I;)OWo1nh8*<*HKra!$xs(dkgaqiUJ zyLZWJ8|W5s3ch>Hif*xCe_Uq=7|k;a^Fy9YK}J(#M8x-WYO~J#>}2YA(BZ3}A}mt# zH4z$BRaFix>&GW2wQOzE0)+G%B9C#Fp2%9VRolUVj>l~ZcsqeRnSXifrITU$*uc4c zIo-VWw1R>{O82Q1Yqvxg(vC!}E(bs;jFrF1=aZd~LKWF+SdUJb$QQ zHp9C8m=~9XT57>e;$YsWG8>PKUaQ9}t?_{3>C^Vj4$NLJv6jmI`t)%|;k*)-r|#;S zgHS41G2c}F2^XbiwXg8D&O{RJ=H=aY=#aWq!E~%~MHtOory3-hzAcaL}Jq+Pjd;68p%-)(??T3I<|Tsszr z^r50cr9yTw&%mW~6H=`AQ$?%9_={Zw6t7y;VVb8P4V`e zsn7T6#${Ut&p($F&L2~wF@+SwTAZUM2Ah+xj=9^OtK`(vC?fYdO9FL$%=ML#z$_Wx zJxXa?M=Go*2b%+UlP(uOtV81I4Rm&PYX11+9Z}QD@4a1DuU@TZIaIhP}`(j57^&eUhg~yTj>*N=jbjuq=jJ@^P11Ekg%O zMMcH*rOTGlcAa{d)sSh~XsnPH=F-KfrR&Wtos5`tnOm^R9XOnH{5HELBH2iLaWNOA zxkK9m>qcCs?dRvGQJZtJ=jS}W-ZwnYe!FO(`=WM$=#_VWQuG-oj~`&9UF^-nICkt< za-&sYGtCg!aVQgIo@eSig(lHd!ety5Xtfhgx$d)$ia!2NpHAOkE)m%+PrWXmiNJRs zezlT%Kh^4sOs?#;DeL9me3MltTrhQsB(H0CsFU6<0jE}X#_QLwo8+y?+UXWd<@fL3 zkIQJ;+hPl_{dtmN1) zDtd0+x^;+A|M2ke$jC_EP!Hwq2MFxGU+-@C@cz9G9a*7=A~D%y=gn(Y{BZlskmn*R z=j~9P2aCo?%*y&?*>Tc`WT$r!_H^vm4Jth!_*4A~3k$EgyLS!^r4BSQa%(fpKJIz* z_H7@2W#71n9Cg%f&2dO#=aETPjgrr<)lgTD;#r(3oR~*~&4N;i8U~zJq^5hFKKa*7!gplOE1H-e0dy3S!2!Y1?Mc#redPl)5#-c@akj zvyP+or{u(Zv@&vc+J8O0w&mIJmEJODW@b8h&M#tB!c%b7J}mznl19X1&Y2N(T2XP+ 
z_U!{NJe{4h8fTv7^v!HPUEN-E8>xx?Q}1rou`HiEQ>eT7!phfdl}r8o{AwDya02&* za^BTWyj;d%x-d8W;lqb&w8L}f&c)Q!)T{`%ni_5o5;5~3B?~Y^1sg-3us{tXr&%V} znzfv(Buzt2?Xzon_nGUc)~e{3dh__L-mC5hm6d1pQPo}Nz7|_ZcRLxTr+VW@3URV8 z3t6KTvP^7sB=C{fR?!c&1t>f^-iM4!w)?crtNZd5E6gYQ>q&x+Vbf~rhXQn|RS0{x{NdMYf+QUh zliUXy=#n0DS=8*#?-b&Ar2M0CN_;{><+VdGfd(Val-bNisydaLB2+kMtMA$y!hrL`nfe|?YsBv$;osYYSBl#|A-Cz zyzb1_*X`EMH5nI+Rt&Tj8X{C3=xki#XV+{NszKpvqAJm})8AUQl2+xTC{%@?nY(|Q zTI=cQi8A3pYLqcRiBE@(#adJ9FWVxX4m9U7$G1*A<@ls!;BvCMx88+ir&v;4d0_EyA_`MX}V3%-nEM-^Vy_-rDH^Mfd2K7-ZEzvO9r{JC5Hnof+%h zFD$GDm^a=V7j9l2BGFfy6sV@A#v5AT;53v!XA4%@Esl+rI0H@e4tt;!epy*VM9pwk7oU3UK7d%Doggu* zu-8RJH1 z3vQj0M;G@NbsoMTJ@RfBsRTnFlSk-gAKzZ?06qd%pPriPxwVZt^@_DL$gf#NWLd2E z=-Uz?+q0=2bir14#rVS}EdevlLmnp56Q3^E83pPaPtVTk=DUukwK#wAlAL5UWv6f2 zut9yuVtTMq&17_*5hGBNrW_)EQ7K%e`QE*IX+ZYL?As;Jb4U$eA4kaNj3>p%Kj(Jo zSjR6QPzo^md}d<6f-nwE4Gq8Iigv)!7dV`&0MEJ}9v%cSY}&lpq%26ZMAEVUNm6*< z*GxYn?YcCh5&<3_9v1Kob)YfRG99sTmGF%^q=Y%pn!cv0s%mI4*&6-ATJKzPniU9&P;}VUn<&0Jj3qYyz)oa$I>oBO6`kwRSbzNn_ zG?Yi3Wh>e3S+j^I-5V>|(v~h=n*8X|pDdcPZ0hc9;s*jD*;D8AF;=Or^4W13en;-n zSCil?YTt9#yLB1}*qGF>0JN)3oSbH5;j*nkhfI`dR*Gy}et7vsP~asyd&#BD-7l|% zd(0%HWOr>9G{`%@VXtyVE?27cNQc5ndHKG_r?{)edOr>b#VL>)PiP5BHF{s`mG1g% zySN+&RxChT(Pi5c5!vggl;ZC&aKkrLqzR|~ocsd>EXRE7mpNZ+CMoMYg}eTapO3$KWr#>&07{rB4q9k4ee7AFw6V`1RU1|Ah&ESUxFd19-;HB_45KxFD-8 zp?nGD`s51%#Z%tg=kRFKdrIdD)(ACmiCfD7Y&ZsjT$jHpaG&+C)*Qjl^$;UI%8S+o zvSmjolwneH?%la#?fx*lZ24wtdDA^W)1Bbrm9J~MAFt=$2jcx7a`XR;%HK zjXq#=45UQd?SCp;Gq}F(=9cXLTm<;fZuD2p;9tBv^LFrq2WydQ{NH~sXJcd2kmq6- z>W-Zhyj|4(Vhm)5lOG$+2hZ>SS7%ti6BVzRK%MmihVlMWf5;%*u|}sP7jN zaul4M7*I#T4g#_pr*Kgy&iuMXWxgsh2{vEen$1mpy?6I63iJJq>()IhEiKiVUAb~) z5>Qo2XAy^N4N@=Zz~#3hcAYqK-TLpS0L;LNslWbuf-!+lmWWw{;`5LbR5IyB_ zdK5}q=d}Sz8&lJGG;}F>Gj?vNo&!On?&u4s*#IjelasO*hH)SH;y${xj4!2J4|tXN zcpKWun>TOxgoP_;(!#=F8>5X=w*(`nj4a(&K*HcO$WcyPu;_=icekFfd4IYrEj@@6Wey z-@8HE%C(XepDsA9q6EDQn?61uw4^M0M|f*yq2O|4#FKxnOZ|%&`y(0af5OY}4Ycps zwd*+a2%?GT>4hA&YPnXofC?ETY7v0I?y#41nC=YZ=jTs)nuzCbDQA}Zc6W4`e(>Y_ 
z@b2AN0zaDUDsb)!Xh7;$u3VwbKG<^Ln4#x9og5JWR$tqz8>n^WT_12v4BPVoM)eLO zIJ$TMt`DW9_VhQu{%Q*C;IxX$WKW^Z7y0>q6WNb=xGg-TF{mTzuKD3!BCrC1GOU zQ+brAdDwv@!99EA1B`l;QVU4mkeaZO47HUE_0pBXn|1K!so7b6At7a;JL@rYu9G54nncr{K6qDxTE8W ze?WkFOP&jgq2|@A$v~BerriqUTUFJ^NsA{p>qqD1NlA}Y9>3@9twpYS>DO=PCm4lv z)_Pnf5SQ6@KDVeR+M!>C>mxR8_0ACuf0{$Deg^$!=Uqp_CfwW=YuhJW5PT+QiL0SjWxHo#Nx; zqchoC`RoQ%%|vJ`09g)wE%~{Gw&jnOA8`oVD=n@6>eZ{1-8*+is;1)coHrZhuCW&9 zqOt;vTrxtd^4**qhFY5GVUHd~pfQ?(x8$_n+GgHt(pCBtD9?@-7#O(08WVvhmb=8B zJ$p9t_3PIyF%r(h5yhtE^=k^f+P7pr<;yj)E8 zpxa95PoXAPpXYuBe@5Z(G~0Vk#tow8i|65{L!=uE(* z___f=Qj3g8%y!Dp)&^kwcD_Ha~xo@B;yDd zyLLeBzLVq^Nv_^abL||SXR(E$jj3P-wKX*&r&V#y?!=b%SOkiaHcGHuRQ#kR_BJ{F zA@D23`G)K5V4|@=)k_%%)-bOKlU7pQfK^H)Ib}^R6ukz}>*U0gsm)=E%7y zw~Kb%E>i85#D0p!Pd2Kd>fYl|v0feoJtaXa)xf;=gEY@}nQK`#{NGS0rq^*3sJ~)H z+i^>;$d-0v%s~FUoVK4X{o@4duP0$5Tco!_kOaTl zi0vo~7Gqizdd{;9p&Qnsr8TzMApz}bXU`2E;`W%jXk=ny(Np>Co|l(&s`foQTU!yM z4;ziHT#5HwTyP!C?j|r5g`4l-!L;&uv{DhMKVf5@1Cw}>J z&u(E-hL|pISL7;EDo;$5@|ZiVGTj5p0sSzA+3j@0N)e1Y$%2{Et=Ghf=z_wp1yx?x z4WBYGKI!%-2^40QhaM3%DGzZC{hI>vIj@f(bK0x+F2V@aRitB9uunOQ|4~t@FL2K_ zE`9P0veA1c!^5e@xI82Qn2?!SM&4pNRK#lOvZD+z`!`jm_<>ZsB1E_{f8KtU@r5ZrZxPjd67LmJ@N@_uz;&r zfEUqV0n12_J$CG8QjQ}dcgDoVo|c@?EB!N!CPSqcFJ4e*Z{NJh_x<-TqY%J)S>=39 zc_SbAp`ir=AVq)paGGJs<36UE@9i5Bk__<{;iv*Mk7zpw!HsCx3iuXNGs(d9ODHdw zC%&>$l%5~^NCr>nHK@W7m|rL(0BZrJ5Q{Z!OZO;;#7nzP#79OR185@!Uav7NJ$-zl z(TbTpgsE$VWHF|`S;HdYO+b~Vz11+8aG0&?2L6z(=FW>InCzG^NsZD*z%dPRRek&9E1qJV*ptpy9};9lc{tveI9F-QZ>LJ$VgwWDant z)@QfkSe)$q`+;q*W}%ix)ZfJ#gO1%OD%)&$t^M%*Va}sRk3y9D@p28Slyx~Gis-LU zHk>o8>eEh=vhMeLRmOXWj6Yn&!D?MxU|@Pmv<0M!8N~Gl2iuQi8qBAqrIlxI!Lg2>8?yEd4F%}f zhpGB~t3{-{K~hWm9!yaz>XNGvP9l!|=SeHUVqPtiLVJ=!m#luo~LSlvel|+6Bcdth0twQ zhx^Q!Dn^Q!-1VAcu2l8>Vv%21m_h30wU{-NcVLKNn7Z(Lf6zprVLDr$K+O>NL+C?Q z#4u_+SP=dKM&3AumNOF+4OqLWf`P7|iwRUVpzsYGsF0p@-foY~wXabEFAFz@!$g+< zy@Thk_~$<}{QOUQ`6aik_y2&;>YYG!h&hEgLSXzL(-YJi)3@eC_JBIo@w;k5gca0v zkqa*=yLazKY{u6u5I#-jI-m%h-QBF%U}`m5N(v(X&k`ab1NwW1Fb#5@9Hc?9*3vz5 
z=D{ttZLwoxW3JnyKp)_csDoC?sx8dR(*wZ*F;PEzwo;?$Uu7$f-q?Bw42g^S@%Ikf9Rr-z=LiL|L~0N;Vl_*-LiX zOIG9>ppqW^blCO_fI(K4ILbygy}PqB!LZQ7VUU5ld2vhYzyT0>i@ozGWQw+OWR(H} z{oiIxul;jQ*vi6O`}oLcS+s8hMv15gjVV~AGsnNlnA|d#Q3zuRiHWC4L4o;6ZvZX~ zHK;#!fgISEN3_wVPd~X(g`^;EJh_iAxCAeS0YhAmmHTg);r^%P#y=YQu5yTB+zT_8 zCNY$K{+!`5+9f&r-R8{%LaV|c1A&ZR>FL3k>D5G$`Wf7L@7`1Dv1h81wbKc9T(WG% zKIn@mdGv`?)N)K}hz_D}S0-AfCkXzv>MEOkr%RQozs}9xymD^470kpJCst24#&8NT|A3_w?-K5c7jSzy9nspe~0Q(Ijt*8_}d!usJSzO(?0CQtHJdYk;l@bw;S}A1}PkuDYW#|g}L?rhA{DuQTG2r z8vFzJG^#yo)sUfpu@Mo_F)+-y^4`ZdaU)pRH`wWr5H3Xa$#ml~wWCLWBF;Tb#c^u2 z4W0=69Pmt0^Z`Ny0}JP3$Uq*N+2P-(KN5gT{gN(lveiItF)}vJX~1lenD{WjaT~+W za}3-DL@`v)X;4#FABVQw)9lnrV@i~ilx*VSN(Q-w)iiYRpXlP-Y8zjcMf@MQ_=W}dECKOaunBdS|Rov^=*>Kk`qvRbO z-kHlUXLkf02&r1pP>il~>o23Jf!j08I*+$#8PJ%!bW#%%s^G<`h4&&oY^7X@I_5Zc z?tBY7k#}SH^@{=})o}XNLipzCx6sX6#m46E?d@H>uMCJ?THd7Gw6(yUHt+84PA;G` z`Ma7Tue+a>|yC3G8*U4$HNx&H;|0fp(Iap^F(4Hgx-h~M6*@|3H%5}@Gn@(z3bme zh<8y^!M_PjeRF>^F&Vnf^u{^BMAP7~3L+%DSs@a3Qq+^nvYvmEtUxoCyPQ7H+Dxd$ zhQFAp|3&}E^t9OnkKpm6IX&!U{@Pu^A^mQ zFZ?n$ca4o(>O7;_F&K`BBoI~N$^K?r+fsOADWkCk&1lz##X{fXou(NTOn(kYnKTm&Q_d%Oc*G*CzTyRI7n2C4FMHC(1i0 zVJD^KIwOXXao(=0^aZq41M(O!onxwxv3Gms#_Qh^M3(4q>+=w(*bKN))tOC8hF5@m z3D!Z3)9ua#NTM0!>mWVKwEjd%eR)WX@<0uKFPsbOYcUTQV0nxNxAvIG&OU86T4G9w z+uQSWbAMuu1xf7DDxC2jEL`-Mo;fJTre`{(#S!tQG0P^3C|1q6P8Oiy1hf2RP*`(a z3Dr&~d9u~hkPuvu-#AQb=CQR}n9Jh&&pCg4ARtZcxt1&`u9_z85SDg z@EY(;0Di>qNyGzec@^9%148Srv;Fo==r*L_z*|xcZeK&U&{7JM(k8b1ToKA)coIxs z5DSIVkby)0Csm&s!uA#yW+6qDOf znNKvH2QJLoU2Vy_jPi#gluy4{BQmy%n)@}*K$@{o-%ZI|RY7=5)Nm=X=70a?!QUp! 
z4=wPg!XoetX4o3HaBT3Qj|VD`VQW88$(_WxFtZ;2bQlpS!Nm>~nlrs-S>;ar-uWh~ z6zpU2dzC}1>0}!qQfWd4@bX_`YKCpdI|hObb4>5y;9)3_#3}%DZFU0`HW8PREAa6_ ztdoODGGAt@Z5iWoXIEDr^y#xWYof}aT^E13rput#vWYcCynHEl^27;K96juF&c6CQ z*Sn5qEol=awoHiDxpgQ~L_=Z7c+1Sh!x5JR6F(9|Mva*2aZb>=&--LpwaO4PIHSci zNoHZ{G8rquS7?3Ytx33hEb*hkNl2Xb{;){k1c|VDb>{vzl^x`HcK>~{j^Nn0csW#(h}LNr!PV!#K(ssthZoF4oELR5~^Pznh-?h2Lw zjM&5*8++zyB)r4~>61D`RBP;InuIW?+=jN?7>GRZMY3&|)Zo`lhIm74(-61y8=VH5 zP>+=pAdROSmX(zyem%@R{diG_6QKp;dg?ZmL*T6U+-GpX@Hf65c|8n4z#gpUsar^!X72;e zLMrg_UIj#DErJytG#0ez7@CDGKLnRQEQXhMHds;f*-AP2c@PO&<3t23sn3DNb!IED~4W#0v)PkWe0ObM6X2hOj87;57Qn0$5x;Pj$20q0?ZEF S#c+c{Q8=l5BIVeHpZ^#CITcv| literal 0 HcmV?d00001 diff --git a/results/plots/f1_by_optimizer.png b/results/plots/f1_by_optimizer.png new file mode 100644 index 0000000000000000000000000000000000000000..2cbd7fe2e68c8088f8ac6af4483e324a3457a602 GIT binary patch literal 14784 zcmeHu2UJvPx@}=>QKS_xAVRBT1Ox;G2_{gIl94PZAUT6%6Q~HH2qGW?lCxSM8B_$6 zoRKUdIVn-{+o!wlym>Qo=id9)4QsvidUaQKTUDq2^Z#Gi-`@M&Q#^l`ijtm^L?Thi zoI9gTBCQi9ktn)<*@QnaxzFi>e~CCqU35^fHg<3}urnel7&zEiSUXsl8vf>FWM^+` zZ6&}X!oz>~x2p~gHuj>tyq16Y0FSku3GadP>)UXVEjH&g?MWnR1L7ZwM9BnGT$o$t z%t_U2&&Iplbkx*V*M7|J5ZTOon=9e zSFhi2 zIqKy4+3^kyCim~}1pSv!qB&v;9-A&CqnMq;P(a znaM~~5|5WPk4{e16K?G%KLs`hsVR`Rl9I!WDxV}-cD&)S^xr4!v^t+ZS(FlS+&S5P zu*U8fgRHD<^4Lx}qpmzhdj2bSwKA_p7q5(4n>EJ!pMQ2Nn)t4DPm$}_du%gPmAv}R zpREgLpUjPSBy6E!rEQyE7>rCYZH#Zd=usNrlE!-Bd1`l|8`t5(Rb$G@;?B;_;?q?E zVaIG=_-xzsePo_eU(YZaRl7RRHTH}q~Z)8SV89@o*deo;Lse;1bBJUS1n zja90{O4n#|z1BPn=eyk&zn4&RWu?D9<~;FCOiZleDEVsq9Y)u4e0+Sk%SX>W7fs2P zCk4xmIHLS0Z&hcpv$NN2Q9k9;aAIUOAxy*LyT@N6mDvV%3eIzE_7W0#22!c_n3 zHjDNwbB%j_Yb%pptr^9u$qUni`5VGdkxgpV3teYt-f!hS)HN_U+@Nsku4z z2vNtQm)?@%;^Re^RwkT_8xal;Geb%ta?aj7#kWAJRuD*;gGdY>u_&{V3a~mhWWQKQJ)z<=riM5xZEeY^yZvT#C$N zR{A5Ct`{u~gdZGSo9nQqV`NOn8>40WteZZ+x}0TD;&U)Eg?VLAa;;w1xr^n(g$vE; zy4vEdGwgHXmY-jq6z*{+Gi<-JZvA?>0%r5<4!XU2lS=RJzCUSSmW6Z;`BYomivQPr zGd0(4o-tfPE!otWYoBFUO4*#Kbb7~*9W}H2#NFmpvF`iJ2YJNio@iUub7yxdfBkvy 
z-o2^mX)fEc7^m@eN^0uXB9En@(9q%6yquh}f@V!N=_w0HgxUO_mGhEo%VcH_9-i}9 znPQ0#)hx2%cy&#Ub7Z=m!WVOXn{k7!mZdW<4#m2~{41Gz*ktUhRg6Ri7)J*AZ zWE1n%MCXD55~i;9Z0-dJcpvZdL%vnft)x8vAnrNEgVbacb?(Tlnen8&iXYU=Y zmzQqf29m70@`jfti=7$6A3ciJD|99PN5kE{;0dpOx`0W|)y^DSsTwj4#rw{k%Em^O zwdFxcB8*(z+)^$?O7Kt&#so+-c+7Vt?6b-_iJ)vq)g)&kJq>&E9XO6LiMy&U&5d*Y zAa9l}C0*L%V{lmeP1QryF{j@5-i85tk2|F*Jt>yHL(L?_yceZUs3zk0phfMcPkaT1 zh5o35`RBwxefm`MhPQaR;o+6#>BzN$uP&$*e%h#Uz8=rrr({kC{wUq8pYzPRF6t!D zip0{G0RCioy7qX-;_Rr&h7B8NkEb*DwV6b6Pf0Mkjq!zeuHU@F}E|{2T z%gXxV!;)S_EAJ!NZaes`$$&4thE7hJ3VSXn86jI@-0 zcC5Z_^#jd?JwoBtKaq{Pi}S;cOO=C*JBrfoLEA-IskNYkgG z_N)|gns&Ao%WCTG`S}49ARp#K5)!MzJ)3P7UlgW_42~BW1qh;CqMsGnE8=a>yfo7!M)Uy%P%WO zC%W>ve*5hNlh^VO!H5j6wN;*sTQ_eq%d9NUay59)4Am@ZOPsoUSc>q&Ltf7h1`R8IuLqk^= zmX=ZwV2E#x6DO|6%8fcu`qK-_Vlx{@no}YyE1z&bF6pg%c;Ga%%w}rFCg&cH!R6_k zW$`OluH;k|1kEt==oKXH)^}GLZ_j$%Wu;LaCTNu1;rQh(MM&5itFGiPUoK^tx7PIk zL?`;#Kl@<+afihjQz|Bj&bauSH`jI9*NUSOhIlUY?_NQf!GCMqlN>GOSe6gie_Vac zGb;0<;x-*tcJ?9Pt-h6~QFQKGR#sLvqCWKZ_p{t7iN-yvq7$_sR~&lZZ%or9OQYkV z(`zskYm$vc-G6?kTVfEgI}`YAa#BCJ{q^hDo_)Hyq1oBlmFD?fdWK8bBRRnVUplUt znZ^sFp(f9jR8@n>;#^#EyLt7Ra7=OqA7+b3FEJ-6#Yo4A4(8eqGJopd{l>v`zuL8H zIS-g5(+LPsQ+Z|M#*KNiE!q*~GRnwQ$4c((t8K51T)#a!8x$FtLC2*&RJ1f<@5#t^ z{@Fu^;q=rs589KOJ*Va6{dOI`7{wOpu8hW~j8j&Nk*4xz6mO4rv{%&4QQPD-=^2x6JY~oh|jdtZv47BQdsTmr+xOwZ={pA-N zNpIBCw1NlH)WSY|I737Pup0YopLoxeMLT=@#=7X!f=*+f4Figoce|KQa+=je-H4Sd ze!E6=k4)!DJyg!CKfm|Z%mwaZ4PX#azFg*4zq+y%92S=B=eNzDLFDyE7sJb!9oJUo zcORDy7qax67EitS>J-km@jh?SP=n`46GwB3M(sj-d3pI>oz>QUNosG$>PX4r4}!X$ zo}M+wYh)K)IM4T*XkFX>#cYQai6&J$`#cbn-D>1WsDMeP=Zfp>SeuHKl~s)=?x0~k zwOCY6&M`!d>D%iRH5u&28hpHe5Cy@PL50=xia(kYhT}ehNXdU>pY!q zbmu!LLCN(oXODNF11GIH7n;;EuB^@%w?;U36HRz`n?Blpmu>le+4JYs%OC8IAfM?! 
zfb2!7Y5=;^Y2SLQw4ZEte-}5W)3f*5k?1}_*audHFm^Gq&fH>!a3Qm+(-fG6<;Vr6BOM(DT> z$AttveylR4E7n&Yppk9W#hmmZU!JG+wUJyQ0F5iU8xh6qtgK}VzZS>Ig^b*ymZG6Hte6riT6=GD2TI~DvhQ>xgqY7F<(}puRYTLQty69UiY1#=te_r#xeLGRwpPtT> zBSZ20`3H-0<0)EMW~LYv+6 _2pgKA{;-4^%f=W_QIu7k3*Wpl_J4HPJMOfTgsG zJqzDhwArZ*W_OHJZr#fD+Bg9v->mI*+;i6vj=eqytgWrn^o#X?WlA5YWax=g(H}jx zWy=<#rwh7FeLLu^&#jet4mfC}F(E+OZ%;Hfe`sd7vAx)Nt8itmqgc~=ZFMF1 z;lrpmmL18L%O7A;AR4f*7_II5+gphIL7hG7vFNa{urQpK-Q9p<$Tqwtxh@)wKMakC zz&MQw7e=YQe%=Byo$U*dGy~jR1j2}jUdSDM-Y7ob8H3$7Z-|vaFKCtHEsVu@LmRpf z7>hKlsIFE3lv%Pa;{D=VrB}aAmCoDw*%&&H>WXg%uI^cSdF$j8;?VqnT=yby1abKF zx%MWP8{-vN7+VWm><_WCCss`kcUTt-;u}^+E!&%HoSlUU2;er6TmJc_!RdRG=Gg5r zB(XojXfM%HaVq8v$86rlO%69E_)9MLQcI%}Z40j+up7KDo1;B4RXdBWFUY)Ca`k77 z+|u{n5`U~bCv#=;sduD-jCQZCxVOhye|n>%rVSs6AG9j?Asyp3Kf&YdN!QIii#{7N zA|SY}#rOSqUC#4hR7tPibr|&xTiLII^;gtB)$3t4Vc5!G* z6|+;_{eZ)<$QyGUrxfFZUxMgD;;wU!;G{%dXA+4C4v735YVnbL=4B>oYHEal0)DYM z$F|ScgVMo~2v0$)&Wi~5I<&~y)uqW%tg2OiZLyV-N+@s!SZ*{uf8yZhD4k!eaA_hc z31k};^&61{$-4P(+E{wM#IP;FO}Or8(@_2ez9|j~385FV80sV@Cxfbly+Qri?b{oG94=rmDKISl{Q07hiOKQonVA`Lj@|gMtzfmN z?X68z-v$QMuRIw*E0Z#|=P}vi^Mpq?8ST2lDqH_mMoI6(lHQyFSKPX+Alq~2 z?(U?a`JFpxE_9pcV$63ORR_1AsHCJsr+E-N+xCMO8tkpj9<~ExjN7 zFE{jnN9nD$?p9nh5tjG;D6h<_{2Lul$LTFIikqO$VjRw3Qr`@q5Iw*)%(7Yw}^JR3K zR2sJPS8+~e&B?*UxCG>^Zr4wI|1~Jnr;bx-`$=*kJjes*Hvm(nU*6sbVXqCzH zXEdYx_miN%Dmprrl50Z$`iQ2J&Ckx(FDFqMhmfLfGK99(j_p*GshC>o$gEUGWCM37 zW2}&K!wW#R(iq^E^)woIBG_#O zH)qE0aq?BQvAv7s9_dGKPT{`5|G7?A3pZmlBRE>37MwbD>K<7b#lE7Z#>r}Uc({L+ z5y91L&|anRx4A`0v@ERp5h?|lo1OifkHHW+Y6fO;-EVvDlpMHIqEuA6o^+kFGBb1^ z#+HW4vhDA3o_Nc|@BPMGHPWyZ;FF%X=qzdXlVr51}{Kp3x zdO@=w9I#PK2S{(VyGgaV_x$`+)zzQGUx-XcJ>0l?vw2^cKWf!OW}_=t*txl@+lq1U zvhwmxmjlG(>*^F2e*V1r{mVPo)rG-o*R9w>U%pupVdF0%!mUit?bl!2U7MThsd|5q zAklKfm$%P$|mo@W;voLf(>E^6B%vHjznljpbRI&hf=ePcT4h{~P-rioGalEv@x?D~d zFy5I<#!1KKPvprFqzc{Az`y_}8(X;%@JS0Am5Ad=5HhBJaB#3;VsJ2_s;bH`z!XH% zE?&J<05}Y2j)X)okvGaiFgYyGP0IZCPEAeOS)mj5FG>*6uiQAcj$n?>OOE(v(h(mA zYuV?%f7h<~nHdLSipvAUx-ZTuB&3djt!YKtZeRdTs>%!v-$X_KvAR0# 
zolh7slF`J1a(a4gKBBV!%UWMy2->nm3B$Ua8(#7-ZjaaoK7I_UH)e{58vCXaulL0F z-o1L@Mxt!Ie0hXhui&Dosj2US2TvS^0~@d@neGdAG7YF8%@`W8tb6oOd9+>`dbgzO zrr?Id7SNGT&>5QL{L_5M5u0Y^<-PEj?LrH#shXnBOkN`p!U{A}ntJ0Q2GD74#^&d;&l(X;OV4%y z*Nft0cg$I6j9iT!8?zW3E%Vj~rBchWTinO6%UX)EL z@vk<13`XIHATFq;IFcW?t)vd^3%q56Mu%HeVP;nD(3Z!4kM^UV& zLMo}G8lxW9>+RSuyLUGYfzmi`qtpDOSEKF1=sQ}N?|ioHeSx&s23?*|sjhBoYfHIp zThN_5chEO7Q67}{&tsTO!Yq^r%DB?~JOTdVis%aJFdPsQIA|u0W5<4mJy$Mpoi$T< zdgLUoF;X9MXBW3t?A^P+#Kp#fTsCH542g(H!-frGq-nTE!DC_UJyReV` z=+&tH%8(T3d9CjMTDQJeF8=e&ErNjQ?ChNBcAGSR>3} zWgmR?S$J{rnqV_JL-HH`JK@laOuGwQIQwp2*pYBPLfO-^XnA>Ar+sC48On_27~kgY z40S3O#N!|fsR)`snef%Jut-H-MQFRaF8<4PGIHCBJn{*K-Q)Ya&1OACg`Lo1=Q6aj z)8+egi;T;U_!+kT_%=9b{_)`fbki`ayy1A|fJvKU-)xRms3NWsiz*nbKm#X$Qr4Zm zx9{lH--Ly;50a0vU3h+&Km3r4NzJo*P}RZDo~7FMmB|EBQc_;o<3mUxK&kr8U0OH{ zEnK5wwBy~lQ$9{V=;_m;olk(H3Hgm~cyf3+$+~EvERGv63|mu=p+4sA+qZT}=%cFE zh^T%JFjWwK)vjJmeEXQoH;_oJN&G$Bvm+@Pzl)l) zvubvBuR*jCvMzuf=fQ(7sy6!V6_7srfI$hN5*Qfh03qY}Jh;!H#}_0xetb?+NznVT z7bEDGMU?>?n%%n-z!#2Ywd#Q>Z`EI&+6|mD0(MBl$;nBoiAX;)+%oF)NOxgAa}p?6 zQTqWuX49d%XpjU(3+gBfiqiN9A!GuhFmwF&+lk}O6G6=TCES08F^%DUn>1|rM8WiV zf)ER&rK5{OUL-qDcKiDI9S#}nL0aqF$BkuSCsBCL1p*}XkB_H~cVwFryr^~#%3hW7 zfQd1r?Ee%6nIy}D)zftj`s&@bsL+g9*t+N4M%O(B$-sPj0 z#V639gsI*}BEUz`-OusfTTEPh7OH>9Sp}jvo@3Ae3Y*0nt4c%>6SQe*aj?B4piWo*johPcYy$HCAV$X^= zhR&yKW|jmCmYN&J$Tcn#yM?@^qL&4ag52bwkV~0W+zY=dW z>&|w81R|;J=*5*TQvSPNeTRElQ88}ZxIw7#n9MQqsT=kX!GLx?_2UN-bxm(9wO3$S z$oODJpihmF4E@Ak{$xOCk&BGtu2<_{ND&MO1_D$G^;j>$w_w$Ij8KN5T-PJKW?>9S zhC)qZs)30nNWU{w0;=tgSAoC7j<+Fz9h3k^krHHzGaT@b91flKBAaK$~oz zMIHVi0i+8QWefk%b;rveF zp1lIbkUuMP!Iy=|{r!|ziJq*0+-pH93OY~dr0L|o{!sMSl= zXq<@4)RjkV&nlr#$p(r#3`G&+GAtCXL(zA9$4CEYBMDb=%>AK4_LpH=*tzosf;40f zFOCSeroKHs(P+B=f?2yM8EI%@K*4YxJ*sM8V4ziS?G+{i+Wq^vhZ|Vpz#W;wjS@&$ zJ&5LShA|^nuC@?{B8{2PmjgoQejh>qEyViA1Q!)`0=EZk@Fmf$;6*WmMCOXp@&6~d zzf5q*Vig<{p;1>rpuve?oKay>M{7$0u@5Lrqr%(Z`|zD4EO|Zm`EklWh_VSf0c13#ddU+L-P(ZD&YC zs0Nw&A@R8Q`IT$Mrsb0s2fuwo^vc0J0H;tva-LG*Q7^MwZ7ih3-_9D(3FL234Sc=4 
zDr7l8z7X@+4|+7()z+llR?Lw82o(T0#TPq!q>uw8`Vfq6Q? zQc}lPcYYps0-ejw$5$UG&c~Yd zvVX!P7F|jqLSQX$nRdYUMNK@we6Zl=8oN=~5=BAuK6>=1qN1V!(pWG&ED-N4J9CsM zC@2DBF`0*$bRUL{Mu_6jMIiuRG$Di_0CvT;exjp&Y#Qu*dSkFt$Lm@R)g@iA!GT33 zZcrfDgdP7CO#J`sHRY*=|8BC`xnz9h%2fbrV){30N{j}@sHzu@uAy~AurpP!P>Y6D zzAlJEg@8oRkANai0q>R0)mB$G!?u*?vm ziJtu%E**%Mg@uI!B6FO$n3&FSr!n^IY0!SB@#&iCo}S{)GPGT_0Mp(9INu0!N@QfD zTs~g-1q3R`#s0bh2!RerbcOVB{V&|0dG8-J-ck(bn}r(R_&2=)Nbq_CKleWfWZ@)E ze5WhaRg?4I0q-Nq$J)qe1A%8WSKtL}Cz}Ld%7RtIq)eMY4}M9#tSAx33=wut0vy4pSaBF4ld6(X$&qd3`-$>kV?gLA z0}=vnR`YBQj#^OJSH(kdVxtH@IkJrs6j6_)0*Vnh_J7-9&El(>)2(Yo8 zMeu@kev*(dm{q(*)ulSJ{gTrb66eQ9p({A7;Ml{(#pdeD3Q~U(ffvMQgZ_$2kMvs2 z{aucN$`ZpO_hbl=2ks|cJ9#2`?Cm2m$Ye5$?&3om5B85M&4j}D>neA#C+(_ci1GQd zOdER`Q$Q0|!j2QyRa<)@G&Ho7@iE47i!}%YgoPAouk`5hpQJjS>HPeBvCjwvN$p0f z4w>tO(Q%A)nEBH`muK3jz^$5FAha#!T!-~6Z+f93=6FLx!)F=T5W*(~YFX!p7%4gO z53Q3+Ts)$(=6We`6RZ)57*mSdS~G3_Ht#}Dp(GrB;{`8Pe_x-<7$4x^_B$nUX%NFT z^p9QVtZi(3Y_tAioptqRTK;h>j(!zCv-w*pQnkVjV;O5jtnmxDtl;hDiF2`(NANpm2J+)=L9 zdm0#6;cQZfl+fL~cQ57k?F3Yy`}Nn~WFURnxC|&nrL3v%`9QuBIwdCE8Z#Z95}6#l9E`}H^1Em`l~stxVRW| zGu^&@DfEIl4co5^3X_s^^}SZqs3m^>_HNU4nl*Y-V#|vcFJ@qT2-n?oo%433GATI= zXq|APf_GE7vwi$Dt~ojVI{dg%Rs0=~WD16tCi0e}e8*4uxVG)Ofw=!CK+`Z6vi{LgGlO z=8$6=fN#I`^~C^G?bcK*u<8#^9`W(`5RZ|4>k7dgS^V=xX52nbMC zQK8$h!@$XfB>$FJq!ze>S#0!3^c}_qD&fv3Aj4$L?5+c$CPa`EE=J>!U!nr}h-=Ep z$*Igwbg{?~_9+mW%ranqnk$02st;3LmqFc2&VrYm6Xv`?+FySq+-tId+nFQ|v&P>T z#krrzavF?qHiLyOFg7ufUE$d=CH$QFl`F9z*ss_*lB)h#6F?!@Dpm48T(~4`4Kc*% zsFtGs$B7H)o+1VN+W@y5_J(Ye=v0J=SDqh7#B5Yw7sY8voCv{>v{?wjeH32pR9b`~ zELfH9dh$&e50WEF?oe|H3aY`2+k!qthq)NmGf3`S*d~Am!ny>a-P|b&dsKyJOGA0Y zlMc!7;fyvE!7~d#p%)>Y%LG##*(yNXHFz><47^h9>({Rj4h%FrB}J-|Ce9oZ-7vwU zz{VJ1iP?-0q7cwm42?)t?~OP}&F#<)lPK12O2pKI0XL9¥S=5luX%0Yb~n&YQ%! 
zkJ!5T(TrlVQeV17_+&ujVdV)>%?N+Camc2(8xZ+V9tH-!$jm$n7hP^<4;=BVY8&1j z7W+VbBI^mPA|MKf!>x-r=GuhC1~=ubzKq&)B1;Ht42HxJ@Z^NO1W}j(l7JZU2#u(% zpW+C`?~w|KT!%<)dUg3Mw1{Cm4WJ65Bh#pI7t});j2QnrNKkz>&_4iZMF~SI^TrJu zh7iTVnt}m(~&S-GX5`R^M|$ z3ffaK;Qkd?+>$2GAOCsP`dY=yA(ub4hm zxd>wpbOU#V%Y^+D8xDPdBP5KZ%8i}u=Mfjzg>`@^Z9p@CBuBAN(Eo!?8SwN4p695I zBc7%qp2h`nw~x;VEK+sI56ACcPIBwyC^C7>JOWdS z%o;YQC!|W;Ol~|M25mfKak@EGlaJaI%}e9t$&=oGR9~`xzZAq97#|2*7;ZZRTu3}v z_TQgJz&nIpL`E6!&34+?ngZN{{z|78wn|0B9WMt}oy%+rzc|>onpW&HJ*+3hBuBV{ zVc|&D$aoI#cRbM@@3B392;VK(=41#Pq5-+fubWWNu8`Y&Exda;pNE7b6fgJln!$U8 zP8x&7=eqY@J9X+NQKoQ4`h?3Ed>8DPl`G_pilW3u4iHAZOw-2@ricu_RCU z_F<+?+st_F#C)fHz;=dXUl{uEC(R&>XBI~@M&Zp1&d8W-_IgN4_opEBUqL(YVeZ32 z5!_B|&qVCE$&{h*`CY$G>U-`sVcW*EcdzN0GiQ$J?-p($*GiZZe)I)b*lVT1C&KAI zX#u~Oe?8gc!r&-&nI6m)pL)8EDV`8ukZ}L}@Cq{<+wLS=?43_Z?-4B4n0QC*THVVw zi!z+A&SIBitFAP#2d45@7H#~e1788XeU;op3ih?$xLMKEZ!P>4$8t>q*xry85CLJw zovlL4EukrxHT_(psVD3@_r1G1JBc=f0awrV*|%?3h{0s*uETY(b+XDVf=EYAfylrb zQiLkSWA25p$GKb_Xr~C0d{1@Vt}$7S3l$YC=2ciJq~^SkiOKrKMbL2+zwXhPHVQ$% zlqzFSAD6xFE+8P_h(~oep$#L8YUW@j$5jdG7mi<^@}@NH)`cGQkgxYeZ*bm3f3EOH zZ*rZVNJAw?*QhblH#UwZs9`B-Y0=YfF(BwJj&wCt+fLKqqZ+=PDibA2gLH?|W6^${VP%daoTsW&} zA2ihFbX7^|`|@bjFSjGqFFt#G$k+d+y##}VW z#95)=9CpiFCyHNLUrxL0))Kc_TO#*~pXlOVTz?++%GGVgzPNM68U>&1jucvRRh_ES zP)5_WbzOBW^Ml%4#k*HfC|55D%i7_-zMeZw!C%V`{u8g^l^-iBpI7K6SO%Wbd6d?! 
zW>ygO?VU%$d5EXCeTdqw2r}hJRdHL-O49=CUp?K1{siB17w!F~s%%tMhRy7}?v0snMh9q`% z-QqF*^0`jqt=SpMx!`TPcEu!Xmv7!x2u(ul^tK%#g=87btLa#Q`VpCZwMK_oZ-4SsnR?&CGc9FO~S6- zAx2(7VRB)-YJkIS_Hj(!-41uj>)BmFKZ$18U_p3HS`&e!jYo@YIWzOcOROEiE2{+6#xpz3U5I=GFUyv#rw{Ck<`- zYAV?9Y z?;G13Xu%scY>>Zt^@W%heUz{7R%-4#I>!3mnur}+w;pnHbKA_ocO}nu$hShbZlbR) zcu08W>s{YStN`su8=c6^o2aCKa&4uaiolp3Kg=hGTAQMTPM=me-Qk=TaN3MLuPwQZ zqe$JZ!zF8Kq+=v8?dFFK&BnZ0{CqXZxMEDGYya{2uBWmS^Bv2Y2M-=hZ5_%TD`lJP zPe{pSu={>$DsQAPK`ufpPU7iN!_Tk6OG``Bt!Oe{4E*v*YFWzf9&Bz%ROP~2T?%!X z@^76j!ZRH?dNlsVy9b7~kq?V~_NQcZxaO7SBz&4@l`tz>%oMO2K8|Jo@cw;oUrcDS zrHqTSGq-i`m5NYNEoZ9zK0SAVo8R8v7xPs`bnrV)#Fi-sk9L*>wk*7m4H0VQ)~|l^ z?Af#IXH~(1O_xrEy3WOM7QgWH+|=3KtrIOBsN3$g>_YZ%yqv4*1p>0kQ9aLYgtxuD zz2^~I^2x`$IC!0=2IDf5-+enNCntAQK%hlvd11V#V7i^lM`SK`baIl1g@vWkv1noJ z@k1{!p0j7q_B88-@{Ht`K%}YPtdBT$^&HURry|(rOr$GG43pRTH>o4R|v$-!{ zzMUB9=$M(E_20v#`{K(N_3<;({aAl4J#UARg6ZVg?cC z&(_0VnxkZNb)(mB*l?;z%dH?TF;Qq}!FxC7Tm1Im%t}d)f?LMCffAQ5pDLqroHi?* zjn3(d4)ml_spOJDlVrV^{CQ1UUWhX~yzlDi3E;Qt*^S-dLikg2a(e=dJljl9`-m=O zJ50CPRe0{$VmvoFwuv8$giIjsw4)!lX+$R7x6M$orv#$e5 zuP#b*7j0m9fhJm9Qc^N8*j#s9Beyc2$UQv_)re^u@SkNZ4vS0R!?Q{!12f4-n_PXZBd~`OLGb9*RSU! 
z!JcMZA1j9<(CR$fO>L-@7883Ar;-xVjE_mqlEq>24+;u0^bzLtu3HoK&Vw%Sx8Hu| z(yu;19eea>Lmg^j9d<#l`5|Qj*&(+c(c$FPW@`7~HNv?S&)HL9wYTikijTXsLmc~A z4u2YKFKA6N;|VT_Uzq9i5%YCe7%h=iOG!(w8oseDdT;52C^5x!?2W0l)qB z8|(7o=hxceej^PS*nd#z2AMhvR_KHPH7=G`aH_y&dUR!h_MTDfv1Nwn?VLT)qf_E+Zmm=#PuzjOC4 z4_SCCyWuaY-@bh_r7|K%Chk0OjZdp}$X3JR!;ZP7`H^6q1m&bZuKfA)=em&7?CdhZ zf_Aq2W+>9}`CW2qq;lV0wT}Dd`=6*&Y;0@|MQ&~;U$Se4<5?5cP;D~hy4Y775RK(?3 zNJuh*%xA-vEsaQbv8bO4C>ctb7T+1Yzkk1Z|Ni})Av!ub(S$;YTJnaw>*z{LN@(dc zk??rgk27;~{p8FQwr81zqVBpDj}L#z8TZr{QDYwF$sNjUjZd=r`ep@gh+ivIfEOuN z!qPh_JNqiIipr%+ze*Oww?E~I`y`~?{eJ1`a;#*@h?b<3RDFoB)5MQ=bbmnC@o@zm zov7s8epae&YnE9`$KtTvwQTe7V()zk2-*{x`BzEVw=!n#F)Mw>!tN8n*Bmz@r!<`x z6&>Bbe|g4Z3Hda^Zlv9?KK4m}LqZIvc5(JZ9PWaXlhd>$R3MATMazUYkd=)sVWBN{ zc)I(2P)MKfn-5-;)j{9i#|WjSr2*g^FPNy6ynOkxe1w=+kWGJ6N=kjbBKZ{vYe&?T z32M>8#l^Wvg|laG1N6)U&es&WIJH@n>~rk#*PEy~-4UynWx8SWW+fZ|TrZK;hkumM zz1VB75%7fR{NqQDzLXBy=Cy9l_nqILR9$@q@HM+dcKe#utAi}0VhFHso%!Zb*SH?n zASa_>^fWhjdLu2Ve?~9VMOj6|M#;~{aIZT3Y12d;y}mcMSFsEwrLlvbOHww~QPvnASAuC`4r*@_qNsd^Y|2!|}+dD1#qge^<(^ ziI(0=ZBlaiZiHh+FXEi7TNN@SU+A{%N-bH%4UpZvc6OUgaBDzeintG>A1KIJLEP}7 z0B20mq#j+!MaoK0-xMAZ)08%CwR9n-s z@fbu#qPx30tBP{rTtAz{@&W1bduK5(W&(F#Nh=}levBScBLcvu%<)z4kbm!V+Rk07k3H zLES0&V~^C@6jXdF{d#-#k;%?lG>+fR;V>S05oxhjV$Aq)Qa$#D)YFBh;$OvI|^# zm5QdO$&M4f;iN)<+|)cdEIxrs&%(?sUL`yogba}k2r#g`G_uU?JY$Y^ktA?hM#h^| znyhhVX6AueT4(h!Leg#ww`4r89KZ0_ffOYlnch_YA9wT1kvHStt2EO34 z(MXL3V_l#6KfjJZ^!tT|rT|5#&b8g)h zhH8_nwrJos!;V51(5XO_<ek21E75`RS$x6|JOojP_@uqM{!8_%s0?CIWOxSh}R2 z_1LnHNy6XXpPuvPz5IfL23#ZUnw+3N$W^Z0NHUUQ-m@%|R%OD-&(jGrX?vbKcaCsi z*3+l6N^MaxZOTq$9TOB(BV-Bm>kDJW4{%STH|{HtkU~L=>%mL*@tL1NrfdQ`ngkIH zeqBT&09lL>aht)`d z{Cv}li_1ifIH{@NSY%<`Sy@>p-fxrrSXEVJvPRmsF-2=6f6z;yyw2{%Dju8f+n8cp z>0@&pl}rwcdB)b{;O$b1iqD?hs;#SQn_8YsapMtooH!XC9!?N$x_-@DPe$PuX9Rww z0CJ+m{Cim?FvD`_6BSrVleh2QNh7IBpE=_$0hB{<6Tj`?3&};7`6+*#vyS)g-|xmb z*tT`+P2d@6N5|Y*9HHPzq{?+x14&u*Ad}mE{`tCT!K8AUX`4Wn?Cm|;<*0x9H?_5a z?M_2p;6ilR%x{0eSt3fm2rwEb6D+VNugw$)RT^Ztf}mB~!O!0kckSC}vUUqYFK=VK 
zh=uZ%E7!qZb#RMbSy@>RJw3Sz>6;xb*&l4bt`A`Kci*{*s5G zIZs7D*`S&dma>h3!5BqGAKHfzw!nIU4K1!c}zcn>A zO?{MSkjeeFL#r=m6vLst%;xZBF>^TUNL2sSV4@o;P@kJfb6x)dbkBfkEQ2#k)nrW65KTY_3hWKyD5}c3?ZvGQz*whD69YS z8W@&v1-g>3)6}s82f|9lGgndEANz2M;n2zB-dk4*%3h?zzr|Vu;&%6$QYh|eP>#%t zIj$Y0D1Cc>xOd}*4POekKzvjWx-2QVW5*7qh!o$0)TO% z3gtTg>I47#Um5An0Ffw3NqGVY5qe@f)M7Xg9U1viH!a6xs5Q&3k!T~jl|gYxlAlbo z60il=3LGcz?}kqW%ucKQYhIUn>{&;6_{l&UAjVL z*Wfm57t&KTHckY+e)i;vMRBJ};?;0e7o+LnHdIiEcVc3!_~k(6c8$P`zR*&4a%ieY zxPnQs9y@jkC}_i`O^MyH*a7DHo{_8)Wp}WF41>C(kdShpF&{l@2$_?|@Gu*jq|4ld z#e@A4C{9m7L*tsl&aPUVE?Qn33WgF+y*C#kFuJrjuLSw@)vJ@#F}z^kf=BH=-i0eA zC>sD6v3DbV)&=of)hDQM5Pg;iaZ8IMZV4}6zHG^avZbi3+~cfq?b@}n8r)Lu`_9v6 zTa9_CO~t#jQsZyw>9GN%Nk~YrccXIQ&N7IC2+dFW#mTc!5J65=GE8)C-8y#c*vFiU z7cT}kT%}OXw0(U|*-lT-%E7^r)eutPG@YBYA-HsR#F~vekMZ-Xiin7?&!Y;zgs|ER z;zOw3B=Y^Ijt(uL@`g<6^XJi!1!+jK=2NRyQ7F5FebyYnul-O`!gl)fpX5GN6276= z4h{}2IaV=ZK8zYB`=I>kRs{4R#;#==`vVp97!J(M&9R<5S*?SsP}zOjRxwF{kzbaP zQEp1sXee;9BN`ki3Se_kWG5RJNY6Ept**U*4SwqdF4MH`MXp>Ob*(XCBdiEe9#E2XX_(1f0oVB2l zaGX$qw-GD!0mKQ21FFc-Dcd?WQZhYJQVkTwLAed_ij9VSrEEw?6)?7won; zpyk*b?!gTOkI)=!Eur(ZwYBW^uPBr^7b|r=IYR^-&YioHYu)z}XpX-O_`vVkvq(=* zPcFtH2*{fmg|0$VRkpTHXEJzuZ+%OlOCFJ3^$G~w+skXrH`HJIiKNc$+aqyy(Ozk^ zRK&%%8IDi4YO|f`?(S|$H+Z^r+cu5TZQHhGn6~B8XFT!qixP2}<9p}Bq+wOTL*Bq^ z{P}FX1&w z9KYicf$N|@TB;2-=h>yS##hIAauRsy`}8Rq7@esj-#qp!TUw?Td+s?b(GLk~%YKnW zlsJ+wC;M~sM4%9$Ewohp%L-iZEiQ;e0TitQVj=19Cw5`cqCGw~_J|XLGAla zO9+>@JfEx^nZ^?B`o$-SA|Y>R7?0oEkYS`^Gt#aBiBiSYwGechxo;*a4wMV<;bV7p zV&d^5Oiahv*(;MOAkrD2^paM9-*3N#L1^m74L6RsEjuB}X#8e@yLucbzN)%9{Jau4 zDiAXyAADAZpP#={)`p*9OP1bB92))RRGrfD^2F4NIE9!}-86&#`Z#0_Mf6CZ z8OZlpN1@bjW^~z7{oxS}ZN4kh^$*Eciv0#)2+Gwh|HEIsOIB6mtTV&~4nP01B|j$BvKT8dQ7)gMvvg zkaX{Om8hu6K60C+r)LF4D8BgsiP5hAI7TB42X5);u;BawFbJBc85kJgrY%gN^YQcx zqH+ZGqjvn4F{z*vtmQg;k;oZ9RSw04(DX{1nzDL=u^%=8z$Q>Gx9{Jdf{qS#3@?+t zHJ#}tJ>A^~Q26PE9j!BLG+ic@{QUg-lJkZIm>1We%5M2r0x-EHGc7e0U9Lz3X=oEC z$tfjB^}W^ML}r5iV&($UpbZXE`LiCA8mE^-8& 
ztTI|&0T|L%L!)RU+HWD?RW|sKukJBFqnf&R|IdB37ot=I3#tGpp>at>1{8~gq(EQa zLxB}NnHgps?NRN3q&%(*_MjdgO|#f={*j_+PRA&eFz+Aw6jJ<-N(V$vOjw&Vr(OhW zsCQqn>RD*0md+xWBoY}xUSa5tY~FL~y=n}a$EeE}IH817j}6_vd$+x^3ASis!rzZNAFhQ*s zAm_DTR1?(@z^e|*K2Wioi6G=aT-gR@k2g_e+AH*zmloq=W3Qm|Ea`w32$7Zc`R~3} z$15dp>z41PjzLrF1L0MH{6p$5IQ$}13-o)_;!W5NA3l%FP`2@;j|UGx1IkP?^dL5xCY*~oRjmK}oi)Iz&!G>M75@6`PYzB_ct@G) z&b@n4{SWz3sd)`lrhluM5B`MS|Dl>mG%8fLc=V}MrywOBJ9Ow}=?~<&#F7Z=8{%jn z^mgOU3pNZGV%HS$&sDAzN8e^s48Y#a$; z@{5UFg>qT<{^7Ps8i@Y?#j+{mLaaIL&mLWl*qleFKN5b}kp8+#){CDV8qyqc)hI$= zgVu)Kxw8`WK^jZPN3B@-#w>Hr%0oAQHj3ehc#zI7XKkhT*2^q*9uoS@MF}yN;kZq- zIu2MOa*Vf9T+_F`fD{d0NxbU?o8m{@djB6ADa`M z(tg*Ns$@YAmnwo+ zx?6on2$8N^oAK0IXfSB-{eZ#U7K>Hf*EusboPkc<2(L;|3iOP42uGtqb{i;_QA-xT z1lXzUy1y<*vu6Wcs!_X!26FuMv%CtA18qsK2Vp`yIKJC|{y7Z7Z(nK|yP(;=iF>uR z@~Dz=k&zeBA_y7Y;%;@1`RpocX3N52Pr^yc)dwoVd-vO{I}rDr`UYl#Rx4NgVLDV) zBkaR1Pr1vNA33|Y9FY+BW~f2eu!}~c$(zlNb<>hPK-6eglU7#$`paQ;%?|WLY3W?} z*{*Z=8O%^sDx&xs`wOc%lOOoT=JtOIlPdSVmThmzG8;*J8XT;l6MR^FCS*T_Vjctl z3R?tem_)_I7?nLg(chdFLloSFg$33DJzZUZQobyl#QXQ|>3{s~Ap7LXRjV}mk<^=q zoU8B@e3dAx0JfQEk4%czoi15E$5MXj7kZz49S1Uc|yL))4CjM2!}MINGP|-J;72X)hxp8qjuV zv#ygSs*1tivKhvWXXd9zNVa|Z_N{v9ySHx@6&0{H+QJA}`U*AZv87(GDt*oq6HaC9-wMM-(1wF(zYzV;k zQ%9I#&^T36QMb4cX}|eDCNv#ucK(+dMof}gil#6cl0||;8-7bkNg3~JgceXsd%ffD zX%#NT?nNy#pS2ZrY2^T4`=cZKnbBRM` zZ+yY2y&dIVFT_C%Wf5#f({J+bojcnZ8Hc}!%yw-f{R(-?wi_Uc{y zV13E1&4x-4`R*RDMd@y&_A;@;y7_a8am$}_jB(6Qtl0;n!m^i zra9d^^@yZEG{QQ@Kq@r`h$y$Mvkpx%Vnk)vfwtTL&_XbY2{Fj|rKF6gdP8%s=fS0X zp9KWO!$V~=)S}duXV!x9Xuao1Hq68 z53ssiB_2yLZ-zKDvHM|E%(?Oo@wAISkh9pnzYB)5{@SR==&;g`Klbucl9%^|)vwZi z;n5B@Zu=kChuRB>f2g5x0PjM_FczJwbg{qXI9DUIv2S_}f4>C~$Gfmwdjb;q0Nh2& zXluut{EfiS0O@FYq$33xfH-!ZdECDbBU!I!i6Fxk#KL9L(rs0dmwyRq9BxV21vSaS zqIsxhqK4RmrCMxg9k#Te=|7`0LmPkksT!@R42w=N(zi!s&3(N~2TFu61K+7k3JVL% zd`A$?uMiE+oj(t?vpcgT&2B_9ODSAok9ImN#YdQ#%h2{UaYM_-8cSuT(&y` zStxCo4Pr~Wz44k$zF;tZK|zV|d&k45KqwgWBiI)BBuY4>q2e3DhQeNgY{hc$;8_a` z3sV!_Teo73nvxA0;{CwF{GL9IAkphwi*UO7&nbRz6%N4v+yJ9v7#!cRK%K-Y2Zy%f 
zzq6tL52gLTFX~qcaOD5ELvR8naTwai0_UKS zuo;0G#QA^D3WHuYKr+9Ii>L`8s zGro;a8izxvzniMnh@_~K)tZn*h0q@sc7V1Mz1+?vG=y_Yv{xMC=7x%?X__F6(6CY4 zA>hX;2D*9$vOJ`|_{4Dl3M04{2?K9{HedJGy|^oRzawV}e9&I;v7|(Nbab?|qQZuy zs-i;4hf!F@$rwgt+u<*9cx;uN<+zxT+6W(!czA>o5Ll^zl{4dnd zj9K!KM3qOIqaOBU+J>j-u)?WPwG8zC=HfqxbsoL7p0|_hhT|av0?=@dKfQCNY9#; z^)e}un>I~Y+p(*bkKg}YNNYE~yi;HPi@%`1L2yw+B$1*N4oih$dwxz#o<+_2O`Fme zIGXOi`O^pRIll5q3%zTz{I>(c&`bHV1ZyrgBN9hJHQvvvdej3kMZX##IOa^g;3Kc0ewt5;i=ix?CrCyia5m}=&QguLC>up1}l!T zmh|fl!>Vn9TYJ`4Vue=E4tt8EYYnaVw{5P2mMjV$pWS^n%kVX@wiZo6!bS+3H zkTmQG(b^6n4AB&KX85DdCHwS@W7W9 zfv16EB1#wC=36`JlQjf+%-Yq#IQlz^iZHW~V3eHGvvNA!RIp8mxcq<*QC{jWK@kat zCcHG$iBN<2_Qx5sfoM<$^d~Qo=^L_uxSu#!YtaW$mXmwJ#B%(2dG`yrJ!m5JU;29J zX)-1~vk+-Wi+jRDbm2CKxbWt}&5XX0M2`&OlLJpmF6)2yX=y5}$N=j?q!j2flSrif z04qwE&hTRsGq;O&n3|eugi6EhIH3>u=wn$K@u1Y8IUrQsnmf=nOS-m1nSeOc2X9xs zl8VL=2@*FYyP<%a>&65Qz!yz7HCQW;eS=wttSi`=AzJ<+y@tVODR-D4i%YklK@43y zSOs6I%k4g-b*H~@{+G<_P4c!{GkqP)lznUZj6kVu7kr-mKe_3WDFWobX(LV(!5cMxFZY0k_lzF0nnYMiBllDJ# z5sXP$BeX{_(RI(FYG6 zH^uG0i?-*{BS$`55EG+^#}Y+XdugtzC;+vm%4TeeGg zRsN0L?G?Wqf7(*W^l0KN*I$E=(n#AD(gfweoBOoYNu(B!Z?9=unj2p^-cUPA_QVn71B&=zyHK>22DQAEZF$W;I{g+?9 zZCz<-*!A(rTyKQW1ctGCk!l*C1=PdyJoNeXJ5?IN@%Uj9Rnt*ozw1mmp^`eK+i6Sa zk{FOGglRmH`9ZEE^&EM;*TJhqUX4&4c$3K32(cSM*=u}9Cz`lR%ef{>6S`oul>bX4 zm43`-RzqBfL-a`aip=#M#tecCvD)Jq8?wx_K()&p27%Hh->jmGMGF?)GW$3< zOoU&IVh@lz_B!1jiz4~@&N{`Cw2luOzTOPEn8%JR+ zoOMK|U>|p|?1+{L?ss0qJ?{;y&_x+i8nUObX|MUoJ+gC}Lf|W#N-}1x7AMe^A`XyB z$>YTEUM0Gm2xEbuiYLedF@LyV?R+C4J|y5V*@r^r=A?juj%#}+XMf*79mnVtf7M|L zNUDlhBRtNbLnL8Jl$iClr|J3NKs{isl6(5Pz8HNeMKsHS{_@hNKWqEsWHg~QK@|z@ zO<{7uh-N9yVqpbD>u=3UVRY#F#cL0j7!Irg z+DiaQJ8(KJIV#sP56i_pbe!z}w7XN@7DMGUH}saFQb^*ld{Igyg<&D2xj8vGWE5eZ6o9rPu{dg0Vnl*h6{g%WeiB4YF!E#9ryygsI~p-+*AnYm@{*GH7q zVb=hCtXv24uwFnR@_+~pKmn`+B|y*k3u*eM8RHvv8&tXPmT8t~*oZ7ISYcGd#y1k^ zM;>*Y)N~Mh2j#^r7)u9SC235sRajNJSbi@W^M;Ly{%|}-;h+Y?uY&x5;;LRsWNc~^ zfD#(>d;?Y#K5qmy1XFrA)Nz2L$Z$N;SE%#YLjrNzoX0DCBSF++v2RFwwcBKc!jaqc 
z;q@*3^hpj{9Kb+4fT|%p_N0>n+(i!q;BqbEFT9LBuiPOSKvfeOmLtwU1N_yC9m|WM zn9=Y3;`U*OiayR9d16wR`jHdr;jx_zU0x6;lO%|=s}Y>AxoZH-D`2;XzZm~;iKyY2 z)-R;@iV}cNun{4wQOaJlU`gJKLDwQsU-DjJl_3y;0KfYBdiu#(BuE9Qdfa-I3}mJp zc6PUP!v@EJWaa07 z`C?Tie3XglJ*<-Tw{G3a{<7NTWhLxUH(_2iAOi|0`Z?Xt)ymNrH1y1uekEi$K((Q+7&l3?09jTA^G?N5RU^}H@&cmWHTHUVoZM z?~o}tD@gpu``y_BIWw}~Y@%dvyUbZ9U48x5q+vDBIFgba8PHad9<8s(NT#mS{yQ?=hF3z$y=jt tnY|iL!{Z+74uVJjQ~#R|qMXkz@44*Xv*~tf8$bs|;-buj5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu 
zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk 
z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 
zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) 
zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! 
z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ 
z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s 
zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x 
zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V 
zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= 
zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 diff --git a/results/plots/worst_loss.png b/results/plots/worst_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b7ae2425ffaaff8826b86a13d55d39f00f3537dc GIT binary patch literal 26093 zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX 
z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N 
zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( 
zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw 
zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 
zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf 
z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU literal 0 HcmV?d00001 From 71a30d56b7039fbd440c3e3e60c1c51230d17073 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:34:21 -0400 Subject: [PATCH 47/58] adding the file to test on Github --- .../Untitled.ipynb | 6 + .../app.py | 108 + .../bashrc | 26 + .../etc_sudoers | 31 + .../helpers/README.md | 285 + .../helpers/__init__.py | 0 .../helpers/asana_utils.py | 1156 ++ .../helpers/github_utils.py | 2110 +++ .../helpers/hasyncio.py | 508 + .../helpers/haws.py | 266 + .../helpers/hcache.py | 1086 ++ .../helpers/hcache_simple.py | 1188 ++ .../helpers/hcfile.py | 135 + .../helpers/hchatgpt.py | 549 + .../helpers/hchatgpt_instructions.py | 32 + .../helpers/hcoverage.py | 183 + .../helpers/hcsv.py | 365 + .../helpers/hdataframe.py | 309 + .../helpers/hdatetime.py | 909 ++ .../helpers/hdbg.py | 1134 ++ .../helpers/hdict.py | 119 + .../helpers/hdocker.py | 871 ++ .../helpers/hdocker_tests.py | 197 + .../helpers/hemail.py | 47 + .../helpers/henv.py | 541 + .../helpers/hfile_tree.py | 232 + .../helpers/hgit.py | 1865 +++ .../helpers/hgoogle_drive_api.py | 1183 ++ .../helpers/hintrospection.py | 284 + .../helpers/hio.py | 1046 ++ .../helpers/hjoblib.py | 880 ++ .../helpers/hjupyter.py | 383 + .../helpers/hlatex.py | 334 + .../helpers/hlint.py | 29 + .../helpers/hlist.py | 78 + .../helpers/hllm.py | 680 + .../helpers/hllm_cli.py | 840 + .../helpers/hllm_cost.py | 233 + .../helpers/hlogging.py | 809 + .../helpers/hlogging.pyi | 14 + .../helpers/hmarkdown.py | 18 + .../helpers/hmarkdown_bullets.py | 248 + .../helpers/hmarkdown_coloring.py | 286 + .../helpers/hmarkdown_comments.py | 66 + .../helpers/hmarkdown_div_blocks.py | 132 + .../helpers/hmarkdown_fenced_blocks.py | 131 + .../helpers/hmarkdown_filtering.py | 109 + 
.../helpers/hmarkdown_formatting.py | 530 + .../helpers/hmarkdown_headers.py | 841 + .../helpers/hmarkdown_rules.py | 367 + .../helpers/hmarkdown_slides.py | 201 + .../helpers/hmarkdown_tables.py | 121 + .../helpers/hmarkdown_toc.py | 164 + .../helpers/hmatplotlib.py | 106 + .../helpers/hmkdocs.py | 170 + .../helpers/hmodule.py | 121 + .../helpers/hmoto.py | 111 + .../helpers/hnetwork.py | 97 + .../helpers/hnotebook.py | 105 + .../helpers/hnumba.py | 43 + .../helpers/hnumpy.py | 57 + .../helpers/hobject.py | 500 + .../helpers/hopen.py | 106 + .../helpers/hpandas.py | 18 + .../helpers/hpandas.py.old | 2684 ++++ .../helpers/hpandas_analysis.py | 628 + .../helpers/hpandas_check_summary.py | 111 + .../helpers/hpandas_clean.py | 282 + .../helpers/hpandas_compare.py | 289 + .../helpers/hpandas_conversion.py | 221 + .../helpers/hpandas_dassert.py | 371 + .../helpers/hpandas_display.py | 302 + .../helpers/hpandas_io.py | 128 + .../helpers/hpandas_multiindex.py | 183 + .../helpers/hpandas_stats.py | 527 + .../helpers/hpandas_transform.py | 1023 ++ .../helpers/hpandas_utils.py | 649 + .../helpers/hparquet.py | 1309 ++ .../helpers/hparser.py | 1151 ++ .../helpers/hpickle.py | 253 + .../helpers/hplayback.py | 495 + .../helpers/hprint.py | 1076 ++ .../helpers/hpytest.py | 266 + .../helpers/hretry.py | 94 + .../helpers/hs3.py | 1129 ++ .../helpers/hsecrets.py | 233 + .../helpers/hserver.py | 1160 ++ .../helpers/hsftp.py | 204 + .../helpers/hslack.py | 66 + .../helpers/hsql.py | 36 + .../helpers/hsql_implementation.py | 954 ++ .../helpers/hsql_test.py | 273 + .../helpers/hstring.py | 176 + .../helpers/hsystem.py | 1097 ++ .../helpers/htable.py | 180 + .../helpers/htest_logger.py | 48 + .../helpers/htext_protect.py | 262 + .../helpers/hthreading.py | 43 + .../helpers/htimer.py | 275 + .../helpers/htqdm.py | 48 + .../helpers/htraceback.py | 228 + .../helpers/htranslate.py | 109 + .../helpers/htypes.py | 11 + .../helpers/hunit_test.py | 1876 +++ 
.../helpers/hunit_test_purification.py | 450 + .../helpers/hunit_test_utils.py | 586 + .../helpers/hversion.py | 300 + .../helpers/hwall_clock_time.py | 125 + .../helpers/hwarnings.py | 156 + .../helpers/lib_tasks.py | 37 + .../helpers/lib_tasks_aws.py | 407 + .../helpers/lib_tasks_bash.py | 104 + .../helpers/lib_tasks_docker.py | 1590 ++ .../helpers/lib_tasks_docker_release.py | 1890 +++ .../helpers/lib_tasks_find.py | 606 + .../helpers/lib_tasks_gh.py | 1252 ++ .../helpers/lib_tasks_git.py | 1500 ++ .../helpers/lib_tasks_integrate.py | 837 + .../helpers/lib_tasks_lint.py | 444 + .../helpers/lib_tasks_perms.py | 380 + .../helpers/lib_tasks_print.py | 103 + .../helpers/lib_tasks_pytest.py | 1743 +++ .../helpers/lib_tasks_utils.py | 395 + .../helpers/logging_testing/__init__.py | 0 .../helpers/logging_testing/logging_main.py | 81 + .../helpers/logging_testing/logging_module.py | 10 + .../helpers/notebooks/conftest.py | 17 + .../helpers/notebooks/hcache.tutorial.ipynb | 638 + .../helpers/notebooks/hcache.tutorial.py | 274 + .../notebooks/hcache_simple.tutorial.ipynb | 653 + .../notebooks/hcache_simple.tutorial.py | 257 + .../hgoodle_drive_api.tutorial.ipynb | 424 + .../notebooks/hgoodle_drive_api.tutorial.py | 107 + .../helpers/notebooks/hllm.tutorial.ipynb | 13040 ++++++++++++++++ .../helpers/notebooks/hllm.tutorial.py | 118 + .../notebooks/hplayback.tutorial.ipynb | 993 ++ .../helpers/notebooks/hplayback.tutorial.py | 374 + .../helpers/notebooks/parquet.tutorial.ipynb | 1774 +++ .../helpers/notebooks/parquet.tutorial.py | 304 + .../helpers/notebooks/s3.tutorial.ipynb | 210 + .../helpers/notebooks/s3.tutorial.py | 44 + .../helpers/notebooks/sage.tutorial.ipynb | 448 + .../helpers/notebooks/sage.tutorial.py | 98 + .../helpers/old/__init__.py | 0 .../helpers/old/conda.py | 192 + .../helpers/old/conftest.py | 17 + .../helpers/old/env2.py | 75 + .../helpers/old/tunnels.py | 267 + .../helpers/old/user_credentials.py | 208 + .../pandoc_docker_files/install-texlive.sh | 
113 + .../helpers/pandoc_docker_files/packages.txt | 115 + .../pandoc_docker_files/texlive.profile | 32 + .../helpers/repo_config_utils.py | 411 + .../helpers/stage_linked_file.py | 83 + .../helpers/telegram_notify/__init__.py | 0 .../helpers/telegram_notify/config.py | 30 + .../helpers/telegram_notify/get_chat_id.py | 76 + .../telegram_notify/telegram_notify.py | 155 + .../helpers/test/__init__.py | 0 .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../TestDataframeToJson.test1/output/test.txt | 31 + .../TestDataframeToJson.test2/output/test.txt | 13 + .../TestDataframeToJson.test3/output/test.txt | 13 + .../TestDataframeToJson.test4/output/test.txt | 13 + .../output/test.txt | 31 + .../output/test.txt | 13 + .../output/test.txt | 13 + .../output/test.txt | 13 + .../output/test.txt | 4 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 4 + .../output/test.txt | 3 + .../output/test.txt | 2 + .../output/test.txt | 2 + .../output/test.txt | 2 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 0 .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 0 .../output/test.txt | 0 .../output/test.txt | 2 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 0 .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 2 + .../output/test.txt | 3 + 
.../output/test.txt | 2 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 0 .../input/test.txt | 101 + .../output/test.txt | 8 + .../TestGetDocstrings.test1/input/test.txt | 18 + .../output/test.txt | 52 + .../output/test.txt | 52 + .../output/test.txt | 30 + .../output/test.txt | 20 + .../output/test.txt | 19 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 23 + .../output/test.txt | 19 + .../output/test.txt | 21 + .../output/test.txt | 15 + .../output/test.txt | 17 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 22 + .../output/test.txt | 23 + .../output/test.txt | 19 + .../output/test.txt | 18 + .../output/test.txt | 19 + .../output/test.txt | 30 + .../output/test.txt | 1 + .../output/test.txt | 20 + .../output/test.txt | 30 + .../output/test.txt | 30 + .../output/test.txt | 65 + .../Test_CheckSummary.test1/output/test.txt | 4 + .../Test_CheckSummary.test2/output/test.txt | 4 + .../output/test.txt | 19 + .../output/test.txt | 5 + .../output/test.txt | 9 + .../output/test.txt | 9 + .../input/tmp.cache_simple._llm.json | 10 + .../Test_apply_nan_mode.test1/output/test.txt | 41 + .../Test_apply_nan_mode.test2/output/test.txt | 33 + .../Test_apply_nan_mode.test3/output/test.txt | 41 + .../Test_apply_nan_mode.test4/output/test.txt | 38 + .../Test_apply_nan_mode.test5/output/test.txt | 41 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test_df.txt | 3 + .../input/test.csv | 5 + .../Test_dassert1.test2/output/test.txt | 5 + .../Test_dassert1.test3/output/test.txt | 6 + .../Test_dassert1.test4/output/test.txt | 6 + .../Test_dassert1.test5/output/test.txt | 8 + .../Test_dassert1.test6/output/test.txt | 8 + .../Test_dassert1.test7/output/test.txt | 1 + .../Test_dassert_eq1.test3/output/test.txt | 8 + .../Test_dassert_eq1.test4/output/test.txt | 8 + .../Test_dassert_eq1.test5/output/test.txt | 
10 + .../output/test.txt | 1 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 8 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 9 + .../output/test.txt | 9 + .../output/test.txt | 8 + .../output/test.txt | 1 + .../output/test.txt | 28 + .../output/test.txt | 28 + .../output/test.txt | 26 + .../output/test.txt | 27 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../Test_from_typed_csv.test1/input/test.csv | 2 + .../input/test.csv.types | 1 + .../output/test.txt | 58 + .../output/test.txt | 58 + .../output/test.txt | 60 + .../output/test.txt | 57 + .../output/test.txt | 56 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../input/result_0/config.pkl | Bin 0 -> 405 bytes .../input/result_0/config.txt | 7 + .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.pkl | Bin 0 -> 405 bytes .../input/result_1/config.txt | 7 + .../input/result_1/run_notebook.1.log | 0 .../input/result_0/config.txt | 7 + .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.txt | 7 + .../input/result_1/run_notebook.1.log | 0 .../output/test.txt | 45 + .../input/test.json | 17 + .../Test_obj_to_str1.test1/output/test.txt | 11 + .../Test_obj_to_str1.test2/output/test.txt | 11 + .../Test_obj_to_str1.test3/output/test.txt | 11 + .../Test_obj_to_str1.test4/output/test.txt | 12 + .../Test_obj_to_str1.test5/output/test.txt | 12 + .../Test_obj_to_str1.test6/output/test.txt | 12 + .../Test_obj_to_str2.test1/output/test.txt | 11 + .../Test_obj_to_str2.test2/output/test.txt | 11 + .../Test_obj_to_str2.test3/output/test.txt | 11 + .../Test_obj_to_str2.test4/output/test.txt | 11 + .../Test_obj_to_str2.test5/output/test.txt | 11 + .../Test_obj_to_str2.test6/output/test.txt | 11 + .../output/test.txt | 1 + 
.../Test_open_html.test_mac1/output/test.txt | 1 + .../output/test.txt | 1 + .../Test_open_pdf.test_mac1/output/test.txt | 1 + .../input/test.txt | 16 + .../output/test.txt | 20 + .../Test_process_lines1.test1/input/test.txt | 16 + .../Test_process_lines1.test1/output/test.txt | 20 + .../output/test.txt | 2 + .../input/cache/lastfailed | 12 + .../output/test.txt | 15 + .../input/log.txt | 325 + .../output/test.txt | 10 + .../input/log.txt | 10 + .../output/test.txt | 8 + .../input/log.txt | 61 + .../output/test.txt | 61 + .../input/log.txt | 36 + .../output/test.txt | 36 + .../input/log.txt | 2533 +++ .../output/test.txt | 41 + .../input/log.txt | 396 + .../output/test.txt | 399 + .../input/test.txt | 7 + .../input/test.txt | 16 + .../output/test.txt | 16 + .../input/test.txt | 9 + .../output/test.txt | 7 + .../output/test.txt | 2 + .../output/test.txt | 3 + .../output/test.txt | 71 + .../output/test.txt | 40 + .../output/test.txt | 40 + .../output/test.txt | 4 + .../output/test.txt | 1 + .../Test_system1.test7/output/test.txt | 16 + .../Test_to_typed_csv.test1/input/test.csv | 2 + .../helpers/test/test_create_link.py | 136 + .../helpers/test/test_hasyncio.py | 96 + .../helpers/test/test_haws.py | 276 + .../helpers/test/test_hcache.py | 1002 ++ .../helpers/test/test_hcache_simple.py | 1815 +++ .../helpers/test/test_hcfile.py | 335 + .../helpers/test/test_hcsv.py | 81 + .../helpers/test/test_hdataframe.py | 299 + .../helpers/test/test_hdatetime.py | 932 ++ .../helpers/test/test_hdbg.py | 934 ++ .../helpers/test/test_hdict.py | 107 + .../helpers/test/test_hdocker.py | 624 + .../helpers/test/test_hdocker_tests.py | 158 + .../helpers/test/test_henv.py | 17 + .../helpers/test/test_hfile_tree.py | 347 + .../helpers/test/test_hgit.py | 822 + .../helpers/test/test_hintrospection.py | 406 + .../helpers/test/test_hio.py | 225 + .../helpers/test/test_hlatex.py | 665 + .../helpers/test/test_hlist.py | 176 + .../helpers/test/test_hllm.py | 361 + 
.../helpers/test/test_hllm_cli.py | 1403 ++ .../helpers/test/test_hlogging.py | 103 + .../helpers/test/test_hmarkdown_bullets.py | 716 + .../helpers/test/test_hmarkdown_coloring.py | 205 + .../helpers/test/test_hmarkdown_div_blocks.py | 355 + .../test/test_hmarkdown_fenced_blocks.py | 218 + .../helpers/test/test_hmarkdown_filtering.py | 449 + .../helpers/test/test_hmarkdown_formatting.py | 1403 ++ .../helpers/test/test_hmarkdown_headers.py | 2002 +++ .../helpers/test/test_hmarkdown_rules.py | 377 + .../helpers/test/test_hmarkdown_slides.py | 399 + .../helpers/test/test_hmarkdown_tables.py | 196 + .../helpers/test/test_hmarkdown_toc.py | 228 + .../helpers/test/test_hmkdocs.py | 394 + .../helpers/test/test_hmodule.py | 25 + .../helpers/test/test_hnumpy.py | 215 + .../helpers/test/test_hobject.py | 392 + .../helpers/test/test_hopen.py | 92 + .../helpers/test/test_hpandas_analysis.py | 42 + .../test/test_hpandas_check_summary.py | 67 + .../helpers/test/test_hpandas_clean.py | 364 + .../helpers/test/test_hpandas_compare.py | 650 + .../helpers/test/test_hpandas_conversion.py | 276 + .../helpers/test/test_hpandas_dassert.py | 448 + .../helpers/test/test_hpandas_display.py | 685 + .../helpers/test/test_hpandas_io.py | 43 + .../helpers/test/test_hpandas_multiindex.py | 680 + .../helpers/test/test_hpandas_stats.py | 426 + .../helpers/test/test_hpandas_transform.py | 1888 +++ .../helpers/test/test_hpandas_utils.py | 251 + .../helpers/test/test_hparquet.py | 1468 ++ .../helpers/test/test_hparser.py | 398 + .../helpers/test/test_hpickle.py | 97 + .../helpers/test/test_hplayback.py | 506 + .../helpers/test/test_hprint.py | 844 + .../helpers/test/test_hpytest.py | 228 + .../helpers/test/test_hretry.py | 154 + .../helpers/test/test_hs3.py | 597 + .../helpers/test/test_hsecrets.py | 209 + .../helpers/test/test_hserver.py | 321 + .../helpers/test/test_hslack.py | 81 + .../helpers/test/test_hsql.py | 29 + .../helpers/test/test_hstring.py | 270 + .../helpers/test/test_hsystem.py | 494 
+ .../helpers/test/test_htable.py | 159 + .../helpers/test/test_htext_protect.py | 578 + .../helpers/test/test_htimer.py | 24 + .../helpers/test/test_htraceback.py | 474 + .../helpers/test/test_hunit_test.py | 954 ++ .../helpers/test/test_hunit_test_mock.py | 288 + .../test/test_hunit_test_purification.py | 1065 ++ .../helpers/test/test_hunit_test_utils.py | 347 + .../helpers/test/test_hversion.py | 74 + .../helpers/test/test_joblib_helpers.py | 569 + .../helpers/test/test_lib_tasks.py | 540 + .../helpers/test/test_lib_tasks_docker.py | 494 + .../test/test_lib_tasks_docker_release.py | 1530 ++ .../helpers/test/test_lib_tasks_find.py | 267 + .../helpers/test/test_lib_tasks_gh.py | 133 + .../helpers/test/test_lib_tasks_git.py | 249 + .../helpers/test/test_lib_tasks_integrate.py | 27 + .../helpers/test/test_lib_tasks_lint.py | 32 + .../helpers/test/test_lib_tasks_pytest.py | 1163 ++ .../helpers/test/test_lib_tasks_utils.py | 301 + .../test_master_buildmeister_dashboard.py | 74 + .../helpers/test/test_repo_config_amp.py | 284 + .../helpers/test/test_repo_config_utils.py | 65 + .../logs.log | 0 .../playbook.yaml | 206 +- .../requirements.txt | 24 +- .../results/price_by_neighborhood.png | Bin 0 -> 27259 bytes .../template.API.ipynb | 296 +- .../template.API.py | 183 +- .../template.example.ipynb | 344 +- .../template.example.py | 204 +- .../template_utils.py | 473 +- .../version.sh | 28 + 467 files changed, 128755 insertions(+), 449 deletions(-) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/app.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/bashrc create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/etc_sudoers create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_aws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py create mode 
100755 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/run_notebook.0.log create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/run_notebook.1.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_load_df_from_json.test1/input/test.json create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt 
create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py 
create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/logs.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png create mode 100755 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/version.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb new file mode 100644 index 000000000..363fcab7e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/app.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/app.py
new file mode 100644
index 000000000..864b08367
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/app.py
@@ -0,0 +1,157 @@
+"""
+app.py
+──────
+House Price Prediction – Flask REST API server.
+
+Run:
+    python app.py
+
+Endpoints:
+    GET  /health          Liveness probe.
+    GET  /features        Feature catalogue and defaults.
+    POST /predict         Predict price for a single house.
+    POST /predict/batch   Predict prices for multiple houses.
+"""
+
+import logging
+import os
+import sys
+
+# Put this file's directory on the import path so template_utils can be
+# imported directly.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+import template_utils as tu
+from flask import Flask, jsonify, request
+
+# ── App setup ─────────────────────────────────────────────────
+app = Flask(__name__)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+_LOG = logging.getLogger(__name__)
+
+# Eager-load the model at startup so the first request is fast.
+# A missing artifact is tolerated: the app still starts, and /health and the
+# predict endpoints answer 503.
+try:
+    _model = tu.load_model_artifact()
+    _LOG.info("Model loaded successfully.")
+except FileNotFoundError as exc:
+    _LOG.warning("Startup model load failed: %s", exc)
+    _model = None
+
+
+# ── Helpers ───────────────────────────────────────────────────
+def _read_json_object():
+    """
+    Parse the request body as a JSON object.
+
+    Returns (payload, None) on success, or (None, (response, 400)) when the
+    body is malformed JSON or is not a JSON object. An empty body and falsy
+    JSON values (null, [], 0, ...) are treated as {}.
+    """
+    payload = request.get_json(force=True, silent=True)
+    if payload is None and request.get_data().strip() not in (b"", b"null"):
+        # silent=True yields None for unparseable input; without this check a
+        # malformed body would be indistinguishable from an empty one.
+        return None, (jsonify({"error": "Malformed JSON body"}), 400)
+    payload = payload or {}
+    if not isinstance(payload, dict):
+        return None, (jsonify({"error": "JSON body must be an object"}), 400)
+    return payload, None
+
+
+# ── Routes ────────────────────────────────────────────────────
+@app.get("/health")
+def health():
+    """Return API liveness and model status (503 when no model is loaded)."""
+    model_ready = _model is not None
+    status = "ok" if model_ready else "model_unavailable"
+    return jsonify({"status": status}), 200 if model_ready else 503
+
+
+@app.get("/features")
+def features():
+    """Return the feature catalogue and default values."""
+    return jsonify({
+        "numeric_features": tu.NUMERIC_FEATURES,
+        "categorical_features": tu.CATEGORICAL_FEATURES,
+        "defaults": tu.FEATURE_DEFAULTS,
+    })
+
+
+@app.post("/predict")
+def predict():
+    """
+    Predict the sale price for a single house.
+
+    All request fields are optional; missing values use FEATURE_DEFAULTS.
+    Responds 400 for a malformed body or failed validation, 503 when no
+    model is loaded, and 500 for an unexpected prediction error.
+    """
+    if _model is None:
+        return jsonify({"error": "Model not loaded"}), 503
+    try:
+        payload, bad_request = _read_json_object()
+        if bad_request is not None:
+            return bad_request
+        errors = tu.validate_features(payload)
+        if errors:
+            return jsonify({"error": "Validation failed", "details": errors}), 400
+        price = tu.predict_price(payload, model=_model)
+        _LOG.info("predict price=%.2f payload=%s", price, payload)
+        return jsonify({
+            "predicted_price": price,
+            "model_version": "1.0",
+        })
+    except Exception as exc:
+        _LOG.exception("Prediction error.")
+        return jsonify({"error": str(exc)}), 500
+
+
+@app.post("/predict/batch")
+def predict_batch():
+    """
+    Predict prices for multiple houses in one call.
+
+    Expects {"instances": [{...}, ...]}. Every instance is validated like a
+    /predict payload before anything is predicted; if any instance fails,
+    the response is a 400 listing the errors per instance index.
+    """
+    if _model is None:
+        return jsonify({"error": "Model not loaded"}), 503
+    try:
+        body, bad_request = _read_json_object()
+        if bad_request is not None:
+            return bad_request
+        instances = body.get("instances", [])
+        if not instances:
+            return jsonify({"error": "No instances provided"}), 400
+        if not isinstance(instances, list):
+            return jsonify({"error": "'instances' must be a list"}), 400
+        # Validate up front so one bad instance cannot abort the batch half-way.
+        failures = []
+        for idx, inst in enumerate(instances):
+            if isinstance(inst, dict):
+                errors = tu.validate_features(inst)
+            else:
+                errors = ["Instance must be a JSON object"]
+            if errors:
+                failures.append({"index": idx, "errors": errors})
+        if failures:
+            return jsonify({"error": "Validation failed", "details": failures}), 400
+        prices = [tu.predict_price(inst, model=_model) for inst in instances]
+        _LOG.info("batch_predict count=%d", len(prices))
+        return jsonify({"predictions": prices, "count": len(prices)})
+    except Exception as exc:
+        _LOG.exception("Batch prediction error.")
+        return jsonify({"error": str(exc)}), 500
+
+
+# ── Entry point ───────────────────────────────────────────────
+if __name__ == "__main__":
+    port = int(os.getenv("PORT", 5000))
+    debug = os.getenv("FLASK_DEBUG", "false").lower() == "true"
+    app.run(host="0.0.0.0", port=port, debug=debug)
\ No newline at end of file
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/bashrc b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/bashrc
new file mode 100644
index 000000000..845fe970d
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/bashrc
@@ -0,0 +1,26 @@
+# ~/.bashrc – container shell configuration.
+
+# Prompt.
+export PS1='\[\033[01;32m\]\u@container\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
+
+# Convenience aliases.
+alias ll='ls -alF'
+alias la='ls -A'
+alias l='ls -CF'
+
+# Set working directory to the project root on login.
+cd /project 2>/dev/null || true
+
+# Show available commands on startup.
+echo "" +echo " ┌─────────────────────────────────────────────────────┐" +echo " │ House Price Prediction – Docker Container │" +echo " │ │" +echo " │ Run notebooks: jupyter lab --ip=0.0.0.0 │" +echo " │ --no-browser --allow-root │" +echo " │ │" +echo " │ Run API: python template.API.py │" +echo " │ Run example: python template.example.py │" +echo " │ Run Ansible: ansible-playbook playbook.yaml │" +echo " └─────────────────────────────────────────────────────┘" +echo "" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/etc_sudoers b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/etc_sudoers new file mode 100644 index 000000000..ee0816a15 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/etc_sudoers @@ -0,0 +1,31 @@ +# +# This file MUST be edited with the 'visudo' command as root. +# +# Please consider adding local content in /etc/sudoers.d/ instead of +# directly modifying this file. +# +# See the man page for details on how to write a sudoers file. 
+# +Defaults env_reset +Defaults mail_badpass +Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin" + +# Host alias specification + +# User alias specification + +# Cmnd alias specification + +# User privilege specification +root ALL=(ALL:ALL) ALL + +# Members of the admin group may gain root privileges +%admin ALL=(ALL) ALL + +# Allow members of group sudo to execute any command +%sudo ALL=(ALL:ALL) ALL + +# See sudoers(5) for more information on "#include" directives: +postgres ALL=(ALL) NOPASSWD:ALL + +#includedir /etc/sudoers.d diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md new file mode 100644 index 000000000..8578eccd3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md @@ -0,0 +1,285 @@ +# Summary + +The `helpers/` directory is the core Python library providing utilities, +development tools, and infrastructure components for the helpers ecosystem. +Modules follow the `h` naming convention and are organized by domain. 
+ +# Directory Structure + +- `logging_testing/` + - Utilities for testing logging behavior across modules +- `notebooks/` + - Jupyter notebooks and tutorials (e.g., hcache_simple usage) +- `old/` + - Deprecated and archived modules (conda, tunnels, user_credentials) +- `pandoc_docker_files/` + - Docker setup files and package lists for pandoc and texlive +- `telegram_notify/` + - Telegram bot notification module with config and chat ID utilities +- `test/` + - Unit tests for all modules (90+ test files organized by module name) + +# Files + +## Core Infrastructure + +- `hdbg.py` + - Debugging utilities with specialized assertions, logging, and fatal error handling +- `hio.py` + - Filesystem operations, file read/write, and directory management utilities +- `hsystem.py` + - System interaction: shell commands, environment variables, process management +- `henv.py` + - Environment variable checks and module installation management +- `hserver.py` + - Identify which server the code is running on +- `hversion.py` + - Code version control and Docker container compatibility checking +- `hlogging.py` + - Logging configuration, custom formatters, and logging utilities +- `hwarnings.py` + - Suppress annoying Python warnings when imported +- `htraceback.py` + - Traceback parsing, formatting, and manipulation utilities +- `hprint.py` + - Debugging and pretty-printing utilities for Python objects +- `hparser.py` + - Argparse helpers: verbosity, action, limit-range, and other standard arguments +- `hobject.py` + - Introspect and print the state of a Python object +- `hintrospection.py` + - Python introspection and module analysis utilities +- `hmodule.py` + - Dynamic module installation and import management utilities +- `htimer.py` + - Timer class for measuring and reporting elapsed time +- `htqdm.py` + - tqdm progress bar stream redirected to Python logger +- `hthreading.py` + - Timeout decorator to enforce execution time limits on functions +- `hretry.py` + - Retry 
decorators for synchronous and asynchronous functions +- `hasyncio.py` + - Async/await utilities and coroutine management for asyncio +- `hnetwork.py` + - Network utilities including URL availability checking +- `hopen.py` + - Cross-platform file opening utility +- `htypes.py` + - General type aliases and type utilities based on standard Python libraries +- `hwall_clock_time.py` + - Wall clock time simulation and management for testing and replays + +## Data Processing + +- `hpandas.py` + - Pandas utilities aggregating all hpandas_* submodules +- `hpandas_analysis.py` + - Statistical analysis and ML-related functions for pandas DataFrames +- `hpandas_check_summary.py` + - DataFrame check and summary reporting utilities +- `hpandas_clean.py` + - DataFrame cleaning operations (deduplicate, fill NaN, sanitize) +- `hpandas_compare.py` + - DataFrame comparison utilities for diffing and equality checks +- `hpandas_conversion.py` + - DataFrame and Series conversion and casting utilities +- `hpandas_dassert.py` + - Pandas-specific assertions and validation functions +- `hpandas_display.py` + - DataFrame display formatting and signature generation +- `hpandas_io.py` + - Pandas I/O operations for local and S3 storage +- `hpandas_multiindex.py` + - MultiIndex creation, manipulation, and access operations +- `hpandas_stats.py` + - Pandas statistics, duration computation, and time-series helpers +- `hpandas_transform.py` + - DataFrame transformation operations (pivot, reshape, normalize) +- `hpandas_utils.py` + - General-purpose pandas utilities and helper functions +- `hdataframe.py` + - Lower-level helper functions for processing pandas DataFrames +- `hnumpy.py` + - NumPy utilities, array helpers, and random seed management +- `hnumba.py` + - Numba JIT compilation wrapper and acceleration utilities +- `hparquet.py` + - Parquet file read/write operations using pyarrow +- `hcsv.py` + - CSV file operations and DataFrame I/O utilities +- `hdatetime.py` + - Date/time manipulation, 
parsing, and timezone handling utilities +- `hdict.py` + - Dictionary manipulation and nested dictionary operation utilities +- `hlist.py` + - List manipulation, deduplication, and membership utilities +- `hstring.py` + - String manipulation, formatting, and transformation utilities +- `htable.py` + - Lightweight rectangular table class with no pandas dependency + +## Caching and Performance + +- `hcache.py` + - Advanced function caching using joblib with S3 and git integration +- `hcache_simple.py` + - Simple caching with JSON or pickle file-based storage backends +- `hjoblib.py` + - Joblib parallelization, memory caching, and job management +- `hpickle.py` + - Pickle and JSON serialization and deserialization routines + +## Testing Framework + +- `hunit_test.py` + - Enhanced unit testing framework built on unittest and pytest with golden files +- `hunit_test_purification.py` + - Text purification utilities to sanitize test output for comparison +- `hunit_test_utils.py` + - Unit test utilities including test renaming and helpers +- `hpytest.py` + - Pytest integration utilities and test artifact handling +- `hcoverage.py` + - Code coverage utilities and test coverage analysis helpers +- `hplayback.py` + - Automatically generate unit tests by recording and replaying function calls +- `htest_logger.py` + - Test logging script template +- `hmoto.py` + - AWS service mocking with moto for unit testing + +## Markdown Processing + +- `hmarkdown.py` + - Markdown processing entry point aggregating all hmarkdown_* submodules +- `hmarkdown_bullets.py` + - Markdown bullet point processing and formatting +- `hmarkdown_coloring.py` + - Markdown text coloring utilities for LaTeX and HTML output +- `hmarkdown_comments.py` + - Markdown comment detection, extraction, and removal utilities +- `hmarkdown_div_blocks.py` + - Utilities for handling HTML div blocks within markdown files +- `hmarkdown_fenced_blocks.py` + - Fenced code block parsing and manipulation in markdown +- 
`hmarkdown_filtering.py` + - Markdown section extraction and content filtering utilities +- `hmarkdown_formatting.py` + - Markdown text formatting and whitespace normalization utilities +- `hmarkdown_headers.py` + - Markdown header manipulation, extraction, and level adjustment +- `hmarkdown_rules.py` + - Markdown rule validation and processing utilities +- `hmarkdown_slides.py` + - Markdown slide extraction, splitting, and processing for presentations +- `hmarkdown_tables.py` + - Markdown table parsing, formatting, and manipulation utilities +- `hmarkdown_toc.py` + - Markdown table of contents generation and YAML frontmatter handling +- `hlint.py` + - Linting utilities for text and code files +- `htext_protect.py` + - Utilities for protecting content regions during text processing + +## External Services and Cloud + +- `haws.py` + - AWS services integration with boto3 client and resource management +- `hs3.py` + - S3 file operations, listing, and S3-backed filesystem utilities +- `hsecrets.py` + - AWS Secrets Manager integration for secret retrieval +- `htranslate.py` + - AWS Translate service wrapper for text translation +- `hgit.py` + - Git repository operations, branch management, and diff utilities +- `hdocker.py` + - Docker container operations, image management, and Docker utilities +- `hdocker_tests.py` + - Utilities for running tests inside Docker containers +- `hdockerized_executables.py` + - Wrappers for Dockerized executables: prettier, pandoc, latex, and others +- `hgoogle_drive_api.py` + - Google Drive and Google Sheets API integration utilities +- `hchatgpt.py` + - OpenAI API integration with file management and chat utilities +- `hchatgpt_instructions.py` + - ChatGPT system instructions and prompt templates +- `hllm.py` + - LLM API integration with caching, cost tracking, and response handling +- `hllm_cli.py` + - LLM CLI interaction wrapper and cost estimation utilities +- `hllm_cost.py` + - LLM cost calculation for OpenRouter and other APIs +- 
`hslack.py` + - Slack notification utilities for sending messages to channels +- `hemail.py` + - Email sending utilities via SMTP +- `hsftp.py` + - SFTP file transfer operations using pysftp +- `hsql.py` + - SQL database operations as a PostgreSQL wrapper +- `hsql_implementation.py` + - Low-level SQL implementation with psycopg2 driver +- `hsql_test.py` + - SQL testing utilities, fixtures, and database test helpers +- `asana_utils.py` + - Enhanced Asana analytics with time estimation and team grouping +- `github_utils.py` + - GitHub API utilities for caching and repository data retrieval + +## Notebooks and Visualization + +- `hnotebook.py` + - Jupyter notebook configuration and display setup utilities +- `hjupyter.py` + - Jupyter notebook execution and output capture utilities +- `hmatplotlib.py` + - Matplotlib utilities, figure management, and plotting helpers +- `hmkdocs.py` + - MkDocs-specific markdown generation and documentation utilities +- `hlatex.py` + - LaTeX conversion utilities using pandoc + +## Miscellaneous + +- `hfile_tree.py` + - Directory tree building and formatted output utilities +- `hcfile.py` + - C file parsing and transformation utilities +- `repo_config_utils.py` + - Repository configuration utilities loaded from YAML +- `stage_linked_file.py` + - Symbolic link staging utility for git operations + +## Task System (`lib_tasks_*.py`) + +- `lib_tasks.py` + - Entry point that aggregates all invoke task modules +- `lib_tasks_aws.py` + - Invoke tasks for AWS operations and deployments +- `lib_tasks_bash.py` + - Invoke tasks for bash script execution +- `lib_tasks_docker.py` + - Invoke tasks for Docker build, run, and management operations +- `lib_tasks_docker_release.py` + - Invoke tasks for Docker image release and publishing workflows +- `lib_tasks_find.py` + - Invoke tasks for searching and finding files in the repo +- `lib_tasks_gh.py` + - Invoke tasks for GitHub pull requests and issues +- `lib_tasks_git.py` + - Invoke tasks for git branch, 
merge, and commit operations +- `lib_tasks_integrate.py` + - Invoke tasks for integrating changes between repositories +- `lib_tasks_lint.py` + - Invoke tasks for linting and code quality checks +- `lib_tasks_perms.py` + - Invoke tasks for managing file permissions +- `lib_tasks_print.py` + - Invoke tasks for printing setup and environment info +- `lib_tasks_pytest.py` + - Invoke tasks for running pytest suites (fast, slow, superslow) +- `lib_tasks_utils.py` + - Shared utilities and helpers used across task modules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py new file mode 100644 index 000000000..0aa7f7f4b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py @@ -0,0 +1,1156 @@ +""" +Enhanced Asana Analytics with Time Estimation and Team Grouping. 
+ +Import as: + +import helpers.asana_utils as hasautil +""" + +import datetime as datetime_lib +import json +import logging +import os +from typing import Any, Dict, List, Optional + +import asana +import asana.rest as arest +import dateutil.parser as dateutil_parser +import pandas as pd + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# EnhancedAsanaAnalytics +# ############################################################################# + + +class EnhancedAsanaAnalytics: + def __init__(self, access_token: Optional[str] = None) -> None: + # Get token from parameter or environment variable. + token = access_token or os.getenv("ASANA_ACCESS_TOKEN") + if not token: + raise ValueError( + "Asana access token must be provided or set in ASANA_ACCESS_TOKEN" + ) + # Initialize Asana API client with access token. + configuration = asana.Configuration() + configuration.access_token = token + self.api_client = asana.ApiClient(configuration) + # Initialize API endpoints. + self.workspaces_api = asana.WorkspacesApi(self.api_client) + self.users_api = asana.UsersApi(self.api_client) + self.tasks_api = asana.TasksApi(self.api_client) + self.stories_api = asana.StoriesApi(self.api_client) + self.projects_api = asana.ProjectsApi(self.api_client) + self.custom_fields_api = asana.CustomFieldsApi(self.api_client) + + def get_workspace_gid(self, workspace_name: Optional[str] = None) -> str: + """ + Get the workspace GID by name or return the first available workspace. + + Retrieve the GID (Global ID) for an Asana workspace. If no + workspace name is provided, return the GID of the first + workspace available to the user. + + :param workspace_name: name of the workspace to find. + :return: workspace GID as a string + """ + _LOG.info( + "Fetching workspace GID for workspace: %s", + workspace_name or "first available", + ) + # Fetch all available workspaces. 
def get_user_by_name(
    self, workspace_gid: str, username: str
) -> Optional[Dict[str, Any]]:
    """
    Get a specific user by their name in a workspace.

    Search the workspace members by display name, case-insensitively.
    An exact name match always wins; only when there is none is the
    first member whose name contains `username` returned.

    :param workspace_gid: workspace GID to search in
    :param username: username or partial name to search for
    :return: user with 'gid', 'name', and 'email', or None if no member
        matches (or if `username` is blank)
    """
    _LOG.info("Searching for user: %s", username)
    needle = username.lower()
    # A blank needle is a substring of every name, so it would "match" an
    # arbitrary member; treat it as not found instead.
    if not needle.strip():
        _LOG.warning("User '%s' not found in workspace", username)
        return None
    team_members = self.get_team_members(workspace_gid)
    first_partial = None
    for team_member in team_members:
        candidate = team_member["name"].lower()
        if candidate == needle:
            # Exact match: stop immediately so that a later partial match
            # cannot replace it.
            _LOG.info("Found exact match: %s", team_member["name"])
            return team_member
        if first_partial is None and needle in candidate:
            # Remember only the first partial match as the fallback.
            first_partial = team_member
    if first_partial is not None:
        _LOG.info("Found partial match: %s", first_partial["name"])
        return first_partial
    _LOG.warning("User '%s' not found in workspace", username)
    return None
+ if not user_identifier.isdigit(): + _LOG.info("Resolving username '%s' to GID", user_identifier) + user = self.get_user_by_name(workspace_gid, user_identifier) + if not user: + _LOG.error("User '%s' not found", user_identifier) + return [] + user_gid = user["gid"] + _LOG.debug("Resolved '%s' to GID: %s", user_identifier, user_gid) + else: + user_gid = user_identifier + _LOG.info("Fetching detailed tasks for user GID: %s", user_gid) + try: + # Define query parameters for task retrieval with extended fields. + opts = { + "assignee": user_gid, + "workspace": workspace_gid, + "opt_fields": ( + "name,completed,completed_at,created_at,modified_at," + "projects.name,projects.gid,num_subtasks,memberships.section.name," + "custom_fields,custom_fields.name,custom_fields.display_value," + "custom_fields.number_value,due_on,due_at,start_on," + "assignee.name,tags.name" + ), + } + # Fetch all tasks for the user. + _LOG.debug("Querying Asana API for detailed tasks...") + tasks = self.tasks_api.get_tasks(opts) + # Convert to list if generator. + tasks_list = list(tasks) if tasks else [] + _LOG.info( + "Retrieved %d tasks from API for user GID: %s", + len(tasks_list), + user_gid, + ) + # Make start_date and end_date timezone-aware if they aren't already. + if start_date and start_date.tzinfo is None: + start_date = start_date.replace(tzinfo=datetime_lib.timezone.utc) + if end_date and end_date.tzinfo is None: + end_date = end_date.replace(tzinfo=datetime_lib.timezone.utc) + # Filter tasks by date range if specified. + filtered_tasks = [] + for task in tasks_list: + # Parse creation date. + created_at = ( + dateutil_parser.parse(task["created_at"]) + if task.get("created_at") + else None + ) + # Apply start date filter. + if start_date and created_at and created_at < start_date: + continue + # Apply end date filter. + if end_date and created_at and created_at > end_date: + continue + # Add task to filtered results. 
def extract_time_estimate(self, task: Dict[str, Any]) -> Optional[float]:
    """
    Extract a time estimate from a task's custom fields.

    Walk the task's custom fields in order and use the first one whose
    name contains a known time-estimate keyword and whose value can be
    converted to a number. `number_value` is preferred over
    `display_value`.

    The stored value is divided by 60 before being returned.
    NOTE(review): this presumably means the field stores minutes; confirm
    for fields that are themselves named "hours".

    :param task: task data, optionally containing 'custom_fields'
    :return: estimated hours as float, or None if not found
    """
    task_label = task.get("gid", "unknown")
    custom_fields = task.get("custom_fields")
    if not custom_fields:
        _LOG.debug("No custom fields found for task: %s", task_label)
        return None
    # Substrings that mark a custom field as a time estimate.
    keywords = (
        "estimated time",
        "estimate",
        "time estimate",
        "hours",
        "estimated hours",
        "effort",
    )
    for custom_field in custom_fields:
        label = custom_field.get("name", "").lower()
        if not any(keyword in label for keyword in keywords):
            continue
        number_value = custom_field.get("number_value")
        if number_value is not None:
            hours = float(number_value) / 60.0
        else:
            display_value = custom_field.get("display_value")
            if not display_value:
                # Matching field without a usable value: keep looking.
                continue
            try:
                hours = float(display_value) / 60.0
            except (ValueError, TypeError):
                _LOG.warning(
                    "Could not parse display_value '%s' as float for task: %s",
                    display_value,
                    task_label,
                )
                continue
        _LOG.debug(
            "Found time estimate %s hours in field '%s' for task: %s",
            hours,
            custom_field.get("name"),
            task_label,
        )
        return hours
    return None
def calculate_activity_rate(
    self,
    created_at: datetime_lib.datetime,
    last_activity_at: Optional[datetime_lib.datetime],
    num_comments: int,
    num_activities: int,
) -> Dict[str, Optional[float]]:
    """
    Calculate activity rate metrics for a task.

    Compute the task age and per-day activity rates relative to the
    current UTC time. Timestamps without timezone information are
    interpreted as UTC, so they can be compared with the timezone-aware
    "now" instead of raising `TypeError`.

    :param created_at: task creation timestamp
    :param last_activity_at: timestamp of last activity/comment, or None
    :param num_comments: total number of comments
    :param num_activities: total number of system activities
    :return: dict with 'task_age_days', 'comments_per_day',
        'activities_per_day', 'total_activity_per_day' (all floats) and
        'days_since_activity' (float, or None when `last_activity_at` is
        None)
    """
    utc = datetime_lib.timezone.utc
    now = datetime_lib.datetime.now(utc)
    # Aware and naive datetimes cannot be subtracted; assume UTC for naive
    # inputs.
    if created_at.tzinfo is None:
        created_at = created_at.replace(tzinfo=utc)
    if last_activity_at is not None and last_activity_at.tzinfo is None:
        last_activity_at = last_activity_at.replace(tzinfo=utc)
    seconds_per_day = 86400
    # Calculate task age in days.
    task_age_days = (now - created_at).total_seconds() / seconds_per_day
    # Calculate days since last activity.
    days_since_activity = None
    if last_activity_at:
        days_since_activity = (
            now - last_activity_at
        ).total_seconds() / seconds_per_day
    # Calculate activity rates; a zero or negative age (task created "now"
    # or in the future) yields zero rates instead of dividing.
    if task_age_days > 0:
        comments_per_day = num_comments / task_age_days
        activities_per_day = num_activities / task_age_days
        total_activity_per_day = (
            num_comments + num_activities
        ) / task_age_days
    else:
        comments_per_day = 0.0
        activities_per_day = 0.0
        total_activity_per_day = 0.0
    result = {
        "task_age_days": task_age_days,
        "comments_per_day": comments_per_day,
        "activities_per_day": activities_per_day,
        "total_activity_per_day": total_activity_per_day,
        "days_since_activity": days_since_activity,
    }
    return result
Set to False for faster + execution + :return: task data with comment and activity metrics included + """ + # Get detailed tasks first. + tasks = self.get_user_tasks_detailed( + workspace_gid, + user_identifier, + start_date=start_date, + end_date=end_date, + ) + + if not include_comments: + return tasks + + _LOG.info("Fetching comment/activity data for %d tasks", len(tasks)) + + # Enhance each task with comment metrics. + for i, task in enumerate(tasks): + if (i + 1) % 10 == 0: + _LOG.info( + "Processing task %d/%d for comments...", i + 1, len(tasks) + ) + + # Get comment metrics. + comment_metrics = self.extract_comment_metrics(task["gid"]) + + # Add metrics to task. + task["num_comments"] = comment_metrics["num_comments"] + task["num_activities"] = comment_metrics["num_activities"] + task["total_stories"] = comment_metrics["total_stories"] + task["unique_commenters"] = comment_metrics["unique_commenters"] + task["unique_commenter_names"] = comment_metrics[ + "unique_commenter_names" + ] + task["last_activity_at"] = comment_metrics["last_activity_at"] + + # Calculate activity rates if we have created_at. + if task.get("created_at"): + created_at = dateutil_parser.parse(task["created_at"]) + activity_rates = self.calculate_activity_rate( + created_at, + comment_metrics["last_activity_at"], + comment_metrics["num_comments"], + comment_metrics["num_activities"], + ) + task.update(activity_rates) + + _LOG.info("Comment/activity data added to all tasks") + return tasks + + def create_task_dataframe( + self, + workspace_gid: str, + user_identifiers: Optional[List[str]] = None, + *, + project_names: Optional[List[str]] = None, + start_date: Optional[datetime_lib.datetime] = None, + end_date: Optional[datetime_lib.datetime] = None, + team_mapping: Optional[Dict[str, str]] = None, + include_comments: bool = False, + ) -> pd.DataFrame: + """ + Create comprehensive task DataFrame for all users. 
+ + Build a detailed DataFrame containing all task information for + specified users, with optional filtering by project and date + range. Includes time estimates, sprint information, and team + assignments. + + :param workspace_gid: workspace GID to query + :param user_identifiers: usernames or GIDs to analyze. + :param project_names: project names to filter by and use + as team names (e.g., ["tech-now", "tech-next"]). If + provided, team will be determined from project name + :param start_date: start date for filtering tasks by creation + date + :param end_date: end date for filtering tasks by creation date + :param team_mapping: username to team name. Only + used if project_names is not provided + - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} + :param include_comments: if True, fetch comment/activity data + (default: False). Set to True to include activity metrics + :return: data with columns including user info, task + details, dates, completion status, time estimates, project, + sprint, section, tags, and subtasks + """ + _LOG.info("Creating comprehensive task DataFrame") + # Get users to analyze. + team_members = [] + if user_identifiers: + for user_id in user_identifiers: + if user_id.isdigit(): + # If GID, fetch user info. + opts = {"opt_fields": "name,email"} + user_info = self.users_api.get_user(user_id, opts) + team_members.append( + { + "gid": user_id, + "name": user_info["name"], + "email": user_info.get("email", "N/A"), + } + ) + else: + # If username, resolve to user. + user = self.get_user_by_name(workspace_gid, user_id) + if user: + team_members.append(user) + else: + # Get all team members if no specific users provided. + team_members = self.get_team_members(workspace_gid) + all_task_data = [] + # Process tasks for each team member. + for member in team_members: + _LOG.info("Processing tasks for: %s", member["name"]) + # Fetch detailed tasks for this user. 
+ if include_comments: + tasks = self.get_user_tasks_with_activity( + workspace_gid, + member["gid"], + start_date=start_date, + end_date=end_date, + include_comments=True, + ) + else: + tasks = self.get_user_tasks_detailed( + workspace_gid, + member["gid"], + start_date=start_date, + end_date=end_date, + ) + # Process each task. + for task in tasks: + # Parse dates. + created_at = ( + dateutil_parser.parse(task["created_at"]) + if task.get("created_at") + else None + ) + completed_at = ( + dateutil_parser.parse(task["completed_at"]) + if task.get("completed_at") + else None + ) + due_at = ( + dateutil_parser.parse(task["due_at"]) + if task.get("due_at") + else None + ) + # Check if task is overdue. + is_overdue = False + if not task.get("completed") and due_at: + is_overdue = due_at < datetime_lib.datetime.now( + datetime_lib.timezone.utc + ) + # Extract time estimate from custom fields. + estimated_hours = self.extract_time_estimate(task) + # Calculate actual hours if task is completed. + actual_hours = None + if completed_at and created_at: + actual_hours = ( + completed_at - created_at + ).total_seconds() / 3600 + # Extract projects, tags, and sections. + projects = [p["name"] for p in task.get("projects", [])] + project_gids = [p["gid"] for p in task.get("projects", [])] + tags = [t["name"] for t in task.get("tags", [])] + # Extract sections (sprints in Asana). + sections = [] + sprints = [] + if task.get("memberships"): + for membership in task["memberships"]: + if membership.get("section"): + section_name = membership["section"]["name"] + sections.append(section_name) + # Identify sprint sections using common patterns. + if any( + keyword in section_name.lower() + for keyword in [ + "sprint", + "iteration", + "cycle", + "week", + ] + ): + sprints.append(section_name) + # Build task data dictionary. + task_data = { + # User info. + "user_name": member["name"], + "user_email": member["email"], + "user_gid": member["gid"], + # Task info. 
+ "task_name": task.get("name", "Untitled"), + "task_gid": task["gid"], + # Dates. + "created_at": created_at, + "completed_at": completed_at, + "due_on": task.get("due_on"), + "due_at": due_at, + "start_on": task.get("start_on"), + # Status. + "is_completed": task.get("completed", False), + "is_overdue": is_overdue, + # Time tracking. + "estimated_hours": estimated_hours, + "actual_hours": actual_hours, + # Organization. + "project": projects[0] if projects else None, + "all_projects": ", ".join(projects) if projects else None, + "project_gid": project_gids[0] if project_gids else None, + "tags": ", ".join(tags) if tags else None, + "section": sections[0] if sections else None, + "sprint": sprints[0] if sprints else None, + "all_sprints": ", ".join(sprints) if sprints else None, + "num_subtasks": task.get("num_subtasks", 0), + } + # Add comment/activity metrics if included. + if include_comments: + task_data.update( + { + "num_comments": task.get("num_comments", 0), + "num_activities": task.get("num_activities", 0), + "total_stories": task.get("total_stories", 0), + "unique_commenters": task.get( + "unique_commenters", 0 + ), + "last_activity_at": task.get("last_activity_at"), + "task_age_days": task.get("task_age_days", 0), + "comments_per_day": task.get( + "comments_per_day", 0.0 + ), + "activities_per_day": task.get( + "activities_per_day", 0.0 + ), + "total_activity_per_day": task.get( + "total_activity_per_day", 0.0 + ), + "days_since_activity": task.get( + "days_since_activity" + ), + } + ) + # Add team - either from project name or mapping. + if project_names: + # Determine team from project name. + task_data["team"] = task_data["project"] + elif team_mapping: + task_data["team"] = team_mapping.get( + member["name"], "Unassigned" + ) + else: + # No team mapping, use project as team (default). + task_data["team"] = task_data["project"] + all_task_data.append(task_data) + # Create DataFrame. 
def create_team_comparison_df(
    self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Create team-level comparison DataFrame from task DataFrame.

    Aggregate task-level rows into one row per team. Rows whose team is
    None/NaN are skipped. Each metric is computed independently, so any
    subset of metrics can be requested (e.g. only 'completion_rate').

    :param task_df: task data with a 'team' column
    :param metrics: metrics to calculate. If None, calculate all
    :return: data with team-level aggregated metrics
    :raises ValueError: if `task_df` has no 'team' column
    """
    if "team" not in task_df.columns:
        _LOG.error(
            "task_df missing 'team' column. Available columns: %s",
            task_df.columns.tolist(),
        )
        raise ValueError(
            "task_df must have 'team' column. Pass team_mapping or "
            "project_names to create_task_dataframe()"
        )
    _LOG.info("Creating team comparison DataFrame")
    _LOG.info("Found %d unique teams in data", task_df["team"].nunique())
    # Set default metrics if not provided.
    if metrics is None:
        metrics = [
            "total_tasks",
            "completed_tasks",
            "in_progress_tasks",
            "completion_rate",
            "total_estimated_hours",
            "avg_estimated_hours",
            "total_actual_hours",
            "overdue_tasks",
            "overdue_rate",
            "unique_users",
        ]
    team_stats = []
    # Calculate metrics for each team.
    for team_name in task_df["team"].unique():
        if team_name is None or (
            isinstance(team_name, float) and pd.isna(team_name)
        ):
            _LOG.warning("Skipping None/NaN team name")
            continue
        team_data = task_df[task_df["team"] == team_name]
        num_tasks = len(team_data)
        _LOG.debug("Processing team: %s (%d tasks)", team_name, num_tasks)
        stats = {"team": team_name}
        # Columns are read only inside the branch that needs them, so a
        # metric subset does not require unrelated columns to exist.
        if "total_tasks" in metrics:
            stats["total_tasks"] = num_tasks
        if "completed_tasks" in metrics:
            stats["completed_tasks"] = team_data["is_completed"].sum()
        if "in_progress_tasks" in metrics:
            stats["in_progress_tasks"] = (~team_data["is_completed"]).sum()
        if "completion_rate" in metrics:
            if num_tasks > 0:
                # Computed from the column, not from
                # stats["completed_tasks"], which exists only when that
                # metric was requested too.
                num_completed = team_data["is_completed"].sum()
                stats["completion_rate"] = (num_completed / num_tasks) * 100
            else:
                stats["completion_rate"] = 0.0
        if "total_estimated_hours" in metrics:
            stats["total_estimated_hours"] = team_data[
                "estimated_hours"
            ].sum()
        if "avg_estimated_hours" in metrics:
            stats["avg_estimated_hours"] = team_data[
                "estimated_hours"
            ].mean()
        if "total_actual_hours" in metrics:
            stats["total_actual_hours"] = team_data["actual_hours"].sum()
        if "overdue_tasks" in metrics:
            stats["overdue_tasks"] = team_data["is_overdue"].sum()
        if "overdue_rate" in metrics:
            # Overdue share of the tasks that are still open.
            active_tasks = (~team_data["is_completed"]).sum()
            if active_tasks > 0:
                num_overdue = team_data["is_overdue"].sum()
                stats["overdue_rate"] = (num_overdue / active_tasks) * 100
            else:
                stats["overdue_rate"] = 0.0
        if "unique_users" in metrics:
            stats["unique_users"] = team_data["user_name"].nunique()
        team_stats.append(stats)
    _LOG.info("Team comparison completed for %d teams", len(team_stats))
    result = pd.DataFrame(team_stats)
    return result


def create_user_comparison_df(
    self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Create user-level comparison DataFrame with aggregated metrics.

    Aggregate task-level rows into one row per user name. Each metric is
    computed independently, so any subset of metrics can be requested.
    When a 'team' column is present, the team of the user's first row is
    reported.

    :param task_df: tasks data
    :param metrics: metrics to calculate. If None, calculate all
    :return: data with user-level aggregated metrics; empty if `task_df`
        is empty
    """
    # An empty frame may have no columns at all; return early instead of
    # failing on the 'user_name' lookup.
    if task_df.empty:
        return pd.DataFrame()
    # Set default metrics if not provided.
    if metrics is None:
        metrics = [
            "total_tasks",
            "completed_tasks",
            "completion_rate",
            "total_estimated_hours",
            "avg_estimated_hours",
            "overdue_tasks",
            "unique_projects",
        ]
    user_stats = []
    # Calculate metrics for each user.
    for user_name in task_df["user_name"].unique():
        user_data = task_df[task_df["user_name"] == user_name]
        num_tasks = len(user_data)
        stats = {
            "user_name": user_name,
            "user_email": user_data["user_email"].iloc[0],
        }
        # Add team if available.
        if "team" in task_df.columns:
            stats["team"] = user_data["team"].iloc[0]
        # Calculate each requested metric.
        if "total_tasks" in metrics:
            stats["total_tasks"] = num_tasks
        if "completed_tasks" in metrics:
            stats["completed_tasks"] = user_data["is_completed"].sum()
        if "completion_rate" in metrics:
            if num_tasks > 0:
                # Computed from the column so that it does not depend on
                # 'completed_tasks' having been requested as well.
                num_completed = user_data["is_completed"].sum()
                stats["completion_rate"] = (num_completed / num_tasks) * 100
            else:
                stats["completion_rate"] = 0.0
        if "total_estimated_hours" in metrics:
            stats["total_estimated_hours"] = user_data[
                "estimated_hours"
            ].sum()
        if "avg_estimated_hours" in metrics:
            stats["avg_estimated_hours"] = user_data[
                "estimated_hours"
            ].mean()
        if "overdue_tasks" in metrics:
            stats["overdue_tasks"] = user_data["is_overdue"].sum()
        if "unique_projects" in metrics:
            # 'all_projects' holds ", "-joined project names per task.
            unique_projects = set()
            for proj_str in user_data["all_projects"].dropna():
                unique_projects.update(proj_str.split(", "))
            stats["unique_projects"] = len(unique_projects)
        user_stats.append(stats)
    result = pd.DataFrame(user_stats)
    return result
def get_user_by_name(
    workspace_name: str,
    username: str,
    *,
    access_token: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Look up a single user of a workspace by name.

    Shortcut that builds a throwaway `EnhancedAsanaAnalytics` client,
    resolves the workspace name to its GID and delegates the search to the
    client's `get_user_by_name()`.

    :param workspace_name: name of workspace to search in
    :param username: username or partial name to search for
    :param access_token: Asana access token
    :return: user with 'gid', 'name', and 'email', or None if not found
    """
    analytics = EnhancedAsanaAnalytics(access_token)
    # The client searches by workspace GID, not by workspace name.
    gid = analytics.get_workspace_gid(workspace_name)
    return analytics.get_user_by_name(gid, username)
+ The 'project' column can be used for filtering in Kibana. + + :param workspace_name: Asana workspace name to analyze + :param start_date: start date for analysis period + :param end_date: end date for analysis period + :param project_names: project names to filter by + (e.g., ["tech-now", "tech-next"]). If None, extract ALL projects. + When provided, also uses project names as team names + :param team_mapping: usernames to team names. + Alternative to project_names. If both are None, uses project as + team + - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} + :param access_token: Asana access token. If None, reads from + environment variable ASANA_ACCESS_TOKEN + :param user_list: specific usernames or GIDs to analyze. If + None, analyze ALL team members + :param include_comments: if True, fetch comment/activity data + (default: False). Set to True to include activity metrics + :return: data with three DataFrames: + - 'tasks': detailed task-level data with sprint/section info + - 'users': user-level aggregated metrics + - 'teams': team-level aggregated metrics + """ + _LOG.info("=" * 70) + _LOG.info("STARTING KIBANA DATASET CREATION") + _LOG.info("=" * 70) + _LOG.info("Workspace: %s", workspace_name) + _LOG.info("Date range: %s to %s", start_date.date(), end_date.date()) + _LOG.info("Project filter: %s", project_names if project_names else "ALL") + _LOG.info("User filter: %s", user_list if user_list else "ALL") + _LOG.info("Include comments: %s", include_comments) + + # Initialize analytics instance. + _LOG.info("Initializing Asana Analytics client...") + analytics = EnhancedAsanaAnalytics(access_token) + + # Get workspace GID. + _LOG.info("Resolving workspace GID for: %s", workspace_name) + workspace_gid = analytics.get_workspace_gid(workspace_name) + _LOG.info("Workspace GID resolved: %s", workspace_gid) + + # Create detailed task DataFrame. 
def save_to_ndjson(
    df: pd.DataFrame, filepath: str, index_name: Optional[str] = None
) -> None:
    """
    Save DataFrame to NDJSON format for Kibana/OpenSearch bulk upload.

    Convert DataFrame to newline-delimited JSON format suitable for
    Elasticsearch/OpenSearch bulk API ingestion. Missing scalars (None, NaN,
    NaT) are written as JSON null, and values exposing `isoformat()` (pandas
    Timestamps, datetimes, dates) are written as ISO strings. List-like
    cells are passed to the JSON encoder unchanged.

    :param df: data to save
    :param filepath: output file path (e.g., 'asana_tasks.ndjson')
    :param index_name: optional index name to include in bulk action
        metadata. If None, only document data is written
    """
    _LOG.info("Saving DataFrame to NDJSON: %s", filepath)
    _LOG.info("DataFrame shape: %d rows, %d columns", len(df), len(df.columns))
    # Convert DataFrame to records (list of dicts).
    records = df.to_dict(orient="records")
    # The bulk API action line is identical for every document, so build it
    # once.
    action_line = None
    if index_name:
        action_line = json.dumps({"index": {"_index": index_name}}) + "\n"
    with open(filepath, "w", encoding="utf-8") as f:
        for record in records:
            document = {}
            for key, value in record.items():
                # `pd.isna()` on a list/array returns an array whose truth
                # value is ambiguous, so only test scalars for missingness.
                if pd.api.types.is_scalar(value) and pd.isna(value):
                    # Convert NaN/None/NaT to null.
                    document[key] = None
                elif hasattr(value, "isoformat"):
                    # Convert Timestamp/datetime/date to ISO string.
                    document[key] = value.isoformat()
                else:
                    document[key] = value
            if action_line is not None:
                # Write bulk API metadata line.
                f.write(action_line)
            # Write document data line.
            f.write(json.dumps(document) + "\n")
    _LOG.info("Successfully saved %d records to %s", len(records), filepath)
If False, save as + CSV (default: True) + :param index_prefix: prefix for index names when using NDJSON + (default: 'asana') + :return: dataset names to saved file paths + """ + _LOG.info("=" * 70) + _LOG.info("SAVING DATASETS FOR KIBANA") + _LOG.info("=" * 70) + _LOG.info("Output directory: %s", output_dir) + _LOG.info("Format: %s", "NDJSON" if use_ndjson else "CSV") + + saved_files = {} + extension = "ndjson" if use_ndjson else "csv" + + for dataset_name, df in datasets.items(): + # Construct file path. + filename = "{}_{}_{}.{}".format( + index_prefix, dataset_name, "kibana", extension + ) + filepath = "{}/{}".format(output_dir, filename) + + _LOG.info("Saving %s dataset (%d rows)...", dataset_name, len(df)) + + if use_ndjson: + # Save as NDJSON with index name. + index_name = "{}-{}".format(index_prefix, dataset_name) + save_to_ndjson(df, filepath, index_name=index_name) + else: + # Save as CSV. + df.to_csv(filepath, index=False) + _LOG.info("Saved to CSV: %s", filepath) + + saved_files[dataset_name] = filepath + + _LOG.info("=" * 70) + _LOG.info("ALL DATASETS SAVED!") + _LOG.info("=" * 70) + for dataset_name, filepath in saved_files.items(): + _LOG.info(" %s: %s", dataset_name, filepath) + _LOG.info("=" * 70) + + result = saved_files + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py new file mode 100644 index 000000000..318897d3e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py @@ -0,0 +1,2110 @@ +""" +Import as: + +import helpers.github_utils as hgitutil +""" + +import collections +import datetime +import functools +import itertools +import json +import logging +import os +import time +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple + +import 
github +import matplotlib.pyplot as plt +import pandas as pd +from tqdm import tqdm + +import helpers.hcache_simple as hcacsimp +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): Why not using helpers.hcache_simple as hcacsimp. +def github_cached(cache_type: str = "json", write_through: bool = True): + """ + Cache decorator specifically for GitHub API functions. + + Automatically excludes the 'client' parameter (first positional arg) + from cache keys since client instances change across sessions. + + :param cache_type: Type of cache ('json' or 'pickle') + :param write_through: If True, write to disk after each cache update + :return: Decorated function with caching + """ + + def decorator(func: Callable) -> Callable: + # Get function name for cache. + func_name = func.__name__ + if func_name.endswith("_intrinsic"): + func_name = func_name[: -len("_intrinsic")] + # Set cache type property. + existing_type = hcacsimp.get_cache_property(func_name, "type") + if not existing_type: + hcacsimp.set_cache_property(func_name, "type", cache_type) + + # Create a cached version that only uses args after client. + @functools.wraps(func) + def wrapper(client, *args, **kwargs): + # Create cache key from everything EXCEPT client. + cache_key = json.dumps( + {"args": args, "kwargs": kwargs}, + sort_keys=True, + default=str, + ) + # Get cache. + cache = hcacsimp.get_cache(func_name) + # Check if we have cached value. + if cache_key in cache: + _LOG.debug("Cache hit for %s", func_name) + return cache[cache_key] + # Cache miss - call the actual function. + _LOG.debug("Cache miss for %s, fetching from API", func_name) + result = func(client, *args, **kwargs) + # Store in cache + cache[cache_key] = result + # Write to disk if enabled. 
class GitHubAPI:
    """
    Hold an authenticated PyGithub client and the token it was built from.
    """

    def __init__(
        self,
        *,
        access_token: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        """
        Build the PyGithub client.

        :param access_token: GitHub personal access token; if not provided, it
            is fetched from the environment variable `GITHUB_ACCESS_TOKEN`
        :param base_url: optional custom GitHub Enterprise base URL
        :raises ValueError: if no token is passed and the environment
            variable is unset or empty
        """
        token = access_token
        if not token:
            token = os.environ.get("GITHUB_ACCESS_TOKEN")
        self.access_token = token
        if not token:
            raise ValueError(
                "GitHub Access Token is required. Set it as an environment variable or pass it explicitly."
            )
        # Only forward `base_url` when one was given so PyGithub keeps its
        # own default endpoint otherwise.
        client_kwargs = {"auth": github.Auth.Token(token)}
        if base_url:
            client_kwargs["base_url"] = base_url
        self.github = github.Github(**client_kwargs)

    def get_client(self) -> github.Github:
        """
        Return the authenticated GitHub client.

        :return: the PyGithub client created in `__init__`
        """
        return self.github

    def close_connection(self) -> None:
        """
        Close the underlying PyGithub client.
        """
        self.github.close()
def get_github_contributors(
    client: github.Github, repo_names: List[str]
) -> Dict[str, List[str]]:
    """
    Collect the contributor logins of each given repository.

    :param client: authenticated instance of the PyGithub client
    :param repo_names: repository names in the format 'owner/repo' to fetch
        contributor usernames
    :return: a dictionary keyed by the repository names passed in, each
        mapped to the list of that repository's contributor GitHub usernames
    """
    contributors_by_repo: Dict[str, List[str]] = {}
    for full_name in repo_names:
        repository = client.get_repo(full_name)
        hdbg.dassert_is_not(
            repository, None, "Could not fetch repo: %s", full_name
        )
        logins: List[str] = []
        for member in repository.get_contributors():
            logins.append(member.login)
        contributors_by_repo[full_name] = logins
    return contributors_by_repo
def get_total_commits(
    client: github.Github,
    org_name: str,
    *,
    usernames: Optional[List[str]] = None,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
) -> Dict[str, Any]:
    """
    Fetch the number of commits made in the repositories of the specified
    organization, optionally filtered by GitHub usernames and a specified time
    period.

    The period is normalized to UTC with `normalize_period_to_utc()`, the
    same way the pull-request counters in this module do it.

    :param client: authenticated instance of the PyGithub client
    :param org_name: name of the GitHub organization
    :param usernames: GitHub usernames to filter commits; if None, fetches for
        all users
    :param period: start and end datetime for filtering commits; if None, no
        time filter is applied
    :return: a dictionary containing:
        - total_commits (int): total number of commits across all repositories
        - period (str): the time range considered
        - commits_per_repository (Dict[str, int]): repository names as keys and
          commit counts as values
    """
    # Retrieve organization repositories.
    repos_info = get_repo_names(client, org_name)
    hdbg.dassert_in(
        "repositories",
        repos_info,
        "Missing 'repositories' key in get_repo_names() output",
    )
    repositories = repos_info["repositories"]
    since, until = normalize_period_to_utc(period)
    # PyGithub accepts only a datetime (or its own NotSet default) for
    # `since`/`until`; passing None is rejected, so forward only the bounds
    # that are actually set.
    time_filter: Dict[str, Any] = {}
    if since is not None:
        time_filter["since"] = since
    if until is not None:
        time_filter["until"] = until
    total_commits = 0
    commits_per_repository = {}
    for repo_name in tqdm(
        repositories, desc="Processing repositories", unit="repo"
    ):
        repo = client.get_repo(f"{org_name}/{repo_name}")
        hdbg.dassert_is_not(repo, None, "Could not retrieve repo: %s", repo_name)
        repo_commit_count = 0
        if usernames:
            # One query per author; the counts are summed per repository.
            for username in usernames:
                commits = repo.get_commits(author=username, **time_filter)
                hdbg.dassert_is_not(
                    commits,
                    None,
                    "Failed to get commits by '%s' in %s",
                    username,
                    repo_name,
                )
                repo_commit_count += commits.totalCount
        else:
            commits = repo.get_commits(**time_filter)
            hdbg.dassert_is_not(
                commits, None, "Failed to get commits in %s", repo_name
            )
            repo_commit_count = commits.totalCount
        commits_per_repository[repo_name] = repo_commit_count
        total_commits += repo_commit_count
    result = {
        "total_commits": total_commits,
        "period": f"{since} to {until}" if since and until else "All time",
        "commits_per_repository": commits_per_repository,
    }
    return result
Optional[Tuple[datetime.datetime, datetime.datetime]] = None, + state: str = "all", +) -> Dict[str, Any]: + """ + Fetch the number of pull requests made in the repositories of the specified + organization, optionally filtered by GitHub usernames, a specified time + period, and the state of the pull requests. + + :param client: authenticated instance of the PyGithub client + :param org_name: name of the GitHub organization + :param usernames: GitHub usernames to filter pull requests; if None, fetches + for all users + :param period: start and end datetime for filtering pull requests + :param state: the state of the pull requests to fetch; can be 'open', 'closed', or 'all' + :return: a dictionary containing: + - total_prs (int): total number of pull requests + - period (str): the time range considered + - prs_per_repository (Dict[str, int]): repository names as keys and pull + request counts as values + """ + # Retrieve repositories for the organization + repos_info = get_repo_names(client, org_name) + hdbg.dassert_in( + "repositories", repos_info, "Missing 'repositories' key in repo info" + ) + repositories = repos_info["repositories"] + total_prs = 0 + prs_per_repository = {} + since, until = normalize_period_to_utc(period) + for repo_name in tqdm( + repositories, desc="Processing repositories", unit="repo" + ): + repo = client.get_repo(f"{org_name}/{repo_name}") + hdbg.dassert_is_not( + repo, None, "Could not retrieve repository: %s", repo_name + ) + repo_pr_count = 0 + pulls = repo.get_pulls(state=state) + for pr in pulls: + hdbg.dassert_is_not( + pr, None, "PR could not be fetched in %s", repo_name + ) + if usernames and pr.user.login not in usernames: + continue + pr_created_at = ( + pr.created_at.replace(tzinfo=datetime.timezone.utc) + if pr.created_at.tzinfo is None + else pr.created_at.astimezone(datetime.timezone.utc) + ) + if since and until and not (since <= pr_created_at <= until): + continue + repo_pr_count += 1 + prs_per_repository[repo_name] = 
def get_prs_not_merged(
    client: github.Github,
    org_name: str,
    *,
    usernames: Optional[List[str]] = None,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
) -> Dict[str, Any]:
    """
    Fetch the count of closed but unmerged pull requests in the specified
    repositories and by the specified GitHub users within a given period.

    Closed pull requests are discovered through the repository's closed
    issues. The author and creation-date filters are applied to the issue
    record first, and the pull request itself is fetched only to read its
    merged flag.

    :param client: authenticated instance of the PyGithub client
    :param org_name: name of the GitHub organization
    :param usernames: GitHub usernames to filter pull requests; if None,
        fetches for all users
    :param period: start and end datetime for filtering pull requests by
        creation time; if None, no time filter is applied
    :return: a dictionary containing:
        - prs_not_merged (int): total number of closed but unmerged pull requests
        - period (str): the time range considered
        - prs_per_repository (Dict[str, int]): repository names as keys and
          unmerged pull request counts as values
    """
    # Fetch all repositories in the org.
    repos_info = get_repo_names(client, org_name)
    hdbg.dassert_in(
        "repositories",
        repos_info,
        "Missing 'repositories' in get_repo_names() output",
    )
    repositories = repos_info["repositories"]
    total_unmerged_prs = 0
    prs_per_repository = {}
    since, until = normalize_period_to_utc(period)
    # PyGithub accepts only a datetime (or its own NotSet default) for
    # `since`; passing None is rejected, so add it only when a period is set.
    issue_filter: Dict[str, Any] = {"state": "closed"}
    if since is not None:
        issue_filter["since"] = since
    for repo_name in tqdm(
        repositories, desc="Processing repositories", unit="repo"
    ):
        # Fetch repo object.
        repo = client.get_repo(f"{org_name}/{repo_name}")
        hdbg.dassert_is_not(
            repo,
            None,
            "Could not fetch repo: %s/%s",
            org_name,
            repo_name,
        )
        repo_unmerged_pr_count = 0
        for issue in repo.get_issues(**issue_filter):
            # The issues listing mixes plain issues and pull requests.
            if not issue.pull_request:
                continue
            _LOG.debug("Processing PR #%d from %s", issue.number, repo_name)
            # A pull request's issue record carries the same author and
            # creation time, so filter here before the extra API call.
            if usernames and issue.user.login not in usernames:
                continue
            created_at = issue.created_at or datetime.datetime.min
            created_at = (
                created_at.replace(tzinfo=datetime.timezone.utc)
                if created_at.tzinfo is None
                else created_at.astimezone(datetime.timezone.utc)
            )
            if since and until and not (since <= created_at <= until):
                continue
            pull = repo.get_pull(issue.number)
            hdbg.dassert_is_not(
                pull,
                None,
                "Could not fetch pull request #%d in %s",
                issue.number,
                repo_name,
            )
            if pull.merged:
                continue
            repo_unmerged_pr_count += 1
        prs_per_repository[repo_name] = repo_unmerged_pr_count
        total_unmerged_prs += repo_unmerged_pr_count
    result = {
        "prs_not_merged": total_unmerged_prs,
        "period": f"{since} to {until}" if since and until else "All time",
        "prs_per_repository": prs_per_repository,
    }
    return result
def get_prs_by_user(
    client: github.Github,
    username: str,
    org_name: str,
    *,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
    state: str = "all",
) -> Dict[str, Any]:
    """
    Count the pull requests one GitHub user created across an organization.

    Thin wrapper over `get_total_prs()` restricted to a single username; the
    result is the organization-wide summary re-labelled with that user.

    :param client: authenticated instance of the PyGithub client
    :param username: GitHub username to fetch pull request data for
    :param org_name: name of the GitHub organization
    :param period: start and end datetime for filtering pull requests
    :param state: state of the pull requests to fetch; can be 'open', 'closed',
        or 'all'
    :return: a dictionary containing:
        - user (str): GitHub username
        - total_prs (int): total number of pull requests created
        - period (str): the time range considered
        - prs_per_repository (Dict[str, int]): repository names as keys and pull
          request counts as values
    """
    totals = get_total_prs(
        client,
        org_name,
        usernames=[username],
        period=period,
        state=state,
    )
    summary: Dict[str, Any] = {"user": username}
    # Copy the aggregate fields over in the order callers already see.
    for field in ("total_prs", "period", "prs_per_repository"):
        summary[field] = totals[field]
    return summary
def days_between(
    period: Tuple[datetime.datetime, datetime.datetime],
) -> List[datetime.date]:
    """
    List every calendar date from the period's start to its end, inclusive.

    Only the date part of each bound is used; a period whose end date lies
    before its start date yields an empty list.

    :param period: start and end datetime
    :return: date span
    """
    first_day = period[0].date()
    # Whole days separating the two bounds; negative when the end comes
    # first, which makes the range below empty.
    gap = (period[1].date() - first_day).days
    days: List[datetime.date] = [
        first_day + datetime.timedelta(days=offset) for offset in range(gap + 1)
    ]
    _LOG.debug("Generated %d days in period.", len(days))
    return days
@github_cached(cache_type="json", write_through=True)
def get_pr_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Collect the creation timestamps of a user's pull requests in one repo.

    The pull requests are found through the GitHub issue search, using the
    date part of `since`/`until` as the `created:` range.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: PR created timestamps in ISO format
    """
    # The search qualifier works on calendar dates, not on datetimes.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    query = f"repo:{org}/{repo} is:pr author:{username} created:{since_date}..{until_date}"
    timestamps: List[str] = []
    for found in client.search_issues(query):
        created = found.created_at
        # Naive timestamps are labelled as UTC; aware ones are kept as-is.
        if not created.tzinfo:
            created = created.replace(tzinfo=datetime.timezone.utc)
        timestamps.append(created.isoformat())
    # Log the results summary.
    if timestamps:
        _LOG.info(
            "Found %d PRs for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    else:
        _LOG.debug(
            "No PRs found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since_date,
            until_date,
        )
    return timestamps
@github_cached(cache_type="json", write_through=True)
def get_loc_stats_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[Dict[str, int]]:
    """
    Fetch per-commit LOC stats for user in repo over period.

    A commit counts for the user when `username` equals either the author
    login or the committer login. Commits with incomplete metadata or without
    stats are skipped. Collection stops once 1000 entries have been gathered.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: one dict per commit with keys `date` (ISO date string),
        `additions` and `deletions`
    """
    # Safety limit on collected entries, to avoid timeouts on long histories.
    max_entries = 1000
    stats_list: List[Dict[str, int]] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Retrieve all commits in the specified time period.
    commits = repo_obj.get_commits(since=since, until=until)
    # Process each commit to extract LOC statistics.
    for c in commits:
        # Extract author and committer logins.
        author_login = c.author.login if c.author else None
        committer_login = c.committer.login if c.committer else None
        # Skip commits not by the target user.
        if username not in (author_login, committer_login):
            continue
        # Skip commits with incomplete metadata; the date is needed below.
        if not c.commit or not c.commit.author or not c.commit.author.date:
            _LOG.debug("Incomplete metadata for commit %s", c.sha)
            continue
        # Fetch commit statistics (only for commits by the target user).
        s = c.stats
        # Skip if statistics are not available.
        if s is None:
            _LOG.debug("No stats available for commit %s", c.sha)
            continue
        # Tag naive commit dates as UTC; aware ones are kept untouched.
        dt = c.commit.author.date
        dt_utc = dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)
        iso = dt_utc.date().isoformat()
        # Add statistics to results list.
        stats_list.append(
            {"date": iso, "additions": s.additions, "deletions": s.deletions}
        )
        # Stop exactly at the limit, so the warning matches what was collected.
        if len(stats_list) >= max_entries:
            _LOG.warning(
                "Processed %d commits, stopping to avoid timeout", max_entries
            )
            break
    # Log the results summary.
    if not stats_list:
        _LOG.info(
            "No LOC stats found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched LOC stats for %s/%s user=%s entries=%d.",
            org,
            repo,
            username,
            len(stats_list),
        )
    return stats_list
@github_cached(cache_type="json", write_through=True)
def get_pr_review_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Fetch PR review timestamps for user in repo over period.

    Candidate PRs come from an issue search using the `reviewed-by:` and
    `updated:` qualifiers; each candidate's reviews are then filtered by
    reviewer login and by `since <= submitted time <= until`.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: review timestamps in ISO format
    """
    timestamps: List[str] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Format dates for GitHub search query.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    # Build search query for PRs reviewed by the user.
    query = f"repo:{org}/{repo} is:pr reviewed-by:{username} updated:{since_date}..{until_date}"
    # Execute the search query.
    results = client.search_issues(query)
    # Process each PR to find user's reviews.
    for issue in results:
        # Fetch the full PR object.
        pr = repo_obj.get_pull(issue.number)
        # Filter this PR's reviews by the target user.
        for review in pr.get_reviews():
            # A review without a user has no login to match; skip it
            # together with reviews by other users.
            if review.user is None or review.user.login != username:
                continue
            review_dt = review.submitted_at
            # A review without a submission time (e.g. one that is still
            # pending) cannot be placed in the period; skip it.
            if review_dt is None:
                continue
            # Tag naive review dates as UTC; aware ones are kept untouched.
            review_dt_utc = (
                review_dt
                if review_dt.tzinfo
                else review_dt.replace(tzinfo=datetime.timezone.utc)
            )
            # Add timestamp if within the specified period.
            if since <= review_dt_utc <= until:
                timestamps.append(review_dt_utc.isoformat())
    # Log the results summary.
    if not timestamps:
        _LOG.info(
            "No PR reviews found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched %d PR reviews for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    return timestamps
def slice_by_date(df, start, end, date_col="date"):
    """
    Return the rows of a DataFrame whose date lies in `[start, end]`.

    The input is left untouched: the date column is parsed on a copy, and
    values that cannot be parsed become `NaT` and therefore never match.

    :param df: input DataFrame
    :param start: start date (inclusive)
    :param end: end date (inclusive)
    :param date_col: name of the date column in df
    :return: filtered copy with `date_col` converted to datetimes
    """
    parsed = df.assign(
        **{date_col: pd.to_datetime(df[date_col], errors="coerce")}
    )
    # `between` is inclusive on both ends by default.
    in_range = parsed[date_col].between(start, end)
    return parsed.loc[in_range].copy()
+ + :param client: authenticated PyGithub client + :param org: GitHub org name + :param repo: repository name + :param username: GitHub username + :param period: start and end datetime objects + :return: data with columns date, issues_assigned, issues_closed, + repo, user + """ + issue_data = get_issue_datetimes_by_repo_intrinsic( + client, org, repo, username, period + ) + df_assigned = pd.DataFrame( + {"ts": pd.to_datetime(issue_data["assigned"]), "issues_assigned": 1} + ) + df_assigned["date"] = df_assigned.ts.dt.date + df_closed = pd.DataFrame( + {"ts": pd.to_datetime(issue_data["closed"]), "issues_closed": 1} + ) + df_closed["date"] = df_closed.ts.dt.date + # Daily counts. + daily_assigned = ( + df_assigned.groupby("date")["issues_assigned"].sum().reset_index() + ) + daily_closed = df_closed.groupby("date")["issues_closed"].sum().reset_index() + all_days = pd.DataFrame({"date": days_between(period)}) + daily = all_days.merge(daily_assigned, on="date", how="left").merge( + daily_closed, on="date", how="left" + ) + daily[["issues_assigned", "issues_closed"]] = ( + daily[["issues_assigned", "issues_closed"]].fillna(0).astype(int) + ) + daily["repo"] = repo + daily["user"] = username + _LOG.debug("Built daily issue DataFrame rows=%d.", len(daily)) + return daily + + +def build_daily_pr_df( + client, + org: str, + repo: str, + username: str, + period: Tuple[datetime.datetime, datetime.datetime], +) -> pd.DataFrame: + """ + Build daily PR counts for user and repo over period. 
def build_daily_loc_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Build daily LOC additions and deletions for user and repo over period.

    Every day returned by `days_between(period)` gets one row; days without
    LOC stats get zero counts. Counts are returned as sign-prefixed strings,
    e.g. `"+12"` for additions and `"-3"` for deletions.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, additions, deletions, repo, user
    """
    since, until = period
    # Fetch raw LOC stats list.
    stats_list = get_loc_stats_by_repo_period_intrinsic(
        client, org, repo, username, since, until
    )
    # One row per day of the period, whether or not there is any data.
    all_days = pd.DataFrame({"date": days_between(period)})
    if stats_list:
        df = pd.DataFrame(stats_list)
        df["date"] = pd.to_datetime(df["date"]).dt.date
        # Sum per date.
        per_day = (
            df.groupby("date")[["additions", "deletions"]].sum().reset_index()
        )
        daily = all_days.merge(per_day, on="date", how="left")
        # Days without commits come out of the merge as NaN.
        daily[["additions", "deletions"]] = (
            daily[["additions", "deletions"]].fillna(0).astype(int)
        )
        log_msg = "Built daily LOC DataFrame rows=%d."
    else:
        # No stats at all: zero counts for the full range.
        daily = all_days.assign(additions=0, deletions=0)
        log_msg = "Built daily LOC DataFrame rows=%d (no data)."
    # Apply sign formatting once, for both the data and the no-data case.
    daily["additions"] = "+" + daily["additions"].astype(str)
    daily["deletions"] = "-" + daily["deletions"].astype(str)
    # Add context.
    daily["repo"] = repo
    daily["user"] = username
    _LOG.debug(log_msg, len(daily))
    return daily
def prefetch_periodic_user_repo_data(
    client,
    org: str,
    repos: List[str],
    users: List[str],
    period: Tuple[datetime.datetime, datetime.datetime],
) -> None:
    """
    Warm the cache with commits, PRs, LOC stats and issues for every
    (repo, user) pair over the period.

    Issue comments and PR reviews are currently not fetched; they are logged
    as zero.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repos: repository names
    :param users: GitHub usernames
    :param period: start and end datetime objects
    :raises ValueError: if `org` is not a string, or `repos` / `users` is
        not a list of strings
    """
    # Reject malformed inputs up front.
    if not isinstance(org, str):
        raise ValueError(f"org must be a string, got {type(org).__name__}")
    for label, names in (("repos", repos), ("users", users)):
        is_str_list = isinstance(names, list) and all(
            isinstance(name, str) for name in names
        )
        if not is_str_list:
            raise ValueError(f"{label} must be a list of strings")
    # Start the timer and enumerate every (repo, user) combination.
    started_at = time.time()
    since, until = period
    pairs = list(itertools.product(repos, users))
    n_done = 0
    # The fetchers are called for their caching side effect; the results are
    # only used for the progress log.
    for n_done, (repo, user) in enumerate(
        tqdm(pairs, desc="Prefetching user-repo data"), start=1
    ):
        fetch_args = (client, org, repo, user, since, until)
        commits = get_commit_datetimes_by_repo_period_intrinsic(*fetch_args)
        prs = get_pr_datetimes_by_repo_period_intrinsic(*fetch_args)
        locs = get_loc_stats_by_repo_period_intrinsic(*fetch_args)
        issues = get_issue_datetimes_by_repo_intrinsic(
            client, org, repo, user, period
        )
        # Comment and review prefetching is switched off; empty placeholders
        # keep the log line format unchanged.
        issue_comments: List[str] = []
        pr_reviews: List[str] = []
        _LOG.info(
            "%s/%s: %d commits, %d PRs, %d LOC entries, %d issues assigned, "
            "%d closed, %d issue comments, %d PR reviews",
            repo,
            user,
            len(commits),
            len(prs),
            len(locs),
            len(issues["assigned"]),
            len(issues["closed"]),
            len(issue_comments),
            len(pr_reviews),
        )
    # Report overall prefetch duration.
    _LOG.info(
        "Prefetched %d user-repo combos in %.2f seconds for period %s to %s.",
        n_done,
        time.time() - started_at,
        since,
        until,
    )
def build_daily_pr_review_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count a user's PR reviews per day in a repo over a period.

    Every day returned by `days_between(period)` gets one row; days without
    reviews get a count of zero.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, pr_reviews, repo, user
    """
    period_start, period_end = period
    review_times = get_pr_review_datetimes_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    # Reduce each review timestamp to its calendar date and count per date.
    reviews = pd.DataFrame({"ts": pd.to_datetime(review_times)})
    reviews["date"] = reviews["ts"].dt.date
    per_day = reviews.groupby("date").size().reset_index(name="pr_reviews")
    # Left-join onto the full calendar so days without reviews are kept.
    calendar = pd.DataFrame({"date": days_between(period)})
    result = calendar.merge(per_day, how="left", on="date")
    result["pr_reviews"] = result["pr_reviews"].fillna(0).astype(int)
    result = result.assign(repo=repo, user=username)
    _LOG.debug("Built daily PR review DataFrame rows=%d.", len(result))
    return result
def summarize_user_metrics_for_repo(
    combined: pd.DataFrame, repo: str
) -> pd.DataFrame:
    """
    Summarize total commits, PRs, LOC and issues per user in a specific
    repository.

    The `additions` / `deletions` columns of `combined` are expected to be
    sign-prefixed strings (e.g. `"+12"`, `"-3"`); the sign is stripped before
    summing. Issue comments and PR reviews are not part of this summary.

    :param combined: data with all metrics
    :param repo: repository name
    :return: data with columns user, commits, prs, additions, deletions,
        issues_assigned, issues_closed
    """
    df = combined[combined["repo"] == repo].copy()
    # `regex=False` makes "+" a literal character on every pandas version
    # instead of relying on the version-dependent default.
    df["additions"] = (
        df["additions"].str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].str.replace("-", "", regex=False).astype(int)
    )
    df["issues_assigned"] = df["issues_assigned"].astype(int)
    df["issues_closed"] = df["issues_closed"].astype(int)
    summary = (
        df.groupby("user")
        .agg(
            commits=pd.NamedAgg(column="commits", aggfunc="sum"),
            prs=pd.NamedAgg(column="prs", aggfunc="sum"),
            additions=pd.NamedAgg(column="additions", aggfunc="sum"),
            deletions=pd.NamedAgg(column="deletions", aggfunc="sum"),
            issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"),
            issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"),
        )
        .reset_index()
    )
    return summary
+ + :param combined: data with all metrics + :param user: GitHub username + :return: columns repo, commits, prs, additions, deletions, + issues_assigned, issues_closed, issue_comments, pr_reviews + """ + df = combined[combined["user"] == user].copy() + df["additions"] = df["additions"].str.replace("+", "").astype(int) + df["deletions"] = df["deletions"].str.replace("-", "").astype(int) + df["issue_comments"] = df["issue_comments"].astype(int) + df["pr_reviews"] = df["pr_reviews"].astype(int) + summary = ( + df.groupby("repo") + .agg( + commits=pd.NamedAgg(column="commits", aggfunc="sum"), + prs=pd.NamedAgg(column="prs", aggfunc="sum"), + additions=pd.NamedAgg(column="additions", aggfunc="sum"), + deletions=pd.NamedAgg(column="deletions", aggfunc="sum"), + issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"), + issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"), + issue_comments=pd.NamedAgg(column="issue_comments", aggfunc="sum"), + pr_reviews=pd.NamedAgg(column="pr_reviews", aggfunc="sum"), + ) + .reset_index() + ) + return summary + + +def summarize_users_across_repos( + combined: pd.DataFrame, + users: List[str], + repos: List[str], +) -> pd.DataFrame: + """ + Aggregate commit / PR / LOC / issue / comment / review totals per-user + across a repo subset. + + :param combined: output of `collect_all_metrics` + :param users: GitHub usernames + :param repos: repository names + :return: data with columns user, commits, prs, additions, deletions, + issues_assigned, issues_closed, issue_comments, pr_reviews + """ + # Filter to requested slice. + df = combined[ + combined["user"].isin(users) & combined["repo"].isin(repos) + ].copy() + # Normalise numeric columns. 
+ df["additions"] = df["additions"].str.replace("+", "").astype(int) + df["deletions"] = df["deletions"].str.replace("-", "").astype(int) + df["issue_comments"] = df["issue_comments"].astype(int) + df["pr_reviews"] = df["pr_reviews"].astype(int) + df.rename( + columns={ + "issues_assigned": "issues_assigned", + "issues_closed": "issues_closed", + }, + inplace=True, + errors="ignore", + ) + # Aggregate across repos. + summary = ( + df.groupby("user") + .agg( + commits=("commits", "sum"), + prs=("prs", "sum"), + additions=("additions", "sum"), + deletions=("deletions", "sum"), + issues_assigned=("issues_assigned", "sum"), + issues_closed=("issues_closed", "sum"), + issue_comments=("issue_comments", "sum"), + pr_reviews=("pr_reviews", "sum"), + ) + .reset_index() + ) + return summary + + +def _filter_period( + df: pd.DataFrame, + *, + start: Optional[datetime.datetime] = None, + end: Optional[datetime.datetime] = None, +) -> pd.DataFrame: + """ + Slice a DataFrame by date using optional start and end boundaries. + + :param df: data with a 'date' column + :param start: start datetime (inclusive) + :param end: end datetime (inclusive) + :return: filtered data such that start ≤ date ≤ end + """ + if not pd.api.types.is_datetime64_any_dtype(df["date"]): + df = df.copy() + df["date"] = pd.to_datetime(df["date"]) + if start is not None: + df = df[df["date"] >= start] + if end is not None: + df = df[df["date"] <= end] + return df + + +def _plot_grouped_bars( + summary: pd.DataFrame, + index_col: str, + title: str, + *, + metrics: Optional[List[str]] = None, +) -> None: + """ + Internal helper to render grouped bar plots. + + :param summary: data with one row per category (user or repo), and + one column per metric + :param index_col: column name(e.g., "user" or "repo") + :param metrics: subset of metrics to plot (e.g., ["commits", "prs"]) + :param title: chart title + """ + # Validate and prepare the list of metrics to plot. 
def plot_metrics_by_user(
    combined: pd.DataFrame,
    repo: str,
    *,
    start: Optional[datetime.datetime] = None,
    end: Optional[datetime.datetime] = None,
    users: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
) -> None:
    """
    Render a grouped bar chart comparing users within a single repo.

    The data is first restricted to the requested period, then summarized
    per user for `repo`, optionally narrowed to `users`, and finally handed
    to `_plot_grouped_bars`.

    :param combined: output from `collect_all_metrics`
    :param repo: repository name
    :param start: start datetime (inclusive); no lower bound when omitted
    :param end: end datetime (inclusive); no upper bound when omitted
    :param users: optional subset of GitHub usernames to show
    :param metrics: metrics to plot, passed through to `_plot_grouped_bars`
    """
    in_window = _filter_period(df=combined, start=start, end=end)
    per_user = summarize_user_metrics_for_repo(in_window, repo)
    if users is not None:
        keep = per_user["user"].isin(users)
        per_user = per_user[keep]
    # A missing bound is shown as "ALL" in the title.
    start_label = start.date() if start else "ALL"
    end_label = end.date() if end else "ALL"
    chart_title = f"Metric comparison for {repo} ({start_label} -> {end_label})"
    _plot_grouped_bars(
        per_user,
        index_col="user",
        title=chart_title,
        metrics=metrics,
    )
+ + :param combined: data from `collect_all_metrics` + :param user: GitHub username + :param start: start datetime (inclusive) + :param end: end datetime (inclusive) + :param repos: repos to include + :param metrics: metrics to plot; defaults to all numeric columns + :return: grouped bar chart where each group = metric, each bar = repo + """ + df_period = _filter_period(df=combined, start=start, end=end) + summary = summarize_repo_metrics_for_user(df_period, user) + if repos is not None: + summary = summary[summary["repo"].isin(repos)] + _plot_grouped_bars( + summary, + index_col="repo", + metrics=metrics, + title=f"Metric comparison for {user} " + f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})", + ) + + +def plot_multi_metrics_totals_by_user( + combined: pd.DataFrame, + metrics: List[str], + *, + start: Optional[datetime.datetime] = None, + end: Optional[datetime.datetime] = None, + users: Optional[List[str]] = None, + repos: Optional[List[str]] = None, +) -> None: + """ + Plot multiple metrics (summed across repos) per user as grouped bars. + + :param combined: data from `collect_all_metrics` + :param metrics: metrics to plot, e.g. ["commits", "prs", "additions"] + :param start: start datetime (inclusive) + :param end: end datetime (inclusive) + :param users: users to include + :param repos: repos to include + :return: grouped bar chart where each group = user, each bar = one metric + """ + df_period = _filter_period(df=combined, start=start, end=end) + # Aggregate totals for each user across the selected repos. + summary = summarize_users_across_repos( + df_period, + users or df_period["user"].unique().tolist(), + repos or df_period["repo"].unique().tolist(), + ) + if users is not None: + summary = summary[summary["user"].isin(users)] + # Validate metrics exist. + for metric in metrics: + if metric not in summary.columns: + raise ValueError(f"Metric '{metric}' not found in summary columns") + # Set up bar positions and sizing. 
def get_contributors_for_repo(
    client,
    org: str,
    repo: str,
    *,
    top_n: Optional[int] = None,
) -> List[str]:
    """
    Fetch GitHub usernames of contributors to a repository.

    Contributors are taken in the order returned by the repository's
    `get_contributors()` call.

    :param client: authenticated PyGithub client
    :param org: GitHub organization name
    :param repo: repository name
    :param top_n: if specified, return only the first N contributors;
        `None` returns all of them and `0` returns none
    :return: GitHub usernames
    """
    repo_obj = client.get_repo(f"{org}/{repo}")
    contributors = repo_obj.get_contributors()
    usernames: List[str] = []
    for idx, user in enumerate(contributors):
        # Compare against None explicitly: a plain truthiness check would
        # treat `top_n=0` as "no limit" and return everyone.
        if top_n is not None and idx >= top_n:
            break
        usernames.append(user.login)
    _LOG.info("Fetched %d contributors for %s/%s", len(usernames), org, repo)
    return usernames
def compute_z_scores(summary: pd.DataFrame, metrics: List[str]) -> pd.DataFrame:
    """
    Compute z-score (standardized score) for specified metrics across users.

    This helps assess how far a user's metric is from the group mean in units
    of standard deviation.

    When a metric has no measurable spread (all values equal, or too few rows
    for pandas to compute a standard deviation), every present value is
    assigned a z-score of 0 instead of NaN, since each value sits at the mean.

    :param summary: data with users and raw metric values
    :param metrics: metric column names to compute z-scores for
    :return: copy of `summary` with added z-score columns suffixed with `_z`
    """
    z_df = summary.copy()
    for metric in metrics:
        values = z_df[metric]
        centered = values - values.mean()
        std = values.std()
        if pd.isna(std) or std == 0:
            # Dividing by a zero / undefined std would turn the whole column
            # into NaN. Multiplying by 0.0 yields 0 for present values and
            # keeps NaN for missing ones.
            z_df[metric + "_z"] = centered * 0.0
        else:
            z_df[metric + "_z"] = centered / std
    return z_df
def visualize_user_metric_comparison(
    stats: pd.DataFrame,
    *,
    score_type: Literal["z", "percentile"] = "z",
    top_n: Optional[int] = None,
) -> None:
    """
    Visualize user performance across all available metrics using z-scores or
    percentiles.

    Two outputs are produced: a styled table of the score columns (displayed
    through IPython) and a bar chart of the per-user average score. The input
    DataFrame is not modified.

    :param stats: data with a `user` column and score columns suffixed with
        `_z` (z-scores) or `_pctile` (percentiles)
    :param score_type: "z" for z-scores or "percentile" for relative
        percentiles
    :param top_n: number of top users to show in leaderboard bar chart; `None`
        shows all users
    :raises ValueError: if `stats` has no column with the expected suffix
    """
    suffix = "_z" if score_type == "z" else "_pctile"
    score_cols = [col for col in stats.columns if col.endswith(suffix)]
    if not score_cols:
        raise ValueError(
            f"No columns ending with '{suffix}' found in input DataFrame."
        )
    # Stylized table.
    # Import the submodule explicitly so that `IPython.display` is guaranteed
    # to be loaded.
    import IPython.display

    IPython.display.display(
        stats[["user"] + score_cols]
        .set_index("user")
        .style.format("{:.2f}")
        .background_gradient(
            axis=0, cmap="Greens" if score_type == "percentile" else "RdYlGn"
        )
    )
    # Leaderboard chart (by average score).
    # Build the ranking in a separate frame instead of adding and dropping a
    # temporary column on the caller's DataFrame: the caller's data stays
    # untouched even if plotting raises.
    leaderboard = pd.DataFrame(
        {"user": stats["user"], "score_avg": stats[score_cols].mean(axis=1)}
    ).sort_values("score_avg", ascending=False)
    if top_n is None:
        top_n_display = len(leaderboard)
    else:
        leaderboard = leaderboard.head(top_n)
        top_n_display = top_n
    _, ax = plt.subplots(figsize=(max(8, 0.5 * len(leaderboard)), 4))
    ax.bar(leaderboard["user"], leaderboard["score_avg"], color="skyblue")
    ax.set_ylabel(
        "Average Score"
        + (" (Z-score)" if score_type == "z" else " (Percentile)")
    )
    ax.set_title(f"Top {top_n_display} Users by Average {score_type.title()}")
    # Reference line at the neutral score: 0 for z-scores, 0.5 for percentiles.
    ax.axhline(0 if score_type == "z" else 0.5, color="gray", linestyle="--")
    plt.xticks(rotation=15, ha="right")
    plt.tight_layout()
    plt.show()
def count_open_prs_by_author(
    repo_obj,
) -> Dict[str, Dict[str, int]]:
    """
    Tally the open PRs of a repository per author, split by draft status.

    :param repo_obj: PyGithub repository object
    :return: plain dict mapping author login -> {"ready": int, "draft": int}
    """
    per_author: Dict[str, Dict[str, int]] = {}
    for pull in repo_obj.get_pulls(state="open"):
        login = pull.user.login
        bucket = "draft" if pull.draft else "ready"
        # First PR seen for an author starts both counters at zero.
        tally = per_author.setdefault(login, {"ready": 0, "draft": 0})
        tally[bucket] += 1
        _LOG.debug("Open PR #%d by %s status=%s", pull.number, login, bucket)
    return per_author
def print_open_pr_stats(
    open_stats: Dict[str, Dict[str, int]],
) -> None:
    """
    Log a table of open PR counts per author, split into ready and draft.

    Authors are listed by total open PRs, largest first, followed by a TOTAL
    row. Nothing but a short notice is logged when there are no open PRs.

    :param open_stats: dict mapping author -> {"ready": int, "draft": int}
    """
    if not open_stats:
        _LOG.info("No open PRs found.")
        return

    def _total_open(entry) -> int:
        return entry[1]["ready"] + entry[1]["draft"]

    # Largest total first.
    ranked = sorted(open_stats.items(), key=_total_open, reverse=True)
    header = f"{'Author':<25} {'Ready':>7} {'Draft':>7} {'Total':>7}"
    rule = "-" * len(header)
    _LOG.info("Open PRs by author:")
    _LOG.info(rule)
    _LOG.info(header)
    _LOG.info(rule)
    ready_sum = 0
    draft_sum = 0
    for author, counts in ranked:
        n_ready, n_draft = counts["ready"], counts["draft"]
        ready_sum += n_ready
        draft_sum += n_draft
        _LOG.info("%-25s %7d %7d %7d", author, n_ready, n_draft, n_ready + n_draft)
    _LOG.info(rule)
    _LOG.info(
        "%-25s %7d %7d %7d",
        "TOTAL",
        ready_sum,
        draft_sum,
        ready_sum + draft_sum,
    )
+ sorted_authors = sorted( + closed_stats.items(), key=lambda item: item[1], reverse=True + ) + period_str = "all time" + if period is not None: + since, until = period + period_str = f"{since.date()} to {until.date()}" + header = f"{'Author':<25} {'Closed':>7}" + separator = "-" * len(header) + _LOG.info("Closed PRs by author (%s):", period_str) + _LOG.info(separator) + _LOG.info(header) + _LOG.info(separator) + total = 0 + for author, count in sorted_authors: + total += count + _LOG.info("%-25s %7d", author, count) + _LOG.info(separator) + _LOG.info("%-25s %7d", "TOTAL", total) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py new file mode 100644 index 000000000..96c8af1da --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py @@ -0,0 +1,508 @@ +""" +Import as: + +import helpers.hasyncio as hasynci +""" + +import asyncio +import contextlib +import datetime +import logging +import math +import time +from typing import ( + Any, + Callable, + Coroutine, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) + +import async_solipsism # type: ignore[import-not-found] +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint + +# Avoid dependency from other `helpers` modules, such as `helpers.hsql`, to prevent +# import cycles. + + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _EventLoop +# ############################################################################# + + +# TODO(gp): We could make this a mixin and add this behavior to both asyncio and +# async_solipsism event loop. 
class _EventLoop(async_solipsism.EventLoop):
    """
    An `async_solipsism.EventLoop` returning also the wall-clock time.

    The wall-clock time is the (naive, UTC) time at which the loop object was
    created plus the number of seconds reported by the loop's own clock.
    """

    # TODO(gp): If we pass an `initial_replayed_timestamp` we could incorporate here also
    # the replayed time approach and can remove `ReplayedTime` object.
    def __init__(self) -> None:
        super().__init__()
        # Naive UTC timestamp, i.e., the same value `datetime.utcnow()` would
        # return, built without that deprecated call.
        self._initial_dt = datetime.datetime.now(datetime.timezone.utc).replace(
            tzinfo=None
        )

    def get_current_time(self) -> datetime.datetime:
        """
        Return the loop creation time advanced by the loop's elapsed seconds.

        :return: naive datetime equal to `_initial_dt + loop.time()` seconds
        """
        # `loop.time()` returns the number of seconds as `float` from when the event
        # loop was created.
        try:
            num_secs = super().time()
        except AttributeError:
            # Sometimes we call the logger before `async_solipsism` is fully initialized.
            #   File "/app/amp/helpers/hdatetime.py", line 255, in get_current_time
            #     timestamp = event_loop.get_current_time()
            #   File "/app/amp/helpers/hasyncio.py", line 60, in get_current_time
            #     num_secs = super().time()
            #   File "/venv/lib/python3.8/site-packages/async_solipsism/loop.py", line 39, in time
            #     return self._selector.clock.time()
            # AttributeError: 'NoneType' object has no attribute 'clock'
            # Call stack:
            #   File "/app/amp/helpers/hcache.py", line 311, in clear_global_cache
            #     _LOG.info("After clear_global_cache: %s", info_after)
            # Message: 'After clear_global_cache: %s'
            # Arguments: ("'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan",)
            # To avoid the error above we just set the `num_secs` to 0.
            num_secs = 0
        return self._initial_dt + datetime.timedelta(seconds=num_secs)
def run(
    coroutine: Coroutine,
    event_loop: Optional[asyncio.AbstractEventLoop],
    *,
    close_event_loop: bool = True,
) -> Any:
    """
    Run a coroutine to completion on a caller-chosen event loop.

    This mirrors `asyncio.run()` but lets the caller supply the `EventLoop`.

    :param coroutine: the coroutine to run
    :param event_loop: the event loop to use. `None` means that a fresh
        standard `asyncio` event loop is created
    :param close_event_loop: if False the event loop is left open, so that it
        can be reused for further runs
    :return: same output of `run_until_complete()`
    """
    # Fall back to a normal `asyncio` EventLoop when none is given.
    loop = asyncio.new_event_loop() if event_loop is None else event_loop
    hdbg.dassert_issubclass(loop, asyncio.AbstractEventLoop)
    hprint.log_frame(_LOG, "asyncio.run")
    try:
        return loop.run_until_complete(coroutine)
    finally:
        # Runs on both success and failure of the coroutine.
        if close_event_loop:
            loop.close()
def _get_max_num_iterations(
    sleep_in_secs: float,
    timeout_in_secs: float,
) -> int:
    """
    Compute how many polling iterations fit in the timeout.

    :param sleep_in_secs: pause between two polls; must be positive
    :param timeout_in_secs: overall time budget; must be positive
    :return: `ceil(timeout_in_secs / sleep_in_secs)`, which is at least 1
    """
    # Both durations must be strictly positive.
    for duration_in_secs in (sleep_in_secs, timeout_in_secs):
        hdbg.dassert_lt(0, duration_in_secs)
    num_polls = int(math.ceil(timeout_in_secs / sleep_in_secs))
    hdbg.dassert_lte(1, num_polls)
    return num_polls
def sync_poll(
    polling_func: PollingFunction,
    sleep_in_secs: float,
    timeout_in_secs: float,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    tag: Optional[str] = None,
) -> Tuple[int, Any]:
    """
    Same interface and behavior of `poll()` but using a synchronous
    implementation.

    :param polling_func: function returning a tuple (success, value)
    :param sleep_in_secs: time to block between two consecutive polls
    :param timeout_in_secs: time budget used to derive the maximum number of
        polls
    :param get_wall_clock_time: function returning the current wall clock
        time, used for logging
    :param tag: label used in the log messages
    :return:
        - number of iterations before a successful call to `polling_func`
        - result from `polling_func`
    :raises: TimeoutError in case of timeout
    """
    _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag"))
    if tag is None:
        # Use the function calling this function.
        tag = hintros.get_function_name(count=0)
    max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs)
    num_iter = 1
    while True:
        num_iter, (success, value) = _poll_iterate(
            polling_func,
            sleep_in_secs,
            timeout_in_secs,
            get_wall_clock_time,
            num_iter,
            max_num_iter,
            tag,
        )
        if success:
            # Return the iteration count (not the success flag), exactly like
            # `poll()` does.
            return num_iter, value
        _LOG.debug("sleep for %s secs", sleep_in_secs)
        time.sleep(sleep_in_secs)


def get_poll_kwargs(
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(ai_gp): Avoid using defaults.
    sleep_in_secs: float = 1.0,
    timeout_in_secs: float = 10.0,
) -> Dict[str, Any]:
    """
    Package the polling parameters in a dict.

    The keys match the parameter names of `poll()` and `sync_poll()`.

    :param get_wall_clock_time: function returning the current wall clock time
    :param sleep_in_secs: time between two consecutive polls; must be positive
    :param timeout_in_secs: polling time budget; must be positive
    :return: dict with keys `sleep_in_secs`, `timeout_in_secs`,
        `get_wall_clock_time`
    """
    hdbg.dassert_lt(0, sleep_in_secs)
    hdbg.dassert_lt(0, timeout_in_secs)
    hdbg.dassert_callable(get_wall_clock_time)
    poll_kwargs = {
        "sleep_in_secs": sleep_in_secs,
        "timeout_in_secs": timeout_in_secs,
        "get_wall_clock_time": get_wall_clock_time,
    }
    return poll_kwargs


# #############################################################################
# Wait.
# #############################################################################


# Represent a deterministic, if float, or random delay in [a, b] if a Tuple.
# All values are in seconds.
WaitInSecs = Union[float, Tuple[float, float]]


async def sleep(
    delay_in_secs: WaitInSecs,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(gp): -> msg
    tag: Optional[str] = None,
    # TODO(gp): How to handle random seed here?
    seed: int = 42,
) -> None:
    """
    Wait a deterministic or a randomized delay.

    :param delay_in_secs: a number of seconds to wait, or a `(min, max)` tuple
        from which the delay is drawn uniformly at random
    :param get_wall_clock_time: function returning the current wall clock
        time, used for logging
    :param tag: label used in the log messages
    :param seed: currently not used (see TODO in the signature)
    :raises ValueError: if `delay_in_secs` is neither a number nor a tuple
    """
    if tag is None:
        # Use the name of the function calling this function.
        tag = hintros.get_function_name(count=0)
    # Extract or compute the delay.
    if isinstance(delay_in_secs, (int, float)):
        # Deterministic delay.
        pass
    elif isinstance(delay_in_secs, tuple):
        # Randomized delay.
        hdbg.dassert_eq(len(delay_in_secs), 2)
        min_, max_ = delay_in_secs
        hdbg.dassert_lte(0, min_)
        hdbg.dassert_lte(min_, max_)
        # Draw a single float from the requested interval.
        # `np.random.rand()` takes array dimensions, not bounds, so it cannot
        # be used here.
        delay_in_secs = float(np.random.uniform(min_, max_))
    else:
        raise ValueError(f"Invalid delay_in_secs='{delay_in_secs}'")
    # Wait.
    hprint.log_frame(
        _LOG,
        "%s: wall_clock_time=%s: started waiting for %s secs",
        tag,
        get_wall_clock_time(),
        delay_in_secs,
    )
    hdbg.dassert_lte(0, delay_in_secs)
    delay_in_secs = cast(float, delay_in_secs)
    await asyncio.sleep(delay_in_secs)
    hprint.log_frame(
        _LOG,
        "%s: wall_clock_time=%s: done waiting for %s secs",
        tag,
        get_wall_clock_time(),
        delay_in_secs,
    )
def _wait_until(
    wait_until_timestamp: pd.Timestamp,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    tag: Optional[str] = None,
) -> float:
    """
    Return amount of seconds to wait for.

    The amount is computed with `total_seconds()`, so fractional seconds are
    preserved. This function does not sleep: the caller performs the actual
    wait.

    :param wait_until_timestamp: wall clock time to wait for
    :param get_wall_clock_time: function returning the current wall clock time
    :param tag: label used in the log message
    :return: seconds between now and `wait_until_timestamp`, or 0 if that
        time has already passed
    """
    if tag is None:
        # Use the name of the function calling this function.
        tag = hintros.get_function_name(count=2)
    curr_timestamp = get_wall_clock_time()
    _LOG.debug(
        "wait_until_timestamp=%s, curr_timestamp=%s",
        wait_until_timestamp,
        curr_timestamp,
    )
    # We can only wait for times in the future.
    if curr_timestamp > wait_until_timestamp:
        # The target time has already passed: there is nothing to wait for.
        _LOG.warning(
            "curr_timestamp=%s, wait_until_timestamp=%s is in the past: "
            "continuing ",
            curr_timestamp,
            wait_until_timestamp,
        )
        time_in_secs = 0.0
    else:
        time_in_secs = (wait_until_timestamp - curr_timestamp).total_seconds()
    _LOG.debug(
        "%s: wall_clock_time=%s: sleep for %s secs",
        tag,
        get_wall_clock_time(),
        time_in_secs,
    )
    return time_in_secs
async def async_wait_until(
    wait_until_timestamp: pd.Timestamp,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(gp): -> msg
    tag: Optional[str] = None,
) -> None:
    """
    Suspend the calling coroutine until the wall clock reaches a timestamp.

    :param wait_until_timestamp: wall clock time to wait for
    :param get_wall_clock_time: function returning the current wall clock time
    :param tag: label used in the log messages
    """
    _LOG.debug(hprint.to_str("wait_until_timestamp"))
    # Compute how long to wait, then yield to the event loop for that long.
    secs_left = _wait_until(wait_until_timestamp, get_wall_clock_time, tag=tag)
    hdbg.dassert_lte(0, secs_left)
    await asyncio.sleep(secs_left)
    #
    done_msg = "%s: wall_clock_time=%s: done waiting"
    hprint.log_frame(_LOG, done_msg, tag, get_wall_clock_time())
def get_session(
    aws_profile: str, *, region: Optional[str] = None
) -> boto3.session.Session:
    """
    Build a Boto3 session for the given AWS profile.

    :param aws_profile: AWS profile name to use for the session.
    :param region: AWS region, if None get region from AWS credentials.
    :return: Boto3 session object.
    """
    hdbg.dassert_isinstance(aws_profile, str)
    # When deploying jobs via ECS the container obtains credentials based on
    # passed task role specified in the ECS task-definition, refer to:
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html
    if aws_profile in ["ck", "csfy"] and hserver.is_inside_ecs_container():
        _LOG.info("Fetching credentials from task IAM role")
        # NOTE(review): `region` is not forwarded on this path -- confirm
        # that this is intended.
        return boto3.session.Session()
    # We do not need to extract the credential from the file because
    # the credential is already set and `boto3` know where to find them.
    session_kwargs = {"profile_name": aws_profile}
    if region:
        session_kwargs["region_name"] = region
    return boto3.Session(**session_kwargs)
def get_task_definition_image_url(
    task_definition_name: str, environment: str, *, region: Optional[str] = None
) -> str:
    """
    Look up an ECS task definition by name and return its image URL.

    :param task_definition_name: The name of the ECS task definition,
        e.g., `cmamp-test`.
    :param environment: key of `AWS_PROFILE` selecting the AWS profile to
        use, e.g., `test`.
    :param region: AWS region, if None get region from AWS credentials.
    :return: image URL of the first container definition of the task
        definition.
    """
    ecs_client = get_service_client(
        AWS_PROFILE[environment], "ecs", region=region
    )
    # Get the last revision of the task definition.
    description = ecs_client.describe_task_definition(
        taskDefinition=task_definition_name
    )
    first_container = description["taskDefinition"]["containerDefinitions"][0]
    return first_container["image"]
+ + :param task_definition_name: the name of the ECS task definition + :param region: region of the task definition + :return: whether the task definition exists + """ + client = get_ecs_client("ck", region=region) + try: + client.describe_task_definition(taskDefinition=task_definition_name) + return True + except client.exceptions.ClientError as e: + _LOG.warning( + "Failed to describe task definition '%s': %s", + task_definition_name, + e, + ) + return False + + +# TODO(Nikola): Pass a dict config instead, so any part can be updated. +def update_task_definition( + task_definition_name: str, + new_image_url: str, + *, + region: Optional[str] = None, + environment: str, +) -> None: + """ + Create the new revision of specified ECS task definition. + + If region is different then the default one, it is assumed that ECR + replication is enabled from the default region to the target region. + + :param task_definition_name: The name of the ECS task definition for + which an update to container image URL is made, e.g., `cmamp- + test`. + :param new_image_url: New image URL for task definition. e.g., + `***.dkr.ecr.***/cmamp:prod`. + :param region: AWS region, if None get region from AWS credentials. + """ + aws_profile = AWS_PROFILE[environment] + client = get_ecs_client(aws_profile, region=region) + # Get the last revision of the task definition. + task_description = client.describe_task_definition( + taskDefinition=task_definition_name + ) + task_definition_json = task_description["taskDefinition"] + # Set new image. + old_image_url = task_definition_json["containerDefinitions"][0]["image"] + if old_image_url == new_image_url: + _LOG.info( + "New image url `%s` is already set for task definition `%s`!", + new_image_url, + task_definition_name, + ) + return + task_definition_json["containerDefinitions"][0]["image"] = new_image_url + # Register the new revision with the new image. 
def list_all_objects(
    s3_client: BaseClient, bucket_name: str, prefix: str
) -> List[Dict]:
    """
    Collect every object under a prefix of an S3 bucket, following
    pagination.

    :param s3_client: Instance of boto3 S3 client.
    :param bucket_name: The name of the S3 bucket e.g., `cryptokaizen-data-test`.
    :param prefix: Prefix to filter the S3 objects e.g., `binance/historical_bid_ask/`.
    :return: the concatenated `Contents` entries of all the response pages,
        one dictionary of metadata per object. E.g.,
        ```
        [
            {
                'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-27/data.tar.gz',
                'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()),
                'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
                'Size': 0,
                'StorageClass': 'STANDARD'
            },
        ]
        ```
    """
    collected: List[Dict] = []
    next_token = None
    while True:
        request = {"Bucket": bucket_name, "Prefix": prefix}
        # Ask for the next page only when the previous response carried a
        # continuation token.
        if next_token:
            request["ContinuationToken"] = next_token
        page = s3_client.list_objects_v2(**request)
        # A page without `Contents` contributes nothing.
        collected.extend(page.get("Contents", []))
        if not page.get("IsTruncated"):
            return collected
        next_token = page.get("NextContinuationToken")
+ +Import as: + +import helpers.hcache as hcache +""" + +import atexit +import copy +import functools +import logging +import os +import time +from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast + +import joblib +import joblib.func_inspect as jfunci +import joblib.memory as jmemor + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hintrospection as hintros +import helpers.hlogging as hloggin +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hsystem as hsystem +import helpers.htimer as htimer + +_LOG = hloggin.getLogger(__name__) +# Enable extra verbose debugging. Do not commit. +_TRACE = False + +# ############################################################################# + + +_IS_CACHE_ENABLED: bool = True + + +def enable_caching(val: bool) -> None: + """ + Enable or disable all caching, i.e., global, tagged global, function- + specific. + """ + global _IS_CACHE_ENABLED + if _TRACE: + _LOG.trace("") + _LOG.warning("Setting caching to %s -> %s", _IS_CACHE_ENABLED, val) + _IS_CACHE_ENABLED = val + + +def is_caching_enabled() -> bool: + """ + Check if cache is enabled. + + :return: whether the cache is enabled or not + """ + if _TRACE: + _LOG.trace("") + return _IS_CACHE_ENABLED + + +# Global switch to allow or prevent clearing the cache. +_IS_CLEAR_CACHE_ENABLED: bool = True + + +def enable_clear_cache(val: bool) -> None: + """ + Enable or disable clearing a cache (both global and function-specific). 
+ """ + global _IS_CLEAR_CACHE_ENABLED + if _TRACE: + _LOG.trace("") + _LOG.warning( + "Enabling clear cache to %s -> %s", _IS_CLEAR_CACHE_ENABLED, val + ) + _IS_CLEAR_CACHE_ENABLED = val + + +# ############################################################################# +# Global cache interface +# ############################################################################# + + +def _get_cache_types() -> List[str]: + """ + Return the types (aka levels) of the cache. + """ + return ["mem", "disk"] + + +def _dassert_is_valid_cache_type(cache_type: str) -> None: + """ + Assert that `cache_type` is a valid cache type. + """ + hdbg.dassert_in(cache_type, _get_cache_types()) + + +def _get_global_cache_name(cache_type: str, tag: Optional[str] = None) -> str: + """ + Get the canonical cache name for a type of cache and tag, both global and + function-specific. + + E.g., `tmp.cache.{cache_type}.{tag}` like `tmp.cache.mem.unit_tests` + + :param cache_type: type of a cache + :param tag: optional unique tag of the cache + :return: name of the folder for a cache + """ + _dassert_is_valid_cache_type(cache_type) + cache_name = f"tmp.cache.{cache_type}" + if tag is not None: + cache_name += f".{tag}" + return cache_name + + +def _get_global_cache_path(cache_type: str, tag: Optional[str] = None) -> str: + """ + Get path to the directory storing the cache. + + For a memory cache, the path is in a predefined RAM disk. + For a disk cache, the path is on the file system relative to Git root. + + :return: the file system path to the cache + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + # Get the cache name. + cache_name = _get_global_cache_name(cache_type, tag) + # Get the enclosing directory path. 
def _get_cache_size(path: Optional[str], description: str) -> str:
    """
    Report information about a cache (global or function) stored at a given
    path.

    :param path: directory of the cache, or `None` when the cache has no
        location yet
    :param description: label of the cache used in the report, e.g.,
        `global mem`
    :return: one-line report with the path and, when a path is given, the
        size of the cache (`nan` if the path doesn't exist on the local file
        system)
    """
    if _TRACE:
        _LOG.trace("")
    if path is None:
        # There is nothing to measure.
        return f"'{description}' cache: path='{path}' doesn't exist yet"
    if os.path.exists(path):
        size_in_bytes = hsystem.du(path)
        if isinstance(size_in_bytes, str):
            # `du()` already returned a formatted string.
            size_as_str = size_in_bytes
        else:
            size_as_str = hintros.format_size(size_in_bytes)
    else:
        size_as_str = "nan"
    # TODO(gp): Compute number of files.
    return f"'{description}' cache: path='{path}', size={size_as_str}"
# TODO(gp): -> _get_global_cache
def get_global_cache(
    cache_type: str, tag: Optional[str] = None
) -> joblib.Memory:
    """
    Return the Joblib backend of the global cache of a given type.

    Untagged caches are module-level singletons created on first use. A
    tagged cache is built anew on every call and is not stored.

    :param cache_type: type of a cache
    :param tag: optional unique tag of the cache
    :return: caching backend
    """
    if _TRACE:
        _LOG.trace("")
    _dassert_is_valid_cache_type(cache_type)
    global _MEMORY_CACHE, _DISK_CACHE
    if tag is not None:
        # Build a one-off cache using tag.
        return _create_global_cache_backend(cache_type, tag)
    if cache_type == "mem":
        # Lazily create the global memory cache.
        if _MEMORY_CACHE is None:
            _MEMORY_CACHE = _create_global_cache_backend(cache_type)
        return _MEMORY_CACHE
    if cache_type == "disk":
        # Lazily create the global disk cache.
        if _DISK_CACHE is None:
            _DISK_CACHE = _create_global_cache_backend(cache_type)
        return _DISK_CACHE
    raise ValueError(f"Invalid cache type '{cache_type}'")
def clear_global_cache(
    cache_type: str, tag: Optional[str] = None, destroy: bool = False
) -> None:
    """
    Reset the global cache by cache type.

    The size of the cache is logged before and after the operation.

    :param cache_type: type of a cache (see `_get_cache_types()`), or the
        string `"all"` to clear every cache type in turn
    :param tag: optional unique tag of the cache, `None` (untagged cache) by
        default
    :param destroy: if True delete the cache directory, otherwise only clear
        the content through the Joblib backend
    """
    if _TRACE:
        _LOG.trace("")
    if cache_type == "all":
        # Recurse on each concrete cache type.
        for cache_type_tmp in _get_cache_types():
            clear_global_cache(cache_type_tmp, tag=tag, destroy=destroy)
        return
    _dassert_is_valid_cache_type(cache_type)
    # Clear and / or destroy the cache `cache_type` with the given `tag`.
    cache_path = _get_global_cache_path(cache_type, tag)
    # Honor the global switch set through `enable_clear_cache()`.
    if not _IS_CLEAR_CACHE_ENABLED:
        hdbg.dfatal(f"Trying to delete cache '{cache_path}'")
    description = f"global {cache_type}"
    try:
        # TODO(ShaopengZ): in some test run outside CK infra, the
        # _get_cache_size() hangs.
        info_before = _get_cache_size(cache_path, description)
    except ValueError:
        _LOG.warning("Cache has already been deleted by another process.")
        return
    _LOG.info("Before clear_global_cache: %s", info_before)
    _LOG.warning("Resetting 'global %s' cache '%s'", cache_type, cache_path)
    if hs3.is_s3_path(cache_path):
        # For now we only allow to delete caches under the unit test path.
        _, abs_path = hs3.split_path(cache_path)
        hdbg.dassert(
            abs_path.startswith("/tmp/cache.unit_test/"),
            "The path '%s' is not valid",
            abs_path,
        )
    if destroy:
        _LOG.warning("Destroying '%s' ...", cache_path)
        hio.delete_dir(cache_path)
    else:
        cache_backend = get_global_cache(cache_type, tag)
        try:
            cache_backend.clear(warn=True)
        except FileNotFoundError as e:
            # A race condition can cause:
            # FileNotFoundError: [Errno 2] No such file or directory: '/app/tmp.cache.disk/joblib'
            _LOG.error("Caught %s: continuing", str(e))
    # Report stats before and after.
    try:
        info_after = _get_cache_size(cache_path, description)
    except ValueError:
        _LOG.warning("Cache has already been deleted by another process.")
        return
    _LOG.info("After clear_global_cache: %s", info_after)
def _create_function_memory_cache(self) -> joblib.memory.MemorizedFunc:
    """
    Initialize Joblib object storing a memory cache for this function.

    :return: the object returned by `Memory.cache()` for the wrapped
        function, built on the global memory cache selected by `self._tag`
    """
    if _TRACE:
        _LOG.trace("")
    _LOG.debug("Create memory cache")
    # For memory always use the global cache.
    cache_type = "mem"
    memory_cache = get_global_cache(cache_type, self._tag)
    # Get the Joblib object corresponding to the cached function.
    return memory_cache.cache(self._func)
+ hdbg.dassert( + path.startswith("/"), + "The path should be absolute instead of %s", + path, + ) + path = path[1:] + memory_kwargs.update( + { + "backend": "s3", + "backend_options": {"s3fs": s3fs, "bucket": bucket}, + } + ) + else: + path = self._disk_cache_path + _LOG.debug("path='%s'\nmemory_kwargs=\n%s", path, str(memory_kwargs)) + disk_cache = joblib.Memory(path, **memory_kwargs) + else: + # Use the global cache. + cache_type = "disk" + disk_cache = get_global_cache(cache_type, self._tag) + # Get the Joblib object corresponding to the cached function. + disk_cached_func = disk_cache.cache(self._func) + return disk_cache, disk_cached_func + # + + # /////////////////////////////////////////////////////////////////////////// + + def _reset_cache_tracing(self) -> None: + """ + Reset the values used to track which cache we are hitting when + executing the cached function. + """ + if _TRACE: + _LOG.trace("") + # The reset values depend on which caches are enabled. + self._last_used_disk_cache = self._use_disk_cache + self._last_used_mem_cache = self._use_mem_cache + + # TODO(gp): Either allow users to initialize `mem_cache_path` here or with + # `set_function_cache_path()` but not both code paths. It's unclear which option + # is better. On the one side `set_function_cache_path()` is more explicit, but + # it can't be changed. On the other side the wrapper needs to be initialized in + # one shot. + def __init__( + self, + func: Callable, + *, + use_mem_cache: bool = True, + use_disk_cache: bool = True, + verbose: bool = False, + tag: Optional[str] = None, + disk_cache_path: Optional[str] = None, + aws_profile: Optional[str] = "am", + ): + """ + Construct the class. 
+ + :param func: function to cache + :param use_mem_cache, use_disk_cache: whether we allow memory and disk caching + :param verbose: print high-level information about the cache + behavior, e.g., + - whether a function was cached or not + - from which level the data was retrieved + - the execution time + - the amount of data retrieved + :param tag: a tag added to the global cache path to make it specific (e.g., + when running unit tests we want to use a different cache) + :param disk_cache_path: path of the function-specific cache + :param aws_profile: the AWS profile to use in case of S3 backend + """ + # Make the class have the same attributes (e.g., `__name__`, `__doc__`, + # `__dict__`) as the called function. + functools.update_wrapper(self, func) + if _TRACE: + _LOG.trace("") + # Save interface parameters. + hdbg.dassert_callable(func) + self._func = func + # TODO(gp): We should use memory cache only inside Jupyter notebooks. + self._use_mem_cache = use_mem_cache + self._use_disk_cache = use_disk_cache + self._is_verbose = verbose + self._tag = tag + self._disk_cache_path = disk_cache_path + self._aws_profile = aws_profile + # + self._reset_cache_tracing() + # Create the memory and disk cache objects for this function. + # TODO(gp): We might simplify the code by using a dict instead of 2 variables. + # Store the Joblib memory cache object for this function. + self._memory_cached_func = self._create_function_memory_cache() + # Store the Joblib memory object and the Joblib memory cache object for + # this function. + ( + self._disk_cache, + self._disk_cached_func, + ) = self._create_function_disk_cache() + # Enable a mode where an exception `NotCachedValueException` is thrown if + # the value is not in the cache. + self._enable_read_only = False + # Enable a mode where an exception `NotCachedValueException` is thrown if + # the value is in the cache, instead of accessing the value. 
def get_function_cache_info(self, add_banner: bool = False) -> str:
    """
    Return info about the caching properties for this function.

    :param add_banner: prepend a frame with the name of this method
    :return: multi-line report stating whether the function has a
        function-specific cache and, if so, the path of that disk cache
    """
    if _TRACE:
        _LOG.trace("")
    lines: List[str] = []
    if add_banner:
        # The banner carries the name of this method, so that the report can
        # be told apart from the one of `get_global_cache_info()`.
        lines.append(hprint.frame("get_function_cache_info()", char1="<"))
    has_func_cache = self.has_function_cache()
    lines.append(f"has function-specific cache={has_func_cache}")
    if has_func_cache:
        # Function-specific cache: print the paths of the local cache.
        cache_type = "disk"
        lines.append(f"local {cache_type} cache path={self._disk_cache_path}")
    return "\n".join(lines)
+ """ + _LOG.warning( + "Setting check_only_if_present to %s -> %s", + self._check_only_if_present, + val, + ) + self._check_only_if_present = val + + def _get_memorized_result(self, cache_type: str) -> joblib.MemorizedResult: + """ + Get the instance of a cache by type. + + From https://github.com/joblib/joblib/blob/master/joblib/memory.py + A `MemorizedResult` is an object representing a cached value + + :param cache_type: type of a cache + :return: instance of the Joblib cache + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + if cache_type == "mem": + memorized_result = self._memory_cached_func + elif cache_type == "disk": + memorized_result = self._disk_cached_func + _LOG.debug("memorized_result=%s", memorized_result) + return memorized_result + + def _get_function_specific_code_path(self) -> str: + if _TRACE: + _LOG.trace("") + # Get the store backend. + cache_type = "disk" + memorized_result = self._get_memorized_result(cache_type) + store_backend = memorized_result.store_backend + # Get the function id (which is the full path). + func_id = jmemor._build_func_identifier(self._func) + # Assemble the path. + func_path = os.path.join(store_backend.location, func_id, "func_code.py") + _LOG.debug("func_path='%s'", func_path) + hdbg.dassert( + store_backend._item_exists(func_path), "Can't find '%s'", func_path + ) + return func_path + + def update_func_code_without_invalidating_cache(self) -> None: + """ + Update the Python code stored in the cache. + + This is used when we make changes to the cached function but we don't want + to invalidate the cache. + + NOTE: here the caller must guarantee that the new function yields exactly + the same results than the previous ones. Use carefully. 
+ """ + if _TRACE: + _LOG.trace("") + hdbg.dassert( + self.has_function_cache(), + "This is used only for function-specific caches", + ) + # From `store_cached_func_code` in + # https://github.com/joblib/joblib/tree/master/joblib/_store_backends.py + func_path = self._get_function_specific_code_path() + # Archive old code. + new_func_path = ( + func_path + "." + hdateti.get_current_timestamp_as_string(tz="ET") + ) + _LOG.debug("new_func_path='%s'", new_func_path) + # Get the store backend. + cache_type = "disk" + memorized_result = self._get_memorized_result(cache_type) + store_backend = memorized_result.store_backend + hdbg.dassert( + not store_backend._item_exists(new_func_path), + "'%s' already exists", + new_func_path, + ) + store_backend._move_item(func_path, new_func_path) + # Write out function code to the cache. + func_code, _, first_line = jfunci.get_func_code(memorized_result.func) + memorized_result._write_func_code(func_code, first_line) + _LOG.debug("Updated func_path='%s'", func_path) + + # /////////////////////////////////////////////////////////////////////////// + # Function-specific cache. + # /////////////////////////////////////////////////////////////////////////// + + def has_function_cache(self) -> bool: + """ + Return whether this function has a function-specific cache or uses the + global cache. + """ + if _TRACE: + _LOG.trace("") + has_func_cache = self._disk_cache_path is not None + return has_func_cache + + # TODO(gp): Can we reuse the same code for `clear_function_cache` as above? + def clear_function_cache(self, destroy: bool = False) -> None: + """ + Clear a function-specific cache. + """ + if _TRACE: + _LOG.trace("") + hdbg.dassert( + self.has_function_cache(), + "This function has no function-specific cache", + ) + # Get the path for the disk cache. 
def set_function_cache_path(self, cache_path: Optional[str]) -> None:
    """
    Point this function to a function-specific disk cache, or back to the
    global cache.

    The memory cache is switched off when a cache path is given and switched
    back on otherwise. The disk cache objects are then rebuilt.

    :param cache_path: cache directory or `None` to use global cache
    """
    if _TRACE:
        _LOG.trace("")
    uses_function_cache = bool(cache_path)
    if uses_function_cache:
        hdbg.dassert_dir_exists(cache_path)
    # The memory cache is used only together with the global cache.
    self._use_mem_cache = not uses_function_cache
    self._disk_cache_path = cache_path
    # Rebuild the Joblib objects for the new location.
    disk_cache, disk_cached_func = self._create_function_disk_cache()
    self._disk_cache = disk_cache
    self._disk_cached_func = disk_cached_func
+ """ + if _TRACE: + _LOG.trace("") + if cache_type == "no_cache": + return "no_cache" + if self.has_function_cache(): + hdbg.dassert_eq(cache_type, "disk") + ret = self._disk_cache_path + else: + ret = _get_global_cache_path(cache_type, tag=tag) + ret = cast(str, ret) + return ret + + def _get_identifiers( + self, cache_type: str, *args: Any, **kwargs: Any + ) -> Tuple[str, str]: + """ + Get digests for current function and arguments to be used in cache. + + :param cache_type: type of a cache + :param args: original arguments of the call + :param kwargs: original kw-arguments of the call + :return: digests of the function and current arguments + """ + memorized_result = self._get_memorized_result(cache_type) + _LOG.debug("memorized_result=%s", memorized_result) + hdbg.dassert_is_not( + memorized_result, + None, + "Cache backend not initialized for %s", + cache_type, + ) + # This is needed for joblib >= 1.4.2. + func_id = memorized_result.func_id + args_id = memorized_result._get_args_id(*args, **kwargs) + _LOG.debug("func_id=%s args_id=%s", func_id, args_id) + return func_id, args_id + + def _has_cached_version( + self, cache_type: str, func_id: str, args_id: str + ) -> bool: + """ + Check if a cache contains an entry for a corresponding function and + arguments digests, and that function source has not changed. + + :param cache_type: type of a cache + :param func_id: digest of the function obtained from _get_identifiers + :param args_id: digest of arguments obtained from _get_identifiers + :return: whether there is an entry in a cache + """ + if _TRACE: + _LOG.trace("") + memorized_result = self._get_memorized_result(cache_type) + has_cached_version = memorized_result.store_backend.contains_item( + [func_id, args_id] + ) + _LOG.debug("has_cached_version=%s", has_cached_version) + if has_cached_version: + # We must check that the source of the function is the same, otherwise, + # cache tracing will not be correct. + # First, try faster check via joblib hash. 
def _execute_func_from_disk_cache(self, *args: Any, **kwargs: Any) -> Any:
    """
    Execute the function through the disk cache.

    :param args: positional arguments of the call
    :param kwargs: keyword arguments of the call
    :return: the value returned by the Joblib-wrapped function
    :raises CachedValueException: if the value is in the disk cache and the
        `check_only_if_present` mode is enabled
    :raises NotCachedValueException: if the value is not in the disk cache
        and the `enable_read_only` mode is enabled
    """
    if _TRACE:
        _LOG.trace("")
    func_info = (
        f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})"
    )
    # Get the function signature.
    func_id, args_id = self._get_identifiers("disk", *args, **kwargs)
    if self._has_cached_version("disk", func_id, args_id):
        _LOG.debug("There is a disk cached version")
        # When only probing the cache, report the hit before loading the
        # value, so that the value is never retrieved from disk (same
        # ordering as in the memory cache path).
        if self._check_only_if_present:
            raise CachedValueException(func_info)
        with htimer.TimedScope(
            logging.INFO, "Loading cached version from disk"
        ):
            obj = self._disk_cached_func(*args, **kwargs)
    else:
        # INV: we hit neither the memory nor the disk cache.
        self._last_used_disk_cache = False
        #
        _LOG.debug(
            "%s: execute the intrinsic function",
            func_info,
        )
        # If the cache was read-only, then assert.
        if self._enable_read_only:
            msg = f"{func_info}: trying to execute"
            raise NotCachedValueException(msg)
        # On a miss the Joblib-wrapped function is expected to both run the
        # function and persist the result, so no explicit store is needed.
        with htimer.TimedScope(
            logging.INFO, "Updating cached version on disk"
        ):
            obj = self._disk_cached_func(*args, **kwargs)
    return obj
def _execute_func(self, *args: Any, **kwargs: Any) -> Any:
    """
    Dispatch the call to the enabled cache levels.

    The memory cache path is tried first when enabled (it falls back to the
    lower levels by itself). Otherwise the disk cache path is used when
    enabled, and the wrapped function is executed directly when both caches
    are disabled. The `_last_used_*_cache` flags are cleared for the levels
    that are skipped.

    :param args: positional arguments of the call
    :param kwargs: keyword arguments of the call
    :return: the value of the wrapped function for the given arguments
    """
    if _TRACE:
        _LOG.trace("")
    func_info = (
        f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})"
    )
    _LOG.debug(
        "%s: use_mem_cache=%s use_disk_cache=%s",
        func_info,
        self._use_mem_cache,
        self._use_disk_cache,
    )
    if self._use_mem_cache:
        _LOG.debug("Trying to retrieve from memory")
        obj = self._execute_func_from_mem_cache(*args, **kwargs)
    else:
        if self.has_function_cache():
            # For function-specific cache, skipping the memory cache is the
            # normal behavior.
            _LOG.debug(
                "Function has function-specific cache: skipping memory cache"
            )
        else:
            _LOG.warning("Skipping memory cache")
        # The memory cache was not consulted for this call.
        self._last_used_mem_cache = False
        if self._use_disk_cache:
            obj = self._execute_func_from_disk_cache(*args, **kwargs)
        else:
            _LOG.warning("Skipping disk cache")
            # No cache level is enabled: run the wrapped function.
            self._last_used_disk_cache = False
            obj = self._execute_intrinsic_function(*args, **kwargs)
    return obj
+ # obj_size = hintros.get_size_in_bytes(obj) + # obj_size_as_str = hintros.format_size(obj_size) + obj_size_as_str = "nan" + last_cache = self.get_last_cache_accessed() + cache_dir = self._get_cache_dir(last_cache, self._tag) + _LOG.info( + " --> Cache data for '%s' from '%s' cache " + "(size=%s, time=%.2f s, tag=%s, loc=%s)", + self._func.__name__, + last_cache, + obj_size_as_str, + elapsed_time, + self._tag, + cache_dir, + ) + return obj + + +# ############################################################################# +# Decorator +# ############################################################################# + + +def cache( + use_mem_cache: bool = True, + use_disk_cache: bool = True, + set_verbose_mode: bool = False, + tag: Optional[str] = None, + disk_cache_path: Optional[str] = None, + aws_profile: Optional[str] = None, +) -> Union[Callable, _Cached]: + """ + Decorate a function with a cache. + + The parameters are the same as `hcache._Cached`. + + Usage examples: + ``` + import helpers.hcache as hcache + + @hcache.cache() + def add(x: int, y: int) -> int: + return x + y + + @hcache.cache(use_mem_cache=False) + def add(x: int, y: int) -> int: + return x + y + ``` + """ + + def wrapper(func: Callable) -> _Cached: + return _Cached( + func, + use_mem_cache=use_mem_cache, + use_disk_cache=use_disk_cache, + verbose=set_verbose_mode, + tag=tag, + disk_cache_path=disk_cache_path, + aws_profile=aws_profile, + ) + + return wrapper + + +# ############################################################################# + +# Clean up the memory cache on-exit. +# TODO(gp): Add another function and make it silent. 
+atexit.register(clear_global_cache, cache_type="mem", destroy="true") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py new file mode 100644 index 000000000..afdf5438c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py @@ -0,0 +1,1188 @@ +""" +Detailed documentation at. + +- //helpers/docs/tools/helpers/all.hcache_simple.explanation.md +- //helpers/notebooks/hcache_simple.tutorial.ipynb + +Import as: + +import helpers.hcache_simple as hcacsimp +""" + +import functools +import glob +import json +import logging +import os +import pickle +import re +from typing import Any, Callable, Dict, List, Optional, Union, cast + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# Disable tracing for production code. +_LOG.trace = lambda *args, **kwargs: None +# _LOG.trace = _LOG.debug + +# ############################################################################# +# Memory cache. +# ############################################################################# + +# Type for the cache of a single function: key -> value properties. E.g., +# ``` +# { +# "{\"args\": [4], \"kwargs\": {}}": 16 +# } +# ``` +_FunctionCacheType = Dict[str, Any] + +# Basic type for caching data: func_name -> key -> value properties. E.g., +# ``` +# { +# "slow_square": { +# "{\"args\": [4], \"kwargs\": {}}": 16 +# } +# } +# ``` +_CacheType = Dict[str, _FunctionCacheType] + +# Create global variable for the memory cache. 
if "_CACHE" not in globals():
    _LOG.trace("Creating _CACHE")
    # Memory cache: func_name -> cache key -> cached value.
    _CACHE: _CacheType = {}

# Default `cache_mode` for the whole process: it is used by every
# `@simple_cache` function whenever the caller does not pass `cache_mode`
# explicitly. CLI scripts use it as a single switch to put all the cached
# functions in refresh / disable / hit-or-abort mode (see
# `hparser.add_cache_control_arg`).
_VALID_CACHE_MODES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT")
_GLOBAL_CACHE_MODE: Optional[str] = None


def set_global_cache_mode(mode: Optional[str]) -> None:
    """
    Install (or clear) the process-wide default `cache_mode`.

    :param mode: `REFRESH_CACHE`, `DISABLE_CACHE`, `HIT_CACHE_OR_ABORT`, or
        `None` to remove the default
    """
    global _GLOBAL_CACHE_MODE
    clearing = mode is None
    if not clearing:
        # Only named modes are accepted.
        hdbg.dassert_in(mode, _VALID_CACHE_MODES)
    _GLOBAL_CACHE_MODE = mode


def get_global_cache_mode() -> Optional[str]:
    """
    Return the process-wide default `cache_mode` (`None` when not set).
    """
    return _GLOBAL_CACHE_MODE


# Switch for cache-decision logging: when on, `@simple_cache` calls report at
# WARNING level whether the value came from the cache, was computed on a miss,
# or was recomputed because of an active `cache_mode`.
_CACHE_DEBUG: bool = False


def set_cache_debug(enabled: bool) -> None:
    """
    Turn the process-wide cache-decision logging on or off.

    :param enabled: True to log cache decisions at WARNING level
    """
    global _CACHE_DEBUG
    hdbg.dassert_isinstance(enabled, bool)
    _CACHE_DEBUG = enabled


def get_cache_debug() -> bool:
    """
    Tell whether cache-decision logging is currently on.
    """
    return _CACHE_DEBUG


def sanity_check_function_cache(
    func_cache_data: _FunctionCacheType, *, assert_on_empty: bool = True
) -> None:
    """
    Validate the cache of a single function.

    :param func_cache_data: mapping cache key -> cached value to validate
    :param assert_on_empty: if True, an empty mapping is an error
    """
    hdbg.dassert_isinstance(func_cache_data, dict)
    if assert_on_empty:
        hdbg.dassert_ne(len(func_cache_data), 0, "Function data is empty")
    # Only the keys are constrained: cached values can be of any type.
    for key in func_cache_data:
        hdbg.dassert_isinstance(key, str)
        hdbg.dassert_ne(key, "", "Cache key is empty")


def sanity_check_cache(
    cache_data: _CacheType, *, assert_on_empty: bool = True
) -> None:
    """
    Validate a whole cache, i.e., the caches of all the functions.

    :param cache_data: mapping func_name -> function cache to validate
    :param assert_on_empty: if True, an empty cache (or an empty function
        cache) is an error
    """
    hdbg.dassert_isinstance(cache_data, dict)
    if assert_on_empty:
        hdbg.dassert_ne(len(cache_data), 0, "Cache data is empty")
    for name, per_func_data in cache_data.items():
        hdbg.dassert_isinstance(name, str)
        hdbg.dassert_ne(name, "", "Function name is empty")
        sanity_check_function_cache(
            per_func_data, assert_on_empty=assert_on_empty
        )


def cache_data_to_str(cache_data: _CacheType) -> str:
    """
    Render cache data as text, one line per function and per cached entry.

    :param cache_data: mapping func_name -> function cache
    :return: the multi-line string representation
    """
    lines = [hprint.frame("Cache data")]
    hdbg.dassert_isinstance(cache_data, dict)
    for func_name, func_data in cache_data.items():
        lines.append(f"# func_name={func_name}")
        hdbg.dassert_isinstance(func_data, dict)
        lines.extend(
            f" cache_key={cache_key} cached_value={cached_value}"
            for cache_key, cached_value in func_data.items()
        )
    return "\n".join(lines)


# #############################################################################
# Cache properties.
# #############################################################################

# There are several ways to control caching behavior:
# - By passing keyword arguments to the decorated function.
#   - E.g., `type_`
# - By using a special keyword argument (`force_refresh`, `abort_on_cache_miss`,
#   `report_on_cache_miss`, `cache_mode`) when calling the decorated function.
# - By setting cache properties
#   - E.g., set_cache_property("func_name", "force_refresh", True)

# - There are two types of properties:
#   - `User Properties`: Configurable by the user to alter caching behavior.
#     E.g.,
#     - `abort_on_cache_miss`: Whether to raise an error if a cache miss occurs
#     - `report_on_cache_miss`: Whether to return a special value
#       ("_cache_miss_") on a cache miss
#     - `enable_perf`: Whether to enable performance statistics tracking (hits,
#       misses, total calls)
#     - `force_refresh`: Whether to bypass the cache and refresh the value
#   - `System Properties`:
#     - cache type (e.g., "json" or "pickle")
#     - write through (e.g., True or False)
#     - exclude keys (e.g., ["password", "api_key"])

_SYSTEM_PROPERTIES = ["type", "write_through", "exclude_keys"]


def get_main_cache_dir() -> str:
    """
    Return the default location of the cache, i.e., the root of the Git repo.

    :return: absolute path of the main cache directory
    """
    return os.path.abspath(hgit.find_git_root())


# Create global variable for the cache directory.
if "_CACHE_DIR" not in globals():
    _LOG.trace("Creating _CACHE_DIR")
    _CACHE_DIR = get_main_cache_dir()


def set_cache_dir(cache_dir: str) -> None:
    """
    Point the cache to a different directory, creating it if needed.

    :param cache_dir: directory to store the cache files in; it is stored as
        an absolute path
    """
    global _CACHE_DIR
    hdbg.dassert_isinstance(cache_dir, str)
    abs_cache_dir = os.path.abspath(cache_dir)
    _CACHE_DIR = abs_cache_dir
    hio.create_dir(abs_cache_dir, incremental=True)
    _LOG.trace("Setting _CACHE_DIR to %s", abs_cache_dir)


def get_cache_dir() -> str:
    """
    Return the directory currently used to store the cache files.
    """
    return _CACHE_DIR


# Create global variable for the cache file prefix.
if "_CACHE_FILE_PREFIX" not in globals():
    _LOG.trace("Creating _CACHE_FILE_PREFIX")
    _CACHE_FILE_PREFIX = "tmp.cache_simple"


def set_cache_file_prefix(prefix: str) -> None:
    """
    Set the prefix used to name the cache files.

    :param prefix: non-empty prefix to use for cache files
    """
    global _CACHE_FILE_PREFIX
    hdbg.dassert_isinstance(prefix, str)
    hdbg.dassert_ne(prefix, "", "Cache file prefix cannot be empty")
    if prefix.endswith("."):
        # Cache files are named `{prefix}.{func_name}.{ext}`, so a trailing
        # dot in the prefix is accepted but doubles the separator.
        _LOG.warning(
            "Prefix '%s' ends with '.' - cache files will have '..' in names",
            prefix,
        )
    _CACHE_FILE_PREFIX = prefix
    _LOG.trace("Setting _CACHE_FILE_PREFIX to %s", _CACHE_FILE_PREFIX)


def get_cache_file_prefix() -> str:
    """
    Get the cache file prefix.

    :return: cache file prefix
    """
    return _CACHE_FILE_PREFIX


def get_cache_property_file() -> str:
    """
    Get the path of the file storing the cache properties.

    :return: `{cache_dir}/{prefix}_property.pkl`
    """
    prefix = get_cache_file_prefix()
    val = os.path.join(get_cache_dir(), f"{prefix}_property.pkl")
    return val


def _get_initial_cache_property() -> _CacheType:
    """
    Get the initial cache property from disk or create an empty one.

    :return: mapping func_name -> property name -> property value
    """
    file_name_ = get_cache_property_file()
    if os.path.exists(file_name_):
        _LOG.trace("Loading from %s", file_name_)
        # TODO(gp): Use _load_data_from_file, if possible.
        # NOTE(review): `pickle.load` executes code embedded in the file, so
        # the property file must come from a trusted location.
        with open(file_name_, "rb") as file:
            val = pickle.load(file)
    else:
        # func_name -> key -> value properties.
        val = {}
    val = cast(_CacheType, val)
    return val


# Create global variables for the cache properties.
if "_CACHE_PROPERTY" not in globals():
    _LOG.trace("Creating _CACHE_PROPERTY")
    _CACHE_PROPERTY = _get_initial_cache_property()


def _check_valid_cache_property(property_name: str) -> None:
    """
    Assert that a name is one of the supported cache properties.

    :param property_name: The property name to validate.
    """
    _LOG.trace(hprint.func_signature_to_str())
    hdbg.dassert_isinstance(property_name, str)
    # NOTE(review): `_SYSTEM_PROPERTIES` also lists `write_through` and
    # `exclude_keys`, which are not accepted here - confirm whether they
    # should be settable as properties.
    valid_properties = [
        # Abort if there is a cache miss. This is used to make sure everything
        # is cached.
        "abort_on_cache_miss",
        # Report if there is a cache miss and return `_cache_miss_` instead of
        # accessing the real value.
        "report_on_cache_miss",
        # Enable performance stats (e.g., miss, hit, tot for the cache).
        "enable_perf",
        # Force to refresh the value.
        "force_refresh",
        # TODO(gp): "force_refresh_once"
        # json or pickle cache type.
        "type",
        # cache mode.
        "mode",
    ]
    hdbg.dassert_in(property_name, valid_properties)


def _save_func_cache_data_to_file(
    file_name: str,
    cache_type: Optional[str],
    func_cache_data: _FunctionCacheType,
) -> None:
    """
    Save the function cache data to a file.

    :param file_name: The name of the file.
    :param cache_type: `"pickle"`, `"json"`, or `None` to infer the format
        from the file extension (`.pkl` means pickle, anything else json).
    :param func_cache_data: The function cache data to save.
    :raises ValueError: if `cache_type` is not a supported format
    """
    # Infer cache type from file extension if not set.
    if cache_type is None:
        if file_name.endswith(".pkl"):
            cache_type = "pickle"
        else:
            cache_type = "json"
    hio.create_enclosing_dir(file_name, incremental=True)
    _LOG.trace("Saving to '%s'", file_name)
    # Save data.
    if cache_type == "pickle":
        with open(file_name, "wb") as file:
            pickle.dump(func_cache_data, file)
    elif cache_type == "json":
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(
                func_cache_data,
                file,
                indent=4,
                sort_keys=True,
                ensure_ascii=False,
            )
    else:
        raise ValueError(f"Invalid cache type '{cache_type}'")


def set_cache_property(func_name: str, property_name: str, val: Any) -> None:
    """
    Set a property for the cache of a given function name.

    The updated properties of all the functions are written to the property
    file right away.

    :param func_name: The name of the function whose cache property is
        to be set.
    :param property_name: The name of the property to set.
    :param val: The value to set for the property.
    """
    _LOG.trace(hprint.func_signature_to_str())
    hdbg.dassert_isinstance(func_name, str)
    hdbg.dassert_isinstance(property_name, str)
    _check_valid_cache_property(property_name)
    # Assign value.
    cache_property = _CACHE_PROPERTY
    if func_name not in cache_property:
        cache_property[func_name] = {}
    dict_ = cache_property[func_name]
    dict_[property_name] = val
    # Update values on the disk.
    file_name = get_cache_property_file()
    _LOG.trace("Updating %s", file_name)
    # Make sure the dict is well-formed.
    for func_name_tmp in cache_property:
        hdbg.dassert_isinstance(func_name_tmp, str)
        _LOG.trace(
            "func_name_tmp='%s' -> %s",
            func_name_tmp,
            cache_property[func_name_tmp],
        )
    # The enclosing dir is created by `_save_func_cache_data_to_file()`.
    _save_func_cache_data_to_file(file_name, "pickle", cache_property)


def get_cache_property(func_name: str, property_name: str) -> Union[bool, Any]:
    """
    Get the value of a property for the cache of a given function name.

    :param func_name: The name of the function.
    :param property_name: The name of the property to read.
    :return: the stored value; when the property is not set, `None` for a
        system property and `False` for any other property
    """
    _LOG.trace(hprint.func_signature_to_str())
    _check_valid_cache_property(property_name)
    # Read data.
    cache_property = _CACHE_PROPERTY
    if property_name in _SYSTEM_PROPERTIES:
        if func_name not in cache_property:
            return None
        value = cache_property[func_name].get(property_name)
    else:
        value = cache_property.get(func_name, {}).get(property_name, False)
    return value


def reset_cache_property() -> None:
    """
    Remove the user properties of all the functions and save the result.

    System properties (see `_SYSTEM_PROPERTIES`) are preserved.
    """
    file_name = get_cache_property_file()
    _LOG.warning("Resetting %s", file_name)
    cache_property = _CACHE_PROPERTY
    # Empty the values excluding the system properties like `type` and
    # `write_through`.
    _LOG.trace("before cache_property=%s", cache_property)
    # Iterate over a list of keys to avoid modifying the dictionary during
    # iteration.
    for func_name_tmp in list(cache_property.keys()):
        # Only remove non-system properties from the function's property dict.
        func_prop = cache_property[func_name_tmp]
        for property_name_tmp in list(func_prop.keys()):
            if property_name_tmp not in _SYSTEM_PROPERTIES:
                del func_prop[property_name_tmp]
    _LOG.trace("after cache_property=%s", cache_property)
    # Update values on the disk.
    _LOG.trace("Updating %s", file_name)
    _save_func_cache_data_to_file(file_name, "pickle", cache_property)


# #############################################################################
# Get cache.
# #############################################################################

# Functions to retrieve cache (both memory and disk).


def get_cache_func_names(type_: str) -> List[str]:
    """
    Retrieve the cache function names based on the specified type.

    For the disk cache only files named `{prefix}.{func_name}.json` or
    `{prefix}.{func_name}.pkl` are considered: any other file sharing the
    prefix is ignored and each function is reported once, even when both a
    json and a pickle file exist.

    :param type_: The type of cache to retrieve ('all', 'mem', or
        'disk').
    :return: A sorted list of function names corresponding to the specified
        cache type.
    :raises ValueError: if `type_` is not one of the supported values
    """
    if type_ == "all":
        mem_func_names = get_cache_func_names("mem")
        disk_func_names = get_cache_func_names("disk")
        val = sorted(set(mem_func_names + disk_func_names))
    elif type_ == "mem":
        val = sorted(_CACHE)
    elif type_ == "disk":
        prefix = get_cache_file_prefix()
        candidates = glob.glob(os.path.join(get_cache_dir(), f"{prefix}.*"))
        # Exclude the cache property file.
        property_file_name = os.path.basename(get_cache_property_file())
        # The whole file name must match and the function name must be
        # non-empty, since an empty name means "all functions" for the
        # callers.
        cache_file_re = re.compile(rf"{re.escape(prefix)}\.(.+)\.(json|pkl)")
        found = set()
        for path in candidates:
            base_name = os.path.basename(path)
            if base_name == property_file_name:
                continue
            match = cache_file_re.fullmatch(base_name)
            if match is None:
                # Not a cache file (e.g., a backup or a temporary file).
                continue
            found.add(match.group(1))
        val = sorted(found)
    else:
        raise ValueError(f"Invalid type '{type_}'")
    return val


def cache_property_to_str(func_name: str = "") -> str:
    """
    Convert cache properties to a string representation.

    :param func_name: The name of the function whose cache properties
        are to be converted. If empty, all the cached functions are reported.
    :return: A string representation of the cache properties. E.g.,
        ```
        # func_name=slow_square
        type: json
        write_through: False
        exclude_keys: []
        ```
    """
    txt: List[str] = []
    if func_name == "":
        func_names = get_cache_func_names("all")
        for func_name_tmp in func_names:
            txt.append(cache_property_to_str(func_name_tmp))
        result = "\n".join(txt)
        return result
    #
    txt.append(f"# func_name={func_name}")
    cache_property = _CACHE_PROPERTY
    _LOG.trace("cache_property=%s", cache_property)
    if func_name in cache_property:
        for k, v in cache_property[func_name].items():
            txt.append(f"{k}: {v}")
    result = "\n".join(txt)
    return result


# #############################################################################
# Cache performance.
# #############################################################################


# Create global variable for the cache performance.
if "_CACHE_PERF" not in globals():
    _LOG.trace("Creating _CACHE_PERF")
    # func_name -> perf properties (such as tot, hits, misses), or `None` when
    # the stats have been disabled for that function.
    _CACHE_PERF: Dict[str, Optional[Dict[str, int]]] = {}


def enable_cache_perf(func_name: str) -> None:
    """
    Enable cache performance statistics for a given function.

    The counters (`tot`, `hits`, `misses`) start from zero.

    :param func_name: The name of the function.
    """
    _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0}


def disable_cache_perf(func_name: str = "") -> None:
    """
    Disable cache performance statistics for a given function.

    If `func_name` is empty, disable cache performance statistics for all
    functions.

    :param func_name: The name of the function.
    """
    if func_name == "":
        for func_name_tmp in get_cache_func_names("all"):
            disable_cache_perf(func_name_tmp)
        return
    # `None` marks the stats as disabled (see `get_cache_perf()`).
    _CACHE_PERF[func_name] = None


def reset_cache_perf(func_name: str = "") -> None:
    """
    Reset cache performance statistics for a given function.

    If `func_name` is empty, reset the statistics of all the functions.

    :param func_name: The name of the function.
    """
    if func_name == "":
        for func_name_tmp in get_cache_func_names("all"):
            reset_cache_perf(func_name_tmp)
        return
    _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0}


def get_cache_perf(func_name: str) -> Optional[Dict[str, int]]:
    """
    Get the cache performance object for a given function.

    :param func_name: The name of the function.
    :return: the counters, or `None` if the stats were never enabled or were
        disabled
    """
    return _CACHE_PERF.get(func_name)


def get_cache_perf_stats(func_name: str) -> str:
    """
    Get the cache performance statistics for a given function.

    :param func_name: The name of the function whose cache performance
        stats are to be retrieved.
    :return: A string with the cache performance statistics. E.g.,
        `slow_square: hits=2 misses=0 tot=2 hit_rate=1.00`. An empty string
        if there are no stats for the function.
    """
    perf = get_cache_perf(func_name)
    if perf is None:
        _LOG.warning("No cache performance stats for '%s'", func_name)
        return ""
    hits = perf["hits"]
    misses = perf["misses"]
    tot = perf["tot"]
    # Avoid dividing by zero when the function was never called.
    hit_rate = hits / tot if tot > 0 else 0
    txt = (
        f"{func_name}: hits={hits} misses={misses} tot={tot}"
        f" hit_rate={hit_rate:.2f}"
    )
    return txt


# #############################################################################
# Disk cache.
# #############################################################################

# Functions to save and retrieve cache from disk.
# ```
# {
#     "{\"args\": [\"You are a calculator.
# Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [
#         "25",
#         3.195e-05
#     ],
#     "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [
#         "5",
#         3.195e-05
#     ]
# }
# ```


def _get_cache_file_name(func_name: str) -> str:
    """
    Build the path of the disk cache file of a function.

    The extension follows the `type` cache property. When the property is
    not set, an existing `.pkl` file is preferred, otherwise `.json` is used.

    :param func_name: The name of the function.
    :return: The cache file name with appropriate extension.
    """
    _LOG.trace("func_name='%s'", func_name)
    hdbg.dassert_isinstance(func_name, str)
    base_name = os.path.join(
        get_cache_dir(), f"{get_cache_file_prefix()}.{func_name}"
    )
    # The local must keep the name `cache_type`, since `hprint.to_str()`
    # refers to it by name.
    cache_type = get_cache_property(func_name, "type")
    _LOG.trace(hprint.to_str("cache_type"))
    if cache_type is None:
        # Cache type not set: use an existing pickle file, if any, otherwise
        # fall back to json (whether or not the json file exists).
        extension = ".pkl" if os.path.exists(base_name + ".pkl") else ".json"
    elif cache_type == "pickle":
        extension = ".pkl"
    elif cache_type == "json":
        extension = ".json"
    else:
        raise ValueError(f"Invalid cache type '{cache_type}'")
    return base_name + extension


def _save_cache_dict_to_disk(
    func_name: str, func_cache_data: _FunctionCacheType
) -> None:
    """
    Write the cache of a function to its disk cache file.

    :param func_name: The name of the function.
    :param func_cache_data: The function cache data to save.
    """
    # The locals must keep these names, since `hprint.to_str()` refers to
    # them by name.
    cache_type = get_cache_property(func_name, "type")
    file_name = _get_cache_file_name(func_name)
    _LOG.trace(hprint.to_str("file_name cache_type"))
    _save_func_cache_data_to_file(file_name, cache_type, func_cache_data)


def _load_func_cache_data_from_file(
    file_name: str, cache_type: Optional[str]
) -> _FunctionCacheType:
    """
    Read the cache of a function from a file.

    :param file_name: The name of the file, which must exist.
    :param cache_type: `"pickle"`, `"json"`, or `None` to infer the format
        from the file extension.
    :return: The function cache data.
    """
    if cache_type is None:
        # No explicit type: `.pkl` means pickle, anything else json.
        cache_type = "pickle" if file_name.endswith(".pkl") else "json"
    _LOG.trace("Loading from '%s'", file_name)
    hdbg.dassert_file_exists(file_name)
    if cache_type == "pickle":
        with open(file_name, "rb") as stream:
            return pickle.load(stream)
    if cache_type == "json":
        with open(file_name, "r", encoding="utf-8") as stream:
            return json.load(stream)
    raise ValueError(f"Invalid cache type '{cache_type}'")


# TODO(gp): Maybe private?
def get_disk_cache(func_name: str) -> _FunctionCacheType:
    """
    Load the disk cache of a function, creating an empty one if missing.

    :param func_name: The name of the function.
    :return: A dictionary containing the cache data.
    """
    file_name = _get_cache_file_name(func_name)
    if not os.path.exists(file_name):
        # Initialize the disk cache with an empty dict, so that the load
        # below always finds a file.
        _LOG.trace("No cache from disk")
        _save_cache_dict_to_disk(func_name, {})
    # The local must keep the name `cache_type`, since `hprint.to_str()`
    # refers to it by name.
    cache_type = get_cache_property(func_name, "type")
    _LOG.trace(hprint.to_str("cache_type"))
    return _load_func_cache_data_from_file(file_name, cache_type)


# #############################################################################
# Stats.
+# ############################################################################# + + +def cache_stats_to_str( + func_name: str = "", +) -> Optional["pd.DataFrame"]: # noqa: F821 + """ + Print the cache stats for a function or for all functions. + + E.g., + ``` + find_email: + memory: - + disk: 1044 + + verify_email: + memory: - + disk: 2322 + ``` + """ + # We want to limit the dependency from pandas in the cache. + import pandas as pd + + if func_name == "": + result = [] + for func_name in get_cache_func_names("all"): + result_tmp = cache_stats_to_str(func_name) + result.append(result_tmp) + if result: + result = pd.concat(result) + else: + result = None + return result + result = {} + # Memory cache. + if func_name in _CACHE: + result["memory"] = len(_CACHE[func_name]) + else: + result["memory"] = "-" + # Disk cache. + file_name = _get_cache_file_name(func_name) + if os.path.exists(file_name): + disk_cache = get_disk_cache(func_name) + result["disk"] = len(disk_cache) + else: + result["disk"] = "-" + result = pd.Series(result).to_frame().T + result.index = [func_name] + return result + + +def force_cache_from_disk(func_name: str = "") -> None: + """ + Force loading the cache from disk and update the memory cache. + + :param func_name: The name of the function. If empty, apply to all + cached functions. + """ + if func_name == "": + _LOG.info("Before:\n%s", cache_stats_to_str()) + for func_name_tmp in get_cache_func_names("all"): + force_cache_from_disk(func_name_tmp) + _LOG.info("After:\n%s", cache_stats_to_str()) + return + _LOG.trace("func_name='%s'", func_name) + # Get disk cache. + disk_cache = get_disk_cache(func_name) + _LOG.trace("disk_cache=%s", len(disk_cache)) + # Update the memory cache. + global _CACHE + _CACHE[func_name] = disk_cache + + +def get_mem_cache(func_name: str) -> _CacheType: + """ + Retrieve the memory cache for a given function. + + :param func_name: The name of the function. + :return: A dictionary containing the memory cache data. 
+ """ + mem_cache = _CACHE.get(func_name, {}) + return mem_cache + + +def flush_cache_to_disk(func_name: str = "") -> None: + """ + Flush the memory cache to disk and update the memory cache. + + :param func_name: The name of the function. If empty, apply to all + cached functions. + """ + if func_name == "": + _LOG.info("Before:\n%s", cache_stats_to_str()) + for func_name_tmp in get_cache_func_names("all"): + flush_cache_to_disk(func_name_tmp) + _LOG.info("After:\n%s", cache_stats_to_str()) + return + _LOG.trace("func_name='%s'", func_name) + # Get memory cache. + mem_cache = get_mem_cache(func_name) + _LOG.trace("mem_cache=%s", len(mem_cache)) + # Get disk cache. + disk_cache = get_disk_cache(func_name) + _LOG.trace("disk_cache=%s", len(disk_cache)) + # Merge disk cache with memory cache. + disk_cache.update(mem_cache) + # Save merged cache to disk. + _save_cache_dict_to_disk(func_name, disk_cache) + # Update the memory cache. + global _CACHE + _CACHE[func_name] = disk_cache + + +def get_cache(func_name: str) -> _CacheType: + """ + Retrieve the cache for a given function name. + + :param func_name: The name of the function whose cache is to be + retrieved. + :return: A dictionary containing the cache data. + """ + global _CACHE + if func_name in _CACHE: + _LOG.trace("Loading mem cache for '%s'", func_name) + cache = get_mem_cache(func_name) + else: + _LOG.trace("Loading disk cache for '%s'", func_name) + func_cache_data = get_disk_cache(func_name) + _CACHE[func_name] = func_cache_data + cache = func_cache_data + return cache + + +# ############################################################################# +# Reset cache. +# ############################################################################# + +# Functions to reset cache (both memory and disk). + + +def reset_mem_cache(func_name: str = "") -> None: + """ + Reset the memory cache for a given function. + + :param func_name: The name of the function. If empty, reset all + memory caches. 
+ """ + _LOG.trace(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(func_name, str) + if func_name == "": + _LOG.trace("Before resetting memory cache:\n%s", cache_stats_to_str()) + for func_name_tmp in get_cache_func_names("all"): + reset_mem_cache(func_name=func_name_tmp) + _LOG.trace("After:\n%s", cache_stats_to_str()) + return + _CACHE[func_name] = {} + del _CACHE[func_name] + + +def reset_disk_cache(func_name: str = "", interactive: bool = True) -> None: + """ + Reset the disk cache for a given function name. + + If `func_name` is empty, reset all disk cache files. + :param func_name: The name of the function whose disk cache is to + be reset. If empty, reset all disk cache files. + :param interactive: If True, prompt the user for confirmation before + resetting the disk cache. + """ + _LOG.trace(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_isinstance(interactive, bool) + if interactive: + hsystem.query_yes_no( + f"Are you sure you want to reset the disk cache for func_name={func_name}?" + ) + if func_name == "": + _LOG.trace("Before resetting disk cache:\n%s", cache_stats_to_str()) + prefix = get_cache_file_prefix() + cache_files = glob.glob(os.path.join(get_cache_dir(), f"{prefix}.*")) + _LOG.warning("Resetting disk cache") + for file_name in cache_files: + if os.path.isfile(file_name): + os.remove(file_name) + _LOG.trace("After:\n%s", cache_stats_to_str()) + return + # + file_name = _get_cache_file_name(func_name) + if os.path.exists(file_name): + _LOG.warning("Removing cache file '%s'", file_name) + os.remove(file_name) + + +def reset_cache(func_name: str = "", interactive: bool = True) -> None: + """ + Reset both memory and disk cache for a given function. + + :param func_name: The name of the function. If empty, reset all + caches. + :param interactive: If True, prompt the user for confirmation before + resetting the disk cache. 
+ """ + _LOG.trace(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_isinstance(interactive, bool) + reset_mem_cache(func_name=func_name) + reset_disk_cache(func_name=func_name, interactive=interactive) + + +# ############################################################################# +# Mock / unit test cache. +# ############################################################################# + + +def _get_cache_key(args: Any, kwargs: Any) -> str: + cache_key = json.dumps( + {"args": args, "kwargs": kwargs}, + sort_keys=True, + default=str, + ) + _LOG.trace("cache_key=%s", cache_key) + return cache_key + + +def mock_cache(func_name: str, cache_key: str, value: Any) -> None: + """ + Mock the function cache for a given function and cache key. + + :param func_name: The name of the function. + :param cache_key: The cache key. + :param value: The value to store in the cache. + """ + # We should not use the main cache directory for mocking. + hdbg.dassert_ne( + get_cache_dir(), + get_main_cache_dir(), + msg="Do not use the main cache directory for mocking", + ) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_ne(func_name, "", "Function name is empty") + hdbg.dassert_isinstance(cache_key, str) + hdbg.dassert_ne(cache_key, "", "Cache key is empty") + # Get the function cache. + func_cache_data = get_cache(func_name) + # Update the function cache. + func_cache_data[cache_key] = value + + +def mock_cache_from_args_kwargs( + func_name: str, args: Any, kwargs: Any, value: Any +) -> None: + """ + Mock the function cache for a given function and args/kwargs. + + E.g., when testing a cached expensive function (e.g., an LLM call or + downloading data) we can mock the cache to return a fixed value, + instead of calling the function. + + :param func_name: The name of the function. + :param args: The arguments for the function. + :param kwargs: The keyword arguments for the function. + :param value: The value to store in the cache. 
# TODO(gp): Not sure that cache_mode is worth having the duplication.
def simple_cache(
    *,
    cache_type: str = "json",
    write_through: bool = True,
    exclude_keys: Optional[List[str]] = None,
) -> Callable[..., Any]:
    """
    Decorate a function to cache its results.

    The cache is stored in memory and on disk.

    The decorated function accepts extra keyword arguments that control the
    caching for a single call: `force_refresh`, `abort_on_cache_miss`,
    `report_on_cache_miss`, and `cache_mode` (one of `"REFRESH_CACHE"`,
    `"HIT_CACHE_OR_ABORT"`, `"DISABLE_CACHE"`).

    :param cache_type: The type of cache to use ('json' or 'pickle').
    :param write_through: If True, the cache is written to disk after
        each access.
    :param exclude_keys: A list of keys to exclude from the cache key.
    :return: A decorator that can be applied to a function.
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        """
        Decorate a function to cache its results.

        :param func: function whose results should be cached
        :return: wrapper that consults the cache before calling `func`
        """
        hdbg.dassert_in(cache_type, ("json", "pickle"))
        # The cache is keyed by the function name, without the `_intrinsic`
        # suffix if present.
        func_name = getattr(func, "__name__", "unknown_function")
        if func_name.endswith("_intrinsic"):
            func_name = func_name[: -len("_intrinsic")]
        # Only set cache type if not already set (preserve existing setting).
        existing_type = get_cache_property(func_name, "type")
        if not existing_type:
            set_cache_property(func_name, "type", cache_type)
        # Handle mutable default argument.
        exclude_keys_list: List[str] = (
            exclude_keys if exclude_keys is not None else []
        )

        @functools.wraps(func)
        def wrapper(
            *args: Any,
            force_refresh: bool = False,
            abort_on_cache_miss: bool = False,
            report_on_cache_miss: bool = False,
            **kwargs: Any,
        ) -> Any:
            """
            Cache the results of the decorated function.

            :param args: Positional arguments for the function.
            :param force_refresh: If True, the cache is refreshed
                regardless of whether the key exists in the cache.
            :param abort_on_cache_miss: If True, an exception is raised
                if the key is not found in the cache.
            :param report_on_cache_miss: If True, a message is logged if
                the key is not found in the cache, and the function
                returns "_cache_miss_" instead of accessing the real
                value.
            :param kwargs: Keyword arguments for the function.
            :return: The cached value or the result of the function.
            :raises ValueError: on a cache miss when aborting on cache miss
                is enabled
            """
            # Get the function name.
            func_name = getattr(func, "__name__", "unknown_function")
            if func_name.endswith("_intrinsic"):
                func_name = func_name[: -len("_intrinsic")]
            # Get the cache.
            cache = get_cache(func_name)
            # Remove keys that should not be part of the cache key.
            # Also exclude cache_mode since it's a control parameter.
            excluded_keys = set(exclude_keys_list) | {"cache_mode"}
            kwargs_for_cache_key = {
                k: v for k, v in kwargs.items() if k not in excluded_keys
            }
            # Prepare kwargs for the actual function call.
            # Keep cache_mode since the wrapped function may need it in its signature.
            # Note that, as a consequence, a `cache_mode` passed explicitly by
            # the caller is forwarded to `func` unchanged.
            kwargs_for_func = kwargs.copy()
            # Resolve effective cache_mode: explicit kwarg wins, otherwise
            # fall back to the process-wide global (set via
            # `set_global_cache_mode`). Do NOT inject into kwargs_for_func, as
            # the wrapped function may not accept a `cache_mode` parameter.
            if "cache_mode" in kwargs:
                cache_mode = kwargs.get("cache_mode")
            else:
                cache_mode = _GLOBAL_CACHE_MODE
            # `cache_mode` is a special keyword argument to control caching
            # behavior.
            if cache_mode is not None:
                _LOG.trace("cache_mode=%s", cache_mode)
                if cache_mode == "REFRESH_CACHE":
                    # Force to refresh the cache.
                    _LOG.trace("Forcing cache refresh")
                    force_refresh = True
                if cache_mode == "HIT_CACHE_OR_ABORT":
                    # Abort if the cache is not hit.
                    _LOG.trace("Abort on cache miss")
                    abort_on_cache_miss = True
                if cache_mode == "DISABLE_CACHE":
                    # Disable the cache.
                    # The function is called directly: the cache is neither
                    # read nor updated, and the performance stats are not
                    # touched.
                    _LOG.trace("Disabling cache")
                    if _CACHE_DEBUG:
                        _LOG.warning(
                            "cache[%s]: COMPUTE (cache disabled by cache_mode=DISABLE_CACHE)",
                            func_name,
                        )
                    value = func(*args, **kwargs_for_func)
                    return value
            # Get the key.
            cache_key = _get_cache_key(args, kwargs_for_cache_key)
            # Get the cache properties.
            cache_perf = get_cache_perf(func_name)
            _LOG.trace("cache_perf is None=%s", cache_perf is None)
            # Update the performance stats.
            if cache_perf:
                hdbg.dassert_in("tot", cache_perf)
                cache_perf["tot"] += 1
            # Handle a forced refresh.
            # The per-call flag is OR-ed with the per-function cache property.
            force_refresh = force_refresh or get_cache_property(
                func_name, "force_refresh"
            )
            _LOG.trace("force_refresh=%s", force_refresh)
            if cache_key in cache and not force_refresh:
                _LOG.trace("Cache hit for key='%s'", cache_key)
                if _CACHE_DEBUG:
                    _LOG.warning("cache[%s]: HIT", func_name)
                # Update the performance stats.
                if cache_perf:
                    cache_perf["hits"] += 1
                # Retrieve the value from the cache.
                value = cache[cache_key]
            else:
                # This branch is taken both when the key is missing and when
                # a refresh is forced, so a forced refresh is counted as a
                # miss.
                _LOG.trace("Cache miss for key='%s'", cache_key)
                # Update the performance stats.
                if cache_perf:
                    cache_perf["misses"] += 1
                # Abort on cache miss.
                abort_on_cache_miss = abort_on_cache_miss or get_cache_property(
                    func_name, "abort_on_cache_miss"
                )
                _LOG.trace("abort_on_cache_miss=%s", abort_on_cache_miss)
                if abort_on_cache_miss:
                    raise ValueError(f"Cache miss for key='{cache_key}'")
                # Report on cache miss.
                report_on_cache_miss = (
                    report_on_cache_miss
                    or get_cache_property(func_name, "report_on_cache_miss")
                )
                _LOG.trace("report_on_cache_miss=%s", report_on_cache_miss)
                if report_on_cache_miss:
                    # Return the sentinel without calling the function or
                    # updating the cache.
                    _LOG.trace("Cache miss for key='%s'", cache_key)
                    return "_cache_miss_"
                if _CACHE_DEBUG:
                    if force_refresh:
                        _LOG.warning(
                            "cache[%s]: RECOMPUTE (cache_mode=REFRESH_CACHE)",
                            func_name,
                        )
                    else:
                        _LOG.warning("cache[%s]: COMPUTE (miss)", func_name)
                # Access the intrinsic function.
                value = func(*args, **kwargs_for_func)
                # Update cache.
                cache[cache_key] = value
                _LOG.trace(
                    "Updating cache with key='%s' value='%s'", cache_key, value
                )
                if write_through:
                    _LOG.trace("Writing through to disk")
                    flush_cache_to_disk(func_name)
            return value

        return wrapper

    return decorator
def inject_todos_from_cfile(
    cfile_txt: str, todo_user: str, comment_prefix: str
) -> None:
    """
    Inject the TODOs from a cfile in the corresponding files.

    Given a cfile with the following content:
    ```
    path/to/file.py:101: The logic is repeated. Consider a helper function.
    ```
    the function will inject the TODO in the corresponding file and line,
    i.e., it inserts the line
    ```
    {comment_prefix} TODO({todo_user}): The logic is repeated. Consider ...
    ```
    right above line 101 of `path/to/file.py`, using the same amount of
    indentation as that line.

    Empty lines in the cfile are ignored, lines that can't be parsed are
    skipped with a warning. The TODOs for the same file must refer to strictly
    increasing line numbers.

    :param cfile_txt: The content of the cfile.
    :param todo_user: The user to use in the TODO.
    :param comment_prefix: The prefix to use for the comment (e.g., "#")
    """
    # For each file, store
    # - the current file content
    # - the offset (i.e., how many lines we inserted in the file so far, so
    #   we can inject the TODO at the correct line number)
    # - the index of the last line modified to make sure the TODOs are for
    #   increasing line numbers.
    file_content = {}
    for todo_line in cfile_txt.split("\n"):
        _LOG.debug("\n%s", hprint.frame(f"todo line='{todo_line}'"))
        if todo_line.strip() == "":
            continue
        # Parse a line like `file_name:line_number: text of the TODO`.
        m = re.match(r"^\s*(\S+):(\d+):\s*(.*)$", todo_line)
        if not m:
            _LOG.warning("Can't parse line='%s': skipping", todo_line)
            continue
        file_name, todo_line_number, todo = m.groups()
        todo_line_number = int(todo_line_number)
        _LOG.debug(hprint.to_str("file_name todo_line_number todo"))
        # Update the state if needed.
        if file_name not in file_content:
            _LOG.debug("Reading %s", file_name)
            hdbg.dassert_path_exists(file_name)
            txt = hio.from_file(file_name).split("\n")
            offset = 0
            last_line_modified = 0
            file_content[file_name] = (txt, offset, last_line_modified)
        # Extract the info for the file to process.
        txt, offset, last_line_modified = file_content[file_name]
        _LOG.debug(hprint.to_str("offset last_line_modified"))
        hdbg.dassert_lt(
            last_line_modified,
            todo_line_number,
            "The TODOs don't look like they are increasing line numbers: "
            "TODO at line %d is before the last line modified %d",
            todo_line_number,
            last_line_modified,
        )
        # We subtract 1 from the line number since TODOs count from 1, while
        # Python arrays count from 0.
        act_line_number = todo_line_number - 1 + offset
        hdbg.dassert_lte(0, act_line_number)
        hdbg.dassert_lt(act_line_number, len(txt))
        insert_line = txt[act_line_number]
        _LOG.debug(hprint.to_str("act_line_number insert_line"))
        # Extract how many leading whitespace characters there are in the line
        # where to insert the TODO. Measuring the leading whitespace directly
        # (instead of requiring a non-blank character after it) makes this
        # work also when the TODO refers to an empty or whitespace-only line.
        num_leading_spaces = len(insert_line) - len(insert_line.lstrip())
        spaces = num_leading_spaces * " "
        # Build the new line to insert.
        new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}"
        _LOG.debug(hprint.to_str("new_line"))
        # Insert the new line in txt at the correct position.
        txt = txt[:act_line_number] + [new_line] + txt[act_line_number:]
        # Update the state.
        offset += 1
        file_content[file_name] = (txt, offset, todo_line_number)
    # Write updated files back.
    for file_name, (txt, offset, last_line_modified) in file_content.items():
        _ = last_line_modified
        _LOG.info("Writing %d lines in %s", offset, file_name)
        txt = "\n".join(txt)
        hio.to_file(file_name, txt)
+ + This method should only be used when a new Assistant is needed. + Otherwise, use the Assistant name to retrieve an existing Assistant. + + :param assistant_name: name of the Assistant to be created + :param instructions: instruction string that describes the expected + behavior of assistant + :param model: GPT model used by the assistant + :param use_retrieval: enable the retrieval tool from OpenAI + :param use_code_interpreter: enable the code interpreter tool from + OpenAI + :param use_function: enable the function tool from OpenAI (To be + implemented) + """ + # Create the assistant + tools = [] + if use_retrieval: + tools.append({"type": "retrieval"}) + if use_code_interpreter: + tools.append({"type": "code_interpreter"}) + if use_function: + tools.append(use_function) + if not model: + model = "gpt-3.5-turbo-1106" + assistant = client.beta.assistants.create( + instructions=instructions, + name=assistant_name, + model=model, + tools=tools, + ) + return assistant.id + + +def update_assistant_by_id( + assistant_id: str, + *, + instructions: str = "", + name: str = "", + tools: Optional[List[Dict[str, str]]] = None, + model: str = "", + file_ids: Optional[List[str]] = None, +) -> str: + """ + Update an existing OpenAI Assistant in our OpenAI Organization. 
def get_all_assistants() -> List[openai.types.beta.assistant.Assistant]:
    """
    Get all available Assistant objects in our OpenAI Organization.

    :return: list of all the Assistants, requested from the API in descending
        order
    """
    # A single request returns at most `limit` assistants. Iterating over the
    # returned page object (instead of reading only its `.data`) lets the
    # OpenAI client fetch the following pages, so that assistants beyond the
    # first page are not silently dropped.
    assistants_page = client.beta.assistants.list(
        order="desc",
        limit=100,
    )
    assistants = list(assistants_page)
    return assistants
+ """ + assistant = None + assistants = get_all_assistants() + for cur_assistant in assistants: + if cur_assistant.name == assistant_name: + assistant = cur_assistant + break + hdbg.dassert_is_not( + assistant, None, f"Assistant '{assistant_name}' not found" + ) + assert assistant is not None + return assistant.id + + +# ############################################################################# +# Create directory structure storing gpt file ids +# ############################################################################# + + +def _path_to_dict(path: str) -> Dict: + """ + Generate a dictionary of all files under a given folder. + """ + for root, dirs, files in os.walk(path): + tree = {d: _path_to_dict(os.path.join(root, d)) for d in dirs} + tree.update({f: {"name": f} for f in files}) + return tree + return {} + + +# TODO(Henry): We use fileIO here to store the directory structure, which may +# not be thread-safe. Should change to use DAO if we have any. +def _dump_gpt_ids(dictionary: Dict) -> None: + """ + Dump a given OpenAI File ID dictionary into a cache file for furture use. + """ + file_path = os.path.join(prefix_to_root, "gpt_id.json") + hio.to_json(file_path, dictionary) + return + + +def _load_gpt_ids() -> Dict: + """ + Load the OpenAI File ID dictionary from the cache file. + """ + file_path = os.path.join(prefix_to_root, "gpt_id.json") + if os.path.exists(file_path) and os.path.isfile(file_path): + return hio.from_json(file_path) + else: + directory_dict = _path_to_dict(prefix_to_root) + _dump_gpt_ids(directory_dict) + return directory_dict + + +# ############################################################################# +# Upload file to OpenAI account +# ############################################################################# + + +def _upload_to_gpt_no_set_id(path_from_root: str) -> str: + """ + Upload a file to OpenAI. + + This method will NOT set File ID to cache. 
def _set_gpt_id(path_from_root: str, gpt_id: str) -> None:
    """
    Manually set the cached OpenAI File ID of a given file.

    This method should ONLY be called if a file manually uploaded to
    OpenAI. It will NOT upload the given file to OpenAI.

    :param path_from_root: "/"-separated path of the file, relative to the
        root directory described by the cache
    :param gpt_id: OpenAI File ID to store for the file
    :raises KeyError: if the path is not present in the cached directory
        structure
    """
    gpt_id_dict = _load_gpt_ids()
    # Walk the cached directory tree directly instead of going through
    # `_get_gpt_id_file()`: that function uploads the file whenever no ID is
    # cached yet, which would create a second, orphaned copy of the file on
    # OpenAI right before its ID gets overwritten here.
    item = gpt_id_dict
    for level in path_from_root.split("/"):
        item = item[level]
    item["gpt_id"] = gpt_id
    _dump_gpt_ids(gpt_id_dict)
def add_files_to_assistant_by_name(
    assistant_name: str, file_path_list: List[str]
) -> str:
    """
    Link all given files to an assistant.

    An Assistant can hold only 20 files, the oldest files will be
    unlinked automatically.

    :param assistant_name: name of the Assistant to link the files to
    :param file_path_list: paths of the files to link
    :return: id of the updated Assistant
    """
    max_num_files = 20
    assistant_id = get_assistant_id_by_name(assistant_name)
    # Ask for the files already linked from the oldest to the newest, so that
    # the position in the list reflects the age of the link.
    assistant_files = client.beta.assistants.files.list(
        assistant_id=assistant_id, order="asc"
    ).data
    existing_file_ids = [file.id for file in assistant_files]
    # Remove duplicates preserving the order (a `set` would scramble the
    # order, making the choice of the files to unlink arbitrary).
    new_file_ids = list(
        dict.fromkeys(get_gpt_id(path) for path in file_path_list)
    )
    requested_file_ids = set(new_file_ids)
    kept_file_ids = [
        file_id
        for file_id in existing_file_ids
        if file_id not in requested_file_ids
    ]
    # The requested files go last, so that when the limit is exceeded the
    # oldest existing links are the ones that get dropped.
    file_ids = (kept_file_ids + new_file_ids)[-max_num_files:]
    return update_assistant_by_id(assistant_id, file_ids=file_ids)
def wait_for_run_result(thread_id: str, run_id: str, timeout: int = 180) -> List:
    """
    Wait for the thread to be processed.

    This is similar to waiting for ChatGPT's typing.

    The status of the run is polled every 5 seconds until the run completes,
    the run ends without completing, or `timeout` seconds have passed.

    :param thread_id: id of the thread the run belongs to
    :param run_id: id of the run to wait for
    :param timeout: maximum number of seconds to wait
    :return: messages of the thread, as returned by the OpenAI API
    :raises TimeoutError: if the run did not complete, either because the
        timeout expired or because the run failed, was cancelled, or expired
    """
    poll_interval_in_secs = 5
    # Statuses after which a run can't reach "completed" anymore: there is no
    # point in waiting for the timeout once one of them is reported.
    unrecoverable_statuses = ("failed", "cancelled", "expired")
    status = ""
    _LOG.info("Waiting for chatgpt response...")
    for i in range(math.ceil(timeout / poll_interval_in_secs)):
        _LOG.info(
            "%s/%s seconds before timeout", i * poll_interval_in_secs, timeout
        )
        time.sleep(poll_interval_in_secs)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id, run_id=run_id
        )
        status = run.status
        if status == "completed" or status in unrecoverable_statuses:
            break
    if status != "completed":
        # Keep raising `TimeoutError` also for runs that ended without
        # completing, so that callers handling the timeout keep working.
        raise TimeoutError(
            "Failed to retrieve response from OpenAI "
            f"(last run status='{status}')."
        )
    messages = client.beta.threads.messages.list(thread_id=thread_id).data
    return messages
wait_for_run_result(thread_id, run_id) + output = messages[0].content[0].text.value + if vim_mode or not output_file_path: + sys.stdout.write(output) + if output_file_path: + with open(output_file_path, "w", encoding="utf-8") as fp: + fp.write(output) + return output diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py new file mode 100644 index 000000000..18ce63d7d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py @@ -0,0 +1,32 @@ +""" +Import as: + +import helpers.hchatgpt_instructions as hchainst +""" + +instructions = { + "MarkdownLinter": """ +You are a markdown linter. +If you are given a piece of text under markdown format, treat these text as the +content of the markdown content you need to lint. +If you are given a filename, you should find the file in your linked files, use +it as the markdown content you need to lint. +After get the markdown content, find and fix grammatical errors in that content +with the minimum amount of changes possible and preserve the formatting. +You don't need to add periods at the end of each sentence. +You should not add ```markdown ``` around the output content. +Your only output message should be the linted result of that file, no additional +explanations should be added in your output. + """, + "DocWriter": """ +You are a documentation writer. +If you are given several python code files, try to understand these files and +how they may work. +You should write a markdown document about these files for users that have not +read the codes to know the basic workflow of them, your can use examples to show +the user how they can easily use those codes. 
+For the format of markdown document, you can use files linked to you as +reference. You don't need to strictly follow the format, the goal is to make the +document easy to understand + """, +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py new file mode 100644 index 000000000..2fd175bf4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py @@ -0,0 +1,183 @@ +""" +Import as: + +import helpers.hcoverage as hcovera +""" + +import glob +import logging +import os +import pathlib +import site +import subprocess +import sysconfig + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def _detect_site_packages() -> pathlib.Path: + """ + Return the Path to the site-packages directory for the active interpreter. + + - Try sysconfig first + - Fall back to site.getsitepackages() or user-site. + """ + try: + purelib = sysconfig.get_path("purelib") + if purelib: + return pathlib.Path(purelib) + except (KeyError, IOError): + _LOG.debug( + "sysconfig.get_path('purelib') failed, falling back to site packages" + ) + try: + sp_dirs = site.getsitepackages() + except AttributeError: + sp_dirs = [] + for d in sp_dirs: + if "site-packages" in d: + return pathlib.Path(d) + return pathlib.Path(site.getusersitepackages()) + + +def inject(coveragerc: str = ".coveragerc") -> None: + """ + Install the coverage startup hook into this env site-packages. 
def remove() -> None:
    """
    Remove the coverage startup hook from this env site-packages.

    The `coverage.pth` file is deleted with `sudo rm` (a warning is logged if
    the file doesn't exist) and the `COVERAGE_PROCESS_START` environment
    variable is removed from the environment of the current process.
    """
    site_packages_dir = _detect_site_packages()
    hook_path = site_packages_dir / "coverage.pth"
    if not hook_path.is_file():
        # TODO(Maddy): Is this acceptable?
        _LOG.warning("No coverage.pth found in %s", site_packages_dir)
    else:
        rm_cmd = f'sudo rm -f "{hook_path}"'
        try:
            hsystem.system(rm_cmd)
            _LOG.info("Removed coverage hook from %s via sudo rm", hook_path)
        except Exception as e:
            # Log the failure and let the caller handle it.
            _LOG.error("Failed to remove coverage hook via sudo rm: %s", e)
            raise
    # Remove coverage environment variables.
    try:
        if "COVERAGE_PROCESS_START" not in os.environ:
            _LOG.debug("COVERAGE_PROCESS_START not found in environment")
        else:
            del os.environ["COVERAGE_PROCESS_START"]
            _LOG.info("Removed COVERAGE_PROCESS_START from environment")
    except Exception as e:
        _LOG.error("Failed to remove COVERAGE_PROCESS_START: %s", e)
        raise
+ RUN pip install --no-cache-dir coverage pytest pytest-cov + + # Create coverage data directory with proper permissions. + RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data + + # Setup coverage configuration. + COPY .coveragerc /app/coverage_data/.coveragerc + ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc + + # Create coverage.pth file for automatic startup. + # This ensures coverage tracking starts automatically when Python runs. + RUN python - < None: + """ + Execute shell commands to run coverage steps in a Docker container. + + Assumes: + - A valid .coveragerc exists in the current working directory. + - coverage_data/ is the mounted folder inside the container. + """ + commands = [ + "mkdir -p coverage_data", + "chmod 777 coverage_data", + "cp .coveragerc coverage_data/.coveragerc", + "chmod 644 coverage_data/.coveragerc", + ] + for cmd in commands: + hsystem.system(cmd, suppress_output=False) + + +def coverage_combine() -> None: + """ + Execute shell commands to combine coverage data. + + Assumes: + - .coverage.* files are present in the current directory or coverage_data/. + """ + # Check if there are any coverage files in coverage_data/ and copy them. + if os.path.exists("coverage_data"): + coverage_files_cmd = ( + "find coverage_data -name '.coverage.*' 2>/dev/null | wc -l" + ) + rc = hsystem.system(coverage_files_cmd, abort_on_error=False) + if rc == 0: + # Use a simple existence check instead of parsing command output. + coverage_files = glob.glob("coverage_data/.coverage.*") + if coverage_files: + _LOG.info( + "Found coverage files in coverage_data/, copying to current directory" + ) + commands = [ + "cp coverage_data/.coverage.* . 2>/dev/null || true", + "rm -rf coverage_data/.coverage.* 2>/dev/null || true", + ] + for cmd in commands: + hsystem.system(cmd, suppress_output=False) + # Check if there are any .coverage.* files to combine. 
+ coverage_files = glob.glob(".coverage.*") + num_files = len(coverage_files) + if num_files > 0: + _LOG.info("Found %d coverage data files to combine", num_files) + commands = [ + "coverage combine", + "coverage report --skip-empty", + ] + for cmd in commands: + hsystem.system(cmd, suppress_output=False) + else: + _LOG.warning("No .coverage.* files found to combine") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py new file mode 100644 index 000000000..6c64659c0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py @@ -0,0 +1,365 @@ +""" +Import as: + +import helpers.hcsv as hcsv +""" + +import ast +import logging +import os +from typing import Any, Callable, Dict, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hs3 as hs3 + +_LOG = logging.getLogger(__name__) + + +def _append_csv( + df: pd.DataFrame, path: str, *, index: bool = False, **kwargs: Any +) -> None: + """ + Append a df to the CSV file `path` without header. + """ + with open(path, "a") as f: + df.to_csv(f, header=False, index=index, **kwargs) + + +def _read_csv_range( + csv_path: str, from_: int, to: int, **kwargs: Any +) -> pd.DataFrame: + """ + Read a specified row range of a CSV file and convert to a DataFrame. + + This function: + - assumes the CSV file to have header, considered to be row 0. + - reads [from_, to), e.g., (to - from_) lines following list slicing semantics. 
+ + :param csv_path: location of CSV file + :param from_: first line to read (header is row 0 and is always read) + :param to: last line to read, not inclusive + :return: DataFrame with columns from CSV line 0 (header) + """ + hdbg.dassert_lt(0, from_, msg="Row 0 assumed to be header row") + hdbg.dassert_lt(from_, to, msg="Empty range requested!") + skiprows = list(range(1, from_)) + nrows = to - from_ + df = pd.read_csv(csv_path, skiprows=skiprows, nrows=nrows, **kwargs) + if df.shape[0] < to: + _LOG.warning("Number of df rows = %i vs requested = %i", df.shape[0], to) + return df + + +# TODO(gp): There is no use of this function. +def build_chunk( + csv_path: str, + col_name: str, + start: int, + *, + nrows_at_a_time: int = 1000, + **kwargs: Any, +) -> pd.DataFrame: + """ + Build a DataFrame from a CSV subset as follows: + + - Names the columns using the header line (row 0) + - Reads the value in (row, col) coordinates (`start`, `col_name`) (if it + exists) as `value` + - Adds row `start` and all subsequent contiguous rows with `value` in + column `col_name` + + For memory efficiency, the CSV is processed in chunks of size `nrows_at_a_time`. + + :param csv_path: location of CSV file + :param col_name: name of column whose values define chunks + :param start: first row to process + :param nrows_at_a_time: size of chunks to process + :return: DataFrame with columns from CSV line 0 + """ + hdbg.dassert_lt(0, start) + stop = False + dfs: List[pd.DataFrame] = [] + init_df = _read_csv_range(csv_path, start, start + 1, **kwargs) + if init_df.shape[0] < 1: + return init_df + val = init_df[col_name].iloc[0] + _LOG.debug("Building chunk for %s", val) + counter = 0 + while not stop: + from_ = start + counter * nrows_at_a_time + df = _read_csv_range(csv_path, from_, from_ + nrows_at_a_time) + # Break if there are no matches. + if df.shape[0] == 0: + break + if not (df[col_name] == val).any(): + break + # Stop if we have run out of rows to read. 
+ if df.shape[0] < nrows_at_a_time: + stop = True + idx_max = (df[col_name] == val)[::-1].idxmax() + # Stop if we have reached a new value. + if idx_max < (df.shape[0] - 1): + stop = True + dfs.append(df.iloc[0 : idx_max + 1]) + counter += 1 + if not dfs: + return pd.DataFrame() + return pd.concat(dfs, axis=0).reset_index(drop=True) + + +# TODO(gp): There is no use of this function. +def find_first_matching_row( + csv_path: str, + col_name: str, + val: str, + *, + start: int = 1, + nrows_at_a_time: int = 1000000, + **kwargs: Any, +) -> Optional[int]: + """ + Find first row in CSV where value in column `col_name` equals `val`. + + :param csv_path: location of CSV file + :param col_name: name of column whose values define chunks + :param val: value to match on + :param start: first row (inclusive) to start search on + :param nrows_at_a_time: size of chunks to process + :return: line in CSV of first matching row at or past start + """ + curr = start + while True: + _LOG.debug("Start of current chunk = line %i", curr) + df = _read_csv_range(csv_path, curr, curr + nrows_at_a_time, **kwargs) + if df.shape[0] < 1: + _LOG.info("Value %s not found", val) + break + matches = df[col_name] == val + if matches.any(): + idx_max = matches.idxmax() + return int(curr + idx_max) + curr += nrows_at_a_time + return None + + +# ############################################################################# +# CSV to PQ conversion +# ############################################################################# + + +def _csv_mapreduce( + csv_path: str, + out_dir: str, + key_func: Callable, + chunk_preprocessor: Optional[Callable], + *, + chunk_size: int = 1000000, +) -> None: + """ + Map-reduce-type processing of CSV. 
+ + The phases are: + - Read the CSV in chunks as DataFrame + - Key each row of the DataFrame using a `groupby` + - "Reduce" keyed groups by writing and appending to a CSV + + :param csv_path: input CSV path + :param out_dir: output dir for CSV with filenames corresponding to keys + :param key_func: function to apply to each chunk DataFrame to key rows + Should return an iterable with elements like (key, df) + :param chunk_preprocessor: function to apply to each chunk DataFrame before + applying key_func + :param chunk_size: chunk_size of input to process + """ + # Read CSV data in chunks. + chunks = pd.read_csv(csv_path, chunksize=chunk_size) + # Preprocess chunk, if needed. + if chunk_preprocessor is not None: + chunks = map(chunk_preprocessor, chunks) + # Apply key_func to each chunk. + keyed_group_blocks = map(key_func, chunks) + # Append results. + for block in keyed_group_blocks: + for idx, df in block: + file_name = os.path.join(out_dir, idx + ".csv") + _append_csv(df, file_name) + + +def convert_csv_to_pq( + csv_path: str, + pq_path: str, + *, + normalizer: Optional[Callable] = None, + header: Optional[int] = 0, + compression: Optional[str] = "gzip", +) -> None: + """ + Convert CSV file to Parquet file. + + Output of `csv_map_reduce()` is typically header-less to support append mode, + and so `normalizer` may be used to add appropriate headers. Note that Parquet + requires string column names, whereas Pandas by default uses integer column + names. + + :param csv_path: full path of CSV + :param pq_path: full path of parquet + :param header: header specification of CSV + :param normalizer: function to apply to df before writing to PQ + """ + df = pd.read_csv(csv_path, header=header) + # TODO(Paul): Ensure that one of header, normalizer is not None. 
+ if normalizer is not None: + df = normalizer(df) + df.to_parquet(pq_path, compression=compression) + + +def convert_csv_dir_to_pq_dir( + csv_dir: str, + pq_dir: str, + *, + normalizer: Optional[Callable] = None, + header: Optional[int] = None, +) -> None: + """ + Apply `convert_csv_to_pq()` to all files in `csv_dir`. + + :param csv_dir: directory storing CSV files on S3 or local + :param pq_dir: target directory to save PQ files (only local + filesystem) + :param header: header specification of CSV + :param normalizer: function to apply to df before writing to PQ + """ + # Get the filenames in `csv_dir`. + if hs3.is_s3_path(csv_dir): + # TODO(gp): Pass aws_profile. + s3fs = hs3.get_s3fs("am") + filenames = s3fs.ls(csv_dir) + else: + # Local filesystem. + hdbg.dassert_dir_exists(csv_dir) + # TODO(Paul): check .endswith(".csv") or do glob(csv_dir + "/*.csv") + filenames = os.listdir(csv_dir) + hdbg.dassert(filenames, "No files in the directory '%s'", csv_dir) + # Process all the filenames. + # TODO(gp): Add tqdm. + # TODO(gp): Consider parallelizing. + for filename in filenames: + # Remove .csv/.csv.gz. + csv_stem = hio.remove_extension( + filename, ".csv", check_file_exists=True, check_has_extension=False + ) + if csv_stem is None: + csv_stem = hio.remove_extension( + filename, + ".csv.gz", + check_file_exists=True, + check_has_extension=False, + ) + if csv_stem is None: + _LOG.warning( + "Skipping filename=%s since it has invalid extension", csv_stem + ) + continue + # Convert file to PQ. + pq_filename = csv_stem + ".pq" + convert_csv_to_pq( + os.path.join(csv_dir, filename), + os.path.join(pq_dir, pq_filename), + normalizer=normalizer, + header=header, + ) + + +# ############################################################################# +# CSV-JSON dict conversion +# ############################################################################# + + +# TODO(gp): convert_csv_to_json_dict? 
+# TODO(gp): path_to_csv -> file_name +def convert_csv_to_dict(path_to_csv: str, remove_nans: bool) -> Dict[Any, Any]: + """ + Convert a CSV file storing a dataframe into a JSON-compatible dict. + + :param path_to_csv: path to the CSV file + :param remove_nans: whether to remove NaNs from the dictionary + :return: a JSON-compatible dict with the dataframe data + """ + hdbg.dassert_file_exists(path_to_csv) + # Load the dataframe from a CSV file. + df = pd.read_csv(path_to_csv) + # Transform the dataframe into a dict. + dict_df = df.to_dict(orient="list") + if remove_nans: + # Remove NaNs from the dict. + for key in dict_df: + dict_df[key] = [x for x in dict_df[key] if not pd.isnull(x)] + return dict_df # type: ignore + + +# TODO(gp): path_to_csv -> file_name +def save_csv_as_json( + path_to_csv: str, remove_nans: bool, path_to_json: Optional[str] = None +) -> None: + """ + Convert the df from a CSV into a dict and save it into a JSON file. + + If the `path_to_json` is not provided, the JSON is saved in the folder where + the CSV file is located. + + :param path_to_csv: path to the CSV file + :param remove_nans: whether to remove NaNs from the dictionary + :param path_to_json: path to save the JSON file + """ + # Convert the df from the CSV into a JSON-compatible dict. + dict_df = convert_csv_to_dict(path_to_csv, remove_nans) + # Determine the JSON destination path. + if path_to_json is None: + path_to_json = hio.change_filename_extension( + path_to_csv, ".csv", ".json" + ) + # Save the dict into a JSON file. + hio.to_json(path_to_json, dict_df) + + +# ############################################################################# +# CSV files with types +# ############################################################################# + + +def to_typed_csv(df: pd.DataFrame, file_name: str) -> str: + """ + Convert df into CSV and creates a file with the dtypes of columns. 
+ + This function creates a file containing the types with the same name + and suffix e.g., `foobar.csv.types`. + """ + # Save the types. + dtypes_filename = file_name + ".types" + hio.create_enclosing_dir(dtypes_filename, incremental=True) + dtypes_dict = str(df.dtypes.apply(lambda x: x.name).to_dict()) + # Save the data. + df.to_csv(file_name, index=False) + with open(dtypes_filename, "w") as dtypes_file: + dtypes_file.write(dtypes_dict) + return dtypes_filename + + +def from_typed_csv(file_name: str) -> pd.DataFrame: + """ + Load CSV file as df applying the original types of columns. + + This function uses a file with name `file_name.types` to load + information about the column types. + """ + # Load the types. + dtypes_filename = file_name + ".types" + hdbg.dassert_path_exists(dtypes_filename) + with open(dtypes_filename) as dtypes_file: + dtypes_dict = ast.literal_eval(list(dtypes_file)[0]) + # Load the data, applying the types. + df = pd.read_csv(file_name, dtype=dtypes_dict) + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py new file mode 100644 index 000000000..2849dfb10 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py @@ -0,0 +1,309 @@ +""" +Helper functions for processing pandas dataframes. + +Import as: + +import helpers.hdataframe as hdatafr +""" + +# TODO(gp): Consider merging with `helpers/pandas_helpers.py`. 
+ +import collections +import functools +import logging +import operator +from typing import Any, Dict, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +_METHOD_TO_APPLY = Dict[str, Dict[str, Any]] + + +def _combine_masks( + masks: pd.DataFrame, mode: str, info: collections.OrderedDict +) -> pd.Series: + if mode == "and": + combined_mask = masks.all(axis=1) + elif mode == "or": + combined_mask = masks.any(axis=1) + else: + raise ValueError(f"Invalid `mode`='{mode}'") + if combined_mask.sum() == 0: + _LOG.warning("No data remaining after filtering.") + info["nrows_remaining"] = combined_mask.sum() + return combined_mask + + +def filter_data_by_values( + df: pd.DataFrame, + filters: Dict[Union[int, str], Tuple[Any, ...]], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe rows based on column values. + + :param df: dataframe + :param filters: `{col_name: (possible_values)}` + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, vals in filters.items(): + hdbg.dassert_isinstance(vals, tuple) + mask = df[col_name].isin(vals) + info[f"n_{col_name}"] = mask.sum() + info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +def filter_data_by_comparison( + df: pd.DataFrame, + filters: Dict[ + Union[int, str], Union[Tuple[str, Any], Tuple[Tuple[str, Any], ...]] + ], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe by comparing columns to values. + + :param df: dataframe + :param filters: `{col_name: (comparison_method, value)}` or + `{col_name: ((comparison_method_i, value_i))}`. + `comparison_method` is one of the ("eq", "ne", "le", "lt", "ge", "gt") + pandas method names. + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, tuple_ in filters.items(): + if not isinstance(tuple_[0], tuple): + tuple_ = (tuple_,) # type: ignore + for comparison_method, val in tuple_: + hdbg.dassert_in( + comparison_method, ("eq", "ne", "le", "lt", "ge", "gt") + ) + mask = getattr(df[col_name], comparison_method)(val) + info[f"n_{col_name}_{comparison_method}_{val}"] = mask.sum() + info[f"perc_{col_name}_{comparison_method}_{val}"] = hprint.perc( + mask.sum(), df.shape[0] + ) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +def filter_data_by_method( + df: pd.DataFrame, + filters: Dict[Union[int, str], _METHOD_TO_APPLY], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe by calling a method specified for each column. + + :param df: dataframe + :param filters: `{col_name: {method: kwargs}}`, where `method` is the + method called on the dataframe column, e.g. "isin" or "str.contains", + and `kwargs` are the kwargs for this method + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, method_dict in filters.items(): + for method, kwargs in method_dict.items(): + mask = operator.attrgetter(method)(df[col_name])(**kwargs) + info[f"n_{col_name}"] = mask.sum() + info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +# ############################################################################# + + +def apply_nan_mode( + srs: pd.Series, + mode: str = "leave_unchanged", + info: Optional[dict] = None, +) -> pd.Series: + """ + Process NaN values in a series according to the parameters. + + :param srs: pd.Series to process + :param mode: method of processing NaNs + - "leave_unchanged" - no transformation + - "drop" - drop all NaNs + - "ffill" - forward fill not leading NaNs + - "ffill_and_drop_leading" - do ffill and drop leading NaNs + - "fill_with_zero" - fill NaNs with 0 + - "strict" - raise ValueError that NaNs are detected + :param info: information storage + :return: transformed copy of input series + """ + hdbg.dassert_isinstance(srs, pd.Series) + if srs.empty: + _LOG.warning("Empty input series `%s`", srs.name) + if mode == "leave_unchanged": + res = srs.copy() + elif mode == "drop": + res = srs.dropna().copy() + elif mode == "ffill": + res = srs.ffill().copy() + elif mode == "ffill_and_drop_leading": + res = srs.ffill().dropna().copy() + elif mode == "fill_with_zero": + res = srs.fillna(0).copy() + elif mode == "strict": + res = srs.copy() + if srs.isna().any(): + raise ValueError(f"NaNs detected in mode `{mode}`") + else: + raise ValueError(f"Unrecognized mode `{mode}`") + # + if info is not None: + hdbg.dassert_isinstance(info, dict) + # Dictionary should be empty. 
+ hdbg.dassert(not info) + info["series_name"] = srs.name + info["num_elems_before"] = len(srs) + info["num_nans_before"] = np.isnan(srs).sum() + info["num_elems_removed"] = len(srs) - len(res) + info["num_nans_imputed"] = ( + info["num_nans_before"] - info["num_elems_removed"] + ) + info["percentage_elems_removed"] = ( + 100.0 * info["num_elems_removed"] / info["num_elems_before"] + ) + info["percentage_elems_imputed"] = ( + 100.0 * info["num_nans_imputed"] / info["num_elems_before"] + ) + return res + + +@functools.lru_cache() +def compute_points_per_year_for_given_freq(freq: str) -> float: + """ + Return the number of index time points per year. + + :param freq: string identifier of date frequency + :return: number of time points per year (approximate) + """ + # `pd.date_range` breaks for zero-period frequencies, so we need to work + # around that. + try: + # Leap years: 2012, 2016. + points_in_span = pd.date_range( + freq=freq, start="2012-01-01", end="2019-12-31" + ).size + span_in_years = 8 + points_per_year: float = points_in_span / span_in_years + return points_per_year + except ZeroDivisionError: + return 0.0 + + +# ############################################################################# + + +def infer_sampling_points_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: + """ + Return the number of index time points per year. + + TODO(*): Consider extending to all frequencies and count points by + explicitly building indices of the given frequency. + + :param df: series or dataframe with non-null `df.index.freq` + :return: number of time points per year (approximate) + """ + hdbg.dassert(hasattr(df.index, "freq") and df.index.freq is not None) + freq = df.index.freq + # TODO(*): Make start, end dates parameters that can be passed in. + return compute_points_per_year_for_given_freq(freq) + + +def compute_count_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: + """ + Return df.count() divided by the length of `df` in years. 
+ """ + hdbg.dassert( + hasattr(df.index, "freq") and df.index.freq is not None, + msg="`df` must have a `DatetimeIndex` with a `freq`", + ) + assert hasattr(df.index, "freq") and df.index.freq is not None + freq = df.index.freq + # Calculate the time span of `df` in years. + points_per_year = compute_points_per_year_for_given_freq(freq) + span_in_years = df.size / points_per_year + # Determine the number of non-NaN/inf/etc. data points per year. + count_per_year = df.count() / span_in_years + count_per_year = cast(float, count_per_year) + return count_per_year + + +# ############################################################################# + + +def remove_duplicates( + df: pd.DataFrame, + duplicate_columns: Optional[List[str]], + control_column: Optional[str], +) -> pd.DataFrame: + """ + Remove duplicates from DataFrame. + + :param df: DataFrame to process + :param duplicate_columns: subset of column names, None for all + :param control_column: column max value of which determines the kept + row + :return: DataFrame with removed duplicates + """ + # Fix maximum value of control column at the bottom. + if control_column: + df = df.sort_values(by=control_column) + duplicate_columns = duplicate_columns or df.columns + df = df.drop_duplicates(subset=duplicate_columns) + # Sort by index to return to original view. 
+ df = df.sort_index() + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py new file mode 100644 index 000000000..e63152593 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py @@ -0,0 +1,909 @@ +""" +Import as: + +import helpers.hdatetime as hdateti +""" + +import asyncio +import calendar +import datetime +import logging +import re +from typing import Callable, Iterable, Optional, Tuple, Union + +# TODO(gp): Use hdbg.WARNING +_WARNING = "\033[33mWARNING\033[0m" + +# Avoid dependency from other `helpers` modules to prevent import cycles. + +import pandas as pd # noqa: E402 # pylint: disable=wrong-import-position + +# TODO(gp): Check if dateutils is equivalent to `pytz` or better so we can simplify +# the dependencies. +try: + import pytz +except ModuleNotFoundError: + _module = "pytz" + print(_WARNING + f": Can't find {_module}: continuing") + + +import helpers.hdbg as hdbg # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hprint as hprint # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hwall_clock_time as hwacltim # noqa: E402 # pylint: disable=wrong-import-position + +_LOG = logging.getLogger(__name__) + +# We use the type `Datetime` to allow flexibility in the interface exposed to client. 
# The typical pattern is:
# - we call `to_datetime()`, as soon as we enter functions exposed to users,
#   to convert the user-provided datetime into a `datetime.datetime`
# - we use only `datetime.datetime` in the private interfaces
# TODO(gp): In practice we are using `pd.Timestamp`
#
# It's often worth importing this file even for just the type `Datetime`,
# since typically as soon as the caller uses this type, they also want to use
# `to_datetime()` and `dassert_*()` functions.
# TODO(gp): It would be better to call this `GeneralDateTime`, `FlexibleDateTime`,
# and rename `StrictDatetime` -> `DateTime`.
Datetime = Union[str, pd.Timestamp, datetime.datetime]

# The type `StrictDatetime` is for stricter interfaces, although it is a bit of a
# compromise.
# Either one wants to allow everything that can be interpreted as a datetime (and
# then use `Datetime`), or be strict (and then use only `datetime.datetime`).
StrictDatetime = Union[pd.Timestamp, datetime.datetime]


def dassert_is_datetime(datetime_: Datetime) -> None:
    """
    Assert that `datetime_` is of type `Datetime`.

    :param datetime_: object to check; it must be a `str`, a `pd.Timestamp`
        or a `datetime.datetime`
    """
    hdbg.dassert_isinstance(
        datetime_,
        (str, pd.Timestamp, datetime.datetime),
        "datetime_='%s' of type '%s' is not a DateTimeType",
        datetime_,
        str(type(datetime_)),
    )


def dassert_is_strict_datetime(datetime_: StrictDatetime) -> None:
    """
    Assert that `datetime_` is of type `StrictDatetime`.

    :param datetime_: object to check; it must be a `pd.Timestamp` or a
        `datetime.datetime` (strings are not accepted)
    """
    hdbg.dassert_isinstance(
        datetime_,
        (pd.Timestamp, datetime.datetime),
        "datetime_='%s' of type '%s' is not a StrictDateTimeType",
        datetime_,
        str(type(datetime_)),
    )


def dassert_str_is_date(date: str) -> None:
    """
    Check if an input string is a date.

    :param date: date as string, e.g., "20221101"
    :raises ValueError: if `date` cannot be parsed with the `%Y%m%d` format
    """
    hdbg.dassert_isinstance(date, str)
    try:
        # The parsed value is not needed: we only care whether parsing works.
        _ = datetime.datetime.strptime(date, "%Y%m%d")
    except ValueError as e:
        # Chain the original exception explicitly so that the parsing error
        # is preserved as the cause in the traceback.
        raise ValueError(
            f"date='{date}' doesn't have the right format: {e}"
        ) from e


# TODO(Grisha): also pass timezone.
def to_datetime(datetime_: Datetime) -> datetime.datetime:
    """
    Convert a `Datetime` into a `datetime.datetime`.

    :param datetime_: a string, a `pd.Timestamp` or a `datetime.datetime`
    :return: tz-aware or naive datetime.datetime
    """
    dassert_is_datetime(datetime_)
    # Strings are parsed through `pd.Timestamp` and then fall through to the
    # `pd.Timestamp` -> `datetime.datetime` conversion below.
    if isinstance(datetime_, str):
        datetime_ = pd.Timestamp(datetime_)
    if isinstance(datetime_, pd.Timestamp):
        datetime_ = datetime_.to_pydatetime()
    return datetime_  # type: ignore


def to_timestamp(datetime_: Datetime) -> pd.Timestamp:
    """
    Convert a `Datetime` into a `pd.Timestamp`.

    :param datetime_: a string, a `pd.Timestamp` or a `datetime.datetime`
    :return: tz-aware or naive `pd.Timestamp`
    """
    dassert_is_datetime(datetime_)
    timestamp = pd.Timestamp(datetime_)
    return timestamp


# #############################################################################


def dassert_is_tz_naive(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is tz-naive, i.e., doesn't have timezone
    info.
    """
    hdbg.dassert_is(
        datetime_.tzinfo, None, "datetime_='%s' is not tz naive", datetime_
    )


def dassert_has_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp has timezone info.
    """
    hdbg.dassert_is_not(
        datetime_.tzinfo,
        None,
        "datetime_='%s' doesn't have timezone info",
        datetime_,
    )


def _get_tz_zone_name(tz_info: datetime.tzinfo) -> Optional[str]:
    """
    Return the zone name of a tzinfo object, whatever library built it.

    :param tz_info: timezone object attached to a timestamp
    :return: the `zone` attribute when the object has one (as `pytz`
        timezones do), otherwise the `key` attribute (as `zoneinfo.ZoneInfo`
        objects do), otherwise the string representation of `tz_info`
    """
    if hasattr(tz_info, "zone"):
        # Keep the value as is, even if it is `None`.
        return tz_info.zone  # type: ignore
    key = getattr(tz_info, "key", None)
    if key is not None:
        return key  # type: ignore
    # Last resort, so that the caller reports a readable assertion instead of
    # failing with an `AttributeError`.
    return str(tz_info)


def dassert_has_specified_tz(
    datetime_: StrictDatetime, tz_zones: Iterable[str]
) -> None:
    """
    Assert that the passed timestamp has the timezone passed in `tz_zones`.

    :param datetime_: timestamp to check; it must have timezone info
    :param tz_zones: names of the accepted timezones, e.g.,
        `("US/Eastern", "America/New_York")`
    """
    # Make sure that the passed timestamp has timezone information.
    dassert_has_tz(datetime_)
    # Get the timezone.
    tz_info = datetime_.tzinfo
    # Unlike other timezones UTC is a `datetime.timezone` object not a
    # `pytz.tzfile`. See CmTask5895 for details.
    if (
        isinstance(tz_info, datetime.timezone)
        and tz_info == datetime.timezone.utc
    ):
        tz_zone: Optional[str] = "UTC"
    else:
        # Do not assume a `pytz` timezone: other tzinfo implementations do not
        # expose a `zone` attribute.
        tz_zone = _get_tz_zone_name(tz_info)  # type: ignore
    has_expected_tz = tz_zone in tz_zones
    hdbg.dassert(
        has_expected_tz,
        "datetime_=%s (type=%s) tz_info=%s tz_info.zone=%s instead of tz_zones=%s",
        datetime_,
        type(datetime_),
        tz_info,
        tz_zone,
        tz_zones,
    )


def dassert_has_UTC_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is UTC.
    """
    tz_zones = (pytz.timezone("UTC").zone,)
    dassert_has_specified_tz(datetime_, tz_zones)


def dassert_has_ET_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is Eastern Time (ET).
    """
    # Both names are accepted for Eastern Time.
    tz_zones = (
        pytz.timezone("US/Eastern").zone,
        pytz.timezone("America/New_York").zone,
    )
    dassert_has_specified_tz(datetime_, tz_zones)


def dassert_tz_compatible(
    datetime1: StrictDatetime, datetime2: StrictDatetime
) -> None:
    """
    Assert that two timestamps are both naive or both have timezone info.
    """
    dassert_is_strict_datetime(datetime1)
    dassert_is_strict_datetime(datetime2)
    has_tz1 = datetime1.tzinfo is not None
    has_tz2 = datetime2.tzinfo is not None
    hdbg.dassert_eq(
        has_tz1,
        has_tz2,
        "datetime1='%s' and datetime2='%s' are not compatible",
        str(datetime1),
        str(datetime2),
    )


def dassert_have_same_tz(
    datetime1: StrictDatetime, datetime2: StrictDatetime
) -> None:
    """
    Assert that both timestamps have the same tz.

    The timezones are compared regardless of a DST mode.
    """
    dassert_tz_compatible(datetime1, datetime2)
    # Convert to string to remove DST mode info.
    tz1_as_str = str(datetime1.tzinfo)
    tz2_as_str = str(datetime2.tzinfo)
    hdbg.dassert_eq(
        tz1_as_str,
        tz2_as_str,
        "datetime1=%s (datetime1.tzinfo=%s) datetime2=%s (datetime2.tzinfo=%s) ",
        datetime1,
        tz1_as_str,
        datetime2,
        tz2_as_str,
    )


# TODO(gp): Replace this check with compatibility between series vs scalar.
# def dassert_srs_tz_compatible(
# def dassert_srs_has_tz
# def dassert_srs_is_tz_naive
def dassert_tz_compatible_timestamp_with_df(
    datetime_: StrictDatetime,
    df: pd.DataFrame,
    col_name: Optional[str],
) -> None:
    """
    Assert that timestamp and a df column are both naive or both have timezone
    info.

    An empty dataframe passes the check trivially.

    :param datetime_: timestamp to compare against the dataframe
    :param df: dataframe whose index or column is checked
    :param col_name: col_name. `None` represents the index.
    """
    dassert_is_strict_datetime(datetime_)
    hdbg.dassert_isinstance(df, pd.DataFrame)
    if df.empty:
        return
    if col_name is None:
        # We assume that the first element in the index is representative.
        df_datetime = df.index[0]
    else:
        hdbg.dassert_in(col_name, df.columns)
        # We assume that the first element in the column is representative.
        df_datetime = df[col_name].iloc[0]
    dassert_tz_compatible(df_datetime, datetime_)


# #############################################################################


def dassert_is_valid_timestamp(timestamp: Optional[pd.Timestamp]) -> None:
    """
    Assert that a timestamp is `None` or a `pd.Timestamp` with timezone.
    """
    if timestamp is not None:
        hdbg.dassert_isinstance(timestamp, pd.Timestamp)
        dassert_has_tz(timestamp)


def dassert_timestamp_lte(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
) -> None:
    """
    Assert that `start_timestamp <= end_timestamp`.

    Both timestamps must be valid (i.e., `None` or tz-aware). The ordering is
    checked only when both are not `None`.
    """
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    if start_timestamp is not None and end_timestamp is not None:
        hdbg.dassert_lte(start_timestamp, end_timestamp)


def dassert_timestamp_lt(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
) -> None:
    """
    Assert that `start_timestamp < end_timestamp`.

    Both timestamps must be valid (i.e., `None` or tz-aware). The ordering is
    checked only when both are not `None`.
    """
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    if start_timestamp is not None and end_timestamp is not None:
        hdbg.dassert_lt(start_timestamp, end_timestamp)


def dassert_is_valid_interval(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
    left_close: bool,
    right_close: bool,
) -> None:
    """
    Assert that an interval has valid start and end timestamps.

    :param start_timestamp: beginning of the interval, `None` means unbounded
    :param end_timestamp: end of the interval, `None` means unbounded
    :param left_close: whether the interval includes `start_timestamp`
    :param right_close: whether the interval includes `end_timestamp`
    """
    _LOG.debug(
        hprint.to_str("start_timestamp end_timestamp left_close right_close")
    )
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    # Check the requested interval.
    if start_timestamp is not None and end_timestamp is not None:
        if left_close and right_close:
            # If they are both closed, an interval like [a, a] makes sense,
            # otherwise it doesn't.
            hdbg.dassert_lte(start_timestamp, end_timestamp)
        else:
            hdbg.dassert_lt(start_timestamp, end_timestamp)


# #############################################################################


def get_UTC_tz() -> datetime.tzinfo:
    """
    Return the UTC timezone.
    """
    return pytz.timezone("UTC")


def get_ET_tz() -> datetime.tzinfo:
    """
    Return the US Eastern Time timezone.
    """
    # TODO(Grisha): -> `US/Eastern`?
    # It appears that "America/New_York" is to be preferred over "US/Eastern".
    # https://www.iana.org/time-zones
    # https://en.wikipedia.org/wiki/Tz_database
    return pytz.timezone("America/New_York")


# Function returning the current (true, replayed, simulated) wall-clock time as a
# timestamp.
# TODO(gp): maybe GetWallClockTimeFunc is better to clarify that this is a function
# and not time. We often pass
GetWallClockTime = Callable[[], pd.Timestamp]


# TODO(gp): -> get_wall_clock_time
# TODO(gp): tz -> tz_mode since we are not passing neither a timezone or a
# timezone_as_str.
def get_current_time(
    tz: str,
    # TODO(gp): Add *
    # *,
    event_loop: Optional[asyncio.AbstractEventLoop] = None,
) -> pd.Timestamp:
    """
    Return current time in UTC / ET timezone or as a naive time.

    This should be the only way to get the current wall-clock time,
    since it handles both wall-clock time and "simulated" wall-clock
    time through asyncio.

    :param tz: how to represent the returned time
        - "UTC": tz-aware timestamp in UTC
        - "ET": tz-aware timestamp in Eastern Time
        - "naive_UTC": UTC time without timezone info
        - "naive_ET": Eastern Time without timezone info
    :param event_loop: event loop exposing `get_current_time()`, used as the
        source of the (simulated) time. If `None`, the real wall-clock time is
        used
    :return: the current time in the requested representation
    :raises ValueError: if `tz` is not one of the supported values
    """
    if event_loop is not None:
        # We accept only `hasyncio.EventLoop` here. If we are using standard asyncio
        # EventLoop we rely on wall-clock time instead of `loop.time()`.
        hdbg.dassert_isinstance(event_loop, asyncio.AbstractEventLoop)
        hdbg.dassert(hasattr(event_loop, "get_current_time"))
        timestamp = event_loop.get_current_time()
    else:
        # Use true real-time. `datetime.utcnow()` is deprecated, so we get an
        # aware UTC time and strip the timezone to obtain the same naive UTC
        # value.
        timestamp = datetime.datetime.now(datetime.timezone.utc).replace(
            tzinfo=None
        )
    # Convert it into the requested representation.
    timestamp = pd.Timestamp(timestamp, tz=get_UTC_tz())
    if tz == "UTC":
        pass
    elif tz == "ET":
        timestamp = timestamp.tz_convert(get_ET_tz())
    elif tz == "naive_UTC":
        timestamp = timestamp.replace(tzinfo=None)
    elif tz == "naive_ET":
        timestamp = timestamp.tz_convert(get_ET_tz())
        timestamp = timestamp.replace(tzinfo=None)
    else:
        raise ValueError(f"Invalid tz='{tz}'")
    return timestamp


def get_current_timestamp_as_string(tz: str) -> str:
    """
    Return the current time in the format `YYYYMMDD-HHMMSS` (e.g.,
    20210728-221734).

    Note that no information about the timezone is returned. Thus the
    same time corresponds to `20210728-171749` for tz="ET" and
    `20210728-221749` for tz="UTC".

    :param tz: same meaning as in `get_current_time()`
    """
    timestamp = get_current_time(tz)
    ret = timestamp.strftime("%Y%m%d-%H%M%S")
    return ret


def get_current_date_as_string(tz: str) -> str:
    """
    Return the current date in the format `YYYYMMDD` (e.g., 20210728).

    :param tz: same meaning as in `get_current_time()`
    """
    timestamp = get_current_time(tz)
    ret = timestamp.strftime("%Y%m%d")
    return ret


# #############################################################################
# Bar-related utilities
# #############################################################################


def convert_seconds_to_minutes(num_secs: int) -> int:
    """
    Convert a positive number of seconds into a whole number of minutes.

    E.g., 300 (seconds) -> 5 (minutes).

    :param num_secs: number of seconds; must be positive and a multiple of 60
    :return: the number of minutes
    """
    hdbg.dassert_lt(0, num_secs)
    hdbg.dassert_eq(
        num_secs % 60,
        0,
        "num_secs=%s is not an integer number of minutes",
        num_secs,
    )
    # Use integer division to avoid the precision loss of a float division.
    num_mins = int(num_secs // 60)
    hdbg.dassert_lt(0, num_mins)
    _LOG.debug(hprint.to_str("num_secs num_mins"))
    return num_mins


# TODO(Dan): Unit test.
+def convert_seconds_to_pandas_minutes(val: int) -> str: + """ + Convert a number of seconds to its Pandas delay representation in minutes. + + E.g. 300 -> '5T' + + :param val: number of seconds to convert + :return: Pandas delay representation + """ + res = convert_seconds_to_minutes(val) + res = f"{res}T" + return res + + +def convert_minutes_to_seconds(num_minutes: int) -> int: + """ + Convert minutes to seconds. + + E.g., 5 (minutes) -> 300 (seconds). + + :param num_minutes: the number of minutes to convert + :return: the number of seconds + """ + hdbg.dassert_isinstance(num_minutes, int) + hdbg.dassert_lt(0, num_minutes) + num_seconds = num_minutes * 60 + _LOG.debug(hprint.to_str("num_minutes num_seconds")) + return num_seconds + + +# TODO(gp): bar_duration_in_secs -> bar_{length,period}_in_secs +def find_bar_timestamp( + current_timestamp: pd.Timestamp, + bar_duration_in_secs: int, + *, + mode: str = "round", + max_distance_in_secs: int = 10, +) -> pd.Timestamp: + """ + Compute the bar (a, b] with period `bar_duration_in_secs` including + `current_timestamp`. + + :param current_timestamp: current timestamp + :param bar_duration_in_secs: bar duration in seconds + :param mode: how to compute the bar + - `round`: snap to the closest bar extreme + - `floor`: pick timestamp to the bar that includes it, returning the lower + bound. E.g., For `9:13am` and 5 mins bars returns `9:10am` + :param max_distance_in_secs: number of seconds representing the maximal distance + that it's allowed from the start of the bar + """ + _LOG.debug( + hprint.to_str( + "current_timestamp bar_duration_in_secs mode max_distance_in_secs" + ) + ) + hdbg.dassert_isinstance(current_timestamp, pd.Timestamp) + # Align. 
+ reference_timestamp = f"{bar_duration_in_secs}S" + if mode == "round": + bar_timestamp = current_timestamp.round(reference_timestamp) + elif mode == "floor": + bar_timestamp = current_timestamp.floor(reference_timestamp) + hdbg.dassert_lte(bar_timestamp, current_timestamp) + else: + raise ValueError(f"Invalid mode='{mode}'") + _LOG.debug( + hprint.to_str("current_timestamp bar_duration_in_secs bar_timestamp") + ) + # Sanity check. + if mode == "round": + hdbg.dassert_lte(1, max_distance_in_secs) + if bar_timestamp >= current_timestamp: + distance_in_secs = (bar_timestamp - current_timestamp).seconds + else: + distance_in_secs = (current_timestamp - bar_timestamp).seconds + hdbg.dassert_lte(0, distance_in_secs) + hdbg.dassert_lte( + distance_in_secs, + max_distance_in_secs, + "current_timestamp=%s is too distant from bar_timestamp=%s", + current_timestamp, + bar_timestamp, + ) + _LOG.debug(hprint.to_str("bar_timestamp")) + return bar_timestamp + + +# This can't go in `helpers.hwall_clock_time` since it has a dependency from +# `find_bar_timestamp()` and might introduce an import loop. +def set_current_bar_timestamp( + current_timestamp: pd.Timestamp, + bar_duration_in_secs: int, +) -> None: + """ + Compute the current bar by snapping the current timestamp to the grid. + """ + mode = "round" + # E.g., `current_timestamp` is 09:26 and the next bar is at 09:30, so + # the distance is 4 minutes, i.e. max distance should be within a bar's + # length. 
+ max_distance_in_secs = bar_duration_in_secs + bar_timestamp = find_bar_timestamp( + current_timestamp, + bar_duration_in_secs, + mode=mode, + max_distance_in_secs=max_distance_in_secs, + ) + _LOG.debug(hprint.to_str("current_timestamp bar_timestamp")) + hwacltim.set_current_bar_timestamp(bar_timestamp) + + +# ############################################################################# + + +def str_to_timestamp( + timestamp_as_str: str, tz: str, *, datetime_format: Optional[str] = None +) -> pd.Timestamp: + """ + Convert timestamp as string to `pd.Timestamp`. + + Localize input time to the specified timezone. + + E.g., `timestamp_as_str = "20230523_150513"`: + - `tz = "UTC"` -> "2023-05-23 15:05:13+0000" + - `tz = "US/Eastern"` -> "2023-05-23 15:05:13-0400" + + :param timestamp_as_str: string datetime (e.g., 20230523_150513) + :param tz: timezone info (e.g., "US/Eastern") + :param datetime_format: datetime format (e.g., %Y%m%d_%H%M%S) + If None, infer automatically + :return: pd.Timestamp with a specified timezone + """ + hdbg.dassert_isinstance(timestamp_as_str, str) + hdbg.dassert_isinstance(tz, str) + msg = "timestamp_as_str must be nonempty." + hdbg.dassert_is_not(timestamp_as_str, "", msg=msg) + _LOG.debug(hprint.to_str("timestamp_as_str tz datetime_format")) + if datetime_format is None: + # Try to infer the format automatically. + timestamp = pd.to_datetime(timestamp_as_str, infer_datetime_format=True) + else: + # Convert using the provided format. + timestamp = pd.to_datetime(timestamp_as_str, format=datetime_format) + # Convert to the specified timezone + timestamp = timestamp.tz_localize(tz) + return timestamp + + +def _handle_incorrect_conversions( + date: str, +) -> Optional[Tuple[Optional[str], Callable[[str], str]]]: + """ + Change data pre-processing for cases when `pd.to_datetime` is mistaken. 
+ + :param date: string date + :return: date format and a function to apply to string dates before + passing them into `pd.to_datetime()` + """ + if len(date) in [7, 8]: + # "2021-M2" is transformed to '2020-01-01 00:00:01' by + # `pd.to_datetime`. + if date[:4].isdigit() and date[4] in ["-", ".", "/"] and date[5] == "M": + + def modify_monthly_date(x: str) -> str: + year_number = int(x[:4]) + month_number = x[6:] + num_days_in_month = calendar.monthrange( + year_number, int(month_number) + )[1] + modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" + return modified_x + + return "%Y-%m-%d", modify_monthly_date + return None + + +def _shift_to_period_end( # pylint: disable=too-many-return-statements + date: str, +) -> Optional[Callable[[StrictDatetime], StrictDatetime]]: + """ + Get function to shift the dates to the end of period. + + :param date: string date + :return: a function to shift the dates to the end of period. If `None`, no + shift is needed + """ + + def shift_to_month_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.MonthEnd(0) + + def shift_to_quarter_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.QuarterEnd(0) + + def shift_to_year_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.YearEnd(0) + + if date[:4].isdigit(): + if len(date) == 7: + if date[5:].isdigit(): + # "2020-12" format. + return shift_to_month_end + if date[5] == "Q": + # "2021-Q1" format. + return shift_to_quarter_end + elif len(date) == 6: + # "2021Q1" format. + if date[4] == "Q": + return shift_to_quarter_end + elif len(date) == 4: + # "2021" format. + return shift_to_year_end + # "September 2020" or "Sep 2020" format. + # Get a flat list of month aliases. The full month name comes first. + # Since the `calendar` is using the natural month order, we need to + # shift the month aliases by one to get the correct order. 
+ # E.g., `calendar.month_name[1:]` is `['January', 'February', ...]` and + # `calendar.month_abbr[1:]` is `['Jan', 'Feb', ...]`. + month_aliases = list(calendar.month_name[1:]) + list(calendar.month_abbr[1:]) + pattern = re.compile("|".join(month_aliases), re.IGNORECASE) + match = pattern.search(date) + if match is None: + return None + span = match.span() + date_without_month = f"{date[: span[0]]}{date[span[1] :]}".strip() + if len(date_without_month) == 4 and date_without_month.isdigit(): + return shift_to_month_end + return None + + +def _determine_date_format( + date: str, date_standard: Optional[str] = None +) -> Optional[Tuple[str, Callable[[str], str]]]: + """ + Determine date format for cases when `pd.to_datetime` fails. + + :param date: date string + :param date_standard: "standard" or "ISO_8601", `None` defaults to + "standard" + :return: date format and a function to transform date strings before + converting them to datetime using `pd.to_datetime` + """ + date_standard = date_standard or "standard" + if date_standard == "standard": + year_format = "%Y" + week_format = "%W" + day_of_week_format = "%w" + elif date_standard == "ISO_8601": + year_format = "%G" + week_format = "%V" + day_of_week_format = "%u" + else: + raise ValueError(f"Invalid `date_standard`='{date_standard}'") + # Determine format and original `date` modification function. + format_ = "" + if date[:4].isdigit(): + format_ += year_format + elif date[0] == "Q" and len(date) == 7 and date[-4:].isdigit(): + # "Q1 2020" format. 
+ + def modify_quarterly_data(x: str) -> str: + year_number = x[-4:] + quarter = int(x[1:2]) + last_month_of_quarter = 3 * quarter + last_day_of_quarter = calendar.monthrange( + int(year_number), last_month_of_quarter + )[1] + modified_x = ( + f"{year_number}-{last_month_of_quarter}-{last_day_of_quarter}" + ) + return modified_x + + format_ = f"{year_format}-%m-%d" + return format_, modify_quarterly_data + else: + _LOG.error("This format is not supported: '%s'", date) + return None + next_char = date[4] + if next_char in ["-", ".", "/", " "]: + if len(date) not in [7, 8]: + _LOG.error("This format is not supported: '%s'", date) + return None + format_ += "-" + next_char = date[5] + if next_char == "W": + # "2020-W14" format. + + def modify_weekly_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + return x + "-6" + + date_modification_func = modify_weekly_date + format_ += f"W{week_format}-{day_of_week_format}" + elif next_char == "S": + # "2020-S1" - semi-annual format. + def modify_semiannual_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + return x.replace("S1", "06-30").replace("S2", "12-31") + + date_modification_func = modify_semiannual_date + format_ += "%m-%d" + elif next_char == "B": + # "2020-B1" - bi-monthly format (every other month). + # We'll index by the start of the month starting with January + # based on PiT. + + def modify_bimonthly_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + bimonth_number = x[6] + month_number = int(bimonth_number) * 2 - 1 + modified_x = f"{x[:5]}{month_number}-01" + return modified_x + + date_modification_func = modify_bimonthly_date + format_ += "%m-%d" + else: + _LOG.error("This format is not supported: '%s'", date) + return None + elif next_char == "M" and len(date) == 7: + # "1959M01" format. 
+ + def modify_monthly_date(x: str) -> str: + year_number = int(x[:4]) + month_number = x[5:] + num_days_in_month = calendar.monthrange( + year_number, int(month_number) + )[1] + modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" + return modified_x + + date_modification_func = modify_monthly_date + format_ += "-%m-%d" + else: + _LOG.error("This format is not supported: '%s'", date) + return None + return format_, date_modification_func + + +def to_generalized_datetime( + dates: Union[pd.Series, pd.Index], date_standard: Optional[str] = None +) -> Union[pd.Series, pd.Index]: + """ + Convert string dates to datetime. + + This works like `pd.to_datetime`, but supports more date formats and shifts + the dates to the end of period instead of the start. + + :param dates: series or index of dates to convert + :param date_standard: "standard" or "ISO_8601", `None` defaults to + "standard" + :return: datetime dates + """ + # This function doesn't deal with mixed formats. + hdbg.dassert_isinstance(dates, Iterable) + hdbg.dassert(not isinstance(dates, str)) + # Try converting to datetime using `pd.to_datetime`. + format_example_index = -1 + date_example = dates.tolist()[format_example_index] + format_fix = _handle_incorrect_conversions(date_example) + if format_fix is not None: + format_, date_modification_func = format_fix + dates = dates.map(date_modification_func) + date_example = dates.tolist()[format_example_index] + else: + format_ = None + datetime_dates = pd.to_datetime(dates, format=format_, errors="coerce") + # Shift to end of period if conversion has been successful. + # Handle both scalar and array cases for `pd.isna()`. 
+ if hasattr(datetime_dates, "all"): + # datetime_dates is a Series or array-like + all_na = pd.isna(datetime_dates).all() + datetime_example = ( + datetime_dates.tolist()[format_example_index] + if hasattr(datetime_dates, "tolist") + else datetime_dates + ) + else: + # datetime_dates is a scalar + all_na = pd.isna(datetime_dates) + datetime_example = datetime_dates + if not all_na: + if ( + not pd.isna(datetime_example) + and hasattr(datetime_example, "strftime") + and datetime_example.strftime("%Y-%m-%d") == date_example + ): + return datetime_dates + shift_func = _shift_to_period_end(date_example) + if shift_func is not None: + if hasattr(datetime_dates, "map"): + datetime_dates = datetime_dates.map(shift_func) + else: + # For scalar case, apply the shift function directly + datetime_dates = shift_func(datetime_dates) + return datetime_dates + # If standard conversion fails, attempt our own conversion. + date_standard = date_standard or "standard" + format_determination_output = _determine_date_format( + date_example, date_standard + ) + if format_determination_output is None: + return datetime_dates + format_, date_modification_func = format_determination_output + dates = dates.map(date_modification_func) + return pd.to_datetime(dates, format=format_) + + +# ############################################################################# +# Unix to epoch conversion +# ############################################################################# + + +def convert_unix_epoch_to_timestamp( + epoch: int, unit: str = "ms", tz: str = "UTC" +) -> pd.Timestamp: + """ + Convert Unix epoch to timestamp. + + :param epoch: Unix time epoch + :param unit: epoch's time unit + :param tz: resulting timestamp timezone + :return: timestamp + """ + timestamp = pd.Timestamp(epoch, unit=unit, tz=tz) + return timestamp + + +def convert_timestamp_to_unix_epoch( + timestamp: pd.Timestamp, unit: str = "ms" +) -> int: + """ + Convert timestamp to Unix epoch. 
+ + :param timestamp: timestamp + :param unit: epoch's time unit + :return: Unix time epoch + """ + # Make timestamp tz-naive if it is not. Converted to UTC tz before becoming + # naive automatically. + if timestamp.tz: + timestamp = timestamp.tz_convert(None) + # Convert to epoch. + epoch: int = (timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta( + "1" + unit + ) + return epoch + + +# TODO(Sameep): Reuse this function across the code base (`jackpy strftime`) when +# it doesn't make the import graph too complicated. +# TODO(gp): This seems redundant with get_timestamp() in `hwall_clock_time`. +def timestamp_to_str( + timestamp: pd.Timestamp, *, include_msec: bool = False +) -> str: + """ + Convert timestamp to string. + + :param timestamp: timestamp to convert + :param include_msec: whether to include milliseconds e.g. + `20230727_111057_123` + :return: timestamp in string format e.g. `20230727_111057`. + """ + hdbg.dassert_isinstance(timestamp, pd.Timestamp) + # Convert timestamp to string. + if include_msec: + # %f is the format code for microseconds. We truncate the last 3 digits + # to get milliseconds. + # This results in a string like "20230426_153042_123". 
+ timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3] + else: + timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") + return timestamp_str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py new file mode 100644 index 000000000..a11dfb243 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py @@ -0,0 +1,1134 @@ +""" +Import as: + +import helpers.hdbg as hdbg +""" + +import functools +import logging +import os +import pprint +import sys +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union + +# This module can depend only on: +# - Python standard modules +# - `helpers/hserver.py` +# See `helpers/dependencies.txt` for more details + +_LOG = logging.getLogger(__name__) + + +# Enforce that certain warnings are disabled. +import helpers.hwarnings as hwarnin # # isort:skip # noqa: E402,F401,F403 # pylint: disable=unused-import + + +# TODO(gp): Make these generate from MAPPING below. +INFO = "\033[36mINFO\033[0m" +WARNING = "\033[33mWARNING\033[0m" +ERROR = "\033[31mERROR\033[0m" + + +# ############################################################################# +# dfatal. +# ############################################################################# + +# Copied from printing.py to avoid cyclical dependencies. + + +def _line(chars: str = "#", num_cols: int = 80) -> str: + line_ = chars * num_cols + "\n" + return line_ + + +def _frame(x: str, chars: str = "#", num_cols: int = 80) -> str: + """ + Return a string with a frame of num_cols chars around the object x. + + :param x: object to print through str() + :param num_cols: number + """ + line_ = _line(chars=chars, num_cols=num_cols) + ret = "" + ret += line_ + ret += str(x) + "\n" + ret += line_ + return ret + + +# End of copy. 
+ + +def dfatal(message: str, assertion_type: Optional[Any] = None) -> None: + """ + Print an error message and exits. + """ + ret = "" + message = str(message) + ret = "\n" + _frame(message, "#", 80) + if assertion_type is None: + assertion_type = AssertionError + raise assertion_type(ret) + + +# ############################################################################# +# dassert. +# ############################################################################# + +# TODO(gp): Would be nice to have a way to disable the assertions in certain +# builds, or at least know how much time is spent in the assertions. +# To disable we could have a fake_dbg.py that has all `dassert_*`, `logging` +# defined as `lambda x: 0`. + + +# INVARIANTS: +# - `dassert_COND()` checks that COND is true, and raises if COND is False +# - For this reason the condition inside the `dassert` is typically in the form +# `if not (...):`, even this might annoy the linter or look weird +# - The parameter `only_warning` is to report a problem but keep going. +# This can be used (sparingly) for production when we want to be aware of +# certain conditions without aborting. + + +def _to_msg(msg: Optional[str], *args: Any) -> str: + """ + Format error message `msg` using the params in `args`, like `msg % args`. + """ + if msg is None: + # If there is no message, we should have no arguments to format. + assert not args, f"args={str(args)}" + res = "" + else: + try: + res = msg % args + except TypeError as e: + # The arguments didn't match the format string: report error and + # print the result somehow. + res = f"Caught assertion while formatting message:\n'{str(e)}'" + _LOG.warning(res) + res += "\n" + msg + " " + " ".join(map(str, args)) + # res = "(" + res + ") " + return res + + +def _dfatal( + txt: Union[str, Iterable[str]], + msg: Optional[str], + *args: Any, + only_warning: bool = False, +) -> None: + """ + Abort execution. 
+ + :param only_warning: issue a warning instead of aborting + """ + dfatal_txt = "* Failed assertion *\n" + # TODO(gp): This should be an iterable. + if isinstance(txt, list): + dfatal_txt += "\n".join(txt) + else: + dfatal_txt += str(txt) + msg = _to_msg(msg, *args) + if msg: + if not dfatal_txt.endswith("\n"): + dfatal_txt += "\n" + dfatal_txt += msg + if only_warning: + # Only warn. + dfatal_txt += "\nContinuing as per user request with only_warning=True" + _LOG.warning(dfatal_txt) + else: + # Abort. + dfatal(dfatal_txt) + + +def dassert( + cond: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + # Handle the somehow frequent case of using `dassert` instead of another + # one, e.g., `dassert(y, list)` + if msg is not None: + assert isinstance(msg, str), ( + f"You passed '{msg}' or type '{type(msg)}' instead of str" + ) + if not cond: + txt = f"cond={cond}" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_eq( + val1: Any, + val2: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + cond = val1 == val2 + if not cond: + txt = f"'{val1}'\n==\n'{val2}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_ne( + val1: Any, + val2: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + cond = val1 != val2 + if not cond: + txt = f"'{val1}'\n!=\n'{val2}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_imply( + val1: Any, + val2: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + cond = not val1 or val2 + if not cond: + txt = f"'{val1}' implies '{val2}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +# ############################################################################# +# Comparison related. 
# #############################################################################


def dassert_lt(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1` is strictly smaller than `val2`.
    """
    if val1 < val2:
        return
    _dfatal(f"{val1} < {val2}", msg, *args, only_warning=only_warning)


def dassert_lte(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1` is smaller than or equal to `val2`.
    """
    if val1 <= val2:
        return
    _dfatal(f"{val1} <= {val2}", msg, *args, only_warning=only_warning)


def dassert_lgt(
    lower_bound: float,
    x: float,
    upper_bound: float,
    lower_bound_closed: bool,
    upper_bound_closed: bool,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `x` lies between `lower_bound` and `upper_bound`.

    :param lower_bound_closed, upper_bound_closed: whether the corresponding
        extreme belongs to the interval (`<=`) or not (`<`)
    """
    # Check the lower extreme first, picking `<=` or `<`.
    check_lower = dassert_lte if lower_bound_closed else dassert_lt
    check_lower(lower_bound, x, msg, *args, only_warning=only_warning)
    # Then check the upper extreme in the same way.
    check_upper = dassert_lte if upper_bound_closed else dassert_lt
    check_upper(x, upper_bound, msg, *args, only_warning=only_warning)


def dassert_is_proportion(
    x: float, msg: Optional[str] = None, *args: Any, only_warning: bool = False
) -> None:
    """
    Assert that `x` is in the closed interval `[0, 1]`.
    """
    # Both extremes are included.
    dassert_lgt(0, x, 1, True, True, msg, *args, only_warning=only_warning)


# #############################################################################
# Membership.
# #############################################################################


def dassert_in(
    value: Any,
    valid_values: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `value` is contained in `valid_values`.

    :param value: element to look for
    :param valid_values: container supporting the `in` operator
    :param msg: optional printf-style message reported on failure
    :param args: values to interpolate in `msg`
    :param only_warning: issue a warning instead of aborting
    """
    cond = value in valid_values
    if not cond:
        txt = f"'{value}' in '{valid_values}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_not_in(
    value: Any,
    valid_values: Iterable[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `value` is not contained in `valid_values`.

    The parameters have the same meaning as in `dassert_in()`.
    """
    cond = value not in valid_values
    if not cond:
        txt = f"'{value}' not in '{valid_values}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# Type related.
# #############################################################################


def dassert_is(
    val1: Any,
    val2: Optional[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1` and `val2` are the same object (`val1 is val2`).
    """
    cond = val1 is val2
    if not cond:
        txt = f"'{val1}' is '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_is_not(
    val1: Any,
    val2: Optional[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1` and `val2` are different objects (`val1 is not val2`).
    """
    cond = val1 is not val2
    if not cond:
        txt = f"'{val1}' is not '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_type_is(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that the type of `val1` is exactly `val2`, excluding subclasses.
    """
    # pylint: disable=unidiomatic-typecheck
    cond = type(val1) is val2
    if not cond:
        txt = f"Type of '{val1}' is '{type(val1)}' instead of '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# TODO(gp): This is redundant with dassert_isinstance(..., (str, float)).
+def dassert_type_in( + val1: Any, + val2: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + # pylint: disable=unidiomatic-typecheck + cond = type(val1) in val2 + if not cond: + txt = f"Type of '{val1}' is '{type(val1)}' not in '{val2}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_isinstance( + val1: Any, + val2: Union[type, Iterable[type]], + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + cond = isinstance(val1, val2) # type: ignore[arg-type] + if not cond: + txt = f"Instance of '{val1}' is '{type(val1)}' instead of '{val2}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_issubclass( + val1: Any, + val2: Union[type, Iterable[type]], + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + """ + Assert that an object `val1` is a subclass of `val2`. + """ + cond = issubclass(val1.__class__, val2) # type: ignore[arg-type] + if not cond: + txt = ( + f"Instance '{str(val1)}' of class '{val1.__class__.__name__}' is " + f"not a subclass of '{val2}'" + ) + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_is_integer( + val: Union[int, float], + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + """ + Assert that val represents an integer number, independently of the type. + """ + if isinstance(val, int): + pass + elif isinstance(val, float): + cond = val == int(val) + if not cond: + txt = f"Invalid val='{val}' of type '{type(val)}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + else: + txt = f"Invalid val='{val}' of type '{type(val)}'" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +def dassert_callable( + func: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + """ + Assert that an object `val1` is callable. 
+ """ + cond = callable(func) + if not cond: + txt = f"Obj '{str(func)}' of type '{str(type(func))}' is not callable" + _dfatal(txt, msg, *args, only_warning=only_warning) + + +# ############################################################################# +# Set related. +# ############################################################################# + + +# TODO(gp): A more general solution is to have a function that traverses an obj +# and creates a corresponding obj only with deterministic data structures (e.g., +# converting sets and dicts to sorted lists). Then we can print with `pprint`. +def _set_to_str(set_: Set[Any], thr: Optional[int] = 20) -> str: + """ + Return a string with the ordered content of a set. + + This is useful when printing assertions that we want to be deterministic (e.g., + if we use it inside unit tests like: + ``` + with self.assertRaises(AssertionError) as cm: + ... + actual = str(cm.exception) + expected = r + self.assert_equal(actual, expected, fuzzy_match=True) + ``` + """ + try: + list_ = sorted(list(set_)) + # If sets have less than `thr` elements print them as well, otherwise + # print the beginning / end. + if thr is not None and len(list_) > thr: + txt = f"{len(list_)} [{min(list_)}, ... {max(list_)}]" + else: + txt = str(list_) + except TypeError: + # Sometimes the set has elements of different types and we can't easily + # sort them. In these cases we just skip the sorting. + txt = str(list(set_)) + return txt + + +def dassert_set_eq( + val1: Any, + val2: Any, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + """ + Check that `val1` has the same elements as `val2`, raise otherwise. 
# TODO(gp): -> dassert_no_intersection to match other functions.
def dassert_not_intersection(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` and `val2` have no element in common.

    Both values are converted to sets before the comparison.

    :param val1: first iterable
    :param val2: second iterable
    :param msg: message forwarded to `_dfatal()` on failure
    :param args: extra positional arguments forwarded to `_dfatal()`
    :param only_warning: forwarded to `_dfatal()`
    """
    set1 = set(val1)
    set2 = set(val2)
    if not set1.intersection(set2):
        return
    # Report both sets and the offending common elements.
    txt = [
        "val1=" + _set_to_str(set1),
        "has no intersection",
        "val2=" + _set_to_str(set2),
        "val1.intersection(val2)=" + _set_to_str(set1.intersection(set2)),
    ]
    _dfatal(txt, msg, *args, only_warning=only_warning)
def dassert_is_sorted(
    val1: Union[List, Tuple],
    sort_kwargs: Optional[Dict[Any, Any]] = None,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` is sorted, raise otherwise.

    :param val1: list or tuple to check
    :param sort_kwargs: keyword arguments passed to `sorted()` (e.g., `key`,
        `reverse`) defining the expected order
    :param msg: message forwarded to `_dfatal()` on failure
    :param args: extra positional arguments forwarded to `_dfatal()`
    :param only_warning: forwarded to `_dfatal()`
    """
    # TODO(gp): Extend for pd.Series using the proper method.
    dassert_isinstance(val1, (list, tuple))
    sort_kwargs = {} if sort_kwargs is None else sort_kwargs
    sorted_val1 = sorted(val1, **sort_kwargs)
    # `sorted()` always returns a list and a list never compares equal to a
    # tuple, so the comparison is done against a list copy of the input.
    # Otherwise every tuple, even a sorted one, would be reported as unsorted.
    cond = sorted_val1 == list(val1)
    if not cond:
        txt = []
        txt.append("val1=\n" + pprint.pformat(val1))
        txt.append("is not sorted")
        txt.append("sorted(val1)=\n" + pprint.pformat(sorted_val1))
        _dfatal(txt, msg, *args, only_warning=only_warning)
def dassert_array_has_same_type_element(
    obj1: Any,
    obj2: Any,
    only_first_elem: bool,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that two array-like iterables (e.g., pd.Index) store elements of
    the same type.

    :param obj1: first indexable / sliceable container
    :param obj2: second indexable / sliceable container
    :param only_first_elem: if True compare only the types of the first
        elements, otherwise use `_get_first_type()` on each container
    :param msg: message forwarded to `_dfatal()` on failure
    :param args: extra positional arguments forwarded to `_dfatal()`
    :param only_warning: forwarded to `_dfatal()`
    """
    # Get the types to compare.
    if not only_first_elem:
        type1 = _get_first_type(obj1, "obj1")
        type2 = _get_first_type(obj2, "obj2")
    else:
        type1 = type(obj1[0])
        type2 = type(obj2[0])
    if type1 != type2:
        # Show only the head of each container to keep the message short.
        max_shown = 5
        txt = [
            f"obj1=\n{obj1[:max_shown]}",
            f"obj2=\n{obj2[:max_shown]}",
            f"type(obj1)='{type1}' is different from type(obj2)='{type2}'",
        ]
        _dfatal(txt, msg, *args, only_warning=only_warning)
def dassert_all_defined_or_all_None(
    vals: List[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that the values in a list are either all None or all not None.

    An empty list passes the check.

    :param vals: values to check
    :param msg: message forwarded to `_dfatal()` on failure
    :param args: extra positional arguments forwarded to `_dfatal()`
    :param only_warning: forwarded to `_dfatal()`
    """
    has_none = any(val is None for val in vals)
    has_defined = any(val is not None for val in vals)
    # The check fails only when None and non-None values are mixed.
    if has_none and has_defined:
        txt = f"Some values in list are defined and some are None: '{vals}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)
def dassert_file_exists(
    file_name: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert unless `file_name` exists and it's a file and not a directory.

    :param file_name: path of the file to check (must be a non-empty string)
    :param msg: message forwarded to `_dfatal()` on failure
    :param args: extra positional arguments forwarded to `_dfatal()`
    :param only_warning: forwarded to `_dfatal()`
    """
    dassert_isinstance(file_name, str)
    dassert_ne(file_name, "")
    file_name = os.path.abspath(file_name)
    # `file_name` exists.
    if not os.path.exists(file_name):
        txt = f"File '{file_name}' doesn't exist"
        _dfatal(txt, msg, *args, only_warning=only_warning)
        # When `_dfatal()` only warns it returns here: stop so that a missing
        # path is not reported a second time as "not a file".
        return
    # `file_name` is a file.
    if not os.path.isfile(file_name):
        txt = f"'{file_name}' is not a file"
        _dfatal(txt, msg, *args, only_warning=only_warning)
def dassert_related_params(
    params: Dict[str, Any],
    mode: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check whether `params` have a certain relationship.

    An empty `params` trivially satisfies both modes.

    :param params: dictionary of parameter name, value
    :param mode:
        - `all_or_none_non_null`: either all params are null (i.e., `bool`
          evaluates to false) or all are non-null
        - `all_or_none_non_None`: either all params are None or all params are
          not None. This is useful when passing set of params that are optional
    :param msg: message forwarded to `dassert_isinstance()` / `_dfatal()`
    :param args: extra positional arguments forwarded with `msg`
    :param only_warning: forwarded to `dassert_isinstance()` / `_dfatal()`
    :raises ValueError: if `mode` is not one of the supported values
    """
    # TODO(gp): Allow iterable?
    dassert_isinstance(params, dict, msg, *args, only_warning=only_warning)
    # Select the predicate deciding whether a value counts as "set".
    if mode == "all_or_none_non_null":
        is_set = bool
        label = "non-null"
    elif mode == "all_or_none_non_None":
        is_set = lambda x: x is not None  # noqa: E731
        label = "non-None"
    else:
        raise ValueError(f"Invalid mode='{mode}'")
    # Find out if at least one value is set. `any()` is used instead of
    # `functools.reduce()` without an initial value, since the latter raises
    # `TypeError` on an empty dict.
    one_is_set = any(is_set(v) for v in params.values())
    for k, v in params.items():
        if is_set(v) != one_is_set:
            txt = f"All or none parameter should be {label}:\n{k}={v}\nparams={pprint.pformat(params)}\n"
            _dfatal(txt, msg, *args, only_warning=only_warning)
# TODO(gp): maybe replace "force_verbose_format" and "force_print_format" with
# a "mode" in ("auto", "verbose", "print")
def init_logger(
    verbosity: int = logging.INFO,
    use_exec_path: bool = False,
    log_filename: Optional[str] = None,
    force_verbose_format: bool = False,
    force_print_format: bool = False,
    force_white: bool = True,
    force_no_warning: bool = False,
    in_pytest: bool = False,
    report_memory_usage: bool = False,
    report_cpu_usage: bool = False,
    report_command_line: bool = True,
) -> None:
    """
    Send stderr and stdout to logging (optionally teeing the logs to file).

    - Note that:
        - logging.DEBUG = 10
        - logging.INFO = 20

    :param verbosity: verbosity to use. A level name passed as a string (e.g.,
        "DEBUG") is converted to the corresponding numeric level
    :param use_exec_path: when `log_filename` is not specified, log to a file
        named after the file of the calling module, with a `.log` suffix
    :param log_filename: log to that file
    :param force_verbose_format: use the verbose format for the logging
    :param force_print_format: use the print format for the logging
    :param force_white: use white color for printing. This can pollute the
        output of a script when redirected to file with echo characters
    :param force_no_warning: forwarded to `hlogging.set_v2_formatter()`
    :param in_pytest: True when we are running through pytest, so that we
        can overwrite the default logger from pytest
    :param report_memory_usage: turn on reporting memory usage
    :param report_cpu_usage: turn on reporting CPU usage
    :param report_command_line: turn on reporting command line
    """
    # Try to minimize dependencies.
    import helpers.hlogging as hloggin

    # TODO(gp): Print the stacktrace every time is called.
    if force_white:
        sys.stdout.write("\033[0m")
    if isinstance(verbosity, str):
        # pylint: disable=protected-access
        dassert(hasattr(logging, "_checkLevel"))
        assert hasattr(logging, "_checkLevel")
        verbosity = logging._checkLevel(verbosity)
    # From https://stackoverflow.com/questions/14058453
    root_logger = logging.getLogger()
    # Set verbosity for all loggers.
    root_logger.setLevel(verbosity)
    # Exit to avoid to replicate the same output multiple times.
    if not in_pytest and root_logger.handlers:
        print(WARNING + ": Logger already initialized: skipping")
        return
    #
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(verbosity)
    # Set the formatter.
    dassert(hasattr(hloggin, "set_v2_formatter"))
    assert hasattr(hloggin, "set_v2_formatter")
    formatter = hloggin.set_v2_formatter(
        ch,
        root_logger,
        force_no_warning,
        force_print_format,
        force_verbose_format,
        report_memory_usage,
        report_cpu_usage,
    )
    # Find name of the log file.
    if use_exec_path and log_filename is None:
        # NOTE: this check always passes given the guard above.
        dassert_is(log_filename, None, msg="Can't specify conflicting filenames")
        # Use the name of the executable.
        import inspect

        frame = inspect.stack()[1]
        module = inspect.getmodule(frame[0])
        if module is None:
            filename = "none"
        else:
            # The attribute name must be the string "__file__": passing the
            # bare `__file__` would look up an attribute named after the path
            # of this module. Modules without a file (e.g., built-in ones)
            # fall back to a placeholder instead of raising `AttributeError`.
            module_file = getattr(module, "__file__", None)
            filename = str(module_file) if module_file else "unknown_module"
        log_filename = os.path.realpath(filename) + ".log"
    # Handle teeing to a file.
    if log_filename:
        # Create a dir (and all its missing parent dirs) if it doesn't exist.
        log_dirname = os.path.dirname(log_filename)
        if log_dirname != "" and not os.path.exists(log_dirname):
            os.makedirs(log_dirname)
        # Delete the file since we don't want to append.
        if os.path.exists(log_filename):
            try:
                os.unlink(log_filename)
            except FileNotFoundError as e:
                # The file disappeared between the check and the removal.
                print(e)
        # Tee to file.
        file_handler = logging.FileHandler(log_filename)
        root_logger.addHandler(file_handler)
        file_handler.setFormatter(formatter)
        #
        _LOG.info("Saving log to file '%s'", log_filename)
    #
    _LOG.debug("Effective logging level=%s", _LOG.getEffectiveLevel())
    # Shut up chatty modules.
    dassert(hasattr(hloggin, "shutup_chatty_modules"))
    assert hasattr(hloggin, "shutup_chatty_modules")
    hloggin.shutup_chatty_modules(verbose=False)
    if report_command_line:
        _LOG.info("> cmd='%s'", get_command_line())
def get_nested_dict_iterator(
    nested: Mapping[Any, Any],
    path: Optional[Iterable[Any]] = None,
) -> Generator[Tuple[Tuple, Any], None, None]:
    """
    Visit a nested mapping depth-first, yielding its leaves.

    An empty mapping is considered a leaf and it is yielded together with the
    path leading to it.

    :param nested: nested dictionary
    :param path: keys leading to `nested`, used as prefix of the yielded
        paths, or `None` when starting from the root
    :return: path to leaf node (as tuple of keys), value
    """
    # Normalize the prefix to a tuple.
    if path is None:
        prefix: Tuple = ()
    elif isinstance(path, tuple):
        prefix = path
    else:
        prefix = tuple(path)
    if not nested.items():
        yield prefix, nested
    for key, value in nested.items():
        child_path = prefix + (key,)
        if not isinstance(value, AbcMapping):
            yield child_path, value
            continue
        # Recurse into the nested mapping.
        yield from get_nested_dict_iterator(value, child_path)
_NO_VALUE_SPECIFIED = "__NO_VALUE_SPECIFIED__"


def typed_get(
    dict_: Union[Dict, "Config"],  # noqa: F821
    key: Any,
    default_value: Optional[Any] = _NO_VALUE_SPECIFIED,
    *,
    expected_type: Optional[Any] = None,
) -> Any:
    """
    Equivalent to `dict.get(key, default_val)` and check the type of the
    output.

    :param dict_: dictionary to read from
    :param key: key to look up
    :param default_value: default value to return if key is not in `dict_`.
        When not specified the key must be present
    :param expected_type: expected type of the returned value, or `None` to
        skip the check
    :return: `dict_[key]` if available, else `default_value`
    :raises KeyError: if the key is missing and no default value was passed
        (and the `dassert_in` check did not already abort)
    """
    hdbg.dassert_isinstance(dict_, dict)
    # We can't use None as "no default" since None can be a valid default
    # value, so we use a sentinel string. The comparison is done only for
    # strings, since `==` on an arbitrary default (e.g., an array-like object)
    # can return a non-boolean value or raise.
    has_default = not (
        isinstance(default_value, str) and default_value == _NO_VALUE_SPECIFIED
    )
    if not has_default:
        # No value is specified so check that the key is present with
        # dassert_in to report a decent error.
        hdbg.dassert_in(key, dict_)
    try:
        ret = dict_.__getitem__(key)
    except KeyError as e:
        # No key: use the default val if it was passed or raise.
        _LOG.debug("e=%s", e)
        if not has_default:
            raise
        ret = default_value
    if expected_type is not None:
        hdbg.dassert_isinstance(ret, expected_type)
    return ret
# TODO(gp): use_sudo should be set to None and the correct value inferred from
# the repo config.
def get_docker_executable(use_sudo: bool) -> str:
    """
    Return the command to invoke Docker, prefixed by `sudo` when requested.

    :param use_sudo: whether to run Docker through `sudo`
    :return: `"sudo docker"` or `"docker"`
    """
    return "sudo docker" if use_sudo else "docker"
def container_exists(container_name: str, use_sudo: bool) -> Tuple[bool, str]:
    """
    Check if a Docker container with the given name exists (running or
    stopped) by executing a command like:

    ```
    > docker container ls --filter 'name=^/tmp.prettier$' -aq
    aed8a5ce33a9
    ```

    :param container_name: name of the container to look for
    :param use_sudo: whether to use sudo for Docker commands
    :return: whether the container exists, and its ID (empty if not found)
    """
    _LOG.debug(hprint.func_signature_to_str())
    #
    executable = get_docker_executable(use_sudo)
    # Docker filter refers to container names using a leading `/`. The filter
    # matches on part of the name, so it is anchored with `^...$` to avoid
    # reporting a container whose name only starts with `container_name`
    # (e.g., `tmp` vs `tmp.prettier`).
    cmd = f"{executable} container ls --filter 'name=^/{container_name}$' -aq"
    _, container_id = hsystem.system_to_one_line(cmd)
    container_id = container_id.rstrip("\n")
    exists = container_id != ""
    _LOG.debug(hprint.to_str("exists container_id"))
    return exists, container_id
def container_rm(container_name: str, use_sudo: bool) -> None:
    """
    Remove a Docker container by its name.

    :param container_name: Name of the Docker container to remove.
    :param use_sudo: Whether to use sudo for Docker commands.
    :raises AssertionError: If the container ID is not found.
    """
    _LOG.debug(hprint.func_signature_to_str())
    #
    executable = get_docker_executable(use_sudo)
    # Find the container ID from the name.
    # Docker filter refers to container names using a leading `/`. The filter
    # matches on part of the name, so it is anchored with `^...$`: without
    # the anchors a container whose name only starts with `container_name`
    # could be selected and deleted.
    cmd = f"{executable} container ls --filter 'name=^/{container_name}$' -aq"
    _, container_id = hsystem.system_to_one_line(cmd)
    container_id = container_id.rstrip("\n")
    hdbg.dassert_ne(container_id, "")
    # Delete the container.
    _LOG.debug(hprint.to_str("container_id"))
    cmd = f"{executable} container rm --force {container_id}"
    hsystem.system(cmd)
    _LOG.debug("docker container '%s' deleted", container_name)
def check_image_compatibility_with_current_arch(
    image_name: str,
    *,
    use_sudo: Optional[bool] = None,
    pull_image_if_needed: bool = True,
    assert_on_error: bool = True,
) -> None:
    """
    Check if the Docker image is compatible with the current architecture.

    :param image_name: Name of the Docker image to check.
    :param use_sudo: Whether to use sudo for Docker commands. If `None`, the
        value from `get_use_sudo()` is used.
    :param pull_image_if_needed: Whether to pull the image if it doesn't
        exist.
    :param assert_on_error: Whether to raise an error if the image is
        not compatible with the current architecture, instead of only
        logging a warning.
    """
    _LOG.debug(hprint.func_signature_to_str())
    hdbg.dassert_ne(image_name, "")
    if use_sudo is None:
        use_sudo = get_use_sudo()
    executable = get_docker_executable(use_sudo)
    # Get the architecture that we are running on.
    current_arch = get_current_arch()
    # > docker image inspect \
    #   623860924167.dkr.ecr.eu-north-1.amazonaws.com/helpers:local-saggese-1.1.0 \
    #   --format '{{.Architecture}}'
    # arm64
    # Check and pull the image if needed.
    has_image, _ = image_exists(image_name, use_sudo)
    if not has_image:
        if pull_image_if_needed:
            _LOG.warning("Image '%s' not found: trying to pull it", image_name)
            # Use the same executable (with / without sudo) as the other
            # Docker commands.
            cmd = f"{executable} pull {image_name}"
            hsystem.system(cmd)
        else:
            # Pass a fully formatted message, like the other `dfatal()` call
            # in this function.
            hdbg.dfatal(f"Image '{image_name}' not found")
    # Check the image architecture.
    cmd = f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'"
    _, image_arch = hsystem.system_to_one_line(cmd)
    _LOG.debug(hprint.to_str("image_arch"))
    # Check architecture compatibility.
    if _is_compatible_arch(current_arch, image_arch):
        _LOG.debug(
            "Running architecture '%s' and image architecture '%s' are compatible",
            current_arch,
            image_arch,
        )
        return
    msg = f"Running architecture '{current_arch}' != image architecture '{image_arch}'"
    if assert_on_error:
        hdbg.dfatal(msg)
    else:
        _LOG.warning(msg)
+ """ + _LOG.debug("Waiting for file: %s:%s", container_id, docker_file_path) + start_time = time.time() + while not os.path.exists(out_file_path): + cmd = f"docker cp {container_id}:{docker_file_path} {out_file_path}" + hsystem.system(cmd) + if time.time() - start_time > timeout_in_secs: + raise ValueError( + "Timeout reached. File not found: " + f"{container_id}:{docker_file_path}" + ) + time.sleep(check_interval_in_secs) + _LOG.debug("File generated: %s", out_file_path) + + +def replace_shared_root_path( + path: str, *, replace_ecs_tokyo: Optional[bool] = False +) -> str: + """ + Replace root path of the shared directory based on the mapping. + + :param path: path to replace, e.g., `/data/shared` + :param replace_ecs_tokyo: if True replace `ecs_tokyo` to `ecs` in the path + :return: replaced shared data dir root path, e.g., + - `/data/shared/ecs_tokyo/.../20240522_173000.20240522_182500/` -> + `/shared_data/ecs/.../20240522_173000.20240522_182500/` + - `/data/shared/ecs/.../20240522_173000.20240522_182500` -> + `/shared_data/ecs/.../20240522_173000.20240522_182500` + """ + # Inside ECS, we keep the original shared data path and replace it only when + # running inside Docker on the dev server. + if hserver.is_inside_docker() and not hserver.is_inside_ecs_container(): + shared_data_dirs = hserver.get_shared_data_dirs() + if shared_data_dirs is not None: + if replace_ecs_tokyo: + # Make a copy to avoid modifying the original one. 
def get_docker_base_cmd(use_sudo: bool) -> List[str]:
    """
    Build the leading part of a `docker run` command.

    The result looks like:
    ```
    docker run --rm --user $(id -u):$(id -g)
        -e CSFY_AWS_PROFILE -e CSFY_ECR_BASE_PATH
        ...
        -e OPENAI_API_KEY
    ```

    :param use_sudo: Whether to use sudo for Docker commands.
    :return: The pieces of the base command, to be joined with spaces by the
        caller.
    """
    executable = get_docker_executable(use_sudo)
    # Forward the CSFY-style env vars and the API keys, in a stable order.
    env_var_names = [
        *henv.get_csfy_env_vars(),
        *henv.get_api_key_env_vars(),
    ]
    env_var_names.sort()
    env_flags = " ".join(["-e " + name for name in env_var_names])
    # Handle coverage (currently disabled).
    # TODO(gp): Is this env var standard, or should it be
    # CSFY_COVERAGE_PROCESS_START?
    # if os.environ.get("COVERAGE_PROCESS_START"):
    #     _LOG.debug("Enabling coverage")
    #     host_cov_dir = os.path.abspath("coverage_data")
    #     # TODO(gp): Use `hio.create_dir()` instead.
    #     os.makedirs(host_cov_dir, exist_ok=True)
    #     os.chmod(host_cov_dir, 0o777)
    #     coverage_dir_container = "/app/coverage_data"
    #     docker_cmd.extend(
    #         [
    #             f"-e COVERAGE_FILE={coverage_dir_container}/.coverage",
    #             f"-e COVERAGE_PROCESS_START={coverage_dir_container}/.coveragerc",
    #             f"-v {host_cov_dir}:{coverage_dir_container}",
    #         ]
    #     )
    return [
        executable,
        "run --rm",
        "--user $(id -u):$(id -g)",
        env_flags,
    ]
def build_container_image(
    image_name: str,
    dockerfile: str,
    force_rebuild: bool,
    use_sudo: bool,
    *,
    use_cache: bool = True,
    incremental: bool = True,
) -> str:
    """
    Build a Docker image from a Dockerfile, unless it is already available.

    :param image_name: Name of the Docker container to build.
    :param dockerfile: Content of the Dockerfile for building the
        container.
    :param force_rebuild: Whether to force rebuild the Docker container.
        There are two level of caching. The first level of caching is
        our approach of skipping `docker build` if the image already
        exists and the Dockerfile hasn't changed. The second level is
        the Docker cache itself, which is invalidated by `--no-cache`.
        Forcing a rebuild disables both levels. The env var
        `CSFY_DOCKER_FORCE_REBUILD` has the same effect when set to a
        value other than `0`, `False`, `None`, or the empty string.
    :param use_sudo: Whether to use sudo for Docker commands.
    :param use_cache: Whether to let `docker build` use its cache; when
        False `--no-cache` is passed.
    :param incremental: Passed to `hio.create_dir()` when creating the
        build context dir `tmp.docker_build`.
    :return: Name of the built Docker container.
    """
    _LOG.debug(hprint.func_signature_to_str("dockerfile"))
    #
    image_name_out, dockerfile = get_container_image_name(image_name, dockerfile)
    # Check if the container already exists. If not, build it.
    has_container, _ = image_exists(image_name_out, use_sudo)
    # NOTE: Coverage support (`.coverage` image suffix, forced rebuild, copy of
    # `.coveragerc` into the build context) is currently disabled.
    # Interpret the env var as a flag, using the same false values as
    # `henv.get_env_var(..., as_bool=True)`, so that `0` / `False` do not force
    # a rebuild.
    force_rebuild_env = os.environ.get("CSFY_DOCKER_FORCE_REBUILD", "")
    if force_rebuild_env not in ("0", "", "None", "False"):
        _LOG.warning(
            "CSFY_DOCKER_FORCE_REBUILD forcing to rebuild container without cache"
        )
        force_rebuild = True
    if force_rebuild:
        _LOG.warning(
            "Forcing to rebuild of container '%s' without cache",
            image_name,
        )
        has_container = False
        use_cache = False
    # The variable names below are looked up by name by `hprint.to_str()`.
    _LOG.debug(hprint.to_str("has_container use_cache"))
    if not has_container:
        _LOG.warning("Building Docker container...")
        # Create the build context. This is done unconditionally since no
        # other code path creates it while the coverage flow is disabled.
        build_context_dir = "tmp.docker_build"
        hio.create_dir(build_context_dir, incremental=incremental)
        # Create a temporary Dockerfile.
        temp_dockerfile = os.path.join(build_context_dir, "Dockerfile")
        hio.to_file(temp_dockerfile, dockerfile)
        # Build the container.
        docker_executable = get_docker_executable(use_sudo)
        cmd_parts = [
            f"{docker_executable} build",
            f"-f {temp_dockerfile}",
            f"-t {image_name_out}",
            # "--platform linux/aarch64",
        ]
        if not use_cache:
            cmd_parts.append("--no-cache")
        cmd_parts.append(build_context_dir)
        cmd = " ".join(cmd_parts)
        hsystem.system(cmd, suppress_output=False)
        _LOG.info("Building Docker container... done")
    return image_name_out
def get_docker_mount_context() -> Tuple[bool, bool, str, str, str]:
    """
    Collect everything needed to mount the repo in a called container.

    :return: (is_caller_host, use_sibling_container_for_callee,
        caller_mount_path, callee_mount_path, mount)
    """
    # The caller is the host exactly when we are not inside a container.
    inside_docker = hserver.is_inside_docker()
    use_sibling = hserver.use_docker_sibling_containers()
    # `mount_info` is (caller_mount_path, callee_mount_path, mount).
    mount_info = get_docker_mount_info(not inside_docker, use_sibling)
    caller_path, callee_path, mount_str = mount_info
    return (not inside_docker, use_sibling, caller_path, callee_path, mount_str)
# TODO(gp): Move to helpers.hdbg.
def _dassert_is_path_included(file_path: str, including_path: str) -> None:
    """
    Assert that a file path is included within another path.

    Both paths are normalized with `os.path.normpath()` and then compared on
    path-component boundaries, so that `/app2/file` is not considered to be
    underneath `/app`, while `/app/./file` and `/app` itself are.

    :param file_path: The file path to check.
    :param including_path: The path that should include the file path.
    """
    norm_file_path = os.path.normpath(file_path)
    norm_including_path = os.path.normpath(including_path)
    # Build the prefix ending with exactly one separator (this also handles
    # the root dir, whose normalized form already ends with a separator).
    prefix = norm_including_path.rstrip(os.sep) + os.sep
    is_included = (
        norm_file_path == norm_including_path
        or norm_file_path.startswith(prefix)
    )
    hdbg.dassert(
        is_included,
        "'%s' needs to be underneath '%s'",
        file_path,
        including_path,
    )
def is_path(path: str) -> bool:
    """
    Guess whether `path` denotes a file or a directory.

    The decision is purely syntactic (the file system is not accessed), e.g.,
    ```
    is_path("file.txt")          # True, since it has an extension
    is_path("/path/to/file.py")  # True, since it is an absolute path
    is_path("/path/to")          # True, since it is an absolute path
    is_path("../data.csv")       # True, since it is a relative path
    is_path("folder/")           # True, since it has a trailing slash
    is_path(".hidden")           # True, since it has a leading dot
    is_path("readme")            # False, since it has no extension and no path
    ```

    :param path: the string to classify
    :return: True if the string looks like a path, False otherwise
    """
    _, extension = os.path.splitext(path)
    last_component = os.path.basename(path)
    looks_like_path = (
        # It has a file extension (e.g., `.txt`, `.csv`).
        bool(extension)
        # It is an absolute or an explicitly relative path.
        or path.startswith(("/", "./", "../"))
        # It ends with a slash.
        or path.endswith("/")
        # It is a hidden file (a single dot, in leading position).
        or (last_component.startswith(".") and last_component.count(".") == 1)
        # It contains a slash anywhere.
        or "/" in path
    )
    return looks_like_path
def add_open_arg(parser: argparse.ArgumentParser) -> None:
    """
    Register the `--open` flag, used to open output files on macOS.

    :param parser: ArgumentParser instance the flag is added to
    """
    # A boolean switch that is off unless passed on the command line.
    open_flag_opts = {
        "action": "store_true",
        "default": False,
        "help": "Open the output file on macOS",
    }
    parser.add_argument("--open", **open_flag_opts)
def get_docker_test_files(test_dir: str) -> List[str]:
    """
    Collect the docker test files stored directly in a directory.

    A file is a docker test file when its name matches
    `DOCKER_TEST_PATTERN`.

    :param test_dir: directory to search for test files
    :return: sorted list of test file paths
    """
    glob_expr = os.path.join(test_dir, DOCKER_TEST_PATTERN)
    test_files = glob.glob(glob_expr)
    test_files.sort()
    _LOG.info("Found %d docker test files", len(test_files))
    for test_file in test_files:
        _LOG.debug(" - %s", test_file)
    return test_files
@pytest.mark.slow
def test_docker_build(self) -> None:
    """
    Check that `docker_build.sh` completes without error.
    """
    # Prepare inputs: the script lives one dir above the dir of the test
    # file.
    test_file_path = os.path.abspath(self._test_file)
    test_dir = os.path.dirname(test_file_path)
    script_dir = os.path.dirname(test_dir)
    build_script = os.path.join(script_dir, "docker_build.sh")
    hdbg.dassert_file_exists(build_script)
    # Run test from the script dir.
    hsystem.system("cd " + script_dir + " && bash " + build_script)
def test_docker_bash(self) -> None:
    """
    Check that `docker_bash.sh` executes 'ls /git_root' fed through stdin and
    exits without error.

    The test is skipped when `docker_bash.sh` is not present.
    """
    # NOTE(review): unlike `test_docker_build` / `test_docker_cmd`, this test
    # is not marked as slow and does not `cd` into the script dir -- confirm
    # that this is intentional.
    # Prepare inputs: the script lives one dir above the dir of the test
    # file.
    test_file_path = os.path.abspath(self._test_file)
    script_dir = os.path.dirname(os.path.dirname(test_file_path))
    bash_script = os.path.join(script_dir, "docker_bash.sh")
    script_is_missing = not os.path.exists(bash_script)
    if script_is_missing:
        pytest.skip(f"docker_bash.sh not found in {script_dir}")
    # Run test: pipe the command into the interactive shell script.
    hsystem.system("echo 'ls /git_root' | bash " + bash_script)
def send_email(
    subject: str,
    message: str,
    to_adr: str,
    email_address: Optional[str] = None,
    email_password: Optional[str] = None,
    html: bool = False,
) -> None:
    """
    Send an e-mail through the Gmail SMTP server.

    :param subject: subject of the e-mail
    :param message: body of the e-mail
    :param to_adr: address to send the message to. For backward
        compatibility a list / tuple of addresses is also accepted
    :param email_address: address of the sender, also used to log in. If
        None, the env var `AM_EMAIL_ADDRESS` is used
    :param email_password: password used to log in. If None, the env var
        `AM_EMAIL_PASSWORD` is used
    :param html: send the body as HTML instead of plain text
    :raises KeyError: if a credential is not passed and the corresponding
        env var is not defined
    """
    # Resolve the credentials before opening the connection, so that a
    # missing env var doesn't leave a connection open.
    if email_address is None:
        email_address = os.environ["AM_EMAIL_ADDRESS"]
    if email_password is None:
        email_password = os.environ["AM_EMAIL_PASSWORD"]
    # Normalize the recipients: joining a plain string with ", " would split
    # it into single characters.
    if isinstance(to_adr, str):
        recipients = [to_adr]
    else:
        recipients = list(to_adr)
    # Build the message.
    msg = emmult.MIMEMultipart()
    msg["From"] = email_address
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject
    subtype = "html" if html else "plain"
    msg.attach(emtext.MIMEText(message, subtype))
    text = msg.as_string()
    # The context manager closes the connection, also in case of errors.
    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(email_address, email_password)
        server.sendmail(email_address, recipients, text)
def get_env_var(
    env_name: str,
    *,
    as_bool: bool = False,
    default_value: Any = None,
    abort_on_missing: bool = True,
) -> Union[str, bool, Any]:
    """
    Read an environment variable.

    :param env_name: name of the env var
    :param as_bool: interpret the value as a Boolean: `0`, `None`, `False`
        and the empty string are False, anything else is True
    :param default_value: value returned as-is (i.e., without Boolean
        conversion) when the env var is not defined and
        `abort_on_missing` is False
    :param abort_on_missing: assert if the env var is not defined, instead
        of returning the default value
    :return: value of env var
    """
    is_defined = env_name in os.environ
    if not is_defined and not abort_on_missing:
        return default_value
    if not is_defined:
        hdbg.dassert_in(
            env_name,
            os.environ,
            "Can't find env var '%s' in '%s'",
            env_name,
            str(os.environ),
        )
    raw_value = os.environ[env_name]
    if not as_bool:
        return raw_value
    # Convert the value into a boolean.
    return raw_value not in ("0", "", "None", "False")
def get_secret_env_vars() -> List[str]:
    """
    Return the sorted list of env vars that are secrets.

    The values of these env vars must not be printed. Every secret env var
    must also be one of the env vars returned by `get_env_vars()`.
    """
    secret_env_var_names = [
        # TODO(gp): Difference between amp and cmamp.
        "CSFY_AWS_ACCESS_KEY_ID",
        "CSFY_AWS_SECRET_ACCESS_KEY",
        # The session token is part of the (temporary) AWS credentials, so
        # its value should not be printed either.
        "CSFY_AWS_SESSION_TOKEN",
        "GH_ACTION_ACCESS_TOKEN",
    ]
    # No duplicates.
    # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead.
    hdbg.dassert_eq(
        len(set(secret_env_var_names)),
        len(secret_env_var_names),
        "There are duplicates",
        str(secret_env_var_names),
    )
    # Secret env vars are a subset of the env vars.
    env_vars = get_env_vars()
    # Use `hdbg.dassert()` rather than a bare `assert`, since `assert`
    # statements are removed when Python runs with `-O`.
    # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead.
    missing_vars = sorted(set(secret_env_var_names).difference(env_vars))
    hdbg.dassert(
        not missing_vars,
        "Secret vars '%s' are not in '%s'",
        str(missing_vars),
        str(env_vars),
    )
    # Sort.
    secret_env_var_names = sorted(secret_env_var_names)
    return secret_env_var_names
+ + :param num_commits: number of commits to report + :param my_commits: True to report only the current user commits + :return: string + """ + cmd = [] + cmd.append("git log --date=local --oneline --graph --date-order --decorate") + cmd.append( + "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" + ) + cmd.append(f"-{num_commits}") + if my_commits: + # This doesn't work in a container if the user relies on `~/.gitconfig` to + # set the user name. + # TODO(gp): We should use `get_git_name()`. + cmd.append("--author $(git config user.name)") + cmd = " ".join(cmd) + data: Tuple[int, str] = hsystem.system_to_string(cmd) + _, txt = data + return txt + + +# End copy. + + +def _get_git_signature(git_commit_type: str = "all") -> str: + """ + Get information about current branch and latest commits. + """ + txt: List[str] = [] + # Get the branch name. + cmd = "git branch --show-current" + _, branch_name = hsystem.system_to_one_line(cmd) + txt.append(f"branch_name='{branch_name}'") + # Get the short Git hash of the current branch. + cmd = "git rev-parse --short HEAD" + _, hash_ = hsystem.system_to_one_line(cmd) + txt.append(f"hash='{hash_}'") + # Add info about the latest commits. + num_commits = 3 + if git_commit_type == "all": + txt.append("# Last commits:") + log_txt = _git_log(num_commits=num_commits, my_commits=False) + txt.append(hprint.indent(log_txt)) + elif git_commit_type == "mine": + txt.append("# Your last commits:") + log_txt = _git_log(num_commits=num_commits, my_commits=True) + txt.append(hprint.indent(log_txt)) + elif git_commit_type == "none": + pass + else: + raise ValueError(f"Invalid value='{git_commit_type}'") + # + result = "\n".join(txt) + "\n" + hdbg.dassert(result.endswith("\n"), "result='%s'", result) + return result + + +# def _get_submodule_signature( +# partial_signature: List[str], *, git_commit_type: str = "all" +# ) -> str: +# """ +# Add git signature for all submodules. 
+# :param partial_signature: the signature to append to +# `git_commit_type` the type of git commit to include in the +# signature +# :return: system signature enhanced by git submodule info +# """ +# # TODO(Juraj): Think of a better generalisation rather listing all the options. +# submodule_options = ["amp", "amp/helpers_root", "helpers_root"] +# signature = partial_signature +# prev_cwd = os.getcwd() +# for submodule in submodule_options: +# if os.path.exists(submodule): +# try: +# # Temporarily descend into submodule. +# os.chdir(submodule) +# signature.append(f"# Git {submodule}") +# git_amp_sig = _get_git_signature(git_commit_type) +# signature = _append(signature, git_amp_sig) +# # In case there is a runtime error we want to end up in a consistent +# # state (the original path). +# finally: +# os.chdir(prev_cwd) +# hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) +# return signature + + +# ############################################################################# +# Get system info. +# ############################################################################# + + +def _get_platform_info() -> str: + """ + Get platform information as a list of strings. + """ + import platform + + txt_tmp: List[str] = [] + uname = platform.uname() + txt_tmp.append(f"system={uname.system}") + txt_tmp.append(f"node name={uname.node}") + txt_tmp.append(f"release={uname.release}") + txt_tmp.append(f"version={uname.version}") + txt_tmp.append(f"machine={uname.machine}") + txt_tmp.append(f"processor={uname.processor}") + # + txt = hprint.to_info("Platform info", txt_tmp) + return txt + + +def _get_psutil_info() -> str: + """ + Get system resource information using psutil. 
+ """ + try: + import psutil + + has_psutil = True + except ModuleNotFoundError as e: + _LOG.warning("psutil is not installed: %s", str(e)) + has_psutil = False + txt_tmp = [] + if has_psutil: + txt_tmp.append(f"cpu count={psutil.cpu_count()}") + if hasattr(psutil, "cpu_freq") and psutil.cpu_freq is not None: + txt_tmp.append(f"cpu freq={str(psutil.cpu_freq())}") + else: + txt_tmp.append("cpu freq=unavailable") + # TODO(gp): Report in MB or GB. + txt_tmp.append(f"memory={str(psutil.virtual_memory())}") + txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") + else: + txt_tmp.append("psutil is not installed") + # + txt = hprint.to_info("psutils info", txt_tmp) + return txt + + +# ############################################################################# +# Get package info. +# ############################################################################# + + +def _get_library_version(lib_name: str) -> str: + try: + cmd = f"import {lib_name}" + # pylint: disable=exec-used + exec(cmd) + except ImportError: + version = "?" + else: + cmd = f"{lib_name}.__version__" + version = eval(cmd) + return version + + +def _get_package_info() -> Tuple[str, int]: + """ + Get package version information. 
+ + Returns: + Tuple containing: + - List of strings with package info + - Number of failed imports + """ + import platform + + txt_tmp = [] + packages = [] + packages.append(("python", platform.python_version())) + # import sys + # print(sys.version) + libs = [ + "cvxopt", + "cvxpy", + "gluonnlp", + "gluonts", + "joblib", + "mxnet", + "numpy", + "pandas", + "pyarrow", + "scipy", + "seaborn", + "sklearn", + "statsmodels", + ] + libs = sorted(libs) + failed_imports = 0 + for lib in libs: + # This is due to Cmamp4924: + # WARNING: libarmpl_lp64_mp.so: cannot open shared object file: No such + # file or directory + try: + version = _get_library_version(lib) + except OSError as e: + print(_WARNING + ": " + str(e)) + if version.startswith("ERROR"): + failed_imports += 1 + packages.append((lib, version)) + txt_tmp.extend([f"{lib}: {version}" for (lib, version) in packages]) + # + txt = hprint.to_info("Packages", txt_tmp) + return txt, failed_imports + + +# ############################################################################# + + +def _get_git_info(git_commit_type: str) -> str: + txt_tmp: List[str] = [] + try: + txt_tmp.append(_get_git_signature(git_commit_type)) + # If there are any submodules, fetch their git signature. + # txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) + except RuntimeError as e: + _LOG.warning(str(e)) + txt_tmp.append("No git info") + # + txt = hprint.to_info("Git info", txt_tmp) + return txt + + +# ############################################################################# +# Get system signature. +# ############################################################################# + + +def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: + """ + Return a string with the system signature. + + :param git_commit_type: the type of git commit to include in the + signature + :return: the system signature and the number of failed imports + """ + txt: List[str] = [] + # Add container version. 
+ txt_tmp = hversio.get_container_version_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add Git signature. + txt_tmp = _get_git_info(git_commit_type) + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add platform info. + txt_tmp = _get_platform_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add psutil info. + txt_tmp = _get_psutil_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add Docker info. + txt_tmp = hserver.get_docker_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add package info. + txt_tmp, failed_imports = _get_package_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # + txt_str: str = hprint.to_info("System signature", txt) + return txt_str, failed_imports + + +# ############################################################################# +# Package all the information into a string. +# ############################################################################# + + +def env_to_str( + repo_config: bool = True, + server_config: bool = True, + system_signature: bool = True, + env_vars: bool = True, +) -> str: + """ + Package all the information into a string. 
+ """ + # + msg = "" + # + if repo_config: + repo_config_str = hrecouti.get_repo_config().config_func_to_str() + msg += hprint.to_info("Repo config", repo_config_str) + "\n" + # + if server_config: + server_config_str = hserver.config_func_to_str() + msg += hprint.to_info("Server config", server_config_str) + "\n" + # + if system_signature: + msg += get_system_signature()[0] + "\n" + # + if env_vars: + env_vars_str = env_vars_to_string() + msg += hprint.to_info("Env vars", env_vars_str) + "\n" + return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py new file mode 100644 index 000000000..d758ff16b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py @@ -0,0 +1,232 @@ +""" +Import as: + +import helpers.hfile_tree as hfiltree +""" + +import logging +import os +import pathlib +import re +from typing import Dict, List + +_LOG = logging.getLogger(__name__) + + +def _build_tree_lines( + dir_name: str, + nodes: List[pathlib.Path], + comments: Dict[str, str], +) -> str: + """ + Build the text lines for the directory tree while preserving inline + comments. 
+ + :param dir_name: the directory name + :param nodes: relative paths under the given directory + :param comments: inline comments from existing file + :return: a formatted tree + + Example output: + ``` + devops + - __init__.py + - compose + - __init__.py + - tmp.docker-compose.yml + - docker_build + - create_users.sh + - dev.Dockerfile + - dockerignore.dev + - dockerignore.prod + - etc_sudoers + - fstab + - install_cprofile.sh + - install_dind.sh + - install_os_packages.sh + - install_publishing_tools.sh + - install_python_packages.sh + - pip_list.txt + - poetry.lock + - poetry.toml + - prod.Dockerfile + - pyproject.python_data_stack.toml + - pyproject.toml + - update_os.sh + - utils.sh + - docker_run + - bashrc + - docker_setenv.sh + - entrypoint.sh + - run_jupyter_server.sh + - env + - default.env + ``` + """ + lines = [dir_name] + for rel in nodes: + indent = " " * (len(rel.parts) - 1) + key = "/".join(rel.parts) + suffix = comments.get(key, "") + lines.append(f"{indent}- {rel.name}{suffix}".rstrip()) + return "\n".join(lines) + + +def _parse_comments(old_tree: List[str]) -> Dict[str, str]: + """ + Parse existing tree lines to extract inline comments. + + :param old_tree: the existing tree block + :return: inline comments and indentations + """ + comments: Dict[str, str] = {} + stack: List[str] = [] + for line in old_tree: + # Find indents, bullet points, name, and inline comments. + match = re.match(r"^(\s*)-\s+([^\s#]+)(\s*#.*)?$", line) + if not match: + continue + indent, name, suffix = match.groups() + level = len(indent) // 2 + stack = stack[:level] + stack.append(name) + key = "/".join(stack) + comments[key] = suffix or "" + return comments + + +def _get_tree_nodes( + dir_path: pathlib.Path, + depth: int, + include_tests: bool, + include_python: bool, + only_dirs: bool, +) -> List[pathlib.Path]: + """ + Get relative paths under the given directory based on filters. 
+ + Filters include: + - Test files and directories + - Python files + + :param dir_path: the directory path + :param depth: maximum depth to traverse + :param include_tests: include test files or directories + :param include_python: only show python files + :param only_dirs: only show directories + :return: all relative paths that match the specified flags + """ + nodes: List[pathlib.Path] = [] + for dirpath, dirnames, filenames in os.walk(dir_path): + rel_dir = pathlib.Path(dirpath).relative_to(dir_path) + level = len(rel_dir.parts) + if 0 < depth <= level: + # Stop pruning on given depth. + dirnames[:] = [] + continue + if not include_tests: + # Prune out test directories. + filtered = [] + for d in dirnames: + dir_lower = d.lower() + if not ( + dir_lower.startswith("test_") + or dir_lower in {"test", "tests"} + ): + filtered.append(d) + dirnames[:] = filtered + candidates = dirnames + filenames + for name in candidates: + full_path = pathlib.Path(dirpath) / name + rel_path = full_path.relative_to(dir_path) + name_lower = name.lower() + is_dir = full_path.is_dir() + is_test_name = name_lower.startswith("test_") or name_lower in { + "test", + "tests", + } + is_test = is_test_name or name_lower.endswith("_test.py") + is_python = full_path.suffix in {".py", ".ipynb"} + if is_dir: + # Always include directories. + nodes.append(rel_path) + continue + # Flag filter to include test or python files. + allowed_by_flag = (include_tests and is_test) or ( + include_python and is_python + ) + if only_dirs: + include_file = allowed_by_flag + else: + include_file = allowed_by_flag or ( + not is_test + and not is_python + and not include_tests + and not include_python + ) + if include_file: + nodes.append(rel_path) + nodes.sort() + return nodes + + +def generate_tree( + path: str, + depth: int, + include_tests: bool, + include_python: bool, + only_dirs: bool, + output: str, +) -> str: + """ + Generate a directory tree, and optionally update or create a markdown file. 
+ + :param path: directory path to traverse + :param depth: maximum depth to traverse + :param include_tests: include test files or directories + :param include_python: include show python files + :param only_dirs: only show directories + :param output: path of the markdown file to create or update + """ + dir_path = pathlib.Path(path).resolve() + nodes = _get_tree_nodes( + dir_path, depth, include_tests, include_python, only_dirs + ) + _LOG.debug("Collected %d nodes under '%s'", len(nodes), dir_path) + if output: + output_path = pathlib.Path(output) + start_marker = f"" + end_marker = "" + prefix = [] + suffix = [] + comments = {} + if output_path.exists(): + # Parse inline comments. + file = output_path.read_text(encoding="utf-8") + lines = file.splitlines() + _LOG.debug("Reading existing file '%s' for markers", output_path) + try: + idx_start = lines.index(start_marker) + idx_end = lines.index(end_marker) + _LOG.debug("Markers found at lines %d–%d", idx_start, idx_end) + except ValueError as exc: + raise RuntimeError( + "Couldn't find tree markers in output file." + ) from exc + # Parse existing file. + prefix = lines[:idx_start] + old_tree = lines[idx_start + 1 : idx_end] + suffix = lines[idx_end + 1 :] + comments = _parse_comments(old_tree) + # Build the directory tree. + tree_block = _build_tree_lines(dir_path.name, nodes, comments) + # Build the content of the file. + content = ( + "\n".join(prefix + [start_marker, tree_block, end_marker] + suffix) + + "\n" + ) + output_path.write_text(content, encoding="utf-8") + _LOG.debug("Writing updated tree to '%s'", output_path) + # Return tree without markers. 
+ tree_block = _build_tree_lines(dir_path.name, nodes, {}) + return tree_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py new file mode 100644 index 000000000..14e2f600e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py @@ -0,0 +1,1865 @@ +""" +Import as: + +import helpers.hgit as hgit +""" + +import collections +import functools +import logging +import os +import random +import re +import string +from typing import cast, List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.repo_config_utils as hrecouti + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# We refer to "Git" when we talk about the control system (e.g., "in a Git +# repository") and `git` when we refer to implementation of Git as a program +# installed in a computer. + +# TODO(gp): Check +# https://git-scm.com/book/en/v2/Appendix-B%3A-Embedding-Git-in-your-Applications-Dulwich + +# TODO(gp): Avoid "stuttering": the module is already called "git", so no need +# to make reference to git again. + +# TODO(gp): Add mem caching to some functions below. We assume that one doesn't +# change dir (which is a horrible idea) and thus we can memoize. + +# TODO(gp): Spell super_module and sub_module always in the same way in both +# comments and code. For simplicity (e.g., instead of `super_module` in code and +# `super-module` in comment) we might want to spell `supermodule` everywhere. 
+ +# ############################################################################# +# Git branch functions +# ############################################################################# + + +def extract_gh_issue_number_from_branch(branch_name: str) -> Optional[int]: + """ + Extract the GitHub issue number from a branch name. + + Example: + CmampTask10725_Add_more_tabs_to_orange_tmux -> 10725 + HelpersTask23_Add_more_tabs_to_orange_tmux -> 23. + + Works only if `invoke gh_branch_create` was used to create the branch. + or the name was retrieved using `invoke gh_issue_title`. + + :param branch_name: the name of the branch + :return: the issue number or None if it can't be extracted + """ + match = re.match(r".*Task_?(\d+)(?:_\w+)?", branch_name) + if match: + # Return the captured number. + return int(match.group(1)) + return None + + +def get_branch_name(dir_name: str = ".") -> str: + """ + Return the name of the Git branch in a directory. + + E.g., `master` or `AmpTask672_Add_script_to_check_and_merge_PR` + + :param dir_name: directory containing the git repository + :return: the name of the current branch + """ + hdbg.dassert_path_exists(dir_name) + # > git rev-parse --abbrev-ref HEAD + # master + cmd = f"cd {dir_name} && git rev-parse --abbrev-ref HEAD" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, output = data + return output + + +def _get_branch_next_name_via_github_api( + curr_branch_name: str, + *, + max_num_ids: int = 100, +) -> Optional[str]: + """ + Find the next available branch name using GitHub API (fast method). + + Uses `gh pr list` to query merged branches and extract the highest number. + + :param curr_branch_name: current branch name (e.g., "gp_scratch") + :param max_num_ids: maximum number of IDs to check + :return: next available branch name or None if GitHub API is not available + """ + try: + # Query merged PRs and extract branch names matching pattern. 
+ cmd = ( + "gh pr list --state merged --json headRefName " + "| jq -r '.[].headRefName | select(test(\"^{branch}_[0-9]+$\"))' " + "| sed 's/.*_//' | sort -rn | head -1" + ).format(branch=re.escape(curr_branch_name)) + _LOG.debug("Running GitHub API query: %s", cmd) + ret, output = hsystem.system_to_one_line(cmd, suppress_output=True) + if ret != 0: + _LOG.debug("GitHub API query failed, falling back to linear scan") + return None + # Extract the highest number from merged branches. + output = output.strip() + if output: + highest_num = int(output) + next_num = highest_num + 1 + new_branch_name = f"{curr_branch_name}_{next_num}" + _LOG.info( + "Found highest number '%s' in merged branches, next is '%s'", + highest_num, + next_num, + ) + return new_branch_name + # No existing numbered branches found. + _LOG.debug("No existing numbered branches found, starting at 1") + return f"{curr_branch_name}_1" + except Exception as e: + _LOG.debug( + "Error querying GitHub API: %s, falling back to linear scan", + e, + ) + return None + + +@functools.lru_cache() +def _get_gh_pr_list() -> str: + """ + Get a cached list of all pull requests from GitHub (merged and open). + + Results are cached via functools.lru_cache to avoid repeated GitHub API calls. + + :return: raw output from `gh pr list` command + """ + cmd = "gh pr list -s all --limit 1000" + rc, txt = hsystem.system_to_string(cmd) + _ = rc + return txt + + +def does_branch_exist( + branch_name: str, + mode: str, + *, + dir_name: str = ".", +) -> bool: + """ + Check if a branch with the given name exists in local git or on GitHub. + + Supports checking in local git repository or on GitHub via the `gh` CLI. 
+ + :param branch_name: the name of the branch to check + :param mode: where to check ("all" checks all, "git_local", "git_remote", "github") + :param dir_name: directory containing the git repository + :return: True if the branch exists in the specified location + """ + _LOG.debug(hprint.to_str("branch_name mode dir_name")) + # Handle the "all" case by recursion on all the possible modes. + if mode == "all": + exists = False + for mode_tmp in ("git_local", "git_remote", "github"): + exists_tmp = does_branch_exist( + branch_name, mode_tmp, dir_name=dir_name + ) + exists = exists or exists_tmp + return exists + # + hdbg.dassert_in(mode, ("git_local", "git_remote", "github")) + exists = False + if mode in ("git_local", "git_remote"): + # From https://stackoverflow.com/questions/35941566 + cmd = f"cd {dir_name} && git fetch --prune" + hsystem.system(cmd, abort_on_error=False) + # From https://stackoverflow.com/questions/5167957 + # > git rev-parse --verify LimeTask197_Get_familiar_with_CF2 + # f03bfa0b4577c2524afd6a1f24d06013f8aa9f1a + # > git rev-parse --verify I_dont_exist + # fatal: Needed a single revision + git_branch_name = branch_name + if mode == "git_remote": + git_branch_name = f"origin/{git_branch_name}" + cmd = f"cd {dir_name} && git rev-parse --verify {git_branch_name}" + rc = hsystem.system(cmd, abort_on_error=False) + exists = rc == 0 + _LOG.debug("branch_name='%s' on git: exists=%s", branch_name, exists) + # Check on GitHub. + if mode == "github": + txt = _get_gh_pr_list() + # ``` + # > gh pr list -s all --limit 10000 | grep AmpTask2163 + # 347 AmpTask2163_Implement_tiled_backtesting_1 AmpTask2163 ... MERGED + # ``` + # The text is separated by tabs. + # + # If there are no issues on the GitHub repo, just return. + # ``` + # > gh pr list -s all --limit 1000 + # no pull requests match your search in causify-ai/sports_analytics + # ``` + if txt == "": + return False + for line in txt.split("\n"): + # number, GH branch name, Git branch name, status. 
+ fields = line.split("\t") + # fields=['179', + # 'CmTask2914: Add end-to-end unit test for prod reconcile', + # 'CmTask2914_Add_end_to_end_unit_test_around_the_prod_reconciliation', + # 'DRAFT', '2022-09-27 19:56:50 +0000 UTC'] + hdbg.dassert_lte(4, len(fields), "fields=%s", fields) + number, gh_branch_name, git_branch_name = fields[:3] + _ = number, gh_branch_name + if branch_name == git_branch_name: + exists = True + _LOG.debug( + "branch_name='%s' on github: exists=%s", branch_name, exists + ) + return exists + + +def _get_branch_next_name_linear_scan( + dir_name: str, + curr_branch_name: str, + *, + max_num_ids: int = 100, + log_verb: int = logging.DEBUG, +) -> str: + """ + Find the next available branch name using linear scanning (fallback method). + + Tries branch names sequentially until finding one that doesn't exist. + + :param dir_name: directory containing the git repository + :param curr_branch_name: current branch name (e.g., "gp_scratch") + :param max_num_ids: maximum number of IDs to check + :param log_verb: logging verbosity level + :return: next available branch name + """ + for i in range(1, max_num_ids): + new_branch_name = f"{curr_branch_name}_{i}" + _LOG.info("Trying branch name '%s' ...", new_branch_name) + mode = "all" + exists = does_branch_exist(new_branch_name, mode, dir_name=dir_name) + _LOG.log(log_verb, "-> exists=%s", exists) + if not exists: + _LOG.log(log_verb, "new_branch_name='%s'", new_branch_name) + return new_branch_name + raise ValueError( + f"Can't find the next branch name for '{curr_branch_name}' " + f"within {max_num_ids} ids" + ) + + +def get_branch_next_name( + dir_name: str = ".", + *, + curr_branch_name: Optional[str] = None, + log_verb: int = logging.DEBUG, + method: str = "auto", +) -> str: + """ + Return a name derived from the branch so that the branch doesn't exist. 
+ + E.g., `AmpTask1903_Implemented_system_Portfolio` -> + `AmpTask1903_Implemented_system_Portfolio_3` + + :param dir_name: directory containing the git repository + :param curr_branch_name: branch name to use (if None, gets current branch) + :param log_verb: logging verbosity level + :param method: method to use ('auto' tries fast first, 'github_api', 'linear_scan') + :return: next available branch name + """ + if curr_branch_name is None: + curr_branch_name = get_branch_name(dir_name=dir_name) + hdbg.dassert_ne( + curr_branch_name, "master", "Cannot get next name for 'master' branch" + ) + _LOG.log(log_verb, "curr_branch_name='%s'", curr_branch_name) + max_num_ids = 100 + hdbg.dassert_in( + method, ["auto", "github_api", "linear_scan"], "Invalid method specified" + ) + # Try GitHub API method first (faster) if requested or on auto mode. + next_name: Optional[str] = None + if method in ("auto", "github_api"): + next_name = _get_branch_next_name_via_github_api( + curr_branch_name, + max_num_ids=max_num_ids, + ) + if next_name is None and method == "github_api": + raise ValueError("GitHub API method requested but failed") + # Fall back to linear scanning if GitHub API failed in auto mode. + if next_name is None and method == "auto": + _LOG.warning("GitHub API method failed, falling back to linear scan") + next_name = _get_branch_next_name_linear_scan( + dir_name, + curr_branch_name, + max_num_ids=max_num_ids, + log_verb=log_verb, + ) + else: + # Fall back to linear scanning method when explicitly requested. + next_name = _get_branch_next_name_linear_scan( + dir_name, + curr_branch_name, + max_num_ids=max_num_ids, + log_verb=log_verb, + ) + hdbg.dassert_ne(next_name, None) + return cast(str, next_name) + + +def get_branch_hash(dir_name: str = ".") -> str: + """ + Return the hash of the commit right before the branch was created. + + This finds the merge-base between the current branch and master, which is + the commit where the branch was created. 
+ + :param dir_name: directory containing the git repository + :return: the hash of the commit where the branch diverged from master + """ + curr_branch_name = get_branch_name(dir_name=dir_name) + hdbg.dassert_ne( + curr_branch_name, "master", "Cannot get branch hash for 'master' branch" + ) + _LOG.debug("curr_branch_name=%s", curr_branch_name) + cmd = f"cd {dir_name} && git merge-base master {curr_branch_name}" + _, hash_ = hsystem.system_to_string(cmd) + hash_ = hash_.rstrip("\n").lstrip("\n") + hdbg.dassert_eq( + len(hash_.split("\n")), 1, "Expected single hash line from merge-base" + ) + return hash_ + + +# ############################################################################# + + +@functools.lru_cache() +def is_inside_submodule(git_dir: str = ".") -> bool: + """ + Return whether a dir is inside a Git submodule or a Git supermodule. + + We determine this by checking if the current Git repo is included inside another Git repo. + + :param git_dir: directory to check + :return: True if the directory is inside a submodule + """ + cmd = [] + # Go to the directory. + cmd.append(f"cd {git_dir}") + # > cd im/ + # > git rev-parse --show-toplevel + # /Users/saggese/src/.../amp + cmd.append('cd "$(git rev-parse --show-toplevel)/.."') + # > git rev-parse --is-inside-work-tree + # true + cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") + # Execute the command chain and check the return code. + cmd_as_str = " && ".join(cmd) + rc = hsystem.system(cmd_as_str, abort_on_error=False) + ret: bool = rc == 0 + return ret + + +# ############################################################################# +# Git submodule functions +# ############################################################################# + + +@functools.lru_cache() +def get_client_root(super_module: bool) -> str: + """ + Return the full path of the root of the Git client. + + E.g., `/Users/saggese/src/.../amp`. 
+ + :param super_module: if True use the root of the Git super_module, + if we are in a submodule. Otherwise use the Git sub_module root + """ + if super_module and is_inside_submodule(): + # https://stackoverflow.com/questions/957928 + # > cd /Users/saggese/src/.../amp + # > git rev-parse --show-superproject-working-tree + # /Users/saggese/src/... + cmd = "git rev-parse --show-superproject-working-tree" + else: + # > git rev-parse --show-toplevel + # /Users/saggese/src/.../amp + cmd = "git rev-parse --show-toplevel" + # TODO(gp): Use system_to_one_line(). + _, out = hsystem.system_to_string(cmd) + out = out.rstrip("\n") + hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") + client_root: str = os.path.realpath(out) + return client_root + + +# TODO(gp): Replace `get_client_root` with this. +# TODO(gp): -> get_client_root2() or get_outermost_supermodule_root() +def find_git_root(path: str = ".") -> str: + """ + Find recursively the dir of the outermost super module. + + This function traverses the directory hierarchy upward from a specified + starting path to find the root directory of a Git repository. + It supports: + - standard git repository: where a `.git` directory exists at the root + - submodule: where repository is nested inside another, and the `.git` file contains + a `gitdir:` reference to the submodule's actual Git directory + - linked repositories: where the `.git` file points to a custom Git directory + location, such as in Git worktrees or relocated `.git` directories + + :param path: starting file system path. Defaults to the current directory (".") + :return: absolute path to the top-level Git repository directory + """ + import helpers.hio as hio + + path = os.path.abspath(path) + git_root_dir = None + while True: + git_dir = os.path.join(path, ".git") + _LOG.debug("git_dir=%s", git_dir) + # Check if `.git` is a directory which indicates a standard Git repository. + if os.path.isdir(git_dir): + # Found the Git root directory. 
+ git_root_dir = path + break + # Check if `.git` is a file which indicates submodules or linked setups. + if os.path.isfile(git_dir): + txt = hio.from_file(git_dir) + lines = txt.split("\n") + for line in lines: + # Look for a `gitdir:` line that specifies the linked directory. + # Example: `gitdir: ../.git/modules/helpers_root` (submodule) + # or `gitdir: /path/to/.git/worktrees/name` (worktree). + if line.startswith("gitdir:"): + git_dir_path = line.split(":", 1)[1].strip() + _LOG.debug("git_dir_path=%s", git_dir_path) + # For worktrees, the current path is the root of the worktree. + # The worktree's `.git` file points to the shared git directory + # (e.g., main_repo/.git/worktrees/worktree_name). + if ".git/worktrees/" in git_dir_path: + git_root_dir = path + else: + # For other linked setups (submodules, custom .git directory), + # traverse up to find the root of the target repository. + abs_git_dir = os.path.abspath( + os.path.join(path, git_dir_path) + ) + # Traverse up to find the top-level `.git` directory. + while True: + # Check if the current directory is a `.git` directory. + if os.path.basename(abs_git_dir) == ".git": + git_root_dir = os.path.dirname(abs_git_dir) + # Found the root. + break + # Move one level up in the directory structure. + parent = os.path.dirname(abs_git_dir) + # Reached the filesystem root without finding the `.git` directory. + hdbg.dassert_ne( + parent, + abs_git_dir, + "Top-level .git directory not found.", + ) + # Continue traversing up. + abs_git_dir = parent + break + # Exit the loop if the Git root directory is found. + if git_root_dir is not None: + break + # Move up one level in the directory hierarchy. + parent = os.path.dirname(path) + # Reached the filesystem root without finding `.git`. + hdbg.dassert_ne( + parent, + path, + "No .git directory or file found in any parent directory.", + ) + # Update the path to the parent directory for the next iteration. 
+ path = parent + hdbg.dassert_is_not( + git_root_dir, None, "Git root directory should have been found" + ) + return str(git_root_dir) + + +# ############################################################################# + + +# TODO(gp): There are several functions doing the same work. +# helpers_root/helpers/hgit.py:827:def find_file_in_git_tree( +# helpers_root/helpers/hsystem.py:757:def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: +def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: + """ + Find a file within a directory hierarchy, excluding version control and cache dirs. + + Searches for the file starting from a directory, skipping .git and .mypy_cache + to avoid expensive traversals. + + :param file_name: the name of the file to find + :param dir_path: the directory to start the search from (defaults to git root) + :return: the first absolute path to the file found + """ + if dir_path is None: + dir_path = find_git_root() + _LOG.debug(hprint.to_str("dir_path")) + cmd = ( + rf"find {dir_path} " + + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " + + rf'-o -name "{file_name}" -print' + ) + _LOG.debug(hprint.to_str("cmd")) + _, res = hsystem.system_to_one_line(cmd) + hdbg.dassert_ne(res, "Can't find file '%s' in '%s'", file_name, dir_path) + return res + + +def _is_repo(repo_short_name: str) -> bool: + """ + Check if the current directory is in a repository with the given short name. + + Uses repo config to determine the repository type without relying on directory names. + + :param repo_short_name: the short name of the repository to check (e.g., "helpers", "amp") + :return: True if the current directory is in the specified repository + """ + curr_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() + is_repo = bool(curr_repo_short_name == repo_short_name) + return is_repo + + +def is_helpers() -> bool: + """ + Return whether we are inside `helpers` repo. 
+ + Either as super module, or a sub module depending on a current + working directory. + """ + return _is_repo("helpers") + + +def find_helpers_root(dir_path: str = ".") -> str: + """ + Find the root directory of the `helpers` repository. + + If the current directory is within the `helpers` repository, the root of the + repository is returned. Otherwise, the function searches for the `helpers_root` + directory starting from the root of the repository. + + :param dir_path: starting directory for the search + :return: absolute path to the `helpers_root` directory + """ + with hsystem.cd(dir_path): + git_root = find_git_root() + if is_helpers(): + # If we are in `helpers` repo as supermodule, its root is the helpers_root. + cmd = "git rev-parse --show-toplevel" + _, helpers_root = hsystem.system_to_one_line(cmd) + else: + # Search for the `helpers_root` directory from the root of the supermodule. + helpers_root = find_file("helpers_root", dir_path=git_root) + helpers_root = os.path.abspath(helpers_root) + # Verify that the directory and `helpers` subdirectory exist. + hdbg.dassert_dir_exists( + helpers_root, "helpers_root directory must exist" + ) + hdbg.dassert_dir_exists( + os.path.join(helpers_root, "helpers"), + "helpers subdirectory must exist within helpers_root", + ) + return helpers_root + + +# ############################################################################# + + +def resolve_git_client_dir(git_client_name: str) -> str: + """ + Resolve the absolute path of the Git client directory. + + Supports both relative names (assumed to be in ~/src/) and absolute paths. + + :param git_client_name: the name of the Git client (e.g., "helpers1" + or "/Users/saggese/src/helpers1") + :return: the absolute path of the Git client directory + """ + if not os.path.isabs(git_client_name): + # Relative names are resolved relative to ~/src/ directory for convenience. 
+ git_client_dir = os.path.join(os.environ["HOME"], "src", git_client_name) + else: + # Absolute paths are used as-is. + git_client_dir = git_client_name + _LOG.debug(hprint.to_str("git_client_dir")) + hdbg.dassert_dir_exists(git_client_dir, "Git client directory must exist") + return git_client_dir + + +def project_file_name_in_git_client( + file_name: str, + git_src_dir: str, + git_dst_dir: str, + *, + check_src_file_exists: bool = False, + check_dst_file_exists: bool = False, +) -> str: + """ + Find the file corresponding to `file_name` in `git_src_dir` for the client + `git_dst_dir`. + + This is useful when we want to find the file in a destination Git client + directory corresponding to a file in a source Git client directory. + + E.g., for: + ``` + file_name = '/Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py' + git_src_dir = '/Users/saggese/src/helpers1' + git_dst_dir = '/Users/saggese/src/helpers2' + ``` + the output is + `/Users/saggese/src/helpers2/dev_scripts_helpers/system_tools/path.py` + + :param file_name: the name of the file to find (which is under `git_src_dir`) + :param git_src_dir: the directory of the Git client from which `file_name` is + :param git_dst_dir: the directory of the Git client to which find the + corresponding file + :param check_src_file_exists: if True, check that `file_name` exists in + `git_src_dir` + :param check_dst_file_exists: if True, check that the file in `git_dst_dir` + exists + :return: the absolute path of the file in `git_dst_dir` + """ + if not os.path.isabs(file_name): + file_name = os.path.abspath(file_name) + if check_src_file_exists: + hdbg.dassert_file_exists(file_name) + if not os.path.isabs(git_src_dir): + git_src_dir = os.path.abspath(git_src_dir) + if not os.path.isabs(git_dst_dir): + git_dst_dir = os.path.abspath(git_dst_dir) + # Compute the relative path of the file in the source git client. 
+ hdbg.dassert_is_path_abs(file_name) + hdbg.dassert_is_path_abs(git_src_dir) + rel_path = os.path.relpath(file_name, git_src_dir) + # Compute the absolute path of the file in the destination git client. + hdbg.dassert_is_path_abs(git_dst_dir) + dst_file_path = os.path.join(git_dst_dir, rel_path) + dst_file_path = os.path.abspath(dst_file_path) + if check_dst_file_exists: + hdbg.dassert_file_exists(dst_file_path) + return dst_file_path + + +def get_project_dirname(only_index: bool = False) -> str: + """ + Return the name of the project directory (e.g., `/Users/saggese/src/amp1` -> `amp1`). + + NOTE: This works properly only outside Docker. Inside Docker the Git client is + mapped to `/app`, so the result might be incorrect. + + :param only_index: if True, return only the numeric suffix (e.g., "1" from "amp1") + :return: the directory name or numeric index suffix + """ + # git_dir = get_client_root(super_module=True) + git_dir = find_git_root() + _LOG.debug("git_dir=%s", git_dir) + ret = os.path.basename(git_dir) + if only_index: + last_char = ret[-1] + hdbg.dassert( + last_char.isdigit(), + "The last char `%s` of the git dir `%s` is not a digit", + last_char, + git_dir, + ) + ret = last_char + _LOG.debug("ret=%s", ret) + return ret + + +def is_amp() -> bool: + """ + Return whether we are inside `amp` repo. + + Either as super module or a sub module depending on a current + working directory. + """ + return _is_repo("amp") or _is_repo("cmamp") or _is_repo("sorr") + + +def is_in_helpers_as_supermodule() -> bool: + """ + Return whether we are in the `helpers` repo and it's a super-module, i.e., + `helpers` by itself. + """ + return is_helpers() and not is_inside_submodule(".") + + +# TODO(gp): Be consistent with submodule and sub-module in the code. Same for +# supermodule. +def is_in_amp_as_submodule() -> bool: + """ + Return whether we are in the `amp` repo and it's a sub-module, e.g., of + `lm`. 
+ """ + return is_amp() and is_inside_submodule(".") + + +def is_in_amp_as_supermodule() -> bool: + """ + Return whether we are in the `amp` repo and it's a super-module, i.e., + `amp` by itself. + """ + return is_amp() and not is_inside_submodule(".") + + +def is_amp_present(*, dir_name: str = ".") -> bool: + """ + Return whether the `amp` dir exists. + + This is a bit of an hacky way of knowing if there is the amp + submodule. + + :param dir_name: path to the directory where we want to + check the existence of `amp`. + """ + amp_path = os.path.join(dir_name, "amp") + return os.path.exists(amp_path) + + +# Using these functions is the last resort to skip / change the tests depending +# on the repo. We should control the tests through what functionalities they +# have, rather than the name of the repo. + + +def is_cmamp() -> bool: + """ + Return whether we are inside `cmamp` repo. + """ + return _is_repo("cmamp") + + +def is_lem() -> bool: + """ + Return whether we are inside `lem` repo. + """ + return _is_repo("lem") + + +def is_lime() -> bool: + """ + Return whether we are inside `lime` repo. + """ + return _is_repo("lime") + + +# ############################################################################# + + +def _get_submodule_hash(dir_name: str) -> str: + """ + Report the Git hash that a submodule is at from the supermodule perspective. + + Uses git ls-tree to get the submodule commit hash from the parent repository. + > git ls-tree master | grep + 160000 commit 0011776388b4c0582161eb2749b665fc45b87e7e amp + + :param dir_name: the name of the submodule directory + :return: the git commit hash of the submodule + """ + hdbg.dassert_path_exists(dir_name) + # Use git ls-tree to get the submodule entry which includes its hash. + cmd = f"git ls-tree master | grep {dir_name}" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, output = data + _LOG.debug("output=%s", output) + # Parse the output; format is: "160000 commit ". 
+ data: List[str] = output.split() + _LOG.debug("data=%s", data) + # Extract the hash from the third field (index 2). + git_hash = data[2] + return git_hash + + +@functools.lru_cache() +def get_path_from_supermodule() -> Tuple[str, str]: + """ + Return the path to the Git repo including the Git submodule for a submodule. + + Returns the superproject path and submodule path, or empty for a supermodule. + E.g., + - for amp included in another repo returns 'amp' + - for amp without supermodule returns '' + + :return: tuple of (superproject_path, submodule_path) + """ + # Get the superproject working tree path. + cmd = "git rev-parse --show-superproject-working-tree" + # > cd /Users/saggese/src/.../lm/amp + # > git rev-parse --show-superproject-working-tree + # /Users/saggese/src/.../lm + # + # > cd /Users/saggese/src/.../lm + # > git rev-parse --show-superproject-working-tree + # (No result) + superproject_path: str = hsystem.system_to_one_line(cmd)[1] + _LOG.debug("superproject_path='%s'", superproject_path) + # Query the .gitmodules file to get the path for the current submodule. + cmd = ( + f"git config --file {superproject_path}/.gitmodules --get-regexp path" + '| grep $(basename "$(pwd)")' + "| awk '{ print $2 }'" + ) + # > git config --file /Users/saggese/src/.../.gitmodules --get-regexp path + # submodule.amp.path amp + submodule_path: str = hsystem.system_to_one_line(cmd)[1] + _LOG.debug("submodule_path='%s'", submodule_path) + return superproject_path, submodule_path + + +@functools.lru_cache() +def get_submodule_paths() -> List[str]: + """ + Return the path of the submodules in this repo. + + :return: list of submodule paths, e.g., ["amp"] or [] + """ + # Query .gitmodules to get submodule paths. 
+ # > git config --file .gitmodules --get-regexp path + # submodule.amp.path amp + cmd = "git config --file .gitmodules --get-regexp path | awk '{ print $2 }'" + _, txt = hsystem.system_to_string(cmd) + _LOG.debug("txt=%s", txt) + # Convert the output string to a list of paths. + files: List[str] = hsystem.text_to_list(txt) + _LOG.debug("files=%s", files) + return files + + +def has_submodules() -> bool: + """ + Return whether the repository has any submodules configured. + + :return: True if the repository contains submodules + """ + return len(get_submodule_paths()) > 0 + + +# ############################################################################# + + +def _get_hash(git_hash: str, short_hash: bool, num_digits: int = 8) -> str: + """ + Return the git hash, optionally shortened. + + :param git_hash: the full git hash + :param short_hash: if True, return only the first num_digits characters + :param num_digits: number of digits for short hash + :return: the git hash or shortened version + """ + hdbg.dassert_lte(1, num_digits) + # Return shortened hash if requested, otherwise return full hash. + if short_hash: + ret = git_hash[:num_digits] + else: + ret = git_hash + return ret + + +def _group_hashes(head_hash: str, remh_hash: str, subm_hash: str) -> str: + """ + Group multiple hashes and display which ones are equal. + + Transform three hashes into a string that shows which ones are identical. + For example, if head_hash == remh_hash, display "head_hash = remh_hash = ". + + :param head_hash: the head hash + :param remh_hash: the remote head hash + :param subm_hash: the submodule hash + :return: formatted string showing hash equality + """ + # Build a mapping from hash names to their values. + map_ = collections.OrderedDict() + map_["head_hash"] = head_hash + map_["remh_hash"] = remh_hash + if subm_hash: + map_["subm_hash"] = subm_hash + # Invert the mapping to group identical hashes together. 
+ inv_map = collections.OrderedDict() + for k, v in map_.items(): + if v not in inv_map: + inv_map[v] = [k] + else: + inv_map[v].append(k) + # Format the output so equal hashes are grouped together. + txt = [] + for k, v in inv_map.items(): + # Transform: + # ('a2bfc704', ['head_hash', 'remh_hash']) + # into + # 'head_hash = remh_hash = a2bfc704' + txt.append(f"{' = '.join(v)} = {k}") + txt = "\n".join(txt) + return txt + + +# ############################################################################# +# GitHub repository name +# ############################################################################# + + +# All functions should take as input `repo_short_name` and have a switch `mode` +# to distinguish full vs short repo name. + +# TODO(gp): Maybe rename full -> long to keep it more symmetric "short vs long". + + +def _parse_github_repo_name(repo_name: str) -> Tuple[str, str]: + """ + Parse a repo name from `git remote`. + + The supported formats are both SSH and HTTPS, e.g., + - `git@github.com:alphamatic/amp` + - `https://github.com/alphamatic/amp` + + For both of these strings the function returns ("github.com", "alphamatic/amp"). + """ + # Try to parse the SSH format, e.g., `git@github.com:alphamatic/amp` + m = re.match(r"^git@(\S+.com):(\S+)$", repo_name) + if not m: + # Try tp parse the HTTPS format, e.g., `https://github.com/alphamatic/amp` + m = re.match(r"^https://(\S+.com)/(\S+)$", repo_name) + hdbg.dassert(m, "Can't parse '%s'", repo_name) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. + assert m is not None + host_name = m.group(1) + repo_name = m.group(2) + _LOG.debug("host_name=%s repo_name=%s", host_name, repo_name) + # We expect something like "alphamatic/amp". + m = re.match(r"^\S+/\S+$", repo_name) + hdbg.dassert(m, "repo_name='%s'", repo_name) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. 
+ assert m is not None + # origin git@github.com:.../ORG_....git (fetch) + suffix_to_remove = ".git" + if repo_name.endswith(suffix_to_remove): + repo_name = repo_name[: -len(suffix_to_remove)] + return host_name, repo_name + + +def get_repo_full_name_from_dirname( + dir_name: str, include_host_name: bool +) -> str: + """ + Return the full name of the repo in a directory. + + E.g., "alphamatic/amp" or "github.com/alphamatic/amp" (if hostname included). + + This function relies on `git remote` to extract the origin URL. + + :param dir_name: directory containing the git repository + :param include_host_name: if True, prepend the GitHub hostname (e.g., + "github.com/alphamatic/amp") + :return: the full name of the repo + - E.g., "alphamatic/amp", "github.com/alphamatic/amp". + """ + hdbg.dassert_path_exists(dir_name) + cmd = f"cd {dir_name}; (git remote -v | grep origin | grep fetch)" + _, output = hsystem.system_to_string(cmd) + # > git remote -v + # origin git@github.com:alphamatic/amp (fetch) + # origin git@github.com:alphamatic/amp (push) + data: List[str] = output.split() + _LOG.debug("data=%s", data) + hdbg.dassert_eq(len(data), 3, "Expected 3 fields from git remote output") + # Extract the origin URL (second field). + repo_name = data[1] + # Parse SSH/HTTPS URL into host and org/repo parts. + host_name, repo_name = _parse_github_repo_name(repo_name) + if include_host_name: + res = f"{host_name}/{repo_name}" + else: + res = repo_name + return res + + +# ############################################################################# +# Git hash +# ############################################################################# + + +def get_head_hash(dir_name: str = ".", short_hash: bool = False) -> str: + """ + Return the git commit hash of a repository with submodule/random suffix. + + Gets the HEAD commit hash and appends either the amp submodule hash (if present) + or a random suffix to make the hash unique across different module configurations. 
+ + ``` + > git rev-parse HEAD + 4759b3685f903e6c669096e960b248ec31c63b69 + ``` + + :param dir_name: directory containing the git repository + :param short_hash: if True, return abbreviated hash (useful when combined with suffix) + :return: the commit hash with submodule/random suffix (e.g., "4759b36-abc123") + """ + hdbg.dassert_path_exists(dir_name) + # Get the commit hash, optionally abbreviated to 7 characters. + opts = "--short " if short_hash else " " + cmd = f"cd {dir_name} && git rev-parse {opts}HEAD" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, output = data + # Check whether we are building an orange image. If the condition + # is True, add './amp' hash to the tag as well. + if is_amp_present(dir_name=dir_name): + amp_hash = get_head_hash(os.path.join(dir_name, "amp"), short_hash=True) + output = output + "-" + amp_hash + else: + # Use random suffix when no submodule exists (needed for Docker image tags). + random_string = "".join( + random.choices(string.ascii_lowercase + string.digits, k=3) + ) + output = output + "-" + random_string + return output + + +def get_remote_head_hash(dir_name: str) -> str: + """ + Return the commit hash that the remote repository's HEAD points to. + + Queries the remote origin to get the current HEAD hash without fetching. + + :param dir_name: directory containing the git repository + :return: the remote HEAD commit hash + """ + hdbg.dassert_path_exists(dir_name) + sym_name = get_repo_full_name_from_dirname(dir_name, include_host_name=False) + cmd = f"git ls-remote git@github.com:{sym_name} HEAD 2>/dev/null" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, output = data + # > git ls-remote git@github.com:alphamatic/amp HEAD 2>/dev/null + # 921676624f6a5f3f36ab507baed1b886227ac2e6 HEAD + return output + + +def report_submodule_status(dir_names: List[str], short_hash: bool) -> str: + """ + Return a formatted string reporting the status of git repositories. 
+ + Reports whether each directory is a submodule, current branch, and commit hashes + (local, remote, and submodule hash if applicable). + + :param dir_names: list of directory paths to report on + :param short_hash: if True, truncate hashes to 8 characters + :return: formatted string with status information for each directory + """ + txt = [] + for dir_name in dir_names: + txt.append(f"dir_name='{dir_name}'") + txt.append(f" is_inside_submodule: {is_inside_submodule(dir_name)}") + # Get branch name, highlighting if not on master (likely indicates incomplete work). + branch_name = get_branch_name(dir_name) + if branch_name != "master": + branch_name = f"!!! {branch_name} !!!" + txt.append(f" branch: {branch_name}") + # Get local and remote commit hashes. + head_hash = get_head_hash(dir_name) + head_hash = _get_hash(head_hash, short_hash) + txt.append(f" head_hash: {head_hash}") + remh_hash = get_remote_head_hash(dir_name) + remh_hash = _get_hash(remh_hash, short_hash) + txt.append(f" remh_hash: {remh_hash}") + # Get submodule hash if this is not the root directory. + if dir_name != ".": + subm_hash = _get_submodule_hash(dir_name) + subm_hash = _get_hash(subm_hash, short_hash) + txt.append(f" subm_hash: {subm_hash}") + txt_as_str = "\n".join(txt) + return txt_as_str + + +def get_repo_full_name_from_client(super_module: bool) -> str: + """ + Return the full name of the repo (e.g., "alphamatic/amp") from a Git + client. + + :param super_module: like in get_client_root() + """ + # Get the Git remote in the dir containing the Git repo. + git_dir = get_client_root(super_module) + repo_name = get_repo_full_name_from_dirname(git_dir, include_host_name=False) + return repo_name + + +def is_cwd_git_repo() -> bool: + """ + Return whether the current directory is a git repository root. + + Checks for the presence of a .git file or directory in the current location. 
+ + :return: True if .git exists in the current directory + """ + return os.path.exists(".git") + + +# ############################################################################# +# Git path +# ############################################################################# + + +# TODO(gp): Use find_file +@functools.lru_cache() +def find_file_in_git_tree( + file_name: str, super_module: bool = True, remove_tmp_base: bool = False +) -> str: + """ + Find the path of a file in a Git tree. + + We get the Git root and then search for the file from there. + """ + root_dir = get_client_root(super_module=super_module) + cmd = rf"find {root_dir} -name '{file_name}' -not -path '*/.git/*'" + if remove_tmp_base: + cmd += r" -not -path '*/tmp\.base/*'" + _, file_name_out = hsystem.system_to_one_line(cmd) + _LOG.debug(hprint.to_str("file_name_out")) + hdbg.dassert_ne( + file_name_out, + "", + "Can't find file '%s' in dir '%s'", + file_name, + root_dir, + ) + file_name_out: str = os.path.abspath(file_name_out) + hdbg.dassert_path_exists(file_name_out) + return file_name_out + + +def get_path_from_git_root( + file_name: str, + super_module: bool, + *, + git_root: Optional[str] = None, +) -> str: + """ + Get the path of `file_name` from the root of the Git client. + + E.g., in Docker: + - `super_module=True` -> git_root=/app + - `super_module=False` -> git_root=/app/amp + + :param super_module: like get_client_root() + """ + # Get the root of the Git client. + if git_root is None: + git_root = get_client_root(super_module) + # + git_root = os.path.normpath(git_root) + _LOG.debug("git_root=%s", git_root) + file_name = os.path.normpath(file_name) + _LOG.debug("file_name=%s", file_name) + if file_name.startswith(git_root): + # Remove the `git_root` from file_name. + ret = os.path.relpath(file_name, git_root) + else: + # If the file is not under the root, we can't normalize it. 
+ raise ValueError( + f"Can't normalize file_name='{file_name}' for git_root='{git_root}'" + ) + _LOG.debug( + "file_name=%s, git_root=%s (super_module=%s) -> ret=%s", + file_name, + git_root, + super_module, + ret, + ) + return str(ret) + + +# TODO(gp): Rewrite this function in a better way. +@functools.lru_cache() +def get_amp_abs_path() -> str: + """ + Return the absolute path of `amp` dir. + """ + repo_sym_name = get_repo_full_name_from_client(super_module=False) + _LOG.debug("repo_sym_name=%s", repo_sym_name) + # + repo_sym_names = ["alphamatic/amp"] + extra_amp_repo_sym_name = ( + hrecouti.get_repo_config().get_extra_amp_repo_sym_name() + ) + repo_sym_names.append(extra_amp_repo_sym_name) + _LOG.debug("repo_sym_names=%s", repo_sym_names) + # + if repo_sym_name in repo_sym_names: + # If we are in the amp repo, then the git client root is the amp + # directory. + git_root = get_client_root(super_module=False) + amp_dir = git_root + else: + # If we are not in the amp repo, then look for the amp dir. + amp_dir = find_file_in_git_tree( + "amp", super_module=True, remove_tmp_base=True + ) + git_root = get_client_root(super_module=True) + amp_dir = os.path.join(git_root, amp_dir) + amp_dir = os.path.abspath(amp_dir) + # Sanity check. + hdbg.dassert_dir_exists(amp_dir) + return amp_dir + + +# TODO(gp): Is this needed? +def get_repo_dirs() -> List[str]: + """ + Return the list of the repo repositories, e.g., `[".", "amp", "infra"]`. + """ + dir_names = ["."] + dirs = ["amp"] + for dir_name in dirs: + if os.path.exists(dir_name): + dir_names.append(dir_name) + return dir_names + + +# TODO(gp): It should go in hdocker? +# TODO(gp): There are functions in hdocker.py that might be more general than +# this. 
+def find_docker_file( + file_name: str, + *, + root_dir: str = ".", + dir_depth: int = -1, + mode: str = "return_all_results", + candidate_files: Optional[List[str]] = None, +) -> List[str]: + """ + Convert a file or dir that was generated inside Docker to a file in the + current Git client. + + This operation is best-effort since it might not be able to find the + corresponding file in the current repo. + + E.g., + - A file like '/app/amp/core/dataflow_model/utils.py', in a Docker container + with Git root in '/app' becomes 'amp/core/dataflow_model/utils.py' + - For a file like '/app/amp/core/dataflow_model/utils.py' outside Docker, we + look for the file 'dataflow_model/utils.py' in the current client and + then normalize with respect to the + + :param dir_depth: same meaning as in `find_file_with_dir()` + :param mode: same as `system_interaction.select_result_file_from_list()` + :param candidate_files: list of results from the `find` command for unit + test mocking + :return: the best guess for the file name corresponding to `file_name` + """ + _LOG.debug(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(file_name, str) + # Clean up file name. + file_name = os.path.normpath(file_name) + _LOG.debug("file_name=%s", file_name) + # Find the file in the dir. + file_names = hsystem.find_file_with_dir( + file_name, + root_dir=root_dir, + dir_depth=dir_depth, + mode=mode, + candidate_files=candidate_files, + ) + # Purify. + _LOG.debug("Purifying file_names=%s", file_names) + file_names = [ + os.path.relpath(file_name, root_dir) for file_name in file_names + ] + return file_names + + +# TODO(gp): Use get_head_hash() and remove this. +def get_current_commit_hash(dir_name: str = ".") -> str: + """ + Return the full SHA-1 hash of the current HEAD commit. 
+ + :param dir_name: directory containing the git repository + :return: the full commit hash (e.g., "0011776388b4c0582161eb2749b665fc45b87e7e") + """ + hdbg.dassert_path_exists(dir_name) + cmd = f"cd {dir_name} && git rev-parse HEAD" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, sha = data + # 0011776388b4c0582161eb2749b665fc45b87e7e + _LOG.debug("sha=%s", sha) + return sha + + +# ############################################################################# +# Modified files +# ############################################################################# + + +def get_modified_files( + dir_name: str = ".", remove_files_non_present: bool = True +) -> List[str]: + """ + Return the files that are added and modified in the Git client. + + In other words the files that will be committed with a `git commit -am ...`. + Equivalent to `dev_scripts/git_files.sh` + + :param dir_name: directory with Git client + :param remove_files_non_present: remove the files that are not + currently present in the client + :return: list of files + """ + # If the client status is: + # > git status -s + # AM dev_scripts/infra/ssh_tunnels.py + # M helpers/git.py + # ?? linter_warnings.txt + # + # The result is: + # > git diff --cached --name-only + # dev_scripts/infra/ssh_tunnels.py + # + # > git ls-files -m + # dev_scripts/infra/ssh_tunnels.py + # helpers/git.py + cmd = "(git diff --cached --name-only; git ls-files -m) | sort | uniq" + files: List[str] = hsystem.system_to_files( + cmd, dir_name, remove_files_non_present + ) + return files + + +# TODO(gp): -> ...previously... +def get_previous_committed_files( + dir_name: str = ".", + num_commits: int = 1, + remove_files_non_present: bool = True, +) -> List[str]: + """ + Return files changed in the Git client in the last `num_commits` commits. 
+ + Equivalent to `dev_scripts/git_previous_commit_files.sh` + + :param dir_name: directory with Git client + :param num_commits: how many commits in the past to consider + :param remove_files_non_present: remove the files that are not + currently present in the client + :return: list of files + """ + cmd = [] + cmd.append('git show --pretty="" --name-only') + cmd.append(f'$(git log --author "$(git config user.name)" -{num_commits}') + cmd.append(r"""| \grep "^commit " | perl -pe 's/commit (.*)/$1/')""") + cmd_as_str = " ".join(cmd) + files: List[str] = hsystem.system_to_files( + cmd_as_str, dir_name, remove_files_non_present + ) + return files + + +def get_modified_files_in_branch( + dst_branch: str, dir_name: str = ".", remove_files_non_present: bool = True +) -> List[str]: + """ + Return files modified in the current branch with respect to `dst_branch`. + + Equivalent to `git diff --name-only master...` + Please remember that there is a difference between `master` and `origin/master`. + See https://stackoverflow.com/questions/18137175 + + :param dir_name: directory with Git client + :param dst_branch: branch to compare to, e.g., `master`, `HEAD` + :param remove_files_non_present: remove the files that are not + currently present in the client + :return: list of files + """ + if dst_branch == "HEAD": + target = dst_branch + else: + target = f"{dst_branch}..." + cmd = f"git diff --name-only {target}" + files: List[str] = hsystem.system_to_files( + cmd, dir_name, remove_files_non_present + ) + return files + + +def get_modified_and_untracked_files( + repo_path: str = ".", *, mode: str = "all" +) -> List[str]: + """ + Get list of modified and untracked files in a git repository. + + Excludes files from submodules and deleted files. 
+ + Mode options: + - "all": Both modified and untracked files (default, current behavior) + - "modified": Only files with changes (staged, modified, added, renamed, copied) + - "untracked": Only untracked files + + This includes (when mode="all"): + - Modified files (both staged and unstaged) + - Untracked files + - Cached/staged files + + The function uses `git status --porcelain -u` which shows all changes + including cached (staged) files. + + :param repo_path: Path to the git repository + :param mode: Filter mode: "all", "modified", or "untracked" + :return: List of file paths relative to repo_path + """ + hdbg.dassert_dir_exists(repo_path) + # Validate mode. + valid_modes = ["all", "modified", "untracked"] + hdbg.dassert_in( + mode, + valid_modes, + "Invalid mode '%s'; must be one of: %s", + mode, + ", ".join(valid_modes), + ) + # Get modified and untracked files, excluding submodules. + # The command uses: + # - git status --porcelain -u: Get status in machine-readable format with untracked files + # This includes both cached (staged) and modified files + # Status codes: ?? = untracked, M/A/R/C/D = modified/added/renamed/copied/deleted + cmd = f"cd {repo_path} && git status --porcelain -u" + _, output = hsystem.system_to_string(cmd, abort_on_error=False) + # Get submodule paths to exclude. + submodule_cmd = ( + f"cd {repo_path} && " + "git config -f .gitmodules --get-regexp path 2>/dev/null || true" + ) + _, submodule_output = hsystem.system_to_string( + submodule_cmd, abort_on_error=False + ) + submodule_paths = set() + for line in submodule_output.strip().split("\n"): + if line: + # Format: "submodule..path " + parts = line.split() + if len(parts) >= 2: + submodule_paths.add(parts[-1]) + # Parse output. + files = [] + for line in output.strip().split("\n"): + line = line.strip() + if not line: + continue + # Extract status code (first 2 characters) and filename (from position 3). 
+ status_code = line[:2] if len(line) >= 2 else "" + file_name = line[3:].strip() if len(line) > 3 else "" + # Filter by mode. + if mode == "untracked": + # Untracked files have status "??" + if status_code != "??": + continue + elif mode == "modified": + # Modified files have any status other than "??" + if status_code == "??": + continue + # Skip submodule paths. + is_in_submodule = any( + file_name.startswith(subpath + "/") or file_name == subpath + for subpath in submodule_paths + ) + if is_in_submodule: + _LOG.debug("Skipping submodule file: %s", file_name) + continue + # Check if file exists (exclude deleted files). + file_path = os.path.join(repo_path, file_name) + if os.path.exists(file_path) and os.path.isfile(file_path): + files.append(file_name) + else: + _LOG.debug("Skipping non-existent or non-file: %s", file_path) + return files + + +def get_summary_files_in_branch( + dst_branch: str, + *, + dir_name: str = ".", +) -> str: + """ + Report summary of files in the current branch with respect to `dst_branch'. + + Same interface as `get_modified_files_in_branch`. + """ + # File types (from https://git-scm.com/docs/git-diff). + file_types = [ + ("added", "A"), + ("copied", "C"), + ("deleted", "D"), + ("modified", "M"), + ("renamed", "R"), + ("type changed", "T"), + ("unmerged", "U"), + ("unknown", "X"), + ("broken pairing", "B"), + ] + res = "" + for tag, diff_type in file_types: + cmd = f"git diff --diff-filter={diff_type} --name-only {dst_branch}..." + files = hsystem.system_to_files( + cmd, dir_name, remove_files_non_present=False + ) + _LOG.debug("files=%s", "\n".join(files)) + if files: + res += f"# {tag}: {len(files)}\n" + res += hprint.indent("\n".join(files)) + "\n" + res = res.rstrip("\n") + return res + + +# ############################################################################# +# Git commands. 
def git_log(num_commits: int = 5, my_commits: bool = False) -> str:
    """
    Return the output of `git log` rendered as a decorated one-line graph.

    The command uses a custom `--pretty=format:` string showing the
    abbreviated hash, author name, subject, relative date, date, and ref
    names.

    :param num_commits: number of commits to report
    :param my_commits: if True, restrict the log to commits whose author
        matches the value of `git config user.name`
    :return: the text printed by `git log`
    """
    cmd_parts = [
        "git log --date=local --oneline --graph --date-order --decorate",
        "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'",
        f"-{num_commits}",
    ]
    if my_commits:
        # This doesn't work in a container if the user relies on `~/.gitconfig`
        # to set the user name.
        # TODO(gp): We should use `get_git_name()`.
        # The command substitution is double-quoted so that a multi-word user
        # name (e.g., "First Last") reaches `--author` as a single argument
        # instead of being split by the shell into extra revision arguments.
        cmd_parts.append('--author "$(git config user.name)"')
    cmd = " ".join(cmd_parts)
    data: Tuple[int, str] = hsystem.system_to_string(cmd)
    _, txt = data
    return txt
+ + :param prefix: prefix for the stash tag (e.g., "backup", "work") + :param msg: optional message to append to the stash description + :param log_level: logging level for system output + :return: tuple of (stash_tag, was_stashed) indicating success + """ + import helpers.hdatetime as hdateti + + user_name = hsystem.get_user_name() + server_name = hsystem.get_server_name() + timestamp = hdateti.get_current_timestamp_as_string("naive_ET") + # Build unique tag from context to identify who stashed what when. + tag = f"{user_name}-{server_name}-{timestamp}" + tag = prefix + "." + tag + _LOG.debug("tag='%s'", tag) + cmd = "git stash push" + _LOG.debug("msg='%s'", msg) + push_msg = tag[:] + if msg: + push_msg += ": " + msg + cmd += f" -m '{push_msg}'" + hsystem.system(cmd, suppress_output=False, log_level=log_level) + # Verify that something was actually stashed (git stash push is silent on no-op). + cmd = rf"git stash list | \grep '{tag}' | wc -l" + _, output = hsystem.system_to_string(cmd) + was_stashed = int(output) > 0 + if not was_stashed: + msg = "Nothing was stashed" + _LOG.warning(msg) + # raise RuntimeError(msg) + return tag, was_stashed + + +def git_stash_apply(mode: str, log_level: int = logging.DEBUG) -> None: + """ + Apply or pop the most recent git stash. + + Displays the stash list before applying to help the user verify they're applying + the correct stash. + + :param mode: "apply" to keep the stash or "pop" to remove after applying + :param log_level: logging level for system output + """ + _LOG.debug("# Checking stash head ...") + cmd = "git stash list | head -3" + hsystem.system(cmd, suppress_output=False, log_level=log_level) + # Restore the stashed changes, either keeping or removing the stash. 
# TODO(gp): Consider using this everywhere. Maybe it can simplify handling issues
# stemming from the super-module / sub-module repo.
def _get_git_cmd(super_module: bool) -> str:
    """
    Build a `git` command prefix that pins the repository and the work tree.

    Both `--git-dir` and `--work-tree` are derived from the root returned by
    `get_client_root(super_module=super_module)`.

    :param super_module: forwarded to `get_client_root()` to select which
        client root is used
    :return: git command prefix of the form
        "git --git-dir='<root>/.git' --work-tree='<root>'"
    """
    client_root = get_client_root(super_module=super_module)
    # Set the path to the repository (".git" directory), avoiding Git to search
    # for it (from https://git-scm.com/docs/git).
    git_dir_opt = f"--git-dir='{client_root}/.git'"
    # Explicitly specify working tree location.
    work_tree_opt = f"--work-tree='{client_root}'"
    return " ".join(["git", git_dir_opt, work_tree_opt])
def git_describe(
    match: Optional[str] = None, log_level: int = logging.DEBUG
) -> str:
    """
    Return the one-line output of `git describe --tags --always --abbrev=0`.

    With `--always`, git falls back to an abbreviated commit hash when no tag
    can describe the current commit.

    :param match: optional glob pattern passed as `--match '<match>'` to
        restrict the tags considered (e.g., "cmamp-*"); must be a non-empty
        string when given
    :param log_level: logging level for system output
    :return: the text printed by `git describe`
    """
    _LOG.debug("# Looking for version ...")
    cmd_parts = ["git describe --tags --always --abbrev=0"]
    if match is not None:
        hdbg.dassert_isinstance(match, str, "match pattern must be a string")
        hdbg.dassert_ne(match, "", "match pattern cannot be empty")
        cmd_parts.append(f"--match '{match}'")
    cmd = " ".join(cmd_parts)
    # The return code is not needed: only the printed line is reported.
    _, tag = hsystem.system_to_one_line(cmd, log_level=log_level)
    return tag
def is_client_clean(
    dir_name: str = ".",
    abort_if_not_clean: bool = False,
) -> bool:
    """
    Return whether `get_modified_files(dir_name)` reports no relevant files.

    Entries that are exactly the submodule directories `amp` or
    `helpers_root` are ignored, so a moved submodule pointer alone does not
    make the client dirty.

    :param dir_name: directory containing the git repository
    :param abort_if_not_clean: if True and the client is not clean, abort
        with a message listing the remaining modified files
    :return: True if no files are reported once the submodule entries have
        been removed
    """
    _LOG.debug(hprint.to_str("abort_if_not_clean"))
    files = get_modified_files(dir_name)
    # Exclude submodule directories from consideration. Each one is checked
    # independently: with an `if` / `elif` chain only the first match would be
    # removed, and a client where both submodules are reported would wrongly
    # be considered dirty.
    for submodule_dir in ("amp", "helpers_root"):
        if submodule_dir in files:
            _LOG.warning("Skipping '%s' in modified files", submodule_dir)
            files = [f for f in files if f != submodule_dir]
    # The client is clean iff no file is left after the filtering.
    is_clean = len(files) == 0
    if abort_if_not_clean:
        hdbg.dassert(
            is_clean, "The Git client is not clean:\n%s", "\n".join(files)
        )
    return is_clean
+ if confirm_delete: + branches_as_str = " ".join(branches) + msg = ( + hdbg.WARNING + + f": Delete {len(branches)} {mode} branch(es) '{branches_as_str}'?" + ) + hsystem.query_yes_no(msg, abort_on_no=True) + for branch in branches: + if mode == "remote": + prefix = "origin/" + hdbg.dassert( + branch.startswith(prefix), + "Remote branch '%s' needs to start with '%s'", + branch, + prefix, + ) + branch = branch[len(prefix) :] + cmd = f"{delete_cmd} {branch}" + hsystem.system( + cmd, + suppress_output=False, + log_level="echo", + abort_on_error=abort_on_error, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py new file mode 100644 index 000000000..e796b865f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py @@ -0,0 +1,1183 @@ +""" +Use cases for this module are at: +helpers/notebooks/Master_how_to_use_hgoogle_drive_api.ipynb + +Import as: + +import helpers.hgoogle_drive_api as hgodrapi +""" + +import datetime +import importlib +import logging +import os +import re +import sys +from typing import List, Optional, Union + +# Keep try-except to avoid `ModuleNotFoundError` in CI/CD (HelpersTask #1183). +try: + # Authentication for Google API to produce credentials. + import google.oauth2.service_account as goasea + + # Google API client for service objects (e.g., Drive, Sheets, etc.) + import googleapiclient.discovery as godisc + + # Built on top of Google API to simplify interactions with Google Sheets. + import gspread + + _GOOGLE_API_AVAILABLE = True +except ImportError: + # If Google API packages are not installed, set placeholders. 
def install_needed_modules(
    *, use_sudo: bool = True, venv_path: Optional[str] = None
) -> None:
    """
    Install the packages this module needs to talk to the Google APIs.

    Each package is installed through
    `hmodule.install_module_if_not_present()`, then this module is reloaded.

    :param use_sudo: whether to use sudo to install the module
    :param venv_path: path to the virtual environment E.g.,
        /Users/saggese/src/venv/client_venv.helpers
    """
    # Pairs of (importable module name, package name), processed in order.
    modules_and_packages = [
        ("google", "google-auth"),
        ("googleapiclient", "google-api-python-client"),
        ("gspread", "gspread"),
    ]
    for module_name, package_name in modules_and_packages:
        hmodule.install_module_if_not_present(
            module_name,
            package_name=package_name,
            use_sudo=use_sudo,
            use_activate=True,
            venv_path=venv_path,
        )
    # Reload this module (hgoogle_drive_api) if already imported, so that the
    # guarded imports at the top of the file are executed again.
    if __name__ in sys.modules:
        importlib.reload(sys.modules[__name__])
+ """ + # service_key_path = "/home/.config/gspread_pandas/google_secret.json" + if not service_key_path: + service_key_path = os.path.join( + os.path.expanduser("~"), + ".config", + "gspread_pandas", + "google_secret.json", + ) + service_key_path = os.path.join(os.path.dirname(__file__), service_key_path) + # Download service.json from Google API, then save it as + # /home/.config/gspread_pandas/google_secret.json + # Instructions: https://gspread-pandas.readthedocs.io/en/latest/getting_started.html#client-credentials" + hdbg.dassert_file_exists( + service_key_path, + "Failed to read service key file: %s", + service_key_path, + ) + # Scopes required for making API calls. + scopes = [ + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/spreadsheets", + ] + creds = goasea.Credentials.from_service_account_file( + service_key_path, scopes=scopes + ) + return creds + + +# ############################################################################# +# Google Sheets API +# ############################################################################# + + +# TODO(gp): Extend this to work with v3, v4, etc. +# TODO(ai_gp): Make it private if it's not called by anybody else. +def get_sheets_service(credentials: "goasea.Credentials") -> "godisc.Resource": + """ + Get Google Sheets service with provided credentials. + + :param credentials: Google credentials object. + :return: Google Sheets service instance. + """ + # Ensure credentials are provided. + hdbg.dassert(credentials, "The 'credentials' parameter must be provided") + # Build the Sheets service. + sheets_service = godisc.build( + "sheets", "v4", credentials=credentials, cache_discovery=False + ) + return sheets_service + + +def _get_gsheet_id( + credentials: "goasea.Credentials", + sheet_id: str, + *, + tab_name: Optional[str] = None, +) -> str: + """ + Get the sheet ID from the sheet name in a Google Sheets document. + + :param credentials: Google credentials object. 
def get_gsheet_name(
    url: str,
    *,
    credentials: Optional["goasea.Credentials"] = None,
) -> str:
    """
    Return the title of the Google Sheets document found at `url`.

    E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI
    -> pitchbook.Outreach_AI_companies

    :param url: URL of the Google Sheets file.
    :param credentials: Google credentials object; when None, the result of
        `get_credentials()` is used.
    :return: Title of the spreadsheet (the document, not one of its tabs).
    """
    creds = get_credentials() if credentials is None else credentials
    # TODO(ai): Should we use the Sheets API instead?
    spreadsheet_title = gspread.authorize(creds).open_by_url(url).title
    _LOG.debug("Retrieved sheet name: '%s'", spreadsheet_title)
    return spreadsheet_title
+ """ + if credentials is None: + credentials = get_credentials() + client = gspread.authorize(credentials) + spreadsheet = client.open_by_url(url) + return [sheet.title for sheet in spreadsheet.worksheets()] + + +# ############################################################################# + + +def _extract_file_id_from_url(url: str) -> str: + """ + Extract the file ID from a Google Docs/Sheets/Drive URL. + + E.g., + https://docs.google.com/spreadsheets/d/FILE_ID/... + https://docs.google.com/document/d/FILE_ID/... + https://drive.google.com/file/d/FILE_ID/... + + :param url: URL of the Google Docs/Sheets/Drive file. + :return: File ID extracted from the URL. + """ + # Handle URLs like: + # https://docs.google.com/spreadsheets/d/FILE_ID/... + # https://docs.google.com/document/d/FILE_ID/... + # https://drive.google.com/file/d/FILE_ID/... + pattern = r"/d/([a-zA-Z0-9-_]+)" + match = re.search(pattern, url) + hdbg.dassert(match, "Invalid URL format: %s", url) + file_id = match.group(1) + _LOG.debug("Extracted file ID: '%s' from URL: '%s'", file_id, url) + return file_id + + +def get_gsheet_tab_url( + url: str, + tab_name: str, + *, + credentials: Optional["goasea.Credentials"] = None, +) -> str: + """ + Generate the full URL for a specific tab in a Google Sheet. + + E.g., + - Input URL: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI + - Tab name: Sheet3 + - Output: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=229426446#gid=229426446 + + :param url: URL of the Google Sheets file. + :param tab_name: Name of the tab to generate the URL for. + :param credentials: Google credentials object. + :return: Full URL with the gid parameter for the specified tab. + """ + if credentials is None: + credentials = get_credentials() + hdbg.dassert(tab_name, "tab_name parameter must be provided") + # Extract the spreadsheet ID from the URL. 
+ sheet_id = _extract_file_id_from_url(url) + _LOG.debug("Extracted sheet_id: '%s' from URL: '%s'", sheet_id, url) + # Get the gid for the specified tab. + gid = _get_gsheet_id(credentials, sheet_id, tab_name=tab_name) + _LOG.debug("Retrieved gid: '%s' for tab: '%s'", gid, tab_name) + # Construct the full URL with the gid parameter. + full_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/edit?gid={gid}#gid={gid}" + _LOG.debug("Generated full URL: '%s'", full_url) + return full_url + + +def _freeze_rows_in_gsheet( + credentials: "goasea.Credentials", + sheet_id: str, + num_rows_to_freeze: int, + *, + tab_name: Optional[str] = None, + bold: bool = True, +) -> None: + """ + Freeze specified rows in the given sheet. + + :param credentials: Google credentials object. + :param sheet_id: ID of the Google Sheet (spreadsheet ID). + :param num_rows_to_freeze: Number of rows to freeze (starting from + row 0). + :param tab_name: Name of the sheet (tab) to freeze rows in. Defaults + to the first tab if not provided. + :param bold: If True, make the frozen rows bold. + """ + hdbg.dassert_lt(0, num_rows_to_freeze) + tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) + sheets_service = get_sheets_service(credentials) + # Build the batch update request. + requests = [] + # Add freeze rows request. + requests.append( + { + "updateSheetProperties": { + "properties": { + "sheetId": tab_id, + "gridProperties": {"frozenRowCount": num_rows_to_freeze}, + }, + "fields": "gridProperties.frozenRowCount", + } + } + ) + # Add bold formatting request if requested. 
def _set_row_height_in_gsheet(
    credentials: "goasea.Credentials",
    sheet_id: str,
    height: int,
    *,
    start_index: Optional[int] = None,
    end_index: Optional[int] = None,
    tab_name: Optional[str] = None,
) -> None:
    """
    Set the height for a range of rows in the given Google sheet.

    :param credentials: Google credentials object.
    :param sheet_id: ID of the Google Sheet (spreadsheet ID).
    :param height: Height of the rows in pixels.
    :param start_index: Starting index of the rows (zero-based). If None,
        the range starts at row 0.
    :param end_index: Ending index of the rows, used as `endIndex` of the
        request. If None, the `rowCount` reported in the tab's grid
        properties is used (1000 if it is not reported).
    :param tab_name: Name of the sheet (tab) to set row height in.
        Defaults to the first tab if not provided.
    :raises ValueError: if the resolved `start_index` is not smaller than
        the resolved `end_index`.
    """
    tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name)
    sheets_service = get_sheets_service(credentials)
    # Resolve the defaults first, so that the range check below always runs on
    # the final values, also when only one of the two bounds was passed.
    if start_index is None:
        start_index = 0
    if end_index is None:
        # The metadata are fetched only when the row count is actually needed.
        sheet_metadata = (
            sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute()
        )
        tab_properties = next(
            (
                sheet.get("properties", {})
                for sheet in sheet_metadata.get("sheets", [])
                if sheet.get("properties", {}).get("sheetId") == tab_id
            ),
            # Fall through to the default row count below if the tab is not
            # listed in the metadata.
            {},
        )
        grid_properties = tab_properties.get("gridProperties", {})
        end_index = grid_properties.get("rowCount", 1000)
    if start_index >= end_index:
        raise ValueError(
            f"Invalid params: start_index ({start_index}) must be less than end_index ({end_index})."
        )
    # Create request.
    set_row_height_request = {
        "requests": [
            {
                "updateDimensionProperties": {
                    "range": {
                        "sheetId": tab_id,
                        "dimension": "ROWS",
                        "startIndex": start_index,
                        "endIndex": end_index,
                    },
                    "properties": {"pixelSize": height},
                    "fields": "pixelSize",
                }
            }
        ]
    }
    # Get response.
    response = (
        sheets_service.spreadsheets()
        .batchUpdate(spreadsheetId=sheet_id, body=set_row_height_request)
        .execute()
    )
    _LOG.debug("response: %s", response)
def from_gsheet(
    url: str,
    *,
    tab_name: Optional[str] = None,
    credentials: Optional["goasea.Credentials"] = None,
) -> pd.DataFrame:
    """
    Load one tab of a Google Sheet into a dataframe.

    :param url: URL of the Google Sheets file.
    :param tab_name: Name of the tab to read; when None, the worksheet at
        index 0 is read.
    :param credentials: Google credentials object; when None, the result of
        `get_credentials()` is used.
    :return: pandas DataFrame built from the records of the worksheet.
    """
    creds = get_credentials() if credentials is None else credentials
    spreadsheet = gspread.authorize(creds).open_by_url(url)
    # Pick the first worksheet unless a specific tab was requested.
    worksheet = (
        spreadsheet.get_worksheet(0)
        if tab_name is None
        else spreadsheet.worksheet(tab_name)
    )
    records = worksheet.get_all_records()
    # An empty list of records is treated as an error.
    hdbg.dassert(records, "The sheet '%s' is empty", tab_name)
    sheet_df = pd.DataFrame(records)
    _LOG.debug("Data fetched")
    return sheet_df
def _create_new_google_document(
    credentials: "goasea.Credentials",
    doc_name: str,
    doc_type: str,
) -> str:
    """
    Create a new Google document (Sheet or Doc).

    :param credentials: Google credentials object.
    :param doc_name: The name of the new Google document.
    :param doc_type: The type of the Google document ('sheets' or
        'docs').
    :return: doc_id. The ID of the created document in Google Drive.
    :raises ValueError: if `doc_type` is neither 'sheets' nor 'docs'.
    """
    if doc_type not in ["sheets", "docs"]:
        raise ValueError("Invalid doc_type. Must be 'sheets' or 'docs'.")
    is_sheet = doc_type == "sheets"
    # Build the service for the respective document type.
    service = godisc.build(
        doc_type,
        "v4" if is_sheet else "v1",
        credentials=credentials,
        cache_discovery=False,
    )
    # The two APIs expect the title in different places: a Sheets
    # `Spreadsheet` resource carries it under `properties.title`, while a Docs
    # `Document` resource has a top-level `title`. Sending the Sheets-shaped
    # body to the Docs API does not set the title (unknown `properties` field).
    if is_sheet:
        create_method = service.spreadsheets().create
        document = {"properties": {"title": doc_name}}
        id_field = "spreadsheetId"
    else:
        create_method = service.documents().create
        document = {"title": doc_name}
        id_field = "documentId"
    # Ask the API to return only the ID of the created document.
    response = create_method(body=document, fields=id_field).execute()
    # Extract the document ID.
    doc_id = response.get(id_field)
    return doc_id
def create_empty_google_file(
    gfile_type: str,
    gfile_name: str,
    gdrive_folder_id: str,
    *,
    user: Optional[str] = None,
    credentials: Optional["goasea.Credentials"] = None,
) -> str:
    """
    Create an empty Google Sheet or Doc, then optionally file and share it.

    :param gfile_type: kind of file to create ('sheet' or 'doc').
    :param gfile_name: title of the new file.
    :param gdrive_folder_id: ID of the destination Drive folder; when
        empty the file is not moved.
    :param user: email address to share the file with; when empty or
        `None` the file is not shared.
    :param credentials: Google credentials object; fetched through
        `get_credentials()` when `None`.
    :return: the ID of the created Google file.
    :raises ValueError: if `gfile_type` is not 'sheet' or 'doc'.
    """
    if credentials is None:
        credentials = get_credentials()
    # Translate the user-facing type into the API name used downstream.
    if gfile_type == "sheet":
        api_doc_type = "sheets"
    elif gfile_type == "doc":
        api_doc_type = "docs"
    else:
        raise ValueError(f"Invalid gfile_type={gfile_type}")
    new_id = _create_new_google_document(
        credentials,
        doc_name=gfile_name,
        doc_type=api_doc_type,
    )
    _LOG.debug("Created a new Google %s '%s'", gfile_type, gfile_name)
    # Optional post-processing: relocate, then share.
    if gdrive_folder_id:
        move_gfile_to_dir(new_id, gdrive_folder_id, credentials=credentials)
    if user:
        share_google_file(new_id, user, credentials=credentials)
        _LOG.debug(
            "The new Google '%s': '%s' is shared with '%s'",
            gfile_type,
            gfile_name,
            user,
        )
    return new_id
def create_google_drive_folder(
    folder_name: str,
    parent_folder_id: str,
    *,
    credentials: Optional["goasea.Credentials"] = None,
) -> str:
    """
    Create a new Google Drive folder inside the given folder.

    :param folder_name: the name of the new Google Drive folder.
    :param parent_folder_id: the ID of the parent folder.
    :param credentials: Google credentials object; fetched through
        `get_credentials()` when `None`.
    :return: the ID of the created Google Drive folder.
    """
    if credentials is None:
        credentials = get_credentials()
    # Build the Google Drive service using the provided credentials.
    # TODO(gp): -> get_gdrive_service
    service = godisc.build(
        "drive", "v3", credentials=credentials, cache_discovery=False
    )
    # Define the metadata for the new folder.
    file_metadata = {
        "name": folder_name,
        "mimeType": "application/vnd.google-apps.folder",
        "parents": [parent_folder_id],
    }
    # Create the folder in Google Drive. `supportsAllDrives` is passed, as
    # in the other Drive calls of this module, so that a parent living in a
    # shared drive is accepted.
    folder = (
        service.files()
        .create(body=file_metadata, fields="id", supportsAllDrives=True)
        .execute()
    )
    # Log and return the folder ID.
    folder_id = folder.get("id")
    _LOG.debug("Created a new Google Drive folder '%s'", folder_name)
    _LOG.debug("The new folder id is '%s'", folder_id)
    return folder_id
def _get_folder_path_list(
    service: "godisc.Resource",
    file_id: str,
) -> List[str]:
    """
    Resolve the chain of folder names that contains a Drive file.

    :param service: Google Drive service instance.
    :param file_id: The ID of the file.
    :return: folder names ordered from the top-most ancestor down to the
        file's immediate parent; an empty list when the file has no parent.
    """

    def _fetch(item_id: str, fields: str) -> dict:
        # Single metadata lookup, valid for My Drive and shared drives.
        request = service.files().get(
            fileId=item_id,
            fields=fields,
            supportsAllDrives=True,
        )
        return request.execute()

    parent_ids = _fetch(file_id, "parents").get("parents", [])
    if not parent_ids:
        _LOG.debug("File is at root level")
        return []
    # Walk upwards following the first parent, collecting names leaf-first.
    names_leaf_first = []
    folder_id = parent_ids[0]
    while folder_id:
        meta = _fetch(folder_id, "name,parents")
        names_leaf_first.append(meta.get("name"))
        ancestors = meta.get("parents", [])
        folder_id = ancestors[0] if ancestors else None
    # Flip the collected names so that the path reads root-first.
    path_list = names_leaf_first[::-1]
    _LOG.debug("Folder path: %s", path_list)
    return path_list
# TODO(gp): Add clean up
# TODO(gp): Make url mandatory and when url = "tmp" use the hardcoded value.
# TODO(gp): -> save_df_to_gsheet
def save_df_to_tmp_gsheet(
    df: pd.DataFrame,
    *,
    url: str = "",
    tab_name: str = "",
    remove_empty_columns: bool = False,
    remove_stable_columns: bool = False,
    verbose: bool = True,
    credentials: Optional["goasea.Credentials"] = None,
) -> None:
    """
    Save a DataFrame to a Google Sheet.

    :param df: The DataFrame to save.
    :param url: URL of the Google Sheet (empty means default temp
        sheet).
    :param tab_name: The name of the tab to save the DataFrame to. When
        empty, the first unused name among `Sheet0` ... `Sheet99` is
        picked.
    :param remove_empty_columns: Whether to remove empty columns.
    :param remove_stable_columns: Whether to remove stable columns.
    :param verbose: Whether to print verbose output.
    :param credentials: Google credentials object; fetched through
        `get_credentials()` when `None`.
    """
    if credentials is None:
        credentials = get_credentials()
    if remove_stable_columns:
        df = hpandas.remove_stable_columns(df, verbose=verbose)
    if remove_empty_columns:
        df = hpandas.remove_empty_columns(df, verbose=verbose)
    if url == "":
        url = "https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=0#gid=0"
    if tab_name == "":
        # Find the first candidate tab name that is not already in use.
        tab_names = get_tabs_from_gsheet(url, credentials=credentials)
        for i in range(0, 100):
            candidate = "Sheet" + str(i)
            if candidate not in tab_names:
                tab_name = candidate
                break
        # `tab_name` is still empty only if every candidate was taken: fail
        # instead of silently overwriting an existing tab.
        hdbg.dassert(tab_name != "", "No empty tab name found")
    to_gsheet(
        df,
        url,
        tab_name=tab_name,
        freeze_rows=True,
        set_text_wrapping_clip=True,
        credentials=credentials,
    )
def read_all_gsheets(
    url: str, *, tab_names: Union[str, List[str]], concat: bool = False
) -> Union[pd.DataFrame, List[pd.DataFrame]]:
    """
    Read several tabs from a Google Sheet.

    :param url: The URL of the Google Sheet.
    :param tab_names: The names of the tabs to read. The string "all"
        means every tab of the sheet; any other string is treated as a
        single tab name.
    :param concat: Whether to concatenate the DataFrames.
    :return: a single concatenated DataFrame when `concat` is True and
        more than one tab was read; otherwise a list of DataFrames, one
        for each tab.
    """
    # TODO(ai_gp): -> _all_
    if tab_names == "all":
        tab_names = get_tabs_from_gsheet(url)
    elif isinstance(tab_names, str):
        # A bare string is one tab name, not a sequence of characters.
        tab_names = [tab_names]
    dfs = [get_cached_gsheet_to_df(url, tab_name) for tab_name in tab_names]
    if len(dfs) > 1 and concat:
        # Assert that the columns are the same. Compare plain lists: `==`
        # on pandas `Index` objects is elementwise and has no single truth
        # value.
        ref_columns = list(dfs[0].columns)
        for df in dfs[1:]:
            hdbg.dassert_eq(list(df.columns), ref_columns)
        # Concatenate the DataFrames.
        df = pd.concat(dfs)
        df.reset_index(drop=True, inplace=True)
        return df
    return dfs
def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str:
    """
    Remove `prefix` from the beginning of `string`.

    :param string: the string to process
    :param prefix: the prefix to strip
    :param assert_on_error: if True, raise when `string` doesn't start
        with `prefix`; if False, return `string` unchanged in that case
    :return: `string` without the leading `prefix`
    :raises RuntimeError: if the prefix is missing and `assert_on_error`
        is True
    """
    if string.startswith(prefix):
        return string[len(prefix) :]
    if assert_on_error:
        raise RuntimeError(
            f"string='{string}' doesn't start with prefix ='{prefix}'"
        )
    # Nothing to strip: hand back the input instead of hitting an unbound
    # local variable.
    return string
def get_methods(obj: Any, access: str = "all") -> List[str]:
    """
    List the names of the callable attributes of `obj`.

    :param obj: class or class object
    :param access: which names to keep
        - "all": every callable attribute
        - "private": only names starting with `_`
        - "public": only names not starting with `_`
    :return: the selected names, in `dir()` order
    :raises ValueError: if `access` is not one of the values above
    """
    # Collect the callable attributes first, as `dir()` reports them.
    callable_names = []
    for name in dir(obj):
        if callable(getattr(obj, name)):
            callable_names.append(name)
    # Then narrow the selection according to the requested visibility.
    if access == "all":
        return callable_names
    if access == "private":
        return [name for name in callable_names if name.startswith("_")]
    if access == "public":
        return [name for name in callable_names if not name.startswith("_")]
    raise ValueError(f"Invalid access='{access}'")
# From https://stackoverflow.com/questions/23852423
def is_lambda_function(method: object) -> bool:
    """
    Return whether `method` is a lambda function.

    `types.LambdaType` is the same type as regular functions, so the
    check also requires the name Python gives to lambdas, `<lambda>`.
    """
    _LOG.debug("type(method)=%s", str(type(method)))
    return (
        isinstance(method, types.LambdaType) and method.__name__ == "<lambda>"
    )
def get_size_in_bytes(obj: object, seen: Optional[set] = None) -> int:
    """
    Compute the size in bytes of `obj` together with what it references.

    :param obj: object to measure
    :param seen: ids of the objects already counted; callers normally
        leave it as `None`
    :return: total size in bytes; 0 if `obj` was already counted
    """
    # From https://github.com/bosswissam/pysize
    total = sys.getsizeof(obj)
    visited = set() if seen is None else seen
    if id(obj) in visited:
        return 0
    # Register the object before recursing so that self-referential
    # structures terminate.
    visited.add(id(obj))
    if hasattr(obj, "__dict__"):
        # Only the first class in the MRO declaring `__dict__` is inspected.
        owner = next(
            (cls for cls in obj.__class__.__mro__ if "__dict__" in cls.__dict__),
            None,
        )
        if owner is not None:
            descriptor = owner.__dict__["__dict__"]
            if inspect.isgetsetdescriptor(
                descriptor
            ) or inspect.ismemberdescriptor(descriptor):
                total += get_size_in_bytes(obj.__dict__, visited)
    if isinstance(obj, dict):
        for value in obj.values():
            total += get_size_in_bytes(value, visited)
        for key in obj.keys():
            total += get_size_in_bytes(key, visited)
    elif isinstance(obj, cabc.Iterable) and not isinstance(
        obj, (str, bytes, bytearray)
    ):
        for item in obj:
            total += get_size_in_bytes(item, visited)
    # An object can have `__slots__` together with `__dict__`.
    if hasattr(obj, "__slots__"):
        slot_names = getattr(obj, "__slots__", None)
        if slot_names is not None:
            for slot in slot_names:
                if hasattr(obj, slot):
                    total += get_size_in_bytes(getattr(obj, slot), visited)
    return total
def stacktrace_to_str() -> str:
    """
    Return the current stack trace formatted as a single string.
    """
    import traceback

    frames = traceback.format_stack()
    return "".join(frames)
def listdir(
    dir_name: str,
    pattern: str,
    only_files: bool,
    use_relative_paths: bool,
    *,
    exclude_git_dirs: bool = True,
    maxdepth: Optional[int] = None,
) -> List[str]:
    """
    Find all files and subdirectories under `dir_name` that match `pattern`.

    The search is delegated to the `find` command.

    :param dir_name: path to the directory where to look for files
    :param pattern: pattern to match a filename against (e.g., `*.py`)
    :param only_files: look for only files instead of both files and directories
    :param use_relative_paths: remove `dir_name` from path
    :param exclude_git_dirs: skip `.git` dirs
    :param maxdepth: limit the depth of directory traversal
    :return: the matching paths
    """
    hdbg.dassert_dir_exists(dir_name)
    # Quote only what goes on the command line: the unquoted `dir_name` is
    # still needed below to compute relative paths.
    cmd = ["find", shlex.quote(dir_name)]
    if maxdepth is not None:
        # `-maxdepth` is a global option of `find` and must precede the
        # tests (e.g., `-name`).
        cmd.append(f"-maxdepth {shlex.quote(str(maxdepth))}")
    cmd.append(f"-name {shlex.quote(pattern)}")
    if only_files:
        cmd.append("-type f")
    if exclude_git_dirs:
        cmd.append(r'-not -path "*/\.git/*"')
    cmd = " ".join(cmd)
    _, output = hsystem.system_to_string(cmd)
    # TODO(gp): -> system_to_files
    paths = [path for path in output.split("\n") if path != ""]
    _LOG.debug("Found %s paths in %s", len(paths), dir_name)
    _LOG.debug("\n".join(paths))
    if use_relative_paths:
        paths = [os.path.relpath(path, start=dir_name) for path in paths]
    return paths
def is_paired_jupytext_python_file(py_filename: str) -> bool:
    """
    Return if a Python file has a paired Jupyter notebook.

    A file is considered paired when a file with the same name and the
    `ipynb` extension exists next to it.

    :param py_filename: path of an existing Python file
    :return: whether the corresponding `ipynb` file exists
    """
    hdbg.dassert(
        py_filename.endswith("py"), "Invalid python filename='%s'", py_filename
    )
    hdbg.dassert_file_exists(py_filename)
    # Check if a corresponding ipynb file exists.
    ipynb_filename = change_filename_extension(py_filename, "py", "ipynb")
    is_paired = os.path.exists(ipynb_filename)
    # The arguments follow the order of the placeholders in the message.
    _LOG.debug(
        "Checking ipynb file='%s' for py file='%s': is_paired=%s",
        ipynb_filename,
        py_filename,
        is_paired,
    )
    return is_paired
def delete_file(file_name: str) -> None:
    """
    Delete a file, doing nothing if it doesn't exist.

    `/dev/null` is never deleted. Symbolic links are removed even when
    their target is missing.

    :param file_name: path of the file to delete
    """
    _LOG.debug("Deleting file '%s'", file_name)
    # hs3.dassert_is_not_s3_path(file_name)
    # `lexists` (unlike `exists`) is True for a dangling symlink, which can
    # and should be removed.
    if file_name == "/dev/null" or not os.path.lexists(file_name):
        # Nothing to delete.
        return
    try:
        os.unlink(file_name)
    except FileNotFoundError:
        # It can happen that we try to delete the file, while somebody already
        # deleted it, so we neutralize the corresponding exception
        # (`OSError: [Errno 2] No such file or directory`). Any other
        # `OSError` propagates.
        pass
+ """ + _LOG.debug( + hprint.to_str("dir_name incremental abort_if_exists ask_to_delete") + ) + hdbg.dassert_is_not(dir_name, None) + dir_name = os.path.normpath(dir_name) + if os.path.normpath(dir_name) == ".": + _LOG.debug("Can't create dir '%s'", dir_name) + exists = os.path.exists(dir_name) + is_dir = os.path.isdir(dir_name) + _LOG.debug(hprint.to_str("dir_name exists is_dir")) + if abort_if_exists: + hdbg.dassert_path_not_exists(dir_name) + # dir exists / dir does not exist + # incremental no-op mkdir + # not incremental rm+mkdir mkdir + if exists: + if incremental and is_dir: + # The dir exists and we want to keep it (i.e., incremental), so we + # are done. + # os.chmod(dir_name, 0755) + _LOG.debug( + "The dir '%s' exists and incremental=True: exiting", dir_name + ) + return + if ask_to_delete: + hsystem.query_yes_no( + f"Do you really want to delete dir '{dir_name}'?", + abort_on_no=True, + ) + # The dir exists and we want to create it from scratch (i.e., not + # incremental), so we need to delete the dir. + _LOG.debug("Deleting dir '%s'", dir_name) + if os.path.islink(dir_name): + delete_file(dir_name) + else: + hdbg.dassert_ne(os.path.normpath(dir_name), ".") + shutil.rmtree(dir_name) + _LOG.debug("Creating directory '%s'", dir_name) + # NOTE: `os.makedirs` raises `OSError` if the target directory already exists. + # A race condition can happen when another process creates our target + # directory, while we have just found that it doesn't exist, so we need to + # handle this situation gracefully. + try: + os.makedirs(dir_name) + except OSError as e: + _LOG.error(str(e)) + # It can happen that we try to create the directory while somebody else + # created it, so we neutralize the corresponding exception. + if e.errno == 17: + # OSError: [Errno 17] File exists. 
def create_dir(
    dir_name: str,
    incremental: bool,
    *,
    abort_if_exists: bool = False,
    ask_to_delete: bool = False,
    backup_dir_if_exists: bool = False,
) -> None:
    """
    Create a directory, optionally preserving a pre-existing one.

    :param incremental: if False the directory is deleted and re-created,
        otherwise an existing directory is reused as it is
    :param abort_if_exists: abort if the target directory already exists
    :param ask_to_delete: if it is not incremental and the dir exists, ask
        for confirmation before deleting it
    :param backup_dir_if_exists: if the target dir already exists, rename
        it by appending its modification time (e.g.,
        `dir.20231003_080000`) and create a new target dir. When set, the
        other flags are not used.
    """
    if not backup_dir_if_exists:
        # Plain creation: delegate everything to the worker.
        _create_dir(
            dir_name,
            incremental,
            abort_if_exists=abort_if_exists,
            ask_to_delete=ask_to_delete,
        )
        return
    if os.path.exists(dir_name):
        _LOG.debug("Dir '%s' already exists", dir_name)
        # Name the backup after the last modification time of the dir.
        modified_at = datetime.datetime.fromtimestamp(
            os.path.getmtime(dir_name)
        )
        dir_name_new = dir_name + "." + modified_at.strftime("%Y%m%d_%H%M%S")
        if os.path.exists(dir_name_new):
            # A backup with this name is already there: leave things as is.
            _LOG.warning("Dir '%s' already exists", dir_name_new)
        else:
            _LOG.warning("Renaming dir '%s' -> '%s'", dir_name, dir_name_new)
            os.rename(dir_name, dir_name_new)
    # Create new dir.
    _LOG.debug("Creating dir '%s'", dir_name)
    _create_dir(dir_name, incremental=True)
+# ############################################################################# + + +def create_soft_link(src: str, dst: str) -> None: + """ + Create a soft-link to called (where and are files + or directories as in a Linux ln command). + + This is equivalent to a command like "cp " but creating a + soft link. + """ + _LOG.debug("# CreateSoftLink") + # hs3.dassert_is_not_s3_path(src) + # hs3.dassert_is_not_s3_path(dst) + # Create the enclosing directory, if needed. + enclosing_dir = os.path.dirname(dst) + _LOG.debug("enclosing_dir=%s", enclosing_dir) + create_dir(enclosing_dir, incremental=True) + # Create the link. Note that the link source needs to be an absolute path. + src = os.path.abspath(src) + cmd = f"ln -s {src} {dst}" + hsystem.system(cmd) + + +def delete_dir( + dir_: str, + change_perms: bool = False, + errnum_to_retry_on: int = 16, + num_retries: int = 1, + num_secs_retry: int = 1, +) -> None: + """ + Delete a directory. + + :param change_perms: change permissions to -R rwx before deleting to deal with + incorrect permissions left over + :param errnum_to_retry_on: specify the error to retry on, e.g., + ``` + OSError: [Errno 16] Device or resource busy: + 'gridTmp/.nfs0000000002c8c10b00056e57' + ``` + """ + _LOG.debug("Deleting dir '%s'", dir_) + # hs3.dassert_is_not_s3_path(dir_) + if not os.path.isdir(dir_): + # No directory so nothing to do. + return + if change_perms and os.path.isdir(dir_): + cmd = "chmod -R +rwx " + dir_ + hsystem.system(cmd) + i = 1 + while True: + try: + shutil.rmtree(dir_) + # Command succeeded: exit. + break + except OSError as e: + if errnum_to_retry_on is not None and e.errno == errnum_to_retry_on: + # TODO(saggese): Make it less verbose once we know it's working + # properly. 
+ _LOG.warning( + "Couldn't delete %s: attempt=%s / %s", dir_, i, num_retries + ) + i += 1 + if i > num_retries: + hdbg.dfatal( + f"Couldn't delete {dir_} after {num_retries} attempts ({str(e)})" + ) + else: + time.sleep(num_secs_retry) + else: + # Unforeseen error: just propagate it. + raise e + + +def backup_file_or_dir_if_exists(path: str) -> None: + """ + Create a timestamped backup of a file or directory if it exists. + + If the path exists, it is moved to a new location with a timestamp + appended to the name (e.g., path.20231003_080000.backup). + + :param path: path to the file or directory to back up + """ + if not os.path.exists(path): + # Nothing to back up. + return + _LOG.warning("Path '%s' already exists: making a backup", path) + # Get current timestamp. + timestamp = datetime.datetime.now() + timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") + # Build backup path. + backup_path = f"{path}.{timestamp_str}.backup" + # Move the file or directory to backup. + shutil.move(path, backup_path) + _LOG.info("Backed up '%s' -> '%s'", path, backup_path) + + +def dassert_is_valid_file_name(file_name: str) -> None: + hdbg.dassert_isinstance(file_name, str) + hdbg.dassert_ne(file_name, "") + + +# TODO(gp): Don't use default incremental. +def create_enclosing_dir(file_name: str, incremental: bool = False) -> str: + """ + Create the dir enclosing file_name, if needed. 
+ + :param incremental: same meaning as in `create_dir()` + """ + _LOG.debug(hprint.to_str("file_name incremental")) + dassert_is_valid_file_name(file_name) + # hs3.dassert_is_not_s3_path(file_name) + # + dir_name = os.path.dirname(file_name) + _LOG.debug(hprint.to_str("dir_name")) + if dir_name != "": + _LOG.debug( + "Creating dir_name='%s' for file_name='%s'", dir_name, file_name + ) + create_dir(dir_name, incremental=incremental) + hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) + return dir_name + + +# ############################################################################# +# File. +# ############################################################################# + + +# TODO(saggese): We should have `lines` first since it is an input param. +# TODO(Nikola): Remove `use_gzip` and use `file_name` extension instead. +def to_file( + file_name: str, + txt: str, + use_gzip: bool = False, + mode: Optional[str] = None, + force_flush: bool = False, +) -> None: + """ + Write the content of txt into file_name, creating the enclosing directory + if needed. + + :param file_name: name of written file + :param txt: content of the file + :param use_gzip: whether the file should be compressed as gzip + :param mode: file writing mode + :param force_flush: whether to forcibly clear the file buffer + """ + _LOG.debug(hprint.to_str("file_name use_gzip mode force_flush")) + dassert_is_valid_file_name(file_name) + hdbg.dassert_isinstance(txt, str) + # Choose default writing mode based on compression. + if mode is None: + if use_gzip: + # Override default binary mode for `gzip`. + mode = "wt" + else: + mode = "w" + # Create the enclosing dir, if needed. + create_enclosing_dir(file_name, incremental=True) + if use_gzip: + # Check if user provided correct file name. + if not file_name.endswith(("gz", "gzip")): + _LOG.warning("The provided file extension is not for a gzip file.") + # Open gzipped file. 
+ f = gzip.open(file_name, mode) + else: + # Open regular text file. + # buffering = 0 if mode == "a" else -1 + buffering = 0 if force_flush else -1 + f = open( # pylint: disable=consider-using-with,assignment + file_name, mode, buffering=buffering + ) + # Write file contents. + f.write(txt) # type: ignore + f.close() + # Clear internal buffer of the file. + if force_flush: + f.flush() + os.fsync(f.fileno()) + + +def _raise_file_decode_error(error: Exception, file_name: str) -> None: + """ + Raise UnicodeDecodeError with detailed error message. + + :param error: raised UnicodeDecodeError + :param file_name: name of read file that raised the exception + """ + msg = [] + msg.append(f"error={error}") + msg.append(f"file_name='{file_name}'") + msg_as_str = "\n".join(msg) + _LOG.error(msg_as_str) + raise RuntimeError(msg_as_str) + + +def from_file( + file_name: str, + *, + encoding: Optional[Any] = None, +) -> str: + """ + Read contents of a file as string. + + :param file_name: path to .txt,.gz or .pq file + :param encoding: encoding to use when reading the string + :return: contents of file as string + """ + dassert_is_valid_file_name(file_name) + hdbg.dassert_path_exists(file_name) + data: str = "" + if file_name.endswith((".gz", ".gzip")): + # Open gzipped file. + f = gzip.open(file_name, "rt", encoding=encoding) + else: + # Open regular text file. + f = open( # pylint: disable=consider-using-with + file_name, "r", encoding=encoding + ) + try: + # Read data. + data = f.read() + except UnicodeDecodeError as e: + # Raise unicode decode error message. 
+ _raise_file_decode_error(e, file_name) + finally: + f.close() + hdbg.dassert_isinstance(data, str) + return data + + +# TODO(gp): Use hintro.format_size +def get_size_as_str(file_name: str) -> str: + if os.path.exists(file_name): + size_in_bytes = os.path.getsize(file_name) + if size_in_bytes < (1024**2): + size_in_kb = size_in_bytes / 1024.0 + res = "%.1f KB" % size_in_kb + elif size_in_bytes < (1024**3): + size_in_mb = size_in_bytes / (1024.0**2) + res = "%.1f MB" % size_in_mb + else: + size_in_gb = size_in_bytes / (1024.0**3) + res = "%.1f GB" % size_in_gb + else: + res = "nan" + return res + + +def remove_extension( + filename: str, + extension: str, + *, + check_file_exists: bool = False, + check_has_extension: bool = True, +) -> Optional[str]: + """ + Attempt to remove `extension` from `filename`. + + :param filename: str filename + :param extension: file extension starting with a dot. E.g., ".csv" + :return: filename without `extension`, if applicable, else returns `None`. + """ + hdbg.dassert_isinstance(filename, str) + hdbg.dassert(filename) + if check_file_exists: + hdbg.dassert_file_exists(filename) + # + hdbg.dassert_isinstance(extension, str) + hdbg.dassert( + extension.startswith("."), + "Filename extension=`%s` expected to start with `.`", + extension, + ) + # + ret: Optional[str] = None + if check_has_extension: + hdbg.dassert( + filename.endswith(extension), + "Filename '%s' doesn't have extension=`%s`", + filename, + extension, + ) + if filename.endswith(extension): + ret = filename[: -len(extension)] + return ret + + +# TODO(gp): @all Use msg in all uses of this script `jackpyc "create_executable"` +# TODO(gp): `file_name` should go last. +def create_executable_script( + file_name: str, content: str, *, msg: str = "" +) -> None: + # Write the file. + hdbg.dassert_isinstance(content, str) + to_file(file_name, content) + # Make it executable. 
+ cmd = "chmod +x " + file_name + hsystem.system(cmd) + if msg: + print(f"# {msg}:\n> {file_name}") + + +def add_suffix_to_filename( + file_name: str, + suffix: Union[int, str], + *, + before_extension: bool = True, + with_underscore: bool = True, +) -> str: + """ + Add a suffix to a file name, with or without changing the extension. + + E.g., {base_name}.{ext} -> {file_name}.{suffix}.{ext} + + :param file_name: file name to modify + :param suffix: index to add to the file name + :param before_extension: whether to insert the index before the file + extension + :param with_underscore: whether to separate the index with an + underscore + :return: modified file name with an index + """ + suffix = str(suffix) + if with_underscore: + suffix = "_" + suffix + _LOG.debug(hprint.to_str("suffix")) + # + if before_extension: + # Add the suffix to the file name before the extension. + data = file_name.rsplit(".", 1) + if len(data) == 1: + # E.g., `system_log_dir` -> `system_log_dir_1` + ret = file_name + suffix + else: + # E.g., `dir/file.txt` -> `dir/file_1.txt`. + hdbg.dassert_eq(len(data), 2, "Invalid file_name='%s'", file_name) + file_name_no_ext, ext = data + ret = file_name_no_ext + suffix + "." + ext + else: + # Add the suffix after the name of the file. + # E.g., `dir/file.txt` -> `dir/file.txt_1`. + ret = file_name + suffix + _LOG.debug(hprint.to_str("ret")) + return ret + + +def rename_file_if_exists( + file_path: str, + suffix: str, + *, + before_extension: bool = True, +) -> None: + """ + Rename a file if it exists using provided suffix. + + Used to avoid overwriting if writing multiple files with the same name. 
+ + :param file_path: a file path to modify + :param suffix: index to add to the file name + :param before_extension: whether to insert the suffix before the file extension + - if True, {file_path}.{ext} -> {file_path}.{suffix}.{ext} + - if False, {file_path}.{ext} -> {file_path}.{ext}.{suffix} + """ + if os.path.exists(file_path): + # Add a suffix to a file name. + if before_extension: + # Add a suffix before an extension, e.g., `file.suffix.csv`. + dir_path, file_name = os.path.split(file_path) + file_name, ext = os.path.splitext(file_name) + hdbg.dassert(ext.startswith("."), "Invalid extension='%s'", ext) + new_file_path = f"{file_name}.{suffix}{ext}" + new_file_path = os.path.join(dir_path, new_file_path) + else: + # Add a suffix after an extension, e.g., `file.csv.suffix`. + new_file_path = f"{file_path}.{suffix}" + hdbg.dassert_path_not_exists(new_file_path) + _LOG.debug("renaming %s to %s", file_path, new_file_path) + os.rename(file_path, new_file_path) + + +def change_file_extension(file_path: str, new_extension: str) -> str: + """ + Change the extension of a file path. + + :param file_path: The path of the file to change the extension of. + :param new_extension: The new extension to use, starting with `.` + :return: The new file path with the new extension. + """ + # Make sure the new extension starts with a dot + hdbg.dassert( + new_extension.startswith("."), "Invalid extension='%s'", new_extension + ) + # Split the file path into root and extension + file_name, _ = os.path.splitext(file_path) + # Create the new file path + new_file_path = file_name + new_extension + return new_file_path + + +def wait_for_file( + file_path: str, + *, + check_interval_in_secs: float = 0.5, + timeout_in_secs: int = 10, +) -> None: + """ + Wait until a specified file is generated or until the timeout is reached. + + :param file_path: The path of the file to wait for. 
+ :param check_interval_in_secs: Time in seconds between checks + :param timeout_in_secs: Maximum time to wait for the file in seconds + """ + _LOG.debug("Waiting for file: %s", file_path) + start_time = time.time() + while not os.path.exists(file_path): + if time.time() - start_time > timeout_in_secs: + raise ValueError(f"Timeout reached. File not found: {file_path}") + time.sleep(check_interval_in_secs) + _LOG.debug("File generated: %s", file_path) + + +# ############################################################################# +# JSON +# ############################################################################# + + +def serialize_custom_types_for_json_encoder(obj: Any) -> Any: + """ + Serialize DataFrame and other objects for JSON. + + E.g. dataframe {"A": [0, 1], "B": [0, 1]} will go to a list of dictionaries: + [{"A": 0, "B": 0}, {"A": 1, "B": 1}] - each dictionary is for one row. + """ + import numpy as np + import pandas as pd + + result = None + if isinstance(obj, pd.DataFrame): # type: ignore + result = obj.to_dict("records") + elif isinstance(obj, pd.Series): # type: ignore + result = obj.to_dict() + elif isinstance(obj, np.int64): # type: ignore + result = int(obj) + elif isinstance(obj, np.float64): # type: ignore + result = float(obj) + elif isinstance(obj, uuid.UUID): + result = str(obj) + elif isinstance(obj, datetime.date): + result = obj.isoformat() + elif isinstance(obj, type(pd.NaT)): + result = None + elif isinstance(obj, type(pd.NA)): + result = None + else: + raise TypeError(f"Can not serialize {obj} of type {type(obj)}") + return result + + +def to_json(file_name: str, obj: dict, *, use_types: bool = False) -> None: + """ + Write an object into a JSON file. + + :param obj: data for writing + :param file_name: name of file + :param use_types: whether to use jsonpickle to save the file + """ + if not file_name.endswith(".json"): + _LOG.warning("The file '%s' doesn't end in .json", file_name) + # Create dir. 
+ dir_name = os.path.dirname(file_name) + if dir_name != "" and not os.path.isdir(dir_name): + create_dir(dir_name, incremental=True) + # Write data as JSON. + with open(file_name, "w") as outfile: + if use_types: + # Use jsonpickle to save types. + import jsonpickle # type: ignore[import-untyped] + + txt = jsonpickle.encode(obj, indent=4) + outfile.write(txt) + else: + json.dump( + obj, + outfile, + indent=4, + default=serialize_custom_types_for_json_encoder, + ) + + +def from_json(file_name: str, *, use_types: bool = False) -> Dict: + """ + Read object from JSON file. + + :param file_name: name of file + :param use_types: whether to use jsonpickle to load the file + :return: dict with data + """ + hdbg.dassert(file_name) + if not file_name.endswith(".json"): + _LOG.warning("The file '%s' doesn't end in .json", file_name) + # Read file as text. + hdbg.dassert_file_exists(file_name) + txt = from_file(file_name) + # Remove comments (which are not supported natively by JSON). + txt_tmp = [] + for line in txt.split("\n"): + if re.match(r"^\s*#", line): + continue + txt_tmp.append(line) + txt_tmp = "\n".join(txt_tmp) + _LOG.debug("txt_tmp=\n%s", txt_tmp) + # Convert text into Python data structures. + data = {} + if use_types: + import jsonpickle # type: ignore + + data = jsonpickle.decode(txt_tmp) + else: + data = json.loads(txt_tmp) + return data + + +# TODO(gp): -> pandas_helpers.py +def load_df_from_json(path_to_json: str) -> "pd.DataFrame": # noqa: F821 # type: ignore + """ + Load a dataframe from a json file. + + :param path_to_json: path to the json file + :return: + """ + import pandas as pd + + # Load the dict with the data. + data = from_json(path_to_json) + # Preprocess the dict to handle arrays with different length. + data = {k: pd.Series(v) for k, v in data.items()} + # Package into a dataframe. 
+ df = pd.DataFrame(data) + return df + + +# ############################################################################# +# Directory operations +# ############################################################################# + +# Copied from `hgit.py` to avoid import cycles. + + +def _find_git_root(path: str = ".") -> str: + """ + Find recursively the dir of the outermost super module. + + This function traverses the directory hierarchy upward from a specified + starting path to find the root directory of a Git repository. + It supports: + - standard git repository: where a `.git` directory exists at the root + - submodule: where repository is nested inside another, and the `.git` file contains + a `gitdir:` reference to the submodule's actual Git directory + - linked repositories: where the `.git` file points to a custom Git directory + location, such as in Git worktrees or relocated `.git` directories + + :param path: starting file system path. Defaults to the current directory (".") + :return: absolute path to the top-level Git repository directory + """ + path = os.path.abspath(path) + git_root_dir = None + while True: + git_dir = os.path.join(path, ".git") + _LOG.debug("git_dir=%s", git_dir) + # Check if `.git` is a directory which indicates a standard Git repository. + if os.path.isdir(git_dir): + # Found the Git root directory. + git_root_dir = path + break + # Check if `.git` is a file which indicates submodules or linked setups. + if os.path.isfile(git_dir): + # Using the `open()` to avoid import cycles with the `hio` module. + with open(git_dir, "r") as f: + txt = f.read() + lines = txt.split("\n") + for line in lines: + # Look for a `gitdir:` line that specifies the linked directory. + # Example: `gitdir: ../.git/modules/helpers_root`. + if line.startswith("gitdir:"): + git_dir_path = line.split(":", 1)[1].strip() + _LOG.debug("git_dir_path=%s", git_dir_path) + # Resolve the relative path to the absolute path of the Git directory. 
+ abs_git_dir = os.path.abspath( + os.path.join(path, git_dir_path) + ) + # Traverse up to find the top-level `.git` directory. + while True: + # Check if the current directory is a `.git` directory. + if os.path.basename(abs_git_dir) == ".git": + git_root_dir = os.path.dirname(abs_git_dir) + # Found the root. + break + # Move one level up in the directory structure. + parent = os.path.dirname(abs_git_dir) + # Reached the filesystem root without finding the `.git` directory. + hdbg.dassert_ne( + parent, + abs_git_dir, + "Top-level .git directory not found.", + ) + # Continue traversing up. + abs_git_dir = parent + break + # Exit the loop if the Git root directory is found. + if git_root_dir is not None: + break + # Move up one level in the directory hierarchy. + parent = os.path.dirname(path) + # Reached the filesystem root without finding `.git`. + hdbg.dassert_ne( + parent, + path, + "No .git directory or file found in any parent directory.", + ) + # Update the path to the parent directory for the next iteration. + path = parent + return git_root_dir + + +# End copy. + + +def safe_rm_file(dir_path: str) -> None: + """ + Safely remove a file after ensuring it's within our Git client. + + This function provides a safety check to prevent accidental deletion + of files outside our Git repository. + + :param dir_path: Path to the directory to delete + :raises AssertionError: If dir_path is not within the Git client + :raises OSError: If directory doesn't exist or can't be deleted + """ + # Convert to absolute path for comparison. + dir_path = os.path.abspath(dir_path) + # Get the Git client root. + git_root = _find_git_root() + git_root = os.path.abspath(git_root) + # Ensure the directory is within our Git client. + hdbg.dassert( + dir_path.startswith(git_root), + "Directory '%s' is not within Git client root '%s'", + dir_path, + git_root, + ) + # Additional safety check: prevent deletion of Git root itself. 
+ hdbg.dassert_ne( + dir_path, + git_root, + "Cannot delete Git client root directory '%s'", + git_root, + ) + # Verify directory exists before attempting deletion. + hdbg.dassert( + os.path.exists(dir_path), + "Directory '%s' does not exist", + dir_path, + ) + hdbg.dassert( + os.path.isdir(dir_path), + "Path '%s' is not a directory", + dir_path, + ) + # Perform the deletion. + _LOG.debug("Safely removing directory: %s", dir_path) + shutil.rmtree(dir_path) + _LOG.debug("Successfully removed directory: %s", dir_path) + + +# TODO(ai_gp): Add unit tests. +def is_subdir(dir1: str, dir2: str) -> bool: + """ + Check if `dir1` is a subdirectory of `dir2`. + + :param dir1: First directory + :param dir2: Second directory + :return: True if `dir1` is a subdirectory of `dir2`, False otherwise + """ + # Resolve to absolute and normalized paths. + abs_dir1 = os.path.abspath(dir1) + abs_dir2 = os.path.abspath(dir2) + # Get the common path prefix. + common = os.path.commonpath([abs_dir1, abs_dir2]) + # It's a subdir if they share the same common path as the parent. + return common == abs_dir2 + + +def write_file_back( + file_name: str, txt_old: List[str], txt_new: List[str] +) -> None: + """ + Write new text to file only if it differs from the old text. + + :param file_name: Path to the file to write to + :param txt_old: Original text as a list of strings + :param txt_new: New text as a list of strings + """ + # Process old text. + hdbg.dassert_list_of_strings(txt_old) + txt_as_str = "\n".join(txt_old) + # Process new text. + hdbg.dassert_list_of_strings(txt_new) + txt_new_as_str = "\n".join(txt_new) + # Write file back, if needed. 
+ if txt_as_str != txt_new_as_str: + to_file(file_name, txt_new_as_str) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py new file mode 100644 index 000000000..d11ecbafc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py @@ -0,0 +1,880 @@ +""" +Import as: + +import helpers.hjoblib as hjoblib +""" + +import concurrent.futures +import logging +import math +import os +import pprint +import random +import sys +import traceback +from functools import wraps +from multiprocessing import Process, Queue +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import joblib +from joblib._store_backends import StoreBackendBase, StoreBackendMixin +from tqdm.autonotebook import tqdm + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.htimer as htimer +import helpers.htqdm as htqdm + +# Avoid dependency from other `helpers` modules, such as `helpers.hcache`, to +# prevent import cycles. 
+ + +_LOG = logging.getLogger(__name__) + +# - Assume one wants to execute `n` invocations of a given `func` +# - E.g., `func(param_1), func(param_2), ..., func(param_n)` +# - Each `param` is a tuple of `*args` and `**kwargs` to apply to `func` +# - A `Workload` is composed of: +# - `workload_func`: the function to execute +# - `func_name`: the name / description of the function `func` +# - `tasks`: a list of `n` set of parameters `*args`, `**kwargs` to apply +# to the function (e.g., `param_1`, ..., `param_n`) +# - Each `Task` executes a subset of the functions +# - `Tasks` are a partition of the function invocations, i.e., each function +# invocation is executed by one and only one task +# - The `n` `Tasks` are then executed by `k` threads in parallel or serially +# - Note that a single task can correspond to processing of multiple logical +# chunks of work, because they need to be processed together or because we +# want to enforce that it is executed on a single processor +# - E.g., if we want to concatenate files we can map multiple filenames in a +# single `Task`. In this case the `Task` contains a list of filenames to +# concatenate together + +# ############################################################################# +# Task +# ############################################################################# + +# A `Task` contains the parameters to pass to the function that needs to be +# executed. 
+# A `Task` is represented by a tuple of `*args` and `**kwargs`, e.g., +# ``` +# args=() +# kwargs={ +# 'asset_col_name': 'asset', +# 'dst_dir': './tmp.s3_out', +# 'parquet_file_names': [ +# './tmp.s3/20220110/data.parquet', +# './tmp.s3/20220111/data.parquet', +# './tmp.s3/20220112/data.parquet'] +# } +# ``` +Task = Tuple[Tuple[Any], Dict[str, Any]] + + +# TODO(gp): @Nikola add unit tests +def split_list_in_tasks( + list_in: List[Any], + n: int, + *, + keep_order: bool = False, + num_elems_per_task: Optional[int] = None, +) -> List[List[Any]]: + """ + Split a list in tasks based on the number of threads or elements per + partition. + + :param num_elems_per_task: force each task to have the given number of elements + :param keep_order: split the list so that consecutive elements of the list + are in different tasks. This favors executing the workload in order on `n` + threads + :return: list of lists of elements, where each list can be assigned to an + execution thread + + - E.g., [a, b, c, d, e] executed on 3 threads [1, 2, 3] gives the allocation + for `keep_order=True`: + ``` + 1 -> [a, d] + 2 -> [b, e] + 3 -> [c] + ``` + - For `keep_order=False` the allocation is: + ``` + 1 -> [a, b] + 2 -> [c, d] + 3 -> [e] + ``` + - For `num_elems_per_task=3` the allocation is: + ``` + 1 -> [a, b, c] + 2 -> [d, e] + 3 -> [] + ``` + """ + hdbg.dassert_lte(1, n) + hdbg.dassert_lte(n, len(list_in), "There are fewer tasks than threads") + if keep_order: + hdbg.dassert_is( + num_elems_per_task, + None, + "Can't specify num_elems_per_task with keep_order", + ) + list_out: List[list] = [[] for _ in range(n)] + for i, elem in enumerate(list_in): + _LOG.debug("%s: %s -> %s", i, elem, i % n) + list_out[i % n].append(elem) + else: + if num_elems_per_task is None: + k = int(math.ceil(len(list_in) / n)) + else: + k = num_elems_per_task + hdbg.dassert_lte(1, k) + list_out = [list_in[i : i + k] for i in range(0, len(list_in), k)] + # Ensure that the elements are all distributed. 
+ hdbg.dassert_eq(sum(len(l_) for l_ in list_out), len(list_in)) + return list_out + + +def apply_incremental_mode( + src_dst_file_name_map: List[Tuple[str, str]], +) -> List[Tuple[str, str]]: + """ + Apply incremental mode to a map of source to destination files. + + Often the function in a `Workload` corresponds to reading a file, processing it, + and writing the output in a file. In this case, applying the incremental mode + means removing the tuples in the src_file -> dst_file mapping where the dst file + already exists. + + :return: filtered mapping + """ + hdbg.dassert_container_type(src_dst_file_name_map, list, tuple) + # + src_dst_file_name_map_tmp = [] + for src_dst_file_name in src_dst_file_name_map: + # Parse the element of the mapping. + hdbg.dassert_eq(len(src_dst_file_name), 2) + src_file_name, dst_file_name = src_dst_file_name + _LOG.debug("%s -> %s", src_file_name, dst_file_name) + # Discard the mapping element if the destination file already exists. + hdbg.dassert_path_exists(src_file_name) + if os.path.exists(dst_file_name): + _LOG.debug("Skipping %s -> %s", src_file_name, dst_file_name) + else: + src_dst_file_name_map_tmp.append((src_file_name, dst_file_name)) + _LOG.info( + "After applying incremental mode, there are %s / %s files to process", + len(src_dst_file_name_map_tmp), + len(src_dst_file_name_map), + ) + return src_dst_file_name_map_tmp + + +def validate_task(task: Task) -> bool: + """ + Assert if `Task` is malformed, otherwise return True. + + A valid `Task` is a tuple `(*args, **kwargs)`. + """ + # A `Task` is a tuple. + hdbg.dassert_isinstance(task, tuple) + hdbg.dassert_eq(len(task), 2) + # Parse the `Task`. 
+ args, kwargs = task + _LOG.debug("task.args=%s", pprint.pformat(args)) + hdbg.dassert_isinstance(args, tuple) + _LOG.debug("task.kwargs=%s", pprint.pformat(kwargs)) + hdbg.dassert_isinstance(kwargs, dict) + return True + + +def task_to_string(task: Task, *, use_pprint: bool = True) -> str: + hdbg.dassert(validate_task(task)) + args, kwargs = task + txt = [] + if use_pprint: + txt.append(f"args={pprint.pformat(args)}") + txt.append(f"kwargs={pprint.pformat(kwargs)}") + else: + txt.append(f"args={str(args)}") + txt.append(f"kwargs={str(kwargs)}") + txt = "\n".join(txt) + return txt + + +# ############################################################################# +# Workload +# ############################################################################# + +# A `Workload` consists of multiple executions of a function with different +# parameters represented by `Tasks`. +# Note: `joblib_helper` can be used together with caching. The workload function +# doesn't have to be the one that is cached, but it can trigger caching of function +# results in the call stack. +Workload = Tuple[ + # `func`: the function representing the workload to execute + Callable, + # `func_name`: the mnemonic name of the function, which is used for debugging + # info and for naming the directory storing the cache + # - E.g., `vltbut.get_cached_bar_data_for_date_interval` + # - Note that the `func_name` can be different than the name of `func` + # - E.g., we can call + # `vltbut.get_cached_bar_data_for_date_interval_for_interval` inside `func`, + # in order to create a cache for + # `vltbut.get_cached_bar_data_for_date_interval`, so the cache name + # should be for `vltbut.get_cached_bar_data_for_date_interval` + str, + # `tasks`: a list of (*args, **kwargs) to pass to `func` + List[Task], +] + + +def validate_workload(workload: Workload) -> bool: + """ + Assert if the `Workload` is malformed, otherwise return True. + + A valid `Workload` is a triple `(func, func_name, List[Task])`. 
+ """ + # A valid workload` is a triple. + hdbg.dassert_isinstance(workload, tuple) + hdbg.dassert_eq(len(workload), 3) + # Parse. + workload_func, func_name, tasks = workload + # Check each component. + hdbg.dassert_callable(workload_func) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_container_type(tasks, List, tuple) + hdbg.dassert(all(validate_task(task) for task in tasks)) + return True + + +def randomize_workload( + workload: Workload, *, seed: Optional[int] = None +) -> Workload: + validate_workload(workload) + # Parse the workload. + workload_func, func_name, tasks = workload + # Randomize `tasks`. + seed = seed or 42 + random.seed(seed) + random.shuffle(tasks) + # Build a new workload. + workload = (workload_func, func_name, tasks) + validate_workload(workload) + return workload + + +def reverse_workload( + workload: Workload, *, seed: Optional[int] = None +) -> Workload: + """ + Reverse the workload. + + Typically we generate workload in chronological order, but sometimes + we want to run from most recent data to least recent, so that we + have the results about the most recent periods first, which is what + we care most about. + """ + validate_workload(workload) + # Parse the workload. + workload_func, func_name, tasks = workload + # Reverse. + _LOG.warning("Reversing the workload as per user request") + tasks = list(reversed(tasks)) + # Build a new workload. + workload = (workload_func, func_name, tasks) + validate_workload(workload) + return workload + + +def truncate_workload( + workload: Workload, + max_num: int, +) -> Workload: + """ + Limit the workload to the first `max_num` tasks. + """ + validate_workload(workload) + # Parse the workload. + workload_func, func_name, tasks = workload + # Truncate the workload. + _LOG.warning("Considering only the first %d / %d tasks", max_num, len(tasks)) + hdbg.dassert_lte(1, max_num) + hdbg.dassert_lte(max_num, len(tasks)) + tasks = tasks[:max_num] + # Build a new workload. 
+ workload = (workload_func, func_name, tasks) + validate_workload(workload) + return workload + + +def workload_to_string(workload: Workload, *, use_pprint: bool = True) -> str: + """ + Print the workload. + + E.g., + + ``` + workload_func=_LimeTask317_process_chunk + func_name=_LimeTask317_process_chunk + # task 1 / 3 + args=([('./tmp.s3/20220110/data.parquet', + './tmp.s3_out/./tmp.s3/20220110/data.parquet')],) + kwargs={} + # task 2 / 3 + args=([('./tmp.s3/20220111/data.parquet', + './tmp.s3_out/./tmp.s3/20220111/data.parquet')],) + kwargs={} + # task 3 / 3 + args=([('./tmp.s3/20220112/data.parquet', + './tmp.s3_out/./tmp.s3/20220112/data.parquet')],) + kwargs={} + ``` + """ + validate_workload(workload) + workload_func, func_name, tasks = workload + txt = [] + workload_func_str = getattr(workload_func, "__name__", "unknown_function") + txt.append(f"workload_func={workload_func_str}") + txt.append(f"func_name={func_name}") + for i, task in enumerate(tasks): + txt.append(f"# task {i + 1} / {len(tasks)}") + txt.append(task_to_string(task, use_pprint=use_pprint)) + txt = "\n".join(txt) + return txt + + +# ############################################################################# +# Template for functions to execute in parallel. +# ############################################################################# + +# NOTE: the workload function: +# - asserts if there is an error, since the return value is a string with a summary +# of the execution +# - doesn't have to be the function that we intend to cache + + +def _workload_function(*args: Any, **kwargs: Any) -> str: + """ + Execute the function task. 
+ + :raises: in case of error + :return: string representing information about the cached function + execution + """ + _ = args + incremental = kwargs.pop("incremental") + num_attempts = kwargs.pop("num_attempts") + _ = incremental, num_attempts + func_output: List[str] = [] + result = "\n".join(func_output) + return result + + +def _get_workload( + # args: argparse.Namespace +) -> None: + """ + Prepare the workload using the parameters from command line. + """ + # _ = args + + +# ############################################################################# +# Layer passing information from `parallel_execute` to the function to execute +# in parallel. +# ############################################################################# + + +def get_num_executing_threads(args_num_threads: Union[str, int]) -> int: + """ + Return the number of executing threads based on the value of + `args.num_threads`. + + E.g., + - `serial` corresponds to 1 + - `-1` corresponds to all available CPUs + """ + if args_num_threads == "serial": + num_executing_threads = 1 + elif args_num_threads == -1: + # All CPUs available. + num_executing_threads = joblib.cpu_count() + else: + # Assume it's an int. + num_executing_threads = int(args_num_threads) + hdbg.dassert_lte(1, num_executing_threads) + return num_executing_threads + + +def _run_in_process(func: Callable, q: Queue, *args: Any, **kwargs: Any) -> None: + """ + Run function as a process and store output in the input Queue. + """ + _LOG.debug("pid after processify=", os.getpid()) + try: + ret = func(*args, **kwargs) + except Exception: + # Store error logs in the queue. + ex_type, ex_value, tb = sys.exc_info() + error = ex_type, ex_value, "".join(traceback.format_tb(tb)) + ret = None + else: + error = None + q.put((ret, error)) + + +# TODO(grisha): Add type hints, add unit test to understand the behavior. 
+# From https://gist.github.com/schlamar/2311116 +# Note that this is not going to work with joblib.parallel with +# backend="multiprocessing" returning an error +# AssertionError: daemonic processes are not allowed to have children +def processify(func): + """ + Decorator to run a function as a process. + + Be sure that every argument and the return value is *pickable*. The + created process is joined, so the code does not run in parallel. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + q = Queue() + p = Process( + target=_run_in_process, args=[func] + [q] + list(args), kwargs=kwargs + ) + p.start() + ret, error = q.get() + p.join() + if error: + ex_type, ex_value, tb_str = error + message = f"{ex_value.message} (in subprocess)\n{tb_str}" + raise ex_type(message) + return ret + + return wrapper + + +def _parallel_execute_decorator( + task_idx: int, + task_len: int, + incremental: bool, + abort_on_error: bool, + num_attempts: int, + log_file: str, + # TODO(gp): Pass these parameters first. + workload_func: Callable, + func_name: str, + processify_func: bool, + task: Task, + enable_file_logging: bool, + verbose_log: bool, +) -> Any: + """ + Parameters have the same meaning as in `parallel_execute()`. + + :param abort_on_error: control whether to abort on `workload_func` function + that is failing and asserting + - If `workload_func` fails: + - if `abort_on_error=True` the exception from `workload_func` is + propagated and the return value is `None` + - if `abort_on_error=False` the exception is not propagated, but the + return value is the string representation of the exception + :param processify_func: switch to enable wrapping a function into a process + :param enable_file_logging: see same parameter in `parallel_execute()` + :param verbose_log: see same parameter in `parallel_execute()` + :return: the return value of the workload function or the exception string + """ + # Validate very carefully all the parameters. 
+ hdbg.dassert_lte(0, task_idx) + hdbg.dassert_lt(task_idx, task_len) + hdbg.dassert_isinstance(incremental, bool) + hdbg.dassert_isinstance(abort_on_error, bool) + hdbg.dassert_lte(1, num_attempts) + hdbg.dassert_isinstance(log_file, str) + hdbg.dassert_callable(workload_func) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert(validate_task(task)) + # Redirect the logging output of each task to a different file. + # TODO(gp): This file should go in the `task_dst_dir`. + # log_to_file = True + log_to_file = False + if log_to_file: + dst_dir = os.path.dirname(os.path.abspath(log_file)) + print(dst_dir) + hio.create_dir(dst_dir, incremental=True) + file_name = os.path.join( + dst_dir, f"{func_name}.{task_idx + 1}_{task_len}.log" + ) + _LOG.warning("Logging to %s", file_name) + file_handler = logging.FileHandler(file_name) + root_logger = logging.getLogger() + root_logger.addHandler(file_handler) + # Save information about the function to be executed. + txt = [] + # `start_ts` needs to be before running the function. + start_ts = hdateti.get_current_timestamp_as_string("naive_ET") + tag = f"{task_idx + 1}/{task_len} ({start_ts})" + txt.append("\n" + hprint.frame(tag) + "\n") + txt.append(f"tag={tag}") + workload_func_str = getattr(workload_func, "__name__", "unknown_function") + txt.append(f"workload_func={workload_func_str}") + txt.append(f"func_name={func_name}") + txt.append(task_to_string(task)) + # Run the workload. + args, kwargs = task + kwargs.update({"incremental": incremental, "num_attempts": num_attempts}) + with htimer.TimedScope( + logging.DEBUG, f"Execute '{workload_func_str}'" + ) as ts: + try: + if processify_func: + _LOG.debug("Using processify") + # Wrap the function into a process to enforce de-allocating + # memory at the end of the execution (see + # CmampTask5854: Resolve backtest memory leakage). 
+ _LOG.debug("pid before processify=%s", os.getpid()) + workload_func = processify(workload_func) + res = workload_func(*args, **kwargs) + error = False + except Exception as e: # pylint: disable=broad-except + exception = e + txt.append(f"exception='{str(e)}'") + res = None + error = True + _LOG.error("Execution failed") + # Save information about the execution of the function. + elapsed_time = ts.elapsed_time + end_ts = hdateti.get_current_timestamp_as_string("naive_ET") + # TODO(gp): -> func_result + if verbose_log: + txt.append(f"func_res=\n{hprint.indent(str(res))}") + else: + txt.append("func_res=") + txt.append(f"elapsed_time_in_secs={elapsed_time}") + txt.append(f"start_ts={start_ts}") + txt.append(f"end_ts={end_ts}") + txt.append(f"error={error}") + # Update log file. + txt = "\n".join(txt) + _LOG.debug("txt=\n%s", hprint.indent(txt)) + if enable_file_logging: + hio.to_file(log_file, txt, mode="a") + if error: + # The execution wasn't successful. + _LOG.error(txt) + if abort_on_error: + _LOG.error("Aborting since abort_on_error=%s", abort_on_error) + raise exception # noqa: F821 + _LOG.error( + "Continuing execution since abort_on_error=%s", abort_on_error + ) + res = str(exception) + else: + # The execution was successful. + pass + return res + + +# TODO(gp): Pass a `task_dst_dir` to each task so it can write there. +# This is a generalization of `experiment_result_dir` for `run_config_list` and +# `run_notebook`. +def parallel_execute( + workload: Workload, + # Options for the `parallel_execute` framework. + dry_run: bool, + num_threads: Union[str, int], + incremental: bool, + abort_on_error: bool, + num_attempts: int, + log_file: str, + *, + backend: str = "loky", + enable_file_logging: bool = True, + verbose_log: bool = False, +) -> Optional[List[Any]]: + """ + Run a workload in parallel using joblib or asyncio. 
+ + Note: + - if `abort_on_error=True` and a task fails early, `joblib` does not return partial results + - use `enable_logging=False` to disable logging entirely (useful for large results) + - use `verbose_log=False` to keep logging enabled but skip verbose output per task + + :param workload: the workload to execute + :param dry_run: if True, print the workload and exit without executing it + :param num_threads: joblib parameter to control how many threads to use + :param incremental: parameter passed to the function to execute to control if + we want to re-execute tasks already executed or not + :param abort_on_error: when True, if one task asserts then stop executing the + workload and return the exception of the failing task + - If False, the execution continues + :param num_attempts: number of times to attempt running a function before + declaring an error + :param log_file: file used to log information about the execution + :param backend: specify the backend type (e.g., joblib `loky` or `asyncio_process_executor`) + :param enable_file_logging: if False, skip writing any log file + :param verbose_log: if True, write detailed task results to the log file + - If False, large outputs will be omitted from the log to reduce file size + :return: results from executing `func` or the exception of the failing function + """ + # Print the parameters. + _LOG.info(hprint.frame("Workload")) + # It's too verbose to print all the workload. + # print(workload_to_string(workload, use_pprint=False)) + _LOG.info( + hprint.to_str( + "dry_run num_threads incremental num_attempts abort_on_error" + ) + ) + # Parse the workload. 
+ validate_workload(workload) + workload_func, func_name, tasks = workload + _LOG.info("Saving log info in '%s'", log_file) + _LOG.info( + "Number of executing threads=%s (%s)", + get_num_executing_threads(num_threads), + num_threads, + ) + _LOG.info("Number of tasks=%s", len(tasks)) + # + if dry_run: + file_name = "./tmp.parallel_execute.workload.txt" + workload_as_str = workload_to_string(workload, use_pprint=False) + hio.to_file(file_name, workload_as_str) + _LOG.warning("Workload saved at '%s'", file_name) + _LOG.warning("Exiting without executing workload, as per user request") + return None + # Run. + task_len = len(tasks) + tqdm_out = htqdm.TqdmToLogger(_LOG, level=logging.INFO) + tqdm_iter = tqdm( + enumerate(tasks), + total=task_len, + file=tqdm_out, + desc=f"num_threads={num_threads} backend={backend}", + ) + if backend == "threading": + # Enable wrapping a function into a process for threading backend + # to force memory de-allocation. + # TODO(Grisha): unclear if there are cases when we want to use + # `False` with `threading` backends, consider exposing to the + # interface. + # TODO(Grisha): should we enable the switch for `num_threads="serial"`? will it work? + processify_func = True + else: + processify_func = False + if num_threads == "serial": + # Execute the tasks serially. + res = [] + for task_idx, task in tqdm_iter: + _LOG.debug("\n%s", hprint.frame(f"Task {task_idx + 1} / {task_len}")) + # Execute. + res_tmp = _parallel_execute_decorator( + task_idx, + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + task, + enable_file_logging, + verbose_log, + ) + res.append(res_tmp) + else: + # Execute the tasks in parallel. + num_threads = int(num_threads) + # -1 is interpreted by joblib like for all cores. 
+ _LOG.info("Using %d threads, backend='%s'", num_threads, backend) + if backend in ("loky", "threading", "multiprocessing"): + # from joblib.externals.loky import set_loky_pickler + # set_loky_pickler('cloudpickle') + # Removed `verbose` param which causes issues in HelpersTask715. + res = joblib.Parallel(n_jobs=num_threads, backend=backend)( + joblib.delayed(_parallel_execute_decorator)( + task_idx, + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + task, + enable_file_logging, + verbose_log, + ) + # We can't use `tqdm_iter` since this only shows the submission of + # the jobs but not their completion. + for task_idx, task in enumerate(tasks) + ) + elif backend in ("asyncio_threading", "asyncio_multiprocessing"): + if backend == "asyncio_threading": + executor = concurrent.futures.ThreadPoolExecutor + elif backend == "asyncio_multiprocessing": + executor = concurrent.futures.ProcessPoolExecutor + else: + raise ValueError(f"Invalid backend='{backend}'") + func = lambda args_: _parallel_execute_decorator( + args_[0], + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + args_[1], + enable_file_logging, + verbose_log, + ) + args = list(enumerate(tasks)) + use_progress_bar = True + if not use_progress_bar: + # Implementation without progress bar. + with executor(max_workers=num_threads) as executor_: + res = list(executor_.map(func, args)) + else: + # Implementation with progress bar. 
+ res = [] + with tqdm_iter as pbar: + with executor(max_workers=num_threads) as executor_: + futures = { + executor_.submit(func, arg): arg for arg in args + } + _LOG.debug("done submitting") + for future in concurrent.futures.as_completed(futures): + res_tmp = future.result() + res.append(res_tmp) + pbar.update(1) + else: + raise ValueError(f"Invalid backend='{backend}'") + _LOG.info("Saved log info in '%s'", log_file) + return res + + +# ############################################################################# +# joblib storage backend for S3. +# ############################################################################# + +# This allows to store a joblib cache on S3. + +# Adapted from https://github.com/aabadie/joblib-s3 + + +# ############################################################################# +# _S3FSStoreBackend +# ############################################################################# + + +class _S3FSStoreBackend(StoreBackendBase, StoreBackendMixin): + """ + A StoreBackend for S3 cloud storage file system. + """ + + def __init__(self) -> None: + super().__init__() + self._objs: List[Any] = [] + + def _flush(self) -> None: + _ = self + + def clear_location(self, location: str) -> None: + """ + Check if object exists in store. + """ + if self.storage.exists(location): + self._flush() + self.storage.rm(location, recursive=True) + + def _mkdirp(self, directory: str) -> None: + """ + Create recursively a directory on the S3 store. + """ + # Remove root cachedir from input directory to create as it should + # have already been created in the configure function. + if directory.startswith(self.location): + directory = directory.replace(self.location + "/", "") + current_path = self.location + for sub_dir in directory.split("/"): + current_path = os.path.join(current_path, sub_dir) + self.storage.mkdir(current_path) + + def create_location(self, location: str) -> None: + """ + Create object location on store. 
+ """ + self._mkdirp(location) + + def get_items(self) -> List[Any]: + """ + Return the whole list of items available in cache. + """ + _ = self + return [] + + def configure( + self, + location: str, + backend_options: Dict[str, Any], + verbose: int = 0, + ) -> None: + """ + Configure the store backend. + """ + options = backend_options + hdbg.dassert_in("s3fs", options) + self.storage = options["s3fs"] + hdbg.dassert_in("bucket", options) + bucket = options["bucket"] + # Ensure the given bucket exists. + root_bucket = os.path.join("s3://", bucket) + if not self.storage.exists(root_bucket): + self.storage.mkdir(root_bucket) + if location.startswith("/"): + location.replace("/", "") + self.location = os.path.join(root_bucket, location) + if not self.storage.exists(self.location): + self.storage.mkdir(self.location) + # Computation results can be stored compressed for faster I/O. + self.compress = backend_options["compress"] + # Memory map mode is not supported. + self.mmap_mode = None + # TODO(gp): No need to flush for now. + # for fd in self._objs: + # fd.flush(force=True) + + def _open_item(self, fd: Any, mode: str) -> Any: + self._objs.append(fd) + return self.storage.open(fd, mode) + + def _item_exists(self, path: str) -> bool: + self._flush() + ret: bool = self.storage.exists(path) + return ret + + def _move_item(self, src: str, dst: str) -> None: + self.storage.mv(src, dst) + + +_REGISTER_S3FS_STORE = False + + +def register_s3fs_store_backend() -> None: + """ + Register the S3 store backend for joblib memory caching. 
+ """ + global _REGISTER_S3FS_STORE + if not _REGISTER_S3FS_STORE: + joblib.register_store_backend("s3", _S3FSStoreBackend) + _REGISTER_S3FS_STORE = True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py new file mode 100644 index 000000000..5b8aa72aa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py @@ -0,0 +1,383 @@ +""" +Import as: + +import helpers.hjupyter as hjupyte +""" + +import logging +import os +from typing import Dict, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.htimer as htimer + +_LOG = logging.getLogger(__name__) + + +def run_notebook( + file_name: str, + scratch_dir: str, + *, + pre_cmd: str = "", +) -> None: + """ + Run jupyter notebook. + + Assert if the notebook doesn't complete successfully. + + :param file_name: path to the notebook to run. If this is a .py + file, convert to .ipynb first + :param scratch_dir: temporary dir storing the output + :param pre_cmd: + """ + file_name = os.path.abspath(file_name) + hdbg.dassert_path_exists(file_name) + hio.create_dir(scratch_dir, incremental=True) + # Build command line. + cmd = [] + if pre_cmd: + cmd.append(f"{pre_cmd} &&") + # Convert .py file into .ipynb if needed. + root, ext = os.path.splitext(file_name) + if ext == ".ipynb": + notebook_name = file_name + elif ext == ".py": + cmd.append(f"jupytext --update --to notebook {file_name};") + notebook_name = f"{root}.ipynb" + else: + raise ValueError(f"Unsupported file format for file_name='{file_name}'") + # Execute notebook. 
+ cmd.append(f"cd {scratch_dir} &&") + cmd.append(f"jupyter nbconvert {notebook_name}") + cmd.append("--execute") + cmd.append("--to html") + cmd.append("--ExecutePreprocessor.kernel_name=python") + # No time-out. + cmd.append("--ExecutePreprocessor.timeout=-1") + # Execute. + cmd_as_str = " ".join(cmd) + hsystem.system(cmd_as_str, abort_on_error=True, suppress_output=False) + + +def run_notebook_cells( + notebook_path: str, + dst_notebook_path: str, + *, + num_cells: Optional[int] = None, + kernel_name: str = "python3", + timeout: int = 30, +) -> None: + """ + Execute the first N cells of a notebook and save the result. + + :param notebook_path: path to the source notebook to execute + :param dst_notebook_path: path where the executed notebook will be saved + :param num_cells: number of cells to execute from the beginning; if None, + execute all cells + :param kernel_name: name of the Jupyter kernel to use + :param timeout: execution timeout in seconds per cell + """ + import nbformat + from nbconvert.preprocessors import ExecutePreprocessor + + hdbg.dassert_path_exists(notebook_path) + # Read the notebook. + _LOG.info("Reading notebook '%s'", notebook_path) + with open(notebook_path) as f: + nb = nbformat.read(f, as_version=4) + # Truncate to first N cells if requested. + total_cells = len(nb.cells) + if num_cells is not None: + hdbg.dassert_lte(1, num_cells, "num_cells must be >= 1") + hdbg.dassert_lte( + num_cells, + total_cells, + "num_cells=%d exceeds total cells=%d in notebook", + num_cells, + total_cells, + ) + _LOG.info("Executing first %d of %d cells", num_cells, total_cells) + nb.cells = nb.cells[:num_cells] + else: + _LOG.info("Executing all %d cells", total_cells) + # Execute the cells. + ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) + ep.preprocess(nb) + # Save the executed notebook. 
+ _LOG.info("Saving executed notebook to '%s'", dst_notebook_path) + with open(dst_notebook_path, "w") as f: + nbformat.write(nb, f) + + +def build_run_notebook_cmd( + config_builder: str, + dst_dir: str, + notebook_path: str, + *, + extra_opts: str = "", +) -> str: + """ + Construct a command string to run dev_scripts/notebooks/run_notebook.py + with specified configurations. + + :param config_builder: the configuration builder to use for the + notebook execution + :param dst_dir: the destination directory where the notebook results + will be saved + :param notebook_path: the path to the notebook that should be + executed + :param extra_opts: options for "run_notebook.py", e.g., "-- + publish_notebook" + """ + # Importing inside func to avoid error while creating dockerized executable. + # TODO(Shaunak): debug why. + import helpers.hgit as hgit + + # TODO(Vlad): Factor out common code with the + # `helpers.lib_tasks_gh.publish_buildmeister_dashboard_to_s3()`. + run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") + cmd_run_txt = [ + run_notebook_script_path, + f"--notebook {notebook_path}", + f"--config_builder '{config_builder}'", + f"--dst_dir '{dst_dir}'", + f"{extra_opts}", + ] + cmd_run_txt = " ".join(cmd_run_txt) + return cmd_run_txt + + +# ############################################################################# + + +def find_paired_files( + directory: str, + *, + pattern: str = "*.py", + exclude_pattern: str = None, +) -> tuple: + """ + Find Python files and paired Jupyter notebooks in a directory. 
+ + :param directory: path to the directory to search + :param pattern: glob pattern for Python files (default: "*.py") + :param exclude_pattern: suffix pattern to exclude (e.g., "_utils.py") + :return: tuple of (python_files, paired_notebooks, unpaired_notebooks) + - python_files: list of .py files matching pattern + - paired_notebooks: list of .ipynb files with corresponding .py + - unpaired_notebooks: list of .ipynb files without corresponding .py + """ + hdbg.dassert_path_exists(directory) + # Find Python files matching pattern. + py_files = hio.listdir( + directory, + pattern, + only_files=True, + use_relative_paths=False, + maxdepth=1, + ) + # Exclude files matching exclude_pattern. + if exclude_pattern: + py_files = [f for f in py_files if not f.endswith(exclude_pattern)] + py_files = sorted(py_files) + # Find notebook files. + nb_pattern = pattern.replace(".py", ".ipynb") + nb_files = hio.listdir( + directory, + nb_pattern, + only_files=True, + use_relative_paths=False, + maxdepth=1, + ) + nb_files = sorted(nb_files) + # Build set of base names from Python files. + py_basenames = set() + for py_file in py_files: + basename = os.path.basename(py_file) + basename = os.path.splitext(basename)[0] + py_basenames.add(basename) + # Check which notebooks have corresponding .py files. + paired_notebooks = [] + unpaired_notebooks = [] + for nb_file in nb_files: + basename = os.path.basename(nb_file) + basename = os.path.splitext(basename)[0] + if basename in py_basenames: + paired_notebooks.append(nb_file) + else: + unpaired_notebooks.append(nb_file) + return py_files, paired_notebooks, unpaired_notebooks + + +def execute_file_with_docker( + file_path: str, + *, + working_dir: str, + is_notebook: bool, +) -> Tuple[bool, str, float]: + """ + Execute a Python file or notebook using docker_cmd. 
+ + :param file_path: path to the file to execute + :param working_dir: directory to cd into before execution + :param is_notebook: True if file is a notebook, False if Python script + :return: tuple of (success, error_message, elapsed_time) + """ + timer = htimer.Timer() + success = False + error_msg = "" + try: + if is_notebook: + # For notebooks, use hjupyter.run_notebook via docker_cmd. + scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") + # Build Python command to run notebook. + cmd = ( + f'python -c "' + f"import helpers.hjupyter as hjupyte; " + f"import helpers.hio as hio; " + f"hio.create_dir('{scratch_dir}', incremental=True); " + f"hjupyte.run_notebook('{file_path}', '{scratch_dir}')\"" + ) + else: + # For Python scripts, execute directly. + cmd = f"python {file_path}" + # Build invoke docker_cmd command. + docker_cmd = f'invoke docker_cmd --cmd "{cmd}"' + # Execute in the working directory. + hsystem.system( + docker_cmd, + abort_on_error=False, + suppress_output=False, + ) + success = True + except Exception as e: + error_msg = str(e) + elapsed = timer.get_elapsed() + return success, error_msg, elapsed + + +def execute_file_directly( + file_path: str, + *, + working_dir: str, + is_notebook: bool, +) -> Tuple[bool, str, float]: + """ + Execute a Python file or notebook directly (inside container). + + :param file_path: path to the file to execute + :param working_dir: directory to cd into before execution + :param is_notebook: True if file is a notebook, False if Python script + :return: tuple of (success, error_message, elapsed_time) + """ + timer = htimer.Timer() + success = False + error_msg = "" + try: + if is_notebook: + # For notebooks, use hjupyter.run_notebook. + scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") + hio.create_dir(scratch_dir, incremental=True) + run_notebook( + file_path, + scratch_dir, + pre_cmd=f"cd {working_dir}", + ) + else: + # For Python scripts, execute directly. 
+ cmd = f"cd {working_dir} && python {file_path}" + hsystem.system( + cmd, + abort_on_error=True, + suppress_output=False, + ) + success = True + except Exception as e: + error_msg = str(e) + elapsed = timer.get_elapsed() + return success, error_msg, elapsed + + +def report_execution_results( + py_results: Dict[str, Tuple[bool, str, float]], + nb_results: Dict[str, Tuple[bool, str, float]], +) -> Tuple[int, str]: + """ + Report execution results and return failure information. + + :param py_results: results from Python file execution + :param nb_results: results from notebook execution + :return: tuple of (total_failures, error_message) + """ + # Collect failures. + py_failures = [f for f, (success, _, _) in py_results.items() if not success] + nb_failures = [f for f, (success, _, _) in nb_results.items() if not success] + # Calculate statistics. + py_total = len(py_results) + py_success = py_total - len(py_failures) + nb_total = len(nb_results) + nb_success = nb_total - len(nb_failures) + total_files = py_total + nb_total + total_success = py_success + nb_success + total_failures = len(py_failures) + len(nb_failures) + # Calculate timing statistics. + py_times = [elapsed for _, _, elapsed in py_results.values()] + nb_times = [elapsed for _, _, elapsed in nb_results.values()] + py_total_time = sum(py_times) if py_times else 0.0 + nb_total_time = sum(nb_times) if nb_times else 0.0 + total_time = py_total_time + nb_total_time + # Report summary. 
+ _LOG.info("=" * 80) + _LOG.info("EXECUTION SUMMARY") + _LOG.info("=" * 80) + _LOG.info( + "Python scripts: %d total, %d success, %d failed", + py_total, + py_success, + len(py_failures), + ) + if py_total > 0: + _LOG.info(" Total time: %.2f seconds", py_total_time) + _LOG.info(" Average time: %.2f seconds", py_total_time / py_total) + _LOG.info( + "Notebooks: %d total, %d success, %d failed", + nb_total, + nb_success, + len(nb_failures), + ) + if nb_total > 0: + _LOG.info(" Total time: %.2f seconds", nb_total_time) + _LOG.info(" Average time: %.2f seconds", nb_total_time / nb_total) + _LOG.info("-" * 80) + _LOG.info( + "TOTAL: %d files, %d success, %d failed", + total_files, + total_success, + total_failures, + ) + _LOG.info("Total execution time: %.2f seconds", total_time) + # Build error message if failures exist. + error_message = "" + if total_failures > 0: + _LOG.error("=" * 80) + _LOG.error("FAILURES DETECTED") + _LOG.error("=" * 80) + if py_failures: + _LOG.error("Failed Python scripts:") + for file_path in py_failures: + basename = os.path.basename(file_path) + _, error, _ = py_results[file_path] + _LOG.error(" - %s: %s", basename, error) + if nb_failures: + _LOG.error("Failed notebooks:") + for file_path in nb_failures: + basename = os.path.basename(file_path) + _, error, _ = nb_results[file_path] + _LOG.error(" - %s: %s", basename, error) + _LOG.error("=" * 80) + error_message = ( + f"{total_failures} file(s) failed to execute. See log for details." 
+ ) + return total_failures, error_message diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py new file mode 100644 index 000000000..5e0ec6214 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py @@ -0,0 +1,334 @@ +""" +Import as: + +import helpers.hlatex as hlatex +""" + +import logging +import re +from typing import List, Optional + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Consider using `pypandoc` instead of calling `pandoc` directly. +# https://boisgera.github.io/pandoc + + +# TODO(gp): Add a switch to keep the tmp files or delete them. +def convert_pandoc_md_to_latex(txt: str) -> str: + """ + Run pandoc to convert a markdown file to a latex file. + """ + hdbg.dassert_isinstance(txt, str) + # Save to tmp file. + in_file_name = "./tmp.run_pandoc_in.md" + hio.to_file(in_file_name, txt) + # Run Pandoc. + out_file_name = "./tmp.run_pandoc_out.tex" + cmd = ( + f"pandoc {in_file_name} -o {out_file_name} --read=markdown --write=latex" + ) + container_type = "pandoc_only" + + # To minimze the dependency. + import dev_scripts_helpers.dockerize.lib_pandoc as dshdlipa + + dshdlipa.run_dockerized_pandoc(cmd, container_type) + # Read tmp file. + res = hio.from_file(out_file_name) + # Remove lines that contain \tightlist. + res = "\n".join( + [line for line in res.splitlines() if "\\tightlist" not in line] + ) + return res + + +def markdown_list_to_latex(markdown: str) -> str: + """ + Convert a Markdown list to LaTeX format. 
+ + :param markdown: The Markdown text to convert + :return: The converted LaTeX text + """ + hdbg.dassert_isinstance(markdown, str) + markdown = hprint.dedent(markdown) + # Remove the first line if it's a title. + markdown_lines = markdown.split("\n") + m = re.match(r"^(\*+ )(.*)", markdown_lines[0]) + if m: + title = m.group(2) + markdown_lines = markdown_lines[1:] + else: + title = "" + markdown = "\n".join(markdown_lines) + # Convert. + txt = convert_pandoc_md_to_latex(markdown) + # Remove `\tightlist` and empty lines. + lines = txt.splitlines() + lines = [line for line in lines if "\\tightlist" not in line] + lines = [line for line in lines if line.strip() != ""] + txt = "\n".join(lines) + # Add the title frame. + if title: + txt = f"\\begin{{frame}}{{{title}}}" + "\n" + txt + "\n" + "\\end{frame}" + return txt + + +def remove_latex_formatting(latex_string: str) -> str: + r""" + Remove LaTeX formatting such as \textcolor{color}{content} and retains only + the content. + """ + cleaned_string = re.sub( + r"\\textcolor\{[^}]*\}\{([^}]*)\}", r"\1", latex_string + ) + return cleaned_string + + +def format_latex(txt: str) -> str: + """ + Format LaTeX text using `prettier`. + + :param txt: input LaTeX text to format + :return: formatted LaTeX text + """ + file_type = "tex" + # To minimize the dependency. + import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr + + txt = dshdlipr.prettier_on_str(txt, file_type) + return txt + + +# ############################################################################# +# Frame Latex sections +# ############################################################################# + + +def _is_latex_line_separator(line: str, *, min_repeats: int = 5) -> bool: + """ + Check if the given line is a LaTeX comment separator. + + This function determines if a line consists of a comment character + `%` followed by repeated characters (`#`, `=`, `-`) that would + indicate a section separator. 
def frame_sections(lines: List[str]) -> List[str]:
    r"""
    Add line separators before LaTeX section commands.

    This function adds comment separators before \section, \subsection, and
    \subsubsection commands in LaTeX files. The separators are:
    ```
    % #####...
    \section

    % =====...
    \subsection:

    % -----...
    \subsubsection
    ```

    Existing separator comments are removed first and runs of empty lines are
    collapsed to a single empty line, so that running the function twice does
    not duplicate separators.

    The starred form (e.g., `\section*{...}`) and the form with an optional
    short title (e.g., `\section[Short]{Long}`) are recognized as section
    commands too.

    :param lines: list of strings representing the LaTeX file content
    :return: list of strings with separators added before section commands
    """
    hdbg.dassert_isinstance(lines, list)
    # Total line length is 80 characters, including the "% " prefix.
    prefix = "% "
    num_chars = 80 - len(prefix)
    # Map each section command to its separator. The command name must be
    # followed (after an optional `*`) by `[` or `{`, so that `\section` does
    # not match `\sectionmark` and similar commands.
    section_patterns = [
        (r"\\section\*?[\[{]", prefix + "#" * num_chars),
        (r"\\subsection\*?[\[{]", prefix + "=" * num_chars),
        (r"\\subsubsection\*?[\[{]", prefix + "-" * num_chars),
    ]
    txt_new: List[str] = []
    prev_was_empty = False
    for i, line in enumerate(lines):
        # Remove existing LaTeX separators: they are re-added below.
        if _is_latex_line_separator(line):
            continue
        stripped_line = line.strip()
        # Remove consecutive empty lines, leaving only one.
        is_empty = stripped_line == ""
        if is_empty and prev_was_empty:
            continue
        prev_was_empty = is_empty
        _LOG.debug("line=%d:%s", i, line)
        # Add the separator if the line is a section command.
        for pattern, separator in section_patterns:
            if re.match(pattern, stripped_line):
                _LOG.debug("  -> Found section command")
                txt_new.append(separator)
                _LOG.debug("  -> Added separator: %s", separator)
                break
        txt_new.append(line)
    hdbg.dassert_isinstance(txt_new, list)
    return txt_new
def _extract_latex_section(
    line: str, line_number: int
) -> Optional[hmarhead.HeaderInfo]:
    r"""
    Parse a LaTeX section command and extract section information.

    This function identifies LaTeX section commands (\section{}, \subsection{},
    \subsubsection{}) at the beginning of the line and extracts the section
    title:
    - `\section[Short]{Long Title}` yields "Long Title"
    - Nested braces within titles are kept (e.g.,
      `\section{Intro to \textbf{ML}}` yields `Intro to \textbf{ML}`)
    - Text after the title argument is ignored (e.g.,
      `\section{Intro} \label{sec:intro}` yields "Intro")
    - Multi-line section titles are not handled

    :param line: line of text to parse
    :param line_number: line number in the original file
    :return: HeaderInfo object if section found, None otherwise
    """
    hdbg.dassert_isinstance(line, str)
    hdbg.dassert_isinstance(line_number, int)
    # Section commands with their corresponding levels.
    section_levels = [
        (r"\\section", 1),
        (r"\\subsection", 2),
        (r"\\subsubsection", 3),
    ]
    line_stripped = line.strip()
    for command, level in section_levels:
        # Match the command, an optional [short title], and the opening brace
        # of the title.
        match = re.match(command + r"(?:\[.*?\])?\{", line_stripped)
        if not match:
            continue
        # Find the brace closing the title by tracking the nesting depth. A
        # greedy regex would run until the last `}` on the line, swallowing
        # any command following the title.
        title_start = match.end()
        title_end = -1
        depth = 1
        pos = title_start
        while pos < len(line_stripped):
            char = line_stripped[pos]
            if char == "\\":
                # Skip escaped characters such as `\{` and `\}`.
                pos += 2
                continue
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    title_end = pos
                    break
            pos += 1
        if title_end < 0:
            # The braces are unbalanced (e.g., the title continues on the next
            # line): use the last closing brace on the line, if any.
            title_end = line_stripped.rfind("}", title_start)
        if title_end < 0:
            return None
        title = line_stripped[title_start:title_end]
        # Skip sections with empty titles.
        if not title:
            return None
        return hmarhead.HeaderInfo(level, title, line_number)
    # No section command found.
    return None
+ + This function processes a LaTeX file line by line, identifies section + commands (\section, \subsection, \subsubsection), and creates a list + of HeaderInfo objects. It skips commented-out lines (lines starting + with %) and only includes headers up to the specified maximum level. + + :param lines: content of the input LaTeX file as list of strings + :param max_level: maximum header levels to parse (e.g., '3' parses + \section, \subsection, and \subsubsection, but not deeper levels) + :param sanity_check: whether to check that the header list is valid + using the same validation as Markdown headers + :return: list of HeaderInfo objects, each containing (level, title, + line_number), e.g.: + ``` + [ + HeaderInfo(1, "Introduction", 5), + HeaderInfo(2, "Background", 10), + ... + ] + ``` + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_lte(1, max_level) + header_list: hmarhead.HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(lines, start=1): + # Skip LaTeX comment lines. + if _is_latex_comment(line): + continue + # Check if this line contains a section command. + header_info = _extract_latex_section(line, line_number) + if header_info and header_info.level <= max_level: + # Add HeaderInfo to list. + header_list.append(header_info) + # Check the header list. + if sanity_check: + hmarhead.sanity_check_header_list(header_list) + else: + _LOG.debug("Skipping sanity check") + hdbg.dassert_isinstance(header_list, list) + return header_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py new file mode 100644 index 000000000..8f857d385 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py @@ -0,0 +1,29 @@ +""" +Linting utilities for text and code files. 
def lint_file(file_path: str) -> None:
    """
    Run `lint_txt.py` on the file to ensure proper formatting.

    The script is looked up with `hgit.find_file_in_git_tree()` and is invoked
    through the shell with `-i <file_path> -v CRITICAL`.

    :param file_path: path to the file to lint
    """
    # Local import to keep the module-level dependencies unchanged.
    import shlex

    _LOG.info("Linting file: %s", file_path)
    lint_script = hgit.find_file_in_git_tree("lint_txt.py", super_module=True)
    # Quote both paths since the command is a shell string: a path with
    # spaces or shell metacharacters would otherwise be split or interpreted.
    cmd = (
        f"{shlex.quote(lint_script)} -i {shlex.quote(file_path)} -v CRITICAL"
    )
    _LOG.debug("Running command: %s", cmd)
    hsystem.system(cmd, suppress_output=True)
    _LOG.info("File linted successfully: %s", file_path)
def chunk(list_: List[Any], n: int) -> List[Any]:
    """
    Split a list into `n` contiguous chunks of nearly equal size.

    When the length is not a multiple of `n`, the first `len(list_) % n`
    chunks get one extra element.

    :param list_: list to split
    :param n: number of chunks, with 1 <= n <= len(list_)
    :return: list of `n` sub-lists, preserving the original order
    """
    hdbg.dassert_lte(1, n)
    hdbg.dassert_lte(n, len(list_))
    base_size, num_longer_chunks = divmod(len(list_), n)
    chunks = []
    begin = 0
    for chunk_idx in range(n):
        # The remainder is spread over the leading chunks.
        size = base_size
        if chunk_idx < num_longer_chunks:
            size += 1
        chunks.append(list_[begin : begin + size])
        begin += size
    return chunks
+T = TypeVar("T", bound=BaseModel) + +# ############################################################################# +# Update LLM cache +# ############################################################################# + + +_UPDATE_LLM_CACHE = False + + +def set_update_llm_cache(update: bool) -> None: + """ + Set whether to update the LLM cache. + + :param update: True to update the cache, False otherwise + """ + global _UPDATE_LLM_CACHE + _UPDATE_LLM_CACHE = update + + +def get_update_llm_cache() -> bool: + """ + Get whether to update the LLM cache. + + :return: True if the cache should be updated, False otherwise + """ + return _UPDATE_LLM_CACHE + + +# ############################################################################# +# Utility Functions +# ############################################################################# + + +def _get_llm_provider_and_model(model: str) -> Tuple[str, str]: + """ + Get the provider and model names from a model string. + + The model can be specified as: + - "gpt-4o-mini" + - "openai/gpt-4o-mini" + - "deepseek/deepseek-r1-0528-qwen3-8b:free/" + + :param model: model to use for the completion + :return: tuple of provider name and model name + """ + if "/" in model: + if model.startswith("openai/"): + provider_name = "openai" + model = model.split("/")[1] + else: + provider_name = "openrouter" + else: + provider_name = "openai" + hdbg.dassert_in( + provider_name, + ("openai", "openrouter"), + "Unknown provider: %s", + provider_name, + ) + return provider_name, model + + +def response_to_txt(response: Any) -> str: + """ + Convert an OpenAI API response to a text string. 
def build_chat_completion_messages(
    system_prompt: str,
    user_prompt: str,
    *,
    images_as_base64: Optional[Tuple[str, ...]] = None,
) -> List[Dict[str, Any]]:
    """
    Build the `messages` payload for the Chat Completions API.

    The payload has a system message followed by a user message. Without
    images the user content is the prompt string; with images it is a list
    with one text part followed by one `image_url` part per image, each
    encoded as a JPEG data URL.

    :param system_prompt: system prompt
    :param user_prompt: user prompt
    :param images_as_base64: base64-encoded images
    :return: messages in the format expected by the Chat Completions API
    """
    hdbg.dassert_isinstance(system_prompt, str)
    hdbg.dassert_isinstance(user_prompt, str)
    # Text-only message by default.
    user_content: Any = user_prompt
    if images_as_base64:
        # Multi-modal message with text and images.
        image_parts = [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
            }
            for encoded_image in images_as_base64
        ]
        user_content = [{"type": "text", "text": user_prompt}, *image_parts]
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
+ + :param client: LLM client + :param cost_tracker: LLMCostTracker instance to track costs + :param use_responses_api: whether to use the Responses API instead + of Chat Completions + :return: OpenAI API result as a dictionary + """ + if not use_responses_api: + messages = build_chat_completion_messages( + system_prompt, user_prompt, images_as_base64=images_as_base64 + ) + completion = client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + **create_kwargs, + ) + else: + user_input = build_responses_input( + user_prompt, images_as_base64=images_as_base64 + ) + completion = client.responses.create( + model=model, + instructions=system_prompt, + input=user_input, + temperature=temperature, + **create_kwargs, + ) + completion_obj = completion.to_dict() + if isinstance(completion, openai.types.responses.Response): + # Store the output of the Responses API. + completion_obj["output_text"] = completion.output_text + if cost_tracker is not None: + # Calculate the cost of the completion. + hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) + cost = cost_tracker.calculate_cost(completion, model) + cost_tracker.accumulate_cost(cost) + # Store the cost in the completion object. + completion_obj["cost"] = cost + return completion_obj + + +@hcacsimp.simple_cache( + cache_type="pickle", + write_through=True, + exclude_keys=["client", "cache_mode", "cost_tracker"], +) +def _call_structured_api_sync( + # pylint: disable=unused-argument + # This is needed to support caching. + cache_mode: str, + client: openai.OpenAI, + model: str, + user_prompt: str, + system_prompt: str, + temperature: float, + response_format: type[T], + *, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + print_cost: bool = False, + **create_kwargs, +) -> T: + """ + Make a non-streaming structured API call. + + See `get_structured_completion()` for parameter descriptions. 
+ + :param client: LLM client + :param response_format: expected structured output format + :return: parsed output as the specified Pydantic model + """ + user_input = build_responses_input( + user_prompt, images_as_base64=images_as_base64 + ) + response = client.responses.parse( + model=model, + instructions=system_prompt, + input=user_input, + temperature=temperature, + text_format=response_format, + **create_kwargs, + ) + # Extract the parsed output. + parsed_output: T = response.output_parsed + # Track costs. + if cost_tracker is not None: + hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) + cost = cost_tracker.calculate_cost(response) + cost_tracker.accumulate_cost(cost) + if print_cost: + _LOG.info("cost=%.6f", cost) + return parsed_output + + +# ############################################################################# +# LLMClient +# ############################################################################# + + +class LLMClient: + """ + Class to handle LLM API client creation and requests. + """ + + def __init__( + self, + model: str, + ) -> None: + """ + Initialize the LLMClient. + + The model can be specified as: + - "gpt-4o-mini" + - "openai/gpt-4o-mini" + - "deepseek/deepseek-r1-0528-qwen3-8b:free/" + + :param model: model to use for the completion. + """ + hdbg.dassert_isinstance(model, str) + if model == "": + provider_name, model = self.get_default_model() + else: + provider_name, model = _get_llm_provider_and_model(model) + + self.provider_name = provider_name + self.model = model + self.client = None + + def get_default_model(self) -> Tuple[str, str]: + """ + Get the default provider and model for the client. + + :return: default provider and model used in the client + """ + provider_name = "openai" + model = self._get_default_model(provider_name) + return provider_name, model + + def create_client(self) -> None: + """ + Create an LLM client. 
+ """ + if self.provider_name == "openai": + base_url = "https://api.openai.com/v1" + api_key = os.environ.get("OPENAI_API_KEY") + elif self.provider_name == "openrouter": + base_url = "https://openrouter.ai/api/v1" + api_key = os.environ.get("OPENROUTER_API_KEY") + else: + raise ValueError(f"Unknown provider: {self.provider_name}") + _LOG.debug(hprint.to_str("self.provider_name base_url")) + client = openai.OpenAI(base_url=base_url, api_key=api_key) + self.client = client + + def call_llm( + self, + cache_mode: str, + user_prompt: str, + system_prompt: str, + temperature: float, + *, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + use_responses_api: bool = False, + **create_kwargs, + ) -> Dict[Any, Any]: + """ + Call the LLM API. + + Check `_call_api_sync()` params for more details. + """ + return _call_api_sync( + cache_mode=cache_mode, + client=self.client, + user_prompt=user_prompt, + system_prompt=system_prompt, + temperature=temperature, + model=self.model, + images_as_base64=images_as_base64, + cost_tracker=cost_tracker, + use_responses_api=use_responses_api, + **create_kwargs, + ) + + def _get_default_model(self, provider_name: str) -> str: + """ + Get the default model for a provider. 
# NOTE(review): `lru_cache` memoizes in-process before `cache_mode` is
# consulted, so a repeated identical call returns the memoized result (and
# skips cost tracking) even with "DISABLE_CACHE" / "REFRESH_CACHE". It also
# requires all the arguments, including `create_kwargs` values, to be hashable.
# Confirm that this is intended.
@functools.lru_cache(maxsize=1024)
def get_completion(
    user_prompt: str,
    *,
    system_prompt: str = "",
    model: str = "",
    report_progress: bool = False,
    print_cost: bool = False,
    cache_mode: str = "DISABLE_CACHE",
    temperature: float = 0.1,
    images_as_base64: Optional[Tuple[str, ...]] = None,
    cost_tracker: Optional["hllmcost.LLMCostTracker"] = None,
    use_responses_api: bool = False,
    return_raw: bool = False,
    **create_kwargs,
) -> Union[str, Dict[Any, Any]]:
    """
    Generate a completion using OpenAI's API.

    :param user_prompt: user input message
    :param system_prompt: system instruction
    :param model: model to use or empty string to use the default model
    :param report_progress: whether to stream the output and report progress
        running the API call. Streaming bypasses the cache and the cost
        tracking
    :param print_cost: whether to log the cost of the call, when available
    :param cache_mode:
        - "DISABLE_CACHE": No caching
        - "REFRESH_CACHE": Make API calls and save responses to cache
        - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache
        - "NORMAL": Use cached responses if available, otherwise make API call
    :param temperature: adjust an LLM's sampling diversity: lower values make it
        more deterministic, while higher values foster creative variation
    :param images_as_base64: base64-encoded images to include in the user message
    :param cost_tracker: LLMCostTracker instance to track costs
    :param use_responses_api: whether to use the Responses API instead of Chat
        Completions
    :param return_raw: whether to return the raw API response instead of
        extracting the text content
    :param create_kwargs: additional params for the API call
    :return: API response or its text content
    """
    hdbg.dassert_in(
        cache_mode,
        ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"),
    )
    if get_update_llm_cache():
        cache_mode = "REFRESH_CACHE"
    # Initialize LLM client.
    # Skip client creation for HIT_CACHE_OR_ABORT mode since:
    # - If cache hits, we never use the client
    # - If cache misses, we abort before calling the function
    llm_client = LLMClient(model=model)
    if cache_mode != "HIT_CACHE_OR_ABORT":
        llm_client.create_client()
    if use_responses_api and llm_client.provider_name != "openai":
        raise ValueError(
            "Responses API is only supported for the 'openai' provider."
        )
    if report_progress and return_raw:
        raise ValueError(
            "Streaming mode is only supported while returning text content."
        )
    if report_progress and cache_mode == "HIT_CACHE_OR_ABORT":
        raise ValueError(
            "Streaming mode (report_progress=True) is not supported with "
            "cache_mode='HIT_CACHE_OR_ABORT'."
        )
    _LOG.info("LLM API call ... ")
    memento = htimer.dtimer_start(logging.DEBUG, "LLM API call")
    if not report_progress:
        completion = llm_client.call_llm(
            cache_mode=cache_mode,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=temperature,
            images_as_base64=images_as_base64,
            cost_tracker=cost_tracker,
            use_responses_api=use_responses_api,
            **create_kwargs,
        )
        if not use_responses_api:
            txt_response = completion["choices"][0]["message"]["content"]
        else:
            txt_response = completion["output_text"]
    else:
        # TODO(gp): This is not working. It doesn't show the progress and it
        # doesn't show the cost.
        # Stream the output to show progress.
        collected_messages = []
        if not use_responses_api:
            # Stream Chat Completions API.
            messages = build_chat_completion_messages(
                system_prompt, user_prompt, images_as_base64=images_as_base64
            )
            # Use the model resolved by the client: the `model` argument can
            # be empty (default model) or carry the "openai/" prefix.
            completion = llm_client.client.chat.completions.create(
                model=llm_client.model,
                messages=messages,
                temperature=temperature,
                stream=True,
                **create_kwargs,
            )
            for chunk in tqdm.tqdm(
                completion, desc="Generating completion", unit=" chunks"
            ):
                # A chunk can carry no choices (e.g., a usage-only chunk).
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta.content
                if delta is not None:
                    collected_messages.append(delta)
        else:
            # Stream Responses API.
            user_input = build_responses_input(
                user_prompt, images_as_base64=images_as_base64
            )
            completion = llm_client.client.responses.create(
                model=llm_client.model,
                instructions=system_prompt,
                input=user_input,
                temperature=temperature,
                stream=True,
                **create_kwargs,
            )
            for event in tqdm.tqdm(
                completion, desc="Generating response", unit=" events"
            ):
                if event.type == "response.output_text.delta":
                    # The delta of a text event is the text itself; the
                    # `.value` form is kept as a fallback.
                    delta = event.delta
                    if not isinstance(delta, str):
                        delta = delta.value
                    collected_messages.append(delta)
        txt_response = "".join(collected_messages)
    # Report the time taken.
    msg, _ = htimer.dtimer_stop(memento)
    _LOG.info(msg)
    # The cost is stored only in the dict returned by the non-streaming path:
    # in streaming mode `completion` is an already consumed stream.
    if print_cost and isinstance(completion, dict) and "cost" in completion:
        _LOG.info("cost=%.6f", completion["cost"])
    if return_raw:
        # Return the full completion/response object.
        return completion
    return txt_response
def apply_prompt_to_dataframe(
    df: Any,
    prompt: str,
    model: str,
    input_col: str,
    response_col: str,
    *,
    chunk_size: int = 50,
    allow_overwrite: bool = False,
) -> Any:
    """
    Apply an LLM prompt to a column of a dataframe, one chunk at a time.

    The values of each chunk are sent as numbered lines ("1: <value>", ...)
    in a single user message, with `prompt` as system prompt. The response
    must contain exactly one non-empty line "<number>: <result>" per input
    row.

    :param df: dataframe to process; it is modified in place
    :param prompt: system prompt applied to each chunk
    :param model: model to use, as in `get_completion()`
    :param input_col: column with the input values
    :param response_col: column to store the results
    :param chunk_size: number of rows sent in each LLM call
    :param allow_overwrite: whether `response_col` can already exist in `df`
    :return: the same dataframe with `response_col` filled
    """
    _LOG.debug(hprint.to_str("prompt model input_col response_col chunk_size"))
    hdbg.dassert_in(input_col, df.columns)
    if not allow_overwrite:
        hdbg.dassert_not_in(response_col, df.columns)
    response_data = []
    for start in tqdm.tqdm(
        range(0, len(df), chunk_size), desc="Processing chunks"
    ):
        end = start + chunk_size
        chunk = df.iloc[start:end]
        _LOG.debug("chunk.size=%s", chunk.shape[0])
        # Number the rows so that the responses can be matched to the inputs.
        data = chunk[input_col].astype(str).tolist()
        data = [f"{i + 1}: {val}" for i, val in enumerate(data)]
        user = "\n".join(data)
        _LOG.debug("user=\n%s", user)
        try:
            response = get_completion(user, system_prompt=prompt, model=model)
        except Exception as e:
            # Report the column being processed.
            _LOG.error(
                "Error processing column %s in chunk %s-%s: %s",
                input_col,
                start,
                end,
                e,
            )
            raise
        # Keep only the non-empty lines of the response.
        response_lines = [
            ln.rstrip() for ln in response.splitlines() if ln.strip()
        ]
        _LOG.debug(hprint.to_str("response_lines"))
        _LOG.debug("len(response_lines)=%s", len(response_lines))
        hdbg.dassert_eq(len(response_lines), chunk.shape[0])
        # Remove the "<number>: " prefix from each line.
        processed_response = []
        for response_line in response_lines:
            m = re.match(r"\d+: (.*)\s*", response_line)
            hdbg.dassert(m, f"Invalid response: {response_line}")
            # The linter doesn't understand that `dassert` is equivalent to an
            # `assert`.
            assert m is not None
            processed_response.append(m.group(1).strip())
        _LOG.debug(hprint.to_str("processed_response"))
        response_data.extend(processed_response)
    df[response_col] = response_data
    return df
def _apply_llm_via_executable(
    input_str: str,
    *,
    system_prompt: Optional[str] = None,
    model: Optional[str] = None,
    expected_num_chars: Optional[int] = None,
) -> Tuple[str, float]:
    """
    Apply LLM using the llm CLI executable.

    :param input_str: the input text to process
    :param system_prompt: optional system prompt to use
    :param model: optional model name to use
    :param expected_num_chars: optional expected number of characters in
        output (used for progress bar). When set, the output of the command
        is streamed to update the progress bar
    :return: tuple of (LLM response as string, cost in dollars). The cost is
        always 0.0 since it is not available when using the executable
    """
    # Local import to keep the module-level dependencies unchanged.
    import tempfile

    # Build command.
    cmd = ["llm"]
    if system_prompt:
        cmd.extend(["--system", system_prompt])
    if model:
        cmd.extend(["--model", model])
    # Add the user prompt.
    cmd.append(input_str)
    _LOG.debug("Running command: %s", " ".join(cmd))
    # Execute command.
    if expected_num_chars:
        # Use streaming with progress bar.
        response_parts = []
        # Send stderr to a temporary file instead of a pipe: a pipe that is
        # not drained while stdout is being read blocks the child process
        # once the pipe buffer is full.
        with tempfile.TemporaryFile(mode="w+") as stderr_file:
            # The context manager closes the pipes and waits for the process
            # to complete.
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=stderr_file,
                text=True,
            ) as proc:
                with tqdm(total=expected_num_chars, unit="char") as pbar:
                    for line in proc.stdout:
                        response_parts.append(line)
                        pbar.update(len(line))
            if proc.returncode != 0:
                stderr_file.seek(0)
                error_msg = stderr_file.read()
                hdbg.dfatal(
                    f"llm command failed with return code: {proc.returncode} error: {error_msg}"
                )
        response = "".join(response_parts)
    else:
        # Run without progress bar.
        cmd_str = " ".join(shlex.quote(arg) for arg in cmd)
        _, response = hsystem.system_to_string(cmd_str)
    # Cost calculation not available when using executable.
    cost = 0.0
    _LOG.debug("Cost calculation not available when using llm executable")
    return response, cost
+ + :param usage: usage object from LLM result containing input/output token counts + :param model: model name for cost calculation + :return: total cost in dollars + """ + input_tokens = usage.input + output_tokens = usage.output + prompt_cost = tokencost.calculate_cost_by_tokens( + num_tokens=input_tokens, model=model, token_type="input" + ) + completion_cost = tokencost.calculate_cost_by_tokens( + num_tokens=output_tokens, model=model, token_type="output" + ) + cost = float(prompt_cost + completion_cost) + return cost + + +def _apply_llm_via_library( + input_str: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + expected_num_chars: Optional[int] = None, +) -> Tuple[str, float]: + """ + Apply LLM using the llm Python library. + + :param input_str: the input text to process + :param system_prompt: optional system prompt to use + :param model: optional model name to use + :param expected_num_chars: optional expected number of characters in + output (used for progress bar) + :return: tuple of (LLM response as string, cost in dollars) + """ + # Get the model. + if model: + llm_model = llm.get_model(model) + else: + llm_model = llm.get_model() + _LOG.debug("Using model: %s", llm_model.model_id) + # Execute with or without progress bar. + if expected_num_chars: + # Use streaming with progress bar. + response_parts = [] + with tqdm(total=expected_num_chars, unit="char") as pbar: + for chunk in llm_model.prompt( + input_str, system=system_prompt, stream=True + ): + chunk_str = str(chunk) + response_parts.append(chunk_str) + pbar.update(len(chunk_str)) + response = "".join(response_parts) + # Streaming doesn't provide usage info, so we can't calculate cost. + cost = 0.0 + _LOG.debug("Cost calculation not available for streaming mode") + else: + # Run without progress bar. 
+ _LOG.trace("system_prompt=\n%s", system_prompt) + _LOG.trace("input_str=\n%s", input_str) + result = llm_model.prompt(input_str, system=system_prompt) + response = result.text() + _LOG.trace("response=\n%s", response) + # Calculate cost. + usage = result.usage() + cost = _calculate_cost_from_usage( + usage=usage, + model=llm_model.model_id, + ) + _LOG.debug( + "Cost: $%.6f (input: %d tokens, output: %d tokens)", + cost, + usage.input, + usage.output, + ) + return response, cost + + +# ############################################################################# +# Main functions +# ############################################################################# + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def apply_llm( + input_str: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + use_llm_executable: bool = False, + expected_num_chars: Optional[int] = None, +) -> Tuple[str, float]: + """ + Apply an LLM to process input text using either CLI executable or library. + + This function provides a unified interface to call LLMs either through the + llm command-line executable or through the llm Python library. It supports + optional system prompts, model selection, and progress bars for long outputs. 
+ + :param input_str: the input text to process with the LLM + :param system_prompt: optional system prompt to guide the LLM's behavior + :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") + :param use_llm_executable: if True, use the llm CLI executable; if False, + use the llm Python library + :param expected_num_chars: optional expected number of characters in + output; if provided, displays a progress bar during generation + :return: tuple of (LLM response as string, cost in dollars) + """ + hdbg.dassert_isinstance(input_str, str) + hdbg.dassert_ne(input_str, "", "Input string cannot be empty") + if system_prompt is not None: + hdbg.dassert_isinstance(system_prompt, str) + if model is not None: + hdbg.dassert_isinstance(model, str) + hdbg.dassert_ne(model, "", "Model cannot be empty string") + if expected_num_chars is not None: + hdbg.dassert_isinstance(expected_num_chars, int) + hdbg.dassert_lt(0, expected_num_chars) + _LOG.debug("Applying LLM to input text") + _LOG.debug("use_llm_executable=%s", use_llm_executable) + # Route to appropriate implementation. + if use_llm_executable: + # Check that llm executable exists. + hdbg.dassert( + _check_llm_executable(), + "llm executable not found. Install it using: pip install llm", + ) + response, cost = _apply_llm_via_executable( + input_str, + system_prompt=system_prompt, + model=model, + expected_num_chars=expected_num_chars, + ) + else: + response, cost = _apply_llm_via_library( + input_str, + system_prompt=system_prompt, + model=model, + expected_num_chars=expected_num_chars, + ) + _LOG.debug("LLM processing completed") + return response, cost + + +def apply_llm_with_files( + input_file: str, + output_file: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + use_llm_executable: bool = False, + expected_num_chars: Optional[int] = None, +) -> float: + """ + Apply an LLM to process text from an input file and save to output file. 
+ + This is a convenience wrapper around apply_llm() that handles reading from + and writing to files. It reads the input file, processes the content using + the LLM, and writes the result to the output file. + + :param input_file: path to the input file containing text to process + :param output_file: path to the output file where result will be saved + :param system_prompt: optional system prompt to guide the LLM's behavior + :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") + :param use_llm_executable: if True, use the llm CLI executable; if False, + use the llm Python library + :param expected_num_chars: optional expected number of characters in + output; if provided, displays a progress bar during generation + :return: cost in dollars + """ + hdbg.dassert_isinstance(input_file, str) + hdbg.dassert_ne(input_file, "", "Input file cannot be empty") + hdbg.dassert_isinstance(output_file, str) + hdbg.dassert_ne(output_file, "", "Output file cannot be empty") + _LOG.debug("Reading input from file: %s", input_file) + # Read input file. + input_str = hio.from_file(input_file) + _LOG.debug("Read %d characters from input file", len(input_str)) + # Process with LLM. + response, cost = apply_llm( + input_str, + system_prompt=system_prompt, + model=model, + use_llm_executable=use_llm_executable, + expected_num_chars=expected_num_chars, + ) + # Write output file. + _LOG.debug("Writing output to file: %s", output_file) + hio.to_file(output_file, response) + _LOG.debug("Wrote %d characters to output file", len(response)) + return cost + + +# ############################################################################# +# Batch processing +# ############################################################################# + + +def _validate_batch_inputs( + prompt: str, + input_list: List[str], +) -> None: + """ + Validate prompt and input list for batch processing. 
+ + :param prompt: System prompt to validate + :param input_list: List of inputs to validate + :raises: Assertion errors if validation fails + """ + hdbg.dassert_isinstance(prompt, str) + hdbg.dassert_isinstance(input_list, list) + hdbg.dassert_lt(0, len(input_list), "Input list cannot be empty") + for idx, input_str in enumerate(input_list): + hdbg.dassert_isinstance( + input_str, + str, + "Input at index %d must be a string", + idx, + ) + hdbg.dassert_ne( + input_str, + "", + "Input at index %d cannot be empty", + idx, + ) + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def _llm( + system_prompt: str, + input_str: str, + model: str, +) -> Tuple[str, float]: + """ + Apply LLM using the llm Python library. + + :param input_str: the input text to process + :param system_prompt: optional system prompt to use + :param model: optional model name to use + :param expected_num_chars: optional expected number of characters in + output (used for progress bar) + :return: LLM response as string + """ + hdbg.dassert_isinstance(system_prompt, str) + _LOG.trace("system_prompt=\n%s", system_prompt) + # + hdbg.dassert_isinstance(input_str, str) + _LOG.trace("input_str=\n%s", input_str) + # + hdbg.dassert_isinstance(model, str) + hdbg.dassert_ne(model, "", "Model cannot be empty") + llm_model = llm.get_model(model) + _LOG.debug("model=%s", llm_model.model_id) + # Call the LLM. + result = llm_model.prompt(input_str, system=system_prompt) + response = result.text() + _LOG.trace("response=\n%s", response) + usage = result.usage() + cost = _calculate_cost_from_usage( + usage=usage, + model=model, + ) + return response, cost + + +def _call_llm_or_test_functor( + input_str: str, + system_prompt: Optional[str], + model: str, + testing_functor: Optional[Callable[[str], str]], +) -> Tuple[str, float]: + """ + Call LLM or testing functor if provided. 
+ + :param input_str: Input text to process + :param system_prompt: System prompt (can be None) + :param model: Model name (required for cost calculation) + :param testing_functor: Optional testing functor + :return: Tuple of (response, cost) where cost is 0.0 if not calculated + """ + if testing_functor is None: + response, cost = _llm(system_prompt, input_str, model) + # # Calculate cost for this call. + # # Build full prompt for cost calculation. + # if system_prompt: + # full_prompt = system_prompt + "\n" + input_str + # else: + # full_prompt = input_str + # cost = _calculate_llm_cost(full_prompt, response, model) + else: + response = testing_functor(input_str) + cost = 0.0 + return response, cost + + +def _calculate_llm_cost( + prompt: str, + completion: str, + model: str, +) -> float: + """ + Calculate the cost of an LLM call using tokencost library. + + :param prompt: the prompt sent to the LLM + :param completion: the completion returned by the LLM + :param model: the model name used + :return: total cost in dollars + """ + prompt_cost = tokencost.calculate_prompt_cost(prompt, model) + completion_cost = tokencost.calculate_completion_cost(completion, model) + total_cost = prompt_cost + completion_cost + # Convert to float to ensure consistent type. + return float(total_cost) + + +def apply_llm_batch_individual( + prompt: str, + input_list: List[str], + *, + model: str, + testing_functor: Optional[Callable[[str], str]] = None, + progress_bar_object: Optional[tqdm] = None, +) -> Tuple[List[str], float]: + """ + Apply an LLM to process a batch of inputs one at the time. + """ + _validate_batch_inputs(prompt, input_list) + _LOG.debug("Processing batch of %d inputs individually", len(input_list)) + # Process each input sequentially with progress bar and error handling. + responses = [] + # Initialize total cost accumulator. 
+ total_cost = 0.0 + for input_str in input_list: + response, cost = _call_llm_or_test_functor( + input_str=input_str, + system_prompt=prompt, + model=model, + testing_functor=testing_functor, + ) + total_cost += cost + responses.append(response) + if progress_bar_object is not None: + progress_bar_object.update(1) + _LOG.debug("Batch processing completed") + _LOG.debug("Total cost for batch with individual prompt: $%.6f", total_cost) + return responses, total_cost + + +def apply_llm_batch_with_shared_prompt( + prompt: str, + input_list: List[str], + *, + model: str, + testing_functor: Optional[Callable[[str], str]] = None, + progress_bar_object: Optional[tqdm] = None, +) -> Tuple[List[str], float]: + """ + Apply an LLM to process a batch of input texts using the same system prompt. + """ + _validate_batch_inputs(prompt, input_list) + _LOG.debug("Processing batch of %d inputs", len(input_list)) + # Process each input sequentially with progress bar. + responses = [] + total_cost = 0.0 + if testing_functor is None: + # TODO(gp): Factor this out and use a cache. 
+ llm_model = llm.get_model(model) + conv = llm.Conversation(model=llm_model) + for input_str in input_list: + result = conv.prompt(input_str, system=prompt) + response = result.text() + usage = result.usage() + cost = _calculate_cost_from_usage( + usage=usage, + model=model, + ) + total_cost += cost + responses.append(response) + if progress_bar_object is not None: + progress_bar_object.update(1) + else: + for input_str in input_list: + response = testing_functor(input_str) + responses.append(response) + if progress_bar_object is not None: + progress_bar_object.update(1) + _LOG.debug("Batch processing completed") + _LOG.debug("Total cost for batch with shared prompt: $%.6f", total_cost) + return responses, total_cost + + +def apply_llm_batch_combined( + prompt: str, + input_list: List[str], + *, + model: str, + max_retries: int = 3, + testing_functor: Optional[Callable[[str], str]] = None, + progress_bar_object: Optional[tqdm] = None, +) -> Tuple[List[str], float]: + """ + Apply an LLM to process a batch using a single combined prompt. + + This function combines all queries into a single prompt and expects + structured JSON output. It includes retry logic for failed JSON parsing. + """ + _validate_batch_inputs(prompt, input_list) + hdbg.dassert_isinstance(max_retries, int) + hdbg.dassert_lt(0, max_retries) + _LOG.debug( + "Processing batch of %d inputs with combined prompt", len(input_list) + ) + # Build combined prompt. + + combined_prompt = f"{prompt}\n\n" + instruction = """ + Return the results only as a valid JSON object with string values, using + zero-based numeric keys that match the item numbers. + + Output format: + '{"0": "result1", "1": "result2", ...} + + """ + combined_prompt += hprint.dedent(instruction) + for idx, input_str in enumerate(input_list): + combined_prompt += f"{idx}: {input_str}\n" + combined_prompt += "\nReturn ONLY the JSON object, no other text." + _LOG.debug("Combined prompt:\n%s", combined_prompt) + # You are a calculator. 
Return only the numeric result. + # ``` + # Process the following items and return results as JSON in the format: + # {"0": "result1", "1": "result2", ...} + # 0: 2 + 2 + # 1: 3 * 3 + # 2: 10 - 5 + # 3: 20 / 4 + # Return ONLY the JSON object, no other text. + # ``` + # Process with retries for JSON parsing. + total_cost = 0.0 + if testing_functor is None: + for retry_num in range(max_retries): + _LOG.debug( + "Processing batch of %d inputs with combined prompt (attempt %d/%d)", + len(input_list), + retry_num + 1, + max_retries, + ) + system_prompt = combined_prompt + user_prompt = "Process the items listed above." + response, cost = _llm(system_prompt, user_prompt, model) + total_cost += cost + try: + # Parse JSON response. + # E.g., + # ``` + # {"0": "4", "1": "9", "2": "5", "3": "5"} + # ``` + _LOG.debug("Parsing JSON response:\n%s", response) + # Extract JSON from response (handle cases where LLM adds extra text). + response_stripped = response.strip() + # Find JSON object boundaries. + json_start = response_stripped.find("{") + json_end = response_stripped.rfind("}") + 1 + hdbg.dassert_lte(0, json_start) + hdbg.dassert_lt(json_start, json_end) + json_str = response_stripped[json_start:json_end] + result_dict = json.loads(json_str) + # Convert dict to list in order. 
+ responses = [] + for idx in range(len(input_list)): + key = str(idx) + if key in result_dict: + responses.append(result_dict[key]) + else: + _LOG.warning("Missing result for index %d", idx) + responses.append("") + _LOG.debug("Successfully parsed JSON response") + if progress_bar_object is not None: + progress_bar_object.update(len(input_list)) + _LOG.debug( + "Total cost for batch with combined prompt: $%.6f", + total_cost, + ) + return responses, total_cost + except (json.JSONDecodeError, ValueError) as e: + _LOG.debug( + "JSON parsing failed (attempt %d/%d): %s", + retry_num + 1, + max_retries, + e, + ) + if retry_num == max_retries - 1: + hdbg.dfatal( + "Failed to parse JSON after %d retries", max_retries + ) + # Add instruction to retry. + combined_prompt += "\n\nPrevious response had invalid JSON format. Please return ONLY a valid JSON object." + else: + responses = [] + for input_str in input_list: + response = testing_functor(input_str) + responses.append(response) + if progress_bar_object is not None: + progress_bar_object.update(1) + total_cost = 0.0 + return responses, total_cost + # Should not reach here. + raise RuntimeError("Unexpected error in apply_llm_batch_combined") + + +# ############################################################################# + + +# TODO(gp): Move it somewhere else. +def get_tqdm_progress_bar() -> tqdm: + # Use appropriate tqdm for notebook or terminal + try: + from IPython import get_ipython + + if get_ipython() is not None: + from tqdm.notebook import tqdm as notebook_tqdm + + tqdm_progress = notebook_tqdm + else: + tqdm_progress = tqdm + except ImportError: + tqdm_progress = tqdm + return tqdm_progress + + +# TODO(gp): Skip values that already have a value in the target column. 
+# TODO(gp): Parallelize +def apply_llm_prompt_to_df( + prompt: str, + df: pd.DataFrame, + extractor: Callable[[Union[str, pd.Series]], str], + target_col: str, + batch_mode: str, + *, + model: str, + batch_size: int = 50, + dump_every_batch: Optional[str] = None, + tag: str = "Processing", + testing_functor: Optional[Callable[[str], str]] = None, + use_sys_stderr: bool = False, +) -> Tuple[pd.DataFrame, Dict[str, int]]: + """ + Apply an LLM to process a dataframe column using the same system prompt. + + This function processes text from dataframe rows using an extractor function, + applies the LLM to each item in batches, and stores the results in a target + column. It can optionally save progress to a file after each batch. + + :param prompt: system prompt to guide the LLM's behavior + :param df: dataframe to process + :param extractor: callable that extracts text from a row or string + :param target_col: name of column to store results + :param batch_mode: batch mode to use (individual, shared_prompt, combined) + :param model: model name to use (e.g., "gpt-4", "claude-3-opus") + :param batch_size: number of items to process in each batch + :param dump_every_batch: optional file path to dump the dataframe after each batch + :param tag: description tag for progress bar + :param testing_functor: optional functor to use for testing + :return: tuple of (dataframe with results, statistics dict) + """ + start_time = time.time() + hdbg.dassert_isinstance(prompt, str) + hdbg.dassert_ne(prompt, "", "Prompt cannot be empty") + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_lt(0, len(df), "Dataframe cannot be empty") + hdbg.dassert_isinstance(target_col, str) + hdbg.dassert_ne(target_col, "", "Target column cannot be empty") + hdbg.dassert_isinstance(model, str) + hdbg.dassert_ne(model, "", "Model cannot be empty") + hdbg.dassert_isinstance(batch_size, int) + hdbg.dassert_lt(0, batch_size) + if dump_every_batch is not None: + 
hdbg.dassert_isinstance(dump_every_batch, str) + hdbg.dassert_ne(dump_every_batch, "", "Dump file path cannot be empty") + # Create target column if it doesn't exist. + if target_col not in df.columns: + df[target_col] = None + # Process items in batches with progress bar for entire workload. + num_items = len(df) + num_batches = (num_items + batch_size - 1) // batch_size + _LOG.info( + "Processing %d items in %d batches of %d items each", + num_items, + num_batches, + batch_size, + ) + _LOG.info(hprint.to_str("model batch_mode")) + num_skipped = 0 + progress_bar_ctor = get_tqdm_progress_bar() + progress_bar_object = progress_bar_ctor( # type: ignore + total=num_items, + desc=tag, + dynamic_ncols=True, + # Workaround for unit tests. + file=sys.__stderr__ if use_sys_stderr else None, + ) + total_cost = 0.0 + # TODO(gp): Precompute the batch indices that needs to be processed. + for batch_num in range(num_batches): + # Get batch rows. + start_idx = batch_num * batch_size + end_idx = min(start_idx + batch_size, len(df)) + rows = df.iloc[start_idx:end_idx] + # Extract items from rows, filtering out invalid ones. + batch_items = [] + batch_indices = [] + for idx, row in rows.iterrows(): + extracted_text = extractor(row) + # Check if extraction returned valid text (not NaN/None/empty). + if extracted_text != "": + batch_items.append(extracted_text) + batch_indices.append(idx) + else: + # Set NaN for rows with missing company information. + df.at[idx, target_col] = "" + num_skipped += 1 + progress_bar_object.update(1) + # Call LLM only if there are valid items in this batch. 
+ if batch_items: + _LOG.debug( + "Processing batch %d/%d (%d items, %d skipped)", + batch_num + 1, + num_batches, + len(batch_items), + len(rows) - len(batch_items), + ) + if batch_mode == "individual": + func = apply_llm_batch_individual + elif batch_mode == "shared_prompt": + func = apply_llm_batch_with_shared_prompt + elif batch_mode == "combined": + func = apply_llm_batch_combined + else: + hdbg.dfatal("Invalid batch mode: %s", batch_mode) + batch_responses, batch_cost = func( + prompt=prompt, + input_list=batch_items, + model=model, + testing_functor=testing_functor, + progress_bar_object=progress_bar_object, + ) + # Update total_cost. + total_cost += batch_cost + # Store results back into dataframe. + for idx, response in zip(batch_indices, batch_responses): + df.at[idx, target_col] = response + else: + _LOG.debug( + "Skipping batch %d/%d (all %d items have missing data)", + batch_num + 1, + num_batches, + len(rows), + ) + # Dump dataframe to file after batch if requested. + if dump_every_batch is not None: + _LOG.debug("Dumping dataframe to file: %s", dump_every_batch) + df.to_csv(dump_every_batch, index=False) + # Calculate elapsed time. 
+ elapsed_time = time.time() - start_time + stats = { + "num_items": num_items, + "num_skipped": num_skipped, + "num_batches": num_batches, + "total_cost_in_dollars": total_cost, + "elapsed_time_in_seconds": elapsed_time, + } + _LOG.info("Processing completed:\n%s", pprint.pformat(stats)) + return df, stats diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py new file mode 100644 index 000000000..3d33b17d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py @@ -0,0 +1,233 @@ +""" +Import as: + +import helpers.hllm_cost as hllmcost +""" + +import logging +import os +from typing import Any + +import requests + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# OpenRouter API Helpers +# ############################################################################# + + +def _get_models_info_file() -> str: + """ + Get the path to the file for storing OpenRouter models info. + """ + dir_path = hgit.get_helpers_root_dir() + file_path = os.path.join( + dir_path, "dev_scripts_helpers/llms", "openrouter_models_info.csv" + ) + return file_path + + +def _retrieve_openrouter_model_info() -> "pd.DataFrame": + """ + Retrieve OpenRouter models info from the OpenRouter API. 
+ """ + import pandas as pd + + response = requests.get("https://openrouter.ai/api/v1/models") + # {'architecture': {'input_modalities': ['text', 'image'], + # 'instruct_type': None, + # 'modality': 'text+image->text', + # 'output_modalities': ['text'], + # 'tokenizer': 'Mistral'}, + # 'context_length': 131072, + # 'created': 1746627341, + # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' + # 'language model designed to deliver frontier-level ' + # ... + # 'broad compatibility across cloud environments.', + # 'id': 'mistralai/mistral-medium-3', + # 'name': 'Mistral: Mistral Medium 3', + # 'per_request_limits': None, + # 'pricing': {'completion': '0.000002', + # 'image': '0', + # 'internal_reasoning': '0', + # 'prompt': '0.0000004', + # 'request': '0', + # 'web_search': '0'}, + # 'supported_parameters': ['tools', + # 'tool_choice', + # 'max_tokens', + # 'temperature', + # 'top_p', + # 'stop', + # 'frequency_penalty', + # 'presence_penalty', + # 'response_format', + # 'structured_outputs', + # 'seed'], + # 'top_provider': {'context_length': 131072, + # 'is_moderated': False, + # 'max_completion_tokens': None}} + response_json = response.json() + # There is only one key in the response. + hdbg.dassert_eq(list(response_json.keys()), ["data"]) + response_json = response_json["data"] + model_info_df = pd.DataFrame(response_json) + return model_info_df + + +def _save_models_info_to_csv( + model_info_df: "pd.DataFrame", + file_name: str, +) -> "pd.DataFrame": + """ + Save models info to a CSV file. + """ + hdbg.dassert_isinstance(file_name, str) + hdbg.dassert_ne(file_name, "") + # TODO(*): Save all the data. + # Extract prompt, completion pricing from pricing column. 
+ model_info_df["prompt_pricing"] = model_info_df["pricing"].apply( + lambda x: x["prompt"] + ) + model_info_df["completion_pricing"] = model_info_df["pricing"].apply( + lambda x: x["completion"] + ) + required_columns = [ + "id", + "name", + "description", + "prompt_pricing", + "completion_pricing", + "supported_parameters", + ] + # Take only relevant columns. + model_info_df = model_info_df.loc[:, required_columns] + # Save to CSV file. + model_info_df.to_csv(file_name, index=False) + return model_info_df + + +# ############################################################################# +# LLMCostTracker +# ############################################################################# + + +class LLMCostTracker: + """ + Track the costs of LLM API calls through one of the providers. + """ + + def __init__(self, provider_name: str, model: str) -> None: + """ + Initialize the class. + """ + self.current_cost: float = 0.0 + self.provider_name = provider_name + self.model = model + + def end_logging_costs(self) -> None: + """ + End logging costs by resetting the current cost to 0. + """ + self.current_cost = 0.0 + + def accumulate_cost(self, cost: float) -> None: + """ + Accumulate the cost. + + :param cost: The cost to accumulate + """ + self.current_cost += cost + + def get_current_cost(self) -> float: + """ + Get the current accumulated cost. + + :return: The current cost + """ + return self.current_cost + + def calculate_cost( + self, + completion: Any, + *, + models_info_file: str = "", + ) -> float: + """ + Calculate the cost of an API call, based on the provider. + + :param completion: the completion response from API + :return: the calculated cost in dollars + """ + import pandas as pd + + # Get the number of input and output tokens. 
+ usage = getattr(completion, "usage", None) + hdbg.dassert( + usage is not None, + "Completion/response object has no 'usage' attribute", + ) + if hasattr(usage, "prompt_tokens") and hasattr( + usage, "completion_tokens" + ): + prompt_tokens = usage.prompt_tokens + completion_tokens = usage.completion_tokens + elif hasattr(usage, "input_tokens") and hasattr(usage, "output_tokens"): + prompt_tokens = usage.input_tokens + completion_tokens = usage.output_tokens + else: + raise ValueError( + f"Unknown usage structure on completion object: {usage}" + ) + # Get the provider and model details. + if self.provider_name == "openai": + # Get the pricing for the selected model. + # TODO(gp): Use pricing from OpenAI or Openrouter API. + # https://openai.com/api/pricing/ + # https://gptforwork.com/tools/openai-chatgpt-api-pricing-calculator + # Cost per 1M tokens. + pricing = { + "gpt-3.5-turbo": {"prompt": 0.5, "completion": 1.5}, + "gpt-4o-mini": {"prompt": 0.15, "completion": 0.60}, + "gpt-4o": {"prompt": 2.5, "completion": 10}, + "gpt-5.2": {"prompt": 1.75, "completion": 14.0}, + "gpt-5.1": {"prompt": 1.25, "completion": 10.0}, + "gpt-5-mini": {"prompt": 0.25, "completion": 2.00}, + } + hdbg.dassert_in(self.model, pricing) + model_pricing = pricing[self.model] + # Calculate the cost. + cost = (prompt_tokens / 1e6) * model_pricing["prompt"] + ( + completion_tokens / 1e6 + ) * model_pricing["completion"] + elif self.provider_name == "openrouter": + # If the model info file doesn't exist, download one. + if models_info_file == "": + models_info_file = _get_models_info_file() + _LOG.debug(hprint.to_str("models_info_file")) + if not os.path.isfile(models_info_file): + model_info_df = _retrieve_openrouter_model_info() + _save_models_info_to_csv(model_info_df, models_info_file) + else: + model_info_df = pd.read_csv(models_info_file) + # Extract pricing for this model. 
+ hdbg.dassert_in(self.model, model_info_df["id"].values) + row = model_info_df.loc[model_info_df["id"] == self.model].iloc[0] + prompt_price = row["prompt_pricing"] + completion_price = row["completion_pricing"] + # Compute cost. + cost = ( + prompt_tokens * prompt_price + + completion_tokens * completion_price + ) + else: + raise ValueError(f"Unknown provider: {self.provider_name}") + _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost")) + return cost diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py new file mode 100644 index 000000000..94738202c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py @@ -0,0 +1,809 @@ +""" +Import as: + +import helpers.hlogging as hloggin +""" + +import asyncio +import contextlib +import copy +import datetime +import logging +from typing import Any, Iterable, List, Optional, Tuple, Union + +# Avoid dependency from other helpers modules since this is used when the code +# is bootstrapped. + + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +# Copied from `helpers/hsystem.py` to avoid circular imports. +def _is_running_in_ipynb() -> bool: + try: + _ = get_ipython().config # type: ignore + res = True + except NameError: + res = False + return res + + +# Copied from `helpers/hsystem.py` to avoid circular dependencies. 
+def get_user_name() -> str: + import getpass + + res = getpass.getuser() + return res + + +# ############################################################################# +# Memory usage +# ############################################################################# + +# TODO(gp): Consider moving to hmemory.py + + +MemoryUsage = Tuple[float, float, float] + + +def get_memory_usage(process: Optional[Any] = None) -> MemoryUsage: + """ + Return the memory usage in terms of resident, virtual, and percent of total + used memory. + """ + if process is None: + import psutil + + process = psutil.Process() + rss_in_GB = process.memory_info().rss / (1024**3) + vms_in_GB = process.memory_info().vms / (1024**3) + mem_pct = process.memory_percent() + return (rss_in_GB, vms_in_GB, mem_pct) + + +def memory_to_str(resource_use: MemoryUsage, *, verbose: bool = True) -> str: + (rss_in_GB, vms_in_GB, mem_pct) = resource_use + if verbose: + txt = "rss=%.3fGB vms=%.3fGB mem_pct=%.0f%%" % ( + rss_in_GB, + vms_in_GB, + mem_pct, + ) + else: + txt = "%.3fGB %.3fGB %.0f%%" % (rss_in_GB, vms_in_GB, mem_pct) + return txt + + +def get_memory_usage_as_str(process: Optional[Any] = None) -> str: + """ + Like `get_memory_usage()` but returning a formatted string. + """ + resource_use = get_memory_usage(process) + txt = memory_to_str(resource_use) + return txt + + +# ############################################################################# +# Utils. +# ############################################################################# + +# White: 37. +# Red: 31 +# Green: 32 +# Yellow: 33 +# Blu: 34 +# Cyan: 36 +# White on red background: 41 + +_COLOR_MAPPING = { + # Green. + "TRACE": (32, "TRACE"), + # Blu. + "DEBUG": (34, "DEBUG"), + # Cyan. + "INFO": (36, "INFO "), + # White on red background. 
+ "WARNING": (41, "WARN "), + "ERROR": (41, "ERROR"), + "CRITICAL": (41, "CRTCL"), +} + + +def reset_logger() -> None: + import importlib + + print("Resetting logger...") + logging.shutdown() + importlib.reload(logging) + + +def get_all_loggers() -> List: + """ + Return list of all registered loggers. + """ + logger_dict = logging.root.manager.loggerDict # type: ignore # pylint: disable=no-member + loggers = [logging.getLogger(name) for name in logger_dict] + return loggers + + +def get_matching_loggers( + module_names: Union[str, Iterable[str]], verbose: bool +) -> List: + """ + Find loggers that match a name or a name in a set. + """ + if isinstance(module_names, str): + module_names = [module_names] + loggers = get_all_loggers() + if verbose: + print("loggers=\n", "\n".join(map(str, loggers))) + # + sel_loggers = [] + for module_name in module_names: + if verbose: + print(f"module_name={module_name}") + # TODO(gp): We should have a regex. + # str(logger) looks like `` + sel_loggers_tmp = [ + logger + for logger in loggers + if str(logger).startswith(" None: + """ + Reduce the verbosity for external modules that are very chatty. + + :param verbosity: level of verbosity used for chatty modules: the + higher the better + :param verbose: print extra information + """ + module_names = [ + "aiobotocore", + "asyncio", + "boto", + "boto3", + "botocore", + "ccxt", # CCXT also needs to be shut up after the `exchange` is built. 
+ "fsspec", + "hooks", + "httpcore", + "httpx", + "invoke", + "matplotlib", + "nose", + "openai", + "s3fs", + "s3transfer", + "urllib3", + # "ib_insync", + ] + # verbose = True + loggers = get_matching_loggers(module_names, verbose) + loggers = sorted(loggers, key=lambda logger: logger.name) + for logger in loggers: + logger.setLevel(verbosity) + if len(loggers) > 0: + logger_names = list({logger.name for logger in loggers}) + _LOG.debug( + "Shut up %d modules: %s", len(loggers), ", ".join(logger_names) + ) + # if _LOG.getEffectiveLevel() < logging.DEBUG: + # print(WARNING + + # " Shutting up %d modules: %s" + # % (len(loggers), ", ".join([logger.name for logger in loggers])) + # ) + + +# ############################################################################# +# _LocalTimeZoneFormatter +# ############################################################################# + + +# From https://stackoverflow.com/questions/32402502 +class _LocalTimeZoneFormatter: + """ + Override logging.Formatter to use an aware datetime object. + """ + + def __init__(self, *args: Any, **kwargs: Any): + super().__init__(*args, **kwargs) # type: ignore[call-arg] + try: + # TODO(gp): Automatically detect the time zone. It might be complicated in + # Docker. + import pytz + + self._tzinfo = pytz.timezone("America/New_York") + except ModuleNotFoundError: + # print(f"Can't import pytz: using UTC\n{str(e)}") + self._tzinfo = None + + def converter(self, timestamp: float) -> datetime.datetime: + # To make the linter happy and respecting the signature of the + # superclass method. + _ = self + # timestamp=1622423570.0147252 + dt = datetime.datetime.utcfromtimestamp(timestamp) + # Convert it to an aware datetime object in UTC time. + dt = dt.replace(tzinfo=datetime.timezone.utc) + if self._tzinfo is not None: + # Convert it to desired timezone. 
+ dt = dt.astimezone(self._tzinfo) + return dt + + def formatTime( + self, record: logging.LogRecord, datefmt: Optional[str] = None + ) -> str: + dt = self.converter(record.created) + if datefmt: + s = dt.strftime(datefmt) + else: + try: + s = dt.isoformat(timespec="milliseconds") + except TypeError: + s = dt.isoformat() + return s + + +# ############################################################################# +# _ColoredFormatter +# ############################################################################# + + +# [mypy] error: Definition of "converter" in base class +# "_LocalTimeZoneFormatter" is incompatible with definition in base class +# "Formatter" +class _ColoredFormatter( # type: ignore[misc] + _LocalTimeZoneFormatter, logging.Formatter +): + """ + Logging formatter using colors for different levels. + """ + + _SKIP_DEBUG = True + + def format(self, record: logging.LogRecord) -> str: + colored_record = copy.copy(record) + # `levelname` is the internal name and can't be changed to `level_name` + # as per our conventions. + levelname = colored_record.levelname + if _ColoredFormatter._SKIP_DEBUG and levelname == "DEBUG": + colored_levelname = "" + else: + # Use white as default. + prefix = "\033[" + suffix = "\033[0m" + assert levelname in _COLOR_MAPPING, "Can't find info '%s'" + color_code, tag = _COLOR_MAPPING[levelname] + # Align the level name. + colored_levelname = f"{prefix}{color_code}m{tag}{suffix}" + colored_record.levelname = colored_levelname + return logging.Formatter.format(self, colored_record) + + +# From https://stackoverflow.com/questions/2183233 +def addLoggingLevel(levelName, levelNum, methodName=None): + """ + Comprehensively adds a new logging level to the `logging` module and the + currently configured logging class. + + `levelName` becomes an attribute of the `logging` module with the value + `levelNum`. 
`methodName` becomes a convenience method for both `logging` + itself and the class returned by `logging.getLoggerClass()` (usually just + `logging.Logger`). If `methodName` is not specified, `levelName.lower()` is + used. + + To avoid accidental clobberings of existing attributes, this method will + raise an `AttributeError` if the level name is already an attribute of the + `logging` module or if the method name is already present + + Example + ------- + >>> addLoggingLevel('TRACE', logging.DEBUG - 5) + >>> logging.getLogger(__name__).setLevel("TRACE") + >>> logging.getLogger(__name__).trace('that worked') + >>> logging.trace('so did this') + >>> logging.TRACE + 5 + """ + if not methodName: + methodName = levelName.lower() + + if hasattr(logging, levelName): + raise AttributeError( + "{} already defined in logging module".format(levelName) + ) + if hasattr(logging, methodName): + raise AttributeError( + "{} already defined in logging module".format(methodName) + ) + if hasattr(logging.getLoggerClass(), methodName): + raise AttributeError( + "{} already defined in logger class".format(methodName) + ) + + # This method was inspired by the answers to Stack Overflow post + # http://stackoverflow.com/q/2183233/2988730, especially + # http://stackoverflow.com/a/13638084/2988730 + def logForLevel(self, message, *args, **kwargs): + if self.isEnabledFor(levelNum): + self._log(levelNum, message, args, **kwargs) + + def logToRoot(message, *args, **kwargs): + logging.log(levelNum, message, *args, **kwargs) + + logging.addLevelName(levelNum, levelName) + setattr(logging, levelName, levelNum) + setattr(logging.getLoggerClass(), methodName, logForLevel) + setattr(logging, methodName, logToRoot) + + +addLoggingLevel("TRACE", 5) + + +# Note that this doesn't avoid evaluating the call. +# The only way to be completely sure that there is no evaluation is: +# ``` +# if False: _LOG.debug(...) 
+# ``` +def shut_up_log_debug(logger: logging.Logger) -> None: + logging.disable(logging.DEBUG) + # logger.debug = lambda *_: 0 + # logger.trace = lambda *_: 0 + + +# ############################################################################# +# ResourceUsageFilter +# ############################################################################# + + +# From https://stackoverflow.com/questions/10848342 +# and https://docs.python.org/3/howto/logging-cookbook.html#filters-contextual +class ResourceUsageFilter(logging.Filter): + """ + Add fields to the logger about memory and CPU use. + """ + + def __init__(self, report_cpu_usage: bool): + super().__init__() + import psutil + + self._process = psutil.Process() + self._report_cpu_usage = report_cpu_usage + if self._report_cpu_usage: + # Start sampling the CPU usage. + self._process.cpu_percent(interval=1.0) + + def filter(self, record: logging.LogRecord) -> bool: + """ + Override `logging.Filter()`, adding several fields to the logger. + """ + p = self._process + # Report memory usage. + resource_use = get_memory_usage_as_str(p) + # Report CPU usage. + if self._report_cpu_usage: + # CPU usage since the previous call. + cpu_use = p.cpu_percent(interval=None) + resource_use += " cpu=%.0f%%" % cpu_use + record.resource_use = resource_use # type: ignore + return True + + +# ############################################################################# + + +# TODO(gp): Replace `force_print_format` and `force_verbose_format` with `mode`. +def _get_logging_format( + force_print_format: bool, + force_verbose_format: bool, + force_no_warning: bool, + report_memory_usage: bool, + date_format_mode: str = "time", +) -> Tuple[str, str]: + """ + Compute the logging format depending whether running on notebook or in a + shell. 
+ + The logging format can be: + - print: looks like a `print` statement + + :param force_print_format: force to use the non-verbose format + :param force_verbose_format: force to use the verbose format + """ + if _is_running_in_ipynb() and not force_no_warning: + print("WARNING: Running in Jupyter") + verbose_format = not _is_running_in_ipynb() + # + assert not (force_verbose_format and force_print_format), ( + f"Can't use both force_verbose_format={force_verbose_format} " + + f"and force_print_format={force_print_format}" + ) + if force_verbose_format: + verbose_format = True + if force_print_format: + verbose_format = False + # + if verbose_format: + # TODO(gp): We would like to have filename:name:funcName:lineno all + # justified on 15 chars. + # See https://docs.python.org/3/howto/logging-cookbook.html#use-of + # -alternative-formatting-styles + # Something like: + # {{asctime}-5s {{filename}{name}{funcname}{linedo}d}-15s {message} + # + # %(pathname)s Full pathname of the source file where the logging call was + # issued (if available). + # %(filename)s Filename portion of pathname. + # %(module)s Module (name portion of filename). + if True: + log_format = ( + # 04-28_08:08 INFO : + "%(asctime)-5s %(levelname)-5s" + ) + if report_memory_usage: + # rss=0.3GB vms=2.0GB mem_pct=2% cpu=91% + log_format += " [%(resource_use)-40s]" + log_format += ( + # lib_tasks _delete_branches + " %(module)-20s: %(funcName)-30s:" + # 142: ... + " %(lineno)-4d:" + " %(message)s" + ) + else: + # Super verbose: to help with debugging print more info without trimming. + log_format = ( + # 04-28_08:08 INFO : + "%(asctime)-5s %(levelname)-5s" + # .../src/lem1/amp/helpers/system_interaction.py + # _system : + " %(pathname)s %(funcName)-20s " + # 199: ... 
+ " %(lineno)d:" + " %(message)s" + ) + if date_format_mode == "time": + date_fmt = "%H:%M:%S" + elif date_format_mode == "date_time": + date_fmt = "%m-%d_%H:%M" + elif date_format_mode == "date_timestamp": + date_fmt = "%Y-%m-%d %I:%M:%S %p" + else: + raise ValueError(f"Invalid date_format_mode='{date_format_mode}'") + else: + # Make logging look like a normal print(). + # TODO(gp): We want to still prefix with WARNING and ERROR. + log_format = "%(message)s" + date_fmt = "" + return date_fmt, log_format + + +def set_v1_formatter( + ch: Any, + root_logger: Any, + force_no_warning: bool, + force_print_format: bool, + force_verbose_format: bool, + report_cpu_usage: bool, + report_memory_usage: bool, +) -> _ColoredFormatter: + # Decide whether to use verbose or print format. + date_fmt, log_format = _get_logging_format( + force_print_format, + force_verbose_format, + force_no_warning, + report_memory_usage, + ) + # Use normal formatter. + # formatter = logging.Formatter(log_format, datefmt=date_fmt) + # Use formatter with colors. + formatter = _ColoredFormatter(log_format, date_fmt) + ch.setFormatter(formatter) + root_logger.addHandler(ch) + # Report resource usage. + if report_memory_usage: + # Get root logger. + log = logging.getLogger("") + # Create filter. + f = ResourceUsageFilter(report_cpu_usage) + # The ugly part:adding filter to handler. + log.handlers[0].addFilter(f) + return formatter + + +# ############################################################################# +# CustomFormatter +# ############################################################################# + + +# pylint: disable=line-too-long +class CustomFormatter(logging.Formatter): + """ + Override `format` to implement a completely custom logging formatting. 
+ + The logging output looks like: + ``` + 07:37:17 /app/amp/helpers/hunit_test.py setUp 932 - Resetting random.seed to 20000101 + ``` + or for simulated time: + ``` + 07:43:17 @ 2022-01-18 02:43:17 workload /app/amp/helpers/test/test_hlogging.py workload:33 - -> wait + ``` + """ + + def __init__( + self, + *args: Any, + date_format_mode: str = "time", + report_memory_usage: bool = False, + report_cpu_usage: bool = False, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self._date_fmt = self._get_date_format(date_format_mode) + # + try: + # TODO(gp): Automatically detect the time zone. It might be complicated + # in Docker. + import pytz + + self._tzinfo = pytz.timezone("America/New_York") + except ModuleNotFoundError: + # print(f"Can't import pytz: using UTC\n{str(e)}") + self._tzinfo = None + # + self._report_memory_usage = report_memory_usage + self._report_cpu_usage = report_cpu_usage + if self._report_memory_usage or self._report_cpu_usage: + import psutil + + self._process = psutil.Process() + if self._report_cpu_usage: + # Start sampling the CPU usage. + self._process.cpu_percent(interval=1.0) + + def format(self, record: logging.LogRecord) -> str: + # record = copy.copy(record) + # print(pprint.pformat(record.__dict__)) + # `record` looks like: + # {'args': (30,), + # 'created': 1642456725.5569131, + # 'exc_info': None, + # 'exc_text': None, + # 'filename': 'logging_main.py', + # 'funcName': 'test_logger', + # 'levelname': 'WARNING', + # 'levelno': 30, + # 'lineno': 105, + # 'module': 'logging_main', + # 'msecs': 556.9131374359131, + # 'msg': 'WARNING=%s', + # 'name': '__main__', + # 'pathname': 'helpers/logging_testing/logging_main.py', + # 'process': 16484, + # 'processName': 'MainProcess', + # 'relativeCreated': 29.956817626953125, + # 'stack_info': None, + # 'thread': 140250120021824, + # 'threadName': 'MainThread'} + msg = "" + # Add the wall clock time. + msg += self._get_wall_clock_time() + # Report memory usage, if needed. 
+ # rss=0.240GB vms=1.407GB mem_pct=2% cpu=92% + if self._report_memory_usage: + msg_tmp = get_memory_usage_as_str(self._process) + # Escape the % to avoid confusing for a string to expand. + msg_tmp = msg_tmp.replace("%", "%%") + msg += " " + msg_tmp + # Report CPU usage, if needed. + if self._report_cpu_usage: + # CPU usage since the previous call. + msg_tmp = " cpu=%.0f" % self._process.cpu_percent(interval=None) + # Escape the % to avoid confusing for a string to expand. + msg_tmp += "%%" + msg += msg_tmp + # Get the (typically) simulated wall clock time. + import helpers.hwall_clock_time as hwacltim + + simulated_wall_clock_time = hwacltim.get_wall_clock_time() + if simulated_wall_clock_time is not None: + date_fmt = "%Y-%m-%d %I:%M:%S" + msg += " @ " + self._convert_time_to_string( + simulated_wall_clock_time, date_fmt + ) + # Colorize / shorten the logging level if it's not DEBUG. + if record.levelno != logging.DEBUG: + msg += f" - {self._colorize_level(record.levelname)}" + # Add information about which coroutine we are running in. + try: + asyncio.get_running_loop() + task = asyncio.current_task() + if task is not None: + msg += f" {task.get_name()}" + except (RuntimeError, AttributeError): + pass + # Add information about the caller. + # ``` + # /helpers/hunit_test.py setUp:932 + # ``` + # pathname = record.pathname.replace("/amp", "") + # msg += f" {pathname} {record.funcName}:{record.lineno}" + # ``` + # test_hlogging.py _print_time:28 + # ``` + msg += f" {record.filename} {record.funcName}:{record.lineno}" + # Indent. + if len(msg) < 50: + msg = "%-60s" % msg + else: + msg = "%-80s" % msg + # Add the caller string. 
+ msg += f" {record.msg}" + record.msg = msg + return super().format(record) + + @staticmethod + def _get_date_format(date_format_mode: str) -> str: + if date_format_mode == "time": + date_fmt = "%H:%M:%S" + elif date_format_mode == "date_time": + date_fmt = "%m-%d_%H:%M" + elif date_format_mode == "date_timestamp": + date_fmt = "%Y-%m-%d %I:%M:%S %p" + else: + raise ValueError("Invalid date_format") + return date_fmt + + def _convert_time_to_string( + self, now: datetime.datetime, date_fmt: str + ) -> str: + # Convert it to an tz-aware datetime object in UTC time. + dt = now.replace(tzinfo=datetime.timezone.utc) + if self._tzinfo is not None: + # Convert it to desired timezone. + dt = dt.astimezone(self._tzinfo) + time_as_str = dt.strftime(date_fmt) + return time_as_str + + def _get_wall_clock_time(self) -> str: + dt = datetime.datetime.utcnow() + return self._convert_time_to_string(dt, self._date_fmt) + + def _colorize_level(self, level_name: str) -> str: + # Use white as default. + prefix = "\033[" + suffix = "\033[0m" + # Print stacktrace to debug. + if False: + import traceback + + txt = traceback.format_stack() + txt = "".join(txt) + print(txt) + + assert level_name in _COLOR_MAPPING, "Can't find info '%s'" + color_code, tag = _COLOR_MAPPING[level_name] + colored_level_name = f"{prefix}{color_code}m{tag}{suffix}" + return colored_level_name + + +def set_v2_formatter( + ch: Any, + root_logger: Any, + force_no_warning: bool, + force_print_format: bool, + force_verbose_format: bool, + report_memory_usage: bool, + report_cpu_usage: bool, +) -> Union[logging.Formatter, CustomFormatter]: + """ + See params in `init_logger()`. + """ + assert not (force_verbose_format and force_print_format), ( + f"Can't use both force_verbose_format={force_verbose_format} " + + f"and force_print_format={force_print_format}" + ) + # When running in a notebook make logging behave like a `print`. 
+ verbose_format = True + if _is_running_in_ipynb(): + verbose_format = False + if not force_no_warning: + print("WARNING: Running in Jupyter") + # + if force_verbose_format: + verbose_format = True + if force_print_format: + verbose_format = False + # + if verbose_format: + # Force to report memory / CPU usage. + # report_memory_usage = report_cpu_usage = True + # print( + # "report_memory_usage=%s report_cpu_usage=%s" + # % (report_memory_usage, report_cpu_usage) + # ) + formatter: Union[logging.Formatter, CustomFormatter] = CustomFormatter( + report_memory_usage=report_memory_usage, + report_cpu_usage=report_cpu_usage, + ) + else: + # Make logging look like a normal `print()`. + log_format = "%(levelname)-5s %(message)s" + date_fmt = "" + formatter = logging.Formatter(log_format, datefmt=date_fmt) + ch.setFormatter(formatter) + root_logger.addHandler(ch) + return formatter + + +# TODO(gp): Not sure it works properly. +@contextlib.contextmanager +def set_level(logger: Any, level: int) -> None: + """ + Context manager changing the verbosity level. + """ + previous_level = logger.getEffectiveLevel() + try: + logger.setLevel(level) + yield + finally: + logger.setLevel(previous_level) + assert logger.getEffectiveLevel() == previous_level + + +# ############################################################################# + + +def getLogger(name: str) -> logging.Logger: + """ + Get logger with custom trace method support. + + This function provides the same functionality as `logging.getLogger()` + but with proper type hints that include the custom trace method. + + Usage: + ``` + # Instead of `import logging`. 
+ import helpers.hlogging as hlogging + + _LOG = hlogging.getLogger(__name__) + _LOG.trace("This works without type checker errors") + _LOG.debug("Standard logging methods also work") + ``` + """ + return logging.getLogger(name) + + +def test_logger() -> None: + print("# Testing logger ...") + print("effective level=", _LOG.getEffectiveLevel()) + # + if hasattr(_LOG, "trace"): + if hasattr(logging, "TRACE"): + _LOG.trace("TRACE=%s", logging.TRACE) + else: + _LOG.trace("TRACE level not available") + # + _LOG.debug("DEBUG=%s", logging.DEBUG) + # + _LOG.info("INFO=%s", logging.INFO) + # + _LOG.warning("WARNING=%s", logging.WARNING) + # + _LOG.error("ERROR=%s", logging.ERROR) + # + _LOG.critical("CRITICAL=%s", logging.CRITICAL) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi new file mode 100644 index 000000000..993f9cc14 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi @@ -0,0 +1,14 @@ +""" +Type stub for hlogging module with custom Logger that includes trace method. +""" + +import logging +from typing import Any + +class Logger(logging.Logger): + """ + Custom Logger class that includes trace method. + """ + def trace(self, msg: str, *args: Any, **kwargs: Any) -> None: ... + +def getLogger(name: str) -> Logger: ... 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py new file mode 100644 index 000000000..07fe8d14f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py @@ -0,0 +1,18 @@ +""" +Import as: + +import helpers.hmarkdown as hmarkdo +""" + +from helpers.hmarkdown_bullets import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_coloring import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_comments import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_div_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_fenced_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_filtering import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_formatting import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_tables import * # isort:skip # 
noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py new file mode 100644 index 000000000..0edb705a4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py @@ -0,0 +1,248 @@ +""" +Import as: + +import helpers.hmarkdown_bullets as hmarbull +""" + +import logging +import re +from typing import Generator, List, Tuple + +from helpers.hmarkdown_comments import ( + process_comment_block, + process_single_line_comment, +) + +_LOG = logging.getLogger(__name__) + +_TRACE = False + +# ############################################################################# +# Formatting markdown +# ############################################################################# + + +# These are the colors that are supported by Latex / markdown, are readable on +# white, and form an equidistant color palette. +_ALL_COLORS = [ + "red", + "orange", + "brown", + "olive", + "green", + "teal", + "cyan", + "blue", + "violet", + "darkgray", + "gray", +] + + +# TODO(gp): -> hmarkdown_color.py? +# TODO(gp): This seems the same as `_colorize_bullet_points()`. +def colorize_bold_text( + markdown_text: str, color_sequence: bool, *, use_abbreviations: bool = True +) -> str: + r""" + Add colors to bold text in markdown using equidistant colors from an array. + + The function finds all bold text (enclosed in ** or __) and adds + LaTeX color commands while preserving the rest of the markdown + unchanged. 
+ + :param markdown_text: Input markdown text + :param color_sequence: Sequence of colors to use + :param use_abbreviations: Use LaTeX abbreviations for colors, + `\red{text}` instead of `\textcolor{red}{text}` + :return: Markdown text with colored bold sections + """ + # Remove any existing color formatting. + # Remove \color{text} format. + markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) + # Remove \textcolor{color}{text} format. + markdown_text = re.sub( + r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text + ) + # Find all bold text (both ** and __ formats). + bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" + # matches will look like: + # For **text**: group(1)='text', group(2)=None. + # For __text__: group(1)=None, group(2)='text'. + matches = list(re.finditer(bold_pattern, markdown_text)) + if not matches: + return markdown_text + result = markdown_text + # Calculate color spacing to use equidistant colors. + if color_sequence == "equidistant": + color_step = len(_ALL_COLORS) / len(matches) + elif color_sequence == "fixed": + color_step = 1 + else: + raise ValueError(f"Invalid color sequence: {color_sequence}") + # Process matches in reverse to not mess up string indices. + for i, match in enumerate(reversed(matches)): + # Get the matched bold text (either ** or __ format). + bold_text = match.group(1) or match.group(2) + # Calculate `color_idx` using equidistant spacing. + color_idx = int((len(matches) - 1 - i) * color_step) % len(_ALL_COLORS) + color = _ALL_COLORS[color_idx] + # Create the colored version. + if use_abbreviations: + # E.g., \red{text} + colored_text = f"\\{color}{{{bold_text}}}" + else: + # E.g., \textcolor{red}{text} + colored_text = f"\\textcolor{{{color}}}{{{bold_text}}}" + # Apply bold. + colored_text = f"**{colored_text}**" + # Replace in the original text. 
+ result = result[: match.start()] + colored_text + result[match.end() :] + return result + + +def remove_bullets(markdown_text: str) -> str: + """ + Remove bullet points (dashes) and leading spaces from markdown text. + + This function removes all leading dashes (`-`) from lines and removes + leading whitespace. Empty lines are preserved. + + :param markdown_text: Input markdown text + :return: Markdown text with bullets removed + """ + lines = markdown_text.split("\n") + result = [] + for line in lines: + # Check if line is not empty. + if line.strip(): + # Remove leading whitespace. + stripped_line = line.lstrip() + # Check if line starts with a bullet point. + if stripped_line.startswith("- "): + # Remove the bullet and the space after it. + result.append(stripped_line[2:]) + else: + # Keep the line as is (no leading whitespace). + result.append(stripped_line) + else: + # Preserve empty lines. + result.append("") + return "\n".join(result) + + +def format_first_level_bullets(markdown_text: str) -> str: + """ + Add empty lines only before first level bullets and remove all empty lines + from markdown text. + + :param markdown_text: Input markdown text + :return: Formatted markdown text + """ + # Split into lines and remove empty ones. + lines = [line for line in markdown_text.split("\n") if line.strip()] + # Add empty lines only before first level bullets. + result = [] + for i, line in enumerate(lines): + # Check if current line is a first level bullet (no indentation). + if re.match(r"^- ", line): + # Add empty line before first level bullet if not at start. + if i > 0: + result.append("") + result.append(line) + return "\n".join(result) + + +def process_code_block( + line: str, in_code_block: bool, i: int, lines: List[str] +) -> Tuple[bool, bool, List[str]]: + """ + Process lines of text to handle code blocks that start and end with '```'. 
+ + The transformation is to: + - add an empty line before the start/end of the code + - indent the code block with four spaces + - replace '//' with '# ' to comment out lines in Python code + + :param line: The current line of text being processed. + :param in_code_block: A flag indicating if the function is currently + inside a code block. + :param i: The index of the current line in the list of lines. + :param lines: the lines of text to process + :return: tuple containing: + - `do_continue`: whether to continue processing the current line or skip + it + - `in_code_block`: boolean indicating whether the function is currently + inside a code block + - list of processed lines of text + """ + out: List[str] = [] + do_continue = False + # Look for a code block. + if re.match(r"^(\s*)```", line): + _LOG.debug(" -> code block") + in_code_block = not in_code_block + # Add empty line before the start of the code block. + if ( + in_code_block + and (i + 1 < len(lines)) + and re.match(r"\s*", lines[i + 1]) + ): + out.append("\n") + out.append(" " + line) + if ( + not in_code_block + and (i + 1 < len(lines)) + and re.match(r"\s*", lines[i + 1]) + ): + out.append("\n") + do_continue = True + return do_continue, in_code_block, out + if in_code_block: + line = line.replace("// ", "# ") + out.append(" " + line) + # We don't do any of the other post-processing. + do_continue = True + return do_continue, in_code_block, out + return do_continue, in_code_block, out + + +# TODO(gp): -> iterator +# TODO(gp): where is this used? +def process_lines(lines: List[str]) -> Generator[Tuple[int, str], None, None]: + """ + Process lines of text to handle comment blocks, code blocks, and single + line comments. 
+ + :param lines: list of all the lines of text being processed + :return: generator of processed lines of text + """ + out: List[str] = [] + in_skip_block = False + in_code_block = False + for i, line in enumerate(lines): + _LOG.debug("%s:line=%s", i, line) + # 1) Remove comment block. + if _TRACE: + _LOG.debug("# 1) Process comment block.") + do_continue, in_skip_block = process_comment_block(line, in_skip_block) + if do_continue: + continue + # 2) Remove code block. + if _TRACE: + _LOG.debug("# 2) Process code block.") + do_continue, in_code_block, out_tmp = process_code_block( + line, in_code_block, i, lines + ) + out.extend(out_tmp) + if do_continue: + continue + # 3) Remove single line comment. + if _TRACE: + _LOG.debug("# 3) Process single line comment.") + do_continue = process_single_line_comment(line) + if do_continue: + continue + out.append(line) + # + yield from enumerate(out) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py new file mode 100644 index 000000000..ba7278726 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py @@ -0,0 +1,286 @@ +""" +Utilities for colorizing markdown and LaTeX text with color commands. 
+ +Import as: + +import helpers.hmarkdown_coloring as hmarcolo +""" + +import logging +import re +from typing import Dict, List, Optional + +import helpers.hdbg as hdbg +from helpers.hmarkdown_fenced_blocks import ( + replace_fenced_blocks_with_tags, + replace_tags_with_fenced_blocks, +) +from helpers.hmarkdown_tables import ( + replace_tables_with_tags, + replace_tags_with_tables, +) + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Colorize +# ############################################################################# + +# Mapping of markdown color names to their LaTeX color equivalents for use in +# \textcolor{} commands. +_MD_COLORS_LATEX_MAPPING = { + "red": "red", + "orange": "orange", + "yellow": "yellow", + "lime": "lime", + "green": "darkgreen", + "teal": "teal", + "cyan": "cyan", + "blue": "blue", + "purple": "purple", + "violet": "violet", + "magenta": "magenta", + "pink": "pink", + "brown": "brown", + "olive": "olive", + "gray": "gray", + "darkgray": "darkgray", + "lightgray": "lightgray", + "black": "black", + "white": "white", +} + + +def get_md_colors_latex_mapping() -> Dict[str, str]: + """ + Get a copy of the markdown-to-LaTeX color mapping. + + :return: Dict mapping color names (e.g., 'red', 'blue') to LaTeX color names + """ + return dict(_MD_COLORS_LATEX_MAPPING) + + +# Curated list of colors that are visually distinguishable and work well in +# both markdown and LaTeX contexts (excludes ones which are too light or have +# poor contrast). +_MD_COLORS = [ + "red", + "orange", + # "yellow", + # "lime", + "green", + "teal", + "cyan", + "blue", + # "purple", + "violet", + "magenta", + # "pink", + "brown", + "olive", + "gray", + "darkgray", + # "lightgray", + "black", + # "white", +] + + +def get_md_colors() -> List[str]: + """ + Get a copy of the curated list of markdown colors. 
def process_color_commands(in_line: str) -> str:
    r"""
    Transform color commands like `\red{xyz}` into valid LaTeX syntax.

    If the content is text (not math), wraps it in `\text{}`.

    E.g.:
    - `\red{abc}` -> `\textcolor{red}{\text{abc}}`
    - `\blue{x + y}` -> `\textcolor{blue}{x + y}`

    Colors are processed one at a time, in the order of the color mapping, so
    the output of an earlier substitution is visible to later ones.

    :param in_line: input line to process
    :return: line with color commands transformed
    """

    def _to_textcolor(match: re.Match, latex_color: str) -> str:
        r"""
        Build the `\textcolor{...}{...}` replacement for one matched command.
        """
        content = match.group(1)
        # Math expressions (containing operators, brackets, etc.) render
        # directly; plain text needs a `\text{}` wrapper to render properly.
        is_math_expr = any(c in content for c in "+-*/=<>{}[]()^_")
        if is_math_expr:
            return rf"\textcolor{{{latex_color}}}{{{content}}}"
        return rf"\textcolor{{{latex_color}}}{{\text{{{content}}}}}"

    for md_color, latex_color in get_md_colors_latex_mapping().items():
        # This regex matches color commands like \red{content}, \blue{content},
        # etc.
        pattern = re.compile(
            rf"""
            \\{md_color}    # Match the color command (e.g., \red, \blue, etc.).
            \{{             # Match the opening curly brace.
            ([^}}]*)        # Capture everything inside the curly braces.
            \}}             # Match the closing curly brace.
            """,
            re.VERBOSE,
        )
        # Bind the current LaTeX color as a default argument so the callback
        # does not depend on the loop variable.
        in_line = pattern.sub(
            lambda m, latex_color=latex_color: _to_textcolor(m, latex_color),
            in_line,
        )
    return in_line
# TODO(gp): -> List[str]
# TODO(gp): Use hmarkdown.process_lines() and test it.
def colorize_bullet_points_in_slide(
    txt: str,
    *,
    use_abbreviations: bool = True,
    interpolate_colors: bool = False,
    all_md_colors: Optional[List[str]] = None,
) -> str:
    r"""
    Colorize bold markdown items `**text**` with color commands.

    Scans the text line-by-line for bold markdown items and wraps each in a
    color command (e.g., `**\red{text}**`). Skips code blocks and tables to
    preserve their formatting. Bold items are colored sequentially using the
    provided color list.

    :param txt: Markdown text containing bold items to colorize
    :param use_abbreviations:
        - If True, use abbreviated color syntax (e.g., `\red{foo}`)
        - If False, use full LaTeX syntax (e.g., `\textcolor{red}{foo}`)
    :param interpolate_colors:
        - If True, evenly space selected colors across all bold items
        - If False, use a predefined sequence for common counts (1-4 items get
          fixed color sets, more items take the first colors of
          `all_md_colors`)
    :param all_md_colors: List of available colors to pick from
        - Default: curated list from `get_md_colors()`
    :return: Markdown text with bold items wrapped in color commands
    """
    hdbg.dassert_isinstance(txt, str)
    if all_md_colors is None:
        all_md_colors = list(get_md_colors())
    # Strip code blocks and tables to avoid colorizing content inside them.
    lines = txt.split("\n")
    lines, fence_map = replace_fenced_blocks_with_tags(lines)
    _LOG.debug("Found %s fenced blocks", len(fence_map))
    lines, table_map = replace_tables_with_tags(lines)
    _LOG.debug("Found %s tables", len(table_map))
    # Count bold markers (**) to determine how many bold items exist.
    tot_bold = sum(len(re.findall(r"\*\*", line)) for line in lines)
    _LOG.debug("tot_bold=%s", tot_bold)
    # Divide by 2 since each bold item is wrapped with ** on both sides.
    num_bolds = tot_bold // 2
    # Nothing to colorize: this also covers a single unmatched `**`, which
    # would otherwise lead to a division by zero when interpolating.
    if num_bolds == 0:
        return txt

    def _interpolate_colors(num_bolds: int) -> List[str]:
        """
        Sample colors evenly spaced to cover all bold items distinctly.
        """
        # Clamp the step to 1: a zero step is an invalid slice, and the
        # shortage of colors is reported by the assertion below instead.
        step = max(1, len(all_md_colors) // num_bolds)
        return list(all_md_colors)[::step][:num_bolds]

    if interpolate_colors:
        colors = _interpolate_colors(num_bolds)
    else:
        # Use fixed color sequences for small numbers of bold items; for larger
        # counts, take the first `num_bolds` available colors.
        predefined = {
            1: ["red"],
            2: ["red", "blue"],
            3: ["red", "green", "blue"],
            4: ["red", "green", "blue", "violet"],
        }
        if num_bolds in predefined:
            colors = predefined[num_bolds]
        else:
            colors = all_md_colors[:num_bolds]
    _LOG.debug("colors=%s", colors)
    hdbg.dassert_lte(
        num_bolds, len(colors), "Number of bold items exceeds available colors"
    )
    latex_mapping = get_md_colors_latex_mapping()
    color_idx = 0

    def color_replacer(match: re.Match) -> str:
        r"""
        Replace strings like "**foo**" with strings like "**\red{foo}**".
        """
        nonlocal color_idx
        text = match.group(1)
        # The index must be strictly smaller than the number of colors to be
        # usable for indexing.
        hdbg.dassert_lt(
            color_idx,
            len(colors),
            "Color index out of bounds; not enough colors assigned",
        )
        color_to_use = colors[color_idx]
        hdbg.dassert_in(
            color_to_use,
            latex_mapping,
            "Selected color is not in the color mapping",
        )
        latex_color = latex_mapping[color_to_use]
        color_idx += 1
        if use_abbreviations:
            return f"**\\{color_to_use}{{{text}}}**"
        return f"**\\textcolor{{{latex_color}}}{{{text}}}**"

    colored_lines = [
        re.sub(r"\*\*([^*]+)\*\*", color_replacer, line) for line in lines
    ]
    # Restore code blocks and tables that were temporarily replaced with tags.
    colored_lines = replace_tags_with_fenced_blocks(colored_lines, fence_map)
    colored_lines = replace_tags_with_tables(colored_lines, table_map)
    return "\n".join(colored_lines)
+ + :param line: current line of text being processed + :param in_skip_block: flag indicating if the function is currently + inside a comment block + :return: tuple containing: + - `do_continue`: whether to continue processing the current line or skip + it + - `in_skip_block`: boolean indicating whether the function is currently + inside a comment block + """ + do_continue = False + if line.startswith(r"") or re.search(r"^\s*\*\/", line): + # End skipping comments. + in_skip_block = False + # Skip comment. + _LOG.debug(" -> skip") + do_continue = True + return do_continue, in_skip_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py new file mode 100644 index 000000000..169e06624 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py @@ -0,0 +1,132 @@ +""" +Utilities for handling div blocks in markdown files. + +This module provides functions to add and remove prettier-ignore comments +around div blocks in markdown files. + +Import as: + +import helpers.hmarkdown_div_blocks as hmadiblo +""" + +from typing import List, Tuple + + +def _split_lines_into_chunks( + lines: List[str], +) -> List[Tuple[bool, List[str]]]: + """ + Split lines into chunks of div blocks and non-div blocks. + + A div block starts with a line containing ::: and ends with another + line containing :::. + + :param lines: List of strings representing lines in a markdown file. + :return: List of tuples (is_div_block, chunk_lines) where is_div_block + indicates if the chunk is a div block. + """ + chunks = [] + i = 0 + while i < len(lines): + line = lines[i] + # Check if this line starts a div block. + if line.strip().startswith(":::"): + # Look ahead to find the closing div block. 
+ j = i + 1 + while j < len(lines): + if lines[j].strip().startswith(":::"): + # Found the end of the div block. + chunk_lines = lines[i : j + 1] + chunks.append((True, chunk_lines)) + i = j + 1 + break + j += 1 + else: + # No closing div block found, treat as regular line. + chunks.append((False, [line])) + i += 1 + else: + # Start a non-div block chunk. + chunk_lines = [line] + i += 1 + # Continue collecting non-div lines. + while i < len(lines) and not lines[i].strip().startswith(":::"): + chunk_lines.append(lines[i]) + i += 1 + chunks.append((False, chunk_lines)) + return chunks + + +def add_prettier_ignore_to_div_blocks(lines: List[str]) -> List[str]: + """ + Add prettier-ignore comments around div blocks. + + A div block starts with a line containing ::: and has another line + with ::: following it. + + Examples of div blocks: + - :::: + ::::{.column width=40%} + - :::columns + ::::{.column width=60%} + - :::: + ::: + + :param lines: List of strings representing lines in a markdown file. + :return: List of strings with prettier-ignore comments added. + """ + # Step 1: Split into chunks. + chunks = _split_lines_into_chunks(lines) + # Step 2: Process chunks and add prettier-ignore comments. + result = [] + for is_div_block, chunk_lines in chunks: + if is_div_block: + # Add prettier-ignore comments around div blocks. + result.append("") + result.append("") + result.extend(chunk_lines) + result.append("") + result.append("") + else: + # Add non-div block lines as-is. + result.extend(chunk_lines) + return result + + +def remove_prettier_ignore_from_div_blocks(lines: List[str]) -> List[str]: + """ + Remove all prettier-ignore comments from lines. + + This function removes: + - lines + - lines + - Empty lines before prettier-ignore-start + - Empty lines after prettier-ignore-end + + :param lines: List of strings representing lines in a markdown file. + :return: List of strings with prettier-ignore comments removed. 
+ """ + result = [] + i = 0 + while i < len(lines): + line = lines[i] + # Check if this is a prettier-ignore-start comment. + if line.strip() == "": + # Remove empty line before prettier-ignore-start if present. + if result and result[-1] == "": + result.pop() + # Skip the prettier-ignore-start line. + i += 1 + continue + # Check if this is a prettier-ignore-end comment. + if line.strip() == "": + # Skip the prettier-ignore-end line. + i += 1 + # Skip empty line after prettier-ignore-end if present. + if i < len(lines) and lines[i] == "": + i += 1 + continue + # Add all other lines. + result.append(line) + i += 1 + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py new file mode 100644 index 000000000..8d3614b9b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py @@ -0,0 +1,131 @@ +""" +Import as: + +import helpers.hmarkdown_fenced_blocks as hmafeblo +""" + +import logging +import pprint +import re +from typing import Dict, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Add a decorator like in hprint to process both strings and lists +# of strings. + + +def replace_fenced_blocks_with_tags( + lines: List[str], +) -> Tuple[List[str], Dict[str, str]]: + """ + Replace fenced blocks with a tag and return the mapping from tags to the + fenced block text. 
+ + E.g., + ```` + hello + world + ```python + foo + ``` + bye + ```` + is replaced with: + ``` + hello + world + + bye + ``` + + :param lines: list of lines to process + :return: tuple containing: + - list of lines with the fenced blocks replaced by tags + - mapping from tags to the fenced block text + """ + hdbg.dassert_isinstance(lines, list) + result = [] + # True if we are inside a fenced block. + in_fenced_block = False + # Count the number of fenced blocks found. + fenced_block_count = 0 + # Store the mapping between the block number and the fence type. + fence_map = {} + # Store the text of the fenced block. + fence_depth = 0 + fence_text = [] + for i, line in enumerate(lines): + _LOG.debug("%d:line='%s'", i, line) + _LOG.debug( + " " + + hprint.to_str("fenced_block_count in_fenced_block fence_depth") + ) + # Look for the start of a fenced block. + fence_match = re.match(r"^\s*(`{3,})", line) + if fence_match: + _LOG.debug(" -> fence_match") + curr_fence_depth = len(fence_match.group(0)) + if not in_fenced_block: + # Start of a fenced block. + _LOG.debug(" -> start of fenced block") + in_fenced_block = True + fence_depth = curr_fence_depth + fenced_block_count += 1 + fence_text.append(line) + else: + # We are already in a fenced block. + fence_text.append(line) + if curr_fence_depth == fence_depth: + # End of block found. + _LOG.debug(" -> end of fenced block") + in_fenced_block = False + # Replace nested code block markers with tag. + result.append(f"") + fence_map[str(fenced_block_count)] = "\n".join(fence_text) + _LOG.debug(" -> added to fence_map") + # Reset state. + fence_depth = 0 + fence_text = [] + else: + if in_fenced_block: + _LOG.debug(" -> in_fenced_block") + fence_text.append(line) + else: + result.append(line) + return result, fence_map + + +def replace_tags_with_fenced_blocks( + lines: List[str], fence_map: Dict[str, str] +) -> List[str]: + """ + Replace tags with fenced blocks. 
+ + :param lines: list of lines to process + :param fence_map: mapping from tags to fenced block text + :return: list of lines with tags replaced by fenced blocks + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(fence_map, dict) + result = [] + for line in lines: + if line.startswith("")[0] + hdbg.dassert_in(tag, fence_map, "Found unmatched tag %s", tag) + result.append(fence_map[tag]) + del fence_map[tag] + else: + result.append(line) + hdbg.dassert_eq( + len(fence_map), + 0, + "Found %s unmatched tags:\n%s", + len(fence_map), + pprint.pformat(fence_map), + ) + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py new file mode 100644 index 000000000..666c3d03b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py @@ -0,0 +1,109 @@ +""" +Import as: + +import helpers.hmarkdown_filtering as hmarfilt +""" + +import logging +import re +from typing import List, Tuple + +import helpers.hdbg as hdbg +from helpers.hmarkdown_headers import ( + extract_section_from_markdown, +) +from helpers.hmarkdown_slides import extract_slides_from_markdown + +_LOG = logging.getLogger(__name__) + + +def filter_by_header(lines: List[str], header: str) -> List[str]: + """ + Extract a specific header from markdown text. + + :param lines: list of markdown lines to be processed + :param header: header to filter by (e.g., `# Introduction`) + :return: filtered lines + """ + hdbg.dassert_isinstance(lines, list) + # Filter by header. 
def _parse_range(range_as_str: str, max_value: int) -> Tuple[int, int]:
    """
    Convert a 0-indexed `start:end` string into a pair of integer indices.

    :param range_as_str: string in format 'start:end' where each side is
        either a number or 'None' (case-insensitive); 'None' stands for 0 on
        the start side and for `max_value` on the end side
    :param max_value: value used for the end index when it is 'None'
    :return: tuple of '(start_index, end_index)' as 0-indexed integers
    """
    parsed = re.match(r"^(\S+):(\S+)$", range_as_str)
    hdbg.dassert(parsed, "Invalid range_as_str='%s'", range_as_str)
    assert parsed is not None
    start_token, end_token = parsed.groups()
    # Resolve the start before the end, so a malformed start is reported first.
    start_index = 0 if start_token.lower() == "none" else int(start_token)
    end_index = max_value if end_token.lower() == "none" else int(end_token)
    return start_index, end_index
+ + :param lines: list of lines to be processed + :param filter_by_slides: 0-indexed range string like `0:10`, `0:None`, or `None:10` + :return: filtered lines + """ + hdbg.dassert_isinstance(lines, list) + slides_info, last_line_number = extract_slides_from_markdown(lines) + _LOG.debug("slides_info=%s\n%s", len(slides_info), slides_info) + start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) + _LOG.debug("start_slide=%s, end_slide=%s", start_slide, end_slide) + hdbg.dassert_lte(start_slide, end_slide) + hdbg.dassert_lte(end_slide, len(slides_info)) + start_line = slides_info[start_slide].line_number + if end_slide == len(slides_info): + end_line = last_line_number + else: + end_line = slides_info[end_slide].line_number + _LOG.warning( + "filter_by_slides='%s' -> lines=[%s:%s]", + filter_by_slides, + start_line, + end_line, + ) + txt = lines[start_line - 1 : end_line - 1] + hdbg.dassert_isinstance(txt, list) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py new file mode 100644 index 000000000..f3fd1b4a9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py @@ -0,0 +1,530 @@ +""" +Import as: + +import helpers.hmarkdown_formatting as hmarform +""" + +import logging +import re +from typing import List + +import helpers.hdbg as hdbg +import helpers.hmarkdown_headers as hmarhead +import helpers.hmarkdown_slides as hmarslid +import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr + +_LOG = logging.getLogger(__name__) + + +def remove_end_of_line_periods(lines: List[str]) -> List[str]: + """ + Remove periods at the end of each line in the given text. 
+ + :param lines: list of input lines to process + :return: lines with end-of-line periods removed + """ + hdbg.dassert_isinstance(lines, list) + txt_out = [line.rstrip(".") for line in lines] + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +def remove_empty_lines(lines: List[str]) -> List[str]: + """ + Remove empty lines from the given text. + + :param lines: list of input lines to process + :return: lines with empty lines removed + """ + hdbg.dassert_isinstance(lines, list) + txt_out = [line for line in lines if line != ""] + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +# def remove_gdoc_artifacts(lines: List[str]) -> List[str]: +# """ +# Remove empty lines from the given text. + +# :param lines: list of input lines to process +# :return: lines with empty lines removed +# """ +# hdbg.dassert_isinstance(lines, list) +# # Remove “” and …. +# lines = re.sub(r"“", '"', lines) +# lines = re.sub(r"”", '"', lines) +# lines = re.sub(r"’", "'", lines) +# lines = re.sub(r"…", "", lines) +# hdbg.dassert_isinstance(lines, list) +# return lines + + +# TODO(gp): Add tests. +def remove_code_delimiters(lines: List[str]) -> List[str]: + """ + Remove ```python and ``` delimiters from a given text. + + :param lines: list of input lines containing code delimiters + :return: lines with the code delimiters removed + """ + hdbg.dassert_isinstance(lines, list) + # Join lines back to text, apply regex logic, then split again. + txt = "\n".join(lines) + # Replace the ```python and ``` delimiters with empty strings. + txt_out = txt.replace("```python", "").replace("```", "") + txt_out = txt_out.strip() + # Remove the numbers at the beginning of the line, if needed + # E.g., `3: """` -> `"""`. + txt_out = re.sub(r"(^\d+: )", "", txt_out, flags=re.MULTILINE) + # Split back into lines. 
def remove_formatting(txt: str) -> str:
    r"""
    Remove markdown and LaTeX formatting from text.

    The following constructs are replaced by their inner text, in this order:
    - bold markdown `**text**`
    - italic markdown `*text*`
    - `\textcolor{color}{text}`
    - single-argument commands such as `\red{text}`

    :param txt: input text to process
    :return: text with formatting removed
    """
    # Replace bold markdown syntax with plain text.
    txt = re.sub(r"\*\*(.*?)\*\*", r"\1", txt)
    # Replace italic markdown syntax with plain text.
    txt = re.sub(r"\*(.*?)\*", r"\1", txt)
    # Remove \textcolor{red}{ ... }.
    txt = re.sub(r"\\textcolor\{(.*?)\}\{(.*?)\}", r"\2", txt)
    # Remove \red{ ... }.
    # The command name must not contain braces, backslashes, or whitespace:
    # a greedy `\S+` would span adjacent commands, e.g. `\red{a}\blue{b}`, and
    # silently drop the content of all but the last one.
    txt = re.sub(r"\\[^\s{}\\]+\{(.*?)\}", r"\1", txt)
    return txt
def bold_first_level_bullets(
    lines: List[str], *, max_length: int = 30
) -> List[str]:
    """
    Make first-level bullets bold in markdown text.

    A bullet is bolded only if it has no indentation, its line contains no
    `**` already, and its text is not longer than `max_length`.

    :param lines: list of input markdown lines
    :param max_length: max length of the bullet text to be bolded. The
        value '-1' means no limit
    :return: formatted markdown lines with first-level bullets in bold
    """
    hdbg.dassert_isinstance(lines, list)
    result = []
    for line in lines:
        # Check if this is a bullet point without bold text already in it.
        if re.match(r"^\s*- ", line) and not re.search(r"\*\*", line):
            indentation = len(line) - len(line.lstrip())
            if indentation == 0:
                # First-level bullet: split the marker from the text.
                m = re.match(r"^(\s*-\s+)(.*)", line)
                hdbg.dassert(m, "Can't parse line='%s'", line)
                bullet_text = m.group(2)  # type: ignore[union-attr]
                # `-1` disables the length limit; the previous check
                # `max_length > -1 and ...` never bolded anything in that case.
                if max_length == -1 or len(bullet_text) <= max_length:
                    spaces = m.group(1)  # type: ignore[union-attr]
                    line = spaces + "**" + bullet_text + "**"
        result.append(line)
    hdbg.dassert_isinstance(result, list)
    return result
+ pre_figure_lines = lines[:first_figure_idx] + figure_content = lines[first_figure_idx:] + # Separate slide titles from other content + slide_titles = [] + text_lines = [] + for line in pre_figure_lines: + if line.strip().startswith("*"): + slide_titles.append(line) + else: + text_lines.append(line) + # Remove empty lines at the beginning and end of text_lines. + while text_lines and not text_lines[0].strip(): + text_lines.pop(0) + while text_lines and not text_lines[-1].strip(): + text_lines.pop() + # Build the column format. + result = [] + # Add slide titles first (outside columns) + result.extend(slide_titles) + result.append("::: columns") + result.append(":::: {.column width=65%}") + result.extend(text_lines) + result.append("::::") + result.append(":::: {.column width=40%}") + result.append("") + result.extend(figure_content) + result.append("::::") + result.append(":::") + hdbg.dassert_isinstance(result, list) + return result + + +def format_md_links_to_latex_format(lines: List[str]) -> List[str]: + r""" + Convert markdown links to formatted links with LaTeX styling. + + Convert markdown links: + - Plain URLs: + http://... or https://... + to the format: + [\textcolor{blue}{\underline{URL}}](URL) + + - Existing formatted links: + [Text](URL) + to the format: + [\textcolor{blue}{\underline{Text}}](URL) + + - Email links: + [](email@domain.com) or [](http://...) or [](https://...) + to the format: + [\textcolor{blue}{\underline{URL}}](URL) + + - Picture links + ![](lectures_source/.../lec_4_1_slide_5_image_1.png) + are left untouched + + :param lines: list of input markdown lines + :return: formatted markdown lines with styled links + """ + hdbg.dassert_isinstance(lines, list) + result = [] + # URL regex pattern. + url_pattern = r"https?://[^\s)}\]`]+" + # Pattern for URLs in backticks. + backtick_url_pattern = r"`(https?://[^\s`]+)`" + # Pattern for existing formatted links that need normalization. 
def format_md_links_to_latex_format(lines: List[str]) -> List[str]:
    r"""
    Convert markdown links to formatted links with LaTeX styling.

    Each line is processed independently, applying these steps in order:
    1) Image links `![...](...)` are swapped for placeholders so that the
       following steps leave them untouched
    2) Empty bracket links `[](TARGET)` become
       `[\textcolor{blue}{\underline{TARGET}}](TARGET)`
    3) URLs in backticks `` `http(s)://...` `` become
       `[\textcolor{blue}{\underline{URL}}](URL)`
    4) Links that are already styled are re-emitted with the same text and URL
    5) Markdown links `[Text](http(s)://...)` become
       `[\textcolor{blue}{\underline{Text}}](URL)`
    6) Email links `[email@domain.com](email@domain.com)` become
       `[\textcolor{blue}{\underline{EMAIL}}](EMAIL)`
    7) Remaining plain `http(s)://...` URLs become
       `[\textcolor{blue}{\underline{URL}}](URL)`; styled links are protected
       by placeholders during this step
    8) Styled links and image links are restored from their placeholders

    :param lines: list of input markdown lines
    :return: formatted markdown lines with styled links
    """
    hdbg.dassert_isinstance(lines, list)
    result = []
    # Pattern for plain URLs: the match stops at whitespace, `)`, `}`, `]` or a
    # backtick.
    url_pattern = r"https?://[^\s)}\]`]+"
    # Pattern for URLs in backticks.
    backtick_url_pattern = r"`(https?://[^\s`]+)`"
    # Pattern for links that are already styled and point to an http(s) URL,
    # i.e., [\textcolor{blue}{\underline{Text}}](URL).
    # The pattern does not compare Text and URL: any styled link matches.
    formatted_link_pattern = (
        r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\((https?://[^)]+)\)"
    )
    # Pattern for markdown links: [Text](URL).
    # The text cannot contain `]` or a backslash, except for escaped
    # underscores (`\_`), so styled links (which contain `\textcolor`) are not
    # matched again.
    markdown_link_pattern = r"\[((?:[^\]\\]|\\[_])+)\]\((https?://[^\)]+)\)"
    # Pattern for email links: [email@domain.com](email@domain.com).
    email_link_pattern = r"\[([^\]\\]+@[^\]\\]+)\]\(([^)]+@[^)]+)\)"
    # Pattern for empty bracket links: [](URL) or [](email).
    empty_bracket_pattern = r"\[\]\(([^\)]+)\)"
    # Pattern for image links: ![...](...).
    image_link_pattern = r"!\[.*?\]\([^\)]+\)"
    for line in lines:
        # Process the line for all URL patterns.
        processed_line = line
        # Store image links temporarily to avoid processing them.
        # The list is rebuilt for every line, so the placeholder indices are
        # local to the line.
        image_placeholders = []

        def store_image_link(match):
            # The placeholder index is the position of the link in
            # `image_placeholders`.
            placeholder = f"__IMAGE_LINK_{len(image_placeholders)}__"
            image_placeholders.append(match.group(0))
            return placeholder

        processed_line = re.sub(
            image_link_pattern, store_image_link, processed_line
        )

        # Convert empty bracket links [](URL) or [](email).
        def convert_empty_bracket_link(match):
            # The target is used both as display text and as link target.
            target = match.group(1)
            return rf"[\textcolor{{blue}}{{\underline{{{target}}}}}]({target})"

        processed_line = re.sub(
            empty_bracket_pattern, convert_empty_bracket_link, processed_line
        )

        # Convert URLs in backticks.
        def convert_backtick_url(match):
            # The backticks are dropped: only the URL is kept.
            url = match.group(1)
            return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})"

        processed_line = re.sub(
            backtick_url_pattern, convert_backtick_url, processed_line
        )

        # Normalize existing formatted links to keep existing display text.
        def normalize_formatted_link(match):
            # Rebuild the link from the captured text and URL.
            text = match.group(1)
            url = match.group(2)
            return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})"

        processed_line = re.sub(
            formatted_link_pattern, normalize_formatted_link, processed_line
        )

        # Convert markdown links [Text](URL) to formatted links.
        def convert_markdown_link(match):
            text = match.group(1)
            url = match.group(2)
            return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})"

        processed_line = re.sub(
            markdown_link_pattern, convert_markdown_link, processed_line
        )

        # Convert email links [email@domain.com](email@domain.com) to formatted links.
        def convert_email_link(match):
            # The link target (second group) is used for both text and target.
            email = match.group(2)
            return rf"[\textcolor{{blue}}{{\underline{{{email}}}}}]({email})"

        processed_line = re.sub(
            email_link_pattern, convert_email_link, processed_line
        )
        # Convert plain URLs (but avoid converting URLs that are already part
        # of formatted links).
        # First, temporarily replace formatted links to avoid interfering with
        # them.
        temp_placeholders = []
        # Store existing correctly formatted links temporarily.
        # Unlike `formatted_link_pattern`, the target here can be any text
        # without `)`, not only an http(s) URL.
        correct_formatted_link_pattern = (
            r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\(([^)]+)\)"
        )

        def store_formatted_link(match):
            placeholder = f"__FORMATTED_LINK_{len(temp_placeholders)}__"
            temp_placeholders.append(match.group(0))
            return placeholder

        temp_line = re.sub(
            correct_formatted_link_pattern, store_formatted_link, processed_line
        )

        # Convert remaining plain URLs.
        def convert_plain_url(match):
            url = match.group(0)
            return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})"

        temp_line = re.sub(url_pattern, convert_plain_url, temp_line)
        # Restore formatted links.
        # Note that `placeholder` holds the stored link text, not the
        # placeholder tag.
        for i, placeholder in enumerate(temp_placeholders):
            temp_line = temp_line.replace(f"__FORMATTED_LINK_{i}__", placeholder)
        # Restore image links.
        for i, image_link in enumerate(image_placeholders):
            temp_line = temp_line.replace(f"__IMAGE_LINK_{i}__", image_link)
        result.append(temp_line)
    hdbg.dassert_isinstance(result, list)
    return result
# TODO(gp): -> format_first_level_bullets_in_slide
def format_first_level_bullets(lines: List[str]) -> List[str]:
    """
    Separate first level bullets with one empty line and drop all the other
    empty lines.

    This is the formatting we use in the slides.

    - A first level bullet is a line starting with `- ` at column 0
    - No empty line is added before the first kept line
    - If the input contains only empty lines, it is returned as is

    :param lines: list of input markdown lines
    :return: formatted markdown lines
    """
    hdbg.dassert_isinstance(lines, list)
    # Keep only the lines with some non-whitespace content.
    non_empty = [txt for txt in lines if txt.strip()]
    # Special case: the input has lines, but all of them are empty.
    if lines and not non_empty:
        return lines
    out: List[str] = []
    for txt in non_empty:
        # `out` is non-empty exactly when this is not the first kept line.
        if out and txt.startswith("- "):
            out.append("")
        out.append(txt)
    hdbg.dassert_isinstance(out, list)
    return out


# TODO(gp): Implement and add tests.
def format_column_blocks(lines: List[str]) -> List[str]:
    """
    Placeholder for normalizing the empty lines around column blocks.

    The intended behavior is to have a single empty line before and after
    each of the following blocks:

    1) Start of the columns and of the first column:
    ```
    ::: columns
    :::: {.column width=55%}
    ```
    2) End of a column and start of the next one:
    ```
    ::::
    :::: {.column width=40%}
    ```
    3) End of the last column and of the columns:
    ```
    ::::
    :::
    ```

    The function is not implemented yet: the input is returned unchanged.

    :param lines: list of input markdown lines
    :return: the same lines passed as input
    """
    return lines
def format_markdown_slide(lines: List[str]) -> List[str]:
    """
    Format markdown text for a slide.

    The lines go through the following steps, in order:
    - `hmarslid.convert_slide_to_markdown()`
    - `dshdlipr.prettier_on_str()` on the joined text, with file type `md`
    - `hmarslid.convert_markdown_to_slide()`
    - `format_first_level_bullets()`
    - `hmarhead.capitalize_header()`

    :param lines: input lines to format
    :return: formatted slide text
    """
    hdbg.dassert_isinstance(lines, list)
    # Bolding the first level bullets is currently disabled (it used to be
    # guarded by `if False:`).
    # lines = bold_first_level_bullets(lines)
    # Format the markdown slides.
    # TODO(gp): Maybe the conversion should be done inside `prettier_on_str`
    # passing a marker to indicate that the text is a slide.
    lines = hmarslid.convert_slide_to_markdown(lines)
    # TODO(gp): Enable once `format_column_blocks()` is implemented.
    # lines = format_column_blocks(lines)
    # Prettier works on a single string, so join the lines and split them
    # back afterwards.
    file_type = "md"
    txt = "\n".join(lines)
    txt = dshdlipr.prettier_on_str(txt, file_type)
    lines = txt.split("\n")
    lines = hmarslid.convert_markdown_to_slide(lines)
    # Format the first level bullets.
    lines = format_first_level_bullets(lines)
    # Capitalize headers and slide titles.
    lines = hmarhead.capitalize_header(lines)
    return lines
def is_markdown_line_separator(line: str, *, min_repeats: int = 5) -> bool:
    """
    Check if the given line is a Markdown separator.

    A separator is a line made of an optional run of `#` and whitespace,
    followed by one of the characters `#`, `/`, `-`, `=` repeated at least
    `min_repeats` times, followed only by whitespace.

    :param line: current line of text being processed
    :param min_repeats: minimum number of times the character has to be
        repeated to be considered a separator, e.g., if `min_repeats = 2`,
        then `##`, `###`, `//` are considered to be line separators, but `#`,
        `/` are not
    :return: whether the line is a separator
    """
    # Optional leading `#` and whitespace.
    leading = r"\#*\s*"
    # Capture a character, then require at least (`min_repeats` - 1) more
    # copies of it.
    repeated_char = rf"([#/=\-])\1{{{min_repeats - 1},}}"
    # Only whitespace is allowed until the end of the line.
    trailing = r"\s*$"
    return re.match(leading + repeated_char + trailing, line) is not None


def is_header(line: str) -> Tuple[bool, int, str]:
    """
    Check if the given line is a Markdown header.

    A header is a line starting with one or more `#`, followed by at least one
    whitespace character and then by the title.

    :param line: line to check
    :return: tuple containing:
        - boolean indicating if the line is a header
        - level of the header (`0` if not a header)
        - title of the header (empty string if not a header)
    """
    parsed = re.match(r"(#+)\s+(.*)", line)
    if parsed is None:
        return False, 0, ""
    hashes, title = parsed.groups()
    # The level is the number of leading `#`.
    return True, len(hashes), title
def _has_internal_capitals(word: str) -> bool:
    """
    Check if a word has capital letters after its first character.

    This detects words like `SimpleFeedForward`, `DeepNPTS` that should be
    preserved without title case transformation.

    An uppercase letter immediately after an apostrophe does not count, since
    it is an artifact of title case tools (e.g., the `T` in "Won'T") and not
    an internal capital.

    :param word: word to check
    :return: `True` if the word has internal capitals, `False` otherwise
    """
    hdbg.dassert_isinstance(word, str)
    # Look at each character after the first one together with the character
    # preceding it. Words of 0 or 1 characters yield no pair.
    return any(
        curr.isupper() and prev != "'" for prev, curr in zip(word, word[1:])
    )


def frame_chapters(lines: List[str], *, max_lev: int = 4) -> List[str]:
    """
    Add a frame around each chapter header.

    A header line (one or more `#` followed by a space) whose level is
    strictly smaller than `max_lev` is surrounded by two separator lines, made
    of the same `#` prefix, a space, and `#` characters up to a total of 80
    characters. All the other lines are kept as they are.

    :param lines: list of input lines
    :param max_lev: headers with a level equal to or larger than this value
        are not framed
    :return: lines with the frames added
    """
    hdbg.dassert_isinstance(lines, list)
    framed: List[str] = []
    for idx, text in enumerate(lines):
        _LOG.debug("line=%d:%s", idx, text)
        header = re.match(r"^(\#+) ", text)
        if header is None:
            # Not a header: nothing to frame.
            framed.append(text)
            continue
        hashes = header.group(1)
        depth = len(hashes)
        _LOG.debug(" -> lev=%s", depth)
        if depth >= max_lev:
            _LOG.debug(
                " -> Skip formatting the chapter frame: lev=%d, max_lev=%d",
                depth,
                max_lev,
            )
            framed.append(text)
            continue
        # The frame is 80 characters long: prefix, space, and `#` fill.
        frame = hashes + " " + "#" * (80 - 1 - depth)
        framed.extend([frame, text, frame])
    hdbg.dassert_isinstance(framed, list)
    return framed


def has_mixed_case(word: str) -> bool:
    """
    Check if a word has capital letters in positions other than the first.

    This detects words like "SimpleFeedForward", "DeepNPTS", etc. that should
    be preserved as-is. Unlike `_has_internal_capitals()`, uppercase letters
    following an apostrophe are counted.

    :param word: word to check
    :return: True if the word has capital letters after the first position
    """
    # For words of 0 or 1 characters the slice is empty and the result is
    # False.
    for char in word[1:]:
        if char.isupper():
            return True
    return False
def _capitalize_title_word(word: str) -> str:
    """
    Capitalize the first letter of a word, without capitalizing after
    apostrophes.

    Python's `str.title()` capitalizes the first letter after ANY
    non-alphanumeric character, including apostrophes, e.g., `"won't".title()`
    returns `"Won'T"` instead of the expected `"Won't"`.

    This function uppercases only the first character and lowercases every
    character that follows an apostrophe. All the other characters are left
    untouched.

    :param word: word to capitalize
    :return: word with proper title case (first letter capitalized, no
        capitals after apostrophes)
    """
    if not word:
        return word
    pieces = [word[0].upper()]
    # Walk the remaining characters together with their predecessor.
    for prev, curr in zip(word, word[1:]):
        pieces.append(curr.lower() if prev == "'" else curr)
    return "".join(pieces)
+ m = re.match(r"^(\#+|\*) (.*)$", line) + if m: + # Parse the title. + title = m.group(2) + # Transform to title case, leaving words that are all capitalized + # and conjunctions as is, while preserving quoted strings. + non_cap_words = { + "a", + "an", + "and", + "as", + "at", + "but", + "by", + "for", + "in", + "of", + "on", + "or", + "the", + "to", + "vs", + "with", + } + # Find and temporarily replace quoted strings to preserve them. + quoted_strings = [] + placeholders = [] + # Pattern to match strings inside backticks, single quotes, or double quotes. + # Single quotes are matched only when not preceded or followed by word + # characters, to avoid matching apostrophes in contractions like "don't". + # Backtick and double-quote patterns are simpler since they're less likely + # to be used in natural text. + quote_pattern = r""" + ( # Start of alternation + `[^`]*` # Backtick-quoted string + | # OR + (? str: + quoted_strings.append(match.group(0)) + placeholder = f"__QUOTED_{len(quoted_strings) - 1}__" + placeholders.append(placeholder) + return placeholder + + # Replace quoted strings with placeholders. + title_with_placeholders = re.sub( + quote_pattern, replace_quoted, title, flags=re.VERBOSE + ) + # Split into words. + words = title_with_placeholders.split() + # Find the first non-numeric word index to always capitalize it, + # even if it's in non_cap_words (e.g., "4.4 the Victim" -> "4.4 The Victim"). + first_text_word_idx = None + for j, word in enumerate(words): + if word.startswith("__QUOTED_") and word.endswith("__"): + continue + # Skip numeric/punctuation-only prefixes like "4.4", "1.", "1.2.3". + if not re.match(r"^[\d\.\-]+$", word): + first_text_word_idx = j + break + # If all words are numeric, fall back to index 0. + if first_text_word_idx is None and words: + first_text_word_idx = 0 + # Process each word. 
+ for i, word in enumerate(words): + if word.startswith("__QUOTED_") and word.endswith("__"): + # Skip placeholder words, they will be restored later. + continue + elif i == first_text_word_idx and not word.isupper(): + # Capitalize the first text word (may follow numeric prefix + # like "4.4") even if it's in non_cap_words. + if _has_internal_capitals(word): + # Preserve words with internal capitals. + pass + else: + words[i] = _capitalize_title_word(word) + elif word.isupper(): + # Skip words that are all caps (e.g. ML, API). + continue + elif _has_internal_capitals(word): + # Preserve words with internal capitals (e.g., SimpleFeedForward). + pass + elif word.lower() in non_cap_words: + # Don't capitalize conjunctions and other minor words. + words[i] = word.lower() + else: + # Capitalize other words. + words[i] = _capitalize_title_word(word) + title = " ".join(words) + # Restore quoted strings. + for i, placeholder in enumerate(placeholders): + title = title.replace(placeholder, quoted_strings[i]) + # Reconstruct the line. + line = m.group(1) + " " + title + txt_new.append(line) + else: + txt_new.append(line) + # Restore fenced blocks. + txt_new = hmafeblo.replace_tags_with_fenced_blocks(txt_new, fence_map) + hdbg.dassert_isinstance(txt_new, list) + return txt_new + + +# ############################################################################# +# Header processing +# ############################################################################# + + +# TODO(gp): This could be done by processing `HeaderList`. +def extract_section_from_markdown( + lines: List[str], header_name: str +) -> List[str]: + """ + Extract a section of text from a Markdown document based on the header + name. + + The function identifies a section by locating the specified header + and captures all lines until encountering another header of the same + or higher level. Headers are identified by the '#' prefix, and their + level is determined by the number of '#' characters. 
# TODO(gp): This could be done by processing `HeaderList`.
def extract_section_from_markdown(
    lines: List[str], header_name: str
) -> List[str]:
    """
    Extract a section of text from a Markdown document based on the header
    name.

    The section starts at the header whose text is `header_name` and ends
    right before the next header of the same or higher level (i.e., with the
    same number of `#` or fewer), or at the end of the document.

    A line is considered a header if it starts with `#` after removing the
    surrounding whitespace. Both the level and the text of the header are
    computed on the stripped line, so that an indented header is handled like
    a non-indented one.

    :param lines: markdown content as a list of strings
    :param header_name: exact header name to extract (excluding `#`
        symbols)
    :return: extracted section as a list of strings, including the header line
        itself and all lines until the next header of the same or higher
        level
    :raises ValueError: if no header with text `header_name` is found
    """
    hdbg.dassert_isinstance(lines, list)
    _LOG.debug(hprint.to_str("lines"))
    extracted_lines = []
    # Level of the header of the section being extracted.
    current_level: Optional[int] = None
    # Flag to indicate if we're inside the desired section.
    inside_section: bool = False
    # Process each line in the markdown content.
    for line in lines:
        _LOG.debug(hprint.to_str("line"))
        stripped_line = line.strip()
        # Check if the line is a markdown header.
        if stripped_line.startswith("#"):
            # Determine the level of the header by counting the leading `#`
            # of the stripped line: counting on the original line would give
            # level 0 to an indented header and end the section too early.
            header_level = len(stripped_line) - len(stripped_line.lstrip("#"))
            # Extract the actual header text by stripping `#` and surrounding
            # whitespace.
            header_text = stripped_line.strip("#").strip()
            _LOG.debug(hprint.to_str("header_level, header_text"))
            # Handle the end of the desired section when encountering another
            # header.
            if inside_section:
                hdbg.dassert_is_not(current_level, None)
                current_level = cast(int, current_level)
                if header_level <= current_level:
                    break
            # Check if the current line is the desired header.
            if header_text == header_name:
                # Set the level of the matched header.
                current_level = header_level
                # Mark that we are now inside the desired section.
                inside_section = True
        # Add the line to the output if inside the desired section.
        if inside_section:
            extracted_lines.append(line)
    _LOG.debug(hprint.to_str("extracted_lines"))
    # The flag is set only when the header is found.
    if not inside_section:
        raise ValueError(f"Header '{header_name}' not found")
    hdbg.dassert_isinstance(extracted_lines, list)
    return extracted_lines
@dataclasses.dataclass
class HeaderInfo:
    """
    Store the level, the description, and the line number of a header in the
    original file.

    E.g., `(1, "Chapter 1", 5)` and `(2, "Section 1.1", 10)`

    Each object also has a `children` list, which starts empty and is not one
    of the dataclass fields.
    """

    # Level of the header (at least 1).
    level: int
    # Text of the header (non-empty).
    description: str
    # Line number of the header in the original file (at least 1).
    line_number: int

    def __init__(self, level: int, description: str, line_number: int):
        """
        Validate and store the header information.

        :param level: level of the header, an integer >= 1
        :param description: non-empty text of the header
        :param line_number: line number of the header, an integer >= 1
        """
        # Level.
        hdbg.dassert_isinstance(level, int)
        hdbg.dassert_lte(1, level)
        self.level = level
        # Description.
        hdbg.dassert_isinstance(description, str)
        hdbg.dassert_ne(
            description,
            "",
            "Invalid HeaderInfo: %s, %s, %s",
            level,
            description,
            line_number,
        )
        self.description = description
        # Line number.
        hdbg.dassert_isinstance(line_number, int)
        hdbg.dassert_lte(1, line_number)
        self.line_number = line_number
        # Nested headers, filled when building a tree of headers.
        self.children: List[HeaderInfo] = []

    def as_tuple(self) -> Tuple[int, str, int]:
        """
        Return the header as `(level, description, line_number)`.
        """
        header_tuple = (self.level, self.description, self.line_number)
        return header_tuple

    def __repr__(self) -> str:
        txt = (
            f"HeaderInfo({self.level}, '{self.description}', {self.line_number})"
        )
        return txt


HeaderList = List[HeaderInfo]


def header_list_to_str(header_list: HeaderList) -> str:
    """
    Convert a list of headers into a string.

    :param header_list: list of headers
    :return: string with one header per line, each rendered through `str()`
    """
    header_strs = map(str, header_list)
    return "\n".join(header_strs)
def sanity_check_header_list(header_list: HeaderList) -> None:
    """
    Check that the header list is valid.

    1) The first header should be level 1 (a warning is logged otherwise).
    2) All level 1 headers are unique.
    3) The level of consecutive headers increases by at most one at a time
       (while it can decrease by multiple levels).
       - E.g., the following is valid:
         ```
         # Header 1
         # Header 2
         ## Header 2.1
         ## Header 2.2
         # Header 3
         ```
       - E.g., the following is valid:
         ```
         # Header1
         ## Header 1.1
         ### Header 1.1.1
         # Header 2
         ```
       - E.g., the following is not valid:
         ```
         # Header 1
         ### Header 1.0.1
         # Header 2
         ```

    :param header_list: list of headers to validate
    :raises ValueError: if the level of two consecutive headers increases by
        more than one
    """
    # 1) The first header should be level 1.
    if header_list:
        first_header = header_list[0]
        if first_header.level > 1:
            _LOG.warning(
                "First header '%s' at line %s is not level 1, but %s",
                first_header.description,
                first_header.line_number,
                first_header.level,
            )
    # 2) All level 1 headers are unique.
    top_level_descriptions = []
    for header in header_list:
        if header.level == 1:
            top_level_descriptions.append(header.description)
    hdbg.dassert_no_duplicates(top_level_descriptions)
    # 3) Check that consecutive elements in the header list only increase by at
    # most one level at a time (even if it can decrease by multiple levels).
    # Pair each header with the next one; there are no pairs for lists with
    # fewer than 2 elements.
    for prev_header, next_header in zip(header_list, header_list[1:]):
        hdbg.dassert_isinstance(prev_header, HeaderInfo)
        hdbg.dassert_isinstance(next_header, HeaderInfo)
        if next_header.level - prev_header.level > 1:
            msg_lines = [
                "Consecutive headers increase by more than one level:",
                f" {prev_header}",
                f" {next_header}",
            ]
            raise ValueError("\n".join(msg_lines))
# TODO(gp): Move sanity check outside?
def extract_headers_from_markdown(
    lines: List[str], max_level: int, *, sanity_check: bool = True
) -> HeaderList:
    """
    Extract the headers from a Markdown file and return a `HeaderList`.

    Lines that are visual separators (see `is_markdown_line_separator()`) are
    skipped. Line numbers in the result are 1-based.

    :param lines: content of the input Markdown file as list of strings
    :param max_level: maximum header level to parse (e.g., `3` parses all
        levels up to and including `###`, but not `####`)
    :param sanity_check: whether to check that the header list is valid with
        `sanity_check_header_list()`
    :return: generated `HeaderList`, e.g.,
        ```
        [
            (1, "Chapter 1", 5),
            (2, "Section 1.1", 10), ...]
        ```
    """
    hdbg.dassert_isinstance(lines, list)
    hdbg.dassert_lte(1, max_level)
    header_list: HeaderList = []
    # Process the input file to extract headers.
    # TODO(gp): Use the iterator.
    for line_number, line in enumerate(lines, start=1):
        # Skip the visual separators.
        if is_markdown_line_separator(line):
            continue
        # Get the header level and title.
        is_header_, level, title = is_header(line)
        if not is_header_ or level > max_level:
            continue
        header_list.append(HeaderInfo(level, title, line_number))
    # Check the header list.
    if not sanity_check:
        _LOG.debug("Skipping sanity check")
    else:
        sanity_check_header_list(header_list)
    hdbg.dassert_isinstance(header_list, list)
    return header_list

def header_list_to_vim_cfile(
    markdown_file: str, header_list: HeaderList
) -> List[str]:
    """
    Convert a list of headers into a Vim cfile format.

    Save the returned lines to a file and load it in Vim with `:cfile`
    followed by the path of that file. Then use `:cnext` and `:cprev` to
    navigate between headers.

    :param markdown_file: path to the input Markdown file
    :param header_list: list of headers; the line number and the description
        of each header are used
    :return: generated cfile content as a list of strings, one per header, in
        the format:
        ```
        ...
        <markdown_file>:<line_number>:<description>
        ...
        ```
    """
    hdbg.dassert_isinstance(markdown_file, str)
    hdbg.dassert_isinstance(header_list, list)
    _LOG.debug(hprint.to_str("header_list"))
    output_lines = []
    for header_info in header_list:
        entry = (
            f"{markdown_file}:{header_info.line_number}:{header_info.description}"
        )
        output_lines.append(entry)
    hdbg.dassert_isinstance(output_lines, list)
    return output_lines


def header_list_to_markdown(header_list: HeaderList, mode: str) -> List[str]:
    """
    Convert a list of headers into a Markdown format.

    :param header_list: list of headers; the level and the description of
        each header are used
    :param mode: format of the output:
        - `list`: indents headers to create a nested list
        - `headers`: uses Markdown header syntax (e.g., '#', '##', '###')
    :return: generated Markdown content as a list of strings
    :raises ValueError: if `mode` is not valid (detected when the first header
        is processed)
    """
    hdbg.dassert_isinstance(header_list, list)
    _LOG.debug(hprint.to_str("header_list mode"))
    output_lines = []
    for header_info in header_list:
        # The line number is not needed to render the header.
        level, title, _ = header_info.as_tuple()
        if mode == "headers":
            header_prefix = "#" * level
        elif mode == "list":
            # NOTE(review): the indent unit is reproduced from a source dump
            # with collapsed whitespace; confirm it against the checked-in
            # file.
            header_prefix = " " * (level - 1) + "-"
        else:
            raise ValueError(f"Invalid mode '{mode}'")
        output_lines.append(f"{header_prefix} {title}")
    hdbg.dassert_isinstance(output_lines, list)
    return output_lines
def format_headers(lines: List[str], out_file_name: str, max_lev: int) -> None:
    """
    Format the headers in the input lines and write the formatted text to the
    output file.

    - If a line contains `max_level=<N>`, the first such value overrides
      `max_lev`
    - The existing separator lines are removed
    - Each header up to level `max_lev` is surrounded by two separator lines

    :param lines: list of input lines to process
    :param out_file_name: name of the output file to write the formatted
        text to
    :param max_lev: maximum level of headings to include in the
        formatted text
    """
    hdbg.dassert_isinstance(lines, list)
    txt = lines[:]
    # Look for a `max_level=` marker in the text overriding `max_lev`.
    for line in txt:
        m = re.search(r"max_level=(\d+)", line)
        if m:
            max_lev = int(m.group(1))
            _LOG.warning("Inferred max_level=%s", max_lev)
            break
    hdbg.dassert_lte(1, max_lev)
    # Remove the existing separator lines, keeping everything else.
    txt_tmp = []
    for line in txt:
        if not is_markdown_line_separator(line):
            txt_tmp.append(line)
    txt = txt_tmp[:]
    # Add proper heading of the correct length.
    txt_tmp = []
    for line in txt:
        found = False
        # A line can match at most one level, since the `#` prefix must be
        # followed by a space.
        for i in range(1, max_lev + 1):
            if line.startswith("#" * i + " "):
                row = "#" * i + " " + "#" * (79 - 1 - i)
                txt_tmp.append(row)
                txt_tmp.append(line)
                txt_tmp.append(row)
                found = True
        # Keep the lines that are not headers as they are.
        if not found:
            txt_tmp.append(line)
    # TODO(gp): Remove all empty lines after a heading.
    # TODO(gp): Format title (first line capital and then small).
    hparser.to_file(txt_tmp, out_file_name)


def modify_header_level(lines: List[str], level: int) -> List[str]:
    """
    Increase or decrease the level of headings by the specified amount.

    Lines that are not headers are returned unchanged, apart from the removal
    of trailing newline characters.

    :param lines: input lines to modify
    :param level: amount to adjust header levels (positive increases,
        negative decreases)
    :return: modified lines with header levels adjusted
    """
    hdbg.dassert_isinstance(lines, list)
    txt_tmp = []
    for line in lines:
        # TODO(gp): Use the iterator.
        # Strip only trailing newlines. Note that `rstrip(r"\n")` would strip
        # trailing backslashes and `n` characters instead, e.g., turning
        # `# Introduction` into `# Introductio`.
        line = line.rstrip("\n")
        is_header_, current_level, title = is_header(line)
        if is_header_:
            modified_level = current_level + level
            # Ensure modified level is within valid range (1-6 for markdown headers).
            hdbg.dassert_lte(1, modified_level)
            hdbg.dassert_lte(modified_level, 6)
            line = "#" * modified_level + " " + title
        txt_tmp.append(line)
    hdbg.dassert_isinstance(txt_tmp, list)
    return txt_tmp
def build_header_tree(header_list: HeaderList) -> _HeaderTree:
    """
    Build a tree of headers from the flat list.

    Each header is appended to the `children` of the closest preceding header
    with a smaller level. Headers without such a parent are the roots of the
    tree. Note that the `children` lists of the input headers are modified in
    place.

    We assume that the level changes never jump by more than 1.

    :param header_list: flat list of headers
    :return: tree structure of headers, as the list of its roots
    """
    roots: _HeaderTree = []
    # Chain of headers from a root down to the last header processed.
    open_branch: _HeaderTree = []
    for header in header_list:
        if header.level == 1:
            # A level 1 header always starts a new branch.
            roots.append(header)
            open_branch = [header]
            continue
        # Drop the headers that cannot be the parent, i.e., the ones with a
        # level equal to or larger than the current one.
        while open_branch and open_branch[-1].level >= header.level:
            open_branch.pop()
        # Attach to the parent, if any, otherwise to the roots.
        siblings = open_branch[-1].children if open_branch else roots
        siblings.append(header)
        open_branch.append(header)
    return roots


def _find_header_tree_ancestry(
    tree: _HeaderTree, level: int, description: str
) -> Optional[_HeaderTree]:
    """
    Recursively search for the node matching (level, description).

    The tree is visited depth-first and the first matching node is used.

    :param tree: header tree to search
    :param level: header level to match
    :param description: header description to match
    :return: ancestry list from root to matching node, or None if not
        found
    """
    for candidate in tree:
        is_match = candidate.level == level and candidate.description == description
        if is_match:
            return [candidate]
        # Look for the node below the current one.
        sub_ancestry = _find_header_tree_ancestry(
            candidate.children, level, description
        )
        if sub_ancestry:
            return [candidate] + sub_ancestry
    return None
def header_tree_to_str(
    tree: _HeaderTree,
    ancestry: Optional[_HeaderTree],
    *,
    open_modifier: str = "**",
    close_modifier: str = "**",
    indent: int = 0,
) -> str:
    """
    Return the tree as a string, as a nested list of bullets.

    A node is expanded (i.e., its children are recursively included) only if
    it is part of the ancestry of the selected node.

    - Nodes not in the ancestry are included on one line (even if they have
      children).
    - The selected node (last in the ancestry) is included highlighted, i.e.,
      surrounded by `open_modifier` and `close_modifier`.

    :param tree: tree to convert to a string
    :param ancestry: ancestry of the selected node, from the root down to the
        selected node
    :param open_modifier: modifier to use for the open of the selected node
    :param close_modifier: modifier to use for the close of the selected node
    :param indent: indent of the tree
    :return: string representation of the tree
    """
    # NOTE(review): the indent unit is reproduced from a source dump with
    # collapsed whitespace; confirm it against the checked-in file.
    prefix = " " * indent + "- "
    result = []
    for node in tree:
        _LOG.debug(hprint.to_str("node"))
        # Check if this node is the next expected one in the ancestry branch.
        on_selected_branch = bool(ancestry) and node is ancestry[0]
        if not on_selected_branch:
            # For nodes not on the selected branch, include them without
            # expanding.
            val = prefix + node.description
        else:
            # If this is the last in the ancestry, it is the selected node.
            is_selected = len(ancestry) == 1
            if is_selected:
                label = open_modifier + node.description + close_modifier
            else:
                label = node.description
            val = prefix + label
            _LOG.debug("-> %s", hprint.to_str("val"))
            # The line is never empty since it includes the bullet prefix.
            result.append(val)
            # Expand the children of this node using the rest of the ancestry.
            val = header_tree_to_str(
                node.children,
                ancestry[1:],
                open_modifier=open_modifier,
                close_modifier=close_modifier,
                indent=indent + 1,
            )
        _LOG.debug("-> %s", hprint.to_str("val"))
        # The expansion of a node without children is an empty string.
        if val:
            result.append(val)
    return "\n".join(result)
def selected_navigation_to_str(
    tree: _HeaderTree,
    level: int,
    description: str,
    *,
    open_modifier: str = "**",
    close_modifier: str = "**",
) -> str:
    """
    Given a level and description for the selected node, print the navigation.

    The node is looked up with `_find_header_tree_ancestry()` and the tree is
    rendered with `header_tree_to_str()`. It is an error if the node is not
    in the tree.

    :param tree: header tree
    :param level: level of the selected node
    :param description: description of the selected node
    :param open_modifier: modifier for opening the selected node
    :param close_modifier: modifier for closing the selected node
    :return: navigation string with selected node highlighted
    """
    # Find the path from a root of the tree to the selected node.
    ancestry = _find_header_tree_ancestry(tree, level, description)
    hdbg.dassert_ne(
        ancestry,
        None,
        "Node (%s, '%s') not found",
        level,
        description,
    )
    _LOG.debug(hprint.to_str("ancestry"))
    # Render the tree expanding only the branch of the selected node.
    modifiers = {
        "open_modifier": open_modifier,
        "close_modifier": close_modifier,
    }
    return header_tree_to_str(tree, ancestry, **modifiers)
helpers.hmarkdown_headers import ( + extract_headers_from_markdown, + sanity_check_header_list, +) + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Add a decorator like in hprint to process both strings and lists +# of strings. + +# ############################################################################# +# Rules processing. +# ############################################################################# + +# Rules are organized in 4 levels of a markdown file: +# +# 1) Rule sets (level 1) +# - E.g., `General`, `Python`, `Notebooks`, `Markdown` +# - Level 1 is a set of rules determined mainly by the type of the file we +# are processing +# - Several sets of rules can be applied to a given file type +# - E.g., rules in `Python` and `Notebooks` apply to all Python files +# 2) Sections (level 2) +# - E.g., `Naming`, `Comments`, `Code_design`, `Imports`, `Type_annotations` +# 3) Targets (level 3) +# - E.g., LLM vs Linter +# 4) Atomic rules (level 4) +# - This is the set of rules that are applied to the file +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + +# Extract the rules from the markdown file: +# ``` +# > extract_toc_from_txt.py \ +# -i docs/code_guidelines/all.coding_style_guidelines.reference.md \ +# --max_level 2 +# - General +# - Spelling +# - LLM +# - Linter +# - Python +# - Naming +# - LLM +# - Linter +# - Docstrings +# - ... 
+# - Comments +# - Code_implementation +# - Code_design +# - Imports +# - Type_annotations +# - Functions +# - Scripts +# - Logging +# - Misc +# - Unit_tests +# - All +# - Notebooks +# - General +# - Plotting +# - Jupytext +# - Markdown +# - Naming +# - General +# ``` + +# - The rules to apply to a Python file are automatically extractedas: +# `([`General:*`, `Python:*`], `LLM`)` +# - The rules to apply to a Notebook file are automatically extracted as: +# `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` +# - A user can specify to apply a subset of rules like +# `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` +# - Atomic rules are the first-level bullets of the markdown file, e.g., +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + + +def sanity_check_rules(lines: List[str]) -> None: + """ + Sanity check the rules. + + :param lines: list of text lines to check + """ + header_list = extract_headers_from_markdown(lines, max_level=5) + # 1) Start with level 1 headers. + # 2) All level 1 headers are unique. + # 3) Header levels are increasing / decreasing by at most 1. + sanity_check_header_list(header_list) + # 4) Level 3 headers are always `LLM` or `Linter`. + # for header in header_list: + # if header.level != 3: + # hdbg.dassert_in(header.description, ["LLM", "Linter"]) + # TODO(gp): Implement this. + # 5) All headers have no spaces. + # TODO(gp): Implement this. 
+ + +# A `Rule` is a string separated by `:` characters, where each part can be: +# - `*` (which means "match any string") +# - a `string` (e.g., `Spelling`) +# - a list of strings separated by `|` (e.g., `LLM|Linter`) +# +# E.g., valid rules are: +# - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` +# - For a Python file -> `General|Python:*:LLM` +# - For a Notebook file -> `General|Python|Notebooks:*:LLM` +# - `Python:Naming|Docstrings|Comments:LLM` +SelectionRule = str + + +# A `Guidelines`` is a header list with only level 1 headers storing the full +# hierarchy of the rules as a description, e.g., +# `(1, "Spelling:All:LLM", xyz)` +# TODO(gp): Make Guidelines descend from HeaderList. + +HeaderInfo = hmarhead.HeaderInfo +HeaderList = hmarhead.HeaderList +Guidelines = HeaderList + + +def convert_header_list_into_guidelines( + header_list: HeaderList, +) -> Guidelines: + """ + Convert the header list into a `Guidelines` object with only level 1 + headers and full hierarchy of the rules as description. + + Expand a header list like: + ``` + - General + - Spelling + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + ``` + represented internally as: + ``` + (1, "General", xyz), + (2, "Spelling", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + (1, "Python", xyz), + (2, "Naming", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + ``` + into: + ``` + [ + (1, "Spelling:All:LLM", xyz), + (1, "Spelling:All:Linter", xyz), + (1, "Python:Naming:LLM", xyz), + (1, "Python:Naming:Linter", xyz), + ] + ``` + + :param header_list: input header list to convert + :return: guidelines with flattened hierarchy + """ + hdbg.dassert_isinstance(header_list, list) + # Store the last level headers. + level_1 = "" + level_2 = "" + # Accumulate the last level headers. + level_3_headers = [] + # Scan the header list. + for header_info in header_list: + level, description, line_number = header_info.as_tuple() + # Store the headers found at each level. 
+ if level == 1: + level_1 = description + elif level == 2: + level_2 = description + elif level == 3: + # Store the level 3 header. + hdbg.dassert_ne(level_1, "") + hdbg.dassert_ne(level_2, "") + full_level_3 = f"{level_1}:{level_2}:{description}" + header_info_tmp = HeaderInfo(1, full_level_3, line_number) + level_3_headers.append(header_info_tmp) + else: + raise ValueError(f"Invalid header info={header_info}") + return level_3_headers + + +def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: + r""" + Convert a rule into an actual regular expression. + + E.g., + - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` + - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` + - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` + - `Python:*:Linter` -> `Python:(\S*):Linter` + + :param selection_rule: rule to convert to regex + :return: regex pattern string + """ + hdbg.dassert_isinstance(selection_rule, SelectionRule) + # Parse the rule into tokens. + selection_rule_parts = selection_rule.split(":") + hdbg.dassert_eq(len(selection_rule_parts), 3) + # Process each part of the rule regex. + rule_parts_out = [] + for rule_part_in in selection_rule_parts: + hdbg.dassert_not_in(" ", rule_part_in) + if rule_part_in == "*": + # Convert `*` into `\S*`. + rule_part_out = r"(\S*)" + elif "|" in rule_part_in: + # Convert `LLM|Linter` into `(LLM|Linter)`. + rule_part_out = "(" + rule_part_in + ")" + else: + # Keep the string as is. + rule_part_out = rule_part_in + rule_parts_out.append(rule_part_out) + # Join the parts of the rule back together. + rule_out = ":".join(rule_parts_out) + return rule_out + + +def extract_rules( + guidelines: Guidelines, selection_rules: List[SelectionRule] +) -> Guidelines: + """ + Extract the set of rules from the `guidelines` that match the rule regex. 
+ + :param guidelines: guidelines to extract the rules from + :param selection_rules: selection rules to use to extract the rules + :return: extracted rules + """ + hdbg.dassert_isinstance(guidelines, list) + hdbg.dassert_isinstance(selection_rules, list) + # A rule regex is a string separated by `:` characters, where each part is + # - `*` (meaning "any string") + # - a `string` (e.g., `Spelling`) + # - a list of strings separated by `|` (e.g., `LLM|Linter`) + # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. + # Convert each rule regex into a regular expression. + rule_regex_map: Dict[str, str] = {} + for rule_regex_str in selection_rules: + hdbg.dassert_isinstance(rule_regex_str, SelectionRule) + regex = _convert_rule_into_regex(rule_regex_str) + _LOG.debug(hprint.to_str("rule_regex_str regex")) + hdbg.dassert_not_in(rule_regex_str, rule_regex_map) + rule_regex_map[rule_regex_str] = regex + # Extract the set of rules from the `guidelines` that match the rule regex. + rule_sections = [] + for guideline in guidelines: + # A guideline description is a string separated by `:` characters, where each part is + # (1, "Python:Naming:Linter", xyz), + for k, v in rule_regex_map.items(): + if re.match(v, guideline.description): + _LOG.debug("%s matches %s", k, guideline.description) + if guideline not in rule_sections: + rule_sections.append(guideline) + # Select the rules. + _LOG.debug( + "Selected %s sections:\n%s", + len(rule_sections), + "\n".join([r.description for r in rule_sections]), + ) + return rule_sections + + +# TODO(gp): This seems private? +def parse_rules_from_txt(lines: List[str]) -> List[str]: + """ + Parse rules from a chunk of markdown text. + + - Extract first-level bullet point list items from text until the next one. + - Sub-lists nested under first-level items are extracted together with the + first-level items. 
+ + :param lines: list of text lines to process + ``` + - Item 1 + - Item 2 + - Item 3 + - Item 4 + ``` + :return: extracted bullet points + """ + hdbg.dassert_isinstance(lines, list) + # Store the first-level bullet points. + bullet_points = [] + # Store the current item including the first level bullet point and all + # its sub-items. + current_item = "" + for line in lines: + line = line.rstrip() + if not line: + continue + if re.match(r"^- ", line): + # Match first-level bullet point item. + if current_item: + # Store the previous item, if any. + bullet_points.append(current_item) + # Start a new first-level bullet point item. + current_item = line + elif re.match(r"^\s+- ", line): + # Match a sub-item (non first-level bullet point item). + # Append a sub-item to the current item. + current_item += "\n" + line + elif len(line.strip()) != 0 and current_item: + # Append a line to the current item. + current_item += "\n" + line + # Add the last item if there is one. + if current_item: + bullet_points.append(current_item) + hdbg.dassert_isinstance(bullet_points, list) + return bullet_points + + +def extract_rules_from_section( + lines: List[str], start_line_number: int +) -> List[str]: + """ + Extract rules from a section of a markdown file. + + :param lines: list of markdown text lines to extract the rules from + :param start_line_number: line number of the section to start extracting + the rules from + :return: extracted rules + """ + hdbg.dassert_isinstance(lines, list) + # Find the line number of the next header. + end_line_number = start_line_number + while True: + hdbg.dassert_lt(end_line_number, len(lines)) + line = lines[end_line_number] + if line.startswith("#"): + break + end_line_number += 1 + _LOG.debug("end_line_number=%s", end_line_number) + # Parse the markdown text into a list of bullet points. + bullet_points = parse_rules_from_txt( + lines[start_line_number:end_line_number] + ) + # Extract the rules from the bullet points. 
+ rules = [] + for bullet_point in bullet_points: + rules.append(bullet_point) + hdbg.dassert_isinstance(rules, list) + return rules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py new file mode 100644 index 000000000..2cefec7a8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py @@ -0,0 +1,201 @@ +""" +Import as: + +import helpers.hmarkdown_slides as hmarslid +""" + +import logging +import re +from typing import Any, Callable, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +from helpers.hmarkdown_comments import process_comment_block +from helpers.hmarkdown_headers import ( + HeaderInfo, + HeaderList, + is_markdown_line_separator, +) + +_LOG = logging.getLogger(__name__) + + +_TRACE = True + + +def extract_slides_from_markdown( + lines: List[str], +) -> Tuple[HeaderList, int]: + """ + Extract slides (i.e., sections prepended by `*`) from Markdown file and + return an `HeaderList`. + + :param lines: content of the input Markdown file as list of strings + :return: tuple containing: + - generated `HeaderList` + ``` + [ + (1, "Slide 1", 5), + (1, "Slide 2", 10), ...] + ``` + - last line number of the file, e.g., '100' + """ + hdbg.dassert_isinstance(lines, list) + header_list: HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(lines, start=1): + _LOG.debug("%d: %s", line_number, line) + # TODO(gp): Use the iterator. + # Skip the visual separators. + if is_markdown_line_separator(line): + continue + # Get the header level and title. 
+ m = re.match(r"^\* (.*)$", line) + if m: + title = m.group(1) + header_info = HeaderInfo(1, title, line_number) + header_list.append(header_info) + last_line_number = len(lines) + # Return results. + hdbg.dassert_isinstance(header_list, list) + return header_list, last_line_number + + +# TODO(gp): Consider passing and returning List[str] +def process_slides(txt: str, transform: Callable[..., Any]) -> str: + """ + Process markdown text by applying a transform function to each slide. + + - Slides are sections prepended by `*` + - The text is processed by: + - Extracting the slides one by one + - Calling a `transform()` function on each slide (defined by the user) + - Joining the transformed slides back together + - Comments are left untouched. + + :param txt: markdown text to process + :param transform: function to transform each slide + :return: transformed text + """ + hdbg.dassert_isinstance(txt, str) + # Text of the current slide. + slide_txt: List[str] = [] + # Store all the transformed slides. + transformed_txt: List[str] = [] + # True inside a block to skip. + in_skip_block = False + # True inside a slide. + in_slide = False + # Track line number where slide started. + slide_start_line = 0 + lines = txt.splitlines() + for i, line in enumerate(lines): + _LOG.debug("%s:line='%s'", i, line) + # 1) Remove comment block. + do_continue, in_skip_block = process_comment_block(line, in_skip_block) + if _TRACE: + _LOG.debug(" -> %s", hprint.to_str("do_continue in_skip_block")) + if do_continue: + transformed_txt.append(line) + continue + # 2) Process slide. + if _TRACE: + _LOG.debug(" -> %s", hprint.to_str("in_slide")) + if line.startswith("* ") or line.startswith("#### "): + _LOG.debug("### Found slide") + # Found a slide or the end of the file. + if slide_txt: + _LOG.debug("# Transform slide") + # Transform the slide. 
+ slide_title = slide_txt[0] + transformed_slide = transform( + slide_txt, + slide_title=slide_title, + slide_line_number=slide_start_line, + ) + hdbg.dassert_isinstance(transformed_slide, list) + transformed_txt.extend(transformed_slide) + else: + _LOG.debug("# First slide") + # Start a new slide. + slide_txt = [] + slide_txt.append(line) + slide_start_line = i + in_slide = True + elif in_slide: + _LOG.debug("# Accumulate slide") + slide_txt.append(line) + else: + _LOG.debug("# Accumulate txt outside slide") + transformed_txt.append(line) + # Process the last slide, if needed. + if slide_txt: + hdbg.dassert(in_slide) + in_slide = False + # Transform the slide. + slide_title = slide_txt[0] + transformed_slide = transform( + slide_txt, + slide_title=slide_title, + slide_line_number=slide_start_line, + ) + hdbg.dassert_isinstance(transformed_slide, list) + transformed_txt.extend(transformed_slide) + # + hdbg.dassert( + not in_skip_block, + "Found end of file while still parsing a comment block", + ) + hdbg.dassert(not in_slide, "Found end of file while still parsing a slide") + # Join the transformed slides back together. + result = "\n".join(transformed_txt) + return result + + +# ############################################################################# +# Slides conversion to markdown and back +# ############################################################################# + + +def convert_slide_to_markdown(lines: List[str], *, level: int = 5) -> List[str]: + """ + Convert slide to standard markdown. + + - Handle * bullets to markdown headers level 5 + + :param lines: list of lines to convert + :param level: level of the markdown headers to convert to + :return: list of converted lines + """ + hdbg.dassert_isinstance(lines, list) + converted_lines = [] + for line in lines: + if line.startswith("* "): + # Convert slide bullet to markdown header level 5. 
+ converted_line = "#" * level + " " + line[2:] + converted_lines.append(converted_line) + else: + converted_lines.append(line) + return converted_lines + + +def convert_markdown_to_slide(lines: List[str], *, level: int = 5) -> List[str]: + """ + Convert standard markdown back to slide. + + - Handle markdown headers level 5 to * bullets + + :param lines: list of lines to convert + :param level: level of the markdown headers to convert to + :return: list of converted lines + """ + hdbg.dassert_isinstance(lines, list) + converted_lines = [] + for line in lines: + if line.startswith("#" * level + " "): + # Convert markdown header level 5 back to slide bullet. + converted_line = "* " + line[6:] + converted_lines.append(converted_line) + else: + converted_lines.append(line) + return converted_lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py new file mode 100644 index 000000000..becc00b09 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py @@ -0,0 +1,121 @@ +""" +Import as: + +import helpers.hmarkdown_tables as hmartabl +""" + +import logging +from typing import Dict, List, Tuple + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +def replace_tables_with_tags( + lines: List[str], +) -> Tuple[List[str], Dict[str, str]]: + """ + Replace markdown tables with tag and return mapping from tags to the table. 
+ + E.g., + ``` + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + More text after + ``` + is replaced with: + ``` + Some text before + + More text after + ``` + + :param lines: list of lines to process + :return: tuple containing: + - list of lines with the tables replaced by tags + - mapping from tags to the table text + """ + hdbg.dassert_isinstance(lines, list) + result = [] + table_map = {} + table_count = 0 + i = 0 + while i < len(lines): + line = lines[i].strip() + # Check if this line starts a table (contains |). + if "|" in line and line.strip(): + # Look ahead to see if next line is a separator. + if i + 1 < len(lines): + next_line = lines[i + 1].strip() + # Check if next line is a table separator (contains --- and |). + if "|" in next_line and "-" in next_line: + # Found a table, collect all table lines. + table_lines = [] + # Add header line. + table_lines.append(lines[i]) + i += 1 + # Add separator line. + table_lines.append(lines[i]) + i += 1 + # Add data rows (continue while lines contain |). + while ( + i < len(lines) + and "|" in lines[i].strip() + and lines[i].strip() + ): + table_lines.append(lines[i]) + i += 1 + # Store the table. + table_count += 1 + table_text = "\n".join(table_lines) + table_map[str(table_count)] = table_text + result.append(f"") + continue + # Not a table line, add as-is. + result.append(lines[i]) + i += 1 + return result, table_map + + +def replace_tags_with_tables( + lines: List[str], table_map: Dict[str, str] +) -> List[str]: + """ + Replace tags with markdown tables. + + :param lines: list of lines to process + :param table_map: mapping from tags to table text + :return: list of lines with tags replaced by tables + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(table_map, dict) + # Initialize output. + result = [] + table_map_copy = table_map.copy() + # Parse data. 
+ for line in lines: + if line.startswith(""): + # Extract table number from tag like . + tag_match = line[6:-1] # Remove '' + hdbg.dassert_in( + tag_match, table_map_copy, f"Found unmatched tag {tag_match}" + ) + # Split table text into lines and add them. + table_text = table_map_copy[tag_match] + table_lines = table_text.split("\n") + result.extend(table_lines) + # Remove used tag from map. + del table_map_copy[tag_match] + else: + result.append(line) + # Ensure all tags were used. + hdbg.dassert_eq( + len(table_map_copy), + 0, + f"Found {len(table_map_copy)} unmatched tags: {list(table_map_copy.keys())}", + ) + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py new file mode 100644 index 000000000..7d8cb8d75 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py @@ -0,0 +1,164 @@ +""" +Import as: + +import helpers.hmarkdown_toc as hmartoc +""" + +import logging +import os +import re +import tempfile +from typing import Any, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import dev_scripts_helpers.dockerize.lib_markdown_toc as dshdlmato + +_LOG = logging.getLogger(__name__) + +# ############################################################################# +# YAML preamble +# ############################################################################# + + +def extract_yaml_frontmatter(lines: List[str]) -> Tuple[List[str], List[str]]: + """ + Extract YAML front matter from the beginning of the file. + + YAML front matter is delimited by `---` at the beginning and end. 
+ Example: + ``` + --- + title: My Document + date: 2024-01-01 + --- + ``` + + :param lines: The lines to be processed. + :return: A tuple of (frontmatter_lines, remaining_lines). + """ + _LOG.debug("lines=%s", lines) + # Check if file starts with YAML front matter. + if len(lines) < 3: + # Not enough lines for front matter. + return [], lines + if not re.match(r"^---\s*$", lines[0]): + # No front matter marker at the beginning. + return [], lines + # Find the closing --- marker. + for i in range(1, len(lines)): + if re.match(r"^---\s*$", lines[i]): + # Found closing marker. + frontmatter = lines[: i + 1] + remaining = lines[i + 1 :] + _LOG.debug("Found YAML front matter: %d lines", len(frontmatter)) + return frontmatter, remaining + # No closing marker found, treat as no front matter. + _LOG.debug("No closing YAML front matter marker found") + return [], lines + + +def reattach_yaml_frontmatter( + yaml_frontmatter: List[str], lines: List[str] +) -> List[str]: + """ + Reattach YAML front matter to the beginning of the content lines. + + :param yaml_frontmatter: The YAML front matter lines to reattach. + :param lines: The content lines to prepend the front matter to. + :return: Combined lines with YAML front matter reattached. + """ + if not yaml_frontmatter: + return lines + # Add an empty line after the front matter if the remaining content doesn't + # start with one. + if lines and lines[0] != "": + return yaml_frontmatter + [""] + lines + return yaml_frontmatter + lines + + +# ############################################################################# +# TOC +# ############################################################################# + + +def refresh_toc( + lines: List[str], + *, + use_dockerized_markdown_toc: bool = True, + # TODO(gp): Remove this. + **kwargs: Any, +) -> List[str]: + """ + Refresh the table of contents (TOC) in the given text. + + :param lines: The lines to be processed. 
+ :param use_dockerized_markdown_toc: if True, run markdown-toc in a + Docker container + :return: The lines with the updated TOC. + """ + _LOG.debug("lines=%s", lines) + # Check whether there is a TOC otherwise add it. + # Add `` comment in the doc to generate the TOC after that + # line. By default, it will generate at the top of the file. + # This workaround is useful to generate the TOC after the heading of the doc + # at the top and not include it in the TOC. + if "" not in lines: + _LOG.warning("No tags for table of content in md file: adding it") + lines = [""] + lines + txt = "\n".join(lines) + # Write file. + curr_dir = os.getcwd() + tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name + hio.to_file(tmp_file_name, txt) + # Process TOC. + cmd_opts: List[str] = [] + if use_dockerized_markdown_toc: + # Run `markdown-toc` in a Docker container. + use_sudo = hdocker.get_use_sudo() + force_rebuild = False + dshdlmato.run_dockerized_markdown_toc( + tmp_file_name, + cmd_opts, + use_sudo=use_sudo, + force_rebuild=force_rebuild, + ) + else: + # Run `markdown-toc` installed on the host directly. + executable = "markdown-toc" + cmd = [executable] + cmd_opts + cmd.append("-i " + tmp_file_name) + # + cmd_as_str = " ".join(cmd) + _, output_tmp = hsystem.system_to_string(cmd_as_str, abort_on_error=True) + _LOG.debug("output_tmp=%s", output_tmp) + # Read file. + txt = hio.from_file(tmp_file_name) + # Clean up. + os.remove(tmp_file_name) + # Remove empty lines introduced by `markdown-toc`. + txt = hprint.remove_lead_trail_empty_lines(txt) + ret = txt.split("\n") + hdbg.dassert_isinstance(ret, list) + return ret + + +def remove_table_of_contents(txt: str) -> str: + """ + Remove the table of contents from the text of a markdown file. + + The table of contents is stored between + ``` + + ... 
+ + ``` + + :param txt: Input markdown text + :return: Text with table of contents removed + """ + txt = re.sub(r".*?", "", txt, flags=re.DOTALL) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py new file mode 100644 index 000000000..b8087b9fd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py @@ -0,0 +1,106 @@ +""" +Matplotlib utilities and plotting helpers. + +Import as: + +import helpers.hmatplotlib as hmatplo +""" + +import logging +import math +from typing import Any, Optional, Tuple + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np + +import helpers.hdbg as hdbg +import helpers.hio as hio + +_LOG = logging.getLogger(__name__) + +# Default figure size for plots. +# TODO(gp): Is this used? +FIG_SIZE = (20, 5) + + +def get_multiple_plots( + num_plots: int, + num_cols: int, + y_scale: Optional[float] = None, + *args: Any, + **kwargs: Any, +) -> Tuple[mpl.figure.Figure, np.array]: + """ + Create figure to accommodate `num_plots` plots. + + The figure is arranged in rows with `num_cols` columns. + + :param num_plots: number of plots + :param num_cols: number of columns to use in the subplot + :param y_scale: the height of each plot. If `None`, the size of the whole + figure equals the default `figsize` + :return: figure and array of axes + """ + hdbg.dassert_lte(1, num_plots) + hdbg.dassert_lte(1, num_cols) + # Heuristic to find the dimension of the fig. 
+ if y_scale is not None: + hdbg.dassert_lt(0, y_scale) + ysize = math.ceil(num_plots / num_cols) * y_scale + figsize: Optional[Tuple[float, float]] = (20, ysize) + else: + figsize = None + if "tight_layout" not in kwargs and not kwargs.get( + "constrained_layout", False + ): + kwargs["tight_layout"] = True + fig, ax = plt.subplots( + math.ceil(num_plots / num_cols), + num_cols, + figsize=figsize, + *args, + **kwargs, + ) + if isinstance(ax, np.ndarray): + ax = ax.flatten() + else: + ax = np.array([ax]) + # Remove extra axes that can appear when `num_cols` > 1. + empty_axes = ax[num_plots:] + for empty_ax in empty_axes: + empty_ax.remove() + return fig, ax[:num_plots] + + +def save_fig( + fig: Optional[mpl.figure.Figure], + file_name: str, + *, + print_markdown: bool = False, + path_prefix: Optional[str] = None, +) -> None: + """ + Save matplotlib figure to file and optionally print markdown reference. + + :param fig: Matplotlib figure. If None, uses the active figure. + :param file_name: Output filename + :param print_markdown: If True, print markdown image reference + :param path_prefix: Path prefix for markdown reference (e.g., "msml610/lectures_source") + """ + if fig is None: + fig = plt.gcf() + hdbg.dassert_isinstance(fig, mpl.figure.Figure) + hdbg.dassert_isinstance(file_name, str) + hio.create_enclosing_dir(file_name, incremental=True) + fig.savefig(file_name, dpi=300, bbox_inches="tight") + # Use print instead of _LOG.info. 
+ print(f"Saved figure to '{file_name}'") + # + if print_markdown: + if path_prefix: + markdown_path = f"{path_prefix}/{file_name}" + else: + markdown_path = file_name + markdown_ref = f"![]({markdown_path})" + print(markdown_ref) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py new file mode 100644 index 000000000..27e5130ca --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py @@ -0,0 +1,170 @@ +""" +Import as: + +import helpers.hmkdocs as hmkdocs +""" + +import re + +import helpers.hdbg as hdbg +import helpers.hmarkdown as hmarkdo + +# TODO(ai): Make function private. +# TODO(ai): Convert str to List[str] +# TODO(ai): Add unit tests. + + +# TODO(gp): -> hmarkdown_?.py +def dedent_python_code_blocks(txt: str) -> str: + """ + Dedent Python code blocks so they are aligned to column 0. + + This is needed by mkdocs to render a Python code block correctly. + + :param txt: Input markdown text + :return: Text with Python code blocks dedented + """ + import textwrap + + lines = txt.split("\n") + result = [] + # Store whether the parser is inside a code block. + in_python_block = False + # Store the current Python code block. + code_block_lines = [] + # Track whether current block is indented (inside a list item). + block_is_indented = False + for line in lines: + if line.strip() == "```python": + in_python_block = True + # Only dedent top-level blocks (fence at column 0). + block_is_indented = line != line.lstrip() + result.append(line) + elif line.strip() == "```" and in_python_block: + if code_block_lines and not block_is_indented: + # Dedent only top-level code blocks. 
+ code_text = "\n".join(code_block_lines) + dedented_code = textwrap.dedent(code_text) + result.extend(dedented_code.split("\n")) + code_block_lines = [] + elif code_block_lines: + # Indented block: pass through unchanged. + result.extend(code_block_lines) + code_block_lines = [] + result.append(line) + in_python_block = False + block_is_indented = False + elif in_python_block: + code_block_lines.append(line) + else: + result.append(line) + return "\n".join(result) + + +def replace_indentation(txt: str, input_spaces: int, output_spaces: int) -> str: + """ + Replace indentation from input_spaces to output_spaces. + + :param txt: Input markdown text + :param input_spaces: Number of spaces to detect as one indentation + level + :param output_spaces: Number of spaces to replace each indentation + level with + :return: Text with indentation replaced + """ + hdbg.dassert_lte(1, input_spaces) + hdbg.dassert_lte(1, output_spaces) + lines = txt.split("\n") + result = [] + for line in lines: + # Count leading spaces. + leading_spaces = len(line) - len(line.lstrip()) + if leading_spaces > 0 and leading_spaces % input_spaces == 0: + # Calculate indentation level and convert to output spaces. + indentation_level = leading_spaces // input_spaces + new_indentation = " " * (indentation_level * output_spaces) + result.append(new_indentation + line.lstrip()) + else: + result.append(line) + return "\n".join(result) + + +def replace_indentation_with_four_spaces(txt: str) -> str: + """ + Replace 2 spaces indentation with 4 spaces since this is what mkdocs needs. + + :param txt: Input markdown text + :return: Text with 2-space indentation replaced with 4-space + indentation + """ + return replace_indentation(txt, input_spaces=2, output_spaces=4) + + +def convert_slides_to_markdown(txt: str, level: int) -> str: + """ + Convert strings storing "slides", i.e., `* ...` to markdown headers. 
+ + E.g., + ``` + * Tools for Vision component + ``` + to: + ``` + #### Tools for Vision component + ``` + """ + lines = txt.split("\n") + result = [] + for line in lines: + if line.startswith("* "): + result.append("#" * level + " " + line[2:]) + else: + result.append(line) + return "\n".join(result) + + +def rewrite_absolute_doc_links(txt: str) -> str: + """ + Rewrite absolute /docs/ markdown links to root-relative HTML links. + + MkDocs only converts relative `.md` links to `.html`. Absolute links + like `/docs/path/file.md` are left unchanged and 404 at serve time. + This converts them to `/path/file.html` so they resolve correctly. + + :param txt: Input markdown text + :return: Text with absolute /docs/ links rewritten + """ + + def _replace(m: re.Match) -> str: + path = m.group(1) + # Strip /docs/ prefix and convert .md → .html. + path = re.sub(r"^/docs/", "/", path) + path = re.sub( + r"\.md(#[^)]*)?$", lambda h: ".html" + (h.group(1) or ""), path + ) + return f"({path})" + + # Match markdown links: ([text](/docs/...md)) including optional anchors. + txt = re.sub(r"\((/docs/[^)]+\.md(?:#[^)]*)?)\)", _replace, txt) + return txt + + +def preprocess_mkdocs_markdown(txt: str) -> str: + """ + Preprocess markdown text for mkdocs. + + This function applies the following transformations: + 1. Remove table of contents + 2. Dedent Python code blocks + 3. Replace 2 spaces indentation with 4 spaces + 4. 
Rewrite absolute /docs/ links to root-relative HTML links + + :param txt: Input markdown text + :return: Preprocessed markdown text + """ + txt = hmarkdo.remove_table_of_contents(txt) + txt = dedent_python_code_blocks(txt) + txt = replace_indentation_with_four_spaces(txt) + txt = convert_slides_to_markdown(txt, level=4) + txt = rewrite_absolute_doc_links(txt) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py new file mode 100644 index 000000000..66ed59b39 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py @@ -0,0 +1,121 @@ +""" +Import as: + +import helpers.hmodule as hmodule +""" + +import logging +import os +import subprocess +import textwrap +from typing import Any, Dict, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hserver as hserver + +_LOG = logging.getLogger(__name__) + +_WARNING = "\033[33mWARNING\033[0m" + + +# Use this to avoid extra dependencies from `hsystem`. +def _system_to_string(cmd: str) -> Tuple[int, str]: + """ + Run a command and return the output and the return code. + + :param cmd: command to run + :return: tuple of (return code, output) + """ + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + # Redirect stderr to stdout. + stderr=subprocess.STDOUT, + shell=True, + text=True, + ) + rc = result.returncode + output = result.stdout + output = output.strip() + return rc, output + + +def has_module(module: str) -> bool: + """ + Return whether a Python module can be imported or not. + """ + if module == "gluonts" and hserver.is_host_mac(): + # Gluonts and mxnet modules are not properly supported on the ARM + # architecture yet, see CmTask4886 for details. 
+ return False + code = f""" + try: + import {module} + has_module_ = True + except ImportError as e: + _LOG.warning("%s: %s", _WARNING, str(e)) + has_module_ = False + """ + code = textwrap.dedent(code) + # To make the linter happy. + has_module_ = True + locals_: Dict[str, Any] = {} + # Need to explicitly declare and pass `locals_`: + # https://docs.python.org/3/library/functions.html#exec + # `Pass an explicit locals dictionary if you need to see effects + # of the code on locals after function exec() returns.` + exec(code, globals(), locals_) + has_module_ = locals_["has_module_"] + return has_module_ + + +def install_module_if_not_present( + import_name: str, + *, + package_name: Optional[str] = None, + use_sudo: bool = True, + use_activate: bool = False, + venv_path: Optional[str] = None, + quiet: bool = True, +) -> None: + """ + Install a Python module if it is not already installed. + + :param import_name: name used to import the module (e.g., "openai") + :param package_name: name of the package on PyPI (if different from `import_name`) + :param use_sudo: whether to use sudo to install the module + :param use_activate: whether to use the activate script to install the module + (e.g., "source /venv/bin/activate; pip install --quiet --upgrade openai") + :param venv_path: path to the virtual environment + E.g., /Users/saggese/src/venv/client_venv.helpers + :param quiet: whether to install the module quietly + """ + _has_module = has_module(import_name) + if _has_module: + print(f"Module '{import_name}' is already installed.") + return + print(f"Installing module '{import_name}'...") + # Sometime the package name is different from the import name. + # E.g., we import using `import dash_bootstrap_components` but the package + # name is `dash-bootstrap-components`. + if package_name is None: + package_name = import_name + # Sometime the package name is different from the import name. 
+ # E.g., we import using `import dash_bootstrap_components` but the package + # name is `dash-bootstrap-components`. + if quiet: + quiet_flag = "--quiet" + else: + quiet_flag = "" + if venv_path is None: + venv_path = "/venv" + venv_path = os.path.join(venv_path, "bin/activate") + hdbg.dassert_file_exists(venv_path, "Can't find venv_path='{venv_path}'") + if use_activate: + cmd = f'/bin/bash -c "(source {venv_path}; pip install {quiet_flag} --upgrade {package_name})"' + else: + cmd = f"pip install {quiet_flag} {package_name}" + if use_sudo: + cmd = f"sudo {cmd}" + _, output = _system_to_string(cmd) + print(output) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py new file mode 100644 index 000000000..525673032 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py @@ -0,0 +1,111 @@ +""" +Import as: + +import helpers.hmoto as hmoto +""" + +import unittest.mock as umock +from typing import Generator, Union + +import pytest # isort:skip # noqa: E402 # pylint: disable=wrong-import-position + +# Equivalent to `import moto`, but skip this module if the module is not present. +# `moto` must be imported before `boto3` to properly mock it. +moto = pytest.importorskip("moto") + +# It is necessary that boto3 is imported after moto. +# If not, boto3 will access real AWS. 
+import boto3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position + +import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position +import helpers.hs3 as hs3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position +import helpers.hunit_test as hunitest # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position + + +# ############################################################################# +# S3Mock_TestCase +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class S3Mock_TestCase(hunitest.TestCase): + # Mocked AWS credentials. + mock_aws_credentials_patch = umock.patch.dict( + hs3.os.environ, + { + "MOCK_AWS_ACCESS_KEY_ID": "mock_key_id", + "MOCK_AWS_SECRET_ACCESS_KEY": "mock_secret_access_key", + "MOCK_AWS_DEFAULT_REGION": "us-east-1", + }, + ) + mock_aws_credentials = None + mock_aws_profile = "__mock__" + # Mocked bucket. + mock_s3 = moto.mock_aws() + bucket_name = "mock_bucket" + # TODO(Nikola): Temporary here to ensure it is called only once. + # Used in some tests that are obtaining data from 3rd party providers. + binance_secret = None + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def _mock_get_s3fs( + self, aws_profile: Union[str, hs3.S3FileSystem] + ) -> hs3.S3FileSystem: + """ + Mock implementation of `get_s3fs` to use the mocked environment + variables from `moto`. 
+ """ + from s3fs import S3FileSystem + + hdbg.dassert_isinstance(aws_profile, (str, S3FileSystem)) + aws_profile = S3FileSystem(anon=False) + return aws_profile + + def set_up_test(self) -> None: + # Getting necessary secret before boto3 is mocked. + if self.binance_secret is None: + import helpers.hsecrets as hsecret + + self.binance_secret = hsecret.get_secret("binance.preprod.trading.1") + # Start boto3 mock. + self.mock_s3.start() + # Start AWS credentials mock. Must be started after moto mock, + # or it will be overridden by moto with `foobar` values. + self.mock_aws_credentials = self.mock_aws_credentials_patch.start() + # Initialize boto client and create bucket for testing. + s3_client = boto3.client("s3") + s3_client.create_bucket(Bucket=self.bucket_name) + # Precaution to ensure that we are using mocked botocore. + s3_test_client = boto3.client("s3") + buckets = s3_test_client.list_buckets()["Buckets"] + self.assertEqual(len(buckets), 1) + self.assertEqual(buckets[0]["Name"], self.bucket_name) + # Patch `get_s3fs` that uses the mocked environment variables. + self.mock_get_s3fs = umock.patch.object( + hs3, "get_s3fs", side_effect=self._mock_get_s3fs + ) + self.mock_get_s3fs.start() + + def tear_down_test(self) -> None: + # Empty the bucket otherwise deletion will fail. + s3_client = boto3.resource("s3") + hdbg.dassert_eq(self.bucket_name, "mock_bucket") + bucket = s3_client.Bucket(self.bucket_name) + bucket.objects.all().delete() + # Delete bucket. + bucket.delete() + # Stop mocked `get_s3fs`. + if hasattr(self, "mock_get_s3fs"): + self.mock_get_s3fs.stop() + # Stop moto. 
+ self.mock_aws_credentials_patch.stop() + self.mock_s3.stop() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py new file mode 100644 index 000000000..13ae41c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py @@ -0,0 +1,97 @@ +""" +Import as: + +import helpers.hnetwork as hnetwor +""" + +import logging +import os +import re +from typing import Optional, Tuple + +import requests + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def check_url(url: str) -> None: + """ + Check that an URL responds. + """ + try: + request = requests.get(url) + exists = request.status_code == 200 + # pylint: disable=broad-except + except Exception: + # TODO(gp): RuntimeError doesn't seem to catch. Find a narrower + # exception to catch. + exists = False + if not exists: + _LOG.warning("url '%s' doesn't exist", url) + + +def get_prefixes(jupyter_port: Optional[int] = None) -> Tuple[str, str]: + """ + Return the prefixes that a file should have under a GitHub repo and a + Jupyter notebook. + """ + hsystem.get_user_name() + if jupyter_port is None: + jupyter_port = 10001 + _LOG.warning( + "jupyter_port not available: using the default one %s", jupyter_port + ) + repo_name = hgit.get_repo_full_name_from_client(super_module=False) + _LOG.debug("repo_name=%s", repo_name) + github_prefix = f"https://github.com/{repo_name}/blob/master" + jupyter_prefix = f"http://localhost:{jupyter_port}/tree" + return github_prefix, jupyter_prefix + + +# TODO(gp): -> get_canonical_file_name_from_url +def get_file_name(url: str) -> str: + """ + Given an URL from GitHub or from Jupyter server extract the path + corresponding to the file. 
+ + E.g., + - http://localhost:10001/notebooks/research/... + oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb + -> + oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb + + - https://github.com/.../.../blob/master/... + oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb + -> + oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb + """ + # "http://localhost:10001/notebooks/... + # oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb" + ret = None + if ret is None: + m = re.search(r"http.*://localhost:\d+/(.*)", url) + if m: + ret = m.group(1) + to_remove = "notebooks/" + idx = ret.index(to_remove) + if idx >= 0: + end_idx = idx + len(to_remove) + ret = ret[end_idx:] + if ret is None: + # https://github.com/.../.../blob/master/... + # oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb + m = re.search(r"http.*://.*github.com/(.*)", url) + if m: + ret = m.group(1) + # Remove ".../.../blob/master" + ret = "/".join(ret.split("/")[4:]) + if ret is None: + if os.path.exists(url): + ret = url + if ret is None: + hdbg.dassert_is_not(ret, None, "url=%s", url) + return ret # type: ignore diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py new file mode 100644 index 000000000..75ecabfe4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py @@ -0,0 +1,105 @@ +""" +Import as: + +import helpers.hnotebook as hnotebo +""" + +import logging + + +def config_notebook(sns_set: bool = True) -> None: + """ + Configure the notebook for plotting. + """ + import helpers.hmodule as hmodule + + # Matplotlib. + module = "matplotlib" + if hmodule.has_module(module): + # Matplotlib. 
+ import matplotlib.pyplot as plt + + # plt.rcParams + plt.rcParams["figure.figsize"] = (20, 5) + plt.rcParams["legend.fontsize"] = 14 + plt.rcParams["font.size"] = 14 + plt.rcParams["image.cmap"] = "rainbow" + if False: + # Tweak the size of the plots to make it more readable when embedded in + # documents or presentations. + # font = {'family' : 'normal', + # #'weight' : 'bold', + # 'size' : 32} + # matplotlib.rc('font', **font) + scale = 3 + small_size = 8 * scale + medium_size = 10 * scale + bigger_size = 12 * scale + # Default text sizes. + plt.rc("font", size=small_size) + # Fontsize of the axes title. + plt.rc("axes", titlesize=small_size) + # Fontsize of the x and y labels. + plt.rc("axes", labelsize=medium_size) + # Fontsize of the tick labels. + plt.rc("xtick", labelsize=small_size) + # Fontsize of the tick labels. + plt.rc("ytick", labelsize=small_size) + # Legend fontsize. + plt.rc("legend", fontsize=small_size) + # Fontsize of the figure title. + plt.rc("figure", titlesize=bigger_size) + else: + print("No module '{module}'") + # Seaborn. + module = "seaborn" + if hmodule.has_module(module): + import seaborn as sns + + if sns_set: + sns.set() + else: + print("No module '{module}'") + # Pandas. + module = "pandas" + if hmodule.has_module(module): + import pandas as pd + + pd.set_option("display.max_rows", 500) + pd.set_option("display.max_columns", 500) + pd.set_option("display.width", 1000) + else: + print("No module '{module}'") + # Warnings. + import helpers.hwarnings as hwarnin + + # Force the linter to keep this import. + _ = hwarnin + + +def _info_print(msg: str, *args, **kwargs) -> None: + """ + Print a message with optional formatting arguments. + """ + if args: + msg = msg % args + print(msg) + + +def set_logger_to_print(log) -> None: + """ + Replace logger.info method with a print function. 
+ + :param log: logger object to modify + """ + log.info = _info_print + + +def set_all_loggers_to_print() -> None: + """ + Replace all loggers' info method with a print function. + """ + for name in logging.root.manager.loggerDict: + logger = logging.getLogger(name) + # print("Setting logger %s to print" % name) + set_logger_to_print(logger) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py new file mode 100644 index 000000000..47fc37975 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py @@ -0,0 +1,43 @@ +""" +Import as: + +import helpers.hnumba as hnumba +""" + +import logging +from typing import Any, Callable, TypeVar + +try: + import numba + + numba_available = True +except ImportError: + numba_available = False + +_LOG = logging.getLogger(__name__) + +# Switch to enable numba at run-time. +# For using in notebooks you need to force a reload of the library, like: +# import importlib +# importlib.reload(numba_) +# numba_.USE_NUMBA = False + +USE_NUMBA = True +RT = TypeVar("RT") # Return type for decorator. 
+ + +def jit(f: Callable[..., RT]) -> Callable[..., RT]: + if USE_NUMBA and not numba_available: + _LOG.warning("numba is not installed") + use_numba = USE_NUMBA and numba_available + + if use_numba: + _LOG.debug("Using numba!") + wrapper: Callable[..., RT] = numba.jit(f) + else: + + def wrapper(*args: Any, **kwargs: Any) -> RT: + _LOG.debug("Not using numba!") + return f(*args, **kwargs) + + return wrapper diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py new file mode 100644 index 000000000..4cd0e8c4d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py @@ -0,0 +1,57 @@ +""" +Import as: + +import helpers.hnumpy as hnumpy +""" + +import contextlib +from typing import Iterator + +import numpy as np + +import helpers.hdbg as hdbg + + +# From https://stackoverflow.com/questions/49555991 +@contextlib.contextmanager +def random_seed_context(seed: int) -> Iterator: + """ + Context manager to isolate a numpy random seed. + """ + state = np.random.get_state() + np.random.seed(seed) + try: + yield + finally: + np.random.set_state(state) + + +# TODO(Juraj): unit test in CmTask5092. +def floor_with_precision(value: float, amount_precision: int) -> float: + """ + Floor a value using desired precision. + + The invariant for this function is that negative number are floored based + on their absolute value: e.g floor_with_precision(-4.6, 0) == -4. This is + useful for calculating share size where there are decimal precision + limitations. The desired behavior is to rather round down than overfill. 
+ + Other examples: + floor_with_precision(0.125, 2) == 0.12 + floor_with_precision(0.4, 0) == 0.0 + + :param value: value to floor with desire + :param amount_precision: number of decimal points to floor to + :return: value floored using desired precision. + """ + # Custom solution to allow flooring using precision. + # https://stackoverflow.com/questions/58065055/floor-and-ceil-with-number-of-decimals/58065394#58065394 + # Precision < 0 does not make sense. + hdbg.dassert_lte(0, amount_precision) + # Store sign and get absolute value to get the desire + sign = -1 if value < 0 else 1 + value_abs = abs(value) + value_floored = np.true_divide( + np.floor(value_abs * 10**amount_precision), 10**amount_precision + ) + return value_floored * sign diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py new file mode 100644 index 000000000..e9424b8cc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py @@ -0,0 +1,500 @@ +""" +Methods to introspect and print the state of an object. + +Import as: + +import helpers.hobject as hobject +""" + +import abc +import logging +import pprint +from typing import Any, Dict, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hpandas as hpandas +import helpers.hprint as hprint +import helpers.hstring as hstring + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. +_LOG.setLevel(logging.INFO) + +# ############################################################################# +# _to_skip* +# ############################################################################# + + +def _to_skip(is_: bool, mode: str) -> bool: + """ + Return whether to skip the attribute. 
+ + :param is_: if `True` the attribute is of the type we are checking + :param mode: how to handle the attribute + :return: whether to skip the attribute + """ + hdbg.dassert_in(mode, ("skip", "only", "all")) + skip = False + if mode == "skip": + if is_: + # Skip everything. + skip = True + elif mode == "only": + if not is_: + # Keep only the callables. + skip = True + elif mode == "all": + # Keep everything. + skip = False + else: + raise ValueError(f"Invalid mode='{mode}'") + return skip + + +def _to_skip_callable_attribute(attr_name: Any, mode: str) -> bool: + """ + Decide whether to skip a callable attribute. + """ + # Check whether the attribute is callable. + is_callable = callable(attr_name) + skip = _to_skip(is_callable, mode) + return skip + + +def _to_skip_private_attribute(attr_name: str, mode: str) -> bool: + """ + Decide whether to skip a private attribute. + """ + # _Object__hello + # TODO(gp): This can be improved by passing the name of the object. + is_dunder = attr_name.startswith("_") and "__" in attr_name + # We assume that private attributes start with `_` and are not dunder. + is_private = not is_dunder and attr_name.startswith("_") + skip = _to_skip(is_private, mode) + return skip + + +def _to_skip_dunder_attribute(attr_name: str, mode: str) -> bool: + """ + Decide whether to skip a double under attribute. + """ + # Check if it is a dunder (i.e., double under method). E.g., `__hello__`. + is_dunder = attr_name.startswith("_") and "__" in attr_name + skip = _to_skip(is_dunder, mode) + return skip + + +def _to_skip_attribute( + attr_name: Any, + attr_value: Any, + callable_mode: str, + private_mode: str, + dunder_mode: str, + attr_names_to_skip: Optional[List[str]], +) -> bool: + """ + Decide whether to skip an attribute. 
+ + :param attr_name: name of the attribute + :param attr_value: value of the attribute + :param callable_mode: how to handle attributes that are callable methods + :param private_mode: how to handle attributes that are private (e.g., + `_hello`) + :param dunder_mode: how to handle attributes that are dunder (e.g., + `__hello`) + :param attr_names_to_skip: a list of attributes (e.g., private, callable, dunder) + to skip. `None` to skip nothing. + :return: whether to skip the attribute + """ + # Check whether the attribute is one that was requested explicitly to skip. + if attr_names_to_skip is not None: + if attr_name in attr_names_to_skip: + skip = True + return skip + # Handle callable methods. + skip = _to_skip_callable_attribute(attr_value, callable_mode) + if skip: + _LOG.debug("Skip callable") + return skip + # Handle private methods. + skip = _to_skip_private_attribute(attr_name, private_mode) + if skip: + _LOG.debug("Skip private") + return skip + # Handle dunder methods. + skip = _to_skip_dunder_attribute(attr_name, dunder_mode) + if skip: + _LOG.debug("Skip dunder") + return skip + return False + + +# ############################################################################# +# obj_to_str +# ############################################################################# + + +def _type_to_str(attr_value: Any) -> str: + """ + Print the attribute value together with its type. + + E.g., `a=False , b=hello , c=3.14 ` + """ + type_as_str = str(type(attr_value)) + # Convert from `` to `str`. + type_as_str = hstring.remove_prefix(type_as_str, "") + # Add `<` and `>` around the type. + type_as_str = f"<{type_as_str}>" + return type_as_str + + +def _attr_to_str(attr_value: Any, print_type: bool) -> str: + """ + Print the attribute value handling different types. 
+ """ + _LOG.debug("type(attr_value)=%s", type(attr_value)) + if isinstance(attr_value, pd.DataFrame): + res = f"pd.df({attr_value.shape}" + elif isinstance(attr_value, pd.Series): + res = f"pd.srs({attr_value.shape}" + elif isinstance(attr_value, dict): + res = str(attr_value) + else: + res = str(attr_value) + # Add the type, if needed. + if print_type: + res += " " + _type_to_str(attr_value) + return res + + +def obj_to_str( + obj: Any, + *, + attr_mode: str = "__dict__", + sort: bool = False, + print_type: bool = False, + callable_mode: str = "skip", + private_mode: str = "skip", + dunder_mode: str = "skip", + attr_names_to_skip: Optional[List[str]] = None, +) -> str: + """ + Print the attributes of an object. + + An object is printed as name of its class and its attributes, e.g., + ``` + _Object1 at 0x...=(a=False, b=hello, c=3.14) + ``` + + :param attr_mode: use `__dict__` or `dir()` + - It doesn't seem to make much difference + :sort: sort the attributes in order of name, or not + :param print_type: print the type of the attribute + :param callable_mode: how to handle attributes that are callable (i.e., + methods) + - `skip`: skip the callable methods + - `only`: print only the callable methods + - `all`: always print + :param private_mode: how to handle private attributes. Same params as + `callable_mode` + :param dunder_mode: how to handle double under attributes. Same params as + `callable_mode` + :param attr_names_to_skip: a list of attributes (e.g., private, callable, + dunder) to skip. This is used to avoid to print data that is redundant + (e.g., a cached value) + """ + ret = [] + if attr_mode == "__dict__": + # Use `__dict__` to get the attributes of the object. + values = obj.__dict__ + elif attr_mode == "dir": + # Use `dir()` to get the attributes of the object. + values = dir(obj) + elif attr_mode == "config": + # Use object method to get the attributes to print info for. 
+ values = obj.get_config_attributes() + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + if sort: + values = sorted(values) + for attr_name in values: + if attr_mode == "__dict__": + attr_value = obj.__dict__[attr_name] + elif attr_mode in ["dir", "config"]: + attr_value = getattr(obj, attr_name) + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + skip = _to_skip_attribute( + attr_name, + attr_value, + callable_mode, + private_mode, + dunder_mode, + attr_names_to_skip, + ) + # `attr_value` can be callable object and needs to be properly handled + # for string conversion and formatting. + _LOG.debug(hprint.to_str("attr_name attr_value skip")) + if skip: + continue + # + out = f"{attr_name}=" + _attr_to_str(attr_value, print_type) + ret.append(out) + # + txt = hprint.to_object_str(obj) + "=" + txt += "(" + ", ".join(ret) + ")" + return txt + + +# ############################################################################# +# obj_to_repr +# ############################################################################# + + +def _attr_to_repr(attr_name: Any, attr_value: Any, print_type: bool) -> str: + """ + Print an object as name of its class and its attributes. 
+ + E.g., + ``` + : + a='False' + b='hello' + c='3.14' + ``` + """ + _LOG.debug("type(attr_value)=%s", type(attr_value)) + if isinstance(attr_value, (pd.DataFrame, pd.Series)): + attr_value_as_str = hpandas.df_to_str(attr_value) + elif isinstance(attr_value, dict): + attr_value_as_str = pprint.pformat(attr_value) + else: + attr_value_as_str = repr(attr_value) + # + if len(attr_value_as_str.split("\n")) > 1: + # The string representing the attribute value spans multiple lines, so + # print like: + # ``` + # attr_name= (type) + # attr_value + # ``` + out = f"{attr_name}=" + if print_type: + out += " " + _type_to_str(attr_value) + out += "\n" + hprint.indent(attr_value_as_str) + else: + # The string representing the attribute value is a single line, so print + # like: + # ``` + # attr_name='attr_value' (type) + # ``` + out = f"{attr_name}='{str(attr_value)}'" + if print_type: + out += " " + _type_to_str(attr_value) + return out + + +# TODO(gp): Merge the code with obj_to_repr() using a switch for the different +# code. +def obj_to_repr( + obj: Any, + *, + attr_mode: str = "__dict__", + sort: bool = False, + print_type: bool = False, + callable_mode: str = "skip", + private_mode: str = "skip", + dunder_mode: str = "skip", + attr_names_to_skip: Optional[List[str]] = None, +) -> str: + """ + Same interface and behavior as `obj_to_str()`. + + Use `_attr_to_repr()` instead of a simple `attr_name = attr_value` + like in `obj_to_str()`. + """ + ret = [] + # TODO(Grisha): factor out the logic in a function `get_class_attributes(attr_mode)`. 
+ if attr_mode == "__dict__": + values = obj.__dict__ + elif attr_mode == "dir": + values = dir(obj) + elif attr_mode == "config": + values = obj.get_config_attributes() + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + if sort: + values = sorted(values) + for attr_name in values: + if attr_mode == "__dict__": + attr_value = obj.__dict__[attr_name] + elif attr_mode in ["dir", "config"]: + attr_value = getattr(obj, attr_name) + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + skip = _to_skip_attribute( + attr_name, + attr_value, + callable_mode, + private_mode, + dunder_mode, + attr_names_to_skip, + ) + # `attr_value` can be callable object and needs to be properly handled + # for string conversion and formatting. + _LOG.debug(hprint.to_str("attr_name attr_value skip")) + if skip: + continue + # + out = _attr_to_repr(attr_name, attr_value, print_type) + ret.append(out) + # + txt = [] + txt.append(hprint.to_object_repr(obj) + ":") + txt.append(hprint.indent("\n".join(ret))) + return "\n".join(txt) + + +# ############################################################################# +# PrintableMixin +# ############################################################################# + + +class PrintableMixin: + """ + Implement `__str__()` and `__repr__()` to print the state of an object. + + These methods can be overridden with more specific methods by + derived classes. + """ + + @staticmethod + @abc.abstractmethod + def get_config_attributes() -> List[str]: + """ + Get list of attributes that are relevant to the configuration of each + block. + """ + ... + + # TODO(Grisha): decide if we need this method: what are the use-cases? + # Ideally we should just save `SystemConfig` and load it when needed. + def to_config_dict(self) -> Dict[str, Any]: + """ + Get class configuration as dict. + """ + res_dict = {} + # Get class attribute names to print. 
+ attributes = self.get_config_attributes() + hdbg.dassert_is_subset(attributes, self.__dict__.keys()) + # Iterate over attributes and add their state to the dict. + for attr in attributes: + value = getattr(self, attr) + # Get a list of types the value class is derived from. + value_parent_classes = value.__class__.__mro__ + if any( + "helpers.hobject.PrintableMixin" in str(parent_class) + for parent_class in value_parent_classes + ): + # Call the function recursively if value is also + # a `PrintableMixin` descendant. + dict_val = value.to_config_dict() + else: + # Get attribute value representation. + dict_val = _attr_to_repr(attr, value, print_type=True) + # Put value in the result dict. + res_dict[attr] = dict_val + return res_dict + + def to_config_str(self) -> str: + """ + Get class configuration as string. + """ + ret = [] + attributes = self.get_config_attributes() + hdbg.dassert_is_subset(attributes, self.__dict__.keys()) + # Iterate over attributes and add their state to the dict. + for attr in attributes: + value = getattr(self, attr) + if isinstance(value, PrintableMixin): + # Call the function recursively if value is also + # a `PrintableMixin` descendant. + dict_val = value.to_config_str() + # Add attribute name for string representation. + dict_val = f"{attr}={dict_val}" + else: + dict_val = _attr_to_repr(attr, value, print_type=True) + # Put value in the result dict. + ret.append(dict_val) + txt = [] + txt.append(hprint.to_object_repr(self) + ":") + txt.append(hprint.indent("\n".join(ret))) + txt = "\n".join(txt) + return txt + + def __repr__( + self, + *, + attr_names_to_skip: Optional[List[str]] = None, + ) -> str: + """ + Used for debugging and development and need to be unambiguous. 
+ """ + txt = obj_to_repr( + self, + print_type=True, + private_mode="all", + attr_names_to_skip=attr_names_to_skip, + ) + return txt + + def __str__( + self, + *, + attr_names_to_skip: Optional[List[str]] = None, + ) -> str: + """ + Used for creating output for end user and need to be readable. + """ + txt = obj_to_str( + self, + print_type=True, + private_mode="all", + attr_names_to_skip=attr_names_to_skip, + ) + return txt + + +# ############################################################################# + + +# TODO(gp): CleanUp. This is for testing and should be in hobject_test.py. +# TODO(gp): -> check_object_signature +def test_object_signature( + self_: Any, obj: Any, *, remove_lines_regex: Optional[str] = None +) -> None: + """ + Print a string representation of an object using both `str()` and `repr()`. + + :param obj: the object to print + :param remove_lines_regex: a regex to remove certain lines from the + output + """ + txt = [] + # + txt.append(hprint.frame("str:")) + txt.append(str(obj)) + # + txt.append(hprint.frame("repr:")) + txt.append(repr(obj)) + # + txt = "\n".join(txt) + # Remove certain lines, if needed. + if remove_lines_regex: + txt = hprint.filter_text(remove_lines_regex, txt) + # + self_.check_string(txt, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py new file mode 100644 index 000000000..2c6d9c729 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py @@ -0,0 +1,106 @@ +""" +Support opening a file. 
+
+Import as:
+
+import helpers.hopen as hopen
+"""
+
+# TODO(gp): -> open_file or move it to system_interaction.py
+
+import logging
+import os
+from typing import Optional
+
+import helpers.hdbg as hdbg
+import helpers.hio as hio
+import helpers.hprint as hprint
+import helpers.hsystem as hsystem
+
+_LOG = logging.getLogger(__name__)
+
+# #############################################################################
+
+
+def _cmd_open_html(file_name: str, os_name: str) -> Optional[str]:
+    """
+    Get OS-specific command to open an HTML file.
+
+    :param file_name: path of the file to open; it is inserted in the
+        command verbatim, without any quoting
+    :param os_name: one of "Darwin", "Windows", "Linux"; any other value
+        fails the `hdbg.dassert_in()` check
+    :return: the command string, or `None` when `hsystem.check_exec()`
+        reports that the opener is not available
+    """
+    # Retrieve the executable.
+    os_cmds = {
+        "Darwin": "open",
+        "Windows": "start",
+        "Linux": "xdg-open",
+    }
+    hdbg.dassert_in(os_name, os_cmds)
+    exec_name = os_cmds[os_name]
+    if not hsystem.check_exec(exec_name):
+        _LOG.warning(
+            "Can't execute the command '%s' on this platform", exec_name
+        )
+        return None
+    # Build the command.
+    # NOTE(review): `file_name` is not shell-quoted, so paths with spaces or
+    # shell metacharacters will not be handled correctly.
+    full_cmd = f"{exec_name} {file_name}"
+    if os_name == "Linux":
+        _LOG.warning(
+            "To open files faster launch in background '%s &'", exec_name
+        )
+    return full_cmd
+
+
+def _cmd_open_pdf(file_name: str, os_name: str) -> Optional[str]:
+    """
+    Get OS-specific command to open a PDF file.
+
+    Only "Darwin" is supported: the command is an `osascript` here-document
+    that tells the "Skim" application to revert the document if it is
+    already open and then open it.
+
+    :param file_name: path of the PDF file, interpolated in the script
+    :param os_name: name of the OS
+    :return: the command string, or `None` (after a warning) for any other
+        OS
+    """
+    os_cmds = {
+        "Darwin": (
+            "/usr/bin/osascript << EOF\n"
+            f'set theFile to POSIX file "{file_name}" as alias\n'
+            'tell application "Skim"\n'
+            "activate\n"
+            "set theDocs to get documents whose path is "
+            "(get POSIX path of theFile)\n"
+            "if (count of theDocs) > 0 then revert theDocs\n"
+            "open theFile\n"
+            "end tell\n"
+            "EOF\n"
+        )
+    }
+    if os_name not in os_cmds:
+        _LOG.warning("Opening PDF files on '%s' is not supported yet", os_name)
+        full_cmd = None
+    else:
+        full_cmd = os_cmds[os_name]
+    return full_cmd
+
+
+def open_file(file_name: str) -> None:
+    """
+    Open file locally if its extension is supported.
+
+    Supported extensions are "pdf" and "html" (case-insensitive); any other
+    extension is reported through `hdbg.dfatal()`. When a command is
+    available, it is written to `open_file_cmd.sh` and executed with
+    `source`.
+
+    :param file_name: path of the file to open; it must exist
+    """
+    # Detect file format by the (last) extension.
+    # E.g., 'hello.html.txt' is considered a txt file.
+    # Note that for a name without any dot the whole (lowercased) base name
+    # is used as "extension".
+    extension = os.path.split(file_name)[-1].split(".")[-1]
+    extension = extension.lower()
+    # Make sure file exists.
+    _LOG.info(
+        "\n%s",
+        hprint.frame(
+            f"Opening {extension} file '{file_name}'", char1="<", char2=">"
+        ),
+    )
+    hdbg.dassert_path_exists(file_name)
+    # Get opening command.
+    os_name = hsystem.get_os_name()
+    cmd: Optional[str]
+    if extension == "pdf":
+        cmd = _cmd_open_pdf(file_name, os_name)
+    elif extension == "html":
+        cmd = _cmd_open_html(file_name, os_name)
+    else:
+        # NOTE(review): `cmd` stays unbound on this path, so the code below
+        # relies on `hdbg.dfatal()` not returning -- confirm.
+        hdbg.dfatal(f"Opening '{extension}' files is not supported yet")
+    # Run command.
+    if cmd is not None:
+        _LOG.info("%s", cmd)
+        # The command is saved in a script file, which is then sourced.
+        hio.to_file("open_file_cmd.sh", cmd)
+        hsystem.system("source open_file_cmd.sh", suppress_output=False)
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py
new file mode 100644
index 000000000..535e7f081
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py
@@ -0,0 +1,18 @@
+"""
+Import as:
+
+import helpers.hpandas as hpandas
+"""
+
+from helpers.hpandas_analysis import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import
+from helpers.hpandas_check_summary import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import
+from helpers.hpandas_clean import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import
+from helpers.hpandas_compare import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import
+from helpers.hpandas_conversion import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import
+from helpers.hpandas_dassert import * # isort:skip # noqa:
F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_display import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_io import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_multiindex import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_stats import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_transform import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_utils import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old new file mode 100644 index 000000000..5be1b281a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old @@ -0,0 +1,2684 @@ +""" +Import as: + +import helpers.hpandas as hpandas +""" + +import csv +import dataclasses +import logging +import helpers.hlogging as hlogging +import random +import re +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +# Handle different versions of s3fs where core module may be at different +# locations. +try: + import s3fs + + # Try to access s3fs.core to check if it exists + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module. 
+ try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ImportError: + # If s3fs is not available, define dummy classes for type hints. + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +# Avoid the following dependency from other `helpers` modules to prevent import +# cycles: +# import helpers.hs3 as hs3 +# import helpers.hsql as hsql +# import helpers.hunit_test as hunitest + + +_LOG = hlogging.getLogger(__name__) + +# Enable extra verbose debugging. Do not commit. +_TRACE = False + +RowsValues = List[List[str]] + + +# ############################################################################# + + +def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: + """ + Convert a pd.DataFrame with a single column into a pd.Series. The problem + is that empty df or df with a single row are not converted correctly to a + pd.Series. + + :param df: dataframe with a single column to convert to a series + :param series_dtype: dtype of the desired series in case a DataFrame + is empty, otherwise inherit dtype from a DataFrame + """ + # See https://stackoverflow.com/questions/33246771 + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) + if df.empty: + srs = pd.Series(dtype=series_dtype) + elif df.shape[0] > 1: + srs = df.squeeze() + else: + srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) + srs.name = df.index.name + hdbg.dassert_isinstance(srs, pd.Series) + return srs + + +def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: + """ + Convert a single-column dataframe to a series or no-op if already a series. 
+ """ + if isinstance(data, pd.Series): + return data + return to_series(data) + + +def dassert_is_days( + timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None +) -> None: + hdbg.dassert( + (timedelta / pd.Timedelta(days=1)).is_integer(), + "timedelta='%s' is not an integer number of days", + timedelta, + ) + if min_num_days is not None: + hdbg.dassert_lte(1, timedelta.days) + + +# ############################################################################# + + +def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index: + """ + Return the index of a Pandas object. + """ + if isinstance(obj, pd.Index): + index = obj + else: + hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame)) + index = obj.index + return index + + +# TODO(gp): Maybe for symmetry with the other functions, rename to +# dassert_datetime_index +def dassert_index_is_datetime( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that the dataframe has an index containing datetimes. + + It works for both single and multi-indexed dataframes. + """ + index = _get_index(obj) + if isinstance(index, pd.MultiIndex): + # In case of multi index check that at least one level is a datetime. + is_any_datetime = any( + isinstance(level, pd.DatetimeIndex) for level in index.levels + ) + hdbg.dassert(is_any_datetime, msg, *args) + else: + hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args) + + +def dassert_unique_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has a unique index. + """ + index = _get_index(obj) + if not index.is_unique: + dup_indices = index.duplicated(keep=False) + df_dup = obj[dup_indices] + dup_msg = f"Duplicated rows are:\n{df_to_str(df_dup)}\n" + if msg is None: + msg = dup_msg + else: + msg = dup_msg + msg + hdbg.dassert(index.is_unique, msg=msg, *args) + + +# TODO(gp): @all Add unit tests. 
+def dassert_increasing_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has an increasing index. + """ + index = _get_index(obj) + if not index.is_monotonic_increasing: + # Print information about the problematic indices like: + # ``` + # Not increasing indices are: + # full_symbol open high + # timestamp + # 2018-08-17 01:39:00+00:00 binance::BTC_USDT 6339.250000 6348.910000 + # 2018-08-17 00:01:00+00:00 kucoin::ETH_USDT 286.712987 286.712987 + # ``` + # Find the problematic indices. + mask = np.diff(index) <= pd.Timedelta(seconds=0) + mask = np.insert(mask, 0, False) + # TODO(gp): We might want to specify an integer with how many rows before + # after we want to show. + # Shift back to get the previous index that was creating the issue. + mask_shift = np.empty_like(mask) + mask_shift[: len(mask) - 1] = mask[1 : len(mask)] + mask_shift[len(mask) - 1] = False + # + mask = mask | mask_shift + dup_msg = f"Not increasing indices are:\n{df_to_str(obj[mask])}\n" + if msg is None: + msg = dup_msg + else: + msg = dup_msg + msg + # Dump the data to file for further inspection. + # obj.to_csv("index.csv") + hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args) + + +# TODO(gp): @all Add more info in case of failures and unit tests. +def dassert_strictly_increasing_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has a strictly increasing index. + """ + dassert_unique_index(obj, msg, *args) + dassert_increasing_index(obj, msg, *args) + + +# TODO(gp): Not sure it's used or useful? +def dassert_monotonic_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has a monotonic (i.e., strictly increasing or + decreasing index). 
+ """ + dassert_unique_index(obj, msg, *args) + index = _get_index(obj) + cond = index.is_monotonic_increasing or index.is_monotonic_decreasing + hdbg.dassert(cond, msg=msg, *args) + + +# TODO(Paul): @gp -> dassert_datetime_indexed_df +def dassert_time_indexed_df( + df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool +) -> None: + """ + Validate that input dataframe is time indexed and well-formed. + + It works for both single and multi-indexed dataframes. + + :param df: dataframe to validate + :param allow_empty: allow empty data frames + :param strictly_increasing: if True the index needs to be strictly + increasing, instead of just increasing + """ + # Verify that Pandas dataframe is passed as input. + hdbg.dassert_isinstance(df, pd.DataFrame) + if not allow_empty: + # Verify that a non-empty dataframe is passed as input. + hdbg.dassert_lt(0, df.shape[0]) + # Verify that the dataframe has at least 1 column. + hdbg.dassert_lte(1, len(df.columns)) + # Verify that the index is increasing. + if strictly_increasing: + dassert_strictly_increasing_index(df) + else: + dassert_increasing_index(df) + # Check that the index is in datetime format. + dassert_index_is_datetime(df) + # Check that the passed timestamp has timezone info. + index_item = df.index[0] + if isinstance(index_item, tuple): + # In case of multi index assume that the first level is a datetime. + index_item = index_item[0] + hdateti.dassert_has_tz(index_item) + + +def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: + """ + Ensure that remapping rows / columns is valid. + """ + hdbg.dassert_isinstance(to_remap, list) + hdbg.dassert_isinstance(remap_dict, dict) + # All the rows / columns to remap, should exist. + hdbg.dassert_is_subset( + remap_dict.keys(), + to_remap, + "Keys to remap should be a subset of existing columns", + ) + # The mapping is invertible. 
+    hdbg.dassert_no_duplicates(remap_dict.keys())
+    hdbg.dassert_no_duplicates(remap_dict.values())
+    # Rows / columns should not be remapped on existing rows / columns.
+    hdbg.dassert_not_intersection(remap_dict.values(), to_remap)
+
+
+def dassert_series_type_is(
+    srs: pd.Series,
+    type_: type,
+    msg: Optional[str] = None,
+    *args: Any,
+) -> None:
+    """
+    Ensure that the data type of `srs` is `type_`.
+
+    Examples of valid series types are
+      - np.float64
+      - np.int64
+      - pd.Timestamp
+
+    :param srs: series to check
+    :param type_: expected value of `srs.dtype.type`
+    :param msg: message forwarded to the `hdbg` assertion
+    :param args: arguments forwarded together with `msg`
+    """
+    hdbg.dassert_isinstance(srs, pd.Series)
+    hdbg.dassert_isinstance(type_, type)
+    hdbg.dassert_eq(srs.dtype.type, type_, msg, *args)
+
+
+def dassert_series_type_in(
+    srs: pd.Series,
+    types: List[type],
+    msg: Optional[str] = None,
+    *args: Any,
+) -> None:
+    """
+    Ensure that the data type of `srs` is one of the types in `types`.
+
+    :param srs: series to check
+    :param types: list of the accepted values of `srs.dtype.type`
+    :param msg: message forwarded to the `hdbg` assertion
+    :param args: arguments forwarded together with `msg`
+    """
+    hdbg.dassert_isinstance(srs, pd.Series)
+    hdbg.dassert_container_type(types, list, type)
+    hdbg.dassert_in(srs.dtype.type, types, msg, *args)
+
+
+def dassert_indices_equal(
+    df1: pd.DataFrame,
+    df2: pd.DataFrame,
+    *,
+    allow_series: bool = False,
+    only_warning: bool = False,
+) -> None:
+    """
+    Ensure that `df1` and `df2` share a common index.
+
+    Print the symmetric difference of indices if equality does not hold.
+
+    :param df1: first dataframe
+    :param df2: second dataframe
+    :param allow_series: if True, series inputs are accepted and converted
+        to dataframes before the check
+    :param only_warning: forwarded to `hdbg.dassert()`
+    """
+    if allow_series:
+        if isinstance(df1, pd.Series):
+            df1 = df1.to_frame()
+        if isinstance(df2, pd.Series):
+            df2 = df2.to_frame()
+    hdbg.dassert_isinstance(df1, pd.DataFrame)
+    hdbg.dassert_isinstance(df2, pd.DataFrame)
+    hdbg.dassert(
+        df1.index.equals(df2.index),
+        "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s",
+        df1.index.difference(df2.index),
+        df2.index.difference(df1.index),
+        only_warning=only_warning,
+    )
+
+
+def dassert_columns_equal(
+    df1: pd.DataFrame,
+    df2: pd.DataFrame,
+    *,
+    sort_cols: bool = False,
+    only_warning: bool = False,
+) -> None:
+    """
+    Ensure that `df1` and `df2` have the same columns.
+
+    Print the symmetric difference of columns if equality does not hold.
+
+    :param df1: first dataframe
+    :param df2: second dataframe
+    :param sort_cols: if True, sort the columns of both dataframes before
+        comparing, so that the column order does not matter
+    :param only_warning: forwarded to `hdbg.dassert()`
+    """
+    hdbg.dassert_isinstance(df1, pd.DataFrame)
+    hdbg.dassert_isinstance(df2, pd.DataFrame)
+    if sort_cols:
+        _LOG.debug("Sorting dataframe columns.")
+        df1 = df1.sort_index(axis=1)
+        df2 = df2.sort_index(axis=1)
+    hdbg.dassert(
+        df1.columns.equals(df2.columns),
+        "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s",
+        df1.columns.difference(df2.columns),
+        df2.columns.difference(df1.columns),
+        only_warning=only_warning,
+    )
+
+
+def dassert_axes_equal(
+    df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False
+) -> None:
+    """
+    Ensure that `df1` and `df2` have the same index and same columns.
+    """
+    dassert_indices_equal(df1, df2)
+    dassert_columns_equal(df1, df2, sort_cols=sort_cols)
+
+
+# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`.
+def dassert_approx_eq(
+    val1: Any,
+    val2: Any,
+    rtol: float = 1e-05,
+    atol: float = 1e-08,
+    msg: Optional[str] = None,
+    *args: Any,
+    only_warning: bool = False,
+) -> None:
+    """
+    Ensure that `val1` and `val2` are numerically close.
+
+    The comparison is done with `np.allclose()`, treating NaNs in the same
+    position as equal.
+
+    :param val1: first value (scalar or iterable); strings are not allowed
+    :param val2: second value (scalar or iterable); strings are not allowed
+    :param rtol: relative tolerance passed to `np.allclose()`
+    :param atol: absolute tolerance passed to `np.allclose()`
+    :param msg: message forwarded to the `hdbg` failure handler
+    :param args: arguments forwarded together with `msg`
+    :param only_warning: forwarded to the `hdbg` failure handler
+    """
+    # Approximate comparison is not applicable for strings.
+    hdbg.dassert_is_not(type(val1), str)
+    hdbg.dassert_is_not(type(val2), str)
+    # Convert iterable inputs to list in order to comply with numpy.
+    if isinstance(val1, Iterable):
+        val1 = list(val1)
+    if isinstance(val2, Iterable):
+        val2 = list(val2)
+    cond = np.allclose(
+        np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True
+    )
+    if not cond:
+        txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}"
+        hdbg._dfatal(txt, msg, *args, only_warning=only_warning)  # type: ignore
+
+
+# #############################################################################
+
+
+def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex:
+    """
+    Resample `DatetimeIndex`.
+
+    The result spans from the minimum to the maximum timestamp of `index`.
+
+    :param index: `DatetimeIndex` to resample
+    :param frequency: frequency from `pd.date_range()` to resample to
+    :return: resampled `DatetimeIndex`
+    """
+    _LOG.debug(hprint.to_str("index frequency"))
+    hdbg.dassert_isinstance(index, pd.DatetimeIndex)
+    dassert_unique_index(index, msg="Index must have only unique values")
+    min_date = index.min()
+    max_date = index.max()
+    _LOG.debug("min_date=%s max_date=%s", min_date, max_date)
+    # TODO(gp): Preserve the index name.
+    # index_name = index.name
+    resampled_index = pd.date_range(
+        start=min_date,
+        end=max_date,
+        freq=frequency,
+    )
+    # Enable detailed debugging.
+    if False:
+        if len(resampled_index) > len(index):
+            # Upsample: the resampled index has more points than the input.
+            _LOG.debug(
+                "Index length increased by %s = %s - %s",
+                len(resampled_index) - len(index),
+                len(resampled_index),
+                len(index),
+            )
+        elif len(resampled_index) < len(index):
+            # Downsample: the resampled index has fewer points than the input.
+            _LOG.debug(
+                "Index length decreased by %s = %s - %s",
+                len(index) - len(resampled_index),
+                len(index),
+                len(resampled_index),
+            )
+        else:
+            _LOG.debug("Index length=%s has not changed", len(index))
+    # resampled_index.name = index_name
+    return resampled_index
+
+
+def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame:
+    """
+    Resample `DataFrame` by placing NaN in missing locations in the index.
+
+    :param df: `DataFrame` to resample
+    :param frequency: frequency from `pd.date_range()` to resample to
+    :return: resampled `DataFrame`
+    """
+    hdbg.dassert_isinstance(df, pd.DataFrame)
+    # Preserve the index name.
+    index_name = df.index.name
+    resampled_index = resample_index(df.index, frequency)
+    df_reindex = df.reindex(resampled_index)
+    df_reindex.index.name = index_name
+    return df_reindex
+
+
+def find_gaps_in_dataframes(
+    df1: pd.DataFrame, df2: pd.DataFrame
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    Find data present in one dataframe and missing in the other one.
+ + :param df1: first dataframe for comparison + :param df2: second dataframe for comparison + :return: two dataframes with missing data + """ + # Get data present in first, but not present in second dataframe. + first_missing_indices = df2.index.difference(df1.index) + first_missing_data = df2.loc[first_missing_indices] + # Get data present in second, but not present in first dataframe. + second_missing_indices = df1.index.difference(df2.index) + second_missing_data = df1.loc[second_missing_indices] + return first_missing_data, second_missing_data + + +# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. +def apply_index_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the index mode. + + :param df1: first input df + :param df2: second input df + :param mode: method of processing indices + - "assert_equal": check that both indices are equal, assert otherwise + - "intersect": restrict both dfs to a common index + - "leave_unchanged": ignore any indices mismatch and return dfs as-is + :return: transformed copy of the inputs + """ + _LOG.debug("mode=%s", mode) + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + dassert_indices_equal(df1_copy, df2_copy) + elif mode == "intersect": + # TODO(Grisha): Add sorting on demand. 
+ common_index = df1_copy.index.intersection(df2_copy.index) + df1_copy = df1_copy[df1_copy.index.isin(common_index)] + df2_copy = df2_copy[df2_copy.index.isin(common_index)] + elif mode == "leave_unchanged": + _LOG.debug( + "Ignoring any index missmatch as per user's request.\n" + "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", + df1_copy.index.difference(df2_copy.index), + df2_copy.index.difference(df1_copy.index), + ) + else: + raise ValueError(f"Unsupported index_mode={mode}") + return df1_copy, df2_copy + + +def apply_columns_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the column mode. + + :param df1: first input df + :param df2: second input df + :param mode: method of processing columns + - "assert_equal": check that both dfs have equal columns, assert otherwise + - "intersect": restrict both dfs to only include common columns + - "leave_unchanged": ignore any column mismatches and return dfs as-is + :return: transformed copy of the inputs + """ + _LOG.debug("mode=%s", mode) + # Input validation. + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + # Check if columns are equal or not. + dassert_columns_equal(df1_copy, df2_copy) + elif mode == "intersect": + # Filter dataframes based on its common columns. + common_columns = df1_copy.columns.intersection(df2_copy.columns) + df1_copy = df1_copy[common_columns] + df2_copy = df2_copy[common_columns] + # Log the string representation of 2 dfs. + _LOG.debug("df1 after filtering=\n%s", df_to_str(df1)) + _LOG.debug("df2 after filtering=\n%s", df_to_str(df2)) + elif mode == "leave_unchanged": + # Ignore mismatch. 
+        _LOG.debug(
+            "Ignoring any column missmatch as per user's request.\n"
+            "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s",
+            df1.columns.difference(df2.columns),
+            df2.columns.difference(df1.columns),
+        )
+    else:
+        raise ValueError(f"Unsupported column mode: {mode}")
+    return df1_copy, df2_copy
+
+
+def find_gaps_in_time_series(
+    time_series: pd.Series,
+    start_timestamp: pd.Timestamp,
+    end_timestamp: pd.Timestamp,
+    freq: str,
+) -> pd.Series:
+    """
+    Find missing points on a time interval specified by [start_timestamp,
+    end_timestamp], where point distribution is determined by `freq`.
+
+    If the passed time series is in a unix epoch format (i.e., its dtype is
+    "int32" or "int64"), it is automatically transformed to pd.Timestamp.
+
+    :param time_series: time series to find gaps in
+    :param start_timestamp: start of the time interval to check
+    :param end_timestamp: end of the time interval to check
+    :param freq: distance between two data points on the interval.
+        Aliases correspond to pandas.date_range's freq parameter, i.e.
+        "S" -> second, "T" -> minute.
+    :return: pd.Series representing missing points in the source time
+        series.
+    """
+    _time_series = time_series
+    if str(time_series.dtype) in ["int32", "int64"]:
+        _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp)
+    # Build the expected grid of timestamps and remove the ones that are
+    # present in the data.
+    correct_time_series = pd.date_range(
+        start=start_timestamp, end=end_timestamp, freq=freq
+    )
+    # NOTE(review): `pd.date_range(...).difference(...)` returns an index,
+    # not a `pd.Series` as annotated -- confirm what callers expect.
+    return correct_time_series.difference(_time_series)
+
+
+def check_and_filter_matching_columns(
+    df: pd.DataFrame, required_columns: List[str], filter_data_mode: str
+) -> pd.DataFrame:
+    """
+    Check that columns are the required ones and if not filter data depending
+    on `filter_data_mode`.
+
+    :param df: data to check columns for
+    :param required_columns: columns to return, skipping columns that are not required
+    :param filter_data_mode: control behaviour with respect to extra or missing columns
+        - "assert": raise an error if required columns do not match received columns
+        - "warn_and_trim": return the intersection of required and received columns and
+            issue a warning
+    :return: input data as it is if required columns match received columns otherwise
+        processed data, see `filter_data_mode`
+    :raises ValueError: if `filter_data_mode` is not one of the supported values
+    """
+    received_columns = df.columns.to_list()
+    hdbg.dassert_lte(1, len(received_columns))
+    #
+    if filter_data_mode == "assert":
+        # Raise an assertion.
+        only_warning = False
+    elif filter_data_mode == "warn_and_trim":
+        # Just issue a warning.
+        only_warning = True
+        # Get columns intersection while preserving the order of the columns.
+        columns_intersection = [
+            col_name
+            for col_name in required_columns
+            if col_name in received_columns
+        ]
+        hdbg.dassert_lte(1, len(columns_intersection))
+        df = df[columns_intersection]
+    else:
+        raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'")
+    # The comparison uses the columns received originally (i.e., before any
+    # trimming), so that a mismatch is always reported.
+    hdbg.dassert_set_eq(
+        required_columns,
+        received_columns,
+        only_warning=only_warning,
+        msg="Received columns do not match required columns.",
+    )
+    return df
+
+
+def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
+    """
+    Compare contents of rows with same indices.
+
+    Index is set to default sequential integer values because compare is
+    sensitive to multi index (probably because new multi indexes are created
+    for each difference in `compare`). Multi index columns are regular columns now.
+    Excess columns are removed so both dataframes are always same shape because
+    `compare` expects identical dataframes (same number of rows, columns, etc.).
+ + :param df1: first dataframe for comparison + :param df2: second dataframe for comparison + :return: dataframe with data with same indices and different contents + """ + # Get rows on which the two dataframe indices match. + idx_intersection = df1.index.intersection(df2.index) + # Remove excess columns and reset indexes. + trimmed_second = df2.loc[idx_intersection].reset_index() + trimmed_first = df1.loc[idx_intersection].reset_index() + # Get difference between second and first dataframe. + data_difference = trimmed_second.compare(trimmed_first) + # Update data difference with original dataframe index names + # for easier identification. + index_names = tuple(df2.index.names) + # If index or multi index is named, it will be visible in data difference. + if index_names != (None,): + for index in data_difference.index: + for column in index_names: + data_difference.loc[index, column] = trimmed_second.loc[index][ + column + ] + data_difference = data_difference.convert_dtypes() + return data_difference + + +def drop_duplicates( + data: Union[pd.Series, pd.DataFrame], + use_index: bool, + column_subset: Optional[List[str]] = None, + *args: Any, + **kwargs: Any, +) -> Union[pd.Series, pd.DataFrame]: + """ + Wrap `pandas.drop_duplicates()`. + + See the official docs: + - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html + - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html + + :param use_index: + - if `True`, use index values together with a column subset for + identifying duplicates + - if `False`, duplicated rows are with the exact same values in a subset + and different indices + :param column_subset: a list of columns to consider for identifying duplicates + :return: data without duplicates + """ + _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) + num_rows_before = data.shape[0] + # Get all columns list for subset if no subset is passed. 
+ if column_subset is None: + column_subset = data.columns.tolist() + else: + hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") + if use_index: + # Add dummy index column to use it for duplicates detection. + index_col_name = "use_index_col" + hdbg.dassert_not_in(index_col_name, data.columns.tolist()) + column_subset.insert(0, index_col_name) + data[index_col_name] = data.index + # + data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) + # + if use_index: + # Remove dummy index column. + data_no_dups = data_no_dups.drop([index_col_name], axis=1) + # Report the change. + num_rows_after = data_no_dups.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return data_no_dups + + +def dropna( + df: pd.DataFrame, + *args: Any, + drop_infs: bool = False, + report_stats: bool = False, + **kwargs: Any, +) -> pd.DataFrame: + """ + Create a wrapper around pd.dropna() reporting information about the removed + rows. + + :param df: dataframe to process + :param drop_infs: if +/- np.inf should be considered as nans + :param report_stats: if processing stats should be reported + :return: dataframe with nans dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + num_rows_before = df.shape[0] + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + df = df.dropna(*args, **kwargs) + if report_stats: + num_rows_after = df.shape[0] + pct_removed = hprint.perc( + num_rows_before - num_rows_after, num_rows_before + ) + _LOG.info("removed rows with nans: %s", pct_removed) + return df + + +def drop_axis_with_all_nans( + df: pd.DataFrame, + drop_rows: bool = True, + drop_columns: bool = False, + drop_infs: bool = False, + report_stats: bool = False, +) -> pd.DataFrame: + """ + Remove columns and rows not containing information (e.g., with only nans). + + The operation is not performed in place and the resulting df is + returned. 
Assume that the index is timestamps. + + :param df: dataframe to process + :param drop_rows: remove rows with only nans + :param drop_columns: remove columns with only nans + :param drop_infs: remove also +/- np.inf + :param report_stats: report the stats of the operations + :return: dataframe with specific nan axis dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + if drop_columns: + # Remove columns with all nans, if any. + cols_before = df.columns[:] + df = df.dropna(axis=1, how="all") + if report_stats: + # Report results. + cols_after = df.columns[:] + removed_cols = set(cols_before).difference(set(cols_after)) + pct_removed = hprint.perc( + len(cols_before) - len(cols_after), len(cols_after) + ) + _LOG.info( + "removed cols with all nans: %s %s", + pct_removed, + hprint.list_to_str(removed_cols), + ) + if drop_rows: + # Remove rows with all nans, if any. + rows_before = df.index[:] + df = df.dropna(axis=0, how="all") + if report_stats: + # Report results. + rows_after = df.index[:] + removed_rows = set(rows_before).difference(set(rows_after)) + if len(rows_before) == len(rows_after): + # Nothing was removed. + min_ts = max_ts = None + else: + # TODO(gp): Report as intervals of dates. + min_ts = min(removed_rows) + max_ts = max(removed_rows) + pct_removed = hprint.perc( + len(rows_before) - len(rows_after), len(rows_after) + ) + _LOG.info( + "removed rows with all nans: %s [%s, %s]", + pct_removed, + min_ts, + max_ts, + ) + return df + + +def reindex_on_unix_epoch( + df: pd.DataFrame, in_col_name: str, unit: str = "s" +) -> pd.DataFrame: + """ + Transform the column `in_col_name` into a datetime index. `in_col_name` + contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. + + :param df: dataframe with a unix epoch + :param in_col_name: column containing unix epoch + :param unit: the unit of unix epoch + """ + # Convert. 
def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str:
    """
    Build a short textual signature of a dataframe, mainly for tests.

    The signature starts with the dataframe shape, followed either by the
    whole dataframe (when it has at most `num_rows` rows) or by its first and
    last `num_rows // 2` rows.

    :param df: dataframe to summarize
    :param num_rows: max number of rows for which the full dataframe is shown
    :return: the signature as a single string
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    parts: List[str] = [f"df.shape={str(df.shape)}"]
    # Avoid truncating wide cells and columns while rendering the dataframe.
    display_options = (
        "display.max_colwidth",
        int(1e6),
        "display.max_columns",
        None,
    )
    with pd.option_context(*display_options):
        if len(df) <= num_rows:
            # The dataframe is small enough to be shown entirely.
            parts.append(f"df.full=\n{df}")
        else:
            # Show only the beginning and the end of the dataframe.
            half = num_rows // 2
            parts.append(f"df.head=\n{df.head(half)}")
            parts.append(f"df.tail=\n{df.tail(half)}")
    signature = "\n".join(parts)
    return signature
+ :param right_close: whether to include the end boundary of the interval + - True: ..., end_ts] + - False: ..., end_ts) + :return: the trimmed dataframe + """ + if _TRACE: + _LOG.trace( + df_to_str(df, print_dtypes=True, print_shape_info=True, tag="df") + ) + _LOG.debug( + hprint.to_str("ts_col_name start_ts end_ts left_close right_close") + ) + if _TRACE: + _LOG.trace("df=\n%s", df_to_str(df)) + if df.empty: + # If the df is empty, there is nothing to trim. + return df + if start_ts is None and end_ts is None: + # If no boundaries are specified, there are no points of reference to trim + # to. + return df + num_rows_before = df.shape[0] + if start_ts is not None and end_ts is not None: + # Confirm that the interval boundaries are valid. + hdateti.dassert_tz_compatible(start_ts, end_ts) + hdbg.dassert_lte(start_ts, end_ts) + # Get the values to filter by. + if ts_col_name is None: + values_to_filter_by = pd.Series(df.index, index=df.index) + else: + hdbg.dassert_in(ts_col_name, df.columns) + values_to_filter_by = df[ts_col_name] + if values_to_filter_by.is_monotonic_increasing: + _LOG.trace("df is monotonic") + # The values are sorted; using the `pd.Series.searchsorted()` method. + # Find the index corresponding to the left boundary of the interval. + if start_ts is not None: + side = "left" if left_close else "right" + left_idx = values_to_filter_by.searchsorted(start_ts, side) + else: + # There is nothing to filter, so the left index is the first one. + left_idx = 0 + _LOG.debug(hprint.to_str("start_ts left_idx")) + # Find the index corresponding to the right boundary of the interval. + if end_ts is not None: + side = "right" if right_close else "left" + right_idx = values_to_filter_by.searchsorted(end_ts, side) + else: + # There is nothing to filter, so the right index is None. 
+ right_idx = df.shape[0] + _LOG.debug(hprint.to_str("end_ts right_idx")) + # + hdbg.dassert_lte(0, left_idx) + hdbg.dassert_lte(left_idx, right_idx) + hdbg.dassert_lte(right_idx, df.shape[0]) + _LOG.debug(hprint.to_str("start_ts left_idx")) + if right_idx < df.shape[0]: + _LOG.debug(hprint.to_str("end_ts right_idx")) + df = df.iloc[left_idx:right_idx] + else: + _LOG.trace("df is not monotonic") + # The values are not sorted; using the `pd.Series.between` method. + if left_close and right_close: + inclusive = "both" + elif left_close: + inclusive = "left" + elif right_close: + inclusive = "right" + else: + inclusive = "neither" + epsilon = pd.DateOffset(minutes=1) + if start_ts is None: + start_ts = values_to_filter_by.min() - epsilon + if end_ts is None: + end_ts = values_to_filter_by.max() + epsilon + df = df[ + values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) + ] + # Report the changes. + num_rows_after = df.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return df + + +# TODO(Nina): Add `filter_data_mode`. +def merge_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + threshold_col_name: str, + *, + threshold: float = 0.9, + intersecting_columns: Optional[List[str]] = None, + **pd_merge_kwargs: Any, +) -> pd.DataFrame: + """ + Wrap `pd.merge`. + + :param threshold_col_name: a column's name to check the minimum + overlap on + :param threshold: minimum overlap of unique values in a specified + column to perform the merge + :param intersecting_columns: allow certain columns to appear in both + dataframes; store both in the resulting df with corresponding + suffixes + """ + _LOG.debug( + hprint.to_str( + "threshold_col_name threshold intersecting_columns pd_merge_kwargs" + ) + ) + # Sanity check column types. 
# TODO(gp): Is this (ironically) a duplicate of drop_duplicates?
def drop_duplicated(
    df: pd.DataFrame, *, subset: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Implement `df.duplicated` but considering also the index and ignoring nans.

    The input dataframe is not modified: a new dataframe is returned.

    Nans are replaced with `0.0` before looking for duplicates, so a nan and a
    `0.0` in the same column are considered equal.

    :param df: dataframe to process
    :param subset: columns to consider (together with the index) when looking
        for duplicates; `None` means all the columns
    :return: dataframe without the duplicated rows, keeping the first
        occurrence and the original index name
    """
    _LOG.debug("before df=\n%s", df_to_str(df))
    # Move the index to the df.
    old_index_name = df.index.name
    new_index_name = "_index.tmp"
    hdbg.dassert_not_in(new_index_name, df.columns)
    # Build a new dataframe instead of renaming / resetting the index in place,
    # otherwise the caller's dataframe is left with its index moved to a
    # column whenever duplicates are found.
    df = df.rename_axis(new_index_name).reset_index(drop=False)
    # Remove duplicates by ignoring nans.
    if subset is not None:
        hdbg.dassert_isinstance(subset, list)
        subset = [new_index_name] + subset
    duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first")
    # Report the result of the operation.
    if duplicated.sum() > 0:
        num_rows_before = df.shape[0]
        _LOG.debug("Removing duplicates df=\n%s", df_to_str(df.loc[duplicated]))
        df = df.loc[~duplicated]
        num_rows_after = df.shape[0]
        _LOG.warning(
            "Removed repeated rows num_rows=%s",
            hprint.perc(num_rows_before - num_rows_after, num_rows_before),
        )
        _LOG.debug("after removing duplicates df=\n%s", df_to_str(df))
    # Set the index back.
    df = df.set_index(new_index_name)
    df.index.name = old_index_name
    _LOG.debug("after df=\n%s", df_to_str(df))
    return df
def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize the type statistics of each column of a DataFrame.

    :param df: The DataFrame whose columns will be analyzed.
    :return: A DataFrame with one row per column of `df` and one column per
        key returned by `infer_column_types()`, i.e., `is_numeric`, `is_bool`,
        `is_string` and `type`.
    """

    def _summarize(col: pd.Series) -> pd.Series:
        # Wrap the stats dict in a series so that `apply` aligns the keys.
        return pd.Series(infer_column_types(col))

    # The stats are computed column by column, with the keys as rows.
    stats_by_column = df.apply(_summarize)
    # Transpose so that each analyzed column becomes a row.
    return stats_by_column.T
def cast_series_to_type(
    series: pd.Series, series_type: Optional[type]
) -> pd.Series:
    """
    Convert a Pandas series to a given type.

    The input is never modified: a new object is returned. A `pd.Index` is
    also accepted (e.g., `str_to_df()` passes the index of a dataframe) and in
    that case a `pd.Index` is returned.

    :param series: the input series
    :param series_type: the type to convert the series into
        - if None, then the series values are turned into Nones
        - if `pd.Timestamp`, the values are parsed with `pd.to_datetime()`
        - if `dict`, each value is evaluated as a Python expression
        - otherwise the values are cast with `astype()`
    :return: the series in the required type
    """
    if series_type is None:
        # Turn the series values into None.
        # Build a new object instead of assigning in place, so that the
        # caller's data is not overwritten and immutable `pd.Index` objects
        # are supported too.
        none_values = [None] * len(series)
        if isinstance(series, pd.Index):
            series = pd.Index(none_values, dtype=object, name=series.name)
        else:
            series = pd.Series(
                none_values, index=series.index, dtype=object, name=series.name
            )
    elif series_type is pd.Timestamp:
        # Convert to timestamp.
        series = pd.to_datetime(series)
    elif series_type is dict:
        # Convert to dict.
        # NOTE(review): `eval` executes arbitrary code, so this branch must be
        # used only on trusted strings (e.g., golden outcomes in unit tests).
        # `map` is used since it is available on both `pd.Series` and
        # `pd.Index`.
        series = series.map(eval)
    else:
        # Convert to the specified type.
        series = series.astype(series_type)
    return series
+ + :param log_level: log level at which to display a df. E.g., if `log_level = + logging.DEBUG`, then we display the df only if we are running with + `-v DEBUG`. If `log_level = logging.INFO` then we don't display it + """ + from IPython.display import display + + if ( + hsystem.is_running_in_ipynb() + and log_level >= hdbg.get_logger_verbosity() + ): + display(df) + + +def _df_to_str( + df: pd.DataFrame, + num_rows: Optional[int], + max_columns: int, + max_colwidth: int, + max_rows: int, + precision: int, + display_width: int, + use_tabulate: bool, + log_level: int, +) -> str: + is_in_ipynb = hsystem.is_running_in_ipynb() + out = [] + # Set dataframe print options. + with pd.option_context( + "display.max_colwidth", + max_colwidth, + # "display.height", 1000, + "display.max_rows", + max_rows, + "display.precision", + precision, + "display.max_columns", + max_columns, + "display.width", + display_width, + ): + if use_tabulate: + import tabulate + + out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) + # TODO(Grisha): Add an option to display all rows since if `num_rows` + # is `None`, only first and last 5 rows are displayed. Consider using + # `df.to_string()` instead of `str(df)`. + if num_rows is None or df.shape[0] <= num_rows: + # Print the entire data frame. + if not is_in_ipynb: + out.append(str(df)) + else: + # Display dataframe. + _display(log_level, df) + else: + nr = num_rows // 2 + if not is_in_ipynb: + # Print top and bottom of df. + out.append(str(df.head(nr))) + out.append("...") + tail_str = str(df.tail(nr)) + # Remove index and columns from tail_df. + skipped_rows = 1 + if df.index.name: + skipped_rows += 1 + tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) + out.append(tail_str) + else: + # TODO(gp): @all use this approach also above and update all the + # unit tests. 
+ df = [ + df.head(nr), + pd.DataFrame( + [["..."] * df.shape[1]], index=[" "], columns=df.columns + ), + df.tail(nr), + ] + df = pd.concat(df) + # Display dataframe. + _display(log_level, df) + if not is_in_ipynb: + txt = "\n".join(out) + else: + txt = "" + return txt + + +# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that +# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. +# TODO(gp): We should make sure this works properly in a notebook, although +# it's not easy to unit test. +def df_to_str( + df: Union[pd.DataFrame, pd.Series, pd.Index], + *, + # TODO(gp): Remove this hack in the integration. + # handle_signed_zeros: bool = False, + handle_signed_zeros: bool = True, + num_rows: Optional[int] = 6, + print_dtypes: bool = False, + print_shape_info: bool = False, + print_nan_info: bool = False, + print_memory_usage: bool = False, + memory_usage_mode: str = "human_readable", + tag: Optional[str] = None, + max_columns: int = 10000, + max_colwidth: int = 2000, + max_rows: int = 500, + precision: int = 6, + display_width: int = 10000, + use_tabulate: bool = False, + log_level: int = logging.DEBUG, +) -> str: + """ + Print a dataframe to string reporting all the columns without trimming. + + Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works + properly when called from outside a notebook, i.e., the dataframe is printed + But it won't display the dataframe in a notebook, since the default level at + which the dataframe is displayed is `logging.DEBUG`. + + In this case to get the correct behavior one should do: + + ``` + log_level = ... 
+ _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) + ``` + + :param: handle_signed_zeros: convert `-0.0` to `0.0` + :param: num_rows: max number of rows to print (half from the top and half from + the bottom of the dataframe) + - `None` to print the entire dataframe + :param print_dtypes: report dataframe types and information about the type of + each column by looking at the first value + :param print_shape_info: report dataframe shape, index and columns + :param print_memory_usage: report memory use for each + """ + if df is None: + return "" + if isinstance(df, pd.Series): + df = pd.DataFrame(df) + elif isinstance(df, pd.Index): + df = df.to_frame(index=False) + hdbg.dassert_isinstance(df, pd.DataFrame) + # For some reason there are so-called "negative zeros", but we consider + # them equal to `0.0`. + df = df.copy() + if handle_signed_zeros: + for col_name in df.select_dtypes(include=[np.float64, float]).columns: + df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) + out = [] + # Print the tag. + if tag is not None: + out.append(f"# {tag}=") + if not df.empty: + # Print information about the shape and index. + # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info + if print_shape_info: + # TODO(gp): Unfortunately we can't improve this part of the output + # since there are many golden inside the code that would need to be + # updated. Consider automating updating the expected values in the code. + txt = f"index=[{df.index.min()}, {df.index.max()}]" + out.append(txt) + txt = f"columns={','.join(map(str, df.columns))}" + out.append(txt) + txt = f"shape={str(df.shape)}" + out.append(txt) + # Print information about the types. + if print_dtypes: + out.append("* type=") + + table = [] + + def _report_srs_stats(srs: pd.Series) -> List[Any]: + """ + Report dtype, the first element, and its type of series. 
+ """ + row: List[Any] = [] + first_elem = srs.values[0] + num_unique = srs.nunique() + num_nans = srs.isna().sum() + row.extend( + [ + srs.dtype, + hprint.perc(num_unique, len(srs)), + hprint.perc(num_nans, len(srs)), + first_elem, + type(first_elem), + ] + ) + return row + + row = [] + col_name = "index" + row.append(col_name) + row.extend(_report_srs_stats(df.index)) + row = map(str, row) + table.append(row) + for col_name in df.columns: + row_: List[Any] = [] + row_.append(col_name) + row_.extend(_report_srs_stats(df[col_name])) + row_ = map(str, row_) + table.append(row_) + # + columns = [ + "col_name", + "dtype", + "num_unique", + "num_nans", + "first_elem", + "type(first_elem)", + ] + df_stats = pd.DataFrame(table, columns=columns) + stats_num_rows = None + df_stats_as_str = _df_to_str( + df_stats, + stats_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(df_stats_as_str) + # Print info about memory usage. + if print_memory_usage: + out.append("* memory=") + mem_use_df = pd.concat( + [df.memory_usage(deep=False), df.memory_usage(deep=True)], + axis=1, + keys=["shallow", "deep"], + ) + # Add total row. + mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) + mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) + # Convert into the desired format. + if memory_usage_mode == "bytes": + pass + elif memory_usage_mode == "human_readable": + import helpers.hintrospection as hintros + + mem_use_df = mem_use_df.applymap(hintros.format_size) + else: + raise ValueError( + f"Invalid memory_usage_mode='{memory_usage_mode}'" + ) + memory_num_rows = None + memory_usage_as_txt = _df_to_str( + mem_use_df, + memory_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(memory_usage_as_txt) + # Print info about nans. 
def _assemble_df_rows(rows_values: RowsValues) -> RowsValues:
    """
    Organize dataframe values into a column-row structure.

    - Indentation artifacts are removed
    - The index placement is handled, i.e.
        - if the index is named, the name is located and moved to the same
          row as the column names
        - if the index is not named, the row with the column names receives
          a placeholder empty value in its place
    - Empty columns are dropped

    The rows are modified in place and also returned.

    :param rows_values: row values extracted from a string df representation
    :return: row values assembled into a valid column-row structure
    """
    hdbg.dassert_lte(1, len(rows_values))
    # Clean up indentation artifacts.
    if all(row[0] == "" for row in rows_values):
        # Remove the first empty cell in each row.
        for row in rows_values:
            del row[0]
    # If the index is named, its name is located in the second row,
    # with an optional extra empty value cell value next to it.
    # A header-only input has no second row, thus no index name row.
    has_index_name_row = len(rows_values) > 1 and (
        len(rows_values[1]) == 1
        or (len(rows_values[1]) == 2 and rows_values[1][1] == "")
    )
    if has_index_name_row:
        # Move the index name to the row with all the column names.
        if rows_values[0][0] == "":
            rows_values[0][0] = rows_values[1][0]
        else:
            rows_values[0].insert(0, rows_values[1][0])
        # Drop the former index name row.
        del rows_values[1]
    else:
        # Add an empty cell for the absent index name.
        rows_values[0].insert(0, "")
    # Identify and remove empty columns.
    min_len_row = min(len(row) for row in rows_values)
    idxs_to_delete = [
        i
        for i in range(min_len_row)
        if all(row[i] == "" for row in rows_values)
    ]
    # Delete from the last column to the first one: deleting in ascending
    # order shifts the remaining cells to the left, so the following indices
    # would point to the wrong columns.
    for idx in reversed(idxs_to_delete):
        for row in rows_values:
            del row[idx]
    # Confirm that all the rows have the same number of values.
    hdbg.dassert_eq(len({len(row) for row in rows_values}), 1)
    return rows_values
def convert_df_to_json_string(
    df: pd.DataFrame,
    n_head: Optional[int] = 10,
    n_tail: Optional[int] = 10,
    columns_order: Optional[List[str]] = None,
) -> str:
    """
    Convert dataframe to pretty-printed JSON string.

    To select all rows of the dataframe, pass `n_head` as None.

    :param df: dataframe to convert
    :param n_head: number of printed top rows; `None` prints all the rows
    :param n_tail: number of printed bottom rows; `None` prints no bottom rows
    :param columns_order: order of the columns in the output; it must contain
        exactly the columns of `df`
    :return: dataframe converted to JSON string
    """
    # Append shape of the initial dataframe.
    shape = f"original shape={df.shape}"
    # Reorder columns.
    if columns_order is not None:
        # `pd.DataFrame` has no `cols` attribute: use `columns`.
        hdbg.dassert_set_eq(columns_order, df.columns.to_list())
        df = df[columns_order]
    # Head and tail are serialized with the same options.
    to_json_kwargs = {
        "orient": "index",
        "force_ascii": False,
        "indent": 4,
        "default_handler": str,
        "date_format": "iso",
        "date_unit": "s",
    }
    # Select head.
    if n_head is not None:
        head_df = df.head(n_head)
    else:
        # If no n_head provided, append entire dataframe.
        head_df = df
    # Transform head to json.
    head_json = head_df.to_json(**to_json_kwargs)
    if n_tail is not None:
        # Transform tail to json.
        tail_json = df.tail(n_tail).to_json(**to_json_kwargs)
    else:
        # If no tail specified, append an empty string.
        tail_json = ""
    # Join shape and dataframe to single string.
    output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json])
    return output_str
def read_csv_to_df(
    stream: Union[str, S3File, S3FileSystem],
    *args: Any,
    **kwargs: Any,
) -> pd.DataFrame:
    """
    Read a CSV file into a `pd.DataFrame`.

    The compression is inferred from the file extension and passed to
    `pd.read_csv()`; in that case `compression` must not be passed by the
    caller.

    :param stream: a file name or a file-like object exposing a `path`
        attribute
    :param args: positional args forwarded to `pd.read_csv()`
    :param kwargs: keyword args forwarded to `pd.read_csv()`
    :return: the content of the CSV file
    """
    # The file name is needed to inspect the extension.
    if isinstance(stream, str):
        file_name = stream
    else:
        file_name = vars(stream)["path"]
    # Map the extension to the compression, if the file is zipped.
    compression = None
    if file_name.endswith((".gzip", ".gz", ".tgz")):
        compression = "gzip"
    elif file_name.endswith(".zip"):
        compression = "zip"
    if compression is not None:
        hdbg.dassert_not_in("compression", kwargs)
        kwargs["compression"] = compression
    # Read.
    _LOG.debug(hprint.to_str("args kwargs"))
    return pd.read_csv(stream, *args, **kwargs)
+ + :param dfs: dataframes keyed by id; all dfs should have the same cols, + indices are handled based on the `index_mode` + :param weights: float weights indexed by id with unique col names + :param index_mode: same as `mode` in `apply_index_mode()` + :return: weighted sums keyed by weight col names + """ + hdbg.dassert_isinstance(dfs, dict) + hdbg.dassert(dfs, "dictionary of dfs must be nonempty") + # Get a dataframe from the dictionary and record its index and columns. + id_ = list(dfs)[0] + hdbg.dassert_isinstance(id_, str) + df = dfs[id_] + hdbg.dassert_isinstance(df, pd.DataFrame) + cols = df.columns + # Sanity-check dataframes in dictionary. + for key, value in dfs.items(): + hdbg.dassert_isinstance(key, str) + hdbg.dassert_isinstance(value, pd.DataFrame) + # The reference df is not modified. + _, value = apply_index_mode(df, value, index_mode) + hdbg.dassert( + value.columns.equals(cols), + "Column equality fails for keys=%s, %s", + id_, + key, + ) + # Sanity-check weights. + hdbg.dassert_isinstance(weights, pd.DataFrame) + hdbg.dassert_eq(weights.columns.nlevels, 1) + hdbg.dassert(not weights.columns.has_duplicates) + hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) + # Create a multiindexed dataframe to facilitate computing the weighted sums. + weighted_dfs = {} + combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) + # TODO(Paul): Consider relaxing the NaN-handling. + for col in weights.columns: + weighted_combined_df = combined_df.multiply(weights[col], level=0) + weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( + min_count=len(dfs) + ) + weighted_dfs[col] = weighted_sums + return weighted_dfs + + +def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: + """ + Remove N rows from the input data and shuffle the remaining ones. 
+ + :param df: input data + :param nrows: the number of rows to remove from the original data + :param seed: see `random.seed()` + :return: shuffled data with removed rows + """ + hdbg.dassert_lte(1, nrows) + hdbg.dassert_lte(nrows, df.shape[0]) + idx = list(range(df.shape[0])) + random.seed(seed) + random.shuffle(idx) + idx = sorted(idx[nrows:]) + return df.iloc[idx] + + +def remap_obj( + obj: Union[pd.Series, pd.Index], + map_: Dict[Any, Any], + **kwargs: Any, +) -> pd.Series: + """ + Substitute each value of an object with another value from a dictionary. + + :param obj: an object to substitute value in + :param map_: values to substitute with + :return: remapped pandas series + """ + hdbg.dassert_lte(1, obj.shape[0]) + # TODO(Grisha): consider extending for other mapping types supported by + # `pd.Series.map`. + hdbg.dassert_isinstance(map_, dict) + # Check that every element of the object is in the mapping. + hdbg.dassert_is_subset(obj, map_.keys()) + new_srs = obj.map(map_, **kwargs) + return new_srs + + +def get_random_df( + num_cols: int, + seed: Optional[int] = None, + date_range_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + Compute df with random data with `num_cols` columns and index obtained by + calling `pd.date_range(**kwargs)`. 
def get_random_df(
    num_cols: int,
    seed: Optional[int] = None,
    date_range_kwargs: Optional[Dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Compute df with random data with `num_cols` columns and index obtained by
    calling `pd.date_range(**date_range_kwargs)`.

    :param num_cols: the number of columns in a DataFrame to generate
    :param seed: seed for `np.random.seed()`; `None` means do not reseed
    :param date_range_kwargs: kwargs for `pd.date_range()`; must be passed
    :return: df of `np.random.rand()` values indexed by the date range
    """
    if date_range_kwargs is None:
        # Fail with a clear message instead of a `TypeError` on `**None`.
        raise ValueError("`date_range_kwargs` must be specified")
    # Compare with `None` explicitly so that `seed=0` is honored.
    if seed is not None:
        np.random.seed(seed)
    dt = pd.date_range(**date_range_kwargs)
    df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt)
    return df


# #############################################################################

# TODO(gp): -> AxisNameSet
ColumnSet = Optional[Union[str, List[str]]]


# TODO(gp): -> _resolve_axis_names
def _resolve_column_names(
    column_set: ColumnSet,
    columns: Union[List[str], pd.Index],
    *,
    keep_order: bool = False,
) -> List[str]:
    """
    Normalize a column selection to a list and perform some sanity checks.

    :param column_set: columns to select
        - `None`: select all the `columns`
        - a string: select a single column
        - a list: select the listed columns, which must be in `columns`
    :param columns: all columns available; must be non-empty
    :param keep_order: if True return the selection in the order of
        `columns`, otherwise return it in the order it was passed
    :return: selected column names
    """
    # Ensure that `columns` is well-formed.
    if isinstance(columns, pd.Index):
        columns = columns.to_list()
    hdbg.dassert_isinstance(columns, list)
    hdbg.dassert_lte(1, len(columns))
    #
    if column_set is None:
        # Columns were not specified, thus use the list of all the columns.
        column_set = columns
    else:
        if isinstance(column_set, str):
            column_set = [column_set]
        hdbg.dassert_isinstance(column_set, list)
        hdbg.dassert_lte(1, len(column_set))
        hdbg.dassert_is_subset(column_set, columns)
        if keep_order:
            # Keep the selected columns in the same order as in the original
            # `columns`. Use a set to avoid a quadratic membership scan.
            selected = set(column_set)
            column_set = [c for c in columns if c in selected]
    return column_set


def _mask_outside_quantiles(
    srs: pd.Series,
    lower_quantile: float,
    upper_quantile: float,
    fill_value: float,
) -> pd.Series:
    """
    Replace the values of a series outside its quantile band with `fill_value`.
    """
    lower_bound = srs.quantile(lower_quantile)
    upper_bound = srs.quantile(upper_quantile)
    # Keep NaNs as they are: they are missing values, not outliers.
    keep_mask = srs.between(lower_bound, upper_bound) | srs.isna()
    return srs.where(keep_mask, fill_value)


# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`?
def remove_outliers(
    df: pd.DataFrame,
    lower_quantile: float,
    *,
    column_set: ColumnSet,
    fill_value: float = np.nan,
    mode: str = "remove_outliers",
    axis: Any = 0,
    upper_quantile: Optional[float] = None,
) -> pd.DataFrame:
    """
    Replace values outside a quantile band with `fill_value`.

    The quantiles are computed separately for each selected column (or row)
    and values below the lower or above the upper quantile are replaced.

    :param df: input data; it is not modified
    :param lower_quantile: lower quantile in [0, 1]
    :param column_set: columns (rows if `axis=1`) to process, `None` for all
    :param fill_value: value that replaces the outliers
    :param mode: only "remove_outliers" is supported
    :param axis: 0 to process columns, 1 to process rows
    :param upper_quantile: upper quantile, `1 - lower_quantile` if `None`
    :return: copy of `df` with the outliers replaced
    """
    hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported")
    if mode != "remove_outliers":
        raise ValueError(f"Invalid mode='{mode}'")
    #
    hdbg.dassert_lte(0.0, lower_quantile)
    if upper_quantile is None:
        upper_quantile = 1.0 - lower_quantile
    hdbg.dassert_lte(lower_quantile, upper_quantile)
    hdbg.dassert_lte(upper_quantile, 1.0)
    #
    df = df.copy()
    if axis == 0:
        columns = _resolve_column_names(column_set, df.columns)
        for column in columns:
            df[column] = _mask_outside_quantiles(
                df[column], lower_quantile, upper_quantile, fill_value
            )
    elif axis == 1:
        # Rows are addressed through the index labels.
        rows = _resolve_column_names(column_set, df.index)
        for row in rows:
            df.loc[row] = _mask_outside_quantiles(
                df.loc[row], lower_quantile, upper_quantile, fill_value
            )
    else:
        raise ValueError(f"Invalid axis='{axis}'")
    return df


# #############################################################################


# TODO(Grisha): add assertions/logging.
def get_df_from_iterator(
    iter_: Iterator[pd.DataFrame],
    *,
    sort_index: bool = True,
) -> pd.DataFrame:
    """
    Concat all the dataframes in the iterator in one dataframe.

    Note that the iterator is consumed by this function.

    :param iter_: dataframe iterator
    :param sort_index: whether to sort output index or not
    :return: combined iterator data
    """
    # TODO(gp): @all make a copy of `iter_` so we don't consume it.
    dfs = list(iter_)
    df_res = pd.concat(dfs)
    if sort_index:
        df_res = df_res.sort_index()
    return df_res
def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> pd.DataFrame:
    """
    Colorize a df with a heatmap depending on the numeric values.

    The result is the object returned by `df.style.background_gradient()`.

    :param axis: forwarded as `axis` to `background_gradient()`; `None`
        colorizes using all the values at once
    :return: the styled dataframe
    """
    # Import here to avoid long start up times.
    import seaborn as sns

    palette = sns.diverging_palette(5, 250, as_cmap=True)
    return df.style.background_gradient(axis=axis, cmap=palette)


def compare_nans_in_dataframes(
    df1: pd.DataFrame, df2: pd.DataFrame
) -> pd.DataFrame:
    """
    Report the cells where exactly one of the two dataframes has a NaN.

    E.g.,
    - `5 vs np.nan` and `np.nan vs 5` are mismatches
    - `np.nan vs np.nan` is a match
    - `np.nan vs np.inf` is a mismatch

    :param df1: dataframe to compare
    :param df2: dataframe to compare with; must have the same axes as `df1`
    :return: differences stacked side by side as "df1" / "df2", see
        `pandas.DataFrame.compare()`
    """
    dassert_axes_equal(df1, df2)
    df1_is_nan = df1.isna()
    df2_is_nan = df2.isna()
    # A mismatch is a cell that is NaN in one dataframe but not in the other.
    mismatch_mask = (df1_is_nan & ~df2_is_nan) | (~df1_is_nan & df2_is_nan)
    # Compute a dataframe with the differences.
    df1_mismatches = df1[mismatch_mask]
    df2_mismatches = df2[mismatch_mask]
    return df1_mismatches.compare(df2_mismatches, result_names=("df1", "df2"))
# TODO(Grisha): -> `compare_dataframes()`?
def compare_dfs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    *,
    row_mode: str = "equal",
    column_mode: str = "equal",
    # TODO(Grisha): should be True by default?
    compare_nans: bool = False,
    diff_mode: str = "diff",
    assert_diff_threshold: float = 1e-3,
    close_to_zero_threshold: float = 1e-6,
    zero_vs_zero_is_zero: bool = True,
    remove_inf: bool = True,
    log_level: int = logging.DEBUG,
    only_warning: bool = True,
) -> pd.DataFrame:
    """
    Compare two dataframes.

    This works for dataframes with and without multi-index.

    The input dataframes are not modified: the rounding of small values is
    applied to copies.

    :param row_mode: control how the rows are handled
        - "equal": rows need to be the same for the two dataframes
        - "inner": compute the common rows for the two dataframes
    :param column_mode: same as `row_mode`
    :param compare_nans: currently not used by the implementation; NaNs are
        always compared when `diff_mode` is "pct_change"
    :param diff_mode: control how the dataframes are compared in terms of
        corresponding elements
        - "diff": use the difference
        - "pct_change": use the percentage difference
    :param assert_diff_threshold: maximum allowed total difference
        - do not assert if `None`
        - works when `diff_mode` is "pct_change"
    :param close_to_zero_threshold: round numbers below the threshold to 0
    :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0
        if True, otherwise keep the actual result
    :param remove_inf: replace +-inf with `np.nan`
    :param log_level: logging level
    :param only_warning: when `True` the function issues a warning instead of
        aborting
    :return: a single dataframe with differences as values
    """
    hdbg.dassert_isinstance(df1, pd.DataFrame)
    hdbg.dassert_isinstance(df2, pd.DataFrame)
    # Check value of `assert_diff_threshold`, if it was passed.
    if assert_diff_threshold is not None:
        hdbg.dassert_lte(assert_diff_threshold, 1.0)
        hdbg.dassert_lte(0.0, assert_diff_threshold)
    # TODO(gp): Factor out this logic and use it for both compare_visually_dfs
    # and
    if row_mode == "equal":
        dassert_indices_equal(df1, df2)
    elif row_mode == "inner":
        # TODO(gp): Add sorting on demand, otherwise keep the columns in order.
        same_rows = list((set(df1.index)).intersection(set(df2.index)))
        df1 = df1[df1.index.isin(same_rows)]
        df2 = df2[df2.index.isin(same_rows)]
    else:
        raise ValueError(f"Invalid row_mode='{row_mode}'")
    #
    if column_mode == "equal":
        hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns))
    elif column_mode == "inner":
        # TODO(gp): Add sorting on demand, otherwise keep the columns in order.
        col_names = sorted(list(set(df1.columns).intersection(set(df2.columns))))
        df1 = df1[col_names]
        df2 = df2[col_names]
    else:
        raise ValueError(f"Invalid column_mode='{column_mode}'")
    # Work on copies since the rounding below assigns in place and, in the
    # "equal" modes, `df1` and `df2` are still the caller's objects.
    df1 = df1.copy()
    df2 = df2.copy()
    # Round small numbers to 0 to exclude them from the diff computation.
    close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold
    df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round(
        0
    )
    df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round(
        0
    )
    # Compute the difference df.
    if diff_mode == "diff":
        # Test and convert the assertion into a boolean.
        error = None
        try:
            pd.testing.assert_frame_equal(
                df1, df2, check_like=True, check_dtype=False
            )
        except AssertionError as e:
            error = e
        # Raise or warn, depending on `only_warning`, if the dfs differ.
        if error is not None:
            hdbg._dfatal(
                error,
                "df1=\n%s\n and df2=\n%s\n are not equal.",
                df_to_str(df1, log_level=log_level),
                df_to_str(df2, log_level=log_level),
                only_warning=only_warning,
            )
        # Calculate the difference.
        df_diff = df1 - df2
        if remove_inf:
            df_diff = df_diff.replace([np.inf, -np.inf], np.nan)
    elif diff_mode == "pct_change":
        # Compare NaN values in dataframes.
        nan_diff_df = compare_nans_in_dataframes(df1, df2)
        _LOG.debug("Dataframe with NaN differences=\n%s", df_to_str(nan_diff_df))
        msg = "There are NaN values in one of the dataframes that are not in the other one."
        hdbg.dassert_eq(
            0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning
        )
        # Compute pct_change.
        df_diff = 100 * (df1 - df2) / df2.abs()
        if zero_vs_zero_is_zero:
            # When comparing 0 to 0 set the diff (which is NaN by default) to 0.
            df1_mask = df1 == 0
            df2_mask = df2 == 0
            zero_vs_zero_mask = df1_mask & df2_mask
            df_diff[zero_vs_zero_mask] = 0
        if remove_inf:
            df_diff = df_diff.replace([np.inf, -np.inf], np.nan)
        # Check if `df_diff` values are less than `assert_diff_threshold`.
        if assert_diff_threshold is not None:
            # NaN diffs are considered within the threshold.
            nan_mask = df_diff.isna()
            within_threshold = (
                df_diff.abs() <= assert_diff_threshold
            ) | nan_mask
            expected = pd.DataFrame(
                True,
                index=within_threshold.index,
                columns=within_threshold.columns,
            )
            # Test and convert the assertion into boolean.
            error = None
            try:
                pd.testing.assert_frame_equal(
                    within_threshold, expected, check_exact=True
                )
            except AssertionError as e:
                error = e
            # Raise or warn, depending on `only_warning`.
            if error is not None:
                hdbg._dfatal(
                    error,
                    "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.",
                    df_to_str(df1, log_level=log_level),
                    df_to_str(df2, log_level=log_level),
                    only_warning=only_warning,
                )
        # Report max diff.
        max_diff = df_diff.abs().max().max()
        _LOG.log(
            log_level,
            "Maximum percentage difference between the two dataframes = %s",
            max_diff,
        )
    else:
        raise ValueError(f"diff_mode={diff_mode}")
    df_diff = df_diff.add_suffix(f".{diff_mode}")
    return df_diff
# #############################################################################
# Multi-index dfs
# #############################################################################


# TODO(Grisha): should be a more elegant way to add a column.
def add_multiindex_col(
    df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str
) -> pd.DataFrame:
    """
    Add a level-0 column to a 2-level multiindex DataFrame.

    Note: each column in a multiindex DataFrame is a DataFrame itself. The
    new columns are assigned to `df` itself, which is also returned.

    :param df: multiindex df with 2 column levels
    :param multiindex_col: column (i.e. singleindex df) of a multiindex df
    :param col_name: name of a new column; must not be in `df` already
    :return: a multiindex DataFrame with a new column
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    hdbg.dassert_eq(2, len(df.columns.levels))
    hdbg.dassert_isinstance(multiindex_col, pd.DataFrame)
    hdbg.dassert_isinstance(col_name, str)
    hdbg.dassert_not_in(col_name, df.columns)
    # Store each inner column under the `(col_name, inner_name)` key.
    for inner_name in multiindex_col.columns:
        df[col_name, inner_name] = multiindex_col[inner_name]
    return df


def list_to_str(
    vals: List[Any],
    *,
    sep_char: str = ", ",
    enclose_str_char: str = "'",
    max_num: Optional[int] = 10,
) -> str:
    """
    Format values as "<number of values> [<values>]".

    E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']"

    When there are more than `max_num` values, only the first and the last
    `max_num / 2` values are shown, separated by "...".

    :param vals: values to be converted
    :param sep_char: separator to use between elements
    :param enclose_str_char: character to enclose each element's string
        representation; if empty, elements are not enclosed
    :param max_num: maximum number of elements to display in the output;
        `None` means display all of them
    :return: the formatted string representing the list
    """
    items = [str(val) for val in vals]
    if enclose_str_char:
        items = [f"{enclose_str_char}{item}{enclose_str_char}" for item in items]
    too_many = max_num is not None and len(vals) > max_num
    if too_many:
        hdbg.dassert_lt(1, max_num)
        half = int(max_num / 2)
        # Show the head and the tail around an ellipsis.
        items = items[:half] + ["..."] + items[-half:]
    return f"{len(vals)} [" + sep_char.join(items) + "]"
def multiindex_df_info(
    df: pd.DataFrame,
    *,
    log_level: int = logging.INFO,
    **list_to_str_kwargs: Dict[str, Any],
) -> str:
    """
    Report information about a multi-index df.

    The report contains the shape, the values of the two column levels, the
    rows and, for a datetime index, the first / last timestamp and frequency.

    :param df: df with 2 column levels
    :param log_level: level used to log the report
    :param list_to_str_kwargs: kwargs for `list_to_str()`
    :return: the report, one item per line
    """
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    hdbg.dassert_eq(2, len(df.columns.levels))
    level0 = df.columns.levels[0]
    level1 = df.columns.levels[1]
    index = df.index
    lines = [
        f"shape={len(level0)} x {len(level1)} x {len(index)}",
        "columns_level0=" + list_to_str(level0, **list_to_str_kwargs),
        "columns_level1=" + list_to_str(level1, **list_to_str_kwargs),
        "rows=" + list_to_str(index, **list_to_str_kwargs),
    ]
    if isinstance(index, pd.DatetimeIndex):
        # Display timestamp info.
        first_timestamp = index.min()
        last_timestamp = index.max()
        freq = index.freq
        if freq is None:
            # Try to infer frequency.
            freq = pd.infer_freq(index)
        lines.append(f"start_timestamp={first_timestamp}")
        lines.append(f"end_timestamp={last_timestamp}")
        lines.append(f"frequency={freq}")
    report = "\n".join(lines)
    _LOG.log(log_level, report)
    return report
def subset_multiindex_df(
    df: pd.DataFrame,
    *,
    # TODO(gp): Consider passing trim_df_kwargs as kwargs.
    start_timestamp: Optional[pd.Timestamp] = None,
    end_timestamp: Optional[pd.Timestamp] = None,
    columns_level0: ColumnSet = None,
    columns_level1: ColumnSet = None,
    keep_order: bool = False,
) -> pd.DataFrame:
    """
    Filter multi-index DataFrame by timestamp index and column levels.

    :param df: time-indexed df with 2 column levels
    :param start_timestamp: see `trim_df()`
    :param end_timestamp: see `trim_df()`
    :param columns_level0: column names that corresponds to `df.columns.levels[0]`
        - `None` means no filtering
    :param columns_level1: column names that corresponds to `df.columns.levels[1]`
        - `None` means no filtering
    :param keep_order: see `_resolve_column_names()`
    :return: filtered DataFrame
    """
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    hdbg.dassert_eq(2, len(df.columns.levels))
    # Filter by timestamp; both ends of the interval are included.
    # The positional flags are `allow_empty` and `strictly_increasing`.
    dassert_time_indexed_df(df, False, False)
    df = trim_df(
        df,
        ts_col_name=None,
        start_ts=start_timestamp,
        end_ts=end_timestamp,
        left_close=True,
        right_close=True,
    )
    # Filter level 0.
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    level0_names = _resolve_column_names(
        columns_level0, df.columns.levels[0], keep_order=keep_order
    )
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    hdbg.dassert_is_subset(level0_names, df.columns.levels[0])
    df = df[level0_names]
    # Filter level 1.
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    level1_names = _resolve_column_names(
        columns_level1, df.columns.levels[1], keep_order=keep_order
    )
    hdbg.dassert_isinstance(df.columns, pd.MultiIndex)
    hdbg.dassert_is_subset(level1_names, df.columns.levels[1])
    # Bring level 1 on top to select on it, then restore the level order.
    swapped_df = df.swaplevel(axis=1)
    swapped_df = swapped_df[level1_names]
    return swapped_df.swaplevel(axis=1)
# #############################################################################


def compare_multiindex_dfs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    *,
    subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None,
    compare_dfs_kwargs: Optional[Dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Subset two multi-index dfs in the same way and compare them.

    :param df1: first multi-index df
    :param df2: second multi-index df
    :param subset_multiindex_df_kwargs: params for `subset_multiindex_df()`,
        applied to both dfs; `None` means no params
    :param compare_dfs_kwargs: params for `compare_dfs()`; `None` means no
        params
    :return: df with differences as values
    """
    # Subset dfs.
    subset_kwargs = (
        {} if subset_multiindex_df_kwargs is None else subset_multiindex_df_kwargs
    )
    df1_subset = subset_multiindex_df(df1, **subset_kwargs)
    df2_subset = subset_multiindex_df(df2, **subset_kwargs)
    # Compare dfs.
    compare_kwargs = {} if compare_dfs_kwargs is None else compare_dfs_kwargs
    return compare_dfs(df1_subset, df2_subset, **compare_kwargs)
# #############################################################################


def compute_duration_df(
    tag_to_df: Dict[str, pd.DataFrame],
    *,
    intersect_dfs: bool = False,
    valid_intersect: bool = False,
) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]:
    """
    Compute a df with some statistics about the time index.

    E.g.,
    ```
                min_index   max_index   min_valid_index   max_valid_index
    tag1
    tag2
    ```

    :param tag_to_df: dfs with a timezone-aware datetime index, keyed by tag
    :param intersect_dfs: return a transformed dict with the intersection of
        indices of all the dfs if True, otherwise return the input data as is
    :param valid_intersect: intersect indices without NaNs if True, otherwise
        intersect indices as is
    :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param
    """
    hdbg.dassert_isinstance(tag_to_df, Dict)
    # Names of the columns of the stats df.
    min_col = "min_index"
    max_col = "max_index"
    min_valid_index_col = "min_valid_index"
    max_valid_index_col = "max_valid_index"
    # Collect timestamp info from all dfs.
    data_stats = pd.DataFrame()
    for tag, tag_df in tag_to_df.items():
        # Check that the passed timestamp has timezone info.
        hdateti.dassert_has_tz(tag_df.index[0])
        dassert_index_is_datetime(tag_df)
        # Compute timestamp stats.
        data_stats.loc[tag, min_col] = tag_df.index.min()
        data_stats.loc[tag, max_col] = tag_df.index.max()
        data_stats.loc[tag, min_valid_index_col] = tag_df.dropna().index.min()
        data_stats.loc[tag, max_valid_index_col] = tag_df.dropna().index.max()
    # Make a copy so we do not modify the original data.
    tag_to_df_updated = tag_to_df.copy()
    if not intersect_dfs:
        return data_stats, tag_to_df_updated
    # Change the initial dfs with intersection.
    if valid_intersect:
        # Intersect on the indices without NaNs.
        min_col = min_valid_index_col
        max_col = max_valid_index_col
    # The start of the intersection is the max value among all start dates.
    intersection_start_date = data_stats[min_col].max()
    # The end of the intersection is the min value among all end dates.
    intersection_end_date = data_stats[max_col].min()
    for tag in tag_to_df_updated.keys():
        tag_to_df_updated[tag] = trim_df(
            tag_to_df_updated[tag],
            ts_col_name=None,
            start_ts=intersection_start_date,
            end_ts=intersection_end_date,
            left_close=True,
            right_close=True,
        )
    return data_stats, tag_to_df_updated


# #############################################################################


# TODO(gp): Remove this since it's in Google API.
def to_gsheet(
    df: pd.DataFrame,
    gsheet_name: str,
    gsheet_sheet_name: str,
    overwrite: bool,
) -> None:
    """
    Save a dataframe to a Google sheet.

    :param df: the dataframe to save to a Google sheet
    :param gsheet_name: the name of the Google sheet to save the df
        into; the Google sheet with this name must already exist on the
        Google Drive
    :param gsheet_sheet_name: the name of the sheet in the Google sheet
    :param overwrite: if True, the contents of the sheet are erased
        before saving the dataframe into it; if False, the dataframe is
        appended to the contents of the sheet and duplicated rows are
        dropped
    """
    import gspread_pandas

    spread = gspread_pandas.Spread(
        gsheet_name, sheet=gsheet_sheet_name, create_sheet=True
    )
    if overwrite:
        spread.clear_sheet()
    else:
        # Merge the current content of the sheet with the new data.
        sheet_contents = spread.sheet_to_df(index=None)
        df = pd.concat([sheet_contents, df]).drop_duplicates()
    spread.df_to_sheet(df, index=False)
# #############################################################################
# _SummaryRow
# #############################################################################


@dataclasses.dataclass
class _SummaryRow:
    """
    Outcome of a single check, i.e., one row of the summary df.
    """

    # Description of the check.
    description: str
    # Description of the output.
    comment: str
    # Whether the check was successful or not.
    is_ok: bool


# #############################################################################
# CheckSummary
# #############################################################################


class CheckSummary:
    """
    Collect and report the results of several checks performed in a notebook.
    """

    def __init__(self, *, title: Optional[str] = ""):
        self.title = title
        # One row per check, in insertion order.
        self._array: List[_SummaryRow] = []

    def add(self, description: str, comment: str, is_ok: bool) -> None:
        """
        Add the result of a single check.
        """
        self._array.append(_SummaryRow(description, comment, is_ok))

    def is_ok(self) -> bool:
        """
        Compute whether all the checks were successful or not.
        """
        return all(row.is_ok for row in self._array)

    def report_outcome(
        self, *, notebook_output: bool = True, assert_on_error: bool = True
    ) -> Optional[str]:
        """
        Report the result of the entire check.

        :param notebook_output: report the result of the checks for a
            notebook or as a string
        :param assert_on_error: raise `ValueError` if one check failed
        :return: the report as a string, or `None` when it was displayed in
            a notebook
        """
        df = pd.DataFrame(self._array)
        is_ok = self.is_ok()
        # Compute result as a string.
        lines = []
        if self.title:
            lines.append("# " + self.title)
        lines.append(str(df))
        lines.append(f"is_ok={is_ok}")
        result = "\n".join(lines)
        # Display on a notebook, if needed.
        if notebook_output:
            if self.title:
                print(self.title)

            def _color_rows(row: pd.Series) -> List[str]:
                """
                Color a row green or red depending on `row["is_ok"]`.
                """
                color = "#ACF3AE" if row["is_ok"] else "#FA6B84"
                return [f"background-color: {color}"] * len(row)

            from IPython.display import display

            display(df.style.apply(_color_rows, axis=1))
            print(f"is_ok={is_ok}")
        # Assert if at least one of the check failed.
        if not is_ok and assert_on_error:
            raise ValueError("The checks have failed:\n" + result)
        # For notebooks, we want to return None, since the outcome was
        # already displayed.
        return None if notebook_output else result


# #############################################################################


def add_end_download_timestamp(
    obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC"
) -> Union[pd.DataFrame, Dict]:
    """
    Store the current time under the 'end_download_timestamp' key / column.

    The object is updated in place and returned.

    :param obj: the DataFrame or dict to which the timestamp is added
    :param timezone: the timezone for the current time
    :return: the same object with the timestamp set
    """
    obj["end_download_timestamp"] = hdateti.get_current_time(timezone)
    return obj


def filter_df(
    df: pd.DataFrame,
    col_name: str,
    value: Any,
    *,
    invert: bool = False,
    check_value: bool = True,
    print_info: bool = True,
) -> pd.DataFrame:
    """
    Select the rows of a df based on the values of one column.

    :param df: data to filter
    :param col_name: column to filter on; must be in `df`
    :param value: a list selects the rows whose value is in the list, any
        other object selects the rows whose value is equal to it
    :param invert: select the complement of the matching rows
    :param check_value: for a non-list `value`, assert that it is among the
        values of the column
    :param print_info: log the percentage of selected rows
    :return: the selected rows
    """
    hdbg.dassert_in(col_name, df.columns)
    col = df[col_name]
    if isinstance(value, list):
        mask = col.isin(value)
    else:
        if check_value:
            hdbg.dassert_in(value, col.unique())
        mask = col == value
    mask = ~mask if invert else mask
    if print_info:
        _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0]))
    return df[mask]


def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Dict[str, Any]) -> str:
    """
    Report percentage of True for a list / series.

    :param vals: values to count
    :param perc_kwargs: kwargs for `hprint.perc()`
    :return: the output of `hprint.perc()` for the sum over the length
    """
    srs = pd.Series(vals) if isinstance(vals, list) else vals
    return hprint.perc(srs.sum(), len(srs), **perc_kwargs)
+ """ + if isinstance(vals, list): + vals = pd.Series(vals) + ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) + return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py new file mode 100644 index 000000000..54ca04c93 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py @@ -0,0 +1,628 @@ +""" +Statistical analysis and ML functions for pandas DataFrames. + +Import as: + +import helpers.hpandas_analysis as hpananal +""" + +import datetime +import logging +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +# Lazy imports to avoid slow module loading. +# When a type checker analyzes the code: it pretends the imports exist, so you +# can use those names in type annotations without “unknown name” errors. +# These heavy dependencies are only imported when functions are actually called. +if TYPE_CHECKING: + import matplotlib as mpl + +_LOG = logging.getLogger(__name__) + + +def _get_num_pcs_to_plot(num_pcs_to_plot: int, max_pcs: int) -> int: + """ + Get the number of principal components to plot. + + :param num_pcs_to_plot: requested number of PCs to plot, use -1 for + all + :param max_pcs: maximum number of available principal components + :return: validated number of PCs to plot + """ + if num_pcs_to_plot == -1: + num_pcs_to_plot = max_pcs + hdbg.dassert_lte(0, num_pcs_to_plot) + hdbg.dassert_lte(num_pcs_to_plot, max_pcs) + return num_pcs_to_plot + + +def rolling_corr_over_time( + df: pd.DataFrame, com: float, nan_mode: str +) -> pd.DataFrame: + """ + Compute rolling correlation over time. 
def rolling_corr_over_time(
    df: pd.DataFrame, com: float, nan_mode: str
) -> pd.DataFrame:
    """
    Compute rolling correlation over time.

    The correlation is exponentially weighted with center of mass `com` and
    requires at least `3 * com` observations.

    :param df: data with a strictly increasing index
    :param com: center of mass of the exponential weighting
    :param nan_mode: how to handle NaNs
        - "drop": remove the rows with any NaN
        - "fill_with_zero": replace NaNs with 0
        - "abort": raise if there is any NaN
    :return: corr_df is a multi-index df storing correlation matrices
        with labels
    """
    import helpers.hpandas_dassert as hpandass

    hpandass.dassert_strictly_increasing_index(df)
    # Handle NaNs based on mode.
    if nan_mode == "abort":
        num_nans = np.isnan(df).sum().sum()
        if num_nans > 0:
            raise ValueError("df has %d nans\n%s" % (num_nans, df))
    elif nan_mode == "drop":
        df = df.dropna(how="any")
    elif nan_mode == "fill_with_zero":
        df = df.fillna(0.0)
    else:
        raise ValueError("Invalid nan_mode='%s'" % nan_mode)
    min_periods = 3 * com
    return df.ewm(com=com, min_periods=min_periods).corr()
def _get_eigvals_eigvecs(
    df: pd.DataFrame, dt: datetime.date, sort_eigvals: bool
) -> Tuple[np.array, np.array]:
    """
    Compute eigenvalues and eigenvectors for a correlation matrix at a specific
    date.

    Infs and NaNs in the matrix are replaced with 0 before the decomposition.
    If all the eigenvalues are 0, the eigenvectors are set to NaN.

    :param df: correlation matrix dataframe with multiindex (date,
        columns)
    :param dt: date for which to compute eigenvalues/eigenvectors
    :param sort_eigvals: whether to sort eigenvalues in descending order
    :return: tuple of (eigenvalues array, eigenvectors array), where the
        i-th column of the eigenvectors corresponds to the i-th eigenvalue
    """
    hdbg.dassert_isinstance(dt, datetime.date)
    df_tmp = df.loc[dt].copy()
    # Compute rolling eigenvalues and eigenvectors.
    # TODO(gp): Count and report inf and nans as warning.
    df_tmp.replace([np.inf, -np.inf], np.nan, inplace=True)
    df_tmp.fillna(0.0, inplace=True)
    eigval, eigvec = np.linalg.eigh(df_tmp)
    # Sort eigenvalues, if needed.
    # `np.linalg.eigh()` returns the eigenvalues in ascending order, so the
    # descending sort must not depend on whether they are already ascending.
    if sort_eigvals:
        _LOG.debug("Before sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec)
        idx = eigval.argsort()[::-1]
        eigval = eigval[idx]
        # Reorder the columns to keep eigenvectors aligned with eigenvalues.
        eigvec = eigvec[:, idx]
        _LOG.debug("After sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec)
    #
    if (eigval == 0).all():
        eigvec = np.nan * eigvec
    return eigval, eigvec
def rolling_pca_over_time(
    df: pd.DataFrame, com: float, nan_mode: str, sort_eigvals: bool = True
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Compute rolling PCAs over time.

    :param df: see `rolling_corr_over_time()`
    :param com: see `rolling_corr_over_time()`
    :param nan_mode: see `rolling_corr_over_time()`
    :param sort_eigvals: sort the eigenvalues in descending orders
    :return:
        - corr_df stores the rolling correlation matrices
        - eigval_df stores eigenvalues for the different components indexed by
          timestamps, normalized to sum to 1 for each timestamp
        - eigvec_df stores eigenvectors as multiindex df
    """
    import tqdm.autonotebook as tauton

    import helpers.hpandas_dassert as hpandass

    # Compute rolling correlation.
    corr_df = rolling_corr_over_time(df, com, nan_mode)
    # Compute eigvalues and eigenvectors.
    timestamps = corr_df.index.get_level_values(0).unique()
    num_timestamps = timestamps.shape[0]
    num_vars = df.shape[1]
    eigval = np.zeros((num_timestamps, num_vars))
    eigvec = np.zeros((num_timestamps, num_vars, num_vars))
    progress = tauton.tqdm(
        enumerate(timestamps),
        total=num_timestamps,
        desc="Computing rolling PCA",
    )
    for i, dt in progress:
        eigval[i], eigvec[i] = _get_eigvals_eigvecs(corr_df, dt, sort_eigvals)
    # Package results.
    eigval_df = pd.DataFrame(eigval, index=timestamps)
    hdbg.dassert_eq(eigval_df.shape[0], len(timestamps))
    hpandass.dassert_strictly_increasing_index(eigval_df)
    # Normalize by sum.
    # TODO(gp): Move this up.
    row_sums = eigval_df.sum(axis=1)
    eigval_df = eigval_df.multiply(1 / row_sums, axis="index")
    # Stack the per-timestamp eigenvector matrices on top of each other.
    # pylint ref: github.com/PyCQA/pylint/issues/3139
    eigvec = eigvec.reshape((-1, eigvec.shape[-1]))  # pylint: disable=unsubscriptable-object
    idx = pd.MultiIndex.from_product(
        [timestamps, df.columns], names=["datetime", None]
    )
    eigvec_df = pd.DataFrame(eigvec, index=idx, columns=range(df.shape[1]))  # pylint: disable=unsubscriptable-object
    hdbg.dassert_eq(
        len(eigvec_df.index.get_level_values(0).unique()), len(timestamps)
    )
    return corr_df, eigval_df, eigvec_df


def plot_pca_over_time(
    eigval_df: pd.DataFrame,
    eigvec_df: pd.DataFrame,
    num_pcs_to_plot: int = 0,
    num_cols: int = 2,
) -> None:
    """
    Similar to plot_pca_analysis() but over time.

    :param eigval_df: eigenvalues, see `rolling_pca_over_time()`
    :param eigvec_df: eigenvectors, see `rolling_pca_over_time()`
    :param num_pcs_to_plot: number of eigenvectors to plot, see
        `_get_num_pcs_to_plot()`
    :param num_cols: number of columns of the grid of eigenvector plots
    """
    import helpers.hmatplotlib as hmatplo

    # Plot eigenvalues.
    eigval_df.plot(title="Eigenvalues over time", ylim=(0, 1))
    # Plot cumulative variance.
    cumulative_df = eigval_df.cumsum(axis=1)
    cumulative_df.plot(
        title="Fraction of variance explained by top PCs over time", ylim=(0, 1)
    )
    # Plot eigenvectors.
    max_pcs = eigvec_df.shape[1]
    num_pcs_to_plot = _get_num_pcs_to_plot(num_pcs_to_plot, max_pcs)
    _LOG.info("num_pcs_to_plot=%s", num_pcs_to_plot)
    if num_pcs_to_plot <= 0:
        return
    _, axes = hmatplo.get_multiple_plots(
        num_pcs_to_plot,
        num_cols=num_cols,
        y_scale=4,
        sharex=True,
        sharey=True,
    )
    for pc_idx in range(num_pcs_to_plot):
        pc_df = eigvec_df[pc_idx].unstack(1)
        pc_df.plot(ax=axes[pc_idx], ylim=(-1, 1), title="PC%s" % pc_idx)
def plot_time_distributions(
    dts: List[Union[datetime.datetime, pd.Timestamp]],
    mode: str,
    density: bool = True,
) -> "mpl.axes.Axes":
    """
    Plot the distribution of an array of timestamps `dts` as a bar plot.

    One bar is plotted for every possible value of the selected time
    component (e.g., 7 bars for "weekday"), including the values that were
    not observed, so that the bars always match the tick labels.

    :param dts: timestamps; must be non-empty
    :param mode: time component to bucket the timestamps by
        - "time_of_the_day": hour of the day, labeled "HH:00"
        - "weekday": day of the week
        - "minute_of_the_hour": minute, from 0 to 59
        - "day_of_the_month": day, from 1 to 31
        - "month_of_the_year": month
        - "year": observed years, in increasing order
    :param density: plot the fraction of points if True, the count otherwise
    :return: axes of the plot
    """
    hdbg.dassert_type_in(dts[0], (datetime.datetime, pd.Timestamp))
    hdbg.dassert_in(
        mode,
        (
            "time_of_the_day",
            "weekday",
            "minute_of_the_hour",
            "day_of_the_month",
            "month_of_the_year",
            "year",
        ),
    )
    # For each mode compute the values, all the categories to plot, and the
    # corresponding tick labels.
    if mode == "time_of_the_day":
        # 1 hour bucket.
        vals = [dt.time().hour for dt in dts]
        categories = list(range(24))
        xticks = ["%02d:%02d" % (hour, 0) for hour in categories]
    elif mode == "weekday":
        vals = [dt.date().weekday() for dt in dts]
        categories = list(range(7))
        xticks = "Mon Tue Wed Thu Fri Sat Sun".split()
    elif mode == "minute_of_the_hour":
        vals = [dt.time().minute for dt in dts]
        categories = list(range(60))
        xticks = list(map(str, categories))
    elif mode == "day_of_the_month":
        vals = [dt.date().day for dt in dts]
        categories = list(range(1, 31 + 1))
        xticks = list(map(str, categories))
    elif mode == "month_of_the_year":
        vals = [dt.date().month for dt in dts]
        categories = list(range(1, 12 + 1))
        xticks = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
    elif mode == "year":
        vals = [dt.date().year for dt in dts]
        categories = sorted(set(vals))
        xticks = list(map(str, categories))
    else:
        raise ValueError("Invalid mode='%s'" % mode)
    # Count, aligning the counts to the categories so that the i-th bar
    # corresponds to the i-th label.
    count = pd.Series(vals).value_counts(sort=False)
    count = count.reindex(categories, fill_value=0)
    hdbg.dassert_eq(count.sum(), len(dts))
    #
    if density:
        count = count / count.sum()
    label = "num points=%s" % len(dts)
    ax = count.plot(kind="bar", label=label, figsize=(20, 7))
    ax.set_xticklabels(xticks)
    if density:
        ax.set_ylabel("Probability")
    else:
        ax.set_ylabel("Count")
    ax.legend(loc="best")
    return ax


# TODO(gp): It can't accept ax. Remove this limitation.
def jointplot(
    df: pd.DataFrame,
    predicted_var: str,
    predictor_var: str,
    height: Optional[int] = None,
    *args: Any,
    **kwargs: Any,
) -> None:
    """
    Perform a scatterplot of two columns of a dataframe using
    seaborn.jointplot().

    Rows with a non-finite value in either column are not plotted.

    :param df: dataframe
    :param predicted_var: y-var
    :param predictor_var: x-var
    :param height: forwarded to seaborn.jointplot(); `None` means use the
        seaborn default
    :param args, kwargs: arguments passed to seaborn.jointplot()
    """
    import seaborn as sns

    hdbg.dassert_in(predicted_var, df.columns)
    hdbg.dassert_in(predictor_var, df.columns)
    df = df[[predicted_var, predictor_var]]
    # Remove non-finite values.
    # TODO(gp): Use explore.dropna().
    mask = np.all(np.isfinite(df.values), axis=1)
    df = df[mask]
    # Forward `height` only when it was specified, since `None` is not a
    # valid figure size.
    if height is not None:
        kwargs["height"] = height
    # Plot.
    sns.jointplot(*args, x=predictor_var, y=predicted_var, data=df, **kwargs)


def _preprocess_regression(
    df: pd.DataFrame,
    intercept: bool,
    predicted_var: str,
    predicted_var_delay: int,
    predictor_vars: Union[str, List[str]],
    predictor_vars_delay: int,
) -> Optional[Tuple[pd.DataFrame, List[str], List[str]]]:
    """
    Preprocess data in dataframe form in order to perform a regression.
    """
    # Sanity check vars.
    hdbg.dassert_type_is(df, pd.DataFrame)
    hdbg.dassert_lte(1, df.shape[0])
    if isinstance(predictor_vars, str):
        predictor_vars = [predictor_vars]
    hdbg.dassert_type_is(predictor_vars, list)
    # hdbg.dassert_type_is(predicted_var, str)
    hdbg.dassert_not_in(predicted_var, predictor_vars)
    if not predictor_vars:
        # No predictors.
        _LOG.warning("No predictor vars: skipping")
        return None
    #
    col_names = [predicted_var] + predictor_vars
    hdbg.dassert_is_subset(col_names, df.columns)
    df = df[col_names].copy()
    num_rows = df.shape[0]
    # Shift.
    if predicted_var_delay != 0:
        df[predicted_var] = df[predicted_var].shift(predicted_var_delay)
        _LOG.warning("Shifting predicted_var=%s", predicted_var_delay)
    if predictor_vars_delay != 0:
        df[predictor_vars] = df[predictor_vars].shift(predictor_vars_delay)
        _LOG.warning("Shifting predictor_vars=%s", predictor_vars_delay)
    # Remove non-finite values.
    # TODO(gp): Use the function.
    df.dropna(how="all", inplace=True)
    num_rows_after_drop_nan_all = df.shape[0]
    if num_rows_after_drop_nan_all != num_rows:
        _LOG.info(
            "Removed %s rows with all nans",
            hprint.perc(num_rows - num_rows_after_drop_nan_all, num_rows),
        )
    #
    df.dropna(how="any", inplace=True)
    num_rows_after_drop_nan_any = df.shape[0]
    if num_rows_after_drop_nan_any != num_rows_after_drop_nan_all:
        _LOG.warning(
            "Removed %s rows with any nans",
            hprint.perc(num_rows - num_rows_after_drop_nan_any, num_rows),
        )
    # Prepare data.
    if intercept:
        if "const" not in df.columns:
            df.insert(0, "const", 1.0)
        predictor_vars = ["const"] + predictor_vars[:]
    param_names = predictor_vars[:]
    hdbg.dassert(np.all(np.isfinite(df[predicted_var].values)))
    hdbg.dassert(
        np.all(np.isfinite(df[predictor_vars].values)),
        msg="predictor_vars=%s" % predictor_vars,
    )
    # Perform regression.
+ if df.shape[0] < 1: + return None + return df, param_names, predictor_vars + + +def ols_regress( + df: pd.DataFrame, + predicted_var: str, + predictor_vars: str, + intercept: bool, + print_model_stats: bool = True, + tsplot: bool = False, + tsplot_figsize: Optional[Any] = None, + jointplot_: bool = True, + jointplot_height: Optional[Any] = None, + predicted_var_delay: int = 0, + predictor_vars_delay: int = 0, + max_nrows: float = 1e4, +) -> Optional[Dict[str, Any]]: + """ + Perform OLS on columns of a dataframe. + + :param df: dataframe + :param predicted_var: y variable + :param predictor_vars: x variables + :param intercept: + :param print_model_stats: print or return the model stats + :param tsplot: plot a time-series if possible + :param tsplot_figsize: + :param jointplot_: plot a scatter plot + :param jointplot_height: + :param predicted_var_delay: + :param predictor_vars_delay: + :param max_nrows: do not plot if there are too many rows, since + notebook can be slow or hang + :return: + """ + import statsmodels.api + + import helpers.hmatplotlib as hmatplo + + obj = _preprocess_regression( + df, + intercept, + predicted_var, + predicted_var_delay, + predictor_vars, + predictor_vars_delay, + ) + if obj is None: + return None + df, param_names, predictor_vars = obj + hdbg.dassert_lte(1, df.shape[0]) + model = statsmodels.api.OLS( + df[predicted_var], df[predictor_vars], hasconst=intercept + ).fit() + regr_res = { + "param_names": param_names, + "coeffs": model.params, + "pvals": model.pvalues, + # pylint: disable=no-member + "rsquared": model.rsquared, + "adj_rsquared": model.rsquared_adj, + "model": model, + } + if print_model_stats: + # pylint: disable=no-member + _LOG.info(model.summary().as_text()) + if tsplot or jointplot_: + if max_nrows is not None and df.shape[0] > max_nrows: + _LOG.warning( + "Skipping plots since df has %d > %d rows", + df.shape[0], + max_nrows, + ) + else: + predictor_vars = [p for p in predictor_vars if p != "const"] + if 
len(predictor_vars) == 1: + if tsplot: + # Plot the data over time. + if tsplot_figsize is None: + tsplot_figsize = hmatplo.FIG_SIZE + df[[predicted_var, predictor_vars[0]]].plot( + figsize=tsplot_figsize + ) + if jointplot_: + # Perform scatter plot. + if jointplot_height is None: + jointplot_height = hmatplo.FIG_SIZE[1] + jointplot( + df, + predicted_var, + predictor_vars[0], + height=jointplot_height, + ) + else: + _LOG.warning( + "Skipping plots since there are too many predictors" + ) + if print_model_stats: + return None + return regr_res + + +def ols_regress_series( + srs1: pd.Series, + srs2: pd.Series, + intercept: bool, + srs1_name: Optional[Any] = None, + srs2_name: Optional[Any] = None, + convert_to_dates: bool = False, + **kwargs: Any, +) -> Dict[str, Any]: + """ + Regress two series against each other. + + Wrapper around regress() to regress series against each other. + """ + # Validate inputs are Series. + hdbg.dassert_isinstance(srs1, pd.Series) + hdbg.dassert_isinstance(srs2, pd.Series) + srs1 = srs1.copy() + srs2 = srs2.copy() + # + if convert_to_dates: + _LOG.warning("Sampling to date") + srs1.index = [pd.to_datetime(dt).date() for dt in srs1.index] + srs2.index = [pd.to_datetime(dt).date() for dt in srs2.index] + # + hdbg.dassert_array_has_same_type_element(srs1, srs2, only_first_elem=True) + # Check common indices. + common_idx = srs1.index.intersection(srs2.index) + hdbg.dassert_lte(1, len(common_idx)) + # Merge series into a dataframe. 
+ if srs1_name is None: + srs1_name = srs1.name if srs1.name is not None else "" + if srs2_name is None: + srs2_name = srs2.name if srs2.name is not None else "" + if srs1_name == srs2_name: + srs1_name += "_1" + srs2_name += "_2" + _LOG.warning("Series have the same name: adding suffix to distinguish") + df = pd.concat([srs1, srs2], axis=1, join="outer") + df.columns = [srs1_name, srs2_name] + # + val = ols_regress(df, srs1_name, srs2_name, intercept=intercept, **kwargs) + val = cast(Dict[str, Any], val) + return val + + +def robust_regression( + df: pd.DataFrame, + predicted_var: str, + predictor_vars: str, + intercept: bool, + jointplot_: bool = True, + jointplot_figsize: Optional[Any] = None, + predicted_var_delay: int = 0, + predictor_vars_delay: int = 0, +) -> None: + """ + Perform robust regression using RANSAC algorithm to handle outliers. + + :param df: dataframe with data + :param predicted_var: dependent variable column name + :param predictor_vars: independent variable column name(s) + :param intercept: whether to include intercept in regression + :param jointplot_: whether to create a scatter plot + :param jointplot_figsize: size of the joint plot + :param predicted_var_delay: shift predicted variable by this many + periods + :param predictor_vars_delay: shift predictor variables by this many + periods + """ + import matplotlib.pyplot as plt + import sklearn.linear_model + + import helpers.hmatplotlib as hmatplo + + obj = _preprocess_regression( + df, + intercept, + predicted_var, + predicted_var_delay, + predictor_vars, + predictor_vars_delay, + ) + if obj is None: + return + # From http://scikit-learn.org/stable/auto_examples/linear_model/ + # plot_robust_fit.html#sphx-glr-auto-examples-linear-model-plot-robust-fit-py + # TODO(gp): Add also TheilSenRegressor and HuberRegressor. + + hdbg.dassert_eq(len(predictor_vars), 1) + y = df[predicted_var] + X = df[predictor_vars] + # Fit line using all data. 
+ lr = sklearn.linear_model.LinearRegression() + lr.fit(X, y) + # Robustly fit linear model with RANSAC algorithm. + ransac = sklearn.linear_model.RANSACRegressor() + ransac.fit(X, y) + inlier_mask = ransac.inlier_mask_ + outlier_mask = np.logical_not(inlier_mask) + # Predict data of estimated models. + line_X = np.linspace(X.min().values[0], X.max().values[0], num=100)[ + :, np.newaxis + ] + line_y = lr.predict(line_X) + line_y_ransac = ransac.predict(line_X) + # Compare estimated coefficients + _LOG.info("Estimated coef for linear regression=%s", lr.coef_) + _LOG.info("Estimated coef for RANSAC=%s", ransac.estimator_.coef_) + if jointplot_: + if jointplot_figsize is None: + jointplot_figsize = hmatplo.FIG_SIZE + plt.figure(figsize=jointplot_figsize) + plt.scatter( + X[inlier_mask], + y[inlier_mask], + color="red", + marker="o", + label="Inliers", + ) + plt.scatter( + X[outlier_mask], + y[outlier_mask], + color="blue", + marker="o", + label="Outliers", + ) + plt.plot(line_X, line_y, color="green", linewidth=2, label="OLS") + plt.plot( + line_X, line_y_ransac, color="black", linewidth=3, label="RANSAC" + ) + plt.legend(loc="best") + plt.xlabel(", ".join(predictor_vars)) + plt.ylabel(predicted_var) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py new file mode 100644 index 000000000..0604afd67 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py @@ -0,0 +1,111 @@ +""" +Import as: + +import helpers.hpandas_check_summary as hpachsum +""" + +import dataclasses +from typing import List, Optional + +import pandas as pd + +import helpers.hlogging as hloggin + +_LOG = hloggin.getLogger(__name__) + + +# 
############################################################################# +# _SummaryRow +# ############################################################################# + + +@dataclasses.dataclass +class _SummaryRow: + """ + Output of a check corresponding to a row of the summary df. + """ + + # Description of the check. + description: str + # Description of the output. + comment: str + # Whether the check was successful or not. + is_ok: bool + + +# ############################################################################# +# CheckSummary +# ############################################################################# + + +class CheckSummary: + """ + Collect and report the results of several checks performed in a notebook. + """ + + def __init__(self, *, title: Optional[str] = ""): + self.title = title + # Initialize the array for storing summary rows. + self._array: List[_SummaryRow] = [] + + def add(self, description: str, comment: str, is_ok: bool) -> None: + """ + Add the result of a single check. + """ + summary_row = _SummaryRow(description, comment, is_ok) + self._array.append(summary_row) + + def is_ok(self) -> bool: + """ + Compute whether all the checks were successful or not. + """ + is_ok = all(sr.is_ok for sr in self._array) + return is_ok + + def report_outcome( + self, *, notebook_output: bool = True, assert_on_error: bool = True + ) -> Optional[str]: + """ + Report the result of the entire check. + + :param notebook_output: report the result of the checks for a + notebook or as a string + :param assert_on_error: assert if one check failed + """ + df = pd.DataFrame(self._array) + + # Compute result as a string. + result = [] + if self.title: + result.append("# " + self.title) + result.append(str(df)) + is_ok = self.is_ok() + result.append(f"is_ok={is_ok}") + result = "\n".join(result) + # Display on a notebook, if needed. 
+ if notebook_output: + if self.title: + print(self.title) + + # Convert DataFrame to HTML with colored rows based on 'is_ok' column. + def _color_rows(row: bool) -> str: + """ + Apply red/green color based on boolean value in `row["is_ok"]`. + """ + is_ok = row["is_ok"] + color = "#FA6B84" if not is_ok else "#ACF3AE" + return [f"background-color: {color}"] * len(row) + + df_html = df.style.apply(_color_rows, axis=1) + from IPython.display import display + + display(df_html) + print(f"is_ok={is_ok}") + # Assert if at least one of the check failed. + if not is_ok and assert_on_error: + raise ValueError("The checks have failed:\n" + result) + # For notebooks, we want to return None, since the outcome was + # already displayed. + if notebook_output: + result = None + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py new file mode 100644 index 000000000..c421095a3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py @@ -0,0 +1,282 @@ +""" +Import as: + +import helpers.hpandas_clean as hpanclea +""" + +from typing import Any, List, Optional, Union + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_utils as hpanutil +import helpers.hprint as hprint + +_LOG = hloggin.getLogger(__name__) + + +def drop_duplicates( + data: Union[pd.Series, pd.DataFrame], + use_index: bool, + column_subset: Optional[List[str]] = None, + *args: Any, + **kwargs: Any, +) -> Union[pd.Series, pd.DataFrame]: + """ + Wrap `pandas.drop_duplicates()` with additional index handling. 
+ + See the official docs: + - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html + - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html + + :param data: input series or dataframe + :param use_index: whether to consider index values when identifying duplicates + - if `True`, use index values together with a column subset for + identifying duplicates + - if `False`, duplicated rows are with the exact same values in a subset + and different indices + :param column_subset: a list of columns to consider for identifying duplicates + :param args: additional arguments passed to pandas.drop_duplicates() + :param kwargs: additional keyword arguments passed to pandas.drop_duplicates() + :return: data without duplicates + """ + _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) + num_rows_before = data.shape[0] + # Get all columns list for subset if no subset is passed. + if column_subset is None: + column_subset = data.columns.tolist() + else: + hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") + if use_index: + # Add dummy index column to use it for duplicates detection. + index_col_name = "use_index_col" + hdbg.dassert_not_in(index_col_name, data.columns.tolist()) + column_subset.insert(0, index_col_name) + data[index_col_name] = data.index + # Drop duplicates based on the column subset. + data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) + # Clean up the temporary index column if it was added. + if use_index: + # Remove dummy index column. + data_no_dups = data_no_dups.drop([index_col_name], axis=1) + # Report the change. 
+ num_rows_after = data_no_dups.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return data_no_dups + + +def dropna( + df: pd.DataFrame, + *args: Any, + drop_infs: bool = False, + report_stats: bool = False, + **kwargs: Any, +) -> pd.DataFrame: + """ + Create a wrapper around pd.dropna() reporting information about the removed + rows. + + :param df: dataframe to process + :param drop_infs: if +/- np.inf should be considered as nans + :param report_stats: if processing stats should be reported + :return: dataframe with nans dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + num_rows_before = df.shape[0] + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + df = df.dropna(*args, **kwargs) + if report_stats: + num_rows_after = df.shape[0] + pct_removed = hprint.perc( + num_rows_before - num_rows_after, num_rows_before + ) + _LOG.info("removed rows with nans: %s", pct_removed) + return df + + +def drop_axis_with_all_nans( + df: pd.DataFrame, + drop_rows: bool = True, + drop_columns: bool = False, + drop_infs: bool = False, + report_stats: bool = False, +) -> pd.DataFrame: + """ + Remove columns and rows not containing information (e.g., with only nans). + + The operation is not performed in place and the resulting df is + returned. Assume that the index is timestamps. + + :param df: dataframe to process + :param drop_rows: remove rows with only nans + :param drop_columns: remove columns with only nans + :param drop_infs: remove also +/- np.inf + :param report_stats: report the stats of the operations + :return: dataframe with specific nan axis dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + if drop_columns: + # Remove columns with all nans, if any. + cols_before = df.columns[:] + df = df.dropna(axis=1, how="all") + if report_stats: + # Report results. 
+ cols_after = df.columns[:] + removed_cols = set(cols_before).difference(set(cols_after)) + pct_removed = hprint.perc( + len(cols_before) - len(cols_after), len(cols_after) + ) + _LOG.info( + "removed cols with all nans: %s %s", + pct_removed, + hprint.list_to_str(removed_cols), + ) + if drop_rows: + # Remove rows with all nans, if any. + rows_before = df.index[:] + df = df.dropna(axis=0, how="all") + if report_stats: + # Report results. + rows_after = df.index[:] + removed_rows = set(rows_before).difference(set(rows_after)) + if len(rows_before) == len(rows_after): + # Nothing was removed. + min_ts = max_ts = None + else: + # TODO(gp): Report as intervals of dates. + min_ts = min(removed_rows) + max_ts = max(removed_rows) + pct_removed = hprint.perc( + len(rows_before) - len(rows_after), len(rows_after) + ) + _LOG.info( + "removed rows with all nans: %s [%s, %s]", + pct_removed, + min_ts, + max_ts, + ) + return df + + +def drop_duplicated( + df: pd.DataFrame, *, subset: Optional[List[str]] = None +) -> pd.DataFrame: + """ + Implement `df.duplicated` but considering also the index and ignoring nans. + """ + _LOG.debug("before df=\n%s", hpanutil.df_to_str(df)) + # Move the index to the df. + old_index_name = df.index.name + new_index_name = "_index.tmp" + hdbg.dassert_not_in(new_index_name, df.columns) + df.index.name = new_index_name + df.reset_index(drop=False, inplace=True) + # Remove duplicates by ignoring nans. + if subset is not None: + hdbg.dassert_isinstance(subset, list) + subset = [new_index_name] + subset + duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") + # Report the result of the operation. 
+ if duplicated.sum() > 0: + num_rows_before = df.shape[0] + _LOG.debug( + "Removing duplicates df=\n%s", + hpanutil.df_to_str(df.loc[duplicated]), + ) + df = df.loc[~duplicated] + num_rows_after = df.shape[0] + _LOG.warning( + "Removed repeated rows num_rows=%s", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + _LOG.debug("after removing duplicates df=\n%s", hpanutil.df_to_str(df)) + # Set the index back. + df.set_index(new_index_name, inplace=True) + df.index.name = old_index_name + _LOG.debug("after df=\n%s", hpanutil.df_to_str(df)) + return df + + +def impute_nans(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame: + """ + Assign `value` to the `column` of `df` where the value is "nan". + + :param df: The DataFrame to modify. + :param column: The column in which to replace "nan" values. + :param value: The value to assign to "nan" entries. + :return: The DataFrame with the "nan" values assigned. + """ + df[column] = df[column].astype(str) + mask = df[column] == "nan" + # Assign the new value or keep the original value. + df[column] = np.where(mask, value, df[column]) + # There should be no more nans. + mask = df[column] == "nan" + hdbg.dassert_eq(mask.sum(), 0) + # + return df + + +# ############################################################################# + + +def remove_outliers( + df: pd.DataFrame, + lower_quantile: float, + *, + column_set: hpanutil.ColumnSet, + # TODO(Grisha): the params are not used. + fill_value: float = np.nan, + mode: str = "remove_outliers", + axis: Any = 0, + upper_quantile: Optional[float] = None, +) -> pd.DataFrame: + """ + Remove outliers from a dataframe based on quantile thresholds. 
+ + :param df: input dataframe + :param lower_quantile: lower quantile threshold (0.0 to 1.0) + :param column_set: columns to apply outlier removal to + :param fill_value: value to use for filling outliers (currently unused) + :param mode: outlier removal mode (currently unused) + :param axis: axis along which to compute quantiles (0 for columns, 1 for rows) + :param upper_quantile: upper quantile threshold, defaults to 1 - lower_quantile + :return: dataframe with outliers removed based on quantile thresholds + """ + hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") + # Validate quantile parameters. + hdbg.dassert_lte(0.0, lower_quantile) + if upper_quantile is None: + upper_quantile = 1.0 - lower_quantile + hdbg.dassert_lte(lower_quantile, upper_quantile) + hdbg.dassert_lte(upper_quantile, 1.0) + # Create a copy of the dataframe to avoid modifying the original. + df = df.copy() + if axis == 0: + all_columns = df.columns + columns = hpanutil.resolve_column_names(column_set, all_columns) + hdbg.dassert_is_subset(columns, df.columns) + for column in all_columns: + if column in columns: + df[column] = df[column].quantile( + [lower_quantile, upper_quantile] + ) + elif axis == 1: + all_rows = df.rows + rows = hpanutil.resolve_column_names(column_set, all_rows) + hdbg.dassert_is_subset(rows, df.rows) + for row in all_rows: + if row in rows: + df[row] = df[row].quantile([lower_quantile, upper_quantile]) + else: + raise ValueError(f"Invalid axis='{axis}'") + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py new file mode 100644 index 000000000..b40308daa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py @@ -0,0 +1,289 @@ +""" +Import as: + +import 
helpers.hpandas_compare as hpancomp +""" + +import logging +from typing import List + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_dassert as hpandass +import helpers.hpandas_utils as hpanutil + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + + +def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: + """ + Compare contents of rows with same indices. + + Index is set to default sequential integer values because compare is + sensitive to multi index (probably because new multi indexes are created + for each difference in `compare`). Multi index columns are regular columns now. + Excess columns are removed so both dataframes are always same shape because + `compare` expects identical dataframes (same number of rows, columns, etc.). + + :param df1: first dataframe for comparison + :param df2: second dataframe for comparison + :return: dataframe with data with same indices and different contents + """ + # Get rows on which the two dataframe indices match. + idx_intersection = df1.index.intersection(df2.index) + # Remove excess columns and reset indexes. + trimmed_second = df2.loc[idx_intersection].reset_index() + trimmed_first = df1.loc[idx_intersection].reset_index() + # Get difference between second and first dataframe. + data_difference = trimmed_second.compare(trimmed_first) + # Update data difference with original dataframe index names + # for easier identification. + index_names = tuple(df2.index.names) + # If index or multi index is named, it will be visible in data difference. 
+ if index_names != (None,): + for index in data_difference.index: + for column in index_names: + data_difference.loc[index, column] = trimmed_second.loc[index][ + column + ] + data_difference = data_difference.convert_dtypes() + return data_difference + + +def compare_nans_in_dataframes( + df1: pd.DataFrame, df2: pd.DataFrame +) -> pd.DataFrame: + """ + Compare equality of DataFrames in terms of NaNs. + + For example: + - `5 vs np.nan` is a mismatch + - `np.nan vs 5` is a mismatch + - `np.nan vs np.nan` is a match + - `np.nan vs np.inf` is a mismatch + + :param df1: dataframe to compare + :param df2: dataframe to compare with + :return: dataframe that shows the differences stacked side by side, see + `pandas.DataFrame.compare()` for an example + """ + hpandass.dassert_axes_equal(df1, df2) + # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. + mask1 = df1.isna() & ~df2.isna() + mask2 = ~df1.isna() & df2.isna() + mask3 = mask1 | mask2 + # Compute a dataframe with the differences. + nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) + return nan_diff_df + + +# TODO(Grisha): -> `compare_dataframes()`? + + +def compare_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + row_mode: str = "equal", + column_mode: str = "equal", + # TODO(Grisha): should be True by default? + compare_nans: bool = False, + diff_mode: str = "diff", + assert_diff_threshold: float = 1e-3, + close_to_zero_threshold: float = 1e-6, + zero_vs_zero_is_zero: bool = True, + remove_inf: bool = True, + log_level: int = logging.DEBUG, + only_warning: bool = True, +) -> pd.DataFrame: + """ + Compare two dataframes. + + This works for dataframes with and without multi-index. 
+ + :param row_mode: control how the rows are handled + - "equal": rows need to be the same for the two dataframes + - "inner": compute the common rows for the two dataframes + :param column_mode: same as `row_mode` + :param compare_nans: include NaN comparison if True otherwise just + compare non-NaN values + :param diff_mode: control how the dataframes are compared in terms of + corresponding elements + - "diff": use the difference + - "pct_change": use the percentage difference + :param assert_diff_threshold: maximum allowed total difference + - do not assert if `None` + - works when `diff_mode` is "pct_change" + :param close_to_zero_threshold: round numbers below the threshold to 0 + :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 + if True, otherwise keep the actual result + :param remove_inf: replace +-inf with `np.nan` + :param log_level: logging level + :param only_warning: when `True` the function issues a warning instead of aborting + :return: a singe dataframe with differences as values + """ + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + # Check value of `assert_diff_threshold`, if it was passed. + if assert_diff_threshold: + hdbg.dassert_lte(assert_diff_threshold, 1.0) + hdbg.dassert_lte(0.0, assert_diff_threshold) + # TODO(gp): Factor out this logic and use it for both compare_visually_dfs + # and + if row_mode == "equal": + hpandass.dassert_indices_equal(df1, df2) + elif row_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. + same_rows = list((set(df1.index)).intersection(set(df2.index))) + df1 = df1[df1.index.isin(same_rows)] + df2 = df2[df2.index.isin(same_rows)] + else: + raise ValueError(f"Invalid row_mode='{row_mode}'") + # Handle column comparison mode. 
+ if column_mode == "equal": + hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) + elif column_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. + col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) + df1 = df1[col_names] + df2 = df2[col_names] + else: + raise ValueError(f"Invalid column_mode='{column_mode}'") + # Round small numbers to 0 to exclude them from the diff computation. + close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold + df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( + 0 + ) + df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( + 0 + ) + # Compute the difference df. + if diff_mode == "diff": + # Test and convert the assertion into a boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + df1, df2, check_like=True, check_dtype=False + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise an assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n are not equal.", + hpanutil.df_to_str(df1, log_level=log_level), + hpanutil.df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Calculate the difference. + df_diff = df1 - df2 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + elif diff_mode == "pct_change": + # Compare NaN values in dataframes. + nan_diff_df = compare_nans_in_dataframes(df1, df2) + _LOG.debug( + "Dataframe with NaN differences=\n%s", + hpanutil.df_to_str(nan_diff_df), + ) + msg = "There are NaN values in one of the dataframes that are not in the other one." + hdbg.dassert_eq( + 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning + ) + # Compute pct_change. + df_diff = 100 * (df1 - df2) / df2.abs() + if zero_vs_zero_is_zero: + # When comparing 0 to 0 set the diff (which is NaN by default) to 0. 
+ df1_mask = df1 == 0 + df2_mask = df2 == 0 + zero_vs_zero_mask = df1_mask & df2_mask + df_diff[zero_vs_zero_mask] = 0 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + # Check if `df_diff` values are less than `assert_diff_threshold`. + if assert_diff_threshold is not None: + nan_mask = df_diff.isna() + within_threshold = ( + df_diff.abs() <= assert_diff_threshold + ) | nan_mask + expected = pd.DataFrame( + True, + index=within_threshold.index, + columns=within_threshold.columns, + ) + # Test and convert the assertion into boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + within_threshold, expected, check_exact=True + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", + hpanutil.df_to_str(df1, log_level=log_level), + hpanutil.df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Report max diff. + max_diff = df_diff.abs().max().max() + _LOG.log( + log_level, + "Maximum percentage difference between the two dataframes = %s", + max_diff, + ) + else: + raise ValueError(f"diff_mode={diff_mode}") + df_diff = df_diff.add_suffix(f".{diff_mode}") + return df_diff + + +def find_common_columns( + names: List[str], dfs: List[pd.DataFrame] +) -> pd.DataFrame: + """ + Find common columns across multiple dataframes. 
+ + :param names: list of names for each dataframe + :param dfs: list of dataframes to compare + :return: dataframe showing common columns between each pair of dataframes + """ + df = [] + for i, df1 in enumerate(dfs): + df1 = dfs[i].columns + for j in range(i + 1, len(dfs)): + df2 = dfs[j].columns + common_cols = [c for c in df1 if c in df2] + df.append( + ( + names[i], + len(df1), + names[j], + len(df2), + len(common_cols), + ", ".join(common_cols), + ) + ) + df = pd.DataFrame( + df, + columns=[ + "table1", + "num_cols1", + "num_cols2", + "table2", + "num_comm_cols", + "common_cols", + ], + ) + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py new file mode 100644 index 000000000..c9443c888 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py @@ -0,0 +1,221 @@ +""" +Import as: + +import helpers.hpandas_conversion as hpanconv +""" + +from typing import List, Optional, Union + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + +# ############################################################################# +# DataFrame/Series Conversion +# ############################################################################# + + +def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: + """ + Convert a pd.DataFrame with a single column into a pd.Series. The problem + is that empty df or df with a single row are not converted correctly to a + pd.Series. 
+ + :param df: dataframe with a single column to convert to a series + :param series_dtype: dtype of the desired series in case a DataFrame + is empty, otherwise inherit dtype from a DataFrame + """ + # See https://stackoverflow.com/questions/33246771 + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) + if df.empty: + srs = pd.Series(dtype=series_dtype) + elif df.shape[0] > 1: + srs = df.squeeze() + else: + srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) + srs.name = df.index.name + hdbg.dassert_isinstance(srs, pd.Series) + return srs + + +def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: + """ + Convert a single-column dataframe to a series or no-op if already a series. + """ + if isinstance(data, pd.Series): + return data + return to_series(data) + + +# ############################################################################# +# Infer type +# ############################################################################# + + +def infer_column_types(col: pd.Series): + """ + Determine which data type is most prevalent in a column. + + Examine the values in the given pandas Series and decides whether + the majority of entries are strings, numeric values, or booleans. + + :param col: The column to inspect. + :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, + representing the predominant type. 
+ """ + vals = { + "is_numeric": pd.to_numeric(col, errors="coerce").notna(), + #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), + "is_bool": col.map(lambda x: isinstance(x, bool)), + "is_string": col.map(lambda x: isinstance(x, str)), + } + vals = {k: float(v.mean()) for k, v in vals.items()} + # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", + # (vals["is_numeric"] >= vals["is_string"], "is_numeric", + # "is_string")) + if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): + type_ = "is_bool" + elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): + type_ = "is_numeric" + else: + type_ = "is_string" + vals["type"] = type_ + return vals + + +def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Identify the predominant data type for each column in a DataFrame. + + :param df: The DataFrame whose columns will be analyzed. + :return: A DataFrame with two columns: + - `column`: the name of each original column. + - `predominant_type`: the most frequent type in that column, + one of `"string"`, `"numeric"`, or `"bool"`. + """ + return df.apply(lambda x: pd.Series(infer_column_types(x))).T + + +def convert_to_type(col: pd.Series, type_: str) -> pd.Series: + """ + Convert a pandas Series to a specified data type. + + :param col: The input column to be converted. + :param type_: The target data type. Expected values include: + - `"is_bool"`: convert values to booleans. + - `"is_int"`: convert values to integers. + - `"is_numeric"`: convert values to float. + - `"is_string"`: convert values to strings. + :return: A new Series with the same index as `col`, cast to the requested + type. 
+ """ + if type_ == "is_bool": + return col.map( + lambda x: ( + True + if x in ["True", 1, "1", "true", True] + else False + if x in [0, "0", "False", False, "false"] + else None + ) + ) + elif type_ == "is_int": + return pd.to_numeric(col, errors="coerce", downcast="integer") + elif type_ == "is_numeric": + return pd.to_numeric(col, errors="coerce") + elif type_ == "is_string": + return col.astype(str) + else: + raise ValueError(f"Unknown column type: {type_}") + + +def convert_col_to_int( + df: pd.DataFrame, + col: str, +) -> pd.DataFrame: + """ + Convert a column to an integer column. + + Example use case: Parquet uses categoricals. If supplied with a + categorical-type column, this function will convert it to an integer + column. + """ + import helpers.hpandas_dassert as hpandass + + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(col, str) + hdbg.dassert_in(col, df.columns) + # Attempt the conversion. + df[col] = df[col].astype("int64") + # Trust, but verify. + hpandass.dassert_series_type_is(df[col], np.int64) + return df + + +def cast_series_to_type( + series: pd.Series, series_type: Optional[type] +) -> pd.Series: + """ + Convert a Pandas series to a given type. + + :param series: the input series + :param series_type: the type to convert the series into + - if None, then the series values are turned into Nones + :return: the series in the required type + """ + if series_type is None: + # Turn the series values into None. + series[:] = None + elif series_type is pd.Timestamp: + # Convert to timestamp. + series = pd.to_datetime(series) + elif series_type is dict: + # Convert to dict. + series = series.apply(eval) + else: + # Convert to the specified type. + series = series.astype(series_type) + return series + + +def convert_df( + df: pd.DataFrame, *, print_invalid_values: bool = False +) -> pd.DataFrame: + """ + Convert each DataFrame column to its predominant type. 
+ + This function inspects every column in `df`, determines whether the + majority of its values are boolean, numeric, or string, and then + casts the column to that type using `convert_to_type`. + + :param df: The input DataFrame whose columns will be converted. + :param print_invalid_values: If True, print any original values that could + not be converted (they become NaN after conversion) + :return: a new DataFrame with each column cast to its detected predominant + type. + """ + df_out = pd.DataFrame(index=df.index) + for col in df.columns: + series = df[col] + # Determine the dominant datatype. + col_type = infer_column_types(series)["type"] + hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) + # Convert the column to dominant datatype. + converted = convert_to_type(series, col_type) + if print_invalid_values: + invalid_mask = series.notna() & converted.isna() + if invalid_mask.any(): + invalid = series[invalid_mask].tolist() + _LOG.info("Column %s dropped invalid values: %s", col, invalid) + df_out[col] = converted + return df_out + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py new file mode 100644 index 000000000..7d62b84b3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py @@ -0,0 +1,371 @@ +""" +Import as: + +import helpers.hpandas_dassert as hpandass +""" + +from typing import Any, Dict, Iterable, List, Optional, Union + +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +_LOG = hloggin.getLogger(__name__) + + +RowsValues = List[List[str]] + +# 
#############################################################################
# Index/Axis Validation & Assertions
# #############################################################################


def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index:
    """
    Return the index of a Pandas object.

    :param obj: pandas Index, DataFrame, or Series
    :return: `obj` itself when it is already an index, otherwise `obj.index`
    """
    if isinstance(obj, pd.Index):
        return obj
    # Anything that is not an index must be a Series or a DataFrame.
    hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame))
    return obj.index


# TODO(gp): Maybe for symmetry with the other functions, rename to
# dassert_datetime_index


def dassert_index_is_datetime(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has an index containing datetimes.

    It works for both single and multi-indexed objects.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: optional message forwarded to the `hdbg` assertion
    :param args: extra positional arguments forwarded after `msg`
    """
    index = _get_index(obj)
    if isinstance(index, pd.MultiIndex):
        # In case of multi index check that at least one level is a datetime.
        is_any_datetime = any(
            isinstance(level, pd.DatetimeIndex) for level in index.levels
        )
        hdbg.dassert(is_any_datetime, msg, *args)
    else:
        hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args)


def dassert_unique_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has a unique index.

    On failure, the entries of `obj` whose index label is duplicated are
    converted to a string and prepended to `msg`.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: optional message, reported after the duplicated entries
    :param args: extra positional arguments forwarded after `msg`
    """
    index = _get_index(obj)
    if index.is_unique:
        return
    # The helper is only needed to build the failure report, so import it on
    # this path only.
    # NOTE(review): the function-level import presumably avoids a circular
    # import between the `hpandas_*` modules - confirm before hoisting it.
    import helpers.hpandas_utils as hpanutil

    # `keep=False` marks every occurrence of a duplicated label, not only the
    # repeated ones.
    dup_indices = index.duplicated(keep=False)
    df_dup_as_str = hpanutil.df_to_str(obj[dup_indices])
    dup_msg = f"Duplicated rows are:\n{df_dup_as_str}\n"
    msg = dup_msg if msg is None else dup_msg + msg
    # Pass `msg` positionally, like the other assertions in this file: with
    # `msg=msg, *args` any non-empty `args` would also bind to `msg` and the
    # call would fail with a `TypeError` instead of the assertion.
    hdbg.dassert(index.is_unique, msg, *args)


# TODO(gp): @all Add unit tests.
def dassert_increasing_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has an increasing index.

    "Increasing" follows `Index.is_monotonic_increasing`, i.e., equal
    consecutive labels are allowed. On failure, the entries around each drop
    in the index are converted to a string and prepended to `msg`.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: optional message, reported after the offending entries
    :param args: extra positional arguments forwarded after `msg`
    """
    index = _get_index(obj)
    if index.is_monotonic_increasing:
        return
    # The helper is only needed to build the failure report, so import it on
    # this path only.
    import helpers.hpandas_utils as hpanutil

    # Print information about the problematic indices like:
    # ```
    # Not increasing indices are:
    #                                  full_symbol         open         high
    # timestamp
    # 2018-08-17 01:39:00+00:00  binance::BTC_USDT  6339.250000  6348.910000
    # 2018-08-17 00:01:00+00:00   kucoin::ETH_USDT   286.712987   286.712987
    # ```
    # Find the problematic indices: a position is problematic when its label
    # is smaller than the previous one. Comparing the index against itself
    # shifted by one does not assume datetime labels (unlike a comparison of
    # `np.diff()` with a `pd.Timedelta`), and the strict `<` does not report
    # equal neighbors, which an increasing index allows.
    is_drop = np.asarray(index[1:] < index[:-1])
    mask = np.insert(is_drop, 0, False)
    # TODO(gp): We might want to specify an integer with how many rows before
    # after we want to show.
    # Shift back to get the previous index that was creating the issue.
    mask_shift = np.append(mask[1:], False)
    mask = mask | mask_shift
    df_dup_as_str = hpanutil.df_to_str(obj[mask])
    dup_msg = f"Not increasing indices are:\n{df_dup_as_str}\n"
    msg = dup_msg if msg is None else dup_msg + msg
    # Dump the data to file for further inspection.
    # obj.to_csv("index.csv")
    # Pass `msg` positionally: with `msg=msg, *args` any non-empty `args`
    # would also bind to `msg` and the call would fail with a `TypeError`
    # instead of the assertion.
    hdbg.dassert(index.is_monotonic_increasing, msg, *args)


# TODO(gp): @all Add more info in case of failures and unit tests.


def dassert_strictly_increasing_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has a strictly increasing index.

    An index is strictly increasing when it is increasing and has no
    duplicated labels, so the check is the combination of the two assertions.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: optional message forwarded to both assertions
    :param args: extra positional arguments forwarded after `msg`
    """
    dassert_unique_index(obj, msg, *args)
    dassert_increasing_index(obj, msg, *args)


# TODO(gp): Not sure it's used or useful?
+ + +def dassert_monotonic_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has a monotonic (i.e., strictly increasing or + decreasing index). + """ + dassert_unique_index(obj, msg, *args) + index = _get_index(obj) + cond = index.is_monotonic_increasing or index.is_monotonic_decreasing + hdbg.dassert(cond, msg=msg, *args) + + +# TODO(Paul): @gp -> dassert_datetime_indexed_df + + +def dassert_time_indexed_df( + df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool +) -> None: + """ + Validate that input dataframe is time indexed and well-formed. + + It works for both single and multi-indexed dataframes. + + :param df: dataframe to validate + :param allow_empty: allow empty data frames + :param strictly_increasing: if True the index needs to be strictly + increasing, instead of just increasing + """ + # Verify that Pandas dataframe is passed as input. + hdbg.dassert_isinstance(df, pd.DataFrame) + if not allow_empty: + # Verify that a non-empty dataframe is passed as input. + hdbg.dassert_lt(0, df.shape[0]) + # Verify that the dataframe has at least 1 column. + hdbg.dassert_lte(1, len(df.columns)) + # Verify that the index is increasing. + if strictly_increasing: + dassert_strictly_increasing_index(df) + else: + dassert_increasing_index(df) + # Check that the index is in datetime format. + dassert_index_is_datetime(df) + # Check that the passed timestamp has timezone info. + index_item = df.index[0] + if isinstance(index_item, tuple): + # In case of multi index assume that the first level is a datetime. + index_item = index_item[0] + hdateti.dassert_has_tz(index_item) + + +def dassert_indices_equal( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + allow_series: bool = False, + only_warning: bool = False, +) -> None: + """ + Ensure that `df1` and `df2` share a common index. + + Print the symmetric difference of indices if equality does not hold. 
+ """ + if allow_series: + if isinstance(df1, pd.Series): + df1 = df1.to_frame() + if isinstance(df2, pd.Series): + df2 = df2.to_frame() + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert( + df1.index.equals(df2.index), + "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", + df1.index.difference(df2.index), + df2.index.difference(df1.index), + only_warning=only_warning, + ) + + +def dassert_columns_equal( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + sort_cols: bool = False, + only_warning: bool = False, +) -> None: + """ + Ensure that `df1` and `df2` have the same columns. + + Print the symmetric difference of columns if equality does not hold. + """ + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + if sort_cols: + _LOG.debug("Sorting dataframe columns.") + df1 = df1.sort_index(axis=1) + df2 = df2.sort_index(axis=1) + hdbg.dassert( + df1.columns.equals(df2.columns), + "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", + df1.columns.difference(df2.columns), + df2.columns.difference(df1.columns), + only_warning=only_warning, + ) + + +def dassert_axes_equal( + df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False +) -> None: + """ + Ensure that `df1` and `df2` have the same index and same columns. + """ + dassert_indices_equal(df1, df2) + dassert_columns_equal(df1, df2, sort_cols=sort_cols) + + +# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. + + +def dassert_series_type_is( + srs: pd.Series, + type_: type, + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that the data type of `srs` is `type_`. 
+ + Examples of valid series types are + - np.float64 + - np.int64 + - pd.Timestamp + """ + hdbg.dassert_isinstance(srs, pd.Series) + hdbg.dassert_isinstance(type_, type) + hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) + + +def dassert_series_type_in( + srs: pd.Series, + types: List[type], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that the data type of `srs` is one of the types in `types`. + """ + hdbg.dassert_isinstance(srs, pd.Series) + hdbg.dassert_container_type(types, list, type) + hdbg.dassert_in(srs.dtype.type, types, msg, *args) + + +def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: + """ + Ensure that remapping rows / columns is valid. + """ + hdbg.dassert_isinstance(to_remap, list) + hdbg.dassert_isinstance(remap_dict, dict) + # All the rows / columns to remap, should exist. + hdbg.dassert_is_subset( + remap_dict.keys(), + to_remap, + "Keys to remap should be a subset of existing columns", + ) + # The mapping is invertible. + hdbg.dassert_no_duplicates(remap_dict.keys()) + hdbg.dassert_no_duplicates(remap_dict.values()) + # Rows / columns should not be remapped on existing rows / columns. + hdbg.dassert_not_intersection(remap_dict.values(), to_remap) + + +def dassert_approx_eq( + val1: Any, + val2: Any, + rtol: float = 1e-05, + atol: float = 1e-08, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + # Approximate comparison is not applicable for strings. + hdbg.dassert_is_not(type(val1), str) + hdbg.dassert_is_not(type(val2), str) + # Convert iterable inputs to list in order to comply with numpy. 
+ if isinstance(val1, Iterable): + val1 = list(val1) + if isinstance(val2, Iterable): + val2 = list(val2) + cond = np.allclose( + np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True + ) + if not cond: + txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" + hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore + + +# ############################################################################# + + +def dassert_is_days( + timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None +) -> None: + """ + Assert that a timedelta represents an integer number of days. + + :param timedelta: the timedelta to check + :param min_num_days: optional minimum number of days to enforce + """ + hdbg.dassert( + (timedelta / pd.Timedelta(days=1)).is_integer(), + "timedelta='%s' is not an integer number of days", + timedelta, + ) + if min_num_days is not None: + hdbg.dassert_lte(1, timedelta.days) + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py new file mode 100644 index 000000000..6c73c8988 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py @@ -0,0 +1,302 @@ +""" +Import as: + +import helpers.hpandas_display as hpandisp +""" + +import logging +import os +from typing import List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hlogging as hloggin +import helpers.hsystem as hsystem + +_LOG = hloggin.getLogger(__name__) + + +# Invariant: +# - When we are in a notebook we want to: +# - Convert `_LOG.info()` in `print()` using `hnotebo.set_logger_to_print()` +# - Display any dataframe using 
the `hpandas.display` function +# - Do not return any value +# +# - When we are not in a notebook we want to: +# - Use `_LOG.info()` and `_LOG.debug()` to log messages +# - Print the dataframe with `_LOG.debug()` +# - Return the result through a `return` statement +# +# - Each function should have a `log_level` parameter to control the logging level. +# - If `log_level` is not provided, it should be set to `logging.DEBUG` if we are not in a notebook, +# and `logging.INFO` if we are in a notebook. + + +def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: + """ + Compute a simple signature of a dataframe in string format. + + The signature contains metadata about dataframe size and certain + amount of rows from start and end of a dataframe. It is used for + testing purposes. + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + text: List[str] = [f"df.shape={str(df.shape)}"] + with pd.option_context( + "display.max_colwidth", int(1e6), "display.max_columns", None + ): + # If dataframe size exceeds number of rows, show only subset in form of + # first and last rows. Otherwise, whole dataframe is shown. + if len(df) > num_rows: + text.append(f"df.head=\n{df.head(num_rows // 2)}") + text.append(f"df.tail=\n{df.tail(num_rows // 2)}") + else: + text.append(f"df.full=\n{df}") + text: str = "\n".join(text) + return text + + +# ############################################################################# + + +def convert_df_to_json_string( + df: pd.DataFrame, + n_head: Optional[int] = 10, + n_tail: Optional[int] = 10, + columns_order: Optional[List[str]] = None, +) -> str: + """ + Convert dataframe to pretty-printed JSON string. + + To select all rows of the dataframe, pass `n_head` as None. + + :param df: dataframe to convert + :param n_head: number of printed top rows + :param n_tail: number of printed bottom rows + :param columns_order: order for the KG columns sort + :return: dataframe converted to JSON string + """ + # Append shape of the initial dataframe. 
+ shape = f"original shape={df.shape}" + # Reorder columns. + if columns_order is not None: + hdbg.dassert_set_eq(columns_order, df.columns) + df = df[columns_order] + # Select head. + if n_head is not None: + head_df = df.head(n_head) + else: + # If no n_head provided, append entire dataframe. + head_df = df + # Transform head to json. + head_json = head_df.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + if n_tail is not None: + # Transform tail to json. + tail = df.tail(n_tail) + tail_json = tail.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + else: + # If no tail specified, append an empty string. + tail_json = "" + # Join shape and dataframe to single string. + output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) + return output_str + + +# ############################################################################# + + +def convert_df_to_png( + df: pd.DataFrame, + file_path: str, + index: bool = True, + table_conversion: str = "kaleido", + dpi: int = 300, + print_markdown: bool = False, + markdown_path_prefix: Optional[str] = None, +) -> None: + """ + Convert a dataframe to a PNG image file. + + Uses the dataframe_image library to render the DataFrame as an image + with HTML styling. 
+ + :param df: dataframe to convert + :param file_path: path where the PNG image will be saved + :param index: whether to include the index in the image + :param table_conversion: conversion method ('kaleido', 'chrome', or 'playwright') + :param dpi: resolution in dots per inch (default: 300 for print quality, + higher values = higher resolution and larger file size) + :param print_markdown: if True, print markdown image reference like + ![](path/to/image.png) + :param markdown_path_prefix: optional path to prepend to the image path in + the markdown reference (e.g., '../figures/' or 'assets/') + """ + # Keep this import here since it's an optional one. + import dataframe_image as dfi + + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(file_path, str) + # Ensure the output directory exists. + hio.create_enclosing_dir(file_path, incremental=True) + # Prepare dataframe for export, handling index parameter. + export_df = df + if not index: + # Reset index to exclude it from the image. + export_df = df.reset_index(drop=True) + dfi.export(export_df, file_path, table_conversion=table_conversion, dpi=dpi) + # Use print instead of _LOG.info. + print(f"PNG image saved to: '{file_path}'") + if print_markdown: + # Construct the markdown path. + markdown_path = file_path + if markdown_path_prefix: + markdown_path = os.path.join(markdown_path_prefix, file_path) + markdown_ref = f"![]({markdown_path})" + # Use print instead of _LOG.info. + print(markdown_ref) + + +# ############################################################################# + + +def print_or_display( + df: pd.DataFrame, + *, + index: bool = True, + as_txt: bool = False, + log_level: int = logging.INFO, +) -> None: + """ + Print or display a dataframe in a notebook at the given log level. 
+ + :param df: dataframe to print + :param index: whether to show the index or not + :param as_txt: print if True, otherwise render as usual HTML table + :param log_level: log level at which to print the dataframe + """ + # print(_LOG.getEffectiveLevel()) + # print(log_level) + # print(_LOG.isEnabledFor(log_level)) + if hsystem.is_running_in_ipynb() and not as_txt: + from IPython.display import display, HTML + + if _LOG.isEnabledFor(log_level): + display(HTML(df.to_html(index=index))) + else: + _LOG.log(log_level, "%s", df.to_string(index=index)) + + +def display_df( + df: pd.DataFrame, + *, + index: bool = True, + inline_index: bool = False, + max_lines: Optional[int] = 5, + tag: Optional[str] = None, + mode: Optional[str] = None, + as_txt: bool = False, + log_level: int = logging.INFO, +) -> None: + """ + Display a Pandas object (series, df, panel) in a better way than the + ipython display, e.g., by printing head and tail of the dataframe, and + other formatting options. + + :param index: whether to show the index or not + :param inline_index: make the index part of the dataframe. This is used + when cutting and pasting to other applications, which are not happy + with the output pandas HTML form + :param max_lines: number of lines to print + :param mode: use different formats temporarily overriding the default, e.g., + - "all_rows": print all the rows + - "all_cols": print all the columns + - "all": print the entire df (it could be huge) + :param as_txt: print if True, otherwise render as usual html table + :param log_level: log level at which to print the dataframe + """ + # Convert Series to DataFrame if needed. 
+ if isinstance(df, pd.Series): + df = pd.DataFrame(df) + # + hdbg.dassert_type_is(df, pd.DataFrame) + hdbg.dassert_eq( + hlist.find_duplicates(df.columns.tolist()), + [], + msg="Find duplicated columns", + ) + if tag is not None: + _LOG.log(log_level, "tag=%s", tag) + # Shrink the dataframe to the number of lines specified by `max_lines`, + # if needed. + if max_lines is not None: + hdbg.dassert_lte(1, max_lines) + if df.shape[0] > max_lines: + # log.error("Printing only top / bottom %s out of %s rows", + # max_lines, df.shape[0]) + ellipses = pd.DataFrame( + [["..."] * len(df.columns)], columns=df.columns, index=["..."] + ) + df = pd.concat( + [ + df.head(int(max_lines / 2)), + ellipses, + df.tail(int(max_lines / 2)), + ], + axis=0, + ) + # Inline the index, if needed. + if inline_index: + df = df.copy() + # Copy the index to a column and don't print the index. + if df.index.name is None: + col_name = "." + else: + col_name = df.index.name + df.insert(0, col_name, df.index) + df.index.name = None + index = False + # Print or display the dataframe. 
+ if mode is None: + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all_rows": + with pd.option_context( + "display.max_rows", None, "display.max_columns", 3 + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all_cols": + with pd.option_context( + "display.max_colwidth", int(1e6), "display.max_columns", None + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all": + with pd.option_context( + "display.max_rows", + int(1e6), + "display.max_columns", + 3, + "display.max_colwidth", + int(1e6), + "display.max_columns", + None, + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + else: + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + raise ValueError("Invalid mode=%s" % mode) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py new file mode 100644 index 000000000..a1049d77f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py @@ -0,0 +1,128 @@ +""" +Import as: + +import helpers.hpandas_io as hpanio +""" + +from typing import Any, Union + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hprint as hprint + +# Handle different versions of s3fs where core module may be at different +# locations. +try: + import s3fs + + # Try to access s3fs.core to check if it exists + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module. 
+ try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ImportError: + # If s3fs is not available, define dummy classes for type hints. + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +_LOG = hloggin.getLogger(__name__) + + +def read_csv_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a CSV file into a `pd.DataFrame`. + + :param stream: file path, S3File, or S3FileSystem object + :param args: additional arguments passed to pd.read_csv() + :param kwargs: additional keyword arguments passed to pd.read_csv() + :return: dataframe with CSV contents + """ + # Gets filename from stream if it is not already a string, + # so it can be inspected for extension type. + file_name = stream if isinstance(stream, str) else vars(stream)["path"] + # Handle zipped files. + if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "gzip" + elif file_name.endswith(".zip"): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "zip" + # Read. + _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_csv(stream, *args, **kwargs) + return df + + +def read_parquet_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a Parquet file into a `pd.DataFrame`. + + :param stream: file path, S3File, or S3FileSystem object + :param args: additional arguments passed to pd.read_parquet() + :param kwargs: additional keyword arguments passed to pd.read_parquet() + :return: dataframe with Parquet contents + """ + # Read. 
+ _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_parquet(stream, *args, **kwargs) + return df + + +# ############################################################################# + + +# TODO(Paul): Remove this since it's a dup of hgoogle_drive_api.py. + + +def to_gsheet( + df: pd.DataFrame, + tab_name: str, + gsheet_tab_name: str, + overwrite: bool, +) -> None: + """ + Save a dataframe to a Google sheet. + + :param df: the dataframe to save to a Google sheet + :param tab_name: the name of the Google sheet to save the df + into; the Google sheet with this name must already exist on the + Google Drive + :param gsheet_tab_name: the name of the sheet in the Google sheet + :param overwrite: if True, the contents of the sheet are erased + before saving the dataframe into it; if False, the dataframe is + appended to the contents of the sheet + """ + import gspread_pandas + + spread = gspread_pandas.Spread( + tab_name, sheet=gsheet_tab_name, create_sheet=True + ) + if overwrite: + spread.clear_sheet() + else: + sheet_contents = spread.sheet_to_df(index=None) + combined_df = pd.concat([sheet_contents, df]) + df = combined_df.drop_duplicates() + spread.df_to_sheet(df, index=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py new file mode 100644 index 000000000..f139a3ba9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py @@ -0,0 +1,183 @@ +""" +Import as: + +import helpers.hpandas_multiindex as hpanmult +""" + +import logging +from typing import Any, Dict, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_compare as hpancomp +import helpers.hpandas_dassert as hpandass +import 
helpers.hpandas_transform as hpantran +import helpers.hpandas_utils as hpanutil +import helpers.hprint as hprint + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + +# ############################################################################# +# Functions +# ############################################################################# + + +def add_multiindex_col( + df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str +) -> pd.DataFrame: + """ + Add column to a multiindex DataFrame. + + Note: each column in a multiindex DataFrame is a DataFrame itself. + + :param df: multiindex df + :param multiindex_col: column (i.e. singleindex df) of a multiindex df + :param col_name: name of a new column + :return: a multiindex DataFrame with a new column + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) + hdbg.dassert_isinstance(col_name, str) + hdbg.dassert_not_in(col_name, df.columns) + for col in multiindex_col.columns: + df[col_name, col] = multiindex_col[col] + return df + + +def multiindex_df_info( + df: pd.DataFrame, + *, + log_level: int = logging.INFO, + **list_to_str_kwargs: Dict[str, Any], +) -> str: + """ + Report information about a multi-index df. + """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + columns_level0 = df.columns.levels[0] + columns_level1 = df.columns.levels[1] + rows = df.index + ret = [] + ret.append( + f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" + ) + ret.append( + "columns_level0=" + + hprint.list_to_str2(columns_level0, **list_to_str_kwargs) + ) + ret.append( + "columns_level1=" + + hprint.list_to_str2(columns_level1, **list_to_str_kwargs) + ) + ret.append("rows=" + hprint.list_to_str2(rows, **list_to_str_kwargs)) + if isinstance(df.index, pd.DatetimeIndex): + # Display timestamp info. 
+ start_timestamp = df.index.min() + end_timestamp = df.index.max() + frequency = df.index.freq + if frequency is None: + # Try to infer frequency. + frequency = pd.infer_freq(df.index) + ret.append(f"start_timestamp={start_timestamp}") + ret.append(f"end_timestamp={end_timestamp}") + ret.append(f"frequency={frequency}") + ret = "\n".join(ret) + _LOG.log(log_level, ret) + return ret + + +def subset_multiindex_df( + df: pd.DataFrame, + *, + # TODO(gp): Consider passing trim_df_kwargs as kwargs. + start_timestamp: Optional[pd.Timestamp] = None, + end_timestamp: Optional[pd.Timestamp] = None, + columns_level0: hpanutil.ColumnSet = None, + columns_level1: hpanutil.ColumnSet = None, + keep_order: bool = False, +) -> pd.DataFrame: + """ + Filter multi-index DataFrame by timestamp index and column levels. + + :param start_timestamp: see `trim_df()` + :param end_timestamp: see `trim_df()` + :param columns_level0: column names that corresponds to `df.columns.levels[0]` + - `None` means no filtering + :param columns_level1: column names that corresponds to `df.columns.levels[1]` + - `None` means no filtering + :param keep_order: see `hpandas_utils.resolve_column_names()` + :return: filtered DataFrame + """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + # Filter by timestamp. + allow_empty = False + strictly_increasing = False + hpandass.dassert_time_indexed_df(df, allow_empty, strictly_increasing) + df = hpantran.trim_df( + df, + ts_col_name=None, + start_ts=start_timestamp, + end_ts=end_timestamp, + left_close=True, + right_close=True, + ) + # Filter level 0. 
+ hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + all_columns_level0 = df.columns.levels[0] + columns_level0 = hpanutil.resolve_column_names( + columns_level0, all_columns_level0, keep_order=keep_order + ) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) + df = df[columns_level0] + # Filter level 1. + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + all_columns_level1 = df.columns.levels[1] + columns_level1 = hpanutil.resolve_column_names( + columns_level1, all_columns_level1, keep_order=keep_order + ) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) + df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) + return df + + +# ############################################################################# + + +def compare_multiindex_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, + compare_dfs_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + - Subset both multi-index dfs, if needed + - Compare dfs + + :param subset_multiindex_df: params for `subset_multiindex_df()` + :param compare_dfs_kwargs: params for `compare_dfs()` + :return: df with differences as values + """ + # Subset dfs. + if subset_multiindex_df_kwargs is None: + subset_multiindex_df_kwargs = {} + subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) + subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) + # Compare dfs. 
+ if compare_dfs_kwargs is None: + compare_dfs_kwargs = {} + diff_df = hpancomp.compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) + return diff_df + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py new file mode 100644 index 000000000..b0a6bf9d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py @@ -0,0 +1,527 @@ +""" +Import as: + +import helpers.hpandas_stats as hpanstat +""" + +import logging +from typing import Any, Dict, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_dassert as hpandass +import helpers.hpandas_transform as hpantran +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = hloggin.getLogger(__name__) + + +def compute_duration_df( + tag_to_df: Dict[str, pd.DataFrame], + *, + intersect_dfs: bool = False, + valid_intersect: bool = False, +) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: + """ + Compute a df with some statistics about the time index. + + E.g., + ``` + min_index max_index min_valid_index max_valid_index + tag1 2022-01-01 21:00:00+00:00 ... + tag2 2022-01-01 21:02:00+00:00 ... + tag3 2022-01-01 21:01:00+00:00 ... 
+ ``` + + :param intersect_dfs: return a transformed dict with the intersection of + indices of all the dfs if True, otherwise return the input data as is + :param valid_intersect: intersect indices without NaNs if True, otherwise + intersect indices as is + :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param + """ + hdbg.dassert_isinstance(tag_to_df, Dict) + # Create df and assign columns. + data_stats = pd.DataFrame() + min_col = "min_index" + max_col = "max_index" + min_valid_index_col = "min_valid_index" + max_valid_index_col = "max_valid_index" + # Collect timestamp info from all dfs. + for tag in tag_to_df.keys(): + # Check that the passed timestamp has timezone info. + first_idx = tag_to_df[tag].index[0] + hdateti.dassert_has_tz(cast(pd.Timestamp, first_idx)) + hpandass.dassert_index_is_datetime(tag_to_df[tag]) + # Compute timestamp stats. + data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() + data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() + data_stats.loc[tag, min_valid_index_col] = ( + tag_to_df[tag].dropna().index.min() + ) + data_stats.loc[tag, max_valid_index_col] = ( + tag_to_df[tag].dropna().index.max() + ) + # Make a copy so we do not modify the original data. + tag_to_df_updated = tag_to_df.copy() + # Change the initial dfs with intersection. + if intersect_dfs: + if valid_intersect: + # Assign start, end date column according to specs. + min_col = min_valid_index_col + max_col = max_valid_index_col + # The start of the intersection will be the max value amongt all start dates. + intersection_start_date = cast(pd.Timestamp, data_stats[min_col].max()) + # The end of the intersection will be the min value amongt all end dates. 
+ intersection_end_date = cast(pd.Timestamp, data_stats[max_col].min()) + for tag in tag_to_df_updated.keys(): + df = hpantran.trim_df( + tag_to_df_updated[tag], + ts_col_name=None, + start_ts=intersection_start_date, + end_ts=intersection_end_date, + left_close=True, + right_close=True, + ) + tag_to_df_updated[tag] = df + return data_stats, tag_to_df_updated + + +# ############################################################################# + + +# TODO(gp): Remove this since it's in Google API. + + +def compute_weighted_sum( + dfs: Dict[str, pd.DataFrame], + weights: pd.DataFrame, + *, + index_mode: str = "assert_equal", +) -> Dict[str, pd.DataFrame]: + """ + Compute weighted sums of `dfs` using `weights`. + + :param dfs: dataframes keyed by id; all dfs should have the same cols, + indices are handled based on the `index_mode` + :param weights: float weights indexed by id with unique col names + :param index_mode: same as `mode` in `apply_index_mode()` + :return: weighted sums keyed by weight col names + """ + hdbg.dassert_isinstance(dfs, dict) + hdbg.dassert(dfs, "dictionary of dfs must be nonempty") + # Get a dataframe from the dictionary and record its index and columns. + id_ = list(dfs)[0] + hdbg.dassert_isinstance(id_, str) + df = dfs[id_] + hdbg.dassert_isinstance(df, pd.DataFrame) + cols = df.columns + # Sanity-check dataframes in dictionary. + for key, value in dfs.items(): + hdbg.dassert_isinstance(key, str) + hdbg.dassert_isinstance(value, pd.DataFrame) + # The reference df is not modified. + _, value = hpantran.apply_index_mode(df, value, index_mode) + hdbg.dassert( + value.columns.equals(cols), + "Column equality fails for keys=%s, %s", + id_, + key, + ) + # Sanity-check weights. 
+ hdbg.dassert_isinstance(weights, pd.DataFrame) + hdbg.dassert_eq(weights.columns.nlevels, 1) + hdbg.dassert(not weights.columns.has_duplicates) + hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) + # Create a multiindexed dataframe to facilitate computing the weighted sums. + weighted_dfs = {} + combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) + # TODO(Paul): Consider relaxing the NaN-handling. + for col in weights.columns: + weighted_combined_df = combined_df.multiply(weights[col], level=0) + weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( + min_count=len(dfs) + ) + weighted_dfs[col] = weighted_sums + return weighted_dfs + + +def remap_obj( + obj: Union[pd.Series, pd.Index], + map_: Dict[Any, Any], + **kwargs: Any, +) -> pd.Series: + """ + Substitute each value of an object with another value from a dictionary. + + :param obj: a Series or Index to remap values in + :param map_: dictionary mapping old values to new values + :param kwargs: additional keyword arguments passed to pd.Series.map() + :return: remapped pandas series + """ + hdbg.dassert_lte(1, obj.shape[0]) + # TODO(Grisha): consider extending for other mapping types supported by + # `pd.Series.map`. + hdbg.dassert_isinstance(map_, dict) + # Check that every element of the object is in the mapping. + hdbg.dassert_is_subset(obj, map_.keys()) + new_srs = obj.map(map_, **kwargs) + return cast(pd.Series, new_srs) + + +def get_random_df( + num_cols: int, + seed: Optional[int] = None, + date_range_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + Compute df with random data with `num_cols` columns and index obtained by + calling `pd.date_range(**kwargs)`. 
+ + :param num_cols: the number of columns in a DataFrame to generate + :param seed: see `random.seed()` + :param date_range_kwargs: kwargs for `pd.date_range()` + """ + if seed: + np.random.seed(seed) + if date_range_kwargs is None: + date_range_kwargs = {} + dt = pd.date_range(**date_range_kwargs) + df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) + return df + + +# ############################################################################# + + +def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> Any: + """ + Colorize a df with a heatmap depending on the numeric values. + + :param axis: along which axis to compute the heatmap + - 0 colorize along rows + - 1 colorize along columns + - None: colorize everything + """ + # Keep it here to avoid long start up times. + import seaborn as sns + + cm = sns.diverging_palette(5, 250, as_cmap=True) + return df.style.background_gradient(axis=axis, cmap=cm) + + +def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Any) -> str: + """ + Report percentage of True values in a list or series. + + :param vals: list or series of boolean values + :param perc_kwargs: additional keyword arguments passed to hprint.perc() + :return: formatted percentage string + """ + if isinstance(vals, list): + vals = pd.Series(vals) + ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) + return cast(str, ret) + + +def add_end_download_timestamp( + obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" +) -> Union[pd.DataFrame, Dict]: + """ + Add a column 'end_download_timestamp' to the DataFrame with the current + time. + + :param obj: The DataFrame to which the column will be added. + :param timezone: The timezone for the current time. Defaults to + 'UTC'. + """ + # Get current timestamp. + current_ts = hdateti.get_current_time(timezone) + # Set value of end_download_timestamp. 
+ obj["end_download_timestamp"] = current_ts + return obj + + +def get_value_counts_stats_df( + df: pd.DataFrame, col_name: str, *, num_rows: int = 10 +) -> pd.DataFrame: + """ + Get the value counts of `col_name` in `df`. + + :param df: The DataFrame to get the value counts of `col_name` from. + :param col_name: The column name to get the value counts of. + :param num_rows: The number of rows to return. + :return: A DataFrame with the value counts of `col_name` in `df`. E.g., + ``` + count pct [%] + Venture Fund 1004 25.100 + Financial Services 274 6.850 + Venture Capital & Private Equity 176 4.400 + Computer Software 163 4.075 + Higher Education 133 3.325 + Information Technology & Services 73 1.825 + ``` + """ + hdbg.dassert_in(col_name, df.columns) + stats_df = df[col_name].value_counts().to_frame() + stats_df["pct [%]"] = stats_df["count"] / len(df) * 100 + if num_rows > 0: + stats_df = stats_df.head(num_rows) + return stats_df + + +def display_value_counts_stats_df( + df: pd.DataFrame, col_names: Union[str, List[str]], *, num_rows: int = 10 +) -> None: + if isinstance(col_names, list): + for col_name in col_names: + display_value_counts_stats_df(df, col_name, num_rows=num_rows) + return + import IPython.display + + hdbg.dassert_isinstance(col_names, str) + _LOG.info("# %s", col_names) + stats_df = get_value_counts_stats_df(df, col_names, num_rows=num_rows) + IPython.display.display(stats_df) + + +# ############################################################################# +# Functions moved from core/explore.py +# ############################################################################# + + +def report_zero_nan_inf_stats( + df: pd.DataFrame, + *, + zero_threshold: float = 1e-9, + verbose: bool = False, + as_txt: bool = False, + dbg_log_level: int = logging.DEBUG, +) -> pd.DataFrame: + """ + Report count and percentage about zeros, nans, infs for a df. 
+ + :param df: dataframe to report the stats of + :param zero_threshold: threshold for classifying values as "zero" + :param verbose: if True, print the stats + :param as_txt: if True, print the stats as text + :param dbg_log_level: log level at which to print the debug info + :return: a DataFrame with the stats + """ + # Convert Series to DataFrame if needed. + if isinstance(df, pd.Series): + df = pd.DataFrame(df) + # Print stats about the input dataframe. + _LOG.log(dbg_log_level, "index in [%s, %s]", df.index.min(), df.index.max()) + num_rows = df.shape[0] + _LOG.log(dbg_log_level, "num_rows=%s", hprint.thousand_separator(num_rows)) + _LOG.log(dbg_log_level, "data=") + import helpers.hpandas_display as hpandisp + + hpandisp.display_df(df, as_txt=as_txt, log_level=dbg_log_level) + # Compute date-based stats only if index is datetime. + if isinstance(df.index, pd.DatetimeIndex): + # TODO(gp): Can we do this faster? + dates = [d.date() for d in df.index] + num_days = len(set(dates)) + _LOG.log(dbg_log_level, "num_days=%s", num_days) + num_weekdays = len(set(d for d in dates if d.weekday() < 5)) + _LOG.log(dbg_log_level, "num_weekdays=%s", num_weekdays) + # + stats_df = pd.DataFrame(None, index=df.columns) + if False: + # Find the index of the first non-nan value. + df = df.applymap(lambda x: not np.isnan(x)) + min_idx = df.idxmax(axis=0) + min_idx.name = "min_idx" + # Find the index of the last non-nan value. 
+ max_idx = df.reindex(index=df.index[::-1]).idxmax(axis=0) + max_idx.name = "max_idx" + stats_df["num_rows"] = num_rows + # + num_zeros = (np.abs(df) < zero_threshold).sum(axis=0) + if verbose: + stats_df["num_zeros"] = num_zeros + stats_df["zeros [%]"] = (100.0 * num_zeros / num_rows).apply( + hprint.round_digits + ) + # + num_nans = np.isnan(df).sum(axis=0) + if verbose: + stats_df["num_nans"] = num_nans + stats_df["nans [%]"] = (100.0 * num_nans / num_rows).apply( + hprint.round_digits + ) + # + num_infs = np.isinf(df).sum(axis=0) + if verbose: + stats_df["num_infs"] = num_infs + stats_df["infs [%]"] = (100.0 * num_infs / num_rows).apply( + hprint.round_digits + ) + # + num_valid = df.shape[0] - num_zeros - num_nans - num_infs + if verbose: + stats_df["num_valid"] = num_valid + stats_df["valid [%]"] = (100.0 * num_valid / num_rows).apply( + hprint.round_digits + ) + # + _LOG.log(dbg_log_level, "stats_df=\n%s", stats_df) + return stats_df + + +def pvalue_to_stars(pval: Optional[float]) -> str: + """ + Convert p-value to star notation for statistical significance. + + :param pval: p-value to convert + :return: star notation (* to ****) or ? for non-significant, NA for NaN + """ + if pval is None or np.isnan(pval): + stars = "NA" + else: + hdbg.dassert_lte(0.0, pval) + hdbg.dassert_lte(pval, 1.0) + if pval < 0.005: + # More than 99.5% confidence. + stars = "****" + elif pval < 0.01: + # More than 99% confidence. + stars = "***" + elif pval < 0.05: + # More than 95% confidence. + stars = "**" + elif pval < 0.1: + # More than 90% confidence. + stars = "*" + else: + stars = "?" + return stars + + +def format_ols_regress_results(regr_res: Optional[pd.DataFrame]) -> pd.DataFrame: + """ + Format OLS regression results into a readable DataFrame. + + :param regr_res: regression results dictionary with coeffs, pvals, rsquared, etc. 
+ :return: formatted DataFrame with coefficients and statistics + """ + if regr_res is None: + _LOG.warning("regr_res=None: skipping") + df = pd.DataFrame(None) + return df + row: List[Union[float, str]] = [ + "%.3f (%s)" % (coeff, pvalue_to_stars(pval)) + for (coeff, pval) in zip(regr_res["coeffs"], regr_res["pvals"]) + ] + row.append(float("%.2f" % (regr_res["rsquared"] * 100.0))) + row.append(float("%.2f" % (regr_res["adj_rsquared"] * 100.0))) + col_names = regr_res["param_names"] + ["R^2 [%]", "Adj R^2 [%]"] + df = pd.DataFrame([row], columns=col_names) + return df + + +# ############################################################################# +# Exploratory analysis functions +# ############################################################################# + + +def _get_unique_values_stats(df: pd.DataFrame) -> pd.DataFrame: + """ + Get unique values count and percentage for each column. + + :param df: dataframe to analyze + :return: DataFrame with num_unique and unique [%] columns + """ + stats_df = pd.DataFrame(None, index=df.columns) + num_unique = df.nunique() + stats_df["num_unique"] = num_unique + stats_df["unique [%]"] = (100.0 * num_unique / df.shape[0]).apply( + hprint.round_digits + ) + return stats_df + + +def explore_dataframe( + df: pd.DataFrame, + *, + show_distributions: bool = False, + show_correlations: bool = False, + zero_threshold: float = 1e-9, + dbg_log_level: int = logging.DEBUG, +) -> Optional[pd.DataFrame]: + """ + Perform comprehensive exploratory analysis of a DataFrame. + + Computes data quality metrics (zeros, NaNs, infinities, valid data), + optionally plots distributions of high-variability columns, and + optionally displays a correlation matrix. 
+ + :param df: Input dataframe to analyze + :param show_distributions: If True, plots distributions of top-variability + columns in a 3-column grid + :param show_correlations: If True, displays correlation matrix as a heatmap + :param zero_threshold: Threshold for classifying values as "zero" in + quality report + :return: Statistics DataFrame from report_zero_nan_inf_stats with columns: + num_rows, zeros [%], nans [%], infs [%], valid [%] + """ + import matplotlib.pyplot as plt + from IPython.display import display + + hdbg.dassert_lt(0, len(df), "Dataframe is empty") + # Compute and display data quality statistics. + stats_df = report_zero_nan_inf_stats( + df, zero_threshold=zero_threshold, dbg_log_level=dbg_log_level + ) + # Add information about the number of unique values and percentage of unique values for each column. + unique_stats_df = _get_unique_values_stats(df) + stats_df = pd.concat([stats_df, unique_stats_df], axis=1) + if hsystem.is_running_in_ipynb(): + _LOG.info("stats_df=") + display(stats_df) + _LOG.debug("stats_df=\n%s", stats_df) + # Plot distributions if requested. + if hsystem.is_running_in_ipynb(): + if show_distributions: + _LOG.info("Univariate distributions:") + numeric_cols = df.select_dtypes(include="number").columns.tolist() + if len(numeric_cols) > 0: + # Compute standard deviation and select top columns. + std_vals = df[numeric_cols].std().sort_values(ascending=False) + num_to_plot = len(numeric_cols) + top_cols = std_vals.head(num_to_plot).index.tolist() + # Create grid of subplots. 
+ import helpers.hmatplotlib as hmatplo + + fig, axes = hmatplo.get_multiple_plots( + num_to_plot, 3, y_scale=3.5 + ) + _ = fig + for i, col in enumerate(top_cols): + ax = axes[i] + col_data = df[col].dropna() + weights = np.ones_like(col_data) / len(col_data) * 100 + ax.hist(col_data, bins=30, weights=weights, edgecolor="k") + ax.set_title(col) + ax.set_xlabel("Value") + ax.set_ylabel("Percentage [%]") + plt.tight_layout() + plt.show() + # Display correlation matrix if requested. + if show_correlations: + numeric_df = df.select_dtypes(include="number") + if len(numeric_df.columns) >= 2: + corr_matrix = numeric_df.corr() + _LOG.info("Correlation matrix:") + # TODO(gp): Improve the plot changing the number of digits. + corr_heatmap = heatmap_df(corr_matrix) + display(corr_heatmap) + if hsystem.is_running_in_ipynb(): + return None + return stats_df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py new file mode 100644 index 000000000..6eae1fa57 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py @@ -0,0 +1,1023 @@ +""" +Import as: + +import helpers.hpandas_transform as hpantran +""" + +import csv +import logging +import math +import random +import re +from typing import ( + Any, + Callable, + Collection, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) + +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +# TODO(ai_gp): Import the file and not the package to avoid cyclic imports. +import helpers.hpandas_conversion as hpanconv +import helpers.hprint as hprint + +_LOG = hloggin.getLogger(__name__) + +# Enable extra verbose debugging. Do not commit. 
_TRACE = False

RowsValues = List[List[str]]

# #############################################################################
# Resampling & Time Series Operations
# #############################################################################


def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex:
    """
    Build a regular `DatetimeIndex` covering the span of `index`.

    The result runs from `index.min()` to `index.max()` at the requested
    frequency. The name of the input index is not carried over to the result.

    :param index: `DatetimeIndex` to resample; its values must be unique
    :param frequency: frequency alias, passed as `freq` to `pd.date_range()`
    :return: resampled `DatetimeIndex`
    """
    # Import locally to avoid cyclic import.
    import helpers.hpandas_dassert as hpandass

    _LOG.debug(hprint.to_str("index frequency"))
    hdbg.dassert_isinstance(index, pd.DatetimeIndex)
    hpandass.dassert_unique_index(
        index, msg="Index must have only unique values"
    )
    first_ts, last_ts = index.min(), index.max()
    _LOG.debug("min_date=%s max_date=%s", first_ts, last_ts)
    # TODO(gp): Preserve the index name.
    resampled_index = pd.date_range(start=first_ts, end=last_ts, freq=frequency)
    # Detailed debugging, disabled on purpose: flip the condition to report
    # how the number of points changed.
    if False:
        num_before = len(index)
        num_after = len(resampled_index)
        if num_after > num_before:
            # The regular grid has more points than the input index.
            _LOG.debug(
                "Index length increased by %s = %s - %s",
                num_after - num_before,
                num_after,
                num_before,
            )
        elif num_after < num_before:
            # The regular grid has fewer points than the input index.
            _LOG.debug(
                "Index length decreased by %s = %s - %s",
                num_before - num_after,
                num_before,
                num_after,
            )
        else:
            _LOG.debug("Index length=%s has not changed", num_before)
    return resampled_index


def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame:
    """
    Reindex `df` on a regular time grid, leaving NaN where data is missing.

    :param df: `DataFrame` to resample
    :param frequency: frequency alias, see `resample_index()`
    :return: new `DataFrame` indexed by the resampled index, with the original
        index name
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    new_index = resample_index(df.index, frequency)
    resampled_df = df.reindex(new_index)
    # `resample_index()` does not keep the index name, so restore it here.
    resampled_df.index.name = df.index.name
    return resampled_df


def reindex_on_unix_epoch(
    df: pd.DataFrame, in_col_name: str, unit: str = "s"
) -> pd.DataFrame:
    """
    Index `df` by the UTC timestamps obtained from a Unix-epoch column.

    `in_col_name` holds Unix epochs (e.g., 1638194400) that are converted to
    UTC datetimes and used as the index. The conversion is done in place: the
    passed `df` itself is modified and returned, `in_col_name` is kept as a
    column, and the resulting index has no name.

    :param df: dataframe with a unix epoch column
    :param in_col_name: column containing unix epoch
    :param unit: the unit of unix epoch
    :return: the same dataframe object, re-indexed
    """
    # The converted values go through a scratch column that must not clash
    # with an existing one.
    tmp_col = in_col_name + "_tmp"
    hdbg.dassert_in(in_col_name, df.columns)
    hdbg.dassert_not_in(tmp_col, df.columns)
    timestamps = pd.to_datetime(df[in_col_name], unit=unit, utc=True)
    df[tmp_col] = timestamps
    # Promote the scratch column to the index and drop it from the columns.
    df.set_index(tmp_col, drop=True, inplace=True)
    df.index.name = None
    return df


def find_gaps_in_dataframes(
    df1: pd.DataFrame, df2: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Report the rows that each dataframe lacks with respect to the other.

    Rows are matched by index label only.

    :param df1: first dataframe for comparison
    :param df2: second dataframe for comparison
    :return: a pair of dataframes
        - rows of `df2` whose index is absent from `df1` (missing in the first)
        - rows of `df1` whose index is absent from `df2` (missing in the second)
    """
    # Index labels present in the second dataframe but not in the first.
    labels_only_in_df2 = df2.index.difference(df1.index)
    # Index labels present in the first dataframe but not in the second.
    labels_only_in_df1 = df1.index.difference(df2.index)
    return df2.loc[labels_only_in_df2], df1.loc[labels_only_in_df1]


# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`.
+ + +def find_gaps_in_time_series( + time_series: pd.Series, + start_timestamp: pd.Timestamp, + end_timestamp: pd.Timestamp, + freq: str, +) -> pd.Series: + """ + Find missing points on a time interval specified by [start_timestamp, + end_timestamp], where point distribution is determined by . + + If the passed time series is of a unix epoch format. It is + automatically tranformed to pd.Timestamp. + + :param time_series: time series to find gaps in + :param start_timestamp: start of the time interval to check + :param end_timestamp: end of the time interval to check + :param freq: distance between two data points on the interval. + Aliases correspond to pandas.date_range's freq parameter, i.e. + "S" -> second, "T" -> minute. + :return: pd.Series representing missing points in the source time + series. + """ + _time_series = time_series + if str(time_series.dtype) in ["int32", "int64"]: + _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) + correct_time_series = pd.date_range( + start=start_timestamp, end=end_timestamp, freq=freq + ) + return correct_time_series.difference(_time_series) + + +# ############################################################################# +# DataFrame Transformation +# ############################################################################# + + +def apply_index_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the index mode. 
+ + :param df1: first input df + :param df2: second input df + :param mode: method of processing indices + - "assert_equal": check that both indices are equal, assert otherwise + - "intersect": restrict both dfs to a common index + - "leave_unchanged": ignore any indices mismatch and return dfs as-is + :return: transformed copy of the inputs + """ + # Import locally to avoid cyclic import + import helpers.hpandas_dassert as hpandass + + _LOG.debug("mode=%s", mode) + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + hpandass.dassert_indices_equal(df1_copy, df2_copy) + elif mode == "intersect": + # TODO(Grisha): Add sorting on demand. + common_index = df1_copy.index.intersection(df2_copy.index) + df1_copy = df1_copy[df1_copy.index.isin(common_index)] + df2_copy = df2_copy[df2_copy.index.isin(common_index)] + elif mode == "leave_unchanged": + _LOG.debug( + "Ignoring any index missmatch as per user's request.\n" + "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", + df1_copy.index.difference(df2_copy.index), + df2_copy.index.difference(df1_copy.index), + ) + else: + raise ValueError(f"Unsupported index_mode={mode}") + return df1_copy, df2_copy + + +def apply_columns_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the column mode. 
+ + :param df1: first input df + :param df2: second input df + :param mode: method of processing columns + - "assert_equal": check that both dfs have equal columns, assert otherwise + - "intersect": restrict both dfs to only include common columns + - "leave_unchanged": ignore any column mismatches and return dfs as-is + :return: transformed copy of the inputs + """ + # Import locally to avoid cyclic import + import helpers.hpandas_dassert as hpandass + import helpers.hpandas_utils as hpanutil + + _LOG.debug("mode=%s", mode) + # Input validation. + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + # Check if columns are equal or not. + hpandass.dassert_columns_equal(df1_copy, df2_copy) + elif mode == "intersect": + # Filter dataframes based on its common columns. + common_columns = df1_copy.columns.intersection(df2_copy.columns) + df1_copy = df1_copy[common_columns] + df2_copy = df2_copy[common_columns] + # Log the string representation of 2 dfs. + _LOG.debug("df1 after filtering=\n%s", hpanutil.df_to_str(df1)) + _LOG.debug("df2 after filtering=\n%s", hpanutil.df_to_str(df2)) + elif mode == "leave_unchanged": + # Ignore mismatch. + _LOG.debug( + "Ignoring any column missmatch as per user's request.\n" + "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", + df1.columns.difference(df2.columns), + df2.columns.difference(df1.columns), + ) + else: + raise ValueError(f"Unsupported column mode: {mode}") + return df1_copy, df2_copy + + +def trim_df( + df: pd.DataFrame, + ts_col_name: Optional[str], + start_ts: Optional[pd.Timestamp], + end_ts: Optional[pd.Timestamp], + left_close: bool, + right_close: bool, +) -> pd.DataFrame: + """ + Trim the dataframe using values in `ts_col_name`. 
+ + The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. + + :param df: the dataframe to trim + :param ts_col_name: the name of the column; `None` means index + :param start_ts: the start boundary for trimming + :param end_ts: the end boundary for trimming + :param left_close: whether to include the start boundary of the interval + - True: [start_ts, ... + - False: (start_ts, ... + :param right_close: whether to include the end boundary of the interval + - True: ..., end_ts] + - False: ..., end_ts) + :return: the trimmed dataframe + """ + if _TRACE: + # Import locally to avoid cyclic import + import helpers.hpandas_utils as hpanutil + + _LOG.trace( + hpanutil.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + ) + _LOG.debug( + hprint.to_str("ts_col_name start_ts end_ts left_close right_close") + ) + if _TRACE: + # Import locally to avoid cyclic import + import helpers.hpandas_utils as hpanutil + + _LOG.trace("df=\n%s", hpanutil.df_to_str(df)) + if df.empty: + # If the df is empty, there is nothing to trim. + return df + if start_ts is None and end_ts is None: + # If no boundaries are specified, there are no points of reference to trim + # to. + return df + num_rows_before = df.shape[0] + if start_ts is not None and end_ts is not None: + # Confirm that the interval boundaries are valid. + hdateti.dassert_tz_compatible(start_ts, end_ts) + hdbg.dassert_lte(start_ts, end_ts) + # Get the values to filter by. + if ts_col_name is None: + values_to_filter_by = pd.Series(df.index, index=df.index) + else: + hdbg.dassert_in(ts_col_name, df.columns) + values_to_filter_by = df[ts_col_name] + if values_to_filter_by.is_monotonic_increasing: + _LOG.trace("df is monotonic") + # The values are sorted; using the `pd.Series.searchsorted()` method. + # Find the index corresponding to the left boundary of the interval. 
+ if start_ts is not None: + side = "left" if left_close else "right" + left_idx = values_to_filter_by.searchsorted(start_ts, side) + else: + # There is nothing to filter, so the left index is the first one. + left_idx = 0 + _LOG.debug(hprint.to_str("start_ts left_idx")) + # Find the index corresponding to the right boundary of the interval. + if end_ts is not None: + side = "right" if right_close else "left" + right_idx = values_to_filter_by.searchsorted(end_ts, side) + else: + # There is nothing to filter, so the right index is None. + right_idx = df.shape[0] + _LOG.debug(hprint.to_str("end_ts right_idx")) + # + hdbg.dassert_lte(0, left_idx) + hdbg.dassert_lte(left_idx, right_idx) + hdbg.dassert_lte(right_idx, df.shape[0]) + _LOG.debug(hprint.to_str("start_ts left_idx")) + if right_idx < df.shape[0]: + _LOG.debug(hprint.to_str("end_ts right_idx")) + df = df.iloc[left_idx:right_idx] + else: + _LOG.trace("df is not monotonic") + # The values are not sorted; using the `pd.Series.between` method. + if left_close and right_close: + inclusive = "both" + elif left_close: + inclusive = "left" + elif right_close: + inclusive = "right" + else: + inclusive = "neither" + epsilon = pd.DateOffset(minutes=1) + if start_ts is None: + start_ts = values_to_filter_by.min() - epsilon + if end_ts is None: + end_ts = values_to_filter_by.max() + epsilon + df = df[ + values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) + ] + # Report the changes. + num_rows_after = df.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return df + + +def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: + """ + Organize dataframe values into a column-row structure. + + - Indentation artifacts are removed + - The index placement is handled, i.e. 
+ - if the index is named, the name is located and moved to the same + row as the column names + - if the index is not named, the row with the column names receives + a placeholder empty value in its place + - Empty columns are dropped + + :param rows_values: row values extracted from a string df representation + :return: row values assembled into a valid column-row structure + """ + # Clean up indentation artifacts. + if all(row[0] == "" for row in rows_values): + # Remove the first empty cell in each row. + for row in rows_values: + del row[0] + # If the index is named, its name is located in the second row, + # with an optional extra empty value cell value next to it. + if len(rows_values[1]) == 1 or ( + len(rows_values[1]) == 2 and rows_values[1][1] == "" + ): + # Move the index name to the row with all the column names. + if rows_values[0][0] == "": + rows_values[0][0] = rows_values[1][0] + else: + rows_values[0].insert(0, rows_values[1][0]) + # Drop the former index name row. + del rows_values[1] + else: + # Add an empty cell for the absent index name. + rows_values[0].insert(0, "") + # Identify and remove empty columns. + min_len_row = min(len(row) for row in rows_values) + idxs_to_delete = [] + for i in range(min_len_row): + if all(row[i] == "" for row in rows_values): + idxs_to_delete.append(i) + for idx in idxs_to_delete: + for row in rows_values: + del row[idx] + # Confirm that all the rows have the same number of values. + hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) + return rows_values + + +# TODO(Nina): Add `filter_data_mode`. + + +def str_to_df( + df_as_str: str, + col_to_type: Dict[str, Optional[type]], + col_to_name_type: Dict[str, type], +) -> pd.DataFrame: + """ + Convert a string representation of a dataframe into a Pandas df. + + :param df_as_str: a df as a string + - the format of the string is the same as the output of + `hpandas_utils.df_to_str()` on a pd.DataFrame, e.g. 
+ ``` + col1 col2 col3 col4 + 0 0.1 a None 2020-01-01 + 1 0.2 "b c" None 2021-05-05 + ``` + - values (including column names) that contain spaces need + to be enclosed in double quotation marks, e.g. + "2023-03-15 16:35:41.205000+00:00" + :param col_to_type: a mapping between the column names and the + types of the values in these columns + - if a column is not present in the mapping, its values will + remain strings + - to indicate the type of index values, use {"__index__": ...} + mapping, e.g. {"__index__": pd.Timestamp} + :param col_to_name_type: a mapping between the column names and + the required types of these column names + - same conventions apply as for `col_to_type` (see above) + :return: a converted Pandas dataframe + """ + # Separate the rows. + rows = df_as_str.split("\n") + # Clean up extra spaces. + rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] + # Identify individual values in the rows. + rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) + # Remove the placeholder ["..."] row. + rows_values = [row for row in rows_values if row != ["..."]] + # Organize values into a proper column-row structure. + rows_values = _assemble_df_rows(rows_values) + # Get the column names. + column_names = rows_values[0][1:] + # Get the index. + index_values = [row[0] for row in rows_values[1:]] + index_name = rows_values[0][0] + # Construct the df. + df = pd.DataFrame( + [row[1:] for row in rows_values[1:]], + columns=column_names, + index=index_values, + ) + if index_name != "": + df.index.name = index_name + # Cast the columns into appropriate types. + # Import locally to avoid cyclic import + import helpers.hpandas_conversion as hpanconv + + for col, col_type in col_to_type.items(): + if col == "__index__": + df.index = hpanconv.cast_series_to_type(df.index, col_type) + else: + df[col] = hpanconv.cast_series_to_type(df[col], col_type) + # Cast the column names into appropriate types. 
+ for col, col_name_type in col_to_name_type.items(): + if col == "__index__": + df.index = df.index.rename(col_name_type(df.index.name)) + else: + df = df.rename(columns={col: col_name_type(col)}) + return df + + +# ############################################################################# +# Column Operations +# ############################################################################# + + +def check_and_filter_matching_columns( + df: pd.DataFrame, required_columns: List[str], filter_data_mode: str +) -> pd.DataFrame: + """ + Check that columns are the required ones and if not filter data depending + on `filter_data_mode`. + + :param df: data to check columns for + :param required_columns: columns to return, skipping columns that are not required + :param filter_data_mode: control behaviour with respect to extra or missing columns + - "assert": raise an error if required columns do not match received columns + - "warn_and_trim": return the intersection of required and received columns and + issue a warning + :return: input data as it is if required columns match received columns otherwise + processed data, see `filter_data_mode` + """ + received_columns = df.columns.to_list() + hdbg.dassert_lte(1, len(received_columns)) + # + if filter_data_mode == "assert": + # Raise an assertion. + only_warning = False + elif filter_data_mode == "warn_and_trim": + # Just issue a warning. + only_warning = True + # Get columns intersection while preserving the order of the columns. + columns_intersection = [ + col_name + for col_name in required_columns + if col_name in received_columns + ] + hdbg.dassert_lte(1, len(columns_intersection)) + df = df[columns_intersection] + else: + raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") + hdbg.dassert_set_eq( + required_columns, + received_columns, + only_warning=only_warning, + msg="Received columns do not match required columns.", + ) + return df + + +# TODO(Grisha): finish the function. 
+# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? + + +# ############################################################################# +# Merge +# ############################################################################# + + +def merge_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + threshold_col_name: str, + *, + threshold: float = 0.9, + intersecting_columns: Optional[List[str]] = None, + **pd_merge_kwargs: Any, +) -> pd.DataFrame: + """ + Wrap `pd.merge`. + + :param threshold_col_name: a column's name to check the minimum + overlap on + :param threshold: minimum overlap of unique values in a specified + column to perform the merge + :param intersecting_columns: allow certain columns to appear in both + dataframes; store both in the resulting df with corresponding + suffixes + """ + _LOG.debug( + hprint.to_str( + "threshold_col_name threshold intersecting_columns pd_merge_kwargs" + ) + ) + # Sanity check column types. + threshold_col1 = df1[threshold_col_name] + threshold_col2 = df2[threshold_col_name] + only_first_elem = False + hdbg.dassert_array_has_same_type_element( + threshold_col1, threshold_col2, only_first_elem + ) + # TODO(Grisha): @Dan Implement asserts for each asset id. + # Check that an overlap of unique values is above the specified threshold. + threshold_unique_values1 = set(threshold_col1) + threshold_unique_values2 = set(threshold_col2) + threshold_common_values = set(threshold_unique_values1) & set( + threshold_unique_values2 + ) + threshold_common_values_share1 = len(threshold_common_values) / len( + threshold_unique_values1 + ) + threshold_common_values_share2 = len(threshold_common_values) / len( + threshold_unique_values2 + ) + hdbg.dassert_lte(threshold, threshold_common_values_share1) + hdbg.dassert_lte(threshold, threshold_common_values_share2) + # Use an empty set instead of None to perform set difference further. 
+ intersecting_columns_set = ( + set() if intersecting_columns is None else set(intersecting_columns) + ) + # Check that there are no common columns except for the ones in `intersecting_columns`. + df1_cols = ( + set(df1.columns.to_list()) + - set(pd_merge_kwargs["on"]) + - intersecting_columns_set + ) + df2_cols = ( + set(df2.columns.to_list()) + - set(pd_merge_kwargs["on"]) + - intersecting_columns_set + ) + hdbg.dassert_not_intersection(df1_cols, df2_cols) + # + res_df = df1.merge(df2, **pd_merge_kwargs) + return res_df + + +# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? + + +def get_df_from_iterator( + iter_: Iterator[pd.DataFrame], + *, + sort_index: bool = True, +) -> pd.DataFrame: + """ + Concat all the dataframes in the iterator in one dataframe. + + :param iter_: dataframe iterator + :param sort_index: whether to sort output index or not + :return: combined iterator data + """ + # TODO(gp): @all make a copy of `iter_` so we don't consume it. + dfs = list(iter_) + df_res = pd.concat(dfs) + if sort_index: + df_res = df_res.sort_index() + return df_res + + +# ############################################################################# +# Filter +# ############################################################################# + + +def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: + """ + Remove N rows from the input data and shuffle the remaining ones. 
+ + :param df: input data + :param nrows: the number of rows to remove from the original data + :param seed: see `random.seed()` + :return: shuffled data with removed rows + """ + hdbg.dassert_lte(1, nrows) + hdbg.dassert_lte(nrows, df.shape[0]) + idx = list(range(df.shape[0])) + random.seed(seed) + random.shuffle(idx) + idx = sorted(idx[nrows:]) + return df.iloc[idx] + + +def filter_df( + df: pd.DataFrame, + col_name: str, + value: Any, + *, + invert: bool = False, + check_value: bool = True, + # TODO(gp): -> verbose + print_info: bool = True, +) -> pd.DataFrame: + """ + Filter a dataframe based on a column value. + + :param df: dataframe to filter + :param col_name: column name to filter on + :param value: value to filter on + :param invert: whether to invert the filter + :param check_value: whether to check that the value is in the column + :param print_info: whether to print information about the filter + :return: filtered dataframe + """ + hdbg.dassert_in(col_name, df.columns) + if isinstance(value, list): + mask = df[col_name].isin(value) + else: + if check_value: + hdbg.dassert_in(value, df[col_name].unique()) + mask = df[col_name] == value + if invert: + mask = ~mask + if print_info: + _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0])) + return df[mask] + + +def remove_empty_columns( + df: pd.DataFrame, *, verbose: bool = True +) -> pd.DataFrame: + """ + Remove empty columns from a dataframe. 
+ + :param df: dataframe to remove empty columns from + :return: dataframe with empty columns removed + """ + mask = df.apply(lambda col: col.notna() & (col != "")).any() + non_empty_columns = df.columns[mask] + empty_columns = df.columns[~mask] + if verbose: + _LOG.info( + "kept %s columns: %s", + hprint.perc(len(non_empty_columns), len(df.columns)), + hprint.list_to_str(non_empty_columns), + ) + _LOG.info( + "removed %s columns: %s", + hprint.perc(len(empty_columns), len(df.columns)), + hprint.list_to_str(empty_columns), + ) + df = df[non_empty_columns] + return df + + +def remove_stable_columns( + df: pd.DataFrame, *, threshold: float = 0.9, verbose: bool = True +) -> pd.DataFrame: + """ + Remove columns from a dataframe that have less than threshold unique + values. + + :param df: dataframe to remove stable columns from + :param threshold: threshold for the percentage of stable columns to + remove + :return: dataframe with stable columns removed + """ + high_variability_columns = [] + for col in df.columns: + unique_values = df[col].unique() + if len(unique_values) / len(df) >= threshold: + high_variability_columns.append(col) + # Compute the columns to remove. + columns_to_remove = df.columns[~df.columns.isin(high_variability_columns)] + if verbose: + _LOG.info( + "kept %s columns: %s", + hprint.perc(len(high_variability_columns), len(df.columns)), + hprint.list_to_str(high_variability_columns), + ) + _LOG.info( + "removed %s columns: %s", + hprint.perc(len(columns_to_remove), len(df.columns)), + hprint.list_to_str(columns_to_remove), + ) + df = df[high_variability_columns] + return df + + +def adapt_to_series(f: Callable) -> Callable: + """ + Extend a function working on dataframes so that it can work on series. + """ + + def wrapper( + obj: Union[pd.Series, pd.DataFrame], *args: Any, **kwargs: Any + ) -> Any: + # Convert a pd.Series to a pd.DataFrame. 
+ was_series = False + if isinstance(obj, pd.Series): + obj = pd.DataFrame(obj) + was_series = True + hdbg.dassert_isinstance(obj, pd.DataFrame) + # Apply the function. + res = f(obj, *args, **kwargs) + # Transform the output, if needed. + if was_series: + if isinstance(res, tuple): + res_obj, res_tmp = res[0], res[1:] + res_obj_srs = hpanconv.to_series(res_obj) + res_obj_srs = [res_obj_srs] + res_obj_srs.extend(res_tmp) + res = tuple(res_obj_srs) + else: + res = hpanconv.to_series(res) + return res + + return wrapper + + +# ############################################################################# + + +def add_pct( + df: pd.DataFrame, + col_name: str, + total: int, + dst_col_name: str, + num_digits: int = 2, + use_thousands_separator: bool = True, +) -> pd.DataFrame: + """ + Add to df a column "dst_col_name" storing the percentage of values in + column "col_name" with respect to "total". The rest of the parameters are + the same as hprint.round_digits(). + + :return: updated df + """ + # Add column with percentage right after col_name. + pos_col_name = df.columns.tolist().index(col_name) + df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) + # Format. + df[col_name] = [ + hprint.round_digits( + v, num_digits=None, use_thousands_separator=use_thousands_separator + ) + for v in df[col_name] + ] + df[dst_col_name] = [ + hprint.round_digits( + v, num_digits=num_digits, use_thousands_separator=False + ) + for v in df[dst_col_name] + ] + return df + + +# ############################################################################# + + +def remove_columns( + df: pd.DataFrame, cols: Collection[str], log_level: int = logging.DEBUG +) -> pd.DataFrame: + """ + Remove specified columns from a dataframe. 
+ + :param df: dataframe to remove columns from + :param cols: collection of column names to remove + :param log_level: logging level for reporting removed columns + :return: dataframe with specified columns removed + """ + to_remove = set(cols).intersection(set(df.columns)) + _LOG.log(log_level, "to_remove=%s", hprint.list_to_str(to_remove)) + df.drop(to_remove, axis=1, inplace=True) + _LOG.debug("df=\n%s", df.head(3)) + _LOG.log(log_level, hprint.list_to_str(df.columns)) + return df + + +def filter_with_df( + df: pd.DataFrame, filter_df: pd.DataFrame, log_level: int = logging.DEBUG +) -> pd.Series: + """ + Compute a mask for DataFrame df using common columns and values in + "filter_df". + """ + mask = None + for c in filter_df: + hdbg.dassert_in(c, df.columns) + vals = filter_df[c].unique() + if mask is None: + mask = df[c].isin(vals) + else: + mask &= df[c].isin(vals) + mask: pd.DataFrame + _LOG.log(log_level, "after filter=%s", hprint.perc(mask.sum(), len(mask))) + return mask + + +def filter_by_time( + df: pd.DataFrame, + lower_bound: hdateti.StrictDatetime, + upper_bound: hdateti.StrictDatetime, + inclusive: str, + ts_col_name: Optional[str], + log_level: int = logging.DEBUG, +) -> pd.DataFrame: + """ + Filter data by time between `lower_bound` and `upper_bound`. + + Pass `None` to `ts_col_name` to filter by `DatetimeIndex`. + + :param df: data to filter + :param lower_bound: left limit point of the time interval + :param upper_bound: right limit point of the time interval + :param inclusive: include boundaries + - "both": `[lower_bound, upper_bound]` + - "neither": `(lower_bound, upper_bound)` + - "right": `(lower_bound, upper_bound]` + - "left": `[lower_bound, upper_bound)` + :param ts_col_name: name of a timestamp column to filter with, or None to + use the DatetimeIndex + :param log_level: the level of logging, e.g. 
`DEBUG` + :return: dataframe filtered by time + """ + hdateti.dassert_is_strict_datetime(lower_bound) + hdateti.dassert_is_strict_datetime(upper_bound) + # Time filtering is not working if timezones are different. + hdateti.dassert_tz_compatible_timestamp_with_df(lower_bound, df, ts_col_name) + hdateti.dassert_tz_compatible_timestamp_with_df(upper_bound, df, ts_col_name) + # + if ts_col_name is None: + # Filter data by index. + hdbg.dassert_isinstance(df.index, pd.DatetimeIndex) + # Cast index to `pd.Series` to use the `between` method. + mask = df.index.to_series().between(lower_bound, upper_bound, inclusive) + else: + # Filter data by a specified column. + hdbg.dassert_in(ts_col_name, df.columns) + mask = df[ts_col_name].between(lower_bound, upper_bound, inclusive) + # + _LOG.log( + log_level, + "Filtering between %s and %s with inclusive=`%s`, selected rows=%s", + lower_bound, + upper_bound, + inclusive, + hprint.perc(mask.sum(), df.shape[0]), + ) + return df[mask] + + +def filter_by_val( + df: pd.DataFrame, + col_name: str, + min_val: float, + max_val: float, + use_thousands_separator: bool = True, + log_level: int = logging.DEBUG, +) -> pd.DataFrame: + """ + Filter out rows of df where df[col_name] is not in [min_val, max_val]. + """ + # TODO(gp): If column is ordered, this can be done more efficiently with + # binary search. 
+ num_rows = df.shape[0] + if min_val is not None and max_val is not None: + hdbg.dassert_lte(min_val, max_val) + mask = None + if min_val is not None: + mask = min_val <= df[col_name] + if max_val is not None: + mask2 = df[col_name] <= max_val + if mask is None: + mask = mask2 + else: + mask &= mask2 + res = df[mask] + hdbg.dassert_lt(0, res.shape[0]) + _LOG.log( + log_level, + "Rows kept %s, removed %s rows", + hprint.perc( + res.shape[0], + num_rows, + use_thousands_separator=use_thousands_separator, + ), + hprint.perc( + num_rows - res.shape[0], + num_rows, + use_thousands_separator=use_thousands_separator, + ), + ) + return res + + +# ############################################################################# +# PCA +# ############################################################################# + + +def sample_rolling_df( + rolling_df: pd.DataFrame, periods: int +) -> Tuple[pd.DataFrame, pd.DatetimeIndex]: + """ + Given a rolling metric stored as multiindex (e.g., correlation computed by + pd.ewm) sample `periods` equispaced samples. 
+ + :return: sampled df, array of timestamps selected + """ + timestamps = rolling_df.index.get_level_values(0) + ts = timestamps[:: math.ceil(len(timestamps) / periods)] + _LOG.debug("timestamps=%s", str(ts)) + # rolling_df_out = rolling_df.unstack().reindex(ts).stack(dropna=False) + rolling_df_out = rolling_df.loc[ts] + return rolling_df_out, ts diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py new file mode 100644 index 000000000..aaacb290a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py @@ -0,0 +1,649 @@ +""" +Import as: + +import helpers.hpandas_utils as hpanutil +""" + +import logging +from typing import Any, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd +import tqdm.autonotebook as tauton + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = hloggin.getLogger(__name__) + +# Import add_pct for use in this module. + + +# TODO(gp): -> AxisNameSet +ColumnSet = Optional[Union[str, List[str]]] + + +# ############################################################################# + + +# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that +# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. +# TODO(gp): We should make sure this works properly in a notebook, although +# it's not easy to unit test. + + +def _display(log_level: int, df: pd.DataFrame) -> None: + """ + Display a dataframe in a notebook at the given log level. + + The behavior is similar to a command like `_LOG.log(log_level, ...)` but + for a notebook `display` command. + + :param log_level: log level at which to display a df. 
E.g., if `log_level = + logging.DEBUG`, then we display the df only if we are running with + `-v DEBUG`. If `log_level = logging.INFO` then we don't display it + :param df: dataframe to display + """ + from IPython.display import display + + if ( + hsystem.is_running_in_ipynb() + and log_level >= hdbg.get_logger_verbosity() + ): + display(df) + + +def _df_to_str( + df: pd.DataFrame, + num_rows: Optional[int], + max_columns: int, + max_colwidth: int, + max_rows: int, + precision: int, + display_width: int, + use_tabulate: bool, + log_level: int, +) -> str: + """ + Convert a DataFrame to a string representation. + + :param df: The DataFrame to convert to a string. + :param num_rows: The number of rows to display. + :param max_columns: The maximum number of columns to display. + :param max_colwidth: The maximum width of each column. + :param max_rows: The maximum number of rows to display. + :param precision: The precision of the numbers. + :param display_width: The width of the display. + :param use_tabulate: Whether to use the tabulate library to format + the DataFrame. + :param log_level: The log level to use. + :return: A string representation of the DataFrame. + """ + is_in_ipynb = hsystem.is_running_in_ipynb() + out = [] + # Set dataframe print options. + with pd.option_context( + "display.max_colwidth", + max_colwidth, + # "display.height", 1000, + "display.max_rows", + max_rows, + "display.precision", + precision, + "display.max_columns", + max_columns, + "display.width", + display_width, + ): + if use_tabulate: + import tabulate + + out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) + # TODO(Grisha): Add an option to display all rows since if `num_rows` + # is `None`, only first and last 5 rows are displayed. Consider using + # `df.to_string()` instead of `str(df)`. + if num_rows is None or df.shape[0] <= num_rows: + # Print the entire data frame. + if not is_in_ipynb: + out.append(str(df)) + else: + # Display dataframe. 
+ _display(log_level, df) + else: + nr = num_rows // 2 + if not is_in_ipynb: + # Print top and bottom of df. + out.append(str(df.head(nr))) + out.append("...") + tail_str = str(df.tail(nr)) + # Remove index and columns from tail_df. + skipped_rows = 1 + if df.index.name: + skipped_rows += 1 + tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) + out.append(tail_str) + else: + # TODO(gp): @all use this approach also above and update all the + # unit tests. + df = [ + df.head(nr), + pd.DataFrame( + [["..."] * df.shape[1]], index=[" "], columns=df.columns + ), + df.tail(nr), + ] + df = pd.concat(df) + # Display dataframe. + _display(log_level, df) + if not is_in_ipynb: + txt = "\n".join(out) + else: + txt = "" + return txt + + +def _report_srs_stats(srs: pd.Series) -> List[Any]: + """ + Report dtype, the first element, and its type of series. + + :param srs: The series to report the stats of. + :return: A list of the stats. + """ + row: List[Any] = [] + first_elem = srs.values[0] + num_unique = srs.nunique() + num_nans = srs.isna().sum() + row.extend( + [ + srs.dtype, + hprint.perc(num_unique, len(srs)), + hprint.perc(num_nans, len(srs)), + first_elem, + type(first_elem), + ] + ) + return row + + +def df_to_str( + df: Union[pd.DataFrame, pd.Series, pd.Index], + *, + # TODO(gp): Remove this hack in the integration. + # handle_signed_zeros: bool = False, + handle_signed_zeros: bool = True, + num_rows: Optional[int] = 6, + print_dtypes: bool = False, + print_shape_info: bool = False, + print_nan_info: bool = False, + print_memory_usage: bool = False, + memory_usage_mode: str = "human_readable", + tag: Optional[str] = None, + max_columns: int = 10000, + max_colwidth: int = 2000, + max_rows: int = 500, + precision: int = 6, + display_width: int = 10000, + use_tabulate: bool = False, + log_level: int = logging.DEBUG, +) -> str: + """ + Print a dataframe to string reporting all the columns without trimming. 
+ + Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works + properly when called from outside a notebook, i.e., the dataframe is printed + But it won't display the dataframe in a notebook, since the default level at + which the dataframe is displayed is `logging.DEBUG`. + + In this case to get the correct behavior one should do: + ``` + log_level = ... + _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) + ``` + + :param: handle_signed_zeros: convert `-0.0` to `0.0` + :param: num_rows: max number of rows to print (half from the top and half from + the bottom of the dataframe) + - `None` to print the entire dataframe + :param print_dtypes: report dataframe types and information about the type of + each column by looking at the first value + :param print_shape_info: report dataframe shape, index and columns + :param print_memory_usage: report memory use for each + """ + if df is None: + return "" + if isinstance(df, pd.Series): + df = pd.DataFrame(df) + elif isinstance(df, pd.Index): + df = df.to_frame(index=False) + hdbg.dassert_isinstance(df, pd.DataFrame) + # Convert "negative zeros" to `0.0`. + df = df.copy() + if handle_signed_zeros: + for col_name in df.select_dtypes(include=[np.float64, float]).columns: + df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) + out = [] + # Print the tag. + if tag is not None: + out.append(f"# {tag}=") + if not df.empty: + # Print information about the shape and index. + # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info + if print_shape_info: + # TODO(gp): Unfortunately we can't improve this part of the output + # since there are many golden inside the code that would need to be + # updated. Consider automating updating the expected values in the code. 
+ txt = f"index=[{df.index.min()}, {df.index.max()}]" + out.append(txt) + txt = f"columns={','.join(map(str, df.columns))}" + out.append(txt) + txt = f"shape={str(df.shape)}" + out.append(txt) + # Print information about the types. + if print_dtypes: + out.append("* type=") + table = [] + row = [] + col_name = "index" + row.append(col_name) + row.extend(_report_srs_stats(df.index)) + row = map(str, row) + table.append(row) + for col_name in df.columns: + row_: List[Any] = [] + row_.append(col_name) + row_.extend(_report_srs_stats(df[col_name])) + row_ = map(str, row_) + table.append(row_) + # + columns = [ + "col_name", + "dtype", + "num_unique", + "num_nans", + "first_elem", + "type(first_elem)", + ] + df_stats = pd.DataFrame(table, columns=columns) + stats_num_rows = None + df_stats_as_str = _df_to_str( + df_stats, + stats_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(df_stats_as_str) + # Print info about memory usage. + if print_memory_usage: + out.append("* memory=") + mem_use_df = pd.concat( + [df.memory_usage(deep=False), df.memory_usage(deep=True)], + axis=1, + keys=["shallow", "deep"], + ) + # Add total row. + mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) + mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) + # Convert into the desired format. + if memory_usage_mode == "bytes": + pass + elif memory_usage_mode == "human_readable": + import helpers.hintrospection as hintros + + mem_use_df = mem_use_df.applymap(hintros.format_size) + else: + raise ValueError( + f"Invalid memory_usage_mode='{memory_usage_mode}'" + ) + memory_num_rows = None + memory_usage_as_txt = _df_to_str( + mem_use_df, + memory_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(memory_usage_as_txt) + # Print info about nans. 
+ if print_nan_info: + num_elems = df.shape[0] * df.shape[1] + num_nans = df.isna().sum().sum() + txt = f"num_nans={hprint.perc(num_nans, num_elems)}" + out.append(txt) + # + num_zeros = df.isnull().sum().sum() + txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" + out.append(txt) + # TODO(gp): np can't do isinf on objects like strings. + # num_infinite = np.isinf(df).sum().sum() + # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) + # out.append(txt) + # + num_nan_rows = df.dropna().shape[0] + txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" + out.append(txt) + # + num_nan_cols = df.dropna(axis=1).shape[1] + txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" + out.append(txt) + if hsystem.is_running_in_ipynb(): + if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): + print("\n".join(out)) + txt = None + # Print the df. + df_as_str = _df_to_str( + df, + num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + if not hsystem.is_running_in_ipynb(): + out.append(df_as_str) + txt = "\n".join(out) + return txt + + +# ############################################################################# + + +def head( + df: pd.DataFrame, + *, + print_columns: bool = False, + num_rows: int = 2, + seed: Union[int, None] = None, +) -> str: + """ + Display a sample of rows from a DataFrame. + + By default shows the first `num_rows` rows. When a seed is provided, + randomly samples `num_rows` rows instead. + + :param df: The DataFrame to sample from. + :param num_rows: Number of rows to display. + :param seed: Optional random seed for reproducible sampling. If None, shows + first rows. 
+ """ + txt = "" + if print_columns: + txt += "columns=%s\n" % ",".join(df.columns.tolist()) + txt += "shape=%s\n" % str(df.shape) + # + if seed is not None: + np.random.seed(seed) + index = np.random.choice(df.index, num_rows, replace=False) + index = sorted(index) + df = df.loc[index] + else: + df = df.head(num_rows) + with pd.option_context( + "display.width", + 200, + "display.max_columns", + None, + "display.max_colwidth", + None, + ): + txt += "\n" + str(df) + return txt + + +# ############################################################################# + + +def resolve_column_names( + column_set: ColumnSet, + columns: Union[List[str], pd.Index], + *, + keep_order: bool = False, +) -> List[str]: + """ + Change format of the columns and perform some sanity checks. + + :param column_set: columns to proceed + :param columns: all columns available + :param keep_order: preserve the original order or allow sorting + """ + # Ensure that `columns` is well-formed. + if isinstance(columns, pd.Index): + columns = columns.to_list() + hdbg.dassert_isinstance(columns, list) + hdbg.dassert_lte(1, len(columns)) + # + if column_set is None: + # Columns were not specified, thus use the list of all the columns. + column_set = columns + else: + if isinstance(column_set, str): + column_set = [column_set] + hdbg.dassert_isinstance(column_set, list) + hdbg.dassert_lte(1, len(column_set)) + hdbg.dassert_is_subset(column_set, columns) + if keep_order: + # Keep the selected columns in the same order as in the original + # `columns`. + column_set = [c for c in columns if c in column_set] + return column_set + + +def _get_unique_elements_in_column(df: pd.DataFrame, col_name: str) -> List[Any]: + """ + Get unique elements in a column, handling unhashable types. 
+ + :param df: dataframe containing the column + :param col_name: name of the column to get unique elements from + :return: list of unique elements + """ + try: + vals = df[col_name].unique() + except TypeError: + # TypeError: unhashable type: 'list' + _LOG.error("Column '%s' has unhashable types", col_name) + vals = list(set(map(str, df[col_name]))) + cast(List[Any], vals) + return vals + + +def _get_variable_cols( + df: pd.DataFrame, threshold: int = 1 +) -> Tuple[List[str], List[str]]: + """ + Return columns of a df that contain less than unique values. + + :return: (variable columns, constant columns) + """ + var_cols = [] + const_cols = [] + for col_name in df.columns: + unique_elems = _get_unique_elements_in_column(df, col_name) + num_unique_elems = len(unique_elems) + if num_unique_elems <= threshold: + const_cols.append(col_name) + else: + var_cols.append(col_name) + return var_cols, const_cols + + +def remove_columns_with_low_variability( + df: pd.DataFrame, threshold: int = 1, log_level: int = logging.DEBUG +) -> pd.DataFrame: + """ + Remove columns of a df that contain less than unique values. + + :return: df with only columns with sufficient variability + """ + var_cols, const_cols = _get_variable_cols(df, threshold=threshold) + _LOG.log(log_level, "# Constant cols") + for col_name in const_cols: + unique_elems = _get_unique_elements_in_column(df, col_name) + _LOG.log( + log_level, + " %s: %s", + col_name, + hprint.list_to_str(list(map(str, unique_elems))), + ) + _LOG.log(log_level, "# Var cols") + _LOG.log(log_level, hprint.list_to_str(var_cols)) + return df[var_cols] + + +# Start copy-paste From helpers/hpandas_transform.py + + +def add_pct( + df: pd.DataFrame, + col_name: str, + total: int, + dst_col_name: str, + num_digits: int = 2, + use_thousands_separator: bool = True, +) -> pd.DataFrame: + """ + Add to df a column "dst_col_name" storing the percentage of values in + column "col_name" with respect to "total". 
The rest of the parameters are + the same as hprint.round_digits(). + + :return: updated df + """ + # Add column with percentage right after col_name. + pos_col_name = df.columns.tolist().index(col_name) + df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) + # Format. + df[col_name] = [ + hprint.round_digits( + v, num_digits=None, use_thousands_separator=use_thousands_separator + ) + for v in df[col_name] + ] + df[dst_col_name] = [ + hprint.round_digits( + v, num_digits=num_digits, use_thousands_separator=False + ) + for v in df[dst_col_name] + ] + return df + + +# End copy-paste. + + +def print_column_variability( + df: pd.DataFrame, + max_num_vals: int = 3, + num_digits: int = 2, + use_thousands_separator: bool = True, +) -> pd.DataFrame: + """ + Print statistics about the values in each column of a data frame. + + This is useful to get a sense of which columns are interesting. + """ + print(("# df.columns=%s" % hprint.list_to_str(df.columns))) + res = [] + for c in tauton.tqdm(df.columns, desc="Computing column variability"): + vals = _get_unique_elements_in_column(df, c) + try: + min_val = min(vals) + except TypeError as e: + _LOG.debug("Column='%s' reported %s", c, e) + min_val = "nan" + try: + max_val = max(vals) + except TypeError as e: + _LOG.debug("Column='%s' reported %s", c, e) + max_val = "nan" + if len(vals) <= max_num_vals: + txt = ", ".join(map(str, vals)) + else: + txt = ", ".join(map(str, [min_val, "...", max_val])) + row = ["%20s" % c, len(vals), txt] + res.append(row) + res = pd.DataFrame(res, columns=["col_name", "num", "elems"]) + res.sort_values("num", inplace=True) + # TODO(gp): Fix this. 
+ # res = add_count_as_idx(res) + res = add_pct( + res, + "num", + df.shape[0], + "[diff %]", + num_digits=num_digits, + use_thousands_separator=use_thousands_separator, + ) + res.reset_index(drop=True, inplace=True) + return res + + +def breakdown_table( + df: pd.DataFrame, + col_name: str, + num_digits: int = 2, + use_thousands_separator: bool = True, + verbosity: bool = False, +) -> pd.DataFrame: + """ + Create a breakdown table showing value counts and percentages for a column. + + :param df: dataframe to analyze + :param col_name: column name to create breakdown for + :param num_digits: number of decimal digits for percentages + :param use_thousands_separator: whether to use thousands separator + in counts + :param verbosity: whether to print additional details + :return: breakdown table with counts and percentages + """ + if isinstance(col_name, list): + for c in col_name: + print(("\n" + hprint.frame(c).rstrip("\n"))) + res = breakdown_table(df, c) + print(res) + return None + # + if verbosity: + print(("# col_name=%s" % col_name)) + first_col_name = df.columns[0] + res = df.groupby(col_name)[first_col_name].count() + res = pd.DataFrame(res) + res.columns = ["count"] + res.sort_values(["count"], ascending=False, inplace=True) + res = pd.concat( + [res, pd.DataFrame([df.shape[0]], index=["Total"], columns=["count"])] + ) + res["pct"] = (100.0 * res["count"]) / df.shape[0] + # Format. 
+ res["count"] = [ + hprint.round_digits( + v, num_digits=None, use_thousands_separator=use_thousands_separator + ) + for v in res["count"] + ] + res["pct"] = [ + hprint.round_digits( + v, num_digits=num_digits, use_thousands_separator=False + ) + for v in res["pct"] + ] + if verbosity: + for k, df_tmp in df.groupby(col_name): + print((hprint.frame("%s=%s" % (col_name, k)))) + cols = [col_name, "description"] + with pd.option_context( + "display.max_colwidth", 100000, "display.width", 130 + ): + print((df_tmp[cols])) + return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py new file mode 100644 index 000000000..319c6cf44 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py @@ -0,0 +1,1309 @@ +""" +Import as: + +import helpers.hparquet as hparque +""" + +import collections +import datetime +import glob +import logging +import os +from typing import Any, Callable, Iterator, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.fs as pafs +import pyarrow.parquet as pq + +# Check if S3FileSystem is available in `pyarrow.fs`. +if hasattr(pafs, "S3FileSystem"): + S3FileSystemAvailable = True + PyArrowS3FileSystem = pafs.S3FileSystem +else: + S3FileSystemAvailable = False + + # Define a dummy class for type hints when S3FileSystem is not available. 
+ class PyArrowS3FileSystem: + def __init__(self, *args, **kwargs): + raise ImportError( + "S3FileSystem is not available in this version of pyarrow.fs" + ) + + +from tqdm.autonotebook import tqdm + +import helpers.hdataframe as hdatafr +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hpandas as hpandas +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.htimer as htimer + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# ParquetDataFrameGenerator +# ############################################################################# + + +class ParquetDataFrameGenerator: + # Allowed types. + OUTPUT_TYPES = ("basic", "verbose_open", "cm_task_1103") + + @staticmethod + def _wrap_all_assets_df(df: List[pd.DataFrame]) -> pd.DataFrame: + # Create a single dataframe for all the assets. + df = pd.concat(df) + _LOG.debug(hpandas.df_to_str(df, print_shape_info=True, tag="df")) + return df + + def _get_core_dataframes(self) -> List[pd.DataFrame]: + """ + Create core dataframes that are updated according to the output type. + + :return: list of core dataframes for specified assets with string values + Example: + + ``` + asset + 2000-01-01 A + 2000-01-02 A + 2000-01-03 A + ``` + """ + # Generate core dataframe for each asset. + df = [] + for asset in self._assets: + asset_df = pd.DataFrame( + {self._asset_col_name: asset}, + index=self._dataframe_index, + ) + _LOG.debug( + hpandas.df_to_str( + asset_df, print_shape_info=True, tag="asset_df" + ) + ) + df.append(asset_df) + return df + + def _get_daily_basic_dataframe(self) -> pd.DataFrame: + """ + Update core dataframes with additional columns. 
+ + :return: updated core dataframe as presented below + Example: + + ``` + idx asset val1 val2 + 2000-01-01 0 A 00 00 + 2000-01-02 0 A 01 01 + 2000-01-03 0 A 02 02 + ``` + """ + asset_dataframes = self._get_core_dataframes() + for idx, asset_dataframe in enumerate(asset_dataframes): + # Positioned left from `asset` column. + asset_dataframe.insert(loc=0, column="idx", value=idx) + # Positioned right from `asset` column. + asset_dataframe.insert( + loc=2, + column="val1", + value=list(range(len(self._dataframe_index))), + ) + asset_dataframe.insert( + loc=3, + column="val2", + value=list(range(len(self._dataframe_index))), + ) + return self._wrap_all_assets_df(asset_dataframes) + + def _get_verbose_open_dataframe(self) -> pd.DataFrame: + """ + Update core dataframes with additional columns. + + :return: update core dataframe as presented below + Example: + + ``` + vendor_date interval start_time end_time ticker currency open id + 2021-11-24 60 1637762400 1637762460 A USD 100 1 + 2021-11-24 60 1637762400 1637762460 A USD 200 2 + ``` + """ + interval = self._dataframe_index[1] - self._dataframe_index[0] + interval = interval.seconds + asset_dataframes = self._get_core_dataframes() + for id_, asset_dataframe in enumerate(asset_dataframes): + start_time = ( + asset_dataframe.index - pd.Timestamp("1970-01-01") + ) // pd.Timedelta("1s") + end_time = start_time + interval + # Positioned left from `ticker` column. + asset_dataframe.insert( + loc=0, + column="vendor_date", + value=asset_dataframe.index.date.astype(str), + ) + asset_dataframe.insert(loc=1, column="interval", value=interval) + asset_dataframe.insert(loc=2, column="start_time", value=start_time) + asset_dataframe.insert(loc=3, column="end_time", value=end_time) + # Positioned right from `ticker` column. 
+ asset_dataframe.insert(loc=5, column="currency", value="USD") + asset_dataframe.insert( + loc=6, + column="open", + value=list(range(len(self._dataframe_index))), + ) + asset_dataframe.insert(loc=7, column="id", value=id_) + return self._wrap_all_assets_df(asset_dataframes) + + # TODO(Dan): CmTask1490. + def _get_cm_task_1103_dataframe(self) -> pd.DataFrame: + """ + Update core dataframes with additional columns. + + :return: updated core dataframe as presented below + Example: + + ``` + full_symbol close + 2000-01-01 10689 100 + 2000-01-02 10689 200 + 2000-01-03 10689 300 + ``` + """ + asset_dataframes = self._get_core_dataframes() + for asset_dataframe in asset_dataframes: + # Positioned right from asset column. + asset_dataframe.insert( + loc=1, + column="close", + value=list(range(len(self._dataframe_index))), + ) + return self._wrap_all_assets_df(asset_dataframes) + + def __init__( + self, + start_date: str, + end_date: str, + output_type: str, + assets: List[Union[str, int]], + asset_col_name: str, + freq: str, + ) -> None: + """ + Constructor. 
+ + :param start_date: start of date range including start_date + :param end_date: end of date range excluding end_date + :param output_type: type of data that is generated + :param assets: list of desired assets that can be names or ids + :param asset_col_name: name of the column that stores assets + :param freq: frequency of steps between start and end date + """ + self._start_date = start_date + self._end_date = end_date + self._output_type = output_type + self._assets = assets + self._asset_col_name = asset_col_name + self._freq = freq + self._dataframe_index = pd.date_range( + self._start_date, + self._end_date, + freq=self._freq, + inclusive="left", + tz="UTC", + ) + self._OUTPUT_TYPE_FUNCTION_MAP = { + "basic": self._get_daily_basic_dataframe, + "verbose_open": self._get_verbose_open_dataframe, + "cm_task_1103": self._get_cm_task_1103_dataframe, + } + + @property + def output_type_function(self) -> Callable: + """ + Return proper function for data generation depending on output type. + """ + return self._OUTPUT_TYPE_FUNCTION_MAP[self._output_type] + + def generate(self) -> pd.DataFrame: + """ + Generate specific dataframe based on inputs provided in instance + creation. + """ + if self._output_type not in self.OUTPUT_TYPES: + raise ValueError(f"Unsupported data type `{self._output_type}`!") + return self.output_type_function() + + +def add_date_partition_columns( + df: pd.DataFrame, partition_mode: str +) -> Tuple[pd.DataFrame, List[str]]: + """ + Add partition columns like year, month, day from datetime index. 
+ + :param df: dataframe indexed by timestamp + :param partition_mode: + - "by_date": extract the date from the index + - E.g., an index like `2022-01-10 14:00:00+00:00` is transform to a + column `20220110` + - "by_year_month_day": split the index in year, month, day columns + - "by_year_month": split by year and month + - "by_year_week": split by year and week of the year + - "by_year": split by year + :return: + - df with additional partitioning columns + - list of partitioning columns + """ + with htimer.TimedScope(logging.DEBUG, "# add_date_partition_cols"): + if partition_mode == "by_date": + df["date"] = df.index.strftime("%Y%m%d") + partition_columns = ["date"] + else: + if partition_mode == "by_year_month_day": + partition_columns = ["year", "month", "day"] + elif partition_mode == "by_year_month": + partition_columns = ["year", "month"] + elif partition_mode == "by_year_week": + partition_columns = ["year", "weekofyear"] + elif partition_mode == "by_year": + partition_columns = ["year"] + elif partition_mode == "by_month": + partition_columns = ["month"] + else: + raise ValueError(f"Invalid partition_mode='{partition_mode}'") + # Add date columns chosen by partition mode. + for column_name in partition_columns: + # Extract data corresponding to `column_name` (e.g., + # `df.index.year`). + if column_name == "weekofyear": + # The `weekofyear` attribute has been deprecated in Pandas + # 2.1.0, so weeks are extracted using a function instead of + # the attribute name. + df["weekofyear"] = df.index.isocalendar().week + else: + df[column_name] = getattr(df.index, column_name) + return df, partition_columns + + +def to_partitioned_parquet( + df: pd.DataFrame, + partition_columns: List[str], + dst_dir: str, + *, + aws_profile: hs3.AwsProfile = None, + basename_template: str = None, +) -> None: + """ + Save the given dataframe as Parquet file partitioned along the given + columns. 
+ + :param df: dataframe + :param partition_columns: partitioning columns + :param dst_dir: location of partitioned dataset + :param aws_profile: the name of an AWS profile or a s3fs filesystem + + E.g., in case of partition using `date`, the file layout looks like: + ``` + dst_dir/ + date=20211230/ + data.parquet + date=20211231/ + data.parquet + date=20220101/ + data.parquet + ``` + + In case of multiple columns like `asset`, `year`, `month`, the file layout + looks like: + ``` + dst_dir/ + asset=A/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... + asset=B/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + """ + # Use either S3 or local filesystem. + filesystem = None + if aws_profile is not None: + filesystem = hs3.get_s3fs(aws_profile) + # ParquetDataset appends an extra "/", creating an empty-named folder + # when saving on S3. + dst_dir = dst_dir.rstrip("/") + with htimer.TimedScope(logging.DEBUG, "# partition_dataset"): + # Read. + table = pa.Table.from_pandas(df) + # Write using partition. + # TODO(gp): add this logic to hparquet.to_parquet as a possible option. + _LOG.debug(hprint.to_str("partition_columns dst_dir")) + hdbg.dassert_is_subset(partition_columns, df.columns) + # TODO(gp): We would like to avoid overriding existing tiles. It's not clear + # how to do it. Either setting permissions to read-only before writing. + # Or having a list of files that will be written and ensure that none of + # those files already existing. 
+ pq.write_to_dataset( + table, + dst_dir, + partition_cols=partition_columns, + filesystem=filesystem, + basename_template=basename_template, + ) + + +def generate_parquet_files( + start_date: str, + end_date: str, + assets: List[Union[str, int]], + asset_col_name: str, + dst_dir: str, + *, + freq: str = "1H", + output_type: str = "basic", + partition_mode: str = "by_date", + custom_partition_cols: Optional[str] = None, + reset_index: bool = False, +) -> None: + """ + Generate parquet files for testing. + + :param start_date: date from which the data is generated, value + included + :param end_date: date until which the data is generated, value + excluded + :param assets: list of assets that can be either names or ids + :param asset_col_name: name of the column that stores assets + :param dst_dir: destination dir for generated data + :param freq: frequency of data generation + :param output_type: type of data that is generated + :param partition_mode: Partition mode for parquet DataFrame, default + by date + :param custom_partition_cols: overrides default partition by time + :param reset_index: reset dataframe index to default sequential + integer values + """ + # Generate timespan. + hdbg.dassert_lt(start_date, end_date) + timespan = pd.date_range(start_date, end_date) + hdbg.dassert_lt(2, len(timespan)) + # Run dataframe generation. + pdg = ParquetDataFrameGenerator( + start_date, end_date, output_type, assets, asset_col_name, freq + ) + parquet_df = pdg.generate() + # Add partition columns to the dataframe. + df, partition_cols = add_date_partition_columns(parquet_df, partition_mode) + if custom_partition_cols: + # If custom partition is provided, it will override date partition. + # Sample: `["asset", "year", "month"]` + custom_partition_cols = custom_partition_cols.split(",") + # Ensure that date partition columns are present. 
+ hdbg.dassert_is_subset(partition_cols, custom_partition_cols) + partition_cols = custom_partition_cols + # Partition and write dataset. + if reset_index: + df = df.reset_index(drop=True) + # TODO(Nikola): When direct run is possible, expose usage of `aws_profile` + # so generator can be used in conjunction with `helpers.hmoto.S3Mock_TestCase`. + # Will probably be part of CMTask #1490. + to_partitioned_parquet(df, partition_cols, dst_dir) + + +def get_pyarrow_s3fs(*args: Any, **kwargs: Any) -> PyArrowS3FileSystem: + """ + Return an Pyarrow S3Fs object from a given AWS profile. + + Same as `hs3.get_s3fs`, used specifically for accessing Parquet + datasets. + """ + # Check if S3FileSystem is available + hdbg.dassert( + S3FileSystemAvailable, + "S3FileSystem is not available in this version of pyarrow.fs", + ) + # When deploying jobs via ECS the container obtains credentials based on passed + # task role specified in the ECS task-definition, refer to: + # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html + if hserver.is_inside_ecs_container(): + _LOG.info("Fetching credentials from task IAM role") + s3fs_ = PyArrowS3FileSystem() + else: + aws_credentials = hs3.get_aws_credentials(*args, **kwargs) + s3fs_ = PyArrowS3FileSystem( + access_key=aws_credentials["aws_access_key_id"], + secret_key=aws_credentials["aws_secret_access_key"], + session_token=aws_credentials["aws_session_token"], + region=aws_credentials["aws_region"], + ) + return s3fs_ + + +def _get_parquet_tiles_from_file_path(file_path: str) -> List[Tuple[str, Any]]: + """ + Hacky function to help get tile values from parquet file path. + + Used by from_parquet when loading first n rows of a dataset only. 
+ + Example + input: ...ccxt/binance/v1_0_0/currency_pair=CTK_USDT/ + year=2023/month=3/26dc59f62b87403d9a3e9f04c7c21382-0.parquet + output: [("currency_pair", "CTK_USDT"), ("year", 2023), ("month", 3)] + """ + path_parts = file_path.split("/") + tiles = [] + for part in path_parts: + if "=" in part: + col, value = part.split("=") + value = int(value) if value.isdigit() else value + tiles.append((col, value)) + return tiles + + +# TODO(Dan): Add mode to allow querying even when some non-existing columns are passed. +def from_parquet( + file_name: str, + *, + columns: Optional[List[str]] = None, + filters: Optional[List[Any]] = None, + n_rows: Optional[int] = None, + schema: Optional[List[Tuple[str, pa.DataType]]] = None, + log_level: int = logging.DEBUG, + report_stats: bool = False, + aws_profile: hs3.AwsProfile = None, +) -> pd.DataFrame: + """ + Load a dataframe from a Parquet file. + + The difference with `pd.read_pq` is that here we use Parquet + Dataset. + + :param file_name: path to a Parquet dataset + :param columns: columns to return, skipping reading columns that are not requested + - `None` means return all available columns + :param filters: Parquet query + :param n_rows: the number of rows to load, load all data if `None` + :param schema: see `pyarrow.Schema`, e.g., `schema = + [("int_col", pa.int32()), ("str_col", pa.string())]` + :param log_level: logging level to execute at + :param report_stats: whether to report Parquet file size or not + :param aws_profile: AWS profile to use if and only if using an S3 path, + otherwise `None` for local path + :return: data from Parquet dataset + """ + _LOG.debug(hprint.to_str("file_name columns filters schema")) + hdbg.dassert_isinstance(file_name, str) + hs3.dassert_is_valid_aws_profile(file_name, aws_profile) + if hs3.is_s3_path(file_name): + if isinstance(aws_profile, str): + filesystem = get_pyarrow_s3fs(aws_profile) + else: + # Note: `s3fs` filesystem is only to be used on exact file path + # as 
`pq.ParquetDataset` is not properly handling directory path. + filesystem = aws_profile + # Pyarrow S3FileSystem does not have `exists` method. + s3_filesystem = hs3.get_s3fs(aws_profile) + hs3.dassert_path_exists(file_name, s3_filesystem) + file_name = file_name.lstrip("s3://") + else: + filesystem = None + hdbg.dassert_path_exists(file_name) + # Load data. + with htimer.TimedScope( + logging.DEBUG, f"# Reading Parquet file '{file_name}'" + ) as ts: + if n_rows: + # Get the latest parquet file in the directory. + hdbg.dassert_isinstance( + aws_profile, + str, + "aws_profile must be a string for S3 operations", + ) + last_pq_file = hs3.get_latest_pq_in_s3_dir(file_name, aws_profile) + file = s3_filesystem.open(last_pq_file, "rb") + # Load the data. + parquet_file = pq.ParquetFile(file) + # Get the head of the data. + df = ( + parquet_file.read_row_group(0, columns=parquet_file.schema.names) + .to_pandas() + .head(n_rows) + ) + if columns: + # Note: `schema.names` also includes and index. + hdbg.dassert_is_subset(columns, parquet_file.schema.names) + df = df[columns] + # Hacky way to append tile values lost when obtaining particular .pq file. + tiles = _get_parquet_tiles_from_file_path(last_pq_file) + for col, value in tiles: + df[col] = value + else: + if schema is not None: + # Pass partition columns types explicitly. + schema = pa.schema(schema) + partitioning = ds.partitioning(schema, flavor="hive") + dataset = pq.ParquetDataset( + # Replace URI with path. + file_name, + filesystem=filesystem, + filters=filters, + partitioning=partitioning, + ) + if columns: + # Note: `schema.names` also includes and index. + hdbg.dassert_is_subset(columns, dataset.schema.names) + # To read also the index we need to use `read_pandas()`, instead of + # `read_table()`. + # See https://arrow.apache.org/docs/python/parquet.html#reading-and-writing-single-files. 
+ table = dataset.read_pandas(columns=columns) + # Convert the Pandas Dataframe timestamp columns and index to `ns` + # resolution. The general approach is to preserve the time unit + # information after reading data back from Parquet files. + # Currently, it's challenging to resolve this issue since Parquet + # data is mixed with data from CSV files, which convert the time + # unit to `ns` by default. Refer to CmampTask7331 for details. + # https://github.com/cryptokaizen/cmamp/issues/7331 + df = table.to_pandas(coerce_temporal_nanoseconds=True) + if isinstance(df.index, pd.DatetimeIndex): + df.index = df.index.as_unit("ns") + # Report stats about the df. + _LOG.debug("df.shape=%s", str(df.shape)) + mem = df.memory_usage().sum() + _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) + # Report stats about the Parquet file size. + if report_stats: + file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) + _LOG.log( + log_level, + "Loaded '%s' (size=%s, time=%.1fs)", + file_name, + file_size, + ts.elapsed_time, + ) + return df + + +# Copied from `hio.create_enclosing_dir()` to avoid circular dependencies. +def _create_enclosing_dir(file_name: str) -> Optional[str]: + dir_name = os.path.dirname(file_name) + if dir_name != "": + _LOG.debug( + "Creating dir_name='%s' for file_name='%s'", dir_name, file_name + ) + hdbg.dassert_is_not(dir_name, None) + dir_name = os.path.normpath(dir_name) + if os.path.normpath(dir_name) == ".": + _LOG.debug("Can't create dir '%s'", dir_name) + if os.path.exists(dir_name): + # The dir exists and we want to keep it, so we are done. + _LOG.debug("The dir '%s' exists: exiting", dir_name) + return None + _LOG.debug("Creating directory '%s'", dir_name) + try: + os.makedirs(dir_name) + except OSError as e: + _LOG.error(str(e)) + # It can happen that we try to create the directory while somebody else + # created it, so we neutralize the corresponding exception. + if e.errno == 17: + # OSError: [Errno 17] File exists. 
+ pass + else: + raise e + hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) + return dir_name + + +def to_parquet( + df: pd.DataFrame, + file_name: str, + *, + log_level: int = logging.DEBUG, + report_stats: bool = False, + aws_profile: hs3.AwsProfile = None, +) -> None: + """ + Save a dataframe as Parquet. + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(file_name, str) + hs3.dassert_is_valid_aws_profile(file_name, aws_profile) + if hs3.is_s3_path(file_name): + filesystem = hs3.get_s3fs(aws_profile) + hs3.dassert_path_not_exists(file_name, filesystem) + file_name = file_name.lstrip("s3://") + else: + filesystem = None + hdbg.dassert_path_not_exists(file_name) + hdbg.dassert_file_extension(file_name, ["parquet", "pq"]) + # There is no concept of directory on S3. + # Only applicable to local filesystem. + if aws_profile is None: + _create_enclosing_dir(file_name) + # Report stats about the df. + _LOG.debug("df.shape=%s", str(df.shape)) + mem = df.memory_usage().sum() + _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) + # Save data. + with htimer.TimedScope( + logging.DEBUG, f"# Writing Parquet file '{file_name}'" + ) as ts: + table = pa.Table.from_pandas(df) + # This is needed to handle: + # ``` + # pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=America/New_York] + # to timestamp[us] would lose data: 1663595160000000030 + # ``` + # No need to cast to `us` since pyarrow >= 15.0.0. + # See + # https://github.com/cryptokaizen/cmamp/blob/master/docs/infra/all.parquet.explanation.md#time-unit-conversion-when-writing-to-parquet + # for details. + # parquet_args = { + # "coerce_timestamps": "us", + # "allow_truncated_timestamps": True, + # } + # pq.write_table(table, file_name, filesystem=filesystem, **parquet_args) + pq.write_table(table, file_name, filesystem=filesystem) + # Report stats about the Parquet file size. 
+ if report_stats: + file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) + _LOG.log( + log_level, + "Saved '%s' (size=%s, time=%.1fs)", + file_name, + file_size, + ts.elapsed_time, + ) + + +# ############################################################################# + + +def _yield_parquet_tile( + file_name: str, + columns: Optional[List[str]], + filters: List[Any], + asset_id_col: str, +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in a single tile given the filters. + + It is assumed that data is partitioned by asset_id, year and month, i.e. + the file layout is: + + ``` + file_name/ + asset_id=1032127330/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... + asset_id=2133227690/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + + :param file_name: see `from_parquet()` + :param columns: see `from_parquet()` + :param filters: see `from_parquet()` + :param asset_id_col: name of the column with asset ids + :return: a generator of `from_parquet()` dataframe + """ + # Without the schema being provided `pyarrow` incorrectly infers + # type of the asset id column, i.e. `pyarrow` reads assets as + # strings instead of integers. See the related discussion at + # `https://issues.apache.org/jira/browse/ARROW-6114`. + int_type = np.int64 + pyarrow_int_type = pa.from_numpy_dtype(int_type) + schema = [ + (asset_id_col, pyarrow_int_type), + # TODO(Grisha): consider passing year and month column names as params. 
+ ("year", pyarrow_int_type), + ("month", pyarrow_int_type), + ] + tile = from_parquet( + file_name, + columns=columns, + filters=filters, + schema=schema, + ) + hpandas.dassert_series_type_is(tile[asset_id_col], int_type) + yield tile + + +def build_asset_id_filter( + asset_ids: List[int], + asset_id_col: str, +) -> List[List[Tuple[str, str, int]]]: + filters = [] + for asset_id in asset_ids: + filters.append([(asset_id_col, "==", asset_id)]) + return filters + + +def build_year_month_filter( + start_date: datetime.date, + end_date: datetime.date, +) -> list: + """ + Use the year/months to build a Parquet filter. + + If `start_date.year == end_date.year`, then return a list of + three tuples (to be "ANDed" together) based on the year and months. + Else, return a list of list of tuples: + - the inner lists consist of AND filters; the inner lists are ORed + together if used as a single filter + - each inner list filter represents a calendar year or part thereof + + One use case of this function is to generate a filter whose OR + components can be processed one-by-one. For example, if memory constraints + prevent loading an entire tile at once, then one could instead attempt to + load one-year tiles one at a time. + + NOTE: `start_date.day` and `end_date.day` are ignored. + + TODO(Paul): Consider adding a switch to support smaller AND filter chunks + (e.g., at monthly instead of yearly granularity). 
+ """ + hdbg.dassert_isinstance(start_date, datetime.date) + hdbg.dassert_isinstance(end_date, datetime.date) + hdbg.dassert_lte(start_date, end_date) + start_year = start_date.year + end_year = end_date.year + filter_ = [] + # + if start_year == end_year: + filter_.append(("year", "==", start_year)) + filter_.append(("month", ">=", start_date.month)) + filter_.append(("month", "<=", end_date.month)) + else: + start_year_filter = [] + start_year_filter.append(("year", "==", start_year)) + start_year_filter.append(("month", ">=", start_date.month)) + end_year_filter = [] + end_year_filter.append(("year", "==", end_year)) + end_year_filter.append(("month", "<=", end_date.month)) + filter_.append(start_year_filter) + filter_.append(end_year_filter) + for year in range(start_year + 1, end_year): + year_filter = [] + year_filter.append(("year", "==", year)) + filter_.append(year_filter) + return filter_ + + +def yield_parquet_tiles_by_year( + file_name: str, + start_date: datetime.date, + end_date: datetime.date, + cols: List[Union[int, str]], + *, + asset_ids: Optional[List[int]] = None, + asset_id_col: str = "asset_id", +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in tiles up to one year in length. + + :param file_name: as in `from_parquet()` + :param start_date: first date to load; day is ignored + :param end_date: last date to load; day is ignored + :param cols: if an `int` is supplied, it is cast to a string before reading + :param asset_ids: asset ids to load + :param asset_id_col: see `_yield_parquet_tile()` + :return: a generator of `from_parquet()` dataframes + """ + time_filters = build_year_month_filter(start_date, end_date) + hdbg.dassert_isinstance(time_filters, list) + # The list should not be empty. 
+ hdbg.dassert(time_filters) + if not isinstance(time_filters[0], list): + time_filters = [time_filters] + columns = [str(col) for col in cols] + if asset_ids is None: + asset_ids = [] + asset_id_filter = build_asset_id_filter(asset_ids, asset_id_col) + for time_filter in time_filters: + if asset_id_filter: + combined_filter = [ + id_filter + time_filter for id_filter in asset_id_filter + ] + else: + combined_filter = time_filter + yield from _yield_parquet_tile( + file_name, columns, combined_filter, asset_id_col + ) + + +# TODO(Paul): Add additional time-restriction filter. +def yield_parquet_tiles_by_assets( + file_name: str, + asset_ids: List[int], + asset_id_col: str, + asset_batch_size: int, + cols: Optional[List[Union[int, str]]], +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in tiles batched by asset ids. + + :param file_name: as in `from_parquet()` + :param asset_ids: asset ids to load + :param asset_id_col: see `_yield_parquet_tile()` + :param asset_batch_size: the number of asset to load in a single batch + :param cols: if an `int` is supplied, it is cast to a string before reading + :return: a generator of `from_parquet()` dataframes + """ + hdbg.dassert_isinstance(asset_id_col, str) + hdbg.dassert(asset_id_col, "`asset_id_col` must be nonempty") + batches = [ + asset_ids[i : i + asset_batch_size] + for i in range(0, len(asset_ids), asset_batch_size) + ] + columns: Optional[List[str]] = None + if cols: + columns = [str(col) for col in cols] + for batch in tqdm(batches): + _LOG.debug("assets=%s", batch) + filter_ = build_asset_id_filter(batch, asset_id_col) + yield from _yield_parquet_tile(file_name, columns, filter_, asset_id_col) + + +def build_filter_with_only_equalities( + start_timestamp: pd.Timestamp, end_timestamp: pd.Timestamp +) -> list: + """ + Build a list of Parquet filters based on equality conditions for partition + columns. 
+ + This function creates a filter for each partition column (year, month, day) based on the + equality conditions between components of the timestamp arguments when possible. + + Example: + Input args: + start_timestamp: 2022-08-31T00:01:00+00:00 + end-timestamp: 2022-08-31T23:59:59+00:00 + Output: + [("year", "=", 2022), ("month", "=", 8), ("day", "=", 31)] + + These filters enhance performance by allowing to load data quicker when used in tandem with timestamp filters. + Less memory will be used because less `.pq` need to be loaded. + + :param start_timestamp: start of the interval. + :param end_timestamp: end of the interval: + """ + hdbg.dassert_isinstance(start_timestamp, pd.Timestamp) + hdbg.dassert_isinstance(end_timestamp, pd.Timestamp) + filters = [] + if start_timestamp.year == end_timestamp.year: + filters.append(("year", "==", start_timestamp.year)) + if start_timestamp.month == end_timestamp.month: + filters.append(("month", "==", start_timestamp.month)) + if start_timestamp.day == end_timestamp.day: + filters.append(("day", "==", start_timestamp.day)) + return filters + + +# TODO(Paul): The `int` assumption is baked in. We can generalize to strings +# if needed, but if we do, then we should continue to handle string ints as +# ints as we do here (e.g., there are sorting advantages, among others). +def _process_walk_triple( + triple: tuple, start_depth: int +) -> Tuple[Tuple[str, ...], Tuple[int, ...]]: + """ + Process a triple returned by `os.walk()` + + :param triple: (dirpath: str, dirnames: List[str], filenames: List[str]) + :param start_depth: the "depth" of `path` used in the call + `os.walk(path)` + :return: tuple(lhs_vals), tuple(rhs_vals) + """ + lhs_vals: List[str] = [] + rhs_vals: List[int] = [] + # If there are subdirectories, do not process. 
+ if triple[1]: + return tuple(lhs_vals), tuple(rhs_vals) + depth = len(triple[0].split("/")) + rel_depth = depth - start_depth + key = tuple(triple[0].split("/")[start_depth:]) + if len(key) == 0: + return tuple(lhs_vals), tuple(rhs_vals) + hdbg.dassert_eq(len(key), rel_depth) + lhs_vals = [] + rhs_vals = [] + for string in key: + lhs, rhs = string.split("=") + lhs_vals.append(lhs) + rhs_vals.append(int(rhs)) + hdbg.dassert_eq(len(lhs_vals), len(rhs_vals)) + return tuple(lhs_vals), tuple(rhs_vals) + + +def collate_parquet_tile_metadata( + path: str, +) -> pd.DataFrame: + """ + Report stats in a dataframe on Parquet file partitions. + + The directories should be of the form `lhs=rhs` where "rhs" is a string + representation of an `int`. + + :param path: path to top-level Parquet directory + :return: dataframe with two file size columns and a multiindex reflecting + the Parquet path structure. + """ + hdbg.dassert_dir_exists(path) + # Remove the trailing slash to simplify downstream accounting. + if path.endswith("/"): + path = path[:-1] + hdbg.dassert(not path.endswith("/")) + # Walk the path. + # os.walk() yields a 3-tuple of the form + # (dirpath: str, dirnames: List[str], filenames: List[str]) + start_depth = len(path.split("/")) + headers_set = set() + dict_ = collections.OrderedDict() + for triple in os.walk(path): + # If the walk has taken us to, e.g., + # asset_id=100/year=2010/month=1/data.parquet + # then we expect + # lhs = ("asset_id", "year", "month") + # rhs = (100, 2010, 1) + lhs, rhs = _process_walk_triple(triple, start_depth) + # If the walkabout has not yet taken us to a file, continue. + if not lhs: + continue + # The tuple `lhs` is to become the index headers. We check later + # for uniqueness. + headers_set.add(lhs) + # Get the file name and full path. + file_name = triple[2][0] + file_path = os.path.join(triple[0], file_name) + # Record the size of the file. We keep this in bytes for easy + # join aggregations. 
+ size_in_bytes = os.path.getsize(file_path) + dict_[rhs] = size_in_bytes + # Ensure that headers are unambiguous. + hdbg.dassert_eq(len(headers_set), 1) + # Convert to a multiindexed dataframe. + df = pd.DataFrame(dict_.values(), index=dict_.keys()) + df.rename(columns={0: "file_size_in_bytes"}, inplace=True) + headers = headers_set.pop() + df.index.names = headers + df.sort_index(inplace=True) + # Add a more human-readable file size column. Keep the original numerical + # one for downstream aggregations. + file_size = df["file_size_in_bytes"].apply(hintros.format_size) + df["file_size"] = file_size + return df + + +# ############################################################################# + +# A Parquet filtering condition. e.g., `("year", "=", year)` +ParquetFilter = Tuple[str, str, Any] +# The AND of Parquet filtering conditions, e.g., +# `[("year", "=", year), ("month", "=", month)]` +ParquetAndFilter = List[ParquetFilter] +# A OR-AND Parquet filtering condition, e.g., +# ``` +# [[('year', '=', 2020), ('month', '=', 1)], +# [('year', '=', 2020), ('month', '=', 2)], +# [('year', '=', 2020), ('month', '=', 3)]] +# ``` +ParquetOrAndFilter = List[ParquetAndFilter] + + +# TODO(gp): @Nikola add light unit tests for `by_year_week` and for additional_filter. +# TODO(gp): Can we return a single type? +def get_parquet_filters_from_timestamp_interval( + partition_mode: str, + start_timestamp: Optional[pd.Timestamp], + end_timestamp: Optional[pd.Timestamp], + *, + additional_filters: Optional[List[ParquetFilter]] = None, +) -> Union[ParquetOrAndFilter, ParquetAndFilter]: + """ + Convert a constraint on a timestamp [start_timestamp, end_timestamp] into a + Parquet filters expression, based on the passed partitioning / tiling + criteria. + + :param partition_mode: control filtering of Parquet datasets. It needs to be + in sync with the way the data was saved + :param start_timestamp: start of the interval. 
`None` means no bound + :param end_timestamp: end of the interval. `None` means no bound + :param additional_filters: AND conditions to add to the final filter. + E.g., if we want to constraint also on `exchange_id` and 'currency_pair`, + we can specify + `[("exchange_id", "in", (...)),("currency_pair", "in", (...))]` + :return: list of OR-AND predicates + """ + # Check timestamp interval. + left_close = True + right_close = True + hdateti.dassert_is_valid_interval( + start_timestamp, + end_timestamp, + left_close=left_close, + right_close=right_close, + ) + or_and_filter = [] + if partition_mode == "by_year_month": + # Handle the first and last year of the interval. + if start_timestamp: + # `[('year', '==', 2020), ('month', '>=', 6)]` + and_filter = [ + ("year", "==", start_timestamp.year), + ("month", ">=", start_timestamp.month), + ] + or_and_filter.append(and_filter) + if end_timestamp: + # `[('year', '==', 2021), ('month', '<=', 3)]` + and_filter = [ + ("year", "==", end_timestamp.year), + ("month", "<=", end_timestamp.month), + ] + or_and_filter.append(and_filter) + if start_timestamp and end_timestamp: + number_of_years = len( + range(start_timestamp.year, end_timestamp.year + 1) + ) + if number_of_years == 1: + # For a one-year range, we overwrite the result with a single AND + # statement, e.g., `[Jan 2020, Mar 2020]` corresponds to + # `[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 3)]]`. + # Note that this interval is different from and OR-AND form as + # `[[('year', '==', 2020), ('month', '>=', 1)], + # [('year', '==', 2020), ('month', '<=', 3)]]` + # since the first AND clause include months <= 3 and the second one + # include months >= 1, and the OR corresponds to the entire year, + # instead of the interval `[Jan 2020, Mar 2020]`. 
+ and_filter = [ + ("year", "==", start_timestamp.year), + ("month", ">=", start_timestamp.month), + ("month", "<=", end_timestamp.month), + ] + or_and_filter = [and_filter] + elif number_of_years > 2: + # For ranges over two years, one OR statement is necessary to bridge + # the gap between first and last AND statement. + # `[('year', '>', 2020), ('year', '<', 2023)]` + # Inserted in middle as bridge between AND statements. + and_filter = [ + ("year", ">", start_timestamp.year), + ("year", "<", end_timestamp.year), + ] + or_and_filter.insert(1, and_filter) + else: + # For intervals of exactly two years the two AND conditions are + # enough to select the desired period of time. + pass + elif len(or_and_filter) == 1: + # Handle the case when exactly one of the interval bounds is passed, + # e.g., [June 2020, None]. + # In this case the first year was covered by the code above (i.e., + # `year >= 2020 and month == 6`) and we need to specify the rest of + # the years (i.e., `year > 2020`). + operator = ">" if start_timestamp else "<" + timestamp = start_timestamp if start_timestamp else end_timestamp + hdbg.dassert_is_not(timestamp, None, "timestamp should not be None") + extra_filter = [("year", operator, timestamp.year)] + or_and_filter.append(extra_filter) + else: + # If there is no interval provided, leave empty `or_and_filter` as is. + pass + elif partition_mode == "by_year_week": + # TODO(gp): Consider using the same approach above for months also here. + # Partition by year and week. + hdbg.dassert_is_not( + end_timestamp, + None, + "Parquet backend can't determine the boundaries of the data", + ) + # Include last week in the interval. + end_timestamp += pd.DateOffset(weeks=1) + # Get all weeks in the interval. 
+ hdbg.dassert_is_not( + start_timestamp, + None, + "start_timestamp should not be None for by_year_week partition mode", + ) + dates = pd.date_range( + start_timestamp.date(), end_timestamp.date(), freq="W" + ) + for date in dates: + year = date.year + # https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar + weekofyear = date.isocalendar().week + and_filter = [("year", "=", year), ("weekofyear", "=", weekofyear)] + or_and_filter.append(and_filter) + else: + raise ValueError(f"Unknown partition mode `{partition_mode}`!") + if additional_filters: + hdbg.dassert_isinstance(additional_filters, list) + if or_and_filter: + # Append additional filters for every present timestamp filter. + or_and_filter = [ + additional_filters + and_filter for and_filter in or_and_filter + ] + else: + # If no timestamp filters are provided, use additional filters. + or_and_filter = additional_filters + _LOG.debug("or_and_filter=%s", str(or_and_filter)) + if len(or_and_filter) == 0: + # Empty list is not acceptable value for pyarrow dataset. + # Only logical expression or `None`. + or_and_filter = None + return or_and_filter + + +def list_and_merge_pq_files( + root_dir: str, + *, + file_name: str = "data.parquet", + aws_profile: hs3.AwsProfile = None, + drop_duplicates_mode: Optional[str] = None, +) -> None: + """ + Merge all files of the Parquet dataset. + + Can be generalized to any used partition. + + The standard partition (also known as "by-tile") assumed is: + + ``` + root_dir/ + currency_pair=ADA_USDT/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... 
+ currency_pair=EOS_USDT/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + + :param root_dir: root directory of Parquet dataset + :param file_name: name of the single resulting file + :param aws_profile: the name of an AWS profile or a s3fs filesystem + """ + if aws_profile is not None: + filesystem = hs3.get_s3fs(aws_profile) + else: + filesystem = None + # Get full paths to each Parquet file inside root dir. + if filesystem: + # Use specialized S3 filesystem function to list Parquet files efficiently. + # since glob.glob() is very slow as it does a lot of accesses to S3. + # The extra `**/*` is needed by `pyarrow` >= 17. + parquet_files = filesystem.glob(f"{root_dir}/**/*.parquet") + else: + # For local filesystem, use glob.glob + parquet_files = glob.glob(f"{root_dir}/**/*.parquet", recursive=True) + _LOG.debug("Parquet files: '%s'", parquet_files) + # Get paths only to the lowest level of dataset folders. + dataset_folders = {f.rsplit("/", 1)[0] for f in parquet_files} + for folder in dataset_folders: + # Get files per folder and merge if there are multiple ones. + if filesystem: + # Use specialized S3 filesystem function to list Parquet files efficiently. + folder_files = filesystem.ls(folder) + else: + # For local filesystem, use os.listdir + folder_files = [os.path.join(folder, f) for f in os.listdir(folder)] + hdbg.dassert_ne( + len(folder_files), 0, msg=f"Empty folder `{folder}` detected!" + ) + if len(folder_files) == 1 and folder_files[0].endswith("/data.parquet"): + # If there is already single `data.parquet` file, no action is required. + continue + # Read all files in target folder. + # `partitioning=None` is required to read the dataset without + # partitioning columns. See CmTask7324 for details. 
+ # https://github.com/cryptokaizen/cmamp/issues/7324 + data = pq.ParquetDataset( + folder_files, filesystem=filesystem, partitioning=None + ).read() + data = data.to_pandas() + # Drop duplicates on all non-metadata columns. + # TODO(gp): hparquet is general and we should pass the columns to remove + # or perform the transform after. + if drop_duplicates_mode is None: + duplicate_columns = data.columns.to_list() + for col_name in ["knowledge_timestamp", "end_download_timestamp"]: + if col_name in duplicate_columns: + duplicate_columns.remove(col_name) + control_column = None + elif drop_duplicates_mode == "bid_ask": + # Drop duplicates on timestamp index. + duplicate_columns = ["timestamp", "exchange_id"] + control_column = None + elif drop_duplicates_mode == "ohlcv": + # Drop duplicates on timestamp and keep one with largest volume. + duplicate_columns = ["timestamp", "exchange_id"] + control_column = "volume" + else: + hdbg.dfatal("Supported drop duplicates modes: ohlcv, bid_ask") + data = hdatafr.remove_duplicates(data, duplicate_columns, control_column) + # Remove all old files and write the new, merged one. + if filesystem: + filesystem.rm(folder, recursive=True) + pq.write_table( + pa.Table.from_pandas(data), + folder + "/" + file_name, + filesystem=filesystem, + ) + else: + # Use os.remove for local filesystem to remove files. + for file_path in folder_files: + os.remove(file_path) + data.to_parquet(os.path.join(folder, file_name)) + + +def maybe_cast_to_int(string: str) -> Union[str, int]: + """ + Return `string` as an `int` if convertible, otherwise a no-op. + + This is useful for parsing mixed-type dataframe columns that may + contain strings and ints. For example, a dataframe with columns + `feature1, feature2, 1, 2, 3` will be written and read back with + columns `1`, `2`, `3` as the strings "1", "2", "3" rather than the + ints. This function can be used to rectify that in a post-processing + column rename. 
+ """ + hdbg.dassert_isinstance(string, str) + try: + val = int(string) + except ValueError: + val = string + return val diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py new file mode 100644 index 000000000..0ba179142 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py @@ -0,0 +1,1151 @@ +""" +Import as: + +import helpers.hparser as hparser +""" + +import argparse +import logging +import os +import sys +from typing import Any, Dict, List, Optional, Tuple, Union + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# TODO(gp): arg -> args + + +# ############################################################################# + + +def add_bool_arg( + parser: argparse.ArgumentParser, + name: str, + *, + default_value: bool = False, + help_: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add options to a parser like `--xyz` and `--no_xyz`, controlled by + `args.xyz`. + + E.g., `add_bool_arg(parser, "run_diff_script", default_value=True)` adds + two options: + ``` + --run_diff_script Run the diffing script or not + --no_run_diff_script + ``` + corresponding to `args.run_diff_script`, where the default behavior is to have + that value equal to True unless one specifies `--no_run_diff_script`. 
+ """ + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument("--" + name, dest=name, action="store_true", help=help_) + group.add_argument("--no_" + name, dest=name, action="store_false") + parser.set_defaults(**{name: default_value}) + return parser + + +# ############################################################################# + + +def add_verbosity_arg( + parser: argparse.ArgumentParser, *, log_level: str = "INFO" +) -> argparse.ArgumentParser: + parser.add_argument( + "-v", + dest="log_level", + default=log_level, + # TRACE=5 + # DEBUG=10 + # INFO=20 + # WARNING=30 + # CRITICAL=50 + choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the logging level", + ) + parser.add_argument( + "--no_report_command_line", + action="store_true", + help="Disable printing of executed commands", + ) + return parser + + +# TODO(gp): Use this everywhere. +def parse_verbosity_args( + args: argparse.Namespace, *args_: Any, **kwargs: Any +) -> None: + if hasattr(args, "no_report_command_line") and args.no_report_command_line: + report_command_line = False + else: + report_command_line = True + kwargs["report_command_line"] = report_command_line + # if args.log_level == "VERB_DEBUG": + # args.log_level = 5 + hdbg.init_logger(verbosity=args.log_level, *args_, **kwargs) + + +# ############################################################################# +# Command line for `@hcache_simple.simple_cache` functions. +# ############################################################################# + + +# TODO(gp): Use the ones from hcache_simple.py for DRY. +_CACHE_MODE_CHOICES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") + + +def add_cache_control_arg( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add `--cache_mode` switch controlling every + `@hcache_simple.simple_cache`-decorated function in the process. 
+ + The resolved mode is applied globally via + `hcache_simple.set_global_cache_mode` in `parse_cache_control_args()`. + """ + parser.add_argument( + "--cache_mode", + action="store", + default=None, + choices=list(_CACHE_MODE_CHOICES), + help=( + "Override cache behavior for all @simple_cache functions. " + "REFRESH_CACHE repopulates, DISABLE_CACHE bypasses, " + "HIT_CACHE_OR_ABORT raises on miss." + ), + ) + parser.add_argument( + "--cache_debug", + action="store_true", + help=( + "Log at WARNING level for every @simple_cache call whether the " + "result was served from cache, computed on miss, or recomputed " + "because of `cache_mode`" + ), + ) + return parser + + +def parse_cache_control_args(args: argparse.Namespace) -> None: + """ + Apply `--cache_mode`, `--cache_debug` by setting the `hcache_simple` + process-wide globals. + """ + # Import lazily to avoid a circular dependency at module load time. + import helpers.hcache_simple as hcacsimp + + mode = getattr(args, "cache_mode", None) + if mode is not None: + _LOG.info("Setting global cache_mode=%s", mode) + hcacsimp.set_global_cache_mode(mode) + cache_debug = bool(getattr(args, "cache_debug", False)) + if cache_debug: + _LOG.info("Enabling cache_debug logging") + hcacsimp.set_cache_debug(cache_debug) + + +# ############################################################################# +# Command line options for handling the destination dir. +# ############################################################################# + + +def add_dst_dir_arg( + parser: argparse.ArgumentParser, + dst_dir_required: bool, + dst_dir_default: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add command line options related to destination dir. + + E.g., `--dst_dir`, `--clean_dst_dir` + """ + # TODO(gp): Add unit test to check this. + # A required dst_dir implies no default dst_dir. 
+ hdbg.dassert_imply( + dst_dir_required, + not dst_dir_default, + "Since dst_dir_required='%s', you need to specify a default " + "destination dir, instead of dst_dir_default='%s'", + dst_dir_required, + dst_dir_default, + ) + # If dst_dir is not required, then a default dst_dir must be specified. + hdbg.dassert_imply( + not dst_dir_required, + dst_dir_default, + "Since dst_dir_required='%s', you can't specify a default " + "destination dir, dst_dir_default='%s'", + dst_dir_required, + dst_dir_default, + ) + parser.add_argument( + "--dst_dir", + action="store", + default=dst_dir_default, + required=dst_dir_required, + help="Directory storing the results", + ) + parser.add_argument( + "--clean_dst_dir", + action="store_true", + help="Delete the destination dir before running", + ) + parser.add_argument( + "--no_confirm", + action="store_true", + help="Do not confirm before deleting dst dir", + ) + return parser + + +def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: + """ + Process the command line options related to destination dir. + + :return: a tuple (dst_dir, clean_dst_dir) + - dst_dir: the destination dir + - clean_dst_dir: whether to clean the destination dir or not + """ + dst_dir = args.dst_dir + _LOG.debug("dst_dir=%s", dst_dir) + # TODO(Dan): Fix `clean_dst_dir` usage since it is always `False` now. + clean_dst_dir = False + if args.clean_dst_dir: + _LOG.info("Cleaning dst_dir='%s'", dst_dir) + if os.path.exists(dst_dir): + _LOG.warning("Dir '%s' already exists", dst_dir) + if not args.no_confirm: + hsystem.query_yes_no( + f"Do you want to delete the dir '{dst_dir}'", + abort_on_no=True, + ) + hio.create_dir(dst_dir, incremental=False) + hio.create_dir(dst_dir, incremental=True) + _LOG.debug("clean_dst_dir=%s", clean_dst_dir) + return dst_dir, clean_dst_dir + + +# ############################################################################# +# Command line options related to selection actions. 
+# ############################################################################# + + +def add_action_arg( + parser: argparse.ArgumentParser, + valid_actions: List[str], + default_actions: Optional[List[str]], +) -> argparse.ArgumentParser: + """ + Add command line options to select actions to execute, skip, or enable. + + The function creates a mutually exclusive group with three options: + - `-a/--action`: specify exact actions to execute + - `-sa/--skip_action`: skip specific actions from default set + - `-e/--enable`: enable additional actions on top of defaults + + Available actions are listed once in the help epilog to avoid repetition. + + :param parser: parser to add the option to + :param valid_actions: list of valid actions + :param default_actions: list of default actions to execute + :return: parser with the option added + """ + # Add epilog with list of available actions to avoid repeating them. + actions_list = ", ".join(valid_actions) + if parser.epilog: + parser.epilog += f"\n\nAvailable actions: {actions_list}" + else: + parser.epilog = f"Available actions: {actions_list}" + # Create mutually exclusive group for action selection. 
+ group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "-a", + "--action", + action="append", + dest="action", + help="Actions to execute (see available actions below)", + ) + group.add_argument( + "-sa", + "--skip_action", + action="append", + dest="skip_action", + help="Actions to skip from default set (see available actions below)", + ) + group.add_argument( + "-e", + "--enable", + action="append", + dest="enable_action", + help="Enable additional actions on top of defaults (see available actions below)", + ) + if default_actions is not None: + hdbg.dassert_is_subset(default_actions, valid_actions) + parser.add_argument( + "--all", + action="store_true", + help=f"Run all the actions ({' '.join(default_actions)})", + ) + return parser + + +def actions_to_string( + actions: List[str], valid_actions: List[str], add_frame: bool +) -> str: + """ + Convert a list of actions to a string. + + :param actions: list of actions to convert + :param valid_actions: list of valid actions + :param add_frame: if `True`, add a frame around the actions + :return: string of the actions + """ + space = max(len(a) for a in valid_actions) + 2 + format_ = "%" + str(space) + "s: %s" + actions = [ + format_ % (a, "Yes" if a in actions else "-") for a in valid_actions + ] + actions_as_str = "\n".join(actions) + if add_frame: + ret = hprint.frame("# Action selected:") + "\n" + ret += hprint.indent(actions_as_str) + else: + ret = actions_as_str + return ret # type: ignore + + +def select_actions( + args: argparse.Namespace, + valid_actions: List[str], + default_actions: List[str], +) -> List[str]: + """ + Select actions based on the command line arguments. 
+ + Supports three mutually exclusive modes: + - `--action`: run only specified actions + - `--skip_action`: run default actions minus specified ones + - `--enable`: run default actions plus specified additional ones + + :param args: command line arguments + :param valid_actions: list of valid actions + :param default_actions: list of default actions to execute + :return: list of selected actions + """ + hdbg.dassert( + not (args.action and args.all), + "You can't specify together --action and --all", + ) + hdbg.dassert( + not (args.action and args.skip_action), + "You can't specify together --action and --skip_action", + ) + # TODO(ai_gp): Is this still needed? + # Check for enable_action attribute (added for backward compatibility). + has_enable = hasattr(args, "enable_action") + if has_enable: + hdbg.dassert( + not (args.action and args.enable_action), + "You can't specify together --action and --enable", + ) + hdbg.dassert( + not (args.skip_action and args.enable_action), + "You can't specify together --skip_action and --enable", + ) + # Select actions. + if not args.action or args.all: + if default_actions is None: + default_actions = valid_actions[:] + hdbg.dassert_is_subset(default_actions, valid_actions) + # Convert it into list since through some code paths it can be a tuple. + actions = list(default_actions) + else: + # Validate actions specified by user. + for action in args.action: + hdbg.dassert_in( + action, + valid_actions, + "Invalid action '%s'", + action, + ) + actions = args.action[:] + hdbg.dassert_isinstance(actions, list) + hdbg.dassert_no_duplicates(actions) + # Remove actions, if needed. + if args.skip_action: + hdbg.dassert_isinstance(args.skip_action, list) + for skip_action in args.skip_action: + # Validate that skip_action is a valid action. + hdbg.dassert_in( + skip_action, + valid_actions, + "Invalid action '%s'", + skip_action, + ) + # Validate that skip_action is in the current action list. 
+ if skip_action not in actions: + _LOG.warning( + "Skipping action '%s' since it's already not in actions='%s'", + skip_action, + actions, + ) + actions = [a for a in actions if a != skip_action] + # Add enabled actions on top of defaults. + if has_enable and args.enable_action: + hdbg.dassert_isinstance(args.enable_action, list) + for enable_action in args.enable_action: + hdbg.dassert_in( + enable_action, + valid_actions, + "Invalid action '%s'", + enable_action, + ) + if enable_action not in actions: + actions.append(enable_action) + # Reorder actions according to 'valid_actions'. + actions = [action for action in valid_actions if action in actions] + return actions + + +def mark_action( + action: str, actions: Optional[List[str]] +) -> Tuple[bool, Optional[List[str]]]: + """ + Mark an action as to be executed or skipped. + + :param action: action to mark + :param actions: list of actions, or None to execute all actions + :return: tuple of (to_execute, actions) + """ + if actions is None: + # If actions is None, execute all actions. + to_execute = True + else: + to_execute = action in actions + _LOG.debug("\n%s", hprint.frame(f"action={action}")) + if to_execute: + if actions is not None: + actions = [a for a in actions if a != action] + else: + _LOG.warning("Skip action='%s'", action) + return to_execute, actions + + +# ############################################################################# +# Command line options for input/output processing. +# ############################################################################# + +# For non-dockerized scripts the following idiom is used: +# +# ```python +# # Add input/output arguments to parser. +# hparser.add_input_output_args(parser) +# # Handle input/output arguments, including stdin/stdout. +# in_file_name, out_file_name = hparser.parse_input_output_args(args) +# ... +# # Read input file, handling stdin. +# in_lines = hparser.from_file(in_file_name) +# ... +# # Write output, handling stdout. 
+# hparser.to_file(txt, out_file_name) +# ``` +# See helpers_root/dev_scripts_helpers/coding_tools/transform_template.py as an +# example. + +# For dockerized scripts the following idiom is used inside the wrapper, which +# calls the dockerized script: +# +# ```python +# # Add input/output arguments to parser. +# hparser.add_input_output_args(parser) +# # Handle input/output arguments, including stdin/stdout. +# in_file_name, out_file_name = hparser.parse_input_output_args(args) +# tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( +# in_file_name, "llm_transform") +# ... +# # For stdin/stdout, suppress the output of the container. +# suppress_output = in_file_name == "-" or out_file_name == "-" +# _run_dockerized_llm_transform( +# tmp_in_file_name, +# cmd_line_opts, +# tmp_out_file_name, +# mode="system", +# force_rebuild=args.dockerized_force_rebuild, +# use_sudo=args.dockerized_use_sudo, +# suppress_output=suppress_output, +# ) +# ... +# # Write output, handling stdout. +# hparser.to_file(txt, out_file_name) +# ``` +# +# See helpers_root/dev_scripts_helpers/llms/llm_transform.py as an example. + + +def add_input_output_args( + parser: argparse.ArgumentParser, + *, + in_default: Optional[str] = None, + in_required: bool = True, + out_default: Optional[str] = None, + out_required: bool = False, +) -> argparse.ArgumentParser: + """ + Add options to parse input and output file name, and handle stdin / stdout. 
+ + :param in_default: default file to be used for input + - If `None`, it must be specified by the user + :param in_required: whether the input file is required + :param out_default: default file to be used for output + - If `None`, it must be specified by the user + :param out_required: whether the output file is required + """ + parser.add_argument( + "-i", + "--input", + dest="input", + required=in_required, + type=str, + default=in_default, + help="Input file or `-` for stdin", + ) + parser.add_argument( + "-o", + "--output", + dest="output", + required=out_required, + type=str, + default=out_default, + help="Output file or `-` for stdout", + ) + return parser + + +def parse_input_output_args( + args: argparse.Namespace, *, clear_screen: bool = False +) -> Tuple[str, str]: + """ + Parse input and output file name, handling stdin / stdout. + + :return input and output file name. + """ + in_file_name = args.input + out_file_name = args.output + if out_file_name is None: + # If the output file is not specified, use the input file name, i.e., + # in place. + out_file_name = in_file_name + # Print summary. If we are using stdin / stdout, don't print anything since + # we don't want to pollute the output. + if in_file_name != "-": + if clear_screen: + os.system("clear") + _LOG.info(hprint.to_str("in_file_name")) + _LOG.info(hprint.to_str("out_file_name")) + + return in_file_name, out_file_name + + +def init_logger_for_input_output_transform( + args: argparse.Namespace, *, verbose: bool = True +) -> None: + """ + Initialize the logger when input/output transformation is used. + + :param verbose: if `False`, set the log level to `CRITICAL` so that no + output is printed and avoid to print: + ``` + 09:34:24 - INFO hdbg.py init_logger:1013 Saving log to file '/User... + 09:34:24 - INFO hdbg.py init_logger:1018 > cmd='/Users/saggese/src... + 09:34:24 - INFO hparser.py parse_input_output_args:368 in_file_name='lectures_source/Les... 
+ 09:34:24 - INFO hparser.py parse_input_output_args:369 out_file_name='-' + ``` + """ + verbosity = args.log_level + if not verbose: + # Unless user has specified DEBUG level, set the log level to `CRITICAL` + # so that no output is printed. + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + # If the input is stdin, we don't want to print the command line or any + # other log messages, unless the user specified a more verbose log level. + if args.input == "-": + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + print("cmd line: " + hdbg.get_command_line()) + hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) + + +def from_file(file_name: str) -> List[str]: + """ + Read file or stdin (represented by `-`), returning an array of lines. + + If file_name is "pb" and the platform is macOS, read from clipboard. + """ + if file_name == "-": + _LOG.info("Reading from stdin") + # Read. + txt = [] + for line in sys.stdin: + txt.append(line.rstrip("\n")) + elif file_name == "pb": + # Read from clipboard (macOS only). + if hserver.is_host_mac(): + _LOG.info("Reading from clipboard") + cmd = "pbpaste" + rc, txt_str = hsystem.system_to_string(cmd) + txt = txt_str.split("\n") + else: + hdbg.dfatal("Reading from clipboard (pb) only works on macOS") + else: + txt = hio.from_file(file_name) + txt = txt.split("\n") + return txt + + +def to_file(txt: Union[str, List[str]], file_name: str) -> None: + """ + Write txt in a file or stdout (represented by `-`). + + If file_name is "pb" and the platform is macOS, write to clipboard. + """ + if isinstance(txt, str): + txt = [txt] + if file_name == "-": + _LOG.debug("Saving to stdout") + print("\n".join(txt)) + elif file_name == "pb": + # Write to clipboard (macOS only). + if hserver.is_host_mac(): + _LOG.info("Writing to clipboard") + txt_str = "\n".join(txt) + # Use echo with pbcopy, escaping single quotes. 
+ txt_str_escaped = txt_str.replace("'", "'\\''") + cmd = f"echo -n '{txt_str_escaped}' | pbcopy" + hsystem.system(cmd) + _LOG.info("Written to clipboard") + else: + hdbg.dfatal("Writing to clipboard (pb) only works on macOS") + else: + _LOG.debug("Saving to file") + with open(file_name, "w") as f: + f.write("\n".join(txt)) + _LOG.info("Written file '%s'", file_name) + + +def adapt_input_output_args_for_dockerized_scripts( + in_file_name: str, tag: str +) -> Tuple[str, str]: + """ + Adapt input and output file name for dockerized scripts. + + Since we need to call a container and passing stdin/stdout is tricky, + we read the input and save it in a temporary file. + + :param tag: tag to be used for the temporary file name (e.g., `llm_transform`) + """ + # Since we need to call a container and passing stdin/stdout is tricky, + # we read the input and save it in a temporary file. + in_lines = from_file(in_file_name) + if in_file_name == "-": + tmp_in_file_name = f"tmp.{tag}.in.txt" + in_txt = "\n".join(in_lines) + hio.to_file(tmp_in_file_name, in_txt) + else: + tmp_in_file_name = in_file_name + # + tmp_out_file_name = f"tmp.{tag}.out.txt" + return tmp_in_file_name, tmp_out_file_name + + +# ############################################################################# +# Command line options for parallel processing. +# ############################################################################# + + +# pylint: disable=line-too-long +# TODO(gp): These should go in hjoblib.py +def add_parallel_processing_arg( + parser: argparse.ArgumentParser, + *, + num_threads_default: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add parallel processing args. + + The "incremental idiom" means skipping processing computation that has + already been performed. E.g., if we need to transform files from one dir to + another we skip the files already processed (assuming that a file present + in the destination dir is an indication that it has already been + processed). 
+ + The default behavior should always be incremental since "incremental mode" + is not destructive like the non-incremental, i.e., delete and restart + + The incremental behavior is disabled with `--no_incremental`. This implies + performing the computation in any case + - It is often implemented by deleting the destination dir and then running + again, even in incremental mode + - If the destination dir already exists, then we require the user to + explicitly use `--force` to confirm that the user knows what is doing + """ + parser.add_argument( + "--dry_run", + action="store_true", + help="Print the workload and exit without running it", + ) + parser.add_argument( + "--no_incremental", + action="store_true", + help="Skip workload already performed", + ) + parser.add_argument( + "--force", + action="store_true", + help="Confirm that one wants to remove the previous results. It works only together with --no_incremental", + ) + # + help = """ + Number of threads to use: + - '-1' to use all CPUs; + - '1' to use one-thread at the time but using the parallel execution (mainly used + for debugging) + - 'serial' to serialize the execution without using parallel execution""" + if num_threads_default is None: + parser.add_argument( + "--num_threads", + action="store", + help=help, + required=True, + ) + else: + parser.add_argument( + "--num_threads", + action="store", + help=help, + default=num_threads_default, + ) + parser.add_argument("--no_keep_order", action="store_true", help="") + parser.add_argument( + "--num_func_per_task", + action="store", + type=int, + default=None, + help="Number of function execute in a (parallel) task of the workload. 
`None` means automatically decided by the function", + ) + parser.add_argument( + "--skip_on_error", + action="store_true", + help="Continue execution after encountering an error", + ) + parser.add_argument( + "--num_attempts", + default=1, + type=int, + help="Repeat running an experiment up to `num_attempts` times", + required=False, + ) + return parser + + +def create_incremental_dir(dst_dir: str, args: argparse.Namespace) -> None: + """ + Create a dir using the "incremental idiom". + + If the dir already exists and the user requested the not + incremental, we require `--force` to confirm deleting the dir. + """ + if args.force: + hdbg.dassert( + args.no_incremental, "--force only works with --no_incremental" + ) + _LOG.debug(hprint.to_str("dst_dir args")) + if args.no_incremental: + # Create the dir from scratch. + _LOG.debug("No incremental mode") + if os.path.exists(dst_dir): + _LOG.debug("Dir '%s' already exists", dst_dir) + hdbg.dassert_dir_exists(dst_dir, "'%s' must be a directory") + if not args.force: + _LOG.warning( + "The directory '%s' already exists. To confirm deleting it use --force", + dst_dir, + ) + sys.exit(-1) + _LOG.warning("Deleting %s", dst_dir) + hio.create_dir(dst_dir, incremental=False) + else: + _LOG.debug("Incremental mode") + hio.create_dir(dst_dir, incremental=True) + + +# ############################################################################# +# Command line options for metadata output. +# ############################################################################# + + +def add_json_output_metadata_args( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add arguments related to storing the output metadata from a script. + + This data can be read / used by other scripts to post-process a + script results. 
+ """ + parser.add_argument( + "--json_output_metadata", + type=str, + action="store", + help="File storing the output metadata of this script in JSON format", + ) + return parser + + +# Store the metadata about the output of a script. +OutputMetadata = Dict[str, str] + + +def process_json_output_metadata_args( + args: argparse.Namespace, + output_metadata: OutputMetadata, +) -> Optional[str]: + """ + Save the output metadata according to the command line options. + + :return: file name with the output metadata + """ + hdbg.dassert_isinstance(output_metadata, dict) + if args.json_output_metadata is None: + return None + file_name: str = args.json_output_metadata + _LOG.info("Saving output metadata into file '%s'", file_name) + if not file_name.endswith(".json"): + _LOG.warning( + "The output metadata file '%s' doesn't end in .json: adding it", + file_name, + ) + file_name += ".json" + hio.to_json(file_name, output_metadata) + _LOG.info("Saved output metadata into file '%s'", file_name) + return file_name + + +def read_output_metadata(output_metadata_file: str) -> OutputMetadata: + """ + Read the output metadata. + """ + output_metadata: OutputMetadata = hio.from_json(output_metadata_file) + return output_metadata + + +def str_to_bool(value: str) -> bool: + """ + Convert string representing true or false to the corresponding bool. + """ + if value.lower() == "true": + ret = True + elif value.lower() == "false": + ret = False + else: + raise argparse.ArgumentTypeError( + f"Invalid boolean value {value}. Use 'true' or 'false'." + ) + return ret + + +# ############################################################################# +# Command line options for dockerized scripts. +# ############################################################################# + + +def add_dockerized_script_arg( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add common command line arguments for dockerized scripts. 
+ """ + parser.add_argument( + "--dockerized_force_rebuild", + action="store_true", + help="Force to rebuild the Docker container", + ) + parser.add_argument( + "--dockerized_use_sudo", + action="store_true", + help="Use sudo inside the container", + ) + return parser + + +def add_llm_prompt_arg( + parser: argparse.ArgumentParser, + *, + default_prompt: str = "", + is_required: bool = True, +) -> argparse.ArgumentParser: + """ + Add common command line arguments for `*llm_transform.py` scripts. + + :param default_prompt: default prompt to use + :param is_required: whether the prompt is required + :return: parser with the option added + """ + parser.add_argument( + "--debug", + action="store_true", + help="Print before/after the transform", + ) + if default_prompt != "": + is_required = False + parser.add_argument( + "-p", + "--prompt", + required=is_required, + type=str, + help="Prompt to apply", + default=default_prompt, + ) + parser.add_argument( + "-f", + "--fast_model", + action="store_true", + help="Use a fast LLM model vs a high-quality one", + ) + return parser + + +# ############################################################################# +# Command line options for limit range processing. +# ############################################################################# + + +def add_limit_range_arg( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add argument for limiting processing to a range of items. + + The range format is X:Y where X and Y are 1-indexed integers. + """ + parser.add_argument( + "--limit", + action="store", + help="Limit processing to item range X:Y (integers >= 1, inclusive)", + ) + return parser + + +def parse_limit_range(limit_str: str) -> Tuple[int, int]: + """ + Parse limit string in format "X:Y" and return tuple (start, end). 
+ + :param limit_str: string in format "X:Y" where X and Y are integers >= 1 + :return: tuple in [start_index, end_index] + """ + hdbg.dassert( + ":" in limit_str, "Limit format must be X:Y, got: %s", limit_str + ) + parts = limit_str.split(":") + hdbg.dassert_eq( + len(parts), 2, "Limit format must be X:Y, got: %s", limit_str + ) + try: + start = int(parts[0]) + end = int(parts[1]) + except ValueError as e: + hdbg.dfatal("Invalid limit format, must be integers: %s" % str(e)) + hdbg.dassert_lte(1, start, "Start index must be >= 1, got: %s", start) + hdbg.dassert_lte(1, end, "End index must be >= 1, got: %s", end) + hdbg.dassert_lte( + start, end, "Start index must be <= end index, got: %s:%s", start, end + ) + return start, end + + +def parse_limit_range_args( + args: argparse.Namespace, +) -> Optional[Tuple[int, int]]: + """ + Parse limit range from command line arguments and log the result. + + :param args: parsed command line arguments containing 'limit' + attribute + :return: tuple of (start_index, end_index) as 0-indexed integers, or + None if no limit + """ + limit_range = None + if args.limit: + limit_range = parse_limit_range(args.limit) + _LOG.warning( + "Using limit range: [%s:%s]", limit_range[0], limit_range[1] + ) + return limit_range + + +def apply_limit_range( + items: List[Any], + limit_range: Optional[Tuple[int, int]] = None, + *, + item_name: str = "items", +) -> List[Any]: + """ + Apply limit range filtering to a list of items. 
+ + :param items: list of items to filter + :param limit_range: optional tuple (start, end) for 0-indexed range + filtering + :param item_name: name of items for logging purposes + :return: filtered list of items + """ + if limit_range is not None: + start_idx, end_idx = limit_range + total_items = len(items) + hdbg.dassert_lt( + start_idx, + total_items, + "Start index %s exceeds available %s %s", + start_idx, + item_name, + total_items, + ) + hdbg.dassert_lt( + end_idx, + total_items, + "End index %s exceeds available %s %s", + end_idx, + item_name, + total_items, + ) + items = items[start_idx : end_idx + 1] + _LOG.warning( + "Found %s %s, limited to range %s:%s (%s %s)", + total_items, + item_name, + start_idx, + end_idx, + len(items), + item_name, + ) + else: + _LOG.info("Found %s %s to process", len(items), item_name) + # Print the items that will be processed. + _LOG.debug("Items to process:") + for i, item in enumerate(items): + _LOG.debug(" [%s]: %s", i, item) + return items + + +# ############################################################################# +# Command line options for multiple file input. +# ############################################################################# + + +def add_multi_file_args( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add command line options for specifying multiple input files. + + Three mutually exclusive methods are supported: + - `--files="file1,file2,..."`: comma-separated list of files + - `--from_files="file.txt"`: file containing one file per line + - `--input file1 --input file2`: repeated argument + + These options work alongside the existing `-i/--input` for backward + compatibility. 
+ + :param parser: parser to add the options to + :return: parser with the options added + """ + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "--files", + type=str, + help="Comma-separated list of files to process (e.g., 'file1.txt,file2.txt,file3.txt')", + ) + group.add_argument( + "--from_files", + type=str, + help="Path to file containing one file path per line", + ) + group.add_argument( + "-i", + "--input", + action="append", + help="File to process (can be specified multiple times)", + ) + return parser + + +def parse_multi_file_args( + args: argparse.Namespace, +) -> List[str]: + """ + Parse multi-file command line arguments and return list of file paths. + + Handles three input methods: + - `--files="file1,file2,..."`: comma-separated list + - `--from_files="file.txt"`: file containing one file per line + - `--input file1 --input file2`: repeated argument + + If none of the multi-file options are specified, falls back to the single + `-i/--input` argument for backward compatibility. + + :param args: parsed command line arguments + :return: list of file paths to process + """ + file_list: List[str] = [] + # Check which multi-file option was specified. + if hasattr(args, "files") and args.files: + # Parse comma-separated list. + _LOG.debug("Using --files option") + file_list = [f.strip() for f in args.files.split(",")] + # Remove empty strings. + file_list = [f for f in file_list if f] + elif hasattr(args, "from_files") and args.from_files: + # Read file containing list of files. + _LOG.debug("Using --from_files option") + hdbg.dassert_path_exists(args.from_files) + content = hio.from_file(args.from_files) + lines = content.split("\n") + for line in lines: + # Strip whitespace. + line = line.strip() + # Skip empty lines and comments. 
+ if line and not line.startswith("#"): + file_list.append(line) + elif hasattr(args, "input") and args.input: + # Check if args.input is a list (from --input repeated argument) or a string (from -i/--input single file). + if isinstance(args.input, list): + # Use repeated argument from add_multi_file_args. + _LOG.debug("Using --input option (repeated argument)") + file_list = args.input + else: + # Backward compatibility: support single file via -i/--input from add_input_output_args. + _LOG.debug( + "Using -i/--input option (single file, backward compatibility)" + ) + file_list = [args.input] + else: + # No file specified. + hdbg.dfatal("No input files specified") + # Validate that we have at least one file. + hdbg.dassert_isinstance(file_list, list) + hdbg.dassert_lt( + 0, len(file_list), "No input files specified after parsing arguments" + ) + # Validate that all files exist. + for file_path in file_list: + hdbg.dassert_path_exists(file_path) + _LOG.info("Found %s file(s) to process", len(file_list)) + return file_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py new file mode 100644 index 000000000..e46fc8143 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py @@ -0,0 +1,253 @@ +""" +Pickle and JSON serialization/deserialization routines. + +Import as: + +import helpers.hpickle as hpickle +""" + +import gzip +import json +import logging +import marshal +import os +import pickle +import types +from typing import Any, Callable, Optional + +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hio as hio + +# TODO(Grisha): Can this module depend on hs3? 
import helpers.hs3 as hs3
import helpers.htimer as htimer

_LOG = logging.getLogger(__name__)


def to_pickleable(obj: Any, force_values_to_string: bool) -> Any:
    """
    Convert an object into an object with the same nested structure (e.g.,
    lists and dicts), but where all values are replaced with their pickleable
    representations.

    Lists, tuples, and dicts are rebuilt recursively (dict keys are converted
    too). Any other object reported as iterable by `hintros.is_iterable()` is
    converted into a list. A leaf value is kept as-is when it is pickleable,
    otherwise it is replaced by `str(value)`.

    :param obj: object to convert
    :param force_values_to_string: if True, store all the object values
        as strings
    :return: pickleable object
    """
    if isinstance(obj, list):
        # Process list values recursively.
        out = [to_pickleable(k, force_values_to_string) for k in obj]
    elif isinstance(obj, tuple):
        # Process tuple values recursively.
        out = tuple([to_pickleable(k, force_values_to_string) for k in obj])
    elif isinstance(obj, dict):
        # Process dict keys and values recursively.
        out = {}
        for k, v in obj.items():
            k = to_pickleable(k, force_values_to_string)
            v = to_pickleable(v, force_values_to_string)
            out[k] = v
    elif hintros.is_iterable(obj):
        # TODO(Grisha): is it ok that we convert any Iterable (e.g., set) to list?
        # This means that input and output data types do not match.
        # Process other iterable values recursively.
        out = [to_pickleable(v, force_values_to_string) for v in obj]
    else:
        # We need to use try_and_catch mode because of CmTask7713.
        if hintros.is_pickleable(obj, mode="try_and_catch"):
            # Store a pickleable object.
            if force_values_to_string:
                # Store as string if specified.
                out = str(obj)
            else:
                out = obj
        else:
            # Store a string representation of an unpickleable object.
            out = str(obj)
    return out


# #############################################################################
# pickle
# #############################################################################


def to_pickle(
    obj: Any,
    file_name: str,
    *,
    backend: str = "pickle",
    log_level: int = logging.DEBUG,
    aws_profile: Optional[hs3.AwsProfile] = None,
) -> None:
    """
    Pickle object `obj` into file `file_name`.

    :param obj: object to serialize
    :param file_name: the file_name is not changed, but it is checked for
        consistency with the backend (e.g., `pickle_gzip` needs a `.pkl.gz`
        extension)
    :param backend: pickle, dill, pickle_gzip
        - Only the `pickle` backend writes through S3 when `file_name` is an
          S3 path; `dill` and `pickle_gzip` always use the local `open()` /
          `gzip.open()`
    :param log_level: level used to log the final "Saved ..." message
    :param aws_profile: profile passed to `hs3.get_s3fs()` / `hs3.du()` when
        `file_name` is an S3 path
    :raises ValueError: if `backend` is not one of the supported values
    """
    hdbg.dassert_type_is(file_name, str)
    hio.create_enclosing_dir(file_name, incremental=True)
    with htimer.TimedScope(logging.DEBUG, f"Pickling to '{file_name}'") as ts:
        # We assume that the user always specifies a .pkl extension and then we
        # change the extension based on the backend.
        if backend in ("pickle", "dill"):
            hdbg.dassert_file_extension(file_name, "pkl")
            if backend == "pickle":
                # NOTE(review): `Pickler.fast` is documented as deprecated; it
                # disables the memo, so self-referential objects are not
                # supported in this mode.
                # Use S3 file system.
                if hs3.is_s3_path(file_name):
                    s3fs_ = hs3.get_s3fs(aws_profile)
                    with s3fs_.open(file_name, "wb") as s3_file:
                        pickler = pickle.Pickler(
                            s3_file, pickle.HIGHEST_PROTOCOL
                        )
                        pickler.fast = True
                        pickler.dump(obj)
                # Use local file system.
                else:
                    with open(file_name, "wb") as fd:
                        pickler = pickle.Pickler(fd, pickle.HIGHEST_PROTOCOL)
                        pickler.fast = True
                        pickler.dump(obj)
            elif backend == "dill":
                # Imported lazily so that `dill` is needed only when requested.
                import dill

                with open(file_name, "wb") as fd:
                    dill.dump(obj, fd)
            else:
                raise ValueError(f"Invalid backend='{backend}'")
        elif backend == "pickle_gzip":
            hdbg.dassert_file_extension(file_name, "pkl.gz")
            with gzip.open(file_name, "wb") as zfd:
                pickler = pickle.Pickler(zfd, pickle.HIGHEST_PROTOCOL)
                pickler.fast = True
                pickler.dump(obj)
        else:
            raise ValueError(f"Invalid backend='{backend}'")
    # Report time and size.
    if hs3.is_s3_path(file_name):
        file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True)
    else:
        file_size = hintros.format_size(os.path.getsize(file_name))
    _LOG.log(
        log_level,
        "Saved '%s' (size=%s, time=%.1fs)",
        file_name,
        file_size,
        ts.elapsed_time,
    )


def from_pickle(
    file_name: str,
    backend: str = "pickle",
    *,
    log_level: int = logging.DEBUG,
    aws_profile: Optional[hs3.AwsProfile] = None,
) -> Any:
    """
    Unpickle and return object stored in `file_name`.

    WARNING: unpickling executes code embedded in the file. Call this only on
    files coming from a trusted source.

    :param file_name: file to read; its extension must match the backend
        (`.pkl` for `pickle` and `dill`, `.pkl.gz` for `pickle_gzip`)
    :param backend: pickle, dill, pickle_gzip
        - Only the `pickle` backend reads through S3 when `file_name` is an
          S3 path
    :param log_level: level used to log the final "Read ..." message
    :param aws_profile: profile passed to `hs3.get_s3fs()` / `hs3.du()` when
        `file_name` is an S3 path
    :return: the deserialized object
    :raises ValueError: if `backend` is not one of the supported values
    """
    hdbg.dassert_isinstance(file_name, str)
    with htimer.TimedScope(
        logging.DEBUG, f"Unpickling from '{file_name}'"
    ) as ts:
        # We assume that the user always specifies a .pkl extension and then we
        # change the extension based on the backend.
        if backend in ("pickle", "dill"):
            hdbg.dassert_file_extension(file_name, "pkl")
            if backend == "pickle":
                # Use S3 file system.
                if hs3.is_s3_path(file_name):
                    s3fs_ = hs3.get_s3fs(aws_profile)
                    with s3fs_.open(file_name) as s3_file:
                        unpickler = pickle.Unpickler(s3_file)
                        obj = unpickler.load()
                else:
                    with open(file_name, "rb") as fd:
                        unpickler = pickle.Unpickler(fd)
                        obj = unpickler.load()
            elif backend == "dill":
                # Imported lazily so that `dill` is needed only when requested.
                import dill

                with open(file_name, "rb") as fd:
                    obj = dill.load(fd)
            else:
                raise ValueError(f"Invalid backend='{backend}'")
        elif backend == "pickle_gzip":
            hdbg.dassert_file_extension(file_name, "pkl.gz")
            with gzip.open(file_name, "rb") as zfd:
                unpickler = pickle.Unpickler(zfd)
                obj = unpickler.load()
        else:
            raise ValueError(f"Invalid backend='{backend}'")
    # Report time and size.
    if hs3.is_s3_path(file_name):
        file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True)
    else:
        file_size = hintros.format_size(os.path.getsize(file_name))
    _LOG.log(
        log_level,
        "Read '%s' (size=%s, time=%.1fs)",
        file_name,
        file_size,
        ts.elapsed_time,
    )
    return obj


# #############################################################################


# Encoding used to carry the raw `marshal` bytes inside a `str`. Latin-1 maps
# each byte value 0-255 to exactly one code point, so the bytes -> str -> bytes
# round trip is lossless. The default UTF-8 codec is not suitable, since
# marshalled code objects are arbitrary binary data and generally are not valid
# UTF-8, which makes `bytes.decode()` raise `UnicodeDecodeError`.
_MARSHAL_STR_ENCODING = "latin-1"


# TODO(gp): -> to_pickle_function
def pickle_function(func: Callable) -> str:
    """
    Pickle a function into bytecode stored into a string.

    Only the code object (`func.__code__`) is serialized with `marshal`:
    defaults, closures, and globals of the function are not stored. The
    resulting string can be turned back into a function with
    `unpickle_function()`.

    :param func: function to serialize; it must expose `__code__`
    :return: marshalled code object, with each byte stored as one character
    """
    hdbg.dassert_callable(func)
    hdbg.dassert(hasattr(func, "__code__"))
    assert hasattr(func, "__code__")
    code_as_bytes = marshal.dumps(func.__code__)
    # Use a byte-preserving codec, since the payload is binary.
    return code_as_bytes.decode(_MARSHAL_STR_ENCODING)


# TODO(gp): -> from_pickle_function
def unpickle_function(code_as_str: str, func_name: str) -> Callable:
    """
    Unpickle a function saved into the string `code_as_str`.

    The function is rebuilt around the stored code object, using the globals
    of this module as its global namespace and `func_name` as its name.

    WARNING: the marshalled code is executed when the returned function is
    called. Use this only on strings coming from a trusted source.

    :param code_as_str: string returned by `pickle_function()`
    :param func_name: name to assign to the rebuilt function
    :return: function
    """
    hdbg.dassert_isinstance(code_as_str, str)
    # Invert the byte-preserving encoding applied by `pickle_function()`.
    code = marshal.loads(code_as_str.encode(_MARSHAL_STR_ENCODING))
    func = types.FunctionType(code, globals(), name=func_name)
    return func


# #############################################################################
# JSON
# #############################################################################

# TODO(gp): Maybe move helpers/hjson.py?


# TODO(gp): Switch file_name and obj to be consistent with the pickle functions.
+def to_json(file_name: str, obj: object) -> None: + hdbg.dassert_file_extension(file_name, "json") + with open(file_name, "w") as outfile: + json.dump(obj, outfile) + + +def from_json(file_name: str) -> object: + hdbg.dassert_path_exists(file_name) + hdbg.dassert_file_extension(file_name, "json") + obj = json.loads(hio.from_file(file_name)) + return obj diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py new file mode 100644 index 000000000..5e1df13c8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py @@ -0,0 +1,495 @@ +""" +Code to automatically generate unit tests for functions. + +Import as: + +import helpers.hplayback as hplayba +""" + +import inspect +import json +import logging +import os +from typing import Any, Callable, List, Optional + +import jsonpickle # type: ignore +import jsonpickle.ext.pandas as jepand # type: ignore +import pandas as pd + +import config_root.config as cconfig +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint + +jepand.register_handlers() + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): Use repr to serialize: +# >>> a = {"hello": [1, 2, (3, 4)]} +# >>> repr(a) +# "{'hello': [1, 2, (3, 4)]}" +# TODO(gp): Add more types. +# TODO(gp): -> _to_python_code +def to_python_code(obj: Any) -> str: + """ + Serialize an object into a string of Python code. + + :param obj: an object to serialize + :return: a string of Python code building the object + """ + output = [] + if isinstance(obj, (int, float)): + # Float 2.5 -> "2.5". + output.append(str(obj)) + elif isinstance(obj, str): + # String test -> '"test"'. + # Use jsonpickle to handle double quotes. 
+ output.append(jsonpickle.encode(obj)) + elif isinstance(obj, list): + # List ["a", 1] -> '["a", 1]'. + output_tmp = "[" + for el in obj: + output_tmp += to_python_code(el) + ", " + output_tmp = output_tmp.rstrip(", ") + "]" + output.append(output_tmp) + elif isinstance(obj, tuple): + # Tuple ["a", 1] -> '["a", 1]'. + output_tmp = "(" + for el in obj: + output_tmp += to_python_code(el) + ", " + output_tmp = output_tmp.rstrip(", ") + ")" + output.append(output_tmp) + elif isinstance(obj, dict): + # Dict {"a": 1} -> '{"a": 1}'. + output_tmp = "{" + for key in obj: + output_tmp += ( + to_python_code(key) + ": " + to_python_code(obj[key]) + ", " + ) + output_tmp = output_tmp.rstrip(", ") + "}" + output.append(output_tmp) + elif isinstance(obj, pd.DataFrame): + # Dataframe with a column "a" and row values 1, 2 -> + # "pd.DataFrame.from_dict({'a': [1, 2]})". + vals = obj.to_dict(orient="list") + output.append(f"pd.DataFrame.from_dict({vals})") + elif isinstance(obj, pd.Series): + # Series init as pd.Series([1, 2]) + output.append( + f'pd.Series(data={obj.tolist()}, index={obj.index}, name="{obj.name}", ' + f"dtype={obj.dtype})" + ) + elif isinstance(obj, cconfig.Config): + # Config -> python_code -> "cconfig.Config.from_python(python_code)" + val = obj.to_python() + output.append(f'cconfig.Config.from_python("{val}")') + else: + # Use `jsonpickle` for serialization. + _LOG.warning( + "Type %s not found in serialization function: using jsonpickle.", + type(obj), + ) + output.append(f"r'{jsonpickle.encode(obj)}'") + output = "\n".join(output) + return output + + +# ############################################################################# +# Playback +# ############################################################################# + + +class Playback: + @staticmethod + def _get_test_file_name(file_with_code: str) -> str: + """ + Construct the test file name based on the file with the code to test. + + :param file_with_code: path to file with code to test. 
+ :return: path to the file with generated test. + """ + # Get directory and filename of the testing code. + dirname_with_code, filename_with_code = os.path.split(file_with_code) + dirname_with_test = os.path.join(dirname_with_code, "test") + # Construct test file. + test_file = os.path.join( + dirname_with_test, f"test_by_playback_{filename_with_code}" + ) + return test_file + + def _update_code_to_existing(self) -> None: + """ + Get existing content from the file with test. + + If the file doesn't exist - creates it. + """ + # Create test file if it doesn't exist. + if not os.path.exists(self._test_file): + hio.create_enclosing_dir(self._test_file, True) + hio.to_file(self._test_file, "", mode="w") + else: + # Get already existing content in the test file. + self._code = hio.from_file(self._test_file).split("\n") + self._file_exists = True + + def _append(self, string: str, num_tabs: int = 0) -> None: + """ + Add indented line to the code. + """ + num_spaces = num_tabs * 4 + self._code.append(hprint.indent(string, num_spaces=num_spaces)) + + def __init__( + self, + mode: str, + to_file: Optional[bool] = None, + max_tests: Optional[int] = None, + ) -> None: + """ + Initialize the class variables. + + :param mode: the type of unit test to be generated (e.g. "assert_equal") + :param to_file: save playback output to the file + test/test_by_playback_.py + :param max_tests: limit a number of generated tests for the testing + function. Can be useful if the function is called a lot of times + during the execution. + """ + _LOG.debug(hprint.to_str("mode to_file max_tests")) + hdbg.dassert_in(mode, ("check_string", "assert_equal")) + self.mode = mode + # TODO(gp): Factor out in a function but need to discard one more level + # in the stack trace. + cur_frame = inspect.currentframe() + self._func_name = cur_frame.f_back.f_code.co_name # type: ignore + # We can use kw arguments for all args. Python supports this. 
+ self._kwargs = cur_frame.f_back.f_locals.copy() # type: ignore + # It treats all arguments defined before itself as arguments. If this + # is done, it will mess up the function call that will be created in + # `Playback.run`. + expected_arg_count = cur_frame.f_back.f_code.co_argcount # type: ignore + if "kwargs" in self._kwargs: + expected_arg_count += 1 + _LOG.debug(hprint.to_str("expected_arg_count")) + # TODO(gp): Is this necessary? + # hdbg.dassert_eq( + # expected_arg_count, + # len(cur_frame.f_back.f_locals), # type: ignore + # msg="the Playback class should be the first thing instantiated in" + # " a function.", + # ) + # If the function is a method, store the parent class so we can also + # create that in the test. + if "self" in self._kwargs: + x = self._kwargs.pop("self") + self._parent_class = x + self._code = [ + f"# Test created for {cur_frame.f_back.f_globals['__name__']}" # type: ignore + f".{x.__class__.__name__}.{self._func_name}." + ] + else: + self._parent_class = None + self._code = [ + # pylint: disable=line-too-long + f"# Test created for {cur_frame.f_back.f_globals['__name__']}.{self._func_name}." # type: ignore + ] + self._append("") + # Check if need to write the code directly to file. + self._to_file = to_file if to_file is not None else False + # Find filename to write the code. + file_with_code = cur_frame.f_back.f_code.co_filename # type: ignore + self._test_file = self._get_test_file_name(file_with_code) + # Check if file exists, need to keep code already here. + self._file_exists = False + if self._to_file: + self._update_code_to_existing() + # Limit number of tests per tested function. + self._max_tests = max_tests or float("+inf") + + @staticmethod + def test_code(output: str) -> None: + # Try to execute in a fake environment. 
+ # ``` + # local_env = {} + # _ = exec(output, local_env) + # ``` + _ = exec(output) # pylint: disable=exec-used + + def _check_code(self, func_output: Any) -> None: + """ + Generate test code that makes an assertion. + """ + if self.mode == "check_string": + if isinstance(func_output, (pd.DataFrame, pd.Series, str)): + if not isinstance(func_output, str): + self._append( + "actual = hpandas.df_to_str(actual, num_rows=None)", 2 + ) + if not isinstance(func_output, (str, bytes)): + self._append("actual = str(actual)", 2) + self._append("# Check output.", 2) + self._append("self.check_string(actual)", 2) + elif self.mode == "assert_equal": + self._append("# Define expected output.", 2) + func_output_as_code = to_python_code(func_output) + self._append(f"expected = {func_output_as_code}", 2) + if not isinstance( + func_output, (int, float, str, list, dict, pd.DataFrame) + ): + self._append("expected = jsonpickle.decode(expected)", 2) + + if isinstance(func_output, (pd.DataFrame, pd.Series)): + self._append( + "actual = hpandas.df_to_str(actual, num_rows=None)", 2 + ) + self._append( + "expected = hpandas.df_to_str(expected, num_rows=None)", 2 + ) + self._append("# Compare actual and expected output.", 2) + self._append("self.assertEqual(actual, expected)", 2) + else: + raise ValueError(f"Invalid mode='{self.mode}'") + + def _add_imports(self, additional: Optional[List[str]] = None) -> None: + """ + Add the code with imports. + """ + # Add imports. + self._append("import helpers.hpandas as hpandas") + self._append("import helpers.hunit_test as hunitest") + self._append("import jsonpickle") + self._append("import pandas as pd") + self._append("import config_root.config as cconfi") + for a in additional or []: + self._append(a) + self._code.extend(["", ""]) + + def _get_class_name_string(self) -> str: + """ + Get a string for the test code with the name of the test class. + + I.e. "class TestMyMethod(hunitest.TestCase):". 
+ """ + test_name = ( + self._parent_class.__class__.__name__ + if self._parent_class is not None + else "" + ) + test_name += "".join( + [x.capitalize() for x in self._func_name.split("_")] + ) + class_string = f"class Test{test_name}(hunitest.TestCase):" + return class_string + + def _get_class_count(self) -> int: + """ + Find a number of already generated tests for the method. + """ + class_string = self._get_class_name_string() + count = 0 + for line in self._code: + count += line == class_string + return count + + def _add_test_class(self) -> None: + """ + Add the code with the test class definition and the test method + definition. + """ + # Add test class and test method. + class_string = self._get_class_name_string() + # Find how many times method was tested. + count = self._get_class_count() + if count >= self._max_tests: + # If it was already tested enough times, raise. + raise IndexError(f"{self._max_tests} tests already generated") + # Otherwise, continue to create a test code. + self._append(class_string) + self._append(f"def test{count + 1}(self) -> None:", 1) + + def _add_function_call(self) -> None: + """ + Add a call of the function to test to the test code. + """ + self._append("# Call function to test.", 2) + if self._parent_class is None: + fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] + self._append(f"actual = {self._func_name}({', '.join(fnc_call)})", 2) + else: + var_code = to_python_code(self._parent_class) + # Re-create the parent class. + self._append(f"cls = {var_code}", 2) + self._append("cls = jsonpickle.decode(cls)", 2) + fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] + # Call the method as a child of the parent class. + self._append( + f"actual = cls.{self._func_name}({', '.join(fnc_call)})", 2 + ) + + def _add_var_definitions(self) -> None: + """ + Add variables definitions for the function to test. 
+ """ + if self._kwargs: + self._append("# Define input variables.", 2) + for key in self._kwargs: + as_python = to_python_code(self._kwargs[key]) + self._append(f"{key} = {as_python}", 2) + # Decode back to an actual Python object, if necessary. + if not isinstance( + self._kwargs[key], + ( + int, + float, + str, + list, + dict, + pd.DataFrame, + pd.Series, + cconfig.Config, + ), + ): + self._append(f"{key} = jsonpickle.decode({key})", 2) + + def _gen_code(self) -> str: + """ + Construct string with all generated test code. + """ + code = "\n".join(self._code) + "\n" + _LOG.debug("code=\n%s", code) + if self._to_file: + hio.to_file(self._test_file, code) + return code + + def run(self, func_output: Any) -> str: + """ + Generate a unit test for the function. + + The unit test compares the actual function output with the expected + `func_output`. + + :param func_output: the expected function output + :return: the code of the unit test + """ + if self._to_file and self._file_exists: + # Imports were added before, so skip. + pass + else: + # Start with imports. + self._add_imports() + # Count if we reached max number of tests generated for a single function. + try: + self._add_test_class() + except IndexError as exception: + # If there are already enough tests, not add anything. + _LOG.warning(str(exception)) + return "" + self._add_var_definitions() + self._add_function_call() + self._check_code(func_output) + return self._gen_code() + + +# ############################################################################# + + +def json_pretty_print(parsed: Any) -> str: + """ + Pretty print a JSON object. 
+ + :param parsed: a JSON object + :return: a prettified JSON object + """ + if isinstance(parsed, str): + parsed = json.loads(parsed) + # `ret = pprint.pformat(parsed) + ret = json.dumps(parsed, indent=4, sort_keys=True) + return ret + + +def round_trip_convert(obj1: Any, log_level: int) -> Any: + """ + Encode and decode with `jsonpickle` ensuring the object remains the same. + + :param obj1: the initial object + :param log_level: the level of logging + :return: the object after encoding and decoding + """ + _LOG.log(log_level, "# obj1=\n%s", obj1) + _LOG.log(log_level, "class=%s", type(obj1)) + # Encode. + frozen = jsonpickle.encode(obj1) + _LOG.log(log_level, "# frozen=\n%s", json_pretty_print(frozen)) + # Decode. + obj2 = jsonpickle.decode(frozen) + _LOG.log(log_level, "# obj2=\n%s", obj2) + _LOG.log(log_level, "class=%s", type(obj1)) + # Check whether the decoded version is the same as the initial object. + if str(type(obj1)).startswith(" Callable: + def wrapper(*args: Any, **kwargs: Any) -> Any: + import helpers.hplayback as hplayba + + playback = hplayba.Playback("assert_equal") + res = func(*args, **kwargs) + code = playback.run(res) + print(code) + return res + + return wrapper(func) + + +# Inline the decorator as: +# +# 1) Rename `target_func` -> `target_func_tmp` +# ``` +# def target_function_tmp(...): +# ... +# ``` +# +# 2) Add wrapper: +# ``` +# def target_function_tmp(...): +# ... 
+# +# from typing import Any +# +# def target_function(*args: Any, **kwargs: Any) -> Any: +# import helpers.hplayback as hplayb +# playback = hplayb.Playback("assert_equal") +# res = target_func_tmp(*args, **kwargs) +# code = playback.run(res) +# print(code) +# return res +# ``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py new file mode 100644 index 000000000..29a504226 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py @@ -0,0 +1,1076 @@ +""" +Import as: + +import helpers.hprint as hprint +""" + +import functools +import inspect +import logging +import pprint +import re +import sys +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union + +import helpers.hdbg as hdbg + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. +_LOG.setLevel(logging.INFO) + + +# ############################################################################# +# Debug output +# ############################################################################# + +_COLOR_MAP = { + "bold": 1, + # Colors. + "blue": 94, + "green": 92, + "white": 0, + "purple": 95, + "red": 91, + "yellow": 33, + # Blue. + "DEBUG": 34, + # Cyan. + "INFO": 36, + # Yellow. + "WARNING": 33, + # Red. + "ERROR": 31, + # White on red background. + "CRITICAL": 41, +} + + +def color_highlight(text: str, color: str) -> str: + """ + Return a colored string. 
+ """ + prefix = "\033[" + suffix = "\033[0m" + hdbg.dassert_in(color, _COLOR_MAP) + color_code = _COLOR_MAP[color] + txt = f"{prefix}{color_code}m{text}{suffix}" + return txt + + +def clear_screen() -> None: + print((chr(27) + "[2J")) + + +def line(char: Optional[str] = None, num_chars: Optional[int] = None) -> str: + """ + Return a line with the desired character. + """ + char = "#" if char is None else char + num_chars = 80 if num_chars is None else num_chars + return char * num_chars + + +def pprint_pformat(obj: Any, *, sort_dicts: bool = False) -> str: + """ + Pretty-print in color. + """ + from pygments import highlight + from pygments.formatters import Terminal256Formatter + from pygments.lexers import PythonLexer + + txt = pprint.pformat(obj, sort_dicts=sort_dicts) + txt = highlight(txt, PythonLexer(), Terminal256Formatter()) + txt = txt.rstrip() + return txt + + +def pprint_color(obj: Any, *, tag: Optional[str] = None, sep: str = "") -> None: + """ + Pretty-print in color. + """ + txt = "" + if tag is not None: + txt += tag + "= " + sep + txt += pprint_pformat(obj) + print(txt) + + +# TODO(gp): -> Use *args instead of forcing to build a string to simplify the caller. +def frame( + message: str, + *, + char1: Optional[str] = None, + num_chars: Optional[int] = None, + char2: Optional[str] = None, + thickness: int = 1, + level: int = 0, +) -> str: + """ + Print a frame around a message. + + :param message: message to print + :param char1: char for top line of the frame + :param num_chars: how many chars in each line (by default 80 chars) + :param char2: char for bottom line of the frame + :param thickness: how many overlapping lines + - E.g., thickness = 2 + ``` + # #######... + # #######... + # hello + # #######... + # #######... + ``` + :param level: level of framing indent based on `#` char: + - E.g., level = 0 + ``` + #######... + hello + #######... + ``` + - E.g., level = 1 + ``` + # #######... + # hello + # #######... 
+ ``` + """ + hdbg.dassert_isinstance(message, str) + # Fill in the default values. + if char1 is None: + # User didn't specify any char. + char1 = char2 = "#" + elif char1 is not None and char2 is None: + # User specified only one char. + char2 = char1 + elif char1 is None and char2 is not None: + # User specified the second char, but not the first one. + hdbg.dfatal(f"Invalid char1='{char1}' char2='{char2}'") + else: + # User specified both chars. Nothing to do. + pass + num_chars = 80 if num_chars is None else num_chars + # Sanity check. + hdbg.dassert_eq(len(char1), 1) + hdbg.dassert_lte(1, num_chars) + hdbg.dassert_eq(len(char2), 1) + hdbg.dassert_lte(1, thickness) + hdbg.dassert_lte(0, level) + # Build the return value. + prefix = "" + if level: + prefix = "#" * level + " " + ret = ( + (prefix + (line(char1, num_chars) + "\n") * thickness) + + (prefix + message + "\n") + + (prefix + (line(char2, num_chars) + "\n") * thickness) + ).rstrip("\n") + return ret + + +# ############################################################################# + + +StrOrList = Union[str, List[str]] + + +# TODO(gp): Use this everywhere in the codebase to avoid back-and-forth +# transforms between strings and lists of strings. +def split_lines(func: Callable) -> Callable: + """ + A decorator that splits a string input into lines before passing it to the + decorated function which expects a list of lines. + """ + + @functools.wraps(func) + def wrapper(txt: StrOrList, *args: Any, **kwargs: Any) -> StrOrList: + if isinstance(txt, str): + # Split the txt into lines. + lines = txt.splitlines() + is_str = True + else: + # The txt is already a list of lines: pass it as is. + hdbg.dassert_isinstance(txt, list) + lines = txt + is_str = False + # Call the function. + lines = func(lines, *args, **kwargs) + if is_str: + # Join the lines back together. + out = "\n".join(lines) + else: + # The output is already a list of lines. 
+ hdbg.dassert_isinstance(lines, list) + out = lines + return out + + return wrapper + + +@split_lines +def prepend(lines: List[str], prefix: str) -> List[str]: + """ + Add `prefix` before each line of the string `txt`. + """ + hdbg.dassert_isinstance(lines, list) + lines_out = [prefix + curr_line for curr_line in lines] + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +@split_lines +def indent(lines: List[str], *, num_spaces: int = 2) -> List[str]: + """ + Add `num_spaces` spaces before each line of the passed string. + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(num_spaces, int) + hdbg.dassert_lte(0, num_spaces) + spaces = " " * num_spaces + txt_out = [] + for curr_line in lines: + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def strict_split(lines: List[str], max_length: int) -> List[str]: + """ + Split a string into chunks of `max_length` characters. + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_lte(1, max_length) + lines_out = [] + for line in lines: + for i in range(0, len(line), max_length): + lines_out.append(line[i : i + max_length]) + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +@split_lines +def remove_lead_trail_empty_lines(lines: List[str]) -> List[str]: + """ + Remove consecutive empty lines only at the beginning / end of a string. + """ + hdbg.dassert_isinstance(lines, list) + # Remove leading empty lines. + while lines and not lines[0].strip(): + lines.pop(0) + # Remove trailing empty lines. 
+ while lines and not lines[-1].strip(): + lines.pop() + hdbg.dassert_isinstance(lines, list) + return lines + + +@split_lines +def dedent( + lines: List[str], *, remove_lead_trail_empty_lines_: bool = True +) -> List[str]: + """ + Remove from each line the minimum number of spaces to align the text on the + left. + + It is the opposite of `indent()`. + + :param txt: multi-line string + :param txt: multi-line string + :param remove_lead_trail_empty_lines_: if True, remove all the empty + lines at the beginning and at the end + """ + if remove_lead_trail_empty_lines_: + lines = remove_lead_trail_empty_lines(lines) + # Find the minimum number of leading spaces. + min_num_spaces = None + for curr_line in lines: + _LOG.debug( + "min_num_spaces=%s: curr_line='%s'", min_num_spaces, curr_line + ) + # Skip empty lines. + if curr_line.lstrip().rstrip() == "": + _LOG.debug(" -> Skipping empty line") + continue + m = re.search(r"^(\s*)", curr_line) + hdbg.dassert(m) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. + assert m is not None + curr_num_spaces = len(m.group(1)) + _LOG.debug(" -> curr_num_spaces=%s", curr_num_spaces) + if min_num_spaces is None or curr_num_spaces < min_num_spaces: + min_num_spaces = curr_num_spaces + _LOG.debug("min_num_spaces=%s", min_num_spaces) + # Process each line and remove the minimum indentation. + txt_out = [] + for curr_line in lines: + _LOG.debug("curr_line='%s'", curr_line) + # Skip empty lines. + if curr_line.lstrip().rstrip() == "": + txt_out.append("") + continue + hdbg.dassert_lte(min_num_spaces, len(curr_line)) + txt_out.append(curr_line[min_num_spaces:]) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def align_on_left(lines: List[str]) -> List[str]: + """ + Remove all leading/trailing spaces for each line. 
+ """ + hdbg.dassert_isinstance(lines, list) + txt_out = [] + for curr_line in lines: + curr_line = curr_line.rstrip(" ").lstrip(" ") + txt_out.append(curr_line) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def remove_empty_lines( + lines: List[str], *, mode: str = "no_empty_lines" +) -> List[str]: + """ + Remove empty lines from a multi-line string. + + :param lines: list of input lines to process + :param mode: + - no_empty_lines: remove all empty lines + - no_consecutive_empty_lines: remove consecutive empty lines + :return: lines with empty lines removed + """ + hdbg.dassert_isinstance(lines, list) + if mode == "no_empty_lines": + lines_out = [line for line in lines if line.rstrip().lstrip() != ""] + elif mode == "no_consecutive_empty_lines": + # If there are two or more consecutive empty lines, remove all but the last one. + lines_out = [] + prev_empty = False + for line in lines: + if re.search(r"^\s*$", line): + if prev_empty: + continue + prev_empty = True + else: + prev_empty = False + lines_out.append(line) + else: + raise ValueError(f"Invalid mode='{mode}'") + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +def vars_to_debug_string(vars_as_str: List[str], locals_: Dict[str, Any]) -> str: + """ + Create a string with var name -> var value. + + E.g., ["var1", "var2"] is converted into: ``` var1=... var2=... ``` + """ + txt = [] + for var in vars_as_str: + txt.append(var + "=") + txt.append(indent(str(locals_[var]))) + return "\n".join(txt) + + +# ############################################################################# +# Pretty print data structures. 
+# ############################################################################# + + +def to_object_str(obj: Any) -> str: + class_name = str(obj.__class__.__name__) + hex_str = str(hex(id(obj))) + return f"{class_name} at {hex_str}" + + +def to_object_repr(obj: Any) -> str: + class_module = str(obj.__class__.__module__) + class_name = str(obj.__class__.__name__) + hex_str = str(hex(id(obj))) + return f"<{class_module}.{class_name} at {hex_str}>" + + +def thousand_separator(v: float) -> str: + v = "{0:,}".format(v) + return v + + +# TODO(gp): -> to_percentage +def perc( + a: float, + b: float, + *, + invert: bool = False, + num_digits: int = 2, + only_perc: bool = False, + use_float: bool = False, + only_fraction: bool = False, + use_thousands_separator: bool = False, +) -> Union[str, float]: + """ + Calculate percentage a / b as a string. + + Asserts 0 <= a <= b. If true, returns a/b to `num_digits` decimal places. + + :param a: numerator + :param b: denominator + :param invert: assume the fraction is (b - a) / b + This is useful when we want to compute the complement of a count. + :param num_digits: number of digits to represent the percentage + :param only_perc: return only the percentage, without the fraction + - E.g., "50.00%" vs "10 / 20 = 50.00%" + :param use_float: return the percentage as a float. It requires + `only_perc = True` + :param only_fraction: return only the fraction, without the percentage + - E.g., "10 / 20" vs "10 / 20 = 50.00%" + :param use_thousands_separator: report the numbers using thousands separator + :return: string with a/b + """ + hdbg.dassert_lte(0, a) + hdbg.dassert_lte(a, b) + if invert: + a = b - a + if use_thousands_separator: + a_str = str("{0:,}".format(a)) + b_str = str("{0:,}".format(b)) + else: + a_str = str(a) + b_str = str(b) + # Validate and format the percentage. + hdbg.dassert_lte(0, num_digits) + if only_perc: + fmt = "%." 
+ str(num_digits) + "f" + ret = fmt % (float(a) / b * 100.0) + if use_float: + # 57.27 + ret = float(ret) + else: + # 57.27% + hdbg.dassert_isinstance(ret, str) + ret += "%" + elif only_fraction: + # 4225 / 7377 + ret = f"{a_str} / {b_str}" + else: + # 4225 / 7377 = 57.27% + fmt = "%s / %s = %." + str(num_digits) + "f%%" + ret = fmt % (a_str, b_str, float(a) / b * 100.0) + return ret + + +def round_digits( + v: float, *, num_digits: int = 2, use_thousands_separator: bool = False +) -> str: + """ + Round digit returning a string representing the formatted number. + + :param v: value to convert + :param num_digits: number of digits to represent v on None is + (Default value = 2) + :param use_thousands_separator: use "," to separate thousands + (Default value = False) + :return: str with formatted value + """ + if (num_digits is not None) and isinstance(v, float): + fmt = "%0." + str(num_digits) + "f" + res = float(fmt % v) + else: + res = v + if use_thousands_separator: + res = "{0:,}".format(res) # type: ignore + res_as_str = str(res) + return res_as_str + + +# ############################################################################# +# Logging helpers +# ############################################################################# + + +# TODO(gp): Move this to hdbg.hlogging, but there are dependencies from this file. + +# https://stackoverflow.com/questions/2749796 has some solutions to find the +# name of variables from the caller. + + +_VarNamesType = Optional[Union[str, List[str]]] + + +def _to_var_list(expression: _VarNamesType) -> List[str]: + if isinstance(expression, List): + return expression + hdbg.dassert_isinstance(expression, str) + # If expression is a list of space-separated expressions, convert each in a + # string. + exprs = [v.lstrip().rstrip() for v in expression.split(" ")] + # Remove empty var names. 
+ exprs = [v for v in exprs if v.strip().rstrip() != ""] + hdbg.dassert_isinstance(exprs, list) + hdbg.dassert_lte(1, len(exprs)) + return exprs + + +def to_str( + expression: str, + *, + frame_level: int = 1, + print_lhs: bool = True, + char_separator: str = ",", + mode: str = "repr", +) -> str: + """ + Return a string with the value of a variable / expression / multiple + variables. + + If expression is a space-separated compound expression, convert it into + `exp1=val1, exp2=val2, ...`. + + This is similar to Python 3.8 f-string syntax `f"{foo=} {bar=}"`. + We don't want to force to use Python 3.8 just for this feature. + ``` + > x = 1 + > to_str("x+1") + x+1=2 + ``` + + :param expression: the variable / expression to evaluate and print. + E.g., `to_str("exp1")` is converted into `exp1=val1`. + If expression is a space-separated compound expression, e.g., + `to_str("exp1 exp2 ...")`, it is converted into `exp1=val1, exp2=val2, ...` + :param frame_level: level of the frame to inspect + :param print_lhs: whether we want to print the left hand side (i.e., `exp1`) + :param char_separator: separator between the values of the expressions + when printed (e.g., `,`) + :param mode: select how to print the value of the expressions (e.g., `str`, + `repr`, `pprint`, `pprint_color`) + """ + # TODO(gp): If we pass an object it would be nice to find the name of it. + # E.g., https://github.com/pwwang/python-varname + hdbg.dassert_isinstance(expression, str) + if " " in expression: + exprs = _to_var_list(expression) + # Convert each expression into a value. + _to_str = lambda x: to_str(x, frame_level=frame_level + 2) + values = list(map(_to_str, exprs)) + # Assemble in a return value. + hdbg.dassert_lte(len(char_separator), 1) + sep = char_separator + " " + txt = sep.join(values) + return txt + # Certain expressions are evaluated as literals. + if expression in ("", "->", ":", "=", "\n"): + return expression + # Evaluate the expression. 
+ frame_ = sys._getframe(frame_level) # pylint: disable=protected-access + ret = "" + if print_lhs: + ret += expression + "=" + try: + eval_ = eval(expression, frame_.f_globals, frame_.f_locals) + except Exception as e: + print("expression=''", expression) + raise e + if mode == "str": + ret += str(eval_) + elif mode == "repr": + ret += repr(eval_) + elif mode == "pprint": + ret += "\n" + indent(pprint.pformat(eval_)) + elif mode == "pprint_color": + ret += "\n" + indent(pprint_pformat(eval_)) + else: + raise ValueError(f"Invalid mode='{mode}'") + return ret + + +# TODO(gp): Extend this to work on class methods, static and not. +def _func_signature_to_str( + skip_vars: _VarNamesType, + assert_on_skip_vars_error: bool, + frame_level: int, +) -> Tuple[str, str]: + """ + Return the variables of the caller function as a string. + + Same params as `func_signature_to_str()`. + :return: function name and string with the variables of the caller function + as `var1 var2 ...` + """ + if skip_vars is not None: + skip_vars = _to_var_list(skip_vars) + # Get the caller's frame (i.e., the function that called this function). + caller_frame = inspect.currentframe() + for _ in range(frame_level): + hdbg.dassert_is_not( + caller_frame, None, "caller_frame should not be None" + ) + caller_frame = caller_frame.f_back + hdbg.dassert_is_not( + caller_frame, + None, + "caller_frame should not be None after traversing frames", + ) + caller_function_name = caller_frame.f_code.co_name + # _LOG.debug("caller_function_name=%s", caller_function_name) + # Retrieve the function object from the caller's frame. 
+ caller_function = caller_frame.f_globals.get(caller_function_name, None) + if caller_function: + # Get the function's signature + sig = inspect.signature(caller_function) + var_names = list(sig.parameters.keys()) + if skip_vars: + if assert_on_skip_vars_error: + hdbg.dassert_is_subset(skip_vars, var_names) + var_names = [ + var_name for var_name in var_names if var_name not in skip_vars + ] + vars_str = " ".join(var_names) + else: + raise ValueError("Unable to determine caller function") + return caller_function_name, vars_str + + +def func_signature_to_str( + # We don't use * since we want to keep it simple to call this function. + skip_vars: _VarNamesType = None, + *, + assert_on_skip_vars_error: bool = True, + frame_level: int = 2, +) -> str: + r""" + Return the variables of the caller function as a string. + + Use like: + ``` + _LOG.debug("\n%s", hprint.func_signature_to_str()) + ``` + + :param skip_vars: list of variables to skip + :param assert_on_skip_vars_error: whether to assert if the variables to skip + are not found in the function signature + :param frame_level: level of the frame to inspect. By default we need to + access the frame of the caller of the caller, so frame_level = 2 + """ + # Get the variables. + func_name, func_signature = _func_signature_to_str( + skip_vars, + assert_on_skip_vars_error, + frame_level, + ) + # Get the value of the variables. + val = to_str(func_signature, frame_level=frame_level) + val = f"# {func_name}: {val}" + return val + + +# ############################################################################# + + +def log(logger: logging.Logger, verbosity: int, *vals: Any) -> None: + """ + Log at a certain verbosity. 
+ + `log(_LOG, logging.DEBUG, "ticker", "exchange")` + + is equivalent to statements like: + + ``` + _LOG.debug("%s, %s", to_str("ticker"), to_str("exchange")) + _LOG.debug("ticker=%s, exchange=%s", ticker, exchange) + ``` + """ + logger_verbosity = hdbg.get_logger_verbosity() + # print("verbosity=%s logger_verbosity=%s" % (verbosity, logger_verbosity)) + # We want to avoid the overhead of converting strings, so we evaluate the + # expressions only if we are going to print. + if verbosity >= logger_verbosity: + # We need to increment frame_lev since we are 2 levels deeper in the stack. + _to_str = lambda x: to_str(x, frame_level=3) + num_vals = len(vals) + if num_vals == 1: + fstring = "%s" + vals = _to_str(vals[0]) # type: ignore + else: + fstring = ", ".join(["%s"] * num_vals) + vals = list(map(_to_str, vals)) # type: ignore + logger.log(verbosity, fstring, vals) + + +# TODO(gp): Replace calls to `_LOG.debug("\n%s", hprint.frame(...)` with this. +# TODO(gp): Consider changing the signature from +# _log_frame(_LOG, "hello", verbosity=logger.INFO)) +# to +# _log_frame(_LOG.info, "hello", ...) +# by using the first element as a Callable +def log_frame( + logger: logging.Logger, + fstring: str, + *args: Any, + level: int = 1, + char: str = "#", + verbosity: int = logging.DEBUG, +) -> None: + """ + Log using a frame around the text with different number of leading `#` (or + `char`) to organize the log visually. 
+ + The logging output looks like: + _log_frame(_LOG, "hello", verbosity=logger.INFO)) + ``` + 07:44:51 printing : log_frame : 390 : + # ######################################################################### + # hello + # ######################################################################### + ``` + + :param txt: text to print in a frame + :param level: number of `#` (or `char`) to prepend the logged text + :param char: char to prepend the logged text with + :param verbosity: logging verbosity + """ + hdbg.dassert_isinstance(logger, logging.Logger) + hdbg.dassert_isinstance(fstring, str) + msg = fstring % args + msg = msg.rstrip().lstrip() + msg = frame(msg) + # Prepend a `# `, if needed. + if level > 0: + prefix = level * char + " " + msg = prepend(msg, prefix=prefix) + # Add an empty space. + msg = "\n" + msg + logger.log(verbosity, "%s", msg) + + +# ############################################################################# + + +def type_to_string(type_as_str: str) -> str: + """ + Return a short string representing the type of an object, e.g., + "dataflow.Node" (instead of "class <'dataflow.Node'>") + """ + if isinstance(type_as_str, type): + type_as_str = str(type_as_str) + hdbg.dassert_isinstance(type_as_str, str) + # Remove the extra string from: + # + prefix = " str: + ret = f"({type(obj)}) {obj}" + return ret + + +# ############################################################################# + + +def format_list( + list_: List[Any], + *, + sep: str = " ", + max_n: Optional[int] = None, + tag: Optional[str] = None, +) -> str: + # sep = ", " + if max_n is None: + max_n = 10 + hdbg.dassert_lte(1, max_n) + n = len(list_) + txt = "" + if tag is not None: + txt += f"{tag}: " + txt += f"({n}) " + if n < max_n: + txt += sep.join(map(str, list_)) + else: + num_elems = int(max_n / 2) + hdbg.dassert_lte(1, num_elems) + txt += sep.join(map(str, list_[:num_elems])) + txt += " ... 
" + # pylint: disable=invalid-unary-operand-type + txt += sep.join(map(str, list_[-num_elems:])) + return txt + + +# TODO(gp): Use format_list(). +def list_to_str( + list_: List, + *, + tag: str = "", + sort: bool = False, + axis: int = 0, + to_string: bool = False, +) -> str: + """ + Print list / index horizontally or vertically. + """ + # TODO(gp): Fix this. + _ = to_string + txt = "" + if axis == 0: + if list_ is None: + txt += f"{tag}: (0) None\n" + else: + # hdbg.dassert_in(type(l), (list, pd.Index, pd.Int64Index)) + vals = list(map(str, list_)) + if sort: + vals = sorted(vals) + txt += f"{tag}: ({len(list_)}) {' '.join(vals)}\n" + elif axis == 1: + txt += f"{tag} ({len(list_)}):\n" + vals = list(map(str, list_)) + if sort: + vals = sorted(vals) + txt += "\n".join(vals) + "\n" + else: + raise ValueError(f"Invalid axis='{axis}'") + return txt + + +def list_to_str2( + vals: List[Any], + *, + sep_char: str = ", ", + enclose_str_char: str = "'", + max_num: Optional[int] = 10, +) -> str: + """ + Convert a list of values into a formatted string representation. + + E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" + + :param vals: values to be converted + :param sep_char: separator to use between elements + :param enclose_str_char: character to enclose each element's string + representation; if empty, elements are not enclosed + :param max_num: maximum number of elements to display in the output + :return: the formatted string representing the list + """ + vals_as_str = list(map(str, vals)) + # Add a str around. + if enclose_str_char: + vals_as_str = [ + enclose_str_char + v + enclose_str_char for v in vals_as_str + ] + # Build the output string with optional truncation. + ret = f"{len(vals)} [" + if max_num is not None and len(vals) > max_num: + hdbg.dassert_lt(1, max_num) + ret += sep_char.join(vals_as_str[: int(max_num / 2)]) + ret += sep_char + "..." 
def set_diff_to_str(
    obj1: Iterable,
    obj2: Iterable,
    *,
    obj1_name: str = "obj1",
    obj2_name: str = "obj2",
    sep_char: str = " ",
    add_space: bool = False,
) -> str:
    """
    Report how two collections overlap, treating both of them as sets.

    The report has one line for each of: the first set, the second set, their
    intersection, and the two one-sided differences. Elements are printed
    sorted.

    :param obj1: first iterable; must contain at least one element
    :param obj2: second iterable; must contain at least one element
    :param obj1_name: label used for the first object in the report
    :param obj2_name: label used for the second object in the report
    :param sep_char: separator between the elements of a set; with `"\n"` the
        elements are printed one per line and indented
    :param add_space: if True, add an empty line after each entry
    :return: the report as a single string

    Example:
    ```
    >>> set_diff_to_str([1, 2, 3, 4], [3, 4, 5, 6], obj1_name="list1", obj2_name="list2")
    * list1: (4) 1 2 3 4
    * list2: (4) 3 4 5 6
    * intersect=(2) 3 4
    * list1-list2=(2) 1 2
    * list2-list1=(2) 5 6
    ```
    """

    def _render(items: Iterable) -> str:
        joined = sep_char.join(str(item) for item in sorted(items))
        if sep_char == "\n":
            # One element per line, starting on a fresh indented line.
            return indent("\n" + joined)
        return joined

    # First object.
    first = set(obj1)
    hdbg.dassert_lte(1, len(first))
    entries: List[str] = [f"* {obj1_name}: ({len(first)}) {_render(first)}"]
    # Second object.
    second = set(obj2)
    hdbg.dassert_lte(1, len(second))
    entries.append(f"* {obj2_name}: ({len(second)}) {_render(second)}")
    # Elements in common.
    common = first & second
    entries.append(f"* intersect=({len(common)}) {_render(common)}")
    # Elements only in the first object.
    only_first = first - second
    entries.append(
        f"* {obj1_name}-{obj2_name}=({len(only_first)}) {_render(only_first)}"
    )
    # Elements only in the second object.
    only_second = second - first
    entries.append(
        f"* {obj2_name}-{obj1_name}=({len(only_second)}) {_render(only_second)}"
    )
    if add_space:
        # Follow every entry (including the last one) with an empty line.
        spaced: List[str] = []
        for entry in entries:
            spaced.extend((entry, ""))
        entries = spaced
    return "\n".join(entries)
def to_info(tag: str, txt: Union[str, List[str]]) -> str:
    """
    Build a `# <tag>` header line followed by the indented text.

    :param tag: label printed in the header line
    :param txt: text to indent, either a string or a list of strings that are
        joined with newlines
    :return: header and indented text, with all trailing newlines removed
    """
    hdbg.dassert_isinstance(tag, str)
    hdbg.dassert_isinstance(txt, (str, list))
    # Normalize the text to a single string.
    if isinstance(txt, str):
        body = txt
    else:
        for elem in txt:
            hdbg.dassert_isinstance(elem, str)
        body = "\n".join(txt)
    header = "# " + tag + "\n"
    # Drop every trailing newline so that the result ends with text.
    info = (header + indent(body)).rstrip("\n")
    _LOG.debug("'%s'", info)
    return info
def pytest_clean(dir_name: str, preview: bool = False) -> None:
    """
    Delete the pytest artifacts reported by `_pytest_show_artifacts()` under a
    directory.

    :param dir_name: root directory to clean; it must exist
    :param preview: if True, only log what would be removed, without deleting
        anything
    :raises ValueError: if an artifact is neither a file nor a directory
    """
    _LOG.warning("Cleaning pytest artifacts")
    hdbg.dassert_ne(dir_name, "")
    hdbg.dassert_dir_exists(dir_name)
    if preview:
        _LOG.warning("Preview only: nothing will be deleted")
    # Show before cleaning.
    file_names = _pytest_show_artifacts(dir_name, tag="Before cleaning")
    # Clean.
    for file_name in file_names:
        # The artifacts are found with `cd dir_name && find .`, so the names
        # are relative to `dir_name` and not to the current working dir.
        path = os.path.normpath(os.path.join(dir_name, file_name))
        exists = os.path.exists(path)
        _LOG.debug("%s -> exists=%s", path, exists)
        if not exists:
            # E.g., a `.pyc` file inside a `__pycache__` dir that was already
            # removed.
            continue
        if preview:
            _LOG.debug("rm %s", path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
        elif os.path.isfile(path):
            os.remove(path)
        else:
            raise ValueError(f"Can't delete {path}")
    # Show after cleaning.
    file_names = _pytest_show_artifacts(dir_name, tag="After cleaning")
    if not preview:
        # In preview mode nothing is deleted, so the artifacts are still
        # there and the check would always fail.
        hdbg.dassert_eq(len(file_names), 0)
+ for suite in self.xml_data: + if isinstance(suite, junitparser.TestSuite): + self.overall_stats["total_time"] += suite.time or 0 + self.overall_stats["total_tests"] += suite.tests or 0 + self.overall_stats["passed"] += ( + (suite.tests or 0) + - (suite.failures or 0) + - (suite.errors or 0) + - (suite.skipped or 0) + ) + self.overall_stats["failed"] += suite.failures or 0 + self.overall_stats["error"] += suite.errors or 0 + self.overall_stats["skipped"] += suite.skipped or 0 + except Exception as e: + print(hprint.color_highlight(f"Error parsing XML file: {e}", "red")) + sys.exit(1) + + def _get_colored_status(self, case: junitparser.TestCase) -> str: + """ + Get the colored status representation of test case. + """ + if not case.result or len(case.result) == 0: + return hprint.color_highlight("PASSED", "green") + result_type = case.result[0].__class__.__name__ + if result_type == "Failure": + return hprint.color_highlight("FAILED", "red") + elif result_type == "Error": + return hprint.color_highlight("ERROR", "red") + elif result_type == "Skipped": + return hprint.color_highlight("SKIPPED", "yellow") + else: + return hprint.color_highlight("PASSED", "green") + + def _print_detailed_results(self): + print(hprint.color_highlight("=" * 70, "bold")) + print( + hprint.color_highlight( + f"collected {self.overall_stats['total_tests']} items", "bold" + ) + ) + for _, suite in enumerate(self.xml_data): + if not isinstance(suite, junitparser.TestSuite): + continue + # Print suite header. + print(f"\n{hprint.color_highlight('=' * 70, 'blue')}") + print(hprint.color_highlight(f"Test: {suite.name}", "bold")) + print( + hprint.color_highlight( + f"Timestamp: {getattr(suite, 'timestamp', 'Unknown')}", + "bold", + ) + ) + print(hprint.color_highlight("-" * 70, "blue")) + # Print each test case. 
+ for case in suite: + if isinstance(case, junitparser.TestCase): + status_display = self._get_colored_status(case) + test_time = getattr(case, "time", 0) or 0 + print( + f" {case.classname}::{case.name} {status_display} ({test_time:.3f}s)" + ) + # Print suite summary. + suite_passed = ( + (suite.tests or 0) + - (suite.failures or 0) + - (suite.errors or 0) + - (suite.skipped or 0) + ) + summary_parts = [] + if suite_passed > 0: + summary_parts.append( + hprint.color_highlight(f"{suite_passed} passed", "green") + ) + if suite.failures and suite.failures > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.failures} failed", "red") + ) + if suite.errors and suite.errors > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.errors} error", "red") + ) + if suite.skipped and suite.skipped > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.skipped} skipped", "WARNING") + ) + suite_summary = ( + ", ".join(summary_parts) if summary_parts else "no tests" + ) + suite_time = getattr(suite, "time", 0) or 0 + print( + hprint.color_highlight( + f"Summary: {suite_summary} in {suite_time:.3f}s", "INFO" + ) + ) + + def _print_final_summary(self): + summary_parts = [] + if self.overall_stats["passed"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['passed']} passed", "green" + ) + ) + if self.overall_stats["failed"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['failed']} failed", "red" + ) + ) + if self.overall_stats["error"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['error']} error", "red" + ) + ) + if self.overall_stats["skipped"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['skipped']} skipped", "yellow" + ) + ) + summary_text = ", ".join(summary_parts) if summary_parts else "no tests" + time_text = "in " + hprint.color_highlight( + f"{self.overall_stats['total_time']:.2f}s", "bold" + ) + # Determine overall 
def sync_retry(
    num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0
) -> object:
    """
    Decorator retrying the wrapped function/method when it raises one of the
    `exceptions`.

    :param num_attempts: total number of times the wrapped function/method is
        called before giving up; it must be at least 1
    :param exceptions: tuple of exception types that trigger a retry; any other
        exception propagates immediately
    :param retry_delay_in_sec: the number of seconds to wait between two
        consecutive attempts
    :return: a decorator; the decorated callable returns the result of the
        first successful attempt, or re-raises the exception of the last one
    :raises ValueError: if `num_attempts` is smaller than 1
    """
    if num_attempts < 1:
        # With no attempts there is neither a result nor an exception to
        # report.
        raise ValueError(f"num_attempts={num_attempts} must be >= 1")

    def decorator(func) -> object:
        @functools.wraps(func)
        def retry_wrapper(*args, **kwargs):
            for attempts_count in range(1, num_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    _LOG.warning(
                        "Exception %s thrown when attempting to run %s, attempt "
                        "%d of %d",
                        e,
                        func,
                        attempts_count,
                        num_attempts,
                    )
                    if attempts_count == num_attempts:
                        _LOG.error(
                            "Function %s failed after %d attempts",
                            func,
                            num_attempts,
                        )
                        raise
                # Wait only when another attempt follows.
                time.sleep(retry_delay_in_sec)

        return retry_wrapper

    return decorator


def async_retry(
    num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0
) -> object:
    """
    Same as `sync_retry` decorator but for `async` functions.

    :param num_attempts: total number of times the wrapped coroutine function
        is awaited before giving up; it must be at least 1
    :param exceptions: tuple of exception types that trigger a retry; any other
        exception propagates immediately
    :param retry_delay_in_sec: the number of seconds to wait between two
        consecutive attempts
    :return: a decorator; the decorated coroutine function returns the result
        of the first successful attempt, or re-raises the exception of the
        last one
    :raises ValueError: if `num_attempts` is smaller than 1
    """
    if num_attempts < 1:
        # With no attempts there is neither a result nor an exception to
        # report.
        raise ValueError(f"num_attempts={num_attempts} must be >= 1")

    def decorator(func) -> object:
        @functools.wraps(func)
        async def retry_wrapper(*args, **kwargs):
            for attempts_count in range(1, num_attempts + 1):
                try:
                    return await func(*args, **kwargs)
                except exceptions as e:
                    _LOG.warning(
                        "Exception %s thrown when attempting to run %s, attempt "
                        "%d of %d",
                        e,
                        func,
                        attempts_count,
                        num_attempts,
                    )
                    if attempts_count == num_attempts:
                        _LOG.error(
                            "Function %s failed after %d attempts",
                            func,
                            num_attempts,
                        )
                        raise
                # Wait only when another attempt follows.
                await asyncio.sleep(retry_delay_in_sec)

        return retry_wrapper

    return decorator
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py @@ -0,0 +1,1129 @@ +""" +Import as: + +import helpers.hs3 as hs3 +""" + +import argparse +import configparser +import copy +import functools +import gzip +import logging +import os +import pathlib +import re +from typing import Any, Dict, List, Optional, Tuple, Union + +_WARNING = "\033[33mWARNING\033[0m" + +try: + import s3fs + + # Handle different versions of s3fs where core module may be at different locations + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module + try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ModuleNotFoundError: + _module = "s3fs" + print(_WARNING + f": Can't find {_module}: continuing") + # Define dummy classes for type hints when s3fs is not available + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +# Avoid the following dependency from other `helpers` modules to prevent import cycles. +# import helpers.hpandas as hpandas +# import helpers.hsql as hsql +# import helpers.hunit_test as hunitest + +# To enforce this order of the imports we use the directive for the linter below. 
def is_s3_path(s3_path: str) -> bool:
    """
    Check whether a path points to S3.

    A path is considered on S3 when it starts with `s3://`, but not with a
    doubled `s3://s3://` prefix.

    :param s3_path: path to check
    :return: True if the path is a well-formed S3 path
    """
    hdbg.dassert_isinstance(s3_path, str)
    has_prefix = s3_path.startswith("s3://")
    has_doubled_prefix = s3_path.startswith("s3://s3://")
    return has_prefix and not has_doubled_prefix


def dassert_is_s3_path(s3_path: str) -> None:
    """
    Fail if the path is not an S3 path.

    :param s3_path: path expected to be on S3
    """
    on_s3 = is_s3_path(s3_path)
    hdbg.dassert(on_s3, "Invalid S3 file='%s'", s3_path)


def dassert_is_not_s3_path(s3_path: str) -> None:
    """
    Fail if the path is an S3 path.

    :param s3_path: path expected not to be on S3
    """
    not_on_s3 = not is_s3_path(s3_path)
    hdbg.dassert(
        not_on_s3, "Passed an S3 file='%s' when it was not expected", s3_path
    )


def dassert_is_valid_aws_profile(path: str, aws_profile: AwsProfile) -> None:
    """
    Fail if `aws_profile` is not consistent with the kind of `path`.

    An S3 path requires an AWS profile, while any other path requires
    `aws_profile` to be `None`.

    :param path: S3 or local path
    :param aws_profile: AWS profile to use if and only if using an S3 path,
        otherwise `None` for local path
    """
    # Pick the identity check matching the kind of path.
    check = hdbg.dassert_is_not if is_s3_path(path) else hdbg.dassert_is
    check(aws_profile, None, "path=%s aws_profile=%s", path, aws_profile)
def dassert_path_exists(
    path: str, aws_profile: Optional[AwsProfile] = None
) -> None:
    """
    Fail if an S3 or local path doesn't exist.

    :param path: S3 or local path
    :param aws_profile: the name of an AWS profile or a s3fs filesystem; it is
        passed if and only if `path` is an S3 path
    """
    dassert_is_valid_aws_profile(path, aws_profile)
    if not is_s3_path(path):
        # Local path: rely on the standard check.
        hdbg.dassert_path_exists(path)
        return
    filesystem = get_s3fs(aws_profile)
    found = filesystem.exists(path)
    hdbg.dassert(found, f"S3 path '{path}' doesn't exist!")
+ + :param path: S3 or local path + :param aws_profile: the name of an AWS profile or a s3fs filesystem + """ + dassert_is_valid_aws_profile(path, aws_profile) + if is_s3_path(path): + s3fs_ = get_s3fs(aws_profile) + hdbg.dassert(not s3fs_.exists(path), f"S3 path '{path}' already exist!") + else: + hdbg.dassert_path_not_exists(path) + + +# TODO(gp): Consider using `s3fs.split_path`. +def split_path(s3_path: str) -> Tuple[str, str]: + """ + Separate an S3 path in the bucket and the rest of the path as absolute from + the root. + + E.g., for `s3://alphamatic-data/tmp/hello` returns (`alphamatic- + data`, /tmp/hello`) + """ + dassert_is_s3_path(s3_path) + # Remove the s3 prefix. + prefix = "s3://" + hdbg.dassert(s3_path.startswith(prefix)) + s3_path = s3_path[len(prefix) :] + # Break the path into dirs. + dirs = s3_path.split("/") + bucket = dirs[0] + abs_path = os.path.join("/", *dirs[1:]) + hdbg.dassert( + abs_path.startswith("/"), + "The path should be absolute instead of %s", + abs_path, + ) + return bucket, abs_path + + +def _replace_star_with_double_star(pattern_to_modify: str) -> str: + """ + Replace a single star with a double star in a pattern. + + Originally we simply used to do `pattern.replace("*", "**")`. + but in the newer versions of `s3fs` this is not allowed: + `ValueError: Invalid pattern: '**' can + only be an entire path component` + + We also need to take care of special such as: + *.csv* -> **/*.csv* + + Examples: + s3://bucket/*/path/* -> s3://bucket/**/*/path/**/* + s3://bucket/*/path/csv* -> s3://bucket/**/*/path/csv* + + :param pattern_to_modify: pattern to replace wildcards in + :return: pattern with wildcards replaced + """ + append_wildcard = False + # Handle the special case of ending with wildcard + # (e.g.: *.csv*). 
+ if re.match(r"(?=.*[a-zA-Z0-9]).*\*$", pattern_to_modify): + pattern_to_modify = pattern_to_modify[:-1] + append_wildcard = True + new_pattern = pattern_to_modify.replace("*", "**/*") + new_pattern = new_pattern + "*" if append_wildcard else new_pattern + return new_pattern + + +def listdir( + dir_name: str, + pattern: str, + only_files: bool, + use_relative_paths: bool, + *, + exclude_git_dirs: bool = True, + aws_profile: Optional[AwsProfile] = None, + maxdepth: Optional[int] = None, +) -> List[str]: + """ + Counterpart to `hio.listdir` with S3 support. + + :param dir_name: S3 or local path + :param aws_profile: AWS profile to use if and only if using an S3 path, + otherwise `None` for local path + :param maxdepth: limit the depth of directory traversal + """ + dassert_is_valid_aws_profile(dir_name, aws_profile) + _LOG.debug("pattern=%s", pattern) + if is_s3_path(dir_name): + s3fs_ = get_s3fs(aws_profile) + dassert_path_exists(dir_name, s3fs_) + # Ensure that there are no multiple stars in pattern. + hdbg.dassert_not_in("**", pattern) + # `hio.listdir` is using `find` which looks for files and directories + # descending recursively in the directory. + # One star in glob will use `maxdepth=1`. + pattern = _replace_star_with_double_star(pattern) + _LOG.debug("pattern=%s", pattern) + # Detailed S3 objects in dict form with metadata. + path_objects = s3fs_.glob( + f"{dir_name}/{pattern}", detail=True, maxdepth=maxdepth + ) + if only_files: + # Original `path_objects` must not be changed during loop. + temp_path_objects = copy.deepcopy(list(path_objects.values())) + # Use metadata to distinguish files from directories without + # calling `s3fs_.isdir/isfile`. 
def du(
    path: str,
    *,
    human_format: bool = False,
    aws_profile: Optional[AwsProfile] = None,
) -> Union[int, str]:
    """
    Counterpart to `hsystem.du` with S3 support.

    :param path: S3 or local path
    :param human_format: if True, return the size as a formatted string
        instead of a number
    :param aws_profile: AWS profile to use if and only if using an S3 path,
        otherwise `None` for local path
    :return: the size of the path
    """
    dassert_is_valid_aws_profile(path, aws_profile)
    if not is_s3_path(path):
        # Local filesystem.
        return hsystem.du(path, human_format=human_format)
    filesystem = get_s3fs(aws_profile)
    dassert_path_exists(path, filesystem)
    raw_size = filesystem.du(path)
    return hintros.format_size(raw_size) if human_format else raw_size
+ """ + dassert_is_valid_aws_profile(file_name, aws_profile) + if is_s3_path(file_name): + # Ensure that `bytes` is used. + if mode is not None and "b" not in mode: + raise ValueError("S3 only allows binary mode!") + hdbg.dassert_isinstance(lines, str) + # Convert lines to bytes, only supported mode for S3. + # Also create a list of new lines as raw bytes is not supported. + os_sep = os.linesep + lines_lst = [f"{line}{os_sep}".encode() for line in lines.split(os_sep)] + # Inspect file name and path. + hio.dassert_is_valid_file_name(file_name) + s3fs_ = get_s3fs(aws_profile) + mode = "wb" if mode is None else mode + # Open S3 file. `rb` is the default mode for S3. + with s3fs_.open(file_name, mode) as s3_file: + if file_name.endswith((".gz", ".gzip")): + # Open and decompress gzipped file. + with gzip.GzipFile(fileobj=s3_file) as gzip_file: + gzip_file.writelines(lines_lst) + else: + # Any other file. + s3_file.writelines(lines_lst) + if force_flush: + # TODO(Nikola): Investigate S3 alternative for `os.fsync(f.fileno())`. + s3_file.flush() + else: + use_gzip = file_name.endswith((".gz", ".gzip")) + hio.to_file( + file_name, + lines, + mode=mode, + use_gzip=use_gzip, + force_flush=force_flush, + ) + + +def from_file( + file_name: str, + encoding: Optional[Any] = None, + aws_profile: Optional[AwsProfile] = None, +) -> str: + """ + Counterpart to `hio.from_file` with S3 support. + + If and only if `aws_profile` is specified, S3 is used instead of + local filesystem. + """ + dassert_is_valid_aws_profile(file_name, aws_profile) + if is_s3_path(file_name): + if encoding: + raise ValueError("Encoding is not supported when reading from S3!") + # Inspect file name and path. + hio.dassert_is_valid_file_name(file_name) + s3fs_ = get_s3fs(aws_profile) + dassert_path_exists(file_name, s3fs_) + # Open s3 file. + with s3fs_.open(file_name) as s3_file: + if file_name.endswith((".gz", ".gzip")): + # Open and decompress gzipped file. 
def copy_file_to_s3(
    file_path: str,
    s3_dst_file_path: str,
    aws_profile: str,
) -> None:
    """
    Copy a local file to S3 using the `aws s3 cp` command.

    :param file_path: path to a file to copy; it must exist
    :param s3_dst_file_path: S3 path to copy to
    :param aws_profile: AWS profile; it is passed to the command only outside
        an ECS container
    """
    import shlex

    hdbg.dassert_file_exists(file_path)
    dassert_is_s3_path(s3_dst_file_path)
    dassert_is_valid_aws_profile(s3_dst_file_path, aws_profile)
    # Quote the arguments so that paths with spaces or shell metacharacters
    # are passed to the command as single, literal arguments.
    aws_s3_cp_cmd = (
        f"aws s3 cp {shlex.quote(file_path)} {shlex.quote(s3_dst_file_path)}"
    )
    if not hserver.is_inside_ecs_container():
        # There is no `~/.aws/credentials` file inside an ECS container
        # but the AWS credentials are received via a task role. So
        # no need to pass the profile option.
        aws_s3_cp_cmd += f" --profile {shlex.quote(aws_profile)}"
    _LOG.info("Copying from %s to %s", file_path, s3_dst_file_path)
    hsystem.system(aws_s3_cp_cmd, suppress_output=False)
def _get_aws_config(file_name: str) -> configparser.RawConfigParser:
    """
    Parse the config file `~/.aws/{file_name}`.

    :param file_name: name of a file inside the `~/.aws` dir; the file must
        exist
    :return: a parser loaded with the content of the file
    """
    aws_dir = os.path.join(os.path.expanduser("~"), ".aws")
    config_path = os.path.join(aws_dir, file_name)
    hdbg.dassert_file_exists(config_path)
    # Load the file.
    parser = configparser.RawConfigParser()
    parser.read(config_path)
    _LOG.debug("config.sections=%s", parser.sections())
    return parser
+# ############################################################################# + +# Architecture of the AWS authentication +# +# - There can be two or more AWS S3 systems with different credentials, paths to +# bucket, and other properties +# - Some code needs to refer always and only to a specific S3 bucket +# - E.g., AM S3 bucket for Kibot data +# - Other code needs to work with different AWS S3 systems +# - E.g., `publish_notebooks`, saving / retrieving experiments, caching +# +# - The desired AWS S3 systems are selected through an `aws_profile` parameter +# (e.g., `ck`) +# - The value of AWS profile is obtained from +# - the `--aws_profile` command line option; or +# - a client specifying the needed `aws_profile` +# +# - The AWS profile is then used to access the `~/.aws` files and extract: +# - the credentials (e.g., `aws_access_key_id`, `aws_secret_access_key`, +# `aws_region`) +# - other variables (e.g., `aws_s3_bucket`) +# - The variables that are extracted from the files are passed through env vars +# directly for GitHub Actions CI +# - One can specify env vars conditioned to different profiles using the AWS +# profile +# - E.g., `ck` profile for `AWS_ACCESS_KEY_ID` corresponds to +# `CSFY_AWS_ACCESS_KEY_ID` + + +@functools.lru_cache() +def get_aws_credentials( + aws_profile: str, +) -> Dict[str, Optional[str]]: + """ + Read the AWS credentials for a given profile from `~/.aws` or from env + vars. + + :return: a dictionary with `access_key_id`, `aws_secret_access_key`, + `aws_region` and optionally `aws_session_token` + """ + _LOG.debug("Getting credentials for aws_profile='%s'", aws_profile) + if aws_profile == "__mock__": + # `mock` profile is artificial construct used only in tests. + aws_profile = aws_profile.strip("__") + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. 
+ # profile_prefix = aws_profile.upper() + profile_prefix = ( + "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() + ) + result: Dict[str, Optional[str]] = {} + if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: + key_to_env_var: Dict[str, str] = { + "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", + # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant + # that the var is simply the capitalized version of the key. + "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", + } + else: + key_to_env_var: Dict[str, str] = { + "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant + # that the var is simply the capitalized version of the key. + "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", + } + # If all the AWS credentials are passed through env vars, they override the + # config file. + env_var_override = False + set_env_vars = [ + (env_var in os.environ and os.environ[env_var] != "") + for env_var in sorted(key_to_env_var.values()) + ] + if any(set_env_vars): + if not all(set_env_vars): + _LOG.warning( + "Some but not all AWS env vars are set (%s): ignoring", + str(set_env_vars), + ) + else: + env_var_override = True + if env_var_override: + _LOG.debug("Using AWS credentials from env vars") + # If one variable is defined all should be defined. 
+ for key, env_var in key_to_env_var.items(): + _LOG.debug("'%s' in env vars=%s", env_var, env_var in os.environ) + _LOG.debug( + "'%s' != ''=%s", env_var, os.environ.get(env_var, None) != "" + ) + hdbg.dassert_in(env_var, os.environ) + result[key] = os.environ[env_var] + if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: + result["aws_session_token"] = os.environ[ + f"{profile_prefix}_AWS_SESSION_TOKEN" + ] + else: + result["aws_session_token"] = None + else: + _LOG.debug("Using AWS credentials from files") + # > more ~/.aws/credentials + # [am] + # aws_access_key_id=AKI... + # aws_secret_access_key=mhg.. + # aws_session_token = Fwo... + file_name = "credentials" + config = _get_aws_config(file_name) + # + key = "aws_access_key_id" + result[key] = config.get(aws_profile, key) + # + key = "aws_secret_access_key" + result[key] = config.get(aws_profile, key) + # + key = "aws_session_token" + if config.has_option(aws_profile, key): + result[key] = config.get(aws_profile, key) + else: + result[key] = None + # + key = "aws_s3_bucket" + if config.has_option(aws_profile, key): + result[key] = config.get(aws_profile, key) + else: + result[key] = None + # > more ~/.aws/config + # [am] + # region = us-east-1 + file_name = "config" + config = _get_aws_config(file_name) + key = "aws_region" + # For ~/.aws/config the tag is `profile am` instead of `am`. + result[key] = config.get(f"profile {aws_profile}", "region") + # + hdbg.dassert_is_subset(key_to_env_var.keys(), result.keys()) + return result + + +# ############################################################################# +# Bucket +# ############################################################################# + + +# TODO(Nikola): CmTask #1810 "Increase test coverage in helpers/hs3.py" +def get_s3_bucket_path(aws_profile: str, add_s3_prefix: bool = True) -> str: + """ + Return the S3 bucket from environment variable corresponding to a given + `aws_profile`. 
+ + E.g., `aws_profile="am"` uses the value in `AM_AWS_S3_BUCKET` which + is usually set to `s3://alphamatic-data`. + """ + hdbg.dassert_type_is(aws_profile, str) + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. + prefix = aws_profile.upper() + prefix = ( + "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() + ) + env_var = f"{prefix}_AWS_S3_BUCKET" + if env_var in os.environ: + _LOG.debug("No env var '%s'", env_var) + s3_bucket = os.environ[env_var] + else: + # Fall-back to local credentials. + _LOG.debug("Checking credentials") + aws_credentials = get_aws_credentials(aws_profile) + _LOG.debug("%s", aws_credentials) + s3_bucket = aws_credentials.get("aws_s3_bucket", "") + hdbg.dassert_ne(s3_bucket, "") + hdbg.dassert( + not s3_bucket.startswith("s3://"), + "Invalid %s value '%s'", + env_var, + s3_bucket, + ) + if add_s3_prefix: + s3_bucket = "s3://" + s3_bucket + return s3_bucket + + +# TODO(sonaal): Do we really need aws profile as argument or +# we can use default? Ref. https://github.com/cryptokaizen/cmamp/pull/6045#discussion_r1380392748 +def get_s3_bucket_path_unit_test( + aws_profile: str, *, add_s3_prefix: bool = True +) -> str: + if aws_profile == "ck": + s3_bucket = "cryptokaizen-unit-test" + else: + hdbg.dfatal(f"Invalid aws_profile={aws_profile}") + if add_s3_prefix: + s3_bucket = "s3://" + s3_bucket + return s3_bucket + + +def get_latest_pq_in_s3_dir(s3_path: str, aws_profile: str) -> str: + """ + Get the latest Parquet file in the specified directory. + + :param s3_path: the path to s3 directory, e.g. + `cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance` + :param aws_profile: AWS profile to use + :return: the path to the latest Parquet file in the directory, + E.g. 
`cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance/ + currency_pair=ETH_USDT/year=2022/month=12/data.parquet` + """ + hdbg.dassert_type_is(aws_profile, str) + s3fs_ = get_s3fs(aws_profile) + dir_name = f"{s3_path}/**/*.parquet" + pq_files = s3fs_.glob(dir_name, detail=True) + hdbg.dassert_lte(1, len(pq_files), "dir_name=%s", dir_name) + _LOG.debug("pq_files=%s", pq_files) + # Sort the files by the date they were modified for the last time. + sorted_files = sorted( + pq_files.items(), key=lambda t: t[1]["LastModified"], reverse=True + ) + # Get the path to the latest file. + latest_file_path = sorted_files[0][0] + return latest_file_path + + +# ############################################################################# +# Parser. +# ############################################################################# + + +def add_s3_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: + """ + Add the command line options for the AWS credentials. + """ + parser.add_argument( + "--aws_profile", + action="store", + type=str, + help="The AWS profile to use for `.aws/credentials` or for env vars", + ) + parser.add_argument( + "--s3_path", + action="store", + type=str, + default=None, + help="Full S3 dir path to use (e.g., `s3://alphamatic-data/foobar/`), " + "overriding any other setting", + ) + return parser + + +def _dassert_all_env_vars_set(key_to_env_var: Dict[str, str]) -> None: + """ + Check that the required AWS env vars are set and are not empty strings. + """ + for v in key_to_env_var.values(): + hdbg.dassert_in(v, os.environ) + hdbg.dassert_ne(v, "") + + +def _get_aws_file_text(key_to_env_var: Dict[str, str]) -> List[str]: + """ + Generate text from env vars for AWS files. 
+ + E.g.: + ``` + aws_access_key_id=*** # gitleaks:allow + aws_secret_access_key=*** # gitleaks:allow + aws_s3_bucket=*** + ``` + :param key_to_env_var: aws settings names to the corresponding env + var names mapping + :return: AWS file text + """ + txt = [] + for k, v in key_to_env_var.items(): + line = f"{k}={os.environ[v]}" + txt.append(line) + return txt + + +def _get_aws_config_text(aws_profile: str) -> str: + """ + Generate text for the AWS config file, i.e. ".aws/config". + """ + # Set which env vars we need to get. + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. + # profile_prefix = aws_profile.upper() + profile_prefix = ( + "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() + ) + region_env_var = f"{profile_prefix}_AWS_DEFAULT_REGION" + key_to_env_var = {"region": region_env_var} + # Check that env vars are set. + _dassert_all_env_vars_set(key_to_env_var) + text = _get_aws_file_text(key_to_env_var) + text.insert(0, f"[profile {aws_profile}]") + text = "\n".join(text) + return text + + +def _get_aws_credentials_text(aws_profile: str) -> str: + """ + Generate text for the AWS credentials file, i.e. ".aws/credentials". + """ + # Set which env vars we need to get. + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. + # profile_prefix = aws_profile.upper() + profile_prefix = ( + "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() + ) + # Check if AWS session token is set in environment variable. + if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: + key_to_env_var = { + "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", + # TODO(heanh): Is this needed? 
+ "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", + } + else: + key_to_env_var = { + "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + # TODO(heanh): Is this needed? + "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", + } + # Check that env vars are set. + _dassert_all_env_vars_set(key_to_env_var) + text = _get_aws_file_text(key_to_env_var) + text.insert(0, f"[{aws_profile}]") + text = "\n".join(text) + return text + + +def generate_aws_files( + home_dir: str = "~", + aws_profiles: Optional[List[str]] = None, +) -> None: + """ + Generate AWS configuration files. + + This is needed to use the AWS CLI and the `boto3` library when we are in CI. + """ + if home_dir == "~": + home_dir = os.path.expanduser(home_dir) + config_file_name = os.path.join(home_dir, ".aws", "config") + credentials_file_name = os.path.join(home_dir, ".aws", "credentials") + # Check if the files already exist. + if os.path.exists(credentials_file_name) and os.path.exists( + config_file_name + ): + _LOG.info( + "Both files exist: %s and %s; exiting", + credentials_file_name, + config_file_name, + ) + return + if aws_profiles is None: + aws_profiles = ["ck"] + config_file_text = [] + credentials_file_text = [] + # Get text with settings for both files. + for profile in aws_profiles: + current_config_text = _get_aws_config_text(profile) + config_file_text.append(current_config_text) + current_credentials_text = _get_aws_credentials_text(profile) + credentials_file_text.append(current_credentials_text) + # Create both files. 
+ config_file_text = "\n\n".join(config_file_text) + hio.to_file(config_file_name, config_file_text) + _LOG.debug("Saved AWS config to %s", config_file_name) + + # + credentials_file_text = "\n\n".join(credentials_file_text) + hio.to_file(credentials_file_name, credentials_file_text) + _LOG.debug("Saved AWS credentials to %s", credentials_file_name) + + +# ############################################################################# +# Archive and retrieve data from S3. +# ############################################################################# + + +# TODO(gp): -> helpers/aws_utils.py + + +def archive_data_on_s3( + src_dir: str, s3_path: str, aws_profile: Optional[str], tag: str = "" +) -> str: + """ + Compress dir `src_dir` and save it on AWS S3 under `s3_path`. + + A timestamp and a tag is added to make the name more informative. + The tgz is created so that when expanded a dir with the name `src_dir` is + created. + + :param src_dir: directory that will be compressed + :param s3_path: full S3 path starting with `s3://` + :param aws_profile: the profile to use. We use a string and not an + `AwsProfile` since this is typically the outermost caller in the stack, + and it doesn't reuse an S3 fs object + :param tag: a tag to add to the name of the file + """ + _LOG.info( + "# Archiving '%s' to '%s' with aws_profile='%s'", + src_dir, + s3_path, + aws_profile, + ) + hdbg.dassert_dir_exists(src_dir) + dassert_is_s3_path(s3_path) + _LOG.info( + "The size of '%s' is %s", + src_dir, + hsystem.du(src_dir, human_format=True), + ) + # Add a timestamp if needed. + dst_path = hsystem.append_timestamp_tag(src_dir, tag) + ".tgz" + # Compress the dir. + # > (cd .../TestRunExperimentArchiveOnS3.test_serial1; \ + # tar cvzf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz experiment.RH1E) + # experiment.RH1E/ + # experiment.RH1E/log.20210802-123758.txt + # experiment.RH1E/output_metadata.json + # ... 
+ _LOG.debug("Destination path is '%s'", dst_path) + with htimer.TimedScope(logging.INFO, "Compressing"): + dir_name = os.path.dirname(src_dir) + base_name = os.path.basename(src_dir) + hdbg.dassert_ne(base_name, "", "src_dir=%s", src_dir) + cmd = "" + if dir_name != "": + cmd += f"cd {dir_name} && " + cmd += f"tar czf {dst_path} {base_name}" + hsystem.system(cmd) + _LOG.info( + "The size of '%s' is %s", + dst_path, + hsystem.du(dst_path, human_format=True), + ) + # Test expanding the tgz. The package should expand to the original dir. + # > tar tf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz + # experiment.RH1E/ + # experiment.RH1E/log.20210802-123758.txt + # experiment.RH1E/output_metadata.json + _LOG.info("Testing archive") + cmd = f"tar tvf {dst_path}" + hsystem.system(cmd, log_level=logging.INFO, suppress_output=False) + # Copy to S3. + s3_file_path = os.path.join(s3_path, os.path.basename(dst_path)) + _LOG.info("Copying '%s' to '%s'", dst_path, s3_file_path) + hdbg.dassert_file_exists(dst_path) + s3fs_ = get_s3fs(aws_profile) + # TODO(gp): Make sure the S3 dir exists. + s3fs_.put(dst_path, s3_file_path) + _LOG.info("Data archived on S3 to '%s'", s3_file_path) + return s3_file_path + + +def copy_data_from_s3_to_local_dir( + src_s3_dir: str, dst_local_dir: str, aws_profile: str +) -> None: + """ + Copy data from S3 to a local dir. + + :param src_s3_dir: path on S3 storing the data to copy + :param scratch_space_path: local path on scratch space + :param aws_profile: AWS profile to use + """ + _LOG.debug( + "Copying input data from %s to %s", + src_s3_dir, + dst_local_dir, + ) + cmd = f"aws s3 sync {src_s3_dir} {dst_local_dir} --profile {aws_profile}" + hsystem.system(cmd, suppress_output=False, log_level="echo") + + +def retrieve_archived_data_from_s3( + s3_file_path: str, + dst_dir: str, + aws_profile: Optional[str] = None, + incremental: bool = True, +) -> str: + """ + Retrieve tgz file from S3, unless it's already present (incremental mode). 
+ + :param s3_file_path: path to the S3 file with the archived data. E.g., + `s3://.../experiment.20210802-121908.tgz` + :param dst_dir: destination directory where to save the data + :param aws_profile: the profile to use. We use a string and not an + `AwsProfile` since this is typically the outermost caller in the stack, + and it doesn't reuse an S3 fs object + :param incremental: skip if the tgz file is already present locally + :return: path with the local tgz file + """ + _LOG.info( + "# Retrieving archive from '%s' to '%s' with aws_profile='%s'", + s3_file_path, + dst_dir, + aws_profile, + ) + dassert_is_s3_path(s3_file_path) + # Download the tgz file. + hio.create_dir(dst_dir, incremental=True) + dst_file = os.path.join(dst_dir, os.path.basename(s3_file_path)) + _LOG.debug(hprint.to_str("s3_file_path dst_dir dst_file")) + if incremental and os.path.exists(dst_file): + _LOG.warning("Found '%s': skipping downloading", dst_file) + else: + # Download. + s3fs_ = get_s3fs(aws_profile) + dassert_path_exists(s3_file_path, s3fs_) + _LOG.debug("Getting from s3: '%s' -> '%s", s3_file_path, dst_file) + s3fs_.get(s3_file_path, dst_file) + _LOG.info("Saved to '%s'", dst_file) + return dst_file + + +def expand_archived_data(src_tgz_file: str, dst_dir: str) -> str: + """ + Expand an S3 tarball storing results of an experiment. + + E.g., + - given a tgz file like `s3://.../experiment.20210802-121908.tgz` (which is the + result of compressing a dir like `/app/.../experiment.RH1E`) + - expand it into a dir `{dst_dir}/experiment.RH1E` + + :param src_tgz_file: path to the local file with the archived data. E.g., + `/.../experiment.20210802-121908.tgz` + :param dst_dir: directory where expand the archive tarball + :return: dir with the expanded data (e.g., `{dst_dir/experiment.RH1E`) + """ + _LOG.debug("Expanding '%s'", src_tgz_file) + # Get the name of the including dir, e.g., `experiment.RH1E`. 
+ cmd = f"cd {dst_dir} && tar tzf {src_tgz_file} | head -1" + rc, enclosing_tgz_dir_name = hsystem.system_to_one_line(cmd) + _ = rc + _LOG.debug(hprint.to_str("enclosing_tgz_dir_name")) + tgz_dst_dir = os.path.join(dst_dir, enclosing_tgz_dir_name) + if os.path.exists(tgz_dst_dir): + hdbg.dassert_dir_exists(dst_dir) + _LOG.info( + "While expanding '%s' dst dir '%s' already exists: skipping", + src_tgz_file, + tgz_dst_dir, + ) + else: + # Expand the tgz file. + # The output should be the original compressed dir under `{dst_dir}`. + # E.g., + # > tar tzf /app/.../experiment.20210802-133901.tgz + # experiment.RH1E/ + # experiment.RH1E/log.20210802-133859.txt + # experiment.RH1E/result_0/ + with htimer.TimedScope(logging.INFO, "Decompressing"): + hdbg.dassert_file_exists(src_tgz_file) + cmd = f"cd {dst_dir} && tar xzf {src_tgz_file}" + hsystem.system(cmd) + hdbg.dassert_dir_exists(tgz_dst_dir) + # Return `{dst_dir}/experiment.RH1E`. + return tgz_dst_dir + + +def get_s3_bucket_from_stage( + stage: str, *, add_suffix: Optional[str] = None +) -> str: + """ + Retrieve the S3 bucket name based on the provided deployment stage. + + :param stage: the deployment stage, which can be 'test', 'preprod', + or 'prod'. + :param add_suffix: optional suffix to append to the bucket name. + :return: return corresponding S3 bucket name. + """ + # Mapping of stages to their respective S3 bucket names. + _S3_BUCKET_BY_STAGE = { + "test": "cryptokaizen-data-test", + "preprod": "cryptokaizen-data.preprod", + "prod": "cryptokaizen-data", + } + # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. + # Retrieve the region from the environment variable or use the default region 'eu-north-1'. + region = os.environ.get("CSFY_AWS_DEFAULT_REGION", "eu-north-1") + # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. + if region == "ap-northeast-1": + _S3_BUCKET_BY_STAGE["preprod"] = "cryptokaizen-data-tokyo.preprod" + # Ensure the provided stage is valid. 
+ hdbg.dassert_in(stage, _S3_BUCKET_BY_STAGE) + s3_bucket = _S3_BUCKET_BY_STAGE[stage] + # Append the suffix to the bucket name if provided. + if add_suffix: + s3_bucket = os.path.join(s3_bucket, add_suffix) + return s3_bucket diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py new file mode 100644 index 000000000..f86f50342 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py @@ -0,0 +1,233 @@ +""" +Import as: + +import helpers.hsecrets as hsecret +""" + +import atexit +import json +import sys +import warnings +from typing import Any, Dict, Optional + +from botocore.client import BaseClient +from botocore.exceptions import ClientError + +import helpers.hdbg as hdbg + + +def get_secrets_client(aws_profile: str) -> BaseClient: + """ + Return client to work with AWS Secrets Manager in the specified region. + """ + import helpers.haws as haws + + session = haws.get_session(aws_profile) + client = session.client(service_name="secretsmanager") + return client + + +def _get_flag_value(flag: str) -> str: + """ + Return flag value with concatenated date string. + + E.g., for flag = 'pytest' return 'pytest_20240619'. + """ + # Import here to avoid import extra dependencies in the thin environment. + import helpers.hdatetime as hdateti + + timestamp = hdateti.get_current_date_as_string("naive_ET") + updated_flag = "_".join([flag, timestamp]) + return updated_flag + + +def update_usedby( + secret_name: str, + secret_value: Dict[str, Any], + usedBy: str, + *, + remove: bool = False, +) -> Dict[str, Any]: + """ + Update the value of `usedBy` attribute from `secret_value` in AWS secrets + manager to lock the key. Unlock the key at the end of process using default + value of `usedBy`. 
+ + :param secret_name: SecretId of record to be updated. + :param secret_value: Current value of SecretString. + :param usedBy: value of `usedBy` to be updated. Used to remove from + list on deallocation of resource, i.e., when remove is True. + :param remove: Boolean to decide addition or removal of `usedBy` value + in the secret value list of scripts. Default is False. + :return secret_value: SecretString with updated `usedBy` script. + """ + hdbg.dassert_isinstance(secret_name, str) + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # Modify value of used by in secret value. + if not remove: + try: + secret_value["usedBy"].append(usedBy) + except KeyError: + secret_value["usedBy"] = [usedBy] + else: + secret_value["usedBy"].remove(usedBy) + # Update the modified secret value in AWS secret manager. + client.update_secret( + SecretId=secret_name, SecretString=json.dumps(secret_value) + ) + return secret_value + + +def lock_secret( + secret_name: str, secret_value: Dict[str, Any] +) -> Optional[Dict[str, Any]]: + """ + Lock access to a secret to the current script. + + Lock access to secret key with trading keyword in `secret_name`, for a + runtime instance of a script, to avoid parallel run. + Add the script name to `usedBy` list in the AWS secret manager. + Raise error if the same script tries to access a locked key. + + :param secret_name: SecretId of record to be updated. + :param secret_value: Current value of SecretString. + :return secret_value: SecretString with updated `usedBy` script if not + already locked. + """ + current_script = sys.argv[0].split("/")[-1] + # Check if the current script is already using this secret. + current_usedBy = list( + filter(lambda x: current_script in x, secret_value.get("usedBy", [])) + ) + # Check current value of usedBy to determine further action. + if not current_usedBy: + # Fetch and update value of usedBy if not locked. 
+ usedBy = _get_flag_value(current_script) + secret_value = update_usedby(secret_name, secret_value, usedBy) + # Release secret key lock on termination. + atexit.register( + update_usedby, secret_name, secret_value, usedBy, remove=True + ) + else: + # Raise warning of locked resource with current use info. + # raise RuntimeError() + warnings.warn( + f"Secret key is already in use by {current_usedBy[0]}", + RuntimeWarning, + ) + return secret_value + + +# TODO(Juraj): add support to access secrets for different profiles, not important rn +def get_secret(secret_name: str) -> Optional[Dict[str, Any]]: + """ + Fetch secret values(s) from AWS secrets manager. + + :return a dictionary of key-value pairs. E.g., `get_secret('binance')` returns + ``` + { + 'apiKey': '', + 'secret': '' + } + ``` + """ + # TODO(Juraj): This assertion can't be applied universally. + # Check if the secret name format is valid. + # dassert_valid_secret(secret_name) + hdbg.dassert_isinstance(secret_name, str) + # Create a AWS Secrets Manager client. + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html + # for the full list of exceptions. + # Define access key to check the entity requesting for secret key. + access_key = "trading" + try: + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + secret_string = get_secret_value_response["SecretString"] + hdbg.dassert_isinstance(secret_string, str) + secret_val = json.loads(secret_string) + # Check access entity value to lock secret key to avoid parallel run. + if access_key in secret_name: + # TODO(Juraj): Temporarily disabled in #Cmtask10068. + # secret_val = lock_secret(secret_name, secret_val) + pass + except ClientError as e: + if e.response["Error"]["Code"] == "ResourceNotFoundException": + # Let user know the secret does not exist. 
+ raise ValueError(f"No such secret: {secret_name}") from e + # If not yet implemented handler then just re-raise. + raise e + return secret_val + + +# TODO(Juraj): add support to store secrets in different regions, not important rn. +def store_secret( + secret_name: str, secret_value: Dict[str, str], *, description: str = "" +) -> Optional[bool]: + """ + Store secret values(s) into AWS secrets manager, specify secret as a dict + of key-value pairs. + + :return: bool representing whether writing was successful or not + """ + hdbg.dassert_isinstance(secret_name, str) + # Create a AWS Secrets Manager client. + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # See + # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_CreateSecret.html + # for the full list of exceptions. + try: + create_secret_value_response = client.create_secret( + Name=secret_name, + Description=description, + SecretString=json.dumps(secret_value), + ) + # If no exception was thrown and we get back the name we passed in the + # response then the secret was stored successfully. + return_name = create_secret_value_response["Name"] + hdbg.dassert_isinstance(return_name, str) + res: bool = create_secret_value_response["Name"] == secret_name + return res + except ClientError as e: + if e.response["Error"]["Code"] == "ResourceExistsException": + # Let user know the secret with this name already exists. + raise ValueError( + "Secret with this name already exists:", secret_name + ) from e + # If not yet implemented handler then just re-raise. + raise e + # If we did not return inside try block then something went wrong. + return False + + +# TODO(Juraj): this might be deprecated since this is only fit for exchange API keys +def dassert_valid_secret(secret_id: str) -> None: + """ + Enforce that the valid format is `exchange_id.stage.account_type.num`. 
+ """ + values = secret_id.split(".") + hdbg.dassert_eq(len(values), 4) + hdbg.dassert_in( + values[0], + [ + "binance", + "bitfinex", + "coinbase", + "coinbaseprime", + "coinbasepro", + "ftx", + "gateio", + "huobi", + "kraken", + "kucoin", + "test", + ], + ) + hdbg.dassert_in(values[1], ["local", "preprod"]) + hdbg.dassert_in(values[2], ["trading", "sandbox"]) + hdbg.dassert( + values[3].isnumeric(), "values[3] should be numeric, got: %s", values[3] + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py new file mode 100644 index 000000000..5aa297e5d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py @@ -0,0 +1,1160 @@ +""" +Identify on which server we are running. + +Import as: + +import helpers.hserver as hserver +""" + +import functools +import logging +import os +import shutil +import subprocess +from typing import Dict, List, Optional, Tuple + +# This module should depend only on: +# - Python standard modules +# See `helpers/dependencies.txt` for more details + +_LOG = logging.getLogger(__name__) + +_WARNING = "\033[33mWARNING\033[0m" + + +def _print(msg: str) -> None: + _ = msg + # _LOG.info(msg) + if False: + print(msg) + + +# Copied from hprint to avoid import cycles. +def _indent(txt: str, *, num_spaces: int = 2) -> str: + """ + Add `num_spaces` spaces before each line of the passed string. + """ + spaces = " " * num_spaces + txt_out = [] + for curr_line in txt.split("\n"): + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + res = "\n".join(txt_out) + return res + + +# We can't use `hsystem` to avoid import cycles. 
def _system_to_string(cmd: str) -> Tuple[int, str]:
    """
    Run a shell command and return its return code and its output.

    The command is executed through the shell, stderr is merged into stdout,
    and the captured text is stripped of leading / trailing whitespace.

    :param cmd: command to run
    :return: tuple of (return code, output)
    """
    result = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        # Redirect stderr to stdout.
        stderr=subprocess.STDOUT,
        shell=True,
        text=True,
    )
    rc = result.returncode
    output = result.stdout
    output = output.strip()
    return rc, output


# #############################################################################
# Host
# #############################################################################


# We can't rely only on the name / version of the host to infer where we are
# running, since inside Docker the name of the host is like `01a7e34a82a5`. Of
# course, there is no way to know anything about the host for security reason,
# so we pass this value from the external environment to the container, through
# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_OS_VERSION`).


# Sometimes we want to know if:
# - The processor is x86_64 or arm64
# - The host is Mac or Linux
# - We are running on a Causify machine or on an external machine
# - We are inside CI or not
# TODO(gp): Grep all the use cases in the codebase and use the right function.


def get_host_user_name() -> Optional[str]:
    """
    Return the name of the user running the host.

    :return: value of the `CSFY_HOST_USER_NAME` env var, or `None` if the
        variable is not set
    """
    return os.environ.get("CSFY_HOST_USER_NAME", None)


def get_dev_csfy_host_names() -> List[str]:
    """
    Return the names of the Causify dev servers.

    :return: a new list with the dev server names; callers can modify it
        without affecting later calls
    """
    host_names = ("dev1", "dev2", "dev3")
    # The function has always returned a list at run-time: the annotation is
    # aligned to that, rather than changing the returned type under callers.
    return list(host_names)


# TODO(gp): -> is_inside_docker_container()
def is_inside_docker() -> bool:
    """
    Return whether we are inside a container or not.

    The check is based only on the existence of the `/.dockerenv` file.
    """
    # From https://stackoverflow.com/questions/23513045
    ret = os.path.exists("/.dockerenv")
    return ret


def _get_host_name() -> str:
    """
    Return the name of the host (not the machine) on which we are running.

    If we are inside a Docker container, we use the name of the host passed
    through the `CSFY_HOST_NAME` env var.

    :raises KeyError: if we are inside Docker and `CSFY_HOST_NAME` is not set
    """
    if is_inside_docker():
        host_name = os.environ["CSFY_HOST_NAME"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_name = os.uname()[1]
    _LOG.debug("host_name=%s", host_name)
    return host_name


def _get_host_os_name() -> str:
    """
    Return the name of the OS on which we are running (e.g., "Linux",
    "Darwin").

    If we are inside a Docker container, we use the name of the OS passed
    through the `CSFY_HOST_OS_NAME` env var.

    :raises KeyError: if we are inside Docker and `CSFY_HOST_OS_NAME` is not
        set
    """
    if is_inside_docker():
        host_os_name = os.environ["CSFY_HOST_OS_NAME"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_os_name = os.uname()[0]
    _LOG.debug("host_os_name=%s", host_os_name)
    return host_os_name


def _get_host_os_version() -> str:
    """
    Return the version of the OS on which we are running.

    If we are inside a Docker container, we use the version of the OS passed
    through the `CSFY_HOST_OS_VERSION` env var. Otherwise the `release` field
    of `os.uname()` is used.

    :raises KeyError: if we are inside Docker and `CSFY_HOST_OS_VERSION` is
        not set
    """
    if is_inside_docker():
        host_os_version = os.environ["CSFY_HOST_OS_VERSION"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_os_version = os.uname()[2]
    _LOG.debug("host_os_version=%s", host_os_version)
    return host_os_version


def is_host_csfy_server() -> bool:
    """
    Return whether we are running on a Causify dev server.

    The host name is compared against `get_dev_csfy_host_names()`.
    """
    host_name = _get_host_name()
    ret = host_name in get_dev_csfy_host_names()
    return ret


# Map a macOS release name to the tag that is searched for in the host OS
# version string.
# NOTE(review): the tags look like Darwin kernel major versions (e.g., `19.`
# for Catalina) - confirm.
_MAC_OS_VERSION_MAPPING = {
    "Catalina": "19.",
    "Monterey": "21.",
    "Ventura": "22.",
    "Sequoia": "24.",
}


def get_host_mac_version() -> str:
    """
    Get the macOS version (e.g., "Catalina", "Monterey", "Ventura").

    :return: the first key of `_MAC_OS_VERSION_MAPPING` whose tag is found in
        the host OS version string
    :raises ValueError: if no tag matches the host OS version
    """
    host_os_version = _get_host_os_version()
    for version, tag in _MAC_OS_VERSION_MAPPING.items():
        # NOTE(review): this is a substring match, so a tag appearing anywhere
        # in the version string (not only at its beginning) is a match -
        # confirm this is intended for the format passed by the host.
        if tag in host_os_version:
            return version
    raise ValueError(f"Invalid host_os_version='{host_os_version}'")


def is_host_mac_version(version: str) -> bool:
    """
    Return whether we are running on a Mac with a specific version (e.g.,
    "Catalina", "Monterey", "Ventura").

    :param version: macOS release name; it must be one of the keys of
        `_MAC_OS_VERSION_MAPPING`
    :raises AssertionError: if `version` is not a known release name
    :raises ValueError: if the host OS version matches no known macOS release
    """
    assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'"
    host_mac_version = get_host_mac_version()
    ret = version.lower() == host_mac_version.lower()
    return ret


def is_host_gp_mac() -> bool:
    """
    Return whether we are running on a Mac owned by GP.

    This is used to check if we can use a specific feature before
    releasing it to all the users.

    The check is whether the host name starts with `gpmac.`.
    """
    host_name = _get_host_name()
    ret = host_name.startswith("gpmac.")
    return ret


# #############################################################################
# Detect server.
# #############################################################################


def is_inside_ci() -> bool:
    """
    Return whether we are running inside the Continuous Integration flow.

    This is the case when the `CSFY_CI` env var is set to a non-empty string.
    """
    # A missing variable is treated like an empty one.
    ret = os.environ.get("CSFY_CI", "") != ""
    return ret


def is_inside_unit_test() -> bool:
    """
    Return whether we are running code inside the regressions.

    The check is whether the `PYTEST_CURRENT_TEST` env var is set.
    """
    ret = "PYTEST_CURRENT_TEST" in os.environ
    return ret


# TODO(gp): Remove!
def is_dev_csfy() -> bool:
    """
    Return whether we are running on a Causify dev server (`dev1`, `dev2`,
    `dev3`).

    Both the local node name and the host name passed through the
    `CSFY_HOST_NAME` env var are checked.
    """
    # sysname='Linux'
    # nodename='dev1'
    # release='5.15.0-1081-aws',
    # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025',
    # machine='x86_64'
    host_name = os.uname()[1]
    host_names = ("dev1", "dev2", "dev3")
    csfy_host_name = os.environ.get("CSFY_HOST_NAME", "")
    _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name)
    is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names
    return is_dev_csfy_


# TODO(gp): This is obsolete and should be removed.
def is_dev4() -> bool:
    """
    Return whether it's running on dev4.

    Despite the name, both `cf-spm-dev4` and `cf-spm-dev8` are accepted, either
    as local node name or as value of the `CSFY_HOST_NAME` env var.
    """
    host_name = os.uname()[1]
    csfy_host_name = os.environ.get("CSFY_HOST_NAME", None)
    _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name)
    candidates = (host_name, csfy_host_name)
    is_dev4_ = any(
        server in candidates for server in ("cf-spm-dev4", "cf-spm-dev8")
    )
    return is_dev4_


def is_host_mac(*, version: Optional[str] = None) -> bool:
    """
    Return whether we are running on macOS and, optionally, on a specific
    version.

    :param version: check whether we are running on a certain macOS version
        (e.g., `Catalina`, `Monterey`, `Ventura`, `Sequoia`)
    :return: whether the host is a Mac (and matches `version`, if specified)
    :raises ValueError: if we are on a Mac and `version` is not a known macOS
        version
    """
    _LOG.debug("version=%s", version)
    host_os_name = os.uname()[0]
    _LOG.debug("os.uname()=%s", str(os.uname()))
    csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None)
    _LOG.debug(
        "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name
    )
    is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin"
    if version is None or not is_mac_:
        # Either the user didn't request a specific version, or we are not on
        # a Mac, so there is no need to check the macOS version.
        _LOG.debug("is_mac_=%s", is_mac_)
        return is_mac_
    # Tag of the Darwin kernel release for each supported macOS version.
    macos_tags = {
        # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0:
        # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64
        "Catalina": "19.6",
        # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0:
        # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64
        "Monterey": "21.",
        "Ventura": "22.",
        # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0:
        # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64
        "Sequoia": "24.",
    }
    if version not in macos_tags:
        raise ValueError(f"Invalid version='{version}'")
    macos_tag = macos_tags[version]
    _LOG.debug("macos_tag=%s", macos_tag)
    host_os_version = os.uname()[2]
    # The host OS version is passed to the container through
    # `CSFY_HOST_OS_VERSION` (the same env var read by
    # `_get_host_os_version()`). `CSFY_HOST_VERSION` is kept as a fallback
    # since it was the name previously read here.
    # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020;
    #   root:xnu-6153.141.2~1/RELEASE_X86_64'
    csfy_host_os_version = os.environ.get(
        "CSFY_HOST_OS_VERSION", os.environ.get("CSFY_HOST_VERSION", "")
    )
    _LOG.debug(
        "host_os_version=%s csfy_host_os_version=%s",
        host_os_version,
        csfy_host_os_version,
    )
    is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version
    _LOG.debug("is_mac_=%s", is_mac_)
    return is_mac_


def is_prod_csfy() -> bool:
    """
    Detect whether we are running in a Causify production container.

    This env var is set inside `devops/docker_build/prod.Dockerfile`.

    Any non-empty value of `CK_IN_PROD_CMAMP_CONTAINER` (including `"0"` or
    `"False"`) is considered true.
    """
    # TODO(gp): CK -> CSFY
    return bool(os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", False))


# TODO(gp): Obsolete.
def is_ig_prod() -> bool:
    """
    Detect whether we are running in an IG production container.

    This env var is set inside `//lime/devops_cf/setenv.sh`

    Any non-empty value of `DOCKER_BUILD` is considered true.
    """
    # CF sets up `DOCKER_BUILD` so we can use it to determine if we are inside
    # a CF container or not.
    # print("os.environ\n", str(os.environ))
    return bool(os.environ.get("DOCKER_BUILD", False))


# TODO(Grisha): consider adding to `setup_to_str()`.
def is_inside_ecs_container() -> bool:
    """
    Detect whether we are running in an ECS container.

    The check is based on the presence of the
    `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` env var.
    """
    # When deploying jobs via ECS the container obtains credentials based
    # on passed task role specified in the ECS task-definition, refer to:
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html
    ret = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ
    return ret


# #############################################################################


def is_external_linux() -> bool:
    """
    Detect whether we are running on a non-server/non-CI Linux machine.

    This returns true when we run on the machine of an intern, or a non-
    CSFY contributor.
    """
    if is_host_csfy_server() or is_inside_ci():
        # Dev servers and CI are not external Linux systems.
        return False
    # We need to check if the host is Linux.
    host_os_name = _get_host_os_name()
    ret = host_os_name == "Linux"
    return ret


def is_external_dev() -> bool:
    """
    Detect whether we are running on a system outside of Causify.

    E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non-
    CSFY machine.
    """
    ret = is_host_mac() or is_external_linux()
    return ret


# #############################################################################
# Set up consistency.
# #############################################################################


# TODO(gp): Update this.
def _get_setup_signature() -> str:
    """
    Dump all the variables that are used to make a decision about the values of
    the functions in `_get_setup_settings()`.

    This function is used to mock the state of the system for testing
    purposes.

    :return: one `<expression>=<value>` line per probed variable
    """

    def _env_probe(name: str) -> Tuple[str, str]:
        # Return the label of the probe and the value of the env var.
        label = f'os.environ.get("{name}", "*undef*")'
        return label, os.environ.get(name, "*undef*")

    uname = os.uname()
    # The values are computed directly instead of `eval`-ing the expressions;
    # the labels are the same text that used to be evaluated.
    probes = [
        # is_prod_csfy()
        _env_probe("CK_IN_PROD_CMAMP_CONTAINER"),
        # is_dev4()
        # is_dev_csfy()
        # is_ig_prod()
        _env_probe("CSFY_HOST_NAME"),
        # is_inside_ci()
        _env_probe("CSFY_CI"),
        # is_mac()
        ("os.uname()[0]", uname[0]),
        ("os.uname()[2]", uname[2]),
        # is_external_linux()
        _env_probe("CSFY_HOST_OS_NAME"),
    ]
    # Join the results into a single string.
    result = "\n".join(f"{label}={value}" for label, value in probes)
    return result


# The valid set ups are:
# - Running on a Causify server (e.g., `dev1`, `dev2`, `dev3`)
#   - Container
#   - Host
# - External Mac (GP, Paul, interns, contributors)
#   - Container
#   - Host
# - External Linux (interns, contributors)
#   - Container
#   - Host
# - Prod container on Linux
#   - Container
# - CI
#   - Container


def is_inside_docker_container_on_csfy_server() -> bool:
    """
    Return whether we are running on a Docker container on a Causify server.
    """
    ret = is_inside_docker() and is_host_csfy_server()
    return ret


def is_outside_docker_container_on_csfy_server() -> bool:
    """
    Return whether we are running outside a Docker container on a Causify
    server.
    """
    ret = not is_inside_docker() and is_host_csfy_server()
    return ret


def is_inside_docker_container_on_host_mac() -> bool:
    """
    Return whether we are running on a Docker container on a Mac host.
    """
    ret = is_inside_docker() and is_host_mac()
    return ret


def is_outside_docker_container_on_host_mac() -> bool:
    """
    Return whether we are running outside of a Docker container on a Mac host.
    """
    ret = not is_inside_docker() and is_host_mac()
    return ret


def is_inside_docker_container_on_external_linux() -> bool:
    """
    Return whether we are running on a Docker container on an external Linux.
    """
    ret = is_inside_docker() and is_external_linux()
    return ret


def is_outside_docker_container_on_external_linux() -> bool:
    """
    Return whether we are outside of a Docker container on an external Linux.
    """
    ret = not is_inside_docker() and is_external_linux()
    return ret


def _get_setup_settings() -> List[Tuple[str, bool]]:
    """
    Return a list of tuples with the name and value of the current server
    setup.

    E.g.,
    ```bash
    is_inside_docker_container_on_csfy_server=True
    is_outside_docker_container_on_csfy_server=False
    is_inside_docker_container_on_host_mac=False
    is_outside_docker_container_on_host_mac=True
    is_inside_docker_container_on_external_linux=False
    is_outside_docker_container_on_external_linux=True
    is_dev4=False
    is_ig_prod=False
    is_prod_csfy=False
    is_inside_ci=False
    ```
    """
    func_names = [
        "is_inside_docker_container_on_csfy_server",
        "is_outside_docker_container_on_csfy_server",
        #
        "is_inside_docker_container_on_host_mac",
        "is_outside_docker_container_on_host_mac",
        #
        "is_inside_docker_container_on_external_linux",
        "is_outside_docker_container_on_external_linux",
        #
        "is_dev4",
        "is_ig_prod",
        "is_prod_csfy",
        "is_inside_ci",
    ]
    # Store function name / value pairs as tuples. The functions are looked up
    # in the module namespace instead of going through `eval`.
    module_vars = globals()
    setups = [(func_name, module_vars[func_name]()) for func_name in func_names]
    return setups


def _setup_to_str(setups: List[Tuple[str, bool]]) -> str:
    """
    Return a string representation of the current server setup configuration.

    :param setups: name / value pairs, e.g., from `_get_setup_settings()`
    :return: string with each setting on a new line, aligned with
        padding (empty string if `setups` is empty)
    """
    # Find maximum length of setting names (an empty list is allowed).
    max_len = max((len(name) for name, _ in setups), default=0) + 1
    # Format each line with computed padding.
    txt = [f"{name:<{max_len}}{value}" for name, value in setups]
    return "\n".join(txt)


def _dassert_setup_consistency() -> None:
    """
    Check that one and only one setup configuration is true.

    This is used to ensure that the setup configuration is one of the
    expected ones and uniquely defined.

    :raises ValueError: if the number of true settings is different from one
    """
    setups = _get_setup_settings()
    # One and only one set-up should be true.
    sum_ = sum(value for _, value in setups)
    if sum_ != 1:
        msg = "One and only one set-up config should be true:\n"
        msg += _setup_to_str(setups) + "\n"
        msg += "_get_setup_signature() returns:\n"
        msg += _indent(_get_setup_signature())
        raise ValueError(msg)


# If the env var is not defined then we want to check. The only reason to skip
# it is if the env var is defined and equal to False.
check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False"
_is_called = False
if check_repo:
    # The repo check is executed at import time, before the logger is
    # initialized. To debug the repo check, enable the following block.
    if False:
        import helpers.hdbg as hdbg

        hdbg.init_logger(verbosity=logging.DEBUG)
    # Compute and cache the result.
    if not _is_called:
        _dassert_setup_consistency()
        _is_called = True
else:
    _LOG.warning("Skipping repo check in %s", __file__)


# #############################################################################
# Detect Docker functionalities.
# #############################################################################


# Each function below should run without asserting. E.g., when we check if
# docker supports privileged mode, we should check if `docker` is available,
# and then if docker supports privileged mode, instead of asserting if `docker`
# doesn't exist on the system.


@functools.lru_cache()
def has_docker() -> bool:
    """
    Return whether we have Docker installed.

    The check looks for a `docker` executable in the `PATH`.
    """
    return shutil.which("docker") is not None


@functools.lru_cache()
def docker_needs_sudo() -> bool:
    """
    Return whether Docker commands need to be run with sudo.

    :return: `False` if Docker is not available or runs without `sudo`, `True`
        if it runs only with `sudo`
    :raises AssertionError: if Docker can't be run with or without `sudo`
    """
    if not has_docker():
        return False
    # This check is required to ensure it does not cause issues when running
    # on ECS Fargate through Airflow, since ECS Fargate does not support
    # either DinD or sibling containers.
    # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/fargate-security-considerations.html
    # TODO(heanh): Check if we can use `is_inside_ecs_container()` to check if
    # we are inside Airflow.
    if not has_dind_support() and not use_docker_sibling_containers():
        return False
    # Another way to check is to see if your user is in the docker group:
    # > groups | grep docker
    # Note that `>/dev/null` must come before `2>&1` to silence both stdout
    # and stderr.
    rc = os.system("docker run hello-world >/dev/null 2>&1")
    if rc == 0:
        return False
    #
    rc = os.system("sudo docker run hello-world >/dev/null 2>&1")
    if rc == 0:
        return True
    # Raise explicitly, since `assert False` is stripped by `python -O`.
    raise AssertionError("Failed to run docker")


def get_docker_executable() -> str:
    """
    Return the docker executable, wrapped with `sudo` if needed.
    """
    executable = "docker"
    if docker_needs_sudo():
        executable = "sudo " + executable
    return executable


@functools.lru_cache()
def has_docker_privileged_mode() -> bool:
    """
    Return whether the current container supports privileged mode.

    Docker privileged mode gives containers nearly all the same capabilities as
    the host system's kernel.

    Privileged mode allows to:
    - run Docker-in-Docker
    - mount filesystems
    """
    if not has_docker():
        return False
    docker_executable = get_docker_executable()
    # `>/dev/null` must come before `2>&1` to silence both streams.
    cmd = f"{docker_executable} run --privileged hello-world >/dev/null 2>&1"
    rc = os.system(cmd)
    _print(f"cmd={cmd} -> rc={rc}")
    has_privileged_mode = rc == 0
    return has_privileged_mode


def has_docker_sibling_containers_support() -> bool:
    """
    Return whether the current container supports running sibling containers.
    """
    # We need to be inside a container to run sibling containers.
    if not is_inside_docker():
        return False
    # We assume that if the socket exists then we can run sibling containers.
    return os.path.exists("/var/run/docker.sock")


def has_docker_children_containers_support() -> bool:
    """
    Return whether the current container supports Docker-in-Docker.
    """
    # We need to be inside a container to run docker-in-docker.
    if not is_inside_docker():
        return False
    # We assume that if we have privileged mode then we can run
    # docker-in-docker.
    return has_docker_privileged_mode()


def is_csfy_dind_enabled() -> bool:
    """
    Return whether `CSFY_ENABLE_DIND` is enabled (e.g. users opt-in to use
    Docker-in-Docker).

    The accepted values are `1` and, ignoring the case, `true` and `yes`.
    """
    val = os.environ.get("CSFY_ENABLE_DIND", "0")
    return val == "1" or val.lower() in ("true", "yes")


def can_run_docker_from_docker() -> bool:
    """
    Return whether we can run docker from docker, either as children or sibling
    container.
    """
    return (
        has_docker_children_containers_support()
        or has_docker_sibling_containers_support()
    )


def get_docker_info() -> str:
    """
    Return a report about the Docker capabilities of the current system.

    :return: a "Docker info" title followed by one indented `name=value` line
        per capability
    """
    txt_tmp: List[str] = []
    #
    has_docker_ = has_docker()
    txt_tmp.append(f"has_docker={has_docker_}")
    #
    cmd = r"docker version --format '{{.Server.Version}}'"
    _, docker_version = _system_to_string(cmd)
    txt_tmp.append(f"docker_version='{docker_version}'")
    #
    docker_needs_sudo_ = docker_needs_sudo()
    txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo_}")
    #
    has_privileged_mode_ = has_docker_privileged_mode()
    txt_tmp.append(f"has_privileged_mode={has_privileged_mode_}")
    #
    is_inside_docker_ = is_inside_docker()
    txt_tmp.append(f"is_inside_docker={is_inside_docker_}")
    # Sibling / children containers are meaningful only inside a container.
    if is_inside_docker_:
        has_docker_sibling_containers_support_ = (
            has_docker_sibling_containers_support()
        )
        has_docker_children_containers_support_ = (
            has_docker_children_containers_support()
        )
    else:
        has_docker_sibling_containers_support_ = "*undef*"
        has_docker_children_containers_support_ = "*undef*"
    txt_tmp.append(
        f"has_docker_sibling_containers_support={has_docker_sibling_containers_support_}"
    )
    txt_tmp.append(
        f"has_docker_children_containers_support={has_docker_children_containers_support_}"
    )
    # Format as title with indented items.
    txt = "Docker info" + "\n" + _indent("\n".join(txt_tmp))
    return txt


def _is_mac_version_with_sibling_containers() -> bool:
    """
    Return whether the host is a Mac running Monterey, Ventura, or Sequoia.
    """
    if not is_host_mac():
        return False
    mac_version = get_host_mac_version()
    return mac_version in ("Monterey", "Ventura", "Sequoia")


# #############################################################################
# Detect Docker functionalities, based on the set-up.
# #############################################################################


# TODO(gp): This approach is sub-optimal. We deduce what we can do based on the
# name of the set-up. We should base our decisions on the actual capabilities of
# the system.
+ + +# TODO(gp): -> has_docker_privileged_mode +@functools.lru_cache() +def has_dind_support() -> bool: + """ + Return whether the current container supports privileged mode. + + This is needed to use Docker-in-Docker. + """ + _print(f"is_inside_docker()={is_inside_docker()}") + if not is_inside_docker(): + # Outside Docker there is no privileged mode. + _print("-> ret = False") + return False + # TODO(gp): Not sure this is really needed since we do this check + # after enable_privileged_mode controls if we have dind or not. + if _is_mac_version_with_sibling_containers(): + return False + # TODO(gp): This part is not multi-process friendly. When multiple + # processes try to run this code they interfere. A solution is to run `ip + # link` in the entrypoint and create a `has_docker_privileged_mode` file + # which contains the value. + # We rely on the approach from https://stackoverflow.com/questions/32144575 + # to check if there is support for privileged mode. + # Sometimes there is some state left, so we need to clean it up. + # TODO(Juraj): this is slow and inefficient, but works for now. + cmd = "sudo docker run hello-world" + rc = os.system(cmd) + _print(f"cmd={cmd} -> rc={rc}") + has_dind = rc == 0 + # dind is supported on both Mac and GH Actions. + # TODO(Juraj): HelpersTask16. + # if check_repo: + # if hserver.is_inside_ci(): + # # Docker-in-docker is needed for GH actions. For all other builds is optional. 
+ # assert has_dind, ( + # f"Expected privileged mode: has_dind={has_dind}\n" + # + hserver.setup_to_str() + # ) + # else: + # only_warning = True + # _raise_invalid_host(only_warning) + # return False + # else: + # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") + # print( + # _WARNING + # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" + # + f"'{csfy_repo_config}'" + # ) + return has_dind + + +def _raise_invalid_host(only_warning: bool) -> None: + host_os_name = os.uname()[0] + am_host_os_name = os.environ.get("AM_HOST_OS_NAME", None) + msg = ( + f"Don't recognize host: host_os_name={host_os_name}, " + f"am_host_os_name={am_host_os_name}" + ) + if only_warning: + _LOG.warning(msg) + else: + raise ValueError(msg) + + +# TODO(gp): -> use_docker_in_docker_support +def enable_privileged_mode() -> bool: + """ + Return whether a host supports privileged mode for its containers. + """ + import helpers.repo_config_utils as hrecouti + + repo_name = hrecouti.get_repo_config().get_name() + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//dev_tools",): + ret = False + else: + # Keep this in alphabetical order. + if is_dev_csfy(): + ret = True + elif is_inside_ci(): + ret = True + elif is_external_linux(): + ret = True + elif is_host_mac(): + mac_version = get_host_mac_version() + if mac_version == "Catalina": + # Docker for macOS Catalina supports dind. + ret = True + elif mac_version in ("Monterey", "Ventura", "Sequoia"): + # Docker doesn't seem to support dind for these versions of macOS. + ret = False + else: + raise ValueError(f"Invalid version='{mac_version}'") + # Docker doesn't seem to support dind for these versions of macOS. + ret = False + elif is_prod_csfy(): + ret = False + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +# TODO(gp): -> use_docker_sudo_in_commands +def has_docker_sudo() -> bool: + """ + Return whether Docker commands should be run with `sudo` or not. 
+ """ + # Keep this in alphabetical order. + if is_dev_csfy(): + ret = True + elif is_external_linux(): + ret = True + elif is_inside_ci(): + ret = False + elif is_host_mac(): + # macOS runs Docker with sudo by default. + # TODO(gp): This is not true. + ret = True + elif is_prod_csfy(): + ret = False + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +# TODO(gp): -> use_docker_sibling_container_support +def use_docker_sibling_containers() -> bool: + """ + Return whether to use Docker sibling containers. + + Using sibling containers requires that all Docker containers are in + the same network so that they can communicate with each other. + """ + return has_docker_sibling_containers_support() + # if is_dev_csfy(): + # val = True + # else: + # val = is_dev4() or _is_mac_version_with_sibling_containers() + # return val + + +# TODO(gp): -> use_docker_main_network +def use_main_network() -> bool: + # TODO(gp): Replace this. + return use_docker_sibling_containers() + + +# TODO(gp): -> get_docker_shared_data_dir_map +def get_shared_data_dirs() -> Optional[Dict[str, str]]: + """ + Get path of dir storing data shared between different users on the host and + Docker. + + E.g., one can mount a central dir `/data/shared`, shared by multiple + users, on a dir `/shared_data` in Docker. + """ + # TODO(gp): Keep this in alphabetical order. 
+ if is_dev4(): + shared_data_dirs = { + "/local/home/share/cache": "/cache", + "/local/home/share/data": "/data", + } + elif is_dev_csfy(): + shared_data_dirs = { + "/data/shared": "/shared_data", + "/data/shared2": "/shared_data2", + "/data/shared_k8s": "/shared_k8s", + "/data/shared_test": "/shared_test", + } + elif is_external_dev() or is_inside_ci() or is_prod_csfy(): + shared_data_dirs = None + else: + shared_data_dirs = None + only_warning = True + _raise_invalid_host(only_warning) + return shared_data_dirs + + +def use_docker_network_mode_host() -> bool: + # TODO(gp): Not sure this is needed any more, since we typically run in + # bridge mode. + ret = is_host_mac() or is_dev_csfy() + ret = False + if ret: + assert use_docker_sibling_containers() + return ret + + +def use_docker_db_container_name_to_connect() -> bool: + """ + Connect to containers running DBs just using the container name, instead of + using port and localhost / hostname. + """ + if _is_mac_version_with_sibling_containers(): + # New Macs don't seem to see containers unless we connect with them + # directly with their name. + ret = True + else: + ret = False + if ret: + # This implies that we are using Docker sibling containers. + assert use_docker_sibling_containers() + return ret + + +# TODO(gp): This seems redundant with use_docker_sudo_in_commands +def run_docker_as_root() -> bool: + """ + Return whether Docker should be run with root user. + + I.e., adding `--user $(id -u):$(id -g)` to docker compose or not. + """ + # Keep this in alphabetical order. + if is_dev4() or is_ig_prod(): + # //lime runs on a system with Docker remap which assumes we don't + # specify user credentials. + ret = True + elif is_dev_csfy(): + # On dev1 / dev2 we run as users specifying the user / group id as + # outside. 
+ ret = False + elif is_external_linux(): + ret = False + elif is_inside_ci(): + # When running as user in GH action we get an error: + # ``` + # /home/.config/gh/config.yml: permission denied + # ``` + # see https://github.com/alphamatic/amp/issues/1864 + # So we run as root in GH actions. + ret = True + elif is_host_mac(): + ret = False + elif is_prod_csfy(): + ret = False + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +# TODO(gp): Probably obsolete +def get_docker_user() -> str: + """ + Return the user that runs Docker, if any. + """ + if is_dev4(): + val = "spm-sasm" + else: + val = "" + return val + + +# TODO(gp): Probably obsolete +def get_docker_shared_group() -> str: + """ + Return the group of the user running Docker, if any. + """ + if is_dev4(): + val = "sasm-fileshare" + else: + val = "" + return val + + +# TODO(gp): -> repo_config.yaml +def skip_submodules_test() -> bool: + """ + Return whether the tests in the submodules should be skipped. + + E.g. while running `i run_fast_tests`. + """ + import helpers.repo_config_utils as hrecouti + + repo_name = hrecouti.get_repo_config().get_name() + # TODO(gp): Why do we want to skip running tests? + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//dev_tools",): + # Skip running `amp` tests from `dev_tools`. + return True + return False + + +# ############################################################################# +# S3 buckets. +# ############################################################################# + + +def is_AM_S3_available() -> bool: + # AM bucket is always available. + val = True + _LOG.debug("val=%s", val) + return val + + +def is_CK_S3_available() -> bool: + val = True + if is_inside_ci(): + import helpers.repo_config_utils as hrecouti + + repo_name = hrecouti.get_repo_config().get_name() + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//amp", "//dev_tools"): + # No CK bucket. 
+ val = False + # TODO(gp): We might want to enable CK tests also on lemonade. + if repo_name in ("//lemonade",): + # No CK bucket. + val = False + elif is_dev4(): + # CK bucket is not available on dev4. + val = False + _LOG.debug("val=%s", val) + return val + + +# ############################################################################# +# Functions. +# ############################################################################# + + +def config_func_to_str() -> str: + """ + Print the value of all the config functions. + """ + ret: List[str] = [] + # Get the functions with: + # grep "def " helpers/hserver.py | sort | awk '{ print $2 }' | perl -i -ne 'print "$1\n" if /^([^\(]+)/' + function_names = [ + "enable_privileged_mode", + "get_docker_shared_group", + "get_docker_user", + "get_host_user_name", + "get_shared_data_dirs", + "has_dind_support", + "has_docker_sudo", + "is_AM_S3_available", + "is_CK_S3_available", + "is_csfy_dind_enabled", + "is_dev4", + "is_dev_csfy", + "is_external_linux", + "is_host_mac", + "is_ig_prod", + "is_inside_ci", + "is_inside_docker", + "is_inside_ecs_container", + "is_inside_unit_test", + "is_prod_csfy", + "run_docker_as_root", + "skip_submodules_test", + "use_docker_db_container_name_to_connect", + "use_docker_network_mode_host", + "use_docker_sibling_containers", + "use_main_network", + ] + for func_name in sorted(function_names): + try: + _LOG.debug("func_name=%s", func_name) + func_value = eval(f"{func_name}()") + except NameError: + func_value = "*undef*" + msg = f"{func_name}='{func_value}'" + ret.append(msg) + # Package. 
+ result = "\n".join(ret) + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py new file mode 100644 index 000000000..b960bd8bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py @@ -0,0 +1,204 @@ +""" +Import as: + +import helpers.hsftp as hsftp +""" + +import logging +import os +import subprocess +import sys +from io import BytesIO +from typing import List + +import helpers.haws as haws +import helpers.hmodule as hmodule +import helpers.hsecrets as hsecret + +hmodule.install_module_if_not_present("pysftp") + +import pysftp # noqa: E402 + +# Create a logger instance. +_LOG = logging.getLogger(__name__) + + +def install_lftp(): + """ + Install `lftp` using the system package manager. + """ + try: + subprocess.run(["sudo", "apt-get", "update"], check=True) + subprocess.run(["sudo", "apt-get", "install", "-y", "lftp"], check=True) + _LOG.info("`lftp` successfully installed using `apt`.") + except Exception as e: + _LOG.error("Failed to install `lftp`: %s", e) + sys.exit(1) + + +def check_lftp_connection(): + """ + Check if `lftp` is installed. + + If not, install it using the package manager. + """ + try: + # Check if `lftp` is available by trying to run it. + subprocess.run( + ["lftp", "--version"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + _LOG.info("`lftp` is already installed.") + except subprocess.CalledProcessError: + _LOG.error("Error occurred while checking `lftp` version.") + sys.exit(1) + except FileNotFoundError: + _LOG.warning("`lftp` is not installed. 
Attempting to install it...") + install_lftp() + + +def download_file_using_lftp( + remote_data_path: str, save_path: str, hostname: str, secret_name: str +) -> None: + """ + Download files from a remote SFTP server using `lftp` and a private SSH + key. + + :param remote_data_path: path to the remote directory on the SFTP + server from which files should be downloaded. + :param save_path: local directory where the downloaded files will be + saved. + :param hostname: hostname of the SFTP server. + :param secret_name: Name of the secret in AWS Secrets Manager that + stores the SFTP credentials, including the username and private + key. + :return: None. + """ + # Fetch the private key from AWS Secrets Manager + secret_dict = hsecret.get_secret(secret_name) + username = secret_dict["username"] + private_key = secret_dict["private_key"] + # Write the private key to a temporary file + with open("/tmp/temp_key.pem", "w") as temp_key_file: + temp_key_file.write(private_key) + # Ensure the key file has the correct permissions + os.chmod("/tmp/temp_key.pem", 0o600) + private_key_path = "/tmp/temp_key.pem" + # Construct the lftp command. + # The 'set sftp:connect-program' allows specifying custom SSH options for the SFTP connection. + # -o GSSAPIAuthentication=no: Disables GSSAPI to avoid unnecessary authentication mechanisms. + # -o StrictHostKeyChecking=no: Bypasses the host key verification prompt for new hosts. + # -a: Enables SSH agent forwarding for more seamless authentication. + # -x: Disables X11 forwarding (not needed for file transfer). + # -i {private_key_path}: Specifies the private key for SSH authentication. + # 'mirror --parallel=10': Downloads files from the remote server, with 10 parallel downloads to speed up the process. 
+ lftp_cmd = ( + f"lftp -u {username}, -e \"set sftp:connect-program 'ssh -o GSSAPIAuthentication=no " + f"-o StrictHostKeyChecking=no -a -x -i {private_key_path}'; " + f'mirror --parallel=10 {remote_data_path} {save_path}; quit" ' + f"sftp://{hostname}" + ) + try: + _LOG.info("Executing lftp command: %s", lftp_cmd) + subprocess.run( + lftp_cmd, + shell=True, + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + _LOG.error( + "lftp command failed with error: %s", + e.stderr, + ) + + +def get_sftp_connection(hostname: str, secret_name: str) -> pysftp.Connection: + """ + Return SFTP connection object using a private key stored in AWS Secrets + Manager. + + :param hostname: hostname of the SFTP server. + :param secret_name: name of the secret in AWS Secrets Manager + containing the private key. + :return: active SFTP connection object. + """ + # Fetch the private key from AWS Secrets Manager + secret_dict = hsecret.get_secret(secret_name) + username = secret_dict["username"] + private_key = secret_dict["private_key"] + # Write the private key to a temporary file + with open("/tmp/temp_key.pem", "w") as temp_key_file: + temp_key_file.write(private_key) + # Ensure the key file has the correct permissions + os.chmod("/tmp/temp_key.pem", 0o600) + # Ensure pysftp is installed before attempting connection. + cnopts = pysftp.CnOpts() + # Disable host key checking. + cnopts.hostkeys = None + sftp = pysftp.Connection( + hostname, + username=username, + private_key="/tmp/temp_key.pem", + cnopts=cnopts, + ) + # Remove the temporary key file after establishing the connection + os.remove("/tmp/temp_key.pem") + return sftp + + +def download_file_to_s3( + sftp: pysftp.Connection, + s3_client: haws.BaseClient, + remote_dir: str, + filename: str, + s3_bucket: str, + s3_prefix: str, +) -> None: + """ + Download data from an SFTP server and upload it to an S3 bucket. + + :param sftp: An active SFTP Connection object. 
def get_file_names(sftp: pysftp.Connection, sftp_remote_dir: str) -> List[str]:
    """
    List the names of all the entries of a directory on a remote SFTP server.

    :param sftp: an active SFTP Connection object.
    :param sftp_remote_dir: the directory on the SFTP server whose entries
        should be listed.
    :return: the `filename` of every entry reported by `listdir_attr()` for
        the given directory, in the order returned by the server.
    """
    return [entry.filename for entry in sftp.listdir_attr(sftp_remote_dir)]
class SlackNotifier:
    """
    Post text messages to Slack channels, authenticating with a bot token.
    """

    def __init__(self, bot_token: Optional[str] = None) -> None:
        """
        Store the bot token used for every request.

        :param bot_token: Slack bot token (starts with 'xoxb-'); when empty
            or `None`, the `SLACK_BOT_TOKEN` environment variable is used
        :raises ValueError: if no token is available from either source
        """
        resolved_token = bot_token
        if not resolved_token:
            # Fall back to the environment.
            resolved_token = os.environ.get("SLACK_BOT_TOKEN")
        self.bot_token = resolved_token
        if resolved_token:
            return
        raise ValueError(
            "No bot token provided via parameter or SLACK_BOT_TOKEN env var"
        )

    def send_message(
        self,
        channel: str,
        message: str,
    ) -> None:
        """
        Post `message` to `channel` through `chat.postMessage`.

        :param channel: Slack channel ID (e.g., 'C1234567890') or
            channel name (e.g., '#notifications')
        :param message: message text to send
        :raises ValueError: if the Slack API replies with `ok` unset
        """
        api_reply = requests.post(
            "https://slack.com/api/chat.postMessage",
            headers={
                "Authorization": f"Bearer {self.bot_token}",
                "Content-Type": "application/json",
            },
            json={
                "channel": channel,
                "text": message,
            },
            timeout=30,
        )
        # HTTP-level failures are reported first.
        api_reply.raise_for_status()
        reply_body = api_reply.json()
        # Slack reports application errors in the body of a 200 response.
        if reply_body.get("ok"):
            _LOG.info("Message sent successfully to %s", channel)
            return
        raise ValueError(f"Slack API error: {reply_body.get('error')}")
def create_in_operator(values: List[str], column_name: str) -> str:
    """
    Transform a list of possible values into an IN operator clause.

    Each value is rendered as a single-quoted SQL string literal; single
    quotes inside a value are doubled so that the resulting clause stays
    well-formed.

    :param values: a list of possible values for the given column, e.g.
        `["binance", "ftx"]`
    :param column_name: the name of the column, e.g. 'exchange_id'
    :return: IN operator clause with specified values,
        e.g. `"exchange_id IN ('binance','ftx')"`
    """
    # Escape embedded single quotes the standard SQL way (`'` -> `''`).
    quoted_values = ["'" + value.replace("'", "''") + "'" for value in values]
    in_operator = f"{column_name} IN (" + ",".join(quoted_values) + ")"
    return in_operator
def get_connection(
    host: str,
    dbname: str,
    port: int,
    user: str,
    password: str,
    autocommit: bool = True,
) -> DbConnection:
    """
    Open a connection to a SQL database.

    :param host: DB host
    :param dbname: name of the database to connect to
    :param port: DB port
    :param user: user name
    :param password: password of `user`
    :param autocommit: if True, switch the connection to autocommit mode
    :return: the open connection
    """
    # The password is deliberately left out of the log.
    _LOG.debug(hprint.to_str("host dbname port user"))
    db_conn = psycop.connect(
        host=host, dbname=dbname, port=port, user=user, password=password
    )
    if not autocommit:
        return db_conn
    db_conn.autocommit = True
    return db_conn
+ """ + hdbg.dassert_in(stage, ["prod", "preprod", "test"]) + hdbg.dassert_in(aws_region, hs3.AWS_REGIONS) + dbname = f"{stage}.im_data_db" + if stage == "prod": + secret_name = f"{dbname}.read_only" + else: + secret_name = ( + dbname + if aws_region == hs3.AWS_EUROPE_REGION_1 + else f"{dbname}.{aws_region}" + ) + _LOG.info("Fetching secret: %s", secret_name) + db_creds = hsecret.get_secret(secret_name) + connection = get_connection( + host=db_creds["host"], + dbname=dbname, + port=db_creds["port"], + user=db_creds["username"], + password=db_creds["password"], + ) + return connection + + +def get_connection_from_env_vars() -> DbConnection: + """ + Create a SQL connection with the information from the environment + variables. + """ + # Get values from the environment variables. + host = os.environ["POSTGRES_HOST"] + dbname = os.environ["POSTGRES_DB"] + port = int(os.environ["POSTGRES_PORT"]) + user = os.environ["POSTGRES_USER"] + password = os.environ["POSTGRES_PASSWORD"] + # Build the + connection = get_connection( + host=host, + dbname=dbname, + port=port, + user=user, + password=password, + ) + return connection + + +def get_connection_from_string( + conn_as_str: str, + autocommit: bool = True, +) -> DbConnection: + """ + Create a connection from a string. + + E.g., `host=localhost dbname=im_db_local port=5432 user=... + password=...` + """ + regex = r"host=\w+ dbname=\w+ port=\d+ user=\w+ password=\w+" + m = re.match(regex, conn_as_str) + hdbg.dassert(m, "Invalid connection string: '%s'", conn_as_str) + connection = psycop.connect(conn_as_str) + if autocommit: + connection.autocommit = True + return connection + + +def get_connection_info_from_env_file(env_file_path: str) -> DbConnectionInfo: + """ + Get connection parameters from environment file. 
def check_db_connection(
    host: str,
    dbname: str,
    port: int,
    user: str,
    password: str,
) -> Tuple[bool, Optional[psycop.OperationalError]]:
    """
    Check whether a connection to a DB can be established, without waiting
    or retrying.

    The connection opened for the check is closed before returning.

    :param host: DB host
    :param dbname: name of the database
    :param port: DB port
    :param user: user name
    :param password: password of `user`
    :return: `(True, None)` if the connection succeeded, otherwise
        `(False, error)` with the `OperationalError` that was raised
    """
    try:
        connection = get_connection(
            host=host, dbname=dbname, port=port, user=user, password=password
        )
    except psycop.OperationalError as e:
        return False, e
    # The connection was only a probe: release it instead of leaking one
    # server-side session per check.
    connection.close()
    return True, None
+ """ + hdbg.dassert_lte(1, timeout_in_secs) + _LOG.debug("dbname=%s, port=%s, host=%s", dbname, port, host) + elapsed_secs = 0 + while True: + _LOG.info("Waiting for PostgreSQL to become available...") + conn_exists = check_db_connection(host, dbname, port, user, password) + if conn_exists[0]: + _LOG.info("PostgreSQL is available (after %s seconds)", elapsed_secs) + break + if elapsed_secs > timeout_in_secs: + raise psycop.OperationalError( + f"Cannot connect to db host={host} dbname={dbname} port={port} " + f"due to timeout={timeout_in_secs} seconds" + f"\n{conn_exists[1]}" + ) + elapsed_secs += 1 + time.sleep(1) + + +def db_connection_to_tuple(connection: DbConnection) -> DbConnectionInfo: + """ + Get database connection details using connection. Connection details + include: + + - Host + - Database name + - Port + - Username + - Password + + :param connection: a database connection + :return: database connection details + """ + info = connection.info + ret = DbConnectionInfo( + host=info.host, + dbname=info.dbname, + port=info.port, + user=info.user, + password=info.password, + ) + return ret + + +# ############################################################################# +# State of the whole DB +# ############################################################################# + + +def get_engine_version(connection: DbConnection) -> str: + """ + Report information on the SQL engine. + + E.g., ``` PostgreSQL 11.5 on x86_64-pc-linux-gnu compiled by gcc + (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit ``` + """ + query = "SELECT version();" + df = pd.read_sql_query(query, connection) + # pylint: disable=no-member + info: str = df.iloc[0, 0] + return info + + +# ############################################################################# +# Tables +# ############################################################################# + + +def get_table_names(connection: DbConnection) -> List[str]: + """ + Report the name of the tables. 
def disconnect_all_clients(connection: DbConnection) -> None:
    """
    Terminate the backends of all the clients connected to the database that
    `connection` is attached to.

    :param connection: a database connection; the target database name is
        read from `connection.info.dbname`
    """
    # From https://stackoverflow.com/questions/36502401
    # Not sure this will work in our case, since it might kill our own connection.
    # `pg_stat_activity.datname` holds database names, so filter on the
    # database name of the connection (not on its host).
    dbname = connection.info.dbname
    # Let the driver quote the value instead of formatting it into the query.
    query = """
    SELECT pg_terminate_backend(pid)
    FROM pg_stat_activity
    WHERE datname = %s;"""
    connection.cursor().execute(query, (dbname,))
def create_database(
    connection: DbConnection,
    dbname: str,
    *,
    overwrite: Optional[bool] = None,
) -> None:
    """
    Create empty database.

    :param connection: database connection
    :param dbname: database to create
    :param overwrite: if truthy, drop an existing database with the same
        name before creating it; otherwise fail if it already exists
    :raises ValueError: if the database exists and `overwrite` is not set
    """
    _LOG.debug("connection=%s", connection)
    with connection.cursor() as cursor:
        if overwrite:
            cursor.execute(
                psql.SQL("DROP DATABASE IF EXISTS {} WITH (FORCE);").format(
                    psql.Identifier(dbname)
                )
            )
        elif dbname in get_db_names(connection):
            # Compare against the existing databases (not the tables of the
            # current database) to detect a name clash.
            raise ValueError(f"Database {dbname} already exists")
        cursor.execute(
            psql.SQL("CREATE DATABASE {};").format(psql.Identifier(dbname))
        )
# TODO(gp): -> as_df
def execute_query_to_df(
    connection: DbConnection,
    query: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    use_timer: bool = False,
    profile: bool = False,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Execute a query and return its result as a dataframe.

    :param connection: connection to the DB
    :param query: SQL query to execute
    :param limit: if not None, append `LIMIT {limit}` to the query
    :param offset: if not None, append `OFFSET {offset}` to the query
    :param use_timer: time the execution of the query
    :param profile: prepend `EXPLAIN ANALYZE` to the query and log the result
    :param verbose: log the final query before executing it
    :return: result of the query
    :raises psycopg2.Error: if the query cannot be executed
    """
    if limit is not None:
        query += f" LIMIT {limit}"
    if offset is not None:
        query += f" OFFSET {offset}"
    if profile:
        query = "EXPLAIN ANALYZE " + query
    if verbose:
        _LOG.info("> %s", query)
    # Compute.
    if use_timer:
        idx = htimer.dtimer_start(0, "Sql time")
    try:
        df = pd.read_sql_query(query, connection)
    except psycop.OperationalError:
        # Execute the query directly to print the error reported by the DB.
        try:
            connection.cursor().execute(query)
        except psycop.Error as e:
            print(e.pgerror)
            raise e
        # The direct execution succeeded but there is still no dataframe to
        # return: propagate the original error instead of failing later on an
        # unbound `df`.
        raise
    if use_timer:
        htimer.dtimer_stop(idx)
    if profile:
        _LOG.info("df=%s", df)
    return df
+ """ + query = f""" + SELECT column_name + FROM information_schema.columns + WHERE TABLE_NAME = '{table_name}'""" + cursor = connection.cursor() + cursor.execute(query) + columns = [x[0] for x in cursor.fetchall()] + return columns + + +def find_tables_common_columns( + connection: DbConnection, + tables: List[str], + as_df: bool = False, +) -> Optional[pd.DataFrame]: + limit = 5 + df = [] + for i, table in enumerate(tables): + table = tables[i] + query = f"SELECT * FROM {table} LIMIT {limit} " + df1 = execute_query_to_df(connection, query, verbose=False) + if df1 is None: + continue + for j in range(i + 1, len(tables)): + table = tables[j] + query = f"SELECT * FROM {table} LIMIT {limit} " + df2 = execute_query_to_df(connection, query, verbose=False) + if df2 is None: + continue + common_cols = [c for c in df1 if c in df2] + if as_df: + df.append( + ( + tables[i], + tables[j], + len(common_cols), + " ".join(common_cols), + ) + ) + else: + print(f"'{tables[i]}' vs '{tables[j]}'") + print(f" ({len(common_cols)}): {' '.join(common_cols)}") + obj = None + if as_df: + obj = pd.DataFrame( + df, columns=["table1", "table2", "num_comm_cols", "common_cols"] + ) + return obj + + +def remove_table( + connection: DbConnection, table_name: str, cascade: bool = False +) -> None: + """ + Remove a table from a database. + + :param connection: database connection + :param table_name: table name + :param cascade: whether to drop the objects dependent on the table + """ + query = f"DROP TABLE IF EXISTS {table_name}" + if cascade: + query = " ".join([query, "CASCADE"]) + connection.cursor().execute(query) + + +def remove_all_tables(connection: DbConnection, cascade: bool = False) -> None: + """ + Remove all the tables from a database. 
def csv_to_series(csv_as_txt: str, sep: str = ",") -> pd.Series:
    """
    Convert a text with (key, value) separated by `sep` into a `pd.Series`.

    Only the first occurrence of `sep` on each line separates the key from
    the value, so a value may itself contain `sep`.

    :param csv_as_txt: a string containing csv data
        E.g.,
        ```
        tradedate,2021-11-12
        targetlistid,1
        ```
    :param sep: csv separator, e.g. `,`
    :return: series with the keys as index and the values as data
    """
    lines = hprint.dedent(csv_as_txt).split("\n")
    # Split on the first separator only: splitting on every separator would
    # silently drop the tail of any value containing `sep`.
    tuples = [tuple(line.split(sep, 1)) for line in lines]
    # Remove the tuples coming from empty lines (or lines with an empty key).
    tuples = [t for t in tuples if t[0] != ""]
    # Build series.
    index, data = zip(*tuples)
    srs = pd.Series(data, index=index)
    return srs
# TODO(gp): -> table_name, df
def create_insert_query(df: pd.DataFrame, table_name: str) -> str:
    """
    Build the text of an INSERT query for the columns of a dataframe.

    The values are left as a single `%s` placeholder to be filled in at
    execution time.

    :param df: data to insert into DB
    :param table_name: name of the table for insertion
    :return: sql query, e.g.,
        ```
        INSERT INTO ccxt_ohlcv_spot(timestamp,open,high,low,close) VALUES %s
        ```
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    column_list = ",".join(df.columns.tolist())
    insert_query = "INSERT INTO {}({}) VALUES %s".format(table_name, column_list)
    _LOG.debug("query=%s", insert_query)
    return insert_query
+ + :param connection: connection to the DB + :param obj: data to insert + :param table_name: name of the table for insertion + """ + if isinstance(obj, pd.Series): + df = obj.to_frame().T + else: + df = obj + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_in(table_name, get_table_names(connection)) + _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) + # Ensure the DataFrame has compatible types with + # downstream consumers (e.g., database). + df = df.applymap(lambda x: float(x) if isinstance(x, np.float64) else x) + # Transform dataframe into list of tuples. + values = [tuple(v) for v in df.to_numpy()] + # Generate a query for multiple rows. + query = create_insert_query(df, table_name) + # Execute query for each provided row. + cur = connection.cursor() + extras.execute_values(cur, query, values) + connection.commit() + + +# TODO(gp): -> connection, table_name, obj +def execute_insert_on_conflict_do_nothing_query( + connection: DbConnection, + obj: Union[pd.DataFrame, pd.Series], + table_name: str, + unique_columns: List[str], +) -> None: + """ + Insert a DB as multiple rows into the database. If a a UNIQUE constraint is + violated for a provided set of columns, duplicates are not inserted. + + :param connection: connection to the DB + :param obj: data to insert + :param table_name: name of the table for insertion + :param unique_columns: set of columns which should be unique record-wise. + If unique_columns is an empty list, a regular DB insert is executed + without the UNIQUE constraint. + """ + if isinstance(obj, pd.Series): + df = obj.to_frame().T + else: + df = obj + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_in(table_name, get_table_names(connection)) + _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) + # Transform dataframe into list of tuples. + values = [tuple(v) for v in df.to_numpy()] + # Generate a query for multiple rows. 
def execute_query(connection: DbConnection, query: str) -> List[tuple]:
    """
    Run a generic, simple query and return whatever rows it produces.

    :param connection: connection to the DB
    :param query: generic query that can be: insert, update, delete, etc.
    :return: list of tuples with the results of the query, or `[()]` when
        fetching the results raises `ProgrammingError`
    """
    _LOG.debug(hprint.to_str("query"))
    with connection.cursor() as cur:
        cur.execute(query)
        # Commit explicitly only when the connection does not autocommit.
        if not connection.autocommit:
            connection.commit()
        try:
            return cur.fetchall()
        except psycop.ProgrammingError:
            return [()]
def is_row_with_value_present(
    connection: DbConnection,
    table_name: str,
    field_name: str,
    target_value: str,
    *,
    show_db_state: bool = True,
) -> hasynci.PollOutput:
    """
    Check with a polling function if a row with `field_name` == `target_value`
    is present in the table `table_name` of the DB.

    E.g., this can be used with polling to wait for the target value
    "hello_world.txt" in the "filename" field of the table "table_name" to appear

    :param connection: connection to the DB
    :param table_name: name of the table to inspect
    :param field_name: name of the column to match
    :param target_value: value to look for in `field_name`
    :param show_db_state: if True, log the content of the whole table (at
        debug level) before checking
    :return:
        - success if the value is present
        - result: None
    """
    _LOG.debug(hprint.to_str("connection table_name field_name target_value"))
    # Print the state of the DB, if needed.
    if show_db_state:
        # Sort by the polled column, which is known to exist in the table.
        query = f"SELECT * FROM {table_name} ORDER BY {field_name}"
        df = execute_query_to_df(connection, query)
        _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False))
    # Check if the required row is available.
    query = f"SELECT {field_name} FROM {table_name} WHERE {field_name}='{target_value}'"
    df = execute_query_to_df(connection, query)
    _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False))
    # Package results.
    success = df.shape[0] > 0
    result = None
    return success, result
+ tag = hintros.get_function_name(count=0) + if poll_kwargs is None: + poll_kwargs = hasynci.get_poll_kwargs(get_wall_clock_time) + num_iters, diff_num_rows = await hasynci.poll( + _is_number_of_rows_changed, + tag=tag, + **poll_kwargs, + ) + _ = num_iters + diff_num_rows = cast(int, diff_num_rows) + return diff_num_rows diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py new file mode 100644 index 000000000..2aeff7c6c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py @@ -0,0 +1,273 @@ +""" +Import as: + +import helpers.hsql_test as hsqltest +""" + +import abc +import logging +import os + +import pytest + +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsql as hsql +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestDbHelper +# ############################################################################# + + +@pytest.mark.requires_docker_in_docker +@pytest.mark.skipif( + not hserver.can_run_docker_from_docker(), + reason="Need docker children / sibling support", +) +class TestDbHelper(hunitest.TestCase, abc.ABC): + """ + Allow testing code that interacts with a DB. + + It creates / destroys a test DB during setup / teardown of the class. This means + that the same DB is reused for multiple test methods of the same class. + + The invariant is that each test method should: + - (ideally) find a clean DB to work with + - not assume that the DB is clean. 
If the DB is not clean, tests should clean it + before starting, or work around it + - E.g., if a test needs to write a table, but the table is already present and + partially filled as a leftover from a previous test, the new test should + delete the table and create it again + - clean the DB after themselves, i.e., undo the work that has been done + - E.g., if a test creates a table, then the test should delete the table at + the end of the test + + - An existing DB can be reused + - A user can create a persistent local DB in the Docker container, e.g. for OMS: + ``` + docker> (cd oms; sudo docker-compose \ + --file /app/oms/devops/compose/tmp.docker-compose.yml up \ + -d \ + oms_postgres) + ``` + or + ``` + docker> invoke oms_docker_up + ``` + - Then this class skips creating / destructing the DB, making the tests faster + and allowing easier debugging. + """ + + @classmethod + def setUpClass(cls) -> None: + """ + Initialize the test database inside test container. + """ + _LOG.info("\n%s", hprint.frame("setUpClass")) + cls._create_docker_files() + # Read the connection parameters from the env file. + cls.db_env_file = cls._get_db_env_path() + connection_info = hsql.get_connection_info_from_env_file(cls.db_env_file) + _LOG.debug("connection_info=%s", connection_info) + conn_exists = hsql.check_db_connection(*connection_info)[0] + if conn_exists: + _LOG.warning("DB is already up: skipping docker compose") + # Since we have found the DB already up, we assume that we need to + # leave it running after the tests + cls.bring_down_db = False + else: + # Start the service. + cls.docker_compose_file_path = os.path.join( + hgit.get_amp_abs_path(), cls._get_compose_file() + ) + # TODO(Grisha): use invoke task CMTask #547. 
+ cmd = ( + "sudo docker-compose " + f"--file {cls.docker_compose_file_path} " + f"--env-file {cls.db_env_file} " + f"up -d {cls._get_service_name()}" + ) + _LOG.debug("cmd=%s", cmd) + hsystem.system(cmd, suppress_output=False) + # Wait for the DB to be available. + hsql.wait_db_connection(*connection_info) + cls.bring_down_db = True + # Save connection info. + # TODO(gp): -> db_connection + cls.connection = hsql.get_connection(*connection_info, autocommit=True) + + # TODO(Grisha): difference between cmamp and kaizenflow. + @classmethod + def tearDownClass(cls) -> None: + """ + Bring down the test container. + """ + _LOG.info("\n%s", hprint.frame("tearDown")) + docker_compose_cleanup = cls.bring_down_db + if docker_compose_cleanup: + if hserver.use_main_network(): + # When using sibling containers `docker-compose down` tries to shut + # down also the `main_network`, while it is attached to the Docker + # container running the tests + # So we clean up the containers and volumes directly. + # TODO(gp): This could become an invoke target. + # Remove the container, e.g., `compose-oms_postgres7482-1`. + service_name = cls._get_service_name() + container_name = f"compose-{service_name}-1" + use_sudo = hdocker.get_use_sudo() + hdocker.container_rm(container_name, use_sudo) + # Remove the volume, e.g., `compose_oms_postgres7482_data`. + volume_name = f"compose_{service_name}_data" + hdocker.volume_rm(volume_name, use_sudo) + else: + # TODO(Grisha): use invoke task CMTask #547. + cmd = ( + "sudo docker-compose " + f"--file {cls.docker_compose_file_path} " + f"--env-file {cls.db_env_file} " + "down -v" + ) + hsystem.system(cmd, suppress_output=False) + else: + _LOG.warning("Leaving DB up") + if not hunitest.get_incremental_tests(): + os.unlink(cls._get_compose_file()) + os.unlink(cls._get_db_env_path()) + + @classmethod + @abc.abstractmethod + def get_id(cls) -> int: + """ + Return a unique ID to create an OMS instance. 
+ + This ID is used to generate Docker compose / env files and + services, so that we can avoid collisions in case of parallel + execution. + + This function is specified by the unit test in a way that is + unique to each test. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_compose_file(cls) -> str: + """ + Get path to Docker compose file. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_service_name(cls) -> str: + """ + Get service name. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_db_env_path(cls) -> str: + """ + Get path to env file that contains DB connection parameters. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _create_docker_files(cls) -> str: + """ + Create the compose and env file for the DB run. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_postgres_db(cls) -> str: + """ + Return the name of the postgres DB to use (e.g., im_postgres_db_local). + """ + raise NotImplementedError + + +# ############################################################################# +# TestImOmsDbHelper +# ############################################################################# + + +class TestImOmsDbHelper(TestDbHelper, abc.ABC): + # TODO(gp): Rewrite building a YAML with a package. + @classmethod + def _create_docker_files(cls) -> None: + # Create compose file. + service_name = cls._get_service_name() + idx = cls.get_id() + host_port = 5432 + idx + txt = f"""version: '3.5' +services: + # Docker container running Postgres DB. + {service_name}: + image: postgres:13 + restart: "no" + environment:""" + if not hserver.use_docker_db_container_name_to_connect(): + # Use the port to connect. 
+ txt += f""" + - POSTGRES_HOST=${{POSTGRES_HOST}} + - POSTGRES_DB=${{POSTGRES_DB}} + - POSTGRES_PORT=${{POSTGRES_PORT}} + - POSTGRES_USER=${{POSTGRES_USER}} + - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} + volumes: + - {service_name}_data:/var/lib/postgresql/data + ports: + - {host_port}:5432""" + else: + # Do not use the port to connect. + txt += f""" + - POSTGRES_HOST=${{POSTGRES_HOST}} + - POSTGRES_DB=${{POSTGRES_DB}} + - POSTGRES_USER=${{POSTGRES_USER}} + - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} + volumes: + - {service_name}_data:/var/lib/postgresql/data""" + # + txt += f""" +volumes: + {service_name}_data: {{}} + +networks: + default: + #name: {service_name}_network + name: main_network""" + compose_file_name = cls._get_compose_file() + hio.to_file(compose_file_name, txt) + # Create env file. + txt = [] + if not hserver.use_docker_db_container_name_to_connect(): + if hserver.is_dev4(): + host = "cf-spm-dev4" + else: + # host = os.environ["CSFY_HOST_NAME"] + host = "localhost" + else: + # Use the service name, e.g., `im_postgres...`. 
def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str:
    """
    Remove `prefix` from the beginning of `string`.

    :param string: string to process
    :param prefix: prefix to remove
    :param assert_on_error: if True, raise when `string` doesn't start with
        `prefix`, otherwise return `string` unchanged
    :return: `string` without the leading `prefix`
    :raises RuntimeError: if the prefix is missing and `assert_on_error` is
        True
    """
    if string.startswith(prefix):
        res = string[len(prefix) :]
    else:
        res = string
        if assert_on_error:
            raise RuntimeError(
                f"string='{string}' doesn't start with prefix ='{prefix}'"
            )
    return res


def remove_suffix(string: str, suffix: str, assert_on_error: bool = True) -> str:
    """
    Remove `suffix` from the end of `string`.

    :param string: string to process
    :param suffix: suffix to remove
    :param assert_on_error: if True, raise when `string` doesn't end with
        `suffix`, otherwise return `string` unchanged
    :return: `string` without the trailing `suffix`
    :raises RuntimeError: if the suffix is missing and `assert_on_error` is
        True
    """
    if string.endswith(suffix):
        # Compute the end index explicitly: `string[: -len(suffix)]` is
        # `string[:0]` (i.e., the empty string) when `suffix` is empty.
        res = string[: len(string) - len(suffix)]
    else:
        res = string
        if assert_on_error:
            raise RuntimeError(
                f"string='{string}' doesn't end with suffix='{suffix}'"
            )
    return res
# TODO(gp): GFI. Move to hpython_code.py
def get_docstring_line_indices(lines: List[str]) -> List[int]:
    """
    Get indices of lines of code that are inside (doc)strings.

    Three delimiters are tracked: triple double-quotes, triple single-quotes
    and triple backticks. Each occurrence of a delimiter flips its open /
    closed state. A line is reported when at least one delimiter is still
    open after all the delimiters on that line have been processed, so the
    closing line of a docstring is not reported.

    :param lines: the code lines to check
    :return: the indices of docstrings
    """
    triple_quotes = ('"""', "'''")
    backticks = "```"
    # Map each delimiter to whether it is currently open.
    is_open = {'"""': False, "'''": False, backticks: False}
    indices: List[int] = []
    for idx, line in enumerate(lines):
        for delimiter in is_open:
            # Flip the state once per (non-overlapping) occurrence.
            for _ in range(line.count(delimiter)):
                is_open[delimiter] = not is_open[delimiter]
                if delimiter in triple_quotes and not is_open[delimiter]:
                    # A triple-quote has just been closed: any backtick block
                    # opened inside it is considered closed too.
                    is_open[backticks] = False
        if True in is_open.values():
            # Something is opened but not closed yet.
            indices.append(idx)
    return indices
# TODO(gp): GFI. Move to hpython_code.py
def get_code_block_line_indices(lines: List[str]) -> List[int]:
    """
    Get indices of lines that are inside code blocks.

    Code blocks are lines surrounded by triple backticks, e.g.,
    ```
    This line.
    ```
    Note that the backticks need to be the leftmost element of their line,
    i.e., they can be preceded only by whitespace.

    The opening fence and the lines after it are reported; the closing fence
    is not.

    :param lines: the lines to check
    :return: the indices of code blocks
    """
    # A fence is a triple backtick preceded only by whitespace.
    fence_regex = re.compile(r"^\s*(```)")
    inside_block = False
    indices: List[int] = []
    for idx, line in enumerate(lines):
        if fence_regex.match(line) is not None:
            # A fence flips between "inside" and "outside" a code block.
            inside_block = not inside_block
        if inside_block:
            indices.append(idx)
    return indices
+ version = m.groups(1)[0].split(".") # type: ignore[arg-type, union-attr] + major, minor = int(version[0]), 0 if len(version) == 1 else int(version[1]) + return major, minor diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py new file mode 100644 index 000000000..b63bd34f4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py @@ -0,0 +1,1097 @@ +""" +Contain all the code needed to interact with the outside world, e.g., through +system commands, env vars, ... + +Import as: + +import helpers.hsystem as hsystem +""" + +import contextlib +import datetime +import getpass +import glob +import logging +import os +import re +import signal +import subprocess +import sys +import time +from typing import ( + Any, + Callable, + Generator, + List, + Match, + Optional, + Tuple, + Union, + cast, +) + +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint +import helpers.hserver as hserver + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# Set logging level of this file higher to avoid too much chatter. +_LOG.setLevel(logging.INFO) + +# ############################################################################# + + +# TODO(gp): Move to hdatetime.py and maybe merge with `timestamp_to_str()`. 
def get_timestamp() -> str:
    """
    Return the current local time formatted as `%Y-%m-%d_%H:%M:%S`.
    """
    now = datetime.datetime.now()
    return now.strftime("%Y-%m-%d_%H:%M:%S")


# TODO(gp): Maybe move to hserver.py
def is_running_in_ipynb() -> bool:
    """
    Return True if the name `get_ipython` is defined, False otherwise.
    """
    # From https://stackoverflow.com/questions/15411967
    try:
        _ = get_ipython().config  # type: ignore
    except NameError:
        # `get_ipython` is not defined.
        return False
    return True


# #############################################################################

_USER_NAME = None


def set_user_name(user_name: str) -> None:
    """
    Override the user name returned by `get_user_name()`.

    This allows impersonating a user: to use only in rare cases for testing
    or back-door.
    """
    global _USER_NAME
    _LOG.warning("Setting user to '%s'", user_name)
    _USER_NAME = user_name


def get_user_name() -> str:
    """
    Return the name set with `set_user_name()`, or the login name otherwise.
    """
    res = getpass.getuser() if _USER_NAME is None else _USER_NAME
    hdbg.dassert_ne(res, "")
    return res


def get_server_name() -> str:
    """
    Return the `nodename` field of `os.uname()`.
    """
    # E.g., `os.uname()` returns:
    # posix.uname_result(
    #   sysname='Darwin',
    #   nodename='gpmac.lan',
    #   release='18.2.0',
    #   version='Darwin Kernel Version 18.2.0: Mon Nov 12 20:24:46 PST 2018;
    #       root:xnu-4903.231.4~2/RELEASE_X86_64',
    #   machine='x86_64')
    return os.uname().nodename


def get_os_name() -> str:
    """
    Return the `sysname` field of `os.uname()`.
    """
    return os.uname().sysname
+ _LOG.error(msg) + raise RuntimeError(msg) + return os.environ[env_var_name] + + +# ############################################################################# +# system(), system_to_string() +# ############################################################################# + + +# pylint: disable=too-many-branches,too-many-statements,too-many-arguments,too-many-locals +def _system( + cmd: str, + print_command: bool, + abort_on_error: bool, + suppress_error: Optional[Any], + suppress_output: Union[bool, str], + blocking: bool, + wrapper: Optional[Any], + output_file: Optional[Any], + num_error_lines: Optional[int], + tee: bool, + dry_run: bool, + log_level: Union[int, str], +) -> Tuple[int, str]: + """ + Execute a shell command. + + To print the command and see the output call this as: + ``` + _system(cmd, suppress_output=False, log_level="echo") + ``` + + See `system()` for options. + """ + _LOG.debug(hprint.func_signature_to_str()) + _LOG.debug("##> %s", cmd) + orig_cmd = cmd[:] + _LOG.debug("orig_cmd=%s", orig_cmd) + # Handle `suppress_output`. + hdbg.dassert_in(suppress_output, ("ON_DEBUG_LEVEL", True, False)) + if suppress_output == "ON_DEBUG_LEVEL": + # Show the output if we are at (or lower than) DEBUG level, since + # logging.DEBUG=10 and logging.INFO=20. + show_output = _LOG.getEffectiveLevel() <= logging.DEBUG + suppress_output = not show_output + _LOG.debug(hprint.to_str("suppress_output")) + # Prepare the command line. + cmd = f"({cmd})" + hdbg.dassert_imply(tee, output_file is not None) + if output_file is not None: + # Redirect to a file. + dir_name = os.path.dirname(output_file) + if not dir_name: + dir_name = "." + if not os.path.exists(dir_name): + _LOG.debug("Dir '%s' doesn't exist: creating", dir_name) + hdbg.dassert(bool(dir_name), "dir_name='%s'", dir_name) + os.makedirs(dir_name) + if tee: + cmd += f" 2>&1 | tee -a {output_file};" + cmd += " exit ${PIPESTATUS[0]}" + else: + cmd += f" 2>&1 >{output_file}" + else: + # Do not redirect to a file. 
+ cmd += " 2>&1" + # Handle `wrapper`. + if wrapper: + cmd = wrapper + " && " + cmd + # Handle `log_level`. + # TODO(gp): Make it "ECHO" or "PRINT". + if isinstance(log_level, str): + hdbg.dassert_in(log_level, ("echo", "echo_frame")) + if log_level == "echo_frame": + print(hprint.frame(f"> {cmd}")) + elif log_level == "echo": + print(f"> {cmd}") + else: + raise ValueError(f"Invalid log_level='{log_level}'") + _LOG.debug("> %s", cmd) + else: + _LOG.log(log_level, "> %s", cmd) + output = "" + # Handle `dry_run`. + if dry_run: + _LOG.warning("As per user request, not executing command:\n%s", cmd) + rc = 0 + return rc, output + # Execute the command. + try: + stdout = subprocess.PIPE + stderr = subprocess.STDOUT + if print_command: + _LOG.info("> %s", cmd) + with subprocess.Popen( + cmd, + shell=True, + executable="/bin/bash", + stdout=stdout, + stderr=stderr, + ) as p: + output = "" + if blocking: + # Blocking call: get the output. + while True: + line = p.stdout.readline().decode("utf-8", errors="replace") # type: ignore + if not line: + break + if not suppress_output: + # print(" ==> " + line.rstrip("\n")) + print(" ... " + line.rstrip("\n")) + output += line + p.stdout.close() # type: ignore + rc = p.wait() + else: + # Not blocking. + # Wait until process terminates (without using p.wait()). + max_cnt = 20 + cnt = 0 + while p.poll() is None: + # Process hasn't exited yet, let's wait some time. + time.sleep(0.1) + cnt += 1 + _LOG.debug("cnt=%s, rc=%s", cnt, p.returncode) + if cnt > max_cnt: + break + if cnt > max_cnt: + # Timeout: we assume it worked. + rc = 0 + else: + rc = p.returncode + if suppress_error is not None: + hdbg.dassert_isinstance(suppress_error, set) + if rc in suppress_error: + rc = 0 + except OSError as e: + rc = -1 + _LOG.error("error=%s", str(e)) + _LOG.debug(" ==> rc=%s", rc) + if abort_on_error and rc != 0: + # Report the last `num_error_lines` of the output. 
# pylint: disable=too-many-arguments
def system(
    cmd: str,
    *,
    print_command: bool = False,
    abort_on_error: bool = True,
    suppress_error: Optional[Any] = None,
    suppress_output: Union[str, bool] = "ON_DEBUG_LEVEL",
    blocking: bool = True,
    wrapper: Optional[Any] = None,
    output_file: Optional[Any] = None,
    num_error_lines: Optional[int] = None,
    tee: bool = False,
    dry_run: bool = False,
    log_level: Union[int, str] = logging.DEBUG,
) -> int:
    """
    Execute a shell command, without capturing its output.

    This is a thin wrapper around `_system()`: the command is dedented, all
    the options are forwarded as they are, and the output returned by
    `_system()` is discarded.

    :param cmd: string with command to execute
    :param print_command: whether to print the command using `_LOG.info()`
    :param abort_on_error: whether to raise `RuntimeError` when the return
        code is not 0
    :param suppress_error: set of return codes to treat as success (i.e.,
        mapped to 0)
    :param suppress_output: whether to print the output or not
        - If "ON_DEBUG_LEVEL" then print the output if the log level is DEBUG
          (or lower)
    :param blocking: blocking system call or not
    :param wrapper: another command to prepend the execution of cmd (joined
        with `&&`)
    :param output_file: redirect stdout and stderr to this file
        - NOTE(review): without `tee`, `_system()` appends `2>&1 >file` to the
          command. In bash this order sends only stdout to the file, while
          stderr stays on the original stdout: confirm that this is intended
    :param num_error_lines: number of lines of the output to display when
        raising `RuntimeError` (30 if not specified)
    :param tee: if True, tee append (i.e., `tee -a`) stdout and stderr to
        `output_file`. It requires `output_file` to be specified
    :param dry_run: print the final command but not execute it. In this case
        the return code is 0
    :param log_level: print the command to execute at level "log_level".
        - If `echo` then print the command line to screen as `print()` and not
          logging
        - If `echo_frame` then print the command line to screen inside a frame
    :return: return code of the command
    """
    # print("cmd=", cmd)
    # print("suppress_output=", suppress_output)
    cmd = hprint.dedent(cmd)
    # Only the return code is exposed: use `system_to_string()` to get the
    # output of the command.
    rc, _ = _system(
        cmd,
        print_command=print_command,
        abort_on_error=abort_on_error,
        suppress_error=suppress_error,
        suppress_output=suppress_output,
        blocking=blocking,
        wrapper=wrapper,
        output_file=output_file,
        num_error_lines=num_error_lines,
        tee=tee,
        dry_run=dry_run,
        log_level=log_level,
    )
    return rc
def get_first_line(output: str) -> str:
    """
    Return the first (and only) line from a string.

    This is used when calling system_to_string() and expecting a single
    line output. Empty lines and lines containing `/tmp.base/` are ignored;
    exactly one line must remain.

    :param output: text to extract the line from
    :return: the only remaining line, without leading and trailing whitespace
    """
    non_empty_output = hprint.remove_empty_lines(output)
    # Remove the annoying spurious matches under `tmp.base`.
    candidates: List[str] = [
        line
        for line in non_empty_output.split("\n")
        if "/tmp.base/" not in line
    ]
    hdbg.dassert_eq(len(candidates), 1, "output='%s'", non_empty_output)
    return candidates[0].strip()


# TODO(gp): Move it to a more general file, e.g., `helpers/printing.py`?
def text_to_list(txt: str) -> List[str]:
    """
    Convert a string (e.g., from system_to_string) into a list of lines.

    Each line is stripped of leading and trailing whitespace and the lines
    that end up empty are dropped.
    """
    stripped_lines = (line.strip() for line in txt.split("\n"))
    return [line for line in stripped_lines if line]
def to_normal_paths(files: List[str]) -> List[str]:
    """
    Normalize each path in `files` with `os.path.normpath()`.
    """
    return [os.path.normpath(file_name) for file_name in files]


def to_absolute_paths(files: List[str]) -> List[str]:
    """
    Convert each path in `files` to an absolute path with `os.path.abspath()`.
    """
    return [os.path.abspath(file_name) for file_name in files]


def _remove_files_non_present(files: List[str]) -> List[str]:
    """
    Return list of files from `files` excluding the files that don't exist.

    A warning is logged for each file that is skipped.
    """
    existing_files: List[str] = []
    for file_name in files:
        if not os.path.exists(file_name):
            _LOG.warning("File '%s' doesn't exist: skipping", file_name)
            continue
        existing_files.append(file_name)
    return existing_files


def remove_dirs(files: List[str]) -> List[str]:
    """
    Return list of files from `files` excluding the files that are directories.

    The removed directories are reported with a single warning.
    """
    kept_files: List[str] = []
    removed_dirs: List[str] = []
    for file_name in files:
        if not os.path.isdir(file_name):
            kept_files.append(file_name)
            continue
        _LOG.debug("file='%s' is a dir: skipping", file_name)
        removed_dirs.append(file_name)
    if removed_dirs:
        _LOG.warning("Removed dirs: %s", ", ".join(removed_dirs))
    return kept_files
def system_to_files(
    cmd: str,
    dir_name: Optional[str] = None,
    remove_files_non_present: bool = False,
    mode: str = "return_all_results",
) -> List[str]:
    """
    Execute command `cmd` in `dir_name` and return the output as a list of
    file paths.

    Each non-empty line of the output is interpreted as a path relative to
    `dir_name`.

    :param cmd: command to execute
    :param dir_name: dir to run the command from (the current dir if None).
        It must exist
    :param remove_files_non_present: remove files that don't exist on
        the filesystem
    :param mode: like in `select_result_file_from_list()`
    :return: normalized paths, prefixed with `dir_name`
    """
    if dir_name is None:
        dir_name = "."
    hdbg.dassert_dir_exists(dir_name)
    cmd = f"cd {dir_name} && {cmd}"
    _, output = system_to_string(cmd)
    _LOG.debug("output=\n%s", output)
    # Remove leading / trailing whitespace and empty lines.
    # Strip both sides like `text_to_list()` does, otherwise the leading
    # whitespace ends up inside the file name.
    files = [line.strip() for line in output.split("\n")]
    files = [line for line in files if line != ""]
    _LOG.debug("files=%s", " ".join(files))
    # Convert to normalized paths.
    files = [os.path.normpath(os.path.join(dir_name, f)) for f in files]
    _LOG.debug(hprint.to_str("files"))
    # Remove non-existent files, if needed.
    if remove_files_non_present:
        files = _remove_files_non_present(files)
    # Process output.
    files = select_result_file_from_list(files, mode, cmd)
    return files
def kill_process(
    get_pids: Callable[[], Tuple[List[int], List[str]]],
    timeout_in_secs: int = 5,
    polltime_in_secs: float = 0.1,
) -> None:
    """
    Kill all the processes returned by the function `get_pids()`.

    The processes are sent `SIGKILL` and then `get_pids()` is polled until it
    returns no pid or the timeout expires.

    :param get_pids: function returning the pids to kill and the lines
        describing them, like `get_process_pids()`
    :param timeout_in_secs: how many seconds to wait at most before
        giving up
    :param polltime_in_secs: how often to check for dead processes
    """
    import tqdm

    pids, txt = get_pids()
    _LOG.info("Killing %d pids (%s)\n%s", len(pids), pids, "\n".join(txt))
    if not pids:
        return
    for pid in pids:
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError as e:
            # The process is already gone.
            _LOG.warning(str(e))
    #
    _LOG.info("Waiting %d processes (%s) to die", len(pids), pids)
    num_polls = int(timeout_in_secs / polltime_in_secs)
    for _ in tqdm.tqdm(range(num_polls), desc="Polling process"):
        time.sleep(polltime_in_secs)
        pids, _ = get_pids()
        if not pids:
            break
    # Check that no process survived.
    pids, txt = get_pids()
    hdbg.dassert_eq(len(pids), 0, "Processes are still alive:%s", "\n".join(txt))
    _LOG.info("Processes dead")


# #############################################################################
# User interaction
# #############################################################################


def query_yes_no(question: str, *, abort_on_no: bool = True) -> bool:
    """
    Ask a yes/no question via `input()` and return their answer.

    The question is asked again until the answer is one of "yes", "y", "no",
    "n" (case insensitive).

    :param question: string with the question presented to the user
    :param abort_on_no: exit (with `sys.exit(-1)`) if the user answers "no"
    :return: True for "yes" or False for "no"
    """
    hdbg.dassert_isinstance(question, str)
    hdbg.dassert_isinstance(abort_on_no, bool)
    valid = {
        "yes": True,
        "y": True,
        #
        "no": False,
        "n": False,
    }
    prompt = " [y/n] "
    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if choice in valid:
            ret = valid[choice]
            break
    _LOG.debug("ret=%s", ret)
    if abort_on_no:
        if not ret:
            print("You answer no: exiting")
            sys.exit(-1)
    return ret
def to_pbcopy(txt: str, pbcopy: bool) -> None:
    """
    Save the content of `txt` in the system clipboard.

    Trailing newlines are removed from `txt` before it is used.

    :param txt: text to copy
    :param pbcopy: if False, the text is only printed and the clipboard is not
        touched
    """
    import shlex

    txt = txt.rstrip("\n")
    if not pbcopy:
        print(txt)
        return
    if not txt:
        print("Nothing to copy")
        return
    if hserver.is_host_mac():
        # Quote the text for the shell: interpolating it between single quotes
        # breaks (or injects commands) as soon as `txt` contains a `'`.
        # `printf '%s'` emits the text verbatim, without a trailing newline and
        # without interpreting backslash escapes.
        cmd = f"printf '%s' {shlex.quote(txt)} | pbcopy"
        system(cmd)
        _LOG.warning("\n# Copied to system clipboard:\n%s", txt)
    else:
        _LOG.warning("pbcopy works only on macOS")
        print(txt)
Defaults to the current directory (".") + :return: absolute path to the top-level Git repository directory + """ + path = os.path.abspath(path) + git_root_dir = None + while True: + git_dir = os.path.join(path, ".git") + _LOG.debug("git_dir=%s", git_dir) + # Check if `.git` is a directory which indicates a standard Git repository. + if os.path.isdir(git_dir): + # Found the Git root directory. + git_root_dir = path + break + # Check if `.git` is a file which indicates submodules or linked setups. + if os.path.isfile(git_dir): + # Using the `open()` to avoid import cycles with the `hio` module. + with open(git_dir, "r") as f: + txt = f.read() + lines = txt.split("\n") + for line in lines: + # Look for a `gitdir:` line that specifies the linked directory. + # Example: `gitdir: ../.git/modules/helpers_root`. + if line.startswith("gitdir:"): + git_dir_path = line.split(":", 1)[1].strip() + _LOG.debug("git_dir_path=%s", git_dir_path) + # Resolve the relative path to the absolute path of the Git directory. + abs_git_dir = os.path.abspath( + os.path.join(path, git_dir_path) + ) + # Traverse up to find the top-level `.git` directory. + while True: + # Check if the current directory is a `.git` directory. + if os.path.basename(abs_git_dir) == ".git": + git_root_dir = os.path.dirname(abs_git_dir) + # Found the root. + break + # Move one level up in the directory structure. + parent = os.path.dirname(abs_git_dir) + # Reached the filesystem root without finding the `.git` directory. + hdbg.dassert_ne( + parent, + abs_git_dir, + "Top-level .git directory not found.", + ) + # Continue traversing up. + abs_git_dir = parent + break + # Exit the loop if the Git root directory is found. + if git_root_dir is not None: + break + # Move up one level in the directory hierarchy. + parent = os.path.dirname(path) + # Reached the filesystem root without finding `.git`. 
+ hdbg.dassert_ne( + parent, + path, + "No .git directory or file found in any parent directory.", + ) + # Update the path to the parent directory for the next iteration. + path = parent + return git_root_dir + + +# End copy. + + +def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: + """ + Find file in the repo. + """ + if root_dir is None: + root_dir = _find_git_root() + _, file_name_out = system_to_one_line( + rf"find {root_dir} -name {file_name} -not -path '*/\.git/*'" + ) + hdbg.dassert_ne(file_name_out, "", "File not found in repo: '%s'", file_name) + return file_name_out + + +# TODO(gp): Use find_file +def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: + """ + Find a file in a directory and report its absolute path. + + :param filename: the name of the file to find (e.g., "helpers_root") + :param search_path: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :return: the absolute path of the file + """ + # Recursive glob. + search_path = os.path.join(search_path, "**", filename) + files = glob.glob(search_path, recursive=True) + if len(files) == 1: + return files[0] + elif len(files) > 1: + msg = f"Found multiple files with basename '{filename}' in directory '{search_path}':\n" + msg += "\n".join(files) + raise RuntimeError(msg) + else: + return None + + +# TODO(gp): -> find_path_greedily +def find_path( + path: str, *, dir_name: str = ".", abort_on_error: bool = False +) -> str: + """ + Find a path in a directory and report its absolute path. + + :param path: the path to find (e.g., "system_tools/path.py") + :param dir_name: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :param abort_on_error: if True, raise an error if the path doesn't exist + :return: the absolute path of the path + """ + # Make the path absolute. + path_out = os.path.abspath(path) + # If the path exists, return it. + if os.path.exists(path_out): + return path_out + # If the path doesn't exist, abort. 
def du(path: str, human_format: bool = False) -> Union[int, str]:
    """
    Return the size in bytes of a file or a directory (recursively).

    The size is the one reported by the `du` command in 1 KB blocks, so the
    result is always a multiple of 1024.

    :param path: existing file or directory to measure
    :param human_format: represent the size in KB, MB, ... instead of bytes
        using `hintros.format_size()`
    :return: size in bytes as an int, or a formatted string if `human_format`
    """
    import shlex

    hdbg.dassert_path_exists(path)
    # - `-k` forces 1024-byte blocks: without it BSD / macOS `du` reports
    #   512-byte blocks, which would make the conversion below wrong.
    # - The path is quoted so that spaces or shell metacharacters are safe.
    cmd = f"du -k -d 0 {shlex.quote(path)}" + " | awk '{print $1}'"
    # > du -k -d 0 core
    # 20      core
    _, txt = system_to_one_line(cmd)
    _LOG.debug("txt=%s", txt)
    # `du -k` returns size in KB.
    size_in_bytes = int(txt) * 1024
    size: Union[int, str]
    if human_format:
        size = hintros.format_size(size_in_bytes)
    else:
        size = size_in_bytes
    return size
def find_file_with_dir(
    file_name: str,
    *,
    root_dir: str = ".",
    dir_depth: int = -1,
    mode: str = "return_all_results",
    candidate_files: Optional[List[str]] = None,
) -> List[str]:
    """
    Look under `root_dir` for files whose basename and enclosing dirs match
    those of `file_name`.

    E.g., `amp/core/dataflow_model/utils.py` with `dir_depth=1` matches any
    file named `utils.py` that lives in a dir named `dataflow_model`.

    :param file_name: path whose trailing components are used for matching
    :param root_dir: dir from which the `find` command is run
    :param dir_depth: number of enclosing dirs that must match
        - `-1` means as many as possible, after dropping a leading `/app/` and
          then a leading `amp/`. E.g., `/app/amp/core/dataflow/utils.py` uses 2
          levels (`core/dataflow`)
    :param mode: how to pick the returned files, as in
        `select_result_file_from_list()`
    :param candidate_files: precomputed output of the `find` command, used to
        mock it in unit tests
    :return: list of files found
    """
    _LOG.debug(hprint.func_signature_to_str())
    # Collect every file with the same basename, unless the caller provided
    # the candidates.
    if candidate_files is None:
        target_basename = os.path.basename(file_name)
        # > find . -name "utils.py"
        # ./amp/core/dataflow/utils.py
        # ./amp/core/dataflow_model/utils.py
        # ./amp/im/common/test/utils.py
        find_cmd = rf"find . -name '{target_basename}' -not -path '*/\.git/*'"
        candidate_files = system_to_files(
            find_cmd, dir_name=root_dir, mode="return_all_results"
        )
    _LOG.debug("candidate files=\n%s", "\n".join(candidate_files))
    # Infer the depth from the path itself, if requested.
    if dir_depth == -1:
        # Drop "/app/" first and then "amp/", in this order, when present.
        for leading in ("/app/", "amp/"):
            if file_name.startswith(leading):
                file_name = file_name[len(leading) :]
        # The number of dir levels is the number of components minus the
        # basename.
        dir_depth = len(os.path.normpath(file_name).split("/")) - 1
        _LOG.debug(
            "inferred dir_depth=%s for file_name=%s", dir_depth, file_name
        )
    # Keep the candidates whose signature equals the one of the target file.
    matching_files = []
    for candidate in sorted(candidate_files):
        candidate_signature = _compute_file_signature(candidate, dir_depth)
        target_signature = _compute_file_signature(file_name, dir_depth)
        is_equal = candidate_signature == target_signature
        _LOG.debug("found_file=%s -> is_equal=%s", candidate, is_equal)
        if not is_equal:
            continue
        matching_files.append(candidate)
    _LOG.debug(
        "Found %d files:\n%s", len(matching_files), "\n".join(matching_files)
    )
    # Let `mode` decide what to return out of the matches.
    res = select_result_file_from_list(matching_files, mode, file_name)
    _LOG.debug("-> res=%s", str(res))
    return res
def has_timestamp(file_name: str) -> bool:
    """
    Check whether the basename of `file_name` contains a timestamp.

    A timestamp is a sequence of 4, 2, 2, 2, 2, 2 digits (year, month, day,
    hour, minute, second) where consecutive groups are separated by `-`, `_`,
    or nothing, e.g., `20210724-12_45_51`. E.g., this function for
    `experiment.RH1E.5T.20210724-12_45_51` returns True.

    An assertion is raised if more than one timestamp is found.

    :param file_name: file name or path; only the basename is inspected
    :return: True if a timestamp is present
    """
    file_name = os.path.basename(file_name)
    # E.g., %Y%m%d-%H_%M_%S
    # The separator is _, -, or nothing.
    sep = "[-_]?"
    regex = sep.join([r"\d{4}"] + [r"\d{2}"] * 5)
    _LOG.debug("regex=%s", regex)
    # The regex has no groups, so `findall()` returns the matched strings and
    # a single scan is enough to both count and report the timestamps.
    occurrences = re.findall(regex, file_name)
    # The message needs a `%s` placeholder for the extra argument to be
    # rendered.
    hdbg.dassert_lte(
        len(occurrences),
        1,
        "Found more than one timestamp: %s",
        str(occurrences),
    )
    has_timestamp_ = bool(occurrences)
    if has_timestamp_:
        _LOG.debug(
            "Found a timestamp '%s' in '%s'", occurrences[0], file_name
        )
    return has_timestamp_
+ tag_ += "." + tag + new_file_name = os.path.join(dir_name, "".join([name, tag_, extension])) + _LOG.debug(hprint.to_str("file_name new_file_name")) + return new_file_name + + +def tee( + cmd: str, executable: str, abort_on_error: bool +) -> Tuple[int, List[str]]: + """ + Execute command and return its exit code and output lines. + + Captures output, removes empty lines, and optionally aborts on error. + + :param cmd: Command string to execute + :param executable: Executable to use for running the command + :param abort_on_error: Whether to abort execution if command fails + :return: Tuple of (exit code, list of non-empty output lines) + """ + _LOG.debug("cmd=%s executable=%s", cmd, executable) + rc, output = system_to_string(cmd, abort_on_error=abort_on_error) + hdbg.dassert_isinstance(output, str) + output1 = output.split("\n") + _LOG.debug("output1= (%d)\n'%s'", len(output1), "\n".join(output1)) + output2 = hprint.remove_empty_lines(output1) + _LOG.debug("output2= (%d)\n'%s'", len(output2), "\n".join(output2)) + hdbg.dassert_list_of_strings(output2) + return rc, output2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py new file mode 100644 index 000000000..5278e3984 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py @@ -0,0 +1,180 @@ +""" +Import as: + +import helpers.htable as htable +""" + +import copy +import csv +import logging +from typing import Any, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +TableType = List[List[str]] + + +# ############################################################################# +# Table +# ############################################################################# + + +class Table: + """ + A 
simple (rectangular) table without introducing a dependency from Pandas. + + The element in the table can be anything. + """ + + @staticmethod + def _check_table(table: TableType, column_names: List[str]) -> None: + """ + Check that the table is well-formed (e.g., the list of lists is + rectangular). + """ + hdbg.dassert_isinstance(table, list) + hdbg.dassert_isinstance(column_names, list) + hdbg.dassert_no_duplicates(column_names) + # Columns have no leading or trailing spaces. + for column_name in column_names: + hdbg.dassert_eq(column_name, column_name.rstrip().lstrip()) + # Check that the list of lists is rectangular. + for row in table: + hdbg.dassert_isinstance(table, list) + hdbg.dassert_eq( + len(row), + len(column_names), + "Invalid row='%s' for cols='%s'", + row, + column_names, + ) + + def __repr__(self) -> str: + res = "" + res += f"cols={str(self._column_names)}" + res += "\ntable=\n" + "\n".join(map(str, self._table)) + res += "\n" + f"size={str(self.size())}" + return res + + def __init__(self, table: TableType, column_names: List[str]) -> None: + # Check that the inputs are well-formed. + self._check_table(table, column_names) + # Save state. + self._table = table + self._column_names = column_names + _LOG.debug("%s", self.__repr__()) + # Map a column name to the index of the corresponding column, to allow + # indexing by column. + self._col_to_idx = { + col: idx for idx, col in enumerate(self._column_names) + } + _LOG.debug("col_to_idx=%s", str(self._col_to_idx)) + + @classmethod + def from_text(cls, cols: List[str], txt: str, delimiter: str) -> "Table": + """ + Build a table from a list of columns and the body of a CSV file. + """ + hdbg.dassert_isinstance(txt, str) + table = list(csv.reader(txt.split("\n"), delimiter=delimiter)) + return cls(table, cols) + + def size(self) -> Tuple[int, int]: + """ + Return the size of the table. 
+ + :return: number of rows x columns (i.e., numpy / Pandas convention) + """ + return len(self._table), len(self._column_names) + + def filter_rows(self, column_name: str, value: str) -> "Table": + """ + Return a Table filtered with rows filtered by the criteria "field == + value". + """ + _LOG.debug("self=\n%s", repr(self)) + # Filter the rows. + hdbg.dassert_in(column_name, self._col_to_idx.keys()) + rows_filter = [ + row + for row in self._table + if row[self._col_to_idx[column_name]] == value + ] + _LOG.debug(hprint.to_str("rows_filter")) + # Build the resulting table. + table_filter = Table(rows_filter, self._column_names) + _LOG.debug("table_filter=\n%s", repr(table_filter)) + return table_filter + + def get_column(self, column_name: str) -> List[Any]: + """ + Return the list of unique values for a row / field. + """ + hdbg.dassert_in(column_name, self._column_names) + column_idx = self._col_to_idx[column_name] + # Scan the rows to extract the column. + vals = [] + for row in self._table: + vals.append(row[column_idx]) + return vals + + def unique(self, column_name: str) -> List[Any]: + """ + Return a list of unique values for a field. + """ + vals = self.get_column(column_name) + vals = sorted(list(set(vals))) + return vals + + def remove_column(self, column_name: str) -> "Table": + """ + Return a new Table with the specified column removed. + + :param column_name: name of the column to remove + :return: new Table without the specified column + """ + hdbg.dassert_in(column_name, self._column_names) + # Find the index of the column to remove. + column_idx = self._col_to_idx[column_name] + # Create new column names list without the removed column. + new_column_names = [ + col for col in self._column_names if col != column_name + ] + # Create new table rows without the removed column. + new_table = [ + [val for idx, val in enumerate(row) if idx != column_idx] + for row in self._table + ] + # Build and return the new table. 
+ return Table(new_table, new_column_names) + + def __str__(self) -> str: + """ + Return a string representing the table with columns aligned. + """ + table = copy.deepcopy(self._table) + table.insert(0, self._column_names) + # Convert the cells to strings. + table_as_str = [[str(cell) for cell in row] for row in table] + # Find the length of each columns. + lengths = [max(map(len, col)) for col in zip(*table_as_str)] + _LOG.debug(hprint.to_str("lengths")) + # Compute format for the columns. + fmt = " ".join(f"{{:{x}}} |" for x in lengths) + _LOG.debug(hprint.to_str("fmt")) + # Add the row separating the column names. + row_sep = ["-" * length for length in lengths] + table.insert(1, row_sep) + table_as_str = [[str(cell) for cell in row] for row in table] + # Format rows. + rows_as_str = [fmt.format(*row) for row in table_as_str] + # Remove trailing spaces. + rows_as_str = [row.rstrip() for row in rows_as_str] + # Create string. + res = "\n".join(rows_as_str) + # res += "\nsize=" + str(self.size()) + return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py new file mode 100644 index 000000000..8ef0e3a4f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +""" +Add a description of what the script does and examples of command lines. + +Check dev_scripts/linter.py to see an example of a script using this +template. 
+ +Import as: + +import dev_scripts_helpers.script_template as dscscske +""" + +import argparse +import logging + +import helpers.hlogging as hloggin +import helpers.hparser as hparser + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("positional", nargs="*", help="...") + parser.add_argument("--dst_dir", action="store", help="Destination dir") + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.parse_verbosity_args(args, use_exec_path=True) + hloggin.test_logger() + # + # logging.disable(logging.WARNING) + hloggin.shut_up_log_debug(_LOG) + hloggin.test_logger() + + +if __name__ == "__main__": + _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py new file mode 100644 index 000000000..7b6506ce6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py @@ -0,0 +1,262 @@ +""" +Utilities for protecting content during text processing. + +Extract and restore content that should not be modified by formatters and text +transformations (code blocks, comments, etc.). 
+ +Import as: + +import helpers.htext_protect as htexprot +""" + +import logging +import re +from typing import Dict, List, Optional, Tuple + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Helper functions +# ############################################################################# + + +def _is_fenced_block_delimiter(line: str) -> bool: + """ + Check if line is a fenced block delimiter (```). + + :param line: Line to check + :return: True if line matches fenced block delimiter pattern + """ + return bool(re.match(r"^\s*```", line)) + + +def _is_math_block_delimiter(line: str) -> bool: + """ + Check if line is a math block delimiter ($$). + + :param line: Line to check + :return: True if line matches math block delimiter pattern + """ + return bool(re.match(r"^\s*\$\$\s*$", line)) + + +def _extract_single_line_html_comment(line: str) -> Optional[str]: + """ + Extract single-line HTML comment from line if present. + + Skips TOC markers ( and ) as they need to be + processed by the TOC generation logic. + + :param line: Line to check + :return: Full comment string if found, None otherwise + """ + # Skip TOC markers: they are processed by `refresh_toc`. + if "" in line or "" in line: + return None + # Match on single line. + m = re.match(r"^(\s*\s*)$", line) + if m: + return m.group(1) + return None + + +def _is_html_comment_start(line: str) -> bool: + """ + Check if line starts an HTML comment. + + Skips TOC markers as they need to be processed by TOC generation logic. + + :param line: Line to check + :return: True if line contains + """ + # Skip TOC markers. + if "" in line or "" in line: + return False + return "" not in line + + +def _is_html_comment_end(line: str) -> bool: + """ + Check if line ends an HTML comment. 
+ + :param line: Line to check + :return: True if line contains --> without opening " in line and ") for .md and .txt files + - LaTeX comments (% ...) for .tex files + + :param lines: The lines to be processed + :param file_type: File extension ('md', 'txt', or 'tex') + :return: Tuple of (lines with placeholders, mapping of placeholders to + original content) + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_in(file_type, ["md", "txt", "tex"]) + _LOG.debug("Extracting protected content for file_type=%s", file_type) + # + protected_map: Dict[str, str] = {} + counter = 1 + lines_new: List[str] = [] + # State tracking. + in_fenced_block = False + in_math_block = False + in_html_comment = False + fenced_block_lines: List[str] = [] + math_block_lines: List[str] = [] + html_comment_lines: List[str] = [] + # Process each line. + for line in lines: + # Handle fenced blocks (for .md and .txt files). + if file_type in ["md", "txt"] and _is_fenced_block_delimiter(line): + if not in_fenced_block: + # Opening delimiter. + in_fenced_block = True + lines_new.append(line) + fenced_block_lines = [] + else: + # Closing delimiter: protect only content, keep delimiters visible. + placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(fenced_block_lines) + counter += 1 + lines_new.append(placeholder) + lines_new.append(line) + in_fenced_block = False + fenced_block_lines = [] + continue + # Inside fenced block: accumulate. + if in_fenced_block: + fenced_block_lines.append(line) + continue + # Handle math blocks (for all file types). + if _is_math_block_delimiter(line): + if not in_math_block: + # Opening delimiter. + in_math_block = True + lines_new.append(line) + math_block_lines = [] + else: + # Closing delimiter: protect only content, keep delimiters visible. 
+ placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(math_block_lines) + counter += 1 + lines_new.append(placeholder) + lines_new.append(line) + in_math_block = False + math_block_lines = [] + continue + # Inside math block: accumulate. + if in_math_block: + math_block_lines.append(line) + continue + # Handle HTML comments (for .md and .txt files). + if file_type in ["md", "txt"]: + # Single-line HTML comment. + single_line_comment = _extract_single_line_html_comment(line) + if single_line_comment: + placeholder = f"<<>>" + protected_map[placeholder] = single_line_comment + counter += 1 + lines_new.append(placeholder) + continue + # Multi-line HTML comment start. + if _is_html_comment_start(line): + in_html_comment = True + html_comment_lines = [line] + continue + # Multi-line HTML comment end. + if in_html_comment and _is_html_comment_end(line): + html_comment_lines.append(line) + placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(html_comment_lines) + counter += 1 + lines_new.append(placeholder) + in_html_comment = False + html_comment_lines = [] + continue + # Inside multi-line HTML comment: accumulate. + if in_html_comment: + html_comment_lines.append(line) + continue + # Handle LaTeX comments (for .tex files). + if file_type == "tex" and _is_latex_comment(line): + placeholder = f"<<>>" + protected_map[placeholder] = line + counter += 1 + lines_new.append(placeholder) + continue + # Regular line: keep as-is. + lines_new.append(line) + # Check for unclosed blocks. 
def restore_protected_content(
    lines: List[str],
    protected_map: Dict[str, str],
) -> List[str]:
    """
    Put back the original text in place of the placeholders.

    - A line that, once stripped, is exactly a placeholder is replaced by the
      lines of the original content
    - A placeholder embedded in a longer line is replaced inline

    :param lines: lines containing placeholders
    :param protected_map: mapping of placeholders to original content
    :return: lines with restored content
    """
    hdbg.dassert_isinstance(lines, list)
    hdbg.dassert_isinstance(protected_map, dict)
    _LOG.debug("Restoring %d protected content blocks", len(protected_map))
    #
    restored_lines: List[str] = []
    for current in lines:
        for token, content in protected_map.items():
            if token not in current:
                continue
            if current.strip() == token:
                # The placeholder is the whole line: expand it into the
                # (possibly multi-line) content and move on to the next line.
                restored_lines.extend(content.split("\n"))
                break
            # The placeholder is embedded in the line: replace it in place and
            # keep looking for other placeholders.
            current = current.replace(token, content)
        else:
            # No whole-line placeholder was expanded: keep the line, with any
            # inline replacement applied.
            restored_lines.append(current)
    return restored_lines
def _timeout_handler() -> None:
    sys.stderr.flush()
    # Raise KeyboardInterrupt.
    _thread.interrupt_main()


def timeout(timeout_sec: int) -> Any:
    """
    Decorator limiting the execution time of a function.

    A timer is started right before the decorated function is called. If the
    function is still running after `timeout_sec` seconds, the timer thread
    interrupts the main thread, where a `KeyboardInterrupt` is raised. The
    timer is always cancelled when the function returns or raises.

    :param timeout_sec: time limit in seconds
    :return: decorator to apply to the function to limit
    """
    import functools

    def outer(fn: Any) -> Any:
        # Preserve name, docstring, and other metadata of the wrapped function.
        @functools.wraps(fn)
        def inner(*args: Any, **kwargs: Any) -> Any:
            timer = threading.Timer(timeout_sec, _timeout_handler)
            timer.start()
            try:
                return fn(*args, **kwargs)
            finally:
                # Disarm the timer so that it cannot fire after the call is
                # over.
                timer.cancel()

        return inner

    return outer
+ """ + self._stop: Optional[float] = None + # Store the time for the last elapsed interval. + self._last_elapsed: Optional[float] = None + # Store the total time for all the measured intervals. + self._total_elapsed = 0.0 + if start_on_creation: + # For better accuracy start the timer as last action, after all the + # bookkeeping. + self._start: Optional[float] = time.time() + else: + self._start = None + + def stop(self) -> None: + """ + Stop the timer and accumulate the interval. + """ + # Timer must have not been stopped before. + hdbg.dassert(self.is_started() and not self.is_stopped()) + # For better accuracy stop the timer as first action. + self._stop = time.time() + # Update the total elapsed time. + # Sometimes we get numerical error tripping this assertion + # (e.g., '1619552498.813126' <= '1619552498.805193') so we give + # a little slack to the assertion. + # hdbg.dassert_lte(self._start, self._stop + 1e-2) + self._last_elapsed = cast(float, self._stop) - cast(float, self._start) + self._total_elapsed += self._last_elapsed + # Stop. + self._start = None + self._stop = None + + def get_elapsed(self) -> float: + """ + Stop if not stopped already, and return the elapsed time. + """ + if not self.is_stopped(): + self.stop() + hdbg.dassert_is_not(self._last_elapsed, None) + return cast(float, self._last_elapsed) + + # ///////////////////////////////////////////////////////////////////////// + + def resume(self) -> None: + """ + Resume the timer after a stop. + """ + # Timer must have been stopped before. + hdbg.dassert(self.is_started() or self.is_stopped()) + self._stop = None + # Start last for better accuracy. + self._start = time.time() + + def is_started(self) -> bool: + return ( + self._start is not None and self._start >= 0 and self._stop is None + ) + + def is_stopped(self) -> bool: + return self._start is None and self._stop is None + + def get_total_elapsed(self) -> float: + """ + Stop if not stopped already, and return the total elapsed time. 
def dtimer_stop(memento: _TimerMemento) -> Tuple[str, float]:
    """
    End measuring time and log the outcome.

    :param memento: tuple `(log_level, message, timer)` as returned by
        `dtimer_start()`
    :return:
        - message as a string, e.g., "Work done (1.234 s)"
        - elapsed time in seconds (float), rounded to 3 decimals
    """
    log_level, message, timer = memento
    timer.stop()
    elapsed_time = round(timer.get_elapsed(), 3)
    # Format only the number: applying `%` to the whole string would interpret
    # any `%` inside `message` as a conversion specifier.
    msg = f"{message} done ({elapsed_time:.3f} s)"
    _LOG.log(log_level, msg)
    return msg, elapsed_time
+ ``` + """ + + def __init__( + self, log_level: int, message: str, *, profile_memory: bool = False + ): + self._log_level = log_level + self._message = message + # TODO(gp): Implement profiling also memory using dmemory_start/end. + # State. + self._memento: Optional[_TimerMemento] = None + self.elapsed_time = None + + def get_result(self) -> str: + msg: str = f"{self._message} done (%.3f s)" % self.elapsed_time + return msg + + def __enter__(self) -> "TimedScope": + self._memento = dtimer_start(self._log_level, self._message) + return self + + def __exit__(self, *args: Any) -> None: + if self._memento is not None: + msg, self.elapsed_time = dtimer_stop(self._memento) + _ = msg + + +# ############################################################################# +# Decorator. +# ############################################################################# + + +def timed(f: Callable) -> Callable: + """ + Add a timer around the invocation of a function. + """ + + def wrapper(*args: Any, **kwargs: Any) -> Any: + func_name = getattr(f, "__name__", "unknown_function") + # + timer = dtimer_start(0, func_name) + v = f(*args, **kwargs) + dtimer_stop(timer) + return v + + return wrapper + + +# TODO(gp): Add an object that accumulates the times from multiple timers. +# E.g., use a dict for message -> time + + +# ############################################################################# + + +_MemoryMemento = Tuple[int, str, hloggin.MemoryUsage] + + +def dmemory_start(log_level: int, message: str) -> _MemoryMemento: + """ + Start measuring memory. + + :return: memento of the memory profile + """ + _LOG.log(log_level, "%s ...", message) + memory_usage = hloggin.get_memory_usage() + memento = (log_level, message, memory_usage) + return memento + + +def dmemory_stop(memento: _MemoryMemento, *, mode: str = "all") -> str: + """ + Stop measuring memory. 
# From https://github.com/tqdm/tqdm/issues/313
class TqdmToLogger(io.StringIO):
    """
    File-like sink forwarding the progress text of `tqdm` to a logger.

    `write()` only remembers the last chunk of text, and `flush()` emits the
    remembered chunk through the logger.

    Use as:
    ```
    from tqdm.autonotebook import tqdm

    tqdm_out = TqdmToLogger(_LOG, level=logging.INFO)
    for ... tqdm(..., file=tqdm_out):
    ```
    """

    # Class-level defaults, replaced per instance by `__init__()` / `write()`.
    logger = None
    level = None
    buf = ""

    def __init__(self, logger: Any, level: Optional[int] = None):
        """
        Store the destination logger and the level used for each message.

        :param logger: object exposing `log(level, msg)`
        :param level: logging level; a falsy value selects `logging.INFO`
        """
        super().__init__()
        self.logger = logger
        if level:
            self.level = level
        else:
            self.level = logging.INFO

    def write(self, buf: str) -> None:
        """
        Remember `buf`, without leading / trailing line breaks, tabs, spaces.
        """
        trimmed = buf.strip("\r\n\t ")
        self.buf = trimmed

    def flush(self) -> None:
        """
        Log the text remembered by the last `write()`.
        """
        text = self.buf
        self.logger.log(self.level, text)
+ + :param txt: the text to parse + :param purify_from_client: express the files with respect to the Git root + :return: + - a list of `CFILE_ROW`, e.g., + ``` + ("test/test_lib_tasks.py", + 27, + "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)") + - a string storing the traceback, like: + ``` + Traceback (most recent call last): + File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + ``` + - A `None` value means that no traceback was found. + """ + # TODO(gp): Horrible hack to get the tests to pass. IMO this whole function + # needs to be rewritten using a proper parser or library. Now it's full + # of weird handling of edge cases. + txt += "\n" + # + lines = txt.split("\n") + # pylint: disable=line-too-long + # Remove the artifacts of a GH run. E.g., + # "Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined" -> + # -> "NameError: name 'cofinanc' is not defined". + lines = [ + re.split( + r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+Z ", + line, + )[-1] + for line in lines + ] + state = "look_for" + cfile: List[CfileRow] = [] + i = 0 + start_idx = end_idx = 0 + while i < len(lines): + line = lines[i] + _LOG.debug("state=%-10s i=%d: line='%s'", state, i, line) + if state == "look_for": + if line.startswith("Traceback (most recent call last):"): + start_idx = i + # Update the state. 
+ state = "parse" + i += 1 + continue + elif state == "parse": + # The file looks like: + # File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh + # actual = ltasks._get_gh_issue_title(issue_id, repo) + regex = r"^\s*File \"(.+)\", line (\d+), in (\S+)$" + m = re.match(regex, line) + hdbg.dassert(m, "Can't parse '%s'", line) + m: Match[Any] + file_name = m.group(1) + line_num = int(m.group(2)) + func_name = m.group(3) + _LOG.debug(" -> %s %d %s", file_name, line_num, func_name) + # + # Parse the next line until the next `File...`. + _LOG.debug("Search end of snippet") + j = i + 1 + hdbg.dassert_lte(j, len(lines)) + while j < len(lines): + _LOG.debug(" j=%d: line='%s'", j, lines[j]) + if lines[j].startswith(' File "') or not lines[j].startswith( + " " + ): + _LOG.debug(" Found end of snippet") + break + j += 1 + # Concatenate the lines into a single line. + code = lines[i + 1 : j] + _LOG.debug(" -> code: [%d, %d]\n%s", i, j, "\n".join(code)) + code = map(lambda x: x.rstrip().lstrip(), code) + code_as_single_line = "/".join(code) + _LOG.debug(" -> code_as_single_line=\n%s", code_as_single_line) + # Assemble the result. + file_name = os.path.normpath(file_name) + cfile_row = ( + file_name, + line_num, + func_name + ":" + code_as_single_line, + ) + _LOG.debug(" => cfile_row='%s'", cfile_row_to_str(cfile_row)) + cfile.append(cfile_row) + # Update the state. + if not lines[j].startswith(" "): + _LOG.debug(" Found end of traceback") + end_idx = j + state = "end" + break + state = "parse" + i = j + continue + # + i += 1 + # + if state == "look_for": + # We didn't find a traceback. + cfile = [] + traceback = None + elif state == "end": + if ( + end_idx < len(lines) - 1 + and "Error:" not in lines[end_idx - 1] + and "Error:" in lines[end_idx] + ): + # Extend the traceback to the lines with the error description. 
+ # E.g., for the snippet below: + # ``` + # if repo_short_name == "amp": + # NameError: name 'repo_short_name' is not defined + # ``` + # If the parsed traceback stops at 'if repo_short_name == "amp":', + # and thus, its last line does not include the error description + # ("NameError:..."), and the following line does include the error + # description, then the traceback will be extended to include the + # following line, making the parsed traceback end with the following + # two lines: + # ``` + # if repo_short_name == "amp": + # NameError: name 'repo_short_name' is not defined + # ``` + to_break = False + while end_idx < len(lines) - 1 and not to_break: + end_idx += 1 + line = lines[end_idx] + _LOG.debug( + "Extend traceback: to_break=%s, end_idx=%s, line='%s'", + to_break, + end_idx, + line, + ) + if ( + "________ Test" in line + or "====== slowest 3 durations" in line + ): + # Stop if we have reached the next traceback or the end of the + # pytest report. + to_break = True + hdbg.dassert_lte(0, start_idx) + hdbg.dassert_lte(start_idx, end_idx) + hdbg.dassert_lt(end_idx, len(lines)) + _LOG.debug("start_idx=%d end_idx=%d", start_idx, end_idx) + traceback = "\n".join(lines[start_idx:end_idx]) + else: + raise ValueError(f"Invalid state='{state}'") + _LOG.debug("traceback=\n%s", traceback) + _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) + # Purify filenames from client so that refer to files in this client. + if cfile and purify_from_client: + _LOG.debug("# Purifying from client") + cfile_tmp = [] + for cfile_row in cfile: + file_name, line_num, text = cfile_row + # Leave the files relative to the current dir. 
+ root_dir = hgit.get_client_root(super_module=False) + mode = "return_all_results" + file_names = hgit.find_docker_file( + file_name, root_dir=root_dir, mode=mode + ) + if len(file_names) == 0: + _LOG.warning("Can't find file corresponding to '%s'", file_name) + elif len(file_names) > 1: + _LOG.warning( + "Found multiple potential files corresponding to '%s'", + file_name, + ) + else: + file_name = file_names[0] + cfile_tmp.append((file_name, line_num, text)) + cfile = cfile_tmp + _LOG.debug("# After purifying from client") + _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) + return cfile, traceback diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py new file mode 100644 index 000000000..d706292ed --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +""" +Allow translating text using AWS Translate. It can be used as a module or CLI +tool. + +Supported languages and languages codes: +https://docs.aws.amazon.com/translate/latest/dg/what-is.html + +Import as: + +import helpers.htranslate as htransl +""" + +import argparse +import configparser +import logging +import pathlib +import sys +from typing import Optional, Tuple + +import boto3 + +_LOG = logging.getLogger(__name__) + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "lang", + help=( + "source language code. 
def _load_credentials(conf_path: pathlib.Path) -> Tuple[str, str]:
    """
    Load AWS credentials from a config file.

    The keys are read from the `default` section. The process exits with
    status 1 when the section or one of the two options is missing. This is
    also what happens when the file doesn't exist, since `ConfigParser.read()`
    silently skips the files it can't open.

    :param conf_path: credentials file path
    :return: tuple with the values of `aws_access_key_id` and
        `aws_secret_access_key`
    """
    config = configparser.ConfigParser()
    config.read(conf_path)
    try:
        access = config.get("default", "aws_access_key_id")
        secret = config.get("default", "aws_secret_access_key")
    except (configparser.NoSectionError, configparser.NoOptionError) as err:
        # `NoSectionError` is raised when the file is missing or has no
        # `[default]` section: report it like a missing option instead of
        # letting the exception escape.
        _LOG.error("Unable to read option for: %s", err.args)
        sys.exit(1)
    else:
        return access, secret
+ """ + tr = self._translate.translate_text( + Text=text, SourceLanguageCode=lang_code, TargetLanguageCode="en" + ) + return str(tr.get("TranslatedText")) + + +if __name__ == "__main__": + args = _parse_args() + aws_access, aws_secret = _load_credentials(args.credentials) + api = TranslateAPI(aws_access, aws_secret) + result = api.translate_text(args.text, args.lang) + print(result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py new file mode 100644 index 000000000..1bb3472d7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py @@ -0,0 +1,11 @@ +""" +Contain general types based on standard Python libraries. + +Import as: + +import helpers.htypes as htypes +""" + +from typing import Any, Dict + +Kwargs = Dict[str, Any] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py new file mode 100644 index 000000000..d585faeef --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py @@ -0,0 +1,1876 @@ +""" +Enhanced unit testing framework built on top of unittest and pytest. 
+ +This module provides: +- TestCase base class with golden file testing capabilities +- Utilities for comparing strings, dataframes, and other outputs +- Test outcome management with update and incremental modes +- Directory management for input, output, and scratch space +- Integration with Git for managing test outcomes + +Import as: + +import helpers.hunit_test as hunitest +""" + +import abc +import collections +import inspect +import logging +import os +import pprint +import random +import re +import sys +import traceback +import unittest +from typing import Any, Dict, List, Mapping, Optional, Tuple + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.htimer as htimer +import helpers.hunit_test_purification as huntepur +import helpers.hwall_clock_time as hwacltim +import helpers.repo_config_utils as hrecouti + +# We use strings as type hints (e.g., 'pd.DataFrame') since we are not sure +# we have the corresponding libraries installed. + + +# Minimize dependencies from installed packages. + +# TODO(gp): Use `hprint.color_highlight`. +_WARNING = "\033[33mWARNING\033[0m" + +try: + import numpy as np + + _HAS_NUMPY = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_NUMPY = False +try: + import pandas as pd + + _HAS_PANDAS = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_PANDAS = False + +try: + import matplotlib.pyplot as plt + + _HAS_MATPLOTLIB = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_MATPLOTLIB = False + + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. +_LOG.setLevel(logging.INFO) + +# ############################################################################# + +# Global setter / getter for updating test. + +# This controls whether the output of a test is updated or not. 
def pytest_print(txt: str) -> None:
    """
    Print bypassing `pytest` output capture.

    When `_GLOBAL_CAPSYS` has not been set (it is `None` by default), there is
    no capture object to disable and the text is written directly to
    `sys.stdout`.

    :param txt: text to write, emitted as-is without adding a newline
    """
    if _GLOBAL_CAPSYS is None:
        # Nothing to bypass: avoid failing with an `AttributeError` on `None`.
        sys.stdout.write(txt)
        return
    with _GLOBAL_CAPSYS.disabled():
        sys.stdout.write(txt)
def to_string(var: str) -> str:
    """
    Build the text of an f-string printing a variable as `name=value`.

    E.g., `to_string("x")` returns the 7 characters `f"x={x}`.

    NOTE(review): the returned text has an opening double quote but no closing
    one; confirm with the callers whether this is intended.

    :param var: name of the variable to reference in the f-string
    :return: text of the f-string expression
    """
    opening = 'f"'
    placeholder = "{" + var + "}"
    return opening + var + "=" + placeholder
def _remove_spaces(txt: str) -> str:
    """
    Normalize white spaces to implement fuzzy matching.

    - Turn the two-character sequences `\\n` and `\\t` into a real newline and
      a real tab
    - Squash each run of white spaces (newlines excluded) into a single space
    - Strip leading / trailing white spaces from each line
    - Drop the lines that are left empty

    :param txt: text to normalize
    :return: normalized text, with lines joined by `\\n`
    """
    unescaped = txt.replace("\\n", "\n").replace("\\t", "\t")
    # Newlines are preserved here since they delimit the lines split below.
    collapsed = re.sub(r"[^\S\n]+", " ", unescaped)
    stripped_lines = (
        re.sub(r"\s+$", "", re.sub(r"^\s+", "", raw_line))
        for raw_line in collapsed.split("\n")
    )
    return "\n".join(line for line in stripped_lines if line != "")
+ for dst_dir in (".", test_dir): + act_file_name = f"{dst_dir}/tmp.{tag}actual.txt" + hio.to_file(act_file_name, actual) + exp_file_name = f"{dst_dir}/tmp.{tag}expected.txt" + hio.to_file(exp_file_name, expected) + + +def assert_equal( + actual: str, + expected: str, + full_test_name: str, + test_dir: str, + *, + check_string: bool = False, + remove_lead_trail_empty_lines: bool = False, + dedent: bool = False, + purify_text: bool = False, + purify_expected_text: bool = False, + fuzzy_match: bool = False, + ignore_line_breaks: bool = False, + split_max_len: Optional[int] = None, + sort: bool = False, + abort_on_error: bool = True, + dst_dir: str = ".", + error_msg: str = "", +) -> bool: + """ + See interface in `TestCase.assert_equal()`. + + :param full_test_name: e.g., `TestRunNotebook1.test2` + :param check_string: if it was invoked by `check_string()` or directly + """ + _LOG.debug(hprint.func_signature_to_str("actual expected")) + # Store a mapping tag after each transformation (e.g., original, sort, ...) to + # (actual, expected). + values: Dict[str, str] = collections.OrderedDict() + + def _append(tag: str, actual: str, expected: str) -> None: + _LOG.debug( + "tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected + ) + hdbg.dassert_not_in(tag, values) + values[tag] = (actual, expected) + + # + _LOG.debug("Before any transformation:") + tag = "original" + _append(tag, actual, expected) + # 1) Remove white spaces. + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_white_spaces(actual) + expected = text_purifier.purify_white_spaces(expected) + tag = "purify_white_spaces" + _append(tag, actual, expected) + # Remove empty leading / trailing lines. 
+ if remove_lead_trail_empty_lines: + tag = "remove_lead_trail_empty_lines" + actual = hprint.remove_lead_trail_empty_lines(actual) + expected = hprint.remove_lead_trail_empty_lines(expected) + _append(tag, actual, expected) + # Dedent only expected since we often align it to make it look more readable + # in the Python code, if needed. + if dedent: + tag = "dedent" + expected = hprint.dedent(expected) + _append(tag, actual, expected) + # Purify text, if needed. + if purify_text: + tag = "purify_text" + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + if purify_expected_text: + expected = text_purifier.purify_txt_from_client(expected) + _append(tag, actual, expected) + # Ensure that there is a single `\n` at the end of the strings. + actual = actual.rstrip("\n") + "\n" + expected = expected.rstrip("\n") + "\n" + # Sort the lines. + if sort: + tag = "sort" + actual = _sort_lines(actual) + expected = _sort_lines(expected) + _append(tag, actual, expected) + # Fuzzy match, if needed. + if fuzzy_match: + tag = "fuzzy_match" + actual = _fuzzy_clean(actual) + expected = _fuzzy_clean(expected) + _append(tag, actual, expected) + # Ignore line breaks, if needed. + if ignore_line_breaks: + tag = "ignore_line_breaks" + actual = _ignore_line_breaks(actual) + expected = _ignore_line_breaks(expected) + _append(tag, actual, expected) + # Split the strings into lines of at most `split_max_len` characters. + if split_max_len: + tag = "split_max_len" + actual = hprint.strict_split(actual, split_max_len) + expected = hprint.strict_split(expected, split_max_len) + _append(tag, actual, expected) + # Check. 
+ tag = "final" + _append(tag, actual, expected) + # + is_equal = expected == actual + _LOG.debug(hprint.to_str("is_equal")) + if is_equal: + return is_equal + _LOG.error( + "%s", + "\n" + + hprint.frame( + f"Test '{full_test_name}' failed", char1="=", num_chars=80 + ), + ) + if not check_string: + # If this is a `self.assert_equal()` and not a `self.check_string()`, + # then print the correct output, like: + # expected = r'""" + # 2021-02-17 09:30:00-05:00 + # 2021-02-17 10:00:00-05:00 + # 2021-02-17 11:00:00-05:00 + # """ + txt = [] + txt.append(hprint.frame(f"ACTUAL VARIABLE: {full_test_name}", char1="-")) + # TODO(gp): Switch to expected or expected_result. + exp_var = "expected = r" + # We always return the variable exactly as this should be, even if we + # could make it look better through indentation in case of fuzzy match. + actual_orig = values["original"][0] + if actual_orig.startswith('"'): + sep = "'''" + else: + sep = '"""' + exp_var += sep + if fuzzy_match: + # We can print in a more readable way since spaces don't matter. + exp_var += "\n" + exp_var += actual_orig + if fuzzy_match: + # We can print in a more readable way since spaces don't matter. + exp_var += "\n" + exp_var += sep + # Save the expected variable to files. + exp_var_file_name = f"{test_dir}/tmp.exp_var.txt" + hio.to_file(exp_var_file_name, exp_var) + # + exp_var_file_name = "tmp.exp_var.txt" + hio.to_file(exp_var_file_name, exp_var) + _LOG.info("Saved exp_var in %s", exp_var_file_name) + # + txt.append(exp_var) + txt = "\n".join(txt) + error_msg += txt + # Save all the values after the transformations. + debug = False + if debug: + for idx, key in enumerate(values.keys()): + actual_tmp, expected_tmp = values[key] + tag = f"{idx}.{key}" + _save_diff(actual_tmp, expected_tmp, tag, test_dir) + else: + key = "final" + actual_tmp, expected_tmp = values[key] + _save_diff(actual_tmp, expected_tmp, key, test_dir) + # Compare the last values. 
# TODO(gp): @all move to hpandas
def compare_df(df1: "pd.DataFrame", df2: "pd.DataFrame") -> None:
    """
    Compare two dfs including their metadata.

    The values are checked first with `DataFrame.equals()`. When they match,
    a text signature of each df (string representation, dtypes and, when the
    index exposes it, the index frequency) is compared through the
    module-level `assert_equal()`.

    :param df1: first dataframe
    :param df2: second dataframe
    :raises ValueError: if the two dfs are not equal according to
        `DataFrame.equals()`
    """
    if not df1.equals(df2):
        # `DataFrame.compare()` only accepts identically-labeled frames and
        # raises its own `ValueError` otherwise (e.g., different number of
        # rows). In that case fall back to printing both frames, so that the
        # caller always gets a report followed by the same error.
        try:
            report = str(df1.compare(df2))
        except ValueError:
            report = f"df1=\n{df1}\ndf2=\n{df2}"
        print(report)
        raise ValueError("Dfs are different")

    def _compute_df_signature(df: "pd.DataFrame") -> str:
        # Build a text description of the df values and of its metadata.
        # The labels use `df1` for both dfs so that only the content differs.
        txt = []
        txt.append(f"df1=\n{str(df)}")
        txt.append(f"df1.dtypes=\n{str(df.dtypes)}")
        if hasattr(df.index, "freq"):
            txt.append(f"df1.index.freq=\n{str(df.index.freq)}")
        return "\n".join(txt)

    # The values are equal: compare also the metadata.
    full_test_name = "dummy"
    test_dir = "."
    assert_equal(
        _compute_df_signature(df1),
        _compute_df_signature(df2),
        full_test_name,
        test_dir,
    )
def get_dir_signature(
    dir_name: str,
    include_file_content: bool,
    *,
    remove_dir_name: bool = False,
    num_lines: Optional[int] = None,
) -> str:
    """
    Compute a string with the content of the files in `dir_name`.

    :param dir_name: directory to scan; it must exist
    :param include_file_content: include the content of the files, besides the
        name of files and directories
    :param remove_dir_name: use paths relative to `dir_name`
    :param num_lines: number of lines to include for each file. It must be
        `None` when `include_file_content` is False
    :return: the signature as a single string

    The output looks like:
    ```
    # Dir structure
    $GIT_ROOT/.../tmp.scratch
    $GIT_ROOT/.../tmp.scratch/dummy_value_1=1
    $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A
    $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet
    ...

    # File signatures
    len(file_names)=3
    file_names=$GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet,
    $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet, ...
    # $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet
    num_lines=13
    '''
    original shape=(1, 1)
    Head:
    {
        "0":{
            "dummy_value_3":0
        }
    }
    Tail:
    {
        "0":{
            "dummy_value_3":0
        }
    }
    '''
    # $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet
    ```
    """
    import shlex

    def _remove_dir_name(file_name: str) -> str:
        # Make the path relative to `dir_name`, if requested.
        if remove_dir_name:
            res = os.path.relpath(file_name, dir_name)
        else:
            res = file_name
        return res

    txt: List[str] = []
    # Find all the files under `dir_name`.
    _LOG.debug("dir_name=%s", dir_name)
    hdbg.dassert_path_exists(dir_name)
    # Quote the dir name since it is interpolated in a shell command: a path
    # with spaces or shell metacharacters would otherwise be split or
    # interpreted by the shell.
    cmd = f'find {shlex.quote(dir_name)} -name "*"'
    remove_files_non_present = False
    dir_name_tmp = None
    file_names = hsystem.system_to_files(
        cmd, dir_name_tmp, remove_files_non_present
    )
    file_names = sorted(file_names)
    # Save the directory / file structure.
    txt.append("# Dir structure")
    txt.append("\n".join(map(_remove_dir_name, file_names)))
    #
    if include_file_content:
        txt.append("# File signatures")
        # Remove the directories.
        file_names = hsystem.remove_dirs(file_names)
        # Scan the files.
        txt.append(f"len(file_names)={len(file_names)}")
        txt.append(f"file_names={', '.join(map(_remove_dir_name, file_names))}")
        for file_name in file_names:
            _LOG.debug("file_name=%s", file_name)
            txt.append("# " + _remove_dir_name(file_name))
            # Read file.
            txt_tmp = hio.from_file(file_name)
            # This seems unstable on different systems.
            # txt.append("num_chars=%s" % len(txt_tmp))
            txt_tmp = txt_tmp.split("\n")
            # Report the total number of lines, before any truncation.
            txt.append(f"num_lines={len(txt_tmp)}")
            # Filter lines, if needed.
            if num_lines is not None:
                hdbg.dassert_lte(1, num_lines)
                txt_tmp = txt_tmp[:num_lines]
            txt.append("'''\n" + "\n".join(txt_tmp) + "\n'''")
    else:
        # Limiting the number of lines makes sense only when the content of
        # the files is reported.
        hdbg.dassert_is(num_lines, None)
    # Concat everything in a single string.
    result = "\n".join(txt)
    return result
# pylint: disable=protected-access
def get_pd_default_values() -> "pd._config.config.DictWrapper":
    """
    Take a snapshot of the pandas options object.

    :return: a deep copy of `pd.options` as it is when the function is called
    """
    from copy import deepcopy

    return deepcopy(pd.options)
+ """ + # 'display': + default_pd_values = { + "chop_threshold": None, + "colheader_justify": "right", + "date_dayfirst": False, + "date_yearfirst": False, + "encoding": "UTF-8", + "expand_frame_repr": True, + "float_format": None, + "html": {"border": 1, "table_schema": False, "use_mathjax": True}, + "large_repr": "truncate", + "latex": { + "escape": True, + "longtable": False, + "multicolumn": True, + "multicolumn_format": "l", + "multirow": False, + "repr": False, + }, + "max_categories": 8, + "max_columns": 20, + "max_colwidth": 50, + "max_info_columns": 100, + "max_info_rows": 1690785, + "max_rows": 60, + "max_seq_items": 100, + "memory_usage": True, + "min_rows": 10, + "multi_sparse": True, + "notebook_repr_html": True, + "pprint_nest_depth": 3, + "precision": 6, + "show_dimensions": "truncate", + "unicode": {"ambiguous_as_wide": False, "east_asian_width": False}, + "width": 80, + } + section = "display" + for key, new_val in default_pd_values.items(): + if isinstance(new_val, dict): + continue + full_key = f"{section}.{key}" + old_val = pd.get_option(full_key) + if old_val != new_val: + _LOG.debug( + "-> Assigning a different value: full_key=%s, " + "old_val=%s, new_val=%s", + full_key, + old_val, + new_val, + ) + pd.set_option(full_key, new_val) + + +# If a golden outcome is missing asserts (instead of updating golden and adding +# it to Git repo, corresponding to "update"). +_ACTION_ON_MISSING_GOLDEN = "assert" + + +# ############################################################################# +# TestCase +# ############################################################################# + + +# TODO(gp): Remove all the calls to `dedent()` and use the `dedent` switch. +class TestCase(unittest.TestCase): + """ + Add some functions to compare actual results to a golden outcome. + """ + + def setUp(self) -> None: + """ + Execute before any test method. 
def tearDown(self) -> None:
    """
    Execute after each test method completes.

    In order:
    - stop the timer started in `setUp()` and print the elapsed time
    - warn if the golden outcome was updated (unless the update was forced
      by the test itself through `mock_update_tests()`)
    - put back the pandas options object saved in `setUp()`
    - close all the matplotlib figures
    - delete the scratch dir, unless tests are run in incremental mode
    """
    # Stop the timer to measure the execution time of the test.
    self._timer.stop()
    pytest_print("(%.2f s) " % self._timer.get_total_elapsed())
    # Report if the test was updated
    if self._test_was_updated:
        if not self._overriden_update_tests:
            pytest_warning("Test was updated) ", prefix="(")
        else:
            # We forced an update from the unit test itself, so no need
            # to report an update.
            pass
    # Recover the original default pandas options.
    # NOTE(review): this rebinds the `pd.options` attribute to the snapshot
    # taken in `setUp()`; confirm that values changed through
    # `pd.set_option()` are actually restored by this assignment.
    if _HAS_PANDAS:
        pd.options = self._old_pd_options
    # Force matplotlib to close plots to decouple tests.
    if _HAS_MATPLOTLIB:
        # Close every figure, not only the current one. Calling `plt.clf()`
        # after `plt.close()` would create a new empty figure, leaving state
        # behind for the next test.
        plt.close("all")
    # Delete the scratch dir, if needed.
    if self._scratch_dir and os.path.exists(self._scratch_dir):
        if False:
            # We want to keep this if the test failed, as an alternative
            # to just re-running with --incremental.
            result = self._outcome.result
            # From https://stackoverflow.com/questions/4414234/getting-pythons-unittest-results-in-a-teardown-method
            # https://github.com/pytest-dev/pytest/issues/10631
            # This doesn't work any longer.
            # has_error = test_result.failures or test_result.errors
            has_error = result._excinfo is not None
        else:
            # TODO(gp): The problem is that when there is a failure during
            # the regressions, having artifacts in the scratch dir causes
            # more tests to fail (especially the ones in the cycle detector).
            # We need to make tests more robust to this and then we can enable
            # the logic to keep files for the failed tests in the scratch dir.
            has_error = False
        if has_error or get_incremental_tests():
            _LOG.warning("Skipping deleting %s", self._scratch_dir)
        else:
            _LOG.debug("Deleting %s", self._scratch_dir)
            hio.delete_dir(self._scratch_dir)
    # Tear down the base class in case it does something, current
    # implementation does nothing, see
    # https://docs.python.org/3/library/unittest.html#unittest.TestCase.tearDown.
    super().tearDown()
+ + This is used to override the standard location of the base + directory which is close to the class under test. + """ + self._base_dir_name = base_dir_name + _LOG.debug("Setting base_dir_name to '%s'", self._base_dir_name) + hio.create_dir(self._base_dir_name, incremental=True) + + def mock_update_tests(self) -> None: + """ + When unit testing the unit test framework we want to test updating the + golden outcome. + """ + self._update_tests = True + self._overriden_update_tests = True + self._git_add = False + + def _get_current_path( + self, + use_only_class_name: bool, + test_class_name: Optional[str], + test_method_name: Optional[str], + use_absolute_path: bool, + ) -> str: + """ + Return the name of the directory containing the input / output data. + + E.g., + ``` + ./core/dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare + ``` + + The parameters have the same meaning as in `get_input_dir()`. + """ + if test_class_name is None: + test_class_name = self.__class__.__name__ + if use_only_class_name: + # Use only class name. + dir_name = test_class_name + else: + # Use both class and test method. + if test_method_name is None: + test_method_name = self._testMethodName + dir_name = f"{test_class_name}.{test_method_name}" + if use_absolute_path: + # E.g., `.../dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare`. + dir_name = os.path.join(self._base_dir_name, "outcomes", dir_name) + else: + # E.g., `outcomes/TestContinuousSarimaxModel.test_compare`. + dir_name = os.path.join("outcomes", dir_name) + return dir_name + + def get_input_dir( + self, + *, + use_only_test_class: bool = False, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + use_absolute_path: bool = True, + ) -> str: + """ + Return the path of the directory storing input data for this test + class. + + E.g., `TestLinearRegression1.test1`. + + :param use_only_test_class: use only the name on the test class and not of + the method. 
def get_output_dir(
    self,
    *,
    test_class_name: Optional[str] = None,
    test_method_name: Optional[str] = None,
) -> str:
    """
    Return the path of the dir storing the output data for this test.

    The dir is always specific of a `class.method` pair and it is always
    expressed relative to the base dir of the test.

    :param test_class_name: override the current test class name
    :param test_method_name: override the current test method name
    :return: dir name
    """
    # Positional flags are `use_only_class_name=False` and
    # `use_absolute_path=True`.
    test_dir = self._get_current_path(
        False, test_class_name, test_method_name, True
    )
    # The output data is in the `output` subdir.
    return os.path.join(test_dir, "output")
def get_s3_scratch_dir(
    self,
    *,
    test_class_name: Optional[str] = None,
    test_method_name: Optional[str] = None,
) -> str:
    """
    Return the path of a dir storing scratch data on S3 for this test.

    The path has the form:
    ```
    {s3_bucket}/tmp/cache.unit_test/{user}.{server}.{project}.{test_path}
    ```
    where `test_path` is the relative test dir, e.g.,
    `outcomes/TestTestCase1.test_get_s3_scratch_dir1`.

    :param test_class_name: override the current test class name
    :param test_method_name: override the current test method name
    :return: S3 dir name
    """
    # Relative path identifying the test (`use_only_class_name=False`,
    # `use_absolute_path=False`).
    test_path = self._get_current_path(
        False, test_class_name, test_method_name, False
    )
    # Prefix identifying the user, the machine, and the project.
    owner_parts = (
        hsystem.get_user_name(),
        hsystem.get_server_name(),
        hgit.get_project_dirname(),
    )
    owner_prefix = "{}.{}.{}".format(*owner_parts)
    # Get the bucket used for unit tests.
    import helpers.hs3 as hs3

    bucket = hs3.get_s3_bucket_path_unit_test("ck")
    # Assemble everything in a single path.
    return f"{bucket}/tmp/cache.unit_test/{owner_prefix}.{test_path}"
def assert_equal(
    self,
    actual: str,
    expected: str,
    *,
    remove_lead_trail_empty_lines: bool = False,
    dedent: bool = False,
    purify_text: bool = False,
    purify_expected_text: bool = False,
    fuzzy_match: bool = False,
    ignore_line_breaks: bool = False,
    split_max_len: Optional[int] = None,
    sort: bool = False,
    abort_on_error: bool = True,
    dst_dir: str = ".",
) -> bool:
    """
    Return whether `actual` and `expected` are equal, reporting any
    difference.

    Implement a better version of `self.assertEqual()` by delegating to the
    module-level `assert_equal()`, using the dir of the current test to
    store the files used for the comparison.

    The interface is similar to `check_string()`.

    :return: the result of the module-level `assert_equal()`
    """
    _LOG.debug(hprint.to_str("fuzzy_match abort_on_error dst_dir"))
    # Both values must be strings (or bytes).
    for var_name, var_value in (("actual", actual), ("expected", expected)):
        hdbg.dassert_in(
            type(var_value), (bytes, str), var_name + "=%s", str(var_value)
        )
    # Get the dir of the current `class.method`, as an absolute path, and make
    # sure it exists.
    outcome_dir = self._get_current_path(False, None, None, True)
    _LOG.debug("dir_name=%s", outcome_dir)
    hio.create_dir(outcome_dir, incremental=True)
    hdbg.dassert_path_exists(outcome_dir)
    # Forward all the switches to the module-level function.
    return assert_equal(
        actual,
        expected,
        self._get_test_name(),
        outcome_dir,
        check_string=False,
        remove_lead_trail_empty_lines=remove_lead_trail_empty_lines,
        dedent=dedent,
        purify_text=purify_text,
        purify_expected_text=purify_expected_text,
        fuzzy_match=fuzzy_match,
        ignore_line_breaks=ignore_line_breaks,
        split_max_len=split_max_len,
        sort=sort,
        abort_on_error=abort_on_error,
        dst_dir=dst_dir,
    )
+ if not np.allclose(actual, expected, **kwargs): + import helpers.hpandas as hpandas + + self.assert_equal( + hpandas.df_to_str(actual), hpandas.df_to_str(expected) + ) + np.testing.assert_allclose(actual, expected, **kwargs) + + # /////////////////////////////////////////////////////////////////////// + + # TODO(gp): This needs to be moved to `helper.git` and generalized. + def _git_add_file(self, file_name: str) -> None: + """ + Add to git repo `file_name`, if needed. + """ + _LOG.debug(hprint.to_str("file_name")) + if self._git_add: + # Find the file relative to here. + mode = "assert_unless_one_result" + # The problem is that when we run from an included repo, we look + # for files like: + # ``` + # helpers_root/helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt + # ``` + # but in our directory we find files like: + # ``` + # helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt + # ``` + # so we need to make the file relative to the innermost repo. + git_root = hgit.get_client_root(super_module=False) + rel_file_name = os.path.relpath(file_name, git_root) + _LOG.debug(hprint.to_str("rel_file_name")) + file_names_tmp = hgit.find_docker_file(rel_file_name, mode=mode) + hdbg.dassert_eq(len(file_names_tmp), 1) + file_name_tmp = file_names_tmp[0] + _LOG.debug(hprint.to_str("file_name_tmp")) + cmd = f"cd amp; git add -u {file_name_tmp}" + rc = hsystem.system(cmd, abort_on_error=False) + if rc: + pytest_warning( + f"Can't git add file\n'{file_name}' -> '{file_name_tmp}'\n" + "You need to git add the file manually\n", + prefix="\n", + ) + pytest_print(f"> {cmd}\n") + + def _check_string_update_outcome( + self, file_name: str, actual: str, use_gzip: bool + ) -> None: + """ + Update the golden outcome file with actual test output. 
def _get_golden_outcome_file_name(
    self,
    tag: str,
    *,
    test_class_name: Optional[str] = None,
    test_method_name: Optional[str] = None,
) -> Tuple[str, str]:
    """
    Return the dir of the test and the file storing its golden outcome.

    The dir of the test is created, if it doesn't exist.

    :param tag: name (without extension) of the golden outcome file
    :param test_class_name: override the current test class name
    :param test_method_name: override the current test method name
    :return: tuple of (test dir, path of the `{tag}.txt` file in the output
        dir of the test)
    """
    # Dir of the `class.method` pair (`use_only_class_name=False`,
    # `use_absolute_path=True`).
    test_dir = self._get_current_path(
        False, test_class_name, test_method_name, True
    )
    _LOG.debug("dir_name=%s", test_dir)
    hio.create_dir(test_dir, incremental=True)
    hdbg.dassert_path_exists(test_dir)
    # The golden outcome is stored in the output dir.
    output_dir = self.get_output_dir(
        test_class_name=test_class_name,
        test_method_name=test_method_name,
    )
    golden_file_name = output_dir + f"/{tag}.txt"
    return test_dir, golden_file_name
+ def check_string( + self, + actual: str, + *, + remove_lead_trail_empty_lines: bool = False, + dedent: bool = False, + purify_text: bool = False, + fuzzy_match: bool = False, + ignore_line_breaks: bool = False, + split_max_len: Optional[int] = None, + sort: bool = False, + use_gzip: bool = False, + tag: str = "test", + abort_on_error: bool = True, + action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + ) -> Tuple[bool, bool, Optional[bool]]: + """ + Check the actual outcome of a test against the expected outcome + contained in the file. If `--update_outcomes` is used, updates the + golden reference file with the actual outcome. + + :param actual: actual outcome of the test + :param remove_lead_trail_empty_lines: remove leading and trailing empty + :param dedent: call `dedent` on the expected string to align it to the + beginning of the row + :param purify_text: remove some artifacts (e.g., usernames, + directories, reference to Git client) + :param fuzzy_match: ignore differences in spaces + :param ignore_line_breaks: ignore difference due to line breaks + :param split_max_len: split the string into lines of at most this length + :param sort: sort the text and then compare it. In other terms we check + whether the lines are the same although in different order + :param use_gzip: use gzip to compress/decompress the golden outcome + :param tag: tag to identify the golden outcome file + :param abort_on_error: whether to raise an exception if the outcome is + different from the golden outcome + :param action_on_missing_golden: what to do (e.g., "assert" or "update" + when the golden outcome is missing) + :param test_class_name: override the current test class name + :param test_method_name: override the current test method name + :return: outcome_updated, file_exists, is_equal + :raises: `RuntimeError` if there is a mismatch. 
If `abort_on_error` is False + (which should be used only for unit testing) return the result but do not + assert + """ + _LOG.debug( + hprint.to_str( + "remove_lead_trail_empty_lines dedent purify_text fuzzy_match " + "ignore_line_breaks split_max_len sort use_gzip tag " + "abort_on_error action_on_missing_golden test_class_name " + "test_method_name" + ) + ) + hdbg.dassert_in(type(actual), (bytes, str), "actual='%s'", actual) + # + dir_name, file_name = self._get_golden_outcome_file_name( + tag, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + if use_gzip: + file_name += ".gz" + _LOG.debug("file_name=%s", file_name) + # Remove reference from the current environment. + # TODO(gp): Not sure why we purify here and not delegate to `assert_equal`. + if purify_text: + _LOG.debug("Purifying actual outcome") + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + _LOG.debug("actual=\n%s", actual) + outcome_updated = False + file_exists = os.path.exists(file_name) + _LOG.debug("file_exists=%s", file_exists) + is_equal: Optional[bool] = None + if self._update_tests: + _LOG.debug("# Update golden outcomes") + # Determine whether outcome needs to be updated. + if file_exists: + expected = hio.from_file(file_name) + is_equal = expected == actual + if not is_equal: + outcome_updated = True + else: + # The golden outcome doesn't exist. + outcome_updated = True + _LOG.debug("outcome_updated=%s", outcome_updated) + if outcome_updated: + # Update the golden outcome. + self._check_string_update_outcome(file_name, actual, use_gzip) + else: + # Check the test result. + _LOG.debug("# Check golden outcomes") + if file_exists: + # Golden outcome is available: check the actual outcome against + # the golden outcome. 
+ expected = hio.from_file(file_name) + test_name = self._get_test_name() + is_equal = assert_equal( + actual, + expected, + test_name, + dir_name, + check_string=True, + remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, + dedent=dedent, + # We have handled the purification of the output earlier. + purify_text=False, + fuzzy_match=fuzzy_match, + ignore_line_breaks=ignore_line_breaks, + split_max_len=split_max_len, + sort=sort, + abort_on_error=abort_on_error, + ) + else: + # No golden outcome available. + _LOG.warning("Can't find golden outcome file '%s'", file_name) + if action_on_missing_golden == "assert": + # Save the result to a temporary file and assert. + file_name += ".tmp" + hio.to_file(file_name, actual, use_gzip=use_gzip) + msg = ( + "The golden outcome doesn't exist: saved the actual " + f"output in '{file_name}'" + ) + _LOG.error(msg) + if abort_on_error: + hdbg.dfatal(msg) + elif action_on_missing_golden == "update": + # Create golden file and add it to the repo. + _LOG.warning("Creating the golden outcome") + outcome_updated = True + self._check_string_update_outcome( + file_name, actual, use_gzip + ) + is_equal = None + else: + hdbg.dfatal( + "Invalid action_on_missing_golden=" + + f"'{action_on_missing_golden}'" + ) + self._test_was_updated = outcome_updated + _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) + return outcome_updated, file_exists, is_equal + + # /////////////////////////////////////////////////////////////////////// + + def _check_df_update_outcome( + self, + file_name: str, + actual: "pd.DataFrame", + ) -> None: + """ + Update the golden outcome file with actual dataframe output. + + :param file_name: path to the golden outcome file + :param actual: the actual dataframe to save + """ + _LOG.debug(hprint.to_str("file_name")) + hio.create_enclosing_dir(file_name) + actual.to_csv(file_name) + pytest_warning(f"Update golden outcome file '{file_name}'", prefix="\n") + # Add to git repo. 
def _check_df_compare_outcome(
    self, file_name: str, actual: "pd.DataFrame", err_threshold: float
) -> Tuple[bool, "pd.DataFrame"]:
    """
    Compare actual dataframe with golden outcome from file.

    The dfs are considered equal when they have the same columns, the same
    shape, and all the values are close (NaNs in the same position are
    considered equal). Every difference is accumulated through
    `self._to_error()`.

    :param file_name: path to the golden outcome file (CSV, with the index
        in the first column)
    :param actual: the actual dataframe to compare
    :param err_threshold: relative error threshold for numerical comparison,
        in [0, 1]
    :return: tuple of (is_equal, expected_dataframe)
    """
    _LOG.debug(hprint.to_str("file_name"))
    _LOG.debug("actual_=\n%s", actual)
    hdbg.dassert_lte(0, err_threshold)
    hdbg.dassert_lte(err_threshold, 1.0)
    # Load the expected df from file.
    expected = pd.read_csv(file_name, index_col=0)
    _LOG.debug("expected=\n%s", expected)
    hdbg.dassert_isinstance(expected, pd.DataFrame)
    ret = True
    # Compare columns.
    if actual.columns.tolist() != expected.columns.tolist():
        msg = f"Columns are different:\n{str(actual.columns)}\n{str(expected.columns)}"
        self._to_error(msg)
        ret = False
    # Compare the values.
    _LOG.debug("actual.shape=%s", str(actual.shape))
    _LOG.debug("expected.shape=%s", str(expected.shape))
    # Compare the values only when the shapes match: with different shapes
    # `np.allclose()` either fails to broadcast (raising instead of
    # reporting the mismatch) or broadcasts one df against the other,
    # potentially accepting dfs with a different number of rows.
    same_shape = actual.shape == expected.shape
    if same_shape:
        # From https://numpy.org/doc/stable/reference/generated/numpy.allclose.html
        # absolute(a - b) <= (atol + rtol * absolute(b))
        # absolute(a - b) / absolute(b)) <= rtol
        is_close = np.allclose(
            actual, expected, rtol=err_threshold, equal_nan=True
        )
    else:
        is_close = False
    if not is_close:
        _LOG.error("Dataframe values are not close")
        if same_shape:
            # Use the same tolerance as the check above, so that only the
            # values that actually fail the comparison are reported.
            close_mask = np.isclose(
                actual, expected, rtol=err_threshold, equal_nan=True
            )
            #
            msg = f"actual=\n{actual}"
            self._to_error(msg)
            #
            msg = f"expected=\n{expected}"
            self._to_error(msg)
            # Blank out the values that are close.
            actual_masked = np.where(close_mask, np.nan, actual)
            msg = f"actual_masked=\n{actual_masked}"
            self._to_error(msg)
            #
            expected_masked = np.where(close_mask, np.nan, expected)
            msg = f"expected_masked=\n{expected_masked}"
            self._to_error(msg)
            # Relative error for the values that are not close.
            err = np.abs((actual_masked - expected_masked) / expected_masked)
            msg = f"err=\n{err}"
            self._to_error(msg)
            max_err = np.nanmax(err)
            msg = "max_err=%.3f" % max_err
            self._to_error(msg)
        else:
            msg = (
                "Shapes are different:\n"
                f"actual.shape={str(actual.shape)}\nexpected.shape={str(expected.shape)}"
            )
            self._to_error(msg)
        ret = False
    _LOG.debug("ret=%s", ret)
    return ret, expected
+ """ + _LOG.debug(hprint.to_str("err_threshold tag abort_on_error")) + hdbg.dassert_isinstance(actual, pd.DataFrame) + # + dir_name, file_name = self._get_golden_outcome_file_name(tag) + _LOG.debug("file_name=%s", file_name) + outcome_updated = False + file_exists = os.path.exists(file_name) + _LOG.debug(hprint.to_str("file_exists")) + is_equal: Optional[bool] = None + if self._update_tests: + _LOG.debug("# Update golden outcomes") + # Determine whether outcome needs to be updated. + if file_exists: + is_equal, _ = self._check_df_compare_outcome( + file_name, actual, err_threshold + ) + _LOG.debug(hprint.to_str("is_equal")) + if not is_equal: + outcome_updated = True + else: + # The golden outcome doesn't exist. + outcome_updated = True + _LOG.debug("outcome_updated=%s", outcome_updated) + if outcome_updated: + # Update the golden outcome. + self._check_df_update_outcome(file_name, actual) + else: + # Check the test result. + _LOG.debug("# Check golden outcomes") + if file_exists: + # Golden outcome is available: check the actual outcome against + # the golden outcome. + is_equal, expected = self._check_df_compare_outcome( + file_name, actual, err_threshold + ) + # If not equal, report debug information. + if not is_equal: + test_name = self._get_test_name() + assert_equal( + str(actual), + str(expected), + test_name, + dir_name, + check_string=True, + remove_lead_trail_empty_lines=False, + dedent=dedent, + purify_text=False, + fuzzy_match=False, + ignore_line_breaks=False, + split_max_len=None, + sort=False, + abort_on_error=abort_on_error, + error_msg=self._error_msg, + ) + else: + # No golden outcome available. + _LOG.warning("Can't find golden outcome file '%s'", file_name) + if action_on_missing_golden == "assert": + # Save the result to a temporary file and assert. 
+ file_name += ".tmp" + hio.create_enclosing_dir(file_name) + actual.to_csv(file_name) + msg = ( + "The golden outcome doesn't exist: saved the actual " + f"output in '{file_name}'" + ) + _LOG.error(msg) + if abort_on_error: + hdbg.dfatal(msg) + elif action_on_missing_golden == "update": + # Create golden file and add it to the repo. + _LOG.warning("Creating the golden outcome") + outcome_updated = True + self._check_df_update_outcome(file_name, actual) + is_equal = None + else: + hdbg.dfatal( + "Invalid action_on_missing_golden=" + + f"'{action_on_missing_golden}'" + ) + self._test_was_updated = outcome_updated + # TODO(gp): Print the file with the updated test. + _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) + return outcome_updated, file_exists, is_equal + + def check_df_output( + self, + actual_df: "pd.DataFrame", + expected_length: Optional[int], + expected_column_names: Optional[List[str]], + expected_column_unique_values: Optional[Dict[str, List[Any]]], + expected_signature: str, + ) -> None: + """ + Verify that actual outcome dataframe matches the expected one. + + :param actual_df: actual outcome dataframe + :param expected_length: expected outcome dataframe length + - If `None`, skip the check + :param expected_column_names: expected outcome dataframe column names + - If `None`, skip the check + :param expected_column_unique_values: dict of column names and unique values + that they should contain + - If `None`, skip the check + :param expected_signature: expected outcome dataframe as string + - If `__CHECK_STRING__` use the value in `self.check_string()` + """ + # TODO(Grisha): get rid of `hpandas` dependency. + import helpers.hpandas as hpandas + + hdbg.dassert_isinstance(actual_df, pd.DataFrame) + if expected_length: + # Verify that the output length is correct. + actual_length = actual_df.shape[0] + self.assert_equal(str(actual_length), str(expected_length)) + if expected_column_names: + # Verify that the column names are correct. 
+ self.assert_equal( + str(sorted(actual_df.columns)), + str(sorted(expected_column_names)), + ) + if expected_column_unique_values: + hdbg.dassert_is_subset( + list(expected_column_unique_values.keys()), actual_df.columns + ) + # Verify that the unique values in specified columns are correct. + for column in expected_column_unique_values: + actual_one_column_unique_values = sorted( + list(actual_df[column].unique()) + ) + self.assert_equal( + str(actual_one_column_unique_values), + str(sorted(expected_column_unique_values[column])), + ) + # Build signature. + actual_signature = hpandas.df_to_str( + actual_df, + print_shape_info=True, + tag="df", + ) + _LOG.debug("\n%s", actual_signature) + # Check signature. + if expected_signature == "__CHECK_STRING__": + self.check_string(actual_signature, dedent=True, fuzzy_match=True) + else: + hdbg.dassert_isinstance(expected_signature, str) + self.assert_equal( + actual_signature, + expected_signature, + dedent=True, + fuzzy_match=True, + ) + + def check_srs_output( + self, + actual_srs: "pd.Series", + expected_length: Optional[int], + expected_unique_values: Optional[List[Any]], + expected_signature: str, + ) -> None: + """ + Verify that actual outcome series matches the expected one. + + :param actual_srs: actual outcome series + :param expected_length: expected outcome series length + - If `None`, skip the check + :param expected_unique_values: list of expected unique values in series + - If `None`, skip the check + :param expected_signature: expected outcome series as string + """ + # Import `hpandas` dynamically to exclude `pandas` from the thin client + # requirements. See CmTask6613 for details. + import helpers.hpandas as hpandas + + hdbg.dassert_isinstance(actual_srs, pd.Series) + if expected_length: + # Verify that output length is correct. + self.assert_equal(str(actual_srs.shape[0]), str(expected_length)) + if expected_unique_values: + # Verify that unique values in series are correct. 
+ self.assert_equal( + str(sorted(list(actual_srs.unique()))), + str(sorted(expected_unique_values)), + ) + # Build signature. + actual_signature = hpandas.df_to_str(actual_srs, num_rows=None) + _LOG.debug("\n%s", actual_signature) + # Check signature. + if expected_signature == "__CHECK_STRING__": + self.check_string(actual_signature, dedent=True, fuzzy_match=True) + else: + hdbg.dassert_isinstance(expected_signature, str) + self.assert_equal( + actual_signature, + expected_signature, + dedent=True, + fuzzy_match=True, + ) + + +# ############################################################################# +# QaTestCase +# ############################################################################# + + +@pytest.mark.qa +@pytest.mark.skipif( + hserver.is_inside_docker(), reason="Test needs to be run outside Docker" +) +class QaTestCase(TestCase, abc.ABC): + """ + Use for QA to test functionalities (e.g., invoke tasks) that run the dev / + prod container. + """ + + # TODO(Grisha): Linter should not remove `pass` statement from an empty class + # DevToolsTask #476. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py new file mode 100644 index 000000000..cf429b5ac --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py @@ -0,0 +1,450 @@ +""" +Import as: + +import helpers.hunit_test_purification as huntepur +""" + +import datetime +import logging +import os +import re +from typing import List, Tuple + +import helpers.hgit as hgit +import helpers.hintrospection as hintros +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. 
+_LOG.setLevel(logging.INFO) + + +# ############################################################################# +# TextPurifier +# ############################################################################# + + +# TODO(gp): Not sure the class is really needed since now it's in a separate +# file. +class TextPurifier: + """ + A class to purify text by removing environment-specific information and + standardizing output for test comparisons. + """ + + def purify_txt_from_client(self, txt: str) -> str: + """ + Apply all purification steps to the input text. + + :param txt: input text to purify + :return: purified text + """ + # The order of substitutions is important. We want to start from the "most + # specific" (e.g., `amp/helpers/test/...`) to the "least specific" (e.g., + # `amp`). + txt = self.purify_directory_paths(txt) + txt = self.purify_from_environment(txt) + # Correct order: -> `app` -> `amp` -> + # Start with `app.amp.helpers_root.helpers...` + # After purifying app references -> `amp.helpers_root.helpers...` + # After purifying amp references -> `helpers_root.helpers...` + # + # Incorrect order: -> `amp` -> `app` -> + # Start with `amp.helpers_root.helpers...` + # After purifying `amp` references -> `app.amp.helpers_root.helpers...` + # After purifying `app` references -> `amp.helpers_root.helpers...` + # + txt = self.purify_app_references(txt) + txt = self.purify_amp_references(txt) + txt = self.purify_from_env_vars(txt) + txt = self.purify_object_representation(txt) + txt = self.purify_today_date(txt) + txt = self.purify_white_spaces(txt) + txt = self.purify_parquet_file_names(txt) + txt = self.purify_helpers(txt) + txt = self.purify_docker_image_name(txt) + return txt + + def purify_directory_paths(self, txt: str) -> str: + """ + Replace known directory paths with standardized placeholders. + + Apply replacements in this order: + 1. Replace Git root paths with `$GIT_ROOT`. + 2. Replace `CSFY_HOST_GIT_ROOT_PATH` with `$CSFY_HOST_GIT_ROOT_PATH`. + 3. 
Replace current working directory with `$PWD`. + + :param txt: input text that needs to be purified + :return: purified text + """ + _LOG.debug("Before: txt='\n%s'", txt) + # Collect all paths to replace with their priorities. + replacements = [] + # 1. Git root paths. + # Remove references to Git modules starting from the innermost one. + for super_module in [False, True]: + # Replace the git root path with `$GIT_ROOT`. + git_root = hgit.get_client_root(super_module=super_module) + if git_root and git_root != "/": + replacements.append((git_root, "$GIT_ROOT")) + _LOG.debug("Added git root '%s' for replacement", git_root) + else: + # Skip git root path if it is `/`. + pass + # 2. CSFY_HOST_GIT_ROOT_PATH environment variable. + # Replace the CSFY_HOST_GIT_ROOT_PATH with `$CSFY_HOST_GIT_ROOT_PATH`. + csfy_git_root = os.environ.get("CSFY_HOST_GIT_ROOT_PATH") + if csfy_git_root: + replacements.append((csfy_git_root, "$CSFY_HOST_GIT_ROOT_PATH")) + _LOG.debug( + "Added CSFY_HOST_GIT_ROOT_PATH '%s' for replacement", + csfy_git_root, + ) + # 3. Current working directory. + # Replace the path of current working directory with `$PWD`. + pwd = os.getcwd() + if pwd and pwd != "/": + replacements.append((pwd, "$PWD")) + _LOG.debug("Added PWD '%s' for replacement", pwd) + # Apply replacements in order of priority. + for path, replacement in replacements: + # Use word boundaries to avoid replacing path fragments. + # E.g., To avoid replacing `app` in `application.py`. + pattern = rf"(? str: + """ + Replace environment-specific values with placeholders. + + Perform these transformations: + 1. Replace directory paths with standardized placeholders. + 2. Replace the current user name with $USER_NAME. + 3. Handle special cases like usernames in paths and commands. + + :param txt: input text that needs to be purified + :return: purified text + """ + # Replace current username with `$USER_NAME`. 
+ user_name = hsystem.get_user_name() + # Set a regex pattern that finds a user name surrounded by dot, dash or space. + # E.g., `IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0`, + # `--name $USER_NAME.amp_test.app.app`, `run --rm -l user=$USER_NAME`. + regex = rf"([\s\n\-\.\=]|^)+{user_name}+([.\s/-]|$)" + # Use `\1` and `\2` to preserve specific characters around `$USER_NAME`. + target = r"\1$USER_NAME\2" + txt = re.sub(regex, target, txt) + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def _apply_regex_replacements( + self, txt: str, regex_patterns: List[Tuple[str, str]] + ) -> str: + """ + Apply a series of regex replacements to text. + + :param txt: input text to process + :param regex_patterns: list of (pattern, replacement) tuples to + apply in order + :return: text with all regex replacements applied + """ + # Apply regex replacements in order. + txt_out = txt + for regex_pattern, replacement in regex_patterns: + txt_out = re.sub(regex_pattern, replacement, txt_out) + _LOG.debug( + "Applying %s -> %s: before=%s, after=%s", + regex_pattern, + replacement, + txt, + txt_out, + ) + return txt_out + + def purify_amp_references(self, txt: str) -> str: + """ + Remove references to amp from text by applying a series of regex + substitutions. + + Handle these patterns: + 1. Replace path references + - E.g., "amp/helpers/test/..." -> "helpers/test/..." + 2. Replace class references + - E.g., "" -> "" + 3. Replace comment references + - E.g., "# Test created for amp.helpers.test" -> "# Test created for helpers.test" + 4. Replace module references + - E.g., "amp.helpers.test.TestClass" -> "helpers.test.TestClass" + + :param txt: input text containing amp references + :return: text with amp references removed + """ + amp_patterns = [ + # Remove 'amp/' prefix from quoted paths. + (r"'amp/", "'"), + # Remove 'amp/' prefix from path segments. 
+ (r"(?m)(^\s*|\s+)amp/", r"\1"), + # Replace '/amp/' with '/' and '/amp:' with ':' in paths. + (r"(?m)/amp/", "/"), + (r"(?m)/amp:", ":"), + # Remove 'amp.' prefix from class representations and tracebacks. + (r" str: + """ + Remove references to `/app` from text by applying a series of regex + substitutions. + + :param txt: input text containing app references + :return: text with app references removed + """ + app_patterns = [ + # Remove trailing '/app/' references. + (r"(? str: + """ + Replace environment variable values with their variable names. + + :param txt: input text containing environment variable values + :return: text with environment variable values replaced + """ + for env_var in [ + "CSFY_AWS_S3_BUCKET", + "CSFY_ECR_BASE_PATH", + ]: + if env_var in os.environ: + val = os.environ[env_var] + if val == "": + _LOG.debug("Env var '%s' is empty", env_var) + else: + txt = txt.replace(val, f"${env_var}") + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def purify_object_representation(self, txt: str) -> str: + """ + Remove references like `at 0x7f43493442e0`. + + :param txt: input text containing object representations + :return: text with object representations standardized + """ + object_patterns = [ + (r"at 0x[0-9A-Fa-f]+", "at 0x"), + (r" id='\d+'>", " id='xxx'>"), + (r"port=\d+", "port=xxx"), + (r"host=\S+ ", "host=xxx "), + ( + r"wall_clock_time=Timestamp\('.*?',", + r"wall_clock_time=Timestamp('xxx',", + ), + ] + txt = self._apply_regex_replacements(txt, object_patterns) + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def purify_today_date(self, txt: str) -> str: + """ + Remove today's date like `20220810`. + + :param txt: input text containing dates + :return: text with dates standardized + """ + today_date = datetime.date.today() + today_date_as_str = today_date.strftime("%Y%m%d") + # Replace predict.3.compress_tails.df_out.20220627_094500.YYYYMMDD_171106.csv.gz. 
+ txt = re.sub( + today_date_as_str + r"_\d{6}", + "YYYYMMDD_HHMMSS", + txt, + flags=re.MULTILINE, + ) + txt = re.sub(today_date_as_str, "YYYYMMDD", txt, flags=re.MULTILINE) + return txt + + def purify_white_spaces(self, txt: str) -> str: + """ + Remove trailing white spaces. + + :param txt: input text with whitespace + :return: text with standardized whitespace + """ + txt_new = [] + for line in txt.split("\n"): + line = line.rstrip() + txt_new.append(line) + txt = "\n".join(txt_new) + return txt + + def purify_line_number(self, txt: str) -> str: + """ + Replace line number with `$LINE_NUMBER`. + + :param txt: input text containing line numbers + :return: text with line numbers standardized + """ + txt = re.sub(r"\.py::\d+", ".py::$LINE_NUMBER", txt, flags=re.MULTILINE) + return txt + + def purify_parquet_file_names(self, txt: str) -> str: + """ + Replace UUIDs file names to `data.parquet` in the golden outcomes. + + :param txt: input text containing parquet file names + :return: text with standardized parquet file names + """ + pattern = r""" + [0-9a-f]{32}-[0-9].* # GUID pattern. + (?=\.parquet) # positive lookahead assertion that matches a + # position followed by ".parquet" without + # consuming it. + """ + # TODO(Vlad): Need to change the replacement to `$FILE_NAME` as in the + # `purify_from_environment()` function. For now, some tests are expecting + # `data.parquet` files. + replacement = "data" + # flags=re.VERBOSE allows us to use whitespace and comments in the pattern. + txt = re.sub(pattern, replacement, txt, flags=re.VERBOSE) + return txt + + def purify_helpers(self, txt: str) -> str: + """ + Replace the path `helpers_root.helpers` with `helpers`. 
+ + :param txt: input text containing helper references + :return: text with standardized helper references + """ + txt = re.sub( + r"helpers_root\.helpers\.", "helpers.", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/helpers/", "helpers/", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root\.config_root", "config_root", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/config_root/", "config_root/", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/dev_scripts_helpers/", + "dev_scripts_helpers/", + txt, + flags=re.MULTILINE, + ) + return txt + + def purify_docker_image_name(self, txt: str) -> str: + """ + Remove temporary docker image name. + + :param txt: input text containing docker image names + :return: text with standardized docker image names + """ + # Purify command like: + # > docker run --rm ... tmp.latex.edb567be .. + # > ... tmp.latex.aarch64.2f590c86.2f590c86 + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\. # tmp.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ + txt = re.sub( + pattern, + r"\1xxxxxxxx\2", + txt, + flags=re.MULTILINE | re.VERBOSE, + ) + # Handle patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\.\S+\. # tmp.something.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + \. 
# Literal dot + [a-z0-9]{8} # Another 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ + txt = re.sub( + pattern, + r"\1xxxxxxxx\2", + txt, + flags=re.MULTILINE | re.VERBOSE, + ) + return txt + + def purify_file_names(self, file_names: List[str]) -> List[str]: + """ + Express file names in terms of the root of git repo, removing reference + to `amp`. + """ + git_root = hgit.get_client_root(super_module=True) + file_names = [os.path.relpath(f, git_root) for f in file_names] + # Apply amp reference purification to file paths. + file_names = list(map(self.purify_amp_references, file_names)) + return file_names + + +def purify_text(txt: str) -> str: + """ + Purify text by removing environment-specific information and standardizing + output for test comparisons. + """ + purifier = TextPurifier() + return purifier.purify_txt_from_client(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py new file mode 100644 index 000000000..4848ea094 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py @@ -0,0 +1,586 @@ +""" +Import as: + +import helpers.hunit_test_utils as hunteuti +""" + +import abc +import contextlib +import glob +import logging +import os +import re +from typing import Any, Dict, Generator, List, Optional, Tuple +import unittest.mock as mock + +import pytest + +import helpers.hdbg as hdbg +import helpers.henv as henv +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hserver as hserver +import helpers.hstring as hstring +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def 
get_test_directories(root_dir: str) -> List[str]: + """ + Get paths of all the directories that contain unit tests. + + :param root_dir: the dir to start the search from, e.g. + `/src/cmamp1/helpers` + :return: paths of test directories + """ + paths = [] + for path, _, _ in os.walk(root_dir): + # Iterate over the paths to find the test directories. + if path.endswith("/test"): + paths.append(path) + hdbg.dassert_lte(1, len(paths)) + return paths + + +# ############################################################################# +# UnitTestRenamer +# ############################################################################# + + +class UnitTestRenamer: + """ + Rename a unit test in Python code and the corresponding directories + containing the inputs and the expected outputs. + """ + + @staticmethod + def _check_names(old_test_name: str, new_test_name: str) -> None: + """ + Check if the test names are valid. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + """ + # Assert if the classname does not start with `Test`. + for name in [old_test_name, new_test_name]: + hdbg.dassert( + name.startswith("Test"), + "Invalid test_class_name='%s'. A test class should start with `Test`", + name, + ) + # Assert if the names are the same. + hdbg.dassert_ne(old_test_name, new_test_name) + + @staticmethod + def _process_parameters( + old_test_name: str, + new_test_name: str, + ) -> Dict[str, str]: + """ + Build the processing config with the renaming parameters. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + :return: config for renaming process, i.e. a dictionary which includes the fields: + - `old_class`: old name of the class + - `new_class`: new name of the class + - `old_method`: new name of the method. If empty, only class should be renamed + - `new_method`: new name of the method + """ + # Build the processing config. + config: Dict[str, str] = {} + # Split by "." 
to separate class name and method name. + split_old_name = old_test_name.split(".") + split_new_name = new_test_name.split(".") + # Check the consistency of the names - they should have the same length. + hdbg.dassert_eq( + len(split_old_name), + len(split_new_name), + "The test names are not consistent; one has a method and the other does not.", + ) + # Check the format of the test name. + hdbg.dassert_in( + len(split_old_name), + [1, 2], + msg="Wrong test name format: it must contain no more than 1 dot", + ) + if len(split_old_name) == 1: + # Class name split by `.` is one element array, e.g. `["TestClassName"]`. + old_class_name, old_method_name = split_old_name[0], "" + new_class_name, new_method_name = split_new_name[0], "" + _LOG.debug( + "Trying to change the name of `{old_test_name}` unit test class to `%s`.", + new_test_name, + ) + else: + # Method name split by `.` is 2 element array, e.g. + # TestClassName.test2` - >`["TestClassName", "test2"]`. + old_class_name, old_method_name = split_old_name + new_class_name, new_method_name = split_new_name + hdbg.dassert_eq( + old_class_name, + new_class_name, + "To change the name of the method, specify the methods of the \ + same class. E.g. `--old TestCache.test1 --new TestCache.new_test1`", + ) + _LOG.debug( + "Trying to change the name of `%s` method of `%s` class to `%s`.", + old_method_name, + old_class_name, + new_method_name, + ) + # Fill the processing parameters. + config["old_class"] = old_class_name + config["old_method"] = old_method_name + config["new_class"] = new_class_name + config["new_method"] = new_method_name + return config + + def __init__( + self, old_test_name: str, new_test_name: str, root_dir: str + ) -> None: + """ + Construct the UnitTestRenamer. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + :param root_dir: the directory to start the search from + """ + # Check if the names of the test are valid. 
+ self._check_names(old_test_name, new_test_name) + # Get the directories containing tests. + self.test_dirs = get_test_directories(root_dir) + # Construct the renaming config. + self.cfg = self._process_parameters(old_test_name, new_test_name) + + def _rename_class( + self, + content: str, + ) -> Tuple[str, int]: + """ + Rename a class in a Python file. + + :param content: the content of the file + :return: the content of the file with the class name replaced, + the number of substitutions replaced + """ + lines = content.split("\n") + docstring_line_indices = hstring.get_docstring_line_indices(lines) + num_replaced = 0 + for ind, line in enumerate(lines): + # Skip if the line is inside a docstring. + if ind not in docstring_line_indices: + # Rename the class. + new_line, num_replaced = re.subn( + rf"class {self.cfg['old_class']}\(", + rf"class {self.cfg['new_class']}(", + line, + ) + if num_replaced != 0: + lines[ind] = new_line + break + content = "\n".join(lines) + return content, num_replaced + + def _rename_method( + self, + content: str, + ) -> Tuple[str, int]: + """ + Rename the method of the class. + + :param content: the content of the file + :return: content of the file with the method renamed, the number + of substitutions made + """ + lines = content.split("\n") + # Flag that informs if the class border was found. + class_found = False + # The number of substitutions made in the content of the file. + num_replaced = 0 + class_pattern = rf"class {self.cfg['old_class']}\(" + method_pattern = rf"def {self.cfg['old_method']}\(" + docstring_line_indices = hstring.get_docstring_line_indices(lines) + for ind, line in enumerate(lines): + # Iterate over the lines of the file to find the specific method of the + # class that should be renamed. + # Skip if the line is inside a docstring. + if class_found and ind not in docstring_line_indices: + if line.startswith("class"): + # Break if the next class started and the method was not found. 
+ break + # Rename the method. + new_line, num_replaced = re.subn( + method_pattern, f"def {self.cfg['new_method']}(", line + ) + if num_replaced != 0: + # Replace the line with method definition. + lines[ind] = new_line + break + else: + if re.search(class_pattern, line): + class_found = True + new_content = "\n".join(lines) + return new_content, num_replaced + + def _rename_in_file( + self, + test_dir: str, + file_path: str, + ) -> None: + """ + Process the file: + + - check if the content of the file contains target class + - change the class name, e.g. `TestClassName` -> `TestClassNameNew` + / change the method name `TestClassName.test2` -> `TestClassName.test_new` + - rename the outcomes if they exist + + :param test_dir: the path to the test directory containing the file, e.g. + `/src/cmamp1/helpers/test` + :param file_path: the path to the file, `/src/cmamp1/helpers/test/test_lib_tasks.py` + """ + content = hio.from_file(file_path) + if not re.search(rf"class {self.cfg['old_class']}\(", content): + # Return if target test class does not appear in file content. + return + if self.cfg["old_method"] == "": + # Rename the class. + content, n_replaced = self._rename_class(content) + if n_replaced != 0: + _LOG.info( + "%s: class `%s` was renamed to `%s`.", + file_path, + self.cfg["old_class"], + self.cfg["new_class"], + ) + else: + # Rename the method of the class. + content, n_replaced = self._rename_method(content) + if n_replaced != 0: + _LOG.info( + "%s: method `%s` of `%s` class was renamed to `%s`.", + file_path, + self.cfg["old_method"], + self.cfg["old_class"], + self.cfg["new_method"], + ) + # Rename the directories that contain target test outcomes. + self.rename_outcomes( + test_dir, + ) + # Write processed content back to file. + hio.to_file(file_path, content) + + def run(self) -> None: + """ + Run the renamer tool on the files under `root_dir`. + """ + # Iterate over test directories. 
+ for path in self.test_dirs: + # Get all Python test files from this directory. + _LOG.debug("Scanning `%s` directory.", path) + search_pattern = os.path.join(path, "test_*.py") + files = glob.glob(search_pattern) + for test_file in files: + self._rename_in_file( + path, + test_file, + ) + + @staticmethod + def _rename_directory(outcome_path_old: str, outcome_path_new: str) -> None: + """ + Rename the outcomes directory and add it to git. + + :param outcome_path_old: the old name of outcome directory, e.g. + `/src/cmamp1/helpers/test/outcomes/TestRename.test_old` + :param outcome_path_new: the new name of outcome directory, e.g. + `/src/cmamp1/helpers/test/outcomes/TestRename.test_new` + """ + cmd = f"mv {outcome_path_old} {outcome_path_new}" + # Rename the directory. + rc = hsystem.system(cmd, abort_on_error=True, suppress_output=False) + _LOG.info( + "Renaming `%s` directory to `%s`. Output log: %s", + outcome_path_old, + outcome_path_new, + rc, + ) + # Add to git new outcome directory and remove the old one. + # The sequence of commands is used because `git mv` does not work + # properly while unit testing. + cmd = f"git add {outcome_path_new} && git rm -r {outcome_path_old}" + hsystem.system(cmd, abort_on_error=True, suppress_output=False) + + def _process_outcomes_dir( + self, outcome_dir: str, outcomes_path: str + ) -> bool: + """ + Process the directory containing target test outcomes. + + The stages of processing are: + - generate the new name of the directory + - rename and add it to git + + :param outcome_dir: the name of the directory containing the outcomes + :param outcomes_path: the path to the outcomes directory + :return: if the outcomes were renamed + """ + # Contruct the path to outcomes directory. + outcome_path_old = os.path.join(outcomes_path, outcome_dir) + # Construct old and new target dir names, e.g. 
def rename_outcomes(
    self,
    path: str,
) -> None:
    """
    Rename the directories that contain the outcomes of the target test.

    Every sub-directory of `<path>/outcomes` is passed to
    `_process_outcomes_dir()`. If none of them is renamed (or if the
    `outcomes` directory does not exist) an informational message is logged.

    :param path: the path to the test directory, e.g.
        `cmamp1/helpers/test/`
    """
    outcomes_path = os.path.join(path, "outcomes")
    renamed = False
    # A test directory without an `outcomes` sub-directory simply has nothing
    # to rename: do not fail on `os.listdir()`.
    if os.path.isdir(outcomes_path):
        # Get the list of outcomes directories. The list is materialized
        # before the loop since directories are renamed while iterating.
        outcomes = [
            dir_name
            for dir_name in os.listdir(outcomes_path)
            if os.path.isdir(os.path.join(outcomes_path, dir_name))
        ]
        for outcome_dir in outcomes:
            # Accumulate the result: a later directory that is not renamed
            # must not hide an earlier successful rename.
            if self._process_outcomes_dir(outcome_dir, outcomes_path):
                renamed = True
    if not renamed:
        _LOG.info(
            "No outcomes for `%s` were found in `%s`.",
            self.cfg["old_class"],
            outcomes_path,
        )
def get_test_file_for_source(source_file: str) -> Optional[str]:
    """
    Return the test file that corresponds to a source Python file, if any.

    E.g., helpers/hdbg.py -> helpers/test/test_hdbg.py

    :param source_file: path to a source Python file
    :return: path of `<dir>/test/test_<name>` when that file exists on disk
        and `source_file` is not itself a test file; None otherwise
    """
    parent_dir, file_name = os.path.split(source_file)
    # A test file is a `test_*.py` file with a `test` component in its path.
    if (
        "test" in source_file.split("/")
        and file_name.startswith("test_")
        and source_file.endswith(".py")
    ):
        return None
    candidate = os.path.join(parent_dir, "test", f"test_{file_name}")
    return candidate if os.path.exists(candidate) else None
+ assert len(invocations) == 1 + assert invocations[0]['function'] == 'subprocess.run' + ``` + """ + invocations: List[Dict[str, Any]] = [] + + def mock_subprocess_run(*args: Any, **kwargs: Any) -> Any: + invocations.append( + { + "function": "subprocess.run", + "args": args, + "kwargs": kwargs, + } + ) + if side_effect is not None: + if isinstance(side_effect, type) and issubclass( + side_effect, BaseException + ): + raise side_effect() + elif isinstance(side_effect, BaseException): + raise side_effect + return None + + def mock_hsystem(*args: Any, **kwargs: Any) -> Any: + invocations.append( + { + "function": "hsystem._system", + "args": args, + "kwargs": kwargs, + } + ) + if side_effect is not None: + if isinstance(side_effect, type) and issubclass( + side_effect, BaseException + ): + raise side_effect() + elif isinstance(side_effect, BaseException): + raise side_effect + return (0, "") # Return code and output + + with mock.patch("subprocess.run", side_effect=mock_subprocess_run): + with mock.patch("helpers.hsystem._system", side_effect=mock_hsystem): + yield invocations diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py new file mode 100644 index 000000000..18aea68c5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py @@ -0,0 +1,300 @@ +""" +Import as: + +import helpers.hversion as hversio +""" + +# This code implements version control for code +# The code version is used in two circumstances: +# 1) when any code using `hdbg.py` (which is included everywhere) starts in +# order to verify that the running code and the container in which the code +# is running are compatible +# 2) when a container is built to know what version of the code was used to build +# it + +import functools +import logging 
+import os +import re +from typing import List, Optional, cast + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + +_LOG = logging.getLogger(__name__) + + +_INFO = "\033[36mINFO\033[0m" +_WARNING = "\033[33mWARNING\033[0m" +_ERROR = "\033[31mERROR\033[0m" +# +_VERSION_RE = r"\d+\.\d+\.\d+" + + +# Copied from helpers.hgit to avoid circular dependencies. + + +@functools.lru_cache() +def _is_inside_submodule(git_dir: str = ".") -> bool: + """ + Return whether a dir is inside a Git submodule or a Git supermodule. + + We determine this checking if the current Git repo is included + inside another Git repo. + """ + cmd = [] + # - Find the git root of the current directory + # - Check if the dir one level up is a valid Git repo + # Go to the dir. + cmd.append(f"cd {git_dir}") + # > cd im/ + # > git rev-parse --show-toplevel + # /Users/saggese/src/.../amp + cmd.append('cd "$(git rev-parse --show-toplevel)/.."') + # > git rev-parse --is-inside-work-tree + # true + cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") + cmd_as_str = " && ".join(cmd) + rc = hsystem.system(cmd_as_str, abort_on_error=False) + ret: bool = rc == 0 + return ret + + +@functools.lru_cache() +def _get_client_root(super_module: bool) -> str: + """ + Return the full path of the root of the Git client. + + E.g., `/Users/saggese/src/.../amp`. + + :param super_module: if True use the root of the Git super_module, + if we are in a submodule. Otherwise use the Git sub_module root + """ + if super_module and _is_inside_submodule(): + # https://stackoverflow.com/questions/957928 + # > cd /Users/saggese/src/.../amp + # > git rev-parse --show-superproject-working-tree + # /Users/saggese/src/... 
def get_changelog_version(
    container_dir_name: str, *, file_name: Optional[str] = None
) -> Optional[str]:
    """
    Return latest version from changelog.txt file.

    The version is the first `X.Y.Z` match found in the file (this assumes
    that the newest entry is at the top of the changelog).

    :param container_dir_name: container directory relative to the root
        directory
    :param file_name: changelog file name; `changelog.txt` when None
    :return: the version string, or None if the file contains no version
    """
    version: Optional[str] = None
    supermodule = True
    root_dir = _get_client_root(supermodule)
    # Note: for `amp` as submodule one should pass `container_dir_name` relative
    # to the root, e.g., `amp/optimizer` and not just `optimizer`.
    hdbg.dassert_ne(container_dir_name, "")
    if file_name is None:
        file_name = "changelog.txt"
    changelog_file = os.path.join(root_dir, container_dir_name, file_name)
    hdbg.dassert_file_exists(changelog_file)
    changelog = hio.from_file(changelog_file)
    match = re.search(_VERSION_RE, changelog)
    if match:
        version = match.group()
    return version
def check_version(container_dir_name: str) -> None:
    """
    Compare the code version with the version of the running container.

    The check is skipped when the env var `SKIP_VERSION_CHECK` is defined or
    when no container version is available. The result of `_check_version()`
    is discarded, so a mismatch is only reported by that helper (it prints a
    message; its `raise RuntimeError` is currently disabled).

    :param container_dir_name: container directory relative to the root
        directory
    """
    # TODO(gp): -> CK_SKIP_VERSION_CHECK.
    skip_requested = os.environ.get("SKIP_VERSION_CHECK") is not None
    if not skip_requested:
        # The code version comes from the changelog, the container version
        # from the environment.
        changelog_version = get_changelog_version(container_dir_name)
        running_version = get_container_version()
        # Without a container version there is nothing to compare against.
        if running_version is not None:
            _check_version(cast(str, changelog_version), running_version)
def get_container_version_info() -> str:
    """
    Return a report with the container version and the changelog version.

    The changelog version is read for the container directory `.`. Missing
    values are reported as the string `None`.

    :return: the text built by `hprint.to_info()` under the tag
        `Container version`
    """
    container_version = str(get_container_version())
    changelog_version = str(get_changelog_version("."))
    info_lines: List[str] = [
        f"container_version='{container_version}'",
        f"changelog_version='{changelog_version}'",
    ]
    return hprint.to_info("Container version", info_lines)
# We don't want to import `Pandas` just for a type.
def get_wall_clock_time() -> Optional["pd.Timestamp"]:  # noqa: F821
    """
    Return the current wall clock time from the registered function.

    :return: the value returned by the function stored in the module-level
        `_get_wall_clock_time_func`, or `None` if no function was set
    """
    clock = _get_wall_clock_time_func
    return clock() if clock is not None else None
# This is redundant with `hdatetime.get_current_time()` and
# `hdateti.get_current_timestamp_as_string()` but we keep them to simplify
# dependencies.
def get_machine_wall_clock_time(
    *,
    as_str: bool = False,
    include_msec: bool = False,
) -> Union[str, datetime.datetime]:
    """
    Return the current machine time in UTC.

    :param as_str: if True return the time formatted by `to_timestamp_str()`
        instead of a `datetime.datetime`
    :param include_msec: forwarded to `to_timestamp_str()` when `as_str` is
        True
    :return: a naive (tzinfo-less) `datetime.datetime` expressed in UTC, or its
        string representation
    """
    # `datetime.utcnow()` is deprecated since Python 3.12. Build the same naive
    # UTC value from an aware "now", so callers keep receiving a naive datetime.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    if as_str:
        return to_timestamp_str(now, include_msec)
    return now
def get_current_bar_timestamp(
    *,
    as_str: bool = False,
    include_msec: bool = False,
) -> Optional[Union[str, "pd.Timestamp"]]:  # noqa: F821
    """
    Return the timestamp of the bar currently being processed.

    :param as_str: if True and a timestamp is set, return it formatted by
        `to_timestamp_str()`
    :param include_msec: forwarded to `to_timestamp_str()` when formatting
    :return: the value of the module-level `_CURR_BAR_TIMESTAMP` (possibly
        `None`), or its string representation
    """
    current = _CURR_BAR_TIMESTAMP
    # Return the raw value unless a string was requested and a value is set.
    if not (current and as_str):
        return current
    return to_timestamp_str(current, include_msec=include_msec)
+ from statsmodels.tools.sm_exceptions import InterpolationWarning + + # warnings.simplefilter("ignore", category=InterpolationWarning) + + # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1906: + # InterpolationWarning: The test statistic is outside of the range of p-values + # available in the look-up table. The actual p-value is smaller than the + # p-value returned. + warnings.filterwarnings( + action, + category=InterpolationWarning, + module=".*statsmodels.*", + lineno=1906, + append=False, + ) + + warnings.filterwarnings( + action, + category=InterpolationWarning, + module=".*statsmodels.*", + lineno=1910, + append=False, + ) + + +# /venv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: +# DeprecationWarning: `should_run_async` will not call `transform_cell` +# automatically in the future. Please pass the result to `transformed_cell` +# argument and any exception that happen during thetransform in +# `preprocessing_exc_tuple` in IPython 7.17 and above. +# and should_run_async(code) +warnings.filterwarnings( + action, + category=DeprecationWarning, + module=".*ipykernel.*", + lineno=283, + append=False, +) + + +# TODO(gp): Add this TqdmExperimentalWarning + +try: + import pandas as pd + + _HAS_PANDAS = True +except ImportError: + _HAS_PANDAS = False + + +if _HAS_PANDAS: + pd.set_option("mode.chained_assignment", None) + # TODO(gp): We should fix the issues and re-enable. 
+ # See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + # row["net_cost"] -= cost + # /app/amp/oms/order_processing/order_processor.py:376: SettingWithCopyWarning: + # A value is trying to be set on a copy of a slice from a DataFrame + + # /venv/lib/python3.8/site-packages/pandas/io/sql.py:761: UserWarning: pandas + # only support SQLAlchemy connectable(engine/connection) ordatabase string URI or + # sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider + # using SQLAlchemy + # + # This seems a false alarm: + # https://github.com/pandas-dev/pandas/issues/45660#issuecomment-1077355514 + warnings.filterwarnings( + action, + category=UserWarning, + module=".*pandas.*", + lineno=761, + append=False, + ) + + # run_leq_node: 38%|███▊ | 3/8 [00:05<00:09, 1.98s/it]/app/amp/helpers/hdbg.py:309: PerformanceWarning: indexing past lexsort depth may impact performance. + # cond = value in valid_values + warnings.filterwarnings( + action, + category=pd.errors.PerformanceWarning, + module=".*hdbg.py.*", + lineno=309, + append=False, + ) + + # run_leq_node: 0%| | 0/8 [00:00 str: + """ + Get the shared configs S3 bucket. + + :param environment: environment to get the shared configs for + :return: shared configs S3 bucket + """ + hdbg.dassert_in(environment, ["prod", "preprod", "test"]) + bucket_name = hrecouti.get_repo_config().get_shared_configs_bucket_name( + environment + ) + hdbg.dassert_is_not( + bucket_name, + None, + f"Shared configs bucket is not defined in `repo_config.yaml` for environment: {environment}", + ) + return bucket_name + + +def _get_ecs_task_definition_template(environment: str) -> Dict[str, Any]: + """ + Get the ECS task definition template. 
+ + :return: ECS task definition template + """ + s3_bucket = _get_shared_configs_s3_bucket(environment) + s3_path = f"{s3_bucket}/{environment}/templates/ecs/ecs_task_definition_template.json" + hs3.dassert_is_s3_path(s3_path) + task_definition_config = hs3.from_file( + s3_path, aws_profile=haws.AWS_PROFILE[environment] + ) + task_definition_config = json.loads(task_definition_config) + return task_definition_config + + +def _get_efs_mount_config_template(environment: str) -> Dict[str, Any]: + """ + Get the EFS mount config template. + + :return: EFS mount config template + """ + s3_bucket = _get_shared_configs_s3_bucket(environment) + s3_path = ( + f"{s3_bucket}/{environment}/templates/efs/efs_mount_config_template.json" + ) + hs3.dassert_is_s3_path(s3_path) + efs_config = hs3.from_file( + s3_path, aws_profile=haws.AWS_PROFILE[environment] + ) + efs_config = json.loads(efs_config) + return efs_config + + +def _set_task_definition_config( + task_definition_config: Dict, + task_definition_name: str, + region: str, + environment: str, +) -> Dict[str, Any]: + """ + Update template of ECS task definition with concrete values. + + :param task_definition_config: task definition config template + :param task_definition_name: name of the task definition + :param region: region to create the task definition in + :return: full formed task definition config dictionary + """ + # Replace placeholder values inside container definition + # from the template with concrete values. + # We use single container inside our task definition and + # the convention is to set the same name as the task + # definition itself. + task_definition_config["containerDefinitions"][0]["name"] = ( + task_definition_name + ) + # Set placeholder image URL. + # Get the base registry URL in the base region. + base_registry_url = hrecouti.get_repo_config().get_container_registry_url() + # Build the region-specific ECR registry URL for the target region. 
+ # ECR registry URL format: `{account_id}.dkr.ecr.{region}.amazonaws.com`. + account_id = base_registry_url.split(".")[0] + registry_url = f"{account_id}.dkr.ecr.{region}.amazonaws.com" + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + # Make sure that the ECR replication is configured for the target region, + # so images are available in any new regions. + task_definition_config["containerDefinitions"][0]["image"] = ( + _IMAGE_URL_TEMPLATE.format(registry_url, image_name) + ) + # Set log configuration options. + log_config_opts = copy.deepcopy(_TASK_DEFINITION_LOG_OPTIONS_TEMPLATE) + log_config_opts["awslogs-group"] = log_config_opts["awslogs-group"].format( + task_definition_name + ) + log_config_opts["awslogs-region"] = region + task_definition_config["containerDefinitions"][0]["logConfiguration"][ + "options" + ] = log_config_opts + # Index is based on the order of the environment variables in the template. + # Set environment variable `CSFY_ECR_BASE_PATH`. + task_definition_config["containerDefinitions"][0]["environment"][0][ + "value" + ] = registry_url + # Set environment variable `CSFY_AWS_DEFAULT_REGION`. + task_definition_config["containerDefinitions"][0]["environment"][1][ + "value" + ] = region + # Configure access to EFS. + efs_config = _get_efs_mount_config_template(environment) + task_definition_config["volumes"] = efs_config[region]["volumes"] + task_definition_config["containerDefinitions"][0]["mountPoints"] = ( + efs_config[region]["mountPoints"] + ) + return task_definition_config + + +def _register_task_definition( + task_definition_name: str, region: str, environment: str +) -> None: + """ + Register a new ECS task definition. + + :param task_definition_name: name of the new task definition. + :param config_file: path to the JSON file containing the task + definition configuration. 
+ :param region: region to create the task definition in + :param environment: environment to create the task definition in + """ + task_definition_config = _get_ecs_task_definition_template(environment) + client = haws.get_ecs_client(haws.AWS_PROFILE[environment], region=region) + # Prevent overwriting existing task definition if it exists. + if haws.is_task_definition_exists(task_definition_name, region=region): + _LOG.info( + "Task definition %s already exists in region %s", + task_definition_name, + region, + ) + return + # + task_definition_config = _set_task_definition_config( + task_definition_config, task_definition_name, region, environment + ) + client.register_task_definition( + family=task_definition_name, + taskRoleArn=task_definition_config.get("taskRoleArn", ""), + executionRoleArn=task_definition_config["executionRoleArn"], + networkMode=task_definition_config["networkMode"], + containerDefinitions=task_definition_config["containerDefinitions"], + volumes=task_definition_config.get("volumes", []), + placementConstraints=task_definition_config.get( + "placementConstraints", [] + ), + requiresCompatibilities=task_definition_config[ + "requiresCompatibilities" + ], + cpu=task_definition_config["cpu"], + memory=task_definition_config["memory"], + ) + _LOG.info( + "Registered new task definition: %s in region %s", + task_definition_name, + region, + ) + + +def aws_update_ecs_task_definition( + *, + task_definition: str, + image_tag: str, + region: str, + environment: str, +) -> None: + """ + Update an existing ECS task definition. + + :param task_definition: the name of the ECS task definition for + which an update to container image URL is made, e.g. cmamp-test + :param image_tag: the hash of the new candidate image, e.g. 
@task
def aws_create_test_task_definition(
    ctx,
    issue_id: Optional[int] = None,
    region: str = hs3.AWS_EUROPE_REGION_1,
) -> None:
    """
    Create the ECS task definition used to test a given issue.

    The task definition is named `<docker base image name>-test-<issue_id>`
    and is registered for the `test` environment.

    :param issue_id: issue ID to create the task definition for; it is
        required and must be made of digits only
    :param region: region to create the task definition in; it must be one
        of `hs3.AWS_REGIONS`
    """
    _ = ctx
    hlitauti.report_task()
    # Validate the issue id.
    hdbg.dassert_is_not(issue_id, None, "issue_id is required")
    hdbg.dassert(
        str(issue_id).isdigit(), f"issue_id '{issue_id}' must be an integer"
    )
    # Validate the region.
    hdbg.dassert_in(region, hs3.AWS_REGIONS)
    base_image = hrecouti.get_repo_config().get_docker_base_image_name()
    # Register task definition.
    _register_task_definition(
        f"{base_image}-test-{issue_id}", region=region, environment="test"
    )
# TODO(gp): GFI: Unit test.
@task
def bash_print_path(ctx):  # type: ignore
    """
    Print the valid directories listed in `PATH`, one per line, sorted.

    Entries that are empty, missing on disk, or not directories are reported
    through the logger and skipped. A directory listed more than once is
    reported as a duplicate and printed a single time.
    """
    _ = ctx
    # Read `PATH` from the environment rather than piping `echo $PATH` through
    # `sed 's/:/\n/g'`: BSD sed (e.g., on macOS) does not expand `\n` in the
    # replacement text, so the shell pipeline is not portable.
    paths = sorted(os.environ.get("PATH", "").split(os.pathsep))
    #
    all_paths = []
    # Directories already accepted, to detect duplicates in a single pass.
    seen = set()
    # Remove empty lines.
    for path in paths:
        if path.strip() == "":
            _LOG.error("Empty path: '%s'", path)
            continue
        if not os.path.exists(path):
            _LOG.error("Dir doesn't exist: '%s'", path)
            continue
        if not os.path.isdir(path):
            _LOG.error("Not a dir: '%s'", path)
            continue
        if path in seen:
            # Keep the first occurrence and skip the repeated ones, so that a
            # valid dir is still listed among the valid paths.
            _LOG.error("Duplicate path: '%s'", path)
            continue
        seen.add(path)
        all_paths.append(path)
    # Print the paths.
    _LOG.info("Valid paths:")
    for path in all_paths:
        print(path)
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py new file mode 100644 index 000000000..f7dcadc54 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py @@ -0,0 +1,1590 @@ +""" +Import as: + +import helpers.lib_tasks_docker as hlitadoc +""" + +import functools +import getpass +import logging +import os +import re +from typing import Any, Dict, List, Optional, Union, cast + +# TODO(gp): We should use `pip install types-PyYAML` to get the mypy stubs. +import yaml +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hdict as hdict +import helpers.hdocker as hdocker +import helpers.henv as henv +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hsecrets as hsecret +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# Basic Docker commands. 
def _check_docker_login(repo_name: str) -> bool:
    """
    Check if we are already logged in to the Docker registry `repo_name`.

    :param repo_name: string identifying the registry, e.g.,
        `623860924167.dkr.ecr`
    :return: True if a key under `auths` in `~/.docker/config.json` contains
        `repo_name`, False otherwise (including when the file or the `auths`
        section is missing)
    """
    file_name = os.path.join(os.environ["HOME"], ".docker/config.json")
    if not os.path.exists(file_name):
        # No config file means that no login has been recorded.
        _LOG.debug("File '%s' doesn't exist", file_name)
        return False
    json_data = hio.from_json(file_name)
    # > more ~/.docker/config.json
    # ```
    # {
    #     "auths": {
    #         "623860924167.dkr.ecr.eu-north-1.amazonaws.com": {},
    #         "665840871993.dkr.ecr.us-east-1.amazonaws.com": {},
    #         "https://index.docker.io/v1/": {}
    #     },
    # ```
    _LOG.debug("json_data=%s", json_data)
    # The `auths` section is absent when no registry has been logged in to:
    # treat it as empty instead of raising `KeyError`.
    auths = json_data.get("auths", {})
    is_logged = any(repo_name in registry for registry in auths)
    return is_logged
def _docker_login_ecr() -> None:
    """
    Log in to the private Docker registry on AWS ECR.

    The login is skipped when running inside the CI.

    :raises NotImplementedError: if the major version of the installed
        `awscli` is neither 1 nor 2
    """
    hlitauti.report_task()
    if hserver.is_inside_ci():
        _LOG.warning("Running inside GitHub Action: skipping `docker_login`")
        return
    # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20
    use_cache = False
    if use_cache:
        # Check if we are already logged in to the target registry.
        is_logged = _check_docker_login("623860924167.dkr.ecr")
        if is_logged:
            _LOG.warning("Already logged in to the target registry: skipping")
            return
    _LOG.info("Logging in to the target registry")
    # Log in the target registry.
    major_version = _get_aws_cli_version()
    # docker login \
    #   -u AWS \
    #   -p eyJ... \
    #   -e none \
    #   https://*****.dkr.ecr.us-east-1.amazonaws.com
    # TODO(gp): Move this to var in repo_config.py.
    # TODO(gp): Hack
    profile = "ck"
    region = hs3.AWS_EUROPE_REGION_1
    cmd = ""
    if major_version == 1:
        cmd = f"eval $(aws ecr get-login --profile {profile} --no-include-email --region {region})"
    elif major_version == 2:
        if profile == "ck":
            env_var = "CSFY_ECR_BASE_PATH"
        else:
            env_var = f"{profile.upper()}_ECR_BASE_PATH"
        ecr_base_path = hlitauti.get_default_param(env_var)
        # TODO(Nikola): Remove `_get_aws_cli_version()` and use only `aws ecr get-login-password`
        #   as it is present in both versions of `awscli`.
        cmd = (
            "docker login -u AWS -p "
            f"$(aws ecr get-login-password --profile {profile}) "
            f"https://{ecr_base_path}"
        )
    else:
        # Fail loudly instead of falling through and running an empty
        # command.
        raise NotImplementedError(
            f"Docker login for awscli v{major_version} is not implemented!"
        )
    # TODO(Grisha): fix properly. We pass `ctx` despite the fact that we do not
    # need it with `use_system=True`, but w/o `ctx` invoke tasks (i.e. ones
    # with `@task` decorator) do not work.
    hsystem.system(cmd, suppress_output=False)
def _dassert_is_version_valid(version: str) -> None:
    """
    Check that the version is valid, i.e. looks like `1.0.0`.

    :param version: version string to validate; it must be a non-empty string
        made of exactly three dot-separated numbers
    """
    hdbg.dassert_isinstance(version, str)
    hdbg.dassert_ne(version, "")
    regex = rf"^({_IMAGE_VERSION_RE})$"
    _LOG.debug("Testing with regex='%s'", regex)
    # Use `fullmatch` since `$` alone also matches right before a trailing
    # newline, which would let `"1.0.0\n"` through.
    m = re.fullmatch(regex, version)
    hdbg.dassert(m, "Invalid version: '%s'", version)
def _dassert_is_base_image_name_valid(base_image: str) -> None:
    """
    Check that the base image is valid, i.e. looks like one of the following:

    - `*****.dkr.ecr.us-east-1.amazonaws.com/amp`
    - `ghcr.io/cryptokaizen/cmamp`

    :param base_image: registry path and image name, without stage or version
    """
    regex = rf"^{_FULL_IMAGE_NAME_RE}$"
    _LOG.debug("regex=%s", regex)
    # Use `fullmatch` since `$` alone also matches right before a trailing
    # newline, which would accept a name ending with `\n`.
    m = re.fullmatch(regex, base_image)
    hdbg.dassert(m, "Invalid base_image: '%s'", base_image)
# TODO(gp): Consider using a token "latest" in version, so that it's always a
# string and we avoid a special behavior encoded in None.
def get_image(
    base_image: str,
    stage: str,
    version: Optional[str],
) -> str:
    """
    Build the fully qualified name of a Docker image.

    The tag is `<stage>`, followed by `-<user>` for the `local` stage and by
    `-<version>` when a version is given.

    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp. An
        empty string selects the default base image
    :param stage: one of `local`, `dev`, `prod`
    :param version: e.g., `1.0.0`. If None or empty, no version is appended
    :return: e.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` or
        `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0`
    """
    # Docker refers the default image as "latest", although in our stage
    # nomenclature we call it "dev".
    hdbg.dassert_in(stage, ["local", "dev", "prod"])
    # Resolve and validate the base image.
    base_image = _get_base_image(base_image)
    _dassert_is_base_image_name_valid(base_image)
    # Assemble the tag starting from the stage.
    tag = stage
    if stage == "local":
        tag += "-" + hsystem.get_user_name()
    # Append the version, if any.
    has_version = version is not None and version != ""
    if has_version:
        _dassert_is_version_valid(version)
        tag += f"-{version}"
    #
    full_name = f"{base_image}:{tag}"
    dassert_is_image_name_valid(full_name)
    return full_name
def _get_last_container_id(sudo: bool) -> str:
    """
    Return the ID of the last started Docker container.

    :param sudo: use sudo for the Docker command
    :return: the first field of the `docker ps -l` row
    """
    docker_exec = _get_docker_exec(sudo)
    # Get the last started container.
    cmd = f"{docker_exec} ps -l | grep -v 'CONTAINER ID'"
    # CONTAINER ID        IMAGE               COMMAND                  CREATED
    # 90897241b31a        eeb33fe1880a        "/bin/sh -c '/bin/bash ...
    _, txt = hsystem.system_to_one_line(cmd)
    # Parse the output: there should be at least one field. Split on any
    # whitespace since `"".split(" ")` returns `[""]`, which would make the
    # check below always pass.
    fields = txt.split()
    hdbg.dassert_lte(1, len(fields), "Invalid output='%s'", txt)
    container_id: str = fields[0]
    return container_id
@task
def docker_kill(  # type: ignore
    ctx,
    all=False,  # pylint: disable=redefined-builtin
    sudo=False,
):
    """
    Remove the last started Docker container, or all of them.

    The selected containers are listed first and then removed with
    `docker rm -f`.

    :param all: act on all the containers instead of the last one (be
        careful!)
    :param sudo: use sudo for the Docker commands
    """
    hlitauti.report_task(txt=hprint.to_str("all"))
    docker_exec = _get_docker_exec(sudo)
    if all:
        _LOG.warning("Killing all the containers")
        # TODO(gp): Ask if we are sure and add a --just-do-it option.
        opts = "-a"
    else:
        # Last container.
        opts = "-l"
    # Print the containers that will be terminated.
    list_cmd = f"{docker_exec} ps {opts}"
    hlitauti.run(ctx, list_cmd)
    # Kill.
    kill_cmd = f"{docker_exec} rm -f $({list_cmd} -q)"
    hlitauti.run(ctx, kill_cmd)
+ """ + docker_login(ctx) + # + image = get_image(base_image, stage, version) + _LOG.info("image='%s'", image) + dassert_is_image_name_valid(image) + cmd = f"docker pull {image}" + hlitauti.run(ctx, cmd, pty=True) + + +@task +def docker_pull(ctx, stage="dev", version=None, skip_pull=False): # type: ignore + """ + Pull latest dev image corresponding to the current repo from the registry. + + :param skip_pull: if True skip pulling the docker image + """ + hlitauti.report_task() + if stage == "local": + _LOG.warning("Setting skip_pull to True for local stage") + skip_pull = True + if skip_pull: + _LOG.warning("Skipping pulling docker image as per user request") + return + # + base_image = "" + _docker_pull(ctx, base_image, stage, version) + + +@task +def docker_pull_helpers(ctx, stage="prod", version=None): # type: ignore + """ + Pull latest prod image of `helpers` from the registry. + + :param ctx: invoke context + :param stage: stage of the Docker image + :param version: version of the Docker image + """ + base_image = hlitauti.get_default_param("CSFY_ECR_BASE_PATH") + "/helpers" + _LOG.debug("base_image=%s", base_image) + _docker_pull(ctx, base_image, stage, version) + + +# //////////////////////////////////////////////////////////////////////////////// +# Compose files. +# //////////////////////////////////////////////////////////////////////////////// + +# TODO(gp): All this code can become `DockerComposeFileGenerator`. + +# There are several combinations to consider: +# - whether the Docker host can run with / without privileged mode +# - amp as submodule / as supermodule +# - different supermodules for amp + +# TODO(gp): use_privileged_mode -> use_docker_privileged_mode +# use_sibling_container -> use_docker_containers_containers + +DockerComposeServiceSpec = Dict[str, Union[str, List[str]]] + + +def _get_linter_service(stage: str) -> DockerComposeServiceSpec: + """ + Get the linter service specification for the `tmp.docker-compose.yml` file. 
+ + :return: linter service specification + """ + superproject_path, submodule_path = hgit.get_path_from_supermodule() + if superproject_path: + # We are running in a Git submodule. + work_dir = f"/src/{submodule_path}" + repo_root = superproject_path + else: + work_dir = "/src" + repo_root = os.getcwd() + # TODO(gp): To avoid linter getting confused between `Sequence[str]` and + # `List[str]`, we should assign one element at the time. + linter_service_spec = { + "extends": "base_app", + "volumes": [ + f"{repo_root}:/src", + ], + "working_dir": work_dir, + "environment": [ + "MYPYPATH", + ], + } + if stage != "prod": + # When we run a development Linter container, we need to mount the + # Linter repo under `/app`. For prod container instead we copy / freeze + # the repo code in `/app`, so we should not mount it. + volumes = cast(List[str], linter_service_spec["volumes"]) + if superproject_path: + # When running in a Git submodule we need to go one extra level up. + # TODO(*): Clean up the indentation, #2242 (also below). + volumes.append("../../../:/app") + else: + volumes.append("../../:/app") + if stage == "prod": + # Use the `repo_config.py` inside the helpers container instead of + # the one in the calling repo. + environment = cast(List[str], linter_service_spec["environment"]) + environment.append("CSFY_REPO_CONFIG_PATH=/app/repo_config.py") + return linter_service_spec + + +# TODO(gp): Remove mount_as_submodule +def _generate_docker_compose_file( + stage: str, + use_privileged_mode: bool, + use_sibling_container: bool, + shared_data_dirs: Optional[Dict[str, str]], + mount_as_submodule: bool, + use_network_mode_host: bool, + use_main_network: bool, + file_name: Optional[str], +) -> str: + """ + Generate `tmp.docker-compose.yml` file and save it. + + :param shared_data_dirs: data directory in the host filesystem to mount + inside the container. 
`None` means no dir sharing + :param use_main_network: use `main_network` as default network + """ + _LOG.debug( + hprint.to_str( + "use_privileged_mode " + "use_sibling_container " + "shared_data_dirs " + "mount_as_submodule " + "use_network_mode_host " + "use_main_network " + "file_name " + ) + ) + # We could pass the env var directly, like: + # ``` + # - CSFY_ENABLE_DIND=$CSFY_ENABLE_DIND + # ``` + # but we prefer to inline it. + if use_privileged_mode: + CSFY_ENABLE_DIND = 1 + else: + CSFY_ENABLE_DIND = 0 + # ``` + # sysname='Linux' + # nodename='cf-spm-dev4' + # release='3.10.0-1160.53.1.el7.x86_64' + # version='#1 SMP Fri Jan 14 13:59:45 UTC 2022' + # machine='x86_64' + # ``` + csfy_host_os_name = os.uname()[0] + csfy_host_name = os.uname()[1] + csfy_host_os_version = os.uname()[2] + csfy_host_user_name = getpass.getuser() + # We assume that we don't use this code inside a container, since otherwise + # we would need to distinguish the container style (see + # docs/work_tools/docker/all.dockerized_flow.explanation.md) to find the + # outermost Git root. + if not hserver.is_inside_unit_test(): + hdbg.dassert(not hserver.is_inside_docker()) + else: + # We call this function as part of the unit tests, which we run insider + # the container. + pass + git_host_root_path = hgit.find_git_root() + # Find git root path in the container. + # The Git root is always mounted in the container at `/app`. So we need to + # use that as starting point. + # E.g. For CSFY_GIT_ROOT_PATH, we need to use `/app`, rather than + # `/data/dummy/src/cmamp1`. + # E.g. For CSFY_HELPERS_ROOT_PATH, we need to use `/app/helpers_root`. + # rather than `/data/dummy/src/cmamp1/helpers_root`. + git_root_path = "/app" + # Find helpers root path in the container. 
+ helper_dir = hgit.find_helpers_root() + helper_relative_path = os.path.relpath(helper_dir, git_host_root_path) + helper_root_path = os.path.normpath( + os.path.join(git_root_path, helper_relative_path) + ) + # A super repo is a repo that contains helpers as a submodule and + # is not a helper itself. + use_helpers_as_nested_module = ( + 0 if hgit.is_in_helpers_as_supermodule() else 1 + ) + # We could do the same also with IMAGE for symmetry. + # Keep the env vars in sync with what we print in `henv.get_env_vars()`. + # Configure `base_app` service. + # TODO(gp): Use henv.get_env_vars() to get the env vars. + environment = [ + f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}", + "CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", + f"CSFY_HOST_NAME={csfy_host_name}", + f"CSFY_HOST_OS_NAME={csfy_host_os_name}", + f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", + f"CSFY_HOST_USER_NAME={csfy_host_user_name}", + "CSFY_REPO_CONFIG_CHECK=True", + # Use inferred path for `repo_config.py`. + "CSFY_REPO_CONFIG_PATH=", + "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID", + "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION", + "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE", + "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET", + "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY", + "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN", + "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH", + # The path of the outermost Git root on the host. + f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}", + # The path of the outermost Git root in the Docker container. + f"CSFY_GIT_ROOT_PATH={git_root_path}", + # The path of the helpers dir in the Docker container (e.g., + # `/app`, `/app/helpers_root`) + f"CSFY_HELPERS_ROOT_PATH={helper_root_path}", + f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}", + "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN", + # This env var is used by GH Action to signal that we are inside the + # CI. It's set up by default by the GH Action runner. 
See: + # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables + "CSFY_CI=$CSFY_CI", + # TODO(Vlad): consider removing, locally we use our personal tokens + # from files and inside GitHub actions we use the `GH_TOKEN` + # environment variable. + ] + environment.extend( + [ + "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", + # Inside GitHub Actions we use `GH_TOKEN` environment variable, + # see https://cli.github.com/manual/gh_auth_login. + "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", + ] + ) + api_key_env_vars = henv.get_api_key_env_vars() + environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars]) + # + base_app_spec = { + "cap_add": ["SYS_ADMIN"], + "environment": environment, + "image": "${IMAGE}", + "restart": "no", + "volumes": [ + # TODO(gp): We should pass the value of $HOME from dev.Dockerfile to here. + # E.g., we might define $HOME in the env file. + "~/.aws:/home/.aws", + "~/.config/gspread_pandas/:/home/.config/gspread_pandas/", + "~/.config/gh:/home/.config/gh", + "~/.ssh:/home/.ssh", + ], + } + if use_privileged_mode: + # This is needed: + # - for Docker-in-docker (dind) + # - to mount fstabs + base_app_spec["privileged"] = use_privileged_mode + if shared_data_dirs: + # Mount shared dirs. + shared_volumes = [ + f"{host}:{container}" for host, container in shared_data_dirs.items() + ] + # Mount all dirs that are specified. + base_app_spec["volumes"].extend(shared_volumes) + if False: + # No need to mount file systems. + base_app_spec["volumes"].append("../docker_build/fstab:/etc/fstab") + if use_sibling_container: + # Use sibling-container approach. + base_app_spec["volumes"].append( + "/var/run/docker.sock:/var/run/docker.sock" + ) + if False: + base_app_spec["deploy"] = { + "resources": { + "limits": { + # This should be passed from command line depending on how much + # memory is available. 
+ "memory": "60G", + }, + }, + } + if use_network_mode_host: + # Default network mode set to host so we can reach e.g. + # a database container pointing to localhost:5432. + # In tests we use dind so we need set back to the default "bridge". + # See CmTask988 and https://stackoverflow.com/questions/24319662 + base_app_spec["network_mode"] = "${NETWORK_MODE:-host}" + # Configure `app` service. + # Mount `amp` when it is used as submodule. In this case we need to + # mount the super project in the container (to make git work with the + # supermodule) and then change dir to `amp`. + app_spec = { + "extends": "base_app", + } + # Use absolute path of the dir to mount the volume and set working dir. + # The `app_dir` dir points to the root of the repo. + # The `working_dir` points to the path of the runnable dir. + # - If the runnable dir is the root of the repo, then `working_dir` is `/app`. + # - If the runnable dir is a subdirectory of the repo, then `working_dir` is `/app/subdir`. + curr_dir = os.getcwd() + rel_dir1 = os.path.relpath(curr_dir, git_host_root_path) + rel_dir2 = os.path.relpath(git_host_root_path, curr_dir) + app_dir = os.path.abspath(os.path.join(curr_dir, rel_dir2)) + working_dir = os.path.normpath(os.path.join("/app", rel_dir1)) + app_spec["volumes"] = [f"{app_dir}:/app"] + app_spec["working_dir"] = working_dir + # Configure `linter` service. + linter_spec = _get_linter_service(stage) + # Configure `jupyter_server` service. + # For Jupyter server we cannot use "host" network_mode because + # it is incompatible with the port bindings. + jupyter_server = { + "command": "devops/docker_run/run_jupyter_server.sh", + "environment": [ + "PORT=${PORT}", + ], + "extends": "app", + "network_mode": "${NETWORK_MODE:-bridge}", + # TODO(gp): Rename `AM_PORT`. + "ports": [ + "${PORT}:${PORT}", + ], + } + # Configure `jupyter_server_test` service. + # TODO(gp): For some reason the following doesn't work. 
def get_base_docker_compose_path() -> str:
    """
    Build the absolute path of the generated Docker compose file.

    The relative location `devops/compose/tmp.docker-compose.yml` is resolved
    against the current working directory.

    :return: absolute path of the Docker compose file
    """
    compose_dir = "devops/compose"
    compose_file = "tmp.docker-compose.yml"
    # TODO(gp): Factor out the piece below.
    return os.path.abspath(os.path.join(compose_dir, compose_file))
# Symbolic version meaning "read the version from the changelog".
_IMAGE_VERSION_FROM_CHANGELOG = "FROM_CHANGELOG"


def resolve_version_value(
    version: str,
    *,
    container_dir_name: str = ".",
) -> str:
    """
    Turn a version, or the symbolic value `FROM_CHANGELOG`, into a concrete
    version string.

    :param version: explicit version (e.g., `1.0.0`) or `FROM_CHANGELOG`
    :param container_dir_name: dir passed to
        `hversio.get_changelog_version()` when the version is symbolic
    :return: full version with patch for prod (e.g., `1.3.2`)
    """
    hdbg.dassert_isinstance(version, str)
    is_symbolic = version == _IMAGE_VERSION_FROM_CHANGELOG
    if is_symbolic:
        # Replace the symbolic value with the one stored in the changelog.
        resolved_version = hversio.get_changelog_version(container_dir_name)
    else:
        resolved_version = version
    # Both an explicit and a changelog-derived version are validated.
    _dassert_is_version_valid(resolved_version)
    return resolved_version
def dassert_is_subsequent_version(
    version: str,
    *,
    container_dir_name: str = ".",
) -> None:
    """
    Check that `version` is not older than the version in the changelog.

    The versions are compared component by component as integers, so that
    `1.10.0` is correctly considered newer than `1.9.0` (a plain string
    comparison orders them the other way around). An equal version is
    accepted.

    The check is skipped when `version` is the symbolic value `FROM_CHANGELOG`.

    :param version: version to check (e.g., `1.2.0`) or `FROM_CHANGELOG`
    :param container_dir_name: dir passed to
        `hversio.get_changelog_version()` to find the current version
    """
    if version == _IMAGE_VERSION_FROM_CHANGELOG:
        return
    current_version = hversio.get_changelog_version(container_dir_name)

    def _to_numeric_key(version_str: str):
        """
        Convert `1.10.0` to `(1, 10, 0)`; return None if it is not numeric.
        """
        try:
            return tuple(int(part) for part in version_str.split("."))
        except (AttributeError, ValueError):
            return None

    current_key = _to_numeric_key(current_version)
    version_key = _to_numeric_key(version)
    if current_key is None or version_key is None:
        # The versions are not purely numeric: keep the previous behavior and
        # compare the raw values.
        hdbg.dassert_lte(current_version, version)
    else:
        hdbg.dassert_lte(
            current_key,
            version_key,
            f"current_version='{current_version}' version='{version}'",
        )
+ current_timestamp = hlitauti.get_ET_timestamp() + # Build container name. + container_name = f"{linux_user}.{image_name}.{service_name}.{project_dir}.{current_timestamp}" + _LOG.debug( + "get_container_name: container_name=%s", + container_name, + ) + return container_name + + +def _get_docker_base_cmd( + base_image: str, + stage: str, + version: str, + service_name: str, + # Params from `_get_docker_compose_cmd()`. + generate_docker_compose_file: bool, + extra_env_vars: Optional[List[str]], + extra_docker_compose_files: Optional[List[str]], + skip_docker_image_compatibility_check: bool, +) -> List[str]: + r""" + Get base `docker-compose` command encoded as a list of strings. + + It can be used as a base to build more complex commands, e.g., `run`, `up`, + `down`. + + E.g., + ``` + ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', + '\n docker-compose', + '\n --file amp/devops/compose/tmp.docker-compose.yml', + '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', + '\n --env-file devops/env/default.env'] + ``` + :param generate_docker_compose_file: whether to generate or reuse the existing + Docker compose file + :param extra_env_vars: represent vars to add, e.g., `["PORT=9999", "DRY_RUN=1"]` + :param extra_docker_compose_files: `docker-compose` override files + :param skip_docker_image_compatibility_check: if True, skip checking image + architecture compatibility + """ + _LOG.debug(hprint.func_signature_to_str()) + docker_cmd_: List[str] = [] + # - Handle the image. + image = get_image(base_image, stage, version) + _LOG.debug("base_image=%s stage=%s -> image=%s", base_image, stage, image) + dassert_is_image_name_valid(image) + # The check is mainly for developers to avoid using the wrong image (e.g., + # an x86 vs ARM architecture). + # We can skip the image compatibility check during the CI or when + # explicitly skipped. 
+ if not (hserver.is_inside_ci() or skip_docker_image_compatibility_check): + hdocker.check_image_compatibility_with_current_arch(image) + else: + _LOG.warning("Skipping docker image compatibility check") + docker_cmd_.append(f"IMAGE={image}") + # - Handle extra env vars. + if extra_env_vars: + hdbg.dassert_isinstance(extra_env_vars, list) + for env_var in extra_env_vars: + docker_cmd_.append(f"{env_var}") + # + docker_cmd_.append(r""" + docker compose""") + docker_compose_files = _get_docker_compose_files( + stage, + generate_docker_compose_file, + service_name, + extra_docker_compose_files, + ) + file_opts = " ".join([f"--file {dcf}" for dcf in docker_compose_files]) + _LOG.debug(hprint.to_str("file_opts")) + # TODO(gp): Use something like `.append(rf"{space}{...}")` + docker_cmd_.append(rf""" + {file_opts}""") + # - Handle the env file. + env_file = "devops/env/default.env" + docker_cmd_.append(rf""" + --env-file {env_file}""") + return docker_cmd_ + + +def _get_docker_compose_cmd( + base_image: str, + stage: str, + version: str, + cmd: str, + *, + # TODO(gp): make these params mandatory. + extra_env_vars: Optional[List[str]] = None, + extra_docker_compose_files: Optional[List[str]] = None, + extra_docker_run_opts: Optional[List[str]] = None, + service_name: str = "app", + use_entrypoint: bool = True, + generate_docker_compose_file: bool = True, + as_user: bool = True, + print_docker_config: bool = False, + use_bash: bool = False, + skip_docker_image_compatibility_check: bool = False, +) -> str: + """ + Get `docker-compose` run command. 
+ + E.g., + ``` + IMAGE=*****..dkr.ecr.us-east-1.amazonaws.com/amp:dev \ + docker-compose \ + --file /amp/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name grisha.cmamp.app.cmamp1.20220317_232120 \ + --user $(id -u):$(id -g) \ + app \ + bash + ``` + :param cmd: command to run inside Docker container + :param extra_docker_run_opts: additional `docker-compose` run options + :param service_name: service to use to run a command + :param use_entrypoint: whether to use the `entrypoint.sh` or not + :param generate_docker_compose_file: generate the Docker compose file or not + :param as_user: pass the user / group id or not + :param print_docker_config: print the docker config for debugging purposes + :param use_bash: run command through a shell + :param skip_docker_image_compatibility_check: if True, skip checking image architecture compatibility + """ + _LOG.debug(hprint.func_signature_to_str()) + # - Get the base Docker command. + docker_cmd_ = _get_docker_base_cmd( + base_image, + stage, + version, + service_name, + generate_docker_compose_file, + extra_env_vars, + extra_docker_compose_files, + skip_docker_image_compatibility_check, + ) + # - Add the `config` command for debugging purposes. + docker_config_cmd: List[str] = docker_cmd_[:] + # TODO(gp): Use yaml approach like done for other parts of the code. + docker_config_cmd.append(r""" + config""") + # - Add the `run` command. + docker_cmd_.append(r""" + run \ + --rm""") + # - Add a name to the container. + container_name = _get_container_name(service_name) + docker_cmd_.append(rf""" + --name {container_name}""") + # - Handle the user. + as_user = _run_docker_as_user(as_user) + if as_user: + docker_cmd_.append(r""" + --user $(id -u):$(id -g)""") + # - Handle the extra docker options. 
+ if extra_docker_run_opts: + hdbg.dassert_isinstance(extra_docker_run_opts, list) + extra_opts = " ".join(extra_docker_run_opts) + docker_cmd_.append(rf""" + {extra_opts}""") + # - Handle entrypoint. + if use_entrypoint: + docker_cmd_.append(rf""" + {service_name}""") + if cmd: + if use_bash: + cmd = f"bash -c '{cmd}'" + docker_cmd_.append(rf""" + {cmd}""") + else: + # No entrypoint. + docker_cmd_.append(rf""" + --entrypoint bash \ + {service_name}""") + # Print the config for debugging purpose. + if print_docker_config: + docker_config_cmd_as_str = hlitauti.to_multi_line_cmd(docker_config_cmd) + _LOG.debug("docker_config_cmd=\n%s", docker_config_cmd_as_str) + _LOG.debug( + "docker_config=\n%s", + hsystem.system_to_string(docker_config_cmd_as_str)[1], + ) + # Print the config for debugging purpose. + docker_cmd_: str = hlitauti.to_multi_line_cmd(docker_cmd_) + return docker_cmd_ + + +# //////////////////////////////////////////////////////////////////////////////// +# bash and cmd. +# //////////////////////////////////////////////////////////////////////////////// + + +def _docker_cmd( + ctx: Any, + docker_cmd_: str, + *, + skip_pull: bool = False, + **ctx_run_kwargs: Any, +) -> Optional[int]: + """ + Print and execute a Docker command. + + :param kwargs: kwargs for `ctx.run()` + """ + if hserver.is_inside_ci(): + import helpers.hs3 as hs3 + + # Generate files with the AWS settings that are missing when running + # inside CI. + hs3.generate_aws_files() + docker_pull(ctx, skip_pull=skip_pull) + _LOG.debug("cmd=%s", docker_cmd_) + rc: Optional[int] = hlitauti.run( + ctx, docker_cmd_, pty=True, **ctx_run_kwargs + ) + return rc + + +@task +def docker_bash( # type: ignore + ctx, + base_image="", + stage="dev", + version="", + use_entrypoint=True, + as_user=True, + generate_docker_compose_file=True, + container_dir_name=".", + skip_pull=False, + skip_docker_image_compatibility_check=False, +): + """ + Start a bash shell inside the container corresponding to a stage. 
@task
def docker_cmd(  # type: ignore
    ctx,
    base_image="",
    stage="dev",
    version="",
    cmd="",
    as_user=True,
    generate_docker_compose_file=True,
    use_bash=False,
    container_dir_name=".",
    skip_pull=False,
):
    """
    Execute the command `cmd` inside a container corresponding to a stage.

    :param cmd: command to run; it must not be empty
    :param as_user: pass the user / group id or not
    :param generate_docker_compose_file: generate or reuse the Docker
        compose file
    :param use_bash: run command through a shell
    :param skip_pull: if True skip pulling the docker image
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    hdbg.dassert_ne(cmd, "")
    # TODO(gp): Do we need to overwrite the entrypoint?
    compose_opts = {
        "generate_docker_compose_file": generate_docker_compose_file,
        "as_user": as_user,
        "use_bash": use_bash,
    }
    full_docker_cmd = _get_docker_compose_cmd(
        base_image, stage, version, cmd, **compose_opts
    )
    _docker_cmd(ctx, full_docker_cmd, skip_pull=skip_pull)
+# //////////////////////////////////////////////////////////////////////////////// + + +def _get_docker_jupyter_cmd( + base_image: str, + stage: str, + version: str, + port: int, + self_test: bool, + *, + use_entrypoint: bool = True, + print_docker_config: bool = False, +) -> str: + cmd = "" + extra_env_vars = [f"PORT={port}"] + extra_docker_run_opts = ["--service-ports"] + service_name = "jupyter_server_test" if self_test else "jupyter_server" + # + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + extra_docker_run_opts=extra_docker_run_opts, + service_name=service_name, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + return docker_cmd_ + + +@task +def docker_jupyter( # type: ignore + ctx, + stage="dev", + version="", + base_image="", + auto_assign_port=True, + use_entrypoint=True, + port=None, + self_test=False, + container_dir_name=".", + skip_pull=False, +): + """ + Run Jupyter notebook server. + + :param auto_assign_port: use the UID of the user and the inferred + number of the repo (e.g., 4 for `~/src/amp4`) to get a unique + port + :param skip_pull: if True skip pulling the docker image + """ + hlitauti.report_task(container_dir_name=container_dir_name) + if port is None: + if auto_assign_port: + uid = os.getuid() + _LOG.debug("uid=%s", uid) + git_repo_idx = hgit.get_project_dirname(only_index=True) + git_repo_idx = int(git_repo_idx) + _LOG.debug("git_repo_idx=%s", git_repo_idx) + # We assume that there are no more than `max_idx_per_users` clients. 
+ max_idx_per_user = 10 + hdbg.dassert_lte(git_repo_idx, max_idx_per_user) + port = (uid * max_idx_per_user) + git_repo_idx + else: + port = 9999 + _LOG.info("Assigned port is %s", port) + # + print_docker_config = False + docker_cmd_ = _get_docker_jupyter_cmd( + base_image, + stage, + version, + port, + self_test, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) + + +def _get_docker_dash_app_cmd( + base_image: str, + stage: str, + version: str, + port: int, + *, + print_docker_config: bool = False, +) -> str: + cmd = "" + extra_env_vars = [f"PORT={port}"] + extra_docker_run_opts = ["--service-ports"] + service_name = "dash_app" + # + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + extra_docker_run_opts=extra_docker_run_opts, + service_name=service_name, + print_docker_config=print_docker_config, + ) + return docker_cmd_ + + +@task +def docker_dash_app( # type: ignore + ctx, + stage="dev", + version="", + base_image="", + auto_assign_port=True, + port=None, + container_dir_name=".", +): + """ + Run dash app. + + :param auto_assign_port: use the UID of the user and the inferred + number of the repo (e.g., 4 for `~/src/amp4`) to get a unique + port + """ + hlitauti.report_task(container_dir_name=container_dir_name) + if port is None: + if auto_assign_port: + uid = os.getuid() + _LOG.debug("uid=%s", uid) + git_repo_idx = hgit.get_project_dirname(only_index=True) + git_repo_idx = int(git_repo_idx) + _LOG.debug("git_repo_idx=%s", git_repo_idx) + # We assume that there are no more than `max_idx_per_users` clients. 
+ max_idx_per_user = 10 + hdbg.dassert_lte(git_repo_idx, max_idx_per_user) + port = (uid * max_idx_per_user) + git_repo_idx + else: + port = 9999 + # + _LOG.info("Assigned port is %s", port) + print_docker_config = False + docker_cmd_ = _get_docker_dash_app_cmd( + base_image, + stage, + version, + port, + print_docker_config=print_docker_config, + ) + _docker_cmd(ctx, docker_cmd_) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py new file mode 100644 index 000000000..4c2149f52 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py @@ -0,0 +1,1890 @@ +""" +Import as: + +import helpers.lib_tasks_docker_release as hltadore +""" + +import datetime +import logging +import os +from operator import attrgetter +from typing import Any, Optional + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio +import helpers.lib_tasks_aws as hlitaaws +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_pytest as hlitapyt +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_DEFAULT_TARGET_REGISTRY = "aws_ecr.ck" +_LOG = logging.getLogger(__name__) +_AUTO_RELEASE_LABEL = "Automated release" + +# pylint: disable=protected-access + + +# ############################################################################# +# Docker image workflows. 
+# ############################################################################# + + +def _to_abs_path(filename: str) -> str: + filename = os.path.abspath(filename) + hdbg.dassert_path_exists(filename) + return filename + + +def _prepare_docker_ignore( + ctx: Any, + docker_ignore: str, + *, + copy_to_git_root: bool = True, +) -> None: + """ + Copy the target `docker_ignore` in the proper position for `docker build`. + + :param ctx: invoke context + :param docker_ignore: path to the `.dockerignore` file + :param copy_to_git_root: if True, copy the `.dockerignore` file to the + git root directory; otherwise, copy it to the current directory + """ + # Currently there is no built-in way to control which `.dockerignore` to + # use (https://stackoverflow.com/questions/40904409). + hdbg.dassert_path_exists(docker_ignore) + # Since all the runnable dirs copy the entire repo content, we use + # the Git root dir as a docker context so we need to copy the `.dockerignore` + # file to the Git root dir. + if copy_to_git_root: + dest_docker_ignore = os.path.join(hgit.find_git_root(), ".dockerignore") + else: + dest_docker_ignore = ".dockerignore" + cmd = f"cp -f {docker_ignore} {dest_docker_ignore}" + hlitauti.run(ctx, cmd) + + +def _get_dev_version(version: str, container_dir_name: str) -> str: + prod_version = hlitadoc.resolve_version_value( + version, container_dir_name=container_dir_name + ) + dev_version = hlitadoc.to_dev_version(prod_version) + _LOG.debug("prod_version=%s -> dev_version=%s", prod_version, dev_version) + return dev_version + + +def _create_multiarch_builder( + ctx: Any, +) -> None: + """ + Create a multi-arch builder for Docker buildx. + + :param ctx: invoke context + """ + # Create a multi-arch builder. + platform_builder_name = "multiarch_builder" + cmd = rf""" + docker buildx rm {platform_builder_name} + """ + # We do not abort on error since the platform builder might be present + # or not from previous executions. 
+ hsystem.system(cmd, abort_on_error=False) + cmd = rf""" + docker buildx create \ + --name {platform_builder_name} \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use {platform_builder_name} + """ + hlitauti.run(ctx, cmd) + + +# ############################################################################# +# Local/Dev image flow +# ############################################################################# +# - A "local" image (which is a release candidate for the DEV image) is built +# with: +# ``` +# > i docker_build_local_image +# ``` +# - This creates a local image like `helpers:local.saggese-1.0.0` +# - A qualification process (e.g., running all unit tests and the QA tests) is +# performed on the local image (e.g., locally or through GitHub actions) +# - If the qualification process is passed, the image is released as `dev` on +# the registries + + +# Use Docker buildkit or not. +# DOCKER_BUILDKIT = 1 +DOCKER_BUILDKIT = 0 + + +def _build_multi_arch_image( + ctx: Any, + opts: str, + multi_arch: str, + build_args: str, + build_image: str, + dockerfile: str, +) -> None: + """ + Build a multi-architecture Docker image in a remote Docker registry. + + :param ctx: invoke context + :param opts: build options (e.g., --no-cache) + :param multi_arch: target architectures to build for (e.g., + `linux/amd64,linux/arm64`) + :param build_args: build arguments for the Docker build command + :param build_image: name of the image to build + :param dockerfile: path to the Dockerfile to use for building + """ + # Build the multi-arch image. + # Compress the current directory (in order to dereference symbolic + # links) into a tar stream and pipes it to the `docker build` command. + # See HelpersTask197. + cmd = rf""" + tar -czh . 
| DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ + time \ + docker buildx build \ + {opts} \ + --push \ + --platform {multi_arch} \ + {build_args} \ + --tag {build_image} \ + --file {dockerfile} \ + - + """ + hlitauti.run(ctx, cmd) + + +def _list_image(ctx: Any, image: str) -> None: + """ + List Docker image. + + :param ctx: invoke context + :param image: docker image reference in REPOSITORY[:TAG] format + Examples: + - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0` + - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` + - `sorrentum/cmamp:dev-1.0.0` + - `ghcr.io/cryptokaizen/cmamp:prod` + """ + cmd = f"docker image ls {image}" + hlitauti.run(ctx, cmd) + + +def _run_tests( + ctx: Any, + stage: str, + version: str, + *, + skip_tests: Optional[bool] = False, + fast_tests: Optional[bool] = True, + slow_tests: Optional[bool] = True, + superslow_tests: Optional[bool] = True, + qa_tests: Optional[bool] = True, +) -> None: + """ + Run tests for a given stage and version. + + :param ctx: invoke context + :param stage: image stage (must be one of `local`, `dev`, or `prod`) + :param version: version to test + :param skip_tests: skip all tests if True + :param fast_tests: run fast tests + :param slow_tests: run slow tests + :param superslow_tests: run superslow tests + :param qa_tests: run QA tests + """ + hdbg.dassert_in(stage, ("local", "dev", "prod")) + if skip_tests: + _LOG.warning("Skipping all tests") + return + if fast_tests: + hlitapyt.run_fast_tests(ctx, stage=stage, version=version) + if slow_tests: + hlitapyt.run_slow_tests(ctx, stage=stage, version=version) + if superslow_tests: + hlitapyt.run_superslow_tests(ctx, stage=stage, version=version) + if qa_tests: + hlitapyt.run_qa_tests(ctx, stage=stage, version=version) + + +# TODO(sandeep): Consider promoting this to an invoke target and removing the callers. +# Reason: the caller invoke targets only contain this helper call. 
def _docker_tag_and_push_multi_arch_image(
    ctx: Any,
    version: str,
    base_image: str,
    target_registry: str,
    container_dir_name: str,
    source_stage: str,
    target_stage: str,
) -> None:
    """
    Tag and push a multi-arch image to the target registry using `docker buildx
    imagetools`.

    :param ctx: invoke context
    :param version: version to tag the image with
    :param base_image: base name of the image (e.g.,
        `*****.dkr.ecr.us-east-1.amazonaws.com/amp`)
    :param target_registry: target Docker registry to push to (e.g.,
        `aws_ecr.ck` or `dockerhub.causify`)
    :param container_dir_name: directory where Dockerfile is located
    :param source_stage: source stage of the image (must be one of `local` or
        `prod`)
    :param target_stage: target stage to push the image as (must be one
        of `dev` or `prod`)
    :raises ValueError: if `source_stage` or `target_registry` is not one of
        the supported values
    """
    hdbg.dassert_in(source_stage, ("local", "prod"))
    hdbg.dassert_in(target_stage, ("dev", "prod"))
    #
    hlitadoc.docker_login(ctx, target_registry)
    # Get source version string.
    if source_stage == "local":
        source_stage_version = _get_dev_version(version, container_dir_name)
    elif source_stage == "prod":
        source_stage_version = hlitadoc.resolve_version_value(
            version, container_dir_name=container_dir_name
        )
    else:
        raise ValueError(
            f"Invalid source stage='{source_stage}' for tagging and pushing"
        )
    source_image_versioned = hlitadoc.get_image(
        base_image, source_stage, source_stage_version
    )
    _LOG.info(
        "Pushing the %s image %s to the target_registry %s ",
        source_stage,
        source_image_versioned,
        target_registry,
    )
    if target_registry == "aws_ecr.ck":
        # Use AWS Docker registry.
        target_base_image = ""
    elif target_registry == "dockerhub.causify":
        # Use the public Docker Hub registry (images live under `causify/`).
        target_base_image_name = (
            hrecouti.get_repo_config().get_docker_base_image_name()
        )
        target_base_image = f"causify/{target_base_image_name}"
    else:
        raise ValueError(
            f"Invalid target Docker image registry='{target_registry}'"
        )
    # Only create a versioned image for the 'dev' stage or for the
    # `dockerhub.causify` registry.
    if target_stage == "dev" or target_registry == "dockerhub.causify":
        # Tag and push the source image as versioned target image.
        target_versioned_image = hlitadoc.get_image(
            target_base_image, target_stage, source_stage_version
        )
        cmd = f"docker buildx imagetools create -t {target_versioned_image} {source_image_versioned}"
        hlitauti.run(ctx, cmd)
    # Tag and push the source image as target image (i.e., without a version).
    target_latest_version = None
    target_latest_image = hlitadoc.get_image(
        target_base_image, target_stage, version=target_latest_version
    )
    cmd = f"docker buildx imagetools create -t {target_latest_image} {source_image_versioned}"
    hlitauti.run(ctx, cmd)


@task
def docker_push_dev_image(  # type: ignore
    ctx,
    version,
    base_image="",
    container_dir_name=".",
):
    """
    Push the "dev" image to ECR.

    Both the versioned tag and the unversioned "dev" tag are pushed.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    #
    dev_version = _get_dev_version(version, container_dir_name)
    #
    hlitadoc.docker_login(ctx)
    # Push Docker versioned tag.
    image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version)
    cmd = f"docker push {image_versioned_dev}"
    hlitauti.run(ctx, cmd, pty=True)
    # Push Docker tag.
    latest_version = None
    image_dev = hlitadoc.get_image(base_image, "dev", latest_version)
    cmd = f"docker push {image_dev}"
    hlitauti.run(ctx, cmd, pty=True)


@task
def docker_push_prod_image(  # type: ignore
    ctx,
    version,
    base_image="",
    container_dir_name=".",
):
    """
    Push the "prod" image to ECR.

    Both the versioned tag and the unversioned "prod" tag are pushed.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    prod_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    #
    hlitadoc.docker_login(ctx)
    # Push versioned tag.
    image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version)
    cmd = f"docker push {image_versioned_prod}"
    hlitauti.run(ctx, cmd, pty=True)
    # Push unversioned tag.
    latest_version = None
    image_prod = hlitadoc.get_image(base_image, "prod", latest_version)
    cmd = f"docker push {image_prod}"
    hlitauti.run(ctx, cmd, pty=True)


# TODO(gp): We moved away from versioning of the prod image because we release
# continuously and so it's easier to track the hash.
def _docker_rollback_image(
    ctx: Any,
    base_image: str,
    stage: str,
    version: str,
    push_to_repo: bool,
) -> None:
    """
    Rollback the versioned image for a particular stage and optionally push it
    to ECR.

    :param ctx: invoke context
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param stage: select a specific stage for the Docker image (must be
        one of `dev` or `prod`)
    :param version: version to tag the image and code with
    :param push_to_repo: whether to push the rolled back image to ECR
    :raises ValueError: if `stage` is not supported when pushing
    """
    hdbg.dassert_in(stage, ("dev", "prod"))
    # TODO(sandeep): Consider removing the redundant pull-push step. Instead of
    # pulling the versioned image and pushing it back to ECR, directly push
    # the local image. However, note that this may not work for multi-arch
    # images since local images are arch-specific, while remote tags include
    # all architectures.
    # 1) Ensure that version of the image exists locally.
    hlitadoc._docker_pull(
        ctx, base_image=base_image, stage=stage, version=version
    )
    # 2) Promote requested image to target stage.
    image_versioned = hlitadoc.get_image(base_image, stage, version)
    latest_version = None
    image_latest = hlitadoc.get_image(base_image, stage, latest_version)
    cmd = f"docker tag {image_versioned} {image_latest}"
    hlitauti.run(ctx, cmd)
    # 3) Push the image to ECR.
    if push_to_repo:
        # Forward `base_image` so that the image that gets pushed is the same
        # one that was pulled and re-tagged above.
        if stage == "dev":
            docker_push_dev_image(ctx, version=version, base_image=base_image)
        elif stage == "prod":
            docker_push_prod_image(ctx, version=version, base_image=base_image)
        else:
            raise ValueError(f"Invalid stage='{stage}' for rollback")
    else:
        _LOG.warning("Skipping pushing %s image to ECR, as requested", stage)


@task
def docker_build_local_image(  # type: ignore
    ctx,
    version,
    cache=True,
    base_image="",
    poetry_mode="update",
    container_dir_name=".",
    just_do_it=False,
    multi_arch="",
    cleanup_installation=True,
):
    """
    Build a local image, i.e., a release candidate "dev" image.

    :param ctx: invoke context
    :param version: version to tag the image with
    :param cache: use the cache
    :param base_image: the name for the base image
        E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp`.
        For base_image, we use "" as default instead None since `invoke` can
        only infer a single type.
    :param poetry_mode:
        - `update`: run `poetry lock` to update the packages
        - `no_update`: it uses the current `poetry.lock` file, if it is valid
          according to the constraints. This is useful when the goal is to
          remove / add / update only a single package without updating
          everything
    :param container_dir_name: directory where the Dockerfile is located
    :param just_do_it: execute the action ignoring the checks
    :param multi_arch:
        - if not specified, build for the current architecture
        - if specified, build for the specified multiple architectures. E.g.,
          `linux/amd64,linux/arm64`
    :param cleanup_installation: force clean up Docker installation. This can
        be disabled to speed up the build process
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # For poetry_mode="update", the `poetry.lock` file is updated and saved as
    # `/install/poetry.lock.out` to the container.
    # For poetry_mode="no_update", the `poetry.lock` file from the repo is used,
    # and it's passed as `/install/poetry.lock.in` to the container.
    hdbg.dassert_in(poetry_mode, ("update", "no_update"))
    if just_do_it:
        _LOG.warning("Skipping subsequent version check")
    else:
        hlitadoc.dassert_is_subsequent_version(
            version, container_dir_name=container_dir_name
        )
    dev_version = _get_dev_version(version, container_dir_name)
    # Prepare `.dockerignore`.
    docker_ignore = "devops/docker_build/dockerignore.dev"
    _prepare_docker_ignore(ctx, docker_ignore)
    # Build the local image.
    stage = "local"
    image_local = hlitadoc.get_image(base_image, stage, dev_version)
    #
    dockerfile = "devops/docker_build/dev.Dockerfile"
    # Keep the relative path instead of an absolute path to ensure it matches
    # files inside the tar stream and avoids file not found errors.
    # dockerfile = _to_abs_path(dockerfile)
    opts = "--no-cache" if not cache else ""
    build_arg_pairs = [
        ("AM_CONTAINER_VERSION", dev_version),
        ("INSTALL_DIND", True),
        ("POETRY_MODE", poetry_mode),
        ("CLEAN_UP_INSTALLATION", cleanup_installation),
    ]
    build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_arg_pairs)
    # Build for both a single arch or multi-arch.
    if multi_arch:
        # Login to AWS ECR because for multi-arch we need to build the local
        # image remotely.
        hlitadoc.docker_login(ctx)
        _create_multiarch_builder(ctx)
        _build_multi_arch_image(
            ctx, opts, multi_arch, build_args, image_local, dockerfile
        )
        # TODO(sandeep): If possible, switch to using hlitadoc._docker_pull().
        # Pull the image from registry after building.
        cmd = f"docker pull {image_local}"
        hlitauti.run(ctx, cmd)
    else:
        # Build for a single architecture using `docker build`.
        # Compress the current directory (in order to dereference symbolic
        # links) into a tar stream and pipes it to the `docker build` command.
        # See HelpersTask197.
        cmd = rf"""
        tar -czh . | DOCKER_BUILDKIT={DOCKER_BUILDKIT} \
            time \
            docker build \
            {opts} \
            {build_args} \
            --tag {image_local} \
            --file {dockerfile} \
            -
        """
        hlitauti.run(ctx, cmd)
    # Retrieve the package files, if present.
    if poetry_mode == "update":
        # TODO(gp): Not sure it works properly for multi-arch build, since on
        # different platforms the generated poetry.lock might be different.
        # TODO(gp): For some reason we can't use more than one bash command in
        # docker_cmd.
        cmd = "cp -f /install/poetry.lock.out /install/pip_list.txt ."
        # Use a dedicated name: `opts` above holds the `docker build` options
        # as a string.
        docker_cmd_opts = [
            "--stage local",
            f"--version {version}",
            f"--cmd '{cmd}'",
            "--skip-pull",
        ]
        cmd = "invoke docker_cmd " + " ".join(docker_cmd_opts)
        hlitauti.run(ctx, cmd)
        # The destination dir is always in the same relative position.
        dst_dir = "./devops/docker_build"
        hdbg.dassert_dir_exists(dst_dir)
        cmd = f"cp -f poetry.lock.out {dst_dir}/poetry.lock"
        hlitauti.run(ctx, cmd)
        cmd = f"cp -f pip_list.txt {dst_dir}/pip_list.txt"
        hlitauti.run(ctx, cmd)
    # Check image and report stats.
    _list_image(ctx, image_local)


@task
def docker_tag_local_image_as_dev(  # type: ignore
    ctx,
    version,
    base_image="",
    container_dir_name=".",
):
    """
    Mark the "local" image as "dev".

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # Get the version.
    dev_version = _get_dev_version(version, container_dir_name)
    # Tag local image as versioned dev image (e.g., `dev-1.0.0`).
    image_versioned_local = hlitadoc.get_image(base_image, "local", dev_version)
    image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version)
    cmd = f"docker tag {image_versioned_local} {image_versioned_dev}"
    hlitauti.run(ctx, cmd)
    # Tag local image as dev image.
    latest_version = None
    image_dev = hlitadoc.get_image(base_image, "dev", latest_version)
    cmd = f"docker tag {image_versioned_local} {image_dev}"
    hlitauti.run(ctx, cmd)


@task
def docker_release_dev_image(  # type: ignore
    ctx,
    version,
    cache=True,
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    push_to_repo=True,
    poetry_mode="update",
    container_dir_name=".",
):
    """
    Build, test, and release to ECR the latest "dev" image.

    This can be used to test the entire flow from scratch by building an image,
    running the tests, and pushing if needed.

    Phases:
    1) Build local image
    2) Run the unit tests (e.g., fast, slow, superslow) on the local image
    3) Mark local as dev image
    4) Run the QA tests on the dev image
    5) Push dev image to the repo

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param skip_tests: skip all the tests and release the dev image
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param push_to_repo: push the image to the repo_short_name
    :param poetry_mode: same as in `docker_build_local_image()`
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # 1) Build "local" image.
    docker_build_local_image(
        ctx,
        version,
        cache=cache,
        poetry_mode=poetry_mode,
        container_dir_name=container_dir_name,
    )
    # Run resolve after `docker_build_local_image` so that a proper check
    # for subsequent version can be made in case `FROM_CHANGELOG` token
    # is used.
    dev_version = _get_dev_version(version, container_dir_name)
    # 2) Run tests for the "local" image.
    stage = "local"
    _run_tests(
        ctx,
        stage,
        dev_version,
        skip_tests=skip_tests,
        fast_tests=fast_tests,
        slow_tests=slow_tests,
        superslow_tests=superslow_tests,
        qa_tests=False,
    )
    # 3) Promote the "local" image to "dev".
    docker_tag_local_image_as_dev(
        ctx, dev_version, container_dir_name=container_dir_name
    )
    # 4) Run QA tests for the (local version) of the dev image.
    stage = "dev"
    _run_tests(
        ctx,
        stage,
        dev_version,
        fast_tests=False,
        slow_tests=False,
        superslow_tests=False,
        qa_tests=qa_tests,
    )
    # 5) Push the "dev" image to ECR.
    if push_to_repo:
        docker_push_dev_image(
            ctx, dev_version, container_dir_name=container_dir_name
        )
    else:
        _LOG.warning(
            "Skipping pushing dev image to repo_short_name, as requested"
        )
    _LOG.info("==> SUCCESS <==")


# /////////////////////////////////////////////////////////////////////////////
# Multi-arch build flow
# /////////////////////////////////////////////////////////////////////////////


# TODO(gp): multi_build -> multi_arch


@task
def docker_tag_push_multi_build_local_image_as_dev(  # type: ignore
    ctx,
    version,
    local_base_image="",
    target_registry=_DEFAULT_TARGET_REGISTRY,
    container_dir_name=".",
):
    """
    Mark the multi-arch "local" image as "dev" and push it.

    `local_base_image` and `target_registry` both contain information about the
    target Docker registry. Docker image registry address in `local_base_image`
    name is ignored when pushing, instead the `target_registry` param provides
    a Docker image registry address to push to.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param local_base_image: base name of a local image,
        e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param target_registry: target Docker image registry to push the image to
        - "dockerhub.causify": public Causify Docker image registry
        - "aws_ecr.ck": private AWS CK ECR
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    source_stage = "local"
    target_stage = "dev"
    _docker_tag_and_push_multi_arch_image(
        ctx,
        version,
        local_base_image,
        target_registry,
        container_dir_name,
        source_stage,
        target_stage,
    )


# TODO(gp): This needs to be merged with docker_release_dev_image.
@task
def docker_release_multi_build_dev_image(  # type: ignore
    ctx,
    version,
    cache=True,
    poetry_mode="update",
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    # TODO(Grisha): use iterable values, see
    # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values
    # target_registries=...
    target_registries=_DEFAULT_TARGET_REGISTRY,
    container_dir_name=".",
):
    """
    Build, test, and release the latest multi-arch "dev" image.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param poetry_mode: update package dependencies using poetry
    :param skip_tests: skip all the tests and release the dev image
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests
        skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param target_registries: comma separated list of target Docker
        image registries to push the image to. E.g.,
        "aws_ecr.ck,dockerhub.causify". See `docker_login()` for
        details.
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    target_registries = target_registries.split(",")
    # 1) Build "local" image remotely in the CK AWS ECR registry and pull once
    # it is built.
    docker_build_local_image(
        ctx,
        version,
        cache=cache,
        poetry_mode=poetry_mode,
        container_dir_name=container_dir_name,
        multi_arch="linux/amd64,linux/arm64",
    )
    # Run resolve after `docker_build_local_image` so that a proper check
    # for subsequent version can be made in case `FROM_CHANGELOG` token
    # is used.
    dev_version = _get_dev_version(version, container_dir_name)
    # 2) Run tests for the "local" image.
    # 3) Run QA tests using the local version of an image.
    # Use the local image because it is not possible to tag a multi-arch
    # image as dev without releasing (pushing) it.
    # The difference between a local and a dev image is just a tag.
    stage = "local"
    _run_tests(
        ctx,
        stage,
        dev_version,
        skip_tests=skip_tests,
        fast_tests=fast_tests,
        slow_tests=slow_tests,
        superslow_tests=superslow_tests,
        qa_tests=qa_tests,
    )
    # 4) Tag the image as dev image and push it to the target registries.
    for target_registry in target_registries:
        docker_tag_push_multi_build_local_image_as_dev(
            ctx,
            version=dev_version,
            target_registry=target_registry,
            container_dir_name=container_dir_name,
        )
    _LOG.info("==> SUCCESS <==")


# #############################################################################
# Prod image flow:
# #############################################################################
# - Prod image has no release candidate
# - Start from a Dev image already built and qualified
# - The prod image is created from the dev image by copying the code inside the
#   image
# - The prod image is tagged as "prod"
# The prod flow doesn't support multi-arch because we only run on x86 in prod.


@task
def docker_build_prod_image(  # type: ignore
    ctx,
    version,
    cache=True,
    base_image="",
    candidate=False,
    user_tag="",
    container_dir_name=".",
    tag=None,
):
    """
    Build a prod image from a dev image.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: note that often the prod image is just a copy of the
        dev image so caching makes no difference
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param candidate: build a prod image with a tag format: prod-{hash}
        where hash is the output of `hgit.get_head_hash()`
    :param user_tag: the name of the user building the candidate image
    :param container_dir_name: directory where the Dockerfile is located
    :param tag: for candidate images, the suffix to use instead of the short
        Git head hash; if empty, the head hash is used
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    prod_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    # Prepare `.dockerignore`.
    docker_ignore = "devops/docker_build/dockerignore.prod"
    _prepare_docker_ignore(ctx, docker_ignore)
    # TODO(gp): We should do a `i git_clean` to remove artifacts and check that
    # the client is clean so that we don't release from a dirty client.
    # Build prod image.
    if candidate:
        # For candidate prod images which need to be tested on the AWS infra add
        # a hash identifier.
        latest_version = None
        image_versioned_prod = hlitadoc.get_image(
            base_image, "prod", latest_version
        )
        if not tag:
            head_hash = hgit.get_head_hash(short_hash=True)
        else:
            head_hash = tag
        # Add username to the prod image name.
        if user_tag:
            image_versioned_prod += f"-{user_tag}"
        # Add head hash to the prod image name.
        image_versioned_prod += f"-{head_hash}"
    else:
        image_versioned_prod = hlitadoc.get_image(
            base_image, "prod", prod_version
        )
    #
    dockerfile = "devops/docker_build/prod.Dockerfile"
    dockerfile = _to_abs_path(dockerfile)
    #
    # TODO(gp): Use to_multi_line_cmd()
    opts = "--no-cache" if not cache else ""
    # Use dev version for building prod image.
    dev_version = hlitadoc.to_dev_version(prod_version)
    image_name = hrecouti.get_repo_config().get_docker_base_image_name()
    hdbg.dassert(
        not hgit.is_inside_submodule(),
        "The build should be run from a super repo, not a submodule.",
    )
    git_root_dir = hgit.find_git_root()
    # TODO(heanh): Expose the build context to the interface and use
    # `git_root_dir` by default.
    cmd = rf"""
    DOCKER_BUILDKIT={DOCKER_BUILDKIT} \
        time \
        docker build \
        {opts} \
        --tag {image_versioned_prod} \
        --file {dockerfile} \
        --build-arg VERSION={dev_version} \
        --build-arg ECR_BASE_PATH={os.environ["CSFY_ECR_BASE_PATH"]} \
        --build-arg IMAGE_NAME={image_name} \
        {git_root_dir}
    """
    hlitauti.run(ctx, cmd)
    if candidate:
        _LOG.info("Head hash: %s", head_hash)
        _list_image(ctx, image_versioned_prod)
    else:
        # Tag versioned image as latest prod image.
        latest_version = None
        image_prod = hlitadoc.get_image(base_image, "prod", latest_version)
        cmd = f"docker tag {image_versioned_prod} {image_prod}"
        hlitauti.run(ctx, cmd)
        #
        _list_image(ctx, image_prod)


@task
def docker_build_multi_arch_prod_image(  # type: ignore
    ctx,
    version,
    cache=True,
    base_image="",
    user_tag="",
    container_dir_name=".",
    tag=None,
    multi_arch="linux/amd64,linux/arm64",
):
    """
    Build a multi-arch versioned prod image from a dev image.

    E.g., we have the dev image `helpers:dev-1.0.0` and we want to build a
    prod image `helpers:prod-1.0.0`.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: note that often the prod image is just a copy of the
        dev image so caching makes no difference
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param user_tag: the name of the user building the candidate image
        (currently not used by this function)
    :param container_dir_name: directory where the Dockerfile is located
    :param tag: currently not used by this function
    :param multi_arch: comma separated list of target architectures to
        build the image for. E.g., `linux/amd64,linux/arm64`
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    prod_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    # Prepare `.dockerignore`.
    docker_ignore = "devops/docker_build/dockerignore.prod"
    _prepare_docker_ignore(ctx, docker_ignore)
    # TODO(gp): We should do a `i git_clean` to remove artifacts and check that
    # the client is clean so that we don't release from a dirty client.
    # Build prod image.
    image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version)
    # Prepare the build.
    dockerfile = "devops/docker_build/prod.Dockerfile"
    # Keep the relative path instead of an absolute path to ensure it matches
    # files inside the tar stream and avoids file not found errors.
    # dockerfile = _to_abs_path(dockerfile)
    #
    opts = "--no-cache" if not cache else ""
    # Use dev version for building prod image.
    dev_version = hlitadoc.to_dev_version(prod_version)
    # NOTE(review): `docker_build_prod_image()` also passes an `IMAGE_NAME`
    # build arg for the same Dockerfile; confirm whether it is needed here.
    build_arg_pairs = [
        ("VERSION", dev_version),
        ("ECR_BASE_PATH", os.environ["CSFY_ECR_BASE_PATH"]),
    ]
    build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_arg_pairs)
    # Login to AWS ECR because for multi-arch we need to build the local
    # image remotely.
    hlitadoc.docker_login(ctx)
    _create_multiarch_builder(ctx)
    _build_multi_arch_image(
        ctx, opts, multi_arch, build_args, image_versioned_prod, dockerfile
    )
    # TODO(sandeep): If possible, switch to hlitadoc._docker_pull().
    # Pull the image from registry after building.
    cmd = f"docker pull {image_versioned_prod}"
    hlitauti.run(ctx, cmd)
    _list_image(ctx, image_versioned_prod)


@task
def docker_tag_push_multi_arch_prod_image(  # type: ignore
    ctx,
    version,
    base_image="",
    target_registry=_DEFAULT_TARGET_REGISTRY,
    container_dir_name=".",
):
    """
    Mark the multi-arch versioned "prod" image as "prod" and push it to the
    target registry.

    `base_image` and `target_registry` both contain information about the target
    Docker registry.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param target_registry: target Docker image registry to push the image to
        - "dockerhub.causify": public Causify Docker image registry
        - "aws_ecr.ck": private AWS CK ECR
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    source_stage = "prod"
    target_stage = "prod"
    _docker_tag_and_push_multi_arch_image(
        ctx,
        version,
        base_image,
        target_registry,
        container_dir_name,
        source_stage,
        target_stage,
    )


# TODO(gp): Can we merge this with docker_push_prod_image?
@task
def docker_push_prod_candidate_image(  # type: ignore
    ctx,
    candidate,
    base_image="",
    container_dir_name=".",
):
    """
    (ONLY CI/CD) Push the "prod" candidate image to ECR.

    :param ctx: invoke context
    :param candidate: hash of the candidate prod image to push
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    #
    hlitadoc.docker_login(ctx)
    # Push the image tagged with a hash ID.
    image_versioned_prod = hlitadoc.get_image(base_image, "prod", None)
    cmd = f"docker push {image_versioned_prod}-{candidate}"
    hlitauti.run(ctx, cmd, pty=True)


@task
# TODO(Vlad): Add the release flow with the multi-arch support.
# See HelpersTask339.
def docker_release_prod_image(  # type: ignore
    ctx,
    version,
    cache=True,
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    push_to_repo=True,
    container_dir_name=".",
):
    """
    Build, test, and release to ECR the prod image.

    - Build prod image
    - Run the tests
    - Push the prod image repo

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param skip_tests: skip the fast, slow, and superslow tests and release the
        prod image (QA tests are still controlled by `qa_tests`)
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param push_to_repo: push the image to the repo_short_name
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    prod_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    # 1) Build prod image.
    docker_build_prod_image(
        ctx,
        cache=cache,
        version=prod_version,
        container_dir_name=container_dir_name,
    )
    # 2) Run tests.
    if skip_tests:
        _LOG.warning("Skipping all tests and releasing")
        fast_tests = slow_tests = superslow_tests = False
    stage = "prod"
    if fast_tests:
        hlitapyt.run_fast_tests(ctx, stage=stage, version=prod_version)
    if slow_tests:
        hlitapyt.run_slow_tests(ctx, stage=stage, version=prod_version)
    if superslow_tests:
        hlitapyt.run_superslow_tests(ctx, stage=stage, version=prod_version)
    # 3) Run QA tests using the local version of the prod image before pushing
    # it to ECR.
    if qa_tests:
        hlitapyt.run_qa_tests(ctx, stage=stage, version=prod_version)
    # 4) Push prod image.
    if push_to_repo:
        docker_push_prod_image(
            ctx, version=prod_version, container_dir_name=container_dir_name
        )
    else:
        _LOG.warning("Skipping pushing image to repo_short_name as requested")
    _LOG.info("==> SUCCESS <==")


@task(iterable=["docker_registry"])
def docker_release_multi_arch_prod_image(
    ctx,
    version,
    cache=True,
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    docker_registry=None,
    container_dir_name=".",
):
    r"""
    Build, test, and release to Docker registries the multi-arch prod image.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param skip_tests: skip all the tests
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param docker_registry: list of Docker image registries to push the image
        to; if empty or `None`, the default target registry is used
    :param container_dir_name: directory where the Dockerfile is located

    Example usage:
    > invoke docker_release_multi_arch_prod_image \
        --version 1.2.0 \
        --docker-registry dockerhub.causify \
        --docker-registry aws_ecr.ck
    """
    hlitauti.report_task()
    # The default value for iterative task parameter will be an empty list.
    # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values
    # When the task is called directly from Python the declared default `None`
    # is used instead, so check for any empty value rather than calling `len()`.
    if not docker_registry:
        docker_registry = [_DEFAULT_TARGET_REGISTRY]
        _LOG.warning(
            "No Docker registries provided, using default: %s", docker_registry
        )
    # 1) Build prod image.
    docker_build_multi_arch_prod_image(
        ctx,
        version,
        cache=cache,
        container_dir_name=container_dir_name,
        multi_arch="linux/amd64,linux/arm64",
    )
    # 2) Run tests.
    stage = "prod"
    _run_tests(
        ctx,
        stage,
        version,
        skip_tests=skip_tests,
        fast_tests=fast_tests,
        slow_tests=slow_tests,
        superslow_tests=superslow_tests,
        qa_tests=qa_tests,
    )
    # 3) Push prod image.
    for registry in docker_registry:
        docker_tag_push_multi_arch_prod_image(
            ctx,
            version=version,
            target_registry=registry,
            container_dir_name=container_dir_name,
        )
    _LOG.info("==> SUCCESS <==")


# # TODO(gp): Useless IMO.
@task
def docker_release_all(ctx, version, container_dir_name="."):  # type: ignore
    """
    (ONLY CI/CD) Release both dev and prod image to ECR.

    This includes:
    - docker_release_dev_image
    - docker_release_prod_image

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task()
    docker_release_dev_image(ctx, version, container_dir_name=container_dir_name)
    docker_release_prod_image(
        ctx, version, container_dir_name=container_dir_name
    )
    _LOG.info("==> SUCCESS <==")


@task
def docker_rollback_dev_image(  # type: ignore
    ctx,
    version,
    push_to_repo=True,
):
    """
    Rollback the version of the dev image.

    Phases:
    1) Ensure that version of the image exists locally
    2) Promote versioned image as dev image
    3) Push dev image to the repo

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param push_to_repo: push the image to the ECR repo
    """
    hlitauti.report_task()
    stage = "dev"
    _docker_rollback_image(
        ctx,
        base_image="",
        stage=stage,
        version=version,
        push_to_repo=push_to_repo,
    )
    _LOG.info("==> SUCCESS <==")


@task
def docker_rollback_prod_image(  # type: ignore
    ctx,
    version,
    push_to_repo=True,
):
    """
    Rollback the version of the prod image.

    Same parameters and meaning as `docker_rollback_dev_image`.
    """
    hlitauti.report_task()
    stage = "prod"
    _docker_rollback_image(
        ctx,
        base_image="",
        stage=stage,
        version=version,
        push_to_repo=push_to_repo,
    )
    _LOG.info("==> SUCCESS <==")


def _check_workspace_dir_sizes() -> None:
    """
    Check that the user doesn't have large files/directories in their
    workspace.

    Use-case is running the function before building a candidate image.
    Large files significantly slow down image creation and subsequent
    pulling. Over time it also increases costs of ECR usage.

    The check asserts (through `hdbg.dassert`) if any top-level item of the
    Git root, other than the ignored ones, is larger than the threshold.
    """
    # Execute system command and split into a list of "size\tdir" lines.
    # Threshold is chosen heuristically according to current repo dir sizes.
    git_root = hgit.find_git_root()
    with hsystem.cd(git_root):
        fs_item_max_threshold = "200M"
        directory_size_list = hsystem.system_to_string(
            f"du --threshold {fs_item_max_threshold} -hs $(ls -A) | sort -hr"
        )[1].split("\n")
    # Filter out directories ignored by `dockerignore.prod` + "amp/"
    # as submodule.
    ignored_dirs = [
        "amp",
        "ck.infra",
        "amp/ck.infra",
        "docs",
        ".git",
        "amp/.git",
    ]
    offending_items = []
    for line in directory_size_list:
        fields = line.split("\t")
        if len(fields) < 2:
            # Skip blank lines: when nothing exceeds the threshold `du` prints
            # nothing and the split above yields a single empty string.
            continue
        if fields[1] not in ignored_dirs:
            offending_items.append(line.replace("\t", " "))
    hdbg.dassert(
        len(offending_items) == 0,
        (
            "Your workspace contains one or more files/directories "
            f"larger than {fs_item_max_threshold} move "
            f"or delete the items:\n\t {offending_items}"
        ),
    )


@task
def docker_create_candidate_image(ctx, container_dir_name=".", user_tag=""):  # type: ignore
    """
    Create a new prod candidate image and push it.

    The image is tagged with the short hash of the current Git head,
    optionally prefixed by `user_tag`.

    :param ctx: invoke context
    :param container_dir_name: the runnable dir path (e.g.
        `./ck.infra/`)
    :param user_tag: the name of the user creating the image, empty
        parameter means the command was run via gh actions
    :return: the tag used for the image
    """
    _check_workspace_dir_sizes()
    # Get the hash of the image.
    tag = hgit.get_head_hash(".", short_hash=True)
    if user_tag:
        # Add user name to the candidate tag.
        tag = f"{user_tag}-{tag}"
    # Create new prod image.
    docker_build_prod_image(
        ctx,
        container_dir_name=container_dir_name,
        version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG,
        candidate=True,
        tag=tag,
    )
    # Push candidate image.
    docker_push_prod_candidate_image(ctx, tag)
    return tag


# #############################################################################
# ECS task definition workflows.
# ECS task definition is a wrapper around a container definition.
# #############################################################################


@task
def docker_release_test_task_definition(
    ctx,
    task_definition: Optional[str] = None,
    user_tag: Optional[str] = None,
    region: str = hs3.AWS_EUROPE_REGION_1,
):  # type: ignore
    """
    Release candidate image to test ECS task definition.

    :param ctx: invoke context
    :param task_definition: the name of the ECS task definition to update
        (required)
    :param user_tag: the name of the user creating the image, used as a prefix
        of the candidate image tag
    :param region: region to create the task definition in
    """
    hdbg.dassert_in(region, hs3.AWS_REGIONS)
    # Verify that task definition is provided.
    hdbg.dassert_is_not(task_definition, None, "task definition is required")
    # Create candidate image.
    current_dir = os.getcwd()
    image_tag = docker_create_candidate_image(ctx, current_dir, user_tag)
    # Update ECS task definition with new image URL.
    hlitaaws.aws_update_ecs_task_definition(
        task_definition=task_definition,
        image_tag=image_tag,
        region=region,
        environment="test",
    )


@task
def docker_release_preprod_task_definition(
    ctx, region: str = hs3.AWS_EUROPE_REGION_1
):  # type: ignore
    """
    Release candidate image to preprod ECS task definition.

    :param ctx: invoke context
    :param region: region to create the task definition in
    """
    hdbg.dassert_in(region, hs3.AWS_REGIONS)
    # Preprod release should be done from master branch and the client should be
    # clean.
    curr_branch = hgit.get_branch_name()
    hdbg.dassert_eq(
        curr_branch, "master", msg="You should release from master branch"
    )
    _ = hgit.is_client_clean(abort_if_not_clean=True)
    image_name = hrecouti.get_repo_config().get_docker_base_image_name()
    task_definition_name = f"{image_name}-preprod"
    # Create candidate image.
    current_dir = os.getcwd()
    image_tag = docker_create_candidate_image(ctx, current_dir)
    # Update ECS task definition with new image URL.
    hlitaaws.aws_update_ecs_task_definition(
        task_definition=task_definition_name,
        image_tag=image_tag,
        region=region,
        environment="preprod",
    )


@task
def docker_release_prod_task_definition(ctx, region: str = hs3.AWS_US_REGION_1):  # type: ignore
    """
    Release candidate image to prod ECS task definition.

    :param ctx: invoke context
    :param region: region to create the task definition in
    """
    hdbg.dassert_in(region, hs3.AWS_REGIONS)
    # Prod release should be done from master branch and the client should be
    # clean.
    curr_branch = hgit.get_branch_name()
    hdbg.dassert_eq(
        curr_branch, "master", msg="You should release from master branch"
    )
    _ = hgit.is_client_clean(abort_if_not_clean=True)
    image_name = hrecouti.get_repo_config().get_docker_base_image_name()
    task_definition_name = f"{image_name}-prod"
    # Create candidate image.
    current_dir = os.getcwd()
    image_tag = docker_create_candidate_image(ctx, current_dir)
    # Update ECS task definition with new image URL.
    hlitaaws.aws_update_ecs_task_definition(
        task_definition=task_definition_name,
        image_tag=image_tag,
        region=region,
        environment="prod",
    )


@task
def copy_ecs_task_definition_image_url(ctx, src_task_def, dst_task_def):  # type: ignore
    """
    Copy image URL from one task definition to another.

    Currently the implementation assumes the source region is Stockholm
    (eu-north-1) and the destination region is Tokyo (ap-northeast-1).

    TODO(Juraj): Generalize the regions; this is the only configuration we
    need at the moment.

    :param ctx: invoke context (unused)
    :param src_task_def: source ECS task definition (located in eu-
        north-1)
    :param dst_task_def: destination ECS task definition (located in ap-
        northeast-1)
    """
    # TODO(Vlad): Import locally to avoid redundant dependencies.
    # See for details: https://github.com/cryptokaizen/cmamp/issues/8086.
    import helpers.haws as haws

    #
    _ = ctx
    src_image_url = haws.get_task_definition_image_url(
        src_task_def, region=hs3.AWS_EUROPE_REGION_1
    )
    # We have cross-region replication enabled in ECR, all images live in both
    # regions.
    dst_image_url = src_image_url.replace(
        hs3.AWS_EUROPE_REGION_1, hs3.AWS_TOKYO_REGION_1
    )
    haws.update_task_definition(
        dst_task_def, dst_image_url, region=hs3.AWS_TOKYO_REGION_1
    )


# TODO(gp): This might become obsolete.
@task
def docker_update_prod_task_definition(
    ctx, version, preprod_tag, airflow_dags_s3_path, task_definition
):  # type: ignore
    """
    Update image in prod task definition to the desired version.

    :param version: latest version from `changelog.txt` or custom one (e.g., `1.1.1`)
    :param preprod_tag: image that will be re-tagged with prod version
        e.g., `preprod-d8sf76s` -> `prod-1.1.1`
    :param airflow_dags_s3_path: S3 bucket from which airflow will load DAGs
    :param task_definition: which ECS task definition to use
        currently our prod ECS task definitions match short name of repos.
    """
    # TODO(Nikola): Convert `haws` part to script so it can be called via `docker_cmd`.
    # https://github.com/cryptokaizen/cmamp/pull/2594/files#r948551787
    import helpers.haws as haws

    #
    # TODO(Nikola): Use env var for CK profile.
    s3fs_ = hs3.get_s3fs(aws_profile="ck")
    super_module = not hgit.is_inside_submodule()
    # Prepare params for listing DAGs.
+ root_dir = hgit.get_client_root(super_module) + dags_path = [root_dir, "datapull", "airflow", "dags"] + if super_module and hgit.is_amp_present(): + # Main DAGs location is always in `cmamp`. + dags_path.insert(1, "amp") + dir_name = os.path.join(*dags_path) + pattern = "preprod.*.py" + only_files = True + use_relative_paths = False + # List preprod DAGs. + dag_paths = hs3.listdir(dir_name, pattern, only_files, use_relative_paths) + for dag_path in dag_paths: + # Abort in case one of the preprod DAGs is out of sync. + _, dag_name = os.path.split(dag_path) + hdbg.dassert_eq( + hs3.from_file(dag_path), + s3fs_.cat(airflow_dags_s3_path + dag_name).decode(), + msg=f"Preprod file `{dag_name}` is out of sync with `{airflow_dags_s3_path}`!", + ) + # Prepare params to compose new prod image url. + prod_version = hlitadoc.resolve_version_value(version) + base_image = "" + stage = "prod" + # Compose new prod image url. + new_prod_image_url = hlitadoc.get_image(base_image, stage, prod_version) + version = None + new_prod_image_url_no_version = hlitadoc.get_image( + base_image, stage, version + ) + # Check if preprod tag exist in preprod task definition as precaution. + preprod_task_definition_name = f"{task_definition}-preprod" + preprod_image_url = haws.get_task_definition_image_url( + preprod_task_definition_name + ) + preprod_tag_from_image = preprod_image_url.split(":")[-1] + msg = ( + f"Preprod tag is different in the image url `{preprod_tag_from_image}`!" + ) + hdbg.dassert_eq(preprod_tag_from_image, preprod_tag, msg=msg) + # Pull preprod image for re-tag. + hlitadoc.docker_login(ctx) + cmd = f"docker pull {preprod_image_url}" + hlitauti.run(ctx, cmd) + # Re-tag preprod image to prod. 
+ cmd = f"docker tag {preprod_image_url} {new_prod_image_url}" + hlitauti.run(ctx, cmd) + cmd = f"docker tag {preprod_image_url} {new_prod_image_url_no_version}" + hlitauti.run(ctx, cmd) + cmd = f"docker rmi {preprod_image_url}" + hlitauti.run(ctx, cmd) + # Get original prod image for potential rollback. + original_prod_image_url = haws.get_task_definition_image_url(task_definition) + # Track successful uploads for potential rollback. + successful_uploads = [] + try: + # Update prod task definition to the latest prod tag. + haws.update_task_definition( + task_definition, new_prod_image_url, environment="prod" + ) + # Add prod DAGs to airflow s3 bucket after all checks are passed. + for dag_path in dag_paths: + # Update prod DAGs. + _, dag_name = os.path.split(dag_path) + prod_dag_name = dag_name.replace("preprod.", "prod.") + dag_s3_path = airflow_dags_s3_path + prod_dag_name + s3fs_.put(dag_path, dag_s3_path) + _LOG.info("Successfully uploaded `%s`!", dag_s3_path) + successful_uploads.append(dag_s3_path) + # Upload new tag to ECS. + docker_push_prod_image(ctx, prod_version) + except Exception as ex: + _LOG.info("Rollback started!") + # Rollback prod task definition image URL. + haws.update_task_definition( + task_definition, original_prod_image_url, environment="prod" + ) + _LOG.info( + "Reverted prod task definition image url to `%s`!", + original_prod_image_url, + ) + # Notify for potential rollback for airflow S3 bucket, if any. + if successful_uploads: + _LOG.warning("Starting S3 rollback!") + # Prepare bucket resource. + s3 = haws.get_service_resource(aws_profile="ck", service_name="s3") + bucket_name, _ = hs3.split_path(airflow_dags_s3_path) + if hasattr(s3, "Bucket"): + bucket = s3.Bucket(bucket_name) + else: + # We'll need to handle this differently since client doesn't + # have object_versions. 
+ raise NotImplementedError( + "S3 resource Bucket attribute not available, fallback implementation needed" + ) + for successful_upload in successful_uploads: + # TODO(Nikola): Maybe even Telegram notification? + # Rollback successful upload. + _, prefix = hs3.split_path(successful_upload) + prefix = prefix.lstrip(os.sep) + versions = sorted( + bucket.object_versions.filter(Prefix=prefix), + key=attrgetter("last_modified"), + reverse=True, + ) + latest_version = versions[0] + latest_version.delete() + _LOG.info("Deleted version `%s`.", latest_version.version_id) + if len(versions) > 1: + rollback_version = versions[1] + _LOG.info( + "Active version is now `%s`!", + rollback_version.version_id, + ) + elif len(versions) == 1: + _LOG.info( + "Deleted version was also the only version. Nothing to rollback." + ) + else: + # TODO(Nikola): Do we need custom exception? + raise NotImplementedError + s3_rollback_message = ( + f"S3 uploads reverted: {successful_uploads}" + if successful_uploads + else "No S3 uploads." + ) + _LOG.info("Rollback completed! %s", s3_rollback_message) + raise ex + + +@task +def docker_build_frontend_feature_image( + ctx, + stage, + dev_image_version=None, + app_version=None, +): + """ + Build frontend image for releasing the features. + + :param stage: stage to release the image + :param dev_image_version: base dev image version to use + :param app_version: app version for feature releases + """ + hdbg.dassert_in(stage, ["test", "preprod", "prod"]) + # Get changelog paths. + current_dir = os.getcwd() + # Get image and app version. + if not dev_image_version: + dev_image_version = hversio.get_changelog_version(current_dir) + if not app_version: + errors = [] + # Here we assume FE has its own runnable dir or the app changelog file + # is inside `app` dir of a parent runnable dir. 
+ for file_name in [ + "app_changelog.txt", + os.path.join("app", "app_changelog.txt"), + ]: + try: + app_version = hversio.get_changelog_version( + current_dir, file_name=file_name + ) + break + except AssertionError as e: + errors.append(str(e)) + else: + raise FileNotFoundError( + f"App changelog file not found. Provide app version explicitly. Errors: {errors}" + ) + # Set ECR base path. + if stage in ("test", "preprod"): + ecr_base_path = "623860924167.dkr.ecr.eu-north-1.amazonaws.com" + else: + ecr_base_path = "726416904550.dkr.ecr.us-east-1.amazonaws.com" + # Set prod docker file name. + dockerfile = "devops/docker_build/prod.Dockerfile" + dockerfile = _to_abs_path(dockerfile) + # Set image tag. + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + image_tag = f"{ecr_base_path}/{image_name}:{stage}-{app_version}" + git_root_dir = hgit.find_git_root() + # Docker build command. + cmd = rf""" + docker build --no-cache \ + --file {dockerfile} \ + --build-arg VERSION={dev_image_version} \ + --build-arg ECR_BASE_PATH={ecr_base_path} \ + --build-arg IMAGE_NAME={image_name} \ + --tag {image_tag} \ + {git_root_dir} + """ + hlitauti.run(ctx, cmd) + _list_image(ctx, image_tag) + + +# ############################################################################# +# Test dev image flow +# ############################################################################# + + +@task +def docker_build_test_dev_image( # type: ignore + ctx, + assignee="", + reviewers="", + container_dir_name=".", +): + """ + Automate the complete periodic release workflow for the dev image. 
+ + This task performs: + 1) Bump version (e.g., 2.2.0 -> 2.3.0) + 2) Get release team members + 3) Create branch with date-based name + 4) Build image locally with the bumped version number + 5) Run tests (fast, slow, superslow) + 6) Add changelog entry for the release + 7) Stage poetry.lock and pip_list.txt files + 8) Commit changes with versioned message + 9) Push changes + 10) Create PR + 11) Tag and push image to GHCR + + :param ctx: invoke context + :param assignee: GitHub username to assign the PR to + :param reviewers: GitHub username(s) to request PR review. If not + specified, uses the release team members from GitHub team + configured in repo_config.yaml + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + # 1) Bump version. + _LOG.info("Step 1: Bumping version") + current_version = hversio.get_changelog_version(container_dir_name) + hdbg.dassert(current_version, "Could not find current version in changelog") + _LOG.info("Current version: %s", current_version) + version = hversio.bump_version(current_version, bump_type="minor") + _LOG.info("Bumped version: %s -> %s", current_version, version) + # 2) Get release team members. + _LOG.info("Step 2: Getting release team members") + if not reviewers: + release_team_name = hrecouti.get_repo_config().get_release_team() + # Get team members from GitHub team. + team_members = hlitagh.gh_get_team_member_names(release_team_name) + reviewers = ",".join(team_members) + _LOG.info("Release team '%s' members: %s", release_team_name, reviewers) + # 3) Create branch with date-based name. + _LOG.info("Step 3: Creating branch with date-based name") + issue_prefix = hrecouti.get_repo_config().get_issue_prefix() + # Get current date in YYYYMMDD format. 
+ today = datetime.date.today().strftime("%Y%m%d") + branch_name = f"{issue_prefix}_Periodic_image_release_{today}" + _LOG.info("Branch name: %s", branch_name) + cmd = f"git checkout -b {branch_name}" + hlitauti.run(ctx, cmd) + # 4) Build image locally. + _LOG.info("Step 4: Building local image with version %s", version) + docker_build_local_image( + ctx, + version=version, + cache=True, + poetry_mode="update", + container_dir_name=container_dir_name, + ) + # 5) Run tests. + _LOG.info("Step 5: Running tests") + dev_version = _get_dev_version(version, container_dir_name) + stage = "dev" + _run_tests( + ctx, + stage, + dev_version, + skip_tests=False, + fast_tests=True, + slow_tests=True, + superslow_tests=True, + qa_tests=False, + ) + # 6) Add changelog entry. + _LOG.info("Step 6: Adding changelog entry") + supermodule = True + root_dir = hversio._get_client_root(supermodule) + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + changelog_file = os.path.join(root_dir, container_dir_name, "changelog.txt") + hdbg.dassert_file_exists(changelog_file) + # Read the current changelog. + changelog_content = hio.from_file(changelog_file) + # Prepare new entry. + today = datetime.date.today().strftime("%Y-%m-%d") + new_entry = f"""# {image_name}-{version} +- {today} +- Periodic release: {today} + +""" + # Prepend new entry to changelog. + updated_changelog = new_entry + changelog_content + # Write back to file. + hio.to_file(changelog_file, updated_changelog) + _LOG.info("Added changelog entry for version %s", version) + # 7) Stage files. + _LOG.info("Step 7: Staging files") + # Fix git permissions in CI to avoid "insufficient permission" errors. 
+ if hserver.is_inside_ci(): + _LOG.info("Running in CI, fixing git permissions") + cmd = "sudo chmod -R 777 .git/objects/" + hlitauti.run(ctx, cmd) + files_to_stage = [ + "devops/docker_build/poetry.lock", + "devops/docker_build/pip_list.txt", + "changelog.txt", + ] + for file_path in files_to_stage: + full_path = os.path.join(root_dir, container_dir_name, file_path) + if os.path.exists(full_path): + cmd = f"git add {full_path}" + hlitauti.run(ctx, cmd) + _LOG.info("Staged %s", full_path) + else: + _LOG.warning("File not found, skipping: %s", full_path) + # 8) Commit changes. + _LOG.info("Step 8: Committing changes") + commit_message = f"Poetry output from the v{version} build" + # --no-verify to skip pre-commit checks since the `poetry.lock` file is + # too big and the `check_file_size` is failed. + cmd = f'git commit -m "{commit_message}" --no-verify' + hlitauti.run(ctx, cmd) + # 9) Push changes. + _LOG.info("Step 9: Pushing changes") + cmd = f"git push origin {branch_name}" + hlitauti.run(ctx, cmd) + # 10) Create PR. + _LOG.info("Step 10: Creating pull request") + pr_body = f"- Periodic release of {image_name} dev image version {version}" + label = _AUTO_RELEASE_LABEL + hlitagh.gh_create_pr( + ctx, + body=pr_body, + draft=False, + reviewer=reviewers, + labels=label, + assignee=assignee, + ) + _LOG.info("PR submitted for branch %s", branch_name) + # 11) Tag and push to GHCR. + _LOG.info("Step 11: Tagging and pushing image to GHCR") + # Get GHCR base image path from repo config. + ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") + ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() + ghcr_base_image = f"{ghcr_base}/{ghcr_image_name}" + _LOG.info("GHCR base image: %s", ghcr_base_image) + # Get local image name. + local_stage = "local" + image_local = hlitadoc.get_image("", local_stage, dev_version) + # Tag local image as versioned GHCR dev image (e.g., ghcr.io/causify-ai/csfy:dev-2.3.0). 
+ ghcr_image_versioned = f"{ghcr_base_image}:dev-{version}" + cmd = f"docker tag {image_local} {ghcr_image_versioned}" + hlitauti.run(ctx, cmd) + _LOG.info("Tagged as versioned GHCR dev image: %s", ghcr_image_versioned) + # Push versioned GHCR dev image. + cmd = f"docker push {ghcr_image_versioned}" + hlitauti.run(ctx, cmd, pty=True) + _LOG.info("Pushed versioned GHCR dev image: %s", ghcr_image_versioned) + _LOG.info("==> SUCCESS <==") + + +@task +def docker_tag_push_dev_image( + ctx, + version="", + base_image="", + target_registries="ghcr,ecr", + container_dir_name=".", + dry_run=False, +): + """ + Pulls a versioned dev image from a base registry, then tags and pushes + it to the specified target registries (both as versioned and latest). + + :param ctx: invoke context + :param version: version to tag the image and code with. If empty, reads + from changelog + :param base_image: base image path to pull from (e.g., + ghcr.io/causify-ai/csfy). If empty, uses GHCR from repo config + :param target_registries: comma separated list of target Docker + image registries to push the image to. E.g., "ghcr,ecr". + See the `helpers.repo_config_utils.RepoConfig.get_container_registry_url()` + for supported registry names + :param container_dir_name: directory where the Dockerfile is located + :param dry_run: if True, only print the commands without executing + them + """ + hlitauti.report_task(container_dir_name=container_dir_name) + # Get version. + if not version: + version = hversio.get_changelog_version(container_dir_name) + # Get base image if not provided. + if not base_image: + ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") + ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() + base_image = f"{ghcr_base}/{ghcr_image_name}" + # Pull the image. 
+ stage = "dev" + source_dev_image_versioned = hlitadoc.get_image(base_image, stage, version) + cmd = f"docker pull {source_dev_image_versioned}" + hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) + # Tag and push to target registries. + for registry in target_registries.split(","): + # Strip whitespace from registry name. + registry = registry.strip() + # Tag and push the image to the target registry as latest dev image. + target_base = hrecouti.get_repo_config().get_container_registry_url( + registry + ) + target_image_name = ( + hrecouti.get_repo_config().get_docker_base_image_name() + ) + target_base_image = f"{target_base}/{target_image_name}" + latest_version = None + target_dev_image_latest = hlitadoc.get_image( + target_base_image, stage, latest_version + ) + cmd = ( + f"docker tag {source_dev_image_versioned} {target_dev_image_latest}" + ) + hlitauti.run(ctx, cmd, dry_run=dry_run) + cmd = f"docker push {target_dev_image_latest}" + hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) + # Tag and push versioned dev image to target registry. 
+ target_dev_image_versioned = hlitadoc.get_image( + target_base_image, stage, version + ) + cmd = f"docker tag {source_dev_image_versioned} {target_dev_image_versioned}" + hlitauti.run(ctx, cmd, dry_run=dry_run) + cmd = f"docker push {target_dev_image_versioned}" + hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py new file mode 100644 index 000000000..7c1c360a6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py @@ -0,0 +1,606 @@ +""" +Import as: + +import helpers.lib_tasks_find as hlitafin +""" + +import functools +import glob +import logging +import os +import re +from typing import Iterator, List, Optional, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# ############################################################################# +# Find test. +# ############################################################################# + + +def _find_test_files( + dir_name: Optional[str] = None, use_absolute_path: bool = False +) -> List[str]: + """ + Find all the files containing test code in `abs_dir`. + """ + dir_name = dir_name or "." + hdbg.dassert_dir_exists(dir_name) + _LOG.debug("abs_dir=%s", dir_name) + # Find all the file names containing test code. 
+ _LOG.info("Searching from '%s'", dir_name) + path = os.path.join(dir_name, "**", "test_*.py") + _LOG.debug("path=%s", path) + file_names = glob.glob(path, recursive=True) + _LOG.debug("Found %d files: %s", len(file_names), str(file_names)) + hdbg.dassert_no_duplicates(file_names) + # Test files should always under a dir called `test`. + for file_name in file_names: + if "/old/" in file_name: + continue + if "/compute/" in file_name: + continue + hdbg.dassert_eq( + os.path.basename(os.path.dirname(file_name)), + "test", + "Test file '%s' needs to be under a `test` dir ", + file_name, + ) + hdbg.dassert_not_in( + "notebook/", + file_name, + "Test file '%s' should not be under a `notebook` dir", + file_name, + ) + # Make path relatives, if needed. + if use_absolute_path: + file_names = [os.path.abspath(file_name) for file_name in file_names] + # + file_names = sorted(file_names) + _LOG.debug("file_names=%s", file_names) + hdbg.dassert_no_duplicates(file_names) + return file_names + + +# TODO(gp): -> find_class since it works also for any class. +def _find_test_class( + class_name: str, file_names: List[str], exact_match: bool = False +) -> List[str]: + """ + Find test file containing `class_name` and report it in pytest format. + + E.g., for "TestLibTasksRunTests1" return + "test/test_lib_tasks.py::TestLibTasksRunTests1" + + :param exact_match: find an exact match or an approximate where `class_name` + is included in the class name + """ + # > jackpy TestLibTasksRunTests1 + # test/test_lib_tasks.py:60:class TestLibTasksRunTests1(hut.TestCase): + regex = r"^\s*class\s+(\S+)\s*\(" + _LOG.debug("regex='%s'", regex) + res: List[str] = [] + # Scan all the files. + for file_name in file_names: + _LOG.debug("file_name=%s", file_name) + txt = hio.from_file(file_name) + # Search for the class in each file. 
+ for i, line in enumerate(txt.split("\n")): + # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line) + # TODO(gp): We should skip ```, """, ''' + m = re.match(regex, line) + if m: + found_class_name = m.group(1) + _LOG.debug(" %s:%d -> %s", line, i, found_class_name) + if exact_match: + found = found_class_name == class_name + else: + found = class_name in found_class_name + if found: + res_tmp = f"{file_name}::{found_class_name}" + _LOG.debug("-> res_tmp=%s", res_tmp) + res.append(res_tmp) + res = sorted(list(set(res))) + return res + + +# TODO(gp): Extend this to accept only the test method. +# TODO(gp): Have a single `find` command with multiple options to search for different +# things, e.g., class names, test names, pytest_mark, ... +@task +def find_test_class( + ctx, class_name, dir_name=".", pbcopy=True, exact_match=False +): # type: ignore + """ + Report test files containing `class_name` in a format compatible with + pytest. + + :param class_name: the class to search + :param dir_name: the dir from which to search (default: .) + :param pbcopy: save the result into the system clipboard (only on + macOS) + """ + hlitauti.report_task(txt="class_name abs_dir pbcopy") + hdbg.dassert_ne(class_name, "", "You need to specify a class name") + _ = ctx + file_names = _find_test_files(dir_name) + res = _find_test_class(class_name, file_names, exact_match) + res = " ".join(res) + # Print or copy to clipboard. + hsystem.to_pbcopy(res, pbcopy) + + +# ////////////////////////////////////////////////////////////////////////////////// + + +@functools.lru_cache() +def _get_python_files(subdir: str) -> List[str]: + pattern = "*.py" + only_files = False + use_relative_paths = False + python_files = hio.listdir(subdir, pattern, only_files, use_relative_paths) + # Remove tmp files. 
+ python_files = [f for f in python_files if not f.startswith("tmp")] + return python_files + + +# File, line number, line, info1, info2 +_FindResult = Tuple[str, int, str, str, str] +_FindResults = List[_FindResult] + + +def _scan_files(python_files: List[str]) -> Iterator: + for file_ in python_files: + _LOG.debug("file=%s", file_) + txt = hio.from_file(file_) + for line_num, line in enumerate(txt.split("\n")): + # TODO(gp): Skip commented lines. + # _LOG.debug("%s:%s line='%s'", file_, line_num, line) + yield file_, line_num, line + + +def _find_short_import(iterator: Iterator, short_import: str) -> _FindResults: + """ + Find imports in the Python files with the given short import. + + E.g., for dtfcodarun dataflow/core/test/test_builders.py:9:import + dataflow.core.dag_runner as dtfcodarun returns + """ + # E.g., + # `import dataflow.core.dag_runner as dtfcodarun` + regex = rf"import\s+(\S+)\s+as\s+({short_import})" + regex = re.compile(regex) + # + results: _FindResults = [] + for file_, line_num, line in iterator: + m = regex.search(line) + if m: + # E.g., + # dataflow/core/test/test_builders.py:9:import dataflow.core.dag_runner as dtfcodarun + _LOG.debug(" --> line:%s=%s", line_num, line) + long_import_txt = m.group(1) + short_import_txt = m.group(2) + full_import_txt = f"import {long_import_txt} as {short_import_txt}" + res = (file_, line_num, line, short_import_txt, full_import_txt) + # E.g., + _LOG.debug(" => %s", str(res)) + results.append(res) + return results + + +def _find_func_class_uses(iterator: Iterator, regex: str) -> _FindResults: + regexs = [] + # E.g., + # `dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)` + regexs.append(rf"\s+(\w+)\.(\w*{regex})\(") + # `dag_builder: dtfcodabui.DagBuilder` + regexs.append(rf":\s*(\w+)\.(\w*{regex})") + # + _LOG.debug("regexs=%s", str(regexs)) + regexs = [re.compile(regex_) for regex_ in regexs] + # + results: _FindResults = [] + for file_, line_num, line in iterator: + _LOG.debug("line='%s'", 
line) + m = None + for regex_ in regexs: + m = regex_.search(line) + if m: + # _LOG.debug("--> regex matched") + break + if m: + _LOG.debug(" --> line:%s=%s", line_num, line) + short_import_txt = m.group(1) + obj_txt = m.group(2) + res = (file_, line_num, line, short_import_txt, obj_txt) + # E.g., + # ('./helpers/lib_tasks.py', 10226, 'dtfsys', 'RealTimeDagRunner') + # ('./dataflow/core/test/test_builders.py', 70, 'dtfcodarun', 'FitPredictDagRunner') + # ('./dataflow/core/test/test_builders.py', 157, 'dtfcodarun', 'FitPredictDagRunner') + _LOG.debug(" => %s", str(res)) + results.append(res) + return results + + +def _process_find_results(results: _FindResults, how: str) -> List: + filtered_results: List = [] + if how == "remove_dups": + # Remove duplicates. + for result in results: + (_, _, _, info1, info2) = result + filtered_results.append((info1, info2)) + filtered_results = hlist.remove_duplicates(filtered_results) + filtered_results = sorted(filtered_results) + elif how == "all": + filtered_results = sorted(results) + else: + raise ValueError(f"Invalid how='{how}'") + return filtered_results + + +@task +def find(ctx, regex, mode="all", how="remove_dups", subdir="."): # type: ignore + """ + Find symbols, imports, test classes and so on. 
+ + Example: + ``` + > i find DagBuilder + ('dtfcodabui', 'DagBuilder') + ('dtfcore', 'DagBuilder') + ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') + ('dtfcore', 'import dataflow.core as dtfcore') + ``` + + :param regex: function or class use to search for + :param mode: what to look for + - `symbol_import`: look for uses of function or classes + E.g., `DagRunner` + returns + ``` + ('cdataf', 'PredictionDagRunner') + ('cdataf', 'RollingFitPredictDagRunner') + ``` + - `short_import`: look for the short import + E.g., `'dtfcodabui' + returns + ``` + ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') + ``` + :param how: how to report the results + - `remove_dups`: report only imports and calls that are the same + """ + hlitauti.report_task(txt=hprint.to_str("regex mode how subdir")) + _ = ctx + # Process the `where`. + python_files = _get_python_files(subdir) + iter_ = _scan_files(python_files) + # Process the `what`. + if mode == "all": + for mode_tmp in ("symbol_import", "short_import"): + find(ctx, regex, mode=mode_tmp, how=how, subdir=subdir) + return + if mode == "symbol_import": + results = _find_func_class_uses(iter_, regex) + filtered_results = _process_find_results(results, "remove_dups") + print("\n".join(map(str, filtered_results))) + # E.g., + # ('cdataf', 'PredictionDagRunner') + # ('cdataf', 'RollingFitPredictDagRunner') + # Look for each short import. + results = [] + for short_import, _ in filtered_results: + iter_ = _scan_files(python_files) + results.extend(_find_short_import(iter_, short_import)) + elif mode == "short_import": + results = _find_short_import(iter_, regex) + else: + raise ValueError(f"Invalid mode='{mode}'") + # Process the `how`. + filtered_results = _process_find_results(results, how) + print("\n".join(map(str, filtered_results))) + + +# ############################################################################# +# Find test decorator. 
# #############################################################################


# TODO(gp): decorator_name -> pytest_mark
def _find_test_decorator(
    decorator_name: str, file_names: List[str]
) -> List[str]:
    """
    Find test files containing tests with a certain decorator
    `@pytest.mark.XYZ`.

    The mark name is matched as a whole identifier: searching for `skip`
    matches `@pytest.mark.skip` and `@pytest.mark.skip(...)` but not
    `@pytest.mark.skipif(...)`.

    :param decorator_name: name of the pytest mark, without the
        `@pytest.mark.` prefix (e.g., `slow`)
    :param file_names: paths of the files to scan
    :return: sorted list, without duplicates, of the files that contain at
        least one line starting with the requested decorator
    """
    hdbg.dassert_isinstance(file_names, list)
    # E.g.,
    #   @pytest.mark.slow(...)
    #   @pytest.mark.qa
    string = f"@pytest.mark.{decorator_name}"
    # The negative lookahead rejects marks that merely share a prefix with
    # the requested one (e.g., `skip` vs `skipif`), while still accepting a
    # following `(`, whitespace, or end of line.
    regex = rf"^\s*{re.escape(string)}(?!\w)"
    _LOG.debug("regex='%s'", regex)
    # Compile once, since the same pattern is applied to every line of every
    # file.
    compiled_regex = re.compile(regex)
    res: List[str] = []
    # Scan all the files.
    for file_name in file_names:
        _LOG.debug("file_name=%s", file_name)
        txt = hio.from_file(file_name)
        # Search for the decorator in each line of the file.
        for i, line in enumerate(txt.split("\n")):
            # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line)
            # TODO(gp): We should skip ```, """, '''. We can add a function to
            # remove all the comments, although we need to keep track of the
            # line original numbers.
            m = compiled_regex.match(line)
            if m:
                _LOG.debug(" -> found: %d:%s", i, line)
                res.append(file_name)
    # A file is appended once per matching line: remove the duplicates and
    # return the files in a deterministic order.
    res = sorted(set(res))
    return res


@task
def find_test_decorator(ctx, decorator_name="", dir_name="."):  # type: ignore
    """
    Report test files containing tests marked with
    `@pytest.mark.<decorator_name>`.

    The matching files are printed on a single line, separated by spaces.

    :param decorator_name: the decorator to search (e.g., `slow`)
    :param dir_name: the dir from which to search
    """
    hlitauti.report_task()
    _ = ctx
    hdbg.dassert_ne(decorator_name, "", "You need to specify a decorator name")
    file_names = _find_test_files(dir_name)
    res = _find_test_decorator(decorator_name, file_names)
    res = " ".join(res)
    print(res)


# #############################################################################
# Find / replace `check_string`.
+# ############################################################################# + + +@task +def find_check_string_output( # type: ignore + ctx, class_name, method_name, as_python=True, fuzzy_match=False, pbcopy=True +): + """ + Find output of `check_string()` in the test running + class_name::method_name. + + E.g., for `TestResultBundle::test_from_config1` return the content of the file + `./core/dataflow/test/TestResultBundle.test_from_config1/output/test.txt` + + :param as_python: if True return the snippet of Python code that replaces the + `check_string()` with a `assert_equal` + :param fuzzy_match: if True return Python code with `fuzzy_match=True` + :param pbcopy: save the result into the system clipboard (only on macOS) + """ + hlitauti.report_task() + _ = ctx + hdbg.dassert_ne(class_name, "", "You need to specify a class name") + hdbg.dassert_ne(method_name, "", "You need to specify a method name") + # Look for the directory named `class_name.method_name`. + cmd = f"find . -name '{class_name}.{method_name}' -type d" + # > find . -name "TestResultBundle.test_from_config1" -type d + # ./core/dataflow/test/TestResultBundle.test_from_config1 + _, txt = hsystem.system_to_string(cmd, abort_on_error=False) + file_names = txt.split("\n") + if not txt: + hdbg.dfatal(f"Can't find the requested dir with '{cmd}'") + if len(file_names) > 1: + hdbg.dfatal(f"Found more than one dir with '{cmd}':\n{txt}") + dir_name = file_names[0] + # Find the only file underneath that dir. + hdbg.dassert_dir_exists(dir_name) + cmd = f"find {dir_name} -name 'test.txt' -type f" + _, file_name = hsystem.system_to_one_line(cmd) + hdbg.dassert_file_exists(file_name) + # Read the content of the file. + _LOG.info("Found file '%s' for %s::%s", file_name, class_name, method_name) + txt = hio.from_file(file_name) + if as_python: + # Package the code snippet. + if not fuzzy_match: + # Align the output at the same level as 'expected = r...'. 
+ num_spaces = 8 + txt = hprint.indent(txt, num_spaces=num_spaces) + output = f""" + actual = + expected = r\"\"\" +{txt} + \"\"\".lstrip().rstrip() + self.assert_equal(actual, expected, fuzzy_match={fuzzy_match}) + """ + else: + output = txt + # Print or copy to clipboard. + hsystem.to_pbcopy(output, pbcopy=pbcopy) + return output + + +# ############################################################################# +# Find module dependencies. +# ############################################################################# + + +standard_libs = [ + "abc", + "argparse", + "datetime", + "importlib", + "logging", + "os", + "pandas", + "pytest", + "re", + "unittest", +] + + +@task +def find_dependency( # type: ignore + ctx, + module_name, + mode="print_deps", + only_module="", + ignore_standard_libs=True, + ignore_helpers=True, + remove_dups=True, +): + """ + E.g., ``` + + # Find all the dependency of a module from itself + > i find_dependency --module-name "amp.dataflow.model" --mode "find_lev2_deps" --ignore-helpers --only-module dataflow + amp/dataflow/model/stats_computer.py:16 dataflow.core + amp/dataflow/model/model_plotter.py:4 dataflow.model + ``` + + :param module_name: the module path to analyze (e.g., `amp.dataflow.model`) + :param mode: + - `print_deps`: print the result of grepping for imports + - `find_deps`: find all the dependencies + - `find_lev1_deps`, `find_lev2_deps`: find all the dependencies + :param only_module: keep only imports containing a certain module (e.g., `dataflow`) + :param ignore_standard_libs: ignore the Python standard libs (e.g., `os`, `...`) + :param ignore_helpers: ignore the `helper` lib + :param remove_dups: remove the duplicated imports + """ + _ = ctx + # (cd amp/dataflow/model/; jackpy "import ") | grep -v notebooks | grep -v test | grep -v __init__ | grep "import dataflow" + src_dir = module_name.replace(".", "/") + hdbg.dassert_dir_exists(src_dir) + # Find all the imports. 
+ cmd = f'find {src_dir} -name "*.py" | xargs grep -n -r "^import "' + _, txt = hsystem.system_to_string(cmd) + # + if mode == "print_deps": + print(txt) + return + # Parse the output. + _LOG.debug("\n" + hprint.frame("Parse")) + lines = txt.split("\n") + lines_out = [] + for line in lines: + # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas + # import helpers.hunit_test as hunitest # pylint: disable=no-name-in-module' + data = line.split(":") + hdbg.dassert_lte(3, len(data), "Invalid line='%s'", line) + file, line_num, import_code = data[:3] + _LOG.debug(hprint.to_str("file line_num import_code")) + lines_out.append((file, line_num, import_code)) + lines = lines_out + _LOG.debug("Found %d imports", len(lines)) + # Remove irrelevant files and imports. + _LOG.debug("\n" + hprint.frame("Remove irrelevant entries")) + lines_out = [] + for line in lines: + file, line_num, import_code = line + _LOG.debug("# " + hprint.to_str("file line_num import_code")) + if "__init__.py" in file: + _LOG.debug("Remove because init") + continue + if "/test/" in file: + _LOG.debug("Remove because test") + continue + if "notebooks/" in file: + _LOG.debug("Remove because notebook") + continue + if "from typing import" in import_code: + _LOG.debug("Remove because typing") + continue + lines_out.append(line) + lines = lines_out + _LOG.debug("After removal %d imports", len(lines)) + # Process. + _LOG.debug("\n" + hprint.frame("Process entries")) + lines_out = [] + for line in lines: + # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas + file, line_num, import_code = line + _LOG.debug("# " + hprint.to_str("file line_num import_code")) + # Parse import code. 
+ m = re.match(r"^import\s+(\S+)(\s+as)?", import_code) + hdbg.dassert(m, "Can't parse line='%s'", import_code) + assert m is not None + import_name = m.group(1) + _LOG.debug("import_name='%s'", import_name) + lev1_import = import_name.split(".")[0] + if ignore_standard_libs: + if lev1_import in standard_libs: + _LOG.debug("Ignoring standard lib '%s'", lev1_import) + continue + if ignore_helpers: + if lev1_import.startswith("helpers"): + _LOG.debug("Ignoring helpers '%s'", lev1_import) + continue + if only_module: + if only_module not in import_name: + _LOG.debug( + "Ignoring '%s' since it doesn't contain %s", + import_name, + only_module, + ) + continue + # + if mode == "find_deps": + dep = import_name + elif mode == "find_lev1_deps": + deps = import_name.split(".") + if len(deps) > 1: + dep = deps[0] + else: + dep = import_name + elif mode == "find_lev2_deps": + deps = import_name.split(".") + if len(deps) > 1: + dep = ".".join(deps[:2]) + else: + dep = import_name + else: + raise ValueError(f"Invalid mode='{mode}'") + lines_out.append((file, line_num, dep)) + lines = lines_out + # Remove repeated tuples. + if remove_dups: + _LOG.debug("\n" + hprint.frame("Remove repeated tuples")) + import_names = set() + lines_out = [] + for line in lines: + if line[2] in import_names: + continue + lines_out.append(line) + import_names.add(line[2]) + lines = lines_out + else: + _LOG.warning("Remove dups skipped") + # Sort. + _LOG.debug("\n" + hprint.frame("Sort tuples")) + lines = sorted(lines, key=lambda x: x[2]) + # Print and save. 
+ print(hprint.frame("Results")) + _LOG.debug("\n" + hprint.frame("Print")) + txt = "\n".join([":".join(line) for line in lines]) + file_name = "cfile" + hio.to_file(file_name, txt) + _LOG.info("%s saved", file_name) + # + txt = "\n".join(["%s:%s\t\t\t%s" % line for line in lines]) + print(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py new file mode 100644 index 000000000..53c9600af --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py @@ -0,0 +1,1252 @@ +""" +Import as: + +import helpers.lib_tasks_gh as hlitagh +""" + +import datetime +import json +import logging +import os +import re +from typing import Any, Dict, List, Optional, Tuple + +import invoke.exceptions as invexc +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.htable as htable +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# ############################################################################# +# GitHub CLI. +# ############################################################################# + + +@task +def gh_login( # type: ignore + ctx, + account="", + print_status=False, +): + hlitauti.report_task() + # + if not account: + # Retrieve the name of the repo, e.g., "alphamatic/amp". 
+ full_repo_name = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=False + ) + _LOG.debug(hprint.to_str("full_repo_name")) + account = full_repo_name.split("/")[0] + _LOG.info(hprint.to_str("account")) + # + ssh_filename = os.path.expanduser(f"~/.ssh/id_rsa.{account}.github") + _LOG.debug(hprint.to_str("ssh_filename")) + if os.path.exists(ssh_filename): + cmd = f"export GIT_SSH_COMMAND='ssh -i {ssh_filename}'" + print(cmd) + else: + _LOG.warning("Can't find file '%s'", ssh_filename) + # + if print_status: + cmd = "gh auth status" + hlitauti.run(ctx, cmd) + # + github_pat_filename = os.path.expanduser(f"~/.ssh/github_pat.{account}.txt") + if os.path.exists(github_pat_filename): + cmd = f"gh auth login --with-token <{github_pat_filename}" + hlitauti.run(ctx, cmd) + else: + _LOG.warning("Can't find file '%s'", github_pat_filename) + # + if print_status: + cmd = "gh auth status" + hlitauti.run(ctx, cmd) + + +# ############################################################################# + + +def _get_branch_name(branch_mode: str) -> Optional[str]: + if branch_mode == "current_branch": + branch_name: Optional[str] = hgit.get_branch_name() + elif branch_mode == "master": + branch_name = "master" + elif branch_mode == "all": + branch_name = None + else: + raise ValueError(f"Invalid branch='{branch_mode}'") + return branch_name + + +def _get_org_name(org_name: str) -> str: + """ + Get organization name, inferring from current repo if not provided. + + :param org_name: organization name or empty string + :return: organization name + """ + if not org_name: + # Infer organization from current repo. + full_repo_name = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=False + ) + org_name = full_repo_name.split("/")[0] + return org_name + + +def _get_workflow_table() -> htable.TableType: + """ + Get a table with the status of the GH workflow for the current repo. + """ + # Get the workflow status from GH. 
+ cmd = "export NO_COLOR=1; gh run list" + _, txt = hsystem.system_to_string(cmd) + _LOG.debug(hprint.to_str("txt")) + # pylint: disable=line-too-long + # > gh run list + # STATUS TITLE WORKFLOW BRANCH EVENT ID ELAPSED AGE + # * AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 4m49s 4m + # > gh run list | more + # completed success AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 7m17s 10m + # in_progress AmpTask1786_Integrate_20230518_2 Slow tests AmpTask1786_Integrate_20230518_2 pull_request 5027911518 10m9s 10m + # pylint: enable=line-too-long + # The output is tab separated, so convert it into CSV. + first_line = txt.split("\n")[0] + _LOG.debug("first_line=%s", first_line.replace("\t", ",")) + num_cols = len(first_line.split("\t")) + _LOG.debug(hprint.to_str("first_line num_cols")) + cols = [ + # E.g., completed, in_progress. + "completed", + # E.g., success, failure. + "status", + # Aka title: parse but don't use. + "name", + "workflow", + "branch", + "event", + "id", + "elapsed", + "age", + ] + hdbg.dassert_eq(num_cols, len(cols)) + # Build the table. + table = htable.Table.from_text(cols, txt, delimiter="\t") + _LOG.debug(hprint.to_str("table")) + # Remove the "name" column as it's redundant with "workflow". + table = table.remove_column("name") + return table + + +def _print_table(table: htable.TableType) -> None: + table_str = str(table) + # Colorize the table. + color_map = {"success": "green", "failure": "red", "in progress": "yellow"} + for status, color in color_map.items(): + table_str = table_str.replace( + status, hprint.color_highlight(status, color) + ) + # Report the full status. + print(table_str) + + +# TODO(Grisha): seems like GH changed the output format, we should update accordingly, +# see CmTask #4672 "Slow tests fail (9835540316)" for details. 
+@task +def gh_workflow_list( # type: ignore + ctx, + filter_by_branch="current_branch", + filter_by_completed="all", + report_only_status=True, + show_stack_trace=False, + print_table=True, +): + """ + Report the status of the GH workflows. + + :param filter_by_branch: name of the branch to check + - `current_branch` for the current Git branch + - `master` for master branch + - `all` for all branches + :param filter_by_completed: filter table by the status of the workflow + - E.g., "failure", "success" + :param report_only_status: if True, report only the status of the workflows + :param show_stack_trace: in case of error run `pytest_repro` reporting also + the stack trace + :param print_table: if True, print the table with the status of the workflows + """ + hlitauti.report_task( + txt=hprint.to_str("filter_by_branch filter_by_completed") + ) + # Login. + gh_login(ctx) + # Get the table. + table = _get_workflow_table() + # Filter table based on the branch. + if filter_by_branch != "all": + field = "branch" + value = _get_branch_name(filter_by_branch) + print(f"Filtering table by {field}={value}") + table = table.filter_rows(field, value) + # Filter table by the workflow status. + if filter_by_completed != "all": + field = "completed" + value = filter_by_completed + print(f"Filtering table by {field}={value}") + table = table.filter_rows(field, value) + if ( + filter_by_branch not in ("current_branch", "master") + or not report_only_status + ): + _print_table(table) + return + # For each workflow find the last success. + branch_name = hgit.get_branch_name() + workflows = table.unique("workflow") + print(f"workflows={workflows}") + for workflow in workflows: + table_tmp = table.filter_rows("workflow", workflow) + if print_table: + print(hprint.frame(workflow)) + _print_table(table_tmp) + # Find the first success. 
+ num_rows = table.size()[0] + _LOG.debug("num_rows=%s", num_rows) + for i in range(num_rows): + status_column = table_tmp.get_column("status") + _LOG.debug("status_column=%s", str(status_column)) + hdbg.dassert_lt( + i, len(status_column), "status_column=", status_column + ) + status = status_column[i] + if status == "success": + print(f"Workflow '{workflow}' for '{branch_name}' is ok") + break + if status == "failure": + _LOG.error( + "Workflow '%s' for '%s' is broken", workflow, branch_name + ) + # Get the output of the broken run. + # > gh run view 1477484584 --log-failed + workload_id = table_tmp.get_column("id")[i] + log_file_name = f"tmp.failure.{workflow}.{branch_name}.txt" + log_file_name = log_file_name.replace(" ", "_").lower() + cmd = f"gh run view {workload_id} --log-failed >{log_file_name}" + hsystem.system(cmd) + # Remove non-printable chars. + # TODO(heanh): Consider adding all the helpers util scripts + # to the `PATH` (when inside the container) so we can just use + # them without specifying the full path. 
+ helpers_root_dir = hgit.find_helpers_root() + file_path = ( + f"{helpers_root_dir}/dev_scripts_helpers/system_tools" + ) + cmd = f"{file_path}/remove_escape_chars.py -i {log_file_name}" + hsystem.system(cmd) + print(f"# Log is in '{log_file_name}'") + # Run_fast_tests Run fast tests 2021-12-19T00:19:38.3394316Z FAILED data + # cmd = rf"grep 'Z FAILED ' {log_file_name}" + workflow_as_str = workflow.lower().replace(" ", "_") + script_name = f"./tmp.pytest_repro.{workflow_as_str}.sh" + cmd = f"invoke pytest_repro --file-name {log_file_name} --script-name {script_name}" + if show_stack_trace: + cmd += " -s" + hsystem.system(cmd, suppress_output=False, abort_on_error=False) + break + if status in ("startup_failure", "cancelled", "skipped"): + _LOG.debug( + "Workflow '%s' for '%s' has status '%s', skipping", + workflow, + branch_name, + status, + ) + break + if status == "": + if i == (len(status_column) - 1): + # If all the runs in the table are in progress, i.e. there is no + # failed or succesful run, issue a warning and exit. E.g., + # ######################################################### + # Superslow tests + # ######################################################### + # completed | status | workflow | branch | event | id | elapsed | age | + # ----------- | ------ | --------------- | ------ | ----------------- | ---------- | ------- | --- | + # in_progress | | Superslow tests | master | workflow_dispatch | 5421740561 | 13m25s | 13m | + _LOG.warning( + "No failed/successful run found for workflow=%s for branch=%s, all runs are in progress, exiting.", + workflow, + branch_name, + ) + else: + _LOG.debug( + "Workflow=%s for branch %s is in progress, skipping further checks", + workflow, + branch_name, + ) + break + else: + raise ValueError(f"Invalid status='{status}'") + + +@task +def gh_workflow_run(ctx, branch="current_branch", workflows="all"): # type: ignore + """ + Run GH workflows in a branch. 
+ """ + hlitauti.report_task(txt=hprint.to_str("branch workflows")) + # Login. + gh_login(ctx) + # Get the branch name. + if branch == "current_branch": + branch_name = hgit.get_branch_name() + elif branch == "master": + branch_name = "master" + else: + raise ValueError(f"Invalid branch='{branch}'") + _LOG.debug(hprint.to_str("branch_name")) + # Get the workflows. + if workflows == "all": + gh_tests = ["fast_tests", "slow_tests"] + else: + gh_tests = [workflows] + _LOG.debug(hprint.to_str("workflows")) + # Run. + for gh_test in gh_tests: + gh_test += ".yml" + # gh workflow run fast_tests.yml --ref AmpTask1251_Update_GH_actions_for_amp + cmd = f"gh workflow run {gh_test} --ref {branch_name}" + hlitauti.run(ctx, cmd) + + +# ############################################################################# + + +# TODO(gp): Remove repo_short_name. +def _get_repo_full_name_from_cmd(repo_short_name: str) -> Tuple[str, str]: + """ + Convert the `repo_short_name` from command line (e.g., "current", "amp", + "lm") to the repo_short_name full name without host name. + """ + repo_full_name_with_host: str + if repo_short_name == "current": + # Get the repo name from the current repo. + repo_full_name_with_host = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=True + ) + hdbg.dassert_eq( + repo_full_name_with_host, + hrecouti.get_repo_config().get_repo_full_name_with_hostname(), + ) + ret_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() + else: + hdbg.dfatal("This code path is obsolete") + _LOG.debug( + "repo_short_name=%s -> repo_full_name_with_host=%s ret_repo_short_name=%s", + repo_short_name, + repo_full_name_with_host, + ret_repo_short_name, + ) + return repo_full_name_with_host, ret_repo_short_name + + +def _get_gh_issue_title(issue_id: int, repo_short_name: str) -> Tuple[str, str]: + """ + Get the title of a GitHub issue. 
+ + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + """ + # TODO(gp): I don't see applications where we need to pass the repo_short_name. + # One should always operate in the dir corresponding to a repo. + hdbg.dassert_eq(repo_short_name, "current") + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + # > (export NO_COLOR=1; gh issue view 1251 --json title) + # {"title":"Update GH actions for amp"} + hdbg.dassert_lte(1, issue_id) + cmd = f"gh issue view {issue_id} --repo {repo_full_name_with_host} --json title,url" + _, txt = hsystem.system_to_string(cmd) + _LOG.debug("txt=\n%s", txt) + # Parse json. + dict_ = json.loads(txt) + _LOG.debug("dict_=\n%s", dict_) + title = dict_["title"] + _LOG.debug("title=%s", title) + url = dict_["url"] + _LOG.debug("url=%s", url) + # Remove some annoying chars. + for char in ": + ( ) / ` *".split(): + title = title.replace(char, "") + # Replace multiple spaces with one. + title = re.sub(r"\s+", " ", title) + title = title.replace(" ", "_") + # Remove some annoying chars. + for char in "- ' ` \"".split(): + title = title.replace(char, "_") + # Add the prefix `AmpTaskXYZ_...` + task_prefix = hrecouti.get_repo_config().get_issue_prefix() + # task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + _LOG.debug("task_prefix=%s", task_prefix) + title = f"{task_prefix}{issue_id}_{title}" + return title, url + + +@task +def gh_issue_title(ctx, issue_id, repo_short_name="current", pbcopy=True): # type: ignore + """ + Print the title that corresponds to the given issue and repo_short_name. + E.g., AmpTask1251_Update_GH_actions_for_amp. + + Before running the invoke, one must check their login status on GH + by running `gh auth status`. + + :param issue_id: id number of the issue to create the branch for + :param repo_short_name: short name of the repo to use for the branch + name building. 
"current" refers to the repo where the call is + implemented + :param pbcopy: save the result into the system clipboard (only on + macOS) + """ + hlitauti.report_task(txt=hprint.to_str("issue_id repo_short_name")) + # Login. + gh_login(ctx) + # + issue_id = int(issue_id) + hdbg.dassert_lte(1, issue_id) + title, url = _get_gh_issue_title(issue_id, repo_short_name) + # Print or copy to clipboard. + msg = f"{title}: {url}" + hsystem.to_pbcopy(msg, pbcopy=pbcopy) + + +@task +def gh_issue_create( # type: ignore + ctx, + title="", + body="", + labels="", + assignees="", + project="", + repo_short_name="current", +): + """ + Create a new GitHub issue in the specified repository. + + ``` + # Create a simple issue + > invoke gh_issue_create --title "Fix bug in parser" + + # Create an issue with body and labels + > invoke gh_issue_create --title "Add new feature" --body "Description here" --labels "enhancement,priority-high" + + # Create an issue with assignees + > invoke gh_issue_create --title "Review PR" --assignees "user1,user2" + + # Create an issue and add to a project + > invoke gh_issue_create --title "Implement feature" --project "Development Board" + ``` + + :param title: title of the issue (required) + :param body: body/description of the issue + :param labels: comma-separated list of labels to apply + :param assignees: comma-separated list of GitHub usernames to assign + :param project: GitHub project name or number to add the issue to + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + :return: issue ID (integer) of the created issue + """ + hlitauti.report_task(txt=hprint.to_str("title repo_short_name")) + # Login. 
+ gh_login(ctx) + # + hdbg.dassert(title, "Title is required") + hdbg.dassert_eq(repo_short_name, "current") + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + _LOG.info( + "Creating issue with title '%s' in %s", + title, + repo_full_name_with_host, + ) + # Build the command. + cmd = ( + "gh issue create" + + f" --repo {repo_full_name_with_host}" + + f' --title "{title}"' + ) + if body: + cmd += f' --body "{body}"' + if labels: + cmd += f' --label "{labels}"' + if assignees: + cmd += f' --assignee "{assignees}"' + if project: + cmd += f' --project "{project}"' + # Execute the command and capture output. + # gh issue create outputs the URL of the created issue, e.g., + # https://github.com/cryptokaizen/csfy/issues/7572 + _, output = hsystem.system_to_string(cmd) + _LOG.debug("gh issue create output: %s", output) + # Extract the issue ID from the URL. + # The URL format is: https://github.com/org/repo/issues/123 + match = re.search(r"/issues/(\d+)", output) + hdbg.dassert(match, f"Could not extract issue ID from output: {output}") + issue_id = int(match.group(1)) + _LOG.info("Created issue #%s", issue_id) + return issue_id + + +# ############################################################################# + + +def _check_if_pr_exists(title: str) -> bool: + """ + Return whether a PR exists or not. + """ + # > gh pr diff AmpTask1955_Lint_20211219 + # no pull requests found for branch "AmpTask1955_Lint_20211219" + cmd = f"gh pr diff {title}" + rc = hsystem.system(cmd, abort_on_error=False) + pr_exists: bool = rc == 0 + return pr_exists + + +@task +def gh_create_pr( # type: ignore + ctx, + body="", + draft=True, + auto_merge=False, + repo_short_name="current", + title="", + reviewer="", + labels="", + assignee="", +): + """ + Create a draft PR for the current branch in the corresponding + repo_short_name. 
+ + ``` + # To open a PR in the web browser + > gh pr view --web + + # To see the status of the checks + > gh pr checks + ``` + + :param body: the body of the PR + :param draft: draft or ready-to-review PR + :param auto_merge: enable auto merging PR + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + :param title: title of the PR or the branch name, if title is empty + :param reviewer: GitHub username to request review from + :param labels: comma-separated list of labels to apply + :param assignee: GitHub username to assign the PR to + """ + hlitauti.report_task() + # Login. + gh_login(ctx) + # + branch_name = hgit.get_branch_name() + if not title: + # Use the branch name as title. + title = branch_name + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + _LOG.info( + "Creating PR with title '%s' for '%s' in %s", + title, + branch_name, + repo_full_name_with_host, + ) + if auto_merge: + hdbg.dassert( + not draft, "The PR can't be a draft in order to auto merge it" + ) + pr_exists = _check_if_pr_exists(title) + _LOG.debug(hprint.to_str("pr_exists")) + if pr_exists: + _LOG.warning("PR '%s' already exists: skipping creation", title) + else: + # Link the PR automatically to the branch, if possible. 
+ issue_id = hgit.extract_gh_issue_number_from_branch(branch_name) + _LOG.debug(hprint.to_str("issue_id")) + if issue_id and str(issue_id) not in body: + body += f"\n\n#{issue_id}" + _LOG.info("Added issue id %s to the PR body", issue_id) + cmd = ( + "gh pr create" + + f" --repo {repo_full_name_with_host}" + + (" --draft" if draft else "") + + f' --title "{title}"' + + f' --body "{body}"' + ) + if reviewer: + cmd += f" --reviewer {reviewer}" + _LOG.info("Added reviewer %s to the PR", reviewer) + if labels: + cmd += f' --label "{labels}"' + _LOG.info("Added labels %s to the PR", labels) + if assignee: + cmd += f" --assignee {assignee}" + # TODO(gp): Use _to_single_line_cmd + hlitauti.run(ctx, cmd) + if auto_merge: + cmd = f"gh pr ready {title}" + hlitauti.run(ctx, cmd) + cmd = f"gh pr merge {title} --auto --delete-branch --squash" + hlitauti.run(ctx, cmd) + + +# TODO(gp): Add gh_open_pr to jump to the PR from this branch. + +# TODO(Grisha): probably the section deserves a separate lib. +# ############################################################################# +# Buildmeister dashboard +# ############################################################################# + + +# TODO(Grisha): consider moving to cmamp as we run the workflow from cmamp. +@task +def gh_publish_buildmeister_dashboard_to_s3(ctx, mark_as_latest=True): # type: ignore + """ + Run the buildmeister dashboard notebook and publish it to S3. + + :param mark_as_latest: if True, mark the dashboard as `latest`, otherwise + just publish a timestamped copy + """ + hlitauti.report_task() + # Login to GH CLI. + if hserver.is_inside_ci(): + _LOG.info("Skipping login since running inside CI") + else: + gh_login(ctx) + # Run and publish the Buildmeister dashboard Jupyter notebook locally. 
+ run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") + amp_abs_path = hgit.get_amp_abs_path() + notebook_path = os.path.join( + amp_abs_path, "devops/notebooks/Master_buildmeister_dashboard.ipynb" + ) + dst_local_dir = os.path.join(amp_abs_path, "tmp.notebooks") + cmd_run_txt = [ + run_notebook_script_path, + f"--notebook {notebook_path}", + # The notebook does not require a config, so using a random dummy config. + # TODO(Grisha): consider creating a separate config builder for the notebook. + "--config_builder 'datapull.optima.common.qa.qa_check.build_dummy_data_reconciliation_config()'", + f"--dst_dir '{dst_local_dir}'", + "--publish", + "--num_threads serial", + ] + cmd_run_txt = " ".join(cmd_run_txt) + hsystem.system(cmd_run_txt) + # To avoid the dependency on `helpers.hs3`. + import helpers.hs3 as hs3 + + # Get HTML file name. + tmp_local_dir_name = os.path.join(amp_abs_path, "tmp.notebooks") + pattern = "Master_buildmeister_dashboard.0*.html" + only_files = True + use_relative_paths = False + local_html_files = hio.listdir( + tmp_local_dir_name, + pattern, + only_files=only_files, + use_relative_paths=use_relative_paths, + ) + # Assert if more than 1 file is returned. + hdbg.dassert_eq( + len(local_html_files), + 1, + f"Found more than one file in {tmp_local_dir_name} - {local_html_files}", + ) + local_html_file = local_html_files[0] + s3_build_path = os.path.join( + hrecouti.get_repo_config().get_html_bucket_path(), + "build/buildmeister_dashboard", + ) + aws_profile = "ck" + if mark_as_latest: + # Copy the dashboard notebook to S3 as latest build. + s3_latest_build_path = os.path.join( + s3_build_path, "Master_buildmeister_dashboard.latest.html" + ) + hs3.copy_file_to_s3(local_html_file, s3_latest_build_path, aws_profile) + # Copy the timestamped version of the dashboard notebook to S3. + # Need to add a trailing slash to the path to copy the file into the folder. 
+ # https://docs.python.org/3/library/os.path.html#os.path.join + s3_build_path_folder = os.path.join(s3_build_path, "") + hs3.copy_file_to_s3(local_html_file, s3_build_path_folder, aws_profile) + + +def _gh_run_and_get_json(cmd: str) -> List[Dict[str, Any]]: + """ + Run a `gh` command and remove colors when running inside a notebook. + + :param cmd: `gh` command to run + :return: parsed JSON output of a command + """ + _, _txt = hsystem.system_to_string(cmd) + if hsystem.is_running_in_ipynb(): + # Remove the colors from the text. + _txt = re.sub(r"\x1b\[((1;)*[0-9]{2})*m", "", _txt) + _LOG.debug(hprint.to_str("_txt")) + ret: List[Dict[str, Any]] = json.loads(_txt) + return ret + + +def gh_get_open_prs(repo: str) -> List[Dict[str, Any]]: + """ + Return a list of open PRs. + + :param repo: repo name in the format "organization/repo", e.g., + "cryptokaizen/cmamp" + """ + cmd = f"gh pr list --state 'open' --json id --repo {repo}" + pull_requests = _gh_run_and_get_json(cmd) + return pull_requests + + +def _get_best_workflow_run( + workflow_name: str, + workflow_runs: List[Dict[str, Any]], + *, + preferred_event: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """ + Pick the best available workflow run: + - If `preferred_event` is specified (e.g., "schedule"), try that first. + - Otherwise, return the most recent success/failure run. 
+ + :param workflow_name: GitHub Actions workflow name + :param workflow_runs: run metadata, sorted most-recent-first + :param preferred_event: trigger type to prioritize (e.g., "schedule") + :return: best-matching run + e.g., + ``` + { + 'conclusion': 'success', + 'status': 'completed', + 'url': 'https://github.com/cryptokaizen/cmamp/actions/runs/8714881296', + 'workflowName': 'Allure fast tests' + } + """ + run_status = None + if preferred_event: + for run in workflow_runs: + if run.get("event") == preferred_event and run["conclusion"] in [ + "success", + "failure", + ]: + run_status = run + break + if run_status is None: + _LOG.warning( + "No '%s' run found for workflow '%s'", + preferred_event, + workflow_name, + ) + if run_status is None: + for run in workflow_runs: + if run["conclusion"] in ["success", "failure"]: + run_status = run + break + return run_status + + +def gh_get_workflows( + repo_name: str, *, sort: bool = True +) -> List[Dict[str, str]]: + """ + Get a list of workflows for a given repo. + + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param sort: if True, sort the list of workflow names + :return: list of workflows, e.g., [{"id": "12520125", "name": "Fast + tests"}, {"id": "12520124", "name": "Slow tests"}] + """ + hdbg.dassert_isinstance(repo_name, str) + _LOG.debug(hprint.to_str("repo_name")) + # Get the workflow list. + cmd = f"gh workflow list --json id,name --repo {repo_name}" + workflows = _gh_run_and_get_json(cmd) + workflows = [ + {"id": str(workflow["id"]), "name": workflow["name"]} + for workflow in workflows + ] + # sort workflow by name + if sort: + workflows = sorted(workflows, key=lambda workflow: workflow["name"]) + return workflows + + +def gh_get_workflow_details( + repo_name: str, workflow_id: str, fields: List[str], limit: int +) -> List[Dict[str, Any]]: + """ + Return the stats for a given workflow. 
+ + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param workflow_id: workflow id, e.g., "12520125" + :param fields: list of fields to return, e.g., ["workflowName", "status"] + :param limit: number of runs to return + :return: workflow stats + Example output: + ``` + [ + { + "conclusion": "success", + "status": "completed", + "url": "https://github.com/cryptokaizen/cmamp/actions/runs/7757345960", + "workflowName": "Slow tests" + } + ] + ``` + """ + hdbg.dassert_isinstance(repo_name, str) + hdbg.dassert_isinstance(workflow_id, str) + hdbg.dassert_container_type(fields, List, str) + _LOG.debug(hprint.to_str("repo_name workflow_id fields")) + # Fetch the latest `limit` runs for status calculation. + cmd = f""" + gh run list \ + --json {",".join(fields)} \ + --repo {repo_name} \ + --branch master \ + --limit {limit} \ + --workflow "{workflow_id}" + """ + workflow_statuses = _gh_run_and_get_json(cmd) + # We still want to return the statuses even there are less runs than requested. E.g., there is a new workflow with a few runs or there is a workflow that was never run. + hdbg.dassert_eq(len(workflow_statuses), limit, only_warning=True) + _LOG.debug("workflow_statuses=\n%s", workflow_statuses) + return workflow_statuses + + +def gh_get_details_for_all_workflows( + repo_list: List[str], +) -> "pd.DataFrame": # noqa: F821 + """ + Get status for all the workflows. + + :param repo_list: list of repos to get the status for e.g., + ["cryptokaizen/cmamp", "cryptokaizen/orange"] + :return: a table with the status of all the workflows, e.g., + ``` + Repo workflowName url status + cryptokaizen/cmamp Allure fast tests https://github.com/cryptokaizen/cmamp/actions/... completed + cryptokaizen/cmamp Allure slow tests https://github.com/cryptokaizen/cmamp/actions/... completed + ``` + """ + import pandas as pd + + # TODO(Grisha): expose cols to the interface, i.e. a caller decides what to do. 
+ gh_cols = ["workflowName", "url", "status", "conclusion", "event"] + # Import locally in order not to introduce external dependencies to the lib. + repo_dfs = [] + for repo_name in repo_list: + # Get all workflows for the given repo. + workflows = gh_get_workflows(repo_name) + # For each workflow find the last run. + for workflow in workflows: + # Get at least a few runs to compute the status; this is useful when + # the latest run is not completed, in this case the run before the + # latest one tells the status for a workflow. + limit = 10 + workflow_id = workflow["id"] + workflow_name = workflow["name"] + workflow_statuses = gh_get_workflow_details( + repo_name, workflow_id, gh_cols, limit + ) + if len(workflow_statuses) < limit: + # TODO(Grisha): should we just insert empty rows as placeholders so that + # we know that such workflows exist? + _LOG.warning( + "Not enough runs to compute status for '%s', repo '%s', skipping the workflow", + workflow_name, + repo_name, + ) + continue + # Get the latest successful or failed workflow run (prioritize scheduled run if available). + SCHEDULED_WORKFLOWS = { + "Gitleaks Scan", + } + preferred_event = ( + "schedule" if workflow_name in SCHEDULED_WORKFLOWS else None + ) + workflow_status = _get_best_workflow_run( + workflow_name, workflow_statuses, preferred_event=preferred_event + ) + if workflow_status is None: + _LOG.warning( + "No successful or failed runs found for '%s', repo '%s', skipping the workflow", + workflow_name, + repo_name, + ) + continue + # Access the info of latest workflow run. + workflow_status = pd.DataFrame([workflow_status]) + workflow_status["repo_name"] = repo_name + repo_dfs.append(workflow_status) + # Collect per-repo tables into a single DataFrame. + df = pd.concat(repo_dfs, ignore_index=True) + # Rename the columns. 
+ df = df.drop(columns=["status"]) + df = df.rename(columns={"workflowName": "workflow_name"}) + return df + + +def gh_get_overall_build_status_for_repo( + repo_df: "pd.Dataframe", # noqa: F821 + *, + use_colors: bool = True, +) -> str: + """ + Return the overall status of the workflows for a repo. + + :param repo_df: table with the status of the workflows for a repo + :param use_colors: if True, return the status with colors + :return: overall status of the build for a repo + """ + if use_colors: + hdbg.dassert( + hsystem.is_running_in_ipynb(), + msg="The use_colors option is applicable only when running inside a Jupyter notebook", + ) + # See: https://stackoverflow.com/questions/19746350/how-to-change-color-in-markdown-cells-ipython-jupyter-notebook + failed_status = 'Failed' + success_status = 'Success' + else: + failed_status = "Failed" + success_status = "Success" + if "failure" in repo_df["conclusion"].values: + # The build is failed if at least one workflow is failed. + overall_status = failed_status + else: + overall_status = success_status + return overall_status + + +def gh_get_workflow_type_names( + repo_name: str, *, sort: bool = True +) -> List[str]: + """ + Get a list of workflow names for a given repo. + + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param sort: if True, sort the list of workflow names + :return: list of workflow names, e.g., ["Fast tests", "Slow tests"] + """ + hdbg.dassert_isinstance(repo_name, str) + _LOG.debug(hprint.to_str("repo_name")) + # Get the workflow list. + cmd = f"gh workflow list --json name --repo {repo_name}" + workflow_types = _gh_run_and_get_json(cmd) + workflow_names = [workflow["name"] for workflow in workflow_types] + if sort: + workflow_names = sorted(workflow_names) + # Check for duplicate workflow names. 
+ hdbg.dassert_no_duplicates( + workflow_names, + f"Found duplicate workflow names in repo '{repo_name}'", + ) + return workflow_names + + +def gh_get_org_team_names(org_name: str = "", *, sort: bool = True) -> List[str]: + """ + Get a list of team names for a GitHub organization. + + :param org_name: organization name, e.g., "causify-ai". If empty, + infers from the current repo + :param sort: if True, sort team names alphabetically + :return: list of team names (slugs) + Example output: + ``` + ["dev_system", "dev_frontend", "qa_team"] + ``` + """ + org_name = _get_org_name(org_name) + _LOG.debug(hprint.to_str("org_name")) + # Get the team list using GitHub API. + cmd = f"gh api /orgs/{org_name}/teams --paginate" + teams_data = _gh_run_and_get_json(cmd) + # Extract team slugs from the response. + team_names = [team["slug"] for team in teams_data] + # Sort team names if requested. + if sort: + team_names = sorted(team_names) + _LOG.debug("Found %s teams for org '%s'", len(team_names), org_name) + return team_names + + +def gh_get_team_member_names(team_slug: str, *, org_name: str = "") -> List[str]: + """ + Get a list of member usernames for a specific team in a GitHub + organization. + + :param team_slug: team slug (URL-friendly team name), e.g., "dev_system" + :param org_name: organization name, e.g., "causify-ai". If empty, + infers from the current repo + :return: list of member usernames (login names) + Example output: + ``` + ["username1", "username2", "username3"] + ``` + """ + org_name = _get_org_name(org_name) + hdbg.dassert_isinstance(team_slug, str) + _LOG.debug(hprint.to_str("org_name team_slug")) + # Get the team members using GitHub API. + cmd = f"gh api /orgs/{org_name}/teams/{team_slug}/members --paginate" + members_data = _gh_run_and_get_json(cmd) + # Extract usernames from the response. 
+ usernames = [member["login"] for member in members_data] + _LOG.debug( + "Found %s members in team '%s' (org: '%s')", + len(usernames), + team_slug, + org_name, + ) + return usernames + + +def make_clickable(url: str) -> str: + """ + Wrap a URL as an HTML anchor tag. + + :param url: URL to wrap (e.g., "https://github.com/causify-ai/cmamp/actions/...") + :return: HTML anchor string that makes the URL clickable in rendered Markdown + """ + anchor = f'{url}' + return anchor + + +def color_format(val: str, status_color_mapping: Dict[str, str]) -> str: + """ + Return a background-color style for DataFrame.style.map based on status. + + :param val: value to evaluate for status-based styling (e.g., + "success" or "failure") + :param status_color_mapping: map status strings to color values, + e.g.: { "success": "green", "failure": "red" } + :return: CSS string to apply as a style, e.g., "background-color: + green" + """ + color = status_color_mapping.get(val, "grey") + style = f"background-color: {color}" + return style + + +def render_repo_workflow_status_table( + workflow_df: "pd.DataFrame", # noqa: F821 + status_color_mapping: Dict[str, str], + timezone: str = "America/New_York", +) -> None: + """ + Render a dashboard summary of workflow statuses grouped by repo. 
+ + :param workflow_df: data with columns ["repo_name", "workflow_name", + "conclusion", "url"] + :param status_color_mapping: color for outcomes {"success": "green", + "failure": "red"} + :param timezone: timezone for timestamp display + """ + import pandas as pd + from IPython.display import Markdown, display + + workflow_df["url"] = workflow_df["url"].apply(make_clickable) + repos = workflow_df["repo_name"].unique() + display(Markdown("## Overall Status")) + current_timestamp = pd.Timestamp.now(tz=timezone) + display(Markdown(f"**Last run: {current_timestamp}**")) + for repo in repos: + repo_df = workflow_df[workflow_df["repo_name"] == repo] + overall_status = gh_get_overall_build_status_for_repo(repo_df) + display(Markdown(f"## {repo}: {overall_status}")) + repo_df = repo_df.drop(columns=["repo_name"]) + display( + repo_df.style.map( + color_format, + status_color_mapping=status_color_mapping, + subset=["conclusion"], + ) + ) + + +def get_workflow_run_ids( + repo_path: str, workflow_id: str, *, older_than_days: Optional[int] = None +) -> List[str]: + """ + Get workflow run IDs, optionally filtering by age. + + :param repo_path: repository path in format "org/repo" + :param workflow_id: GitHub workflow ID + :param older_than_days: if specified, only return runs older than + this many days + :return: list of run IDs + """ + # See GitHub CLI API documentation: https://cli.github.com/manual/gh_api + # We use the -q/--jq option to filter results using jq syntax. + if older_than_days is not None: + # Use jq to filter runs by age directly in the gh api command. 
+ # jq date filtering breakdown: + # - `fromdateiso8601` converts ISO 8601 date to Unix timestamp (seconds since epoch) + # - `now` returns current Unix timestamp + # - Days are converted to seconds (days * 86400 seconds/day) + # - Example: if older_than_days=30, cutoff = now - (30 * 86400) + # Only runs where created_at timestamp < cutoff are selected + cutoff_seconds = older_than_days * 86400 + # Log the cutoff date for debugging. + cutoff_date = datetime.datetime.now( + datetime.timezone.utc + ) - datetime.timedelta(days=older_than_days) + _LOG.debug("Filtering runs created before: %s", cutoff_date.isoformat()) + jq_filter = ( + f".workflow_runs[] | " + f"select((.created_at | fromdateiso8601) < (now - {cutoff_seconds})) | " + f".id" + ) + # WARNING: Using --paginate to fetch all workflow runs can be slow + # for workflows with a large number of runs (e.g., 1000+ runs). + # The GitHub API paginates results, and jq filters each page. + cmd = ( + f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " + f"--paginate -q '{jq_filter}'" + ) + else: + # Get all run IDs without date filtering. + # Example API output (one ID per line): + # 11758293857 + # 11758293856 + # 11758293855 + cmd = ( + f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " + "--paginate -q '.workflow_runs[].id'" + ) + # Execute command and parse output. + _, run_ids_output = hsystem.system_to_string(cmd) + run_ids = [ + run_id.strip() + for run_id in run_ids_output.strip().split("\n") + if run_id.strip() + ] + return run_ids + + +@task +def gh_delete_workflow_runs( # type: ignore + ctx, workflow_name, older_than_days=None, dry_run=False, confirmation=True +): + """ + Delete all workflow runs for a given workflow. + + :param workflow_name: name of the workflow to delete runs for + :param older_than_days: only delete runs older than this many days + (optional). If None, delete all runs. 
Example: + older_than_days=30 deletes runs created more than 30 days ago + :param dry_run: if True, show what would be deleted without actually + deleting + :param confirmation: if True, prompt user for confirmation before + deletion (default: True) + """ + hlitauti.report_task( + txt=hprint.to_str("workflow_name older_than_days dry_run confirmation") + ) + # Convert older_than_days to int if provided (invoke passes strings). + if older_than_days is not None: + older_than_days = int(older_than_days) + hdbg.dassert_lte(1, older_than_days) + # Login. + gh_login(ctx) + # + repo_full_name_with_host, _ = _get_repo_full_name_from_cmd("current") + # Get workflow ID by name. + repo_path = repo_full_name_with_host.replace("github.com/", "") + workflows = gh_get_workflows(repo_path) + workflow_id = None + for workflow in workflows: + if workflow["name"] == workflow_name: + workflow_id = workflow["id"] + break + if not workflow_id: + available_workflows = [w["name"] for w in workflows] + raise ValueError( + f"Workflow '{workflow_name}' not found. " + f"Available workflows: {available_workflows}" + ) + _LOG.info("Found workflow '%s' with ID: %s", workflow_name, workflow_id) + # Get all run IDs for this workflow, optionally filtering by date. + run_ids = get_workflow_run_ids( + repo_path, workflow_id, older_than_days=older_than_days + ) + # Check if any runs were found. + age_filter_msg = ( + f" older than {older_than_days} days" + if older_than_days is not None + else "" + ) + if not run_ids: + _LOG.info( + "No workflow runs%s found for '%s'", age_filter_msg, workflow_name + ) + return + _LOG.info("Found %d workflow runs%s to delete", len(run_ids), age_filter_msg) + # Prompt for confirmation if required. 
+ if confirmation and not dry_run: + confirmation_msg = ( + f"\nAre you sure you want to delete {len(run_ids)} workflow run(s)" + f"{age_filter_msg} for '{workflow_name}'?\n" + f"Repository: {repo_full_name_with_host}\n" + f"Type 'yes' or 'y' to confirm: " + ) + user_input = input(confirmation_msg).strip().lower() + if user_input not in ("yes", "y"): + _LOG.info("Deletion cancelled by user") + return + _LOG.info("User confirmed deletion, proceeding...") + # Delete each run. + deleted_count = 0 + failed_count = 0 + for run_id in run_ids: + try: + cmd = f"gh api -X DELETE /repos/{repo_path}/actions/runs/{run_id}" + _LOG.info("Deleting run %s", run_id) + hlitauti.run(ctx, cmd, dry_run=dry_run) + deleted_count += 1 + except (invexc.UnexpectedExit, RuntimeError) as e: + _LOG.error("Failed to delete run %s: %s", run_id, str(e)) + failed_count += 1 + _LOG.info( + "Deletion complete: %d successful, %d failed out of %d total runs", + deleted_count, + failed_count, + len(run_ids), + ) + + +# ############################################################################# + +# def gh_get_pr_title(pr_url: str) -> str: +# > gh pr view https://github.com/causify-ai/helpers/pull/754 --json title -q .title +# HelpersTask705_Extend_coverage_in_pytest_to_cover_when_we_run_through_system diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py new file mode 100644 index 000000000..b77bb125d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py @@ -0,0 +1,1500 @@ +""" +Import as: + +import helpers.lib_tasks_git as hlitagit +""" + +import logging +import os +import re +import stat +import subprocess +import time +from typing import Any, List + +from invoke.tasks import task + +import helpers.hdbg as hdbg +import 
helpers.hsystem as hsystem + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# Bits matching `chmod a+w` / `chmod a-w` on the symlink inode (not the target). +_SYMLINK_WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + + +def _collect_symlinks(dir: str) -> List[str]: + """ + Collect symlink paths under a given directory. + + :param dir: directory to walk + :return: symlink paths under `dir` + """ + out: List[str] = [] + for dirpath, dirnames, filenames in os.walk(dir, topdown=True): + # Skips `.git` directories. Does not follow symlinked directories. + if ".git" in dirnames: + dirnames.remove(".git") + for name in filenames: + path = os.path.join(dirpath, name) + if os.path.islink(path): + out.append(path) + for name in dirnames: + path = os.path.join(dirpath, name) + if os.path.islink(path): + out.append(path) + return out + + +def _add_write_perm_to_symlink(dir: str) -> None: + """ + Add write permission for all on each symlink under the given directory. + + :param dir: directory to walk + """ + _LOG.info("Adding write permission for all on each symlink under %s", dir) + for path in _collect_symlinks(dir): + try: + mode = os.lstat(path).st_mode + os.chmod( + path, + mode | _SYMLINK_WRITE_BITS, + ) + except OSError: + hdbg.dassert( + False, + "Failed to add write permissions to symlink; manual intervention may be needed", + ) + + +def _remove_write_perm_from_symlink(dir: str) -> None: + """ + Remove write permission for all on each symlink under a given directory. 
+ + :param dir: directory to walk + """ + _LOG.info("Removing write permission for all on each symlink under %s", dir) + for path in _collect_symlinks(dir): + if not os.path.exists(path): + _LOG.warning("Skipping broken symlink: %s", path) + continue + try: + mode = os.lstat(path).st_mode + os.chmod( + path, + mode & ~_SYMLINK_WRITE_BITS, + ) + except OSError: + hdbg.dassert( + False, + "Failed to remove write permissions from symlink; manual intervention may be needed", + ) + + +def run_git_recursively(ctx: Any, cmd_: str) -> None: + """ + Execute a git command in the main repository and all submodules. + + :param ctx: Invoke context + :param cmd_: Git command to execute + """ + cmd = cmd_ + hlitauti.run(ctx, cmd) + # Run the same command on all submodules. + cmd = f"git submodule foreach '{cmd_}'" + hlitauti.run(ctx, cmd) + + +@task +def git_pull(ctx): # type: ignore + """ + Pull latest changes from remote for main repo and all submodules. + + Temporarily enables write permissions on symlinks to allow pull operations. + """ + hlitauti.report_task() + # Temporarily grant write access to symlinks needed for pulling. + root_dir = hgit.get_client_root(super_module=False) + _add_write_perm_to_symlink(root_dir) + try: + # Pull with autostash to preserve local changes during pull. + cmd = "git pull --autostash" + run_git_recursively(ctx, cmd) + finally: + # Restore restricted permissions on symlinks after pull completes. + _remove_write_perm_from_symlink(root_dir) + + +@task +def git_fetch_master(ctx): # type: ignore + """ + Fetch master branch from remote without switching to it. + + Updates the local master branch to track the latest remote master without + affecting the current branch. + """ + hlitauti.report_task() + # Fetch remote master directly into local master ref (colon syntax). 
+ cmd = "git fetch origin master:master" + run_git_recursively(ctx, cmd) + + +@task +def git_merge_master( + ctx, + abort_if_not_ff=False, + abort_if_not_clean=True, + skip_fetch=False, + auto_merge=False, # type: ignore +): + """ + Merge `origin/master` into the current branch. + + :param abort_if_not_ff: abort if fast-forward is not possible + :param abort_if_not_clean: abort if the client is not clean + :param skip_fetch: skip fetching master + :param auto_merge: automatically commit and push if merge is + successful + """ + hlitauti.report_task() + # Verify working directory is clean before merging to avoid losing changes. + hgit.is_client_clean(dir_name=".", abort_if_not_clean=abort_if_not_clean) + # Fetch latest master from remote to ensure we merge the latest changes. + if not skip_fetch: + git_fetch_master(ctx) + # Perform merge, optionally restricting to fast-forward only to maintain linear history. + cmd = "git merge master" + if abort_if_not_ff: + cmd += " --ff-only" + hlitauti.run(ctx, cmd) + # Commit and push automatically if merge succeeded and user requested it. + if auto_merge: + _LOG.info("Auto-merge enabled: committing and pushing changes") + cmd = 'git commit -am "Merge master" && git push' + hlitauti.run(ctx, cmd) + + +@task +def git_clean(ctx, fix_perms_=False, dry_run=False): # type: ignore + """ + Clean the repo_short_name and its submodules from artifacts. + + Run `git status --ignored` to see what it's skipped. + """ + hlitauti.report_task(txt=hprint.to_str("dry_run")) + + def _run_all_repos(cmd: str) -> None: + # Use `run(ctx, cmd)` instead of `hsystem.system()` so unit tests can easily mock context. + hlitauti.run(ctx, cmd) + # Also clean submodules to ensure they're included in cleanup. + cmd = f"git submodule foreach '{cmd}'" + hlitauti.run(ctx, cmd) + + # Remove untracked files and directories from main repo and submodules. 
+ git_clean_cmd = "git clean -fd" + if dry_run: + git_clean_cmd += " --dry-run" + # Suppress errors since git clean reports non-fatal warnings. + git_clean_cmd += " >/dev/null 2>&1" + _run_all_repos(git_clean_cmd) + # TODO(*): Add "are you sure?" or a `--force switch` to avoid to cancel by + # mistake. + # Fix permissions on symlinks if requested, then clean any temporary files created. + if fix_perms_: + cmd = "invoke fix_perms" + hlitauti.run(ctx, cmd) + # Remove temporary files that may have been created during permission fix. + _run_all_repos(git_clean_cmd) + # Remove common build artifacts and cache directories. + to_delete = [ + r"*\.pyc", + r"*\.pyo", + r".coverage", + r".DS_Store", + r".ipynb_checkpoints", + r".mypy_cache", + r".pytest_cache", + r".ruff_cache", + r".venv", + r"__pycache__", + r"cfile", + r"tmp.*", + r"*.tmp", + r".*_cache", + "htmlcov", + ] + opts = [f"-name '{opt}'" for opt in to_delete] + opts = " -o ".join(opts) + cmd = f"find . {opts} | sort" + if not dry_run: + cmd += " | xargs rm -rf" + hlitauti.run(ctx, cmd) + + +@task +def git_add_all_untracked(ctx): # type: ignore + """ + Add all untracked files to Git. + """ + hlitauti.report_task() + # cmd = "git add $(git ls-files -o --exclude-standard)" + cmd = "git ls-files -o --exclude-standard -z | xargs -0 git add" + hlitauti.run(ctx, cmd) + + +@task +def git_patch_create( # type: ignore + ctx, mode="diff", modified=False, branch=False, last_commit=False, files="" +): + """ + Create a patch file for the entire repo_short_name client from the base + revision. This script accepts a list of files to package, if specified. + + The parameters `modified`, `branch`, `last_commit` have the same meaning as + in `_get_files_to_process()`. 
+ + :param mode: what kind of patch to create + - "diff": (default) creates a patch with the diff of the files + - "tar": creates a tar ball with all the files + """ + hlitauti.report_task( + txt=hprint.to_str("mode modified branch last_commit files") + ) + _ = ctx + # TODO(gp): Check that the current branch is up to date with master to avoid + # failures when we try to merge the patch. + hdbg.dassert_in( + mode, + ("tar", "diff"), + "Patch mode must be either 'tar' for archives or 'diff' for patches", + ) + # Currently only handles the current submodule (not parent repos). + # TODO(gp): Extend this to handle also nested repos. + super_module = False + git_client_root = hgit.get_client_root(super_module) + hash_ = hgit.get_head_hash(git_client_root, short_hash=True) + # Use timestamp and hash to ensure unique patch filenames across time. + timestamp = hlitauti.get_ET_timestamp() + tag = os.path.basename(git_client_root) + dst_file = f"patch.{tag}.{hash_}.{timestamp}" + if mode == "tar": + dst_file += ".tgz" + elif mode == "diff": + dst_file += ".patch" + else: + hdbg.dfatal("Invalid code path") + _LOG.debug("dst_file=%s", dst_file) + # Show what changes will be included in the patch. + _LOG.info( + "Difference between HEAD and master:\n%s", + hgit.get_summary_files_in_branch("master", dir_name="."), + ) + # Determine which files to include in the patch. + all_ = False + # Allow optional user-specified file subset (can be combined with other selectors). + mutually_exclusive = False + # Filter out directories; patches only work with files. + remove_dirs = True + files_as_list = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files, + mutually_exclusive, + remove_dirs, + ) + _LOG.info("Files to save:\n%s", hprint.indent("\n".join(files_as_list))) + if not files_as_list: + _LOG.warning("Nothing to patch: exiting") + return + files_as_str = " ".join(files_as_list) + # Choose command based on patch format: archive vs diff. 
+    cmd = ""
+    if mode == "tar":
+        # Create compressed tar archive of the selected files.
+        cmd = f"tar czvf {dst_file} {files_as_str}"
+        cmd_inv = "tar xvzf"
+    elif mode == "diff":
+        # Generate diff against various targets for different merge strategies.
+        opts: str
+        if modified:
+            # Only uncommitted changes in working tree.
+            opts = "HEAD"
+        elif branch:
+            # All changes since branch point (includes commits on current branch).
+            opts = "master..."
+        elif last_commit:
+            # Only changes in the most recent commit.
+            opts = "HEAD^"
+        else:
+            raise ValueError(
+                "You need to specify one among --modified, --branch, "
+                "--last-commit"
+            )
+        cmd = f"git diff {opts} --binary {files_as_str} >{dst_file}"
+        cmd_inv = "git apply"
+    else:
+        # Report the offending `mode`; `cmd` is always empty on this path.
+        raise ValueError(f"Invalid mode='{mode}'")
+    # Execute the patch creation command.
+    _LOG.info("Creating the patch into %s", dst_file)
+    hdbg.dassert_ne(
+        cmd,
+        "",
+        "Patch creation command must not be empty",
+    )
+    _LOG.debug("cmd=%s", cmd)
+    # Do not abort on error so that the instructions below are still printed.
+    rc = hsystem.system(cmd, abort_on_error=False)
+    # A return code of 0 means success: warn only on a non-zero code.
+    if rc != 0:
+        _LOG.warning("Command failed with rc=%d", rc)
+    # Provide instructions for applying the patch on different environments.
+    remote_file = os.path.basename(dst_file)
+    abs_path_dst_file = os.path.abspath(dst_file)
+    # The remote command is double-quoted so that `$CLIENT_PATH` is expanded
+    # by the local shell before being sent to the server.
+    msg = f"""
+    # To apply the patch and execute:
+    > git checkout {hash_}
+    > {cmd_inv} {abs_path_dst_file}
+
+    # To apply the patch to a remote client:
+    > export SERVER="server"
+    > export CLIENT_PATH="~/src"
+    > scp {dst_file} $SERVER:
+    > ssh $SERVER "cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}"
+    """
+    msg = hprint.dedent(msg)
+    print(msg)
+
+
+def _filter_git_files_by_type(
+    file_paths: List[str],
+    file_extensions: List[str],
+) -> List[str]:
+    """
+    Filter files by type for git_files task.
+
+    Unlike linters2 version, this returns a flat list (not a tuple)
+    and does not separate paired jupytext files.
+ + :param file_paths: files to filter + :param file_extensions: list of file extensions to include (e.g., ["py", "ipynb", "md"]) + :return: filtered list of files + """ + hdbg.dassert_isinstance(file_extensions, list) + filtered = [] + for f in file_paths: + for ext in file_extensions: + if f.endswith(f".{ext}"): + filtered.append(f) + break + return filtered + + +@task +def git_files( # type: ignore + ctx, + modified=False, + branch=False, + last_commit=False, + file_types="", + pbcopy=False, + only_print_files=False, +): + """ + Report which files are changed in the current branch with respect to master. + + The params have the same meaning as in `_get_files_to_process()`. + + Examples: + > invoke git_files --modified + > invoke git_files --branch --file_types "py,ipynb" + > invoke git_files --last_commit --file_types "py" + + :param file_types: comma-separated list of file extensions to include + - E.g., "py,ipynb,md" + :param only_print_files: only print files without logging headers/footers (default: False) + """ + if not only_print_files: + hlitauti.report_task() + _ = ctx + all_ = False + files = "" + # Use mutually_exclusive=True to enforce exactly one filter mode. + mutually_exclusive = True + remove_dirs = True + files_as_list = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files, + mutually_exclusive, + remove_dirs, + ) + # Parse file_types into a list of extensions. + if file_types: + file_extensions = [ext.strip() for ext in file_types.split(",")] + # Filter by file type. + files_as_list = _filter_git_files_by_type(files_as_list, file_extensions) + else: + # file_types="" means every file, so don't filter. + pass + print("\n".join(sorted(files_as_list))) + # Optionally copy the file list to clipboard for easy pasting. 
+ if not only_print_files: + res = " ".join(files_as_list) + hsystem.to_pbcopy(res, pbcopy) + + +@task +def git_last_commit_files(ctx, pbcopy=True): # type: ignore + """ + Print the status of the files in the previous commit. + + :param pbcopy: save the result into the system clipboard (only on + macOS) + """ + # Display the raw git log output for the latest commit. + cmd = 'git log -1 --name-status --pretty=""' + hlitauti.run(ctx, cmd) + # Parse the files that were actually committed (filtering out deletions if needed). + files = hgit.get_previous_committed_files(".") + txt = "\n".join(files) + print(f"\n# The files modified are:\n{txt}") + # Optionally copy the file list to clipboard for easy pasting into commands. + res = " ".join(files) + hsystem.to_pbcopy(res, pbcopy) + + +@task +def git_roll_amp_forward(ctx): # type: ignore + """ + Update amp submodule pointer to the latest master commit. + + Checks out master in amp, pulls latest changes, updates the parent repo's + submodule pointer, and commits the change. + """ + hlitauti.report_task() + AMP_DIR = "amp" + if os.path.exists(AMP_DIR): + # Update amp submodule to point to the latest master. + cmds = [ + f"cd {AMP_DIR} && git checkout master", + f"cd {AMP_DIR} && git pull", + # Stage the submodule pointer change in the parent repository. 
+ f"git add {AMP_DIR}", + f"git commit -m 'Roll {AMP_DIR} pointer forward'", + "git push", + ] + for cmd in cmds: + hlitauti.run(ctx, cmd) + else: + _LOG.warning("%s does not exist, aborting", AMP_DIR) + + +# TODO(gp): Add git_co(ctx) +# Reuse hgit.git_stash_push() and hgit.stash_apply() +# git stash save your-file-name +# git checkout master +# # do whatever you had to do with master +# git checkout staging +# git stash pop + + +# ############################################################################# +# Branches workflows +# ############################################################################# + + +# TODO(gp): Consider renaming the commands as `git_branch_*` + + +@task +def git_branch_files(ctx): # type: ignore + """ + Report which files were added, changed, and modified in the current branch + with respect to master. + + This is a more detailed version of `invoke git_files --branch`, showing file + statuses (added, modified, deleted) rather than just the file list. + """ + hlitauti.report_task() + _ = ctx + # Display the detailed summary of changes made on this branch. + print( + "Difference between HEAD and master:\n" + + hgit.get_summary_files_in_branch("master", dir_name=".") + ) + + +@task +def git_branch_create( # type: ignore + ctx, + branch_name="", + issue_id=0, + repo_short_name="current", + suffix="", + only_branch_from_master=True, + check_branch_name=True, +): + """ + Create and push upstream branch `branch_name` or the one corresponding to + `issue_id` in repo_short_name `repo_short_name`. 
+
+    E.g.,
+    ```
+    > git checkout -b LemTask169_Get_GH_actions
+    > git push --set-upstream origin LemTask169_Get_GH_actions
+    ```
+
+    :param branch_name: name of the branch to create (e.g.,
+        `LemTask169_Get_GH_actions`)
+    :param issue_id: use the canonical name for the branch corresponding to that
+        issue
+    :param repo_short_name: name of the GitHub repo_short_name that the `issue_id`
+        belongs to
+        - "current" (default): the current repo_short_name
+        - short name (e.g., "amp", "lm") of the branch
+    :param suffix: suffix (e.g., "02") to add to the branch name when using issue_id
+    :param only_branch_from_master: only allow to branch from master
+    :param check_branch_name: make sure the name of the branch is valid like
+        `{Amp,...}TaskXYZ_...`
+    """
+    hlitauti.report_task()
+    if issue_id > 0:
+        # Convert GitHub issue ID to branch name.
+        hdbg.dassert_eq(
+            branch_name,
+            "",
+            "Cannot specify both --issue and --branch_name; choose one",
+        )
+        title, _ = hlitagh._get_gh_issue_title(issue_id, repo_short_name)
+        branch_name = title
+        _LOG.info(
+            "Issue %d in %s repo_short_name corresponds to '%s'",
+            issue_id,
+            repo_short_name,
+            branch_name,
+        )
+        if suffix != "":
+            # Add the suffix.
+            _LOG.debug("Adding suffix '%s' to '%s'", suffix, branch_name)
+            if suffix[0] in ("-", "_"):
+                _LOG.warning(
+                    "Suffix '%s' should not start with '%s': removing",
+                    suffix,
+                    suffix[0],
+                )
+                # Strip the leading separators (not the trailing ones), since
+                # a "_" separator is added explicitly below.
+                suffix = suffix.lstrip("-_")
+            branch_name += "_" + suffix
+    _LOG.info("branch_name='%s'", branch_name)
+    hdbg.dassert_ne(
+        branch_name,
+        "",
+        "Branch name cannot be empty",
+    )
+    if check_branch_name:
+        # Reject numeric-only branch names to avoid confusion with commit SHAs.
+        m = re.match(r"^\d+$", branch_name)
+        hdbg.dassert(
+            not m,
+            "Branch names with only numbers are invalid",
+        )
+        # Enforce naming convention `{RepoPrefix}TaskXYZ_Description` for consistency.
+        # The valid format of a branch name is `AmpTask1903_Implemented_system_...`.
+ m = re.match(r"^\S+Task\d+_\S+$", branch_name) + hdbg.dassert( + m, + "Branch name must follow convention: '{RepoPrefix,Amp,...}TaskXYZ_...'", + ) + # Prevent accidental duplicate branches. + hdbg.dassert( + not hgit.does_branch_exist(branch_name, mode="all"), + "Branch '%s' already exists", + branch_name, + ) + # Make sure we are branching from `master`, unless that's what the user wants. + # TODO(Vlad): Remove before merging - temporarily allowing branching from non-master. + curr_branch = hgit.get_branch_name() + if curr_branch != "master": + if only_branch_from_master: + _LOG.warning( + f"Branching from '{curr_branch}' instead of 'master'. " + "This is temporarily allowed but should be reviewed before merging." + ) + # hdbg.dfatal( + # f"You should branch from master and not from '{curr_branch}'" + # ) + # Fetch master. + cmd = "git pull --autostash --rebase" + hlitauti.run(ctx, cmd) + # git checkout -b LmTask169_Get_GH_actions_working_on_lm + cmd = f"git checkout -b {branch_name}" + hlitauti.run(ctx, cmd) + cmd = f"git push --set-upstream origin {branch_name}" + hlitauti.run(ctx, cmd) + + +# TODO(gp): @all Move to hgit. +def _delete_branches(ctx: Any, tag: str, confirm_delete: bool) -> None: + """ + Delete branches that have been merged into master. + + :param ctx: Invoke context + :param tag: Either "local" for local branches or "remote" for remote branches + :param confirm_delete: If True, ask user for confirmation before deleting + """ + if tag == "local": + # Delete local branches that are already merged into master. + # > git branch --merged + # * AmpTask1251_Update_GH_actions_for_amp_02 + find_cmd = r"git branch --merged master | grep -v master | grep -v \*" + delete_cmd = "git branch -d" + elif tag == "remote": + # Get the branches to delete. 
+ find_cmd = ( + "git branch -r --merged origin/master" + + r" | grep -v master | sed 's/origin\///'" + ) + delete_cmd = "git push origin --delete" + else: + raise ValueError(f"Invalid tag='{tag}'") + # TODO(gp): Use system_to_lines + _, txt = hsystem.system_to_string(find_cmd, abort_on_error=False) + branches = hsystem.text_to_list(txt) + # Print info. + _LOG.info( + "There are %d %s branches to delete:\n%s", + len(branches), + tag, + "\n".join(branches), + ) + if not branches: + # No branch to delete, then we are done. + return + # Ask whether to continue. + if confirm_delete: + hsystem.query_yes_no( + hdbg.WARNING + f": Delete these {tag} branches?", abort_on_no=True + ) + for branch in branches: + cmd_tmp = f"{delete_cmd} {branch}" + hlitauti.run(ctx, cmd_tmp) + + +@task +def git_branch_delete_merged(ctx, confirm_delete=True): # type: ignore + """ + Remove (both local and remote) branches that have been merged into master. + """ + hlitauti.report_task() + # Ensure user is on master since we're deleting branches merged into master. + hdbg.dassert_eq( + hgit.get_branch_name(), + "master", + "Must be on master branch to safely delete merged branches", + ) + # + cmd = "git fetch --all --prune" + hlitauti.run(ctx, cmd) + # Delete local and remote branches that are already merged into master. + _delete_branches(ctx, "local", confirm_delete) + _delete_branches(ctx, "remote", confirm_delete) + # + cmd = "git fetch --all --prune" + hlitauti.run(ctx, cmd) + + +@task +def git_branch_rename(ctx, new_branch_name): # type: ignore + """ + Rename current branch both locally and remotely. + """ + hlitauti.report_task() + old_branch_name = hgit.get_branch_name(".") + # Ensure new branch name is actually different to avoid no-op rename. 
+ hdbg.dassert_ne( + old_branch_name, + new_branch_name, + "New branch name must be different from current branch name", + ) + msg = ( + f"Do you want to rename the current branch '{old_branch_name}' to " + f"'{new_branch_name}'" + ) + hsystem.query_yes_no(msg, abort_on_no=True) + # https://stackoverflow.com/questions/30590083 + # Rename the local branch to the new name. + # > git branch -m + cmd = f"git branch -m {new_branch_name}" + hlitauti.run(ctx, cmd) + # Delete the old branch on remote. + # > git push --delete + cmd = f"git push origin --delete {old_branch_name}" + hlitauti.run(ctx, cmd) + # Prevent Git from using the old name when pushing in the next step. + # Otherwise, Git will use the old upstream name instead of the new name. + # > git branch --unset-upstream + cmd = f"git branch --unset-upstream {new_branch_name}" + hlitauti.run(ctx, cmd) + # Push the new branch to remote. + # > git push + cmd = f"git push origin {new_branch_name}" + hlitauti.run(ctx, cmd) + # Reset the upstream branch for the new_name local branch. + # > git push -u + cmd = f"git push origin -u {new_branch_name}" + hlitauti.run(ctx, cmd) + print("Done") + + +@task +def git_branch_next_name(ctx, branch_name=None, method="auto"): # type: ignore + """ + Return a name derived from the current branch so that the branch doesn't + exist.
+ + :param branch_name: if `None` use the current branch name, otherwise specify it + :param method: method to use ('auto', 'github_api', 'linear_scan') + - 'auto' (default): tries GitHub API first, falls back to linear scan + - 'github_api': use only GitHub API method (fast) + - 'linear_scan': use only linear scan method (always works) + + E.g., `AmpTask1903_Implemented_system_Portfolio` -> + `AmpTask1903_Implemented_system_Portfolio_3` + """ + hlitauti.report_task() + _ = ctx + branch_next_name = hgit.get_branch_next_name( + curr_branch_name=branch_name, method=method, log_verb=logging.INFO + ) + print(f"branch_next_name='{branch_next_name}'") + + +@task +def git_branch_copy( # type: ignore + ctx, + new_branch_name="", + skip_git_merge_master=False, + use_patch=False, + check_branch_name=True, +): + """ + Create a new branch with the same content of the current branch. + + :param new_branch_name: name for the new branch + :param skip_git_merge_master: skip merging master into current branch + :param use_patch: apply patching instead of merging + :param check_branch_name: enforce branch naming convention like + `{Amp,...}TaskXYZ_...` + """ + # Patch-based copying is not yet implemented. + hdbg.dassert( + not use_patch, + "Patch-based branch copying is not yet implemented", + ) + # Remove untracked files to ensure clean state when copying branch. + cmd = "git clean -fd" + hlitauti.run(ctx, cmd) + curr_branch_name = hgit.get_branch_name() + # Cannot copy master branch since it would be copying the source to itself. + hdbg.dassert_ne( + curr_branch_name, + "master", + "Cannot copy master branch", + ) + # Sync with master first to ensure new branch includes latest changes (if requested). + if not skip_git_merge_master: + cmd = "invoke git_merge_master --abort-if-not-ff" + hlitauti.run(ctx, cmd) + else: + _LOG.warning("Skipping git_merge_master as requested") + if use_patch: + # TODO(gp): Create a patch or do a `git merge`. 
+ pass + # Generate unique branch name if not provided. + if new_branch_name is None or new_branch_name == "": + new_branch_name = hgit.get_branch_next_name() + _LOG.info("new_branch_name='%s'", new_branch_name) + hdbg.dassert_ne( + new_branch_name, + None, + "Branch name must not be None after generation", + ) + # Allow scratch branches to bypass naming convention. + if new_branch_name.startswith("gp_scratch"): + check_branch_name = False + # Create or checkout the target branch. + mode = "all" + new_branch_exists = hgit.does_branch_exist(new_branch_name, mode) + if new_branch_exists: + # Switch to existing branch to copy changes into it. + cmd = f"git checkout {new_branch_name}" + else: + # Create new branch from master as base. + cmd = f"git checkout master && invoke git_branch_create -b '{new_branch_name}'" + if not check_branch_name: + cmd += " --no-check-branch-name" + hlitauti.run(ctx, cmd) + if use_patch: + # TODO(gp): Apply the patch. + pass + # Squash merge copies all commits as a single change without creating a merge commit. + cmd = f"git merge --squash --ff {curr_branch_name} && git reset HEAD" + hlitauti.run(ctx, cmd) + + +# /////////////////////////////////////////////////////////////////////////////// + + +def _git_diff_with_branch( + ctx: Any, + hash_: str, + tag: str, + # + dir_name: str, + subdir: str, + # + diff_type: str, + keep_extensions: str, + skip_extensions: str, + file_name: str, + # + only_print_files: bool, + dry_run: bool, +) -> None: + """ + Diff files from this client against files in a branch using vimdiff. + + Same parameters as `git_branch_diff_with`. + """ + _LOG.debug( + hprint.to_str( + "hash_ tag dir_name diff_type subdir keep_extensions skip_extensions" + " file_name only_print_files dry_run" + ) + ) + # Diff only works on non-master branches to avoid comparing with itself. 
+ curr_branch_name = hgit.get_branch_name() + hdbg.dassert_ne( + curr_branch_name, + "master", + "Cannot diff master branch against itself", + ) + # Retrieve the list of changed files between current state and the given hash. + cmd = [] + cmd.append("git diff") + if diff_type: + cmd.append(f"--diff-filter={diff_type}") + cmd.append(f"--name-only HEAD {hash_}") + cmd = " ".join(cmd) + files = hsystem.system_to_files( + cmd, dir_name, remove_files_non_present=False + ) + files = sorted(files) + _LOG.debug("%s", "\n".join(files)) + # Filter to a single specific file if requested. + if file_name: + _LOG.debug("Filter by file_name") + _LOG.info("Before filtering files=%s", len(files)) + files_tmp = [] + for f in files: + if f == file_name: + files_tmp.append(f) + hdbg.dassert_eq( + 1, + len(files_tmp), + "Can't find file_name='%s' in\n%s", + file_name, + "\n".join(files), + ) + files = files_tmp + _LOG.info("After filtering by file_name: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Keep only files with specified extensions (useful for focusing on code vs docs). + if keep_extensions: + _LOG.debug("# Filter by keep_extensions") + _LOG.debug("Before filtering files=%s", len(files)) + extensions_lst = keep_extensions.split(",") + _LOG.warning( + "Keeping files with %d extensions: %s", + len(extensions_lst), + extensions_lst, + ) + files_tmp = [] + for f in files: + if any(f.endswith(ext) for ext in extensions_lst): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by keep_extensions: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Exclude files with specified extensions (useful for skipping config or build files). 
+ if skip_extensions: + _LOG.debug("# Filter by skip_extensions") + _LOG.debug("Before filtering files=%s", len(files)) + extensions_lst = skip_extensions.split(",") + _LOG.warning( + "Skipping files with %d extensions: %s", + len(extensions_lst), + extensions_lst, + ) + files_tmp = [] + for f in files: + if not any(f.endswith(ext) for ext in extensions_lst): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by skip_extensions: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Limit diff to files within a specific subdirectory. + if subdir != "": + _LOG.debug("# Filter by subdir") + _LOG.debug("Before filtering files=%s", len(files)) + files_tmp = [] + for f in files: + if f.startswith(subdir): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by subdir: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Summary of what will be diffed. + _LOG.info("\n" + hprint.frame(f"# files={len(files)}")) + _LOG.info("\n" + "\n".join(files)) + if len(files) == 0: + _LOG.warning("No files match the filter criteria: exiting") + return + if only_print_files: + _LOG.warning("Exiting as per user request with --only-print-files") + return + # Create temporary directory to store base versions for comparison. + root_dir = hgit.get_repo_full_name_from_client(super_module=True) + # TODO(gp): We should get a temp dir. + dst_dir = f"/tmp/{root_dir}/tmp.{tag}" + hio.create_dir(dst_dir, incremental=False) + # Build vimdiff commands for each file, retrieving base version from source hash. + script_txt = [] + for branch_file in files: + _LOG.debug("\n%s", hprint.frame(f"branch_file={branch_file}")) + # Use current file as right side (what the branch currently has). + if os.path.exists(branch_file): + right_file = branch_file + else: + # For deleted files, use /dev/null as the right side. + right_file = "/dev/null" + # Flatten directory structure to avoid naming conflicts in temp directory. 
+ tmp_file = branch_file + tmp_file = tmp_file.replace("/", "_") + tmp_file = os.path.join(dst_dir, tmp_file) + _LOG.debug( + "Extracting base version of %s to %s", + branch_file, + tmp_file, + ) + # Extract the base version from the specified hash/branch. + cmd = f"git show {hash_}:{branch_file} >{tmp_file}" + rc = hsystem.system(cmd, abort_on_error=False) + if rc != 0: + # File is new in the branch (didn't exist in base hash). + _LOG.debug("File '%s' is new (doesn't exist in base)", branch_file) + left_file = "/dev/null" + else: + left_file = tmp_file + # Generate vimdiff command to compare base and current versions. + cmd = f"vimdiff {left_file} {right_file}" + _LOG.debug("-> %s", cmd) + script_txt.append(cmd) + script_txt = "\n".join(script_txt) + # Display the diff commands that will be executed. + _LOG.info("\n%s" % hprint.frame("Diffing script")) + _LOG.info(script_txt) + # Create executable script for easy manual re-running. + script_file_name = f"./tmp.vimdiff_branch_with_{tag}.sh" + msg = f"To diff against {tag} run" + hio.create_executable_script(script_file_name, script_txt, msg=msg) + hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) + # Clean up temporary files. + cmd = f"rm -rf {dst_dir}" + hlitauti.run(ctx, cmd, dry_run=dry_run) + + +def _git_diff_with_branch_wrapper( + ctx: Any, + hash_: str, + tag: str, + # + dir_name: str, + subdir: str, + include_submodules: bool, + # + diff_type: str, + keep_extensions: str, + skip_extensions: str, + python: bool, + file_name: str, + # + only_print_files: bool, + dry_run: bool, +) -> None: + """ + Wrapper for _git_diff_with_branch that handles Python-specific filtering and submodules. + + Applies Python-specific extension filter if requested, then delegates to _git_diff_with_branch. + If include_submodules is True, also runs the diff for the amp submodule if present. 
+ + Parameters are the same as _git_diff_with_branch with the addition of: + :param include_submodules: if True, also diff the amp submodule + :param python: if True, only diff Python files (overrides extension filters) + """ + hdbg.dassert_eq(dir_name, ".") + # If Python mode is enabled, override all extension filters to only diff Python files. + if python: + hdbg.dassert_eq( + diff_type, + "", + "Cannot specify diff_type with python mode", + ) + hdbg.dassert_eq( + keep_extensions, + "", + "Cannot specify keep_extensions with python mode", + ) + hdbg.dassert_eq( + skip_extensions, + "", + "Cannot specify skip_extensions with python mode", + ) + hdbg.dassert_eq( + file_name, + "", + "Cannot specify file_name with python mode", + ) + keep_extensions = "py" + # Diff files in the main repository. + _git_diff_with_branch( + ctx, + hash_, + tag, + dir_name, + subdir, + diff_type, + keep_extensions, + skip_extensions, + file_name, + only_print_files, + dry_run, + ) + # Also diff the amp submodule if it exists and was requested. + if include_submodules: + if hgit.is_amp_present(): + with hsystem.cd("amp"): + _git_diff_with_branch( + ctx, + hash_, + tag, + dir_name, + subdir, + diff_type, + keep_extensions, + skip_extensions, + file_name, + only_print_files, + dry_run, + ) + + +@task +def git_branch_diff_with( # type: ignore + ctx, + target="base", + hash_value="", + # Where to diff. + subdir="", + include_submodules=False, + # What files to diff. + diff_type="", + keep_extensions="", + skip_extensions="", + python=False, + file_name="", + # What actions. + only_print_files=False, + dry_run=False, +): + """ + Diff files of the current branch with master at the branching point. 
+ + :param subdir: subdir to consider for diffing, instead of `.` + :param target: + - `base`: diff with respect to the branching point + - `master`: diff with respect to `origin/master` + - `head`: diff modified files + - `hash`: diff with respect to hash specified in `hash` + :param hash_value: the hash to use with target="hash" + :param include_submodules: run recursively on all submodules + :param diff_type: files to diff using git `--diff-filter` options + :param keep_extensions: a comma-separated list of extensions to check, e.g., + 'csv,py'. An empty string means keep all the extensions + :param skip_extensions: a comma-separated list of extensions to skip, e.g., + 'txt'. An empty string means do not skip any extension + :param only_print_files: print files to diff and exit + :param dry_run: execute diffing script or not + """ + # Determine the comparison target based on user preference. + dir_name = "." + hdbg.dassert_in(target, ("base", "master", "head", "hash"), "Invalid target") + # Resolve target to a specific git hash for consistent diffing. + if target == "base": + # Compare against the point where this branch diverged from master. + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'base'", + ) + hash_value = hgit.get_branch_hash(dir_name=dir_name) + tag = "base" + elif target == "master": + # Compare against the current state of the remote master branch. + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'master'", + ) + hash_value = "origin/master" + tag = "origin_master" + elif target == "head": + # Compare working directory against HEAD (uncommitted changes). + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'head'", + ) + hash_value = "" + tag = "head" + elif target == "hash": + # Compare against a user-specified commit hash. 
+ hdbg.dassert_ne( + hash_value, + "", + "Must provide hash_value when target is 'hash'", + ) + tag = f"hash@{hash_value}" + else: + raise ValueError(f"Invalid target='{target}'") + _git_diff_with_branch_wrapper( + ctx, + hash_value, + tag, + # + dir_name, + subdir, + include_submodules, + # + diff_type, + keep_extensions, + skip_extensions, + python, + file_name, + # + only_print_files, + dry_run, + ) + + +@task +def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore + """ + Copy the code from the src Git client to the dst Git client. + + :param file_name: the name of the file to copy (which is under + `src_git_dir`) + :param src_git_dir: the directory of the source Git client (e.g., + "/Users/saggese/src/helpers1") + :param dst_git_dir: the directory of the destination Git client (e.g., + "/Users/saggese/src/helpers2") + """ + _ = ctx + src_git_dir = hgit.resolve_git_client_dir(src_git_dir) + dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) + # Map source file path to equivalent path in destination repository. + dst_file_path = hgit.project_file_name_in_git_client( + file_name, + src_git_dir, + dst_git_dir, + check_src_file_exists=True, + check_dst_file_exists=False, + ) + _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) + # Perform the file copy operation. + hsystem.system_to_string(f"cp {file_name} {dst_file_path}") + + +# ############################################################################# + + +def _get_submodule_paths() -> List[str]: + """ + Get list of submodule paths from .gitmodules file. + + :return: List of submodule directory paths, empty if no submodules + found + """ + gitmodules_path = ".gitmodules" + if not os.path.exists(gitmodules_path): + _LOG.info("No .gitmodules file found") + return [] + # Extract submodule paths from git config using the .gitmodules file.
+ cmd = "git config --file .gitmodules --get-regexp path" + _, output = hsystem.system_to_string(cmd) + submodule_paths = [] + for line in output.strip().split("\n"): + if line: + # Parse format: "submodule..path " to extract path. + path = line.split(" ", 1)[1] + submodule_paths.append(path) + return submodule_paths + + +def _get_branch_name(submodule_path: str) -> str: + """ + Get the current branch name for a git repository. + + :param submodule_path: Path to the git repository directory + :return: Branch name or error message + """ + hdbg.dassert_dir_exists(submodule_path) + hdbg.dassert_path_exists(os.path.join(submodule_path, ".git")) + # Query git to get the symbolic name of the current HEAD. + cmd = f"cd {submodule_path} && git rev-parse --abbrev-ref HEAD" + _, branch_name = hsystem.system_to_string(cmd) + return branch_name.strip() + + +@task +def git_branches(ctx): # type: ignore + """ + Print the branch name for the main repository and each git submodule + directory. + + Example usage:: + > dev_scripts_helpers/git/print_git_branches.py + . (main): master + submodule1: feature/new-feature + submodule2: develop + submodule3: main + """ + _ = ctx + # Display main repository branch first for clarity. + main_branch = _get_branch_name(".") + print(f". -> {main_branch}") + # List submodule branches to detect if any are out of sync. + submodule_paths = _get_submodule_paths() + if not submodule_paths: + _LOG.debug("No git submodules found in this repository") + return + # Report branch for each submodule. + for path in submodule_paths: + branch_name = _get_branch_name(path) + print(f"{path} -> {branch_name}") + + +@task +def git_branch_is_merged(ctx): # type: ignore + """ + Check if the current branch was merged into master using GitHub API and git. + + Uses GitHub API to check for open/closed PRs and git to verify branch presence on remote. 
+ """ + _ = ctx + hlitauti.report_task() + branch_name = hgit.get_branch_name() + print(f"branch_name='{branch_name}'") + # Check for PRs targeting master from the current branch on GitHub. + cmd = f"gh pr list --base master --head {branch_name}" + ctx.run(cmd, pty=True) + # Verify if the branch still exists on the remote repository. + cmd = f"git ls-remote --heads origin {branch_name}" + ctx.run(cmd, pty=True) + + +@task +def git_backup( + ctx, + file_mode="all", + backup_dir=None, + include_subrepos=True, + dry_run=False, +): # type: ignore + """ + Create a zip file with modified and/or untracked files from the current + repository and optionally its submodules. + + The zip file is created with a timestamp-based name in the specified + backup directory (default: $HOME/src/backups). + Example: `modified_files.helpers_root.20251119_130034.zip` + + :param file_mode: which files to include: "all" (default), "modified", or + "untracked" + :param backup_dir: directory where to save the zip file (default: + $HOME/src/backups) + :param include_subrepos: whether to include submodule files (default: True) + :param dry_run: if True, only print the files that would be included + without creating the zip + """ + hlitauti.report_task( + txt=hprint.to_str("file_mode, backup_dir, include_subrepos, dry_run") + ) + _ = ctx + # Validate backup scope to ensure user intent is clear. + valid_modes = ["all", "modified", "untracked"] + hdbg.dassert_in( + file_mode, + valid_modes, + "Invalid file_mode '%s'; must be one of: %s", + file_mode, + ", ".join(valid_modes), + ) + # Use default backup location if not specified. + if backup_dir is None: + backup_dir = os.path.join(os.path.expanduser("~"), "src", "backups") + hio.create_dir(backup_dir, incremental=True) + # Determine repository name for readable backup file naming. + super_module = False + git_client_root = hgit.get_client_root(super_module) + # Include timestamp to avoid overwriting previous backups. 
+ timestamp = hlitauti.get_ET_timestamp() + repo_name = os.path.basename(git_client_root) + zip_file_name = f"modified_files.{repo_name}.{timestamp}.zip" + # Collect files from the main repository. + _LOG.info("Collecting %s files from main repository...", file_mode) + main_repo_files = hgit.get_modified_and_untracked_files(".", mode=file_mode) + _LOG.info("Found %d files in main repository", len(main_repo_files)) + all_files = [] + for file_path in main_repo_files: + all_files.append((".", file_path)) + # Also include submodule files if requested to ensure complete backup. + if include_subrepos: + submodule_paths = _get_submodule_paths() + if submodule_paths: + _LOG.info( + "Found %d submodule(s), collecting files...", + len(submodule_paths), + ) + for submodule_path in submodule_paths: + hdbg.dassert_dir_exists( + submodule_path, + msg=f"Submodule path does not exist: {submodule_path}", + ) + _LOG.info("Checking submodule: %s", submodule_path) + submodule_files = hgit.get_modified_and_untracked_files( + submodule_path, mode=file_mode + ) + _LOG.info( + "Found %d files in submodule %s", + len(submodule_files), + submodule_path, + ) + for file_path in submodule_files: + all_files.append((submodule_path, file_path)) + else: + _LOG.info("No submodules found") + else: + _LOG.info("Skipping submodules (include_subrepos=False)") + # Verify there's content to backup before proceeding. + if not all_files: + _LOG.warning("No %s files found. Nothing to zip.", file_mode) + return + # Display summary of what will be backed up. + _LOG.info( + "\n%s\nFound %d total files to include:\n%s", + hprint.frame("Files to include in zip"), + len(all_files), + hprint.indent( + "\n".join( + [ + ( + os.path.join(repo_path, file_path) + if repo_path != "." + else file_path + ) + for repo_path, file_path in all_files + ] + ) + ), + ) + if dry_run: + _LOG.warning("Dry-run mode: not creating zip file") + return + # Create zip file with all collected files. 
+ zip_file_path = os.path.join(backup_dir, zip_file_name) + _LOG.info("Creating zip file: %s", zip_file_path) + import zipfile + + with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf: + for repo_path, file_path in all_files: + full_path = os.path.join(repo_path, file_path) + # Maintain directory hierarchy in archive for easy restoration. + arcname = ( + os.path.join(repo_path, file_path) + if repo_path != "." + else file_path + ) + try: + zipf.write(full_path, arcname=arcname) + _LOG.debug("Added to zip: %s", arcname) + except Exception as e: + _LOG.warning("Failed to add %s to zip: %s", full_path, e) + _LOG.info("Successfully created zip file: %s", zip_file_path) + # Display location for easy access. + abs_zip_path = os.path.abspath(zip_file_path) + print(f"\nZip file created at: {abs_zip_path}") + + +@task +def gh_watch(ctx, *, interval=60): # type: ignore + """ + Watch GitHub workflow status with periodic updates. + + Runs `invoke gh_workflow_list` every N seconds using the `watch` command. + If running in tmux, temporarily renames the window to "*GH_WATCH*" for + visibility and restores it on exit. + + :param interval: Update interval in seconds + """ + hlitauti.report_task() + # Check if running inside tmux and save original window name. + old_pane_title = None + if os.environ.get("TMUX"): + _LOG.info("Running in tmux, saving window name") + _, old_pane_title = hsystem.system_to_one_line( + "tmux display-message -p '#W'" + ) + _LOG.info("Original window name: %s", old_pane_title) + # Rename window to indicate we're watching workflows. + hsystem.system("tmux rename-window '*GH_WATCH*'") + try: + # Watch workflows by repeatedly running gh_workflow_list. + while True: + # Clear screen before displaying updated workflow status. + subprocess.run("clear; invoke gh_workflow_list", shell=True) + _LOG.info("Sleeping for %d seconds before next update", interval) + time.sleep(interval) + finally: + # Restore original tmux window name if it was changed. 
+ if old_pane_title is not None: + _LOG.info("Restoring window name: %s", old_pane_title) + hsystem.system(f"tmux rename-window '{old_pane_title}'") + + +# TODO(gp): Add the following scripts: +# dev_scripts/git/gcl +# dev_scripts/git/git_branch.sh +# dev_scripts/git/git_branch_point.sh +# dev_scripts/create_class_diagram.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py new file mode 100644 index 000000000..7f437780b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py @@ -0,0 +1,837 @@ +""" +Import as: + +import helpers.lib_tasks_integrate as hlitaint +""" + +import datetime +import logging +import os +from typing import List, Optional, Set, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +DEFAULT_SRC_DIR_BASENAME = "cmamp1" +DEFAULT_DST_DIR_BASENAME = "kaizenflow1" + +# DEFAULT_SRC_DIR_BASENAME="amp1" +# DEFAULT_DST_DIR_BASENAME="cmamp1" + + +def _dassert_current_dir_matches(expected_dir_basename: str) -> None: + """ + Ensure that the name of the current dir is the one expected. + + E.g., `/Users/saggese/src/cmamp1` is a valid dir for an integration + branch for `cmamp1`. + """ + _LOG.debug(hprint.to_str("expected_dir_basename")) + # Get the basename of the current dir. 
+ curr_dir_basename = os.path.basename(os.getcwd()) + # Check that it's what is expected. + hdbg.dassert_eq( + curr_dir_basename, + expected_dir_basename, + "The current dir '%s' doesn't match the expected dir '%s'", + curr_dir_basename, + expected_dir_basename, + ) + + +# TODO(gp): -> _dassert_is_integration_dir +def _dassert_is_integration_branch(abs_dir: str) -> None: + """ + Ensure that the branch in `abs_dir` is a valid integration or lint branch. + + E.g., `AmpTask1786_Integrate_20220402` is a valid integration + branch. + """ + _LOG.debug(hprint.to_str("abs_dir")) + branch_name = hgit.get_branch_name(dir_name=abs_dir) + hdbg.dassert_ne(branch_name, "master") + hdbg.dassert( + ("_Integrate_" in branch_name) or ("_Lint_" in branch_name), + "Invalid branch_name='%s' in abs_dir='%s'", + branch_name, + abs_dir, + ) + + +def _clean_both_integration_dirs(abs_dir1: str, abs_dir2: str) -> None: + """ + Run `i git_clean` on the passed dirs. + + :param abs_dir1, abs_dir2: full paths of the dirs to clean + """ + _LOG.debug(hprint.to_str("abs_dir1 abs_dir2")) + # + cmd = f"cd {abs_dir1} && invoke git_clean" + hsystem.system(cmd) + # + cmd = f"cd {abs_dir2} && invoke git_clean" + hsystem.system(cmd) + + +@task +def integrate_create_branch(ctx, dir_basename, dry_run=False): # type: ignore + """ + Create the branch for integration of `dir_basename` (e.g., amp1) in the + current dir. + + :param dir_basename: specify the dir name (e.g., `amp1`) to ensure the set-up is + correct. + """ + hlitauti.report_task() + # Check that the current dir has the name `dir_basename`. + _dassert_current_dir_matches(dir_basename) + # Login in GitHub. + hlitagh.gh_login(ctx) + # Create the integration branch with the current date, e.g., + # `AmpTask1786_Integrate_20211231`. 
+ date = datetime.datetime.now().date() + date_as_str = date.strftime("%Y%m%d") + branch_name = f"AmpTask1786_Integrate_{date_as_str}" + # query_yes_no("Are you sure you want to create the branch ") + _LOG.info("Creating branch '%s'", branch_name) + cmd = f"invoke git_branch_create -b '{branch_name}'" + hlitauti.run(ctx, cmd, dry_run=dry_run) + + +# ############################################################################# + + +def _resolve_src_dst_names( + src_dir_basename: str, + dst_dir_basename: str, + subdir: str, + *, + check_exists: bool = True, +) -> Tuple[str, str]: + """ + Return the full path of `src_dir_basename` and `dst_dir_basename`. + + :param src_dir_basename: the current dir (e.g., `amp1`) + :param dst_dir_basename: a dir parallel to the current one (`cmamp1`) + :param check_exists: check that the dst dir exists + + :return: absolute paths of both directories + """ + curr_parent_dir = os.path.dirname(os.getcwd()) + # + abs_src_dir = os.path.join(curr_parent_dir, src_dir_basename, subdir) + abs_src_dir = os.path.normpath(abs_src_dir) + hdbg.dassert_dir_exists(abs_src_dir) + # + abs_dst_dir = os.path.join(curr_parent_dir, dst_dir_basename, subdir) + abs_dst_dir = os.path.normpath(abs_dst_dir) + if check_exists: + hdbg.dassert_dir_exists(abs_dst_dir) + return abs_src_dir, abs_dst_dir + + +@task +def integrate_diff_dirs( # type: ignore + ctx, + src_dir_basename=DEFAULT_SRC_DIR_BASENAME, + dst_dir_basename=DEFAULT_DST_DIR_BASENAME, + reverse=False, + subdir="", + copy=False, + use_linux_diff=False, + check_branches=True, + clean_branches=True, + remove_usual=False, + run_diff_script=True, + dry_run=False, +): + """ + Integrate repos from dirs `src_dir_basename` to `dst_dir_basename` by diffing + or copying all the files with differences. + + ``` + # Use the default values for src / dst dirs to represent the usual set-up. + > i integrate_diff_dirs \ + --src-dir-basename amp1 \ + --dst-dir-basename cmamp1 \ + --subdir . 
+ ``` + + :param src_dir_basename: dir with the source branch (e.g., amp1) + :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) + :param reverse: switch the roles of the default source and destination branches + :param subdir: filter to the given subdir for both dirs (e.g., + `src_dir_basename/subdir` and `dst_dir_basename/subdir`) + :param copy: copy the files instead of diffing + :param use_linux_diff: use Linux `diff` instead of `diff_to_vimdiff.py` + :param remove_usual: remove the usual mismatching files (e.g., `.github`) + :param run_diff_script: run the diff script + :param dry_run: do not execute the commands + """ + _ = ctx + hlitauti.report_task() + if reverse: + src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename + _LOG.warning( + "Reversing dirs: %s", + hprint.to_str2(src_dir_basename, dst_dir_basename), + ) + # Check that the integration branches are in the expected state. + # _dassert_current_dir_matches(src_dir_basename) + # When we integrate a dir that doesn't exist in the dst branch, we need to + # skip the check for existence. + check_exists = False + abs_src_dir, abs_dst_dir = _resolve_src_dst_names( + src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists + ) + hio.create_dir(abs_dst_dir, incremental=True) + if check_branches: + _dassert_is_integration_branch(abs_src_dir) + _dassert_is_integration_branch(abs_dst_dir) + else: + _LOG.warning("Skipping integration branch check") + # Clean branches if needed. + if clean_branches: + # We can clean up only the root dir. + if subdir == "": + _clean_both_integration_dirs(abs_src_dir, abs_dst_dir) + else: + _LOG.warning("Skipping integration branch cleaning") + # Copy or diff dirs. + _LOG.info("abs_src_dir=%s", abs_src_dir) + _LOG.info("abs_dst_dir=%s", abs_dst_dir) + hdbg.dassert_ne(abs_src_dir, abs_dst_dir) + if copy: + # Copy the files. 
+ if dry_run: + cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" + else: + rsync_opts = "--delete -a" + cmd = f"rsync {rsync_opts} {abs_src_dir}/ {abs_dst_dir}" + else: + # Diff the files. + if use_linux_diff: + cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" + else: + cmd = "diff_to_vimdiff.py" + if run_diff_script: + cmd += " --run_diff_script" + else: + cmd += " --no_run_diff_script" + _LOG.warning("Skipping running diff script") + cmd += f" --dir1 {abs_src_dir} --dir2 {abs_dst_dir}" + if remove_usual: + vals = [ + r"\/\.github\/", + ] + regex = "|".join(vals) + cmd += f" --ignore_files='{regex}'" + # We need to use `system` to get vimdiff to connect to stdin and stdout. + if not dry_run: + # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) + os.system(cmd) + + +# ############################################################################# + + +# TODO(gp): Allow to pass the hash of the last integration to consider. +# Factor out the logic to find the hash + +# Sometimes we want to see the changes in one dir since an integration point + +# E.g., find all the changes in `datapull` since the last integration +# +# > git log --oneline datapull +# 77f612f75 SorrIssue244 CCXT timestamp representation unit test (#317) +# 6b981b1f6 Sorrtask298 rename get docker cmd to get docker run cmd (#331) +# bd33a5fb9 SorrTask267_Parquet_to_CSV (#267) +# 9819fd117 AmpTask1786_Integrate_20230518_im (#273) <==== +# d530ed561 Update (#272) +# b75eab7ad AmpTask1786_Integrate_20230518_3 (#271) +# +# > git difftool 9819fd117.. datapull +# ... +# +# > git diff --name-only 9819fd117.. 
def _find_files_touched_since_last_integration(
    abs_dir: str, subdir: str
) -> List[str]:
    """
    Return the list of files modified since the last integration for `abs_dir`.

    The last integration is the most recent commit whose subject contains
    `AmpTask1786_Integrate` (excluding merges of `master` into an integration
    branch). The files are the ones reported by `git diff --name-only` between
    the first commit after that integration and `HEAD`.

    As a side effect, the sorted list is saved in the current dir in
    `tmp.integrate_find_files_touched_since_last_integration.<dir>.txt`.

    :param abs_dir: Git client dir to `cd` into before running the Git commands
    :param subdir: consider only the files under `subdir`. An empty string or
        `.` means no filtering
    :return: sorted list of paths, relative to the root of the Git client
    """
    _LOG.debug(hprint.to_str2(abs_dir))
    # Resolve the path first so that `abs_dir="."` yields the real dir name
    # instead of `.`, which is used in the name of the debug file.
    dir_basename = os.path.basename(os.path.abspath(abs_dir))
    # Change the dir to the desired one, making sure to restore it on exit.
    old_dir = os.getcwd()
    try:
        os.chdir(abs_dir)
        # Find the hash of all integration commits.
        cmd = "git log --date=local --oneline --date-order | grep AmpTask1786_Integrate"
        # Remove integrations like "'... Merge branch 'master' into
        # AmpTask1786_Integrate_20220113'"
        cmd += " | grep -v \"Merge branch 'master' into \""
        _, txt = hsystem.system_to_string(cmd)
        _LOG.debug("integration commits=\n%s", txt)
        integration_commits = txt.split("\n")
        # > git log --date=local --oneline --date-order | grep AmpTask1786_Integrate
        # 72a1a101 AmpTask1786_Integrate_20211218 (#1975)
        # 2acfd6d7 AmpTask1786_Integrate_20211214 (#1950)
        # 318ab0ff AmpTask1786_Integrate_20211210 (#1933)
        hdbg.dassert_lte(2, len(integration_commits))
        print(f"# last_integration: '{integration_commits[0]}'")
        # NOTE: `hprint.to_str()` looks up the variable by name, so the local
        # name must match the string.
        last_integration_hash = integration_commits[0].split()[0]
        print("* " + hprint.to_str("last_integration_hash"))
        # Find the first commit after the commit with the last integration.
        cmd = f"git log --oneline --reverse --ancestry-path {last_integration_hash}^..master"
        _, txt = hsystem.system_to_string(cmd)
        print(f"* commits after last integration=\n{txt}")
        commits = txt.split("\n")
        # > git log --oneline --reverse --ancestry-path 72a1a101^..master
        # 72a1a101 AmpTask1786_Integrate_20211218 (#1975)
        # 90e90353 AmpTask1955_Lint_20211218 (#1976)
        # 4a2b45c6 AmpTask1858_Implement_buildmeister_workflows_in_invoke (#1860)
        hdbg.dassert_lte(2, len(commits))
        first_commit_hash = commits[1].split()[0]
        _LOG.debug("first_commit: '%s'", commits[1])
        _LOG.debug(hprint.to_str("first_commit_hash"))
        # Find all the files touched in each branch.
        cmd = f"git diff --name-only {first_commit_hash}..HEAD"
        _, txt = hsystem.system_to_string(cmd)
        # Drop empty entries: an empty diff would otherwise yield `[""]`, i.e.,
        # a bogus file that callers would join to the repo dir.
        files: List[str] = [line for line in txt.split("\n") if line.strip()]
    finally:
        os.chdir(old_dir)
    _LOG.debug("Files modified since the integration=\n%s", "\n".join(files))
    # Filter files by subdir, if needed.
    if subdir:
        subdir_norm = os.path.normpath(subdir)
        if subdir_norm != ".":
            # Match on a path-component boundary so that `datapull` does not
            # select `datapull2/...`. Git always reports paths with `/`.
            prefix = subdir_norm + "/"
            files = [
                file
                for file in files
                if file == subdir_norm or file.startswith(prefix)
            ]
    # Reorganize the files.
    hdbg.dassert_no_duplicates(files)
    files = sorted(files)
    # Save to file for debugging.
    file_name = (
        f"tmp.integrate_find_files_touched_since_last_integration.{dir_basename}.txt"
    )
    hio.to_file(file_name, "\n".join(files))
    _LOG.debug("Saved file to '%s'", file_name)
    return files
def _integrate_files(
    files: Set[str],
    abs_left_dir: str,
    abs_right_dir: str,
    only_different_files: bool,
) -> List[Tuple[str, str, str]]:
    """
    Select the files to compare between the left and the right dir.

    A file is selected only if it exists in both dirs. When
    `only_different_files` is set, files with the same content are skipped, as
    well as files whose content can't be read as text.

    :param files: relative paths of the candidate files
    :param abs_left_dir: path of the left dir
    :param abs_right_dir: path of the right dir
    :param only_different_files: keep only the files whose content differs
    :return: tuples `(file, left_file, right_file)` sorted by `file`
    """
    _LOG.debug(hprint.to_str("abs_left_dir abs_right_dir only_different_files"))
    files_to_diff: List[Tuple[str, str, str]] = []
    # NOTE: the local names below are looked up by name by `hprint.to_str()`.
    for file in sorted(files):
        _LOG.debug(hprint.to_str("file"))
        left_file = os.path.join(abs_left_dir, file)
        right_file = os.path.join(abs_right_dir, file)
        both_exist = os.path.exists(left_file) and os.path.exists(right_file)
        equal: Optional[bool]
        skip: Optional[bool]
        if not both_exist:
            # At least one of the two files is missing: nothing to compare.
            equal, skip = False, True
        elif only_different_files:
            # Both files exist: skip them when the content is the same.
            try:
                equal = hio.from_file(left_file) == hio.from_file(right_file)
            except RuntimeError as e:
                # RuntimeError: error='utf-8' codec can't decode byte 0xd0 in
                # position 10: invalid continuation byte
                _LOG.error("Caught error:\n%s", e)
                # Files that can't be decoded are treated as equal, i.e.,
                # skipped.
                equal = True
            skip = equal
        else:
            # Both files exist and are processed even if they are the same, so
            # the content is not compared.
            equal, skip = None, False
        _ = left_file, right_file, both_exist, equal, skip
        _LOG.debug(hprint.to_str("left_file right_file both_exist equal skip"))
        # Record the files to process.
        if not skip:
            _LOG.debug(" -> (%s, %s)", left_file, right_file)
            files_to_diff.append((file, left_file, right_file))
        else:
            _LOG.debug(" Skip %s", file)
    return files_to_diff
+ abs_src_dir, abs_dst_dir = _resolve_src_dst_names( + src_dir_basename, dst_dir_basename, subdir="" + ) + if check_branches: + _dassert_is_integration_branch(abs_src_dir) + _dassert_is_integration_branch(abs_dst_dir) + else: + _LOG.warning("Skipping integration branch check") + # Find the files touched in each branch since the last integration. + src_files = set( + _find_files_touched_since_last_integration(abs_src_dir, subdir) + ) + dst_files = set( + _find_files_touched_since_last_integration(abs_dst_dir, subdir) + ) + # + if file_direction == "common_files": + files = src_files.intersection(dst_files) + elif file_direction == "only_files_in_src": + files = src_files - dst_files + elif file_direction == "only_files_in_dst": + files = dst_files - src_files + elif file_direction == "union_files": + files = src_files.union(dst_files) + else: + raise ValueError(f"Invalid file_direction='{file_direction}'") + # + files_to_diff = _integrate_files( + files, + abs_src_dir, + abs_dst_dir, + only_different_files, + ) + # Print the files. + print(hprint.frame(file_direction)) + _LOG.debug(hprint.to_str("files_to_diff")) + files_set = list(zip(*files_to_diff)) + if not files_set: + _LOG.warning("No file found: skipping") + return + files_set = sorted(list(files_set[0])) + txt = "\n".join(files_set) + print(hprint.indent(txt)) + # Process the files touched. + if mode == "print_dirs": + files_lst = [] + for file, left_file, right_file in files_to_diff: + dir_name = os.path.dirname(file) + # Skip empty dir, e.g., for `pytest.ini`. + if dir_name != "": + files_lst.append(dir_name) + files_lst = sorted(list(set(files_lst))) + print(hprint.frame("Dirs changed")) + print("\n".join(files_lst)) + else: + # Build the script with the operations to perform. 
@task
def integrate_diff_overlapping_files(  # type: ignore
    ctx, src_dir_basename, dst_dir_basename, subdir=""
):
    """
    Diff the files modified in both branches against their base version.

    Find the files modified in both `src_dir_basename` and `dst_dir_basename`
    and, for each of them, save the version at the branch point next to the
    file (e.g., `foo.py` -> `foo.base.py`). Then generate the script
    `./tmp.vimdiff_overlapping_files.sh` to compare the base version with the
    current one.

    This is used to check what changes were made to files modified by both
    branches.

    :param ctx: invoke ctx
    :param src_dir_basename: dir with the source branch (e.g., amp1). It must
        be the current dir
    :param dst_dir_basename: dir with the destination branch (e.g., cmamp1)
    :param subdir: sub-directory to consider in both dirs
    """
    hlitauti.report_task()
    _ = ctx
    # Check that the integration branches are in the expected state.
    _dassert_current_dir_matches(src_dir_basename)
    # When we integrate a dir that doesn't exist in the dst branch, we need to
    # skip the check for existence.
    check_exists = False
    abs_src_dir, abs_dst_dir = _resolve_src_dst_names(
        src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists
    )
    _dassert_is_integration_branch(abs_src_dir)
    _dassert_is_integration_branch(abs_dst_dir)
    _clean_both_integration_dirs(abs_src_dir, abs_dst_dir)
    # Find the files modified in both branches.
    src_hash = hgit.get_branch_hash(abs_src_dir)
    _LOG.info("src_hash=%s", src_hash)
    dst_hash = hgit.get_branch_hash(abs_dst_dir)
    _LOG.info("dst_hash=%s", dst_hash)
    diff_files1 = os.path.abspath("./tmp.files_modified1.txt")
    diff_files2 = os.path.abspath("./tmp.files_modified2.txt")
    cmd = f"cd {abs_src_dir} && git diff --name-only {src_hash} HEAD >{diff_files1}"
    hsystem.system(cmd)
    cmd = f"cd {abs_dst_dir} && git diff --name-only {dst_hash} HEAD >{diff_files2}"
    hsystem.system(cmd)
    common_files = "./tmp.common_files.txt"
    cmd = f"comm -12 {diff_files1} {diff_files2} >{common_files}"
    hsystem.system(cmd)
    # Get the base files to diff.
    files = hio.from_file(common_files).split("\n")
    files = [f for f in files if f != ""]
    _LOG.info("Found %d files to diff:\n%s", len(files), "\n".join(files))
    # Retrieve the original file and create the diff command.
    script_txt = []
    for src_file in files:
        hdbg.dassert_file_exists(src_file)
        # Insert `.base` before the extension (e.g., `foo.py` -> `foo.base.py`).
        # Using `splitext()` guarantees that the base file never coincides with
        # the source file (e.g., for `foo.md` or `Makefile`), which would
        # otherwise be overwritten by the redirect below.
        root, ext = os.path.splitext(src_file)
        base_file = f"{root}.base{ext}"
        # Save the base file.
        cmd = f"git show {src_hash}:{src_file} >{base_file}"
        rc = hsystem.system(cmd, abort_on_error=False)
        if rc == 0:
            # The file was created: nothing to do.
            pass
        elif rc == 128:
            # The file doesn't exist at the branch point, i.e., it was added in
            # the branch. In this case Git returns:
            # ```
            # rc=128 fatal: path 'dataflow/pipelines/real_time/test/
            # test_dataflow_pipelines_real_time_pipeline.py' exists on disk, but
            # not in 'ce54877016204315766e90df7c45192bec1fbf20'
            # ```
            # There is no base version, so compare the current file against an
            # empty one.
            base_file = "/dev/null"
        else:
            raise ValueError(f"cmd='{cmd}' returned {rc}")
        # Update the script to diff.
        script_txt.append(f"vimdiff {base_file} {src_file}")
    # Save the script to compare.
    script_file_name = "./tmp.vimdiff_overlapping_files.sh"
    script_txt = "\n".join(script_txt)
    hio.create_executable_script(script_file_name, script_txt)
    print(f"# To diff against the base run:\n> {script_file_name}")
+ target_dir = f"/{default_dst_dir_basename}/" + idx = src_file_path.find(target_dir) + if idx >= 0: + src_dir_basename = default_dst_dir_basename + dst_dir_basename = default_src_dir_basename + subdir = src_file_path[idx + len(target_dir) :] + else: + target_dir = f"/{default_src_dir_basename}/" + idx = src_file_path.find(target_dir) + if idx >= 0: + src_dir_basename = default_src_dir_basename + dst_dir_basename = default_dst_dir_basename + subdir = src_file_path[idx + len(target_dir) :] + else: + raise ValueError( + f"Can't find either '{default_src_dir_basename}' or " + f"'{default_dst_dir_basename}' in file_path=" + f"'{src_file_path}'" + ) + # Replace src dir (e.g., `cmamp1`) with dst dir (e.g., `amp1`). + dst_file_path = src_file_path.replace( + f"/{src_dir_basename}/", f"/{dst_dir_basename}/" + ) + _LOG.debug(hprint.to_str("dst_file_path subdir")) + if check_exists: + hdbg.dassert_path_exists(dst_file_path) + return dst_file_path, subdir + + +@task +def integrate_rsync( # type: ignore + ctx, + src_dir, + src_dir_basename=DEFAULT_SRC_DIR_BASENAME, + dst_dir_basename=DEFAULT_DST_DIR_BASENAME, + dst_dir="", + check_dir=True, + dry_run=False, +): + """ + Use `rsync` to bring two dirs to sync. + + E.g., + ``` + > invoke integrate_diff_dirs + ... + ... Only in .../cmamp1/.../alpha_numeric_data_snapshots: alpha + ... Only in .../amp1/.../alpha_numeric_data_snapshots: latest + + # Accept the `cmamp1` side vs the `amp1` side with: + > invoke integrate_rsync .../cmamp1/.../alpha_numeric_data_snapshots/ + ``` + + :param src_dir: dir to be used. If empty, it is inferred from file_name + :param dst_dir: dir to be used. 
@task
def integrate_file(  # type: ignore
    ctx,
    file_name,
    src_dir_basename=DEFAULT_SRC_DIR_BASENAME,
    dst_dir_basename=DEFAULT_DST_DIR_BASENAME,
    dry_run=False,
):
    """
    Diff corresponding files in two different repos.

    ```
    # The path is assumed referred to current dir.
    > i integrate_file --file-name helpers/lib_tasks_integrate.py

    > i integrate_file --file-name /Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py

    > i integrate_file \
        --file-name helpers/lib_tasks_integrate.py \
        --src-dir-basename cmamp1 \
        --dst-dir-basename kaizenflow1
    ```

    :param ctx: invoke ctx
    :param file_name: it can be a full path (e.g.,
        `/Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py`)
        or a path relative to the current dir (e.g.,
        `helpers/lib_tasks_integrate.py`)
    :param src_dir_basename: basename of one of the two repo dirs (e.g.,
        `cmamp1`)
    :param dst_dir_basename: basename of the other repo dir (e.g.,
        `kaizenflow1`)
    :param dry_run: do not execute `vimdiff`
    """
    hlitauti.report_task()
    _ = ctx
    file_name = os.path.normpath(file_name)
    hdbg.dassert_file_exists(file_name)
    # If the file is relative to the current dir, we need to prepend the dir
    # name, since the repo dir is inferred from the full path.
    if not os.path.isabs(file_name):
        file_name = os.path.join(os.getcwd(), file_name)
    _LOG.info(hprint.to_str("file_name"))
    # Find the corresponding file in the other repo dir.
    dst_file_name, _ = _infer_dst_file_path(
        file_name,
        default_src_dir_basename=src_dir_basename,
        default_dst_dir_basename=dst_dir_basename,
    )
    _LOG.info(hprint.to_str("file_name dst_file_name"))
    #
    _LOG.info("Diffing:\n'%s'\nvs\n'%s'", file_name, dst_file_name)
    cmd = f"vimdiff {file_name} {dst_file_name}"
    # We need to use `system` to get vimdiff to connect to stdin and stdout.
    if not dry_run:
        # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True)
        os.system(cmd)
+# FILE=helpers/lib_tasks_git.py; (cd ~/src/cmamp1; git log -1 $FILE); (cd ~/src/kaizenflow1; git log -1 $FILE) + +# > git log --pretty=format:"%h - %an, %ad : %s" --date=short | grep _Integrate_ | head -5 +# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) +# 5a05a0c94 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_6 (#365) +# 6c3ad7d87 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_5 (#364) +# 36abfd8b3 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_3 (#361) +# 65fe42d38 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_2 (#360) + +# In Sorr +# GIT_INTEGR_HASH=fffa1c8b2 +# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) + +# In cmamp +# 20526ed09 - GP Saggese, 2023-08-10 : AmpTask1786_Integrate_20230810_2 (#5011) + +# Show files changed since an integration point +# > git diff --name-only $GIT_INTEGR_HASH dataflow_amp +# dataflow_amp/system/mock1/test/test_mock1_forecast_system.py + +# Show the difference since an integration point +# git difftool $GIT_INTEGR_HASH.. dataflow_amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py new file mode 100644 index 000000000..8fe792c97 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py @@ -0,0 +1,444 @@ +""" +Import as: + +import helpers.lib_tasks_lint as hlitalin +""" + +import datetime +import filecmp +import logging +import os + +from invoke.tasks import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. 
@task
def lint_check_python_files_in_docker(  # type: ignore
    ctx,
    python_compile=True,
    python_execute=True,
    modified=False,
    branch=False,
    last_commit=False,
    all_=False,
    files="",
):
    """
    Compile and execute Python files checking for errors.

    This is supposed to be run inside Docker.

    The params have the same meaning as in `_get_files_to_process()`.

    :param python_compile: byte-compile each file with `compileall`
    :param python_execute: run each file with `python`
    :return: True if at least one file failed a check
    """
    # Imported here to keep the module-level dependencies unchanged.
    import compileall

    hlitauti.report_task()
    _ = ctx
    # We allow to filter through the user specified `files`.
    mutually_exclusive = False
    remove_dirs = True
    file_list = hlitauti._get_files_to_process(
        modified,
        branch,
        last_commit,
        all_,
        files,
        mutually_exclusive,
        remove_dirs,
    )
    _LOG.debug("Found %d files:\n%s", len(file_list), "\n".join(file_list))
    # Filter keeping only Python files.
    _LOG.debug("Filtering for Python files")
    exclude_paired_jupytext = True
    file_list = hio.keep_python_files(file_list, exclude_paired_jupytext)
    _LOG.debug("file_list=%s", "\n".join(file_list))
    _LOG.info("Need to process %d files", len(file_list))
    if not file_list:
        _LOG.warning("No files were selected")
    # Scan all the files.
    failed_filenames = []
    for file_name in file_list:
        _LOG.info("Processing '%s'", file_name)
        # Track the failure per file so that a file failing both checks is
        # reported (and counted) only once.
        failed = False
        if python_compile:
            success = compileall.compile_file(file_name, force=True, quiet=1)
            _LOG.debug("file_name='%s' -> python_compile=%s", file_name, success)
            if not success:
                msg = f"'{file_name}' doesn't compile correctly"
                _LOG.error(msg)
                failed = True
        # TODO(gp): Add also `python -c "import ..."`, if not equivalent to `compileall`.
        if python_execute:
            cmd = f"python {file_name}"
            rc = hsystem.system(cmd, abort_on_error=False, suppress_output=False)
            _LOG.debug("file_name='%s' -> python_execute rc=%s", file_name, rc)
            if rc != 0:
                msg = f"'{file_name}' doesn't execute correctly"
                _LOG.error(msg)
                failed = True
        if failed:
            failed_filenames.append(file_name)
    hprint.log_frame(
        _LOG,
        f"failed_filenames={len(failed_filenames)}",
        verbosity=logging.INFO,
    )
    _LOG.info("\n".join(failed_filenames))
    error = len(failed_filenames) > 0
    return error
def _get_lint_docker_cmd(
    base_image: str,
    docker_cmd_: str,
    stage: str,
    version: str,
    *,
    use_entrypoint: bool = True,
) -> str:
    """
    Build the Docker compose command that runs `docker_cmd_` for the Linter.

    :param base_image: image to use. If empty, the image is
        `$CSFY_ECR_BASE_PATH/helpers`
    :param docker_cmd_: command to run
    :param stage: the image stage to use
    :param version: the version of the image to use
    :param use_entrypoint: forwarded to `_get_docker_compose_cmd()`
    :return: the full command to run
    """
    # NOTE: `linter_image` is looked up by name by `hprint.to_str()` below.
    linter_image = base_image
    if linter_image == "":
        # Fall back to the default image from the ECR base path.
        linter_image = os.environ["CSFY_ECR_BASE_PATH"] + "/helpers"
    _LOG.debug(hprint.to_str("linter_image"))
    # Assemble the command line.
    docker_compose_cmd: str = hlitadoc._get_docker_compose_cmd(
        linter_image,
        stage,
        version,
        docker_cmd_,
        use_entrypoint=use_entrypoint,
    )
    return docker_compose_cmd
+ base_image = "" + cmd = _get_lint_docker_cmd(base_image, docker_cmd_, stage, version) + # Use `PIPESTATUS` otherwise the exit status of the pipe is always 0 + # because writing to a file succeeds. + cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}; exit $PIPESTATUS" + # Run. + hlitauti.run(ctx, cmd) + + +# pylint: disable=line-too-long +@task +def lint( # type: ignore + ctx, + base_image="", + stage="prod", + version="", + files="", + from_file="", + skip_files="", + dir_name="", + modified=False, + last_commit=False, + branch=False, + # It needs to be a string to allow the user to specify "serial". + num_threads="serial", + only_format=False, + only_check=False, +): + """ + Lint files. + + ``` + # To lint specific files: + > i lint --files="dir1/file1.py dir2/file2.md" + + # To lint the files changed in the last commit, excluding specific files: + > i lint --last-commit --skip-files="dir1/file1.py dir2/file2.md" + + # To lint all the files in the current dir using only formatting actions: + > i lint --dir-name . --only-format + + # To lint the files modified in the current git client: + > i lint --modified + + # To exclude certain paths from linting: + > i lint --files="$(find . -name '*.py' -not -path './compute/*' -not -path './amp/*')" + ``` + + :param stage: the image stage to use (e.g., "prod", "dev", "local") + :param version: the version of the container to use + :param files: specific files to lint (e.g. "dir1/file1.py dir2/file2.md") + :param from_file: specific file storing files to lint + :param skip_files: specific files to skip during linting (e.g. "dir1/file1.py dir2/file2.md") + :param dir_name: name of the dir where all files should be linted + :param modified: lint the files modified in the current git client + :param last_commit: lint the files modified in the previous commit + :param branch: lint the files modified in the current branch w.r.t. master + :param num_threads: number of threads to use ("serial", -1, 0, 1, 2, ...) 
+ :param only_format: run only the modifying actions of Linter (e.g., black) + :param only_check: run only the non-modifying actions of Linter (e.g., pylint) + """ + # Check if the user is in a repo root. + hdbg.dassert( + hgit.is_cwd_git_repo(), + msg="Linter should run from repo root", + ) + hlitauti.report_task() + # Prepare the command line. + lint_cmd_opts = [] + # Add the file selection argument. + hdbg.dassert_eq( + int(len(files) > 0) + + int(len(from_file) > 0) + + int(len(dir_name) > 0) + + int(modified) + + int(last_commit) + + int(branch), + 1, + msg="Specify exactly one among --files, --from_file, --dir-name, " + "--modified, --last-commit, --branch", + ) + if len(files) > 0: + lint_cmd_opts.append(f"--files {files}") + elif len(from_file) > 0: + lint_cmd_opts.append(f"--from_file {from_file}") + elif len(dir_name) > 0: + lint_cmd_opts.append(f"--dir_name {dir_name}") + elif modified: + lint_cmd_opts.append("--modified") + elif last_commit: + lint_cmd_opts.append("--last_commit") + elif branch: + lint_cmd_opts.append("--branch") + else: + raise ValueError("No file selection arguments are specified") + if len(skip_files) > 0: + lint_cmd_opts.append(f"--skip_files {skip_files}") + # + lint_cmd_opts.append(f"--num_threads {num_threads}") + # Add the action selection argument, if needed. + hdbg.dassert_lte( + int(only_format) + int(only_check), + 1, + msg="Specify only one among --only-format, --only-check", + ) + if only_format: + lint_cmd_opts.append("--only_format") + elif only_check: + lint_cmd_opts.append("--only_check") + else: + _LOG.info("All Linter actions selected") + # Compose the command line. + if hserver.is_host_mac(): + find_cmd = "$(find . -path '*linters/base.py')" + else: + find_cmd = "$(find -wholename '*linters/base.py')" + lint_cmd_ = find_cmd + " " + hlitauti._to_single_line_cmd(lint_cmd_opts) + docker_cmd_ = _get_lint_docker_cmd( + base_image, lint_cmd_, stage=stage, version=version + ) + # Run. 
@task
def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False):  # type: ignore
    """
    Sync code needed to run linter / ai_review from a Git client to the current one.

    In sync mode each file in `files_to_copy` is located in the source client,
    projected onto the helpers root of the current client and copied with
    `cp -f`, unless the destination already has identical content. In revert
    mode the same files are restored with `git checkout --` and nothing is
    copied.

    :param git_client_name: the name of the Git client to sync from. It can be
        something like "helpers1" and it will be used from "$HOME/src" or can
        be a full path.
    :param revert_to_original: if `True`, revert the changes to the original
    """
    _ = ctx
    hlitauti.report_task()
    # Resolve the Git client to copy the code from.
    src_git_dir = hgit.resolve_git_client_dir(git_client_name)
    #
    files_to_copy = [
        # "hgit.py",
        # "hmarkdown.py",
        "llm_prompts.py",
        "llm_transform.py",
        "inject_todos.py",
        "all.coding_style_guidelines.reference.md",
    ]
    # Revert the files in the current git client to the original code.
    if revert_to_original:
        _LOG.debug("Reverting to original code ...")
        # The Git root of the source client does not depend on the file.
        git_root_dir = hgit.find_git_root(src_git_dir)
        for file_name in files_to_copy:
            _LOG.debug("Reverting %s to original code", file_name)
            src_file_path = hgit.find_file(file_name, dir_path=src_git_dir)
            # Use the path relative to the Git root so that the same path is
            # checked out in the current client.
            src_file_path = os.path.relpath(src_file_path, git_root_dir)
            cmd = "git checkout -- " + src_file_path
            hsystem.system(cmd)
        _LOG.info("Done")
        return
    # Get the path to the helpers repo.
    src_helpers_dir = hgit.find_helpers_root(src_git_dir)
    hdbg.dassert_ne(src_helpers_dir, "")
    hdbg.dassert_dir_exists(src_helpers_dir)
    #
    dst_helpers_dir = hgit.find_helpers_root()
    hdbg.dassert_dir_exists(dst_helpers_dir)
    _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir"))
    #
    _LOG.info(
        "Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir
    )
    for file_name in files_to_copy:
        _LOG.debug(hprint.to_str("file_name"))
        # Get the path to the file in the src Git client.
        src_file_path = hgit.find_file(file_name, dir_path=src_git_dir)
        src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path))
        _LOG.debug(hprint.to_str("src_file_path"))
        hdbg.dassert_file_exists(src_file_path)
        # Get the path to the file in the dst Git client.
        dst_file_path = hgit.project_file_name_in_git_client(
            src_file_path, src_helpers_dir, dst_helpers_dir
        )
        _LOG.debug(hprint.to_str("dst_file_path"))
        _LOG.debug(hprint.to_str("src_file_path dst_file_path"))
        dir_name = os.path.dirname(dst_file_path)
        # Skip the copy when the destination is already a regular file with
        # the same content. The source is known to exist from the assertion
        # above, so only the destination needs to be checked.
        if os.path.isfile(dst_file_path) and filecmp.cmp(
            src_file_path, dst_file_path, shallow=False
        ):
            _LOG.info(
                "File '%s' is identical to '%s', skipping",
                src_file_path,
                dst_file_path,
            )
            continue
        # Copy the file.
        hio.create_dir(dir_name, incremental=True)
        cmd = f"cp -f {src_file_path} {dst_file_path}"
        _LOG.debug(hprint.to_str("cmd"))
        _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path)
        hsystem.system(cmd)
    _LOG.info("Done")
+import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# ############################################################################# +# Fix permission +# ############################################################################# + + +# The desired invariants are that all files +# 1) are owned by our user or by Docker user +# 2) have the shared group as group +# 3) have the same user and group permissions + +# E.g., +# -rw-rw-r-- 1 sasm sasm-fileshare 21877 Nov 3 18:11 pytest_logger.log + +# The possible problems are: +# -r--r--r-- 1 sasm sasm-fileshare ./.git/objects/02/4df16f66c87bdfb +# -rw-r--r-- 1 265533 sasm-fileshare ./core_lime/dataflow/nodes/test/te +# -rw-rw-r-- 1 265533 sasm-fileshare ./research/real_time/notebooks/Lim + +# drwxr-sr-x 2 gsaggese sasm-fileshare 35 Oct 12 21:51 test +# chmod g=u amp/dev_scripts/git/git_hooks/test + + +def _save_dir_status(dir_name: str, filename: str) -> None: + cmd = f'find {dir_name} -name "*" | sort | xargs ls -ld >{filename}' + hsystem.system(cmd) + _LOG.info("Saved dir status in %s", filename) + + +# From https://stackoverflow.com/questions/1830618 +def _get_user_group(filename: str) -> Tuple[str, str]: + """ + Return the symbolic name of user and group of a file. + """ + uid = os.stat(filename).st_uid + try: + user = pwd.getpwuid(uid).pw_name + except KeyError as e: + # _LOG.warning("Error: ", str(e)) + _ = e + user = str(uid) + # + gid = os.stat(filename).st_gid + try: + group = grp.getgrgid(gid).gr_name + except KeyError as e: + _ = e + group = str(gid) + return user, group + + +def _find_files_for_user(dir_name: str, user: str, is_equal: bool) -> List[str]: + """ + Find all the files under `abs_dir` that are owned or not by `user`. + """ + _LOG.debug("") + mode = "\\!" 
if not is_equal else "" + cmd = f'find {dir_name} -name "*" {mode} -user "{user}"' + _, txt = hsystem.system_to_string(cmd) + files: List[str] = txt.split("\n") + return files + + +def _find_files_for_group( + dir_name: str, group: str, is_equal: bool +) -> List[str]: + """ + Find all the files under `abs_dir` that are owned by a group `group`. + """ + _LOG.debug("") + mode = "\\!" if not is_equal else "" + cmd = f'find {dir_name} -name "*" {mode} -group "{group}"' + _, txt = hsystem.system_to_string(cmd) + files: List[str] = txt.split("\n") + return files + + +def _compute_stats_by_user_and_group(dir_name: str) -> Tuple[Dict, Dict, Dict]: + """ + Scan all the files reporting statistics in terms of users and groups. + + It also compute a mapping from file to user and group. + """ + _LOG.debug("") + # Find all files. + cmd = f'find {dir_name} -name "*"' + _, txt = hsystem.system_to_string(cmd) + files = txt.split("\n") + # Get the user of each file. + user_to_files: Dict[str, List[str]] = {} + group_to_files: Dict[str, List[str]] = {} + file_to_user_group: Dict[str, Tuple[str, str]] = {} + for file in files: + user, group = _get_user_group(file) + # Update mapping from user to files. + if user not in user_to_files: + user_to_files[user] = [] + user_to_files[user].append(file) + # Update mapping from group to files. + if group not in group_to_files: + group_to_files[group] = [] + group_to_files[group].append(file) + # Update the mapping from file to (user, group). + hdbg.dassert_not_in(file, file_to_user_group) + file_to_user_group[file] = (user, group) + # Print stats. 
+ txt1 = "" + for user, files in user_to_files.items(): + txt1 += f"{user}({len(files)}), " + _LOG.info("user=%s", txt1) + # + txt2 = "" + for group, files in group_to_files.items(): + txt2 += f"{group}({len(files)}), " + _LOG.info("group=%s", txt2) + return user_to_files, group_to_files, file_to_user_group + + +def _ls_l(files: List[str], size: int = 100) -> str: + """ + Run `ls -l` on the files using chunks of size `size`. + """ + txt = [] + for pos in range(0, len(files), size): + files_tmp = files[pos : pos + size] + files_tmp = [f"'{f}'" for f in files_tmp] + cmd = f"ls -ld {' '.join(files_tmp)}" + _, txt_tmp = hsystem.system_to_string(cmd) + txt.append(txt_tmp) + return "\n".join(txt) + + +def _exec_cmd_by_chunks( + cmd: str, files: List[str], abort_on_error: bool, size: int = 100 +) -> None: + """ + Execute `cmd` on files using chunks of size `size`. + """ + for pos in range(0, len(files), size): + files_tmp = files[pos : pos + size] + files_tmp = [f"'{f}'" for f in files_tmp] + cmd = f"{cmd} {' '.join(files_tmp)}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +def _print_problems(dir_name: str = ".") -> None: + """ + Do `ls -l` on files that are not owned by the current user and its group. + + This function is used for debugging. 
+ """ + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + user = hsystem.get_user_name() + docker_user = hserver.get_docker_user() + # user_group = f"{user}_g" + # shared_group = hserver.get_docker_shared_group() + files_with_problems = [] + for file, (curr_user, curr_group) in file_to_user_group.items(): + _ = curr_user, curr_group + # Files owned by our user and + # if curr_user == user and curr_group == user_group: + # continue + if curr_user in (user, docker_user): + continue + # if curr_group == shared_group: + # continue + files_with_problems.append(file) + # + txt = _ls_l(files_with_problems) + print(txt) + + +def _change_file_ownership(file: str, abort_on_error: bool) -> None: + """ + Change ownership of files with an invalid user (e.g., 265533) by copying + and deleting. + """ + # pylint: disable=line-too-long + # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # -rw-r--r-- 1 265533 sasm-fileshare 14327 Nov 3 14:01 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # + # > mv ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{,.OLD} + # + # > cp ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{.OLD,} + # + # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # -rw-r--r-- 1 gsaggese sasm-fileshare 14327 Nov 5 17:58 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # + # > rm -rf ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py.OLD + # pylint: enable=line-too-long + hdbg.dassert_file_exists(file) + tmp_file = file + ".OLD" + # + cmd = f"mv {file} {tmp_file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + # + cmd = f"cp {tmp_file} {file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + # + cmd = f"rm -rf {tmp_file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +def _fix_invalid_owner(dir_name: str, fix: bool, abort_on_error: bool) -> None: + """ + Fix files that are owned by a user 
that is not the current user or the + Docker one. + """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + # + _LOG.info("Before fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + # + user = hsystem.get_user_name() + docker_user = hserver.get_docker_user() + for file, (curr_user, _) in tqdm.tqdm(file_to_user_group.items()): + if curr_user not in (user, docker_user): + _LOG.info("Fixing file '%s'", file) + hdbg.dassert_file_exists(file) + cmd = f"ls -l {file}" + hsystem.system( + cmd, abort_on_error=abort_on_error, suppress_output=False + ) + if fix: + _change_file_ownership(file, abort_on_error) + # + _LOG.info("After fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + + +def _fix_group(dir_name: str, fix: bool, abort_on_error: bool) -> None: + """ + Ensure that all files are owned by the shared group. + """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + _LOG.info("Before fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + if fix: + # Get the user and the group. + user = hsystem.get_user_name() + user_group = f"{user}_g" + shared_group = hserver.get_docker_shared_group() + # + for file, (curr_user, curr_group) in file_to_user_group.items(): + # If the group is the shared group there is nothing to do. + if curr_group == shared_group: + continue + cmd = f"chgrp {shared_group} {file}" + if curr_user == user: + # This is a paranoia check. + hdbg.dassert_eq(curr_group, user_group) + else: + # For files not owned by the current user, we need to `sudo`. + cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + _LOG.info("After fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + else: + _LOG.warning("Skipping fix") + + +def _fix_group_permissions(dir_name: str, abort_on_error: bool) -> None: + """ + Ensure that all files are owned by the shared group. 
+ """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + user = hsystem.get_user_name() + # docker_user = get_default_param("DOCKER_USER") + for file, (curr_user, curr_group) in tqdm.tqdm(file_to_user_group.items()): + _ = curr_group + st_mode = os.stat(file).st_mode + perms = oct(st_mode & 0o777) + # perms=0o775 + if perms[2] != perms[3]: + _LOG.debug("%s -> %s, %s", file, oct(st_mode), perms) + cmd = f"chmod g=u {file}" + if curr_user != user: + # For files not owned by the current user, we need to `sudo`. + cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + is_dir = os.path.isdir(file) + if is_dir: + # pylint: disable=line-too-long + # From https://www.gnu.org/software/coreutils/manual/html_node/Directory-Setuid-and-Setgid.html + # If a directory + # inherit the same group as the directory, + # pylint: enable=line-too-long + has_set_group_id = st_mode & stat.S_ISGID + if not has_set_group_id: + cmd = f"chmod g+s {file}" + if curr_user != user: + # For files not owned by the current user, we need to `sudo`. 
+ cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +@task +def fix_perms( # type: ignore + ctx, dir_name=".", action="all", fix=True, abort_on_error=True +): + """ + :param action: + - `all`: run all the fixes + - `print_stats`: print stats about file users and groups + - `print_problems`: + - `fix_invalid_owner`: fix the files with an invalid owner (e.g., mysterious + 265533) + - `fix_group`: ensure that shared group owns all the files + - `fix_group_permissions`: ensure that the group permissions are the same + as the owner ones + """ + _ = ctx + hlitauti.report_task() + # + if hserver.is_dev4(): + if action == "all": + action = ["fix_invalid_owner", "fix_group", "fix_group_permissions"] + else: + action = [action] + # + file_name1 = "./tmp.fix_perms.before.txt" + _save_dir_status(dir_name, file_name1) + # + if "print_stats" in action: + _compute_stats_by_user_and_group(dir_name) + if "print_problems" in action: + _print_problems(dir_name) + if "fix_invalid_owner" in action: + _fix_invalid_owner(dir_name, fix, abort_on_error) + if "fix_group" in action: + _fix_group(dir_name, fix, abort_on_error) + if "fix_group_permissions" in action: + _fix_group_permissions(dir_name, abort_on_error) + # + file_name2 = "./tmp.fix_perms.after.txt" + _save_dir_status(dir_name, file_name2) + # + cmd = f"To compare run:\n> vimdiff {file_name1} {file_name2}" + print(cmd) + elif hserver.is_dev_csfy(): + user = hsystem.get_user_name() + group = user + cmd = f"sudo chown -R {user}:{group} *" + hsystem.system(cmd) + cmd = f"sudo chown -R {user}:{group} .pytest_cache" + hsystem.system(cmd, abort_on_error=False) + elif hserver.is_external_dev(): + # Nothing to do. 
@task
def print_tasks(ctx, as_code=False):  # type: ignore
    """
    Print the sorted names of the functions decorated with `@task` in
    `helpers/lib_tasks.py`.

    The names are extracted by grepping the file for a `def` on the line
    right after a `@task` line.

    :param as_code: if `True`, append a comma to each name so that the output
        can be pasted in a `from helpers.lib_tasks import ...` statement
    """
    hlitauti.report_task()
    _ = ctx
    lib_tasks_file_name = os.path.join(
        hgit.get_amp_abs_path(), "helpers/lib_tasks.py"
    )
    hdbg.dassert_file_exists(lib_tasks_file_name)
    # TODO(gp): Use __file__ instead of hardwiring the file.
    cmd = rf'\grep "^@task" -A 1 {lib_tasks_file_name} | grep def'
    # The output looks like:
    # def print_setup(ctx):  # type: ignore
    # def git_pull(ctx):  # type: ignore
    # def git_fetch_master(ctx):  # type: ignore
    _, grep_output = hsystem.system_to_string(cmd)
    task_names = []
    for grep_line in grep_output.split("\n"):
        _LOG.debug("line=%s", grep_line)
        match = re.match(r"^def\s+(\S+)\(", grep_line)
        if not match:
            continue
        task_name = match.group(1)
        _LOG.debug(" -> %s", task_name)
        task_names.append(task_name)
    task_names.sort()
    # Format one name per line, with a trailing comma when printing as code.
    suffix = "," if as_code else ""
    print("\n".join(f"{task_name}{suffix}" for task_name in task_names))
+ """ + _ = ctx + print( + henv.env_to_str( + repo_config=repo_config, + server_config=server_config, + system_signature=system_signature, + env_vars=env_vars, + ) + ) + + +# TODO(gp): +# Print a CSV +# cat /share/data/cf_production/20221005/system_log_dir/process_forecasts/target_positions/20221005_153006.csv | column -t -s, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py new file mode 100644 index 000000000..98a9b203e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py @@ -0,0 +1,1743 @@ +""" +Import as: + +import helpers.lib_tasks_pytest as hlitapyt +""" + +import json +import logging +import os +import re +import sys +from typing import Any, List, Optional, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hcoverage as hcovera +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.htraceback as htraceb +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_lint as hlitalin +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# Run tests. 
+# ############################################################################# + + +_COV_PYTEST_OPTS = [ + # Only compute coverage for current project and not venv libraries. + "--cov=.", + "--cov-branch", + # Report the missing lines. + # Name Stmts Miss Cover Missing + # ------------------------------------------------------------------------- + # myproj/__init__ 2 0 100% + # myproj/myproj 257 13 94% 24-26, 99, 149, 233-236, 297-298 + "--cov-report term-missing", + # Report data in the directory `htmlcov`. + "--cov-report html", + # "--cov-report annotate", +] + + +_TEST_TIMEOUTS_IN_SECS = { + "fast_tests": 5, + "slow_tests": 30, + "superslow_tests": 60 * 60, +} + + +_NUM_TIMEOUT_TEST_RERUNS = { + "fast_tests": 2, + "slow_tests": 1, + "superslow_tests": 1, +} + + +@task +def run_blank_tests(ctx, stage="dev", version=""): # type: ignore + """ + (ONLY CI/CD) Test that pytest in the container works. + """ + hlitauti.report_task() + _ = ctx + base_image = "" + cmd = '"pytest -h >/dev/null"' + docker_cmd_ = hlitadoc._get_docker_compose_cmd( + base_image, stage, version, cmd + ) + hsystem.system(docker_cmd_, abort_on_error=False, suppress_output=False) + + +def _select_tests_to_skip(test_list_name: str) -> str: + """ + Generate text for pytest specifying which tests to deselect. + """ + if test_list_name == "fast_tests": + skipped_tests = "not slow and not superslow" + elif test_list_name == "slow_tests": + skipped_tests = "slow and not superslow" + elif test_list_name == "superslow_tests": + skipped_tests = "not slow and superslow" + else: + raise ValueError(f"Invalid `test_list_name`={test_list_name}") + return skipped_tests + + +def _build_run_command_line( + test_list_name: str, + custom_marker: str, + pytest_opts: str, + skip_submodules: bool, + coverage: bool, + collect_only: bool, + tee_to_file: bool, + n_threads: str, + *, + allure_dir: Optional[str] = None, +) -> str: + """ + Build the pytest run command. 
+ + E.g., + + ``` + pytest -m "optimizer and not slow and not superslow" \ + . \ + -o timeout_func_only=true \ + --timeout 5 \ + --reruns 2 \ + --only-rerun "Failed: Timeout" + ``` + + The rest of params are the same as in `run_fast_tests()`. + + The invariant is that we don't want to duplicate pytest options that can be + passed by the user through `-p` (unless really necessary). + + :param test_list_name: "fast_tests", "slow_tests" or + "superslow_tests" + :param custom_marker: specify a space separated list of + `pytest` markers to skip (e.g., `optimizer` for the optimizer + tests, see `pytest.ini`). Empty means no marker to skip + :param allure_dir: directory to save allure results to. If specified, allure + plugin will be installed on-the-fly and results will be generated + and saved to the specified directory + """ + hdbg.dassert_in( + test_list_name, _TEST_TIMEOUTS_IN_SECS, "Invalid test_list_name" + ) + pytest_opts = pytest_opts or "." + pytest_opts_tmp = [] + # Select tests to skip based on the `test_list_name` (e.g., fast tests) + # and on the custom marker, if present. + skipped_tests = _select_tests_to_skip(test_list_name) + timeout_in_sec = _TEST_TIMEOUTS_IN_SECS[test_list_name] + # Detect if we are running on a CK dev server / inside CI + # or a laptop outside the CK infra. + is_outside_ck_infra = ( + not hserver.is_dev_csfy() and not hserver.is_inside_ci() + ) + if is_outside_ck_infra: + timeout_multiplier = 10 + _LOG.warning( + f"Tests are running outside the CK server and CI, timeout increased {timeout_multiplier} times." + ) + # Since we are running outside the CK server we increase the duration + # of the timeout, since the thresholds are set for the CK server. 
+ timeout_in_sec *= timeout_multiplier + if custom_marker != "": + pytest_opts_tmp.append(f'-m "{custom_marker} and {skipped_tests}"') + else: + pytest_opts_tmp.append(f'-m "{skipped_tests}"') + if pytest_opts: + pytest_opts_tmp.append(pytest_opts) + # Adding `timeout_func_only` is a workaround for + # https://github.com/pytest-dev/pytest-rerunfailures/issues/99. Because of + # it, we limit only run time, without setup and teardown time. + pytest_opts_tmp.append("-o timeout_func_only=true") + pytest_opts_tmp.append(f"--timeout {timeout_in_sec}") + num_reruns = _NUM_TIMEOUT_TEST_RERUNS[test_list_name] + pytest_opts_tmp.append( + f'--reruns {num_reruns} --only-rerun "Failed: Timeout"' + ) + if hserver.skip_submodules_test(): + # For some repos submodules should be skipped + # regardless of the passed value. + skip_submodules = True + if skip_submodules: + submodule_paths = hgit.get_submodule_paths() + _LOG.warning( + "Skipping %d submodules: %s", len(submodule_paths), submodule_paths + ) + pytest_opts_tmp.append( + " ".join([f"--ignore {path}" for path in submodule_paths]) + ) + if coverage: + pytest_opts_tmp.append(" ".join(_COV_PYTEST_OPTS)) + if collect_only: + _LOG.warning("Only collecting tests as per user request") + pytest_opts_tmp.append("--collect-only") + # Indicate the number of threads for parallelization. + if n_threads != "serial": + pytest_opts_tmp.append(f"-n {str(n_threads)}") + if allure_dir is not None: + pytest_opts_tmp.append(f"--alluredir={allure_dir}") + # Generate test report. + pytest_opts_tmp.append("--junit-xml=tmp.junit.xml") + # Add runnable dir image name to the test report. + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + pytest_opts_tmp.append(f'-o junit_suite_name="{image_name}"') + # Concatenate the options. 
+ _LOG.debug("pytest_opts_tmp=\n%s", str(pytest_opts_tmp)) + pytest_opts_tmp = [po for po in pytest_opts_tmp if po != ""] + # TODO(gp): Use to_multi_line_cmd() + pytest_opts = " ".join([po.rstrip().lstrip() for po in pytest_opts_tmp]) + cmd = f"pytest {pytest_opts}" + if allure_dir is not None: + # Install the `allure-pytest` before running the tests. This is needed + # to generate Allure results which serve as an input for generating + # Allure HTML reports. + # Excluding the command `"source /venv/bin/activate"` because post-activation, + # the `PATH` variable lacks necessary values, causing a failure in a test + # associated with `publish_notebook.py`. + cmd = f"sudo /venv/bin/pip install allure-pytest && {cmd}" + if tee_to_file: + cmd += f" 2>&1 | tee tmp.pytest.{test_list_name}.log" + return cmd + + +def _run_test_cmd( + ctx: Any, + stage: str, + version: str, + cmd: str, + coverage: bool, + collect_only: bool, + skip_pull: bool, + start_coverage_script: bool, + **ctx_run_kwargs: Any, +) -> Optional[int]: + """ + See params in `run_fast_tests()`. + """ + if collect_only: + # Clean files. + hlitauti.run(ctx, "rm -rf ./.coverage*") + # Run. + base_image = "" + # We need to add some " to pass the string as it is to the container. + cmd = f"'{cmd}'" + # We use "host" for the app container to allow access to the database + # exposing port 5432 on localhost (of the server), when running dind we + # need to switch back to bridge. See CmTask988. + extra_env_vars = ["NETWORK_MODE=bridge"] + docker_cmd_ = hlitadoc._get_docker_compose_cmd( + base_image, stage, version, cmd, extra_env_vars=extra_env_vars + ) + _LOG.info("cmd=%s", docker_cmd_) + # We can't use `hsystem.system()` because of buffering of the output, + # losing formatting and so on, so we stick to executing through `ctx`. + rc: Optional[int] = hlitadoc._docker_cmd( + ctx, docker_cmd_, skip_pull=skip_pull, **ctx_run_kwargs + ) + # Print message about coverage. 
+ if coverage: + msg = """ + - The coverage results in textual form are above + + - To browse the files annotate with coverage, start a server (not from the + container): + > (cd ./htmlcov; python -m http.server 33333) + - Then go with your browser to `localhost:33333` to see which code is + covered + """ + msg = hprint.dedent(msg) + print(msg) + if start_coverage_script: + # Create and run a script to show the coverage in the browser. + script_txt = """ + (sleep 2; open http://localhost:33333) & + (cd ./htmlcov; python -m http.server 33333) + """ + script_txt = hprint.dedent(script_txt) + script_name = "./tmp.coverage.sh" + hio.create_executable_script(script_name, script_txt) + coverage_rc = hsystem.system(script_name) + if coverage_rc != 0: + _LOG.warning( + "Setting `rc` to `0` even though the coverage script fails." + ) + rc = 0 + return rc + + +def _run_tests( + ctx: Any, + test_list_name: str, + stage: str, + version: str, + custom_marker: str, + pytest_opts: str, + skip_pull: bool, + skip_submodules: bool, + coverage: bool, + collect_only: bool, + tee_to_file: bool, + n_threads: str, + git_clean_: bool, + *, + start_coverage_script: bool = False, + allure_dir: Optional[str] = None, + # TODO(Grisha): do we need to expose ctx kwargs to the invoke targets? + # E.g., to `run_fast_tests`. See CmTask3602 "All tests fail". + **ctx_run_kwargs: Any, +) -> Optional[int]: + """ + See params in `run_fast_tests()`. + """ + if git_clean_: + cmd = "invoke git_clean --fix-perms" + hlitauti.run(ctx, cmd) + # Build the command line. + cmd = _build_run_command_line( + test_list_name, + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + allure_dir=allure_dir, + ) + # Execute the command line. + rc = _run_test_cmd( + ctx, + stage, + version, + cmd, + coverage, + collect_only, + skip_pull, + start_coverage_script, + **ctx_run_kwargs, + ) + return rc + + +# TODO(Grisha): "Unit tests run_*_tests invokes" CmTask #1652. 
@task
def run_tests(  # type: ignore
    ctx,
    test_lists,
    abort_on_first_error=False,
    stage="dev",
    version="",
    custom_marker="",
    pytest_opts="",
    skip_pull=False,
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
    **kwargs,
):
    """
    Run several test lists one after the other and summarize the results.

    The remaining parameters are forwarded unchanged to `_run_tests()` for
    each test list.

    :param test_lists: comma separated list with test lists to run (e.g., `fast_tests,slow_tests`)
    :param abort_on_first_error: stop after the first test list failing
    :return: 1 if at least one test list failed, 0 otherwise
    """
    results = []
    for test_list_name in test_lists.split(","):
        rc = _run_tests(
            ctx,
            test_list_name,
            stage,
            version,
            custom_marker,
            pytest_opts,
            skip_pull,
            skip_submodules,
            coverage,
            collect_only,
            tee_to_file,
            n_threads,
            git_clean_,
            # Do not raise on failure, so that the next test list can run.
            warn=True,
            allure_dir=allure_dir,
            **kwargs,
        )
        if rc != 0:
            _LOG.error("'%s' tests failed", test_list_name)
            if abort_on_first_error:
                sys.exit(-1)
        results.append((test_list_name, rc))
    # Summarize the results.
    _LOG.info("# Tests run summary:")
    for test_list_name, test_list_rc in results:
        if test_list_rc != 0:
            _LOG.error("'%s' tests failed", test_list_name)
        else:
            _LOG.info("'%s' tests succeeded", test_list_name)
    # Aggregate with the same criterion used in the summary. A separate name
    # is used in the loop above so that the overall result is not overwritten
    # by the return code of the last test list.
    overall_rc = int(any(test_list_rc != 0 for _, test_list_rc in results))
    return overall_rc
@task
def run_fast_tests(  # type: ignore
    ctx,
    stage="dev",
    version="",
    pytest_opts="",
    run_only_test_list="",
    skip_test_list="",
    skip_pull=False,
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run the fast tests.

    Check `gh auth status` before invoking to avoid auth errors.

    :param stage: Docker image stage to use
    :param pytest_opts: extra options passed to `pytest`; it can be empty
    :param run_only_test_list: comma-separated markers to run, e.g.
        `--run_only_test_list = requires_ck_infra,requires_aws`. It cannot be
        combined with `skip_test_list`
    :param skip_test_list: comma-separated markers to skip
    :param skip_submodules: ignore the dirs inside a submodule
    :param coverage: compute the test coverage
    :param collect_only: show the tests that would be executed without running
        them
    :param tee_to_file: also save the output of pytest to a
        `tmp.pytest.fast_tests.log` file
    :param n_threads: number of threads used to run the tests
        - "auto": distribute the tests across all the available CPUs
    :param git_clean_: run `invoke git_clean --fix-perms` before the tests
    :param allure_dir: dir where to save the allure results. If specified, the
        allure plugin is installed on-the-fly and the results are generated and
        saved in this dir
    """
    hlitauti.report_task()
    both_lists_passed = run_only_test_list and skip_test_list
    hdbg.dassert(
        not both_lists_passed,
        "You can't specify both --run_only_test_list and --skip_test_list",
    )
    # Translate the marker lists from the command line into a single pytest
    # marker expression.
    marker = _get_custom_marker(
        run_only_test_list=run_only_test_list, skip_test_list=skip_test_list
    )
    return _run_tests(
        ctx,
        "fast_tests",
        stage,
        version,
        marker,
        pytest_opts,
        skip_pull,
        skip_submodules,
        coverage,
        collect_only,
        tee_to_file,
        n_threads,
        git_clean_,
        allure_dir=allure_dir,
    )
@task
def run_fast_slow_tests(  # type: ignore
    ctx,
    abort_on_first_error=False,
    stage="dev",
    version="",
    pytest_opts="",
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run fast and slow tests back-to-back.

    Same params as `invoke run_fast_tests`.

    :param abort_on_first_error: stop after the first test list failing
    :return: the return code of `run_tests()`
    """
    hlitauti.report_task()
    test_lists = "fast_tests,slow_tests"
    custom_marker = ""
    # Pass the options by keyword: `run_tests()` has a `skip_pull` parameter
    # between `pytest_opts` and `skip_submodules` that this task does not
    # expose, so a positional call shifts every following argument by one.
    rc = run_tests(
        ctx,
        test_lists,
        abort_on_first_error=abort_on_first_error,
        stage=stage,
        version=version,
        custom_marker=custom_marker,
        pytest_opts=pytest_opts,
        skip_submodules=skip_submodules,
        coverage=coverage,
        collect_only=collect_only,
        tee_to_file=tee_to_file,
        n_threads=n_threads,
        git_clean_=git_clean_,
        allure_dir=allure_dir,
    )
    return rc


@task
def run_fast_slow_superslow_tests(  # type: ignore
    ctx,
    abort_on_first_error=False,
    stage="dev",
    version="",
    pytest_opts="",
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run fast, slow, superslow tests back-to-back.

    Same params as `invoke run_fast_tests`.

    :param abort_on_first_error: stop after the first test list failing
    :return: the return code of `run_tests()`
    """
    hlitauti.report_task()
    test_lists = "fast_tests,slow_tests,superslow_tests"
    custom_marker = ""
    # Pass the options by keyword: `run_tests()` has a `skip_pull` parameter
    # between `pytest_opts` and `skip_submodules` that this task does not
    # expose, so a positional call shifts every following argument by one.
    rc = run_tests(
        ctx,
        test_lists,
        abort_on_first_error=abort_on_first_error,
        stage=stage,
        version=version,
        custom_marker=custom_marker,
        pytest_opts=pytest_opts,
        skip_submodules=skip_submodules,
        coverage=coverage,
        collect_only=collect_only,
        tee_to_file=tee_to_file,
        n_threads=n_threads,
        git_clean_=git_clean_,
        allure_dir=allure_dir,
    )
    return rc
+ `s3://...-html/html_coverage/grisha_CmTask1047_fix_tests`. + """ + # Build the dir name from user and branch name. + user = hsystem.get_user_name() + branch_name = hgit.get_branch_name() + _LOG.debug("User='%s', branch_name='%s'", user, branch_name) + s3_html_coverage_dir = f"{user}_{branch_name}" + # Get the full path to the dir. + s3_html_base_dir = "html_coverage" + s3_html_bucket_path = hrecouti.get_repo_config().get_html_bucket_path() + s3_html_coverage_path = os.path.join( + s3_html_bucket_path, s3_html_base_dir, s3_html_coverage_dir + ) + # Copy HTML coverage data from the local dir to S3. + local_coverage_path = "./htmlcov" + # TODO(Nikola): Revert to `s3fs_.put` after `s3fs` is updated to latest + # version. See CmTask #2400. + use_aws_copy = True + if use_aws_copy: + sudo_prefix = "" + if hserver.is_inside_ci(): + # There is no AWS config in GH action, thus create default one from + # chosen profile. To bypass permission errors, `sudo` is used. + sudo_prefix = "sudo " + aws_set_param_cmd = "sudo aws configure set" + aws_set_profile_cmd = f"--profile {aws_profile}" + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. + # profile_prefix = aws_profile.upper() + profile_prefix = ( + "CSFY" + if aws_profile.upper() in ["AM", "CK"] + else aws_profile.upper() + ) + # Check if AWS session token is set in environment variable. 
+ if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: + aws_set_value_pairs = [ + f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + f"aws_session_token ${profile_prefix}_AWS_SESSION_TOKEN", + f"region ${profile_prefix}_AWS_DEFAULT_REGION", + ] + else: + aws_set_value_pairs = [ + f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + f"region ${profile_prefix}_AWS_DEFAULT_REGION", + ] + aws_config_cmds = [ + f"{aws_set_param_cmd} {aws_set_value_pair} {aws_set_profile_cmd}" + for aws_set_value_pair in aws_set_value_pairs + ] + aws_config_pipe_cmd = " && ".join(aws_config_cmds) + hsystem.system(aws_config_pipe_cmd) + cp_cmd = ( + f"{sudo_prefix}aws s3 cp {local_coverage_path} {s3_html_coverage_path} " + f"--recursive --profile {aws_profile}" + ) + hsystem.system(cp_cmd) + else: + # Use `s3fs` to copy data to AWS S3. + s3fs_ = hs3.get_s3fs(aws_profile) + s3fs_.put(local_coverage_path, s3_html_coverage_path, recursive=True) + _LOG.info( + "HTML coverage report is published on S3: path=`%s`", + s3_html_coverage_path, + ) + + +@task +def run_coverage_report( # type: ignore + ctx, + target_dir, + generate_html_report=False, + publish_html_on_s3=True, + aws_profile="ck", +): + """ + Compute test coverage stats. + + The flow is: + - Run tests and compute coverage stats for each test type + - Combine coverage stats in a single file + - Generate a text report + - Generate a HTML report (optional) + - Post it on S3 (optional) + + :param target_dir: directory to compute coverage stats for. The value '.' 
+ uses all the dirs in the current working directory + :param generate_html_report: whether to generate HTML coverage report or not + :param publish_html_on_s3: whether to publish HTML coverage report or not + :param aws_profile: the AWS profile to use for publishing HTML report + """ + # TODO(Grisha): allow user to specify which tests to run. + # Run fast tests for the target dir and collect coverage results. + fast_tests_cmd = f"invoke run_fast_tests --coverage -p {target_dir}" + hlitauti.run(ctx, fast_tests_cmd, use_system=False) + fast_tests_coverage_file = ".coverage_fast_tests" + create_fast_tests_file_cmd = f"mv .coverage {fast_tests_coverage_file}" + hsystem.system(create_fast_tests_file_cmd) + # Run slow tests for the target dir and collect coverage results. + slow_tests_cmd = f"invoke run_slow_tests --coverage -p {target_dir}" + hlitauti.run(ctx, slow_tests_cmd, use_system=False) + slow_tests_coverage_file = ".coverage_slow_tests" + create_slow_tests_file_cmd = f"mv .coverage {slow_tests_coverage_file}" + hsystem.system(create_slow_tests_file_cmd) + # Check that coverage files are present for both fast and slow tests. + hdbg.dassert_file_exists(fast_tests_coverage_file) + hdbg.dassert_file_exists(slow_tests_coverage_file) + # + report_cmd: List[str] = [] + # Clean the previous coverage results. For some docker-specific reasons + # command which combines stats does not work when being run first in + # the chain `bash -c "cmd1 && cmd2 && cmd3"`. So `erase` command which + # does not affect the coverage results was added as a workaround. + report_cmd.append("coverage erase") + # Merge stats for fast and slow tests into single dir. + report_cmd.append( + f"coverage combine --keep {fast_tests_coverage_file} {slow_tests_coverage_file}" + ) + # Specify the dirs to include and exclude in the report. + exclude_from_report = None + if target_dir == ".": + # Include all dirs. + include_in_report = "*" + if hserver.skip_submodules_test(): + # Exclude submodules. 
+ submodule_paths = hgit.get_submodule_paths() + exclude_from_report = ",".join( + path + "/*" for path in submodule_paths + ) + else: + # Include only the target dir. + include_in_report = f"*/{target_dir}/*" + # Generate text report with the coverage stats. + report_stats_cmd = ( + f"coverage report --include={include_in_report} --sort=Cover" + ) + if exclude_from_report is not None: + report_stats_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_stats_cmd) + if generate_html_report: + # Generate HTML report with the coverage stats. + report_html_cmd = f"coverage html --include={include_in_report}" + if exclude_from_report is not None: + report_html_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_html_cmd) + # Execute commands above one-by-one inside docker. Coverage tool is not + # installed outside docker. + full_report_cmd = " && ".join(report_cmd) + docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" + hlitauti.run(ctx, docker_cmd_) + if publish_html_on_s3: + # Publish HTML report on S3. + _publish_html_coverage_report_on_s3(aws_profile) + + +def _get_inclusion_settings(target_dir: str) -> Tuple[str, Optional[str]]: + """ + Determine include/omit glob patterns for the coverage report for both text + and HTML coverage reports. + + :param target_dir: directory for coverage stats; use "." to indicate all directories + :return: glob pattern to include and a comma-separated glob pattern to omit + + Examples: + 1. Cover everything (no submodules to omit): + `_get_inclusion_settings(".")` -> `("*", "")` + + 2. Only cover code under a specific directory: + `_get_inclusion_settings("helpers")` -> `("*/helpers/*", None)` + + In `_run_coverage`: + - To cover the entire repo coverage (e.g. 
`helpers` project root): + `_get_inclusion_settings(".")` corresponds to + ``` + > coverage report --include=* --sort=Cover + > coverage html --include=* [--omit=submodule1/*,submodule2/*] + ``` + + - To cover a single-directory: + ` _get_inclusion_settings("helpers")` corresponds to: + ``` + > coverage report --include=*/helpers/* --sort=Cover + > coverage html --include=*/helpers/* [--omit=...] + ``` + """ + if target_dir == ".": + include_in_report = "*" + exclude_from_report = "" + if hserver.skip_submodules_test(): + submodule_paths: List[str] = hgit.get_submodule_paths() + exclude_from_report = ",".join( + f"{path}/*" for path in submodule_paths + ) + else: + include_in_report = f"*/{target_dir}/*" + exclude_from_report = None + return include_in_report, exclude_from_report + + +@task +def run_coverage(ctx, suite, target_dir=".", generate_html_report=False): # type: ignore + """ + Task to run coverage for any test suite. + + :param ctx: invoke context + :param suite: suite to run ("fast", "slow", "superslow") + :param target_dir: directory to measure coverage + """ + hdbg.dassert_in(suite, ("fast", "slow", "superslow")) + # Build the command line. + test_cmd_parts = [ + # Invoke the "_tests" task. + "invoke", + f"run_{suite}_tests", + # Enable coverage computation. + "--coverage", + # Specify which directory to test. + "-p", + target_dir, + ] + test_cmd = hlitauti.to_multi_line_cmd(test_cmd_parts) + # Run the tests under coverage. + hlitauti.run(ctx, test_cmd, use_system=False) + hdbg.dassert_file_exists(".coverage") + # Compute which files/dirs to include and omit in the report. + include_in_report, exclude_from_report = _get_inclusion_settings(target_dir) + report_cmd: List[str] = [ + # Reset any previous coverage data to avoid contamination. + "coverage erase" + ] + # Generate a text report, including only our target paths. 
@task
def run_coverage_subprocess(ctx, target_dir=".", generate_html_report=False):  # type: ignore
    """
    Run `pytest` on a directory under `coverage run --parallel-mode`, with the
    `hcovera` hooks injected, and generate the coverage reports.

    The commands are executed directly through `hsystem.system()` (not through
    `invoke docker_cmd`).

    :param ctx: invoke context (unused)
    :param target_dir: directory to measure coverage
    :param generate_html_report: whether to generate HTML coverage
        report or not
    """
    _LOG.info("Running comprehensive test coverage with subprocess injection...")
    # Inject coverage hooks.
    hcovera.inject()
    try:
        # Setup coverage environment for subprocess.
        hcovera.coverage_commands_subprocess()
        # Clean any existing coverage data.
        erase_cmd = "coverage erase"
        hsystem.system(erase_cmd, abort_on_error=True)
        # Build the coverage command with parallel mode.
        coverage_cmd = ["coverage", "run", "--parallel-mode", "-m", "pytest"]
        # Add target directory.
        coverage_cmd.append(target_dir)
        test_cmd = hlitauti.to_multi_line_cmd(coverage_cmd)
        # Use lazy `%s` formatting so that the command is actually reported.
        _LOG.debug("About to run command: %s", test_cmd)
        # Run tests with coverage tracking directly.
        hsystem.system(test_cmd, abort_on_error=True)
        # Combine coverage data from subprocesses directly.
        hcovera.coverage_combine()
        hdbg.dassert_file_exists(".coverage")
        include_in_report, exclude_from_report = _get_inclusion_settings(
            target_dir
        )
        # Normalize patterns like `*/./helpers/*` or `*//helpers/*`.
        include_in_report = include_in_report.replace("/./", "/").replace(
            "//", "/"
        )
        report_cmd: List[str] = []
        # Generate a text report, including only our target paths.
        report_stats_cmd = (
            f"coverage report --include={include_in_report} --sort=Cover"
        )
        if exclude_from_report:
            exclude_from_report = exclude_from_report.replace(
                "/./", "/"
            ).replace("//", "/")
            report_stats_cmd += f" --omit={exclude_from_report}"
        report_cmd.append(report_stats_cmd)
        if generate_html_report:
            # Generate HTML report with the coverage stats.
            report_html_cmd = f"coverage html --include={include_in_report}"
            if exclude_from_report:
                report_html_cmd += f" --omit={exclude_from_report}"
            report_cmd.append(report_html_cmd)
        # Export XML coverage report to integrate with Codecov.
        report_cmd.append("coverage xml -o coverage.xml")
        full_report_cmd = " && ".join(report_cmd)
        # Run coverage report commands directly (avoid Docker-in-Docker issues).
        hsystem.system(full_report_cmd, abort_on_error=True)
    except Exception as e:
        # Log and propagate: the caller still sees the original exception.
        _LOG.error("Coverage with subprocess failed: %s", e)
        raise
    finally:
        # Always cleanup coverage hooks.
        hcovera.remove()
+ > pytest helpers/test/test_traceback.py 2>&1 | tee tmp.pytest.log + > pytest.sh helpers/test/test_traceback.py + # Parse the traceback + > invoke traceback -i tmp.pytest.log + ``` + + :param log_name: the file with the traceback + :param purify: purify the filenames from client (e.g., from running inside Docker) + """ + hlitauti.report_task() + # + dst_cfile = "cfile" + hio.delete_file(dst_cfile) + # Convert the traceback into a cfile. + cmd = [] + cmd.append("traceback_to_cfile.py") + if log_name: + cmd.append(f"-i {log_name}") + cmd.append(f"-o {dst_cfile}") + # Purify the file names. + if purify: + cmd.append("--purify_from_client") + else: + cmd.append("--no_purify_from_client") + cmd = " ".join(cmd) + hlitauti.run(ctx, cmd) + # Read and navigate the cfile with vim. + if os.path.exists(dst_cfile): + cmd = 'vim -c "cfile cfile"' + hlitauti.run(ctx, cmd, pty=True) + else: + _LOG.warning("Can't find %s", dst_cfile) + + +# ############################################################################# +# pytest_clean +# ############################################################################# + + +@task +def pytest_clean(ctx): # type: ignore + """ + Clean pytest artifacts. + """ + hlitauti.report_task() + _ = ctx + import helpers.hpytest as hpytest + + hpytest.pytest_clean(".") + + +# ############################################################################# +# pytest_repro +# ############################################################################# + + +def _get_failed_tests_from_file(file_name: str) -> List[str]: + hdbg.dassert_file_exists(file_name) + txt = hio.from_file(file_name) + if file_name.endswith("/cache/lastfailed"): + # Decode the json-style string. 
+ # { + # "vendors/test/test_vendors.py::Test_gp::test1": true, + # "vendors/test/test_vendors.py::Test_kibot_utils1::...": true, + # } + vals = json.loads(txt) + hdbg.dassert_isinstance(vals, dict) + tests = [k for k, v in vals.items() if v] + else: + # Extract failed tests from the regular text output. + tests = re.findall(r"FAILED (\S+\.py::\S+::\S+)\b", txt) + return tests + + +@task +def pytest_repro( # type: ignore + ctx, + mode="tests", + file_name="./.pytest_cache/v/cache/lastfailed", + show_stacktrace=False, + create_script=True, + script_name="./tmp.pytest_repro.sh", +): + """ + Generate commands to reproduce the failed tests after a `pytest` run. + + The workflow is: + ``` + # Run a lot of tests, e.g., the entire regression suite. + server> i run_fast_slow_tests 2>&1 | log pytest.txt + docker> pytest ... 2>&1 | log pytest.txt + + # Run the `pytest_repro` to summarize test failures and to generate + # commands to reproduce them. + server> i pytest_repro + ``` + + :param mode: the granularity level for generating the commands + - "tests" (default): failed test methods, e.g., + ``` + pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 + pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching2 + ``` + - "classes": classes of the failed tests, e.g., + ``` + pytest helpers/test/test_cache.py::TestCachingOnS3 + pytest helpers/test/test_cache.py::TestCachingOnS3_2 + ``` + - "files": files with the failed tests, e.g., + :param file_name: the name of the file containing the pytest output file to parse + :param show_stacktrace: whether to show the stacktrace of the failed tests + - only if it is available in the pytest output file + :param create_script: create a script to run the tests + :return: commands to reproduce pytest failures at the requested granularity level + """ + hlitauti.report_task() + _ = ctx + # Read file. 
+ _LOG.info("Reading file_name='%s'", file_name) + hdbg.dassert_file_exists(file_name) + _LOG.info("Reading failed tests from file '%s'", file_name) + # E.g., vendors/test/test_vendors.py::Test_gp::test1 + tests = _get_failed_tests_from_file(file_name) + if len(tests) == 0: + _LOG.info("Found 0 failed tests") + return "" + _LOG.debug("tests=%s", str(tests)) + # Process the tests. + targets = [] + for test in tests: + data = test.split("::") + hdbg.dassert_lte(len(data), 3, "Can't parse '%s'", test) + # E.g., dev_scripts/testing/test/test_run_tests.py + # E.g., helpers/test/helpers/test/test_list.py::Test_list_1 + # E.g., core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5 + test_file_name = test_class = test_method = "" + if len(data) >= 1: + test_file_name = data[0] + if len(data) >= 2: + test_class = data[1] + if len(data) >= 3: + test_method = data[2] + _LOG.debug( + "test=%s -> (%s, %s, %s)", + test, + test_file_name, + test_class, + test_method, + ) + if mode == "tests": + targets.append(test) + elif mode == "files": + if test_file_name != "": + targets.append(test_file_name) + else: + _LOG.warning( + "Skipping test='%s' since test_file_name='%s'", + test, + test_file_name, + ) + elif mode == "classes": + if test_file_name != "" and test_class != "": + targets.append(f"{test_file_name}::{test_class}") + else: + _LOG.warning( + "Skipping test='%s' since test_file_name='%s', test_class='%s'", + test, + test_file_name, + test_class, + ) + else: + hdbg.dfatal(f"Invalid mode='{mode}'") + # Package the output. + # targets is a list of tests in the format + # `helpers/test/test_env.py::Test_env1::test_get_system_signature1`. 
+ hdbg.dassert_isinstance(targets, list) + targets = hlist.remove_duplicates(targets) + targets = sorted(targets) + failed_test_output_str = ( + f"Found {len(targets)} failed pytest '{mode}' target(s); " + "to reproduce run:\n" + ) + res = [f"pytest {t}" for t in targets] + res = "\n".join(res) + failed_test_output_str += res + # + if show_stacktrace: + # Get the stacktrace block from the pytest output. + txt = hio.from_file(file_name) + if ( + "====== FAILURES ======" in txt + and "====== slowest 3 durations ======" in txt + ): + failures_blocks = txt.split("====== FAILURES ======")[1:] + failures_blocks = [ + x.split("====== slowest 3 durations ======")[0] + for x in failures_blocks + ] + txt = "\n".join([x.rstrip("=").lstrip("=") for x in failures_blocks]) + # Get the classes and names of the failed tests, e.g. + # "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5" -> + # -> "TestSmaModel.test5". + failed_test_names = [ + test.split("::")[1] + "." + test.split("::")[2] for test in tests + ] + tracebacks = [] + for name in failed_test_names: + # Get the stacktrace for the individual test failure. + # Its start is marked with the name of the test, e.g. + # "___________________ TestSmaModel.test5 ___________________". + start_block = "__ " + name + " __" + traceback_block = txt.rsplit(start_block, maxsplit=1)[-1] + end_block_options = [ + "__ " + n + " __" for n in failed_test_names if n != name + ] + for end_block in end_block_options: + # The end of the traceback for the current failed test is the + # start of the traceback for the next failed test. + if end_block in traceback_block: + traceback_block = traceback_block.split(end_block)[0] + _, traceback_ = htraceb.parse_traceback( + traceback_block, purify_from_client=False + ) + traceback_text = ( + traceback_.strip() if traceback_ is not None else "" + ) + tracebacks.append("\n".join(["# " + name, traceback_text, ""])) + # Combine the stacktraces for all the failures. 
@task
def pytest_rename_test(ctx, old_test_class_name, new_test_class_name):  # type: ignore
    """
    Rename a test class through `UnitTestRenamer`, starting from the current
    working dir.

    :param ctx: invoke context (unused)
    :param old_test_class_name: old class name
    :param new_test_class_name: new class name
    """
    hlitauti.report_task()
    _ = ctx
    # `lib_tasks` is used from outside the Docker container in the thin dev
    # environment and we want to avoid pulling in too many dependencies, unless
    # necessary, so we import dynamically.
    import helpers.hunit_test_utils as hunteuti

    # The renaming is applied to the tree rooted in the current dir.
    hunteuti.UnitTestRenamer(
        old_test_class_name, new_test_class_name, os.getcwd()
    ).run()
+ + - When goldens are required by the tests but the corresponding files + do not exist + - When the existing golden files are not actually required by the + corresponding tests + + :param dir_name: the head dir to start the check from + """ + hlitauti.report_task() + # Remove the log file. + if os.path.exists(out_file_name): + cmd = f"rm {out_file_name}" + hlitauti.run(ctx, cmd) + # Prepare the command line. + amp_abs_path = hgit.get_amp_abs_path() + amp_path = amp_abs_path.replace( + os.path.commonpath([os.getcwd(), amp_abs_path]), "" + ) + script_path = os.path.join( + amp_path, "dev_scripts/find_unused_golden_files.py" + ).lstrip("/") + docker_cmd_opts = [f"--dir_name {dir_name}"] + docker_cmd_ = f"{script_path} " + hlitauti._to_single_line_cmd( + docker_cmd_opts + ) + # Execute command line. + base_image = "" + cmd = hlitalin._get_lint_docker_cmd(base_image, docker_cmd_, stage, version) + cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}" + # Run. + hlitauti.run(ctx, cmd) + + +# ############################################################################# +# pytest_compare_logs +# ############################################################################# + + +def _purify_log_file( + file_name: str, remove_line_numbers: bool, grep_regex: str +) -> str: + txt = hio.from_file(file_name) + # Remove leading `16:34:27`. + txt = re.sub(r"^\d\d:\d\d:\d\d ", "", txt, flags=re.MULTILINE) + # Remove references like `at 0x7f43493442e0`. + txt = re.sub(r"at 0x\S{12}", "at 0x", txt, flags=re.MULTILINE) + # Remove `done (0.014 s)`. + txt = re.sub(r"(done) \(\d+\.\d+ s\)", "\\1", txt, flags=re.MULTILINE) + # Remove wall_clock_time='2022-06-17 04:36:56.062645-04:00'. + txt = re.sub(r"(wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) + # Remove `real_wall_clock_time = '2022-06-17 04:33:19.946025-04:00'`. + txt = re.sub(r"(real_wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) + # Remove `tqdm [00:00<00:00, 4.05it/s]`. 
+ txt = re.sub(r"(htqdm.py.*)\[.*\]", "\\1", txt, flags=re.MULTILINE) + # Remove `Task-3`. + txt = re.sub(r"(Task-)\d+", "\\1", txt, flags=re.MULTILINE) + # Remove line number, e.g., + # `htqdm.py abstract_market_data.py get_data_for_interval:259` + if remove_line_numbers: + txt = re.sub( + r"(\.py [a-zA-Z_][a-zA-Z0-9_]*):\d+ ", + "\\1:0 ", + txt, + flags=re.MULTILINE, + ) + # + if grep_regex: + lines = [] + for line in txt.split("\n"): + if re.search(grep_regex, line): + lines.append(line) + txt = "\n".join(lines) + return txt + + +@task +def pytest_compare_logs( # type: ignore + ctx, file1, file2, remove_line_numbers=False, grep_regex="", dry_run=False +): + """ + Diff two log files removing the irrelevant parts (e.g., timestamps, object + pointers). + + :param remove_line_numbers: remove line numbers from function calls + (e.g., `abstract_market_data.py get_data_for_interval:259` + :param grep_regex: select lines based on a regex + """ + suffix = "tmp" + # + txt = _purify_log_file(file1, remove_line_numbers, grep_regex) + file1_tmp = hio.add_suffix_to_filename(file1, suffix) + hio.to_file(file1_tmp, txt) + # + txt = _purify_log_file(file2, remove_line_numbers, grep_regex) + file2_tmp = hio.add_suffix_to_filename(file2, suffix) + hio.to_file(file2_tmp, txt) + # Save the script to compare. 
+ script_file_name = "./tmp.vimdiff_log.sh" + script_txt = f"vimdiff {file1_tmp} {file2_tmp}" + msg = "To diff run:" + hio.create_executable_script(script_file_name, script_txt, msg=msg) + hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) + + +# ############################################################################# +# pytest_buildmeister +# ############################################################################# + + +def _run( + cmd: str, + *, + abort_on_error: bool = False, + output_file: Optional[str] = None, + tee: bool = False, +) -> int: + rc = hsystem.system( + cmd, + abort_on_error=abort_on_error, + suppress_output=False, + log_level="echo_frame", + output_file=output_file, + tee=tee, + ) + return rc + + +def _get_invoke_cmd_line(target: str, opts: str, pytest_opts: str) -> str: + """ + + :param opts: options to pass to invoke + """ + cmd = ["invoke"] + cmd.append(target) + if opts: + cmd.append(opts) + if pytest_opts: + cmd.append("--pytest-opts " + pytest_opts) + cmd.append("2>&1") + return " ".join(cmd) + + +def _run_cmd_and_tg(cmd: str, *args: Any, **kwargs: Any) -> None: + rc = _run(cmd, *args, **kwargs) + if rc != 0: + # pytest returns 5, if there are no tests to run. + # On error, send Telegram message. + cmd = "tg.py" + _run(cmd, abort_on_error=False) + + +@task +def pytest_buildmeister_check(ctx, print_output=False): # type: ignore + """ + + :param print_output: print content of the file with the output of the + buildmeister run + """ + _ = ctx + # Concat the files generated by `invoke pytest_...` + log_file = "bm.log.txt" + if os.path.exists(log_file): + cmd = f"rm -rf {log_file}" + _run(cmd) + log_file = "bm.log.txt" + cmd = 'cat $(find . -name "bm.log*.txt" | sort) >' + log_file + _run(cmd) + # + if print_output: + print(hprint.frame("Print output")) + cmd = f"cat {log_file}" + _run(cmd) + # Report failures using `invoke pytest_repro`. 
+ print(hprint.frame("Failures")) + # "> sudo -u sasm rm ./tmp.pytest_repro.sh; i pytest_repro -f {log_file}" + if os.path.exists("./tmp.pytest_repro.sh"): + cmd = "sudo -u sasm rm ./tmp.pytest_repro.sh" + _run(cmd) + # + cmd = f"invoke pytest_repro -f {log_file}" + _run(cmd) + # Report failures using `grep`. + print(hprint.frame("grep Failures")) + cmd = f"grep '^FAILED' {log_file} | sort" + _run(cmd) + + +@task +def pytest_buildmeister( # type: ignore + ctx, opts="", pytest_opts="", docker_clean=False, test=False +): + """ + Run the regression tests. + + - Run updating all the tests + + :param docker_clean: remove all dead Docker instances + :param opts: options to pass to the invoke (e.g., `--version 1.2.0` to test + a specific version of the Docker container) + :param pytest_opts: options to pass to pytest + :param test: just run a single quick test to verify functionality of this + script + """ + _ = ctx + if test: + # For testing. + pytest_opts = "amp/dataflow/backtest/test/test_dataflow_backtest_utils.py::Test_get_configs_from_command_line_Amp1::test1" + if docker_clean: + cmd = "dev_scripts_lime/docker_clean.sh" + _run(cmd) + # Clean and sync. 
+ cmd = "invoke git_clean -f" + _run(cmd) + # + cmd = "invoke git_pull" + _run(cmd) + # + log_file = "bm.log*txt" + if os.path.exists(log_file): + cmd = f"rm -rf {log_file}" + _run(cmd) + # + files_to_merge = [] + # + target = "run_fast_tests" + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + cmd = "invoke fix_perms" + hsystem.system(cmd) + # + target = "run_slow_tests" + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + cmd = "invoke fix_perms" + _run(cmd) + # + target = "run_superslow_tests" + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + pytest_buildmeister_check(ctx) + + +# ############################################################################# +# pytest_collect_only +# ############################################################################# + + +@task +def pytest_collect_only(ctx): # type: ignore + _ = ctx + cmd = 'invoke docker_cmd --cmd "pytest --collect-only 2>&1"' + hsystem.system(cmd, suppress_output=False) + + +# ############################################################################# +# pytest_add_untracked_golden_outcomes +# ############################################################################# + + +@task +def pytest_add_untracked_golden_outcomes(ctx): # type: ignore + """ + Add the golden outcomes files that are not tracked under git. + """ + _ = ctx + cmd = 'git add $(git ls-files . 
--exclude-standard --others | grep "output" | grep -v tmp)' + hsystem.system(cmd, suppress_output=False) + + +# ############################################################################# +# pytest_failed +# ############################################################################# + + +def _parse_failed_tests( + txt: str, only_file: bool, only_class: bool +) -> Tuple[List[str], int, int]: + """ + Parse the failed tests from the pytest output. + + :param only_file: return only the file name + :param only_class: return only the class name + :return: + - failed_tests: list of failed tests + - num_failed: number of failed tests + - num_passed: number of passed tests + """ + hdbg.dassert_lte(only_file + only_class, 1) + failed_tests = [] + num_failed = num_passed = 0 + for line in txt.split("\n"): + # Remove non printable characters. + line = re.sub(r"[^\x20-\x7E]", "", line) + # FAILED oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - RuntimeError: + m = re.search(r"^(FAILED|ERROR) (\S+) -", line) + if m: + test_name = m.group(2) + _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) + failed_tests.append(test_name) + # helpers_root/helpers/test/test_hserver.py::Test_hserver1::test_gp1 (0.00 s) PASSED [ 36%] + m = re.search(r"(\S+) \(\S+ s\) (FAILED|ERROR)", line) + if m: + test_name = m.group(1) + _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) + failed_tests.append(test_name) + # ============ 11 failed, 917 passed, 113 skipped in 64.57s (0:01:04) ============ + # ======================== 4 failed, 43 passed in 40.48s ========================= + m = re.search(r"=+\s+(\d+)\s+failed,\s+(\d+)\s+passed.*", line) + if m: + num_failed = int(m.group(1)) + num_passed = int(m.group(2)) + failed_tests = sorted(list(set(failed_tests))) + # + if num_failed and num_passed and num_failed != len(failed_tests): + _LOG.warning( + "n_failed=%s len(failed_tests)=%s", num_failed, len(failed_tests) + ) + print(f"Failed 
tests: {num_failed}/{num_passed}") + # Filter, if needed. + if only_file or only_class: + failed_tests_tmp = [] + for test in failed_tests: + # oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 + m = re.match(r"(\S+)::(\S+)::\S+$", test) + hdbg.dassert(m, f"Can't parse '{test}'") + if only_file: + failed_tests_tmp.append(m.group(1)) + elif only_class: + failed_tests_tmp.append(m.group(1) + "::" + m.group(2)) + else: + raise RuntimeError("Unexpected") + failed_tests = sorted(list(set(failed_tests_tmp))) + return failed_tests, num_failed, num_passed + + +@task +def pytest_failed( + ctx, only_file=False, only_class=False, file_name="tmp.pytest_script.txt" +): # type: ignore + _ = ctx + hlitauti.report_task() + # Read file. + txt = hio.from_file(file_name) + # Extract info. + failed_tests, _, _ = _parse_failed_tests(txt, only_file, only_class) + print("\n".join(failed_tests)) + # Write the repro in a file. + repro_file_name = "tmp.pytest_failed.sh" + repro_txt = "pytest_log " + " ".join(failed_tests) + " $*" + hio.to_file(repro_file_name, repro_txt) + # + hio.create_executable_script(repro_file_name, repro_txt) + _LOG.warning("To run the failed tests run: %s", repro_file_name) + # Save to clipboard. 
+ txt = " ".join(failed_tests) + hsystem.to_pbcopy(txt, pbcopy=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py new file mode 100644 index 000000000..8039a1b07 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py @@ -0,0 +1,395 @@ +""" +Import as: + +import helpers.lib_tasks_utils as hlitauti +""" + +import datetime +import glob +import logging +import os +import pprint +import re +import sys +from typing import Any, Dict, List, Optional, Union + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Default params. +# ############################################################################# + +# This is used to inject the default params. +# TODO(gp): Using a singleton here is not elegant but simple. +_DEFAULT_PARAMS = {} + + +def set_default_params(params: Dict[str, Any]) -> None: + global _DEFAULT_PARAMS + _DEFAULT_PARAMS = params + _LOG.debug("Assigning:\n%s", pprint.pformat(params)) + + +def has_default_param(key: str) -> bool: + hdbg.dassert_isinstance(key, str) + return key in _DEFAULT_PARAMS + + +def get_default_param(key: str, *, override_value: Any = None) -> Any: + """ + Return the value from the default parameters dictionary, optionally + overriding it. 
+ """ + hdbg.dassert_isinstance(key, str) + value = None + if has_default_param(key): + value = _DEFAULT_PARAMS[key] + if override_value: + _LOG.info("Overriding value %s with %s", value, override_value) + value = override_value + hdbg.dassert_is_not( + value, None, "key='%s' not defined from %s", key, _DEFAULT_PARAMS + ) + return value + + +def reset_default_params() -> None: + params: Dict[str, Any] = {} + set_default_params(params) + + +# ############################################################################# +# Utils. +# ############################################################################# + + +def parse_command_line() -> None: + # Since it's not easy to add global command line options to invoke, we + # piggy back the option that already exists. + # If one uses the debug option for `invoke` we turn off the code + # debugging. + # TODO(gp): Check http://docs.pyinvoke.org/en/1.0/concepts/library.html# + # modifying-core-parser-arguments + if ("-d" in sys.argv) or ("--debug" in sys.argv): + verbosity = logging.DEBUG + else: + verbosity = logging.INFO + # Suppress command line logging if only_print_files is requested. + report_command_line = "--only-print-files" not in sys.argv + hdbg.init_logger(verbosity=verbosity, report_command_line=report_command_line) + + +# NOTE: We need to use a `# type: ignore` for all the @task functions because +# pyinvoke infers the argument type from the code and mypy annotations confuse +# it (see https://github.com/pyinvoke/invoke/issues/357). + +# In the following, when using `lru_cache`, we use functions from `hsyste` +# instead of `ctx.run()` since otherwise `lru_cache` would cache `ctx`. + +# We prefer not to cache functions running `git` to avoid stale values if we +# call git (e.g., if we cache Git hash and then we do a `git pull`). 
+ +# pyinvoke `ctx.run()` is useful for unit testing, since it allows to: +# - mock the result of a system call +# - register the issued command line (to create the expected outcome of a test) +# On the other side `system_interaction.py` contains many utilities that make +# it easy to interact with the system. +# Once AmpPart1347 is implemented we can replace all the `ctx.run()` with calls +# to `system_interaction.py`. + + +_WAS_FIRST_CALL_DONE = False + + +# TODO(gp): This can be part of the @task +def report_task(txt: str = "", container_dir_name: str = ".") -> None: + """ + Print the task description. + + Each task should call this function at the beginning to print the + task name. + """ + # On the first invocation check the version of the container. + global _WAS_FIRST_CALL_DONE + if not _WAS_FIRST_CALL_DONE: + _WAS_FIRST_CALL_DONE = True + hversio.check_version(container_dir_name) + # Print the name of the function. + msg = hprint.func_signature_to_str( + skip_vars="ctx", assert_on_skip_vars_error=False, frame_level=3 + ) + print(hprint.color_highlight(msg, color="purple")) + + +# TODO(gp): Move this to helpers.system_interaction and allow to add the switch +# globally. +def _to_single_line_cmd(cmd: Union[str, List[str]]) -> str: + """ + Convert a multiline command (as a string or list of strings) into a single + line. + + E.g., convert + ``` + IMAGE=.../amp:dev \ + docker-compose \ + --file devops/compose/tmp.docker-compose.yml \ + --file devops/compose/tmp.docker-compose_as_submodule.yml \ + --env-file devops/env/default.env + ``` + into + ``` + IMAGE=.../amp:dev docker-compose --file ... + ``` + """ + if isinstance(cmd, list): + cmd = " ".join(cmd) + hdbg.dassert_isinstance(cmd, str) + cmd = cmd.rstrip().lstrip() + # Remove `\` at the end of the line. + cmd = re.sub(r" \\\s*$", " ", cmd, flags=re.MULTILINE) + # Use a single space between words in the command. 
+ # TODO(gp): This is a bit dangerous if there are multiple spaces in a string + # that for some reason are meaningful. + cmd = " ".join(cmd.split()) + return cmd + + +def to_multi_line_cmd(docker_cmd_: List[str]) -> str: + r""" + Convert a command encoded as a list of strings into a single command + separated by `\`. + + E.g., convert + ``` + ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', + '\n docker-compose', + '\n --file amp/devops/compose/tmp.docker-compose.yml', + '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', + '\n --env-file devops/env/default.env'] + ``` + into + ``` + IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ + docker-compose \ + --file devops/compose/tmp.docker-compose.yml \ + --file devops/compose/tmp.docker-compose_as_submodule.yml \ + --env-file devops/env/default.env + ``` + """ + # Expand all strings into single lines. + _LOG.debug("docker_cmd=%s", docker_cmd_) + docker_cmd_tmp = [] + for dc in docker_cmd_: + # Add a `\` at the end of each string. + hdbg.dassert(not dc.endswith("\\"), "dc='%s'", dc) + dc += " \\" + docker_cmd_tmp.extend(dc.split("\n")) + docker_cmd_ = docker_cmd_tmp + # Remove empty lines. + docker_cmd_ = [cmd for cmd in docker_cmd_ if cmd.rstrip().lstrip() != ""] + # Package the command. + result = "\n".join(docker_cmd_) + # Remove a `\` at the end, since it is not needed. + result = result.rstrip("\\") + _LOG.debug("docker_cmd=%s", result) + return result + + +# TODO(gp): Pass through command line using a global switch or an env var. 
+use_one_line_cmd = False + + +def run( + ctx: Any, + cmd: str, + *args: Any, + dry_run: bool = False, + use_system: bool = False, + print_cmd: bool = False, + **ctx_run_kwargs: Any, +) -> Optional[int]: + cmd = hprint.dedent(cmd) + _LOG.debug(hprint.to_str("cmd dry_run")) + if use_one_line_cmd: + cmd = _to_single_line_cmd(cmd) + _LOG.debug("cmd=%s", cmd) + if dry_run: + print(f"Dry-run: > {cmd}") + _LOG.warning("Skipping execution of '%s'", cmd) + res = None + else: + if print_cmd: + print(f"> {cmd}") + if use_system: + # TODO(gp): Consider using only `hsystem.system()` since it's more + # reliable. + res = hsystem.system(cmd, suppress_output=False) + else: + result = ctx.run(cmd, *args, **ctx_run_kwargs) + res = result.return_code + return res + + +# TODO(gp): -> system_interaction.py ? +def _to_pbcopy(txt: str, pbcopy: bool) -> None: + """ + Save the content of txt in the system clipboard. + """ + txt = txt.rstrip("\n") + if not pbcopy: + print(txt) + return + if not txt: + print("Nothing to copy") + return + if hserver.is_host_mac(): + # -n = no new line + cmd = f"echo -n '{txt}' | pbcopy" + hsystem.system(cmd) + print(f"\n# Copied to system clipboard:\n{txt}") + else: + _LOG.warning("pbcopy works only on macOS") + print(txt) + + +def _filter_existing_paths(paths_from_user: List[str]) -> List[str]: + """ + Filter out the paths to non-existent files. + + :param paths_from_user: paths passed by user + :return: existing paths + """ + paths = [] + for user_path in paths_from_user: + if user_path.endswith("/*"): + # Get the files according to the "*" pattern. + dir_files = glob.glob(user_path) + if dir_files: + # Check whether the pattern matches files. 
+ paths.extend(dir_files) + else: + _LOG.error( + ( + "'%s' pattern doesn't match any files: " + "the directory is empty or path does not exist" + ), + user_path, + ) + elif os.path.exists(user_path): + paths.append(user_path) + else: + _LOG.error("'%s' does not exist", user_path) + return paths + + +# TODO(gp): We should factor out the meaning of the params in a string and add it +# to all the tasks' help. +def _get_files_to_process( + modified: bool, + branch: bool, + last_commit: bool, + # TODO(gp): Pass abs_dir, instead of `all_` and remove the calls from the + # outer clients. + all_: bool, + files_from_user: str, + mutually_exclusive: bool, + remove_dirs: bool, +) -> List[str]: + """ + Get a list of files to process. + + The files are selected based on the switches: + - `branch`: changed in the branch + - `modified`: changed in the client (both staged and modified) + - `last_commit`: part of the previous commit + - `all`: all the files in the repo + - `files_from_user`: passed by the user + + :param modified: return files modified in the client (i.e., changed with + respect to HEAD) + :param branch: return files modified with respect to the branch point + :param last_commit: return files part of the previous commit + :param all: return all repo files + :param files_from_user: return files passed to this function + :param mutually_exclusive: ensure that all options are mutually exclusive + :param remove_dirs: whether directories should be processed + :return: paths to process + """ + _LOG.debug( + hprint.to_str( + "modified branch last_commit all_ files_from_user " + "mutually_exclusive remove_dirs" + ) + ) + if mutually_exclusive: + # All the options are mutually exclusive. 
+ hdbg.dassert_eq( + int(modified) + + int(branch) + + int(last_commit) + + int(all_) + + int(len(files_from_user) > 0), + 1, + msg="Specify only one among --modified, --branch, --last-commit, " + "--all_files, and --files", + ) + else: + # We filter the files passed from the user through other the options, + # so only the filtering options need to be mutually exclusive. + hdbg.dassert_eq( + int(modified) + int(branch) + int(last_commit) + int(all_), + 1, + msg="Specify only one among --modified, --branch, --last-commit", + ) + dir_name = "." + if modified: + files = hgit.get_modified_files(dir_name) + elif branch: + files = hgit.get_modified_files_in_branch("master", dir_name) + elif last_commit: + files = hgit.get_previous_committed_files(dir_name) + elif all_: + pattern = "*" + only_files = True + use_relative_paths = True + files = hio.listdir(dir_name, pattern, only_files, use_relative_paths) + if files_from_user: + # If files were passed, filter out non-existent paths. + files = _filter_existing_paths(files_from_user.split()) + # Convert into a list. + hdbg.dassert_isinstance(files, list) + files_to_process = [f for f in files if f != ""] + # We need to remove `amp` to avoid copying the entire tree. + files_to_process = [f for f in files_to_process if f != "amp"] + _LOG.debug("files_to_process='%s'", str(files_to_process)) + # Remove dirs, if needed. + if remove_dirs: + files_to_process = hsystem.remove_dirs(files_to_process) + _LOG.debug("files_to_process='%s'", str(files_to_process)) + # Ensure that there are files to process. + if not files_to_process: + _LOG.warning("No files were selected") + return files_to_process + + +# Copied from helpers.datetime_ to avoid dependency from pandas. + + +def get_ET_timestamp() -> str: + # The timezone depends on how the shell is configured. + timestamp = datetime.datetime.now() + return timestamp.strftime("%Y%m%d_%H%M%S") + + +# End copy. 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py new file mode 100644 index 000000000..631a68e5f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +""" +Import as: + +import helpers.logging_testing.logging_main as hlteloma +""" + +import logging +import sys +from typing import Union + +import helpers.hlogging as hloggin + +_LOG = logging.getLogger(__name__) +print(f"_LOG={_LOG}") + + +def install_basic_formatter() -> None: + # The output looks like + # ``` + # DEBUG:__main__: message + # ``` + logging.basicConfig() + + +def _install_formatter( + formatter: Union[hloggin.CustomFormatter, logging.Formatter], +) -> None: + root_logger_ = logging.getLogger() + ch = logging.StreamHandler(sys.stdout) + ch.setFormatter(formatter) + root_logger_.addHandler(ch) + + +def install_current_formatter() -> None: + date_fmt = "%m-%d_%H:%M" + log_format = ( + # 04-28_08:08 INFO : + "%(asctime)-5s %(levelname)-5s" + ) + log_format += ( + # lib_tasks _delete_branches + " %(module)-20s: %(funcName)-30s:" + # 142: ... 
+ " %(lineno)-4d:" + " %(message)s" + ) + formatter = logging.Formatter(log_format, datefmt=date_fmt) + # + _install_formatter(formatter) + + +def install_custom_formatter() -> None: + formatter = hloggin.CustomFormatter() + _install_formatter(formatter) + + +if __name__ == "__main__": + # + print("\n# Installing formatter") + # install_basic_formatter() + # install_current_formatter() + install_custom_formatter() + # + print("\n# Loggers before setLevel") + root_logger = logging.getLogger() + print(f"root_logger={root_logger}") + # Show the loggers that have registered. + print(f"loggers={hloggin.get_all_loggers()}") + # + verbosity = logging.DEBUG + # verbosity = logging.ERROR + print(f"\n# Loggers after setLevel {verbosity}") + root_logger.setLevel(verbosity) + # Setting the verbosity for the root logger sets the verbosity for all the + # children ones. + print(f"root_logger={root_logger}") + print(f"loggers={hloggin.get_all_loggers()}") + # + hloggin.test_logger() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py new file mode 100644 index 000000000..ad88346fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py @@ -0,0 +1,10 @@ +""" +Import as: + +import helpers.logging_testing.logging_module as hltelomo +""" + +import logging + +_LOG = logging.getLogger(__name__) +print(f"_LOG={_LOG}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py new file mode 100644 index 000000000..5b0445a31 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py @@ -0,0 +1,17 @@ +import pathlib +from typing import Any, Optional + + +def pytest_ignore_collect( # type: ignore + collection_path: pathlib.Path, path: Any, config: Any +) -> Optional[bool]: + """ + Skip all tests in this directory. + + :param collection_path: path to analyze + :param path: path to analyze (deprecated) + :param config: pytest config object + :return: True if the path should be ignored + """ + # Ignore this directory and all its subdirectories. + return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb new file mode 100644 index 000000000..7df18640d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Test Cache in Jupyter Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:23:59.696680Z", + "start_time": "2021-08-16T16:23:58.792511Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING\u001b[0m: Disabling annoying warnings\n", + "\u001b[0m\u001b[36mINFO\u001b[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-89e1d81f-7cff-47ee-9790-af936835f517.json'\n", + "\u001b[33mWARNING\u001b[0m: Running in Jupyter\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import logging\n", + "\n", + "import 
joblib\n", + "\n", + "import helpers.hcache as hcache\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hs3 as hs3\n", + "\n", + "hnotebook.config_notebook()\n", + "\n", + "# hdbg.init_logger(verbosity=logging.DEBUG)\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "# hdbg.test_logger()\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Define computation function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-15T12:07:59.739169Z", + "start_time": "2021-08-15T12:07:59.714831Z" + } + }, + "outputs": [], + "source": [ + "def func(a, b):\n", + " # hello\n", + " # assert 0\n", + " out = a * b\n", + " print(f\"Multiplication: {a} * {b} = {out}\")\n", + " return out\n", + "\n", + "\n", + "inputs = (1, 2)\n", + "exp_output = 2\n", + "\n", + "func(*inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:32:30.476809Z", + "start_time": "2021-08-14T23:32:30.202040Z" + } + }, + "outputs": [], + "source": [ + "!ls hello/joblib/__main__*/f/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:42:14.671491Z", + "start_time": "2021-08-14T23:42:13.356163Z" + } + }, + "outputs": [], + "source": [ + "!pip install https://github.com/aabadie/joblib-s3.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:46:59.749548Z", + "start_time": "2021-08-14T23:46:54.455947Z" + } + }, + "outputs": [], + "source": [ + "#!git clone git://github.com/aabadie/joblib-s3.git\n", + "# !(cd joblib-s3 && pip install -r requirements.txt .)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": 
"2021-08-14T23:56:02.954013Z", + "start_time": "2021-08-14T23:56:02.793451Z" + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "# import joblibs3\n", + "\n", + "# joblibs3.register_s3fs_store_backend()\n", + "\n", + "# # dict(compress=False, bucket=None, anon=False,\n", + "# #key=None, secret=None, token=None, use_ssl=True)\n", + "# dict2 = {\n", + "# \"bucket\": \"alphamatic-data\",\n", + "# \"key\": dict_[\"aws_access_key_id\"],\n", + "# \"secret\": dict_[\"aws_secret_access_key\"],\n", + "# }\n", + "# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True,\n", + "# backend_options=dict2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T13:24:11.630748Z", + "start_time": "2021-08-16T13:24:10.983061Z" + } + }, + "outputs": [], + "source": [ + "# hjoblib.register_s3fs_store_backend()\n", + "\n", + "s3fs = hs3.get_s3fs(\"am\")\n", + "\n", + "dict2 = {\n", + " \"bucket\": \"alphamatic-data\",\n", + " # \"key\": dict_[\"aws_access_key_id\"],\n", + " # \"secret\": dict_[\"aws_secret_access_key\"],\n", + " \"s3fs\": s3fs,\n", + "}\n", + "\n", + "mem = joblib.Memory(\n", + " \"joblib_cache\",\n", + " backend=\"s3\",\n", + " verbose=100,\n", + " compress=True,\n", + " backend_options=dict2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-17T15:51:32.654896Z", + "start_time": "2021-08-17T15:51:32.258447Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "ename": "PermissionError", + "evalue": "Access Denied", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, 
delimiter)\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0mdircache\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0;32masync\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0mdircache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CommonPrefixes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/paginate.py\u001b[0m in \u001b[0;36m__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0mparsed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extract_parsed_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 154\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0ms3fs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m#mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mcoro\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoro\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mcoro\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsbuckets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 721\u001b[0;31m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter)\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Key\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClientError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mtranslate_boto_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdelimiter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPermissionError\u001b[0m: Access Denied" + ] + } + ], + "source": [ + "# hjoblib.register_s3fs_store_backend()\n", + "\n", + "s3fs = hs3.get_s3fs(\"am\")\n", + "dict_ = {}\n", + "\n", + "dict2 = {\n", + " \"bucket\": \"alphamatic-data\",\n", + " # \"key\": dict_[\"aws_access_key_id\"],\n", + " # \"secret\": dict_[\"aws_secret_access_key\"],\n", + " \"s3fs\": s3fs,\n", + "}\n", + "path = \"/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1\"\n", + "\n", + "\n", + "s3fs.ls(path)\n", + "\n", + "# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:54:50.021139Z", + "start_time": "2021-08-14T23:54:50.017180Z" + } + }, + "outputs": [], + "source": [ + "print(dict_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:51:25.440705Z", + "start_time": "2021-08-14T23:51:25.419214Z" + } + }, + "outputs": [], + "source": [ + "# dict_[\"bucket\"] = \"alphamatic-data/tmp\"\n", + "\n", + "print(dict_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def dec(func=None, val=5):\n", + " if func is not None:\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68549a47", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:53:08.985727Z", + "start_time": "2021-08-14T23:53:08.795065Z" + } + }, + "outputs": [], + "source": [ + "dict_ = hs3.get_aws_credentials(\"am\")\n", + "print(dict_)\n", + "# s3fs = hs3.get_s3fs(\"am\")\n", + "# s3fs.ls(\"s3://alphamatic-data/tmp\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T13:25:34.841885Z", + "start_time": "2021-08-16T13:25:34.820510Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "s3fs.clear_instance_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:23:17.621301Z", + "start_time": "2021-08-16T16:23:16.722753Z" + } + }, + "outputs": [], + "source": [ + "# import joblib\n", + "\n", + "# cachedir = \"./hello\"\n", + "# memory = joblib.Memory(cachedir, verbose=0)\n", + "\n", + "\n", + "@mem.cache()\n", + "def f(x):\n", + " # hello\n", + " print(f\"Running f({x})\")\n", + " return x\n", + "\n", + "\n", + "f(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:26:31.661915Z", + "start_time": "2021-08-16T16:26:31.640938Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'hello'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hcache.cache(set_verbose_mode=True)\n", + "\n", + "\n", + "def hello():\n", + " return \"hello\"\n", + "\n", + "\n", + "hello()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Memory cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T22:19:51.526004Z", + "start_time": "2021-08-14T22:19:51.259763Z" + } + }, + "outputs": [], + "source": [ + "!ls /app/tmp.cache.disk/joblib/" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T22:19:25.253342Z", + "start_time": "2021-08-14T22:19:24.986513Z" + } + }, + "outputs": [], + "source": [ + "!ls /mnt/tmpfs/tmp.cache.mem/joblib/lib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:21:32.636049Z", + "start_time": "2021-08-14T23:21:32.479710Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "memory_cached_func = hcache._Cached(\n", + " func, use_mem_cache=True, use_disk_cache=False\n", + ")\n", + "\n", + "print(memory_cached_func.get_function_cache_info())\n", + "\n", + "# cache_type = None\n", + "# memory_cached_func.clear_function_cache(cache_type)\n", + "\n", + "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", + "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", + "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"memory caching checks passed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:56:58.620662Z", + "start_time": "2020-09-03T19:56:58.610337Z" + } + }, + "outputs": [], + "source": [ + "def computation_function(a, b):\n", + " # hello\n", + " # assert 0\n", + " out = a * b\n", + " print(f\"Multiplication: {a} * {b} = {out}\")\n", + " return out\n", + "\n", + "\n", + "inputs = (1, 2)\n", + "exp_output = 2\n", + "\n", + "# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output)\n", + "# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Disk cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-09-03T19:45:20.999548Z", + "start_time": "2020-09-03T19:45:20.987298Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "disk_cached_computation = hcache._Cached(\n", + " computation_function, use_mem_cache=False, use_disk_cache=True\n", + ")\n", + "\n", + "disk_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"disk\")\n", + "\n", + "print(\"disk caching checks passed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Full cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.851944Z", + "start_time": "2020-09-03T19:34:54.839379Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "fully_cached_computation = hcache._Cached(\n", + " computation_function, use_mem_cache=True, use_disk_cache=True\n", + ")\n", + "\n", + "fully_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"Clear mem cache\")\n", + "fully_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + 
"hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"disk\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"full caching checks passed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.856369Z", + "start_time": "2020-09-03T19:34:54.853563Z" + } + }, + "outputs": [], + "source": [ + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.995926Z", + "start_time": "2020-09-03T19:34:54.859279Z" + } + }, + "outputs": [], + "source": [ + "# This should fail all the times, because we clear the memory cache.\n", + "fully_cached_computation.clear_function_cache()\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py new file mode 100644 index 000000000..3469f42b7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py @@ -0,0 +1,274 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] pycharm={"name": "#%% md\n"} +# # Test Cache in Jupyter Notebook + +# %% +# %load_ext autoreload +# %autoreload 2 + +import logging + +import joblib + +import helpers.hcache as hcache +import helpers.hdbg as hdbg +import helpers.hs3 as hs3 + +hnotebook.config_notebook() + +# hdbg.init_logger(verbosity=logging.DEBUG) +hdbg.init_logger(verbosity=logging.INFO) +# hdbg.test_logger() +_LOG = logging.getLogger(__name__) + + +# %% [markdown] pycharm={"name": "#%% md\n"} +# # Define computation function + + +# %% +def func(a, b): + # hello + # assert 0 + out = a * b + print(f"Multiplication: {a} * {b} = {out}") + return out + + +inputs = (1, 2) +exp_output = 2 + +func(*inputs) + +# %% +# !ls hello/joblib/__main__*/f/ + +# %% +# !pip install https://github.com/aabadie/joblib-s3.git + +# %% +# #!git clone git://github.com/aabadie/joblib-s3.git +# !(cd joblib-s3 && pip install -r requirements.txt .) 
+ +# %% +# import joblibs3 + +# joblibs3.register_s3fs_store_backend() + +# # dict(compress=False, bucket=None, anon=False, +# #key=None, secret=None, token=None, use_ssl=True) +# dict2 = { +# "bucket": "alphamatic-data", +# "key": dict_["aws_access_key_id"], +# "secret": dict_["aws_secret_access_key"], +# } +# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True, +# backend_options=dict2) + +# %% +# hjoblib.register_s3fs_store_backend() + +s3fs = hs3.get_s3fs("am") + +dict2 = { + "bucket": "alphamatic-data", + # "key": dict_["aws_access_key_id"], + # "secret": dict_["aws_secret_access_key"], + "s3fs": s3fs, +} + +mem = joblib.Memory( + "joblib_cache", + backend="s3", + verbose=100, + compress=True, + backend_options=dict2, +) + +# %% +# hjoblib.register_s3fs_store_backend() + +s3fs = hs3.get_s3fs("am") +dict_ = {} + +dict2 = { + "bucket": "alphamatic-data", + # "key": dict_["aws_access_key_id"], + # "secret": dict_["aws_secret_access_key"], + "s3fs": s3fs, +} +path = "/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1" + + +s3fs.ls(path) + +# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2) + + +# %% +print(dict_) + +# %% +# dict_["bucket"] = "alphamatic-data/tmp" + +print(dict_) + + +# %% +def dec(func=None, val=5): + if func is not None: + return + + +# %% + +# %% +dict_ = hs3.get_aws_credentials("am") +print(dict_) +# s3fs = hs3.get_s3fs("am") +# s3fs.ls("s3://alphamatic-data/tmp") + +# %% +s3fs.clear_instance_cache() + + +# %% +# import joblib + +# cachedir = "./hello" +# memory = joblib.Memory(cachedir, verbose=0) + + +@mem.cache() +def f(x): + # hello + print(f"Running f({x})") + return x + + +f(1) + +# %% +hcache.cache(set_verbose_mode=True) + + +def hello(): + return "hello" + + +hello() + +# %% [markdown] pycharm={"name": "#%% md\n"} +# ## Memory cache + +# %% +# !ls /app/tmp.cache.disk/joblib/ + +# %% +# !ls /mnt/tmpfs/tmp.cache.mem/joblib/lib + +# %% 
pycharm={"name": "#%%\n"} +memory_cached_func = hcache._Cached( + func, use_mem_cache=True, use_disk_cache=False +) + +print(memory_cached_func.get_function_cache_info()) + +# cache_type = None +# memory_cached_func.clear_function_cache(cache_type) + +hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) +hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) +hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "mem") + +print("memory caching checks passed") + + +# %% +def computation_function(a, b): + # hello + # assert 0 + out = a * b + print(f"Multiplication: {a} * {b} = {out}") + return out + + +inputs = (1, 2) +exp_output = 2 + +# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output) +# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), "mem") + +# %% [markdown] +# ## Disk cache + +# %% pycharm={"name": "#%%\n"} +disk_cached_computation = hcache._Cached( + computation_function, use_mem_cache=False, use_disk_cache=True +) + +disk_cached_computation.clear_function_cache() + +hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "disk") + +print("disk caching checks passed") + +# %% [markdown] +# ## Full cache + +# %% pycharm={"name": "#%%\n"} +fully_cached_computation = hcache._Cached( + computation_function, use_mem_cache=True, use_disk_cache=True +) + +fully_cached_computation.clear_function_cache() + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + 
+hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +print("Clear mem cache") +fully_cached_computation.clear_function_cache() + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "disk") + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +print("full caching checks passed") + +# %% +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +# %% +# This should fail all the times, because we clear the memory cache. +fully_cached_computation.clear_function_cache() +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb new file mode 100644 index 000000000..3b3c5ae1e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9ad5fd70-a4de-4671-86c4-9f3e87c32df1", + "metadata": {}, + "source": [ + "# Using hcache_simple for Caching in Python\n", + "\n", + "This tutorial provides a detailed walkthrough of the `hcache_simple` module,\n", + "which implements a lightweight caching mechanism." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "aa084398-eba9-4e8f-aad9-6348d62f8fc1", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d20a80f4-b837-487f-96df-ebb9e8202cfc", + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary modules.\n", + "import logging\n", + "import time\n", + "\n", + "import helpers.hcache_simple as hcacsimp\n", + "import helpers.hdbg as hdbg" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5d76eda3-044b-47c9-bf5f-eb09aad51ad1", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-4f3ae573-f3ef-4865-b9b0-386ca4221989.json'\n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3d440b77-178a-4e3e-9bb9-0d508f1948a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Force a reload.\n", + "import importlib\n", + "\n", + "importlib.reload(hcacsimp)" + ] + }, + { + "cell_type": "markdown", + "id": "90aa14ab-f441-468c-a114-77cf9c6baff1", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "## Setting up caching\n", + "\n", + "The `@hcsi.simple_cache` decorator enables caching for a function and supports both memory- and disk-based storage (json or pickle format).\n", + "\n", + "We'll demonstrate this with a function that simulates a slow computation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dbc25952-6587-4bb9-a5e9-064ed0317550", + "metadata": {}, + "outputs": [], + "source": [ + "# cache_type=\"json\": The cache will be stored in JSON format on disk.\n", + "# write_through=True: Any changes to the cache will be written to disk immediately.\n", + "@hcacsimp.simple_cache(cache_type=\"json\", write_through=True)\n", + "def slow_square(x):\n", + " \"\"\"\n", + " Simulate a slow function that computes the square of a number.\n", + "\n", + " The `@hcsi.simple_cache` decorator caches the results of this\n", + " function to avoid recomputation for the same input.\n", + " \"\"\"\n", + " # Simulate a time-consuming computation.\n", + " print(\"Computing ...\")\n", + " time.sleep(2)\n", + " return x**2" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bce58692-fd3f-49fe-ab7c-fb07357697e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# func_name=slow_square\n", + "type: json\n" + ] + } + ], + "source": [ + "print(hcacsimp.cache_property_to_str(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "738a112b-3eac-4488-bd6b-8cba124d3f2d", + "metadata": {}, + "source": [ + "## Demonstration: First and Subsequent Calls\n", + "\n", + "Let's see how caching works:\n", + "\n", + "- On the first call with a specific input, the function takes time to compute.\n", + "- On subsequent calls with the same input, the result is retrieved instantly from the cache." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "aa1f2b9d-bdd4-4714-a5e1-ebafe05632f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING Resetting disk cache\n", + "WARNING Resetting /app/tmp.cache_simple_property.pkl\n" + ] + } + ], + "source": [ + "cache_file = hcacsimp._get_cache_file_name(\"slow_square\")\n", + "hdbg.dassert_eq(cache_file, \"/app/tmp.cache_simple.slow_square.json\")\n", + "\n", + "hcacsimp.reset_cache(interactive=False)\n", + "hcacsimp.reset_cache_property()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "66ff027f-a6d2-438e-bded-7d631b2faace", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '/app/tmp.cache_simple.*': No such file or directory\n" + ] + } + ], + "source": [ + "!ls /app/tmp.cache_simple.*" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b081f89-b5a2-4757-8936-ed567eaa049c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '/app/tmp.cache_simple.slow_square.json': No such file or directory\n" + ] + } + ], + "source": [ + "# There should be no cache file yet.\n", + "!ls -l $cache_file" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "789c978c-c25a-48ba-a8fc-34c9fc0b6243", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# First call (expected delay):\n", + "Computing ...\n", + "Result: 16\n" + ] + } + ], + "source": [ + "# First call is slow: the result is computed and cached.\n", + "print(\"# First call (expected delay):\")\n", + "result = slow_square(4)\n", + "print(f\"Result: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0836b391-30ca-443e-bfe8-7794fb91151c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"{\\\"args\\\": [4], \\\"kwargs\\\": {}}\": 16\n", + "}" + ] + } + ], + "source": [ + "# The cache file is created and stores the content.\n", + "!cat $cache_file" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "af3af183-d293-45b4-9d60-c826e382a786", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Second call (retrieved from cache):\n", + "Result: 16\n" + ] + } + ], + "source": [ + "# Second call is fast: the result is retrieved from the cache.\n", + "print(\"# Second call (retrieved from cache):\")\n", + "result = slow_square(4)\n", + "print(f\"Result: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f0cbe4e0-b87e-4ed8-b373-c2114647076d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing ...\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/app/tmp.cache_simple.slow_square.json'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Call another value -> cache miss.\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mslow_square\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:888\u001b[0m, in \u001b[0;36msimple_cache..decorator..wrapper\u001b[0;34m(force_refresh, abort_on_cache_miss, report_on_cache_miss, *args, 
**kwargs)\u001b[0m\n\u001b[1;32m 886\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m write_through:\n\u001b[1;32m 887\u001b[0m _LOG\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWriting through to disk\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 888\u001b[0m \u001b[43mflush_cache_to_disk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m value\n", + "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:593\u001b[0m, in \u001b[0;36mflush_cache_to_disk\u001b[0;34m(func_name)\u001b[0m\n\u001b[1;32m 591\u001b[0m disk_cache\u001b[38;5;241m.\u001b[39mupdate(mem_cache)\n\u001b[1;32m 592\u001b[0m \u001b[38;5;66;03m# Save merged cache to disk.\u001b[39;00m\n\u001b[0;32m--> 593\u001b[0m \u001b[43m_save_cache_dict_to_disk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdisk_cache\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[38;5;66;03m# Update the memory cache.\u001b[39;00m\n\u001b[1;32m 595\u001b[0m \u001b[38;5;28;01mglobal\u001b[39;00m _CACHE\n", + "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:447\u001b[0m, in \u001b[0;36m_save_cache_dict_to_disk\u001b[0;34m(func_name, data)\u001b[0m\n\u001b[1;32m 445\u001b[0m pickle\u001b[38;5;241m.\u001b[39mdump(data, file)\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m cache_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjson\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 447\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 448\u001b[0m json\u001b[38;5;241m.\u001b[39mdump(data, file, indent\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m, sort_keys\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, ensure_ascii\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/app/tmp.cache_simple.slow_square.json'" + ] + } + ], + "source": [ + "# Call another value -> cache miss.\n", + "result = slow_square(3)\n", + "print(f\"Result: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7c6e693-05a3-4e2c-9159-892887eb91de", + "metadata": {}, + "outputs": [], + "source": [ + "!cat $cache_file" + ] + }, + { + "cell_type": "markdown", + "id": "124703fe-bf5d-4756-aa63-12dbed57db12", + "metadata": {}, + "source": [ + "## Monitoring Cache Performance\n", + "\n", + "The `hcache_simple` module provides utilities to track cache performance metrics,\n", + "such as the total number of calls, cache hits, and cache misses." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "879c8230-656d-455c-9370-1cd5afb8f59a", + "metadata": {}, + "outputs": [], + "source": [ + "# Enable cache performance monitoring for the function `slow_square`.\n", + "hcacsimp.enable_cache_perf(\"slow_square\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dcf4f5d-e4ee-490e-a62e-a152af9c0e62", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve and display cache performance statistics.\n", + "print(\"# Cache Performance Stats:\")\n", + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "8a0e90b3-4fa7-43c7-8251-c7b713e33073", + "metadata": {}, + "source": [ + "Explanation of Performance Metrics\n", + "\n", + "- Total Calls (tot): The total number of times the function was invoked.\n", + "- Cache Hits (hits): The number of times the result was retrieved from the cache.\n", + "- Cache Misses (misses): The number of times the function had to compute the result due to a cache miss.\n", + "- Hit Rate: The percentage of calls where the result was retrieved from the cache." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4b46455-314f-4ff3-9f8a-93c91dcab334", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "hcacsimp.reset_cache(interactive=False)\n", + "hcacsimp.reset_cache_perf(\"slow_square\")\n", + "\n", + "print(\"# First call (expected delay):\")\n", + "result = slow_square(4) # This call will be recorded as a cache miss.\n", + "print(f\"Result: {result}\")\n", + "\n", + "print(\"\\n# Second call (retrieved from cache):\")\n", + "result = slow_square(4) # This call will be recorded as a cache hit.\n", + "print(f\"Result: {result}\")\n", + "\n", + "print(\"\\n# Cache performance stats:\")\n", + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "3d614729-924d-4285-b5bc-4ed16006ba12", + "metadata": {}, + "source": [ + "## Flush Cache to Disk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21ecd2ea-812b-4d67-8fb2-0d2aba944175", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# The following cell writes the current in‑memory cache to disk. This is useful\n", + "# if you want persistence across sessions.\n", + "print(\"# Flushing cache to disk for 'slow_square'...\")\n", + "hcacsimp.flush_cache_to_disk(\"slow_square\")\n", + "\n", + "# The `hcsi.cache_stats_to_str` function provides a summary of the current cache\n", + "# state, including the number of items stored in memory and on disk.\n", + "print(\"\\n# Cache stats:\")\n", + "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "39b16e4a-c007-4cee-8566-0ad9057c54ea", + "metadata": {}, + "source": [ + "## Reset In‑Memory Cache\n", + "\n", + "Now reset the in‑memory cache. After this, the in‑memory cache will be empty until reloaded from disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd4374f3-8f89-422a-923a-3bf4bd01f8a1", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"# Resetting in-memory cache for 'slow_square'...\")\n", + "hcacsimp.reset_mem_cache(\"slow_square\")\n", + "\n", + "print(\"\\n# Cache stats:\")\n", + "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "04677407-f3d4-46de-b818-100eafb2bf1d", + "metadata": {}, + "source": [ + "## Force Cache from Disk\n", + "\n", + "Now we force the in‑memory cache to update from disk. This should repopulate our\n", + "cache based on the disk copy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a42de2d9-05d8-4dd8-947b-71d6751108fb", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"# Forcing cache from disk for 'slow_square'...\")\n", + "hcacsimp.force_cache_from_disk(\"slow_square\")\n", + "\n", + "print(\"\\n# Cache stats:\")\n", + "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "0516463d-d450-4071-9284-c1e839cec62a", + "metadata": {}, + "source": [ + "## Attempt to Reset Disk Cache\n", + "\n", + "The `reset_disk_cache` function is currently not implemented (it contains an assertion).\n", + "We'll catch the expected error to confirm its behavior." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59bbf120-4d21-430a-9fad-d4c68a1e4af5", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " print(\n", + " \"\\nAttempting to reset disk cache for 'slow_square' (expected to fail)...\"\n", + " )\n", + " hcacsimp.reset_disk_cache(\"slow_square\")\n", + "except AssertionError:\n", + " print(\"reset_disk_cache raised an AssertionError as expected.\")" + ] + }, + { + "cell_type": "markdown", + "id": "74c97383-d444-4760-a2cd-25d7c2bbbf27", + "metadata": {}, + "source": [ + "# Dynamic parameters" + ] + }, + { + "cell_type": "markdown", + "id": "865b8e7d-ebb3-4bca-8edd-784c71f142b5", + "metadata": {}, + "source": [ + "## force_refresh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "103b9d4d-0a6b-439e-b722-5c3125baef3e", + "metadata": {}, + "outputs": [], + "source": [ + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))\n", + "hcacsimp.reset_cache_perf(\"slow_square\")\n", + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5919331a-f6b9-4b7b-ac8d-354ea2bd5226", + "metadata": {}, + "outputs": [], + "source": [ + "slow_square(4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e1679d-b529-4f3f-a27a-5a7173180ab8", + "metadata": {}, + "outputs": [], + "source": [ + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8e17839-fdeb-4950-b662-e04cdcd73406", + "metadata": {}, + "outputs": [], + "source": [ + "# Force a recompute.\n", + "slow_square(4, force_refresh=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11c745bf-28e4-4ec4-bdb7-c6e73956fbf4", + "metadata": {}, + "outputs": [], + "source": [ + "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" + ] + }, + { + "cell_type": "markdown", + "id": "f0119e5a-74c2-47d6-a80c-7b2e6d70f8be", + "metadata": {}, + 
"source": [ + "## abort_on_cache_miss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b8b63dd-c80e-4a26-a242-5a09ddc3d76c", + "metadata": {}, + "outputs": [], + "source": [ + "hcacsimp.reset_cache(interactive=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e643920-6d2b-4055-9dd3-50745feb2373", + "metadata": {}, + "outputs": [], + "source": [ + "# This call doesn't abort since it's not a cache miss.\n", + "slow_square(4, abort_on_cache_miss=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5de5d0fe-b331-427e-bfa1-95bcf8efd40a", + "metadata": {}, + "outputs": [], + "source": [ + "# This call aborts since it's a cache miss.\n", + "try:\n", + " slow_square(16, abort_on_cache_miss=True)\n", + "except ValueError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff84291a-1827-4c6b-b2d7-5386b7e127a6", + "metadata": {}, + "outputs": [], + "source": [ + "slow_square(16, report_on_cache_miss=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "281adfc6-38cf-4e66-935e-95760e5fa5cf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py new file mode 100644 index 000000000..5acf042c0 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py @@ -0,0 +1,257 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Using hcache_simple for Caching in Python +# +# This tutorial provides a detailed walkthrough of the `hcache_simple` module, +# which implements a lightweight caching mechanism. + +# %% +# %load_ext autoreload +# %autoreload 2 + +# %% +# Import necessary modules. +import logging +import time + +import helpers.hcache_simple as hcacsimp +import helpers.hdbg as hdbg + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + + +# %% +# Force a reload. +import importlib + +importlib.reload(hcacsimp) + +# %% [markdown] +# ## Setting up caching +# +# The `@hcacsimp.simple_cache` decorator enables caching for a function and supports both memory- and disk-based storage (json or pickle format). +# +# We'll demonstrate this with a function that simulates a slow computation. + + +# %% +# cache_type="json": The cache will be stored in JSON format on disk. +# write_through=True: Any changes to the cache will be written to disk immediately. +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def slow_square(x): + """ + Simulate a slow function that computes the square of a number. + + The `@hcacsimp.simple_cache` decorator caches the results of this + function to avoid recomputation for the same input. + """ + # Simulate a time-consuming computation. 
+ print("Computing ...") + time.sleep(2) + return x**2 + + +# %% +print(hcacsimp.cache_property_to_str("slow_square")) + +# %% [markdown] +# ## Demonstration: First and Subsequent Calls +# +# Let's see how caching works: +# +# - On the first call with a specific input, the function takes time to compute. +# - On subsequent calls with the same input, the result is retrieved instantly from the cache. + +# %% +cache_file = hcacsimp._get_cache_file_name("slow_square") +hdbg.dassert_eq(cache_file, "/app/tmp.cache_simple.slow_square.json") + +hcacsimp.reset_cache(interactive=False) +hcacsimp.reset_cache_property() + +# %% +# !ls /app/tmp.cache_simple.* + +# %% +# There should be no cache file yet. +# !ls -l $cache_file + +# %% +# First call is slow: the result is computed and cached. +print("# First call (expected delay):") +result = slow_square(4) +print(f"Result: {result}") + +# %% +# The cache file is created and stores the content. +# !cat $cache_file + +# %% +# Second call is fast: the result is retrieved from the cache. +print("# Second call (retrieved from cache):") +result = slow_square(4) +print(f"Result: {result}") + +# %% +# Call another value -> cache miss. +result = slow_square(3) +print(f"Result: {result}") + +# %% +# !cat $cache_file + +# %% [markdown] +# ## Monitoring Cache Performance +# +# The `hcache_simple` module provides utilities to track cache performance metrics, +# such as the total number of calls, cache hits, and cache misses. + +# %% +# Enable cache performance monitoring for the function `slow_square`. +hcacsimp.enable_cache_perf("slow_square") + +# %% +# Retrieve and display cache performance statistics. +print("# Cache Performance Stats:") +print(hcacsimp.get_cache_perf_stats("slow_square")) + +# %% [markdown] +# Explanation of Performance Metrics +# +# - Total Calls (tot): The total number of times the function was invoked. +# - Cache Hits (hits): The number of times the result was retrieved from the cache. 
+# - Cache Misses (misses): The number of times the function had to compute the result due to a cache miss. +# - Hit Rate: The percentage of calls where the result was retrieved from the cache. + +# %% +hcacsimp.reset_cache(interactive=False) +hcacsimp.reset_cache_perf("slow_square") + +print("# First call (expected delay):") +result = slow_square(4) # This call will be recorded as a cache miss. +print(f"Result: {result}") + +print("\n# Second call (retrieved from cache):") +result = slow_square(4) # This call will be recorded as a cache hit. +print(f"Result: {result}") + +print("\n# Cache performance stats:") +print(hcacsimp.get_cache_perf_stats("slow_square")) + + +# %% [markdown] +# ## Flush Cache to Disk + +# %% +# The following cell writes the current in‑memory cache to disk. This is useful +# if you want persistence across sessions. +print("# Flushing cache to disk for 'slow_square'...") +hcacsimp.flush_cache_to_disk("slow_square") + +# The `hcacsimp.cache_stats_to_str` function provides a summary of the current cache +# state, including the number of items stored in memory and on disk. +print("\n# Cache stats:") +print(hcacsimp.cache_stats_to_str("slow_square")) + + +# %% [markdown] +# ## Reset In‑Memory Cache +# +# Now reset the in‑memory cache. After this, the in‑memory cache will be empty until reloaded from disk. + +# %% +print("# Resetting in-memory cache for 'slow_square'...") +hcacsimp.reset_mem_cache("slow_square") + +print("\n# Cache stats:") +print(hcacsimp.cache_stats_to_str("slow_square")) + +# %% [markdown] +# ## Force Cache from Disk +# +# Now we force the in‑memory cache to update from disk. This should repopulate our +# cache based on the disk copy. 
+ +# %% +print("# Forcing cache from disk for 'slow_square'...") +hcacsimp.force_cache_from_disk("slow_square") + +print("\n# Cache stats:") +print(hcacsimp.cache_stats_to_str("slow_square")) + +# %% [markdown] +# ## Attempt to Reset Disk Cache +# +# The `reset_disk_cache` function is currently not implemented (it contains an assertion). +# We'll catch the expected error to confirm its behavior. + +# %% +try: + print( + "\nAttempting to reset disk cache for 'slow_square' (expected to fail)..." + ) + hcacsimp.reset_disk_cache("slow_square") +except AssertionError: + print("reset_disk_cache raised an AssertionError as expected.") + +# %% [markdown] +# # Dynamic parameters + +# %% [markdown] +# ## force_refresh + +# %% +print(hcacsimp.get_cache_perf_stats("slow_square")) +hcacsimp.reset_cache_perf("slow_square") +print(hcacsimp.get_cache_perf_stats("slow_square")) + +# %% +slow_square(4) + +# %% +print(hcacsimp.get_cache_perf_stats("slow_square")) + +# %% +# Force a recompute. +slow_square(4, force_refresh=True) + +# %% +print(hcacsimp.get_cache_perf_stats("slow_square")) + +# %% [markdown] +# ## abort_on_cache_miss + +# %% +hcacsimp.reset_cache(interactive=False) + +# %% +# This call doesn't abort since it's not a cache miss. +slow_square(4, abort_on_cache_miss=True) + +# %% +# This call aborts since it's a cache miss. 
+try: + slow_square(16, abort_on_cache_miss=True) +except ValueError as e: + print(e) + +# %% +slow_square(16, report_on_cache_miss=True) + +# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb new file mode 100644 index 000000000..7b505f87e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb @@ -0,0 +1,424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [hgoogle_file_api.py](#hgoogle_file_api.py)\n", + " - [Get Credentials for your drive](#get-credentials-for-your-drive)\n", + " - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet)\n", + " - [Freeze Rows](#freeze-rows)\n", + " - [Change the height of certin rows](#change-the-height-of-certin-rows)\n", + " - [Read some nice data](#read-some-nice-data)\n", + " - [Write this nice data](#write-this-nice-data)" + ] + }, + { + "cell_type": "markdown", + "id": "982ab891-de0a-47d5-946a-0f4fd3f16307", + "metadata": {}, + "source": [ + "\n", + "# hgoogle_file_api.py" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6b997caf-4bfc-47bc-b7e1-584f02da328f", + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade google-api-python-client)\"\n", + "# !sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade pip install oauth2client)\"\n", + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade gspread)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0fdf8a01-00ed-4e40-8b8b-3e4ecfe37d45", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "import helpers.hgoogle_drive_api as hgodrapi\n", + "\n", + "importlib.reload(hgodrapi)" + ] + }, + { + "cell_type": "markdown", + "id": "f9733115-f65b-43fb-8b56-32be7588c617", + "metadata": {}, + "source": [ + "\n", + "## Get Credentials for your drive" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0f3eb12a-bd7e-4846-a8f0-331ece997137", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "google_creds = hgodrapi.get_credentials()\n", + "print(google_creds)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "feb74dae-ff52-44ce-b698-4c04cc2bc8f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "service = hgodrapi.get_sheets_service(google_creds)\n", + "print(service)" + ] + }, + { + "cell_type": "markdown", + "id": "9e1c8840-c759-4bd6-a2c5-f30d94daf72b", + "metadata": {}, + "source": [ + "\n", + "## Get Tab/Sheet id of a particular google sheet" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "67fe7cc1-0f90-4b45-b93d-c6eaecd25028", + "metadata": {}, + "outputs": [], + "source": [ + "tab_name = \"cleaned_profiles_1\"\n", + "url = \"https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260\"\n", + "sheet_id = \"1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA\"\n", + "credentials = google_creds" + ] + }, + { + "cell_type": "markdown", + "id": "f18db947-8170-4cba-8799-dfe792e1c732", + "metadata": {}, + "source": [ + "\n", + "## Freeze Rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "232a1ee0-83d2-4449-a8c0-a8e8eca02fc5", + "metadata": {}, + "outputs": [], + 
"source": [ + "row_indices = [0, 1, 2]\n", + "hgodrapi.freeze_rows(\n", + " credentials,\n", + " sheet_id=sheet_id,\n", + " row_indices=row_indices,\n", + " tab_name=tab_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dd9b9b7d-2dc6-416d-bd9c-a8039fadaba2", + "metadata": {}, + "source": [ + "\n", + "## Change the height of certin rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50db6e3d-8d05-47ea-9ace-dc79ce131f37", + "metadata": {}, + "outputs": [], + "source": [ + "hgodrapi.set_row_height(\n", + " google_creds,\n", + " sheet_id=sheet_id,\n", + " height=20,\n", + " start_index=0,\n", + " end_index=2,\n", + " tab_name=tab_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3569d048-d69e-4e4b-ab53-a93b6f4a41d1", + "metadata": {}, + "source": [ + "\n", + "## Read some nice data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e63bede3-2948-4a37-b444-36b4dba81c6d", + "metadata": {}, + "outputs": [], + "source": [ + "nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "59233081-ac03-4ac7-96b1-4de1b07fae75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameTitleFirmLocationPersonal Investment FocusGeographic FocusTypical Deal SizePreferred StagesNotable Personal AI InvestmentsPotential Fit with KaizenfirstNamelastName
0Michael MoritzManaging PartnerSequoia CapitalMenlo ParkAI/MLGlobal$10MEarly to GrowthGoogleHigh'''MichaelMoritz
1Navid AlipourManaging PartnerAnalytics VenturesSan DiegoAI/MLSan Diego$5MSeedCureMetrixHigh''NavidAlipour
2Aaref HilalyPartnerBain Capital VenturesPalo AltoReal-time AnalyticsBurlingameSeedSeed/Early StageRubrikMediumAarefHilaly
3Aaron FleishmanPrincipalTola CapitalSeattle WAEnterprise AIPNW$5M-$20MSeries ADatabricksHighAaronFleishman
4Aaron JacobsonPartnerNew Enterprise AssociatesMenlo ParkMLOpsNorth AmericaSeries A$10M-$30MDatabricksHighAaronJacobson
\n", + "
" + ], + "text/plain": [ + " Name Title Firm Location \\\n", + "0 Michael Moritz Managing Partner Sequoia Capital Menlo Park \n", + "1 Navid Alipour Managing Partner Analytics Ventures San Diego \n", + "2 Aaref Hilaly Partner Bain Capital Ventures Palo Alto \n", + "3 Aaron Fleishman Principal Tola Capital Seattle WA \n", + "4 Aaron Jacobson Partner New Enterprise Associates Menlo Park \n", + "\n", + " Personal Investment Focus Geographic Focus Typical Deal Size \\\n", + "0 AI/ML Global $10M \n", + "1 AI/ML San Diego $5M \n", + "2 Real-time Analytics Burlingame Seed \n", + "3 Enterprise AI PNW $5M-$20M \n", + "4 MLOps North America Series A \n", + "\n", + " Preferred Stages Notable Personal AI Investments Potential Fit with Kaizen \\\n", + "0 Early to Growth Google High''' \n", + "1 Seed CureMetrix High'' \n", + "2 Seed/Early Stage Rubrik Medium \n", + "3 Series A Databricks High \n", + "4 $10M-$30M Databricks High \n", + "\n", + " firstName lastName \n", + "0 Michael Moritz \n", + "1 Navid Alipour \n", + "2 Aaref Hilaly \n", + "3 Aaron Fleishman \n", + "4 Aaron Jacobson " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nice_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "30dcc791-cbdb-45f1-9298-a74e0a7babab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 12)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nice_data.shape" + ] + }, + { + "cell_type": "markdown", + "id": "5c4cafb4-fe5f-4f6e-b594-759b199acb7e", + "metadata": {}, + "source": [ + "\n", + "## Write this nice data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b1f4a89-cb96-417a-86f4-ebc513c18510", + "metadata": {}, + "outputs": [], + "source": [ + "hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name=\"testing_tab\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py new file mode 100644 index 000000000..a76ac9e94 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py @@ -0,0 +1,107 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [hgoogle_file_api.py](#hgoogle_file_api.py) +# - [Get Credentials for your drive](#get-credentials-for-your-drive) +# - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet) +# - [Freeze Rows](#freeze-rows) +# - [Change the height of certin rows](#change-the-height-of-certin-rows) +# - [Read some nice data](#read-some-nice-data) +# - [Write this nice data](#write-this-nice-data) + +# %% [markdown] +# +# # hgoogle_file_api.py + +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade google-api-python-client)" +# # !sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade pip install oauth2client)" +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade gspread)" + +# %% +import importlib +import 
helpers.hgoogle_drive_api as hgodrapi + +importlib.reload(hgodrapi) + +# %% [markdown] +# +# ## Get Credentials for your drive + +# %% +google_creds = hgodrapi.get_credentials() +print(google_creds) + +# %% +service = hgodrapi.get_sheets_service(google_creds) +print(service) + +# %% [markdown] +# +# ## Get Tab/Sheet id of a particular google sheet + +# %% +tab_name = "cleaned_profiles_1" +url = "https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260" +sheet_id = "1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA" +credentials = google_creds + +# %% [markdown] +# +# ## Freeze Rows + +# %% +row_indices = [0, 1, 2] +hgodrapi.freeze_rows( + credentials, + sheet_id=sheet_id, + row_indices=row_indices, + tab_name=tab_name, +) + +# %% [markdown] +# +# ## Change the height of certin rows + +# %% +hgodrapi.set_row_height( + google_creds, + sheet_id=sheet_id, + height=20, + start_index=0, + end_index=2, + tab_name=tab_name, +) + +# %% [markdown] +# +# ## Read some nice data + +# %% +nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name) + +# %% +nice_data.head() + +# %% +nice_data.shape + +# %% [markdown] +# +# ## Write this nice data + +# %% +hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name="testing_tab") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb new file mode 100644 index 000000000..3bb70bdef --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb @@ -0,0 +1,13040 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [Description](#description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"\n", + "# Description\n", + "\n", + "This notebook examines ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", + "#!jupyter labextension enable" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:14.828251Z", + "start_time": "2021-04-02T18:11:14.514771Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import logging\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.henv as henv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.635995Z", + "start_time": "2021-04-02T18:11:18.239237Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# System signature\n", + " # Container version\n", + " container_version='1.2.0'\n", + " changelog_version='2.0.0'\n", + " # Git info\n", + " branch_name='CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI'\n", + " hash='0ca93d8c'\n", + " # Last commits:\n", + " * 0ca93d8c GP Saggese Merge ( 5 minutes ago) Fri May 9 22:09:03 2025 (HEAD -> CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI, origin/CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI)\n", + " |\\ \n", + " * | 99cbbf22 GP Saggese Lint ( 6 minutes ago) Fri May 9 22:08:07 2025 \n", + " | * 27b38c48 GP Saggese CmampTask12067_Read_docs_about_DataPull_4 (#698) ( 8 minutes ago) Fri May 9 22:06:25 2025 (origin/master, origin/HEAD, master)\n", + " # Platform info\n", + " system=Linux\n", + " node name=0f79e8b845ee\n", + " release=6.10.14-linuxkit\n", + " version=#1 SMP Thu Mar 20 16:32:56 UTC 2025\n", + " machine=aarch64\n", + " processor=aarch64\n", + " # psutils info\n", + " cpu count=8\n", + " cpu freq=None\n", + " 
memory=svmem(total=16749285376, available=14575529984, percent=13.0, used=1910644736, free=9673363456, active=2843516928, inactive=3252117504, buffers=490647552, cached=4674629632, shared=1093632, slab=694362112)\n", + " disk usage=sdiskusage(total=270233210880, used=102272610304, free=154199986176, percent=39.9)\n", + " # Docker info\n", + " has_docker=True\n", + " docker_version='28.0.4'\n", + " docker_needs_sudo=False\n", + " has_privileged_mode=True\n", + " is_inside_docker=True\n", + " has_docker_sibling_containers_support=True\n", + " has_docker_children_containers_support=True\n", + " # Packages\n", + " python: 3.12.3\n", + " cvxopt: ?\n", + " cvxpy: ?\n", + " gluonnlp: ?\n", + " gluonts: ?\n", + " joblib: 1.4.2\n", + " mxnet: ?\n", + " numpy: 2.2.3\n", + " pandas: 2.2.3\n", + " pyarrow: 19.0.1\n", + " scipy: 1.15.2\n", + " seaborn: 0.13.2\n", + " sklearn: 1.6.1\n", + " statsmodels: 0.14.4\n" + ] + } + ], + "source": [ + "print(henv.get_system_signature()[0])\n", + "\n", + "hnotebook.config_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.668793Z", + "start_time": "2021-04-02T18:11:24.638503Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-0f2f4a10-7f18-4858-af02-b60808101345.json'\n" + ] + } + ], + "source": [ + "# hdbg.init_logger(verbosity=logging.DEBUG)\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "# hdbg.test_logger()\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet openai requests)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + 
"source": [ + "import helpers.hllm as hllm\n", + "import helpers.hpandas as hpandas" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "val = hllm.get_model_stats()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'architecture': {'input_modalities': ['text', 'image'],\n", + " 'instruct_type': None,\n", + " 'modality': 'text+image->text',\n", + " 'output_modalities': ['text'],\n", + " 'tokenizer': 'Mistral'},\n", + " 'context_length': 131072,\n", + " 'created': 1746627341,\n", + " 'description': 'Mistral Medium 3 is a high-performance enterprise-grade '\n", + " 'language model designed to deliver frontier-level '\n", + " 'capabilities at significantly reduced operational cost. It '\n", + " 'balances state-of-the-art reasoning and multimodal '\n", + " 'performance with 8× lower cost compared to traditional large '\n", + " 'models, making it suitable for scalable deployments across '\n", + " 'professional and industrial use cases.\\n'\n", + " '\\n'\n", + " 'The model excels in domains such as coding, STEM reasoning, '\n", + " 'and enterprise adaptation. It supports hybrid, on-prem, and '\n", + " 'in-VPC deployments and is optimized for integration into '\n", + " 'custom workflows. 
Mistral Medium 3 offers competitive '\n", + " 'accuracy relative to larger models like Claude Sonnet '\n", + " '3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining '\n", + " 'broad compatibility across cloud environments.',\n", + " 'id': 'mistralai/mistral-medium-3',\n", + " 'name': 'Mistral: Mistral Medium 3',\n", + " 'per_request_limits': None,\n", + " 'pricing': {'completion': '0.000002',\n", + " 'image': '0',\n", + " 'internal_reasoning': '0',\n", + " 'prompt': '0.0000004',\n", + " 'request': '0',\n", + " 'web_search': '0'},\n", + " 'supported_parameters': ['tools',\n", + " 'tool_choice',\n", + " 'max_tokens',\n", + " 'temperature',\n", + " 'top_p',\n", + " 'stop',\n", + " 'frequency_penalty',\n", + " 'presence_penalty',\n", + " 'response_format',\n", + " 'structured_outputs',\n", + " 'seed'],\n", + " 'top_provider': {'context_length': 131072,\n", + " 'is_moderated': False,\n", + " 'max_completion_tokens': None}}\n" + ] + } + ], + "source": [ + "import pprint\n", + "\n", + "pprint.pprint(val[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreateddescriptioncontext_lengthper_request_limitssupported_parametersarchitecture_modalityarchitecture_input_modalitiesarchitecture_output_modalitiesarchitecture_tokenizerarchitecture_instruct_typepricing_promptpricing_completionpricing_requestpricing_imagepricing_web_searchpricing_internal_reasoningtop_provider_context_lengthtop_provider_max_completion_tokenstop_provider_is_moderatedpricing_input_cache_readpricing_input_cache_write
0mistralai/mistral-medium-3Mistral: Mistral Medium 31746627341Mistral Medium 3 is a high-performance enterpr...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.00000040.0000020000131072.0NaNFalseNaNNaN
1google/gemini-2.5-pro-previewGoogle: Gemini 2.5 Pro Preview1746578513Gemini 2.5 Pro is Google’s state-of-the-art AI...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0.000001250.0000100.00516001048576.065535.0False0.000000310.000001625
2arcee-ai/caller-largeArcee AI: Caller Large1746487869Caller Large is Arcee's specialist \"function‑c...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.000000550.00000085000032768.0NaNFalseNaNNaN
3arcee-ai/spotlightArcee AI: Spotlight1746481552Spotlight is a 7‑billion‑parameter vision‑lang...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000180.000000180000131072.065537.0FalseNaNNaN
4arcee-ai/maestro-reasoningArcee AI: Maestro Reasoning1746481269Maestro Reasoning is Arcee's flagship analysis...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000090.00000330000131072.032000.0FalseNaNNaN
5arcee-ai/virtuoso-largeArcee AI: Virtuoso Large1746478885Virtuoso‑Large is Arcee's top‑tier general‑pur...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000750.00000120000131072.064000.0FalseNaNNaN
6arcee-ai/coder-largeArcee AI: Coder Large1746478663Coder‑Large is a 32 B‑parameter offspring of Q...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.0000008000032768.0NaNFalseNaNNaN
7arcee-ai/virtuoso-medium-v2Arcee AI: Virtuoso Medium V21746478434Virtuoso‑Medium‑v2 is a 32 B model distilled f...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.00000080000131072.032768.0FalseNaNNaN
8arcee-ai/arcee-blitzArcee AI: Arcee Blitz1746470100Arcee Blitz is a 24 B‑parameter dense model di...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000450.00000075000032768.0NaNFalseNaNNaN
9microsoft/phi-4-reasoning-plus:freeMicrosoft: Phi 4 Reasoning Plus (free)1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
10microsoft/phi-4-reasoning-plusMicrosoft: Phi 4 Reasoning Plus1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.000000070.00000035000032768.0NaNFalseNaNNaN
11microsoft/phi-4-reasoning:freeMicrosoft: Phi 4 Reasoning (free)1746121275Phi-4-reasoning is a 14B parameter dense decod...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
12qwen/qwen3-0.6b-04-28:freeQwen: Qwen3 0.6B (free)1746043526Qwen3-0.6B is a lightweight, 0.6 billion param...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
13inception/mercury-coder-small-betaInception: Mercury Coder Small Beta1746033880Mercury Coder Small is the first diffusion lar...32000None[max_tokens, frequency_penalty, presence_penal...text->text[text][text]OtherNone0.000000250.000001000032000.0NaNFalseNaNNaN
14qwen/qwen3-1.7b:freeQwen: Qwen3 1.7B (free)1746031388Qwen3-1.7B is a compact, 1.7 billion parameter...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
15qwen/qwen3-4b:freeQwen: Qwen3 4B (free)1746031104Qwen3-4B is a 4 billion parameter dense langua...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None000000128000.0NaNFalseNaNNaN
16opengvlab/internvl3-14b:freeOpenGVLab: InternVL3 14B (free)1746021355The 14b version of the InternVL3 series. An ad...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
17opengvlab/internvl3-2b:freeOpenGVLab: InternVL3 2B (free)1746019807The 2b version of the InternVL3 series, for an...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
18deepseek/deepseek-prover-v2:freeDeepSeek: DeepSeek Prover V2 (free)1746013094DeepSeek Prover V2 is a 671B parameter model, ...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
19deepseek/deepseek-prover-v2DeepSeek: DeepSeek Prover V21746013094DeepSeek Prover V2 is a 671B parameter model, ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.00000050.000002180000131072.0NaNFalseNaNNaN
20meta-llama/llama-guard-4-12bMeta: Llama Guard 4 12B1745975193Llama Guard 4 is a Llama 4 Scout-derived multi...163840None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000050.000000050000163840.0NaNFalseNaNNaN
21qwen/qwen3-30b-a3b:freeQwen: Qwen3 30B A3B (free)1745878604Qwen3, the latest generation in the Qwen large...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
22qwen/qwen3-30b-a3bQwen: Qwen3 30B A3B1745878604Qwen3, the latest generation in the Qwen large...40960None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwen3None0.00000010.0000003000040960.040960.0FalseNaNNaN
23qwen/qwen3-8b:freeQwen: Qwen3 8B (free)1745876632Qwen3-8B is a dense 8.2B parameter causal lang...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.040960.0FalseNaNNaN
24qwen/qwen3-8bQwen: Qwen3 8B1745876632Qwen3-8B is a dense 8.2B parameter causal lang...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.0000000350.0000001380000128000.0NaNFalseNaNNaN
25qwen/qwen3-14b:freeQwen: Qwen3 14B (free)1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
26qwen/qwen3-14bQwen: Qwen3 14B1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000070.00000024000040960.040960.0FalseNaNNaN
27qwen/qwen3-32b:freeQwen: Qwen3 32B (free)1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
28qwen/qwen3-32bQwen: Qwen3 32B1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.00000010.0000003000040960.0NaNFalseNaNNaN
29qwen/qwen3-235b-a22b:freeQwen: Qwen3 235B A22B (free)1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
30qwen/qwen3-235b-a22bQwen: Qwen3 235B A22B1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000140.000002000040960.040960.0FalseNaNNaN
31tngtech/deepseek-r1t-chimera:freeTNG: DeepSeek R1T Chimera (free)1745760875DeepSeek-R1T-Chimera is created by merging Dee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
32thudm/glm-z1-rumination-32bTHUDM: GLM Z1 Rumination 32B1745601495THUDM: GLM Z1 Rumination 32B is a 32B-paramete...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
33thudm/glm-z1-9b:freeTHUDM: GLM Z1 9B (free)1745601140GLM-Z1-9B-0414 is a 9B-parameter language mode...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032000.0NaNFalseNaNNaN
34thudm/glm-4-9b:freeTHUDM: GLM 4 9B (free)1745601023GLM-4-9B-0414 is a 9 billion parameter languag...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032000.0NaNFalseNaNNaN
35microsoft/mai-ds-r1:freeMicrosoft: MAI DS R1 (free)1745194100MAI-DS-R1 is a post-trained variant of DeepSee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
36thudm/glm-z1-32b:freeTHUDM: GLM Z1 32B (free)1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
37thudm/glm-z1-32bTHUDM: GLM Z1 32B1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
38thudm/glm-4-32b:freeTHUDM: GLM 4 32B (free)1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
39thudm/glm-4-32bTHUDM: GLM 4 32B1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000240.00000024000032000.0NaNFalseNaNNaN
40google/gemini-2.5-flash-previewGoogle: Gemini 2.5 Flash Preview1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000000600.0006192001048576.065535.0False0.00000003750.0000002333
41google/gemini-2.5-flash-preview:thinkingGoogle: Gemini 2.5 Flash Preview (thinking)1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000003500.0006192001048576.065535.0False0.00000003750.0000002333
42openai/o4-mini-highOpenAI: o4 Mini High1744824212OpenAI o4-mini-high is the same model as [o4-m...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
43openai/o3OpenAI: o31744823457o3 is a well-rounded and powerful model across...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.000010.0000400.0076500200000.0100000.0True0.0000025NaN
44openai/o4-miniOpenAI: o4 Mini1744820942OpenAI o4-mini is a compact reasoning model in...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
45shisa-ai/shisa-v2-llama3.3-70b:freeShisa AI: Shisa V2 Llama 3.3 70B (free)1744754858Shisa V2 Llama 3.3 70B is a bilingual Japanese...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None00000032768.0NaNFalseNaNNaN
46qwen/qwen2.5-coder-7b-instructQwen: Qwen2.5 Coder 7B Instruct1744734887Qwen2.5-Coder-7B-Instruct is a 7B parameter in...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]QwenNone0.000000010.00000003000032768.0NaNFalseNaNNaN
47openai/gpt-4.1OpenAI: GPT-4.11744651385GPT-4.1 is a flagship large language model opt...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.0000020.00000800001047576.032768.0True0.0000005NaN
48openai/gpt-4.1-miniOpenAI: GPT-4.1 Mini1744651381GPT-4.1 Mini is a mid-sized model delivering p...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000040.000001600001047576.032768.0True0.0000001NaN
49openai/gpt-4.1-nanoOpenAI: GPT-4.1 Nano1744651369For tasks that demand low latency, GPT‑4.1 nan...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000010.000000400001047576.032768.0True0.000000025NaN
50eleutherai/llemma_7bEleutherAI: Llemma 7b1744643225Llemma 7B is a language model for mathematics....4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Othercode-llama0.00000080.000001200004096.04096.0FalseNaNNaN
51alfredpros/codellama-7b-instruct-solidityAlfredPros: CodeLLaMa 7B Instruct Solidity1744641874A finetuned 7 billion parameters Code LLaMA - ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otheralpaca0.00000080.000001200004096.04096.0FalseNaNNaN
52arliai/qwq-32b-arliai-rpr-v1:freeArliAI: QwQ 32B RpR v1 (free)1744555982QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
53agentica-org/deepcoder-14b-preview:freeAgentica: Deepcoder 14B Preview (free)1744555395DeepCoder-14B-Preview is a 14B parameter code ...96000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000096000.0NaNFalseNaNNaN
54moonshotai/kimi-vl-a3b-thinking:freeMoonshot AI: Kimi VL A3B Thinking (free)1744304841Kimi-VL is a lightweight Mixture-of-Experts vi...131072None[max_tokens, temperature, top_p, reasoning, in...text+image->text[image, text][text]OtherNone000000131072.0NaNFalseNaNNaN
55x-ai/grok-3-mini-betaxAI: Grok 3 Mini Beta1744240195Grok 3 Mini is a lightweight, smaller thinking...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.00000030.00000050000131072.0NaNFalseNaNNaN
56x-ai/grok-3-betaxAI: Grok 3 Beta1744240068Grok 3 is the latest model from xAI. It's thei...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000030.0000150000131072.0NaNFalseNaNNaN
57nvidia/llama-3.3-nemotron-super-49b-v1:freeNVIDIA: Llama 3.3 Nemotron Super 49B v1 (free)1744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
58nvidia/llama-3.3-nemotron-super-49b-v1NVIDIA: Llama 3.3 Nemotron Super 49B v11744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000130.00000040000131072.0NaNFalseNaNNaN
59nvidia/llama-3.1-nemotron-ultra-253b-v1:freeNVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)1744115059Llama-3.1-Nemotron-Ultra-253B-v1 is a large la...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None000000131072.0NaNFalseNaNNaN
60meta-llama/llama-4-maverick:freeMeta: Llama 4 Maverick (free)1743881822Llama 4 Maverick 17B Instruct (128E) is a high...256000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000256000.0NaNFalseNaNNaN
61meta-llama/llama-4-maverickMeta: Llama 4 Maverick1743881822Llama 4 Maverick 17B Instruct (128E) is a high...1048576None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000170.000000600.0006684001048576.016384.0FalseNaNNaN
62meta-llama/llama-4-scout:freeMeta: Llama 4 Scout (free)1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...512000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000512000.0NaNFalseNaNNaN
63meta-llama/llama-4-scoutMeta: Llama 4 Scout1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...1048576None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]OtherNone0.000000080.000000300001048576.01048576.0FalseNaNNaN
64all-hands/openhands-lm-32b-v0.1OpenHands LM 32B V0.11743613013OpenHands LM v0.1 is a 32B open-source coding ...16384None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000260.0000034000016384.04096.0FalseNaNNaN
65mistral/ministral-8bMistral: Ministral 8B1743430021Ministral 8B is a state-of-the-art language mo...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000131072.0NaNFalseNaNNaN
66deepseek/deepseek-v3-base:freeDeepSeek: DeepSeek V3 Base (free)1743272023Note that this is a base model mostly meant fo...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
67scb10x/llama3.1-typhoon2-8b-instructTyphoon2 8B Instruct1743196511Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000180.0000001800008192.0NaNFalseNaNNaN
68scb10x/llama3.1-typhoon2-70b-instructTyphoon2 70B Instruct1743196170Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000880.0000008800008192.0NaNFalseNaNNaN
69allenai/molmo-7b-d:freeAllenAI: Molmo 7B D (free)1743023247Molmo is a family of open vision-language mode...4096None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0000004096.0NaNFalseNaNNaN
70bytedance-research/ui-tars-72b:freeBytedance: UI-TARS 72B (free)1743020065UI-TARS 72B is an open-source multimodal AI mo...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone00000032768.0NaNFalseNaNNaN
71qwen/qwen2.5-vl-3b-instruct:freeQwen: Qwen2.5 VL 3B Instruct (free)1743014573Qwen2.5 VL 3B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.0NaNFalseNaNNaN
72google/gemini-2.5-pro-exp-03-25Google: Gemini 2.5 Pro Experimental1742922099Gemini 2.5 Pro is Google’s state-of-the-art AI...1000000None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0000001000000.065535.0FalseNaNNaN
73qwen/qwen2.5-vl-32b-instruct:freeQwen: Qwen2.5 VL 32B Instruct (free)1742839838Qwen2.5-VL-32B is a multimodal vision-language...8192None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0000008192.0NaNFalseNaNNaN
74qwen/qwen2.5-vl-32b-instructQwen: Qwen2.5 VL 32B Instruct1742839838Qwen2.5-VL-32B is a multimodal vision-language...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000090.00000090000128000.0NaNFalseNaNNaN
75deepseek/deepseek-chat-v3-0324:freeDeepSeek: DeepSeek V3 0324 (free)1742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
76deepseek/deepseek-chat-v3-0324DeepSeek: DeepSeek V3 03241742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]DeepSeekNone0.00000030.000000880000163840.0NaNFalseNaNNaN
77featherless/qwerky-72b:freeQwerky 72B (free)1742481597Qwerky-72B is a linear-attention RWKV variant ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.04096.0FalseNaNNaN
78openai/o1-proOpenAI: o1-pro1742423211The o1 series of models are trained with reinf...200000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]GPTNone0.000150.000600.2167500200000.0100000.0TrueNaNNaN
79mistralai/mistral-small-3.1-24b-instruct:freeMistral: Mistral Small 3.1 24B (free)1742238937Mistral Small 3.1 24B Instruct is an upgraded ...96000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone00000096000.096000.0FalseNaNNaN
80mistralai/mistral-small-3.1-24b-instructMistral: Mistral Small 3.1 24B1742238937Mistral Small 3.1 24B Instruct is an upgraded ...131072None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]MistralNone0.000000050.000000150000131072.0NaNFalseNaNNaN
81open-r1/olympiccoder-32b:freeOlympicCoder 32B (free)1742077228OlympicCoder-32B is a high-performing open-sou...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
82google/gemma-3-1b-it:freeGoogle: Gemma 3 1B (free)1741963556Gemma 3 1B is the smallest of the new Gemma 3 ...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000032768.08192.0FalseNaNNaN
83google/gemma-3-4b-it:freeGoogle: Gemma 3 4B (free)1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
84google/gemma-3-4b-itGoogle: Gemma 3 4B1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000020.000000040000131072.0NaNFalseNaNNaN
85ai21/jamba-1.6-largeAI21: Jamba 1.6 Large1741905173AI21 Jamba Large 1.6 is a high-performance hyb...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.0000020.0000080000256000.04096.0FalseNaNNaN
86ai21/jamba-1.6-miniAI21: Jamba Mini 1.61741905171AI21 Jamba Mini 1.6 is a hybrid foundation mod...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000020.00000040000256000.04096.0FalseNaNNaN
87google/gemma-3-12b-it:freeGoogle: Gemma 3 12B (free)1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
88google/gemma-3-12b-itGoogle: Gemma 3 12B1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000050.00000010000131072.0NaNFalseNaNNaN
89cohere/command-aCohere: Command A1741894342Command A is an open-weights 111B parameter mo...256000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000250.000010000256000.08192.0FalseNaNNaN
90openai/gpt-4o-mini-search-previewOpenAI: GPT-4o-mini Search Preview1741818122GPT-4o mini Search Preview is a specialized mo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.000000150.00000060.02750.00021700128000.016384.0TrueNaNNaN
91openai/gpt-4o-search-previewOpenAI: GPT-4o Search Preview1741817949GPT-4o Search Previewis a specialized model fo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.00000250.000010.0350.00361300128000.016384.0TrueNaNNaN
92rekaai/reka-flash-3:freeReka: Flash 3 (free)1741812813Reka Flash 3 is a general-purpose, instruction...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
93google/gemma-3-27b-it:freeGoogle: Gemma 3 27B (free)1741756359Gemma 3 introduces multimodality, supporting v...96000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000096000.08192.0FalseNaNNaN
94google/gemma-3-27b-itGoogle: Gemma 3 27B1741756359Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.00000010.000000200.000025600131072.016384.0FalseNaNNaN
95thedrummer/anubis-pro-105b-v1TheDrummer: Anubis Pro 105B V11741642290Anubis Pro 105B v1 is an expanded and refined ...131072None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000080.0000010000131072.0131072.0FalseNaNNaN
96thedrummer/skyfall-36b-v2TheDrummer: Skyfall 36B V21741636566Skyfall 36B v2 is an enhanced iteration of Mis...32768None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000050.0000008000032768.032768.0FalseNaNNaN
97microsoft/phi-4-multimodal-instructMicrosoft: Phi 4 Multimodal Instruct1741396284Phi-4 Multimodal Instruct is a versatile 5.6B ...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000050.000000100.0001768500131072.0NaNFalseNaNNaN
98perplexity/sonar-reasoning-proPerplexity: Sonar Reasoning Pro1741313308Note: Sonar Pro pricing includes Perplexity se...128000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]Otherdeepseek-r10.0000020.000008000.0050128000.0NaNFalseNaNNaN
99perplexity/sonar-proPerplexity: Sonar Pro1741312423Note: Sonar Pro pricing includes Perplexity se...200000None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000030.000015000.0050200000.08000.0FalseNaNNaN
100perplexity/sonar-deep-researchPerplexity: Sonar Deep Research1741311246Sonar Deep Research is a research-focused mode...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000020.000008000.0050.000003128000.0NaNFalseNaNNaN
101deepseek/deepseek-r1-zero:freeDeepSeek: DeepSeek R1 Zero (free)1741297434DeepSeek-R1-Zero is a model trained via large-...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r1000000163840.0NaNFalseNaNNaN
102qwen/qwq-32b:freeQwen: QwQ 32B (free)1741208814QwQ is the reasoning model of the Qwen series....40000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq00000040000.040000.0FalseNaNNaN
103qwen/qwq-32bQwen: QwQ 32B1741208814QwQ is the reasoning model of the Qwen series....131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq0.000000150.00000020000131072.0NaNFalseNaNNaN
104moonshotai/moonlight-16b-a3b-instruct:freeMoonshot AI: Moonlight 16B A3B Instruct (free)1740719801Moonlight-16B-A3B-Instruct is a 16B-parameter ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0000008192.0NaNFalseNaNNaN
105nousresearch/deephermes-3-llama-3-8b-preview:freeNous: DeepHermes 3 Llama 3 8B Preview (free)1740719372DeepHermes 3 Preview is the latest version of ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
106openai/gpt-4.5-previewOpenAI: GPT-4.5 (Preview)1740687810GPT-4.5 (Preview) is a research preview of Ope...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.0000750.0001500.10837500128000.016384.0True0.0000375NaN
107google/gemini-2.0-flash-lite-001Google: Gemini 2.0 Flash Lite1740506212Gemini 2.0 Flash Lite offers a significantly f...1048576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.0000000750.000000300001048576.08192.0FalseNaNNaN
108anthropic/claude-3.7-sonnetAnthropic: Claude 3.7 Sonnet1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
109anthropic/claude-3.7-sonnet:thinkingAnthropic: Claude 3.7 Sonnet (thinking)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
110anthropic/claude-3.7-sonnet:betaAnthropic: Claude 3.7 Sonnet (self-moderated)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[max_tokens, temperature, stop, reasoning, inc...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.0128000.0False0.00000030.00000375
111perplexity/r1-1776Perplexity: R1 17761740004929R1 1776 is a version of DeepSeek-R1 that has b...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.0000020.0000080000128000.0NaNFalseNaNNaN
112mistralai/mistral-sabaMistral: Saba1739803239Mistral Saba is a 24B-parameter language model...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
113cognitivecomputations/dolphin3.0-r1-mistral-24...Dolphin3.0 R1 Mistral 24B (free)1739462498Dolphin 3.0 R1 is the next generation of the D...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
114cognitivecomputations/dolphin3.0-mistral-24b:freeDolphin3.0 Mistral 24B (free)1739462019Dolphin 3.0 is the next generation of the Dolp...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
115meta-llama/llama-guard-3-8bLlama Guard 3 8B1739401318Llama Guard 3 is a Llama-3.1-8B pretrained mod...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.000000020.000000060000131072.0NaNFalseNaNNaN
116openai/o3-mini-highOpenAI: o3 Mini High1739372611OpenAI o3-mini-high is the same model as [o3-m...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
117deepseek/deepseek-r1-distill-llama-8bDeepSeek: R1 Distill Llama 8B1738937718DeepSeek R1 Distill Llama 8B is a distilled la...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.000000040.00000004000032000.032000.0FalseNaNNaN
118google/gemini-2.0-flash-001Google: Gemini 2.0 Flash1738769413Gemini Flash 2.0 offers a significantly faster...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.00000010.000000400.0000258001000000.08192.0False0.0000000250.0000001833
119qwen/qwen-vl-plusQwen: Qwen VL Plus1738731255Qwen's Enhanced Large Visual Language Model. S...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.000000210.0000006300.0002688007500.01500.0FalseNaNNaN
120aion-labs/aion-1.0AionLabs: Aion-1.01738697557Aion-1.0 is a multi-model system designed for ...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.0000040.0000080000131072.032768.0FalseNaNNaN
121aion-labs/aion-1.0-miniAionLabs: Aion-1.0-Mini1738697107Aion-1.0-Mini 32B parameter model is a distill...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.00000070.00000140000131072.032768.0FalseNaNNaN
122aion-labs/aion-rp-llama-3.1-8bAionLabs: Aion-RP 1.0 (8B)1738696718Aion-RP-Llama-3.1-8B ranks the highest in the ...32768None[max_tokens, temperature, top_p]text->text[text][text]OtherNone0.00000020.0000002000032768.032768.0FalseNaNNaN
123qwen/qwen-vl-maxQwen: Qwen VL Max1738434304Qwen VL Max is a visual understanding model wi...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.00000080.000003200.001024007500.01500.0FalseNaNNaN
124qwen/qwen-turboQwen: Qwen-Turbo1738410974Qwen-Turbo, based on Qwen2.5, is a 1M context ...1000000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.000000050.000000200001000000.08192.0FalseNaNNaN
125qwen/qwen2.5-vl-72b-instruct:freeQwen: Qwen2.5 VL 72B Instruct (free)1738410311Qwen2.5-VL is proficient in recognizing common...131072None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone000000131072.02048.0FalseNaNNaN
126qwen/qwen2.5-vl-72b-instructQwen: Qwen2.5 VL 72B Instruct1738410311Qwen2.5-VL is proficient in recognizing common...32000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.000000250.00000075000032000.0NaNFalseNaNNaN
127qwen/qwen-plusQwen: Qwen-Plus1738409840Qwen-Plus, based on the Qwen2.5 foundation mod...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000040.00000120000131072.08192.0FalseNaNNaN
128qwen/qwen-maxQwen: Qwen-Max1738402289Qwen-Max, based on Qwen2.5, provides the best ...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000160.0000064000032768.08192.0FalseNaNNaN
129openai/o3-miniOpenAI: o3 Mini1738351721OpenAI o3-mini is a cost-efficient language mo...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
130deepseek/deepseek-r1-distill-qwen-1.5bDeepSeek: R1 Distill Qwen 1.5B1738328067DeepSeek R1 Distill Qwen 1.5B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000180.000000180000131072.032768.0FalseNaNNaN
131mistralai/mistral-small-24b-instruct-2501:freeMistral: Mistral Small 3 (free)1738255409Mistral Small 3 is a 24B-parameter language mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone00000032768.0NaNFalseNaNNaN
132mistralai/mistral-small-24b-instruct-2501Mistral: Mistral Small 31738255409Mistral Small 3 is a 24B-parameter language mo...28000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone0.000000060.00000012000028000.014000.0FalseNaNNaN
133deepseek/deepseek-r1-distill-qwen-32b:freeDeepSeek: R1 Distill Qwen 32B (free)1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...16000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000016000.016000.0FalseNaNNaN
134deepseek/deepseek-r1-distill-qwen-32bDeepSeek: R1 Distill Qwen 32B1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000120.000000180000131072.016384.0FalseNaNNaN
135deepseek/deepseek-r1-distill-qwen-14b:freeDeepSeek: R1 Distill Qwen 14B (free)1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000064000.0NaNFalseNaNNaN
136deepseek/deepseek-r1-distill-qwen-14bDeepSeek: R1 Distill Qwen 14B1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000150.00000015000064000.064000.0FalseNaNNaN
137perplexity/sonar-reasoningPerplexity: Sonar Reasoning1738131107Sonar Reasoning is a reasoning model provided ...127000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000010.0000050.005000127000.0NaNFalseNaNNaN
138perplexity/sonarPerplexity: Sonar1738013808Sonar is lightweight, affordable, fast, and si...127072None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000010.0000010.005000127072.0NaNFalseNaNNaN
139liquid/lfm-7bLiquid: LFM 7B1737806883LFM-7B, a new best-in-class language model. LF...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000010.00000001000032768.0NaNFalseNaNNaN
140liquid/lfm-3bLiquid: LFM 3B1737806501Liquid's LFM 3B delivers incredible performanc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000020.00000002000032768.0NaNFalseNaNNaN
141deepseek/deepseek-r1-distill-llama-70b:freeDeepSeek: R1 Distill Llama 70B (free)1737663169DeepSeek R1 Distill Llama 70B is a distilled l...8192None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10000008192.04096.0FalseNaNNaN
142deepseek/deepseek-r1-distill-llama-70bDeepSeek: R1 Distill Llama 70B1737663169DeepSeek R1 Distill Llama 70B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.00000010.00000040000131072.016384.0FalseNaNNaN
143deepseek/deepseek-r1:freeDeepSeek: R1 (free)1737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, reasoning, include_reasoning, tem...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
144deepseek/deepseek-r1DeepSeek: R11737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.00000050.000002180000163840.0163840.0FalseNaNNaN
145minimax/minimax-01MiniMax: MiniMax-011736915462MiniMax-01 is a combines MiniMax-Text-01 for t...1000192None[max_tokens, temperature, top_p]text+image->text[text, image][text]OtherNone0.00000020.000001100001000192.01000192.0FalseNaNNaN
146mistralai/codestral-2501Mistral: Codestral 25011736895522[Mistral](/mistralai)'s cutting-edge language ...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000030.00000090000262144.0NaNFalseNaNNaN
147microsoft/phi-4Microsoft: Phi 41736489872[Microsoft Research](/microsoft) Phi-4 is desi...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000070.00000014000016384.016384.0FalseNaNNaN
148deepseek/deepseek-chat:freeDeepSeek: DeepSeek V3 (free)1735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
149deepseek/deepseek-chatDeepSeek: DeepSeek V31735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.000000380.000000890000163840.0163840.0FalseNaNNaN
150sao10k/l3.3-euryale-70bSao10K: Llama 3.3 Euryale 70B1734535928Euryale L3.3 70B is a model focused on creativ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
151openai/o1OpenAI: o11734459999The latest and strongest model family from Ope...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[text, image][text]GPTNone0.0000150.0000600.02167500200000.0100000.0True0.0000075NaN
152eva-unit-01/eva-llama-3.33-70bEVA Llama 3.33 70B1734377303EVA Llama 3.33 70b is a roleplay and storywrit...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.000006000016384.04096.0FalseNaNNaN
153x-ai/grok-2-vision-1212xAI: Grok 2 Vision 12121734237338Grok 2 Vision 1212 advances image-based AI wit...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000020.0000100.00360032768.0NaNFalseNaNNaN
154x-ai/grok-2-1212xAI: Grok 2 12121734232814Grok 2 1212 introduces significant enhancement...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000020.000010000131072.0NaNFalseNaNNaN
155cohere/command-r7b-12-2024Cohere: Command R7B (12-2024)1734158152Command R7B (12-2024) is a small, fast update ...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.00000003750.000000150000128000.04000.0FalseNaNNaN
156google/gemini-2.0-flash-exp:freeGoogle: Gemini 2.0 Flash Experimental (free)1733937523Gemini Flash 2.0 offers a significantly faster...1048576None[max_tokens, temperature, top_p, stop]text+image->text[text, image][text]GeminiNone0000001048576.08192.0FalseNaNNaN
157meta-llama/llama-3.3-70b-instruct:freeMeta: Llama 3.3 70B Instruct (free)1733506137The Meta Llama 3.3 multilingual large language...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30000008000.08000.0FalseNaNNaN
158meta-llama/llama-3.3-70b-instructMeta: Llama 3.3 70B Instruct1733506137The Meta Llama 3.3 multilingual large language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000090.000000350000131000.0131000.0FalseNaNNaN
159amazon/nova-lite-v1Amazon: Nova Lite 1.01733437363Amazon Nova Lite 1.0 is a very low-cost multim...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.000000060.0000002400.0000900300000.05120.0TrueNaNNaN
160amazon/nova-micro-v1Amazon: Nova Micro 1.01733437237Amazon Nova Micro 1.0 is a text-only model tha...128000None[tools, max_tokens, temperature, top_p, top_k,...text->text[text][text]NovaNone0.0000000350.000000140000128000.05120.0TrueNaNNaN
161amazon/nova-pro-v1Amazon: Nova Pro 1.01733436303Amazon Nova Pro 1.0 is a capable multimodal mo...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.00000080.000003200.001200300000.05120.0TrueNaNNaN
162qwen/qwq-32b-preview:freeQwen: QwQ 32B Preview (free)1732754541QwQ-32B-Preview is an experimental research mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r100000016384.0NaNFalseNaNNaN
163qwen/qwq-32b-previewQwen: QwQ 32B Preview1732754541QwQ-32B-Preview is an experimental research mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r10.000000090.00000027000032768.0NaNFalseNaNNaN
164google/learnlm-1.5-pro-experimental:freeGoogle: LearnLM 1.5 Pro Experimental (free)1732216551An experimental version of [Gemini 1.5 Pro](/g...40960None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone00000040960.08192.0FalseNaNNaN
165eva-unit-01/eva-qwen-2.5-72bEVA Qwen2.5 72B1732210606EVA Qwen2.5 72B is a roleplay and storywriting...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
166openai/gpt-4o-2024-11-20OpenAI: GPT-4o (2024-11-20)1732127594The 2024-11-20 version of GPT-4o offers a leve...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
167mistralai/mistral-large-2411Mistral Large 24111731978685Mistral Large 2 2411 is an update of [Mistral ...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
168mistralai/mistral-large-2407Mistral Large 24071731978415This is Mistral AI's flagship model, Mistral L...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
169mistralai/pixtral-large-2411Mistral: Pixtral Large 24111731977388Pixtral Large is a 124B parameter, open-weight...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.0000020.00000600.00288800131072.0NaNFalseNaNNaN
170x-ai/grok-vision-betaxAI: Grok Vision Beta1731976624Grok Vision Beta is xAI's experimental languag...8192None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000050.00001500.009008192.0NaNFalseNaNNaN
171infermatic/mn-inferor-12bInfermatic: Mistral Nemo Inferor 12B1731464428Inferor 12B is a merge of top roleplay models,...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000080.0000012000016384.04096.0FalseNaNNaN
172qwen/qwen-2.5-coder-32b-instruct:freeQwen2.5 Coder 32B Instruct (free)1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
173qwen/qwen-2.5-coder-32b-instructQwen2.5 Coder 32B Instruct1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000060.00000015000032768.016384.0FalseNaNNaN
174raifle/sorcererlm-8x22bSorcererLM 8x22B1731105083SorcererLM is an advanced RP and storytelling ...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralvicuna0.00000450.0000045000016000.0NaNFalseNaNNaN
175eva-unit-01/eva-qwen-2.5-32bEVA Qwen2.5 32B1731104847EVA Qwen2.5 32B is a roleplaying/storywriting ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000260.0000034000016384.04096.0FalseNaNNaN
176thedrummer/unslopnemo-12bUnslopnemo 12B1731103448UnslopNemo v4.1 is the latest addition from th...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000450.00000045000032000.016000.0FalseNaNNaN
177anthropic/claude-3.5-haiku:betaAnthropic: Claude 3.5 Haiku (self-moderated)1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
178anthropic/claude-3.5-haikuAnthropic: Claude 3.5 Haiku1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
179anthropic/claude-3.5-haiku-20241022:betaAnthropic: Claude 3.5 Haiku (2024-10-22) (self...1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
180anthropic/claude-3.5-haiku-20241022Anthropic: Claude 3.5 Haiku (2024-10-22)1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
181neversleep/llama-3.1-lumimaid-70bNeverSleep: Lumimaid v0.2 70B1729555200Lumimaid v0.2 70B is a finetune of [Llama 3.1 ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000150.00000225000016384.02048.0FalseNaNNaN
182anthracite-org/magnum-v4-72bMagnum v4 72B1729555200This is a series of models designed to replica...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000150.00000225000016384.01024.0FalseNaNNaN
183anthropic/claude-3.5-sonnet:betaAnthropic: Claude 3.5 Sonnet (self-moderated)1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
184anthropic/claude-3.5-sonnetAnthropic: Claude 3.5 Sonnet1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
185x-ai/grok-betaxAI: Grok Beta1729382400Grok Beta is xAI's experimental language model...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000050.0000150000131072.0NaNFalseNaNNaN
186mistralai/ministral-8bMistral: Ministral 8B1729123200Ministral 8B is an 8B parameter model featurin...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000128000.0NaNFalseNaNNaN
187mistralai/ministral-3bMistral: Ministral 3B1729123200Ministral 3B is a 3B parameter model optimized...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000040.000000040000131072.0NaNFalseNaNNaN
188qwen/qwen-2.5-7b-instruct:freeQwen2.5 7B Instruct (free)1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.032768.0FalseNaNNaN
189qwen/qwen-2.5-7b-instructQwen2.5 7B Instruct1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000050.0000001000032768.016384.0FalseNaNNaN
190nvidia/llama-3.1-nemotron-70b-instructNVIDIA: Llama 3.1 Nemotron 70B Instruct1728950400NVIDIA's Llama 3.1 Nemotron 70B is a language ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000120.00000030000131072.0131072.0FalseNaNNaN
191inflection/inflection-3-productivityInflection: Inflection 3 Productivity1728604800Inflection 3 Productivity is optimized for fol...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
192inflection/inflection-3-piInflection: Inflection 3 Pi1728604800Inflection 3 Pi powers Inflection's [Pi](https...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
193google/gemini-flash-1.5-8bGoogle: Gemini 1.5 Flash 8B1727913600Gemini Flash 1.5 8B is optimized for speed and...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.00000003750.0000001500001000000.08192.0False0.000000010.0000000583
194thedrummer/rocinante-12bRocinante 12B1727654400Rocinante 12B is designed for engaging storyte...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000250.0000005000032768.0NaNFalseNaNNaN
195anthracite-org/magnum-v2-72bMagnum v2 72B1727654400From the maker of [Goliath](https://openrouter...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000030.000003000032768.0NaNFalseNaNNaN
196liquid/lfm-40bLiquid: LFM 40B MoE1727654400Liquid's 40.3B Mixture of Experts (MoE) model....32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000150.00000015000032768.0NaNFalseNaNNaN
197meta-llama/llama-3.2-3b-instruct:freeMeta: Llama 3.2 3B Instruct (free)1727222400Llama 3.2 3B is a 3-billion-parameter multilin...20000None[max_tokens, temperature, top_p]text->text[text][text]Llama3llama300000020000.020000.0FalseNaNNaN
198meta-llama/llama-3.2-3b-instructMeta: Llama 3.2 3B Instruct1727222400Llama 3.2 3B is a 3-billion-parameter multilin...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000010.000000020000131072.016384.0FalseNaNNaN
199meta-llama/llama-3.2-1b-instruct:freeMeta: Llama 3.2 1B Instruct (free)1727222400Llama 3.2 1B is a 1-billion-parameter language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131000.0NaNFalseNaNNaN
200meta-llama/llama-3.2-1b-instructMeta: Llama 3.2 1B Instruct1727222400Llama 3.2 1B is a 1-billion-parameter language...131072None[max_tokens, temperature, top_p, top_k, stop, ...text->text[text][text]Llama3llama30.0000000050.000000010000131072.0NaNFalseNaNNaN
201meta-llama/llama-3.2-90b-vision-instructMeta: Llama 3.2 90B Vision Instruct1727222400The Llama 90B Vision model is a top-tier, 90-b...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.00000120.000001200.00173400131072.02048.0FalseNaNNaN
202meta-llama/llama-3.2-11b-vision-instruct:freeMeta: Llama 3.2 11B Vision Instruct (free)1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama3000000131072.02048.0FalseNaNNaN
203meta-llama/llama-3.2-11b-vision-instructMeta: Llama 3.2 11B Vision Instruct1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.0000000490.00000004900.0000794800131072.016384.0FalseNaNNaN
204qwen/qwen-2.5-72b-instruct:freeQwen2.5 72B Instruct (free)1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
205qwen/qwen-2.5-72b-instructQwen2.5 72B Instruct1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwenchatml0.000000120.00000039000032768.016384.0FalseNaNNaN
206qwen/qwen-2.5-vl-72b-instructQwen: Qwen2.5-VL 72B Instruct1726617600Qwen2.5 VL 72B is a multimodal LLM from the Qw...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000060.000000600.0005780032768.0NaNFalseNaNNaN
207neversleep/llama-3.1-lumimaid-8bNeverSleep: Lumimaid v0.2 8B1726358400Lumimaid v0.2 8B is a finetune of [Llama 3.1 8...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000032768.02048.0FalseNaNNaN
208openai/o1-previewOpenAI: o1-preview1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
209openai/o1-preview-2024-09-12OpenAI: o1-preview (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
210openai/o1-miniOpenAI: o1-mini1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
211openai/o1-mini-2024-09-12OpenAI: o1-mini (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
212mistralai/pixtral-12bMistral: Pixtral 12B1725926400The first multi-modal, text+image-to-text mode...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]MistralNone0.00000010.000000100.00014450032768.0NaNFalseNaNNaN
213cohere/command-r-plus-08-2024Cohere: Command R+ (08-2024)1724976000command-r-plus-08-2024 is an update of the [Co...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000250.000010000128000.04000.0FalseNaNNaN
214cohere/command-r-08-2024Cohere: Command R (08-2024)1724976000command-r-08-2024 is an update of the [Command...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.000000150.00000060000128000.04000.0FalseNaNNaN
215qwen/qwen-2.5-vl-7b-instruct:freeQwen: Qwen2.5-VL 7B Instruct (free)1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.064000.0FalseNaNNaN
216qwen/qwen-2.5-vl-7b-instructQwen: Qwen2.5-VL 7B Instruct1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000020.000000200.00014450032768.0NaNFalseNaNNaN
217sao10k/l3.1-euryale-70bSao10K: Llama 3.1 Euryale 70B v2.21724803200Euryale L3.1 70B v2.2 is a model focused on cr...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
218google/gemini-flash-1.5-8b-expGoogle: Gemini 1.5 Flash 8B Experimental1724803200Gemini Flash 1.5 8B Experimental is an experim...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GeminiNone0000001000000.08192.0FalseNaNNaN
219microsoft/phi-3.5-mini-128k-instructMicrosoft: Phi-3.5 Mini 128K Instruct1724198400Phi-3.5 models are lightweight, state-of-the-a...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.000000030.000000090000131072.0NaNFalseNaNNaN
220nousresearch/hermes-3-llama-3.1-70bNous: Hermes 3 70B Instruct1723939200Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.000000120.00000030000131072.0131072.0FalseNaNNaN
221nousresearch/hermes-3-llama-3.1-405bNous: Hermes 3 405B Instruct1723766400Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.00000080.00000080000131072.0131072.0FalseNaNNaN
222openai/chatgpt-4o-latestOpenAI: ChatGPT-4o1723593600OpenAI ChatGPT 4o is continually updated by Op...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GPTNone0.0000050.00001500.00722500128000.016384.0TrueNaNNaN
223sao10k/l3-lunaris-8bSao10K: Llama 3 8B Lunaris1723507200Lunaris 8B is a versatile generalist and rolep...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.0000000500008192.0NaNFalseNaNNaN
224aetherwiing/mn-starcannon-12bAetherwiing: Starcannon 12B1723507200Starcannon 12B v2 is a creative roleplay and s...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
225openai/gpt-4o-2024-08-06OpenAI: GPT-4o (2024-08-06)1722902400The 2024-08-06 version of GPT-4o offers improv...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
226meta-llama/llama-3.1-405b:freeMeta: Llama 3.1 405B (base) (free)1722556800Meta's latest class of model (Llama 3.1) launc...64000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none00000064000.0NaNFalseNaNNaN
227meta-llama/llama-3.1-405bMeta: Llama 3.1 405B (base)1722556800Meta's latest class of model (Llama 3.1) launc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.0000020.000002000032768.0NaNFalseNaNNaN
228nothingiisreal/mn-celeste-12bMistral Nemo 12B Celeste1722556800A specialized story writing and roleplaying mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
229perplexity/llama-3.1-sonar-small-128k-onlinePerplexity: Llama 3.1 Sonar 8B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.00000020.00000020.005000127072.0NaNFalseNaNNaN
230perplexity/llama-3.1-sonar-large-128k-onlinePerplexity: Llama 3.1 Sonar 70B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.0000010.0000010.005000127072.0NaNFalseNaNNaN
231meta-llama/llama-3.1-8b-instruct:freeMeta: Llama 3.1 8B Instruct (free)1721692800Meta's latest class of model (Llama 3.1) launc...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131072.04096.0FalseNaNNaN
232meta-llama/llama-3.1-8b-instructMeta: Llama 3.1 8B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.00000003000016384.016384.0FalseNaNNaN
233meta-llama/llama-3.1-405b-instructMeta: Llama 3.1 405B Instruct1721692800The highly anticipated 400B class of Llama3 is...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000080.0000008000032768.016384.0FalseNaNNaN
234meta-llama/llama-3.1-70b-instructMeta: Llama 3.1 70B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000010.000000280000131072.016384.0FalseNaNNaN
235mistralai/codestral-mambaMistral: Codestral Mamba1721347200A 7.3B parameter Mamba-based model designed fo...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.000000250000262144.0NaNFalseNaNNaN
236mistralai/mistral-nemo:freeMistral: Mistral Nemo (free)1721347200A 12B parameter model with a 128k token contex...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral000000128000.0128000.0FalseNaNNaN
237mistralai/mistral-nemoMistral: Mistral Nemo1721347200A 12B parameter model with a 128k token contex...98304None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000030.00000007000098304.049152.0FalseNaNNaN
238openai/gpt-4o-miniOpenAI: GPT-4o-mini1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00021700128000.016384.0True0.000000075NaN
239openai/gpt-4o-mini-2024-07-18OpenAI: GPT-4o-mini (2024-07-18)1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00722500128000.016384.0True0.000000075NaN
240google/gemma-2-27b-itGoogle: Gemma 2 27B1720828800Gemma 2 27B by Google is an open model built f...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.00000010.000000300008192.0NaNFalseNaNNaN
241alpindale/magnum-72bMagnum 72B1720656000From the maker of [Goliath](https://openrouter...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
242google/gemma-2-9b-it:freeGoogle: Gemma 2 9B (free)1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0000008192.08192.0FalseNaNNaN
243google/gemma-2-9b-itGoogle: Gemma 2 9B1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.000000020.0000000600008192.0NaNFalseNaNNaN
24401-ai/yi-large01.AI: Yi Large1719273600The Yi Large model was designed by 01.AI with ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]YiNone0.0000030.000003000032768.04096.0FalseNaNNaN
245ai21/jamba-instructAI21: Jamba Instruct1719273600The Jamba-Instruct model, introduced by AI21 L...256000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000050.00000070000256000.04096.0FalseNaNNaN
246anthropic/claude-3.5-sonnet-20240620:betaAnthropic: Claude 3.5 Sonnet (2024-06-20) (sel...1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
247anthropic/claude-3.5-sonnet-20240620Anthropic: Claude 3.5 Sonnet (2024-06-20)1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
248sao10k/l3-euryale-70bSao10k: Llama 3 Euryale 70B v2.11718668800Euryale 70B v2.1 is a model focused on creativ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000001480.0000014800008192.08192.0FalseNaNNaN
249cognitivecomputations/dolphin-mixtral-8x22bDolphin 2.9.2 Mixtral 8x22B 🐬1717804800Dolphin 2.9 is designed for instruction follow...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000090.0000009000016000.0NaNFalseNaNNaN
250qwen/qwen-2-72b-instructQwen 2 72B Instruct1717718400Qwen2 72B is a transformer-based model that ex...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000090.0000009000032768.04096.0FalseNaNNaN
251mistralai/mistral-7b-instruct:freeMistral: Mistral 7B Instruct (free)1716768000A high-performing, industry-standard 7.3B para...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral00000032768.016384.0FalseNaNNaN
252mistralai/mistral-7b-instructMistral: Mistral 7B Instruct1716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
253nousresearch/hermes-2-pro-llama-3-8bNousResearch: Hermes 2 Pro - Llama-3 8B1716768000Hermes 2 Pro is an upgraded, retrained version...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.0000000250.000000040000131072.0131072.0FalseNaNNaN
254mistralai/mistral-7b-instruct-v0.3Mistral: Mistral 7B Instruct v0.31716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
255microsoft/phi-3-mini-128k-instructMicrosoft: Phi-3 Mini 128K Instruct1716681600Phi-3 Mini is a powerful 3.8B parameter model ...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000010000128000.0NaNFalseNaNNaN
256microsoft/phi-3-medium-128k-instructMicrosoft: Phi-3 Medium 128K Instruct1716508800Phi-3 128K Medium is a powerful 14-billion par...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000030000131072.0NaNFalseNaNNaN
257neversleep/llama-3-lumimaid-70bNeverSleep: Llama 3 Lumimaid 70B1715817600The NeverSleep team is back, with a Llama 3 70...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.00000600008192.04096.0FalseNaNNaN
258deepseek/deepseek-coderDeepSeek-Coder-V21715644800DeepSeek-Coder-V2, an open-source Mixture-of-E...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000040.000000120000128000.0NaNFalseNaNNaN
259google/gemini-flash-1.5Google: Gemini 1.5 Flash1715644800Gemini 1.5 Flash is a foundation model that pe...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.0000000750.000000300.00004001000000.08192.0False0.000000018750.0000001583
260openai/gpt-4oOpenAI: GPT-4o1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
261openai/gpt-4o:extendedOpenAI: GPT-4o (extended)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000060.00001800.00722500128000.064000.0TrueNaNNaN
262meta-llama/llama-guard-2-8bMeta: LlamaGuard 2 8B1715558400This safeguard model has 8B parameters and is ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.00000020.000000200008192.0NaNFalseNaNNaN
263openai/gpt-4o-2024-05-13OpenAI: GPT-4o (2024-05-13)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000050.00001500.00722500128000.04096.0TrueNaNNaN
264allenai/olmo-7b-instructOLMo 7B Instruct1715299200OLMo 7B Instruct by the Allen Institute for AI...2048None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherzephyr0.000000080.0000002400002048.0NaNFalseNaNNaN
265neversleep/llama-3-lumimaid-8b:extendedNeverSleep: Llama 3 Lumimaid 8B (extended)1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
266neversleep/llama-3-lumimaid-8bNeverSleep: Llama 3 Lumimaid 8B1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
267sao10k/fimbulvetr-11b-v2Fimbulvetr 11B v21713657600Creative writing model, routed with permission...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000080.000001200004096.04096.0FalseNaNNaN
268meta-llama/llama-3-8b-instructMeta: Llama 3 8B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, top_k, seed, ...text->text[text][text]Llama3llama30.000000030.0000000600008192.016384.0FalseNaNNaN
269meta-llama/llama-3-70b-instructMeta: Llama 3 70B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000030.000000400008192.016384.0FalseNaNNaN
270mistralai/mixtral-8x22b-instructMistral: Mixtral 8x22B Instruct1713312000Mistral's official instruct fine-tuned version...65536None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.00000040.0000012000065536.0NaNFalseNaNNaN
271microsoft/wizardlm-2-8x22bWizardLM-2 8x22B1713225600WizardLM-2 8x22B is Microsoft AI's most advanc...65536None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]Mistralvicuna0.00000050.0000005000065536.016384.0FalseNaNNaN
272google/gemini-pro-1.5Google: Gemini 1.5 Pro1712620800Google's latest multimodal model, supports ima...2000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.000001250.00000500.0006575002000000.08192.0FalseNaNNaN
273openai/gpt-4-turboOpenAI: GPT-4 Turbo1712620800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.000010.0000300.0144500128000.04096.0TrueNaNNaN
274cohere/command-r-plusCohere: Command R+1712188800Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
275cohere/command-r-plus-04-2024Cohere: Command R+ (04-2024)1712016000Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
276sophosympatheia/midnight-rose-70bMidnight Rose 70B1711065600A merge with a complex family tree, this model...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000080.000000800004096.0NaNFalseNaNNaN
277cohere/commandCohere: Command1710374400Command is an instruction-following conversati...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.0000010.00000200004096.04000.0FalseNaNNaN
278cohere/command-rCohere: Command R1710374400Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
279anthropic/claude-3-haiku:betaAnthropic: Claude 3 Haiku (self-moderated)1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0False0.000000030.0000003
280anthropic/claude-3-haikuAnthropic: Claude 3 Haiku1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0True0.000000030.0000003
281anthropic/claude-3-opus:betaAnthropic: Claude 3 Opus (self-moderated)1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0False0.00000150.00001875
282anthropic/claude-3-opusAnthropic: Claude 3 Opus1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0True0.00000150.00001875
283anthropic/claude-3-sonnet:betaAnthropic: Claude 3 Sonnet (self-moderated)1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0False0.00000030.00000375
284anthropic/claude-3-sonnetAnthropic: Claude 3 Sonnet1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0True0.00000030.00000375
285cohere/command-r-03-2024Cohere: Command R (03-2024)1709341200Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
286mistralai/mistral-largeMistral Large1708905600This is Mistral AI's flagship model, Mistral L...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000128000.0NaNFalseNaNNaN
287openai/gpt-3.5-turbo-0613OpenAI: GPT-3.5 Turbo (older v0613)1706140800GPT-3.5 Turbo is OpenAI's fastest model. It ca...4095None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.00000200004095.04096.0FalseNaNNaN
288openai/gpt-4-turbo-previewOpenAI: GPT-4 Turbo Preview1706140800The preview GPT-4 model with improved instruct...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
289nousresearch/nous-hermes-2-mixtral-8x7b-dpoNous: Hermes 2 Mixtral 8x7B DPO1705363200Nous Hermes 2 Mixtral 8x7B DPO is the new flag...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000060.0000006000032768.02048.0FalseNaNNaN
290mistralai/mistral-mediumMistral Medium1704844800This is Mistral AI's closed-source, medium-sid...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000002750.0000081000032768.0NaNFalseNaNNaN
291mistralai/mistral-smallMistral Small1704844800With 22 billion parameters, Mistral Small v24....32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
292mistralai/mistral-tinyMistral Tiny1704844800Note: This model is being deprecated. Recommen...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.00000025000032768.0NaNFalseNaNNaN
293mistralai/mistral-7b-instruct-v0.2Mistral: Mistral 7B Instruct v0.21703721600A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000020.0000002000032768.0NaNFalseNaNNaN
294mistralai/mixtral-8x7b-instructMistral: Mixtral 8x7B Instruct1702166400Mixtral 8x7B Instruct is a pretrained generati...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000080.00000024000032768.0NaNFalseNaNNaN
295neversleep/noromaid-20bNoromaid 20B1700956800A collab between IkariDev and Undi. This merge...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.000000750.000001500008192.02048.0FalseNaNNaN
296anthropic/claude-2.1:betaAnthropic: Claude v2.1 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
297anthropic/claude-2.1Anthropic: Claude v2.11700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
298anthropic/claude-2:betaAnthropic: Claude v2 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
299anthropic/claude-2Anthropic: Claude v21700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
300undi95/toppy-m-7bToppy M 7B1699574400A wild 7B parameter model that merges several ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralalpaca0.00000080.000001200004096.04096.0FalseNaNNaN
301alpindale/goliath-120bGoliath 120B1699574400A large LLM created by combining two fine-tune...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000656250.00000937500006144.0512.0FalseNaNNaN
302openrouter/autoAuto Router1699401600Your prompt will be processed by a meta-model ...2000000None[]text->text[text][text]RouterNone-1-1NaNNaNNaNNaNNaNNaNFalseNaNNaN
303openai/gpt-3.5-turbo-1106OpenAI: GPT-3.5 Turbo 16k (older v1106)1699228800An older GPT-3.5 Turbo model with improved ins...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.000002000016385.04096.0TrueNaNNaN
304openai/gpt-4-1106-previewOpenAI: GPT-4 Turbo (older v1106)1699228800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
305jondurbin/airoboros-l2-70bAiroboros 70B1698537600A Llama 2 70B fine-tune using synthetic data (...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000050.000000500004096.0NaNFalseNaNNaN
306openai/gpt-3.5-turbo-instructOpenAI: GPT-3.5 Turbo Instruct1695859200This model is a variant of GPT-3.5 Turbo tuned...4095None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]GPTchatml0.00000150.00000200004095.04096.0TrueNaNNaN
307mistralai/mistral-7b-instruct-v0.1Mistral: Mistral 7B Instruct v0.11695859200A 7.3B parameter model that outperforms Llama ...2824None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000110.0000001900002824.0NaNFalseNaNNaN
308pygmalionai/mythalion-13bPygmalion: Mythalion 13B1693612800A blend of the new Pygmalion-13b and MythoMax....8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500008192.01024.0FalseNaNNaN
309openai/gpt-3.5-turbo-16kOpenAI: GPT-3.5 Turbo 16k1693180800This model offers four times the context lengt...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000030.000004000016385.04096.0TrueNaNNaN
310openai/gpt-4-32kOpenAI: GPT-4 32k1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
311openai/gpt-4-32k-0314OpenAI: GPT-4 32k (older v0314)1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
312mancer/weaverMancer: Weaver (alpha)1690934400An attempt to recreate Claude-style verbosity,...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000011250.00000112500008000.01000.0FalseNaNNaN
313anthropic/claude-2.0:betaAnthropic: Claude v2.0 (self-moderated)1690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0FalseNaNNaN
314anthropic/claude-2.0Anthropic: Claude v2.01690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0TrueNaNNaN
315undi95/remm-slerp-l2-13bReMM SLERP 13B1689984000A recreation trial of the original MythoMax-L2...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500006144.01024.0FalseNaNNaN
316gryphe/mythomax-l2-13bMythoMax 13B1688256000One of the highest performing and most popular...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000000650.00000006500004096.04096.0FalseNaNNaN
317meta-llama/llama-2-70b-chatMeta: Llama 2 70B Chat1687219200The flagship, 70 billion parameter language mo...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2llama20.00000090.000000900004096.0NaNFalseNaNNaN
318openai/gpt-3.5-turboOpenAI: GPT-3.5 Turbo1685232000GPT-3.5 Turbo is OpenAI's fastest model. It ca...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
319openai/gpt-3.5-turbo-0125OpenAI: GPT-3.5 Turbo 16k1685232000The latest GPT-3.5 Turbo model with improved i...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
320openai/gpt-4OpenAI: GPT-41685232000OpenAI's flagship model, GPT-4 is a large-scal...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
321openai/gpt-4-0314OpenAI: GPT-4 (older v0314)1685232000GPT-4-0314 is the first version of GPT-4 relea...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
\n", + "
" + ], + "text/plain": [ + " id name created description context_length per_request_limits supported_parameters architecture_modality architecture_input_modalities architecture_output_modalities architecture_tokenizer architecture_instruct_type pricing_prompt pricing_completion pricing_request pricing_image pricing_web_search pricing_internal_reasoning top_provider_context_length top_provider_max_completion_tokens top_provider_is_moderated pricing_input_cache_read pricing_input_cache_write\n", + "0 mistralai/mistral-medium-3 Mistral: Mistral Medium 3 1746627341 Mistral Medium 3 is a high-performance enterpr... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.0000004 0.000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "1 google/gemini-2.5-pro-preview Google: Gemini 2.5 Pro Preview 1746578513 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0.00000125 0.00001 0 0.00516 0 0 1048576.0 65535.0 False 0.00000031 0.000001625\n", + "2 arcee-ai/caller-large Arcee AI: Caller Large 1746487869 Caller Large is Arcee's specialist \"function‑c... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.00000055 0.00000085 0 0 0 0 32768.0 NaN False NaN NaN\n", + "3 arcee-ai/spotlight Arcee AI: Spotlight 1746481552 Spotlight is a 7‑billion‑parameter vision‑lang... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000018 0.00000018 0 0 0 0 131072.0 65537.0 False NaN NaN\n", + "4 arcee-ai/maestro-reasoning Arcee AI: Maestro Reasoning 1746481269 Maestro Reasoning is Arcee's flagship analysis... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.0000009 0.0000033 0 0 0 0 131072.0 32000.0 False NaN NaN\n", + "5 arcee-ai/virtuoso-large Arcee AI: Virtuoso Large 1746478885 Virtuoso‑Large is Arcee's top‑tier general‑pur... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000075 0.0000012 0 0 0 0 131072.0 64000.0 False NaN NaN\n", + "6 arcee-ai/coder-large Arcee AI: Coder Large 1746478663 Coder‑Large is a 32 B‑parameter offspring of Q... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 NaN False NaN NaN\n", + "7 arcee-ai/virtuoso-medium-v2 Arcee AI: Virtuoso Medium V2 1746478434 Virtuoso‑Medium‑v2 is a 32 B model distilled f... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "8 arcee-ai/arcee-blitz Arcee AI: Arcee Blitz 1746470100 Arcee Blitz is a 24 B‑parameter dense model di... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000045 0.00000075 0 0 0 0 32768.0 NaN False NaN NaN\n", + "9 microsoft/phi-4-reasoning-plus:free Microsoft: Phi 4 Reasoning Plus (free) 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "10 microsoft/phi-4-reasoning-plus Microsoft: Phi 4 Reasoning Plus 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.00000007 0.00000035 0 0 0 0 32768.0 NaN False NaN NaN\n", + "11 microsoft/phi-4-reasoning:free Microsoft: Phi 4 Reasoning (free) 1746121275 Phi-4-reasoning is a 14B parameter dense decod... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "12 qwen/qwen3-0.6b-04-28:free Qwen: Qwen3 0.6B (free) 1746043526 Qwen3-0.6B is a lightweight, 0.6 billion param... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "13 inception/mercury-coder-small-beta Inception: Mercury Coder Small Beta 1746033880 Mercury Coder Small is the first diffusion lar... 32000 None [max_tokens, frequency_penalty, presence_penal... text->text [text] [text] Other None 0.00000025 0.000001 0 0 0 0 32000.0 NaN False NaN NaN\n", + "14 qwen/qwen3-1.7b:free Qwen: Qwen3 1.7B (free) 1746031388 Qwen3-1.7B is a compact, 1.7 billion parameter... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "15 qwen/qwen3-4b:free Qwen: Qwen3 4B (free) 1746031104 Qwen3-4B is a 4 billion parameter dense langua... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 128000.0 NaN False NaN NaN\n", + "16 opengvlab/internvl3-14b:free OpenGVLab: InternVL3 14B (free) 1746021355 The 14b version of the InternVL3 series. An ad... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "17 opengvlab/internvl3-2b:free OpenGVLab: InternVL3 2B (free) 1746019807 The 2b version of the InternVL3 series, for an... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "18 deepseek/deepseek-prover-v2:free DeepSeek: DeepSeek Prover V2 (free) 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "19 deepseek/deepseek-prover-v2 DeepSeek: DeepSeek Prover V2 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.0000005 0.00000218 0 0 0 0 131072.0 NaN False NaN NaN\n", + "20 meta-llama/llama-guard-4-12b Meta: Llama Guard 4 12B 1745975193 Llama Guard 4 is a Llama 4 Scout-derived multi... 163840 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000005 0.00000005 0 0 0 0 163840.0 NaN False NaN NaN\n", + "21 qwen/qwen3-30b-a3b:free Qwen: Qwen3 30B A3B (free) 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "22 qwen/qwen3-30b-a3b Qwen: Qwen3 30B A3B 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "23 qwen/qwen3-8b:free Qwen: Qwen3 8B (free) 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "24 qwen/qwen3-8b Qwen: Qwen3 8B 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.000000035 0.000000138 0 0 0 0 128000.0 NaN False NaN NaN\n", + "25 qwen/qwen3-14b:free Qwen: Qwen3 14B (free) 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "26 qwen/qwen3-14b Qwen: Qwen3 14B 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000007 0.00000024 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "27 qwen/qwen3-32b:free Qwen: Qwen3 32B (free) 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "28 qwen/qwen3-32b Qwen: Qwen3 32B 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 NaN False NaN NaN\n", + "29 qwen/qwen3-235b-a22b:free Qwen: Qwen3 235B A22B (free) 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "30 qwen/qwen3-235b-a22b Qwen: Qwen3 235B A22B 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000014 0.000002 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "31 tngtech/deepseek-r1t-chimera:free TNG: DeepSeek R1T Chimera (free) 1745760875 DeepSeek-R1T-Chimera is created by merging Dee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "32 thudm/glm-z1-rumination-32b THUDM: GLM Z1 Rumination 32B 1745601495 THUDM: GLM Z1 Rumination 32B is a 32B-paramete... 32000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "33 thudm/glm-z1-9b:free THUDM: GLM Z1 9B (free) 1745601140 GLM-Z1-9B-0414 is a 9B-parameter language mode... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "34 thudm/glm-4-9b:free THUDM: GLM 4 9B (free) 1745601023 GLM-4-9B-0414 is a 9 billion parameter languag... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "35 microsoft/mai-ds-r1:free Microsoft: MAI DS R1 (free) 1745194100 MAI-DS-R1 is a post-trained variant of DeepSee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "36 thudm/glm-z1-32b:free THUDM: GLM Z1 32B (free) 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "37 thudm/glm-z1-32b THUDM: GLM Z1 32B 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "38 thudm/glm-4-32b:free THUDM: GLM 4 32B (free) 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "39 thudm/glm-4-32b THUDM: GLM 4 32B 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "40 google/gemini-2.5-flash-preview Google: Gemini 2.5 Flash Preview 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000006 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "41 google/gemini-2.5-flash-preview:thinking Google: Gemini 2.5 Flash Preview (thinking) 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000035 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "42 openai/o4-mini-high OpenAI: o4 Mini High 1744824212 OpenAI o4-mini-high is the same model as [o4-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "43 openai/o3 OpenAI: o3 1744823457 o3 is a well-rounded and powerful model across... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.00001 0.00004 0 0.00765 0 0 200000.0 100000.0 True 0.0000025 NaN\n", + "44 openai/o4-mini OpenAI: o4 Mini 1744820942 OpenAI o4-mini is a compact reasoning model in... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free Shisa AI: Shisa V2 Llama 3.3 70B (free) 1744754858 Shisa V2 Llama 3.3 70B is a bilingual Japanese... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "46 qwen/qwen2.5-coder-7b-instruct Qwen: Qwen2.5 Coder 7B Instruct 1744734887 Qwen2.5-Coder-7B-Instruct is a 7B parameter in... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen None 0.00000001 0.00000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "47 openai/gpt-4.1 OpenAI: GPT-4.1 1744651385 GPT-4.1 is a flagship large language model opt... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.000002 0.000008 0 0 0 0 1047576.0 32768.0 True 0.0000005 NaN\n", + "48 openai/gpt-4.1-mini OpenAI: GPT-4.1 Mini 1744651381 GPT-4.1 Mini is a mid-sized model delivering p... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000004 0.0000016 0 0 0 0 1047576.0 32768.0 True 0.0000001 NaN\n", + "49 openai/gpt-4.1-nano OpenAI: GPT-4.1 Nano 1744651369 For tasks that demand low latency, GPT‑4.1 nan... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000001 0.0000004 0 0 0 0 1047576.0 32768.0 True 0.000000025 NaN\n", + "50 eleutherai/llemma_7b EleutherAI: Llemma 7b 1744643225 Llemma 7B is a language model for mathematics.... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other code-llama 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "51 alfredpros/codellama-7b-instruct-solidity AlfredPros: CodeLLaMa 7B Instruct Solidity 1744641874 A finetuned 7 billion parameters Code LLaMA - ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free ArliAI: QwQ 32B RpR v1 (free) 1744555982 QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "53 agentica-org/deepcoder-14b-preview:free Agentica: Deepcoder 14B Preview (free) 1744555395 DeepCoder-14B-Preview is a 14B parameter code ... 96000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 96000.0 NaN False NaN NaN\n", + "54 moonshotai/kimi-vl-a3b-thinking:free Moonshot AI: Kimi VL A3B Thinking (free) 1744304841 Kimi-VL is a lightweight Mixture-of-Experts vi... 131072 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [image, text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "55 x-ai/grok-3-mini-beta xAI: Grok 3 Mini Beta 1744240195 Grok 3 Mini is a lightweight, smaller thinking... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.0000003 0.0000005 0 0 0 0 131072.0 NaN False NaN NaN\n", + "56 x-ai/grok-3-beta xAI: Grok 3 Beta 1744240068 Grok 3 is the latest model from xAI. It's thei... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000003 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free) 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 NVIDIA: Llama 3.3 Nemotron Super 49B v1 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000013 0.0000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free) 1744115059 Llama-3.1-Nemotron-Ultra-253B-v1 is a large la... 
131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "60 meta-llama/llama-4-maverick:free Meta: Llama 4 Maverick (free) 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 256000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 256000.0 NaN False NaN NaN\n", + "61 meta-llama/llama-4-maverick Meta: Llama 4 Maverick 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 1048576 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000017 0.0000006 0 0.0006684 0 0 1048576.0 16384.0 False NaN NaN\n", + "62 meta-llama/llama-4-scout:free Meta: Llama 4 Scout (free) 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 512000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 512000.0 NaN False NaN NaN\n", + "63 meta-llama/llama-4-scout Meta: Llama 4 Scout 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 1048576 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Other None 0.00000008 0.0000003 0 0 0 0 1048576.0 1048576.0 False NaN NaN\n", + "64 all-hands/openhands-lm-32b-v0.1 OpenHands LM 32B V0.1 1743613013 OpenHands LM v0.1 is a 32B open-source coding ... 16384 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "65 mistral/ministral-8b Mistral: Ministral 8B 1743430021 Ministral 8B is a state-of-the-art language mo... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "66 deepseek/deepseek-v3-base:free DeepSeek: DeepSeek V3 Base (free) 1743272023 Note that this is a base model mostly meant fo... 
163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct Typhoon2 8B Instruct 1743196511 Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000018 0.00000018 0 0 0 0 8192.0 NaN False NaN NaN\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct Typhoon2 70B Instruct 1743196170 Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000088 0.00000088 0 0 0 0 8192.0 NaN False NaN NaN\n", + "69 allenai/molmo-7b-d:free AllenAI: Molmo 7B D (free) 1743023247 Molmo is a family of open vision-language mode... 4096 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 4096.0 NaN False NaN NaN\n", + "70 bytedance-research/ui-tars-72b:free Bytedance: UI-TARS 72B (free) 1743020065 UI-TARS 72B is an open-source multimodal AI mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "71 qwen/qwen2.5-vl-3b-instruct:free Qwen: Qwen2.5 VL 3B Instruct (free) 1743014573 Qwen2.5 VL 3B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "72 google/gemini-2.5-pro-exp-03-25 Google: Gemini 2.5 Pro Experimental 1742922099 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1000000 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0 0 0 0 0 0 1000000.0 65535.0 False NaN NaN\n", + "73 qwen/qwen2.5-vl-32b-instruct:free Qwen: Qwen2.5 VL 32B Instruct (free) 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 
8192 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "74 qwen/qwen2.5-vl-32b-instruct Qwen: Qwen2.5 VL 32B Instruct 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000009 0.0000009 0 0 0 0 128000.0 NaN False NaN NaN\n", + "75 deepseek/deepseek-chat-v3-0324:free DeepSeek: DeepSeek V3 0324 (free) 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "76 deepseek/deepseek-chat-v3-0324 DeepSeek: DeepSeek V3 0324 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] DeepSeek None 0.0000003 0.00000088 0 0 0 0 163840.0 NaN False NaN NaN\n", + "77 featherless/qwerky-72b:free Qwerky 72B (free) 1742481597 Qwerky-72B is a linear-attention RWKV variant ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "78 openai/o1-pro OpenAI: o1-pro 1742423211 The o1 series of models are trained with reinf... 200000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] GPT None 0.00015 0.0006 0 0.21675 0 0 200000.0 100000.0 True NaN NaN\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free Mistral: Mistral Small 3.1 24B (free) 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 96000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0 0 0 0 0 0 96000.0 96000.0 False NaN NaN\n", + "80 mistralai/mistral-small-3.1-24b-instruct Mistral: Mistral Small 3.1 24B 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 
131072 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Mistral None 0.00000005 0.00000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "81 open-r1/olympiccoder-32b:free OlympicCoder 32B (free) 1742077228 OlympicCoder-32B is a high-performing open-sou... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "82 google/gemma-3-1b-it:free Google: Gemma 3 1B (free) 1741963556 Gemma 3 1B is the smallest of the new Gemma 3 ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "83 google/gemma-3-4b-it:free Google: Gemma 3 4B (free) 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "84 google/gemma-3-4b-it Google: Gemma 3 4B 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000002 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "85 ai21/jamba-1.6-large AI21: Jamba 1.6 Large 1741905173 AI21 Jamba Large 1.6 is a high-performance hyb... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.000002 0.000008 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "86 ai21/jamba-1.6-mini AI21: Jamba Mini 1.6 1741905171 AI21 Jamba Mini 1.6 is a hybrid foundation mod... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000002 0.0000004 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "87 google/gemma-3-12b-it:free Google: Gemma 3 12B (free) 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "88 google/gemma-3-12b-it Google: Gemma 3 12B 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000005 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "89 cohere/command-a Cohere: Command A 1741894342 Command A is an open-weights 111B parameter mo... 256000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 256000.0 8192.0 False NaN NaN\n", + "90 openai/gpt-4o-mini-search-preview OpenAI: GPT-4o-mini Search Preview 1741818122 GPT-4o mini Search Preview is a specialized mo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.00000015 0.0000006 0.0275 0.000217 0 0 128000.0 16384.0 True NaN NaN\n", + "91 openai/gpt-4o-search-preview OpenAI: GPT-4o Search Preview 1741817949 GPT-4o Search Previewis a specialized model fo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.0000025 0.00001 0.035 0.003613 0 0 128000.0 16384.0 True NaN NaN\n", + "92 rekaai/reka-flash-3:free Reka: Flash 3 (free) 1741812813 Reka Flash 3 is a general-purpose, instruction... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "93 google/gemma-3-27b-it:free Google: Gemma 3 27B (free) 1741756359 Gemma 3 introduces multimodality, supporting v... 96000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 96000.0 8192.0 False NaN NaN\n", + "94 google/gemma-3-27b-it Google: Gemma 3 27B 1741756359 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0.0000001 0.0000002 0 0.0000256 0 0 131072.0 16384.0 False NaN NaN\n", + "95 thedrummer/anubis-pro-105b-v1 TheDrummer: Anubis Pro 105B V1 1741642290 Anubis Pro 105B v1 is an expanded and refined ... 131072 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000008 0.000001 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "96 thedrummer/skyfall-36b-v2 TheDrummer: Skyfall 36B V2 1741636566 Skyfall 36B v2 is an enhanced iteration of Mis... 32768 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "97 microsoft/phi-4-multimodal-instruct Microsoft: Phi 4 Multimodal Instruct 1741396284 Phi-4 Multimodal Instruct is a versatile 5.6B ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000005 0.0000001 0 0.00017685 0 0 131072.0 NaN False NaN NaN\n", + "98 perplexity/sonar-reasoning-pro Perplexity: Sonar Reasoning Pro 1741313308 Note: Sonar Pro pricing includes Perplexity se... 128000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0 128000.0 NaN False NaN NaN\n", + "99 perplexity/sonar-pro Perplexity: Sonar Pro 1741312423 Note: Sonar Pro pricing includes Perplexity se... 200000 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000003 0.000015 0 0 0.005 0 200000.0 8000.0 False NaN NaN\n", + "100 perplexity/sonar-deep-research Perplexity: Sonar Deep Research 1741311246 Sonar Deep Research is a research-focused mode... 128000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0.000003 128000.0 NaN False NaN NaN\n", + "101 deepseek/deepseek-r1-zero:free DeepSeek: DeepSeek R1 Zero (free) 1741297434 DeepSeek-R1-Zero is a model trained via large-... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "102 qwen/qwq-32b:free Qwen: QwQ 32B (free) 1741208814 QwQ is the reasoning model of the Qwen series.... 40000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0 0 0 0 0 0 40000.0 40000.0 False NaN NaN\n", + "103 qwen/qwq-32b Qwen: QwQ 32B 1741208814 QwQ is the reasoning model of the Qwen series.... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0.00000015 0.0000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free Moonshot AI: Moonlight 16B A3B Instruct (free) 1740719801 Moonlight-16B-A3B-Instruct is a 16B-parameter ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free Nous: DeepHermes 3 Llama 3 8B Preview (free) 1740719372 DeepHermes 3 Preview is the latest version of ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "106 openai/gpt-4.5-preview OpenAI: GPT-4.5 (Preview) 1740687810 GPT-4.5 (Preview) is a research preview of Ope... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.000075 0.00015 0 0.108375 0 0 128000.0 16384.0 True 0.0000375 NaN\n", + "107 google/gemini-2.0-flash-lite-001 Google: Gemini 2.0 Flash Lite 1740506212 Gemini 2.0 Flash Lite offers a significantly f... 1048576 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image, file] [text] Gemini None 0.000000075 0.0000003 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "108 anthropic/claude-3.7-sonnet Anthropic: Claude 3.7 Sonnet 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "109 anthropic/claude-3.7-sonnet:thinking Anthropic: Claude 3.7 Sonnet (thinking) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "110 anthropic/claude-3.7-sonnet:beta Anthropic: Claude 3.7 Sonnet (self-moderated) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [max_tokens, temperature, stop, reasoning, inc... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 128000.0 False 0.0000003 0.00000375\n", + "111 perplexity/r1-1776 Perplexity: R1 1776 1740004929 R1 1776 is a version of DeepSeek-R1 that has b... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.000002 0.000008 0 0 0 0 128000.0 NaN False NaN NaN\n", + "112 mistralai/mistral-saba Mistral: Saba 1739803239 Mistral Saba is a 24B-parameter language model... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... Dolphin3.0 R1 Mistral 24B (free) 1739462498 Dolphin 3.0 R1 is the next generation of the D... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free Dolphin3.0 Mistral 24B (free) 1739462019 Dolphin 3.0 is the next generation of the Dolp... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "115 meta-llama/llama-guard-3-8b Llama Guard 3 8B 1739401318 Llama Guard 3 is a Llama-3.1-8B pretrained mod... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.00000002 0.00000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "116 openai/o3-mini-high OpenAI: o3 Mini High 1739372611 OpenAI o3-mini-high is the same model as [o3-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "117 deepseek/deepseek-r1-distill-llama-8b DeepSeek: R1 Distill Llama 8B 1738937718 DeepSeek R1 Distill Llama 8B is a distilled la... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.00000004 0.00000004 0 0 0 0 32000.0 32000.0 False NaN NaN\n", + "118 google/gemini-2.0-flash-001 Google: Gemini 2.0 Flash 1738769413 Gemini Flash 2.0 offers a significantly faster... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.0000001 0.0000004 0 0.0000258 0 0 1000000.0 8192.0 False 0.000000025 0.0000001833\n", + "119 qwen/qwen-vl-plus Qwen: Qwen VL Plus 1738731255 Qwen's Enhanced Large Visual Language Model. S... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.00000021 0.00000063 0 0.0002688 0 0 7500.0 1500.0 False NaN NaN\n", + "120 aion-labs/aion-1.0 AionLabs: Aion-1.0 1738697557 Aion-1.0 is a multi-model system designed for ... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.000004 0.000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "121 aion-labs/aion-1.0-mini AionLabs: Aion-1.0-Mini 1738697107 Aion-1.0-Mini 32B parameter model is a distill... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.0000007 0.0000014 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "122 aion-labs/aion-rp-llama-3.1-8b AionLabs: Aion-RP 1.0 (8B) 1738696718 Aion-RP-Llama-3.1-8B ranks the highest in the ... 32768 None [max_tokens, temperature, top_p] text->text [text] [text] Other None 0.0000002 0.0000002 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "123 qwen/qwen-vl-max Qwen: Qwen VL Max 1738434304 Qwen VL Max is a visual understanding model wi... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.0000008 0.0000032 0 0.001024 0 0 7500.0 1500.0 False NaN NaN\n", + "124 qwen/qwen-turbo Qwen: Qwen-Turbo 1738410974 Qwen-Turbo, based on Qwen2.5, is a 1M context ... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.00000005 0.0000002 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "125 qwen/qwen2.5-vl-72b-instruct:free Qwen: Qwen2.5 VL 72B Instruct (free) 1738410311 Qwen2.5-VL is proficient in recognizing common... 131072 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "126 qwen/qwen2.5-vl-72b-instruct Qwen: Qwen2.5 VL 72B Instruct 1738410311 Qwen2.5-VL is proficient in recognizing common... 32000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.00000025 0.00000075 0 0 0 0 32000.0 NaN False NaN NaN\n", + "127 qwen/qwen-plus Qwen: Qwen-Plus 1738409840 Qwen-Plus, based on the Qwen2.5 foundation mod... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen None 0.0000004 0.0000012 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "128 qwen/qwen-max Qwen: Qwen-Max 1738402289 Qwen-Max, based on Qwen2.5, provides the best ... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000016 0.0000064 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "129 openai/o3-mini OpenAI: o3 Mini 1738351721 OpenAI o3-mini is a cost-efficient language mo... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b DeepSeek: R1 Distill Qwen 1.5B 1738328067 DeepSeek R1 Distill Qwen 1.5B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000018 0.00000018 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "131 mistralai/mistral-small-24b-instruct-2501:free Mistral: Mistral Small 3 (free) 1738255409 Mistral Small 3 is a 24B-parameter language mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "132 mistralai/mistral-small-24b-instruct-2501 Mistral: Mistral Small 3 1738255409 Mistral Small 3 is a 24B-parameter language mo... 28000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0.00000006 0.00000012 0 0 0 0 28000.0 14000.0 False NaN NaN\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free DeepSeek: R1 Distill Qwen 32B (free) 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 16000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16000.0 16000.0 False NaN NaN\n", + "134 deepseek/deepseek-r1-distill-qwen-32b DeepSeek: R1 Distill Qwen 32B 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000012 0.00000018 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free DeepSeek: R1 Distill Qwen 14B (free) 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "136 deepseek/deepseek-r1-distill-qwen-14b DeepSeek: R1 Distill Qwen 14B 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000015 0.00000015 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "137 perplexity/sonar-reasoning Perplexity: Sonar Reasoning 1738131107 Sonar Reasoning is a reasoning model provided ... 127000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000001 0.000005 0.005 0 0 0 127000.0 NaN False NaN NaN\n", + "138 perplexity/sonar Perplexity: Sonar 1738013808 Sonar is lightweight, affordable, fast, and si... 127072 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "139 liquid/lfm-7b Liquid: LFM 7B 1737806883 LFM-7B, a new best-in-class language model. LF... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000001 0.00000001 0 0 0 0 32768.0 NaN False NaN NaN\n", + "140 liquid/lfm-3b Liquid: LFM 3B 1737806501 Liquid's LFM 3B delivers incredible performanc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000002 0.00000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free DeepSeek: R1 Distill Llama 70B (free) 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 
8192 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0 0 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "142 deepseek/deepseek-r1-distill-llama-70b DeepSeek: R1 Distill Llama 70B 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.0000001 0.0000004 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "143 deepseek/deepseek-r1:free DeepSeek: R1 (free) 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, reasoning, include_reasoning, tem... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "144 deepseek/deepseek-r1 DeepSeek: R1 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.0000005 0.00000218 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "145 minimax/minimax-01 MiniMax: MiniMax-01 1736915462 MiniMax-01 is a combines MiniMax-Text-01 for t... 1000192 None [max_tokens, temperature, top_p] text+image->text [text, image] [text] Other None 0.0000002 0.0000011 0 0 0 0 1000192.0 1000192.0 False NaN NaN\n", + "146 mistralai/codestral-2501 Mistral: Codestral 2501 1736895522 [Mistral](/mistralai)'s cutting-edge language ... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000003 0.0000009 0 0 0 0 262144.0 NaN False NaN NaN\n", + "147 microsoft/phi-4 Microsoft: Phi 4 1736489872 [Microsoft Research](/microsoft) Phi-4 is desi... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000007 0.00000014 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "148 deepseek/deepseek-chat:free DeepSeek: DeepSeek V3 (free) 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "149 deepseek/deepseek-chat DeepSeek: DeepSeek V3 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.00000038 0.00000089 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "150 sao10k/l3.3-euryale-70b Sao10K: Llama 3.3 Euryale 70B 1734535928 Euryale L3.3 70B is a model focused on creativ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "151 openai/o1 OpenAI: o1 1734459999 The latest and strongest model family from Ope... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [text, image] [text] GPT None 0.000015 0.00006 0 0.021675 0 0 200000.0 100000.0 True 0.0000075 NaN\n", + "152 eva-unit-01/eva-llama-3.33-70b EVA Llama 3.33 70B 1734377303 EVA Llama 3.33 70b is a roleplay and storywrit... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "153 x-ai/grok-2-vision-1212 xAI: Grok 2 Vision 1212 1734237338 Grok 2 Vision 1212 advances image-based AI wit... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000002 0.00001 0 0.0036 0 0 32768.0 NaN False NaN NaN\n", + "154 x-ai/grok-2-1212 xAI: Grok 2 1212 1734232814 Grok 2 1212 introduces significant enhancement... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000002 0.00001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "155 cohere/command-r7b-12-2024 Cohere: Command R7B (12-2024) 1734158152 Command R7B (12-2024) is a small, fast update ... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Cohere None 0.0000000375 0.00000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "156 google/gemini-2.0-flash-exp:free Google: Gemini 2.0 Flash Experimental (free) 1733937523 Gemini Flash 2.0 offers a significantly faster... 1048576 None [max_tokens, temperature, top_p, stop] text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "157 meta-llama/llama-3.3-70b-instruct:free Meta: Llama 3.3 70B Instruct (free) 1733506137 The Meta Llama 3.3 multilingual large language... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 8000.0 8000.0 False NaN NaN\n", + "158 meta-llama/llama-3.3-70b-instruct Meta: Llama 3.3 70B Instruct 1733506137 The Meta Llama 3.3 multilingual large language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009 0.00000035 0 0 0 0 131000.0 131000.0 False NaN NaN\n", + "159 amazon/nova-lite-v1 Amazon: Nova Lite 1.0 1733437363 Amazon Nova Lite 1.0 is a very low-cost multim... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.00000006 0.00000024 0 0.00009 0 0 300000.0 5120.0 True NaN NaN\n", + "160 amazon/nova-micro-v1 Amazon: Nova Micro 1.0 1733437237 Amazon Nova Micro 1.0 is a text-only model tha... 128000 None [tools, max_tokens, temperature, top_p, top_k,... text->text [text] [text] Nova None 0.000000035 0.00000014 0 0 0 0 128000.0 5120.0 True NaN NaN\n", + "161 amazon/nova-pro-v1 Amazon: Nova Pro 1.0 1733436303 Amazon Nova Pro 1.0 is a capable multimodal mo... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.0000008 0.0000032 0 0.0012 0 0 300000.0 5120.0 True NaN NaN\n", + "162 qwen/qwq-32b-preview:free Qwen: QwQ 32B Preview (free) 1732754541 QwQ-32B-Preview is an experimental research mo... 
16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16384.0 NaN False NaN NaN\n", + "163 qwen/qwq-32b-preview Qwen: QwQ 32B Preview 1732754541 QwQ-32B-Preview is an experimental research mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0.00000009 0.00000027 0 0 0 0 32768.0 NaN False NaN NaN\n", + "164 google/learnlm-1.5-pro-experimental:free Google: LearnLM 1.5 Pro Experimental (free) 1732216551 An experimental version of [Gemini 1.5 Pro](/g... 40960 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 40960.0 8192.0 False NaN NaN\n", + "165 eva-unit-01/eva-qwen-2.5-72b EVA Qwen2.5 72B 1732210606 EVA Qwen2.5 72B is a roleplay and storywriting... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "166 openai/gpt-4o-2024-11-20 OpenAI: GPT-4o (2024-11-20) 1732127594 The 2024-11-20 version of GPT-4o offers a leve... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "167 mistralai/mistral-large-2411 Mistral Large 2411 1731978685 Mistral Large 2 2411 is an update of [Mistral ... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "168 mistralai/mistral-large-2407 Mistral Large 2407 1731978415 This is Mistral AI's flagship model, Mistral L... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "169 mistralai/pixtral-large-2411 Mistral: Pixtral Large 2411 1731977388 Pixtral Large is a 124B parameter, open-weight... 
131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.000002 0.000006 0 0.002888 0 0 131072.0 NaN False NaN NaN\n", + "170 x-ai/grok-vision-beta xAI: Grok Vision Beta 1731976624 Grok Vision Beta is xAI's experimental languag... 8192 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000005 0.000015 0 0.009 0 0 8192.0 NaN False NaN NaN\n", + "171 infermatic/mn-inferor-12b Infermatic: Mistral Nemo Inferor 12B 1731464428 Inferor 12B is a merge of top roleplay models,... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free Qwen2.5 Coder 32B Instruct (free) 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "173 qwen/qwen-2.5-coder-32b-instruct Qwen2.5 Coder 32B Instruct 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000006 0.00000015 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "174 raifle/sorcererlm-8x22b SorcererLM 8x22B 1731105083 SorcererLM is an advanced RP and storytelling ... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral vicuna 0.0000045 0.0000045 0 0 0 0 16000.0 NaN False NaN NaN\n", + "175 eva-unit-01/eva-qwen-2.5-32b EVA Qwen2.5 32B 1731104847 EVA Qwen2.5 32B is a roleplaying/storywriting ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "176 thedrummer/unslopnemo-12b Unslopnemo 12B 1731103448 UnslopNemo v4.1 is the latest addition from th... 
32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000045 0.00000045 0 0 0 0 32000.0 16000.0 False NaN NaN\n", + "177 anthropic/claude-3.5-haiku:beta Anthropic: Claude 3.5 Haiku (self-moderated) 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "178 anthropic/claude-3.5-haiku Anthropic: Claude 3.5 Haiku 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "179 anthropic/claude-3.5-haiku-20241022:beta Anthropic: Claude 3.5 Haiku (2024-10-22) (self... 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 Anthropic: Claude 3.5 Haiku (2024-10-22) 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "181 neversleep/llama-3.1-lumimaid-70b NeverSleep: Lumimaid v0.2 70B 1729555200 Lumimaid v0.2 70B is a finetune of [Llama 3.1 ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000015 0.00000225 0 0 0 0 16384.0 2048.0 False NaN NaN\n", + "182 anthracite-org/magnum-v4-72b Magnum v4 72B 1729555200 This is a series of models designed to replica... 16384 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.0000015 0.00000225 0 0 0 0 16384.0 1024.0 False NaN NaN\n", + "183 anthropic/claude-3.5-sonnet:beta Anthropic: Claude 3.5 Sonnet (self-moderated) 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "184 anthropic/claude-3.5-sonnet Anthropic: Claude 3.5 Sonnet 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "185 x-ai/grok-beta xAI: Grok Beta 1729382400 Grok Beta is xAI's experimental language model... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000005 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "186 mistralai/ministral-8b Mistral: Ministral 8B 1729123200 Ministral 8B is an 8B parameter model featurin... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "187 mistralai/ministral-3b Mistral: Ministral 3B 1729123200 Ministral 3B is a 3B parameter model optimized... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000004 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "188 qwen/qwen-2.5-7b-instruct:free Qwen2.5 7B Instruct (free) 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "189 qwen/qwen-2.5-7b-instruct Qwen2.5 7B Instruct 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.00000005 0.0000001 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct NVIDIA: Llama 3.1 Nemotron 70B Instruct 1728950400 NVIDIA's Llama 3.1 Nemotron 70B is a language ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "191 inflection/inflection-3-productivity Inflection: Inflection 3 Productivity 1728604800 Inflection 3 Productivity is optimized for fol... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "192 inflection/inflection-3-pi Inflection: Inflection 3 Pi 1728604800 Inflection 3 Pi powers Inflection's [Pi](https... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "193 google/gemini-flash-1.5-8b Google: Gemini 1.5 Flash 8B 1727913600 Gemini Flash 1.5 8B is optimized for speed and... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.0000000375 0.00000015 0 0 0 0 1000000.0 8192.0 False 0.00000001 0.0000000583\n", + "194 thedrummer/rocinante-12b Rocinante 12B 1727654400 Rocinante 12B is designed for engaging storyte... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000025 0.0000005 0 0 0 0 32768.0 NaN False NaN NaN\n", + "195 anthracite-org/magnum-v2-72b Magnum v2 72B 1727654400 From the maker of [Goliath](https://openrouter... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000003 0.000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "196 liquid/lfm-40b Liquid: LFM 40B MoE 1727654400 Liquid's 40.3B Mixture of Experts (MoE) model.... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other chatml 0.00000015 0.00000015 0 0 0 0 32768.0 NaN False NaN NaN\n", + "197 meta-llama/llama-3.2-3b-instruct:free Meta: Llama 3.2 3B Instruct (free) 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 20000 None [max_tokens, temperature, top_p] text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 20000.0 20000.0 False NaN NaN\n", + "198 meta-llama/llama-3.2-3b-instruct Meta: Llama 3.2 3B Instruct 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000001 0.00000002 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "199 meta-llama/llama-3.2-1b-instruct:free Meta: Llama 3.2 1B Instruct (free) 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131000.0 NaN False NaN NaN\n", + "200 meta-llama/llama-3.2-1b-instruct Meta: Llama 3.2 1B Instruct 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131072 None [max_tokens, temperature, top_p, top_k, stop, ... text->text [text] [text] Llama3 llama3 0.000000005 0.00000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "201 meta-llama/llama-3.2-90b-vision-instruct Meta: Llama 3.2 90B Vision Instruct 1727222400 The Llama 90B Vision model is a top-tier, 90-b... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.0000012 0.0000012 0 0.001734 0 0 131072.0 2048.0 False NaN NaN\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free Meta: Llama 3.2 11B Vision Instruct (free) 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "203 meta-llama/llama-3.2-11b-vision-instruct Meta: Llama 3.2 11B Vision Instruct 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.000000049 0.000000049 0 0.00007948 0 0 131072.0 16384.0 False NaN NaN\n", + "204 qwen/qwen-2.5-72b-instruct:free Qwen2.5 72B Instruct (free) 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "205 qwen/qwen-2.5-72b-instruct Qwen2.5 72B Instruct 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen chatml 0.00000012 0.00000039 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "206 qwen/qwen-2.5-vl-72b-instruct Qwen: Qwen2.5-VL 72B Instruct 1726617600 Qwen2.5 VL 72B is a multimodal LLM from the Qw... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000006 0.0000006 0 0.000578 0 0 32768.0 NaN False NaN NaN\n", + "207 neversleep/llama-3.1-lumimaid-8b NeverSleep: Lumimaid v0.2 8B 1726358400 Lumimaid v0.2 8B is a finetune of [Llama 3.1 8... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "208 openai/o1-preview OpenAI: o1-preview 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "209 openai/o1-preview-2024-09-12 OpenAI: o1-preview (2024-09-12) 1726099200 The latest and strongest model family from Ope... 
128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "210 openai/o1-mini OpenAI: o1-mini 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "211 openai/o1-mini-2024-09-12 OpenAI: o1-mini (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "212 mistralai/pixtral-12b Mistral: Pixtral 12B 1725926400 The first multi-modal, text+image-to-text mode... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Mistral None 0.0000001 0.0000001 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "213 cohere/command-r-plus-08-2024 Cohere: Command R+ (08-2024) 1724976000 command-r-plus-08-2024 is an update of the [Co... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000025 0.00001 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "214 cohere/command-r-08-2024 Cohere: Command R (08-2024) 1724976000 command-r-08-2024 is an update of the [Command... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.00000015 0.0000006 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free Qwen: Qwen2.5-VL 7B Instruct (free) 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "216 qwen/qwen-2.5-vl-7b-instruct Qwen: Qwen2.5-VL 7B Instruct 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Qwen None 0.0000002 0.0000002 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "217 sao10k/l3.1-euryale-70b Sao10K: Llama 3.1 Euryale 70B v2.2 1724803200 Euryale L3.1 70B v2.2 is a model focused on cr... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "218 google/gemini-flash-1.5-8b-exp Google: Gemini 1.5 Flash 8B Experimental 1724803200 Gemini Flash 1.5 8B Experimental is an experim... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "219 microsoft/phi-3.5-mini-128k-instruct Microsoft: Phi-3.5 Mini 128K Instruct 1724198400 Phi-3.5 models are lightweight, state-of-the-a... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.00000003 0.00000009 0 0 0 0 131072.0 NaN False NaN NaN\n", + "220 nousresearch/hermes-3-llama-3.1-70b Nous: Hermes 3 70B Instruct 1723939200 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "221 nousresearch/hermes-3-llama-3.1-405b Nous: Hermes 3 405B Instruct 1723766400 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.0000008 0.0000008 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "222 openai/chatgpt-4o-latest OpenAI: ChatGPT-4o 1723593600 OpenAI ChatGPT 4o is continually updated by Op... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 16384.0 True NaN NaN\n", + "223 sao10k/l3-lunaris-8b Sao10K: Llama 3 8B Lunaris 1723507200 Lunaris 8B is a versatile generalist and rolep... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000005 0 0 0 0 8192.0 NaN False NaN NaN\n", + "224 aetherwiing/mn-starcannon-12b Aetherwiing: Starcannon 12B 1723507200 Starcannon 12B v2 is a creative roleplay and s... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "225 openai/gpt-4o-2024-08-06 OpenAI: GPT-4o (2024-08-06) 1722902400 The 2024-08-06 version of GPT-4o offers improv... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "226 meta-llama/llama-3.1-405b:free Meta: Llama 3.1 405B (base) (free) 1722556800 Meta's latest class of model (Llama 3.1) launc... 64000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "227 meta-llama/llama-3.1-405b Meta: Llama 3.1 405B (base) 1722556800 Meta's latest class of model (Llama 3.1) launc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.000002 0.000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "228 nothingiisreal/mn-celeste-12b Mistral Nemo 12B Celeste 1722556800 A specialized story writing and roleplaying mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "229 perplexity/llama-3.1-sonar-small-128k-online Perplexity: Llama 3.1 Sonar 8B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 
127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.0000002 0.0000002 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "230 perplexity/llama-3.1-sonar-large-128k-online Perplexity: Llama 3.1 Sonar 70B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "231 meta-llama/llama-3.1-8b-instruct:free Meta: Llama 3.1 8B Instruct (free) 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 4096.0 False NaN NaN\n", + "232 meta-llama/llama-3.1-8b-instruct Meta: Llama 3.1 8B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000003 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "233 meta-llama/llama-3.1-405b-instruct Meta: Llama 3.1 405B Instruct 1721692800 The highly anticipated 400B class of Llama3 is... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000008 0.0000008 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "234 meta-llama/llama-3.1-70b-instruct Meta: Llama 3.1 70B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000001 0.00000028 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "235 mistralai/codestral-mamba Mistral: Codestral Mamba 1721347200 A 7.3B parameter Mamba-based model designed fo... 262144 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 262144.0 NaN False NaN NaN\n", + "236 mistralai/mistral-nemo:free Mistral: Mistral Nemo (free) 1721347200 A 12B parameter model with a 128k token contex... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 128000.0 128000.0 False NaN NaN\n", + "237 mistralai/mistral-nemo Mistral: Mistral Nemo 1721347200 A 12B parameter model with a 128k token contex... 98304 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000003 0.00000007 0 0 0 0 98304.0 49152.0 False NaN NaN\n", + "238 openai/gpt-4o-mini OpenAI: GPT-4o-mini 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.000217 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "239 openai/gpt-4o-mini-2024-07-18 OpenAI: GPT-4o-mini (2024-07-18) 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.007225 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "240 google/gemma-2-27b-it Google: Gemma 2 27B 1720828800 Gemma 2 27B by Google is an open model built f... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.0000001 0.0000003 0 0 0 0 8192.0 NaN False NaN NaN\n", + "241 alpindale/magnum-72b Magnum 72B 1720656000 From the maker of [Goliath](https://openrouter... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "242 google/gemma-2-9b-it:free Google: Gemma 2 9B (free) 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Gemini gemma 0 0 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "243 google/gemma-2-9b-it Google: Gemma 2 9B 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.00000002 0.00000006 0 0 0 0 8192.0 NaN False NaN NaN\n", + "244 01-ai/yi-large 01.AI: Yi Large 1719273600 The Yi Large model was designed by 01.AI with ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Yi None 0.000003 0.000003 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "245 ai21/jamba-instruct AI21: Jamba Instruct 1719273600 The Jamba-Instruct model, introduced by AI21 L... 256000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000005 0.0000007 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel... 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "247 anthropic/claude-3.5-sonnet-20240620 Anthropic: Claude 3.5 Sonnet (2024-06-20) 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "248 sao10k/l3-euryale-70b Sao10k: Llama 3 Euryale 70B v2.1 1718668800 Euryale 70B v2.1 is a model focused on creativ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000148 0.00000148 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "249 cognitivecomputations/dolphin-mixtral-8x22b Dolphin 2.9.2 Mixtral 8x22B 🐬 1717804800 Dolphin 2.9 is designed for instruction follow... 
16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000009 0.0000009 0 0 0 0 16000.0 NaN False NaN NaN\n", + "250 qwen/qwen-2-72b-instruct Qwen 2 72B Instruct 1717718400 Qwen2 72B is a transformer-based model that ex... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000009 0.0000009 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "251 mistralai/mistral-7b-instruct:free Mistral: Mistral 7B Instruct (free) 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "252 mistralai/mistral-7b-instruct Mistral: Mistral 7B Instruct 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "253 nousresearch/hermes-2-pro-llama-3-8b NousResearch: Hermes 2 Pro - Llama-3 8B 1716768000 Hermes 2 Pro is an upgraded, retrained version... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.000000025 0.00000004 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "254 mistralai/mistral-7b-instruct-v0.3 Mistral: Mistral 7B Instruct v0.3 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "255 microsoft/phi-3-mini-128k-instruct Microsoft: Phi-3 Mini 128K Instruct 1716681600 Phi-3 Mini is a powerful 3.8B parameter model ... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Other phi3 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "256 microsoft/phi-3-medium-128k-instruct Microsoft: Phi-3 Medium 128K Instruct 1716508800 Phi-3 128K Medium is a powerful 14-billion par... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000003 0 0 0 0 131072.0 NaN False NaN NaN\n", + "257 neversleep/llama-3-lumimaid-70b NeverSleep: Llama 3 Lumimaid 70B 1715817600 The NeverSleep team is back, with a Llama 3 70... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "258 deepseek/deepseek-coder DeepSeek-Coder-V2 1715644800 DeepSeek-Coder-V2, an open-source Mixture-of-E... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000004 0.00000012 0 0 0 0 128000.0 NaN False NaN NaN\n", + "259 google/gemini-flash-1.5 Google: Gemini 1.5 Flash 1715644800 Gemini 1.5 Flash is a foundation model that pe... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.000000075 0.0000003 0 0.00004 0 0 1000000.0 8192.0 False 0.00000001875 0.0000001583\n", + "260 openai/gpt-4o OpenAI: GPT-4o 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "261 openai/gpt-4o:extended OpenAI: GPT-4o (extended) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000006 0.000018 0 0.007225 0 0 128000.0 64000.0 True NaN NaN\n", + "262 meta-llama/llama-guard-2-8b Meta: LlamaGuard 2 8B 1715558400 This safeguard model has 8B parameters and is ... 
8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.0000002 0.0000002 0 0 0 0 8192.0 NaN False NaN NaN\n", + "263 openai/gpt-4o-2024-05-13 OpenAI: GPT-4o (2024-05-13) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 4096.0 True NaN NaN\n", + "264 allenai/olmo-7b-instruct OLMo 7B Instruct 1715299200 OLMo 7B Instruct by the Allen Institute for AI... 2048 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other zephyr 0.00000008 0.00000024 0 0 0 0 2048.0 NaN False NaN NaN\n", + "265 neversleep/llama-3-lumimaid-8b:extended NeverSleep: Llama 3 Lumimaid 8B (extended) 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "266 neversleep/llama-3-lumimaid-8b NeverSleep: Llama 3 Lumimaid 8B 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "267 sao10k/fimbulvetr-11b-v2 Fimbulvetr 11B v2 1713657600 Creative writing model, routed with permission... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "268 meta-llama/llama-3-8b-instruct Meta: Llama 3 8B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, top_k, seed, ... 
text->text [text] [text] Llama3 llama3 0.00000003 0.00000006 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "269 meta-llama/llama-3-70b-instruct Meta: Llama 3 70B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000003 0.0000004 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "270 mistralai/mixtral-8x22b-instruct Mistral: Mixtral 8x22B Instruct 1713312000 Mistral's official instruct fine-tuned version... 65536 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.0000004 0.0000012 0 0 0 0 65536.0 NaN False NaN NaN\n", + "271 microsoft/wizardlm-2-8x22b WizardLM-2 8x22B 1713225600 WizardLM-2 8x22B is Microsoft AI's most advanc... 65536 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Mistral vicuna 0.0000005 0.0000005 0 0 0 0 65536.0 16384.0 False NaN NaN\n", + "272 google/gemini-pro-1.5 Google: Gemini 1.5 Pro 1712620800 Google's latest multimodal model, supports ima... 2000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.00000125 0.000005 0 0.0006575 0 0 2000000.0 8192.0 False NaN NaN\n", + "273 openai/gpt-4-turbo OpenAI: GPT-4 Turbo 1712620800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.00001 0.00003 0 0.01445 0 0 128000.0 4096.0 True NaN NaN\n", + "274 cohere/command-r-plus Cohere: Command R+ 1712188800 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "275 cohere/command-r-plus-04-2024 Cohere: Command R+ (04-2024) 1712016000 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... 
text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "276 sophosympatheia/midnight-rose-70b Midnight Rose 70B 1711065600 A merge with a complex family tree, this model... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000008 0.0000008 0 0 0 0 4096.0 NaN False NaN NaN\n", + "277 cohere/command Cohere: Command 1710374400 Command is an instruction-following conversati... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.000001 0.000002 0 0 0 0 4096.0 4000.0 False NaN NaN\n", + "278 cohere/command-r Cohere: Command R 1710374400 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "279 anthropic/claude-3-haiku:beta Anthropic: Claude 3 Haiku (self-moderated) 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 False 0.00000003 0.0000003\n", + "280 anthropic/claude-3-haiku Anthropic: Claude 3 Haiku 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 True 0.00000003 0.0000003\n", + "281 anthropic/claude-3-opus:beta Anthropic: Claude 3 Opus (self-moderated) 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 False 0.0000015 0.00001875\n", + "282 anthropic/claude-3-opus Anthropic: Claude 3 Opus 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 
200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 True 0.0000015 0.00001875\n", + "283 anthropic/claude-3-sonnet:beta Anthropic: Claude 3 Sonnet (self-moderated) 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 False 0.0000003 0.00000375\n", + "284 anthropic/claude-3-sonnet Anthropic: Claude 3 Sonnet 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 True 0.0000003 0.00000375\n", + "285 cohere/command-r-03-2024 Cohere: Command R (03-2024) 1709341200 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "286 mistralai/mistral-large Mistral Large 1708905600 This is Mistral AI's flagship model, Mistral L... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 128000.0 NaN False NaN NaN\n", + "287 openai/gpt-3.5-turbo-0613 OpenAI: GPT-3.5 Turbo (older v0613) 1706140800 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 4095 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 4095.0 4096.0 False NaN NaN\n", + "288 openai/gpt-4-turbo-preview OpenAI: GPT-4 Turbo Preview 1706140800 The preview GPT-4 model with improved instruct... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo Nous: Hermes 2 Mixtral 8x7B DPO 1705363200 Nous Hermes 2 Mixtral 8x7B DPO is the new flag... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000006 0.0000006 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "290 mistralai/mistral-medium Mistral Medium 1704844800 This is Mistral AI's closed-source, medium-sid... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000275 0.0000081 0 0 0 0 32768.0 NaN False NaN NaN\n", + "291 mistralai/mistral-small Mistral Small 1704844800 With 22 billion parameters, Mistral Small v24.... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "292 mistralai/mistral-tiny Mistral Tiny 1704844800 Note: This model is being deprecated. Recommen... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 32768.0 NaN False NaN NaN\n", + "293 mistralai/mistral-7b-instruct-v0.2 Mistral: Mistral 7B Instruct v0.2 1703721600 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000002 0.0000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "294 mistralai/mixtral-8x7b-instruct Mistral: Mixtral 8x7B Instruct 1702166400 Mixtral 8x7B Instruct is a pretrained generati... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000008 0.00000024 0 0 0 0 32768.0 NaN False NaN NaN\n", + "295 neversleep/noromaid-20b Noromaid 20B 1700956800 A collab between IkariDev and Undi. This merge... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.00000075 0.0000015 0 0 0 0 8192.0 2048.0 False NaN NaN\n", + "296 anthropic/claude-2.1:beta Anthropic: Claude v2.1 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "297 anthropic/claude-2.1 Anthropic: Claude v2.1 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "298 anthropic/claude-2:beta Anthropic: Claude v2 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "299 anthropic/claude-2 Anthropic: Claude v2 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "300 undi95/toppy-m-7b Toppy M 7B 1699574400 A wild 7B parameter model that merges several ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "301 alpindale/goliath-120b Goliath 120B 1699574400 A large LLM created by combining two fine-tune... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000065625 0.000009375 0 0 0 0 6144.0 512.0 False NaN NaN\n", + "302 openrouter/auto Auto Router 1699401600 Your prompt will be processed by a meta-model ... 
2000000 None [] text->text [text] [text] Router None -1 -1 NaN NaN NaN NaN NaN NaN False NaN NaN\n", + "303 openai/gpt-3.5-turbo-1106 OpenAI: GPT-3.5 Turbo 16k (older v1106) 1699228800 An older GPT-3.5 Turbo model with improved ins... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "304 openai/gpt-4-1106-preview OpenAI: GPT-4 Turbo (older v1106) 1699228800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "305 jondurbin/airoboros-l2-70b Airoboros 70B 1698537600 A Llama 2 70B fine-tune using synthetic data (... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000005 0.0000005 0 0 0 0 4096.0 NaN False NaN NaN\n", + "306 openai/gpt-3.5-turbo-instruct OpenAI: GPT-3.5 Turbo Instruct 1695859200 This model is a variant of GPT-3.5 Turbo tuned... 4095 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] GPT chatml 0.0000015 0.000002 0 0 0 0 4095.0 4096.0 True NaN NaN\n", + "307 mistralai/mistral-7b-instruct-v0.1 Mistral: Mistral 7B Instruct v0.1 1695859200 A 7.3B parameter model that outperforms Llama ... 2824 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000011 0.00000019 0 0 0 0 2824.0 NaN False NaN NaN\n", + "308 pygmalionai/mythalion-13b Pygmalion: Mythalion 13B 1693612800 A blend of the new Pygmalion-13b and MythoMax.... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 8192.0 1024.0 False NaN NaN\n", + "309 openai/gpt-3.5-turbo-16k OpenAI: GPT-3.5 Turbo 16k 1693180800 This model offers four times the context lengt... 16385 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.000003 0.000004 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "310 openai/gpt-4-32k OpenAI: GPT-4 32k 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "311 openai/gpt-4-32k-0314 OpenAI: GPT-4 32k (older v0314) 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "312 mancer/weaver Mancer: Weaver (alpha) 1690934400 An attempt to recreate Claude-style verbosity,... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.000001125 0.000001125 0 0 0 0 8000.0 1000.0 False NaN NaN\n", + "313 anthropic/claude-2.0:beta Anthropic: Claude v2.0 (self-moderated) 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 False NaN NaN\n", + "314 anthropic/claude-2.0 Anthropic: Claude v2.0 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 True NaN NaN\n", + "315 undi95/remm-slerp-l2-13b ReMM SLERP 13B 1689984000 A recreation trial of the original MythoMax-L2... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 6144.0 1024.0 False NaN NaN\n", + "316 gryphe/mythomax-l2-13b MythoMax 13B 1688256000 One of the highest performing and most popular... 4096 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.000000065 0.000000065 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "317 meta-llama/llama-2-70b-chat Meta: Llama 2 70B Chat 1687219200 The flagship, 70 billion parameter language mo... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 llama2 0.0000009 0.0000009 0 0 0 0 4096.0 NaN False NaN NaN\n", + "318 openai/gpt-3.5-turbo OpenAI: GPT-3.5 Turbo 1685232000 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "319 openai/gpt-3.5-turbo-0125 OpenAI: GPT-3.5 Turbo 16k 1685232000 The latest GPT-3.5 Turbo model with improved i... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "320 openai/gpt-4 OpenAI: GPT-4 1685232000 OpenAI's flagship model, GPT-4 is a large-scal... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN\n", + "321 openai/gpt-4-0314 OpenAI: GPT-4 (older v0314) 1685232000 GPT-4-0314 is the first version of GPT-4 relea... 8191 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalize the nested JSON\n", + "df = pd.json_normalize(val, sep=\"_\")\n", + "df\n", + "# View the resulting DataFrame\n", + "# print(df.T) # Transpose just for readable vertical inspection" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id mistralai/mistral-medium-3\n", + "name Mistral: Mistral Medium 3\n", + "created 1746627341\n", + "description Mistral Medium 3 is a high-performance enterpr...\n", + "context_length 131072\n", + "per_request_limits None\n", + "supported_parameters [tools, tool_choice, max_tokens, temperature, ...\n", + "architecture_modality text+image->text\n", + "architecture_input_modalities [text, image]\n", + "architecture_output_modalities [text]\n", + "architecture_tokenizer Mistral\n", + "architecture_instruct_type None\n", + "pricing_prompt 0.0000004\n", + "pricing_completion 0.000002\n", + "pricing_request 0\n", + "pricing_image 0\n", + "pricing_web_search 0\n", + "pricing_internal_reasoning 0\n", + "top_provider_context_length 131072.0\n", + "top_provider_max_completion_tokens NaN\n", + "top_provider_is_moderated False\n", + "pricing_input_cache_read NaN\n", + "pricing_input_cache_write NaN\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0].T" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "col_names = [\"id\", \"context_length\", \"pricing_prompt\", \"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id object\n", + "name object\n", + "created int64\n", + "description object\n", + 
"context_length int64\n", + "per_request_limits object\n", + "supported_parameters object\n", + "architecture_modality object\n", + "architecture_input_modalities object\n", + "architecture_output_modalities object\n", + "architecture_tokenizer object\n", + "architecture_instruct_type object\n", + "pricing_prompt object\n", + "pricing_completion object\n", + "pricing_request object\n", + "pricing_image object\n", + "pricing_web_search object\n", + "pricing_internal_reasoning object\n", + "top_provider_context_length float64\n", + "top_provider_max_completion_tokens float64\n", + "top_provider_is_moderated bool\n", + "pricing_input_cache_read object\n", + "pricing_input_cache_write object\n", + "dtype: object" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 
0.0, 'is_string': 0.38819875776397517, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.5962732919254659, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 1.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.13043478260869565, 'is_bool': 0.0, 'is_string': 0.13043478260869565, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.07142857142857142, 'is_bool': 0.0, 'is_string': 0.07142857142857142, 'type': 'is_numeric'}\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " print(hpandas.infer_column_types(df[col]))" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_numericis_boolis_string
id0.0000000.01.000000
name0.0000000.01.000000
created1.0000000.00.000000
description0.0000000.01.000000
context_length1.0000000.00.000000
per_request_limits0.0000000.00.000000
supported_parameters0.0000000.00.000000
architecture_modality0.0000000.01.000000
architecture_input_modalities0.0000000.00.000000
architecture_output_modalities0.0000000.00.000000
architecture_tokenizer0.0000000.01.000000
architecture_instruct_type0.0000000.00.388199
pricing_prompt1.0000000.01.000000
pricing_completion1.0000000.01.000000
pricing_request0.9968940.00.996894
pricing_image0.9968940.00.996894
pricing_web_search0.9968940.00.996894
pricing_internal_reasoning0.9968940.00.996894
top_provider_context_length0.9968940.00.000000
top_provider_max_completion_tokens0.5962730.00.000000
top_provider_is_moderated1.0000001.00.000000
pricing_input_cache_read0.1304350.00.130435
pricing_input_cache_write0.0714290.00.071429
\n", + "
" + ], + "text/plain": [ + " is_numeric is_bool is_string\n", + "id 0.000000 0.0 1.000000\n", + "name 0.000000 0.0 1.000000\n", + "created 1.000000 0.0 0.000000\n", + "description 0.000000 0.0 1.000000\n", + "context_length 1.000000 0.0 0.000000\n", + "per_request_limits 0.000000 0.0 0.000000\n", + "supported_parameters 0.000000 0.0 0.000000\n", + "architecture_modality 0.000000 0.0 1.000000\n", + "architecture_input_modalities 0.000000 0.0 0.000000\n", + "architecture_output_modalities 0.000000 0.0 0.000000\n", + "architecture_tokenizer 0.000000 0.0 1.000000\n", + "architecture_instruct_type 0.000000 0.0 0.388199\n", + "pricing_prompt 1.000000 0.0 1.000000\n", + "pricing_completion 1.000000 0.0 1.000000\n", + "pricing_request 0.996894 0.0 0.996894\n", + "pricing_image 0.996894 0.0 0.996894\n", + "pricing_web_search 0.996894 0.0 0.996894\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894\n", + "top_provider_context_length 0.996894 0.0 0.000000\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.000000\n", + "top_provider_is_moderated 1.000000 1.0 0.000000\n", + "pricing_input_cache_read 0.130435 0.0 0.130435\n", + "pricing_input_cache_write 0.071429 0.0 0.071429" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(lambda x: pd.Series(hpandas.infer_column_types(x))).T" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_numericis_boolis_stringtype
id0.00.01.0is_bool
name0.00.01.0is_bool
created1.00.00.0is_numeric
description0.00.01.0is_bool
context_length1.00.00.0is_numeric
per_request_limits0.00.00.0is_bool
supported_parameters0.00.00.0is_bool
architecture_modality0.00.01.0is_bool
architecture_input_modalities0.00.00.0is_bool
architecture_output_modalities0.00.00.0is_bool
architecture_tokenizer0.00.01.0is_bool
architecture_instruct_type0.00.00.388199is_bool
pricing_prompt1.00.01.0is_numeric
pricing_completion1.00.01.0is_numeric
pricing_request0.9968940.00.996894is_numeric
pricing_image0.9968940.00.996894is_numeric
pricing_web_search0.9968940.00.996894is_numeric
pricing_internal_reasoning0.9968940.00.996894is_numeric
top_provider_context_length0.9968940.00.0is_numeric
top_provider_max_completion_tokens0.5962730.00.0is_numeric
top_provider_is_moderated1.01.00.0is_bool
pricing_input_cache_read0.1304350.00.130435is_numeric
pricing_input_cache_write0.0714290.00.071429is_numeric
\n", + "
" + ], + "text/plain": [ + " is_numeric is_bool is_string type\n", + "id 0.0 0.0 1.0 is_bool\n", + "name 0.0 0.0 1.0 is_bool\n", + "created 1.0 0.0 0.0 is_numeric\n", + "description 0.0 0.0 1.0 is_bool\n", + "context_length 1.0 0.0 0.0 is_numeric\n", + "per_request_limits 0.0 0.0 0.0 is_bool\n", + "supported_parameters 0.0 0.0 0.0 is_bool\n", + "architecture_modality 0.0 0.0 1.0 is_bool\n", + "architecture_input_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_output_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_tokenizer 0.0 0.0 1.0 is_bool\n", + "architecture_instruct_type 0.0 0.0 0.388199 is_bool\n", + "pricing_prompt 1.0 0.0 1.0 is_numeric\n", + "pricing_completion 1.0 0.0 1.0 is_numeric\n", + "pricing_request 0.996894 0.0 0.996894 is_numeric\n", + "pricing_image 0.996894 0.0 0.996894 is_numeric\n", + "pricing_web_search 0.996894 0.0 0.996894 is_numeric\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894 is_numeric\n", + "top_provider_context_length 0.996894 0.0 0.0 is_numeric\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.0 is_numeric\n", + "top_provider_is_moderated 1.0 1.0 0.0 is_bool\n", + "pricing_input_cache_read 0.130435 0.0 0.130435 is_numeric\n", + "pricing_input_cache_write 0.071429 0.0 0.071429 is_numeric" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hpandas.infer_column_types_df(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 True\n", + "2 True\n", + "3 True\n", + "4 True\n", + "5 True\n", + "6 True\n", + "7 True\n", + "8 True\n", + "9 True\n", + "10 True\n", + "11 True\n", + "12 True\n", + "13 True\n", + "14 True\n", + "15 True\n", + "16 True\n", + "17 True\n", + "18 True\n", + "19 True\n", + "20 True\n", + "21 True\n", + "22 True\n", + "23 True\n", + "24 True\n", + "25 True\n", + "26 True\n", + "27 True\n", + "28 True\n", + "29 True\n", 
+ "30 True\n", + "31 True\n", + "32 True\n", + "33 True\n", + "34 True\n", + "35 True\n", + "36 True\n", + "37 True\n", + "38 True\n", + "39 True\n", + "40 True\n", + "41 True\n", + "42 True\n", + "43 True\n", + "44 True\n", + "45 True\n", + "46 True\n", + "47 True\n", + "48 True\n", + "49 True\n", + "50 True\n", + "51 True\n", + "52 True\n", + "53 True\n", + "54 True\n", + "55 True\n", + "56 True\n", + "57 True\n", + "58 True\n", + "59 True\n", + "60 True\n", + "61 True\n", + "62 True\n", + "63 True\n", + "64 True\n", + "65 True\n", + "66 True\n", + "67 True\n", + "68 True\n", + "69 True\n", + "70 True\n", + "71 True\n", + "72 True\n", + "73 True\n", + "74 True\n", + "75 True\n", + "76 True\n", + "77 True\n", + "78 True\n", + "79 True\n", + "80 True\n", + "81 True\n", + "82 True\n", + "83 True\n", + "84 True\n", + "85 True\n", + "86 True\n", + "87 True\n", + "88 True\n", + "89 True\n", + "90 True\n", + "91 True\n", + "92 True\n", + "93 True\n", + "94 True\n", + "95 True\n", + "96 True\n", + "97 True\n", + "98 True\n", + "99 True\n", + "100 True\n", + "101 True\n", + "102 True\n", + "103 True\n", + "104 True\n", + "105 True\n", + "106 True\n", + "107 True\n", + "108 True\n", + "109 True\n", + "110 True\n", + "111 True\n", + "112 True\n", + "113 True\n", + "114 True\n", + "115 True\n", + "116 True\n", + "117 True\n", + "118 True\n", + "119 True\n", + "120 True\n", + "121 True\n", + "122 True\n", + "123 True\n", + "124 True\n", + "125 True\n", + "126 True\n", + "127 True\n", + "128 True\n", + "129 True\n", + "130 True\n", + "131 True\n", + "132 True\n", + "133 True\n", + "134 True\n", + "135 True\n", + "136 True\n", + "137 True\n", + "138 True\n", + "139 True\n", + "140 True\n", + "141 True\n", + "142 True\n", + "143 True\n", + "144 True\n", + "145 True\n", + "146 True\n", + "147 True\n", + "148 True\n", + "149 True\n", + "150 True\n", + "151 True\n", + "152 True\n", + "153 True\n", + "154 True\n", + "155 True\n", + "156 True\n", + "157 True\n", + "158 True\n", + 
"159 True\n", + "160 True\n", + "161 True\n", + "162 True\n", + "163 True\n", + "164 True\n", + "165 True\n", + "166 True\n", + "167 True\n", + "168 True\n", + "169 True\n", + "170 True\n", + "171 True\n", + "172 True\n", + "173 True\n", + "174 True\n", + "175 True\n", + "176 True\n", + "177 True\n", + "178 True\n", + "179 True\n", + "180 True\n", + "181 True\n", + "182 True\n", + "183 True\n", + "184 True\n", + "185 True\n", + "186 True\n", + "187 True\n", + "188 True\n", + "189 True\n", + "190 True\n", + "191 True\n", + "192 True\n", + "193 True\n", + "194 True\n", + "195 True\n", + "196 True\n", + "197 True\n", + "198 True\n", + "199 True\n", + "200 True\n", + "201 True\n", + "202 True\n", + "203 True\n", + "204 True\n", + "205 True\n", + "206 True\n", + "207 True\n", + "208 True\n", + "209 True\n", + "210 True\n", + "211 True\n", + "212 True\n", + "213 True\n", + "214 True\n", + "215 True\n", + "216 True\n", + "217 True\n", + "218 True\n", + "219 True\n", + "220 True\n", + "221 True\n", + "222 True\n", + "223 True\n", + "224 True\n", + "225 True\n", + "226 True\n", + "227 True\n", + "228 True\n", + "229 True\n", + "230 True\n", + "231 True\n", + "232 True\n", + "233 True\n", + "234 True\n", + "235 True\n", + "236 True\n", + "237 True\n", + "238 True\n", + "239 True\n", + "240 True\n", + "241 True\n", + "242 True\n", + "243 True\n", + "244 True\n", + "245 True\n", + "246 True\n", + "247 True\n", + "248 True\n", + "249 True\n", + "250 True\n", + "251 True\n", + "252 True\n", + "253 True\n", + "254 True\n", + "255 True\n", + "256 True\n", + "257 True\n", + "258 True\n", + "259 True\n", + "260 True\n", + "261 True\n", + "262 True\n", + "263 True\n", + "264 True\n", + "265 True\n", + "266 True\n", + "267 True\n", + "268 True\n", + "269 True\n", + "270 True\n", + "271 True\n", + "272 True\n", + "273 True\n", + "274 True\n", + "275 True\n", + "276 True\n", + "277 True\n", + "278 True\n", + "279 True\n", + "280 True\n", + "281 True\n", + "282 True\n", + "283 True\n", + 
"284 True\n", + "285 True\n", + "286 True\n", + "287 True\n", + "288 True\n", + "289 True\n", + "290 True\n", + "291 True\n", + "292 True\n", + "293 True\n", + "294 True\n", + "295 True\n", + "296 True\n", + "297 True\n", + "298 True\n", + "299 True\n", + "300 True\n", + "301 True\n", + "302 False\n", + "303 True\n", + "304 True\n", + "305 True\n", + "306 True\n", + "307 True\n", + "308 True\n", + "309 True\n", + "310 True\n", + "311 True\n", + "312 True\n", + "313 True\n", + "314 True\n", + "315 True\n", + "316 True\n", + "317 True\n", + "318 True\n", + "319 True\n", + "320 True\n", + "321 True\n", + "Name: pricing_request, dtype: bool" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"pricing_request\"], errors=\"coerce\").notna()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.000002\n", + "1 0.00001\n", + "2 0.00000085\n", + "3 0.00000018\n", + "4 0.0000033\n", + "5 0.0000012\n", + "6 0.0000008\n", + "7 0.0000008\n", + "8 0.00000075\n", + "9 0\n", + "10 0.00000035\n", + "11 0\n", + "12 0\n", + "13 0.000001\n", + "14 0\n", + "15 0\n", + "16 0\n", + "17 0\n", + "18 0\n", + "19 0.00000218\n", + "20 0.00000005\n", + "21 0\n", + "22 0.0000003\n", + "23 0\n", + "24 0.000000138\n", + "25 0\n", + "26 0.00000024\n", + "27 0\n", + "28 0.0000003\n", + "29 0\n", + "30 0.000002\n", + "31 0\n", + "32 0.00000024\n", + "33 0\n", + "34 0\n", + "35 0\n", + "36 0\n", + "37 0.00000024\n", + "38 0\n", + "39 0.00000024\n", + "40 0.0000006\n", + "41 0.0000035\n", + "42 0.0000044\n", + "43 0.00004\n", + "44 0.0000044\n", + "45 0\n", + "46 0.00000003\n", + "47 0.000008\n", + "48 0.0000016\n", + "49 0.0000004\n", + "50 0.0000012\n", + "51 0.0000012\n", + "52 0\n", + "53 0\n", + "54 0\n", + "55 0.0000005\n", + "56 0.000015\n", + "57 0\n", + "58 0.0000004\n", + "59 0\n", + "60 0\n", + "61 
0.0000006\n", + "62 0\n", + "63 0.0000003\n", + "64 0.0000034\n", + "65 0.0000001\n", + "66 0\n", + "67 0.00000018\n", + "68 0.00000088\n", + "69 0\n", + "70 0\n", + "71 0\n", + "72 0\n", + "73 0\n", + "74 0.0000009\n", + "75 0\n", + "76 0.00000088\n", + "77 0\n", + "78 0.0006\n", + "79 0\n", + "80 0.00000015\n", + "81 0\n", + "82 0\n", + "83 0\n", + "84 0.00000004\n", + "85 0.000008\n", + "86 0.0000004\n", + "87 0\n", + "88 0.0000001\n", + "89 0.00001\n", + "90 0.0000006\n", + "91 0.00001\n", + "92 0\n", + "93 0\n", + "94 0.0000002\n", + "95 0.000001\n", + "96 0.0000008\n", + "97 0.0000001\n", + "98 0.000008\n", + "99 0.000015\n", + "100 0.000008\n", + "101 0\n", + "102 0\n", + "103 0.0000002\n", + "104 0\n", + "105 0\n", + "106 0.00015\n", + "107 0.0000003\n", + "108 0.000015\n", + "109 0.000015\n", + "110 0.000015\n", + "111 0.000008\n", + "112 0.0000006\n", + "113 0\n", + "114 0\n", + "115 0.00000006\n", + "116 0.0000044\n", + "117 0.00000004\n", + "118 0.0000004\n", + "119 0.00000063\n", + "120 0.000008\n", + "121 0.0000014\n", + "122 0.0000002\n", + "123 0.0000032\n", + "124 0.0000002\n", + "125 0\n", + "126 0.00000075\n", + "127 0.0000012\n", + "128 0.0000064\n", + "129 0.0000044\n", + "130 0.00000018\n", + "131 0\n", + "132 0.00000012\n", + "133 0\n", + "134 0.00000018\n", + "135 0\n", + "136 0.00000015\n", + "137 0.000005\n", + "138 0.000001\n", + "139 0.00000001\n", + "140 0.00000002\n", + "141 0\n", + "142 0.0000004\n", + "143 0\n", + "144 0.00000218\n", + "145 0.0000011\n", + "146 0.0000009\n", + "147 0.00000014\n", + "148 0\n", + "149 0.00000089\n", + "150 0.0000008\n", + "151 0.00006\n", + "152 0.000006\n", + "153 0.00001\n", + "154 0.00001\n", + "155 0.00000015\n", + "156 0\n", + "157 0\n", + "158 0.00000035\n", + "159 0.00000024\n", + "160 0.00000014\n", + "161 0.0000032\n", + "162 0\n", + "163 0.00000027\n", + "164 0\n", + "165 0.000006\n", + "166 0.00001\n", + "167 0.000006\n", + "168 0.000006\n", + "169 0.000006\n", + "170 0.000015\n", + "171 
0.0000012\n", + "172 0\n", + "173 0.00000015\n", + "174 0.0000045\n", + "175 0.0000034\n", + "176 0.00000045\n", + "177 0.000004\n", + "178 0.000004\n", + "179 0.000004\n", + "180 0.000004\n", + "181 0.00000225\n", + "182 0.00000225\n", + "183 0.000015\n", + "184 0.000015\n", + "185 0.000015\n", + "186 0.0000001\n", + "187 0.00000004\n", + "188 0\n", + "189 0.0000001\n", + "190 0.0000003\n", + "191 0.00001\n", + "192 0.00001\n", + "193 0.00000015\n", + "194 0.0000005\n", + "195 0.000003\n", + "196 0.00000015\n", + "197 0\n", + "198 0.00000002\n", + "199 0\n", + "200 0.00000001\n", + "201 0.0000012\n", + "202 0\n", + "203 0.000000049\n", + "204 0\n", + "205 0.00000039\n", + "206 0.0000006\n", + "207 0.00000075\n", + "208 0.00006\n", + "209 0.00006\n", + "210 0.0000044\n", + "211 0.0000044\n", + "212 0.0000001\n", + "213 0.00001\n", + "214 0.0000006\n", + "215 0\n", + "216 0.0000002\n", + "217 0.0000008\n", + "218 0\n", + "219 0.00000009\n", + "220 0.0000003\n", + "221 0.0000008\n", + "222 0.000015\n", + "223 0.00000005\n", + "224 0.0000012\n", + "225 0.00001\n", + "226 0\n", + "227 0.000002\n", + "228 0.0000012\n", + "229 0.0000002\n", + "230 0.000001\n", + "231 0\n", + "232 0.00000003\n", + "233 0.0000008\n", + "234 0.00000028\n", + "235 0.00000025\n", + "236 0\n", + "237 0.00000007\n", + "238 0.0000006\n", + "239 0.0000006\n", + "240 0.0000003\n", + "241 0.000006\n", + "242 0\n", + "243 0.00000006\n", + "244 0.000003\n", + "245 0.0000007\n", + "246 0.000015\n", + "247 0.000015\n", + "248 0.00000148\n", + "249 0.0000009\n", + "250 0.0000009\n", + "251 0\n", + "252 0.000000054\n", + "253 0.00000004\n", + "254 0.000000054\n", + "255 0.0000001\n", + "256 0.0000003\n", + "257 0.000006\n", + "258 0.00000012\n", + "259 0.0000003\n", + "260 0.00001\n", + "261 0.000018\n", + "262 0.0000002\n", + "263 0.000015\n", + "264 0.00000024\n", + "265 0.00000075\n", + "266 0.00000075\n", + "267 0.0000012\n", + "268 0.00000006\n", + "269 0.0000004\n", + "270 0.0000012\n", + "271 
0.0000005\n", + "272 0.000005\n", + "273 0.00003\n", + "274 0.000015\n", + "275 0.000015\n", + "276 0.0000008\n", + "277 0.000002\n", + "278 0.0000015\n", + "279 0.00000125\n", + "280 0.00000125\n", + "281 0.000075\n", + "282 0.000075\n", + "283 0.000015\n", + "284 0.000015\n", + "285 0.0000015\n", + "286 0.000006\n", + "287 0.000002\n", + "288 0.00003\n", + "289 0.0000006\n", + "290 0.0000081\n", + "291 0.0000006\n", + "292 0.00000025\n", + "293 0.0000002\n", + "294 0.00000024\n", + "295 0.0000015\n", + "296 0.000024\n", + "297 0.000024\n", + "298 0.000024\n", + "299 0.000024\n", + "300 0.0000012\n", + "301 0.000009375\n", + "302 -1\n", + "303 0.000002\n", + "304 0.00003\n", + "305 0.0000005\n", + "306 0.000002\n", + "307 0.00000019\n", + "308 0.000001125\n", + "309 0.000004\n", + "310 0.00012\n", + "311 0.00012\n", + "312 0.000001125\n", + "313 0.000024\n", + "314 0.000024\n", + "315 0.000001125\n", + "316 0.000000065\n", + "317 0.0000009\n", + "318 0.0000015\n", + "319 0.0000015\n", + "320 0.00006\n", + "321 0.00006\n", + "Name: pricing_completion, dtype: object" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcontext_lengthpricing_promptpricing_completion
302openrouter/auto2000000-1-1
133deepseek/deepseek-r1-distill-qwen-32b:free1600000
59nvidia/llama-3.1-nemotron-ultra-253b-v1:free13107200
113cognitivecomputations/dolphin3.0-r1-mistral-24...3276800
57nvidia/llama-3.3-nemotron-super-49b-v1:free13107200
114cognitivecomputations/dolphin3.0-mistral-24b:free3276800
54moonshotai/kimi-vl-a3b-thinking:free13107200
53agentica-org/deepcoder-14b-preview:free9600000
52arliai/qwq-32b-arliai-rpr-v1:free3276800
231meta-llama/llama-3.1-8b-instruct:free13107200
226meta-llama/llama-3.1-405b:free6400000
125qwen/qwen2.5-vl-72b-instruct:free13107200
45shisa-ai/shisa-v2-llama3.3-70b:free3276800
87google/gemma-3-12b-it:free13107200
92rekaai/reka-flash-3:free3276800
131mistralai/mistral-small-24b-instruct-2501:free3276800
81open-r1/olympiccoder-32b:free3276800
60meta-llama/llama-4-maverick:free25600000
236mistralai/mistral-nemo:free12800000
62meta-llama/llama-4-scout:free51200000
83google/gemma-3-4b-it:free13107200
93google/gemma-3-27b-it:free9600000
79mistralai/mistral-small-3.1-24b-instruct:free9600000
251mistralai/mistral-7b-instruct:free3276800
77featherless/qwerky-72b:free3276800
75deepseek/deepseek-chat-v3-0324:free16384000
242google/gemma-2-9b-it:free819200
73qwen/qwen2.5-vl-32b-instruct:free819200
135deepseek/deepseek-r1-distill-qwen-14b:free6400000
72google/gemini-2.5-pro-exp-03-25100000000
70bytedance-research/ui-tars-72b:free3276800
69allenai/molmo-7b-d:free409600
101deepseek/deepseek-r1-zero:free16384000
102qwen/qwq-32b:free4000000
66deepseek/deepseek-v3-base:free16384000
104moonshotai/moonlight-16b-a3b-instruct:free819200
105nousresearch/deephermes-3-llama-3-8b-preview:free13107200
71qwen/qwen2.5-vl-3b-instruct:free6400000
218google/gemini-flash-1.5-8b-exp100000000
82google/gemma-3-1b-it:free3276800
156google/gemini-2.0-flash-exp:free104857600
204qwen/qwen-2.5-72b-instruct:free3276800
21qwen/qwen3-30b-a3b:free4096000
38thudm/glm-4-32b:free3276800
157meta-llama/llama-3.3-70b-instruct:free800000
18deepseek/deepseek-prover-v2:free16384000
17opengvlab/internvl3-2b:free3200000
23qwen/qwen3-8b:free4096000
16opengvlab/internvl3-14b:free3200000
14qwen/qwen3-1.7b:free3200000
202meta-llama/llama-3.2-11b-vision-instruct:free13107200
12qwen/qwen3-0.6b-04-28:free3200000
11microsoft/phi-4-reasoning:free3276800
162qwen/qwq-32b-preview:free1638400
9microsoft/phi-4-reasoning-plus:free3276800
15qwen/qwen3-4b:free12800000
164google/learnlm-1.5-pro-experimental:free4096000
148deepseek/deepseek-chat:free16384000
199meta-llama/llama-3.2-1b-instruct:free13100000
36thudm/glm-z1-32b:free3276800
35microsoft/mai-ds-r1:free16384000
34thudm/glm-4-9b:free3200000
33thudm/glm-z1-9b:free3200000
188qwen/qwen-2.5-7b-instruct:free3276800
172qwen/qwen-2.5-coder-32b-instruct:free3276800
25qwen/qwen3-14b:free4096000
197meta-llama/llama-3.2-3b-instruct:free2000000
141deepseek/deepseek-r1-distill-llama-70b:free819200
29qwen/qwen3-235b-a22b:free4096000
143deepseek/deepseek-r1:free16384000
27qwen/qwen3-32b:free4096000
215qwen/qwen-2.5-vl-7b-instruct:free6400000
31tngtech/deepseek-r1t-chimera:free16384000
200meta-llama/llama-3.2-1b-instruct1310720.0000000050.00000001
198meta-llama/llama-3.2-3b-instruct1310720.000000010.00000002
139liquid/lfm-7b327680.000000010.00000001
46qwen/qwen2.5-coder-7b-instruct327680.000000010.00000003
243google/gemma-2-9b-it81920.000000020.00000006
232meta-llama/llama-3.1-8b-instruct163840.000000020.00000003
84google/gemma-3-4b-it1310720.000000020.00000004
140liquid/lfm-3b327680.000000020.00000002
115meta-llama/llama-guard-3-8b1310720.000000020.00000006
223sao10k/l3-lunaris-8b81920.000000020.00000005
253nousresearch/hermes-2-pro-llama-3-8b1310720.0000000250.00000004
254mistralai/mistral-7b-instruct-v0.3327680.0000000280.000000054
252mistralai/mistral-7b-instruct327680.0000000280.000000054
268meta-llama/llama-3-8b-instruct81920.000000030.00000006
219microsoft/phi-3.5-mini-128k-instruct1310720.000000030.00000009
237mistralai/mistral-nemo983040.000000030.00000007
160amazon/nova-micro-v11280000.0000000350.00000014
24qwen/qwen3-8b1280000.0000000350.000000138
193google/gemini-flash-1.5-8b10000000.00000003750.00000015
155cohere/command-r7b-12-20241280000.00000003750.00000015
187mistralai/ministral-3b1310720.000000040.00000004
117deepseek/deepseek-r1-distill-llama-8b320000.000000040.00000004
258deepseek/deepseek-coder1280000.000000040.00000012
203meta-llama/llama-3.2-11b-vision-instruct1310720.0000000490.000000049
80mistralai/mistral-small-3.1-24b-instruct1310720.000000050.00000015
20meta-llama/llama-guard-4-12b1638400.000000050.00000005
189qwen/qwen-2.5-7b-instruct327680.000000050.0000001
97microsoft/phi-4-multimodal-instruct1310720.000000050.0000001
88google/gemma-3-12b-it1310720.000000050.0000001
124qwen/qwen-turbo10000000.000000050.0000002
132mistralai/mistral-small-24b-instruct-2501280000.000000060.00000012
173qwen/qwen-2.5-coder-32b-instruct327680.000000060.00000015
159amazon/nova-lite-v13000000.000000060.00000024
316gryphe/mythomax-l2-13b40960.0000000650.000000065
10microsoft/phi-4-reasoning-plus327680.000000070.00000035
147microsoft/phi-4163840.000000070.00000014
26qwen/qwen3-14b409600.000000070.00000024
259google/gemini-flash-1.510000000.0000000750.0000003
107google/gemini-2.0-flash-lite-00110485760.0000000750.0000003
63meta-llama/llama-4-scout10485760.000000080.0000003
294mistralai/mixtral-8x7b-instruct327680.000000080.00000024
264allenai/olmo-7b-instruct20480.000000080.00000024
163qwen/qwq-32b-preview327680.000000090.00000027
158meta-llama/llama-3.3-70b-instruct1310000.000000090.00000035
266neversleep/llama-3-lumimaid-8b245760.000000093750.00000075
207neversleep/llama-3.1-lumimaid-8b327680.000000093750.00000075
265neversleep/llama-3-lumimaid-8b:extended245760.000000093750.00000075
212mistralai/pixtral-12b327680.00000010.0000001
28qwen/qwen3-32b409600.00000010.0000003
49openai/gpt-4.1-nano10475760.00000010.0000004
186mistralai/ministral-8b1280000.00000010.0000001
118google/gemini-2.0-flash-00110000000.00000010.0000004
234meta-llama/llama-3.1-70b-instruct1310720.00000010.00000028
65mistral/ministral-8b1310720.00000010.0000001
240google/gemma-2-27b-it81920.00000010.0000003
256microsoft/phi-3-medium-128k-instruct1310720.00000010.0000003
255microsoft/phi-3-mini-128k-instruct1280000.00000010.0000001
94google/gemma-3-27b-it1310720.00000010.0000002
22qwen/qwen3-30b-a3b409600.00000010.0000003
142deepseek/deepseek-r1-distill-llama-70b1310720.00000010.0000004
307mistralai/mistral-7b-instruct-v0.128240.000000110.00000019
205qwen/qwen-2.5-72b-instruct327680.000000120.00000039
190nvidia/llama-3.1-nemotron-70b-instruct1310720.000000120.0000003
134deepseek/deepseek-r1-distill-qwen-32b1310720.000000120.00000018
220nousresearch/hermes-3-llama-3.1-70b1310720.000000120.0000003
58nvidia/llama-3.3-nemotron-super-49b-v11310720.000000130.0000004
30qwen/qwen3-235b-a22b409600.000000140.000002
90openai/gpt-4o-mini-search-preview1280000.000000150.0000006
136deepseek/deepseek-r1-distill-qwen-14b640000.000000150.00000015
214cohere/command-r-08-20241280000.000000150.0000006
238openai/gpt-4o-mini1280000.000000150.0000006
103qwen/qwq-32b1310720.000000150.0000002
196liquid/lfm-40b327680.000000150.00000015
41google/gemini-2.5-flash-preview:thinking10485760.000000150.0000035
40google/gemini-2.5-flash-preview10485760.000000150.0000006
239openai/gpt-4o-mini-2024-07-181280000.000000150.0000006
61meta-llama/llama-4-maverick10485760.000000170.0000006
67scb10x/llama3.1-typhoon2-8b-instruct81920.000000180.00000018
130deepseek/deepseek-r1-distill-qwen-1.5b1310720.000000180.00000018
3arcee-ai/spotlight1310720.000000180.00000018
216qwen/qwen-2.5-vl-7b-instruct327680.00000020.0000002
262meta-llama/llama-guard-2-8b81920.00000020.0000002
293mistralai/mistral-7b-instruct-v0.2327680.00000020.0000002
86ai21/jamba-1.6-mini2560000.00000020.0000004
122aion-labs/aion-rp-llama-3.1-8b327680.00000020.0000002
229perplexity/llama-3.1-sonar-small-128k-online1270720.00000020.0000002
145minimax/minimax-0110001920.00000020.0000011
291mistralai/mistral-small327680.00000020.0000006
112mistralai/mistral-saba327680.00000020.0000006
119qwen/qwen-vl-plus75000.000000210.00000063
39thudm/glm-4-32b320000.000000240.00000024
32thudm/glm-z1-rumination-32b320000.000000240.00000024
37thudm/glm-z1-32b320000.000000240.00000024
13inception/mercury-coder-small-beta320000.000000250.000001
292mistralai/mistral-tiny327680.000000250.00000025
126qwen/qwen2.5-vl-72b-instruct320000.000000250.00000075
194thedrummer/rocinante-12b327680.000000250.0000005
279anthropic/claude-3-haiku:beta2000000.000000250.00000125
235mistralai/codestral-mamba2621440.000000250.00000025
280anthropic/claude-3-haiku2000000.000000250.00000125
55x-ai/grok-3-mini-beta1310720.00000030.0000005
269meta-llama/llama-3-70b-instruct81920.00000030.0000004
146mistralai/codestral-25012621440.00000030.0000009
76deepseek/deepseek-chat-v3-03241638400.00000030.00000088
149deepseek/deepseek-chat1638400.000000380.00000089
270mistralai/mixtral-8x22b-instruct655360.00000040.0000012
0mistralai/mistral-medium-31310720.00000040.000002
48openai/gpt-4.1-mini10475760.00000040.0000016
127qwen/qwen-plus1310720.00000040.0000012
8arcee-ai/arcee-blitz327680.000000450.00000075
176thedrummer/unslopnemo-12b320000.000000450.00000045
278cohere/command-r1280000.00000050.0000015
19deepseek/deepseek-prover-v21310720.00000050.00000218
285cohere/command-r-03-20241280000.00000050.0000015
7arcee-ai/virtuoso-medium-v21310720.00000050.0000008
6arcee-ai/coder-large327680.00000050.0000008
271microsoft/wizardlm-2-8x22b655360.00000050.0000005
96thedrummer/skyfall-36b-v2327680.00000050.0000008
144deepseek/deepseek-r11638400.00000050.00000218
305jondurbin/airoboros-l2-70b40960.00000050.0000005
318openai/gpt-3.5-turbo163850.00000050.0000015
319openai/gpt-3.5-turbo-0125163850.00000050.0000015
245ai21/jamba-instruct2560000.00000050.0000007
2arcee-ai/caller-large327680.000000550.00000085
308pygmalionai/mythalion-13b81920.00000056250.000001125
315undi95/remm-slerp-l2-13b61440.00000056250.000001125
206qwen/qwen-2.5-vl-72b-instruct327680.00000060.0000006
289nousresearch/nous-hermes-2-mixtral-8x7b-dpo327680.00000060.0000006
150sao10k/l3.3-euryale-70b1310720.00000070.0000008
121aion-labs/aion-1.0-mini1310720.00000070.0000014
217sao10k/l3.1-euryale-70b1310720.00000070.0000008
5arcee-ai/virtuoso-large1310720.000000750.0000012
295neversleep/noromaid-20b81920.000000750.0000015
221nousresearch/hermes-3-llama-3.1-405b1310720.00000080.0000008
233meta-llama/llama-3.1-405b-instruct327680.00000080.0000008
224aetherwiing/mn-starcannon-12b163840.00000080.0000012
179anthropic/claude-3.5-haiku-20241022:beta2000000.00000080.000004
95thedrummer/anubis-pro-105b-v11310720.00000080.000001
180anthropic/claude-3.5-haiku-202410222000000.00000080.000004
51alfredpros/codellama-7b-instruct-solidity40960.00000080.0000012
50eleutherai/llemma_7b40960.00000080.0000012
267sao10k/fimbulvetr-11b-v240960.00000080.0000012
276sophosympatheia/midnight-rose-70b40960.00000080.0000008
123qwen/qwen-vl-max75000.00000080.0000032
161amazon/nova-pro-v13000000.00000080.0000032
171infermatic/mn-inferor-12b163840.00000080.0000012
300undi95/toppy-m-7b40960.00000080.0000012
177anthropic/claude-3.5-haiku:beta2000000.00000080.000004
178anthropic/claude-3.5-haiku2000000.00000080.000004
228nothingiisreal/mn-celeste-12b163840.00000080.0000012
68scb10x/llama3.1-typhoon2-70b-instruct81920.000000880.00000088
74qwen/qwen2.5-vl-32b-instruct1280000.00000090.0000009
249cognitivecomputations/dolphin-mixtral-8x22b160000.00000090.0000009
4arcee-ai/maestro-reasoning1310720.00000090.0000033
317meta-llama/llama-2-70b-chat40960.00000090.0000009
250qwen/qwen-2-72b-instruct327680.00000090.0000009
230perplexity/llama-3.1-sonar-large-128k-online1270720.0000010.000001
287openai/gpt-3.5-turbo-061340950.0000010.000002
277cohere/command40960.0000010.000002
138perplexity/sonar1270720.0000010.000001
137perplexity/sonar-reasoning1270000.0000010.000005
303openai/gpt-3.5-turbo-1106163850.0000010.000002
42openai/o4-mini-high2000000.00000110.0000044
210openai/o1-mini1280000.00000110.0000044
211openai/o1-mini-2024-09-121280000.00000110.0000044
44openai/o4-mini2000000.00000110.0000044
129openai/o3-mini2000000.00000110.0000044
116openai/o3-mini-high2000000.00000110.0000044
312mancer/weaver80000.0000011250.000001125
201meta-llama/llama-3.2-90b-vision-instruct1310720.00000120.0000012
272google/gemini-pro-1.520000000.000001250.000005
1google/gemini-2.5-pro-preview10485760.000001250.00001
248sao10k/l3-euryale-70b81920.000001480.00000148
181neversleep/llama-3.1-lumimaid-70b163840.00000150.00000225
306openai/gpt-3.5-turbo-instruct40950.00000150.000002
182anthracite-org/magnum-v4-72b163840.00000150.00000225
128qwen/qwen-max327680.00000160.0000064
169mistralai/pixtral-large-24111310720.0000020.000006
286mistralai/mistral-large1280000.0000020.000006
85ai21/jamba-1.6-large2560000.0000020.000008
154x-ai/grok-2-12121310720.0000020.00001
47openai/gpt-4.110475760.0000020.000008
100perplexity/sonar-deep-research1280000.0000020.000008
227meta-llama/llama-3.1-405b327680.0000020.000002
153x-ai/grok-2-vision-1212327680.0000020.00001
168mistralai/mistral-large-24071310720.0000020.000006
98perplexity/sonar-reasoning-pro1280000.0000020.000008
111perplexity/r1-17761280000.0000020.000008
167mistralai/mistral-large-24111310720.0000020.000006
166openai/gpt-4o-2024-11-201280000.00000250.00001
225openai/gpt-4o-2024-08-061280000.00000250.00001
260openai/gpt-4o1280000.00000250.00001
192inflection/inflection-3-pi80000.00000250.00001
91openai/gpt-4o-search-preview1280000.00000250.00001
213cohere/command-r-plus-08-20241280000.00000250.00001
191inflection/inflection-3-productivity80000.00000250.00001
89cohere/command-a2560000.00000250.00001
64all-hands/openhands-lm-32b-v0.1163840.00000260.0000034
175eva-unit-01/eva-qwen-2.5-32b163840.00000260.0000034
290mistralai/mistral-medium327680.000002750.0000081
195anthracite-org/magnum-v2-72b327680.0000030.000003
284anthropic/claude-3-sonnet2000000.0000030.000015
283anthropic/claude-3-sonnet:beta2000000.0000030.000015
309openai/gpt-3.5-turbo-16k163850.0000030.000004
184anthropic/claude-3.5-sonnet2000000.0000030.000015
183anthropic/claude-3.5-sonnet:beta2000000.0000030.000015
275cohere/command-r-plus-04-20241280000.0000030.000015
274cohere/command-r-plus1280000.0000030.000015
109anthropic/claude-3.7-sonnet:thinking2000000.0000030.000015
110anthropic/claude-3.7-sonnet:beta2000000.0000030.000015
99perplexity/sonar-pro2000000.0000030.000015
24401-ai/yi-large327680.0000030.000003
246anthropic/claude-3.5-sonnet-20240620:beta2000000.0000030.000015
247anthropic/claude-3.5-sonnet-202406202000000.0000030.000015
56x-ai/grok-3-beta1310720.0000030.000015
108anthropic/claude-3.7-sonnet2000000.0000030.000015
152eva-unit-01/eva-llama-3.33-70b163840.0000040.000006
257neversleep/llama-3-lumimaid-70b81920.0000040.000006
241alpindale/magnum-72b163840.0000040.000006
165eva-unit-01/eva-qwen-2.5-72b163840.0000040.000006
120aion-labs/aion-1.01310720.0000040.000008
174raifle/sorcererlm-8x22b160000.00000450.0000045
263openai/gpt-4o-2024-05-131280000.0000050.000015
222openai/chatgpt-4o-latest1280000.0000050.000015
170x-ai/grok-vision-beta81920.0000050.000015
185x-ai/grok-beta1310720.0000050.000015
261openai/gpt-4o:extended1280000.0000060.000018
301alpindale/goliath-120b61440.00000656250.000009375
313anthropic/claude-2.0:beta1000000.0000080.000024
297anthropic/claude-2.12000000.0000080.000024
299anthropic/claude-22000000.0000080.000024
298anthropic/claude-2:beta2000000.0000080.000024
314anthropic/claude-2.01000000.0000080.000024
296anthropic/claude-2.1:beta2000000.0000080.000024
304openai/gpt-4-1106-preview1280000.000010.00003
43openai/o32000000.000010.00004
273openai/gpt-4-turbo1280000.000010.00003
288openai/gpt-4-turbo-preview1280000.000010.00003
151openai/o12000000.0000150.00006
282anthropic/claude-3-opus2000000.0000150.000075
281anthropic/claude-3-opus:beta2000000.0000150.000075
208openai/o1-preview1280000.0000150.00006
209openai/o1-preview-2024-09-121280000.0000150.00006
321openai/gpt-4-031481910.000030.00006
320openai/gpt-481910.000030.00006
311openai/gpt-4-32k-0314327670.000060.00012
310openai/gpt-4-32k327670.000060.00012
106openai/gpt-4.5-preview1280000.0000750.00015
78openai/o1-pro2000000.000150.0006
\n", + "
" + ], + "text/plain": [ + " id context_length pricing_prompt pricing_completion\n", + "302 openrouter/auto 2000000 -1 -1\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free 16000 0 0\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free 131072 0 0\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... 32768 0 0\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free 131072 0 0\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free 32768 0 0\n", + "54 moonshotai/kimi-vl-a3b-thinking:free 131072 0 0\n", + "53 agentica-org/deepcoder-14b-preview:free 96000 0 0\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free 32768 0 0\n", + "231 meta-llama/llama-3.1-8b-instruct:free 131072 0 0\n", + "226 meta-llama/llama-3.1-405b:free 64000 0 0\n", + "125 qwen/qwen2.5-vl-72b-instruct:free 131072 0 0\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free 32768 0 0\n", + "87 google/gemma-3-12b-it:free 131072 0 0\n", + "92 rekaai/reka-flash-3:free 32768 0 0\n", + "131 mistralai/mistral-small-24b-instruct-2501:free 32768 0 0\n", + "81 open-r1/olympiccoder-32b:free 32768 0 0\n", + "60 meta-llama/llama-4-maverick:free 256000 0 0\n", + "236 mistralai/mistral-nemo:free 128000 0 0\n", + "62 meta-llama/llama-4-scout:free 512000 0 0\n", + "83 google/gemma-3-4b-it:free 131072 0 0\n", + "93 google/gemma-3-27b-it:free 96000 0 0\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free 96000 0 0\n", + "251 mistralai/mistral-7b-instruct:free 32768 0 0\n", + "77 featherless/qwerky-72b:free 32768 0 0\n", + "75 deepseek/deepseek-chat-v3-0324:free 163840 0 0\n", + "242 google/gemma-2-9b-it:free 8192 0 0\n", + "73 qwen/qwen2.5-vl-32b-instruct:free 8192 0 0\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free 64000 0 0\n", + "72 google/gemini-2.5-pro-exp-03-25 1000000 0 0\n", + "70 bytedance-research/ui-tars-72b:free 32768 0 0\n", + "69 allenai/molmo-7b-d:free 4096 0 0\n", + "101 deepseek/deepseek-r1-zero:free 163840 0 0\n", + "102 qwen/qwq-32b:free 40000 0 0\n", + "66 deepseek/deepseek-v3-base:free 163840 0 
0\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free 8192 0 0\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free 131072 0 0\n", + "71 qwen/qwen2.5-vl-3b-instruct:free 64000 0 0\n", + "218 google/gemini-flash-1.5-8b-exp 1000000 0 0\n", + "82 google/gemma-3-1b-it:free 32768 0 0\n", + "156 google/gemini-2.0-flash-exp:free 1048576 0 0\n", + "204 qwen/qwen-2.5-72b-instruct:free 32768 0 0\n", + "21 qwen/qwen3-30b-a3b:free 40960 0 0\n", + "38 thudm/glm-4-32b:free 32768 0 0\n", + "157 meta-llama/llama-3.3-70b-instruct:free 8000 0 0\n", + "18 deepseek/deepseek-prover-v2:free 163840 0 0\n", + "17 opengvlab/internvl3-2b:free 32000 0 0\n", + "23 qwen/qwen3-8b:free 40960 0 0\n", + "16 opengvlab/internvl3-14b:free 32000 0 0\n", + "14 qwen/qwen3-1.7b:free 32000 0 0\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free 131072 0 0\n", + "12 qwen/qwen3-0.6b-04-28:free 32000 0 0\n", + "11 microsoft/phi-4-reasoning:free 32768 0 0\n", + "162 qwen/qwq-32b-preview:free 16384 0 0\n", + "9 microsoft/phi-4-reasoning-plus:free 32768 0 0\n", + "15 qwen/qwen3-4b:free 128000 0 0\n", + "164 google/learnlm-1.5-pro-experimental:free 40960 0 0\n", + "148 deepseek/deepseek-chat:free 163840 0 0\n", + "199 meta-llama/llama-3.2-1b-instruct:free 131000 0 0\n", + "36 thudm/glm-z1-32b:free 32768 0 0\n", + "35 microsoft/mai-ds-r1:free 163840 0 0\n", + "34 thudm/glm-4-9b:free 32000 0 0\n", + "33 thudm/glm-z1-9b:free 32000 0 0\n", + "188 qwen/qwen-2.5-7b-instruct:free 32768 0 0\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free 32768 0 0\n", + "25 qwen/qwen3-14b:free 40960 0 0\n", + "197 meta-llama/llama-3.2-3b-instruct:free 20000 0 0\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free 8192 0 0\n", + "29 qwen/qwen3-235b-a22b:free 40960 0 0\n", + "143 deepseek/deepseek-r1:free 163840 0 0\n", + "27 qwen/qwen3-32b:free 40960 0 0\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free 64000 0 0\n", + "31 tngtech/deepseek-r1t-chimera:free 163840 0 0\n", + "200 meta-llama/llama-3.2-1b-instruct 131072 
0.000000005 0.00000001\n", + "198 meta-llama/llama-3.2-3b-instruct 131072 0.00000001 0.00000002\n", + "139 liquid/lfm-7b 32768 0.00000001 0.00000001\n", + "46 qwen/qwen2.5-coder-7b-instruct 32768 0.00000001 0.00000003\n", + "243 google/gemma-2-9b-it 8192 0.00000002 0.00000006\n", + "232 meta-llama/llama-3.1-8b-instruct 16384 0.00000002 0.00000003\n", + "84 google/gemma-3-4b-it 131072 0.00000002 0.00000004\n", + "140 liquid/lfm-3b 32768 0.00000002 0.00000002\n", + "115 meta-llama/llama-guard-3-8b 131072 0.00000002 0.00000006\n", + "223 sao10k/l3-lunaris-8b 8192 0.00000002 0.00000005\n", + "253 nousresearch/hermes-2-pro-llama-3-8b 131072 0.000000025 0.00000004\n", + "254 mistralai/mistral-7b-instruct-v0.3 32768 0.000000028 0.000000054\n", + "252 mistralai/mistral-7b-instruct 32768 0.000000028 0.000000054\n", + "268 meta-llama/llama-3-8b-instruct 8192 0.00000003 0.00000006\n", + "219 microsoft/phi-3.5-mini-128k-instruct 131072 0.00000003 0.00000009\n", + "237 mistralai/mistral-nemo 98304 0.00000003 0.00000007\n", + "160 amazon/nova-micro-v1 128000 0.000000035 0.00000014\n", + "24 qwen/qwen3-8b 128000 0.000000035 0.000000138\n", + "193 google/gemini-flash-1.5-8b 1000000 0.0000000375 0.00000015\n", + "155 cohere/command-r7b-12-2024 128000 0.0000000375 0.00000015\n", + "187 mistralai/ministral-3b 131072 0.00000004 0.00000004\n", + "117 deepseek/deepseek-r1-distill-llama-8b 32000 0.00000004 0.00000004\n", + "258 deepseek/deepseek-coder 128000 0.00000004 0.00000012\n", + "203 meta-llama/llama-3.2-11b-vision-instruct 131072 0.000000049 0.000000049\n", + "80 mistralai/mistral-small-3.1-24b-instruct 131072 0.00000005 0.00000015\n", + "20 meta-llama/llama-guard-4-12b 163840 0.00000005 0.00000005\n", + "189 qwen/qwen-2.5-7b-instruct 32768 0.00000005 0.0000001\n", + "97 microsoft/phi-4-multimodal-instruct 131072 0.00000005 0.0000001\n", + "88 google/gemma-3-12b-it 131072 0.00000005 0.0000001\n", + "124 qwen/qwen-turbo 1000000 0.00000005 0.0000002\n", + "132 
mistralai/mistral-small-24b-instruct-2501 28000 0.00000006 0.00000012\n", + "173 qwen/qwen-2.5-coder-32b-instruct 32768 0.00000006 0.00000015\n", + "159 amazon/nova-lite-v1 300000 0.00000006 0.00000024\n", + "316 gryphe/mythomax-l2-13b 4096 0.000000065 0.000000065\n", + "10 microsoft/phi-4-reasoning-plus 32768 0.00000007 0.00000035\n", + "147 microsoft/phi-4 16384 0.00000007 0.00000014\n", + "26 qwen/qwen3-14b 40960 0.00000007 0.00000024\n", + "259 google/gemini-flash-1.5 1000000 0.000000075 0.0000003\n", + "107 google/gemini-2.0-flash-lite-001 1048576 0.000000075 0.0000003\n", + "63 meta-llama/llama-4-scout 1048576 0.00000008 0.0000003\n", + "294 mistralai/mixtral-8x7b-instruct 32768 0.00000008 0.00000024\n", + "264 allenai/olmo-7b-instruct 2048 0.00000008 0.00000024\n", + "163 qwen/qwq-32b-preview 32768 0.00000009 0.00000027\n", + "158 meta-llama/llama-3.3-70b-instruct 131000 0.00000009 0.00000035\n", + "266 neversleep/llama-3-lumimaid-8b 24576 0.00000009375 0.00000075\n", + "207 neversleep/llama-3.1-lumimaid-8b 32768 0.00000009375 0.00000075\n", + "265 neversleep/llama-3-lumimaid-8b:extended 24576 0.00000009375 0.00000075\n", + "212 mistralai/pixtral-12b 32768 0.0000001 0.0000001\n", + "28 qwen/qwen3-32b 40960 0.0000001 0.0000003\n", + "49 openai/gpt-4.1-nano 1047576 0.0000001 0.0000004\n", + "186 mistralai/ministral-8b 128000 0.0000001 0.0000001\n", + "118 google/gemini-2.0-flash-001 1000000 0.0000001 0.0000004\n", + "234 meta-llama/llama-3.1-70b-instruct 131072 0.0000001 0.00000028\n", + "65 mistral/ministral-8b 131072 0.0000001 0.0000001\n", + "240 google/gemma-2-27b-it 8192 0.0000001 0.0000003\n", + "256 microsoft/phi-3-medium-128k-instruct 131072 0.0000001 0.0000003\n", + "255 microsoft/phi-3-mini-128k-instruct 128000 0.0000001 0.0000001\n", + "94 google/gemma-3-27b-it 131072 0.0000001 0.0000002\n", + "22 qwen/qwen3-30b-a3b 40960 0.0000001 0.0000003\n", + "142 deepseek/deepseek-r1-distill-llama-70b 131072 0.0000001 0.0000004\n", + "307 
mistralai/mistral-7b-instruct-v0.1 2824 0.00000011 0.00000019\n", + "205 qwen/qwen-2.5-72b-instruct 32768 0.00000012 0.00000039\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct 131072 0.00000012 0.0000003\n", + "134 deepseek/deepseek-r1-distill-qwen-32b 131072 0.00000012 0.00000018\n", + "220 nousresearch/hermes-3-llama-3.1-70b 131072 0.00000012 0.0000003\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 131072 0.00000013 0.0000004\n", + "30 qwen/qwen3-235b-a22b 40960 0.00000014 0.000002\n", + "90 openai/gpt-4o-mini-search-preview 128000 0.00000015 0.0000006\n", + "136 deepseek/deepseek-r1-distill-qwen-14b 64000 0.00000015 0.00000015\n", + "214 cohere/command-r-08-2024 128000 0.00000015 0.0000006\n", + "238 openai/gpt-4o-mini 128000 0.00000015 0.0000006\n", + "103 qwen/qwq-32b 131072 0.00000015 0.0000002\n", + "196 liquid/lfm-40b 32768 0.00000015 0.00000015\n", + "41 google/gemini-2.5-flash-preview:thinking 1048576 0.00000015 0.0000035\n", + "40 google/gemini-2.5-flash-preview 1048576 0.00000015 0.0000006\n", + "239 openai/gpt-4o-mini-2024-07-18 128000 0.00000015 0.0000006\n", + "61 meta-llama/llama-4-maverick 1048576 0.00000017 0.0000006\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct 8192 0.00000018 0.00000018\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b 131072 0.00000018 0.00000018\n", + "3 arcee-ai/spotlight 131072 0.00000018 0.00000018\n", + "216 qwen/qwen-2.5-vl-7b-instruct 32768 0.0000002 0.0000002\n", + "262 meta-llama/llama-guard-2-8b 8192 0.0000002 0.0000002\n", + "293 mistralai/mistral-7b-instruct-v0.2 32768 0.0000002 0.0000002\n", + "86 ai21/jamba-1.6-mini 256000 0.0000002 0.0000004\n", + "122 aion-labs/aion-rp-llama-3.1-8b 32768 0.0000002 0.0000002\n", + "229 perplexity/llama-3.1-sonar-small-128k-online 127072 0.0000002 0.0000002\n", + "145 minimax/minimax-01 1000192 0.0000002 0.0000011\n", + "291 mistralai/mistral-small 32768 0.0000002 0.0000006\n", + "112 mistralai/mistral-saba 32768 0.0000002 0.0000006\n", + "119 qwen/qwen-vl-plus 7500 
0.00000021 0.00000063\n", + "39 thudm/glm-4-32b 32000 0.00000024 0.00000024\n", + "32 thudm/glm-z1-rumination-32b 32000 0.00000024 0.00000024\n", + "37 thudm/glm-z1-32b 32000 0.00000024 0.00000024\n", + "13 inception/mercury-coder-small-beta 32000 0.00000025 0.000001\n", + "292 mistralai/mistral-tiny 32768 0.00000025 0.00000025\n", + "126 qwen/qwen2.5-vl-72b-instruct 32000 0.00000025 0.00000075\n", + "194 thedrummer/rocinante-12b 32768 0.00000025 0.0000005\n", + "279 anthropic/claude-3-haiku:beta 200000 0.00000025 0.00000125\n", + "235 mistralai/codestral-mamba 262144 0.00000025 0.00000025\n", + "280 anthropic/claude-3-haiku 200000 0.00000025 0.00000125\n", + "55 x-ai/grok-3-mini-beta 131072 0.0000003 0.0000005\n", + "269 meta-llama/llama-3-70b-instruct 8192 0.0000003 0.0000004\n", + "146 mistralai/codestral-2501 262144 0.0000003 0.0000009\n", + "76 deepseek/deepseek-chat-v3-0324 163840 0.0000003 0.00000088\n", + "149 deepseek/deepseek-chat 163840 0.00000038 0.00000089\n", + "270 mistralai/mixtral-8x22b-instruct 65536 0.0000004 0.0000012\n", + "0 mistralai/mistral-medium-3 131072 0.0000004 0.000002\n", + "48 openai/gpt-4.1-mini 1047576 0.0000004 0.0000016\n", + "127 qwen/qwen-plus 131072 0.0000004 0.0000012\n", + "8 arcee-ai/arcee-blitz 32768 0.00000045 0.00000075\n", + "176 thedrummer/unslopnemo-12b 32000 0.00000045 0.00000045\n", + "278 cohere/command-r 128000 0.0000005 0.0000015\n", + "19 deepseek/deepseek-prover-v2 131072 0.0000005 0.00000218\n", + "285 cohere/command-r-03-2024 128000 0.0000005 0.0000015\n", + "7 arcee-ai/virtuoso-medium-v2 131072 0.0000005 0.0000008\n", + "6 arcee-ai/coder-large 32768 0.0000005 0.0000008\n", + "271 microsoft/wizardlm-2-8x22b 65536 0.0000005 0.0000005\n", + "96 thedrummer/skyfall-36b-v2 32768 0.0000005 0.0000008\n", + "144 deepseek/deepseek-r1 163840 0.0000005 0.00000218\n", + "305 jondurbin/airoboros-l2-70b 4096 0.0000005 0.0000005\n", + "318 openai/gpt-3.5-turbo 16385 0.0000005 0.0000015\n", + "319 openai/gpt-3.5-turbo-0125 
16385 0.0000005 0.0000015\n", + "245 ai21/jamba-instruct 256000 0.0000005 0.0000007\n", + "2 arcee-ai/caller-large 32768 0.00000055 0.00000085\n", + "308 pygmalionai/mythalion-13b 8192 0.0000005625 0.000001125\n", + "315 undi95/remm-slerp-l2-13b 6144 0.0000005625 0.000001125\n", + "206 qwen/qwen-2.5-vl-72b-instruct 32768 0.0000006 0.0000006\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo 32768 0.0000006 0.0000006\n", + "150 sao10k/l3.3-euryale-70b 131072 0.0000007 0.0000008\n", + "121 aion-labs/aion-1.0-mini 131072 0.0000007 0.0000014\n", + "217 sao10k/l3.1-euryale-70b 131072 0.0000007 0.0000008\n", + "5 arcee-ai/virtuoso-large 131072 0.00000075 0.0000012\n", + "295 neversleep/noromaid-20b 8192 0.00000075 0.0000015\n", + "221 nousresearch/hermes-3-llama-3.1-405b 131072 0.0000008 0.0000008\n", + "233 meta-llama/llama-3.1-405b-instruct 32768 0.0000008 0.0000008\n", + "224 aetherwiing/mn-starcannon-12b 16384 0.0000008 0.0000012\n", + "179 anthropic/claude-3.5-haiku-20241022:beta 200000 0.0000008 0.000004\n", + "95 thedrummer/anubis-pro-105b-v1 131072 0.0000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 200000 0.0000008 0.000004\n", + "51 alfredpros/codellama-7b-instruct-solidity 4096 0.0000008 0.0000012\n", + "50 eleutherai/llemma_7b 4096 0.0000008 0.0000012\n", + "267 sao10k/fimbulvetr-11b-v2 4096 0.0000008 0.0000012\n", + "276 sophosympatheia/midnight-rose-70b 4096 0.0000008 0.0000008\n", + "123 qwen/qwen-vl-max 7500 0.0000008 0.0000032\n", + "161 amazon/nova-pro-v1 300000 0.0000008 0.0000032\n", + "171 infermatic/mn-inferor-12b 16384 0.0000008 0.0000012\n", + "300 undi95/toppy-m-7b 4096 0.0000008 0.0000012\n", + "177 anthropic/claude-3.5-haiku:beta 200000 0.0000008 0.000004\n", + "178 anthropic/claude-3.5-haiku 200000 0.0000008 0.000004\n", + "228 nothingiisreal/mn-celeste-12b 16384 0.0000008 0.0000012\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct 8192 0.00000088 0.00000088\n", + "74 qwen/qwen2.5-vl-32b-instruct 128000 0.0000009 0.0000009\n", 
+ "249 cognitivecomputations/dolphin-mixtral-8x22b 16000 0.0000009 0.0000009\n", + "4 arcee-ai/maestro-reasoning 131072 0.0000009 0.0000033\n", + "317 meta-llama/llama-2-70b-chat 4096 0.0000009 0.0000009\n", + "250 qwen/qwen-2-72b-instruct 32768 0.0000009 0.0000009\n", + "230 perplexity/llama-3.1-sonar-large-128k-online 127072 0.000001 0.000001\n", + "287 openai/gpt-3.5-turbo-0613 4095 0.000001 0.000002\n", + "277 cohere/command 4096 0.000001 0.000002\n", + "138 perplexity/sonar 127072 0.000001 0.000001\n", + "137 perplexity/sonar-reasoning 127000 0.000001 0.000005\n", + "303 openai/gpt-3.5-turbo-1106 16385 0.000001 0.000002\n", + "42 openai/o4-mini-high 200000 0.0000011 0.0000044\n", + "210 openai/o1-mini 128000 0.0000011 0.0000044\n", + "211 openai/o1-mini-2024-09-12 128000 0.0000011 0.0000044\n", + "44 openai/o4-mini 200000 0.0000011 0.0000044\n", + "129 openai/o3-mini 200000 0.0000011 0.0000044\n", + "116 openai/o3-mini-high 200000 0.0000011 0.0000044\n", + "312 mancer/weaver 8000 0.000001125 0.000001125\n", + "201 meta-llama/llama-3.2-90b-vision-instruct 131072 0.0000012 0.0000012\n", + "272 google/gemini-pro-1.5 2000000 0.00000125 0.000005\n", + "1 google/gemini-2.5-pro-preview 1048576 0.00000125 0.00001\n", + "248 sao10k/l3-euryale-70b 8192 0.00000148 0.00000148\n", + "181 neversleep/llama-3.1-lumimaid-70b 16384 0.0000015 0.00000225\n", + "306 openai/gpt-3.5-turbo-instruct 4095 0.0000015 0.000002\n", + "182 anthracite-org/magnum-v4-72b 16384 0.0000015 0.00000225\n", + "128 qwen/qwen-max 32768 0.0000016 0.0000064\n", + "169 mistralai/pixtral-large-2411 131072 0.000002 0.000006\n", + "286 mistralai/mistral-large 128000 0.000002 0.000006\n", + "85 ai21/jamba-1.6-large 256000 0.000002 0.000008\n", + "154 x-ai/grok-2-1212 131072 0.000002 0.00001\n", + "47 openai/gpt-4.1 1047576 0.000002 0.000008\n", + "100 perplexity/sonar-deep-research 128000 0.000002 0.000008\n", + "227 meta-llama/llama-3.1-405b 32768 0.000002 0.000002\n", + "153 x-ai/grok-2-vision-1212 32768 
0.000002 0.00001\n", + "168 mistralai/mistral-large-2407 131072 0.000002 0.000006\n", + "98 perplexity/sonar-reasoning-pro 128000 0.000002 0.000008\n", + "111 perplexity/r1-1776 128000 0.000002 0.000008\n", + "167 mistralai/mistral-large-2411 131072 0.000002 0.000006\n", + "166 openai/gpt-4o-2024-11-20 128000 0.0000025 0.00001\n", + "225 openai/gpt-4o-2024-08-06 128000 0.0000025 0.00001\n", + "260 openai/gpt-4o 128000 0.0000025 0.00001\n", + "192 inflection/inflection-3-pi 8000 0.0000025 0.00001\n", + "91 openai/gpt-4o-search-preview 128000 0.0000025 0.00001\n", + "213 cohere/command-r-plus-08-2024 128000 0.0000025 0.00001\n", + "191 inflection/inflection-3-productivity 8000 0.0000025 0.00001\n", + "89 cohere/command-a 256000 0.0000025 0.00001\n", + "64 all-hands/openhands-lm-32b-v0.1 16384 0.0000026 0.0000034\n", + "175 eva-unit-01/eva-qwen-2.5-32b 16384 0.0000026 0.0000034\n", + "290 mistralai/mistral-medium 32768 0.00000275 0.0000081\n", + "195 anthracite-org/magnum-v2-72b 32768 0.000003 0.000003\n", + "284 anthropic/claude-3-sonnet 200000 0.000003 0.000015\n", + "283 anthropic/claude-3-sonnet:beta 200000 0.000003 0.000015\n", + "309 openai/gpt-3.5-turbo-16k 16385 0.000003 0.000004\n", + "184 anthropic/claude-3.5-sonnet 200000 0.000003 0.000015\n", + "183 anthropic/claude-3.5-sonnet:beta 200000 0.000003 0.000015\n", + "275 cohere/command-r-plus-04-2024 128000 0.000003 0.000015\n", + "274 cohere/command-r-plus 128000 0.000003 0.000015\n", + "109 anthropic/claude-3.7-sonnet:thinking 200000 0.000003 0.000015\n", + "110 anthropic/claude-3.7-sonnet:beta 200000 0.000003 0.000015\n", + "99 perplexity/sonar-pro 200000 0.000003 0.000015\n", + "244 01-ai/yi-large 32768 0.000003 0.000003\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta 200000 0.000003 0.000015\n", + "247 anthropic/claude-3.5-sonnet-20240620 200000 0.000003 0.000015\n", + "56 x-ai/grok-3-beta 131072 0.000003 0.000015\n", + "108 anthropic/claude-3.7-sonnet 200000 0.000003 0.000015\n", + "152 
eva-unit-01/eva-llama-3.33-70b 16384 0.000004 0.000006\n", + "257 neversleep/llama-3-lumimaid-70b 8192 0.000004 0.000006\n", + "241 alpindale/magnum-72b 16384 0.000004 0.000006\n", + "165 eva-unit-01/eva-qwen-2.5-72b 16384 0.000004 0.000006\n", + "120 aion-labs/aion-1.0 131072 0.000004 0.000008\n", + "174 raifle/sorcererlm-8x22b 16000 0.0000045 0.0000045\n", + "263 openai/gpt-4o-2024-05-13 128000 0.000005 0.000015\n", + "222 openai/chatgpt-4o-latest 128000 0.000005 0.000015\n", + "170 x-ai/grok-vision-beta 8192 0.000005 0.000015\n", + "185 x-ai/grok-beta 131072 0.000005 0.000015\n", + "261 openai/gpt-4o:extended 128000 0.000006 0.000018\n", + "301 alpindale/goliath-120b 6144 0.0000065625 0.000009375\n", + "313 anthropic/claude-2.0:beta 100000 0.000008 0.000024\n", + "297 anthropic/claude-2.1 200000 0.000008 0.000024\n", + "299 anthropic/claude-2 200000 0.000008 0.000024\n", + "298 anthropic/claude-2:beta 200000 0.000008 0.000024\n", + "314 anthropic/claude-2.0 100000 0.000008 0.000024\n", + "296 anthropic/claude-2.1:beta 200000 0.000008 0.000024\n", + "304 openai/gpt-4-1106-preview 128000 0.00001 0.00003\n", + "43 openai/o3 200000 0.00001 0.00004\n", + "273 openai/gpt-4-turbo 128000 0.00001 0.00003\n", + "288 openai/gpt-4-turbo-preview 128000 0.00001 0.00003\n", + "151 openai/o1 200000 0.000015 0.00006\n", + "282 anthropic/claude-3-opus 200000 0.000015 0.000075\n", + "281 anthropic/claude-3-opus:beta 200000 0.000015 0.000075\n", + "208 openai/o1-preview 128000 0.000015 0.00006\n", + "209 openai/o1-preview-2024-09-12 128000 0.000015 0.00006\n", + "321 openai/gpt-4-0314 8191 0.00003 0.00006\n", + "320 openai/gpt-4 8191 0.00003 0.00006\n", + "311 openai/gpt-4-32k-0314 32767 0.00006 0.00012\n", + "310 openai/gpt-4-32k 32767 0.00006 0.00012\n", + "106 openai/gpt-4.5-preview 128000 0.000075 0.00015\n", + "78 openai/o1-pro 200000 0.00015 0.0006" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"df.sort_values(\"pricing_prompt\")[col_names]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABqwAAAHJCAYAAADwyhjGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl0VFXWt58aMlZSGSAEQoAMShImIQgKwYi2imCLLWqD2gqKiC1qK+DYyiAo6CvtAMqkyCCCoiKDjAoSgwwKUQhhzCBDIASSVCWVqZKq7498uVBGEAJHbjjnWetddm6d+9Rv34vd683mnG1wu91uFAqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLhPFSB1AoFAqFQqFQKBQKhUKhUCgUCoVCoVDIjWpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLimpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLivlSB1BcXrjdblwu96WOoUuMRsNFfTYX2yfCKWNGGWsW4dS7T4RTZZTDJ8KpMsrhE+GUMaOMNYtwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjPn2XA0ajAYPBcE5rVcNKcVFxudwUFDgudQzdYTYbCQmxYLeXUlXl0p1PhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4dS7T4RTZdRnRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzXg5EBpqwWQ6t4aVOhJQoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUV3DavMzEweeughOnbsSFJSEm+++SaVlZV/ep/b7WbGjBn07NmTDh060L9/f3755Zc66/Ly8njyySfp1KkTXbt25b///S8lJSV11q1bt46+ffvSvn17evXqxZdffllnTWVlJW+88QZJSUl07NiRhx56iKysrHrXtGjRInr16kX79u3p27cv69evr7OmuLiYl156ia5du9KpUyeeeuopjh8/Xmedy+Vi9uzZ3HrrrbRr146kpCRGjBjhseaBBx4gLi6uzv9lZmbW8SkUCoVCoVAoFAqFQqFQKBQKhUKhUIjCfKkDnI7NZmPgwIFERUUxefJk8vLymDhxIuXl5YwaNeqs986cOZP33nuPkSNHEhcXx/z583n44YdZsmQJLVq0AMDp
dPLII48AMGnSJMrLy3njjTcYMWIE06dP11w///wzTzzxBHfffTcvvfQSmzdv5r///S8Wi4Vbb71VWzd+/HhWrFjBCy+8QHh4ONOmTWPQoEF88803BAYGnldN33zzDa+88gqPPfYY1157LStWrOCJJ55g/vz5dOzYUVv39NNPc+DAAcaMGYOPjw/vvPMOQ4YM4csvv8RsPvU6R40axfr163n88ce58soryc/PZ9u2bXWeW2JiIs8//7zHtcjIyD97VQqFQqFQKBQKhUKhUCgUCoVCoVA0WI6edJB5rASLt5HGVt9LHUeBzhpWCxcuxOFwMGXKFIKDgwGorq5m7NixDB06lPDw8D+8r6KigunTp/Pwww8zaNAgADp37sytt97KRx99xJgxYwBYvXo1+/fvZ8WKFcTExABgtVoZPHgwO3bsoEOHDgBMnTqVDh068OqrrwJw7bXXcujQId577z2tYXXs2DG++OILRo8ezd133w1A+/btueGGG1i4cCFDhgw5r5ree+89brvtNp5++mntO/ft28f777/PzJkzAUhLSyM1NZWPPvqIHj16ABAdHU2fPn1Ys2YNffr0AWDTpk0sXryYr776iri4OO053XbbbXWendVq9WiIKRQKhUKhUCgUCoVCoVAoFAqFQnG5UlLmZMbSXaRnF2jX2kWHMvSOtlh8vS5hMoWujgRMSUmhW7duWmMHoHfv3rhcLjZu3HjG+7Zv305JSQm9e/fWrnl7e3PzzTeTkpLi4Y+Li9OaVQBJSUkEBwezYcMGoOaYvy1btnjspALo06cPmZmZHD58GIDU1FRcLpfHuuDgYJKSkup855/VdOjQIXJycjzy137npk2btOMDU1JSsFqtJCUlaWtiYmJISEjw+M7PP/+crl27ejSrFAqFQqFQKBQKhUKhUCgUCoVCoZCdGUt3kZFT4HEtI6eA6Ut2XaJEilp0tcMqKyuLu+66y+Oa1WolLCzsD2dDnX4f4NGIAoiNjWXOnDmUl5fj6+tLVlZWnTUGg4Ho6GjNcfDgQZxO5x+6AJ544glycnIA8PX1xc/Pr866L774ok5NM2bM4NNPP6WgoICEhASCg4O176z9p9Vq5cknnyQ1NRUvLy86dOiA0+nk0KFDxMbGkpWVRXR0NOvXr+edd94hOzubiIgIgoKCPJ7Pr7/+SqNGjejZsydHjx4FoGPHjkycOJHo6Ght3cmTJ9m2bZvW2PLz8+P+++9n5MiRGAyGMz7vP8Ns1lUfVBeYTEaPf+rNJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zChjzSKcF8t39KTDY2dVLS43pGcXcMJeTtNQ/0uaUWZ01bCy2+1YrdY614OCgrDZbGe9z9vbGx8fH4/rVqsVt9uNzWbD19cXu92uzZY6k7/2n7/PUdvAqaioYPLkyUybNo3t27czceJEj1lUVqvVI6vdbmf37t1s2bLFY77Wr7/+ypEjRzy+c/To0ZjNZm2+1vjx4z0+t9vtuN3uOvO1pk6dSlhYmPad+fn55Obm4u/vT2JiItu3b+fEiRMMHjyYlStXas+ppKSEtm3bkpycTHV1NV988QUffvghRUVFvPbaa2d83mfDaDQQEmKp170yYLX6/fmiS+gT4ZQxo4w1i3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoxw+EU4ZMmYeKznr545K1wX/flvEc5QFXTWs9MzixYsBGDRoENdddx2rV68mJyeHhQsXnnW+FsDWrVvrzNfq1KkTe/fu9ViXlZXFypUrtd1dBQUFjBs3jszMTBITE4GaHWC/n6+1YsUKcnNzNY/b7cbb25s1a9aQmZnJgw8+yLPPPsszzzzDsmXLtJlbX3/9NaGhodp9jz76KElJSSxevJhx48ZhNJ5/J9jlcmO3l573fZc7JpMR
q9UPu72M6mqX7nwinDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYMsCu7gEMnHLQMs9AmKvTPb/gTLlZGf6+znyxm8TZSWOiol1vEc7wcsFr9znnXma4aVlarleLi4jrXbTYbQUFBZ72vsrKSiooKj11Wdrsdg8Gg3Wu1WikpqdtBtdlsNGvWDEBb+/scP/zwA4C2rnb3Vu0sqn79+mnfeXpWPz8/iouL68zX8vb2Ji8vz+M7o6OjPY4ijIqKAmD37t0ABAQEYLPZ6szXatKkCb/99huHDx8mMjISq9VK06ZNady4MZmZmQA0b96cpk2bcuDAAe2+05tVAP7+/rRp04aff/6Z0tJSAgIC6jyrc6GqSv3LeCaqq10X9flcbJ8Ip4wZZaxZhFPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcF4MX15hKa/N/ZmSsirtWoCfmVcGXk1YcP2O2judC80YFuRHu+hQMnIKcLlPXTcaoE1UKI2tvhf8DES8a1nQ1WGKMTExdWZVFRcXk5+fX2em1O/vA8jOzva4npWVRUREBL6+vmf0u91usrOzNUfLli3x8vKqs652blXtupiYGAoKCmjUqJHH2t/PyQoJCfG4r7amsrIySkpKKC8v1z77fQMpOzsbg8HAyZMntc/dbrfHHCqAwsJC7bsBrrjiijM8qZojDc9Gfn4+RqOx3s0qhUKhUCgUCoVCoVAoFAqFQqFQyMnvm1UAJWVVjJvz8yVKVJehd7Sts+urTVQoQ+9oe4kSKWrR1Q6r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSko6432JiYkEBATw+uuvc/DgQQoKCoiPjyc3N5ebbrrJw7906VIefvhh0tLS8PLyokOHDhQVFXH99dcDNbufrrnmGj777DO+/PJLsrOziYiIoKSkhNDQUCIjIwHo0aMHRqMRh8PB3Llz+eSTT2jfvj07d+5k2LBh2ndGRkZy8OBBHnnkEY+aDAaDNl+rRYsWmEwmdu/eTXJyMnl5eTz33HN8++23hISEaLu9rrzySgDGjRvHf/7zH8rKyvjss8+0XVS1s66Cg4P56aef6NatG6WlNcfzrVy5kmPHjtG27al/6W688UZtjtbpnK05eC6Yzbrqg+oCvQ4ZFOmUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSp5h2ZJ+o0q2opKati98FC2sc0qpf7YtYcFODDc/cnkl9Uhr28GquvibDgC587JeJdy4bB7Xa7/3zZX8OhQ4e45ZZbsFgsPPLII/z222989dVXtG7dmmXLlmnrBg4cSG5uLmvXrtWuDR48mNTUVG666SYSExOZPXs2x48fZ+7cuVxzzTUAlJaWav/5kUceoaSkhE8++YTg4GA2bdqkuebPn8+rr75KbGws99xzD2vXrmXbtm0kJiayYMECbV2fPn3IzMwkKiqKO++8k5kzZ1JaWsq6deu0owMnTpzIxx9/TGBgoEdNzZo14+jRo6SkpBAeHk779u2prKykbdu27Nq1i44dO5Kenk779u3x9/dn1qxZ/PDDDzzyyCN4eXkRGxvLnj17iI2NxWg0sn//ft566y1uv/12kpOTKS4uplGjRlx99dXa/K3GjRvz7bff4ufnx88//8zgwYO54ooruPPOOzl06BDz5s3D5XKxYMECOnXqVK936Ha7MRjOfg6oQqFQKBQKhUKhUCgUCoVCoVAoLi8WrNnDp6v3nvHz+3rFce8t8X9hIkVDQ1c7rFasWIG3tzfx8fFMnToVi8VCUlISmzZtIi8vj/DwcABcLhfV1dXafRUVFaSlpdG1a1fS09NJSUkhPj4el8vFypUrtSbVunXrqKysJCkpidmzZ2M2
m+nevTupqans2LGDDh06aOtiYmIwmUxMmjSJiIgITCaTx9F/x44dIzs7Gy8vL/Ly8pg6dSodOnRgx44dLF++nCFDhgDw22+/AdC6dWuPmjZu3OgxXysuLg6omYEFcOTIEaZMmcL06dO1NbVNsGuuuYaff67ZQtmiRQseffRR7rvvPm3d119/jcPh4LXXXmPFihVAzZyswMBA/PxqOsVhYWG43W4OHDjAhAkTcLlc+Pn58f7779e7WVXzbtzY7aX1vv9yxWTS/yBElVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM2NDqHlXdgGHTjhoGWapc1Te+RARevZdSs0b+VNY6KiX+3hRGcUXcTcUNIx3fTlgtfqd864zXTWsUlJSSEpK4oMPPtCu2e12unbtysaNG+nXrx8A8+bN87hv+/btOBwOXnrpJRISErTrEyZM8NiFVdvImjVrlnbN7XZz7bXXsmHDBjp06EBlZSVbtmxh5MiRDBo0SFt32223ceDAAQ4fPkxkZCSpqam4XC7cbjevvvqqlu2JJ54gJSVFa1gdPnwYgFGjRhEfH6/V1KVLF4KCgjzma+3bt481a9YQFxfHQw89RM+ePXnhhRe04xBr52slJydz22238eKLLzJhwgR++eUXzQE1s65CQ0OZNm0aW7Zs4cEHH6Rnz55s2LBBq6dVq1Y0btyYHj16sH//fo4ePcpnn32mNQUvBDVQ7szocRCiaKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU8aMMtYswiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwXQxnXmFpnZlTAX5mXhl4NWHB/ufta9MqlAA/8x8eCxjgZyahZch55y0pczJj6S7Sswu0a+2ia+ZNWXy9zjvjH9EQ3rUs6KphlZWVxV133eVxzWq1EhYW5rG7CSAzM5Px48eTlpaG0VjTnaudL1VLbGwsc+bMoby8HF9fX7KysoiJiWHGjBl8+umnFBQUkJCQQJMmTTT/wYMHcTqdhIaG8uSTT5KamoqXlxdNmjQBID09ncjISLKysggICKC0tJQPP/yQ0aNHExERQVRUlEfW/Px8vLy8GDVqFEeOHMHhcHDVVVdhNBq1HVNwar7WgAEDAJgyZQo7duz4w/la8+fPp6SkBIB//etfNGrUiNjYWK3+yspK3nnnHX799Vd27Nih5f79bCq3282iRYtwuWr+5bnlllt48MEHGTFixHm9N4VCoVAoFAqFQqFQKBQKhUKhUDQsft+sgppZU+Pm/Mx7/0mul/OVgVczbs4fN8Hqw4ylu8jIKfC4lpFTwPQluxjev2O9nAr9oquGld1ux2q11rkeFBSEzWbTfrbZbAwcOJCoqCgmT57MwoUL+fbbb3n77bcZNWqUts5qteJ2u7HZbPj6+mK32zl8+DBr165l5MiRxMXFMX/+fNatW0dgYKDmBnjvvffw8/Nj0qRJlJeX89prrwHw7rvvEhAQwK+//orDUbN98eqrr2bUqFFs3ryZqVOnemQvLi6mcePG/Prrr9x2223aDi+Xy0WLFi20dd26dcNoNLJnzx4ArrzySlavXk1ERIR2VCFAx44dSU1NJSIiAgCz2czWrVt5+umntTXl5eUsWLCAFi1a0LRpUw4ePEhmZiaDBw9m586dtG/fHqg5WtHlchEXF0fr1q1Zt24dM2bMoKCggNGjR+Pt7X2eb5D/n0kNlfs9eh2EKNIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zKjXmndknvjDnVBQ07TafbCQ9jGNztvbrHEAH4zoSUZOAQfzL+yYwaMnHR47q2pxuSE9u4AT9nKahp7/TrBaGsK7lg1dNazOlYULF+JwOJgyZQrBwcGkp6fz/fffs3DhQoYOHXrGY+3cbjcZGRkM
HjxYO+6vc+fOXH311Rw5csRj7aFDh1i5cqW2K8lgMPD000/jdDoZNmwYbrcbk8lE27ZtefXVVwG49tprmTNnDqWlpR7feezYMW666Sa2bdvG2rVrufLKKyksLOTYsWPaui+++AIvLy+uueYavv/+e/bs2UOnTp345ZdfPOZ3LV++nKuvvlo7atDhcNCyZUu2b9+uuaxWK23btuWnn37yqOmjjz6ioKCAiRMnAnDixAkA9u7dy969p4bhffHFFwwZMoSoqKhzeyGnYTQaCAmxnPd9smC1XpzzVUX5RDhlzChjzSKceveJcKqMcvhEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinynj5+47kl7Bvdx4RjS1EhAXUy5FbcOjs33GylOTOLevlBkgKsZBU77tryDxWctbPHZWui/K7aD2/a9nQVcPKarVSXFxc57rNZiMoKEj7OSUlhW7duhEcHKzdV1VV0w0+fdaV3W7HYDBo95pMJqqqqujdu7fm8vb29tjBVbs2MjLS4wi92vlTHTt25K233mLixIl8/PHH9OnTxyPrddddx+rVq7VZV76+vjgcDl5//XWPGjp06EB+fr5HTbXzu+Li4vjPf/7DPffc4zG/69ChQ+Tk5PDss89it9t58cUXWbRoEcuXL+fNN9+ksrJS2xX1ySefcOTIEf7xj39gt9v54YcftGMNAcrKyqiurmbs2LHaMYQAd911F+np6eTn59erYeVyubHbS/98oWSYTPof4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mvJi+kjInUxfvZGfWqV1H7WNCefzO9lj8zm+eU0To2ZsqzRv5U1joqFdOuDh1+3sZzvq5xdt4yTOK9F0uWK1+57zrTFcNq5iYmDqzqoqLi8nPz/doHv1+1lXtZyEhIR73Z2VlERERga+vL4B27N/pLrfbTUlJCWVlZZSXl9OyZUsMBoO2tpbs7GwtD9Q0yYA6u7lqZ0tlZWURGRlJQEAATqfTo1lVXFxMRUWFR3PuXOZ31f4zOjqaX3/9VVsXGxuL0+nk0KFDxMbGAlBQUMDgwYOxWCzY7XbMZs9XXVlZidvtrnPsX+08sMzMTLp06UJ9UAPlzkxDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzXgzfB1/trDPPaVd2Ae9/tfO85zm1aRVKgJ/5D48FDPAzk9Ay5KLUfyF1hwX50S46lIycAlzuU9eNBmgTFUpjq+8lz/hX+GRCVw2r5ORkpk2b5jHLatWqVRiNRpKSTm0g/P2sq8TERAICarY+1u6UcjqdrFmzhuTkZG1dREQEO3bs4OjRo9ruoU2bNmlH+NlsNsLDw/Hx8fHY/QSwYsUK/P39cTqdAFxxxRUAZGRkaDu2bDYbaWlpHjnCwsI4fvx4nZoMBgPl5eVnrKmW03d/1f7z9+tqf6793OFwMGTIEJxOJ4MGDWLChAl/6PXz82Py5Mm8+eabFBYW8uqrr2oztE6fGXa+qBlWdWkI56GqjPrziXDKmFHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVGfGS+WT8Q8pzEPd2XMrK0eTasAPzNjHu56wb/jvVh1D+vXng9+t6usbXTNrjK9ZBTlkxFdNawGDBjAvHnzGDZsGEOHDiUvL48333yTAQMGeOxkqq6u5qOPPuLRRx8FwMfHh6FDh/K///2Pffv2sWnTJhYsWEBRURGDBw/W7rvyyitZvXo1Tz75JMOHD6esrIw333yTNm3akJGRoa0LCgri+PHjjBkzht69e7NlyxaWL19OQkKCtqZRo5qBc/PnzycqKorw8HCmT5+OxWLxmGEVGxvL7t2769TUsWNH0tPT
tXVut5t9+/axatUqAO0/Oxx1tzSuX79eayytX7+ekydPenz+5JNPsmfPHl577TVycnIASE9Px2q10rFjR6BmFpbb7SY3N5fWrVtTWFjIW2+9pTXkDIazb7c8E2qG1dlpCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjBfqEzHPKSTEwoLxt5G29zh7fisgvlUoneKa/PmN58GF1h0SAq8Pu47c/BJyTzguaG7XmdDbu5YZXTWsgoKCmDNnDuPGjWPYsGFYLBbuvvtunnnmGY91tbOoTmfIkCFMmzaN/fv38+ijj5KQkMBHH31EixYttDUhISG43W5atGjB8OHDMZvN3HzzzcTHx7N7927t2L4mTZrQokULtm3bxhdffEFERATjx4/niy++0NbU/jMpKYlJkybhcDhITEzk9ddfZ8iQIdrnjRs3xmq1YjKZPGoymUwcOnRqsJ2XlxfLli1j2bJlAHz99dd8/fXXGI3GOt/5yiuvaPe99NJLHs8PauZ4ATz//PMezwdg7969QM2Mrnbt2rFjxw727dsH1Bxn+O9//5sPPviAsLCwc3pnv0fNsPpjTCb9n4eqMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZjxeVEZxeTVWXxNhwfVvZIic5xTbLJBOcU2w28suaCbU6Vzs5xjgY+LqhHBdZxTx5/tywGptoDOsoGZH0uzZs8+6pmPHjgQHB3tcKykpobS0lNdff51+/fr94X21s6ueeuopPvjgA+36xIkTPWZdxcTEsG/fPq15BDU7oP7v//5PO5qwZcuWeHl5cfXVVzN58mRt3bp16zy+KyYmhqKiIt59912POVZPPvmkxyytdu3aERwczPvvv69dKy4upkuXLh4ugPfff5+bbrpJWzdv3jzeeOMNrTlX25QC+Oqrr3jxxRfZtGkToaGhHs9w/vz5QM3uq7vuuouJEycSHR3NBx98wFVXXfWHz/BcUOdznpmGcB6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2WUwyfCqTLK4RPhVBnl8IlwyphRxppFOGXIWFLmZMbSXR7H+LWLDmXoHW2x+Hqdt++vmOckw3v5K5wiMspCgzxMMTk5mR9//BG73a5d+6NZV7+ndtbVypUrtWt/NOsqOTmZPXv2aMfpQc2sq6KiIq6//noAvL29ueaaa1i9erXHd6xYsYLY2FgiIyMB6NGjB0ajkTVr1mhrbDYbqampdb7zz2pq0aIFUVFR2rGBp39nt27d8Pb2PvNDOwu1zT8fHx/mz5/P1Vdf7dFMUygUCoVCoVAoFAqFQqFQKBQKxbkzY+kuMnIKPK5l5BQwfcmuejuH3tGWNlGhHtfaRNU0wRSKywHd7bDKzMxk/PjxpKWlYbFYuOOOO3j66ac9mjFnmnXVvn17+vfvT0FBAQkJCVRVVWG321m7di1Q05C57777mDFjBh999BHe3t4EBwfXmXXVq1cvJk2axO23347L5SIkJITKykp69uxJhw4dtHVDhgxh0KBBXHXVVbhcLpo0acKRI0d4++23tTVNmzbllltuYfTo0YwdOxY/Pz/8/f0JCAhgwIAB2rp+/foxbdo0unfvDtQcS1hYWOgxv6u4uJjAwECWLVvGihUr8PPzIyAggPz8fG23FMALL7zA4sWLPZ5rt27dABg0aBAvvvii9p83bdqkrfnPf/4DwJgxY+rx5k5xocPuLkcawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqJ+PRkw6PnVW1uNyQnl3ACXs5TUP9z9sbFODDc/cnkl9Uhv0iHDNYiyzvRbRTREbZ0FXDymazMXDgQKKiopg8eTJ5eXlMnDiR8vJy
Ro0apa37o1lXrVu35pdffuHZZ58lLi6O+fPn891333nMYnI6naxfv57Q0FCqq6spLi7m+PHjtGvXzmPW1a+//sqxY8eIiIjgxIkT2Gw2Kisr6d27t0feFStW4Ovri9Vq5eTJk+Tn5xMYGEiPHj08avrpp59o3LgxpaWllJWV4XA46N27N4GBgdq6999/H5fLRfPmzcnNzSU/P5+qqir69u2rramsrKRVq1bExsayadMmTp48SXV1NWaz2eOIxMcff5zU1FTy8/PrPOPS0lKqq6sxmUwYjUa8vLwwGAxUVlYSGxvL448/ru0iqw9Go+G8h/vJREMY4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwiXCqjHL4RDhVRjl8IpwyZpSxZhHOyz1j5rGSs37uqHRd0O9QRf3+9XJ/L3+VU0RGWdBVw2rhwoU4HA6mTJmiNWCqq6sZO3YsQ4cO1XYageesq4qKCrp3787gwYMZNGgQAJ07d+bWW2/1OHZv9erVHDhwgBUrVmhH3qWmpjJ48GB27Nih7Z6aOnUqV111FQsXLtTuHTFiBDNmzOAf//gHAMeOHeOLL75g9OjR9O/fH4CioiJuuOEGFi5cyJAhQ7SaSktLWb9+vVbTZ599xtixY3nuuecIDw8nLy+Pzz//nBdffJEHHngAqJmZ1bdvX6ZPn87UqVMBaNSoEZMmTfJ4Zg6HQzua8LHHHgNq5mulpqZqa7Zs2cKDDz4IwOeff05OTg7z5s2jSZMmREVFMW3aNP72t7/x1FNPceutt57nW/PE5XJjt5dekONyxGTS/wA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTJeHOfxojKKL3D3kr+X4ayfW7yNFBY66uUGOd+LrBkvB6xWv3PedaarhlVKSgrdunXz2C3Uu3dvRo8ezcaNG+nXr98f3rd9+3ZKSko8dkB5e3tz8803a8cB1vrj4uI85jMlJSURHBzMhg0b6NChA5WVlWzZsoWRI0d6fEefPn1Yvnw5hw8fJjIyktTUVFwul0eDJzg4mKSkJFJSUrSG1bnUtGfPHqqrqz3mbxkMBnr06MEnn3xCZWXlGedT+fv74+Pjg9PpPONzbdKkCQaDgQEDBnDXXXdhsYjdAaUGyp2ZhjDAT2XUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqkyyuET4VQZ5fCJcKqMcvhEOGXMKGPNIpx6zFhS5mTG0l0eR/m1i66ZD2Xx9TovV1iQH+2iQ8nIKcDlPnXdaKiZOdXY6ntR6pfhvYj2iXCKyCgLujpMMSsry6OZBGC1WgkLCyMrK+us9wF17o2NjSU3N5fy8vIz+g0GA82aNWPBggV07NiR6667DqfTScuWLeu4Tv+urKwsGjVqxGeffabNturfvz8BAQEeWbOysggPD+fJJ5+kU6dOdO3alTfeeIPGjRtr6yorKwHYtm0bffv2pX379vTq1YusrCwqKys5fPiw5qusrGTixIl0796dq666ihtuuAFA2/kFsHLlSv7973+TnJxMx44deeCBB3C73QwePJj27dt7PIPffvuN22+/HaiZYdWzZ88zPmeFQqFQKBQKhUKhUCgUCoVCobgcmbF0Fxk5BR7XMnIKmL5kV718Q+9oS5uoUI9rbaJqGmAKheKP0dUOK7vdjtVqrXM9KCgIm8121vu8vb3x8fHxuG61WnG73dhsNnx9fbHb7R5zo6BmxlRmZia+vr5MnjyZrVu3MmPGDL7++mtuvPFGD1ft+trvrK6u5r333mPkyJHa3KxvvvkGl+tU99Rut7NmzRpCQ0OZNGkS5eXlvPHGGzgcDs3VqlUrAF555RX++c9/8tJLL7F582btKMDTax8/fjyLFy/Wmly1c7NO38E1e/ZsmjdvzgsvvEBISAgjRowAYMmSJTzxxBPauuDgYPr27YuPjw/z
58/HaDRy9OhRpkyZ4rHufDGbddUH1QUNYYCfyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnr7zx60uGxs6oWlxvSsws4YS+naaj/eTmDAnx47v5E8ovKsF/gEYO/R5b3ItInwikio2zoqmF1KVi4cCFVVVXEx8dz3XXX4e/vz4wZM1i7di15eXkec7NOp7q6mqKiIh599FGPuVlJSUmUlJwaqudyuThx4gTz5s3TdjZZrVYGDx7MyZMnAWjdujVWq5Xy8nLuvPNOoqKi2LVrFwaDAbfbjcFQc+Zp7dysZ555hmuvvZb8/Hw+/fRTfvjhB959911efvlloGYGV2hoKACZmZmcPHmSTp068fHHH/P4449jNNb8C1NUVMTixYs9sgLMnDmz3g0ro9EgbOjf5UBDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRzovpe3vBNtIzT9Lhisb8Z0BivRyZx0rO+rmj0lXv33mK/F2pnt+LKKesGWVBVw0rq9VKcXFxnes2m42goKCz3ldZWUlFRYXHLiu73Y7BYNDutVqtHs0kqJkxFRgYSOPGjQG0tS6Xy2Nult1u9/i8rKwMt9tdZ25WVFQU6enp2jWz2UxAQECduVkGg4HCwkKg5pi/0tJSGjduzIABAwBo3rw5t912G8uXL6e6uhpAm5v1z3/+U8vRs2dPunbtytKlS7WGVW2zCmDZsmWYzWb+9re/kZaWRmlpKQEBAQBMnDiRxx9/nL59+7Jw4UJmz57NDz/8wIkTJygvL8fX1/eMz/xMuFxu7PbS877vcsdk0v8AP5VRfz4RThkzylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYswiljRhlrFuGUMaOMNYtw6t0nwilbxtQdR5ixdLf287c/HeLbnw7x2B1t6N4+4rxc/l6Gs35u8TZSWOioV07Z3osop6wZLwesVr9z3nWmq4ZVTExMnVlVxcXF5Ofn15k99fv7ALKzs4mPj9euZ2VlERERoTVeYmJi2Ldvn8e9mZmZlJeXa46WLVvi5eWFj49PnVlUp3+X2Vzz6GobXbVUVVXhcrm0ho/ZbMbLy3MoX0lJCW63m6qqKgAOHjxIVVUV48aNIyYmhvLycqKjo/nf//4HgMPh0DI0atTIo3lnNBpp3rw5+/fv/8Nn880339CtWzf27NlDeHi41qyq5bXXXuOOO+7weG4Xihood2YawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPhlDGjjDWLcF4M3+nNqtOZtiSDrglNz8sVFuRHu+hQMnIKcLlPXTcaauZONbb6XnBeWd6LaKesGWVBVw2r5ORkpk2b5jHLatWqVRiNRpKSks54X2JiIgEBAaxcuVJrvDidTlasWAFAx44dsVgstGvXjj179pCTk0NUVBRQs3vL5XJx/fXXAzW7pLp27cqWLVv45JNPmDt3LgkJCQQEBBAbG0tkZCQATZo0AeDRRx8lJycHLy8vrr/+eg4cOKB5fX198fb25sSJE9x2220cPHiQiIgIOnXqpH1X7VqomTG1efNmHA4HHTp04NChQx6f2+12fH19eeihh0hLS8NisXD77bdz4sQJj7lZZWVlfPDBByxevJj8/HxsNht2u53nnntOW1NZWcnw4cP54Ycf8PLyYuHChfTu3Zvi4mKuvPLKeu2uqkXNsKpLQzgPVWXUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRThlzChjzSKceveJcMqUcfqSnWf9/OMVGQzp2+68nMP6teeDxTvZmVWgXWsbHcrjd7a/oN93yvReRDplzSgbumpYDRgwgHnz5jFs2DCGDh1KXl4eb775JgMGDPCYJTVw
4EByc3NZu3YtAD4+PgwdOpTJkycTGhpK69atmTt3LsePH6ddu3a89tpr5OXlMWHCBKxWK08++STDhw+nrKwMl8tFTEwMHTp00PyRkZFs3LiRoKAgHnnkERYtWsQvv/zCK6+8oq3x968Zsrdnzx7uv/9+/P39mTVrFm73aS14wMvLi+rqamw2G0899RTbtm1j8eLFhISEeBxfCLBmzRoeeOABXC4XCxcupLKy0uPzffv2ceTIEby9vXniiSc4ePAgc+fOxeVyaXOpAF599VXWrFlDmzZtKCoqwul04na7KSoq0tbs3buXtWvX0qhRI5o1a0Z6ejpbt26loqKC//znP/V8g2qG1Z/REM5DVRn15xPhlDGjjDWLcMqYUcaaRThlzChjzSKceveJcKqMcvhEOFVGOXwinCqjHD4RThkzylizCOeF+vYftp/1872HbOf9O8qQEHh92HXk5peQe8JBRGMLEWEBf37jOSLDe/krnLJmlAVdNayCgoKYM2cO48aNY9iwYVgsFu6++26eeeYZj3Uul4vauU61DBkyBLfbzaxZsygoKCA0NBRvb28+/PBDgoODAaiurmbMmDFcddVVDB8+HLPZjLe3N8nJyZqnoqKCb775Bm9vb9xuN//73/9o1qwZwcHB2u4pgCNHjgDQt29fVqxYgcPh4MorryQjI8NjblZZWRmBgYFcccUVTJkyBYvFQkxMDEeOHNHWOJ1OoGbH1dy5cwkODua2225j2bJlVFdXa+vKy8txu90UFhbyzjvvEBYWRlxcHBkZGR6zt1auXMlDDz3EZ599ho+PDxEREVx55ZWsWLGC4cOHA7Bu3Tr8/PwwmUzs3r1bu7dZs2Zcc801VFZWajvAzgc1w+qPMZn0fx6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhbAgZjxeVUVxejdXXRFhw/X+pf2WkleOFZWf8PK5FUL1nTgX4mLg6IRy7vazejtNpCO9FZdRvxssBq7WBzrACiI2NZfbs2WddM2/evDrXDAYDQ4cOZejQoQDcf//9tG3bVmtWAfTu3ZvRo0fTu3dvZs6cqa07fPiwtmb79u2UlJRgMBh49tln6devHwATJkzQdnQBHDt2DICHHnqIN954AwC3281VV12Ft7c3vr6+VFZW4nA4aNKkiUdN3377LcOGDdPmX+Xk5AA1xwvW5gc4cOAAv/zyizY3q6KiAoDVq1drDSq73U6XLl20n2tnY/n7+9OqVSuOHj3Khx9+yMyZMz12fx09epSysjLKyk79F3thYSEAXbp0YcyYMdx7771neQtnRp3PeWYawnmoKqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWNGGWu+GM6SMiczlu4iPbtAu9YuOpShd7TF4ut13r7Bt7Vl4868M37+UJ82ups5pcf3ItonwilrRlnQXcPqYpGVlcVdd93lcc1qtRIWFkZWVpZ2LTk5malTp/LAAw+wc+dO7Wi938/Nio2NZc6cOZSXl+Pr64vNZsNsNvP6669z8OBBCgoKiI+Pp7q6mpCQEAAOHjyI2+3m+PHjPPzww6SlpeHl5cWVV14JoM3DOnjwIF5eXixZsoRvvvmG7OxsIiIiKC0txWQyaeuKioowGAw888wz7N27F4fDQfv27QEICwsDwGQyERQUxFtvveVRYy3ffPMNt912G0OGDGH37t3s2bOnzrObOHEi3bt3r+eTVygUCoVCoVAoFAqFQqFQKBSKU8xYuouMnAKPaxk5BUxfsovh/TvWy/lo3wRmLN39h9cVCkXD5LJtWNntdqxWa53rQUFB2Gw27ec+ffrwzjvvsHv3bh577DFWr15NRkYGsbGxHnOz5s6di9vtxmaz4evrS0lJCY0bN2bLli3cdNNNJCYmMnv2bKqqqrTvrf0es9nMTz/9xCOPPEJJSQmf
fPIJAKGhoR5ZMzMziY2NZfjw4axdu5acnByP2VQlJSWEhISwceNG+vXrR6tWrbSdYk2aNNHWJSYmsmbNGo+6mzdvzrFjx+jcuTNQ04CzWq0kJiZyzTXXMHXqVLp168ahQ4fo06dPnfla58OFDCG8XGkIA/xURv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinjBllrFmEU8aMMtYswql3nwinXjMePenw2FlVi8sN6dkFnLCX0zTU/7y9PTo0p0eH5ny0LJ3dB20ktAxi8O3t6p2zFlnei0ifCKesGWXjsm1YnSsrVqzA29ub+Ph4pk6ditFoxGAwcODAAfLy8rSm1enH6dX+nJ+fT9euXUlPTyclJYX4+HgKCws5ceKEx1qn00lSUhKzZ8/GbDbTpUsXtmzZwsGDB7U1paWlxMTEYDKZmDRpEhERETRr1oyjR496fGdBQQHdu3dnw4YNOBwOOnTowNatWz1cP/74Y506a2duVVVVeVy3Wq20bNkSgJCQEE6cOHFBzSqj0XDeAw1loiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRn35tu89zt7NvxHfKpROcU3+/IbzQK81i3TKmFHGmkU49e4T4dRbxsxjJWf93FHpuqDfJ4588Jp633s2Lvf38lf4RDhlzSgLl23Dymq1UlxcXOe6zWbT5j0BpKSkkJSUxAcffADA/PnzefXVV3G73dpOJoCBAwcyatQo7V6TyUR1dTUvvfQSCQmntpled9112s6q2rWRkZHMmjVLW5Odnc2tt95KdnY2AAEBAZSVldG/f38GDRqkrXvqqac4evQohw8fJjIyEl9fXxwOB++8845HDR06dCA/Px+Affv2UVJSwtSpU7nxxhuBmrlc9957L2az2WOm1+955ZVXtF1f9cXlcmO3l16Q43LEZNL/AD+VUX8+EU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8a8glLGfryVkrJTf2E0wM/MmIe70iTk/HcQiMgoyifCKWNGGWsW4dS7T4RTrxn9vQxn/dzibaSw0FEvN+j/Oer1vYj0iXDKmvFywGr1O+ddZ5dtwyomJsZjVhVAcXEx+fn5xMTEaNd+P+uq9rOQkBCP+7OysoiIiMDX1xeAwMBAj/VQswOqpKSEsrIyysvLadmyJQaDQVtbS22jqrahVnuE4OlHEELNEYC13x0ZGUlAQABOp9OjWVVcXExFRYXmOnDgAIBHE2358uX4+PhQUVFBXl4eAQEB2mdbt25l48aNAPz73/9m5MiRdOnSpe4DPQ/UQLkz0xAG+KmM+vOJcMqYUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRjl8Ipwqoz58v29WAZSUVTFm1lbe+0/yGe46P/RW81/hlDGjjDWLcOrdJ8Kpt4xhQX60iw4lI6cA12mHWBkN0CYqlMZW34uSV+/PUW/v5a/wiXDKmlEWGmzDKjMzk/Hjx5OWlobFYuGOO+7g6aefxtvbG4Dk5GSmTZvmMctq1apVGI1GcnJy6NmzJwUFBVRUVGiNIaiZ/xQQEIDL5WLFihXMnz8fs9lMVVUVffr00dZFRESwY8cOFi9ezKeffkp2djYhISGUltbsLrLZbISHh+Pj40N+fj5vvPEGS5cuxeFwYLFY8PX1xel0AnDFFVcANUf5ff7556SlpeHn54fdbtdcAGFhYRw/fpzHHnuMjIwM8vLy6NWrFwaDgfLycqBmVhXACy+8QG5uLnl5eVqTq7KykoiICK0Gt9ut5QX45Zdf+Ne//sXgwYN57rnn6v1u1AyrujSE81BVRv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinLBl3ZJ6o06yqpaSsit0HC2kf06jefj3WLNopY0YZaxbh1LtPhFPPGYf1a88Hi3eyM6tAu9Y2OpTH72x/wb9L1Ptz1PN7
EeUT4ZQ1o2wY3L8fztQAsNls3HbbbURFRTF06FDy8vKYOHEiffv2ZdSoUR5roqOjPdbExMSwa9cuRo4cSVxcnHYE37fffkuLFi0AmDp1Ku+88w6+vr4MHTqU9evXs3PnTrp27crcuXMBmDJlCpMnTwZqjgFs3749c+fO1ZpfKSkphIeHk5ycTF5eHl5eXjz44IMcP36cZcuWYTab6dy5M3PnzmXbtm3cd999GAwGWrVqxZ133snXX39NTk4Obrebt956i9tvv53nnnuOpUuX4ufnR5cuXdiwYQM+Pj60adOG9PR00tPTqa6uJjk5maKiIvr06YOPjw+LFi0Cahpeqamp2nMcOXIkq1at4sYbb2T16tUMHjyYRYsWER0dzeeff16vd+N2uzEYzr7NV6FQKBQKhUKhUCgUisuRBWv28OnqvWf8/L5ecdx7S/xfmEihUCj0RW5+CbknHEQ0thARFvDnNygUCqlokDusFi5ciMPhYMqUKdpMpurqasaOHcvQoUMJDw8nKCiIOXPmMG7cOIYNG4bFYuHOO+/kiy++4OGHH9YaVWazmerqaj766CPGjBkD1MycAjAYDEydOpWEhAReeOEFJkyYwI4dO+jQoQMhISFAzZyqbdu28euvv9KrVy92795NRkaGdmxfSEgIeXl5hIaGMnfuXCIiIvjvf//LhAkTKCoq0hwARqMRm83G1KlTSUxMpEePHsybN4/anmLjxo0JCgoiISGBLVu2ADXzqzp27MihQ4cAMJlMzJ49mzlz5vDjjz9y7NgxjEYjN954I99++y3p6em0a9cOgO+//x6n08nq1asB+Oijj4CaOVj1Rc2w+mNMJv2fh6oy6s8nwtkQMu7KLuDQCQctwyy0iQrVXT4RTpVRnxllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpS8aI0LMPWW/eyP+yntMiwiljRhlrFuHUu0+EsyFkDPAxcXVCOHZ72QX99+Hp6P05NoT3ojLqN+PlwGU/wyolJYVu3bppzSqA3r17M3r0aDZu3Ei/fv0AiI2NZfbs2dqaTZs2MWfOHHr37q1d69ixI8ePHyclJUW7tm7dOgBGjRqludxuN1OnTmXDhg106NCBli1bAnDXXXfx/PPPa/c+/vjjZGRkcOLECSIjI7WZV8uWLdMaU263mzfffFObO1U76yomJobly5drruXLlzNv3jzy8/OBmnlZNpuNd999l6CgIOLi4rjhhhv45ZdfPGZpXXnllYwfP57y8nK6detG3759efDBB/n22285fvy4tm7gwIHMmjWLtLQ07drYsWO1BlZ9UedznpmGcB6qyqg/nwinHjPmFZby2tyf6wynfmXg1YQFX9hw6ouR769wqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCOflnrFNq1AC/Mx/eCxggJ+ZhJYhUsxpEeGUMaOMNYtw6t0nwqkyyuET4VQZ9emTiQZ5mGJWVpZHgwbAarUSFhZGVlbWWe8DPO5NTk7m2LFjHDlyRJsD9euvv2IwGEhKStLWGQwGoqOjNUejRjVnTh87dkxb43Q6SU9P9/iu2qZaYWGhtm7Tpk04nU4cjpq/ReDt7Y3JZNJmVtXy/fffYzKZtHt79OiB0WhkzZo12pry8nJSU1NJTk6uU++6desoLS3l9ttvZ9u2bXVqr73/2muvpU2bNtx666188803tG/f/ozPUKFQKETx+2YV1JzzP27Oz5cokUKhUCgUCoVCcf68MvBqAvw8/35w7V/EUigUCoVCoVCcmQa5w8put2O1WutcDwoKwmazaT9nZmYyfvx40tLSsFgstGrVCm9vb3x8fLQ1AwYM4MMPP6SyspK1a9dSWVnJ0aNHCQ0NpX///hQUFJCQkEBVVRUHDhzA37/mb/mXlZUBsHLlStauXYu3tzfBwcHarqnaHI0aNcJsNnP77bfjcrkICQmhsrKSmJgY7Rg/qNl1lZeXx1VXXYXL5aJJkyYcOXKE8PBwzdW0aVNu
ueUWRo8ezdixYwH4+OOPCQgIYMCAAZprxYoVrFy5kg0bNgDw448/smjRIv72t78RFRWlrZkyZQpwqpmWnZ0NoM3yqi8XOijxcqQhDPBTGfXnE+HUa0aRw6n1WrNInwinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8ZmjQP4YERPMnIKOJh/8Y66Pj2b3moW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyppRNhpkw+pcsNlsDBw4kKioKCZPnkxeXh5jx46lqsrzF6JBQUEMGzaMCRMm8N///peAgAC8vLwoKCjghRdeIC4ujvnz5/Pdd99hNp96XLUeq9WK0WikuLiY48ePEx8fz86dO7V1J0+epKqqisjISE6cOIHNZqOyspKkpCSPhpXL5cJsNhMUFMTJkyfJz88nMDBQa5DV1vTTTz/RuHFjSktLcTqdlJSUcP311xMYGKitW7VqFTk5OTidTgDtmMHaGV1waoeYxWKhvLwcX19fWrduTXV1NV9//TXPP/88Xl5e5/3cjUYDISGW875PFqzWs59nfql9IpwyZpSx5gt15hYcOuvnR06Wkty5Zb39oL+a/wqfCKeMGWWsWYRTxowy1izCqXefCKfKKIdPhFNl1JcvKcRC0p8vqxd6rVmkU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPKarVqO5lOx2azaXOiFi5ciMPhYMqUKdqxfJs2bWL58uUcPHhQm0EF4O/vj8FgYOvWrRgMBjp27EhMTAyDBg0CoHPnztx6661UVFRo/tqm1LPPPss999wDQGpqKoMHDwbQ1u3fvx+z2cx3332nfd+IESNITU3V1hw7dgy32023bt348MMPASgqKuKGG27g+PHjHjWVlpayfv16goODiYuL4+abb2blypU899xzhIeHA/DOO+9gNBpxu93Ex8dz9OhRFi1aRJMmTbQMzZs3B+D111/n1ltv1a6vXLmSp59+moMHDxIbG3t+LwZwudzY7aXnfd/ljsmk/wF+MmY8XlRGcXk1Vl8TYcEX/j8kyzZms+dgEW1aBXNb9+gL9oE870XkcGq91izSJ8IpY0YZaxbhlDGjjDWLcOrdJ8KpMuozo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpyyZrwcsFr9znnXWYNsWMXExNSZVVVcXEx+fr42oyklJYVu3bppzSqAPn36sHz5cpYtW8awYcO061lZWURERODr68umTZtwuVy4XKf+QHl7e3PTTTcxb948zb97924ASktPNWeSkpKwWCw4HA5iYmKorKzk2LFjVFdXezTTanNcddVVQE2jC8BoPPXSgoOD6dq1K99///1Za4qPj2ft2rVs3LiRfv36eXjeeOMNAO68807i4+PP6xlfCGqg3JlpCAP8ZMhYUuZkxtJdpGcXaNfaRYcy9I62WHzPf2dhRk4Bby38Rft5Z+ZJPluXyXP3dSS+5cU5+uNyfy9/xXBqvdX8V/hEOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpa0ZZaJCHKSYnJ/Pjjz9it9u1a6tWrcJoNJKUVLPhPisrS2v01NKjRw8MBgMpKSnaNafTyZo1a0hOTtbuA8jJySEnJ0dbZ7PZcLlczJgxg6SkJDZv3kyjRo1YvXq1tsZgMODt7U1AQACRkZEcPHiQ6upqDAYDN998Mx06dKB///4UFRUBcMUVV2jf6efnR2pqKh07dqRr167897//1Y70O70mk8lE3759ad++PVCzgyssLMyjgVdZWcl9993Hxx9/DMDmzZvrNPi2bNkCwNNPP01cXBwJCQn8/e9/58svv8RqtXrsQFMoLjdmLN1FRk6Bx7WMnAKmL9lVL9/pzarTefPTP76u+GPUcGqFQqFQKBQKhUKhUCgUCoVCXhrkDqsBAwYwb948hg0bxtChQ8nLy+PNN99kwIAB2rF4drud1atXs3r1atau
XQuAj48PoaGh7Nixgzlz5tC6dWsWLFhAUVGRdpSf3W7Hy8uL6OhonnzySYYPH87JkydZsmQJUHOEXkVFBS+//DIRERH88ssvjBkzht69e7NlyxYKCwuJi4sDappctVRUVPCvf/2LtLQ0Xn75ZQA6duwI1Bz/V9ucioyM5MYbb2TBggWUlJRgNBq1mmw2G2vWrKF79+7cfPPNTJkyhZUrVxIcHMyePXu07xoyZAjbtm2jY8eO/PLLL5SWlnLffffxv//9j/j4eEJDQyktLcXHx4dOnToRFRVFRkYGO3bsYP/+/fWeX1WL2dwg+6BCaQgD/GTJePSkw2NnVS0uN6RnF3DCXk7TUP8/uPOPWfJD1lk/X7n5N27vUf/jAWV5LyBuOLWeaxblE+GUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mlLFmEU5ZM8qGwe12uy91iPqQmZnJuHHjSEtLw2KxcMcdd/DMM8/g7e0NQNu2bWnatClut5t169Zp99122234+fmRn59PQUEBCQkJvPjii3Tq1AmAqVOn8sEHH/Dtt98yfvx4UlNTqa6uxul04nK5SElJITw8nG7dulFYWMhrr73G7Nmzyc7O1o4VbNy4MbNmzWLz5s0MHDiQ22+/nbCwMJYsWYLD4cDlclFZWclbb73F7bffzoMPPsiWLVuYPn06s2fPJi0tDW9vb+x2O0ajUTt+MD4+HovFQklJSZ3n4e/vT1paGseOHaNnz56c6bVOmDBBOzrw6aefZseOHZw4cQKDwYDFYuHkyZOMGTOGe++9t17vxe12YzAY6nWvQvFX8PPuPMZ+uPmMn49+5FquTgg/Z99LH6SyM/PkGT9vH9uI1x/vcV4ZFQqFPliz5Td2HsjnqivDuKlrq0sdR6FQKBQKhUKhUCgUCoXisqZB7rACiI2NZfbs2Wf83Gq10qdPH0aMGOFx3W63c8MNNzBy5Mgz3ldZWUlwcDCTJ08G4P7776ekpIS9e/dqc6iaNm1KQUEBBoOBZcuWafcPGDBAW3P48GEAEhMTue+++3j++ecBePHFF/nqq6+0dSdPnsRkMtGzZ0969uwJ1DR+rrrqKkwmE1BzzJ/b7aZ9+/YedX/33Xc8/vjj9O3bFzg1D2vr1q0EBQURFxfHc889R1paGjabTWtWAbzzzjseta9evZqnnnpKa5DVB5fLjd1e+ucLJcNk0v8AP1ky+nudvaFq8TZSWOg4Z1/ryKCzNqziWwSfl+/3yPJeGpJPhFNl1FfG7Fwbr87+iVrF99uPMGXRL4x+qCtRzay6yCjKJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxppFOGXNeDlgtfqd866zBtuw+jNiYmLqzG0qLi4mPz+/zmyr398HkJ2dTXx8PFAzO6p58+baDiqAK6+8kn379nl8h9vtJjs7W5s55XDU/JK6rKzM4zv8/PwAaN68ufZ5dXU1NptNa2IZDAZ8fX21htXBgwf/0NW0aVMALVdWVhaNGjXSPLXExsbyxRdf1Km3urqaqqoqDhw4wPvvv4/JZCIwMPCMz+dcUAPlzkxDGOB3uWcMC/KjXXQoGTkFuE7biGg0QJuoUBpbfc/LfVu3KL7ccOZjAXtf2+qi1H+5v5eG6BPhVBn14Tu9WaU5XTD2463MfO7GC0z3/306q/mvcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCKesGWXhsm1YJScnM23aNOx2O1Zrzd+GXrVqFUajUWso1ZKZmcn48eNJS0vD398fLy8vli9frjWsbDYbVVVV3HbbbR7+JUuWsGHDBpYvX05BQQGRkZEUFRVx/fXXA1BaWorBYGDVqlX88ssvpKam4uXlpc2HCggI0HwGg4F3332Xn3/+mezsbMLDw7Hb7bRs2VLLALBnzx5effVVVq9ejcPh
0OZbtWjRAqjZQebr68uoUaP49ddfAZg1axYPP/ywx0wtqGl+JSUlaY01b29vqquradOmzQU9ezXDqi4N4TxUmTIO69eeDxbvZGdWgXatbXQoj9/Zvl5/fl/8VyITPtn+h9cv9N8Hmd5LQ/GJcKqM+sn4fdrhOs2qWqpd8GP6UZI7Nq+3X481i3bKmFHGmkU49e4T4VQZ9ZlRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRTlkzysZl27AaMGAA8+bNY9iwYQwdOpS8vDzefPNNBgwYoDV5oOa4v+3bt9O5c2cmT55MXl4eY8eO5aOPPiIsLIzWrVvjcrkoLy9n8ODB2n29evXiueeeY9++fdx99900adKEWbNmYTKZCAkJ0daZTCZ27NjBb7/9xtChQ9m1axdr1qzxyGo2m2nWrBnz58+nS5cuPPXUU3z66ae43W6tuVWLy+Vi4cKFPPDAAwDMmzcPAB8fH21NWVkZq1evplWrmnkbFRUV7N27F5fr1G/gJk6cSGpqKk6nk169enHixAm2bdsGQEZGhkdz7nwwGg2EhFjqda8MWK1+uvaJcOoxY0gIvD7sOnLzS8g94SCisYWIsIA/v/EMdA+xsKxTCxZ9t4+0vcfpFNeEe/7W+oIy/h4Z3ktD84lwqoyX3pd1tPisnx/ItXPHDRf+77eeav6rnDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOlVEOnwinjBllrFmEU8aMMtYswilrRlm4bBtWQUFBzJkzh3HjxjFs2DAsFgt33303zzzzjMe6Y8eO4Xa7mTJlCsHBwQBUVVUxZswYZs6cic1mw2g00rt3b20XE9Q0jqqrqwkKCmLFihWYzWZ69+7N5s2b+eijjxgzZgxWq5WqqioAQkJCmDJlChEREXTr1o1NmzZx8OBBwsPDsVqt5Obm0qRJE7Kysti5cyeJiYmUlJRw7NgxrR4Ap9NJdHQ0CxcuxGKx0KdPH5YtW0ZaWhr33HMPVquV0tJSysrKKCoqAmqOQlyyZAlG46nObkxMDLNnz8bLy4vvvvuO8PBwbr/9dpYtW8bixYt59tln6/Xc1QyrP8Zk0v95qDJmDPAxcXVCOHZ72QXNmaqlV5cW3PO31hfNB3K+F737RDhVRv1kjGkWyPdn+fyKCKuaS3eJfSKceveJcMqYUcaaRThlzChjzSKcMmaUsWYRThkzylizCKfefSKcIjIeLyqjuLwaq6+JsOAL/yW8jM9RxppFOGXNeDlgtaoZVkDN3KbZs2efdU3Tpk2Ji4vTmlUAffr0YcyYMQwfPpx+/fpx//33a8fm1ZKamgrAgw8+yBNPPKFdnzBhAmvXrgVOzcOKiopi9erVHms2b97M5s2b6dKlC1FRUezcuZMXX3yRQYMGATXzsDp37ozD4eDw4cO0bNkSk8lEdXU1Cxcu1BpY69atY9myZRw4cED7zvLycrZu3UpQUBAvvPAC6enpREdHaw0sgLvuuotXX32VESNGeHznN998g9PpPLcHfAbU+ZxnpiGch6oy6s8nwiljRhlrFuG83DP2aB/BnJV7/vBYQJMRurdrpubS6cQnwql3nwinjBllrFmEU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwnkxfCVlTmYs3UV6doF2rV10KEPvaIvF1+ssd54bsjxHkT4RTpVRnz6Z0F3D6vR5UhaLhTvuuIOnn34ab2/vs97ndruZOXMmn376KQUFBSQkJPDiiy/SsWNHj3V5eXmMHz9emydVXl7OgAEDPNZYrVasViv/93//x+jRo/H396e0tNRjHtaKFSsAOHr0qDYHqlOnTlx99dXk5uZSXl5OYmIiRqOR6upqHnroIW1GVkVFBaGhoWRlZQEQHx/PsmXL2L17N7169dJ2W9U2ybKysoiMjKRp06bk5eXxxhtv8O233+J0OgkKCiIoKIgjR44A0KNHDwC6du3qUdP+/fu56aabtJ9NJhP9+vXjk08+ITExkc2b
NzNp0iQAbrjhhnN+XwqFQqFQXG68PPBqxs/52aNpZTLWXFcoFAqFQqFQKBQKGZixdBcZOQUe1zJyCpi+ZBfD+3e8NKEUCsVlj64aVjabjYEDBxIVFaXNk5o4cSLl5eWMGjXqrPfOnDmT9957j5EjRxIXF8f8+fN5+OGHWbJkiXaUn9Pp5JFHHgFg0qRJlJeX88wzz5CSksJLL72kuX7++WdsNhsxMTG8/fbbbNiwgVmzZnHvvffy4osvkpeXp+2iWr16NS+88ALh4eE888wzbN68Gbfbjc1mIzw8nMDAQA4dOgTA448/zqpVq9i1axf+/v7YbDYA2rdvD8DXX39N79696dOnj8fOsNp1rVu35siRI6xYsYKHH36Y7OxsVqxYgdVq1XZPNW3alDZt2rBr1y4ee+wxNm7cyN69ewkICOC5557TnE888QRt2rShZcuW3HPPPdp1Ly8vxo8ff97v7nTMZjVU7vc0hAF+KqP+fCKcMmaUsWYRTpkyxjYP5uOXbiJ1Ry77DttoHRlEjw4RFyOibmsW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwXizf0ZMOj51VtbjckJ5dwAl7OU1D/S9pRpFOvftEOFVG/WaUDYPb7XZf6hC1TJ8+nWnTprF+/XrtiL7PPvuMsWPHsn79esLDw//wvoqKCrp3787999/P8OHDAaisrOTWW28lOTmZMWPGALB8+XJGjhzJihUrtOP62rRpQ3V1NYsWLaJDhw4ADB48mJ9//pm+ffsybtw4AB599FG2bt2K2+3GYrEQERHBzp07efXVV+nfvz8A9957L9u3bwcgJSWF8PBwunbtSnFxMY0bN6aoqIiEhAS6dOnChx9+yNVXX838+fPZtm0b9913H40aNaKsrAyz2czNN9/M7t27ycjI4K233uL222/n3//+N+vWrSMyMpK8vDwiIiK48847eeeddzCZTGRkZADwzjvvMHPmTIKCgigqKsLb25svv/yS2NhY7ZnNmDGDjz/+mMLCQgwGA76+vpjNZoqLixk5cqTW2Dtf3G43BoOhXvcqFAqFQqFQKBQKhUKhUCgUsrB973H2/lZAfKtQOsU1udRxNH7encfYDzef8fPRj1zL1Ql//HtahUKhuBB0tcMqJSWFbt26ecyT6t27N6NHj2bjxo3069fvD+/bvn07JSUl9O7dW7vm7e3NzTffrO2EqvXHxcVpzSqAoKAgHA4HGzZsoEOHDlRWVrJlyxZ8fHy0OVEA/fv3Z8OGDXz33XdERkYycuRIdu7c6XF83oIFC/jHP/7B7t27tXudTidhYWGkpKRo6+x2Ox9++CGVlZUAlJWVATBgwACeeuopbd0777xDRkYGFosFgIKCAgwGA99++61HU+iTTz6huLhY+9lkMuHt7c2PP/6ozbA6vVkF0LNnTyZNmsSIESOYNm0aq1atYsSIEeTn5/Puu+8yYMAAAgIC/vB5nw2Xy43dXnre913umEz6H+CnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU49+/IKShn78VZKyqq0awF+ZsY83JUmIfXbuXQxM/p7nf0vo1u8jRQWOurlVn925KhZhFPWjJcDVqvfOe8601XDKisri7vuusvjmtVqJSwsTJv3dKb7AI9GFEBsbCxz5syhvLwcX19fsrKy6qyJiYlh//79muPgwYM4nU6qqqo81tY2fGrnSVVV1fwPSkFBAU2anPobEF5eXhiNRnx9fQGoqqrS1tYSGBiIwWDAbK55/OXl5UBNc+t0vLxqBhie7nK73djtdo9mmtls1ly1lJeXc+2111JUVITZbObzzz/nn//8p/b5gQMHgJpjCB977DGthoCAACorK8nLy6tXw6omp/qX8Uw0hAF+KqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfC
KWPGi+H7fbMKoKSsijGztvLef5IvyA0XnjEsyI920aFk5BTgOu1sLqMB2kSF0tjqe8HPQP3Z0adTZdSnTyZ01bCy2+1YrdY614OCgrQ5TrVkZmYyfvx40tLSMBgMmEymOkfRWa1WbZ6Ur68vdrudwMBAZsyYwaeffkpBQQGhoaEUFxdz8uRJ4NS8KIPBwMqVKxk3bhxeXl4kJyd7fO7n54fBYGDq1KlkZ2eTnZ1Ns2bNOH78uEeGqqoqTp48yUsvvcSGDRtwOBxER0fjdrsJCQkBoLS0ZkfS999/T3p6OmlpaVgsFnx8fIBTDavanwcNGsTJkyfJy8vjqaee4sSJEwQGBmrf2bJlS0aOHEmbNm2YPn06W7du5ZVXXuG7775j+vTpADRv3hyA48ePs3TpUj744AOqqqrw8fHBYDAQEVH/WR1qhlVdGsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhFPGjDLWLMIpY0YZaxbh1KtvR+aJOs2qWkrKqth9sJD2MY3q5b6YNQ/r154PFu9kZ1aBdq1tdCiP39n+gn73p/7sXBxUxotDQ8goG7pqWJ0rNpuNgQMHEhUVxeTJk1m4cCHffvstEydOZNSoUWe9d/fu3Xz11VeMHDmSuLg4Zs+ezdGjR9m1axepqals2bIFAH9/f44dO8akSZMoLy/nueee8/CYTCYCAgJYtWoVnTp14plnnmHRokWUlZVhNJ76A1k7H2rx4sXcf//9+Pv78/HHHwPUac7t27ePwsJChg0bxrZt21i/fr3H5z4+Pvj4+LBnzx46d+5MXl4eixcvxsvLy6Nhdcstt7BhwwZsNhsnT56kdkxZamoqeXl5hIeH06xZMwwGA8XFxXTo0IE777yTmTNnUlhYSKNGjfDz8zvPt1KD0WggJMRSr3tlwGqt33P9q3winDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOGTPKWLMIp4wZZaxZhFPGjDLWLMIpY0YZaxbh1Jsvt+DQWT8/crKU5M4tL+g7LkbNISHw+rDryM0vIfeEg4jGFiLC6nca0x+h/uzo06ky6tMnE7pqWFmtVo9ZTLXYbDaPI/AWLlyIw+FgypQpBAcHc/DgQb799lsWLFjA0KFDCQ+vGfpnt9sxGAzavYGBgWRkZDB48GAGDRoEQOfOnUlMTMTpdDJs2DBtN1NJSQnvvvuudizge++9R3Z2NgUFBVrWsrIymjVrxtGjR3n77bdJSEigefPmHDt2TMsaGBhIUVERbdq04csvv8RsNtOrVy+WLVvGkSNHALR8Xl5eBAUF8e677xIREUG3bt3YtGmTdqSg1WqldevWdOnShSVLlgBgNBq55pprKCws1L7z5MmT/Oc//6nzHKuqqvjxxx+58847mTJliraLKysri59//hmA0NBQTp48yaFDh2jRosV5vT9QM6zOhMmk//NQVUb9+UQ4ZcwoY80inDJmlLFmEU4ZM8pYswin3n0inCqjPjPKWLMIp4wZZaxZhFPGjDLWLMJ5sX3Hi8ooLq/G6msiLLj+v4yOCD37vc0b+etqPlSAj4mrE8Kx28vqnet0ZPyzI2PNIpyyZrwcsFob6AyrmJiYOrOqiouLyc/P95gnlZKSQrdu3QgODtbuA3C5XGzcuJF+/foBNfOmIiIitCaU1WqlqqqK3r17a67amVPe3t78/PPPVFZW0qFDB8LCwjy+89lnn+Xxxx/n8OHDALRq1Yqqqir69+/Pv//9b21d//79OXLkCIcPHyYyMpLg4GCKioqYPXu21pgqLi5m2bJl5Ofne+RPSEhg0aJFmmvmzJls2rSJnJwcunbtSkxMDJs2bWLRokU8//zzxMXF0b9/f7755htat26t3RcZGcnevXuZOnUq3333HYsWLSI+Ph6ADh06AJCdnU1JSQklJSUez7uiogKAbdu21athBWqG1dloCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFO
vftEOFVGOXwinCqjHD4RTpVRDp8IpwwZS8qczFi6i/TsAu1au+hQht7RFouv13n72rQKJcDP/IfHAgb4mUloGaLmQ+nEqXefCKfKqE+fTOjqMMXk5GR+/PFH7Ha7dm3VqlUYjUaSkpK0a1lZWR7NpMTERAICAvD399caXk6nkzVr1mizpwCaNm0K1BznV8umTZuoqKiguLiY8vJyvL29sVgsOJ1Oj2wrV67E19eXEydOADUNK0DbcQU1O8H27t2rZQRo3LgxBoPBY77WqlWrMBgMFBUVAdCiRQuMRqM2y6qWdevW4e3tzcGDB7XnY7PZ2LRpk7amoKCAjIwMjzoBcnNzmTFjBi+//LL23T4+PrRs2RKAl156icceewyTycQTTzzB9OnTiY6Oxs/Pj44dO9KzZ08UCoVCoVAoFAqFQqFQKBQKmZmxdBcZOQUe1zJyCpi+ZFe9na8MvJoAP899BAF+Zl4ZeHW9nQqFQnE5oKsdVgMGDGDevHkMGzaMoUOHkpeXx5tvvsmAAQO0Y/4ACgsLWbRoESNHjgRqGjFDhw7lf//7Hz/99BObNm1iwYIFFBQUsHfvXjp27IjFYqFly5YYDAZGjBjB8OHDKSsr480336RNmzZkZGRgs9nw9fXFYrGQl5dHp06dcDqdhIaGcvz4cRISErDZbEDNziyA+fPns3DhQsxmM/7+/lgsFsrKyrR1zZs3Z/v27SQnJ+N0OgkKCsLhcNCxY0fS09M96j9w4AAdO3akurqakJAQTpw4QbNmzTRXp06d6Ny5M0OHDtXumT9/Pq1bt+aWW27RrvXr148TJ05QXV3N/fffr82jSkpK0nJ/++23TJs2DYApU6Z45IiNjdV2r9WHCxm8eLnSEAb4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRThkzylizCKeMGWWsWYTzYviOnnR47KyqxeWG9OwCTtjLaRrqf97eZo0D+GBETzJyCjiY76BlmIU2UaH1zlmLLO9FtFPvPhFOlVG/GWVDVw2roKAg5syZw7hx4xg2bBgWi4W7776bZ555xmOd2+3G5fLcUjdkyBBmz57N3r17efTRR2ndujU+Pj6YTCYmT55MXl4eY8eOBSAqKorhw4djNpu5+eab6dq1K88//7zmqqiowGAwYLFYKCwsxG634+XlRWBgoLamdq6Uv3/N/yiVl5djt9tp06aNtgsLanZduVwurFYrhYWFlJaWUl5eTmRkpEfDyu12YzKZ8Pf3x2azYbfbCQwMxNvb28OVk5OD1WqltLSUyspKKioqaNOmDWbzqVdZVlZGXl4eJpMJk8mkNakiIiK0Nffccw/e3t5MnTqV22+/nYSEBCZPnqw1+dxut8eusHPFaDQQEmI57/tkoSEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRjl8Ipwqoxw+Ec7LPWPmsZKzfu6odF3Q78GSQiwk/fmy8+Zyfy9/lVPvPhFOlVGfPpnQVcMKanb3zJ49+6xrQkNDufvuuz2uGQwGTCYT//rXvxg5ciTTp09n2rRpTJkyRdsttGnTJpYvX86zzz7L5MmTtXs///xzDAYDQUFBVFRUYLfbiYmJYcWKFQBUVlZy6623kpmZSdeuXQHYuXMnAM8//zz33HMPAKmpqQwePBhAm1e1f/9+zGYzKSkp2veNGDGCH374QVtz7Ngx3G433bt358MPPwSgqKiIG264gePHj2vrFi5cSFlZGevXryc4OJi4uDhuuukmli5dyjPPPKPtQjt48CBXXHEFCxYs0L6zS5cu/PDDD9jtdqxWK+Hh4cyePZsBAwbwwgsvALBjxw7WrFlDeno6GzdupEePHn/6vn6Py+XGbi/984WSYTLpf4CfyqifAayno/eaRTj17hPhVBn1mVHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMI58Xw+Xud/S9zW7yN
FBY66uUGfdYs2iljRhlrFuGUNePlgNXqd867znTXsDoXYmJitBlRtRQXF5Ofn6/NtkpJSaFbt24eR9v16dOH5cuXs2zZMoYNG6Zdz8rKIiIiAl9fXzZt2oTL5fLYweXt7c1NN93EvHnzNP/u3bsBPOZOJSUlYbFYcDgcxMTEUFlZybFjx6iursZms2mNp9ocV111FVDT6AIwGk+9tODgYLp27cr3339/1pri4+NZu3YtGzdupF+/fhw6dIiqqioOHDhAly5dPJ7Rb7/9RpcuXdixYwclJSUUFBQQHx8P1OwqW7t2Lb169eKrr77S5mbVBzVQ7sw0hAF+KuP5c7EHsP4Reqv5r3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMI54X4woL8aBcdSkZOAS73qetGA7SJCqWx1feiZNVTzX+VU8aMMtYswilrRllokA2r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSqrZSJuVlcVdd93lcV+PHj0wGAwsWrSIRYsWaQ2b3NxcbrrpJu0+gJycHB5++GHS0tLw8vIiLCwMl8vFtddeC9Q0fxo3bsxnn33Gl19+SXZ2NhEREbjdbgICAoiMjOTAgQNUV1djNBp55pln2Lt3Lw6HgyuuuAJA+2dWVhb+/v5s3brVoyZfX1+AOjUtWrRI24m1YMECrFarlrv2n2PGjGHdunVs27aNqqoqKioqAJg4cSJeXl6EhoZiMBh4/vnnPY5D/OqrrwC09QqF4s852wDW4f07XppQCoVCoVAoFAqFQqFQKC6YoXe0ZfoSz7+k2iaq5i+pKhQKheLi0iAbVn369OGdd95h9+7dPPbYY/z222989dVXtG7dWjsWz263s3r1alavXs3atWsB8PHxwc/Pj6NHj3LTTTeRmJjI7Nmzyc/Pp3fv3tp9Xl5eGAwGfvrpJx555BFKSkqYN28eAC1bttTWtWzZku3btxMbG8vw4cNZu3YtOTk52qwom80GQLNmzbQdUK1atWLGjBkAJCQkaK7Q0FByc3Pr1AR41LR3715mzpzJLbfcQk5ODhaLhfz8fLZu3erxnStWrODgwYOMHz8eHx8fHn/8cQAiIyO1nVx///vfWbFiBf/4xz9IT0/n8OHDOJ1Oqqur6d+/f73fj9mshsr9noYwwE9lrB+iBrBezIwifSKceveJcKqM+swoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhPNi+YICfHju/kTyi8qwCxgDcPo/9eYT4ZQxo4w1i3DKmlE2GmTDasWKFXh7exMfH8/UqVOxWCwkJSWxadMm8vLytAaPy+XC7T61X7eiooKysjLCwsJIT08nJSWF+Ph4XC4XK1eu5JprrgHA7XbjdDpJSkpi9uzZmM1m4uLi2LNnD7t379b8ubm5xMTEYDKZmDRpEhEREQQGBlJYWOiRNzc3l+7du7NhwwYcDgcJCQmkpaWRlpbGAw88AIDD4ahTU6tWrfjtt988atqyZQsAa9asAWp2gsGpmVq1bN26lY8++qjOHKqtW7dqRwW+/vrrXHHFFSxevJicnBz8/f2prq7m9ttvx9+/fr9gNxoNFzRs8nKnIQzwUxnPD9EDWGvRU81/lVPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmi+m80h+Cft25xHR2EJEWMAF+0T+vkum9yLKJ8Kpd58Ip8qoT59MNMiGVUpKCklJSXzwwQfaNbvdTteuXbWdTFarlT59+jBixAhtzfbt23G73Vx33XVMmDBBuz5hwgRtF5bVaqWqqorWrVsza9Ysbc1nn33GqFGjSEtLo2fPngQGBnLw4EFeeOEFBg0apK3r3bs3WVlZHD58WJtZ5Xa7eeedd7Sfc3Jy6NWrF/v379e+s6SkhOTkZI+aJk6cyMcff6zVFBAQQFFREe+//752hCFA586dKS0tpbKyUvuO
2iZeLbNmzeLhhx8mIyNDu+bt7c1jjz1Go0aNePnllxk6dChvv/32Be2ucrnc2O2lf75QMkwm/Q/wUxnVAFa9OPXuE+FUGfWZUcaaRThlzChjzSKceveJcKqM+swoY80inDJmlLFmEU4ZM8pY88V0lpQ5mbp4JzuzTp2G0j4mlMfvbI/Fr/5zpvVcsyifCKeMGWWsWYRT1oyXA1ar3znvOmuQDas/mk9ltVoJCwvTZjjFxMRo/7mW2mZNp06dPK7HxsYyZ84cysvLiYmJAaBJkyYea7Kzs/H29ubgwYPa5+np6dp6qGlMnThxQst47bXXYjQa8fPz0xpJtZ8B5Ofna1mdTifNmzf3+M4jR47g5eWlrQ8LC6OoqIjo6GhtTXFxMQ6HA7fbzaFDh7Q8YWFhGAynfpGelZWF0WgkLy+vzvNcvnw5MTExpKWl0bx5cxITE+usOR/UQLkz0xAG+KmM54cawCrOqXefCKfKKIdPhFNllMMnwiljRhlrFuGUMaOMNYtwyphRxppFOGXMKGPNF8P5wVc768yZ3pVdwPtf7bwoc6b1WLNonwinjBllrFmEU9aMstAgG1Z2ux2n08lDDz1EWloaFouFO+64A6vVqs1wSk5OZtq0adjtdqxWK1Czwwrg8OHD9OzZk4KCAhISErjhhhtwu93YbDYSExMxGAzk5eXx5JNPkpqaitlspqqqitDQUM3fpk0b1q1bx5YtW3jrrbfIzs4mJCQEu90O1MyS8vb21ppMb7zxBkuXLsXhcGCxWAgODqakpOYosdpj+w4cOKDV5OfnR3FxsUdNV1xxBfv372f16tUsWbKE3NxcQkJCtOdSm9/b25ujR49y8803c/z4ccLDwykvLyc8PJzi4mJt/eTJk5kyZYr2c21jbMGCBdx77731fj9qhlVdGsJ5qCpj/RnWrz0f/O5vb7WNrvnbWxf674Neaxbp1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNF8spcs60XmsW6RPhlDGjjDWLcMqaUTYaZMPK7XazaNEi2rRpw+TJk8nLy2PixIkYjaf+IAwYMIB58+YxbNgwhg4dSl5eHj/88AMGg4FZs2YxcuRI4uLiGD58OG+//bZ2n4+PD1arlf3791NUVMSQIUNYv349O3fuxMvr1Lbhrl27AvDhhx9y3XXX8be//Y25c+fWydqmTRvWr1/PvHnzePDBBzl+/DjLli3D19dXm6/VtGlTDAYDP/74I1FRUTz22GN8/fXXFBYW4nQ6Nde1117LypUreffdd/n73/9+xu/09/enqKiImJgY7r//ftasWcO2bdsICQnBx8fHY21tM+6ee+5h0aJF/N///Z/HUYLni5phdXYawnmoKuP5ExICrw+7jtz8EnJPOC7a+dino7ea/wqn3n0inCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpY0YZaxbhlDGjjDVfqPOvmDOtt5r/Cp8Ip4wZZaxZhFPWjLLQIBtW3t7eVFRUMGXKFIKDgwGorq5m1KhRmM01JQUFBTFnzhzGjRvHsGHDsFgsdOzYkS1btvDggw9qc6diYmIoLCzE7XZrx/YFBgZis9lwu91MnTqVhIQEXnjhBSZMmKA1mRo1agTUzIratm0bv/76K7169SIvL4/U1FTNFR4eDkBoaChz584lIiKC//73v7zxxht4e3trNfn4+FBRUYHNZmPq1KkkJiby97//ncmTJ2s1NWvWDIDg4GC+/fZbLBYLDzzwAN9//z1ZWVnadyYmJrJ//36ys7PZuHEj0dHRPPzww8yaNYvGjRt7PEuXy0WHDh3IysoiLi6Ovn37XtC7UTOs/hiTSf/noaqMF+4L8DFxdUI4dnvZBc2tOh291yzCuSu7gEMnHLQMs9AmKlR3+UQ4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1nyxnCLnTOu1ZpE+EU4ZM8pY
swinrBkvB6zWy3yGlclkIiAgQGtWAVx33XUAVFZWatdiY2OZPXu29vNHH33Eli1baNeunXZt/vz59OvXjz179uDr66td9/HxYePGjdrPLpeLiRMnajuemjZtCkC3bt14//33tXVvv/02qamp+PvXbC+uqKjQvqdFixbaupkzZ1JWVqb9bDabCQwMJDU1VbuWm5vL5MmTtZpq8/3zn/9kxIgR2rrDhw+TlZWlZYqPj2fbtm1s2bJFm2OVmZnJrFmztEZbLb6+vrz77rvceOONDB8+nIuBOp/zzDSE81BVRv35RDj1mDGvsJTX5v5MSVmVdi3Az8wrA68mLLh+RzZczHx/hVNllMMnwqkyyuET4ZQxo4w1i3DKmFHGmkU4ZcwoY80inDJmlLHmC3X+FXOm9VbzX+ET4ZQxo4w1i3DKmlEWGmTDqrq6msLCQh544AF27tyJxWIhPj4ewGPX0u+p3an06aef8uabb1JQUEB8fDwHDx6kurqa8vJyfH19MRgMVFRU8PDDD5OWloaXlxcdOnTA7XZrxwIeO3YMgF27dtG3b1+ys7OJiIjQmkqlpTW7jGqP4Bs9ejR79+7F4XDQvn17CgoKPLKeS03l5eUAbNiwgTVr1pCbm0t0dLQ2N+vYsWPExsYSERGBzWYjOTkZu91OeHi4trPqxhtv9HgmZWVl3Hjjjbjdbt5++22+//57Jk+eXKexpVAoFKL5fbMKoKSsinFzfua9/yRfolQKhUKhUCgUCoVCoWhIDL2jLdOX7PKYZdUmKpShd7S9hKkUCoVCcS40yIZVRUUFbreb3bt389hjj/Hbb7/x1Vdf4e3tTVXVqV92Dhw4kNzcXNauXQvUNJEMBgM//fQTN910E4mJicyePZvi4mIAbDab1nAyGo389NNPPPLII5SUlPDJJ5/g5eWl7Viy2WwAHD16FH9/f4YPH87atWvZtm2bx+fV1dV4eXmxceNG+vXrR6tWrZg5cybV1dXa7qtzranWuXfvXjp37syAAQNYtGgRR48e9fg8KysLq9VKRUUFjzzyCEeOHGHx4sWYTCbuv/9+7Tvz8/Nxu90EBgYSGBhIWFgY27Zt4+6772b9+vX1fj9msxoq93sawgA/lVF/PhFOvWbckXmiTrOqlpKyKnYfLKR9TP0a6XqtWaRPhFPGjDLWLMIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmi+mMyjAh+fuTyS/qAx7eTVWXxNhwRc+T0bPNYvyiXDKmFHGmkU4Zc0oGw2yYQVgMpmIj49n6tSpWCwWkpKS2Lhxo8cxey6Xi+rqau3nqqoq3G43Xbt2JT09nZSUFOLj4ykrK9OaVlCz68jlctGlSxdmz56N2Wyme/fupKameqwDiIiIwGQyMWnSJCIiImjbti27du3SPi8tLcXpdNK9e3c2bNiAw+GgQ4cObN++HZfLc1vgudQEkJCQwIkTJ5g0aRLR0dE0b96cI0eOaJ8PGTKExx9/nAkTJjB37lxKSkowm81UVVWxZ88e7UjEI0eO0L59e3bu3Mnw4cO57777uOuuu0hPT+fQoUMeRxieK0aj4YKHV17ONIQBfiqj/nwinHrLmFtw6KyfHzlZSnLnlvX2w8Wt+Uh+Cft25xHR2EJEWMBF8+rtvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtZ8MZ2ifj+l55pF+UQ4ZcwoY80inLJmlIUG2bAyGo00b96cTz75RLtmt9vp0qWLdjwewLx58zzuq92BNHLkSK666irt+kMPPcSPP/5IUFAQUNPYCgwMZNasWdoat9tN27ZtteZR7Yyq6667jldffVVbt2DBAnbt2qXNuqrN884772h+gFtvvZXDhw+fV021s6zuuecej51SY8aMYcGCBVqm0NBQAF577TWcTiffffcdb7zxBk888QTHjx/X7quqqiIsLIy9e/dq12qbeYcPH65Xw8rlcmO3l573fZc7
JpP+B/ipjPrziXDqNWNE6Nn/h7x5I39dDMYtKXMydfFOdmadOlqifUwoj9/ZHoufV729en0vIn0inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinrBkvB6xWv3PeddYgG1Z/RO1RfRdr7ZnWuN3uP7xenxz1df3+vjOte+ONN1i5ciUzZ87k0KGa3QsxMTHa53fffTcvvvgiq1atokePHhw9epQ1a9YA0KRJk3Ou4/eogXJnpiEM8FMZ9ecT4dRbxjatQgnwM//hsYABfmYSWoboYjDuB1/tJCOnwOParuwC3v9qJ8P7d7wgN+jvvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPK5XJx5MgRHnjgAXbu3InFYiE+Ph4Aq9V6xvtqdzi99dZbHDx4kIKCAuLj48nOzgZOzbAym82cOHGChx9+mLS0NLy8vOjQoQPV1dXaLqbS0ppdRKmpqfTt25fs7GwiIiIICKg5GsrLy8sjzzPPPMPevXtxOBy0b9+ew4cPexwJeC41eXt7A/Dll18yb948cnNziY6OxuFweGQCmDx5Mh9//DF+fn78+9//xuVy0aNHD6KiorQ1t99+Oxs3buQ///mPds3f35/AwEBatmx57i/kd6gZVnVpCOehqoz684lw6jnjmIe7MmbWVo+mVYCfmTEPd72g/165WPmOnnR4DO2txeWG9OwCTtjLaRrqf0kzinTKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKesGWWjQTasAKqrq9m9ezePPfYYv/32G1999RXe3t74+Z06VmrgwIHk5uaydu1aAMxmMwaDgS1btnDTTTeRmJjI7NmzPY4RBPDz88NoNPLTTz/xyCOPUFJSwieffIKXlxeBgYEea48cOUJsbCzDhw9n7dq1bNu2zeNzf39/vLy82LhxI/369aNVq1bMnDmTqqoqjEbPP7jnUhNARkYGnTt3ZsCAASxatMjjaEGAZcuWMWXKFHx9fXnkkUdYsmQJhw8fJicnh+PHj2u7p7p3787JkyeJiYmhY8eOpKamcvz4cRo3bozZXL8/GmqG1dlpCOehqoz684lw6jFjSIiFBeNvI23vcfb8VkB8q1A6xdV/t+fvudB8mcdKzvq5o9J1wf/9p8f3Itonwql3nwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcKqM+vTJRINsWPn4+FBZWUl8fDxTp07FYrGQlJTExo0bPRotLpeL6upq7Wd/f3/cbjdXX3016enppKSkEB8fT2lpKSUlJR4zplwuF126dGH27NmYzWa6d+9Oamqqdhxf7dqmTZtiMpmYNGkSERERxMXFsXfvXu1zk8mE0+mke/fubNiwAYfDQYcOHdi2bRs+Pj7nVVOtMy4ujhMnTjBp0iSio6Np2rQpx44d0z5funQpAOXl5UyePFn7jsOHD/PBBx8wZswY3G43J0+exGQyceTIEXJzc2ndujXh4eHs3LmTDRs20LNnz/N+N2qG1R9jMun/PFSVUX8+Ec6GkDG2WSCd4ppgt5fVe26ViHz+Xmc/8tXibdTFnC1RThkzylizCKeMGWWsWYRT7z4RTpVRnxllrFmEU8aMMtYswiljRhlrFuE8XlRGcXk1Vl8TYcEX/svjhlCzyqjPjDLWLMIpa8bLAav1Mp9hZTKZCA0N5ZNPPtGu5ebmcsMNN1BZWaldmzdvnsd9VVU1x0zdf//99OnTR7ver18/9uzZg6+vL1AzI8rHx4dZs2Zpa1wuF23atMHpdAI1jSqAdu3a8f7772vr3n77bfbu3asdHVhRUQHAq6++SosWLbR11113HWVlZedVU22+66+/nhEjRmjrnnzySY4dO+aR
6ZdffqFfv37Mnz+fmTNn0q1bN+68807Ky8s1N8Add9zBhAkTNNfq1at56qmn2LRpU70aVqBmWJ2NhnAeqsqoP58Ip4wZL9QXFuRHu+hQMnIKcJ02StBogDZRoTS2+upizpZop4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPh1GPGkjInM5bu8jjSvV10KEPvaIvF1+uS5/srnCqjHD4RTpVRnz6ZaJCHKVZXV1NUVORxlN/GjRuBU3Oe/ojanUoZGRnaNafTyZEjR6iurtaaOQaDgYqKCnJycrR1mzdvxu12a7Opjh07BuCxBmDfvn3AqXlStbuoNm/erK2x2WwUFhZy+u6vc6mpNt/+/fs9vjMrK8sjU1ZWFv7+/syZM4eJEyfSrVs3AGJiYrS1FkvNsVV5eXkertpGVm1zT6FQKBSnGHpHW9pEhXpcaxNV8//4KBQKhUKhUCgUCoXi0jNj6S4ycgo8rmXkFDB9ya5LlEihUCgU50qD3GFVUVGBwWDgmmuuISAggKuuuopffvmFoKAgj0bL72dYlZaWYjAYmDlzJrNmzSIqKorGjRtrO51sNhu+vr5UV1djNpu59dZb8fPzo3379uTk5BAaGorBYNDWAhw4cICEhATCwsJo27Yt69ev9/jc6XTi7e3NK6+8wtixY7nyyisxmUz4+PhoDahzranWuX79ehISEmjevDnR0dFkZ2drnx86dIgffvgBh8OB0Whk3LhxfP755wwYMACn00lBQc3/YAcHB2M2m9m4cSMJCQkEBQXRqVMnfvrpJwBCQz1/IXs+mM0Nsg8qlIYwwE9l1J9PhFPGjBfTFxTgw3P3J5JfVIb9Ih8tcfo/LwZ6fo6inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzyljzxXIePenw2FlVi8sN6dkFnLCX0zTU/5LlE+1UGfWZUcaaRThlzSgbDa5hZbPZcLlchIeHExoayoEDB0hNTSU+Pl47rq+W38+w+umnn3C73Vx//fXs3LmT7OxssrOzGThwIB9//DFQ02DKz8/Hz8+P2NhYdu/ezdatWwkPD6dNmzbaDKs9e/YA0KFDBwoKCsjNzWXdunXceeedLF68WPvOtLQ0nE4nSUlJ/Prrr+zZswez2Uz//v1ZuHDhedX0yy+/ANC9e3cyMzM5cuQIhw4d4sEHH2Tu3LkAOBwOrWaXy0VRURFbtmxhy5YtADRq1EhzVVVVYTAYsFqtFBcXs27dOozGmn+ZaneSnS9Go4GQEEu97pWBhjDAT2XUn0+EU8aMF9Mn6r/nZHwvIpx694lwqoxy+EQ4ZcwoY80inDJmlLFmEU4ZM8pYswinjBllrPlCnZnHSs76uaPSdcH//5zeav4rfCKcMmaUsWYRTlkzykKDa1jVNnl69erFf//7XwA+++wzxo4dS0hICEFBQdra02dYVVRU8PPPPwMwefJkfHx8qKys5NZbbyU9PR2DwUBQUBCrV6/G6XTSvXt3ZsyYAUBqaiqDBw8mKCiI2NhYAJYvXw7AU089xXXXXQfAiBEjtO8ICgri2LFjZGdnY7FY+OijjwAoKirihhtuYN++fVrWc62pdqfYK6+8QkxMDAADBgwgLS1N+87Y2Fh69uzJsWPH+Oyzz4CaYwt79epFp06dcLlc2jNISEggNjaWb775RjvusLbZFRYWVq/343K5sdtL63Xv5YzJpP8Bfiqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhPN4URnFF3GHOuj/OTaE96Iy6jOjjDVfLKe/l+Gsn1u8jRQWOurl1mvNIn0inDJmlLFmEU5ZM14OWK1+57zrrME1rFJSUggODtZmLQH07t2bUaNGcfLkSa2R83u2b99ORUUFANnZ2cTHx+Pt7c3NN9/MF198QUREBL6+vqSkpGC1Wjl+/Lh2b1JSEkFBQfz222/cfPPNVFZWsmPHDoxGI1lZWVrD
qk+fPlojKyYmhtTUVNxuNw6HA5vNRlBQEMHBwSQlJfHzzz9z5ZVXnnNNhw4d0jJlZWVpdfbp04cJEybg5eVFixYttO/etGkTbrcbg8FAcHAwUDOv6pprrgFg9+7d3HXXXYwYMYL//ve/5OfnU1FRwT333APAVVddVe93pAbKnZmGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLK4RPh1GPGkjInM5bu8jiiq110zQxQi2/9Tvb4PXp/jnp8L6J9IpwyZpSx5gt1hgX50S46lIycAlzuU9eNhpr5w42tvhecV281/xU+EU4ZM8pYswinrBllQXeHKWZmZvLQQw/RsWNHkpKSePPNN6msrNQ+z8rK4sorr+THH3/EbrcDYLVaCQwMxO128/bbb9OhQwf69++vHaFXex+Av78/Tz/9NJ06daJr1678+uuvlJSUkJSUpK2LiYlhz5493HrrrbRv355bb71Vmzl1/fXXc/DgQaqqqoiLi+Ojjz4iKSmJjh07MnPmTACaNWtGZGQkWVlZhISEYDAYuPfee7WacnNzKSwsJDk52aOmlJQUbrrpJtq3b8+//vUv/Pxq/tZZUlKSlj8iIoKJEyfStWtXOnXqxKpVq3C5XFx11VV4e3sDkJycjM1m44cffiAtLY3u3bsDcPToUe07KyoqcLvdvPLKK/Tu3Zt//vOfjBs3DoCmTZuesfGnUCgUCoVCoVAoFArFuTJj6S4ycgo8rmXkFDB9ya5LlEihUFzuDL2jLW2iQj2utYmqaZQrFAqFQt/oaoeVzWZj4MCBREVFMXnyZPLy8pg4cSLl5eWMGjUKALvdTteuXcnJyWHYsGEMHTqUvLw8SkpqzqgdPHgwcXFxzJ8/n3vvvZemTZuyfv167HY73t7e+Pv7k5OTw913301YWJjWZOrXr5/mj46Oxu12U1BQwLBhw0hPT2ft2rVYLBY6dOjAtm3bgJrm1+7du+natSvJycnaEYTXX3+95rJarZSVlZGTk8MDDzwAwJw5c4Ca4/xq14WEhFBZWUl1dTVPPfUUGzZsoKysjMjISMLDw7UZVH5+fmRmZnLLLbfQpk0bPvzwQwBuu+027Tl26tSJsLAwhgwZAoDBULMd+oorruCWW24BoFWrVixYsACXy8WAAQMwm83Mnz8fgPj4+At6j2az7vqgl5yGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNV8s59GTDo+dVbW43JCeXcAJezlNQ/0vacaG5BPhVBn1mVHGmi+mMyjAh+fuTyS/qAz7RTyKVM81i/KJcMqYUcaaRThlzSgbumpYLVy4EIfDwZQpU7Rj7Kqrqxk7dixDhw4lPDwcAF9fX+bMmcO4ceMYNmwY/v7+uN1u4uLiGDRoEACdO3emc+fO2Gw2ze9yuThx4gSDBg1i1apVFBQUEBISwvHjxz3W7dmzh7Zt29K8eXOmT5+O2WzG19eX2vlOtaSlpXHffffx888/8+6772pzn4qLi7U1drsdg8FA//79WbZsGQ6Hg5CQEAoKCigtLSUwMBCArVu3cv3111NZWcmUKVOwWCyYTCacTqfHd2ZmZjJo0CDWr1/P+vXrady4MSUlJRQWFnqsmzlzJmPGjPHYZfbCCy9gNte88uuuu45du3YRFBTEp59+itFo1Oo7cuTIeb230zEaDRc8vPJypiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDj1ljHzWMlZP3dUui7K//+o9+eot/fyV/hEOGXMeDF9R/JL2Lc7j4jGFiLCAi6aV8/vRdTvp/RcsyifCKeMGWWsWYRT1oyyoKuGVUpKCt26ddOaVVAzy2n06NFs3LiRfv36YbVaKS4uJjY2ltmzZwOwadMmBg0aRNu2p7b2ent7c99997F27Vqg5tjAqqoqWrduzYsvvsiLL74IwGeffcaoUaNIS0ujZ8+eBAYGcvDg
QR566CGt+VWbIysri8OHDxMUFATUNMCefvpp7eecnBx69erF/v37te8sKSkhOTmZ0aNHM3r0aAAmTpzIxx9/rNUUEBBAUVER//znP7npppu07+zcuTP5+flUVlZq32GxWHjhhRe0/Bs3buThhx8mIyPD41leccUVFBcX88orr2A2mxk9ejQrVqygR48eADRp0gQAh8NBVVUVXl5ePPHEE7z//vvk5eXV8w2Cy+XGbi+t9/2XKyaT/gf4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUseaL5fT3Mpz1c4u3kcJCR73coP/nqNf3ItInwiljxovpKylzMnXxTnZmndrt2D4mlMfvbI/Fr/5z5NR7kaNmEU4ZM8pYswinrBkvB6xWv3PedaarhlVWVhZ33XWXxzWr1UpYWJg2wykmJkb7z7XUNms6derkcT02NpY5c+ZQXl6uzWSqbdTUkp2djbe3NwcPHtQ+T09P95jh5Ha7OXHihJbx2muvxWg04ufnpzWSaj8DyM/P17I6nU6aN2/u8Z1HjhzBy8tLWx8WFkZRURHR0dHamuLiYhwOB263m0OHDml5wsLCtCP+ar/TaDTWaTLNnTsXk8nEvffey5IlSwA4fPiw9rnT6cRkMpGamkpeXh4RERFUVVXx9ttvU1lZSXl5Ob6+vtQHNVDuzDSEAX4qo/58IpwyZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVEOnwin3jKGBfnRLjqUjJwCXO5T142Gmnkyja2+FyWv3p+j3t7LX+ET4ZQx48XwffDVzjpz5HZlF/D+VzsZ3r/jBblBvRe9OlVGOXwinCqjPn0yoauGVe3Mp98TFBSkHdmXnJzMtGnTPNZu374dODU7qhar1Yrb7cZms5GYmIjBYKCg4NT/SDudTtasWUNQUBAbN26kY8eO2menHxG4adMm7Ha7dt3b21trMs2YMYNPP/2UgoIC/P39CQoK0uZp1e5mysnJ4cknnyQ1NRWz2UxpaSlWq1X7jiuuuIL9+/ezZcsWnnnmGbKzs7WjAmu/MzExEV9fXxwOB2+88QZLly7F4XBgNptp3LixxzGEeXl5TJ48mZiYGDp37ozRWNO9bNasmbamVatWVFdXc/fdd1NQUIDBYMDf3x+j0YjL5cJut9e7YaVmWNWlIZyHqjLqzyfCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxpovpnNYv/Z88LvdHW2ja3Z3XOj/76j356jn9yLKJ8IpY8aL5RM5R069l4uDynhx0HtGGWsW4ZQ1o2zoqmF1LgwYMIB58+YxbNgwhg4dSl5eHj/88ANGo1GbcQUwcOBADhw4oP3s4+NDUFAQe/bsYc6cObRu3ZoFCxZQVFREZWUlfn5+TJ48mc2bN/Phhx8yZswYAgICKCsr480336R79+78+OOPmq9NmzasX7+et99+mwEDBuBwOFiyZIk2IwqgadOmGAwGfvjhB5o0acLgwYNZsWIF2dnZlJWVaeuuvfZaVq5cydixY7n++uvp0aMH8+bNw+0+7a+gAS1atGD//v188sknPPDAAxw4cICUlBQqKiq05tLkyZNZunQpTqeTqqoqBgwYwOLFiwEoLT11VJ/VasXb25uSkhL69u1LdnY2W7du1b7z9F1c54OaYXV2GsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUQ6fCKceM4aEwOvDriM3v4TcE46LPj8H9P8c9fheRPtEOGXMeKG+v2KOnHov+nSqjHL4RDhVRn36ZEJXDava+VS/x2azaUfvBQUFMWfOHMaNG8ewYcOwWCwkJiayadMmKioq8PHxAWrmS1VVVWEwGLR7IyMjcTqdzJo1i4KCAhISErj99tv5/PPP6dq1K9dddx3NmjXjww8/pKSkhGeeeQYvLy9uvvlm7r//fvr166e5WrVqBUBwcDCLFi0iIiKCV199lTfffJOqqiot
u7+/Pw6Hg4qKCmbOnEliYiIPPfQQL7/8MhUVFcCpnU/+/v5s2bKF9PR0/vWvf7Fz505++ukn7TubN2/O/v37CQgIYO7cuURHR/PWW2/x3HPP4XQ6AfD19dWON8zKyqKkpIRWrVpRVFTEt99+y2+//UarVq3w9/enVatWHDhwgIULFwI1DbGKigry8/M95oidD2qG1R9jMun/PFSVUX8+EU4ZM8pYswinjBllrFmEU0TGXdkFHDrhoGWYhTZRoRfsawg1q4z684lwqoz6zChjzSKcAT4mrk4Ix24vu6C5Vaej9+fYEN6LyqjPjBfLJ3KOnHovctQswiljRhlrFuGUNePlgNXaQGdY/dF8quLiYvLz8z1mSsXGxjJ79mzt502bNrFp0yays7OJj48HYN68eUycOJE1a9Zou49iY2PZt28fGzZs0O697777MJlM2n0tW7bEbDZTVVXFqFGj6NevHwDr1q3TMgLaMXvvvfceXbp00XwfffQRR48e1X729fXFYDCwZcsW7Zrdbufll1/Wjg6MjIwEoHfv3rz++uvaupdffpmffvpJ+67aXU8rV670aCiNGTNGa1g1atRIu+50Ojly5AhHjhwBapp4I0eOZNGiRSQkJLB8+XLcbje//fYbbrebqKgo/va3v+Hv74+XV/0Hb6rzOc9MQzgPVWXUn0+EU8aMMtYswiljRhlrFuG8GL68wlJem/szJWWn/nJQgJ+ZVwZeTVhw/Y6zOR091izaKWNGGWsW4ZQxo4w1i3DKmFHGmkU4Zcx4ob6/Yo6cei/6dKqMcvhEOFVGffpk4qI0rA4cOMChQ4c85j6dzj/+8Y9z8iQn151PtWrVKoxGI0lJSR5rMzMzGT9+PGlpaVqDZfny5VrjqXY+VXJysod/6dKlvPHGG6xcuZKCggKcTicul0ubf+Xt7c211/4/9s48Lqp6///PmYFhGRwWRQx3MAEXRE3NjRat1G56M7tZplbmpSK7V/O2+C3TLDNvZmVqWpq7Vje7LmlmlpFmVmqK+wK4gCDIMjAsAzPz+2N+HJ1QbzLzsYOf83o8eiTnfM7zvF7ngzjDez6f98389NNPzJs3jylTpuDr60twcDAtW7ZUikvV/tavX8+UKVNIT08nIiKCs2fPUlVVRXl5Of7+/vj4+FBYWMirr77Kpk2bsFqtREZGAijb71X//9ixYzz66KPs2bMHk8mkHD99+jQtW7ZUCm9Dhw4lLy8Po9FIq1at3Fal9e7dm8DAQMLCwrDZbBQUFBAQEKA809atWytjZ82axfvvv19jHm666aY/NF+aNGnSpEmTJk3XUr8vVgGUlFUxZfGvvPePxMtcpUmTJk2aNGnSJK+SBrVl3poDbr2s2rQII2lQ2z/RlSZNmjRp0nRpeVSwOnXqFP/617/Yt29fjX5L1dLpdH+4YHWp/lTTp09n6NChbv2phg0bxu7du+ncuTOzZs0iJyeHyZMns2DBAsLDw936U40aNUq57q677uKNN95g4cKFDBkyhIYNGzJnzhx0Oh2hoaHKuL///e9s27aNrKwskpOTOXDgAF9//TVt2rRx86vX61m1ahVdunThmWeeYcWKFcp2gEVFRfj7+ysrlVatWsXw4cMB1+ovvV6P1WpVxgLs27ePyMhIkpOTSUlJ4eeff3Y7b7fbAcjNzeXhhx8GYPHixcCF/lTh4eGUlZXhdDp5+umnadGiBf/5z39Yu3YtNptNWa0F8Ouvv2IwGBQuuLYdfOedd/7QfF1OnjbOvR5VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUxv8fadyKtRrKpWSVkVh04V0D6q/iXP/y+pNbNIpoweZcwsgimjRxkzi2DK6FHGzCKYMnr0Ji84yI/nhnUit7AMS7kds7+B8BDPe6to8+IdaR69I7V7lDGzCKasHmWTRwWriRMncvToUSZMmMBNN92krDqqrS7Vn2rIkCGMHTvWbVx2djZOp5P3339f2RqvqqqKSZMm8eGHH1JU
VERcXBwLFiygadOmynUOh4PS0lJatmzJhg0b8PHxQafTERQUxIIFC5g0aRLgKgiBa7XV+++/T2RkJI888giLFi1i3759xMfHK7yGDRuSlpZGamoqnTp1IiQkhIMHDyr3dDqd2O12oqKiWLVqFSaTiaFDh7JixQqys7Pdcvn4+KDX63n33Xdp2bIl9913H59//rlSsDKZTDRr1owuXbooxbHu3buzfft2ZUtAcBXShg4dygMPPABAZmYma9eupby8nPz8C5+oKS8vx+Fw4OPjQ3BwME2aNOHAgQNs3bqV+++/v1ZzqNfrPG7YeT2rLjTw0zyqjyeCKaNHGTOLYMroUcbMIpie8rLyT1/xfOb5UhI7N/PoHmrLfC2YMnqUMbMIpoweZcwsgimjRxkzi2B6k7f7yDmO/HSS2OZhdIxp6DWumjOL+l2NbN87IngimJpHOXgimJpHdfJkkkcFq927d5OUlKSsHPKGft+f6lJq1KgRMTExbn2cBgwYwKRJkxg3bpzSd+pSfktLS5k5cyZxcXEAdO/encaNG5OSkqKMS0lJwcfHhwcffJDx48cDrsLTf//7X77//nvi4+MxmVz/0I8cOZLHH39cuXby5MkcPHiQkpISt1Vhq1atIjg4WPl6/fr1ylZ+1cfbtWvHJ598oozZv38/n3/+OadPu35BExYWRllZGVOnTnXrddW7d28KCwuVr4ODg922CRw8eDAdOnRgwIABbiusevXqxdGjR9mzZ49ybOrUqUybNo3BgwdjMBgu+RyvJIfDicVSetXXXe8yGNTfwE/zqD6eCKaMHmXMLIIpo0cZM4tgeosXGXblF/yN6weqpmm4TPMikql2ngim5lGdHmXMLIIpo0cZM4tgepOXk1/K5I9/rtEPc9JjXWkYWvt+mGrOLIopo0cZM4tgyuhRxswimLJ6vB5kNgf84VVnHhWsQkNDqVevnieIWiktLY377rvP7ZjZbCY8PJy0tLQrXgcQFRWlHIuKisJqtZKVlaX0nTp27BhVVVVu43Q6HS1btlQY1QWrgAD3X56Ul5cDrlVN0dHRBAQEYDAY3IpVTqeTiooKdDodAM2aNbsk69y5c27MqKgo8vLyKCoqUngWi4W8vDy34lhUVFSN57B9+3YA2rdvX8PvzTffjMVioUWLFiQkJFBSUkJ+fj7h4eGXfZZXktZQ7vKqCw38NI/q44lgyuhRxswimDJ6lDGzCKanvDbNwwgK8LnktoBBAT7ENQtVXdNwGeblWjDVzhPB1DzKwRPB1DzKwRPBlMXj74tV4NpaeNLCn73SD1ONmUUzZfQoY2YRTBk9yphZBFNWj7LIo4LV0KFDWbt2LcOGDavVapzaymKxcKntB4ODg5Xt8y53ndFoxM/PTzmWmJjI+++/j9PppFu3bgQFBVFSUoJOp6Nnz56X5UdGRgLw+eef8+GHH5Kfn09sbCxnzpwBLvSdql+/PsePH+exxx5jz549+Pr6Eh8fT3l5ufLMjEYjOp2Oo0ePMnDgQNLT04mMjCQkJASj0YjD4frm7tWrF3q9nrFjx3LkyBGsVitBQUE4HA4GDhzolmnu3LkMHz6c1NRUAgIClEyDBg1SxjVr1ozx48fTpk0bdu7cydy5czlx4gRGo9Gtp9fVSuthVVN1YT9UzaP6eCKYMnqUMbMIpoweZcwsgulN3qTHujJp4aU/Fe3J6w81ZxbFlNGjjJlFMGX0KGNmEUwZPcqYWQTTWzytH6bmUW08EUzNozo9yphZBFNWj7LJo4JVixYtcDgcDBo0iPvuu49GjRpxqcLVnXfe6clthGrAgAHMnDkTgOHDh3P+/HlWr15NcHCw26qlkSNH8ttvv9G5c2fAVWQCOHDgAH379qVTp04sWrSI8+fPu/GrC1u//PILjz/+OCUlJSxbtozAwEC3vlM6nY7z588TEhLCuHHj2Lx5M7t27XLb9rBRo0a0aNGC7du3M3jwYIqLi9m8eTM6nY4HH3zQLdM777zDoUOH
GD16NCtXrsRmsxEVFaVkOnz4MOvWraNfv37YbDY++eQT/Pz8qKioAFyrwGojrYfVlVUX9kPVPKqPJ4Ipo0cZM4tgyuhRxswimN7ghYaaWPna3ew5co7DJ/Ol6jshiimjRxkzi2DK6FHGzCKYMnqUMbMIpqc8rR+mGKaMHmXMLIIpo0cZM4tgyupRFnlUsBo7dqzy5zfffPOSY3Q6HYcOHfLkNjVkNpvdejRV6+Kt8i53nc1mo6KiQllltWHDBnx8fKisrGTp0qWYTCaCgoIoKioiJydHKfA4HA4cDofCr96+r3Xr1uzfv5+UlBRiY2OprKykoKBAGVdQUABAly5dWLRoET4+PvTo0YNt27a5eTUYDNSrVw+DwcCMGTOIjIwkPj6eAwcOKOOys7NJT0+nR48ebN68meLiYiIjI8nPz2f9+vWMHj1ayWQ0GomJiWH27NnY7XYSEhLYt2+fkqlBgwaYzWbmzJlDdnY2Op2O4OBgAgICKCws5NSpU0RHR1/13Gg9rC4tg0H9+6FqHtXHE8GU0aOMmUUwZfQoY2aAddvTOXyqkDbNQ7i7R0uPeSI8Bhr13Ng0FJNRX+u+VRerLsyL5lF9PBFMzaM6PcqYWQRTRo8yZhbB9BZP64epeVQbTwRT86hOjzJmFsGU1eP1ILP5GvWwWrJkiSeX11qX6tFUXFxMbm6uW9+pS10HkJ6eTmxsLAApKSnccMMN2O12vv32W8BViNuwYYOykglcWW+++WaFkZeXB0Dv3r157rnnlHs89dRTbNmyRRlXWFgIwMyZM5XCk9PppH379kofLJvNRlVVFREREaxbt05hrV+/nmeffVZZZbVt2zacTiejRo3i6aefZsiQIbz++us8/fTTpKSkKAWrlJQUevbsSbNmzdi7dy8LFiygbdu2dO3aVcnUoEED3n77bU6dOsXAgQNZtWoVixYtYseOHVczFZeUtj/n5VUX9kPVPKqPJ4Ipo0cZM4tgyuhRlswHM/J5a9VvytepJ87zybcneO6hBGKbhXno0DseS8oqmb/2APvT85Vj7VqGkTSoLSZ/X08tqnJeRDNl9ChjZhFMGT3KmFkEU0aPMmYWwfSUp/XDFMOU0aOMmUUwZfQoY2YRTFk9yiKPNlPs2rXrH/rP20pMTOTHH3/EYrEox7766iv0en2NvlMXq1OnTgQFBbFx40bl2IkTJ8jPzycxMVE51qdPHwD27NmjHNuxYweFhYXccsstAJw+7VpKvnv3brd75ObmAtCgQQMAysvL0el0fP3118oYi8WC3W6nXr16AJw6dQqn08nJkyfdMlXfo2FD11Y3aWlpBAcH8+yzz3LzzTczefJkAKKjo90KeGlpaVgsFhYtWsS0adPo3r07ZrOZ8PDwGoW+119/nUGDBikFvOLiYsxmM82aNbvsc9SkSZMmTZo0XX+6uFh1saavuPTxP0Pz1x7gYEa+27GDGfnMW3PgT3KkSZMmTZo0aapLennkTQQFuH92OyjAh5dH3vQnOdKkSZMmTZo0XSyPVlhdrOPHj5OZmQlA48aNadWqlbfQNTR06FAWLVpEnz59KC8vx2g0YrPZuP/++2v0ncrKymLz5s0A+Pn58fe//513332XFStWUFZWRmVlJT4+PowaNUq57q677uKFF15g9erVrFmzBr3eVdfr1asX8fHxgKvo5OPjw969e+nWrRslJSWYTCaKiooA1/aE/v7+lJWV0bJlS1599VWmTZuGzWbD398fg8GgrJyqvsbPz69GJnBtqwhw7tw5iouLMZlMHDx4kA4dOhAZGUlUVJSykgtcq7p++eUXBg4cSJMmTRg2bBi//vorwcHB5OTkKOO6dOmiFMhWrVqlHO/Xrx++vrX/lLInTc+vV9WFBn6aR/XxRDBl9ChjZhFMGT3KlHnND2lXPL/xp5Pc06t22wN6y+PZ81a3lVXVcjhhf3o+eZZyGoUF/qkeRfFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9CbvhgZB
zHn2Vg5m5HMq10qzcBNtWni+klzNmUUxZfQoY2YRTBk9yphZBFNWj7LJ44LVN998w7Rp05RiVbWaNGnCCy+8oKxW8racTqfbny/+uloOhwO73X7Jay++5lLX/hFVF5L+yPV/5H5Op9ON+ftx1SuzLBaLUmg6deoUp06dqsEBWLt2LWvXrlWOFxUVcfbsWQAqKiooLXX1mvL19UWn0+Hn54evry8TJ078n3kuJ71eR2ioqdbXX++qCw38NI/q44lgyuhRxswimDJ6lCHz0TNFVzx/+HQhIzz8991TjyeyS6543mpzePwaRG3zci2YMnqUMbMIpoweZcwsgimjRxkzi2B6k9cz1MTl9+epvdScWRRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9//33PPPMM0RGRjJ27Fiio6MB1zZ7n376KWPGjOGDDz5w227PG1q1ahUVFRV89913yiqlTz75hMmTJ/PUU08pq6yWLl3qdl1FRQXz589n9OjRjBs3DoDu3btjs9lYsGABkyZNAmDTpk1UVlZy//3389prrwGu/lGjRo1i3759xMfHYzabqayspEOHDnz66afKPYYMGUJqaqrSryogIIAjR44wefJkHnjgAcC1Aqp79+7KqqjqsTabjZSUFCXTnDlzePfdd5UCVKtWrdi2bRv9+/dnxowZyj0TExPJz7/wiePQ0FCGDBnCmDFj+Mtf/kJSUhITJkwgKCiIhIQEABYvXozJZFLuCTB58mQOHTqEr68vNpsNo9F41XPjcDixWEqv+rrrXQaD+hv4aR7VxwM4V1hGcbkds7+B8BDP/7HT5kWOzCKYMnqUKXPrJsGknjh/2fOxTUP+9Cbkgb66K543GfV/ukdRPBFMGT3KmFkEU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9Xg8ymwP+8KozjwpWc+bMISYmhuXLlxMYeGELlj59+vDwww/z0EMPMXv2bK8XrFJSUujevbtS2AHo378/r7zyCtu3b2fw4MGXvG737t2UlJTQv39/5VhUVBTnzp1TijYAW7ZsAVw9r6rVs2dPQkJC+P7774mPj1d6PHXu3NntHtX9pvLy8mjSpAn+/v44nU769eunjAkODsZgMFBcXAxAs2bN0Ol0NGvWzC1T9T2q+2KFhIRgt9uVPlrVCgkJ4dy5c0qRKSoqirS0NBYsWIDZbGbw4MFMmDCBkpISoqKiAFefq+qtCLt06eLG69KlC5MmTeLBBx+85HP8X9Iayl1edaGBn+ZRHbySskrmrz3gtv1Vu5ZhJA1qi8m/9lt2VkubF3UyNY9y8EQwPeXd3b0Fn39/+W0B+9/c/E9vQh4eHEC7lmEczMjHcdEidL0O2rQIo4HZ/0/3KJongimjRxkzi2DK6FHGzCKYMnqUMbMIpoweZcwsgql2ngim5lEOngim5lGdPJnk0WaKR44c4a9//atbsapagYGB3HvvvRw5csSTW1xSaWlpSuGlWmazmfDwcNLSLvyy5cSJEzz66KMkJCTQs2dP5s2bB+B2bWJiItnZ2WRmZlJeXg7A3r17AZg5cybx8fE88MAD7N27l5YtWyr8+vXrA7BhwwY6duxI165defHFF0lNTVU8AkoB6oEHHqB9+/bcddddTJ8+ncrKSqxW16eAjUYjBoOBzMxMevbsSUJCAo8++ijr16/HYDBQUFAAXCiGvffee0qmKVOmkJGRgdPp5PTp00qm7du3M2/ePO666y5iY2MB1xaGPXu6Fr0PHDjwks/WYDCwZMkSbr/99quYEU2aNHlb89ce4GBGvtuxgxn5zFtz4E9ypEmTputdzz2UcFXH/wwlDWpbo89EmxauYr4mTZo0adKkSZMmTZo0adKkqW7LoxVWfn5+yiqdS6moqAg/Pz9PbnFJWSwWzGZzjePBwcGKn6KiIkaOHEmLFi2YNWsWOTk5TJ48Gb1e7+Zp6NChfPTRR9hsNjZv3ozNZiMrKwuAUaNGKSvIHnzw
QXx9fZXiXPXqqOzsbO6//37Cw8NZsGABVVVVyv3B1UcLID8/n+TkZPbv38/ChQuJiIhw28avqqqKqqoq2rVrR2JiIsuXL+fs2bM0bNhQYVVUVACQlZXF8OHDAde2hz4+Pm73HDp0KLNnz8ZkMvHxxx8TFBRESUkJCQkJynaJ1au3EhMTSUxMpLKyko8//phz586xf/9+unXrVuv58fHRmsr9XnWhgZ/mUT28s+etbiurquVwwv70fPIs5TQKq/lBgWvpUSRT7TwRTM2jOj3KlrldVAOWvNSXL39M5+DJQto0D+HuHi095nrTY3CQH88N60RuYRkWL2+XevH/1cYTwZTRo4yZRTBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT86hOjzJmFsGU1aNs8qhg1a1bN5YsWULv3r3p2LGj27m9e/eydOlSZUXPtdaqVauwWq28//77yiqnb7/9li1btpCTk6MUboKDg0lOTuaNN97g//7v/zCZTDidTmJjY3nkkUcA17Z/nTt3VopRAD///DMAAwYM4IcffiA/P58mTZq4rfAC1yo0o9FIt27dmDdvHj4+PjRt2pSSkguNw7OzswHo0KEDhYWFvPvuu0RERGA0GrHb7cq4HTt2ADBo0CDWrVuH1WqlefPmpKenu90zNTUVnU6Hn58fOTk5+Pq6tg+7/fbbazyn++67T9mu8OjRo2zevJm5c+cyYsQI5bqrkV6v87jh+fWsutDAT/P45/NOZJdc8bzV5vD475k2L+pkah7l4IlgepP38N3tvMa6WN70KOq1hprnRRRTRo8yZhbBlNGjjJlFMGX0KGNmbzIzc0s4eiiHyAYmIsODvMKsltqfo5rnRRRPBFPtPBFMzaMcPBFMzaM6eTLJo4LVv/71L4YOHcpDDz1EfHw8LVu6PoWbnp7Ovn37qF+/PuPHj/eK0YtlNpuVFU4Xq6ioiODgYODSfa46d+7Mli1b2Lp1Kw888IByPDAwEJ1Ox88//8yePXt45JFHCA8PV84bjUYeeughVqxYofAPHHBtyzV48GBmzpwJgNPppEuXLhQXFxMcHIzNZiM3N5eAgABmzZql8LZs2cJTTz1FaGgoANu2bQMgISGBCRMmKOOefvpptm7dqtzz2LFjAIwePZo33ngDcK02q+5BVT3utdde49577+WLL75g6dKlrFixgi+//JKqqqrLrk4DmDZtGrfccgv//Oc/OXXqFNHR0VeYhUvL4XBisZRe9XXXuwwG9Tfw0zyqhxfoq7vieZNRT0GBtVZsbV7kyCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYMnqUMbM3mSVllcz9IpXUtAu7T7SPCuOpe9tjCvCsr6/an6Oa50UUTwRT7TwRTM2jOj3KmFkEU1aP14PM5oA/vOrMo4JV06ZNWbt2LfPmzSMlJYUNGzYAEBkZyYgRI/j73/+u9HrypqKiomqsZCouLiY3N1fpT5WWlsZ9993nNqZNmzYA/Pbbb24Fq7S0NCIjI/H391e4ubm5Ne5ps9mUrfTOnTuHTqcjLS2N3r17A64eUeHh4RQXFxMVFcWpU6dwOByUlpa6FdOqC0HVRbG0tDR8fHzIzMx0u2eTJk2orKxUMlV7uriHl9lsJigoiNLSUpo2bQq4CobVq66GDRum8N59913effdd9u3bpxybNGkSY8eOJSQkhD59+tRYKVcbaQ3lLq+60MBP8/jn88KDA2jXMoyDGfk4nBeO63WuXi0NzP4e+9XmRZ1MzaMcPBFMzaMcPBFMGT3KmFkEU0aPMmYWwZTRo4yZvcGcszq1Rl/fA+n5zF6dyrgHEjx055Lan6Ma50U0TwRT7TwRTM2jHDwRTM2jOnkyyaOCFUD9+vWZMGGC28og0UpMTGTu3LkMHz6c1NRUTCYTsbGx6PV6ZQvCS60k6tSpE3q9nh9++IFbb72V/Px8YmNjycrKom/fvsp1BoOBI0eO8Nhjj7Fnzx58fX2JjIwEXKug
AEpKSoiIiOCTTz7h888/Jz09ncjISM6fP09AQABNmjRh165dgKuQNXbsWI4cOYLVaiU2NhaAG2+8UblnvXr12L59u1umoCDXUvfqTFarldDQUObPn8+///1vsrKyaNmyJTabjYYNG2I0GgF48MEH+fTTTwkNDaWoqAiDwUB5eTlDhgxh4MCB+Pr6YjQaMZlMFBQUAK4eW5999hmfffYZgHK8NtJ6WNVUXdgPVfOoLl7y4PbM+d2nCdu2dH2a0JO/Y9q8eEeaR+9I7R5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzN5iiuzr6y2PdYkngimjRxkzi2DK6FHGzCKYsnqUTR4XrP4MDRgwgHfeeYdDhw7xxBNPcPLkSVavXk3r1q2V3lSAUoDZvHkzAH5+fvj7+5Obm0vfvn3p1KkTixYtIjc3l/79+yvX6fV6DAYDv/zyC48//jglJSUsXboUuLBKC1wryXbv3k10dDTjxo1j8+bNZGRkKMWtat1www1s376dwYMH07x5c+bPnw/gtprJZDJRVFRUIxPglql58+b89ttvdO7cmaFDh/LZZ59hs9lo0qQJABUVFaxfv57IyEhGjx5NixYtmDFjBnv37uXnn3/m9ddfB6Bhw4ZERESQmJhIZGQkNpuNdevWKT23qgtzVyuth9WVVRf2Q9U8qoMXGgpTk3uTlVtCVp7V6/u1a/OiTqbmUQ6eCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyuhRxsyeMq9FX19Q/3NU27xcC54Iptp5IpiaRzl4IpiaR3XyZNJVFaxefPFFdDodU6ZMwWAw8OKLL/7Pa3Q6HVOnTq21wUtpw4YNGI1GYmNjmTt3LiaTiZ49e7Jjxw5ycnKIiIjAbDZTWVmJXn+hmllRUUFpaSk33HAD+/fvJyUlhdjYWBwOBxs3bqRbt27KdeBa2bRo0SJ8fHyIjo7m+PHjnDp1SuGfOnWKqKgoDAYDM2bMIDIyksDAQCwWC3Chp1RWVhY9evTg+++/x2q1cuONN5KamsqhQ4cA17Z+BQUFNTI1adKEM2fOuGVKT08nPj6evLw8ZsyYQcuWLTEYDOTk5ACwePFigoOD+fTTT/HxcU1v06ZN2bt3L6dOnWLPnj1KoSwmJobNmzeTl5eHTqejVatWgGvLwuprr1ZaD6tLy2BQ/36omkf18QCC/AzcFBeBxVJW675VF0ubFzkyi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKaMHutC5nOFZRSX2zH7GwgP8c4v17zhUWRfX1D/XNeF7x0ZPcqYWQRTRo8yZhbBlNXj9SCzWVAPq507d6LT6XA4HBgMBnbu3Pk/r9HprvwiozZKSUmhZ8+ezJkzRzlmsVjo2rWrspIpKiqKkJAQZs+erYzZtm0bAEOGDOHpp59Wjr/xxhvKKqzq3lAtWrRg4cKFbmNOnDjBTz/9RJcuXWjRogWpqamMHj2aRx55BACn00nnzp0pKSnhzJkzNGvWDIPBgN1u55133lEKWN9++y1PPvkkx48fV+5ptVpJTEzkww8/VO75xBNPcObMGSVT48aNSU1NJSkpSdnCsLi4mJtuuomsrCxsNhtpaWmcOXOG7t27X/LZrVmzRilYvfPOO27n1q5dy7/+9S+lcFVbaftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXmUg+cNZklZJfPXHnDbdq9dyzCSBrXF5O/rDYseebwWfX099VgXeSKYMnqUMbMIpoweZcwsgimrR1l0VQWrb7/99opfXyulpaVx3333uR0zm82Eh4eTlpYGuPpcffDBB269rDZs2ADAoEGDalybmZlJhw4dMJlM6HQ6t5VZlZWVbN68mfr16yv82NhY1q1bx65du1i0aBH5+fk0adIEq9WqeGzSpAmNGjUiJyeHl156iW3btuHr60twcDAhISFkZmYC0KtXLwDy8vIYOHAg6enpRERE
kJOTg8lkUu7ZqlUrUlNT2bhxI6+88gpWq5XIyEj0ej12u53Tp08zevRo7r33XrKysli6dCnHjh3D6XSi1+upqKjgnnvucctusVh47733+Oqrr8jNzQVwy65JkyZNmjRp0qRJkyZNmjRpur40f+0BDmbkux07mJHPvDUHGPdAwp9j6ndKGtSWeWvci2ptWriKapo0adKkSZOm61Me9bDKysoiLCwMf3//S54vLy8nPz+/Rk8nT3VxEepiBQcHU1RUBMDQoUNZunQpycnJJCUlkZOTw+bNm9Hr9TRt2lS5ZtiwYezatQuA1157jYqKCv7v//6P9PR0Fi9eTOvWrVm5ciWFhYW0a9dO4bdv3x6Ar7/+miFDhtCwYUMWLlyIXq/H4XAo41q1akVmZiY7d+4kKSmJAwcO8PXXX9OwYUMKCgoAaNSoETqdjoMHD9KlSxeeeeYZVqxYgc1mIyQkRGElJCTwxRdfsGHDBkaMGAHA0qVL8fX1paKigqKiIjp16kSDBg149tlnadGiBXPmzOHdd98lNTWVJk2a0LlzZwCmTZuG3W7nm2++wdfXl4SEBDZv3kxQUBA33nijR/Pj46MVvH6vutDAT/OoPp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimDJ6VGvms+etbkWgajmcsD89nzxLOY3CAv9UjwDBQX48N6wTuYVlWARsW3jx/693ngimjB5lzCyCKaNHGTOLYMrqUTZ5VLDq06cP06dPr7Fqp1rffvstzz77rNKr6VoqODiYxYsXM2XKFJKTkzGZTLRr1459+/a5jcvOzsbpdK0v79q1KxEREbz11lsUFBTw4YcfUlRURFxcHAsWLOCDDz5Q+ltVX3PDDTewYcMGfHx86N+/P9u3b+fcuXMKv7y8HIDQ0FDef/99IiMjeeSRR1i0aBEGg0EZ53Q6ldVUqampdOrUidatW7N9+3ZlTPW9mzRpwqpVqzCZTAwdOpTPPvvMLdOqVauwWq28//77mEwmXnzxRfR6PVlZWUo/rOjoaN555x3Onz+PwWCgsLAQgAULFpCQkFDr567X67zS+PR6VV1o4Kd5VB9PBFNGjzJmFsGU0aOaM3/yzRH2Hs2lY0xD7u/T2ivMasn0HOsKTwRTRo8yZhbBlMmj9rNW86g2nqfME9klVzxvtTm88r7eW7lF/o5B7XOttu+da8ETwVQ7TwRT8ygHTwRT86hOnkzyqGBVXbS5nCorK4VsL2c2mykuLq5xvKioSOkTBRAdHc2iRYuUr5cvX86uXbuoqKjAz88PcK1uCgoK4siRI8q1N9xwAwUFBYwbN47Bgwe78W+44QYAzpw5A8Df//53HnroIWXMiy++yOrVqxVWdUFo06ZNyhin08nKlSuVgpXNZgMgPj7eze+WLVvYunWr8gxPnToFwIwZM4iPj1fGHTp0iN27dyv3TElJoXv37gQHB/Pcc89RWlrK0qVLefjhh5V+WPfffz/vvvsu//jHP3jyySd56KGHKCkp8ahYBeBwOLFYSj1iXI8yGNTfwE/zqD6eCKaMHmXMLIIpo0c1Zz6Qfp43l+9Rvk49cZ4lGw7x4sOdiGsRpgqPongimGrniWDK6FHGzCKYMnnUftZqHtXG8xYz0PfKvcZNRj0FBdZasUGe51iXeCKYMnqUMbMIpoweZcwsgimrx+tBZnPAH151dtUFq5KSEiwWi/J1YWEhWVlZNcZZLBY2bNhAeHj41d7ifyoqKkrp61St4uJicnNziYqKuuJ1AOnp6cTGxgKuXlONGzcmMjJS2drwxhtv5OjRo273cDqdpKen07NnTwClV1VZWZnbPQICXNXTxo0bK+ftdrtbMU2n0+Hv768UrKoLUb9nNWrUCEDxVX3+4hVcF9+zeqvD6h5fb775Jhs3buTDDz/kpptucuvxdebMGXJzcwkNDeWRRx5h165d+Pv789JLL/Hiiy9iMtX+E0xaQ7nLqy408NM8qo8ngimjRxkz
i2DK6FGNmS/+BerFemPZbha+cHutuRdLhudY13gimDJ6lDGzCKYMHrWftWKYMnpUW+bw4ADatQzjYEY+jos+h6zXuXpENTD7e8Xv9f4c6yJPBFNGjzJmFsGU0aOMmUUwZfUoi666YLVo0SJmz54NuAovU6dOZerUqZcc63Q6+ec//+mRwUspMTGRDz74wK2X1VdffYVer1cKSpdSp06dCAoKYuPGjUrBqqioiKqqKu6++243/po1a5RVVACff/45hYWFfPjhh6xatYrmzZuj0+nYsmULo0aNUsYdPnwYgKCgILd733HHHZSXlxMXF8eYMWMoKSmhSZMmigeAffv2kZCQgNFo5I477qBZs2YAbj23DAYDL7zwAhUVFURGRvL3v/+djIwMdDodRqMRuFAszMzMxNfXl/nz5xMREeHW4ysvLw+A119/XVnB5XQ6Wb16NcXFxbz77rt/bDIuIa2HVU3Vhf1QNY/q44lgyuhRxswimDJ6VGvmNT+kXfH8xp9Ock+vlrXmy/Ic6xJPBFNGjzJmFsGUxaP2s1bzqEaeN5nJg9sz54tUUtPylWNtW4bx1L3tPX5PL9NzrCs8EUwZPcqYWQRTRo8yZhbBlNWjbLrqglXPnj0JDAzE6XTy73//m7vvvpu2bdu6jdHpdAQEBNC2bVvat2/vNbPVGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOJSIiQhk3cuRIsrKy2Lx5MwB+fn4kJSUxa9YswsLCaN26NQ6Hg/Lycrei01133cWLL77Ijz/+yHfffcf58+d5+eWXCQ4OZsaMGeTk5DB58mQAfvvtNyZNmkT//v3ZuXMnu3fvdvNavRKroqKChx9+mD179jB69GgMBoOygqqqqkoZ36RJE26//XZWrlypXFu9Mis3Nxe73U5xcTGDBg3C6XQyYcIEdDqd29aLVVVVZGZmkpCQQP/+/fniiy946KGHCAgIUHpqORwOZWz9+vVp1qwZTz/9NMuWLeOrr77i9OnTboWyPyqth9WVVRf2Q9U8qo8ngimjRxkzi2DK6FFtmY+eKbri+cOnCxmhor4TongimGrniWDK6FHGzCKY17tH7WetOKaMHtWYOTQUpib3Jiu3hKw8K5ENTESGB/3vC69CMjzHusYTwZTRo4yZRTBl9ChjZhFMWT3KoqsuWHXs2JGOHTsCri3q7rzzTlq39m7j2f+l4OBgFi9ezJQpU0hOTsZkMjFkyBDGjh3rNs7hcGC3292OjR49GqfTycKFC8nPz0ev19O/f3+34oyvry/16tWjXr16jBs3Drvdjk6nY/Xq1cqqqB07drB+/XomTpzI8uXL+c9//kNkZCSDBg1izZo1BAcHU1FRgcViISoqiltvvZX//ve/WK1WfH19CQgIoH79+gCkpqYC8PTTT/PLL7+wePFijEaj4r26YHXs2DF8fHyYNGkSH374IVlZWdSrVw+Hw6FsC5idna3k+O233/jtt9+UrwsKCpQVVtXbOnbr1o0dO3bw1FNPcdddd9GmTRv69u3LsWPHalWw0npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGtmVs3CSb1xPnLno9tGqL1nbjOeCKYMnqUMbMIpiwetZ+1mkdv8H7Ym8WxzCJaNwmmV3ykxzzwvscgPwM3xUVgsZR59D19sdQ+LyKYaueJYMroUcbMIpgyepQxswhmXfB4ID2f03lWmoWbaONhz9PrSWazwB5WF+vpp592+7q4uJjAwECqezOJVHR0NIsWLbrimKVLl9Y4ptPpSEpKIikpCYBhw4YpK5mqVVxcTH5+PuPHj2fw4MEMGzaM4OBgpVgFMGDAANavX8/58+dZt26dcnzatGlKP6wdO3bgcDhwOp08//zzPP/88wBMnTqVpUuXKj21Dh06BLi2EazO5HQ66dy5M1arlaioKGw2G9nZ2djtdu68807uv/9+ALZs2cJTTz1Fq1atANi2bRsAt9xyC/Pnz1d8JSUlsXXrVvr16wfA
zp07AYiLi7vkc6yoqLjis72StP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPast8d/cWfP795beq6n9zc63vxHXKE8GU0aOMmUUwr3eP2s9acUwZPKaftTB16a9U/95r6+5MFqw/yEsjb6J5hFkVHkXzRDBl9ChjZhFMtfNEMDWPcvBEMDWPtVNOQSmvL/mVkrILO6kFBfjw8sibCA8J9IZFaeTxZoqpqamMGjWKDh060K1bN37++WcA8vPzefLJJ5XCiFqVmJjIjz/+qKw4gpr9sNLS0pTiUrV69eqFTqcjJSVFOVZZWcnXX39NYmKich1ARkYGGRkZyjidTofD4eDmm28G4OTJkzRo0IBNmza5jTEajQQFBdGkSRNOnTqF3W5Hr9fz9ddfK+MaNmwIoBSs0tLSCAwM5JdffnHL5HS6OqlWZ9q/fz/h4eGsW7eO7t27065dO4YOHcqnn34KUGObR02aNGnSpEmTevTcQwlXdVyTJk2aNF29tJ+1mmqri4tV1bI74LXFv/45hjRp0qRJkyZNQvX7YhVASVkVU7R/+69aHq2w2r17NyNHjiQiIoKBAwfy2WefKefCwsIoKSnhk08+oVu3bh4b/b1OnDjBa6+9xp49ezCZTAwaNIh//vOfGI3GK17ndDr58MMPWbFiBfn5+dx44434+fnV6Ic1cOBAXnvtNbZt20ZpaSkrVqxg48aNbNmyBXD1wwoLC2Pv3r307t2b/Px8ZRu/6n5YFosFX19fmjdvztChQ7Hb7ZSVlSnFo2bNminj4uPj2bp1K4mJiRQUFGAwGCgrK1O2W6zeyu/222/ntddeY+bMmRQVFeHj45rChIQEhdWgQQNKSkro27cvdrudqqoqKioq0Ol0So+v3NxciouLKS8vx8/PD7vdTmpqKnv27OHOO+9UvNVGnjZovR5VFxr4aR7VxxPBlNGjjJlFMGX0qObM7aIasOSlvnz5YzoHTxbSpnkId/do6Q2LUj3HusITwZTRo4yZRTBl8qj9rNU81kZb95ypUayqlt0BP+4/S2JC41rz1ZhZNFNGjzJmFsFUO08EU/OoTo8yZhbBVKvHfSfyahSrqlVSVsWhUwW0j6pfa75s8qhgNXPmTKKjo/n0008pKSlxK1iBqz/SF1984ZHBS6moqIiRI0fSokULZs2aRU5ODtOmTaO8vJyJEyde8doPP/yQ9957j/HjxxMTE8Py5cs5ceIElZWVSj+swYMHs337dnQ6HTNmzCA5OZmysjKqqty/8fz9/XE6nRQXF6PT6QgKCuLcuXMcOHBA6f+k0+mIi4tj48aN6PV6fH19MRgMFBcXu21FGBwcTFBQEBaLBYfDQb169SgvL3dbJQXQunVrvvnmG3Q6HXq9Xhl37tw5t3F2ux273U5FRQX+/v74+flRVFTEkSNHiImJwel0Ul5eDri2IiwqKsLX15eqqqoaWyRejfR6HaFeaD58vaouNPDTPKqPJ4Ipo0cZM4tgyuhRzZkfvrudVziXkkzPsa7wRDBl9ChjZhFMmTxqP2s1j1ejtLPFVzx/PMvCoNs87wOupszXiimjRxkzi2CqnSeCqXmUgyeCqXm8emXln77i+czzpSR2rv3iENnkUcEqNTWVcePGYTQa0el0Nc5HRESQl5fnyS0uqVWrVmG1Wnn//fcJCQkBXAWayZMnk5SUpKwi+r0qKiqYN28ejz32GI888ggAnTt3pl+/fsTGxrJq1SoA1q9fz+LFi9mwYQNRUVGEhIRw8803s2HDBvbt20d8fDzgWqXUsGFDfvjhB+Uezz77LO+99x79+vXDbDZjs9n48ssvmTRpEg888AAAixYt4o033uCrr77iqaeewmw2c/DgQaqqqti6dauSqW/fvpw5c4acnByCg4MB+Pzzz/nLX/7CjBkzANd2g3fddRffffcdTz/9NGazmYKCAoqLi9myZYvSd+vf//43H330Ed988w0xMTGYzWb0ej19+/Zl1qxZiv/evXvz888/U1lZ
ia+v71XPjcPhxGIpverrrncZDPI1GZTRo4yZRTDVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEcy64PFcYRnF5XbM/gbCQ2r3i6uoG+qx9QrnW0WaKSio/YdEZZwXGT3KmFkEU+08EUzNozo9yphZBFOtHiPDrvyaoXH9QI/+7b8eZDYH/OFVbB4VrHx8fHA4Lj+ROTk5BAZ6v6lYSkoK3bt3Vwo7AP379+eVV15h+/btDB48+JLX7d69m5KSEvr3768cMxqN3HHHHWzevNmNHxMTo/StioqKwmazERISwvfff098fDznz5/HZrPRpUsXt3sMGDCA9evXc+bMGeV6h8NBv379lDHZ2dn4+/uzY8cOnnrqKaKiovj666/p0aOHksnpdGKxWHA6nWzfvp2//OUv+Pj4kJOT4+a/uk/W4cOHsdlsREVFUVJSAkC9evWUcSdPnkSv1yvbEbZs2ZJ9+/YpPa2qFRwczLlz5/jtt99qZPuj8nbTu+tJMjQZFM0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY9y8EQwNY+1U0lZJfPXHmB/er5yrF3LMJIGtcXkf3Uf5OzVPpLFGw9fcltAgx56tLvBK/llmBfRPBFMtfNEMGX0KGNmEUwZPcqYWQRTbR7bNA8jKMDnktsCBgX4ENcsVPt9+VXIow0fO3TowKZNmy55rrS0lNWrV9e66HElpaWlERYWxqOPPkpCQgI9e/bkgw8+IDw8XCngXO46gO+++45bb72V+Ph4HnjgAYxGI1lZWcoWeWlpaURGRjJmzBg6duxIamoqW7dupUmTJgqjejVWeHg4AwcOpH379tx1110cP35cYXTq1AlfX18CAgL44IMP6NmzJx06dGDFihU0btxYYSUmJlJWVoavr6+SqWvXrhQVFREaGkpaWhpGo1HpZ3X06FHuuusu2rdvz3PPPUdoaChVVVWcPn2aXr16KVsP9u/fnw4dOhATE8N3332Hj48PgwYNAqBHjx4Aygq47OxsEhISOHbsGODqEaZJkyZNmjRp0qRJkyZNmjRd75q/9gAHM/Ldjh3MyGfemgO14r008iZ+/yFig951XJMmTZo0adJ0/enlkTcRFOC+NigowIeXtX/7r1oerbB65plnePjhh/n73//O3XffDcCRI0c4c+YMCxYsID8/n6eeesorRi9WUVERGzZsoE2bNm49rPR6PUVFRZe9zmKxYDAYmDNnjlsPq8WLF+N0OikqKsLf35+ioiJOnTpFREQEM2bMID8/n5dffpnDhw/jcDj4/PPP+eijjwBYsmQJ999/PxMmTGDChAm8/fbbikc/Pz9iY2NJTU1l+fLlDBs2jL1797Jnzx7Onj2LzWYD4K677uLZZ59l06ZNtGrVihEjRrBq1SoMBgMOh0PJdOutt3Lw4EHeffddBg4cSIcOHVizZg16vV65Z3R0NIMGDWL16tUUFRUpfbfsdjvz589XemsNHDiQCRMm8PHHHxMVFcXSpUuprKx0e8a1lY+P9xrfXS+SpcmgSJ4Iptp5IpgyepQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2Cq1ePZ81a3lVXVcjhhf3o+eZZyGoVd3c4x0Y1D+HhCX7bty+LomSJaNwmmV3xkrT1eLFnmRSRPBFPtPBFMGT3KmFkEU0aPMmYWwVSzxxsaBDHn2Vs5mJHPqVwrzcJNtGkR5g2L0smjglWHDh2YP38+kyZN4vnnnwdg2rRpADRr1oz58+cTGxvrucvfyel0YrPZavSwmjhxImVlZZe9rqqqCrvdzuOPP+7Ww+qWW26hoqJCGVdaWkpRURGrVq1y29bv5Zdf5tChQ8yYMYPbbruNL7/8klatWvHqq68C0LhxY86fP6+s1AJo2rQpqamp+Pn5sWzZMuLi4pg/fz5PPvmksp2ir6+v0gMsMzOTlStXcscddxATE8PUqVOVTC1btgQgKCiIjRs3EhkZyeuv
v86SJUs4cuSIcs9nnnmGb7/9FqvVisFgwG6306VLF5599lmWL19OdHQ0er2eESNGsHjxYp5//nlsNhsNGzbk3LlzAJfsSfZHpNfrCA011epaGXS9Nxm8FjwRTLXzRDC9ycvMLeHooRwiG5iIDA/yGlfNmb3NFPUMQa7nWFd4IpiaRzl4IpgyepQxswimjB5lzCyCqTaPJ7JLrnjeanPU+j3uPbfcWKvr/oiu93m5FjwRTLXzRDBl9ChjZhFMGT3KmFkEU80ee4aa6Pm/h2m6gjwqWAF0796dTZs2cejQITIyMnA6nTRt2pR27drVuujxv6TX64mMjKzRw2rixIlYLJbLXle9aqhPnz7KMaPRSGxsLD/++CPBwcGAq7BVr149pVgFcP/99zNp0iSioqJYv349hw4d4ssvv6Rjx47KmKVLl7Jy5UomTZqkrFYqLi4G4JtvvlH4AE2aNOHMmTPK1waDgcaNG/P1118rxywWC1OnTlUyVa/IGjduHMOGDVPG7d+/nyNHjij9wpYsWYLRaOSHH35g/fr1vPjii7z99tsMHz6cOXPmMGPGDAD++c9/cu7cOb788ksACgsLFWZ4ePhln+OV5HA4sVhKa3Xt9SyDQY4mgyJ5Iphq54lgepNXUlbJ3C9SSU278InU9lFhPHVve0wBV7fXvyiPInjeZIp6ht70KIongql2ngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCqVaPgb5X/r2FyaivdaN0tWYWyRPBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOr2DwuWFUrLi6OuLg4b+GuWldTHPsjYy83xul0es1HbVm/v+73444fP05UVBRGo1E5ZjAYiImJ4dSpU8oxf39/2rZty6FDh3j77bfZu3cvr7zyCuBaPVdbaU3kLq/rvcngteCJYKqdJ4LpDd6c1ak19vo/kJ7P7NWpjHsgwSM2qDOzt5minyHI8RzrGk8EU/MoB08EU0aPMmYWwZTRo4yZRTDV5jE8OIB2LcM4mJGP46K32nodtGkRRgOzv8d+1Zb5WvBEMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkqypY/fLLL7W6SZcuXWp13eXkcDjIysrCYrFgNpsB+OqrrwCUrwFOnDjBa6+9xp49ezCZTEr/pi1bthAfHw9AZWUlhw8fBlB6WPn4+JCXl0evXr2wWCzExcVx9913Y7fblVVMpaWuVURffvkl69atw9fXlzvuuIOCggLAtc3fxX7uueceCgoKiIyMZPjw4Zw5c0bZErA60+nTp+nevTtlZWV07NhReW7VjOoC1OzZs3nrrbcwmUwMGjSIXbt2uXmqqqpiz5499O7dW9niz263c/jwYaWoaLPZeP311/n000/x8fHhr3/9Ky+99BIACQkJbqvLrlZaD6uakmnPVlE8EUy180QwvcUTsde/tz2K4nmLKfIZesujSJ4Iptp5IpiaR3V6lDGzCKbaeSKYmkd1epQxswimmj0mD27PnN+tem/b0rXq3ZP3t2rOLIongimjRxkzi2CqnSeCqXlUp0cZM4tgyupRNl1VwWr48OFXvYJIp9Nx6NChqzZ2Jel0OoxGI8nJySQlJZGTk8P06dMJDg4mIMC132RRURGDBg1Cr9cze/ZscnJymDx5MjqdjoULFxIWFkbr1q1ZuXIlVqv78v7q7fwAnnzySbZs2cLUqVMJCQmhXr16gKsoBFBSUkJiYiLt27dn0aJFNVjVX+fn5zNixAhycnKYMmUK/v7+biulqotXwcHBjBw5ki+++IL33nsPs9msZKruZZWfn88999xDw4YNWbJkieLlYlZFRQV6/YW/GBMmTODkyZO89tprAJSXl/Ppp5/i7+9PVFQU+/fv57PPPgPgX//611XPSbW0HlZXlkx7toriiWCqnSeC6SlP5F7/1VJbZm8zr8UzhOv/OdZFngim5lEOngimjB5lzCyCKaNHGTOLYKrRY2goTE3uTVZuCVl5VtX3
ZhXB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiqypYLVmyRJSPq1JwcDB9+vThzJkzJCcnYzKZGDJkCOvWrVP6RK1atQq73U6DBg3o3bs3ADt27GD9+vU8/PDDLFy4kPz8fOLi4hgxYgQfffQRwcHBVFRUUFxcTPPmzYmJiWH+/PkYDAYCAgLQ6/UKPzU1FYBhw4bxyy+/sGPHDkJDQ5UCVfW4Y8eOYTAYGD58OP/973+xWq00aNCAiooKgoJcL4Czs7MB6Ny5M0ajkblz5xIQEIDBYMButyus3bt3A/DEE0+wceNGsrKyCAsLU1ZRVY/7+OOP2blzJ3PmzFHYhYWFzJ8/X1m1deLECXQ6HQ0bNuTIkSOAaxUWQEREBGVlZUqh7Gqk9bC6tAwG9e+HqnlUH08E01u8urTXv1rnReQzBHmeY13iiWBqHtXpUcbMIphq5wGcKyyjuNyO2d9AeIjnb0plnBcRTLXzRDA1j95hBvkZuCkuAoulzKPXYaL8iWBqHtXpUcbMIphq54lgah7V6VHGzCKYsnq8HmQ2C+ph1bVr11oZ8raioqIoKChg0aJFyrHi4mI+/vhjZSu7lJQUbrvtNubMmaOMGTBgAOvXrycoKIjvv/9eOT5t2jQiIyPx9/dnx44dOBwO9Ho9s2bNUsZMnTqVpUuXKvzqVWPNmzdn4sSJgGtFWefOnbFarURFRWGz2cjOzsZut/PEE0/w/PPPA64tCZ966imFtW3bNgCCgoKYP3++cs+kpCS2bt2qjDtx4gQA7du3Z+zYsQBYLBa6dOmCwWBQtjzU6/V0796d7t27ExMTA8C8efMICwtT2Onp6djtdjIyMpRjx48fB6Bv374MGDCAmTNn/tEpcZO2P+flVRf2Q9U8qo8ngukpry7u9a+2ebkWz9BTj9eCJ4Kpdp4IpuZRDp4IpowevcErKatk/toDblu7tmsZRtKgtpj8fT21KOW8iGCqnSeCqXmUgyeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFV1WwupLOnz9PZmYmAI0bN6Z+/freQtdQYmIic+fOZfjw4aSmpmIymYiNjUWv19OzZ08A0tLSuO+++9yu69WrFzqdjs8++4zPPvuM/Px8YmNjycrKom/fvsp1ABkZGTz22GPs2bMHX19fwsPDcTgc3HzzzQCcPHmSBg0a8Mknn/D555+Tnp5OZGQkTqeToKAgmjRpwvHjx7Hb7ej1esaOHcuRI0ewWq20atUKQPl/WloagYGB/Pzzz26Z/P39AZRMmZmZBAcHM3/+fP7973+TlZVFy5YtMRgMREZGKj2uwFXAe+ONN5SvJ0yYwKuvvkrDhg0B6N27N4888gi//vorx48fp7y8nAYNGpCXl8fs2bNp0aKFV+dMkyZN15+SBrVl3hr3X9a1aeH6ZZ2mPybtGWrSpElT3dT8tQc4mJHvduxgRj7z1hxg3AMJf44pTZo0adKkSZMmTZo0aarj8rhgtWPHDv7973/X6FMVFxfH+PHj6dGjh6e3qKEBAwbwzjvvcOjQIZ544glOnjzJ6tWrad26NREREYBr5dGmTZvYtGkTmzdvBsDPz4+AgADOnj1L37596dSpE4sWLSI3N5f+/fsr1/n6+qLT6fjll194/PHHKSkpYenSpQA0a9ZMGdesWTN2795NdHQ048aNY/PmzWRkZBAZGQm4+mgB3HDDDWzfvp3BgwfTvHlzZRVVXFycwgoLCyMrK6tGJsAtU7t27fjtt9/o3LkzQ4cO5bPPPsNutytjqjVixAiysrKUrw8ePMgDDzzA3LlziY2NJTw8nE2bNtGrVy8SEhJYtmwZTZs2JS8vj+zsbKWAVxt50pT2elVdaOCneVQfTwTTm7zgID+eG9aJ3MIyLF7eDuni/6uN502mqGd4sTcZnmNd4Ylgah7V6VHGzCKYauWdPW91+6BBtRxO2J+eT56lnEZhgX+qR5FMGT3KmFkEU0aPMmYWwZTRo4yZRTDVzhPB1Dyq
06OMmUUwZfUomzwqWG3evJl//OMf1K9fn8cff1xZlZOens6aNWsYPXo077zzDnfccYc3vCrasGEDRqOR2NhY5s6di8lkomfPnuzYsYOcnByleONwOHA6L+yzVFFRQVlZGeHh4ezfv5+UlBRiY2NxOBxs3LiRbt26Aa6t/SorK+nZsyeLFi3Cx8eHmJgYDh8+zKFDhxR+VlYWUVFRGAwGZsyYQWRkJPXq1aOgoMDNb1ZWFj169OD777/HarUSFxfHnj172LNnD8OHDwfAarXWyNS8eXNOnjzplikjI4P4+Hjy8vKYMWMGLVu2xNfXlzNnzij327NnDwcPHnTzkJOTA8CcOXN47733AFi9ejVhYWFKYezuu+9mz549rFixgocffrhWc6PX6wgNNdXqWhlUFxr4aR7VxxPB9CZP1N95NWf2NlPkz02ZnmNd4GXmlnD0UI7WzF2lTLXzRDBl8uitv38nskuueN5qc3j8c12meRHJVDtPBFPzKAdPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWL3zzjvceOONLF++nKAg9zd9TzzxBA8++KCQglVKSgo9e/Z0609lsVjo2rWrspLJbDYzYMAAnn32WWXM7t27cTqd9O7d2227vDfeeENZhWU2m6mqqqJ169YsXLhQGfPJJ58wceJE9uzZw6233kq9evU4deoUL7zwAo888ogyrn///qSlpXHmzBmCg4MBVwHsnXfeUb7OyMjgrrvu4tixY8o9S0pKSExMdMs0bdo0Pv74YyVTUFAQhYWFvP76624roDp37sy5c+ew2WwYjUZSUlIwm838/PPPxMbG8txzzzFq1CjuvfdeAgMvfNrz4p5W4CpYZWdn85///OfqJ+X/y+FwYrGU1vr661UGg/ob+Gke1ccTwZTRo4yZRTBl9OhNXklZJXO/SCU17cKqjPZRYTx1b3tMAbXvdyPjvIhgqp0ngimTR2///Qv01V3xvMmop6DAetVckGteRDLVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEUxZPV4PMpsD/vCqM48KVqdPn+bZZ5+tUawCCAoKYsiQIbz99tue3OKSulR/KrPZTHh4uNKDKioqSvlztapXHXXs2NHteHR0NIsXL6a8vJyoqCgApddTtdLT0zEajZw6dUo5v3//fmU8uApTeXl5isebb74ZvV5PQECAUqyqPgeQm5ureK2srKRx48Zu98zMzMTX11cZHx4eTmFhIS1btlTGFBcXY7VacTqdnD59mujoaNLS0mjZsiU6nfub6Us9k99r165dbplqI62h3OVVFxr4aR7VxxPBlNGjjJlFMGX06A3enNWpNfrdHEjPZ/bqVK/0u5FxXkQw1c4TwZTBo7f//oUHB9CuZRgHM/JxXNjMAb3O1Yewgdnf4/wyzMu1YKqdJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUeFayioqLIz8+/7Pnz588r2wR6UxaLhcrKSh599FH27NmDyWRi0KBBmM1mpW9UYmIiH3zwARaLBbPZDLhWWAGcOXOGW2+9lfz8fOLi4rjttttwOp0UFRXRqVMndDodOTk5jBkzhm3btuHj40NVVRVhYWEKv02bNnz77bfs3LmTt956i/T0dEJDQ7FYLICrf5XRaFSKTG+++SZr167FarViMpkICQmhpMS1nUivXr0AOH78uJIpICCA4uJit0ytWrXi2LFjbNq0iTVr1pCVlUVoaKjyXKrHWSwWAgMDmTBhAgAzZ85k7969mEwmZQxAamoqK1as4IcffgBcfa+OHTvG7NmzPZofrYdVTdWF/VA1j+rjiWDK6FHGzCKYMnr0Fk/rdyOfRxkzi2B6gyfq71/y4PbM+d2qrbYtXau2PHktLMu8iGaqnSeCqXlUp0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNHhWs/vWvfzFu3Djat2/vtkUduPpbffLJJ8ycOdMjg5eS
0+nks88+o02bNsyaNYucnBymTZuGXn/hG2Ho0KEsXbqU5ORkkpKSyMnJ4YcffkCn07Fw4ULGjx9PTEwM48aNc/Po5+eH2Wzm2LFjFBYWMnr0aL777jtSU1Px9b2wZUjXrl0B+Oijj+jduzd9+vRhyZIlNby2adOG7777jqVLlzJixAjOnTvHunXr8Pf3V/prNWrUCJ1Ox48//kiLFi144okn+O9//0tBQQGVlZUK6+abb2bjxo28++67/OUvf7nsPUtLSzl69CiHDh0CoFOnTuzbtw+LxUKDBg2UcZ999hkpKSmYTK499o8fP063bt08KjJqPayurLqwH6rmUX08EUwZPcqYWQRTRo+e8rR+N2J4Iphq54lgXu8eRf39Cw2Fqcm9ycotISvPqvWlUylT7TwRTM2jHDwRTM2jHDwRTBk9yphZBFNGjzJmFsGU1aMs8qhgtXTpUkJDQxkzZgwNGzakWbNmAJw6dYpz587RokULlixZ4lZU0el0zJ071yPTRqORiooK3n//fUJCQgCw2+1MnDgRHx9XpODgYBYvXsyUKVNITk7GZDKRkJDAzp07GTFihNJ3KioqioKCApxOp7JtX7169SgqKsLpdDJ37lzi4uJ44YUXeOONN5QiU/369QEwmUzs2rWLvXv3ctddd5GTk8O2bdsUVkREBODqF7VkyRIiIyP5v//7P958802MRqOSyc/Pj4qKCoqKipg7dy6dOnXiL3/5C7NmzVIy3XDDDQCEhITwzTffYDKZGD58OFu3biUtLU25Z25uLqWlpZSWunpJ7dy5U7nPxQWrH374gby8PGUbQ6fTyc6dO9m4cSNjxoyp1dxoPawuLYNB/fuhah7VxxPBlNGjjJlFMGX06C2e1u9GPo8yZhbB9AZP5N8/gCA/AzfFRWCxlHnEqZYs8yKaqXaeCKbmUZ0eZcwsglkXPJ4rLKO43I7Z30B4iOe/pKwLmWX0KGNmEUwZPcqYWQRTVo/Xg8zma9TD6ujRo8CFQkpmZiYABoOBG264gYqKCmVMtX7fV6k2MhgMBAUFKcUqgN69ewNgs9mUY9HR0SxatEj5esGCBezcuZN27dopx5YvX87gwYM5fPgw/v7+ynE/Pz+2b9+ufO1wOJg2bZqy4qlRo0YAdO/e3W0LvZkzZ7Jt2zYCA11bi1RUVCj3adq0qTLuww8/pKysTPnax8eHevXqsW3bNuVYVlYWs2bNUjJV+/vb3/7Gs88+q4w7c+YMaWlpiqcbbriBzMxMDh065LbqrEOHDkrBDeC7774jPz+fhx56iJycHDp16sSCBQvwVNr+nJdXXdgPVfOoPp4IpoweZcwsgimjR0954cEBxDYL4fCpwhrn4pqHaP1uVMRUO08E83r3eC36TXnq8VrwRDBl9ChjZhFMGT3KmFkEU40eS8oqmb/2gNv2s+1ahpE0qC0mf98rXHlt/F0LpoweZcwsgimjRxkzi2DK6lEWeVSw+vbbb73l46pkt9spLCx0609VXVy6eNXS71W9UungwYMMGDAAgMrKSjIzM7Hb7ZSXl+Pv749Op6OiooKMjAxle7yffvoJp9OpbAuYnZ0NQEZGhts9qgt01aub/Pz8lOurC1ZFRUUUFBS4bTH4RzKVl5cDcOzYMbd7pqWlKZ6io6MxGAzKPXv06AFAeno65eXlbgVDq9XK6NGjqayspHXr1srz0aRJkyZNmjR5V5f9uI7zcic0adLkLSUNasu8Ne6/TGzTwvXLRE2aNGnSpMkTzV97gIMZ+W7HDmbkM2/NAcY9kPDnmNKkSZMmTZrqsOpkhcJms+Hv7+/Wn2r69OkEBwdTVVWljBs5ciRZWVls3rwZcBWRDAYDixcvJjw8nNatW7Ny5UqluNStWzeCgoKoqKjAbDYzZswYxo0bR1lZGdOnTycsLEwp+BQVFQFw4sQJOnbsSGVlJWFhYZw7d87tvN1uJygoiEmTJvHqq6/i4+NDYGAgfn5+SgGqOpOvry+JiYlUVlYS
HByM1Wp1y1TN/O6770hISMButxMaGkpubq7beYPBgNlsJikpCXAVzXx8fNxWpAHceuutWCwWwLVKCyAmJoa3336bu+++u9bz40mj6etVdaGBn+ZRfTwRTBk9yphZBFNGj97inT1v5dAlVlcBHDpVSJ6lnEZhgbViyzgvIphq54lgyuQxOMiP54Z1IrewDIsXt2u62JvaMotkyuhRxswimDJ6lDGzCKZaPZ49b3X7MES1HE7Yn56vvcZTAVPtPBFMzaM6PcqYWQRTVo+yySsFq8rKSnJycrBYLG5bzlWrbVvvfnpRp9Nx//33c+TIEaU/1ZAhQ0hJSXEb53A4sNvtbsf0ej1PP/00CxcuJD8/X1lZZLPZeO2116ioqOCll16iefPmtGjRgnHjxuHj48Mdd9xBTk5OjXw6nQ6TyURBQQEWiwVfX1+3bQkdDgfl5eXUq1ePqqoqysvLsVgsNGzYkJycHDdWeXk5ERERFBQUUFpaSnl5udKX6mIZjUYCAwMpKirCYrEQGBhIScmFhtJVVVWUlpYSEhJCaWkpFRUVlJWV0axZMxyOC0sRq4tVv9e4ceNqXbDS63UeN46/nlUXGvhpHtXHE8GU0aOMmUUwZfToKe9EdskVz1ttDo//7ZRxXkQw1c4TwZTJo8jXqGrNLJIpo0cZM4tgyuhRxswimGrzqL3GE8MTwVQ7TwRT8ygHTwRT86hOnkzyqGBlsVh48803WbdundLb6WI5nU50Oh2HDh3y5DY1ZDabMRqNbv2pANavX+9W4Fm6dGmN6yorK3nkkUeU1Ufz5s3j/fffR6fTcccdd+Dv78+cOXM4efIkS5YsISIiQrl+6NChSr+ugADXN12/fv2YOXMm4Fol1bdvX3JychQf58+fp6qqihUrVhAVFQXAtm3bGDVqlJtXvV5PgwYN3Ipuzz77LBs3blTGVRebnnjiCZKTkwEoLCwkMTERQBlXWFiIw+Hgyy+/VFZVffLJJ7zyyivExcUp/K5duxIYGMi8efMYPny48mdP5HA4sVhKPWJcjzIY1N3QFbzv0ds8EUy180QwZfQoY2YRTBk9eosX6Hvl/p0mo56CAmut2DLOiwim2nkimDJ6lDGzCKaMHmXMLIIpwqO33xfVhcyaR/V41F7jyedRxswimDJ6lDGzCKasHq8Hmc0Bf3jVmUcFqxdeeIHvvvuOAQMG0KFDB+rVq+cJ7g8rKipK6dtUreLiYnJzc5Wi0OWuA1c/p9jYWABSUlJo1KgRdrsdf39/ADp06EBmZibbt29n8ODBgKv4lp6eTs+ePQHIy8sDUApY4Fr51K5dO3JycpR7FRYWAlC/fn1lXM+ePfH19cVkcn3SxmazUVVVVWM11W233cb69euVolP1doPVvbAAQkJCaNGiBUePHlWOFxcXo9fr3Xj9+/dn4sSJSkaR0hrKXV5qb+gKdaPJoNo9yphZBFPtPBFMzeP1yQsPDqBdyzAOZuTjuGihtl7n6qPTwOzvsV8Z50UEU+08EUwZPcqYWQRTRo8yZhbB9AZP9PsiNWYWzdQ8Xr2013hieCKYaueJYGoe5eCJYGoe1cmTSR4VrLZv387w4cOZMGGCt/z8ISUmJjJ37lyGDx9OamoqJpOJ2NhY9Hq9UlC6lDp16kRQUBBTp07l1KlT5OfnU1VVhZ+fH4MGDVLG9enThw0bNvD+++8zZcoUfH19iY+Pp7CwkFtuuQWA06dPA65+Utu2bSM9PZ3IyEiqtyBs0KAB4NrmT6fTMXbsWI4cOYLVaqV9+/bY7XalwHfq1CmcTicZGRlumZo3bw5Aw4YNAVfxy2AwsGzZMmbPnk1WVhYtW7YkJycHHx8fjEYjACUlJVRVVTFw4EBOnz5NWVkZn3zyCYBbH6tz585x6tQpYmJiADAYDMydO5cnn3zSswnSJExaQ1dNmjRpqptKGtSWeWvcf7HWpoXrF2uaNGnSpEmTpquT9r5Ik1qkvcbTpEmTJk2avCuPClYhISFKUeVa
asCAAbzzzjscOnSIJ554gpMnT7J69Wpat27ttoXfyJEjycrKYvPmzQD4+fmRkJDAtm3b6Nu3L506dWL69OmUlpbSv39/5brbb78dnU7H2bNneeKJJygpKWHZsmWEhYURHx8PuLZDNBgMpKWlER0dzbhx49i8eTO7du0CoKioCH9/f8rKyqhXr56yWqt58+Z8+OGHOBwOgoKClLHg6gX2+0zg6pNVfU+TycTevXvp3LkzQ4cO5bPPPqOwsBC9/sKSutLSUnQ6HRkZGURFRXH48GEmTJiA0WgkLCxMGZebm0uLFi2Ii4vjxx9/xGKx8M4773Dw4EGmT5+ubHt4tfLx0ZrK/V5qb+jqLY8ieSKYaueJYMroUcbMIpgyevQmLzjIj+eGdSK3sAyLl7cuuvj/3pCan6Moptp5IpgyepQxswimjB5lzCyC6S2eyPdFas0skql59IypvcaTy6OMmUUwZfQoY2YRTFk9yiaPClZ/+9vf+PLLL3nwwQfdCiaitWHDBoxGI7GxscydOxeTyUTPnj3ZsWMHOTk5StHK4XAoK54AKioq2LNnD127dmX//v1Kv6jAwEA2btxIt27dAPj2229xOp00bNiQRYsW4ePjQ48ePdi2bRv79u1TilYOh4OoqCgMBgMzZswgMjKSZs2acerUKeWeVVVVWCwWevTowffff4/VaiU+Pp5ffvlF2VawWj4+Pm6Zunbtys8//6wUtMBVjIqPjycvL48ZM2bQsmVLwsLCKCgoUMbodDqeeuopsrOz+fLLLwHXNoIOh8NtnqZPn05ycnKN7RW//vprnn/+eZo0aXLVc6PX64Q2tK7rUntDV6gbTQbV7lHGzCKYaueJYGoer3+eqH8jZZwXEUy180QwZfQoY2YRTG/yMnNLOHooh8gGJiLDg7zGVXNmUUwZ5uVavC9S87yIYmoePZP2Gs+7UrtHGTOLYMroUcbMIpiyepRFHhWskpOTsdls3HfffQwaNIiIiAgMBkONcXfeeacnt6mhlJQUevbsyZw5c5RjFouFrl27uvWdWrp0qdt1u3fvxmq1MmHCBOLi4gDo3r07jRs3VopX1XwfHx8GDhzI+PHjAVcPq5tvvpnvv/+e+Ph4TCYTTqeT++67j8cff1y5dvLkyaxYsYKSkhK31V7vvPOOW0+prl27UlxcDKAcb9u2LcuWLVPG7N+/n/vuu0/ZflCn01FVVUVSUhJ9+/ZVxj366KP8+OOP2Gw2jEYjZrOZyspKpk6dyk033cSLL77IG2+8waBBg9w89O3blyNHjrg9o7/+9a8cPny4VsUqAIfDicVSWqtrr2cZDOpu6Are8SiSJ4Kpdp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYMrmsaSskrlfpJKadmG1TPuoMJ66tz2mgNr3IVJzZlFMmeZF5PsiNc+LKKbmUZ0eZcwsgql2ngim5lGdHmXMLIIpq8frQWZzwB9edeZRwSonJ4edO3dy6NAhDh06dMkxOp3usudqq7S0NO677z63Y2azmfDw8BqrhX5/HUBUVJRyLCoqCqvVSlZWFuXl5fj7+3Ps2DGqqqrcxul0Olq2bKkwTCbXp2d+v21eeXk5AJmZmURHRxMQEIDBYHArFDmdTioqKpSt/po1a3ZJ1rlz59yY/v7+wIX+WNWqqKgAXH21oqOjiYqKqvEcSkpKyM3Ndct0KeXm5l6y6Hg10hrKXV5qb+jqqcdrwRPBVDtPBFNGjzJmFsGU0aOMmUUwZfQoY2YRTLXzRDBl8ThndWqNPkQH0vOZvTrVK32I1JhZNFOGebkW74vUOC+imZpHOXgimDJ6lDGzCKaMHmXMLIIpq0dZ5FHBasKECRw4cICkpCTi4+OpV6+et3xdURaLBbPZXON4cHCw2/Z5J06c4LXXXmPPnj2YTCaaN2+O0WjEz89PGZOYmMjs2bNxOp1K36ns7GwAZs6cyaRJk4iLi+PFF19040dGRgLw0Ucf8dZbb+Hr60ufPn34
8ccfgQt9qerXr8/x48fp168fmZmZREZGcvvtt1NeXq4UhoxGIzqdjtTUVHr27InVaqVjx474+flhNBpxOFzf3E2bNgXgueee49y5c5hMJvr168eBAwfc7pmYmMgHH3zAxIkT2bBhAwBDhgwBoGfPnpd8pq+//jpLliwBoF27dlcxGzWl9bCqKW/tX5o8uD1zfvdpx7YtXZ929PS514U9W9XuUcbMIphq54lgah7V6VHGzCKYMnqUMbMIptp5IpgyedT6EKnTY12ZF1Hvi9Q6LyKZmkd1epQxswim2nkimJpHdXqUMbMIpqweZZNHBatdu3YxevRonnnmGW/58ZqKiooYOXIkLVq0YNasWeTk5DB58mSqqqrcxg0dOpSPPvqIiooKfv75Z2w2m9IPatSoUcTExLB8+XIefPBBfH19uemmmwCUXlBZWVncf//9hIeHs2DBghr86uJYfn4+ycnJ7N+/n4ULFxIREUF+/oUX106nk5KSEtq0aUNiYiLLly/n7NmzNGzYsAbrzJkzDB8+HHBte+jj4z6NgwYN4r333mPdunV07dqVrVu3Ul5eTkJCgrJN4a+//spHH33EHXfcgcPhYOXKlcr1r7zySi2futbD6n/J0/1LQ0NhanJvsnJLyMqzen0/eagbe7aq3aOMmUUw1c4TwdQ8ysETwdQ8ysETwZTRo4yZRTC1PkTqZMoyL6LfF6ltXq4FU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYNWjQwG2ru2sls9ms9H+6WEVFRYqfVatWYbVaef/99wkJCQFgx44drF+/nlOnTinb8AUHBzNy5EhmzZrFSy+9RGBgIE6nk6ioKB555BEAOnfuTOfOnamsrFT4qampANxxxx388MMP5Ofn06RJE2UrvupxJ06cwGAw0K1bN+bNm4ePjw9NmzbFYrEoY6pXdLVp04bCwkLeffddIiIiMBqNlJaWKuN2794NQP/+/Vm3bh1Wq5XmzZuTnp7uds9Vq1bRoEEDmjVrxvbt2wG47777ePHFF5VnFR4eTmVlJTNnziQvLw+n04ler6dv377Ex8fXem60HlaXlsHg3f1Lg/wM3BQXgcVS5lHfqovlbY/e5olgqp0ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpgyedT6EHmXea6wjOJyO2Z/A+Ehtf8FSV2aF/D++yIZv3c0j+r0KGNmEUy180QwNY/q9ChjZhFMWT1eDzKbr1EPq0cffZRVq1YxZMgQpafTtdClejQVFxe79WhKSUmhe/fuSrEKYMCAAaxfv55169aRnJysHC8pKaFx48Z8++237Nixg0ceeQSn88Jm2EajkQcffJClS5cq/Oq+XF26dOH9998HXKukOnfujNVqJSoqCpvNRnZ2Nna7nddee00pKG3ZsoWnnnqKDh06ALBt2zbAVUSaP3++ct+kpCS2bt2q3PPEiROAq2A1Y8YMwLU9YpcuXTAYDMqWgf/5z38YNmwYTz75JKtXr+bFF19kzJgxGI1Ghd28eXMWLFjAypUrmTJlCq1atcJisRAeHn71E/I7aftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKYNHrQ+Rd5glZZXMX3vAbRu/di3DSBrUFpO/71Xz6uK8iGCqnSeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFHhWsbDYbPj4+3HnnnfTv359GjRpR3ZepWjqdTlmp5C0lJrp6NF3cy+qrr75Cr9crPZrS0tLo06cPjz76qNLD6u6770an05GSkqIUrCorK/n6669JTExUrgNIT0+nV69eWCwW4uLiiIyMxOFwcPPNNwNw8uRJwsLCmDNnDu+88w6+vr7ccccd+Pr6EhQURJMmTTh+/Dh2ux2dTsc999xDQUEBkZGRDBgwAIBWrVop9wwMDGT79u10796dsrIyOnbsiM1mAy70ncrMzCQoKIhXXnmF8ePHYzKZGDRoEL6+vjRq1Aij
0ciZM2fIzc1l+fLlzJo1C7vdDkBycjIfffSRUlhMS0tj/vz5fPHFF4BrdZrFYqG8vNyrc6VJkyZNmjRp0qRJkyZNl1LSoLbMW+NebGnTwlVs0fTHNH/tAQ5m5LsdO5iRz7w1Bxj3QEKtmNq8aNKkSZMmTZo0afqz5FHB6s0331T+vGzZskuOEVGwGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOVXo0FRUV8fnnn+Pr68vs2bPJyclh2rRpGI1G9u3bx+LFi2ndujUrV66ksLCQUaNGAa4VS3q9HofDVQF98skn2bJlCxs2bABQthIsKiqioqICq9VKYmIi7du3Z9GiRVitVmJiYpQx4Fp5lZ+fz4gRI8jJyWHOnDkAJCQkuN3TbrcrWxR+8cUXZGRkoNPp3DL5+PhQUlLCPffcQ8OGDVmyZAmVlZXceOONAOTl5QFQUFBAdHQ0gYGB/Pbbb+zevZvBgwcze/ZsWrVqxY8//si6desA10oug8HABx98wMaNG7n33nvp0qVLrefHkya316vqQgM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKZvH4CA/nhvWidzCMixe2M6uWmrO7E3m2fNWt6JStRxO2J+eT56lnEZhgVfNrSvzIoKpdp4IpuZRnR5lzCyCqXaeCKbmUZ0eZcwsgimrR9nkUcFqy5Yt3vJxVQoODmbx4sVMmTKF5ORkTCYTQ4YMYezYscoYp9OJ0+kkLCyM3r17A2C325k4cSKxsbEsXLiQ/Px84uLiWLBggbKdXlVVFQ6Hg4cffphz584xf/58DAYDRqNRWfEEUFpaitVq5ZVXXmHlypXs2LGD0NBQrFYrfn5+bn5btWpFYmIi//3vf7FarYSFhZGfn09AQIDCKikpYcyYMfz666/MnTuXgIAA9Hq929aETqcTh8PB//3f/7Fs2TKysrIICwsjJydHWTlVXWjT6XQcPXrUzUdGRgarVq3ipZdeIi4ujqqqKgDmzZvnluvhhx/myJEjtZobvV7ncRPe61l1oYGf5lF9PBFMGT3KmFkEU0aPsmXefeQcR346SWzzMDrGNPQaV7bnKIIngimjRxkzi2B6kyfq/YOaM3uDeSK75IrnrTaHR8+2rsyLCKbaeSKYmkc5eCKYMnqUMbMIpoweZcwsgimrR1nkUcGqcePG3vJx1YqOjmbRokWXPa/X62ncuDFff/21cqx///5MnDiRiIgI1qxZc8nrqldFDRw4UOkxBa5+XT/++KPSh6qqqop69erx0EMP8dBDDwGuglLbtm0pKysDIDDQ9Wm2zp078/zzz/P8888DsHLlSiZNmkRlZSXgWmEFMHz4cJ5++mnlnv369ePMmTNumSIjIxkxYgQjRoxQru3SpYvCqPY3fPhw5X4AI0eO5KeffqJHjx4AzJkzh4EDB/Lyyy8rY+655x6ys7MZP348DocDvf7qK8EOhxOLpfSqr7veZTCov4Gf5lF9PBFMGT3KmFkEU0aPsmXOyS9l8sc/U1JWpRwLCvBh0mNdaRh69Z/QF+FRFFPtPBFMGT3KmFkEU0aPas0c6Ku74nmTUU9BgbVWbJDnOdYlngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCKavH60Fmc8AfXnXmUcGqWqWlpfzyyy9kZmYCrkJWly5dlIKNWqTTXfkF/dWOvdyYi1dFeerjallNmzbFaDTidDqpqqqitLSUb7/9ll27dgFQUVEBuHp0bdu2jbVr19ZgvvXWW9x+++1ER0f/YZ8XS2sod3nVhQZ+mkf18UQwZfQoY2YRTBk9ypL598UqgJKyKiYt/Jn3/pHoERvkeY4ieSKYMnqUMbMIpowe1ZY5PDiAdi3DOJiRj+Oit416navnVAOzv1f8Xu/PsS7yRDA1j3LwRDBl9ChjZhFMGT3KmFkEU1aPssjjgtXSpUt55513KC0tdSuumEwmxo4dy8MPP+zpLWolh8NBZmYmw4cPJzU1FZPJRGxsLABm
s/my11WvUHrrrbc4deoU+fn5xMbGkp6eDrhWYPn7++Pj40NeXh6PPfYYe/bswdfXl/j4eOx2u1KoKy11rTTatm0bAwcOJD09ncjISIKCggDw9fV18zN27FiOHDmC1Wqlffv2nDlzRtni749mMhqN3HjjjaxatYqPP/4YcK3Muvvuu1m3bh1t27oa5b799ttK8QpgxowZ7N27F6PRyIcffkhkZGStn73Ww6qm6sJ+qJpH9fFEMGX0KGNmEUwZPcqUed+JvBrFqmqVlFVx6FQB7aPq14ot03MUxRPBlNGjjJlFMGX0qObMyYPbM+eLVFLT8pVjbVuG8dS97T1+XybTc6wrPBFMzaM6PcqYWQRT7TwRTM2jOj3KmFkEU1aPssmjgtV///tfXn/9dRISEhgxYgRRUVEApKWlsXTpUl5//XWCgoL461//6g2vVy273c6hQ4d44oknOHnyJKtXr8ZoNCq9o8C1VV5WVhabN28GwMfHB51Ox86dO+nbty+dOnVi0aJFypZ71aruMfXLL7/w+OOPU1JSwrJly/D19aVevXpuYzMzM4mOjmbcuHFs3rxZWe1UrcDAQHx9fdm+fTuDBw+mefPmfPjhh1RVVdXYlu+PZKpXrx7l5eU0atSI7OxsmjRpwrp16+jQoQPNmjUDICEhQRn//fffc/jwYQCioqK4+eaba/nEtR5W/0t1YT9UzaP6eCKYMnqUMbMIpoweZciclX/6iuczz5eS2LmZR/eQ4TmK5olgyuhRxswimDJ6VGPm0FCYmtybrNwSsvKsRDYwERke5CV3LsnwHOsaTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9/PHHdOnShUWLFmEwGJTjsbGx3HXXXTzyyCN8/PHHf0rBys/PD5vNRmxsLHPnzsVkMtGzZ0+2b9+Oj8+F2A6HA7vdrnwdGBiI0+nkpptuYv/+/aSkpBAbG0tpaSklJSXKCqzqa6vz+/j40KNHD7Zt26asNKse26hRIwwGAzNmzCAyMpKYmBiOHDminDcYDFRWVtKjRw++//57rFYr8fHx7Nq1Cz8/v6vO9PHHH7Nz507eeustsrOzOX/+PG3atOH48ePY7Xa3ubLZbIwfP17pp3Vx4as20npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGmzJFhV/73v3H9wFr3QZHpOYriiWDK6FHGzCKYMnoUkflcYRnF5XbM/gbCQzz/hUaQn4Gb4iKwWMo86lt1serCc1S7Rxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vB5kNl+jHlbp6ek8//zzbgWQahkMBvr168ebb77pyS1qLYPBQFhYGMuWLVOOZWVlcdttt2Gz2ZRjS5cudbuuqsq1Hc6wYcMYMGCAcnzw4MEcPnwYf39/wNVbys/Pj4ULFypjHA4Hbdq0UYo/jRo1AqBdu3bMnj1bGTdz5kyOHDmibB1YvTXfq6++StOmTZVxvXv3pqys7Koz6fV6unfvzueff05MTAzJyck0aNCA5557jvz8fMLDw5WxY8aMwWKxMG3aNF544QXuuOOO//Fk/7e0/Tkvr7qwH6rmUX08EUwZPcqYWQRTRo8yZG7TPIygAJ9LbgsYFOBDXLNQj/3K8BxF80QwZfQoY2YRTBk9eoNXUlbJ/LUH2J9+YQu/di3DSBrUFpO/r6cWpZwXEUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LIo80U69Wrx5kzZy57/syZM0q/pmstu91OYWGh21Z+27dvB1x9ni6n6pVKBw8eVI5VVlaSmZmJ3W6nvLwcAJ1OR0VFBRkZGcq4n376CafTqfSmys7OBnAbA3D06FHgQo+r6lVUP/30kzKmqKiIgoICLl79VdtMALt27SIoKIjQ0FDl2NKlS9m6dSsPPfQQ99577xWv16RJkyZNmjRd33p55E0EBbh/likowIeXR970JznSpEmTputP89ce4GBGvtuxgxn5zFtz4E9y
pEmTJk2aNGnSpEmTeuTRCqtbbrmFZcuW0a5dO+6++263cxs2bGD58uXcc889Hhm8lE6cOMFrr73Gnj17MJlMDBo0iH/+859uRRubzYa/vz/JyckkJSWRk5PD9OnT8fPz44svvuDzzz8nLi6OqqoqLBaL0sOqtLQUg8HAggULWLRoEUajkZCQEGWlU1FREf7+/uh0OgIDA7nnnntwOByEhoZis9kICwtDp9MpY6v9dujQAYfDQcOGDcnMzHQ7b7fbMZlMvPLKK0yePJmAgAACAwPx8/NTCmTVmQwGA7169cJut9OgQQMsFgvBwcHKyrDDhw/z1ltv0bZtW1JSUgB46623cDgcjBgxQinIzZ8/nxkzZqDT6fjkk0+U/BkZGWRnZyurw2ojT5v7Xo+qCw38NI/q44lgyuhRxswimDJ6lC3zDQ2CmPPsrRzMyOdUrpVm4SbatAjzmCvbcxTBE8GU0aOMmUUwZfToLd7Z81a3lVXVcjhhf3o+eZZyGoUF/qkeRTJl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWj7LJo4LV+PHj+e233xg/fjzTpk2jRYsWgKvokZeXR1RUFM8++6w3fCoqKipi5MiRtGjRglmzZpGTk8O0adMoLy9n4sSJyjidTsf999/PkSNHSE5OxmQy0bp1a3799VcSEhL45z//yfLly9myZYvbFnl2u10pBtntdoqLizl37hyNGzd2WylVUVFBaWkpTZo0IS8vj6KiImw2GzExMTU8+/r6EhwczPnz58nNzcVkMlFSUqKct9lslJWVER4eTmlpKWVlZVitVlq3bs3x48eVcU6nE5vNRuPGjcnNzaWgoACbzea2aqpBgwbYbDY++OAD9HrXX4ywsDC6du1K//79lXHffvutwrTb7eTm5gLw6aef0rBhQ8aMGVOr+dHrdYSGmmp1rQyqCw38NI/q44lgyuhRxswimDJ6lC1zz1ATPb1GuyDZnqMIngimjB5lzCyCKaNHT3knskuueN5qc3j8XkrGeRHBVDtPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWIWFhfHFF1+watUqUlJSyMrKAqB169aMHj2aBx54QNnuzltatWoVVquV999/n5CQEMBVZJo8eTJJSUlEREQAYDabMRqNLFq0CHAVmHr06EFgYCBdunShe/fudO7cmX79+pGYmKjwq1c/LViwgNjYWAC2bdvGqFGj0Ol0BAcHA2C1WgkJCWHLli3Ktc8++yzffPMN0dHRAEovqwceeICXXnoJgMLCQm655RYAhZWZmYnD4WDt2rVKpk8++YRXXnnFrRgFEB0dzYYNG5Svhw4dyoEDBxRWSEgIZ86c4fHHH+df//oXMTExPPbYY4waNcqNU1hYyBNPPOF2vEuXLgBuha2rlcPhxGIprfX116sMBvU38NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR+8wzxWWUVxux+xvIDzE818WeMtfoK/uiudNRj0FBdZasevCvMjoUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwerweZzQF/eNWZRwUrcPVfGjlyJCNHjvQU9YeUkpJC9+7dlcIOuAosr7zyCtu3b2fw4MEAREVFkZaWpozZvXs3JSUl6HQ6oqKiAFffpzvuuEPZDg8u9J26WD179sTPzw+j0Yi/vz82mw2r1UrDhg3dxvXv35/169fToEED4ELvqotXcIWEhBAbG8tvv/2m+MjLywNQthKsZk2cOFEpRJ0+fRqHw1GjJ9jtt9/Onj17aNasGQA//vgjmZmZjBgx4orPMT09nQ8++IAPPvigxrm7776bffv21brYqDWUu7zqQr2YNyQAAQAASURBVAM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmsnUrKKpm/9oDbtnvtWoaRNKgtJn/fP91feHAA7VqGcTAjH4fzwnG9Dtq0CKOB2d/jZ6rGeRHNE8FU
O08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYFRYWkp2draxE+r2OHDlCo0aNlKKLN5SWlsZ9991Xo49VQEAAx44dU8YlJibywQcfYLFYMJvNSvFKr9eTkZHBrbfeSn5+Pg0bNiQrK4vy8nL8/f0pKirCx8eH//znP+Tk5LBt2zZ8fHyorKxUCk+nTp3C6XRy7tw5Vq1axYoVK0hPT6devXoANGnSRBnn6+vL1q1bKSwsZO3atVitVsVH9bjCwkJ0Oh3Lli3j119/Zc+ePQQEuD4JWL9+fSU3wNGjR1myZAnLly8nKysLk8m1ZUT1dox79+4lJCSEn3/+mUmTJgEwffp0NmzYwNy5c5Ui25IlSwCoqqpi06ZNfPPNN5w/fx69Xk/Xrl3x9fX8DZ0mTZo0adKkSZMmTZo0XSvNX3uAgxn5bscOZuQzb80Bxj2Q8OeY+p2SBrVl3hr3olqbFq6imiZNmjRp0qRJkyZNssujgtUbb7xBeno6n3766SXPv/LKK0RFRTF16lRPbuMmi8WC0Wis0cfq5ZdfJiUlheeffx5wbZW3dOlSkpOTSUpKYseOHQC0b9+ehQsXMn78eGJiYnj66adxOp0cOnSIjh07UlJSQlxcHMuWLaNBgwaMHj2a7777jn379mGxWABXHy1wrZx65ZVX6N27N3369GHx4sWAazVUtdcGDRqwZ88eUlNTGTFiBOfOnWPdunUAFBcXU69ePUpKSmjTpg2zZs2iefPmPPHEE/z3v/8lPz+f8+fPu93TYDDw+uuv85e//MXtnunp6XTt2pXc3FysVivPPfccRqNR8bl//37++te/kpKSgo+PD926dQMgISGBsrIy5fneddddhIWFKf2vaiMfH62p3O9VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM1j7Zlnz1vdikDVcjhhf3o+eZZyGoUF/mn+qhUc5MdzwzqRW1iGxcvbFl78f29Ilu+dusQTwdQ8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9SibPCpY/fTTTzz44IOXPX/bbbexatUqT25xSe3du7dGH6t3332XEydOkJOTQ0REBMHBwSxevJgpU6aQnJyMXq9Hr9dz7NgxHnvsMR555BEAIiIiKCkpYeXKlXTs2BEAk8mE0+nE6XQyd+5c4uLiiI6O5sSJE+zbt0/xUb9+fSoqKti1axd79+7ltttuY/369WzatEnpWeXr64tOpyMsLIwlS5YQGRlJnz592LJlC6tWrWL06NGAq5+VwWCgqKiIuXPn0qlTJ4qKisjIyCAnJ0e5p8lkwt/fn2+++QaTycSAAQNYu3Ytn3/+OQ888ABOp1PpnVVeXg5Abm4uAOfPn+frr79mwIABAOzYsQObzcb06dOx2Wy89NJLHDhwgK+++qrWc6PX6zxuFHw9qy408NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIprd4u4+c48hPJ4ltHkbHmIb/+4KrkFozi2SqzeOJ7JIrnrfaHB6/T/FmZlHvmdQ2L9eCJ4Kpdp4IpuZRDp4Ippo9ZuaWcPRQDpENTESGB/3vC/6g1JxZFE8EU0aPMmYWwZTVoyzyqGCVn59PaGjoZc+HhIQoK4S8JbPZzPHjx2v0saouMF3cxyo6OppFixYBsHz5cl599VWsViv9+/dXrnvkkUd4+eWX+eWXXxT+6dOniY2NZc2aNcq4oUOH4uvry/fff69cf/ToUZ577jml+JWRkcH69es5d+4cZ86cwWw2k5+fj9PpZN26dcrWiDNnzuSHH34gJSWF0aNHK1sW3nLLLcyZM0e5Z69evZRM1VsD5uTkMHv2bPr27QvA9u3bWbt2LQcOHMBms2E2mwFXYWvXrl1KX6z33nuPDz74gJSUFKVg9emnn9KtWzcGDRoEuFafDRo0iM2bN9OvX79azY/D4cRiKa3VtdezDAb1N/DTPKqPJ4Ipo0cZM4tgyuhRxswimDJ6lDGzCKa3
eDn5pUz++GdKyqqUY0EBPkx6rCsNQ2u34sbbHkXxRDDV6jHQV3fF8yajnoICa63Yas0skieCKaNHGTOLYMroUcbM3mSWlFUy94tUUtMurLxtHxXGU/e2xxRQ+xYYas4siieCKaNHGTOLYMrq8XqQ2Rzwh1edeVSwCg8P5+DBg5c9f+DAAcLCwjy5RQ1FRUWxd+9eoqKilGPFxcXk5eVRr149pdfTpa671J/T0tIICQnh7NmzlJeXExUVxbFjx+jQoYMyxul0kp6eTv369UlLS6NZs2b4+PhQVVVVg3Xxn6OioigpKSEsLMytj1daWprC+qOZunTpopxr2bKlG6vay+nTp7nxxhsBaNy4sVKsulgXe6xeFfb666/zxRdfUFFRgU6n47fffqt1wQrQGspdQXWhgZ/mUX08EUwZPcqYWQRTRo8yZhbBlNGjjJlFMD3l/b5YBVBSVsWkhT/z3j8SPbUHqC/ztWCqzWN4cADtWoZxMCMfh/PCcb3O1SOqgdnfY79qy3wteCKYMnqUMbMIpoweZczsDeac1ak1ehoeSM9n9upUr/Q0VGNm0TwRTBk9yphZBFNWj7LIo4JV3759WbFiBYmJifTp08ft3DfffMPq1asZOnSoRwZ/r8TERH799VelPxPAV199hV6vp0GDBkqvp9+rU6dOGI1Gqqqq8PPzA6CyspKvv/6atm3bsn37doqKikhMTHRbWQWurfMKCwuJi4tj+/btdO3aVTkXEHBhed+GDRto0aIFGRkZFBUV0atXL3Q6HXq9nvnz57NixQrOnz9PVVUVHTp0YP/+/W6ZbDYbY8aMYdu2bTidrndZYWFhFBUV0bRpU8LDw8nNzWXfvn2MHTuW9PR0dDodkZGRnDp1yu2e586d49FHHyU1NZXi4mJat26tsKqVk5PDsmXLajyrFStWMHbsWOU5Xa20HlY1VRf2Q9U8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIpjd4+07k1ShWVaukrIpDpwpoH1W/1nw1ZhbNVLPH5MHtmfO7T9S3ben6RL0n71HUnFkUTwRTRo8yZhbBlNGjjJm9xawrPQ1FMTWP6vQoY2YRTFk9yiads7oyUgsVFxfz0EMPcfz4cWJjY5XVPceOHePw4cNER0ezYsUKZZs6b6ioqIiuXbvSpEkTJk+eTE5ODtOmTeOee+7h559/pmPHjkyZMoWRI0eSlZXF5s2blWsfe+wxtm/fzoQJE2jdujUrV65k27ZtjBs3jilTppCSkkJYWBgdO3YkMDCQN998k7KyMqZPn05UVBS//PIL/v7+vP3226xdu5a1a9fStGlTXn/9dXbu3MmcOXOYMmUKL730Em+99Rb33HMP/fv3Jz09Hb1ez5AhQ/jtt9/IyMjA4XDgdDo5cOCAksnHx4eIiAhuv/12/vOf/ygrpP7yl78wZcoUJk+ezIoVKwBXfzCA77//HofDVa1duXIlnTp14rbbbiMrK4umTZvSsGFDZWvA2267jbS0NDZt2sTTTz/NN998g9PpJCkpidLSUtatW0dgYCBnz57ltddeY8iQIVc9P06n85IruzRp0qRJkyZNmjRputZa+fVhVmw6ctnzD90Vw4N3xl5DR5quhbJyS8jKs3q9Z4kmTZo0aVK/fj2Uw+SPfrrs+Vcev5mb4iKuoSNNmjRp0nQ18miFVb169fjkk0/46KOP2Lx5M5s2bQKgWbNmPPXUU4waNYrAQM/2hf+9goODCQ4Oxul0kpycjMlkYsiQIYwdO5Y+ffooW+85HA7sdrvbtbfffjvbt29nwYIFFBQUEBcXx4IFCzh27Bg6nY7g4GB8fX2Jjo6msLCQcePG4ePjwx133EGjRo3YsWMHXbt2pXfv3txwww2sXbuW06dPM2rUKCIjI3nttde46aabFJ8APXv2JC0tDaPRyJo1a+jUqRPTp09n2LBhVFVVKWMDAwMpLS0lLy+PDRs28OCDD9KtWzeSkpKoqKgA4NZbb2XFihX4+vqy
bds2WrZsyZw5c/j444/ZuXOncs927dpht9ux2Wzs3bsXgOeee44DBw4oY+Lj49myZQtOp5MlS5bQpEkTHnroIUaNGsU999zD8ePHazU/Wg+rS8tgUP9+qJpH9fFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9AYvMuzKzY4b1w+sdV8jUGfm3+tcYRnF5XbM/gbCQzxv/lwXvneC/AzcFBeBxVLm0fyK8ieCqXlUp0cZM4tgyuhRxszeYmo9DTWPavQoY2YRTFk9Xg+6Zj2sAAIDA3nmmWd45pln/ufYyspKfvvtN2JjY6lXr16t73njjTcSEhLC7NmzlWPFxcXk5uYqfaCWLl1a47ro6GgA5s+fT2zshU9Sbtq0icjISPz9/QGIiYnh6NGjfP/998qYhx56CIPBoFx3cR+rV199lcGDBwPw7bffAhf6ZPn6upo5fvjhh259qOrXr8/Zs2eVrwMCAtDr9ezatUs5ZrFYACgpKQGgSZMmAAwcOJCpU6cq47Zs2cLOnTvR612T3qpVK3bu3MnOnTv5+eefGTFiBF26dGH9+vW0bt0agL///e988cUXpKWl8e2339boNVZdJKuNtP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPMmYWwZTRo4yZRTA94bVpHkZQgM8ltwUMCvAhrlmoV7yqKXO1Ssoqmb/2gNu2SO1ahpE0qC0m/9o3nK/W9f69cy14IpiaRzl4IpiaRzl4Iphq86j1NBTDE8GU0aOMmUUwZfUoi67pZopFRUWMGDFC6d1UWyUmJvLjjz8qBR240MeqZ8+el72uU6dOBAUFsXHjRuVYdR+rxMREN/7hw4fJyMhQjh09epTKykpuueUWAIxGIzfffDO+vr6kpaUp4zZs2EB0dLRSXKreDvHYsWPKmKKiIs6ePYvNZqO8vBwAHx8fysrK3DJVr1ir3rWx+v9nzpxxy3Xw4EEATp8+rfgvKipix44dypisrCwOHjzolrO6+DZgwADi4uLo06cPb7zxBtnZ2bRt2/ayz1GTJk2aNGnSpEmTprqil0feRFCA++f0ggJ8eHnkTX+So2uj+WsP1Gg4fzAjn3lrDvxJjjRp0qRJk6Zro6RBbWnTwv2D2W1auD60oUmTJk2a1C2PV1hdrTxomaVo6NChLF26lOTkZJKSksjJyWH69OkMHTqUiIgL+9D+7W9/48iRI+h0OkwmE4MGDWLUqFHMnTuXsLAwpY9VYWEho0aNUq678847qV+/PnfffTcAjRs3pri4mKioKOLj4918bNu2jY8++ogVK1bQrFkzDh8+zMyZM9386nQ6pkyZwmuvvUaDBg0IDAwkICAAm81GUVER/v7+ykqs7t27o9fradq0KTk5OURGRiqrnYqKigDYuXMnbdu2JTAwkMjISKUYVn2+Y8eO3HjjjTz++OPK854yZQoxMTHceeedAOzbt4/z58+j0+koLCxUCmuLFi2iXr16SvbayJOGxter6kIDP82j+ngimDJ6lDGzCKaMHmXMLIIpo0cZM4tgeot3Q4Mg5jx7Kwcz8jmVa6VZuKnGL7FqK7Vm1hrOq5sngql5VKdHGTOLYMroUcbM3mQGB/nx3LBO5BaWYfHytrje8CeSqXlUp0cZM4tgyupRNl3zgpU3FBwczOLFi5kyZUqNPlbVKioq4uDBg+h0OubMmUNOTg7Tpk3jnnvu4emnn2bhwoXk5+crfayaNm2qXLto0SIKCwuJjo7m5MmTZGZmArit3qqsrOS9997Dx8cHo9FIeXk5R48epXXr1vTv318Zl5mZidPppHXr1pw9e5bz58+Tk5PDvffeyxdffKGMKywsRKfT0bx5c06fPs3JkycxGo20aNFCGWO1uvbYveGGG7Db7Zw/f56jR49y88038+OPPyrjvvzyS44dO0ZsbCzp6elUVFSQm5vLq6++io+Pa8o3btxIVVUV//znP9m6dSv79+9XimdWq5Xi4mICAq7+H3O9XkdoqOmqr5NFZrPnL5BE
8kQwZfQoY2YRTLXzRDA1j3LwRDA1j3LwRDBl8tgz1MTl92LwTGrLfCK75IrnrTaHx6/ZZfreEcUTwdQ8ysETwdQ8ysETwVSzR1G/n1JzZlE8EUwZPcqYWQRTVo+yqE4WrMDVj2rRokWXPb9q1Sp8fX357rvvCAkJAcButzN58mS+++47kpKSLnldRUUF8+bNY9SoUYwbNw4Am81Ghw4d+PXXX5VxmzZt4tixY4SFhTF48GDGjx/Ptm3bGDVqFPv27VNWYu3evRuAzz77DD8/PwCeffZZtm3bhk6nIzg4mOzsbEpKSmjfvj3/+c9/AFcB67bbbiMjI4OEhAQAtm3bBsALL7xAv379APjkk0+YNGkS4CrkAbz33nv85S9/YcaMGezcuZMRI0YQExPDypUruf322wEYPXq00rfqiSeeAGDdunWMHz8egEOHDtGwYcP/NQ015HA4sVhKr/q6610Gg/ob+Gke1ccTwZTRo7ebzYtgyjgvoP7nKOu8aB7VxxPBlNGjiMwrNh/h8MlC2rQIYWjfGI953vKoNZxXN08EU/OoTo8yZhbBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOrzupswep/KSUlhe7duyvFKoD+/fvzyiuvsH37dgYPHnzJ63bv3k1JSYnbKimj0Uh4eDjp6elu/FatWnH8+HGioqIA1wqskJAQvv/+e+Lj47HZbEofrPT0dLeeUevXryciIgJ/f3+lEFW99R9ASEgIPXr0YOvWrQo/NTUVnU5HTk6OW6aJEycCEBUVxenTp8nIyOBf//qXW64ePXqwbNkybDYbRqNRKVZdrDZt2lz5of5BaQ3lLq+60MBP86g+ngimDB5FNJvXGth7h1fXnqMs8yKaKaNHGTOLYKqRt+vIOWZ/caEvb0Z2MRt+Os0zQ9qR0OrqP/T1e3nqUWs4Xzd4IpiaRzl4IpiaRzl4IpgyepQxswimjB5lzCyCKatHWVRnN1M8ceIEjz76KAkJCfTs2ZPp06djs9mU82lpaUqhp1pms5nw8HBWr17NrbfeSnx8PA888AC//fab23UAJpOJMWPG0LFjR7p27YrBYKC8vJxz584p4/z9/dHpdHz00Ue0b9+efv36YTabFcapU6ew2+34+/szceJEevbsSUJCAvPnzwcgLi5OYdWrV49jx44xdOhQJVNaWhpVVVXccsstAGRkZBAZGcny5cu56667aN++PQ8//DC+vr6EhITQpEkT5d4RERFMmDCBJ598EoAdO3ZQWVnJ6dOnL/k8s7Oz+etf/wqAXq/3WvFKkyZNmv4siWg2rzWw946056hJk6a6rouLVRfrvf9c+vifIa3hvCZNmjRp0qRJkyZNmuqa6uQKq6KiIkaOHEmLFi2YNWuW0p+qvLxcWW1ksVgwm801rrXb7fz666+88MILxMTEsHz5ch577DHWrFlD06ZNsVgsGI1GkpOTAZgxYwbl5eW88sorAIwZM4YxY8aQlZVFUVERTqeTm266iYkTJ/LTTz8xd+5czp49q/gEaNy4MXv37uXuu+8mNjaWjz76CIBOnTopXkNDQyktLeXw4cM8/vjj5ObmsmrVKgBle0GLxUJMTAw7duygbdu2jBs3jnXr1lFZWUmTJk3c7vnSSy+RnZ1Nr1692LRpE9nZ2QD8+uuvREdHAzBq1Ci6detGTEwMs2fPVgp+999/P+Hh4bWeHx+fOlsHFaa60MBP86g+ngimLB5FNJvXGth7h1eXnqNM8yKSKaNHGTOLYKqVt2zT4Sue//TbYzx0Z+22B/RmZq3hvHp5IpiaR3V6lDGzCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2VQnC1arVq3CarXy/vvv1+hPlZSURERExCWvq6iooKCggJiYGB555BEAOnfuTL9+/ViwYIHSC8rhcHDs2DE2bNigrNI6evQoc+fOpby8nOTkZGw2G/7+/tx44428+uqrANx888189NFHVFVVud03LS2Nvn37
smvXLjZv3kxUVBRFRUUcOXJEGVNcXIyvry/dunVjwYIF+Pj40LhxYzIzM8nJyVEyHTp0iJtuugmLxcKMGTOIjIzEx8eHvLw8t3sePux6I71p0ybA1RMLYOXKlTzwwAMAtGzZks8//5ysrCxsNht6vR6Hw8E//vGPq50SRXq9TlhTy+tBdaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9BYvM7eEo4dyiGxgIjI8qNaco6eLrnj+8KlCVf0cqwsN53cfOceRn04S2zyMjjGeb6lYLbV+L4pkah7l4Ilgah7l4IlgyuhRzZm99XrnUpLpOdYVngim5lGdPJl0TQtW9erV44033uDGG2/0iPNH+lOZzWaKi4vdrtu9ezcOh8Ntuzuj0cgdd9zB5s2bAde2gVVVVbRu3dptS8EbbrgBgFtvvZWxY8dy3333ceDAAQYMGOB2j6ZNm5KWlsaZM2cIDg4GwOl0MnXqVOXrjIwM7rrrLo4dO6bcs6SkhMTERObMmaOwpk2bxscff6xkCgoKorCwkEcffZS+ffsq4zp37sy5c+ew2WzKPUwmE7t27UKnczVc3r59O4899hiNGzdWrnvppZew2Wx0794dgCeeeIL33ntPuaY2cjicWCyltb7+epXBoP4GfppH9fFEMGXxKKLZvNbA3ju8uvQcZZoXkUwZPcqYWQTTW7ySskrmfpFKatqF1Z3to8J46t72mAKuvm9e66bBZGQXX/Z8bLMQ1fwcE8H0Ji8nv5TJH/9MSdmFD/wFBfgw6bGuNAyt3Wpbb3sUwRPB1Dyq06OMmUUwZfQoY2YRTLXzvMn09usdER5F8UQw1c4TwdQ8qtfj9SCzOeAPrzrzqGD1yy+/XPG8TqfDaDTSqFEjGjZsiJ+fH/fee68ntwRcK5buu+8+t2PV/amqezhFRUUpf67WwYMHAejYsaPb8ejoaBYvXkx5eblSpGrY0P2Tfenp6RiNRk6dOqWc379/v1tRy+l0Kiud0tLSuPnmm9Hr9QQEBCiFpOpzALm5uYrXyspKt2ISQGZmJr6+vsr48PBwCgsLadmypTKmuLgYq9WK0+nk9OnTip/w8HC3wlNaWhp6vZ6cnBy3ezz66KNYrVY++ugjpT+Xp9Iayl1edaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9JQ3Z3Vqjb55B9Lzmb06lXEPJFw1b2if1nz9y5nLnv/b7Teq7ueYCKY3eL8vVgGUlFUxaeHPvPePRI/YoM7MopmaRzl4IpiaRzl4IpgyelRjZm+/3rmUZHiOdY0ngql5VCdPJnlUsBo+fPgfXo3TvHlznnnmmRorkmoji8VCZWUljz76KHv27MFkMjFo0CDMZrPSwykxMZEPPvjArZfV7t27AThz5gy33nor+fn5xMXFcdttt+F0OikqKqJTp07odDpycnIYM2YM27Ztw8fHh6qqKsLCwhR+mzZt+Pbbb9m5cydvvfUW6enphIaGYrFYAFcvKaPRqBSZ3nzzTdauXYvVasVkMhESEkJJiWtbpF69egFw/PhxJVNAQADFxcVumVq1asWxY8fYtGkTa9asISsri9DQUOW5VPv39/cnJyeHYcOGceDAAcrKyoiPj6dRo0Zuq84efPBB5ZmMGjVKOb569Woef/zxWs+P1sOqpurCfqiaR/XxRDBl8pg8uD1zfvcJs7YtXZ8wq+3PKRFMkGteoO48R9nmRRRTRo8yZhbB9AZPVN+8f94fzzuf7bvkcTX9HBPB9BZv34m8GsWqapWUVXHoVAHto+rXiq3WzCKZmkd1epQxswimjB5lzCyCqXaet5gi+wR7y6NIngim2nkimJpH9XqUTR4VrD766CPeeustbDYbf/vb32jWrBkAJ0+e5LPPPsPf358nn3ySzMxMPvnkE5599ln0ej39+vXzyLTT6eSzzz6jTZs2zJo1i5ycHKZNm4Zef+EbYejQoSxdupTk5GSSkpLI
ycnhhx9+QKfTsXDhQsaPH09MTAzjxo1j5syZynV+fn6YzWaOHTtGYWEho0eP5rvvviM1NRVf3wtLaLt27ao8g969e9OnTx+WLFlSw2ubNm34f+yde1xUdf7/n3NhuAxyk4siKkIqqJhSWmpSpl3UzI10czVvWV8qat1ca7VatXLT3G2r1aQ0De92WU0tzeymoWZ5SRHvgoKgqAwwMHKdmd8f/Dg6oW7CfOzg57wej31snPM5z/N6nc/AjLz5fN7fffcdS5YsYdSoUZw9e5Z169bh5eWF01nzp+XNmjVDp9Oxbds2IiMjefLJJ/nss88oLCykqqpKYd1+++1s2LCBd955hwceeOCK92zRogXHjx8nPz+f6Oho9u/fT0ZGBn379uXIkSMArFu3TilWvfbaa0BNQW/16tXEx8fXe260HlZXV2PYD1XzqD6eCKYMHgMD4fXk3uSdKyXvvM0te3iLYF4qGeYFGt9zlGVeRDNl9ChjZhHMhvBE9c3re3sb+t7ehgVr9/PL4bN0aR/KuAc71ddmHd3o8wKQZ8m56vncggsk3NKqQfdQW+brwdQ8ysETwdQ8ysETwZTRo9oyX48+wXDjP8fGyBPB1DyqkyeTGlSw+uGHH/D09OTjjz/GZDK5nBs+fDgjR47kl19+4fnnn+dPf/oTDz/8MPPnz29wwcpkMlFRUcGcOXOUPlZ2u50pU6ZgNNZE8vf3Z9GiRbz22mskJydjNpvp0qULO3bsYNSoUYwZMwao2Y6vsLAQp9OpbNvXpEkTiouLcTqdpKSkEBsby6RJk5gxY4ZSZGratOav/Wp7Re3du5f77ruP/Px80tLSFFZYWBgAQUFBLF68mPDwcF566SXeeOMNl2fm6elJRUUFxcXFpKSkEB8fzwMPPMDs2bOVTLV9tAICAvj6668xm82MHDmS77//nszMTOWebdu2paKiAr1ez8GDB4GaflibN29Wxqxdu1a599///neX57t7925iY2Px9r72byyth9Xl5e79S93NE8GU0aOMmUUw3c0rKS0HoLS0nEJj/Xv0XSpfTwO3xoZhtZbVu0/JpZJxXgAycwrJOW+jorwSbzfMTWPIrHmUw6OMmUUw3cET2TcPYOhd0Yx7sJP2flAPhQdd/d8aLZr6qKYXmEzzcqnOFpVRUm7Hz8tASEDDf+ki43MUkTkjy0LOeRutQsx0iAxqME/GeRHBVDtPBFNGj2rNLPrzjizPsTHxRDA1j+r1eCPouvWwWrduHU899VSdYhXUFGAGDRrEe++9x/PPP4+npycPPvggc+fObcgtATAYDPj6+irFKoDevXsDUFlZqRyLjo4mNTVV+XrBggXs2LGDTp0u/vXjsmXLSExM5NChQ3h5ebn437p1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWloaPj41S20rKiqU+7Rs2VIZN3/+fMrKypSvjUYjTZo0IS0tTTmWl5fH7NmzlUy1/v74xz/y17/+VRl36tQpMjMzFU9RUVFs376dHTt2sHr1aiZPnswdd9xBamoq7dq1A6B///5s2bLlss/3n//8JxkZGS4rz65F2v6cV1Zj2A9V86g+ngimDB5Ly6qYtzbDZWuETm2CSBrcEbNXw5rO1kptma8H0x28/MIL/GPxTpetoHy9jfx99K2EBNR/q4paqTGzaKbmUQ6eCOaN7vF69M1rqMfrwRPBbCivQ+sgfL2Nl90W0NfbSGyrQNX1ApNhXkD8ZyhZnqO7eY3t85MIpoweZcwsgql2XkOZ2ucdcUy180QwNY/q5MmkBm2mWFZWxvnz5694/ty5c1y4cHG1TZMmTVy27auv7HY7RUVFSr8oQCkuXa54VqvalUoHDhxQjlVVVZGbm4vdbqe8vOav8HU6HRUVFZw4cUIZ9+OPP+J0OpVtAc+cOQPgMgZgz549AIwdO5ZevXopK5x+/PFHZUxxcTGFhYXY7XaXTAUFBSQkJNC5c2ceeeQRVqxY4ZKp1t/q
1avp2rUr3bt356WXXuLYsWMunhISEiguLqZfv368/PLLACxatIgDBw6QkJAA1BT4/vCHP6DT6TAYDC4Znn32WZKTk6/4HDVp0qSpMWje2ow6TWcPnLDw/pqM38mRplr9+pctUNOv5LVFO38nR5o0abqRlTS4Y51VCB0ia375run31d9H34qvt+vfUNb+Al7T7yftM5Q6pX1+0qRJ09Wkfd7RpEnTjaIGrbC67bbbWLx4MV26dKFPnz4u57799lsWL17M7bffrhw7ePAgLVq0aMgtgZpVVF5eXi79qWbNmoW/vz/V1Rc/wI0ePZq8vDw2bdoEwIULFzAYDCxatIiQkBDatWvHihUrlJVOxcXFeHl5odPp8Pf359lnn2XChAmUlZUxa9YsgoKC0Ol0yliAzMxMpk2bpqxY2rFjh3LvyMhIpk2bhoeHB7NmzUKv1xMWFsb777+Pp6enUoCCmmKU0+nE09OTZ599lvXr1zNv3jyaNGmiZLJYav7RcO7cOQYMGECbNm1YuHChi39AKYRZLBbuvPNOvv32W9577z1atGjBvffeC0BISAjR0dGYTCY6d+7Mbbfdxvr168nMzOT48ePcdNNN9Z6fhjSavlHVGBr4aR7VxxPBlMWj1nRWvR73HT9/2b+mh5pfuhzMLiQuqmm92GrNLJKpeVSnRxkzi2C6i+fv68kLI+I5V1SG1Y3bm13qTW2ZRTLdyWse7Mvcv97FgRMWss+5d4uzS/9fbTwRTHfxRH6Gkuk5upvXmD4/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lE2NahgNWXKFEaNGsXTTz9NWFiYsuVdTk4O+fn5hIeHK/2RKioqOH36NEOHDm2waZ1Ox9ChQzl8+LDSn2rIkCF1trhzOBwuq5gA9Ho9zzzzDAsXLsRisRAbG8tTTz3F22+/7cJPSEigoqKCCRMmYDQaueeee8jPz1d6WNXqr3/9K2vWrOHTTz/FbDbj4eFBVVUV7du3Z9CgQaxatYpdu3aRmJjIm2++ic1mIz4+nkceeYTFixcrz8bpdBIXF4evry9z5szBx8cHX19fpUAGsG/fPgCeeOIJvvrqKzZt2kRYWBinTp1y8ZSSkkLnzp1p27YtX3zxBQChoaEYjUZllRnA999/z913361kb9GiBZMnT2bz5s1UVVUpq8muRXq9zi2NHG9UNYYGfppH9fFEMG90j1rTWXHMhvLyLDlXPZ9bcIGEW1o16B5qy3w9mJpHOXgimGr2mHuulCMH8wkPNhMe4ttgnsjPqDLNiwher0AzvdxGuyg1ZxbFbCjvenyGkuE5upvXGD8/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lEWNahgFR4ezrp161i5ciVpaWnk5uYCNb2jRo8ezSOPPKL0cvL09GT+/PkNdwz4+flhMplc+lMBfP755/j7+ytfL1mypM51VVVVjBkzhqSkJOX4xx9/rKyqqh3ncDiYPXu2y/XDhg2jefPmAMrY9u3bs27dOgBGjBiBh4cH27dvV8536NCBXbt20aVLF1599VWF9dZbbyljdu/eDUC7du14/fXXlTEzZsxg8eLFyrjMzEwAEhMTmThxIgBOp5NbbrkFm82Gv78/lZWV7Nixg4kTJzJmzBhuvfVWJk+ezMSJE3nhhRc4deoUERERHD9+nF27drF8+XLlfomJiXh7e/OXv/yF7OxsoqOj/8dM1JXD4cRqvfC/B0omg0H9Dfw0j+rjiWDK4lFrOqtej+FBV//Q1qKpT73nRq2ZRTI1j+r0KGNmdzJLy6pIWZ1OeubFVR5xUUE8/VAcZu/6989Rc2ZRPBFMGT3KlFnkZyiZnqO7eY3p85MIpoweZcwsgql2ngim5lGdHmXMLIIpq8cbQX5+3r951VmDClYA3t7ejB07lrFjxzYU9ZsVFRWlFG9qVVJSwrlz54iKirrqdQBZWVnExMQoxzMzMwkPD8fLy0sZd+TIEZdrnU4nWVlZ9OpV87d/
rVq1wsPDg8zMTHr37q1wbr31Vpd71d6ntpfVpfesHVObpbCw0GVMixYtcDgcysq12vOXXqvT6QgMDKSsrIyWLVuSnZ1NVVVVnecQGRmpXBsREcHevXuBmm0SH3roIQ4fPkxoaCjdunW74vP7rdIayl1ZjaGBn+ZRfTwRzBvdo9Z0VhyzobwOrYPw9TZedlsbX28jsa0CG+xXbZmvB1PzKAdPBFONHueuSq/TPycjy8K7q9KZ8EiXBrpTZ2bRPBFMGT3KkPl6fIaS4Tm6m9cYPz+JYMroUcbMIphq54lgah7l4Ilgah7VyZNJDS5Y/R5KSEjgvffew2q14ufnB8CXX36JXq9XCkoAx48fZ/r06ezZswez2czAgQPx9fVlw4YNSiGpqqqKr776ioSEBOW63r17s2bNGu644w6sViuxsbEMHDiQoqIi7rzzTgBMJhNdunRh7ty5vP3223h4eGC1Wjlx4gTR0dFEREQAcMcddwDwySefsHLlSsLDwxk5ciRpaWk8/fTTAFitVgwGA1u2bKFHjx6UlZXRtWtXAgICAOjUqRNQ0+fKbDYzdepUJk6ciNlsZvDgwVitVvz9/TGZTEofq507d/KPf/yDnJyarQNqC2a158+fPw/Ak08+CdSsgDObzaxduxYvLy9atWpV7/nReljVVWPYD1XzqD6eCKZMHpMT45j7q7/Q79im5i/0G/pzSq2ZRTLdyZv2WHemLfzJ5Zcuvt5Gpj3WvUFzo+bMopiaR3V6lDGzu5ha/xzNoww8EUx38kR9hpLtObqb11g+P4lgyuhRxswimGrniWBqHtXpUcbMIpiyepRNDS5Y/fDDD3z66afk5ORgtVrr9HjS6XR8/fXXDb2Ni4YNG8aSJUtITk4mKSmJ/Px8Zs2axbBhwwgLCwNqCjODBw9Gr9fz7rvvkp+fz8yZM7nppptYuHAhQUFBtGvXjhUrVlBUVMS4ceMU/qU9oZ566im++eYbXn/9dW677TY6d+4M1BS6zpw5Q1FREQkJCcTFxfHuu+9y5MgRl35YtayKigrGjh1Lfn4+r732Gv7+/gwbNkwZ53DUVFz9/f0ZPXo0q1evZtu2bQAEBwcrYyorK7HZbAwaNIjQ0FAWL15MVVUVcXFxLs/o/fff59577yU6OppvvvmGadOmAXDmzBmgpvgFNYW3xx57DJvNxsqVK5Vj9elfBVoPq/+lxrAfquZRfTwRTBk8BgbC68m9yTtXSt55m9t6oFwqtWW+Hkx38AIDzayYPpA9h89y6KSFmNZBdG0f6gZ3NVJjZtFMzaMcPBFMtXnU+ueI4YlgyuhRlsyiP0PJ8hzdzWtsn59EMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkBhWsPvjgA958802aNm1K586dad++vbt8XVX+/v4sWrSI1157jeTkZMxmM0OGDOG5555TxqxcuRK73U5wcLCyZZ/dbmfatGk8/vjjLFy4EIvFQmxsLAsWLFC23auoqOCDDz7g0Ucf5ezZs8ybNw+DwYC3t7cyBmDjxo2cOnWKqVOnsmLFCrZv345Op8PpdNKiRQtlXEpKCkajkdjYWD777DNsNhvBwcF4e3vTpEkTZZzT6eTJJ59k7969pKSk4O3tjV6vx+FwKD2sKisrcTgcvPTSSyxdupS8vDyCgoLIz88nJCREeTZQU3T66quvXPgAhw8fBuDQoUPAxd5iOp2OiIgIjh07htVqpbS0FF/fa/9HidbD6vIyGNS/H2pj8LhuaxaHsovo0DqAgT3bNJjXGDJrHhvO8/U0cGtsGFZrWYP6Vl0qtWcWwRThMbp5E7q2D3Xb3DSGzDJ6PFtURkm5HT8vAyEB7vnQrvbn2BjmRa0etf45mkc1epQxM7j/M5SMz1HGz08imDJ6lDGzCKbaeSKYmkd1epQxswimrB5vBPn5XaceVosXL+b2229n3rx59V6RU19FR0eTmpp6xfNbtmyhT58+zJ07VznWv39/pk6dSps2bdi8efNl
r9u9ezelpaUMGTKE2NhY5fiMGTPYtGmTC799+/YMHz6c4cOHAzBixAj27t3L5s2b6dy5M5WVlfz4449UV1czfPhwEhMTAfjmm294+umnOXXqFBEREVitVqBm+8BLi2733HMPZ86cUXprVVdX4+3tzahRoxg1ahRQs5Kse/fu6HQ1/7iv/f8HH3yQf/zjHwrr5Zdf5pNPPiE5ORmA0tJSJVefPn2UcV26dKGsrIzKysorPtv/JW1/ziurMeyHqkaPB05Y+NfKX5Sv048X8NG3x3lheBdiWgU10KE6M4tmyuhRxswimDJ6lDGzO5ilZVXMW5vhsr1bpzZBJA3uiNnLPZ8b1f4c1TgvonkNZWr9c8TwRDBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT8ygHTwRT86hOnkxq0GaKVquV++6777oXq36LMjMziYqKcjnm5+dHSEgImZmZV70OqHNtdHQ0eXl5ylZ6l+MnJCRgt9uVVUzZ2dlUV1fX6a0VHR3tci+73Y5OpyMtLU0ZU1VVRWFhIXr9xSkqLy+ntLSUEydOKMcyMjIAlNVQtVsQ1vasqlVtL6varQe9vb0xGo3KtoO1cjgceHh4EBTU8CKAJk3u0qXFqks1a/nlj2vSpEmTpt9f89ZmcOCExeXYgRMW3l+T8Ts50tRYlDS4Ix0iXT+LdoisKXZq0qRJkyZNmjRp0qRJk6YbVw1aYRUXF0dWVpa7vFyTjh8/zvTp09mzZw9ms5nBgwfzl7/8BZPJBNQU0/z8/Opc5+/vz48//shdd92lbAk4efJkunTpolxnMpkoKipi+vTppKWl4eHhQWxsLE6nk+LiYry8vLBarTRp0oRvv/2Wt99+m6ysLMLCwtDpdOzYsYO0tDR27NgBQN++fUlNTWXt2rXYbDaMxprHXlxcDMCFCxcICAhgwYIFfPnll+Tl5QE1K6pqV0xBTcEqODiYUaNG4XQ6KSgoQKfT4enpiaenpwvz4MGD9OnTB6vVSmVlpbJiqvZ8VFQUO3bsYOnSpXzxxReUlJRgMBioqKhocLGqIQ1fb1Q1hgZ+avW45ocrF5gBNvx4kkF31G97QLVmFsmU0aOMmUUwZfQoY2Z3MU8X2FxWVtXK4YT9WRbOW8tpFuTzu3psTDwRTDV79Pf15IUR8ZwrKsPqxu0k1ZxZFE8EU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9yqYGFaymTZvGE088QadOnRg0aJC7PP1PFRcXM3r0aCIjI5k9ezb5+fnMnDmT8vJypkyZctVrLRYLFouFSZMm0b59e5YtW8Zjjz3GmjVrXHpUPf744wC8+eablJeX88orr9RhnTt3jmeeeYYhQ4bw4osv8uOPP5KSkkJ1dTXJyclK8ay6uppPPvmESZMmERYWxvjx4wGU1VoATZo0obKyktzcXJxOJ82bN+fMmTPY7XZljE6n4+abb+abb77Bw8MDT09PwsLCyMrKoqCgwMWbt7c3FouF6upqgoKC8PLyIjs7m+zsbOLj4xk6dCipqak4HA7Ky8ux2+1Knyubrf77YOv1ugY3wr6R1Rga+KnN45FTxVc9fyiniFEqa74uw7xcD6baeSKYmkc5eCKYavN4/EzpVc/bKh1u+byg9ueotnm5Hjx3MkV9plRzZlE8EUwZPcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLD6y1/+QnV1NS+88ALTpk2jWbNmLlvYQU2RZe3atQ0y+WutXLkSm83GnDlzCAgIAGq21XvllVdISkoiLCwMPz8/SkpKXK6rqKjAYrFw8803M2bMGABuueUW7r//fhYsWMC0adPw8/OjsrKSI0eOsGHDBmXbvz179rB48WKys7MV/r59++jcuTOvvvoqALfffjsrV66kqqqKPXv2cOzYMQYOHMjmzZuZNm0aQ4YMASA1NZWhQ4eyZ88ehg4dip+fHwUFBTidTtLS0pRMTzzx
BFu2bCE/P1+5586dO3nggQd48803lVydOnXiyJEjQM0KMqgpOn3zzTdEREQA8N133/Hkk0+Snp7OH/7wB6KiooiOjiYzM1MpUPXt25fs7GwOHTrE6dOnad68+TXPjcPhxGq9cM3X3egyGNTfwE+tHttF+JN+vOCK52NaBqim+bpM8yKSqXaeCKbmUZ0eZczsLqaPh+6q580mfYMaxav9Oap1XkTyAM4WlVHi5hVRas+seZTDo4yZRTBl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWjzeC/Py8f/OqswYVrAICAggICKB169YNwVyztmzZQo8ePZTCDkD//v2ZOnUqW7duJTExkaioqDq9qtLS0nA6nfTu3Vs5ZjKZuOeee9i0aRNwsXdVZGSkS48qvV6PTqfjxx9/pFu3bkRGRpKens4TTzyhjHE6nVRWVmKz2Th16hStWrXCYDBgt9u5//77lXHnz58H4NixY8o9bTYbCQkJLpkMBgOAkqlFixakp6fTv39/ZUxJSQlVVVXk5eVRWVnp4rlJkybKf9f2sLp0m8SmTZvSokUL/vrXv+Lv709YWBh33nmnkqW+0hrKXVmNoYGf2jwO7BHJfzdfeVvA/re3Vl3zdRnm5Xow1c4TwdQ8ysETwVSbxxB/b2JaBXAou6jOudjWAQT7ebnFr9qfo9rmRRSvtKyKeWszXLaB7NSmpueU2avhvW7VmFk0U/MoB08EU/MoB08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLGlSwWrJkibt8XJMyMzN5+OGHXY75+fkREhKiFKkSEhJISUlh5MiRpKenYzablWLN4MGDXa6Njo5m0aJFlJeXEx8fj16vx2q1Kn2uYmJiyMvLo2nTpgo/JiaGdevWsWbNGt555x08PDzo3LmzslopMzOTiIgImjVrxpkzZxg5ciRZWVmEh4cTEBBAYGAgubm5ANxxxx1ATV+uXr16YbPZiIuLU3zX3vOmm24iPT2dlJQUJk6ciNlsJiYmBr1ej91uJycnh+joaFq1akVeXh533303FRUVtGrVioKCAgwGA4mJiQBUVlbi5eXF5s2b2bZtG5WVlbz44oucOXOG2267jfDwcBFTp0lTvfTC8C7MWv7LZY9r0qRJkyZ16oprrOr/NzGaVKp5azM4cMLicuzACQvvr8lgwiNdfh9TmjRp0qRJkyZNmjRp0qSp0alBBavfS1ar1WWlUK38/f0pLq7pdzNgwADefvttDh48yJNPPsnJkydZtWoVgEuvqtGjR3Ps2DGcTifFxcWEhYVhMpmwWCz069eP+Ph4UlNTOXfuHDfffLPCj42NBeDo0aM88cQTlJaWsnTpUgIDAyksLFTGhYSEkJubS3V1NRMmTGDTpk3s2rWLW2+9lb179wLQrFkzAHJzc0lMTKR169bMnz9fKTbVsmJiYgDIyspyydSqVSuys7OVcQkJCSxdupSqqiqqqqo4fvw4ULPFYG32wsJCtm3bRlhYGGfOnAHg9ddfp1WrVrz//vsNmh+jUWsq92s1hgZ+avbYKSqYxS/344ttWRw4WUSH1gEM7NmmwVw1ZxbFlNGjjJlFMGX0KGNmdzFPF9g4eJnVVQAHs4s4by2nWZBPvflqf45qnRcRvNMFNpeVVbVyOGF/lqVBc63WzCKZmkd1epQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6lE2XVPB6ueffwagW7duLl//L9WOv55av349JpOJmJgYUlJSMJvNSmGnticUgMPhwOG4uDyvoqKC8vJyQkJC2L9/P1u2bCEmJgaHw8G5c+fw9fUFYPfu3UBNESk1NRWj0UjPnj1JS0tz8XH69GmMRiMGg4E333yT8PBwOnfuTFZWljKmtmDUqlUrNm/ejM1mo3Pnzuzbt8+lD9cvv/wCQNu2bZVMvXr1Ytu2bS733Lx5M56enuh0OoxGI82aNaOwsJDU1FQeeughoqOj8ff3JyoqikOH
DqHX63E4HDz44IPs2rWLv/zlL7z33nvodFfvP3E56fU6YQ2ybwQ1hgZ+avb46MBObmNdKjVnFsWUyeNXO06SfuwcN7cNoV9392xh+9HXh9l75Bxd24cytG87tzBBrnkRyVQ7TwRTbR6Pnym96nlbpcMtnxfU/hzVNi8ieNdjrtWW+XowNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LomgpWI0eORKfTsXfvXkwmk/L1leR0OtHpdBw8eLDBRi+Vn5+fSyGnVsXFxfj7+wM1fa569erF3LlzlfMLFixg1qxZfP/99zzyyCNAzbaGH3/8MVOmTMHf39+lEPXBBx8o186YMYPly5fTpUsXADIyMgAYP3680hPL6XTSrVs3SkpK8Pf3p7KyknPnzuHt7c26desU1jfffMPTTz9NYGAggFLk6tOnDy+++KIy7plnnuH7779XMh09elTxUturymq1KgVBf39/cnJyyMnJwd/fn7S0NEwmEwCpqanMmDGD2bNn8/bbb+Pp6cnZs2cZO3Ys7dq1Y/LkyUyePJmsrCyGDx/O1q1bla0Kr0UOhxOr9cI1X3ejy2BQfwM/zaP6eCKYMnnMyivm1dSfqUV8vzuXOZ/8wtSx3YlsXneV7m9RRlYBbyzbo3ydfryAxesPMvnReGIjg+rtVaZ5EclUO08EU60efTyu/kcvZpOewkJbvdig/ueo1nkRwRM512rNLJKpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ9dUsFq8eDGAUgSp/fp6KyoqSunrVKuSkhLOnTunFHIu1+eqQ4cOQM1KpdqCVe3Y8PBwvLy8FO65c+fq3LOyspJWrVoBcPbsWXQ6HZmZmUrBSqfTERISQklJCVFRUWRnZ+NwOLhw4YJLMS06Ohqo2S6w9v5Go1HpaVWriIgIqqqqlEy1njIzM5Vjfn5++Pr6cuHCBVq2bMn27dsBiIyMVOYJalZlAcr2gBaLRenPdekKs9pnlJ2dfblH/5ukNZS7shpDAz/No/p4IpgyeLy0WKUwHfDKhz8x/4W768W8tFh1qWYs3c3CSfVjXioZ5uV6MNXOE8FUm8cQf286tQniwAkLjkt6Vul10CEyiGA/L7f4VftzVNu8iOBdj7lWW+brwdQ8ysETwdQ8ysETwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KomsqWHXv3v2qX18vJSQk8N5777n0svryyy/R6/X06tULqFl5VFVVxdixY9mzZw9ms5mBAwei1+tdVnxVVVXx1VdfkZCQoFxnMBg4dOgQd9xxB1arldjYWKVAVLvCqrS0lODgYObOncvbb7+Nh4cH99xzDyUlJXh7exMREcGuXbuU+wwaNIjCwkLCw8NJTEwELhaRrFYrvr6+fP/99/To0YOysjK6du2KXl9TdazNZLPZ8PPzY+rUqUycOBGz2czgwYOprq4mNDQUk8mk9LHav38/Xbp0Qa/X06ZNG/r27QtAUFCQ8v/e3t4cOHBA6Y3Vo0cP7rnnHgBatGhR7/nReljVVWPYD1XzqD6eCKYsHr/fc6pOsapWdgds23+ahC7X9nNuzQ+ZVz2/4ceTDLqjfr3VZJkX0Uy180Qw1ewxOTGOuavTSc+0KMc6tgni6YfiGvxZQe3PUc3zIoInaq7VnFkUU/OoTo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY/q9ChjZhFMWT3KpmsqWP1a1dXVlJeXK32dfq3S0lK8vLwwGht0mzoaNmwYS5YsITk5maSkJPLz85k1axbDhg1TelM5nU4WL16Mh4cH7777Lvn5+cycORMPDw8OHz7MokWLaNeuHStWrKCoqIhx48YpfKfz4p+HPvXUU3zzzTesWrUKuLgCyel0cuHCBWw2GwkJCcTFxZGamorNZqN9+/Yufp1OJxaLhVGjRpGfn8+///1vAG6//XZlTHV1NXa7HX9/f0aP
Hs3q1as5ceIEOp3OJVNFRQVWq5VBgwYRGhrK4sWLqaqqUnxdes82bdpw991388svv/DOO+8AcO+99wI1q8H++Mc/snz5crp27QrUrJzbunUrbdu2pUePHvWaG62H1dXVGPZD1TyqjyeCeaN7zDxdd9vYS3Usz8rgPtfWe+rIqeKrnj+UU8QolfVpEcGU0aOMmd3BDAyE15N7k3eulLzzNsKDzYSHXP4zY32l9ueoxnkRwRM912rMLJqpeZSDJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUNqiRNnz6dnTt38vnnn1/2/J/+9Cduu+02Xn755Ybcpo78/f1ZtGgRr732GsnJyZjNZoYMGcJzzz2njDGZTJSVlREUFKRs2We325kyZQo333wzCxcuxGKxEBsby4IFC2jZsiUAPj4+OBwOhg0bhsViYd68eRgMBkwmE5WVlcq2fk6nE5vNxtSpU1mxYgXbt28nMDAQm81G06ZNFZ9Qs53gXXfdxWeffYbNZiMwMJDCwkKaNWsGgMFgoLS0lGeffZadO3eSkpKCt7c3Op0OLy8vl0xVVVW89NJLLF26lLy8PIKCgsjPz1dYtff8xz/+werVq1m6dCkOhwMvLy/Ky8tdimQTJ04kKCiIOXPmADVFsxYtWvDBBx+4bCd4LdJ6WF1eBoP690PVPKqPJ4Ipi8eo5k34/irnbwr3u+aeKu0i/Ek/XnDF8zEtA1TTp0UEU0aPMmYWwfT1NHBrbBhWa1mD+lZdKrU/x8YwLxlZFnLO22gVYqZDA3rwXSp3z7WM86J5VKdHGTOLYMroUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwebwT5+QnqYfVr/fDDD/zhD3+44vn77ruPtWvXNuQWV1R0dDSpqalXPG8wGAgJCeH7779XjtUWrtq2bcvHH3982euqq6sBuO222xgwYIByPDExkUOHDikFJKfTiaenJ8OHD2f48OEAOBwOOnToQFVVFYBSRIqKiuJvf/sbf/vb3wB46623eO+99/Dx8QGgoqICgMGDB/PMM8+4+C0rK3PJ5Ovry6hRoxg1ahQAeXl59OnTh8rKSuVeUNPbasmSJcq1DzzwAMeOHVMKc1BTAAsPD8fHx4fi4mJCQ0Pp2bOn4ru+0vbnvLIaw36omkf18UQwb3SPd8SFs2jDoctuC2jQQ89Oza+ZPbBHJP/dfOVtAfvf3lp1fVpEMGX0KGNmEUwZPaoxc37hBf6xeCelZdXKMV9vI38ffSshAT7usKj656jGeRHNE8GU0aOMmUUwZfQoY2YRTBk9yphZBFPtPBFMzaMcPBFMzaM6eTKpQQWrs2fPKtvVXU6hoaHk5+c35Bb1lt1up6ioyKXP1datWwHqrB46fvw406dPZ8+ePeh0OgDS09OVglVVVRW5ubnY7XbKy8vx8vJCp9NRUVHBG2+8wYYNG7BYLEREROB0OvHw8ADgzJkzCv/ZZ58lLS0NDw8PpVB14ULNSiRPT08APvzwQ3bu3ElWVhZhYWEUFBQo52ozFRYW8uqrr7Jx40ZsNhvh4eEumVq2bElERARvvPEG//nPfzh69CghISGcPXuWmJgYl+zDhw936bN15swZli1bxogRI4iOjm7wHGjSpEnT76mXR9/K9EU7XYpWBn3N8frqheFdmLX8l8se16RJk6bGoF8XqwBKy6p5bdFO/jM+4XdypUmTJk2aNGnSpEmTJk2aNDWwYBUQEEBWVtYVzx8/fvyK/a1Eq7KyEi8vrzp9rvz9/ZVVVAAjRoxg9+7d3HLLLcyePZuVK1fy9ddfk5qaSrNmzZQ+V7UrnYqLi5WClZeXFwsXLmTIkCGEhoaycOFC4OKKqeLimn4nWVlZWCwWkpKSyMjI4KuvvnI5b7fb8fHxYdmyZXTr1o0///nPLF++XCmQXZpJp9OxcuVKRo4cCcCSJUvQ6XQuK7HuuecePvzwQ+Xr/Px89Ho9U6dOdXlGmZmZeHl5ceed
d7Jx40aaNGlCVFSU4r++amgj9RtRjaGBn+ZRfTwRTJk8RrcI4MMX+5G2L48jp4ppF+HPHZ3DG8TsFBXM4pf78cW2LA6cLKJD6wAG9mzTICbINS8imWrniWBqHtXpUa2Z9x0/X6dYVavSsmoOZhcSF9W03ny1P0e1zotIngimjB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnqUTQ0qWPXu3ZuVK1cyaNAgOnTo4HIuIyODjz/+mPvvv79BBusrnU7H0KFDOXz4sEufqy1btriMO3PmDE6nkzlz5hAQEMD+/fv57rvvcDgczJ8/n+LiYmJjY3nqqad4++23Xa6tqqqiTZs2rF+/HqPRSP/+/fniiy/Izc2t4ycwMJA5c+YQHh7OsGHDWLlyJdnZ2cp5h8NBaGgomZmZpKenEx8fj5eXF5mZrttPVVdXExUVxcqVKzGbzQwbNoxly5Zx/PhxZcwLL7xAREQE8+fPJz8/H51Oh06n49ixY3Tt2hWAo0ePUlhYCMDGjRsBKCkpYe/evXzzzTd15vO3Sq/XERhorte1MqgxNPDTPKqPJ4Ipk8dBd7Z1C+dSPTqwk9uZINe8iGSqnSeCKZvHMa9+SUFxBcEBnnz4d/d91lRzZncw8yw5Vz2fW3CBhFta1ZtfK7U/R7XNy/XgiWDK6FHGzCKYMnqULfNHXx9m75FzdG0fytC+7dzGle05iuCJYMroUcbMIpgyepQxswimrB5lUYMKVuPHj+eHH35g6NCh3H333dx0001ATTHku+++IygoiPHjx7vF6LXKz88Pk8lUp8/V559/jr+/v/J1s2bNaN++PQEBAcp1drsdgAkTJpCYmAjAxx9/jE6nU641GAzY7XbeeustYmNjFd7WrVuVlVO1YyMiIpSiENSsuFq5cqWyOs3X15fy8nKee+45xowZo4z785//TGZmJqdOnSIiIgIvLy9sNhsrV650yfDpp59y7tw55Wu9Xs+jjz7Ko48+yqRJk9i/fz89e/Zk5syZJCYmYjAYmDlzJk2bNiUmJkYpxD344IP07t2b0aNH43A40OuvvRLscDixWi9c83U3ugwG9Tfw0zyqjyeCKaNHGTOLYMroUcbMIpju5L332T627T+rfH2+qIJBf11DQudmPP5g/QvJas7sTmZ40NX/0dSiqQ+FhbZ6sUH9z1Gt8yKSJ4Ipo0cZM4tgyuhRtswZWQW8sWyP8nX68QIWrz/I5EfjiY0MUoVHUUy180QwZfQoY2YRTBk9yphZBFNWjzeC/Py8f/OqswYVrMLCwvjvf//Lm2++yTfffMOmTZuAmgLMoEGDeO65567a40qkoqKi6qxOKikp4dy5c0RFRSnHMjMzefjhh12ug5oVUZden5mZSXh4OF5eXgA0adLEZTyA0+mktLSUsrIyysvLadWqFTqdThlbq9pCVUlJCVBTJAPqPKvS0lLl3hEREfj6+lJVVeVSrCopKaGiokJhXUkdO3Zk0aJFWCwWQkJCyMrKoqCggK1bt9KtWzdl3Mcff8zHH3/M+vXr693HSmsod2U1hgZ+mkf18UQwZfQoY2YRTBk9yphZBNMdvEuLVZdqy74zjBlQv9Xhl0qNmd3J7NA6CF9v42W3BfT1NhLbKtAtftX+HNU2L9eDJ4Ipo0cZM4tgyuhRlsyXFqsu1Yylu1k46e4GsUGe5yiSJ4Ipo0cZM4tgyuhRxswimLJ6lEUNKlgBhIaG8sYbb+B0OrFYLAAEBQWh0+kabK4hSkhIICUlhZEjR5Keno7ZbCYmJga9Xk+vXr2UcVarVSkYAcTHxysrnpYuXcrixYuJiYkhLy+Pfv36KePCw8PZt28fTz31FHv27MHDw4POnTtz4ULN6qLi4mLCwsLw9PQkNzeXBx98kKysLMLDwwkICMDHx4eqqioAZWXawoULmT59Ojabjbi4OPbt26ewAEJCQjh79mydTDqdzqXXFcAnn3zCBx98QHZ2NkajkXXr1uHr60tg
YCAAkyZN4tlnn73ss/v73/9OeHj9+7xoPazqqjHsh6p5VB9PBFNGjzJmFsGU0aOMmUUw3cUb//bmq56f+O4PvD3+znqx1ZpZBHPaY92ZtvAnl6KVr7eRaY91b/BnOLU/RzXPiyieCKaMHmXMLIIpo0eZMq/5IfOq5zf8eJJBd9Sv96tMz1EUTwRTRo8yZhbBlNGjjJlFMGX1KJsaXLCqlU6no2nT+jdpdrcGDBjA22+/zcGDB3nyySc5efIkq1atol27di4rmex2OwsWLOD//u//APD09KRLly6kpaXRokULRowYQWpqKufOnaN///7KdbUrq37++Wcef/xxSktLWbp0Kb6+vsrKKABvb28KCwsJCQlhwoQJbNq0iV27dhEREaGMqX1u+/btIzExkdatWzN//nwqKytdMrVs2ZL9+/fXyRQcHKwUtQ4dOsTf/vY3Dh06xKBBg/D29ubYsWP88MMPDB8+HKOxZspre1RNmDCB2267DYDk5GRsNhubN2/m0Ucfrddz13pYXV2NYT9UzaP6eCKYMnqUMbMIpoweZcwsgtlQXmFp1VXPW0qqGvwZRG2ZRTADA82smD6QPYfPcuikhZjWQXRtH+omdzVS+3NU47yI5olgyuhRxswimDJ6lCHzkVPFVz1/KKeIUSp7nxbBVDtPBFNGjzJmFsGU0aOMmUUwZfUoi66pYDVnzhx0Oh1PPfUUer2eOXPm/M9rdDodycnJ9TZYX61fvx6TyURMTAwpKSmYzWZ69erF9u3byc/PV4pWBoOB6uqLf2FaUVGhrJgqKiri7bffJiYmBofDwYYNG5TizpkzZwC45ZZbSE1NxWg00rNnT9LS0lx6XVVVVWE2mzEYDLz55puEh4fTuXNnjhw5QufOnZUxUFNE2rx5Mzabjc6dO/PLL79QWVmpsGr7VP0609atWwkKCgIgODiYnJwcvL292bhxIzqdDqPRSEREBKdOnarznFq3bk2XLl2AmmJdYGAgGRkZ9X7uWg+ry8tgUP9+qJrHhvMysizknLfRKsRMhwbs0X6p1J5ZBPNsURkl5Xb8vAyEBDT8Db4xZNY8qtOjjJlFMN31PR3o63HVolVQE49691+ScV6imzeha/tQrNayBvWtulRqf46NYV40j+r0KGNmEUwZPcqUuV2EP+nHC654PqZlgGrep0Uw1c4TwZTRo4yZRTBl9ChjZhFMWT3eCPLzE9TDqrZg9cQTT2AymVRdsNqyZQu9evVi7ty5yjGr1Ur37t3ZunUriYmJAHTp0oWAgABlzO7du7HZbOh0Ol599VVl3IwZM5QeXXCxYDVp0iRiYmKAmh5WN998MyaTCS8vLyorK7HZbISGhrJu3Trl2q+//prk5GSCg4MBOHHiBAD3338/SUlJyrhHHnmEX375RVnNdf78eQDeffddpYhltVrp1q2b8nVZWRk2m413332Xfv36MWnSJPbv388f//hHZs2aRWVlJSaT6bLP7Ntvv+WVV15h48aN1/Ko60jbn/PKagz7oWoer135hRf4x+KddbZX+vvoWwkJ8HGHRdVlFsEsLati3toM9mdZlGOd2gSRNLgjZi+P393f9WBqHuXgiWCq0aO7v6fffKY3j8389orn/5Xcu8HPQIZ5Ec0TwVQ7TwRT8ygHTwRT8ygHTwSzobyBPSL57+YrbwvY//bWqnufFsFUO08EU0aPMmYWwZTRo4yZRTBl9SiLrmkzxUOHDnHw4EGl4HHo0KH/+b+DBw8KMf6/lJmZqRR6auXn50dISAiZmRc/RCUkJLBt2zasVqtyHVCn11V0dDR5eXlKr6ji4mKMRiMbNmxQxlRXV+N0OpU+UdnZ2TidTs6ePasUpQDlXrXbAmZnZ+Ph4cHmza69GWw2GwaDQRlXVFSETqfjq6++UsY4nU50Oh0hISEu/tu0cd0bOjo6mqqqKnJyclyOT5s2jdjYWHr06MGkSZP49ttviYuLu8JT1aRJ0+X062IVQGlZNa8t2vk7OWqc
mrc2gwMnLC7HDpyw8P6a+q/61KRJ0+8nEd/Td8SFXdNxTZo0adKkSdP10wvDu1zTcU2aNGnSpEmTpl+r3j2sKisr+eGHH2jRooWywkhNslqtVFVVMXbsWPbs2YPZbGbw4MH4+fkp/Z4Ahg0bxpIlS0hOTiYpKYnt27cDEBcXxyOPPILFYiE2NpaCggKcTifFxcV4eXlRWlpK27ZtmTdvHgsWLMBkMhEQEIDdbleKR7X3adq0KYMGDcLhcBAYGEhFRQWAso2f1WqladOm7N69m5tvvhmHw0FoaCinTp3CYDAoXmvvOXXqVF555RW8vb3x8fFBr9cTHh7ucs/333+fbdu2cf78eQwGg7LCKycnh+joaA4ePEiLFi3o06cPLVu2ZOHChaxevRqAt956q0HPvqENu29ENYYGfprH+mnf8fN1ilW1Ki2r5mB2IXFR9e/vp8bMIpinC2wuqzBq5XDC/iwL563lNAuq32o1tWYWyRPBlNGjjJndxRT1Pf1/g+P4v8FxPPefzRRYq2jq58Fbf76z3j5rJcu8iOSJYKqdJ4KpeVSnRxkzi2DK6FG2zJ2igln8cj++2JbFgZNFdGgdwMCebf73hf9Dsj1HETwRTBk9yphZBFNGjzJmFsGU1aNsqnfBysPDg/Hjx/PSSy+psmDldDr55JNP6NChA7NnzyY/P5+ZM2ei17u+WPz9/Vm0aBGvvfYaycnJ6PV6dDod+/fv5/nnn6d9+/YsW7aMvXv31uHn5uYSFBSE3W6npKSEs2fP4uPjg5eXl8vYgoICWrRowfnz5ykuLqaysrKOX5vNhpeXF35+fhQUFHDu3Dk8PT2x2+0u9zx16hTBwcFcuHBB2f7P29u7zjZ/a9asUf67urpaKUbV9rFq164dbdq0YePGjVgsFhyOi0sUa7c7rI/0el2DG57fyGoMDfw0j9emPEvOVc/nFlwg4ZZW9ebXSk2ZRTCPnym96nlbpaPBP1vUlvl68EQwZfQoY+aGMkV/T6dOHVDva6+mG31ergdPBFPtPBFMzaMcPBFMzaMcPBFMd/IeHdjJbaxLJdtzFMETwZTRo4yZRTBl9ChjZhFMWT3KonoXrHQ6HZGRkRQWFrrTj9tkMpmoqKhgzpw5So8qu93OlClTMBpdY0dHR5OamgrAokWLeP311xk9ejRjxowB4JZbbqF3794UFRUpvaKcTidWq5UNGzYoWw+mpaUxbtw4nE4ngDI2KiqK9evXK/dLSkri+++/V84bDAZKSkp49dVXeeSRR4Ca7f969erlUogymUxUVlaydu1aJdNHH33kkqmWedddd/H+++8r1w4YMIDjx4/To0cPAFq3bs2CBQuorKzkgQceICkpiRdffBEPDw8yMjIYMKB+vwhyOJxYrRfqde2NLINB/Q38NI/144UHXf0NqEVTnwY1s1djZhFMHw/dVc+bTXpVNWn+YW8eR3OLaRfhzx2dwxvMU+u8iOSJYKqdJ4KpVo8iv6fB/bnPFpVRUm7Hz8tASEDD/2Gh1nkRyRPBVDsPtNeOLB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53Vu2AFNYWXmTNncv/999fpF/V7y2Aw4OvrqxR2AHr37g1w2RVOtaqurtnaq1Oni38RZDKZaNGiBSUlJcrqKafTiaenp0vunj17otPpqKqqAqBZs2ZA3X5SMTExfP/99/j41GyFU7tFYM+ePZUxAQEBBAUFUVZWdk2Zav21a9fO5Z7R0dEcP35c8VSrBQsW4OfnR2JiIi+++OIVn8u1SGsod2U1hgZ+msdrU4fWQfh6Gy+7LaCvt5HYVoFu8aqmzCKYIf7edGoTxIETFhzOi8f1OugQGUSwn5cqmjRnnbby+pKd1H7m+H53Lgs+P8DLo2+ldZhfg9ju8iiaKaNHGTM3lBni701MqwAOZRfVORfbKsAt39PQ8NylZVXMW5vhsn1hpzZBJA3uiNnL43f3dz2YMnp0
B0977cjpUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdSggtXevXsJCAhg0KBBdO/enRYtWtTZDg/g5Zdfbsht6iW73U5hYSEjR44kPT0ds9msbF346+3zLlXtSqXly5cza9YsLBYLMTExZGdnY7fbKS8vx8vLC51OR0VFBY899hh79uzBw8ODzp0743Q68fCo+Udq7dZ6GRkZPPjgg2RlZREeHq48owsXalYieXp6AjB16lQOHz6MzWYjLi4Oi8Xi4vW3ZCovLwdg9erVrFmzhvz8fF544QUyMzMVT9HR0QDk5eWRkpJC27ZtiY+PB6CqqkrpraVJk6bfpr+PvpXXFu10KVr5ehv5++hbf0dXjU9Jgzvy/hrXX/51iKz55Z9adGmxqlZ2B0xftJP5L9z9+5jSpEml0l1pkdXVF19dV81bm8GBExaXYwdOWHh/TQYTHuny+5jS1CikvXY0adKkSZMmTZo0adKkyf1qUMFq6dKlyn9v3779smN0Ot3vUrCqqKjA6XRy8OBBnnzySU6ePMmqVaswmUzKKiqA0aNHk5eXx6ZNm4CaIpJOp+Pnn3+mX79+xMfHk5qaSklJCQDFxcVKwUmv1/Pzzz/z+OOPU1paytKlS/Hw8ED3/39DU1xcDMDp06fx8fFhwoQJbNq0iV27drmct9vteHh4sHXrVhITE2ndujXz58/Hbrcrq69+a6Za5rlz5+jYsSP5+fl89dVXZGVluZyfOXMm69evp7KykpCQENq3b89///tfzGYznTt3btCzNxq1pnK/VmNo4Kd5rL+aB/sy9693ceCEhexzNlqFmOkQ6Z7Cr1ozi2D6+3rywoh4zhWVYXXz9kru8Pf9nlN1ilW1sjtg2/7TJHRpUS+2mudFFE8EU+08EUy1ejxdYOPgyaLLnjt4sojz1nKaBfnUm+8uj5cWyGvlcML+LEuDPKp1XkTyRDDVytNeO/J5lDGzCKaMHmXMLIIpo0cZM4tgqp0ngql5VKdHGTOLYMrqUTY1qGB16NAhd/kQIoPBQExMDCkpKZjNZnr16sXWrVtdttlzOBzY7Xbl6+rqapxOJ927d2f//v1s2bKFmJgYysrKlKIVQFlZGQ6Hg27dupGamorRaKRnz56kpaW5jAMIDw/HYDDw5ptvEh4eTseOHcnIyFDOX7hwgaqqKnr27MnmzZux2Wx07tyZ3bt343C4/nb0t2QCmDRpEitWrAAgNzeX559/npkzZyrn7XY7+fn5eHh48MMPPxAWFgbA448/Trdu3er7yNHrdQ1qon6jKvdcKUcO5hMebCY8xNdtXFmbDKrVY69AM73cQqortWYWwRT1M6Sh/jJPl1z1/LE8K4P7tLvqmP8lNc+LKJ4Iptp5Iphq83j8TOlVz9sqHW75Xle7R7XNy/XgiWCqjae9dsTwRDDVzhPB1DzKwRPB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiBhWsanXkyBE2b95Mbm4uABERESQkJNTpo3Q9pdfradGihcsqMKvVSrdu3bBarcqxJUuWuFxXuwJp4sSJ3HzzzcrxsWPHsm3bNvz9/YGawlaTJk1YuHChMsbpdNKxY0eleFTbo6p37968+uqryrgVK1aQkZGh9Lqq9fP2228rfID777+fU6dOXVOm2uvvvPNOxo4dS/v27Rk7dqwyF7Xnv/rqKwA+++wzQkNDAZRCldVqxc+vfr1YHA4nVuuFel17I6q0rIqU1emkZ178K9y4qCCefigOs3f9+xsYDHI2GVS7Rxkzi2CqlRfVvAnfX+X8TeF+FBba6sWWcV5EMNXOE8FUq0cfj6vv+2c26ev9/QLq96jWeRHJE8FUK0977cjnUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUZ0eZcwsgimrxxtBfn7ev3nVWYMKVpWVlUyZMoU1a9bgdDrR62tu6nA4ePPNNxk0aBDTp0+/as+o6yndFZsp1G/slcY4nU63+bhWVlRUFACZ
mZnKf9d+7eHhQcuWLYGL/bUGDhzocv0777zDO++8w759+5TeWtcqraHcRc1dlV6nv0FGloV3V6W7pb+BrE0G1e5RxswimGrj3REXzqINhy67LaBBDz07NW+wXxnnRQRT7TwRTLV5DPH3plObIA6csOC45KOMXlfTmy7Yz8stftXuUW3zcj14Iphq42mvHTE8EUy180QwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoQQWrf/7zn3z22WcMHz6cRx99lFatWqHT6Th58iRLlixhxYoV+Pv789JLL7nL72+Ww+EgLy/PZbXQl19+CVBn9dDx48eZPn06e/bsUY599dVXSi+nqqoqZfvD2h5WRqOR8+fP88Ybb7BhwwYsFgsRERHY7XZlZdWFCzUrjXbu3Mmzzz5LWloaHh4e+PrWbAnn4eHh4uedd95h586dZGVlERYWRl5enkvBqjbTq6++ysaNG7HZbISHh7swWrZsSWRkJJ9++inLli0DYM6cOTRp0oTbbrtNKR726dOHtLQ0AEwmE+Xl5dRujThhwgTFW32k9bCqkdbfQD6PMmYWwVQzb+rY7rzy4U8uRSuDvuZ4Q372yTgvIphq54lgqtljcmIcc3+1yrhjm5pVxg39rKB2j2qeF1E8EUw187TXjlweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6lE0NKlitXbuWwYMHM2XKFJfjUVFRTJ06ldLSUtauXfu7FKx0Oh0mk4nk5GSSkpLIz89n1qxZ+Pv74+19cQ/JESNGsHv3bm655RZmz57NypUr+frrr1mwYAEhISG0a9eOFStWYLO5buvh4+ODl5cXCxcuZMiQIYSGhirbA/662HP8+HHOnz9PUlISGRkZynZ8l7J8fHxYtmwZ3bp1489//jPLly/Hbrcrq9ZqMzmdTlauXMnIkSOBmi0NdTodRuPFqRw3bhx///vflWJW06ZNycnJcdniMCQkhKqqKkwmE3/961+Jjo5m9OjRQM2WhUlJSfV67loPq4vS+huI4Ylgqp0ngimjR3fwAgPNfPbPwXz900n2Hj3HzW1D6Ne9tRvc1UjGeRHBVDtPBFONHgMD4fXk3uSdKyXvvM3tfRxB/R7VOC+ieSKYauRprx05PcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLCqrq52KYL8Wl27duW7775ryC3qLX9/f/r27cupU6dITk7GbDYzZMgQ1q1b59In6syZMzidTubMmUNAQADZ2dl8/fXXOJ1O5s+fT3FxMbGxsYwaNYoPPvhAudbPz4/c3FzatGnD+vXrMRqN9O/fn88//5xz584pHmoVGBjInDlzCA8PJzExkVWrVmGxWBRWZWUloaGhZGZmkp6eTnx8PEajUekLBtCkSROKioqIiopi5cqVmM1mhg0bxrJly1zGFRYWYjKZlGJXeXk5I0aMYOXKleTn5xMWFkaTJk0AePjhhxkxYoRybUhICPn5+fV+7loPq4vS+hvI51HGzCKYaucBdI8JpV/31litZQ3qw1MrGedFBFPtPBHM5ZsOc+hkER0iAxjWr70bHLrfo6+ngVtjw9z2/QLq99gYXjsyehSRWXvtyOHxbFEZJeV2/LwMhAS455cPas8sgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vBHk53edeljdcccdpKWlMXz48Mue/+GHH+jVq1dDblFvRUVFUVhYSGpqqnKspKSEDz/80KW3U7NmzWjfvj0BAQHKdVDTO2rChAkkJiYCMHPmTMLDw/Hy8gJqikx2u5233nqL2NhY5Zr169dTWFgIoGyRGBISwsaNG5V7fvPNN6xatYpTp04B0Lp1a6qrqxk+fDhPPfWUMu6RRx4hOzubU6dOERERQUBAAEVFRaxcuVIphpWUlLBs2TKlSAawZcsWevfuzdy5c2nfvj1jx45l6NChLF++nK1bt5KYmMhNN90E
gNl8cYXP4cOHGTp0KOfPn2/Ak9d6WNVK628ghieCqXaeCKaMHmXMLIIpo0c1Zt51+Czvrt6vfH3iTAnrf8zhz0M60eWmUHdYlOI5NjaeCKaMHmXMLIIpg8fSsirmrc1w2ea7U5sgkgZ3xOxV/y3UL5XaMl8PpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNWgzxfHjx3Pq1CmeeeYZtm/fTm5uLrm5uWzbto3k5GTy8vIYP348RUVFLv9zh44fP87YsWPp0qULvXr1YtasWVRWVirnExIS2LZtG1arVTn25ZdfotfrOXHiBHfddRedO3fml19+UXpKAcTHx+Pr64u3tzfvv/8+Xbt2pVu3bnz00Uf06NFDGdesWTOgpj/Vgw8+SFxcHHfeeScVFRWUlJRQXl6OyWTCbDZTVVXFG2+8Qa9evejSpQtTpkzBZDIphaHWrWu2k8rKylIy9ejRg/37a34RlZmZCUBwcDA6nY41a9Zw3333ERcXx8CBA9HpdC7PNTMzk4iICF588UUA3nrrLV5++WWaNm2qsBISEtDpdHz44YfEx8fTqVMnEhISSE9Pp0WLFm6ZI02QNLgjHSKDXI51iKz5R64mTZo0adJUX11arLpU//n08sc1adKkSdO1a97aDA6csLgcO3DCwvtrMn4nR5o0adKkSZMmTZo03dhq0AqrAQMGAHDkyBG++eYbl3NOZ82SkoEDB9a57uDBgw25LcXFxYwePZrIyEhmz55Nfn4+M2fOpLy8XOmnNWzYMJYsWVKnh1VcXBwLFy5k4sSJtG/fnjFjxrBmzRqeffZZWrZsiaenJ48//jhvv/02Z86cISkpie+++4709HRycnIUD7VFnenTp9O7d2/69u3L4sWLXTx6eXlhNpvJz89nyZIljBo1irNnz7Ju3TqMRiMFBQXAxZ5Xa9eupXXr1jz55JN89tlnykqt4uJi5Z67d+/mH//4Bw888IByT6fT6VKYKy4u5osvvqC6uhqoKcLt27ePoqIihRkcHMytt97Kzp07lf5ctVsBXroCrT5qaCP1G0n+vp68MCKec0VlWN24jYisTQbV7lHGzCKYaueJYGoe1elRrZmXbjx01fMff3uU4ffWf3tAWZ5jY+KJYMroUcbMIpiyeDxdYHNZWVUrhxP2Z1k4by2nWZDP7+pRJE8EU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomxpUsEpOTkanu3qfHhFauXIlNptN6TsFYLfbeeWVV0hKSiIsLAx/f38WLVrEa6+9pvSweuihh/j000957LHHGDNmjMLT6XQsWLCAadOmARAREaGcS0lJITY2lkmTJjFjxgz27dtH586dMRgMQE2fql27drF3717uu+8+jh07xt69e5XrjcaaRxwUFMTixYsJDw/npZdeYsaMGZw5c8Yll16vp7i4mJSUFOLj4+nbty8ffPCBUrAymUzodDqCg4P5+uuvMZvNjBw5kg0bNriwHA6Hy7Z+O3bsUP67dhtCgI4dO5KRkYHRaOTChQsEBgZSUFDAtm3bsNvtSsZrkV6vIzDQ/L8HSiZRz0TWJoNq9yhjZhFMtfNEMDWPNz4v91wpRw7mEx5sJjzE939fcBkdySm+6vlD2UVued9R83MUxVQ7TwRTJo/u+P67ktSaWSTzRvd4/EzpVc/bKh3az1qV8EQw1c4TwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KogYVrJ599ll3+bgmbdmyhR49eijFKoD+/fszdepUpUcTQHR0tEsPq+3bt7No0SL69++vHAsKCqJFixZs2bJFOfbDDz9gNBoZOXIkEydOBGpWjKWkpLB582Y6d+6s9H76v//7Px5//HHl2ldeeYW9e/dSWlpKWFiYcnzdunVK3ymAOXPmUFJSAqAcj4uL46OPPlLG7N+/nw8++EBZ2aXT6ZTCXL9+/ZRxhw4d4vTp01RWVmIymfDy8sJut7Nv3z6XgmLHjh2VVV1H
jhwhNTWVlJQU7r77bmXME088wZYtWygpKXF5vr9VDocTq/XCNV93o8tgUH8DP82j+ngimDJ6lDGzCKaMHt3JKy2rImV1OumZF/9aPy4qiKcfisPsfW19UNq19OfEmZIrno9pFUBhoa3eXtX8HEUx1c4TwZTJozu//0R5FMUTwZTFo4/H1f8w02zSaz9rf2eeCKbaeSKYmkd1epQxswim2nkimJpHdXqUMbMIpqwebwT5+Xn/5lVnDSpY/V7KzMzk4Ycfdjnm5+dHSEiI0qPpSteB65Z3UVFR2Gw28vLyKC8vx8vLi6NHj1JdXe0yTqfT0aZNG4VRW7Dy9natlpaXlwOQm5tLdHQ03t7eGAwGl2KV0+mkoqJCKSa1atXqsqyzZ8+6ML28vICa7fwuVUVFBQA5OTlER0crBatLi1UlJSVUV1crrGPHjgEQGxtLdXU1VVVVZGRkkJ6eDkBBQUG9ClaA1lDuKmoMDfw0j+rjiWDK6FHGzCKYMnp0B2/uqvQ6fVAysiy8uyqdCY90uSbWsL7t+OrnU1c8/8e727olvxqfo2im2nkimDJ4dOf335WktszXg3mjewzx96ZTmyAOnLDgcF48rtfV9KQN9vPSftaqhCeCqXaeCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpXVasXPz6/OcX9/f2X7vCtdZzKZ8PT0VI4lJCTw7rvv4nQ6lb5TZ86cQafT0atXL5frjUYjW7dupUuXLkrfqb179zJixAgAqqqq2L59O3Cx71TTpk05duwYb7zxBhs2bMBisRAREUF5ebmy5V7tVn9ZWVk8++yzpKWl4eHhgb+/Px4eHjgcNS/uli1bAvDRRx8xZcoUsrKyCAsLU7YDrL1nkyZNOHXqFK+++iobN27EZrMRHh6OTqdTeovV9uBKT0+vs1JOp9MRHh5+9Um4irQeVnXVGPZD1TyqjyeCKaNHGTOLYMro0V08EX1Q/jK0M29/su+yxxv6PqzW5yiSqXaeCKYsHrU+RJrHhig5MY65v1qd17FNzeo87Wft788TwVQ7TwRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBSt3atiwYXzwwQdUVFTw008/UVlZSVFREdHR0S5b+o0YMYKdO3fi5+fH7Nmz+emnn5g3bx7r1q2jY8eOtGvXjhUrVtQpmEVERPDzzz+zcOFChgwZQmhoKAsXLkSn07msgNLr9Zw5c4aysjKSkpLIyMjgq6++wmQyKWNqVzytWrWKbt268ec//5nly5dTVVXlcs9mzZpx6tQpVq5cyciRIwFYsmQJer0evb7mm6VTp0506tRJ6fu1d+9edu3aRVVVFWFhYXVWe/1WaT2srq7GsB+q5lF9PBFMGT3KmFkEU0aPDeWJ6IPS9/Y29L29DQvW7ueXw2fp0j6UcQ92aojNOlLbc7weTLXzRDBvdI9aHyJxTBk8BgbC68m9yTtXSt55m9b/TKU8EUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoURas/Pz8lP5Pl6q4uNhl673LXVdZWUlFRYWyysrf35/Ro0cze/ZsXn75ZcxmM0FBQbRt29bl2tpVTN27d6d37940b96cefPm4XQ6mT9/PsXFxcTGxvKPf/yD5557TvHRpEkTHA4Hbdq0Yf369RiNRvr3789XX32F3W5X+N7e3pSWlhIYGMicOXMIDw9nzJgxpKamKlv+1TKDgoLIzMwkPT2d+Ph4goOD2bdvn3Lex8cHh8NBVFQUK1euxGw2M2zYMFasWKEUtwwGA++99x7vvPMOq1ev5uzZszRt2pSCggLOnDlDeno6cXFx1zw3Wg+ry8tgUP9+qJpH9fFEMGX0KGNmEUwZPZ4tKqOk3I6fl4GQgPp/2BTZB2XoXdGMe7ATVmtZg3qpXCq1z4sIptp5IpiyeNT6EGke3cHz9TRwa2yY9rNWZTwRTLXzRDA1j+r0KGNmEUy180QwNY/q9ChjZhFMWT3e
CPLzu8F7WEVFRdXpVVVSUsK5c+dc+k5d7jqArKwsYmJilOOlpaW0aNGCb7/9FoAXXniBI0eOuFwbFhZGfn6+cl2rVq0wGo1UV1czYcIEEhMTARRG7b1qVzS99tprdOvWTeHt3r2b06dPK1/XFtA2btyoHLNaraSmplJaWvOXoREREQD06dOH119/XRn38ssvs2/fPuVetSu3VqxY4dKHau3atS6rsUJCQpg+fTpjx47lgQce4K677mLUqFEAZGdn16tgBVoPq6upMeyHqnlUH08EU0aPMmYWwZTBY2lZFfPWZrhsI9apTRBJgzti9vK4Zt716IMiw7xcD6baeSKYN7pHrQ+ROKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHuXgiWBqHtXJk0mNcjPFhIQEtm3bhtVqVY59+eWX6PX6On2nLlV8fDy+vr5s2LBBOVZVVcVXX31FQkKCC//QoUOcOHFCOXbkyBGqqqq48847gZq+U7fffjseHh4uxbP169cTHR2tFJdqe20dPXpUGVNcXMzp06eprKykvLwcqOmPVVZW5pKptnhV23eq9v9PnXJttH7gwAEAcnJyADCbzS7HoaZIV1JSoqzWuvS5HTlyhOTkZJfjtf2yNGnSpEmTJtk0b20GB05YXI4dOGHh/TUZ9WYmDe5Ih8ggl2MdImuKYJo0aRIr7ftPkyZNmjRp0qRJkyZNmhqHGuUKq2HDhrFkyRKSk5NJSkoiPz+fWbNmMWzYMJe+U3/84x85fPgwOp0Os9nM4MGDGTduHCkpKQQFBSl9p4qKihg3bpxy3b333kvTpk0ZOHAgAC1atKCkpISoqCg6d+7s4iMtLY0PPviA5cuX06pVKw4dOsRbb73l4len0/Haa68xffp0goOD8fHxwdvbm8rKSoqLi/Hy8sLDo+Yvtnv06IFer6dly5bk5+cTHh6uFJlq+2Pt2LGDjh074uPjQ3h4uFIMqz3v7e2Nt7c3L774IgMGDGDBggWYTCZCQ0OxWC7+Au5Pf/oTR48eJTExkXHjxvHLL78A0KVLF5ec16qGNiC+EdUYGvhpHtXHE8GU0aOMmUUwZfF4usDmsrKqVg4n7M+ycN5aTrMgn2vm+vt68sKIeM4VlWF1wzaDtZJlXkQz1c4TwZTJo6jvv0u9qS2zSKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNjbJg5e/vz6JFi3jttddITk7GbDYzZMgQnnvuOWVMcXExBw4cQKfTMXfuXPLz85k5cyaDBg3imWeeYeHChVgsFmJjY1mwYIHLiqLU1FSKioqIjo7m5MmT5ObmAris3qqqquI///kPRqMRk8lEeXk5R44coV27dvTv318Zl5ubi9PppF27dpw+fZqCggLy8/N56KGHWL16tTKuqKgInU5H69atycnJ4eTJk5hMJiIjI5UxNlvNfunNmzfHbrdTUFDAkSNHuP3229m2bZvLMwoNDaVr164sXLgQnU6Hh4cHDz/8MB988IFLhsrKSpfeVgB/+9vf6js16PU6tzSuvlHVGBr4aR7VxxPBlNGjjJlFMG90j8fPlF71vK3S0aD3OVHvkTf6vFwvptp5IpgyeRT5GVWtmUUyZfQoY2YRTBk9yphZBFNGjzJmFsFUO08EU/MoB08EU/OoTp5MapQFK4Do6GhSU1OveH7lypV4eHjw3XffKX2c7HY7r7zyCt999x1JSUmXva6iooL333+fcePGMWHCBAAqKyu5+eab2blzpzJu48aNHD16lKCgIBITE5k4cSJpaWmMGzeOffv2KSuUdu/eDcAnn3yi9Kn661//SlpaGjqdDn9/f86cOUNpaSlxcXF8+umnQE0Bq0+fPpw4cYIuXboAkJaWBsCkSZO4//77Afjoo4+YNm0aUFPIg5ptCC9cuEB4eDi33norERER7N+/H6fTqYzJzc3l0KFDvP3228ycOZMxY8bw2muvATWFLJvNpmwteC1yOJxY
rReu+bobXQaD+hv4aR7VxxPBlNGjjJlFMBuDx/lr93M4p5jYVv6MG9SpXgwfD91Vz5tNegoLbfVig5zzIqNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ422YPW/tGXLFnr06KEUqwD69+/P1KlT2bp1K4mJiZe9bvfu3ZSWlrqskjKZTISEhJCVleXCv+mmmzh27BhRUVFAzQqsgIAANm/eTOfOnamsrFT6YGVlZRETEwPAgAED+PzzzwkLC8PLy0spRF3aXyogIICePXvy/fffK/z09HR0Oh35+fkumaZMmQKgjIuKiuL8+fMsXLiQjz76SCnsZWZmKmNOnTpFVVWV0ruqtlgFMGrUKG6++WY+/vjj3/Ko60hrKHdlNYYGfppH9fFEMGX0KGNmEUw1etyanseCLw4pX58tLGPz3jP834Ox3N6h+TWxQvy96dQmiAMnLDicF4/rdTU9b4L9vNySX4Z5Ec0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jHDwRTM2jOnky6YYtWGVmZtK3b1/Gjh3Lnj17lB5WISEhZGZmXvU6gO+++46nnnpK2TawVatW/Pzzz5w9e5bQ0FAyMzMxGAwAvPLKK8ycOZN77rmH1q1bK4zs7Gzsdjsmk4mxY8dSWlpKeHg4gwcPBiA2Nla5p6+vr7K9X3l5OV27dsXDw4Pq6mruvPNOAE6cOEFoaCizZ8/mrbfeUjJ5enri7e1NREQEAPv27cPpdFJZWcnw4cPx8PDAZDKRlpbG008/7XLvy+mVV14hLi6uIY9fkyZNmjRpum66tFh1qeatPXjNBSuApMEdeX9Nhksvqw6RQSQN7lhvj5o0adKkSZMmTZo0adKkSZMmTZqurhu2YFVcXMz69evp0KEDs2fPVnpY6fV6iouLr3id1WrFYDAwd+5cJk6cSPv27Vm2bBmbN28G4Nlnn+XZZ58lNzcXi8WCv78/M2fOpLy8nDfeeIPz589z+PBhxQPUbLFXVFTEn/70J6xWK++88w4APXr0UO4JoNfrMZlMPProo3zxxRdkZWWh0+mU7QWLi4vx8PCgvLycu+66i+joaJYsWUJlZSXx8fFKhtzcXAwGAyaTiaFDh7J+/XrOnj2Lv78/w4YNA2q2DYSa1VgzZswA4JFHHgGgY8eOdOxY/1/KGY1aU7lfqzE08NM8qo8ngimjRxkzi2Cq1eP7a9Kvev7D9Qd44sFr2x7Q39eTF0bEc66oDGu5HT8vAyEB7tl/WpZ5EckTwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsumGLVjVrjCaM2eOSw+rKVOmUFZWdsXrqqursdvtPP7444wZMwaAW265hTvvvBOLxYLBYCA5OVnZvm/JkiW0b98eqCkCjRs3Dp3Otf/FTTfdxKBBg1i+fLlS5CouLqZp06YAXLhwgdLSUiZOnMi+fftYsGABtau3fp3Jbrfzz3/+k/nz57N161aaNGmCxWIhODgYqNlW8OTJkzz//POcPXuWNWvWUFRUhF6vJyoqiiZNmgAX+2Hdd999So8sd0iv1wltaN3Y1Rga+Gke1ccTwZTRo4yZRTDdycs9V8qRg/mEB5sJD/GtF+PoKetVzx/OKa73+5LI9zM1z4sopoweZcwsgql2ngim5lEOngim5lEOngim5lEOngimjB5lzCyCKaNHGTOLYMrqURbdsAUrvV5PeHh4nR5WU6ZMUVY0XU61q6L69u2rHDOZTMTExLBt2zYWLlyIl5cXt912G3a7XSlWQU0PK4PBQGRkJAA+Pj4AxMfHk5SURFJSEgArVqxg2rRpVFVVARdXWP3xj3/kiSeeUHj3338/p06dqpPpwQcf5MEHH1Su7datm8JYtGgRer2ewYMHYzQaeeqpp3jllVf49ttv0el0VFZWYjKZ
mD59OgAeHh4uz2P8+PG0bt36tzziy8rhcGK1Xqj39TeqDAb1N/DTPKqPJ4Ipo0cZM4tgupNXWlZFyup00jMvbrkXFxXE0w/FYfb2uCZW2wg/zhZe+Q9R2rf0p7DQVi+fss2LKKaMHmXMLIKpdp4IpuZRnR5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53dsAWry+nXK58aOvZKY5xO52WP18fHtbIyMzM5efKkst3gpdq9ezf//e9/+dOf/kRWVhYA//nPf/jPf/6jjHnnnXd455132LdvH56enr/Z56XSGspdWY2hgZ/mUX08EUwZPcqYWQTTHby5q9I5cMLiciwjy8K7q9KZ8EiXa2KNG9iRren5Vzw/dkCHBvuVZV5EM2X0KGNmEUy180QwNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7Lohi1YORwOcnNzGTlyJOnp6ZjNZmJiYoCL/ZsuJ39/fwD+9a9/kZ2djcViISYmRinwFBcX4+XlhdFo5Pz58zz22GPs2bMHDw8POnfujN1uV1ZWXbhQs9IoLS2NBx98kKysLMLDw/H1rdn2yMPDw8XPc889x+HDh7HZbMTFxXHq1Ckcjosv7N+S6YknnuChhx5i2bJlbNy4UbnWbDaTkpKirP5avHgx77//Ps2aNWPXrl3k5ORgt9vR6/VMmjRJ8VYfaT2s6qox7IeqeVQfTwRTRo8yZhbBdBfvdIGN/VmWOscdTtifZeG8tZxmQT7XxHxycAfeW3Pgsscb8p4k07yIZMroUcbMIphq54lgah7V6VHGzCKYMnqUMbMIpoweZcwsgql2ngim5lGdHmXMLIIpq0fZdMMWrKCmZ9XBgwd58sknOXnyJKtWrcJkMuHtfXEPydGjR5OXl8emTZsAMBqN6HQ6duzYQb9+/YiPjyc1NbXONoLe3t7o9Xp+/vlnHn/8cUpLS1m6dCkeHh5Kn6ha5ebmEh0dzYQJE9i0aRO7du1yOe/j44OHhwdbt24lMTGR1q1bM3/+fKqrq9HrXV/c/ytTdHQ0hw4dYuPGjTz88MNER0fz3nvvYbVa2bp1K7fddhsAt912Gw6Hg//7v//j4YcfZsqUKTz22GM4HA6ysrLq3Pe3SuthdXU1hv1QNY/q44lgyuhRxswimA3lHT9TetXztkrHNb+PDExoy8CEtryzcjf7jp2n803BjB8W3xCbLpJhXq4HU0aPMmYWwVQ7TwRT8ygHTwRT8ygHTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KItu2IKVp6cnlZWVxMTEkJKSgtlsplevXmzduhWj8WJsh8OB3W5Xvvbx8cHpdHLrrbeyf/9+tmzZQkxMDBcuXKC0tFRZgVV7bbdu3UhNTcVoNNKzZ0/S0tKUbfxqxzZr1gyDwcCbb75JeHg47du35/Dhw8p5g8FAVVUVPXv2ZPPmzdhsNjp37syuXbtctuX7rZn+85//8MADD/D6668DcPToUdauXcsHH3zA+PHjMRgMVFdX89JLLzFq1Cief/555dqmTZtSVFRU7+eu9bC6vAwG9e+HqnlUH08EU0aPMmYWwXQXz8fj6tvimk36evecGjsgVvFYX8alkmleRDJl9Hi2qIyScjt+XgZCAtzzDxW1ZxbBVDsParYzzTlvo1WImQ6RQQ3myTgvIphq54lgah7V6VHGzCKYMnpsDJnd/R4I6n+OjWFeNI/q9ChjZhFMWT3eCPLz03pYYTAYCAoKYunSpcqxvLw8+vTpQ2VlpXJsyZIlLtdVV1cDMGLECAYMGKAcT0xM5NChQ3h5eQE1vaU8PT1ZuHChMsbhcNChQweqqqqAmkIVQKdOnXj33XeVcW+99RaHDx9Wtg6sqKgA4NVXX6Vly5bKuN69e1NWdrGR/G/JlJOTw4kTJ1yKUDNnzsRut7N27Vry8/MJDw9n27Zt5ObmMmrUKGXc4cOHGThw4NUe62+Stj/n
ldUY9kPVPKqPJ4Ipo0cZM4tgNpQX4u9NRIiZU+fqFpRahpoJ9vNSXc8pGeblejBl8FhaVsW8tRku2152ahNE0uCOmL3qv93ypVJb5uvBVCMvv/AC/1i8k9KyauWYr7eRv4++lZCAa9vW9HKScV5EMNXOE8HUPMrBE8HUPMrBcwdT9HsgqP85qnFeRPNEMGX0KGNmEUxZPcqiRluwOn78ONOnT2fPnj2YzWYGDx7MX/7yF0wmE1CzdV5RURFWq5Xa/k5bt24FalYc3XXXXVgsFmJjY5k8eTJdunQBUFYq/fzzz2zYsIG0tDSMRiPl5eXY7XbKy8vx8vJCp9NRUVHBypUrWb58OVlZWQQGBuJ0OpX+T2fOnAEgKyuLN954g7Vr12Kz2ZSiV22Pq9pVVGvXrmXnzp3s2bMHb29vioqKlLGXZlq8eDHLli0jLy+PwMBAACV3ZmYmAJ999hlvvPEGZ8+eJSwsTOmFVVpasxXT3r17CQgIID09nVGjRik9rADuv/9+902UJk2aNGnS9CtdrlgFkHO24auiNGn6PTVvbQYHTlhcjh04YeH9NRlMeKTL72NKkxD9+hd1AKVl1by2aCf/GZ/wO7nSpEmTJk2axEt7D9SkSZMmTSLVKAtWxcXFjB49msjISGbPnk1+fj4zZ86kvLycKVOmAFBZWYmXlxfJyckkJSWRn5/PrFmz8PT0ZN++fUyaNIn27dszYcIEhg0bxqZNm2jZsiUXLlzAYDCwYsUKgoODeeKJJ/juu+/Yt2+fcu/agpXZbGbq1Kn07t2bvn37snjxYgAKCgqUsVBTRMrOzmbUqFGcPXuWdevWAZCfnw/UFKJ8fX2ZPXs2rVu35sknn+Szzz7DYrG4rLCqrKzEaDTyj3/8gwceeMDlnrWsgwcPKv/fr18/AgIC2LJlC7t37wYuFqx++OEHrFYrEydOZMiQIWzevJns7GwAwsPDGzQ/DWlwf6OqMTTw0zyqjyeCKaNHGTOLYLqLt3Tjoaue//jbowy/t3292GrNLJKpeVSPx9MFNpeVVbVyOGF/loXz1nKaBdX/r47VmFk0U628fcfP1/lFXa1Ky6o5mF1IXFTTerFlnBcRTLXzRDA1j+r0KGNmEUwZPao1s8j3QFD/c1TrvIjkiWDK6FHGzCKYsnqUTY2yYLVy5UpsNhtz5swhICAAqCn6vPLKKyQlJREWFoZOp2Po0KEcPnyY5ORkzGYzDz30EIsXL6Z9+/aMGTMGgKioKPbs2cOCBQuYNm2acg+n04nT6SQlJYXY2FgeeughVq9ezcGDBxW+j48PRqORXbt2sXfvXu677z6Xws+lCgoKYvHixYSHhzNhwgT+/e9/s337doYOHQrUrOwyGAwUFxeTkpJCfHw84eHhbN26lfz8fOWeJpOJJk2a8PXXX2M2mxk5ciTLli0jIyMDgCZNmgA1WxsuX76cJk2aEBUVxQMPPMDnn39OZmYm8fHxNGnSBIfDgdPpZPny5TgcDuLi4khPT+fjjz/m4Ycfrtfc6PU6AgPN9bpWBjWGBn6aR/XxRDDdycs9V8qRg/mEB5sJD/F1G1fNmUUx1ezxqx0nST92jpvbhtCve+t6c47kFF/1/KHsoga/j8g0L6J4Ipg3usfjZ0qvet5W6WjQa3v34bMc/vEkMa2D6No+tN6cX+tGnxcRvDxLzlXP5xZcIOGWVg26h4zzIoKpdp4Ipoyf8UQw1c4TwdQ8ysFrKPN6vAeC+p+j2ublevBEMGX0KGNmEUxZPcqiRlmw2rJlCz169FCKVQD9+/dn6tSpbN26lcTERPz8/DCZTKSmpipjtm/fzqJFi+jQoYNybNmyZcyYMYNNmzYB4Ofnh91up127dspKKICPPvqI1atXs2fPHu666y6aNGlCdnY2kyZNUopftT7Onz/PqVOn8Pf3B2qKX+vWrVO+PnHiBP/+9785evSock+bzcadd97J3LlzFdbMmTPZunWrksnX15eioiJmzpxJv379lHEf
f/wxZ8+epbKykoiICAA+/PBDoqKilDGffPIJn3/+udLrKiYmhrS0NFasWMHEiRMZPnw4Pj4+TJ48WdlWsD5yOJxYrRfqff2NKoNB/Q38NI/q44lgupNXWlZFyup00jMvriiIiwri6YfiMHvXv1eLmjOLYqrZY1ZeMa+m/kwt4vvducz55Bemju1OZHO/a+a1a+nPiTMlVzwf0yqAwsL6bQ0o07yI4olgyuLRx0N31fNmk75er+18ywVe+fCnOn0ipj3WndDAhq3YkmFeRPDCg67+j88WTX1U83NMBFNGj7Jlbiyf8UQw1c4TwdQ8qtOjWjOLfA8E9T9Htc6LSJ4IpoweZcwsgimrxxtBfn7ev3nVWaMsWGVmZtZZAeTn50dISIhSbImKiqpTeDlw4AAAXbt2dTkeHR3NokWLKC8vV4o8oaGuf7malZWFyWRSVk+Fhoayf/9+l6KQ0+nk/Pnzisfbb78dvV6Pt7e3UqyqPQdw7tw5xWtVVRUtWrRwuWdubi4eHh7K+JCQEIqKimjTpo0ypqSkBJvNhtPpJCcnR/GTmZnp4m379u0AdOvWDYC2bdsCNX2zDAYDf/rTn1izZg2AUtSqr7SGcldWY2jgp3lUH08E0x28uavS6/Rqyciy8O6qdLf0alFjZtFMNXq8tFilMB3wyoc/Mf+Fu6+ZN6xvO776+dQVz//x7rYNfgYyzItongjmje4xxN+bTm2COHDCgsN58bheBx0igwj286oX+9fFKqjZcmfawp/c0ifiRp8XEbwOrYPw9TZedkskX28jsa0CVfdzTARTRo+yZG5sn/FEMNXOE8HUPMrBayjzerwHgvqfo9rm5XrwRDBl9ChjZhFMWT3KItUVrI4fP8706dPZs2cPZrOZwYMH85e//AWTyaSMsVqt+Pm5/lW30+nEbrezdOlSFi9eTFBQEPv373cZW9vHadOmTcyYMQMPDw/uueceunXrhtPppLi4mPj4eHQ6HSdPnuTBBx8kKyuL5s2bY7VaCQ4OVvpSdejQgW+//ZZly5YxefJkbDYbbdq0wWq1AjX9q0wmEyEhIRQWFjJ27Fglk7+/P0FBQZSU1PyF+R133AHAzz//zH333UdeXh6tWrUiJycHPz8/5Z5RUVEcPXqUF154gePHj1NWVsbkyZPR6XQu/oODg3nppZd48cUXKSsrIywsjNzcXIKCgpRC1R133IHRaGTRokUEBQURHx+P2VyzTU379vXrHVIrrYdVXTWG/VA1j+rjiWC6iyeyV4taM4tkqtXj93tO1SlW1crugG37T5PQpcXlB1xFfxnambc/2XfZ4w15D5FlXkTyRDBl8picGMfcX61K6NimZlVCfV7bWq8k9fKmPdadaQsvv/JNTT/HRDBl9ChT5sb0GU8EU+08EUzNozo9qjmzqPdAd3psLDwRTM2jOj3KmFkEU1aPsklVBavi4mJGjx5NZGQks2fPJj8/n5kzZ1JeXs6UKVOueu38+fMpKCigS5cu/OUvfyE1NZXNmzczbtw4xo8fT35+Pj/88AMAZ86c4c0336S8vJy//e1vysoiAE9PT8xmMzk5OXTt2pXnnnuOTz75hMLCQpo2vfgLge7duwOwefNmHn30UXx8fPjwww8xmUwuK5Tatm1LWloaR48eJTk5mV27dvHdd98RGRmpFKyaNWsGwMGDB+nTpw9Dhgxh0aJFVFRUuNwzPj6ejRs3cvLkSaKiosjIyGDOnDnce++9fPnll8q4nj17snbtWvr370+nTp1ISUnB4XAQGxurjAkODsbLy4vS0lJuueUW4uLiWL58OVCzkqu+0npYXV2NYT9UzaP6eCKYDeWJ7tUC6st8PZhq85h5+spb9wEcy7MyuE+7a+b2vb0NfW9vw4K1+/nl8Fm6tA9l3IOd6muzjm70ebkePBFMGTwGBsLryb3JO1dK3nlbg/u+aL2S1MsLDDSzYvpA9hw+y6GTFq23mEqZaueJYMr4GU8EU+08EUzNoxw8dzBF
vweC+p+jGudFNE8EU0aPMmYWwZTVoyxSVcFq5cqV2Gw25syZo/SnstvtvPLKKyQlJREWFgbUbP9XW+wBqKio4P3338fHx4du3brRo0cPbrnlFvr27Ut+fj7JycmYzWZatWrF0aNH+ec//0lMTAwA7733HocPH0an07n0nPLy8uL06dO89dZbxMbG0qtXL3bt2kW7du2UMQAtW7bkv//9L0ajkfvuu4+vvvoKQGGVl5crX7/zzjuEh4fzhz/8gTVr1hAYGKhk0Ol0+Pn5sW/fPrZv366slDp+/LjCqt0KMDo6moyMDAAGDRrEXXfdxZdffqmM++c//0n37t2ZP38+GzduxOl00rNnT3bt2oXdbsdgMLBr1y5KS0vp2LEje/bs4dtvvyUoKAiAjIwMysrK8Pa+9m8srYfV5dUY9kPVPKqPJ4LpLp6oXi2g3swimWeLyigpt+PnZSAkwD0fatzhMap5E76/yvmbwv0atEf90LuiGfdgJ6zWsgZxaiXja0fzqE6Pvp4Gbo0Na/BrW+uVpG4eQHTzJnRtH6ran2MimDJ6lClzY/qMJ4Kpdp4IpuZRnR4bQ2Z3vweC+p9jY5gXzaM6PcqYWQRTVo83ghptD6stW7bQo0cPpVgF0L9/f6ZOncrWrVtJTEwE6van2r17N6Wlpeh0OqVvk8lkYsCAAWzatIm9e/cCMHbsWI4ePepyzzVr1nDzzTdjMpnw8vKisrKSCxcuEBoayubNm5VxX3/9NVu3biU4OBiAEydOADBkyBCSkpKUcY888gi//PKL4qO2p9Xy5cuVgpLVauWzzz5Tvs7JycHpdBIZGcnHH3+ssObNm8ebb75Jq1atlNwATzzxBFarlcmTJ/Pss8/yxRdf4OHhQcuWLZVrhw4dyvHjx1m6dCnz58/n0KFD/PTTTzgcDgwGA1lZWQBK4QsgPz8fgNOnT/Piiy/y1ltvXXW+riRtf84rqzHsh6p5VB9PBLOhPFG9WtzpUTTPHczSsirmrc1w2XqnU5sgkgZ3xOxV/6bml6ohHu+IC2fRhkOX3RbQoIeenZpre9SrhKl5vDF5Wq+kxsETwdQ8ysETwZTxM54Iptp5IpiaRzl4IpgyepQxswimjB5lzCyCKatHWaSqzRQzMzOVokyt/Pz8CAkJcSlQJSQksG3bNqVfVO05vV5Pr169lHHR0dHk5eUpq5yKi4sxGo1s2LBBGVNdXY3T6VRWO2VnZ+N0Ojl79qxSlAKUe0VERCjjPDw8XIpaADabDYPBoIwrKipCp9MpK6+gZnWWTqdTtt6r9X/kyBHlPgAFBQUAREZGAjWruSIjI122/wNYv349PXr0cOnzNW/ePFJTU3n11Vfx8PBg0aJF/OlPf8LDo+YXoHfeeSd9+vQhJCSEadOmMW/ePAYMGABAYmIiycnJaNKkSdPVlDS4Ix0ig1yOdYisKbZo+m2atzajTlPzAycsvL8m4wpXXH+9PPpWfv1HMAZ9zXFNmjSJ199H34qvt+vfmPl6G/m79j2oSZMmQdI+42nSpEmTJk2aNGn6vaSqFVZWqxU/P786x/39/SkuLla+HjZsGEuWLCE5OZmkpCS2b9+uHK/dNhDggw8+wOl0ctttt+Hr60tlZSWxsbEsXLiQoKAg2rVrx4oVK7Db7UrxqPY+fn5+DBw4EIAWLVooWxDWbptntVoJDAxk9+7ddOzYEU9PT2XLQYPBoHgoLS2ldevWTJkyhalTpxIcHIyPjw8Gg4Hw8HCXezqdTnr06IFer6dly5bk5eUBYDRenKY//vGPzJo1iy+++AKAUaNGkZWVxdKlS5Ux69at48033wRg8uTJAHTs2JEBAwZgsVgICgqivLwch8NBaWkp06ZNc3nezz//vJKzPmpok80bUY2hgZ/mUX08EUx38vx9PXlhRDznisqwunE7OzVndidTZFNzd3kEiG4RwIcv9iNtXx5HThXTLsKfOzqHN4hZK7XPtVpfOyJ5IpgyenQnr3mwL3P/ehcH
TljIPmejVYi5zi+S6yNtXtwjzaN7pHaPsmVuLJ/xRDDVzhPB1Dyq06OMmUUw1c4TwdQ8qtOjjJlFMGX1KJtUVbD6rfL392fRokW89tprJCcno9fr0ev1TJo0SRlTXFzMqVOnAJg+fToVFRW8/PLLXLhwgWeeeYaFCxdisViIjY2lc+fOeHl5udyjpKSEtm3bcvLkSXJzc5WeVbVyOBwUFhbSvHlzdDodZ86c4ciRIwQHB7sU15xOJydOnKB9+/acPn2agoIC8vPzCQ4OdlkRBTV9rFq3bk1OTg4nT550KVTVZvrwww+JjIykoKCAkpISjh07RkJCAl27dlXGffTRR3WeWUZGBn/605+YMWMGiYmJ2Gw2Tp8+jdFoZNy4cXh6erJ48WJKS0uZP38+f/vb365xVmqk1+sa3IT3RlZjaOCneVQfTwTTnTxR3/NqzuwO5vVoag7uyz3ozrZu4VxOap9rtb12rgdPBFNGj+7k9Qo00+t/D7tmafOiTqbmUQ6eCKaMn/FEMNXOE8HUPMrBE8GU0aOMmUUwZfQoY2YRTFk9yiJVFaz8/PyUlUyXqri4WOn3VKvo6GhSU1MBWLZsGa+++qpLUWnlypUYDAYcDgf33HMPXl5ezJ07l8zMTP7whz+49J0aNmyYwvf2rnkx3X///UoPp8rKSvr160d+fr4yrqCggKqqKhYsWKBsY5iWlsa4ceNcvOr1eoKDg1m7dq1y7K9//SsbNmxQxjkcNftZPvHEE8pWfEVFRSQkJAAo41auXInNZuPzzz/n22+/ZfLkybzwwgv861//Ij8/X1lddu7cOR544AFllVVtnqysLNq2bav4OnLkCCkpKdx9990ADB48mPvuu4/FixeTnJyMr6/vlSfrCnI4nFitF675uhtdBoP6G/hpHtXHE8GU0aOIzOu2ZnEou4gOrQMY2LNNvRgim5pD43iOavcoY2YRTBk9ish8tqiMEjevdNDmpeGS0WNGloWc8+5b7Qfqf46NYV4ag0fttaPOedE8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9XgjyM/P+zevOlNVwSoqKsqlVxXUrHQ6d+5cnd5Wv74OICsri5iYGAC2bNlCs2bNsNvtyuqpm2++mdzcXLZu3UpiYiJQswIqKytL6X11/vx5AJo3b67wTSYTnTp1Ij8/X7lXUVERAE2bNlXG9erVCw8PD8zmmr9Eq6yspLq6uk6xrU+fPnz++ecEBAQAcPbsWaCmR1WtAgICiIyM5MiRI8rxLVu20KNHD+U6gH79+jFr1iwlU05ODidOnOD55593uecDDzzA7NmzyczMJC4ujmPHjgEQGxvrck+o6euVn59fr4JVzfXaN+OV1Bga+Gke1ccTwZTRozt4B05Y+NfKX5Sv048X8NG3x3lheBdiWl3bL16uR1NzUOdzFM1UO08EU/N44/JKy6qYtzbDZQvRTm1qesmYvTwaalGbF5Uy1egxv/AC/1i8k9KyauVYbT+1kID6b2F7qdT+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08maSqglVCQgLvvfeeSy+rL7/8Er1erxSUanX8+HGmT5/Onj178PHxwcPDg88//1wpWB0/fpyqqioGDRqkXNO3b1/Wr1/P8uXL+c9//oPFYiEiIoKioiLuvPNOAHJycgDYsWMHzz77LGlpaXh4eKDX11QAg4ODASgvL0en0/HOO++wc+dOsrKyCAsLo7q6miZNmgCQnZ2N0+nk5MmTvPrqq2zcuBGbzaYwQkNDgZril8FgYP369axevZo9e/ZgNpu5cOECRqNR2TowMzOThx9+mE8++URZ/fXkk0/i5+enFPpq/z8sLIwXX3yRr7/+mqqqKmX1lYdHzS82WrRoAcBdd9112bmo7a+lSZMmTZou6tJi1aWatfwXFk66+5p5SYM78v4a119Ea03NNWnSdKnmrc3gwAmLy7EDJyy8vyaDCY90+X1MaZJSvy44
AJSWVfPaop38Z3zC7+RKU2OQ9trRpEmTJk2aNGnS9FulqoLVsGHDWLJkCcnJySQlJZGfn8+sWbMYNmyYUnABGDFiBLt37+aWW25h9uzZ5Ofn88orr7BgwQJCQkJo164dRUVFeHh4MG7cOOW6++67jxdeeIH09HSGDBlCaGgoCxcuxGAwEBgYCIDVasVoNLJ//35ycnJISkoiIyODr776CqjZntDLy4uysjKaN2/OsmXL6NatG3/+859Zvnw5TqcTu92ujIWaVVwrV65k5MiRACxZsgSAiooK5Z6BgYF89913hIeHk5yczJYtW/jpp5/Q6S5uGWW1Wvnll1+YP38+nTp1oqCggKCgII4fP86+fftc7jlhwgRKSkoYNmwYubm5fPHFFwCEhIQA0KlTJ0JDQ5XVXQC+vr5UVFRw//33K1sj1kdGo9ZU7tdqDA38NI/q44lgyujRXbw1P2Re9fyGH08y6I5r2x5QVFNzUO9zFMlUO08EU/OoTo/u4p0usLkUtGvlcML+LAvnreU0C6rf6gRtXtwjWTzuO36+TsGhVqVl1RzMLiQuqullz/8Wqf05qnVeRPLcxdReO+qcF5E8EUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsklVBSt/f38WLVrEa6+9RnJyMmazmSFDhvDcc8+5jDtz5gxOp5M5c+a4bGM3bdo05s+fT3FxMU6nk4cffthlmz2Hw4HD4cDX15f169djNBrp378/P/74IwsWLGDatGku9wkMDGTOnDmEh4dz11138f3333Pw4EGleGa32wkNDSUzM5P09HTi4+MpLy8nNzfXhVNVVUWbNm1YuXIlZrOZhx56iE8//ZTt27czdOhQhWUymdDr9bzzzju0adOGrl27smfPHpf+VDt37gRg//79APz8888uX9cqOzsbk8nE8uXLiYiIYPDgwXz22Wf89NNPdOvWDYPBwMCBA1m8eDFBQUEUFRVRVVWFp6en0kerPtLrdcKa894IagwN/DSP6uOJYMrkMfdcKUcO5hMebCY8pH5bnQIcOVV81fOHcooYVc+ffyJ/bqp1XkQy1c4TwdQ83pi842dKr3reVulo8M8PmebFXe8Hl5NaM7uLmWfJuer53IILJNzSqt78Wqn9OaptXq4Hr6FM7bUjhieCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFqipYAURHR5OamnrVMc2aNaN9+/YuvZwGDBjAtGnTmDBhAomJifTo0UPZmq9Wu3fvxul0cu+99zJjxgzl+IwZM9i0aRMAfn5+VFdX065dO9atW6eM+eijj/j+++/Zs2cPd911F02aNCE7O5tJkyYxZswYZVz//v2xWCycOnVK6V1Vu8Kq9usTJ07w6aefcvToUeWepaWlJCQkMHfuXIU1c+ZM9uzZo/Sn8vX1paioiHfffZd+/fop42655RYuXLhAZWWlcg+z2cyuXbuUFVpbt27ls88+48CBA8p1ZrMZT09P0tLSACgrK+Pee+9lyZIlTJky5apzcCU5HE6s1gv1uvZGlsGg/gZ+mkf18UQwZfJYWlZFyup00jMvrk6Iiwri6YfiMHtfe9+XdhH+pB8vuOL5mJYBFBba6uVVpnkRyVQ7TwRT86hOj+7i+XjornrebNJrP3d+g9z9fiDCoyieu5jhQVf/B3eLpj71fi2C+p+jWudFJM9dTO21o855EckTwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfV4I8jPz/s3rzpTXcHqt6i2l9Ol8vPzIyQkROnhFBUVpfx3rWqLNV27dnU5Hh0dzaJFiygvLycqKgq42F+qVllZWZhMJrKzs5Xz+/fvV8ZDTWHq/Pnzisfbb78dvV6Pt7e3UkiqPQdw7tw5xWtVVZXSV6pWubm5eHh4KONDQkIoKiqiTZuLW06VlJRgs9lwOp3k5OQofkJCQly2E8zMzESv15Ofn+9yj/Lycm6//XasViuRkZH4+vpy8uRJGiKtodyV1Rga+Gke1ccT
wZTB49xV6XX6vmRkWXh3VXq9+r4M7BHJfzdfeVvA/re3bnB+GeblejDVzhPB1DzemLwQf286tQniwAkLDufF43pdTb+7YD8v7efOb5C73w8uJ7VldjezQ+sgfL2Nl93azdfbSGyrQLf4VftzVNu8XA9eQ5naa0cMTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIsaZcHKarVSVVXF2LFj2bNnD2azmcGDB+Pn56f0cEpISOC9997DarXi5+cH1KywAjh16hR33XUXFouF2NhY+vTpg9PppLi4mPj4eHQ6Hfn5+Tz77LOkpaVhNBqprq4mKChI4Xfo0IFvv/2WHTt28K9//YusrCwCAwOxWq1ATS8pk8mkFJneeOMN1q5di81mw2w2ExAQQGlpzTYvd9xxBwDHjh1TMnl7e1NSUuKSqXXr1hw9epTx48dz5swZTCYTwcHB6HQ6F/9eXl7k5+czYsQIMjIyKCsro3PnzjRr1oySkhLlObZq1Yp7772X/fv3Y7PZOHXqFBUVFfj41K8XQq20HlZ11Rj2Q9U8qo8ngimLR1F9XyY/Gs+Mpbsve7whP/tkmRfRTLXzRDA1j+r06E5ecmIcc3+1Oqhjm5rVQdrPnf8tkX3A3OVRJM+dzGmPdWfawp9cCg++3kamPda9wZ//1f4c1TwvonjuZGqvHXXOiyieCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2dQoC1ZOp5NPPvmEDh06MHv2bPLz85k5cyZ6/cUXwrBhw1iyZAnJyckkJSWRn5/PDz/8gE6nY+HChUycOJH27dszYcIE3nrrLeU6T09P/Pz8OHr0KEVFRTzxxBN89913pKen4+FxccuQ7t27A/DBBx/Qu3dv+vbty+LFi+t47dChA9999x1Llixh1KhRnD17lnXr1uHl5YXTWfOnss2aNUOn07Ft2zYiIyN58skn+eyzzygsLKSqqkphtW/fnq+//hqr1crjjz/O+fPnWb58eZ17tmjRguPHj5Ofn090dDT79+8nIyODvn37cuTIEQBmz57NL7/8QlpaGgMGDCA8PJz169eTl5fHgQMHqKqqcsn7W6X1sLq6GsN+qJpH9fFEMG90j6L6vvQMNLOua0s++eYIew6fpWv7UIb2bVdfm3V0o8/L9WKqnSeCqXm8cXmBgfB6cm/yzpWSd97m9v5LN/q8XI8+YKCuzKKYgYFmVkwfyJ7DZzl00kJM6yC6tg/93xdeg9T+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdQoC1Ymk4mKigrmzJmj9LGy2+1MmTIFo7Emkr+/P4sWLeK1114jOTkZs9lMly5d2LFjB6NGjVL6TkVFRVFYWIjT6VS27WvSpAnFxcU4nU5SUlKIjY1l0qRJzJgxQykyNW3aFLjYK2rv3r3cd9995Ofnk5aWprDCwsIACAoKYvHixYSHh/PSSy/xxhtvYDKZlEyenp5UVFRQXFxMSkoK8fHxPPDAA8yePVvJ1L59ewCaN29OSkoKZrOZ4cOH89///pcLFy4o92zbti0VFRXo9XoOHjwI1PTD2rx5szKmQ4cOLF68GKPRyNdff01YWBi33347VVVVrFu3jl27dnH77bdf89xoPawuL4NB/fuhah4bzsvIspBz3karEDMdIoMazAP1ZxbBdAdPZN8XgPu6tWRo33ZYrWUN4tRKlnkRzVQ7TwRT86hOjyIy+3oauDU2TPu5c40S/X6gxsyimdHNm9C1fajbXoug/ufYGOalMXjUXjvqnBfNozo9yphZBFPtPBFMzaM6PcqYWQRTVo83gm74HlYGgwFfX1+lWAXQu3dvACorK5Vj0dHRpKamKl8vWLCAHTt20KlTJ+XYsmXLSExM5NChQ3h5eSnHPT092bp1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWlqasq1eRUWFcp+WLVsq4+bPn09ZWZnytdFopEmTJqSlpSnH
8vLymD17tpKpQ4cOADzxxBP069dPGffNN99QVlam8KOioti+fTs7duxg9erVTJ48mTvuuIPU1FTatatZCdC3b186dOiAj48PKSkpCuuNN94AUApz9ZG2P+eV1Rj2Q9U8XrvyCy/wj8U762xz8vfRtxIS0LAtNmultszXg9kQ3vXo+9JQj9eDJ4Ipo0cZM4tgyuhRxswimNr7gTqZMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNcqCld1up6ioyKU/VW1x6dJVS79W7UqlAwcOMGDAAACqqqrIycnBbrdz88034+vrS0VFBRUVFZw4cYLIyEgAfvzxR5xOp7JN3pkzZwDYs2ePSz8sg8EAwIULNauMPD09Afjzn//MiRMn8PDw4M4778Risbh4tdvtWCwWBg4cSHZ2NuHh4XTt2tUlU8uWLYmMjOTf//43U6dOxWazERcXx+nTp4mIiFDGJSQkMHfuXBITEzl69CgAr7/+OgcOHODxxx9X7nnTTTexbNkyunfvTnFxMY899hiffPIJRqORW2+99donRpMmSfXrYhVAaVk1ry3ayX/GJ/xOrjQlDe7I+2syXHqXdIgMImlwx9/RlSZNmjRput7S3g80adKkSZMmTZo0adKkqXGoURasKisr8fLyculPNWvWLPz9/amuvvhL49GjR5OXl8emTZuAmiKSwWBg0aJFhISE0K5dOxYvXozVagVg+vTpVFRU8NJLL2EymXj22WeZMGECZWVlzJo1i6CgIHS6mm1FiouLASgoKKBr166MGjWKTz75hMzMTJfzVVVV6PV6Dh06xIgRI/Dx8WHhwoU4nU6X1WAVFRU4HA6Ki4v585//zK5du1i9ejU+Pj4umZo1a8aPP/5Inz59uOWWW5gzZw5Op5Px48crY6KiovDw8ODYsWP06tWL77//ns8//5yAgADuvfdeZdzatWvx9fVV8i9cuBCA559/vl79q2rV0Ma5N6IaQwM/zWP9tO/4+TrFqlqVllVzMLuQuKim9earMbNoprt4/r6evDAinnNFZVjL7fh5GQgJcM8ewmrNLJIpo0cZM4tgyuhRxswimNr7gXukeVQfTwRT86hOjzJmFsGU0aOMmUUw1c4TwdQ8qtOjjJlFMGX1KJsaZcFKp9MxdOhQDh8+rPSnGjJkCFu2bHEZ53A4sNvtLsf0ej3PPPMMCxcuxGKxEBQUhNFopLq6mu7duxMWFsabb76JxWKhWbNmTJgwAaPRyD333EN+fr6yVV7t1oC33XYbhYWF/Pvf/6Z58+b4+fkpBSCAU6dO4XA4SExMZP369dhsNtq2bcuBAweoXY0FNVvwBQcHc9NNNzFnzhzMZjNRUVFkZ2crY86cOcPPP//M4MGD+eWXX9iyZQt2ux2j0ais+AJYuXIlRqORfv36KcW6tm3bcuzYMQoKCpS+WtHR0fzyyy/o9XrsdjsBAQHodDp+/vlnxo0bpxTnrkV6vc4tjatvVDWGBn6ax2tTniXnqudzCy6QcEurevNrpabMopm550o5cjCf8GAz4SG+DeaJ/Jkk07yI4olgqp0ngulOnru/B2sl23MUwRPBlMmj9n6geVQbTwRTNo+7D5/l8I8niWkdRNf2oW7jqjmzKKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpWfnx8mk8mlPxXA559/jr+/v/L1kiVL6lxXVVXFmDFjSEpKAmDEiBEEBARw+PBh5dpmzZphsVjo378/8+fPV64fNmwYzZs3B2oKUQD3338/w4cPV8ZMnjyZVatWKayCggIMBgMzZsxQxjidTm6++WalYFVZWYnT6aRt27Yumb755huefvpp9PqaimxaWhoOh4OXXnqJX375haeffpqnn36ao0ePsmXLFp544gkAtmzZQs+ePfnXv/7FqlWrmDx5Mu+++y733nsvW7duJTExEafTycmTJxkzZgyTJk2iffv2/N///R9dunRh+PDhbN26lTvuuOMa
ZwYcDidW64Vrvu5Gl8Gg/gZ+msf68cKDrv4G1KKpj9bM/TeqtKyKlNXppGde3LIpLiqIpx+Kw+xd/1Wfas4siieCKaNH2TJr34Pq5YlgyuhRxswimDJ6lDGzCKY7efmWC7zy4U91eshOe6w7oYH17yGr5syimJpHdXqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ42yYBUVFaVsvVerkpISzp07R1RU1FWvA8jKyiImJgaAzMxMWrRoQXh4OF5eXkDNaqQjR4643MPpdJKVlUWvXr0AsNlqfgFdVlbmcg9v75pfXrdo0UI5b7fbKS4uVopYOp0OLy8vpWBVu4rq16xmzZoBKL4yMzNp2rQpWVlZjB8/nj/84Q+MHz+et956i08//VS5LjMzk4cfftiF5evrS0hIiJLJYrFgsViU51CrDh06uHiqj7SGcldWY2jgp3m8NnVoHYSvt/Gy2wL6ehuJbRWoNXP/jZq7Kp0DJywuxzKyLLy7Kp0Jj3RpoDt1ZhbNE8GU0aMsmbXvQfXzRDBl9ChjZhFMGT3KmFkE0x28XxeroGY77mkLf3JLD1k1ZhbN1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiRlmwSkhIICUlhZEjR5Keno7ZbCYmJga9Xq8UlC6n+Ph4fH19ef3118nOzsZisVBRUUFFRQUPPvigC3/NmjWsXbuWZcuW4eHhQefOnSkqKuLOO+8Eavph6XQ6/vvf/7JmzRqysrIIDw9Xtgz09b24hY5Op+O5557j8OHD2Gw24uLiKCkpoWXLlsDFflcHDx50yRQdHQ2gjLNarXh6ejJmzBgA1qxZQ3p6Oh07dlQYtbyvv/6atWvXcv78eQBefPFFvL29lXFBQUF4eHjwt7/9jb/97W8AzJo1i1mzZgEXC271kdbDqq4aw36omsf6a9pj3Zm28PJ/4dnQ7we1ZnY383SBjf1ZljrHHU7Yn2XhvLWcZkH1+2tZtWYWyRPBlNGjTJm170F180QwZfQoY2YRTBk9yphZBNNdPJE9ZNWaWSRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBasBAwbw9ttvc/DgQZ588klOnjzJqlWraNeundKfCWD06NHk5eUpfZw8PT3p0qULaWlp9OvXj/j4eGbNmoXNZqN///7KdXfffTc6nY5z587x5JNPUlpaytKlSwkKCqJz587KOL1ez/Hjx4mOjmbChAls2rSJXbt2uXg1Go00adJE2YqvdevWzJ8/H4fDQXBwsMvYysrKOpkAZWVWeXk5p0+fxuFwMGjQILp06cK2bdtYvXp1nX5TDoeDfv36kZ+fz9dff82BAwdcVqDpdDq6d+/O1q1bGTRoEOvWraNHjx4cPXoUX19fevToUa+50XpYXV2NYT9UzeO1KzDQzIrpA9lz+CyHTlrcvoc+qC+zu5nHz5Re9byt0tHgny1qy3w9eCKYMnqUIbP2Pdg4eCKYMnqUMbMIpoweZcwsgtlQ3vXoIau2zNeDqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFjbJgtX79ekwmEzExMaSkpGA2m+nVqxfbt28nPz9fKVo5HA7sdrtyXUVFBXv27KF79+7s37+fLVu2YDAYMJlMbNiwgdtuuw2Ab7/9FqfTSXh4OKmpqRiNRnr27ElaWhr79u2jc+fO+Pn5YbfbiYyMxGAw8Oabb/L/2Hvv8KiK9v//tTVlk00nEFpIgCR0kB5AaUoTfBAU9YvYUdGPCioWQFRQHhRFsSGCFBUriiAWBCEU6UgnlBAgJCSBlE02ZTe7+/sjvzPu0h4pRxfPvK7LC7N79n3ue2bOzJwp98TFxVG7dm1OnDghJpmCgoI4evQonTt3ZvXq1djtdlq0aMHmzZspLa0eHFKuNRqNPj61adOGbdu2iV1bLpcLt7t6K+GSJUtYsmTJOdMnLCyM06dP8+mnn4rPcnNzgepwiAp9+vRh3bp17Ny5E4Ddu3fTs2dPnnji
Ccxm8yXljTzD6twYDP4fD1XaePl6ibVCaZ1UA5ut/LLOrfLG330GyCsqp6TChTXQQEz4pTXIwSbdBb+3mPWXnKZaLItqaGrRRi35LJ9B/9ZTQ1OLNmrRZzU0tWijFn1WQ/NK6al5hqy/+qym5pXoy5+JFtPR3/XU0NSijVr0WQ1NLdqoRZ/V0NSqjf8GrNZ/+RlWaWlppKam8t5774nPbDab2DE0ePBgABYsWODzu23btmG323nuuedISUkB4I477iAvL4+0tDRx3YoVKwB45JFHhJbH46Fjx46sXr2aFi1aUK9e9YqtHj16iJB6AA8//DAnTpzg1KlT1KlTR5w/NX36dDEx5fF4RFhAgHr16qHT6YiPj+eTTz4RWkuXLmXbtm3k5+cDkJKSwrJly3jttdd8QhgOHDiQAwcO4HA4MJvNJCQkEB4ezrvvviuuycrKomfPnrRt21Z8ZjRWZ//nn39Op06deOihh7j33nv/ajacFxmf8/xcDfFQpY3+p6eG5pXQKy138uH3e3zCiDVrEMnIQU2xBJouSismLIhmDSLZm1mA2/Pn53odNImPJNoaeNn2aiVf1NbUoo1a8Fk+g1eHnhqaWrRRiz6roalFG7Xosxqal6v3d5wh628+q6F5Jfvy50ML6Xi16amhqUUbteizGppatFGLPquhqVUbtcJVGUwxIyNDhLZTsFqtxMTEkJGRccHfAT6/7datGydPnuTEiRNUVFQAsGPHDnQ6nc95WDqdjgYNGgiNqKjqmNgnT54U1zidTnbv3u1zr/DwcAAKCwvFdb///jtOpxO7vXrVl9lsxmAwYLPZfOxdtWoVBoNB/LZGjeoQY8eP/xkCobi4mMzMTDwej/i8W7durF+/nqKiIpxOJ1lZWTzxxBMA55yQGjBgAAAzZ85k5syZeO9Kk0gkkvPx4fd72JtZ4PPZ3swCZi7ec0l6Iwc1pUl8pM9nTeKrX5olEon6yGdQIpFIJFcL40e0JSTId/1tSJCR8SPanucXkjO50n15iUQikUgkkivBVbnDymazYbVaz/o8LCyM4uLiC/7ObDYTEBAgPhs2bBgfffQRDoeD5cuX43A4yMnJITEx8azzsPbt20dwcPWB4+Xl5QAsX76cefPm0bhxYxYuXCh2TSl2REVFYTKZePTRRxk9ejTl5eVMnTqVhIQEn4knj8dDXl4eEydOpG/fvmzcuJGlS5cSGxsrtPT66vnFOXPmULNmTWJjY5k5cyYWi4XKykpx3bBhw1iwYAH9+vXj9OnTQPWE24033ugzWed0OunatauYXCspKeGNN95gy5YtzJo16y/lxbkwGq/KeVBVuRoO8JM2+p+eGppXSi/ntN1nNaaC2wO7jxRwylZBzcjgi9IMCwng6TvakF9Uju0KhiXRUr6oqalFG7Xms3wG/VdPDU0t2qhFn9XQ1KKNWvRZDc0rqVcrOoT3xlzH3swCjuXbqRdjOWvRxaXgzz5fSU01+vJX2kY19dTQ9Hc9NTS1aKMWfVZDU4s2atFnNTS1aqPWuConrK4kYWFhjBo1ildffZXnn3+ekJAQQkJCaNWqlc91brdbnCXlzc0338ycOXMoKCggJSWF6dOn88ADD4jv9Xo9NWvWJD4+ntGjR2M0Gunduzd16tTxCWmo0+kYOHAgW7du5euvvyYuLo5JkyYxd+7cs+45YMAApk2bht1up02bNrzwwgs89thjPj7NmzeP559/npKSEgICAggODmbz5s1kZ2cTFxcHwLFjx1izZo2Pj1AdcjEvL0/s6LoY9HrdZR/K/m/majjAT9rof3pqaF6u3uGTpRf83u5wX3JdoFYdooV8+Ts0tWij1nyWz6D/6qmhqUUbteizGppatFGLPquheSX1UiMspP7vyy4af/b5Smiq2Zf35t+ejlejnhqaWrRRiz6roalFG7XosxqaWrVRK1yVE1ZWq1XsZPKmuLhYnBN1vt85HA4qKyt9dlkFBwej0+nYtGkTgYGBDBkyROyg
UliwYAHDhg0T+sq/vXr14sUXXxTXZWZm+nxvtVqpqKhgxowZPnpvvvmmj61Wq5XY2FimTp3qc91bb7111j1HjBjhc89169b5fA+QmJjI559/Lv4uLy/n+uuv56OPPmLChAninsHBwfz2228idOEbb7zBzJkz+f333xk0aNB50/J8uN0ebLayi/7dvx2Dwf8P8JM2+p+eGpp3TvpV/P/8cb0uWSfYpLvg9xazXh52/Q/qqaGpRRu16LMamlq0UYs+q6Hp73pqaEob/dNGLfqshqYWbfRXn9Xsy4N20vFq0lNDU4s2atFnNTS1aKMWfVZDU6s2/huwWoP+8q6zq3LCKiEh4ayzqkpKSsjPzz/rbKszfwdw5MgRkpOTxecZGRnExcURGBgorjtw4IDPbz0eD0eOHBHnWtWrVw+TyURGRgZdu3b10fK+V0JCAqdOnTprMu3Mc7j+ik/Kv2f+NiMjA5PJRN26dc/re1BQEImJiRw9elR8lpaWRqdOncRkFUCnTp2YOXMme/fuvaQJK0AeKHcBroYD/KSN/qd3JTTvmbLyrM+Uyas5z/S4aL2YsCCaNYhkb2YBbq/Np3pd9Zk30dZAedi1H+ipoalFG7XosxqaWrRRiz6roenvempoShu1oaeGprRRG3qXq/l39OUv18a/Q08NTX/XU0NTizZq0Wc1NLVooxZ9VkNTqzZqhatywqpbt268//77DB8+nF27dmGxWEhOTkav14sJpXPRpk0bQkJCeOWVVzh27BgFBQUkJyeTnZ1Nr169fPS///577rnnHrZv347JZKJFixYUFRVx7bXXAmA2m+nQoQNffPEF33zzDUeOHCEuLo7w8HASExOpU6cOAF26dEGv1/PEE0+Qnp6O3W6nefPm7Nq1i1GjRl2UT3Xr1iU+Pp4PP/yQ1157jezsbBo0aEBVVRWdOnXCbDYDMGPGDN55551zpkFiYqL4/z179lBeXk5SUtJZ1zmdzr+aHRKJRKOMHNSUmYv3+MS/bxIfychBTf9BqyQSiUQikUgkEsn/QvblJRKJRCKR+CNX5YRVv379mD59Ovv27ePBBx/k6NGjLFq0iMaNGxMbGyuuGzFiBNnZ2SxfvhyAgIAAWrVqxdq1a+nVqxdt2rRh7ty55Ofn07dvX/G7Hj16YDKZ2Lx5M/fddx+lpaV88sknREZG0qJFC5/rXnrpJRITExk9ejTLly9n69at3HXXXeIa5fyqdevWMXjwYOrXr8+sWbOorKxkwIABF+1Tly5d+OSTT7jmmmsYNmwYX331FYcPH/a5Z2lpKf369SMpKQmr1Up+fj7ffPMNubm53HHHHeK68vJy9Ho9Tz/9NABbtmxhxYoVhIaGXtaEldEoD5U7k6vhAD9po//pXSlN7zCA5+KeKSsvKTxgWEgAT9/RhvyicmwVLqyBBmLCLz9Gr1byRU09NTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0NSijf7ss1p9eW/btJCOV4ueGppatFGLPquhqUUbteizGppatVFr6Dwej+d/X+ZfzJw5k/fee0/sVFJ2I/3++++sWrVKTPAMHz6cEydOsHJldSisyspKOnXqRNOmTc/aYdW7d28mTpwIwNKlSxkzZgypqals374do9FIixYtWLt2LV999ZWYtLr33nvJzs7GaDSKHVZhYWHY7XaWLVsGwMmTJ+nevTsdO3YUO6xatGjBzp07eeSRR7j//vsvyqcbbrgBq9VKcXGxzw6rOnXqMGvWLABWrFjB3LlzOXDgAGVlZcTGxqLT6SgsLGT9+vViJ5aysyowMBC32018fDxDhw7liy++oE2bNrz88ssXnTcejwed7sLxsCUSyV/nRH4pOafsxEVbiIsJuSSNG8cs/p/XLJl2aSFA1eBK+CyRSCQSiUQikUgkkquLXzYeZdehfFo2iqFX+/r/tDkSiUQi+Qe4KndYpaWlkZqaynvvvSc+s9lstG/fXuxkAliwYIHP77Zt24bd
bue5554jJSVFfP7qq6+KXViKfnJyMnPmzBGfeTweOnbsyOrVq2nRogUOh4ONGzfy5JNP+uxuWrFiBQ8//DBZWVnUqVOHtWvX4vF4mD59us8ZVo888ghpaWliwuqv+HT8+HEyMzN59913fUIYzp8/n6lTp+JwODCbzfTs2ZOePXuK7ysrK+ncuTP9+vUTk1VQfa6V0+lkx44dPuk0a9YsH1svBrfbg81Wdkm//TdjMPj/AX7SRv/SKy138v63u9iV8WeIjuYJkTz8n+ZYgkyXa+pZ+MOhymr5rMWyqIamFm3Uos9qaGrRRi36rIamv+upoSlt9E8bteizGppatFGLPquhqUUbtebzkexiXpq7GUVm1bYTvPPVH7xwd3via1n9wkY19NTQlDb6p41a9FkNTa3a+G/Aag36y7vOrsoJq4yMDG6++Wafz6xWKzExMWRkZFzwdwAJCQk+nycmJjJv3jwqKioIDAwkIyPjrGt0Oh0NGjQQGseOHcPpdJ5TS7lXnTp1yMjIICoq6qwJoMTERL7++uuL8kn5t0GDBmdpOZ1Ojh8/7nNGlcJvv/1GaWmpTwhCgIiICLKzs+nYsSM2m434+HiGDRtGfn7+WX5dDPJAufNzNRzgJ230D733Fu1ib2aBz2d7jhTw7qJdjL611UVpzXmmB/dMWXnB7/3hUOUr6fO50GJZVENTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1pozb01NCUNmpDTw3NK6HnPVkldN3w4sebmPV0j8vSBv/0WW1NaaM29NTQlDb6p56WuConrGw2G1br2SsswsLCKC4uFn8fPnyYSZMmsX37diwWC/Xr18dsNhMQEODzO6vVisfjobi4mMDAQGw2G1lZWVx33XUUFBSQkpLCs88+66Ov/Dtr1iwee+wxTCYTvXv35sEHH/T53mazYTAYGDhwoAgb+MADD4iwft4+bdu2jdTUVOx2O61bt2b8+PHnvOeECRPYs2cPFouFQYMG0bt3b5/vAb766is++ugjsrOzMZvNhIeH065dOx+/ExISyMnJETuzCgsLmTx5MjqdjtTU1EvImWrkGVZnczXEQ5U2+o9ezmm7z+HHCm4P7D5SwClbBTUjgy/rHt5c7jN7JfxW02ctlkU1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KKNWvRZDU0t2qgln1dtzzprskrB5Yb1u3Po1qr2JWn7q89qakob/dNGLfqshqZWbdQaV+WE1V+huLiYESNGEB8fz4wZM8jNzeXFF1+kqqrqL/32+PHjjB07lqSkJD799FPuuecemjVrhtFYnWSKTk5ODtOmTaOiooL//ve/ZGdn+2jl5+eTm5vLddddx3PPPceGDRt4/vnnGThwoM91LpeLXbt28cILLxAbG8sHH3zAXXfdhcViEdeUl5eLeys+TZkyhRMnTvho/fDDD4wfP54HH3yQ5s2b88gjj+DxeNi5cyetWrUCoKCggD179mAwGIiLi6N///4sW7aMgoICDAYDkZGRF5fg/z96vY6ICMv/vlCjWK1X5hBbtfTU0NSijZerd/hk6QW/tzvcF/2cKWdUeZ9ndaXPrbocv9Xw+Uy0WBbV0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NKWN2tBTQ/Ny9TJySi74/aFsG4O6N76se/ibz3+HprRRG3pqaEob/VNPS1yVE1ZWq5WSkrMbtOLiYhF67/PPP8dut/POO+8QHh4OwO+//87SpUs5duwY9erVE7+z2WzodDrCwsKorKzEZrPRoEEDcTbVNddcQ58+fTh8+DDt27cHYNeuXQCMHDmSHj16CLvuvfdeAGHHwYMHMRqNvPTSSwB07NiR48ePs3r1anHNyZMn8Xg8dOjQgSFDhgDQvHlzunfvjt1uF9dt27YNgOeee46WLVsC1RNdEydO9Lnn22+/Tf/+/Xn88cf56quvcLvdJCUl8e677zJr1iyRFoWF
hcyZM4dZs2bxwQcfEBwcjE6no6qqimPHjp0zvOD/Qp5hdW4MBv+PhyptvHy9NTuyOXiimMZ1wujSIu6SdYJNugt+bzHrL/nMqU9fuF74fDnnVnlzJdJRTZ+1WBbV0NSijVr0Ga5cXaagxXTUos9qaPq7nhqa0kb/tFENn/ccKeD4KTv1Yiw0ib+0xYLeaDFf1ND0dz01NKWN/mmjlnxOqBXKqgt83zDO6jfvglrKFzU1tWijFn1WQ1OrNv4bsFr/5WdYJSQknHVWVUlJic/ZS2lpaXTq1ElMVgH069ePpUuXsmTJEkaNGiU+z8jIIC4ujsDAQH7//Xfcbjdu958Fymw206tXLxYsWCD09+3bB0BZ2Z+TM6mpqVgsFux2OwkJCTgcDk6ePInL5fKZTFPsUCad1q5dC4Be/2emhYeH0759e1atWiXuefjwYaB615ZC3759mTBhAgaDgbp163L8+HEyMzN56qmnAFi6dCkJCQkMGTKEqVOnivB/TqcTgGbNmjF37lwAPB4PrVq1oqKi4q9nxjmQ8TnPz9UQD1XaePEcybHxyoItPgfEzl66l3Ej2lI/9uzwpf+LmLAgmjWIZG9mAW7Pn5/rddAkPpJoa+Bl++9v+aJFn/8OPTU0tWijVny+0nWZGjaqrenvempoatFGLfqshqYWbbwSermFZUyev4XS8j8jf4QEGRk/oi0x4Zcf8lmL+aKGpr/rqaEpbdSGnhqal6vXpXkc837cf86wgAY9dG5Wy+/eBbWQL3+HphZt1KLPamhq1UatcFUGU+zWrRvr16/HZrOJz3766Sf0er04eykjI4PIyEjuvvtuWrVqRWpqKhs3bkSn05GWliZ+53Q6+eWXX+jWrZv4HcCRI0fo0qULLVq04NZbbyU/Px+3203Hjh0BOHr0KJGRkbz33nu0bt2a9u3bM27cOEwmEyEhIdSpU4djx47hcrnQ6XTceOONNG/enBtuuIHdu3cD0LBhQ3HP4OBg1q1bR6dOnWjVqhV33303paXVIbIUn06cOEFISAgvvPCC8OmDDz7AZDIRFxeH2WwW9n/22WekpqayYcMG4uLiSExMxOl0cvz4cQBq1KhBQEAA3bt3p0WLFvTs2ZNbb70Vp9NJSEiIzw40iURyYbwHeBVcbpg0b8sla44c1PSs1bZN4iMZOajpJWv6O1r0WSLxJ9SoyyQSiUTiy5mTVQCl5VW8LOtaiUSiYcaNaMuZC+8N+urPJRKJRKItrsodVsOGDWPBggWMGjWKkSNHkpuby9SpUxk2bBixsbFAdXjAb775BpPJxLvvvivOezKbzezcuZN58+bRuHFjFi5cSFFRkQjlZ7PZ0Ov1YofVQw89xIoVK1i2bBmAmMgpLi6msrISu91Ot27daN68OXPnzsVut5OUlCSugeqdSwUFBdx5553k5uby3nvvAYjzpJR7ulwuwsLCGDFiBN9++y2ZmZnodDofn4xGI6Wlpdx4443UqFGD+fPn43Q6adSokc898/LyqFOnDqdOnSIlJQWr1erz/Zo1a9DpdHg8HiorK8nKyiIrKwuA//u//8NkMl1y/hiNV+U8qKpcDQf4SRsvDbUOiA0LCeDpO9qQX1SOrcKFNdBATPjlx7/153zRos9q6amhqUUbteTz1XTYtRqa/q6nhqYWbdSiz2poatHGK6W38/CpsyarFErLq9h3rJDmCVGXpK3FfFFD09/11NCUNvqnjVrzObF2OB8/14u1O7M5kHVlQ1N7/+tvempoShv900Yt+qyGplZt1BpX5YRVWFgY8+bN4+WXX2bUqFFYLBaGDBnCE088Ia7xeDx4PB4iIyPp2rUrUH3e04QJE0hOTmbOnDkUFBSQkpLC7NmzqVu3LgBVVVW43W7+3//7f+Tl5fHhhx9iMBgwm804HA6hX1ZWht1u54UXXmDhwoX8/vvvREREYLfbCQgI8LG3YcOGdOvWje+++w673U5kZCQFBQUEBQUJrdLSUh599FG2bNnC+++/
T1BQEHq9Ho/nz9hYHo8Ht9vN888/zyeffEJ2djaRkZHk5uZisVh87vnxxx/z4IMPAhAREXFWGt588818+eWXOJ1OTCYTOp0Og8FAeXm5mCC7FPR6HRERlv99ocbYlp5H+oajJNePpHVSjSumq9VDBq+U5on8Ug7syyUu2kJcTMglaah9QKxaz5M/54sWfVZLTw1NLdqoBZ+vxsOu1dD0dz01NLVooxZ9VkNTizZerl52wfELfn/idBndrrm8SBdazBc1NP1dTw1NaaN/6V2Jd9Xz4a8+A9x4baMrpuWNP/uslqa0URt6amhKG/1TT0tclRNWAImJieLspXOh1+upXbs2v/zyi/hMOe8pNjaWxYsXn/N3yg6kgQMHijOmAO6++27Wr18vzqGqqqoiNDSU22+/ndtvvx2onlBq2rQp5eXlAAQHV8cgv+aaaxg7dixjx44FYOHChUycOFGcI6WENhw+fDiPPPKIuGefPn3ErifFp7i4OO68807uvPNO8dt27doJDcW+kpISvvnmG7Hb68zvv/nmG4KDg1mxYgVmsxmAAwcOcOONN7JgwQL69Olz3rS9EG63B5ut7H9fqBFyC8p48eNNZ8Won3hPe2pEXHqMeoNBm4cMXinN0nIn73+7i10ZBeKz5gmRPPyf5liCLm53oZoHxIK28uVq0VNDU9ronzZqyeer6bBrNTT9XU8NTS3aqEWf1dDUoo1XSi8u8sIDF7WjgmVd+w9r+rueGprSRv+y8Uq+q6plo1p6amj6u54amtJG/7RRiz6roalVG/8NWK1Bf3nX2VU7YXUp6HS6K3rt+a7x3hV1uXZcrFZCQgJQfS6W8v/K3yaTSewkO3ToEAkJCWKyCuCPP/4Aqie7Lgd5oNyfnDlZBdXhPibO2cTbj3W7bH2tHjJ4uZrvLdrF3swCn8/2HCng3UW7GH1rq4vS+jsOiAVt5MvVpqeGprRRG3pqaF6Jw67n/LD/vN/742HXamj6u54amlq0UYs+q6GpRRsvV69J/UhCgoznDAsYEmQkpV6ErGv9RNPf9dTQlDb6h96VfFc9H/7m89+h6e96amhKG7Whp4amtNE/9bTEv3bCyu12c+LECYYPH86uXbuwWCwkJycDiPOczoWyA+n111/n2LFjFBQUkJyczJEjR4DqHViBgYEYjUZOnTrFPffcw/bt2zGZTLRo0QKXyyV2VpWVVe80Wrt2LQMHDuTIkSPExcURElK9nVs5J0qx54knniA9PR273U7z5s3JysoSZ2n9VZ/q1q2L1Wpl7Nix4rfz588nKCiITp06iQmqmJgYli5dSvfu3Tl9+jSBgYGUlZURGBjoM9F1KcgzrKqRMer908ac03Z2Hyk463O3B3YfKeCUrYKakRe3++2Fu9vz4sebfCatDPrqzy/3edBKvlxNempoShv900Yt+Tz9y+0X/P69b3fyf0NbXZK2ltJRLT01NLVooxZ9VkNTizZeSb2J97Rn4pxzR2C4nH6jFvNFDU1/11NDU9roPzaq8a56pW1UU08NTX/XU0NT2uifNmrRZzU0tWqj1vjXTlhB9ZlV+/bt48EHH+To0aMsWrQIs9kszo4CGDFiBNnZ2SxfvhwAo9GITqdj48aN9OrVizZt2jB37lwRUk9BOWNq8+bN3HfffZSWlvLJJ59gMpkIDQ31ufbEiRMkJiYyevRoli9fztatW32+Dw4OxmQysW7dOgYPHkz9+vWZNWsWVVVV6PW+hfuv+NSkSRM2bNjAgAEDWLp0KQ6Hg5MnT9K/f39xTf/+/Vm0aBHBwcEMHjyYb7/9FpvNhtPppEePHpec5vIMqz+RMerV0btczcMnSy/4vd3hvugyHBFh4bvXBvHrpqPsOJhPy0Yx9Gpf/5JtPBf/9ny5GvXU0JQ2akNPDc3L1TucfeHd1QdP2C67fddCOqqtp4amFm3Uos9qaGrRxiuhFxFhYeGk/mxPz2P/0QJ5xq2favq7nhqa0sZ/Xk+Nd9Vz4U8+/12a
/q6nhqa0URt6amhKG/1TT0v8ayesAgICcDgcJCcn8/7772OxWEhNTWXdunUYjX+67Xa7cblc4u/g4GA8Hg9t27Zl9+7dpKWlkZycTFlZGaWlpWIHlvLbdu3aMXfuXIxGI507d2bt2rUijJ9ybc2aNTEYDEybNo24uDiSkpJIT08X3xsMBpxOJ507d2b16tXY7XZatGjB1q1bCQgIuGif5s2bx1dffcWsWbOEfkJCAtu3/7l6OjU1lY8//ph3332XmTNn4nQ6SUpK4tChQ2RnZ19yusszrP5Exqi/8jbmFZVTUuHCGmggJvzSKv5g04VDclrM+kvOl/bJNejVvj42W/llnVvlzdWQL/5uoxZ9VkNTizZqyefEuFC2HTh93u8b1ZZnWP2TempoatFGLfqshqYWbVTD58RaobROqnHF+o1azBc1NP1dTw1NaaP/2Kjmuyr4p89qa/q7nhqa0kb/tFGLPquhqVUb/w3IM6yonqSJjIzkk08+EZ9lZ2fTvXt3HA6H+GzBggU+v6uqqg7NcMcdd9CvXz/x+eDBg9m/fz+BgYFA9dlSAQEBzJkzR1zjdrtp0qQJTqcTqJ6oAmjWrBnvvvuuuO7NN98kPT1dhA6srKwE4KWXXhJnTAF07dqV8vLyi/YJYOjQoQwdOpSkpCTuvvtuNm/ejN3u27Hp1KkTq1ev5o8//mDu3Ll07NiRtm3bCvsvFRmfsxoZo/7K6ZWWO/nw+z0+4RGaNYhk5KCmWAIv7uDZmLAgmjWIZG9mAW6vI+L0OmgSH0m0NVDmi59o+rueGprSRm3oqaF5uXqPDG7JPVNWnvf7h//TQtaNfqCnhqYWbdSiz2poatFGLfqshqYWbdSiz2po/ttt/DveVS/Xxr9DTw1Nf9dTQ1PaqA09NTSljf6ppyX+tcEUXS4XRUVFPqH81q1bByDOcToXyk6lvXv3is+cTicnTpzA5XJRUVEBgE6no7KykszMTHHdhg0b8Hg84myqkydPAvhcA3DgwAHgzzOulF1UGzZsENcUFxdTWFiI9+6vi/HJ4/GIybfdu3ezbt067rjjDp9rPvzwQ+bOncvkyZNJTExkypQp6PV6brrppvOmj+TiGD+iLSFBvvPCIUFGxo9o+w9ZdHXy4fd7zjp4dm9mATMX77kkvZGDmtIkPtLnsybx1RNgEolEolVG9Gl8UZ9LJBKJRCKRSK4s8l1VIpFIJFrnqt1hdfjwYSZNmsT27duxWCwMGjSIxx9/XEzcOBwOAgMDGTVqFCNHjiQ3N5epU6cSFhbGrl27uO666ygoKMBkMmGxWEhLSwOqJ5EMBgPz5s0jLS2NzMxMXC6XCPNXXFxMYGAgOp2OsLAw7rnnHuDPySmLxYJOpxPXAmRkZDBo0CCys7MpKysTE0nK9y6Xi9DQUKZMmcLHH3/M8ePHcbvdYlJMQfFp6NChlJeXc+rUKTweD8HBwUITYN++fUycOJE//vgDgGXLljFo0CCfc66WLFnCtGnTqFGjBk8//TRQPQnXpk0bn3teCpdzWPC/jVrRIbw35jr2ZhZwLN9OvRjLWZ3PS0FLhwyqcfBsWEgAT9/RhvyicmyXGWLQGy3li5qa/q6nhqa00T9t1JrPPdvWo2fbesz4+g8OZNloXMfKo0NaXbau1tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaq9a7qbZu/+aympr/rqaEpbfRPG7XosxqaWrVRa1yVE1bFxcWMGDGC+Ph4ZsyYQW5uLlOmTKGiooIJEyYA1ZMvQ4cOJT09nVGjRmGxWBgyZAjfffcdu3fv5plnniEpKYknnniCvLw8jh8/LsLx6fV6rFYrhw8fBqB27dqcPn2a0tI/D8DU6XQ0a9ZMnB8VEBBAvXr12L9/P6dOnfKxt1mzZuzevRudTkdUVBRms5msrCyxWwsgIiKC/Px8jh49il6vp379+hw7dkxMlCn3vOaaa0hLS8NoNGKxWKhVqxbp6emcPv3nuRM/
/PCDmKxSWLx4MatXr2bjxo3Anzuz8vLyxDUej4etW7dy7733snr16kvKG71ed0UOAf23kRphIVUFXS0cMqjmwbNqlVUt5MvfoenvempoShu1oaeG5pXUm3C/Gi2W9tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaqOa7irz6rqenvempoShu1oaeGprTRP/W0xFU5YfX5559jt9t55513CA8PB6p3Kb344ouMHDmS2NhYrFYrZrOZuXPnit9VVlby8ccf07JlS+666y4A0tLS6NOnD7Nnz2bixIlYrVacTienTp3ixx9/JCEhAYDJkyczf/58jh07JvTT09Np3bo1n3/+ubhHx44dOXHiBABhYWFAdUi+iRMncuuttwKwc+dOhg4dyvbt2xk6dChWq5XTp0+j0+lYt26d8On+++8nLS2N3Nxccc8dO3YwYMAApk2bJu7ZrFkzEWYQ4Mknn2TMmDHodDoWLVrEs88+y6233soPP/yAy+XCYDAwZcoU8vPzcTgc4hwvt9tNz549yc7OJicnh1q1al103rjdHmy2sov+3b8dg8H/D/C70pprdmRz8EQxjeuE0aVF3CXrqHnwrBbzRYs2atFnNTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0MwrKqdEhR0j/pyOV0O+SBv900Yt+qyGpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dlRNWaWlpdOrUSUzsAPTt25cXXniBdevWMXjwYBISEsjIyPD53dq1a/F4PHTt2lV8Zjab6d27N8uXLwcQE1Tx8fHi/6F615VOp2PDhg20a9eO+Ph4du3axf333y+u8Xg8OBwO7HY7WVlZ1KtXD4PBgMvlok+fPuI6ZQfWoUOHxD3tdjvdunXz8clgMAAIn2rXrs2uXbvo27evuKakpASn00l2djYOh0OERFTCEiokJyfzxRdfUFBQQExMDABVVVWEhIT4+Fi/fn2ys7N9dnZdLPJAufNzNRzgd7maR3JsvLJgC0qdvGrbCWYv3cu4EW2pH2u9aL2/4+BZLeSL2npqaPq7nhqa0kZt6KmhKW3Uhp4amlq0UYs+q6GpRRu16POV0Cwtd/Lh93t8wnw3a1B9Jo8l0HQlTPT7dPTHfFFbTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9ron3pa4qqcsMrIyKBnz57cfffdPmdYxcTEiEmqbt268cEHH2Cz2bBaqwfJly1bBlSfU6WcYZWSkkL79u3Jzs6moqKCNm3aoNfr8Xg8PProo6xduxaj0UhVVRWRkZFCPzk5mSVLlnD8+HEGDhzIkSNHiIiIwG63Cxvr1KlDzZo1yc3N5YMPPuD777/HbrdjsViwWq1iJ1aXLl0AsNvtwqegoCBKSkqwWCzing0bNmTXrl3s2rWL1157jezsbCIiItDpdLhcLo4fP05iYiIABw4cYNq0aWzatAmAd955h6CgICIiIkQ6hoaGsnz5cpKSknzSNzg4mLi4S98RI9E23pNVCi43TJq3hVlP97gkzZGDmjJzse9Lrjx4ViKRSCQSiUQi8X8+/H4PezMLfD7bm1nAzMV7GH1rq3/GKIlEIpFIJBKJX3JVTlgVFxezbNkymjRp4nOGlV6vp7i4GIBhw4axYMECRo0axciRI8nNzWX58uXodDrmz5/Pk08+SVJSEqNHjxbnPRUXFxMbG0toaChHjx6lrKyM+++/n99++41du3ZhMpmEfvPmzQH45JNP6Nq1Kz179mT+/Pk+NgI0btyYEydOsGDBAu68807y8vJYsmQJgYGBlJVVh86rWbMmOp2OrVu3Eh8fz4MPPsh3331HYWEhQUFBQqtVq1Z8++23fPDBBwwYMOCc99y/fz+vvvoqu3btombNmrRv355Vq1Zx+vRpDAYDhw8fJikpiS+++IITJ06g11dvxXO7q2cYdDodr7zyymXlj9EoD5U7k6vhAL8roblqe9ZZk1UKLjes351Dt1a1L1pX
rYNntZIvauqpoenvempoShv900Yt+qyGphZt1KLPamj6u54amtJG/7RRiz5fKc2c03afRWcKbg/sPlLAKVsFNSOD/1EbryY9NTSljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJKyX03plnWE2YMIHy8nKg+vyoefPm8fLLLzNq1CgsFgtNmjRh+/bt3HPPPeIMq4SEBIqKisSEDYDRaBT3ef/990lJSeGZZ57h1VdfpaSkxOeakJAQtm7dyo4dO7jhhhsoLCxk5cqVQis4uLrzHRkZyfz584mLi+P5559n6tSpPvcExITb+++/T5s2bRgwYAAzZswQPnlr/frrr1gsFoYPH87atWvFGVbR0dFUVlZit9s5duwYubm5AEyaNImXX36ZX3/9laSkJBo2bEheXh5utxu9Xk9UVBS1atUiLy+P7777jj59+pwVVvCvoNfrVD0c9GrHnw/wO5FfyoF9ucRFW4iLCfnfPzgHGTklF/z+ULaNQd0bX5I2qHfwrD/ni1qaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShu1oaeGpr/ZePhk6QW/tzvcV6SP7+/p6G/58nfoqaGpRRu16LMamv6up4amtFEbempoShv9U09LXJUTVnq9nri4uLPOsJowYQI2m018lpiYyNy5c8XfkyZNYvv27fTs2VN89umnn3L33Xezfv16wsLCgOrJr9DQUNatWyeu83g8TJ069azJo/79+/PSSy+J6xYuXMjKlStxOp0Awp4lS5YIfYDPPvuMrKws8bfBYKB27dr88ssv4jObzcaMGTOEhsPhAOCRRx7hjjvuENeVl5dz4MABgoODiY6OZtiwYWzfvp1169axYsUKnn32WXr27MmUKVPE2VRt2rTBbrdjNBrZs2eP0Nq6dSu3334769atE6EKLwa324PNVnbRv/u3YzD47wF+peVO3v92F7sy/lz52Dwhkof/0xxL0MXFlE+oFcqqC3zfMM5KYaH90gzFv9NRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2+qeNWvT5SmkGmy68CNJi1sv3g39YU9ronzZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KqN/was1qC/vOvsqpywOhcXsxvor1x7vmuUCZ8rYcelap35uzOv6969O9HR0UyZMkWcT/Xee++h0+kYNGgQAAUFBZSXl6PT6ejYsSM2m434+Hhuv/12AI4dO/aX/TgTeaDc+fHHA/zeW7TrrJjye44U8O6iXRcdU75L8zjm/bj/nGEBDXro3KzWFfHfH9NRTT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1/szEmLIhmDSLZm1mA2+s1Vq+rPpM22hoo3w/8RFPaqA09NTS1aKMWfVZDU4s2atFnNTS1aqNWuConrNxuN9nZ2dhsNqxWKwA//fQTgPj7XCg7nFasWEGLFi0AcDqd7N+/H6g+AyowMBCj0cipU6fIzMwkPj4egN9//x2Xy0VOTg6tWrXCbDYDsH37dp97rF+/HgCTyeRjz/PPP8/u3bspKCigUaNGHD9+3CckoOLTgw8+yMaNGzGZTDRs2NBHQ7nnkiVL+PLLLzly5AhxcXFUVFQAiDOxwsLCmDt3LrfffjuLFi0Cqnd0vfbaa9StWxeoDitoMpkICwsT52Tl5eXx8ssvA1C79sWfM6Qgz7A6G3+Nh6pGTPkX7m7Pix9v8pm0MuirP7/csuGv6aiWnhqaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShv900Yt+nwlNUcNbs57Z0R0aNqgOqKDfD/45zWljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJK51Oh9lsZtSoUYwcOZLc3FymTp0qJl8URowYQXZ2NsuXLweqz50yGAzMmTOHyMhIGjduzMKFC7HbfUMQBAcHExYWxqOPPsro0aMpLy9nypQp6HQ6DAYDb731
Fps2beLDDz/k4MGDTJw4kb59+7Jx40afkH6KltlsZvny5fTv35+UlBRmz55NVVUVBoPBxyeXy8WGDRu47777yM/P54svvsBoNPr4BPDHH3/QtGlTRo8ezffff09mZqbP96dPn2bo0KFUVlaSlJREeno6AQEBjB49GqgOY6jT6ejSpQu//fYbiYmJxMXF4XA4cDqdOBwO2rZte0l5I8+wujD+Fg9VjZjyEREWvnttEL9uOsqOg/m0bBRDr/b1L8fMs/C3dFRbTw1NLdqoRZ/V0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FND0x9tjIiAV0Z1JTu/lOxT9ss6M/d8+Hs6+mO+qK2nhqYWbdSiz2po+rueGprSRm3oqaEpbfRPPS1xVU5YhYWF0bNnT7Kyshg1ahQWi4UhQ4acdU6U2+3G5XKJv61WKy6Xi1GjRjFnzhwKCgpISUnhzjvv5KOPPhK/DQsLo1GjRhgMBkaPHo3RaKRu3brk5eXRvn17unbtSq1atfjwww8B2LhxI19//TVxcXE88cQTvPHGG0LLYrHgcDho0aIFGzZsYMWKFbRq1YqdO3f67LAKCgqitLSUpKQkZs2ahcVi4frrr+fnn3+msrJS2AVQq1YtSktLmTZtGg0aNKBx48YcOHBAfP/WW2+Js7bS09OBP3dfTZ48mf79+wMwffp05s6dy+LFi8nJyUGn09G6dWs2b97MyZMnSUxMvOi8kWdYnRuD4crGL80rKqekwoU10EBM+KVXgGrGlG+fXINe7etjs5VfVlx6b650Ovq7nhqaWrRRiz6roalFG68Gn/ccKeD4KTv1Yiw0iY+8AhZqMx39XU8NTS3aeDX4/N9Pt3Ikp4TEuFCeuv2aK2ChNtPR3/XU0LwabAwJMNA2JVa+H/iZprTRP23Uos9qaPq7nhqa0kb/tFGLPquhqVUb/w1Yrf/yM6wSEhIoLCxk7ty54rOSkhI+/vhjEhISxGcLFiw463cAPXv2ZNSoUeLzKVOmEBcXR2BgoLjuwIEDfPfdd+Ka22+/HaPRSHJyMgD16tXDaDRSVVXF/fffz+DBgwFYuXKlz730+uqMePrpp2nXrp3Qu/7668nJyRF/BwQEAPDFF1+Iz2w2Gz///DOlpdW7YOrUqQNA586deeWVV8R148aN48CBA+JeSpjCTZs2+UzgdevWTWgBBAYG8uCDD/Lggw/yzDPPsHv3bu644w42b97M5SDjc56fy41fWlru5MPv9/iE8WvWIJKRg5piCTRdtN7fEVNei3FlteizGpr+rqeGprRRG3pXQjO3sIzJ87dQWl4lPgsJMjJ+RFtiwi8ulOv50EI6Xm16amhq0UZ/9PnHDZl8tSpD/L0ro5A7J/3KsJ6JXN/uyuxU10I6Xm16amhKG7Whp4amtFEbempoatFGLfqshqYWbdSiz2poatVGrXBVBlPs1q0b69evx2azic9++ukn9Ho9qamp5/1dmzZtCAkJ4ccffxSfOZ1OfvnlF7p16+ajv3//fp9QewcOHMDpdHLttdcC1edJdezYEZPJREbGny+Xy5YtIzExUUwuKedPHTx4UFxTXFxMTk4ODodDnD9lNBopLy/38ennn38GwOPx+PyblZXl49fevXsBOH78OFA9SabX68UEHIDL5aKiogKn03ne9FHst1qt1KtX74LXSf4ZPvx+D3szC3w+25tZwMzFey5Zc+Sgpmetym8SXz0JJpFIJBL/5MzJKoDS8ipenrflH7JIIpFcDt6TVd58vuLw32yJRCKRSCQSiUQikfxzXJU7rIYNG8aCBQvOOsNq2LBhxMbGiuvOPMMqICCAkSNHMmPGDJ8zrAoKCkhPT6dVq1ZYLBZuvPFGEhMTfc6wKikpISEhgRYtWgj9hx56iLVr1zJnzhzmzp1LZGQkeXl5vPnmmz726vV6Jk+ezKuvvorRaCQ4OJjAwEAcDgfFxcUEBgZiMpkwGAx069YNp9NJWFgYdruduLg4ERKwuLgY
qA5B2KpVK1wuFxEREZw6dcrn+zp16rB//366detGWVkZgYGBhIeHU1xcLM7NKi0tpU+fPhiNRoqKinA6nXg8Hg4ePMhzzz2HyXTxu3UULvfg3H8jV+LAvZzTdp+dVQpuD+w+UsApWwU1Iy9+VX1YSABP39GG/KJybFcgzKCCFg9C1KLPamj6u54amtJG/7TRX33eefjUWZNVCqXlVew7VkjzhKhL1tdKOl5NempoatFGf/V5yicXnmh+44vtPH3HpYcH1Eo6Xk16amhKG/3TRi36rIamFm3Uos9qaPq7nhqa0kb/tFGLPquhqVUbtcZVOWEVFhbGvHnzePnll33OsHriiSd8rjvzDCuA+++/H4/HI86waty4MQEBARgMBmbMmEFubi5TpkyhV69e2O12cYaVTqejX79+Plrbtm0DwGQyUVVVhc1mw2Qy0axZM3GNy+XC7XYTFhZGVVUVFRUV2Gw26tat67Obyul04nA4iI2NpbCwkLKyMioqKnxC+imYzWaCg4MpLi7GZrNhsVh8tCwWC3q9HpfLhclkwuFwcOLECRITEzl27BgA2dnZ2Gw2zGYzVVVVuN1uPB4PJpOJLl26XGLOgF6vIyLCcsm//7dyIr+UA/tyL+uA4cMnSy/4vd3hvqy0VyvftHgQohZ9VkPT3/XU0LySelei3jkXWktHNfQuVzO74PgFvz9xuoxu11z+Tul/ezpejXpqaGrRRn/zOfN/9PEyckquSD/t356OV6OeGppas/GXjUfZdSiflo1i6NX+yoTPBP/2WS1Nrdl4tfSVtZYvamn6u54amtJGbeipoSlt9E89LXFVTlgBJCYm+pxhdS7OPMMKQKfTMXLkSEaOHAnAzJkz+eCDD3jnnXcIDw8HqieZXnzxRX777TexY6tTp044HA6hU1lZycyZMwkODuaOO+7gySefxOFw0KdPH2bPns3EiRMBOHHiBABz584V51+tXbuWe++9F51OJyak7HY74eHhpKWliXuMGTOGX3/9lQYNGgCIcH633nor48aNA6CoqEiEKVS0Tpw4gdvt5tdffxU+ffHFF7zwwgtEREQA1buwNm7cSFBQ9cPzzDPPsHPnTk6fPs1nn33G+PHjL5i258Pt9mCzlV3Sb/+NlJY7ef/bXezK+HNnVPOESB7+T3MsQRe3iy3YpLvg9xaz/rIOLzYY/P+QQX+3UYs+q6Hp73pqaF5JvStZ76hlo1qa/q53pTTjIi/c8a0dFSzbg3+ZnhqaWrTRX32OrxnC3syi836fUCtUPtP/Mj01NLVm45HsYl6auxlFZtW2E7zz1R+8cHd74mtZ/cJGNfTU0NSajVdLX1lr+aKWpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dtRNWV4q0tDQ6deokJnYA+vbtywsvvMC6desYPHgwAAkJCT5nVW3bto3S0lJ0Oh0JCQlA9c6n3r17ixCEACdPnjzrnqmpqQQEBGA2m0VoQLvdTo0aNXyu69u3L0uXLiU6OhpAnKkVExMjrgkPDyc5OZk//vhD2KGECNTpdD5aEyZMEJNawcFnh47T6/XUq1ePvLy8/5FqF0YeKPcn7y3addaZU3uOFPDuol2MvrXVRWnFhAXRrEEkezMLcHv+/Fyvqz5zKtoaeEXS/mo4ZNDfbdSiz2po+rueGppXQu9K1jvnQivpqKbe5Wo2qR+JQQ/n6vsa9JBSL0K2B/9SPTU0tWijv/n85LA23DNl5Xm/H31ra/lM/0v11NDUio3ek1VC1w0vfryJWU/3uCxt8E+f1dbUio1XW19ZK/mitqa/66mhKW3Uhp4amtJG/9TTEpqfsMrIyODmm2/2+cxqtRITE+MzQdWtWzc++OADbDYbVqtVfKfX60lNTRXXJSYmMm/ePCoqKggMDKS4uBij0ciPP/4odlhVVVXh8XjEbqdjx47h8XjIy8sjMzOT+Ph4ABHmr06dOuI6k8nE6tWrxQ4xqN6dZTAYxHVFRUXodDp++eUX
hg4dCoDH40Gn0/lMdp2Jy+Xi4MGDdO7c+eIT0gt5hlU1apw5NWpwc947YzVY0wbVq8EuN92vhpit/m6jFn1WQ9Pf9dTQvFJ6ap11dyVtVFPT3/WulGbOafs5J6ugerDucvIZtJOOV5OeGppatNGffb69V0M++/XQOT+Xfbx/n54amlqycdX2rAu2g+t359CtVe1L0vZXn9XU1JKNV1NfWUv5oqamv+upoSlt9E8bteizGppatVFraH7CSpmAOpOwsDCKi4vF38OGDWPBggWMGjWKkSNH8vvvv4vPlbCBAPPnz8fj8VBcXExgYCClpaU0adKEOXPmEBkZSePGjVm4cCEul0tMHin3qVOnDo8++iijR4+mvLyc6dOnAxAZGSlsjY6O5o8//mDixIn07duXjRs3cvDgQQwGg7ChtLSU5s2bM3XqVPR6PbGxscycORODwUBcXJy4rry8nNWrVwPVYQRPnjyJy+WiVq1aFBQUiPteDPIMqz9R48ypiAh4ZVRXsvNLyT5lv+LxtuHqiNnq7zZq0Wc1NP1dTw3Ny9VT+6w70EY6qq13uZp/Rz7Dvz8dr0Y9NTS1aKM/+nxb36bc1rcpE2auI/1YEUn1wnlpZOr//uFFoIV0vNr01NDUgo0ZOSUX/P5Qto1B3Rtf1j38zee/Q1MLNl6NfWUt5MvfoenvempoShu1oaeGprTRP/W0hOYnrP4qYWFhzJs3j5dffplRo0ah1+vR6/U888wzPtd5PJ6zfpucnEyvXr2YM2cOBQUFpKSk0KJFCwIDA32ue+655/j2228ZPXo0RqORbt26sXTpUp9rAgMDmTFjBtOnT+frr78mLi6OPn36sGLFCp/runfvTklJCdOmTcNut9OmTRtq166N2WwW15w+fZrHHnvsLHtfeOEFGjRoQIcOHS46neQZVn+i5plTIQEG2qbEYrOVX9aZBt5c6RirV1pPDU1/11NDU4s2aslnNesdLaWjWnpXSlOeaej/NmrRZzU0/V1PDc0nb2sj9GQf79+rp4amlmxMqBXKqgt83zDO6jf9HS3li5qaWuwraylf1NT0dz01NKWN/mmjFn1WQ1OrNv4bkGdYXQRWq5WSkrNXaBUXF4vznhQSExOZO3cuAJ9++ikvvfTSWRNUI0aM8Dkrymq1YrfbGTlypE8Yv2HDholrlH9NJhMzZswQ12RmZrJ06VIfrdLSUnr27EnPnj3FdW+++aaPrVarlfLycsaOHcvYsWPF5127dvW5rk6dOqSnp7N69WoefvhhHnjggXNOYF0sMj5nNX/HmVNajdnq7zZq0Wc1NP1dTw3Ny9WT9c7VoXe5mvJMQ/U0/V1PDU0t2qhFn9XQ1KKNWvRZDc3L1evSPI55P+4/71mOnZvV8rv+jhby5e/Q1GJfWQv58ndo+rueGprSRm3oqaEpbfRPPS2h+WCKCQkJPmdVAZSUlJCfn09CQsIFfwdw5MgRn88zMjKIi4sTu6fOpe/xeDhy5IjQqFevHiaT6azrlL+V6xISEjh16pRPqELlOm9bL8anP/74g8cee4ybbrrpikxWSXwZOagpTeJ9Qys2iY9k5KCm/5BFEonk346sd7SBzGeJRCKRaJlxI9py5iJdg776c4nkQsg+lEQikUgk/o3md1h169aNDz74wOcsq59++gm9Xk9q6vnjxrdp04aQkBB+/PFHkpOTAXA6nfzyyy9069bNR//7778nMzOT+Ph4AH7//XeKioq49tprATCbzXTo0IGff/6ZESNGiN8uW7aMxMRE6tSpA0CXLl3Q6/X88ssvDB06FKjeCbZ27Voefvjhi/bp0KFDjBw5ko4dO/Liiy9echpKzo8l0MToW1txylaB3eHGYtYTbQ383z+USCSSS0TWO9pA5rNEIpFItEz9WCuznu7B+t05HMq20TDOSudmtf5psyRXAbIPJZFIJBKJf6P5Cathw4axYMECRo0axciRI8nNzWXq1KkMGzaM2NhYcd2IESPIzs5m+fLl
AAQEBDBy5EhmzJhBZGQkjRs3ZuHChRQVFXHvvfeK391www3MnDmTRx99lNGjR1NeXs7UqVO57rrraNGihbjuoYce4s4772TixIn07duXjRs3snTpUt58801xTc2aNRkyZAhTp05Fr9cTGxvLzJkzCQ0NZdiwYRfl0+nTp7n33nsJCAhgxIgR7N69W/w+JCSEhg0bXvnE1jA1I4OJiLBQWGiX20ElEsnfgqx3tIHMZ4lEIpFomW6tajOoe2PZDkouGtmHkkgkEonEP9H8hFVYWBjz5s3j5ZdfZtSoUVgsFoYMGcITTzzhc53b7cblcvl8dv/99+PxeJgzZw4FBQWkpKQwe/Zs6tatK64xmUx89NFHTJo0idGjR2M0GunduzfPPfecj1bbtm2ZMWMG06dP5+uvvyYuLo5JkybRt29fn+vGjRuHxWJh2rRp2O122rRpw8cff0xoaOhF+XTo0CFOnjwJwF133eVzj/bt27NgwYKLT0yJRCKRSCQSiUQikUgkEolEIpFIJJJLQPMTVgCJiYnMnTv3gtecawJHp9MxcuRIRo4cecHfxsbGMmPGjP9pR8+ePenZs+cFrzGbzYwdO5axY8de8Lr/5VOHDh1IT0//nzZJJBKJRCKRSCQSiUQikUgkEolEIpGojf5/XyKRSCQSiUQikUgkEolEIpFIJBKJRCKRqIecsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/ipywkkgkEolEIpFIJBKJRCKRSCQSiUQikfyjyAkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KnLCSSCQSiUQikUgkEolEIpFIJBKJRCKR/KPICSuJRCKRSCQSiUQikUgkEolEIpFIJBLJP4qcsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/is7j8Xj+aSMk/x48Hg9utyxS58Jg0ONyuf1WTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaM29NTQlDb6p96/Ab1eh06n+0vXygkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KDAkokUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8UOWElkUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8U4z9tgERyJTl8+DCTJk1i+/btWCwWBg0axOOPP47ZbL7g7zweD7NmzWL+/PmcOnUKnU5HaGgoQ4YM8fl9bm4ukyZNYu3atZhMJnr37s2zzz6LxWJh1qxZfPbZZxQUFBAXF4fL5SInJweDwYDL5SIsLIzevXuTn5/P+vXrMRqN1KxZk1OnTlFeXk7r1q0ZP348CQkJrFixggkTJnDq1CkAAgICAAgKCiI4OJjCwkKCg4OJiYnh8OHDOJ1O9Ho9gYGBOJ1OEhISeOKJJ+jevTuHDx/miSeeID09Xfhbp04datasyZYtW3j66aepVasWP/zwA2vXrqWiogIAnU6HXq8nPDycm266iZ49e/L111/z+++/c/LkSTweDwBWq5Unn3ySW2+9FY/Hw7Bhw/jjjz/OSuPY2Fjmzp1LQkIC
ubm5PP/886xZs+as6/R6PSaTiZSUFHr37s2cOXM4ffr0WdfpdDo8Hg8BAQEkJiZy8uRJbDYbQUFBOBwOKisradiwIceOHSMuLo6bbrqJjRs3snnzZqqqqny0DAYDRqORa665hnbt2vHNN99w4sQJkeZdunShqKiInTt3otfr8Xg8VFZWijTQ6XTUrl2btm3bsn37dk6cOEFAQABOp5OAgACRzzab7ax7KwQHB1OrVi2ys7PR6XSUlZWddY3RaKR+/fpUVFSQn59PgwYNfPJ50qRJbN26lcrKyrN+Gx8fT2FhIcXFxeIzi8VCVVUVgYGBoiyHhIT4/G7IkCHs2rVL+FmnTh1ef/111qxZwzvvvHNOX4xGIz179uSVV17x0Tt8+DCjR49m//79Qs9gMNCsWTOeffZZWrVqBcDChQt54403sNlsProBAQHceuutPPXUU1RWVvLss8/y22+/UVVVhU6nE9coz5vNZsPhcJzTxubNmzN16lQsFgtPPvkkW7Zswe12C7u8y/7jjz/O2rVree655ygsLDxLq0aNGrz00kukpqby5ptv8tVXX1FaWgpUlx+lnNSpU4fi4mKKioowm82Ul5fjcrnOaZ/ZbMZgMIiyoNRJQ4cO5fHHH2fNmjU+dYT37+Li4ggMDOTo0aN4PB4cDgdutxuDwUDNmjUpKSmhqqqKrl27Mm7cOEpKSnzyRUGn0xEbG8stt9zC/fffzzfffHPOfPHm5ptvZv/+/ezbt0/cU/F95MiRDB48WNSV+fn56HQ6nE4nACaTiXbt2jF+/HgsFgtPPfUUW7duFfliMpmoqqoSz2BYWBht2rTh6NGjHDlyBIPBgNPpFGmamJjIU089RWpqKm+88QZffPEF5eXleDwegoKCcLvdVFZW8tBDD/Hdd99x+vRpTCYTZWVlIs/OxGis7jbp9XocDgd6vR6z2SzKXOvWrcnMzCQjIwOj0YjT6cTtdhMYGEibNm0YP348derUEfac+ZxHRESQlpaG2Wzm8OHDTJgwgW3btuHxeDCZTAwYMIDnn3/ep83Jz88HEHWLyWQCOKvNUdJGyZfWrVvz8ssvizbnueeeo6ioyCf/o6KiRDsKMHnyZL755huRZ1D9zHXs2JExY8aQlJQknvP09HQ8Hg8GgwG9Xk9YWBjx8fFkZWVRWFhITEwMBQUFonwnJSXx5ptvYrFYePnll1m1ahVVVVU+eWE0GuncuTMOh4Nt27bhcrlEfptMJgICAqisrBRtoPczWVJSclZeXnPNNdx33328++677Ny5U5Q1JY+Dg4Pp06cPzz77LO+++y5paWlkZWWJZ0pph7zrRqUe++qrr5g1axbHjh3z8cFsNjNs2DAef/xx3n33XXbs2MGuXbuorKwUekq7esstt/D444/z66+/nrPtSE5OFmmm9E2qqqoIDg6muLhYlHW9Xk9VVZXomxw7duyc5Ruq24XatWuLvknz5s0JDw9n48aNlJeXYzQaKS8vF2mk0+kICwujfv367Nu3T3ynPPtKHRgQEEB4eDg2m42SkhIaNGiAyWTi4MGD6PV68TwqZQ+q28WkpCTy8/PJycnBZDKJZ9hkMmGxWLDb7cTExBAaGkpeXp7ow1RVVWGxWIiKiiI3N5fS0lJCQkKw2+0iH9xu91llzLtcX3vttRQVFbFr1y6fPqV3/avT6QgPDycgIICCggLRLqempjJ58mQWLVok2iHFL4PBQPfu3RkxYgSvv/46O3bsEDbo9XqCgoKA6jqsQYMGbNq0ifz8fDwejyjvMTExANhsNho2bIjH4+HAgQO4XC6hFRgYSEREBLm5ubjdbkwmE8HBwZSUlGA0GtHr9VgsFuLi4igpKSE7O5tatWpRt25ddu/e7VMfKCh92vPRokULAgMD2bVrF1VVVT51hTcRERH/M8282yyHwyHyOygoCJ1OR0FBwXntALjvvvv44osvznr2vVm3bh0TJ05k5cqVwi+z2XxJde2hQ4fYvn07brdb5IHB
YOCaa65h4sSJ561rofo5ufXWWxkzZgyATzoofarevXvz5JNPEhUVhdls5vTp0+j1elHmveta77rf7XbjcrlwOp0+da1iz5NPPnnOPidU9wFfeOEFH3t0Oh1ms5nKykruvfde9u3bx/bt2wkMDPTJF4vFQmxsLCdPnhR1ZGBgIHv37vV57hSbXnzxRVF/lpSU8Oyzz/rkC1Q/H02bNmXcuHG0atUKt9vNmDFj+Omnn3zq76CgIPr378+zzz5LVlYW9913n8g/b2rXrs2oUaO4+eab+fzzz/nll1/Ys2ePqD+90el0NGzYkEmTJpGdnc2PP/7I1q1bz3pHMZlM9OvXjwkTJoj3xs2bN4v2VyEqKooxY8Zw8803c/z4cSZNmsTevXvFs+59X+/0WbhwIW+//fZZ5d+73IaEhPj0372JjIzkySef5OabbxaflZSUMHnyZH744Qef+srj8WA0Gunbty8dOnTgq6++4vDhw9jtdtG+63Q6goODuf766xk+fDiDBg06R0mqJjAwEJfLJdqoM9HpdJhMJoKCgqioqCAgIACDwUBRUZFPedHr9URERDBo0CBGjRrFO++8w6JFi86ZbwozZ85k3rx5bN68WdRLer2eNm3aMHPmTJFX27dvJzg4GL1eL8pMYGAgDz30EA8++CBZWVn07NnzvD4mJSXx0ksvibK8bNkyvv76azZt2uRTH3qX5ejo6PNqepfl3Nxcxo4de8589S7LANu2bePFF188q2+vpGHDhg0pKio653OhoJTlwYMHs3jxYrZs2UJ2djZut9unLAOEhIRw66238vjjj1NZWcnkyZNZunSpT1q3bNmSV155hQYNGoh69fTp0xgMBtF3MJvNREdHU1hYSFBQEBaLhezs7LPandq1azN+/Hi6d+/OypUrefPNNzl06NBZdlmtVqZPn067du148803+fbbb88qJ3FxcTRu3Jj9+/dz+vRpzGYzFRUVPu0pVNexERERoh9hMBjE+975UJ6j82E0Gn36d8r1Y8aM4YEHHhDv91u2bKGqqkr0PS0WC5GRkRw7doynn34aq9XKrFmzOH78OIBPOgQHB9OgQQPy8vJEfeXxeER78Z///IfHH3+cgoIChgwZ4lMmlGc8MTFRjDksWrSISZMmiXRQ7DYajVgsFmw2G263W/TtoDr/AwIC0Ov1NGzYkAceeIBevXqxf/9+HnzwQXJycs6bRjt37iQ9PZ2HH374LNsGDBjAhAkTAPj4449ZvXo1hw4dorKbDUD5AACB/0lEQVSyUtzbu66dMWPGeccvoqKi+OSTT0hISDjvdUajkeHDhzN69GgxPlheXs4rr7zCd9995zPuoNfr0ev1PuMcSUlJ57y3TqdDp9P5tG/ffvst8+bN4+DBg6Is6nQ6AgMD6devH3Fxcfz++++i3AcHB2Oz2cSzFBcXx6hRo3juuefOm7bKvc8c91i8eDHvvfeeyBdlDOKxxx5j3rx5fPLJJ+Tl5QGI8m00GkU7n5mZKfoe5+sztmzZkgcffJDp06dz6NAhPB6PyDOj0YjH4yEiIoL27dtTs2ZNFi9eTGFhoU/ZNpvNdOjQgaeeeoqkpCS+/fZbPvzwQzIyMs55z6SkJKKiotiyZQuAeA/wHoNISEgAwOFwMH36dDZs2MDevXvFM6Pk0/PPP0+rVq1E32HHjh2iPtDpdAQFBdGvXz8xvtajRw8xvujNmXW3YtfHH3/MzJkzRVsZHR3NggULhH3Dhw9n06ZNZ+nddNNNjBs3jtDQ0HOmwdWGznOhGlQiuYooLi6mf//+xMfHM3LkSHJzc5kyZQoDBw4UDdn5+PDDD3nrrbcIDAykdu3aBAcHs2fPHkwmEzfddBMTJkzA6XQyePBgAJ544gkqKir473//S3JyMtdccw1vv/02Tz75JACvvvoqBoMBi8WCxWIhJyeHW2+9la+++orQ0FCmTJnC/Pnz2bBhA40bN+app57igw8+4Pjx40yePJn7778fnU7Hf/7z
H5YuXUplZSVGo5GAgAAqKiro1q0bJSUlbNmyhaCgILp168bPP/8sKtCmTZvyzTffMHPmTMaMGUNRURE6nY727duzdetWn47R008/zY4dO9iwYYOoEE0mEwaDgcrKShITE8nNzaVOnTqYzWYOHDhAeXm5mAxyOp14PB7eeustDhw4wLvvvntW+kZERFBYWEhkZCTLli3jzjvvxOl0cuTIETHBYLVaKSoqwmg00rVrV4qLi9myZQuNGjUiIyNDNHahoaGUlpbi8Xi46aab+OGHH3A6nbRt25a9e/diNBopKyujqqqKm266if/85z+sXr2aOXPmEBkZSWFhoWhYTSaT6EhbrVaio6PJyMggIiICi8VCfHw869atE9/37NmTxYsX43K5CAoKory8HIPBgNVqxeVyYbPZROerpKSEkpISmjRpwp49e2jcuDH33Xcfs2fPJi8vj5EjR/Lf//6X5s2bs2PHDjHQ+uCDD7Jnzx7WrFlDREQE8fHx7NmzB4AGDRqQnp6OxWLh9ddfZ9WqVSKfn3nmGeLj44mKiuKnn34SA9I6nY6dO3ficDhE+WncuDEHDhxAr6/eZPvoo4/yxRdfkJyczMyZM0W+ffTRR7z22msAtGrVirp167JkyRICAgIYNGgQX375JSkpKZjNZnbs2IFOpyM5OZnKykoyMzPp3Lkzs2fPFs/n9ddfT1FREQ0aNODo0aPodDpcLhctWrTg8OHDLF68mJ07dzJ69GgxsGaz2cRgZ0BAAHa7nWHDhnH48GE2bdpEzZo1ycnJEb4FBwdTVlZGixYt2LlzJ7Vq1SInJ0d8rvybnJxMQUEBoaGhZGdnU15eTrNmzdi9e7foYCsTTB07dmTFihUYjUYcDodP3ns8HurXr8/x48e57rrrWLduHZWVlXTt2pXNmzeLwfi2bduyZcsWoqKicDqdlJeXiwE5l8tFSEgIbreb8vJyLBYLAwcO5LPPPhMDAddccw1r164lMDCQ1NRUfv31VzHhFxgYKAbkxo4dy1tvvUVFRQUtW7Zk586dADRr1oxDhw5RXl5OUlISjz32GNOnT8fj8ZCfny/yJTMzE6jufCYkJJCVlQVAmzZt2LBhg8gXpa4wm80YjUYqKyu59tpr2bRpE5WVlWLip3Xr1uzfv5/ExET27NlD//79+fnnn+nXrx+LFy/2eVHT6/U0btxY5EtBQQGRkZE0a9aM77//HrPZLMqxTqejQYMG7N27lwYNGoiJxKKiIpGmDRo0EPmSlpaGy+Vi4MCBLF68WHQ6XS4XJpOJUaNGMWfOHFG3NG7cmPT0dAIDA0lJSRETg+Hh4eTk5Ii/lQHhnJwcevbsyYoVK0hISBADdMrL3Jtvvsmnn37K8ePHSU1NZenSpVRVVYlJNu+Jj9tvv53HHnuMfv36YbfbiYyMpHv37nz99dfo9Xrat28v2hwlHZX0U/QAnzYnLi6OQ4cO4XQ6qV27NgAFBQVYrVbR5kD1S215ebl4IR49ejQffvghAwcOxOFw8PXXXwN/dsTtdjt6vZ6oqChKS0uZO3cuI0eOpKioiM6dO/PHH3+ICQblmWzTpg3t27fngw8+AGDo0KHk5uaSlpZGREQEkZGRFBcXU1hYiMFgEM+JwWAgNjaW48ePi3YgMDCQiooKoqKiOHXqFAEBASQnJ5OcnMw333zj80zWqlWLEydOiPJRr1498WKvtNd2u12UR6vVKl4MU1JSiI2NpVatWnz88cdYrVbxW71eT7169cjMzCQgIIAffviBnTt3MmbMGJKTk9m3b5+oU5U2SK/XM3jwYH7++WdSUlJEu9ypUyeaNGnCggULcDgcDBgwgGXLlolBAO8Jc6UuU9LMYDAwcuRInn/+eSoqKsSzpSzIaNq0Kdu2bcNoNPLII4+wbt06Nm/eTLt27cjMzKSgoACXy0WzZs1Em/XUU08xevRoSktLue+++5g1a5ZoO3U6HZWVlWKSPDMzk+DgYBo2bCgGpAFq1apFRUUFnTp1
4scff8RoNDJ27FimTp1KVVUVvXv3Zvny5bjdbuLi4igoKKCyspLrrrsOs9nMzz//TL169aiqqiInJwePx0PXrl1Zs2YNOp1O1LXl5eUivWJjY8nPzxcTXDVr1uTkyZMA9O3bl82bN4vJJmViWRkgtVgsPPzwwwC8/vrrhIaGMm3aNNGn7Nixo6h/b731VtLT09m2bRsGg4EpU6awZcsWvvnmG7p168aqVauIiorCbrdTUVGB2+2mZs2aFBcXYzKZRP3gcrlEu6zw8MMPs2bNGnbt2kXr1q3Zvn27sDc2NpasrCyCg4P573//y7PPPktpaSm1a9fm9OnTVFVVERMTIwYawsLC6NatG0uWLBEDws2aNWP79u3079+fpUuXYjAYmDRpEgsWLGDv3r0kJyezf/9+wsLCcLvdBAQEcOrUKYKCgqhRowYul4uCggLRZiUlJbFt2zaxuGLatGlMnjyZY8eOcc0117B582YAevXqRX5+Pn/88cd50ywmJoahQ4cye/Zs0d/Mz8/H4XCI/mR5eTlxcXG0a9eOH374gaqqKp544gk6duzI66+/zt69e7Hb7RgMBp8JJGUAyWQy0a1bN4qLi9m4cSMmk0m0w8XFxQQFBZ23rj0Tpa41mUzUqVMHj8fDkSNHiImJEXVkcHCwqGuVut77FTwgIAC3280tt9yCw+Hgm2++EemwbNkyMjMzRd+8vLycxx57jFmzZgl9ZQLJ4/H41P1DhgwR/bC6devSoEED0tLSiIyMZOrUqeJ949prr2Xt2rU4nU6MRiOdOnVizZo1dOvWjdjYWGFPSEgIx44dExNOFouFJk2a8P/+3//jmWeeEfnSsmVLfvzxR1GWGzVqxPPPP094eLhYmKVMzttsNkwmE2azmcWLF1O3bl1GjBgh8sV7MDsgIIDQ0FDKy8tZvHgxzz//PBs3bhTtsbKgTGmvmzZtyr59+ygoKPCZbNXpdFitVqqqqigrK2P69OlMmTKF9u3b89tvv6HT6UT/U9FWJpwCAgK45ppryM3N5ejRo1RWVqLX67FardjtdjHw2LZtWw4cOEBoaKjP4JlSdyt2TJ8+nfj4eBYsWMDx48fFQh2lHdbr9RiNRoxGIy+++CJPPfUUAQEBVFVViUHMkJAQsaCnffv2TJ06ldTUVPR6vUgPg8FAZGSk6L9Mnz6dPn36AHDvvfeydetWPB4PDRs2ZPfu3aL/WbduXfbv30/Tpk3p1KkTX331lZiItNlsGAwGOnbsyOHDhwkMDCQrK0uUn6ioKJ8JvejoaE6dOkVoaCglJSUEBgaKPqSy8Ke4uJjg4GAAUUd6PB5iY2PJzc0V7249evRg8+bNwhaA0tJSTCYTLpdL5Pf06dOxWCw888wzREZGkp6eTlBQEC6XS/QpkpKSKCgoEGMIr732Gunp6eh0OiIjIzGbzeTk5DBlyhSSk5O56aabiIuLIy8vT7R1kZGRYiI6KChIlOWHHnqI1atXi7pe6bsbDAZRlj/44AOGDx8u0ikwMBBATEYrZXn//v1iolKn0/lMBgQFBYmynJKSwqBBg8T7aklJibjWuyy73W4iIyMJDg7m5MmToj8aFhbmU5Zr1KhBjRo1SEhI4KuvvjpnPaj4PXjwYI4ePcrWrVvFwi6LxSIWg0VERHDLLbcwc+ZM0fc+cyGcUift27eP3Nxcn4kPk8kkFm/o9XomTJjASy+9RFJSEnv37hUaShkzm83UqlWLDh06sGzZMiwWC06nk4KCAsxmsyhrRUVFXHfddWzfvh2Xy0VpaSnh4eEUFRWJd3Sl7MXFxZGdnY3RaMTtdpOUlER6ejp169aloKBAvEso9ir1alVVlah3IiMjxaIDZRGc0t+tqqqiRYsWfPTRR/Tv35+oqCjS09MxGo24XC7q1q3L0aNHha833ngjS5cuJSUlhSNHjuBwOHzeLR0OB1VVVQQEBBAUFERpaSkul4saNWqId5UBAwawfPlyTp06RXx8PFarVbxDAnTo0IGt
W7fy3HPP8dJLL2EwGMT7mN1uF31oqH43rFGjBllZWaLfoix2uvfeeykpKeHLL79kwoQJTJ06VdRR3pPSSl/ltddeo0WLFvTs2VM818p7OCDaqzFjxnDPPffQuXNnvv/+e7GwUSn7Sl3bqlUrTp48yaxZs1i1ahV6vZ7mzZuzdetWDAYDUVFRLFu2jA8++ICPPvpIvOt7axkMBm655RYxvjhmzBh++OEHjEYjkZGR6HQ6Tp48idVqpW/fvhQUFLB+/XoWL15Mr169GD58OOvXr+f48eN4PB4xjnX48GHR3t1yyy18+eWXDBgwgC+//FIs1OncuTPLly8Xz+gdd9xB586d+fTTT1m3bp0YV6moqODEiRPiuRs+fDjXXXcdTz75pBizUvpTyvPVtm1b0tPTadGiBevWrRP1f2xsrBiDaNy4MXv27KF9+/asW7dOvJvDnwvsKysriY6ORq/Xi0ktpZ4yGAw0b96csWPHkpmZyXPPPce1117Lb7/9JsqA8qyYTCaefvppvvjiC3Jzc2ncuLFYxGk0GomKiiIvL4/Q0FCcTidDhw7l66+/xu12i7ozOjqaoqIiqqqqeOihh/jiiy9ITEzE7Xazd+9eUY6GDh3KkSNHOH78OD/88AOhoaHYbDZ69OhBRUWFaEt79erFunXriI6OpqCggMWLFzN8+HDq1avHxo0bCQ0Nxe12i/ozMjKS5s2bM3PmTKHldDopKSmhRYsWYgFAQUGBT5s8btw4lixZIvJOr9fz7bffotfrhX3Dhw8nNzeXFi1a0KhRI0JCQjh+/DjffvstTZs2Zc6cOeetq68mZEhAyb+Gzz//HLvdzjvvvEPXrl0ZMmQITz31FJ9//jm5ubnn/V1lZSUzZ86kTZs2uN1u5s+fz/z584mJiaFJkybi9z///DMHDx7krbfeokePHvTr14/JkyezatUq3nvvPe655x7uuusu1qxZQ8uWLUXn7LvvvmPAgAH89ttvuN1uSkpKqFGjBps2bWL48OGkp6cTFhbGu+++S0lJCS+99BI6nY7777+f+vXrYzAY6NWrl1h9/NRTT7F69Wq2bt0q7N+zZw/XX389ZrOZ3bt3M2TIEJo3b87LL79McXExer2eBx54gPnz54tVFspkBcAzzzyDzWYTq38BMUB86NAhbrvtNg4cOECHDh2orKykRYsW/PDDD7zyyiuiczFt2jRmzZoldHU6HRMmTBCdveTkZIqKinj55Zc5ePAgL7zwAlDdME2ZMoWCggIGDRrEuHHjWLVqFR6PB7PZLF7iBwwYQOvWrSkpKSEgIIDmzZuTk5MjXj63bdvGjz/+yPLly8VnY8eOpWPHjoSHh6PX6yksLCQ0NBS9Xs/DDz+M0+nEarXSoEEDbDabaFwKCwuZNWsWs2fPpmbNmkD1hMuPP/7INddcIzpdTZo0oWbNmrRv3140/Dt37uTEiRN8/vnnpKSksHv3bpHP8fHxzJs3j4qKChYuXEjLli1JSEgQK46uv/56li9fLgaclV1d48aNY9y4caSnp9OtWzc8Hg+HDx/mpZdeEvlst9uZNm2a2LU2ZMgQtm/fzpQpU0Q6R0ZGMmDAAEaOHAlUr95o1KgR27dvF2VZ6Zw6HA7efvttUSauv/56Xn/9dfr164fL5RIvX1OnTmXnzp3odDoeeeQRDhw4wNtvv43JZGLt2rVC7/PPP8dms9GsWTPy8/O5//77eeGFF9DpdBQVFREeHs7s2bN544030Ol0DBo0iOLiYgICAkhJScHpdHL99dfjcrlYuHChmDypW7curVq1YtiwYUD1Kqfu3buze/duunbtSk5ODl26dKGiooKwsDAcDgfR0dFERERQVFTE4cOHARg5ciTXX3+9KG+hoaFkZWVx++23s3z5cuLi4kSHLDo6mri4OGrUqIHb7ebo0aMkJCTw66+/ioGahx56iLKyMgICAmjUqBFbt27llltu4dSpU2JCVRlcUSZ3lMnfsrIy0tLSSE5Oxmw2Y7fb
efTRR2nZsiW1a9dm+fLloo7YtWsXUVFRhIWFAbB06VL0ej1JSUns2LEDgEceeYQ9e/ZQXl6OyWQiPT2dmJgY3nrrLQ4ePEhxcbHIlwceeIAXX3wRnU4nBj0bNmzIhg0bfPJFr9eLARKlTKxatYrbbrsNk8kknpv33nuPZ599ln379nHdddexbNky7rnnHnbs2EHr1q1FOgA0adLEJ1/ef/99li1bRmJiohiMMxqNVFRUcOedd7J3714xUK4MnCu7HQCOHTsm8kWn03HfffcxZcoU1q9fLwZJAG677TaxSt3tdhMTEyMGzxwOB9u3bycrK4sPP/yQL7/8kuTkZBITE4HqlfZKHb9+/XrxEjl//nwqKiqoU6cOAB07duTdd9+luLiYRYsWicEsk8nEL7/8QmBgICaTicDAQD7//HM++ugjbDYbFRUVfPTRR4wfP57nnnuOiooKnzZnx44dxMTE0LJlS0JCQvB4PDRv3pyYmBifNmffvn24XC7uvPNOTpw4wVNPPUV5eTmFhYU+bc7IkSMJDAwUaZidnc1TTz3FwoUL+frrr8Xk8W+//cbKlSvF86LsTnv11Vex2Wy0aNGCjh07inoYYNeuXXTt2pUdO3aIgd/w8HCMRiOzZs2iefPmIu/Lysq47rrrcDgcYvApLCxM2FVSUkLt2rWpqKjgkUce4dSpU8TGxhIZGcmOHTsYMmQISUlJ4pls3rw5xcXFjBgxQkxqKZNAShtWWlrKTTfdJNovZVdH165dWbVqFUOGDEGv1+N0OqlZsyZ6vZ7Y2FgxMdqjRw9cLhezZ8/m7bffpkePHmJlc2BgIM2bNycpKUnsyPvmm29YsmQJqampos164403ePrppxk3bhwAP/zwA23btsVoNIpdN+Hh4dSqVYuysjKSkpJEmr311lsMGDCAhx56SAxmAtxxxx08//zzbNu2jSZNmohBv/z8fOLi4jh69CinTp3CZDIRERHh02YpA5JKOa1ZsyYBAQE4HA48Hg/XX389YWFhZGZmotPpKC8vZ8+ePUyYMIEBAwYA8Morr1BRUcGvv/7KXXfdhcViERMkNWrUYOXKlQQHB9OlSxdycnKoqKggJiaGmjVrkp6eTocOHTh27BjZ2dmi7d+9ezeJiYl4PB7WrFlDeXk5PXr0YMWKFbRs2VIs2rjvvvvo27ev6AM2adKEZ555hoKCArEbRGlvlQH4srIyBgwYIF6Wi4uLCQsLE31K7/p34sSJFBQUUKtWLXQ6Hdu2bRMDZytXrsTj8dC/f38xOGgymTh58iTDhw+npKRErDxt0aIFS5YsYezYsaIPtXTpUo4cOUKdOnXYtWsXOp2O8ePHExMTQ25uLjExMVRUVPDTTz9RWlpKdHS0GCCYNm2az2plm83GyZMnRZpVVVUxffp0+vfvL9IsNDSUTZs2kZ6eTtOmTcWu/LFjx+J0Ojl16pTYxf/ll1/yzTff+KRZdnY28fHxPmkWExNDYmIimzZtQqfT8cADD/DOO+9QVFR0wTSbO3cuXbp0EW3WoUOHqKioYN68eWRnZ1NRUUHt2rXJzc1lzJgxTJgwAZ1Ox6JFi8RgioLRaKRdu3aibfB4PAQHB9OsWTOWL1/Ohg0bxET/p59+ysKFC/F4PJSXl59V14aFhYlV4lC9w9G7rnU6nUydOpWjR48yfPhw8vLyxOBccXGxqGvbtWsnnunw8HB0Oh0tWrTA5XLx2Wefibp27ty5PProoyxcuBCo7u9XVlaKPq2y0EZpi8eOHYtOp2PFihWi7l+2bBk6nY4777yT48eP8+ijj4q61rvub926NSaTiV69egGQlZVFUFAQa9eu9bFn6dKlrF27Vqwsr6ys5J133hETN0q+bN++nS5duqDT6fjhhx/EBL+C2Wyma9eu2O12kpOTxeT67Nmz2b59u8gXZUfXrbfeClT3TU+dOkVwcDCvvPIKGzduJCIigvr161NZWcl3331H69atiYmJISgoiHXr1lFcXCz6WzVq
1BA2KM9gUlISb7/9NosWLSIxMRGHw0FJSYmYmA8ODhYDycHBwSKqwoABA0T74Xa7WbhwIePHj6eqqorKykrWrVuHzWbD6XSKXaB169alsrKSZs2aERMTg8Vi4e233yY5OZlHH31U7MQymUwicoWyuMZgMPD+++8TFxfHQw89JHab16hRg9LSUu644w7RR5g+fbqYHImJiaF169a88MILFBQU0LRpU6Kjo3n77bcB2L59O2vXrhXvmAcOHBCr251OJ3379iU8PJy9e/eKNvD//b//R0lJCcOGDUOn07FhwwYee+wxMjMzufHGGzl+/DgDBgwgKSlJ9LV0Oh2NGjUS73JQPSHz+OOPizqyc+fOuN1u/u///o+ysjJq1qwp0njQoEEYDAbi4uKwWCysXbuW++67j0OHDtGlSxdKS0u59dZbad68OVA9iO92u1m6dCl79uzxWZCyaNEixo0bR3Z2Nq1btyY9PR2bzcY777xDo0aNxOQAVC9e+e677zAYDEydOpUlS5YA0LBhQzEJoCy0fOihh8TguDJR3LJlS5/J0g4dOoiFsEpZViaBQkNDRVn+9ttvzyrL3jvNGjRoQN26dcXElcPhEGV59uzZmEwm9Hq9z45a70VAykRNZGQkQ4cOFe97Ho/nrLJ8/PhxnnnmGTZv3izKstIfa9iwoZjMDgkJYeHChSJii8fjYfz48SxcuFC8QxUUFPDhhx9yzz33oNfrxSSHshtAmZBavXq1GOx2u90EBQXRoUMHnE4nwcHBREdHY7FYmD59uliUp9ChQwdiYmIIDw/H6XRy9OhRvv76a/r06UNeXh5BQUF0794do9FI9+7dKSoqIiEhgQ0bNuBwOMRizHr16mE2m3E6nTRr1kz0/7Kzs+nSpYuY9HvppZdo2bIlcXFxYvJG4cYbb8RgMIj3NJvNRvfu3cnKyqJHjx6UlZXx5ZdfsmrVKoKCgkQ/dM+ePXz22WfY7XbCwsKoWbOmGMBX6mcliklaWho9evQgPT2d1NRUsZigefPmeDweUWYqKyvp1KmTeE9r27Ytubm59OnTh4ULF3Lq1CkGDRrEzz//jM1mo127dkD1zqPCwkKaN2/OtGnTAMQ7hd1uJykpSeSjMvmWlZVFRESEuParr77CYDDw7bff8vLLL5OamsqMGTOoqKjAZDKJRSNt2rQR0QGMRiO///47Y8eOFc+P2+3mq6++4oUXXhALFFatWkVRURHLly/n9OnThISE0Lp1a3777TesVivx8fGirq1ZsyY1a9Zk5cqVuN1uxo0bR7169QgNDcVkMlFYWMjnn3/O7t27Rb0VEBDATz/9JCaonE6nGB90u9389NNPoq5dvHgxpaWl1KhRA5vNxqhRo3jjjTfEOAdUL+R66KGHxA5qqN4t8/rrr3Pq1CnMZjNz587lzTff5MSJE9SrV4/y8nK+/vpr3nnnHQYMGEBQUBBVVVUMGDCA5ORk1q1bR3x8PAEBAURFRbFgwQI8Ho/Y0VWrVi0xFqBM5A4bNgyz2SzGIrZu3crIkSNZt26deM+dP38+s2bNomXLltSqVYs//viDIUOGsGPHDh544AFq1aoldpsr72t16tTh1KlT3H333aJMKHlXv359tm3bhl6vZ8mSJWJyTafT0b9/f3Q6HbVq1aJfv354PB52797NkSNHeOCBB8Rk1YIFCzCbzQwZMgSz2SzGMBcsWMANN9yAw+EgOTkZgPHjx4s+4sKFC6moqOCdd97h9ddfx+FwMHDgQKB6bEcZi/38889F3X/fffeJPAJ4+eWXefbZZzlx4gShoaHMnj2bRYsWkZubS+vWrSktLeXrr78mNTWVgIAAatWqJcbXKisrOX36tBi7+vLLL5k/fz4FBQW0bt1atMm///672NH+/PPP89RTTzFmzBiWLl3qY5/SDrz++uuMHDmSO+64g2eeeYYxY8awbt26C45/X03ICSvJv4a0tDQ6deokOlBQvZLW7XaLXTLnYtu2bZSWllJaWip+
bzab6d27N1lZWeL3aWlpJCUliW2YAKmpqVgsFsrLy+nbty8Oh4ONGzfSr18/0dCGh4fTr18/8vLyiI+Px+Px8Nlnn+F2uxk1ahTh4eGsXr2a8PBwOnXqxLFjx3C73fTt21f4NHjwYLGi4+abbxarRRMSEvB4PGRlZTFo0CC6du2KwWBg9erV9OvXj8zMTLG9tm/fvgA+nT+F9evXi1V1APXr16d+/fpCT9H4+eefxQAMVA/+KBw7dgyHwyHC1AAMGDCAvn37Cm2Px8OGDRtISkqibt26QPXW3OjoaKD6BbZv3764XC7++OMPWrZsyZEjR/B4PPTt21c0iG3atGHgwIEijJuymmH9+vWiA35m2VDS3ul0EhkZSVRUFAApKSli1Xpubi7h4eEEBweLfFZe2pR8NplMNG7cGKhuJHr37i1CIMbExJCXl0dCQgIJCQnEx8cDcP/995+Vz0ePHqVXr14sX76c0NBQOnfuzE033cThw4fJyMgQL6Jut5s+ffrQokULAOrVq0dqaippaWkA9OvXj6NHj9KhQwcyMjJEh1EJlbJp0yaxLT87O1uUK8W/wMBAfv/9d9q1aydshOrQGd5hD73ztKqqivz8fAwGAzt37hT5e+edd+J2u8XAtFIWATEJ2bx5c0pLS+nbt68oG8eOHaNjx46sXLlS7OhRwv6kpqYyePBgsWLZarWKl8HOnTuzbds2+vTpw+7du8WAvbKi+tprrwX+HIhs0qQJVVVV9O/fny1bthAZGUlAQIB4ftPS0khNTSU8PJy4uDgAkU7Kah69Xs/x48e58cYbycnJEavXlEmXo0eP0qdPH9LS0rBarXTr1g273Y7b7aZHjx4EBwcTHByMx+MRk6dKGQsODhbhs7KysnA6ncIe5ZlWVukqz3RWVhaZmZki9MiBAwfo1KmTeJYTExNFvgQGBnLttdeKfElISBD3u1C+eIcPU/LFarWKgQTvfFm+fDkdOnTwWfmn1MNKvii7UZTJwpYtWwLQu3dvkS/BwcG0bt1aPL+pqalih413vjRr1gyPxyNWunbu3Fms5vXOl8rKSlEHRkRE0KVLFxGyrGfPnmLFeXJysnimFHuVFXcJCQlUVVWxceNGhg4dClR3ZsPDw+nduzfl5eVcf/31YuDK7XaTmpoKQE5ODuHh4cLuyspK0ebUr19f2KOEmvvll1+IjIz0aXOUdFTCSSrhBwsKCnzanIEDB1JQUCDaHLfbLcqY0uYcOnSIJk2aiHAibrebfv36iTYnJiYGj8fDypUrxX09Ho/Y4VezZk3Cw8Pp2rWrWJlntVpFfd2/f3+hpYQ48Hg8oi3LyclBp9OJ5w5g4MCBYqFCWVkZzZo1AxCLDuLi4khPTxf5pryIKWF0UlJSyMnJITQ0lNWrV4v69+jRozRp0oTS0lL+85//iAmijIwMMegaExMjVsQ1atRI6CckJHD06FHxDKalpdGhQwe2bduG2+2mbt26YgdK7dq1qaqqYvny5WRmZornQ0kzpc1SVr0rbZZSN3qjlFWPx0NBQQGtWrUSuxL69u0rQk4oExBBQUGinKxZs4bU1FThW8+ePUUaKW3WypUrxcBiXl6emPxISUkB/myzvvvuO6xWK126dOHo0aMYjUZSU1MJDg4mKiqKm266SYRnUeo1pc3q2rUrAPv376dJkyY4HA4GDRoknv/OnTvTsmVLnE4npaWlPPvss3g8HvGiqtg4fPhwTCaTuHe/fv0oLCxk8ODBWK1WkXbXX3895eXldOrUiYCAANFfaNy4sbgmODiYtWvXivpXKUtQPWin1IdKn8+7/lXSUqlb+vbty/Hjx8nMzGTgwIFUVVWxcuVKAFH2EhMT2b17N3FxcVitVlH/KgNIygCh0qcaMmSIsPXYsWOUlpaSkpIiBlkHDBhA8+bNcTqd4nnZsGEDycnJDBgwgH379tGpUyduuOEGsTNFKUfbtm1j6NChoq5et26dqLdSU1OpV68eBw4cwO1206VLF2GH
xWIRZSs0NFT0a8PDw4mKihJppkyQnZlmSl/rYtIsISFBtKFKO64sBlHud8sttwg/lDbr6NGjfPXVV5SVlYm+kFLXKuGjoXoXgNK/1+l0REdHi7o2ISGBpKQkURd517XFxcX06tVLTFgpK6cv1L9XdrM0btxY1LXKgGbnzp1FXav015X/GjZsKNJd2emjTFyEhoaKeuPGG28Udf/NN9+Mx+MRK+S988X7fWPgwIG43e7zvm9UVVWRmZnJDTfcIOp+xR4lbJDyfCttoGKPki8nT54Uz/TRo0dZvny5qMeUfBk5ciRut1v0V4KCgkhLSyMtLQ2DwUB0dDR169alrKyM2267TeRLUFAQcXFx4t3OZrMRGhoq8rBfv34UFBSIsOxGo5EGDRrgdrtFfijpnJiYiMvlEgsl0tLSxLXKxLyyGywxMVG8Y6xZs0b0GwCx+0RpL5UJ8ZiYGI4fP87p06ex2+0MGTIEt9tNfHy82Ilx+PBhsrKyWLt2rUjvyspKMenfuHFjQkNDCQoKEv3ONWvWEBkZSVhYmCiP7du3FwP7q1atIiQkhIqKCgoKCujTp4+wTbm3ct+0tDQxoVezZk0cDgdt2rShSZMmREVF+Txjv/zyC506dWLr1q0kJSWJcuZ2u8WEgcPhIDMzk169erF582YxaN2wYUO2bNniM/gHiBDSVquV7du3Exoayu7du8U7m8Ivv/xC06ZNOXr0KC1btsThcIj+nrI79bbbbhO7HI4fP05AQAC///67aLszMjLOyqumTZuKPAwPD2fRokUA4t2jrKyM8PBwmjVrRkFBAWvXrhV2n1mWlfpOr9eLvo2SV97vvcq9lbK8fv16oLqPfaGyrGgrz5V3mxEcHCzK8sqVKwkMDPQpy0p4rjPLcnZ29v8sywEBAXz//fdkZmaKsqz0r1u3bi3Kcn5+vk+f0+Px0KdPHxISEmjSpAnR0dEEBgbicDjo27cvq1evxuOpjmqgvOe2bdtWPKPeIa3Ky8sZPnw44eHhxMTEiInlwsJCkpOTxYRYSEgII0aMICMjg65du+LxeEQ/TenjZWdnc91115Gamip2ZcXFxVFRUUFycrIYg9i/fz+dOnUS5QAQYxZKeD6r1Sre0TZv3uxzrcVi4dixYzRt2lSUVaPRKCbwld19CQkJREZGkpqaKt6fXC4Xa9asEX1OZSfRzTffLMJqKn3B4uJiatasKeq4sLAw4uLi2LJlC8nJyT4h2bKysujcuTM33HADf/zxBykpKT4hYe+++27RTiuLvEpKSsjLy+P666/HbreLcp6ZmYnVauX//u//AMTuPGXR4rBhw0Q52LVrl3iGsrKySElJETsSld1lQUFBop1WyntaWhrbtm0jJCQEq9Uq0uvM8rlx40YMBgMbNmzAbrfTp08fLBYL9erVIzw8/Lx1bY8ePVi+fDl9+/ala9euhISEkJaWJt59Q0JC6Ny5M+Hh4aIeBET/w+PxiMUBt9xyC/v376e0tFS8lyu7kHr37i3qBKVeqFu3rtjhA3/2n5TJ5k6dOoldO97vg/369ePkyZPifWft2rW4XC6OHj0qQj+Gh4f7LIgrLi4W7ZvH46Fjx45s3ryZTp06AX++Tyi73Ww2m8+4ar9+/Thy5Ih4FktLS0VYfu++ldFoFHX+3r17adSokUgzZTIzPDyclStXsnHjRnr16sXGjRtxu90UFRWRlJTE8ePH6dSpE1VVVWJBkNL3a9SoEe3btyc1NZXNmzeL8Sbl/spxCD169BDpquRbUVERrVq1Ijw8XIRLfuyxx0RZVtLszHxSogedqZeYmEhaWhp2u13sglfyKSUlBZ1OJyIVrF69WuzSPHDggNhJpVyrHAWQlZXFl19+KdpW5TrgnPadCyXPzheS+2pDTlhJ/jVkZGT4TCZB9YtUTEzMeeOYKr+D6gFF798r5yIpYeLOpa/T6USlnJCQIELkJSQkUFpaSkVFBRUVFaIij4mJISYmhoMHDxIVFUV4eDgNGjQQ
NigdAEVPuafye2UXgbLCLzk5WazYadCgAQ0bNkSn05GRkSF+462XnZ3N/PnzsVqt4nOn0ynOW1EG6hQ7FL0TJ04QExMjzq5S0kHZ5eU9Sei9qqhTp078+uuv4j5KY+GdjgcOHBArMNatWyfCE7hcLpKTk8Wgw7PPPivitBYXF4uXzIiICBo2bEhAQAAZGRlnnRei5LFiozIAuG7dOrG6WglFqOSpd8hEpTOlrGIyGo2iYXS5XCQmJopJQO+wEMr3gDivwzufPR4PJSUllJaWijNXlDwrKioSE4oej4fbbruNb775RpSBxMREoaV0MJRyqjB16lQ8Hg8vvviiOEcHEHYkJyeLlXdOp5OsrCzxXXZ2Nh999BHnQrFRefF56aWXgOrn56effhLPm3dZhOrzq7wH6BISErBarSKtgoODRTqGh4eTlZWFy+US6eLxeES5VlbqKOH1lGclNjYWo9EowmV4/+sdlqRTp044nU4RIkGxR9Fv0KCBWP2plGclvIOSjh9++CFQ3TlSOvNKGVC0GjRoQGJiorDD5XJRVVWFw+EgKipK7LRStq0XFxdTUVEhXgJPnTol7FFs837pUO6j5CtUP2f16tUT1ykDpUoYnTPzRfH/fPmiTOafmS9BQUEilIESvkbZPRAVFeVjp1IPK+ngHb85KipKTA7GxcWdlS+A8F2n04m0UupdJZa0on3nnXf6aHufXXJm/a683NasWVOEHVXKW0lJiVjx5XK5KCoq4ssvvxR1/Jn1q7L7JSQkxKeOV3ZYKaEWlUF08G1zEhMTRXif6OhoTp48KezxTscaNWqIwSkljZXrlDanbt26PufcKIP3Z7Y5CQkJPrHWvdscxZ+8vDyfNsftdvvY1LBhQ7Hj4OTJkyL8n7eW1WoVAyMBAQGizQoPD6dJkyZix4T3rjWoHrzX6XQiJK3yQquU1dLSUhISEkS9oTw3NWrU8Akj633GnTIgrUyyKPW7MvielZVFo0aNiImJITg4GJPJxJEjR0SaKSFjFe0dO3YwYsQIYmJixMuMMiCqhOVT0iwxMVGstFfSLCMjQ0zceuOdZkpYEagO7aacQeR9Pt65nhelzapfv77w88w2SxkYUnZGKPdR2iylXVDaLKX9rqqqoqqqyuc5sNvtYld0Xl4e8+fPx2w2U1BQIMpTdHQ0Bw8epKKigoSEBPE8WK1W8Yzk5OSwZMkSsTJQuU5JR8Wv0NBQEhMTRYib77//HqiuA6dNmyZ2hHrXRdu3b2fFihVERkZSWlqK0+kUC1Dy8vIoLy8XO3W82wOlvvQ+d8i7/lXSUTlDy7v+9a6jvftU3iHhlGdKqSuVsgxw8OBBoLqMhoWFiTRr1aqVT54odVq9evVEKB2lvQoLCxN5r6RZRkaGyBe3283BgwdFiBXvemrixIliQK+wsFD0a5Vy53K5xI7VoqKis9Js+/btQuvll1/mp59++p9pBog2VJnMUJ5hpaw3adJE9De826wlS5aIPqziX05ODkajkX379uHxeMjNzRVnogFn1bWNGjUSYbiV7xVuvvlm4b9yJhycv3+v7DJXQrEq9ijPgGLjqVOniI6Oxmg0ip0oCpMnTxaTyVDdjih9Ku+632w2i3SIiYnxqWu96/5zvR+c+b7h8XgYMGCAqPu97QFEvigo9jRp0oTg4GACAgLIzMwU/bXdu3eL9ljxuUWLFsTExIhBdOUctYMHD4q2V6mTEhISRL4oYbiVOtflcrFnzx4OHDjAvffei8ViEfmiTHqYTCbCw8PFMxEdHU1QUBAmk0nU2Uodr5yP510elJ3Xdrsdu91OdnY2hw8fFvdR0l1pp5W0V8qrshNDKbdnnuuh3Fs5swfgs88+o1atWrRv356Kigrsdjvl5eUcOnSIvXv3ilCmp0+fpkmTJnTt2pUaNWpgtVo5deqUz7vKa6+9xsMPP0x4eLjPvZX7hoaGivP+oHoxUEJCgljcoSxQzMnJIT4+nkOHDhEeHs68efO4/fbbiYmJ
ETuKlUlSZUemEkI6ISEBp9N51plJ33zzjaiz8vPzqVGjhjgDUykzoaGhnDx5UqyaV9pHJby50qYpz3FRURGbN2+msrKSkpIS0tPTRTtWv359kVfKxAf8Obj3xRdfANWLqJTJFUA8g8rCOm8/Fi1aRHR0tAhBXl5eLvo2Sn9EKctRUVGinChl2Xuh1e7du8nIyGDcuHHExsb6lGWXyyXOg6yqqqJJkybUqFFD7BJVynJ+fr4Ij+hdlqG6n6qUZY/Hg91uZ8uWLSKsonK9d1kODg4WE3RKWVbaw9atW/uUZWW3TFBQkE8ECKU8Kc9tQkKCqJO7desmdj/Gx8dTo0YNEbrVOyJMQkICDRo0oLy83OccKmWsQFmAMHnyZKB6saSSdkajkaysLOrXry8WJCQmJor89D7D2+l0ikmwhIQEn3cYpT0sKCggOjpavJMr/Xjvd0WlblXKLVTXCUq7cPjwYfH+VlVVJRaOQPV7zpEjR8S7rtLWl5aWotPpxISIghIe+9ixY7Rq1YqMjAyf93+drvrswRMnToi6Pjs7m/r165OVlSXS2btvk5CQQI0aNXA4HMTHx4v+V61atejSpQvr16+nVq1aot32eDwiEg1ULzCuUaMGQUFBZGRkiGcoIyNDjCF59zdMJhM///wzgIh8k52dLZ7NsrIy0tPTRdhTpZ0LDg4WY0DKRGJCQgI2m42DBw/6pL93XRsVFcXWrVspLS1lwIABJCYmimfWO9T8ypUr6devnxjnUBaYZmRkYDAYxM6isrIyJk2aBFRPsBuNRrFbXUlvqB5DWLx48VnnGSnh7pWQjZMnT8bpdLJ7925OnjwpIqco7bTyvqOMc1VVVYmF08p1Sp3wySefsGXLFmFP7dq1ycjIoHHjxqLfqdfrRZ/TZrOd9d6slCWlrlCee+VdV+mLKCE+09PTadSokWgflYUypaWlfPvttzidTkJCQnzaI6VPrqSjzWYjLCxM9OOUvojy3Cn96rKyMmJjY9mzZw8ej0eEnR83bhwdOnQQfij9j4yMDJ/3dOX59h5jU647czG80nYoi8GVts97jHPr1q1iIaPyLqD0mZ1OJ2+88Qb3338/6enpJCQkiPo0IyNDHBUSEBBAz549adGiBffeey9Hjhw5y75NmzbRqlUrmjVrxu23386XX37Ju+++S48ePYRvVztywkryr8Fms/lMxCgo8bAv9Duz2UxJSYnP75WVuyEhIRQXF4sVfGeihIwICAgQ97FarWLFRHFxsdBVtoSXlJQILW/7lAZeOTxe8cnbruLiYnFdaGioeKFWrnO5XD739NZ79dVX6d27t1gdBPg0LIpN3pqKnhJOTfm8oKCAGTNm0LNnT9EQwZ8TOzExMbz33ntiRZnSQCqrQ5XwXX369OHmm29Gp9ORk5PDXXfd5TO4pfDSSy+Jv/fs2SNi3QYFBREWFobBYKC4uPiceW2z2USaORwOevbsyW+//SZWHcOfk27KpIKCsupIGRhPTEwUK0yUkIJKh/nMw+aVDpaSfmfm89atW4mNjaW8vNwnn51OJ926dRMdopSUFBYsWCDyy2q1+pQ15Z5KWb7tttuYNGkStWvXFg2/gnJegLJSWnlx8Lbx1VdfFYNJ3h1J7/tBdciFDh06iLj448ePx+12i/KnlB1ATHq4XC4xqOmd7vBn7GolbrCSvso9lU6Lcq13uA7FJ6XTAn8OLirnkigDw0qnXOnIK/Yoz1tQUJB4aVcGyZTVicrWeiUsgLKa0Hvljbc9VqtVfKeE/VOeAaW8PPjgg8TFxYkVicrnpaWlWK1WkS/eaa8MeCvp652O3mezKfmnxEg/M1+U67zzpaqqSnTclBeHM/Olfv363HPPPUJbecFTwvadSVhYmMgXpcPepEkTkd5n5ov3M6jki3JmSs+ePUU94z0RDr6Dgt5pr9SB3t8pHeTKykrhl5Jn3vmi7IIbP348n376qfg9/LlT1ftf7zpe8U2pY5Uyfmabo8S0h+o6tLKy
UthzZjoqE7/eKyK92xzvl3uDwSD+PrPNCQsL8ykn3s+A984M7zZHeSbPTEflJd37/CfvNllJeyVMpmKPUn96l2/lmVQGLPLy8ti9e7cYLFFCDTocDkJDQ0VZ9k7b4uJin5Vl3uV7zZo1orx4LwxQyndoaChhYWFissm7blRCOiqMGzeOPn36EBYW5lMHKPZ5p5nin3IGgdJmne/Abu80UygqKhIhRWrWrOkTzk5BSXcl373riTPbrFq1aonfDBgw4Kw2q7y8XLQVii1WqxWn03lWWahduzYOh4OKigoGDBhAWFgYdevWpbi4WNSjSpjZM+2oqKjgueeew2AwUKtWLZ/BceU6ZSWzd+hipe2/7rrrxOr+WbNm8dVXXzFr1izq1q3rUw7uv/9+fvvtN06fPi2el7vuuotHH30Ui8Uizl/avHkzhYWFPvWvki5wdv2rpKNSlr2fK5vNJup87/rXe2GNdzp6T0rp9Xoxkac8v8rvvCdTvOstZWedsno4MTFR/FbxRyl7SvlPS0tDp9NRp04dcRYAVC/umDRpkngBV8qZMriqlAOHw0FcXJyo05Q0a9eunVhEoNNVn3n25ptv/s80A3zaUMVP5Z7evpzZBu7du1fs2jWbzaKubdSoEffdd5+wRdFSJjS869qwsDAxoaj0TRWUEFNK/iicq38fFBREeno6PXv2FIssTCaTCDd6Zl0bEhIibFM0Vq5cyfbt2+nRo4fIL71eL+qNM983lHRQzhfx1jpXX+JC7xvKBJG3hsKZbaC3Pcrg+XPPPSf8huqwsN75oixgUNJTCSFWWFgoBnWU87mUa5Uznc5cOaxEL8jJyRFlTLFPeRaU9lixU6njlUkR5V1PGcxW8vf+++8XOzSUtPZ4PKLfAPj0L5TQkWfuEFLu6+2zgnJvp9MpBt/37NnDxx9/LBZEKAtCJk+eTFlZGadPnxaLW2bNmiXKoJI+3osgR40aJcIAe0+0KPfV6/WiDYTqwXLFTu8y43A4mD17Nnv27GHDhg107NiR5557TpQtJZyud9lQ2kJl0E9p05TFVLVq1RLhoJUBTOW5Vn6rnB+i+KT44H3WpVIvt2vXjl69emEwGMSAXUlJiWj3lfcI5f+VtlvZ8aloW61WMRANf76TlpeX+9QxBoOBX375BZvNRmFhIcHBwaJP5J2vSnlSninvvPJuDyIjI0lJSeHHH3/kjTfeEJ8r6amc4eNdnpRzj7wXaSnPk2KjsvjIbDb7lGUlfZX8OXr0qNgBpNiolBVvlIVGZ75nKnWYckaXd1p79++LiopEWQgLCxM+mUwmkUfeCy8Uf5UxCG+8n8usrCwRJl6JyqCUI5vNRmRkJOHh4ezcuVPUn8riKPiznVPyOy8vj7y8PFF+rrnmGuG/0l88s15V0iEsLAybzSbKrbKLTJkcsNvtHDx4kKqqKpo2bcrs2bN93u+9xy+U/terr74qwi96o/SJbTYbbdq0EWfTKpMcyiJr77re46mOAqDc81zjWUr6e4eNDAwMZMaMGRgMBtLT0/nPf/4j7GjZsqX4jVI+jUYjxcXFIk1/++03tm/fjsfjEee5mc1mevbsyfjx432eUeXZUNqYa6+9lieeeIKffvqJ8vJyn/LpXUatViuvvfYaOp2O2267TXyuPJNK+Vy6dCmxsbG0a9dOvDcWFxdTWVkpztK66aabSElJEeMcyplcyv2UOuLDDz8UfdGuXbvidrtZsWKFsMfj8XDDDTcwceJEYmNjxU40JX8Bn/ZB+W1ERARGo5F77rmH06dP+/T3FH+U56eoqIi77rpL3FOpEwYPHuwTqWTp0qWi3Crtm9vtFhN1Sh/FOz0VSktLRTQM+LN/4D3eAb79OGVXZ2hoKPXq1ROLDJS0UxaPLF++HKhetKig7KT0vpcyFub9/nzy5Elhw0MPPUR0dDROp5O3335b9EU2bdqE0+k8a1xXqUO8x9gUH4CzxjbCwsJEPa9M4JWVlREaGsqSJUvYvn07gwcP
BhDPWFhYmBinuPfeezl27Bi33347BoNBtPvFxcXk5+eLRRuvv/66CPN97733+pwj3q5dO55//nk++ugjgoOD2bp1K+PHjycgIECE7vw3ICesJBKNsHbtWtauXcuYMWMuW6uqqorRo0cD1atvvVEq4sDAQK677jpGjBgBcFb8emVFWMOGDcULRePGjdm3b59PJ1qhbdu2YtWxcgD0peDxeFi1ahVNmjTBaDSKVaJnHvh6PoYOHSrsq6ys9Hnx8x5I+ysoB56fifcZB0ajkWnTpomY7GfuHjsXEydOpFevXgQHB/usKvorFBYWsnbtWp/O3flo2bIlsbGx6HQ6EYaooKDA5+XCX1BWK/0vXC6Xz6G9Z6IM3Clxvb0H9y6V2267jeDgYDGp5D3w8U9gNBrFajll4vJMUlNTRXgsZaWf946BC6GsuPcOH/q/UFZgw9l1DuCzI+FKoeSL8tJ5ww038O23317x+1ztKKt+r7nmmouuAy/E6dOncblchIeH07x5c9FObNy48bJ0u3Tp4jPQDBcXNqF3795A9QDPpEmTLngA+ZVCSdfY2FgxMHDmKvXLQdlNcTnk5OSI3U7//e9/OXr0KDk5OT51gnLI9pnlxOFw8Mgjj4iD4/9K++Nt+7p160R9MnDgQDp06MBDDz101u61uXPn0qRJE3F+ElTvyBk1apTY2VKjRg0RRvB8k4l/F263+7x18IVQJpEAcSbRhdi7dy8TJkw4axXpzTffTK9evcTAmJJmZ4Yjcblc1KtX7yzd//u//+O6664Tv12wYME5F5ZdSVwulwgn602PHj3o1q2bsOViWLp06UXb4XQ6RV/iXG3WX6GyspJXXnmFRx999JzhrtXAu8xfTn3udrt55JFHxMCj9/mSahAQEECdOnV45513xIryy0E5U0mv1/+lZ+hKoeyGNxqNxMTEcPfdd5/1fjJp0iR0Op3Y0e3xeHjggQfEDhEF74n/zp07s2DBAoxGo1h0d6nccccdxMbGijNinn322Qtef76yGxsbi8FgIDAwkLCwsP+vvfOOr7q+/v/r7tzkJiEJGWSQkAAJK8jeCSsqIkNAoiB0WmmlCmKp8KXQAoKigKyCVFtBFBSihBE2DkgAQYYIBLJDhtn7Zt3c/P7I4xw+nyTgaJWf7Xk+Hn1Ybu79jPc47/N+n8VRXMpIVuCOQ54yXRU5sLQ2Rp977jmEh4ezkxZBNWnuRmNjI1asWMERL/di9OjRbDzV6XRYvXo1amtrOVrn38FoNCIsLAyrV69mh4UfG4q+Icgx5tv4d+bGqlWr2CBwNygKhigpKUFKSkqLsa6kffv2fNhN6c6U2QQAYNq0afjoo49U+z2lY4ISLy8vBAQE8D7jwoUL3/Jmd4cMnTRuKFq2uLgYe/bswbRp01g/bU2Opaen4/Tp0yqH3ta4desW3nnnHQBQRc3Sof93JTk5mduF0sASCxYsQHV1NUJDQ7Fp0yb+nKJN7kVMTAwmTZrUou8XLVqE8ePHszO4EpIV3t7eePLJJ/HnP/8ZlZWVKoctJZ988gnrnMrahUrsdjs+++wzjB07tsW+gJwxNRoNgoODsXr1aj7naO06QFPU9eOPPw6dTocLFy7A0dGR0yvSGcSCBQswZswYODg4wMPDgw0hlBZUCRkdqU5eY2MjduzY0eq7kGycPHkyp1RX0rNnT45ABO5kjvkp0Ov16NOnD6f3bt++PTu0kywH7uwnAagM4Q0NDaryBErIEcZgMKgMmBERETh48CB0Oh2uX7/OBquKigo2iv0YlJaWYsmSJZg0aRL69++v+ltwcDA/R2RkJDujNz93ojSa3t7eGD58OKKiorBx40bk5uaqvvvcc89hypQp6Nu3L7Zv34533nkHbdq0QWJiImbNmvWdzr1+DojBSvivQelFoIQs2vf6HXmlKH9PBfwqKyvh6uoKFxeXVg8vyDOqtraW70PpegCwdw0A9qx1dnZW5R1unjqCIhXonZSbFbLoA+B0cnRP
8hhR3pOut3TpUsycORNms1mlsGi1WlbC6JmU+WvpemVlZazMbtiwAV999RX+8Y9/wMvLS7W5UKbBo2soP9fpdCpPTPLIbWxsRPv27eHo6MieGUVFRaywVFRU8KaHCs4D4LQADQ0NcHV1bbWvld4XjY2NCAsLw8CBA2G32/ngipRZyndP0D3JQ9PPz4/z3Z4+fZpDloE70WVKbxHqs9b62WazYdy4cS362cnJCRUVFZx2CbijdNtsNlWkkbJ9aSw3j5pSQu1NSh4ZR+gZMzIyMHPmTFZo6CCXolCU48pms/FmkOofUU0I5dhRto1er1c9o3LsKPuaPBuV7aL0eqF0McrvV1RUoLGxkfuMDkfMZjPKysrYuEaKJaWaoOdxcXHBgQMHUF5ejg4dOnDhT6BpA0mFgIE74fB0L6Wiq2zj8vJybmNKMabX61FZWamSERTFokw5Sam/qB+VbU9RXdS+Sm8gm83GY0+ZjoY8gpX9Qt9r3i+0GVSmISotLb1rv1RWVnIbtKYglZWVcVuR0knjhK5DhmBqI4L6BWjaDCjT7ZBHEkV5KsPklTKLZCpB7QA0bQTovajPyEhFHsNarRZjxozhZyT5Ru+s/K9Sxis9tuidAbRYc5RelFRvhZ6neTuSYVB5CKRcc5QGYzp0o7WMrkHvRv3ffM2hPqR2oHFEfU/QRkGn06Fnz548z5VzErizpjg6OrZYs5ReqNSGdXV12LJlCzQaDUaNGsV1c6i/gKY5Setz87ZVRqkA6vGt1Wr5N3StkpISjmisrKxEWVkZbDYbf5fazNXVlZ/DbrcjOjoar7zyCsrKyloYmunfzeeLzWbjAuyurq4tvGQJZZuR3KHaItSftA42ny8VFRU8DpRygiJ+qK1IjpnNZnTu3LnFmmU2m1UHAiSTDAYDFzomPDw8eE5NnDgRr732GqxWKyorKzki53e/+x0ee+wxfkblgcTAgQM5okV5XfoepbClttBoNCgrK+MUos8++yyAptR7a9euRZs2bfD3v/9dNQ6ioqIwcOBA2Gw2nj9nzpxBfHw8y52ysjKWexkZGap1lP7bXP5SO9JYVspfijgh2ULyV/nuyjlF70TMmDFD9R36HY0P5bVpjCxcuJDnvbKeBq0nNPauXbsGoKn2DfULpZGi/qZ7ajQa/rcyxSn9LTQ0lPv1bm2m0+nYw/xebQZAtYbSe9I9le9C1yddwt/fnyOhKY0oyVq6FtUXIrndXNZSBBy1H6VvAppqndIzKJ2rmuv3CxcuRHl5OQYMGAAvLy+Vfk9ZBprLWuUBUmVlJbZt2watVouxY8eioKCAn9dms3Harub7DWX6T6WspfdqbX+glP0HDx7kvzXfbyhRymsAKtlPvxk4cCA/j5OTEx88U7+QJzu1J/WLm5sbH+ZS/R36LkVbNo/+d3Bw4DSxyqgucgRSphSm5ycZr4xAowhSZZQQ3ZsM6tRfpDcAUOkXFL1Iae6U0Lht/jndm3Q1m82Gv//971zHhpwBgKbsBm5ubpyS2s/PD9evX0dsbCzfW6mv0n1JB1XKG7ovGehJVyktLeXnVI4Zo9EIJycneHl5wdfXFy+//DL27t2LoqIijgCn9qT9AxmzSQ6RHKG2pjlD65hyXptMJo6QBoB//vOfAMDp0CgFNUUNKfevFPFIz00Hzcrxr1y7KUqLHP3y8vL4s/Lyco4IoD0aPVtdXR0GDhzIke9Wq5UdIKh9lZFCND+UfaVcD2gsR0ZGqvQKmmt2u53HslKm6/V6VcSA2WzmiGZ6TqvVyu90t7FsNpt5bVBG4jbfT5Ju0tygSrKzoaGhhVxV6veHDx9mWVhYWKjaC5Dc0Gq1eOaZZ/ga48aNQ2FhITp06KCqQamMniQ9R4ky4qeyshLPPPMMoqKicODAAZ5zZOClNie9haKDadyS0Yn6qTW5SvOPopmaGzmo3piTkxMaGxtx48YN9OjRAy4ud9LbVlVV8ffoN8ePH8fMmTM5LaASWkNcXFxw7Ngx
dtCgfiM92Gw2q/TvmpoaVfYSpW7zwgsvqPRCkikFBQU4fPgw+vTpA7PZrEoR+8knn/BYpfFps9ng6urK9bk6dOiApUuX8hyi8amM0Gke0UKRgjSmKNsCyZnmZ0Bbt27FH/7wBzz22GMtztEo2r2oqAh1dXUYN24cAPCenb6jjNIG7tQSJr3T1dUVt27d4nZ5+OGH0aVLFzQ0NOA3v/kNP+tDDz3Ejt4vvvgigDvRayQPyLFbmdWCHJYoNWnXrl2RnJys0vcoIovG2EMPPcTvqowioncinY0i36iPaK2k9VP53nQtwmKxoK6ujucEfU953kHvqJwjJKfKysp4XSCDKK0F9LlyfivXb2pTiio7fPgwR4i2adOG37eiooJTzycnJ7NM0el0uHbtWotzXWVGFOU4ojWxuWGU+kmj0XCknMFgwKefforw8HAsXbqU24zmGJ3T0fN5eXmhT58+KCgoYJ2Ovufm5qbSMYODg+Hj44OcnJxWzzrDwsIwaNAgPPLII3BwcMC5c+d+VMPcT4kYrIT/GpQ5d4mKigoUFBS0qD3V/HdAkxGkec5SHx8fFBYWct2L5tdXequkpaWhffv2MBgMSE1NhcVigYODAxwcHPh3hYWFKCgoQKdOnVBYWIjS0lKkpaXxMygVGvqcctMCTQtBfX09rFYrGhsbkZiYyAfVyloYrT1rRkYGtmzZgn79+qkMVps2bcK7774Lm83GeWvpOeh6fn5+KCgoYIH8xRdfYNOmTQgLC0NFRYXquUmhp82O0tOGPCyVuZGVtcHatGmDxsZG1NTUQKfTITExkb1iqHYIAFXqK/K2ojpQVL+heR8rDSN2ux2+vr6w2+2or69Xed4oU83RMwF3Fsi0tDT23IqIiMDUqVM5vJoWaGpfZSqV1vrZw8MDXbt2bdHPQUFBuHXrFqxWK2pqalSpBoxGo6rNUlNTodFoUFRUxJ+lpaWpxr5yY0y/TUxM5BRLBoMB/v7+SEtLQ1VVFbZs2YLo6GhVG65btw79+vXj1I4GgwGZmZlczyQpKYn7xd/fXzUWAXANIlLq6BmpLaqrq7kfSktL4e/vD51Ox+1CHk6pqam88SgqKuL5FhwcjLy8PNhsNj4Uof9Seg4fHx/4+voiKyuLD1ypj9LS0mAwGJCcnAxHR0dYLBb4+vrymLdYLNDpdKyUkYcdzT9SNPR6PT9PWloaUlJSePNBBikKe3d0dOQc/wUFBTAYDNDpdLypd3NzQ0pKikoWKBU45Zgij1nqF3qvnJwcVFRUcB5/SlVAv6Pv3atfyHhZXl5+z35paGiAVqtFUVGR6jlpLFJ/0L327duHoqIiNkYtWrSI0/8o5yD1C/2N2ga4My8pDzgV/qXxoZR1Sq/i1NRUvlZeXh6Cg4NV70X1tPLz81uk9dHr9S0iN5SKtFLGk7dtUFAQALXCq1xzqI6gTqdDYWEhfHx8+HmU7Zifn8/KLrURfY/WnKysLG7/wsJCTi/YfM1JS0tTHYYoxxnNZS8vL9Wao3ymy5cvcz0aZf0vjUbDcyA1NRUVFRU8puvq6njOlJaW4saNG/D19VWtk/ReX3/9NSwWC9LT05GWlga9Xg+j0cgOFhaLhQuoK/uA5B5dh1Jt0jumpqbymFamsSgsLIS/vz+SkpJQUFDAKTw7dOjAbRMcHKwa323btuU+p+eia1N6TeW4orlPhwLKmjFKlG3Wrl07ljvUZhRNRO+snC/BwcFISUlhY25mZibPF5JP1GaUWpfWruZrFq0L5DxC6zeNVeX4JNlFaxZ5T1ZWVrIHJUUU09hQzquqqirYbDZYrVakpqayTE1JSeF0SqmpqfxeFRUVLHd8fHxUBgGdTofQ0FBkZmaqZFFQUBCCg4NZ/tL4J5mp0+lQUFDAkeK5ubkqOas8CFd+TuuBl5cX13MCmuSvUkaT/PXz81PVdKR2JFmpPPymcUU6FbXZV199peoTkmknTpxAXFwcv8+tW7dQXFwMnU6HlJQUpKamcupW
yvFPhyLUNtSW9B6U9qT5ONHpdHzY17FjRx5792oz2mjfq83oWZRtRtD3rl+/znNduWY9/PDDfG+SYyRrac1ydnbmAvX0HsqxnJyczPUmAbUTwHPPPcefL1q0iPtQqd8XFBTg4MGDcHR05IN1pZ7crl07lnn0jG3btuW2b2xsRFJSElJTU5GRkYFBgwbhX//6Fz9HWloaH4QoZb/y0KqwsFAla5Wyv/n+QPn5oUOHADRFvyhlP+0PiOZrIMn+69ev8ztVVVXx8wQFBXG70d+//vprFBQU8KGPs7MzfH19uUi7UueisUBpz0iXov6j9QBQ1y2kyJH6+nqUlJSooheqq6tRX1/PMpv2EKmpqSpjDz0npasj/TAkJITlC70nrdPUP83TMtO4VTon0b2Dg4O5mD3QZNSlesoODg5sFOzWrRvrLNQWbm5uSE5ORn5+PioqKtC2bVtuOxrfFRUVLVKX030rKipQWFjI+5mvv/6a113aM9DYVa7v5MlfUlLChh5y9KK6w5Tui4rS0/pFdR1pzlD/VVVVITQ0lA0pQJPuEBAQwMZoaovw8HDY7XZe00jPoxp4+fn5nLaNjIh0aE7yVpleNSMjg3XS5557DrW1tSgtLUW/fv04AiIoKIjPCEgef/3115zW02q1wmw2s26j7CsaKzROqP/o+ZuPZXpmGss6nQ41NTUoKSmBXq/H9evXkZ+fz0Y7Gsuenp5wcnJCfX19C2Mz1W+821imd1KO5erqapUDm1arhYeHB8tV5VimVLHV1dUqOUTjidqbHDBramqwbds2bvfdu3dzunWr1aqKUli8eDEcHBzg5OQEHx8fbhul4092djbLp+ZOlv7+/khNTYWDgwNWr16N4cOHc00ziuKn2lmJiYm85y4oKODzAIrgdXZ25nFL84Geh76r3HMDTbLPYDDwGYQyeoy+r4zwa9u2Le9127Rpg7KyMmzZsgUFBQXYu3ev6rf79u1DYWEhfH19uR6ycv9P/erm5qba62RmZiIgIEC1vtC+lWqlOjs7w8HBgfUv0oN79uypWqf1ej3XZwaaMmpQfVBvb29OM//aa6/BYDCo5hDdm/Rfs9kMi8XC46W5vkTjmmr7kfwmGd+jRw92MFauec1lbWBgIEd30tig7zQ/xyP5WVNTw/ekd6fnI/1DaUR94YUXMG7cOHh6emLZsmX8veaO9o2NjUhLS2PnjU6dOsFgMKhkQm1tLf//goICuLu7s2GCdHxCud+hdyJH+7q6Ojg5OeHWrVsoKChATU0N7HY7Rza5uLi0uBa1Lcl40sdoD066CMnt0NBQ3lN16NABNTU1MJvNSEtLQ7du3QA0yXaSH76+vqwn0BpMDhW0pyJd5OLFi6isrES7du2g1Wr5mZT7UEKpi9B1Sd+lZ1c6virPjWmsKKG1o7GxEb6+vrz2kfFt48aNfD6l1+u5ZnVwcDBycnL4b0RtbS3LjODgYHTs2BEmk0klP4mSkpJ7nmsDYIefu0Wl/dwQg5XwX0NERAQSEhJUyglZ3YcMGXLX3/Xu3ZuVNvp9fX09jh49ioCAAP59REQEEhMTVeH5Z86cQVVVFcxmMw4dOgSj0YgBAwZwXl1SxuLi4uDl5YW0tDRoNBoO+d68eTNKS0sRGRmJsrIyJCQk8GJ76NAhfqePP/4YWq0WDQ0N/P81Gg1SUlK47sC+ffvw+eefw2azITIyEnFxcQgKCmIv0vHjx2P79u2qujMA8MQTT+CNN96ARqPhRSA9PR1ZWVl8PaqDQgp1QEAAR/wcPnwYGo0G7du3h9FoxLVr1zid2KFDh9hbk55j4MCB3I70fnSonJmZyeG8DzzwAC5fvoyRI0ciKCgIhw8fZqUmKSkJWq0Wffv2Zc836qfW0glGREQgPT2dn+HmzZtswLpx4wYvUu7u7pxTm/qZDuGV/Xzy5EkATYtGfHw8L7q5ubnw8vJCSkoK0tPT+Rpvv/22qp/j4+MB3PFEoXaIjY1FSEgIoqKikJCQwM97
9OhRxMXFAWg6eDh9+jR7TsXFxSEwMBBnz55Fx44dYbFYcOjQIR77ykKT7u7uOHz4MCIiIliZr66uxqBBg/Dll19yGPP27duxfft2dO3aVTVOtm/fjiNHjkCv16NLly5ISEjgEPOkpCS89dZbAJrCnJVjEQCGDx8OjUbDh9D0jDR2zpw5oyoQabFYUF1djfj4eB7zwcHB7AXU2NiIhIQE9OnTB0eOHEH37t3Z44z6jNIWKY3KERERiIuLQ9++fVFcXMxecytWrGDP6crKSmRnZyMiIoL7WqvVorq6GhcvXkRAQAD279+Pdu3a8YExKUBBQUE4cuQIIiIiUFZWhlOnTsFisUCr1eLkyZOwWq2cL508kT/++GP2zKTxq9FoYDabcfr0aR47cXFxrKSQjAgICEBQUBAuX74MoCmEPiEhgfs3OTkZO3bs4Of/7LPPuF/S0tL4We7VL8qw/Hv1C9B0EHv27FneAJKMIPml0Wgwfvx4+Pj4cAo5mgfBwcEIDw9HcXExz8GtW7dyv9jtduTk5KC8vJz7pbCwkK/h5eWFtm3bstKn1Wp5bptMJj6Io36hzdnJkycRERGB4uJi3LhxA3FxcYiIiFA9d1FREXbv3g0XFxf0798fe/bsAQCOEDt27BjMZjOOHz8OrVbLMp4OONq1a4eysjLuF5PJxGtOVlYWTp06xQcOWq0WDz74IIqLi1VrDj1PbW0tzGYzLl26hKCgIJ7XtObs27cPHh4evOZQjQSNRsNrTqdOnXDt2jUUFxe3uuYUFBRAo9Fg5MiRfF+NRsN9f/nyZTz99NO8QSP5S/Oc2pDWLxrTNGbatWuHxsZG/h7QtNmmaFdHR0fePNEcyM7ORlhYGKdJ0Wq1SExM5KLaiYmJaNeuHcrLyxEZGcntFhQUhBs3bsBisWDv3r04ffo0H8KRYYDWHYvFwgWvacMTFBTEczAiIgJnz55Fnz59WDabTCZotVpkZ2dDr9cjKioKQUFByMvLU7XZvn370LdvX5w+fZpllLLNlJAhkKINLl26BF9fX24zMrQVFxfzPeh9IyIicPr0aX63kydP8nzJyMjA0aNHeV3dv38/X6e1NWvixIm8ZgUGBqKhoQHx8fGwWq0oLi5GbGwsHz7m5+er1qxt27YBAM6fP8+HxjQHIyIiEB8fj6+++gp6vR5arRbbtm2DRtNULyguLo6jB3bs2MF1L+Lj4xEXFwc3Nzd8/PHHHHFYWlqKI0eOwGw249y5c2hoaEBiYiL8/PyQlJSkOugYOnSoSv4CTYdLVNeKHEA0Gg1u3LjBfU9tSWNPKX/37dsHvV6PkSNHArgTgZScnIwePXogNzcXZWVlLH91Op3KgYPW99jYWP7Mz88PFosFly5d4nXx0KFDuHr1KgwGA2JjY1mnIrnl7u6OzMxMPPXUUxyhHR8fD61Wi969e2PPnj28hr733ntwd3fnNgPupMuk31y8eFElt0hvCgkJQVlZmWoukp6sXLOat9n+/ftx4cIFmM3me7YZ6YfUZkCTIevq1avcR7t37+b70nwBmtKVBQQEcJspZS2ttXR4RhQVFbGsTUtLQ2JiInvVOzg48JrVpk0bdOrUiZ0dAgMD4ebmptLvycA4ZMgQjpJort9bLBZUVVUhPj6eZS1FddP/kpKS8Mgjj2D79u3YvHmzyvDh7OyMxx9/HFqtFgcPHmTZT+/Xtm1bfh46xFHuN/bt2wedTtdC9sfHx+PmzZvQarUtZH9SUhL3UVlZGetXtAaSHKN+0ev1PKcDAwMRFRWF7Oxsfg+TyYQ333wTWq0WycnJ3C8RERGIiIjgdTczMxOOjo7YuXMn94vVakV2dja6d+/ORtuqqiokJibi888/R3l5OSwWC9q1a8c1FtPT06HVavnwjto5JSUFer0eISEhcHNzQ25uLurq6lhmGY1GvPnmm/zdqqoqNDY28nPSIVh+fj7S09O5zajeVEFBAQICAuDh4QGLxYKYmBhotVqkpaXxZyEhIfD3
9+f5RzrNvn37WAbRIa6LiwuuX7/O9y4rK4PVakVJSQkb5KxWK4YPH45Lly4hICAA7u7uOHLkCGJjYwE0Ob55eHjwfSMiInhe5efnw2g04ssvv8S1a9dQVFSEIUOG4ODBg9BoNHjwwQeRkJCAfv36ITExUbVWka5oMpkQFBTE62NSUhLCwsKQlJSEfv36qaIKgSYDHEWnA00H6+Hh4dBoNCoDx5gxY5CcnIygoCCcPXsWnp6ebKggQ9auXbtQVlaG06dPo3379qitrUX//v25nlNISAjy8vJUfUXRRA0NDdi8eTPeeOMNAHccWnx8fLB582bk5ubC3d2d92iku5hMJqxYsQIAVJEq9HfqKzo8vHnzJt+bxjKtye7u7jyWP/jgA446oLEM3DFiBQUFISYmhtuRolJCQkIwcuRI1NTUsA5IuiWtPfcay3V1dQgMDFSN5ZqaGr6/s7MzLBYLGhoakJCQwDonpahU6pwajQZHjx5FWloarl27hsLCQtTU1MBoNGL8+PF4/PHH2RlJmYmDxsc333zDOp7ZbMbRo0dRWlqK9PR0jihzc3PDzZs3WbZUVlZi+/btCA4O5vWDZKfFYmFZX1ZWhi+++IJrUt28eRN+fn64evUqevXqhcuXLyMsLIz34+SwQc4WZrOZx21kZCQOHjzI89dgMHCEemBgIL7++msEBQXh1KlTXAeY+pLalMYtObgBTVFEpHM2NDTAwcEB06dPh1arRZ8+fXi8OTk5sV5KjpYJCQno27cvbty4oTIo+vv7Iz4+HkeOHEGvXr1w/fp1dhACgB07duCvf/0rG1Lr6ur4zOfo0aNwcnLiNPuBgYEoKyvDxo0beQ4BdyKod+7cyeNg165dqKurg5ubG8sKSrFNsvbQoUO8Z6CaSpRBhjLqUP/961//AtAkL2pqahAZGYnMzExef5TvHBcX16qsBe6kTKS9YWVlJc+JqqoqVFZW8vkgpXCj8TRkyBB2cNJoNDh9+jSfKyrThUdFReHixYsYPXo07+MjIiKQmZnJ8i0wMBBnzpxBaWkpp8FPSUnBgAEDWCZcvXoV3bp1Q1xcHO93jh49ys6kdAZB70P7HaWsJ93AarXy+AbAZ4qkw7i4uKjOVePi4tiYSsbES5cuITAwkHXEmpoa2Gw21hW7deuGW7du8XVtNhuMRiNKS0t57BYWFmLAgAHQaptSuGZlZbGM1+v17FDVtm1b1kXi4uLwxRdfICAgAEVFRbDZbBg7dixKS0vRtWtX3oeWlJTg2rVrHMXq5OQEu92OkJAQHgPr168H0LRXpDYj2U39dOPGDZWDJ52TJCcnIyIiAu7u7hzlW1dXx7pxXFwcunTpotpDlpeXo3Pnzjhy5Ajy8vJw/vx5dp6hfhoxYgQKCwt5P0X9k5ubi7y8PNXzKbFarfj0008RGBiI+vp61oN/7mgaf6rklYLwI1NWVoaxY8eiQ4cOeOaZZ5CXl4dXXnkF48aNw+LFi/l7v/jFL5CTk6MKk9y6dSvWr1/PqRUsFguuXr0Ko9GICRMmYPHixaivr8ekSZNw+/ZtWCwWLFy4EKtWrUJoaCj69OmDDRs2cJjvihUroNPp4OTkBIvFgpycHERHR2P37t1wdnbGq6++im3btuHs2bMs9IOCgpCRkYGXX36ZDwInT56Mffv2sTesyWTiRbm8vJzz4w4bNgxHjhxhhYpCZ7ds2YIXX3wRpaWl0Gg0GDBgAC5cuMCp4GprazF//nxERkZi2bJlOHfuHEcYURqBTp06IS8vD927d0dCQgIMBgMr0FQUub6+Hm+88QaSkpKwadMmODk5sReOEnd3d8TFxWHEiBGora3FsGHDuPghhdZrNBoMHz4clZWVOH/+PFxcXNCjRw828tD9gaa0Q3FxcZyOYcCAATh48CAyMzNRV1cHk8mEp59+Grm5uYiJiYGHhweKi4vZs1mv1/O1HB0d4ePjw5FcVquVDyyApoV79OjR7NEUFBSElJQU9pqkBTsqKgo3
btxARUUFKioq0K1bN3z99dfo3Lkz5s+fjzfffJP/rtVqER0djYiICMybNw9WqxWzZs3C3r178c0336BNmzbw8/PDjRs3YLfb4e3tjby8PDg5OWH16tVYvHgx8vPz8dZbb2HBggXo0KEDSkpKkJycDL1ez0YcMkQ6ODigpqYGoaGhfCgBAH/84x/x4Ycforq6mlMJAE1K+VNPPYXGxkb06tUL7du3R2xsLM+T/Px8+Pn5cX5goOlg3tHREWlpaRg8eDBsNhtycnKwZ88ePPjggygtLUWHDh1449bQ0ICePXsiOTkZsbGxuHLlCubNmwej0Qiz2cx5600mE0wmE6xWK6Kjo5GSkoIvvviCw6MJSinZs2dPXLlyhaMjKAWXxWJBZWUlunTpgqKiIjg7O7OXo6+vL3JycrgA7JQpU7B//34MHDgQJ06cUBW7r6mp4QPHtm3borS0FMOHD0dCQgKsVisX1KVx1LdvX1y4cAEeHh7seUiHIvTcyiLIAwYMwLlz51jRNxqNnMph8ODBOH78ODQaDSIjI5GRkcGeOhMnTsTBgwdRX1+Pnj17ciqjHj16cBQcbezpML6goID7hRTZxsZGBAcH4/bt2zAYDAgICMDNmzd5U0GebZTegw78n3/+ebz99tvQarUoLS3lw+jAwEAkJydj7NixOHr0KMaMGYPY2FguKgw0Kf+hoaHcL1arFbm5ufweJNucnZ35ea9fv47g4GA+uC8pKeG2Cg4ORmZmJoYPH45Tp06xN1mbNm14M2Wz2aDX6/Hwww/j008/ZW9yFxcXWK1WaLVadOzYERkZGaisrMSDDz4IrVbLhzTk+VZSUoLRo0fj+PHj6NSpE8aPH48PPviAI6ycnZ3h7e2NkpIS3thS6kZKCUHja+jQoXjkkUfw+uuvw2q1wm63w2g0std4//79ec2hdqT2o2L3AFRrjp+fH5KTk1FfX8/XMhqNcHFxUa05AwYMwPnz5/lejz76KI4fP47x48fDbrdjz549KuMKpfkaPXo0fv3rX6OxsRGzZs1CaWkpTCYTdDodH9zRnOzTpw/69evH0R1eXl7sZevm5gZ3d3eUlZWhpKRElUpWp9PB19cX6enpfOBLMs3DwwOFhYVwcHBAaGgocnNzkZ+fj9GjRyMhIQE1NTVwcXFBaWkpy4KAgABkZWXxmufk5MRRVcCdgycHBwf+/MUXX8TGjRthsVi4b2l8U8TawYMHceXKFbz44ovo0qWLKt84rUFarRZPPPEEIiMjUVRUhCVLlqCurg4GgwHDhg3jQ41HH30UBw4c4MMcZZQGreHUZjk5ObBYLBgzZgzeffddGAwGTv2j0+nQuXNnXLt2DXq9Hq+88gqOHDmCY8eOwdXVlVOmtbZmzZ07F5WVlXj66aexdetWHve0PhqNRrRp0wb5+flwcnJCSEgIrl69yuOZ5sioUaMQExMDi8WC559/HqtWrYLNZsODDz6II0eOcEqe27dvsyPAqFGjcOLECbRv3x42m41rYg0dOhSnT5+GRtNU7/H8+fNch+vAgQPcx0OHDkV8fDw6derEm+Y+ffpwehAay+7u7hwdRu02ePBgnD17FhqNBh4eHpgzZw5eeeUVDBgwgOXvk08+icTERHz55ZfQ6XR49dVXceHCBezZswcRERH49NNP0bZtW/YAttvt8PHx4dRIpCc1NDTAbDarHG7mzJmDvLw87Nq1i9cc6ktvb29kZWXB0dERq1atwksvvcSHj3TYFxgYiKKiIk7lOmLECMTGxvJ6Qp7QU6ZMwe7du2E2m7F48WLs2LED165dg6enJ4qKiuDu7s73Juegjh078mad0gu++uqrKCoqwl/+8heW0z4+PkhPT0e3bt1w8eJF1r1CQ0ORmJh41zbz8vLC1KlT8Y9//APV1dXo2LEjCgoK+LCrpKQE1dXV8PX1ZaNOXV0dHBwc4OXlhWPHjuHAgQOYN28eR4EBTYfIdKBIqbi6du2KK1eusCymiDGz2XxXWas0
NiplLckK8oo3Go1wdHREu3btUFBQgJUrV3JNCqX+SfPEbrdjypQpsNvtiImJgV6vh9lsZgPU4MGD2bFi7ty5ePPNN1FVVcUH0rW1tWhsbFTJ/ilTpnA9GpPJxPsDd3d3rFq1SrXf+Oijjzhlz5w5c7B161aMGzcOdXV1iImJgZeXF0aPHo1jx45x9LGDgwP8/f0xatQobN++nfvF3d2dawb5+fmhpKQEVqsV7dq1Q25uLrch6QAU9d65c2cUFRXB19cX586dYx2DIk6pxlN1dTXef/99/PKXv2SnJJPJhLq6Ou6bNm3aoFu3bkhMTOQ1idYjcgqirA4dO3ZESkoK7HY715BSGrXNZjNsNhvsdjtMJhPWr1+PgoICrFixgtOWUspbGmN9+/bFrVu34OLioopIU6Y+12g06NixI/Lz8zFx4kQkJSXhiy++4LFIawy1w8yZM7F582YMGjQIly9fZkOMMr3zgAEDMG3aNK4h1rzeIcm9tWvXYteuXcjJyUH79u15jnbs2BFXr17lAz5/f38kJibC3d0dM2fOxL/+9S94e3sjJSWFnS8GDx6MlJQUODg4sAHm8OHDrHP7+/sjKyuLIwkp6o2MAEqDCNCkMzU2NqKqqorna5cuXfjwvaGhAc7OztBomuoO5uXlcVpDMihTPZTg4GCkp6ezHpaYmAiz2cyRWbdv30ZoaCiKi4v5DOG1115DYmIiR/OYTCbk5OSwwYsie2/fvq2KSKU+ozS7RUVFPM9qa2tZ79doNJwetrq6GmPGjEFBQQEbWEiPJZRjmfQAahcaI2azGVarlXVW0vccHBxUaYJpLJMT2a9+9SsAwLvvvst7XrPZzJHNGk1TJoUbN26gd+/euHjxIj+XVqtVRaCazWZMmjQJGRkZuHjxImpra9nARtHubm5umDp1KrZu3YrZs2fjrbfe4kgTZcrfyMhI3LhxA/n5+fD19eVICNp3k/PHX/7yFyxbtgxhYWFsfKQxRNclR9tDhw6htraWs3GQkY+iSWfOnInY2FjY7XZUVFTw+CVdk84gPD09OTMG7S1pL0P7l+7du+Orr75inUIp8+rr6zl1ITkhmEwmNtrW1tbCy8sLc+fOxSuvvAJfX18kJiZCr9dzzcj09HTWGx599FEcPHgQXbt2xfXr11XRKRQtTHsOMjhR+3l6esJqtWLs2LE4duwYOwJ27doV165dg0ajwezZs5GYmMg165YtWwaNRsPp6KqqqritSBbT3NJomurtUV9Q1h43NzesWLECzz77LK9dzWUU0OQouWTJErz44os8Ho1GIxvZjUYjBg0ahBUrVmDSpEkcFQkAffr0gaOjIz7//HNoNBqsXbsWY8aMAdDkiHvp0iWYTCZMmzYNCQkJfAYUFxeHp556ivVRkrG1tbVskHz88cexePFizJw5ExcvXuR9XFhYGMrLy/lsYtCgQXBxccHp06c5deSyZctQXFyMdevW8bihiFC9Xs/RgNXV1ZgwYQI2btzIc23UqFE4fvw42rRpwzrE66+/jpiYGHZW69OnDyorK3H79m1UVVVh8ODBePzxx2EymTB//nzuJ9KnSKbSmkXnbqRzOjk5obS0FE5OTujUqROuX7+OwYMH49SpU7wnJxlI54eenp7QaDTslENnTqSH1dfXIygoCLdv38aIESNw4sQJ/juNa4PBAG9vb9bXQ0ND8eWXX/JY8fHxQX5+Pnr06IGXXnoJS5YsYYc6q9XKRiQAvO+12+3o1asXxo8fj3feeYfX5sGDByM/Px+FhYV89nns2DHExcVhyZIlaGhoQFVVFcLDw3k9rKioQGxsLJYsWYIzZ85w5iIHBwf4+vriypUrcHd3R48ePTBu3Dh88sknuHXrFnJzczn9P0VXlZaW8visrKzE+PHjeV8yceJEnDx5kg19cXFxuHnzJt566y3k5eWha9eucHZ2xokTJ5CVlQUXFxd4e3tjz549qtqEP1fEYCX8V5GSkoJly5bh0qVLcHJywoQJ
EzB37lzVZJ0xYways7PZ6xNoEo5bt27F9u3bOWexi4sLJk+erPp9Xl4eJkyYgNLSUjg7OyMqKgoLFy6Ek5MTtm7divfffx/FxcXw9fVFQ0MDcnNzWblwcXFBVFQUCgsLkZCQAJ1OBx8fH6SlpcFms2Hw4MFYtGgRQkJCcPz4cSxZsoQt9CaTiQW4o6MjSkpK4OjoCE9PT06ro9VqebOs1+uxceNGjBgxAikpKZgzZw57jQNN3jWvv/46nnjiCcyfPx9Wq5W9Y5rj7u6OiRMnoqioiDfrzZk4cSJeffVVNDY24qGHHuJ0C0rCwsKwZs0ahISEIDo6GteuXeP6KgR5WdlsNnTp0gVRUVF48803W4RMGwwGVgxMJhOngyspKWm1gKSrqyt+9atf4ezZs2ywU0KHoX379kW/fv2wZ88eVowdHR0xdOhQlJSU4KuvvoJW21RfRVmnym63w9/fnxX57OxsTs9iNBo5tWR1dTV69+6NgoICmM1mzJo1C2+88QbS0tLg6ekJs9mM7Oxs2O12NgIojWtarZYVGEopYjabcebMGR7758+fZ88cer7Ro0fjoYcewpIlS1ShxZQb22QyISoqCunp6fjmm29Uc+P48eNcGwRoUvpfe+01bNu2DV9++SWHRJNySWlcRo0ahZUrV+L3v/89z7eUlBTMnTuX0wrShq1bt25YsGABp1nYuXMn1qxZ0yIHuclkwtSpUzF//nzU1tZiwYIFOHnypMpbk5QRFxcXBAUFIT09nVMX0EEUAJ5vFosF0dHRdy22O2zYMPz973/HqVOn8Kc//alVQ2ynTp0wb948DBkyBGvXrsW2bdt4I+jg4MDt4+fnh7KyMpSWlrKyrRz/QNN8o0M+KvhNBxvkVT137lx8/vnnKhnRHA8PDz7ArK+vZ8OCt7c3KioqUFVVBaPRiKNHj6KyslLVL4RWq0VAQACmTZuGadOmISYmptV+AZrC+F9//XX86U9/4rmjxNXVFfPnz8fkyZNZVpL3EG3KDQYD+vXrx/3yxBNPqIyRzZ+tTZs26N27NzIyMjjVCOXqB5oitubPn48hQ4ZgzZo12LZtW4v2bn5NBwcHnnf0mcFg4JQuN2/eVB0yNm/zXr16ISMjo0X6JGL69Ol46aWXsGbNGnzwwQfct62xcuVK7N69mw8HjEYjxo4di0WLFqnWHBoDyjpTAFqsOdQ21AZ9+/bF0qVLec1ZuHBhi9QDxOHDh+Hn54fly5djz549dy3k2r9/f/z1r3/F5MmTVamHyHMuKCgIWVlZKCkpgaenJx9iAnfWCIvFgmXLluHTTz9VGfKApjWif//+aGhowMWLF1XvQweetbW17N362WefYe3atdi9e3eLdYQ2uTNnzuS6jMrxodU25UR/6KGHkJSUhKSkJLi7u3PaW/Igpucj2UhybPfu3di6dStu376tegej0Yjo6GjMnz8fDz/8cKvzxWQy4f/+7/8wdepUTJs2DZcvX2517CrbjHQTAK2ugzqdDgEBAWhoaMA333wDT09P5OTkwN3dHeXl5Xdds8LDw+Hq6opz586hpqaG1yZqI6BprFGqqHsVQyfIG99gMCApKYkNrTS36DDRZDKxsSI3N5flJh0eODo6oqqqCh4eHqwblZeX8xqq0+lU6fWUODg4sNMNQYd+5EQzcOBAnDlzBhUVFfDw8GCdsrn8dXNzg8lkQlFRETp06IAXXngBQ4YMwfLly/Hxxx+zjKN1UqfTYfjw4Zg5cybWrFmDK1eutNpnzfsPuOPB7OnpyTUy6MD7busBjVODwQCTydRqPVb6XkBAAB9eK+WTwWBgw4Oyvk9YWBj8/f2RkJAAvV6vajM6ZK+vr2ddWJmW6NvaTLlm0eE2RbuTgwIAdpIKDAyEzWZjHWbnzp1YvXp1i7lP0Qnr1q2Dr68vFixYgBMnTqhkyaOPPvq9Ze1nn312V11i7ty5mDVr1veStR9++GELnWrZsmU4deoUDAYD6zY055SyVin7yUud
2l6pjzffb9BhnLOzM493AC3Gcmtjh2qzKQ1I5OCi0+mwYMECHD9+vIWeSvLgb3/7G9asWYPs7GzExsa26BegaY527doVixYtQq9evXD79m08++yzLfQXBwcHjB07FgsXLuS9W2trt7OzM1566SXExsZyCru7odFoEBISguXLl+P06dN33TcZDAY88sgjWLx4MfLy8rBs2TLeeyjnuYeHB1544QXExsYiOTkZHTt2xM2bN9lRS3lfap8HHngAmzdvxrp161q999ChQ7Fu3ToUFRXhpZdewtWrV1u8t6enJ+bMmYMpU6bwnjg2NhbLly9nJ0C6LxnBH374Ybi5uXFUOBk8APBe5MEHH8SCBQvw/vvv46233mJDJBlBGxoa+N908N/82eierq6uHIlDhiiC9HmNRoNf/vKXePbZZ7Fp0ybExMS0aDtHR0f069cPs2fPhpOTE++TaA5rNBr07t2bo/npDMHR0RHOzs68n3VwcMCsWbOQkJCAW7dusRG5NV2oc+fOWLp0KY/lxx577K5jpXv37li0aBGSk5Oxc+dOJCcnq9YsirZTjuU///nPXD9Uia+vL5599lnExsbi8uXLcHFxgYeHR4u5Qdf18/Pj9L93W7dpLDc0NHDKvrvh5OSE6OhozJ07F7W1tVi+fDk70AFNczc8PBwrVqxAcHAwy1VKeUfrDUUAlpSUwGw2o6GhodV9h06nw6ZNm/iwe+3atUhOTm6xloaHh2Pjxo1wc3NT7dGU7zho0CB06NABR44cQVFRERu3lftGAJx+k+oNUipk5Z577969yMjIwNq1a5GSkoLGxsYWz6TVatGhQwe0bdsWly9fvqueAjQ5dYWEhODChQuqurSOjo6Ijo7GP//5T8yfPx8uLi7YunVrq2nAHB0dERgYiIKCAjZ4klx1c3PDY489hrlz56K4uJijo1tjy5YtGDFiBGJiYrB8+XKVjkBnFY6OjjwPmxs0WyM2NhbPPPMMOxIq26h///5YsmQJgoODERMTg2XLlqkce5Sy9tq1a5g5c2ar99Dr9fjb3/7GJR0A4LHHHkNeXh6qqqrYKYfGZ0hICObMmaM65yBMJhOefPJJdq6dMWMGMjMz0bNnT5w8eVI13qlGV5cuXbBgwQL85S9/QXp6uqqNmuPk5IS3334bgYGBWLlyJT755BMulUHjxGQyYfjw4aro8uZQhPWoUaNw9epVLvvg7OwMnU6H4uJi7hsyKLm6umLixImYO3cuYmNjsXTpUtV68Ktf/Qpz5szBO++8g/feew95eXmt3tvDw4PnCNUoa419+/YhKysLb7zxBqdWVD6TyWRCZGQkXnzxRezcuRN79+6961kf0JSuMyAgAMePH2+xv27Tpg0mTJiAcePGYfXq1ap1QImXlxeCgoL47GrkyJGt7tO8vLywfv169OrVix2BW2Py5MlYuHAhkpOTsXr1aty6dYvrcdJ7+vr64g9/+INqfN6+fRvLli1DfHw819vs1asXli1bhpCQEGRkZGDp0qW4dOkSn01ptVr4+flh3Lhx+M1vfnPXGsk/N8RgJQiCIAiCIAiCIAiCIAiCIAiCINxXpIaVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIws+A0NBQbNiw4Xv95ty5cwgNDcW5c+d+pKcSBEEQBEEQBEH4zyAGK0EQBEEQBEEQBEH4AVy8eBEbNmxAeXn5/X4UQRAEQRAEQfjZo2lsbGy83w8hCIIgCIIgCIIg3Jva2lrodDro9frv/Bu73Y76+noYDAZoteKv
+J/m7bffxqpVq3DixAn4+/vf78cRBEEQBEEQhJ81smMRBEEQBEEQBEH4/xS73Y7a2loAgMlk+l7GKgDQarUwmUw/a2NVY2Mjampq7vdjCIIgCIIgCILwI/Pz3bUIgiAIgiAIgiD8TNiwYQNCQ0ORkpKC559/Hr1798aAAQOwfPlyNkgBTXWqli5din379mHs2LHo0aMHTp06xX9rXsMqLy8PCxcuxNChQ9G9e3eMHDkSS5YsQV1dHYDWa1jNmDEDjz76KJKTkzFjxgz07NkTw4YNwz/+8Y8Wz52dnY1Zs2bhgQcewKBBg7BixQqcOnXqe9fF+uijjxAaGorz589j8eLFGDBgAHr37o358+ejrKxM9d2RI0fimWeewalTpzBp0iSEh4dj165dAIDbt2/jueeeQ//+/dGzZ09MnToVn376qer39M5xcXHYuHEjhg0bhl69euG5555DRUUF6urq8PLLL2PQoEHo1asXFixYwO3VWj889NBD6NGjByZNmoTz58/zdzZs2IBVq1YBAEaNGoXQ0FCEhoYiKyvrO7eLIAiCIAiCIAh3+H7ueYIgCIIgCIIgCMIPZs6cOfDz88O8efNw+fJlvPvuuygvL2fDBwCcPXsWhw4dwvTp0+Hm5gY/P79Wr5WXl4cpU6agoqICU6dORXBwMPLy8nDkyBHU1NTAaDTe9TnKysrw29/+FlFRURgzZgyOHDmC119/HZ07d0ZkZCQAwGq14he/+AUKCgowc+ZMtG3bFgcOHPhehqrmLF26FC4uLpg9ezbS0tKwc+dO5OTk4N1334VGo+HvpaWlYd68eYiOjsbUqVPRoUMHFBYW4oknnkB1dTVmzJgBNzc3fPzxx/j973+P9evXIyoqSnWvrVu3wsHBAb/73e+QkZGBHTt2QK/XQ6PRoLy8HLNnz8aVK1fw0Ucfwc/PD7Nnz1b9/vz584iLi8OMGTNgNBqxc+dO/Pa3v8Xu3bvRuXNnREVFIT09HQcOHMCCBQvg5uYGAHB3d//B7SMIgiAIgiAI/8uIwUoQBEEQBEEQBOEnwt/fH5s3bwYATJ8+HRaLBe+//z5+/etfIywsDECTsWb//v3o2LHjPa+1Zs0aFBYW4sMPP0SPHj348+effx7fVqo4Pz8fr776KiZOnAgAmDJlCkaOHImYmBg2WH3wwQe4ffs2Nm3ahNGjRwMAnnjiCf7ND8FgMOCdd96BwWAAAPj6+uK1117DyZMnMWrUKP5eRkYG3nrrLQwbNow/W7FiBQoLC/Hee++hb9++AIDHH38c48ePx8qVKzFq1ChV6sOGhga8++67fK+SkhIcPHhQFU02ffp0ZGZm4qOPPmphsLp16xZiYmLQvXt3AMDYsWPx8MMPY/369di4cSPCwsLQtWtXHDhwAKNHj5YaVoIgCIIgCILwbyIpAQVBEARBEARBEH4ipk+frvr3U089BQD4/PPP+bN+/fp9q7HKbrfj+PHjGDFihMpYRSijlVrD0dEREyZM4H8bjUb06NEDt2/f5s9OnToFb29vlSHJZDJh6tSp97z2vYiOjmYDEgA8+eST0Ov1+Oyzz1Tf8/f3VxmrAOCzzz5DeHg4G6sAwMnJCdHR0cjOzkZycrLq+xMmTFDdKzw8HI2NjZg8ebLqe+Hh4cjNzYXNZlN93qtXLzZWAU3GtVGjRuH06dNoaGj4nm8uCIIgCIIgCMK3IQYrQRAEQRAEQRCEn4jAwEDVv9u3bw+tVquqe/RdInWKi4tRWVmJTp06/aDn8PHxaWHUcnV1VdWTys7ORvv27Vt8r3379j/onkDL93dycoKnpyeys7NVn7fWBjk5OejQoUOLz4ODg/nvSnx9fVX/dnZ2BgC0a9euxed2ux0VFRX3fFYACAoKQnV1NYqLi1v8TRAEQRAEQRCEfw8xWAmCIAiCIAiCINwnWouEcnBw+NHvq9PpfvR7/Dv8J9pAmR7wu3z+bWkUBUEQBEEQBEH4cRGDlSAIgiAIgiAIwk9ERkZG
i3/b7fbvXf/I3d0dFosFSUlJ/8nHU+Hn54fMzMwWhpzMzMwffM3m719VVYWCggL4+fl96299fX2RlpbW4vPU1FT++3+S5s8KAOnp6TCbzXB3dwfw7akXBUEQBEEQBEH47ojBShAEQRAEQRAE4SfivffeU/17x44dAICIiIjvdR2tVovRo0fjk08+wdWrV1v8/T8RLTR06FDk5eXhxIkT/FltbS0+/PDDH3zNDz74APX19fzvnTt3wmazfaf3j4yMxFdffYVLly7xZ1arFR9++CH8/Py+te7X9+XSpUu4du0a/zs3NxcnTpzAkCFDOELNbDYDQIt0goIgCIIgCIIgfH/09/sBBEEQBEEQBEEQ/lfIysrCrFmzMGzYMFy+fBn79u3Do48+irCwsO99rRdeeAHx8fGYMWMGpk6dipCQEBQUFODw4cN4//334eLi8m89a3R0NHbs2IF58+Zh5syZ8PT0xP79+2EymQD8sOii+vp6/PKXv8SYMWOQlpaG999/H3369MGoUaO+9be/+93vcPDgQTz99NOYMWMGXF1dsXfvXmRlZWHDhg13TfX3Q+ncuTN+85vfYMaMGTAajdi5cycA4I9//CN/p1u3bgCAtWvX4pFHHoHBYMCIESPg6Oj4H30WQRAEQRAEQfhfQAxWgiAIgiAIgiAIPxFvvPEG1q1bh9WrV0Ov1+Opp57C/Pnzf9C1vL298eGHH2LdunXYv38/Kisr4e3tjYiIiP9IDSgnJyds27YNy5cvx/bt2+Ho6IiJEyeiV69e+OMf/8iGq+/D4sWLsX//fqxfvx719fUYO3YsFi1a9J2MX23btsWuXbvw2muvYceOHaitrUVoaCi2bNmC4cOH/4A3vDf9+vXDAw88gE2bNiEnJwcdO3bEypUrVcbF8PBwPP/889i1axdOnToFu92OEydOiMFKEARBEARBEH4AmkapLCsIgiAIgiAIgvCjsmHDBmzcuBFnzpzh+kc/V9555x2sXLkSn3/+Oby9vb/Tbz766CMsWLAAe/bsQY8ePX7kJ/z3CQ0NxfTp07F48eL7/SiCIAiCIAiC8D+D1LASBEEQBEEQBEEQWqWmpkb179raWnzwwQcICgr6zsYqQRAEQRAEQRCE74KkBBQEQRAEQRAEQRBaZfbs2fD19UVYWBgqKyuxb98+pKam4vXXXwfQZNCqqKi45zVcXV1/ikcVBEEQBEEQBOFnjhisBEEQBEEQBEEQhFYZOnQo9uzZg/3796OhoQEdO3bE2rVr8cgjjwAA4uLisGDBgnteY/v27T/FowqCIAiCIAiC8DNHalgJgiAIgiAIgiAIP4j8/HwkJyff8zvdunWTKCtBEARBEARBEL4VMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xXt/X4AQRAEQRAEQRAEQRAEQRAEQRAE4X8bMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI95X/B/V7Nk7OqfW/AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[[\"pricing_prompt\", \"pricing_completion\"]].plot.scatter(\n", + " x=\"pricing_prompt\", y=\"pricing_completion\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for /: 'str' and 'str'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_ratio\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_prompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/arraylike.py:210\u001b[0m, in \u001b[0;36mOpsMixin.__truediv__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__truediv__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__truediv__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruediv\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/base.py:1382\u001b[0m, in 
\u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: 
ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[0;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[0;34m(x, y, op)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'" + ] + } + ], + "source": [ + "df[\"price_ratio\"] = df[\"pricing_completion\"] / df[\"pricing_prompt\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df[\"total_price\"] =" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py new file mode 100644 index 000000000..c94786208 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py @@ -0,0 +1,125 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [Description](#description) + +# %% [markdown] +# +# # Description +# +# This notebook examines the model stats returned by `hllm.get_model_stats()` (context length and pricing columns). + +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" +# #!jupyter labextension enable + +# %% +# %load_ext autoreload +# %autoreload 2 + +import logging + +import helpers.hdbg as hdbg +import helpers.henv as henv +# NOTE(review): `hnotebook` was used below without being imported; assumes `helpers.hnotebook` provides `config_notebook()` - confirm. +import helpers.hnotebook as hnotebook + +# %% +print(henv.get_system_signature()[0]) + +hnotebook.config_notebook() + +# %% +# hdbg.init_logger(verbosity=logging.DEBUG) +hdbg.init_logger(verbosity=logging.INFO) +# hdbg.test_logger() +_LOG = logging.getLogger(__name__) + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet openai requests)" + +# %% +import helpers.hllm as hllm +import helpers.hpandas as hpandas + +# %% +val = hllm.get_model_stats() + +# %% +import pprint + +pprint.pprint(val[0]) + +# %% +import pandas as pd + +# %% +# Normalize the nested JSON +df = pd.json_normalize(val, sep="_") +df +# View the resulting DataFrame +# print(df.T) # Transpose just for readable vertical inspection + +# %% +df.iloc[0].T + +# %% +col_names = ["id", "context_length", "pricing_prompt", "pricing_completion"] + +# %% +df.dtypes + +# %% [markdown] +# # + +# %% +for col in df.columns: + print(hpandas.infer_column_types(df[col])) + +# %% +df.apply(lambda x:
pd.Series(hpandas.infer_column_types(x))).T + +# %% +hpandas.infer_column_types_df(df) + + +# %% +pd.to_numeric(df["pricing_request"], errors="coerce").notna() + +# %% +df["pricing_completion"] + +# %% +df.sort_values("pricing_prompt")[col_names] + +# %% +df[["pricing_prompt", "pricing_completion"]].plot.scatter( + x="pricing_prompt", y="pricing_completion" +) + +# %% +# The pricing columns hold strings, so dividing them directly raises +# "TypeError: unsupported operand type(s) for /: 'str' and 'str'"; convert them +# to numbers first (unparseable values become NaN via errors="coerce"). +prompt_price = pd.to_numeric(df["pricing_prompt"], errors="coerce") +completion_price = pd.to_numeric(df["pricing_completion"], errors="coerce") +df["price_ratio"] = completion_price / prompt_price + +# %% + +# %% +# df["total_price"] = diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb new file mode 100644 index 000000000..60491a1c6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb @@ -0,0 +1,993 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-07T22:25:23.663978Z", + "start_time": "2020-06-07T22:25:23.661756Z" + } + }, + "source": [ + "# Description" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:40.920362Z", + "start_time": "2020-06-09T19:30:40.864535Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded.
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline\n", + "\n", + "import json\n", + "import logging\n", + "\n", + "import jsonpickle\n", + "import jsonpickle.ext.pandas as jsonpickle_pandas\n", + "\n", + "jsonpickle_pandas.register_handlers()\n", + "\n", + "import pandas as pd # noqa: E402\n", + "\n", + "import helpers.hdbg as hdbg # noqa: E402\n", + "import helpers.henv as henv # noqa: E402\n", + "import helpers.hplayback as hplayba # noqa: E402" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:43.871255Z", + "start_time": "2020-06-09T19:30:43.739350Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Logger already initialized: skipping\n", + "# Packages\n", + " python: 3.7.6\n", + " gluonnlp: 0.9.1\n", + " gluonts: 0.5.0\n", + " joblib: 0.15.1\n", + " mxnet: 1.6.0\n", + " numpy: 1.18.4\n", + " pandas: 1.0.3\n", + " pyarrow: 0.17.1\n", + " scipy: 1.4.1\n", + " seaborn: 0.10.1\n", + " sklearn: 0.23.1\n", + " statsmodels: 0.11.1\n", + "# Last commits:\n", + " * 268f2f1 saggese PTask2231: Checkpoint ( 2 days ago) Sun Jun 7 20:58:52 2020 (HEAD -> PTask2231_Playback_approach_for_unit_testing, origin/PTask2231_Playback_approach_for_unit_testing)\n", + " * 7025106 pavel-... PTask2291: Add args, kwargs. 
New tests ( 6 days ago) Wed Jun 3 11:38:56 2020 \n", + " * 60e0b11 saggese PTask2291: Add leftover files ( 10 days ago) Sat May 30 10:06:29 2020 \n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "_LOG.info(\"%s\", henv.get_system_signature()[0])\n", + "\n", + "hnotebook.config_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:53:12.564104Z", + "start_time": "2020-06-09T19:53:12.513350Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n" + ] + } + ], + "source": [ + "data = {\n", + " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", + " \"Price\": [700, 250, 800, 1200],\n", + "}\n", + "\n", + "df = pd.DataFrame(data, columns=[\"Product\", \"Price\"])\n", + "df.index.name = \"hello\"\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:56:24.324137Z", + "start_time": "2020-06-09T19:56:24.279767Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Product': hello\n", + " 0 Desktop Computer\n", + " 1 Tablet\n", + " 2 iPhone\n", + " 3 Laptop\n", + " Name: Product, dtype: object,\n", + " 'Price': hello\n", + " 0 700\n", + " 1 250\n", + " 2 800\n", + " 3 1200\n", + " Name: Price, dtype: int64}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# df.to_json(orient=\"\")\n", + "df.to_dict(orient=\"series\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:58:05.848188Z", + "start_time": "2020-06-09T19:58:05.747808Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"\"pd.DataFrame({'Product': ['Desktop Computer', 'Tablet', 'iPhone', 'Laptop'], 'Price': [700, 250, 800, 1200]})\"" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.to_python_code(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:58:31.870465Z", + "start_time": "2020-06-09T19:58:31.822189Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductPrice
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", + "
" + ], + "text/plain": [ + " Product Price\n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_dict(\n", + " {\n", + " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", + " \"Price\": [700, 250, 800, 1200],\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T20:14:52.983985Z", + "start_time": "2020-06-09T20:14:52.861966Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Initialize values for unit test.\n", + "dummy_0 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "dummy_0 = jsonpickle.decode(dummy_0)\n", + "dummy_1 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "dummy_1 = jsonpickle.decode(dummy_1)\n", + "# Call function.\n", + "act = F(dummy_0, dummy_1)\n", + "# Create expected value of function output.\n", + "exp = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop ComputerDesktop 
Computer,1400\\nTabletTablet,500\\niPhoneiPhone,1600\\nLaptopLaptop,2400\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "exp = jsonpickle.decode(exp)\n", + "# Check.\n", + "assert act.equals(exp)\n" + ] + } + ], + "source": [ + "use_playback = True\n", + "\n", + "\n", + "def F(a, b):\n", + " if use_playback:\n", + " playback = Playback(\"assert_equal\", \"F\", a, b)\n", + " playback.start()\n", + " c = a + b\n", + " if use_playback:\n", + " output = playback.end(c)\n", + " res = output\n", + " else:\n", + " res = c\n", + " return res\n", + "\n", + "\n", + "a = df\n", + "b = df\n", + "print(F(a, b))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T20:20:24.981307Z", + "start_time": "2020-06-09T20:20:24.839197Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[3, 3, ]'" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.to_python_code([\"3\", 3])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:54.111194Z", + "start_time": "2020-06-09T19:30:54.046499Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# obj1=\n", + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n", + "class=\n", + "# frozen=\n", + "{\n", + " \"meta\": {\n", + " \"dtypes\": {\n", + " \"Price\": \"int64\",\n", + " \"Product\": \"object\"\n", + " },\n", + " \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, 
\\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"\n", + " },\n", + " \"py/object\": \"pandas.core.frame.DataFrame\",\n", + " \"txt\": true,\n", + " \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\"\n", + "}\n", + "# obj2=\n", + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n", + "class=\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductPrice
hello
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", + "
" + ], + "text/plain": [ + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.round_trip_convert(df, logging.INFO)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-07T22:32:12.623139Z", + "start_time": "2020-06-07T22:32:12.577435Z" + } + }, + "outputs": [], + "source": [ + "hplayba.round_trip_convert(\"hello\", logging.INFO)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:45:27.442281Z", + "start_time": "2020-06-09T19:45:27.380299Z" + } + }, + "outputs": [], + "source": [ + "def F(a, b):\n", + " return a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:45:36.907940Z", + "start_time": "2020-06-09T19:45:36.861549Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "dummy_0 = r\"3\"\n", + "dummy_0 = jsonpickle.decode(dummy_0)\n", + "dummy_1 = r\"2\"\n", + "dummy_1 = jsonpickle.decode(dummy_1)\n", + "# Call function.\n", + "act = F(dummy_0, dummy_1)\n", + "# Create expected value of function output.\n", + "exp = r\"5\"\n", + "exp = jsonpickle.decode(exp)\n", + "# Check.\n", + "assert act == exp\n", + "\n", + "\n", + "# #############################################################################\n", + "# Playback\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:31:00.704146Z", + "start_time": "2020-05-29T18:31:00.695276Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Playback:\n", + " # def __init__(self, 
file_name, mode, *args, **kwargs):\n", + " # self.args = args\n", + " # self.kwargs = kwargs\n", + " def __init__(self, file_name, mode, func_name, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + " def start(self):\n", + " self.a_json = jsonpickle.encode(self.a)\n", + " self.b_json = jsonpickle.encode(self.b)\n", + "\n", + " def end(self, ret):\n", + " self.ret_json = jsonpickle.encode(ret)\n", + " output = []\n", + " output.append(\"# Initialize values for unit test.\")\n", + " output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", + " output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", + " output.append(\"# Apply values.\")\n", + " output.append(\"act = F(a, b)\")\n", + " output.append(\"exp = %s\" % jsonpickle.decode(self.ret_json))\n", + " # output.append(\"self.assertEqual(act, exp)\")\n", + " # output.append(\"assert act == exp\")\n", + " output = \"\\n\".join(output)\n", + " print(\"output=\", output)\n", + "\n", + "\n", + "# def F(a: int, b: int):\n", + "# c = {}\n", + "# c[\"pavel\"] = a + b\n", + "# return c\n", + "\n", + "\n", + "def F(a: int, b: int):\n", + " playback = Playback(\"\", \"\", \"F\", a, b)\n", + " playback.start()\n", + " c = {}\n", + " c[\"pavel\"] = a + b\n", + " playback.end(c)\n", + " return c\n", + "\n", + "\n", + "res = F(3, 4)\n", + "print(res)\n", + "\n", + "\n", + "# #############################################################################\n", + "# Playback\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:55:14.350318Z", + "start_time": "2020-05-29T18:55:14.319820Z" + } + }, + "outputs": [], + "source": [ + "class Playback: # noqa: F811\n", + " # def __init__(self, file_name, mode, *args, **kwargs):\n", + " # self.args = args\n", + " # self.kwargs = kwargs\n", + " def __init__(self, file_name, mode, func_name, a, b):\n", + " self.a = 
a\n", + " self.b = b\n", + "\n", + " def start(self):\n", + " self.a_json = jsonpickle.encode(self.a)\n", + " self.b_json = jsonpickle.encode(self.b)\n", + "\n", + " def end(self, ret):\n", + " self.ret_json = jsonpickle.encode(ret)\n", + " output = []\n", + " output.append(\"# Initialize values for unit test.\")\n", + " # output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", + " # output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", + " output.append(f\"a = r'{self.a_json}'\")\n", + " output.append(\"a = jsonpickle.decode(a)\")\n", + " output.append(f\"b = r'{self.b_json}'\")\n", + " output.append(\"b = jsonpickle.decode(b)\")\n", + " output.append(\"# Apply values.\")\n", + " # output.append(\"act = F(a, b)[1]\")\n", + " output.append(\"act = F(a, b)\")\n", + " output.append(f\"exp = r'{self.ret_json}'\")\n", + " output.append(\"exp = jsonpickle.decode(exp)\")\n", + " # output.append(\"self.assertEqual(act, exp)\")\n", + " output.append(\"assert act.equals(exp)\")\n", + " # output.append(\"assert act == exp\")\n", + " output = \"\\n\".join(output)\n", + " return output\n", + "\n", + "\n", + "# def F(a: int, b: int):\n", + "# c = {}\n", + "# c[\"pavel\"] = a + b\n", + "# return c\n", + "\n", + "use_playback = True\n", + "\n", + "\n", + "def F(a: pd.DataFrame, b: pd.DataFrame):\n", + " if use_playback:\n", + " playback = Playback(\"\", \"\", \"F\", a, b)\n", + " playback.start()\n", + " # c = {}\n", + " # c[\"pavel\"] = a + b\n", + " c = a + b\n", + " if use_playback:\n", + " output = playback.end(c)\n", + " res = output, c\n", + " else:\n", + " res = c\n", + " return res\n", + "\n", + "\n", + "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", + "b = pd.DataFrame({\"Price\": [1, 1, 1, 1]})\n", + "\n", + "res = F(a, b)\n", + "output = res[0]\n", + "print(output)\n", + "exec(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:51:02.968918Z", + "start_time": 
"2020-05-29T18:51:02.964513Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", + "a = jsonpickle.decode(a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:44:10.790106Z", + "start_time": "2020-05-29T18:44:10.779459Z" + } + }, + "outputs": [], + "source": [ + "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", + "\n", + "# round_trip(a)\n", + "frozen = jsonpickle.encode(a)\n", + "print(frozen)\n", + "print(f\"frozen2 = '{frozen}'\")\n", + "# print(\"frozen = '%s'\" % frozen)\n", + "assert 0\n", + "#\n", + "print(\"frozen=\")\n", + "print(json_pretty_print(frozen)) # noqa: F821\n", + "#\n", + "obj2 = jsonpickle.decode(frozen)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:49:44.390404Z", + "start_time": "2020-05-29T18:49:44.384524Z" + } + }, + "outputs": [], + "source": [ + "frozen2 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", + "print(frozen2)\n", + "# print(\"\\n\")\n", + "# print(frozen)\n", + "if False and isinstance(frozen2, str):\n", + " # print(frozen2[61])\n", + " # assert 0\n", + " frozen2 = json.loads(frozen2)\n", + " print(frozen2)\n", + "frozen2 = 
jsonpickle.decode(frozen2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:40:34.682031Z", + "start_time": "2020-05-29T18:40:34.668987Z" + } + }, + "outputs": [], + "source": [ + "a = \"\"\"{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}\"\"\"\n", + "a = jsonpickle.decode(a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:39:18.728676Z", + "start_time": "2020-05-29T18:39:18.711958Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "a = jsonpickle.decode(a)\n", + "b = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n1\\n1\\n1\\n1\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "b = jsonpickle.decode(b)\n", + "# Apply values.\n", + "act = F(a, b)\n", + "exp = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n701\\n251\\n801\\n1201\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 
1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "exp = jsonpickle.decode(exp)\n", + "assert act == exp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:27:30.781670Z", + "start_time": "2020-05-29T18:27:30.777539Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = 3\n", + "b = 4\n", + "# Apply values.\n", + "act = F(a, b)\n", + "exp = {\"pavel\": 7}\n", + "assert act == exp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:10:03.802405Z", + "start_time": "2020-05-29T18:10:03.790642Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "df2 = round_trip(df) # noqa: F821\n", + "\n", + "\n", + "# #############################################################################\n", + "# Thing\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T21:48:14.394447Z", + "start_time": "2020-05-11T21:48:14.384307Z" + } + }, + "outputs": [], + "source": [ + "class Thing:\n", + " def __init__(self, name):\n", + " self.name = name\n", + "\n", + "\n", + "obj = Thing(\"Awesome\")\n", + "\n", + "round_trip(obj) # noqa: F821" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T19:58:38.314059Z", + "start_time": "2020-05-11T19:58:38.309331Z" + } + }, + "outputs": [], + "source": [ + "def test(a: int, b: int):\n", + " print(round_trip(a)) # noqa: F821\n", + "\n", + "\n", + "test(\"strunz\", 6)\n", + "test(4, 6)\n", + "test([\"hello\"], 6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T14:03:41.315868Z", + "start_time": 
"2020-05-11T14:03:41.311264Z" + } + }, + "outputs": [], + "source": [ + "df.index.dtype #" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T14:03:00.632566Z", + "start_time": "2020-05-11T14:03:00.623714Z" + } + }, + "outputs": [], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:16:34.748252Z", + "start_time": "2020-05-29T18:16:34.736249Z" + } + }, + "outputs": [], + "source": [ + "# import io\n", + "# import io.StringIO\n", + "# from io import StringIO\n", + "\n", + "# output = StringIO.StringIO()\n", + "\n", + "orient = \"columns\"\n", + "# orient = \"split\"\n", + "# orient = \"records\"\n", + "# orient = \"table\"\n", + "df_as_str = df.to_json(orient=orient)\n", + "\n", + "# split\n", + "# records\n", + "# index\n", + "# values\n", + "# table\n", + "# columns (the default format)\n", + "\n", + "python_code = []\n", + "target_var = \"df_as_str\"\n", + "python_code.append(f\"{target_var} = {df_as_str}\")\n", + "python_code.append(f\"{target_var}.index.name = '{df.index.name}'\")\n", + "python_code = \"\\n\".join(python_code)\n", + "print(python_code)\n", + "\n", + "exec(python_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T13:42:52.575973Z", + "start_time": "2020-05-11T13:42:52.568178Z" + } + }, + "outputs": [], + "source": [ + "arr = eval(df_as_str)\n", + "df2 = pd.DataFrame.from_dict(arr, orient=\"columns\")\n", + "df2.index.name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:.conda-develop] *", + "language": "python", + "name": "conda-env-.conda-develop-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "165px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py new file mode 100644 index 000000000..22176ce52 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py @@ -0,0 +1,374 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python [conda env:.conda-develop] * +# language: python +# name: conda-env-.conda-develop-py +# --- + +# %% [markdown] +# # Description + +# %% [markdown] +# # Imports + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +import json +import logging + +import jsonpickle +import jsonpickle.ext.pandas as jsonpickle_pandas + +jsonpickle_pandas.register_handlers() + +import pandas as pd # noqa: E402 + +import helpers.hdbg as hdbg # noqa: E402 +import helpers.henv as henv # noqa: E402 +import helpers.hplayback as hplayba # noqa: E402 + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +_LOG.info("%s", henv.get_system_signature()[0]) + 
+hnotebook.config_notebook() + +# %% +data = { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], +} + +df = pd.DataFrame(data, columns=["Product", "Price"]) +df.index.name = "hello" +print(df) + +# %% +# df.to_json(orient="") +df.to_dict(orient="series") + +# %% +hplayba.to_python_code(df) + +# %% +pd.DataFrame.from_dict( + { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], + } +) + +# %% +use_playback = True + + +def F(a, b): + if use_playback: + playback = Playback("assert_equal", "F", a, b) + playback.start() + c = a + b + if use_playback: + output = playback.end(c) + res = output + else: + res = c + return res + + +a = df +b = df +print(F(a, b)) + +# %% +hplayba.to_python_code(["3", 3]) + +# %% +hplayba.round_trip_convert(df, logging.INFO) + +# %% +hplayba.round_trip_convert("hello", logging.INFO) + + +# %% +def F(a, b): + return a + b + + +# %% +# Initialize values for unit test. +dummy_0 = r"3" +dummy_0 = jsonpickle.decode(dummy_0) +dummy_1 = r"2" +dummy_1 = jsonpickle.decode(dummy_1) +# Call function. +act = F(dummy_0, dummy_1) +# Create expected value of function output. +exp = r"5" +exp = jsonpickle.decode(exp) +# Check. 
+assert act == exp + + +# ############################################################################# +# Playback +# ############################################################################# + + +# %% +class Playback: + # def __init__(self, file_name, mode, *args, **kwargs): + # self.args = args + # self.kwargs = kwargs + def __init__(self, file_name, mode, func_name, a, b): + self.a = a + self.b = b + + def start(self): + self.a_json = jsonpickle.encode(self.a) + self.b_json = jsonpickle.encode(self.b) + + def end(self, ret): + self.ret_json = jsonpickle.encode(ret) + output = [] + output.append("# Initialize values for unit test.") + output.append("a = %s" % jsonpickle.decode(self.a_json)) + output.append("b = %s" % jsonpickle.decode(self.b_json)) + output.append("# Apply values.") + output.append("act = F(a, b)") + output.append("exp = %s" % jsonpickle.decode(self.ret_json)) + # output.append("self.assertEqual(act, exp)") + # output.append("assert act == exp") + output = "\n".join(output) + print("output=", output) + + +# def F(a: int, b: int): +# c = {} +# c["pavel"] = a + b +# return c + + +def F(a: int, b: int): + playback = Playback("", "", "F", a, b) + playback.start() + c = {} + c["pavel"] = a + b + playback.end(c) + return c + + +res = F(3, 4) +print(res) + + +# ############################################################################# +# Playback +# ############################################################################# + + +# %% +class Playback: # noqa: F811 + # def __init__(self, file_name, mode, *args, **kwargs): + # self.args = args + # self.kwargs = kwargs + def __init__(self, file_name, mode, func_name, a, b): + self.a = a + self.b = b + + def start(self): + self.a_json = jsonpickle.encode(self.a) + self.b_json = jsonpickle.encode(self.b) + + def end(self, ret): + self.ret_json = jsonpickle.encode(ret) + output = [] + output.append("# Initialize values for unit test.") + # output.append("a = %s" % jsonpickle.decode(self.a_json)) + # 
output.append("b = %s" % jsonpickle.decode(self.b_json)) + output.append(f"a = r'{self.a_json}'") + output.append("a = jsonpickle.decode(a)") + output.append(f"b = r'{self.b_json}'") + output.append("b = jsonpickle.decode(b)") + output.append("# Apply values.") + # output.append("act = F(a, b)[1]") + output.append("act = F(a, b)") + output.append(f"exp = r'{self.ret_json}'") + output.append("exp = jsonpickle.decode(exp)") + # output.append("self.assertEqual(act, exp)") + output.append("assert act.equals(exp)") + # output.append("assert act == exp") + output = "\n".join(output) + return output + + +# def F(a: int, b: int): +# c = {} +# c["pavel"] = a + b +# return c + +use_playback = True + + +def F(a: pd.DataFrame, b: pd.DataFrame): + if use_playback: + playback = Playback("", "", "F", a, b) + playback.start() + # c = {} + # c["pavel"] = a + b + c = a + b + if use_playback: + output = playback.end(c) + res = output, c + else: + res = c + return res + + +a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) +b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + +res = F(a, b) +output = res[0] +print(output) +exec(output) + +# %% +# Initialize values for unit test. 
+a = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' +a = jsonpickle.decode(a) + +# %% +a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + +# round_trip(a) +frozen = jsonpickle.encode(a) +print(frozen) +print(f"frozen2 = '{frozen}'") +# print("frozen = '%s'" % frozen) +assert 0 +# +print("frozen=") +print(json_pretty_print(frozen)) # noqa: F821 +# +obj2 = jsonpickle.decode(frozen) + +# %% +frozen2 = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' +print(frozen2) +# print("\n") +# print(frozen) +if False and isinstance(frozen2, str): + # print(frozen2[61]) + # assert 0 + frozen2 = json.loads(frozen2) + print(frozen2) +frozen2 = jsonpickle.decode(frozen2) + +# %% +a = """{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}""" +a = jsonpickle.decode(a) + +# %% +# Initialize values for unit test. 
+a = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +a = jsonpickle.decode(a) +b = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n1\n1\n1\n1\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +b = jsonpickle.decode(b) +# Apply values. +act = F(a, b) +exp = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n701\n251\n801\n1201\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +exp = jsonpickle.decode(exp) +assert act == exp + +# %% +# Initialize values for unit test. +a = 3 +b = 4 +# Apply values. 
+act = F(a, b) +exp = {"pavel": 7} +assert act == exp + +# %% +df2 = round_trip(df) # noqa: F821 + + +# ############################################################################# +# Thing +# ############################################################################# + + +# %% +class Thing: + def __init__(self, name): + self.name = name + + +obj = Thing("Awesome") + +round_trip(obj) # noqa: F821 + + +# %% +def test(a: int, b: int): + print(round_trip(a)) # noqa: F821 + + +test("strunz", 6) +test(4, 6) +test(["hello"], 6) + +# %% +df.index.dtype # + +# %% +df.dtypes + +# %% +# import io +# import io.StringIO +# from io import StringIO + +# output = StringIO.StringIO() + +orient = "columns" +# orient = "split" +# orient = "records" +# orient = "table" +df_as_str = df.to_json(orient=orient) + +# split +# records +# index +# values +# table +# columns (the default format) + +python_code = [] +target_var = "df_as_str" +python_code.append(f"{target_var} = {df_as_str}") +python_code.append(f"{target_var}.index.name = '{df.index.name}'") +python_code = "\n".join(python_code) +print(python_code) + +exec(python_code) + +# %% +arr = eval(df_as_str) +df2 = pd.DataFrame.from_dict(arr, orient="columns") +df2.index.name + +# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb new file mode 100644 index 000000000..4516033f2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb @@ -0,0 +1,1774 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "895cb286", + "metadata": {}, + "source": [ + "Show Parquet / Pyarrow API." 
+ ] + }, + { + "cell_type": "markdown", + "id": "b068d525", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "8f46ec68", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:53:22.684558Z", + "start_time": "2021-06-16T20:53:22.645267Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0m\u001b[33mWARNING\u001b[0m: Logger already initialized: skipping\n" + ] + } + ], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "import pandas as pd\n", + "import pyarrow as pa\n", + "import pyarrow.dataset as ds\n", + "import pyarrow.parquet as pq\n", + "from pyarrow.dataset import DirectoryPartitioning\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hio as hio\n", + "\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "215ff89e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:19:03.323062Z", + "start_time": "2021-06-15T11:19:03.303632Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "2000-01-04 0 A 97 62\n", + "2000-01-05 0 A 12 25\n" + ] + } + ], + "source": [ + "def get_df() -> pd.DataFrame:\n", + " \"\"\"\n", + " Create pandas random data, like:\n", + "\n", + " ```\n", + " idx instr val1 val2\n", + " 2000-01-01 0 A 99 30\n", + " 2000-01-02 0 A 54 46\n", + " 2000-01-03 0 A 85 86\n", + " ```\n", + " \"\"\"\n", + " instruments = \"A B C D E\".split()\n", + " \"id stock val1 val2\".split()\n", + " df_idx = pd.date_range(\n", + " pd.Timestamp(\"2000-01-01\"), pd.Timestamp(\"2000-01-15\"), freq=\"1D\"\n", + " )\n", + " # print(df_idx)\n", + " random.seed(1000)\n", + "\n", + " df = []\n", + " for idx, inst in 
enumerate(instruments):\n", + " df_tmp = pd.DataFrame(\n", + " {\n", + " \"idx\": idx,\n", + " \"instr\": inst,\n", + " \"val1\": [random.randint(0, 100) for k in range(len(df_idx))],\n", + " \"val2\": [random.randint(0, 100) for k in range(len(df_idx))],\n", + " },\n", + " index=df_idx,\n", + " )\n", + " # print(df_tmp)\n", + " df.append(df_tmp)\n", + " df = pd.concat(df)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "8e8235d0", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:35:16.903580Z", + "start_time": "2021-06-15T11:35:16.895316Z" + } + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "EOL while scanning string literal (, line 4)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m4\u001b[0m\n\u001b[0;31m txt += \"# df=\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m EOL while scanning string literal\n" + ] + } + ], + "source": [ + "def df_to_str(df: pd.DataFrame) -> str:\n", + " txt = \"\"\n", + " txt += \"# df=\\n%s\" % df.head(3)\n", + " txt += \"\\n# df.shape=\\n%s\" % str(df.shape)\n", + " txt += \"\\n# df.dtypes=\\n%s\" % str(df.dtypes)\n", + " return txt" + ] + }, + { + "cell_type": "markdown", + "id": "17cc474b", + "metadata": {}, + "source": [ + "# Save and load all data in one file" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "cb399156", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:27.514505Z", + "start_time": "2021-06-15T11:25:27.496811Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = 
get_df()\n", + "# print(df.head())\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "940dc7d2", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:34.893472Z", + "start_time": "2021-06-15T11:25:34.886977Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table=\n", + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "__index_level_0__: timestamp[ns]\n" + ] + } + ], + "source": [ + "table = pa.Table.from_pandas(df)\n", + "\n", + "print(\"table=\\n%s\" % table)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "93df67fc", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:38.560269Z", + "start_time": "2021-06-15T11:25:38.533905Z" + } + }, + "outputs": [], + "source": [ + "# Save.\n", + "file_name = \"df_in_one_file.pq\"\n", + "pq.write_table(table, file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "155e36c0", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:51.016044Z", + "start_time": "2021-06-15T11:25:51.001034Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "__index_level_0__: timestamp[us]\n", + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Load.\n", + "df2 = pq.read_table(file_name)\n", + "print(df2)\n", + "\n", + "df2 = df2.to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "1098757c", + "metadata": {}, + "source": [ + "## Read a subset of columns" + ] + }, + { + "cell_type": "code", + 
"execution_count": 27, + "id": "6f4a652f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:27:11.924350Z", + "start_time": "2021-06-15T11:27:11.910680Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pyarrow.Table\n", + "idx: int64\n", + "val1: int64\n", + "# df=\n", + " idx val1\n", + "0 0 99\n", + "1 0 54\n", + "2 0 85\n", + "# df.shape=\n", + "(75, 2)\n", + "# df.dtypes=\n", + "idx int64\n", + "val1 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df2 = pq.read_table(file_name, columns=[\"idx\", \"val1\"])\n", + "print(df2)\n", + "\n", + "df2 = df2.to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "012cebdb", + "metadata": {}, + "source": [ + "## Partitioned dataset\n", + "\n", + "from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data\n", + "\n", + "- A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir\n", + "- E.g., \"Hive\" patitioning scheme \"key=vale\" dir names" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ca26642e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:11.964993Z", + "start_time": "2021-06-15T11:30:11.947282Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = get_df()\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "7cae349f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:27.617064Z", + "start_time": "2021-06-15T11:30:27.541418Z" + } + }, + "outputs": [], + "source": [ + "base = 
\".\"\n", + "dir_name = os.path.join(base, \"parquet_dataset_partitioned\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "fd57116d", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:30.672054Z", + "start_time": "2021-06-15T11:30:30.389512Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls parquet_dataset_partitioned" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ac82b5ad", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:31:29.322947Z", + "start_time": "2021-06-15T11:31:29.298883Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./parquet_dataset_partitioned/idx=0/cab9de6eff0c47bcb688a1ce437c7f89.parquet\n", + "./parquet_dataset_partitioned/idx=1/56813e569097420cae892720d3bb0789.parquet\n", + "./parquet_dataset_partitioned/idx=2/5c9a17d2e1294dd58c7d8695868c2cb5.parquet\n", + "./parquet_dataset_partitioned/idx=3/b28576eb22d54999980a313a24511497.parquet\n", + "./parquet_dataset_partitioned/idx=4/8ee3f0d7585b48959a560c954562add8.parquet\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "64394b7f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:32:01.839074Z", + "start_time": "2021-06-15T11:32:01.822727Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 A 99 30 0\n", + "2000-01-02 A 54 46 0\n", + "2000-01-03 A 85 86 0\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 
int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read everything.\n", + "df2 = dataset.to_table().to_pandas()\n", + "\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "df96e1db", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:33:05.171630Z", + "start_time": "2021-06-15T11:33:05.147040Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 B 18 22 1\n", + "2000-01-02 B 59 89 1\n", + "2000-01-03 B 91 90 1\n", + "# df.shape=\n", + "(15, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n", + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 A 99 30 0\n", + "2000-01-02 A 54 46 0\n", + "2000-01-03 A 85 86 0\n", + "# df.shape=\n", + "(45, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Load part of the data.\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") == 1).to_pandas()\n", + "print(df_to_str(df2))\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") < 3).to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "b3c27848", + "metadata": {}, + "source": [ + "## Add year-month partitions" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "69d2ea15", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:36:11.106142Z", + "start_time": "2021-06-15T11:36:11.087701Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2 year month\n", + "2000-01-01 0 A 99 30 2000 1\n", + "2000-01-02 0 A 54 46 2000 1\n", + "2000-01-03 0 A 85 86 2000 1\n", + "# df.shape=\n", + "(75, 6)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + 
"val2 int64\n", + "year int64\n", + "month int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = get_df()\n", + "df[\"year\"] = df.index.year\n", + "df[\"month\"] = df.index.month\n", + "\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1a2f8c3a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:01.738085Z", + "start_time": "2021-06-15T11:37:01.730748Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table=\n", + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "year: int64\n", + "month: int64\n", + "__index_level_0__: timestamp[ns]\n" + ] + } + ], + "source": [ + "table = pa.Table.from_pandas(df)\n", + "\n", + "print(\"table=\\n%s\" % table)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "9112ed65", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:04.832037Z", + "start_time": "2021-06-15T11:37:04.702121Z" + } + }, + "outputs": [], + "source": [ + "base = \".\"\n", + "dir_name = os.path.join(base, \"pq_partitioned2\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\", \"year\", \"month\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "844913cc", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:17.553902Z", + "start_time": "2021-06-15T11:37:17.276875Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls $dir_name" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "e5ba8be3", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:07.695235Z", + "start_time": "2021-06-15T11:37:07.433612Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"bc6b2314c7f640a38c62029280f6f65e.parquet\r\n" + ] + } + ], + "source": [ + "!ls $dir_name/idx=0/year=2000/month=1" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "2d93f116", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:26.153218Z", + "start_time": "2021-06-15T11:37:26.109040Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./pq_partitioned2/idx=0/year=2000/month=1/bc6b2314c7f640a38c62029280f6f65e.parquet\n", + "./pq_partitioned2/idx=1/year=2000/month=1/bb178ff0bdd344ca8328f9d67398b322.parquet\n", + "./pq_partitioned2/idx=2/year=2000/month=1/16081eea25fd4da6bd802037b541766c.parquet\n", + "./pq_partitioned2/idx=3/year=2000/month=1/1557b3c461054eadba16e3072fbd3a8a.parquet\n", + "./pq_partitioned2/idx=4/year=2000/month=1/07a0c7fcf054450296b35452b57236ef.parquet\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "21148afd", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:39:19.396955Z", + "start_time": "2021-06-15T11:39:19.374534Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx year month\n", + "2000-01-01 C 99 37 2 2000 1\n", + "2000-01-02 C 98 48 2 2000 1\n", + "2000-01-03 C 70 58 2 2000 1\n", + "# df.shape=\n", + "(15, 6)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "year int32\n", + "month int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") == 2).to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + 
"id": "d9e4e596", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:54:34.478646Z", + "start_time": "2021-06-16T20:54:34.250254Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "year: int64\n", + "month: int64\n", + "__index_level_0__: timestamp[ns]\n", + "-- schema metadata --\n", + "pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975\n" + ] + } + ], + "source": [ + "# We could scan manually and create the dirs manually if we don't want to add\n", + "# add a new dir.\n", + "base = \".\"\n", + "dir_name = os.path.join(base, \"parquet_dataset_partitioned2\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "schemas = []\n", + "\n", + "schema = pa.Table.from_pandas(df).schema\n", + "print(schema)\n", + "# assert 0\n", + "# idx: int64\n", + "# instr: string\n", + "# val1: int64\n", + "# val2: int64\n", + "# year: int64\n", + "# month: int64\n", + "\n", + "# grouped = df.groupby(lambda x: x.day)\n", + "group_by_idx = df.groupby(\"idx\")\n", + "for idx, df_tmp in group_by_idx:\n", + " _LOG.debug(\"idx=%s -> df.shape=%s\", idx, str(df_tmp.shape))\n", + " #\n", + " group_by_year = df_tmp.groupby(lambda x: x.year)\n", + " for year, df_tmp2 in group_by_year:\n", + " _LOG.debug(\"year=%s -> df.shape=%s\", year, str(df_tmp2.shape))\n", + " #\n", + " group_by_month = df_tmp2.groupby(lambda x: x.month)\n", + " for month, df_tmp3 in group_by_month:\n", + " _LOG.debug(\"month=%s -> df.shape=%s\", month, str(df_tmp3.shape))\n", + " # file_name = \"df_in_one_file.pq\"\n", + " # pq.write_table(table, file_name)\n", + " # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", + " subdir_name = os.path.join(\n", + " dir_name, f\"idx={idx}\", f\"year={year}\", f\"month={month}\"\n", + " )\n", + " table = pa.Table.from_pandas(df_tmp3, schema=schema)\n", + " 
schemas.append(table.schema)\n", + " # print(df_tmp3)\n", + " # print(table.schema)\n", + " # pq.write_to_dataset(table,\n", + " # subdir_name, schema=schema)\n", + " file_name = os.path.join(subdir_name, \"df_out.pq\")\n", + " hio.create_enclosing_dir(file_name)\n", + " pq.write_table(table, file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "8309de4a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:41:14.320037Z", + "start_time": "2021-06-16T20:41:14.314354Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schemas[0] == schemas[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "f0e49f46", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:42:26.864001Z", + "start_time": "2021-06-16T20:42:26.856395Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " 
__index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975]" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schemas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1130cbc2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "e5bdcdd8", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:53:50.373825Z", + "start_time": "2021-06-16T20:53:50.099251Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_out.pq\r\n" + ] + } + ], + "source": [ + "!ls $dir_name/idx=0/year=2000/month=1" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "aaf67ae6", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:55:02.764098Z", + "start_time": "2021-06-16T20:55:02.717192Z" + } + }, + "outputs": [ + { + "ename": "ArrowInvalid", + "evalue": "Unable to merge: Field month has incompatible types: int64 vs int32", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mArrowInvalid\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msrc_dir\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34mf\"{dir_name}/idx=0/year=2000\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m dataset = ds.dataset(src_dir,\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"parquet\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m partitioning=\"hive\")\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36mdataset\u001b[0;34m(source, schema, format, filesystem, partitioning, partition_base_dir, exclude_invalid_files, ignore_prefixes)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_filesystem_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36m_filesystem_dataset\u001b[0;34m(source, schema, filesystem, partitioning, format, partition_base_dir, exclude_invalid_files, selector_ignore_prefixes)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mfactory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFileSystemDatasetFactory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpaths_or_selector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfactory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DatasetFactory.finish\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.pyarrow_internal_check_status\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mArrowInvalid\u001b[0m: Unable to merge: Field month has incompatible types: int64 vs int32" + ] + } + ], + "source": [ + "# Read data back.\n", + "# https://github.com/dask/dask/issues/4194\n", + "# src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\n", + "src_dir = f\"{dir_name}/idx=0/year=2000\"\n", + "dataset = ds.dataset(src_dir, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "df2 = dataset.to_table().to_pandas()\n", + "# 
print(df_to_str(df2))\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "markdown", + "id": "98f4111d", + "metadata": {}, + "source": [ + "## Partition manually" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "f0b33d85", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T00:57:11.260871Z", + "start_time": "2021-06-15T00:57:11.235982Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(((year == 2009) and (month == 11)) and (day == 3))\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Neither field_names nor schema was passed; cannot infer field_names", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/2009/11/3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiscover\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DirectoryPartitioning.discover\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Neither field_names nor schema was passed; cannot infer field_names" + ] + } + ], + "source": [ + "partitioning = DirectoryPartitioning(\n", + " pa.schema([(\"year\", pa.int16()), (\"month\", pa.int8()), (\"day\", pa.int8())])\n", + ")\n", + 
"print(partitioning.parse(\"/2009/11/3\"))\n", + "\n", + "# partitioning.discover()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "ad70cbee", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:03:31.809969Z", + "start_time": "2021-06-16T11:03:31.526597Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls /app/data" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "b19d1189", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:18:31.838549Z", + "start_time": "2021-06-16T11:18:31.821223Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", + "/app/data/idx=0/year=2000/month=1/0435eeb615b14155bdc26266b91a4b1b.parquet\n", + "/app/data/idx=0/year=2000/month=1/05cc8a039ec746acb5668fde0a372028.parquet\n", + "/app/data/idx=0/year=2000/month=1/06039c8c6e9e4d54be7dcbef2bcdfa78.parquet\n", + "/app/data/idx=0/year=2000/month=1/0bb8e349594445a08fca4e337a7922d4.parquet\n", + "/app/data/idx=0/year=2000/month=1/115df7cedef540469cec56ee40ac19bd.parquet\n", + "/app/data/idx=0/year=2000/month=1/1174a70ffe614f4a9875b680e255902b.parquet\n", + "/app/data/idx=0/year=2000/month=1/122bdd75109c4fc7918d6db00f7bed41.parquet\n", + "/app/data/idx=0/year=2000/month=1/1543e41a14234c279fdfaa8656e8a71d.parquet\n", + "/app/data/idx=0/year=2000/month=1/17bd91618d5240fe83309608e91cb1ef.parquet\n", + "/app/data/idx=0/year=2000/month=1/18461c0ee57845768a503cfc865e323b.parquet\n", + "/app/data/idx=0/year=2000/month=1/1994694468184272a388fb8b40f03d5b.parquet\n", + "/app/data/idx=0/year=2000/month=1/1eb5f1adbe57418fa5d866d35902c39a.parquet\n", + "/app/data/idx=0/year=2000/month=1/2bd3c3ae435b489bb194ef7b2a715d9a.parquet\n", + "/app/data/idx=0/year=2000/month=1/2ded0d292def4e1186653d90852295f6.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/2ff4a2fa54664e67bab85a76324738ed.parquet\n", + "/app/data/idx=0/year=2000/month=1/378e55b8faf24033abf1c275741a88e8.parquet\n", + "/app/data/idx=0/year=2000/month=1/37a96e8834af4a87bc64ec3d1199ad54.parquet\n", + "/app/data/idx=0/year=2000/month=1/3a24331d6e51402d9a86c974f8a3bd05.parquet\n", + "/app/data/idx=0/year=2000/month=1/3ae93c6a21cc4a88bbaf90219f275563.parquet\n", + "/app/data/idx=0/year=2000/month=1/3b5f35d2add64a738cec5061659e35a2.parquet\n", + "/app/data/idx=0/year=2000/month=1/3d16749690f840c49facce0e37461a7e.parquet\n", + "/app/data/idx=0/year=2000/month=1/44bf70580b9a43829addb2a9e8f89dc6.parquet\n", + "/app/data/idx=0/year=2000/month=1/46237f338cee47c69f33b15fcb83817a.parquet\n", + "/app/data/idx=0/year=2000/month=1/46f19733b2d642c29adb58bf9499b6ca.parquet\n", + "/app/data/idx=0/year=2000/month=1/485f7d3d06b3486ca4bc8b35420f997a.parquet\n", + "/app/data/idx=0/year=2000/month=1/4cd8551c6c8f4daab7313732b9c1cea8.parquet\n", + "/app/data/idx=0/year=2000/month=1/4eca6951932d47d8a5678422da4a3d70.parquet\n", + "/app/data/idx=0/year=2000/month=1/512a55d6f91c412f951ba66728bfe118.parquet\n", + "/app/data/idx=0/year=2000/month=1/521193be72e1465ca33034cfd8e93ac9.parquet\n", + "/app/data/idx=0/year=2000/month=1/59aa37cc4792493899e812215b3bb822.parquet\n", + "/app/data/idx=0/year=2000/month=1/5aaa32a61b614d65a91747336c8028f0.parquet\n", + "/app/data/idx=0/year=2000/month=1/5cf9306e97ae42fdae53369710a5d0b6.parquet\n", + "/app/data/idx=0/year=2000/month=1/5fb97e575a9c4ff282293e9810040594.parquet\n", + "/app/data/idx=0/year=2000/month=1/613e6048f8434fccafed8c9d457fddc1.parquet\n", + "/app/data/idx=0/year=2000/month=1/61d608f23a69494eaef248d79a776ede.parquet\n", + "/app/data/idx=0/year=2000/month=1/62bf226aebb641229b33f7e3bf9f5cb1.parquet\n", + "/app/data/idx=0/year=2000/month=1/62c09d56d67d4c738568fed318152ca9.parquet\n", + "/app/data/idx=0/year=2000/month=1/652129318d7a4d5b83e256a94803ecdc.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/67607b3708e44233927974a861972a99.parquet\n", + "/app/data/idx=0/year=2000/month=1/69915fb955c24302a905e4520a76f547.parquet\n", + "/app/data/idx=0/year=2000/month=1/6cc812431ef44bd195e5baf9715095a6.parquet\n", + "/app/data/idx=0/year=2000/month=1/6ce2493e58b34b53ae42da84ee0ef165.parquet\n", + "/app/data/idx=0/year=2000/month=1/6e88cf1910bc4a71bcea865ed1605363.parquet\n", + "/app/data/idx=0/year=2000/month=1/70c1ba22a85f4b489096f80eacd5855c.parquet\n", + "/app/data/idx=0/year=2000/month=1/7705f37eac7e40ceb2fba4c9fd2cb81d.parquet\n", + "/app/data/idx=0/year=2000/month=1/7bdb4feb6a874697b8c2f9a6cb03a6e6.parquet\n", + "/app/data/idx=0/year=2000/month=1/7fd82496e8274e999d217df302fd46b0.parquet\n", + "/app/data/idx=0/year=2000/month=1/8130570ceae44ca69ce7b2cd9865c3ec.parquet\n", + "/app/data/idx=0/year=2000/month=1/83f8e04fd5ac49ec80ac7b98e8221278.parquet\n", + "/app/data/idx=0/year=2000/month=1/8469e01698bf47f28fda41a3935eeb64.parquet\n", + "/app/data/idx=0/year=2000/month=1/874aa31290804dd0abca1a8f40dc4875.parquet\n", + "/app/data/idx=0/year=2000/month=1/888ef49654f241df8cae8454a5cd3f07.parquet\n", + "/app/data/idx=0/year=2000/month=1/8aa4e41d00fc438c9de0906ecc66bbb9.parquet\n", + "/app/data/idx=0/year=2000/month=1/90e662712235472ebae79fd64eaae094.parquet\n", + "/app/data/idx=0/year=2000/month=1/91e7dcbfc57a495a943bad2400690bc1.parquet\n", + "/app/data/idx=0/year=2000/month=1/9394c04aef64432fb94219d0e8b50286.parquet\n", + "/app/data/idx=0/year=2000/month=1/9740961302bc40b192d20715c52d6ef6.parquet\n", + "/app/data/idx=0/year=2000/month=1/99e92f5585514ed4bd43b5bf50bdaaa8.parquet\n", + "/app/data/idx=0/year=2000/month=1/9bf5c3598f69411fb1acdc30779b25bd.parquet\n", + "/app/data/idx=0/year=2000/month=1/9d81c342203c4396ac2d9efcbb0cae7a.parquet\n", + "/app/data/idx=0/year=2000/month=1/9d8b2486e80f40468cf4ae50a41fda41.parquet\n", + "/app/data/idx=0/year=2000/month=1/a170565f336f4b3b99994c8d83012a4d.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/a50138dae90f478781bf032908703ef4.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5ab58aa310e47669e9d3604bf94f155.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5bd118e999e4df6ab3306e52671228e.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5c0a7da693147b98f68811b4af7c79e.parquet\n", + "/app/data/idx=0/year=2000/month=1/a84afce396eb4afa91de3b08129e2ab7.parquet\n", + "/app/data/idx=0/year=2000/month=1/a8c1f364a7c944bb89d59d354059e596.parquet\n", + "/app/data/idx=0/year=2000/month=1/aa3bb180eda948c4aab93428ece443a8.parquet\n", + "/app/data/idx=0/year=2000/month=1/aa868fa8e11a4a838c19a1a260dcf6f6.parquet\n", + "/app/data/idx=0/year=2000/month=1/b01aa53c572d492f9667f157455742fc.parquet\n", + "/app/data/idx=0/year=2000/month=1/b6a7fc9dd14a4af6a3635cd138abdfe2.parquet\n", + "/app/data/idx=0/year=2000/month=1/b740e474de9f4b5497877c14f688faed.parquet\n", + "/app/data/idx=0/year=2000/month=1/b81d3d9c4045498c9deb3968b935e422.parquet\n", + "/app/data/idx=0/year=2000/month=1/b8c5a9f58500424785e4c83520931127.parquet\n", + "/app/data/idx=0/year=2000/month=1/b9176233e3934efebb0b12e1a780a3b1.parquet\n", + "/app/data/idx=0/year=2000/month=1/ba3d62351b7745f5a4e18f27159d5820.parquet\n", + "/app/data/idx=0/year=2000/month=1/bb9f583ed63840b39ada7bb0f45b9d57.parquet\n", + "/app/data/idx=0/year=2000/month=1/c55358bb09194e7aad9828678b5eaa61.parquet\n", + "/app/data/idx=0/year=2000/month=1/c5e31c9f04a6491dbf068fa889095e27.parquet\n", + "/app/data/idx=0/year=2000/month=1/c70308ef1a954ccea429f0de60c41fb3.parquet\n", + "/app/data/idx=0/year=2000/month=1/cf1e928b55ba4dd09bfa2765dadffb76.parquet\n", + "/app/data/idx=0/year=2000/month=1/d08715970c714455b7b9fbf18a86e8c0.parquet\n", + "/app/data/idx=0/year=2000/month=1/d27b68dc839f47e2a25814d805b9d759.parquet\n", + "/app/data/idx=0/year=2000/month=1/d46043c1511647a5b3b96450580ce6e1.parquet\n", + "/app/data/idx=0/year=2000/month=1/d592794fbc7f4ed0877d5a350fabf8d4.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/d8b05ee145d046a1ac321708b68e91de.parquet\n", + "/app/data/idx=0/year=2000/month=1/d9163626e55f40bb88142c43eb4b9fab.parquet\n", + "/app/data/idx=0/year=2000/month=1/dcb0cd8bc9084246955a6090f643a43d.parquet\n", + "/app/data/idx=0/year=2000/month=1/dd0db6d0e040442bb0b950efa6ac6e6a.parquet\n", + "/app/data/idx=0/year=2000/month=1/dd306d9fd65a459fbbf1e32fc9260ae3.parquet\n", + "/app/data/idx=0/year=2000/month=1/e05e535b8969470680658f6c2924bb68.parquet\n", + "/app/data/idx=0/year=2000/month=1/e3aff8e0f7094609b4de8bacac5faa4c.parquet\n", + "/app/data/idx=0/year=2000/month=1/e439d12c5539461da2b12a54d7dbb1c3.parquet\n", + "/app/data/idx=0/year=2000/month=1/e51258868c044644a708c74ff4c2ca46.parquet\n", + "/app/data/idx=0/year=2000/month=1/ea632843bd34467496837fea693443ff.parquet\n", + "/app/data/idx=0/year=2000/month=1/ecf1306aadb04ecdabb50803116eb0fa.parquet\n", + "/app/data/idx=0/year=2000/month=1/ef2355b80a7346afbabd33743d7e69a2.parquet\n", + "/app/data/idx=0/year=2000/month=1/ef7d760f2a2245e08f8c038bdf554edd.parquet\n", + "/app/data/idx=0/year=2000/month=1/f4ca5d31138248eca2beb467548461ed.parquet\n", + "/app/data/idx=0/year=2000/month=1/fba715c8fda84ad88d370f71b2408c12.parquet\n", + "/app/data/idx=0/year=2000/month=1/fe435999dba9476baec1b3009d529d32.parquet\n", + "/app/data/idx=0/year=2000/month=1/fe53414bfef84cb39ca04b48c8e8332c.parquet\n", + "/app/data/idx=0/year=2000/month=1/ff75b3e1006f42c9ba9deb689324ee3e.parquet\n", + "/app/data/idx=1/year=2000/month=1/056b4d30021044298d7fde4cdd296561.parquet\n", + "/app/data/idx=1/year=2000/month=1/0c138f0939f347928f5c2d1c92207d57.parquet\n", + "/app/data/idx=1/year=2000/month=1/0cb27647424c4302b7a1cd47369b4e6d.parquet\n", + "/app/data/idx=1/year=2000/month=1/1064ed9fc62a450890a19bd906d7953a.parquet\n", + "/app/data/idx=1/year=2000/month=1/14f3b6e2235c4a2eabf23840c82059ec.parquet\n", + "/app/data/idx=1/year=2000/month=1/1541e4cf70a048b88c7f8296456b8437.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/19e5b00a91f64342be20a2faee8ef69c.parquet\n", + "/app/data/idx=1/year=2000/month=1/2512f9bc30c04375bd71f270e1901050.parquet\n", + "/app/data/idx=1/year=2000/month=1/2641066820c74d5fadd5d1a42b40d23f.parquet\n", + "/app/data/idx=1/year=2000/month=1/2b1c634e1ded48a2887abbb539f1ea41.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bc577092b964473943428b8c04f6414.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bc84c76804345c581c00b8e0ad59752.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bd2238465b1416a8870494b579fae42.parquet\n", + "/app/data/idx=1/year=2000/month=1/2d5c13231ffc48aeb76bdb071663ceff.parquet\n", + "/app/data/idx=1/year=2000/month=1/2e48508ad08c4154813996117b6a833a.parquet\n", + "/app/data/idx=1/year=2000/month=1/3ca7d082ede544aab9f1f564acbffc14.parquet\n", + "/app/data/idx=1/year=2000/month=1/3d1f61cf39764307bbf39762d9c38af7.parquet\n", + "/app/data/idx=1/year=2000/month=1/40a2f2b0bd8c49be95aafc319ffd4a69.parquet\n", + "/app/data/idx=1/year=2000/month=1/4201c94937bc44f3809d9bf883b49cd7.parquet\n", + "/app/data/idx=1/year=2000/month=1/422474d1c6934fd298944ef7c9f21bfe.parquet\n", + "/app/data/idx=1/year=2000/month=1/444a6621429443c8b6550c6c04b27a24.parquet\n", + "/app/data/idx=1/year=2000/month=1/4940c21244274606bd6b543df4738ccf.parquet\n", + "/app/data/idx=1/year=2000/month=1/4b87781720884af7ae79d3f59fd69cd3.parquet\n", + "/app/data/idx=1/year=2000/month=1/4dd866c257864005a62854991f666b25.parquet\n", + "/app/data/idx=1/year=2000/month=1/4f06000c93bb45f18edfa84eeb89a1b9.parquet\n", + "/app/data/idx=1/year=2000/month=1/50716e5b2e004ba38d414a101ae09427.parquet\n", + "/app/data/idx=1/year=2000/month=1/50fc4338cf41483091d11a2616eb6221.parquet\n", + "/app/data/idx=1/year=2000/month=1/563109ba1ed647ef9518393a9d1ddb2e.parquet\n", + "/app/data/idx=1/year=2000/month=1/586e3969f1084af2bf28cee6f721cdc6.parquet\n", + "/app/data/idx=1/year=2000/month=1/5a1ba9682db3414ea33666e64d055535.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/5e2241ecbf364a0784626be86e38d6eb.parquet\n", + "/app/data/idx=1/year=2000/month=1/6300ef1b3beb44f0937dc8f890e845ce.parquet\n", + "/app/data/idx=1/year=2000/month=1/64aeabc396ba42ada56c695a32ed12a7.parquet\n", + "/app/data/idx=1/year=2000/month=1/664ca39e99134dabbe6d4c7402f626aa.parquet\n", + "/app/data/idx=1/year=2000/month=1/68efc5543f394005bb82c0dc63a3b01f.parquet\n", + "/app/data/idx=1/year=2000/month=1/6c51260b47964705a3dcfa1cf25ca106.parquet\n", + "/app/data/idx=1/year=2000/month=1/6f9ad552153244679f73a058dfc5b42e.parquet\n", + "/app/data/idx=1/year=2000/month=1/718ffd8c75a14cde953e8e3275341d31.parquet\n", + "/app/data/idx=1/year=2000/month=1/728984a554734a25a69f0eb1f32f842f.parquet\n", + "/app/data/idx=1/year=2000/month=1/75296fd97a724c74bc09e9d64b528f50.parquet\n", + "/app/data/idx=1/year=2000/month=1/76ca85d0dfd849829f105ee6fddb6439.parquet\n", + "/app/data/idx=1/year=2000/month=1/77ac6bd92e7f4a46bbc7634de174bbf3.parquet\n", + "/app/data/idx=1/year=2000/month=1/79a48d3eb0c144ccb13fa4baf944c92b.parquet\n", + "/app/data/idx=1/year=2000/month=1/7a1ae42ab80b4cbf9c00a5b7f213a12c.parquet\n", + "/app/data/idx=1/year=2000/month=1/7af9fe9698494063a751f9a8f5a317dc.parquet\n", + "/app/data/idx=1/year=2000/month=1/7cd226f5679b4cae9af7b881fa1787b7.parquet\n", + "/app/data/idx=1/year=2000/month=1/7fed9a3f251c44209ce0933cfe60ec98.parquet\n", + "/app/data/idx=1/year=2000/month=1/842f90063cbb44b4ae1e7d6b9b4aa59e.parquet\n", + "/app/data/idx=1/year=2000/month=1/84dceabacd264c82981347142463feb9.parquet\n", + "/app/data/idx=1/year=2000/month=1/85d7b8fa841e42b097e34dcd8f13beca.parquet\n", + "/app/data/idx=1/year=2000/month=1/878a1b363a0a48c3b0af294e9f885d72.parquet\n", + "/app/data/idx=1/year=2000/month=1/887e26b6f1004e4fb2a5e373b4d9c5f3.parquet\n", + "/app/data/idx=1/year=2000/month=1/88bc144aa2ed4334b077b19f702a9a99.parquet\n", + "/app/data/idx=1/year=2000/month=1/88fe979886ee453789ca1b1083300618.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/8b75d58338e64ae1bc694bb0d7044597.parquet\n", + "/app/data/idx=1/year=2000/month=1/8cf24285a4a5450ca5c56c731f5c87a0.parquet\n", + "/app/data/idx=1/year=2000/month=1/8d873dde8103478ba44283b5c90e5060.parquet\n", + "/app/data/idx=1/year=2000/month=1/8e25293517d8490b9f12892f63f35b3a.parquet\n", + "/app/data/idx=1/year=2000/month=1/92bbf16c4b7f4888ae4f93efcec6d40a.parquet\n", + "/app/data/idx=1/year=2000/month=1/9443d531d13f41b491771f22caa9d5a4.parquet\n", + "/app/data/idx=1/year=2000/month=1/94b871d36d384a24a6f42f34d56f822c.parquet\n", + "/app/data/idx=1/year=2000/month=1/9543cef54d3340ba9c8a2dca154947b8.parquet\n", + "/app/data/idx=1/year=2000/month=1/985415e78a0c4abcb42a96c44bdef44b.parquet\n", + "/app/data/idx=1/year=2000/month=1/9b501f9c98c3455ab37f13dc32d4836e.parquet\n", + "/app/data/idx=1/year=2000/month=1/9f0ac6f2e23242b1afb424389a8a1f08.parquet\n", + "/app/data/idx=1/year=2000/month=1/a20bfc0770454e1185f3d1b91efed93c.parquet\n", + "/app/data/idx=1/year=2000/month=1/a31f4026dbab4ef9807081ad9be5e5cc.parquet\n", + "/app/data/idx=1/year=2000/month=1/a472f43a45da4357b63cb0b5535e3237.parquet\n", + "/app/data/idx=1/year=2000/month=1/a74453d72e364b0f819ecf238d9b53fd.parquet\n", + "/app/data/idx=1/year=2000/month=1/a94d3fce611243d29a21b612f01e5a18.parquet\n", + "/app/data/idx=1/year=2000/month=1/a990f67b865f4e599ffa926341915ae2.parquet\n", + "/app/data/idx=1/year=2000/month=1/aa28c2d20ed140b18ddead5b11b96a0b.parquet\n", + "/app/data/idx=1/year=2000/month=1/aa724649481e4f7aa95b78cfe333c72d.parquet\n", + "/app/data/idx=1/year=2000/month=1/ac4487b08071423481580622be8d9914.parquet\n", + "/app/data/idx=1/year=2000/month=1/ad2a3795a1ad46f0b7b509a6ebdc85f4.parquet\n", + "/app/data/idx=1/year=2000/month=1/afa56f8175ed41a8b34bac4ac6786cf3.parquet\n", + "/app/data/idx=1/year=2000/month=1/b6c7cee2c50642bbaacf29e16dbbece5.parquet\n", + "/app/data/idx=1/year=2000/month=1/b9c0158311a04c3fa9c594d6db280053.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/bbbd7a1b72b645ed8afdada3a0fd9fac.parquet\n", + "/app/data/idx=1/year=2000/month=1/bbce481ce9fc404684db9578007edd4b.parquet\n", + "/app/data/idx=1/year=2000/month=1/bbf2ea53874d4bb49b7ebf959c24b060.parquet\n", + "/app/data/idx=1/year=2000/month=1/bd054b89ad8a46f29968468a4fd6d34d.parquet\n", + "/app/data/idx=1/year=2000/month=1/c1a395d1127240c1b9d7ebcb0d63842f.parquet\n", + "/app/data/idx=1/year=2000/month=1/c27376832ccd439685bdc3b11cdcec0f.parquet\n", + "/app/data/idx=1/year=2000/month=1/c5c55b01bbe1494e9297385e99e9f0d3.parquet\n", + "/app/data/idx=1/year=2000/month=1/c872faa9a863454cadc603827abd3f6c.parquet\n", + "/app/data/idx=1/year=2000/month=1/c9528d72e8574a279c0995c3de171de3.parquet\n", + "/app/data/idx=1/year=2000/month=1/cb7475b11c924a689515ade22ec7b134.parquet\n", + "/app/data/idx=1/year=2000/month=1/cb9a2e526b7845daaaf8f3ced61d8597.parquet\n", + "/app/data/idx=1/year=2000/month=1/cd356e54f63c483ea4792e842667c1ac.parquet\n", + "/app/data/idx=1/year=2000/month=1/cdd3925db9ae44a0ba2760031b229219.parquet\n", + "/app/data/idx=1/year=2000/month=1/d118c630c6194befaae2217985c9073c.parquet\n", + "/app/data/idx=1/year=2000/month=1/d68ddf28bd144430a5dc2c4437f37472.parquet\n", + "/app/data/idx=1/year=2000/month=1/d7adfebd0e9249f989f41e10ca61bf59.parquet\n", + "/app/data/idx=1/year=2000/month=1/d9b7947e9c6b400080d2226093fcc571.parquet\n", + "/app/data/idx=1/year=2000/month=1/d9f610ef03c748619ee5ef2ddcde2634.parquet\n", + "/app/data/idx=1/year=2000/month=1/dcbf892a4231404c90139ee3adfc6815.parquet\n", + "/app/data/idx=1/year=2000/month=1/e083fc488a7446bbbdad82c37f8fca29.parquet\n", + "/app/data/idx=1/year=2000/month=1/e5f84abccb0d407898e892f78dcb9ce1.parquet\n", + "/app/data/idx=1/year=2000/month=1/e74ca84dac2e4d53977a54d9daeb7adc.parquet\n", + "/app/data/idx=1/year=2000/month=1/e85272be7a1c411a886bc856c6012396.parquet\n", + "/app/data/idx=1/year=2000/month=1/ec83d2e5ff534be1b28b4cf511b67e0d.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/ef6709a1008c43cc994cf01278474c94.parquet\n", + "/app/data/idx=1/year=2000/month=1/f7249440aa6f403f934e5018d34a583c.parquet\n", + "/app/data/idx=1/year=2000/month=1/fc96559adfd2419a9a1cf883b4d521fb.parquet\n", + "/app/data/idx=2/year=2000/month=1/0210672cfa44441bbcf4c07a2bd3c467.parquet\n", + "/app/data/idx=2/year=2000/month=1/0259160641d446518dffe477c5265240.parquet\n", + "/app/data/idx=2/year=2000/month=1/04cce58d49ba4c3982dd0823f43f29a9.parquet\n", + "/app/data/idx=2/year=2000/month=1/058031e2ce2d4bd99cbe7297756dd547.parquet\n", + "/app/data/idx=2/year=2000/month=1/06918413b55f43a19fb7f4e13712c396.parquet\n", + "/app/data/idx=2/year=2000/month=1/07a8bd5cb80140a48f709d86fe3e00aa.parquet\n", + "/app/data/idx=2/year=2000/month=1/0bca80d1ee444038871e5fbb1ccc4d21.parquet\n", + "/app/data/idx=2/year=2000/month=1/0bd86024c6234346b739be5af1a49ed2.parquet\n", + "/app/data/idx=2/year=2000/month=1/0c2d3de1afda4b8f82f43cf658a09fb8.parquet\n", + "/app/data/idx=2/year=2000/month=1/0d4d954eab7043a0a8d7bd751897deb5.parquet\n", + "/app/data/idx=2/year=2000/month=1/0d976fec817b4dd88d3082fe39e6f2b6.parquet\n", + "/app/data/idx=2/year=2000/month=1/12255adedd3948d4b8ced88001a61e04.parquet\n", + "/app/data/idx=2/year=2000/month=1/1406843e1322465e8384ba8685a9eb9d.parquet\n", + "/app/data/idx=2/year=2000/month=1/15ab3cbd13ce4fc7ab69d5c2b1672ca2.parquet\n", + "/app/data/idx=2/year=2000/month=1/18b68b427e2947bbaee4122bc2b0fbf8.parquet\n", + "/app/data/idx=2/year=2000/month=1/1a883ab5889441578fbf5f0a2c822c07.parquet\n", + "/app/data/idx=2/year=2000/month=1/1cc7612ae5e34455a716fc38b84427bb.parquet\n", + "/app/data/idx=2/year=2000/month=1/1e188269ac30443fa796a8bdbea70e46.parquet\n", + "/app/data/idx=2/year=2000/month=1/1e803b9281ef4d4289f8a207de2fd2a2.parquet\n", + "/app/data/idx=2/year=2000/month=1/2099fd988d544989b1117a45cd92e2c5.parquet\n", + "/app/data/idx=2/year=2000/month=1/245b2e48c3d442f990dfd9f3f18f5544.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/2cc8cd4af02e48728683551df1d9b517.parquet\n", + "/app/data/idx=2/year=2000/month=1/2e389e9f7c4f43ea8ff96d1fa13f0347.parquet\n", + "/app/data/idx=2/year=2000/month=1/2edcd33c70704b64b80987aba03d724e.parquet\n", + "/app/data/idx=2/year=2000/month=1/3170732421924aeaa451ca82a4b77131.parquet\n", + "/app/data/idx=2/year=2000/month=1/3227923c1dab4e7fbe07511111e76f67.parquet\n", + "/app/data/idx=2/year=2000/month=1/3607d6e90ab64fff84b4f2c9477540ce.parquet\n", + "/app/data/idx=2/year=2000/month=1/3b733f19c98f44ebb6ab31e93d18f09b.parquet\n", + "/app/data/idx=2/year=2000/month=1/3d79e3dd93d44a208aadd899a9632005.parquet\n", + "/app/data/idx=2/year=2000/month=1/3e2d5106997b4d2a8a4aaaada70b5c34.parquet\n", + "/app/data/idx=2/year=2000/month=1/3ea888ba5f0c4c46aaa55795799c8614.parquet\n", + "/app/data/idx=2/year=2000/month=1/4065fbfffe364f5b8f661dd0caff5c00.parquet\n", + "/app/data/idx=2/year=2000/month=1/4438f729a59e4bee856e9766a7866777.parquet\n", + "/app/data/idx=2/year=2000/month=1/489418f859104268b59905195289b433.parquet\n", + "/app/data/idx=2/year=2000/month=1/4a07e13d3bab4ee4bed09868f4d0ae6a.parquet\n", + "/app/data/idx=2/year=2000/month=1/4ade79216a6f42ffbfa7ee5c2949d904.parquet\n", + "/app/data/idx=2/year=2000/month=1/508e221eeacc4624977761af65fdf95f.parquet\n", + "/app/data/idx=2/year=2000/month=1/520f6ff1dee6468099730664d5bea3de.parquet\n", + "/app/data/idx=2/year=2000/month=1/537a5c5b6d2949eca8c35db48dcc123f.parquet\n", + "/app/data/idx=2/year=2000/month=1/552fbc5a37494e7bb792e3c225cd4021.parquet\n", + "/app/data/idx=2/year=2000/month=1/589b0598f3eb4f178125912219919413.parquet\n", + "/app/data/idx=2/year=2000/month=1/62c0c2448a5d49889e2d2b8421264798.parquet\n", + "/app/data/idx=2/year=2000/month=1/6312935db784424a957645de2de4a4c2.parquet\n", + "/app/data/idx=2/year=2000/month=1/64f32e163bed483b860f21c6666b0a7d.parquet\n", + "/app/data/idx=2/year=2000/month=1/66414c74b1ab4c3cb155b440359b1705.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/6f0e1508809f47efba9fe398311b711c.parquet\n", + "/app/data/idx=2/year=2000/month=1/724d5c288c834e34846ad8871a94ee10.parquet\n", + "/app/data/idx=2/year=2000/month=1/740e15b45d2745a997e81672fc58481e.parquet\n", + "/app/data/idx=2/year=2000/month=1/75d5db2fb8404493bd6f6ebbeee50e91.parquet\n", + "/app/data/idx=2/year=2000/month=1/765f6b9e1260430680f79e9c4b8de8a1.parquet\n", + "/app/data/idx=2/year=2000/month=1/7893a366f6fd4770ac34af71a74af552.parquet\n", + "/app/data/idx=2/year=2000/month=1/7d6b206a0cdc4c7baefb675350602e10.parquet\n", + "/app/data/idx=2/year=2000/month=1/7d7c7ec0eaf04cf386ce6d93c5107246.parquet\n", + "/app/data/idx=2/year=2000/month=1/7e5eb92603774185bce487436db2af8f.parquet\n", + "/app/data/idx=2/year=2000/month=1/7f393857790e43da9549ed4c69797d18.parquet\n", + "/app/data/idx=2/year=2000/month=1/7f72ff606a804972a50960d0efcebcae.parquet\n", + "/app/data/idx=2/year=2000/month=1/8415983fe0a549c89ea28b25db102138.parquet\n", + "/app/data/idx=2/year=2000/month=1/86cf478f40914946b5b86106be97f7d8.parquet\n", + "/app/data/idx=2/year=2000/month=1/86f1de6e862141be8bd612465486fd16.parquet\n", + "/app/data/idx=2/year=2000/month=1/895fb45b8f554034a79ebd9c8eff9cad.parquet\n", + "/app/data/idx=2/year=2000/month=1/896bad5a081440b582d71fbb5baa4998.parquet\n", + "/app/data/idx=2/year=2000/month=1/8c2163530eef4b7b9e22fc1d4d99d6d5.parquet\n", + "/app/data/idx=2/year=2000/month=1/8c3b5f112ddf48e1a165bcad69f7e548.parquet\n", + "/app/data/idx=2/year=2000/month=1/8f54037c274c424fa2e13e83afe6a983.parquet\n", + "/app/data/idx=2/year=2000/month=1/9267bc6aecba4d66952bc7778a97bbb0.parquet\n", + "/app/data/idx=2/year=2000/month=1/978623e40a264ecbb8e3e7afee4a9221.parquet\n", + "/app/data/idx=2/year=2000/month=1/9b501c10edd94539b8147571202e7dfe.parquet\n", + "/app/data/idx=2/year=2000/month=1/a2cbd94909a7409cb233cc388fcd53be.parquet\n", + "/app/data/idx=2/year=2000/month=1/a570b6d3b72d4c8090c4efcb2eeb2d70.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/a88b8e956c104202a8f2d279c7e58741.parquet\n", + "/app/data/idx=2/year=2000/month=1/a97573410ce04706ac3d5c88f9cd285e.parquet\n", + "/app/data/idx=2/year=2000/month=1/a9c31f330c2d454a8911627eaafe7e31.parquet\n", + "/app/data/idx=2/year=2000/month=1/aa941bd2b9574ce294967019aa4cd515.parquet\n", + "/app/data/idx=2/year=2000/month=1/af86ac06c6f7484c8bbb8215a408ce73.parquet\n", + "/app/data/idx=2/year=2000/month=1/b35d48ff673541559bf27f4c3e1feab6.parquet\n", + "/app/data/idx=2/year=2000/month=1/b5b85036b2c540f9add4b86012873462.parquet\n", + "/app/data/idx=2/year=2000/month=1/b8b4abc89c824a17a263d898f4bca476.parquet\n", + "/app/data/idx=2/year=2000/month=1/bb6a1df466d84085bc0900641233cbc3.parquet\n", + "/app/data/idx=2/year=2000/month=1/bb95334225ce41768c1175ccabad174b.parquet\n", + "/app/data/idx=2/year=2000/month=1/bca9c21e480249eebb26aeed167b1293.parquet\n", + "/app/data/idx=2/year=2000/month=1/bf49382a8e024ffe9c17e4849ce4127f.parquet\n", + "/app/data/idx=2/year=2000/month=1/c06c38062a2b4e13b4e1ee1eaf03bfa2.parquet\n", + "/app/data/idx=2/year=2000/month=1/c1f40b6256444001af06dc2fb98f5e5c.parquet\n", + "/app/data/idx=2/year=2000/month=1/c4968d0cbcd54c83a0dd3e57039f0578.parquet\n", + "/app/data/idx=2/year=2000/month=1/c6afa57132184a71becf083d1b553473.parquet\n", + "/app/data/idx=2/year=2000/month=1/c87a24c747984bf58745b666dac98323.parquet\n", + "/app/data/idx=2/year=2000/month=1/cc34429087f54f7aaf1e84bc12517c26.parquet\n", + "/app/data/idx=2/year=2000/month=1/cc839cdd3fbe465abc78861a4cc11acf.parquet\n", + "/app/data/idx=2/year=2000/month=1/db6c45d7e8234bc1949ddd8973010d7f.parquet\n", + "/app/data/idx=2/year=2000/month=1/dbb0a2e2bdbc4319a07d04af0d9356fc.parquet\n", + "/app/data/idx=2/year=2000/month=1/dbde0aee2a4647939d6f027a99e37cc4.parquet\n", + "/app/data/idx=2/year=2000/month=1/ddd0738116b5496391991ad6d3e781b9.parquet\n", + "/app/data/idx=2/year=2000/month=1/e52fd781bd78475789d4160624a6e34a.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/e9c5c04f931f4fd4b6afb51db34cda54.parquet\n", + "/app/data/idx=2/year=2000/month=1/eee841a6139a4fe19620045f04c2f908.parquet\n", + "/app/data/idx=2/year=2000/month=1/ef42e36ceb794730ac25dad68f73294d.parquet\n", + "/app/data/idx=2/year=2000/month=1/efe26f73b0494f828fcf2686b6874c71.parquet\n", + "/app/data/idx=2/year=2000/month=1/f15094f2f10748e59573fecb5435ecc4.parquet\n", + "/app/data/idx=2/year=2000/month=1/f1e37026291c41c5ae698956baa6bf39.parquet\n", + "/app/data/idx=2/year=2000/month=1/f1f56b07a73646e4a5219a2623b04489.parquet\n", + "/app/data/idx=2/year=2000/month=1/f25704c4b00a418c9fa2385f9018adc7.parquet\n", + "/app/data/idx=2/year=2000/month=1/f60540924a1641de9d64f66c1af980dd.parquet\n", + "/app/data/idx=2/year=2000/month=1/f62eada23e1d430dacb69eeff0d5ba59.parquet\n", + "/app/data/idx=2/year=2000/month=1/f9b43fe646ec4607baa500b1360a6e1c.parquet\n", + "/app/data/idx=2/year=2000/month=1/fc3a31bc82ba4f17a93a18138887d9d5.parquet\n", + "/app/data/idx=3/year=2000/month=1/00b291e6d0d2494a8652e6ffcf1746c5.parquet\n", + "/app/data/idx=3/year=2000/month=1/01b6882837054cc4801c6929a630abd7.parquet\n", + "/app/data/idx=3/year=2000/month=1/09ebeae420f348c28a365f607978aeda.parquet\n", + "/app/data/idx=3/year=2000/month=1/0c41010bec604c93b974e72fa35cc2c7.parquet\n", + "/app/data/idx=3/year=2000/month=1/0cb995ed168f4829a38db4f75d4ed14b.parquet\n", + "/app/data/idx=3/year=2000/month=1/0cf1a660ee984efcaabe1d1bb9263a9a.parquet\n", + "/app/data/idx=3/year=2000/month=1/0d0bbc2ee628424f8204240680f44389.parquet\n", + "/app/data/idx=3/year=2000/month=1/0f72553d38cb47f095fdf35e03507dd3.parquet\n", + "/app/data/idx=3/year=2000/month=1/0ff3e55ae9464e369302d1fb2abaec40.parquet\n", + "/app/data/idx=3/year=2000/month=1/1165cf18728c41edb7bb8a765ae7854d.parquet\n", + "/app/data/idx=3/year=2000/month=1/12a3b4dadd4f43389c269f4b736278c2.parquet\n", + "/app/data/idx=3/year=2000/month=1/1a204362f488461da026ee347c817e2e.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/1c306421662241b48b85f24d033898fc.parquet\n", + "/app/data/idx=3/year=2000/month=1/22155eaaf5ce4e36bbb36b162dadae9e.parquet\n", + "/app/data/idx=3/year=2000/month=1/229cb1d3321f4660866b414f3a647fff.parquet\n", + "/app/data/idx=3/year=2000/month=1/280b6ca59e1f4312b872fd23d96ed6df.parquet\n", + "/app/data/idx=3/year=2000/month=1/2859c7dccfe54951a955941fa23a33b1.parquet\n", + "/app/data/idx=3/year=2000/month=1/2a17999c98294f38ac3e60af45779214.parquet\n", + "/app/data/idx=3/year=2000/month=1/2e3b411a5a3a48aba5e52053e54dbe9f.parquet\n", + "/app/data/idx=3/year=2000/month=1/2eb295d22ddd4ca9801d7b0a6a950261.parquet\n", + "/app/data/idx=3/year=2000/month=1/313a5fc7ea2c49009cd68f31ce030eb3.parquet\n", + "/app/data/idx=3/year=2000/month=1/319b8c873aba46d9a39aaed1d7ade697.parquet\n", + "/app/data/idx=3/year=2000/month=1/36c17affd08e450ba034d29818f6c94f.parquet\n", + "/app/data/idx=3/year=2000/month=1/37170fb9855d47f0871cbf1b3c4a5763.parquet\n", + "/app/data/idx=3/year=2000/month=1/3772fba9cef64744a8aa5ad999a1d48d.parquet\n", + "/app/data/idx=3/year=2000/month=1/3d68d10aee3b46e9ab4c2341f395e9f8.parquet\n", + "/app/data/idx=3/year=2000/month=1/3da7295cc0ee4953aad41cddb746c0ec.parquet\n", + "/app/data/idx=3/year=2000/month=1/401a2d5e38ee4581ac5950131e7739ed.parquet\n", + "/app/data/idx=3/year=2000/month=1/40bb809ba5824fa48218e2543e1317d8.parquet\n", + "/app/data/idx=3/year=2000/month=1/42c11bbbec28471d818c4eda7ffa0316.parquet\n", + "/app/data/idx=3/year=2000/month=1/430d92d720ef40aca2043cdd9a4216a7.parquet\n", + "/app/data/idx=3/year=2000/month=1/4344d9475f474d4289c16c14e3d76205.parquet\n", + "/app/data/idx=3/year=2000/month=1/4965043c1c58485fb9a81ca502c9704c.parquet\n", + "/app/data/idx=3/year=2000/month=1/4c954d56c1f040f8adcb92a116fc3e4a.parquet\n", + "/app/data/idx=3/year=2000/month=1/4cb7c012e50c4e45988d6c73f931babf.parquet\n", + "/app/data/idx=3/year=2000/month=1/4d11aa2de91047638fd1fbb49180b828.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/59de1ba8fd7b41d7819849137f7b9817.parquet\n", + "/app/data/idx=3/year=2000/month=1/5a31ef5acc2340b7a575b1d77e9e9917.parquet\n", + "/app/data/idx=3/year=2000/month=1/5b14185275384ee5ae5839b6d69c714e.parquet\n", + "/app/data/idx=3/year=2000/month=1/5b35b2943a7c476aa5dc3a2af08f13fe.parquet\n", + "/app/data/idx=3/year=2000/month=1/5e6bb9eceb2d4a4ebddd39e06db86d67.parquet\n", + "/app/data/idx=3/year=2000/month=1/5f8372dbc36a4681bdebfaa9f3328eec.parquet\n", + "/app/data/idx=3/year=2000/month=1/6317cb7958d2459595a28bdca41f42d5.parquet\n", + "/app/data/idx=3/year=2000/month=1/67ba93ec02b44b0593c0ff37aa3db5b7.parquet\n", + "/app/data/idx=3/year=2000/month=1/69be17b95a9046c2a4553f5c077f5fff.parquet\n", + "/app/data/idx=3/year=2000/month=1/6ac05cada45b48b89ec15b0f76df21ac.parquet\n", + "/app/data/idx=3/year=2000/month=1/6ce38fe0d6a54853a757745eb148960a.parquet\n", + "/app/data/idx=3/year=2000/month=1/7000686e11b34200ae44dfe294dc8c8e.parquet\n", + "/app/data/idx=3/year=2000/month=1/70f44eb7513c4100aa2cd5779e3c5d67.parquet\n", + "/app/data/idx=3/year=2000/month=1/7421bdc2222640b38ada8d94e10e5865.parquet\n", + "/app/data/idx=3/year=2000/month=1/78f4a6251bb7423e800ada3444bb54c1.parquet\n", + "/app/data/idx=3/year=2000/month=1/874eb82772844f269bc5360ef1971245.parquet\n", + "/app/data/idx=3/year=2000/month=1/87baf01b30ce467ca976e26ad5bec1e2.parquet\n", + "/app/data/idx=3/year=2000/month=1/8a31ab99c92a4a8b829f37561cc99956.parquet\n", + "/app/data/idx=3/year=2000/month=1/8aa9003415c649288a13560a1352805b.parquet\n", + "/app/data/idx=3/year=2000/month=1/8ae3a6e6214f4816b469f09b01c2e955.parquet\n", + "/app/data/idx=3/year=2000/month=1/8ff02b303fca4f86a129197874e8e6fe.parquet\n", + "/app/data/idx=3/year=2000/month=1/94c27fe8b6084f7b8606cef710bab753.parquet\n", + "/app/data/idx=3/year=2000/month=1/94c4de33006f424e8cb424accfad8a2c.parquet\n", + "/app/data/idx=3/year=2000/month=1/9c9b600151fb47e5a073e51a735e1537.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/9e59161660e140209e94cab5f7ea5098.parquet\n", + "/app/data/idx=3/year=2000/month=1/9fadcdc1ab7a4b9783128af7b744d705.parquet\n", + "/app/data/idx=3/year=2000/month=1/9fd3848ab9c54869b34c3a5d8e79be9a.parquet\n", + "/app/data/idx=3/year=2000/month=1/a2c45c983d5b469997c55c4e2ad72427.parquet\n", + "/app/data/idx=3/year=2000/month=1/a3f1f0a5cca84c4eaa7f2a1bef1f88b0.parquet\n", + "/app/data/idx=3/year=2000/month=1/a43049d78c9341668d77a63fc3b4d57f.parquet\n", + "/app/data/idx=3/year=2000/month=1/aa89184d32ca40c28f44109c97cee774.parquet\n", + "/app/data/idx=3/year=2000/month=1/ab3cf71e9caa44ec90adc43a56867162.parquet\n", + "/app/data/idx=3/year=2000/month=1/acab0d093d9a4bca854719e790512a25.parquet\n", + "/app/data/idx=3/year=2000/month=1/acf77747edbf4df5b457cfc8a77e0dc0.parquet\n", + "/app/data/idx=3/year=2000/month=1/b5672b45b393472986217241b378742f.parquet\n", + "/app/data/idx=3/year=2000/month=1/b7fd4df9bc9440ff94d713a7e43959d2.parquet\n", + "/app/data/idx=3/year=2000/month=1/b81af51b094e457faa6c786d1fffc470.parquet\n", + "/app/data/idx=3/year=2000/month=1/bbedc33b622c46b7af6af9c62e139163.parquet\n", + "/app/data/idx=3/year=2000/month=1/befaac43d5fa49f0a118ffaac6b5c4d3.parquet\n", + "/app/data/idx=3/year=2000/month=1/c0a4a83a65d94f2281b2039cac0e2c9e.parquet\n", + "/app/data/idx=3/year=2000/month=1/c4f44bc2181f45a3866cc232d80f2e46.parquet\n", + "/app/data/idx=3/year=2000/month=1/c63bff60ba67488d8ce536aa47774b53.parquet\n", + "/app/data/idx=3/year=2000/month=1/c74c114cc7e34985aeb20e14c2b26f3c.parquet\n", + "/app/data/idx=3/year=2000/month=1/c7eb09b4b0cf44eab86d88f11d00c222.parquet\n", + "/app/data/idx=3/year=2000/month=1/ce3160350479478da1a327405dc4cbe8.parquet\n", + "/app/data/idx=3/year=2000/month=1/cfb6a5a4bdbb4bb0a6afa699aa2e100a.parquet\n", + "/app/data/idx=3/year=2000/month=1/d137ffa9eeeb418491e792c7871334c6.parquet\n", + "/app/data/idx=3/year=2000/month=1/d43ffbf42b694713ae6e4b1e408529f9.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/d7f91f13f3444032995bc7c6c0bcd1cd.parquet\n", + "/app/data/idx=3/year=2000/month=1/d9aa67eaa7f144fc8613ce81bd072167.parquet\n", + "/app/data/idx=3/year=2000/month=1/ddeb24d5cdb043f380654ff98d83adc9.parquet\n", + "/app/data/idx=3/year=2000/month=1/e1e0b2ae05154f459914dad148a7779f.parquet\n", + "/app/data/idx=3/year=2000/month=1/e74ecdc304164cd8b953c808a1353bfd.parquet\n", + "/app/data/idx=3/year=2000/month=1/e7eb8d26146c423eaa1a77343d16920b.parquet\n", + "/app/data/idx=3/year=2000/month=1/e937a5e6dd0241c1a50b24a1c9b4ea7a.parquet\n", + "/app/data/idx=3/year=2000/month=1/e94bfddc06704799a2699d3a90d9843b.parquet\n", + "/app/data/idx=3/year=2000/month=1/f08328e844ab486ca07eda98bf1ca9ba.parquet\n", + "/app/data/idx=3/year=2000/month=1/f154e97e55b0428185553c4acb9ce227.parquet\n", + "/app/data/idx=3/year=2000/month=1/f38d8f30947f4bd08fb1c10bc81d8ee7.parquet\n", + "/app/data/idx=3/year=2000/month=1/f3fb7ade438a4929aba0109858f4abe4.parquet\n", + "/app/data/idx=3/year=2000/month=1/f458cf905d5845f1ac64183bba7a4826.parquet\n", + "/app/data/idx=3/year=2000/month=1/f6ce7accff3e4eb8b601078583655865.parquet\n", + "/app/data/idx=3/year=2000/month=1/f7d729c528904fd182207989fef04050.parquet\n", + "/app/data/idx=3/year=2000/month=1/f9d5734d70c542a3bf5ba9e004cb2e95.parquet\n", + "/app/data/idx=3/year=2000/month=1/fbeb2f31e5784074a90d737fb8c4e047.parquet\n", + "/app/data/idx=3/year=2000/month=1/fc1fb4ad31c448eeb8724a3069e760f0.parquet\n", + "/app/data/idx=3/year=2000/month=1/fde846fa6d8649c9b1770638786fb18c.parquet\n", + "/app/data/idx=3/year=2000/month=1/fe9c940d68fd4759a90408a1245022a6.parquet\n", + "/app/data/idx=3/year=2000/month=1/ffebea86d7fe4a64a973415ab3b6eccf.parquet\n", + "/app/data/idx=4/year=2000/month=1/01a585864dc644b6a4a7b13ae97c1f85.parquet\n", + "/app/data/idx=4/year=2000/month=1/0251c252cf544dc49285c7e4fcbf9784.parquet\n", + "/app/data/idx=4/year=2000/month=1/026b7ed2f32a4a4d9b1fe4bf2e2c45ce.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/03343bb5f29d42f19ce58caddb755df7.parquet\n", + "/app/data/idx=4/year=2000/month=1/04f9e581b08c424595f85fa85f87cb2c.parquet\n", + "/app/data/idx=4/year=2000/month=1/05751ecfd2734eedb17546ca81f8344a.parquet\n", + "/app/data/idx=4/year=2000/month=1/05f08cd7531f42a792e243c617b344f1.parquet\n", + "/app/data/idx=4/year=2000/month=1/061bd006ae35412eb8e5b758c50102c4.parquet\n", + "/app/data/idx=4/year=2000/month=1/06ba2d68586e4088921c99eddd5a5d86.parquet\n", + "/app/data/idx=4/year=2000/month=1/06df2daa4186437791d71a6b8e23519d.parquet\n", + "/app/data/idx=4/year=2000/month=1/07369c0250b5496bbac305aa1909eaa1.parquet\n", + "/app/data/idx=4/year=2000/month=1/0beb1321d8304074994a90b3a7eb94c5.parquet\n", + "/app/data/idx=4/year=2000/month=1/0f0e0602ffe5408a82d5265b2dc5ec18.parquet\n", + "/app/data/idx=4/year=2000/month=1/0fc5d753f2184cb0868ae28fc84c227e.parquet\n", + "/app/data/idx=4/year=2000/month=1/135fcc4c1e5a4823ae050c1e89fa413c.parquet\n", + "/app/data/idx=4/year=2000/month=1/156b561654924ad1b111bd5c965a46c2.parquet\n", + "/app/data/idx=4/year=2000/month=1/168d6922b1824cedb14d5654d75ba284.parquet\n", + "/app/data/idx=4/year=2000/month=1/1827f11f108341ccb48a0bb6ab694a64.parquet\n", + "/app/data/idx=4/year=2000/month=1/18e1c91f8c724d30a77bdd47e665c571.parquet\n", + "/app/data/idx=4/year=2000/month=1/19016c157bce43e394b117e8e0ed2557.parquet\n", + "/app/data/idx=4/year=2000/month=1/1a2c4e9d435f4c5faf83efbbb559118b.parquet\n", + "/app/data/idx=4/year=2000/month=1/1d3c8ecb9804470c87bfd7c25a3dab28.parquet\n", + "/app/data/idx=4/year=2000/month=1/1ee5f78eb54548278ae0a857c616e84c.parquet\n", + "/app/data/idx=4/year=2000/month=1/1ff311b87ba74e998ff7a5267ba52832.parquet\n", + "/app/data/idx=4/year=2000/month=1/285e2e6ef8c34d45b73916b4bfe1a2bf.parquet\n", + "/app/data/idx=4/year=2000/month=1/288d2d389b1e4a7695454e12fc442592.parquet\n", + "/app/data/idx=4/year=2000/month=1/2bfb7829ce324e1bb182159d8a6e7966.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/2cb8084772654371bc4aab66bab3d5fc.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d0d1ce706fe41feadf69279c0290101.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d4a13244f154d278d237535e957d174.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d73ffb2b7314b48b25c924dad691fa1.parquet\n", + "/app/data/idx=4/year=2000/month=1/30c6048fdac04824831e0a984445c238.parquet\n", + "/app/data/idx=4/year=2000/month=1/3b3610138fd84568b3f6b20ccce2b296.parquet\n", + "/app/data/idx=4/year=2000/month=1/3d33b2adeb0c406aafda7296398833d2.parquet\n", + "/app/data/idx=4/year=2000/month=1/3fb3450af6ed4ddc996b10c7316018af.parquet\n", + "/app/data/idx=4/year=2000/month=1/4384e6f19b984984a0e583891fab8200.parquet\n", + "/app/data/idx=4/year=2000/month=1/4499b3a4074d42ad87a6a74f031bad48.parquet\n", + "/app/data/idx=4/year=2000/month=1/578cd70733f54818812b7fee342f7922.parquet\n", + "/app/data/idx=4/year=2000/month=1/5a42dc9b52a845b394f570bc7e233637.parquet\n", + "/app/data/idx=4/year=2000/month=1/5e3f996936cd466c8f182e4925b457b9.parquet\n", + "/app/data/idx=4/year=2000/month=1/6171f6c076d442ce9ee9b2223a1c9e29.parquet\n", + "/app/data/idx=4/year=2000/month=1/637fabc040bd4139901780de2f98df24.parquet\n", + "/app/data/idx=4/year=2000/month=1/649b57f24c1c49e7aa025d1a111f31a6.parquet\n", + "/app/data/idx=4/year=2000/month=1/65dac4a30aba4d3e9a18e731bef42800.parquet\n", + "/app/data/idx=4/year=2000/month=1/6759ad29fa9a416498d408a97082da2d.parquet\n", + "/app/data/idx=4/year=2000/month=1/682c068895b54404aa02c22ec59d98d7.parquet\n", + "/app/data/idx=4/year=2000/month=1/6a2c44eebd7c447ab0eac8b5596612ce.parquet\n", + "/app/data/idx=4/year=2000/month=1/6c36185edd4a41bc8869406a3bc9b533.parquet\n", + "/app/data/idx=4/year=2000/month=1/6ce5ab2e0fce43c9be58cd6ca0ab1b0c.parquet\n", + "/app/data/idx=4/year=2000/month=1/6e7dfa62c7ab4743bd5b47c2d65fcd3f.parquet\n", + "/app/data/idx=4/year=2000/month=1/7137092484b641e3a41226810acbe2b7.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/727c5b50be444555bb0c8cb3493f136c.parquet\n", + "/app/data/idx=4/year=2000/month=1/761ed2925727400586b3f95bebe32b12.parquet\n", + "/app/data/idx=4/year=2000/month=1/779bc731cfdc4eb582b7d45275f45f7d.parquet\n", + "/app/data/idx=4/year=2000/month=1/79ee8145c2814549a38530b2c506544e.parquet\n", + "/app/data/idx=4/year=2000/month=1/7b59b765fa454ce0a9fbd88628d6f604.parquet\n", + "/app/data/idx=4/year=2000/month=1/8292f989424444f6aa18bbcfc68f1734.parquet\n", + "/app/data/idx=4/year=2000/month=1/83b3730a855b494487dd6728a517ee3b.parquet\n", + "/app/data/idx=4/year=2000/month=1/84ebbce76a7a4107b939b685da66b5f4.parquet\n", + "/app/data/idx=4/year=2000/month=1/855921d6f64644a38bd2be5d9669fe0a.parquet\n", + "/app/data/idx=4/year=2000/month=1/85ab57ea6d0e48efac390b6047a6f435.parquet\n", + "/app/data/idx=4/year=2000/month=1/8b974e69e33e41cdb5bde25a6a422fd6.parquet\n", + "/app/data/idx=4/year=2000/month=1/8dd5278b54e9413ebd42286dea00c4a3.parquet\n", + "/app/data/idx=4/year=2000/month=1/93128598152643a297db72dec38a07b5.parquet\n", + "/app/data/idx=4/year=2000/month=1/95448aeaacdc40fe97d207b2c80ca784.parquet\n", + "/app/data/idx=4/year=2000/month=1/9571568631184e1386c3528b8ce9ed26.parquet\n", + "/app/data/idx=4/year=2000/month=1/95f8e19b3af344db98dcc5c5f9546c3a.parquet\n", + "/app/data/idx=4/year=2000/month=1/9921911b40d041f6ac72c4d44578c5cf.parquet\n", + "/app/data/idx=4/year=2000/month=1/9b4d80c840c14d3b9c67da4c9877b628.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e259ea36fbb4c0ba9b6535a3f34544e.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e3706d0ded44106bf8e0dee8900cd28.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e67c71850a54fe1aa354c43d2cd9c38.parquet\n", + "/app/data/idx=4/year=2000/month=1/9ef278bcdc3b41e89059c309bcbb005e.parquet\n", + "/app/data/idx=4/year=2000/month=1/a33e6304bb1b47daa86853f19b009366.parquet\n", + "/app/data/idx=4/year=2000/month=1/a9bc812dc596492eafcc73f01d0e53a3.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/abf0e405806744df9ea3e9908eb0451f.parquet\n", + "/app/data/idx=4/year=2000/month=1/acd468d1addc4d75944766e48c3eb324.parquet\n", + "/app/data/idx=4/year=2000/month=1/adbdc6fec62c463aa94e0ce707ae1768.parquet\n", + "/app/data/idx=4/year=2000/month=1/b24807369dfc461e92eb8a56a7931070.parquet\n", + "/app/data/idx=4/year=2000/month=1/b2f3d43c99f44131969e0fcf27cfbf3c.parquet\n", + "/app/data/idx=4/year=2000/month=1/b461c7cfd0f4483f8309f670f4f4265d.parquet\n", + "/app/data/idx=4/year=2000/month=1/b584791f45f74432a067632281285b9a.parquet\n", + "/app/data/idx=4/year=2000/month=1/b7ca6973a34c4f92831f16216beb33f2.parquet\n", + "/app/data/idx=4/year=2000/month=1/b7f16808e8e4491e8f86d3ae9766f2b9.parquet\n", + "/app/data/idx=4/year=2000/month=1/b9a2d05a74a84d71a1b65a0f05895011.parquet\n", + "/app/data/idx=4/year=2000/month=1/bc3036cc653e4584893f8b36e33c8f85.parquet\n", + "/app/data/idx=4/year=2000/month=1/be1318c7564d48be8435c11344627932.parquet\n", + "/app/data/idx=4/year=2000/month=1/bf90009dc7b14cfaab939f435d975a0b.parquet\n", + "/app/data/idx=4/year=2000/month=1/c0105d7e54fc42dc93d5140782960815.parquet\n", + "/app/data/idx=4/year=2000/month=1/c450cbe2674e488d8e30953252bc7a4b.parquet\n", + "/app/data/idx=4/year=2000/month=1/c82d37b18d65434ca1fe1b9cf4d29ccb.parquet\n", + "/app/data/idx=4/year=2000/month=1/cb53085f9145493b9a171d31b682e75f.parquet\n", + "/app/data/idx=4/year=2000/month=1/cc14bf7a74c9498889bc52e29f83edff.parquet\n", + "/app/data/idx=4/year=2000/month=1/ce3c90dd7e7a4f5a862580c14aa22c28.parquet\n", + "/app/data/idx=4/year=2000/month=1/d47149e3e1e34123a48f623ca121e8a8.parquet\n", + "/app/data/idx=4/year=2000/month=1/d9e0e3e786a942f5892c6ce17b37eb4a.parquet\n", + "/app/data/idx=4/year=2000/month=1/dbb82450694e4e76ab34f3e650d36594.parquet\n", + "/app/data/idx=4/year=2000/month=1/dc67b56f0c814648b9ebf8e1c483b923.parquet\n", + "/app/data/idx=4/year=2000/month=1/de2e16496bcd405b8d48aec4da4d5ae4.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/de37409ac14b49c38c9c0da26d6c721f.parquet\n", + "/app/data/idx=4/year=2000/month=1/e8072594944141a5b078b74e739307d3.parquet\n", + "/app/data/idx=4/year=2000/month=1/e9b36b985eb44b44a5436af438f7ceb0.parquet\n", + "/app/data/idx=4/year=2000/month=1/eefdd2cdde1d4085964d1469a11f462c.parquet\n", + "/app/data/idx=4/year=2000/month=1/f89525bf20e540f29b021ce5f4d9eb3c.parquet\n", + "/app/data/idx=4/year=2000/month=1/fab7e098a4c8489785225a74b71ec2ef.parquet\n" + ] + } + ], + "source": [ + "dir_name = \"/app/data\"\n", + "\n", + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "ba4d7dc4", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:17:50.865185Z", + "start_time": "2021-06-16T11:17:50.378460Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx year month\n", + "2000-01-09 00:00:00-05:00 A 99 54 0 2000 1\n", + "2000-01-13 09:30:00-05:00 A 99 62 0 2000 1\n", + "2000-01-13 09:35:00-05:00 A 54 76 0 2000 1\n", + "# df.shape=\n", + "(18075, 6)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "year int32\n", + "month int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read everything.\n", + "df2 = dataset.to_table().to_pandas()\n", + "\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "68e84388", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:05:43.018220Z", + "start_time": "2021-06-16T11:05:43.007510Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['A' 'B' 'C' 'D' 'E']\n", + "DatetimeIndex(['2000-01-06 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", + " '2000-01-01 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " 
'2000-01-08 00:00:00-05:00', '2000-01-12 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-15 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-14 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-13 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-08 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-08 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-03 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", + " '2000-01-04 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-10 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-10 
00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", + " '2000-01-06 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-01 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", + " '2000-01-15 00:00:00-05:00'],\n", + " dtype='datetime64[ns, America/New_York]', freq=None)\n" + ] + } + ], + "source": [ + "print(df2[\"instr\"].unique())\n", + "print(df2.index)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "205.6px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py new file mode 100644 index 000000000..d7d5f9e56 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py @@ -0,0 +1,304 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# 
display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# Show Parquet / Pyarrow API. + +# %% [markdown] +# ## Imports + +# %% +import logging +import os +import random + +import pandas as pd +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +from pyarrow.dataset import DirectoryPartitioning + +import helpers.hdbg as hdbg +import helpers.hio as hio + +hdbg.init_logger(verbosity=logging.INFO) +_LOG = logging.getLogger(__name__) + + +# %% +def get_df() -> pd.DataFrame: + """ + Create pandas random data, like: + + ``` + idx instr val1 val2 + 2000-01-01 0 A 99 30 + 2000-01-02 0 A 54 46 + 2000-01-03 0 A 85 86 + ``` + """ + instruments = "A B C D E".split() + "id stock val1 val2".split() + df_idx = pd.date_range( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-15"), freq="1D" + ) + # print(df_idx) + random.seed(1000) + + df = [] + for idx, inst in enumerate(instruments): + df_tmp = pd.DataFrame( + { + "idx": idx, + "instr": inst, + "val1": [random.randint(0, 100) for k in range(len(df_idx))], + "val2": [random.randint(0, 100) for k in range(len(df_idx))], + }, + index=df_idx, + ) + # print(df_tmp) + df.append(df_tmp) + df = pd.concat(df) + return df + + +# %% +def df_to_str(df: pd.DataFrame) -> str: + txt = "" + txt += "# df=\n%s" % df.head(3) + txt += "\n# df.shape=\n%s" % str(df.shape) + txt += "\n# df.dtypes=\n%s" % str(df.dtypes) + return txt + + +# %% [markdown] +# # Save and load all data in one file + +# %% +df = get_df() +# print(df.head()) +print(df_to_str(df)) + +# %% +table = pa.Table.from_pandas(df) + +print("table=\n%s" % table) + +# %% +# Save. +file_name = "df_in_one_file.pq" +pq.write_table(table, file_name) + +# %% +# Load. 
+df2 = pq.read_table(file_name) +print(df2) + +df2 = df2.to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Read a subset of columns + +# %% +df2 = pq.read_table(file_name, columns=["idx", "val1"]) +print(df2) + +df2 = df2.to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Partitioned dataset +# +# from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data +# +# - A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir +# - E.g., "Hive" patitioning scheme "key=vale" dir names + +# %% +df = get_df() +print(df_to_str(df)) + +# %% +base = "." +dir_name = os.path.join(base, "parquet_dataset_partitioned") +os.system("rm -rf %s" % dir_name) + +pq.write_to_dataset(table, dir_name, partition_cols=["idx"]) + +# %% +# !ls parquet_dataset_partitioned + +# %% +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read everything. +df2 = dataset.to_table().to_pandas() + +print(df_to_str(df2)) + +# %% +# Load part of the data. + +df2 = dataset.to_table(filter=ds.field("idx") == 1).to_pandas() +print(df_to_str(df2)) + +df2 = dataset.to_table(filter=ds.field("idx") < 3).to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Add year-month partitions + +# %% +df = get_df() +df["year"] = df.index.year +df["month"] = df.index.month + +print(df_to_str(df)) + +# %% +table = pa.Table.from_pandas(df) + +print("table=\n%s" % table) + +# %% +base = "." +dir_name = os.path.join(base, "pq_partitioned2") +os.system("rm -rf %s" % dir_name) + +pq.write_to_dataset(table, dir_name, partition_cols=["idx", "year", "month"]) + +# %% +# !ls $dir_name + +# %% +# !ls $dir_name/idx=0/year=2000/month=1 + +# %% +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read data back. 
+dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +df2 = dataset.to_table(filter=ds.field("idx") == 2).to_pandas() +print(df_to_str(df2)) + +# %% +# We could scan manually and create the dirs manually if we don't want to add +# add a new dir. +base = "." +dir_name = os.path.join(base, "parquet_dataset_partitioned2") +os.system("rm -rf %s" % dir_name) + +schemas = [] + +schema = pa.Table.from_pandas(df).schema +print(schema) +# assert 0 +# idx: int64 +# instr: string +# val1: int64 +# val2: int64 +# year: int64 +# month: int64 + +# grouped = df.groupby(lambda x: x.day) +group_by_idx = df.groupby("idx") +for idx, df_tmp in group_by_idx: + _LOG.debug("idx=%s -> df.shape=%s", idx, str(df_tmp.shape)) + # + group_by_year = df_tmp.groupby(lambda x: x.year) + for year, df_tmp2 in group_by_year: + _LOG.debug("year=%s -> df.shape=%s", year, str(df_tmp2.shape)) + # + group_by_month = df_tmp2.groupby(lambda x: x.month) + for month, df_tmp3 in group_by_month: + _LOG.debug("month=%s -> df.shape=%s", month, str(df_tmp3.shape)) + # file_name = "df_in_one_file.pq" + # pq.write_table(table, file_name) + # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet + subdir_name = os.path.join( + dir_name, f"idx={idx}", f"year={year}", f"month={month}" + ) + table = pa.Table.from_pandas(df_tmp3, schema=schema) + schemas.append(table.schema) + # print(df_tmp3) + # print(table.schema) + # pq.write_to_dataset(table, + # subdir_name, schema=schema) + file_name = os.path.join(subdir_name, "df_out.pq") + hio.create_enclosing_dir(file_name) + pq.write_table(table, file_name) + +# %% +schemas[0] == schemas[4] + +# %% +schemas + +# %% + +# %% +# !ls $dir_name/idx=0/year=2000/month=1 + +# %% +# Read data back. 
+# https://github.com/dask/dask/issues/4194 +# src_dir = f"{dir_name}/idx=0/year=2000/month=1" +src_dir = f"{dir_name}/idx=0/year=2000" +dataset = ds.dataset(src_dir, format="parquet", partitioning="hive") + +df2 = dataset.to_table().to_pandas() +# print(df_to_str(df2)) +print("\n".join(dataset.files)) + +# %% [markdown] +# ## Partition manually + +# %% +partitioning = DirectoryPartitioning( + pa.schema([("year", pa.int16()), ("month", pa.int8()), ("day", pa.int8())]) +) +print(partitioning.parse("/2009/11/3")) + +# partitioning.discover() + +# %% +# !ls /app/data + +# %% +dir_name = "/app/data" + +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read everything. +df2 = dataset.to_table().to_pandas() + +print(df_to_str(df2)) + +# %% +print(df2["instr"].unique()) +print(df2.index) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb new file mode 100644 index 000000000..6dcf8078c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "81a273af", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:42.864614Z", + "start_time": "2021-06-16T11:41:42.860710Z" + } + }, + "outputs": [], + "source": [ + "# https://s3fs.readthedocs.io/en/latest/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8fef0639", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:42.888158Z", + "start_time": "2021-06-16T11:41:42.869135Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 3" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "37fe11a3", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.140014Z", + "start_time": "2021-06-16T11:41:42.890655Z" + } + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import helpers.hs3 as hs3" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a4130a2c", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.145271Z", + "start_time": "2021-06-16T11:41:43.141535Z" + } + }, + "outputs": [], + "source": [ + "aws_profile = \"am\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a49a28ff", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.158474Z", + "start_time": "2021-06-16T11:41:43.148428Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aws_region=%s us-east-1\n" + ] + } + ], + "source": [ + "# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key)\n", + "\n", + "s3 = hs3.get_s3fs(aws_profile)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1795133f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.819759Z", + "start_time": "2021-06-16T11:41:43.160432Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bucket=alphamatic-data\n" + ] + }, + { + "data": { + "text/plain": [ + "['alphamatic-data/README.md', 'alphamatic-data/data']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False)\n", + "print(\"bucket=\" + bucket)\n", + "s3.ls(bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9bc9623e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.828493Z", + "start_time": "2021-06-16T11:41:43.822315Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['alphamatic-data/README.md', 
'alphamatic-data/data']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s3.ls(bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "65f95a8a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.839153Z", + "start_time": "2021-06-16T11:41:43.832520Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(os.environ[\"AWS_DEFAULT_REGION\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py new file mode 100644 index 000000000..65aa9d9f8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py @@ -0,0 +1,44 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# %% +# 
https://s3fs.readthedocs.io/en/latest/ + +# %% +# %load_ext autoreload +# %autoreload 3 + +# %% +import os + +import helpers.hs3 as hs3 + +# %% +aws_profile = "am" + +# %% +# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key) + +s3 = hs3.get_s3fs(aws_profile) + +# %% +bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False) +print("bucket=" + bucket) +s3.ls(bucket) + +# %% +s3.ls(bucket) + +# %% +print(os.environ["AWS_DEFAULT_REGION"]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb new file mode 100644 index 000000000..9f3df144d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb @@ -0,0 +1,448 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ae351077", + "metadata": {}, + "source": [ + "# Maple\n", + "\n", + "https://www.sagemath.org/" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "67b105e6", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:38.281663Z", + "start_time": "2022-11-24T08:59:32.166395Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sagemath\n", + " Downloading sagemath-1.3.0.tar.gz (9.4 kB)\n", + "Collecting cython>=0.26\n", + " Downloading Cython-0.29.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)\n", + "\u001b[K |████████████████████████████████| 1.9 MB 3.2 MB/s eta 0:00:01\n", + "\u001b[?25hBuilding wheels for collected packages: sagemath\n", + " Building wheel for sagemath (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for sagemath: filename=sagemath-1.3.0-py3-none-any.whl size=9330 sha256=eb8efd936116026e66a021d4bdd88dc4d9ce207fd633706229625d26878de267\n", + " Stored in directory: /root/.cache/pip/wheels/da/63/1f/6dc0b464e0fec31a0d318d11748e11be903fe893fd6fb713fe\n", + "Successfully built sagemath\n", + "Installing collected packages: cython, sagemath\n", + "Successfully installed cython-0.29.32 sagemath-1.3.0\n" + ] + } + ], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sagemath)\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70f1c613", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "8dd49c0c", + "metadata": {}, + "source": [ + "# Sympy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bab397f4", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:31.082906Z", + "start_time": "2022-11-24T08:59:08.303577Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sympy\n", + " Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\n", + "\u001b[K |████████████████████████████████| 6.5 MB 4.4 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting mpmath>=0.19\n", + " Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)\n", + "\u001b[K |████████████████████████████████| 532 kB 6.2 MB/s eta 0:00:01\n", + "\u001b[?25hInstalling collected packages: mpmath, sympy\n", + "Successfully installed mpmath-1.2.1 sympy-1.11.1\n" + ] + } + ], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sympy)\"" + ] + }, + { + "cell_type": "markdown", + "id": "c32a78b2", + "metadata": {}, + "source": [ + "## Features\n", + "\n", + "https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html#" + ] + }, + { + "cell_type": "markdown", + "id": "547104ae", + "metadata": {}, + "source": [ + "## Logic\n", + "\n", + 
"https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "016ffec6", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:48.097485Z", + "start_time": "2022-11-24T08:59:47.660109Z" + } + }, + "outputs": [], + "source": [ + "import sympy\n", + "from sympy import * # noqa: F403" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "15a65c7c", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:01:58.628860Z", + "start_time": "2022-11-24T09:01:58.614742Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle y \\vee \\left(x \\wedge y\\right)$" + ], + "text/plain": [ + "y | (x & y)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x, y = sympy.symbols(\"x,y\")\n", + "y | (x & y)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c016e526", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:02:16.425181Z", + "start_time": "2022-11-24T09:02:16.418742Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle x \\Rightarrow y$" + ], + "text/plain": [ + "Implies(x, y)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x >> y" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "961ab5b7", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:02:36.687945Z", + "start_time": "2022-11-24T09:02:36.681518Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\text{True}$" + ], + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Evaluate an expression.\n", + "(y & x).subs({x: True, y: True})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d36a6df4", + "metadata": { + 
"ExecuteTime": { + "end_time": "2022-11-24T09:03:53.122377Z", + "start_time": "2022-11-24T09:03:53.108926Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\left(x \\wedge \\neg w\\right) \\vee \\left(y \\wedge z \\wedge \\neg x\\right)$" + ], + "text/plain": [ + "(x & ~w) | (y & z & ~x)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "w, x, y, z = sympy.symbols(\"w x y z\")\n", + "minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}]\n", + "sympy.SOPform([w, x, y, z], minterms)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "351f8a29", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:04:52.260031Z", + "start_time": "2022-11-24T09:04:52.244286Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\neg x \\wedge \\neg y$" + ], + "text/plain": [ + "~x & ~y" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = (~x & ~y & ~z) | (~x & ~y & z)\n", + "sympy.simplify_logic(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6997a50b", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:07:01.070407Z", + "start_time": "2022-11-24T09:07:01.063092Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 0] -> True\n", + "[0, 1] -> True\n", + "[1, 0] -> False\n", + "[1, 1] -> True\n" + ] + } + ], + "source": [ + "# Compute truth table.\n", + "from sympy.logic.boolalg import truth_table # noqa: E402\n", + "\n", + "table = truth_table(x >> y, [x, y])\n", + "for t in table:\n", + " print(f\"{t[0]} -> {t[1]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c70e51cf", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:08:01.433951Z", + "start_time": "2022-11-24T09:08:01.298800Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + 
"execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sympy.satisfiable(x & ~x)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f9d0eda7", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:08:14.379803Z", + "start_time": "2022-11-24T09:08:14.364702Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{y: True, x: True}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sympy.satisfiable((x | y) & (x | ~y) & (~x | y))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "782bd93c", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:28:42.188931Z", + "start_time": "2022-11-24T09:28:42.124276Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{L: True, Q: True, B: False, N: False}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# - (not L => Q and B and N)\n", + "# - (N => not L)\n", + "# - not Q => B\n", + "# - not B\n", + "\n", + "L, N, Q, B = sympy.symbols(\"L N Q B\")\n", + "\n", + "C = (\n", + " sympy.Implies(~L, Q & B & N)\n", + " & sympy.Implies(N, ~L)\n", + " & sympy.Implies(~Q, B)\n", + " & ~B\n", + ")\n", + "sympy.satisfiable(C)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1298f34b", + "metadata": {}, + "outputs": [], + "source": [ + "## Stats\n", + "\n", + "# https://docs.sympy.org/latest/modules/stats.html#" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": 
true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py new file mode 100644 index 000000000..bd5b8a5aa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py @@ -0,0 +1,98 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Maple +# +# https://www.sagemath.org/ + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sagemath)" + +# %% + +# %% [markdown] +# # Sympy + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sympy)" + +# %% [markdown] +# ## Features +# +# https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html# + +# %% [markdown] +# ## Logic +# +# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation + +# %% +import sympy +from sympy import * # noqa: F403 + +# %% +x, y = sympy.symbols("x,y") +y | (x & y) + +# %% +x >> y + +# %% +# Evaluate an expression. +(y & x).subs({x: True, y: True}) + +# %% +w, x, y, z = sympy.symbols("w x y z") +minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}] +sympy.SOPform([w, x, y, z], minterms) + +# %% +b = (~x & ~y & ~z) | (~x & ~y & z) +sympy.simplify_logic(b) + +# %% +# Compute truth table. 
+from sympy.logic.boolalg import truth_table # noqa: E402 + +table = truth_table(x >> y, [x, y]) +for t in table: + print(f"{t[0]} -> {t[1]}") + +# %% +sympy.satisfiable(x & ~x) + +# %% +sympy.satisfiable((x | y) & (x | ~y) & (~x | y)) + +# %% +# - (not L => Q and B and N) +# - (N => not L) +# - not Q => B +# - not B + +L, N, Q, B = sympy.symbols("L N Q B") + +C = ( + sympy.Implies(~L, Q & B & N) + & sympy.Implies(N, ~L) + & sympy.Implies(~Q, B) + & ~B +) +sympy.satisfiable(C) + +# %% +## Stats + +# https://docs.sympy.org/latest/modules/stats.html# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py new file mode 100644 index 000000000..7550952ca --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py @@ -0,0 +1,192 @@ +""" +Import as: + +import helpers.old.conda as holdcond +""" + +import json +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hsystem as hsystem +import helpers.old.user_credentials as holuscre + +_LOG = logging.getLogger(__name__) + + +def conda_system(cmd: str, *args: Any, **kwargs: Any) -> int: + """ + When running a conda command we need to execute a script to configure + conda. This script is typically executed in .bashrc but here we create a + new bash shell every time to execute a command, so we need to re-initialize + the shell before any conda command. 
+ + :param cmd: + :param args: + :param kwargs: + :return: + """ + # TODO(gp): Pass conda_env_name as done in get_conda_list() + path = holuscre.get_credentials()["conda_sh_path"] + hdbg.dassert_path_exists(path) + hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) + cmd = f"source {path} && {cmd}" + output: int = hsystem.system(cmd, *args, **kwargs) + return output + + +def conda_system_to_string( + cmd: str, *args: Any, **kwargs: Any +) -> Tuple[int, str]: + path = holuscre.get_credentials()["conda_sh_path"] + hdbg.dassert_path_exists(path) + hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) + cmd = f"source {path} && {cmd}" + output: Tuple[int, str] = hsystem.system_to_string(cmd, *args, **kwargs) + return output + + +def get_conda_envs_dirs() -> List[str]: + """ + :return: list of the env dirs from conda + """ + _, ret = conda_system_to_string(r"conda config --show envs_dirs --json") + _LOG.debug("ret=%s", ret) + envs = json.loads(ret) + hdbg.dassert_in("envs_dirs", envs) + envs = envs["envs_dirs"] + hdbg.dassert_isinstance(envs, list) + return list(envs) + + +def set_conda_env_root(conda_env_path: str) -> None: + """ + Set conda env dirs so that it matches what specified in. + + > conda config --show envs_dirs --json + { + "envs_dirs": [ + "/Users/gp/.conda/envs", + ] + } + + > conda config --prepend envs_dirs /data/gp_wd/anaconda2/envs2 + """ + envs = get_conda_envs_dirs() + # + if not envs or envs[0] != conda_env_path: + _LOG.warning( + "%s is not the first env dir in %s", conda_env_path, str(envs) + ) + # Reset the list of conda envs. + _LOG.debug("Resetting envs_dir %s", str(envs)) + for env in envs: + _LOG.debug("Deleting %s", env) + cmd = f"conda config --remove envs_dirs {env}" + # We don't abort because of a bug in conda not deleting the key + # when asked for. 
+            # CondaKeyError: 'envs_dirs': u'/data/shared/anaconda2/envs' is not
+            # in the u'envs_dirs' key of the config file
+            conda_system(cmd, abort_on_error=False)
+        envs = get_conda_envs_dirs()
+        _LOG.debug("Current envs: %s", str(envs))
+        # Add the conda env.
+        cmd = f"conda config --prepend envs_dirs {conda_env_path}"
+        conda_system(cmd)
+        # Check that the requested dir is now the first env dir.
+        envs = get_conda_envs_dirs()
+        hdbg.dassert(
+            bool(envs) and envs[0] == conda_env_path,
+            msg=f"{conda_env_path} is not first env dir in {envs}",
+        )
+    else:
+        _LOG.debug(
+            "Nothing to do, since %s is already in %s", conda_env_path, envs
+        )
+
+
+def get_conda_info_envs() -> Tuple[dict, Optional[str]]:
+    """
+    :return: (env_dict, active_env)
+        - env_dict: map 'conda env name -> conda env path'
+        - active_env: name of the active conda env, or None if none is marked
+    """
+    # > conda info --envs
+    # # conda environments:
+    # #
+    # aws /Users/gp/.conda/envs/aws
+    # bbg /Users/gp/.conda/envs/bbg
+    # deeplearning /Users/gp/.conda/envs/deeplearning
+    # jupyter /Users/gp/.conda/envs/jupyter
+    # test_conda /Users/gp/.conda/envs/test_conda
+    # TODO(gp): Use --json but we need to parse the json without any module.
+    ret = conda_system_to_string(r"conda info --envs")[1]
+    _LOG.debug("Parsing conda info\n%s", ret)
+    ret = ret.split("\n")
+    env_dict = {}
+    active_env = None
+    for line in ret:
+        line = line.strip()
+        if line == "" or line.startswith("#"):
+            continue
+        vals = line.split()
+        if len(vals) == 2:
+            env_name, env_path = vals
+            env_dict[env_name] = env_path
+        elif len(vals) == 3:
+            # A `*` in the middle column marks the active env.
+            env_name, star, env_path = vals
+            hdbg.dassert_eq(star, "*")
+            env_dict[env_name] = env_path
+            active_env = env_name
+        else:
+            _LOG.debug("Can't parse line='%s'", line)
+    return env_dict, active_env
+
+
+def get_conda_list(conda_env_name: str) -> Dict[str, Dict[str, str]]:
+    """
+    :return: env_dict mapping package name to their info
+        - key: package name
+        - value: map with the available "version", "build", "channel" fields
+    """
+    # > conda list
+    # # packages in environment at /Users/gp/.conda/envs/:
+    # #
+    # # Name Version Build Channel
+    # absl-py 0.5.0 py_0 conda-forge
+    # agate 1.6.0 py_3 conda-forge
+    # agate-dbf 0.2.0 py27_0 conda-forge
+    # agate-excel 0.2.2 py_0 conda-forge
+    # TODO(gp): Use --json but we need to parse the json without any module.
+ cmd = rf"(conda activate {conda_env_name} 2>&1) >/dev/null && conda list" + ret = conda_system_to_string(cmd)[1] + ret = ret.split("\n") + env_dict = {} + labels = {1: "version", 2: "build", 3: "channel"} + for line in ret: + line = line.rstrip().lstrip() + _LOG.debug("line='%s'", line) + if line == "": + continue + if line.startswith("#"): + continue + vals = line.split() + env_dict[vals[0]] = {labels[k]: vals[k] for k in range(1, len(vals[:4]))} + return env_dict + + +_CONDA_PATH = None + + +def get_conda_path() -> Optional[str]: + global _CONDA_PATH + if not _CONDA_PATH: + rc, txt = conda_system_to_string("which conda", abort_on_error=False) + if rc == 0: + _CONDA_PATH = str(txt) + else: + _CONDA_PATH = "n/a" + return _CONDA_PATH diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py new file mode 100644 index 000000000..5b0445a31 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py @@ -0,0 +1,17 @@ +import pathlib +from typing import Any, Optional + + +def pytest_ignore_collect( # type: ignore + collection_path: pathlib.Path, path: Any, config: Any +) -> Optional[bool]: + """ + Skip all tests in this directory. + + :param collection_path: path to analyze + :param path: path to analyze (deprecated) + :param config: pytest config object + :return: True if the path should be ignored + """ + # Ignore this directory and all its subdirectories. 
+    return True
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py
new file mode 100644
index 000000000..f51cb5d8d
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py
@@ -0,0 +1,75 @@
+"""
+Import as:
+
+import helpers.old.env2 as holdenv2
+"""
+
+import logging
+import os
+from typing import Tuple
+
+import helpers.hio as hio
+import helpers.hprint as hprint
+import helpers.hsystem as hsystem
+import helpers.old.conda as holdcond
+
+_LOG = logging.getLogger(__name__)
+
+
+# #############################################################################
+
+
+def get_system_info(add_frame: bool) -> str:
+    msg = ""
+    if add_frame:
+        msg += hprint.frame("System info") + "\n"
+    msg += f"user name={hsystem.get_user_name()}\n"
+    msg += f"server name={hsystem.get_server_name()}\n"
+    msg += f"os name={hsystem.get_os_name()}\n"
+    msg += f"conda path={holdcond.get_conda_path()}\n"
+    msg += f"conda env root={str(holdcond.get_conda_envs_dirs())}\n"
+    return msg
+
+
+def get_package_summary(conda_env_name: str, add_frame: bool) -> str:
+    msg = ""
+    if add_frame:
+        msg += hprint.frame("Package summary") + "\n"
+    conda_list = holdcond.get_conda_list(conda_env_name)
+    # Keep appending to `msg` (do not reset it) so the frame is preserved.
+    for package in ["pandas", "numpy", "scipy", "arrow-cpp"]:
+        ver = conda_list[package]["version"] if package in conda_list else "None"
+        line = f"{package}: {ver}"
+        msg += line + "\n"
+    return msg
+
+
+def get_conda_export_list(conda_env_name: str, add_frame: bool) -> str:
+    msg = ""
+    if add_frame:
+        msg += hprint.frame("Conda export list") + "\n"
+    cmd = rf"(conda activate {conda_env_name} 2>&1 >/dev/null) && conda list --export"
+    _, msg_tmp = holdcond.conda_system_to_string(cmd)
+    msg += msg_tmp
+    return msg
+
+
+def
save_env_file(conda_env_name: str, dir_name: str) -> Tuple[str, str]: + msg = "" + msg += get_system_info(add_frame=True) + msg += get_package_summary(conda_env_name, add_frame=True) + msg += get_conda_export_list(conda_env_name, add_frame=True) + # Save results. + if dir_name is not None: + file_name = ( + f"{conda_env_name}.{hsystem.get_user_name()}.{hsystem.get_os_name()}." + f"{hsystem.get_server_name()}.txt" + ) + dst_file = os.path.join(dir_name, file_name) + dst_file = os.path.abspath(dst_file) + hio.create_enclosing_dir(dst_file, incremental=True) + _LOG.info("Saving conda env signature to '%s'", dst_file) + hio.to_file(dst_file, msg) + else: + dst_file = None + return msg, dst_file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py new file mode 100644 index 000000000..a9d6b4f46 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py @@ -0,0 +1,267 @@ +""" +Import as: + +import helpers.old.tunnels as holdtunn +""" + +import logging +import os +from typing import Any, Dict, List, Tuple, Union, cast + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.old.user_credentials as holuscre + +_LOG = logging.getLogger(__name__) + + +def _get_services_info() -> list: + # Server ports. + services = [ + # service name, server public IP, local port, remote port. + ("MongoDb", hsystem.get_env_var("OLD_DEV_SERVER"), 27017, 27017), + ("Jenkins", hsystem.get_env_var("JENKINS_SERVER"), 8080, 8080), + # ("Reviewboard", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8000, 8000), + # ("Doc server", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8001, 80), + # Netdata to Jenkins and Dev server. 
+ # ("Dev system performance", DEV_SERVER, 19999), + # ("Jenkins system performance", DEV_SERVER, 19999), + ] + return services + + +# ############################################################################# + + +def get_tunnel_info() -> Tuple[list, str]: + credentials = holuscre.get_credentials() + # + tunnel_info = credentials["tunnel_info"] + hdbg.dassert_is_not(tunnel_info, None) + # Add tunnels for standard services. + services = _get_services_info() + tunnel_info.extend(services) + # + ssh_key_path = credentials["ssh_key_path"] + hdbg.dassert_is_not(ssh_key_path, None) + # TODO(gp): Add check to make sure that the source ports are all different. + return tunnel_info, ssh_key_path + + +def tunnel_info_to_string(tunnel_info: list) -> str: + ret = "\n".join(map(str, tunnel_info)) + ret = hprint.indent(ret) + return ret + + +def parse_service( + service: Tuple[str, str, int, int], +) -> Dict[str, Union[str, int]]: + hdbg.dassert_eq(len(service), 4, "service=%s", service) + service_name, server, local_port, remote_port = service + return { + "service_name": service_name, + "server": server, + "local_port": local_port, + "remote_port": remote_port, + } + + +def find_service( + service_name: str, tunnel_info: list +) -> Tuple[str, str, int, int]: + found_service = False + for service in tunnel_info: + if service_name == parse_service(service)["service_name"]: + hdbg.dassert(not found_service) + found_service = True + ret: Tuple[str, str, int, int] = service + hdbg.dassert(found_service) + return ret + + +def get_server_ip(service_name: str) -> str: # pylint: disable=unused-argument + tunnel_info, _ = get_tunnel_info() + _LOG.debug("tunnels=\n%s", tunnel_info_to_string(tunnel_info)) + service = find_service("Doc server", tunnel_info) + server = parse_service(service)["server"] + server = cast(str, server) + return server + + +def _get_tunnel_info() -> Tuple[Any, str]: + credentials = holuscre.get_credentials() + # + tunnel_info = credentials["tunnel_info"] + 
hdbg.dassert_is_not(tunnel_info, None) + # Add tunnels for standard services. + services = _get_services_info() + tunnel_info.extend(services) + # + ssh_key_path = credentials["ssh_key_path"] + hdbg.dassert_is_not(ssh_key_path, None) + # TODO(gp): Add check to make sure that the source ports are all different. + return tunnel_info, ssh_key_path + + +def _tunnel_info_to_string(tunnel_info: list) -> str: + ret = "\n".join(map(str, tunnel_info)) + ret = hprint.indent(ret) + return ret + + +def _service_to_string(service: Tuple[str, str, str, str]) -> str: + service_name, server, local_port, remote_port = service + ret = ( + f"tunnel for service '{service_name}'" + + f" server='{server}'" + + f" port='{local_port}->{remote_port}'" + ) + return ret + + +# ############################################################################# + + +def _get_ssh_tunnel_process( + local_port: int, remote_port: int, fuzzy_match: bool +) -> Tuple[List[int], str]: + """ + Return the pids of the processes attached to a given port. + """ + + def _keep_line(line: str) -> bool: + keep = "ssh -i" in line + if keep: + if fuzzy_match: + keep = (f" {local_port}:localhost " in line) or ( + f" localhost:{remote_port} " in line + ) + else: + keep = f" {local_port}:localhost:{remote_port} " in line + return keep + + _LOG.debug("local_port=%d -> remote_port=%d", local_port, remote_port) + pids, txt = hsystem.get_process_pids(_keep_line) + _LOG.debug("pids=%s", pids) + _LOG.debug("txt=\n%s", txt) + return pids, txt + + +def _create_tunnel( + server_name: str, + local_port: int, + remote_port: int, + user_name: str, + ssh_key_path: str, +) -> None: + """ + Create tunnel from localhost to 'server' for the ports `local_port -> + remote_port` and `user_name`. 
+    """
+    ssh_key_path = os.path.expanduser(ssh_key_path)
+    _LOG.debug("ssh_key_path=%s", ssh_key_path)
+    hdbg.dassert_path_exists(ssh_key_path)
+    #
+    cmd = (
+        "ssh -i {ssh_key_path} -f -nNT -L {local_port}:localhost:{remote_port}"
+        + " {user_name}@{server}"
+    )
+    cmd = cmd.format(
+        user_name=user_name,
+        ssh_key_path=ssh_key_path,
+        local_port=local_port,
+        remote_port=remote_port,
+        server=server_name,
+    )
+    hsystem.system(cmd, blocking=False)
+    # Check that the tunnel is up: unpack (pids, txt) to count the pids only.
+    pids, _ = _get_ssh_tunnel_process(local_port, remote_port, fuzzy_match=True)
+    hdbg.dassert_lte(1, len(pids))
+
+
+def _kill_ssh_tunnel_process(local_port: int, remote_port: int) -> None:
+    """
+    Kill all the processes attached to either local or remote port.
+    """
+    get_pids = lambda: _get_ssh_tunnel_process(
+        local_port, remote_port, fuzzy_match=True
+    )
+    hsystem.kill_process(get_pids)
+
+
+# #############################################################################
+
+
+def start_tunnels(user_name: str) -> None:
+    """
+    Start all the tunnels for the given user.
+    """
+    _LOG.debug("user_name=%s", user_name)
+    # Get tunnel info.
+    tunnel_info, ssh_key_path = _get_tunnel_info()
+    _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info))
+    #
+    for service in tunnel_info:
+        _, server, local_port, remote_port = service
+        pids, _ = _get_ssh_tunnel_process(
+            local_port, remote_port, fuzzy_match=False
+        )
+        if not pids:
+            _LOG.info("Starting %s", _service_to_string(service))
+            _create_tunnel(
+                server, local_port, remote_port, user_name, ssh_key_path
+            )
+        else:
+            _LOG.warning(
+                "%s already exists: skipping", _service_to_string(service)
+            )
+
+
+def stop_tunnels() -> None:
+    """
+    Stop all the tunnels listed in the tunnel info.
+    """
+    # Get the tunnel info.
+ tunnel_info, _ = _get_tunnel_info() + _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) + # + for service in tunnel_info: + _, _, local_port, remote_port = service + _LOG.info("Stopping %s", _service_to_string(service)) + _kill_ssh_tunnel_process(local_port, remote_port) + + +def check_tunnels() -> None: + """ + Check the status of the tunnels for the given user. + """ + # Get the tunnel info. + tunnel_info, _ = _get_tunnel_info() + _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) + # + for service in tunnel_info: + _, _, local_port, remote_port = service + pids, _ = _get_ssh_tunnel_process( + local_port, remote_port, fuzzy_match=False + ) + if pids: + msg = f"exists with pid={pids}" + else: + msg = "doesn't exist" + _LOG.info("%s -> %s", _service_to_string(service), msg) + + +def kill_all_tunnel_processes() -> None: + """ + Kill all the processes that have `ssh -i ...:localhost:...". + """ + + # cmd = "ps ax | grep 'ssh -i' | grep localhost: | grep -v grep" + def _keep_line(line: str) -> bool: + keep = ("ssh -i" in line) and (":localhost:" in line) + return keep + + get_pids = lambda: hsystem.get_process_pids(_keep_line) + hsystem.kill_process(get_pids) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py new file mode 100755 index 000000000..5faded15d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python +""" +Import as: + +import helpers.old.user_credentials as holuscre +""" + +import argparse +import logging +import os +import pprint +from typing import Any, Dict, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hparser as hparser +import 
helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def get_dev_server_ip() -> str: + """ + Get the dev server name from the user environment. + """ + env_var_name = "" + if env_var_name not in os.environ: + _LOG.error( + "Can't find '%s': re-run dev_scripts/setenv.sh?", env_var_name + ) + raise RuntimeError + dev_server = os.environ[env_var_name] + return dev_server + + +# pylint: disable=too-many-statements +def get_credentials() -> Dict[str, Any]: + """ + Report information about a user set-up as a function of: 1) user name 2) + server name 3) git repository name. + + The mandatory information are: + 1) git_user_name + 2) git_user_email + 3) conda_sh_path: the path of the script bootstrapping conda + - To find "conda_sh_path": + > which conda + /data/root/anaconda3/bin/conda + > find /data/root/anaconda3 -name "conda.sh" + - In one instruction: + > CONDA_DIR=$(dirname $(which conda))"/.."; find $CONDA_DIR -name "conda.sh" + - If there are multiple ones you want to pick the one under + `profile.d`, e.g., `/anaconda3/etc/profile.d/conda.sh` + 4) conda_env_path: the path of the dir storing the conda environments + - To find "conda_env_path" + > conda info + ... 
+ envs directories : /data/saggese/.conda/envs + + The optional information are: + 5) ssh_key_path: the path of the ssh key to use + 6) tunnel_info: list of "personal" ports to forward + - This is an advanced behavior that allows to specify in your user + config a set of ports to forward from one computer (typically your + laptop) to a set of services that are specific of your set-up (e.g., + started through `run_jupyter_server.py`) + - E.g., + ```python + if server_name in ("gpmac.local", "gpmac.lan"): + if git_repo_name == "": + service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) + ``` + when GP runs `ssh_tunnels.py` from his laptop in a + `` client, a tunnel is open to the dev + server where `run_jupyter_server.py` will have started a notebook server + 7) jupyter_port: on which port to start a jupyter server on a specific server + - It's a good idea for everybody to have a different port to avoid port + collisions + 8) notebook_html_path: the path where to save html of notebooks + 9) notebook_backup_path: the path where to backup the source .ipynb code of + notebooks + """ + # + user_name = hsystem.get_user_name() + server_name = hsystem.get_server_name() + _LOG.debug("user_name='%s'", user_name) + _LOG.debug("server_name='%s'", server_name) + git_repo_name = hgit.get_repo_full_name_from_client(super_module=True) + # Values to assign. + git_user_name = "" + git_user_email = "" + conda_sh_path = "" + ssh_key_path = "~/.ssh/id_rsa" + tunnel_info: List[Tuple[str, str, str, str]] = [] + jupyter_port = -1 + notebook_html_path = "" + notebook_backup_path = "" + # + conda_env_path = "~/.conda/envs" + conda_env_path = os.path.expanduser(conda_env_path) + if server_name in (): + conda_sh_path = "/anaconda3/etc/profile.d/conda.sh" + if user_name == "saggese": + # GP. + git_user_name = "saggese" + git_user_email = "abc@xyz.com" + if server_name.startswith("gpmac") or server_name.startswith( + "giacintos-mbp" + ): + # Laptop. 
+ conda_sh_path = "/Users/saggese/opt/anaconda3/etc/profile.d/conda.sh" + conda_env_path = "/Users/saggese/.conda/envs" + if git_repo_name == "": + # Forward port 10003 to the notebook server that is started by + # `run_jupyter_server.py` when executed on the dev server. + # service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) + # tunnel_info.append(service) + # jupyter_port = 10001 + pass + elif server_name == "": + if git_repo_name == "": + jupyter_port = 10003 + else: + hdbg.dassert_ne(conda_sh_path, "") + elif user_name == "paul": + # Paul. + git_user_name = "paul" + git_user_email = "abc@xyz.com" + if server_name in ("Pauls-MacBook-Pro.local", "Pauls-MBP"): + conda_sh_path = "/Users/paul/anaconda3/etc/profile.d/conda.sh" + conda_env_path = "/Users/paul/.conda/envs" + # Check. + for var_name, val_name in [ + ("git_user_name", git_user_name), + ("git_user_email", git_user_email), + ("conda_sh_path", conda_sh_path), + ("conda_env_path", conda_env_path), + # We allow the rest of the variables (e.g., ssh_key_path, tunnel_info) to + # be empty since in some configurations they can be undefined. + ]: + hdbg.dassert_is_not( + val_name, + None, + "Undefined '%s': add your credentials for user_name='%s' and " + "server_name='%s' to '%s'", + var_name, + user_name, + server_name, + __file__, + ) + conda_sh_path = os.path.expanduser(conda_sh_path) + conda_sh_path = os.path.abspath(conda_sh_path) + hdbg.dassert_path_exists(conda_sh_path) + # + conda_env_path = os.path.abspath(os.path.expanduser(conda_env_path)) + # Not necessarily the conda_env_path exists. + if not os.path.exists(conda_env_path): + _LOG.warning("The dir '%s' doesn't exist: creating it", conda_env_path) + hio.create_dir(conda_env_path, incremental=True) + hdbg.dassert_path_exists(os.path.dirname(conda_env_path)) + # + for service in tunnel_info: + # TODO(gp): We should call in ssh_tunnels.py to keep this encapsulated. 
+ hdbg.dassert_eq(len(service), 4) + service_name, server, local_port, remote_port = service + _ = service_name, server, local_port, remote_port + ret = { + "git_user_name": git_user_name, + "git_user_email": git_user_email, + "conda_sh_path": conda_sh_path, + "conda_env_path": conda_env_path, + "ssh_key_path": ssh_key_path, + "tunnel_info": tunnel_info, + "jupyter_port": jupyter_port, + "notebook_html_path": notebook_html_path, + "notebook_backup_path": notebook_backup_path, + } + _LOG.debug("Credentials: %s", ret) + return ret + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--user", action="store", default=None, help="Impersonate a user" + ) + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + if args.user: + hsystem.set_user_name(args.user) + usc = get_credentials() + pprint.pprint(usc) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh new file mode 100644 index 000000000..45acd8194 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh @@ -0,0 +1,113 @@ +#!/bin/sh + +# NOTE TO MAINTAINERS: this must be updated each time a new texlive is +# released! 
+default_version=2024 +tlversion=${1:-"$default_version"} +installer_archive=install-tl-unx.tar.gz + +usage () +{ + printf 'Install TeXLive\n' + printf 'Usage: %s [OPTIONS]\n\n' "$0" + printf 'Options:\n' + printf ' -t: TeXLive version (default %s)\n' "$default_version" + printf ' -m: mirror URL\n' +} + +if ! args=$(getopt 't:m:' "$@"); then + usage && exit 1 +fi +# The variable is intentionally left unquoted. +# shellcheck disable=SC2086 +set -- $args + +tlversion= +mirror_url= + +while true; do + case "$1" in + (-t) + tlversion="${2}" + shift 2 + ;; + (-m) + mirror_url="${2}" + shift 2 + ;; + (--) + shift + break + ;; + (*) + printf 'Unknown option: %s\n' "$1" + usage + exit 1 + ;; + esac +done + +[ -n "$tlversion" ] || tlversion="$default_version" + +if [ -z "$mirror_url" ] && [ "$tlversion" != "$default_version" ]; then + # Default mirror for historic releases + mirror_url="ftp://tug.org/historic/" +fi + +if [ -z "$mirror_url" ]; then + # Get the mirror URL from the redirect. Otherwise, if we were to + # always use the mirror URL, we'd run into problems whenever we get + # installer and signatures from different mirrors that are not 100% + # in sync. + mirror_url=$(wget -4 --quiet --output-document=/dev/null \ + --server-response \ + http://mirror.ctan.org/ \ + 2>&1 | \ + sed -ne 's/.*Location: \(.*\)$/\1/p' | head -n 1) +fi + +# Trim trailing slash(es) +mirror_url=$(echo "$mirror_url" | sed -e 's/\/*$//') + +if [ "$tlversion" = "$default_version" ]; then + installer_url="$mirror_url/systems/texlive/tlnet/" + repository= +else + installer_url="$mirror_url/systems/texlive/$tlversion/tlnet-final/" + repository=$installer_url +fi + +# Log the installer and repository url +printf 'installer URL: %s\n' "${installer_url}" +printf 'repository: %s\n' "${repository}" + +# Download the install-tl perl script. The archive integrity and signature is +# verified later, so it's ok if we use an insecure connection. 
+wget -4 --no-verbose --no-check-certificate \ + "$installer_url/$installer_archive" \ + "$installer_url/$installer_archive".sha512 \ + "$installer_url/$installer_archive".sha512.asc \ + || exit 1 + +## Verifiy installer integrity +# get current signing key +gpg --keyserver keyserver.ubuntu.com \ + --receive-key 0xC78B82D8C79512F79CC0D7C80D5E5D9106BAB6BC || exit 5 +gpg --verify "$installer_archive".sha512.asc || exit 5 +sha512sum "$installer_archive".sha512 || exit 5 + +## Proceed with installation +# Extract installer +mkdir -p ./install-tl +tar --strip-components 1 -zvxf "$installer_archive" -C "$PWD/install-tl" \ + || exit 1 + +# Run the default installation with the specified profile. +./install-tl/install-tl ${repository:+-repository "$repository"} \ + --profile=/root/texlive.profile + +# Cleanup installation artifacts. +rm -rf ./install-tl \ + "$installer_archive" \ + "$installer_archive.sha512" \ + "$installer_archive.sha512.asc" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt new file mode 100644 index 000000000..9e4ccf64f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt @@ -0,0 +1,115 @@ +# Packages listed in https://pandoc.org/MANUAL.html#creating-a-pdf + +######################################################################### +### Packages required by pandoc +amsfonts # math fonts +amsmath # math commands +babel # required when pandoc is used with lang +beamer # for presentations +bidi # used by xelatex if the `dir` variable is set +bookmark # bookmarks with hyperref +booktabs # nicer-looking tables +caption # customize captions in floating envs; required for beamer +csquotes # used for typography if the `csquotes` 
variable is set to true +euler # use AMS Euler fonts for math +eurosym # Metafont and macros for Euro sign +fancyvrb # Verbatim environments for code blocks +framed # Needed with certain `--highlight-style` options +geometry # required if the `geometry` variable set +graphics # required if the document contains images +hyperref # hyperlinks +listings # if the `--listing` option is used +lm # Latin modern fonts +lm-math # Latin modern fonts for math +memoir # frequently used document class +multirow # Tabular cells spanning multiple rows +pgf # for TikZ and beamer +setspace # required if the `linestretch` variable is used +soul # required for underlined text +subfig # Figures broken into subfigures +tools # the LaTeX standard tools bundle; e.g., calc, longtable +xcolor # colors + +# Deprecated! Only used by older pandoc versions before 3.0. +ulem + +######################################################################### +### Semi-optional packages +# +# The following packages will be used to improve output quality if +# present, but pandoc does not require them to be present: +footnotehyper # to allow footnotes in tables +microtype # for better spacing adjustments +parskip # for better inter-paragraph spaces +upquote # for straight quotes in verbatim environments +xurl # for better line breaks in URLs + +######################################################################### +### Intentionally **NOT** installed due to size constraints. +# +#xeCJR # If CJKmainfont is set, xeCJK is needed. + +######################################################################### +### Required when using pandoc-crossref +cleveref # Intelligent cross-referencing +float # Improved interface for floating objects + +######################################################################### +### Extra engines and packages for XeLaTeX and LuaLaTeX. 
+fontspec # required with xelatex or lualatex +ifmtarg # if-then-else commands used in the default template +iftex # Checks for the specific LaTeX engine being used +latexmk +lua-ul # LuaLaTeX replacement of soul +luacode +luacolor +lualatex-math # LuaTeX specific math patches +luatexbase +mathspec # used by xelatex if the `mathspec` variable is set +selnolig # Used with LuaLaTeX to disable illegal typographic ligatures +unicode-math # Unicode math support for XeTeX and LuaTeX +xetex + +######################################################################### +### Reference management tools +biber +biblatex +bibtex +natbib + +######################################################################### +### I18n and languages +# +# The choice of selected languages is historic, those were the ones +# installed by TeXLive by default for a long time. +bidi +babel-basque +babel-czech +babel-danish +babel-dutch +babel-english +babel-finnish +babel-french +babel-german +babel-hungarian +babel-italian +babel-norsk +babel-polish +babel-portuges +babel-spanish +babel-swedish +hyphen-basque +hyphen-czech +hyphen-danish +hyphen-dutch +hyphen-english +hyphen-finnish +hyphen-french +hyphen-german +hyphen-hungarian +hyphen-italian +hyphen-norwegian +hyphen-polish +hyphen-portuguese +hyphen-spanish +hyphen-swedish diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile new file mode 100644 index 000000000..dd5364e87 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile @@ -0,0 +1,32 @@ +# texlive.profile written on Tue Feb 5 09:43:07 2019 UTC +# It will NOT be updated and reflects only the +# installation profile at installation time. 
+# +# NOTE: see also alpine/latex.Dockerfile which appends +# `binary_x86_64-linuxmusl 1` to this file, use for non-glibc distributions. +selected_scheme scheme-basic +TEXDIR /opt/texlive/texdir +TEXMFLOCAL /opt/texlive/texmf-local +TEXMFSYSVAR /opt/texlive/texdir/texmf-var +TEXMFSYSCONFIG /opt/texlive/texdir/texmf-config +TEXMFVAR ~/.texlive/texmf-var +TEXMFCONFIG ~/.texlive/texmf-config +TEXMFHOME ~/texmf +instopt_adjustpath 0 +instopt_adjustrepo 1 +instopt_letter 0 +instopt_portable 0 +instopt_write18_restricted 1 +tlpdbopt_autobackup 1 +tlpdbopt_backupdir tlpkg/backups +tlpdbopt_create_formats 1 +tlpdbopt_desktop_integration 1 +tlpdbopt_file_assocs 1 +tlpdbopt_generate_updmap 0 +tlpdbopt_install_docfiles 0 +tlpdbopt_install_srcfiles 0 +tlpdbopt_post_code 1 +tlpdbopt_sys_bin /usr/local/bin +tlpdbopt_sys_info /usr/local/share/info +tlpdbopt_sys_man /usr/local/share/man +tlpdbopt_w32_multi_user 1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py new file mode 100644 index 000000000..d8807f46b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py @@ -0,0 +1,411 @@ +""" +Import as: + +import helpers.repo_config_utils as hrecouti +""" + +import logging +import os +from typing import Any, Dict, List, Optional, Union + +import yaml + +_LOG = logging.getLogger(__name__) + +# ############################################################################# + +# Copied from hprint to avoid import cycles. + + +# TODO(gp): It should use *. +def indent(txt: str, num_spaces: int = 2) -> str: + """ + Add `num_spaces` spaces before each line of the passed string. 
+ """ + spaces = " " * num_spaces + txt_out = [] + for curr_line in txt.split("\n"): + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + res = "\n".join(txt_out) + return res + + +# End copy. + + +# ############################################################################# + + +def _find_config_file(file_name: str) -> str: + """ + Find recursively the dir of config file. + + This function traverses the directory hierarchy upward from a + specified starting path to find the directory that contains the + config file. + + :param file_name: name of the file to find + :return: path to the file + """ + curr_dir = os.getcwd() + while True: + path = os.path.join(curr_dir, file_name) + if os.path.exists(path): + break + parent = os.path.dirname(curr_dir) + if parent == curr_dir: + # We cannot use helpers since it creates circular import. + raise FileNotFoundError( + f"Could not find '{file_name}' in current directory or any parent directories" + ) + curr_dir = parent + return path + + +def _get_env_var( + env_name: str, + as_bool: bool = False, + default_value: Any = None, + abort_on_missing: bool = True, +) -> Union[str, bool]: + """ + Get an environment variable by name. + + :param env_name: name of the env var + :param as_bool: convert the value into a Boolean + :param default_value: the default value to use in case it's not + defined + :param abort_on_missing: if the env var is not defined aborts, + otherwise use the default value + :return: value of env var + """ + if env_name not in os.environ: + if abort_on_missing: + assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" + else: + return default_value + value = os.environ[env_name] + if as_bool: + # Convert the value into a boolean. 
+ if value in ("0", "", "None", "False"): + value = False + else: + value = True + return value + + +# ############################################################################# +# RepoConfig +# ############################################################################# + + +class RepoConfig: + def __init__(self, data: Dict) -> None: + """ + Set the data to be used by the module. + """ + self._data = data + + def set_repo_config_data(self, data: Dict) -> None: + self._data = data + + @classmethod + def from_file(cls, file_name: Optional[str] = None) -> "RepoConfig": + """ + Return the text of the code stored in `repo_config.yaml`. + """ + if file_name is None: + file_name = RepoConfig._get_repo_config_file() + assert os.path.exists(file_name), f"File '{file_name}' doesn't exist" + _LOG.debug("Reading file_name='%s'", file_name) + try: + with open(file_name, "r") as file: + # Use `safe_load()` to avoid executing arbitrary code. + data = yaml.safe_load(file) + assert isinstance(data, dict), ( + "data=\n%s\nis not a dict but %s", + str(data), + type(data), + ) + except Exception as e: + raise ValueError(f"Error reading YAML file {file_name}: {e}") + return cls(data) + + # TODO(gp): -> __str__? + def config_func_to_str(self) -> str: + """ + Return the string representation of the config function. + """ + ret: List[str] = [] + ret.append(f"get_host_name='{self.get_host_name()}'") + ret.append( + f"get_html_dir_to_url_mapping='{self.get_html_dir_to_url_mapping()}'" + ) + ret.append(f"get_invalid_words='{self.get_invalid_words()}'") + ret.append( + f"get_docker_base_image_name='{self.get_docker_base_image_name()}'" + ) + ret.append(f"get_release_team='{self.get_release_team()}'") + txt = "\n".join(ret) + return txt + + # repo_info + + # TODO(gp): -> get_repo_name + def get_name(self) -> str: + """ + Return the name of the repo, e.g., in `//amp`. 
+ """ + value = self._data["repo_info"]["repo_name"] + return f"//{value}" + + def get_github_repo_account(self) -> str: + """ + Return the account name of the repo on GitHub, e.g., `causify-ai`, + `gpsaggese`. + """ + value = self._data["repo_info"]["github_repo_account"] + return value + + def get_repo_short_name(self) -> str: + """ + Return the short name of the repo, e.g., `amp`. + """ + value = self._data["repo_info"]["repo_name"] + return value + + def get_repo_full_name(self) -> str: + """ + Return the full name of the repo, e.g., `causify-ai/amp`, + `gpsaggese/notes`. + """ + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_name = self._data["repo_info"]["repo_name"] + value = f"{github_repo_account}/{repo_name}" + return value + + def get_repo_full_name_with_hostname(self) -> str: + """ + Return the full name of the repo, e.g., `github.com/causify-ai/amp`. + """ + repo_full_name = self.get_repo_full_name() + host_name = self.get_host_name() + value = f"{host_name}/{repo_full_name}" + return value + + # TODO(gp): We should replace this with `get_full_repo_name()`, since + # the mapping is not needed. + def get_repo_map(self) -> Dict[str, str]: + """ + Return a mapping of short repo name -> long repo name. + + E.g., + ``` + {"amp": "causify-ai/amp"} + {"helpers": "causify-ai/helpers"} + ``` + """ + repo_name = self._data["repo_info"]["repo_name"] + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_map = {repo_name: f"{github_repo_account}/{repo_name}"} + return repo_map + + # TODO(gp): Is this needed? 
+ def get_extra_amp_repo_sym_name(self) -> str: + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_name = self._data["repo_info"]["repo_name"] + if repo_name in ["orange", "lemonade"]: + # TODO(Grisha): it should return cmamp name, not the current + return f"{github_repo_account}/cmamp" + else: + return f"{github_repo_account}/{repo_name}" + + # TODO(gp): -> get_github_host_name + def get_host_name(self) -> str: + """ + Return the host name of the repo, e.g., `github.com`. + """ + value = self._data["repo_info"]["github_host_name"] + return value + + def get_invalid_words(self) -> List[str]: + """ + Return a list of words that are considered invalid in the repo. + """ + values = self._data["repo_info"]["invalid_words"] + if values is None: + invalid_words = [] + else: + invalid_words = values.split(",") + return invalid_words + + def get_issue_prefix(self) -> str: + """ + Return the prefix for the issue, e.g., `CmampTask`, `HelpersTask`. + """ + value = self._data["repo_info"]["issue_prefix"] + return value + + # docker_info + + def get_docker_base_image_name(self) -> str: + """ + Return a base name for docker image. + + E.g., `helpers`. + """ + value = self._data["docker_info"]["docker_image_name"] + return value + + def get_release_team(self) -> str: + """ + Return the release team name for docker image. + + E.g., `dev_system`. + """ + value = self._data["docker_info"].get("release_team") + return value + + # s3_bucket_info + + def get_unit_test_bucket_path(self) -> str: + """ + Return the path to the unit test bucket. + """ + value = self._data["s3_bucket_info"]["unit_test_bucket_name"] + return value + + def get_html_bucket_path(self) -> str: + """ + Return the path to the bucket where published HTMLs are stored. + """ + value = self._data["s3_bucket_info"]["html_bucket_name"] + return value + + def get_html_bucket_path_v2(self) -> str: + """ + Return the path to the bucket with published HTMLs. 
+ + "v2" version allows for the published HTMLs to be browsed. + """ + html_bucket = self.get_html_bucket_path() + html_bucket_path = os.path.join(html_bucket, "v2") + return html_bucket_path + + def get_html_ip(self) -> str: + """ + Return the IP of the bucket where published HTMLs are stored. + """ + value = self._data["s3_bucket_info"]["html_ip"] + return value + + def get_html_ip_v2(self) -> str: + """ + Return the IP of the bucket with published HTMLs. + + "v2" version allows for the published HTMLs to be browsed. + """ + ip = self.get_html_ip() + ip_v2 = f"{ip}/v2" + return ip_v2 + + def get_html_dir_to_url_mapping(self) -> Dict[str, str]: + """ + Return a mapping between directories mapped on URLs. + + This is used when we have web servers serving files from + specific directories. + """ + dir_to_url = { + self.get_html_bucket_path(): self.get_html_ip(), + self.get_html_bucket_path_v2(): self.get_html_ip_v2(), + } + return dir_to_url + + def get_shared_configs_bucket_name(self, environment: str) -> str: + """ + Return the name of the shared configs bucket. + """ + if "shared_configs_bucket_name" not in self._data["s3_bucket_info"]: + return None + value: Dict[str, str] = self._data["s3_bucket_info"][ + "shared_configs_bucket_name" + ] + bucket_name = value.get(environment, None) + return bucket_name + + def get_dir_suffix(self) -> str: + """ + Return the suffix of the dev_scripts_{dir_suffix} dir for the repo. + + E.g., `helpers` for `dev_scripts_helpers` in //helpers repo. + """ + value = self._data["runnable_dir_info"]["dir_suffix"] + return value + + def use_helpers_as_nested_module(self) -> bool: + """ + Return whether the helpers repo is used as a nested module. + """ + value = bool( + self._data["runnable_dir_info"]["use_helpers_as_nested_module"] + ) + return value + + # TODO(gp): Add functions for container_registry_info. + + def get_container_registry_url(self, registry: str = "ecr") -> str: + """ + Return the URL of the container registry. 
+ + :param registry: the name of the container registry (e.g., `ecr`, `ghcr`) + :return: the URL of the container registry + """ + return self._data["container_registry_info"][registry] + + # Utils. + + @staticmethod + def _get_repo_config_file() -> str: + """ + Return the absolute path to `repo_config.yml` that should be used. + + The `repo_config.yml` is determined based on an overriding env var or + based on the root of the Git path. + """ + env_var = "CSFY_REPO_CONFIG_PATH" + file_path = _get_env_var(env_var, abort_on_missing=False) + if file_path: + _LOG.warning( + "Using value '%s' for %s from env var", file_path, env_var + ) + else: + # client_root = _find_git_root() + # We cannot use git root here because the config file doesn't always + # reside in the root of the repo (e.g., it can be in subdir such as + # //cmamp/ck.infra for runnable dir). + file_path = _find_config_file("repo_config.yaml") + file_path = os.path.abspath(file_path) + _LOG.debug("Reading file_name='%s'", file_path) + # Check if path exists. + # We can't use helpers since it creates circular import. + if not os.path.exists(file_path): + raise FileNotFoundError(f"File '{file_path}' doesn't exist") + return file_path + + +_REPO_CONFIG = None + + +def get_repo_config() -> RepoConfig: + """ + Return the repo config object. 
+ """ + global _REPO_CONFIG + if _REPO_CONFIG is None: + _REPO_CONFIG = RepoConfig.from_file() + return _REPO_CONFIG diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py new file mode 100644 index 000000000..cd24fecf1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py @@ -0,0 +1,83 @@ +""" +Import as: + +import helpers.stage_linked_file as hstlifil +""" + +import argparse +import logging +import os +import shutil +from typing import List + +_LOG = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def find_symlinks(dst_dir: str) -> List[str]: + """ + Find all symbolic links in the destination directory. + + :param dst_dir: Directory to search for symbolic links. + :return: List of paths to symbolic links. + """ + symlinks = [] + for root, _, files in os.walk(dst_dir): + for file in files: + file_path = os.path.join(root, file) + if os.path.islink(file_path): + symlinks.append(file_path) + return symlinks + + +def stage_links(symlinks: List[str]) -> None: + """ + Replace symbolic links with writable copies of the linked files. + + :param symlinks: List of symbolic links to replace. + """ + for link in symlinks: + # Resolve the original file the symlink points to. + target_file = os.readlink(link) + if not os.path.exists(target_file): + _LOG.warning( + f"Warning: Target file does not exist for link {link} -> {target_file}" + ) + continue + # Replace the symlink with a writable copy of the target file. + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable. 
+ os.chmod(link, 0o644) + _LOG.info("Staged: %s -> %s", link, target_file) + except Exception as e: + _LOG.error("Error staging link %s: %s", link, e) + + +def main(): + parser = argparse.ArgumentParser( + description="Stage symbolic links for modification." + ) + parser.add_argument( + "--dst_dir", required=True, help="Destination directory." + ) + args = parser.parse_args() + symlinks = find_symlinks(args.dst_dir) + if not symlinks: + _LOG.info("No symbolic links found to stage.") + return + stage_links(symlinks) + _LOG.info("Staged %s files for modification.", len(symlinks)) + + +if __name__ == "__main__": + main() + +""" +Usage + + - python3 stage_linked_file.py --dst_dir /path/to/dst + +""" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py new file mode 100644 index 000000000..27344070d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py @@ -0,0 +1,30 @@ +""" +Import as: + +import helpers.telegram_notify.config as htenocon +""" + +import getpass +import os +from typing import Tuple + +import helpers.hdbg as hdbg + +NOTIFY_JUPYTER_TOKEN = os.environ["CSFY_TELEGRAM_TOKEN"] + + +def get_info() -> Tuple[str, str]: + user = getpass.getuser() + # telegram_token is the token of your bot + # - You can use @NotifyJupyterBot, its token is + # '***REMOVED***' + # chat_id: To get it, start messaging with the bot. 
def _get_updates_dict(token: str) -> dict:
    """
    Fetch the pending updates of a bot from the Telegram `getUpdates` endpoint.

    :param token: token of the bot
    :return: decoded JSON response; its `ok` field is asserted to be truthy
    """
    updates_cont = requests.post(
        f"https://api.telegram.org/bot{token}/getUpdates"
    ).content
    updates_dict = json.loads(updates_cont)
    assert updates_dict["ok"], updates_dict
    return cast(dict, updates_dict)


def _get_username_id(updates_dict: dict) -> Dict[str, str]:
    """
    Map each sender's username to the sender id found in the updates.

    Updates that carry no `message`, no `from` or no `username` are skipped
    instead of raising `KeyError`. Ids are passed through exactly as they
    appear in the response.

    :param updates_dict: decoded `getUpdates` response
    :return: mapping from username to sender id
    """
    username_id = {}
    for result in updates_dict["result"]:
        sender = result.get("message", {}).get("from", {})
        if "username" in sender and "id" in sender:
            username_id[sender["username"]] = sender["id"]
    return username_id


def _get_chat_id_updates_dict(username: str, updates_dict: dict) -> str:
    """
    Look up the chat id of a user in the updates.

    :param username: username to look for
    :param updates_dict: decoded `getUpdates` response
    :return: id associated with `username`
    """
    username_id = _get_username_id(updates_dict)
    assert username in username_id, (
        "Either the username is wrong or you"
        " have not sent a message to the bot yet"
    )
    return username_id[username]


def send_chat_id(token: str, username: str) -> str:
    """
    Find the chat id of a user and send it to the user through the bot.

    :param token: token of the bot
    :param username: username whose chat id is requested
    :return: chat id of the user
    """
    updates_dict = _get_updates_dict(token)
    chat_id = _get_chat_id_updates_dict(username, updates_dict)
    # `TelegramNotebookNotify.send` is the static sender that accepts an
    # explicit token and chat id; `TelegramNotify.send` is an instance method
    # that takes only `text`.
    htnoteno.TelegramNotebookNotify.send(
        text=f"Your chat id is: {chat_id}", token=token, chat_id=chat_id
    )
    return chat_id


def _main() -> None:
    """
    Parse the command line, then send and print the chat id of `--username`.

    When `--token` is not given, the token from the config module is used.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--username", required=True, action="store", type=str)
    parser.add_argument("--token", required=False, action="store", type=str)
    args = parser.parse_args()
    username = args.username
    if args.token:
        token_ = args.token
    else:
        _LOG.info("Using default token for NotifyJupyterBot.")
        token_ = htenocon.NOTIFY_JUPYTER_TOKEN
    chat_id_ = send_chat_id(token_, username)
    print(f"Your chat id is: {chat_id_}")
# #############################################################################
# TelegramNotebookNotify
# #############################################################################


class TelegramNotebookNotify:
    """
    Send notifications prefixed with the name of the running launcher.
    """

    def __init__(self) -> None:
        self.launcher_name = _get_launcher_name()
        self.token, self.chat_id = htenocon.get_info()

    @staticmethod
    def send(
        text: str, token: Optional[str], chat_id: Optional[str]
    ) -> Optional[bytes]:
        """
        Post a message to a chat through the Telegram `sendMessage` endpoint.

        :param text: message to send, posted with `parse_mode` set to HTML
        :param token: token of the bot
        :param chat_id: id of the destination chat
        :return: raw response content, or `None` when `token` or `chat_id` is
            missing and nothing is sent
        """
        if chat_id is None or token is None:
            _LOG.warning(
                "Not sending notifications. To send notifications, both "
                "`chat_id` and `token` need to be specified. Go to README.md "
                "for more information."
            )
            return None
        payload = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"}
        return requests.post(
            f"https://api.telegram.org/bot{token}/sendMessage",
            data=payload,
        ).content

    def notify(self, message: str) -> None:
        """
        Send `message` prefixed with the launcher name.

        :param message: text to send
        """
        # NOTE(review): the markup around the launcher name was lost in this
        # copy of the file; `<pre>` is a reconstruction - confirm against the
        # upstream source.
        msg = f"<pre>{self.launcher_name}</pre>: {message}"
        self.send(msg, self.token, self.chat_id)


# #############################################################################
# _RequestsHandler
# #############################################################################


class _RequestsHandler(logging.Handler):
    """
    Logging handler that posts each formatted record to Telegram.
    """

    def emit(self, record: logging.LogRecord) -> bytes:  # type: ignore
        # The destination is looked up on every record.
        token, chat_id = htenocon.get_info()
        log_entry = self.format(record)
        payload = {"chat_id": chat_id, "text": log_entry, "parse_mode": "HTML"}
        return requests.post(
            f"https://api.telegram.org/bot{token}/sendMessage",
            data=payload,
        ).content


# #############################################################################
# _LogFormatter
# #############################################################################


class _LogFormatter(logging.Formatter):
    """
    Format a record as the launcher name followed by the log message.
    """

    def format(self, record: logging.LogRecord) -> str:
        launcher_name = _get_launcher_name()
        # `getMessage()` applies the record's `%`-style arguments, which
        # `record.msg` alone does not.
        # NOTE(review): `<pre>` is a reconstruction of markup lost in this
        # copy of the file - confirm against the upstream source.
        return f"<pre>{launcher_name}</pre>: {record.getMessage()}"


def init_tglogger(log_level: int = logging.DEBUG) -> None:
    """
    Send notifications using logging.

    The `telegram_notify` logger is set to `log_level` and its handlers are
    replaced by a single Telegram handler.

    :param log_level: level assigned to the `telegram_notify` logger
    """
    _tg_log = logging.getLogger("telegram_notify")
    _tg_log.setLevel(log_level)
    handler = _RequestsHandler()
    formatter = _LogFormatter()
    handler.setFormatter(formatter)
    _tg_log.handlers = [handler]


# #############################################################################
# TelegramNotify
# #############################################################################


class TelegramNotify:
    """
    Send notifications using the token and chat id from the config module.
    """

    def __init__(self) -> None:
        self.token, self.chat_id = htenocon.get_info()

    def send(self, text: str) -> Optional[bytes]:
        """
        Post `text` to the configured chat.

        :param text: message to send, posted with `parse_mode` set to HTML
        :return: raw response content
        """
        payload = {"chat_id": self.chat_id, "text": text, "parse_mode": "HTML"}
        return requests.post(
            f"https://api.telegram.org/bot{self.token}/sendMessage",
            data=payload,
        ).content
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt new file mode 100644 index 000000000..3135b8c8e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt new file mode 100644 index 000000000..3135b8c8e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to 
execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt new file mode 100644 index 000000000..00529190c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt @@ -0,0 +1,3 @@ + a b c +0 0 2 2 +1 3 4 5 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt new file mode 100644 index 000000000..95d09f2b1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt new file mode 100644 index 000000000..efbdde823 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt @@ -0,0 +1,31 @@ +original shape=(7, 2) +Head: +{ + "0":{ + "col_1":1.0, + "col_2":1 + }, + "1":{ + "col_1":2.0, + "col_2":2 + }, + "2":{ + "col_1":3.0, + "col_2":3 + } +} +Tail: +{ + "4":{ + "col_1":5.0, + "col_2":5 + }, + "5":{ + "col_1":6.0, + "col_2":6 + }, + "6":{ + "col_1":7.0, + "col_2":7 + } +} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt new file mode 100644 index 000000000..cab20a014 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", + "col_2":1 + }, + "1":{ + "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", + "col_2":2 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt new file mode 100644 index 000000000..3c50fde31 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt @@ -0,0 +1,31 @@ +original shape=(7, 2) +Head: +{ + "0":{ + "col_1":1.0, + "col_2":1 + }, + "1":{ + "col_1":2.0, + "col_2":2 + }, + "2":{ + "col_1":3.0, + "col_2":3 + } +} +Tail: +{ + "4":{ + "col_1":5.0, + "col_2":5 + }, + "5":{ + "col_1":6.0, + "col_2":6 + }, + "6":{ + "col_1":7.0, + "col_2":7 + } +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt new file mode 100644 index 000000000..cab20a014 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", + "col_2":1 + }, + "1":{ + "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", + "col_2":2 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt new file mode 100644 index 000000000..9c8c2a07e --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt @@ -0,0 +1,4 @@ +## docker_images_ls_repo: +## docker_login: +eval $(aws ecr get-login --profile am --no-include-email --region us-east-1) +docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt new file mode 100644 index 000000000..e2df28b1f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt @@ -0,0 +1,3 @@ +## docker_kill: all=True +docker ps -a +docker rm -f $(docker ps -a -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt new file mode 100644 index 000000000..44a4748dc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt @@ -0,0 +1,3 @@ +## docker_kill: all=False +docker ps -l +docker rm -f $(docker ps -l -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt new file mode 100644 index 000000000..613a41c2d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt @@ -0,0 +1 @@ +docker ps --format='table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}\t{{.Label "com.docker.compose.service"}}' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt new file mode 100644 index 000000000..0c262d7ea --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt @@ -0,0 +1,3 @@ +report_memory_usage=False report_cpu_usage=False +## git_clean: dry_run=False +find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt new file mode 100644 index 000000000..e8a2a8473 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt @@ -0,0 +1,3 @@ +report_memory_usage=False report_cpu_usage=False +## git_fetch_master: +git fetch origin master:master diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt new file mode 100644 index 000000000..36f22574b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt @@ -0,0 +1,4 @@ +report_memory_usage=False report_cpu_usage=False +## git_pull: +git pull --autostash +git submodule foreach 'git pull --autostash' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt new 
file mode 100644 index 000000000..06d15ab26 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt @@ -0,0 +1,3 @@ +## print_setup: +ECR_BASE_PATH=665840871993.dkr.ecr.us-east-1.amazonaws.com +BASE_IMAGE=amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt new file mode 100644 index 000000000..265ef5fcf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt @@ -0,0 +1,2 @@ +call('eval $(aws ecr get-login --no-include-email --region us-east-1)') +call('docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt new file mode 100644 index 000000000..202366437 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt @@ -0,0 +1,2 @@ +call('docker ps -a') +call('docker rm -f $(docker ps -a -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt new file mode 100644 index 000000000..4ee19d730 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt @@ -0,0 +1,2 @@ +call('docker ps -l') +call('docker rm -f $(docker ps -l -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt new file mode 100644 index 000000000..c8b46747d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt @@ -0,0 +1 @@ +call('eval $(aws ecr get-login --profile am --no-include-email --region us-east-1)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt new file mode 100644 index 000000000..614c9318f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt @@ -0,0 +1 @@ +call('docker ps --format=\'table {{.ID}}\\t{{.Label "user"}}\\t{{.Image}}\\t{{.Command}}\\t{{.RunningFor}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Label "com.docker.compose.service"}}\'') diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt new file mode 100644 index 000000000..029e8a64f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt @@ -0,0 +1,3 @@ +call('eval $(aws ecr get-login --no-include-email --region us-east-1)') +call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev', pty=True) +call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/helpers:prod', pty=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt new file mode 100644 index 000000000..7d238de7e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt @@ -0,0 +1 @@ +call("docker stats --no-stream --format='table {{.ID}}\\t{{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}\\t{{.PIDs}}'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt new file mode 100644 index 000000000..dc7c8a671 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo alphamatic/amp --draft --title "AmpTask1310_Implement_RH1E" --body ""') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt new file mode 100644 index 000000000..1aa1034a0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt new file mode 100644 index 000000000..d93250129 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo 
github.com/alphamatic/amp --draft --title "test" --body "hello_world\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt new file mode 100644 index 000000000..a7010f356 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo github.com/alphamatic/amp --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt new file mode 100644 index 000000000..7e38db5a7 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt @@ -0,0 +1,2 @@ +call('gh workflow run fast_tests.yml --ref AmpTask1310_Implement_RH1E') +call('gh workflow run slow_tests.yml --ref AmpTask1310_Implement_RH1E') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt new file mode 100644 index 000000000..e79742c64 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash') +call('git checkout -b test') +call('git push --set-upstream origin test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt new file mode 100644 index 000000000..25c178bb7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash --rebase') +call('git checkout -b AmpTask123_test') +call('git push --set-upstream origin AmpTask123_test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt new file mode 100644 index 000000000..72eb80ddc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash --rebase') +call('git checkout -b CmampTask1_fix_amp_tmux_session_script') +call('git push --set-upstream origin CmampTask1_fix_amp_tmux_session_script') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt new file mode 100644 index 000000000..b7c58a3d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt @@ -0,0 +1,3 @@ +call('git clean -fd >/dev/null 2>&1') +call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") +call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt new file mode 100644 index 000000000..b7c58a3d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt @@ -0,0 +1,3 @@ +call('git clean -fd >/dev/null 2>&1') +call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") +call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt new file mode 100644 index 000000000..0241acc2e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt @@ -0,0 +1,2 @@ +call('git fetch origin master:master') +call("git submodule foreach 'git fetch origin master:master'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt new file mode 100644 index 000000000..d9d3fc510 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt @@ -0,0 +1,3 @@ +call('git fetch origin master:master') +call("git submodule foreach 'git fetch origin master:master'") +call('git merge master') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt new file mode 100644 index 000000000..78883f1ba --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt @@ -0,0 +1,2 @@ +call('git pull --autostash') +call("git submodule foreach 'git pull --autostash'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt new file mode 100644 index 000000000..70a06c388 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files ./helpers/lib_tasks.py ./helpers/test/TestDryRunTasks2.test_git_branch_create/output/test.txt ./helpers/test/TestDryRunTasks2.test_git_merge_master/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint1/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint2/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint3/output/test.txt ./helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt new file mode 100644 index 000000000..28b088e72 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files core/dataflow/builders.py core/dataflow/core.py core/dataflow/dataflow_design.md core/dataflow/runners.py core/dataflow/visualization.py core/test/test_core.py dev_scripts/client_setup/build.sh devops/docker_build/install_packages.sh devops/docker_build/install_requirements.sh devops/docker_build/poetry.lock devops/docker_build/pyproject.toml documentation/general/workflows.txt helpers/datetime_.py helpers/git.py helpers/lib_tasks.py helpers/test/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks1.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks1.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks1.test_git_clean/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks1.test_print_setup/output/test.txt helpers/test/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks2.test_docker_login/output/test.txt helpers/test/TestDryRunTasks2.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks2.test_docker_pull/output/test.txt helpers/test/TestDryRunTasks2.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks2.test_gh_create_pr/output/test.txt helpers/test/TestDryRunTasks2.test_gh_issue_title/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_list/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
helpers/test/TestDryRunTasks2.test_git_branch_files/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean2/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks2.test_print_setup/output/test.txt helpers/test/test_cache.py helpers/test/test_lib_tasks.py im/kibot/data/load/kibot_s3_data_loader.py im/kibot/data/load/test/test_s3_data_loader.py tasks.py test/test_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt new file mode 100644 index 000000000..9fac068a3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files /app/amp/helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt new file mode 100644 index 000000000..ac6627a2e --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt @@ -0,0 +1,101 @@ +,Name,Frequency,Country,Unit,Start Date,End Date,Commodity,Contracts,Business Category,is_alive,source_code,dataset_code,series_code,original_name,extracted_frequency,is_downloaded,WIND Commodity,Update,id_is_broken +0,Coal and coke CO2 emissions – Aruba – million metric tonnes carbon dioxide,Annual,Aruba,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ABW-MMTCD.A,"Coal and coke CO2 emissions, Aruba, Annual — million metric tonnes carbon dioxide",Annual,success,,, +1,Coal and coke CO2 emissions – Albania – million metric tonnes carbon dioxide,Annual,Albania,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ALB-MMTCD.A,"Coal and coke CO2 emissions, Albania, Annual — million metric tonnes carbon dioxide",Annual,success,,, +2,Coal and coke CO2 emissions – United Arab Emirates – million metric 
tonnes carbon dioxide,Annual,United Arab Emirates,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARE-MMTCD.A,"Coal and coke CO2 emissions, United Arab Emirates, Annual — million metric tonnes carbon dioxide",Annual,success,,, +3,Coal and coke CO2 emissions – Argentina – million metric tonnes carbon dioxide,Annual,Argentina,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARG-MMTCD.A,"Coal and coke CO2 emissions, Argentina, Annual — million metric tonnes carbon dioxide",Annual,success,,, +4,Coal and coke CO2 emissions – Armenia – million metric tonnes carbon dioxide,Annual,Armenia,"kw, in 
millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARM-MMTCD.A,"Coal and coke CO2 emissions, Armenia, Annual — million metric tonnes carbon dioxide",Annual,success,,, +5,Germany: Term Structure of Interest Rate on Listed Federal Securities: 1,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,G0008063,,,success,Gold,2019-12-20,False +6,Germany: Term Structure of Interest Rate on Listed Federal Securities: 10,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,SG000S6E,,,not_attempted,Gold,2019-12-20,True +7,France: Treasury Bills Reference Rate: 1Y,Daily,France,%,1989-01-03,2019-12-19,,,Upstream,True,WIND,Banque de France,G0008146,,,success,Gold,2019-12-20,False +8,France: Treasury Bills Reference Rate: 10Y,Daily,France,%,1987-01-02,2019-12-19,,,Upstream,True,WIND,Banque de France,G1400003,,,success,Gold,2019-12-20,False +9,Spain: Government Securities Yields: 12M,Daily,Spain,%,1987-07-01,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700068,,,success,Gold,2019-12-20,False +10,Spain: Government Securities Yields: 10Y,Daily,Spain,%,1989-07-18,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700075,,,success,Gold,2019-12-20,False +11,Italy: Government Securities Yields: 3Y_,Daily,Italy,%,1989-07-24,2019-12-19,,,Upstream,True,WIND,Bank of Italy,G1700018,,,success,Gold,2019-12-20,False +12,Italy: Government Securities Yields: 10,Daily,Italy,%,1991-03-05,2019-12-19,,,Upstream,True,WIND,Bank of 
Italy,G1700020,,,success,Gold,2019-12-20,False +13,Futures Closing Price (Active Contract): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0066358,,,success,Gold,2019-12-20,False +14,Futures Settlement Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0068142,,,success,Gold,2019-12-20,False +15,Futures Closing Price (Continuous): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0147027,,,success,Gold,2019-12-20,False +16,Futures Closing Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0031868,,,success,Gold,2019-12-20,False +17,Futures Settlement Price (Active Contract): 
Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0181376,,,success,Gold,2019-12-20,False +18,Futures Trading Volume: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6409,,,not_attempted,Gold,2019-12-20,True +19,Futures Turnover: Gold,Daily,China,"CNY, in 10,000s",2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6410,,,not_attempted,Gold,2019-12-20,True +20,Futures Position: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6411,,,not_attempted,Gold,2019-12-20,True +21,Futures Trading Volume (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0096581,,,success,Gold,2019-12-20,False +22,Futures Position (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00096614,,,not_attempted,Gold,2019-12-20,True +23,Closing Stock on Warrant: Gold,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049497,,,success,Gold,2019-12-20,False +24,Duplicate) Closing Stock on Warrant: Gold: Total,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049505,,,success,Gold,2019-12-20,False +25,Futures Closing Price (Continuous): COMEX Gold,Daily,United States,USD/ounce,1975-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0069669,,,success,Gold,2019-12-20,False +26,Futures Closing Price (Active Contract}: COMEX 
Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0180903,,,success,Gold,2019-12-20,False +27,Futures Closing Price (Continuous): COMEX Mini Gold,Daily,United States,USD/ounce,2010-12-06,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0147014,,,success,Gold,2019-12-19,False +28,Futures Settlement Price (Active Contract}: COMEX Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0945,,,not_attempted,Gold,2019-12-20,True +29,Futures Closing Price (Active Contract: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0906,,,not_attempted,Gold,2019-12-20,True +30,Futures Settlement Price (Active Contract}: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G094E,,,not_attempted,Gold,2019-12-20,True +31,Futures Trading Volume (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M00096642,,,not_attempted,Gold,2019-12-20,True +32,Futures Position (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M0096645,,,success,Gold,2019-12-20,False +33,COMEX: Silver: Inventory,Daily,United States,ozt,1992-09-01,2019-12-19,Silver,"COMEX:6Q,COMEX:QI,COMEX:SI,COMEX:SIL,COMEX:SIT,COMEX:SSP,COMEX:SV,COMEX:XY,COMEX:YV,DGCX:DS,ICEUS:YI,ICEUS:ZI,IFUS:HIO,IFUS:YI,IFUS:ZI,LME:AG,MCX:SILVER,SHFE:AG,TCE:12",Midstream,True,WIND,CME,S0114145,,,success,Gold,2019-12-20,False +34,SGE Gold: Closing Price: Au9995,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035818,,,success,Gold,2019-12-20,False +35,SGE Gold: Closing Price: 
Au9999,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035819,,,success,Gold,2019-12-20,False +36,SGE Gold: Closing Price: Au100G,Daily,China,yuan/g,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035820,,,success,Gold,2019-12-20,False +37,SGE Gold: Closing Price: AuT+D,Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035821,,,success,Gold,2019-12-20,False +38,SGE Gold: Settlement Price: Au (T+D),Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0202645,,,success,Gold,2019-12-20,False +39,SGE Gold: Volume: Au9995,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035824,,,success,Gold,2019-12-20,False +40,SGE Gold: Volume: Au9999,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035825,,,success,Gold,2019-12-20,False +41,SGE Gold: Volume: Au100g,Daily,China,kg,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035826,,,success,Gold,2019-12-20,False +42,SGE Gold: Volume: AuT+D,Daily,China,kg,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035827,,,success,Gold,2019-12-20,False +43,SGE Gold: Position: Au (T+D),Daily,China,kg,2008-08-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S5806075,,,success,Gold,2019-12-20,False +44,SGE Gold: Deferred Payment of Direction: Au 
(T+D),Daily,China,,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0182163,,,success,Gold,2019-12-20,False +45,SGE Gold: Delivery Volume: Au (T+D),Daily,China,kg,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0181748,,,success,Gold,2019-12-20,False +46,Loco London Gold: In USD,Daily,United Kingdom,USD/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031645,,,success,Gold,2019-12-20,False +47,Loco Londen Gold: In EUR,Daily,United Kingdom,EUR/ounce,1999-01-04,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031646,,,success,Gold,2019-12-20,False +48,Loco Londen Gold: In GBP,Daily,United Kingdom,GBP/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031647,,,success,Gold,2019-12-20,False +49,Closing Price: Paper Gold: Bank of China,Daily,China,yuan/g,2011-01-20,2014-10-31,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Company Website,S5806366,,,not_attempted,Gold,2014-11-03,False +50,Closing Price: Paper Gold: China Construction Bank,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806367,,,success,Gold,2019-12-20,False +51,Closing Price: Paper Gold: Industrial and Commercial Bank of China,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806365,,,success,Gold,2019-12-20,False +52,Price: Gold: 99.95,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801701,,,success,Gold,2019-12-20,False +53,Price: Gold: 
99.99,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801702,,,success,Gold,2019-12-20,False +54,SPDR Gold Shares: Total Net Asset Value Qunces in the Trust,Daily,United States,ozt,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105520,,,success,Gold,2019-12-20,False +55,SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105521,,,success,Gold,2019-12-20,False +56,SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United States,USD,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105522,,,success,Gold,2019-12-20,False +57,iShares: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United 
States,USD,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807688,,,success,Gold,2019-12-20,False +58,iShares: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United States,ozt,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807690,,,success,Gold,2019-12-20,False +59,iShares: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807691,,,success,Gold,2019-12-20,False +60,GBS: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2004-04-01,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807692,,,success,Gold,2019-12-20,False +61,GBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807693,,,success,Gold,2019-12-20,False +62,GBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807694,,,success,Gold,2019-12-20,False +63,PHAU: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2007-04-25,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807695,,,success,Gold,2019-12-20,False +64,PHAU: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807696,,,success,Gold,2019-12-20,False +65,PHAU: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807697,,,success,Gold,2019-12-20,False +66,SGBS: SPDR Gold Shares: Total 
Net Asset Value in the Trust,Daily,Switzerland,USD,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808213,,,success,Gold,2019-12-20,False +67,SGBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808214,,,success,Gold,2019-12-20,False +68,SGBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808215,,,success,Gold,2019-12-20,False +69,GOLD: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,AUD,2004-01-09,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807698,,,success,Gold,2019-12-20,False +70,GOLD: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807699,,,success,Gold,2019-12-20,False +71,GOLD: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807700,,,success,Gold,2019-12-20,False +72,SGOL: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,Switzerland,USD,2009-09-04,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807701,,,success,Gold,2019-10-31,False +73,SGOL: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807702,,,success,Gold,2019-10-31,False +74,SGOL: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807703,,,success,Gold,2019-10-31,False +75,Price: Chinese Major Ports FOB: 
Silicon: 98.5,Daily,United Kingdom,USD/ton,2006-06-02,2014-05-30,,,Upstream,False,WIND,According to the Press Finishing,S0149035,,,not_attempted,,2014-06-03,False +76,"Price: Silicon Powder: -200 Mesh,-300 Mesh: Shanghai-made",Daily,China,yuan/kg,2005-01-04,2019-12-24,,,Upstream,True,WIND,According to the Press Finishing,S5801759,,,success,,2019-12-24,False +77,Market Price: Secondary Metallurgical Coke: National,Daily,China,yuan/ton,2013-12-31,2019-12-20,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,National Bureau of Statistics of China,S5914487,,,success,,2019-12-24,False +78,"Ex-factory Price (Tax-inclusive): Metallurgical Coke Grade 3 (A15%,0.6%): Yunng",Daily,China,yuan/ton,2004-10-22,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S0033511,,,success,,2019-12-24,False +79,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Rizhao,Daily,China,yuan/ton,2012-03-08,2015-02-11,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,False,WIND,Wind,S5118432,,,success,,2015-02-11,False +80,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Zibo,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118277,,,success,,2019-12-24,False +81,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Yinchuan,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118276,,,success,,2019-12-24,False +82,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Xinjiang County,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118275,,,success,,2019-12-24,False +83,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Xuzhou,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118273,,,success,,2019-12-24,False +84,Exit Price (Tax-inclusive): Secondary Metallurgical Coke: Tianjin,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118270,,,success,,2019-12-24,False +85,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Shuangyashan,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118267,,,success,,2019-12-24,False +86,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Shijiazhuang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118266,,,success,,2019-12-24,False +87,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Shanghai,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118264,,,success,,2019-12-24,False +88,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Qigihar,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118263,,,success,,2019-12-24,False +89,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Panzhihua,Daily,China,yuan/ton,2010-04-13,2019-12-06,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118260,,,success,,2019-12-06,False +90,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Mudanjlang,Daily,China,yuan/ton,2011-09-01,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118259,,,success,,2019-12-24,False +91,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Lvliang,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118258,,,success,,2019-12-24,False +92,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Linyt,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118255,,,success,,2019-12-24,False +93,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Linfen,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118254,,,success,,2019-12-24,False +94,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Jinzhong,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118252,,,success,,2019-12-24,False +95,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Hancheng,Daily,China,yuan/ton,2012-03-06,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118247,,,success,,2019-12-24,False +96,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Fukang,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118246,,,success,,2019-12-24,False +97,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Fushun,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118245,,,success,,2019-12-24,False +98,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118243,,,success,,2019-12-24,False +99,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Anyang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118241,,,success,,2019-12-24,False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt new file mode 100644 index 000000000..2de8022c8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt @@ -0,0 +1,8 @@ + Name Frequency Country Unit Start Date End Date Commodity Contracts Business Category is_alive source_code dataset_code series_code original_name extracted_frequency is_downloaded WIND Commodity Update id_is_broken +5 Germany: Term Structure of Interest Rate on Listed Federal Securities: 1 Daily Germany % 1997-08-07 2019-12-19 NaN NaN Upstream True WIND Deutsche Bundesbank G0008063 NaN NaN success Gold 2019-12-20 False +7 France: Treasury Bills Reference Rate: 1Y Daily France % 1989-01-03 2019-12-19 NaN NaN Upstream True WIND Banque de France G0008146 NaN NaN success Gold 2019-12-20 False +8 France: Treasury Bills Reference Rate: 10Y Daily France % 1987-01-02 2019-12-19 NaN NaN Upstream True WIND Banque de France G1400003 NaN NaN success Gold 2019-12-20 False +... 
+97 Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Fushun Daily China yuan/ton 2011-09-09 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118245 NaN NaN success NaN 2019-12-24 False +98 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos Daily China yuan/ton 2010-04-20 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118243 NaN NaN success NaN 2019-12-24 False +99 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Anyang Daily China yuan/ton 2010-04-13 2019-12-24 Coal 
CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118241 NaN NaN success NaN 2019-12-24 False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt new file mode 100644 index 000000000..8c6bdf3cf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt @@ -0,0 +1,18 @@ +def func1(): + """ + First function. + + ``` + foo + ``` + """ + + +def func2(): + """ + Second function. + + ``` + foo + ``` + """ \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt new file mode 100644 index 000000000..3f4d616bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt @@ -0,0 +1,52 @@ +# Test created for __main__.plbck_sum. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. + a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test3(self) -> None: + # Define input variables. + a = 2 + b = 3 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test4(self) -> None: + # Define input variables. + a = 3 + b = 4 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt new file mode 100644 index 000000000..3f4d616bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt @@ -0,0 +1,52 @@ +# Test created for __main__.plbck_sum. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. 
+ a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. + a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test3(self) -> None: + # Define input variables. + a = 2 + b = 3 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test4(self) -> None: + # Define input variables. + a = 3 + b = 4 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt new file mode 100644 index 000000000..1a2ceab1a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt @@ -0,0 +1,30 @@ +# Test created for __main__.plbck_sum. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. 
+ a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt new file mode 100644 index 000000000..b5439e39d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 3 + b = 2 + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = 5 + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt new file mode 100644 index 000000000..6631e9e27 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": 2} + b = {"3": 4} + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt new file mode 100644 index 000000000..80e85048a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) + b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = hpandas.df_to_str(actual, num_rows=None) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt new file mode 100644 index 000000000..1d91a4a88 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": ["a", 2]} + b = {"3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = {"1": ["a", 2], "3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt new file mode 100644 index 000000000..badcab6f7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) + b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = hpandas.df_to_str(actual, num_rows=None) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt new file mode 100644 index 000000000..6b92491e1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt @@ -0,0 +1,23 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) + b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = pd.Series(data=[20.0, 20.0, 20.5], index=RangeIndex(start=0, stop=3, step=1), name="None", dtype=float64) + expected = jsonpickle.decode(expected) + actual = hpandas.df_to_str(actual, num_rows=None) + expected = hpandas.df_to_str(expected, num_rows=None) + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt new file mode 100644 index 000000000..403295821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") + b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt new file mode 100644 index 000000000..5a0f6c938 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt @@ -0,0 +1,21 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") + b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = cconfig.Config.from_python("Config({'meta': 'meta value 2', 'list': [1, 2]})") + expected = jsonpickle.decode(expected) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt new file mode 100644 index 000000000..1884fe5bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt @@ -0,0 +1,15 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string_none. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckStringNone(hunitest.TestCase): + def test1(self) -> None: + # Call function to test. + actual = get_result_check_string_none() + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt new file mode 100644 index 000000000..710587bb8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt @@ -0,0 +1,17 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal_none. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqualNone(hunitest.TestCase): + def test1(self) -> None: + # Call function to test. + actual = get_result_assert_equal_none() + # Define expected output. + expected = "Some string." + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt new file mode 100644 index 000000000..40dc558c5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = "test" + b = "case" + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = "testcase" + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt new file mode 100644 index 000000000..68b93d84d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = [1, 2, 3] + b = [4, 5, 6] + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = [1, 2, 3, 4, 5, 6] + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt new file mode 100644 index 000000000..faa6861c2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": 2} + b = {"3": 4} + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = {"1": 2, "3": 4} + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt new file mode 100644 index 000000000..abfa197bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt @@ -0,0 +1,22 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) + b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = pd.DataFrame.from_dict({'Price': [701, 251, 801, 1201]}) + actual = hpandas.df_to_str(actual, num_rows=None) + expected = hpandas.df_to_str(expected, num_rows=None) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt new file mode 100644 index 000000000..f7fa7c8c9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt @@ -0,0 +1,23 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}' + a = jsonpickle.decode(a) + b = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B9wBAQ=="]]}' + b = jsonpickle.decode(b) + # Call function to test. 
+ actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = r'{"py/reduce": [{"py/type": "datetime.timedelta"}, {"py/tuple": [1096, 0, 0]}]}' + expected = jsonpickle.decode(expected) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt new file mode 100644 index 000000000..25588d901 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 3 + b = 2 + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt new file mode 100644 index 000000000..cd51f2ced --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt @@ -0,0 +1,18 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = "test" + b = "case" + # Call function to test. + actual = get_result_check_string(a=a, b=b) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt new file mode 100644 index 000000000..c42805818 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = [1, 2, 3] + b = [4, 5, 6] + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, + "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt new file mode 100644 index 000000000..4f0f96902 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt @@ -0,0 +1 @@ +INSERT INTO test_table(id,column_1,column_2) VALUES %s diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt new file mode 100644 index 000000000..c5faf0358 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt @@ -0,0 +1,20 @@ +original shape=(3, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":4, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "2":{ + "id":5, + "column_1":1001.0, + "column_2":"test_string_2" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, 
+ "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, + "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt new file mode 100644 index 000000000..cd2308af6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt @@ -0,0 +1,65 @@ +# Dir structure +. 
+dummy_value_1=1 +dummy_value_1=1/dummy_value_2=A +dummy_value_1=1/dummy_value_2=A/data.parquet +dummy_value_1=2 +dummy_value_1=2/dummy_value_2=B +dummy_value_1=2/dummy_value_2=B/data.parquet +dummy_value_1=3 +dummy_value_1=3/dummy_value_2=C +dummy_value_1=3/dummy_value_2=C/data.parquet +# File signatures +len(file_names)=3 +file_names=dummy_value_1=1/dummy_value_2=A/data.parquet, dummy_value_1=2/dummy_value_2=B/data.parquet, dummy_value_1=3/dummy_value_2=C/data.parquet +# dummy_value_1=1/dummy_value_2=A/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' +# dummy_value_1=2/dummy_value_2=B/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' +# dummy_value_1=3/dummy_value_2=C/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt new file mode 100644 index 000000000..ca3ab848c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt @@ -0,0 +1,4 @@ + description comment is_ok +0 hello Number of not submitted OMS child orders=0 / 7... 
True +1 hello2 ok True +is_ok=True \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt new file mode 100644 index 000000000..b0e7738bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt @@ -0,0 +1,4 @@ + description comment is_ok +0 hello Number of not submitted OMS child orders=0 / 7... True +1 hello2 not_ok False +is_ok=False \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt new file mode 100644 index 000000000..393449cf4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt @@ -0,0 +1,19 @@ +Some text before + + + +:::: +::::{.column width=40%} + + + +Middle text + + + +:::columns +::::{.column width=60%} + + + +Some text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt new file mode 100644 index 000000000..f3bdbccbf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt @@ -0,0 +1,5 @@ + + +:::: +::: + diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt new file mode 100644 index 000000000..d5e54b365 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed assertion * +cond=False +df1.columns.difference(df2.columns)= +Index(['B'], dtype='object') +df2.columns.difference(df1.columns)= +Index(['C'], dtype='object') +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt new file mode 100644 index 000000000..464343e55 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt @@ -0,0 +1,9 @@ + 
+################################################################################ +* Failed assertion * +cond=False +df1.index.difference(df2.index)= +Index([1, 4], dtype='int64') +df2.index.difference(df1.index)= +Index([5, 6], dtype='int64') +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json new file mode 100644 index 000000000..1e4b47491 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json @@ -0,0 +1,10 @@ +{ + "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ + "25", + 3.195e-05 + ], + "{\"args\": [\"You are a calculator. 
Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ + "5", + 3.195e-05 + ] +} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt new file mode 100644 index 000000000..dbd21a9a0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 NaN +2010-02-28 NaN +2010-03-31 NaN +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 NaN +2010-07-31 NaN +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 NaN +2013-03-31 NaN +2013-04-30 NaN diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt new file mode 100644 index 000000000..6e33e1427 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt @@ -0,0 +1,33 @@ + 0 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt new file mode 100644 index 000000000..3a043159d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 NaN +2010-02-28 NaN +2010-03-31 NaN +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.146756 +2010-07-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 
+2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.686501 +2013-03-31 0.686501 +2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt new file mode 100644 index 000000000..200d35c7a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt @@ -0,0 +1,38 @@ + 0 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.146756 +2010-07-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.686501 +2013-03-31 0.686501 +2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt new file mode 100644 index 000000000..590e9e5f7 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 0.000000 +2010-02-28 0.000000 +2010-03-31 0.000000 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.000000 +2010-07-31 0.000000 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.000000 +2013-03-31 0.000000 +2013-04-30 0.000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt new file mode 100644 index 000000000..9f8585df5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt @@ -0,0 +1,3 @@ +Consecutive headers increase by more than one level: + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt @@ -0,0 +1 @@ +hello diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv new file mode 100644 index 000000000..0ddcc75ab --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv @@ -0,0 +1,5 @@ +col1,col2,col3 +a,a,a +b,b,b +c,,c +d,, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt new file mode 100644 index 
000000000..4f8eb6107 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +cond=False +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt new file mode 100644 index 000000000..b31ec5ee2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt @@ -0,0 +1,6 @@ + +################################################################################ +* Failed assertion * +cond=False +hello +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt new file mode 100644 index 000000000..134e5b23c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt @@ -0,0 +1,6 @@ + +################################################################################ +* Failed assertion * +cond=False +hello world +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt new file mode 100644 index 000000000..f99e55fe0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +cond=False +Caught assertion while formatting message: +'not all arguments converted during string formatting' +hello %s world too_many +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt new file mode 100644 index 000000000..5ebc30e5f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +cond=False +Caught assertion while formatting message: +'not enough arguments for format string' +hello %s +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt new file mode 100644 index 000000000..c941ca91b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt @@ -0,0 +1 @@ +You passed '['hello']' or type '' instead of str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt new file mode 100644 index 000000000..41b8447e3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +hello world +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt new file mode 100644 index 000000000..41b8447e3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +hello world +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt new file mode 100644 index 000000000..3bdf77365 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt @@ -0,0 +1,10 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +Caught assertion while formatting message: +'not enough arguments for format string' +hello %s +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt new file mode 100644 index 000000000..5e9f4aa95 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt @@ -0,0 +1 @@ +issubclass() arg 2 must be a class, a tuple of classes, or a union \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt new file mode 100644 index 000000000..3eeaf0ce1 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance '' of class '_Man' is not a subclass of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt new file mode 100644 index 000000000..e5b23c85f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance '' of class '_Man' is not a subclass of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt new file mode 100644 index 000000000..69b3f64e9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +val1=3 +[1, 2, 3] +val2=3 
+[1, 2, 4] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt new file mode 100644 index 000000000..11a472589 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +'a' in '['xyz']' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt new file mode 100644 index 000000000..bb58d202b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +'a' is 'None' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt 
new file mode 100644 index 000000000..fca016604 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance of 'a' is '' instead of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt new file mode 100644 index 000000000..b377f94fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance of 'a' is '' instead of '(, )' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt new file mode 100644 index 000000000..1c61bf06a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed 
assertion * +val1= +[1, 2, 4, 3] +is not sorted +sorted(val1)= +[1, 2, 3, 4] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt new file mode 100644 index 000000000..a13f9d582 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed assertion * +val1= +[1, 2, 4, 3] +is not sorted +sorted(val1)= +[4, 3, 2, 1] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt new file mode 100644 index 000000000..9fe19e631 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +val1= +[1, 3, 3] +has duplicates +3 +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt new file mode 100644 index 000000000..a1f1fdce9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt @@ -0,0 +1 @@ +date='2022-11-01' doesn't have the right format: time data '2022-11-01' does not match format '%Y%m%d' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt new file mode 100644 index 000000000..48cd44539 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt @@ -0,0 +1,28 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +filters +################################################################################ +{'col_0': (('gt', 1), ('lt', 7)), 'col_1': ('eq', 5)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +1 4 
5 6 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 4), + ('n_col_0_gt_1', np.int64(3)), + ('perc_col_0_gt_1', '3 / 4 = 75.00%'), + ('n_col_0_lt_7', np.int64(2)), + ('perc_col_0_lt_7', '2 / 4 = 50.00%'), + ('n_col_1_eq_5', np.int64(1)), + ('perc_col_1_eq_5', '1 / 4 = 25.00%'), + ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt new file mode 100644 index 000000000..c935f88e6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt @@ -0,0 +1,28 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +filters +################################################################################ +{'col_0': ('gt', 2), 'col_1': ('eq', 5)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 4), + 
('n_col_0_gt_2', np.int64(3)), + ('perc_col_0_gt_2', '3 / 4 = 75.00%'), + ('n_col_1_eq_5', np.int64(1)), + ('perc_col_1_eq_5', '1 / 4 = 25.00%'), + ('nrows_remaining', np.int64(3))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt new file mode 100644 index 000000000..456d06923 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt @@ -0,0 +1,26 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +filters +################################################################################ +{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 2), + ('n_col_0', np.int64(1)), + ('perc_col_0', '1 / 2 = 50.00%'), + ('n_col_1', np.int64(1)), + ('perc_col_1', '1 / 2 = 50.00%'), + ('n_col_2', np.int64(2)), + ('perc_col_2', '2 / 2 = 100.00%'), + ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt new file mode 100644 index 000000000..ae70053b9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt @@ -0,0 +1,27 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +filters +################################################################################ +{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 2), + ('n_col_0', np.int64(1)), + ('perc_col_0', '1 / 2 = 50.00%'), + ('n_col_1', np.int64(1)), + ('perc_col_1', '1 / 2 = 50.00%'), + ('n_col_2', np.int64(2)), + ('perc_col_2', '2 / 2 = 100.00%'), + ('nrows_remaining', np.int64(2))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt new file mode 100644 index 000000000..a947c3402 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt @@ -0,0 +1 @@ +A fake check_string output to use for test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt new file mode 100644 index 000000000..62b216ee4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt @@ -0,0 +1 @@ +A fake check_string output to use for test2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv new file mode 100644 index 000000000..abc3dac80 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv @@ -0,0 +1,2 @@ +A,B,C,D,E +1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types new file mode 100644 index 000000000..81816c1d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types @@ -0,0 +1 @@ +{'A': 'int64', 'B': 'float64', 'C': 'object', 'D': 'object', 'E': 'int64'} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt new file mode 100644 index 000000000..4a3a582fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt @@ -0,0 +1,58 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - 
CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt new file mode 100644 index 000000000..47371468a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt @@ -0,0 +1,58 @@ +stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs={'/data/shared': '/shared_data'}, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - 
CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + - /data/shared:/shared_data + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt new file mode 100644 index 000000000..5ebe91b26 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt @@ 
-0,0 +1,60 @@ +stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} +networks: + default: + name: main_network diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt new file mode 100644 index 000000000..eb8d4824a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt @@ -0,0 +1,57 @@ +stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + - ../../:/app + environment: + - MYPYPATH + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - 
${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt new file mode 100644 index 000000000..2c9d5ecf0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt @@ -0,0 +1,56 @@ +stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + - CSFY_CI=$CSFY_CI + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + - ../../../:/app + environment: + - MYPYPATH + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + 
jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt new file mode 100644 index 000000000..9ba5c60c9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: 
${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt new file mode 100644 index 000000000..91e37ffc3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app + - 
CSFY_USE_HELPERS_AS_NESTED_MODULE=0 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/helpers1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/helpers1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt new file mode 100644 index 000000000..a16d2f133 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - 
CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/app + working_dir: /app/ck.infra + linter: + extends: base_app + volumes: + - /data/dummy/src/cmamp1/ck.infra:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt new file mode 100644 index 000000000..b4afb6c80 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/amp/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/orange1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/orange1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - 
${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl new file mode 100644 index 0000000000000000000000000000000000000000..25ffea79afb3dad6014da937fd8ff7c64cfbb55f GIT binary patch literal 405 zcmbu(u}TCn5C-5=POmty^Br!R#KqQ5tptU`K4g&`x>)b*xwLl zAPnC>A16QGM~haxHX(s9!SC>iPp{kDcc1UPQuAlsa1IP%@R`>S$wC5qgNF9qcEih^ zM)$?XXzv!h9D($~l)kwaw@T#&&7%2;aOs%O@yz+ry2;oJv-TCg^Z#rJt zOv+;7YpG+DdW`ZPBmWrrRNJ=rhZ*Uc+e;dwsBxOiGhf16goGn_QVDW#6MA: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt new file mode 100644 index 000000000..7aad26473 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt new file mode 100644 index 000000000..d491215bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False , b=hello , c=3.14 ) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt new file mode 100644 index 000000000..b5e297083 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, hello=. at 0x>) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' + hello='. 
at 0x>' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt new file mode 100644 index 000000000..b69634f84 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, _hello=under) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' + _hello='under' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt new file mode 100644 index 000000000..332cd0a1d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, _Object1__hello=double_dunder) +################################################################################ +repr: +################################################################################ +: + 
a='False' + b='hello' + c='3.14' + _Object1__hello='double_dunder' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt new file mode 100644 index 000000000..fccd31195 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) 
+################################################################################ +repr: +################################################################################ +: + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + p='p' + q='q' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt new file mode 100644 index 000000000..08aebee19 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: 
+################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt @@ 
-0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt new file mode 100644 index 000000000..14a9380bb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt @@ -0,0 +1 @@ +xdg-open a.html diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt new file mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt new file 
mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt new file mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt new file mode 100644 index 000000000..38f3146a7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt @@ -0,0 +1,20 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + + + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` + + +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt new file mode 100644 index 000000000..dacb761b7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt @@ -0,0 +1,20 @@ +0:- Functions can be declared in the body of another function +1:- E.g., to hide utility functions in the scope of the function that uses them +2: + +3: ```python +4: def print_integers(values): +5: +6: def _is_integer(value): +7: try: +8: return value == int(value) +9: except: +10: return False +11: +12: for v in values: +13: if _is_integer(v): +14: print(v) +15: ``` +16: + +17:- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt new file mode 100644 index 000000000..52f34afc3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt @@ -0,0 +1,2 @@ +$AM_AWS_S3_BUCKET = $AM_AWS_S3_BUCKET +$CSFY_AWS_S3_BUCKET = $CSFY_AWS_S3_BUCKET diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed new file mode 100644 index 000000000..0850990c3 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed @@ -0,0 +1,12 @@ +{ + "dev_scripts/testing/test/test_run_tests.py": true, + "dev_scripts/testing/test/test_run_tests2.py": true, + "helpers/test/test_printing.py::Test_dedent1::test1": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true, + "helpers/test/test_printing.py::Test_dedent2::test1": true, + "documentation/scripts/test/test_all.py": true, + "documentation/scripts/test/test_render_md.py": true, + "helpers/test/helpers/test/test_list.py::Test_list_1": true, + "helpers/test/helpers/test/test_list.py::Test_list_2": true, + "helpers/test/test_cache.py::TestAmpTask1407": true +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt new file mode 100644 index 000000000..61323668a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt @@ -0,0 +1,15 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 10 failed pytest 'tests' target(s); to reproduce run: +pytest dev_scripts/testing/test/test_run_tests.py +pytest dev_scripts/testing/test/test_run_tests2.py +pytest documentation/scripts/test/test_all.py 
+pytest documentation/scripts/test/test_render_md.py +pytest helpers/test/helpers/test/test_list.py::Test_list_1 +pytest helpers/test/helpers/test/test_list.py::Test_list_2 +pytest helpers/test/test_cache.py::TestAmpTask1407 +pytest helpers/test/test_printing.py::Test_dedent1::test1 +pytest helpers/test/test_printing.py::Test_dedent1::test2 +pytest helpers/test/test_printing.py::Test_dedent2::test1 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt new file mode 100644 index 000000000..9e66e81bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt @@ -0,0 +1,325 @@ +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, timeout-2.0.2, rerunfailures-10.2, cov-3.0.0, instafail-0.4.2, xdist-2.5.0, forked-1.4.0 +collecting ... 
>>ENV<<: is_inside_container=True: code_version=1.0.6, container_version=1.0.6, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +# Git + branch_name='CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests' + hash='ca2dbf510' + # Last commits: + * ca2dbf510 Sonya Nikiforova Merge branch 'master' into CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests ( 2 hours ago) Mon Feb 14 16:25:29 2022 (HEAD -> CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests, origin/CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests) + |\ + * | 63a471cca sonniki CmTask695: Update for reproducibility ( 2 hours ago) Mon Feb 14 16:15:14 2022 + | * 0d236ad57 Nikola Jašek CMTask1103: Add tests for HistoricalPqByTileClient (#1176) ( 2 hours ago) Mon Feb 14 16:01:56 2022 (origin/master, origin/HEAD) +# Machine info + system=Linux + node name=61ceebd0998a + release=5.11.0-1028-aws + version=#31~20.04.1-Ubuntu SMP Fri Jan 14 14:37:50 UTC 2022 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=33295769600, available=23499386880, percent=29.4, used=9048117248, free=17212899328, active=2693218304, inactive=12081451008, buffers=651313152, cached=6383439872, shared=286130176, slab=934486016) + disk usage=sdiskusage(total=104021790720, used=40223850496, free=63781163008, percent=38.7) +# Packages + python: 3.8.10 + gluonnlp: ? 
+ gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.9.0 + numpy: 1.22.0 + pandas: 1.3.5 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.2 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest datapull/common/data/client/test/test_historical_pq_clients.py' +INFO: Saving log to file 'tmp.pytest.log' +collected 9 items + +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 (1.14 s) PASSED [ 11%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 (1.05 s) PASSED [ 22%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 (0.00 s) PASSED [ 33%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 (1.26 s) FAILED [ 44%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 (1.44 s) FAILED [ 55%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 (1.09 s) FAILED [ 66%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 (0.95 s) FAILED [ 77%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 (0.86 s) FAILED [ 88%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 (1.05 s) PASSED [100%] + +=================================== FAILURES =================================== +________________ TestHistoricalPqByTileClient1.test_read_data1 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 89, in test_read_data1 + self._test_read_data1( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 44, in 
_test_read_data1 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data1 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(4320, 4) | df.shape=(4320, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +2021-12-30 00:02:00+00:00 1467591036 2 2021 12 ( +... 
( +2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 ( +2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data1 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(4320, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +2021-12-30 00:02:00+00:00 1467591036 2 2021 12 +... +2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 +2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data2 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 132, in test_read_data2 + self._test_read_data2( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 61, in _test_read_data2 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data2 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(8640, 4) | df.shape=(8640, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +... ( +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data2 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(8640, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +... 
+2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data3 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 176, in test_read_data3 + self._test_read_data3( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 79, in _test_read_data3 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data3 +-------------------------------------------------------------------------------- + +# df= ( +index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(2640, 4) | df.shape=(2640, 4) +full_symbol close year month ( +timestamp ( +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( +2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 ( +... 
( +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data3 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(2640, 4) + full_symbol close year month +timestamp +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 +2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 +... +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data4 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 221, in test_read_data4 + self._test_read_data4( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 97, in _test_read_data4 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data4 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(6002, 4) | df.shape=(6002, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +... ( +2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 ( +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data4 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] +columns=full_symbol,close,year,month +shape=(6002, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +... 
+2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data5 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 267, in test_read_data5 + self._test_read_data5( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 114, in _test_read_data5 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data5 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] | df.index in [2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(242, 4) | df.shape=(242, 4) +full_symbol close year month ( +timestamp ( +2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 ( +2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 ( +2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 ( +... 
( +2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 ( +2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 ( +2022-01-01 01:00:00+00:00 1508924190 2940 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data5 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] +columns=full_symbol,close,year,month +shape=(242, 4) + full_symbol close year month +timestamp +2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 +2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 +2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 +... +2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 +2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 +2022-01-01 01:00:00+00:00 1508924190 2940 2022 1""" +============================= slowest 3 durations ============================== +1.44s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +1.26s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +1.14s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 +=========================== short test summary info ============================ +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +FAILED 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 +========================= 5 failed, 4 passed in 10.94s ========================= diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt new file mode 100644 index 000000000..c297aad27 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt @@ -0,0 +1,10 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 5 failed pytest 'tests' target(s); to reproduce run: +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 +pytest 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt new file mode 100644 index 000000000..8c9d7793d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt @@ -0,0 +1,10 @@ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175206Z SKIPPED [1] core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175722Z SKIPPED [1] config_root/config/test/test_config.py:325: See AmpTask1573 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176275Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176859Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1177550Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1178650Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1179474Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 - Na... 
+Run_fast_tests Run fast tests 2022-02-19T16:53:07.1180384Z ^[[31m= ^[[31m^[[1m3 failed^[[0m, ^[[32m1511 passed^[[0m, ^[[33m155 skipped^[[0m, ^[[33m60 deselected^[[0m, ^[[33m2 xfailed^[[0m, ^[[33m1 rerun^[[0m^[[31m in 211.15s (0:03:31)^[[0m^[[31m =^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1367972Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[36mINFO ^[[0m hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=28.0 KB +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1381857Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[33mWARN ^[[0m hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt new file mode 100644 index 000000000..e16188c74 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt @@ -0,0 +1,8 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 +pytest 
dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt new file mode 100644 index 000000000..58f583b0e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt @@ -0,0 +1,61 @@ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0521158Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [ 99%]^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0932903Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [100%]^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933619Z +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933865Z =================================== FAILURES =================================== +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0934800Z ^[[31m^[[1m_____________________ TestRealTimeMvnReturnsWithOms1.test1 _____________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0935555Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0936347Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0937188Z market_data = self.get_market_data(event_loop) +Run_fast_tests Run fast tests 
2022-02-19T16:53:07.0938027Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939155Z df = self.get_market_data_df() +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939988Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0940754Z df = node.fit()["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941392Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941905Z self._lazy_load(fit=True) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0942562Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943252Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943957Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0944732Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0959832Z ^[[31m^[[1m____________________ TestMultivariateNormalDataSource.test1 ____________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0961700Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0968475Z File "/app/dataflow/core/nodes/test/test_sources.py", line 175, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0970838Z df = node.fit()["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0972952Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0973577Z self._lazy_load(fit=True) +Run_fast_tests Run fast 
tests 2022-02-19T16:53:07.0974176Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0976810Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0977529Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0978880Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0981739Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0982702Z ^[[31m^[[1m_________________________ TestMvnReturnsBuilder.test1 __________________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985191Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985837Z File "/app/dataflow/core/test/test_builders.py", line 74, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0986469Z result_bundle = dag_runner.fit() +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987113Z File "/app/dataflow/core/dag_runner.py", line 170, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987711Z return self._run_dag(method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988321Z File "/app/dataflow/core/dag_runner.py", line 181, in _run_dag +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988936Z df_out, info = self._run_dag_helper(method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0989566Z File "/app/dataflow/core/dag_runner.py", line 110, in _run_dag_helper +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0990221Z df_out = self.dag.run_leq_node(nid, method)["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0991397Z File "/app/dataflow/core/dag.py", line 428, in run_leq_node +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0992521Z self._run_node(id_, pred_nid, method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993298Z File 
"/app/dataflow/core/dag.py", line 593, in _run_node +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993800Z output = getattr(node, method)(**kwargs) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994361Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994834Z self._lazy_load(fit=True) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995336Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995859Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0996779Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0997405Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0998205Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048290Z ============================= slowest 3 durations ============================== +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048893Z 26.48s setup oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1049478Z 8.44s call helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1050189Z 5.32s setup dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1116212Z =========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z SKIPPED [1] test/test_tasks.py:68: Test needs to be run outside Docker +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119804Z SKIPPED [1] test/test_tasks.py:60: Test needs to be run outside Docker +Run_fast_tests Run fast tests 
2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt new file mode 100644 index 000000000..bc2ab8612 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt @@ -0,0 +1,61 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 +pytest dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 + +# TestRealTimeMvnReturnsWithOms1.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 + market_data = 
self.get_market_data(event_loop) + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data + df = self.get_market_data_df() + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df + df = node.fit()["df_out"] + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined +^[[31m^[[1m__________________ + +# TestMultivariateNormalDataSource.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/core/nodes/test/test_sources.py", line 175, in test1 + df = node.fit()["df_out"] + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined +^[[31m^[[1m_______________________ + +# TestMvnReturnsBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/core/test/test_builders.py", line 74, in test1 + result_bundle = dag_runner.fit() + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 170, in fit + return self._run_dag(method) + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 181, in _run_dag + df_out, info = self._run_dag_helper(method) + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 110, in _run_dag_helper + df_out = self.dag.run_leq_node(nid, method)["df_out"] + File "$GIT_ROOT/dataflow/core/dag.py", line 428, in run_leq_node + 
self._run_node(id_, pred_nid, method) + File "$GIT_ROOT/dataflow/core/dag.py", line 593, in _run_node + output = getattr(node, method)(**kwargs) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt new file mode 100644 index 000000000..b0f4950ce --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt @@ -0,0 +1,36 @@ +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] + +=================================== FAILURES =================================== +__________________________ TestE8c_ModelBuilder.test1 __________________________ +Traceback (most recent call last): + File "/app/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 + self.check_string(actual) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: 
saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +__________________________ TestE8a_ModelBuilder.test1 __________________________ +Traceback (most recent call last): + File "/app/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 + self.check_string(actual) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +============================= slowest 3 durations ============================== +10.36s call dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 +7.77s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +7.31s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +=========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt new file mode 
100644 index 000000000..063e0af62 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt @@ -0,0 +1,36 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 +pytest dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 + +# TestE8a_ModelBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 + self.check_string(actual) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +# TestE8c_ModelBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 + self.check_string(actual) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ 
+The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +________________________ + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt new file mode 100644 index 000000000..a2ee5ad54 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt @@ -0,0 +1,2533 @@ +INFO: > cmd='/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke run_fast_slow_superslow_tests' +>>ENV<<: is_inside_container=False: code_version=1.0.3, container_version=None, is_inside_docker=False, is_inside_ci=False, CI_defined=False, CSFY_CI='nan' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=False +## run_fast_slow_superslow_tests:  +## run_fast_tests:  +15:12:49 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and not superslow" . 
-o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION 
+CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 5.0s +timeout method: signal +timeout func_only: True +collecting ...  
+collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 622 items  +collecting 801 items  +collecting 1084 items  +collecting 1419 items  +collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint ( 3 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint ( 3 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=5f6da4732626 + release=3.10.0-1160.36.2.el7.x86_64 + version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51710918656, percent=22.3, used=11804581888, free=14433091584, active=30353010688, inactive=18354896896, buffers=0, cached=40310579200, shared=2491396096, slab=2053443584) + disk usage=sdiskusage(total=107362627584, used=32545419264, free=74817208320, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? 
+ gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m not slow and not superslow . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 81 deselected / 1793 selected  + +amp/dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.02 s) PASSED [ 0%] +amp/core/finance/test/test_prediction_processing.py::TestStackPredictionDf::test1 (0.03 s) PASSED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call1 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call2 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call3 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call4 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser SKIPPED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_get_df1 (0.01 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_read_with_filter1 (0.03 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_everything1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_one_column1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_two_columns1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_merge1 (0.08 s) PASSED [ 0%] 
+amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read1 (0.05 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read2 (0.06 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read3 (0.03 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read4 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_full1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_over_two_years1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_two_years1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns3 (0.00 s) PASSED [ 1%] 
+amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns4 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_get_test_data1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset SKIPPED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset_wrong_column (0.00 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test1 (0.03 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test2 (0.02 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test3 (0.02 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test4 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test5 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test6 (0.02 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test7 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test8 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test1 (0.07 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test2 (0.10 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test1 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test2 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.21 s) PASSED [ 2%] 
+amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_unadjusted_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_metadata1 (0.11 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 3%] 
+amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_expiry_data5 (0.09 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data5 (0.07 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_expiry_data5 (0.07 s) PASSED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_mixed_constraints SKIPPED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_only_gmv_constraint SKIPPED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_restrictions SKIPPED [ 4%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_short_ban SKIPPED [ 4%] 
+amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer2::test1 SKIPPED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse1 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse2 (0.38 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse3 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse4 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse_empty_traceback1 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestGhLogin1::test_gh_login (0.23 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_images_ls_repo (0.56 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_all SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_last SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_ps (0.21 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_stats SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean (0.22 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_fetch_master (0.22 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_pull (0.21 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_images_ls_repo (0.36 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_all SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_last SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_login (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_ps (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_pull (0.00 s) PASSED [ 5%] 
+amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_stats SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_find_test_class1 (0.14 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr1 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr2 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr3 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_issue_title (0.42 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_workflow_list SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_files (0.15 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean2 (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create3 (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_fetch_master (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_merge_master (0.08 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_pull (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint2 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_print_setup (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title1 (0.47 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title4 (0.44 s) PASSED [ 6%] 
+amp/helpers/test/test_lib_tasks.py::TestLibTasksRemoveSpaces1::test1 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash2 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash4 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash5 (0.02 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_jupyter1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests1 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests2 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests4 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests5 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class1 (0.14 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class2 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class3 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator2 SKIPPED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files1 (0.09 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files2 (0.09 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_diff_files_abort1 (0.16 s) PASSED [ 7%] 
+amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_branch1 (0.33 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_files1 (0.23 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_last_commit1 (0.37 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_modified1 (0.44 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test2 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test1 (0.15 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2 (0.14 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert3 (0.07 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_branch1 SKIPPED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files3 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_last_commit1 (0.03 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_modified1 (0.07 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files1 (0.00 s) PASSED [ 8%] 
+amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test1 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test2 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test3 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test4 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test5 (0.22 s) PASSED [ 9%] +amp/helpers/test/test_lib_tasks.py::TestFailing::test_failing (0.00 s) PASSED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data2 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input1 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input2 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input3 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input4 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book_invalid_input1 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_get_exchange_currency_pairs SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_initialize_class SKIPPED [ 9%] 
+amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_1 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_2 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_3 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_1 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_2 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_3tiles_1 (0.00 s) PASSED [ 9%] +amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique1 (0.00 s) PASSED [ 9%] +amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique2 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test1 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test2 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test3 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df1 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df2 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df3 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df4 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types1 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types2 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types3 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str1 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str2 (0.00 s) PASSED [ 
10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str3 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str4 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str5 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_datetime (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_timestamp (0.00 s) PASSED [ 11%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_uuid (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_universe1 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data2 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data3 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data4 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data5 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data6 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] 
+amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_universe1 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data2 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data3 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data4 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data5 (0.07 s) PASSED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data6 (0.00 s) PASSED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_end_ts_for_symbol1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_start_ts_for_symbol1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_universe1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data2 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data3 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data4 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data5 SKIPPED [ 12%] 
+amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data6 SKIPPED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_infs (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_false (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_true (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test1 (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test_heaviside1 (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_compute_weighted_sum1::test1 (0.00 s) PASSED [ 12%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal1 (0.00 s) PASSED [ 12%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal5 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal_fuzzy_match1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal1 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal2 (0.04 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir3 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir4 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_output_dir1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir1 (0.00 s) PASSED [ 13%] 
+amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space3 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal1 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal_debug SKIPPED [ 13%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string1 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing1 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing2 (0.00 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing3 (0.15 s) (WARNING: Test was updated) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal1 (0.04 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal2 (0.03 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal3 (0.04 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal1 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal2 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal3 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing1 +WARNING: Update golden outcome file 
'/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing1/output/test_df.txt'(0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing2 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing3 +WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing3/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal1 (0.06 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal2 (0.05 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal3 +WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt'(0.03 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal4 (0.05 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test1 (0.16 s) (WARNING: Test was updated) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test2 +WARNING: Update golden outcome file '/app/amp/helpers/test/Test_check_string_debug1.test2/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::TestSubsetDf1::test1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test1 (0.02 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test2 (0.02 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test1 (0.00 s) PASSED [ 15%] 
+amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test3 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test3 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_amp_reference1::test1 (0.00 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeReturnPipeline1::test1 (0.47 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimePipelineWithOms1::test1 (0.98 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 SKIPPED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms2::test1 SKIPPED [ 15%] +amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py::TestCsvToPq::test_csv_to_pq_script SKIPPED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 (0.00 s) FAILED [ 16%] +research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (6.31 s) RERUN [ 16%] +research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (1.61 s) PASSED [ 16%] +research/RH2E/test/test_RH2E_pipeline.py::TestRH2E_DagBuilder::test1 (4.68 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ec_pipeline.py::TestRH2Ec_DagBuilder::test1 (0.18 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ed_pipeline.py::TestRH2Ed_DagBuilder::test1 (0.19 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ee_pipeline.py::TestRH2Ee_DagBuilder::test1 (0.39 s) PASSED [ 16%] 
+research/RH2E/test/test_RH2Ef_pipeline.py::TestRH2Ef_DagBuilder::test1 (4.61 s) PASSED [ 16%] +research/RH2E/test/test_RH2Eg_pipeline.py::TestRH2Eg_DagBuilder::test1 (3.68 s) PASSED [ 16%] +research/RH1E/test/test_RH1E_pipeline.py::TestRH1E_DagBuilder::test1 (2.02 s) PASSED [ 16%] +research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test1 (0.17 s) PASSED [ 16%] +research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test2 (1.97 s) PASSED [ 16%] +oms_lime/test/test_eg_broker.py::TestEgBroker1::test_place_order1 (0.94 s) PASSED [ 16%] +oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example1 (1.74 s) PASSED [ 16%] +oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example2 (0.06 s) PASSED [ 16%] +oms_lime/test/test_eg_restrictions.py::TestEgRestrictions1::test_get_trading_restrictions (0.02 s) PASSED [ 16%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test_save_data SKIPPED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_DagBuilder::test1 (0.47 s) PASSED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_DagBuilder::test1 (4.82 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (5.13 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (6.41 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (2.90 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_predict (0.79 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (6.48 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (4.11 s) PASSED [ 17%] 
+dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag1 (0.01 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag2 (0.01 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder4::test_fit (2.90 s) PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove1 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove2 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove3 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove4 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove5 PASSED [ 17%] +amp/oms/test/test_oms_db.py::TestOmsDbRemoveAllTables1::test1 SKIPPED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio1::test_state (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics1 (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics2 (0.09 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics3 (0.01 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_cash1 (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_holdings1 (0.08 s) PASSED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test1 SKIPPED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test2 SKIPPED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio2::test1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestSimulatedProcessForecasts1::test_initialization1 (0.63 s) 
PASSED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts1::test_mocked_system1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system2 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system3 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system4 SKIPPED [ 18%] +amp/oms/test/test_restrictions.py::TestRestrictions1::test1 SKIPPED [ 18%] +amp/oms/test/test_restrictions.py::TestRestrictions1::test2 SKIPPED [ 18%] +amp/dataflow/system/test/test_real_time_dag_adapter.py::TestRealtimeDagAdapter1::testMvnReturnsBuilder1 (0.05 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes1 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes2 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes3 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes4 (0.02 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes5 (0.13 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes1 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes10 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes2 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes3 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes4 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes5 (0.00 s) PASSED [ 19%] 
+amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes6 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes7 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes8 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes9 (0.01 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks2 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks3 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test2 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_from_config1 (0.01 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_columns_for_tag1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_tags_for_column1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_pickle1 (0.05 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_dict_and_back (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_feature_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags2 (0.00 s) PASSED [ 20%] 
+amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags3 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_targets_and_predictions_for_tags1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_prediction_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_target_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_runners.py::TestRollingFitPredictDagRunner1::test1 (0.43 s) PASSED [ 20%] +amp/dataflow/core/test/test_runners.py::TestIncrementalDagRunner1::test1 (0.47 s) PASSED [ 20%] +amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test2 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw_to_file1 (0.01 s) PASSED [ 20%] +amp/core/plotting/test/test_portfolio_stats.py::Test_plot_portfolio_stats1::test1 PASSED [ 20%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test3 (0.00 s) PASSED [ 21%] +amp/config_root/config/test/test_config_builders.py::TestGetConfigsFromBuilder1::test1 (0.00 s) PASSED [ 21%] +amp/config_root/config/test/test_config_builders.py::TestGetConfigFromEnv::test_no_env_variables (0.00 s) PASSED [ 21%] 
+amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test1 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test2 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test4 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test1 (0.02 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test4 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestComputeTurn1::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestMaximizeWeightEntropy1::test1 (0.13 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestFindNearestAffinePoint1::test1 (0.01 s) PASSED [ 22%] +research/returns/test/test_dataflow_lime_returns_pipeline.py::TestReturnsPipeline::test1 (0.11 s) PASSED [ 22%] +im_lime/eg/test/test_eg_transform_pq_by_date_to_by_asset.py::TestEgTransformByDateToByTile1::test_transform1 (4.30 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test1 (3.12 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache1 SKIPPED [ 22%] 
+core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache2 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache3 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache4 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache5 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache6 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_historical1 (0.70 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_real_time1 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_replayed_time1 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_save_data SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgMultipleInstrumentDataReader1::test_historical1 (0.75 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_incorrect_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_integer_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_string_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_index_already_present (0.00 s) PASSED [ 23%] 
+amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_milliseconds (0.00 s) PASSED [ 23%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_seconds (0.00 s) PASSED [ 23%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_wrong_column (0.00 s) PASSED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_command_line SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call1 SKIPPED [ 23%] 
+amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call2 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_process_chunk SKIPPED [ 23%] +amp/helpers/test/test_lib_tasks_find.py::Test_find_short_import1::test1 (0.00 s) PASSED [ 23%] +amp/helpers/test/test_lib_tasks_find.py::Test_find_func_class_uses1::test1 (0.00 s) PASSED [ 24%] +amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_replayed_time1 SKIPPED [ 24%] +amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_simulated_replayed_time1 (0.46 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test1 (0.16 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 (0.15 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit1 (1.13 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit_dag1 (1.07 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_csv1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_parquet1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_csv1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_parquet1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates1 (0.01 s) PASSED [ 24%] 
+amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates_open_boundary1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestArmaDataSource::test1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test1 (0.07 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test2 (0.02 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test3 (0.07 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test4 (0.08 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test5 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3 (0.16 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test01 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test02 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test03 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test04 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test05 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test06 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test07 (0.19 s) PASSED [ 25%] 
+amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test08 SKIPPED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test09 (0.37 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test10 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test11 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test12 (0.13 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test13 (0.16 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1 (0.20 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2 (0.25 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3 (0.34 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test2 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test3 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test4 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test5 (0.00 s) PASSED [ 26%] 
+amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test2 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test3 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test4 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test5 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test1 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test10 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test11 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test12 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test2 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test3 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test4 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test5 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test6 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test7 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test8 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test9 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test1 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test2 (0.00 s) PASSED [ 27%] 
+amp/core/statistics/test/test_t_test.py::TestTTest1samp::test3 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test4 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_high_sample_count (0.02 s) PASSED [ 27%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_moderate_sample_count (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_high_sample_count (0.41 s) PASSED [ 28%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_moderate_sample_count (0.20 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test1 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test2 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test3 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test4 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test5 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test6 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test7 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test1 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test2 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test3 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test4 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test5 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test6 
(0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test1 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test2 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test4 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test5 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test6 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeNumFiniteSamples::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeNumUniqueValues::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeDenominatorAndPackage::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test1 (0.01 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test2 (0.01 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test2 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test4 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test5 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test6 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_2dof (0.00 s) PASSED [ 29%] 
+amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_4dof (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_almost_normal (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test1 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test10 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test11 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test12 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test2 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test3 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test5 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test6 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test7 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test8 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test9 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test1 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test2 (0.01 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test3 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test4 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test5 (0.00 s) PASSED [ 30%] 
+amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test6 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test7 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_drawdown::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test1 (0.01 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test2 (0.01 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed3 (0.00 s) PASSED [ 
32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_scale_invariance1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test4 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test5 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test3 (0.00 s) PASSED [ 
33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test4 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test5 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test6 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_df (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_series (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_user_supplied_pi0 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test0 SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test1 (0.38 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test_generate_input_data SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test0 SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test1 (0.02 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test2 (0.05 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test3 (0.04 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test_generate_input_data SKIPPED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test1 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test2 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test1 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 (0.00 s) XFAIL [ 34%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test3 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test1 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test2 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test3 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test4 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test5 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test6 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 (0.01 s) XFAIL [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test1 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test2 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test3 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test4 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test5 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test6 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test7 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test8 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test1 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test3 (0.00 s) PASSED [ 35%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test4 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test5 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test6 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test7 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test8 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test1 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test3 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test4 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test5 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test6 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test7 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test8 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test9 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test1 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test4 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test5 (0.00 s) PASSED [ 36%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test6 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test7 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test8 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_nan (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_smoke (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test1 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test2 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test3 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test1 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test2 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test1 (0.01 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test2 (0.01 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatio::test1 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatioStandardError::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test1 (0.02 s) PASSED [ 37%] 
+amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test2 (0.09 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test3 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test1 (0.02 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test2 (0.09 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_summarize_sharpe_ratio::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test2 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test3 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test4 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_nans1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_oos_not_from_interval1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_zeros1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test2 (0.00 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_derivative1::test1 (0.03 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_moving_average1::test1 (0.01 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test1 (0.00 s) PASSED [ 38%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test2 (0.00 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test3 (0.00 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test4 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test5 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test6 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test7 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_moment1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_norm1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_var1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_std1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_demean1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_skew1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_kurtosis1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_sharpe_ratio1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_corr1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zcorr1::test1 (0.03 s) PASSED [ 38%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_atol1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_clean1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_atol1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_clean1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan1 (0.01 s) PASSED [ 39%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_atol1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_clean1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_annualized_sharpe_ratio::test1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test1 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test2 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test3 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test4 (0.17 s) PASSED [ 
40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test5 (0.16 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test6 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test3 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test4 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test5 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan1 (0.14 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero1 (0.10 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize1 (0.11 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test1 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test2 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test3 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_clean1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_depth (0.01 s) PASSED [ 41%] 
+amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode3 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode3 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test1 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test2 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test3 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test_lag_1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_inverse::test1 (0.00 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_presudoinverse::test1 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_non_ath_to_nan1::test1 (0.01 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_bypass (0.00 s) PASSED [ 42%] 
+amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_remove (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test1 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test2 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_bypass (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_remove (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_ask_value (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_bid_value (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_centered_order_book_imbalance (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_geometric_mid (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_log_relative_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid_value (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_order_book_imbalance (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_quoted_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_relative_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_weighted_mid (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test1 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test2 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test1 (0.02 s) PASSED [ 43%] 
+amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test2 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test3 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans1 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans2 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans2 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_offset (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_daily (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_intraday (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_invariance (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_month1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_week1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_year1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_minute1 (0.01 s) PASSED [ 44%] 
+amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_month_to_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_month1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_week1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_year1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_business_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_minute1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_month_to_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::TestComputeOvernightReturns::test1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test1 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test2 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test3 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test4 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test5 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test6 (0.00 s) PASSED 
[ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_function (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_object (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_roundtrip_transform1 (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_set1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key4 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_in1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_not_in1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key4 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] 
+amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key2 SKIPPED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key3 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key4 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_not_existing_key1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_print1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_to_python1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_roundtrip_transform1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in3 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in4 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update3 
(0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestFromEnvVar1::test1 (0.44 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test_check_same_configs_error (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test1 (0.00 s) PASSED [ 48%] 
+amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_convert_to_dataframe1::test1 (0.01 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test1 (0.00 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test2 (0.00 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test3 (0.00 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test1 (0.47 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test2 (0.26 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test3 (0.27 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_log_portfolio_read_portfolio (0.10 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_multiday_overnight_returns_injected (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_floating_gmv (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_floating_gmv (0.03 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_targeted_gmv (0.03 s) PASSED [ 49%] 
+amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_demean (0.04 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_side_preserving (0.04 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_multiday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] +im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data1 (1.75 s) PASSED [ 49%] +im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data2 (1.82 s) PASSED [ 49%] +market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_get_data_at_timestamp1 (2.07 s) PASSED [ 50%] +market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_should_be_online1 (0.00 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_at_timestamp1 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval1 SKIPPED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval2 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval3 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval4 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval5 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period1 (0.16 s) PASSED [ 50%] 
+amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period2 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period3 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period4 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period5 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period6 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period7 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_end_time1 (0.06 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_price1 (0.23 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_twap_price1 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_is_online1 (0.06 s) PASSED [ 51%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_should_be_online1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test3 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test4 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test5 (0.00 s) PASSED [ 51%] 
+amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test6 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test7 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/universe/test/test_universe_utils.py::TestStringToNumericalId::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/universe/test/test_universe_utils.py::TestBuildNumericalToStringIdMapping::test1 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates1 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates2 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates3 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates4 (0.00 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_get_available_dates1 (0.00 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test1 (1.39 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test2 (2.44 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test3 (2.81 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla1 (1.61 s) PASSED [ 52%] 
+vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla2 (1.05 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_convert_string_to_timestamp1 (0.02 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_generate_raw_eg_data SKIPPED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_process_bar_data1 (0.02 s) PASSED [ 52%] +vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data1 (0.82 s) PASSED [ 52%] +vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data2 (0.74 s) PASSED [ 52%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_replayed_time1 SKIPPED [ 52%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_save_data SKIPPED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_tiny1 (0.00 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v1 (0.00 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_all (0.20 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_top100 (0.08 s) PASSED [ 52%] +oms_lime/test/test_eg_portfolio.py::TestEgPortfolio1::test_send_orders1 SKIPPED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData1::test_should_be_online1 (0.02 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data1 (0.04 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data3 (0.05 s) PASSED [ 53%] 
+market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time1 (0.01 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_is_online1 (0.02 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_sql_get_query1 (0.01 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData1::test_save_market_data1 SKIPPED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data1 (0.19 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp1 (0.19 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp2 (0.17 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_for_interval1 (0.18 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_print_info_for_serialized_data1 SKIPPED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_round_trip1 (0.16 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData3::test_get_data1 (0.50 s) PASSED [ 53%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period1 (0.00 s) SKIPPED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data1 (0.08 s) PASSED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data2 (0.08 s) PASSED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data3 (0.12 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data4 (0.12 s) 
PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data5 (0.12 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_0 (0.02 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_1 (0.07 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_3 (0.08 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_6 (0.08 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_63 (0.11 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp1 (0.04 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp2 (0.02 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval1 (0.03 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval2 (0.04 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_get_last_end_time1 (0.03 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available1 (0.07 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available2 (0.05 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available3 (0.84 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData4::test_is_last_bar_available1 (0.08 s) PASSED [ 54%] 
+amp/dataflow/model/test/test_stats_computer.py::TestStatsComputer1::test_compute_portfolio_stats1 (0.04 s) PASSED [ 54%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_bash SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_cmd1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_images_ls_repo1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_jupyter1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_login1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_ps SKIPPED (T...) [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_stats SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_list SKIPPED (Test n...) [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_print_setup1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only2 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_local_image SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_prod_image SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_jupyter1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_pull1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_lint1 SKIPPED (Test ...) 
[ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_blank_tests1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests_failed SKIPPED [ 55%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order2 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout1 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test1 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test3 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test4 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test1 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test3 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestImDockerCmd::test1 SKIPPED [ 56%] +amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe1 (0.00 s) PASSED [ 57%] 
+amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe2 (0.00 s) PASSED [ 57%] +amp/datapull/ccxt/universe/test/test_universe.py::TestGetVendorUniverse::test1 (0.00 s) PASSED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data3 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data4 SKIPPED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test1 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test2 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test3 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test4 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test1 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test10 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test11 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test12 (0.00 s) 
PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test13 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test14 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test15 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test16 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test17 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test18 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test2 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test3 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test4 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test5 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test6 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test7 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test8 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test9 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_config1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_dataseries1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_df1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_dict1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int1 (0.00 s) 
PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_list1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFilePath1::test1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test1 (0.50 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test2 (0.50 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test3 (0.49 s) PASSED [ 59%] +amp/helpers/test/test_printing.py::Test_printing1::test_color_highlight1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test3 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test4 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test5 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test6 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test3 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test4 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_sort_dictionary::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_indent1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test2 (0.00 
s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test_roundtrip1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_align_on_left1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame3 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test1 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test2 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test3 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test4 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test5 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test6 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test7 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_os_name (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_server_name (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_user_name (0.10 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test1 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test2 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test3 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test1 (0.17 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test2 (0.18 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test3 (0.18 s) 
PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test4 (0.18 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test5 (0.18 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_Linux_commands1::test_du1 (0.19 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_not_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp3 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp4 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp5 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestCacheFunctions::test_get_cache_name1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_changed_function (0.12 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_redefined_function (0.12 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching1 (0.30 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching2 (0.30 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching3 (0.30 s) PASSED [ 62%] 
+amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching4 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching5 (0.29 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_disk_reset (0.39 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset (0.40 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset2 (0.43 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_without_caching1 (0.00 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching1 (0.70 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching2 (0.64 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_dataframe (0.17 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_series (0.16 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function_no_mem (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestAmpTask1407::test1 (0.10 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestAmpTask1407::test2 (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 SKIPPED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_disk_cache1 (0.33 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_cache1 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_disk_cache1 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheUpdateFunction1::test1 (0.01 s) PASSED [ 64%] +amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_disk_cache1 (0.32 s) PASSED [ 64%] 
+amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_cache1 (0.32 s) PASSED [ 64%] +amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_disk_cache1 (0.33 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test3 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test4 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test5 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test6 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test7 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test3 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test4 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test5 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance3 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance4 (0.00 s) 
PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance5 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted3 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted4 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert3 (0.00 s) PASSED [ 66%] 
+amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert4 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_callable1::test1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_callable1::test_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_branch_name1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_path_from_supermodule1 (0.11 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_project_dirname1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_submodule_paths1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_is_amp (0.11 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_is_inside_submodule1 (0.00 s) PASSED [ 67%] 
+amp/helpers/test/test_git.py::Test_git_submodule2::test_get_head_hash1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_get_remote_head_hash1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes3 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names1 (0.10 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names2 (0.11 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client1 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client2 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname1 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname2 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name4 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name_rountrip1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_task_prefix_from_repo_short_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name3 (0.00 s) PASSED [ 68%] 
+amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name4 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root1 SKIPPED [ 68%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root2 SKIPPED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root3 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root4 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root5 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 (0.11 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files_in_branch1 (0.05 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_previous_committed_files1 (0.07 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_summary_files_in_branch1 (0.47 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_git_log1 (0.07 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test1 (0.13 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test2 (0.13 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test3 (0.17 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test4 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test5 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_docker_base_image_name1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_host_name1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_repo_map1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_has_didn_support1 (0.00 s) PASSED [ 69%] 
+amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_real_time1 (1.00 s) PASSED [ 70%] +amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_logging1::test_logging_levels1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_real_time1 (1.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_find_all_files1::test1 (0.20 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_change_filename_extension1::test1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_load_df_from_json::test1 (0.01 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name2 (0.00 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_run1 (0.36 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_env1::test_get_system_signature1 (0.16 s) PASSED [ 70%] +amp/dev_scripts/infra/test/test_all.py::Test_ssh_tunnel::test1 SKIPPED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_caesar1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_author1 SKIPPED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_file_size1 (0.28 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_master1 (0.05 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_words_in_text1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex1 (0.00 s) PASSED [ 71%] 
+amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex2 (0.00 s) PASSED [ 71%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex3 (0.00 s) PASSED [ 71%] +amp/dataflow/model/test/test_forecast_mixer.py::TestForecastMixer1::test_generate_portfolio_bar_metrics_df (0.05 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_model_selection1 (1.77 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_model_return_correlation1 (0.28 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_multiple_tests_adjustment1 (0.18 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_positions1 (0.43 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_and_vol1 (0.71 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_signal_analysis1 (0.58 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_return_correlation1 (0.32 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_returns_and_predictions1 (1.13 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_sharpe_ratio_panel1 (0.40 s) PASSED [ 71%] +amp/dataflow/model/test/test_regression_analyzer.py::TestRegressionAnalyzer1::test_compute_moments (0.06 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_replayed_time1 (0.00 s) PASSED [ 71%] +amp/core/test/test_real_time.py::TestReplayedTime1::test1 (0.00 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_real_time1 (3.03 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_replayed_time1 (4.01 s) PASSED [ 72%] 
+amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_replayed_time1 (0.00 s) PASSED [ 72%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_time1 (0.00 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test1 (0.40 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test2 (0.39 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test3 (0.39 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test4 (0.39 s) PASSED [ 72%] +amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_precision_equivalency (0.00 s) PASSED [ 72%] +amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_toy_case (0.00 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_data1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_twap_price1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread2 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread3 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread4 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint2 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint3 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread1 (0.01 s) PASSED [ 73%] 
+amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread2 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread3 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread4 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread5 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread6 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price1 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price2 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price3 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test1 (0.06 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random1 (0.06 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random2 (0.08 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random3 (0.10 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test1 (0.03 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test2 (0.03 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test3 (0.07 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test_perf1 SKIPPED [ 73%] +amp/oms/test/test_api.py::Test_Contract1::test1 (0.00 s) PASSED [ 73%] +amp/oms/test/test_api.py::Test_Contract1::test_cmp1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Contract1::test_cmp2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Order1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_OrderStatus1::test1 (0.00 s) PASSED [ 74%] 
+amp/oms/test/test_api.py::Test_Trade1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_cmp1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_cmp2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff3 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_OMS1::test1 SKIPPED (unconditional skip) [ 74%] +amp/oms/test/test_api.py::Test_OMS1::test2 SKIPPED (unconditional skip) [ 74%] +amp/oms/test/test_broker.py::TestSimulatedBroker1::test_submit_and_fill1 (0.05 s) PASSED [ 74%] +amp/oms/test/test_broker.py::TestMockedBroker1::test1 SKIPPED (Need ...) [ 74%] +amp/oms/test/test_order.py::TestOrder1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_order.py::TestOrders1::test1 (0.00 s) PASSED [ 74%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 74%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 75%] 
+amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract2 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract3 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol2 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol3 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol4 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract4 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract5 (0.00 s) PASSED [ 76%] 
+amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract6 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract7 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract1 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract1 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract4 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contract_slow1 (0.66 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts1 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts2 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts3 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures1 (0.05 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures3 (0.05 s) PASSED [ 77%] 
+amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures4 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures5 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures6 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow1 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow2 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata1 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata2 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata3 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata4 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata5 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow1 (0.38 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow2 (0.40 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow3 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element1 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element2 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer1 (0.00 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer2 (0.00 s) 
PASSED [ 77%] +amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_parsing_logic (0.00 s) PASSED [ 78%] +amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_real_call SKIPPED [ 78%] +amp/im/kibot/metadata/test/test_load.py::TestAdjustmentsLoader::test_real_call SKIPPED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_etfs (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_forex (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_futures (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_stocks (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_sp500 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_daily (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_minutely (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_tick (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_continuous (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_expiry (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test1 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test10 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test11 (0.00 s) PASSED [ 78%] 
+amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test12 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test13 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test14 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test2 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test3 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test4 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test5 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test6 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test7 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test8 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test9 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test1 (0.17 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test_read_data_with_start_end_ts SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_1 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_2 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_1 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_2 SKIPPED [ 79%] 
+amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol1 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol2 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol3 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol4 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name1 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name2 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name3 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name4 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name5 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file1 (0.00 s) PASSED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 80%] 
+amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 80%] +amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_dataset_links (0.03 s) PASSED [ 81%] +amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_payload_links (1.53 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path1 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path2 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path3 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_get_latest_symbols_file1 (0.03 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_dtypes1 (0.04 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data1 (0.10 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data2 (0.04 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data3 (0.10 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_check_date_type (0.05 s) PASSED [ 81%] 
+amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_with_start_end_ts (1.51 s) PASSED [ 81%] +amp/im/eoddata/test/test_read_symbol_list.py::Test_read_symbols_from_file::test1 (0.00 s) PASSED [ 81%] +amp/im/ib/connect/test/test_im_tasks.py::TestImTwsStartIbInterface::test1 SKIPPED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table1 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table2 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table3 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_from_text1 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_repr1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_str1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_unique1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_unique2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_timer.py::TestTimedScope::test_1 (1.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version1 SKIPPED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_check_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_get_changelog_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_get_container_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_find_duplicates1::test1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_find_duplicates1::test2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test1 
(0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test3 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_extract1::test1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_extract1::test2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test4 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test5 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test6 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test7 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test4 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test5 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_find_duplicates1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_find_duplicates2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_extension1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_os1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_html::test_linux1 SKIPPED (...) [ 84%] +amp/helpers/test/test_open.py::Test_open_html::test_mac1 SKIPPED (Se...) 
[ 84%] +amp/helpers/test/test_open.py::Test_open_html::test_windows1 SKIPPED [ 84%] +amp/helpers/test/test_open.py::Test_open_pdf::test_mac1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_get_credentials1::test1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_functions1::test_extract_bucket_from_path1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists1 (0.01 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists2 (0.05 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists3 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_glob1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_ls1 (0.01 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_dry_run1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading2 (0.03 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky2 (1.97 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_serial1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading1 (0.02 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading2 (0.02 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky1 (1.34 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky2 (1.19 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial1 (0.02 s) PASSED [ 85%] 
+amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial2 (0.02 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading1 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading2 (0.04 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading3 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading4 (0.04 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky1 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky2 (1.38 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky3 PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial1 (0.07 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial2 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_joblib_example1::test1 SKIPPED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime1 (0.00 s) PASSED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime_fail1 (0.00 s) PASSED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime_fail1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime_assert1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_datetime_conversions (0.00 s) PASSED [ 86%] 
+amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime3 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_ET (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_UTC (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_ET (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_UTC (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_annual1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_bimonthly1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_daily1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_index1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly3 (0.00 s) PASSED [ 87%] 
+amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly4 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly5 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly3 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_srs1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_weekly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test3 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test3 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test3 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test4 (0.00 s) PASSED [ 88%] 
+amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test5 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test6 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test7 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test8 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test9 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_env.py::Test_env1::test_get_system_signature1 (0.17 s) PASSED [ 88%] +amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_convert_csv_to_dict::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_from_typed_csv::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_to_typed_csv::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_conjunction1 (0.01 s) PASSED [ 88%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_disjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_conjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_disjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::TestFilterDataByMethod::test1 (0.02 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test2 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test3 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test4 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test5 (0.00 s) PASSED [ 89%] 
+amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test6 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test1 (0.01 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test2 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test3 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test4 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test5 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test6 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test7 (0.00 s) PASSED [ 89%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md1::test_uml_file_names1 (0.00 s) PASSED [ 89%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command1 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command2 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml1 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml2 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml3 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml4 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml_playback1 (0.01 s) PASSED [ 90%] +amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test1 (0.00 s) PASSED [ 90%] +amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test2 (0.00 s) PASSED [ 90%] 
+amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test3 (0.00 s) PASSED [ 90%] +amp/dataflow/pipelines/features/test/test_feature_pipeline.py::TestFeaturePipeline::test1 (0.23 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_with_oos (0.02 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_without_oos (0.03 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_with_oos (0.04 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_without_oos (0.00 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json3 (0.00 s) PASSED [ 91%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_merge (0.01 s) PASSED [ 91%] +amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_aggregate_models1 (0.26 s) PASSED [ 91%] +amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_calculate_stats1 (1.41 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer1::test_column_arithmetic (0.03 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer2::test_resampling (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing1 (0.02 s) PASSED [ 91%] 
+amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing2 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans_without_reindexing (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer1::test1 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_then_join (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer1::test1 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test1 (0.04 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test2 (0.51 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_then_join (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 92%] 
+amp/dataflow/core/nodes/test/test_transformers.py::TestFunctionWrapper::test1 (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test1 (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test2 (0.03 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test1 (0.08 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test2 (0.13 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test1 (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test2 (0.03 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test3 (0.05 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test1 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test2 (0.06 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test3 (0.08 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test1 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test2 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test3 (0.06 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_local_level_model.py::TestLocalLevelModel::test1 (0.01 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test0 SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test1 (0.06 s) PASSED [ 93%] 
+amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test2 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test3 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test4 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test5 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test6 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test7 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test_generate_input_data SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1 (1.04 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2 (1.07 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_step_one1 SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_with_constant1 (1.25 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict2 (1.07 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals1 (1.46 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals_no_x1 (1.28 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_with_nan (1.08 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test1 (0.02 s) PASSED [ 94%] 
+amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_0_zscoring1 (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_3_zscoring1 (0.02 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through_no_writing (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_write (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through_no_writing (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_write (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_linearize_eigval_eigvec (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval1 (0.00 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval2 (0.00 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec1 (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec2 (0.02 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test1 (0.23 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test2 (0.35 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestTimeSeriesDailyStudy::test_usual_case (0.29 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestTimeSeriesMinutelyStudy::test_usual_case (0.58 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test1 (0.13 s) PASSED [ 95%] 
+amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test2 (0.13 s) PASSED [ 95%] +amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test3 (0.13 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_shape1 (0.00 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_truncate1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_local_ts (0.28 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_none_x_vars (0.00 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_series_target (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness_local_ts SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_artificial_ts SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_none_x_vars SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluonForecasts::test_transform1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform_none_x_vars1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromSklean::test_transform1 (0.01 s) PASSED [ 96%] +amp/core/test/test_explore.py::Test_explore1::test_ols_regress_series (0.20 s) PASSED [ 96%] 
+amp/core/test/test_explore.py::Test_explore1::test_rolling_pca_over_time1 SKIPPED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column1 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column2 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column3 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column4 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index1 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index2 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index3 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index4 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_no_intersection (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pairs::test1 (0.01 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference1 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference2 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_mean (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference1 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference2 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference_of_logs (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_mean (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_mean_of_logs (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference1 (0.00 s) PASSED [ 97%] 
+amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compare_subspaces::test1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_effective_rank::test1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_effective_rank::test2 (0.00 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test1 (0.02 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test2 SKIPPED [ 98%] 
+amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test3 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test4 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test2 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test3 (0.01 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test4 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestResampleIndex1::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test2 (0.09 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test3 (0.08 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test4 (0.13 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test5 (0.02 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_csv1 (0.10 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_parquet1 (1.08 s) PASSED [ 98%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test1 (0.00 s) PASSED [ 98%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test2 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test3 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test1 (0.01 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test2 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::Test_generate_arima_signal_and_response::test1 (0.00 s) PASSED [ 99%] 
+amp/core/test/test_artificial_signal_generators.py::TestGenerateRecipeDataset::test1 (0.01 s) PASSED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test1 SKIPPED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test2 SKIPPED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test3 SKIPPED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_dollar_bars (0.07 s) PASSED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_tick_bars (0.02 s) PASSED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_volume_bars (0.07 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily_shift_freq1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_minutely1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_multiple_responses_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] + +=================================== FAILURES =================================== +__________________ Test_get_configs_from_command_line1.test1 ___________________ +Traceback (most recent call last): + File "/app/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in 
import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked +ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' +============================= slowest 3 durations ============================== +6.49s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution +6.41s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +6.31s call research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit +=========================== short test summary info ============================ +SKIPPED [5] amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py: Need dind support +SKIPPED [1] amp/helpers/test/test_hparquet.py:741: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file +SKIPPED [5] amp/optimizer/test/test_single_period_optimization.py: Requires special docker container. 
+SKIPPED [1] amp/helpers/test/test_lib_tasks.py:200: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:192: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:184: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:263: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:271: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:287: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:298: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:307: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:316: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:332: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:390: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:399: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:408: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:481: CmampTask #683. 
+SKIPPED [1] amp/helpers/test/test_lib_tasks.py:536: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:571: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:600: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:635: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:698: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:792: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1003: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1343: This test makes sense for a branch +SKIPPED [9] amp/datapull/ccxt/data/extract/test/test_exchange_class.py: Enable after CMTask1292 is resolved. +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:789: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:769: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:809: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:530: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:573: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:620: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:666: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:711: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:757: Need dind support +SKIPPED [1] amp/helpers/test/test_unit_test.py:335: This is only used to debug the debugging the infrastructure +SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:380: Need dind support +SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:534: Need dind support +SKIPPED [1] 
amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py:60: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file +SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:130: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:291: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:320: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:412: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:119: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:238: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:243: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:248: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:253: Need dind support +SKIPPED [1] amp/oms/test/test_restrictions.py:18: Need dind support +SKIPPED [1] amp/oms/test/test_restrictions.py:45: Need dind support +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:57: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:75: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:93: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:124: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:150: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:198: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:274: Next PR will rewrite this +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:325: LimeTask296: Break 2022-01-06 +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:311: Run 
manually +SKIPPED [9] amp/datapull/common/data/client/test/test_historical_pq_clients.py: Some tests are returning an empty df +SKIPPED [4] amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py: TODO(gp): Need to update this tests after transform v1.3 +SKIPPED [1] amp/dataflow/system/test/test_real_time_runner.py:39: Too slow for real time +SKIPPED [1] amp/dataflow/core/nodes/test/test_volatility_models.py:423: unconditional skip +SKIPPED [1] amp/core/statistics/test/test_regression.py:46: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/core/statistics/test/test_regression.py:17: This test generates the input data +SKIPPED [1] amp/core/statistics/test/test_regression.py:137: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/core/statistics/test/test_regression.py:108: This test generates the input data +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:270: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:283: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:296: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:303: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. +SKIPPED [1] amp/config_root/config/test/test_config.py:325: See AmpTask1573 +SKIPPED [1] amp/market_data/test/test_market_data_im_client.py:134: CmTask882. 
+SKIPPED [1] vendors_lime/taq_bars/test/test_taq_bars_utils.py:304: This is used to generate the frozen input +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:52: LimeTask222 Use volume for volume everywhere +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:39: Run manually +SKIPPED [1] oms_lime/test/test_eg_portfolio.py:14: Finish this +SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:26: Run manually +SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:110: Run manually +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:36: Skip on Mondays +SKIPPED [1] amp/test/test_tasks.py:68: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:60: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:44: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:64: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:56: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:48: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:52: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:36: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:40: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:122: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:95: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:102: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:85: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:89: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:142: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:112: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:129: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:134: 
Test needs to be run outside Docker +SKIPPED [1] amp/oms/test/test_order_processor.py:70: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:78: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:86: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:96: Need dind support +SKIPPED [7] amp/datapull/test/test_im_lib_tasks.py: CMTask #789. +SKIPPED [1] amp/datapull/test/test_im_lib_tasks.py:240: amp #1189 +SKIPPED [10] amp/im/kibot/data/load/test/test_sql_data_loader.py: CmTask666 +SKIPPED [1] amp/helpers/test/test_cache.py:731: See CMTask #952. +SKIPPED [1] amp/helpers/test/test_git.py:217: Run only in amp as super-module +SKIPPED [1] amp/helpers/test/test_git.py:229: Run only in amp as sub-module +SKIPPED [1] amp/dev_scripts/infra/test/test_all.py: unconditional skip +SKIPPED [1] amp/dev_scripts/git/git_hooks/test/test_install_hooks.py:21: There are no Git credentials inside Docker +SKIPPED [1] amp/oms/test/test_pnl_simulator.py:432: For performance measurement +SKIPPED [1] amp/oms/test/test_api.py:162: unconditional skip +SKIPPED [1] amp/oms/test/test_api.py:191: unconditional skip +SKIPPED [1] amp/oms/test/test_broker.py:55: Need dind support +SKIPPED [11] amp/im/kibot/test/test_kibot_sql_writer_backend.py: CmTask666 +SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:47: Disabled waiting for PTask4139 +SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:66: Disabled waiting for PTask4139 +SKIPPED [1] amp/im/kibot/data/load/test/test_s3_data_loader.py:23: Not implemented yet +SKIPPED [4] amp/im/ib/data/transform/test/test_transform.py: CmTask666 +SKIPPED [11] amp/im/ib/test/test_ib_sql_writer_backend.py: CmTask666 +SKIPPED [1] amp/im/ib/connect/test/test_im_tasks.py: unconditional skip +SKIPPED [1] amp/helpers/test/test_versioning.py:23: CmampTask570 +SKIPPED [3] amp/helpers/test/test_open.py: See cryptomtc/cmamp#321 +SKIPPED [1] amp/helpers/test/test_joblib_helpers.py: Just for experimenting with joblib +SKIPPED 
[1] amp/dataflow/core/nodes/test/test_regression_models.py:35: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/dataflow/core/nodes/test/test_regression_models.py:18: This test generates the input data +SKIPPED [1] amp/dataflow/core/nodes/test/test_sarimax_models.py:39: cmamp #654. +SKIPPED [1] amp/core/test/test_data_adapters.py:146: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:161: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:118: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:177: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:132: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_explore.py:25: https://github.com/.../.../issues/3676 +SKIPPED [1] amp/core/test/test_features.py:510: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:517: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:524: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:556: Apparent instability +SKIPPED [1] amp/core/test/test_backtest.py:27: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_backtest.py:69: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_backtest.py:111: Disabled because of PTask2440 +XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 +XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 +FAILED dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 += 1 failed, 1581 passed, 209 skipped, 81 deselected, 2 xfailed, 4 rerun in 200.01s (0:03:20) = +15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=120.0 KB +15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' 
+15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... +15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +ERROR: 1 +15:16:15 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3720 Fast tests failed +## run_slow_tests:  +15:16:15 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... 
done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION +CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "slow and not superslow" . 
-o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 30.0s +timeout method: signal +timeout func_only: True +collecting ...  +collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 562 items  +collecting 794 items  +collecting 1037 items  +collecting 1375 items  +collecting 1424 items  +collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint ( 6 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint ( 7 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=d232c57e32e2 + 
release=3.10.0-1160.36.2.el7.x86_64 + version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51706417152, percent=22.3, used=11809091584, free=14425956352, active=30357913600, inactive=18355712000, buffers=0, cached=40313204736, shared=2491396096, slab=2054676480) + disk usage=sdiskusage(total=107362627584, used=32545501184, free=74817126400, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? + gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m slow and not superslow . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 1803 deselected / 71 selected  + +amp/helpers/test/test_sql.py::TestSql1::test_copy_rows_with_copy_from1 SKIPPED [ 1%] +amp/helpers/test/test_sql.py::TestSql1::test_create_database SKIPPED [ 2%] +amp/helpers/test/test_sql.py::TestSql1::test_create_insert_query SKIPPED [ 4%] +amp/helpers/test/test_sql.py::TestSql1::test_db_connection_to_tuple SKIPPED [ 5%] +amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal1 SKIPPED [ 7%] +amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal2 SKIPPED [ 8%] +amp/helpers/test/test_sql.py::TestSql1::test_execute_insert_query1 SKIPPED [ 9%] +amp/helpers/test/test_sql.py::TestSql1::test_remove_database1 SKIPPED [ 11%] +amp/helpers/test/test_sql.py::TestSql1::test_remove_database_invalid SKIPPED [ 12%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create1 (0.84 s) PASSED [ 14%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create2 (0.47 s) PASSED [ 15%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_portfolio 
SKIPPED [ 16%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_vs_dataframe_portfolio SKIPPED [ 18%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_portfolio SKIPPED [ 19%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_vs_dataframe_portfolio SKIPPED [ 21%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_portfolio SKIPPED [ 22%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data1 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_extract_data_from_db.py::TestExtractDataFromDb1::test_extract_data_from_db SKIPPED [ 25%] +dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 (1.29 s) FAILED [ 26%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test1 SKIPPED [ 28%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 (19.17 s) PASSED [ 29%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 (19.22 s) PASSED [ 30%] +research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_slow1 SKIPPED [ 32%] +amp/oms/test/test_oms_db.py::TestOmsDbSubmittedOrdersTable1::test_create_table1 SKIPPED [ 33%] +amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_create_table1 SKIPPED [ 35%] +amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_insert1 SKIPPED [ 36%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table1 SKIPPED [ 38%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table2 SKIPPED [ 39%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table3 SKIPPED [ 40%] 
+amp/oms/test/test_oms_db.py::TestOmsDbCurrentPositionsTable1::test_create_table1 SKIPPED [ 42%] +amp/oms/test/test_oms_db.py::TestOmsDbRestrictionsTable1::test_create_table1 SKIPPED [ 43%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_parallel1 (8.08 s) PASSED [ 45%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_serial1 (11.31 s) PASSED [ 46%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel1 (11.48 s) PASSED [ 47%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel2 (11.42 s) PASSED [ 49%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial1 (14.94 s) PASSED [ 50%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial2 (15.19 s) PASSED [ 52%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentArchiveOnS3::test_serial1 (8.62 s) PASSED [ 53%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_parallel1 (11.59 s) PASSED [ 54%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_serial1 (11.41 s) PASSED [ 56%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel1 (15.41 s) PASSED [ 57%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel2 (15.34 s) PASSED [ 59%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial1 (11.61 s) PASSED [ 60%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial2 (12.15 s) PASSED [ 61%] +im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data_for_multiple_symbols1 (1.92 s) PASSED [ 63%] +amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_equities1 (1.31 s) PASSED [ 64%] +amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 (16.72 s) PASSED [ 66%] 
+amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_fit_dag1 (6.20 s) PASSED [ 67%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_predict_dag1 (4.90 s) PASSED [ 69%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test1 (13.29 s) PASSED [ 70%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test2 (7.24 s) PASSED [ 71%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline2::test_real_time1 SKIPPED [ 73%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time2 (0.03 s) PASSED [ 74%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period2 (0.00 s) SKIPPED [ 76%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period3 (0.00 s) SKIPPED [ 77%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period_compare1 (0.00 s) SKIPPED [ 78%] +amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only1 SKIPPED [ 80%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_all_tables1 SKIPPED [ 81%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_im_database SKIPPED [ 83%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_up1 SKIPPED [ 84%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time1 (3.62 s) PASSED [ 85%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time2 (3.99 s) PASSED [ 87%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file2 (11.92 s) PASSED [ 88%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky4 (1.33 s) PASSED 
[ 90%] +amp/dataflow/system/test/test_source_nodes.py::TestKibotEquityReader::test1 (7.63 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1 (1.90 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1 (1.48 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict1 (1.49 s) PASSED [ 95%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_no_x1 (1.57 s) PASSED [ 97%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_summary (2.32 s) PASSED [ 98%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test4 SKIPPED [100%] + +=================================== FAILURES =================================== +_________________ Test_TiledBacktest_E8d.test_end_to_end_slow1 _________________ +Traceback (most recent call last): + File "/app/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 + self._test(config_builder, experiment_builder, run_model_extra_opts) + File "/app/amp/dataflow/model/run_prod_model_flow.py", line 175, in _test + self.check_string(configs_signature, fuzzy_match=True, tag=tag) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '/app/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' +################################################################################ + +============================= slowest 3 durations ============================== +19.22s call 
dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 +19.17s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 +16.72s call amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 +=========================== short test summary info ============================ +SKIPPED [1] amp/helpers/test/test_sql.py:95: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:36: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:46: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:21: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:111: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:131: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:79: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:58: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:71: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:126: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:210: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:162: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:226: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:200: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/extract/test/test_exchange_class.py:35: Enable after CMTask1292 is resolved. 
+SKIPPED [1] amp/datapull/common/data/transform/test/test_extract_data_from_db.py:38: Need dind support +SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:250: Need dind support +SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:61: Disabled since cache was invalidated +SKIPPED [1] amp/oms/test/test_oms_db.py:46: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:127: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:136: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:192: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:203: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:223: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:292: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:310: Need dind support +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:131: LimeTask222 Use volume for volume everywhere +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:59: Skip on Mondays +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:81: Skip on Mondays +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:105: Skip on Mondays +SKIPPED [1] amp/test/test_tasks.py:116: Test needs to be run outside Docker +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:20: Need dind support +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:49: Need dind support +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:12: Need dind support +SKIPPED [1] amp/core/test/test_backtest.py:153: Disabled because of PTask2440 +FAILED dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 +==== 1 failed, 35 passed, 35 skipped, 1803 deselected in 297.23s (0:04:57) ===== +15:21:15 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=35.7 MB +15:21:15 - WARN  hcache.py 
clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' +15:21:15 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... +15:21:15 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +ERROR: 1 +15:21:18 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3737 Slow tests failed +## run_superslow_tests:  +15:21:18 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... 
done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION +CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "not slow and superslow" . 
-o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 3600.0s +timeout method: signal +timeout func_only: True +collecting ...  +collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 641 items  +collecting 801 items  +collecting 1084 items  +collecting 1391 items  +collecting 1671 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint (11 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint (12 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=61bb36f6d969 + release=3.10.0-1160.36.2.el7.x86_64 + 
version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51712106496, percent=22.3, used=11803402240, free=14392971264, active=30350835712, inactive=18393743360, buffers=0, cached=40351879168, shared=2491396096, slab=2055942144) + disk usage=sdiskusage(total=107362627584, used=32546025472, free=74816602112, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? + gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m not slow and superslow . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 1864 deselected / 10 selected  + +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_vs_dataframe_portfolio SKIPPED [ 10%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 + + +(462.17 s) PASSED [ 20%] +research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_superslow1 SKIPPED [ 30%] +im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data1 (30.67 s) PASSED [ 40%] +amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 (47.16 s) PASSED [ 50%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 (47.27 s) PASSED [ 60%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_1 (14.17 s) PASSED [ 70%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_2 (0.21 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_3 (0.26 s) PASSED [ 90%] 
+amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_4 (0.21 s) PASSED [100%] + +============================= slowest 3 durations ============================== +462.17s call dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 +47.27s call dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 +47.16s call amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 +=========================== short test summary info ============================ +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:239: Need dind support +SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:88: Disabled since cache was invalidated +========== 8 passed, 2 skipped, 1864 deselected in 610.66s (0:10:10) =========== +15:31:32 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=0.0 b +15:31:32 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' +15:31:32 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... 
+15:31:32 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3758 Fast tests failed +15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3763 Slow tests failed +15:31:34 - INFO  lib_tasks.py run_fast_slow_superslow_tests:3770 Superslow tests passed +Traceback (most recent call last): + File "/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke", line 8, in + sys.exit(program.run()) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 384, in run + self.execute() + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 566, in execute + executor.execute(*self.tasks) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/executor.py", line 129, in execute + result = call.task(*args, **call.kwargs) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "/local/home/gsaggese/src/sasm-lime4/amp/helpers/lib_tasks.py", line 3772, in run_fast_slow_superslow_tests + raise RuntimeError("Some tests failed") +RuntimeError: Some tests failed diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt new file mode 100644 index 000000000..4168d0576 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt @@ -0,0 +1,41 @@ +HH:MM:SS - INFO lib_tasks_pytest.py 
pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 +pytest dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 + +# Test_get_configs_from_command_line1.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked +ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' + + +_______________ + +# Test_TiledBacktest_E8d.test_end_to_end_slow1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 + self._test(config_builder, experiment_builder, run_model_extra_opts) + File "$GIT_ROOT/dataflow/model/run_prod_model_flow.py", line 175, in _test + self.check_string(configs_signature, fuzzy_match=True, tag=tag) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + 
hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' +################################################################################ + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt new file mode 100644 index 000000000..955be2326 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt @@ -0,0 +1,396 @@ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532280Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532780Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533026Z =================================== FAILURES =================================== +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533724Z _______________________ TestDryRunTasks1.test_git_clean ________________________ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4534485Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535021Z File "/app/helpers/test/test_lib_tasks.py", line 189, in test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535516Z 
self.dry_run(target) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535998Z File "/app/helpers/test/test_lib_tasks.py", line 170, in dry_run +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536460Z self.check_string(act) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536939Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537409Z is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537889Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538319Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538746Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539220Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539617Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540266Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540856Z ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541568Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542194Z report_memory_usage=False report_cpu_usage=False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542753Z ## git_clean: dry_run=False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4543426Z > git clean -fd >/dev/null 2>&1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544154Z > git submodule foreach 'git clean -fd >/dev/null 2>&1' +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544817Z > git clean -fd +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4545492Z > git submodule foreach 'git clean -fd' +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4546194Z find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4546664Z Diff with: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547307Z > vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547931Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548273Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548887Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4549484Z EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550166Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550742Z exp = r"""report_memory_usage=False report_cpu_usage=False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4551201Z ## git_clean: dry_run=False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4552300Z find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4553032Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4637180Z ____ Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 ____ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4638716Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4641551Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642218Z self._test_fit_over_backtest_period1(system, output_col_name) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642836Z File "/app/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4643452Z self.check_string(actual, fuzzy_match=True, purify_text=True) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644017Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644483Z is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644949Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645544Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645997Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646473Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646868Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4647564Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4648290Z FUZZY ACTUAL vs 
EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649091Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649668Z system_config ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650153Z dag_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650634Z filter_ath: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651136Z col_mode: replace_all ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651652Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652148Z start_time: 09:30:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652605Z end_time: 16:00:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653072Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653690Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654208Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654696Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655175Z rule: 5T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655784Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656444Z vwap_groups: [('close', 'volume', 'vwap')] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656971Z reindex_like_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657465Z join_output_with_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657953Z compute_ret_0: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4658550Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4659057Z out_col_group: () ( +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4659540Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660015Z mode: log_rets ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660489Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660968Z close: close.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661457Z vwap: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661945Z twap: twap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4662415Z compute_vol: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663124Z in_col_group: ('vwap.ret_0',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663731Z out_col_group: ('vwap.ret_0.vol',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664238Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664863Z permitted_exceptions: (,) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665372Z adjust_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665979Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666481Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666971Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4667587Z term1_col: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668076Z term2_col: vwap.ret_0.vol ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668576Z out_col: vwap.ret_0.vol_adj ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669067Z term2_delay: 2 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669552Z operation: div ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670017Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670500Z compress_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671111Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671611Z out_col_group: () ( +Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4672094Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4672591Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673126Z dag_builder_object: nid_prefix= ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673669Z dag_builder_class: Example1_DagBuilder < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674415Z system_class: Example1_ForecastSystem < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674950Z dag_config_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675420Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675903Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676387Z rule: 1T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676933Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4695757Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696094Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696806Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4697507Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698303Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698867Z exp = r"""################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699274Z system_config +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699701Z 
################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700106Z dag_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700449Z filter_ath: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700810Z col_mode: replace_all +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701206Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701589Z start_time: 09:30:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701975Z end_time: 16:00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702320Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702851Z in_col_groups: [('close',), ('volume',), ('feature1',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703292Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703670Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704018Z rule: 5T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704700Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705360Z vwap_groups: [('close', 'volume', 'vwap')] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705811Z reindex_like_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4706221Z join_output_with_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707219Z compute_ret_0: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707770Z in_col_groups: [('close',), ('vwap',), ('twap',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708195Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708558Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708947Z mode: log_rets +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709311Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709685Z close: close.ret_0 +Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4710049Z vwap: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710416Z twap: twap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710775Z compute_vol: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711241Z in_col_group: ('vwap.ret_0',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711723Z out_col_group: ('vwap.ret_0.vol',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712125Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712643Z permitted_exceptions: (,) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713077Z adjust_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713595Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714124Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714543Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714941Z term1_col: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715350Z term2_col: vwap.ret_0.vol +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715752Z out_col: vwap.ret_0.vol_adj +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716142Z term2_delay: 2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716723Z operation: div +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717096Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717445Z compress_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717947Z in_col_groups: [('vwap.ret_0.vol_adj',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718433Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718799Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719194Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719637Z dag_builder_object: nid_prefix= +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4720674Z dag_builder_class: Example1_DagBuilder 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721179Z system_class: Example1_ForecastSystem +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721575Z dag_config_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721934Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722300Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722662Z rule: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4723350Z dag_runner_object: > +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724172Z market_data_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724575Z asset_id_col_name: asset_id +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724989Z asset_ids: [1467591036, 3303714233] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725381Z backtest_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725884Z universe_str: example1_v1-top2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726310Z trading_period_str: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726712Z time_interval_str: Jan2000 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727275Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727822Z end_timestamp: 2000-01-31 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728377Z market_object: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728897Z dag_object: name=None +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4729261Z mode=strict +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4732047Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4734910Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 
'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4735787Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736220Z vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736654Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737097Z 1467591036 3303714233 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737459Z end_ts +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737924Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738406Z 2000-01-01 10:05:00-05:00 0.98 0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738892Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739261Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739878Z ________ Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 _________ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4740430Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741028Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741597Z self._test_fit_over_period1( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742128Z File "/app/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742734Z self.check_string(actual, fuzzy_match=True, purify_text=True) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743389Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743841Z 
is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744328Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744767Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745216Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745672Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746068Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746708Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4747409Z FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748182Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748848Z system_config ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749340Z dag_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749833Z filter_ath: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750343Z col_mode: replace_all ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750846Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751336Z start_time: 09:30:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751805Z end_time: 16:00:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752323Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752956Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4753459Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754106Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754581Z rule: 5T ( +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4755213Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4755868Z vwap_groups: [('close', 'volume', 'vwap')] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756379Z reindex_like_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756884Z join_output_with_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757379Z compute_ret_0: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757975Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758480Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758952Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759435Z mode: log_rets ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759916Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4760412Z close: close.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4786435Z vwap: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787169Z twap: twap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787693Z compute_vol: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4788439Z in_col_group: ('vwap.ret_0',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789095Z out_col_group: ('vwap.ret_0.vol',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789615Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790296Z permitted_exceptions: (,) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790847Z adjust_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4791745Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792292Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792798Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4793324Z term1_col: vwap.ret_0 ( 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794102Z term2_col: vwap.ret_0.vol ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794647Z out_col: vwap.ret_0.vol_adj ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795177Z term2_delay: 2 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795669Z operation: div ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796310Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796825Z compress_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4797500Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798031Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798529Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799069Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799632Z dag_builder_object: nid_prefix= ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800220Z dag_builder_class: Example1_DagBuilder < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800794Z system_class: Example1_ForecastSystem < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801362Z dag_config_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801873Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802385Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802893Z rule: 1T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4803458Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4817745Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818089Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818751Z 
-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4819543Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820358Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820978Z exp = r"""################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821394Z system_config +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821833Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822241Z dag_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822588Z filter_ath: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822959Z col_mode: replace_all +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823370Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823780Z start_time: 09:30:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824182Z end_time: 16:00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824541Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825093Z in_col_groups: [('close',), ('volume',), ('feature1',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825551Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825951Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4826312Z rule: 5T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827017Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827800Z vwap_groups: [('close', 'volume', 'vwap')] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4828262Z reindex_like_input: False +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4828701Z join_output_with_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829101Z compute_ret_0: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829632Z in_col_groups: [('close',), ('vwap',), ('twap',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830079Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830488Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830867Z mode: log_rets +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831247Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831634Z close: close.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832038Z vwap: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832422Z twap: twap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832798Z compute_vol: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833272Z in_col_group: ('vwap.ret_0',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833972Z out_col_group: ('vwap.ret_0.vol',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834393Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834954Z permitted_exceptions: (,) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835410Z adjust_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835964Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836403Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836795Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837206Z term1_col: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837629Z term2_col: vwap.ret_0.vol +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838047Z out_col: vwap.ret_0.vol_adj +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838451Z term2_delay: 2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838829Z operation: div +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4839211Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4839571Z compress_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840090Z in_col_groups: [('vwap.ret_0.vol_adj',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840527Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840906Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841320Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841792Z dag_builder_object: nid_prefix= +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842257Z dag_builder_class: Example1_DagBuilder +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842739Z system_class: Example1_ForecastSystem +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843303Z dag_config_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843654Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844032Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844412Z rule: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845150Z dag_runner_object: > +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845882Z market_data_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846282Z asset_id_col_name: asset_id +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846700Z asset_ids: [1467591036, 3303714233] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847096Z backtest_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847559Z universe_str: example1_v1-top2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847978Z trading_period_str: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848377Z time_interval_str: Jan2000 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848941Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4849581Z end_timestamp: 2000-01-31 00:00:00+00:00 +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4850153Z market_object: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4850683Z dag_object: name=None +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4851051Z mode=strict +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4853864Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4856657Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857531Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857948Z vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858372Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858854Z 1467591036 3303714233 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859226Z end_ts +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859697Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860188Z 2000-01-01 10:05:00-05:00 0.98 0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860695Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4861053Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4862316Z ============================= slowest 3 durations ============================== +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4866438Z 26.87s setup oms/test/test_broker.py::TestDatabaseBroker1::test1 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4867269Z 6.46s setup datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868189Z 6.24s setup datapull/talos/data/client/test/test_talos_clients.py::TestTalosSqlRealTimeImClient1::test_build_numerical_to_string_id_mapping +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868892Z =========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4956618Z FAILED helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - Run... +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4957400Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4958274Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt new file mode 100644 index 000000000..94e600076 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt @@ -0,0 +1,399 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file 
'$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 +pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 +pytest helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean + +# TestDryRunTasks1.test_git_clean +Traceback (most recent call last): + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 189, in test_git_clean + self.dry_run(target) + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 170, in dry_run + self.check_string(act) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z +report_memory_usage=False report_cpu_usage=False ( +## git_clean: dry_run=False ( + > git clean -fd >/dev/null 2>&1 + > git submodule foreach 'git clean -fd >/dev/null 2>&1' + > git clean -fd + > git submodule foreach 'git clean -fd' +find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( +Diff with: +> vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean +-------------------------------------------------------------------------------- +exp = r"""report_memory_usage=False report_cpu_usage=False +## git_clean: dry_run=False +find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf +""" +__ + +# Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 + self._test_fit_over_backtest_period1(system, output_col_name) + File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 + self.check_string(actual, fuzzy_match=True, purify_text=True) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z +system_config ( +dag_config: ( +filter_ath: ( +col_mode: replace_all ( +transformer_kwargs: ( +start_time: 
09:30:00 ( +end_time: 16:00:00 ( +resample: ( +in_col_groups: [('close',), ('volume',), ('feature1',)] ( +out_col_group: () ( +transformer_kwargs: ( +rule: 5T ( +resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +vwap_groups: [('close', 'volume', 'vwap')] ( +reindex_like_input: False ( +join_output_with_input: False ( +compute_ret_0: ( +in_col_groups: [('close',), ('vwap',), ('twap',)] ( +out_col_group: () ( +transformer_kwargs: ( +mode: log_rets ( +col_mapping: ( +close: close.ret_0 ( +vwap: vwap.ret_0 ( +twap: twap.ret_0 ( +compute_vol: ( +in_col_group: ('vwap.ret_0',) ( +out_col_group: ('vwap.ret_0.vol',) ( +drop_nans: True ( +permitted_exceptions: (,) ( +adjust_rets: ( +in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +out_col_group: () ( +transformer_kwargs: ( +term1_col: vwap.ret_0 ( +term2_col: vwap.ret_0.vol ( +out_col: vwap.ret_0.vol_adj ( +term2_delay: 2 ( +operation: div ( +drop_nans: True ( +compress_rets: ( +in_col_groups: [('vwap.ret_0.vol_adj',)] ( +out_col_group: () ( +col_mapping: ( +vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +dag_builder_object: nid_prefix= ( +dag_builder_class: Example1_DagBuilder < +system_class: Example1_ForecastSystem < +dag_config_config: ( +resample: ( +transformer_kwargs: ( +rule: 1T ( +dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +-------------------------------------------------------------------------------- +exp = r"""################################################################################ +system_config 
+################################################################################ +dag_config: + filter_ath: + col_mode: replace_all + transformer_kwargs: + start_time: 09:30:00 + end_time: 16:00:00 + resample: + in_col_groups: [('close',), ('volume',), ('feature1',)] + out_col_group: () + transformer_kwargs: + rule: 5T + resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] + vwap_groups: [('close', 'volume', 'vwap')] + reindex_like_input: False + join_output_with_input: False + compute_ret_0: + in_col_groups: [('close',), ('vwap',), ('twap',)] + out_col_group: () + transformer_kwargs: + mode: log_rets + col_mapping: + close: close.ret_0 + vwap: vwap.ret_0 + twap: twap.ret_0 + compute_vol: + in_col_group: ('vwap.ret_0',) + out_col_group: ('vwap.ret_0.vol',) + drop_nans: True + permitted_exceptions: (,) + adjust_rets: + in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] + out_col_group: () + transformer_kwargs: + term1_col: vwap.ret_0 + term2_col: vwap.ret_0.vol + out_col: vwap.ret_0.vol_adj + term2_delay: 2 + operation: div + drop_nans: True + compress_rets: + in_col_groups: [('vwap.ret_0.vol_adj',)] + out_col_group: () + col_mapping: + vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +dag_builder_object: nid_prefix= +dag_builder_class: Example1_DagBuilder +system_class: Example1_ForecastSystem +dag_config_config: + resample: + transformer_kwargs: + rule: 1T +dag_runner_object: > +market_data_config: + asset_id_col_name: asset_id + asset_ids: [1467591036, 3303714233] +backtest_config: + universe_str: example1_v1-top2 + trading_period_str: 1T + time_interval_str: Jan2000 + start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 + end_timestamp: 2000-01-31 00:00:00+00:00 +market_object: +dag_object: name=None +mode=strict +nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), 
('read_data', {'stage': })] +edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +################################################################################ +vwap.ret_0.vol_adj.c +################################################################################ + 1467591036 3303714233 +end_ts +2000-01-01 10:00:00-05:00 -0.98 -0.98 +2000-01-01 10:05:00-05:00 0.98 0.98 +2000-01-01 10:10:00-05:00 -0.98 -0.98 +""" +______ + +# Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 + self._test_fit_over_period1( + File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 + self.check_string(actual, fuzzy_match=True, purify_text=True) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z +system_config ( +dag_config: ( +filter_ath: ( +col_mode: replace_all ( +transformer_kwargs: ( +start_time: 09:30:00 ( +end_time: 16:00:00 ( +resample: ( +in_col_groups: [('close',), ('volume',), ('feature1',)] ( +out_col_group: () ( +transformer_kwargs: ( +rule: 5T ( +resampling_groups: 
[({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +vwap_groups: [('close', 'volume', 'vwap')] ( +reindex_like_input: False ( +join_output_with_input: False ( +compute_ret_0: ( +in_col_groups: [('close',), ('vwap',), ('twap',)] ( +out_col_group: () ( +transformer_kwargs: ( +mode: log_rets ( +col_mapping: ( +close: close.ret_0 ( +vwap: vwap.ret_0 ( +twap: twap.ret_0 ( +compute_vol: ( +in_col_group: ('vwap.ret_0',) ( +out_col_group: ('vwap.ret_0.vol',) ( +drop_nans: True ( +permitted_exceptions: (,) ( +adjust_rets: ( +in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +out_col_group: () ( +transformer_kwargs: ( +term1_col: vwap.ret_0 ( +term2_col: vwap.ret_0.vol ( +out_col: vwap.ret_0.vol_adj ( +term2_delay: 2 ( +operation: div ( +drop_nans: True ( +compress_rets: ( +in_col_groups: [('vwap.ret_0.vol_adj',)] ( +out_col_group: () ( +col_mapping: ( +vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +dag_builder_object: nid_prefix= ( +dag_builder_class: Example1_DagBuilder < +system_class: Example1_ForecastSystem < +dag_config_config: ( +resample: ( +transformer_kwargs: ( +rule: 1T ( +dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +-------------------------------------------------------------------------------- +exp = r"""################################################################################ +system_config +################################################################################ +dag_config: + filter_ath: + col_mode: replace_all + transformer_kwargs: + start_time: 09:30:00 + end_time: 16:00:00 + resample: + in_col_groups: [('close',), ('volume',), 
('feature1',)] + out_col_group: () + transformer_kwargs: + rule: 5T + resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] + vwap_groups: [('close', 'volume', 'vwap')] + reindex_like_input: False + join_output_with_input: False + compute_ret_0: + in_col_groups: [('close',), ('vwap',), ('twap',)] + out_col_group: () + transformer_kwargs: + mode: log_rets + col_mapping: + close: close.ret_0 + vwap: vwap.ret_0 + twap: twap.ret_0 + compute_vol: + in_col_group: ('vwap.ret_0',) + out_col_group: ('vwap.ret_0.vol',) + drop_nans: True + permitted_exceptions: (,) + adjust_rets: + in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] + out_col_group: () + transformer_kwargs: + term1_col: vwap.ret_0 + term2_col: vwap.ret_0.vol + out_col: vwap.ret_0.vol_adj + term2_delay: 2 + operation: div + drop_nans: True + compress_rets: + in_col_groups: [('vwap.ret_0.vol_adj',)] + out_col_group: () + col_mapping: + vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +dag_builder_object: nid_prefix= +dag_builder_class: Example1_DagBuilder +system_class: Example1_ForecastSystem +dag_config_config: + resample: + transformer_kwargs: + rule: 1T +dag_runner_object: > +market_data_config: + asset_id_col_name: asset_id + asset_ids: [1467591036, 3303714233] +backtest_config: + universe_str: example1_v1-top2 + trading_period_str: 1T + time_interval_str: Jan2000 + start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 + end_timestamp: 2000-01-31 00:00:00+00:00 +market_object: +dag_object: name=None +mode=strict +nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 
'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +################################################################################ +vwap.ret_0.vol_adj.c +################################################################################ + 1467591036 3303714233 +end_ts +2000-01-01 10:00:00-05:00 -0.98 -0.98 +2000-01-01 10:05:00-05:00 0.98 0.98 +2000-01-01 10:10:00-05:00 -0.98 -0.98 +""" + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt new file mode 100644 index 000000000..d0b931699 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt @@ -0,0 +1,7 @@ + +```python + +def check_empty_lines(): + print("Check empty lines are present!") + +``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + 
```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt new file mode 100644 index 000000000..de229ba17 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt new file mode 100644 index 000000000..fb18a0a9c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt @@ -0,0 +1,9 @@ +```python +def no_start_python(): + print("No mention of python at the start")``` +``` + +``` + A markdown paragraph contains + delimiters that needs to be removed. 
+``` \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt new file mode 100644 index 000000000..6c1304cfb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt @@ -0,0 +1,7 @@ +Text before +:::: +::::{.column width=40%} +Middle text +:::columns +::::{.column width=60%} +Text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt new file mode 100644 index 000000000..0ac895652 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt @@ -0,0 +1,2 @@ +:::: +::: \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt new file mode 100644 index 
000000000..9f8585df5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt @@ -0,0 +1,3 @@ +Consecutive headers increase by more than one level: + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt new file mode 100644 index 000000000..ab5bbf048 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt @@ -0,0 +1,71 @@ +################################################################################ +level=1, description='Chapter 1' +################################################################################ +- **Chapter 1** + - Section 1.1 + - Section 1.2 +- Chapter 2 +################################################################################ +level=2, description='Section 1.1' +################################################################################ +- Chapter 1 + - **Section 1.1** + - Subsection 1.1.1 + - Subsection 1.1.2 + - Section 1.2 +- Chapter 2 +################################################################################ +level=3, description='Subsection 1.1.1' +################################################################################ +- Chapter 1 + - Section 1.1 + - **Subsection 1.1.1** + - Subsection 1.1.2 + - Section 1.2 +- Chapter 2 +################################################################################ +level=3, description='Subsection 1.1.2' 
+################################################################################ +- Chapter 1 + - Section 1.1 + - Subsection 1.1.1 + - **Subsection 1.1.2** + - Section 1.2 +- Chapter 2 +################################################################################ +level=2, description='Section 1.2' +################################################################################ +- Chapter 1 + - Section 1.1 + - **Section 1.2** +- Chapter 2 +################################################################################ +level=1, description='Chapter 2' +################################################################################ +- Chapter 1 +- **Chapter 2** + - Section 2.1 + - Section 2.2 +################################################################################ +level=2, description='Section 2.1' +################################################################################ +- Chapter 1 +- Chapter 2 + - **Section 2.1** + - Subsection 2.1.1 + - Section 2.2 +################################################################################ +level=3, description='Subsection 2.1.1' +################################################################################ +- Chapter 1 +- Chapter 2 + - Section 2.1 + - **Subsection 2.1.1** + - Section 2.2 +################################################################################ +level=2, description='Section 2.2' +################################################################################ +- Chapter 1 +- Chapter 2 + - Section 2.1 + - **Section 2.2** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt new file mode 100644 index 000000000..df89fcd63 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt @@ -0,0 +1,40 @@ +################################################################################ +level=1, description='Models' +################################################################################ +- **Models** + - Naive Bayes + - Decision trees + - Random forests + - Linear models +################################################################################ +level=2, description='Naive Bayes' +################################################################################ +- Models + - **Naive Bayes** + - Decision trees + - Random forests + - Linear models +################################################################################ +level=2, description='Decision trees' +################################################################################ +- Models + - Naive Bayes + - **Decision trees** + - Random forests + - Linear models +################################################################################ +level=2, description='Random forests' +################################################################################ +- Models + - Naive Bayes + - Decision trees + - **Random forests** + - Linear models +################################################################################ +level=2, description='Linear models' +################################################################################ +- Models + - Naive Bayes + - Decision trees + - Random forests + - **Linear models** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt new file mode 100644 
index 000000000..1c6176761 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt @@ -0,0 +1,40 @@ +OrderedDict([('build-system', + OrderedDict([('build-backend', 'poetry.masonry.api'), + ('requires', ['poetry>=0.12'])])), + ('tool', + OrderedDict([('poetry', + OrderedDict([('authors', ['']), + ('dependencies', + OrderedDict([('awscli', '*'), + ('boto3', '*'), + ('bs4', '*'), + ('flaky', '*'), + ('fsspec', '*'), + ('gluonts', '*'), + ('invoke', '*'), + ('jsonpickle', '*'), + ('jupyter', '*'), + ('lxml', '*'), + ('matplotlib', '*'), + ('mxnet', '*'), + ('networkx', '*'), + ('pandas', '^1.1.0'), + ('psycopg2', '*'), + ('pyarrow', '*'), + ('pytest', '^6.0.0'), + ('pytest-cov', '*'), + ('pytest-instafail', + '*'), + ('pytest-xdist', '*'), + ('python', '^3.7'), + ('pywavelets', '*'), + ('requests', '*'), + ('s3fs', '*'), + ('seaborn', '*'), + ('sklearn', '*'), + ('statsmodels', '*'), + ('tqdm', '*')])), + ('description', ''), + ('dev-dependencies', OrderedDict()), + ('name', 'lm'), + ('version', '0.1.0')]))]))]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt new file mode 100644 index 000000000..66475c930 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt @@ -0,0 +1,4 @@ +time data "28-07-2023 15:05:13" doesn't match format "%Y%m%d_%H%M%S", at position 0. 
You might want to try: + - passing `format` if your strings have a consistent format; + - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format; + - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this. \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt new file mode 100644 index 000000000..41895df11 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt @@ -0,0 +1 @@ +Unknown datetime string format, unable to parse: qwe28abc07-201234, at position 0 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt new file mode 100644 index 000000000..0498168e2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt @@ -0,0 +1,16 @@ + +################################################################################ +################################################################################ +_system() failed +################################################################################ +################################################################################ +# _system: cmd='(ls this_file_doesnt_exist) 2>&1', 
abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cmd='(ls this_file_doesnt_exist) 2>&1' +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +- rc='' +- output=' +ls: cannot access 'this_file_doesnt_exist': No such file or directory +' +- Output saved in 'tmp.system_output.txt' +- Command saved in 'tmp.system_cmd.sh' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv new file mode 100644 index 000000000..abc3dac80 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv @@ -0,0 +1,2 @@ +A,B,C,D,E +1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py new file mode 100644 index 000000000..7b0473b8a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py @@ -0,0 +1,136 @@ +import filecmp +import os +import pathlib +import shutil +from typing import List, Tuple + +import dev_scripts_helpers.system_tools.create_links as dshstcrli +import helpers.hio as hio +import helpers.hunit_test as hunitest + + +# ############################################################################# +# Test_create_links +# 
############################################################################# + + +class Test_create_links(hunitest.TestCase): + """ + Unit tests for the `create_links.py` script. + """ + + def create_file( + self, dir_path: pathlib.Path, file_name: str, content: str + ) -> pathlib.Path: + """ + Create a file with the given content in the specified directory. + + This helper function ensures the directory exists before + creating the file and writing the specified content into it. + + :param dir_path: path to the directory where the file will be + created + :param file_name: name of the file to create + :param content: content to write into the file + :return: full path to the created file + """ + dir_path = pathlib.Path(dir_path) + file_path = dir_path / file_name + hio.to_file(file_name=str(file_path), txt=content) + return file_path + + def test__find_common_files(self) -> None: + """ + Test identifying common files between two directories. + + Create two directories, each containing identical files, + and checks that the `_find_common_files` function identifies these files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + src_dir.mkdir(parents=True, exist_ok=True) + dst_dir.mkdir(parents=True, exist_ok=True) + file1_src: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + file1_dst: pathlib.Path = shutil.copy(file1_src, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + str(src_dir), str(dst_dir) + ) + self.assertEqual(len(common_files), 1) + self.assertEqual(common_files[0], (str(file1_src), str(file1_dst))) + + def test__replace_with_links_absolute(self) -> None: + """ + Test replacing common files with absolute symbolic links. 
+ + Create identical files in two directories and replace the files + in the destination directory with absolute symbolic links + pointing to the source files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + shutil.copy(file1, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + str(src_dir), str(dst_dir) + ) + dshstcrli._replace_with_links(common_files, use_relative_paths=False) + for _, dst_file in common_files: + self.assertTrue(os.path.islink(dst_file)) + self.assert_equal(os.readlink(dst_file), str(file1)) + + def test__replace_with_links_relative(self) -> None: + """ + Test replacing common files with relative symbolic links. + + Create identical files in two directories and replace the files + in the destination directory with relative symbolic links + pointing to the source files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + shutil.copy(file1, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + src_dir, dst_dir + ) + dshstcrli._replace_with_links(common_files, use_relative_paths=True) + for src_file, dst_file in common_files: + self.assertTrue(os.path.islink(dst_file)) + expected_link: str = os.path.relpath( + src_file, os.path.dirname(dst_file) + ) + self.assert_equal(os.readlink(dst_file), expected_link) + + def test__stage_links(self) -> None: + """ + Test replacing symbolic links with writable file copies. + + Create symbolic links in a directory and then stage them by + replacing each link with a copy of the original file it points + to. 
+ """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + src_dir.mkdir(parents=True, exist_ok=True) + dst_dir.mkdir(parents=True, exist_ok=True) + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + link1: pathlib.Path = dst_dir / "file1.txt" + os.symlink(file1, link1) + symlinks: List[str] = dshstcrli._find_symlinks(dst_dir) + dshstcrli._stage_links(symlinks) + for link in symlinks: + self.assertFalse(os.path.islink(link)) + self.assertTrue(os.path.isfile(link)) + self.assertTrue(filecmp.cmp(link, file1, shallow=False)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py new file mode 100644 index 000000000..98994cb5a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py @@ -0,0 +1,96 @@ +import asyncio +import logging +from typing import Optional + +import helpers.hasyncio as hasynci +import helpers.hdatetime as hdateti +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_hasyncio1 +# ############################################################################# + + +class Test_hasyncio1(hunitest.TestCase): + """ + Execute a workload using different time semantics: + + - real time + - simulated time + """ + + @staticmethod + async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: + """ + Coroutine simulating a workload waiting for 1s. 
+ """ + + def _print_time() -> None: + true_wall_clock_time = hdateti.get_current_time("ET") + _LOG.debug("wall_clock_time=%s", true_wall_clock_time) + event_loop_time = get_wall_clock_time() + _LOG.debug("event_loop_time=%s", event_loop_time) + + _print_time() + # The execution here is just waiting. + _LOG.debug(" -> execute") + await asyncio.sleep(1.0) + # + _print_time() + + def run_test( + self, + event_loop: Optional[asyncio.AbstractEventLoop], + get_wall_clock_time: hdateti.GetWallClockTime, + ) -> None: + coroutine = self.workload(get_wall_clock_time) + hasynci.run(coroutine, event_loop=event_loop) + + def test_real_time1(self) -> None: + """ + Use real-time semantic. + + In this case: + ``` + wall_clock_time=2021-09-27 20:40:43.775683-04:00 + event_loop_time=2021-09-27 20:40:43.799074-04:00 + -> execute + wall_clock_time=2021-09-27 20:40:44.808990-04:00 + event_loop_time=2021-09-27 20:40:44.812472-04:00 + ``` + + - the wall clock time and the event loop time both advance + """ + # Use the wall clock time with no special event loop. + get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") + event_loop = None + # Run. + self.run_test(event_loop, get_wall_clock_time) + + def test_simulated_time1(self) -> None: + """ + Use simulated time semantic. + + In this case: + ``` + wall_clock_time=2021-09-27 20:38:47.843501-04:00 + event_loop_time=2021-09-27 20:38:47.841555-04:00 + -> execute + wall_clock_time=2021-09-27 20:38:47.868272-04:00 + event_loop_time=2021-09-27 20:38:48.841555-04:00 + ``` + + - the wall_clock time doesn't advance since the execution is instantaneous + - the event loop time moves forward 1 sec + """ + # Use the solipsistic event loop to simulate the real-time faster. + with hasynci.solipsism_context() as event_loop: + # Use the simulated wall clock time. + get_wall_clock_time = lambda: hdateti.get_current_time( + tz="ET", event_loop=event_loop + ) + # Run. 
+ self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py new file mode 100644 index 000000000..5469e009e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py @@ -0,0 +1,276 @@ +import os +import unittest.mock as umock +from typing import Optional + +import boto3 +import pytest +from botocore.client import BaseClient +from moto import mock_aws + +import helpers.haws as haws +import helpers.hunit_test as hunitest + + +# ############################################################################# +# Haws_test_case +# ############################################################################# + + +class Haws_test_case(hunitest.TestCase): + @pytest.fixture(autouse=True, scope="class") + def aws_credentials(self) -> None: + """ + Mocked AWS credentials for moto. + """ + os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "testing" + os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["MOCK_AWS_SECURITY_TOKEN"] = "testing" + os.environ["MOCK_AWS_SESSION_TOKEN"] = "testing" + os.environ["MOCK_AWS_DEFAULT_REGION"] = "us-east-1" + + +# ############################################################################# +# Test_get_session +# ############################################################################# + + +class Test_get_session(Haws_test_case): + @pytest.fixture(autouse=True) + def set_up_test(self) -> None: + os.environ["MOCK_AWS_S3_BUCKET"] = "mock_aws_bucket" + + @mock_aws + @umock.patch("boto3.Session") + def test_get_session1(self, mock_boto3_session: umock.Mock) -> None: + """ + Test that `haws.get_session` correctly return a session without region + parameter. + """ + aws_profile = "__mock__" + # Create a mock session. 
+ mock_session = umock.MagicMock() + mock_boto3_session.return_value = mock_session + # Test that get_session returns a session object. + session = haws.get_session(aws_profile) + self.assertEqual(session, mock_session) + # Verify that `boto3.Session` was called with the correct profile. + mock_boto3_session.assert_called_once_with(profile_name=aws_profile) + + @mock_aws + @umock.patch("boto3.Session") + def test_get_session2(self, mock_boto3_session: umock.Mock) -> None: + """ + Test that `haws.get_session` correctly return a session with region + parameter. + """ + aws_profile = "__mock__" + region = "us-east-1" + # Create a mock session + mock_session = umock.MagicMock() + mock_boto3_session.return_value = mock_session + # Test that `get_session` returns a session object with the specified region. + session = haws.get_session(aws_profile, region=region) + self.assertEqual(session, mock_session) + # Verify that `boto3.Session` was called with the correct profile and region. + mock_boto3_session.assert_called_once_with( + profile_name=aws_profile, region_name=region + ) + + +# ############################################################################# +# Test_get_service_client +# ############################################################################# + + +class Test_get_service_client(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test `haws.get_service_client()` returns a client for S3. + """ + aws_profile = "__mock__" + service_name = "s3" + region = "us-east-1" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name=region, + ) + mock_get_session.return_value = mock_session + # Create mock client for S3. 
+ client = haws.get_service_client( + aws_profile=aws_profile, service_name=service_name, region=region + ) + # Check that the returned client is for the S3 service. + self.assert_equal(client.meta.service_model.service_name, "s3") + # Check for region. + self.assert_equal(client.meta.region_name, region) + + +# ############################################################################# +# Test_get_service_resource +# ############################################################################# + + +class Test_get_service_resource(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_service_resource()` correctly retrieves a S3 + resource. + """ + aws_profile = "__mock__" + service_name = "s3" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name="us-east-1", + ) + mock_get_session.return_value = mock_session + # Create mock S3 bucket. + s3 = boto3.resource("s3") + s3.create_bucket(Bucket="my-test-bucket") + s3_resource = haws.get_service_resource( + aws_profile=aws_profile, service_name=service_name + ) + # Get all `S3` buckets. + buckets = list(s3_resource.buckets.all()) + bucket_names = [bucket.name for bucket in buckets] + # Check. + self.assertIn("my-test-bucket", bucket_names) + + +# ############################################################################# +# Test_get_task_definition_image_url +# ############################################################################# + + +class Test_get_task_definition_image_url(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_service_client") + def test1(self, mock_get_service_client: umock.Mock) -> None: + """ + Test that `get_task_definition_image_url` retrieves correct image URL. + """ + # Mock data. 
+ task_definition_name = "my-task-definition" + mock_image_url = "old_image_url" + region = "us-east-1" + # Mock the return value of `get_service_client`. + mock_client = boto3.client("ecs", region_name=region) + mock_get_service_client.return_value = mock_client + # Create a mock task definition. + mock_client.register_task_definition( + family=task_definition_name, + # The following are required parameters. + containerDefinitions=[ + {"name": "my-container", "image": mock_image_url, "memory": 512} + ], + ) + image_url = haws.get_task_definition_image_url( + task_definition_name, environment="test" + ) + self.assertEqual(image_url, mock_image_url) + + +# ############################################################################# +# Test_update_task_definition +# ############################################################################# + + +class Test_update_task_definition(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_ecs_client") + def test1(self, mock_get_ecs_client: BaseClient) -> None: + """ + Test updating a task definition with a new image URL. + """ + # Mock data. + task_definition_name = "my-task-definition" + old_image_url = "old_image_url" + new_image_url = "new_image_url" + region = "us-east-1" + # Mock the return value of `get_ecs_client`. + mock_client = boto3.client("ecs", region_name=region) + mock_get_ecs_client.return_value = mock_client + # Create a mock task definition. + mock_client.register_task_definition( + family=task_definition_name, + containerDefinitions=[ + {"name": "my-container", "image": old_image_url} + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + # Update task definition. + haws.update_task_definition( + task_definition_name, + new_image_url, + region=region, + environment="test", + ) + # Check if the task definition is updated. 
+ task_description = mock_client.describe_task_definition( + taskDefinition=task_definition_name + ) + updated_image_url = task_description["taskDefinition"][ + "containerDefinitions" + ][0]["image"] + self.assertEqual(updated_image_url, new_image_url) + + +# ############################################################################# +# Test_get_ecs_client +# ############################################################################# + + +class Test_get_ecs_client(Haws_test_case): + def mock_aws_client( + self, mock_get_session: umock.Mock, *, region: Optional[str] = None + ) -> None: + aws_profile = "__mock__" + test_cluster_name = "test-cluster" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name=region or "us-east-1", + ) + mock_get_session.return_value = mock_session + # Create mock ECS client. + ecs_client = boto3.client("ecs", region_name="us-east-1") + ecs_client.create_cluster(clusterName=test_cluster_name) + # Get ECS client. + if region: + test_client = haws.get_ecs_client(aws_profile, region=region) + else: + test_client = haws.get_ecs_client(aws_profile) + # Get the created cluster. + cluster_name = test_client.list_clusters()["clusterArns"][0] + # Check cluster name. + self.assertIn(test_cluster_name, cluster_name) + + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_ecs_client()` correctly return a client to work + with ECS within a specified region. + """ + self.mock_aws_client(mock_get_session, region="us-east-1") + + @mock_aws + @umock.patch("helpers.haws.get_session") + def test2(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_ecs_client()` correctly return a client to work + with ECS without a specified region. 
+ """ + self.mock_aws_client(mock_get_session) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py new file mode 100644 index 000000000..1699e7bcd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py @@ -0,0 +1,1002 @@ +import logging +import tempfile +import time +from typing import Any, Callable, Generator, Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hcache as hcache +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Do not commit this. +# _LOG.debug = _LOG.info + + +# TODO(gp): Use hprint.log_frame +def _LOG_frame(txt: str) -> None: + _LOG.debug("\n%s", hprint.frame(txt)) + + +# ############################################################################# + + +def _get_add_function() -> Callable: + """ + Return a function with the ability to track state, used for testing. + """ + + def func(x: int, y: int) -> int: + func.executed = True # type: ignore[attr-defined] + return x + y + + func.executed = False # type: ignore[attr-defined] + return func + + +def _reset_add_function(func: Callable) -> None: + """ + Reset the function before another execution, so we can verify if it was + executed or not. + + We should do this every time we run the cached version of the + function. 
+ """ + func.executed = False # type: ignore[attr-defined] + hdbg.dassert(not func.executed) # type: ignore[attr-defined] + + +# ############################################################################# + + +# ############################################################################# +# _ResetGlobalCacheHelper +# ############################################################################# + + +class _ResetGlobalCacheHelper(hunitest.TestCase): + """ + Create a global cache for each test method and resets it at every test + method invocation. + """ + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def _remove_all_caches(self) -> None: + """ + Clean and remove all the caches for this test. + """ + cache_type = "all" + hcache.clear_global_cache(cache_type, tag=self.cache_tag, destroy=True) + + def set_up_test(self) -> None: + # Create a tag like "TestCacheFeatures::test_without_caching1". + self.cache_tag = f"{self.__class__.__name__}::{self._testMethodName}" + # Clean all the caches before this test method is run. + self._remove_all_caches() + + def tear_down_test(self) -> None: + # Clean and remove all the caches after the test method is run. + self._remove_all_caches() + + def _get_f_cf_functions( + self, **cached_kwargs: Any + ) -> Tuple[Callable, hcache._Cached]: + """ + Create the intrinsic function `f` and its cached version `cf`. + """ + # Make sure that we are using the unit test cache. + # disk_cache_name = hcache._get_global_cache_name("disk", self.cache_tag) + # _LOG.debug("disk_cache_name=%s", disk_cache_name) + # _LOG.debug( + # "disk_cache_path=%s", hcache._get_global_cache_path("disk", self.cache_tag) + # ) + # TODO(gp): Add an assertion. + # Create the intrinsic function. + f = _get_add_function() + # Create the cached function. 
+ cf = hcache._Cached(f, tag=self.cache_tag, **cached_kwargs) + # Reset all the caches. + hcache.clear_global_cache("all", self.cache_tag) + cf._reset_cache_tracing() + return f, cf + + def _execute_and_check_state( + self, + f: Callable, + cf: hcache._Cached, + val1: int, + val2: int, + exp_cf_state: str, + ) -> None: + """ + Call the function `f(val1, val2)` and its cached function `cf(val1, + val2)` and check whether the intrinsic function was executed and what + caches were used, according to `exp_cf_state`. + """ + # If there was no caching then we must have executed the function. + exp_f_state = exp_cf_state == "no_cache" + _LOG.debug( + "\n%s", + hprint.frame( + f"val1={val1}, val2={val2}, exp_f_state={exp_f_state}, " + f"exp_cf_state={exp_cf_state}", + char1="<", + ), + ) + # Reset the intrinsic function since we want to verify if it was called + # or not when we call the cached function. + _reset_add_function(f) + # Call the cached function. + actual = cf(val1, val2) + expected = val1 + val2 + # Check the result. + self.assertEqual(actual, expected) + # Check which function was executed and what caches were used. + _LOG.debug( + "f.executed=%s vs %s", + f.executed, # type: ignore[attr-defined] + exp_f_state, + ) + _LOG.debug( + "cf.get_last_cache_accessed=%s vs %s", + cf.get_last_cache_accessed(), + exp_cf_state, + ) + self.assertEqual(f.executed, exp_f_state) # type: ignore[attr-defined] + self.assertEqual(cf.get_last_cache_accessed(), exp_cf_state) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheFunctions +# ############################################################################# + + +class TestCacheFunctions(hunitest.TestCase): + def test_get_cache_name1(self) -> None: + """ + Make sure we are using the unit test cache and not the development + cache, by checking the name of the disk cache.
+ """ + cache_tag = "unittest" + disk_cache_name = hcache._get_global_cache_name("disk", cache_tag) + _LOG.debug("disk_cache_name=%s", disk_cache_name) + self.assertIn(cache_tag, disk_cache_name) + + +# ############################################################################# + + +# ############################################################################# +# TestGlobalCache1 +# ############################################################################# + + +class TestGlobalCache1(_ResetGlobalCacheHelper): + def test_without_caching1(self) -> None: + """ + If we execute two times without caching, we get two executions of the + intrinsic function. + """ + f = _get_add_function() + self.assertFalse(f.executed) # type: ignore[attr-defined] + # Execute. + actual = f(3, 4) + self.assertEqual(actual, 7) + # The function was executed. + self.assertTrue(f.executed) # type: ignore[attr-defined] + # Reset. + _reset_add_function(f) + self.assertFalse(f.executed) # type: ignore[attr-defined] + # Execute again. + actual = f(3, 4) + self.assertEqual(actual, 7) + # Check that the function is executed again, since there is no caching. + self.assertTrue(f.executed) # type: ignore[attr-defined] + + def test_with_caching1(self) -> None: + """ + - Leave the caches enabled + - Show that the memory cache is used + """ + # Both memory and disk cache enabled. + f, cf = self._get_f_cf_functions() + # 1) Execute and verify that it is executed, since it was not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Execute and verify that it is not executed, since it's cached. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + + def test_with_caching2(self) -> None: + """ + - Leave the caches enabled + - Cache different values + """ + # Both memory and disk cache enabled. + f, cf = self._get_f_cf_functions() + # 1) Execute and verify that it is executed, since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Use a different workload. + _LOG.debug("\n%s", hprint.frame("Execute")) + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 3) Execute the second time: verify that it is not executed, since cached. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 4) Use a different workload: not executed since cached. + _LOG.debug("\n%s", hprint.frame("Execute")) + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") + + def test_with_caching3(self) -> None: + """ + - Disable both mem and disk cache + - Cache a single value + """ + # Disable both memory and disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=False + ) + # 1) Execute the first time: executed since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Execute the second time: executed since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + + def test_with_caching4(self) -> None: + """ + - Disable only the disk cache + - Cache different values + """ + # Use only memory cache. 
+ f, cf = self._get_f_cf_functions( + use_mem_cache=True, use_disk_cache=False + ) + # 1) Execute and verify that it is executed since not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Execute the second time: verify that it was cached from memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") + + def test_with_caching5(self) -> None: + """ + - Disable only the memory cache + - Cache different values + """ + # Use only disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=True + ) + # 1) Verify that it is executed since there is no cache. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="disk") + + # //////////////////////////////////////////////////////////////////////////// + + def test_with_caching_mem_reset(self) -> None: + """ + - Use only the memory cache + - Execute and cache + - Reset the mem cache + - Execute again + - Check that the cached function is recomputed + """ + # Use only memory cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=True, use_disk_cache=False + ) + # 1) Verify that it is executed, since it's not cached.
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Reset memory cache. + _LOG.debug("\n%s", hprint.frame("Reset memory cache")) + hcache.clear_global_cache("mem", self.cache_tag) + # 4) Verify that it is executed, since the cache was emptied. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + + def test_with_caching_disk_reset(self) -> None: + """ + Same as `test_with_caching_mem_reset()` but using the disk cache. + """ + # Use only disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=True + ) + # 1) Verify that it is executed, since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since cached in disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Reset disk cache. + _LOG.debug("\n%s", hprint.frame("Reset memory cache")) + hcache.clear_global_cache("disk", self.cache_tag) + # 4) Verify that it is executed, since the cache was emptied. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + + def test_with_caching_mem_reset2(self) -> None: + """ + - Use both caches + - Execute and cache + - Reset the mem cache + - Execute again + - Check that the cached value is found in the disk cache + """ + # Use both memory and disk cache + f, cf = self._get_f_cf_functions(use_mem_cache=True, use_disk_cache=True) + # 1) Verify that it is executed. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Reset memory cache. + hcache.clear_global_cache("mem", self.cache_tag) + # 4) Verify that it is not executed, since it's in the disk cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + # //////////////////////////////////////////////////////////////////////////// + + def test_redefined_function(self) -> None: + """ + If the cached function is redefined, but it's still the same, then the + intrinsic function should not be recomputed. + """ + # Define the function inline imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + add = _get_add_function() + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 3) Redefine the function inline. + _LOG.debug("\n%s", hprint.frame("Redefine function")) + add = _get_add_function() + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 4) Execute the third time. Should still use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 5) Execute the fourth time. Should still use memory cache. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 6) Check that a call with other arguments misses the cache. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state( + add, cached_add, 3, 4, exp_cf_state="no_cache" + ) + + def test_changed_function(self) -> None: + """ + If the function is redefined, but the code is not the same, then the + intrinsic function should be recomputed. + """ + # Define the function imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + + def add(x: int, y: int) -> int: + add.executed = True # type: ignore[attr-defined] + return x + y + + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 3) Redefine the function with different code. + _LOG.debug("\n%s", hprint.frame("Redefine function")) + + # pylint: disable=function-redefined + def add(x: int, y: int) -> int: # type: ignore[no-redef] + add.executed = True # type: ignore[attr-defined] + z = x + y + return z + + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 4) Execute the third time. Must recompute since the code changed. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 5) Execute the fourth time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 6) Check that a call with other arguments misses the cache.
+ _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state( + add, cached_add, 3, 4, exp_cf_state="no_cache" + ) + + +# ############################################################################# + + +# ############################################################################# +# _ResetFunctionSpecificCacheHelper +# ############################################################################# + + +class _ResetFunctionSpecificCacheHelper(_ResetGlobalCacheHelper): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test2() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test2(self) -> None: + self.set_up_test() + # Create a temp directory to store the cache. + self.disk_cache_dir = tempfile.mkdtemp() + # Clear global cache. + hcache.clear_global_cache("all", tag=self.cache_tag) + + +# ############################################################################# +# TestFunctionSpecificCache1 +# ############################################################################# + + +class TestFunctionSpecificCache1(_ResetFunctionSpecificCacheHelper): + def test_with_caching1(self) -> None: + """ + - Test using the function-specific disk cache + - Clear the global cache + - Check that the function-specific disk cache is still used + """ + # Log the global cache info and use a function-specific disk cache. + _LOG.debug("\n%s", hprint.frame("Starting")) + _LOG.debug( + "# get_global_cache_info()=\n%s", + hcache.get_global_cache_info(tag=self.cache_tag), + ) + f, cf = self._get_f_cf_functions( + use_mem_cache=False, + use_disk_cache=True, + disk_cache_path=self.disk_cache_dir, + ) + _LOG.debug( + "# cf.get_function_cache_info()=\n%s", cf.get_function_cache_info() + ) + # 1) Execute and verify that it is executed.
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("clear_global_cache")) + hcache.clear_global_cache("all") + # 4) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + def test_with_caching2(self) -> None: + """ + - Test using the function-specific disk cache + - Disable function-specific cache and switching to global cache + - Test using the global cache + """ + # Use only per-function disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, disk_cache_path=self.disk_cache_dir + ) + # 1) Execute and verify that it is executed. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("clear_global_cache")) + hcache.clear_global_cache("all") + # 3) Execute and verify that it is not executed. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 4) Use the global cache. + _LOG.debug( + "\n%s", hprint.frame("Disable function cache and use global cache") + ) + cf.set_function_cache_path(None) + # 5) Execute and verify that function is executed with global cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 6) Execute. Now we get the value from the memory cache since disabling + # the function cache means enabling the memory cache. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 7) Restore the function-specific cache. + _LOG.debug("\n%s", hprint.frame("Restore function cache")) + cf.set_function_cache_path(self.disk_cache_dir) + # Verify that it is *NOT* executed with specific cache. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCachePerformance +# ############################################################################# + + +class TestCachePerformance(_ResetGlobalCacheHelper): + @staticmethod + # pylint: disable=unused-argument + def _computation(*args: Any) -> None: + """ + Simulate work. + + :param args: throw away arguments + """ + # Emulate small quantity of work. + time.sleep(0.01) + + @staticmethod + def _timeit(func: Callable, *args: Any) -> float: + """ + Measure how long the call to `func` with `args` takes. + + :param func: callable function + :param args: any arguments to pass to `func` + :return: elapsed time in seconds + """ + perf_start = time.perf_counter() + func(*args) + perf_diff = time.perf_counter() - perf_start + return perf_diff + + def _test_performance(self, val: Any) -> None: + """ + Test performance of the cache over some argument val. + + :param val: any hashable argument + """ + # Create cached versions of the computation function. + _mem_cached_computation = hcache._Cached( + self._computation, + tag=self.cache_tag, + use_mem_cache=True, + use_disk_cache=False, + ) + _disk_cached_computation = hcache._Cached( + self._computation, + tag=self.cache_tag, + use_mem_cache=False, + use_disk_cache=True, + ) + # First step: no cache.
+ no_cache_ct = self._timeit(lambda: self._computation(val)) + print(f"no cache run time={no_cache_ct}") + # Second step: memory cache. + memory_no_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) + print(f"empty memory cache run time={memory_no_cache_ct}") + print(f"empty memory cache overhead={memory_no_cache_ct - no_cache_ct}") + memory_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) + print(f"hot memory cache run time={memory_cache_ct}") + print(f"hot memory cache benefit={no_cache_ct - memory_cache_ct}") + # Third step: disk cache. + disk_no_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) + print(f"empty disk cache run time={disk_no_cache_ct}") + print(f"empty disk cache overhead={disk_no_cache_ct - no_cache_ct}") + disk_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) + print(f"hot disk cache run time={disk_cache_ct}") + print(f"hot disk cache benefit={no_cache_ct - disk_cache_ct}") + + def test_performance_dataframe(self) -> None: + """ + Test performance of the cache over pandas DataFrame. + """ + # Create a somewhat big DataFrame with random data. + df = pd.DataFrame( + np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD") + ) + print("testing pandas dataframe, with sample size", df.shape) + self._test_performance(df) + + def test_performance_series(self) -> None: + """ + Test performance of the cache over pandas Series. + """ + # Create a somewhat big Series with random data.
+ s = pd.Series(np.random.randint(0, 100, size=100)) + print("testing pandas series, with sample size", s.shape) + self._test_performance(s) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheDecorator +# ############################################################################# + + +class TestCacheDecorator(_ResetGlobalCacheHelper): + def test_decorated_function(self) -> None: + """ + Test decorator with both caches enabled. + """ + + # Define the function inline imitating working in a notebook. + @hcache.cache(tag=self.cache_tag) + def add(x: int, y: int) -> int: + add.__wrapped__.executed = True + return x + y + + # Execute the first time. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" + ) + # Execute the second time. Must use memory cache. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="mem" + ) + + def test_decorated_function_no_mem(self) -> None: + """ + Test decorator with only disk cache. + """ + + # Define the function inline imitating working in a notebook. + @hcache.cache(tag=self.cache_tag, use_mem_cache=False) + def add(x: int, y: int) -> int: + add.__wrapped__.executed = True + return x + y + + # Execute the first time. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" + ) + # Execute the second time. Must use disk cache. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="disk" + ) + + +# ############################################################################# + + +# ############################################################################# +# TestAmpTask1407 +# ############################################################################# + + +class TestAmpTask1407(_ResetGlobalCacheHelper): + def test1(self) -> None: + """ + A class method can't be cached. 
+ """ + + class _AmpTask1407Class: + def __init__(self, string: str) -> None: + self._string = string + + @hcache.cache(tag=self.cache_tag) + def print(self, n: int) -> str: + string = "" + for _ in range(n): + string += "hello" + ("o" * len(self._string)) + " " + return string + + obj = _AmpTask1407Class("test") + with self.assertRaises(ValueError): + obj.print(5) + + def test2(self) -> None: + """ + A static method can be cached. + """ + + class _AmpTask1407Class: + def __init__(self, string: str) -> None: + self._string = string + + @staticmethod + @hcache.cache(tag=self.cache_tag) + def static_print(n: int) -> str: + print("--> hello: ", n) + string = "" + for _ in range(n): + string += "hello" + ("o" * len("world")) + " " + return string + + @hcache.cache(tag=self.cache_tag) + def print(self, n: int) -> str: + string = "" + for _ in range(n): + string += "hello" + ("o" * len(self._string)) + " " + return string + + obj = _AmpTask1407Class("test") + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") + # + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + # + obj.static_print(6) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") + obj.static_print(6) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + + +# ############################################################################# + + +# ############################################################################# +# TestCachingOnS3 +# ############################################################################# + + +class TestCachingOnS3(_ResetFunctionSpecificCacheHelper): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test3() + yield + # Run after each test. 
+ self.tear_down_test() + + def set_up_test3(self) -> None: + self.set_up_test2() + # Get a directory to store the cache on S3. + self.disk_cache_dir = self.get_s3_scratch_dir() + self.aws_profile = "am" + # Clear global cache. + hcache.clear_global_cache("all", tag=self.cache_tag) + + @pytest.mark.skip(reason="See CMTask #952.") + def test_with_caching1(self) -> None: + """ + - Test using the function-specific cache + - Disable function-specific cache and switching to global cache + - Test using the global cache + """ + _LOG.debug("\n%s", hprint.frame("Starting")) + _LOG.debug( + "\n%s", + hcache.get_global_cache_info(tag=self.cache_tag, add_banner=True), + ) + f, cf = self._get_f_cf_functions( + use_mem_cache=False, + disk_cache_path=self.disk_cache_dir, + aws_profile=self.aws_profile, + ) + _LOG.debug("\n%s", cf.get_function_cache_info(add_banner=True)) + cf.clear_function_cache(destroy=False) + # 1) Execute and verify that it is executed, since the value is not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("Clear global cache")) + hcache.clear_global_cache("all") + # 4) Verify that it is *NOT* executed, since the S3 cache is used. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 5) Clear the function cache. + _LOG.debug("\n%s", hprint.frame("Clear function cache")) + cf.clear_function_cache() + # 6) Clear the function cache. + _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 7) Verify that it is executed. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCacheEnableReadOnly1 +# ############################################################################# + + +class TestCacheEnableReadOnly1(_ResetGlobalCacheHelper): + def _helper(self, cache_from: str, **kwargs: Any) -> None: + """ + Test that when enabling read-only mode we get an assertion only if the + function invocation was not cached. + """ + # Both memory and disk cache enabled, although we use only memory. + f, cf = self._get_f_cf_functions(**kwargs) + # Execute and verify that it is executed, since it was not cached. + _LOG_frame("Execute the 1st time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # Execute and verify that it is not executed, since it's cached in memory. + _LOG_frame("Execute the 2nd time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + _LOG_frame("Execute the 3rd time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + # + # Enable the read-only mode. + # + _LOG_frame("Enable read-only mode") + cf.enable_read_only(True) + # This is cached so it doesn't raise. + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + # This is not cached so it should raise. + with self.assertRaises(hcache.NotCachedValueException) as cm: + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + actual = str(cm.exception) + self.check_string(actual) + # + # Disable the read-only mode. + # + _LOG_frame("Disable read-only mode") + cf.enable_read_only(False) + # Now this doesn't assert even if it's not in the cache. 
+ self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + + def test_mem_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) + + def test_disk_cache1(self) -> None: + self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) + + def test_mem_disk_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheUpdateFunction1 +# ############################################################################# + + +class TestCacheUpdateFunction1(_ResetGlobalCacheHelper): + def test1(self) -> None: + # Define the function imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + + def add(x: int, y: int) -> int: + add.executed = True # type: ignore[attr-defined] + return x + y + + disk_cache_dir = self.get_scratch_space() + _LOG.debug("disk_cache_dir=%s", disk_cache_dir) + cached_add = hcache._Cached( + add, + use_mem_cache=False, + use_disk_cache=True, + disk_cache_path=disk_cache_dir, + ) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + func_path = cached_add._get_function_specific_code_path() + code_before = hio.from_file(func_path) + _LOG.debug("code_before=\n%s", code_before) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") + # 3) Redefine the function with different code while running. + _LOG.debug("\n%s", hprint.frame("Update function")) + + # This function is redefined on purpose to test the code. 
+ def add(x: int, y: int) -> int: # type: ignore[no-redef] + add.executed = True # type: ignore[attr-defined] + return x * y + + cached_add._func = add + cached_add._disk_cached_func.func = add + cached_add.update_func_code_without_invalidating_cache() + # + code_after = hio.from_file(func_path) + _LOG.debug("code_after=\n%s", code_after) + self.assertNotEqual(code_before, code_after) + # 4) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCacheEnableCheckOnlyIfPresent1 +# ############################################################################# + + +class TestCacheEnableCheckOnlyIfPresent1(_ResetGlobalCacheHelper): + def _helper(self, cache_from: str, **kwargs: Any) -> None: + # Both memory and disk cache enabled. + f, cf = self._get_f_cf_functions(**kwargs) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state="no_cache") + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + # 3) Enable the `check_only_if_present` mode. + _LOG.debug("\n%s", hprint.frame("Enable check_only_if_present")) + cf.enable_check_only_if_present(True) + # Since the value was cached, we should get an assertion. + with self.assertRaises(hcache.CachedValueException) as cm: + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + actual = str(cm.exception) + self.check_string(actual) + # 4) Try with a new value. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 2, 2, exp_cf_state="no_cache") + # 5) Disable the `check_only_if_present` mode. + _LOG.debug("\n%s", hprint.frame("Disable check_only_if_present")) + cf.enable_check_only_if_present(False) + # 6) Execute a value: we should get a cache hit. + _LOG.debug("\n%s", hprint.frame("Execute the 4rd time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + # 7) Execute a value: we should get a cache hit. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 2, 2, exp_cf_state=cache_from) + + # TODO(gp): Add a test for verbose mode in __call__ + # TODO(gp): get_function_cache_info + def test_mem_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) + + def test_disk_cache1(self) -> None: + self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) + + def test_mem_disk_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py new file mode 100644 index 000000000..15ae65c1a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py @@ -0,0 +1,1815 @@ +import logging +import os +from typing import Any, Dict + +import pandas as pd +import pytest + +import helpers.hcache_simple as hcacsimp +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_json_double(x: int) -> int: + """ + Return double the input and cache it using JSON. 
+ + :param x: input integer to be doubled + :return: doubled value (x * 2) + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache(cache_type="pickle") +def _cached_pickle_square(x: int) -> int: + """ + Return the square of the input and cache it using pickle. + + :param x: input integer to be squared + :return: squared value (x**2) + """ + res = x**2 + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_multi_arg_sum(a: int, b: int) -> int: + """ + Return the sum of two numbers. + + :param a: first number + :param b: second number + :return: sum of a and b. + """ + res = a + b + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_refreshable_func(x: int) -> int: + """ + Return x multiplied by 10 and update the call count. + + :param x: The input integer + :return: The result of multiplying x by 10 + """ + _cached_refreshable_func.call_count += 1 + res = x * 10 + return res + + +# Initialize the call counter for the refreshable function. +_cached_refreshable_func.call_count = 0 + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_kwarg_diff(a: int, b: int = 0) -> int: + """ + Return the difference between a and b. + + :param a: The minuend + :param b: The subtrahend (defaults to 0) + :return: The difference (a - b) + """ + res = a - b + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_add_100(x: int) -> int: + """ + Return x plus 100. Used primarily for testing cache statistics. + + :param x: The input integer + :return: value (x + 100) + """ + res = x + 100 + return res + + +# ############################################################################# +# _BaseCacheTest +# ############################################################################# + + +class _BaseCacheTest(hunitest.TestCase): + """ + Base test class to provide common setup and teardown functionality. 
+ + Instead of using setUp/tearDown, we use set_up_test/tear_down_test along + with a pytest fixture that ensures these methods run before and after each + test. + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self): + # Run common setup before each test. + self.set_up_test() + yield + # Run common teardown after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + """ + Setup operations to run before each test: + + - Set specific cache properties needed for the tests. + """ + _LOG.debug("set_up_test") + super().setUp() + # + self._cache_dir = hcacsimp.get_cache_dir() + hcacsimp.set_cache_dir(self.get_scratch_space()) + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test: + - Reset cache(in-memory, disk). + - Reset cache properties. + """ + _LOG.debug("tear_down_test") + hcacsimp.reset_cache("", interactive=False) + hcacsimp.reset_cache_property() + hcacsimp.set_cache_dir(self._cache_dir) + + +# ############################################################################# +# Test_get_cache +# ############################################################################# + + +class Test_get_cache(_BaseCacheTest): + """ + Test get_cache functionality for retrieving cached values. + """ + + def test1(self) -> None: + """ + Verify that get_cache returns a cache with the expected key and value. + """ + # Populate the cache by calling _cached_json_double. + _cached_json_double(2) + # Retrieve the in-memory cache for _cached_json_double. + cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") + # Assert that the key '{"args": [2], "kwargs": {}}' is in the cache and + # its value is 4. 
+ self.assertIn('{"args": [2], "kwargs": {}}', cache) + self.assertEqual(cache['{"args": [2], "kwargs": {}}'], 4) + + +# ############################################################################# +# Test_flush_cache_to_disk +# ############################################################################# + + +class Test_flush_cache_to_disk(_BaseCacheTest): + """ + Test flush_cache_to_disk functionality for persisting cache to disk. + """ + + def test1(self) -> None: + """ + Verify that flushing creates a cache file on disk. + """ + # Call _cached_json_double to populate the cache. + _cached_json_double(3) + # Flush the cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Define expected cache file name. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + # Assert that the cache file now exists on disk. + self.assertTrue( + os.path.exists(cache_file), + f"Cache file {cache_file} should exist on disk.", + ) + + def test2(self) -> None: + """ + Verify that the disk cache file contains the expected key and value. + """ + # Populate cache and flush to disk. + _cached_json_double(3) + # Flush the cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Define the expected cache file name. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + # # Open and load the disk cache file. + disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") + # Assert that the disk cache contains the key '{"args": [3], "kwargs": + # {}}' with the correct value. + self.assertIn('{"args": [3], "kwargs": {}}', disk_cache) + # Assert that the value for key '{"args": [3], "kwargs": {}}' is 6. 
+ self.assertEqual(disk_cache['{"args": [3], "kwargs": {}}'], 6) + + +# ############################################################################# +# Test_reset_mem_cache +# ############################################################################# + + +class Test_reset_mem_cache(_BaseCacheTest): + """ + Test reset_mem_cache functionality for clearing in-memory cache. + """ + + def test1(self) -> None: + """ + Verify that the cache is empty after `reset_mem_cache` is called. + """ + # Populate the in-memory cache. + _cached_json_double(5) + # Reset the in-memory cache. + hcacsimp.reset_mem_cache("_cached_json_double") + # Retrieve the memory cache after reset. + cache_after: Dict[str, Any] = hcacsimp.get_mem_cache( + "_cached_json_double" + ) + # Verify that the key '{"args": [5], "kwargs": {}}' is no longer in the cache. + self.assertNotIn('{"args": [5], "kwargs": {}}', cache_after) + + +# ############################################################################# +# Test_force_cache_from_disk +# ############################################################################# + + +class Test_force_cache_from_disk(_BaseCacheTest): + """ + Test force_cache_from_disk functionality for loading cache from disk. + """ + + def test1(self) -> None: + """ + Verify that the memory cache is empty after a reset. + """ + # Populate cache and flush to disk. + _cached_json_double(7) + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Reset in-memory cache. + hcacsimp.reset_mem_cache("_cached_json_double") + mem_cache: Dict[str, Any] = hcacsimp.get_mem_cache("_cached_json_double") + # Ensure that the in-memory cache is empty. + self.assertNotIn( + '{"args": [7], "kwargs": {}}', + mem_cache, + "Memory cache should be empty after reset.", + ) + + def test2(self) -> None: + """ + Populate disk cache, reset memory, force reload, and verify that the + key appears. + """ + # Populate cache, flush to disk, and then reset in-memory cache. 
+ _cached_json_double(7) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.reset_mem_cache("_cached_json_double") + _LOG.debug("Force reload disk cache for '_cached_json_double'") + # Force reload cache from disk. + hcacsimp.force_cache_from_disk("_cached_json_double") + full_cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") + # Assert that the key is restored in the in-memory cache. + self.assertIn( + '{"args": [7], "kwargs": {}}', + full_cache, + "After forcing, disk key should appear in memory.", + ) + + +# ############################################################################# +# Test_get_cache_perf +# ############################################################################# + + +class Test_get_cache_perf(_BaseCacheTest): + """ + Test cache performance tracking functionality. + """ + + def test1(self) -> None: + """ + Verify that performance tracking records hits and misses correctly. + """ + # Enable performance tracking. + hcacsimp.enable_cache_perf("_cached_json_double") + _LOG.debug("Call _cached_json_double(8) twice") + # First call should be a miss. + _LOG.debug("# First call should be a miss") + _cached_json_double(8) + # Second call should be a hit. + _LOG.debug("# Second call should be a hit") + _cached_json_double(8) + # Retrieve performance statistics. + stats: str = hcacsimp.get_cache_perf_stats("_cached_json_double") + # Verify that one hit and one miss are recorded. + self.assertIn("hits=1", stats) + self.assertIn("misses=1", stats) + + def test2(self) -> None: + """ + Verify that disabling performance tracking returns None. + """ + # Disable performance tracking. + hcacsimp.disable_cache_perf("_cached_json_double") + # Assert that performance data is no longer available. 
+ self.assertIsNone(hcacsimp.get_cache_perf("_cached_json_double")) + + +# ############################################################################# +# Test_set_cache_property +# ############################################################################# + + +class Test_set_cache_property(_BaseCacheTest): + """ + Test set_cache_property and get_cache_property functionality. + """ + + def test1(self) -> None: + """ + Verify that setting a valid cache property works and can be retrieved. + """ + # Set a valid cache property. + hcacsimp.set_cache_property( + "_cached_json_double", "report_on_cache_miss", True + ) + # Retrieve and verify the property. + val: bool = hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + self.assertTrue(val) + + def test2(self) -> None: + """ + Verify that resetting cache properties clears previously set + properties. + """ + # Set and verify the cache property. + hcacsimp.set_cache_property( + "_cached_json_double", "report_on_cache_miss", True + ) + self.assertTrue( + hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + ) + # Reset all cache properties. + hcacsimp.reset_cache_property() + # Verify that the property is no longer True. + self.assertFalse( + hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + ) + + def test3(self) -> None: + """ + Verify that setting an invalid cache property raises an error. + """ + # Verify that setting an invalid property raises an error. + with self.assertRaises(AssertionError): + hcacsimp.set_cache_property( + "_cached_json_double", "invalid_prop", True + ) + + def test4(self) -> None: + """ + Verify return of a string containing the property value. + """ + # Set force_refresh property and verify that it appears in the properties string. + hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) + prop_str: str = hcacsimp.cache_property_to_str("_cached_json_double") + # Check output. 
+ self.assertIn("force_refresh: True", prop_str) + + +# ############################################################################# +# Test_get_cache_func_names +# ############################################################################# + + +class Test_get_cache_func_names(_BaseCacheTest): + """ + Test get_cache_func_names functionality for retrieving cached function names. + """ + + def test1(self) -> None: + """ + Verify that memory cache function names include `_cached_json_double`. + """ + # Populate in-memory cache. + _cached_json_double(9) + # Retrieve function names from the memory cache. + mem_funcs = hcacsimp.get_cache_func_names("mem") + # Check output. + self.assertIn("_cached_json_double", mem_funcs) + + def test2(self) -> None: + """ + Verify that all cache function names include both JSON and pickle + functions. + """ + # Populate and flush caches for JSON and pickle functions. + _cached_json_double(2) + # Flush _cached_json_double cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Call _cached_pickle_square with input 2. + _cached_pickle_square(2) + # Flush _cached_pickle_square cache to disk. + hcacsimp.flush_cache_to_disk("_cached_pickle_square") + # Retrieve all cache function names (both memory and disk). + all_funcs = hcacsimp.get_cache_func_names("all") + # Check output. + self.assertIn("_cached_json_double", all_funcs) + self.assertIn("_cached_pickle_square", all_funcs) + + def test3(self) -> None: + """ + Verify that disk cache function names include `_cached_json_double` after + flushing. + """ + # Flush JSON cache to disk and verify disk cache function names. + _cached_json_double(2) + # Flush _cached_json_double cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Retrieve function names from the disk cache. + disk_funcs = hcacsimp.get_cache_func_names("disk") + # Check output. 
+ self.assertIn("_cached_json_double", disk_funcs) + + +# ############################################################################# +# Test_cache_stats_to_str +# ############################################################################# + + +class Test_cache_stats_to_str(_BaseCacheTest): + """ + Test cache_stats_to_str functionality for generating cache statistics. + """ + + def test1(self) -> None: + """ + Verify that cache_stats_to_str returns a DataFrame with 'memory' and + 'disk' columns. + """ + # Populate cache. + _cached_add_100(1) + stats_df: pd.DataFrame = hcacsimp.cache_stats_to_str("_cached_add_100") + # Assert that the returned object is a DataFrame. + self.assertIsInstance(stats_df, pd.DataFrame) + # Verify that it contains the 'memory' and 'disk' columns. + self.assertIn("memory", stats_df.columns) + self.assertIn("disk", stats_df.columns) + + +# ############################################################################# +# Test__cached_kwarg_diff +# ############################################################################# + + +class Test__cached_kwarg_diff(_BaseCacheTest): + """ + Test caching behavior with keyword arguments. + """ + + def test1(self) -> None: + """ + Test that verifies keyword arguments are handled correctly by the + cache. + """ + # Call with different keyword argument values. + res1: int = _cached_kwarg_diff(5, b=3) + res2: int = _cached_kwarg_diff(5, b=10) + # Both calls should return the different result as both args, kwargs are used for caching. + self.assertNotEqual(res1, res2) + + +# ############################################################################# +# Test__cached_multi_arg_sum +# ############################################################################# + + +class Test__cached_multi_arg_sum(_BaseCacheTest): + """ + Test caching behavior with multiple positional arguments. + """ + + def test1(self) -> None: + """ + Verify that the cache for _cached_multi_arg_sum contains the correct key. 
+ """ + # Populate the cache. + _cached_multi_arg_sum(1, 2) + cache: Dict[str, Any] = hcacsimp.get_cache("_cached_multi_arg_sum") + _LOG.debug("cache=%s", cache) + # Verify that the cache key is formatted as '{"args": [1, 2], "kwargs": {}}'. + self.assertIn('{"args": [1, 2], "kwargs": {}}', cache) + + +# ############################################################################# +# Test__cached_pickle_square +# ############################################################################# + + +class Test__cached_pickle_square(_BaseCacheTest): + """ + Test caching with pickle serialization. + """ + + def test1(self) -> None: + """ + Ensure that _cached_pickle_square returns the correct value and disk + file. + """ + # Call the function to square the input. + res: int = _cached_pickle_square(4) + # Flush the cache to disk. + hcacsimp.flush_cache_to_disk("_cached_pickle_square") + cache_file = hcacsimp._get_cache_file_name("_cached_pickle_square") + # Open and load the pickle cache file. + func_cache_data = hcacsimp._load_func_cache_data_from_file( + cache_file, "pickle" + ) + _LOG.debug("func_cache_data=%s", func_cache_data) + # Verify the result and cache contents. + self.assertEqual(res, 16) + self.assertIn('{"args": [4], "kwargs": {}}', func_cache_data) + self.assertEqual(func_cache_data['{"args": [4], "kwargs": {}}'], 16) + + +# ############################################################################# +# Test__cached_refreshable_func +# ############################################################################# + + +class Test__cached_refreshable_func(_BaseCacheTest): + """ + Test force_refresh cache property functionality. + """ + + def test1(self) -> None: + """ + Verify that `_cached_refreshable_func` is called only once initially. + """ + # Reset call counter. + _cached_refreshable_func.call_count = 0 + # Call the function twice with the same input. 
+ _cached_refreshable_func(3) + _cached_refreshable_func(3) + # Verify that the function was only called once (cache hit on the second + # call). + self.assertEqual( + _cached_refreshable_func.call_count, + 1, + "Function should be called only once initially.", + ) + + def test2(self) -> None: + """ + Verify that enabling `force_refresh` causes `_cached_refreshable_func` to + be re-called. + """ + # Call the function normally. + res: int = _cached_refreshable_func(3) + # Enable force_refresh so that the function will be re-called. + hcacsimp.set_cache_property( + "_cached_refreshable_func", "force_refresh", True + ) + # Verify that the function returns the correct value (3 * 10 = 30). + self.assertEqual(res, 30) + # Verify that the function's call count has incremented, indicating it + # was re-called. + self.assertEqual( + _cached_refreshable_func.call_count, + 2, + "Function should be re-called when force_refresh is enabled.", + ) + + +# ############################################################################# +# Test_reset_cache_perf +# ############################################################################# + + +class Test_reset_cache_perf(_BaseCacheTest): + """ + Test reset_cache_perf functionality for resetting performance statistics. + """ + + def test1(self) -> None: + """ + Verify that reset_cache_perf resets stats for a single function. + """ + # Prepare inputs. + hcacsimp.enable_cache_perf("_cached_json_double") + _cached_json_double(5) + _cached_json_double(5) + # Run test. + hcacsimp.reset_cache_perf("_cached_json_double") + # Check outputs. + perf = hcacsimp.get_cache_perf("_cached_json_double") + self.assertEqual(perf["tot"], 0) + self.assertEqual(perf["hits"], 0) + self.assertEqual(perf["misses"], 0) + + def test2(self) -> None: + """ + Verify that reset_cache_perf with empty func_name resets all + functions. + """ + # Prepare inputs. 
+ hcacsimp.enable_cache_perf("_cached_json_double") + hcacsimp.enable_cache_perf("_cached_multi_arg_sum") + _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + # Run test. + hcacsimp.reset_cache_perf("") + # Check outputs. + perf1 = hcacsimp.get_cache_perf("_cached_json_double") + perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") + self.assertEqual(perf1["tot"], 0) + self.assertEqual(perf2["tot"], 0) + + +# ############################################################################# +# Test_disable_cache_perf +# ############################################################################# + + +class Test_disable_cache_perf(_BaseCacheTest): + """ + Test disable_cache_perf functionality for disabling performance tracking. + """ + + def test1(self) -> None: + """ + Verify that disable_cache_perf with empty func_name disables all + functions. + """ + # Prepare inputs. + hcacsimp.enable_cache_perf("_cached_json_double") + hcacsimp.enable_cache_perf("_cached_multi_arg_sum") + _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + # Run test. + hcacsimp.disable_cache_perf("") + # Check outputs. + perf1 = hcacsimp.get_cache_perf("_cached_json_double") + perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") + # After disabling, perf should be None. + self.assertIsNone(perf1) + self.assertIsNone(perf2) + + +# ############################################################################# +# Test_get_cache_perf_stats +# ############################################################################# + + +class Test_get_cache_perf_stats(_BaseCacheTest): + """ + Test get_cache_perf_stats for retrieving performance statistics. + """ + + def test1(self) -> None: + """ + Verify that get_cache_perf_stats returns empty string when no stats + exist. + """ + # Prepare inputs. + # Ensure no perf stats exist for a non-tracked function. + hcacsimp.disable_cache_perf("_cached_json_double") + # Run test. 
+ stats = hcacsimp.get_cache_perf_stats("_cached_json_double") + # Check outputs. + self.assertEqual(stats, "") + + +# ############################################################################# +# Test_cache_property_to_str +# ############################################################################# + + +class Test_cache_property_to_str(_BaseCacheTest): + """ + Test cache_property_to_str for converting properties to string. + """ + + def test1(self) -> None: + """ + Verify that cache_property_to_str with empty func_name returns all + functions. + """ + # Prepare inputs. + # Call functions to ensure they are cached. + _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) + hcacsimp.set_cache_property("_cached_multi_arg_sum", "enable_perf", True) + # Run test. + result = hcacsimp.cache_property_to_str("") + # Check outputs. + self.assertIn("_cached_json_double", result) + self.assertIn("_cached_multi_arg_sum", result) + self.assertIn("force_refresh: True", result) + self.assertIn("enable_perf: True", result) + + +# ############################################################################# +# Test_reset_mem_cache_all +# ############################################################################# + + +class Test_reset_mem_cache_all(_BaseCacheTest): + """ + Test reset_mem_cache with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that reset_mem_cache with empty func_name resets all caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + hcacsimp.reset_mem_cache("") + # Check outputs. 
+ cache1 = hcacsimp.get_mem_cache("_cached_json_double") + cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") + self.assertEqual(len(cache1), 0) + self.assertEqual(len(cache2), 0) + + +# ############################################################################# +# Test_reset_disk_cache_all +# ############################################################################# + + +class Test_reset_disk_cache_all(_BaseCacheTest): + """ + Test reset_disk_cache with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that reset_disk_cache with empty func_name removes all cache + files. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") + # Run test. + hcacsimp.reset_disk_cache("", interactive=False) + # Check outputs. + cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertFalse(os.path.exists(cache_file1)) + cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") + self.assertFalse(os.path.exists(cache_file2)) + + +# ############################################################################# +# Test_force_cache_from_disk_all +# ############################################################################# + + +class Test_force_cache_from_disk_all(_BaseCacheTest): + """ + Test force_cache_from_disk with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that force_cache_from_disk with empty func_name loads all + caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") + hcacsimp.reset_mem_cache("") + # Run test. + hcacsimp.force_cache_from_disk("") + # Check outputs. 
+ cache1 = hcacsimp.get_mem_cache("_cached_json_double") + cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") + self.assertGreater(len(cache1), 0) + self.assertGreater(len(cache2), 0) + + +# ############################################################################# +# Test_flush_cache_to_disk_all +# ############################################################################# + + +class Test_flush_cache_to_disk_all(_BaseCacheTest): + """ + Test flush_cache_to_disk with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that flush_cache_to_disk with empty func_name flushes all + caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + hcacsimp.flush_cache_to_disk("") + # Check outputs. + cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertTrue(os.path.exists(cache_file1)) + # + cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") + self.assertTrue(os.path.exists(cache_file2)) + + +# ############################################################################# +# Test_cache_stats_to_str_all +# ############################################################################# + + +class Test_cache_stats_to_str_all(_BaseCacheTest): + """ + Test cache_stats_to_str with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that cache_stats_to_str with empty func_name returns stats for + all functions. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + result = hcacsimp.cache_stats_to_str("") + # Check outputs. 
+ self.assertIsNotNone(result) + self.assertIn("_cached_json_double", result.index) + self.assertIn("_cached_multi_arg_sum", result.index) + + +# ############################################################################# +# Test_get_cache_func_names_invalid +# ############################################################################# + + +class Test_get_cache_func_names_invalid(_BaseCacheTest): + """ + Test get_cache_func_names with invalid type parameter. + """ + + def test1(self) -> None: + """ + Verify that get_cache_func_names raises ValueError for invalid type. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp.get_cache_func_names("invalid_type") + self.assertIn("Invalid type", str(cm.exception)) + + +# ############################################################################# +# Test__get_cache_file_name +# ############################################################################# + + +class Test__get_cache_file_name(_BaseCacheTest): + """ + Test _get_cache_file_name for invalid cache type. + """ + + def test1(self) -> None: + """ + Verify that _get_cache_file_name raises ValueError for invalid cache + type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp._get_cache_file_name("_cached_json_double") + self.assertIn("Invalid cache type", str(cm.exception)) + # Reset type to valid value for teardown. + hcacsimp.set_cache_property("_cached_json_double", "type", "json") + + +# ############################################################################# +# Test__save_cache_dict_to_disk +# ############################################################################# + + +class Test__save_cache_dict_to_disk(_BaseCacheTest): + """ + Test _save_cache_dict_to_disk for invalid cache type. 
+ """ + + def test1(self) -> None: + """ + Verify that _save_cache_dict_to_disk raises ValueError for invalid + cache type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + data = {"key": "value"} + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp._save_cache_dict_to_disk("_cached_json_double", data) + self.assertIn("Invalid cache type", str(cm.exception)) + # Reset type to valid value for teardown. + hcacsimp.set_cache_property("_cached_json_double", "type", "json") + + +# ############################################################################# +# Test_get_disk_cache_invalid +# ############################################################################# + + +class Test_get_disk_cache_invalid(_BaseCacheTest): + """ + Test get_disk_cache for invalid cache type. + """ + + def test1(self) -> None: + """ + Verify that get_disk_cache raises ValueError for invalid cache type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp.get_disk_cache("_cached_json_double") + self.assertIn("Invalid cache type", str(cm.exception)) + # Reset type to valid value for teardown. + hcacsimp.set_cache_property("_cached_json_double", "type", "json") + + +@hcacsimp.simple_cache(cache_type="json") +def _cache_mode_function(x: int) -> int: + """ + Test function to verify cache_mode parameter. + + :param x: input integer + :return: x * 5 + """ + _cache_mode_function.call_count += 1 + res = x * 5 + return res + + +_cache_mode_function.call_count = 0 + + +# ############################################################################# +# Test_cache_mode +# ############################################################################# + + +class Test_cache_mode(_BaseCacheTest): + """ + Test cache_mode parameter functionality. 
+ """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_cache_mode_function", "type", "json") + _cache_mode_function.call_count = 0 + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. + """ + super().tear_down_test() + hcacsimp.reset_cache("_cache_mode_function", interactive=False) + + def test1(self) -> None: + """ + Verify that setting force_refresh property forces cache refresh. + """ + # Prepare inputs. + _cache_mode_function(10) + initial_count = _cache_mode_function.call_count + # Set force_refresh property. + hcacsimp.set_cache_property( + "_cache_mode_function", "force_refresh", True + ) + # Run test. + result = _cache_mode_function(10) + # Check outputs. + self.assertEqual(result, 50) + self.assertEqual(_cache_mode_function.call_count, initial_count + 1) + + def test2(self) -> None: + """ + Verify that setting abort_on_cache_miss property aborts on cache miss. + """ + # Prepare inputs. + hcacsimp.set_cache_property( + "_cache_mode_function", "abort_on_cache_miss", True + ) + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _cache_mode_function(99) + self.assertIn("Cache miss", str(cm.exception)) + + def test3(self) -> None: + """ + Verify that calling with different arguments bypasses cache. + """ + # Prepare inputs. + _cache_mode_function(15) + initial_count = _cache_mode_function.call_count + # Run test. + result1 = _cache_mode_function(16) + result2 = _cache_mode_function(17) + # Check outputs. + self.assertEqual(result1, 80) + self.assertEqual(result2, 85) + self.assertEqual(_cache_mode_function.call_count, initial_count + 2) + + +@hcacsimp.simple_cache(cache_type="json") +def _abort_test_function(x: int) -> int: + """ + Test function to verify abort_on_cache_miss parameter. 
+ + :param x: input integer + :return: x * 7 + """ + res = x * 7 + return res + + +# ############################################################################# +# Test_abort_on_cache_miss +# ############################################################################# + + +class Test_abort_on_cache_miss(_BaseCacheTest): + """ + Test abort_on_cache_miss functionality. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_abort_test_function", "type", "json") + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. + """ + super().tear_down_test() + hcacsimp.reset_cache("_abort_test_function", interactive=False) + + def test1(self) -> None: + """ + Verify that abort_on_cache_miss=True raises error on cache miss. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _abort_test_function(100, abort_on_cache_miss=True) + self.assertIn("Cache miss", str(cm.exception)) + + +@hcacsimp.simple_cache(cache_type="json") +def _report_test_function(x: int) -> int: + """ + Test function to verify report_on_cache_miss parameter. + + :param x: input integer + :return: x * 8 + """ + res = x * 8 + return res + + +# ############################################################################# +# Test_report_on_cache_miss +# ############################################################################# + + +class Test_report_on_cache_miss(_BaseCacheTest): + """ + Test report_on_cache_miss functionality. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_report_test_function", "type", "json") + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. 
+ """ + super().tear_down_test() + hcacsimp.reset_cache("_report_test_function", interactive=False) + + def test1(self) -> None: + """ + Verify that report_on_cache_miss=True returns '_cache_miss_' on miss. + """ + # Run test. + result = _report_test_function(200, report_on_cache_miss=True) + # Check outputs. + self.assertEqual(result, "_cache_miss_") + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def _write_through_function(x: int) -> int: + """ + Test function to verify write_through parameter. + + :param x: input integer + :return: x * 9 + """ + res = x * 9 + return res + + +# ############################################################################# +# Test_write_through +# ############################################################################# + + +class Test_write_through(_BaseCacheTest): + """ + Test write_through functionality for automatic disk caching. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_write_through_function", "type", "json") + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. + """ + super().tear_down_test() + hcacsimp.reset_cache("_write_through_function", interactive=False) + + def test1(self) -> None: + """ + Verify that write_through=True automatically writes to disk. + """ + # Run test. + _write_through_function(11) + # Check outputs. + cache_file = hcacsimp._get_cache_file_name("_write_through_function") + self.assertTrue(os.path.exists(cache_file)) + # + disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") + self.assertIn('{"args": [11], "kwargs": {}}', disk_cache) + self.assertEqual(disk_cache['{"args": [11], "kwargs": {}}'], 99) + + +@hcacsimp.simple_cache(cache_type="json") +def _test_cache_mode_kwarg(x: int, **kwargs) -> int: + """ + Test function that accepts kwargs to test cache_mode parameter. 
+ + :param x: input integer + :param kwargs: additional keyword arguments + :return: x * 3 + """ + _test_cache_mode_kwarg.call_count += 1 + res = x * 3 + return res + + +_test_cache_mode_kwarg.call_count = 0 + + +# ############################################################################# +# Test_cache_mode_parameter +# ############################################################################# + + +class Test_cache_mode_parameter(_BaseCacheTest): + """ + Test cache_mode parameter as a keyword argument. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_test_cache_mode_kwarg", "type", "json") + _test_cache_mode_kwarg.call_count = 0 + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. + """ + super().tear_down_test() + hcacsimp.reset_cache("_test_cache_mode_kwarg", interactive=False) + + def test1(self) -> None: + """ + Verify that cache_mode='REFRESH_CACHE' keyword forces refresh. + """ + # Prepare inputs. + _test_cache_mode_kwarg(20) + initial_count = _test_cache_mode_kwarg.call_count + # Run test. + result = _test_cache_mode_kwarg(20, cache_mode="REFRESH_CACHE") + # Check outputs. + self.assertEqual(result, 60) + self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 1) + + def test2(self) -> None: + """ + Verify that cache_mode='HIT_CACHE_OR_ABORT' raises error on miss. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _test_cache_mode_kwarg(88, cache_mode="HIT_CACHE_OR_ABORT") + self.assertIn("Cache miss", str(cm.exception)) + + def test3(self) -> None: + """ + Verify that cache_mode='DISABLE_CACHE' bypasses cache. + """ + # Prepare inputs. + _test_cache_mode_kwarg(30) + initial_count = _test_cache_mode_kwarg.call_count + # Run test. 
+ result1 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") + result2 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") + # Check outputs. + self.assertEqual(result1, 90) + self.assertEqual(result2, 90) + self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 2) + + +# ############################################################################# +# Module-level helpers for new tests. +# ############################################################################# + + +@hcacsimp.simple_cache(cache_type="json") +def _test_intrinsic_func_intrinsic(x: int) -> int: + """ + Return x times 3. Named with `_intrinsic` suffix to test suffix stripping. + + :param x: input integer + :return: x * 3 + """ + res = x * 3 + return res + + +@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) +def _test_exclude_keys_func(x: int, *, session_id: str = "") -> int: + """ + Return x times 2, ignoring session_id for caching purposes. + + :param x: input integer + :param session_id: session identifier (excluded from cache key) + :return: x * 2 + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache(cache_type="json", write_through=False) +def _test_no_write_through(x: int) -> int: + """ + Return x plus 1, with write_through disabled. + + :param x: input integer + :return: x + 1 + """ + res = x + 1 + return res + + +# ############################################################################# +# Test_sanity_check_function_cache +# ############################################################################# + + +class Test_sanity_check_function_cache(_BaseCacheTest): + """ + Test sanity_check_function_cache for validating function cache dicts. + """ + + def test1(self) -> None: + """ + Verify that sanity_check_function_cache passes for valid cache data. + """ + # Prepare inputs. + func_cache_data = {'{"args": [1], "kwargs": {}}': 2} + # Run test. 
+ hcacsimp.sanity_check_function_cache(func_cache_data) + # Check outputs (no exception raised). + + def test2(self) -> None: + """ + Verify that sanity_check_function_cache passes for empty dict when + assert_on_empty=False. + """ + # Prepare inputs. + func_cache_data: dict = {} + # Run test. + hcacsimp.sanity_check_function_cache( + func_cache_data, assert_on_empty=False + ) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_sanity_check_cache +# ############################################################################# + + +class Test_sanity_check_cache(_BaseCacheTest): + """ + Test sanity_check_cache for validating nested cache dicts. + """ + + def test1(self) -> None: + """ + Verify that sanity_check_cache passes for valid nested cache data. + """ + # Prepare inputs. + cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} + # Run test. + hcacsimp.sanity_check_cache(cache_data) + # Check outputs (no exception raised). + + def test2(self) -> None: + """ + Verify that sanity_check_cache passes for empty dict when + assert_on_empty=False. + """ + # Prepare inputs. + cache_data: dict = {} + # Run test. + hcacsimp.sanity_check_cache(cache_data, assert_on_empty=False) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_cache_data_to_str +# ############################################################################# + + +class Test_cache_data_to_str(_BaseCacheTest): + """ + Test cache_data_to_str for converting cache data to a string. + """ + + def test1(self) -> None: + """ + Verify that cache_data_to_str returns a string with the function name + and cache key. + """ + # Prepare inputs. + cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} + # Run test. + result = hcacsimp.cache_data_to_str(cache_data) + # Check outputs. 
+ self.assertIn("my_func", result) + self.assertIn('{"args": [1], "kwargs": {}}', result) + self.assertIn("42", result) + + +# ############################################################################# +# Test_get_cache_property_system +# ############################################################################# + + +class Test_get_cache_property_system(_BaseCacheTest): + """ + Test get_cache_property for system properties on unknown functions. + """ + + def test1(self) -> None: + """ + Verify that get_cache_property returns None for a system property when + the function is not in the cache property dict. + """ + # Run test. + val = hcacsimp.get_cache_property("_nonexistent_func_xyz", "type") + # Check outputs. + self.assertIsNone(val) + + +# ############################################################################# +# Test_set_cache_property_new_func +# ############################################################################# + + +class Test_set_cache_property_new_func(_BaseCacheTest): + """ + Test set_cache_property for a brand new function not yet in cache property. + """ + + def test1(self) -> None: + """ + Verify that set_cache_property creates a new entry for a function that + was not previously registered. + """ + # Run test. + hcacsimp.set_cache_property("_brand_new_func_xyz", "force_refresh", True) + # Check outputs. + val = hcacsimp.get_cache_property("_brand_new_func_xyz", "force_refresh") + self.assertTrue(val) + + +# ############################################################################# +# Test_cache_property_to_str_no_props +# ############################################################################# + + +class Test_cache_property_to_str_no_props(_BaseCacheTest): + """ + Test cache_property_to_str for a function with no properties in the cache. + """ + + def test1(self) -> None: + """ + Verify that cache_property_to_str returns the function name header even + when the function has no registered cache properties. 
+ """ + # Run test with a function name not in _CACHE_PROPERTY. + result = hcacsimp.cache_property_to_str("_nonexistent_func_xyz") + # Check outputs. + self.assertIn("_nonexistent_func_xyz", result) + + +# ############################################################################# +# Test__get_cache_file_name_auto_detect +# ############################################################################# + + +class Test__get_cache_file_name_auto_detect(_BaseCacheTest): + """ + Test _get_cache_file_name when cache type is None (auto-detect from disk). + """ + + def test1(self) -> None: + """ + Verify that _get_cache_file_name infers .pkl extension when a .pkl + file exists on disk. + """ + # Prepare inputs: create a valid .pkl file in the cache dir. + cache_dir = hcacsimp.get_cache_dir() + func_name = "_auto_detect_pkl_func" + pkl_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.pkl") + hcacsimp._save_func_cache_data_to_file(pkl_path, "pickle", {}) + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. + self.assertTrue(file_name.endswith(".pkl")) + + def test2(self) -> None: + """ + Verify that _get_cache_file_name infers .json extension when a .json + file exists on disk. + """ + # Prepare inputs: create a valid .json file in the cache dir. + cache_dir = hcacsimp.get_cache_dir() + func_name = "_auto_detect_json_func" + json_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.json") + hcacsimp._save_func_cache_data_to_file(json_path, "json", {}) + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. + self.assertTrue(file_name.endswith(".json")) + + def test3(self) -> None: + """ + Verify that _get_cache_file_name defaults to .json when no file exists. + """ + # Prepare inputs: use a brand new function name with no disk file. + func_name = "_no_file_func_xyz" + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. 
+ self.assertTrue(file_name.endswith(".json")) + + +# ############################################################################# +# Test__save_func_cache_data_to_file_infer +# ############################################################################# + + +class Test__save_func_cache_data_to_file_infer(_BaseCacheTest): + """ + Test _save_func_cache_data_to_file when cache_type is None (inferred from + file extension). + """ + + def test1(self) -> None: + """ + Verify that _save_func_cache_data_to_file infers pickle format from + .pkl extension when cache_type is None. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + file_name = os.path.join(scratch_dir, "tmp_test_infer.pkl") + data = {'{"args": [1], "kwargs": {}}': 42} + # Run test. + hcacsimp._save_func_cache_data_to_file(file_name, None, data) + # Check outputs. + self.assertTrue(os.path.exists(file_name)) + loaded = hcacsimp._load_func_cache_data_from_file(file_name, "pickle") + self.assertEqual(loaded, data) + + +# ############################################################################# +# Test__load_func_cache_data_from_file_infer +# ############################################################################# + + +class Test__load_func_cache_data_from_file_infer(_BaseCacheTest): + """ + Test _load_func_cache_data_from_file when cache_type is None (inferred + from file extension). + """ + + def test1(self) -> None: + """ + Verify that _load_func_cache_data_from_file infers pickle format from + .pkl extension when cache_type is None. + """ + # Prepare inputs: save a pickle file. + scratch_dir = self.get_scratch_space() + file_name = os.path.join(scratch_dir, "tmp_test_load_infer.pkl") + data = {'{"args": [5], "kwargs": {}}': 25} + hcacsimp._save_func_cache_data_to_file(file_name, "pickle", data) + # Run test with None cache_type (should infer from .pkl). + result = hcacsimp._load_func_cache_data_from_file(file_name, None) + # Check outputs. 
+ self.assertEqual(result, data) + + +# ############################################################################# +# Test_reset_disk_cache_no_file +# ############################################################################# + + +class Test_reset_disk_cache_no_file(_BaseCacheTest): + """ + Test reset_disk_cache when the target function has no disk cache file. + """ + + def test1(self) -> None: + """ + Verify that reset_disk_cache does not raise when the function has no + cache file on disk. + """ + # Prepare inputs: use a function that has never been cached to disk. + func_name = "_cached_json_double" + # Ensure no disk file exists. + hcacsimp.reset_disk_cache(func_name, interactive=False) + cache_file = hcacsimp._get_cache_file_name(func_name) + self.assertFalse(os.path.exists(cache_file)) + # Run test: reset again when no file exists (should not raise). + hcacsimp.reset_disk_cache(func_name, interactive=False) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_mock_cache +# ############################################################################# + + +class Test_mock_cache(_BaseCacheTest): + """ + Test mock_cache for inserting values directly into the cache. + """ + + def test1(self) -> None: + """ + Verify that mock_cache inserts a value into the function cache that can + be retrieved as a cache hit. + """ + # Prepare inputs. + func_name = "_cached_json_double" + cache_key = '{"args": [99], "kwargs": {}}' + value = 198 + # Run test. + hcacsimp.mock_cache(func_name, cache_key, value) + # Check outputs. + cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache[cache_key], value) + + def test2(self) -> None: + """ + Verify that a mocked cache value causes a cache hit when the decorated + function is called. + """ + # Prepare inputs. + func_name = "_cached_json_double" + cache_key = '{"args": [77], "kwargs": {}}' + value = 154 + # Run test. 
+ hcacsimp.mock_cache(func_name, cache_key, value) + result = _cached_json_double(77, abort_on_cache_miss=True) + # Check outputs. + self.assertEqual(result, value) + + +# ############################################################################# +# Test_mock_cache_from_args_kwargs +# ############################################################################# + + +class Test_mock_cache_from_args_kwargs(_BaseCacheTest): + """ + Test mock_cache_from_args_kwargs for inserting values via args/kwargs. + """ + + def test1(self) -> None: + """ + Verify that mock_cache_from_args_kwargs inserts the correct value into + the cache for the given args and kwargs. + """ + # Prepare inputs. + func_name = "_cached_json_double" + args = (55,) + kwargs: dict = {} + value = 110 + # Run test. + hcacsimp.mock_cache_from_args_kwargs(func_name, args, kwargs, value) + # Check outputs. + expected_key = '{"args": [55], "kwargs": {}}' + cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache[expected_key], value) + + +# ############################################################################# +# Test_mock_cache_from_disk +# ############################################################################# + + +class Test_mock_cache_from_disk(_BaseCacheTest): + """ + Test mock_cache_from_disk for bulk-inserting cache data from a dict. + """ + + def test1(self) -> None: + """ + Verify that mock_cache_from_disk populates the cache from a dict of + pre-computed values. + """ + # Prepare inputs. + func_name = "_cached_json_double" + func_cache_data = { + '{"args": [33], "kwargs": {}}': 66, + '{"args": [44], "kwargs": {}}': 88, + } + # Run test. + hcacsimp.mock_cache_from_disk(func_name, func_cache_data) + # Check outputs. 
+ cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache['{"args": [33], "kwargs": {}}'], 66) + self.assertEqual(cache['{"args": [44], "kwargs": {}}'], 88) + + +# ############################################################################# +# Test_simple_cache_intrinsic +# ############################################################################# + + +class Test_simple_cache_intrinsic(_BaseCacheTest): + """ + Test simple_cache decorator with a function whose name ends in _intrinsic. + """ + + def tear_down_test(self) -> None: + """ + Teardown including reset of the intrinsic function cache. + """ + super().tear_down_test() + hcacsimp.reset_cache("_test_intrinsic_func", interactive=False) + + def test1(self) -> None: + """ + Verify that the _intrinsic suffix is stripped and the cache key uses + the base function name. + """ + # Run test. + result = _test_intrinsic_func_intrinsic(5) + # Check outputs. + self.assertEqual(result, 15) + # Cache should be stored under the base name (without _intrinsic). + cache = hcacsimp.get_cache("_test_intrinsic_func") + self.assertIn('{"args": [5], "kwargs": {}}', cache) + + +# ############################################################################# +# Test_simple_cache_existing_type +# ############################################################################# + + +class Test_simple_cache_existing_type(_BaseCacheTest): + """ + Test that simple_cache preserves a pre-existing cache type setting. + """ + + def test1(self) -> None: + """ + Verify that applying simple_cache with cache_type='json' does not + override an existing 'pickle' type already set for the function. + """ + # Prepare inputs: set the type before decoration. + hcacsimp.set_cache_property("_inline_type_func", "type", "pickle") + + def _inline_type_func(x: int) -> int: + return x + + # Apply decorator with a different cache_type. + hcacsimp.simple_cache(cache_type="json")(_inline_type_func) + # Check outputs: type should remain 'pickle'. 
+ val = hcacsimp.get_cache_property("_inline_type_func", "type") + self.assertEqual(val, "pickle") + + +# ############################################################################# +# Test_simple_cache_exclude_keys +# ############################################################################# + + +class Test_simple_cache_exclude_keys(_BaseCacheTest): + """ + Test simple_cache decorator with exclude_keys parameter. + """ + + def tear_down_test(self) -> None: + """ + Teardown including reset of the exclude_keys test function cache. + """ + super().tear_down_test() + hcacsimp.reset_cache("_test_exclude_keys_func", interactive=False) + + def test1(self) -> None: + """ + Verify that calls with the same primary arg but different excluded + kwargs produce a single cache entry (the excluded key is ignored). + """ + # Run test: two calls with same x but different session_id. + result1 = _test_exclude_keys_func(5, session_id="abc") + result2 = _test_exclude_keys_func(5, session_id="xyz") + # Check outputs. + self.assertEqual(result1, 10) + self.assertEqual(result2, 10) + # Only one cache entry should exist. + cache = hcacsimp.get_cache("_test_exclude_keys_func") + self.assertEqual(len(cache), 1) + + +# ############################################################################# +# Test_simple_cache_no_write_through +# ############################################################################# + + +class Test_simple_cache_no_write_through(_BaseCacheTest): + """ + Test simple_cache decorator with write_through=False. + """ + + def tear_down_test(self) -> None: + """ + Teardown including reset of the no-write-through test function cache. + """ + super().tear_down_test() + hcacsimp.reset_cache("_test_no_write_through", interactive=False) + + def test1(self) -> None: + """ + Verify that with write_through=False the computed value is not + automatically persisted to disk after a function call. + """ + # Run test. 
+ result = _test_no_write_through(7) + self.assertEqual(result, 8) + # Reset memory cache so that reading goes to disk. + hcacsimp.reset_mem_cache("_test_no_write_through") + # Check outputs: disk cache should not contain the computed value. + disk_cache = hcacsimp.get_disk_cache("_test_no_write_through") + self.assertNotIn('{"args": [7], "kwargs": {}}', disk_cache) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py new file mode 100644 index 000000000..4ab1219a4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py @@ -0,0 +1,335 @@ +import logging +import os +from typing import Any, List + +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hcfile as hcfile + +_LOG = logging.getLogger(__name__) + + +def _create_test_file(self_: Any, filename: str, content: str) -> str: + """ + Create a test file with given content in the scratch directory. + + :param self_: Test case instance that provides the scratch directory + :param filename: Name of file to create + :param content: Content to write to file + :return: Full path to created file + """ + scratch_dir = self_.get_scratch_space() + file_path = os.path.join(scratch_dir, filename) + content = hprint.dedent(content) + hio.to_file(file_path, content) + return file_path + + +def _create_cfile(self_: Any, cfile_content: List[str]) -> str: + """ + Create a cfile with TODOs in the scratch directory.
+ + :param scratch_dir: Directory to create file in + :param cfile_content: List of TODO lines to write + :return: Full path to created cfile + """ + content = "\n".join(cfile_content) + return _create_test_file(self_, "cfile.txt", content) + + +# ############################################################################# +# Test_parse_cfile1 +# ############################################################################# + + +class Test_parse_cfile1(hunitest.TestCase): + def helper(self, cfile_content: str, expected: str) -> None: + """ + Helper function to test parsing a cfile. + + :param cfile_content: Content to write to the test cfile + :param expected: Expected output from parse_cfile + """ + # Prepare inputs. + cfile_path = _create_test_file(self, "cfile.txt", cfile_content) + # Run function under test. + actual = hcfile.parse_cfile(cfile_path) + actual = "\n".join(map(str, actual)) + # Check output. + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test parsing a cfile with valid entries. + """ + cfile_content = r""" + file1.py:10: Add docstring + file2.py:20: Add type hints + file3.py:30: Fix formatting + """ + expected = r""" + ('file1.py', '10', ' Add docstring') + ('file2.py', '20', ' Add type hints') + ('file3.py', '30', ' Fix formatting') + """ + self.helper(cfile_content, expected) + + def test2(self) -> None: + """ + Test parsing a cfile with valid entries. 
+ """ + cfile_content = r""" + dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] + dev_scripts_helpers/llms/llm_cli.py:23: [C0301(line-too-long), ] Line too long (109/100) [pylint] + helpers/hio.py: 'pandas' is imported multiple times [normalize_imports] + helpers/hmarkdown.py:770:38: W605 invalid escape sequence '\S' [flake8] + """ + expected = r""" + ('dev_scripts_helpers/llms/llm_transform.py', '63', "33: F821 undefined name '_extract_bullet_points' [flake8]") + ('dev_scripts_helpers/llms/llm_cli.py', '23', ' [C0301(line-too-long), ] Line too long (109/100) [pylint]') + ('helpers/hmarkdown.py', '770', "38: W605 invalid escape sequence '\\S' [flake8]") + """ + self.helper(cfile_content, expected) + + def test_empty_file(self) -> None: + """ + Test parsing an empty cfile. + """ + self.helper("", "") + + def test_invalid_entries(self) -> None: + """ + Test parsing a cfile with invalid entries that should be skipped. + """ + cfile_content = r""" + file1.py:10: Valid entry + Invalid line without proper format + file2.py:20: Another valid entry + :30: Missing filename + file3.py:: Missing line number + """ + expected = r""" + ('file1.py', '10', ' Valid entry') + ('file2.py', '20', ' Another valid entry') + (' ', '30', ' Missing filename') + """ + self.helper(cfile_content, expected) + + +# ############################################################################# +# Test_inject_todos_from_cfile1 +# ############################################################################# + + +class Test_inject_todos_from_cfile1(hunitest.TestCase): + def _inject_todos(self, cfile_content: str) -> None: + """ + Helper to inject TODOs with standard parameters. + """ + todo_user = "user" + comment_prefix = "#" + hcfile.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) + + def test1(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file. + """ + # Create a test file. 
+ test_file_content = """ + def hello(msg): + print(msg) + + def world(): + print("world") + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:1: Add type hints.", + f"{file_path}:4: Add docstring.", + ] + _create_cfile(self, cfile_content) + # Run the function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add type hints. + def hello(msg): + print(msg) + + # TODO(user): Add docstring. + def world(): + print("world") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_one_line_file(self) -> None: + """ + Test injecting TODOs into an empty file. + """ + # Create an empty test file + test_file_content = """ + print("hello") + """ + file_path = _create_test_file(self, "empty.py", test_file_content) + # Create cfile with TODOs + cfile_content = [f"{file_path}:1: Add content to empty file."] + _create_cfile(self, cfile_content) + # Run the function under test + self._inject_todos("\n".join(cfile_content)) + # Check output + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add content to empty file. + print("hello") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_invalid_line_numbers(self) -> None: + """ + Test handling of TODOs with invalid line numbers. + """ + # Create a test file + test_file_content = """ + line1 + line2 + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with invalid line numbers + cfile_content = [ + f"{file_path}:999: This line number doesn't exist.", + ] + _create_cfile(self, cfile_content) + # This should raise an assertion error due to invalid line numbers + with self.assertRaises(AssertionError) as err: + self._inject_todos("\n".join(cfile_content)) + # Check output. 
+ expected = """ + ################################################################################ + * Failed assertion * + 998 < 2 + ################################################################################ + """ + self.assert_equal( + str(err.exception), expected, dedent=True, fuzzy_match=True + ) + + def test2(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file with a complex + class. + """ + # Create a test file. + test_file_content = """ + import logging + from typing import List, Optional + + class DataProcessor: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + def process_batch(self, items): + for item in items: + self.data.append(self._transform(item)) + + def _transform(self, item): + return item.upper() + + def get_results(self): + return self.data + + def clear(self): + self.data = [] + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:4: Add class docstring explaining purpose and usage", + f"{file_path}:5: Add type hints for instance variables", + f"{file_path}:9: Add type hints for items parameter", + f"{file_path}:10: Consider adding batch size validation", + f"{file_path}:13: Add error handling for non-string inputs", + f"{file_path}:16: Add return type hint and docstring", + f"{file_path}:19: Add docstring explaining clear behavior", + ] + _create_cfile(self, cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. 
+ actual = hio.from_file(file_path) + expected = """ + import logging + from typing import List, Optional + + # TODO(user): Add class docstring explaining purpose and usage + class DataProcessor: + # TODO(user): Add type hints for instance variables + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + # TODO(user): Add type hints for items parameter + def process_batch(self, items): + # TODO(user): Consider adding batch size validation + for item in items: + self.data.append(self._transform(item)) + + # TODO(user): Add error handling for non-string inputs + def _transform(self, item): + return item.upper() + + # TODO(user): Add return type hint and docstring + def get_results(self): + return self.data + + # TODO(user): Add docstring explaining clear behavior + def clear(self): + self.data = [] + """ + self.assert_equal(actual, expected, dedent=True) + + def test3(self) -> None: + """ + Test injecting TODOs from a cfile into multiple Python files. + """ + # Create first test file. + test_file1_content = """ + def foo(): + pass + """ + file_path1 = _create_test_file(self, "test1.py", test_file1_content) + # Create second test file. + test_file2_content = """ + def bar(): + return None + """ + file_path2 = _create_test_file(self, "test2.py", test_file2_content) + # Create cfile. + cfile_content = [ + f"{file_path1}:1: Add docstring for foo.", + f"{file_path2}:1: Add docstring for bar.", + f"{file_path2}:2: Add type hint for return.", + ] + _create_cfile(self, cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual1 = hio.from_file(file_path1) + expected1 = """ + # TODO(user): Add docstring for foo. + def foo(): + pass + """ + self.assert_equal(actual1, expected1, dedent=True) + # + actual2 = hio.from_file(file_path2) + expected2 = """ + # TODO(user): Add docstring for bar. + def bar(): + # TODO(user): Add type hint for return. 
+ return None + """ + self.assert_equal(actual2, expected2, dedent=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py new file mode 100644 index 000000000..d8f2c19e2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py @@ -0,0 +1,81 @@ +import logging +import os + +import pandas as pd + +import helpers.hcsv as hcsv +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_convert_csv_to_dict +# ############################################################################# + + +class Test_convert_csv_to_dict(hunitest.TestCase): + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + actual_result = hcsv.convert_csv_to_dict(test_csv_path, remove_nans=True) + expected_result = { + "col1": ["a", "b", "c", "d"], + "col2": ["a", "b"], + "col3": ["a", "b", "c"], + } + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_from_typed_csv +# ############################################################################# + + +class Test_from_typed_csv(hunitest.TestCase): + """ + Check the opportunity to load correctly. + + .csv file with dtype param, which exist in .types prefix file. And + finally it checks that dtypes of loaded dataframe didn't change + compared with the original one. 
+ """ + + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + os.path.join(dir_name, "test.csv.types") + actual_result = ( + hcsv.from_typed_csv(test_csv_path) + .dtypes.apply(lambda x: x.name) + .to_dict() + ) + expected_result = { + "A": "int64", + "B": "float64", + "C": "object", + "D": "object", + "E": "int64", + } + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_to_typed_csv +# ############################################################################# + + +class Test_to_typed_csv(hunitest.TestCase): + """ + Check whether the function 'to_typed_csv' create file with '.types' prefix + or not. + """ + + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + test_csv_types_path = os.path.join(dir_name, "test.csv.types") + df = pd.read_csv(test_csv_path) + hcsv.to_typed_csv(df, test_csv_path) + self.assertTrue(os.path.exists(test_csv_types_path)) + os.remove(test_csv_types_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py new file mode 100644 index 000000000..aaa5c0c9e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py @@ -0,0 +1,299 @@ +""" +Import as: + +import helpers.test.test_dataframe as httdat +""" + +import collections +import logging +import os + +import numpy as np +import pandas as pd + +import helpers.hdataframe as hdatafr +import helpers.hpandas as hpandas +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# 
############################################################################# +# Test_filter_data_by_values1 +# ############################################################################# + + +class Test_filter_data_by_values1(hunitest.TestCase): + def test_conjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + data = data.add_prefix("col_") + filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_values(data, filters, "and", info) + # TODO(gp): Factor out the common code. + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}\n" + f"{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + def test_disjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + data = data.add_prefix("col_") + filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_values(data, filters, "or", info) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}" + f"\n{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + +# ############################################################################# +# Test_filter_data_by_comparison +# ############################################################################# + + +class Test_filter_data_by_comparison(hunitest.TestCase): + def test_conjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + data = data.add_prefix("col_") + filters = {"col_0": (("gt", 1), ("lt", 7)), "col_1": ("eq", 5)} + info: 
collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_comparison( + data, filters, "and", info + ) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}\n" + f"{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + def test_disjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + data = data.add_prefix("col_") + filters = {"col_0": ("gt", 2), "col_1": ("eq", 5)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_comparison( + data, filters, "or", info + ) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}" + f"\n{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + +# ############################################################################# +# TestFilterDataByMethod +# ############################################################################# + + +class TestFilterDataByMethod(hunitest.TestCase): + """ + Test was generated automatically with Playback. + """ + + def test1(self) -> None: + # Define input variables. + input_path = os.path.join(self.get_input_dir(), "test.txt") + data = pd.read_csv(input_path, index_col=0) + filters = { + "Frequency": {"isin": {"values": ["Monthly", "Weekly", "Daily"]}}, + "source_code": {"isin": {"values": ["WIND"]}}, + "is_downloaded": {"isin": {"values": ["success"]}}, + } + mode = "and" + info: collections.OrderedDict = collections.OrderedDict() + # Call function to test. + actual = hdatafr.filter_data_by_method( + df=data, filters=filters, mode=mode, info=info + ) + actual = hpandas.df_to_str(actual, precision=3) + # Check output. 
+ self.check_string(actual, fuzzy_match=True) + + +# ############################################################################# +# Test_apply_nan_mode +# ############################################################################# + + +class Test_apply_nan_mode(hunitest.TestCase): + @staticmethod + def _get_series_with_nans(seed: int) -> pd.Series: + date_range = {"start": "1/1/2010", "periods": 40, "freq": "M"} + series = hpandas.get_random_df( + num_cols=1, + seed=seed, + date_range_kwargs=date_range, + )[0] + series[:3] = np.nan + series[-3:] = np.nan + series[5:7] = np.nan + return series + + def test1(self) -> None: + """ + Test for `mode=leave_unchanged`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series) + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test2(self) -> None: + """ + Test for `mode="drop"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="drop") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test3(self) -> None: + """ + Test for `mode="ffill"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="ffill") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test4(self) -> None: + """ + Test for `mode="ffill_and_drop_leading"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="ffill_and_drop_leading") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test5(self) -> None: + """ + Test for `mode="fill_with_zero"`. 
+ """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="fill_with_zero") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + # Smoke test for empty input. + def test6(self) -> None: + series = pd.Series(dtype="float64") + hdatafr.apply_nan_mode(series) + + +# ############################################################################# +# Test_compute_points_per_year_for_given_freq +# ############################################################################# + + +class Test_compute_points_per_year_for_given_freq(hunitest.TestCase): + def test1(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("T") + np.testing.assert_equal(actual, 525780.125) + + def test2(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("B") + np.testing.assert_equal(actual, 260.875) + + def test3(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("D") + np.testing.assert_equal(actual, 365.25) + + def test4(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("W") + np.testing.assert_equal(actual, 52.25) + + def test5(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("M") + np.testing.assert_equal(actual, 12.0) + + def test6(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("Y") + np.testing.assert_equal(actual, 1.0) + + def test7(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("0D") + np.testing.assert_equal(actual, 0.0) + + +# ############################################################################# +# TestRemoveDuplicates +# ############################################################################# + + +class TestRemoveDuplicates(hunitest.TestCase): + def test_remove_duplicates1(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": 
[3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = ["dummy_value_1", "dummy_value_2"] + control_column = None + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 0 1 A 3 3 + 1 2 A 2 2""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_remove_duplicates2(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = None + control_column = "knowledge_timestamp" + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 0 1 A 3 3 + 1 2 A 2 2 + 2 1 A 1 1""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_remove_duplicates3(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = ["dummy_value_1", "dummy_value_2"] + control_column = "knowledge_timestamp" + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 1 2 A 2 2 + 2 1 A 1 1""" + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py new file mode 100644 index 000000000..fac073570 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py @@ -0,0 +1,932 @@ +import datetime +import logging + +import pandas as pd +import pytz + +import helpers.hdatetime as hdateti +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + +_STR_TS_NAIVE = "2021-01-04 09:30:00" +_STR_TS_UTC = "2021-01-04 09:30:00-00:00" +_STR_TS_ET = "2021-01-04 09:30:00-05:00" + +_PD_TS_NAIVE = pd.Timestamp("2021-01-04 09:30:00") +_PD_TS_UTC = pd.Timestamp("2021-01-04 09:30:00-00:00", tz="UTC") +_PD_TS_ET = pd.Timestamp("2021-01-04 09:30:00-05:00", tz="America/New_York") + +_DT_DT_NAIVE = datetime.datetime(2021, 1, 4, 9, 30, 0) +_DT_DT_UTC = pytz.timezone("UTC").localize(_DT_DT_NAIVE) +_DT_DT_ET = pytz.timezone("America/New_York").localize(_DT_DT_NAIVE) + + +# ############################################################################# +# Test_dassert_is_datetime1 +# ############################################################################# + + +class Test_dassert_is_datetime1(hunitest.TestCase): + def test_is_datetime1(self) -> None: + """ + Test valid datetime objects. + """ + objs = [ + _STR_TS_NAIVE, + _STR_TS_UTC, + _STR_TS_ET, + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + hdateti.dassert_is_datetime(obj) + + def test_is_datetime_fail1(self) -> None: + """ + Test invalid datetime objects. + """ + objs = [0, 0.0] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + with self.assertRaises(AssertionError): + hdateti.dassert_is_datetime(obj) + + def test_is_strict_datetime1(self) -> None: + """ + Test valid datetime objects. 
+ """ + objs = [ + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + hdateti.dassert_is_strict_datetime(obj) + + def test_is_strict_datetime_fail1(self) -> None: + """ + Test invalid datetime objects. + """ + objs = [0, _STR_TS_NAIVE, _STR_TS_UTC, _STR_TS_ET, "hello"] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + with self.assertRaises(AssertionError): + hdateti.dassert_is_strict_datetime(obj) + + +# ############################################################################# +# Test_dassert_tz1 +# ############################################################################# + + +class Test_dassert_tz1(hunitest.TestCase): + def test_datetime_conversions(self) -> None: + # Get a tz-naive datetime. + dt = datetime.datetime(2020, 1, 5, 9, 30, 0) + hdateti.dassert_is_tz_naive(dt) + # Localize it to UTC. + dt_utc = pytz.timezone("UTC").localize(dt) + hdateti.dassert_has_tz(dt_utc) + hdateti.dassert_has_UTC_tz(dt_utc) + # Convert to ET. + dt_et = dt_utc.astimezone(pytz.timezone("US/Eastern")) + hdateti.dassert_has_tz(dt_et) + hdateti.dassert_has_ET_tz(dt_et) + # Convert it back to UTC. + dt_utc2 = dt_et.astimezone(pytz.timezone("UTC")) + hdateti.dassert_has_tz(dt_utc2) + hdateti.dassert_has_UTC_tz(dt_utc2) + self.assertEqual(dt_utc, dt_utc2) + # Make it naive. 
+ dt2 = dt_utc2.replace(tzinfo=None) + hdateti.dassert_is_tz_naive(dt2) + self.assertEqual(dt, dt2) + + def test_dassert_is_datetime1(self) -> None: + for obj in [ + _STR_TS_NAIVE, + _STR_TS_UTC, + _STR_TS_ET, + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ]: + hdateti.dassert_is_datetime(obj) + + def test_dassert_is_datetime_assert1(self) -> None: + datetime_ = 5 + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_datetime(datetime_) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = r""" + * Failed assertion * + Instance of '5' is '' instead of '(, , )' + datetime_='5' of type '' is not a DateTimeType + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_to_datetime1(self) -> None: + """ + Apply `to_datetime` to a naive datetime. + """ + for obj in [ + _STR_TS_NAIVE, + _PD_TS_NAIVE, + _DT_DT_NAIVE, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_NAIVE + self.assertEqual(actual, expected) + # Check the tz info. + hdateti.dassert_is_tz_naive(actual) + with self.assertRaises(AssertionError): + hdateti.dassert_has_tz(actual) + hdateti.dassert_has_UTC_tz(actual) + hdateti.dassert_has_ET_tz(actual) + + def test_to_datetime2(self) -> None: + """ + Apply `to_datetime` to a UTC datetime. + """ + for obj in [ + _STR_TS_UTC, + _PD_TS_UTC, + _DT_DT_UTC, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_UTC + self.assertEqual(actual, expected) + # Check the tz info. + hdateti.dassert_has_tz(actual) + hdateti.dassert_has_UTC_tz(actual) + with self.assertRaises(AssertionError): + hdateti.dassert_is_tz_naive(actual) + hdateti.dassert_has_ET_tz(actual) + + def test_to_datetime3(self) -> None: + """ + Apply `to_datetime` to an ET datetime. 
+ """ + for obj in [ + _STR_TS_ET, + _PD_TS_ET, + _DT_DT_ET, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_ET + self.assertEqual(str(actual), str(expected)) + + +# ############################################################################# +# Test_dassert_tz_compatible1 +# ############################################################################# + + +class Test_dassert_tz_compatible1(hunitest.TestCase): + def test_dassert_compatible_timestamp1(self) -> None: + """ + Both datetimes are naive. + """ + for datetime1 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: + for datetime2 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: + hdateti.dassert_tz_compatible(datetime1, datetime2) + + def test_dassert_compatible_timestamp2(self) -> None: + """ + Both datetimes have tz info. + """ + for datetime1 in [_PD_TS_UTC, _PD_TS_ET]: + for datetime2 in [_DT_DT_UTC, _DT_DT_ET]: + hdateti.dassert_tz_compatible(datetime1, datetime2) + + def test_dassert_compatible_timestamp_assert1(self) -> None: + """ + Test a single not compatible pair of datetimes and check the raised + exception. + """ + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_tz_compatible(_PD_TS_NAIVE, _DT_DT_UTC) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = """ + * Failed assertion * + 'False' + == + 'True' + datetime1='2021-01-04 09:30:00' and datetime2='2021-01-04 09:30:00+00:00' are not compatible + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_dassert_compatible_timestamp_assert2(self) -> None: + """ + Test a pairs of non-compatible datetimes making sure the assertion is + raised. 
+ """ + for datetime1 in [ + _PD_TS_NAIVE, + _DT_DT_NAIVE, + _PD_TS_NAIVE, + _DT_DT_NAIVE, + ]: + for datetime2 in [_PD_TS_UTC, _PD_TS_ET, _DT_DT_UTC, _DT_DT_ET]: + with self.assertRaises(AssertionError): + hdateti.dassert_tz_compatible(datetime1, datetime2) + + +# ############################################################################# +# Test_dassert_have_same_tz1 +# ############################################################################# + + +class Test_dassert_have_same_tz1(hunitest.TestCase): + """ + Test an assertion that checks that timezones are equal for input + timestamps. + """ + + def test1(self) -> None: + """ + Timezones are equal. + """ + hdateti.dassert_have_same_tz(_DT_DT_ET, _PD_TS_ET) + + def test2(self) -> None: + """ + Both timestamps are tz-naive. + """ + hdateti.dassert_have_same_tz(_PD_TS_NAIVE, _DT_DT_NAIVE) + + def test3(self) -> None: + """ + Different timezones. + """ + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_have_same_tz(_DT_DT_ET, _DT_DT_UTC) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = """ + * Failed assertion * + 'America/New_York' + == + 'UTC' + datetime1=2021-01-04 09:30:00-05:00 (datetime1.tzinfo=America/New_York) datetime2=2021-01-04 09:30:00+00:00 (datetime2.tzinfo=UTC) + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Same timezone but different DST mode (i.e. EST vs EDT). 
+ """ + ts_est = pd.Timestamp("2023-03-12 01:55:00-05:00", tz="America/New_York") + ts_edt = pd.Timestamp("2023-03-12 03:00:00-04:00", tz="America/New_York") + hdateti.dassert_have_same_tz(ts_est, ts_edt) + + +# ############################################################################# +# Test_get_current_time1 +# ############################################################################# + + +class Test_get_current_time1(hunitest.TestCase): + def test_get_current_time_UTC(self) -> None: + tz = "UTC" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_has_UTC_tz(dt) + + def test_get_current_time_ET(self) -> None: + tz = "ET" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_has_ET_tz(dt) + + def test_get_current_time_naive_UTC(self) -> None: + tz = "naive_UTC" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_is_tz_naive(dt) + + def test_get_current_time_naive_ET(self) -> None: + tz = "naive_ET" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_is_tz_naive(dt) + + +# ############################################################################# +# Test_to_generalized_datetime +# ############################################################################# + + +class Test_to_generalized_datetime(hunitest.TestCase): + def test_srs1(self) -> None: + srs = pd.Series(["2010-01-01", "2010-01-02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_index1(self) -> None: + idx = pd.Index(["2010-01-01", "2010-01-02"]) + actual = hdateti.to_generalized_datetime(idx) + expected = pd.Index( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_index_equal(actual, expected) + + def test_daily1(self) -> None: + srs = pd.Series(["1 Jan 
2010", "2 Jan 2010"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_weekly1(self) -> None: + srs = pd.Series(["2021-W14", "2021-W15"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-04-10"), pd.Timestamp("2021-04-17")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_semiannual1(self) -> None: + srs = pd.Series(["2021-S1", "2021-S2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_semiannual2(self) -> None: + srs = pd.Series(["2021/S1", "2021/S2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_bimonthly1(self) -> None: + srs = pd.Series(["2021-B1", "2021-B2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-01-01"), pd.Timestamp("2021-03-01")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly1(self) -> None: + srs = pd.Series(["2020-M1", "2020-M2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly2(self) -> None: + srs = pd.Series(["2020M01", "2020M02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly3(self) -> None: + srs = pd.Series(["2020-01", "2020-02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), 
pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly4(self) -> None: + srs = pd.Series(["2020 Jan", "2020 Feb"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly5(self) -> None: + srs = pd.Series(["January 2020", "February 2020"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly1(self) -> None: + srs = pd.Series(["2020-Q1", "2020-Q2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly2(self) -> None: + srs = pd.Series(["2020Q1", "2020Q2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly3(self) -> None: + srs = pd.Series(["Q1 2020", "Q2 2020"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_annual1(self) -> None: + srs = pd.Series(["2021", "2022"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-12-31"), pd.Timestamp("2022-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + +# ############################################################################# +# Test_find_bar_timestamp1 +# ############################################################################# + + +class Test_find_bar_timestamp1(hunitest.TestCase): + """ + Use mode="round". 
+ """ + + def helper1(self, current_timestamp: pd.Timestamp) -> None: + bar_duration_in_secs = 15 * 60 + max_distance_in_secs = 10 + actual = hdateti.find_bar_timestamp( + current_timestamp, + bar_duration_in_secs, + max_distance_in_secs=max_distance_in_secs, + ) + expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test1(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.helper1(current_timestamp) + + def test2(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:00:05", tz="UTC") + self.helper1(current_timestamp) + + def test3(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") + self.helper1(current_timestamp) + + def test4(self) -> None: + current_timestamp = pd.Timestamp( + "2021-09-09 08:01:59.500000+0000", tz="UTC" + ) + bar_duration_in_secs = 1 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="round" + ) + expected = pd.Timestamp("2021-09-09T08:02:00+0000", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + # /////////////////////////////////////////////////////////////////////////// + + def test5(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:20", tz="UTC") + with self.assertRaises(AssertionError) as cm: + self.helper1(current_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 40 <= 10 + current_timestamp=2021-09-09 07:59:20+00:00 is too distant from bar_timestamp=2021-09-09 08:00:00+00:00 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test6(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:10:20", tz="UTC") + with self.assertRaises(AssertionError) as cm: + self.helper1(current_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 280 <= 10 + current_timestamp=2021-09-09 08:10:20+00:00 is too distant from bar_timestamp=2021-09-09 08:15:00+00:00 + 
""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_find_bar_timestamp2 +# ############################################################################# + + +class Test_find_bar_timestamp2(hunitest.TestCase): + """ + Use mode="floor". + """ + + def test1(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") + bar_duration_in_secs = 15 * 60 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T07:45:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:01:55", tz="UTC") + bar_duration_in_secs = 15 * 60 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + current_timestamp = pd.Timestamp( + "2021-09-09 08:01:59.500000+0000", tz="UTC" + ) + bar_duration_in_secs = 1 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T08:01:59+0000", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_convert_seconds_to_minutes +# ############################################################################# + + +class Test_convert_seconds_to_minutes(hunitest.TestCase): + def test1(self) -> None: + """ + Check that conversion is implemented correcty. + """ + num_secs = 300 + actual = hdateti.convert_seconds_to_minutes(num_secs) + expected = int(num_secs / 60) + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Check that an error is raised when input is not an integer number of + minutes. 
+ """ + num_secs = 10 + with self.assertRaises(AssertionError) as cm: + hdateti.convert_seconds_to_minutes(num_secs) + actual = str(cm.exception) + expected = """ + * Failed assertion * + '10' + == + '0' + num_secs=10 is not an integer number of minutes + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_convert_unix_epoch_to_timestamp +# ############################################################################# + + +class Test_convert_unix_epoch_to_timestamp(hunitest.TestCase): + def test1(self) -> None: + """ + Test with default parameter values. + """ + epoch = 1631145600000 + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch) + expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Test with specified unit. + """ + epoch = 1631145600 + unit = "s" + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, unit=unit) + expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Test with specified timezone. + """ + epoch = 1631145600000 + tz = "US/Pacific" + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, tz=tz) + expected = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_convert_timestamp_to_unix_epoch +# ############################################################################# + + +class Test_convert_timestamp_to_unix_epoch(hunitest.TestCase): + def test1(self) -> None: + """ + Test with default parameter values. 
+ """ + timestamp = pd.Timestamp("2021-09-09") + actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) + expected = 1631145600000 + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Test with specified unit. + """ + timestamp = pd.Timestamp("2021-09-09") + unit = "s" + actual = hdateti.convert_timestamp_to_unix_epoch( + timestamp=timestamp, unit=unit + ) + expected = 1631145600 + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Test for a timestamp with specified timezone. + """ + timestamp = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") + actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) + expected = 1631145600000 + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_str_to_timestamp1 +# ############################################################################# + + +class Test_str_to_timestamp1(hunitest.TestCase): + """ + Test if string representation of datetime is converted correctly. 
+ """ + + def test1(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has a valid pattern for `datetime_str` + """ + datetime_str = "20230728_150513" + timezone_info = "US/Eastern" + datetime_format = "%Y%m%d_%H%M%S" + actual = hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=datetime_format + ) + expected = pd.Timestamp("2023-07-28 15:05:13-0400", tz="US/Eastern") + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has an valid pattern for `datetime_str` + - `timezone_info` is UTC + """ + datetime_str = "20230728_150513" + timezone_info = "UTC" + format = "%Y%m%d_%H%M%S" + actual = hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=format + ) + expected = pd.Timestamp("2023-07-28 15:05:13+0000", tz="UTC") + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has an invalid pattern for `datetime_str` + """ + datetime_str = "28-07-2023 15:05:13" + timezone_info = "US/Eastern" + datetime_format = "%Y%m%d_%H%M%S" + # The datetime format does not match the string representation of datetime. + with self.assertRaises(ValueError) as err: + hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=datetime_format + ) + actual = str(err.exception) + self.check_string(actual) + + def test4(self) -> None: + """ + - `datetime_str` has an invalid format + - `datetime_format` is not defined + """ + datetime_str = "qwe28abc07-201234" + timezone_info = "US/Eastern" + # Invalid datetime, should raise a ValueError. 
+ with self.assertRaises(ValueError) as err: + hdateti.str_to_timestamp(datetime_str, timezone_info) + actual = str(err.exception) + self.check_string(actual) + + +# ############################################################################# +# Test_dassert_str_is_date +# ############################################################################# + + +class Test_dassert_str_is_date(hunitest.TestCase): + """ + Test that the function checks a string representation of date correctly. + """ + + def test1(self) -> None: + """ + - date has a valid format + """ + date_str = "20221101" + hdateti.dassert_str_is_date(date_str) + + def test2(self) -> None: + """ + - date has an invalid format + """ + date = "2022-11-01" + with self.assertRaises(ValueError) as err: + hdateti.dassert_str_is_date(date) + actual = str(err.exception) + self.check_string(actual) + + +# ############################################################################# +# Test_dassert_is_valid_timestamp +# ############################################################################# + + +class Test_dassert_is_valid_timestamp(hunitest.TestCase): + def test1(self) -> None: + """ + Test should not raise an exception when timestamp has a timezone. + """ + timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + hdateti.dassert_is_valid_timestamp(timestamp) + + def test2(self) -> None: + """ + Test should raise an exception when timestamp is without timezone info. + """ + # Set inputs. + timestamp = pd.Timestamp("2021-01-04 09:30:00") + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_valid_timestamp(timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 'None' is not 'None' + datetime_='2021-01-04 09:30:00' doesn't have timezone info + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test should not raise an exception when timestamp is none. 
+ """ + timestamp = None + hdateti.dassert_is_valid_timestamp(timestamp) + + def test4(self) -> None: + """ + Test should raise an exception when timestamp is of type string. + """ + # Set input. + timestamp = "2021-01-04 09:30:00" + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_valid_timestamp(timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '2021-01-04 09:30:00' is '' instead of '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_timestamp_lt +# ############################################################################# + + +class Test_dassert_timestamp_lt(hunitest.TestCase): + def test1(self) -> None: + """ + Test with valid timestamps where start is less than end. + """ + start_timestamp = pd.Timestamp("2021-01-02 09:30:00-00:00", tz="UTC") + end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test2(self) -> None: + """ + Test with equal timestamps, this is should raise an exception. + """ + # Set inputs. + start_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2021-02-02 09:30:00+00:00 < 2021-02-02 09:30:00+00:00 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test with start timestamp greater than end timestamp, this is should + raise an exception. + """ + # Set inputs. 
+ start_timestamp = pd.Timestamp( + "2021-02-04 09:30:00-05:00", tz="America/New_York" + ) + end_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2021-02-04 09:30:00-05:00 < 2021-01-04 09:30:00-05:00 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test with start timestamp as None. + """ + start_timestamp = None + end_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test5(self) -> None: + """ + Test with end timestamp as None. + """ + start_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + end_timestamp = None + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test6(self) -> None: + """ + Test with both timestamps as None. + """ + start_timestamp = None + end_timestamp = None + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py new file mode 100644 index 000000000..9dd38d00e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py @@ -0,0 +1,934 @@ +import collections +import logging +from typing import List, Tuple + +import helpers.hdbg as hdbg +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Make sure the coverage is 100%. 
+ +# ############################################################################# + + +# ############################################################################# +# Test_dassert1 +# ############################################################################# + + +# TODO(gp): Use a self.assert_equal() instead of a check_string() since this +# code needs to be stable. +class Test_dassert1(hunitest.TestCase): + """ + Test `dassert()`. + """ + + def test1(self) -> None: + """ + An assertion that is verified. + """ + hdbg.dassert(True) + + def test2(self) -> None: + """ + An assertion that is not verified. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False) + self.check_string(str(cm.exception)) + + def test3(self) -> None: + """ + An assertion with a message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, msg="hello") + self.check_string(str(cm.exception)) + + def test4(self) -> None: + """ + An assertion with a message to format. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s", "world") + self.check_string(str(cm.exception)) + + def test5(self) -> None: + """ + Too many parameters. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s", "world", "too_many") + self.check_string(str(cm.exception)) + + def test6(self) -> None: + """ + Not enough parameters. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s") + self.check_string(str(cm.exception)) + + def test7(self) -> None: + """ + Common error of calling `dassert()` instead of `dassert_eq()`. + + According to the user's intention the assertion should trigger, + but, because of using `dassert()` instead of `dassert_eq()`, the + assertion will not trigger. We notice that the user passed a + list instead of a string as `msg` and raise. 
+ """ + with self.assertRaises(AssertionError) as cm: + y = ["world"] + hdbg.dassert(y, ["hello"]) + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_eq1 +# ############################################################################# + + +class Test_dassert_eq1(hunitest.TestCase): + def test1(self) -> None: + hdbg.dassert_eq(1, 1) + + def test2(self) -> None: + hdbg.dassert_eq(1, 1, msg="hello world") + + def test3(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, msg="hello world") + self.check_string(str(cm.exception)) + + def test4(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, "hello %s", "world") + self.check_string(str(cm.exception)) + + def test5(self) -> None: + """ + Raise assertion with incorrect message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, "hello %s") + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_misc1 +# ############################################################################# + + +# TODO(gp): Break it in piece. 
+class Test_dassert_misc1(hunitest.TestCase): + # dassert_in + + def test_in1(self) -> None: + hdbg.dassert_in("a", "abc") + + def test_in2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_in("a", "xyz".split()) + self.check_string(str(cm.exception)) + + # dassert_is + + def test_is1(self) -> None: + a = None + hdbg.dassert_is(a, None) + + def test_is2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is("a", None) + self.check_string(str(cm.exception)) + + # dassert_isinstance + + def test_is_instance1(self) -> None: + hdbg.dassert_isinstance("a", str) + + def test_is_instance2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_isinstance("a", int) + self.check_string(str(cm.exception)) + + def test_is_instance3(self) -> None: + hdbg.dassert_isinstance("a", (str, int)) + + def test_is_instance4(self) -> None: + hdbg.dassert_isinstance(5.0, (float, int)) + + def test_is_instance5(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_isinstance("a", (float, int)) + # TODO(gp): Replace all check_string with assert_equal + self.check_string(str(cm.exception)) + + # dassert_set_eq + + def test_set_eq1(self) -> None: + a = [1, 2, 3] + b = [2, 3, 1] + hdbg.dassert_set_eq(a, b) + + def test_set_eq2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [2, 2, 1] + hdbg.dassert_set_eq(a, b) + # Check. + actual = str(cm.exception) + expected = """ + * Failed assertion * + val1 - val2=[3] + val2 - val1=[] + val1=[1, 2, 3] + set eq + val2=[1, 2] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_is_subset + + def test_is_subset1(self) -> None: + a = [1, 2] + b = [2, 1, 3] + hdbg.dassert_is_subset(a, b) + + def test_is_subset2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [4, 2, 1] + hdbg.dassert_is_subset(a, b) + # Check. 
+ actual = str(cm.exception) + expected = """ + * Failed assertion * + val1=[1, 2, 3] + issubset + val2=[1, 2, 4] + val1 - val2=[3] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_not_intersection + + def test_not_intersection1(self) -> None: + a = [1, 2, 3] + b = [4, 5] + hdbg.dassert_not_intersection(a, b) + + def test_not_intersection2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [4, 2, 1] + hdbg.dassert_not_intersection(a, b) + actual = str(cm.exception) + expected = """ + * Failed assertion * + val1=[1, 2, 3] + has no intersection + val2=[1, 2, 4] + val1.intersection(val2)=[1, 2] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_no_duplicates + + def test_no_duplicates1(self) -> None: + a = [1, 2, 3] + hdbg.dassert_no_duplicates(a) + + def test_no_duplicates2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 3, 3] + hdbg.dassert_no_duplicates(a) + self.check_string(str(cm.exception)) + + # dassert_is_sorted + + def test_is_sorted1(self) -> None: + a = [1, 2, 3] + hdbg.dassert_is_sorted(a) + + def test_is_sorted2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 4, 3] + hdbg.dassert_is_sorted(a) + self.check_string(str(cm.exception)) + + def test_is_sorted3(self) -> None: + """ + Test an array that is sorted descending. + """ + a = [3, 2, 2] + hdbg.dassert_is_sorted(a, sort_kwargs={"reverse": True}) + + def test_is_sorted4(self) -> None: + """ + Test an array that is not sorted descending. 
+ """ + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 4, 3] + sort_kwargs = {"reverse": True} + hdbg.dassert_is_sorted(a, sort_kwargs=sort_kwargs) + self.check_string(str(cm.exception)) + + # dassert_eq_all + + def test_eq_all1(self) -> None: + a = [1, 2, 3] + b = [1, 2, 3] + hdbg.dassert_eq_all(a, b) + + def test_eq_all2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [1, 2, 4] + hdbg.dassert_eq_all(a, b) + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_lgt1 +# ############################################################################# + + +class Test_dassert_lgt1(hunitest.TestCase): + def test1(self) -> None: + """ + No assertion raised since `0 <= 0 <= 3`. + """ + hdbg.dassert_lgt( + 0, 0, 3, lower_bound_closed=True, upper_bound_closed=True + ) + + def test2(self) -> None: + """ + Raise assertion since it is not true that `0 < 0 <= 3`. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lgt( + 0, 0, 3, lower_bound_closed=False, upper_bound_closed=True + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 0 < 0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Raise assertion since it is not true that `0 < 100 <= 3`. + + The formatting of the assertion is correct. 
+ """ + with self.assertRaises(AssertionError) as cm: + lower_bound_closed = False + upper_bound_closed = True + hdbg.dassert_lgt( + 0, + 100, + 3, + lower_bound_closed, + upper_bound_closed, + "hello %s", + "world", + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 100 <= 3 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_proportion1 +# ############################################################################# + + +class Test_dassert_is_proportion1(hunitest.TestCase): + def test1(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(0.1, "hello %s", "world") + + def test2(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(0.0, "hello %s", "world") + + def test3(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(1.0, "hello %s", "world") + + def test_assert1(self) -> None: + """ + Failing assertion with correct message and format. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello %s", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Failing assertion with correct message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Failing assertion with incorrect message formatting. 
+ """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + Caught assertion while formatting message: + 'not all arguments converted during string formatting' + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert4(self) -> None: + """ + Failing assertion with incorrect message formatting. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello %s %s", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + Caught assertion while formatting message: + 'not enough arguments for format string' + hello %s %s world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_container_type1 +# ############################################################################# + + +class Test_dassert_container_type1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c".split() + hdbg.dassert_container_type(list_, List, str) + + def test_assert1(self) -> None: + """ + Check that assertion fails since a list is not a tuple. + """ + list_ = "a b c".split() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type(list_, Tuple, str) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '['a', 'b', 'c']' is '' instead of 'typing.Tuple' + obj='['a', 'b', 'c']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Check that assertion fails since a list contains strings and ints. 
+ """ + list_ = ["a", 2, "c", "d"] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type(list_, list, str) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '2' is '' instead of '' + obj='['a', 2, 'c', 'd']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Like `test_assert3()` but with a message. + """ + list_ = ["a", 2, "c", "d"] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type( + list_, list, str, "list_ is %s homogeneous", "not" + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '2' is '' instead of '' + list_ is not homogeneous + obj='['a', 2, 'c', 'd']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# _Animal +# ############################################################################# + + +class _Animal: + pass + + +# ############################################################################# +# _Man +# ############################################################################# + + +class _Man(_Animal): + pass + + +# ############################################################################# +# _Vegetable +# ############################################################################# + + +class _Vegetable: + pass + + +# ############################################################################# +# Test_dassert_issubclass1 +# ############################################################################# + + +class Test_dassert_issubclass1(hunitest.TestCase): + def test_man1(self) -> None: + """ + An instance of `_Man` descends from `_Animal`. + """ + man = _Man() + hdbg.dassert_issubclass(man, _Man) + + def test_man2(self) -> None: + """ + An instance of `_Man` descends from object. 
+ """ + man = _Man() + hdbg.dassert_issubclass(man, object) + + def test_man_fail1(self) -> None: + """ + An instance of `_Man` doesn't descends from `_Vegetable`. + """ + man = _Man() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_issubclass(man, _Vegetable) + # We need to purify from object references. + self.check_string(str(cm.exception), purify_text=True) + + def test_man_fail2(self) -> None: + """ + An instance of `_Man` doesn't descends from `int`. + """ + man = _Man() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_issubclass(man, int) + self.check_string(str(cm.exception), purify_text=True) + + def test1(self) -> None: + """ + In Python everything is an object. + """ + hdbg.dassert_issubclass(5, object) + hdbg.dassert_issubclass(int, object) + hdbg.dassert_issubclass(int, (object, int)) + + def test_fail1(self) -> None: + """ + `issubclass` only accepts classes and not instances as second argument. + """ + with self.assertRaises(Exception) as cm: + hdbg.dassert_issubclass(int, 5.0) + self.check_string(str(cm.exception), purify_text=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_callable1 +# ############################################################################# + + +class Test_dassert_callable1(hunitest.TestCase): + def test1(self) -> None: + func = lambda x: x + hdbg.dassert_callable(func) + + def test_fail1(self) -> None: + func = 4 + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_callable(func) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Obj '4' of type '' is not callable + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# 
Test_dassert_all_defined_or_all_None +# ############################################################################# + + +class Test_dassert_all_defined_or_all_None(hunitest.TestCase): + def test1(self) -> None: + """ + Verify that test passes when all the values are defined. + """ + vals = [1, 2, 3] + hdbg.dassert_all_defined_or_all_None(vals) + + def test2(self) -> None: + """ + Verify that assertion is raised when at least one of the values is not + defined. + """ + vals = [1, 2, None, None] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_defined_or_all_None(vals) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Some values in list are defined and some are None: '[1, 2, None, None]' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Verify that test passes when all the values are not defined. + """ + vals = [None, None, None] + hdbg.dassert_all_defined_or_all_None(vals) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_related_params1 +# ############################################################################# + + +class Test_dassert_related_params1(hunitest.TestCase): + def test1(self) -> None: + obj = {"val1": 1, "val2": 1, "val3": "hello"} + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test2(self) -> None: + obj = {"val1": 0, "val2": None, "val3": ""} + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test3(self) -> None: + obj = {"val1": 1, "val2": 0, "val3": "hello"} + with self.assertRaises(Exception) as cm: + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + actual = str(cm.exception) + expected = """ + * Failed assertion * + All or none parameter 
should be non-null: + val2=0 + params={'val1': 1, 'val2': 0, 'val3': 'hello'} + message 'hello world' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_related_params2 +# ############################################################################# + + +class Test_dassert_related_params2(hunitest.TestCase): + def test1(self) -> None: + obj = {"val1": 1, "val2": 1, "val3": "hello"} + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test2(self) -> None: + obj = { + "val1": None, + "val2": None, + "val3": None, + } + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test3(self) -> None: + obj = {"val1": None, "val2": None, "val3": "hello"} + with self.assertRaises(Exception) as cm: + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + actual = str(cm.exception) + expected = """ + * Failed assertion * + All or none parameter should be non-None: + val1=None + params={'val1': None, 'val2': None, 'val3': 'hello'} + message 'hello world' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_all_attributes_are_same1 +# ############################################################################# + + +class Test_dassert_all_attributes_are_same1(hunitest.TestCase): + def test1(self) -> None: + """ + Wrong type of object. 
+ """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same(5, "a") + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '5' is '' instead of '' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test2(self) -> None: + """ + Wrong type of attribute. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same([1, 2, 3], 1) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '1' is '' instead of '' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test3(self) -> None: + """ + Attribute with different values. + """ + Obj = collections.namedtuple("Obj", ["a", "b"]) + list_ = [Obj(1, 2), Obj(1, 3)] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same(list_, "b") + actual = str(cm.exception) + expected = """ + * Failed assertion * + Elements in the list have different values for + attribute b: + {2, 3} + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test4(self) -> None: + """ + Attribute with same values. + """ + Obj = collections.namedtuple("Obj", ["a", "b"]) + list_ = [Obj(1, 2), Obj(1, 2)] + hdbg.dassert_all_attributes_are_same(list_, "b") + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_lt +# ############################################################################# + + +class Test_dassert_lt(hunitest.TestCase): + def test1(self) -> None: + """ + Test that the function doesn't raise an exception if first value is + less than second value. + """ + val1 = 1 + val2 = 2 + hdbg.dassert_lt(val1, val2) + + def test2(self) -> None: + """ + Test that the function raises an exception if first value is equal to + second value. + """ + # Set inputs. 
+ val1 = 2 + val2 = 2 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2 < 2 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test that the function raises an exception if first value is greater + than second value. + """ + # Set inputs. + val1 = 3 + val2 = 2 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 3 < 2 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test that the function doesn't raise an exception when we pass string + inputs. + """ + val1 = "a" + val2 = "b" + hdbg.dassert_lt(val1, val2) + + def test5(self) -> None: + """ + Test that the function raises an exception where first value is greater + than second value with floats. + """ + # Set inputs. + val1 = 2.0 + val2 = 1.0 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2.0 < 1.0 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_is_integer +# ############################################################################# + + +class Test_dassert_is_integer(hunitest.TestCase): + def test1(self) -> None: + """ + Test that the function do not raise the exception with integer values. + """ + val = 5 + hdbg.dassert_is_integer(val) + + def test2(self) -> None: + """ + Test that the function do not raise the exception with float values + that represent an integer. + """ + val = 5.0 + hdbg.dassert_is_integer(val) + + def test3(self) -> None: + """ + Test that the function raises an exception for float values that do not + represent an integer. 
+ """ + # Set inputs. + val = 5.5 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_integer(val) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Invalid val='5.5' of type '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test that the function raises an exception for non-integer and non- + float types. + """ + # Set inputs. + val = "5" + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_integer(val) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Invalid val='5' of type '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py new file mode 100644 index 000000000..b3f6d7f04 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py @@ -0,0 +1,107 @@ +import logging + +import config_root.config as cconfig +import helpers.hdict as hdict +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_get_nested_dict_iterator +# ############################################################################# + + +class Test_get_nested_dict_iterator(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic case with no nesting. + """ + dict_ = {"key0": "value0", "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), "value0"), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test2(self) -> None: + """ + Test simple nested case. 
+ """ + dict_ = { + "key0": {"key00": "value00", "key01": "value01"}, + "key1": "value1", + } + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [ + (("key0", "key00"), "value00"), + (("key0", "key01"), "value01"), + (("key1",), "value1"), + ] + self.assertListEqual(actual_result, expected_result) + + def test3(self) -> None: + """ + Test multilevel nested case. + """ + dict_ = {"key0": {"key00": {"key000": "value000"}}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [ + (("key0", "key00", "key000"), "value000"), + (("key1",), "value1"), + ] + self.assertListEqual(actual_result, expected_result) + + def test4(self) -> None: + """ + Test flat case with `None` value. + """ + dict_ = {"key0": "value0", "key1": None} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), "value0"), (("key1",), None)] + self.assertListEqual(actual_result, expected_result) + + def test5(self) -> None: + """ + Test nested case with `None` value. + """ + dict_ = {"key0": {"key00": None}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), None), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test6(self) -> None: + """ + Test flat case with empty dict value. + """ + dict_ = {"key0": {}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), {}), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test7(self) -> None: + """ + Test nested case with empty dict value. 
+ """ + dict_ = {"key0": {"key00": {}}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), {}), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test8(self) -> None: + """ + Test flat case with empty Config value. + """ + config = cconfig.Config() + dict_ = {"key0": config, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), config), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test9(self) -> None: + """ + Test nexted case with empty Config value. + """ + config = cconfig.Config() + dict_ = {"key0": {"key00": config}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), config), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py new file mode 100644 index 000000000..7220d1474 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py @@ -0,0 +1,624 @@ +import logging +import os +import unittest.mock as umock +from typing import List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_shared_root_path1 +# 
############################################################################# + + +class Test_replace_shared_root_path1(hunitest.TestCase): + def test1(self) -> None: + """ + Test replacing shared root path. + """ + # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. + mock_mapping = { + "/data/shared1": "/shared_folder1", + "/data/shared2": "/shared_folder2", + } + with umock.patch.object( + hserver, "get_shared_data_dirs", return_value=mock_mapping + ): + # Test replacing shared root path. + path1 = "/data/shared1/asset1" + act1 = hdocker.replace_shared_root_path(path1) + exp1 = "/shared_folder1/asset1" + self.assertEqual(act1, exp1) + # + path2 = "/data/shared2/asset2" + act2 = hdocker.replace_shared_root_path(path2) + exp2 = "/shared_folder2/asset2" + self.assertEqual(act2, exp2) + # + path3 = 'object("/data/shared2/asset2/item")' + act3 = hdocker.replace_shared_root_path(path3) + exp3 = 'object("/shared_folder2/asset2/item")' + self.assertEqual(act3, exp3) + + def test2(self) -> None: + """ + Test replacing shared root path with the `replace_ecs_tokyo` parameter. + """ + # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. + mock_mapping = { + "/data/shared": "/shared_folder", + } + with umock.patch.object( + hserver, "get_shared_data_dirs", return_value=mock_mapping + ): + # Test if `ecs_tokyo` is replaced if `replace_ecs_tokyo = True`. + path1 = 'object("/data/shared/ecs_tokyo/asset2/item")' + replace_ecs_tokyo = True + act1 = hdocker.replace_shared_root_path( + path1, replace_ecs_tokyo=replace_ecs_tokyo + ) + exp1 = 'object("/shared_folder/ecs/asset2/item")' + self.assertEqual(act1, exp1) + # Test if `ecs_tokyo` is not replaced if `replace_ecs_tokyo` is not + # defined. 
+ path2 = 'object("/data/shared/ecs_tokyo/asset2/item")' + act2 = hdocker.replace_shared_root_path(path2) + exp2 = 'object("/shared_folder/ecs_tokyo/asset2/item")' + self.assertEqual(act2, exp2) + + +# ############################################################################# +# Test_convert_to_docker_path1 +# ############################################################################# + + +class Test_convert_to_docker_path1(hunitest.TestCase): + @staticmethod + def convert_caller_to_callee_docker_path( + in_file_path: str, + is_caller_host: bool, + use_sibling_container_for_callee: bool, + check_if_exists: bool, + ) -> Tuple[str, str]: + """ + Prepare inputs and call the function to convert a file name to Docker + paths. + + :return: A tuple containing + - docker_file_path: the Docker file path + - mount: the Docker mount string + """ + ( + source_host_path, + callee_mount_path, + mount, + ) = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + docker_file_path = hdocker.convert_caller_to_callee_docker_path( + in_file_path, + source_host_path, + callee_mount_path, + check_if_exists=check_if_exists, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + return docker_file_path, mount + + def helper( + self, + in_file_path: str, + is_caller_host: bool, + use_sibling_container_for_callee: bool, + check_if_exists: bool, + exp_docker_file_path: str, + exp_mount: str, + ) -> None: + """ + Test converting a file name to Docker paths. + """ + # Run test. + docker_file_path, mount = self.convert_caller_to_callee_docker_path( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + ) + # Check output. + self.assert_equal(docker_file_path, exp_docker_file_path) + self.assert_equal(mount, exp_mount) + + def test1(self) -> None: + """ + Test converting a file name to Docker paths. + """ + # - Prepare inputs. 
+ dir_name = self.get_input_dir() + in_file_path = os.path.join(dir_name, "tmp.llm_transform.in.txt") + is_caller_host = True + use_sibling_container_for_callee = True + check_if_exists = False + # - Prepare outputs. + helpers_root_path = hgit.find_helpers_root() + exp_docker_file_path = os.path.join( + helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test1/input", + "tmp.llm_transform.in.txt", + ) + exp_mount = "type=bind,source=/app,target=/app" + self.helper( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + exp_docker_file_path, + exp_mount, + ) + + def test2(self) -> None: + """ + Test converting a file name of an existing file to a Docker path. + """ + # - Prepare inputs. + dir_name = self.get_input_dir() + # Create a file. + # E.g., in_file_path='/app/helpers/test/outcomes/Test_convert_to_docker_path1.test2/input/input.md' + in_file_path = os.path.join(dir_name, "tmp.input.md") + hio.to_file(in_file_path, "empty") + _LOG.debug(hprint.to_str("in_file_path")) + is_caller_host = True + use_sibling_container_for_callee = True + check_if_exists = True + # - Prepare outputs. + helpers_root_path = hgit.find_helpers_root() + exp_docker_file_path = os.path.join( + helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test2/input", + "tmp.input.md", + ) + exp_mount = "type=bind,source=/app,target=/app" + self.helper( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + exp_docker_file_path, + exp_mount, + ) + + +# ############################################################################# +# Test_is_path1 +# ############################################################################# + + +class Test_is_path1(hunitest.TestCase): + def helper(self, path: str, expected: bool) -> None: + """ + Test helper for `is_path()` function. + """ + # Run test. + actual = hdocker.is_path(path) + # Check outputs. 
+ _LOG.debug(hprint.to_str("path actual expected")) + self.assertEqual(actual, expected) + + def test_file_with_extension(self) -> None: + """ + Test paths with file extensions. + """ + # Prepare inputs. + test_cases = [ + ("file.txt", True), + ("document.pdf", True), + ("script.py", True), + ("data.csv", True), + ("image.jpg", True), + ("config.json", True), + ("readme.md", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_absolute_paths(self) -> None: + """ + Test absolute paths. + """ + # Prepare inputs. + test_cases = [ + ("/path/to/file.py", True), + ("/usr/bin/python", True), + ("/etc/config", True), + ("/home/user", True), + ("/", True), + ("/data/shared", True), + ] + # Check outputs. + for path, expected in test_cases: + self.helper(path, expected) + + def test_relative_paths(self) -> None: + """ + Test relative paths starting with ./ or ../. + """ + # Prepare inputs and run tests. + test_cases = [ + ("./file.txt", True), + ("../data.csv", True), + ("./folder/subfolder", True), + ("../parent/file", True), + ("./", True), + ("../", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_trailing_slash_paths(self) -> None: + """ + Test paths ending with slash (indicating directories). + """ + # Prepare inputs and run tests. + test_cases = [ + ("folder/", True), + ("data/", True), + ("my_directory/", True), + ("nested/folder/", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_non_path_strings(self) -> None: + """ + Test strings that should not be considered paths. + """ + # Prepare inputs and run tests. + test_cases = [ + ("readme", False), + ("hello", False), + ("command", False), + ("data", False), + ("test", False), + ("python", False), + ("docker", False), + ("", False), + ] + # Run tests. 
+ for path, expected in test_cases: + self.helper(path, expected) + + def test_edge_cases(self) -> None: + """ + Test edge cases and complex scenarios. + """ + # Prepare inputs and run tests. + test_cases = [ + # - Files with multiple extensions. + ("file.tar.gz", True), + ("backup.sql.bz2", True), + # - Hidden files. + (".hidden", True), + (".gitignore", True), + # - Complex paths. + ("./nested/folder/file.txt", True), + ("../parent/folder/", True), + ("/absolute/path/file.py", True), + # - Files without extension in paths. + # True because it contains a slash. + ("folder/README", True), + # True because starts with "./". + ("./config", True), + # True because starts with "/". + ("/usr/bin/python", True), + # - Strings that might be confused with paths. + # True because has extension. + ("folder.name", True), + # False because no extension, slash, or path prefix. + ("file-name", False), + # False because no extension, slash, or path prefix. + ("under_score", False), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + +# ############################################################################# +# Test_convert_all_paths_from_caller_to_callee_docker_path1 +# ############################################################################# + + +class Test_convert_all_paths_from_caller_to_callee_docker_path1( + hunitest.TestCase +): + def helper( + self, + cmd_opts: List[str], + expected_str: str, + *, + is_caller_host: bool = True, + use_sibling_container_for_callee: bool = True, + create_files: Optional[List[str]] = None, + ) -> None: + """ + Helper for `convert_all_paths_from_caller_to_callee_docker_path()`. + """ + hdbg.dassert_isinstance(cmd_opts, list) + hdbg.dassert_isinstance(expected_str, str) + # Prepare inputs. + if create_files: + # Create temporary files for testing existing file paths. 
+ for file_path in create_files: + dir_name = os.path.dirname(file_path) + if dir_name: + hio.create_dir(dir_name, incremental=True) + hio.to_file(file_path, "test content") + # Get docker mount info for the test. + ( + caller_mount_path, + callee_mount_path, + _, + ) = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + # Run test. + actual = hdocker.convert_all_paths_from_caller_to_callee_docker_path( + cmd_opts, + caller_mount_path, + callee_mount_path, + is_caller_host, + use_sibling_container_for_callee, + ) + _LOG.debug("actual=\n%s", str(actual)) + # Check outputs. + actual_str = "\n".join(actual) + actual_str = huntepur.purify_text(actual_str) + expected_str = huntepur.purify_text(expected_str) + self.assert_equal(actual_str, expected_str, dedent=True) + + # ///////////////////////////////////////////////////////////////////////////// + + def test_mixed_options_with_paths_and_non_paths(self) -> None: + """ + Test converting mixed command options with paths and non-paths. + """ + # Prepare inputs. + cmd_opts = [ + "--verbose", + "file.txt", # Path-like (has extension) + "--output", + "./output.log", # Path-like (relative path) + "command", # Not a path + # "/absolute/path", # Path-like (absolute) + "--flag", + "folder/", # Path-like (trailing slash) + ] + expected_output = [ + "--verbose", + "/app/file.txt", # Converted + "--output", + "/app/output.log", # Converted + "command", # Not converted + # "/app/absolute/path", # Converted + "--flag", + "/app/folder", # Converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_existing_files_get_converted(self) -> None: + """ + Test that existing files are converted even without path-like + appearance. + """ + # Prepare inputs. 
+ temp_dir = self.get_scratch_space() + existing_file = os.path.join(temp_dir, "testfile") + cmd_opts = [ + "--input", + existing_file, # Will exist, should be converted + "nonexistent", # Doesn't exist and not path-like, won't be converted + ] + expected_output = [ + "--input", + f"/app/{os.path.relpath(existing_file, hgit.find_git_root())}", # Converted + "nonexistent", # Not converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output, create_files=[existing_file]) + + def test_path_like_strings_without_existing_files(self) -> None: + """ + Test that path-like strings are converted even if files don't exist. + """ + # Prepare inputs. + cmd_opts = [ + "script.py", # Path-like (extension) but doesn't exist + "./config.json", # Path-like (relative) but doesn't exist + # "/usr/bin/tool", # Path-like (absolute) but doesn't exist + "plain_word", # Not path-like and doesn't exist + ] + expected_output = [ + "/app/script.py", # Converted (has extension) + "/app/config.json", # Converted (relative path) + # "/app/usr/bin/tool", # Converted (absolute path) + "plain_word", # Not converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_empty_command_options(self) -> None: + """ + Test handling of empty command options list. + """ + # Prepare inputs. + cmd_opts = [] + expected_output = [] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_only_non_path_options(self) -> None: + """ + Test command options with no paths. + """ + # Prepare inputs. + cmd_opts = [ + "--verbose", + "--debug", + "command", + "argument", + "--flag", + ] + expected_output = [ + "--verbose", + "--debug", + "command", + "argument", + "--flag", + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. 
+ self.helper(cmd_opts, expected_output) + + def test_only_path_options(self) -> None: + """ + Test command options with only paths. + """ + # Prepare inputs. + cmd_opts = [ + "input.txt", + "./config.yaml", + # "/var/log/app.log", + "data/", + "./output.json", + ] + expected_output = [ + "/app/input.txt", + "/app/config.yaml", + # "/app/var/log/app.log", + "/app/data", + "/app/output.json", + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_complex_paths_with_extensions(self) -> None: + """ + Test complex paths with multiple extensions and special cases. + """ + # Prepare inputs. + cmd_opts = [ + "archive.tar.gz", # Multiple extensions + ".hidden", # Hidden file + "backup.sql.bz2", # Multiple extensions + ".gitignore", # Hidden config file + ] + expected_output = """ + $GIT_ROOT/archive.tar.gz + $GIT_ROOT/.hidden + $GIT_ROOT/backup.sql.bz2 + $GIT_ROOT/.gitignore + """ + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_sibling_vs_child_container_modes(self) -> None: + """ + Test different container modes (sibling vs child). + """ + # Prepare inputs. + cmd_opts = ["input.txt", "output/"] + # Test sibling container mode. + expected_output = ["/app/input.txt", "/app/output"] + expected_output = "\n".join(expected_output) + self.helper( + cmd_opts, + expected_output, + is_caller_host=True, + use_sibling_container_for_callee=True, + ) + # Test child container mode. 
+ expected_output = ["/app/input.txt", "/app/output"] + expected_output = "\n".join(expected_output) + self.helper( + cmd_opts, + expected_output, + is_caller_host=True, + use_sibling_container_for_callee=False, + ) + + +# ############################################################################# +# Test_get_docker_mount_info1 +# ############################################################################# + + +class Test_get_docker_mount_info1(hunitest.TestCase): + def test1(self) -> None: + """ + With CSFY_ENABLE_DIND, sibling-style docker.sock must still bind the + repo root inside this container, not CSFY_HOST_GIT_ROOT_PATH. + """ + # - Prepare inputs. + git_root = hgit.find_git_root() + env = { + "CSFY_ENABLE_DIND": "1", + "CSFY_HOST_GIT_ROOT_PATH": "/path/only/on/outer/host", + } + # - Prepare outputs. + exp_target = "/app" + exp_mount = f"type=bind,source={git_root},target=/app" + # Run test. + with umock.patch.dict(os.environ, env, clear=False): + source, target, mount = hdocker.get_docker_mount_info( + is_caller_host=False, + use_sibling_container_for_callee=True, + ) + # Check outputs. + self.assert_equal(source, git_root) + self.assert_equal(target, exp_target) + self.assert_equal(mount, exp_mount) + + def test2(self) -> None: + """ + Without DinD, sibling mode uses CSFY_HOST_GIT_ROOT_PATH for bind + source. + """ + # - Prepare inputs. + host_root = "/tmp/explicit_host_git_root_for_test" + env = { + "CSFY_ENABLE_DIND": "0", + "CSFY_HOST_GIT_ROOT_PATH": host_root, + } + # - Prepare outputs. + exp_target = "/app" + exp_mount = f"type=bind,source={host_root},target=/app" + # Run test. + with umock.patch.dict(os.environ, env, clear=False): + source, target, mount = hdocker.get_docker_mount_info( + is_caller_host=False, + use_sibling_container_for_callee=True, + ) + # Check outputs. 
+ self.assert_equal(source, host_root) + self.assert_equal(target, exp_target) + self.assert_equal(mount, exp_mount) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py new file mode 100644 index 000000000..203ae012e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py @@ -0,0 +1,158 @@ +""" +Unit tests for hdocker_tests.py +""" + +import logging +import os + +import helpers.hdocker_tests as hdoctest +import helpers.hio as hio +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_get_docker_test_files +# ############################################################################# + + +class Test_get_docker_test_files(hunitest.TestCase): + """ + Test the get_docker_test_files function. + """ + + def test1(self) -> None: + """ + Test finding docker test files in a directory. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_2.py"), "") + hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 2) + self.assertTrue(any("docker_test_1.py" in f for f in actual)) + self.assertTrue(any("docker_test_2.py" in f for f in actual)) + + def test2(self) -> None: + """ + Test with no matching files. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create non-matching files. 
+ hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") + hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 0) + + def test3(self) -> None: + """ + Test with single docker test file. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_single.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 1) + self.assertTrue("docker_test_single.py" in actual[0]) + + def test4(self) -> None: + """ + Test that files are returned in sorted order. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_z.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_a.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_m.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 3) + basenames = [os.path.basename(f) for f in actual] + self.assertEqual( + basenames, + ["docker_test_a.py", "docker_test_m.py", "docker_test_z.py"], + ) + + +# ############################################################################# +# Test_run_docker_cmd +# ############################################################################# + + +class Test_run_docker_cmd(hunitest.TestCase): + """ + Test the run_docker_cmd function. + """ + + def test1(self) -> None: + """ + Test that error is raised when docker_cmd.sh does not exist in + script_dir. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Run test and check output. + with self.assertRaises(AssertionError): + hdoctest.run_docker_cmd(scratch_dir) + + def test2(self) -> None: + """ + Test that error is raised when script_dir does not exist. + """ + # Prepare inputs. 
+ nonexistent_dir = "/nonexistent_dir_that_does_not_exist" + # Run test and check output. + with self.assertRaises(AssertionError): + hdoctest.run_docker_cmd(nonexistent_dir) + + +# ############################################################################# +# Test_run_all_tests +# ############################################################################# + + +class Test_run_all_tests(hunitest.TestCase): + """ + Test the run_all_tests function. + """ + + def test1(self) -> None: + """ + Test with no docker test files returns 0. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create non-matching files. + hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") + # Run test. + actual = hdoctest.run_all_tests(scratch_dir) + # Check outputs. + self.assertEqual(actual, 0) + + def test2(self) -> None: + """ + Test with docker test files when docker_cmd_script doesn't exist. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") + nonexistent_docker_cmd = os.path.join( + scratch_dir, "nonexistent_docker_cmd.sh" + ) + # Run test and check output. 
+ with self.assertRaises(AssertionError): + hdoctest.run_all_tests( + scratch_dir, docker_cmd_script=nonexistent_docker_cmd + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py new file mode 100644 index 000000000..d1f229435 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py @@ -0,0 +1,17 @@ +import logging + +import helpers.henv as henv +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_env1 +# ############################################################################# + + +class Test_env1(hunitest.TestCase): + def test_get_system_signature1(self) -> None: + txt = henv.get_system_signature() + _LOG.debug(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py new file mode 100644 index 000000000..f50f79994 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py @@ -0,0 +1,347 @@ +import logging +import pathlib + +import helpers.hfile_tree as hfiltree +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_generate_tree +# ############################################################################# + + +class Test_generate_tree(hunitest.TestCase): + def test1(self) -> None: + """ + Test generating default tree. + """ + # Prepare inputs. 
+ path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + " - create_users.sh", + " - pip_list.txt", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Test generating default tree with depth. + """ + # Prepare inputs. + path = self.devops_dir + depth = 1 + include_tests = False + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + Test generating tree including test files and dirs. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = True + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + "- test", + " - test_docker.py", + ] + ) + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test generating tree including python files. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = False + include_python = True + only_dirs = False + output = "" + # Call tested function. 
+ actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- __init__.py", + "- compose", + "- docker_build", + "- docker_run", + " - execute.py", + "- user_credentials.py", + ] + ) + self.assertEqual(actual, expected) + + def test5(self) -> None: + """ + Test generating tree with only directories. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = True + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test6(self) -> None: + """ + Test generating tree including tests, python files, and only + directories. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = True + include_python = True + only_dirs = True + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- __init__.py", + "- compose", + "- docker_build", + "- docker_run", + " - execute.py", + "- test", + " - test_docker.py", + "- user_credentials.py", + ] + ) + self.assertEqual(actual, expected) + + def test7(self) -> None: + """ + Test writing tree to file. + """ + # Prepare inputs. + scratch = pathlib.Path(self.get_scratch_space()) + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = scratch / "TREE.md" + # Call tested function. 
+ _ = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + actual = output.read_text(encoding="utf-8") + # Check output. + expected = ( + "\n".join( + [ + "", + "devops", + "- compose", + "- docker_build", + " - create_users.sh", + " - pip_list.txt", + "- docker_run", + "", + ] + ) + + "\n" + ) + self.assertEqual(actual, expected) + + def test8(self) -> None: + """ + Test updating tree on existing file, preserving comments. + """ + # Prepare inputs. + scratch = pathlib.Path(self.get_scratch_space()) + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = scratch / "TREE.md" + # Create existing file. + content = ( + "\n".join( + [ + "", + "devops", + "- compose # compose-comment", + "- docker_build", + " - pip_list.txt # pip-comment", + "", + ] + ) + + "\n" + ) + output.write_text(content, encoding="utf-8") + # Call tested function. + _ = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + actual = output.read_text(encoding="utf-8") + # Check output. + expected = ( + "\n".join( + [ + "", + "devops", + "- compose # compose-comment", + "- docker_build", + " - create_users.sh", + " - pip_list.txt # pip-comment", + "- docker_run", + "", + ] + ) + + "\n" + ) + self.assertEqual(actual, expected) + + def setUp(self) -> None: + """ + Create a `devops` directory in scratch space. 
+ + Scratch directory layout: + ``` + devops + - __init__.py + - user_credentials.py + - compose + - docker_run + - execute.py + - docker_build + - create_users.sh + - pip_list.txt + - test + - TestDocker + - test_docker.py + ``` + """ + super().setUp() + scratch = self.get_scratch_space() + self.devops_dir = pathlib.Path(scratch) / "devops" + self.devops_dir.mkdir() + structure = { + "": ["__init__.py", "user_credentials.py"], + "compose": [], + "docker_run": ["execute.py"], + "docker_build": ["create_users.sh", "pip_list.txt"], + "test": ["TestDocker", "test_docker.py"], + } + # Create empty dirs and files. + for subdir, files in structure.items(): + folder = self.devops_dir / subdir if subdir else self.devops_dir + if subdir: + folder.mkdir() + for name in files: + (folder / name).touch() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py new file mode 100644 index 000000000..8a7135578 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py @@ -0,0 +1,822 @@ +import logging +import os +import tempfile +from typing import Generator, List, Optional + +import pytest + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# Unfortunately we can't check the outcome of some of these functions since we +# don't know in which dir we are running. Thus we just test that the function +# completes and visually inspect the outcome, if possible. 
+ + +# ############################################################################# +# Test_git_submodule1 +# ############################################################################# + + +class Test_git_submodule1(hunitest.TestCase): + def test_get_client_root1(self) -> None: + actual = hgit.get_client_root(super_module=True) + _LOG.debug("actual=%s", actual) + + def test_get_client_root2(self) -> None: + actual = hgit.get_client_root(super_module=False) + _LOG.debug("actual=%s", actual) + + def test_get_project_dirname1(self) -> None: + actual = hgit.get_project_dirname() + _LOG.debug("actual=%s", actual) + + def test_get_branch_name1(self) -> None: + actual = hgit.get_branch_name() + _LOG.debug("actual=%s", actual) + + def test_is_inside_submodule1(self) -> None: + actual = hgit.is_inside_submodule() + _LOG.debug("actual=%s", actual) + + # Outside CK infra, the following call hangs, so we skip it. + # TODO(gp): I don't see why it requires our infra. + @pytest.mark.requires_ck_infra + def test_is_amp(self) -> None: + actual = hgit.is_amp() + _LOG.debug("actual=%s", actual) + + def test_get_path_from_supermodule1(self) -> None: + actual = hgit.get_path_from_supermodule() + _LOG.debug("actual=%s", actual) + + def test_get_submodule_paths1(self) -> None: + actual = hgit.get_submodule_paths() + _LOG.debug("actual=%s", actual) + + +# ############################################################################# +# Test_git_submodule2 +# ############################################################################# + + +class Test_git_submodule2(hunitest.TestCase): + # def test_get_submodule_hash1(self) -> None: + # dir_name = "amp" + # _ = hgit._get_submodule_hash(dir_name) + + def test_get_remote_head_hash1(self) -> None: + dir_name = "." 
+ actual = hgit.get_head_hash(dir_name) + _LOG.debug("actual=%s", actual) + + # def test_report_submodule_status1(self) -> None: + # dir_names = ["."] + # short_hash = True + # _ = hgit.report_submodule_status(dir_names, short_hash) + + def test_get_head_hash1(self) -> None: + dir_name = "." + actual = hgit.get_head_hash(dir_name) + _LOG.debug("actual=%s", actual) + + def _helper_group_hashes( + self, + head_hash: str, + remh_hash: str, + subm_hash: Optional[str], + expected: str, + ) -> None: + actual = hgit._group_hashes(head_hash, remh_hash, subm_hash) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_group_hashes1(self) -> None: + head_hash = "a2bfc704" + remh_hash = "a2bfc704" + subm_hash = None + expected = "head_hash = remh_hash = a2bfc704" + # + self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) + + def test_group_hashes2(self) -> None: + head_hash = "22996772" + remh_hash = "92167662" + subm_hash = "92167662" + expected = """ + head_hash = 22996772 + remh_hash = subm_hash = 92167662 + """ + # + self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) + + def test_group_hashes3(self) -> None: + head_hash = "7ea03eb6" + remh_hash = "7ea03eb6" + subm_hash = "7ea03eb6" + expected = "head_hash = remh_hash = subm_hash = 7ea03eb6" + # + self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) + + +# ############################################################################# +# Test_git_repo_name1 +# ############################################################################# + + +class Test_git_repo_name1(hunitest.TestCase): + def test_parse_github_repo_name1(self) -> None: + repo_name = "git@github.com:alphamatic/amp" + host_name, repo_name = hgit._parse_github_repo_name(repo_name) + self.assert_equal(host_name, "github.com") + self.assert_equal(repo_name, "alphamatic/amp") + + def test_parse_github_repo_name2(self) -> None: + repo_name = "https://github.com/alphamatic/amp" + 
hgit._parse_github_repo_name(repo_name) + host_name, repo_name = hgit._parse_github_repo_name(repo_name) + self.assert_equal(host_name, "github.com") + self.assert_equal(repo_name, "alphamatic/amp") + + def test_parse_github_repo_name3(self) -> None: + repo_name = "git@github.fake.com:alphamatic/amp" + host_name, repo_name = hgit._parse_github_repo_name(repo_name) + self.assert_equal(host_name, "github.fake.com") + self.assert_equal(repo_name, "alphamatic/amp") + + def test_parse_github_repo_name4(self) -> None: + repo_name = "https://github.fake.com/alphamatic/amp" + host_name, repo_name = hgit._parse_github_repo_name(repo_name) + self.assert_equal(host_name, "github.fake.com") + self.assert_equal(repo_name, "alphamatic/amp") + + def test_get_repo_full_name_from_dirname1(self) -> None: + actual = hgit.get_repo_full_name_from_dirname( + dir_name=".", include_host_name=False + ) + _LOG.debug("actual=%s", actual) + + def test_get_repo_full_name_from_dirname2(self) -> None: + actual = hgit.get_repo_full_name_from_dirname( + dir_name=".", include_host_name=True + ) + _LOG.debug("actual=%s", actual) + + def test_get_repo_full_name_from_client1(self) -> None: + actual = hgit.get_repo_full_name_from_client(super_module=True) + _LOG.debug("actual=%s", actual) + + def test_get_repo_full_name_from_client2(self) -> None: + actual = hgit.get_repo_full_name_from_client(super_module=False) + _LOG.debug("actual=%s", actual) + + +# ############################################################################# +# Test_git_path1 +# ############################################################################# + + +# Outside CK infra, the following class hangs, so we skip it. 
+@pytest.mark.requires_ck_infra +class Test_git_path1(hunitest.TestCase): + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + def test_get_path_from_git_root1(self) -> None: + file_name = "/app/helpers/test/test_hgit.py" + actual = hgit.get_path_from_git_root(file_name, super_module=True) + _LOG.debug("get_path_from_git_root()=%s", actual) + # Check. + expected = "helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), + reason="Run only in amp as sub-module", + ) + def test_get_path_from_git_root2(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + actual = hgit.get_path_from_git_root(file_name, super_module=True) + _LOG.debug("get_path_from_git_root()=%s", actual) + # Check. + expected = "amp/helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root3(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + git_root = "/app" + actual = hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + # Check. + expected = "amp/helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root4(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + git_root = "/app/amp" + actual = hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + # Check. 
+ expected = "helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root5(self) -> None: + file_name = "helpers/test/test_hgit.py" + git_root = "/app/amp" + with self.assertRaises(ValueError): + hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + + +# ############################################################################# +# Test_git_modified_files1 +# ############################################################################# + + +# Outside CK infra, the following class hangs, so we skip it. +@pytest.mark.requires_ck_infra +@pytest.mark.slow(reason="Around 7s") +@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class Test_git_modified_files1(hunitest.TestCase): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + + def set_up_test(self) -> None: + """ + All these tests need a reference to Git master branch. 
+ """ + hgit.fetch_origin_master_if_needed() + + def test_get_modified_files1(self) -> None: + actual = hgit.get_modified_files() + _LOG.debug("actual=%s", actual) + + def test_get_previous_committed_files1(self) -> None: + actual = hgit.get_previous_committed_files() + _LOG.debug("actual=%s", actual) + + def test_get_modified_files_in_branch1(self) -> None: + actual = hgit.get_modified_files_in_branch("master") + _LOG.debug("actual=%s", actual) + + def test_get_summary_files_in_branch1(self) -> None: + actual = hgit.get_summary_files_in_branch("master") + _LOG.debug("actual=%s", actual) + + def test_git_log1(self) -> None: + actual = hgit.git_log() + _LOG.debug("actual=%s", actual) + + +# ############################################################################# + + +# ############################################################################# +# Test_find_docker_file1 +# ############################################################################# + + +# Outside CK infra, the following class hangs, so we skip it. +@pytest.mark.requires_ck_infra +class Test_find_docker_file1(hunitest.TestCase): + def test1(self) -> None: + """ + Test for a file `amp/helpers/test/test_hgit.py` that is not from Docker + (i.e., it doesn't start with `/app`) and exists in the repo. + """ + amp_dir = hgit.get_amp_abs_path() + # Use this file since `find_docker_file()` needs to do a `find` in the + # repo, and we need to have a fixed file structure. + file_name = hgit.find_file_in_git_tree("test_hgit.py") + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + ) + expected = ["helpers/test/test_hgit.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test2(self) -> None: + """ + Test for a file `/app/amp/helpers/test/test_hgit.py` that is from + Docker (i.e., it starts with `/app`) and exists in the repo. 
+ """ + amp_dir = hgit.get_amp_abs_path() + # Use this file since `find_docker_file()` needs to do a `find` in the + # repo, and we need to have a fixed file structure. + file_name = hgit.find_file_in_git_tree("test_hgit.py") + expected = ["helpers/test/test_hgit.py"] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + ) + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test3(self) -> None: + """ + Test for a file `/venv/lib/python3.8/site-packages/invoke/tasks.py` + that is from Docker (e.g., it starts with `/app`), but doesn't exist in + the repo. + """ + file_name = "/venv/lib/python3.8/site-packages/invoke/tasks.py" + actual = hgit.find_docker_file(file_name) + expected: List[str] = [] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test4(self) -> None: + """ + Test for a file `./core/dataflow/utils.py` that is from Docker (i.e., + it starts with `/app`), but has multiple copies in the repo. + """ + amp_dir = hgit.get_amp_abs_path() + file_name = "/app/amp/core/dataflow/utils.py" + dir_depth = 1 + candidate_files = [ + "core/dataflow/utils.py", + "core/foo/utils.py", + "core/bar/utils.py", + ] + candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + dir_depth=dir_depth, + candidate_files=candidate_files, + ) + # Only one candidate file matches basename and one dirname. 
+ expected = ["core/dataflow/utils.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test5(self) -> None: + amp_dir = hgit.get_amp_abs_path() + file_name = "/app/amp/core/dataflow/utils.py" + dir_depth = -1 + candidate_files = [ + "core/dataflow/utils.py", + "bar/dataflow/utils.py", + "core/foo/utils.py", + ] + candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + dir_depth=dir_depth, + candidate_files=candidate_files, + ) + # Only one file matches `utils.py` using all the 3 dir levels. + expected = ["core/dataflow/utils.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_extract_gh_issue_number_from_branch +# ############################################################################# + + +class Test_extract_gh_issue_number_from_branch(hunitest.TestCase): + def test_extract_gh_issue_number_from_branch1(self) -> None: + """ + Tests extraction from a branch name with a specific format. + """ + branch_name = "CmampTask10725_Add_more_tabs_to_orange_tmux" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "10725" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch2(self) -> None: + """ + Tests extraction from another branch name format. + """ + branch_name = "HelpersTask23_Add_more_tabs_to_orange_tmux" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "23" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch3(self) -> None: + """ + Tests extraction from a short branch name format. 
+ """ + branch_name = "CmTask3434" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "3434" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch4(self) -> None: + """ + Tests behavior when no issue number is present in the branch name. + """ + branch_name = "NoTaskNumberHere" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "None" + self.assert_equal(str(actual), expected) + + +# ############################################################################# +# Test_find_git_root1 +# ############################################################################# + + +class Test_find_git_root1(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a super repo (e.g. //orange) + - the repo contains another super repo (e.g. //amp) as submodule (first level) + - the first level submodule contains another submodule (e.g. //helpers) (second level) + + Directory structure: + orange/ + |-- .git/ + `-- amp/ + |-- .git (points to ../.git/modules/amp) + |-- ck.infra/ + `-- helpers_root/ + `-- .git (points to ../../.git/modules/amp/modules/helpers_root) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `orange` repo. + self.repo_dir = os.path.join(temp_dir, "orange") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create `amp` submodule under `orange`. + self.submodule_dir = os.path.join(self.repo_dir, "amp") + hio.create_dir(self.submodule_dir, incremental=False) + submodule_git_file = os.path.join(self.submodule_dir, ".git") + txt = "gitdir: ../.git/modules/amp" + hio.to_file(submodule_git_file, txt) + submodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "amp" + ) + hio.create_dir(submodule_git_file_dir, incremental=False) + # Create `helpers_root` submodule under `amp`. 
+ self.subsubmodule_dir = os.path.join(self.submodule_dir, "helpers_root") + hio.create_dir(self.subsubmodule_dir, incremental=False) + subsubmodule_git_file = os.path.join(self.subsubmodule_dir, ".git") + txt = "gitdir: ../../.git/modules/amp/modules/helpers_root" + hio.to_file(subsubmodule_git_file, txt) + subsubmodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "amp", "modules", "helpers_root" + ) + hio.create_dir(subsubmodule_git_file_dir, incremental=False) + # Create `ck.infra` runnable dir under `amp`. + self.runnable_dir = os.path.join(self.submodule_dir, "ck.infra") + hio.create_dir(self.runnable_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in the super repo (e.g. //orange) + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in first level submodule (e.g. //amp) + """ + self.set_up_test() + with hsystem.cd(self.submodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test3(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in second level submodule (e.g. //helpers) + """ + self.set_up_test() + with hsystem.cd(self.subsubmodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test4(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in a runnable dir (e.g. ck.infra) under the + first level submodule (e.g. 
//amp) + """ + self.set_up_test() + with hsystem.cd(self.runnable_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root2 +# ############################################################################# + + +class Test_find_git_root2(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a super repo (e.g. //cmamp) + - the repo contains //helpers as submodule + + Directory structure: + cmamp/ + |-- .git/ + |-- ck.infra/ + `-- helpers_root/ + `-- .git (points to ../.git/modules/helpers_root) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `cmamp` repo. + self.repo_dir = os.path.join(temp_dir, "cmamp") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create `helpers_root` submodule under `cmamp`. + self.submodule_dir = os.path.join(self.repo_dir, "helpers_root") + hio.create_dir(self.submodule_dir, incremental=False) + submodule_git_file = os.path.join(self.submodule_dir, ".git") + txt = "gitdir: ../.git/modules/helpers_root" + hio.to_file(submodule_git_file, txt) + submodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "helpers_root" + ) + hio.create_dir(submodule_git_file_dir, incremental=False) + # Create `ck.infra` runnable dir under `cmamp`. + self.runnable_dir = os.path.join(self.repo_dir, "ck.infra") + hio.create_dir(self.runnable_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in the super repo (e.g. 
//cmamp) + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the submodule (e.g. //helpers) + """ + self.set_up_test() + with hsystem.cd(self.submodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test3(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in a runnable dir (e.g. ck.infra) + """ + self.set_up_test() + with hsystem.cd(self.runnable_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root3 +# ############################################################################# + + +class Test_find_git_root3(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is //helpers + + Directory structure: + helpers/ + |-- .git/ + `-- arbitrary1/ + `-- arbitrary1a/ + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `helpers` repo. + self.repo_dir = os.path.join(temp_dir, "helpers") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create arbitrary directory under `helpers`. 
+ self.arbitrary_dir = os.path.join( + self.repo_dir, "arbitrary1", "arbitrary1a" + ) + hio.create_dir(self.arbitrary_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the root of repo + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in an arbitrary directory under the repo + """ + self.set_up_test() + with hsystem.cd(self.arbitrary_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root4 +# ############################################################################# + + +class Test_find_git_root4(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a linked repo + + Directory structure: + repo/ + `-- .git/ + linked_repo/ + `-- .git (points to /repo/.git) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create repo. + self.repo_dir = os.path.join(temp_dir, "repo") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create linked repo. + self.linked_repo_dir = os.path.join(temp_dir, "linked_repo") + hio.create_dir(self.linked_repo_dir, incremental=False) + # Create pointer from linked repo to the actual repo. 
+ linked_git_file = os.path.join(self.linked_repo_dir, ".git") + txt = f"gitdir: {self.git_dir}\n" + hio.to_file(linked_git_file, txt) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the linked repo + """ + self.set_up_test() + with hsystem.cd(self.linked_repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root5 +# ############################################################################# + + +class Test_find_git_root5(hunitest.TestCase): + """ + Check that the error is raised when no .git directory is found. + + Directory structure: + arbitrary_dir/ + broken_repo/ + `-- .git (points to /nonexistent/path/to/gitdir) + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self): + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + # `self.get_scratch_space()` does not work in the case as it creates + # a temp directory within the repo where `.git` exists by default + # (e.g. /app/helpers/test/outcomes/Test_find_git_root5.test1/tmp.scratch) + # This preventing the exception from being raised. + # We need a structure without `.git` for this test. + self.temp_dir = tempfile.TemporaryDirectory() + # Create arbitrary directory that is not a git repo. + self.arbitrary_dir = os.path.join(self.temp_dir.name, "arbitrary_dir") + hio.create_dir(self.arbitrary_dir, incremental=False) + # Create arbitrary directory that is a submodule or linked repo that + # point to non existing super repo. + self.repo_dir = os.path.join(self.temp_dir.name, "broken_repo") + hio.create_dir(self.repo_dir, incremental=False) + # Create an invalid `.git` file with a non-existent `gitdir`. 
+ invalid_git_file = os.path.join(self.repo_dir, ".git") + txt = "gitdir: /nonexistent/path/to/gitdir" + hio.to_file(invalid_git_file, txt) + + def tear_down_test(self) -> None: + self.temp_dir.cleanup() + + def test1(self) -> None: + """ + Check that the error is raised when the caller is in a directory that + is not either a git repo or a submodule. + """ + with ( + hsystem.cd(self.arbitrary_dir), + self.assertRaises(AssertionError) as cm, + ): + _ = hgit.find_git_root(".") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '/' + != + '/' + No .git directory or file found in any parent directory. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test2(self) -> None: + """ + Check that the error is raised when the caller is in a submodule or + linked repo that points to non existing super repo. + """ + with hsystem.cd(self.repo_dir), self.assertRaises(AssertionError) as cm: + _ = hgit.find_git_root(".") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '/' + != + '/' + Top-level .git directory not found. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# +# Test_find_git_root6 +# ############################################################################# + + +class Test_find_git_root6(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a worktree + + Directory structure: + main_repo/ + `-- .git/ + |-- config + `-- worktrees/ + `-- csfy2/ + |-- HEAD + `-- config + csfy2/ (worktree) + `-- .git (points to /main_repo/.git/worktrees/csfy2) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create main repo with a .git directory. 
+ self.main_repo_dir = os.path.join(temp_dir, "main_repo") + hio.create_dir(self.main_repo_dir, incremental=False) + self.git_dir = os.path.join(self.main_repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create worktree git metadata directory. + self.worktree_git_dir = os.path.join(self.git_dir, "worktrees", "csfy2") + hio.create_dir(self.worktree_git_dir, incremental=False) + # Create worktree directory. + self.worktree_dir = os.path.join(temp_dir, "csfy2") + hio.create_dir(self.worktree_dir, incremental=False) + # Create pointer from worktree to the git directory. + worktree_git_file = os.path.join(self.worktree_dir, ".git") + txt = f"gitdir: {self.worktree_git_dir}\n" + hio.to_file(worktree_git_file, txt) + + def test1(self) -> None: + """ + Check that the function returns the worktree root when called from a worktree. + """ + self.set_up_test() + with hsystem.cd(self.worktree_dir): + git_root = hgit.find_git_root(".") + # For worktrees, the function should return the worktree root, + # not the main repository root. 
+ self.assert_equal(git_root, self.worktree_dir) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py new file mode 100644 index 000000000..2e4a97ca4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py @@ -0,0 +1,406 @@ +import logging +import os +import re +from typing import Any, Callable + +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hpickle as hpickle +import helpers.hstring as hstring +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_is_pickleable +# ############################################################################# + + +def hello() -> bool: + return False + + +# ############################################################################# +# _ClassPickleable +# ############################################################################# + + +class _ClassPickleable: + """ + Class with pickleable param values. + """ + + def __init__(self) -> None: + self._arg1 = 1 + self._arg2 = ["2", 3] + + @staticmethod + def say2(self) -> None: + print("Hello") + + def say(self) -> None: + print("Hello") + + +# ############################################################################# +# _ClassNonPickleable +# ############################################################################# + + +class _ClassNonPickleable: + """ + Class with non-pickleable param values. 
+ """ + + def __init__(self) -> None: + self._arg1 = lambda x: x + self._arg2 = 2 + + +# ############################################################################# +# Test_is_pickleable1 +# ############################################################################# + + +class Test_is_pickleable1(hunitest.TestCase): + def helper( + self, + obj: Any, + exp_str: str, + exp_bound: bool, + exp_lambda: bool, + exp_pickled: bool, + ) -> None: + _LOG.debug("obj=%s", obj) + # + act_str = str(obj) + _LOG.debug("act_str=%s", act_str) + _LOG.debug("exp_str=%s", exp_str) + self.assert_equal(act_str, exp_str, purify_text=True) + # + act_bound = hintros.is_bound_to_object(obj) + _LOG.debug("act_bound=%s", act_bound) + _LOG.debug("exp_bound=%s", exp_bound) + self.assertEqual(act_bound, exp_bound) + # + act_lambda = hintros.is_lambda_function(obj) + _LOG.debug("act_lambda=%s", act_lambda) + _LOG.debug("exp_lambda=%s", exp_lambda) + self.assertEqual(act_lambda, exp_lambda) + # Try to pickle. + try: + file_name = os.path.join(self.get_scratch_space(), "obj.pkl") + hpickle.to_pickle(obj, file_name) + act_pickled = True + except AttributeError as e: + _LOG.error("e=%s", e) + act_pickled = False + _LOG.debug("act_pickled=%s", act_pickled) + _LOG.debug("exp_pickled=%s", exp_pickled) + self.assertEqual(act_pickled, exp_pickled) + + def test_lambda1(self) -> None: + # Local lambda. + lambda_ = lambda: 0 + func = lambda_ + exp_str = r". at 0x>" + # A lambda is not bound to an object. + exp_bound = False + exp_lambda = True + # A lambda is not pickleable. + exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_lambda2(self) -> None: + lambda_ = lambda x: x + func = lambda_ + exp_str = r". at 0x>" + # A lambda is not bound to an object. + exp_bound = False + exp_lambda = True + # A lambda is not pickleable. 
+ exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_func1(self) -> None: + def _hello() -> bool: + return False + + # + func = _hello + exp_str = ( + r"._hello at 0x>" + ) + exp_bound = False + exp_lambda = False + # A local object is not pickleable. + exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_func2(self) -> None: + # Global function. + func = hello + exp_str = r"" + exp_bound = False + exp_lambda = False + # A global function is pickleable since it's not bound locally or + # to an object. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method1(self) -> None: + # A class method but unbound to an object. + func = _ClassPickleable.say + exp_str = r"" + exp_bound = False + exp_lambda = False + # A unbound class method is actually pickleable. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method2(self) -> None: + # A static class method. + func = _ClassPickleable.say2 + exp_str = r"" + exp_bound = False + exp_lambda = False + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method3(self) -> None: + # A bound method. + class_instance = _ClassPickleable() + func = class_instance.say + exp_str = r">" + exp_bound = True + exp_lambda = False + # A method bound to an object is just a function, so it's pickleable. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method4(self) -> None: + # A static class method. 
+ class_instance = _ClassPickleable() + func = class_instance.say2 + exp_str = r"" + exp_bound = False + exp_lambda = False + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + +# ############################################################################# +# Test_is_pickleable2 +# ############################################################################# + + +class Test_is_pickleable2(hunitest.TestCase): + def helper( + self, + obj: Any, + mode: str, + expected: bool, + ) -> None: + """ + Check that picklebility is detected correctly for specified mode. + """ + _LOG.debug("obj=%s", obj) + actual = hintros.is_pickleable(obj, mode=mode) + _LOG.debug("actual=%s", actual) + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_non_callable1(self) -> None: + obj = [1, "2", 0.3] + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_non_callable2(self) -> None: + obj = [1, "2", 0.3] + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_lambda1(self) -> None: + obj = lambda x: x + mode = "type_search" + expected = False + self.helper(obj, mode, expected) + + def test_lambda2(self) -> None: + obj = lambda x: x + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + def test_local_object1(self) -> None: + def _hello() -> bool: + return False + + obj = _hello + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_local_object2(self) -> None: + def _hello() -> bool: + return False + + obj = _hello + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + def test_global_object1(self) -> None: + obj = hello + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_global_object2(self) -> None: + obj = hello + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def 
test_unbound_class_method1(self) -> None: + obj = _ClassPickleable.say + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_unbound_class_method2(self) -> None: + obj = _ClassPickleable.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_static_class_method1(self) -> None: + obj = _ClassPickleable.say + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_static_class_method2(self) -> None: + obj = _ClassPickleable.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_bound_to_object_method1(self) -> None: + class_instance = _ClassPickleable() + obj = class_instance.say + mode = "type_search" + expected = False + self.helper(obj, mode, expected) + + def test_bound_to_object_method2(self) -> None: + class_instance = _ClassPickleable() + obj = class_instance.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_pickleable_class1(self) -> None: + obj = _ClassPickleable() + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_pickleable_class2(self) -> None: + obj = _ClassPickleable() + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_nonpickleable_class1(self) -> None: + obj = _ClassNonPickleable() + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_nonpickleable_class2(self) -> None: + obj = _ClassNonPickleable() + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + +# ############################################################################# +# Test_get_function_name1 +# ############################################################################# + + +def test_function() -> None: + pass + + +# ############################################################################# +# Test_get_function_name1 +# 
############################################################################# + + +class Test_get_function_name1(hunitest.TestCase): + def test1(self) -> None: + actual = hintros.get_function_name() + expected = "test1" + self.assert_equal(actual, expected, purify_text=True) + + +# ############################################################################# +# Test_get_name_from_function1 +# ############################################################################# + + +class Test_get_name_from_function1(hunitest.TestCase): + def test1(self) -> None: + actual = hintros.get_name_from_function(test_function) + actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) + expected = "helpers.test.test_hintrospection.test_function" + self.assert_equal(actual, expected, purify_text=True) + + +# ############################################################################# +# Test_get_function_from_string1 +# ############################################################################# + + +def dummy_function() -> None: + pass + + +# ############################################################################# +# Test_get_function_from_string1 +# ############################################################################# + + +class Test_get_function_from_string1(hunitest.TestCase): + def test1(self) -> None: + """ + Test that function is correctly extracted from a string. + """ + func_str = "helpers.test.test_hintrospection.dummy_function" + # Compute the actual value. + act_func = hintros.get_function_from_string(func_str) + actual = hintros.get_name_from_function(act_func) + actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) + # Compute the expected value. + exp_func = dummy_function + expected = hintros.get_name_from_function(exp_func) + expected = hstring.remove_prefix(expected, "amp.", assert_on_error=False) + # Run. 
+ hdbg.dassert_isinstance(act_func, Callable) + # The function can have different names depending on whether `helpers` + # is a sub-repo or a super-repo: + # helpers.test.test_hintrospection.dummy_function + # helpers_root.helpers.test.test_hintrospection.dummy_function + # + actual = re.sub( + r"helpers_root\.helpers\.", "helpers.", actual, flags=re.MULTILINE + ) + expected = re.sub( + r"helpers_root\.helpers\.", "helpers.", expected, flags=re.MULTILINE + ) + self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py new file mode 100644 index 000000000..cbf1f16f3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py @@ -0,0 +1,225 @@ +import logging +import os + +import numpy as np +import pandas as pd + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_rename_file_if_exists +# ############################################################################# + + +class Test_rename_file_if_exists(hunitest.TestCase): + """ + Test that the function renames existing files correctly. + """ + + def check_file( + self, + file_to_rename: str, + before_extension: bool, + expected_file_name: str, + ) -> None: + """ + Check that file is renamed correctly. + """ + # Create a target file to rename. + scratch_dir = self.get_scratch_space() + file_name = "test_file.txt" + file_path = os.path.join(scratch_dir, file_name) + lines = "" + hio.to_file(file_path, lines) + # Rename the file. 
+ file_to_rename = os.path.join(scratch_dir, file_to_rename) + suffix = "suffix" + hio.rename_file_if_exists( + file_to_rename, suffix, before_extension=before_extension + ) + # Check that file is renamed. + expected_file_path = os.path.join(scratch_dir, expected_file_name) + self.assertTrue(os.path.exists(expected_file_path)) + + def test1(self) -> None: + """ + Test that suffix is added before an extension. + """ + file_to_rename = "test_file.txt" + before_extension = True + expected_file_name = "test_file.suffix.txt" + self.check_file(file_to_rename, before_extension, expected_file_name) + + def test2(self) -> None: + """ + Test that suffix is added after an extension. + """ + file_to_rename = "test_file.txt" + before_extension = False + expected_file_name = "test_file.txt.suffix" + self.check_file(file_to_rename, before_extension, expected_file_name) + + def test3(self) -> None: + """ + Test that non-existing file is not renamed. + """ + file_to_rename = "not_exist.txt" + before_extension = False + expected_file_name = "not_exist.txt" + with self.assertRaises(AssertionError): + self.check_file(file_to_rename, before_extension, expected_file_name) + + +# ############################################################################# +# Test_find_all_files1 +# ############################################################################# + + +class Test_find_all_files1(hunitest.TestCase): + def test1(self) -> None: + dir_name = hgit.get_client_root(super_module=False) + # Check that there are files. + pattern = "*" + only_files = True + use_relative_paths = True + all_files = hio.listdir( + dir_name, pattern, only_files, use_relative_paths + ) + self.assertGreater(len(all_files), 0) + # Check that there are more files than Python files. 
+ exclude_paired_jupytext = False + py_files = hio.keep_python_files(all_files, exclude_paired_jupytext) + self.assertGreater(len(py_files), 0) + self.assertGreater(len(all_files), len(py_files)) + # Check that there are more Python files than not paired Python files. + exclude_paired_jupytext = True + not_paired_py_files = hio.keep_python_files( + all_files, exclude_paired_jupytext + ) + self.assertGreater(len(not_paired_py_files), 0) + self.assertGreater(len(py_files), len(not_paired_py_files)) + + +# ############################################################################# +# Test_change_filename_extension1 +# ############################################################################# + + +class Test_change_filename_extension1(hunitest.TestCase): + def test1(self) -> None: + file_name = "./core/dataflow_model/notebooks/Master_experiment_runner.py" + actual = hio.change_filename_extension(file_name, "py", "ipynb") + expected = ( + "./core/dataflow_model/notebooks/Master_experiment_runner.ipynb" + ) + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_load_df_from_json +# ############################################################################# + + +class Test_load_df_from_json(hunitest.TestCase): + def test1(self) -> None: + test_json_path = os.path.join(self.get_input_dir(), "test.json") + actual_result = hio.load_df_from_json(test_json_path) + expected_result = pd.DataFrame( + { + "col1": ["a", "b", "c", "d"], + "col2": ["a", "b", np.nan, np.nan], + "col3": ["a", "b", "c", np.nan], + } + ) + actual_result = hpandas.df_to_str(actual_result) + expected_result = hpandas.df_to_str(expected_result) + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_safe_rm_file +# ############################################################################# + + +class Test_safe_rm_file(hunitest.TestCase): 
+ def test_successful_removal_within_git_client(self) -> None: + """ + Test successful removal of directory within Git client. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + test_dir = os.path.join(scratch_dir, "test_dir_to_remove") + os.makedirs(test_dir) + # Create a test file in the directory to ensure it has content + test_file = os.path.join(test_dir, "test_file.txt") + hio.to_file(test_file, "test content") + # Verify directory exists before removal + self.assertTrue(os.path.exists(test_dir)) + # Run test. + hio.safe_rm_file(test_dir) + # Check output. + self.assertFalse(os.path.exists(test_dir)) + + def test_removal_of_nested_directory(self) -> None: + """ + Test removal of deeply nested directory structure. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + nested_dir = os.path.join(scratch_dir, "parent", "child", "grandchild") + os.makedirs(nested_dir) + # Create files at different levels + hio.to_file(os.path.join(nested_dir, "file1.txt"), "content1") + hio.to_file( + os.path.join(os.path.dirname(nested_dir), "file2.txt"), "content2" + ) + parent_dir = os.path.join(scratch_dir, "parent") + # Verify directory exists + self.assertTrue(os.path.exists(parent_dir)) + # Run test. + hio.safe_rm_file(parent_dir) + # Check output. + self.assertFalse(os.path.exists(parent_dir)) + + def test_directory_does_not_exist(self) -> None: + """ + Test that function raises assertion error for non-existent directory. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + non_existent_dir = os.path.join(scratch_dir, "non_existent_directory") + # Ensure directory doesn't exist + self.assertFalse(os.path.exists(non_existent_dir)) + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(non_existent_dir) + self.assertIn("does not exist", str(cm.exception)) + + def test_cannot_delete_git_root(self) -> None: + """ + Test that function prevents deletion of Git client root directory. 
+ """ + # Prepare inputs. + git_root = hgit.find_git_root() + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(git_root) + self.assertIn("Cannot delete Git client root", str(cm.exception)) + + def test_directory_outside_git_client_rejected(self) -> None: + """ + Test that function rejects directories outside Git client. + """ + # Prepare inputs. + # Use /tmp which should be outside any Git client + outside_dir = "/tmp" + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(outside_dir) + self.assertIn("is not within Git client root", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py new file mode 100644 index 000000000..70450e943 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py @@ -0,0 +1,665 @@ +""" +Unit tests for hlatex module. 
+ +This module tests LaTeX text processing utilities including: +- Removing LaTeX formatting commands +- Detecting LaTeX line separators +- Framing sections with separator lines +- Detecting LaTeX comments +- Extracting section headers and their hierarchy +""" + +import logging + +import helpers.hlatex as hlatex +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +# ############################################################################# +# Test_remove_latex_formatting1 +# ############################################################################# + + +class Test_remove_latex_formatting1(hunitest.TestCase): + """ + Test the remove_latex_formatting function. + """ + + def test1(self) -> None: + """ + Test removal of textcolor commands from LaTeX text. + """ + # Prepare inputs. + txt = r""" + - If there is \textcolor{red}{no pattern}, we can try learning: + - Measure if \textcolor{blue}{learning works}. + - In the \textcolor{orange}{worst case}, conclude that it + \textcolor{green}{does not work}. + - If we can find the \textcolor{purple}{solution in one step} or + \textcolor{cyan}{program the solution}: + - \textcolor{brown}{Machine learning} is not the \textcolor{teal}{recommended + technique}, but it still works. + - Without \textcolor{magenta}{data}, we cannot do anything: + \textcolor{violet}{data is all that matters}. + """ + txt = hprint.dedent(txt) + # Prepare outputs. + expected = r""" + - If there is no pattern, we can try learning: + - Measure if learning works. + - In the worst case, conclude that it + does not work. + - If we can find the solution in one step or + program the solution: + - Machine learning is not the recommended + technique, but it still works. 
+ - Without data, we cannot do anything: + data is all that matters.""" + expected = hprint.dedent(expected) + # Run test. + actual = hlatex.remove_latex_formatting(txt) + # Check outputs. + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_is_latex_line_separator1 +# ############################################################################# + + +class Test_is_latex_line_separator1(hunitest.TestCase): + """ + Test the _is_latex_line_separator function. + """ + + def test1(self) -> None: + """ + Test that a line with repeated # characters is recognized as separator. + """ + # Prepare inputs. + line = "% ##########" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test2(self) -> None: + """ + Test that a line with repeated = characters is recognized as separator. + """ + # Prepare inputs. + line = "% ==========" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test3(self) -> None: + """ + Test that a line with repeated - characters is recognized as separator. + """ + # Prepare inputs. + line = "% ----------" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test4(self) -> None: + """ + Test that a line with too few repeated characters is not a separator. + """ + # Prepare inputs. + line = "% ####" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertFalse(actual) + + def test5(self) -> None: + """ + Test that a regular comment is not recognized as separator. + """ + # Prepare inputs. + line = "% This is a regular comment" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. 
+ self.assertFalse(actual) + + +# ############################################################################# +# Test_frame_sections1 +# ############################################################################# + + +class Test_frame_sections1(hunitest.TestCase): + """ + Test the frame_sections function. + """ + + def helper(self, input_txt: str, expected: str) -> None: + """ + Helper method to test frame_sections function. + + :param input_txt: Input LaTeX text + :param expected: Expected output after processing + """ + # Prepare inputs. + lines = hprint.dedent(input_txt) + lines = lines.split("\n") + # Run test. + actual = hlatex.frame_sections(lines) + actual = "\n".join(actual) + # Prepare outputs. + expected = hprint.dedent(expected) + # Check outputs. + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test adding separator before a single section command. + """ + # Prepare inputs. + input_txt = r""" + \section{Introduction} + This is the introduction. + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + This is the introduction. + """ + # Run test. + self.helper(input_txt, expected) + + def test2(self) -> None: + """ + Test adding separators before section, subsection, and subsubsection. + """ + # Prepare inputs. + input_txt = r""" + \section{Proposed framework} + + \subsection{Combining Physics-Informed and Data-Driven Approaches} + + \subsubsection{Detailed Analysis} + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Proposed framework} + + % ============================================================================== + \subsection{Combining Physics-Informed and Data-Driven Approaches} + + % ------------------------------------------------------------------------------ + \subsubsection{Detailed Analysis} + """ + # Run test. 
+ self.helper(input_txt, expected) + + def test3(self) -> None: + """ + Test that existing separators are removed and replaced with correct ones. + """ + # Prepare inputs. + input_txt = r""" + % ============== + \section{Introduction} + + % ############## + \subsection{Background} + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + + % ============================================================================== + \subsection{Background} + """ + # Run test. + self.helper(input_txt, expected) + + def test4(self) -> None: + """ + Test that multiple consecutive empty lines are reduced to one. + """ + # Prepare inputs. + input_txt = r""" + \section{Introduction} + + + + This is text after multiple empty lines. + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + + This is text after multiple empty lines. + """ + # Run test. + self.helper(input_txt, expected) + + def test5(self) -> None: + """ + Test with mixed content including text, sections, and empty lines. + """ + # Prepare inputs. + input_txt = r""" + This is some introductory text. + + \section{Methods} + + We describe the methods here. + + + \subsection{Data Collection} + + Details about data collection. + + \subsubsection{Sampling Strategy} + + Sampling details here. + """ + # Prepare outputs. + expected = r""" + This is some introductory text. + + % ############################################################################## + \section{Methods} + + We describe the methods here. + + % ============================================================================== + \subsection{Data Collection} + + Details about data collection. + + % ------------------------------------------------------------------------------ + \subsubsection{Sampling Strategy} + + Sampling details here. + """ + # Run test. 
+ self.helper(input_txt, expected) + + def test6(self) -> None: + """ + Test that lines without section commands are left unchanged. + """ + # Prepare inputs. + input_txt = r""" + This is regular text. + No sections here. + Just content. + """ + # Prepare outputs. + expected = r""" + This is regular text. + No sections here. + Just content. + """ + # Run test. + self.helper(input_txt, expected) + + +# ############################################################################# +# Test_is_latex_comment +# ############################################################################# + + +class Test_is_latex_comment(hunitest.TestCase): + """ + Test the _is_latex_comment function. + """ + + def test1(self) -> None: + """ + Test that a line starting with % is recognized as a comment. + """ + # Prepare inputs. + line = "% This is a comment" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + def test2(self) -> None: + """ + Test that a line with leading whitespace and % is a comment. + """ + # Prepare inputs. + line = " % This is a comment" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + def test3(self) -> None: + """ + Test that a regular line is not recognized as a comment. + """ + # Prepare inputs. + line = "This is regular text" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertFalse(actual) + + def test4(self) -> None: + """ + Test that a line with escaped % character is not a comment. + """ + # Prepare inputs. + line = r"The value is \% of the total" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertFalse(actual) + + def test5(self) -> None: + """ + Test that a line with % in the middle is not a comment. + """ + # Prepare inputs. + line = r"Text before \% and after" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. 
+ self.assertFalse(actual) + + def test6(self) -> None: + """ + Test that a line with only % is a comment. + """ + # Prepare inputs. + line = "%" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + +# ############################################################################# +# Test_extract_latex_section +# ############################################################################# + + +class Test_extract_latex_section(hunitest.TestCase): + """ + Test the _extract_latex_section function. + """ + + def helper( + self, line: str, expected_level: int, expected_title: str + ) -> None: + """ + Helper method to test extraction of LaTeX section commands. + + :param line: LaTeX line to parse + :param expected_level: Expected section level (0 if no section) + :param expected_title: Expected title (empty string if no section) + """ + # Prepare inputs - line_number is arbitrary for testing. + line_number = 1 + # Run test. + header_info = hlatex._extract_latex_section(line, line_number) + # Check outputs. + if expected_level == 0: + # No section expected. + self.assertIsNone(header_info) + else: + # Section expected. + self.assertIsNotNone(header_info) + self.assert_equal(str(header_info.level), str(expected_level)) + self.assert_equal(header_info.description, expected_title) + + def test1(self) -> None: + """ + Test extraction of basic section command. + """ + line = r"\section{Introduction}" + self.helper(line, 1, "Introduction") + + def test2(self) -> None: + """ + Test extraction of basic subsection command. + """ + line = r"\subsection{Background}" + self.helper(line, 2, "Background") + + def test3(self) -> None: + """ + Test extraction of basic subsubsection command. + """ + line = r"\subsubsection{Details}" + self.helper(line, 3, "Details") + + def test4(self) -> None: + """ + Test extraction of section with nested LaTeX commands. 
+ """ + line = r"\section{Introduction to \textbf{Machine Learning}}" + self.helper(line, 1, r"Introduction to \textbf{Machine Learning}") + + def test5(self) -> None: + """ + Test extraction of section with optional short title. + """ + line = r"\section[Short Title]{Long Title for Table of Contents}" + # Should extract the long title (in curly braces). + self.helper(line, 1, "Long Title for Table of Contents") + + def test6(self) -> None: + """ + Test extraction of section with escaped special characters. + """ + line = r"\section{Cost Analysis: \$100 \& More}" + self.helper(line, 1, r"Cost Analysis: \$100 \& More") + + def test7(self) -> None: + """ + Test extraction of section with leading whitespace. + """ + line = r" \section{Methods}" + self.helper(line, 1, "Methods") + + def test8(self) -> None: + """ + Test that a regular line is not recognized as a section. + """ + line = "This is regular text" + self.helper(line, 0, "") + + def test9(self) -> None: + """ + Test that section with empty title is not extracted. + """ + line = r"\section{}" + # Sections with empty titles should not be extracted. + self.helper(line, 0, "") + + +# ############################################################################# +# Test_extract_headers_from_latex +# ############################################################################# + + +class Test_extract_headers_from_latex(hunitest.TestCase): + """ + Test the extract_headers_from_latex function. + """ + + def helper(self, lines: str, expected: str, *, max_level: int = 3) -> None: + """ + Helper method to test header extraction from LaTeX documents. + + :param lines: LaTeX document content as a string + :param expected: Expected string representation of header list + :param max_level: Maximum header level to extract (default: 3) + """ + # Prepare inputs. + lines_list = hprint.dedent(lines).split("\n") + # Run test. 
+ actual = hlatex.extract_headers_from_latex( + lines_list, max_level, sanity_check=False + ) + actual_str = hmarhead.header_list_to_str(actual) + # Prepare outputs. + expected = hprint.dedent(expected) + # Check outputs. + self.assert_equal(actual_str, expected) + + def test1(self) -> None: + """ + Test extraction from a basic LaTeX document with multiple section levels. + """ + # Prepare inputs. + lines = r""" + \section{Introduction} + This is the introduction. + + \subsection{Background} + Background information here. + + \section{Methods} + Methods description. + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Introduction', 1) + HeaderInfo(2, 'Background', 4) + HeaderInfo(1, 'Methods', 7)""" + # Run test. + self.helper(lines, expected) + + def test2(self) -> None: + """ + Test that commented-out sections are skipped. + """ + # Prepare inputs. + lines = r""" + \section{Introduction} + % \section{Old Section} + \subsection{Current Subsection} + % \subsection{Old Subsection} + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Introduction', 1) + HeaderInfo(2, 'Current Subsection', 3)""" + # Run test. + self.helper(lines, expected) + + def test3(self) -> None: + """ + Test that only headers up to max_level are extracted. + """ + # Prepare inputs. + lines = r""" + \section{Chapter 1} + \subsection{Section 1.1} + \subsubsection{Section 1.1.1} + """ + # Prepare outputs. + # Should only get section and subsection, not subsubsection. + expected = """ + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(2, 'Section 1.1', 2)""" + # Run test. + self.helper(lines, expected, max_level=2) + + def test4(self) -> None: + """ + Test extraction with nested LaTeX commands in titles. + """ + # Prepare inputs. + lines = r""" + \section{Introduction to \textbf{ML}} + \subsection{Using \emph{Neural Networks}} + """ + # Prepare outputs. + expected = r""" + HeaderInfo(1, 'Introduction to \textbf{ML}', 1) + HeaderInfo(2, 'Using \emph{Neural Networks}', 2)""" + # Run test. 
+ self.helper(lines, expected) + + def test5(self) -> None: + """ + Test that line numbers are correctly recorded. + """ + # Prepare inputs. + lines = r""" + Some text here. + + \section{First Section} + More text. + + \subsection{First Subsection} + Even more text. + """ + # Prepare outputs. + # Line numbers should be 3 and 6 (1-indexed). + expected = """ + HeaderInfo(1, 'First Section', 3) + HeaderInfo(2, 'First Subsection', 6)""" + # Run test. + self.helper(lines, expected) + + def test6(self) -> None: + """ + Test extraction from document with no sections. + """ + # Prepare inputs. + lines = """ + This is just regular text. + No sections here. + """ + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, expected) + + def test7(self) -> None: + """ + Test extraction with all three section levels. + """ + # Prepare inputs. + lines = r""" + \section{Chapter 1} + Introduction to chapter. + + \subsection{Section 1.1} + Section content. + + \subsubsection{Subsection 1.1.1} + Detailed content. + + \subsection{Section 1.2} + More content. + + \section{Chapter 2} + Second chapter. + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(2, 'Section 1.1', 4) + HeaderInfo(3, 'Subsection 1.1.1', 7) + HeaderInfo(2, 'Section 1.2', 10) + HeaderInfo(1, 'Chapter 2', 13)""" + # Run test. 
+ self.helper(lines, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py new file mode 100644 index 000000000..f8d9b237d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py @@ -0,0 +1,176 @@ +import logging +from typing import List, Optional + +import helpers.hlist as hlist +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_list_find_duplicates1 +# ############################################################################# + + +class Test_list_find_duplicates1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(list_out, []) + + def test2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(set(list_out), set("a f".split())) + + +# ############################################################################# +# Test_list_remove_duplicates1 +# ############################################################################# + + +class Test_list_remove_duplicates1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d".split()) + + def test2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d e f".split()) + + def test3(self) -> None: + list_ = "a b c a d e f f".split() + list_ = list(reversed(list_)) + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "f e d a c b".split()) + + +# 
#############################################################################
# Test_list_extract1
# #############################################################################


class Test_list_extract1(hunitest.TestCase):
    """
    Test `hlist.extract()` on the fixed list `["a", "b", "c", "d"]`.

    The expected values in this class treat `end_idx` as exclusive and a
    `None` bound as "no limit on that side".
    """

    def _helper(
        self,
        start_idx: Optional[int],
        end_idx: Optional[int],
        expected_list: List[str],
    ) -> None:
        """
        Extract a slice from the reference list and compare it.

        :param start_idx: start index passed to `hlist.extract()`, or `None`
        :param end_idx: end index passed to `hlist.extract()`, or `None`
        :param expected_list: elements expected back from the call
        """
        # Prepare inputs.
        list_ = "a b c d".split()
        # Run test.
        actual_list = hlist.extract(list_, start_idx, end_idx)
        # Check outputs.
        self.assertEqual(actual_list, expected_list)

    def test1(self) -> None:
        """
        Extract only the first element.
        """
        start_idx = 0
        end_idx = 1
        expected_list = "a".split()
        self._helper(start_idx, end_idx, expected_list)

    def test2(self) -> None:
        """
        Extract from an explicit start up to the end of the list.
        """
        start_idx = 1
        end_idx = None
        expected_list = "b c d".split()
        self._helper(start_idx, end_idx, expected_list)

    def test3(self) -> None:
        """
        Extract with no bounds, i.e., the whole list.
        """
        start_idx = None
        end_idx = None
        expected_list = "a b c d".split()
        self._helper(start_idx, end_idx, expected_list)

    def test4(self) -> None:
        """
        Extract from the beginning of the list up to an explicit end.
        """
        start_idx = None
        end_idx = 2
        expected_list = "a b".split()
        self._helper(start_idx, end_idx, expected_list)

    def test5(self) -> None:
        """
        Extract with both bounds strictly inside the list.
        """
        # This case used to repeat `test4` verbatim; it now covers a slice
        # that touches neither end of the list. The expected value follows the
        # half-open convention shown by the other cases in this class.
        start_idx = 1
        end_idx = 3
        expected_list = "b c".split()
        self._helper(start_idx, end_idx, expected_list)

    def test6(self) -> None:
        """
        Extract with explicit bounds that span the whole list.
        """
        start_idx = 0
        end_idx = 4
        expected_list = "a b c d".split()
        self._helper(start_idx, end_idx, expected_list)

    def test7(self) -> None:
        """
        Extract everything except the last element.
        """
        start_idx = 0
        end_idx = 3
        expected_list = "a b c".split()
        self._helper(start_idx, end_idx, expected_list)


# #############################################################################
# Test_list_chunk1
# #############################################################################


class Test_list_chunk1(hunitest.TestCase):
    """
    Test `hlist.chunk()` on the fixed list `["a", "b", "c", "d", "e", "f"]`.

    In the expected values below, `n` is the number of chunks returned.
    """

    def _helper(self, n: int, expected_list: List[List[str]]) -> None:
        """
        Split the reference list with `hlist.chunk()` and compare the result.

        :param n: second argument passed to `hlist.chunk()`
        :param expected_list: chunks expected back from the call
        """
        # Prepare inputs.
        list_ = "a b c d e f".split()
        # Run test.
        actual_list = hlist.chunk(list_, n)
        # Check outputs.
        self.assertEqual(actual_list, expected_list)

    def test1(self) -> None:
        """
        A single chunk holds the whole list.
        """
        n = 1
        expected_list = ["a b c d e f".split()]
        self._helper(n, expected_list)

    def test2(self) -> None:
        """
        Two chunks of three elements each.
        """
        n = 2
        expected_list = [["a", "b", "c"], ["d", "e", "f"]]
        self._helper(n, expected_list)

    def test3(self) -> None:
        """
        Three chunks of two elements each.
        """
        n = 3
        expected_list = [["a", "b"], ["c", "d"], ["e", "f"]]
        self._helper(n, expected_list)

    def test4(self) -> None:
        """
        Four chunks of uneven size: the longer chunks come first.
        """
        n = 4
        expected_list = [["a", "b"], ["c", "d"], ["e"], ["f"]]
        self._helper(n, expected_list)

    def test5(self) -> None:
        """
        As many chunks as elements: every chunk is a singleton.
        """
        n = 6
        expected_list = [["a"], ["b"], ["c"], ["d"], ["e"], ["f"]]
        self._helper(n, expected_list)


# #############################################################################
# Test_list1
# #############################################################################


class Test_list1(hunitest.TestCase):
    """
    Combined checks for `hlist.find_duplicates()` and
    `hlist.remove_duplicates()`.

    NOTE(review): these cases use the same inputs and expectations as
    `Test_list_find_duplicates1` and `Test_list_remove_duplicates1` earlier in
    this file; consider keeping only one copy.
    """

    def test_find_duplicates1(self) -> None:
        """
        A list without repeated elements yields no duplicates.
        """
        list_ = "a b c d".split()
        list_out = hlist.find_duplicates(list_)
        self.assertEqual(list_out, [])

    def test_find_duplicates2(self) -> None:
        """
        Repeated elements are reported; the order is not checked.
        """
        list_ = "a b c a d e f f".split()
        list_out = hlist.find_duplicates(list_)
        self.assertEqual(set(list_out), set("a f".split()))

    def test_remove_duplicates1(self) -> None:
        """
        A list without repeated elements is returned unchanged.
        """
        list_ = "a b c d".split()
        list_out = hlist.remove_duplicates(list_)
        self.assertEqual(list_out, "a b c d".split())

    def test_remove_duplicates2(self) -> None:
        """
        Only the first occurrence of each element is kept, in order.
        """
        list_ = "a b c a d e f f".split()
        list_out = hlist.remove_duplicates(list_)
        self.assertEqual(list_out, "a b c d e f".split())

    def test_remove_duplicates3(self) -> None:
        """
        Same as `test_remove_duplicates2` but on the reversed input.
        """
        list_ = "a b c a d e f f".split()
        list_ = list(reversed(list_))
        list_out = hlist.remove_duplicates(list_)
        self.assertEqual(list_out, "f e d a c b".split())
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py @@ -0,0 +1,361 @@ +import os +import types +import unittest.mock as umock +from typing import Any, Dict + +import pandas as pd +import pytest + +pytest.importorskip("openai") # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hdbg as hdbg # noqa: E402 +import helpers.hllm as hllm # noqa: E402 +import helpers.hunit_test as hunitest # noqa: E402 + +_USER_PROMPT1 = "what is machine learning?" +_USER_PROMPT2 = _USER_PROMPT1.upper() + +_SYSTEM_PROMPT1 = "You are a helpful AI assistant." +_SYSTEM_PROMPT2 = ( + "You are a helpful AI assistant and excellent in explaining things." +) + +_TEMPERATURE1 = 0.1 +_TEMPERATURE2 = 0.2 + +_TOP_P1 = 0.5 + +_MODEL1 = "gpt-4o-mini" +_MODEL2 = "gpt-3.5-turbo" +_MODEL3 = "deepseek/deepseek-r1-0528-qwen3-8b:free" +_MODEL4 = "openai/gpt-4o-mini" + + +# Test functions for the unit tests. +def _get_completion_parameters1() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT1, + "system_prompt": _SYSTEM_PROMPT1, + "temperature": _TEMPERATURE1, + "model": _MODEL1, + } + return data + + +def _get_completion_parameters2() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT2, + "system_prompt": _SYSTEM_PROMPT2, + "temperature": _TEMPERATURE2, + "model": _MODEL2, + "top_p": _TOP_P1, + } + return data + + +def _get_completion_parameters3() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT2, + "system_prompt": _SYSTEM_PROMPT2, + "temperature": _TEMPERATURE2, + "model": _MODEL3, + "top_p": _TOP_P1, + } + return data + + +def _get_completion_parameters4() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT1, + "system_prompt": _SYSTEM_PROMPT1, + "temperature": _TEMPERATURE1, + "model": _MODEL4, + } + return data + + +# ############################################################################# +# Test_get_completion +# 
#############################################################################


class Test_get_completion(hunitest.TestCase):
    """
    Check `hllm.get_completion()` for several models.

    Every case passes `cache_mode="HIT_CACHE_OR_ABORT"` and compares the
    response with the stored golden output through `check_string()`.
    """

    def test1(self) -> None:
        """
        Verify that get_completion() returns the expected response from the
        cache.
        """
        parameters1 = _get_completion_parameters1()
        actual_response = hllm.get_completion(
            **parameters1, cache_mode="HIT_CACHE_OR_ABORT"
        )
        self.assertIsInstance(actual_response, str)
        self.check_string(actual_response)

    def test2(self) -> None:
        """
        Verify with a different OpenAI model.
        """
        parameters2 = _get_completion_parameters2()
        actual_response = hllm.get_completion(
            **parameters2, cache_mode="HIT_CACHE_OR_ABORT"
        )
        self.assertIsInstance(actual_response, str)
        self.check_string(actual_response)

    def test3(self) -> None:
        """
        Verify that hllm.get_completion() supports OpenRouter models.
        """
        parameters3 = _get_completion_parameters3()
        actual_response = hllm.get_completion(
            **parameters3, cache_mode="HIT_CACHE_OR_ABORT"
        )
        self.assertIsInstance(actual_response, str)
        self.check_string(actual_response)

    def test4(self) -> None:
        """
        Verify with OpenAI-prefixed models.
        """
        parameters4 = _get_completion_parameters4()
        actual_response = hllm.get_completion(
            **parameters4, cache_mode="HIT_CACHE_OR_ABORT"
        )
        self.assertIsInstance(actual_response, str)
        self.check_string(actual_response)


# #############################################################################
# Test_response_to_txt
# #############################################################################


class Test_response_to_txt(hunitest.TestCase):
    """
    Test `hllm.response_to_txt()` for each supported response type.
    """

    # Dummy classes to satisfy `isinstance` checks.

    class DummyChatCompletion:
        """
        Stand-in exposing the text as `.choices[0].message.content`.
        """

        def __init__(self, text: str = "") -> None:
            msg = types.SimpleNamespace(content=text)
            choice = types.SimpleNamespace(message=msg)
            self.choices = [choice]

    class DummyThreadMessage:
        """
        Stand-in exposing the text as `.content[0].text.value`.
        """

        def __init__(self, text: str = "") -> None:
            # mimic .content[0].text.value
            value_obj = types.SimpleNamespace(value=text)
            text_obj = types.SimpleNamespace(text=value_obj)
            self.content = [text_obj]

    @umock.patch(
        "openai.types.chat.chat_completion.ChatCompletion",
        new=DummyChatCompletion,
    )
    def test_chat_completion_branch(self) -> None:
        """
        A chat-completion-like object is converted to its message content.
        """
        # Prepare inputs.
        resp = Test_response_to_txt.DummyChatCompletion("hello chat")
        # Run test.
        actual = hllm.response_to_txt(resp)
        # Check outputs.
        expected = "hello chat"
        self.assert_equal(actual, expected)

    @umock.patch(
        "openai.types.beta.threads.message.Message",
        new=DummyThreadMessage,
    )
    def test_thread_message_branch(self) -> None:
        """
        A thread-message-like object is converted to its text value.
        """
        # Prepare inputs.
        resp = Test_response_to_txt.DummyThreadMessage("thread reply")
        # Run test.
        actual = hllm.response_to_txt(resp)
        # Check outputs.
        expected = "thread reply"
        self.assert_equal(actual, expected)

    def test_str_pass_through(self) -> None:
        """
        A plain string is returned unchanged.
        """
        actual = hllm.response_to_txt("just a string")
        expected = "just a string"
        self.assert_equal(actual, expected)

    def test_unknown_type_raises(self) -> None:
        """
        An unsupported type raises `ValueError` with an explanatory message.
        """
        with self.assertRaises(ValueError) as cm:
            hllm.response_to_txt(12345)
        # The message must be inspected after the `with` block: statements
        # following the raising call inside the block never run.
        self.assertIn("Unknown response type", str(cm.exception))


# #############################################################################
# Test_retrieve_openrouter_model_info
# #############################################################################


class Test_retrieve_openrouter_model_info(hunitest.TestCase):
    """
    Test `hllm._retrieve_openrouter_model_info()` with `requests.get` mocked.
    """

    @umock.patch("requests.get")
    def test_retrieve_success(self, mock_get: umock.Mock) -> None:
        """
        The `data` payload of the response is returned as a DataFrame.

        :param mock_get: mock injected in place of `requests.get`
        """
        # Prepare dummy JSON data.
        data = [
            {"id": "model1", "name": "Model One"},
            {"id": "model2", "name": "Model Two"},
        ]
        mock_response = umock.Mock()
        mock_response.json.return_value = {"data": data}
        mock_get.return_value = mock_response
        # Call the function under test.
        df = hllm._retrieve_openrouter_model_info()
        # Build expected DataFrame.
        expected_df = pd.DataFrame(data)
        # Verify DataFrame content.
        self.assertEqual(
            df.to_dict(orient="records"), expected_df.to_dict(orient="records")
        )
        # Ensure the correct URL was requested.
        mock_get.assert_called_once_with("https://openrouter.ai/api/v1/models")

    @umock.patch("requests.get")
    def test_missing_data_key_raises(self, mock_get: umock.Mock) -> None:
        """
        A response without the `data` key makes the function assert.

        :param mock_get: mock injected in place of `requests.get`
        """
        # JSON missing the 'data' key.
        mock_response = umock.Mock()
        mock_response.json.return_value = {"wrong": []}
        mock_get.return_value = mock_response
        # Expect the function's internal check to raise an AssertionError.
        with self.assertRaises(AssertionError):
            hllm._retrieve_openrouter_model_info()


# #############################################################################
# Test_save_models_info_to_csv
# #############################################################################


class Test_save_models_info_to_csv(hunitest.TestCase):
    """
    Test `hllm._save_models_info_to_csv()`.
    """

    def get_temp_path(self) -> str:
        """
        Return the path of a temporary CSV file inside the scratch space.

        The directory and the path are also stored in `self.tmp_dir` and
        `self.tmp_path`.
        """
        self.tmp_dir = self.get_scratch_space()
        tmp_file_name = "tmp.models_info.csv"
        self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name)
        return self.tmp_path

    def test_save_models_info(self) -> None:
        """
        Save a DataFrame as a CSV and check both the returned and saved data.
        """
        # Prepare a DataFrame with extra columns.
        data = [
            {
                "id": "m1",
                "name": "Model1",
                "description": "desc1",
                "pricing": {"prompt": 0.1, "completion": 0.2},
                "supported_parameters": ["a", "b"],
                "extra_col": 123,
            },
            {
                "id": "m2",
                "name": "Model2",
                "description": "desc2",
                "pricing": {"prompt": 0.3, "completion": 0.4},
                "supported_parameters": ["c"],
                "extra_col": 456,
            },
        ]
        df = pd.DataFrame(data)
        output_file: str = self.get_temp_path()
        # Call the function under test.
        returned_df = hllm._save_models_info_to_csv(df, output_file)
        # The returned DataFrame should have only the selected columns.
        expected_columns = [
            "id",
            "name",
            "description",
            "prompt_pricing",
            "completion_pricing",
            "supported_parameters",
        ]
        # Use unittest assertions (not `hdbg.dassert_*`) so that a failure is
        # reported like every other check in this file.
        self.assertEqual(list(returned_df.columns), expected_columns)
        # Verify pricing values are extracted correctly.
        self.assert_equal(
            str(returned_df["prompt_pricing"]),
            str(pd.Series([0.1, 0.3], name="prompt_pricing", dtype=float)),
        )
        self.assert_equal(
            str(returned_df["completion_pricing"]),
            str(pd.Series([0.2, 0.4], name="completion_pricing", dtype=float)),
        )
        # File should be created and readable.
        self.assertTrue(os.path.exists(output_file))
        saved_df = pd.read_csv(output_file)
        self.assert_equal(
            str(returned_df["completion_pricing"]),
            str(saved_df["completion_pricing"]),
        )
        self.assert_equal(
            str(returned_df["prompt_pricing"]), str(saved_df["prompt_pricing"])
        )


# #############################################################################
# Test_calculate_cost
# #############################################################################


class Test_calculate_cost(hunitest.TestCase):
    """
    Test `hllm.LLMCostTracker.calculate_cost()`.
    """

    def get_tmp_path(self) -> str:
        """
        Return the path of a temporary CSV file inside the scratch space.

        The directory and the path are also stored in `self.tmp_dir` and
        `self.tmp_path`.
        """
        self.tmp_dir = self.get_scratch_space()
        tmp_file_name: str = "tmp.models_info.csv"
        self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name)
        return self.tmp_path

    def test_openai_cost(self) -> None:
        """
        Known OpenAI model and token counts produce expected cost.
        """
        # Prepare inputs: only the `usage` token counts are provided.
        comp = types.SimpleNamespace(
            usage=types.SimpleNamespace(
                prompt_tokens=1000000, completion_tokens=2000000
            )
        )
        llm_cost_tracker = hllm.LLMCostTracker()
        # Run test.
        cost = llm_cost_tracker.calculate_cost(
            comp, model="gpt-3.5-turbo", models_info_file=""
        )
        # Check outputs.
        # 1000000*(0.5/1000000) + 2000000*(1.5/1000000) = 0.5 + 3.0 = 3.5
        self.assertAlmostEqual(cost, 3.5)

    def test_openai_unknown_model(self) -> None:
        """
        Passing an unknown OpenAI model should raise an AssertionError.
        """
        comp = types.SimpleNamespace(
            usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1)
        )
        llm_cost_tracker = hllm.LLMCostTracker()
        with pytest.raises(AssertionError):
            llm_cost_tracker.calculate_cost(
                comp, model="nonexistent-model", models_info_file=""
            )

    def test_openrouter_load_existing_csv(self) -> None:
        """
        Assume that the CSV file exists for OpenRouter.

        Then we should load CSV and calculate cost without fetching.
        """
        # Write a tiny CSV: id,prompt_pricing,completion_pricing
        temp_csv_file = self.get_tmp_path()
        pd.DataFrame(
            {
                "id": ["deepseek/m1"],
                "prompt_pricing": [0.1],
                "completion_pricing": [0.2],
            }
        ).to_csv(temp_csv_file, index=False)
        comp = types.SimpleNamespace(
            usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1)
        )
        llm_cost_tracker = hllm.LLMCostTracker()
        # Run test.
        cost = llm_cost_tracker.calculate_cost(
            comp,
            model="deepseek/m1",
            models_info_file=temp_csv_file,
        )
        # Check outputs.
        # 1*0.1 + 1*0.2 = 0.1 + 0.2 = 0.3
        self.assertAlmostEqual(cost, 0.3)
+_TEST_CASES = [ + # llm_cli.py --input_file input.txt --output_file output.txt + ( + "Basic usage with input file", + {}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful math assistant. Solve the problem step by step." + ( + "With custom system prompt", + { + "system_prompt": "You are a helpful math assistant. Solve the problem step by step." + }, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --model gpt-4 + ( + "With specific model selection", + {"model": "gpt-4"}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --expected_num_chars 500 + ( + "With progress bar (expected character count)", + {"expected_num_chars": 500}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful assistant that provides concise answers" --model gpt-4o-mini --expected_num_chars 1000 + ( + "Complete example with all options", + { + "system_prompt": "You are a helpful assistant that provides concise answers", + "model": "gpt-4o-mini", + "expected_num_chars": 1000, + }, + ), +] + +# Test cases for input_text functionality. +# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. +_TEST_CASES_INPUT_TEXT = [ + # llm_cli.py --input_text "2+2=" --output_file output.txt + ( + "Basic usage with input text", + { + "input_text": "2+2=", + }, + ), + # llm_cli.py --input_text "What is Python?" --output_file output.txt --system_prompt "You are a helpful assistant" + ( + "With input text and system prompt", + { + "input_text": "What is Python?", + "system_prompt": "You are a helpful assistant", + }, + ), + # llm_cli.py --input_text "Explain recursion" --output_file output.txt --model gpt-4o-mini + ( + "With input text and specific model", + { + "input_text": "Explain recursion", + "model": "gpt-4o-mini", + }, + ), +] + +# Test cases for print_only functionality. 
# Each tuple contains (description, kwargs) and corresponding llm_cli.py command.
_TEST_CASES_PRINT_ONLY = [
    # llm_cli.py --input_text "2+2=" --output_file -
    (
        "Print to screen with input text",
        {
            "input_text": "2+2=",
            "print_only": True,
        },
    ),
]


# #############################################################################
# TestApplyLlmBase
# #############################################################################


class TestApplyLlmBase(_BaseCacheTest):
    """
    Base class with helper methods for testing apply_llm functions.

    Provides common helper methods used across different test classes to
    reduce code duplication and maintain consistency.
    """

    def _run_test_cases(self, use_llm_executable: bool) -> None:
        """
        Run every entry of `_TEST_CASES` through `apply_llm_with_files()`.

        A single input file containing "2+2=" is shared by all cases; each
        case writes to its own output file, which must exist and be non-empty.
        The content of the output is not checked.

        :param use_llm_executable: if True, use CLI executable; if False, use
            library
        """
        # Get scratch space for test files.
        scratch_dir = self.get_scratch_space()
        # Create input file.
        input_file = os.path.join(scratch_dir, "input.txt")
        hio.to_file(input_file, "2+2=")
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            output_file = os.path.join(scratch_dir, f"output_{idx}.txt")
            # Run test.
            hllmcli.apply_llm_with_files(
                input_file=input_file,
                output_file=output_file,
                use_llm_executable=use_llm_executable,
                **kwargs,
            )
            # Check that output file was created.
            self.assertTrue(os.path.exists(output_file))
            # Check that output file is not empty.
            output_content = hio.from_file(output_file)
            self.assertGreater(len(output_content), 0)

    def _run_test_cases_input_text(self, use_llm_executable: bool) -> None:
        """
        Run every entry of `_TEST_CASES_INPUT_TEXT` through `apply_llm()`.

        The response of each case is written to its own file in the scratch
        space, and that file must exist and be non-empty.

        :param use_llm_executable: if True, use CLI executable; if False, use
            library
        """
        # Get scratch space for test files.
        scratch_dir = self.get_scratch_space()
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES_INPUT_TEXT, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            output_file = os.path.join(scratch_dir, f"output_text_{idx}.txt")
            # Extract input_text from kwargs. Work on a copy so that the
            # module-level test case is not mutated across runs.
            kwargs_copy = kwargs.copy()
            input_text = kwargs_copy.pop("input_text")
            # Run test using apply_llm directly.
            response = hllmcli.apply_llm(
                input_text,
                use_llm_executable=use_llm_executable,
                **kwargs_copy,
            )
            # Write output to file.
            hio.to_file(output_file, response)
            # Check that output file was created.
            self.assertTrue(os.path.exists(output_file))
            # Check that output file is not empty.
            output_content = hio.from_file(output_file)
            self.assertGreater(len(output_content), 0)


# #############################################################################
# Test_apply_llm_with_files1
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_apply_llm_with_files1(TestApplyLlmBase):
    """
    Test apply_llm_with_files using both library and executable interfaces.

    Tests run various command-line configurations to ensure they execute
    without errors. Does not verify output correctness.
    """

    def test_library(self) -> None:
        """
        Test multiple command-line configurations using library interface.

        Tests various command-line argument combinations to ensure they
        execute without errors. Does not verify output correctness.
        """
        self._run_test_cases(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test_executable(self) -> None:
        """
        Test multiple command-line configurations using executable interface.

        Tests various command-line argument combinations to ensure they
        execute without errors. Does not verify output correctness.
        """
        self._run_test_cases(use_llm_executable=True)


# #############################################################################
# Test_apply_llm_with_files2
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_apply_llm_with_files2(TestApplyLlmBase):
    """
    Test `apply_llm()` with direct text input and with print-only output.

    Both scenarios are run through the library and the executable
    interfaces. Does not verify output correctness.
    """

    def test1_library(self) -> None:
        """
        Test input_text parameter using library interface.

        Tests that input_text parameter works correctly when text is provided
        directly instead of from a file. Does not verify output correctness.
        """
        self._run_test_cases_input_text(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test1_executable(self) -> None:
        """
        Test input_text parameter using executable interface.

        Tests that input_text parameter works correctly when text is provided
        directly instead of from a file. Does not verify output correctness.
        """
        self._run_test_cases_input_text(use_llm_executable=True)

    # //////////////////////////////////////////////////////////////////////////

    def _run_test_cases_print_only(self, use_llm_executable: bool) -> None:
        """
        Run every entry of `_TEST_CASES_PRINT_ONLY` through `apply_llm()`.

        The response is printed to stdout; nothing is asserted about it, so
        the test only fails if the call raises.

        :param use_llm_executable: if True, use CLI executable; if False, use
            library
        """
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES_PRINT_ONLY, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            # Extract parameters from kwargs.
            kwargs_copy = kwargs.copy()
            input_text = kwargs_copy.pop("input_text")
            # `print_only` is not needed for apply_llm.
            kwargs_copy.pop("print_only")
            # Run test using apply_llm directly - this should print to stdout.
            response = hllmcli.apply_llm(
                input_text,
                use_llm_executable=use_llm_executable,
                **kwargs_copy,
            )
            # Print response to stdout (simulating print_only behavior).
            print(response)

    def test2_library(self) -> None:
        """
        Test print_only parameter using library interface.

        Tests that print_only parameter works correctly when output should be
        printed to screen instead of written to file. Does not verify output
        correctness.
        """
        self._run_test_cases_print_only(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test2_executable(self) -> None:
        """
        Test print_only parameter using executable interface.

        Tests that print_only parameter works correctly when output should be
        printed to screen instead of written to file. Does not verify output
        correctness.
        """
        self._run_test_cases_print_only(use_llm_executable=True)


# #############################################################################
# Test_llm1
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_llm1(hunitest.TestCase):
    """
    Test _llm() function with different models and prompt lengths.

    Tests verify that _llm() correctly processes prompts of varying lengths
    across different models, and tracks timing and cost information.
    """

    @staticmethod
    def get_short_prompt() -> str:
        """
        Get a short test prompt.

        :return: short system prompt string
        """
        prompt = "You are a helpful assistant. Answer concisely."
        return prompt

    @staticmethod
    def get_medium_prompt() -> str:
        """
        Get a medium-length test prompt.

        :return: medium-length system prompt string
        """
        prompt = """
        You are a helpful assistant. Your task is to provide clear and
        accurate answers to questions. Always be concise but thorough in
        your explanations. If you don't know something, acknowledge it.
        Use simple language that anyone can understand.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    @staticmethod
    def get_long_prompt() -> str:
        """
        Get a long test prompt.

        :return: long system prompt string
        """
        prompt = """
        You are a highly knowledgeable AI assistant with expertise across
        multiple domains including technology, science, mathematics, and
        general knowledge. Your primary objectives are:

        1. Provide accurate and well-researched information
        2. Explain concepts clearly and thoroughly
        3. Use examples when they help clarify complex topics
        4. Cite sources or acknowledge uncertainty when appropriate
        5. Adapt your language to the user's level of understanding
        6. Break down complex problems into manageable steps
        7. Verify calculations and logical reasoning before responding
        8. Consider multiple perspectives when discussing controversial topics

        When answering questions:
        - Start with a direct answer to the question
        - Follow with supporting details and context
        - Use bullet points or numbered lists for clarity
        - Provide examples when helpful
        - Suggest follow-up resources if relevant

        Always maintain a professional, helpful, and respectful tone.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    def test1(self) -> None:
        """
        Test _llm() with multiple models and prompt lengths.

        Tests short, medium, and long prompts across different models to
        verify proper handling and cost calculation. Reports results in a
        comprehensive table with time, cost, and cost-per-character metrics.
        """
        # NOTE(review): the other tests in this file address the cache of
        # `_llm()` as "_llm"; confirm that "_test_llm" is the intended name.
        hcacsimp.set_cache_property("_test_llm", "mode", "DISABLE_CACHE")
        # Define test configurations with model-specific inputs.
        # Questions are designed to elicit longer responses for more accurate cost
        # comparisons.
        test_configs = [
            (
                "gpt-5-nano",
                "Explain the concept of machine learning and provide examples of its applications in real-world scenarios.",
            ),
            (
                "gpt-4o-mini",
                "Describe the history and culture of Paris, France, including its major landmarks and contributions to art and literature.",
            ),
            (
                "gpt-4o",
                "Explain what recursion is in computer science, provide multiple examples with code, and discuss when to use recursion versus iteration.",
            ),
        ]
        # Store results for tabular reporting.
        results = []
        # Run tests for each model and prompt type combination.
        for model, input_str in test_configs:
            for prompt_type, prompt_getter in [
                ("short", self.get_short_prompt),
                ("medium", self.get_medium_prompt),
                ("long", self.get_long_prompt),
            ]:
                _LOG.info("Testing model=%s with %s prompt", model, prompt_type)
                system_prompt = prompt_getter()
                # Run test.
                start_time = time.time()
                response, cost = hllmcli._llm(system_prompt, input_str, model)
                elapsed_time = time.time() - start_time
                # Check outputs.
                self.assertIsInstance(response, str)
                self.assertGreater(len(response), 0)
                self.assertIsInstance(cost, float)
                self.assertGreaterEqual(cost, 0.0)
                # Calculate cost per character and cost per 1M characters.
                response_len = len(response)
                cost_per_char = cost / response_len if response_len > 0 else 0.0
                # `cost_per_char` is already 0.0 for an empty response, so no
                # second guard is needed here.
                cost_per_1m_chars = cost_per_char * 1_000_000
                # Store results.
                results.append(
                    {
                        "Model": model,
                        "Prompt Type": prompt_type,
                        "Time (s)": elapsed_time,
                        "Cost ($)": cost,
                        "Response Length": response_len,
                        "Cost/Char ($)": cost_per_char,
                        "Cost/1M Chars ($)": cost_per_1m_chars,
                    }
                )
        # Create DataFrame for tabular display.
        results_df = pd.DataFrame(results)
        # Format numeric columns.
        results_df["Time (s)"] = results_df["Time (s)"].round(2)
        results_df["Cost ($)"] = results_df["Cost ($)"].round(6)
        results_df["Cost/Char ($)"] = results_df["Cost/Char ($)"].round(8)
        results_df["Cost/1M Chars ($)"] = results_df["Cost/1M Chars ($)"].round(
            2
        )
        # Log results table without any truncation.
        _LOG.info("\n%s", hprint.frame("LLM Test Results"))
        with pd.option_context(
            "display.max_columns",
            None,
            "display.max_rows",
            None,
            "display.width",
            None,
            "display.max_colwidth",
            None,
        ):
            _LOG.info("\n%s", results_df.to_string(index=False))


# #############################################################################
# _eval_functor
# #############################################################################


def _eval_functor(input_str: str, *, delay: float = 0.0) -> str:
    """
    Evaluate the input string using eval and return the result as a string.

    This is a stand-in for the LLM in tests.

    :param input_str: mathematical expression to evaluate
    :param delay: number of seconds to sleep before evaluating; no sleep
        when it is not positive
    :return: result of evaluation as a string
    """
    _LOG.debug("input_str='%s'", input_str)
    if delay > 0.0:
        time.sleep(delay)
    # NOTE: `eval` executes arbitrary code. In this file it is only fed
    # expressions hard-coded in the tests; never pass external input to it.
    result = eval(input_str)
    result_str = str(result)
    _LOG.debug("-> result_str='%s'", result_str)
    return result_str


# #############################################################################
# Test_apply_llm_batch1
# #############################################################################


class Test_apply_llm_batch1(hunitest.TestCase):
    """
    Test and compare three batch processing approaches.

    Tests:
    - apply_llm_batch_individual()
    - apply_llm_batch_with_shared_prompt()
    - apply_llm_batch_combined()
    to verify they return consistent results using a testing functor that uses
    eval.
    """

    @staticmethod
    def get_test_prompt() -> str:
        """
        Get a simple test prompt for batch processing.

        :return: system prompt string
        """
        prompt = "You are a calculator. Return only the numeric result."
        return prompt

    def helper(
        self,
        model: str,
        func: Callable,
        testing_functor: Optional[Callable[[str], str]],
    ) -> None:
        """
        Helper function to run a batch processing function with test inputs.

        :param model: model name passed through to `func`
        :param func: batch processing function to test
        :param testing_functor: optional testing functor for mocking; when
            it is None the cost must be positive, otherwise it must be 0.0
        """
        _LOG.trace(hprint.to_str("model func testing_functor"))
        # Create test inputs.
        prompt = self.get_test_prompt()
        input_list = ["2 + 2", "3 * 3", "10 - 5", "20 / 4"]
        expected_responses = ["4", "9", "5", "5"]
        # Run the function.
        responses, cost = func(
            prompt=prompt,
            input_list=input_list,
            model=model,
            testing_functor=testing_functor,
        )
        # Normalize the responses so that e.g. "5.0" and "5" compare equal.
        responses = [str(int(float(r))) for r in responses]
        self.assertEqual(responses, expected_responses)
        if testing_functor is None:
            self.assertGreater(cost, 0.0)
        else:
            self.assertEqual(cost, 0.0)

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_individual1(self) -> None:
        """
        Test apply_llm_batch_individual without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        func = hllmcli.apply_llm_batch_individual
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_individual2(self) -> None:
        """
        Test apply_llm_batch_individual with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_individual
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_shared1(self) -> None:
        """
        Test apply_llm_batch_with_shared_prompt without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        func = hllmcli.apply_llm_batch_with_shared_prompt
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_shared2(self) -> None:
        """
        Test apply_llm_batch_with_shared_prompt with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_with_shared_prompt
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_combined1(self) -> None:
        """
        Test apply_llm_batch_combined without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        # model = "gpt-4o-mini"
        func = hllmcli.apply_llm_batch_combined
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_combined2(self) -> None:
        """
        Test apply_llm_batch_combined with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_combined
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )


# #############################################################################
# Test_apply_llm_prompt_to_df1
# #############################################################################


class Test_apply_llm_prompt_to_df1(hunitest.TestCase):
    """
    Test apply_llm_prompt_to_df with testing_functor.

    This is used to test the logic around `apply_llm_batch_*()` functions.
    """

    @staticmethod
    def _extract_expression(obj) -> str:
        """
        Extract mathematical expression from a DataFrame row or string.

        :param obj: either a string or a pandas Series
        :return: extracted string for evaluation, or "" when the value is
            missing (None / NaN), empty, or the row has no "expression" entry
        """
        if isinstance(obj, pd.Series):
            # Extract from DataFrame row.
            if "expression" in obj.index:
                expr = obj["expression"]
                # Handle None, NaN, or empty string.
                if pd.isna(expr) or expr == "":
                    return ""
                return str(expr)
            return ""
        else:
            # Already a string.
            if pd.isna(obj) or obj == "":
                return ""
            return str(obj)

    @staticmethod
    def _get_expected_stats(
        num_items: int, num_skipped: int, batch_size: int
    ) -> dict:
        """
        Build the stats expected from a run that uses the testing functor.

        :param num_items: number of rows in the input DataFrame
        :param num_skipped: number of rows expected to be skipped
        :param batch_size: batch size passed to `apply_llm_prompt_to_df()`
        :return: expected stats, without the elapsed time
        """
        return {
            "num_items": num_items,
            "num_skipped": num_skipped,
            # Ceiling division: a partially filled last batch still counts.
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }

    def helper(
        self,
        df: pd.DataFrame,
        batch_size: int,
        expected_df: pd.DataFrame,
        expected_stats: Dict[str, int],
    ) -> None:
        """
        Test apply_llm_prompt_to_df with testing_functor that uses eval.

        :param df: input DataFrame with an "expression" column
        :param batch_size: batch size passed to `apply_llm_prompt_to_df()`
        :param expected_df: expected output, compared through `str()`
        :param expected_stats: expected stats, without the elapsed time
        """
        # Prepare inputs.
        prompt = "Dummy"
        extractor = self._extract_expression
        # To test the progress bar.
        # delay = 0.5
        delay = 0.0

        def testing_functor(input_str: str) -> str:
            return _eval_functor(input_str, delay=delay)

        # Run test.
        result_df, stats = hllmcli.apply_llm_prompt_to_df(
            prompt=prompt,
            df=df,
            extractor=extractor,
            target_col="result",
            batch_mode="individual",
            batch_size=batch_size,
            model="gpt-5-nano",
            testing_functor=testing_functor,
            use_sys_stderr=True,
        )
        # Check outputs.
        self.assert_equal(str(result_df), str(expected_df))
        # The elapsed time is not reproducible: check it separately and
        # compare the remaining stats exactly.
        elapsed_time = stats.pop("elapsed_time_in_seconds")
        self.assertGreater(elapsed_time, 0.0)
        self.assertEqual(stats, expected_stats)

    def helper_test1(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with testing_functor that uses eval.
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"],
                "result": ["5", "50", "75", "5.0"],
            }
        )
        expected_stats = self._get_expected_stats(len(df), 0, batch_size)
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test2(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with larger dataframe and batch_size > 1.
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": [
                    "1 + 1",
                    "2 * 3",
                    "10 - 5",
                    "20 / 4",
                    "3 ** 2",
                    "100 // 3",
                    "15 % 4",
                ],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": [
                    "1 + 1",
                    "2 * 3",
                    "10 - 5",
                    "20 / 4",
                    "3 ** 2",
                    "100 // 3",
                    "15 % 4",
                ],
                "result": ["2", "6", "5", "5.0", "9", "33", "3"],
            }
        )
        expected_stats = self._get_expected_stats(len(df), 0, batch_size)
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test3(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with pre-filled target column values.

        This test verifies that all rows are processed and pre-filled values
        are overwritten with computed results from the testing_functor.
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": [
                    "5 + 5",
                    "3 * 4",
                    "20 - 8",
                    "16 / 2",
                    "2 ** 3",
                ],
            }
        )
        # Pre-fill some values in the target column.
        df["result"] = [None, "12", None, None, "8"]
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": [
                    "5 + 5",
                    "3 * 4",
                    "20 - 8",
                    "16 / 2",
                    "2 ** 3",
                ],
                "result": ["10", "12", "12", "8.0", "8"],
            }
        )
        expected_stats = self._get_expected_stats(len(df), 0, batch_size)
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test4(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with rows that have empty extraction results.

        This test verifies that rows with empty or None expressions are skipped
        and marked with empty string in the result column.
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"],
                "result": ["10", "", "20", "", "30"],
            }
        )
        expected_stats = self._get_expected_stats(len(df), 2, batch_size)
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test5(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with batch where all items have missing data.

        This test verifies that batches with all empty/None items are skipped
        entirely and the else branch is executed.
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["1 + 1", "", None, "", "5 + 5"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["1 + 1", "", None, "", "5 + 5"],
                "result": ["2", "", "", "", "10"],
            }
        )
        expected_stats = self._get_expected_stats(len(df), 3, batch_size)
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    # batch_size=1

    def test1_num_batch1(self) -> None:
        self.helper_test1(batch_size=1)

    def test2_num_batch1(self) -> None:
        self.helper_test2(batch_size=1)

    def test3_num_batch1(self) -> None:
        self.helper_test3(batch_size=1)

    def test4_num_batch1(self) -> None:
        self.helper_test4(batch_size=1)

    def test5_num_batch1(self) -> None:
        self.helper_test5(batch_size=1)

    # batch_size=2

    def test1_num_batch2(self) -> None:
        self.helper_test1(batch_size=2)

    def test2_num_batch2(self) -> None:
        self.helper_test2(batch_size=2)

    def test3_num_batch2(self) -> None:
        self.helper_test3(batch_size=2)

    def test4_num_batch2(self) -> None:
        self.helper_test4(batch_size=2)

    def test5_num_batch2(self) -> None:
        self.helper_test5(batch_size=2)

    # batch_size=3

    def test1_num_batch3(self) -> None:
        self.helper_test1(batch_size=3)

    def test2_num_batch3(self) -> None:
        self.helper_test2(batch_size=3)

    def test3_num_batch3(self) -> None:
        self.helper_test3(batch_size=3)

    def test4_num_batch3(self) -> None:
        self.helper_test4(batch_size=3)

    def test5_num_batch3(self) -> None:
        self.helper_test5(batch_size=3)

    # batch_size=10

    def test1_num_batch10(self) -> None:
        self.helper_test1(batch_size=10)

    def test2_num_batch10(self) -> None:
        self.helper_test2(batch_size=10)

    def test3_num_batch10(self) -> None:
        self.helper_test3(batch_size=10)

    def test4_num_batch10(self) -> None:
        self.helper_test4(batch_size=10)

    def test5_num_batch10(self) -> None:
        self.helper_test5(batch_size=10)


# #############################################################################
# Test_apply_llm_prompt_to_df2
# #############################################################################


# TODO(gp): Convert this into a unit test for apply_llm_prompt.
class Test_apply_llm_prompt_to_df2(_BaseCacheTest):
    """
    Test apply_llm_prompt_to_df with mocked cache.
    """

    @staticmethod
    def get_test_prompt() -> str:
        """
        Get a simple test prompt for LLM.

        This prompt asks the LLM to sum two numbers, providing a simple
        and predictable test case.

        :return: system prompt string
        """
        prompt = """
        You are a calculator. Given input in the format "a + b", return only
        the sum as a number.

        Return ONLY the numeric result, nothing else.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    @staticmethod
    def extract_test_fields(obj) -> str:
        """
        Extract test fields from a DataFrame row or string.

        :param obj: either a string or a pandas Series
        :return: "<num1> + <num2>" for a row that has both fields, "" for a
            row that lacks either of them, and `obj` itself otherwise
        """
        if isinstance(obj, pd.Series):
            # Extract from DataFrame row.
            if "num1" in obj.index and "num2" in obj.index:
                num1 = obj["num1"]
                num2 = obj["num2"]
                return f"{num1} + {num2}"
            return ""
        else:
            # Already a string.
            return obj

    def create_test_df(self) -> pd.DataFrame:
        """
        Create a minimal DataFrame with test data (2 rows).
        """
        df = pd.DataFrame(
            {
                "num1": [2, 10],
                "num2": [3, 15],
            }
        )
        return df

    def run_cached_apply_llm_prompt_to_df(self) -> None:
        """
        Run apply_llm_prompt_to_df on the test DataFrame and check the sums.

        The call is the same in every test of this class; the tests differ
        only in how the cache is set up before calling this method.
        """
        # Prepare inputs.
        prompt = self.get_test_prompt()
        df = self.create_test_df()
        extractor = self.extract_test_fields
        # Run test.
        result_df, _ = hllmcli.apply_llm_prompt_to_df(
            prompt=prompt,
            df=df,
            extractor=extractor,
            target_col="sum",
            batch_mode="individual",
            model="gpt-5-nano",
            batch_size=10,
            use_sys_stderr=True,
        )
        _LOG.debug("result_df=%s", result_df)
        # Check outputs.
        expected_df = pd.DataFrame(
            {
                "num1": [2, 10],
                "num2": [3, 15],
                "sum": ["5", "25"],
            }
        )
        self.assert_equal(str(result_df), str(expected_df))

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test1(self) -> None:
        """
        Warm up cache by calling apply_llm_prompt_to_df and save it to file.

        This test points the cache at the input directory of `test2`, runs
        the call on the test data, and then flushes the cache to disk for
        use in `test2`.
        """
        # Create a file with the cache content for test2 in the input directory.
        input_dir = self.get_input_dir(
            test_class_name=self.__class__.__name__,
            test_method_name="test2",
        )
        hcacsimp.set_cache_dir(input_dir)
        # Run the call to warm up the cache for both inputs.
        self.run_cached_apply_llm_prompt_to_df()
        # Flush the cache to disk to ensure it's saved.
        hcacsimp.flush_cache_to_disk("_llm")
        func_cache_data = hcacsimp.get_disk_cache("_llm")
        # Check that the cache file exists and is not empty.
        hcacsimp.sanity_check_function_cache(
            func_cache_data, assert_on_empty=True
        )

    def test2(self) -> None:
        """
        Test apply_llm_prompt_to_df with mocked cache.

        This test
        - loads the cache file created in test1
        - mocks the cache with the data from the cache file
        - verifies that apply_llm_prompt_to_df uses the cached values without
          hitting the LLM API.
        """
        # Prepare inputs.
        # Set up temporary cache directory.
        scratch_dir = self.get_scratch_space()
        hcacsimp.set_cache_dir(scratch_dir)
        # Load the saved cache file from test2's input directory.
        input_dir = self.get_input_dir()
        # Load the cache data from the cache file.
        cache_file = os.path.join(input_dir, "tmp.cache_simple._llm.json")
        _LOG.debug("cache_file=%s", cache_file)
        func_cache_data = hcacsimp._load_func_cache_data_from_file(
            cache_file, "json"
        )
        _LOG.debug("func_cache_data=%s", func_cache_data)
        hcacsimp.sanity_check_function_cache(
            func_cache_data, assert_on_empty=True
        )
        _LOG.debug("Loaded func_cache_data=\n%s", func_cache_data)
        hcacsimp.mock_cache_from_disk("_llm", func_cache_data)
        try:
            # Set abort_on_cache_miss to ensure we don't hit the LLM API.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True)
            # Run apply_llm_prompt_to_df with mocked cache.
            self.run_cached_apply_llm_prompt_to_df()
        finally:
            # Reset the cache property.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False)

    def test3(self) -> None:
        """
        Test apply_llm_prompt_to_df without mocked cache.

        This test verifies that apply_llm_prompt_to_df raises an error when the
        cache is missed and abort_on_cache_miss=True.
        """
        # Set up temporary cache directory.
        scratch_dir = self.get_scratch_space()
        hcacsimp.set_cache_dir(scratch_dir)
        try:
            # Set abort_on_cache_miss to ensure we don't hit the LLM API.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True)
            with self.assertRaises(ValueError) as fail:
                # Run apply_llm_prompt_to_df without mocked cache.
                self.run_cached_apply_llm_prompt_to_df()
            # Check the message outside the `with` block: statements after the
            # raising call inside it would never run.
            self.assertIn("Cache miss", str(fail.exception))
        finally:
            # Reset the cache property.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False)


# #############################################################################
# Test_apply_llm_batch_cost_comparison
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_apply_llm_batch_cost_comparison(hunitest.TestCase):
    """
    Test and compare costs of different batch processing approaches.

    Tests both direct batch function calls and apply_llm_prompt_to_df with
    different batch modes.
    """

    @staticmethod
    def get_person_industry_prompt() -> str:
        """
        Get the industry classification prompt for testing.

        :return: system prompt string
        """
        prompt = """
        Given the following list of industries with examples, classify the text into the
        corresponding industry:
        - Industrial & Built Environment
        - Transportation & Logistics
        - Consumer & Retail
        - Technology & Digital Services
        - Health & Life Sciences
        - Finance & Professional Services
        - Public & Social Sector
        - Media, Marketing & Experiences

        You MUST report the industry exactly as one of the options above. Do not
        include any other text.
        If you are not sure about the industry, return "unknown".
        """
        prompt = hprint.dedent(prompt)
        return prompt

    @staticmethod
    def get_test_industries() -> list:
        """
        Get a list of test company descriptions for industry classification.

        :return: list of company descriptions
        """
        industries = [
            "A company that sells fresh produce and operates farms",
            "A car manufacturer that produces electric vehicles",
            "A construction company specializing in residential buildings",
            "A company that manufactures consumer electronics and appliances",
            "An online learning platform providing courses for students",
            "An electric utility company providing power generation services",
            "A civil engineering firm providing infrastructure design",
            "A company organizing corporate events and conferences",
            "A bank providing retail banking and investment services",
            "A nonprofit organization focused on environmental conservation",
            "A hospital providing emergency and surgical medical services",
            "A staffing agency providing recruitment and temp worker services",
            "A data center company providing server hardware and infrastructure",
            "A software development company creating enterprise resource planning systems",
            "A cybersecurity firm providing threat detection and penetration testing",
            "A cloud infrastructure provider offering scalable computing resources",
            "An IT company providing network management and server maintenance",
            "A consulting firm helping businesses integrate SAP and Oracle systems",
            "A help desk company providing 24/7 technical support services",
            "A data analytics company building business intelligence dashboards",
            "A DevOps company providing CI/CD pipeline automation tools",
            "A law firm specializing in corporate mergers and acquisitions",
            "A shipping company providing international freight and logistics",
            "A factory manufacturing industrial machinery and equipment",
            "An advertising agency creating brand campaigns for consumer products",
            "A streaming service providing movies and TV shows online",
            "A pharmaceutical company developing new drugs and vaccines",
            "A commercial real estate firm managing office building portfolios",
            "An online retailer selling clothing and accessories through eCommerce",
            "A sports equipment manufacturer producing gear for athletes",
            "A telecommunications company providing mobile and internet services",
            "A hotel chain operating luxury resorts and vacation properties",
        ]
        return industries

    def helper(self, model: str, batch_size: int) -> None:
        """
        Compare costs and time of different batch modes in apply_llm_prompt_to_df.

        This test compares the performance of three batch modes:
        1. individual: processes each query separately
        2. shared_prompt: uses shared prompt context
        3. combined: combines all queries into single API call

        Differences between the answers of the batch modes are logged as
        errors but do not fail the test.

        :param model: model name passed to `apply_llm_prompt_to_df()`
        :param batch_size: batch size passed to `apply_llm_prompt_to_df()`
        """
        # Point the cache at the scratch space and reset it, so that the run
        # does not reuse previously cached responses.
        hcacsimp.set_cache_dir(self.get_scratch_space())
        _LOG.info("Cache directory: %s", hcacsimp.get_cache_dir())
        hcacsimp.reset_cache("", interactive=False)
        # Prepare inputs.
        prompt = self.get_person_industry_prompt()
        industries = self.get_test_industries()
        testing_functor = None
        # Create DataFrame from test data.
        df = pd.DataFrame({"description": industries})

        # Extractor function to get text from DataFrame row.
        def extractor(obj):
            if isinstance(obj, pd.Series):
                return obj["description"]
            return str(obj)

        # Test each batch mode.
        batch_modes = ["individual", "shared_prompt", "combined"]
        results = []
        # Store result DataFrames to compare across batch modes.
        result_dfs = {}
        for batch_mode in batch_modes:
            _LOG.info(
                "\n%s", hprint.frame("Testing batch mode: %s" % batch_mode)
            )
            # Create a copy of the DataFrame for this batch mode.
            df_copy = df.copy()
            # Call apply_llm_prompt_to_df with the current batch mode.
            result_df, stats = hllmcli.apply_llm_prompt_to_df(
                prompt=prompt,
                df=df_copy,
                extractor=extractor,
                target_col="industry",
                batch_mode=batch_mode,
                model=model,
                batch_size=batch_size,
                testing_functor=testing_functor,
                use_sys_stderr=True,
            )
            # Get elapsed time from stats.
            elapsed_time = stats["elapsed_time_in_seconds"]
            # Print time and cost for this batch mode.
            _LOG.info(
                "Batch mode '%s': Time=%.2fs, Cost=$%.6f",
                batch_mode,
                elapsed_time,
                stats["total_cost_in_dollars"],
            )
            # Store results.
            results.append(
                {
                    "Batch Mode": batch_mode,
                    "Time (s)": elapsed_time,
                    "Num Items": stats["num_items"],
                    "Num Skipped": stats["num_skipped"],
                    "Num Batches": stats["num_batches"],
                    "Total Cost ($)": stats["total_cost_in_dollars"],
                }
            )
            # Store result DataFrame for comparison.
            result_dfs[batch_mode] = result_df
            # Verify results.
            self.assertEqual(len(result_df), len(industries))
            self.assertIn("industry", result_df.columns)
        # Check that all batch modes produce the same results.
        # Compare each batch mode's results with the first batch mode.
        first_batch_mode = batch_modes[0]
        first_result_df = result_dfs[first_batch_mode]["industry"].reset_index(
            drop=True
        )
        for batch_mode in batch_modes[1:]:
            compare_result_df = result_dfs[batch_mode]["industry"].reset_index(
                drop=True
            )
            # Create a comparison DataFrame between the two batch modes.
            match_df = pd.DataFrame(
                {
                    first_batch_mode: first_result_df,
                    batch_mode: compare_result_df,
                }
            )
            # Add a column with whether they match or not.
            match_df["Match"] = (
                match_df[first_batch_mode] == match_df[batch_mode]
            )
            all_match = match_df["Match"].all()
            # Report exactly one outcome per pair of batch modes.
            if not all_match:
                _LOG.error(
                    "Results mismatch between '%s' and '%s':\n%s",
                    first_batch_mode,
                    batch_mode,
                    match_df,
                )
            else:
                _LOG.info(
                    "Results match between '%s' and '%s'",
                    first_batch_mode,
                    batch_mode,
                )
        # Create comparison DataFrame.
        comparison_df = pd.DataFrame(results)
        # Add relative metrics compared to individual mode.
        individual_time = comparison_df.loc[
            comparison_df["Batch Mode"] == "individual", "Time (s)"
        ].iloc[0]
        individual_cost = comparison_df.loc[
            comparison_df["Batch Mode"] == "individual", "Total Cost ($)"
        ].iloc[0]
        comparison_df["Time Ratio"] = comparison_df["Time (s)"] / individual_time
        comparison_df["Cost Ratio"] = (
            comparison_df["Total Cost ($)"] / individual_cost
        )
        # Format the DataFrame for better readability.
        comparison_df["Time (s)"] = comparison_df["Time (s)"].round(2)
        comparison_df["Total Cost ($)"] = comparison_df["Total Cost ($)"].round(
            6
        )
        comparison_df["Time Ratio"] = comparison_df["Time Ratio"].round(2)
        comparison_df["Cost Ratio"] = comparison_df["Cost Ratio"].round(2)
        # Print comparison_df without truncation.
        with pd.option_context(
            "display.max_columns",
            None,
            "display.max_rows",
            None,
            "display.width",
            None,
            "display.max_colwidth",
            None,
        ):
            _LOG.info("Batch mode comparison:\n%s", comparison_df)

    # Reference output, presumably from an earlier run: one table per batch
    # size (8, 16, 32).
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     17.98         32            4        0.000653        1.00        1.00
    # shared_prompt     17.60         32            4        0.000998        0.98        1.53
    #      combined      8.42         32            4        0.000330        0.47        0.51
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     19.27         32            2        0.000651        1.00        1.00
    # shared_prompt     19.34         32            2        0.001385        1.00        2.13
    #      combined      7.45         32            2        0.000277        0.39        0.43
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     16.38         32            1        0.000651        1.00        1.00
    # shared_prompt     17.51         32            1        0.002148        1.07        3.30
    #      combined      6.15         32            1        0.000251        0.38        0.39
    def test1(self) -> None:
        """
        Compare the batch modes on "gpt-4o-mini" for batch sizes 8, 16, 32.
        """
        model = "gpt-4o-mini"
        batch_size = 8
        self.helper(model, batch_size)
        #
        batch_size = 16
        self.helper(model, batch_size)
        #
        batch_size = 32
        self.helper(model, batch_size)

    # Reference output, presumably from an earlier run: one table per batch
    # size (8, 16, 32).
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     68.57         32            4        0.002711        1.00        1.00
    # shared_prompt     53.07         32            4        0.002638        0.77        0.97
    #      combined     29.30         32            4        0.001654        0.43        0.61
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     68.40         32            2        0.002788        1.00        1.00
    # shared_prompt     53.88         32            2        0.002809        0.79        1.01
    #      combined     25.99         32            2        0.001643        0.38        0.59
    #
    #    Batch Mode  Time (s)  Num Items  Num Batches  Total Cost ($)  Time Ratio  Cost Ratio
    #    individual     59.38         32            1        0.002610        1.00        1.00
    # shared_prompt     52.61         32            1        0.002482        0.89        0.95
    #      combined     15.79         32            1        0.001118        0.27        0.43
    def test2(self) -> None:
        """
        Compare the batch modes on "gpt-5-nano" for batch sizes 8, 16, 32.
        """
        model = "gpt-5-nano"
        batch_size = 8
        self.helper(model, batch_size)
        #
        batch_size = 16
        self.helper(model, batch_size)
        #
        batch_size = 32
        self.helper(model, batch_size)
class Test_logging1(hunitest.TestCase):
    """
    Smoke test for the logger.
    """

    def test_logging_levels1(self) -> None:
        """
        Call `hloggin.test_logger()`; the test has no assertion of its own.
        """
        hloggin.test_logger()


class Test_hlogging_asyncio1(hunitest.TestCase):
    """
    Log from a coroutine, with and without a dedicated event loop.
    """

    @staticmethod
    async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None:
        """
        Log the time, sleep for 1s on the event loop, and log the time again.

        :param get_wall_clock_time: callable returning the current time as
            seen by the code under test
        """
        # Name the running task "workload".
        asyncio.current_task().set_name("workload")

        def _print_time() -> None:
            # Log the time from `hdateti` next to the one from the callable.
            _LOG.debug("wall_clock_time=%s", hdateti.get_current_time("ET"))
            _LOG.debug("event_loop_time=%s", get_wall_clock_time())

        _print_time()
        _LOG.debug(" -> wait")
        await asyncio.sleep(1.0)
        _print_time()

    def run_test(
        self,
        event_loop: Optional[asyncio.AbstractEventLoop],
        get_wall_clock_time: hdateti.GetWallClockTime,
    ) -> None:
        """
        Run `workload()` to completion through `hasynci.run()`.

        :param event_loop: event loop passed to `hasynci.run()`, or `None`
        :param get_wall_clock_time: callable handed to `workload()`
        """
        hasynci.run(self.workload(get_wall_clock_time), event_loop=event_loop)

    # pylint: disable=line-too-long
    def test_real_time1(self) -> None:
        """
        Run the workload with the real wall clock and no explicit event loop.

        The output is like:

        ```
        07:55:54 hunit_test.py setUp:932 Resetting random.seed to 20000101
        07:55:54 hunit_test.py setUp:935 Resetting np.random.seed to 20000101
        07:55:54 hunit_test.py setUp:944 base_dir_name=/app/amp/helpers/test
        ```
        """
        # The clock is the real one; no event loop is passed.
        real_clock = lambda: hdateti.get_current_time(tz="ET")
        self.run_test(None, real_clock)

    # pylint: disable=line-too-long
    def test_simulated_time1(self) -> None:
        """
        Run the workload inside `hasynci.solipsism_context()`.

        The output is like:

        ```
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:28 wall_clock_time=2022-01-18 07:52:55.337574-05:00
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:30 event_loop_time=2022-01-18 07:52:55.310587-05:00
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py workload:33 -> wait
        ```
        """
        with hasynci.solipsism_context() as event_loop:
            # The clock reads the time through the event loop of the context.
            simulated_clock = lambda: hdateti.get_current_time(
                tz="ET", event_loop=event_loop
            )
            # Register the same clock with `hwacltim` before running.
            hwacltim.set_wall_clock_time(simulated_clock)
            self.run_test(event_loop, simulated_clock)
+ self.helper(text, expected) + + def test3(self) -> None: + """ + Test mixed content with bullets and non-bullets. + """ + # Prepare inputs. + text = """ + - Bullet item + Regular text line + - Another bullet + More regular text + """ + # Prepare outputs. + expected = """ + Bullet item + Regular text line + Another bullet + More regular text + """ + # Run test. + self.helper(text, expected) + + def test4(self) -> None: + """ + Test empty lines preservation. + """ + # Prepare inputs. + text = """ + - First item + + - Second item + + - Third item + """ + # Prepare outputs. + expected = """ + First item + + Second item + + Third item + """ + # Run test. + self.helper(text, expected) + + +# ############################################################################# +# Test_bold_first_level_bullets1 +# ############################################################################# + + +class Test_bold_first_level_bullets1(hunitest.TestCase): + def helper(self, text: str, expected: str) -> None: + """ + Helper to test bold_first_level_bullets function. + """ + text = hprint.dedent(text) + lines = text.split("\n") + actual_lines = hmarkdo.bold_first_level_bullets(lines) + actual = "\n".join(actual_lines) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test basic first-level bullet bolding. + """ + text = r""" + - First item + - Sub item + - Second item + """ + expected = r""" + - **First item** + - Sub item + - **Second item** + """ + self.helper(text, expected) + + def test2(self) -> None: + """ + Test with mixed content including non-bullet text. + """ + text = r""" + Some text here + - First bullet + More text + - Second bullet + - Nested bullet + Final text + """ + expected = r""" + Some text here + - **First bullet** + More text + - **Second bullet** + - Nested bullet + Final text + """ + self.helper(text, expected) + + def test3(self) -> None: + """ + Test with multiple levels of nesting. 
+ """ + text = r""" + - Top level + - Second level + - Third level + - Back to second + - Another top + """ + expected = r""" + - **Top level** + - Second level + - Third level + - Back to second + - **Another top** + """ + self.helper(text, expected) + + def test4(self) -> None: + """ + Test with empty lines between bullets. + """ + text = r""" + - First item + + - Second item + - Sub item + + - Third item + """ + expected = r""" + - **First item** + + - **Second item** + - Sub item + + - **Third item** + """ + self.helper(text, expected) + + def test5(self) -> None: + """ + Test with text that already contains some bold markers. + """ + text = r""" + - First **important** point + - Sub point + - Second point with emphasis + """ + expected = r""" + - First **important** point + - Sub point + - **Second point with emphasis** + """ + self.helper(text, expected) + + +# ############################################################################# +# Test_colorize_bold_text1 +# ############################################################################# + + +class Test_colorize_bold_text1(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic case with single bold text. + """ + text = "This is **bold** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"This is **\red{bold}** text" + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Test multiple bold sections get different colors. + """ + text = "**First** normal **Second** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"**\red{First}** normal **\teal{Second}** text" + self.assert_equal(actual, expected) + + def test3(self) -> None: + """ + Test underscore style bold text. 
+ """ + text = "This is __bold__ text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"This is **\red{bold}** text" + self.assert_equal(actual, expected) + + def test4(self) -> None: + """ + Test text with no bold sections returns unchanged. + """ + text = "This is plain text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = "This is plain text" + self.assert_equal(actual, expected) + + def test5(self) -> None: + """ + Test mixed bold styles in same text. + """ + text = "**First** and __Second__ bold" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"**\red{First}** and **\teal{Second}** bold" + self.assert_equal(actual, expected) + + def test6(self) -> None: + """ + Test with abbreviations=False uses full \textcolor syntax. + """ + text = "This is **bold** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=False + ) + expected = r"This is **\textcolor{red}{bold}** text" + self.assert_equal(actual, expected) + + def test7(self) -> None: + """ + Test with multiple bullet lists and different colors. 
+ """ + text = """ + **List 1:** + - First item + - Second item + + **List 2:** + - Another item + - Final item + """ + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r""" + **\red{List 1:}** + - First item + - Second item + + **\teal{List 2:}** + - Another item + - Final item + """ + self.assert_equal(actual, expected) + + def test8(self) -> None: + text = hprint.dedent( + r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **\blue{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **\violet{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + + - **\pink{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + + - **\olive{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + + - **\darkgray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + ) + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = hprint.dedent( + r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key 
Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **\olive{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **\green{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + + - **\cyan{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + + - **\blue{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + + - **\darkgray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + ) + self.assert_equal(actual, expected) + + def test9(self) -> None: + """ + Test basic case with single bold text. 
+ """ + text = "**First** normal **Second** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"**\red{First}** normal **\teal{Second}** text" + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_format_first_level_bullets1 +# ############################################################################# + + +class Test_format_first_level_bullets1(hunitest.TestCase): + # TODO(ai): Rename -> helper + def format_and_compare_markdown(self, text: str, expected: str) -> None: + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # + lines = text.split("\n") + actual_lines = hmarkdo.format_first_level_bullets(lines) + actual = "\n".join(actual_lines) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test basic case with single first level bullet. + """ + text = """ + Some text + - First bullet + More text""" + expected = """ + Some text + + - First bullet + More text""" + self.format_and_compare_markdown(text, expected) + + def test2(self) -> None: + """ + Test multiple first level bullets. + """ + text = """ + - First bullet + - Second bullet + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self.format_and_compare_markdown(text, expected) + + def test3(self) -> None: + """ + Test mixed first level and indented bullets. + """ + text = """ + - First level + + - Second level + - Another second + - Back to first""" + expected = """ + - First level + - Second level + - Another second + + - Back to first""" + self.format_and_compare_markdown(text, expected) + + def test4(self) -> None: + """ + Test mixed content with text and bullets. 
+ """ + text = """ + Some initial text + - First bullet + Some text in between + - Second bullet + Final text""" + expected = """ + Some initial text + + - First bullet + Some text in between + + - Second bullet + Final text""" + self.format_and_compare_markdown(text, expected) + + def test5(self) -> None: + """ + Test nested bullets with multiple levels. + """ + text = """ + - Level 1 + - Level 2 + - Level 3 + - Another level 1 + - Level 2 again""" + expected = """ + - Level 1 + - Level 2 + - Level 3 + + - Another level 1 + - Level 2 again""" + self.format_and_compare_markdown(text, expected) + + def test6(self) -> None: + """ + Test empty lines handling. + """ + text = """ + - First bullet + + - Second bullet + + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self.format_and_compare_markdown(text, expected) + + def test7(self) -> None: + """ + Test mixed content with bullets and text. + """ + text = """ + Some text here + - First bullet + More text + - Second bullet + - Nested bullet + Final paragraph + - Last bullet""" + expected = """ + Some text here + + - First bullet + More text + + - Second bullet + - Nested bullet + Final paragraph + + - Last bullet""" + self.format_and_compare_markdown(text, expected) + + def test8(self) -> None: + """ + Test bullets with inline formatting. + """ + text = """ + - **Bold bullet** point + - *Italic nested* bullet + - `Code bullet` here + - **_Mixed_** formatting""" + expected = """ + - **Bold bullet** point + - *Italic nested* bullet + + - `Code bullet` here + - **_Mixed_** formatting""" + self.format_and_compare_markdown(text, expected) + + def test9(self) -> None: + """ + Test bullets with special characters. 
+ """ + text = """ + - Bullet with (parentheses) + - Bullet with [brackets] + - Bullet with {braces} + - Bullet with $math$""" + expected = """ + - Bullet with (parentheses) + - Bullet with [brackets] + + - Bullet with {braces} + - Bullet with $math$""" + self.format_and_compare_markdown(text, expected) + + def test10(self) -> None: + text = hprint.dedent( + r""" + - **Objective** + + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + ) + expected = hprint.dedent( + r""" + - **Objective** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + 
""" + ) + self.format_and_compare_markdown(text, expected) + + +# ############################################################################# +# Test_process_lines1 +# ############################################################################# + + +class Test_process_lines1(hunitest.TestCase): + # TODO(gp): This doesn't seem correct. + def test1(self) -> None: + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + txt_in = hio.from_file(input_file_path) + txt_in = hprint.dedent(txt_in) + lines = txt_in.split("\n") + out = [] + for i, line in hmarkdo.process_lines(lines): + _LOG.debug(hprint.to_str("line")) + out.append(f"{i}:{line}") + actual = "\n".join(out) + self.check_string( + actual, dedent=True, remove_lead_trail_empty_lines=True + ) + + +# ############################################################################# +# Test_process_code_block1 +# ############################################################################# + + +class Test_process_code_block1(hunitest.TestCase): + def helper(self, txt: str) -> str: + out: List[str] = [] + in_code_block = False + lines = txt.split("\n") + for i, line in enumerate(lines): + _LOG.debug("%s:line=%s", i, line) + # Process the code block. + do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( + line, in_code_block, i, lines + ) + out.extend(out_tmp) + if do_continue: + continue + # + out.append(line) + return "\n".join(out) + + def test1(self) -> None: + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + txt_in = hio.from_file(input_file_path) + txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) + # Run function. + actual = self.helper(txt_in) + # Check output. 
class Test_process_color_commands1(hunitest.TestCase):
    """
    Check the expansion of color commands by `process_color_commands()`.
    """

    def _check(self, txt_in: str, expected: str) -> None:
        """
        Run `process_color_commands()` on `txt_in` and compare with `expected`.
        """
        actual = hmarkdo.process_color_commands(txt_in)
        self.assert_equal(actual, expected)

    def test_text_content1(self) -> None:
        """
        Plain text content is expected to be wrapped in `\\text{}`.
        """
        self._check(
            r"\red{Hello world}",
            r"\textcolor{red}{\text{Hello world}}",
        )

    def test_math_content1(self) -> None:
        """
        Mathematical content is expected to be left as is inside the color.
        """
        self._check(
            r"\blue{x + y = z}",
            r"\textcolor{blue}{x + y = z}",
        )

    def test_multiple_colors1(self) -> None:
        """
        Several color commands on the same line are all expanded.
        """
        self._check(
            r"The \red{quick} \blue{fox} \green{jumps}",
            r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}",
        )

    def test_mixed_content1(self) -> None:
        """
        Content with both text and math is expected not to be wrapped in
        `\\text{}`.
        """
        self._check(
            r"\red{Result: x^2 + y^2}",
            r"\textcolor{red}{Result: x^2 + y^2}",
        )

    def test_nested_braces1(self) -> None:
        """
        Nested braces inside the color command are preserved.
        """
        self._check(
            r"\blue{f(x) = {x + 1}}",
            r"\textcolor{blue}{f(x) = {x + 1}}",
        )
+ expected = r""" + - **\red{VC Theory}** + - Measures model + + - **\orange{Bias-Variance Decomposition}** + - Prediction error + - **\yellow{Bias}** + - **\lime{Variance}** + + - **\green{Computation Complexity}** + - Balances model + - Related to + - E.g., Minimum + + - **\teal{Bayesian Approach}** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model + + - **\cyan{Problem in ML Theory:}** + - Assumptions may not align with practical problems + """ + self.assert_equal(actual, expected) + + def test2(self) -> None: + # Prepare inputs. + text = r""" + * Machine Learning Flow + + ::: columns + :::: {.column width=90%} + - Question + - E.g., "How can we predict house prices?" + - Input data + - E.g., historical data of house sales + + - _"If I were given one hour to save the planet, I would spend 59 minutes + defining the problem and one minute resolving it"_ (Albert Einstein) + + - **Not all phases are equally important!** + - Question $>$ Data $>$ Features $>$ Algorithm + - Clarity of the question impacts project success + - Quality and relevance of data are crucial for performance + - Proper feature selection simplifies the model and improves accuracy + - Algorithm is often less important (contrary to popular belief!) + :::: + :::: {.column width=5%} + + ```graphviz[height=90%] + digraph BayesianFlow { + rankdir=TD; + splines=true; + ... + } + ``` + :::: + ::: + """ + # Run function. + actual = hmarkdo.colorize_bullet_points_in_slide(text) + # Check output. + expected = r""" + * Machine Learning Flow + + ::: columns + :::: {.column width=90%} + - Question + - E.g., "How can we predict house prices?" 
def _prepare_div_block_inputs(txt: str, expected: str) -> Tuple[List[str], str]:
    """
    Normalize the input and the expected text of a div-block test.

    :param txt: input text; it is dedented with leading / trailing empty lines
        removed and then split into lines
    :param expected: expected text; it is dedented keeping its empty lines,
        then at most one leading and one trailing newline are dropped
    :return: the input lines and the normalized expected text
    """
    input_lines = hprint.dedent(txt, remove_lead_trail_empty_lines_=True).split(
        "\n"
    )
    expected_txt = hprint.dedent(expected, remove_lead_trail_empty_lines_=False)
    # Drop a single newline on each side, if present.
    if expected_txt.startswith("\n"):
        expected_txt = expected_txt[1:]
    if expected_txt.endswith("\n"):
        expected_txt = expected_txt[:-1]
    return input_lines, expected_txt
############################################################################# + + +class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): + """ + Test the function to add prettier-ignore comments around div blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. + lines, expected = _prepare_div_block_inputs(txt, expected) + # Run test. + actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.assert_equal(actual, expected) + + def test_simple_div_block(self) -> None: + """ + Test a simple div block with two colons. + """ + txt = """ + :::: + ::: + """ + # Add a leading empty line in expected since function adds it. + expected = """ + + + :::: + ::: + + + """ + self.helper(txt, expected) + + def test_div_block_with_attributes(self) -> None: + """ + Test a div block with column attributes. + """ + txt = """ + :::: + ::::{.column width=40%} + """ + expected = """ + + + :::: + ::::{.column width=40%} + + + """ + self.helper(txt, expected) + + def test_multiple_div_blocks(self) -> None: + """ + Test multiple div blocks in the same content. + """ + txt = """ + Some text before + + :::: + ::::{.column width=40%} + + Middle text + + :::columns + ::::{.column width=60%} + + Some text after + """ + expected = """ + Some text before + + + + :::: + ::::{.column width=40%} + + + + Middle text + + + + :::columns + ::::{.column width=60%} + + + + Some text after + """ + self.helper(txt, expected) + + def test_no_div_blocks(self) -> None: + """ + Test content with no div blocks. + """ + txt = """ + Some normal text + with no div blocks + at all + """ + expected = """ + Some normal text + with no div blocks + at all + """ + self.helper(txt, expected) + + def test_unclosed_div_block(self) -> None: + """ + Test a div block that is not closed. 
+ """ + txt = """ + Some text + + :::: + + More text + """ + expected = """ + Some text + + :::: + + More text + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_remove_prettier_ignore_from_div_blocks +# ############################################################################# + + +class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): + """ + Test the function to remove prettier-ignore comments from div blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. + lines, expected = _prepare_div_block_inputs(txt, expected) + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.assert_equal(actual, expected) + + def test_remove_simple_block(self) -> None: + """ + Test removing prettier-ignore from a simple div block. + """ + txt = """ + + + :::: + ::: + + + """ + expected = """ + :::: + ::: + """ + self.helper(txt, expected) + + def test_remove_block_with_content(self) -> None: + """ + Test removing prettier-ignore from a div block with content. + """ + txt = """ + Some text before + + + :::: + ::::{.column width=40%} + + + Some text after + """ + expected = """ + Some text before + :::: + ::::{.column width=40%} + Some text after + """ + self.helper(txt, expected) + + def test_remove_multiple_blocks(self) -> None: + """ + Test removing prettier-ignore from multiple div blocks. + """ + txt = """ + Text before + + + :::: + ::::{.column width=40%} + + + Middle text + + + :::columns + ::::{.column width=60%} + + + Text after + """ + expected = """ + Text before + :::: + ::::{.column width=40%} + Middle text + :::columns + ::::{.column width=60%} + Text after + """ + self.helper(txt, expected) + + def test_no_prettier_ignore_comments(self) -> None: + """ + Test content with no prettier-ignore comments. 
+ """ + txt = """ + Some normal text + with no prettier-ignore comments + at all + """ + expected = """ + Some normal text + with no prettier-ignore comments + at all + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_add_remove_prettier_ignore_roundtrip +# ############################################################################# + + +class Test_add_remove_prettier_ignore_roundtrip(hunitest.TestCase): + """ + Test that adding and removing prettier-ignore comments is a roundtrip. + """ + + def helper(self, txt: str) -> None: + # Prepare inputs. + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + # Add prettier-ignore comments. + lines_with_comments = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + # Remove prettier-ignore comments. + lines_restored = hmadiblo.remove_prettier_ignore_from_div_blocks( + lines_with_comments + ) + actual = "\n".join(lines_restored) + expected = txt + # Check outputs. + self.assert_equal(actual, expected) + + def test_roundtrip_simple(self) -> None: + """ + Test that add and remove operations are inverses for simple div block. + """ + txt = """ + :::: + ::: + """ + self.helper(txt) + + def test_roundtrip_complex1(self) -> None: + """ + Test roundtrip for content with multiple div blocks and text. + """ + txt = """ + Text1 + + :::: + ::::{.column width=40%} + + Text2 + + :::columns + ::::{.column width=60%} + + Text3 + """ + self.helper(txt) + + def test_roundtrip_complex2(self) -> None: + """ + Test roundtrip for content with multiple div blocks and text. + """ + txt = """ + Text1 + ::: + ::::{.column width=40%} + Text2 + :::: + ::::{.column width=40%} + Text3 + :::columns + ::::{.column width=60%} + Text4 + """ + self.helper(txt) + + def test_roundtrip_complex3(self) -> None: + """ + Test roundtrip for content with multiple div blocks and text. 
+ """ + txt = """ + Text1 + + ::: + ::::{.column width=40%} + + Text2 + :::: + ::::{.column width=40%} + + Text3 + :::columns + ::::{.column width=60%} + Text4 + """ + self.helper(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py new file mode 100644 index 000000000..c8ccc96b8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py @@ -0,0 +1,218 @@ +import logging +import pprint +from typing import Dict, List + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_fenced_blocks_with_tags1 +# ############################################################################# + + +class Test_replace_fenced_blocks_with_tags1(hunitest.TestCase): + def helper( + self, text: str, expected_lines: List[str], expected_map: Dict[str, str] + ) -> None: + """ + Test replacing fenced code blocks with tags. + """ + lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) + lines = lines.split("\n") + # Call function. + actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) + # Check output. + fence_map_as_str = pprint.pformat(fence_map) + expected_map_as_str = pprint.pformat(expected_map) + self.assert_equal(fence_map_as_str, expected_map_as_str) + # + actual_lines = "\n".join(actual_lines) + expected_lines = hprint.dedent( + expected_lines, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual_lines, expected_lines) + + def helper_round_trip(self, text: str) -> None: + """ + Test the round trip. 
+ """ + # Do the round trip. + lines = text.split("\n") + actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) + act_text = hmarkdo.replace_tags_with_fenced_blocks( + actual_lines, fence_map + ) + # Check output. + act_text = "\n".join(act_text) + self.assert_equal(act_text, text) + + def test1(self) -> None: + """ + Test replacing fenced code blocks with tags. + """ + # Prepare inputs. + text = """ + Some text before + ```python + def foo(): + return 42 + ``` + Text between blocks + ```` + Plain code block + ```` + Some text after + """ + # Prepare outputs. + expected_lines = """ + Some text before + + Text between blocks + + Some text after + """ + # Check fence map. + expected_map = { + "1": "```python\ndef foo():\n return 42\n```", + "2": "````\nPlain code block\n````", + } + self.helper(text, expected_lines, expected_map) + + def test2(self) -> None: + """ + Test nested fenced blocks. + """ + text = """ + ```` + Outer block + ```python + def nested(): + pass + ``` + Still outer + ```` + """ + expected_lines = """ + + """ + expected_map = { + "1": "````\nOuter block\n```python\ndef nested():\n pass\n```\nStill outer\n````" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test3(self) -> None: + """ + Test empty fenced blocks. + """ + text = """ + Before + ``` + ``` + After + ```python + ``` + End + """ + expected_lines = """ + Before + + After + + End + """ + expected_map = {"1": "```\n```", "2": "```python\n```"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test4(self) -> None: + """ + Test blocks with different fence lengths. 
+ """ + text = """ + Start + ``` + Three + ``` + Middle + ````` + Five + ````` + End + """ + expected_lines = """ + Start + + Middle + + End + """ + expected_map = {"1": "```\nThree\n```", "2": "`````\nFive\n`````"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test5(self) -> None: + """ + Test blocks with language specifiers. + """ + text = """ + ```python + def foo(): pass + ``` + ```bash + echo hello + ``` + ```javascript + console.log('hi'); + ``` + """ + expected_lines = """ + + + + """ + expected_map = { + "1": "```python\ndef foo(): pass\n```", + "2": "```bash\necho hello\n```", + "3": "```javascript\nconsole.log('hi');\n```", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test6(self) -> None: + """ + Test blocks with indentation. + """ + text = """ + Outside + ``` + Indented block + More indent + ``` + ```python + def foo(): + pass + ``` + End + """ + expected_lines = """ + Outside + + + End + """ + expected_map = { + "1": " ```\n Indented block\n More indent\n ```", + "2": " ```python\n def foo():\n pass\n ```", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py new file mode 100644 index 000000000..91efef1f4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py @@ -0,0 +1,449 @@ +import logging + +import helpers.hmarkdown_filtering as hmarfilt +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# 
Test_filter_by_header1 +# ############################################################################# + + +class Test_filter_by_header1(hunitest.TestCase): + def test_basic_header_extraction(self) -> None: + """ + Test basic header extraction functionality. + """ + # Prepare inputs. + test_content = """ + # Introduction + This is the introduction section. + Some content here. + + ## Section 1 + Content for section 1. + + # Conclusion + Final thoughts here. + """ + test_content = hprint.dedent( + test_content, remove_lead_trail_empty_lines_=False + ) + lines = test_content.split("\n") + # Run test. + result_lines = hmarfilt.filter_by_header(lines, "Introduction") + result_content = "\n".join(result_lines) + # Check outputs. + expected = """ + # Introduction + This is the introduction section. + Some content here. + + ## Section 1 + Content for section 1. + """ + self.assert_equal(result_content, expected, dedent=True) + + def test_header_not_found(self) -> None: + """ + Test behavior when header is not found. + """ + # Prepare inputs. + test_content = """ + # Introduction + This is the introduction section. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(ValueError): + hmarfilt.filter_by_header(lines, "NonExistent") + + +# ############################################################################# +# Test_parse_range1 +# ############################################################################# + + +class Test_parse_range1(hunitest.TestCase): + def test_numeric_range(self) -> None: + """ + Test parsing numeric range (0-indexed). + """ + # Run test. + start, end = hmarfilt._parse_range("0:10", 20) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 10) + + def test_none_start(self) -> None: + """ + Test range with None start (defaults to 0). + """ + # Run test. + start, end = hmarfilt._parse_range("None:10", 20) + # Check outputs. 
+ self.assertEqual(start, 0) + self.assertEqual(end, 10) + + def test_none_end(self) -> None: + """ + Test range with None end (defaults to max_value). + """ + # Run test. + start, end = hmarfilt._parse_range("0:None", 20) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 20) + + def test_both_none(self) -> None: + """ + Test range with both None (0:max_value). + """ + # Run test. + start, end = hmarfilt._parse_range("None:None", 20) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 20) + + def test_invalid_range(self) -> None: + """ + Test invalid range format. + """ + # Run test. + with self.assertRaises(AssertionError): + hmarfilt._parse_range("invalid", 20) + + def test_case_insensitive_none(self) -> None: + """ + Test case insensitive None parsing. + """ + # Run test. + start, end = hmarfilt._parse_range("NONE:none", 20) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 20) + + +# ############################################################################# +# Test_filter_by_lines1 +# ############################################################################# + + +class Test_filter_by_lines1(hunitest.TestCase): + def test_basic_line_filtering(self) -> None: + """ + Test basic line filtering functionality (0-indexed). + """ + # Prepare inputs. + test_content = """ + Line 1 + Line 2 + Line 3 + Line 4 + Line 5 + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (indices 1:3 = Line 2 and Line 3). + result_lines = hmarfilt.filter_by_lines(lines, "1:3") + result_content = "\n".join(result_lines) + # Check outputs. + expected = "Line 2\nLine 3" + self.assertEqual(result_content, expected) + + def test_line_filtering_with_none(self) -> None: + """ + Test line filtering with None start (defaults to 0). + """ + # Prepare inputs. 
+ test_content = """ + Line 1 + Line 2 + Line 3 + Line 4 + Line 5 + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (None:2 = indices 0:2 = Line 1 and Line 2). + result_lines = hmarfilt.filter_by_lines(lines, "None:2") + result_content = "\n".join(result_lines) + # Check outputs. + expected = "Line 1\nLine 2" + self.assertEqual(result_content, expected) + + def test_line_filtering_to_end(self) -> None: + """ + Test line filtering from start to end. + """ + # Prepare inputs. + test_content = """ + Line 1 + Line 2 + Line 3 + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (1:None = indices 1:3 = Line 2 and Line 3). + result_lines = hmarfilt.filter_by_lines(lines, "1:None") + result_content = "\n".join(result_lines) + # Check outputs. + expected = "Line 2\nLine 3" + self.assertEqual(result_content, expected) + + def test_invalid_range_order(self) -> None: + """ + Test that start line <= end line is enforced. + """ + # Prepare inputs. + test_content = "Line 1\nLine 2\nLine 3" + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(AssertionError): + hmarfilt.filter_by_lines(lines, "2:1") + + +# ############################################################################# +# Test_filter_by_slides1 +# ############################################################################# + + +class Test_filter_by_slides1(hunitest.TestCase): + def test_basic_slide_filtering(self) -> None: + """ + Test basic slide filtering functionality. + """ + # Prepare inputs. + test_content = """ + # Header 1 + + + + + * Slide 1 + Content for slide 1. + + * Slide 2 + Content for slide 2. + + * Slide 3 + Content for slide 3. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + result_lines = hmarfilt.filter_by_slides(lines, "0:1") + result_content = "\n".join(result_lines) + # Check outputs. 
+ self.assertIn("Slide 1", result_content) + self.assertNotIn("Slide 2", result_content) + + def test_slide_filtering_with_none_end(self) -> None: + """ + Test slide filtering to the end. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + + * Slide 2 + Content 2. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + result_lines = hmarfilt.filter_by_slides(lines, "0:None") + result_content = "\n".join(result_lines) + # Check outputs. + self.assertIn("Slide 1", result_content) + self.assertIn("Slide 2", result_content) + + def test_slide_filtering_invalid_range(self) -> None: + """ + Test that invalid slide ranges raise errors. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "1:0") + + def test_slide_filtering_beyond_slides(self) -> None: + """ + Test filtering with end beyond available slides. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "0:5") + + def test_no_slides_content(self) -> None: + """ + Test behavior with content that has no slides. + """ + # Prepare inputs. + test_content = """ + # Header 1 + Just regular content without slides. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs (should fail validation since there are no slides). + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "0:1") + + def test_slide_filtering_single_slide(self) -> None: + """ + Test filtering a single slide when there's only one slide (0-indexed). + """ + # Prepare inputs. 
+ test_content = """ + * Only Slide + This is the only content. + Additional content after the slide. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (0:1 = only slide at index 0). + result_lines = hmarfilt.filter_by_slides(lines, "0:1") + result_content = "\n".join(result_lines) + # Check outputs. + self.assertIn("Only Slide", result_content) + self.assertIn("This is the only content.", result_content) + + def test_slide_end_boundary(self) -> None: + """ + Test filtering to the end of slides (0-indexed). + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + + * Slide 2 + Content 2. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (0:2 = slides 0 and 1). + result_lines = hmarfilt.filter_by_slides(lines, "0:2") + result_content = "\n".join(result_lines) + # Check outputs. + self.assertIn("Slide 1", result_content) + self.assertIn("Slide 2", result_content) + + +# ############################################################################# +# Test_additional_edge_cases1 +# ############################################################################# + + +class Test_additional_edge_cases1(hunitest.TestCase): + def test_filter_by_header_with_subsection(self) -> None: + """ + Test extracting a subsection header. + """ + # Prepare inputs. + test_content = """ + # Introduction + This is the introduction. + + ## Subsection 1 + Content for subsection 1. + + ## Subsection 2 + Content for subsection 2. + + # Conclusion + Final thoughts. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + result_lines = hmarfilt.filter_by_header(lines, "Subsection 1") + result_content = "\n".join(result_lines) + # Check outputs. 
+ self.assertIn("## Subsection 1", result_content) + self.assertIn("Content for subsection 1.", result_content) + + def test_parse_range_edge_cases(self) -> None: + """ + Test edge cases for range parsing (0-indexed). + """ + # Run test. + start, end = hmarfilt._parse_range("0:0", 1) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 0) + # Run test. + start, end = hmarfilt._parse_range("None:None", 1000) + # Check outputs. + self.assertEqual(start, 0) + self.assertEqual(end, 1000) + + def test_filter_lines_single_line(self) -> None: + """ + Test filtering with empty range (0:0). + """ + # Prepare inputs. + test_content = "Single line content" + lines = test_content.split("\n") + # Run test (0:0 = empty range). + result_lines = hmarfilt.filter_by_lines(lines, "0:0") + result_content = "\n".join(result_lines) + # Check outputs. + self.assertEqual(result_content, "") + + def test_filter_lines_exact_range(self) -> None: + """ + Test filtering with exact boundaries (0-indexed). + """ + # Prepare inputs. + test_content = """ + Line 1 + Line 2 + Line 3 + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test (0:2 = indices 0 and 1 = Line 1 and Line 2). + result_lines = hmarfilt.filter_by_lines(lines, "0:2") + result_content = "\n".join(result_lines) + # Check outputs. + expected = "Line 1\nLine 2" + self.assertEqual(result_content, expected) + + def test_parse_range_invalid_formats(self) -> None: + """ + Test various invalid range formats. + """ + # Run test. + with self.assertRaises(AssertionError): + hmarfilt._parse_range("5", 10) + # Run test. + with self.assertRaises(AssertionError): + hmarfilt._parse_range("", 10) + # Run test. 
+ with self.assertRaises(ValueError): + hmarfilt._parse_range("1:2:3", 10) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py new file mode 100644 index 000000000..abf2faf66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py @@ -0,0 +1,1403 @@ +import logging +import os + +import helpers.hio as hio +import helpers.hmarkdown_div_blocks as hmadiblo +import helpers.hmarkdown_formatting as hmarform +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_remove_end_of_line_periods1 +# ############################################################################# + + +class Test_remove_end_of_line_periods1(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + input_text = hprint.dedent(input_text).strip() + expected_text = hprint.dedent(expected_text).strip() + lines = input_text.split("\n") + # Run test. + actual_lines = hmarform.remove_end_of_line_periods(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.assertEqual(actual, expected_text) + + def test_standard_case(self) -> None: + input_text = """ + Hello. + World. + This is a test. + """ + expected_text = """ + Hello + World + This is a test + """ + self.helper(input_text, expected_text) + + def test_no_periods(self) -> None: + input_text = """ + Hello + World + This is a test + """ + expected_text = """ + Hello + World + This is a test + """ + self.helper(input_text, expected_text) + + def test_multiple_periods(self) -> None: + input_text = """ + Line 1..... + Line 2..... 
+ End. + """ + expected_text = """ + Line 1 + Line 2 + End + """ + self.helper(input_text, expected_text) + + def test_empty_string(self) -> None: + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test_leading_and_trailing_periods(self) -> None: + input_text = """ + .Line 1. + .Line 2. + ..End.. + """ + expected_text = """ + .Line 1 + .Line 2 + ..End + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_md_clean_up1 +# ############################################################################# + + +class Test_md_clean_up1(hunitest.TestCase): + def test1(self) -> None: + # Prepare inputs. + txt = r""" + **States**: + - \( S = \{\text{Sunny}, \text{Rainy}\} \) + **Observations**: + - \( O = \{\text{Yes}, \text{No}\} \) (umbrella) + + ### Initial Probabilities: + \[ + P(\text{Sunny}) = 0.6, \quad P(\text{Rainy}) = 0.4 + \] + + ### Transition Probabilities: + \[ + \begin{aligned} + P(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad P(\text{Sunny} \to \text{Rainy}) = 0.3 \\ + P(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad P(\text{Rainy} \to \text{Rainy}) = 0.6 + \end{aligned} + \] + + ### Observation (Emission) Probabilities: + \[ + \begin{aligned} + P(\text{Yes} \mid \text{Sunny}) &= 0.1, \quad P(\text{No} \mid \text{Sunny}) = 0.9 \\ + P(\text{Yes} \mid \text{Rainy}) &= 0.8, \quad P(\text{No} \mid \text{Rainy}) = 0.2 + \end{aligned} + \] + """ + txt = hprint.dedent(txt) + actual = hmarform.md_clean_up(txt) + actual = hprint.dedent(actual) + expected = r""" + **States**: + - $S = \{\text{Sunny}, \text{Rainy}\}$ + **Observations**: + - $O = \{\text{Yes}, \text{No}\}$ (umbrella) + + ### Initial Probabilities: + $$ + \Pr(\text{Sunny}) = 0.6, \quad \Pr(\text{Rainy}) = 0.4 + $$ + + ### Transition Probabilities: + $$ + \begin{aligned} + \Pr(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad \Pr(\text{Sunny} \to \text{Rainy}) = 0.3 \\ + \Pr(\text{Rainy} \to 
\text{Sunny}) &= 0.4, \quad \Pr(\text{Rainy} \to \text{Rainy}) = 0.6 + \end{aligned} + $$ + + ### Observation (Emission) Probabilities: + $$ + \begin{aligned} + \Pr(\text{Yes} | \text{Sunny}) &= 0.1, \quad \Pr(\text{No} | \text{Sunny}) = 0.9 \\ + \Pr(\text{Yes} | \text{Rainy}) &= 0.8, \quad \Pr(\text{No} | \text{Rainy}) = 0.2 + \end{aligned} + $$""" + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_remove_code_delimiters1 +# ############################################################################# + + +class Test_remove_code_delimiters1(hunitest.TestCase): + def test1(self) -> None: + """ + Test a basic example. + """ + # Prepare inputs. + content = r""" + ```python + def hello_world(): + print("Hello, World!") + ``` + """ + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def hello_world(): + print("Hello, World!") + """ + self.assert_equal(actual, expected, dedent=True) + + def test2(self) -> None: + """ + Test an example with empty lines at the start and end. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def check_empty_lines(): + print("Check empty lines are present!") + """ + self.assert_equal(actual, expected, dedent=True) + + def test3(self) -> None: + """ + Test a markdown with headings, Python and yaml blocks. + """ + # Prepare inputs. + content = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." 
+ + ```python + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + ``` + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + ```yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + ``` + """ + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." + + + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + + """ + self.assert_equal(actual, expected, dedent=True) + + def test4(self) -> None: + """ + Test another markdown with headings and multiple indent Python blocks. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + self.check_string(actual, dedent=True) + + def test5(self) -> None: + """ + Test an empty string. + """ + # Prepare inputs. + content = "" + lines = content.split("\n") if content else [] + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. 
+ expected = "" + self.assert_equal(actual, expected, dedent=True) + + def test6(self) -> None: + """ + Test a Python and immediate markdown code block. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def no_start_python(): + print("No mention of python at the start") + + + + A markdown paragraph contains + delimiters that needs to be removed. + """ + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_format_markdown_slide +# ############################################################################# + + +class Test_format_markdown_slide(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarform.format_markdown_slide(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + _LOG.debug("actual=\n%s", actual) + _LOG.debug("expected=\n%s", expected) + self.assert_equal(str(actual), str(expected)) + + def test1(self) -> None: + """ + Test formatting a simple slide with bullets. + """ + input_text = """ + * Slide title + - First bullet + - Second bullet + """ + expected_text = """ + * Slide Title + + - First bullet + + - Second bullet + """ + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test formatting multiple slides. 
+ """ + input_text = """ + * First slide + - Point A + - Point B + * Second slide + - Point X + - Point Y + """ + expected_text = """ + * First Slide + + - Point A + + - Point B + * Second Slide + + - Point X + + - Point Y + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test formatting slides with nested bullets. + """ + input_text = """ + * Main slide + - First level + - Nested point + - Another nested + - Second level + """ + expected_text = """ + * Main Slide + + - First level + - Nested point + - Another nested + + - Second level + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test formatting empty input. + """ + # Prepare inputs. + input_text = """ + """ + # Check outputs. + expected_text = """ + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test formatting slide title capitalization. + """ + input_text = """ + * mixed case slide title + - Point one + """ + expected_text = """ + * Mixed Case Slide Title + + - Point one + """ + self.helper(input_text, expected_text) + + def test6(self) -> None: + """ + Test formatting slide with only title, no bullet points. + """ + input_text = """ + * Solo slide title + """ + expected_text = """ + * Solo Slide Title + """ + self.helper(input_text, expected_text) + + def test7(self) -> None: + """ + Test formatting slide with deeply nested bullets. + """ + input_text = """ + * Main slide + - Level 1 + - Level 2 + - Level 3 + - Level 4 + - Back to level 1 + """ + expected_text = """ + * Main Slide + + - Level 1 + - Level 2 + - Level 3 + - Level 4 + + - Back to level 1 + """ + self.helper(input_text, expected_text) + + def test8(self) -> None: + """ + Test formatting slide with nested bullets and special formatting. + """ + input_text = r""" + * What Are Data Analytics? 
+ - **Collections of data** + + - Aggregated, organized data sets for analysis + + - E.g., customer purchase histories in a CRM system + - **Dashboards** + + - Visual displays of key metrics for insights + - E.g., dashboard showing quarterly revenue, expenses + + - **Descriptive statistics** + - Summary metrics: mean, median, mode, standard deviation + - E.g., average sales per quarter to understand trends + - **Historical reports** + + - Examination of past performance + - E.g., monthly sales reports for past fiscal year + - **Models** + - Statistical representations to forecast, explain phenomena + + - E.g., predictive model to anticipate customer churn based on behavioral data + """ + expected_text = r""" + * What Are Data Analytics? + + - **Collections of data** + - Aggregated, organized data sets for analysis + - E.g., customer purchase histories in a CRM system + + - **Dashboards** + - Visual displays of key metrics for insights + - E.g., dashboard showing quarterly revenue, expenses + + - **Descriptive statistics** + - Summary metrics: mean, median, mode, standard deviation + - E.g., average sales per quarter to understand trends + + - **Historical reports** + - Examination of past performance + - E.g., monthly sales reports for past fiscal year + + - **Models** + - Statistical representations to forecast, explain phenomena + - E.g., predictive model to anticipate customer churn based on behavioral data + """ + self.helper(input_text, expected_text) + + def test9(self) -> None: + """ + This reproduces a broken behavior of prettier with fenced divs. 
+ """ + input_text = r""" + * Incremental vs Iterative + ::: columns + :::: {.column width=55%} + + - **Incremental Development** + - Each increment adds functional components + - Require upfront planning to divide features meaningfully + - Integration of increments can be complex + + - **Iterative Development** + - Each increment delivers usable system + - Refine and improve product through repeated cycles + - Get feedback + - Uncover and adjust for unknown requirements + + - **Incremental $\gg$ Iterative** + + :::: + :::: {.column width=40%} + + ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} + + \small _Incremental + + \vspace{0.5cm} + + ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} + + \small _Iterative_ + + \vspace{0.5cm} + + ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} + + \small _Incremental vs Iterative_ + :::: + ::: + """ + expected_text = r""" + * Incremental vs Iterative + ::: columns + :::: {.column width=55%} + + - **Incremental Development** + - Each increment adds functional components + - Require upfront planning to divide features meaningfully + - Integration of increments can be complex + + - **Iterative Development** + - Each increment delivers usable system + - Refine and improve product through repeated cycles + - Get feedback + - Uncover and adjust for unknown requirements + + - **Incremental $\gg$ Iterative** + :::: + :::: {.column width=40%} + ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} + \small \_Incremental + \vspace{0.5cm} + ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} + \small _Iterative_ + \vspace{0.5cm} + ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} + \small _Incremental vs Iterative_ + :::: + ::: + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# 
Test_format_figures +# ############################################################################# + + +class Test_format_figures(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual_lines = hmarform.format_figures(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test_basic_text_with_figures(self) -> None: + """ + Test converting basic text with figures to column format. + """ + input_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Read / write large amounts of data infrequently + - Analytics requires a few columns + - Better data compression + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Read / write large amounts of data infrequently + - Analytics requires a few columns + - Better data compression + :::: + :::: {.column width=40%} + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_no_figures_no_change(self) -> None: + """ + Test that text without figures remains unchanged. 
+ """ + input_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Better data compression + """ + expected_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Better data compression + """ + self.helper(input_text, expected_text) + + def test_already_in_columns_format_no_change(self) -> None: + """ + Test that text already in columns format remains unchanged. + """ + input_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + :::: + :::: {.column width=40%} + ![](some_image.png) + :::: + ::: + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + :::: + :::: {.column width=40%} + ![](some_image.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_single_figure(self) -> None: + """ + Test converting text with a single figure. + """ + input_text = """ + - **Important concept** + - This is the main point + - Supporting detail + + ![](path/to/image.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Important concept** + - This is the main point + - Supporting detail + :::: + :::: {.column width=40%} + + ![](path/to/image.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_mixed_content_with_figures(self) -> None: + """ + Test converting mixed content including text and figures. + """ + input_text = """ + ## Section header + + Some introductory text here. + + - **Point one** + - Detail A + - Detail B + - **Point two** + - Detail X + - Detail Y + + ![](image1.png) + + Additional text between figures. + + ![](image2.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + ## Section header + + Some introductory text here. 
+ + - **Point one** + - Detail A + - Detail B + - **Point two** + - Detail X + - Detail Y + :::: + :::: {.column width=40%} + + ![](image1.png) + + Additional text between figures. + + ![](image2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_empty_input(self) -> None: + """ + Test that empty input returns empty output. + """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test_with_slide_title(self) -> None: + """ + Test that slide title is left unchanged. + """ + input_text = """ + * VCS: How to Track Data + + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + """ + expected_text = """ + * VCS: How to Track Data + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + :::: + :::: {.column width=40%} + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_format_md_links_to_latex_format +# ############################################################################# + + +class Test_format_md_links_to_latex_format(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual_lines = hmarform.format_md_links_to_latex_format(lines) + actual = "\n".join(actual_lines) + # Check outputs. 
+ expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + # ========================================================================= + # Edge cases. + # ========================================================================= + + def test_empty_input(self) -> None: + """ + Test empty input. + """ + # Prepare inputs. + input_text = "" + expected_text = "" + # Run test. + self.helper(input_text, expected_text) + + def test_no_links(self) -> None: + """ + Test content without any links. + """ + # Prepare inputs. + input_text = """ + # Important Notes + + - This is regular text + - No links here + - Just plain content + """ + expected_text = """ + # Important Notes + + - This is regular text + - No links here + - Just plain content + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Plain URL conversion: http://... or https://... + # ========================================================================= + + def test_plain_http_url(self) -> None: + """ + Test converting single plain HTTP URL. + """ + # Prepare inputs. + input_text = """ + Visit http://example.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{http://example.com}}](http://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_https_url(self) -> None: + """ + Test converting single plain HTTPS URL. + """ + # Prepare inputs. + input_text = """ + Visit https://example.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_path(self) -> None: + """ + Test converting plain URLs with paths. + """ + # Prepare inputs. 
+ input_text = """ + Check out https://ubuntu.com/tutorials/command-line-for-beginners + """ + expected_text = r""" + Check out [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_query_parameters(self) -> None: + """ + Test converting plain URL with query parameters. + """ + # Prepare inputs. + input_text = """ + Search: https://example.com/search?q=python&page=1 + """ + expected_text = r""" + Search: [\textcolor{blue}{\underline{https://example.com/search?q=python&page=1}}](https://example.com/search?q=python&page=1) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_fragment(self) -> None: + """ + Test converting plain URL with fragment. + """ + # Prepare inputs. + input_text = """ + Docs: https://docs.python.org/3/tutorial/index.html#tutorial-index + """ + expected_text = r""" + Docs: [\textcolor{blue}{\underline{https://docs.python.org/3/tutorial/index.html#tutorial-index}}](https://docs.python.org/3/tutorial/index.html#tutorial-index) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_at_line_start(self) -> None: + """ + Test plain URL at beginning of line. + """ + # Prepare inputs. + input_text = """ + https://example.com is a good site + """ + expected_text = r""" + [\textcolor{blue}{\underline{https://example.com}}](https://example.com) is a good site + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_at_line_end(self) -> None: + """ + Test plain URL at end of line. + """ + # Prepare inputs. + input_text = """ + Check this link https://example.com + """ + expected_text = r""" + Check this link [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. 
+ self.helper(input_text, expected_text) + + # ========================================================================= + # URL in backticks conversion: `http://...` or `https://...` + # ========================================================================= + + def test_backtick_url(self) -> None: + """ + Test converting single URL in backticks. + """ + # Prepare inputs. + input_text = """ + Visit `https://example.com` for details + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) for details + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Markdown link conversion: [Text](URL) + # ========================================================================= + + def test_markdown_link_simple(self) -> None: + """ + Test converting simple markdown link [Text](URL). + """ + # Prepare inputs. + input_text = """ + Check out [this tutorial](https://example.com/tutorial) + """ + expected_text = r""" + Check out [\textcolor{blue}{\underline{this tutorial}}](https://example.com/tutorial) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_markdown_link_preserves_text(self) -> None: + """ + Test that markdown link preserves the display text. + """ + # Prepare inputs. + input_text = """ + See [documentation](https://docs.example.com) here + """ + expected_text = r""" + See [\textcolor{blue}{\underline{documentation}}](https://docs.example.com) here + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Email link conversion: [email@domain.com](email@domain.com) + # ========================================================================= + + def test_email_link_simple1(self) -> None: + """ + Test converting simple email link. + """ + # Prepare inputs. 
+ input_text = """ + Contact: [support@example.com](support@example.com) + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_email_link_simple2(self) -> None: + """ + Test converting an email link whose link text is empty. + """ + # Prepare inputs. + input_text = """ + Contact: [](support@example.com) + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Multiple URLs. + # ========================================================================= + + def test_multiple_urls_same_line(self) -> None: + """ + Test converting multiple URLs on same line. + """ + # Prepare inputs. + input_text = """ + Visit https://example.com and https://another.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) and [\textcolor{blue}{\underline{https://another.com}}](https://another.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_multiple_urls_different_lines(self) -> None: + """ + Test converting multiple URLs on different lines. + """ + # Prepare inputs. + input_text = """ + Tutorial: https://ubuntu.com/tutorials/command-line-for-beginners + + Documentation: https://docs.python.org/3/ + """ + expected_text = r""" + Tutorial: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + + Documentation: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Mixed link types.
+ # ========================================================================= + + def test_mixed_plain_and_backtick_urls(self) -> None: + """ + Test handling mixed plain and backtick URLs. + """ + # Prepare inputs. + input_text = """ + Plain: https://example.com + Backtick: `https://docs.example.com` + """ + expected_text = r""" + Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + Backtick: [\textcolor{blue}{\underline{https://docs.example.com}}](https://docs.example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_mixed_plain_and_markdown_links(self) -> None: + """ + Test handling mixed plain URLs and markdown links. + """ + # Prepare inputs. + input_text = """ + Plain: https://example.com + Markdown: [Click here](https://docs.example.com) + """ + expected_text = r""" + Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + Markdown: [\textcolor{blue}{\underline{Click here}}](https://docs.example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_mixed_all_types(self) -> None: + """ + Test handling all link types in same content. + """ + # Prepare inputs. 
+ input_text = r""" + ## Resources + + - Plain URL: https://ubuntu.com/tutorials/command-line-for-beginners + - Backtick URL: `https://docs.python.org/3/` + - Markdown link: [Click here](https://github.com) + - Email: [support@example.com](support@example.com) + - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) + """ + expected_text = r""" + ## Resources + + - Plain URL: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + - Backtick URL: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) + - Markdown link: [\textcolor{blue}{\underline{Click here}}](https://github.com) + - Email: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Complex scenarios. + # ========================================================================= + + def test_url_with_file_extension(self) -> None: + """ + Test URL pointing to file with extension. + """ + # Prepare inputs. + input_text = """ + Download: https://cdn.example.com/files/document.pdf + """ + expected_text = r""" + Download: [\textcolor{blue}{\underline{https://cdn.example.com/files/document.pdf}}](https://cdn.example.com/files/document.pdf) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_already_formatted_link_preserved(self) -> None: + """ + Test that already formatted links are preserved. + """ + # Prepare inputs. + input_text = r""" + Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) + """ + expected_text = r""" + Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) + """ + # Run test. 
+ self.helper(input_text, expected_text) + + # ========================================================================= + # Image/picture links should be left untouched. + # ========================================================================= + + def test_filter_image_simple(self) -> None: + """ + Test that simple image links are left untouched. + """ + # Prepare inputs. + input_text = """ + Check this image: ![](path/to/image.png) + """ + expected_text = """ + Check this image: ![](path/to/image.png) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_jpg_images(self) -> None: + """ + Test that JPG image links are left untouched. + """ + # Prepare inputs. + input_text = """ + ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) + """ + expected_text = """ + ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_mixed_images_and_emails(self) -> None: + """ + Test that image links are not processed while email links are. + """ + # Prepare inputs. + input_text = """ + Contact: [](support@example.com) + Image: ![](path/to/image.png) + Link: https://example.com + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + Image: ![](path/to/image.png) + Link: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_image_with_alt_text(self) -> None: + """ + Test that image links with alt text are left untouched. + """ + # Prepare inputs. + input_text = """ + ![Alt text](path/to/image.png) + """ + expected_text = """ + ![Alt text](path/to/image.png) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_multiple_images(self) -> None: + """ + Test that multiple image links are left untouched. + """ + # Prepare inputs. 
+ input_text = """ + ![](image1.png) + ![](image2.jpg) + ![](image3.gif) + """ + expected_text = """ + ![](image1.png) + ![](image2.jpg) + ![](image3.gif) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_markdown_link_with_escaped_underscores(self) -> None: + """ + Test markdown link with escaped underscores in the text. + """ + # Prepare inputs. + input_text = r""" + [tutorial\_docker\_compose](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) + """ + expected_text = r""" + [\textcolor{blue}{\underline{tutorial\_docker\_compose}}](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) + """ + # Run test. + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_add_prettier_ignore_to_div_blocks +# ############################################################################# + + +class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): + """ + Test the function to add prettier-ignore comments around div blocks. + """ + + def test_simple_div_block(self) -> None: + """ + Test a simple div block with two colons. + """ + # Prepare inputs. + txt = """ + :::: + ::: + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + def test_multiple_div_blocks(self) -> None: + """ + Test multiple div blocks in the same content. + """ + # Prepare inputs. + txt = """ + Some text before + + :::: + ::::{.column width=40%} + + Middle text + + :::columns + ::::{.column width=60%} + + Some text after + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. 
+ actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + +# ############################################################################# +# Test_remove_prettier_ignore_from_div_blocks +# ############################################################################# + + +class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): + """ + Test the function to remove prettier-ignore comments from div blocks. + """ + + def test_remove_simple_block(self) -> None: + """ + Test removing prettier-ignore from a simple div block. + """ + # Prepare inputs. + txt = """ + + + :::: + ::: + + + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + def test_remove_multiple_blocks(self) -> None: + """ + Test removing prettier-ignore from multiple div blocks. + """ + # Prepare inputs. + txt = """ + Text before + + + :::: + ::::{.column width=40%} + + + Middle text + + + :::columns + ::::{.column width=60%} + + + Text after + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py new file mode 100644 index 000000000..34ea20964 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py @@ -0,0 +1,2002 @@ +import logging +import os +import pprint +from typing import Any, List, Tuple, cast + +import helpers.hio as hio +import helpers.hmarkdown as hmarkdo +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: + res = [ + hmarkdo.HeaderInfo(level, text, 5 * i + 1) + for i, (level, text) in enumerate(data) + ] + return res + + +def get_header_list1() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (2, "Section 1.1"), + (3, "Subsection 1.1.1"), + (3, "Subsection 1.1.2"), + (2, "Section 1.2"), + (1, "Chapter 2"), + (2, "Section 2.1"), + (3, "Subsection 2.1.1"), + (2, "Section 2.2"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list2() -> hmarkdo.HeaderList: + data = [ + (1, "Module Alpha"), + (2, "Lesson Alpha-1"), + (3, "Topic Alpha-1.a"), + (3, "Topic Alpha-1.b"), + (2, "Lesson Alpha-2"), + (3, "Topic Alpha-2.a"), + (1, "Module Beta"), + (2, "Lesson Beta-1"), + (3, "Topic Beta-1.a"), + (2, "Lesson Beta-2"), + (1, "Module Gamma"), + (2, "Lesson Gamma-1"), + (3, "Topic Gamma-1.a"), + (3, "Topic Gamma-1.b"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list3() -> hmarkdo.HeaderList: + data = [ + (1, "Topic A"), + (2, "Subtopic A.1"), + (3, "Detail A.1.i"), + (3, "Detail A.1.ii"), + (2, "Subtopic A.2"), 
+ (1, "Topic B"), + (2, "Subtopic B.1"), + (3, "Detail B.1.i"), + (2, "Subtopic B.2"), + (3, "Detail B.2.i"), + (3, "Detail B.2.ii"), + (2, "Subtopic B.3"), + (1, "Topic C"), + (2, "Subtopic C.1"), + (3, "Detail C.1.i"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list4() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (3, "Subsection 1.1.1"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list5() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (2, "Section 1.1"), + (3, "Subsection 1.1.1"), + (1, "Chapter 2"), + ] + header_list = _to_header_list(data) + return header_list + + +def _get_markdown_example1() -> str: + content = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + # Header3 + Content under header 3. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_example2() -> str: + content = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_no_header_example1() -> str: + content = r""" + This is some content without any headers. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_example4() -> str: + content = r""" + # Chapter 1 + + Welcome to the first chapter. This chapter introduces fundamental concepts and + lays the groundwork for further exploration. + + ## Section 1.1 + + This section discusses the initial principles and key ideas that are crucial for + understanding the topic. + + ### Subsection 1.1.1 + + The first subsection dives deeper into the details, providing examples and + insights that help clarify the concepts. + + Example: + ```python + def greet(name): + return f"Hello, {name}!" 
+ print(greet("World")) + ``` + + ### Subsection 1.1.2 + + Here, we examine alternative perspectives and additional considerations that + were not covered in the previous subsection. + + - Key Point 1: Understanding different viewpoints enhances comprehension. + - Key Point 2: Practical application reinforces learning. + + ## Section 1.2 + + This section introduces new frameworks and methodologies that build upon the + foundation established earlier. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." + + # Chapter 2 + + Moving forward, this chapter explores advanced topics and real-world + applications. + + ## Section 2.1 + + This section provides an in-depth analysis of core mechanisms that drive the + subject matter. + + ### Subsection 2.1.1 + + A deep dive into specific case studies and empirical evidence that support + theoretical claims. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + ## Section 2.2 + + The final section of this chapter presents summary conclusions, key takeaways, + and potential future developments. + + ```yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + ``` + + Stay curious and keep exploring! + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_example5() -> str: + content = r""" + # Models + test + ## Naive Bayes + test2 + ## Decision trees + test3 + ## Random forests + ## Linear models + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_slides_example1() -> str: + content = r""" + # Header1 + + * Slide 1 + Content 1. + + ## Header2 + + * Slide 2 + Content 2. + + * Slide 3 + Content 3.
+ """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_slides_example2() -> str: + content = r""" + # Header1 + + * Slide1 + Content 1. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _test_navigation_flow( + self_: Any, + txt: str, + header_list_exp: str, + header_tree_exp: str, + level: int, + description: str, + nav_str_exp: str, +) -> None: + # 1) Extract headers. + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) + actual = pprint.pformat(header_list) + self_.assert_equal( + actual, header_list_exp, dedent=True, remove_lead_trail_empty_lines=True + ) + # 2) Build header tree. + tree = hmarkdo.build_header_tree(header_list) + actual = hmarkdo.header_tree_to_str(tree, ancestry=None) + self_.assert_equal( + actual, header_tree_exp, dedent=True, remove_lead_trail_empty_lines=True + ) + # 3) Compute the navigation bar for a specific header. + actual = hmarkdo.selected_navigation_to_str(tree, level, description) + self_.assert_equal( + actual, nav_str_exp, dedent=True, remove_lead_trail_empty_lines=True + ) + + +def _test_full_navigation_flow(self_: Any, txt: str) -> None: + res: List[str] = [] + # Extract headers. + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) + # Build header tree. + tree = hmarkdo.build_header_tree(header_list) + # Create a navigation map for any header. + for node in header_list: + level, description, _ = node.as_tuple() + res_tmp = hprint.frame(hprint.to_str("level description")) + res.append(res_tmp) + # + res_tmp = hmarkdo.selected_navigation_to_str(tree, level, description) + res.append(res_tmp) + # Check. 
+ actual = "\n".join(res) + self_.check_string(actual) + + +# ############################################################################# +# Test_header_list_to_vim_cfile1 +# ############################################################################# + + +class Test_header_list_to_vim_cfile1(hunitest.TestCase): + def test1(self) -> None: + """ + Test conversion of header list to vim cfile format with multiple + levels. + """ + # Prepare inputs. + markdown_file = "test.py" + headers = get_header_list1() + # Call function. + actual_lines = hmarkdo.header_list_to_vim_cfile(markdown_file, headers) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + test.py:1:Chapter 1 + test.py:6:Section 1.1 + test.py:11:Subsection 1.1.1 + test.py:16:Subsection 1.1.2 + test.py:21:Section 1.2 + test.py:26:Chapter 2 + test.py:31:Section 2.1 + test.py:36:Subsection 2.1.1 + test.py:41:Section 2.2 + """ + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_header_list_to_markdown1 +# ############################################################################# + + +class Test_header_list_to_markdown1(hunitest.TestCase): + def helper( + self, headers: hmarkdo.HeaderList, mode: str, expected: str + ) -> None: + """ + Helper method to test header_list_to_markdown function. + + :param headers: list of HeaderInfo objects + :param mode: conversion mode ("list" or "headers") + :param expected: expected output string + """ + # Call function. + actual_lines = hmarkdo.header_list_to_markdown(headers, mode) + actual = "\n".join(actual_lines) + # Check output. + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test conversion of header list to markdown list format with + indentation. + """ + # Prepare inputs. + headers = get_header_list1() + mode = "list" + # Prepare outputs. 
+ expected = r""" + - Chapter 1 + - Section 1.1 + - Subsection 1.1.1 + - Subsection 1.1.2 + - Section 1.2 + - Chapter 2 + - Section 2.1 + - Subsection 2.1.1 + - Section 2.2 + """ + # Run test. + self.helper(headers, mode, expected) + + def test2(self) -> None: + """ + Test conversion of header list to markdown headers format with + proper heading levels. + """ + # Prepare inputs. + headers = get_header_list1() + mode = "headers" + # Prepare outputs. + expected = r""" + # Chapter 1 + ## Section 1.1 + ### Subsection 1.1.1 + ### Subsection 1.1.2 + ## Section 1.2 + # Chapter 2 + ## Section 2.1 + ### Subsection 2.1.1 + ## Section 2.2 + """ + # Run test. + self.helper(headers, mode, expected) + + +# ############################################################################# +# Test_is_markdown_line_separator1 +# ############################################################################# + + +class Test_is_markdown_line_separator1(hunitest.TestCase): + def helper(self, line: str, expected: bool) -> None: + """ + Helper method to test is_markdown_line_separator function. + + :param line: input line to test + :param expected: expected boolean result + """ + # Call function. + actual = hmarkdo.is_markdown_line_separator(line) + # Check output. + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test that a line with only dashes is recognized as a separator. + """ + # Prepare inputs. + line = "-----------------------" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test2(self) -> None: + """ + Test that a line with hash prefix and dashes is a valid separator. + """ + # Prepare inputs. + line = "# ------" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test3(self) -> None: + """ + Test that a line with hash prefix and hash characters is a valid + separator. + """ + # Prepare inputs. + line = "# #########" + # Prepare outputs. + expected = True + # Run test. 
+ self.helper(line, expected) + + def test4(self) -> None: + """ + Test that a line with triple hash prefix and equals is a valid + separator. + """ + # Prepare inputs. + line = "### =====" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test5(self) -> None: + """ + Test that a line with hash and slashes is a valid separator. + """ + # Prepare inputs. + line = "#//////" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test6(self) -> None: + """ + Test that a line with hash, spaces, and slashes is a valid + separator. + """ + # Prepare inputs. + line = "# //////" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test7(self) -> None: + """ + Test that plain text is not recognized as a separator. + """ + # Prepare inputs. + line = "Not a separator" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test8(self) -> None: + """ + Test that a short dash line is not a valid separator. + """ + # Prepare inputs. + line = "# --" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test9(self) -> None: + """ + Test that mixed separator characters are not valid. + """ + # Prepare inputs. + line = "# ###---" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test10(self) -> None: + """ + Test that two equals signs alone are not a valid separator. + """ + # Prepare inputs. + line = "==" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test11(self) -> None: + """ + Test that dash prefix with slashes is not a valid separator. + """ + # Prepare inputs. + line = "- //////" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test12(self) -> None: + """ + Test that separators with trailing text are not valid. + """ + # Prepare inputs. 
+ line = "=== Not a seperator" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test13(self) -> None: + """ + Test that separators with surrounding text are not valid. + """ + # Prepare inputs. + line = "--- Not a seperator ---" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + +# ############################################################################# +# Test_extract_section_from_markdown1 +# ############################################################################# + + +class Test_extract_section_from_markdown1(hunitest.TestCase): + def helper(self, content: str, header_name: str, expected: str) -> None: + """ + Helper method to test extract_section_from_markdown function. + + :param content: markdown content to extract from + :param header_name: name of header to extract + :param expected: expected output string + """ + # Call function. + lines = content.split("\n") + actual_lines = hmarkdo.extract_section_from_markdown(lines, header_name) + actual = "\n".join(actual_lines) + # Check output. + self.assert_equal(actual, expected, dedent=True) + + # TODO(gp): This doesn't seem correct. + def test1(self) -> None: + """ + Test extracting a section that includes a subheader. + """ + # Prepare inputs. + content = _get_markdown_example1() + # Prepare outputs. + expected = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + """ + # Run test. + self.helper(content, "Header1", expected) + + def test2(self) -> None: + """ + Test extracting a subheader section only. + """ + # Prepare inputs. + content = _get_markdown_example1() + content = hprint.dedent(content) + # Prepare outputs. + expected = r""" + ## Header2 + Content under subheader 2. + """ + # Run test. + self.helper(content, "Header2", expected) + + def test3(self) -> None: + """ + Test extracting the last header section in the document. + """ + # Prepare inputs. 
+ content = _get_markdown_example1() + content = hprint.dedent(content) + # Prepare outputs. + expected = r""" + # Header3 + Content under header 3. + """ + # Run test. + self.helper(content, "Header3", expected) + + def test4(self) -> None: + """ + Test extracting a header that spans to the end of document. + """ + # Prepare inputs. + content = _get_markdown_example2() + # Prepare outputs. + expected = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + """ + # Run test. + self.helper(content, "Header1", expected) + + def test5(self) -> None: + # Prepare inputs. + content = _get_markdown_no_header_example1() + # Call tested function. + with self.assertRaises(ValueError) as fail: + lines = content.split("\n") + hmarkdo.extract_section_from_markdown(lines, "Header4") + # Check output. + actual = str(fail.exception) + expected = r"Header 'Header4' not found" + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_extract_headers_from_markdown1 +# ############################################################################# + + +class Test_extract_headers_from_markdown1(hunitest.TestCase): + def helper(self, content: str, max_level: int, expected: str) -> None: + """ + Helper method to test extract_headers_from_markdown function. + + :param content: markdown content to extract headers from + :param max_level: maximum header level to extract + :param expected: expected output string representation + """ + # Call function. + lines = content.split("\n") + actual = hmarkdo.extract_headers_from_markdown( + lines, max_level=max_level + ) + # Check output. + self.assert_equal(str(actual), expected) + + def test1(self) -> None: + """ + Test extracting multiple headers with different levels from markdown + content. + """ + # Prepare inputs. + content = _get_markdown_example1() + max_level = 3 + # Prepare outputs. 
+ expected = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3), HeaderInfo(1, 'Header3', 5)]""" + # Run test. + self.helper(content, max_level, expected) + + def test2(self) -> None: + """ + Test extracting headers from a simple two-level structure. + """ + # Prepare inputs. + content = _get_markdown_example2() + max_level = 3 + # Prepare outputs. + expected = ( + r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" + ) + # Run test. + self.helper(content, max_level, expected) + + def test3(self) -> None: + # Prepare inputs. + content = r""" + This is some content without any headers. + """ + content = hprint.dedent(content) + # Call function. + lines = content.split("\n") + actual = hmarkdo.extract_headers_from_markdown(lines, max_level=3) + # Check output. + expected: List[str] = [] + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_extract_slides_from_markdown1 +# ############################################################################# + + +class Test_extract_slides_from_markdown1(hunitest.TestCase): + def helper(self, content: str, expected: str) -> None: + """ + Helper method to test extract_slides_from_markdown function. + + :param content: markdown content to extract slides from + :param expected: expected output string representation + """ + # Call function. + lines = content.split("\n") + actual = hmarkdo.extract_slides_from_markdown(lines) + # Check output. + self.assert_equal(str(actual), expected) + + def test1(self) -> None: + """ + Test extracting multiple slides from markdown presentation format. + """ + # Prepare inputs. + content = _get_markdown_slides_example1() + # Prepare outputs. + expected = r"""([HeaderInfo(1, 'Slide 1', 3), HeaderInfo(1, 'Slide 2', 8), HeaderInfo(1, 'Slide 3', 11)], 12)""" + # Run test. 
+ self.helper(content, expected) + + def test2(self) -> None: + """ + Test extracting a single slide from markdown presentation format. + """ + # Prepare inputs. + content = _get_markdown_slides_example2() + # Prepare outputs. + expected = r"""([HeaderInfo(1, 'Slide1', 3)], 4)""" + # Run test. + self.helper(content, expected) + + def test3(self) -> None: + # Prepare inputs. + content = _get_markdown_no_header_example1() + # Call function. + lines = content.split("\n") + actual = hmarkdo.extract_slides_from_markdown(lines) + # Check output. + expected = r"""([], 1)""" + self.assert_equal(str(actual), expected) + + +# ############################################################################# +# Test_selected_navigation_to_str1 +# ############################################################################# + + +class Test_selected_navigation_to_str1(hunitest.TestCase): + def test1(self) -> None: + """ + Create navigation bar from Markdown text `_get_markdown_example4()`. + """ + txt = _get_markdown_example4() + header_list_exp = """ + [HeaderInfo(1, 'Chapter 1', 1), + HeaderInfo(2, 'Section 1.1', 6), + HeaderInfo(3, 'Subsection 1.1.1', 11), + HeaderInfo(3, 'Subsection 1.1.2', 23), + HeaderInfo(2, 'Section 1.2', 31), + HeaderInfo(1, 'Chapter 2', 38), + HeaderInfo(2, 'Section 2.1', 43), + HeaderInfo(3, 'Subsection 2.1.1', 48), + HeaderInfo(2, 'Section 2.2', 56)] + """ + header_tree_exp = """ + - Chapter 1 + - Chapter 2 + """ + level = 3 + description = "Subsection 1.1.2" + nav_str_exp = """ + - Chapter 1 + - Section 1.1 + - Subsection 1.1.1 + - **Subsection 1.1.2** + - Section 1.2 + - Chapter 2 + """ + _test_navigation_flow( + self, + txt, + header_list_exp, + header_tree_exp, + level, + description, + nav_str_exp, + ) + + def test2(self) -> None: + txt = _get_markdown_example4() + _test_full_navigation_flow(self, txt) + + +# ############################################################################# +# Test_selected_navigation_to_str2 +# 
############################################################################# + + +class Test_selected_navigation_to_str2(hunitest.TestCase): + def test1(self) -> None: + """ + Create navigation bar from Markdown text `_get_markdown_example5()`. + """ + txt = _get_markdown_example5() + header_list_exp = r""" + [HeaderInfo(1, 'Models', 1), + HeaderInfo(2, 'Naive Bayes', 3), + HeaderInfo(2, 'Decision trees', 5), + HeaderInfo(2, 'Random forests', 7), + HeaderInfo(2, 'Linear models', 8)] + """ + header_tree_exp = """ + - Models + """ + level = 2 + description = "Decision trees" + nav_str_exp = """ + - Models + - Naive Bayes + - **Decision trees** + - Random forests + - Linear models + """ + _test_navigation_flow( + self, + txt, + header_list_exp, + header_tree_exp, + level, + description, + nav_str_exp, + ) + + def test2(self) -> None: + txt = _get_markdown_example5() + _test_full_navigation_flow(self, txt) + + +# ############################################################################# +# Test_modify_header_level1 +# ############################################################################# + + +class Test_modify_header_level1(hunitest.TestCase): + def helper( + self, input_lines: List[str], level: int, expected_lines: List[str] + ) -> None: + """ + Helper method to test `modify_header_level` function. + + :param input_lines: list of input text lines + :param level: level adjustment to apply + :param expected_lines: list of expected output lines + """ + # Prepare inputs. + input_text = "\n".join(input_lines) + # Call tested function. + actual_lines = hmarkdo.modify_header_level(input_lines, level) + actual = "\n".join(actual_lines) + # Check output. + expected = "\n".join(expected_lines) + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test the inputs to increase headings. + """ + # Prepare inputs and outputs. 
+ input_lines = [ + "# Chapter 1", + "## Section 1.1", + "### Subsection 1.1.1", + "#### Sub-subsection 1.1.1.1", + ] + level = 1 + expected_lines = [ + "## Chapter 1", + "### Section 1.1", + "#### Subsection 1.1.1", + "##### Sub-subsection 1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test2(self) -> None: + """ + Test inputs to increase headings with level 5 becoming level 6. + """ + # Prepare inputs and outputs. + input_lines = ["# Chapter 1", "##### Sub-sub-subsection 1.1.1.1.1"] + level = 1 + expected_lines = ["## Chapter 1", "###### Sub-sub-subsection 1.1.1.1.1"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test3(self) -> None: + """ + Test inputs to increase headings including a paragraph which remains + unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["# Chapter 1", "Paragraph 1"] + level = 1 + expected_lines = ["## Chapter 1", "Paragraph 1"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test4(self) -> None: + """ + Test inputs of paragraphs which remain unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["Paragraph 1", "Paragraph 2"] + level = 1 + expected_lines = ["Paragraph 1", "Paragraph 2"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test5(self) -> None: + """ + Test to increase headings with mixed levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "# Chapter 1", + "##### Sub-sub-subsection 1.1.1.1.1", + "# Chapter 2", + "### Subsection 2.1", + "# Chapter 3", + ] + level = 1 + expected_lines = [ + "## Chapter 1", + "###### Sub-sub-subsection 1.1.1.1.1", + "## Chapter 2", + "#### Subsection 2.1", + "## Chapter 3", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test6(self) -> None: + """ + Test the inputs to decrease headings. + """ + # Prepare inputs and outputs. 
+ input_lines = [ + "## Section 1.1", + "### Subsection 1.1.1", + "#### Sub-subsection 1.1.1.1", + "##### Sub-sub-subsection 1.1.1.1.1", + ] + level = -1 + expected_lines = [ + "# Section 1.1", + "## Subsection 1.1.1", + "### Sub-subsection 1.1.1.1", + "#### Sub-sub-subsection 1.1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test7(self) -> None: + """ + Test inputs to decrease headings by one level. + """ + # Prepare inputs and outputs. + input_lines = [ + "## Chapter 1", + "##### Sub-subsection 1.1.1.1", + ] + level = -1 + expected_lines = [ + "# Chapter 1", + "#### Sub-subsection 1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test8(self) -> None: + """ + Test inputs of paragraphs which remain unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] + level = -1 + expected_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test9(self) -> None: + """ + Test increasing headers by 2 levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "# Chapter 1", + "## Section 1.1", + "### Subsection 1.1.1", + ] + level = 2 + expected_lines = [ + "### Chapter 1", + "#### Section 1.1", + "##### Subsection 1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test10(self) -> None: + """ + Test decreasing headers by 2 levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "### Chapter 1", + "#### Section 1.1", + "##### Subsection 1.1.1", + ] + level = -2 + expected_lines = [ + "# Chapter 1", # 3-2=1 + "## Section 1.1", # 4-2=2 + "### Subsection 1.1.1", # 5-2=3 + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test11(self) -> None: + """ + Test increasing headers by 2 levels. + """ + # Prepare inputs and outputs. 
+ input_lines = [ + "### Level 3", + "#### Level 4", + ] + level = 2 + expected_lines = [ + "##### Level 3", # 3+2=5 + "###### Level 4", # 4+2=6 + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + +# ############################################################################# +# Test_format_headers1 +# ############################################################################# + + +class Test_format_headers1(hunitest.TestCase): + def helper( + self, input_text: List[str], expected: List[str], max_lev: int + ) -> None: + """ + Process the given text with a specified maximum level and compare the + result with the expected output. + + :param input_text: the text to be processed + :param expected: the expected output after processing the text + :param max_lev: the maximum heading level to be formatted + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + write_file = os.path.join(scratch_dir, "write_file.txt") + # Call tested function. + hmarkdo.format_headers(input_text, write_file, max_lev=max_lev) + # Check output. + actual = hio.from_file(write_file) + self.assertEqual(actual, "\n".join(expected)) + + def test1(self) -> None: + """ + Test the inputs to check the basic formatting of headings. + """ + input_text = [ + "# Chapter 1", + "section text", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "section text", + ] + self.helper(input_text, expected, max_lev=1) + + def test2(self) -> None: + """ + Test inputs with headings beyond the maximum level to ensure they are + ignored during formatting. 
+ """ + input_text = [ + "# Chapter 1", + "## Section 1.1", + "### Section 1.1.1", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "## ############################################################################", + "## Section 1.1", + "## ############################################################################", + "### Section 1.1.1", + ] + self.helper(input_text, expected, max_lev=2) + + def test3(self) -> None: + """ + Test the inputs to check that markdown line separators are removed. + """ + input_text = [ + "# Chapter 1", + "-----------------", + "Text", + "############", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "Text", + ] + self.helper(input_text, expected, max_lev=1) + + def test4(self) -> None: + """ + Test inputs where max_level is inferred from the file content. + """ + input_text = [ + "# Chapter 1", + "max_level=1", + "## Section 1.1", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "max_level=1", + "## Section 1.1", + ] + self.helper(input_text, expected, max_lev=2) + + def test5(self) -> None: + """ + Test inputs with no headers to ensure they remain unchanged. 
+ """ + input_text = [ + "Only text", + "No headings", + ] + expected = [ + "Only text", + "No headings", + ] + self.helper(input_text, expected, max_lev=3) + + +# ############################################################################# +# Test_sanity_check_header_list1 +# ############################################################################# + + +class Test_sanity_check_header_list1(hunitest.TestCase): + def test1(self) -> None: + """ + Test that the header list with valid level increase is accepted. + """ + # Prepare inputs. + header_list = get_header_list1() + # Call function. + hmarkdo.sanity_check_header_list(header_list) + + def test2(self) -> None: + """ + Test that the header list with an increase of more than one level + raises an error. + """ + # Prepare inputs. + header_list = get_header_list4() + # Call function. + with self.assertRaises(ValueError) as err: + hmarkdo.sanity_check_header_list(header_list) + # Check output. + actual = str(err.exception) + self.check_string(actual) + + def test3(self) -> None: + """ + Test that the header list is accepted when heading levels decrease by + more than one. + """ + # Prepare inputs. + header_list = get_header_list5() + # Call function. + hmarkdo.sanity_check_header_list(header_list) + + +# ############################################################################# +# Test__has_internal_capitals1 +# ############################################################################# + + +class Test__has_internal_capitals1(hunitest.TestCase): + """ + Test `_has_internal_capitals` function. + """ + + def helper(self, word: str, expected: bool) -> None: + """ + Test helper for `_has_internal_capitals`. + + :param word: word to test + :param expected: expected result + """ + # Run test. + actual = hmarhead._has_internal_capitals(word) + # Check outputs. + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test word with internal capital letters. + """ + # Prepare inputs. 
+ word = "SimpleFeedForward" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test2(self) -> None: + """ + Test word with multiple internal capital letters. + """ + # Prepare inputs. + word = "DeepNPTS" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test3(self) -> None: + """ + Test word with capital only at the start. + """ + # Prepare inputs. + word = "Machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test4(self) -> None: + """ + Test all lowercase word. + """ + # Prepare inputs. + word = "learning" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test5(self) -> None: + """ + Test all uppercase word. + """ + # Prepare inputs. + word = "ML" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test6(self) -> None: + """ + Test single lowercase character. + """ + # Prepare inputs. + word = "a" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test7(self) -> None: + """ + Test single uppercase character. + """ + # Prepare inputs. + word = "A" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test8(self) -> None: + """ + Test empty string. + """ + # Prepare inputs. + word = "" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test9(self) -> None: + """ + Test camelCase word. + """ + # Prepare inputs. + word = "camelCase" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + +# ############################################################################# +# Test_capitalize_header1 +# ############################################################################# + + +class Test_capitalize_header1(hunitest.TestCase): + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. 
+ txt = hprint.dedent(txt) + # Run function. + lines = txt.split("\n") + actual_lines = hmarkdo.capitalize_header(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test capitalizing a short two-word title. + """ + txt = r""" + * ML theory + """ + expected = r""" + * ML Theory + """ + self.helper(txt, expected) + + def test2(self) -> None: + """ + Test capitalizing a longer multi-word title. + """ + txt = r""" + * A map of machine learning + """ + expected = r""" + * A Map of Machine Learning + """ + self.helper(txt, expected) + + def test3(self) -> None: + """ + Test that strings inside backticks are preserved. + """ + txt = r""" + # Using `python` for Machine Learning + """ + expected = r""" + # Using `python` for Machine Learning + """ + self.helper(txt, expected) + + def test4(self) -> None: + """ + Test that strings inside single quotes are preserved. + """ + txt = r""" + * Working with 'machine learning' algorithms + """ + expected = r""" + * Working with 'machine learning' Algorithms + """ + self.helper(txt, expected) + + def test5(self) -> None: + """ + Test that strings inside double quotes are preserved. + """ + txt = r""" + # Understanding "deep learning" concepts + """ + expected = r""" + # Understanding "deep learning" Concepts + """ + self.helper(txt, expected) + + def test6(self) -> None: + """ + Test mixed usage of quotes and backticks. + """ + txt = r""" + * Using `python` and "machine learning" for 'data science' + """ + expected = r""" + * Using `python` and "machine learning" for 'data science' + """ + self.helper(txt, expected) + + def test7(self) -> None: + """ + Test complex title with various quote types. 
+ """ + txt = r""" + # Introduction to `sklearn` and "data preprocessing" in 'python' + """ + expected = r""" + # Introduction to `sklearn` and "data preprocessing" in 'python' + """ + self.helper(txt, expected) + + def test8(self) -> None: + """ + Test that words with internal capitals are preserved. + """ + txt = r""" + # SimpleFeedForward model + """ + expected = r""" + # SimpleFeedForward Model + """ + self.helper(txt, expected) + + def test9(self) -> None: + """ + Test multiple words with internal capitals. + """ + txt = r""" + * DeepNPTS and SimpleFeedForward models + """ + expected = r""" + * DeepNPTS and SimpleFeedForward Models + """ + self.helper(txt, expected) + + def test10(self) -> None: + """ + Test mixed normal words and words with internal capitals. + """ + txt = r""" + # Using SimpleFeedForward for machine learning + """ + expected = r""" + # Using SimpleFeedForward for Machine Learning + """ + self.helper(txt, expected) + + def test11(self) -> None: + """ + Test that headers inside fenced code blocks are not processed. + """ + txt = r""" + # Main header + + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + + ## Another header + """ + expected = r""" + # Main Header + + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + + ## Another Header + """ + self.helper(txt, expected) + + def test12(self) -> None: + """ + Test headers inside multiple fenced code blocks are not processed. + """ + txt = r""" + # First header + + ```python + # comment in code + x = 1 + ``` + + ## Second header + + ```bash + # shell comment + echo "hello" + ``` + """ + expected = r""" + # First Header + + ```python + # comment in code + x = 1 + ``` + + ## Second Header + + ```bash + # shell comment + echo "hello" + ``` + """ + self.helper(txt, expected) + + def test13(self) -> None: + """ + Test that the first word after a numeric prefix is capitalized. 
+ """ + txt = r""" + ## 4.4 the Victim Triangle + """ + expected = r""" + ## 4.4 The Victim Triangle + """ + self.helper(txt, expected) + + def test14(self) -> None: + """ + Test that "of", "a", "an" after a numeric prefix are capitalized. + """ + txt = r""" + ## 1.1 of mice and men + """ + expected = r""" + ## 1.1 Of Mice and Men + """ + self.helper(txt, expected) + + def test15(self) -> None: + """ + Test that "of", "a", "an" are capitalized. + """ + txt = r""" + ## of mice and men + """ + expected = r""" + ## Of Mice and Men + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_capitalize_header2 +# ############################################################################# + + +class Test_capitalize_header2(hunitest.TestCase): + """ + Test enhanced capitalize_header functionality for mixed case words and + fenced blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + """ + Helper method to test capitalize_header function. + + :param txt: input text to process + :param expected: expected output after processing + """ + # Prepare inputs. + txt = hprint.dedent(txt) + # Run function. + lines = txt.split("\n") + actual_lines = hmarkdo.capitalize_header(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test that SimpleFeedForward is preserved as-is. + """ + txt = r""" + # using SimpleFeedForward for predictions + """ + expected = r""" + # Using SimpleFeedForward for Predictions + """ + self.helper(txt, expected) + + def test2(self) -> None: + """ + Test that DeepNPTS is preserved as-is. + """ + txt = r""" + # training with DeepNPTS model + """ + expected = r""" + # Training with DeepNPTS Model + """ + self.helper(txt, expected) + + def test3(self) -> None: + """ + Test multiple mixed case words in the same header. 
+ """ + txt = r""" + # comparing SimpleFeedForward and DeepNPTS models + """ + expected = r""" + # Comparing SimpleFeedForward and DeepNPTS Models + """ + self.helper(txt, expected) + + def test4(self) -> None: + """ + Test mixed case words combined with all caps words. + """ + txt = r""" + # using API with SimpleFeedForward for ML tasks + """ + expected = r""" + # Using API with SimpleFeedForward for ML Tasks + """ + self.helper(txt, expected) + + def test5(self) -> None: + """ + Test mixed case word as the first word in header. + """ + txt = r""" + # SimpleFeedForward network architecture + """ + expected = r""" + # SimpleFeedForward Network Architecture + """ + self.helper(txt, expected) + + def test6(self) -> None: + """ + Test that headers inside fenced blocks are not capitalized. + """ + txt = r""" + # Main header + Some text + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + """ + expected = r""" + # Main Header + Some text + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + """ + self.helper(txt, expected) + + def test7(self) -> None: + """ + Test that multiple headers inside fenced blocks are not capitalized. 
+ """ + txt = r""" + # introduction to forecasting + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + q75 = forecast.quantile(0.75) + + # 90% confidence interval + q05 = forecast.quantile(0.05) + q95 = forecast.quantile(0.95) + + # mean and median + mean = forecast.mean + median = forecast.quantile(0.5) + ``` + # conclusion + """ + expected = r""" + # Introduction to Forecasting + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + q75 = forecast.quantile(0.75) + + # 90% confidence interval + q05 = forecast.quantile(0.05) + q95 = forecast.quantile(0.95) + + # mean and median + mean = forecast.mean + median = forecast.quantile(0.5) + ``` + # Conclusion + """ + self.helper(txt, expected) + + def test8(self) -> None: + """ + Test that headers in fenced blocks with language specifier are not + capitalized. + """ + txt = r""" + # data processing + ```bash + # run the script + python script.py + ``` + """ + expected = r""" + # Data Processing + ```bash + # run the script + python script.py + ``` + """ + self.helper(txt, expected) + + def test9(self) -> None: + """ + Test mixed case words inside fenced blocks are preserved. + """ + txt = r""" + # using SimpleFeedForward model + ```python + # SimpleFeedForward implementation + class SimpleFeedForward: + pass + ``` + """ + expected = r""" + # Using SimpleFeedForward Model + ```python + # SimpleFeedForward implementation + class SimpleFeedForward: + pass + ``` + """ + self.helper(txt, expected) + + def test10(self) -> None: + """ + Test multiple fenced blocks in the same document. 
+ """ + txt = r""" + # first section + ```python + # code block 1 + x = 1 + ``` + # second section + ```python + # code block 2 + y = 2 + ``` + """ + expected = r""" + # First Section + ```python + # code block 1 + x = 1 + ``` + # Second Section + ```python + # code block 2 + y = 2 + ``` + """ + self.helper(txt, expected) + + def test11(self) -> None: + """ + Test that slide titles (starting with *) also preserve mixed case. + """ + txt = r""" + * using SimpleFeedForward for predictions + """ + expected = r""" + * Using SimpleFeedForward for Predictions + """ + self.helper(txt, expected) + + def test12(self) -> None: + """ + Test mixed case words with punctuation. + """ + txt = r""" + # SimpleFeedForward: a neural network approach + """ + expected = r""" + # SimpleFeedForward: a Neural Network Approach + """ + self.helper(txt, expected) + + def test13(self) -> None: + """ + Test that normal words without mixed case are still capitalized + properly. + """ + txt = r""" + # introduction to machine learning + """ + expected = r""" + # Introduction to Machine Learning + """ + self.helper(txt, expected) + + def test14(self) -> None: + """ + Test empty fenced blocks don't cause issues. + """ + txt = r""" + # header before + ``` + ``` + # header after + """ + expected = r""" + # Header Before + ``` + ``` + # Header After + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_has_mixed_case1 +# ############################################################################# + + +class Test_has_mixed_case1(hunitest.TestCase): + """ + Test the _has_mixed_case helper function. + """ + + def helper(self, word: str, expected: bool) -> None: + """ + Test helper for has_mixed_case. + + :param word: word to test + :param expected: expected result + """ + # Call function. + actual = hmarkdo.has_mixed_case(word) + # Check output. 
+ self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test SimpleFeedForward has mixed case. + """ + # Prepare inputs. + word = "SimpleFeedForward" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test2(self) -> None: + """ + Test DeepNPTS has mixed case (all caps after first). + """ + # Prepare inputs. + word = "DeepNPTS" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test3(self) -> None: + """ + Test Machine does not have mixed case (only first char capital). + """ + # Prepare inputs. + word = "Machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test4(self) -> None: + """ + Test lowercase word has no mixed case. + """ + # Prepare inputs. + word = "machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test5(self) -> None: + """ + Test all caps word has mixed case (caps after first position). + """ + # Prepare inputs. + word = "API" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test6(self) -> None: + """ + Test single character has no mixed case. + """ + # Prepare inputs. + word = "A" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test7(self) -> None: + """ + Test two character word with first capital has no mixed case. + """ + # Prepare inputs. + word = "At" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test8(self) -> None: + """ + Test two character word with both caps has mixed case. + """ + # Prepare inputs. + word = "ML" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test9(self) -> None: + """ + Test camelCase word has mixed case. + """ + # Prepare inputs. + word = "camelCase" + # Prepare outputs. + expected = True + # Run test. 
+ self.helper(word, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py new file mode 100644 index 000000000..f12ae2d5a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py @@ -0,0 +1,377 @@ +import logging +from typing import List, Tuple, cast + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: + res = [ + hmarkdo.HeaderInfo(level, text, 5 * i + 1) + for i, (level, text) in enumerate(data) + ] + return res + + +def get_header_list6() -> hmarkdo.HeaderList: + """ + - Spelling + - All + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + - Docstrings + - LLM + - Linter + - Unit_tests + - All + - LLM + - Linter + """ + data = [ + (1, "Spelling"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + (1, "Python"), + (2, "Naming"), + (3, "LLM"), + (3, "Linter"), + (2, "Docstrings"), + (3, "LLM"), + (3, "Linter"), + (1, "Unit_tests"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_guidelines_txt1() -> str: + txt = r""" + # General + + ## Spelling + + ### LLM + + ### Linter + + - Spell commands in lower case and programs with the first letter in upper case + - E.g., `git` as a command, `Git` as a program + - E.g., capitalize the first letter of `Python` + - Capitalize `JSON`, `CSV`, `DB` and other abbreviations + + # Python + + ## Naming + + ### LLM + + - Name functions using verbs and verbs/actions + - Good: `download_data()`, `process_input()`, `calculate_sum()` + - Good: Python internal functions as 
`__repr__`, `__init__` are valid + - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid + - Name classes using nouns + - Good: `Downloader()`, `DataProcessor()`, `User()` + - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` + + ### Linter + + - Name executable Python scripts using verbs and actions + - E.g., `download.py` and not `downloader.py` + + # Unit_tests + + ## Rules + + ### LLM + + - A test class should test only one function or class to help understanding + test failures + - A test method should only test a single case to ensures clarity and + precision in testing + - E.g., "for these inputs the function responds with this output" + """ + txt = hprint.dedent(txt) + txt = cast(str, txt) + return txt + + +# ############################################################################# +# Test_convert_header_list_into_guidelines1 +# ############################################################################# + + +class Test_convert_header_list_into_guidelines1(hunitest.TestCase): + def test1(self) -> None: + """ + Test converting a header list into guidelines. + """ + # Prepare inputs. + header_list = get_header_list6() + # Call function. + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. 
+ actual = "\n".join(map(str, guidelines)) + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + HeaderInfo(1, 'Unit_tests:All:LLM', 66) + HeaderInfo(1, 'Unit_tests:All:Linter', 71) + """ + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_extract_rules1 +# ############################################################################# + + +class Test_extract_rules1(hunitest.TestCase): + def helper(self, selection_rules: List[str], expected: str) -> None: + """ + Test extracting rules from a markdown file. + """ + # Prepare inputs. + guidelines = get_header_list6() + guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) + # Check output. + actual = "\n".join(map(str, selected_guidelines)) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:*:LLM"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + """ + self.helper(selection_rules, expected) + + def test2(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:NONE:LLM"] + expected = """ + """ + self.helper(selection_rules, expected) + + def test3(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:All:*"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + """ + self.helper(selection_rules, expected) + + def test4(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + selection_rules = ["Spelling:All:*", "Python:*:*"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + """ + self.helper(selection_rules, expected) + + +# ############################################################################# +# Test_parse_rules_from_txt1 +# ############################################################################# + + +class Test_parse_rules_from_txt1(hunitest.TestCase): + def helper(self, text: str, expected: List[str]) -> None: + # Prepare inputs. + text = hprint.dedent(text) + lines = text.split("\n") + # Call function. + actual = hmarkdo.parse_rules_from_txt(lines) + # Check output. + actual = str(actual) + expected = str(expected) + self.assert_equal(actual, expected, dedent=True) + + def test_basic_list1(self) -> None: + """ + Test extracting simple first-level bullet points. + """ + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + expected = ["- Item 1", "- Item 2", "- Item 3"] + self.helper(text, expected) + + def test_nested_list1(self) -> None: + """ + Test extracting bullet points with nested sub-items. + """ + text = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + expected = [ + "- Item 1", + "- Item 2\n - Sub-item 2.1\n - Sub-item 2.2", + "- Item 3", + ] + self.helper(text, expected) + + def test_empty_list1(self) -> None: + """ + Test handling empty input. + """ + text = "" + expected = [] + self.helper(text, expected) + + +# ############################################################################# +# Test_end_to_end_rules1 +# ############################################################################# + + +class Test_end_to_end_rules1(hunitest.TestCase): + def test_get_header_list1(self) -> None: + """ + Test extracting headers from a markdown file. 
+ """ + # Prepare inputs. + txt = get_guidelines_txt1() + max_level = 4 + # Run function. + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) + # Check output. + actual = "\n".join(map(str, header_list)) + expected = """ + HeaderInfo(1, 'General', 1) + HeaderInfo(2, 'Spelling', 3) + HeaderInfo(3, 'LLM', 5) + HeaderInfo(3, 'Linter', 7) + HeaderInfo(1, 'Python', 14) + HeaderInfo(2, 'Naming', 16) + HeaderInfo(3, 'LLM', 18) + HeaderInfo(3, 'Linter', 28) + HeaderInfo(1, 'Unit_tests', 33) + HeaderInfo(2, 'Rules', 35) + HeaderInfo(3, 'LLM', 37) + """ + self.assert_equal(actual, expected, dedent=True) + # Run function. + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. + actual = "\n".join(map(str, guidelines)) + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.assert_equal(actual, expected, dedent=True) + + def helper_extract_rules( + self, selection_rules: List[str], expected: str + ) -> None: + """ + Helper function to test extracting rules from a markdown file. + """ + # Prepare inputs. + txt = get_guidelines_txt1() + max_level = 4 + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) + # Check output. + actual = "\n".join(map(str, selected_guidelines)) + self.assert_equal(actual, expected, dedent=True) + + def test_extract_rules1(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + selection_rules = ["General:*:LLM"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules2(self) -> None: + selection_rules = ["General:NONE:LLM"] + expected = """ + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules3(self) -> None: + selection_rules = ["*:*:LLM"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules4(self) -> None: + selection_rules = ["*:*:LLM", "General:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules5(self) -> None: + selection_rules = ["*:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules6(self) -> None: + selection_rules = ["*:*:*", "General:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py new file mode 100644 
index 000000000..39137551e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py @@ -0,0 +1,399 @@ +import logging +from typing import List + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_process_slides +# ############################################################################# + + +class Test_process_slides(hunitest.TestCase): + @staticmethod + def transform( + slide_text: List[str], + *, + slide_title: str = "", + slide_line_number: int = 0, + ) -> str: + """ + Example adding a `@` to the beginning of each line of the slide. + + :param slide_text: List of lines in the slide + :param slide_title: Title of the slide + :param slide_line_number: Line number of the slide + :return: Transformed text + """ + _LOG.debug("input=\n%s", "\n".join(slide_text)) + # Transform. + text_out = [f"@{line}" for line in slide_text] + _LOG.debug("output=\n%s", "\n".join(text_out)) + return text_out + + def helper(self, text: str, expected: str) -> None: + """ + Test helper for process_slides. + + :param text: Input text with slides + :param expected: Expected output after transformation + """ + # Prepare inputs. + text = hprint.dedent(text, remove_lead_trail_empty_lines_=False) + # Process. + actual = hmarkdo.process_slides(text, self.transform) + # Check output. + expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test multiple slides. 
+ """ + text = """ + * Slide 1 + - Point 1 + - Point 2 + + * Slide 2 + - Point A + - Point B + """ + expected = """ + @* Slide 1 + @ - Point 1 + @ - Point 2 + @ + @* Slide 2 + @ - Point A + @ - Point B + """ + self.helper(text, expected) + + def test2(self) -> None: + """ + Test single line slide. + """ + text = """ + * Single line slide + """ + expected = """ + @* Single line slide + """ + self.helper(text, expected) + + def test3(self) -> None: + """ + Test slide with inline comment. + """ + text = """ + * Slide with comment + # This is a comment + - Point 1 + """ + expected = """ + @* Slide with comment + @ # This is a comment + @ - Point 1 + """ + self.helper(text, expected) + + def test4(self) -> None: + """ + Test slide with comment block. + """ + text = """ + * Slide with block + + - Point 1 + """ + expected = """ + @* Slide with block + @ + @ - Point 1 + """ + self.helper(text, expected) + + def test5(self) -> None: + text = """ + * Slide 1 + * Slide 2 + """ + expected = """ + @* Slide 1 + @* Slide 2 + """ + self.helper(text, expected) + + def test6(self) -> None: + text = """ + + * Slide 1 + * Slide 2 + """ + expected = """ + + @* Slide 1 + @* Slide 2 + """ + self.helper(text, expected) + + def test7(self) -> None: + text = """ + + * Slide 1 + * Slide 2 + + """ + expected = """ + + @* Slide 1 + @* Slide 2 + @ + """ + self.helper(text, expected) + + def test8(self) -> None: + text = """ + //* Slide 1 + * Slide 2 + + """ + expected = """ + //* Slide 1 + @* Slide 2 + @ + """ + self.helper(text, expected) + + +# ############################################################################# +# Test_convert_slide_to_markdown +# ############################################################################# + + +class Test_convert_slide_to_markdown(hunitest.TestCase): + """ + Test converting slide bullets to markdown headers. + """ + + def helper(self, input_text, expected_text) -> None: + """ + Test helper for convert_slide_to_markdown. 
+ + :param input_text: Input text with slide bullets + :param expected_text: Expected output with markdown headers + """ + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarkdo.convert_slide_to_markdown(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test converting a simple slide bullet to markdown header. + """ + input_text = """* This is a slide title""" + expected_text = """##### This is a slide title""" + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test converting multiple slide bullets. + """ + input_text = """ + * First slide + - Some content + * Second slide + - More content + """ + expected_text = """ + ##### First slide + - Some content + ##### Second slide + - More content + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test converting slides mixed with other content. + """ + input_text = """ + Some intro text + * Slide title + - Point 1 + - Point 2 + Regular markdown text + * Another slide + """ + expected_text = """ + Some intro text + ##### Slide title + - Point 1 + - Point 2 + Regular markdown text + ##### Another slide + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test converting text with no slide bullets. + """ + input_text = """ + Regular text + More text + - Regular bullet point + """ + expected_text = """ + Regular text + More text + - Regular bullet point + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test converting empty input. 
+ """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_convert_markdown_to_slide +# ############################################################################# + + +class Test_convert_markdown_to_slide(hunitest.TestCase): + """ + Test converting markdown headers to slide bullets. + """ + + def helper(self, input_text: str, expected_text: str) -> None: + """ + Test helper for convert_markdown_to_slide. + + :param input_text: Input text with markdown headers + :param expected_text: Expected output with slide bullets + """ + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarkdo.convert_markdown_to_slide(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test converting a simple h5 header to slide bullet. + """ + input_text = """ + ##### This is a slide title + """ + expected_text = """ + * This is a slide title + """ + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test converting multiple h5 headers. + """ + input_text = """ + ##### First slide + - Some content + ##### Second slide + - More content + """ + expected_text = """ + * First slide + - Some content + * Second slide + - More content + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test converting headers mixed with other content. + """ + input_text = """ + Some intro text + ##### Slide title + - Point 1 + - Point 2 + Regular markdown text + ##### Another slide + """ + expected_text = """ + Some intro text + * Slide title + - Point 1 + - Point 2 + Regular markdown text + * Another slide + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test converting text with no h5 headers. 
+ """ + input_text = """ + Regular text + # H1 header + ## H2 header + #### H4 header + """ + expected_text = """ + Regular text + # H1 header + ## H2 header + #### H4 header + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test converting empty input. + """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test6(self) -> None: + """ + Test that converting slide to markdown and back gives original result. + """ + # Prepare inputs. + input_text = """ + * First slide + - Some content + * Second slide + Regular text + """ + original_lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + markdown_lines = hmarkdo.convert_slide_to_markdown(original_lines) + roundtrip_lines = hmarkdo.convert_markdown_to_slide(markdown_lines) + # Check outputs. + self.assert_equal(str(roundtrip_lines), str(original_lines)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py new file mode 100644 index 000000000..f651aa3bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py @@ -0,0 +1,196 @@ +import logging +import pprint +from typing import Dict, List + +import helpers.hmarkdown_tables as hmartabl +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_tables_with_tags1 +# ############################################################################# + + +class Test_replace_tables_with_tags1(hunitest.TestCase): + def helper( + self, text: str, expected_lines: List[str], expected_map: Dict[str, str] + ) -> None: + """ + Test replacing 
markdown tables with tags. + """ + lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) + lines = lines.split("\n") + # Call function. + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + # Check output. + table_map_as_str = pprint.pformat(table_map) + expected_map_as_str = pprint.pformat(expected_map) + self.assert_equal(table_map_as_str, expected_map_as_str) + # + actual_lines = "\n".join(actual_lines) + expected_lines = hprint.dedent( + expected_lines, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual_lines, expected_lines) + + def helper_round_trip(self, text: str) -> None: + """ + Test the round trip. + """ + # Do the round trip. + lines = text.split("\n") + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + act_text = hmartabl.replace_tags_with_tables(actual_lines, table_map) + # Check output. + act_text = "\n".join(act_text) + self.assert_equal(act_text, text) + + def test1(self) -> None: + """ + Test replacing simple markdown table with tags. + """ + # Prepare inputs. + text = """ + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + Text between tables + | Name | Age | City | + |------|-----|------| + | John | 25 | NYC | + Some text after + """ + # Prepare outputs. + expected_lines = """ + Some text before + + Text between tables + + Some text after + """ + # Check table map. + expected_map = { + "1": "| Column 1 | Column 2 |\n|----------|----------|\n| Value 1 | Value 2 |\n| Value 3 | Value 4 |", + "2": "| Name | Age | City |\n|------|-----|------|\n| John | 25 | NYC |", + } + self.helper(text, expected_lines, expected_map) + + def test2(self) -> None: + """ + Test table with alignment indicators. 
+ """ + text = """ + | Left | Center | Right | + |:-----|:------:|------:| + | L1 | C1 | R1 | + | L2 | C2 | R2 | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Left | Center | Right |\n|:-----|:------:|------:|\n| L1 | C1 | R1 |\n| L2 | C2 | R2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test3(self) -> None: + """ + Test table with minimal structure. + """ + text = """ + Before + | A | B | + |---|---| + | 1 | 2 | + After + """ + expected_lines = """ + Before + + After + """ + expected_map = {"1": "| A | B |\n|---|---|\n| 1 | 2 |"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test4(self) -> None: + """ + Test table with empty cells. + """ + text = """ + | Col1 | Col2 | Col3 | + |------|------|------| + | A | | C | + | | B | | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Col1 | Col2 | Col3 |\n|------|------|------|\n| A | | C |\n| | B | |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test5(self) -> None: + """ + Test multiple tables with different column counts. + """ + text = """ + First table: + | A | B | + |---|---| + | 1 | 2 | + + Second table: + | X | Y | Z | W | + |---|---|---|---| + | a | b | c | d | + | e | f | g | h | + """ + expected_lines = """ + First table: + + + Second table: + + """ + expected_map = { + "1": "| A | B |\n|---|---|\n| 1 | 2 |", + "2": "| X | Y | Z | W |\n|---|---|---|---|\n| a | b | c | d |\n| e | f | g | h |", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test6(self) -> None: + """ + Test table with indentation. 
+ """ + text = """ + Outside + | Col1 | Col2 | + |------|------| + | Val1 | Val2 | + End + """ + expected_lines = """ + Outside + + End + """ + expected_map = { + "1": " | Col1 | Col2 |\n |------|------|\n | Val1 | Val2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py new file mode 100644 index 000000000..fc88b62a1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py @@ -0,0 +1,228 @@ +import logging + +import helpers.hmarkdown as hmarkdo +import helpers.hmarkdown_toc as hmartoc +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_extract_yaml_frontmatter1 +# ############################################################################# + + +class Test_extract_yaml_frontmatter1(hunitest.TestCase): + """ + Test the extract_yaml_frontmatter function. + """ + + def helper( + self, + txt: str, + expected_frontmatter: list, + expected_remaining: list, + ) -> None: + """ + Test helper for extract_yaml_frontmatter. + + :param txt: Input text to process + :param expected_frontmatter: Expected front matter lines + :param expected_remaining: Expected remaining lines + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + frontmatter, remaining = hmartoc.extract_yaml_frontmatter(lines) + # Check outputs. 
+ self.assertEqual(frontmatter, expected_frontmatter) + self.assertEqual(remaining, expected_remaining) + + def test1(self) -> None: + """ + Test extracting YAML front matter from a file. + """ + # Prepare inputs. + txt = """ + --- + title: My Document + date: 2024-01-01 + --- + # Content + This is the main content. + """ + # Prepare outputs. + expected_frontmatter = [ + "---", + "title: My Document", + "date: 2024-01-01", + "---", + ] + expected_remaining = ["# Content", "This is the main content."] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test2(self) -> None: + """ + Test processing a file without YAML front matter. + """ + # Prepare inputs. + txt = """ + # Content + This is the main content. + """ + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = ["# Content", "This is the main content."] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test3(self) -> None: + """ + Test handling incomplete YAML front matter (missing closing delimiter). + """ + # Prepare inputs. + txt = """ + --- + title: My Document + # Content without closing delimiter + """ + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = lines + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test4(self) -> None: + """ + Test extracting empty YAML front matter. + """ + # Prepare inputs. + txt = """ + --- + --- + # Content + """ + # Prepare outputs. + expected_frontmatter = ["---", "---"] + expected_remaining = ["# Content"] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test5(self) -> None: + """ + Test that separators not at the beginning are not treated as front matter. + """ + # Prepare inputs. 
+ txt = """ + # Content + --- + More content + """ + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = lines + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + +# ############################################################################# +# Test_remove_table_of_contents1 +# ############################################################################# + + +class Test_remove_table_of_contents1(hunitest.TestCase): + def test1(self) -> None: + """ + Test removing table of contents from markdown text. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + + - [Section 1](#section-1) + - [Section 2](#section-2) + + + ## Section 1 + + Content of section 1. + """ + expected = """ + # Introduction + + This is an introduction. + + + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Test text without table of contents remains unchanged. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + self.assert_equal(actual, text) + + def test3(self) -> None: + """ + Test removing multi-line table of contents. + """ + # Prepare inputs. + text = """ + # Introduction + + + - [Section 1](#section-1) + - [Subsection 1.1](#subsection-11) + - [Section 2](#section-2) + - [Subsection 2.1](#subsection-21) + - [Subsection 2.2](#subsection-22) + + + ## Section 1 + """ + expected = """ + # Introduction + + + + ## Section 1 + """ + text = hprint.dedent(text) + # Run test. 
+ actual = hmarkdo.remove_table_of_contents(text) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py new file mode 100644 index 000000000..16f0f097a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py @@ -0,0 +1,394 @@ +import logging + +import helpers.hmkdocs as hmkdocs +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_dedent_python_code_blocks1 +# ############################################################################# + + +class Test_dedent_python_code_blocks1(hunitest.TestCase): + def test_simple_code_block(self) -> None: + """ + Test dedenting a simple Python code block. + """ + # Prepare inputs. + text = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + expected = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, expected) + + def test_multiple_code_blocks(self) -> None: + """ + Test dedenting multiple Python code blocks. + """ + # Prepare inputs. 
+ text = """ + # Example 1 + + ```python + def hello(): + print("Hello") + ``` + + # Example 2 + + ```python + def goodbye(): + print("Goodbye") + ``` + """ + expected = """ + # Example 1 + + ```python + def hello(): + print("Hello") + ``` + + # Example 2 + + ```python + def goodbye(): + print("Goodbye") + ``` + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, expected) + + def test_no_python_blocks(self) -> None: + """ + Test text without Python code blocks remains unchanged. + """ + # Prepare inputs. + text = """ + # Example + + This is just text. + + ```javascript + console.log("Hello"); + ``` + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, text) + + def test_already_aligned_code(self) -> None: + """ + Test code that is already aligned. + """ + # Prepare inputs. + text = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, text) + + +# ############################################################################# +# Test_replace_indentation1 +# ############################################################################# + + +class Test_replace_indentation1(hunitest.TestCase): + def test_two_to_four_spaces(self) -> None: + """ + Test replacing 2-space indentation with 4-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. 
+ expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_four_to_two_spaces(self) -> None: + """ + Test replacing 4-space indentation with 2-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=4, output_spaces=2 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_two_to_eight_spaces(self) -> None: + """ + Test replacing 2-space indentation with 8-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=8 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_three_to_six_spaces(self) -> None: + """ + Test replacing 3-space indentation with 6-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=3, output_spaces=6 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_no_indentation(self) -> None: + """ + Test text without indentation remains unchanged. + """ + # Prepare inputs. + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. 
+ self.assert_equal(actual, text) + + def test_same_input_output_spaces(self) -> None: + """ + Test that using same input and output spaces leaves text unchanged. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=2 + ) + # Check output. + self.assert_equal(actual, text) + + def test_empty_text(self) -> None: + """ + Test empty text handling. + """ + # Prepare inputs. + text = "" + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. + self.assert_equal(actual, text) + + def test_zero_to_four_spaces(self) -> None: + """ + Test converting zero indentation to 4 spaces (edge case). + """ + # Prepare inputs. + text = """ + Item 1 + Item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=1, output_spaces=4 + ) + # Check output. + self.assert_equal(actual, text) + + +# ############################################################################# +# Test_preprocess_mkdocs_markdown1 +# ############################################################################# + + +class Test_preprocess_mkdocs_markdown1(hunitest.TestCase): + def test_full_preprocessing(self) -> None: + """ + Test the complete preprocessing pipeline. + """ + # Prepare inputs. 
+ text = """ + # Introduction + + + - [Section 1](#section-1) + - [Section 2](#section-2) + + + ## Section 1 + + Here is some Python code: + + ```python + def example(): + print("Hello") + if True: + print("World") + ``` + + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + """ + expected = """ + # Introduction + + + + ## Section 1 + + Here is some Python code: + + ```python + def example(): + print("Hello") + if True: + print("World") + ``` + + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. + self.assert_equal(actual, expected) + + def test_empty_text(self) -> None: + """ + Test preprocessing empty text. + """ + # Prepare inputs. + text = "" + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. + self.assert_equal(actual, text) + + def test_text_without_preprocessing_needs(self) -> None: + """ + Test text that doesn't need any preprocessing. + """ + # Prepare inputs. + text = """ + # Simple Markdown + + This is just simple text. + + - Item 1 + - Item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. 
+ self.assert_equal(actual, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py new file mode 100644 index 000000000..abb48a154 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py @@ -0,0 +1,25 @@ +import logging + +import helpers.hmodule as hmodule +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_hmodule1 +# ############################################################################# + + +class Test_hmodule1(hunitest.TestCase): + def test_has_module1(self) -> None: + """ + Check that the function returns true for the existing package. + """ + self.assertTrue(hmodule.has_module("numpy")) + + def test_has_not_module1(self) -> None: + """ + Check that the function returns false for the non-existing package. 
+ """ + self.assertFalse(hmodule.has_module("no_such_module")) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py new file mode 100644 index 000000000..4d6b7bceb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py @@ -0,0 +1,215 @@ +import logging + +import numpy as np +import collections + +import helpers.hnumpy as hnumpy +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestRandomSeedContext +# ############################################################################# + + +class TestRandomSeedContext(hunitest.TestCase): + def test_example1(self) -> None: + """ + Getting more random numbers without context manager changes the + sequence of random numbers. + """ + n = 3 + # First batch. + np.random.seed(0) + vals1a = np.random.randn(n) + vals2a = np.random.randn(n) + # Second batch. + np.random.seed(0) + vals1b = np.random.randn(n) + vals = np.random.randn(n) + _ = vals + vals2b = np.random.randn(n) + # Check. + self.assertEqual(str(vals1a), str(vals1b)) + # Of course this might fail with a vanishingly small probability. + self.assertNotEqual(str(vals2a), str(vals2b)) + + def test_example2(self) -> None: + """ + Getting more random numbers with context manager doesn't change the + sequence of random numbers. + """ + n = 3 + # First batch. + np.random.seed(0) + vals1a = np.random.randn(n) + vals2a = np.random.randn(n) + # Second batch. + np.random.seed(0) + vals1b = np.random.randn(n) + with hnumpy.random_seed_context(42): + vals = np.random.randn(n) + _ = vals + vals2b = np.random.randn(n) + # Check. 
+ self.assertEqual(str(vals1a), str(vals1b)) + self.assertEqual(str(vals2a), str(vals2b)) + + +# ############################################################################# +# TestFloorWithPrecision +# ############################################################################# + + +class TestFloorWithPrecision(hunitest.TestCase): + def _test_floor_with_precision( + self, + value: float, + precision: int, + expected: str, + ) -> None: + """ """ + actual = hnumpy.floor_with_precision(value, precision) + self.assert_equal(str(actual), expected) + + def test_floor_with_precision1(self) -> None: + """ + Test for negative float values as input. + """ + expected_as_str = "-4.63" + self._test_floor_with_precision(-4.6385, 2, expected_as_str) + + def test_floor_with_precision2(self) -> None: + """ + Test for Zero precision. + """ + expected_as_str = "-4.0" + self._test_floor_with_precision(-4.6385, 0, expected_as_str) + + def test_floor_with_precision3(self) -> None: + """ + Test for negative precision. + """ + value = 4.6385 + amount_precision = -2 + with self.assertRaises(AssertionError) as cm: + hnumpy.floor_with_precision(value, amount_precision) + # Check. + actual = str(cm.exception) + expected = """ + * Failed assertion * + 0 <= -2 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_floor_with_precision4(self) -> None: + """ + Test for positive float values as input. + """ + expected_as_str = "4.63" + self._test_floor_with_precision(4.6385, 2, expected_as_str) + + def test_floor_with_precision5(self) -> None: + """ + Test for integer values as input. + """ + expected_as_str = "4.0" + self._test_floor_with_precision(4, 0, expected_as_str) + + def test_floor_with_precision6(self) -> None: + """ + Test for very small value as input. + """ + expected = 0.0000532 + self._test_floor_with_precision(0.0000532999, 7, str(expected)) + + def test_floor_with_precision7(self) -> None: + """ + Test for very large value as input. 
+ """ + expected_as_str = "4289734.12345" + self._test_floor_with_precision(4289734.1234599999, 5, expected_as_str) + + +# ############################################################################# +# Test_OrderedDict_repr_str +# ############################################################################# + + +class Test_OrderedDict_repr_str(hunitest.TestCase): + """ + The tests are used to gatekeep the expected behavior of + dunder method __str__ and __repr__ for the OrderedDict class. + + The tests stem from changes in Python 3.12. Observe below: + + Python 3.9.5: + >>> from collections import OrderedDict + >>> import numpy + >>> dct = OrderedDict({ "test": numpy.int64(42)}) + >>> dct["test"] + 42 + >>> print(dct) + OrderedDict([('test', 42)]) + >>> str(dct) + "OrderedDict([('test', 42)])" + >>> repr(dct) + "OrderedDict([('test', 42)])" + >>> str(dct["test"]) + '42' + >>> repr(dct["test"]) + '42' + + Python 3.12.3: + >>> from collections import OrderedDict + >>> import numpy + >>> dct = OrderedDict({"test": numpy.int64(42)}) + >>> dct = OrderedDict({"test": numpy.int64(42)}) + KeyboardInterrupt + >>> str(dct) + "OrderedDict({'test': np.int64(42)})" + >>> repr(dct) + "OrderedDict({'test': np.int64(42)})" + >>> str(dct["test"]) + '42' + >>> repr(dct["test"]) + 'np.int64(42)' + """ + + def test_str_single1(self) -> None: + """ + Test that the __str__ method on a single item in OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = str(d["test"]) + expected = "42" + self.assert_equal(actual, expected) + + def test_repr_single1(self) -> None: + """ + Test that the __repr__ method on a single item in OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = repr(d["test"]) + expected = "np.int64(42)" + self.assert_equal(actual, expected) + + def test_str_full1(self) -> None: + """ + Test that the __str__ method of OrderedDict returns the expected string. 
+ """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = str(d) + expected = "OrderedDict({'test': np.int64(42)})" + self.assert_equal(actual, expected) + + def test_repr_full1(self) -> None: + """ + Test that the __repr__ method of OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = repr(d) + expected = "OrderedDict({'test': np.int64(42)})" + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py new file mode 100644 index 000000000..6106dd551 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py @@ -0,0 +1,392 @@ +import abc +import logging +from typing import Any, Callable, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hobject as hobject +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _Obj_to_str_TestCase +# ############################################################################# + + +# Note that we can't derive this class from `hunitest.TestCase` otherwise the +# unit test framework will try to run the tests in this class. +class _Obj_to_str_TestCase(abc.ABC): + """ + Test case for testing `obj_to_str()` and `obj_to_repr()`. + """ + + @abc.abstractmethod + def get_object(self) -> Any: + """ + Build object to test. + """ + ... + + def helper(self, *, expected: Optional[str] = None, **kwargs: Any) -> None: + obj = self.get_object() + hdbg.dassert_is_not(obj, None) + # + txt: List[str] = [] + # Get `str()`. 
+ txt.append(hprint.frame("str:")) + txt.append(hobject.obj_to_str(obj, **kwargs)) + # Get `repr()`. + txt.append(hprint.frame("repr:")) + txt.append(hobject.obj_to_repr(obj, **kwargs)) + # Concat. + txt = "\n".join(txt) + # Check. + if expected is None: + self.check_string(txt, purify_text=True) + else: + hdbg.dassert_isinstance(expected, str) + self.assert_equal(txt, expected, purify_text=True, fuzzy_match=True) + + def test1(self, expected: str) -> None: + """ + Use `__dict__` to extract the attributes. + """ + self.helper(expected=expected, attr_mode="__dict__") + + def test2(self, expected: str) -> None: + """ + Use `dir` to extract the attributes. + """ + self.helper(expected=expected, attr_mode="dir") + + def test3(self, expected: str) -> None: + """ + Use `__dict__` and print the type of the attributes. + """ + self.helper(expected=expected, print_type=True) + + def test4(self) -> None: + """ + Print only callable attributes. + """ + self.helper(callable_mode="all") + + def test5(self) -> None: + """ + Print only private attributes. + """ + self.helper(private_mode="all") + + def test6(self) -> None: + """ + Print only dunder attributes. + """ + self.helper(dunder_mode="all") + + +# ############################################################################# +# _Object1 +# ############################################################################# + + +class _Object1: + """ + Object storing only scalar members and not other nested objects. 
+ """ + + def __init__(self) -> None: + self.a = False + self.b = "hello" + self.c = 3.14 + self._hello = "under" + self.__hello = "double_dunder" + self.hello = lambda x: x + 1 + + +# ############################################################################# +# Test_obj_to_str1 +# ############################################################################# + + +class Test_obj_to_str1(hunitest.TestCase, _Obj_to_str_TestCase): + def get_object(self) -> Any: + obj = _Object1() + return obj + + def test1(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False, b=hello, c=3.14) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + """ + super().test1(expected) + + def test2(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False, b=hello, c=3.14) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + """ + super().test2(expected) + + def test3(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False , b=hello , c=3.14 ) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + 
""" + super().test3(expected) + + +# ############################################################################# +# _Object2 +# ############################################################################# + + +class _Object2: + """ + Object using a `obj_to_str()` as repr. + """ + + def __init__(self) -> None: + self.x = True + self.y = "world" + self.z = 6.28 + self._hello = "under" + self.__hello = "double_dunder" + self.hello = lambda x: x + 1 + + def __repr__(self) -> str: + return hobject.obj_to_str(self) + + +# ############################################################################# +# _Object3 +# ############################################################################# + + +class _Object3: + """ + Object storing another object. + """ + + def __init__(self) -> None: + self.p = "p" + self.q = "q" + self.object2 = _Object2() + + +# ############################################################################# +# Test_obj_to_str2 +# ############################################################################# + + +class Test_obj_to_str2(hunitest.TestCase, _Obj_to_str_TestCase): + def get_object(self) -> Any: + obj = _Object3() + return obj + + def test1(self) -> None: + # TODO(gp): object2 in repr should be printed recursively as repr, but + # it's not. 
+ expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) + ################################################################################ + repr: + ################################################################################ + : + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + """ + super().test1(expected) + + def test2(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) + ################################################################################ + repr: + ################################################################################ + : + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + p='p' + q='q' + """ + super().test2(expected) + + def test3(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) + ################################################################################ + repr: + ################################################################################ + : + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + """ + super().test3(expected) + + +# ############################################################################# +# _Abstract_ClassA +# ############################################################################# + + +class _Abstract_ClassA(abc.ABC, hobject.PrintableMixin): + """ + Abstract class descending from 
`PrintableMixin`. + """ + + def __init__(self) -> None: + self._arg0 = 0 + self._arg1 = "one" + self._arg2 = 2 + + @staticmethod + def get_config_attributes() -> List[str]: + return ["_arg1", "_arg2"] + + +# ############################################################################# +# _ClassB +# ############################################################################# + + +class _ClassB(hobject.PrintableMixin): + """ + Class descending from `PrintableMixin`. + """ + + def __init__(self, get_wall_clock_time: Callable) -> None: + self._arg5 = {"key1": "five", "key2": 5} + self._arg6 = "abc" + self._get_wall_clock_time = get_wall_clock_time + + @staticmethod + def get_config_attributes() -> List[str]: + return ["_arg5", "_get_wall_clock_time"] + + def get_wall_clock_time(self) -> pd.Timestamp: + """ + Return wall clock time in the timezone specified in the ctor. + + Initially wall clock time can be in any timezone, but cannot be + timezone-naive. + """ + wall_clock_time = self._get_wall_clock_time() + return wall_clock_time + + +# ############################################################################# +# _ClassA +# ############################################################################# + + +class _ClassA(_Abstract_ClassA): + """ + Class descending from `_AbstractClassA` and embedding `_ClassB`. 
+ """ + + def __init__(self) -> None: + super().__init__() + self._arg3 = [3, 3, 3] + get_wall_clock_time = lambda: pd.Timestamp( + "2022-04-23", tz="America/New_York" + ) + helper_class = _ClassB(get_wall_clock_time) + self._arg4 = helper_class + self._arg10 = { + "key": 1, + "get_wall_clock_time": helper_class.get_wall_clock_time, + } + + def get_config_attributes(self) -> List[str]: + config_attributes = super().get_config_attributes() + child_class_attributes = ["_arg3", "_arg4", "_arg10"] + config_attributes.extend(child_class_attributes) + return config_attributes + + +# ############################################################################# +# Test_PrintableMixin_to_config_str +# ############################################################################# + + +class Test_PrintableMixin_to_config_str(hunitest.TestCase): + def check_test_class_str(self, test_class: Any, expected: str) -> None: + actual = test_class.to_config_str() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Print `_Abstract_ClassA`. + """ + test_class = _Abstract_ClassA() + expected = r""" + : + _arg1='one' + _arg2='2' + """ + self.check_test_class_str(test_class, expected) + + def test2(self) -> None: + """ + Print `_ClassA`. + """ + test_class = _ClassA() + expected = r""" + : + _arg1='one' + _arg2='2' + _arg3='[3, 3, 3]' + _arg4=: + _arg5='{'key1': 'five', 'key2': 5}' + _get_wall_clock_time='. 
at 0x>' + _arg10= + {'get_wall_clock_time': : + _arg5='{'key1': 'five', 'key2': 5}' + _arg6='abc' >, + 'key': 1} + """ + self.check_test_class_str(test_class, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py new file mode 100644 index 000000000..9e9887915 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py @@ -0,0 +1,92 @@ +import logging + +import pytest + +import helpers.hopen as hopen +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): Some of these tests should be executed outside of the container to +# test other systems. + + +# ############################################################################# +# Test_open_unknown +# ############################################################################# + + +class Test_open_unknown(hunitest.TestCase): + """ + Test unknown extension and unknown systems. + """ + + def test_unknown_extension1(self) -> None: + """ + Test unknown extension raises an error. + """ + with self.assertRaises(AssertionError) as cm: + hopen.open_file("a.unknown_ext") + # Check error text. + self.assertIn("unknown_ext", str(cm.exception)) + + def test_unknown_os1(self) -> None: + """ + Test unknown OS raises an error. + """ + with self.assertRaises(AssertionError) as cm: + hopen._cmd_open_html("b.html", "UnknownOS") + # Check error text. 
+ self.assertIn("UnknownOS", str(cm.exception)) + + +# ############################################################################# +# Test_open_html +# ############################################################################# + + +@pytest.mark.skip(reason="See cryptomtc/cmamp#321") +class Test_open_html(hunitest.TestCase): + """ + Test different command correctness for opening html file. + """ + + def test_linux1(self) -> None: + """ + Test Linux. + """ + cmd = hopen._cmd_open_html("a.html", "Linux") + self.check_string(str(cmd)) + + def test_windows1(self) -> None: + """ + Test Windows. + """ + cmd = hopen._cmd_open_html("b.html", "Windows") + self.check_string(str(cmd)) + + def test_mac1(self) -> None: + """ + Test Darwin. + """ + cmd = hopen._cmd_open_html("c.html", "Darwin") + self.check_string(str(cmd)) + + +# ############################################################################# +# Test_open_pdf +# ############################################################################# + + +class Test_open_pdf(hunitest.TestCase): + """ + Test different command correctness for opening pdf file. + """ + + def test_mac1(self) -> None: + """ + Test Darwin. 
+ """ + cmd = hopen._cmd_open_html("a.pdf", "Darwin") + self.check_string(str(cmd)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py new file mode 100644 index 000000000..be5200d47 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py @@ -0,0 +1,42 @@ +import logging + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas_analysis as hpananal +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_explore1 +# ############################################################################# + + +class Test_explore1(hunitest.TestCase): + def test_ols_regress_series(self) -> None: + x = 5 * np.random.randn(100) + y = x + np.random.randn(*x.shape) + df = pd.DataFrame() + df["x"] = x + df["y"] = y + hpananal.ols_regress_series( + df["x"], df["y"], intercept=True, print_model_stats=False + ) + + @pytest.mark.skip(reason="https://github.com/.../.../issues/3676") + def test_rolling_pca_over_time1(self) -> None: + np.random.seed(42) + df = pd.DataFrame(np.random.randn(10, 5)) + df.index = pd.date_range("2017-01-01", periods=10) + corr_df, eigval_df, eigvec_df = hpananal.rolling_pca_over_time( + df, 0.5, "fill_with_zero" + ) + txt = ( + "corr_df=\n%s\n" % corr_df.to_string() + + "eigval_df=\n%s\n" % eigval_df.to_string() + + "eigvec_df=\n%s\n" % eigvec_df.to_string() + ) + self.check_string(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py new file mode 100644 index 000000000..595877a97 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py @@ -0,0 +1,67 @@ +import logging + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_CheckSummary +# ############################################################################# + + +class Test_CheckSummary(hunitest.TestCase): + def test1(self) -> None: + """ + All the tests have passed. + """ + # Prepare inputs. + obj = hpandas.CheckSummary() + obj.add( + "hello", + "Number of not submitted OMS child orders=0 / 73 = 0.00%", + True, + ) + obj.add("hello2", "ok", True) + # Check. + is_ok = obj.is_ok() + self.assertTrue(is_ok) + # + actual = obj.report_outcome(notebook_output=False, assert_on_error=False) + self.check_string(actual) + # No assertion expected. + obj.report_outcome() + + def test2(self) -> None: + """ + Not all the tests have passed. + """ + # Prepare inputs. + obj = hpandas.CheckSummary() + obj.add( + "hello", + "Number of not submitted OMS child orders=0 / 73 = 0.00%", + True, + ) + obj.add("hello2", "not_ok", False) + # Check. + is_ok = obj.is_ok() + self.assertFalse(is_ok) + # + actual = obj.report_outcome(notebook_output=False, assert_on_error=False) + self.check_string(actual) + # + with self.assertRaises(ValueError) as e: + actual = obj.report_outcome() + actual_exception = str(e.exception) + expected_exception = r""" + The checks have failed: + description comment is_ok + 0 hello Number of not submitted OMS child orders=0 / 7... 
True + 1 hello2 not_ok False + is_ok=False + """ + self.assert_equal(actual_exception, expected_exception, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py new file mode 100644 index 000000000..a65340957 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py @@ -0,0 +1,364 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestDropNa +# ############################################################################# + + +class TestDropNa(hunitest.TestCase): + def test_dropna1(self) -> None: + """ + Test if all types of NaNs are dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "B", None, "D"], + "dummy_value_3": [0, 0, pd.NA, 0, 0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.dropna(test_df, drop_infs=False) + # Prepare expected result. + expected = { + "dummy_value_1": [1, 0], + "dummy_value_2": ["A", "D"], + "dummy_value_3": [0, 0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64", "dummy_value_3": "object"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 4])) + # Check. + hunitest.compare_df(actual, expected) + + def test_dropna2(self) -> None: + """ + Test if infs are dropped. + """ + # Prepare actual result. 
+ test_data = { + "dummy_value_1": [-np.inf, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "B", "C", "D"], + "dummy_value_3": [0, 0, np.inf, 0, 0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.dropna(test_df, drop_infs=True) + # Prepare expected result. + expected = { + "dummy_value_1": [1, 2, 0], + "dummy_value_2": ["A", "C", "D"], + "dummy_value_3": [0, 0, 0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64", "dummy_value_3": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 3, 4])) + # Check. + hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestDropAxisWithAllNans +# ############################################################################# + + +class TestDropAxisWithAllNans(hunitest.TestCase): + def test_drop_rows1(self) -> None: + """ + Test if row full of nans is dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": [pd.NA, "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) + # Prepare expected result. + expected = { + "dummy_value_1": [2, 3], + "dummy_value_2": ["B", "C"], + "dummy_value_3": [1.0, 1.0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 2])) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_rows2(self) -> None: + """ + Test if non fully nan row is not dropped. + """ + # Prepare actual result. 
+ test_data = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": ["A", "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) + # Prepare expected result. + expected = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": ["A", "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([0, 1, 2])) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_columns1(self) -> None: + """ + Test if column full of nans is dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, pd.NA, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) + # Prepare expected result. + expected = { + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + expected = pd.DataFrame(data=expected) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_columns2(self) -> None: + """ + Test if column that is not full of nans is not dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 2, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) + # Prepare expected result. + expected = { + "dummy_value_1": [np.nan, 2, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + expected = pd.DataFrame(data=expected) + # Check. 
+ hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestDropDuplicates +# ############################################################################# + + +class TestDropDuplicates(hunitest.TestCase): + """ + Test that duplicates are dropped correctly. + """ + + @staticmethod + def get_test_data() -> pd.DataFrame: + test_data = [ + (1, "A", 3.2), + (1, "A", 3.2), + (10, "B", 3.2), + (8, "A", 3.2), + (4, "B", 8.2), + (10, "B", 3.2), + ] + index = [ + "dummy_value1", + "dummy_value3", + "dummy_value2", + "dummy_value1", + "dummy_value1", + "dummy_value2", + ] + columns = ["int", "letter", "float"] + df = pd.DataFrame(data=test_data, index=index, columns=columns) + return df + + def test_drop_duplicates1(self) -> None: + """ + - use_index = True + - column_subset is not None + """ + # Prepare test data. + df = self.get_test_data() + use_index = True + column_subset = ["float"] + no_duplicates_df = hpandas.drop_duplicates( + df, use_index, column_subset=column_subset + ) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value3 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates2(self) -> None: + """ + - use_index = True + - column_subset = None + """ + # Prepare test data. + df = self.get_test_data() + use_index = True + no_duplicates_df = hpandas.drop_duplicates(df, use_index) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value3 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 8 A 3.2 + dummy_value1 4 B 8.2 + """ + # Check. 
+ self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates3(self) -> None: + """ + - use_index = False + - column_subset = None + """ + # Prepare test data. + df = self.get_test_data() + use_index = False + no_duplicates_df = hpandas.drop_duplicates(df, use_index) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 8 A 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates4(self) -> None: + """ + - use_index = False + - column_subset is not None + """ + # Prepare test data. + df = self.get_test_data() + use_index = False + column_subset = ["letter", "float"] + no_duplicates_df = hpandas.drop_duplicates( + df, use_index, column_subset=column_subset + ) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# Test_impute_nans +# ############################################################################# + + +class Test_impute_nans(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic imputation of "nan" strings with empty string. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "nan", "value3"], + "col2": ["a", "b", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col1", "") + # Check output. + self.assertEqual(result_df["col1"].tolist(), ["value1", "", "value3"]) + self.assertEqual(result_df["col2"].tolist(), ["a", "b", "c"]) + + def test2(self) -> None: + """ + Test imputation with a custom value. 
+ """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "nan", "value3"], + "col2": ["a", "nan", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col2", "MISSING") + # Check output. + self.assertEqual(result_df["col1"].tolist(), ["value1", "nan", "value3"]) + self.assertEqual(result_df["col2"].tolist(), ["a", "MISSING", "c"]) + + def test3(self) -> None: + """ + Test with no "nan" values present. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "value2", "value3"], + "col2": ["a", "b", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col1", "") + # Check output - should be unchanged. + self.assertEqual( + result_df["col1"].tolist(), ["value1", "value2", "value3"] + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py new file mode 100644 index 000000000..9567c91e5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py @@ -0,0 +1,650 @@ +import logging +from typing import Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestCompareDataframeRows +# ############################################################################# + + +class TestCompareDataframeRows(hunitest.TestCase): + def get_test_data(self) -> pd.DataFrame: + test_data = { + "dummy_value_1": [0, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "C", "B", "D"], + "dummy_value_3": [0, 0, 0, 0, 0], + } + df = pd.DataFrame(data=test_data) + df.index.name = "test" + 
return df + + def test_compare_dataframe_rows1(self) -> None: + """ + Verify that differences are caught and displayed properly. + """ + # Prepare inputs. + test_data = self.get_test_data() + edited_test_data = test_data.copy()[1:-1] + edited_test_data.loc[1, "dummy_value_2"] = "W" + edited_test_data.loc[2, "dummy_value_2"] = "Q" + edited_test_data.loc[2, "dummy_value_3"] = "1" + # Run. + data_difference = hpandas.compare_dataframe_rows( + test_data, edited_test_data + ) + # Check output. + actual = hpandas.df_to_str(data_difference) + expected = r""" dummy_value_2 dummy_value_3 test + self other self other + 0 W A 1 + 1 Q C 1 0 2""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_compare_dataframe_rows2(self) -> None: + """ + Verify that differences are caught and displayed properly without + original index. + """ + # Prepare inputs. + test_data = self.get_test_data() + test_data.index.name = None + edited_test_data = test_data.copy()[1:-1] + edited_test_data.loc[1, "dummy_value_2"] = "W" + edited_test_data.loc[2, "dummy_value_2"] = "Q" + edited_test_data.loc[2, "dummy_value_3"] = "1" + # Run. + data_difference = hpandas.compare_dataframe_rows( + test_data, edited_test_data + ) + # Check output. 
+ actual = hpandas.df_to_str(data_difference) + expected = r""" dummy_value_2 dummy_value_3 + self other self other + 0 W A NaN NaN + 1 Q C 1 0.0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_compare_dfs +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +class Test_compare_dfs(hunitest.TestCase): + """ + - Define two DataFrames that can be either equal or different in terms of columns or rows + - Compare its values by calculating the difference + """ + + @staticmethod + def get_test_dfs_equal() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Both DataFrames have only equal rows and columns names. + """ + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + values1 = { + "tsA": pd.Series([1, 2, 3]), + "tsB": pd.Series([4, 5, 6]), + "tsC": pd.Series([7, 8, 9]), + "timestamp": timestamp_index1, + } + df1 = pd.DataFrame(data=values1) + df1 = df1.set_index("timestamp") + # + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + values2 = { + "tsA": pd.Series([1.1, 1.9, 3.15]), + "tsB": pd.Series([0, 5, 5.8]), + "tsC": pd.Series([6.5, 8.6, 9.07]), + "timestamp": timestamp_index2, + } + df2 = pd.DataFrame(data=values2) + df2 = df2.set_index("timestamp") + return df1, df2 + + @staticmethod + def get_test_dfs_close_to_zero() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + DataFrames with values that are close to 0. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + ] + values1 = { + "tsA": [3e-9, -3e-9], + "tsB": [6e-3, 4e-9], + "timestamp": timestamp_index, + } + df1 = pd.DataFrame(data=values1) + df1 = df1.set_index("timestamp") + # + values2 = { + "tsA": [15e-3, -5e-9], + "tsB": [5e-9, 3e-9], + "timestamp": timestamp_index, + } + df2 = pd.DataFrame(data=values2) + df2 = df2.set_index("timestamp") + return df1, df2 + + def get_test_dfs_different(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + DataFrames have both unique and equal rows and columns. + """ + df1, df2 = self.get_test_dfs_equal() + df2 = df2.rename( + columns={"tsC": "extra_col"}, + index={ + pd.Timestamp("2022-01-01 21:03:00+00:00"): pd.Timestamp( + "2022-01-01 21:04:00+00:00" + ) + }, + ) + return df1, df2 + + def test1(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "diff" + """ + df1, df2 = self.get_test_dfs_equal() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff tsC.diff + timestamp + 2022-01-01 21:01:00+00:00 -0.10 4.0 0.50 + 2022-01-01 21:02:00+00:00 0.10 0.0 -0.60 + 2022-01-01 21:03:00+00:00 -0.15 0.2 -0.07 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - zero_vs_zero_is_zero = False + - remove_inf = False + """ + df1, df2 = self.get_test_dfs_equal() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=False, + remove_inf=False, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 
21:01:00+00:00 -9.090909 inf 7.692308 + 2022-01-01 21:02:00+00:00 5.263158 0.000000 -6.976744 + 2022-01-01 21:03:00+00:00 -4.761905 3.448276 -0.771775 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + - DataFrames are not equal + - Column and row modes are `inner` + - diff_mode = "diff" + """ + df1, df2 = self.get_test_dfs_different() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="inner", + column_mode="inner", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff + timestamp + 2022-01-01 21:01:00+00:00 -0.1 4.0 + 2022-01-01 21:02:00+00:00 0.1 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + - DataFrames are not equal + - Column and row modes are `inner` + - diff_mode = "pct_change" + """ + df1, df2 = self.get_test_dfs_different() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="inner", + column_mode="inner", + diff_mode="pct_change", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -9.090909 NaN + 2022-01-01 21:02:00+00:00 5.263158 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test5(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "diff" + - All values of the second DataFrame are zeros + + Check that if the second DataFrame consists of zeros, + the function will perform comparison to the initial DataFrame. + """ + df1, df2 = self.get_test_dfs_different() + # Create DataFrame with zeros. + df2 = df1 * 0 + # Compare. 
+ df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff tsC.diff + timestamp + 2022-01-01 21:01:00+00:00 1 4 7 + 2022-01-01 21:02:00+00:00 2 5 8 + 2022-01-01 21:03:00+00:00 3 6 9 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test6(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - close_to_zero_threshold = 1e-6 + - zero_vs_zero_is_zero = True + - remove_inf = True + + The second DataFrame has numbers below the close_to_zero_threshold. + """ + df1, df2 = self.get_test_dfs_close_to_zero() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=True, + remove_inf=True, + ) + # + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -100.0 NaN + 2022-01-01 21:02:00+00:00 0.0 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test7(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - close_to_zero_threshold = 1e-6 + - zero_vs_zero_is_zero = False + - remove_inf = False + + The second DataFrame has numbers below the close_to_zero_threshold. 
+ """ + df1, df2 = self.get_test_dfs_close_to_zero() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=False, + remove_inf=False, + ) + # + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -100.0 inf + 2022-01-01 21:02:00+00:00 NaN NaN + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test8(self) -> None: + """ + Test NaN comparison with NaNs present at different location in two + dataframes. + """ + # Build test dataframes. + df1 = pd.DataFrame( + data={ + "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df2 = pd.DataFrame( + data={ + "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + # Check. + with self.assertRaises(AssertionError) as cm: + compare_nans = True + hpandas.compare_dfs( + df1, df2, compare_nans=compare_nans, only_warning=False + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + DataFrame.iloc[:, 0] (column name="A") are different + + DataFrame.iloc[:, 0] (column name="A") values are different (66.66667 %) + [index]: [0, 1, 2, 3, 4, 5] + [left]: [1.1, nan, 3.1, nan, inf, inf] + [right]: [3.0, 2.2, nan, nan, nan, inf] + At positional index 0, first diff: 1.1 != 3.0 + df1= + A B + 0 1.1 0 + 1 NaN 0 + 2 3.1 0 + 3 NaN 0 + 4 inf 0 + 5 inf 0 + and df2= + A B + 0 3.0 0 + 1 2.2 0 + 2 NaN 0 + 3 NaN 0 + 4 NaN 0 + 5 inf 0 + are not equal. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test9(self) -> None: + """ + Test to verify the error when df1 and df2 have different index types. + """ + df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + # Create df2 with a DatetimeIndex. 
+ dates = pd.date_range("2021-01-01", periods=3) + df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "timestamp": dates}) + df2 = df2.set_index("timestamp") + with self.assertRaises(AssertionError) as cm: + hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + df1.index.difference(df2.index)= + RangeIndex(start=0, stop=3, step=1) + df2.index.difference(df1.index)= + DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None) + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test10(self) -> None: + """ + Check `assert_diff_threshold` functionality in presence of NaN values + in df_diff. + """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.000001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.nan + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.0001 0.0001 0.0001 + 2022-01-01 21:02:00+00:00 0.0001 0.0001 NaN + 2022-01-01 21:03:00+00:00 0.0001 0.0001 0.0001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test11(self) -> None: + """ + Check functionality for `remove_inf = False` in presence of `diff_mode + = 'pct_change'`. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.inf + with self.assertRaises(AssertionError) as cm: + hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + remove_inf=False, + only_warning=False, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + DataFrame.iloc[:, 0] (column name="tsA") are different + + DataFrame.iloc[:, 0] (column name="tsA") values are different (100.0 %) + [index]: [2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00, 2022-01-01 21:03:00+00:00] + [left]: [False, False, False] + [right]: [True, True, True] + df1= + tsA tsB tsC + timestamp + 2022-01-01 21:01:00+00:00 100.001 400.004 700.007 + 2022-01-01 21:02:00+00:00 200.002 500.005 inf + 2022-01-01 21:03:00+00:00 300.003 600.006 900.009 + and df2= + tsA tsB tsC + timestamp + 2022-01-01 21:01:00+00:00 100 400 700 + 2022-01-01 21:02:00+00:00 200 500 800 + 2022-01-01 21:03:00+00:00 300 600 900 + have pct_change more than `assert_diff_threshold`. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test12(self) -> None: + """ + Check functionality for `remove_inf = True` in presence of `diff_mode = + 'pct_change'`. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.inf + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 + 2022-01-01 21:02:00+00:00 0.001 0.001 NaN + 2022-01-01 21:03:00+00:00 0.001 0.001 0.001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test13(self) -> None: + """ + Check test case when negative values in df2. + """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, -300], + "tsB": [400, -500, 600], + "tsC": [700, -800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 + 2022-01-01 21:02:00+00:00 0.001 -0.001 -0.001 + 2022-01-01 21:03:00+00:00 -0.001 0.001 0.001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_invalid_input(self) -> None: + """ + Put two different DataFrames with `equal` mode. 
+ """ + df1, df2 = self.get_test_dfs_different() + with self.assertRaises(AssertionError): + hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + ) + + +# ############################################################################# +# Test_compare_nans_in_dataframes +# ############################################################################# + + +class Test_compare_nans_in_dataframes(hunitest.TestCase): + def test1(self) -> None: + """ + Check that NaN differences are identified correctly. + """ + # Build test dataframes. + df1 = pd.DataFrame( + data={ + "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df2 = pd.DataFrame( + data={ + "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df = hpandas.compare_nans_in_dataframes(df1, df2) + actual = hpandas.df_to_str(df) + expected = r""" + A + df1 df2 + 1 NaN 2.2 + 2 3.1 NaN + 4 inf NaN + """ + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py new file mode 100644 index 000000000..0bd4eaeee --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py @@ -0,0 +1,276 @@ +import logging + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_to_series1 +# ############################################################################# + + +class Test_to_series1(hunitest.TestCase): + def helper(self, n: 
int, expected: str) -> None: + vals = list(range(n)) + df = pd.DataFrame([vals], columns=[f"a{i}" for i in vals]) + df = df.T + _LOG.debug("df=\n%s", df) + srs = hpandas.to_series(df) + _LOG.debug("srs=\n%s", srs) + actual = str(srs) + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test1(self) -> None: + n = 0 + expected = r""" + Series([], dtype: float64) + """ + self.helper(n, expected) + + def test2(self) -> None: + n = 1 + expected = r""" + a0 0 + dtype: int64""" + self.helper(n, expected) + + def test3(self) -> None: + n = 5 + expected = r""" + a0 0 + a1 1 + a2 2 + a3 3 + a4 4 + Name: 0, dtype: int64""" + self.helper(n, expected) + + +# ############################################################################# +# Test_cast_series_to_type +# ############################################################################# + + +class Test_cast_series_to_type(hunitest.TestCase): + """ + Test converting a series into a given type. + """ + + def test1(self) -> None: + series = pd.Series(["1", "2", "3"]) + series_type = int + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.int64) + + def test2(self) -> None: + series = pd.Series(["0.1", "0.2", "0.3"]) + series_type = float + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.float64) + + def test3(self) -> None: + series = pd.Series(["None", "None", "None"]) + series_type = None + actual = hpandas.cast_series_to_type(series, series_type) + for i in range(len(actual)): + self.assertIsNone(actual.iloc[i]) + + def test4(self) -> None: + series = pd.Series(["2020-01-01", "2020-02-02", "2020-03-03"]) + series_type = pd.Timestamp + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.datetime64) + + def test5(self) -> None: + series = pd.Series(["{}", "{1: 2, 3: 4}", "{'a': 'b'}"]) + series_type = dict + actual = hpandas.cast_series_to_type(series, 
series_type) + for i in range(len(actual)): + self.assertEqual(type(actual.iloc[i]), dict) + + +# ############################################################################# +# Test_convert_to_type +# ############################################################################# + + +class Test_convert_to_type(hunitest.TestCase): + def test_convert_to_type_bool(self) -> None: + """ + Check converting to bool column. + """ + # Mix of booleans, truthy/falsy strings, numerics, and invalid values + data = [True, False, "True", "false", 1, 0, "1", "0", "yes", None] + series = pd.Series(data) + result = hpandas.convert_to_type(series, "is_bool") + expected = pd.Series( + [True, False, True, False, True, False, True, False, None, None] + ) + pd.testing.assert_series_equal(result, expected) + + def test_convert_to_type_int_and_numeric(self) -> None: + """ + Check converting to numeric and int column. + """ + # Strings that parse to numbers, floats, invalid strings, and ints + series = pd.Series(["1", "2", "3.5", "abc", 4], dtype=object) + # is_int should coerce numeric strings to numbers, invalid -> NaN + result_int = hpandas.convert_to_type(series, "is_int") + expected_int = pd.to_numeric(series, errors="coerce") + pd.testing.assert_series_equal(result_int, expected_int) + # is_numeric is the same as to_numeric + result_numeric = hpandas.convert_to_type(series, "is_numeric") + pd.testing.assert_series_equal(result_numeric, expected_int) + + def test_convert_to_type_string(self) -> None: + """ + Check converting to string column. + """ + # Strings vs non-strings + data = ["a", 1, None, "hello", True, 3.14] + series = pd.Series(data, dtype=object) + result = hpandas.convert_to_type(series, "is_string") + expected = pd.Series(["a", "1", "None", "hello", "True", "3.14"]) + pd.testing.assert_series_equal(result, expected) + + def test_convert_to_type_unknown(self) -> None: + "Check converting to invalid datatype column." 
+ series = pd.Series([1, 2, 3], dtype=object) + with pytest.raises(ValueError) as exc: + hpandas.convert_to_type(series, "invalid_type") + self.assertIn("Unknown column type: invalid_type", str(exc.value)) + + +# ############################################################################# +# Test_infer_column_types +# ############################################################################# + + +class Test_infer_column_types(hunitest.TestCase): + def test_numeric_dominance(self) -> None: + """ + Check with numeric dominant column. + """ + # 5 elements: '1','2',3 (numeric), 'a', None + col = pd.Series(["1", "2", 3, "a", None], dtype=object) + vals = hpandas.infer_column_types(col) + # is_numeric: True for "1","2",3 → 3/5 = 0.6 + assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.6 + # is_bool: none are bool → 0.0 + assert vals["is_bool"] == 0.0 + # is_string: "1","2","a" are str → 3/5 = 0.6 + assert pytest.approx(vals["is_string"], rel=1e-6) == 0.6 + # numeric ≥ string, and bool < numeric ⇒ type is numeric + self.assert_equal(vals["type"], "is_numeric") + + def test_bool_dominance(self) -> None: + """ + Check with bool dominant column. + """ + # 4 elements: True, False, True (bool), "x" + col = pd.Series([True, False, True, "x"], dtype=object) + vals = hpandas.infer_column_types(col) + # is_bool: 3/4 = 0.75 + assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.75 + # is_numeric: True→1, False→0, True→1, "x"→NaN → notna → 3/4 = 0.75 + assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.75 + # is_string: only "x" → 1/4 = 0.25 + assert pytest.approx(vals["is_string"], rel=1e-6) == 0.25 + # bool ≥ numeric ⇒ type is bool + self.assert_equal(vals["type"], "is_bool") + + def test_string_dominance(self) -> None: + """ + Check with string dominant column. 
+ """ + # 3 elements: 1.5 (numeric), "a","b" (strings) + col = pd.Series([1.5, "a", "b"], dtype=object) + vals = hpandas.infer_column_types(col) + # is_bool: none are bool → 0/3 = 0.0 + assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.0 + # is_numeric: 1/3 ≈ 0.333... + assert pytest.approx(vals["is_numeric"], rel=1e-6) == pytest.approx( + 1 / 3, rel=1e-6 + ) + # is_string: 2/3 ≈ 0.666... + assert pytest.approx(vals["is_string"], rel=1e-6) == pytest.approx( + 2 / 3, rel=1e-6 + ) + # bool < numeric < string ⇒ type is string + self.assert_equal(vals["type"], "is_string") + + +# ############################################################################# +# Test_convert_df +# ############################################################################# + + +class Test_convert_df(hunitest.TestCase): + def test_convert_df_all_bool(self) -> None: + """ + A column of pure booleans should stay booleans. + """ + df = pd.DataFrame({"flag": [True, False, True, False]}) + df_out = hpandas.convert_df(df) + # Expect a DataFrame back + assert isinstance(df_out, pd.DataFrame) + # Column dtype must be bool + self.assert_equal(df_out["flag"].dtype.name, "bool") + # Values preserved + self.assert_equal( + str(df_out["flag"].tolist()), str([True, False, True, False]) + ) + + def test_convert_df_all_numeric(self) -> None: + """ + A column of numeric strings and ints should become floats. + """ + df = pd.DataFrame({"score": ["1", 2, "3.5", 4]}, dtype=object) + df_out = hpandas.convert_df(df) + assert isinstance(df_out, pd.DataFrame) + # dtype should be float64 + assert df_out["score"].dtype == float + # Values converted correctly + assert df_out["score"].tolist() == [1.0, 2.0, 3.5, 4.0] + + def test_convert_df_all_string(self) -> None: + """ + A column of strings (and mixed non-numeric non-bool) stays as-is. 
+ """ + df = pd.DataFrame( + {"name": ["alice", "bob", "", "charlie"]}, dtype=object + ) + df_out = hpandas.convert_df(df) + print(df_out.head(5)) + assert isinstance(df_out, pd.DataFrame) + # dtype remains object (strings) + self.assert_equal(df_out["name"].dtype.name, "object") + self.assert_equal( + str(df_out["name"].tolist()), str(["alice", "bob", "", "charlie"]) + ) + + def test_convert_df_mixed_columns(self) -> None: + """ + Different datatype columns should convert accordingly. + """ + df = pd.DataFrame( + { + "flag": [True, False, False], + "value": [10, 20, "xyz"], + "text": ["one", "hello", 2], + }, + dtype=object, + ) + df_out = hpandas.convert_df(df) + # flag → bool + self.assert_equal(df_out["flag"].dtype.name, "bool") + self.assertIn("float", df_out["value"].dtype.name) + self.assert_equal(df_out["text"].dtype.name, "object") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py new file mode 100644 index 000000000..44b7c7b18 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py @@ -0,0 +1,448 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_dassert_is_unique1 +# ############################################################################# + + +class Test_dassert_is_unique1(hunitest.TestCase): + def get_df1(self) -> pd.DataFrame: + """ + Return a df without duplicated index. 
+ """ + num_rows = 5 + idx = [ + pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) + for i in range(num_rows) + ] + values = [[i] for i in range(len(idx))] + df = pd.DataFrame(values, index=idx) + _LOG.debug("df=\n%s", df) + # + actual = hpandas.df_to_str(df) + expected = r""" + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:01:00 1 + 2000-01-01 09:02:00 2 + 2000-01-01 09:03:00 3 + 2000-01-01 09:04:00 4""" + self.assert_equal(actual, expected, fuzzy_match=True) + return df + + def test_dassert_is_unique1(self) -> None: + df = self.get_df1() + hpandas.dassert_unique_index(df) + + def get_df2(self) -> pd.DataFrame: + """ + Return a df with duplicated index. + """ + num_rows = 4 + idx = [ + pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) + for i in range(num_rows) + ] + idx.append(idx[0]) + values = [[i] for i in range(len(idx))] + df = pd.DataFrame(values, index=idx) + _LOG.debug("df=\n%s", df) + # + actual = hpandas.df_to_str(df) + expected = r""" + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:01:00 1 + 2000-01-01 09:02:00 2 + 2000-01-01 09:03:00 3 + 2000-01-01 09:00:00 4""" + self.assert_equal(actual, expected, fuzzy_match=True) + return df + + def test_dassert_is_unique2(self) -> None: + df = self.get_df2() + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_unique_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Duplicated rows are: + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:00:00 4 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_valid_remap +# ############################################################################# + + +class Test_dassert_valid_remap(hunitest.TestCase): + def test1(self) -> None: + """ + Check that the function works with correct inputs. + """ + # Set inputs. 
+ to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + } + # Check. + hpandas.dassert_valid_remap(to_remap, remap_dict) + + def test2(self) -> None: + """ + Check that an assertion is raised if dictionary keys are not a subset. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + "dummy_value_3": "A1, A2, A3", + } + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] + issubset + val2=['dummy_value_1', 'dummy_value_2'] + val1 - val2=['dummy_value_3'] + Keys to remap should be a subset of existing columns""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that an assertion is raised if the duplicate values are present + in the dict. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": 1, + "dummy_value_2": "A, B, C", + "dummy_value_3": "A, B, C", + } + # Run. + with self.assertRaises(AttributeError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + 'dict_values' object has no attribute 'count'""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Check that an assertion is raised if the input is not a list. + """ + # Set inputs. + to_remap = {"dummy_value_1"} + remap_dict = { + "dummy_value_1": "1, 2, 3", + } + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '{'dummy_value_1'}' is '' instead of '' + """ + # Check. 
+ self.assert_equal(actual, expected, fuzzy_match=True) + + def test5(self) -> None: + """ + Check that an assertion is raised if the input is not a dictionary. + """ + # Set inputs. + to_remap = ["dummy_value_1"] + remap_dict = [ + "dummy_value_1 : 1, 2, 3", + ] + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '['dummy_value_1 : 1, 2, 3']' is '' instead of '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_increasing_index +# ############################################################################# + + +class Test_dassert_increasing_index(hunitest.TestCase): + def test1(self) -> None: + """ + Check that a monotonically increasing index passes the assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_increasing_index(df) + + def test2(self) -> None: + """ + Check that an assert is raised when index is not monotonically + increasing. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:04"), + pd.Timestamp("2000-01-01 9:03"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hpandas.dassert_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Not increasing indices are: + 0 + 2000-01-01 09:04:00 0 + 2000-01-01 09:03:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that a monotonically increasing index with duplicates passes the + assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:00"), + pd.Timestamp("2000-01-01 9:00"), + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:01"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_increasing_index(df) + + +# ############################################################################# +# Test_dassert_strictly_increasing_index +# ############################################################################# + + +class Test_dassert_strictly_increasing_index(hunitest.TestCase): + def test1(self) -> None: + """ + Check that unique and monotonically increasing index passes the assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_strictly_increasing_index(df) + + def test2(self) -> None: + """ + Check that an assert is raised for an increasing index with duplicates. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hpandas.dassert_strictly_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Duplicated rows are: + 0 + 2000-01-01 09:01:00 0 + 2000-01-01 09:01:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that an assert is raised for a not monotonically increasing + index. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_strictly_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Not increasing indices are: + 0 + 2000-01-01 09:03:00 0 + 2000-01-01 09:02:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_index_is_datetime +# ############################################################################# + + +class Test_dassert_index_is_datetime(hunitest.TestCase): + @staticmethod + def get_multiindex_df( + index_is_datetime: bool, + ) -> pd.DataFrame: + """ + Helper function to get test multi-index dataframe. 
Example of dataframe + returned when `index_is_datetime = True`: + + ``` + column1 column2 + index timestamp + index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 + 2022-01-01 21:10:00+00:00 1.303778 -0.288235 + index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 + 2022-01-01 21:10:00+00:00 1.333692 1.708455 + ``` + + Example of dataframe returned when `index_is_datetime = False`: + + ``` + column1 column2 + index timestamp + index1 string1 -0.122140 -1.949431 + string2 1.303778 -0.288235 + index2 string1 1.237079 1.168012 + string2 1.333692 1.708455 + ``` + """ + if index_is_datetime: + index_inner = [ + pd.Timestamp("2022-01-01 21:00:00", tz="UTC"), + pd.Timestamp("2022-01-01 21:10:00", tz="UTC"), + ] + else: + index_inner = ["string1", "string2"] + index_outer = ["index1", "index2"] + iterables = [index_outer, index_inner] + index = pd.MultiIndex.from_product( + iterables, names=["index", "timestamp"] + ) + columns = ["column1", "column2"] + nums = np.random.uniform(-2, 2, size=(4, 2)) + df = pd.DataFrame(nums, index=index, columns=columns) + return df + + def test1(self) -> None: + """ + Check that multi-index dataframe index is datetime type. + """ + index_is_datetime = True + df = self.get_multiindex_df(index_is_datetime) + hpandas.dassert_index_is_datetime(df) + + def test2(self) -> None: + """ + Check that multi-index dataframe index is not datetime type. + """ + index_is_datetime = False + df = self.get_multiindex_df(index_is_datetime) + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_index_is_datetime(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check for empty dataframe. 
+ """ + df = pd.DataFrame() + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_index_is_datetime(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of 'RangeIndex(start=0, stop=0, step=1)' is '' instead of '' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Check that single-indexed dataframe index is datetime type. + """ + index_is_datetime = True + df = self.get_multiindex_df(index_is_datetime) + df = df.loc["index1"] + hpandas.dassert_index_is_datetime(df) + + +# ############################################################################# +# Test_dassert_approx_eq1 +# ############################################################################# + + +class Test_dassert_approx_eq1(hunitest.TestCase): + def test1(self) -> None: + hpandas.dassert_approx_eq(1, 1.0000001) + + def test2(self) -> None: + srs1 = pd.Series([1, 2.0000001]) + srs2 = pd.Series([0.999999, 2.0]) + hpandas.dassert_approx_eq(srs1, srs2, msg="hello world") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py new file mode 100644 index 000000000..2c69e4fe7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py @@ -0,0 +1,685 @@ +import datetime +import logging +import unittest.mock +import uuid +from typing import Optional, Union + +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hpandas_display as hpandisp +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestDataframeToJson +# ############################################################################# + + 
+class TestDataframeToJson(hunitest.TestCase): + """ + Test dataframe to JSON conversion. + """ + + def test1(self) -> None: + """ + Verify correctness of dataframe to JSON transformation. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + "col_2": [1, 2, 3, 4, 5, 6, 7], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=3, n_tail=3 + ) + # Check output. + self.check_string(output_str) + + def test2(self) -> None: + """ + Verify correctness of UUID-containing dataframe transformation. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + uuid.UUID("421470c7-7797-4a94-b584-eb83ff2de88a"), + uuid.UUID("22cde381-1782-43dc-8c7a-8712cbdf5ee1"), + ], + "col_2": [1, 2], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. + self.check_string(output_str) + + def test3(self) -> None: + """ + Verify correctness of transformation of a dataframe with Timestamps. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + pd.Timestamp("2020-01-01"), + pd.Timestamp("2020-05-12"), + ], + "col_2": [1.0, 2.0], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. + self.check_string(output_str) + + def test4(self) -> None: + """ + Verify correctness of transformation of a dataframe with datetime. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 5, 12), + ], + "col_2": [1.0, 2.0], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. 
+ self.check_string(output_str) + + +# ############################################################################# +# Test_list_to_str +# ############################################################################# + + +class Test_list_to_str(hunitest.TestCase): + """ + Test list to string conversion. + """ + + def test1(self) -> None: + """ + Check that a list is converted to string correctly. + """ + # Prepare inputs. + items = [1, "two", 3, 4, "five"] + # Run test. + actual = hprint.list_to_str2(items, enclose_str_char="|", sep_char=" ; ") + # Check output. + expected = "5 [|1| ; |two| ; |3| ; |4| ; |five|]" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Check that a list is converted to string and truncated correctly. + """ + # Prepare inputs. + items = list(range(15)) + # Run test. + actual = hprint.list_to_str2(items, enclose_str_char="", sep_char=" - ") + # Check output. + expected = "15 [0 - 1 - 2 - 3 - 4 - ... - 10 - 11 - 12 - 13 - 14]" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that a list is converted to string correctly, without additional + parameters. + """ + # Prepare inputs. + items = [1, 2, 3, 4, "five"] + # Run test. + actual = hprint.list_to_str2(items) + # Check output. + expected = "5 ['1', '2', '3', '4', 'five']" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_display_df +# ############################################################################# + + +class Test_display_df(hunitest.TestCase): + """ + Test the display_df function. + """ + + def helper_test_display_df( + self, + df: Union[pd.DataFrame, pd.Series], + expected: Optional[str], + **kwargs, + ) -> None: + """ + Test helper for display_df. 
+ + :param df: Input dataframe or series + :param expected: Expected output to compare with actual output + :param kwargs: Keyword arguments to pass to display_df + """ + # Capture the output from print_or_display and logging. + outputs = [] + tag = kwargs.get("tag") + + def mock_print_or_display( + mock_df: pd.DataFrame, + *, + index: bool = True, + as_txt: bool = False, + log_level: int = logging.INFO, + ) -> None: + """ + Capture the dataframe string representation. + """ + if as_txt or not index: + output = mock_df.to_string(index=index) + else: + output = mock_df.to_html(index=index) + outputs.append(output) + + # Run test. + with unittest.mock.patch( + "helpers.hpandas_display.print_or_display", + side_effect=mock_print_or_display, + ): + with unittest.mock.patch( + "helpers.hpandas_display._LOG.log" + ) as mock_log: + hpandisp.display_df( + df, + log_level=logging.DEBUG, + **kwargs, + ) + # Capture tag logging if present. + if tag is not None and mock_log.called: + for call in mock_log.call_args_list: + if "tag=" in str(call): + outputs.append(f"tag={tag}") + # Check output if expected is provided. + if expected is not None: + expected = hprint.dedent(expected) + actual = "\n".join(outputs) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test display_df with small dataframe. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
01a
12b
23c
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected) + + def test2(self) -> None: + """ + Test display_df with large dataframe and max_lines. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(100)), + "col_2": [f"val_{i}" for i in range(100)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
9898val_98
9999val_99
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, max_lines=5) + + def test3(self) -> None: + """ + Test display_df with inline_index=True. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + . col_1 col_2 + 0 1 a + 1 2 b + 2 3 c + """ + # Run test. + self.helper_test_display_df( + df, expected=expected, inline_index=True, index=True + ) + + def test4(self) -> None: + """ + Test display_df with index=False. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + col_1 col_2 + 1 a + 2 b + 3 c + """ + # Run test. + self.helper_test_display_df(df, expected=expected, index=False) + + def test5(self) -> None: + """ + Test display_df with named index and inline_index=True. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + df.index.name = "my_index" + # Prepare outputs. + expected = """ + my_index col_1 col_2 + 0 1 a + 1 2 b + 2 3 c + """ + # Run test. + self.helper_test_display_df( + df, expected=expected, inline_index=True, index=False + ) + + def test6(self) -> None: + """ + Test display_df with Pandas Series (should convert to DataFrame). + """ + # Prepare inputs. + series = pd.Series([1, 2, 3, 4, 5], name="my_series") + # Prepare outputs. + expected = """ + . my_series + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + + """ + # Run test. + self.helper_test_display_df( + series, expected=expected, inline_index=True, index=False + ) + + def test7(self) -> None: + """ + Test display_df with tag parameter. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1, 2, 3]}) + # Prepare outputs. + expected = """ + . col_1 + 0 1 + 1 2 + 2 3 + tag=my_tag + """ + # Run test. 
+ self.helper_test_display_df( + df, expected=expected, tag="my_tag", inline_index=True, index=False + ) + + def test8(self) -> None: + """ + Test display_df with mode='all_rows'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(50)), + "col_2": [f"val_{i}" for i in range(50)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all_rows") + + def test9(self) -> None: + """ + Test display_df with mode='all_cols'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + "col_3": [10.5, 20.5, 30.5], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2col_3
01a10.5
12b20.5
23c30.5
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all_cols") + + def test10(self) -> None: + """ + Test display_df with mode='all'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(50)), + "col_2": [f"val_{i}" for i in range(50)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all") + + def test11(self) -> None: + """ + Test display_df with invalid mode raises error. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1, 2, 3]}) + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hpandisp.display_df( + df, + mode="invalid_mode", + log_level=logging.DEBUG, + ) + self.assertIn("Invalid mode", str(cm.exception)) + + def test12(self) -> None: + """ + Test display_df with duplicate columns raises assertion. + """ + # Prepare inputs. + df = pd.DataFrame([[1, 2], [3, 4]]) + df.columns = ["col", "col"] + # Run test and check output. + with self.assertRaises(AssertionError): + hpandisp.display_df(df, log_level=logging.DEBUG) + + def test13(self) -> None: + """ + Test display_df with single row dataframe. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1], "col_2": ["a"]}) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + +
col_1col_2
01a
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, max_lines=5) + + def test14(self) -> None: + """ + Test display_df with max_lines=1 (edge case). + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(10)), + "col_2": [f"val_{i}" for i in range(10)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
88val_8
99val_9
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py new file mode 100644 index 000000000..c1f66b0d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py @@ -0,0 +1,43 @@ +import logging +import os + +import pytest + +import helpers.hpandas as hpandas +import helpers.hs3 as hs3 +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestReadDataFromS3 +# ############################################################################# + + +class TestReadDataFromS3(hunitest.TestCase): + def test_read_csv1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_name = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + # TODO(sonaal): Reorganize all s3 input data, CmampTask5650. 
+ "alphamatic-data", + "data/kibot/all_stocks_1min/RIMG.csv.gz", + ) + hs3.dassert_path_exists(file_name, s3fs) + stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) + hpandas.read_csv_to_df(stream, **kwargs) + + @pytest.mark.slow("~15 sec.") + def test_read_parquet1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_name = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/kibot/pq/sp_500_1min/AAPL.pq", + ) + hs3.dassert_path_exists(file_name, s3fs) + stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) + hpandas.read_parquet_to_df(stream, **kwargs) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py new file mode 100644 index 000000000..0e1b813fa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py @@ -0,0 +1,680 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_subset_multiindex_df +# ############################################################################# + + +class Test_subset_multiindex_df(hunitest.TestCase): + """ + Filter Multiindex DataFrame with 2 column levels. 
+ """ + + @staticmethod + def get_multiindex_df() -> pd.DataFrame: + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables = [["asset1", "asset2"], ["open", "high", "low", "close"]] + index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) + nums = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df = pd.DataFrame(nums, index=timestamp_index, columns=index) + return df + + def test1(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 1 columns + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:03:00+00:00"), + columns_level0=["asset1"], + columns_level1=["high", "low"], + ) + expected_length = 3 + expected_column_names = [("asset1", "high"), ("asset1", "low")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:03:00+00:00] + columns=('asset1', 'high'),('asset1', 'low') + shape=(3, 2) + asset1 + timestamp high low + 2022-01-01 21:01:00+00:00 0.124922 -0.359292 + 2022-01-01 21:02:00+00:00 0.791050 0.766129 + 
2022-01-01 21:03:00+00:00 0.452377 1.610513 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 1 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), + columns_level1=["close"], + ) + expected_length = 2 + expected_column_names = [("asset1", "close"), ("asset2", "close")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] + columns=('asset1', 'close'),('asset2', 'close') + shape=(2, 2) + asset1 asset2 + timestamp close close + 2022-01-01 21:01:00+00:00 1.041378 0.100234 + 2022-01-01 21:02:00+00:00 -1.499353 2.045787 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test3(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), + columns_level0=["asset2"], + ) + expected_length = 2 + expected_column_names = [ + ("asset2", "close"), + ("asset2", "high"), + ("asset2", "low"), + ("asset2", "open"), + ] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] + columns=('asset2', 'close'),('asset2', 'high'),('asset2', 'low'),('asset2', 'open') + shape=(2, 4) + asset2 + timestamp close high low open + 2022-01-01 21:01:00+00:00 0.100234 1.407860 -0.131710 0.200999 + 2022-01-01 21:02:00+00:00 2.045787 0.060399 -0.776521 -1.059238 + """ + 
self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test4(self) -> None: + """ + Filter by: + + - Level 1 columns + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + columns_level0=["asset2"], + columns_level1=["low"], + ) + expected_length = 5 + expected_column_names = [("asset2", "low")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:05:00+00:00] + columns=('asset2', 'low') + shape=(5, 1) + asset2 + timestamp low + 2022-01-01 21:01:00+00:00 -0.131710 + 2022-01-01 21:02:00+00:00 -0.776521 + 2022-01-01 21:03:00+00:00 1.030158 + 2022-01-01 21:04:00+00:00 1.508267 + 2022-01-01 21:05:00+00:00 1.140696 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test_columns_level0_invalid_input(self) -> None: + df = self.get_multiindex_df() + with self.assertRaises(AssertionError): + hpandas.subset_multiindex_df( + df, + columns_level0=["invalid_input"], + ) + + def test_columns_level1_invalid_input(self) -> None: + df = self.get_multiindex_df() + with self.assertRaises(AssertionError): + hpandas.subset_multiindex_df( + df, + columns_level1=["invalid_input"], + ) + + +# ############################################################################# +# Test_compare_multiindex_dfs +# ############################################################################# + + +class Test_compare_multiindex_dfs(hunitest.TestCase): + """ + Subset Multiindex DataFrames with 2 column levels and compare its values. 
+ """ + + @staticmethod + def get_multiindex_dfs() -> pd.DataFrame: + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables1 = [["asset1", "asset2"], ["open", "high", "low", "close"]] + index1 = pd.MultiIndex.from_product( + iterables1, names=[None, "timestamp"] + ) + nums1 = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df1 = pd.DataFrame(nums1, index=timestamp_index1, columns=index1) + # + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:00:00+00:00"), + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + iterables2 = [ + ["asset1", "asset2", "asset3"], + ["open", "high", "low", "close", "volume"], + ] + index2 = pd.MultiIndex.from_product( + iterables2, names=[None, "timestamp"] + ) + nums2 = [ + [ + 0.79095104, + -0.10304008, + -0.69848962, + 0.50078409, + 0.41756371, + -1.33487885, + 1.04546138, + 0.191062, + 0.08841533, + 0.61717725, + -2.15558483, + 1.21036169, + 2.60355386, + 0.07508052, + 1.00702849, + ], 
+ [ + 0.56223723, + 0.97433151, + -1.40471182, + 0.53292355, + 0.24381913, + 0.64343069, + -0.46733655, + -1.20471491, + -0.08347491, + 0.33365524, + 0.04370572, + -0.53547653, + -1.07622168, + 0.7318155, + -0.47146482, + ], + [ + -0.48272741, + 1.17859032, + -0.40816664, + 0.46684297, + 0.42518077, + -1.52913855, + 1.09925095, + 0.48817537, + 1.2662552, + -0.59757824, + 0.23724902, + -0.00660826, + 0.09780482, + -0.17166633, + -0.54515917, + ], + [ + -0.37618442, + -0.3086281, + 1.09168123, + -1.1751162, + 0.38291194, + 1.80830268, + 1.28318855, + 0.75696503, + -1.04042572, + 0.06493231, + -0.10392893, + 1.89053412, + -0.21200498, + 1.61212857, + -2.00765278, + ], + [ + -0.19674075, + -1.02532132, + -0.22486018, + 0.37664998, + 0.35619408, + -0.77304675, + 0.59053699, + -1.53249898, + 0.57548424, + -0.32093537, + -0.52109972, + 1.70938034, + -0.55419632, + 0.45531674, + 0.66878119, + ], + [ + 0.05903553, + 1.2040308, + 0.62323671, + -0.23639535, + 0.87270792, + 2.60253287, + -0.77788842, + 0.80645833, + 1.85438743, + -1.77561587, + 0.41469478, + -0.29791883, + 0.75140743, + 0.50389702, + 0.55311024, + ], + [ + -0.97820763, + -1.32155197, + -0.6143911, + 0.01473404, + 0.87798665, + 0.1701048, + -0.75376376, + 0.72503616, + 0.5791076, + 0.43942739, + 0.62505817, + 0.44998739, + 0.37350664, + -0.73485633, + -0.70406184, + ], + [ + -1.35719477, + -1.82401288, + 0.77263763, + 2.36399552, + -0.45353019, + 0.33983713, + -0.62895329, + 1.34256611, + 0.2207564, + 0.24146184, + 0.90769186, + 0.57426869, + -0.04587782, + -1.6319128, + 0.38094798, + ], + ] + df2 = pd.DataFrame(nums2, index=timestamp_index2, columns=index2) + return df1, df2 + + def test1(self) -> None: + """ + - Subset by both columns and index + - Make inner intersection and compute pct_change + """ + df1, df2 = self.get_multiindex_dfs() + subset_multiindex_df_kwargs = { + "start_timestamp": pd.Timestamp("2022-01-01 21:02:00+00:00"), + "end_timestamp": pd.Timestamp("2022-01-01 21:04:00+00:00"), + 
"columns_level0": ["asset1", "asset2"], + "columns_level1": ["low", "high"], + } + compare_dfs_kwargs = { + "column_mode": "inner", + "row_mode": "inner", + "diff_mode": "pct_change", + "assert_diff_threshold": None, + } + df_diff = hpandas.compare_multiindex_dfs( + df1, + df2, + subset_multiindex_df_kwargs=subset_multiindex_df_kwargs, + compare_dfs_kwargs=compare_dfs_kwargs, + ) + expected_length = 3 + expected_column_names = [ + ("asset1.pct_change", "high.pct_change"), + ("asset1.pct_change", "low.pct_change"), + ("asset2.pct_change", "high.pct_change"), + ("asset2.pct_change", "low.pct_change"), + ] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:02:00+00:00, 2022-01-01 21:04:00+00:00] + columns=('asset1.pct_change', 'high.pct_change'),('asset1.pct_change', 'low.pct_change'),('asset2.pct_change', 'high.pct_change'),('asset2.pct_change', 'low.pct_change') + shape=(3, 4) + asset1.pct_change asset2.pct_change + timestamp high.pct_change low.pct_change high.pct_change low.pct_change + 2022-01-01 21:02:00+00:00 -32.881643 287.700041 -94.505475 -259.066028 + 2022-01-01 21:03:00+00:00 246.576815 47.525948 -137.632125 36.090517 + 2022-01-01 21:04:00+00:00 185.862978 -765.280229 -153.498432 198.418808 + """ + self.check_df_output( + df_diff, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + +# ############################################################################# +# Test_multiindex_df_info1 +# ############################################################################# + + +class Test_multiindex_df_info1(hunitest.TestCase): + @staticmethod + def get_multiindex_df_with_datetime_index() -> pd.DataFrame: + datetime_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables = 
[["asset1", "asset2"], ["open", "high", "low", "close"]] + index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) + nums = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df = pd.DataFrame(nums, index=datetime_index, columns=index) + return df + + @staticmethod + def get_multiindex_df_with_non_datetime_index() -> pd.DataFrame: + non_datetime_index = ["M", "N"] + index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"]]) + data = [[1, 2, 3, 4], [5, 6, 7, 8]] + df = pd.DataFrame(data, index=non_datetime_index, columns=index) + return df + + def test1(self) -> None: + """ + Test DataFrame with a datetime index. + """ + df = self.get_multiindex_df_with_datetime_index() + actual = hpandas.multiindex_df_info(df) + # This is required by `pandas` >= 2.2. + expected = """ + shape=2 x 4 x 5 + columns_level0=2 ['asset1', 'asset2'] + columns_level1=4 ['close', 'high', 'low', 'open'] + rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:03:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:05:00+00:00'] + start_timestamp=2022-01-01 21:01:00+00:00 + end_timestamp=2022-01-01 21:05:00+00:00 + frequency=min + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Test DataFrame with a non-frequency datetime index. 
+ """ + df = self.get_multiindex_df_with_datetime_index() + non_frequency_datetime_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:04:30+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + df.index = non_frequency_datetime_index + actual = hpandas.multiindex_df_info(df) + expected = """ + shape=2 x 4 x 5 + columns_level0=2 ['asset1', 'asset2'] + columns_level1=4 ['close', 'high', 'low', 'open'] + rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:04:30+00:00', '2022-01-01 21:06:00+00:00'] + start_timestamp=2022-01-01 21:01:00+00:00 + end_timestamp=2022-01-01 21:06:00+00:00 + frequency=None + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test DataFrame with a non-datetime index. + """ + df = self.get_multiindex_df_with_non_datetime_index() + actual = hpandas.multiindex_df_info(df) + expected = """ + shape=2 x 2 x 2 + columns_level0=2 ['A', 'B'] + columns_level1=2 ['X', 'Y'] + rows=2 ['M', 'N'] + """ + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py new file mode 100644 index 000000000..f0295958f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py @@ -0,0 +1,426 @@ +import logging +from typing import Dict, List + +import pandas as pd + +import helpers.hprint as hprint +import helpers.hpandas as hpandas +import helpers.hpandas_stats as hpanstat +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# 
############################################################################# +# Test_compute_duration_df +# ############################################################################# + + +class Test_compute_duration_df(hunitest.TestCase): + """ + Compute timestamp stats from dfs and check the intersection. + """ + + @staticmethod + def get_dict_with_dfs() -> Dict[str, pd.DataFrame]: + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:00:00+00:00"), + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + timestamp_index3 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + ] + # + value1 = {"value1": [None, None, 1, 2, 3, 4, 5, None]} + value2 = {"value2": [1, 2, 3, None]} + value3 = {"value3": [None, None, 1, 2]} + # + df1 = pd.DataFrame(value1, index=timestamp_index1) + df2 = pd.DataFrame(value2, index=timestamp_index2) + df3 = pd.DataFrame(value3, index=timestamp_index3) + # + tag_to_df = { + "tag1": df1, + "tag2": df2, + "tag3": df3, + } + return tag_to_df + + def helper( + self, + valid_intersect: bool, + expected_start_timestamp: pd.Timestamp, + expected_end_timestamp: pd.Timestamp, + ) -> None: + """ + Checks if the intersection is valid and the same amongst all dfs. + """ + tag_to_df = self.get_dict_with_dfs() + _, tag_dfs = hpandas.compute_duration_df( + tag_to_df, valid_intersect=valid_intersect, intersect_dfs=True + ) + # Collect all start timestamps. 
+ start_timestamps = [tag_dfs[tag].index.min() for tag in tag_dfs] + # Check that all start timestamps are equal. + start_equal = all( + element == start_timestamps[0] for element in start_timestamps + ) + self.assertTrue(start_equal) + # Check that start intersection is correct. + required_start_intersection = expected_start_timestamp + self.assertEqual(start_timestamps[0], required_start_intersection) + # Collect all end timestamps. + end_timestamps = [tag_dfs[tag].index.max() for tag in tag_dfs] + # Check that all end timestamps are equal. + end_equal = all( + element == end_timestamps[0] for element in end_timestamps + ) + self.assertTrue(end_equal) + # Check that end intersection is correct. + required_end_intersection = expected_end_timestamp + self.assertEqual(end_timestamps[0], required_end_intersection) + + def test1(self) -> None: + """ + Check only timestamp stats. + """ + tag_to_df = self.get_dict_with_dfs() + df_stats, _ = hpandas.compute_duration_df(tag_to_df) + expected_length = 3 + expected_column_names = [ + "max_index", + "max_valid_index", + "min_index", + "min_valid_index", + ] + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[tag1, tag3] + columns=min_index,max_index,min_valid_index,max_valid_index + shape=(3, 4) + min_index max_index min_valid_index max_valid_index + tag1 2022-01-01 21:00:00+00:00 2022-01-01 21:06:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:06:00+00:00 + tag2 2022-01-01 21:02:00+00:00 2022-01-01 21:05:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:04:00+00:00 + tag3 2022-01-01 21:01:00+00:00 2022-01-01 21:04:00+00:00 2022-01-01 21:03:00+00:00 2022-01-01 21:04:00+00:00 + """ + expected_signature = hprint.dedent(expected_signature) + self.check_df_output( + df_stats, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Modify initial DataFrames in dictionary with non-valid intersection + (incl NaNs). 
+ """ + valid_intersect = False + expected_start_timestamp = pd.Timestamp("2022-01-01 21:02:00+00:00") + expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") + self.helper( + valid_intersect, expected_start_timestamp, expected_end_timestamp + ) + + def test3(self) -> None: + """ + Modify initial DataFrames in dictionary with valid intersection + (excluding NaNs). + """ + valid_intersect = True + expected_start_timestamp = pd.Timestamp("2022-01-01 21:03:00+00:00") + expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") + self.helper( + valid_intersect, expected_start_timestamp, expected_end_timestamp + ) + + +# ############################################################################# +# Test_compute_weighted_sum +# ############################################################################# + + +class Test_compute_weighted_sum(hunitest.TestCase): + def helper( + self, + index1: List[int], + index2: List[int], + weights_data: Dict[str, List[float]], + index_mode: str, + expected_signature: str, + ) -> None: + """ + Build inputs and check that function output is correct. + """ + # Create test data. + data1 = {"A": [1, 2], "B": [3, 4]} + df1 = pd.DataFrame(data1, index=index1) + data2 = {"A": [5, 6], "B": [7, 8]} + df2 = pd.DataFrame(data2, index=index2) + dfs = {"df1": df1, "df2": df2} + # Create weights DataFrame. + weights = pd.DataFrame(weights_data, index=dfs.keys()) + # Run the function. + weighted_sums = hpandas.compute_weighted_sum( + dfs=dfs, weights=weights, index_mode=index_mode + ) + actual_signature = str(weighted_sums) + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + def test1(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "assert_equal". 
+ """ + index1 = [0, 1] + index2 = [0, 1] + weights_data = {"w1": [0.2, 0.8]} + index_mode = "assert_equal" + expected_signature = r""" + {'w1': A B + 0 4.2 6.2 + 1 5.2 7.2} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test2(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "intersect". + """ + index1 = [0, 1] + index2 = [0, 2] + weights_data = {"w1": [0.2, 0.8], "w2": [0.5, 0.5]} + index_mode = "intersect" + expected_signature = r""" + {'w1': A B + 0 4.2 6.2 + 1 NaN NaN + 2 NaN NaN, 'w2': A B + 0 3.0 5.0 + 1 NaN NaN + 2 NaN NaN} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test3(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "leave_unchanged". + """ + index1 = [0, 1] + index2 = [2, 3] + weights_data = {"w1": [0.2, 0.8]} + index_mode = "leave_unchanged" + expected_signature = r""" + {'w1': A B + 0 NaN NaN + 1 NaN NaN + 2 NaN NaN + 3 NaN NaN} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test4(self) -> None: + """ + Check that an assertion is raised if input is an empty dict. 
+ """ + dfs: Dict[str, pd.DataFrame] = {} + weights_data = {"w1": [0.2, 0.8]} + index_mode = "assert_equal" + with self.assertRaises(AssertionError) as cm: + hpandas.compute_weighted_sum( + dfs=dfs, + weights=pd.DataFrame(weights_data), + index_mode=index_mode, + ) + actual_signature = str(cm.exception) + expected_signature = r""" + * Failed assertion * + cond={} + dictionary of dfs must be nonempty + """ + expected_signature = hprint.dedent(expected_signature) + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# Test_get_value_counts_stats_df +# ############################################################################# + + +class Test_get_value_counts_stats_df(hunitest.TestCase): + """ + Test value counts statistics computation. + """ + + def helper( + self, + category_data: List[str], + num_rows: int, + expected: str, + ) -> None: + """ + Test value counts with given parameters. + """ + # Prepare inputs. + df = pd.DataFrame({"category": category_data}) + # Run test. + result_df = hpandas.get_value_counts_stats_df( + df, "category", num_rows=num_rows + ) + # Check outputs. + actual = str(result_df) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test basic value counts with default parameters. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] + num_rows = 10 + # Prepare outputs. + expected = """ + count pct [%] + category + A 5 50.0 + B 2 20.0 + C 2 20.0 + D 1 10.0 + """ + # Run test. + self.helper(category_data, num_rows, expected) + + def test2(self) -> None: + """ + Test limiting the number of rows returned. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] + num_rows = 2 + # Prepare outputs. + expected = """ + count pct [%] + category + A 5 50.0 + B 2 20.0 + """ + # Run test. 
+ self.helper(category_data, num_rows, expected) + + def test3(self) -> None: + """ + Test with num_rows=0 to return all rows. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B"] + num_rows = 0 + # Prepare outputs. + expected = """ + count pct [%] + category + A 3 50.000000 + B 2 33.333333 + C 1 16.666667 + """ + # Run test. + self.helper(category_data, num_rows, expected) + + +# ############################################################################# +# Test__get_unique_values_stats +# ############################################################################# + + +class Test__get_unique_values_stats(hunitest.TestCase): + """ + Test unique values count and percentage computation. + """ + + def helper(self, df_data: Dict, expected: str) -> None: + """ + Test unique values stats computation. + """ + # Prepare inputs. + df = pd.DataFrame(df_data) + # Run test. + result_df = hpanstat._get_unique_values_stats(df) + # Check outputs. + actual = str(result_df) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test basic unique values computation. + """ + df_data = { + "col1": [1, 2, 1, 3, 1], + "col2": ["a", "b", "a", "c", "d"], + "col3": [1.0, 1.0, 1.0, 1.0, 1.0], + } + expected = """ + num_unique unique [%] + col1 3 60.0 + col2 4 80.0 + col3 1 20.0 + """ + self.helper(df_data, expected) + + def test2(self) -> None: + """ + Test with NaN values. + """ + df_data = { + "col1": [1, 2, 1, None, 1], + "col2": ["a", "b", "a", None, "c"], + } + expected = """ + num_unique unique [%] + col1 2 40.0 + col2 3 60.0 + """ + self.helper(df_data, expected) + + def test3(self) -> None: + """ + Test with single unique value. 
+ """ + df_data = { + "col1": [5, 5, 5, 5], + "col2": ["x", "x", "x", "x"], + } + expected = """ + num_unique unique [%] + col1 1 25.0 + col2 1 25.0 + """ + self.helper(df_data, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py new file mode 100644 index 000000000..f11d6988a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py @@ -0,0 +1,1888 @@ +import csv +import io +import logging +import re +import time +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hdatetime as hdateti +import helpers.hpandas as hpandas +import helpers.hpandas_transform as hpantran +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_trim_df1 +# ############################################################################# + + +class Test_trim_df1(hunitest.TestCase): + def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame: + """ + Return a df where the CSV txt is read verbatim without inferring dates. + + The `start_time` column is thus a str. 
+ """ + txt = """ + ,start_time,egid,close + 4,2022-01-04 21:38:00.000000,13684,1146.48 + 8,2022-01-04 21:38:00.000000,17085,179.45 + 14,2022-01-04 21:37:00.000000,13684,1146.26 + 18,2022-01-04 21:37:00.000000,17085,179.42 + 24,2022-01-04 21:36:00.000000,13684,1146.0 + 27,2022-01-04 21:36:00.000000,17085,179.46 + 34,2022-01-04 21:35:00.000000,13684,1146.0 + 38,2022-01-04 21:35:00.000000,17085,179.42 + 40,2022-01-04 21:34:00.000000,17085,179.42 + 44,2022-01-04 21:34:00.000000,13684,1146.0 + """ + txt = hprint.dedent(txt) + df = pd.read_csv(io.StringIO(txt), *args, index_col=0, **kwargs) + df["start_time"] = pd.to_datetime(df["start_time"]) + return df + + def test_types1(self) -> None: + """ + Check the types of a df coming from `read_csv()`. + + The timestamps in `start_time` are left as strings. + """ + df = self.get_df() + # + actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def get_df_with_parse_dates(self) -> pd.DataFrame: + """ + Read the CSV parsing `start_time` as timestamps. + + The inferred type is a nasty `datetime64` which is not as well- + behaved as our beloved `pd.Timestamp`. 
+ """ + df = self.get_df(parse_dates=["start_time"]) + return df + + def test_types2(self) -> None: + """ + Check the types of a df coming from `read_csv()` forcing parsing some + values as dates. + """ + df = self.get_df_with_parse_dates() + # Check. + actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def get_df_with_tz_timestamp(self) -> pd.DataFrame: + """ + Force the column parsed as `datetime64` into a tz-aware object. + + The resulting object is a `datetime64[ns, tz]`. + """ + df = self.get_df_with_parse_dates() + # Apply the tz. + col_name = "start_time" + df[col_name] = ( + df[col_name].dt.tz_localize("UTC").dt.tz_convert("America/New_York") + ) + df[col_name] = pd.to_datetime(df[col_name]) + return df + + def test_types3(self) -> None: + """ + Check the types of a df coming from `read_csv()` after conversion to + tz-aware objects. + """ + df = self.get_df_with_tz_timestamp() + # Check. 
+ actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns, America/New_York] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 16:38:00-05:00 13684 1146.48 + 8 2022-01-04 16:38:00-05:00 17085 179.45 + 14 2022-01-04 16:37:00-05:00 13684 1146.26 + ... + 38 2022-01-04 16:35:00-05:00 17085 179.42 + 40 2022-01-04 16:34:00-05:00 17085 179.42 + 44 2022-01-04 16:34:00-05:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def helper( + self, + df: pd.DataFrame, + ts_col_name: Optional[str], + start_ts: Optional[pd.Timestamp], + end_ts: Optional[pd.Timestamp], + left_close: bool, + right_close: bool, + expected: str, + ) -> None: + """ + Run trimming and check the outcome. + + See param description in `hpandas.trim_df`. + + :param expected: the expected outcome of the trimming + """ + df_trim = hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + actual = hpandas.df_to_str(df_trim, print_shape_info=True, tag="df_trim") + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_trim_df1(self) -> None: + """ + Test trimming: baseline case. + """ + df = self.get_df() + # Run. 
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df2(self) -> None: + """ + Trim a df with a column that is `datetime64` without tz using a + `pd.Timestamp` without tz. + + This operation is valid. + """ + df = self.get_df_with_parse_dates() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df3(self) -> None: + """ + Trim a df with a column that is `datetime64` with tz vs a `pd.Timestamp` + with tz. + + This operation is valid. + """ + df = self.get_df_with_tz_timestamp() + # Run. 
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00", tz="UTC") + end_ts = pd.Timestamp("2022-01-04 21:38:00", tz="UTC") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 16:38:00-05:00 13684 1146.48 + 8 2022-01-04 16:38:00-05:00 17085 179.45 + 14 2022-01-04 16:37:00-05:00 13684 1146.26 + ... + 27 2022-01-04 16:36:00-05:00 17085 179.46 + 34 2022-01-04 16:35:00-05:00 13684 1146.00 + 38 2022-01-04 16:35:00-05:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + # pylint: disable=line-too-long + def test_trim_df4(self) -> None: + """ + Trim a df with a column that is `datetime64` with tz vs a + `pd.Timestamp` without tz. + + This operation is invalid and we expect an assertion. + """ + df = self.get_df_with_tz_timestamp() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + with self.assertRaises(TypeError) as cm: + hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + # Check. + actual = str(cm.exception) + expected = r""" + Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_trim_df5(self) -> None: + """ + Test filtering on the index. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45 + 2022-01-04 21:37:00 13684 1146.26 + ... 
+ 2022-01-04 21:36:00 17085 179.46 + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df6(self) -> None: + """ + Test excluding the lower boundary. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = False + right_close = True + expected = r"""# df_trim= + index=[4, 27] + columns=start_time,egid,close + shape=(6, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + 27 2022-01-04 21:36:00 17085 179.46""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df7(self) -> None: + """ + Test excluding the upper boundary. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = False + expected = r"""# df_trim= + index=[14, 38] + columns=start_time,egid,close + shape=(6, 3) + start_time egid close + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df8(self) -> None: + """ + Test filtering on a sorted column. + """ + df = self.get_df() + # Run. 
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + df = df.sort_values(ts_col_name) + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + ... + 18 2022-01-04 21:37:00 17085 179.42 + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df9(self) -> None: + """ + Test filtering on a sorted index. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42 + 2022-01-04 21:36:00 13684 1146.00 + ... + 2022-01-04 21:37:00 17085 179.42 + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df10(self) -> None: + """ + Test filtering on a sorted index, excluding lower and upper boundaries. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. 
+ ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = False + right_close = False + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:36:00, 2022-01-04 21:37:00] + columns=egid,close + shape=(4, 2) + egid close + start_time + 2022-01-04 21:36:00 13684 1146.00 + 2022-01-04 21:36:00 17085 179.46 + 2022-01-04 21:37:00 13684 1146.26 + 2022-01-04 21:37:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df11(self) -> None: + """ + Test filtering on a non-sorted column, with `start_ts` being None. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = None + end_ts = pd.Timestamp("2022-01-04 21:37:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[14, 44] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df12(self) -> None: + """ + Test filtering on a sorted index, with `end_ts` being None. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = None + left_close = True + right_close = True + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42 + 2022-01-04 21:36:00 13684 1146.00 + ... 
+ 2022-01-04 21:37:00 17085 179.42 + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + +# ############################################################################# +# Test_trim_df2 +# ############################################################################# + + +@pytest.mark.skip( + "Used for comparing speed of different trimming methods (CmTask1404)." +) +class Test_trim_df2(Test_trim_df1): + """ + Test the speed of different approaches to df trimming. + """ + + def get_data( + self, set_as_index: bool, sort: bool + ) -> Tuple[pd.DataFrame, str, pd.Timestamp, pd.Timestamp]: + """ + Get the data for experiments. + + :param set_as_index: whether to set the filtering values as + index + :param sort: whether to sort the filtering values + :return: the df to trim, the parameters for trimming + """ + # Get a large df. + df = self.get_df() + df = df.loc[df.index.repeat(100000)].reset_index(drop=True) + # Define the params. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + # Prepare the data. + if set_as_index: + df = df.set_index(ts_col_name, append=True, drop=False) + if sort: + df = df.sort_index(level=ts_col_name) + elif sort: + df = df.sort_values(ts_col_name) + return df, ts_col_name, start_ts, end_ts + + def check_trimmed_df( + self, + df: pd.DataFrame, + ts_col_name: str, + start_ts: pd.Timestamp, + end_ts: pd.Timestamp, + ) -> None: + """ + Confirm that the trimmed df matches what is expected. + + The trimmed df is compared to the one produced by + `hpandas.trim_df()` with lower and upper boundaries included. + Thus, it is ensured that all the trimming methods produce the + same output. + + See param descriptions in `hpandas.trim_df()`. + + :param df: the df trimmed in a test, to compare with the + `hpandas.trim_df()` one + """ + # Clean up the df from the test. 
+ if df.index.nlevels > 1: + df = df.droplevel(ts_col_name) + df = df.reset_index(drop=True) + df = df.sort_values(by=[ts_col_name, "egid"], ascending=[False, True]) + # Get the reference trimmed df. + left_close = True + right_close = True + df_trim_for_comparison = hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + assert df.equals(df_trim_for_comparison) + + def test_simple_mask_col(self) -> None: + """ + Trim with a simple mask; filtering on a column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + mask = df[ts_col_name] >= start_ts + df = df[mask] + if not df.empty: + mask = df[ts_col_name] <= end_ts + df = df[mask] + end_time = time.time() + _LOG.info( + "Simple mask trim (column): %.2f seconds", (end_time - start_time) + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_simple_mask_idx(self) -> None: + """ + Trim with a simple mask; filtering on an index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + mask = df.index.get_level_values(ts_col_name) >= start_ts + df = df[mask] + if not df.empty: + mask = df.index.get_level_values(ts_col_name) <= end_ts + df = df[mask] + end_time = time.time() + _LOG.info( + "Simple mask trim (index): %.2f seconds", (end_time - start_time) + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_between_col(self) -> None: + """ + Trim using `pd.Series.between`; filtering on a column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. 
+ start_time = time.time() + df = df[df[ts_col_name].between(start_ts, end_ts, inclusive="both")] + end_time = time.time() + _LOG.info( + "`pd.Series.between` trim (column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_between_idx(self) -> None: + """ + Trim using `pd.Series.between`; filtering on an index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + filter_values = pd.Series( + df.index.get_level_values(ts_col_name) + ).between(start_ts, end_ts, inclusive="both") + df = df.droplevel(ts_col_name) + df = df[filter_values] + end_time = time.time() + _LOG.info( + "`pd.Series.between` trim (index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_non_sorted_col(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a non-sorted column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.set_index(df[ts_col_name], append=True).sort_index( + level=ts_col_name + ) + df = df.swaplevel() + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (non-sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_non_sorted_idx(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a non-sorted index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + df = df.swaplevel() + # Run. 
+ start_time = time.time() + df = df.sort_index(level=ts_col_name) + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (non-sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_sorted_col(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a sorted column. + """ + set_as_index = False + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.set_index(ts_col_name, drop=False) + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_sorted_idx(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a sorted index. + """ + set_as_index = True + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + df = df.swaplevel() + # Run. + start_time = time.time() + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_non_sorted_col(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a non-sorted column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. 
+ start_time = time.time() + df = df.sort_values(ts_col_name, ascending=True) + left_idx = df[ts_col_name].searchsorted(start_ts, side="left") + right_idx = df[ts_col_name].searchsorted(end_ts, side="right") + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (non-sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_non_sorted_idx(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a non-sorted index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.sort_index(level=ts_col_name) + left_idx = df.index.get_level_values(ts_col_name).searchsorted( + start_ts, side="left" + ) + right_idx = df.index.get_level_values(ts_col_name).searchsorted( + end_ts, side="right" + ) + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (non-sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_sorted_col(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a sorted column. + """ + set_as_index = False + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + left_idx = df[ts_col_name].searchsorted(start_ts, side="left") + right_idx = df[ts_col_name].searchsorted(end_ts, side="right") + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. 
+ self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_sorted_idx(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a sorted index. + """ + set_as_index = True + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + left_idx = df.index.get_level_values(ts_col_name).searchsorted( + start_ts, side="left" + ) + right_idx = df.index.get_level_values(ts_col_name).searchsorted( + end_ts, side="right" + ) + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + +# ############################################################################# +# Test_assemble_df_rows +# ############################################################################# + + +class Test_assemble_df_rows(hunitest.TestCase): + """ + Test assembling df values into a column-row structure. + """ + + @staticmethod + def get_rows_values_example(df_as_str: str) -> hpantran.RowsValues: + """ + Prepare the input. + """ + # Separate the rows. + rows = df_as_str.split("\n") + # Clean up extra spaces. + rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] + # Identify individual values in the rows. + rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) + return rows_values + + def test1(self) -> None: + """ + Test unnamed index, compact df. + """ + # Get the input. + df_as_str = """ + col1 col2 col3 col4 + 0 0.1 0.1 0.1 0.1 + 1 0.2 0.2 0.2 0.2""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. 
+ expected = [ + ["", "col1", "col2", "col3", "col4"], + ["0", "0.1", "0.1", "0.1", "0.1"], + ["1", "0.2", "0.2", "0.2", "0.2"], + ] + self.assertListEqual(actual, expected) + + def test2(self) -> None: + """ + Test unnamed index, large df. + """ + # Get the input. + df_as_str = """ + column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 + 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 + 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. + expected = [ + [ + "", + "column_with_a_very_long_name_1", + "column_with_a_very_long_name_2", + "column_with_a_very_long_name_3", + "column_with_a_very_long_name_4", + "column_with_a_very_long_name_5", + ], + [ + "0", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + [ + "1", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + ] + self.assertListEqual(actual, expected) + + def test3(self) -> None: + """ + Test named index, compact df. + """ + # Get the input. + df_as_str = """ + col1 col2 col3 col4 + idx + 0 0.1 0.1 0.1 0.1 + 1 0.2 0.2 0.2 0.2""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. 
+ expected = [ + ["idx", "col1", "col2", "col3", "col4"], + ["0", "0.1", "0.1", "0.1", "0.1"], + ["1", "0.2", "0.2", "0.2", "0.2"], + ] + self.assertListEqual(actual, expected) + + def test4(self) -> None: + """ + Test named index, large df. + """ + # Get the input. + df_as_str = """ + column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 + idx + 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 + 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. + expected = [ + [ + "idx", + "column_with_a_very_long_name_1", + "column_with_a_very_long_name_2", + "column_with_a_very_long_name_3", + "column_with_a_very_long_name_4", + "column_with_a_very_long_name_5", + ], + [ + "0", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + [ + "1", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + ] + self.assertListEqual(actual, expected) + + +# ############################################################################# +# Test_str_to_df +# ############################################################################# + + +class Test_str_to_df(hunitest.TestCase): + """ + Test converting a string representation of a dataframe into a Pandas df. + """ + + def test1(self) -> None: + # Prepare input. 
+ df_as_str = """ + col1 col2 col3 col4 + 0 0.1 a None 2020-01-01 + 1 0.2 "b c" None 2021-05-05""" + col_to_type = { + "__index__": int, + "col1": float, + "col2": str, + "col3": None, + "col4": pd.Timestamp, + } + col_to_name_type: Dict[str, type] = {} + # Run. + actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) + # Check. + expected = pd.DataFrame( + { + "col1": [0.1, 0.2], + "col2": ["a", "b c"], + "col3": [None, None], + "col4": [ + pd.Timestamp("2020-01-01"), + pd.Timestamp("2021-05-05"), + ], + }, + index=[0, 1], + ) + hunitest.compare_df(actual, expected) + + def test2(self) -> None: + """ + Run a full circle check. + + The df used for testing: + + 1 2 + end_timestamp + 2023-08-15 0.21 1.7 + 2023-08-16 0.22 1.8 + 2023-08-17 0.23 1.9 + """ + # Create a df from the data. + data = { + 1: [0.21, 0.22, 0.23], + 2: [1.7, 1.8, 1.9], + } + timestamps = [ + pd.Timestamp("2023-08-15"), + pd.Timestamp("2023-08-16"), + pd.Timestamp("2023-08-17"), + ] + expected = pd.DataFrame(data, index=timestamps) + expected.index.name = "end_timestamp" + # Convert the df into a string. + df_as_str = hpandas.df_to_str(expected) + # Convert the resulting string back into a df. + col_to_type = { + "__index__": pd.Timestamp, + "1": float, + "2": float, + } + col_to_name_type = { + "1": int, + "2": int, + } + actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) + # Check that the initial df and the final df are the same. + hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestFindGapsInDataframes +# ############################################################################# + + +class TestFindGapsInDataframes(hunitest.TestCase): + def test_find_gaps_in_dataframes(self) -> None: + """ + Verify that gaps are caught. + """ + # Prepare inputs. 
+ test_data = pd.DataFrame( + data={ + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + ) + # Run. + missing_data = hpandas.find_gaps_in_dataframes( + test_data.head(2), test_data.tail(2) + ) + # Check output. + actual = pd.concat(missing_data) + actual = hpandas.df_to_str(actual) + expected = r""" dummy_value_1 dummy_value_2 dummy_value_3 + 2 3 C 0 + 0 1 A 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestSubsetDf1 +# ############################################################################# + + +class TestSubsetDf1(hunitest.TestCase): + def test1(self) -> None: + # Generate some random data. + np.random.seed(42) + df = pd.DataFrame( + np.random.randint(0, 100, size=(20, 4)), columns=list("ABCD") + ) + # Subset. + df2 = hpandas.subset_df(df, nrows=5, seed=43) + # Check. + actual = hpandas.df_to_str(df2) + expected = r""" + A B C D + 0 51 92 14 71 + 1 60 20 82 86 + 3 23 2 21 52 + ... + 17 80 35 49 3 + 18 1 5 53 3 + 19 53 92 62 17 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestCheckAndFilterMatchingColumns +# ############################################################################# + + +class TestCheckAndFilterMatchingColumns(hunitest.TestCase): + """ + Test that matching columns are filtered correctly. 
+ """ + + @staticmethod + def get_test_data() -> pd.DataFrame: + df = pd.DataFrame( + data=[[3, 4, 5]] * 3, + columns=["col1", "col2", "col3"], + ) + return df + + def test_check_and_filter_matching_columns1(self) -> None: + """ + - required columns = received columns + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col2", "col3"] + filter_data_mode = "assert" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + self.assert_equal(str(actual_columns), str(columns)) + + def test_check_and_filter_matching_columns2(self) -> None: + """ + - received columns contain some columns apart from required ones + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col3"] + filter_data_mode = "assert" + with self.assertRaises(AssertionError): + hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + + def test_check_and_filter_matching_columns3(self) -> None: + """ + - received columns do not contain some of required columns + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col4"] + filter_data_mode = "assert" + with self.assertRaises(AssertionError): + hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + + def test_check_and_filter_matching_columns4(self) -> None: + """ + - received columns contain some columns apart from required ones + - `filter_data_mode` = "warn_and_trim" + """ + df = self.get_test_data() + columns = ["col1", "col3"] + filter_data_mode = "warn_and_trim" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + self.assert_equal(str(actual_columns), str(columns)) + + def test_check_and_filter_matching_columns5(self) -> None: + """ + - received columns do not contain some of required columns + - `filter_data_mode` = "warn_and_trim" + """ + df = 
self.get_test_data() + columns = ["col1", "col2", "col4"] + filter_data_mode = "warn_and_trim" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + expected_columns = ["col1", "col2"] + self.assert_equal(str(actual_columns), str(expected_columns)) + + +# ############################################################################# + + +# ############################################################################# +# Test_merge_dfs1 +# ############################################################################# + + +class Test_merge_dfs1(hunitest.TestCase): + """ + Test that 2 dataframes are merged correctly. + """ + + @staticmethod + def get_dataframe(data: Dict, index: List[int]) -> pd.DataFrame: + df = pd.DataFrame.from_dict(data) + index = pd.Index(index) + df = df.set_index(index, drop=True) + return df + + def test1(self) -> None: + """ + Overlap of `threshold_col` values is 100%. + """ + # Create test data. + data1 = { + "col1": [1, 10, 100], + "col2": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 70, 700], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + merged_df = hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + # Set expected values. 
+ expected_length = 3 + expected_column_names = [ + "col1", + "col2", + "col3", + "col4", + "col5", + "threshold_col", + ] + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[0, 2] + columns=col1,col2,col3,threshold_col,col4,col5 + shape=(3, 6) + col1 col2 col3 threshold_col col4 col5 + 0 1 2.0 3 7 4 5.0 + 1 10 NaN 30 70 40 NaN + 2 100 200.0 300 700 400 500.0 + """ + # Check. + self.check_df_output( + merged_df, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Overlap of `threshold_col` values is below the threshold. + """ + # Create test data. + data1 = { + "col1": [1, 10, 100], + "col2": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 60, 600], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + # Check. + with self.assertRaises(AssertionError): + hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + + def test3(self) -> None: + """ + Overlap of `threshold_col` values is above the threshold. + """ + # Create test data. 
+ data1 = { + "col1": [1, 3, 5, 7, 10, 100, 100, 100, 100, 10, 10], + "col2": [2, 4, 6, 8, np.nan, 200, 200, np.nan, 10, 10, 100], + "col3": [1, 2, 3, 4, 30, 300, 300, np.nan, 300, 300, 30], + "threshold_col": [0, 1, 3, 5, 7, 9, 11, 13, 15, 70, 700], + } + index1 = range(0, 11) + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300, 1, 2, 3, 4, 30, 300, 300, np.nan], + "col4": [4, 40, 400, 2, 4, 6, 8, 11, 13, 15, 70], + "col5": [5, np.nan, 500, 5, 7, 10, 1, 2, 3, 4, 30], + "threshold_col": [1, 2, 3, 5, 7, 9, 11, 13, 15, 70, 700], + } + index2 = range(9, 20) + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + merged_df = hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + # Set expected values. + expected_length = 20 + expected_column_names = [ + "col1", + "col2", + "col3", + "col4", + "col5", + "threshold_col", + ] + expected_column_unique_values = None + # This is required by `pandas` >= 2.2. + expected_signature = r""" + # df= + index=[0, 19] + columns=col1,col2,col3,threshold_col,col4,col5 + shape=(20, 6) + col1 col2 col3 threshold_col col4 col5 + 0 1.0 2.0 1.0 0 NaN NaN + 1 NaN NaN 1.0 5 2.0 5.0 + 2 3.0 4.0 2.0 1 NaN NaN + ... + 17 10.0 10.0 300.0 70 15.0 4.0 + 18 100.0 NaN NaN 13 NaN NaN + 19 NaN NaN NaN 700 70.0 30.0 + """ + # Check. + self.check_df_output( + merged_df, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test4(self) -> None: + """ + There are common columns (besides columns to merge on) in dataframes. + """ + # Create test data. 
+ data1 = { + "col1": [1, 10, 100], + "col5": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 70, 700], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + # Check. + with self.assertRaises(AssertionError): + hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + + +# ############################################################################# +# Test_apply_index_mode +# ############################################################################# + + +class Test_apply_index_mode(hunitest.TestCase): + @staticmethod + def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Generate toy dataframes for the test. + """ + # Define common columns. + columns = ["A", "B"] + # Build dataframes with intersecting indices. + idx1 = [0, 1, 2, 3, 4] + data1 = [ + [0.21, 0.44], + [0.11, 0.42], + [1.99, 0.8], + [3.1, 0.91], + [3.5, 1.4], + ] + df1 = pd.DataFrame(data1, columns=columns, index=idx1) + # + idx2 = [0, 6, 2, 3, 5] + data1 = [ + [0.1, 0.4], + [0.11, 0.2], + [1.29, 0.38], + [0.1, 0.9], + [3.3, 2.4], + ] + df2 = pd.DataFrame(data1, columns=columns, index=idx2) + return df1, df2 + + def test1(self) -> None: + """ + Check that returned dataframes have indices that are equal to the + common index. + + - `mode="intersect"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + # Use an index intersection to transform dataframes. + mode = "intersect" + df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) + # Check that indices are common. 
+ common_index = df1_in.index.intersection(df2_in.index) + common_index = hpandas.df_to_str(common_index) + idx1 = hpandas.df_to_str(df1_out.index) + idx2 = hpandas.df_to_str(df2_out.index) + self.assert_equal(idx1, common_index) + self.assert_equal(idx2, common_index) + + def test2(self) -> None: + """ + Check that dataframe indices did not change after applying an index + mode. + + - `mode="leave_unchanged"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "leave_unchanged" + df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) + # Check that indices are as-is. + df1_in_idx = hpandas.df_to_str(df1_in.index) + df1_out_idx = hpandas.df_to_str(df1_out.index) + self.assert_equal(df1_in_idx, df1_out_idx) + # + df2_in_idx = hpandas.df_to_str(df2_in.index) + df2_out_idx = hpandas.df_to_str(df2_out.index) + self.assert_equal(df2_in_idx, df2_out_idx) + + def test3(self) -> None: + """ + Check that an assertion is raised when indices are not equal. + + - `mode="assert_equal"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "assert_equal" + # Check that both indices are equal, assert otherwise. + with self.assertRaises(AssertionError) as cm: + hpandas.apply_index_mode(df1_in, df2_in, mode) + actual = str(cm.exception) + # Check the error exception message. + self.check_string(actual) + + +# ############################################################################# +# Test_apply_column_mode +# ############################################################################# + + +class Test_apply_column_mode(hunitest.TestCase): + """ + Test that function applies column modes correctly. + """ + + @staticmethod + def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Generate toy dataframes for the test. + """ + # Build dataframes with intersecting columns. 
+ columns_1 = ["A", "B"] + data1 = [ + [0.21, 0.44], + [0.11, 0.42], + [1.99, 0.8], + [3.1, 0.91], + [3.5, 1.4], + ] + df1 = pd.DataFrame(data1, columns=columns_1) + # + columns_2 = ["A", "C"] + data2 = [ + [0.1, 0.4], + [0.11, 0.2], + [1.29, 0.38], + [0.1, 0.9], + [3.3, 2.4], + ] + df2 = pd.DataFrame(data2, columns=columns_2) + return df1, df2 + + def test1(self) -> None: + """ + Check that returned dataframes have columns that are equal to the + common ones. + + - `mode="intersect"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + # Use a column intersection mode to transform dataframes. + mode = "intersect" + df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) + # Check that dfs have equal column names. + common_columns = df1_in.columns.intersection(df2_in.columns) + common_columns = hpandas.df_to_str(common_columns) + columns1 = hpandas.df_to_str(df1_out.columns) + self.assert_equal(columns1, common_columns) + # + columns2 = hpandas.df_to_str(df2_out.columns) + self.assert_equal(columns2, common_columns) + + def test2(self) -> None: + """ + Check that dataframes' columns did not change after applying a column + mode. + + - `mode="leave_unchanged"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "leave_unchanged" + df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) + # Check that columns are as-is. + df1_in_columns = hpandas.df_to_str(df1_in.columns) + df1_out_columns = hpandas.df_to_str(df1_out.columns) + self.assert_equal(df1_in_columns, df1_out_columns) + # + df2_in_columns = hpandas.df_to_str(df2_in.columns) + df2_out_columns = hpandas.df_to_str(df2_out.columns) + self.assert_equal(df2_in_columns, df2_out_columns) + + def test3(self) -> None: + """ + Check that an assertion is raised when columns are not equal. + + - `mode="assert_equal"` + """ + # Get test data. 
+ df1_in, df2_in = self.get_test_data() + mode = "assert_equal" + # Check that both dataframes columns are equal, assert otherwise. + with self.assertRaises(AssertionError) as cm: + hpandas.apply_columns_mode(df1_in, df2_in, mode) + actual = str(cm.exception) + # Compare the actual outcome with an expected one. + self.check_string(actual) + + +# ############################################################################# + + +# ############################################################################# +# Test_get_df_from_iterator +# ############################################################################# + + +class Test_get_df_from_iterator(hunitest.TestCase): + def test1(self) -> None: + """ + Check that a dataframe is correctly built from an iterator of + dataframes. + """ + # Build iterator of dataframes for the test. + data1 = { + "num_col": [1, 2], + "str_col": ["A", "B"], + } + df1 = pd.DataFrame(data=data1) + data2 = { + "num_col": [3, 4], + "str_col": ["C", "D"], + } + df2 = pd.DataFrame(data=data2) + data3 = { + "num_col": [5, 6], + "str_col": ["E", "F"], + } + df3 = pd.DataFrame(data=data3) + # Run. + iter_ = iter([df1, df2, df3]) + df = hpandas.get_df_from_iterator(iter_) + actual_signature = hpandas.df_to_str(df) + expected_signature = """ num_col str_col + 0 1 A + 0 3 C + 0 5 E + 1 2 B + 1 4 D + 1 6 F + """ + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# TestFilterByTime +# ############################################################################# + + +class TestFilterByTime(hunitest.TestCase): + @staticmethod + def _get_test_data() -> pd.DataFrame: + """ + Get data for testing. 
+ + :return: data for testing + """ + df = pd.DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [ + hdateti.to_datetime("2018-04-05"), + hdateti.to_datetime("2018-04-06"), + hdateti.to_datetime("2018-04-07"), + hdateti.to_datetime("2018-04-08"), + ], + } + ) + df.index = pd.date_range("2017-01-01", periods=4) + return df + + def test_filter_by_index1(self) -> None: + """ + Verify that `[lower_bound, upper_bound)` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="left", + ts_col_name=None, + ) + expected = df[1:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index2(self) -> None: + """ + Verify that `(lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="right", + ts_col_name=None, + ) + expected = df[2:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index3(self) -> None: + """ + Verify that `[lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name=None, + ) + expected = df[1:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index4(self) -> None: + """ + Verify that `(lower_bound, upper_bound)` works. 
+ """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="neither", + ts_col_name=None, + ) + expected = df[2:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column1(self) -> None: + """ + Verify that `[lower_bound, upper_bound)` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="left", + ts_col_name="col2", + ) + expected = df[1:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column2(self) -> None: + """ + Verify that `(lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="right", + ts_col_name="col2", + ) + expected = df[2:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column3(self) -> None: + """ + Verify that `[lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name="col2", + ) + expected = df[1:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column4(self) -> None: + """ + Verify that `(lower_bound, upper_bound)` works. 
+ """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="neither", + ts_col_name="col2", + ) + expected = df[2:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_no_intersection(self) -> None: + """ + Verify that if time interval is not covered by data then empty + DataFrame is returned. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2021-04-06") + upper_bound = hdateti.to_datetime("2021-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name=None, + ) + self.assertEqual(actual.shape[0], 0) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py new file mode 100644 index 000000000..67eddb250 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py @@ -0,0 +1,251 @@ +import logging + +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_df_to_str +# ############################################################################# + + +class Test_df_to_str(hunitest.TestCase): + @staticmethod + def get_test_data() -> pd.DataFrame: + test_data = { + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + df = pd.DataFrame(data=test_data) + return df + + def test_df_to_str1(self) -> None: + """ + Test common call to 
`df_to_str` with basic df. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str2(self) -> None: + """ + Test common call to `df_to_str` with tag. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, tag="df") + expected = r"""# df= + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str3(self) -> None: + """ + Test common call to `df_to_str` with print_shape_info. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_shape_info=True) + expected = r""" + index=[0, 2] + columns=dummy_value_1,dummy_value_2,dummy_value_3 + shape=(3, 3) + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str4(self) -> None: + """ + Test common call to `df_to_str` with print_dtypes. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_dtypes=True) + expected = r""" + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 + 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 + 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A + 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str5(self) -> None: + """ + Test common call to `df_to_str` with multiple args. 
+ """ + df = self.get_test_data() + actual = hpandas.df_to_str( + df, print_shape_info=True, print_dtypes=True, tag="df" + ) + expected = r""" + # df= + index=[0, 2] + columns=dummy_value_1,dummy_value_2,dummy_value_3 + shape=(3, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 + 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 + 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A + 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str6(self) -> None: + """ + Test common call to `df_to_str` with `pd.Series`. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df["dummy_value_2"]) + expected = r""" + dummy_value_2 + 0 A + 1 B + 2 C + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str7(self) -> None: + """ + Test common call to `df_to_str` with `pd.Index`. + """ + df = self.get_test_data() + index = df.index + index.name = "index_name" + actual = hpandas.df_to_str(index) + expected = r""" + index_name + 0 0 + 1 1 + 2 2 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str8(self) -> None: + """ + Test that `-0.0` is replaced with `0.0`. + """ + test_data = { + "dummy_value_1": [1, 2, 3, 4], + "dummy_value_2": ["A", "B", "C", "D"], + "dummy_value_3": [0, 0, 0, 0], + "dummy_value_4": [+0.0, -0.0, +0.0, -0.0], + } + df = pd.DataFrame(data=test_data) + actual = hpandas.df_to_str(df, handle_signed_zeros=True) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 dummy_value_4 + 0 1 A 0 0.0 + 1 2 B 0 0.0 + 2 3 C 0 0.0 + 3 4 D 0 0.0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str9(self) -> None: + """ + Test that `-0.0` is replaced with `0.0` in a multi-index dataframe. 
+ """ + test_data = { + ("A", "X"): [-0.0, 5.0, -0.0], + ("A", "Y"): [2, 6, 0], + ("B", "X"): [0, 7, 3], + ("B", "Y"): [4.4, -0.0, 5.1], + } + df = pd.DataFrame(data=test_data) + actual = hpandas.df_to_str(df, handle_signed_zeros=True) + expected = r""" + A B + X Y X Y + 0 0.0 2 0 4.4 + 1 5.0 6 7 0.0 + 2 0.0 0 3 5.1""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str10(self) -> None: + """ + Test common call to `df_to_str` with `print_memory_usage = True`. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_memory_usage=True) + # This is required by `numpy` >= 2.1.0 + expected = r""" + * memory= + shallow deep + Index 132.0 b 132.0 b + dummy_value_1 24.0 b 24.0 b + dummy_value_2 24.0 b 150.0 b + dummy_value_3 24.0 b 24.0 b + total 204.0 b 330.0 b + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_head +# ############################################################################# + + +class Test_head(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic head functionality without seed. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col2": ["a", "b", "c", "d", "e"], + } + ) + hpandas.head(df, num_rows=2) + + def test2(self) -> None: + """ + Test head with a seed for reproducible sampling. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": list(range(10)), + "col2": list("abcdefghij"), + } + ) + hpandas.head(df, seed=42, num_rows=3) + + def test3(self) -> None: + """ + Test head with different num_rows parameter. + """ + # Prepare input. 
+ df = pd.DataFrame( + { + "col1": list(range(5)), + "col2": list("abcde"), + } + ) + hpandas.head(df, num_rows=4) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py new file mode 100644 index 000000000..a1be56d40 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py @@ -0,0 +1,1468 @@ +import datetime +import logging +import os +import random +from typing import Any, List, Optional, Tuple + +import pandas as pd +import pyarrow +import pyarrow.parquet as parquet +import pytest + +import helpers.hdbg as hdbg +import helpers.hmoto as hmoto +import helpers.hpandas as hpandas +import helpers.hparquet as hparque +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# Most of these unit tests are taken from +# `amp/helpers/notebooks/gallery_parquet.ipynb` + + +def _get_df(date: datetime.date, seed: int = 42) -> pd.DataFrame: + """ + Create pandas random data, like: + + ``` + idx instr val1 val2 + 2000-01-01 0 A 99 30 + 2000-01-02 0 A 54 46 + 2000-01-03 0 A 85 86 + ``` + """ + instruments = "A B C D E".split() + date = pd.Timestamp(date, tz="America/New_York") + start_date = date.replace(hour=9, minute=30) + end_date = date.replace(hour=16, minute=0) + df_idx = pd.date_range(start_date, end_date, freq="5T") + _LOG.debug("df_idx=[%s, %s]", min(df_idx), max(df_idx)) + _LOG.debug("len(df_idx)=%s", len(df_idx)) + random.seed(seed) + # For each instruments generate random data. 
+ df = [] + for idx, inst in enumerate(instruments): + df_tmp = pd.DataFrame( + { + "idx": idx, + "instr": inst, + "val1": [random.randint(0, 100) for _ in range(len(df_idx))], + "val2": [random.randint(0, 100) for _ in range(len(df_idx))], + }, + index=df_idx, + ) + df.append(df_tmp) + # Create a single df for all the instruments. + df = pd.concat(df) + return df + + +def _get_test_df_with_timestamps() -> pd.DataFrame: + """ + Create a DataFrame with timestamps. + """ + timestamp = pd.Timestamp("2022-01-01 00:00:00.123456", tz="America/New_York") + index = [timestamp for _ in range(6)] + df = pd.DataFrame( + { + "n_legs": [2, 2, 4, 4, 5, 100], + "animal": [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede", + ], + "year": [2001, 2002, 2001, 2003, 2003, 2001], + }, + index=index, + ) + knowledge_timestamp = pd.Timestamp.now(tz="UTC") + df["knowledge_timestamp"] = knowledge_timestamp + return df + + +def _get_df_example1() -> pd.DataFrame: + date = datetime.date(2020, 1, 1) + df = _get_df(date) + _LOG.debug("df=\n%s", df.head(3)) + return df + + +def _compare_dfs(self: Any, df1: pd.DataFrame, df2: pd.DataFrame) -> str: + df1_as_str: str = hpandas.df_to_str(df1, print_shape_info=True, tag="") + df2_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="") + self.assert_equal(df1_as_str, df2_as_str, fuzzy_match=True) + # When Parquet reads a partitioned dataset, it can convert partitioning columns into
+ pd.testing.assert_frame_equal( + df1, df2, check_dtype=False, check_categorical=False + ) + return df1_as_str + + +# ############################################################################# + + +# ############################################################################# +# TestParquet1 +# ############################################################################# + + +class TestParquet1(hunitest.TestCase): + def test_get_df1(self) -> None: + """ + Check the output of `_get_df()`. + """ + # Prepare data. + df = _get_df_example1() + # Check. + actual = hpandas.df_to_str(df, print_shape_info=True, tag="df") + expected = r"""# df= + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(395, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ... + 2020-01-01 15:50:00-05:00 4 E 57 3 + 2020-01-01 15:55:00-05:00 4 E 33 50 + 2020-01-01 16:00:00-05:00 4 E 96 75""" + self.assert_equal(actual, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def get_file_name(self) -> str: + dir_name = self.get_scratch_space() + file_name = os.path.join(dir_name, "df.parquet") + return file_name + + def write_data_as_parquet(self) -> Tuple[pd.DataFrame, str]: + # Prepare data. + df = _get_df_example1() + # Save data. + file_name = self.get_file_name() + hparque.to_parquet(df, file_name, log_level=logging.INFO) + return df, file_name + + def write_and_read_helper(self, columns: List[str]) -> None: + """ + - Save a dataframe as Parquet + - Read back certain columns of the data from the file + - Check that the df is what expected + """ + df, file_name = self.write_data_as_parquet() + # Read back one column of the data. + df2 = hparque.from_parquet( + file_name, columns=columns, log_level=logging.INFO + ) + _LOG.debug("df2=\n%s", df2.head(3)) + # Check. 
+ df = df[columns] + _compare_dfs(self, df, df2) + + def test_write_and_read_everything1(self) -> None: + """ + Read all the columns from the file. + """ + df, file_name = self.write_data_as_parquet() + # Read data back. + df2 = hparque.from_parquet(file_name, log_level=logging.INFO) + _LOG.debug("df2=\n%s", df2.head(3)) + # Check. + _compare_dfs(self, df, df2) + + def test_write_and_read_one_column1(self) -> None: + """ + - Read back one column of the data from the file. + """ + # Read back one column of the data. + columns = ["val1"] + self.write_and_read_helper(columns) + + def test_write_and_read_two_columns1(self) -> None: + """ + Read back two columns of the data from the file. + """ + # Read back two columns of the data. + columns = ["idx", "val1"] + self.write_and_read_helper(columns) + + # ////////////////////////////////////////////////////////////////////////////// + + def read_filtered_parquet( + self, file_name: str, filters: Any + ) -> pd.DataFrame: + filesystem = None + dataset = parquet.ParquetDataset( + file_name, + filesystem=filesystem, + filters=filters, + ) + columns = None + table = dataset.read(columns=columns) + df = table.to_pandas() + _LOG.debug("df=\n%s", df.head(3)) + return df + + def test_read_with_filter1(self) -> None: + """ + Read only a subset of the rows. + """ + _, file_name = self.write_data_as_parquet() + # Read. + filters = [] + filters.append([("idx", "=", 0)]) + df2 = self.read_filtered_parquet(file_name, filters) + # Check. + actual = hpandas.df_to_str(df2, print_shape_info=True, tag="df") + expected = r"""# df= + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(79, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ...
+ 2020-01-01 15:50:00-05:00 0 A 29 76 + 2020-01-01 15:55:00-05:00 0 A 12 8 + 2020-01-01 16:00:00-05:00 0 A 48 49""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_write_and_read_partition_parquet_files_with_unit(self) -> None: + """ + Write the Pandas DataFrame to partitioned Parquet files and read it + back, verifying the retention of time unit information in the index. + """ + # Prepare test data. + dst_dir = os.path.join(self.get_scratch_space(), "tmp.partition_parquet") + initial_df = _get_test_df_with_timestamps() + initial_df.index = initial_df.index.as_unit("us") + partition_columns = initial_df.columns.tolist() + # The `to_partitioned_parquet` saves the given dataframe as Parquet + # files partitioned along the given columns. + hparque.to_partitioned_parquet(initial_df, partition_columns, dst_dir) + df_from_parquet_files = hparque.from_parquet(dst_dir) + # Check that the time unit is ns. + self.assert_equal("ns", df_from_parquet_files.index.unit) + # TODO(Vlad): Refactor after CmampTask7331 is resolved. + # self.assert_equal(initial_df.index.unit, df.index.unit) + + def test_write_and_read_parquet_file_with_unit(self) -> None: + """ + Write the provided DataFrame to Parquet file and read it back, + verifying the retention of time unit information in the index. + """ + test_parquet_file = os.path.join( + self.get_scratch_space(), "tmp_dummy.parquet" + ) + initial_df = _get_test_df_with_timestamps() + initial_df.index = initial_df.index.as_unit("us") + # The `to_parquet` function writes a DF to a single parquet file without + # any partition. + hparque.to_parquet(initial_df, test_parquet_file) + df = hparque.from_parquet(test_parquet_file) + self.assert_equal("ns", df.index.unit) + # TODO(Vlad): Refactor after CmampTask7331 is resolved. 
+ # self.assert_equal(initial_df.index.unit, df.index.unit) + + @pytest.mark.skip(reason="TODO(Juraj): HelpersTask21.") + def test_save_read_concat_data(self) -> None: + """ + Verify that data produced by different version of Pandas preserves + types when reading/writing to/from Parquet. + """ + # Copy sample data that saved with the Pandas v.1.5.1 from S3 to the + # scratch dir. + s3_path = self.get_s3_input_dir() + local_path = self.get_scratch_space() + aws_profile = "ck" + hs3.copy_data_from_s3_to_local_dir(s3_path, local_path, aws_profile) + # Read sample data from the scratch dir. + sample_data = hparque.from_parquet(local_path) + # Generate artificial test data. + data = { + "timestamp": [1696896000000], + "open": [27578.4], + "high": [27584.3], + "low": [27571.2], + "close": [27571.3], + "volume": [154.933], + "exchange_id": ["binance"], + "knowledge_timestamp": [ + pd.Timestamp("2023-11-06 14:15:11.241716+0000", tz="UTC") + ], + } + index = pd.Series( + [pd.Timestamp("2023-10-10T00:00:00+00:00")], name="timestamp" + ) + test_data = pd.DataFrame(data, index=index) + # Concatenate sample and test data and save it to the scratch dir. + combined_test_data = pd.concat([sample_data, test_data]) + local_combined_file_path = os.path.join( + local_path, "combined_dummy.parquet" + ) + hparque.to_parquet(combined_test_data, local_combined_file_path) + # Read the data back from the scratch dir. + actual_df = hparque.from_parquet(local_combined_file_path) + # Check that the data types the same as in the sample data. 
+ dtypes_sample = str(sample_data.dtypes) + dtypes_actual = str(actual_df.dtypes) + self.assert_equal(dtypes_sample, dtypes_actual, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# TestPartitionedParquet1 +# ############################################################################# + + +class TestPartitionedParquet1(hunitest.TestCase): + # From https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data + # A dataset can exploit a nested structure, where the sub-dir names hold + # information about which subset of the data is stored in that dir + # E.g., "Hive" partitioning scheme "key=vale" dir names + + def write_partitioned_dataset_and_check( + self, + df: pd.DataFrame, + partition_cols: List[str], + exp_dir_signature: Optional[str], + ) -> str: + """ + - Write df as a partitioned dataset + - (Optional) Check the signature of the directory + + :param partition_cols: columns used for + :param exp_dir_signature: expected signature of the written directory + :return path to the saved Parquet data + """ + _LOG.debug(hprint.to_str("partition_cols")) + # Prepare data. + dir_name = os.path.join(self.get_scratch_space(), "data.parquet") + table = pyarrow.Table.from_pandas(df) + # Write partitioned dataset. + parquet.write_to_dataset( + table, + dir_name, + partition_cols, + ) + # Check dir signature. 
+ if exp_dir_signature is not None: + include_file_content = False + remove_dir_name = True + dir_signature = hunitest.get_dir_signature( + dir_name, include_file_content, remove_dir_name=remove_dir_name + ) + self.assert_equal( + dir_signature, + exp_dir_signature, + fuzzy_match=True, + purify_text=True, + ) + return dir_name + + def write_and_read_helper( + self, + df: pd.DataFrame, + partition_cols: List[str], + exp_dir_signature: Optional[str], + columns_to_read: Optional[List[str]], + ) -> str: + """ + - Write df as a partitioned dataset using `partitioned_cols` + - Read certain column back + + :param partition_cols: columns used for + :param exp_dir_signature: expected signature of the written directory + :return: read df as string + """ + _LOG.debug(hprint.to_str("partition_cols columns_to_read")) + # Write and check. + dir_name = self.write_partitioned_dataset_and_check( + df, partition_cols, exp_dir_signature + ) + # Read back certain columns. + df2 = hparque.from_parquet( + dir_name, columns=columns_to_read, log_level=logging.INFO + ) + # Compare. + if columns_to_read is not None: + df = df[columns_to_read] + # + hdbg.dassert_set_eq(df.columns, df2.columns) + df2 = df2[df.columns] + df_as_str = _compare_dfs(self, df, df2) + return df_as_str + + # ////////////////////////////////////////////////////////////////////////////// + + def test_write_and_read1(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read everything back + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = r""" + # Dir structure + . 
+ idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet + idx=2 + idx=2/data.parquet + idx=3 + idx=3/data.parquet + idx=4 + idx=4/data.parquet""" + columns_to_read = None + self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + + def test_write_and_read2(self) -> None: + """ + - Write a partitioned dataset with two partitioning columns + - Read everything back + """ + df = _get_df_example1() + partition_cols = ["idx", "instr"] + exp_dir_signature = r"""# Dir structure + . + idx=0 + idx=0/instr=A + idx=0/instr=A/data.parquet + idx=1 + idx=1/instr=B + idx=1/instr=B/data.parquet + idx=2 + idx=2/instr=C + idx=2/instr=C/data.parquet + idx=3 + idx=3/instr=D + idx=3/instr=D/data.parquet + idx=4 + idx=4/instr=E + idx=4/instr=E/data.parquet""" + # Read back everything. + columns_to_read = None + self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + + def test_write_and_read3(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read two columns back + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = None + columns_to_read = ["idx", "instr"] + df_as_str = self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + expected = r"""# = + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr + shape=(395, 2) + idx instr + 2020-01-01 09:30:00-05:00 0 A + 2020-01-01 09:35:00-05:00 0 A + 2020-01-01 09:40:00-05:00 0 A + ... + 2020-01-01 15:50:00-05:00 4 E + 2020-01-01 15:55:00-05:00 4 E + 2020-01-01 16:00:00-05:00 4 E""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + def test_write_and_read4(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read two columns back filtering by the one of the partitioned column + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = None + # Write and check. 
+ dir_name = self.write_partitioned_dataset_and_check( + df, partition_cols, exp_dir_signature + ) + # Read back everything. + columns_to_read = ["idx", "instr"] + filters = [] + filters.append(("idx", "=", 0)) + # Note that `from_parquet` doesn't work with filters. + # df2 = hparque.from_parquet( + # dir_name, + # columns=columns_to_read, + # filters=filters, + # log_level=logging.INFO, + # ) + filesystem = None + dataset = parquet.ParquetDataset( + dir_name, + filesystem=filesystem, + filters=filters, + ) + table = dataset.read(columns=columns_to_read) + df2 = table.to_pandas() + # Compare. + df_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="df") + expected = r"""# df= + index=[0, 78] + columns=idx,instr + shape=(79, 2) + idx instr + 0 0 A + 1 0 A + 2 0 A + ... + 76 0 A + 77 0 A + 78 0 A""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def test_merge1(self) -> None: + """ + - Write a partitioned dataset in multiple chunks using the same partitioning + column + - Make sure that reading it back we get the original data. + """ + df = _get_df_example1() + # + partition_cols = ["idx"] + # Write the first chunk. + df_chunk1 = df[df["idx"].isin([0, 1])] + exp_dir_signature = """ + # Dir structure + . + idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet""" + # Write and check. + _ = self.write_partitioned_dataset_and_check( + df_chunk1, partition_cols, exp_dir_signature + ) + # Write the second chunk. + df_chunk2 = df[df["idx"].isin([2, 3, 4])] + exp_dir_signature = """ + # Dir structure + . + idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet + idx=2 + idx=2/data.parquet + idx=3 + idx=3/data.parquet + idx=4 + idx=4/data.parquet""" + # Write and check. + dir_name = self.write_partitioned_dataset_and_check( + df_chunk2, partition_cols, exp_dir_signature + ) + # Read everything. 
+ columns_to_read = None + df2 = hparque.from_parquet( + dir_name, columns=columns_to_read, log_level=logging.INFO + ) + # Compare. + hdbg.dassert_set_eq(df.columns, df2.columns) + df2 = df2[df.columns] + df_as_str = _compare_dfs(self, df, df2) + expected = r""" + # = + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(395, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ... + 2020-01-01 15:50:00-05:00 4 E 57 3 + 2020-01-01 15:55:00-05:00 4 E 33 50 + 2020-01-01 16:00:00-05:00 4 E 96 75""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + def _run_write_and_read_mixed_units_partitioned_dataset( + self, first_unit: str, second_unit: str + ) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + :param first_unit: time unit of the first DataFrame + :param second_unit: time unit of the second DataFrame + """ + initial_df = _get_test_df_with_timestamps() + partition_columns = ["n_legs", "animal", "year"] + dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") + # Write first DF as partitioned parquet. + first_df = initial_df.copy() + first_df.index = first_df.index.as_unit(first_unit) + first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( + f"datetime64[{first_unit}, UTC]" + ) + hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) + # Write second DF as partitioned parquet. + second_df = initial_df.copy() + second_df.index = second_df.index.as_unit(second_unit) + second_df["knowledge_timestamp"] = second_df[ + "knowledge_timestamp" + ].astype(f"datetime64[{second_unit}, UTC]") + hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) + # Read it back. 
+ _ = hparque.from_parquet(dst_dir) + + def test_write_and_read_mixed_units_partition_dataset_1(self) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + The combination `ns` and `us` should not raise an error. + See CmampTask7331 for details. + """ + self._run_write_and_read_mixed_units_partitioned_dataset("ns", "us") + + @pytest.mark.skip( + reason="Since names and order the files is not guaranteed, the test is " + "flaky, decided to skip it for now.", + ) + def test_write_and_read_mixed_units_partition_dataset_2(self) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + The combination `ms` and `us` should raise an error. + """ + with self.assertRaises(pyarrow.lib.ArrowInvalid): + self._run_write_and_read_mixed_units_partitioned_dataset("ms", "us") + + +# ############################################################################# + + +# ############################################################################# +# TestGetParquetFiltersFromTimestampInterval1 +# ############################################################################# + + +class TestGetParquetFiltersFromTimestampInterval1(hunitest.TestCase): + def test_no_interval(self) -> None: + """ + No timestamps provided. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = None + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + self.assertIsNone(filters) + + def test_by_month_half1(self) -> None: + """ + Test a left-bound interval [..., None]. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = None + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = r"[[('year', '==', 2020), ('month', '>=', 1)], [('year', '>', 2020)]]" + self.assert_equal(actual, expected) + + def test_by_month_half2(self) -> None: + """ + Test a right-bound interval [None, ...]. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = r"[[('year', '==', 2020), ('month', '<=', 1)], [('year', '<', 2020)]]" + self.assert_equal(actual, expected) + + def test_by_month_one_year1(self) -> None: + """ + Test an interval contained in a whole year. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_one_year2(self) -> None: + """ + Test an interval contained in a whole year. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 1)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_invalid1(self) -> None: + """ + Test an invalid interval. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:30:00+00:00") + with self.assertRaises(AssertionError) as fail: + hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(fail.exception) + expected = r""" + * Failed assertion * + 2020-01-02 09:31:00+00:00 <= 2020-01-02 09:30:00+00:00 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_by_month_invalid2(self) -> None: + """ + Test an invalid partition mode. + """ + partition_mode = "new_mode" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") + with self.assertRaises(ValueError) as fail: + hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(fail.exception) + expected = r"Unknown partition mode `new_mode`!" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_by_month_two_years1(self) -> None: + """ + Test an interval spanning two years. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2021-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 6)], " + r"[('year', '==', 2021), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_over_two_years1(self) -> None: + """ + Test an interval longer than two years. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 6)], " + r"[('year', '>', 2020), ('year', '<', 2022)], " + r"[('year', '==', 2022), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_additional_filters1(self) -> None: + """ + No timestamps provided while a single additional filter is provided. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = None + additional_filters = [ + ( + "currency_pair", + "in", + ("BTC_USDT",), + ) + ] + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, + start_ts, + end_ts, + additional_filters=additional_filters, + ) + actual = str(filters) + expected = r"[('currency_pair', 'in', ('BTC_USDT',))]" + self.assert_equal(actual, expected) + + def test_additional_filters2(self) -> None: + """ + Test an interval with multiple additional filters. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + additional_filters = [ + ("exchange_id", "in", ("binance")), + ("currency_pairs", "in", ("ADA_USDT", "BTC_USDT")), + ] + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, + start_ts, + end_ts, + additional_filters=additional_filters, + ) + actual = str(filters) + expected = ( + r"[[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '==', 2020), ('month', '>=', 6)], " + r"[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '>', 2020), ('year', '<', 2022)], " + r"[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '==', 2022), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestAddDatePartitionColumns +# ############################################################################# + + +class TestAddDatePartitionColumns(hunitest.TestCase): + def add_date_partition_columns_helper( + self, partition_mode: str, expected: str + ) -> None: + # Prepare inputs. + test_data = { + "dummy_value": [1, 2, 3], + "dummy_timestamp": [1638646800000, 1638646860000, 1638646960000], + } + start_timestamp = "2021-12-04 19:40:00+00:00" + end_timestamp = "2021-12-04 19:42:00+00:00" + index = pd.date_range(start_timestamp, end_timestamp, freq="1T") + df = pd.DataFrame(index=index, data=test_data) + # Run. + hparque.add_date_partition_columns(df, partition_mode) + # Check output. 
+ actual = hpandas.df_to_str(df) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_add_date_partition_columns1(self) -> None: + partition_mode = "by_date" + expected = r""" dummy_value dummy_timestamp date + 2021-12-04 19:40:00+00:00 1 1638646800000 20211204 + 2021-12-04 19:41:00+00:00 2 1638646860000 20211204 + 2021-12-04 19:42:00+00:00 3 1638646960000 20211204""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns2(self) -> None: + partition_mode = "by_year" + expected = r""" dummy_value dummy_timestamp year + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns3(self) -> None: + partition_mode = "by_year_month_day" + # pylint: disable=line-too-long + expected = r""" dummy_value dummy_timestamp year month day + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 12 4 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 12 4 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021 12 4""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns4(self) -> None: + partition_mode = "by_year_week" + expected = r""" dummy_value dummy_timestamp year weekofyear + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 48 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 48 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021 48""" + self.add_date_partition_columns_helper(partition_mode, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestToPartitionedDataset +# ############################################################################# + + +class TestToPartitionedDataset(hunitest.TestCase): + @staticmethod + def get_test_data1() -> pd.DataFrame: + 
test_data = { + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + df = pd.DataFrame(data=test_data) + return df + + def test_get_test_data1(self) -> None: + test_data = self.get_test_data1() + actual = hpandas.df_to_str(test_data) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.skip( + reason="CmTask1305: after removing circular dependencies in " + "`hio.from_file`, this test fails reading a parquet file" + ) + def test_to_partitioned_dataset(self) -> None: + """ + Test partitioned Parquet datasets with existing columns. + """ + # Prepare inputs. + test_dir = self.get_scratch_space() + df = self.get_test_data1() + # Run. + partition_cols = ["dummy_value_1", "dummy_value_2"] + hparque.to_partitioned_parquet(df, partition_cols, test_dir) + # Check output. + include_file_content = False + remove_dir_name = True + dir_signature = hunitest.get_dir_signature( + test_dir, include_file_content, remove_dir_name=remove_dir_name + ) + expected = r""" + # Dir structure + . + dummy_value_1=1 + dummy_value_1=1/dummy_value_2=A + dummy_value_1=1/dummy_value_2=A/data.parquet + dummy_value_1=2 + dummy_value_1=2/dummy_value_2=B + dummy_value_1=2/dummy_value_2=B/data.parquet + dummy_value_1=3 + dummy_value_1=3/dummy_value_2=C + dummy_value_1=3/dummy_value_2=C/data.parquet""" + self.assert_equal( + dir_signature, expected, purify_text=True, fuzzy_match=True + ) + # + include_file_content = True + dir_signature = hunitest.get_dir_signature( + test_dir, include_file_content, remove_dir_name=remove_dir_name + ) + self.check_string(dir_signature, purify_text=True, fuzzy_match=True) + + def test_to_partitioned_dataset_wrong_column(self) -> None: + """ + Assert that wrong columns are detected before partitioning. + """ + # Prepare inputs. + test_dir = self.get_scratch_space() + df = self.get_test_data1() + # Run. 
+ partition_cols = ["void_column", "dummy_value_2"] + # Check output. + with self.assertRaises(AssertionError) as cm: + hparque.to_partitioned_parquet(df, partition_cols, test_dir) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + val1=['dummy_value_2', 'void_column'] + issubset + val2=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] + val1 - val2=['void_column'] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# TestListAndMergePqFiles +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestListAndMergePqFiles(hmoto.S3Mock_TestCase): + def generate_test_data(self) -> hs3.AwsProfile: + """ + Upload test daily Parquet files for 3 days to the mocked S3 bucket. + """ + start_date = "2022-02-02" + end_date = "2022-02-04" + assets = ["A", "B", "C", "D", "E", "F"] + asset_col_name = "asset" + test_dir = self.get_scratch_space() + partition_mode = "by_year_month" + custom_partition_cols = "asset,year,month" + hparque.generate_parquet_files( + start_date, + end_date, + assets, + asset_col_name, + test_dir, + partition_mode=partition_mode, + custom_partition_cols=custom_partition_cols, + ) + s3fs_ = hs3.get_s3fs(self.mock_aws_profile) + s3_bucket = f"s3://{self.bucket_name}" + s3fs_.put(test_dir, s3_bucket, recursive=True) + return s3fs_ + + @pytest.mark.slow("~7 seconds.") + def test_list_and_merge_pq_files(self) -> None: + """ + Check if predefined generated Parquet files are properly merged. + """ + s3fs_ = self.generate_test_data() + # Prepare common `hs3.listdir` params. 
+ s3_bucket = f"s3://{self.bucket_name}" + pattern = "*.parquet" + only_files = True + use_relative_paths = True + # Check bucket content before merge. + parquet_path_list_before = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + self.assertEqual(len(parquet_path_list_before), 6) + # Add extra parquet files and rename existing one. + # e.g., `dummy.parquet`, `dummy_new.parquet`. + # Every second file is left intact to replicate ready out-of-the-box folder. + # e.g., `asset=A/year=2022/month=2/77a2534aaf9649fab6511cea53a6bf7f-0.parquet`. + for path in parquet_path_list_before[::2]: + original_path = f"{s3_bucket}/{path}" + original_file_name = os.path.basename(original_path) + renamed_path = original_path.replace( + original_file_name, "dummy.parquet" + ) + additional_path = original_path.replace( + original_file_name, "dummy_new.parquet" + ) + s3fs_.rename(original_path, renamed_path) + s3fs_.copy(renamed_path, additional_path) + # Check if edits are in place. + updated_parquet_path_list = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + data_parquet_path_list = [ + path for path in updated_parquet_path_list if "dummy" not in path + ] + self.assertEqual(len(updated_parquet_path_list), 9) + self.assertEqual(len(data_parquet_path_list), 3) + # Check bucket content after merge. 
+ hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) + parquet_path_list_after = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + parquet_path_list_after.sort() + expected_list = [ + "tmp.scratch/asset=A/year=2022/month=2/data.parquet", + "tmp.scratch/asset=B/year=2022/month=2/data.parquet", + "tmp.scratch/asset=C/year=2022/month=2/data.parquet", + "tmp.scratch/asset=D/year=2022/month=2/data.parquet", + "tmp.scratch/asset=E/year=2022/month=2/data.parquet", + "tmp.scratch/asset=F/year=2022/month=2/data.parquet", + ] + self.assertListEqual(parquet_path_list_after, expected_list) + + def test_list_and_merge_pq_files_duplicate_drop(self) -> None: + # Prepare test data. + test_data = { + "dummy_value_1": [1, 1, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [1, 2, 3], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + # Save test data to s3 bucket. + s3fs_ = hs3.get_s3fs(self.mock_aws_profile) + s3_bucket = f"s3://{self.bucket_name}" + original_sample_path = f"{s3_bucket}/dummy/data.parquet" + dummy_sample_path = original_sample_path.replace( + "data.parquet", "dummy.parquet" + ) + hparque.to_parquet(df, dummy_sample_path, aws_profile=s3fs_) + # Check if new columns are in place. + df = hparque.from_parquet(dummy_sample_path, aws_profile=s3fs_) + self.assertIn("knowledge_timestamp", df.columns) + self.assertIn("end_download_timestamp", df.columns) + self.assertEqual(len(df), 3) + # Check if duplicates are dropped after merge. 
+ hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) + df = hparque.from_parquet(original_sample_path, aws_profile=s3fs_) + self.assertEqual(len(df), 1) + + +# ############################################################################# + + +# ############################################################################# +# TestListAndMergePqFilesMixedUnits +# ############################################################################# + + +class TestListAndMergePqFilesMixedUnits(hunitest.TestCase): + def _list_and_merge_mixed_units_pq_files( + self, first_unit: str, second_unit: str + ) -> None: + """ + Run `list_and_merge_pq_files` with different time units in the same + column and index. + + :param first_unit: first time unit. + :param second_unit: second time unit. + """ + # Prepare test data. + dst_dir = os.path.join(self.get_scratch_space(), "tmp.list_and_merge") + first_file_name = os.path.join(dst_dir, "tmp.1first.parquet") + second_file_name = os.path.join(dst_dir, "tmp.2second.parquet") + merged_file_name = os.path.join(dst_dir, "tmp.merged.parquet") + # Write first DF with the `first_unit`. + initial_df = _get_test_df_with_timestamps() + first_df = initial_df.copy() + first_df.index = first_df.index.as_unit(first_unit) + first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( + f"datetime64[{first_unit}, UTC]" + ) + hparque.to_parquet(first_df, first_file_name) + # Write second DF with the `second_unit`. + second_df = initial_df.copy() + second_df.index = second_df.index.as_unit(second_unit) + second_df["knowledge_timestamp"] = second_df[ + "knowledge_timestamp" + ].astype(f"datetime64[{second_unit}, UTC]") + hparque.to_parquet(second_df, second_file_name) + # List and merge. + hparque.list_and_merge_pq_files(dst_dir, file_name="tmp.merged.parquet") + # Read it back. 
+ _ = hparque.from_parquet(merged_file_name) + + def test_parquet_files_with_mixed_time_units_1(self) -> None: + """ + Test merging Parquet files with the `ns` and `us`. + """ + first_unit = "ns" + second_unit = "us" + self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) + + # TODO(Nina): @Samarth fix the test. + @pytest.mark.skip(reason="Broken.") + def test_parquet_files_with_mixed_time_units_2(self) -> None: + """ + Test merging Parquet files with the `ms` and `ns`. + + It should raise an error. See CmampTask7331 for details. + + The test will not raise an asserion when the time units is `ms` and + `us`. The reason is that we do not lose data when converting from + the first time unit, which is `ms`, to the second time unit, which + is `us`, transitioning from low resolution to high resolution. + """ + first_unit = "us" + second_unit = "ms" + with self.assertRaises(pyarrow.lib.ArrowInvalid): + self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) + + +# ############################################################################# + + +# ############################################################################# +# TestYieldParquetTiles +# ############################################################################# + + +class TestYieldParquetTiles(hunitest.TestCase): + def generate_test_data(self) -> None: + """ + Generate test data and write it to a scratch dir. + + Data has the following structure: + + ``` + asset_id ... year month + end_ts + 2021-11-01 100 2021 11 + 2021-11-01 200 2021 11 + 2021-11-01 300 2021 11 + ... + 2022-02-01 200 2022 2 + 2022-02-01 300 2022 2 + 2022-02-01 400 2022 2 + ``` + """ + # Generate synthetic data. 
+ asset_ids = [100, 200, 300, 400] + prices = list(range(1, 17)) + volatility = list(range(17, 33)) + dates = ["2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01"] + dates = map(pd.Timestamp, dates) + index_ = [dates, asset_ids] + multi_index = pd.MultiIndex.from_product( + index_, names=["end_ts", "asset_id"] + ) + df = pd.DataFrame( + {"price": prices, "volatility": volatility}, index=multi_index + ) + df["year"] = df.index.get_level_values(0).year + df["month"] = df.index.get_level_values(0).month + df = df.reset_index(level=1) + _LOG.debug("Test data: df=\n%s", hpandas.df_to_str(df)) + # Write the data to a scratch dir. + partition_columns = ["asset_id", "year", "month"] + dst_dir = self.get_scratch_space() + hparque.to_partitioned_parquet(df, partition_columns, dst_dir) + + def test_yield_tiles_by_asset(self) -> None: + """ + Test reading only certain asset ids. + """ + self.generate_test_data() + # Read data. + file_name = self.get_scratch_space() + asset_ids = [100, 200] + asset_id_col = "asset_id" + asset_batch_size = 1 + columns = [asset_id_col, "price"] + generator_ = hparque.yield_parquet_tiles_by_assets( + file_name, asset_ids, asset_id_col, asset_batch_size, columns + ) + df = pd.concat(generator_) + _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) + # Check asset ids filtering. + actual = str(asset_ids) + expected = str(df[asset_id_col].unique().tolist()) + self.assert_equal(actual, expected) + + def test_yield_tiles_by_year(self) -> None: + """ + Test reading only certain asset ids and dates. + """ + self.generate_test_data() + # Read data. 
+ file_name = self.get_scratch_space() + start_year = 2021 + start_month = 12 + start_date = datetime.date(start_year, start_month, 1) + end_year = 2022 + end_month = 1 + end_date = datetime.date(end_year, end_month, 2) + asset_ids = [300, 400] + asset_id_col = "asset_id" + columns = [asset_id_col, "price"] + generator_ = hparque.yield_parquet_tiles_by_year( + file_name, + start_date, + end_date, + columns, + asset_ids=asset_ids, + asset_id_col=asset_id_col, + ) + df = pd.concat(generator_) + _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) + # Check asset ids filtering. + actual = str(asset_ids) + expected = str(df[asset_id_col].unique().tolist()) + self.assert_equal(actual, expected) + # Check start date filtering. + min_date = df.index.min() + self.assertEqual(min_date.month, start_month) + self.assertEqual(min_date.year, start_year) + # Check end date filtering. + max_date = df.index.max() + self.assertEqual(max_date.month, end_month) + self.assertEqual(max_date.year, end_year) + + +# ############################################################################# + + +# ############################################################################# +# TestBuildFilterWithOnlyEqualities +# ############################################################################# + + +class TestBuildFilterWithOnlyEqualities(hunitest.TestCase): + def test_year_month_day_equality(self) -> None: + """ + Test interval with same year, month and day. + """ + start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = ( + r"[('year', '==', 2022), ('month', '==', 12), ('day', '==', 2)]" + ) + self.assert_equal(actual, expected) + + def test_year_month_equality(self) -> None: + """ + Test interval with same year and month. 
+ """ + start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-28 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[('year', '==', 2022), ('month', '==', 12)]" + self.assert_equal(actual, expected) + + def test_year_equality(self) -> None: + """ + Test interval with same year. + """ + start_ts = pd.Timestamp("2022-10-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[('year', '==', 2022)]" + self.assert_equal(actual, expected) + + def test_no_equality(self) -> None: + """ + Test interval with different start and end years. + """ + start_ts = pd.Timestamp("2021-10-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-10-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[]" + self.assert_equal(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestPartitionedParquet2 +# ############################################################################# + + +class TestPartitionedParquet2(hunitest.TestCase): + """ + Test case for writing and reading partitioned Parquet datasets with mixed + timestamp formats. + """ + + def _get_test_df(self) -> pd.DataFrame: + """ + Create a DataFrame with timestamps. + """ + # Mock the get_current_time method. 
+ timestamp = pd.Timestamp("2024-05-20 00:00:00", tz="UTC") + index = [timestamp for _ in range(4)] + df = pd.DataFrame( + { + "bids": [200, 123, 263, 167], + "asks": [150, 120, 240, 150], + "symbol": ["BTC_USDT" for _ in range(4)], + }, + index=index, + ) + end_download_timestamp = "2024-06-04 20:38:43.467599+00:00" + df["end_download_timestamp"] = end_download_timestamp + return df + + def _run_write_and_read_mixed_timestamp_partitioned_dataset(self) -> None: + """ + Write two DataFrames with different timestamp formats to a partitioned + Parquet dataset and read it back. + """ + initial_df = self._get_test_df() + partition_columns = ["bids", "asks", "symbol"] + dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") + # Write first DF as partitioned parquet. + first_df = initial_df.copy() + hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) + # Write second DF as partitioned parquet. + second_df = initial_df.copy() + second_df["end_download_timestamp"] = pd.to_datetime( + second_df["end_download_timestamp"] + ) + hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) + # Read it back and verify the output. + combined_df = hparque.from_parquet(dst_dir) + combined_df["end_download_timestamp"] = pd.to_datetime( + combined_df["end_download_timestamp"] + ).dt.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + actual = hpandas.df_to_str(combined_df) + expected = r""" + end_download_timestamp bids asks symbol + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 167 150 BTC_USDT + ... 
+ 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 200 150 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test writing and reading a partitioned Parquet dataset with mixed + timestamp formats. + """ + self._run_write_and_read_mixed_timestamp_partitioned_dataset() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py new file mode 100644 index 000000000..8e65eeb2e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py @@ -0,0 +1,398 @@ +import argparse +import os + +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestParseLimitRange +# ############################################################################# + + +class TestParseLimitRange(hunitest.TestCase): + def test_parse_limit_range_valid1(self) -> None: + """ + Test parsing valid range format. + """ + limit_str = "1:5" + expected = (1, 5) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_valid2(self) -> None: + """ + Test parsing valid range format with same start and end. + """ + limit_str = "3:3" + expected = (3, 3) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_valid3(self) -> None: + """ + Test parsing valid range format with larger numbers. 
+ """ + limit_str = "10:100" + expected = (10, 100) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_no_colon(self) -> None: + """ + Test that missing colon raises assertion error. + """ + limit_str = "15" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_multiple_colons(self) -> None: + """ + Test that multiple colons raise assertion error. + """ + limit_str = "1:2:3" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_invalid_start(self) -> None: + """ + Test that non-integer start raises fatal error. + """ + limit_str = "abc:5" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_invalid_end(self) -> None: + """ + Test that non-integer end raises fatal error. + """ + limit_str = "1:xyz" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_start_zero(self) -> None: + """ + Test that start index of 0 raises assertion error. + """ + limit_str = "0:5" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_end_zero(self) -> None: + """ + Test that end index of 0 raises assertion error. + """ + limit_str = "1:0" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_start_greater_than_end(self) -> None: + """ + Test that start greater than end raises assertion error. 
+ """ + limit_str = "5:3" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + +# ############################################################################# +# TestApplyLimitRange +# ############################################################################# + + +class TestApplyLimitRange(hunitest.TestCase): + def test_apply_limit_range_no_limit(self) -> None: + """ + Test that None limit range returns original items. + """ + items = ["a", "b", "c", "d", "e"] + actual = hparser.apply_limit_range(items, None) + self.assertEqual(actual, items) + + def test_apply_limit_range_valid_range(self) -> None: + """ + Test applying valid range to items. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (1, 3) + expected = ["b", "c", "d"] # 0-indexed, inclusive + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_single_item(self) -> None: + """ + Test applying range that selects single item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (2, 2) + expected = ["c"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_first_item(self) -> None: + """ + Test applying range starting from first item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (0, 1) + expected = ["a", "b"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_last_item(self) -> None: + """ + Test applying range ending at last item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (3, 4) + expected = ["d", "e"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_start_exceeds_length(self) -> None: + """ + Test that start index exceeding items length raises assertion error. 
+ """ + items = ["a", "b", "c"] + limit_range = (5, 6) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_end_exceeds_length(self) -> None: + """ + Test that end index exceeding items length raises assertion error. + """ + items = ["a", "b", "c"] + limit_range = (1, 5) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_custom_item_name(self) -> None: + """ + Test that custom item name doesn't affect functionality. + """ + items = [1, 2, 3, 4, 5] + limit_range = (0, 2) + expected = [1, 2, 3] + actual = hparser.apply_limit_range( + items, limit_range, item_name="numbers" + ) + self.assertEqual(actual, expected) + + def test_apply_limit_range_empty_list(self) -> None: + """ + Test applying limit range to empty list. + """ + items = [] + limit_range = (0, 1) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_complex_objects(self) -> None: + """ + Test applying limit range to complex objects. + """ + items = [{"id": i, "value": f"item{i}"} for i in range(10)] + limit_range = (2, 4) + expected = [ + {"id": 2, "value": "item2"}, + {"id": 3, "value": "item3"}, + {"id": 4, "value": "item4"}, + ] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + +# ############################################################################# +# Test_add_multi_file_args +# ############################################################################# + + +class Test_add_multi_file_args(hunitest.TestCase): + def test_adds_correct_arguments(self) -> None: + """ + Test that add_multi_file_args adds the correct arguments to parser. + """ + # Prepare inputs. + parser = argparse.ArgumentParser() + # Run function. + hparser.add_multi_file_args(parser) + # Check that the arguments were added. 
+ namespace = parser.parse_args([]) + self.assertTrue(hasattr(namespace, "files")) + self.assertTrue(hasattr(namespace, "from_files")) + self.assertTrue(hasattr(namespace, "input")) + + +# ############################################################################# +# Test_parse_multi_file_args +# ############################################################################# + + +class Test_parse_multi_file_args(hunitest.TestCase): + # Helper method. + def _create_test_file(self, file_path: str, content: str = "test") -> None: + """ + Create a test file with given content. + """ + hio.create_dir(os.path.dirname(file_path), incremental=True) + hio.to_file(file_path, content) + + def test_files_comma_separated(self) -> None: + """ + Test parsing comma-separated file list. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + file3 = f"{scratch_dir}/file3.txt" + self._create_test_file(file1) + self._create_test_file(file2) + self._create_test_file(file3) + # Create namespace with files argument. + args = argparse.Namespace() + args.files = f"{file1},{file2},{file3}" + args.from_files = None + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2, file3] + self.assert_equal(str(actual), str(expected)) + + def test_from_files(self) -> None: + """ + Test parsing file containing list of files. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + file3 = f"{scratch_dir}/file3.txt" + self._create_test_file(file1) + self._create_test_file(file2) + self._create_test_file(file3) + # Create file list. + list_file = f"{scratch_dir}/list.txt" + content = f"{file1}\n{file2}\n{file3}\n" + self._create_test_file(list_file, content) + # Create namespace with from_files argument. 
+ args = argparse.Namespace() + args.files = None + args.from_files = list_file + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2, file3] + self.assert_equal(str(actual), str(expected)) + + def test_from_files_with_empty_lines(self) -> None: + """ + Test parsing file with empty lines and comments. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + self._create_test_file(file1) + self._create_test_file(file2) + # Create file list with empty lines and comments. + list_file = f"{scratch_dir}/list.txt" + content = f""" + # This is a comment + {file1} + + # Another comment + {file2} + + """ + self._create_test_file(list_file, content) + # Create namespace with from_files argument. + args = argparse.Namespace() + args.files = None + args.from_files = list_file + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2] + self.assert_equal(str(actual), str(expected)) + + def test_input_multiple(self) -> None: + """ + Test parsing repeated --input arguments. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + self._create_test_file(file1) + self._create_test_file(file2) + # Create namespace with input argument. + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = [file1, file2] + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2] + self.assert_equal(str(actual), str(expected)) + + def test_backward_compatibility_single_file(self) -> None: + """ + Test that single -i/--input still works. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test file. 
+ file1 = f"{scratch_dir}/file1.txt" + self._create_test_file(file1) + # Create namespace with input argument (single file, not list). + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = file1 # Single file as string, not list + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1] + self.assert_equal(str(actual), str(expected)) + + def test_file_validation(self) -> None: + """ + Test that non-existent files raise error. + """ + # Create namespace with non-existent file. + args = argparse.Namespace() + args.files = "/nonexistent/file1.txt,/nonexistent/file2.txt" + args.from_files = None + args.input = None + # Run function and check that it raises error. + with self.assertRaises(AssertionError): + hparser.parse_multi_file_args(args) + + def test_empty_file_list(self) -> None: + """ + Test empty file list handling. + """ + # Prepare inputs. + + # Create namespace with no files. + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = None + # Run function and check that it raises error. + with self.assertRaises(AssertionError) as cm: + hparser.parse_multi_file_args(args) + # Check the error message. 
+ act = str(cm.exception) + self.assertIn("No input files specified", act) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py new file mode 100644 index 000000000..8064ddbe1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py @@ -0,0 +1,97 @@ +import logging + +import helpers.hpickle as hpickle +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestToPickleable +# ############################################################################# + + +class TestToPickleable(hunitest.TestCase): + def test_list1(self) -> None: + """ + Test that a list is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = [1, "2", [3, 0.4], (5, None)] + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = [1, "2", [3, 0.4], (5, None)] + self.assertEqual(actual, expected) + + def test_list2(self) -> None: + """ + Test that a list is converted to a pickleable correctly. + + force_values_to_string = True + """ + _obj = [1, "2", [3, 0.4], (5, None)] + force_values_to_string = True + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ["1", "2", ["3", "0.4"], ("5", "None")] + self.assertEqual(actual, expected) + + def test_tuple1(self) -> None: + """ + Test that a tuple is converted to a pickleable correctly. 
+ + force_values_to_string = False + """ + _obj = (1, "2", [3, 0.4], (5, None)) + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = (1, "2", [3, 0.4], (5, None)) + self.assertEqual(actual, expected) + + def test_dict1(self) -> None: + """ + Test that a dict is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = {"a": 1, 2: ["b", 3], "c": {0.4: None}} + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = {"a": 1, 2: ["b", 3], "c": {0.4: None}} + self.assertEqual(actual, expected) + + def test_iterable1(self) -> None: + """ + Test that an iterable is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = {1, 2, 3} + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = [1, 2, 3] + self.assertEqual(actual, expected) + + def test_unpickleable1(self) -> None: + """ + Test that an unpickleable object is converted to a string. + + force_values_to_string = False + """ + _obj = lambda x: x + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ". at 0x>" + self.assert_equal(actual, expected, purify_text=True) + + def test_unpickleable2(self) -> None: + """ + Test that an unpickleable object is converted to a string. + + force_values_to_string = True + """ + _obj = lambda x: x + force_values_to_string = True + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ". 
at 0x>" + self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py new file mode 100644 index 000000000..a829ea82f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py @@ -0,0 +1,506 @@ +import datetime +import logging +import os +from typing import Any, Optional + +import pandas as pd +import pytest + +import config_root.config as cconfig +import helpers.hio as hio +import helpers.hplayback as hplayba +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestJsonRoundtrip1 +# ############################################################################# + + +class TestJsonRoundtrip1(hunitest.TestCase): + """ + Test roundtrip conversion through jsonpickle for different types. 
+ """ + + def test1(self) -> None: + obj = 3 + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test2(self) -> None: + obj = "hello" + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test3(self) -> None: + data = { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], + } + df = pd.DataFrame(data, columns=["Product", "Price"]) + df.index.name = "hello" + # + obj = df + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test4(self) -> None: + obj = datetime.date(2015, 1, 1) + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + +# ############################################################################# +# TestPlaybackInputOutput1 +# ############################################################################# + + +class TestPlaybackInputOutput1(hunitest.TestCase): + """ + Freeze the output of Playback. + """ + + def helper(self, mode: str, *args: Any, **kwargs: Any) -> None: + # TODO(gp): Factor out the common code. + # Define a function to generate a unit test for. 
+ def get_result_assert_equal(a: Any, b: Any) -> Any: + p = hplayba.Playback("assert_equal") + if isinstance(a, datetime.date) and isinstance(b, datetime.date): + return p.run(abs(a - b)) + if isinstance(a, dict) and isinstance(b, dict): + c = {} + c.update(a) + c.update(b) + return p.run(c) + if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): + c = cconfig.Config(update_mode="overwrite") + c.update(a) + c.update(b) + return p.run(c) + return p.run(a + b) + + def get_result_check_string(a: Any, b: Any) -> Any: + p = hplayba.Playback("check_string") + if isinstance(a, datetime.date) and isinstance(b, datetime.date): + return p.run(abs(a - b)) + if isinstance(a, dict) and isinstance(b, dict): + c = {} + c.update(a) + c.update(b) + return p.run(c) + if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): + c = cconfig.Config(update_mode="overwrite") + c.update(a) + c.update(b) + return p.run(c) + return p.run(a + b) + + def get_result_assert_equal_none() -> Any: + p = hplayba.Playback("assert_equal") + return p.run("Some string.") + + def get_result_check_string_none() -> Any: + p = hplayba.Playback("check_string") + return p.run("Some string") + + if mode == "assert_equal": + if not args and not kwargs: + code = get_result_assert_equal_none() + else: + code = get_result_assert_equal(*args, **kwargs) + elif mode == "check_string": + if not args and not kwargs: + code = get_result_check_string_none() + else: + code = get_result_check_string(*args, **kwargs) + else: + raise ValueError("Invalid mode ") + self.check_string(code, purify_text=True) + _LOG.debug("Testing code:\n%s", code) + exec(code, locals()) # pylint: disable=exec-used + + def test1(self) -> None: + """ + Test for int inputs. + """ + # Create inputs. + a = 3 + b = 2 + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test2(self) -> None: + """ + Test for string inputs. + """ + # Create inputs. 
+ a = "test" + b = "case" + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test3(self) -> None: + """ + Test for list inputs. + """ + # Create inputs. + a = [1, 2, 3] + b = [4, 5, 6] + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test4(self) -> None: + """ + Test for dict inputs. + """ + # Create inputs. + a = {"1": 2} + b = {"3": 4} + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test5(self) -> None: + """ + Test for pd.DataFrame inputs. + """ + # Create inputs. + a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test6(self) -> None: + """ + Test for datetime.date inputs (using `jsonpickle`). + """ + # Create inputs. + a = datetime.date(2015, 1, 1) + b = datetime.date(2012, 1, 1) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test7(self) -> None: + """ + Test for int inputs with check_string. + """ + # Create inputs. + a = 3 + b = 2 + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test8(self) -> None: + """ + Test for string inputs with check_string. + """ + # Create inputs. + a = "test" + b = "case" + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test9(self) -> None: + """ + Test for list inputs with check_string. + """ + # Create inputs. + a = [1, 2, 3] + b = [4, 5, 6] + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test10(self) -> None: + """ + Test for dict inputs with check_string. + """ + # Create inputs. + a = {"1": 2} + b = {"3": 4} + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test11(self) -> None: + """ + Test for pd.DataFrame inputs with check_string. 
+ """ + # Create inputs. + a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test12(self) -> None: + """ + Test for dict inputs with data structures recursion. + """ + # Create inputs. + a = {"1": ["a", 2]} + b = {"3": pd.DataFrame({"Price": [700, 250, 800, 1200]}), "4": {"5": 6}} + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test13(self) -> None: + """ + Test for pd.Series inputs with check_string. + """ + # Create inputs. + a = pd.Series([10, 20, 15], name="N Numbers") + b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test14(self) -> None: + """ + Test for pd.Series inputs with assert_equal. + """ + # Create inputs. + a = pd.Series([10, 20, 15], name="N Numbers") + b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test15(self) -> None: + """ + Test for cconfig.Config inputs with check_string. + """ + # Create inputs. + a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) + b = cconfig.Config([("meta", "meta value 2")]) + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test16(self) -> None: + """ + Test for cconfig.Config inputs with assert_equal. + """ + # Create inputs. + a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) + b = cconfig.Config([("meta", "meta value 2")]) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test17(self) -> None: + """ + Test if testing function has no args with check_string. + """ + self.helper("check_string") + + def test18(self) -> None: + """ + Test if testing function has no args with assert_equal. 
+ """ + self.helper("assert_equal") + + +# ############################################################################# +# TestToPythonCode1 +# ############################################################################# + + +class TestToPythonCode1(hunitest.TestCase): + """ + Test to_python_code() for different types. + """ + + def _check(self, input_obj: Any, expected: str) -> None: + res = hplayba.to_python_code(input_obj) + self.assert_equal(res, expected) + + def test_float1(self) -> None: + """ + Test float without first zero. + """ + self._check(0.1, "0.1") + + def test_float2(self) -> None: + """ + Test positive float. + """ + self._check(1.0, "1.0") + + def test_float3(self) -> None: + """ + Test negative float. + """ + self._check(-1.1, "-1.1") + + def test_int1(self) -> None: + """ + Test zero. + """ + self._check(0, "0") + + def test_int2(self) -> None: + """ + Test positive int. + """ + self._check(10, "10") + + def test_int3(self) -> None: + """ + Test negative int. + """ + self._check(-10, "-10") + + def test_str1(self) -> None: + """ + Test str simple. + """ + self._check("a", '"a"') + + def test_str2(self) -> None: + """ + Test str with double quotes. + """ + self._check('"b"', '"\\"b\\""') + + def test_str3(self) -> None: + """ + Test str with single quotes. + """ + self._check("'c'", "\"'c'\"") + + def test_list1(self) -> None: + """ + Test List. + """ + self._check([1, 0.2, "3"], '[1, 0.2, "3"]') + + def test_dict1(self) -> None: + """ + Test Dist. + """ + self._check({"a": 0.2, 3: "b"}, '{"a": 0.2, 3: "b"}') + + def test_df1(self) -> None: + """ + Test pd.DataFrame (single quotes expected in field names) + """ + self._check( + pd.DataFrame.from_dict({"a": [0.2, 0.1]}), + "pd.DataFrame.from_dict({'a': [0.2, 0.1]})", + ) + + def test_dataseries1(self) -> None: + """ + Test pd.Series. 
+ """ + self._check( + pd.Series([0.2, 0.1], name="a"), + "pd.Series(data=[0.2, 0.1], index=RangeIndex(start=0, stop=2, step=1), " + 'name="a", dtype=float64)', + ) + + def test_config1(self) -> None: + """ + Test cconfig.Config. + """ + config = cconfig.Config() + config["var1"] = "val1" + config["var2"] = cconfig.Config([("var3", 10), ("var4", "val4")]) + self._check( + config, + "cconfig.Config.from_python(\"Config({'var1': 'val1', " + "'var2': Config({'var3': 10, 'var4': 'val4'})})\")", + ) + + +# ############################################################################# +# TestPlaybackFilePath1 +# ############################################################################# + + +class TestPlaybackFilePath1(hunitest.TestCase): + """ + Test file mode correctness. + """ + + def test1(self) -> None: + """ + Test writing to file when number of tests is more than generated (10). + """ + test_file = hplayba.Playback._get_test_file_name( + "./path/to/somewhere.py" + ) + self.assert_equal( + test_file, "./path/to/test/test_by_playback_somewhere.py" + ) + + +# ############################################################################# +# TestPlaybackFileMode1 +# ############################################################################# + + +class TestPlaybackFileMode1(hunitest.TestCase): + """ + Test file mode correctness. + """ + + def get_code(self, max_tests: Optional[int] = None) -> str: + """ + Return a code for executable file to run. + """ + max_tests_str = "" if max_tests is None else f", max_tests={max_tests}" + code = ( + "\n".join( + [ + "import helpers.hplayback as hplayba", + "def plbck_sum(a: int, b: int) -> int:", + ' hplayba.Playback("check_string", to_file=True%s).run(None)', + " return a + b", + "", + "[plbck_sum(i, i + 1) for i in range(4)]", + ] + ) + % max_tests_str + ) + return code + + def helper(self, max_tests: Optional[int] = None) -> Any: + """ + Return generated by playback code. + """ + # Get file paths. 
+ tmp_dir = self.get_scratch_space() + # File with code. + code_basename = "code_.py" + tmp_py_file = os.path.join(tmp_dir, code_basename) + # File with test. + tmp_test_file = os.path.join( + tmp_dir, "test", "test_by_playback_" + code_basename + ) + # Save the code to the file. + hio.to_file(tmp_py_file, self.get_code(max_tests)) + # Executes the code. + hsystem.system(f"python {tmp_py_file}") + playback_code = hio.from_file(tmp_test_file) + return playback_code + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test1(self) -> None: + """ + Test writing to file when number of tests is more than generated. + """ + max_tests = 100 + self.check_string(self.helper(max_tests)) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test2(self) -> None: + """ + Test writing to file when number of tests is default. + """ + self.check_string(self.helper()) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test3(self) -> None: + """ + Test writing to file when number of tests is lower than generated. 
+ """ + max_tests = 2 + self.check_string(self.helper(max_tests)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py new file mode 100644 index 000000000..395138e7a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py @@ -0,0 +1,844 @@ +import logging +import pprint +from typing import List + +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_printing1 +# ############################################################################# + + +class Test_printing1(hunitest.TestCase): + def test_color_highlight1(self) -> None: + for c in hprint._COLOR_MAP: + _LOG.debug(hprint.color_highlight(c, c)) + + +# ############################################################################# +# Test_to_str1 +# ############################################################################# + + +class Test_to_str1(hunitest.TestCase): + def test1(self) -> None: + x = 1 + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x=1" + self.assertEqual(actual, expected) + + def test2(self) -> None: + x = "hello world" + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x='hello world'" + self.assertEqual(actual, expected) + + def test3(self) -> None: + x = 2 + # To disable linter complaints. + _ = x + actual = hprint.to_str("x*2") + expected = "x*2=4" + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test printing multiple values separated by space. + """ + x = 1 + y = "hello" + # To disable linter complaints. 
+ _ = x, y + actual = hprint.to_str("x y") + expected = "x=1, y='hello'" + self.assertEqual(actual, expected) + + def test5(self) -> None: + """ + Test printing multiple strings separated by space. + """ + x = "1" + y = "hello" + # To disable linter complaints. + _ = x, y + actual = hprint.to_str("x y") + expected = "x='1', y='hello'" + self.assertEqual(actual, expected) + + def test6(self) -> None: + """ + Test printing a list. + """ + x = [1, "hello", "world"] + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x=[1, 'hello', 'world']" + self.assertEqual(actual, expected) + + +# ############################################################################# + + +def example_func1(x: int, y: str) -> str: + _ = x, y + ret = hprint.func_signature_to_str() + return ret # type: ignore[no-any-return] + + +def example_func2() -> str: + ret = hprint.func_signature_to_str() + return ret # type: ignore[no-any-return] + + +def example_func3(x: int, y: str) -> str: + _ = x, y + ret = hprint.func_signature_to_str("y") + return ret # type: ignore[no-any-return] + + +def example_func4(x: int, y: str, z: float) -> str: + _ = x, y, z + ret = hprint.func_signature_to_str("x z") + return ret # type: ignore[no-any-return] + + +def example_func5(x: int, y: str, z: float) -> str: + _ = x, y, z + ret = hprint.func_signature_to_str(["y", "z"]) + return ret # type: ignore[no-any-return] + + +# ############################################################################# +# Test_func_signature_to_str1 +# ############################################################################# + + +class Test_func_signature_to_str1(hunitest.TestCase): + def test1(self) -> None: + actual = example_func1(1, "hello") + expected = "# example_func1: x=1, y='hello'" + self.assert_equal(actual, expected) + + def test2(self) -> None: + actual = example_func2() + expected = "# example_func2:" + self.assert_equal(actual, expected) + + def test3(self) -> None: + actual = 
example_func3(1, "hello") + expected = "# example_func3: x=1" + self.assert_equal(actual, expected) + + def test4(self) -> None: + actual = example_func4(1, "hello", 3.14) + expected = "# example_func4: y='hello'" + self.assert_equal(actual, expected) + + def test5(self) -> None: + actual = example_func5(1, "hello", 3.14) + expected = "# example_func5: x=1" + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_log +# ############################################################################# + + +class Test_log(hunitest.TestCase): + def test2(self) -> None: + x = 1 + # To disable linter complaints. + _ = x + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x") + + def test3(self) -> None: + x = 1 + y = "hello" + # To disable linter complaints. + _ = x, y + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x y") + + def test4(self) -> None: + """ + The command: + + > pytest -k Test_log::test4 -o log_cli=true --dbg_verbosity DEBUG + + should print something like: + + DEBUG test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] + INFO test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] + """ + x = 1 + y = "hello" + z = ["cruel", "world"] + # To disable linter complaints. 
+ _ = x, y, z + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x y z") + + +# ############################################################################# +# Test_sort_dictionary +# ############################################################################# + + +class Test_sort_dictionary(hunitest.TestCase): + def test1(self) -> None: + dict_ = { + "tool": { + "poetry": { + "name": "lm", + "version": "0.1.0", + "description": "", + "authors": [""], + "dependencies": { + "awscli": "*", + "boto3": "*", + "flaky": "*", + "fsspec": "*", + "gluonts": "*", + "invoke": "*", + "jupyter": "*", + "matplotlib": "*", + "mxnet": "*", + "networkx": "*", + "pandas": "^1.1.0", + "psycopg2": "*", + "pyarrow": "*", + "pytest": "^6.0.0", + "pytest-cov": "*", + "pytest-instafail": "*", + "pytest-xdist": "*", + "python": "^3.7", + "pywavelets": "*", + "s3fs": "*", + "seaborn": "*", + "sklearn": "*", + "statsmodels": "*", + "bs4": "*", + "jsonpickle": "*", + "lxml": "*", + "tqdm": "*", + "requests": "*", + }, + "dev-dependencies": {}, + } + }, + "build-system": { + "requires": ["poetry>=0.12"], + "build-backend": "poetry.masonry.api", + }, + } + actual = hprint.sort_dictionary(dict_) + self.check_string(pprint.pformat(actual)) + + +# ############################################################################# +# Test_indent1 +# ############################################################################# + + +class Test_indent1(hunitest.TestCase): + def test1(self) -> None: + txt = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar +""" + num_spaces = 2 + actual = hprint.indent(txt, num_spaces=num_spaces) + expected = """ foo + + klass TestHelloWorld(hunitest.TestCase): + bar +""" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_dedent1 +# ############################################################################# + + +class 
Test_dedent1(hunitest.TestCase): + def test1(self) -> None: + txt = """ + foo + + klass TestHelloWorld(hunitest.TestCase): + bar +""" + actual = hprint.dedent(txt) + expected = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar""" + self.assert_equal(actual, expected, fuzzy_match=False) + + def test2(self) -> None: + txt = r""" + read_data: + file_name: foo_bar.txt + nrows: 999 + single_val: hello + zscore: + style: gaz + com: 28""" + actual = hprint.dedent(txt) + expected = """read_data: + file_name: foo_bar.txt + nrows: 999 +single_val: hello +zscore: + style: gaz + com: 28""" + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_roundtrip1(self) -> None: + """ + Verify that `indent` and `dedent` are inverse of each other. + """ + txt1 = """foo + + +# ############################################################################# +# TestHelloWorld +# ############################################################################# + + +class TestHelloWorld(hunitest.TestCase): + bar""" + num_spaces = 3 + txt2 = hprint.indent(txt1, num_spaces=num_spaces) + txt3 = hprint.dedent(txt2) + self.assert_equal(txt1, txt3, fuzzy_match=False) + + +# ############################################################################# +# Test_align_on_left1 +# ############################################################################# + + +class Test_align_on_left1(hunitest.TestCase): + def test1(self) -> None: + txt = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar +""" + actual = hprint.align_on_left(txt) + expected = """foo + +klass TestHelloWorld(hunitest.TestCase): +bar +""" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_logging1 +# ############################################################################# + + +class Test_logging1(hunitest.TestCase): + def test_log_frame1(self) -> None: + hprint.log_frame(_LOG, "%s %s", "hello", "world") + + def 
test_log_frame2(self) -> None: + hprint.log_frame(_LOG, "%s", "hello", level=1) + + def test_log_frame3(self) -> None: + hprint.log_frame(_LOG, "%s", "hello", level=2, verbosity=logging.INFO) + + +# ############################################################################# +# Test_remove_lead_trail_empty_lines1 +# ############################################################################# + + +class Test_remove_lead_trail_empty_lines1(hunitest.TestCase): + def helper(self, input_str: str, expected_output: List[str]) -> None: + """ + Test the `remove_lead_trail_empty_lines` function. + + :param input_str: The input string to be processed. + :param expected_output: The expected output list of strings. + + Example: + input_str = "line1\n\n\nline2" + expected_output = ["line1", "", "", "line2"] + """ + # Test as string. + actual = hprint.remove_lead_trail_empty_lines(input_str) + expected = "\n".join(expected_output) + self.assertEqual(actual, expected) + # Test as list of strings. + input_str = input_str.splitlines() + actual = hprint.remove_lead_trail_empty_lines(input_str) + self.assertEqual(actual, expected_output) + + def test_empty_string_returns_empty_list(self) -> None: + input_str: str = "" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_single_line_string_returns_single_line_list(self) -> None: + input_str: str = "line" + expected_output = ["line"] + self.helper(input_str, expected_output) + + def test_multiple_lines_with_no_empty_lines_returns_same_lines( + self, + ) -> None: + input_str: str = "line1\nline2\nline3" + expected_output = ["line1", "line2", "line3"] + self.helper(input_str, expected_output) + + def test_leading_empty_lines_are_removed(self) -> None: + input_str: str = "\n\nline1\nline2" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_trailing_empty_lines_are_removed(self) -> None: + input_str: str = "line1\nline2\n\n" + expected_output = ["line1", 
"line2"] + self.helper(input_str, expected_output) + + def test_leading_and_trailing_empty_lines_are_removed(self) -> None: + input_str: str = "\n\nline1\nline2\n\n" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_consecutive_empty_lines_in_middle_are_not_removed(self) -> None: + input_str: str = "line1\n\n\nline2" + expected_output = ["line1", "", "", "line2"] + self.helper(input_str, expected_output) + + def test_only_empty_lines_returns_empty_list(self) -> None: + input_str: str = "\n\n\n" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_mixed_content_with_leading_trailing_and_middle_empty_lines( + self, + ) -> None: + input_str: str = "\n\nline1\n\nline2\n\n" + expected_output = ["line1", "", "line2"] + self.helper(input_str, expected_output) + + def test_single_empty_line_returns_empty_list(self) -> None: + input_str: str = "\n" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_multiple_consecutive_empty_lines_at_beginning_and_end( + self, + ) -> None: + input_str: str = "\n\n\nline1\nline2\n\n\n" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_input_with_only_spaces_and_tabs_as_empty_lines(self) -> None: + input_str: str = " \n\t\nline1\nline2\n \n\t" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_input_with_mixed_line_endings_unix_and_windows(self) -> None: + input_str: str = "line1\n\nline2\r\n\r\nline3" + expected_output = ["line1", "", "line2", "", "line3"] + self.helper(input_str, expected_output) + + def test_input_with_special_characters(self) -> None: + input_str: str = "line1\n\n!@#$%^&*()\n\nline2" + expected_output = ["line1", "", "!@#$%^&*()", "", "line2"] + self.helper(input_str, expected_output) + + +# ############################################################################# +# Test_remove_empty_lines +# 
############################################################################# + + +class Test_remove_empty_lines(hunitest.TestCase): + """ + Test remove_empty_lines function with different modes. + """ + + def helper(self, lines: str, mode: str, expected: str) -> None: + """ + Test helper for remove_empty_lines. + + :param lines: Input text as string (will be split into list) + :param mode: Mode parameter for remove_empty_lines + :param expected: Expected output as string (will be split into list) + """ + # Prepare inputs. + lines_str = hprint.dedent(lines) + if lines_str: + lines_list = lines_str.split("\n") + else: + lines_list = [] + # Prepare outputs. + expected_str = hprint.dedent(expected) + if expected_str: + expected_list = expected_str.split("\n") + else: + expected_list = [] + # Run test. + actual = hprint.remove_empty_lines(lines_list, mode=mode) + # Check outputs. + self.assert_equal(str(actual), str(expected_list)) + + def test1(self) -> None: + """ + Test no_empty_lines mode with an empty list. + """ + # Prepare inputs. + lines = "" + mode = "no_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test2(self) -> None: + """ + Test no_empty_lines mode with no empty lines in the input. + """ + # Prepare inputs. + lines = """ + line1 + line2 + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test3(self) -> None: + """ + Test no_empty_lines mode with all lines being empty. + """ + # Prepare inputs. + lines = """ + + + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test4(self) -> None: + """ + Test no_empty_lines mode removes leading empty lines. + """ + # Prepare inputs. + lines = """ + + line1 + line2 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + """ + # Run test. 
+ self.helper(lines, mode, expected) + + def test5(self) -> None: + """ + Test no_empty_lines mode removes trailing empty lines. + """ + # Prepare inputs. + lines = """ + line1 + line2 + + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test6(self) -> None: + """ + Test no_empty_lines mode removes empty lines in the middle. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test7(self) -> None: + """ + Test no_empty_lines mode removes lines with only whitespace. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + \t + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test8(self) -> None: + """ + Test no_consecutive_empty_lines mode with empty list. + """ + # Prepare inputs. + lines = "" + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test9(self) -> None: + """ + Test no_consecutive_empty_lines mode with no empty lines. + """ + # Prepare inputs. + lines = """ + line1 + line2 + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test10(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps single empty line. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test11(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps one of two consecutive empty lines. 
+ """ + # Prepare inputs. + lines = """ + line1 + + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test12(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps one of multiple consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + + + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test13(self) -> None: + """ + Test no_consecutive_empty_lines mode with multiple groups of consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + + line2 + + + + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test14(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps all non-consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test15(self) -> None: + """ + Test that invalid mode raises ValueError. + """ + # Prepare inputs. + lines = ["line1", "line2"] + mode = "invalid_mode" + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hprint.remove_empty_lines(lines, mode=mode) + actual = str(cm.exception) + expected = "Invalid mode='invalid_mode'" + self.assert_equal(actual, expected) + + def test16(self) -> None: + """ + Test remove_empty_lines with string input (decorator functionality). + """ + # Prepare inputs. + text = """ + line1 + + line2 + + line3 + """ + text = hprint.dedent(text) + mode = "no_empty_lines" + # Prepare outputs. 
+ expected = """ + line1 + line2 + line3 + """ + expected = hprint.dedent(expected) + # Run test. + actual = hprint.remove_empty_lines(text, mode=mode) + # Check outputs. + self.assert_equal(actual, expected) + + def test17(self) -> None: + """ + Test no_consecutive_empty_lines with string input (decorator functionality). + """ + # Prepare inputs. + text = """ + line1 + + + line2 + """ + text = hprint.dedent(text) + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + expected = hprint.dedent(expected) + # Run test. + actual = hprint.remove_empty_lines(text, mode=mode) + # Check outputs. + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py new file mode 100644 index 000000000..652fdf47a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py @@ -0,0 +1,228 @@ +import io +import os +import pprint +import re +from contextlib import redirect_stdout + +import pytest + +# TODO(heanh): add `junitparser` in `//helpers` image. +pytest.importorskip("junitparser") + +import helpers.hio as hio # noqa: E402 +import helpers.hpytest as hpytest # noqa: E402 +import helpers.hunit_test as hunitest # noqa: E402 + + +def _strip_color_codes(text: str) -> str: + """ + Remove ANSI color escape codes from text. + + :param text: text to strip the color codes from + :return: text with the color codes removed + """ + # Remove ANSI escape codes. 
+ txt = re.sub(r"\033\[[0-9;]*m", "", text) + return txt + + +# ############################################################################# +# Test_JUnitReporter +# ############################################################################# + + +class Test_JUnitReporter(hunitest.TestCase): + """ + Test scenario where there are passed, skipped tests with leads to `PASSED` + result. + """ + + def helper(self) -> hpytest.JUnitReporter: + """ + Helper function to create a `JUnitReporter` object. + + :return: `JUnitReporter` object + """ + xml_str = """ + + + + + /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. + + + + + """ + input_dir = self.get_scratch_space() + input_file_path = os.path.join(input_dir, "test.xml") + hio.to_file(input_file_path, xml_str) + reporter = hpytest.JUnitReporter(input_file_path) + return reporter + + def test_parse(self) -> None: + """ + Test parsing the JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + actual = pprint.pformat(reporter.overall_stats) + expected = r""" + {'error': 0, + 'failed': 0, + 'passed': 1, + 'skipped': 1, + 'total_tests': 2, + 'total_time': 3.0} + """ + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test_print_summary(self) -> None: + """ + Test printing the summary of the results from JUnit XML file. 
+ """ + reporter = self.helper() + reporter.parse() + captured_output = io.StringIO() + with redirect_stdout(captured_output): + reporter.print_summary() + actual = captured_output.getvalue() + actual = _strip_color_codes(actual) + expected = r""" + ====================================================================== + collected 2 items + + ====================================================================== + Test: dummy-test-suite-1 + Timestamp: 2025-01-01T12:00:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) + Summary: 1 passed, 1 skipped in 2.000s + + ====================================================================== + Test: dummy-test-suite-2 + Timestamp: 2025-01-01T12:01:00.000000+00:00 + ---------------------------------------------------------------------- + Summary: no tests in 1.000s + + ====================================================================== + Summary: 1 passed, 1 skipped in 3.00s + Result: PASSED + """ + self.assert_equal( + actual, + expected, + dedent=True, + fuzzy_match=True, + ) + + +# ############################################################################# +# Test_JUnitReporter2 +# ############################################################################# + + +class Test_JUnitReporter2(hunitest.TestCase): + """ + Test scenario where there are passed, error, failed, and skipped tests with + leads to `FAILED` result. + """ + + def helper(self) -> hpytest.JUnitReporter: + """ + Helper function to create a `JUnitReporter` object. + + :return: `JUnitReporter` object + """ + xml_str = """ + + + + + /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. + + + + + + /app/dummy/test/test_module.py:30: Dummy failure message for testing purposes. 
+ + + /app/dummy/test/test_module.py:35: Dummy error message for testing purposes. + + + + + """ + input_dir = self.get_scratch_space() + input_file_path = os.path.join(input_dir, "test.xml") + hio.to_file(input_file_path, xml_str) + reporter = hpytest.JUnitReporter(input_file_path) + return reporter + + def test_parse(self) -> None: + """ + Test parsing the JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + actual = pprint.pformat(reporter.overall_stats) + expected = r""" + {'error': 1, + 'failed': 1, + 'passed': 2, + 'skipped': 1, + 'total_tests': 5, + 'total_time': 6.0} + """ + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test_print_summary(self) -> None: + """ + Test printing the summary of the results from JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + captured_output = io.StringIO() + with redirect_stdout(captured_output): + reporter.print_summary() + actual = captured_output.getvalue() + actual = _strip_color_codes(actual) + expected = r""" + ====================================================================== + collected 5 items + + ====================================================================== + Test: dummy-test-suite-1 + Timestamp: 2025-01-01T12:00:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) + Summary: 1 passed, 1 skipped in 2.000s + + ====================================================================== + Test: dummy-test-suite-2 + Timestamp: 2025-01-01T12:01:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_passed_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_failed_function FAILED (1.000s) + dummy.test.test_module.DummyTestCase::test_error_function ERROR (1.000s) + 
Summary: 1 passed, 1 failed, 1 error in 3.000s + + ====================================================================== + Test: dummy-test-suite-3 + Timestamp: 2025-01-01T12:02:00.000000+00:00 + ---------------------------------------------------------------------- + Summary: no tests in 1.000s + + ====================================================================== + Summary: 2 passed, 1 failed, 1 error, 1 skipped in 6.00s + Result: FAILED + """ + self.assert_equal( + actual, + expected, + dedent=True, + fuzzy_match=True, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py new file mode 100644 index 000000000..d64310202 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py @@ -0,0 +1,154 @@ +import asyncio +import logging + +import pytest + +import helpers.hretry as hretry +import helpers.htimer as htimer +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +EXCEPTIONS = (AttributeError, ValueError) + + +# ############################################################################# +# Test_retry +# ############################################################################# + + +class Test_retry(hunitest.TestCase): + def test_retry1(self) -> None: + """ + Test normal case. 
+ """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> bool: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + self.assertTrue(func()) + self.assertEqual(self.exception_count, num_attempts - 1) + + def test_retry2(self) -> None: + """ + Test when the number of exceptions is greater than the number of + retries. + """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> bool: + if self.exception_count < num_attempts: + self.exception_count += 1 + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with self.assertRaises(ValueError): + func() + + def test_retry3(self) -> None: + """ + Test when the raised exception is not in the list of expected + exceptions. + """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> None: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + raise IndexError("Simulated non expected error") + _LOG.debug("All good") + + with self.assertRaises(IndexError): + func() + + +# ############################################################################# +# Test_retry2 +# ############################################################################# + + +class Test_retry2(hunitest.TestCase): + def test_async_retry1(self) -> None: + """ + Test normal case. 
+ """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> bool: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: + result = asyncio.run(func()) + self.assertEqual(round(ts.elapsed_time, 1), 2.2) + self.assertTrue(result) + self.assertEqual(self.exception_count, num_attempts - 1) + + @pytest.mark.skip(reason="See CmTask11013") + def test_async_retry2(self) -> None: + """ + Test when the number of exceptions is greater than the number of + retries. + """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> bool: + if self.exception_count < num_attempts: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with self.assertRaises(ValueError) as fail: + with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: + asyncio.run(func()) + self.assertEqual(round(ts.elapsed_time, 1), 3.3) + actual = str(fail.exception) + expected = "Simulated expected error" + self.assert_equal(actual, expected) + + def test_async_retry3(self) -> None: + """ + Test when the raised exception is not in the list of expected + exceptions. 
+ """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> None: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise IndexError("Simulated non expected error") + _LOG.debug("All good") + + with self.assertRaises(IndexError) as fail: + asyncio.run(func()) + actual = str(fail.exception) + expected = "Simulated non expected error" + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py new file mode 100644 index 000000000..8f9dd84df --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py @@ -0,0 +1,597 @@ +import logging +import os +from typing import Generator, Tuple + +import pytest + +import helpers.hio as hio +import helpers.hmoto as hmoto +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestReplaceStarWithDoubleStar +# ############################################################################# + + +class TestReplaceStarWithDoubleStar(hunitest.TestCase): + def test1(self) -> None: + """ + Test non replacement of a single asterisk at the end of the path. + """ + pattern_to_modify = "s3://bucket/path/*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/*") + + def test2(self) -> None: + """ + Test replacement of a single asterisk within the path. 
+ """ + pattern_to_modify = "s3://bucket/path/*/file" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/**/*/file") + + def test3(self) -> None: + """ + Test no replacement when there are no asterisks in the path. + """ + pattern_to_modify = "s3://bucket/path/file" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/file") + + def test4(self) -> None: + """ + Test replacement when multiple asterisk are in the path. + """ + pattern_to_modify = "s3://bucket/*/path/*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/**/*/path/*") + + def test5(self) -> None: + """ + Test non-replacement of asterisk at the end of the path in a special + case. + """ + pattern_to_modify = "s3://bucket/*/path/csv*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/**/*/path/csv*") + + +# ############################################################################# +# TestToFileAndFromFile1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestToFileAndFromFile1(hmoto.S3Mock_TestCase): + def write_read_helper(self, file_name: str, force_flush: bool) -> None: + # Prepare inputs. + file_content = "line_mock1\nline_mock2\nline_mock3" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{file_name}" + # Save file. + # TODO(Nikola): Is it possible to verify `force_flush`? + hs3.to_file( + file_content, + s3_path, + aws_profile=moto_s3fs, + force_flush=force_flush, + ) + # Read file. + saved_content = hs3.from_file(s3_path, aws_profile=moto_s3fs) + # Check output. 
+ expected = r"""line_mock1 + line_mock2 + line_mock3""" + self.assert_equal(saved_content, expected, fuzzy_match=True) + + # ######################################################################### + + def test_to_file_and_from_file1(self) -> None: + """ + Verify that regular `.txt` file is saved/read on S3. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + force_flush = False + self.write_read_helper(regular_file_name, force_flush) + + def test_to_file_and_from_file2(self) -> None: + """ + Verify that compressed (e.g,`.gz`,`gzip`) file is saved/read on S3. + """ + # Prepare inputs. + gzip_file_name = "mock.gzip" + force_flush = True + self.write_read_helper(gzip_file_name, force_flush) + + def test_to_file_invalid1(self) -> None: + """ + Verify that only binary mode is allowed. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + regular_file_content = "line_mock1\nline_mock2\nline_mock3" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{regular_file_name}" + # Save file with `t` mode. + with self.assertRaises(ValueError) as fail: + hs3.to_file( + regular_file_content, s3_path, mode="wt", aws_profile=moto_s3fs + ) + # Check output. + actual = str(fail.exception) + expected = r"S3 only allows binary mode!" + self.assert_equal(actual, expected) + + def test_from_file_invalid1(self) -> None: + """ + Verify that encoding is not allowed. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{regular_file_name}" + # Read with encoding. + with self.assertRaises(ValueError) as fail: + hs3.from_file(s3_path, encoding=True, aws_profile=moto_s3fs) + # Check output. + actual = str(fail.exception) + expected = r"Encoding is not supported when reading from S3!" 
+ self.assert_equal(actual, expected) + + +# ############################################################################# +# TestListdir1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestListdir1(hmoto.S3Mock_TestCase): + def prepare_test_data(self) -> Tuple[str, hs3.AwsProfile]: + bucket_s3_path = f"s3://{self.bucket_name}" + depth_one_s3_path = f"{bucket_s3_path}/depth_one" + # Prepare test files. + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + first_s3_path = f"{depth_one_s3_path}/mock1.txt" + lines = [b"line_mock1"] + with moto_s3fs.open(first_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + second_s3_path = f"{depth_one_s3_path}/mock2.gzip" + with moto_s3fs.open(second_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + # Prepare test directories. + # `moto_s3fs.mkdir` is useless as empty directory is not visible. + # There must be at least one file in the directory to be visible. + regular_dir_s3_path = f"{depth_one_s3_path}/mock" + additional_file_s3_path = f"{regular_dir_s3_path}/regular_mock3.txt" + with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + git_dir_s3_path = f"s3://{bucket_s3_path}/.git" + additional_file_s3_path = f"{git_dir_s3_path}/git_mock3.txt" + with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + return bucket_s3_path, moto_s3fs + + # ######################################################################### + + def test_listdir1(self) -> None: + """ + Verify that all paths are found. 
+ """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = False + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock_bucket/.git", + "mock_bucket/.git/git_mock3.txt", + "mock_bucket/depth_one", + "mock_bucket/depth_one/mock", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir2(self) -> None: + """ + Verify that all relative paths are found. + """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + # Exclude `.git` by going level below. + bucket_s3_path = os.path.join(bucket_s3_path, "depth_one") + pattern = "*" + only_files = False + use_relative_paths = True + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock", + "mock/regular_mock3.txt", + "mock1.txt", + "mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir3(self) -> None: + """ + Verify that all paths are found, except `.git` ones. + """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = False + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + ) + paths.sort() + expected_paths = [ + "mock_bucket/depth_one", + "mock_bucket/depth_one/mock", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir4(self) -> None: + """ + Verify that all file paths are found. 
+ """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = True + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock_bucket/.git/git_mock3.txt", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + +# ############################################################################# +# TestDu1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestDu1(hmoto.S3Mock_TestCase): + def test_du1(self) -> None: + """ + Verify that total file size is returned. + """ + bucket_s3_path = f"s3://{self.bucket_name}" + depth_one_s3_path = f"{bucket_s3_path}/depth_one" + # Prepare test files. + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + first_s3_path = f"{bucket_s3_path}/mock1.txt" + lines = [b"line_mock\n"] * 150 + with moto_s3fs.open(first_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + second_s3_path = f"{depth_one_s3_path}/mock2.txt" + with moto_s3fs.open(second_s3_path, "wb") as s3_file: + # One level deeper to test recursive `du`. + s3_file.writelines(lines) + # Get multiple files. + size = hs3.du(bucket_s3_path, aws_profile=moto_s3fs) + expected_size = 3000 + self.assertEqual(size, expected_size) + size = hs3.du(depth_one_s3_path, aws_profile=moto_s3fs) + expected_size = 1500 + self.assertEqual(size, expected_size) + # Get exactly one file. + size = hs3.du(second_s3_path, aws_profile=moto_s3fs) + self.assertEqual(size, expected_size) + # Verify size in human-readable form. 
+ size = hs3.du(bucket_s3_path, human_format=True, aws_profile=moto_s3fs) + expected_size = r"2.9 KB" + self.assert_equal(size, expected_size) + + +# ############################################################################# +# TestGenerateAwsFiles +# ############################################################################# + + +class TestGenerateAwsFiles(hunitest.TestCase): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + self.setUp() + os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "mock_access_key" + os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "mock_secret_access_key" + os.environ["MOCK_AWS_SESSION_TOKEN"] = "mock_session_token" + os.environ["MOCK_AWS_S3_BUCKET"] = "mock_s3_bucket" + os.environ["MOCK_AWS_DEFAULT_REGION"] = "mock_default_region" + # + os.environ["TEST_AWS_ACCESS_KEY_ID"] = "test_access_key" + os.environ["TEST_AWS_SECRET_ACCESS_KEY"] = "test_secret_access_key" + os.environ["TEST_AWS_SESSION_TOKEN"] = "test_session_token" + os.environ["TEST_AWS_S3_BUCKET"] = "test_s3_bucket" + os.environ["TEST_AWS_DEFAULT_REGION"] = "test_default_region" + # Generate AWS files with mock AWS profiles. 
+ self._scratch_test_dir = self.get_scratch_space() + aws_profiles = ["mock", "test"] + hs3.generate_aws_files( + home_dir=self._scratch_test_dir, aws_profiles=aws_profiles + ) + + def tear_down_test(self) -> None: + del os.environ["MOCK_AWS_ACCESS_KEY_ID"] + del os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] + del os.environ["MOCK_AWS_SESSION_TOKEN"] + del os.environ["MOCK_AWS_S3_BUCKET"] + del os.environ["MOCK_AWS_DEFAULT_REGION"] + # + del os.environ["TEST_AWS_ACCESS_KEY_ID"] + del os.environ["TEST_AWS_SECRET_ACCESS_KEY"] + del os.environ["TEST_AWS_SESSION_TOKEN"] + del os.environ["TEST_AWS_S3_BUCKET"] + del os.environ["TEST_AWS_DEFAULT_REGION"] + + def helper(self, file_name: str, expected: str) -> None: + # Check. + target_dir = os.path.join(self._scratch_test_dir, ".aws") + actual = hio.from_file(os.path.join(target_dir, file_name)) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Check that AWS credentials file is generated correctly. + """ + file_name = "credentials" + expected = r""" + [mock] + aws_access_key_id=mock_access_key + aws_secret_access_key=mock_secret_access_key + aws_session_token=mock_session_token + aws_s3_bucket=mock_s3_bucket + + [test] + aws_access_key_id=test_access_key + aws_secret_access_key=test_secret_access_key + aws_session_token=test_session_token + aws_s3_bucket=test_s3_bucket + """ + self.helper(file_name, expected) + + def test2(self) -> None: + """ + Check that AWS config file is generated correctly. 
+ """ + file_name = "config" + expected = """ + [profile mock] + region=mock_default_region + + [profile test] + region=test_default_region + """ + self.helper(file_name, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_get_s3_bucket_from_stage +# ############################################################################# + + +class Test_get_s3_bucket_from_stage(hunitest.TestCase): + def test1(self) -> None: + """ + Check for a valid stage. + """ + # Define arguments. + stage = "test" + # Run. + actual = hs3.get_s3_bucket_from_stage(stage) + expected = "cryptokaizen-data-test" + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Check for a valid stage and optional suffix. + """ + # Define arguments. + stage = "preprod" + suffix = "suffix_test" + # Run. + actual = hs3.get_s3_bucket_from_stage(stage, add_suffix=suffix) + expected = "cryptokaizen-data.preprod/suffix_test" + self.assert_equal(actual, expected) + + def test3(self) -> None: + """ + Check Invalid stage. + """ + # Define arguments. + stage = "Invalid" + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hs3.get_s3_bucket_from_stage(stage) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 'Invalid' in '{'test': 'cryptokaizen-data-test', 'preprod': 'cryptokaizen-data.preprod', 'prod': 'cryptokaizen-data'}' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_s3_get_credentials1 +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class Test_s3_get_credentials1(hunitest.TestCase): + def test1(self) -> None: + res = hs3.get_aws_credentials(_AWS_PROFILE) + _LOG.debug("res=%s", str(res)) + + +# ############################################################################# +# Test_s3_functions1 +# ############################################################################# + + +class Test_s3_functions1(hunitest.TestCase): + def test_extract_bucket_from_path1(self) -> None: + path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "tmp/TestCachingOnS3.test_with_caching1/joblib", + ) + bucket, path = hs3.split_path(path) + self.assert_equal(bucket, "cryptokaizen-unit-test") + self.assert_equal(path, "/tmp/TestCachingOnS3.test_with_caching1/joblib") + + +# ############################################################################# +# Test_s3_1 +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class Test_s3_1(hunitest.TestCase): + def test_ls1(self) -> None: + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README.md", + ) + _LOG.debug("file_path=%s", file_path) + # > aws s3 ls s3://***** + # PRE data/ + # 2021-04-06 1:17:44 48 README.md + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_names = s3fs.ls(file_path) + 
_LOG.debug("file_names=%s", file_names) + self.assertGreater(len(file_names), 0) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_glob1(self) -> None: + # > aws s3 ls s3://alphamatic-data/data/ib/metadata/ + # 2021-04-26 08:39:00 18791 exchanges-2021-04-01-134738089177.csv + # 2021-04-26 08:39:00 18815 exchanges-2021-04-01-143112738505.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-134738089177.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/ib/metadata", + ) + glob_pattern = file_path + "/exchanges-*" + _LOG.debug("glob_pattern=%s", glob_pattern) + file_names = s3fs.glob(glob_pattern) + _LOG.debug("file_names=%s", file_names) + self.assertGreater(len(file_names), 0) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README.md", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = True + self.assertEqual(actual, expected) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists2(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README_does_not_exist.md", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = False + self.assertEqual(actual, expected) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists3(self) -> None: + # > aws s3 ls alphamatic-data/data/ib/metadata/symbols-2021-04-01-143112738505.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + 
hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/ib/metadata/symbols-2021-04-01-143112738505.csv", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = True + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py new file mode 100644 index 000000000..cc046ddac --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py @@ -0,0 +1,209 @@ +# TODO(gp): Use pytest.import_skip instead of all this machinery. +_HAS_MOTO = True +try: + import moto +except ImportError: + # `moto` may not be installed in a non-cmamp repo, so we skip it (see "DevTools376: + # Break 2022-02-22"). + import helpers.hgit as hgit + + assert not hgit.is_cmamp(), ( + "`cmamp` should have moto, while other repos are allowed to not have it)" + ) + _HAS_MOTO = False + +if _HAS_MOTO: + import json + import logging + import unittest.mock as umock + + import boto3 + import botocore + import pytest + + import helpers.hgit as hgit + import helpers.hs3 as hs3 + import helpers.hsecrets as hsecret + import helpers.hserver as hserver + import helpers.hunit_test as hunitest + + _LOG = logging.getLogger(__name__) + + # The `mock_aws` decorator ensures the calls to the AWS API are + # mocked. + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestCreateClient(hunitest.TestCase): + def test_create_client1(self) -> None: + """ + Simple smoke test to verify connection to AWS. 
+ """ + client = hsecret.get_secrets_client(aws_profile="ck") + self.assertIsInstance(client, botocore.client.BaseClient) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestGetSecret(hunitest.TestCase): + @moto.mock_aws + def test_get_secret(self) -> None: + """ + Verify that the secret can be retrieved correctly. + """ + # Make sure the region name matches the one used in `hsecret` profile. + client = boto3.client( + "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 + ) + secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) + self.assertDictEqual(hsecret.get_secret(secret_name), secret) + + @moto.mock_aws + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + def test_trading_key(self) -> None: + """ + Verify locking mechanism for trading key is processed correctly. + """ + # Define test params. + secret_value = {"test.trading.key": "test.trading.value"} + secret_name = "test.trading.sandbox.1" + usedBy = "pytest" + hsecret.store_secret(secret_name, secret_value) + # Define expected values. + usedBy = hsecret._get_flag_value(usedBy) + expected = f"Secret key is already in use by {usedBy}" + # Call get secret to lock the key. + _ = hsecret.get_secret(secret_name) + # Recall get secret for same key to verify the lock. + try: + hsecret.get_secret(secret_name) + except RuntimeError as rte: + actual = str(rte) + self.assert_equal(actual, expected, fuzzy_match=True) + + @moto.mock_aws + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + def test_lock_for_different_script(self) -> None: + """ + Verify locking mechanism for access to trading key is passed if + scripts are different. + """ + # Define test params. 
+ secret_value = {"test.trading.key": "test.trading.value"} + secret_name = "test.trading.sandbox.1" + script1 = "pytest" + script2 = "run_system_observer.py" + hsecret.store_secret(secret_name, secret_value) + # Call get secret to lock the key with testing script. + _ = hsecret.get_secret(secret_name) + usedBy1 = hsecret._get_flag_value(script1) + # Define expected values. + usedBy2 = hsecret._get_flag_value(script2) + # Update secret value with expected usedBy script names. + secret_value["usedBy"] = [usedBy1, usedBy2] + # Call get secret for same key to verify the lock for mocked script. + with umock.patch("sys.argv", [script2]): + actual = hsecret.get_secret(secret_name) + self.assert_equal( + str(actual), expected=str(secret_value), fuzzy_match=True + ) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestStoreSecret(hunitest.TestCase): + @moto.mock_aws + def test_store_secret1(self) -> None: + """ + Verify that a secret can be stored correctly. + """ + secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + hsecret.store_secret(secret_name, secret) + # Make sure the region name matches the one used in `hsecret`. + client = boto3.client( + "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 + ) + test_secret_value = json.loads( + client.get_secret_value(SecretId=secret_name)["SecretString"] + ) + self.assertDictEqual(test_secret_value, secret) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + class TestLockSecret(hunitest.TestCase): + @moto.mock_aws + def test_lock_secret(self) -> None: + """ + Verify that the lock secret function locks the key. + """ + # Define test params. 
+ secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + hsecret.store_secret(secret_name, secret) + usedBy = "pytest" + # Lock the stored secret. + hsecret.lock_secret(secret_name, secret) + # Retry locking the same secret. + try: + hsecret.lock_secret(secret_name, secret) + except RuntimeError as rte: + usedBy = hsecret._get_flag_value(usedBy) + expected = f"Secret key is already in use by {usedBy}" + actual = str(rte) + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + class TestUpdateUsedby(hunitest.TestCase): + @moto.mock_aws + def test1(self) -> None: + """ + Verify that update_usedby updates value in secrets manager. + """ + # Define test params. + secret_value = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + usedBy = "pytest" + hsecret.store_secret(secret_name, secret_value) + # Define expected value. + expected = r""" + {'testkey': 'testvalue', 'usedBy': ['pytest']} + """ + # Run. + hsecret.update_usedby(secret_name, secret_value, usedBy) + actual = hsecret.get_secret(secret_name) + # Verify. 
+ self.assert_equal(str(actual), expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py new file mode 100644 index 000000000..3e6a1ba7d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py @@ -0,0 +1,321 @@ +import logging + +import pytest + +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _TestCase1 +# ############################################################################# + + +class _TestCase1: + # def test_config_func_to_str1(self) -> None: + # val = hserver.config_func_to_str() + # _LOG.info("val=\n%s", val) + # if self.exp_config_func_to_str is not None: + # self.assert_equal(val, self.exp_config_func_to_str) + + def test_consistency1(self) -> None: + hserver._dassert_setup_consistency() + + def test_is_host_csfy_server1(self) -> None: + val = hserver.is_host_csfy_server() + _LOG.info("val=\n%s", val) + if self.exp_is_host_csfy_server is not None: + self.assertEqual(val, self.exp_is_host_csfy_server) + + def test_is_host_mac1(self) -> None: + val = hserver.is_host_mac() + _LOG.info("val=\n%s", val) + if self.exp_is_host_mac is not None: + self.assertEqual(val, self.exp_is_host_mac) + + def test_get_docker_info1(self) -> None: + val = hserver.get_docker_info() + _LOG.info("val=\n%s", val) + # Remove the docker version since it is not stable. 
+ val = hprint.filter_text("docker_version=", val) + if self.exp_get_docker_info is not None: + self.assert_equal(val, self.exp_get_docker_info) + + def test_get_setup_settings1(self) -> None: + setups = hserver._get_setup_settings() + val = hserver._setup_to_str(setups) + _LOG.info("val=\n%s", val) + if self.exp_get_setup_settings is not None: + self.assert_equal(val, self.exp_get_setup_settings) + + # def test_get_setup_signature1(self) -> None: + # val = hserver._get_setup_signature() + # _LOG.info("val=\n%s", val) + # if self.exp_get_setup_signature is not None: + # self.assert_equal(val, self.exp_get_setup_signature) + + def test_is_inside_ci1(self) -> None: + val = hserver.is_inside_ci() + _LOG.info("val=\n%s", val) + if self.exp_is_inside_ci is not None: + self.assertEqual(val, self.exp_is_inside_ci) + + +# ############################################################################# +# Test_hserver1 +# ############################################################################# + + +class Test_hserver1(_TestCase1, hunitest.TestCase): + """ + Smoke test without checking anything. + """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = None + self.exp_get_docker_info = None + self.exp_get_setup_settings = None + self.exp_get_setup_signature = None + self.exp_is_host_csfy_server = None + self.exp_is_host_mac = None + self.exp_is_inside_ci = None + + +# ############################################################################# +# Test_hserver_inside_ci1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="Config not matching", +) +class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): + """ + Run tests inside CI. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = None + self.exp_get_docker_info = hprint.dedent(r""" + Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci True + """) + self.exp_get_setup_signature = None + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = False + self.exp_is_inside_ci = True + + +# ############################################################################# +# Test_hserver_inside_docker_container_on_csfy_server1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_inside_docker_container_on_csfy_server(), + reason="Config not matching", +) +class Test_hserver_inside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): + """ + Run tests inside Docker container on a Causify dev server. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server True + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = True + self.exp_is_host_mac = False + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_outside_docker_container_on_csfy_server1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_outside_docker_container_on_csfy_server(), + reason="Config not matching", +) +class Test_hserver_outside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): + """ + Run tests outside Docker container on a Causify dev server. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=False + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server True + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = True + self.exp_is_host_mac = False + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_inside_docker_container_on_gp_mac1 +# ############################################################################# + + +@pytest.mark.skipif( + not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", +) +class Test_hserver_inside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): + """ + Run tests inside Docker container on GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac True + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_outside_docker_container_on_gp_mac1 +# ############################################################################# + + +@pytest.mark.skipif( + not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", +) +class Test_hserver_outside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): + """ + Run tests outside Docker container on GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=False + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac True + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False + + +# ############################################################################# + + +# TODO(gp): Add test mocking the environment variables in _get_setup_signature. +# We should have one class for each set up (e.g., outside Mac, outside Linux, +# inside Docker, inside CI, etc.) 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py new file mode 100644 index 000000000..998b65c86 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py @@ -0,0 +1,81 @@ +import os +import unittest.mock as umock + +import helpers.hslack as hslack +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestSlackNotifier +# ############################################################################# + + +class TestSlackNotifier(hunitest.TestCase): + def test1(self) -> None: + """ + Check that `SlackNotifier` initializes with provided bot token. + """ + # Create notifier with explicit token. + notifier = hslack.SlackNotifier(bot_token="xoxb-test1-token") + self.assertEqual(notifier.bot_token, "xoxb-test1-token") + + def test2(self) -> None: + """ + Check that `SlackNotifier` initializes with environment variable token. + """ + # Mock environment variable and create notifier. + with umock.patch.dict( + os.environ, {"SLACK_BOT_TOKEN": "xoxb-test2-token"} + ): + notifier = hslack.SlackNotifier() + self.assertEqual(notifier.bot_token, "xoxb-test2-token") + + def test3(self) -> None: + """ + Check that `SlackNotifier` raises `ValueError` when no token is + provided. + """ + # Clear environment and verify initialization fails. + with umock.patch.dict(os.environ, {}, clear=True): + with self.assertRaises(ValueError) as cm: + hslack.SlackNotifier() + self.assertIn("No bot token provided", str(cm.exception)) + + def test4(self) -> None: + """ + Check that `send_message()` successfully sends message to Slack + channel. + """ + # Mock successful Slack API response. 
+ with umock.patch("helpers.hslack.requests.post") as mock_post: + mock_response = umock.MagicMock() + mock_response.json.return_value = {"ok": True} + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + # Send message and verify API call. + notifier = hslack.SlackNotifier(bot_token="xoxb-test4-token") + notifier.send_message("#test4", "test4 message content") + # Verify request parameters. + mock_post.assert_called_once() + _, kwargs = mock_post.call_args + self.assertEqual(kwargs["json"]["channel"], "#test4") + self.assertEqual(kwargs["json"]["text"], "test4 message content") + + def test5(self) -> None: + """ + Check that `send_message()` raises `ValueError` on Slack API error. + """ + # Mock Slack API error response. + with umock.patch("helpers.hslack.requests.post") as mock_post: + mock_response = umock.MagicMock() + mock_response.json.return_value = { + "ok": False, + "error": "channel_not_found", + } + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + # Verify error is raised with correct message. 
+ notifier = hslack.SlackNotifier(bot_token="xoxb-test5-token") + with self.assertRaises(ValueError) as cm: + notifier.send_message("#test5", "test5 message content") + self.assertIn("channel_not_found", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py new file mode 100644 index 000000000..f6adba2f6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py @@ -0,0 +1,29 @@ +import helpers.hsql as hsql +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestCreateInOperator +# ############################################################################# + + +class TestCreateInOperator(hunitest.TestCase): + def test_create_in_operator1(self) -> None: + """ + Test creating IN operator for more than one value. + """ + values = ["binance", "ftx"] + column = "exchange_id" + actual = hsql.create_in_operator(values, column) + expected = "exchange_id IN ('binance','ftx')" + self.assertEqual(actual, expected) + + def test_create_in_operator2(self) -> None: + """ + Test creating IN operator for one value. 
+ """ + values = ["ftx"] + column = "exchange_id" + actual = hsql.create_in_operator(values, column) + expected = "exchange_id IN ('ftx')" + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py new file mode 100644 index 000000000..1e5b4ff01 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py @@ -0,0 +1,270 @@ +import os +from typing import List, Tuple + +import helpers.hio as hio +import helpers.hstring as hstring +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestExtractVersionFromFileName +# ############################################################################# + + +class TestExtractVersionFromFileName(hunitest.TestCase): + def _test_extract_version_from_file_name( + self, version: str, expected: Tuple[int, int] + ) -> None: + """ + Verify function provides expected output on valid inputs. + + :param version: version in string format to input, e.g. 1.0 + :param expected: expected output version in (major, minor) + format + """ + fn = f"/app/datapull/ccxt/universe/download/universe_v{version}.json" + self.assertEqual(hstring.extract_version_from_file_name(fn), expected) + + def test_extract_version_from_file_name1(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("1.1", (1, 1)) + + def test_extract_version_from_file_name2(self) -> None: + """ + Verify function provides expected output on valid input. 
+ """ + self._test_extract_version_from_file_name("4", (4, 0)) + + def test_extract_version_from_file_name3(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("1.0", (1, 0)) + + def test_extract_version_from_file_name4(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("3.11", (3, 11)) + + def test_extract_version_from_file_name5(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("16.2", (16, 2)) + + def test_extract_version_from_file_name6(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("25.11", (25, 11)) + + def _test_extract_version_from_file_name_incorrect_format( + self, file_name: str + ) -> None: + """ + Helper function to verify function raises AssertionError on incorrect + input format. + + :param file_name: incorrect file_name to test + """ + expected_fail = "Can't parse file" + with self.assertRaises(AssertionError) as fail: + _ = hstring.extract_version_from_file_name(file_name) + self.assertIn(expected_fail, str(fail.exception)) + + def test_extract_version_from_file_name_incorrect_format1(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format("incorrect") + + def test_extract_version_from_file_name_incorrect_format2(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_vxx.json" + ) + + def test_extract_version_from_file_name_incorrect_format3(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. 
+ """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_v.1.json" + ) + + def test_extract_version_from_file_name_incorrect_format4(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_11.json" + ) + + +# ############################################################################# +# TestGetDocstringLineIndices +# ############################################################################# + + +class TestGetDocstringLineIndices(hunitest.TestCase): + """ + Test determining which code lines are inside (doc)strings. + """ + + def helper(self, code: str, expected: List[str]) -> None: + lines = code.split("\n") + actual_idxs = hstring.get_docstring_line_indices(lines) + actual = [lines[i].strip() for i in actual_idxs] + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test one type of quotes. + """ + code = """ + def test_assert_equal1(self) -> None: + ''' + Test one. + ''' + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = ''' + Inside a string. + ''' + d = '''Does not count''' + self.check_string(actual) + + """ + expected = ["'''", "Test one.", "s = '''", "Inside a string."] + self.helper(code, expected) + + def test2(self) -> None: + """ + Test the second type of quotes. + """ + code = ''' + def test_assert_equal1(self) -> None: + """ + Test one. + """ + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = """ + Inside a string. + """ + d = """Does not count""" + self.check_string(actual) + + ''' + expected = ['"""', "Test one.", 's = """', "Inside a string."] + self.helper(code, expected) + + def test3(self) -> None: + """ + Test quotes within quotes. 
+ """ + code = """ + def test_assert_equal1(self) -> None: + ''' + Test one. + """ + code += '''\ +""" + String within "Test one". + """ + ''' + code += """\ +''' + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = ''' + Inside a string. + ''' + d = '''Does not count''' + self.check_string(actual) + + """ + expected = [ + "'''", + "Test one.", + '"""', + 'String within "Test one".', + '"""', + "s = '''", + "Inside a string.", + ] + self.helper(code, expected) + + +# ############################################################################# +# TestGetCodeBlockLineIndices +# ############################################################################# + + +class TestGetCodeBlockLineIndices(hunitest.TestCase): + def helper(self, code: str, expected: List[str]) -> None: + lines = code.split("\n") + actual_idxs = hstring.get_code_block_line_indices(lines) + actual = [lines[i].strip() for i in actual_idxs] + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test getting code block line indices. + """ + code = """ + def test_assert_equal1(self) -> None: + ``` + Test one. + ``` + d = ```Does not count``` + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + """ + expected = ["```", "Test one."] + self.helper(code, expected) + + +# ############################################################################# +# TestGetDocstrings +# ############################################################################# + + +class TestGetDocstrings(hunitest.TestCase): + def test1(self) -> None: + """ + Test that grouped lines within docstrings are correctly returned. + """ + # Prepare inputs. + test_get_docstring_lines_input_dir = self.get_input_dir() + text_file_path = os.path.join( + test_get_docstring_lines_input_dir, "test.txt" + ) + text = hio.from_file(text_file_path) + lines = text.splitlines() + # Run. 
+ actual = hstring.get_docstrings(lines) + # Check. + expected = [ + [1, 2, 3, 4, 5, 6], + [11, 12, 13, 14, 15, 16], + ] + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py new file mode 100644 index 000000000..4d2431bca --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py @@ -0,0 +1,494 @@ +import logging +import os +import platform +import re +import tempfile +from typing import List + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +def _get_ls_error_message(filename: str = "this_file_doesnt_exist") -> str: + """ + Get the expected error message for ls command for the current OS. + + :param filename: The filename that doesn't exist + """ + if platform.system() == "Darwin": + return f"ls: {filename}: No such file or directory" + elif platform.system() == "Linux": + return f"ls: cannot access '{filename}': No such file or directory" + raise RuntimeError(f"Unsupported OS: {platform.system()}") + +# ############################################################################# + + +# ############################################################################# +# Test_system1 +# ############################################################################# + + +class Test_system1(hunitest.TestCase): + def test1(self) -> None: + hsystem.system("ls") + + def test2(self) -> None: + hsystem.system("ls /dev/null", suppress_output=False) + + def test3(self) -> None: + """ + Output to a file. 
+ """ + with tempfile.NamedTemporaryFile() as fp: + temp_file_name = fp.name + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name) + hdbg.dassert_path_exists(temp_file_name) + + def test4(self) -> None: + """ + Tee to a file. + """ + with tempfile.NamedTemporaryFile() as fp: + temp_file_name = fp.name + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name, tee=True) + hdbg.dassert_path_exists(temp_file_name) + + def test5(self) -> None: + """ + Test dry_run. + """ + temp_file_name = tempfile._get_default_tempdir() # type: ignore + candidate_name = tempfile._get_candidate_names() # type: ignore + temp_file_name += "/" + next(candidate_name) + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name, dry_run=True) + hdbg.dassert_path_not_exists(temp_file_name) + + def test6(self) -> None: + """ + Test abort_on_error=True. + """ + hsystem.system("ls this_file_doesnt_exist", abort_on_error=False) + + def test7(self) -> None: + """ + Test abort_on_error=True (default). + """ + with self.assertRaises(RuntimeError) as cm: + hsystem.system("ls this_file_doesnt_exist") + actual = str(cm.exception) + # Different systems return different rc. + actual = re.sub(r"rc='\d+'", "rc=''", actual) + # Use OS-specific expected error message. 
+ error_msg = _get_ls_error_message() + expected = f""" + ################################################################################ + ################################################################################ + _system() failed + ################################################################################ + ################################################################################ + # _system: cmd='(ls this_file_doesnt_exist) 2>&1', print_command=False, abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + cmd='(ls this_file_doesnt_exist) 2>&1' + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + - rc='' + - output=' + {error_msg} + ' + - Output saved in 'tmp.system_output.txt' + - Command saved in 'tmp.system_cmd.sh' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test8(self) -> None: + """ + Check that an assert error is raised when `tee` is passed without a log + file. + """ + with self.assertRaises(AssertionError) as cm: + _ = hsystem.system("ls this_should_fail", tee=True) + actual = str(cm.exception) + expected = r""" + ################################################################################ + * Failed assertion * + 'True' implies 'False' + ################################################################################ + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test9(self) -> None: + """ + Check that the failing command fails and logs are stored in the log + file. 
+ + - `allow_errors = False` + - `tee = True` + - Log file path is passed + """ + log_dir = self.get_scratch_space() + log_file_path = os.path.join(log_dir, "tee_log") + with self.assertRaises(RuntimeError) as cm: + _ = hsystem.system( + "ls this_should_fail", tee=True, output_file=log_file_path + ) + actual = str(cm.exception) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # Normalize rc value (differs across systems). + actual = re.sub(r"rc='\d+'", "rc=''", actual) + # Check log output contains the OS-specific error message. + actual = hio.from_file(log_file_path) + error_msg = _get_ls_error_message("this_should_fail") + expected = error_msg + "\n" + self.assert_equal(actual, expected) + + def test10(self) -> None: + """ + Check that the failing command passes and logs are stored in the log + file. + + - `allow_errors = True` + - `tee = True` + - Log file path is passed + """ + log_dir = self.get_scratch_space() + log_file_path = os.path.join(log_dir, "tee_log") + rc = hsystem.system( + "ls this_should_fail", + tee=True, + abort_on_error=False, + output_file=log_file_path, + ) + self.assertNotEqual(rc, 0) + # Check log output. + actual = hio.from_file(log_file_path) + # Use OS-specific expected error message. 
+ error_msg = _get_ls_error_message("this_should_fail") + expected = error_msg + "\n" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_system2 +# ############################################################################# + + +class Test_system2(hunitest.TestCase): + def test_get_user_name(self) -> None: + actual = hsystem.get_user_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("whoami")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + # + expected = hsystem.system_to_one_line("whoami")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_get_server_name(self) -> None: + actual = hsystem.get_server_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("uname -n")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_get_os_name(self) -> None: + actual = hsystem.get_os_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("uname -s")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_compute_file_signature1 +# ############################################################################# + + +class Test_compute_file_signature1(hunitest.TestCase): + def test1(self) -> None: + """ + Compute the signature of a file using 1 enclosing dir. + """ + file_name = ( + "/app/amp/core/test/TestCheckSameConfigs." 
+ + "test_check_same_configs_error/output/test.txt" + ) + dir_depth = 1 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = ["output", "test.txt"] + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Compute the signature of a file using 2 enclosing dirs. + """ + file_name = ( + "/app/amp/core/test/TestCheckSameConfigs." + + "test_check_same_configs_error/output/test.txt" + ) + dir_depth = 2 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = [ + "TestCheckSameConfigs.test_check_same_configs_error", + "output", + "test.txt", + ] + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Compute the signature of a file using 4 enclosing dirs. + """ + file_name = "/app/amp/core/test/TestApplyAdfTest.test1/output/test.txt" + dir_depth = 4 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = [ + "core", + "test", + "TestApplyAdfTest.test1", + "output", + "test.txt", + ] + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# + + +# ############################################################################# +# Test_find_file_with_dir1 +# ############################################################################# + + +class Test_find_file_with_dir1(hunitest.TestCase): + def test1(self) -> None: + """ + Check whether we can find this file using one enclosing dir. + """ + # Use this file. + file_name = "helpers/test/test_hsystem.py" + dir_depth = 1 + actual = hsystem.find_file_with_dir(file_name, dir_depth=dir_depth) + expected = r"""['helpers/test/test_hsystem.py']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + + def _helper(self, dir_depth: int, mode: str) -> List[str]: + """ + Test helper for find_file_with_dir. 
+ + :param dir_depth: Number of directory levels to use for matching + :param mode: Search mode for matching + :return: List of matching files + """ + # Create a fake golden outcome to be used in this test. + golden_content = "hello world" + self.check_string(golden_content) + # E.g., helpers/test/test_hsystem.py::Test_find_file_with_dir1::test2/test.txt + file_name = os.path.join(self.get_output_dir(), "test.txt") + _LOG.debug("file_name=%s", file_name) + actual = hsystem.find_file_with_dir( + file_name, dir_depth=dir_depth, mode=mode + ) + _LOG.debug("Found %d matching files", len(actual)) + return actual + + def test2(self) -> None: + """ + Check whether we can find a test golden output using different number + of enclosing dirs. + + With only 1 enclosing dir, we can't find it. + """ + # Use only one dir which is not enough to identify the file. + # E.g., .../test/TestSqlWriterBackend1.test_insert_tick_data1/output/test.txt + dir_depth = 1 + mode = "return_all_results" + actual = self._helper(dir_depth, mode) + # For sure there are more than 100 tests. + self.assertGreater(len(actual), 100) + + def test3(self) -> None: + """ + Like `test2`, but using 2 levels for sure we are going to identify the + file. + """ + dir_depth = 2 + mode = "return_all_results" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + # There should be a single match. + expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + def test4(self) -> None: + """ + Like `test2`, but using 2 levels for sure we are going to identify the + file and asserting in case we don't find a single result. + """ + dir_depth = 2 + mode = "assert_unless_one_result" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + # There should be a single match. 
+ expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + def test5(self) -> None: + """ + Like `test2`, using more level than 2, again, we should have a single + result. + """ + dir_depth = 3 + mode = "assert_unless_one_result" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + +# ############################################################################# + + +# ############################################################################# +# Test_Linux_commands1 +# ############################################################################# + + +class Test_Linux_commands1(hunitest.TestCase): + def test_du1(self) -> None: + hsystem.du(".") + + +# ############################################################################# + + +# ############################################################################# +# Test_has_timestamp1 +# ############################################################################# + + +class Test_has_timestamp1(hunitest.TestCase): + def test_has_not_timestamp1(self) -> None: + """ + No timestamp. + """ + file_name = "patch.amp.8c5a2da9.tgz" + actual = hsystem.has_timestamp(file_name) + expected = False + self.assertEqual(actual, expected) + + def test_has_timestamp1(self) -> None: + """ + Valid timestamp. + """ + file_name = "patch.amp.8c5a2da9.20210725_225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp2(self) -> None: + """ + Valid timestamp. 
+ """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725-22_58_57.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp3(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp4(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_22_58_57.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp5(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + +# ############################################################################# +# Test_append_timestamp_tag1 +# ############################################################################# + + +class Test_append_timestamp_tag1(hunitest.TestCase): + def test_no_timestamp1(self) -> None: + """ + Invalid timestamp, with no tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" + tag = "" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.tgz + expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.tgz" + self.assertRegex(actual, expected) + + def test_no_timestamp2(self) -> None: + """ + Invalid timestamp, with no tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" + tag = "hello" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.hello.tgz + expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.hello.tgz" + self.assertRegex(actual, expected) + + def test1(self) -> None: + """ + Valid timestamp, with no tag. 
+ """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + tag = "" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210725_225857.20210726-15_11_25.tgz + expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Valid timestamp, with a tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + tag = "hello" + actual = hsystem.append_timestamp_tag(file_name, tag) + expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.hello.tgz" + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py new file mode 100644 index 000000000..385de303a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py @@ -0,0 +1,159 @@ +import logging + +import helpers.hprint as hprint +import helpers.htable as htable +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestTable1 +# ############################################################################# + + +class TestTable1(hunitest.TestCase): + # ######################################################################### + + @staticmethod + def _get_table() -> htable.Table: + txt = """completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow"] + # table = [line for line in csv.reader(txt.split("\n"), delimiter=' ')] + # _LOG.debug(hprint.to_str("table")) + # _LOG.debug("size=%s", str(htable.size(table))) + table = htable.Table.from_text(cols, txt, delimiter=" ") + return 
table + + def test_from_text1(self) -> None: + table = self._get_table() + self.assertIsInstance(table, htable.Table) + _LOG.debug(hprint.to_str("table")) + + def test_from_text_invalid1(self) -> None: + txt = """completed failure Lint Run_linter +completed success Lint +completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow"] + with self.assertRaises(AssertionError) as cm: + htable.Table.from_text(cols, txt, delimiter=" ") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '3' + == + '4' + Invalid row='['completed', 'success', 'Lint']' for cols='['status', 'outcome', 'descr', 'workflow']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_from_text_invalid2(self) -> None: + txt = """completed failure Lint Run_linter + completed success Lint Fast_tess + completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow", "EXTRA"] + with self.assertRaises(AssertionError) as cm: + htable.Table.from_text(cols, txt, delimiter=" ") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '4' + == + '5' + Invalid row='['completed', 'failure', 'Lint', 'Run_linter']' for cols='['status', 'outcome', 'descr', 'workflow', 'EXTRA']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # ######################################################################### + + def test_repr1(self) -> None: + table = self._get_table() + actual = repr(table) + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= +['completed', 'failure', 'Lint', 'Run_linter'] +['completed', 'success', 'Lint', 'Fast_tests'] +['completed', 'success', 'Lint', 'Slow_tests'] +size=(3, 4) +""" + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_str1(self) -> None: + table = self._get_table() + actual = str(table) + expected = r""" +status | outcome | descr | workflow | +--------- | ------- | ----- | ---------- | 
+completed | failure | Lint | Run_linter | +completed | success | Lint | Fast_tests | +completed | success | Lint | Slow_tests | +""" + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + # ######################################################################### + + def test_filter_table1(self) -> None: + """ + Filter resulting in a single matching row. + """ + table = self._get_table() + # + table_filter = table.filter_rows("outcome", "failure") + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= +['completed', 'failure', 'Lint', 'Run_linter'] +size=(1, 4) +""" + actual = repr(table_filter) + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_filter_table2(self) -> None: + """ + Filter resulting in no matches. + """ + table = self._get_table() + # + table_filter = table.filter_rows("status", "in progress") + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= + +size=(0, 4) +""" + actual = repr(table_filter) + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_filter_table3(self) -> None: + """ + Filter with a column constant using the constant value. 
+ """ + table = self._get_table() + # + table_filter = table.filter_rows("descr", "Lint") + actual = repr(table_filter) + expected = repr(table) + self.assert_equal(actual, expected, fuzzy_match=False) + + # ######################################################################### + + def test_unique1(self) -> None: + table = self._get_table() + # + actual = table.unique("descr") + expected = ["Lint"] + self.assert_equal(str(actual), str(expected), fuzzy_match=False) + + def test_unique2(self) -> None: + table = self._get_table() + # + actual = table.unique("workflow") + expected = ["Fast_tests", "Run_linter", "Slow_tests"] + self.assert_equal(str(actual), str(expected), fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py new file mode 100644 index 000000000..fa2059b0b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py @@ -0,0 +1,578 @@ +import logging + +import helpers.hprint as hprint +import helpers.htext_protect as htexprot +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test__extract_protected_content +# ############################################################################# + + +class Test__extract_protected_content(hunitest.TestCase): + """ + Test the extract_protected_content function. + """ + + def helper( + self, + txt: str, + file_type: str, + expected_txt: str, + expected_map_size: int, + ) -> None: + """ + Test helper for extract_protected_content. 
+ + :param txt: Input text to process + :param file_type: File type ('md', 'txt', or 'tex') + :param expected_txt: Expected output text with placeholders + :param expected_map_size: Expected number of protected items + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + actual_lines, protected_map = htexprot.extract_protected_content( + lines, file_type + ) + # Check outputs. + actual = "\n".join(actual_lines) + expected = hprint.dedent( + expected_txt, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual, expected) + self.assertEqual(len(protected_map), expected_map_size) + + def test1(self) -> None: + """ + Test extracting single fenced block with content. + """ + # Prepare inputs. + txt = """ + Some text here. + ```python + def foo(): + return 42 + ``` + More text. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Some text here. + ```python + <<>> + ``` + More text. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test2(self) -> None: + """ + Test extracting multiple fenced blocks. + """ + # Prepare inputs. + txt = """ + Text. + ```python + code1 + ``` + Middle. + ```javascript + code2 + ``` + End. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text. + ```python + <<>> + ``` + Middle. + ```javascript + <<>> + ``` + End. + """ + expected_map_size = 2 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test3(self) -> None: + """ + Test extracting empty fenced block. + """ + # Prepare inputs. + txt = """ + Text before. + ```python + ``` + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + ```python + <<>> + ``` + Text after. + """ + expected_map_size = 1 + # Run test. 
+ self.helper(txt, file_type, expected, expected_map_size) + + def test4(self) -> None: + """ + Test extracting fenced blocks with different languages. + """ + # Prepare inputs. + txt = """ + ```python + python_code + ``` + ```javascript + js_code + ``` + ```bash + bash_code + ``` + """ + file_type = "md" + # Prepare outputs. + expected = """ + ```python + <<>> + ``` + ```javascript + <<>> + ``` + ```bash + <<>> + ``` + """ + expected_map_size = 3 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test5(self) -> None: + """ + Test extracting HTML single-line comment. + """ + # Prepare inputs. + txt = """ + Text before. + + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + <<>> + Text after. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test6(self) -> None: + """ + Test extracting HTML multi-line comment. + """ + # Prepare inputs. + txt = """ + Text before. + + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + <<>> + Text after. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test7(self) -> None: + """ + Test extracting LaTeX comment. + """ + # Prepare inputs. + txt = """ + Some LaTeX text. + % This is a LaTeX comment + More text. + """ + file_type = "tex" + # Prepare outputs. + expected = """ + Some LaTeX text. + <<>> + More text. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test8(self) -> None: + """ + Test extracting math block. + """ + # Prepare inputs. + txt = """ + Text before. + $$ + E = mc^2 + $$ + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + $$ + <<>> + $$ + Text after. + """ + expected_map_size = 1 + # Run test. 
+ self.helper(txt, file_type, expected, expected_map_size) + + def test9(self) -> None: + """ + Test fenced block not extracted for tex files. + """ + # Prepare inputs. + txt = """ + LaTeX text. + ``` + This should not be extracted for tex files + ``` + More text. + """ + file_type = "tex" + # Prepare outputs. + expected = """ + LaTeX text. + ``` + This should not be extracted for tex files + ``` + More text. + """ + expected_map_size = 0 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test10(self) -> None: + """ + Test mixed content (fenced blocks + comments + normal text). + """ + # Prepare inputs. + txt = """ + # Title + Some text. + ```python + code here + ``` + + $$ + math here + $$ + End. + """ + file_type = "md" + # Prepare outputs. + expected = """ + # Title + Some text. + ```python + <<>> + ``` + <<>> + $$ + <<>> + $$ + End. + """ + expected_map_size = 3 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + +# ############################################################################# +# Test__restore_protected_content +# ############################################################################# + + +class Test__restore_protected_content(hunitest.TestCase): + """ + Test the restore_protected_content function. + """ + + def helper( + self, + txt: str, + protected_map: dict, + expected_txt: str, + ) -> None: + """ + Test helper for restore_protected_content. + + :param txt: Input text with placeholders + :param protected_map: Mapping of placeholders to original content + :param expected_txt: Expected output with restored content + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + actual_lines = htexprot.restore_protected_content(lines, protected_map) + # Check outputs. 
+ actual = "\n".join(actual_lines) + expected = hprint.dedent( + expected_txt, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test restoring single placeholder. + """ + # Prepare inputs. + txt = """ + Text before. + ```python + <<>> + ``` + Text after. + """ + protected_map = { + "<<>>": "def foo():\n return 42" + } + # Prepare outputs. + expected = """ + Text before. + ```python + def foo(): + return 42 + ``` + Text after. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test2(self) -> None: + """ + Test restoring multiple placeholders. + """ + # Prepare inputs. + txt = """ + ```python + <<>> + ``` + <<>> + ``` + <<>> + ``` + """ + protected_map = { + "<<>>": "code1", + "<<>>": "", + "<<>>": "code2", + } + # Prepare outputs. + expected = """ + ```python + code1 + ``` + + ``` + code2 + ``` + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test3(self) -> None: + """ + Test restoring multi-line content from single placeholder. + """ + # Prepare inputs. + txt = """ + Text. + <<>> + More text. + """ + protected_map = { + "<<>>": "" + } + # Prepare outputs. + expected = """ + Text. + + More text. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test4(self) -> None: + """ + Test with empty map (no-op). + """ + # Prepare inputs. + txt = """ + Text line 1. + Text line 2. + Text line 3. + """ + protected_map = {} + # Prepare outputs. + expected = """ + Text line 1. + Text line 2. + Text line 3. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test5(self) -> None: + """ + Test restoring empty content. + """ + # Prepare inputs. + txt = """ + Before. + ``` + <<>> + ``` + After. + """ + protected_map = {"<<>>": ""} + # Prepare outputs. + expected = """ + Before. + ``` + + ``` + After. + """ + # Run test. 
+ self.helper(txt, protected_map, expected) + + +# ############################################################################# +# Test_extract_restore_roundtrip +# ############################################################################# + + +class Test_extract_restore_roundtrip(hunitest.TestCase): + """ + Test that extract followed by restore is identity operation. + """ + + def helper(self, txt: str, file_type: str) -> None: + """ + Test helper for roundtrip (extract then restore). + + :param txt: Input text + :param file_type: File type ('md', 'txt', or 'tex') + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + original = "\n".join(lines) + # Run test. + extracted_lines, protected_map = htexprot.extract_protected_content( + lines, file_type + ) + restored_lines = htexprot.restore_protected_content( + extracted_lines, protected_map + ) + # Check outputs. + actual = "\n".join(restored_lines) + self.assert_equal(actual, original) + + def test1(self) -> None: + """ + Test roundtrip with fenced blocks. + """ + # Prepare inputs. + txt = """ + # Title + Some text. + ```python + def foo(): + return 42 + ``` + More text. + """ + file_type = "md" + # Run test. + self.helper(txt, file_type) + + def test2(self) -> None: + """ + Test roundtrip with mixed content. + """ + # Prepare inputs. + txt = """ + Text. + ```python + code + ``` + + $$ + E = mc^2 + $$ + End. + """ + file_type = "md" + # Run test. + self.helper(txt, file_type) + + def test3(self) -> None: + """ + Test roundtrip with LaTeX comments. + """ + # Prepare inputs. + txt = """ + LaTeX text. + % Comment 1 + More text. + % Comment 2 + End. + """ + file_type = "tex" + # Run test. + self.helper(txt, file_type) + + def test4(self) -> None: + """ + Test roundtrip with no protected content. + """ + # Prepare inputs. + txt = """ + Just regular text. + No special content here. + Just plain paragraphs. + """ + file_type = "md" + # Run test. 
+ self.helper(txt, file_type) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py new file mode 100644 index 000000000..ff57a87c0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py @@ -0,0 +1,24 @@ +import logging +import time + +import helpers.htimer as htimer +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestTimedScope +# ############################################################################# + + +class TestTimedScope(hunitest.TestCase): + def test_1(self) -> None: + """ + Test that elapsed time is correctly computed. + """ + # Run the function to test. + with htimer.TimedScope(logging.INFO, "Test") as ts: + time.sleep(1) + # Round actual time up to 1 decimal and compare it with expected. 
+ actual_rounded_time = round(ts.elapsed_time, 1) + expected_rounded_time = 1.0 + self.assertEqual(actual_rounded_time, expected_rounded_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py new file mode 100644 index 000000000..808a2221e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py @@ -0,0 +1,474 @@ +import logging +from typing import List + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.htraceback as htraceb +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_Traceback1 +# ############################################################################# + + +class Test_Traceback1(hunitest.TestCase): + def test_parse0(self) -> None: + txt = """ + + TEST + Traceback + TEST + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + txt = hprint.dedent(txt) + _LOG.debug("txt=\n%s", txt) + purify_from_client = False + # Run the function under test. + act_cfile, act_traceback = htraceb.parse_traceback( + txt, purify_from_client=purify_from_client + ) + # Check. 
+ exp_traceback = """Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": +NameError: name 'repo_short_name' is not defined + TEST TEST TEST""" + self.assertEqual(act_traceback, exp_traceback) + + # pylint: disable=line-too-long + # TODO(gp): Add test and fix for the following traceback: + + # Bug1: + # Traceback (most recent call last): + # File "/Users/saggese/src/venv/amp.client_venv/bin/invoke", line 8, in + # sys.exit(program.run()) + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 373, in run + # self.parse_collection() + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 465, in parse_collection + # self.load_collection() + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 696, in load_collection + # module, parent = loader.load(coll_name) + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/loader.py", line 76, in load + # module = imp.load_module(name, fd, path, desc) + # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 234, in load_module + # return load_source(name, filename, file) + # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 171, in load_source + # module = _load(spec) + # File "", line 711, in _load + # File "", line 680, in _load_unlocked + # File "", line 855, in exec_module + # File "", line 228, in _call_with_frames_removed + # File 
"/Users/saggese/src/lem1/amp/tasks.py", line 8, in + # from helpers.lib_tasks import set_default_params # This is not an invoke target. + # File "/Users/saggese/src/lem1/amp/helpers/lib_tasks.py", line 23, in + # import helpers.hgit as hgit + # File "/Users/saggese/src/lem1/amp/helpers/git.py", line 16, in + # import helpers.hsystem as hsystem + # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 + # signature2 = _compute_file_signature(file_name, dir_depth) + # ^ + # SyntaxError: invalid syntax + # Traceback (most recent call last): + # File "/Users/saggese/src/lem1/amp/dev_scripts/tg.py", line 21, in + # import helpers.hsystem as hsystem + # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 + # signature2 = _compute_file_signature(file_name, dir_depth) + # ^ + # SyntaxError: invalid syntax + + # Bug2: + # Traceback (most recent call last): + # File "/app/amp/dataflow/pipelines/real_time/test/test_dataflow_amp_real_time_pipeline.py", line 46, in test1 + # ) = mdmdinex.get_ReplayedTimeMarketData_example2( + # TypeError: get_ReplayedTimeMarketData_example2() got an unexpected keyword argument 'df' + # + # 13:34:45 INFO traceback_to_cfile : _main : 76 : in_file_name=log.txt + # 13:34:45 INFO parser : read_file : 304 : Reading from 'log.txt' + # 13:34:45 ERROR traceback_to_cfile : _main : 87 : Can't find traceback in the file + + # Bug3: + # =================================== FAILURES =================================== + # _________________________ TestGetDataForInterval.test1 _________________________ + # Traceback (most recent call last): + # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3361, in get_loc + # return self._engine.get_loc(casted_key) + # File "pandas/_libs/index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc + # File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc + # File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in 
pandas._libs.hashtable.PyObjectHashTable.get_item + # File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item + # KeyError: 'end_ts' + # + # The above exception was the direct cause of the following exception: + # + # Traceback (most recent call last): + # File "/app/amp/market_data/test/test_market_data_client.py", line 46, in test1 + # data = market_data_client.get_data_for_interval( + # File "/app/amp/market_data/market_data.py", line 212, in get_data_for_interval + # df = self._get_data( + # File "/app/amp/market_data/market_data_client.py", line 93, in _get_data + # market_data["start_ts"] = market_data["end_ts"] - pd.Timedelta( + # File "/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__ + # indexer = self.columns.get_loc(key) + # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc + # raise KeyError(key) from err + # KeyError: 'end_ts' + + # Bug4: + # dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.01 s) FAILED [100%] + # + # =================================== FAILURES =================================== + # __________________ Test_get_configs_from_command_line1.test1 ___________________ + # Traceback (most recent call last): + # File "/app/dataflow/model/test/test_experiment_utils.py", line 35, in test1 + # configs = dtfmoexuti.get_configs_from_command_line(args) + # File "/app/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + # configs = cconfig.get_configs_from_builder(config_builder) + # File "/app/config_root/config/builder.py", line 48, in get_configs_from_builder + # imp = importlib.import_module(import_) + # File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + # return _bootstrap._gcd_import(name[level:], package, level) + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 961, in 
_find_and_load_unlocked + # File "", line 219, in _call_with_frames_removed + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 961, in _find_and_load_unlocked + # File "", line 219, in _call_with_frames_removed + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 973, in _find_and_load_unlocked + # ModuleNotFoundError: No module named 'research' + # ============================= slowest 3 durations ============================== + + # pylint: enable=line-too-long + + def _parse_traceback_helper( + self, + txt: str, + purify_from_client: bool, + exp_cfile: str, + exp_traceback: str, + ) -> None: + hdbg.dassert_isinstance(txt, str) + hdbg.dassert_isinstance(exp_cfile, str) + hdbg.dassert_isinstance(exp_traceback, str) + txt = hprint.dedent(txt) + _LOG.debug("txt=\n%s", txt) + # Run the function under test. + act_cfile, act_traceback = htraceb.parse_traceback( + txt, purify_from_client=purify_from_client + ) + _LOG.debug("act_cfile=\n%s", act_cfile) + _LOG.debug("act_traceback=\n%s", act_traceback) + # Compare cfile. + act_cfile = htraceb.cfile_to_str(act_cfile) + exp_cfile = hprint.dedent(exp_cfile) + _LOG.debug(hprint.to_str("exp_cfile act_cfile")) + self.assert_equal( + act_cfile, exp_cfile, fuzzy_match=True, purify_text=True + ) + # Compare traceback. + # Handle `None`. + act_traceback = str(act_traceback) + exp_traceback = hprint.dedent(exp_traceback) + _LOG.debug(hprint.to_str("exp_traceback act_traceback")) + self.assert_equal( + act_traceback, exp_traceback, fuzzy_match=True, purify_text=True + ) + + def test_parse1(self) -> None: + """ + Parse traceback with all files from Docker that actually exist in the + current repo. 
+ """ + txt = """ + + TEST + Traceback + TEST + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + purify_from_client = False + # pylint: disable=line-too-long + exp_cfile = [ + ( + "$GIT_ROOT/helpers/test/test_lib_tasks.py", + 27, + "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)", + ), + ( + "$GIT_ROOT/helpers/lib_tasks.py", + 1265, + "_get_gh_issue_title:task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name)", + ), + ( + "$GIT_ROOT/helpers/git.py", + 397, + 'get_task_prefix_from_repo_short_name:if repo_short_name == "amp":', + ), + ] + exp_cfile = htraceb.cfile_to_str(exp_cfile) + # pylint: enable=line-too-long + exp_traceback = """ + Traceback (most recent call last): + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "$GIT_ROOT/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "$GIT_ROOT/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse_empty_traceback1(self) -> None: + """ + Parse an empty traceback file. 
+ """ + txt = """ + + TEST + Traceback + TEST TEST TEST + """ + purify_from_client = True + exp_cfile: List[htraceb.CfileRow] = [] + exp_cfile = htraceb.cfile_to_str(exp_cfile) + exp_traceback = "None" + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse2(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # Use references to this file so that we are independent of the file + # layout. + # pylint: disable=line-too-long + txt = """ + Traceback (most recent call last): + File "./helpers/test/test_htraceback.py", line 146, in + _main(_parse()) + File "./helpers/test/test_htraceback.py", line 105, in _main + configs = cdtfut.get_configs_from_command_line(args) + File "/app/amp/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line + "config_builder": args.config_builder, + """ + purify_from_client = True + exp_cfile = """ + helpers/test/test_htraceback.py:146::_main(_parse()) + helpers/test/test_htraceback.py:105:_main:configs = cdtfut.get_configs_from_command_line(args) + helpers/test/test_htraceback.py:228:get_configs_from_command_line:"config_builder": args.config_builder, + """ + exp_traceback = """ + Traceback (most recent call last): + File "./helpers/test/test_htraceback.py", line 146, in + _main(_parse()) + File "./helpers/test/test_htraceback.py", line 105, in _main + configs = cdtfut.get_configs_from_command_line(args) + File "$GIT_ROOT/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line + "config_builder": args.config_builder, + """ + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse3(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # Use references to this file so that we are independent from the file + # layout. 
+ # pylint: disable=line-too-long + txt = """ + collected 6 items + + helpers/test/test_lib_tasks.py::Test_pytest_failed1::test_classes1 (0.02 s) FAILED [ 16%] + + =================================== FAILURES =================================== + ______________________ Test_pytest_failed1.test_classes1 _______________________ + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 + self._helper(file_name, target_type, expected) + File "/app/amp/helpers/test/test_lib_tasks.py", line 1440, in _helper + actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "/app/amp/helpers/lib_tasks.py", line 2140, in pytest_failed + hdbg.dassert(m, "Invalid test='%s'", test) + File "/app/amp/helpers/dbg.py", line 129, in dassert + _dfatal(txt, msg, *args) + File "/app/amp/helpers/dbg.py", line 117, in _dfatal + dfatal(dfatal_txt) + File "/app/amp/helpers/dbg.py", line 63, in dfatal + raise assertion_type(ret) + AssertionError: + * Failed assertion * + cond=None + Invalid test='dev_scripts/testing/test/test_run_tests.py' + """ + # pylint: enable=line-too-long + purify_from_client = False + exp_cfile = """ + $GIT_ROOT/helpers/test/test_lib_tasks.py:1460:test_classes1:self._helper(file_name, target_type, expected) + $GIT_ROOT/helpers/test/test_lib_tasks.py:1440:_helper:actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + /venv/lib/python3.8/site-packages/invoke/tasks.py:127:__call__:result = self.body(*args, **kwargs) + $GIT_ROOT/helpers/lib_tasks.py:2140:pytest_failed:hdbg.dassert(m, "Invalid test='%s'", test) + $GIT_ROOT/helpers/dbg.py:129:dassert:_dfatal(txt, msg, *args) + $GIT_ROOT/helpers/dbg.py:117:_dfatal:dfatal(dfatal_txt) + $GIT_ROOT/helpers/dbg.py:63:dfatal:raise assertion_type(ret)""" + exp_traceback = r""" + Traceback (most recent call last): + File 
"$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 + self._helper(file_name, target_type, expected) + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1440, in _helper + actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "$GIT_ROOT/helpers/lib_tasks.py", line 2140, in pytest_failed + hdbg.dassert(m, "Invalid test='%s'", test) + File "$GIT_ROOT/helpers/dbg.py", line 129, in dassert + _dfatal(txt, msg, *args) + File "$GIT_ROOT/helpers/dbg.py", line 117, in _dfatal + dfatal(dfatal_txt) + File "$GIT_ROOT/helpers/dbg.py", line 63, in dfatal + raise assertion_type(ret) + AssertionError: + * Failed assertion * + cond=None + Invalid test='dev_scripts/testing/test/test_run_tests.py' + """ + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse4(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. 
+ """ + # pylint: disable=line-too-long + txt = """ + =================================== FAILURES =================================== + ____________ TestEgSingleInstrumentDataReader2.test_true_real_time1 ____________ + Traceback (most recent call last): + File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, in test_true_real_time1 + self._execute_node(node) + File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node + dict_ = node.fit() + File "/app/amp/core/dataflow/nodes/sources.py", line 385, in fit + self.df = self._get_data_until_current_time() + File "/app/amp/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time + df = self._get_data() + File "/app/amp/core/dataflow/nodes/sources.py", line 574, in _get_data + hdbg.dassert_lte(df.index.max(), current_time) + File "/app/amp/helpers/dbg.py", line 172, in dassert_lte + cond = val1 <= val2 + TypeError: '<=' not supported between instances of 'float' and 'Timestamp' + ============================= slowest 3 durations ============================== + """ + purify_from_client = False + exp_cfile = r""" + $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:182:test_true_real_time1:self._execute_node(node) + $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:238:_execute_node:dict_ = node.fit() + $GIT_ROOT/core/dataflow/nodes/sources.py:385:fit:self.df = self._get_data_until_current_time() + $GIT_ROOT/core/dataflow/nodes/sources.py:429:_get_data_until_current_time:df = self._get_data() + $GIT_ROOT/core/dataflow/nodes/sources.py:574:_get_data:hdbg.dassert_lte(df.index.max(), current_time) + $GIT_ROOT/helpers/dbg.py:172:dassert_lte:cond = val1 <= val2/TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" + exp_traceback = r""" + Traceback (most recent call last): + File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, 
in test_true_real_time1 + self._execute_node(node) + File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node + dict_ = node.fit() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 385, in fit + self.df = self._get_data_until_current_time() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time + df = self._get_data() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 574, in _get_data + hdbg.dassert_lte(df.index.max(), current_time) + File "$GIT_ROOT/helpers/dbg.py", line 172, in dassert_lte + cond = val1 <= val2 + TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse5(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # pylint: disable=line-too-long + txt = """ + Traceback (most recent call last): + File "/app/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked + ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' + """ + purify_from_client = False + exp_cfile = """ + $GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py:37:test1:configs = dtfmoexuti.get_configs_from_command_line(args) + 
$GIT_ROOT/dataflow/model/experiment_utils.py:195:get_configs_from_command_line:configs = cconfig.get_configs_from_builder(config_builder) + $GIT_ROOT/config_root/config/builder.py:46:get_configs_from_builder:imp = importlib.import_module(import_) + /usr/lib/python3.8/importlib/__init__.py:127:import_module:return _bootstrap._gcd_import(name[level:], package, level) + :1014:_gcd_import: + :991:_find_and_load: + :973:_find_and_load_unlocked: + """ + exp_traceback = """ + Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked + ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' + """ + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py new file mode 100644 index 000000000..a6e1e2ef6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py @@ -0,0 +1,954 @@ +""" +Import as: + +import helpers.test.test_unit_test as ttutes +""" + +import logging +import tempfile +from 
typing import Optional, Tuple + +import pandas as pd +import pytest + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +def _git_add(file_name: str) -> None: + # pylint: disable=unreachable + cmd = f"git add -u {file_name}" + _LOG.debug("> %s", cmd) + rc = hsystem.system(cmd, abort_on_error=False) + if rc: + _LOG.warning( + "Can't run '%s': you need to add the file manually", + cmd, + ) + + +def _to_skip_on_update_outcomes() -> bool: + """ + Determine whether to skip on `--update_outcomes`. + + Some tests can't pass with `--update_outcomes`, since they exercise + the logic in `--update_outcomes` itself. + + We can't always use `@pytest.mark.skipif(hunitest.get_update_tests)` + since pytest decides which tests need to be run before the variable + is actually set. + """ + to_skip = False + if hunitest.get_update_tests(): + _LOG.warning( + "Skip this test since it exercises the logic for --update_outcomes" + ) + to_skip = True + return to_skip + + +# ############################################################################# +# TestTestCase1 +# ############################################################################# + + +class TestTestCase1(hunitest.TestCase): + """ + Test free-standing functions in unit_test.py. + """ + + def test_get_input_dir1(self) -> None: + """ + Test hunitest.get_input_dir(). 
+ """ + actual = self.get_input_dir() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir1/input" + self.assertEqual(actual, expected) + + def test_get_input_dir2(self) -> None: + use_only_test_class = False + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/input" + self.assertEqual(actual, expected) + + def test_get_input_dir3(self) -> None: + use_only_test_class = False + test_class_name = None + test_method_name = None + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir3/input" + self.assertEqual(actual, expected) + + def test_get_input_dir4(self) -> None: + use_only_test_class = True + test_class_name = None + test_method_name = None + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1/input" + self.assertEqual(actual, expected) + + def test_get_output_dir1(self) -> None: + """ + Test hunitest.get_output_dir(). 
+ """ + actual = self.get_output_dir() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_output_dir1/output" + self.assertEqual(actual, expected) + + def test_get_scratch_space1(self) -> None: + """ + Test hunitest.get_scratch_space(). + """ + actual = self.get_scratch_space() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ( + "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_scratch_space1" + "/tmp.scratch" + ) + self.assertEqual(actual, expected) + + def test_get_scratch_space2(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_scratch_space( + test_class_name=test_class_name, test_method_name=test_method_name + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ( + "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/tmp.scratch" + ) + self.assertEqual(actual, expected) + + def test_get_scratch_space3(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + use_absolute_path = False + actual = self.get_scratch_space( + test_class_name=test_class_name, + test_method_name=test_method_name, + use_absolute_path=use_absolute_path, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "outcomes/test_class.test_method/tmp.scratch" + self.assertEqual(actual, expected) + + def test_get_s3_scratch_dir1(self) -> None: + actual = self.get_s3_scratch_dir() + _LOG.debug("actual=%s", actual) + # It is difficult to test, so we just execute. 
+ + def test_get_s3_scratch_dir2(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_s3_scratch_dir( + test_class_name=test_class_name, test_method_name=test_method_name + ) + _LOG.debug("actual=%s", actual) + # It is difficult to test, so we just execute. + + def test_assert_equal1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_assert_not_equal1(self) -> None: + actual = "hello world" + expected = "hello world w" + tmp_dir = tempfile.mkdtemp() + with self.assertRaises(RuntimeError): + self.assert_equal(actual, expected, dst_dir=tmp_dir) + + def test_assert_not_equal2(self) -> None: + actual = "hello world" + expected = "hello world w" + # Create a dir like `/var/tmp/tmph_kun9xq`. + tmp_dir = tempfile.mkdtemp() + self.assert_equal( + actual, expected, abort_on_error=False, dst_dir=tmp_dir + ) + # Compute the signature from the dir. + actual = hunitest.get_dir_signature( + tmp_dir, include_file_content=True, num_lines=None + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + actual = actual.replace(tmp_dir, "$TMP_DIR") + # pylint: disable=line-too-long + expected = """ + # Dir structure + $TMP_DIR + $TMP_DIR/tmp_diff.sh + # File signatures + len(file_names)=1 + file_names=$TMP_DIR/tmp_diff.sh + # $TMP_DIR/tmp_diff.sh + num_lines=8 + ''' + #!/bin/bash + if [[ $1 == "wrap" ]]; then + cmd='vimdiff -c "windo set wrap"' + else + cmd='vimdiff' + fi; + cmd="$cmd helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.actual.txt helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.expected.txt" + eval $cmd + + ''' + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert_equal_fuzzy_match1(self) -> None: + actual = "hello world" + expected = "hello world " + is_equal = self.assert_equal(actual, expected, fuzzy_match=True) + 
self.assertTrue(is_equal) + + def test_assert_equal5(self) -> None: + actual = "hello world" + expected = "hello world2" + with self.assertRaises(RuntimeError): + self.assert_equal(actual, expected, fuzzy_match=True) + + def _remove_lines1(self) -> None: + txt = r""" + # ##################################################################### + * Failed assertion * + 'in1' not in '{'in1': 'out1'}' + ## + `in1` already receiving input from node n1 + # ##################################################################### + # ##################################################################### + """ + actual = hunitest._remove_spaces(txt) + expected = r""" + * Failed assertion * + 'in1' not in '{'in1': 'out1'}' + ## + `in1` already receiving input from node n1 + # ##################################################################### + """ + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_AssertEqual1 +# ############################################################################# + + +class Test_AssertEqual1(hunitest.TestCase): + def test_equal1(self) -> None: + """ + Matching actual and expected without fuzzy matching. + """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + is_equal = hunitest.assert_equal(actual, expected, test_name, test_dir) + _LOG.debug(hprint.to_str("is_equal")) + self.assertTrue(is_equal) + + def test_equal2(self) -> None: + """ + Matching actual and expected with fuzzy matching. 
+ """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + fuzzy_match = True + is_equal = hunitest.assert_equal( + actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match + ) + _LOG.debug(hprint.to_str("is_equal")) + self.assertTrue(is_equal) + + def test_not_equal1(self) -> None: + """ + Mismatching actual and expected. + """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + fuzzy_match = False + with self.assertRaises(RuntimeError) as cm: + hunitest.assert_equal( + actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match + ) + # Check that the assertion is what expected. 
+ actual = str(cm.exception) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ''' +-------------------------------------------------------------------------------- +ACTUAL vs EXPECTED: Test_AssertEqual1.test_not_equal1 +-------------------------------------------------------------------------------- + + ( +completed failure Lint Run_linter | completed failure Lint Run_linter +completed success Lint Fast_tests ( +completed success Lint Slow_tests ( +Diff with: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +ACTUAL VARIABLE: Test_AssertEqual1.test_not_equal1 +-------------------------------------------------------------------------------- +expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +"""''' + if actual != expected: + hio.to_file("actual.txt", actual) + hio.to_file("expected.txt", expected) + self.assert_equal(actual, expected, fuzzy_match=False) + # We don't use self.assert_equal() since this is exactly we are testing, + # so we use a trusted function. + self.assertEqual(actual, expected) + + # For debugging: don't commit code with this test enabled. + @pytest.mark.skip( + reason="This is only used to debug the debugging the infrastructure" + ) + def test_not_equal_debug(self) -> None: + """ + Create a mismatch on purpose to see how the suggested updated to + expected variable looks like. 
+ """ + actual = r"""empty +start + +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests + +end + +""" + expected = "hello" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# TestCheckString1 +# ############################################################################# + + +class TestCheckString1(hunitest.TestCase): + def test_check_string1(self) -> None: + """ + Compare the actual value to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Overwrite the golden file, so that --update_golden doesn't matter. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string(actual) + # Actual match the golden outcome and it wasn't updated. + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_string_not_equal1(self) -> None: + """ + Compare the actual value to a mismatching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + # Actual doesn't match the golden outcome. 
+ self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + + def test_check_string_not_equal2(self) -> None: + """ + Compare the actual value to a mismatching golden outcome and udpate it. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # Force updating the golden outcomes. + self.mock_update_tests() + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + new_golden = hio.from_file(file_name) + _git_add(file_name) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + # The golden outcome was updated. + self.assertEqual(new_golden, "hello world") + + def test_check_string_not_equal3(self) -> None: + """ + Like test_check_string_not_equal1() but raising the exception. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + with self.assertRaises(RuntimeError): + self.check_string(actual) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + + def test_check_string_missing1(self) -> None: + """ + When running with --update_outcomes, the golden outcome was missing and + so it was added. + + This tests the code path when action_on_missing_golden="update". + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + # Force updating the golden outcomes. 
+ self.mock_update_tests() + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + hdbg.dassert_file_exists(file_name) + new_golden = hio.from_file(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + def test_check_string_missing2(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="assert", and the unit test framework + asserted. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False, action_on_missing_golden="assert" + ) + hdbg.dassert_file_exists(file_name + ".tmp") + new_golden = hio.from_file(file_name + ".tmp") + finally: + # Clean up. + hio.delete_file(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertFalse(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + def test_check_string_missing3(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="update", and the unit test framework updates + the golden. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. 
+ outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False, action_on_missing_golden="update" + ) + hdbg.dassert_file_exists(file_name) + new_golden = hio.from_file(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + +# ############################################################################# +# TestCheckDataFrame1 +# ############################################################################# + + +class TestCheckDataFrame1(hunitest.TestCase): + """ + Some of these tests can't pass with `--update_outcomes`, since they + exercise the logic in `--update_outcomes` itself. + """ + + def _check_df_helper( + self, actual: pd.DataFrame, abort_on_error: bool, err_threshold: float + ) -> Tuple[bool, bool, Optional[bool]]: + golden_outcomes = pd.DataFrame( + [[0, 1, 2], [3, 4, 5]], columns="a b c".split() + ) + # + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + # Overwrite the golden file, so that --update_golden doesn't matter. + hio.create_enclosing_dir(file_name, incremental=True) + golden_outcomes.to_csv(file_name) + try: + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, + abort_on_error=abort_on_error, + err_threshold=err_threshold, + ) + finally: + # Clean up. + golden_outcomes.to_csv(file_name) + _git_add(file_name) + return outcome_updated, file_exists, is_equal + + def test_check_df_equal1(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. 
+ """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.0001 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_equal2(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.01, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_equal3(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.05, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_not_equal1(self) -> None: + """ + Compare the actual value of a df to a non-matching golden outcome. 
+ """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = False + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome doesn't match the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + exp_error_msg = """ + actual= + a b c + 0 0 1.06 2 + 1 3 4.00 5 + expected= + a b c + 0 0 1 2 + 1 3 4 5 + actual_masked= + [[ nan 1.06 nan] + [ nan nan nan]] + expected_masked= + [[nan 1. nan] + [nan nan nan]] + err= + [[ nan 0.06 nan] + [ nan nan nan]] + max_err=0.060 + """ + self.assert_equal(self._error_msg, exp_error_msg, fuzzy_match=True) + + def test_check_df_not_equal2(self) -> None: + """ + Compare the actual value of a df to a not matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a d c".split()) + abort_on_error = False + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome doesn't match the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + + def test_check_df_not_equal3(self) -> None: + """ + Compare the actual value to a mismatching golden outcome and update it. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + golden_outcome = pd.DataFrame( + [[0, 2, 2], [3, 4, 5]], columns="a b c".split() + ) + # Force updating the golden outcomes. + self.mock_update_tests() + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.create_enclosing_dir(file_name, incremental=True) + golden_outcome.to_csv(file_name) + try: + # Check. 
+ outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False + ) + # + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.to_file(file_name, str(golden_outcome)) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_not_equal4(self) -> None: + """ + Like `test_check_df_not_equal1()` but raising the exception. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + with self.assertRaises(RuntimeError): + self._check_df_helper(actual, abort_on_error, err_threshold) + + def test_check_df_missing1(self) -> None: + """ + When running with --update_outcomes, the golden outcome was missing and + so it was added. + + This tests the code path when action_on_missing_golden="update". + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + # Force updating the golden outcomes. + self.mock_update_tests() + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + _LOG.debug(hprint.to_str("file_name")) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False + ) + hdbg.dassert_file_exists(file_name) + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.delete_file(file_name) + _git_add(file_name) + # Expected outcome doesn't exists and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # Check golden. 
+ self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_missing2(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="assert", and the unit test framework + asserted. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False, action_on_missing_golden="assert" + ) + hdbg.dassert_file_exists(file_name + ".tmp") + new_golden = pd.read_csv(file_name + ".tmp", index_col=0) + hdbg.dassert_path_not_exists(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + # Expected outcome doesn't exists and it was not updated. + self.assertFalse(outcome_updated) + self.assertFalse(file_exists) + self.assertIs(is_equal, None) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_missing3(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="update", and the unit test framework updates + the golden. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False, action_on_missing_golden="update" + ) + hdbg.dassert_file_exists(file_name) + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.delete_file(file_name) + # Expected outcome doesn't exists and it was not updated. 
+ self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertIs(is_equal, None) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + +# ############################################################################# +# Test_check_string_debug1 +# ############################################################################# + + +class Test_check_string_debug1(hunitest.TestCase): + def test1(self) -> None: + actual = "hello" + # action_on_missing_golden = "assert" + action_on_missing_golden = "update" + self.check_string( + actual, action_on_missing_golden=action_on_missing_golden + ) + + def test2(self) -> None: + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + # action_on_missing_golden = "assert" + action_on_missing_golden = "update" + self.check_dataframe( + actual, action_on_missing_golden=action_on_missing_golden + ) + + +# ############################################################################# +# Test_get_dir_signature1 +# ############################################################################# + + +class Test_get_dir_signature1(hunitest.TestCase): + def helper(self, include_file_content: bool) -> str: + in_dir = self.get_input_dir() + actual = hunitest.get_dir_signature( + in_dir, include_file_content, num_lines=None + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + return actual # type: ignore[no-any-return] + + def test1(self) -> None: + """ + Test dir signature excluding the file content. 
+ """ + include_file_content = False + actual = self.helper(include_file_content) + # pylint: disable=line-too-long + expected = r""" + # Dir structure + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0 + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1 + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Test dir signature including the file content. + """ + include_file_content = True + actual = self.helper(include_file_content) + # The golden outcome is long and uninteresting so we use check_string. 
+ self.check_string(actual, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py new file mode 100644 index 000000000..14910d1f5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py @@ -0,0 +1,288 @@ +import logging +import unittest.mock as umock +from typing import Any + +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _check(self: Any, str_to_eval: str, exp_val: str) -> None: + """ + Evaluate `str_to_eval` and compare it to expected value `exp_val`. + """ + # The variable lives 3 levels in the stack trace from here. + act_val = hprint.to_str(str_to_eval, frame_level=3) + _LOG.debug("%s", act_val) + self.assert_equal(act_val, exp_val, purify_text=True) + + +# ############################################################################# +# _Class +# ############################################################################# + + +class _Class: + def __init__(self) -> None: + self.a = 3 + self.b = 14 + + def get_a(self) -> int: + return self.a + + def get_b(self) -> int: + return self.b + + +# ############################################################################# +# _TestCase +# ############################################################################# + + +class _TestCase(hunitest.TestCase): + def check(self, *args, **kwargs) -> None: + _check(self, *args, **kwargs) + + +# ############################################################################# +# Test_Mock1 +# ############################################################################# + + +# References +# - https://docs.python.org/3/library/unittest.mock.html +# - 
https://realpython.com/python-mock-library/ +# +# - Mocks are used to imitate objects in the code base and need to have the same +# interface of objects they are replacing +# - `Mock` and `MagicMock` objects +# - avoid to create stubs by creating attributes and methods as they are +# accessed +# - accessing the same attribute returns the same mock +# - can be configured to specify return values +# - store details of how they have been used +# - After execution, one can make assertions about how mocks have been used + +# umock.Mockspec +# :param spec: specification for the mock object, e.g., using a class to create +# the proper interface + + +# ############################################################################# +# Test_Mock1 +# ############################################################################# + + +class Test_Mock1(_TestCase): + """ + - A `Mock` creates attributes / methods as you access them + - The return value of a mocked attribute / method is also a `Mock` + """ + + def test_lazy_attributes1(self) -> None: + """ + Assigning a class attribute on a Mock creates a Mock. + """ + obj = umock.Mock() + # obj is a Mock object. + self.check("obj", "obj=") + # Calling an attribute creates a Mock. + self.check("obj.a", "obj.a=") + # Assigning an attribute in the mock creates an attribute. + obj.a = 3 + self.check("obj.a", "obj.a=3") + + def test_lazy_methods1(self) -> None: + """ + Calling a method on a Mock creates a Mock. + """ + # Mock json module `import json`. + json = umock.Mock() + self.check("json", "json=") + # Create a function on the fly that returns a mock. + v = json.dumps() + self.assertTrue(isinstance(v, umock.Mock)) + self.check("json.dumps", "json.dumps=") + # The mocked function and the returned value from a mock function are + # different mocks. 
+ self.check("v", "v=") + self.check("type(v)", "type(v)=") + self.check( + "json.dumps()", "json.dumps()=" + ) + self.assertTrue(isinstance(json.dumps, umock.Mock)) + self.assertNotEqual(id(v), id(json.dumps)) + + def test_assert1(self) -> None: + """ + Check what function was called. + """ + json = umock.Mock() + json.loads("hello") + # Check that the mocked function was called as expected. + json.loads.assert_called() + json.loads.assert_called_once() + json.loads.assert_called_with("hello") + self.assertEqual(json.loads.call_count, 1) + + def test_str1(self) -> None: + mock = umock.Mock() + # Calling `str()` on a mock creates a mock on the fly. + self.check("str(mock)", "str(mock)=\"\"") + # Assign a mocked function returning "hello" to mock.__str__. + mock.__str__ = umock.Mock(return_value="hello") + self.assertEqual(str(mock), "hello") + # One can't assign the return value, like one would do with a MagicMock. + # mock.__str__.return_value = "hello" + + def test_spec1(self) -> None: + # Create a Mock based on the class `_Class`. + mock = umock.Mock(spec=_Class) + # + self.assertTrue(isinstance(mock, _Class)) + mock.get_a = umock.Mock(return_value=3) + self.assertEqual(mock.get_a(), 3) + + +# ############################################################################# +# Test_MagicMock1 +# ############################################################################# + + +class Test_MagicMock1(_TestCase): + """ + A `MagicMock` is a subclass of `Mock` with some magic methods already + created. + """ + + def test_get1(self) -> None: + """ + Assign a MagicMock using array notation. + """ + mock = umock.MagicMock() + # MagicMock automatically infer `__get_item__()`. + mock[3] = "fish" + # Check. + mock.__setitem__.assert_called_with(3, "fish") + + def test_get2(self) -> None: + mock = umock.MagicMock() + mock.__getitem__.return_value = "result" + + def test_str1(self) -> None: + """ + Mock `str()` method. + """ + mock = umock.MagicMock() + # Mock `str()`. 
+ mock.__str__.return_value = "foobar" + # Check. + self.assertEqual(str(mock), "foobar") + mock.__str__.assert_called_with() + + +# ############################################################################# +# Test_Mock_Class1 +# ############################################################################# + + +class Test_Mock_Class1(_TestCase): + def test_without_mock1(self) -> None: + obj = _Class() + self.assertEqual(obj.get_a(), 3) + self.assertEqual(obj.get_b(), 14) + + def test_with_mock1(self) -> None: + obj = _Class() + # Mock method `get_a()`. + obj.get_a = umock.MagicMock(return_value=4) + # Check. + self.assertEqual(obj.get_a(), 4) + obj.get_a.assert_called() + + def test_with_mock2(self) -> None: + obj = _Class() + # Mock method `get_a()`. + obj.get_a = umock.MagicMock(side_effect=KeyError("foo")) + # Check. + with self.assertRaises(KeyError) as cm: + obj.get_a() + # + actual = str(cm.exception) + expected = "'foo'" + self.assert_equal(actual, expected) + obj.get_a.assert_called() + + +# ############################################################################# +# Test_Mock_Class_with_decorator1 +# ############################################################################# + +# `umock.patch()` +# - replaces classes in a particular module with a Mock object +# - by default creates a MagicMock + +# `umock.patch.object(target, attribute)` patches the named member "attribute" +# on the object "target" with a mock object. + + +# ############################################################################# +# Test_Mock_Class_with_decorator1 +# ############################################################################# + + +class Test_Mock_Class_with_decorator1(_TestCase): + @umock.patch.object(_Class, "get_a", return_value=4) + def test1(self, mock_method: umock.MagicMock) -> None: + """ + Patch method of an object using a decorator. + """ + obj = _Class() + # Check. 
+ # self.assertIs(mock_method, umock.MagicMock) + self.check( + "mock_method", "mock_method=" + ) + self.assertEqual(obj.get_a(), 4) + mock_method.assert_called() + obj.get_a.assert_called() + + +# ############################################################################# +# Test_Mock_Class_with_context_manager1 +# ############################################################################# + + +class Test_Mock_Class_with_context_manager1(_TestCase): + def test1(self) -> None: + """ + Patch an object method using a context manager. + """ + # Inside the context manager, the method is mocked. + with umock.patch.object(_Class, "get_a", return_value=4): + obj = _Class() + # Check. + self.check( + "obj.get_a", "obj.get_a=" + ) + self.assertEqual(obj.get_a(), 4) + obj.get_a.assert_called() + # Outside the context manager everything is normal. + obj = _Class() + # Check. + self.check( + "obj.get_a", + "obj.get_a=>", + ) + self.assertEqual(obj.get_a(), 3) + + def test_dict1(self) -> None: + """ + Patch a dictionary. + """ + foo = {"key": "value"} + with umock.patch.dict(foo, {"key": "new_value"}, clear=True): + self.assertEqual(foo["key"], "new_value") + # Outside the context manager everything is normal. 
+ self.assertEqual(foo["key"], "value") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py new file mode 100644 index 000000000..6488621a1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py @@ -0,0 +1,1065 @@ +""" +Import as: + +import helpers.test.test_hunit_test_purification as thuntepur +""" + +import datetime +import logging +import os +import unittest.mock as umock +from typing import Any, List + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_purify_text1 +# ############################################################################# + + +class Test_purify_text1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str, **kwargs: Any) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(txt) + self.assert_equal(actual, expected, **kwargs) + + def test1(self) -> None: + txt = "amp/helpers/test/test_system_interaction.py" + expected = "helpers/test/test_system_interaction.py" + self.check_helper(txt, expected) + + def test2(self) -> None: + txt = "amp/helpers/test/test_system_interaction.py" + expected = "helpers/test/test_system_interaction.py" + self.check_helper(txt, expected) + + def test3(self) -> None: + txt = "['amp/helpers/test/test_system_interaction.py']" + expected = "['helpers/test/test_system_interaction.py']" + 
self.check_helper(txt, expected) + + def test4(self) -> None: + txt = "app.helpers.test.test_system_interaction.py" + expected = "helpers.test.test_system_interaction.py" + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test that longer paths are processed before shorter ones. + """ + txt = "/home/user/project/src/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user" + expected = "$GIT_ROOT/src/file.py" + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test that paths with multiple occurrences of the same pattern are + processed correctly. + """ + txt = "/home/user/project/src/project/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user" + expected = "$GIT_ROOT/src/project/file.py" + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test that paths with multiple patterns are processed in the correct + order. + """ + txt = "/home/user/project/src/project/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user/project/src" + expected = "$GIT_ROOT/src/project/file.py" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test that paths with no matching patterns are left unchanged. 
+ """ + txt = "/home/user/other/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user/project/src" + expected = "/home/user/other/file.py" + self.check_helper(txt, expected) + + def test9(self) -> None: + super_module_path = hgit.get_client_root(super_module=True) + # TODO(gp): We should remove the current path. + # pylint: disable=line-too-long + txt = r""" + ************* Module input [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ + txt = hprint.dedent(txt) + txt = txt.replace("$SUPER_MODULE", super_module_path) + expected = r""" + ************* Module input [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ + # pylint: enable=line-too-long + self.check_helper(txt, expected, dedent=True) + + def test10(self) -> None: + """ + Test case when client root path is equal to `/` + """ + # pylint: disable=redefined-outer-name + hgit = umock.Mock() + hgit.get_client_root.return_value = "/" + txt = "/tmp/subdir1" + expected = txt + self.check_helper(txt, expected) + + def test11(self) -> None: + """ + Test the correct order of `app` -> `amp` purification with multiple + import statements. 
+ """ + txt = """ + import app.amp.helpers_root.helpers.test.test_file + from app.amp.helpers_root.helpers.hprint import dedent + import app.amp.helpers.config + from amp.app.helpers.config import get_config + import amp.app.helpers_root.config + """ + expected = """ + import helpers.test.test_file + from helpers.hprint import dedent + import helpers.config + from helpers.config import get_config + import helpers.config + """ + self.check_helper(txt, expected) + + def test12(self) -> None: + """ + Test amp and app purification in file path strings. + """ + txt = """ + app/amp/helpers_root/helpers/test/test_file.py + amp/app/helpers_root/helpers/test/test_file.py + """ + expected = """ + helpers/test/test_file.py + helpers/test/test_file.py + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_directory_paths1 +# ############################################################################# + + +class Test_purify_directory_paths1(hunitest.TestCase): + def check_helper(self, input_: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_directory_paths(input_) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test the replacement of `GIT_ROOT`. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/gitroot/src/subdir/file.py" + expected = "$GIT_ROOT/src/subdir/file.py" + self.check_helper(input_, expected) + + def test2(self) -> None: + """ + Test the replacement of `CSFY_HOST_GIT_ROOT_PATH`. 
+ """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/csfy_host_git_root/other/file.py" + expected = "$CSFY_HOST_GIT_ROOT_PATH/other/file.py" + self.check_helper(input_, expected) + + def test3(self) -> None: + """ + Test the replacement of `PWD`. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/documents/file.py" + expected = "$PWD/documents/file.py" + self.check_helper(input_, expected) + + def test4(self) -> None: + """ + Test the replacement when `GIT_ROOT`, `CSFY_HOST_GIT_ROOT_PATH` and + current working directory are the same. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/file.py" + expected = "$GIT_ROOT/file.py" + self.check_helper(input_, expected) + + +# ############################################################################# +# Test_purify_from_environment1 +# ############################################################################# + + +class Test_purify_from_environment1(hunitest.TestCase): + def check_helper(self, input_: str, expected: str) -> None: + try: + # Manually set a user name to test the behaviour. + hsystem.set_user_name("root") + # Run. 
+ text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_from_environment(input_) + self.assert_equal(actual, expected, fuzzy_match=True) + finally: + # Reset the global user name variable regardless of a test results. + hsystem.set_user_name(None) + + def test1(self) -> None: + input_ = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-root-1.0.0" + expected = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0" + self.check_helper(input_, expected) + + def test2(self) -> None: + input_ = "--name root.amp_test.app.app" + expected = "--name $USER_NAME.amp_test.app.app" + self.check_helper(input_, expected) + + def test3(self) -> None: + input_ = "run --rm -l user=root" + expected = "run --rm -l user=$USER_NAME" + self.check_helper(input_, expected) + + def test4(self) -> None: + input_ = "run_docker_as_root='True'" + expected = "run_docker_as_root='True'" + self.check_helper(input_, expected) + + def test5(self) -> None: + input_ = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" + expected = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" + self.check_helper(input_, expected) + + +# ############################################################################# +# Test_purify_amp_reference1 +# ############################################################################# + + +class Test_purify_amp_reference1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + txt = hprint.dedent(txt) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_amp_references(txt) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Remove the reference to `amp.`. 
+ """ + txt = """ + * Failed assertion * + Instance '' + of class '_Man' is not a subclass of '' + """ + expected = r""" + * Failed assertion * + Instance '' + of class '_Man' is not a subclass of '' + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test removing multiple amp references in a single string. + """ + txt = """ + ImportError: No module named 'amp.helpers.test.test_file' + """ + expected = r""" + ImportError: No module named 'helpers.test.test_file' + """ + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test removing amp references in file paths. + """ + txt = """ + File "/home/user/amp/helpers/test/test_dbg.py", line 10 + File "/home/user/amp/helpers/test/test_file.py", line 20 + """ + expected = r""" + File "/home/user/helpers/test/test_dbg.py", line 10 + File "/home/user/helpers/test/test_file.py", line 20 + """ + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test removing amp references in import statements. + """ + txt = """ + from amp.helpers.test import test_dbg + import amp.helpers.test.test_file + from amp.helpers.test.test_dbg import _Man + """ + expected = r""" + from helpers.test import test_dbg + import helpers.test.test_file + from helpers.test.test_dbg import _Man + """ + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test removing amp references in docstrings and comments. + """ + txt = """ + # This is a test for amp.helpers.test.test_dbg + """ + expected = r""" + # This is a test for helpers.test.test_dbg + """ + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test removing amp references in error messages with multiple + occurrences. 
+ """ + txt = """ + Error in amp.helpers.test.test_dbg: Invalid input + Error in amp.helpers.test.test_file: File not found + Error in amp.helpers.test.test_dbg: Permission denied + """ + expected = r""" + Error in helpers.test.test_dbg: Invalid input + Error in helpers.test.test_file: File not found + Error in helpers.test.test_dbg: Permission denied + """ + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test that longer amp paths are processed before shorter ones. + """ + txt = "amp/helpers/amp/test/test_file.py" + expected = "helpers/test/test_file.py" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test that nested amp references are processed correctly. + """ + txt = "amp.helpers.test.amp.TestClass" + expected = "helpers.test.amp.TestClass" + self.check_helper(txt, expected) + + def test9(self) -> None: + """ + Test removing amp references from test creation comments with various + module paths. + """ + txt = """ + # Test created for amp.helpers.test.test_file + # Test created for amp.core.dataflow.model + # Test created for amp.helpers.test.test_dbg._Man + """ + expected = r""" + # Test created for helpers.test.test_file + # Test created for core.dataflow.model + # Test created for helpers.test.test_dbg._Man + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_app_references1 +# ############################################################################# + + +class Test_purify_app_references1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_app_references(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test app.helpers reference removal. 
+ """ + txt = "app.helpers.test.test_file" + expected = "helpers.test.test_file" + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test app.amp.helpers reference removal. + """ + txt = "app.amp.helpers.test.test_file" + expected = "amp.helpers.test.test_file" + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test app.amp.helpers_root.helpers reference removal. + """ + txt = "app.amp.helpers_root.helpers.test.test_file" + expected = "amp.helpers.test.test_file" + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test multiple app references in the same string. + """ + txt = """ + app.helpers.test.test_file + app.amp.helpers.test.test_file + app.amp.helpers_root.helpers.test.test_file + """ + expected = """ + helpers.test.test_file + amp.helpers.test.test_file + amp.helpers.test.test_file + """ + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test that longer app paths are processed before shorter ones. + """ + txt = "app/helpers/app/test/test_file.py" + expected = "helpers/test/test_file.py" + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test that app.amp.helpers_root references are processed before app.amp. + """ + txt = "app.amp.helpers_root.helpers.test.TestClass" + expected = "amp.helpers.test.TestClass" + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test string with no app references. + """ + txt = "path/to/file.txt" + expected = "path/to/file.txt" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test removing app references from test creation comments with various + module paths. 
+ """ + txt = """ + # Test created for app.helpers.test.test_file + # Test created for app.core.dataflow.model + # Test created for app.helpers.test.test_dbg._Man + """ + expected = r""" + # Test created for helpers.test.test_file + # Test created for core.dataflow.model + # Test created for helpers.test.test_dbg._Man + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_from_env_vars +# ############################################################################# + + +# TODO(ShaopengZ): numerical issue. (arm vs x86) +@pytest.mark.requires_ck_infra +class Test_purify_from_env_vars(hunitest.TestCase): + """ + Test purification from env vars. + """ + + def check_helper(self, env_var: str) -> None: + env_var_value = os.environ[env_var] + input_ = f"s3://{env_var_value}/" + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_from_env_vars(input_) + expected = f"s3://${env_var}/" + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == "//cmamp", + reason="Run only in //cmamp", + ) + def test1(self) -> None: + """ + - $CSFY_AWS_S3_BUCKET + """ + env_var = "CSFY_AWS_S3_BUCKET" + self.check_helper(env_var) + + +# TODO(gp): HelpersTask1 +# @pytest.mark.skipif( +# not hrecouti.get_repo_config().get_name() == "//cmamp", +# reason="Run only in //cmamp", +# ) +# def test_end_to_end(self) -> None: +# """ +# - Multiple env vars. 
+# """ +# #am_aws_s3_bucket = os.environ["AM_AWS_S3_BUCKET"] +# csfy_aws_s3_bucket = os.environ["CSFY_AWS_S3_BUCKET"] +# # +# text = f""" +# $AM_AWS_S3_BUCKET = {am_aws_s3_bucket} +# $CSFY_AWS_S3_BUCKET = {csfy_aws_s3_bucket} +# """ +# # +# text_purifier = huntepur.TextPurifier() +# actual = text_purifier.purify_from_env_vars(text) +# self.check_string(actual, fuzzy_match=True) + + +# ############################################################################# +# Test_purify_object_representation1 +# ############################################################################# + + +class Test_purify_object_representation1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + txt = hprint.dedent(txt) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_object_representation(txt) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + txt = """ + load_prices: {'source_node_name': 'RealTimeDataSource object + at 0x7f571c329b50 + """ + expected = r""" + load_prices: {'source_node_name': 'RealTimeDataSource object + at 0x""" + self.check_helper(txt, expected) + + def test2(self) -> None: + txt = """ + load_prices: {'source_node_name at 0x7f571c329b51': + 'RealTimeDataSource object at 0x7f571c329b50 + """ + expected = r""" + load_prices: {'source_node_name at 0x': + 'RealTimeDataSource object at 0x""" + self.check_helper(txt, expected) + + def test3(self) -> None: + txt = """ + load_prices: {'source_node_name': 'RealTimeDataSource', + 'source_node_kwargs': {'market_data': + , 'period': 'last_5mins', 'asset_id_col': 'asset_id', + 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', + 'execution_mode': 'real_time', 'process_forecasts_config': + {'market_data': + ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': + datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), + 'ath_end_time': datetime.time(16, 40), 'trading_end_time': + 
datetime.time(16, 4 0)}} + """ + expected = r""" + load_prices: {'source_node_name': 'RealTimeDataSource', + 'source_node_kwargs': {'market_data': + , 'period': 'last_5mins', 'asset_id_col': 'asset_id', + 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', + 'execution_mode': 'real_time', 'process_forecasts_config': + {'market_data': + ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': + datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), + 'ath_end_time': datetime.time(16, 40), 'trading_end_time': + datetime.time(16, 4 0)}}""" + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test replacing wall_clock_time=Timestamp('..., tz='America/New_York')) + """ + txt = """ + _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' + >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , + _dst_dir=None , _fit_at_beginning=False , + _wake_up_timestamp=None , _bar_duration_in_secs=300 , + _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 + 10:05:00-0500', tz='America/New_York'), + wall_clock_time=Timestamp('2022-08-04 09:29:13.441715-0400', + tz='America/New_York')), Event(num_it=2, + current_time=Timestamp('2000-01-01 10:10:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 + 09:29:13.892793-0400', tz='America/New_York')), Event(num_it=3, + current_time=Timestamp('2000-01-01 10:15:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 + 09:29:14.131619-0400', tz='America/New_York'))] ) + """ + expected = """ + _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' + >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , + _dst_dir=None , _fit_at_beginning=False , + _wake_up_timestamp=None , _bar_duration_in_secs=300 , + _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 + 10:05:00-0500', tz='America/New_York'), + wall_clock_time=Timestamp('xxx', tz='America/New_York')), + Event(num_it=2, 
class Test_purify_today_date1(hunitest.TestCase):
    """
    Test replacing today's date, with or without a time suffix, in a text.
    """

    def check_helper(self, txt: str, expected: str) -> None:
        """
        Purify `txt` from today's date and compare the result to `expected`.

        :param txt: text to purify
        :param expected: expected purified text
        """
        purifier = huntepur.TextPurifier()
        purified = purifier.purify_today_date(txt)
        self.assert_equal(purified, expected)

    def test1(self) -> None:
        """
        Test replacing today's date and time with placeholders.
        """
        date_str = datetime.date.today().strftime("%Y%m%d")
        txt = f"""
        Report generated on {date_str}_103045.
        Next run scheduled at {date_str}_235959.
        """
        expected = """
        Report generated on YYYYMMDD_HHMMSS.
        Next run scheduled at YYYYMMDD_HHMMSS.
        """
        self.check_helper(txt, expected)

    def test2(self) -> None:
        """
        Test replacing today's date only with placeholder.
        """
        date_str = datetime.date.today().strftime("%Y%m%d")
        txt = f"""
        Backup completed: {date_str}.
        Last modified: {date_str}.
        """
        expected = """
        Backup completed: YYYYMMDD.
        Last modified: YYYYMMDD.
        """
        self.check_helper(txt, expected)

    def test3(self) -> None:
        """
        Test to check that non-date-like numbers are not replaced.
        """
        txt = """
        ID: 20000319_123456
        Code: 20000321
        Reference: 20000320_999999
        """
        # The text is expected to come back untouched.
        expected = txt
        self.check_helper(txt, expected)
+ """ + txt = "Line 1 with spaces\nLine 2\twith\ttabs" + expected = "Line 1 with spaces\nLine 2\twith\ttabs\n" + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_parquet_file_names1 +# ############################################################################# + + +class Test_purify_parquet_file_names1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_parquet_file_names(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test purification of Parquet file names with the path. + + The Parquet file names with the + GUID have to be replaced with the `data.parquet` string. + """ + txt = """ + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/ea5e3faed73941a2901a2128abeac4ca-0.parquet + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/f7a39fefb69b40e0987cec39569df8ed-0.parquet + """ + expected = """ + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/data.parquet + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/data.parquet + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test purification of Parquet file name without the path. 
class Test_purify_helpers1(hunitest.TestCase):
    """
    Test removing `helpers_root` references from a text.
    """

    def check_helper(self, txt: str, expected: str) -> None:
        """
        Run `purify_helpers()` on `txt` and compare the result to `expected`.

        :param txt: text to purify
        :param expected: expected purified text
        """
        purifier = huntepur.TextPurifier()
        purified = purifier.purify_helpers(txt)
        self.assert_equal(purified, expected)

    def test1(self) -> None:
        """
        Test replacing `helpers_root.` references in import statements.
        """
        txt = """
        import helpers_root.helpers.hdbg as hdbg
        from helpers_root.helpers.hprint import dedent
        import helpers_root.config_root.config as config
        """
        expected = """
        import helpers.hdbg as hdbg
        from helpers.hprint import dedent
        import config_root.config as config
        """
        self.check_helper(txt, expected)

    def test2(self) -> None:
        """
        Test that file paths without a `helpers_root` component are unchanged.
        """
        txt = """
        /path/to/helpers/hdbg.py
        /path/to/helpers/hprint.py
        /path/to/config_root/config.py
        """
        # Nothing to purify: the output must equal the input.
        expected = txt
        self.check_helper(txt, expected)

    def test3(self) -> None:
        """
        Test replacing `helpers_root` both in dotted names and in file paths.
        """
        txt = """
        import helpers_root.helpers.hdbg
        from /path/to/helpers_root/helpers/hprint import dedent
        import helpers_root.config_root.config
        from /path/to/helpers_root/config_root/config import settings
        """
        expected = """
        import helpers.hdbg
        from /path/to/helpers/hprint import dedent
        import config_root.config
        from /path/to/config_root/config import settings
        """
        self.check_helper(txt, expected)

    def test4(self) -> None:
        """
        Test that non-matching patterns are not replaced.
        """
        txt = """
        import other_module
        from other_package import helpers
        import helpers_utils
        path/to/other/helpers/file.py
        """
        # Nothing to purify: the output must equal the input.
        expected = txt
        self.check_helper(txt, expected)
+ """ + txt = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.2f590c86.2f590c86 pdflatex -output-directory + """ + expected = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_docker_image_name(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_purify_line_number1 +# ############################################################################# + + +class Test_purify_line_number1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that the text is purified from line numbers correctly. + """ + txt = """ + dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): + in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=list): [('close',), ('volume',)] + out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=tuple): () + """ + expected = r""" + dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): + in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=list): [('close',), ('volume',)] + out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=tuple): () + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_line_number(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + + +# 
class Test_purify_file_names1(hunitest.TestCase):
    """
    Test the purification of a list of file names.
    """

    def check_helper(self, file_names: List[str], expected: List[str]) -> None:
        """
        Purify `file_names` with a mocked Git client root and check the result.

        :param file_names: file names to purify
        :param expected: expected purified file names
        """
        # Mock the Git client root once here, instead of repeating the same
        # patch in every test. The comparison stays inside the patch as in the
        # original tests.
        with umock.patch(
            "helpers.hgit.get_client_root", return_value="/home/user/gitroot"
        ):
            text_purifier = huntepur.TextPurifier()
            purified = text_purifier.purify_file_names(file_names)
            # Compare as text, one file name per line. Use new names rather
            # than re-binding the list variables to strings.
            actual_str = "\n".join(str(path) for path in purified)
            expected_str = "\n".join(str(path) for path in expected)
            self.assert_equal(actual_str, expected_str)

    def test1(self) -> None:
        """
        Test basic file name purification with relative paths.
        """
        file_names = [
            "/home/user/gitroot/helpers/test/test_file.py",
            "/home/user/gitroot/amp/helpers/test/test_dbg.py",
        ]
        expected = [
            "helpers/test/test_file.py",
            "helpers/test/test_dbg.py",
        ]
        self.check_helper(file_names, expected)

    def test2(self) -> None:
        """
        Test file name purification with nested amp references.
        """
        file_names = [
            "/home/user/gitroot/amp/helpers/amp/test/test_file.py",
            "/home/user/gitroot/amp/helpers/test/amp/test_dbg.py",
        ]
        expected = [
            "helpers/test/test_file.py",
            "helpers/test/test_dbg.py",
        ]
        self.check_helper(file_names, expected)

    def test3(self) -> None:
        """
        Test file name purification with app references to ensure that they are
        not replaced.
        """
        file_names = [
            "/home/user/gitroot/app/helpers/test/test_file.py",
            "/home/user/gitroot/app/amp/helpers/test/test_dbg.py",
        ]
        expected = [
            "app/helpers/test/test_file.py",
            "app/helpers/test/test_dbg.py",
        ]
        self.check_helper(file_names, expected)

    def test4(self) -> None:
        """
        Test file name purification with empty list.
        """
        file_names: List[str] = []
        expected: List[str] = []
        self.check_helper(file_names, expected)
+ """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer("TestCases", "TestNewCase", root_dir) + actual, _ = renamer._rename_class(content) + expected = """ +class TestNewCase(hunitest.TestCase): + def test_assert_equal1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + self.assert_equal(actual, expected) + + def test_rename_class2(self) -> None: + """ + Test renaming of non existing class. + """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer("TestCase", "TestNewCase", root_dir) + actual, _ = renamer._rename_class(content) + # Check if the content of the file was not changed. + self.assert_equal(actual, content) + + +# ############################################################################# +# TestPytestRenameMethod +# ############################################################################# + + +class TestPytestRenameMethod(hunitest.TestCase): + """ + Test method renaming functionality. + """ + + +# ############################################################################# +# TestCases +# ############################################################################# + + + @staticmethod + def helper() -> str: + """ + Create file content. 
+ """ + content = """ +class TestCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + + +# ############################################################################# +# TestOtherCases +# ############################################################################# + + +class TestOtherCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + return content + + +# ############################################################################# +# TestCases +# ############################################################################# + + + def test_rename_method1(self) -> None: + """ + Test renaming of existing method. + """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer( + "TestCases.test1", "TestCases.test_new", root_dir + ) + actual, _ = renamer._rename_method(content) + expected = """ +class TestCases(hunitest.TestCase): + def test_new(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + + +# ############################################################################# +# TestOtherCases +# ############################################################################# + + +class TestOtherCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + self.assert_equal(actual, expected) + + def test_rename_method2(self) -> None: + """ + Test renaming of non existing method. 
class TestPytestRenameOutcomes(hunitest.TestCase):
    """
    Test golden outcomes directory renaming.
    """

    @staticmethod
    def helper(toy_test: str) -> None:
        """
        Create the temporary outcomes to rename and `git add` them.

        :param toy_test: the name of the toy directory
        """
        outcomes_paths = [
            "TestCase.test_check_string1",
            "TestCase.test_rename",
            "TestCase.test_rename3",
            "TestCases.test_rename2",
            "TestRename.test_rename1",
        ]
        for path in outcomes_paths:
            outcomes_dir = os.path.join(toy_test, "test/outcomes", path)
            hio.create_dir(outcomes_dir, incremental=False)
            hio.to_file(f"{outcomes_dir}/test.txt", "Test files.")
        # Stage the whole toy dir in Git.
        cmd = f"git add {toy_test}/"
        hsystem.system(cmd, abort_on_error=False, suppress_output=False)

    def _clean_up(self, toy_test: str) -> None:
        """
        Remove temporary test directory.

        The directory is first unstaged from Git and then deleted. Errors are
        ignored (`abort_on_error=False`), so this is safe to call even when the
        directory was only partially created.

        :param toy_test: the name of the toy directory
        """
        cmd = f"git reset {toy_test}/ && rm -rf {toy_test}/"
        hsystem.system(cmd, abort_on_error=False, suppress_output=False)

    def _check_rename_outcomes(
        self, old_name: str, new_name: str, expected: list
    ) -> None:
        """
        Rename the toy outcomes and check the resulting outcome dirs.

        :param old_name: class or `class.method` name to rename
        :param new_name: new class or `class.method` name
        :param expected: sorted names of the outcome dirs expected after the
            renaming
        """
        # The toy dir is named after the running test, so the tests of this
        # class do not share it.
        toy_test = "toyCmTask1279." + self._testMethodName
        test_path = os.path.join(toy_test, "test")
        try:
            # Create the toy outcomes.
            self.helper(toy_test)
            root_dir = os.getcwd()
            renamer = hunteuti.UnitTestRenamer(old_name, new_name, root_dir)
            renamer.rename_outcomes(
                test_path,
            )
            # Check if the dirs were renamed.
            outcomes_path = os.path.join(test_path, "outcomes")
            actual = sorted(
                ent
                for ent in os.listdir(outcomes_path)
                if os.path.isdir(os.path.join(outcomes_path, ent))
            )
            self.assertEqual(actual, expected)
        finally:
            # Clean up also when the renaming or the check fails, otherwise
            # the toy dir is left on disk and staged in the Git index.
            self._clean_up(toy_test)

    def test_rename_class_outcomes(self) -> None:
        """
        Rename the outcome directories of a class.
        """
        expected = [
            "TestCases.test_rename2",
            "TestRename.test_rename1",
            "TestRenamedCase.test_check_string1",
            "TestRenamedCase.test_rename",
            "TestRenamedCase.test_rename3",
        ]
        self._check_rename_outcomes("TestCase", "TestRenamedCase", expected)

    def test_rename_method_outcomes(self) -> None:
        """
        Rename the outcome directory of a single method.
        """
        expected = [
            "TestCase.test_check_string1",
            "TestCase.test_method_renamed",
            "TestCase.test_rename3",
            "TestCases.test_rename2",
            "TestRename.test_rename1",
        ]
        self._check_rename_outcomes(
            "TestCase.test_rename",
            "TestCase.test_method_renamed",
            expected,
        )
class TestVersioning1(hunitest.TestCase):
    """
    Test reading, checking, and bumping versions.
    """

    def test_get_changelog_version1(self) -> None:
        """
        Test `cmamp` version.
        """
        # Only check that the call works: the value is logged, not asserted.
        code_version = hversio.get_changelog_version(".")
        _LOG.debug("code_version=%s", code_version)

    def test_get_container_version1(self) -> None:
        """
        Check that the container version can be retrieved.
        """
        # Only check that the call works: the value is logged, not asserted.
        container_version = hversio.get_container_version()
        _LOG.debug("container_version=%s", container_version)

    def test_check_version1(self) -> None:
        """
        Check that the version check runs on the current dir.
        """
        hversio.check_version(".")

    def test__check_version1(self) -> None:
        """
        Code version `1.0.0` and container version `1.0.2` are not ok.
        """
        is_ok = hversio._check_version("1.0.0", "1.0.2")
        self.assertFalse(is_ok)

    def test__check_version2(self) -> None:
        """
        The same code and container version are ok.
        """
        is_ok = hversio._check_version("1.0.0", "1.0.0")
        self.assertTrue(is_ok)

    def test__check_version3(self) -> None:
        """
        Code version `1.0.0` and container version `amp-1.0.0` are ok.
        """
        is_ok = hversio._check_version("1.0.0", "amp-1.0.0")
        self.assertTrue(is_ok)

    def test_bump_version1(self) -> None:
        """
        Test major version bump.
        """
        bumped = hversio.bump_version("2.2.0", bump_type="major")
        self.assertEqual(bumped, "3.0.0")

    def test_bump_version2(self) -> None:
        """
        Test minor version bump.
        """
        bumped = hversio.bump_version("2.2.0", bump_type="minor")
        self.assertEqual(bumped, "2.3.0")

    def test_bump_version3(self) -> None:
        """
        Test patch version bump.
        """
        bumped = hversio.bump_version("2.2.0", bump_type="patch")
        self.assertEqual(bumped, "2.2.1")
+ """ + tasks = [] + for i in range(5): + # val1, val2 + task = ((i, 2 * i), {f"hello{i}": f"world{2 * i}", "good": "bye"}) + tasks.append(task) + workload: hjoblib.Workload = (workload_function, "workload_function", tasks) + if randomize: + # Randomize workload. + workload = hjoblib.randomize_workload(workload, seed=seed) + return workload + + +# ############################################################################# + + +def _outcome_to_string(outcome: List[str]) -> str: + outcome = "\n".join(sorted(map(str, outcome))) + return outcome + + +def _helper_success( + self_: Any, + workload: hjoblib.Workload, + num_threads: Union[str, int], + abort_on_error: bool, + expected_return: str, + backend: str, +) -> None: + """ + Run a workload that is supposed to succeed and check its result. + """ + dry_run = False + incremental = True + num_attempts = 1 + log_file = os.path.join(self_.get_scratch_space(), "log.txt") + # + res = hjoblib.parallel_execute( + workload, + dry_run, + num_threads, + incremental, + abort_on_error, + num_attempts, + log_file, + backend=backend, + ) + # Check. + _LOG.debug("res=%s", str(res)) + actual = _outcome_to_string(res) + self_.assert_equal(actual, expected_return) + + +# ############################################################################# +# Test_parallel_execute1 +# ############################################################################# + + +class Test_parallel_execute1(hunitest.TestCase): + """ + Execute a workload of 5 tasks that all succeed. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_RETURN = r"""val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} +val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} +val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} +val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} +val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" + + def test_dry_run1(self) -> None: + """ + Dry-run a workload. + """ + workload = get_workload1(randomize=True) + dry_run = True + num_threads = "serial" + incremental = True + num_attempts = 1 + abort_on_error = True + log_file = os.path.join(self.get_scratch_space(), "log.txt") + res = hjoblib.parallel_execute( + workload, + dry_run, + num_threads, + incremental, + abort_on_error, + num_attempts, + log_file, + ) + _LOG.debug("res=%s", str(res)) + self.assertIs(res, None) + + def _run_test(self, num_threads: Union[str, int], backend: str) -> None: + workload = get_workload1(randomize=True) + abort_on_error = True + # + expected_return = self.EXPECTED_RETURN + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + + # pylint: enable=line-too-long + + def test_serial1(self) -> None: + num_threads = "serial" + backend = "" + self._run_test(num_threads, backend) + + def test_parallel_loky1(self) -> None: + num_threads = "1" + backend = "loky" + self._run_test(num_threads, backend) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~6 seconds, see CmTask4951.") + def test_parallel_loky2(self) -> None: + num_threads = "3" + backend = "loky" + self._run_test(num_threads, backend) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = "1" + backend = "asyncio_threading" + self._run_test(num_threads, backend) + + def test_parallel_asyncio_threading2(self) -> None: + 
def _helper_fail(
    self_: Any,
    workload: hjoblib.Workload,
    num_threads: Union[str, int],
    abort_on_error: bool,
    expected_assertion: str,
    backend: str,
) -> None:
    """
    Run a workload that is supposed to raise and check the error message.

    :param self_: test case providing `get_scratch_space()`, `assertRaises()`,
        and `assert_equal()`
    :param workload: workload to execute
    :param num_threads: passed through to `hjoblib.parallel_execute()`
    :param abort_on_error: passed through to `hjoblib.parallel_execute()`
    :param expected_assertion: expected text of the raised `ValueError`
    :param backend: passed through to `hjoblib.parallel_execute()`
    """
    log_file = os.path.join(self_.get_scratch_space(), "log.txt")
    # Positional args of `parallel_execute()`, in call order.
    dry_run = False
    incremental = True
    num_attempts = 1
    args = (
        workload,
        dry_run,
        num_threads,
        incremental,
        abort_on_error,
        num_attempts,
        log_file,
    )
    with self_.assertRaises(ValueError) as raised:
        res = hjoblib.parallel_execute(*args, backend=backend)
        # Print result if it succeeds.
        _LOG.debug("res=%s", str(res))
    # Check.
    self_.assert_equal(str(raised.exception), expected_assertion)
+# if __name__ == "__main__": +# hdbg.init_logger(verbosity=logging.INFO) +# workload = get_workload1(randomize=True) +# # num_threads = "serial" +# num_threads = "1" +# # num_threads = "5" +# # backend = "loky" +# backend = "asyncio_threading" +# # backend = "asyncio_multiprocessing" +# abort_on_error = True +# # +# dry_run = False +# incremental = True +# num_attempts = 1 +# log_file = "./log.txt" +# # +# _LOG.info("\n" + hprint.frame("Start workload")) +# with htimer.TimedScope(logging.INFO, "Execute workload"): +# res = hjoblib.parallel_execute( +# workload, +# dry_run, +# num_threads, +# incremental, +# abort_on_error, +# num_attempts, +# log_file, +# backend=backend, +# ) +# _LOG.info("\n" + hprint.frame("Results")) +# import pprint +# +# print(pprint.pformat(res)) + + +# ############################################################################# +# Test_parallel_execute2 +# ############################################################################# + + +class Test_parallel_execute2(hunitest.TestCase): + """ + Execute a workload of 1 task that fails. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_STRING = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" + + # pylint: enable=line-too-long + + def _run_test( + self, + abort_on_error: bool, + num_threads: Union[str, int], + backend: str, + should_succeed: bool, + ) -> None: + workload = get_workload2() + # + expected_return = self.EXPECTED_STRING + if should_succeed: + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + else: + _helper_fail( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + + def test_serial1(self) -> None: + num_threads = "serial" + abort_on_error = True + backend = "" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_serial2(self) -> None: + num_threads = "serial" + abort_on_error = False + backend = "" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky1(self) -> None: + num_threads = 2 + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky2(self) -> None: + num_threads = 2 + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = 2 + abort_on_error = True + backend = "asyncio_threading" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading2(self) -> None: + num_threads = 2 + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + 
self._run_test(abort_on_error, num_threads, backend, should_succeed) + + +# ############################################################################# +# Test_parallel_execute3 +# ############################################################################# + + +def get_workload3( + randomize: bool, seed: Optional[int] = None +) -> hjoblib.Workload: + """ + Return a workload for `workload_function()` with 5 tasks succeeding and one + task failing. + """ + workload: hjoblib.Workload = get_workload1(randomize=True) + # Modify the workflow in place. + (workload_func, func_name, tasks) = workload + _ = workload_func, func_name + task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) + tasks.append(task) + if randomize: + # Randomize workload. + workload = hjoblib.randomize_workload(workload, seed=seed) + return workload + + +# ############################################################################# +# Test_parallel_execute3 +# ############################################################################# + + +class Test_parallel_execute3(hunitest.TestCase): + """ + Execute a workload with 5 tasks that succeed and 1 task that fails. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_STRING1 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" + + EXPECTED_STRING2 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'} +val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} +val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} +val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} +val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} +val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" + + # pylint: enable=line-too-long + + def _run_test( + self, + abort_on_error: bool, + num_threads: Union[str, int], + backend: str, + should_succeed: bool, + ) -> None: + workload = get_workload3(randomize=False) + # Since there is an error and `abort_on_error=True` we only get information + # about the failed task. + if should_succeed: + expected_return = self.EXPECTED_STRING2 + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + else: + # Since there is an error and `abort_on_error=True` we only get information + # about the failed task. 
+ expected_exception = self.EXPECTED_STRING1 + _helper_fail( + self, + workload, + num_threads, + abort_on_error, + expected_exception, + backend, + ) + + def test_serial1(self) -> None: + num_threads = "serial" + abort_on_error = True + backend = "" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_serial2(self) -> None: + """ + Execute: + - a workload with 5 tasks that succeed and 1 task that fails + - serially + - don't abort because abort_on_error=False + """ + num_threads = "serial" + abort_on_error = False + backend = "" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_loky1(self) -> None: + num_threads = "1" + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky2(self) -> None: + num_threads = "3" + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_loky3(self) -> None: + num_threads = "1" + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.slow("~5 seconds.") + def test_parallel_loky4(self) -> None: + num_threads = "3" + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = "1" + abort_on_error = True + backend = "asyncio_threading" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading2(self) -> None: + num_threads = "3" + abort_on_error = True + backend = "asyncio_threading" + # + 
should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading3(self) -> None: + num_threads = "1" + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading4(self) -> None: + num_threads = "3" + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + +# ############################################################################# + + +# ############################################################################# +# Test_joblib_example1 +# ############################################################################# + + +@pytest.mark.skip(reason="Just for experimenting with joblib") +class Test_joblib_example1(hunitest.TestCase): + @staticmethod + def func(val: int) -> int: + print(f"val={val}") + if val == -1: + raise ValueError(f"val={val}") + print(f" out={val}") + return val + + def test1(self) -> None: + """ + Show that when a job fails the entire `joblib.Parallel` fails without + returning anything, but just propagating the exception. 
+ """ + # num_threads = 5 + num_threads = 1 + vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + # vals[1] = -1 + vals[5] = -1 + import joblib + + backend = "loky" + res = joblib.Parallel(n_jobs=num_threads, backend=backend, verbose=200)( + joblib.delayed(Test_joblib_example1.func)(val) for val in vals + ) + print(f"res={str(res)}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py new file mode 100644 index 000000000..12f04c506 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py @@ -0,0 +1,540 @@ +# This should only test helper functions from `lib_tasks.py`. +# `test_tasks.py` associated to `tasks.py` should test specific task targets. + +import logging +import os +import re +import unittest.mock as umock +from typing import Dict, Generator + +import invoke +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks as hlibtask +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +def _get_default_params() -> Dict[str, str]: + """ + Get fake params pointing to a different image so we can test the code + without affecting the official images. 
+ """ + ecr_base_path = os.environ["CSFY_ECR_BASE_PATH"] + default_params = { + "CSFY_ECR_BASE_PATH": ecr_base_path, + "BASE_IMAGE": "amp_test", + "HELPERS_IMAGE_PROD": f"{ecr_base_path}/helpers:prod", + } + return default_params + + +# ############################################################################# +# _LibTasksTestCase +# ############################################################################# + + +class _LibTasksTestCase(hunitest.TestCase): + """ + Test class injecting default parameters in the `lib_tasks` singleton in + `set_up_test()` and cleaning up the singleton in `tear_down_test()`. + """ + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + params = _get_default_params() + hlitauti.set_default_params(params) + + def tear_down_test(self) -> None: + hlitauti.reset_default_params() + + +# ############################################################################# + + +# TODO(gp): Make it public. +def _build_mock_context_returning_ok() -> invoke.MockContext: + """ + Build a MockContext catching any command and returning rc=0. + """ + ctx = invoke.MockContext( + repeat=True, run={re.compile(".*"): invoke.Result(exited=0)} + ) + return ctx + + +# ############################################################################# +# _CheckDryRunTestCase +# ############################################################################# + + +class _CheckDryRunTestCase(hunitest.TestCase): + """ + Test class running an invoke target with/without dry-run and checking that + the issued commands are what is expected. + """ + + def _check_calls(self, ctx: invoke.MockContext) -> None: + """ + `check_string()` the sequence of commands issued in the context. 
+ """ + actual = "\n".join(map(str, ctx.run.mock_calls)) + actual = hprint.remove_non_printable_chars(actual) + self.check_string(actual) + + def _check_output(self, target: str, check: bool = True) -> None: + """ + Dry run target checking that the sequence of commands issued is the + expected one. + """ + ctx = _build_mock_context_returning_ok() + # pylint: disable=exec-used + exec(f"hlibtask.{target}") + # pylint: enable=exec-used + # Check the outcome. + if check: + self._check_calls(ctx) + + +# TODO(gp): We should group the tests by what is tested and not how it's +# tested. E.g. TestDryRunTasks1::test_print_setup and +# TestDryRunTasks2::test_print_setup should go together in a class. + + +# ############################################################################# +# TestDryRunTasks1 +# ############################################################################# + + +class TestDryRunTasks1(hunitest.TestCase): + """ + - Run invoke in dry-run mode from command line + - Compare the output to the golden outcomes + """ + + # TODO(gp): -> TestGitCommands1 + + def dry_run( + self, target: str, dry_run: bool = True, check_string: bool = True + ) -> None: + """ + Invoke the given target with dry run. + + This is used to test the commands that we can't actually + execute. + """ + opts = "--dry" if dry_run else "" + # + # TODO(vitalii): While deploying the container versioning + # we disable the check in the unit tests. Remove `SKIP_VERSION_CHECK=1` + # after CmampTask570 is fixed. + cmd = f"SKIP_VERSION_CHECK=1 invoke {opts} {target} | grep -v INFO | grep -v '>>ENV<<:'" + _, actual = hsystem.system_to_string(cmd) + # + actual = hprint.remove_non_printable_chars(actual) + # docker_ps: sudo=False + regex = r"# \S+:" + actual = hunitest.filter_text(regex, actual) + # + regex = r"(WARN|INFO)\s+hcache.py" + actual = hunitest.filter_text(regex, actual) + # Filter out `no module` warnings. 
+ # TODO(Grisha): add the "no module warning" filtering + # to `purify_text()` in `check_string()`. + regex = "WARN.*No module" + actual = hunitest.filter_text(regex, actual) + if check_string: + self.check_string(actual) + + # ######################################################################### + + # TODO(gp): We can't test this since amp and cmamp have now different base image. + # def test_print_setup(self) -> None: + # target = "print_setup" + # self.dry_run(target) + + # The problem is that we use system and not ctx to execute the command, so that + # --dry-run doesn't work. + @pytest.mark.skip(reason="This is actually run") + def test_git_pull(self) -> None: + target = "git_pull" + self.dry_run(target) + + @pytest.mark.skip(reason="This is actually run") + def test_git_fetch_master(self) -> None: + target = "git_fetch_master" + self.dry_run(target) + + @pytest.mark.skip(reason="This is actually run deleting files") + def test_git_clean(self) -> None: + target = "git_clean" + self.dry_run(target) + + # ######################################################################### + # TODO(gp): -> TestDockerCommands1 + + @pytest.mark.slow("~6 sec.") + @pytest.mark.skipif( + hserver.is_inside_ci(), reason="In CI the output is different" + ) + def test_docker_images_ls_repo(self) -> None: + target = "docker_images_ls_repo" + # TODO(gp): amp and cmamp have different version of aws cli and so the + # output is different. + check_string = False + self.dry_run(target, check_string=check_string) + + @pytest.mark.slow("~6 sec.") + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. 
Different golden outcomes in helpers and other repos.", + ) + def test_docker_ps(self) -> None: + target = "docker_ps" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_stats(self) -> None: + target = "docker_stats" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_last(self) -> None: + target = "docker_kill" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_all(self) -> None: + target = "docker_kill --all" + self.dry_run(target) + + +# ############################################################################# + + +# ############################################################################# +# TestDryRunTasks2 +# ############################################################################# + + +# Outside CK infra, the class hangs, so we skip it. +@pytest.mark.requires_ck_infra +@pytest.mark.slow(reason="Around 7s") +@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class TestDryRunTasks2(_LibTasksTestCase, _CheckDryRunTestCase): + """ + - Call the invoke task directly from Python + - `check_string()` that the sequence of commands issued by the target is the + expected one using mocks to return ok for every system call. + """ + + def test_print_setup(self) -> None: + target = "print_setup(ctx)" + self._check_output(target) + + def test_git_pull(self) -> None: + target = "git_pull(ctx)" + self._check_output(target) + + def test_git_fetch_master(self) -> None: + target = "git_fetch_master(ctx)" + self._check_output(target) + + def test_git_clean(self) -> None: + target = "git_clean(ctx)" + self._check_output(target) + + # TODO(Grisha): is not it the same as `test_git_clean()`? 
+ def test_git_clean2(self) -> None: + target = "git_clean(ctx, dry_run=False)" + self._check_output(target) + + # ######################################################################### + + def test_docker_images_ls_repo(self) -> None: + target = "docker_images_ls_repo(ctx)" + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_all(self) -> None: + target = "docker_kill(ctx, all=True)" + self._check_output(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_last(self) -> None: + target = "docker_kill(ctx)" + self._check_output(target) + + def test_docker_ps(self) -> None: + target = "docker_ps(ctx)" + self._check_output(target) + + def test_docker_pull(self) -> None: + target = "docker_pull(ctx)" + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_stats(self) -> None: + target = "docker_stats(ctx)" + self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestGhCommands1 + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr1(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, title='test')" + self._check_output(target) + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. 
+ @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr2(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, body='hello_world', title='test')" + self._check_output(target) + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr3(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, draft=False, title='test')" + self._check_output(target) + + # TODO(*): Remove skip after migration to `csfy`.` + @pytest.mark.skip( + reason="migration to new repo " + "ref: https://github.com/causify-ai/cmamp/issues/13063" + ) + def test_gh_issue_title(self) -> None: + target = "gh_issue_title(ctx, 1)" + self._check_output(target) + + # TODO(Shaopengz): Outside CK infra, the test hangs, so skip. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_gh_workflow_list(self) -> None: + target = "gh_workflow_list(ctx, filter_by_branch='master')" + self._check_output(target) + + # This is an action with side effects so we can't test it. 
+ # def test_gh_workflow_run(self) -> None: + # target = "gh_workflow_run(ctx)" + # self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestGitCommands1 + def test_git_branch_files(self) -> None: + # This test needs a reference to Git master branch. + hgit.fetch_origin_master_if_needed() + # + target = "git_branch_files(ctx)" + self._check_output(target) + + @pytest.mark.skip( + reason="HelpersTask638: Skip Failing test to merge the PR in cmamp" + ) + def test_git_branch_create1(self) -> None: + target = ( + "git_branch_create(ctx, branch_name='AmpTask123_test', " + "only_branch_from_master=False)" + ) + self._check_output(target) + + # TODO(*): Remove skip after migration to `csfy`.` + @pytest.mark.skip( + reason="migration to new repo " + "ref: https://github.com/causify-ai/cmamp/issues/13063" + ) + def test_git_branch_create2(self) -> None: + # Difference between `cmamp` and `kaizenflow`. + target = ( + "git_branch_create(ctx, issue_id=1, only_branch_from_master=False)" + ) + self._check_output(target) + + def test_git_branch_create3(self) -> None: + with self.assertRaises(AssertionError): + target = ( + "git_branch_create(ctx, branch_name='test', issue_id=1, " + "only_branch_from_master=False)" + ) + self._check_output(target, check=False) + + # This is an action with side effects so we can't test it. 
+ # def test_git_branch_delete_merged(self) -> None: + # target = "git_branch_delete_merged(ctx)" + # self._check_output(target) + + def test_git_merge_master(self) -> None: + target = "git_merge_master(ctx, abort_if_not_clean=False)" + self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestLintCommands1 + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint1(self) -> None: + target = "lint(ctx, modified=True)" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint2(self) -> None: + target = "lint(ctx, branch=True)" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint3(self) -> None: + file = __file__ + target = f"lint(ctx, files='{file}')" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + def test_find_test_class1(self) -> None: + class_name = self.__class__.__name__ + target = f"find_test_class(ctx, class_name='{class_name}')" + self._check_output(target) + + # ######################################################################### + + @pytest.mark.skipif( + hserver.is_inside_ci(), reason="In CI the output is different" + ) + def test_docker_login(self) -> None: + """ + Instead of using _build_mock_context_returning_ok(), set the return + values more explicitly. 
+ """ + stdout = "aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49\n" + ctx = invoke.MockContext( + run={ + "aws --version": invoke.Result(stdout), + re.compile("^docker login"): invoke.Result(exited=0), + re.compile("^eval"): invoke.Result(exited=0), + } + ) + hlibtask.docker_login(ctx) + # Check the outcome. + # self._check_calls(ctx) + + +# ############################################################################# + +# TODO(gp): Run test coverage with +# > i run_fast_slow_tests \ +# --pytest-opts="helpers/test/test_lib_tasks.py test/test_tasks.py" \ +# --coverage + +# TODO(gp): Add tests for: +# - print_tasks +# - git_files +# - git_last_commit_files +# - check_python_files +# - docker_stats +# - traceback (with checked in file) +# - lint + + +# ############################################################################# + + +# ############################################################################# +# TestFailing +# ############################################################################# + + +class TestFailing(hunitest.TestCase): + """ + Run a test that fails based on CSFY_FORCE_TEST_FAIL environment variable. 
+ """ + + def test_failing(self) -> None: + if os.environ.get("CSFY_FORCE_TEST_FAIL", "") == "1": + self.fail("test failed succesfully") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py new file mode 100644 index 000000000..80ea28ffb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py @@ -0,0 +1,494 @@ +import logging +import os +import re +import unittest.mock as umock +from typing import Dict, Optional + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.lib_tasks_docker as hlitadoc +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# pylint: disable=protected-access + + +# ############################################################################# +# Test_generate_compose_file1 +# ############################################################################# + + +class Test_generate_compose_file1(hunitest.TestCase): + def helper( + self, + stage: str, + *, + use_privileged_mode: bool = False, + use_sibling_container: bool = False, + shared_data_dirs: Optional[Dict[str, str]] = None, + mount_as_submodule: bool = False, + use_network_mode_host: bool = True, + use_main_network: bool = False, + ) -> None: + txt = [] + # + params = [ + "stage", + "use_privileged_mode", + "use_sibling_container", + "shared_data_dirs", + "mount_as_submodule", + "use_network_mode_host", + ] + txt_tmp = hprint.to_str(" ".join(params)) + txt.append(txt_tmp) + # + file_name = None + txt_tmp = hlitadoc._generate_docker_compose_file( + stage, + use_privileged_mode, + use_sibling_container, + 
shared_data_dirs, + mount_as_submodule, + use_network_mode_host, + use_main_network, + file_name, + ) + # Remove all the env variables that are function of the host. + txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) + txt_tmp = hunitest.filter_text("CSFY_GIT_ROOT_PATH", txt_tmp) + txt_tmp = hunitest.filter_text("CSFY_HELPERS_ROOT_PATH", txt_tmp) + txt_tmp = hunitest.filter_text( + "CSFY_USE_HELPERS_AS_NESTED_MODULE", txt_tmp + ) + txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) + txt.append(txt_tmp) + # + txt = "\n".join(txt) + txt = hunitest.filter_text(r"working_dir", txt) + self.check_string(txt) + + def test1(self) -> None: + self.helper(stage="prod", use_privileged_mode=True) + + def test2(self) -> None: + self.helper( + stage="prod", shared_data_dirs={"/data/shared": "/shared_data"} + ) + + def test3(self) -> None: + self.helper(stage="prod", use_main_network=True) + + # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + hgit.is_in_amp_as_submodule(), reason="Only run in amp directly" + ) + def test4(self) -> None: + self.helper(stage="dev") + + # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
+ @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test5(self) -> None: + self.helper(stage="dev") + + +# ############################################################################# +# Test_generate_compose_file2 +# ############################################################################# + + +class Test_generate_compose_file2(hunitest.TestCase): + def helper( + self, + mock_getcwd: str, + mock_find_git_root: str, + mock_find_helpers_root: str, + mock_is_in_helpers_as_supermodule: bool, + *, + stage: str = "prod", + use_privileged_mode: bool = True, + use_sibling_container: bool = False, + shared_data_dirs: Optional[Dict[str, str]] = None, + mount_as_submodule: bool = False, + use_network_mode_host: bool = True, + use_main_network: bool = False, + ) -> None: + txt = [] + # + params = [ + "stage", + "use_privileged_mode", + "use_sibling_container", + "shared_data_dirs", + "mount_as_submodule", + "use_network_mode_host", + ] + txt_tmp = hprint.to_str(" ".join(params)) + txt.append(txt_tmp) + # + file_name = None + with ( + umock.patch.object(os, "getcwd", return_value=mock_getcwd), + umock.patch.object( + hgit, "find_git_root", return_value=mock_find_git_root + ), + umock.patch.object( + hgit, "find_helpers_root", return_value=mock_find_helpers_root + ), + umock.patch.object( + hgit, + "is_in_helpers_as_supermodule", + return_value=mock_is_in_helpers_as_supermodule, + ), + ): + txt_tmp = hlitadoc._generate_docker_compose_file( + stage, + use_privileged_mode, + use_sibling_container, + shared_data_dirs, + mount_as_submodule, + use_network_mode_host, + use_main_network, + file_name, + ) + # Remove all the env variables that are function of the host. 
+ txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) + txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) + txt.append(txt_tmp) + # + txt = "\n".join(txt) + self.check_string(txt) + + def test1(self) -> None: + """ + Check that file is generated correctly when the repo is `//cmamp`. + """ + self.helper( + mock_getcwd="/data/dummy/src/cmamp1", + mock_find_git_root="/data/dummy/src/cmamp1", + mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + def test2(self) -> None: + """ + Check that file is generated correctly when the repo is `//helpers`. + """ + self.helper( + mock_getcwd="/data/dummy/src/helpers1", + mock_find_git_root="/data/dummy/src/helpers1", + mock_find_helpers_root="/data/dummy/src/helpers1", + mock_is_in_helpers_as_supermodule=True, + ) + + def test3(self) -> None: + """ + Check that file is generated correctly when the repo is `//cmamp` and + `//cmamp/ck.infra` is a runnable dir. + """ + self.helper( + mock_getcwd="/data/dummy/src/cmamp1/ck.infra", + mock_find_git_root="/data/dummy/src/cmamp1", + mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + def test4(self) -> None: + """ + Check that file is generated correctly when the repo is `//orange`. + """ + self.helper( + mock_getcwd="/data/dummy/src/orange1", + mock_find_git_root="/data/dummy/src/orange1", + mock_find_helpers_root="/data/dummy/src/orange1/amp/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + +# ############################################################################# + + +# ############################################################################# +# TestLibTasksGetDockerCmd1 +# ############################################################################# + + +# TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
+@pytest.mark.requires_ck_infra +class TestLibTasksGetDockerCmd1(httestlib._LibTasksTestCase): + """ + Test `_get_docker_compose_cmd()`. + """ + + def check(self, actual: str, expected: str) -> None: + # Remove current timestamp (e.g., `20220317_232120``) from the `--name` + # so that the tests pass. + timestamp_regex = r"\.\d{8}_\d{6}" + actual = re.sub(timestamp_regex, "", actual) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # This is required when different repos run Docker with user vs root / remap. + actual = hunitest.filter_text("--user", actual) + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.requires_ck_infra + # TODO(gp): After using a single docker file as part of AmpTask2308 + # "Update_amp_container" we can probably run these tests in any repo, so + # we should be able to remove this `skipif`. + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash1(self) -> None: + """ + Command for docker_bash target. + """ + base_image = "" + stage = "dev" + version = "1.0.0" + cmd = "bash" + service_name = "app" + use_entrypoint = False + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + service_name=service_name, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + --entrypoint bash \ + app + """ + self.check(actual, expected) + + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash2(self) -> None: + """ + Command for docker_bash with entrypoint. 
+ """ + base_image = "" + stage = "local" + version = "1.0.0" + cmd = "bash" + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + print_docker_config=print_docker_config, + ) + expected = r"""IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + app \ + bash """ + self.check(actual, expected) + + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash3(self) -> None: + """ + Command for docker_bash with some env vars. + """ + base_image = "" + stage = "local" + version = "1.0.0" + cmd = "bash" + extra_env_vars = ["PORT=9999", "SKIP_RUN=1"] + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ + PORT=9999 \ + SKIP_RUN=1 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + app \ + bash + """ + self.check(actual, expected) + + if False: + + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_docker_bash4(self) -> None: + base_image = "" + stage = "dev" + version = "1.0.0" + cmd = "bash" + entrypoint = False + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + entrypoint=entrypoint, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + docker compose \ + --file 
$GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + --entrypoint bash \ + app + """ + self.check(actual, expected) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_jupyter1(self) -> None: + base_image = "" + stage = "dev" + version = "1.0.0" + port = 9999 + self_test = True + print_docker_config = False + actual = hlitadoc._get_docker_jupyter_cmd( + base_image, + stage, + version, + port, + self_test, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + PORT=9999 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.jupyter_server_test.app \ + --service-ports \ + jupyter_server_test + """ + self.check(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_image_name_valid1 +# ############################################################################# + + +class Test_dassert_is_image_name_valid1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that valid images pass the assertion. + """ + valid_images = [ + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev", + "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0", + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0", + "sorrentum/cmamp", + ] + for image in valid_images: + hlitadoc.dassert_is_image_name_valid(image) + + def test2(self) -> None: + """ + Check that invalid images do not pass the assertion. + """ + invalid_images = [ + # Missing required parts. + "invalid-image-name", + # Missing stage/version. + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:", + # Invalid version. 
+ "12345.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-invalid", + ] + # TODO(gp): Add a check for the output. + for image in invalid_images: + with self.assertRaises(AssertionError): + hlitadoc.dassert_is_image_name_valid(image) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_base_image_name_valid1 +# ############################################################################# + + +class Test_dassert_is_base_image_name_valid1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that valid base images pass the assertion. + """ + valid_base_images = [ + "12345.dkr.ecr.us-east-1.amazonaws.com/amp", + "sorrentum/cmamp", + "ghcr.io/cryptokaizen/cmamp", + ] + for base_image in valid_base_images: + hlitadoc._dassert_is_base_image_name_valid(base_image) + + def test2(self) -> None: + """ + Check that invalid base images do not pass the assertion. + """ + invalid_base_images = [ + # Missing required parts. + "invalid-base-image", + # Extra character at the end. + "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:", + # Extra part in the name. 
+ "ghcr.io/cryptokaizen/cmamp/invalid", + ] + for base_image in invalid_base_images: + with self.assertRaises(AssertionError): + hlitadoc._dassert_is_base_image_name_valid(base_image) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py new file mode 100644 index 000000000..ff430ed24 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py @@ -0,0 +1,1530 @@ +import logging +import os +import unittest.mock as umock +from typing import Generator, List + +import boto3 +import moto +import pytest + +import helpers.hgit as hgit +import helpers.hunit_test as hunitest +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_docker_release as hltadore +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +def _extract_commands_from_call(calls: List[umock._Call]) -> List[str]: + """ + Extract command strings from a list of mock call arguments. + + Example: + calls = [ + ( + # args tuple: (context, command) + (mock_ctx, "docker build --no-cache image1"), + # kwargs dictionary + {"pty": True} + ) + ] + After extraction: + ["docker build --no-cache image1"] + + :param calls: list of mock call objects containing (args, kwargs) + :return: list of command strings + """ + # Each mock call is a (args, kwargs) tuple, extract the command string + # from args[1] in each call. 
+ call_list = [args_[1] for args_, kwargs_ in calls] + return call_list + + +# ############################################################################# +# _DockerFlowTestHelper +# ############################################################################# + + +class _DockerFlowTestHelper(hunitest.TestCase): + """ + Helper test class to perform common setup, teardown logic and assertion + checks for Docker flow tests. + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + self.set_up_test() + yield + self.tear_down_test() + + def set_up_test(self) -> None: + # Mock system calls. + self.system_patcher = umock.patch("helpers.hsystem.system") + self.mock_system = self.system_patcher.start() + # Mock run. + self.run_patcher = umock.patch("helpers.lib_tasks_utils.run") + self.mock_run = self.run_patcher.start() + # Mock version validation. + self.version_patcher = umock.patch( + "helpers.lib_tasks_docker.dassert_is_subsequent_version" + ) + self.mock_version = self.version_patcher.start() + # Mock docker login. + self.docker_login_patcher = umock.patch( + "helpers.lib_tasks_docker.docker_login" + ) + self.mock_docker_login = self.docker_login_patcher.start() + # Mock environment variable. 
+ self.env_patcher = umock.patch.dict( + "os.environ", {"CSFY_ECR_BASE_PATH": "test.ecr.path"} + ) + self.get_default_param_patcher = umock.patch( + "helpers.lib_tasks_utils.get_default_param", + side_effect=lambda param: { + "CSFY_ECR_BASE_PATH": "test.ecr.path", + "BASE_IMAGE": "test-image", + }.get(param, ""), + ) + self.mock_get_default_param = self.get_default_param_patcher.start() + self.env_patcher.start() + self.get_docker_base_image_name_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_docker_base_image_name" + ) + self.mock_get_docker_base_image_name = ( + self.get_docker_base_image_name_patcher.start() + ) + # + self.patchers = { + "system": self.system_patcher, + "run": self.run_patcher, + "version": self.version_patcher, + "docker_login": self.docker_login_patcher, + "env": self.env_patcher, + "docker_base_image_name": self.get_docker_base_image_name_patcher, + "default_param": self.get_default_param_patcher, + } + # Test inputs. + self.mock_ctx = httestlib._build_mock_context_returning_ok() + self.test_version = "1.0.0" + self.test_base_image = "test-registry.com/test-image" + self.test_multi_arch = "linux/amd64,linux/arm64" + self.mock_get_docker_base_image_name.return_value = "test-image" + + def tear_down_test(self) -> None: + """ + Clean up test environment by stopping all mocks after each test case. + """ + for patcher in self.patchers.values(): + patcher.stop() + + def _check_docker_command_output( + self, expected: str, call_args_list: List[umock._Call] + ) -> None: + """ + Verify that the sequence of Docker commands from mock calls matches the + expected string. 
+ + :param expected: expected command string + :param call_args_list: list of mock call objects + """ + actual_cmds = _extract_commands_from_call(call_args_list) + actual_cmds = "\n".join(actual_cmds) + _LOG.debug("Actual Docker commands:\n%s", actual_cmds) + self.assert_equal( + actual_cmds, + expected, + purify_text=True, + purify_expected_text=True, + fuzzy_match=True, + remove_lead_trail_empty_lines=True, + dedent=True, + ) + + +# ############################################################################# +# Test_docker_build_local_image1 +# ############################################################################# + + +class Test_docker_build_local_image1(_DockerFlowTestHelper): + """ + Test building a local Docker image. + """ + + def test_single_arch1(self) -> None: + """ + Test building with single architecture. + + This test checks: + - Single architecture build + - No-cache build options + - Custom build arguments + - Local user-specific tagging + """ + # Call tested function. + hltadore.docker_build_local_image( + self.mock_ctx, + self.test_version, + cache=False, + base_image=self.test_base_image, + poetry_mode="update", + ) + # The output is a list of strings, each representing a command. + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multi_arch1(self) -> None: + """ + Test building with multiple architectures. + + This test checks: + - Multi-architecture build (amd64, arm64) + - Buildx driver setup + - Platform-specific build options + - Image pushing to registry + """ + # Call tested function. + hltadore.docker_build_local_image( + self.mock_ctx, + self.test_version, + cache=False, + base_image=self.test_base_image, + poetry_mode="update", + multi_arch=self.test_multi_arch, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test-registry.com/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_build_prod_image1 +# ############################################################################# + + +class Test_docker_build_prod_image1(_DockerFlowTestHelper): + """ + Test building a prod Docker image. + """ + + def test_single_arch_prod_image1(self) -> None: + """ + Test building with single architecture. + + This test checks: + - Production build workflow + - Single architecture build + - Build arguments for prod environment + - Prod image versioning + - Default and versioned tagging + """ + # Call tested function. + hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-1.0.0 \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker tag test-registry.com/test-image:prod-1.0.0 test-registry.com/test-image:prod + docker image ls test-registry.com/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multi_arch_prod_image1(self) -> None: + """ + Test building with multiple architectures. + + This test checks: + - Multi-architecture production build + - Buildx setup for multi-platform builds + - Push to registry during build + - Production build arguments + - Multi-arch specific options + """ + # Call tested function. 
+ hltadore.docker_build_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + multi_arch=self.test_multi_arch, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ + --tag test-registry.com/test-image:prod-1.0.0 \ + --file devops/docker_build/prod.Dockerfile \ + - + docker pull test-registry.com/test-image:prod-1.0.0 + docker image ls test-registry.com/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + # TODO(gp): Is the assertion too strict? + reason="Needs to run inside a super module", + ) + def test_candidate_tag1(self) -> None: + """ + Test building with candidate mode using tag. + + This test checks: + - Production build using candidate mode + - Custom tag specification + - Build arguments + - Non-default image tagging + """ + test_tag = "test_tag" + # Call tested function. 
+ hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + candidate=True, + tag=test_tag, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-test_tag \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker image ls test-registry.com/test-image:prod-test_tag + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_candidate_user_tag1(self) -> None: + """ + Test building with candidate mode using user tag. + + This test checks: + - Production build using candidate mode + - Combined user and custom tag parameters + - Custom tag format (prod-user-tag) + - Build arguments + """ + test_user_tag = "test_user" + test_tag = "test_tag" + # Call tested function. 
+ hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + candidate=True, + user_tag=test_user_tag, + tag=test_tag, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-test_user-test_tag \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker image ls test-registry.com/test-image:prod-test_user-test_tag + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_tag_push_multi_arch_prod_image1 +# ############################################################################# + + +class Test_docker_tag_push_multi_arch_prod_image1(_DockerFlowTestHelper): + """ + Test tagging and pushing a multi-architecture Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Multi-arch image tagging + - AWS ECR target registry + - Production image versioning + """ + # Call tested function. + target_registry = "aws_ecr.ck" + hltadore.docker_tag_push_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dockerhub1(self) -> None: + """ + Test pushing to DockerHub from AWS ECR. + + This test checks: + - Multi-arch image tagging + - DockerHub registry (differs from AWS ECR test) + - Version and latest tagging + - Cross-registry image copying + """ + # Call tested function. 
+ target_registry = "dockerhub.causify" + hltadore.docker_tag_push_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_tag_push_multi_build_local_image_as_dev1 +# ############################################################################# + + +class Test_docker_tag_push_multi_build_local_image_as_dev1( + _DockerFlowTestHelper +): + """ + Test tagging and pushing a multi-arch local Docker image as dev. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Multi-arch image tagging + - AWS ECR target registry + - Dev image versioning + - Default and versioned tagging + """ + # Call tested function. + target_registry = "aws_ecr.ck" + hltadore.docker_tag_push_multi_build_local_image_as_dev( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dockerhub1(self) -> None: + """ + Test pushing to DockerHub from AWS ECR. + + This test checks: + - Multi-arch image tagging + - DockerHub registry (differs from AWS ECR test) + - Version and latest tagging + - Cross-registry image copying + """ + # Call tested function. 
+ target_registry = "dockerhub.causify" + hltadore.docker_tag_push_multi_build_local_image_as_dev( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_dev_image1 +# ############################################################################# + + +class Test_docker_release_dev_image1(_DockerFlowTestHelper): + """ + Test releasing a dev Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test releasing the dev image to AWS ECR. + + This test checks: + - Build workflow + - No-cache build options + - Dev image versioning + - Default and versioned tagging + - Registry target selection + - Architecture support + - Tagging and versioning + """ + # Call tested function. + hltadore.docker_release_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + push_to_repo=True, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_prod_image1 +# ############################################################################# + + +class Test_docker_release_prod_image1(_DockerFlowTestHelper): + """ + Test releasing a prod Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test releasing the prod image to AWS ECR. + + This test checks: + - Build workflow + - No-cache build options + - Prod image versioning + - Default and versioned tagging + - Registry target selection + - Architecture support + - Tagging and versioning + """ + # Call tested function. 
+ hltadore.docker_release_prod_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + push_to_repo=True, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test.ecr.path/test-image:prod-1.0.0 \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod + docker image ls test.ecr.path/test-image:prod + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_multi_build_dev_image1 +# ############################################################################# + + +class Test_docker_release_multi_build_dev_image1(_DockerFlowTestHelper): + """ + Test releasing a multi-arch dev Docker image. + """ + + def test_single_registry1(self) -> None: + """ + Test releasing to a single registry. + + This test checks: + - Multi-arch build setup + - Build and push workflow + - Dev image tagging + - Test skipping options + - Single registry target + """ + # Call tested function. + hltadore.docker_release_multi_build_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + target_registries="aws_ecr.ck", + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multiple_registries1(self) -> None: + """ + Test releasing to multiple registries. + + This test checks: + - Multi-arch build workflow + - Multiple registry targets (AWS ECR and DockerHub) + - Parallel image tagging + - Image retagging for different registries + """ + # Call tested function. + hltadore.docker_release_multi_build_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + target_registries="aws_ecr.ck,dockerhub.causify", + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_rollback_dev_image1 +# ############################################################################# + + +class Test_docker_rollback_dev_image1(_DockerFlowTestHelper): + """ + Test rolling back a dev Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test rolling back and pushing to AWS ECR. + + This test checks: + - Dev image rollback workflow + - Version-specific image pull + - Retagging as latest + - Repository pushing + """ + # Call tested function. 
+ hltadore.docker_rollback_dev_image( + self.mock_ctx, + self.test_version, + push_to_repo=True, + ) + expected = r""" + docker pull test.ecr.path/test-image:dev-1.0.0 + docker tag test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_rollback_prod_image1 +# ############################################################################# + + +class Test_docker_rollback_prod_image1(_DockerFlowTestHelper): + """ + Test rolling back a prod Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test rolling back and pushing to AWS ECR. + + This test checks: + - Production image rollback workflow + - Version-specific image pull + - Retagging as latest production + - Repository pushing + """ + # Call tested function. + hltadore.docker_rollback_prod_image( + self.mock_ctx, + self.test_version, + push_to_repo=True, + ) + expected = r""" + docker pull test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_push_prod_candidate_image1 +# ############################################################################# + + +class Test_docker_push_prod_candidate_image1(_DockerFlowTestHelper): + """ + Test pushing a prod candidate Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Candidate image pushing + - AWS ECR target registry + - Hash-based image tagging + """ + # Call tested function. 
+ candidate = "4759b3685f903e6c669096e960b248ec31c63b69" + hltadore.docker_push_prod_candidate_image( + self.mock_ctx, + candidate=candidate, + ) + expected = r""" + docker push test.ecr.path/test-image:prod-4759b3685f903e6c669096e960b248ec31c63b69 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_multi_arch_prod_image1 +# ############################################################################# + + +class Test_docker_release_multi_arch_prod_image1(_DockerFlowTestHelper): + """ + Test releasing a multi-arch prod Docker image. + """ + + def test_multiple_registries1(self) -> None: + """ + Test releasing to AWS ECR and DockerHub. + + This test checks: + - Multi-arch build workflow + - AWS ECR and DockerHub target registries + - Test skipping options + - Image tagging and pushing + """ + # Call tested function. + hltadore.docker_release_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + docker_registry=["aws_ecr.ck", "dockerhub.causify"], + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ + --tag test.ecr.path/test-image:prod-1.0.0 \ + --file devops/docker_build/prod.Dockerfile \ + - + docker pull test.ecr.path/test-image:prod-1.0.0 + docker image ls test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_create_candidate_image1 +# ############################################################################# + + +class Test_docker_create_candidate_image1(_DockerFlowTestHelper): + """ + Test creating a candidate Docker image. + """ + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks specific to this test + class. + """ + self.set_up_test() + # Mock git hash. + self.git_hash_patcher = umock.patch( + "helpers.hgit.get_head_hash", + return_value="4759b3685f903e6c669096e960b248ec31c63b69", + ) + self.mock_git_hash = self.git_hash_patcher.start() + self.patchers["git_hash"] = self.git_hash_patcher + # Mock workspace size check. + self.workspace_check_patcher = umock.patch( + "helpers.lib_tasks_docker_release._check_workspace_dir_sizes" + ) + self.mock_workspace_check = self.workspace_check_patcher.start() + self.patchers["workspace_check"] = self.workspace_check_patcher + # Mock file existence check to handle both paths. 
+ self.file_exists_patcher = umock.patch( + "helpers.hdbg.dassert_file_exists" + ) + self.mock_file_exists = self.file_exists_patcher.start() + self.patchers["file_exists"] = self.file_exists_patcher + # Mock `docker_build_prod_image()`. + self.build_prod_patcher = umock.patch( + "helpers.lib_tasks_docker_release.docker_build_prod_image" + ) + self.mock_build_prod = self.build_prod_patcher.start() + self.patchers["build_prod"] = self.build_prod_patcher + # Mock `docker_push_prod_candidate_image()`. + self.push_prod_patcher = umock.patch( + "helpers.lib_tasks_docker_release.docker_push_prod_candidate_image" + ) + self.mock_push_prod = self.push_prod_patcher.start() + self.patchers["push_prod"] = self.push_prod_patcher + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + def test_aws_ecr1(self) -> None: + """ + Test creating and pushing to AWS ECR. + + This test checks: + - `docker_build_prod_image()` is called once with the candidate tag + - `docker_push_prod_candidate_image()` is called once with that tag + """ + # Call tested function. + hltadore.docker_create_candidate_image( + self.mock_ctx, + user_tag="test_user", + ) + # Verify the mocks were called with correct parameters.
+ self.mock_build_prod.assert_called_once_with( + self.mock_ctx, + container_dir_name=".", + version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, + candidate=True, + tag="test_user-4759b3685f903e6c669096e960b248ec31c63b69", + ) + self.mock_push_prod.assert_called_once_with( + self.mock_ctx, + "test_user-4759b3685f903e6c669096e960b248ec31c63b69", + ) + + +# ############################################################################# +# Test_docker_update_prod_task_definition1 +# ############################################################################# + + +class Test_docker_update_prod_task_definition1(_DockerFlowTestHelper): + """ + Test updating a prod task definition to the desired version. + """ + + @pytest.fixture(autouse=True) + def aws_credentials(self) -> None: + """ + Mocked AWS credentials for moto. + """ + os.environ["DOCKER_MOCK_AWS_ACCESS_KEY_ID"] = "testing" + os.environ["DOCKER_MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["DOCKER_MOCK_AWS_SECURITY_TOKEN"] = "testing" + os.environ["DOCKER_MOCK_AWS_SESSION_TOKEN"] = "testing" + os.environ["DOCKER_MOCK_AWS_DEFAULT_REGION"] = "us-east-1" + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks specific to this test + class. + """ + self.set_up_test() + # Mock AWS and S3 functionality. + self.aws_patcher = umock.patch( + "helpers.haws.get_task_definition_image_url" + ) + self.mock_aws = self.aws_patcher.start() + self.mock_aws.return_value = ( + "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69" + ) + self.patchers["aws"] = self.aws_patcher + self.s3_patcher = umock.patch("helpers.hs3.get_s3fs") + self.mock_s3 = self.s3_patcher.start() + self.mock_s3.return_value.cat.return_value = b"test_content" + self.patchers["s3"] = self.s3_patcher + # Mock file operations. 
+ self.file_patcher = umock.patch( + "helpers.hs3.from_file", return_value="test_content" + ) + self.mock_file = self.file_patcher.start() + self.patchers["file"] = self.file_patcher + # Mock listdir to return test DAG files. + self.listdir_patcher = umock.patch( + "helpers.hs3.listdir", + return_value=["/app/im_v2/airflow/dags/test_dag.py"], + ) + self.mock_listdir = self.listdir_patcher.start() + self.patchers["listdir"] = self.listdir_patcher + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + # Clean up environment variables. + for key in [ + "DOCKER_MOCK_AWS_ACCESS_KEY_ID", + "DOCKER_MOCK_AWS_SECRET_ACCESS_KEY", + "DOCKER_MOCK_AWS_SECURITY_TOKEN", + "DOCKER_MOCK_AWS_SESSION_TOKEN", + "DOCKER_MOCK_AWS_DEFAULT_REGION", + ]: + if key in os.environ: + del os.environ[key] + # Call parent teardown. + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + @moto.mock_aws + @umock.patch("helpers.haws.update_task_definition") + @umock.patch("helpers.haws.get_ecs_client") + def test_promotion_to_prod( + self, + mock_get_ecs_client: umock.Mock, + mock_update_task_definition: umock.Mock, + ) -> None: + """ + Test the promotion of a preprod Docker image and DAGs to production. + + This test checks: + - Task definition update workflow + - Preprod to prod image conversion. + - DAG file synchronization + - Image tagging and pushing + """ + # Mock AWS ECS client using moto and register a task definition. 
+ region = "us-east-1" + mock_ecs_client = boto3.client("ecs", region_name=region) + mock_ecs_client.register_task_definition( + family="test_task", + containerDefinitions=[ + { + "name": "test-container", + "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", + } + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + mock_get_ecs_client.return_value = mock_ecs_client + # Add mock client to patchers for cleanup. + self.ecs_client_patcher = umock.patch( + "boto3.client", return_value=mock_ecs_client + ) + self.mock_ecs_client = self.ecs_client_patcher.start() + self.patchers["ecs_client_test1"] = self.ecs_client_patcher + # Call tested function. + hltadore.docker_update_prod_task_definition( + self.mock_ctx, + version=self.test_version, + preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", + airflow_dags_s3_path="s3://test-bucket/dags/", + task_definition="test_task", + ) + expected = r""" + docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod + docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + # Check whether `update_task_definition` was called with the expected arguments. 
+ expected_image_url = "test.ecr.path/test-image:prod-1.0.0" + mock_update_task_definition.assert_called_once_with( + "test_task", expected_image_url, environment="prod" + ) + + @moto.mock_aws + @umock.patch("helpers.haws.get_ecs_client") + def test_promotion_to_prod_exception_handling( + self, mock_get_ecs_client: umock.Mock + ) -> None: + """ + Test exception handling and rollback behavior when updating prod task + definition. + + This test checks: + - Exception handling during task definition update + - Rollback of task definition to original image + - Rollback of S3 DAG files + - Proper error propagation + """ + # Mock AWS ECS client using moto and register a task definition. + region = "us-east-1" + mock_ecs_client = boto3.client("ecs", region_name=region) + mock_ecs_client.register_task_definition( + family="test_task", + containerDefinitions=[ + { + "name": "test-container", + "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", + } + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + mock_get_ecs_client.return_value = mock_ecs_client + # Add mock client to patchers for cleanup. + self.ecs_client_patcher = umock.patch( + "boto3.client", return_value=mock_ecs_client + ) + self.mock_ecs_client = self.ecs_client_patcher.start() + self.patchers["ecs_client_test2"] = self.ecs_client_patcher + # Mock S3 bucket operations to simulate a failure. + self.mock_s3.return_value.put.side_effect = Exception("S3 upload failed") + # Call tested function and verify exception is raised. + with self.assertRaises(Exception) as cm: + hltadore.docker_update_prod_task_definition( + self.mock_ctx, + version=self.test_version, + preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", + airflow_dags_s3_path="s3://test-bucket/dags/", + task_definition="test_task", + ) + # Check the error message. 
+ self.assertIn("S3 upload failed", str(cm.exception)) + # Check the Docker commands issued: pull/tag/rmi only, no `docker push`. + expected = r""" + docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod + docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + # Check that the task definition image URL was queried for `test_task`. + self.mock_aws.assert_called_with("test_task") + + +# ############################################################################# +# Test_docker_tag_push_dev_image1 +# ############################################################################# + + +class Test_docker_tag_push_dev_image1(_DockerFlowTestHelper): + """ + Test tagging and pushing dev image from a base registry to multiple registries. + """ + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks for GHCR workflow. + """ + super().set_up_test() + # Mock version retrieval from changelog. + self.changelog_version_patcher = umock.patch( + "helpers.hversion.get_changelog_version" + ) + self.mock_changelog_version = self.changelog_version_patcher.start() + self.mock_changelog_version.return_value = self.test_version + # Mock repo config lookup of the container registry URL. + self.get_container_registry_url_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_container_registry_url" + ) + self.mock_get_container_registry_url = ( + self.get_container_registry_url_patcher.start() + ) + # Use side_effect to return different values based on registry.
+ self.mock_get_container_registry_url.side_effect = lambda registry: { + "ghcr": "ghcr.io/causify-ai", + "ecr": "test.ecr.path", + }.get(registry, "ghcr.io/causify-ai") + # Add new patchers to cleanup list. + self.patchers.update( + { + "changelog_version": self.changelog_version_patcher, + "container_registry_url": self.get_container_registry_url_patcher, + } + ) + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + def test_normal_execution1(self) -> None: + """ + Test normal execution without dry_run. + + This test checks: + - GHCR image pulling + - Tagging for GHCR and AWS ECR + - Pushing to both registries + - Versioned and latest image handling + """ + # Call tested function. + hltadore.docker_tag_push_dev_image( + self.mock_ctx, + target_registries="ghcr,ecr", + container_dir_name=".", + dry_run=False, + ) + # Verify expected Docker commands were executed. + expected = r""" + docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev + docker push ghcr.io/causify-ai/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 + docker push ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dry_run1(self) -> None: + """ + Test dry_run mode execution. 
+ + This test checks: + - No actual Docker commands are executed when dry_run=True + - All operations are simulated + - Function completes without errors + - Mock calls should include dry_run parameter + """ + # Call tested function with dry_run enabled. + hltadore.docker_tag_push_dev_image( + self.mock_ctx, + target_registries="ghcr,ecr", + container_dir_name=".", + dry_run=True, + ) + # Verify the Docker command sequence recorded by `self.mock_run`. + expected = r""" + docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev + docker push ghcr.io/causify-ai/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 + docker push ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_build_test_dev_image1 +# ############################################################################# + + +class Test_docker_build_test_dev_image1(_DockerFlowTestHelper): + """ + Test the complete periodic dev image release workflow. + """ + + def set_up_test(self) -> None: + """ + Set up test environment with additional mocks for the dev image + workflow. + """ + super().set_up_test() + # Mock version operations.
+ self.get_changelog_version_patcher = umock.patch( + "helpers.hversion.get_changelog_version" + ) + self.mock_get_changelog_version = ( + self.get_changelog_version_patcher.start() + ) + self.mock_get_changelog_version.return_value = "2.3.0" + self.bump_version_patcher = umock.patch("helpers.hversion.bump_version") + self.mock_bump_version = self.bump_version_patcher.start() + self.mock_bump_version.return_value = "2.4.0" + # Mock repo config methods. + self.get_release_team_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_release_team" + ) + self.mock_get_release_team = self.get_release_team_patcher.start() + self.mock_get_release_team.return_value = "dev_system" + self.get_issue_prefix_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_issue_prefix" + ) + self.mock_get_issue_prefix = self.get_issue_prefix_patcher.start() + self.mock_get_issue_prefix.return_value = "TestTask" + self.get_container_registry_url_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_container_registry_url" + ) + self.mock_get_container_registry_url = ( + self.get_container_registry_url_patcher.start() + ) + self.mock_get_container_registry_url.return_value = "ghcr.io/causify-ai" + # Mock GitHub operations. + self.gh_get_team_member_names_patcher = umock.patch( + "helpers.lib_tasks_gh.gh_get_team_member_names" + ) + self.mock_gh_get_team_member_names = ( + self.gh_get_team_member_names_patcher.start() + ) + self.mock_gh_get_team_member_names.return_value = ["user1", "user2"] + self.gh_create_pr_patcher = umock.patch( + "helpers.lib_tasks_gh.gh_create_pr" + ) + self.mock_gh_create_pr = self.gh_create_pr_patcher.start() + # Mock file operations. 
+ self.get_client_root_patcher = umock.patch( + "helpers.hversion._get_client_root" + ) + self.mock_get_client_root = self.get_client_root_patcher.start() + self.mock_get_client_root.return_value = "/test/root" + self.from_file_patcher = umock.patch("helpers.hio.from_file") + self.mock_from_file = self.from_file_patcher.start() + self.mock_from_file.return_value = "# Existing changelog content\n" + self.to_file_patcher = umock.patch("helpers.hio.to_file") + self.mock_to_file = self.to_file_patcher.start() + # Mock file existence check for dassert_file_exists (changelog validation). + self.file_exists_patcher = umock.patch( + "helpers.hdbg.dassert_file_exists" + ) + self.mock_file_exists = self.file_exists_patcher.start() + # Mock os.path.exists selectively for file staging logic. + # Store the original function before patching + original_exists = os.path.exists + # Define which files should exist for staging + staged_files = { + "/test/root/./devops/docker_build/poetry.lock", + "/test/root/./devops/docker_build/pip_list.txt", + "/test/root/./changelog.txt", + } + + def selective_exists(path): + # Return True for staged files, use original function for everything else + if path in staged_files: + return True + return original_exists(path) + + self.path_exists_patcher = umock.patch( + "os.path.exists", side_effect=selective_exists + ) + self.mock_path_exists = self.path_exists_patcher.start() + # Mock date operations. + self.date_patcher = umock.patch("datetime.date") + self.mock_date = self.date_patcher.start() + # Set up strftime to return different formats based on the format string. + # Branch name uses %Y%m%d, changelog uses %Y-%m-%d + self.mock_date.today.return_value.strftime.side_effect = lambda fmt: { + "%Y%m%d": "20251023", + "%Y-%m-%d": "2025-10-23", + }.get(fmt, "2025-10-23") + # Mock Docker image operations. 
+ self.get_image_patcher = umock.patch( + "helpers.lib_tasks_docker.get_image" + ) + self.mock_get_image = self.get_image_patcher.start() + self.mock_get_image.return_value = ( + "test.ecr.path/test-image:local-testuser-2.4.0" + ) + # Mock _run_tests to prevent actual test execution. + self.run_tests_patcher = umock.patch( + "helpers.lib_tasks_docker_release._run_tests" + ) + self.mock_run_tests = self.run_tests_patcher.start() + # Mock is_inside_ci to control CI-specific behavior. + self.is_inside_ci_patcher = umock.patch("helpers.hserver.is_inside_ci") + self.mock_is_inside_ci = self.is_inside_ci_patcher.start() + # Default to True to simulate CI environment. + self.mock_is_inside_ci.return_value = True + # Add all new patchers to cleanup list. + self.patchers.update( + { + "get_changelog_version": self.get_changelog_version_patcher, + "bump_version": self.bump_version_patcher, + "get_release_team": self.get_release_team_patcher, + "get_issue_prefix": self.get_issue_prefix_patcher, + "container_registry_url": self.get_container_registry_url_patcher, + "gh_get_team_member_names": self.gh_get_team_member_names_patcher, + "gh_create_pr": self.gh_create_pr_patcher, + "get_client_root": self.get_client_root_patcher, + "from_file": self.from_file_patcher, + "to_file": self.to_file_patcher, + "file_exists": self.file_exists_patcher, + "path_exists": self.path_exists_patcher, + "date": self.date_patcher, + "get_image": self.get_image_patcher, + "run_tests": self.run_tests_patcher, + "is_inside_ci": self.is_inside_ci_patcher, + } + ) + + def test_complete_workflow1(self) -> None: + """ + Test the complete periodic dev image release workflow. + """ + # Call the tested function. + hltadore.docker_build_test_dev_image( + self.mock_ctx, + reviewers="", # Empty to trigger team lookup + container_dir_name=".", + ) + # Verify version operations were called. 
+ self.mock_bump_version.assert_called_once_with( + "2.3.0", bump_type="minor" + ) + # Verify GitHub team lookup was performed. + self.mock_get_release_team.assert_called_once() + self.mock_gh_get_team_member_names.assert_called_once_with("dev_system") + # Verify issue prefix was fetched for branch creation. + self.mock_get_issue_prefix.assert_called() + # Verify PR was created with team members as reviewers. + self.mock_gh_create_pr.assert_called_once() + pr_call_args = self.mock_gh_create_pr.call_args + self.assertIn("reviewer", pr_call_args.kwargs) + self.assertEqual(pr_call_args.kwargs["reviewer"], "user1,user2") + # Verify expected Docker and Git commands were executed. + expected = r""" + git checkout -b TestTask_Periodic_image_release_20251023 + cp -f devops/docker_build/dockerignore.dev /app/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + \ + --build-arg AM_CONTAINER_VERSION=2.4.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-testuser-2.4.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 2.4.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-testuser-2.4.0 + sudo chmod -R 777 .git/objects/ + git add /test/root/./devops/docker_build/poetry.lock + git add /test/root/./devops/docker_build/pip_list.txt + git add /test/root/./changelog.txt + git commit -m "Poetry output from the v2.4.0 build" --no-verify + git push origin TestTask_Periodic_image_release_20251023 + docker tag test.ecr.path/test-image:local-testuser-2.4.0 ghcr.io/causify-ai/test-image:dev-2.4.0 + docker push ghcr.io/causify-ai/test-image:dev-2.4.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_with_existing_reviewers1(self) -> None: + """ + Test the workflow when reviewers is already provided. + """ + # Call the tested function with a specific reviewer. + hltadore.docker_build_test_dev_image( + self.mock_ctx, + reviewers="specific_user", + container_dir_name=".", + ) + # Verify PR was created with the provided reviewer. + self.mock_gh_create_pr.assert_called_once() + pr_call_args = self.mock_gh_create_pr.call_args + self.assertIn("reviewer", pr_call_args.kwargs) + self.assertEqual(pr_call_args.kwargs["reviewer"], "specific_user") + # Verify team lookup was NOT performed since reviewers was provided. 
+ self.mock_gh_get_team_member_names.assert_not_called() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py new file mode 100644 index 000000000..886e1dc36 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py @@ -0,0 +1,267 @@ +import logging +import os + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.lib_tasks_find as hlitafin +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_find_short_import1 +# ############################################################################# + + +class Test_find_short_import1(hunitest.TestCase): + def test1(self) -> None: + iterator = [ + ("file1.py", 10, "import dataflow.core.dag_runner as dtfcodarun"), + ("file1.py", 11, "import helpers.hpandas as hpandas"), + ] + results = hlitafin._find_short_import(iterator, "dtfcodarun") + actual = "\n".join(map(str, results)) + # pylint: disable=line-too-long + expected = r"""('file1.py', 10, 'import dataflow.core.dag_runner as dtfcodarun', 'dtfcodarun', 'import dataflow.core.dag_runner as dtfcodarun')""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_find_func_class_uses1 +# ############################################################################# + + +class Test_find_func_class_uses1(hunitest.TestCase): + def test1(self) -> None: + iterator = [ + ( + "file1.py", + 10, + "dag_runner = 
dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)", + ), + ( + "file1.py", + 11, + "This test is similar to `TestRealTimeDagRunner1`. It uses:", + ), + ("file1.py", 12, "dag_builder: dtfcodabui.DagRunner,"), + ("file1.py", 13, ":param dag_builder: `DagRunner` instance"), + ] + results = hlitafin._find_func_class_uses(iterator, "DagRunner") + actual = "\n".join(map(str, results)) + expected = r""" + ('file1.py', 10, 'dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)', 'dtfamsys', 'RealTimeDagRunner') + ('file1.py', 12, 'dag_builder: dtfcodabui.DagRunner,', 'dtfcodabui', 'DagRunner')""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestLibTasksRunTests1 +# ############################################################################# + + +class TestLibTasksRunTests1(hunitest.TestCase): + """ + Test `_find_test_files()`, `_find_test_class()`, `_find_test_decorator()`. + """ + + def test_find_test_files1(self) -> None: + """ + Find all the test files in the current dir. + """ + files = hlitafin._find_test_files() + # There must be more than one test file, this one included. + self.assertGreater(len(files), 1) + + def test_find_test_files2(self) -> None: + """ + Find all the test files from the top of the super module root. + """ + git_root = hgit.get_client_root(super_module=True) + files = hlitafin._find_test_files(git_root) + # There must be more than one test file, this one included. + self.assertGreater(len(files), 1) + + def test_find_test_class1(self) -> None: + """ + Find the current test class among the test files under the super module root.
+ """ + git_root = hgit.get_client_root(super_module=True) + file_names = hlitafin._find_test_files(git_root) + # Look for this test class among the files found. + file_names = hlitafin._find_test_class( + "TestLibTasksRunTests1", file_names + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(file_names) + expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test_find_test_class2(self) -> None: + """ + Find the current test class when given only this file. + """ + file_names = [__file__] + # Look for this test class in the single file. + file_names = hlitafin._find_test_class( + "TestLibTasksRunTests1", file_names + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(file_names) + expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test_find_test_class3(self) -> None: + """ + Create synthetic code and look for a class. + """ + scratch_space = self.get_scratch_space() + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # Check that both synthetic test files are found. + file_names = hlitafin._find_test_files(dir_name) + act_file_names = [os.path.relpath(d, scratch_space) for d in file_names] + exp_file_names = ["test/test_that.py", "test/test_this.py"] + self.assert_equal(str(act_file_names), str(exp_file_names)) + # Check that only the file defining `TestHelloWorld` is reported. + actual = hlitafin._find_test_class("TestHelloWorld", file_names) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(actual) + expected = [ + "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_class3/tmp.scratch/" + "test/test_this.py::TestHelloWorld" + ] + self.assert_equal(str(actual), str(expected), 
purify_text=True) + + def test_find_test_decorator1(self) -> None: + """ + Find the test files marked with "no_container" in synthetic code. + """ + scratch_space = self.get_scratch_space() + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + @pytest.mark.no_container + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # Only the file carrying the `no_container` marker should be reported. + file_names = hlitafin._find_test_files(dir_name) + actual = hlitafin._find_test_decorator("no_container", file_names) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(actual) + expected = [ + "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_decorator1/" + "tmp.scratch/test/test_that.py" + ] + self.assert_equal(str(actual), str(expected), purify_text=True) + + # TODO(gp): This test can run in amp. + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_find_test_decorator2(self) -> None: + """ + Find the "qa" test decorator in the real file `hunit_test.py`. + """ + file_name = hgit.find_file_in_git_tree("hunit_test.py") + file_names = [file_name] + actual = hlitafin._find_test_decorator("qa", file_names) + expected = ["$GIT_ROOT/helpers/hunit_test.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + +# ############################################################################# +# Test_find_check_string_output1 +# ############################################################################# + + +class Test_find_check_string_output1(hunitest.TestCase): + def helper(self, expected: str, fuzzy_match: bool) -> None: + # Look for the `check_string()` corresponding to this test.
+ ctx = httestlib._build_mock_context_returning_ok() + class_name = self.__class__.__name__ + method_name = self._testMethodName + as_python = True + # We don't want to copy but just print. + pbcopy = False + actual = hlitafin.find_check_string_output( + ctx, class_name, method_name, as_python, fuzzy_match, pbcopy + ) + # Check that it matches exactly. + self.assert_equal(actual, expected, fuzzy_match=False) + + def test1(self) -> None: + """ + Test `find_check_string_output()` by searching the `check_string` of + this test. + """ + # Force to generate a `check_string` file so we can search for it. + actual = "A fake check_string output to use for test1" + self.check_string(actual) + # Check. + expected = ''' + actual = + expected = r""" + A fake check_string output to use for test1 + """.lstrip().rstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + ''' + self.helper(expected, fuzzy_match=False) + + def test2(self) -> None: + """ + Like test1 but using `fuzzy_match=True`. + """ + # Force to generate a `check_string` file so we can search for it. + actual = "A fake check_string output to use for test2" + self.check_string(actual) + # Check. 
+ expected = ''' + actual = + expected = r""" +A fake check_string output to use for test2 + + """.lstrip().rstrip() + self.assert_equal(actual, expected, fuzzy_match=True) + ''' + self.helper(expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py new file mode 100644 index 000000000..a5ee64c9e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py @@ -0,0 +1,133 @@ +import logging +import unittest.mock as umock + +import pytest + +import helpers.hgit as hgit +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_gh as hlitagh + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# TestLibTasks1 +# ############################################################################# + + +class TestLibTasks1(hunitest.TestCase): + """ + Test some auxiliary functions, e.g., `_get_gh_issue_title()`. + """ + + @pytest.mark.skip("CmTask #2362.") + def test_get_gh_issue_title1(self) -> None: + issue_id = 1 + repo = "amp" + actual = hlitagh._get_gh_issue_title(issue_id, repo) + expected = ( + "AmpTask1_Bridge_Python_and_R", + "https://github.com/alphamatic/amp/issues/1", + ) + self.assert_equal(str(actual), str(expected)) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="""Skip unless helpers is the supermodule. Fails when updating submodules; + passes in fast tests super-repo run. 
See CmTask10845.""", + ) + def test_get_gh_issue_title4(self) -> None: + cmd = "invoke gh_login" + hsystem.system(cmd) + # + issue_id = 1 + repo = "current" + _ = hlitagh._get_gh_issue_title(issue_id, repo) + + def test_get_org_name1(self) -> None: + """ + Test _get_org_name when org_name is provided. + """ + org_name = "test-org" + result = hlitagh._get_org_name(org_name) + expected = "test-org" + self.assertEqual(result, expected) + + @umock.patch.object(hgit, "get_repo_full_name_from_dirname") + def test_get_org_name2(self, mock_get_repo: umock.Mock) -> None: + """ + Test _get_org_name when org_name is empty (infers from repo). + """ + mock_get_repo.return_value = "causify-ai/helpers" + result = hlitagh._get_org_name("") + expected = "causify-ai" + self.assertEqual(result, expected) + mock_get_repo.assert_called_once_with(".", include_host_name=False) + + +# ############################################################################# +# TestGhOrgTeamFunctions +# ############################################################################# + + +class TestGhOrgTeamFunctions(hunitest.TestCase): + """ + Test gh_get_org_team_names and gh_get_team_member_names with mocked data. + """ + + @umock.patch.object(hlitagh, "_gh_run_and_get_json") + @umock.patch.object(hlitagh, "_get_org_name") + def test_gh_get_org_team_names1( + self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock + ) -> None: + """ + Test gh_get_org_team_names with sorted team names. + """ + # Setup mocks. + mock_get_org_name.return_value = "test-org" + mock_gh_run.return_value = [ + {"slug": "dev_backend", "id": 1}, + {"slug": "dev_frontend", "id": 2}, + {"slug": "qa_team", "id": 3}, + ] + # Call function. + result = hlitagh.gh_get_org_team_names("test-org", sort=True) + # Verify result. + expected = ["dev_backend", "dev_frontend", "qa_team"] + self.assertEqual(result, expected) + # Verify mocks were called correctly. 
+ mock_get_org_name.assert_called_once_with("test-org") + mock_gh_run.assert_called_once_with( + "gh api /orgs/test-org/teams --paginate" + ) + + @umock.patch.object(hlitagh, "_gh_run_and_get_json") + @umock.patch.object(hlitagh, "_get_org_name") + def test_gh_get_team_member_names1( + self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock + ) -> None: + """ + Test gh_get_team_member_names with member list. + """ + # Setup mocks. + mock_get_org_name.return_value = "test-org" + mock_gh_run.return_value = [ + {"login": "user1", "id": 101}, + {"login": "user2", "id": 102}, + {"login": "user3", "id": 103}, + ] + # Call function. + result = hlitagh.gh_get_team_member_names( + "dev_team", org_name="test-org" + ) + # Verify result. + expected = ["user1", "user2", "user3"] + self.assertEqual(result, expected) + # Verify mocks were called correctly. + mock_get_org_name.assert_called_once_with("test-org") + mock_gh_run.assert_called_once_with( + "gh api /orgs/test-org/teams/dev_team/members --paginate" + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py new file mode 100644 index 000000000..2695a505f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py @@ -0,0 +1,249 @@ +import logging +from typing import List + +import pytest + +import helpers.hgit as hgit +import helpers.hunit_test as hunitest +import helpers.lib_tasks_git as hlitagit +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# TestLibTasksGitCreatePatch1 +# ############################################################################# + + 
+@pytest.mark.slow(reason="Around 7s") +@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class TestLibTasksGitCreatePatch1(hunitest.TestCase): + """ + Test `git_patch_create()`. + """ + + @staticmethod + def helper( + modified: bool, branch: bool, last_commit: bool, files: str + ) -> None: + ctx = httestlib._build_mock_context_returning_ok() + # + mode = "tar" + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + # + mode = "diff" + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + + def test1(self) -> None: + """ + Test modified files mode. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + modified = True + branch = False + last_commit = False + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test2(self) -> None: + """ + Test branch mode. + """ + # Prepare inputs. + modified = False + branch = True + last_commit = False + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test3(self) -> None: + """ + Test last commit mode. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + modified = False + branch = False + last_commit = True + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test4(self) -> None: + """ + Test tar mode with specific files. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + ctx = httestlib._build_mock_context_returning_ok() + mode = "tar" + modified = True + branch = False + last_commit = False + files = __file__ + # Run test. + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + + def test5(self) -> None: + """ + Test diff mode with files but no mode flag raises AssertionError. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. 
+ ctx = httestlib._build_mock_context_returning_ok() + mode = "diff" + modified = False + branch = False + last_commit = False + files = __file__ + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + actual = str(cm.exception) + expected = """ + * Failed assertion * + '0' + == + '1' + Specify only one among --modified, --branch, --last-commit + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestFilterGitFilesByType +# ############################################################################# + + +class TestFilterGitFilesByType(hunitest.TestCase): + """ + Test _filter_git_files_by_type() function. + """ + + def helper( + self, files: List[str], types: List[str], expected: List[str] + ) -> None: + """ + Test helper for _filter_git_files_by_type. + + :param files: List of files to filter + :param types: List of file types to filter by + :param expected: Expected filtered result + """ + # Run test. + result = hlitagit._filter_git_files_by_type(files, types) + # Check outputs. + self.assertEqual(result, expected) + + def test1(self) -> None: + """ + Test filtering to include only Python files. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + types = ["py"] + # Prepare outputs. + expected = ["foo.py"] + # Run test. + self.helper(files, types, expected) + + def test2(self) -> None: + """ + Test filtering to include only Jupyter notebooks. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + types = ["ipynb"] + # Prepare outputs. + expected = ["bar.ipynb"] + # Run test. + self.helper(files, types, expected) + + def test3(self) -> None: + """ + Test filtering to include only Markdown files. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + types = ["md"] + # Prepare outputs. 
+ expected = ["baz.md"] + # Run test. + self.helper(files, types, expected) + + def test4(self) -> None: + """ + Test filtering with multiple file types. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md", "qux.txt"] + types = ["py", "md"] + # Prepare outputs. + expected = ["foo.py", "baz.md"] + # Run test. + self.helper(files, types, expected) + + def test5(self) -> None: + """ + Test filtering with all file types. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + types = ["py", "ipynb", "md"] + # Prepare outputs. + expected = files + # Run test. + self.helper(files, types, expected) + + def test6(self) -> None: + """ + Test filtering with empty file list. + """ + # Prepare inputs. + files: List[str] = [] + types = ["py", "ipynb"] + # Prepare outputs. + expected: List[str] = [] + # Run test. + self.helper(files, types, expected) + + def test7(self) -> None: + """ + Test filtering when no files match. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + types = ["txt"] + # Prepare outputs. + expected: List[str] = [] + # Run test. + self.helper(files, types, expected) + + def test8(self) -> None: + """ + Test that filtering preserves file order. + """ + # Prepare inputs. + files = ["c.py", "a.ipynb", "b.md", "d.py"] + types = ["py", "md"] + # Prepare outputs. + expected = ["c.py", "b.md", "d.py"] + # Run test. 
+ self.helper(files, types, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py new file mode 100644 index 000000000..47a41e0d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py @@ -0,0 +1,27 @@ +import helpers.hunit_test as hunitest +import helpers.lib_tasks_integrate as hlitaint + + +# ############################################################################# +# Test_infer_dst_dir1 +# ############################################################################# + + +class Test_infer_dst_dir1(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + src_dir = "/src/cmamp1/oms/broker/broker.py" + # Call function to test. + actual = hlitaint._infer_dst_file_path( + src_dir, + default_src_dir_basename="cmamp1", + default_dst_dir_basename="amp1", + check_exists=False, + ) + # Define expected output. + expected = ( + "/src/amp1/oms/broker/broker.py", + "oms/broker/broker.py", + ) + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py new file mode 100644 index 000000000..cb40f72a5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py @@ -0,0 +1,32 @@ +import logging + +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_lint as hlitalin +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_lint_check_if_it_was_run +# ############################################################################# + + +class Test_lint_check_if_it_was_run(hunitest.TestCase): + """ + Test `lint_check_if_it_was_run()`. + """ + + def test1(self) -> None: + # Build a mock context. + ctx = httestlib._build_mock_context_returning_ok() + # Stash the leftover changes from the previous tests. + cmd = "git stash --include-untracked" + hsystem.system(cmd) + # Simple check that the function does not fail. + _ = hlitalin.lint_check_if_it_was_run(ctx) + # Pop the stashed changes to restore the original state. + cmd = "git stash pop" + # Do not abort on error because the stash may be empty. 
+ hsystem.system(cmd, abort_on_error=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py new file mode 100644 index 000000000..321f7f515 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py @@ -0,0 +1,1163 @@ +import logging +import os +import re +import unittest.mock as umock +from typing import List + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_pytest as hlitapyt +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +def _remove_junit_suite_name(text: str) -> str: + """ + Remove the junit suite name from the input text. + - E.g. '-o junit_suite_name="helpers"' -> '-o junit_suite_name=""' + + :param text: input text to process + :return: text with the junit suite name removed + """ + txt = re.sub(r'(-o\s*junit_suite_name=)"[^"]*"', r'\1""', text) + return txt + + +def _purify_pytest_command(text: str) -> str: + """ + Purify the pytest command by removing environment-specific values. 
+ + :param text: input text to process + :return: text with environment-specific values removed + """ + txt = _remove_junit_suite_name(text) + return txt + + +# ############################################################################# +# Test_build_run_command_line1 +# ############################################################################# + + +class Test_build_run_command_line1(hunitest.TestCase): + def run_fast_tests1_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Basic run fast tests. + + :param is_dev_csfy_return_value: mocking the return_value of + `hserver.is_dev_csfy()` + :param is_inside_ci_return_value: mocking the return_value of + `hserver.is_inside_ci()` + :param expected: expected output string + """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests1_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . 
' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests1_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests1_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_inside_ci_return_value = False + is_dev_csfy_return_value = False + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests2_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Coverage and collect-only. + + See `run_fast_tests1_helper()` for params description. 
+ """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = True + collect_only = True + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests2_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + r'pytest -m "not slow and not superslow" . ' + r"-o timeout_func_only=true --timeout 5 --reruns 2 " + r'--only-rerun "Failed: Timeout" --cov=.' + r" --cov-branch --cov-report term-missing --cov-report html " + r"--collect-only -n 1 " + r"--junit-xml=tmp.junit.xml " + r'-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests2_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests2_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests2_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + r'pytest -m "not slow and not superslow" . 
' + r"-o timeout_func_only=true --timeout 50 --reruns 2 " + r'--only-rerun "Failed: Timeout" --cov=.' + r" --cov-branch --cov-report term-missing --cov-report html " + r"--collect-only -n 1 " + r"--junit-xml=tmp.junit.xml " + r'-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests2_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + @pytest.mark.skip(reason="Fix support for pytest_mark") + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_run_fast_tests4(self) -> None: + """ + Select pytest_mark. + """ + scratch_space = self.get_scratch_space(use_absolute_path=False) + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + @pytest.mark.no_container + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # + test_list_name = "fast_tests" + custom_marker = "" + pytest_opts = "" + skip_submodules = True + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + actual = hlitapyt._build_run_command_line( + test_list_name, + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + expected = ( + "pytest Test_build_run_command_line1.test_run_fast_tests4/tmp.scratch/" + "test/test_that.py" + ) + self.assert_equal(actual, expected) + + def run_fast_tests5_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Basic run fast tests tee-ing to a file. Mock depending on + `is_dev_csfy_return_value`. + + See `run_fast_tests1_helper()` for params description. 
+ """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = True + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests5_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + " 2>&1" + " | tee tmp.pytest.fast_tests.log" + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests5_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests5_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests5_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . 
' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + " 2>&1" + " | tee tmp.pytest.fast_tests.log" + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests5_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests6_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Run fast tests with a custom test marker. + + See `run_fast_tests1_helper()` for params description. + """ + custom_marker = "optimizer" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests6_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "optimizer and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests6_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests6_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. 
+ """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests6_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "optimizer and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests6_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests7_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Run fast tests with parallelization. + + See `run_fast_tests1_helper()` for params description. + """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "auto" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests7_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. 
+ """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n auto ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests7_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests7_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests7_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n auto ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests7_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def get_custom_marker_helper( + self, + run_only_test_list: str, + skip_test_list: str, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Check that a correct cmd line is generated with custom marker string. 
+ + :param run_only_test_list: a string of comma-separated markers + to run + :param skip_test_list: a string of comma-separated markers to + skip + :param is_dev_csfy_return_value: see `run_fast_tests1_helper()` + :param is_inside_ci_return_value: see `run_fast_tests1_helper()` + :param expected: expected output string + """ + # Mock settings. + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # Mock test. + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + custom_marker = hlitapyt._get_custom_marker( + run_only_test_list=run_only_test_list, + skip_test_list=skip_test_list, + ) + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_get_custom_marker1_full(self) -> None: + # Input params. + run_only_test_list = "run_marker_1,run_marker_2" + skip_test_list = "skip_marker_1,skip_marker_2" + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + # Expected output. + expected = ( + 'pytest -m "' + "run_marker_1 and run_marker_2 " + "and not requires_ck_infra " + "and not skip_marker_1 and not skip_marker_2 " + 'and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + # Mock check. + self.get_custom_marker_helper( + run_only_test_list, + skip_test_list, + is_dev_csfy_return_value, + is_inside_ci_return_value, + expected, + ) + + def get_custom_marker2_empty(self) -> None: + # Input params. 
+ run_only_test_list = "" + skip_test_list = "" + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + # Expected output. + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1' + ) + # Mock check. + self.get_custom_marker_helper( + run_only_test_list, + skip_test_list, + is_dev_csfy_return_value, + is_inside_ci_return_value, + expected, + ) + + +# ############################################################################# +# Test_pytest_repro1 +# ############################################################################# + + +class Test_pytest_repro1(hunitest.TestCase): + def helper(self, file_name: str, mode: str, expected: List[str]) -> None: + script_name = os.path.join( + self.get_scratch_space(), "tmp.pytest_repro.sh" + ) + ctx = httestlib._build_mock_context_returning_ok() + actual = hlitapyt.pytest_repro( + ctx, mode=mode, file_name=file_name, script_name=script_name + ) + hdbg.dassert_isinstance(actual, str) + expected = "\n".join(["pytest " + x for x in expected]) + self.assert_equal(actual, expected) + + # //////////////////////////////////////////////////////////////////////////// + + def _build_pytest_filehelper(self, txt: str) -> str: + txt = hprint.dedent(txt) + file_name = os.path.join(self.get_scratch_space(), "cache/lastfailed") + hio.to_file(file_name, txt) + return file_name + + def _build_pytest_file1(self) -> str: + txt = """ + { + "dev_scripts/testing/test/test_run_tests.py": true, + "dev_scripts/testing/test/test_run_tests2.py": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true, + "documentation/scripts/test/test_all.py": true, + "documentation/scripts/test/test_render_md.py": true, + "helpers/test/helpers/test/test_list.py::Test_list_1": true, + "helpers/test/test_cache.py::TestAmpTask1407": true + } + """ + return self._build_pytest_filehelper(txt) + + def test_tests1(self) -> None: + file_name = 
self._build_pytest_file1() + mode = "tests" + expected = [ + "dev_scripts/testing/test/test_run_tests.py", + "dev_scripts/testing/test/test_run_tests2.py", + "documentation/scripts/test/test_all.py", + "documentation/scripts/test/test_render_md.py", + "helpers/test/helpers/test/test_list.py::Test_list_1", + "helpers/test/test_cache.py::TestAmpTask1407", + "helpers/test/test_printing.py::Test_dedent1::test2", + ] + self.helper(file_name, mode, expected) + + def test_files1(self) -> None: + file_name = self._build_pytest_file1() + mode = "files" + expected = [ + "dev_scripts/testing/test/test_run_tests.py", + "dev_scripts/testing/test/test_run_tests2.py", + "documentation/scripts/test/test_all.py", + "documentation/scripts/test/test_render_md.py", + "helpers/test/helpers/test/test_list.py", + "helpers/test/test_cache.py", + "helpers/test/test_printing.py", + ] + self.helper(file_name, mode, expected) + + def test_classes1(self) -> None: + file_name = self._build_pytest_file1() + mode = "classes" + expected = [ + "helpers/test/helpers/test/test_list.py::Test_list_1", + "helpers/test/test_cache.py::TestAmpTask1407", + "helpers/test/test_printing.py::Test_dedent1", + ] + self.helper(file_name, mode, expected) + + def _build_pytest_file2(self) -> str: + # pylint: disable=line-too-long + txt = """ + { + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2": true, + 
"core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13": true, + 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1": true, + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1": true, + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1": true, + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1": true, + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3": true, + "core/test/test_config.py::Test_subtract_config1::test_test1": true, + "core/test/test_config.py::Test_subtract_config1::test_test2": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3": true, + "helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true + } + """ + # pylint: enable=line-too-long + return self._build_pytest_filehelper(txt) + + def test_tests2(self) -> None: + file_name = self._build_pytest_file2() + mode = "tests" + # pylint: disable=line-too-long 
+ expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07", + 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1", + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1", + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3", + "core/test/test_config.py::Test_subtract_config1::test_test1", + "core/test/test_config.py::Test_subtract_config1::test_test2", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3", + 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2", + "helpers/test/test_printing.py::Test_dedent1::test2", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + def test_files2(self) -> None: + file_name = self._build_pytest_file2() + mode = "files" + # pylint: disable=line-too-long + expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py", + "core/dataflow/nodes/test/test_volatility_models.py", + "core/dataflow/test/test_builders.py", + "core/dataflow/test/test_runners.py", + "core/dataflow_model/test/test_model_evaluator.py", + "core/dataflow_model/test/test_run_experiment.py", + "core/test/test_config.py", + "core/test/test_dataframe_modeler.py", + "dev_scripts/test/test_run_notebook.py", + "helpers/test/test_lib_tasks.py", + "helpers/test/test_printing.py", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + def test_classes2(self) -> None: + file_name = self._build_pytest_file2() + mode = "classes" + # pylint: disable=line-too-long + expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator", + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder", + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1", + "core/test/test_config.py::Test_subtract_config1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1", + 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1", + "helpers/test/test_printing.py::Test_dedent1", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + +# ############################################################################# +# Test_pytest_repro_end_to_end +# ############################################################################# + + +@pytest.mark.slow("~6 sec.") +class Test_pytest_repro_end_to_end(hunitest.TestCase): + """ + - Run the `pytest_repro` invoke from command line + - A fixed file imitating the pytest output file is used + - Compare the output to the golden outcome + """ + + def helper(self, cmd: str) -> None: + # Save output in tmp dir. + script_name = os.path.join( + self.get_scratch_space(), "tmp.pytest_repro.sh" + ) + cmd += f" --script-name {script_name}" + # Run the command. + _, actual = hsystem.system_to_string(cmd) + # Filter out the "No module named ..." warnings. + # TODO(Grisha): add the "no module warning" filtering to + # `purify_text()` in `check_string()`. + regex = "WARN.*No module" + actual = hunitest.filter_text(regex, actual) + # Remove "Encountered unexpected exception importing solver GLPK" + # generated on Mac. + regex = "Encountered unexpected exception importing solver GLPK" + actual = hunitest.filter_text(regex, actual) + # ImportError("cannot import name 'glpk' from 'cvxopt' (/venv/lib/python3.9/site-packages/cvxopt/__init__.py)") + regex = r"""ImportError\("cannot import name""" + actual = hunitest.filter_text(regex, actual) + # Modify the outcome for reproducibility. + actual = hprint.remove_non_printable_chars(actual) + actual = re.sub(r"[0-9]{2}:[0-9]{2}:[0-9]{2} - ", r"HH:MM:SS - ", actual) + actual = actual.replace("/app/amp/", "/app/") + actual = re.sub( + r"lib_tasks_pytest.py pytest_repro:[0-9]+", + r"lib_tasks_pytest.py pytest_repro:{LINE_NUM}", + actual, + ) + # Remove unstable content. 
+ lines = actual.split("\n") + line_cmd = lines[0] + _LOG.debug("%s", "\n".join(lines)) + for i, line in enumerate(lines): + m = re.search("# pytest_repro: ", line) + if m: + test_output_start = i + 1 + break + lines_test_output = lines[test_output_start:] + # + actual = "\n".join([line_cmd] + lines_test_output) + regex = "init_logger" + actual = hunitest.filter_text(regex, actual) + regex = r"(WARN|INFO)\s+hcache.py" + actual = hunitest.filter_text(regex, actual) + # Check the outcome. + self.check_string(actual, purify_text=True, fuzzy_match=True) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test1(self) -> None: + file_name = f"{self.get_input_dir()}/cache/lastfailed" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test2(self) -> None: + """ + The tests are different since the input depends on the test and it's + different for different tests. + """ + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test3(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test4(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. 
See CmTask10739", + ) + def test5(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test6(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test7(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + +# ############################################################################# +# Test_pytest_failed1 +# ############################################################################# + + +class Test_pytest_failed1(hunitest.TestCase): + def get_pytest_text1(self) -> str: + txt = """ + 20:48:15 - ^[[36mINFO ^[[0m hdbg.py init_logger:1018 > cmd='/venv/bin/pytest helpers_root/dev_scripts_helpers/documentation/' + collected 47 items + + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 (2.07 s) FAILED [ 2%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question1 (0.00 s) PASSED [ 4%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question2 (0.00 s) PASSED [ 6%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question3 (0.00 s) PASSED [ 8%] + + + =================================== FAILURES =================================== + _________________________ Test_preprocess_notes1.test1 _________________________ + + 
FAILED helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - AttributeError: 'list' object has no attribute 'split' + FAILED helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - RuntimeError: cmd='(/app/helpers_root/dev_scripts_helpers/documentation/notes_to_pdf.py --input /app/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes + + ======================== 4 failed, 43 passed in 40.48s ========================= + """ + txt = hprint.dedent(txt) + return txt + + def helper( + self, + txt: str, + only_file: bool, + only_class: bool, + exp_failed_tests: str, + exp_num_failed: int, + exp_num_passed: int, + ) -> None: + act_failed_tests, act_num_failed, act_num_passed = ( + hlitapyt._parse_failed_tests(txt, only_file, only_class) + ) + act_failed_tests = "\n".join(act_failed_tests) + self.assert_equal( + act_failed_tests, + exp_failed_tests, + dedent=True, + remove_lead_trail_empty_lines=True, + ) + self.assertEqual(act_num_failed, exp_num_failed) + self.assertEqual(act_num_passed, exp_num_passed) + + def test1(self) -> None: + # Prepare inputs and outputs. + txt = self.get_pytest_text1() + only_file = False + only_class = False + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) + + def test2(self) -> None: + # Prepare inputs and outputs. 
+ txt = self.get_pytest_text1() + only_file = True + only_class = False + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) + + def test3(self) -> None: + # Prepare inputs and outputs. + txt = self.get_pytest_text1() + only_file = False + only_class = True + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3 + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py new file mode 100644 index 000000000..ac2b17b42 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py @@ -0,0 +1,301 @@ +import logging +import os + +import pytest + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hunit_test as hunitest +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +# pylint: disable=protected-access + + +# ############################################################################# +# Test_get_files_to_process1 +# ############################################################################# + + 
+class Test_get_files_to_process1(hunitest.TestCase): + """ + We can't check the outcome so we just execute the code. + """ + + def test_modified1(self) -> None: + """ + Retrieve files modified in this client. + """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + @pytest.mark.skipif( + hgit.get_branch_name() != "master", + reason="This test makes sense for a branch", + ) + def test_branch1(self) -> None: + """ + Retrieved files modified in this client. + """ + # This test needs a reference to Git master branch. + hgit.fetch_origin_master_if_needed() + # + modified = False + branch = True + last_commit = False + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + def test_last_commit1(self) -> None: + """ + Retrieved files modified in the last commit. + """ + modified = False + branch = False + last_commit = True + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + def test_files1(self) -> None: + """ + Pass through files from user. + """ + modified = False + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = True + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, [__file__]) + + def test_files2(self) -> None: + """ + Pass through files from user. 
+ + Use two types of paths we don't want to process: + - non-existent python file + - pattern "/*" that matches no files + """ + modified = False + branch = False + last_commit = False + all_ = False + files_from_user = "testfile1.py testfiles1/*" + mutually_exclusive = True + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, []) + + def test_files3(self) -> None: + """ + Pass through files from user. + + Use the sequence of paths separated by newlines. + """ + modified = False + branch = False + last_commit = False + all_ = False + # Specify the number of toy files. + n_toy_files = 4 + files_from_user = [] + # Get root directory. + root_dir = hgit.get_client_root(super_module=False) + # Generate toy files and store their paths. + for file_num in range(n_toy_files): + # Build the name of the test file. + file_name = f"test_toy{str(file_num)}.tmp.py" + # Build the path to the test file. + test_path = os.path.join(root_dir, file_name) + # Create the empty toy file. + hio.to_file(test_path, "") + files_from_user.append(test_path) + mutually_exclusive = True + remove_dirs = True + # Join the names with `\n` separator. + joined_files_from_user = "\n".join(files_from_user) + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + joined_files_from_user, + mutually_exclusive, + remove_dirs, + ) + # Remove the toy files. + for path in files_from_user: + hio.delete_file(path) + self.assertEqual(files, files_from_user) + + def test_assert1(self) -> None: + """ + Test that --modified and --branch together cause an assertion. 
+ """ + modified = True + branch = True + last_commit = False + all_ = True + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + with self.assertRaises(AssertionError) as cm: + hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + '3' + == + '1' + Specify only one among --modified, --branch, --last-commit, --all_files, and --files + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Test that --modified and --files together cause an assertion if + `mutually_exclusive=True`. + """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = True + remove_dirs = True + with self.assertRaises(AssertionError) as cm: + hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + '2' + == + '1' + Specify only one among --modified, --branch, --last-commit, --all_files, and --files + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Test that --modified and --files together don't cause an assertion if + `mutually_exclusive=False`. 
+ """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = False + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, [__file__]) + + +# ############################################################################# + + +# ############################################################################# +# TestLibTasksRemoveSpaces1 +# ############################################################################# + + +class TestLibTasksRemoveSpaces1(hunitest.TestCase): + def test1(self) -> None: + txt = r""" + IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev \ + docker-compose \ + --file $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml \ + run \ + --rm \ + -l user=$USER_NAME \ + --entrypoint bash \ + user_space + """ + actual = hlitauti._to_single_line_cmd(txt) + expected = ( + "IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev" + " docker-compose --file" + " $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml" + " run --rm -l user=$USER_NAME --entrypoint bash user_space" + ) + self.assert_equal(actual, expected, fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py new file mode 100644 index 000000000..ac46b6c17 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py @@ -0,0 +1,74 @@ +import os + +import pytest + +import config_root.config as cconfig +import dev_scripts_helpers.notebooks.run_notebook_test_case as dshnrntca +import helpers.hgit as hgit +import 
helpers.hserver as hserver +import helpers.lib_tasks_gh as hlitagh + + +def build_config() -> cconfig.ConfigList: + """ + Get an empty config for the test. + """ + config = {} + config = cconfig.Config() + config_list = cconfig.ConfigList([config]) + return config_list + + +# ############################################################################# +# Test_Master_buildmeister_dashboard_notebook +# ############################################################################# + + +class Test_Master_buildmeister_dashboard_notebook( + dshnrntca.Test_Run_Notebook_TestCase +): + @pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="No access to data from `lemonade` repo locally", + ) + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + @pytest.mark.superslow("~42 sec.") + def test1(self) -> None: + amp_dir = hgit.get_amp_abs_path() + notebook_path = os.path.join( + amp_dir, + "devops", + "notebooks", + "Master_buildmeister_dashboard.ipynb", + ) + config_builder = ( + "helpers.test.test_master_buildmeister_dashboard.build_config()" + ) + self._test_run_notebook(notebook_path, config_builder) + + @pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="No access to data from `lemonade` repo locally", + ) + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + @pytest.mark.superslow("~30 sec.") + def test2(self) -> None: + """ + Check that we can get status for all the workflows. 
+ """ + repo_list = [ + "causify-ai/cmamp", + "causify-ai/orange", + "causify-ai/lemonade", + "causify-ai/kaizenflow", + "causify-ai/helpers", + "causify-ai/quant_dashboard", + ] + for repo_name in repo_list: + hlitagh.gh_get_workflow_type_names(repo_name) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py new file mode 100644 index 000000000..ced80844b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py @@ -0,0 +1,284 @@ +import logging + +import pytest + +import helpers.hgit as hgit +import helpers.hserver as hserver +import helpers.hunit_test as hunitest +import helpers.hunit_test_utils as hunteuti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestRepoConfig_Amp +# ############################################################################# + + +class TestRepoConfig_Amp(hunitest.TestCase): + # Difference between `cmamp` and `kaizenflow`. + expected_repo_name = "//cmamp" + + def test_repo_name1(self) -> None: + """ + Show that when importing repo_config, one doesn't get necessarily the + outermost repo_config (e.g., for lime one gets amp.repo_config). + """ + + actual = hrecouti.get_repo_config().get_name() + _LOG.info( + "actual=%s expected_repo_name=%s", actual, self.expected_repo_name + ) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_repo_name2(self) -> None: + """ + If //amp is a supermodule, then repo_config should report //amp. 
+ """ + actual = hrecouti.get_repo_config().get_name() + self.assertEqual(actual, self.expected_repo_name) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_repo_name3(self) -> None: + """ + If //amp is a supermodule, then repo_config should report something + different than //amp. + """ + actual = hrecouti.get_repo_config().get_name() + self.assertNotEqual(actual, self.expected_repo_name) + + def test_config_func_to_str(self) -> None: + _LOG.info(hserver.config_func_to_str()) + + def test_is_dev4(self) -> None: + """ + Amp could run on dev4 or not. + """ + _ = hserver.is_dev4() + + def test_is_CK_S3_available(self) -> None: + """ + When running Amp on dev_csfy, the CSFY bucket should be available. + """ + if hserver.is_dev_csfy(): + actual = hserver.is_CK_S3_available() + expected = True + self.assertEqual(actual, expected) + + +# ############################################################################# +# TestRepoConfig_Amp_signature +# ############################################################################# + + +# > pytest ./amp/helpers/test/test_repo_config_amp.py + + +# ############################################################################# +# TestRepoConfig_Amp_signature1 +# ############################################################################# + + +class TestRepoConfig_Amp_signature1(hunitest.TestCase): + def test_dev_csfy_server(self) -> None: + target_name = "amp" + hunteuti.execute_only_in_target_repo(target_name) + # + hunteuti.execute_only_on_dev_csfy() + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='True' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='{'/data/shared': '/shared_data'}' + has_dind_support='True' + has_docker_sudo='True' + is_CK_S3_available='True' + run_docker_as_root='False' + 
skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + # Server config: + # hserver.config + is_AM_S3_available()='True' + is_dev4()='False' + is_dev_csfy()='True' + is_inside_ci()='False' + is_inside_docker()='True' + is_mac(version='Catalina')='False' + is_mac(version='Monterey')='False' + is_mac(version='Sequoia')='False' + is_mac(version='Ventura')='False' + # Env vars: + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + CSFY_CI='' + GH_ACTION_ACCESS_TOKEN=empty + """ + hunteuti.check_env_to_str(self, expected) + + def test_mac(self) -> None: + target_name = "amp" + hunteuti.execute_only_in_target_repo(target_name) + # + hunteuti.execute_only_on_mac(version="Catalina") + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='False' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='None' + has_dind_support='False' + has_docker_sudo='True' + is_CK_S3_available='False' + run_docker_as_root='False' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='True' + use_docker_network_mode_host='False' + use_docker_sibling_containers='True' + # Server config: + # hserver.config + is_AM_S3_available='True' + is_dev4='False' + is_dev_csfy='False' + is_inside_ci='False' + is_inside_docker='True' + is_mac='True' + # Env vars: + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='False' + CSFY_REPO_CONFIG_PATH='' + CSFY_CI='' + GH_ACTION_ACCESS_TOKEN=empty + """ + hunteuti.check_env_to_str(self, expected) + # + exp_enable_privileged_mode = True + exp_has_dind_support = True + hrecouti.assert_setup( + self, exp_enable_privileged_mode, exp_has_dind_support + ) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == 
"//amp", + reason="Run only in //amp", + ) + def test_amp_ci(self) -> None: + hunteuti.execute_only_on_ci() + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='True' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='None' + has_dind_support='True' + has_docker_sudo='False' + is_CK_S3_available='False' + run_docker_as_root='True' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + # Server config: + # hserver.config + is_AM_S3_available()='True' + is_dev4()='False' + is_dev_csfy()='False' + is_inside_ci()='True' + is_inside_docker()='True' + is_mac(version='Catalina')='False' + is_mac(version='Monterey')='False' + is_mac(version='Ventura')='False' + is_mac(version='Sequoia')='False' + # Env vars: + CSFY_CI='true' + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + """ + # We ignore the AWS vars, since GH Actions does some replacement to mask + # the env vars coming from secrets. 
+ skip_secrets_vars = True + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == "//cmamp", + reason="Run only in //cmamp", + ) + def test_cmamp_ci(self) -> None: + hunteuti.execute_only_on_ci() + # + expected = r""" + # Repo config + get_host_name='github.com' + get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' + get_invalid_words='[]' + get_docker_base_image_name='cmamp' + # Server config + enable_privileged_mode='True' + get_docker_shared_group='' + get_docker_user='' + get_host_user_name='runner' + get_shared_data_dirs='None' + has_dind_support='True' + has_docker_sudo='False' + is_AM_S3_available='True' + is_CK_S3_available='True' + is_dev4='False' + is_dev_csfy='False' + is_external_linux='False' + is_host_mac='False' + is_ig_prod='False' + is_inside_ci='True' + is_inside_docker='True' + is_inside_ecs_container='False' + is_inside_unit_test='True' + is_prod_csfy='False' + run_docker_as_root='True' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + use_main_network='False' + # Env vars + CSFY_CI='true' + CSFY_ECR_BASE_PATH='$CSFY_ECR_BASE_PATH' + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + """ + # We ignore the AWS vars, since GH Actions does some replacement to mask + # the env vars coming from secrets. 
+ skip_secrets_vars = True + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py new file mode 100644 index 000000000..f5b284c58 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py @@ -0,0 +1,65 @@ +import logging +import os + +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_repo_config1 +# ############################################################################# + + +class Test_repo_config1(hunitest.TestCase): + def create_test_file(self) -> str: + yaml_txt = """ + repo_info: + repo_name: helpers + github_repo_account: causify-ai + github_host_name: github.com + invalid_words: + issue_prefix: HelpersTask + + docker_info: + docker_image_name: helpers + + s3_bucket_info: + unit_test_bucket_name: s3://cryptokaizen-unit-test + html_bucket_name: s3://cryptokaizen-html + html_ip: http://172.30.2.44 + + container_registry_info: + ecr: 623860924167.dkr.ecr.eu-north-1.amazonaws.com + ghcr: ghcr.io/cryptokaizen + + runnable_dir_info: + use_helpers_as_nested_module: False + venv_tag: helpers + dir_suffix: helpers + """ + yaml_txt = hprint.dedent(yaml_txt) + file_name = os.path.join(self.get_scratch_space(), "yaml.txt") + hio.to_file(file_name, yaml_txt) + return file_name + + def test1(self) -> None: + file_name = self.create_test_file() + repo_config = hrecouti.RepoConfig.from_file(file_name) + actual = repo_config.get_name() + expected = 
"//helpers" + self.assert_equal(actual, expected) + + def test2(self) -> None: + file_name = self.create_test_file() + repo_config = hrecouti.RepoConfig.from_file(file_name) + actual = repo_config.get_repo_map() + expected = { + "helpers": "causify-ai/helpers", + } + self.assert_equal(str(actual), str(expected)) + + # TODO(gp): Test all the methods of the RepoConfig class. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/logs.log b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/logs.log new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 142195013..5a0064d78 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -1,35 +1,199 @@ --- -- name: Run Python Page +# ────────────────────────────────────────────────────────────── +# playbook.yaml +# Ansible playbook – train, deploy, and test the House Price +# Prediction API running in Docker. 
+# +# Usage (from your Mac, inside the project folder): +# ansible-playbook playbook.yaml +# ansible-playbook playbook.yaml --tags testing +# ansible-playbook playbook.yaml --tags deploy +# ────────────────────────────────────────────────────────────── + +- name: Deploy House Price Prediction API hosts: localhost connection: local + gather_facts: false vars: - page_title: "ML Model Training and Evaluation" - output_path: "index.html" - author: "Likhon Gomes" + # ── Container settings ──────────────────────────────────── + container_name: house-price + image_name: house-price-project + host_port: 5001 # port on your Mac + container_port: 5000 # port inside the container + + # ── Paths ───────────────────────────────────────────────── + project_root: "{{ playbook_dir }}" + model_pkl: "{{ project_root }}/ml_model/house_price_model.pkl" + + # ── Health check ────────────────────────────────────────── + api_base: "http://localhost:{{ host_port }}" + healthcheck_retries: 12 + healthcheck_delay: 5 + # ────────────────────────────────────────────────────────── tasks: - - name: Clone ML Model from Github - ansible.builtin.git: - repo: https://github.com/likhongomes/Comparative-Analysis-of-RNN-Architectures-for-Sentiment-Classification.git - dest: ml_model - # - name: Install Python dependencies - # ansible.builtin.pip: - # requirements: ml_model/requirements.txt + # ── SETUP ───────────────────────────────────────────────── + - name: "Setup – verify Docker is installed" + ansible.builtin.command: docker --version + changed_when: false + tags: [setup] + + - name: "Setup – verify Docker daemon is running" + ansible.builtin.command: docker info + changed_when: false + tags: [setup] + + - name: "Setup – display deployment config" + ansible.builtin.debug: + msg: + - "Project : {{ project_root }}" + - "Image : {{ image_name }}" + - "Container: {{ container_name }}" + - "API URL : {{ api_base }}" + tags: [setup] + + # ── TRAIN MODEL ─────────────────────────────────────────── + - 
name: "Train – check if model already exists" + ansible.builtin.stat: + path: "{{ model_pkl }}" + register: model_stat + tags: [train] - - name: Run Python Code to train model - ansible.builtin.command: python ml_model/train.py --architecture rnn --activation relu --optimizer adam --seq_len 50 --epochs 5 --grad_clip_enable --data_dir ml_model/data - register: python_train_output + - name: "Train – run template.example.py to train and save model" + ansible.builtin.command: + cmd: python3 template.example.py + chdir: "{{ project_root }}" + when: not model_stat.stat.exists + register: train_out + tags: [train] - - name: Python train output + - name: "Train – show output" ansible.builtin.debug: - msg: "{{ python_train_output }}" + var: train_out.stdout_lines + when: + - not model_stat.stat.exists + - train_out is defined + tags: [train] + + - name: "Train – confirm model file exists" + ansible.builtin.stat: + path: "{{ model_pkl }}" + register: model_check + failed_when: not model_check.stat.exists + tags: [train] + + # ── DEPLOY ──────────────────────────────────────────────── + - name: "Deploy – stop existing container (if running)" + ansible.builtin.command: docker stop {{ container_name }} + ignore_errors: true + changed_when: false + tags: [deploy] + + - name: "Deploy – remove existing container (if any)" + ansible.builtin.command: docker rm {{ container_name }} + ignore_errors: true + changed_when: false + tags: [deploy] + + - name: "Deploy – start container with Flask API" + ansible.builtin.command: > + docker run -d + --name {{ container_name }} + -p {{ host_port }}:{{ container_port }} + -v {{ project_root }}:/project + -e PORT={{ container_port }} + -e FLASK_DEBUG=false + {{ image_name }} + bash -c "PORT={{ container_port }} python /project/app.py" + tags: [deploy] + + - name: "Deploy – wait for API to become healthy" + ansible.builtin.uri: + url: "{{ api_base }}/health" + method: GET + status_code: 200 + register: health_resp + retries: "{{ 
healthcheck_retries }}" + delay: "{{ healthcheck_delay }}" + until: health_resp.status == 200 + tags: [deploy] + + - name: "Deploy – confirm API is up" + ansible.builtin.debug: + msg: "API is live at {{ api_base }} status={{ health_resp.json.status }}" + tags: [deploy] + + # ── TESTING ─────────────────────────────────────────────── + - name: "Test 1 – GET /health returns ok" + ansible.builtin.uri: + url: "{{ api_base }}/health" + method: GET + status_code: 200 + register: t_health + tags: [testing] + + - name: "Test 1 – assert status is ok" + ansible.builtin.assert: + that: t_health.json.status == "ok" + success_msg: "✅ /health → ok" + fail_msg: "❌ /health returned {{ t_health.json }}" + tags: [testing] + + - name: "Test 2 – POST /predict returns a price" + ansible.builtin.uri: + url: "{{ api_base }}/predict" + method: POST + body_format: json + body: + OverallQual: 7 + GrLivArea: 1800 + GarageCars: 2 + YearBuilt: 2005 + Neighborhood: "CollgCr" + status_code: 200 + register: t_predict + tags: [testing] + + - name: "Test 2 – assert predicted_price is positive" + ansible.builtin.assert: + that: + - t_predict.json.predicted_price is defined + - t_predict.json.predicted_price | float > 0 + success_msg: "✅ /predict → ${{ t_predict.json.predicted_price }}" + fail_msg: "❌ /predict returned {{ t_predict.json }}" + tags: [testing] + + - name: "Test 3 – POST /predict/batch returns two prices" + ansible.builtin.uri: + url: "{{ api_base }}/predict/batch" + method: POST + body_format: json + body: + instances: + - { OverallQual: 3, GrLivArea: 800 } + - { OverallQual: 9, GrLivArea: 3000 } + status_code: 200 + register: t_batch + tags: [testing] - - name: Run Python Code to evaluate model - ansible.builtin.command: python ml_model/evaluate.py - register: python_evaluate_output + - name: "Test 3 – assert batch count and ordering" + ansible.builtin.assert: + that: + - t_batch.json.count == 2 + - t_batch.json.predictions[1] > t_batch.json.predictions[0] + success_msg: "✅ 
/predict/batch → {{ t_batch.json.predictions }}" + fail_msg: "❌ /predict/batch returned {{ t_batch.json }}" + tags: [testing] - - name: Python evaluate output + # ── SUMMARY ─────────────────────────────────────────────── + - name: "Summary" ansible.builtin.debug: - msg: "{{ python_evaluate_output }}" \ No newline at end of file + msg: + - "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + - " ✅ Deployment complete!" + - " Health → {{ api_base }}/health" + - " Predict → {{ api_base }}/predict" + - "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + tags: [always] \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt index 83c75f98a..f9df0ec3e 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/requirements.txt @@ -2,9 +2,21 @@ matplotlib numpy pandas seaborn -tqdm -boto3 -requests -regex -sentencepiece -sacremoses \ No newline at end of file + +# ── Machine learning ───────────────────────────────────────── +pycaret +scikit-learn>=1.5 +# pandas>=2.2 +# numpy>=1.26 + +# ── REST API ───────────────────────────────────────────────── +flask>=3.0 +gunicorn>=22.0 + +# ── Ansible (deploy automation) ────────────────────────────── +ansible>=9.0 + +# ── Utilities ──────────────────────────────────────────────── +requests>=2.31 +python-dotenv>=1.0 +# matplotlib>=3.8 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png new file mode 100644 index 
0000000000000000000000000000000000000000..4cc2a85c7ab98f436dce590ed10cd451d76ab61d GIT binary patch literal 27259 zcmd442UL}JmoAPyD(IKQ1{O460R;;vN;M(Uq$s^3BGQY}K|o^!V?&P$HbA8uI?|Ox zGXxM(P^y#zDgwgcfJ#&9efG&WbMMT(cm3D;-&r&Bt(8e8!a48zD|Q-E!pAr3?RWF>ZydYk{*J%x^E_bespodw)5rRxEuWUPr@OP8 zr?bP+_1?B8JsjLzr6gq}cW+y7@9F98A-jFM%fJ4Bq}xfm?VqEfOz)U z9$KcMzhy(>dFMp6@P~&?F6cJ@aPDgVF}r@wi(C7Ol_t|ToRbz=JEc}ucLuSZuwP6R z)a*+iyX_%Sv+r)%4t`~PKOdj`hc-px; z-!=b>eqi8E+0nx<@9gHbge(=7yjb?oD(G9;#@%rd$sBHN+{JAxE-qOs`++gi;oAM; zcBb*RJNrE1lMhyUR*$k-hqI1rnQU7Tks`Ns>Pyvj{Hc)QsmI9&)w5XN*lM+Q(Se)r zflAX~MSe_rzNWg|zj(U(haY~Bc7C&C`F2ZlOG``qifMswMsijD;96NvwG|f+Smb-2 z@DekLcc_bBBp_gL>Qq7U!Kcb0eEe-acoufCb91JcTYEv0L2`I^btI?n%dWDiHUCIC z^KCR(QBhI+@WbE7hkIjURKxP~|60Hpsmd=q`npa>iJKiZviOL;zWxfM1zC?zc3lYJ z7ks!$>(0kNHb=R8?OPf*)*hhaR`e}?(TZ&eXXY$$eDla!+$8f@cuK2g@P~j^1>d^w z%KCn~uQ>Th*(l<-&`Yf3#e7%yl!gdg_vEDa%uG+Dhi@}^*pO`GG}Qgle`adT!kj(R z+nSdi<<~8^Tuk?`hPY!%A3xfgsfAs>+z_TToltpwGbcH{sCKY;ChgU$SJD!8!yjJ@ z$@@N9v|Rkg9}SAeE4CTm?+Bi;Fi6s`%=YSU$vIe5RCGJ{l>9Avib#1s-LngpdV7hD z_2VU8rCG3U$hcV=MI7y!7%Vm#?QhOG*`?v<@1LK)rmf<6xJ2^!2#3SX>o6Q_FZ8QX z3E99mXX$x49h-?>X0$$2d+lXmCClvvpMLx0mtV|vnb1>CDGkc!?{rzV&lfFFtQAj9hYGTakZOIu^mbXiM>gMDt zd*401cI#F`q8>Bp*FS!^aOsk#*S@hAO-*A(Z8c%SN@?snOkApt5=8o{QMH_UqAHc<}@u^xw0oa zSfiAl7oGg@Y(sExa6_7f+3G#cY7Rd#FNTJO z`VW8HpzhtY#8jt8(eF!azRzIVFXtACndLg?>V4A9pZ^u7;LzPYF8T?FHqbNfKQlYY zEHABSyl-o0Q7?O4qDj7I;!h$XT|GTa^Cu@04U!F8SU%|%(YY?I&yQw&Y_baW%D;ps zHO0LNoEX&_@=w-Je0coj9S3&4*Z4Rmz0P2$z;9^Tk3YtoKF#MB5WvZ(SXcM#nn>h8 za%JT4;P-zm^y0P{{QUFJUp{|6+?;8PtxnNTXX1C_RW1wJveGi#-~H*|b<;?LBkqnRPGv7%a$vIaNB4RmqX=pi+3iaR{NA*P|Bk;%E+lJi@l#rE+E z;oyYBF)Gg^C0#(D}3kGu&}V-e*3LH!={4OGH8@$o+zSvMKoYhkyrFSuf7}k zUYz(--@a7hAY8k4%{1%yWzpWghGZdW`>Vg5J==v%{`ARR!uZkpX0Ld=Xstox>#cS( zlS4WqoF*M?#N(4a)e>%QZWNg2%;$ggZK!8$xW|M;X|hqeA>yOUV9aHq-L<%7jI|xy zltHfrOV`!ED-CrSSXXr7#0ep(Ru_X)CGf21GAjxbVc+z_N-8PU%u5{cm5o zXbr!OkBf_n{)(sa7?+(H>j-X$JGgSWq`7faRMhi`?a?hcPE0n-N6x}7>-ftLTyCn4 
z;y2N*%d=zWsGdHv;Fk3&#)nU~qlIJbbvFxwEKE5xz$cSCMQZm6X)wCtk z_98wg-Ibj$CMKq?H2LW)eJ1X+}l{%|fNB+0F#=mPE}Gdd`V+nUEr zZj3t^k>tf`ym0ZN-qiR=L#~UN#bfuMTXS9N-elN}w0YDdBGwD-alWN;Y1M!pM!bwzeDFt%{aKjC`&T z<~-?%(#v&jrY)fp!PJRv?0$K7x8IjztnIyYH<1tAvQK?GnPdO@7hIeA##FPBktQpr zk-i4g;-Es>XZ2c-8U-c}k`q$Wg)3LQjjOXA>O|EG{YNy0@;5s4zsWGg4WGYsT}r&x z-RGsHA`O0Yifa@nG{fY-oHxyNPQ<;Xck-VaX&ONqbLe`0^Q4c?H<7{N-nt~yoTMef ziYY?79k=Y-rMp=tN*_yMkmt%?*5lHY7TK6$5;I>w=vhaHRaLkIt*Y1fumjU>&ON1I zOWct~-+gxo2i5D{pYsAo8jKt}iVCl+m5WD|Zyp)MO-FuB#`-1)j15qp&bGhiKQ@qx zb#<)1w=dB(CL1~OEXCi1ge}-et&EHeZ`Hm%ZtW?!qDffkzV<>E9$qOBPw)Swa&wLs z$FQ|%1KYni(J=MS^tT>iq&9UV4I^jg6bgF{sb)rq!G{rG2OaJSpZavh(!Y1(yt#7| z^qAUs+*eDLChtFcrq=c1wv>fUUXY1SuaF1l7Q*iX#KgX~Cmwf{f{SqLnF6c#B-qtN z$@&c36OwUF`Th5~FYn6exU^&+e)s3Owc5QpgNk<#+f04!48sQgbU;-#*_+#fz*QR^ zJe`P}VpSEfy{Y{2>YE~}OG3XD%}fVKm}LGcAS8V#-Li1FMrp<#;j$&$;X2+TTEAw} zp}pVilI97>r1rQBI5hPN zmstlRB=5+$Mg8>CPxXmj#*{hpsS!mi-6_ICFl$Vz$uAjgIEGxHLYL0P)+@JHgTU|f4Hy|<1{7LO2<82{j zM|T2E9$OV+&A)oji>t@Jj*klkPmM&B%HwtShzbrC3|+C9Kt#Tbz$UojV%Zx6nAAWV zw-C`7WS4UMkqa05x*M;3Ty59Gm>6!jZK^e1H8)qvu6ot<^mGU^@8_!R#e3_$K0NzA zbZ5sC^JE0M)$7)+Q?a*CI5GV3Fv|g%Wfv~|CEV%O76yZHNmV$o{_3%9mIZIpX0QS- ztz)D9dPmoGJhCqBZ~5_uA1(m`2r@1q&^2E^z<8Zuqbjmt!&7U&#*WITXBW7*$oULN zx3Srmk*5Xay!+j6^^9_FwsWsBaG75i4*GNi+R41kPsOFMZNihd`uh0^(tY7hj19^F z*nTf4cst$;XRGDvv23@F_8t7CAM2Q58!B(@u>LKz2Hzo|u9D-}aIca5@WA3u?2LC^ zN^$z<&WhF?xz-AV4J?Oc&^KGwXsTJB5CXh*@Zjs$ud!}OisgVtE?lI{oxdn63&_aG zbpG|nk4re=$Fg17Eoz#Y**U^{-AkU`+#2!j-8<|4j^d6+MSwA7#^E&ceVu@2-kgL( z4}QbbNZDN}^~Pb#dUq+OyVWGGSk$JkK5@xVo8nYfjbhf_JzjnK%8bv5-px5_1K5!H>>R19%rxvy4;GVj;o#Y6^QqVF1(vz8f@n?kQKUQ?Jn2$ zg5DiPf>Os=QJKmwPMN5%YRv+gV-1Qd%P~0^Mvprbt_ZPRfjGGqk9U1KCu-=DbrGnIT2~Q?#QG>jM z&$<8eS!WkSZrm}YnaM2WV9v@vvgf|V>)=qYwDjHZ(ned7&rIpNVnjM--CY?s*JD-T zFCTmMGOSmH;Fy}?EX1e?gcixTwM*j=4T|%rs;agIZ3gb#ePQKxSG8S$D^4pdMFny8 z%7OUrN!B#4OasCZVq8K%Rci3BWhv+$or-K1JSk8bWM(q+_UNjRoes64%8Z?^ZF^bM zjoZtCLo~b!L*A`Ajfmy)bz)+*-D5zty&^$B zkzp|rIbm#We(zmpr_F~Kx9fUyk#_8Pt<2!(VznEKG*3G|I6zE$`pI=LhBz 
z2^j774qo+$Y6*Vz^7G$|4EWyz43!oYbSwBAqCA6Sw%XOzwRcg;yhY1)$ji$!B3-9C z-$zbVApZ#X`1mL@*iIclWq*CQaAAhi!9m3U85=E$7j3{#mAD;wt>qz~g9jHkHg$8C ztl4`+Z5L1gA_KktUXhL}-PH$=&wVR5Q{sqi@%?-hYv=xUKa-O+ZM{?CDbD4RB}=@8 z_FP;hcJu?*Crx78wik%J<-o0L#?5BbUf~evSJ->?3=ROug@}sm+EqIcW?pSvA&Z@0 z)}FeJZ9qfo6kPVs|F>1|8Z{*Y?E5vP34oN^& z;vJ~`%P-E%(zpZuxkmnZs;05IsW|lyal6%Znc9)kzMqb0MJrkW%U|_$vLf077wcyK z$c@B>{QUKApPt=)?AbpGr$$?aK7;M4PEBcv$SPDBmC7Tu*P<{3>JclZxhMWskYIa(jz~Rbc;{oy?&)A7P#JzhmFA`F81RIkx~H`-eT6{H@Meu)O2G zNq_;Rb-0 z28I4!c*^E?S6()@U?ICb&H9_ORBKaR4kzi0u^bx5uD*HmhLyJb@!DCdeD|-KnHjIm zrjV-KisZDBz35Ldwr?lujNBQk0^h>`=idGT@mXG?Jy;3GjSXSr8CN>lJ{?*a(OzpB zInmcy-Fp-|AG4958HI`m0eh_ zhZuN6DL4pKqB^5IOgOq<;BhT0jM>C4h+uP#cPVQ`v&7inX=!v!cxa?QZ7rQ)7t9|l zkFW|#^lA4~_2{Y2QAm5;+G>_+np6AuM5psaikI1`$PWp`e)&!LJ8%}q?r<`KjtI+nYa%yl51p!P z5M__MkByD#c+3Ei?~9CzI?*O!R~&Yut*B5p;F9`3FXF)D!x=yT2S5W9X?KnuJ?bn| zoY}s7!WATh6G}ru5L~rSktsPhwi;D?-{^NW&a*gaqmwq}8fUkfIV?*Q#8Dd?ZqOK_ zX|6+c?C?36!U@NdDI=T5#$`{u|J_1~GgBv3-D=ee$g+f^Hs~a`Ws4e06|em1OP(?l z&MKF^fT;;EJ2s`I;uN>Ck8MSiZI+VA{KdbcYM0G6u|jX%#TUc^2PA>a>B{&hjJsMNxdY~M-qgwv+(G`9DJ5?()+NV5Gojddmc6W#O ze?O0tSu4AQvvy;oLegfOQN8Hh0eQ7qg-DgPKjpaKLhWNY*gn5;81W$+g$l0uVPIEX zP!Yp#Y{J+c5^LqW^`D&Vk_PGJ(q7PP?GG+y`MPy!0MuAy51+y);Q!|p6{^8AQ+bkB zMg2PzV#cn6uiW0OnyNP1`WT3n>K=fq?pHH;9YK0Wk3M8a>ZLM2uiJO>uLm>Od1t23 z@pvF)eejRC+jr~UKJp)NRL@M7E9}(l%?U(eWp?WPF>=uFWSC9m7N4w|x8G}`VrwW044C>Fv+0HH~(gP3gjz+<--j-*_6y%-!awfPsBaSS(IzUVANqZRd`uzj6Nxf?1_f8&g}wQIB~~4My(9MYlo$X8qRmf}=jq-=i`Zc^4}D9Fb?BWg7K~I)gqC!Te=o5VvJFkgqXgxIe3=rNskf zG7f_y$U*N39c(kcGj7|$eE@7!`{Qt=AS<(7Td;1t&nzuf#_`I|Vfx$yWfK9szDfI@ z{aD5Cp#D&^ZfWkBzq!Jnn=v9Z`Qk=<;GURr$G{LyWN?Xm;J9aIgy6B8!$lG7m&RW{ zREqRA3g5q%VObH-@+r{Ed zDtGYXTb>O_Dec)~=vFuqo*HHVHn?kWFr7BR!qt6h+^g`@pNp4NDS32Xs#E>Rve4fE zsP7suB9{>Y%p<;j{eBUZORt23C*SSR(bd(>3^oQg-$9Z4z^}i?y1d;lzIUF`j%(`I zH#vYj@$bAWBONg6eBi(VF{`3evO$xN!3)G8HlV=w%e_04?b2!pg(R+MYSauLkYJK^ zTtiLm6>^k8qHdgYXaxe7<3QUJ;+YWk>hJ$N=cbsRN`$0EEjYYXK*cxn1~*5a(nWSQ 
zy1q$Ezvz^opRaEM=+>meF(De^+Y*W4!WQZI`abS!N>Be`qsEpE8-B)9M-!n_QnJ6* z>XO8Yi;CYq{MeAFrz&BVn;o#^6MiceNW}>mM6ftx@Mn4&1aMTw2LXF1spuE@o`_Tk zFa+bwz|rTXkAcT>c#WE#I4oqJM=r?-lUscT+V05vW!Cwb=DDUK+8wUAvQ`goKT@x! zWBV-ksw+Ds&bl!i@)5G=jN6-2?ndT*)G}uL9;&J z%ZYLh)`9k^GfXMXr8U>~*0q5kKn*39eMiOZP|evv15h(uI0Kpjs_rX9Vfqj zmJJw9;pVe@qJu5>d30wNHdTYWQVm;YfSnw{S2jnjooq0&igVHgpd=UBlYsYg!X5Gl9%y)Et&A|CwRpw0SAgLo zV2@haA8K*wLC4mSc0w{3N{-}%Q; z^6fnO*LN|Hc0!m;=J#vXI8$AWpsI35`0TM9n~EhdcvzLEzs(asKo{Av&@yN~?iiHfx!jxEtrTN$z{+1eP+q%k4x??b3&flX-s}>BkPsklIgY#Q zi+lU(j2|62v!U|q+p|mi$K?WIuNs^TGtWqy03)n2G0MG+m{M9*Rkg(^?H=31)h*)I zE&Ef$uT@pfEn3bG*jkFrwZ*EafNFJ?U2VH-`6WRW#24NwAxeS*R=5OkzfAb-jkci3 z_KFzeNTuL*-)~(JmmfXaQCe45_YC*uFjPTim~4LQiRwtXCi=2>U0t>xUfyl8NA@`Z zHhfvIOeGPi^zZM<`BYn*YNU64D0f>WHYkD%Eg%wTqcz~;J=$;534UG*WRAf0?b~CZ zpp+vbIGPvwH}vk+4e&hVIC#QG!@*2YWxTzrR&GV zGvq6UPv$hGuLds2{hnC8fPmF>IKV0#e}R|mJxSamPS9$EWht-$mvCKHgDg6kuT>MR zwXt zo*$2ZkAL=HhF)837ivsK-=5jE&yTL^eVnt6kFOGWDL!a|E5mIsG(&jw@!BWo`0>>P zkiIvF*hL-vB!A5@2I$mel%OP6kAOsiQe09Grb9o)c&EbX8yo!5&E%@5%D=vTB4l66qhcXeg! 
z^EpU?7#CQzV?VLI>w+gw7)6{7{jLethk+Jw44x=XNl^x0z+&2cZeNPcNWgj{!#;za z2UOpH7<2Q+jab+Mz7Ntm()ReoIKG3>9W!jr?5jl84MQ!4n*46>Y&@+KVz;dSa01ce zQ0bBp=+Y#6qp;w=pdM}nf(Hz@C1nA$vd~`l`&`MVe0*=89^fMz3g4~SHM9Qt53^R$ zzc{jJ?h*R?X|aEto<;uT#>lIQm@cSXu`Y)olZkT}bCN_Z=RTdY~}N4)e(Bi;{~Krjn* z#BGoeW-p%oDe#4Vi-)oCtv^dkEzAYzYaVN>n6G%Q^@#qNFMj{OJox|ZdH>&k;O#)) z?5p{*gEQhaCCY!lMu~FYt6M5*k{b@X&;7rw)BpST{>Od7pUT3gA#o>ys%Z&Q^!#`g z%=_``d%c-=c#CKIp8m!ptqOos3^Zd)VL)DYO>{bbW~-uNyX^??*xh5L&;CT1|Akb| zmc4t82~sUuw&lp(My8H1K)MbD9WU#1x&WNnwpA-6%pV^jfOvNlHIV}%fLL|#NT0#C zZ{KW>|G|6G%>&7L_i=HPw;H8=yso2|vAEa}@dn56 z>IMk5tYok4#*g;LY20K$_JMdrwW(Kc?E&1nTt%!{XDA93={loK#|ej_h-sI>?FczS9IDrCfVEA5;?M8K}@ z?sx?U8W>1ZUFRp-5L6g2)`<1!pN8hR_a%pS z>|*~Hkzy&}DzPnF zK1TKNo=0@BXy5E@>cIVt*!&z+56dRzEm#0LMjf#|$4LqP4I=q{*shK#fsqz&0d{7< z<)Z3VAn5KyETyG-tkh;S>uXnpRd{rCG;I$NP?{aZ!Nr?2Z{0@?p+Rh!o6Ho-y9iOB z;VmyY+l(LG$LXZ9nM2g#s8#Tk0jkeDjv3H4T{@}bFWmk8Pr@hwbbPDZi}$#h)qx(1 zvPI!q;FWTyE7k?{@AT@cSO5O|@5#h~6qo}jM<*UwnJBZ}sCcO;faEp~)Fq8$OFkqE zpVQ&7Kx>Jp6R8k@2%Ch;+6{lfY4j~16P=-HA{?D_-zk-kb$`ODBy9VCq1!PBQA z_n!E3z@&s0;Pgak;PTTO6OjVP!KRN4um&9gC@Y%;+6%E5d`TG9q=aJfpg}}$qAbB zr>c%h-H;_~lfHiS#eOB@s}UHiLYx=S1MM%*gp&I>|NMjj`1<9mRwcl6l7DOpcvb(` zUw>cZIiVJH#-TV(kbbzf^VO|vE0sG9DNpPXZcq&rGi}*MO?)g3u#J%J9GXU<+S=p( zkx#?v!fg7jV+j)j%j}Kz;u;f+2NEX%MbkL2+inoMT{Y2GFjJiL@}r+U&A*>79CV^2 z6_420S9tjka3!I%c6f1X=W_^c$yL1lJ;pctQS7~a9)0*iPcPo%o!0zocL{yUf2|?2 z&BB6DH}6ZF{khOkRzA-$>U;mhe`0DKNR#ACOdgs}|H3c-p>G#0i2UzME<1ea`yrEG z^`Fq^KQ1B9{*OZ|W43%z5P6W%aY}ADc3i#s58TgyH;ns2b}i5|>VhaUWL zj!b{3XOqA&wmk8$3)!XwqtuFv8ykYb@xHkFcd}i#X22xpA#7VJ;0z4F>QG8cLcIw9 zewFwaO0PuZARP@jtVNt|!VPd%+HRJs3GTUWf$*vOD8*V@L`0zMD9~b_UZ4{`_bpVy zWR#{rxwi-;`^Pj;|Po4V7qBa2Yc zxBGJ>5aFIzRUIUrW7o0sK>p4=9+Il}5bHNBQNh!G@b?COeHWUWi*Q?cG{YV>KO+`l z;Q%~`R)Hfo+k+-t6uvwY`7%1%NLdgI8pkrE=i6Ui$>5`eE{3JWoB7BV;wmm$xbQV{ z<8_O22(u5sH(HSSt{rc&e5;`*IG+amXq~}9fa`w0Bs z2a5-=x5Q>wwo_GAEP*-)N z88B0(Sq(B_Hq20P=V~y(vcv^VjXPlDkrUiaTY>nz@Fc7~SA-~BVI7A>N*edy1$nq^ 
z!h~s+?^$Wxf)aC?u&|q)Kyy;?%ye&(EkZEEHS+x_)~q@6)fn9GJi$GHPIOqzg4>6@ zY|{3x)EY2f*9dT!-YJ#e78K~x=QC+D1%dag(Zs3Nu8T)2|A{`AwI109h~(!5|f zU>PqwD|fz`ovOE6gzh25Q(qs6Ci;GU@6A1P<_wam(H*CK_;+yAEL*YScvBRlE0k4l z8b#QPQX5MO@Y-Zx#OCVb0_D&uR7L0@%ImSbxoaBrV%5H9MW^n@uPHXSkiQIuh1MM4 z!mxkv!k?gM-5hu5aR&GI&!W^WHadY#MTp-3|D&C^`Vv*=$UiPzsD;loWzK>n_ApqH zIE^sy@ZrO3eBJVG#)n9NNq4Fam$+MJ+a9EsDhEUdc(R3imW~Bq?7{H1xydj?ki)w% z&4S}O4lw8W@zsXiu5E96he=b;jP!TLuH0T0mLzMYt*gr@J*gcj8wZjLuR7J?PzC&4 z4<6B3sVU*GGK!4X_y|frun)xgP4T{`2^T^hy8xRN&zuIDkDU$5DG|5z%g9IrTpIVP ztw5825w-{M<*j%~EG%CiJ&O<(tQhqPhegcHkke@i0zW-n09x}95V9zHQ8`MN zCRCyZVBDUt>?&h!@UYw1mV>osC^mUe8UhIfoknwqcjzh zmagxe#I{mFOvXMO?PP#8lC;COTcwdG_v8vz+8pvpBOHii9fWs|gDdh59pY9f6c^ghsQn^-{i8m5Jr!~@L|B0 z&*<+wlcI8EjTp?Y2+ElVlN!B;;1&j_wBNm zrtdf%svf^)))g>8u+r%E|G0d491a#~dEQ(^R>f)ftvc5h zNTdd+2T);`HhQDpC+F$*!eIu&H4}=$vHm0UgnUcSX)Y{L4>Hs`i5F8Q5D8MG6dM zT-#ouP>N$l`D=i;%(_c?kqh_Up6CVy4dOv^T-l~%Q72&ou7**|EM0eyk zjSiK$^zg^FjJ+`Zgn&Cj)o7$o`)hDY_eRMHP4?oAqN2|ry-=>E;ErquQq>XugWRIV z`=d-rQ_c46G7lN0+M6ILY+y3XNX`QC@CAc9NC>a z52GWBSkGlx+tMBN$wu1|sO{lIz`13_Mkxe{%gdXm7mnWHmYah@Y5+g3MCD}eNYUh% zszlfHe?wk)DbgG5Z#mXtbCt?WTo_0AXULaPrw9<_-87ztLy5g$xSs38uaZYU*8%s( z0BVfO5V&tE_&R8@Qd5DgO^Z%F0eds}s0h+%?TVm{i2y~6*l-E677HV17bW+%r$x!Z zPwg)_+srJx8gXdCa60Duk0eE%`l1dWms>7NXTjXL_BdK%mNWe?;D1~G{rA>m>Bwxo zraZo9z2EKeHE)|N1GHdc)EqT^+nPRS55F=fd?QMsRNj9X8*2is;|ZfDb@wb`Dg}Qo zTj~G)kIkOY9f)BA6{82o1<+SI*pR&`>1x-0@>U}23`761yET*B_zxF>cgT>2Kw#`* zWUDQblGWZa9GDv%0rJE_^+x%=Unl_o=!#gqS`bX!HyNb^9S9A2kvy*B>Ym%)i?TgA zTinUQ1Zsi&tJXv;q^;R^k{B@br#&DDEH}F!#18H?(Te|Un6uz($2uXnh^kXiuZxM|p?UL^T?{CP52iaJgW zUB+d2>uIUJXwf3wLx&D!*8PnvHtEfIgs1#Oo*4XOg*D8I%_R7N_0)#H?Iv_)bgQ`B zT_-4*xV?B%7tA7~KYb<=He#qDq_l_f;f|P6i7}2wxHVVC+b^SKOPv&IRQ!>SGqdL{ zO2BRGW3%$KqZL!3ol+B<_Rhc4+b*N{sM!+iMPaDxUZK0f6QxpUFOHxr907_G{Zg>d zn7F#m80j867E8AT^6?g(D0%<>j0)#E;JFxd3nU|UkYLX)hH|XVK+P4g!G;Xa5S@Vl z_gXwTS!SI-(ro~TW3JwVRCFQfKRWVrSy`E_ngrd1)2!>gvyAh-R6in|Lvv}hzB7_A zfH>jYG-C>502ND+oLFe)a`CZhM@QKxx0MOBC~IMbVw6k(FKU}CsUL!^xQd$ 
zwc1Sj{(*tK>c4U=XPn49f$~KC3RCHzE+{sJg&qDc5qJcy$MjhS0MH=l<`ZZqX&iAc z9Yl(F@bsiVF2Y82cz|dxa$VU;fQG$gE3N7W*evRdDIBgnD5%S$hTaYs^K&Aj+&h|| zcrd}h>ECZv{ld@h?X{1dZ{4oYa_N&^3llT{4cPi8z2Lt4zgdecJ@UUIhQG1SR;vq! z(1w$%=+90s5c*GKM*fc~%!j{Kn5zFws_6eWB>qio_9vvGV;RQM9^EhWC%pd_4sKiW ziuCnPw*LRZHgUZdHOVO~x8+cS|cd-NbMVj;7Tq5P_OWQ#Dtx+YfDc3AovZN6`6!0$FbL9!LJIfb3i3UtsJm}{$)U8!G(rAY8d=pX-wF$v`jOY=Uzl8ex&;teJ(}=R)g#v?0*r4MausPS%65Le_lQy9>gt70DwIjKiq`2eK*Hj6Dr4 zy!d>3Yt?Lbpb6N7nEGL}5k)P?rA{4tOBKKF2Ukt92^y4dOQ%){dC!?WTep7fFnoHS>96sw?t7B6-5Ogc#YH_H)|)Lf?ZMtuOTxD-KPLj4j`5N2Mfoj z`b8|=ygH5dG$tZdX|uv(vGaWhDot{O1u+YM+_YD-1CBz{UJyk%`0{YkkpZ?DdQzc< z-7`$LOucn$mD$W+fBjlkUapbvbz>AP(`$Te0}5fl%vj?pIN=_GCUQUqB!7;mn5?WR zN=pN@V5Cw~rEo^g&7kV9izY#`yOeR496Y)wS@ldeU~IhPxGRG_hI*>=k|>z0@>#lBa0 zXix@Ck>gaHRSr@O_PoOBaZc_b*C`ov^s8sGB)@6@8}aC0pX8vCz~AjEwB?M zLgKARbHm2nxO(*nODw{ZV?PLA?Xjlq@Gc|9q209Zo@K|W9)UF8E1FboR6QMM=JPx5 ze=eaH6!_}31Ost;sH-&J{oQXcjEn%T*@Mp{y8wIb{p)%t!FD^pIYcT5$|vfx-^-PO zc1k8OfFtUB$0so13?tl+N6Q$2eE5V1OX1QWhzn_H1VD>S#EgEOrfF10%Wy+L>2TW*4I&795nsSu5i3B z8I77jcOn$PhGFFm5Jog}b#OsRR@=B~Qy&pJG99O4s23JDE}Ij9RwvTa3GIMtlJ@*~ z2+ZCq@3~(`SG%G_e?Gd3)Gq3Dyd&$Cgp)oD`Z)GU5G~~X@~pp=ss*nR4^YG|PPJee zqT;T);nfdO0C86Q6<5S>e*CE^Df)%jiU&Irt&aLDJ>nshH`VKk%p~PzPp*Fg|2(?} zIPlKi6Ave+rVMaqR~qn8m)g<@i#e0){m>EWg%a5y)zkp;FRxJ&Y8p6Y4GgGAll&Xt z&!78NIswIFd%&PVAfkBwiO!He|NK+>#QR@IxUEeMalDXOd!Fy!e1tXPO8@3n_};E_ z7x=FuRQ`LSpZ7fU^K>IQfAGKmb~9G!q5c&>zB{v|q(>Oc!K1WHg*>jR11Y5EL! 
zwAHTq9(6|ptn|mDMW$uIZu|E1?3KL3?|Y=pCsY<_<3nesh|ic=^CVai!W(!jNcic0 zkL>70C-w>1V7NTV6a%%^5#fOm3+ZetOnDn%gn;(KuBlF&)eh)^@@*p^At_PdJ&tA0 z0wCxjAppel4fy6L@IjZ>?BPjMe8;3Cgl84`etw7^6ZHE~VToAShdLUR_*si$3Ok^| z4#C+-N+Dt(hcZbZQEZ6l4%Ar!bDX$AlBhaCL6myg@S6g*#U>Y_j{rKG3KoRg%xEwM zpj{PAln(~s9c%!2gIuITISUCV9)!*o_=3@Lby!VJE&rbzzgKc;30n1XCz_$pMF)*1 zg7UIy3PSTTQ4pZ$b913~yqON}dcGVbsxjWJ9*wdo=-z6H7eLmeo^sHNBj`m*fZdPU z@0b7hqsY>wVaqrHJ&``1;Pj}QgnGzD69M6fDzGRf7tpztmSkflOM_Ve90BB2#?+3( z-drD9uu#+-)szc&+A=2|IUXBXHRltuSS+0CY77!ZC?(_I7@=)u7^d;&@f_#oTFJ?F zYB{6{CunTEPZA=mvEFr@`otLc5(uM@gLxx!FllgjMKUo^`;c=WAeJ5$+xeXl1AvN} zL4Z`68*?$>hD;Zv&SHW%HOLJz~hDlQb<-~iu@9fNRY28DZkj4>U)8oKx0O;y! z_s9WJ!4TR1igIJiQ~`j8x-Pn|?nzd3bSO2CZ%hQ@CVtMM6>YE(M~0U5{JmnAv0{+~ z#xR)$7}y@{7(5@>MVU=)rhoyO)Vhb#fxIEiud77hMyBV%3h4gv!$$JdV~)vwWL_3C zKUJpe4#2^eo|?fOY=6D<4{+Y>YrDFb-udc@0T$mjw*QN3o}7GBCq8X;oLkI?upr|K z8suKVdRd#jj&rRC`B$`cGu%two@3rDpTXRTebUnUwW;d^dc0oJNAWeW2G^YhY`G7{ z`!G62h(M{8w}MfY_)8d!pHa0`S{-n3BV1y1(O z2t=JIFOOoR{-5!>hvhFo?sPW-7&+`TPd*5>~CLlR*!{*8;ZYt#1@IUW4(2%pd;%*Qd4&e^`n0N{8?N#_B<(_#6zN& zwo)=3Sdq+S6ml=4PlBW{NNB_{DNOWddGq=ni0Fom#ead6VdnKqwk!I#}Y(t>W-#CKr z_J7gjF?#NQ&5HlqGyIo3Vg5b6!Q)*D(2}5Di`#5&zL1{(amenaCPS|a*YkB$AS6nv z69RWy%sa>{Hxm!q>rpLvXLZ^jr*8`avwuA;K#1Bo?tC$ zZTEUJnpT?YBavIodBn8{b%|C4jFiP^0q76m?c_96CqI#ONkiY>4pYvROuu%4+&f3JYN$^ihIvTlh<9_6-E2hloAR zEy8dVU0$cqlI8|UCo8skfdx`<`>7!<47>Z4^wOYjT`QSWH0Fv16af%YiJWHsW8QxBN$$ zp;ZuxE+HDQafMzP`Szh&IaExN)N(j*1M~zB+MWZ_C_TplO;L2sEAH%9aiF zQ@ymxT5T7)x*VD$D_vVf{2bV0b>rWj7e#wJut%%qk#=ZW4-ni@CpoxcN_}+qMl=rI zEa>SF-aBtxG0XkkFJK9^FpBF@4m|K2JX`}%Few)+4aYv6hXy;b^{(i049^10L+*`G zCK3gO*8fQMoy|;6(WmM*%VS zYQ`Mj(>P24{{HAdtbI@jt&8zt*keBf$bMjWnZsms_P~T;Vqj6`Ak)G>uh0N1Rx~~C zL@t+Ws_Ra)^k7$E0WNO(LDnV{ujtVo}udv=<-f#vo7t zBwZM@xh?4ytWCke!5Ceo2e#HQS~2juxq|m0l0k8dKpBTpEZcABcxyHQJ}nK^SZsM9 zEn<6H3F&jgehCz*pp?eaL#=8!>od}{1S?NjQCawnxRMWEYe~|MXWexXH^ufIL zA-9g2Jm==0@S{YHEtyKGJ29voB`>DFBzg{mN<(x6R6?%?6IqL5Ckw|{+f>o2c}%JU z6@@2kK4`6O?;Qq_vjx2gF#8$eM~88PkDoZVG+vH(V`kn8y`6Z0Itn8pXN7t@wPHF^ 
zR=#K*4Gls(jR&7o*Jy}8D}Oxi0wc5_*nkZ+l9VoXuqT{J-;iH zPJ_a*%i1{5w;nFxKpIfwP+Ng^x*uw8>m6^})g_7RcQ2u)wZ zpwrOgt0I~7^{IQ$m`3~1pzx`H;u`bfAPWSLC0rz!+}>xt1bo5m#URoo;rQo{%DM3k zqu(O|eI{f_#5+CH`flJCD4OiVP-vIn0#bSYZ@>%FjHBmVlynAiN;vB~(Sv4$7EvOP zxh+aCtg@n!5)+^Wf5g@`V(LQzO4wtzx0lA%k9s%>BV8!2&MZNjWVU02IPDAYsdpVL zd3IwSm@B9M;Fo#C(Bc+f3k%a(VL0xt#-Y&KO}>b5v|}Iy$o7v>4#%jrBTbDMl!bsz z?HlX)m?h1Bzx2J?fziOhhjF)*StEBu6pu-@!*@uGKCVZdw}7_>P3Cy={-5vOQfuGyCGYRx^m?RIGtOi=TyjSlz7ky8`NrU9CS7Yb9& zw1KBL+Wvlc;4e*#J#}WPwNU2t#DSm#LzU=O!xXi;(Gk2G<~d@-(-VBYJfJ}e z4D{eR8@w;*oJ^l*xj@hchCDO_nBFP=x(ovu8E?j<)Ep=L@Eb0wd}!#o4-(<|4*t-f zAEE`uj>7#$-5*Gxv7mC|{S+;i3f4697XA({*U9b|iR41{TQ@rOA#S*kF*Z4|04-;5 zeVUMO=iQa_=*h{}QY}YwYzPf*LC*RJ8>JDtIBTUl02!9U0a#K}qM7!}9M$=H$Wk;H z5xnS0AD=wd&TvucTmo7sShDLGPCIqI>_#K4))W}}_3Hf-g0XN_FXHF-d{eWr7uo+& zYYk~=2ygq^B5&1%1Z1HtmkKkGx%FtDxtTU0`^o>AZ#6H-)F5bNW26M}1iihvmDU|T zk~zF&qx4(p!DBUtX&|6F7^o8N*KL}`mK!8{n)R-fw4hIDX0Kbw2~1wj(I08 zjT}Bj2Qvzhoj157>-0VD1Pml~G(&Tj?eo2bx38PTbp^!zg(URb-6L!}Le;peJUb*f zr{ybFkYl+Q^x6`cxToyZ=qRF>M~C~bs1xg8+pv}?gh@aa<7ttEnPBmgj#V-kuI!yY z$Y|uyf{V5LLanebcz0a+FgO@6rZ5&lhAOrrB0W7FmTMc%7;%0`O299DuVX?9^wB_N zsBm3jO9kJe&!QfxVPw$fZ%9#aDwu&(p*FL8(COsi#1AH-!JU%AV)39)n!vGWt$f5z z9fAoZ%R*`lSCu$LvZ>QpWN3oqm#6DbE)|FpbkuTfiT@*&f&Ap470_L+LGz_)Z~|C| zSpUJzbVW#E*$?GoWWM*7IhdQef?uBAeZI)oaLP4C2N!KLz2&gnYQYHZt-F%H1|{2Gj%4f0-*k0Sa|UL_wtX?1a(CI^It!zAcWAIF+!Uvs4p%71<4Lu z0ApgrpaV51gIQAn`nUvAVBpkX^hH@K?W_yW%`gmtG!3Jb-H)z zlx>F&E4yfL5;rYGv=be28r~+zjv$Z{tmn?3FElVPpaP(E8OLcAj;GypYz{%Pv{kDj=0vL1a7$c!0NlQ{3sFrXP{;e?5nPkvVUi}%kn zg!i5N6&W5=H6rVJOGB47p}vgNM)5;ks0iN??`5eso4_yrWXx2OZ~fWt(Q&i15V{uT zJ-?a$UwTbZK6dqesU_Dx4JbP3^f=HgAIBQqQD9gH0+)ARAHrY=nw5>dGt>Cg7!moC+V5Y$I!98#Y|l~W?7B@p3FvC63kXx{E3m$G&7L{V;97323hebT9ufRucG=SO{%nB>jVNEwTZ&OH5hF(p#03&8j z)l|*qJGKTEpAu=9%I;H37djHCz8xPMOvglXYHF*j_JHiGnOFuL9BX#cBVdQpg@ZS? 
zJfI&(=M2qh$p-r?81euhbF&;ZRKO4WAv1&AddOKcTcy9Hv$J#ekrF=Sii0sKmvvFT zLUM?~``Cl+^~p&?e?0 zYvWi#tcK#5X+SK!Qf+)9m6yrba$+$oCII7xA?73__mGv&()ZmwV%DKT zg2wl}P$kW;Op}x`JA-VzW&zMsz+qm0FKooeXHo+N31$}Y#+U%mkr_Ftz5Z~jtr3JdK>!@;M@QY{!p=89l+ASru~uW?S|>qqu_tT@&5Au~a=4RAPXV73t_5AB zzfDN`fNg8XF&xYUJ3l7vtxmn&l-y7`R6i~a4~oHyg4x(;gF2KSzLf9T8Vri$+i@BDxOK;lLzp1g zNUm#yG$)8GbcijA<-&fD#9JdB>w~93=ZVvV5muGpCNzO?Q3xm+C=Q6s=W1&WpusmO z2cpaY94~#`TJG%oqAc5uo3}a^uwfQ{h}k7%6vv^bz-oypTA^JuTea-d);O1bLOCRh z0Wq1vW0iZE6gZkwM1lh1$VS!`1TtO^R@V-i$VeYTQ}@RzZj|kAPXlD5aRSH+G}syu2F3ZlVGr-W zMzkFSCqO>a(6HbsFLK{#PuzpL`Yue+Cx7aErU4ELv6`^H9mao%;1w|4NVVJ`H5(C8 zrhHl$mM)^;fcTR^izcRRl?X0@H(h`ikBPXTxd&y6RE|E_i}q~zu&BbQ^Mmdtl~T^S z*|1v?7%K@GB56ujZX7NhkxaUQFtywpeKIuO*=mwLR5TQF#E^JN96d6x5~!w*MDl#0 zW6B=IjCUXhW4cid*kkYub^HQtL`v4>^O}t0_hN_!{`(YsS1+KPrxSrX@Zio@=s`Z1 z?x-H}cwQ63=78DE6GjB2aKZ0J@1!qtgbS-Tu$t7m}?RTellSVnNpO z$TGxY7{OPdD@(qi?g+cBP1^SPGAap>;SDgmfY)gUl!d-n-mEM2ymhG}>hxa*LG{hV z2;N>aVOBDPqz;-^Oam{ALA|D)GHMwha~v42T4DmBRfAfUC>udDye5ha{Kp3(J`4`J z{g2wt=as<#jN|WIT;w2!*~*Vx9JL#pi}?}TtZ>nL@cXb7rYsIak~EXrjuKiakur?z zuw2Ye>qgF_xn80=lMS0jjZ~7om6*OMAA9T3b#Qez8qB(_(N;BiM*=SSj=(50}Of9+5=z>tG^1 z--sILG31+5)}^(8T|aY|o~hu-5#t!{ov}pvX+fo#&zh@CL$EqQZnx9t-FsA7--Wds zvxkr%i3LXOOkm3~fwWX7lavTDt!i)2oa^<&r_rbpDT<8ro{n!xC|ihuT5_BvWhn&t zxgZ{P1&pA^kanOi2da0YUqBCIz|RDcOtQNikUpG_rTti5kpul=Fjd6`k!3-$dLaR= z+$Fl}z1Ye*!xhs>#??Um=wevSoflT5wFyNjTkhJ_DRV9o+U+RXHJrPfFVNl-$MPlS zy3|gJEP}$k<@qZndwgn=K+0X4P8llrHE&N9l}Nw+*T(7JT{;iny8hhSl3_gW$W_{! 
LJDT cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-085a2ce7-6161-4c8a-92d5-492051832f3c.json'\n" - ] - } - ], + "execution_count": null, + "id": "29327a84", + "metadata": {}, + "outputs": [], "source": [ "hdbg.init_logger(verbosity=logging.INFO)\n", "\n", @@ -101,90 +78,167 @@ }, { "cell_type": "markdown", - "id": "79c37ba3-bd5d-4a44-87df-645eee54977a", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "## Make the notebook flow clear\n", - "Each notebook needs to follow a clear and logical flow, e.g:\n", - "- Load data\n", - "- Compute stats\n", - "- Clean data\n", - "- Compute stats\n", - "- Do analysis\n", - "- Show results\n", - "\n", - "\n", - "\n", + "id": "8870b0e6", + "metadata": {}, + "source": [ + "## Check API health\n", "\n", - "#############################################################################\n", - "Template\n", - "#############################################################################" + "Verify the server is running and the model is loaded before making\n", + "prediction requests." ] }, { "cell_type": "code", - "execution_count": 10, - "id": "a8a109cd-fc8e-4b9e-9dc0-4fc8d4126ad8", - "metadata": { - "lines_to_next_cell": 2 - }, + "execution_count": null, + "id": "ed5a9016", + "metadata": {}, "outputs": [], "source": [ - "class Template:\n", - " \"\"\"\n", - " Brief imperative description of what the class does in one line, if needed.\n", - " \"\"\"\n", - "\n", - " def __init__(self):\n", - " pass\n", - "\n", - " def method1(self, arg1: int) -> None:\n", - " \"\"\"\n", - " Brief imperative description of what the method does in one line.\n", - "\n", - " You can elaborate more in the method docstring in this section, for e.g. explaining\n", - " the formula/algorithm. 
Every method/function should have a docstring, typehints and include the\n", - " parameters and return as follows:\n", - "\n", - " :param arg1: description of arg1\n", - " :return: description of return\n", - " \"\"\"\n", - " # Code bloks go here.\n", - " # Make sure to include comments to explain what the code is doing.\n", - " # No empty lines between code blocks.\n", - " pass\n", + "# Call the health endpoint and display the server status.\n", + "health = cpptteut.api_health()\n", + "_LOG.info(\"API health: %s\", health)\n", + "print(health)" + ] + }, + { + "cell_type": "markdown", + "id": "e4af1679", + "metadata": {}, + "source": [ + "## Inspect available features\n", "\n", + "Retrieve the full feature catalogue and default values so we know what\n", + "fields we can pass to `/predict`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b31c8679", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", "\n", - "def template_function(arg1: int) -> None:\n", - " \"\"\"\n", - " Brief imperative description of what the function does in one line.\n", + "# Fetch the feature catalogue from the API.\n", + "resp = requests.get(f\"{cpptteut.DEFAULT_API_URL}/features\")\n", + "features = resp.json()\n", + "# Display numeric and categorical feature names with their defaults.\n", + "print(\"Numeric features:\")\n", + "for f in features[\"numeric_features\"]:\n", + " print(f\" {f:<20s} default={features['defaults'].get(f)}\")\n", + "print(\"\\nCategorical features:\")\n", + "for f in features[\"categorical_features\"]:\n", + " print(f\" {f:<20s} default={features['defaults'].get(f)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "474864ca", + "metadata": {}, + "source": [ + "## Single prediction\n", "\n", - " You can elaborate more in the function docstring in this section, for e.g. explaining\n", - " the formula/algorithm. 
Every function should have a docstring, typehints and include the\n", - " parameters and return as follows:\n", + "Send one house's features to `/predict` and display the result." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdb0d379", + "metadata": {}, + "outputs": [], + "source": [ + "# Build the request payload with a subset of features.\n", + "# Missing fields will be filled with server-side defaults.\n", + "payload = {\n", + " \"OverallQual\": 7,\n", + " \"GrLivArea\": 1800,\n", + " \"GarageCars\": 2,\n", + " \"YearBuilt\": 2005,\n", + " \"Neighborhood\": \"CollgCr\",\n", + " \"ExterQual\": \"Gd\",\n", + " \"KitchenQual\": \"Gd\",\n", + "}\n", + "# Post the payload and display the predicted sale price.\n", + "result = cpptteut.api_predict(payload)\n", + "_LOG.info(\"Predicted price: %s\", result[\"predicted_price\"])\n", + "print(f\"Predicted sale price: ${result['predicted_price']:,.0f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "11bf3a66", + "metadata": {}, + "source": [ + "## Batch prediction\n", "\n", - " :param arg1: description of arg1\n", - " :return: description of return\n", - " \"\"\"\n", - " # Code bloks go here.\n", - " # Make sure to include comments to explain what the code is doing.\n", - " # No empty lines between code blocks.\n", - " pass" + "Send multiple houses in one request to `/predict/batch` and compare\n", + "predicted prices across different quality and size combinations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52eb3160", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a batch of houses varying quality and living area.\n", + "instances = [\n", + " {\"OverallQual\": 3, \"GrLivArea\": 800},\n", + " {\"OverallQual\": 5, \"GrLivArea\": 1200},\n", + " {\"OverallQual\": 7, \"GrLivArea\": 1800},\n", + " {\"OverallQual\": 9, \"GrLivArea\": 3000},\n", + "]\n", + "# Post the batch request and display a comparison table.\n", + "batch = cpptteut.api_predict_batch(instances)\n", + "print(f\"{'Quality':>10} {'Area (sqft)':>12} {'Predicted Price':>16}\")\n", + "print(\"-\" * 44)\n", + "for inst, price in zip(instances, batch[\"predictions\"]):\n", + " print(\n", + " f\"{inst['OverallQual']:>10} \"\n", + " f\"{inst['GrLivArea']:>12,} \"\n", + " f\"${price:>15,.0f}\"\n", + " )" ] }, { "cell_type": "markdown", - "id": "00926523-ae59-497d-bba8-b22e58333849", + "id": "19a5e0ea", + "metadata": {}, + "source": [ + "## Price sensitivity analysis\n", + "\n", + "Hold all features at their default values and vary `OverallQual` from 1\n", + "to 10 to visualise how quality drives price." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4a3468a", "metadata": {}, + "outputs": [], "source": [ - "## The flow should be highlighted using headings in markdown\n", - "```\n", - "# Level 1\n", - "## Level 2\n", - "### Level 3\n", - "```" + "import matplotlib.pyplot as plt\n", + "\n", + "# Build instances across the full quality range.\n", + "qual_range = list(range(1, 11))\n", + "instances = [{\"OverallQual\": q, \"GrLivArea\": 1500} for q in qual_range]\n", + "# Fetch batch predictions for all quality levels.\n", + "prices = cpptteut.api_predict_batch(instances)[\"predictions\"]\n", + "# Plot price vs quality.\n", + "plt.figure(figsize=(8, 4))\n", + "plt.plot(qual_range, [p / 1000 for p in prices], marker=\"o\", linewidth=2)\n", + "plt.xlabel(\"Overall Quality (1–10)\")\n", + "plt.ylabel(\"Predicted Price ($k)\")\n", + "plt.title(\"Predicted Sale Price vs. Overall Quality (GrLivArea = 1 500 sqft)\")\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.savefig(\"results/price_vs_quality.png\", dpi=120)\n", + "plt.show()\n", + "_LOG.info(\"Plot saved to results/price_vs_quality.png.\")" ] } ], @@ -196,18 +250,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" } }, "nbformat": 4, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py index 465093a52..0ff3c8690 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py @@ -14,21 +14,19 @@ # --- # %% [markdown] -# # Template API Notebook +# # House Price Prediction API # -# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a neo4j tutorial the heading should be `Neo4j API`. +# This notebook demonstrates how to interact with the Flask REST API that +# serves predictions from a scikit-learn regression model trained on the +# Kaggle House Prices dataset. # -# - Add description of what the notebook does. -# - Point to references, e.g. (neo4j.API.md) -# - Add citations. -# - Keep the notebook flow clear. -# - Comments should be imperative and have a period at the end. -# - Your code should be well commented. +# - Covers the `/health`, `/features`, `/predict`, and `/predict/batch` endpoints. +# - Uses `template_utils` helpers to keep API calls clean and reusable. +# - Requires the API server to be running: `python app.py` +# - Reference: (house_price.API.md) # -# The name of this notebook should in the following format: -# - if the notebook is exploring `pycaret API`, then it is `pycaret.API.ipynb` -# -# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md +# Follow the reference to write notebooks in a clear manner: +# https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md # %% # %load_ext autoreload @@ -40,90 +38,117 @@ # %% import logging -# Import libraries in this section. -# Avoid imports like import *, from ... import ..., from ... import *, etc. 
+import sys +import os + +sys.path.insert(0, "/project") -import helpers.hdbg as hdbg -import helpers.hnotebook as hnotebo +import requests +import template_utils as cpptteut # %% [markdown] # ## Configuration # %% -hdbg.init_logger(verbosity=logging.INFO) - _LOG = logging.getLogger(__name__) - -hnotebo.config_notebook() - +logging.basicConfig(level=logging.INFO) # %% [markdown] -# ## Make the notebook flow clear -# Each notebook needs to follow a clear and logical flow, e.g: -# - Load data -# - Compute stats -# - Clean data -# - Compute stats -# - Do analysis -# - Show results -# -# +# ## Check API health # -# - - -# ############################################################################# -# Template -# ############################################################################# - +# Verify the server is running and the model is loaded before making +# prediction requests. # %% -class Template: - """ - Brief imperative description of what the class does in one line, if needed. - """ - - def __init__(self): - pass - - def method1(self, arg1: int) -> None: - """ - Brief imperative description of what the method does in one line. +# Call the health endpoint and display the server status. +health = cpptteut.api_health() +_LOG.info("API health: %s", health) +print(health) - You can elaborate more in the method docstring in this section, for e.g. explaining - the formula/algorithm. Every method/function should have a docstring, typehints and include the - parameters and return as follows: - - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass +# %% [markdown] +# ## Inspect available features +# +# Retrieve the full feature catalogue and default values so we know what +# fields we can pass to /predict. +# %% +# Fetch the feature catalogue from the API. 
+resp = requests.get(f"{cpptteut.DEFAULT_API_URL}/features") +features = resp.json() +# Display numeric and categorical feature names with their defaults. +print("Numeric features:") +for f in features["numeric_features"]: + print(f" {f:<20s} default={features['defaults'].get(f)}") +print("\nCategorical features:") +for f in features["categorical_features"]: + print(f" {f:<20s} default={features['defaults'].get(f)}") -def template_function(arg1: int) -> None: - """ - Brief imperative description of what the function does in one line. +# %% [markdown] +# ## Single prediction +# +# Send one house's features to /predict and display the result. - You can elaborate more in the function docstring in this section, for e.g. explaining - the formula/algorithm. Every function should have a docstring, typehints and include the - parameters and return as follows: +# %% +# Build the request payload with a subset of features. +# Missing fields will be filled with server-side defaults. +payload = { + "OverallQual": 7, + "GrLivArea": 1800, + "GarageCars": 2, + "YearBuilt": 2005, + "Neighborhood": "CollgCr", + "ExterQual": "Gd", + "KitchenQual": "Gd", +} +# Post the payload and display the predicted sale price. +result = cpptteut.api_predict(payload) +_LOG.info("Predicted price: %s", result["predicted_price"]) +print(f"Predicted sale price: ${result['predicted_price']:,.0f}") - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass +# %% [markdown] +# ## Batch prediction +# +# Send multiple houses in one request to /predict/batch and compare +# predicted prices across different quality and size combinations. +# %% +# Define a batch of houses varying quality and living area. 
+instances = [ + {"OverallQual": 3, "GrLivArea": 800}, + {"OverallQual": 5, "GrLivArea": 1200}, + {"OverallQual": 7, "GrLivArea": 1800}, + {"OverallQual": 9, "GrLivArea": 3000}, +] +# Post the batch request and display a comparison table. +batch = cpptteut.api_predict_batch(instances) +print(f"{'Quality':>10} {'Area (sqft)':>12} {'Predicted Price':>16}") +print("-" * 44) +for inst, price in zip(instances, batch["predictions"]): + print(f"{inst['OverallQual']:>10} {inst['GrLivArea']:>12,} ${price:>15,.0f}") # %% [markdown] -# ## The flow should be highlighted using headings in markdown -# ``` -# # Level 1 -# ## Level 2 -# ### Level 3 -# ``` \ No newline at end of file +# ## Price sensitivity analysis +# +# Hold all features at their default values and vary OverallQual from 1 +# to 10 to visualise how quality drives price. + +# %% +import matplotlib.pyplot as plt + +# Build instances across the full quality range. +qual_range = list(range(1, 11)) +instances = [{"OverallQual": q, "GrLivArea": 1500} for q in qual_range] +# Fetch batch predictions for all quality levels. +prices = cpptteut.api_predict_batch(instances)["predictions"] +# Plot price vs quality. +os.makedirs("results", exist_ok=True) +plt.figure(figsize=(8, 4)) +plt.plot(qual_range, [p / 1000 for p in prices], marker="o", linewidth=2) +plt.xlabel("Overall Quality (1–10)") +plt.ylabel("Predicted Price ($k)") +plt.title("Predicted Sale Price vs. 
Overall Quality (GrLivArea = 1 500 sqft)") +plt.grid(True, alpha=0.3) +plt.tight_layout() +plt.savefig("results/price_vs_quality.png", dpi=120) +plt.show() +_LOG.info("Plot saved to results/price_vs_quality.png.") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb index a2e9aedd7..0e080328b 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb @@ -2,30 +2,29 @@ "cells": [ { "cell_type": "markdown", - "id": "50f78f7e-2dee-45d6-9d37-7a55eeaae283", + "id": "3faa5bb4", "metadata": {}, "source": [ - "# Template Example Notebook\n", + "# House Price Prediction Example\n", "\n", - "This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`.\n", + "This notebook demonstrates the end-to-end machine learning workflow for\n", + "the Kaggle House Prices regression task without requiring a running server.\n", "\n", - "- Add description of what the notebook does.\n", - "- Point to references, e.g. 
(neo4j.example.md)\n", - "- Add citations.\n", - "- Keep the notebook flow clear.\n", - "- Comments should be imperative and have a period at the end.\n", - "- Your code should be well commented.\n", + "- Loads (or generates) the dataset via `template_utils`.\n", + "- Trains a PyCaret regression model and compares multiple algorithms.\n", + "- Finalizes and saves the best model to `ml_model/`.\n", + "- Runs direct in-process predictions using the saved model.\n", + "- Produces feature importance and neighbourhood price charts.\n", + "- Reference: (house_price.example.md)\n", "\n", - "The name of this notebook should in the following format:\n", - "- if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`\n", - "\n", - "Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" + "Follow the reference to write notebooks in a clear manner:\n", + "https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "6226667e-cab5-479c-be6a-6b7d6f580a97", + "execution_count": null, + "id": "0f747b0b", "metadata": {}, "outputs": [], "source": [ @@ -34,38 +33,42 @@ "%matplotlib inline" ] }, + { + "cell_type": "markdown", + "id": "42e70517", + "metadata": {}, + "source": [ + "## Imports" + ] + }, { "cell_type": "code", - "execution_count": 2, - "id": "8020901a-4bc7-4b73-95e8-aaa462b4fc19", + "execution_count": null, + "id": "10c685e6", "metadata": {}, "outputs": [], "source": [ "import logging\n", - "# Import libraries in this section.\n", - "# Avoid imports like import *, from ... import ..., from ... 
import *, etc.\n", "\n", "import helpers.hdbg as hdbg\n", - "import helpers.hnotebook as hnotebo" + "import helpers.hnotebook as hnotebo\n", + "import class_project.project_template.template_utils as cpptteut" + ] + }, + { + "cell_type": "markdown", + "id": "73a41952", + "metadata": {}, + "source": [ + "## Configuration" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "4ecb72b2-b21d-4fb0-ac92-e7174da390e6", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n", - "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-783e0930-1631-4d64-8bb4-f3a98bb74fcd.json'\n" - ] - } - ], + "execution_count": null, + "id": "53dd637e", + "metadata": {}, + "outputs": [], "source": [ "hdbg.init_logger(verbosity=logging.INFO)\n", "\n", @@ -76,99 +79,218 @@ }, { "cell_type": "markdown", - "id": "1ede6422-bff2-4f0a-8d28-29a01d4786b2", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "## Make the notebook flow clear\n", - "Each notebook needs to follow a clear and logical flow, e.g:\n", - "- Load data\n", - "- Compute stats\n", - "- Clean data\n", - "- Compute stats\n", - "- Do analysis\n", - "- Show results\n", - "\n", - "\n", - "\n", + "id": "093b687b", + "metadata": {}, + "source": [ + "## Load data\n", "\n", - "#############################################################################\n", - "Template\n", - "#############################################################################" + "Attempt to load the Kaggle CSV; fall back to a synthetic dataset if the\n", + "file is absent so the notebook runs without Kaggle credentials." 
] }, { "cell_type": "code", - "execution_count": 5, - "id": "8bbd660d-d22f-44fa-bf53-dd622dee0f53", - "metadata": { - "lines_to_next_cell": 2 - }, + "execution_count": null, + "id": "c170517a", + "metadata": {}, "outputs": [], "source": [ - "class Template:\n", - " \"\"\"\n", - " Brief imperative description of what the class does in one line, if needed.\n", - " \"\"\"\n", - "\n", - " def __init__(self):\n", - " pass\n", - "\n", - " def method1(self, arg1: int) -> None:\n", - " \"\"\"\n", - " Brief imperative description of what the method does in one line.\n", - "\n", - " You can elaborate more in the method docstring in this section, for e.g. explaining\n", - " the formula/algorithm. Every method/function should have a docstring, typehints and include the\n", - " parameters and return as follows:\n", - "\n", - " :param arg1: description of arg1\n", - " :return: description of return\n", - " \"\"\"\n", - " # Code bloks go here.\n", - " # Make sure to include comments to explain what the code is doing.\n", - " # No empty lines between code blocks.\n", - " pass\n", - "\n", + "# Load or generate the House Prices dataset.\n", + "DATA_PATH = \"ml_model/train.csv\"\n", + "df = cpptteut.load_data(DATA_PATH)\n", + "_LOG.info(\"Dataset shape: %s\", df.shape)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "92877696", + "metadata": {}, + "source": [ + "## Compute stats\n", "\n", - "def template_function(arg1: int) -> None:\n", - " \"\"\"\n", - " Brief imperative description of what the function does in one line.\n", + "Inspect the raw data before any cleaning to understand distributions and\n", + "identify potential issues." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60dfc66a", + "metadata": {}, + "outputs": [], + "source": [ + "# Display summary statistics for the target and key numeric features.\n", + "print(\"Target column statistics:\")\n", + "print(df[cpptteut.TARGET_COLUMN].describe())\n", + "print(f\"\\nMissing values per column:\\n{df.isnull().sum()[df.isnull().sum() > 0]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "080307b7", + "metadata": {}, + "source": [ + "## Split data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90b7bc24", + "metadata": {}, + "outputs": [], + "source": [ + "# Split into train and test sets for offline evaluation.\n", + "X_train, X_test, y_train, y_test = cpptteut.split_data(df)\n", + "_LOG.info(\n", + " \"Train: %d rows | Test: %d rows\", len(X_train), len(X_test)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5d0fb4a9", + "metadata": {}, + "source": [ + "## Train model with PyCaret\n", "\n", - " You can elaborate more in the function docstring in this section, for e.g. explaining\n", - " the formula/algorithm. Every function should have a docstring, typehints and include the\n", - " parameters and return as follows:\n", + "PyCaret's `compare_models` benchmarks multiple regression algorithms in a\n", + "single call and selects the best performer by RMSE." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4a2d510", + "metadata": {}, + "outputs": [], + "source": [ + "# Run the PyCaret regression experiment and retrieve the best model.\n", + "best_model = cpptteut.run_pycaret_regression(df, n_select=3, fold=5)\n", + "# Pull and display the full leaderboard.\n", + "leaderboard = cpptteut.get_model_results()\n", + "_LOG.info(\"PyCaret leaderboard retrieved.\")\n", + "leaderboard" + ] + }, + { + "cell_type": "markdown", + "id": "8000f7e7", + "metadata": {}, + "source": [ + "## Save model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34ae9f8b", + "metadata": {}, + "outputs": [], + "source": [ + "# Finalize on the full dataset and persist to disk.\n", + "cpptteut.finalize_and_save(best_model)\n", + "_LOG.info(\"Model finalized and saved.\")" + ] + }, + { + "cell_type": "markdown", + "id": "99d094b8", + "metadata": {}, + "source": [ + "## Run in-process predictions\n", "\n", - " :param arg1: description of arg1\n", - " :return: description of return\n", - " \"\"\"\n", - " # Code bloks go here.\n", - " # Make sure to include comments to explain what the code is doing.\n", - " # No empty lines between code blocks.\n", - " pass" + "Load the saved model and predict directly without a running Flask server." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cedab673", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the saved model artifact.\n", + "model = cpptteut.load_model_artifact()\n", + "# Predict the price of a representative house.\n", + "house = {\n", + " \"OverallQual\": 7,\n", + " \"GrLivArea\": 1800,\n", + " \"GarageCars\": 2,\n", + " \"YearBuilt\": 2005,\n", + " \"Neighborhood\": \"CollgCr\",\n", + "}\n", + "price = cpptteut.predict_price(house, model=model)\n", + "_LOG.info(\"Predicted price: $%.0f\", price)\n", + "print(f\"Predicted sale price: ${price:,.0f}\")" ] }, { "cell_type": "markdown", - "id": "103f6e36-54cf-442c-b137-8091d48805a7", + "id": "22e2cc31", "metadata": {}, "source": [ - "## The flow should be highlighted using headings in markdown\n", - "```\n", - "# Level 1\n", - "## Level 2\n", - "### Level 3\n", - "```" + "## Validate features" ] }, { "cell_type": "code", "execution_count": null, - "id": "d05d52af-67ba-4a4f-a561-af453e43854f", + "id": "e6c05122", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Demonstrate validation with an intentionally bad payload.\n", + "bad_payload = {\"OverallQual\": 15, \"GrLivArea\": -50, \"ExterQual\": \"ZZ\"}\n", + "errors = cpptteut.validate_features(bad_payload)\n", + "print(\"Validation errors:\")\n", + "for e in errors:\n", + " print(f\" ✗ {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c5c4b345", + "metadata": {}, + "source": [ + "## Show results\n", + "\n", + "Compare predicted prices across neighbourhoods and plot feature importance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdb8d334", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "# Build one instance per neighbourhood using default feature values.\n", + "neighborhoods = [\"OldTown\", \"BrkSide\", \"CollgCr\", \"NWAmes\", \"NoRidge\"]\n", + "instances = [\n", + " {**cpptteut.FEATURE_DEFAULTS, \"Neighborhood\": n} for n in neighborhoods\n", + "]\n", + "# Predict prices for all neighbourhoods.\n", + "prices = [cpptteut.predict_price(inst, model=model) for inst in instances]\n", + "result_df = (\n", + " pd.DataFrame({\"Neighborhood\": neighborhoods, \"PredictedPrice\": prices})\n", + " .sort_values(\"PredictedPrice\")\n", + ")\n", + "# Plot the neighbourhood comparison bar chart.\n", + "plt.figure(figsize=(8, 4))\n", + "plt.barh(result_df[\"Neighborhood\"], result_df[\"PredictedPrice\"] / 1000)\n", + "plt.xlabel(\"Predicted Price ($k)\")\n", + "plt.title(\"Predicted Price by Neighbourhood (median feature house)\")\n", + "plt.tight_layout()\n", + "plt.savefig(\"results/price_by_neighborhood.png\", dpi=120)\n", + "plt.show()\n", + "_LOG.info(\"Plot saved to results/price_by_neighborhood.png.\")\n", + "print(result_df.to_string(index=False))" + ] } ], "metadata": { @@ -179,18 +301,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" } }, "nbformat": 4, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py index 30fa7957f..1c5ff6c4e 100644 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py @@ -14,112 +14,170 @@ # --- # %% [markdown] -# # Template Example Notebook +# # House Price Prediction Example # -# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# This notebook demonstrates the end-to-end machine learning workflow for +# the Kaggle House Prices regression task without requiring a running server. # -# - Add description of what the notebook does. -# - Point to references, e.g. (neo4j.example.md) -# - Add citations. -# - Keep the notebook flow clear. -# - Comments should be imperative and have a period at the end. -# - Your code should be well commented. +# - Loads (or generates) the dataset via `template_utils`. +# - Compares multiple sklearn regression models using cross-validation. +# - Trains the best model on the full dataset and saves it to `ml_model/`. +# - Runs direct in-process predictions using the saved model. +# - Produces a neighbourhood price comparison chart saved to `results/`. +# - Reference: (house_price.example.md) # -# The name of this notebook should in the following format: -# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` -# -# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md +# Follow the reference to write notebooks in a clear manner: +# https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md # %% # %load_ext autoreload # %autoreload 2 # %matplotlib inline +# %% [markdown] +# ## Imports + # %% import logging -# Import libraries in this section. 
-# Avoid imports like import *, from ... import ..., from ... import *, etc. +import sys +import os -import helpers.hdbg as hdbg -import helpers.hnotebook as hnotebo +sys.path.insert(0, "/project") -# %% -hdbg.init_logger(verbosity=logging.INFO) +import template_utils as cpptteut +# %% [markdown] +# ## Configuration + +# %% _LOG = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) -hnotebo.config_notebook() +# %% [markdown] +# ## Load data +# +# Attempt to load the Kaggle CSV; fall back to a synthetic dataset if the +# file is absent so the notebook runs without Kaggle credentials. +# %% +# Load or generate the House Prices dataset. +DATA_PATH = "ml_model/train.csv" +df = cpptteut.load_data(DATA_PATH) +_LOG.info("Dataset shape: %s", df.shape) +df.head() # %% [markdown] -# ## Make the notebook flow clear -# Each notebook needs to follow a clear and logical flow, e.g: -# - Load data -# - Compute stats -# - Clean data -# - Compute stats -# - Do analysis -# - Show results -# -# -# +# ## Compute stats # +# Inspect the raw data before any cleaning to understand distributions and +# identify potential issues. +# %% +# Display summary statistics for the target and key numeric features. +print("Target column statistics:") +print(df[cpptteut.TARGET_COLUMN].describe()) +print(f"\nMissing values per column:\n{df.isnull().sum()[df.isnull().sum() > 0]}") -# ############################################################################# -# Template -# ############################################################################# +# %% [markdown] +# ## Split data +# %% +# Split into train and test sets for offline evaluation. +X_train, X_test, y_train, y_test = cpptteut.split_data(df) +_LOG.info("Train: %d rows | Test: %d rows", len(X_train), len(X_test)) + +# %% [markdown] +# ## Compare models +# +# Cross-validate GradientBoosting, RandomForest, and Ridge and display the +# leaderboard sorted by RMSE. 
# %% -class Template: - """ - Brief imperative description of what the class does in one line, if needed. - """ +# Compare all candidate models using 5-fold cross-validation. +leaderboard = cpptteut.compare_models(df, fold=5) +leaderboard + +# %% [markdown] +# ## Train best model - def __init__(self): - pass +# %% +# Train the top-ranked model (GradientBoosting) on the full dataset. +best_model_name = leaderboard.iloc[0]["Model"] +_LOG.info("Best model: %s", best_model_name) +pipeline = cpptteut.train_best_model(df, model_name=best_model_name) - def method1(self, arg1: int) -> None: - """ - Brief imperative description of what the method does in one line. +# %% [markdown] +# ## Evaluate model - You can elaborate more in the method docstring in this section, for e.g. explaining - the formula/algorithm. Every method/function should have a docstring, typehints and include the - parameters and return as follows: +# %% +# Evaluate the fitted pipeline on the held-out test set. +metrics = cpptteut.evaluate_model(pipeline, X_test, y_test) +print(f"Test RMSE : ${metrics['RMSE']:,.0f}") +print(f"Test MAE : ${metrics['MAE']:,.0f}") +print(f"Test R² : {metrics['R2']:.4f}") - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass +# %% [markdown] +# ## Save model +# %% +# Persist the trained pipeline to disk for the Flask API to load. +cpptteut.finalize_and_save(pipeline) +_LOG.info("Model saved.") -def template_function(arg1: int) -> None: - """ - Brief imperative description of what the function does in one line. +# %% [markdown] +# ## Run in-process predictions - You can elaborate more in the function docstring in this section, for e.g. explaining - the formula/algorithm. 
Every function should have a docstring, typehints and include the - parameters and return as follows: +# %% +# Load the saved artifact and run a single in-process prediction. +model = cpptteut.load_model_artifact() +house = { + "OverallQual": 7, + "GrLivArea": 1800, + "GarageCars": 2, + "YearBuilt": 2005, + "Neighborhood": "CollgCr", +} +price = cpptteut.predict_price(house, model=model) +_LOG.info("Predicted price: $%.0f", price) +print(f"Predicted sale price: ${price:,.0f}") - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass +# %% [markdown] +# ## Validate features +# %% +# Demonstrate validation with an intentionally bad payload. +bad_payload = {"OverallQual": 15, "GrLivArea": -50, "ExterQual": "ZZ"} +errors = cpptteut.validate_features(bad_payload) +print("Validation errors:") +for e in errors: + print(f" ✗ {e}") # %% [markdown] -# ## The flow should be highlighted using headings in markdown -# ``` -# # Level 1 -# ## Level 2 -# ### Level 3 -# ``` - -# %% \ No newline at end of file +# ## Show results +# +# Compare predicted prices across neighbourhoods and save the chart. + +# %% +import matplotlib.pyplot as plt +import pandas as pd + +# Build one instance per neighbourhood using default feature values. +neighborhoods = ["OldTown", "BrkSide", "CollgCr", "NWAmes", "NoRidge"] +instances = [{**cpptteut.FEATURE_DEFAULTS, "Neighborhood": n} for n in neighborhoods] +# Predict prices for all neighbourhoods. +prices = [cpptteut.predict_price(inst, model=model) for inst in instances] +result_df = ( + pd.DataFrame({"Neighborhood": neighborhoods, "PredictedPrice": prices}) + .sort_values("PredictedPrice") +) +# Plot and save the neighbourhood comparison chart. 
+os.makedirs("results", exist_ok=True) +plt.figure(figsize=(8, 4)) +plt.barh(result_df["Neighborhood"], result_df["PredictedPrice"] / 1000) +plt.xlabel("Predicted Price ($k)") +plt.title("Predicted Price by Neighbourhood (median feature house)") +plt.tight_layout() +plt.savefig("results/price_by_neighborhood.png", dpi=120) +plt.show() +_LOG.info("Plot saved to results/price_by_neighborhood.png.") +print(result_df.to_string(index=False)) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py index a4cbeed04..e01af3b37 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py @@ -1,22 +1,35 @@ """ template_utils.py -This file contains utility functions that support the tutorial notebooks. +This file contains utility functions that support the House Price Prediction +tutorial notebooks. - Notebooks should call these functions instead of writing raw logic inline. - This helps keep the notebooks clean, modular, and easier to debug. -- Students should implement functions here for data preprocessing, - model setup, evaluation, or any reusable logic. +- Functions cover data loading, preprocessing, model training, evaluation, + and REST API interaction for the Kaggle House Prices regression task. 
-Import as: +Import from the project root as: -import class_project.project_template.template_utils as cpptteut +import template_utils as cpptteut """ -import pandas as pd import logging -from sklearn.model_selection import train_test_split -from pycaret.classification import compare_models +import os +import pickle +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd +import requests +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor +from sklearn.impute import SimpleImputer +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler # ----------------------------------------------------------------------------- # Logging @@ -26,47 +39,455 @@ logger = logging.getLogger(__name__) # ----------------------------------------------------------------------------- -# Example 1: Split the dataset into train and test sets +# Constants +# ----------------------------------------------------------------------------- + +# Features used for training and inference. +NUMERIC_FEATURES: List[str] = [ + "LotArea", + "OverallQual", + "OverallCond", + "YearBuilt", + "TotalBsmtSF", + "GrLivArea", + "FullBath", + "BedroomAbvGr", + "GarageCars", + "GarageArea", +] +CATEGORICAL_FEATURES: List[str] = [ + "Neighborhood", + "HouseStyle", + "RoofStyle", + "ExterQual", + "KitchenQual", +] +ALL_FEATURES: List[str] = NUMERIC_FEATURES + CATEGORICAL_FEATURES +TARGET_COLUMN: str = "SalePrice" + +# Default feature values used when a field is omitted from an API request. 
+FEATURE_DEFAULTS: Dict[str, Any] = { + "LotArea": 9600, + "OverallQual": 6, + "OverallCond": 5, + "YearBuilt": 2000, + "TotalBsmtSF": 900, + "GrLivArea": 1500, + "FullBath": 2, + "BedroomAbvGr": 3, + "GarageCars": 2, + "GarageArea": 480, + "Neighborhood": "CollgCr", + "HouseStyle": "1Story", + "RoofStyle": "Gable", + "ExterQual": "TA", + "KitchenQual": "TA", +} + +# Valid quality codes accepted by the API. +VALID_QUALITY_CODES: set = {"Ex", "Gd", "TA", "Fa", "Po"} + +# Default path to the saved model artifact. +DEFAULT_MODEL_PATH: str = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ml_model", "house_price_model.pkl" +) + +# Default API base URL. +DEFAULT_API_URL: str = "http://localhost:5000" + +# Candidate models compared in compare_models(). +_CANDIDATE_MODELS: Dict[str, Any] = { + "GradientBoosting": GradientBoostingRegressor( + n_estimators=200, max_depth=4, learning_rate=0.05, + subsample=0.8, random_state=42, + ), + "RandomForest": RandomForestRegressor( + n_estimators=200, max_depth=8, random_state=42, n_jobs=-1, + ), + "Ridge": Ridge(alpha=100.0), +} + + +# ----------------------------------------------------------------------------- +# Data loading # ----------------------------------------------------------------------------- -def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): +def load_data(data_path: str) -> pd.DataFrame: + """ + Load the House Prices dataset from a CSV file. + + If the file does not exist, generate a synthetic dataset with realistic + distributions so that the notebook can run end-to-end without Kaggle + credentials. + + :param data_path: path to train.csv (Kaggle House Prices dataset) + :return: DataFrame containing features and the SalePrice target column + """ + if os.path.exists(data_path): + logger.info("Loading dataset from '%s'.", data_path) + df = pd.read_csv(data_path) + # Keep only the columns required for this project. 
+ available = [c for c in ALL_FEATURES + [TARGET_COLUMN] if c in df.columns] + return df[available] + logger.warning("File '%s' not found – generating synthetic dataset.", data_path) + return _generate_synthetic_data() + + +def _generate_synthetic_data(n: int = 1460) -> pd.DataFrame: + """ + Generate a synthetic House Prices dataset when train.csv is unavailable. + + The distributions and sale-price formula approximate those of the real + Kaggle dataset so that model metrics are representative. + + :param n: number of rows to generate + :return: synthetic DataFrame with the same schema as the Kaggle dataset + """ + rng = np.random.default_rng(42) + neighborhoods = [ + "CollgCr", "Veenker", "Crawfor", "NoRidge", + "Mitchel", "Somerst", "NWAmes", "OldTown", "BrkSide", "Sawyer", + ] + house_styles = ["1Story", "2Story", "1.5Fin", "SFoyer", "SLvl"] + roof_styles = ["Gable", "Hip", "Flat", "Gambrel", "Mansard"] + qualities = ["Ex", "Gd", "TA", "Fa", "Po"] + qual_map = {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1} + overall_qual = rng.integers(1, 11, n) + gr_liv_area = rng.integers(500, 4500, n) + ext_qual = rng.choice(qualities, n) + kit_qual = rng.choice(qualities, n) + sale_price = ( + 50000 + + overall_qual * 8000 + + gr_liv_area * 60 + + np.array([qual_map[q] for q in ext_qual]) * 5000 + + rng.normal(0, 15000, n) + ).clip(50000, 800000).astype(int) + return pd.DataFrame({ + "LotArea": rng.integers(2000, 215000, n), + "OverallQual": overall_qual, + "OverallCond": rng.integers(1, 10, n), + "YearBuilt": rng.integers(1872, 2011, n), + "TotalBsmtSF": rng.integers(0, 6110, n), + "GrLivArea": gr_liv_area, + "FullBath": rng.integers(0, 4, n), + "BedroomAbvGr": rng.integers(0, 8, n), + "GarageCars": rng.integers(0, 5, n), + "GarageArea": rng.integers(0, 1418, n), + "Neighborhood": rng.choice(neighborhoods, n), + "HouseStyle": rng.choice(house_styles, n), + "RoofStyle": rng.choice(roof_styles, n), + "ExterQual": ext_qual, + "KitchenQual": kit_qual, + "SalePrice": sale_price, 
+ }) + + +# ----------------------------------------------------------------------------- +# Data splitting +# ----------------------------------------------------------------------------- + + +def split_data( + df: pd.DataFrame, + target_column: str = TARGET_COLUMN, + test_size: float = 0.2, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]: """ Split the dataset into training and testing sets. - :param df: full dataset + :param df: full dataset including the target column :param target_column: name of the target column - :param test_size: proportion of test data (default = 0.2) - + :param test_size: proportion of data to reserve for testing :return: X_train, X_test, y_train, y_test """ - logger.info("Splitting data into train and test sets") + logger.info("Splitting data into train and test sets.") X = df.drop(columns=[target_column]) y = df[target_column] return train_test_split(X, y, test_size=test_size, random_state=42) # ----------------------------------------------------------------------------- -# Example 2: PyCaret classification pipeline +# Sklearn pipeline builder +# ----------------------------------------------------------------------------- + + +def _build_pipeline(estimator: Any) -> Pipeline: + """ + Wrap an estimator in a full preprocessing + regression pipeline. + + Numeric features are median-imputed and scaled. Categorical features are + mode-imputed and one-hot encoded. 
+ + :param estimator: sklearn-compatible regressor + :return: fitted-ready Pipeline + """ + numeric_transformer = Pipeline([ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ]) + categorical_transformer = Pipeline([ + ("imputer", SimpleImputer(strategy="most_frequent")), + ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)), + ]) + preprocessor = ColumnTransformer([ + ("num", numeric_transformer, NUMERIC_FEATURES), + ("cat", categorical_transformer, CATEGORICAL_FEATURES), + ]) + return Pipeline([ + ("preprocessor", preprocessor), + ("regressor", estimator), + ]) + + +# ----------------------------------------------------------------------------- +# Model training and comparison # ----------------------------------------------------------------------------- -def run_pycaret_classification( - df: pd.DataFrame, target_column: str +def compare_models( + df: pd.DataFrame, + target_column: str = TARGET_COLUMN, + fold: int = 5, ) -> pd.DataFrame: """ - Run a basic PyCaret classification experiment. + Cross-validate all candidate models and return a leaderboard DataFrame. - :param df: dataset containing features and target + Mirrors the PyCaret ``compare_models`` interface so notebooks need + minimal changes. Candidates are GradientBoosting, RandomForest, Ridge. 
+ + :param df: dataset containing features and the target column :param target_column: name of the target column + :param fold: number of cross-validation folds + :return: DataFrame with columns Model, RMSE, MAE, R2 sorted by RMSE + """ + X = df.drop(columns=[target_column])[ALL_FEATURES] + y = df[target_column] + rows = [] + for name, estimator in _CANDIDATE_MODELS.items(): + logger.info("Cross-validating %s (%d folds)…", name, fold) + pipeline = _build_pipeline(estimator) + rmse_scores = np.sqrt( + -cross_val_score(pipeline, X, y, cv=fold, scoring="neg_mean_squared_error") + ) + mae_scores = -cross_val_score(pipeline, X, y, cv=fold, scoring="neg_mean_absolute_error") + r2_scores = cross_val_score(pipeline, X, y, cv=fold, scoring="r2") + rows.append({ + "Model": name, + "RMSE": round(rmse_scores.mean(), 2), + "MAE": round(mae_scores.mean(), 2), + "R2": round(r2_scores.mean(), 4), + }) + leaderboard = ( + pd.DataFrame(rows) + .sort_values("RMSE") + .reset_index(drop=True) + ) + logger.info("Leaderboard:\n%s", leaderboard.to_string(index=False)) + return leaderboard + + +def train_best_model( + df: pd.DataFrame, + target_column: str = TARGET_COLUMN, + model_name: str = "GradientBoosting", +) -> Pipeline: + """ + Train the chosen model on the full dataset and return the fitted pipeline. + + :param df: full dataset including the target column + :param target_column: name of the target column + :param model_name: key from _CANDIDATE_MODELS to use + :return: fitted sklearn Pipeline + """ + if model_name not in _CANDIDATE_MODELS: + raise ValueError( + f"Unknown model '{model_name}'. 
" + f"Choose from: {list(_CANDIDATE_MODELS.keys())}" + ) + X = df.drop(columns=[target_column])[ALL_FEATURES] + y = df[target_column] + logger.info("Training %s on full dataset (%d rows)…", model_name, len(df)) + pipeline = _build_pipeline(_CANDIDATE_MODELS[model_name]) + pipeline.fit(X, y) + return pipeline + + +def evaluate_model( + pipeline: Pipeline, + X_test: pd.DataFrame, + y_test: pd.Series, +) -> Dict[str, float]: + """ + Evaluate a fitted pipeline on a held-out test set. - :return: comparison of top-performing models + :param pipeline: fitted sklearn Pipeline + :param X_test: test features + :param y_test: true target values + :return: dict with keys RMSE, MAE, R2 """ - logger.info("Initializing PyCaret classification setup") - ... + preds = pipeline.predict(X_test[ALL_FEATURES]) + metrics = { + "RMSE": round(float(np.sqrt(mean_squared_error(y_test, preds))), 2), + "MAE": round(float(mean_absolute_error(y_test, preds)), 2), + "R2": round(float(r2_score(y_test, preds)), 4), + } + logger.info("Test metrics: %s", metrics) + return metrics - logger.info("Comparing models") - results = compare_models() - ... - return results \ No newline at end of file +# ----------------------------------------------------------------------------- +# Model persistence +# ----------------------------------------------------------------------------- + + +def finalize_and_save( + pipeline: Pipeline, + model_path: str = DEFAULT_MODEL_PATH, +) -> None: + """ + Save the fitted pipeline to disk as a pickle file. + + :param pipeline: fitted sklearn Pipeline to persist + :param model_path: full destination path including the .pkl extension + :return: None + """ + os.makedirs(os.path.dirname(model_path), exist_ok=True) + with open(model_path, "wb") as fh: + pickle.dump(pipeline, fh) + logger.info("Model saved to '%s'.", model_path) + + +def load_model_artifact(model_path: str = DEFAULT_MODEL_PATH) -> Pipeline: + """ + Load a saved sklearn pipeline from disk. 
+ + :param model_path: path to the .pkl file + :return: loaded sklearn Pipeline + """ + if not os.path.exists(model_path): + raise FileNotFoundError( + f"Model not found at '{model_path}'. " + "Run train_best_model() and finalize_and_save() first." + ) + logger.info("Loading model from '%s'.", model_path) + with open(model_path, "rb") as fh: + return pickle.load(fh) + + +# ----------------------------------------------------------------------------- +# Inference +# ----------------------------------------------------------------------------- + + +def predict_price( + payload: Dict[str, Any], + model: Optional[Pipeline] = None, + model_path: str = DEFAULT_MODEL_PATH, +) -> float: + """ + Predict the sale price for a single house. + + Missing feature values are filled with FEATURE_DEFAULTS before inference + so callers only need to supply the fields they care about. + + :param payload: dict of feature name → value pairs + :param model: pre-loaded pipeline; loaded from disk if None + :param model_path: path used when model is None + :return: predicted sale price in USD + """ + if model is None: + model = load_model_artifact(model_path) + row = {**FEATURE_DEFAULTS, **payload} + df = pd.DataFrame([row])[ALL_FEATURES] + return round(float(model.predict(df)[0]), 2) + + +# ----------------------------------------------------------------------------- +# Feature validation +# ----------------------------------------------------------------------------- + + +def validate_features(payload: Dict[str, Any]) -> List[str]: + """ + Validate a prediction request payload and return a list of error strings. + + An empty list means the payload is valid. 
+ + :param payload: dict of feature name → value pairs + :return: list of human-readable validation error messages + """ + errors: List[str] = [] + for field in ("OverallQual", "OverallCond"): + val = payload.get(field) + if val is not None and not (1 <= float(val) <= 10): + errors.append(f"{field} must be between 1 and 10 (got {val}).") + if "GrLivArea" in payload and float(payload["GrLivArea"]) <= 0: + errors.append("GrLivArea must be greater than 0.") + for field in ("ExterQual", "KitchenQual"): + val = payload.get(field) + if val is not None and val not in VALID_QUALITY_CODES: + errors.append( + f"{field} must be one of {sorted(VALID_QUALITY_CODES)}" + f" (got '{val}')." + ) + return errors + + +# ----------------------------------------------------------------------------- +# REST API helpers (call the live Flask server from a notebook) +# ----------------------------------------------------------------------------- + + +def api_health(base_url: str = DEFAULT_API_URL) -> Dict[str, Any]: + """ + Call the /health endpoint of the running prediction API. + + :param base_url: base URL of the Flask API + :return: parsed JSON response dict + """ + resp = requests.get(f"{base_url}/health", timeout=5) + resp.raise_for_status() + return resp.json() + + +def api_predict( + payload: Dict[str, Any], + base_url: str = DEFAULT_API_URL, +) -> Dict[str, Any]: + """ + POST a single prediction request to the running API. 
+ + Example usage in a notebook:: + + result = cpptteut.api_predict({"OverallQual": 8, "GrLivArea": 2200}) + print(result["predicted_price"]) + + :param payload: dict of feature name → value pairs (all fields optional) + :param base_url: base URL of the Flask API + :return: parsed JSON response dict containing predicted_price + """ + resp = requests.post(f"{base_url}/predict", json=payload, timeout=10) + resp.raise_for_status() + return resp.json() + + +def api_predict_batch( + instances: List[Dict[str, Any]], + base_url: str = DEFAULT_API_URL, +) -> Dict[str, Any]: + """ + POST a batch of prediction requests to the running API. + + :param instances: list of feature dicts, one per house + :param base_url: base URL of the Flask API + :return: parsed JSON response dict containing a predictions list + """ + resp = requests.post( + f"{base_url}/predict/batch", + json={"instances": instances}, + timeout=10, + ) + resp.raise_for_status() + return resp.json() \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/version.sh b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/version.sh new file mode 100755 index 000000000..c46ed254c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/version.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# """ +# Display versions of installed tools and packages. +# +# This script prints version information for Python, pip, Jupyter, and all +# installed Python packages. Used for debugging and documentation purposes +# to verify the Docker container environment setup. +# """ + +# Display Python 3 version. +echo "# Python3" +python3 --version + +# Display pip version. +echo "# pip3" +pip3 --version + +# Display Jupyter version. +echo "# jupyter" +jupyter --version + +# List all installed Python packages and their versions. 
+echo "# Python packages" +pip3 list + +# Template for adding additional tool versions. +# echo "# mongo" +# mongod --version From df75ea9450688537d9b38b27df22e9346f59d65c Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:37:44 -0400 Subject: [PATCH 48/58] adding the file to test on Github --- .github/workflows/ansible-workflow.yml | 198 +++++++++++++++++++++---- .github/workflows/static.yaml | 43 ------ 2 files changed, 172 insertions(+), 69 deletions(-) delete mode 100644 .github/workflows/static.yaml diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 16e69b818..73d48997c 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -1,43 +1,189 @@ -# .github/workflows/hello.yml - -name: Hello World +name: House Price Prediction CI/CD on: push: - workflow_dispatch: + branches: [main, develop] + pull_request: + branches: [main] + workflow_dispatch: # allow manual trigger from GitHub UI jobs: - hello: + # ──────────────────────────────────────────────────────────── + # JOB 1 – Train the model and run the example script + # ──────────────────────────────────────────────────────────── + train: + name: Train Model runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Checking tree - run: tree + steps: + - name: Checkout repository + uses: actions/checkout@v4 - - name: Checking ls - run: ls class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip - - name: Install Ansible & lint - run: pip install ansible ansible-lint + - name: Install Python dependencies + run: pip install -r requirements.txt - - name: Install Ansible AWS collection - run: ansible-galaxy collection install amazon.aws + - name: Train model via template.example.py + run: python template.example.py - - name: Run ansible-lint - run: 
ansible-lint class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml + - name: Confirm model file was saved + run: | + ls -lh ml_model/house_price_model.pkl + echo "Model trained successfully." - - name: Run ansible-lint - run: ansible-playbook class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml + - name: Upload trained model as artifact + uses: actions/upload-artifact@v4 + with: + name: house-price-model + path: ml_model/house_price_model.pkl + retention-days: 7 - - name: Upload HTML as artifact + - name: Upload results plots as artifact uses: actions/upload-artifact@v4 with: - name: webpage - path: index.html + name: results + path: results/ + retention-days: 7 + + # ──────────────────────────────────────────────────────────── + # JOB 2 – Build Docker image and run Flask API tests + # ──────────────────────────────────────────────────────────── + build-and-test: + name: Build Docker & Test API + runs-on: ubuntu-latest + needs: train # wait for model to be trained first + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download trained model artifact + uses: actions/download-artifact@v4 + with: + name: house-price-model + path: ml_model/ + + - name: Confirm model file is present + run: ls -lh ml_model/house_price_model.pkl + + - name: Build Docker image + run: docker build -t house-price-project . + + - name: Start container + run: | + docker run -d \ + --name house-price \ + -p 5001:5000 \ + -v ${{ github.workspace }}:/project \ + -e PORT=5000 \ + house-price-project \ + bash -c "PORT=5000 python /project/app.py" + + - name: Wait for API to be healthy + run: | + echo "Waiting for API to start..." + for i in $(seq 1 20); do + STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5001/health || echo "000") + echo "Attempt $i: HTTP $STATUS" + if [ "$STATUS" = "200" ]; then + echo "API is up!" 
+ break + fi + sleep 5 + done + # Final check – fail the job if API never came up. + curl -sf http://localhost:5001/health + + - name: "Test – GET /health" + run: | + RESPONSE=$(curl -sf http://localhost:5001/health) + echo "Response: $RESPONSE" + echo $RESPONSE | python3 -c " + import sys, json + data = json.load(sys.stdin) + assert data['status'] == 'ok', f'Expected ok, got {data}' + print('✅ /health passed') + " + + - name: "Test – POST /predict (single house)" + run: | + RESPONSE=$(curl -sf -X POST http://localhost:5001/predict \ + -H "Content-Type: application/json" \ + -d '{"OverallQual": 7, "GrLivArea": 1800, "GarageCars": 2}') + echo "Response: $RESPONSE" + echo $RESPONSE | python3 -c " + import sys, json + data = json.load(sys.stdin) + price = data['predicted_price'] + assert price > 0, f'Expected positive price, got {price}' + print(f'✅ /predict passed → \${price:,.0f}') + " + + - name: "Test – POST /predict/batch" + run: | + RESPONSE=$(curl -sf -X POST http://localhost:5001/predict/batch \ + -H "Content-Type: application/json" \ + -d '{"instances": [{"OverallQual": 3, "GrLivArea": 800}, {"OverallQual": 9, "GrLivArea": 3000}]}') + echo "Response: $RESPONSE" + echo $RESPONSE | python3 -c " + import sys, json + data = json.load(sys.stdin) + preds = data['predictions'] + assert data['count'] == 2, f'Expected 2 predictions, got {data[\"count\"]}' + assert preds[1] > preds[0], f'Expected higher quality = higher price: {preds}' + print(f'✅ /predict/batch passed → {[\"\${p:,.0f}\" for p in preds]}') + " + + - name: "Test – POST /predict with invalid payload returns 400" + run: | + STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://localhost:5001/predict \ + -H "Content-Type: application/json" \ + -d '{"OverallQual": 99, "GrLivArea": -1}') + echo "HTTP status: $STATUS" + [ "$STATUS" = "400" ] && echo "✅ Validation test passed" || (echo "❌ Expected 400, got $STATUS" && exit 1) + + - name: Print container logs (always, for debugging) + if: always() + 
run: docker logs house-price + + # ──────────────────────────────────────────────────────────── + # JOB 3 – Run Ansible playbook (deploy + test) + # ──────────────────────────────────────────────────────────── + ansible: + name: Ansible Deploy & Test + runs-on: ubuntu-latest + needs: build-and-test + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download trained model artifact + uses: actions/download-artifact@v4 + with: + name: house-price-model + path: ml_model/ + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install Ansible + run: pip install ansible + + - name: Build Docker image for Ansible deploy + run: docker build -t house-price-project . - - name: Echo hello world - run: echo "Hello, World!" + - name: Run Ansible playbook (deploy + test) + run: ansible-playbook playbook.yaml - - name: Checking tree - run: tree + - name: Print Ansible summary + if: always() + run: docker logs house-price 2>/dev/null || true \ No newline at end of file diff --git a/.github/workflows/static.yaml b/.github/workflows/static.yaml deleted file mode 100644 index e90d14d57..000000000 --- a/.github/workflows/static.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Simple workflow for deploying static content to GitHub Pages -name: Deploy static content to Pages - -on: - # Runs on pushes targeting the default branch - push: - branches: ["pages"] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
-concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - # Single deploy job since we're just deploying - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Pages - uses: actions/configure-pages@v5 - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - # Upload entire repository - path: '.' - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v5 \ No newline at end of file From 1e48bdee1cbec11aa4e106ba6148d6c2a56295ee Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:38:54 -0400 Subject: [PATCH 49/58] adding the file to test on Github --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 73d48997c..f1758cb65 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -186,4 +186,4 @@ jobs: - name: Print Ansible summary if: always() - run: docker logs house-price 2>/dev/null || true \ No newline at end of file + run: docker logs house-price 2>/dev/null || true From 27bbf88097ef54ff617f92c1068a8288e859a9a7 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:41:43 -0400 Subject: [PATCH 50/58] adding the file to test on Github --- .github/workflows/ansible-workflow.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index f1758cb65..01894f7b9 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -1,11 +1,11 @@ name: House Price Prediction CI/CD -on: - push: - branches: [main, develop] - pull_request: - branches: [main] - workflow_dispatch: # allow manual trigger from GitHub UI +# on: +# push: +# 
branches: [main, develop] +# pull_request: +# branches: [main] +# workflow_dispatch: # allow manual trigger from GitHub UI jobs: # ──────────────────────────────────────────────────────────── From ebc3030335435e9e721c9287e215284f05b0e8fa Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:43:10 -0400 Subject: [PATCH 51/58] adding the file to test on Github --- .github/workflows/ansible-workflow.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 01894f7b9..35fe21ea4 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -1,11 +1,11 @@ name: House Price Prediction CI/CD -# on: -# push: -# branches: [main, develop] -# pull_request: -# branches: [main] -# workflow_dispatch: # allow manual trigger from GitHub UI +on: + push: + branches: ["**"] + pull_request: + branches: ["**"] + workflow_dispatch: # allow manual trigger from GitHub UI jobs: # ──────────────────────────────────────────────────────────── From c71e0d8ea5f9af1a6ee4853b31322ab78179f21c Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 16:47:25 -0400 Subject: [PATCH 52/58] adding the file to test on Github --- .github/workflows/ansible-workflow.yml | 37 ++++++++++++------- .../index.html | 25 ------------- 2 files changed, 24 insertions(+), 38 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 35fe21ea4..9ffd66bc2 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -2,10 +2,14 @@ name: House Price Prediction CI/CD on: push: - branches: ["**"] + branches: ["**"] # run on every branch pull_request: - branches: ["**"] - workflow_dispatch: # allow manual trigger from GitHub UI + 
branches: ["**"] # run on PRs targeting any branch + workflow_dispatch: # allow manual trigger from GitHub UI + +# All jobs cd into the project subfolder before running anything. +env: + PROJECT_DIR: class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment jobs: # ──────────────────────────────────────────────────────────── @@ -26,12 +30,15 @@ jobs: cache: pip - name: Install Python dependencies + working-directory: ${{ env.PROJECT_DIR }} run: pip install -r requirements.txt - name: Train model via template.example.py + working-directory: ${{ env.PROJECT_DIR }} run: python template.example.py - name: Confirm model file was saved + working-directory: ${{ env.PROJECT_DIR }} run: | ls -lh ml_model/house_price_model.pkl echo "Model trained successfully." @@ -40,14 +47,14 @@ jobs: uses: actions/upload-artifact@v4 with: name: house-price-model - path: ml_model/house_price_model.pkl + path: ${{ env.PROJECT_DIR }}/ml_model/house_price_model.pkl retention-days: 7 - name: Upload results plots as artifact uses: actions/upload-artifact@v4 with: name: results - path: results/ + path: ${{ env.PROJECT_DIR }}/results/ retention-days: 7 # ──────────────────────────────────────────────────────────── @@ -56,7 +63,7 @@ jobs: build-and-test: name: Build Docker & Test API runs-on: ubuntu-latest - needs: train # wait for model to be trained first + needs: train steps: - name: Checkout repository @@ -66,20 +73,23 @@ jobs: uses: actions/download-artifact@v4 with: name: house-price-model - path: ml_model/ + path: ${{ env.PROJECT_DIR }}/ml_model/ - name: Confirm model file is present + working-directory: ${{ env.PROJECT_DIR }} run: ls -lh ml_model/house_price_model.pkl - name: Build Docker image + working-directory: ${{ env.PROJECT_DIR }} run: docker build -t house-price-project . 
- name: Start container + working-directory: ${{ env.PROJECT_DIR }} run: | docker run -d \ --name house-price \ -p 5001:5000 \ - -v ${{ github.workspace }}:/project \ + -v ${{ github.workspace }}/${{ env.PROJECT_DIR }}:/project \ -e PORT=5000 \ house-price-project \ bash -c "PORT=5000 python /project/app.py" @@ -96,7 +106,6 @@ jobs: fi sleep 5 done - # Final check – fail the job if API never came up. curl -sf http://localhost:5001/health - name: "Test – GET /health" @@ -136,7 +145,7 @@ jobs: preds = data['predictions'] assert data['count'] == 2, f'Expected 2 predictions, got {data[\"count\"]}' assert preds[1] > preds[0], f'Expected higher quality = higher price: {preds}' - print(f'✅ /predict/batch passed → {[\"\${p:,.0f}\" for p in preds]}') + print(f'✅ /predict/batch passed') " - name: "Test – POST /predict with invalid payload returns 400" @@ -167,7 +176,7 @@ jobs: uses: actions/download-artifact@v4 with: name: house-price-model - path: ml_model/ + path: ${{ env.PROJECT_DIR }}/ml_model/ - name: Set up Python 3.12 uses: actions/setup-python@v5 @@ -179,11 +188,13 @@ jobs: run: pip install ansible - name: Build Docker image for Ansible deploy + working-directory: ${{ env.PROJECT_DIR }} run: docker build -t house-price-project . 
- name: Run Ansible playbook (deploy + test) + working-directory: ${{ env.PROJECT_DIR }} run: ansible-playbook playbook.yaml - - name: Print Ansible summary + - name: Print container logs if: always() - run: docker logs house-price 2>/dev/null || true + run: docker logs house-price 2>/dev/null || true \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html deleted file mode 100644 index 56edd194a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/index.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - My Simple Webpage - - - -

My Simple Webpage

-

Created by Likhon Gomes

-

Generated on 2026-04-30

- - From 12f87b99b80e85b0bebc9d9cabd6b451062299c9 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Sat, 2 May 2026 17:04:04 -0400 Subject: [PATCH 53/58] adding the file to test on Github --- .../helpers/README.md | 285 - .../helpers/__init__.py | 0 .../helpers/asana_utils.py | 1156 -- .../helpers/github_utils.py | 2110 --- .../helpers/hasyncio.py | 508 - .../helpers/haws.py | 266 - .../helpers/hcache.py | 1086 -- .../helpers/hcache_simple.py | 1188 -- .../helpers/hcfile.py | 135 - .../helpers/hchatgpt.py | 549 - .../helpers/hchatgpt_instructions.py | 32 - .../helpers/hcoverage.py | 183 - .../helpers/hcsv.py | 365 - .../helpers/hdataframe.py | 309 - .../helpers/hdatetime.py | 909 -- .../helpers/hdbg.py | 1134 -- .../helpers/hdict.py | 119 - .../helpers/hdocker.py | 871 -- .../helpers/hdocker_tests.py | 197 - .../helpers/hemail.py | 47 - .../helpers/henv.py | 541 - .../helpers/hfile_tree.py | 232 - .../helpers/hgit.py | 1865 --- .../helpers/hgoogle_drive_api.py | 1183 -- .../helpers/hintrospection.py | 284 - .../helpers/hio.py | 1046 -- .../helpers/hjoblib.py | 880 -- .../helpers/hjupyter.py | 383 - .../helpers/hlatex.py | 334 - .../helpers/hlint.py | 29 - .../helpers/hlist.py | 78 - .../helpers/hllm.py | 680 - .../helpers/hllm_cli.py | 840 - .../helpers/hllm_cost.py | 233 - .../helpers/hlogging.py | 809 - .../helpers/hlogging.pyi | 14 - .../helpers/hmarkdown.py | 18 - .../helpers/hmarkdown_bullets.py | 248 - .../helpers/hmarkdown_coloring.py | 286 - .../helpers/hmarkdown_comments.py | 66 - .../helpers/hmarkdown_div_blocks.py | 132 - .../helpers/hmarkdown_fenced_blocks.py | 131 - .../helpers/hmarkdown_filtering.py | 109 - .../helpers/hmarkdown_formatting.py | 530 - .../helpers/hmarkdown_headers.py | 841 - .../helpers/hmarkdown_rules.py | 367 - .../helpers/hmarkdown_slides.py | 201 - .../helpers/hmarkdown_tables.py | 121 - .../helpers/hmarkdown_toc.py | 164 - .../helpers/hmatplotlib.py | 106 - .../helpers/hmkdocs.py | 170 - .../helpers/hmodule.py | 121 - 
.../helpers/hmoto.py | 111 - .../helpers/hnetwork.py | 97 - .../helpers/hnotebook.py | 105 - .../helpers/hnumba.py | 43 - .../helpers/hnumpy.py | 57 - .../helpers/hobject.py | 500 - .../helpers/hopen.py | 106 - .../helpers/hpandas.py | 18 - .../helpers/hpandas.py.old | 2684 ---- .../helpers/hpandas_analysis.py | 628 - .../helpers/hpandas_check_summary.py | 111 - .../helpers/hpandas_clean.py | 282 - .../helpers/hpandas_compare.py | 289 - .../helpers/hpandas_conversion.py | 221 - .../helpers/hpandas_dassert.py | 371 - .../helpers/hpandas_display.py | 302 - .../helpers/hpandas_io.py | 128 - .../helpers/hpandas_multiindex.py | 183 - .../helpers/hpandas_stats.py | 527 - .../helpers/hpandas_transform.py | 1023 -- .../helpers/hpandas_utils.py | 649 - .../helpers/hparquet.py | 1309 -- .../helpers/hparser.py | 1151 -- .../helpers/hpickle.py | 253 - .../helpers/hplayback.py | 495 - .../helpers/hprint.py | 1076 -- .../helpers/hpytest.py | 266 - .../helpers/hretry.py | 94 - .../helpers/hs3.py | 1129 -- .../helpers/hsecrets.py | 233 - .../helpers/hserver.py | 1160 -- .../helpers/hsftp.py | 204 - .../helpers/hslack.py | 66 - .../helpers/hsql.py | 36 - .../helpers/hsql_implementation.py | 954 -- .../helpers/hsql_test.py | 273 - .../helpers/hstring.py | 176 - .../helpers/hsystem.py | 1097 -- .../helpers/htable.py | 180 - .../helpers/htest_logger.py | 48 - .../helpers/htext_protect.py | 262 - .../helpers/hthreading.py | 43 - .../helpers/htimer.py | 275 - .../helpers/htqdm.py | 48 - .../helpers/htraceback.py | 228 - .../helpers/htranslate.py | 109 - .../helpers/htypes.py | 11 - .../helpers/hunit_test.py | 1876 --- .../helpers/hunit_test_purification.py | 450 - .../helpers/hunit_test_utils.py | 586 - .../helpers/hversion.py | 300 - .../helpers/hwall_clock_time.py | 125 - .../helpers/hwarnings.py | 156 - .../helpers/lib_tasks.py | 37 - .../helpers/lib_tasks_aws.py | 407 - .../helpers/lib_tasks_bash.py | 104 - .../helpers/lib_tasks_docker.py | 1590 -- 
.../helpers/lib_tasks_docker_release.py | 1890 --- .../helpers/lib_tasks_find.py | 606 - .../helpers/lib_tasks_gh.py | 1252 -- .../helpers/lib_tasks_git.py | 1500 -- .../helpers/lib_tasks_integrate.py | 837 - .../helpers/lib_tasks_lint.py | 444 - .../helpers/lib_tasks_perms.py | 380 - .../helpers/lib_tasks_print.py | 103 - .../helpers/lib_tasks_pytest.py | 1743 --- .../helpers/lib_tasks_utils.py | 395 - .../helpers/logging_testing/__init__.py | 0 .../helpers/logging_testing/logging_main.py | 81 - .../helpers/logging_testing/logging_module.py | 10 - .../helpers/notebooks/conftest.py | 17 - .../helpers/notebooks/hcache.tutorial.ipynb | 638 - .../helpers/notebooks/hcache.tutorial.py | 274 - .../notebooks/hcache_simple.tutorial.ipynb | 653 - .../notebooks/hcache_simple.tutorial.py | 257 - .../hgoodle_drive_api.tutorial.ipynb | 424 - .../notebooks/hgoodle_drive_api.tutorial.py | 107 - .../helpers/notebooks/hllm.tutorial.ipynb | 13040 ---------------- .../helpers/notebooks/hllm.tutorial.py | 118 - .../notebooks/hplayback.tutorial.ipynb | 993 -- .../helpers/notebooks/hplayback.tutorial.py | 374 - .../helpers/notebooks/parquet.tutorial.ipynb | 1774 --- .../helpers/notebooks/parquet.tutorial.py | 304 - .../helpers/notebooks/s3.tutorial.ipynb | 210 - .../helpers/notebooks/s3.tutorial.py | 44 - .../helpers/notebooks/sage.tutorial.ipynb | 448 - .../helpers/notebooks/sage.tutorial.py | 98 - .../helpers/old/__init__.py | 0 .../helpers/old/conda.py | 192 - .../helpers/old/conftest.py | 17 - .../helpers/old/env2.py | 75 - .../helpers/old/tunnels.py | 267 - .../helpers/old/user_credentials.py | 208 - .../pandoc_docker_files/install-texlive.sh | 113 - .../helpers/pandoc_docker_files/packages.txt | 115 - .../pandoc_docker_files/texlive.profile | 32 - .../helpers/repo_config_utils.py | 411 - .../helpers/stage_linked_file.py | 83 - .../helpers/telegram_notify/__init__.py | 0 .../helpers/telegram_notify/config.py | 30 - .../helpers/telegram_notify/get_chat_id.py | 76 - 
.../telegram_notify/telegram_notify.py | 155 - .../helpers/test/__init__.py | 0 .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../TestDataframeToJson.test1/output/test.txt | 31 - .../TestDataframeToJson.test2/output/test.txt | 13 - .../TestDataframeToJson.test3/output/test.txt | 13 - .../TestDataframeToJson.test4/output/test.txt | 13 - .../output/test.txt | 31 - .../output/test.txt | 13 - .../output/test.txt | 13 - .../output/test.txt | 13 - .../output/test.txt | 4 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 4 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 2 - .../output/test.txt | 2 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 0 .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 0 .../output/test.txt | 0 .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 0 .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 0 .../input/test.txt | 101 - .../output/test.txt | 8 - .../TestGetDocstrings.test1/input/test.txt | 18 - .../output/test.txt | 52 - .../output/test.txt | 52 - .../output/test.txt | 30 - .../output/test.txt | 20 - 
.../output/test.txt | 19 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 23 - .../output/test.txt | 19 - .../output/test.txt | 21 - .../output/test.txt | 15 - .../output/test.txt | 17 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 22 - .../output/test.txt | 23 - .../output/test.txt | 19 - .../output/test.txt | 18 - .../output/test.txt | 19 - .../output/test.txt | 30 - .../output/test.txt | 1 - .../output/test.txt | 20 - .../output/test.txt | 30 - .../output/test.txt | 30 - .../output/test.txt | 65 - .../Test_CheckSummary.test1/output/test.txt | 4 - .../Test_CheckSummary.test2/output/test.txt | 4 - .../output/test.txt | 19 - .../output/test.txt | 5 - .../output/test.txt | 9 - .../output/test.txt | 9 - .../input/tmp.cache_simple._llm.json | 10 - .../Test_apply_nan_mode.test1/output/test.txt | 41 - .../Test_apply_nan_mode.test2/output/test.txt | 33 - .../Test_apply_nan_mode.test3/output/test.txt | 41 - .../Test_apply_nan_mode.test4/output/test.txt | 38 - .../Test_apply_nan_mode.test5/output/test.txt | 41 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test_df.txt | 3 - .../input/test.csv | 5 - .../Test_dassert1.test2/output/test.txt | 5 - .../Test_dassert1.test3/output/test.txt | 6 - .../Test_dassert1.test4/output/test.txt | 6 - .../Test_dassert1.test5/output/test.txt | 8 - .../Test_dassert1.test6/output/test.txt | 8 - .../Test_dassert1.test7/output/test.txt | 1 - .../Test_dassert_eq1.test3/output/test.txt | 8 - .../Test_dassert_eq1.test4/output/test.txt | 8 - .../Test_dassert_eq1.test5/output/test.txt | 10 - .../output/test.txt | 1 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 8 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 9 - .../output/test.txt | 9 - .../output/test.txt | 8 - .../output/test.txt | 1 - .../output/test.txt | 
28 - .../output/test.txt | 28 - .../output/test.txt | 26 - .../output/test.txt | 27 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../Test_from_typed_csv.test1/input/test.csv | 2 - .../input/test.csv.types | 1 - .../output/test.txt | 58 - .../output/test.txt | 58 - .../output/test.txt | 60 - .../output/test.txt | 57 - .../output/test.txt | 56 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../input/result_0/config.pkl | Bin 405 -> 0 bytes .../input/result_0/config.txt | 7 - .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.pkl | Bin 405 -> 0 bytes .../input/result_1/config.txt | 7 - .../input/result_1/run_notebook.1.log | 0 .../input/result_0/config.txt | 7 - .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.txt | 7 - .../input/result_1/run_notebook.1.log | 0 .../output/test.txt | 45 - .../input/test.json | 17 - .../Test_obj_to_str1.test1/output/test.txt | 11 - .../Test_obj_to_str1.test2/output/test.txt | 11 - .../Test_obj_to_str1.test3/output/test.txt | 11 - .../Test_obj_to_str1.test4/output/test.txt | 12 - .../Test_obj_to_str1.test5/output/test.txt | 12 - .../Test_obj_to_str1.test6/output/test.txt | 12 - .../Test_obj_to_str2.test1/output/test.txt | 11 - .../Test_obj_to_str2.test2/output/test.txt | 11 - .../Test_obj_to_str2.test3/output/test.txt | 11 - .../Test_obj_to_str2.test4/output/test.txt | 11 - .../Test_obj_to_str2.test5/output/test.txt | 11 - .../Test_obj_to_str2.test6/output/test.txt | 11 - .../output/test.txt | 1 - .../Test_open_html.test_mac1/output/test.txt | 1 - .../output/test.txt | 1 - .../Test_open_pdf.test_mac1/output/test.txt | 1 - .../input/test.txt | 16 - .../output/test.txt | 20 - .../Test_process_lines1.test1/input/test.txt | 16 - .../Test_process_lines1.test1/output/test.txt | 20 - .../output/test.txt | 2 - .../input/cache/lastfailed 
| 12 - .../output/test.txt | 15 - .../input/log.txt | 325 - .../output/test.txt | 10 - .../input/log.txt | 10 - .../output/test.txt | 8 - .../input/log.txt | 61 - .../output/test.txt | 61 - .../input/log.txt | 36 - .../output/test.txt | 36 - .../input/log.txt | 2533 --- .../output/test.txt | 41 - .../input/log.txt | 396 - .../output/test.txt | 399 - .../input/test.txt | 7 - .../input/test.txt | 16 - .../output/test.txt | 16 - .../input/test.txt | 9 - .../output/test.txt | 7 - .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 71 - .../output/test.txt | 40 - .../output/test.txt | 40 - .../output/test.txt | 4 - .../output/test.txt | 1 - .../Test_system1.test7/output/test.txt | 16 - .../Test_to_typed_csv.test1/input/test.csv | 2 - .../helpers/test/test_create_link.py | 136 - .../helpers/test/test_hasyncio.py | 96 - .../helpers/test/test_haws.py | 276 - .../helpers/test/test_hcache.py | 1002 -- .../helpers/test/test_hcache_simple.py | 1815 --- .../helpers/test/test_hcfile.py | 335 - .../helpers/test/test_hcsv.py | 81 - .../helpers/test/test_hdataframe.py | 299 - .../helpers/test/test_hdatetime.py | 932 -- .../helpers/test/test_hdbg.py | 934 -- .../helpers/test/test_hdict.py | 107 - .../helpers/test/test_hdocker.py | 624 - .../helpers/test/test_hdocker_tests.py | 158 - .../helpers/test/test_henv.py | 17 - .../helpers/test/test_hfile_tree.py | 347 - .../helpers/test/test_hgit.py | 822 - .../helpers/test/test_hintrospection.py | 406 - .../helpers/test/test_hio.py | 225 - .../helpers/test/test_hlatex.py | 665 - .../helpers/test/test_hlist.py | 176 - .../helpers/test/test_hllm.py | 361 - .../helpers/test/test_hllm_cli.py | 1403 -- .../helpers/test/test_hlogging.py | 103 - .../helpers/test/test_hmarkdown_bullets.py | 716 - .../helpers/test/test_hmarkdown_coloring.py | 205 - .../helpers/test/test_hmarkdown_div_blocks.py | 355 - .../test/test_hmarkdown_fenced_blocks.py | 218 - .../helpers/test/test_hmarkdown_filtering.py | 449 - 
.../helpers/test/test_hmarkdown_formatting.py | 1403 -- .../helpers/test/test_hmarkdown_headers.py | 2002 --- .../helpers/test/test_hmarkdown_rules.py | 377 - .../helpers/test/test_hmarkdown_slides.py | 399 - .../helpers/test/test_hmarkdown_tables.py | 196 - .../helpers/test/test_hmarkdown_toc.py | 228 - .../helpers/test/test_hmkdocs.py | 394 - .../helpers/test/test_hmodule.py | 25 - .../helpers/test/test_hnumpy.py | 215 - .../helpers/test/test_hobject.py | 392 - .../helpers/test/test_hopen.py | 92 - .../helpers/test/test_hpandas_analysis.py | 42 - .../test/test_hpandas_check_summary.py | 67 - .../helpers/test/test_hpandas_clean.py | 364 - .../helpers/test/test_hpandas_compare.py | 650 - .../helpers/test/test_hpandas_conversion.py | 276 - .../helpers/test/test_hpandas_dassert.py | 448 - .../helpers/test/test_hpandas_display.py | 685 - .../helpers/test/test_hpandas_io.py | 43 - .../helpers/test/test_hpandas_multiindex.py | 680 - .../helpers/test/test_hpandas_stats.py | 426 - .../helpers/test/test_hpandas_transform.py | 1888 --- .../helpers/test/test_hpandas_utils.py | 251 - .../helpers/test/test_hparquet.py | 1468 -- .../helpers/test/test_hparser.py | 398 - .../helpers/test/test_hpickle.py | 97 - .../helpers/test/test_hplayback.py | 506 - .../helpers/test/test_hprint.py | 844 - .../helpers/test/test_hpytest.py | 228 - .../helpers/test/test_hretry.py | 154 - .../helpers/test/test_hs3.py | 597 - .../helpers/test/test_hsecrets.py | 209 - .../helpers/test/test_hserver.py | 321 - .../helpers/test/test_hslack.py | 81 - .../helpers/test/test_hsql.py | 29 - .../helpers/test/test_hstring.py | 270 - .../helpers/test/test_hsystem.py | 494 - .../helpers/test/test_htable.py | 159 - .../helpers/test/test_htext_protect.py | 578 - .../helpers/test/test_htimer.py | 24 - .../helpers/test/test_htraceback.py | 474 - .../helpers/test/test_hunit_test.py | 954 -- .../helpers/test/test_hunit_test_mock.py | 288 - .../test/test_hunit_test_purification.py | 1065 -- 
.../helpers/test/test_hunit_test_utils.py | 347 - .../helpers/test/test_hversion.py | 74 - .../helpers/test/test_joblib_helpers.py | 569 - .../helpers/test/test_lib_tasks.py | 540 - .../helpers/test/test_lib_tasks_docker.py | 494 - .../test/test_lib_tasks_docker_release.py | 1530 -- .../helpers/test/test_lib_tasks_find.py | 267 - .../helpers/test/test_lib_tasks_gh.py | 133 - .../helpers/test/test_lib_tasks_git.py | 249 - .../helpers/test/test_lib_tasks_integrate.py | 27 - .../helpers/test/test_lib_tasks_lint.py | 32 - .../helpers/test/test_lib_tasks_pytest.py | 1163 -- .../helpers/test/test_lib_tasks_utils.py | 301 - .../test_master_buildmeister_dashboard.py | 74 - .../helpers/test/test_repo_config_amp.py | 284 - .../helpers/test/test_repo_config_utils.py | 65 - .../playbook.yaml | 4 +- 454 files changed, 2 insertions(+), 127277 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py delete mode 
100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_aws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py delete mode 100755 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt delete 
mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/run_notebook.0.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/run_notebook.1.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_load_df_from_json.test1/input/test.json delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt 
delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md deleted file mode 100644 index 8578eccd3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md +++ /dev/null @@ -1,285 +0,0 @@ -# Summary - -The `helpers/` directory is the core Python library providing utilities, -development tools, and infrastructure components for the helpers ecosystem. -Modules follow the `h` naming convention and are organized by domain. - -# Directory Structure - -- `logging_testing/` - - Utilities for testing logging behavior across modules -- `notebooks/` - - Jupyter notebooks and tutorials (e.g., hcache_simple usage) -- `old/` - - Deprecated and archived modules (conda, tunnels, user_credentials) -- `pandoc_docker_files/` - - Docker setup files and package lists for pandoc and texlive -- `telegram_notify/` - - Telegram bot notification module with config and chat ID utilities -- `test/` - - Unit tests for all modules (90+ test files organized by module name) - -# Files - -## Core Infrastructure - -- `hdbg.py` - - Debugging utilities with specialized assertions, logging, and fatal error handling -- `hio.py` - - Filesystem operations, file read/write, and directory management utilities -- `hsystem.py` - - System interaction: shell commands, environment variables, process management -- `henv.py` - - Environment variable checks and module installation management -- `hserver.py` - - Identify which server the code is running on -- `hversion.py` - - Code version control and Docker container compatibility checking -- `hlogging.py` - - Logging configuration, custom formatters, and logging utilities -- `hwarnings.py` - - Suppress annoying Python warnings when imported -- `htraceback.py` - - Traceback parsing, formatting, and manipulation utilities -- `hprint.py` - - Debugging and pretty-printing utilities for Python objects -- `hparser.py` - - Argparse helpers: 
verbosity, action, limit-range, and other standard arguments -- `hobject.py` - - Introspect and print the state of a Python object -- `hintrospection.py` - - Python introspection and module analysis utilities -- `hmodule.py` - - Dynamic module installation and import management utilities -- `htimer.py` - - Timer class for measuring and reporting elapsed time -- `htqdm.py` - - tqdm progress bar stream redirected to Python logger -- `hthreading.py` - - Timeout decorator to enforce execution time limits on functions -- `hretry.py` - - Retry decorators for synchronous and asynchronous functions -- `hasyncio.py` - - Async/await utilities and coroutine management for asyncio -- `hnetwork.py` - - Network utilities including URL availability checking -- `hopen.py` - - Cross-platform file opening utility -- `htypes.py` - - General type aliases and type utilities based on standard Python libraries -- `hwall_clock_time.py` - - Wall clock time simulation and management for testing and replays - -## Data Processing - -- `hpandas.py` - - Pandas utilities aggregating all hpandas_* submodules -- `hpandas_analysis.py` - - Statistical analysis and ML-related functions for pandas DataFrames -- `hpandas_check_summary.py` - - DataFrame check and summary reporting utilities -- `hpandas_clean.py` - - DataFrame cleaning operations (deduplicate, fill NaN, sanitize) -- `hpandas_compare.py` - - DataFrame comparison utilities for diffing and equality checks -- `hpandas_conversion.py` - - DataFrame and Series conversion and casting utilities -- `hpandas_dassert.py` - - Pandas-specific assertions and validation functions -- `hpandas_display.py` - - DataFrame display formatting and signature generation -- `hpandas_io.py` - - Pandas I/O operations for local and S3 storage -- `hpandas_multiindex.py` - - MultiIndex creation, manipulation, and access operations -- `hpandas_stats.py` - - Pandas statistics, duration computation, and time-series helpers -- `hpandas_transform.py` - - DataFrame 
transformation operations (pivot, reshape, normalize) -- `hpandas_utils.py` - - General-purpose pandas utilities and helper functions -- `hdataframe.py` - - Lower-level helper functions for processing pandas DataFrames -- `hnumpy.py` - - NumPy utilities, array helpers, and random seed management -- `hnumba.py` - - Numba JIT compilation wrapper and acceleration utilities -- `hparquet.py` - - Parquet file read/write operations using pyarrow -- `hcsv.py` - - CSV file operations and DataFrame I/O utilities -- `hdatetime.py` - - Date/time manipulation, parsing, and timezone handling utilities -- `hdict.py` - - Dictionary manipulation and nested dictionary operation utilities -- `hlist.py` - - List manipulation, deduplication, and membership utilities -- `hstring.py` - - String manipulation, formatting, and transformation utilities -- `htable.py` - - Lightweight rectangular table class with no pandas dependency - -## Caching and Performance - -- `hcache.py` - - Advanced function caching using joblib with S3 and git integration -- `hcache_simple.py` - - Simple caching with JSON or pickle file-based storage backends -- `hjoblib.py` - - Joblib parallelization, memory caching, and job management -- `hpickle.py` - - Pickle and JSON serialization and deserialization routines - -## Testing Framework - -- `hunit_test.py` - - Enhanced unit testing framework built on unittest and pytest with golden files -- `hunit_test_purification.py` - - Text purification utilities to sanitize test output for comparison -- `hunit_test_utils.py` - - Unit test utilities including test renaming and helpers -- `hpytest.py` - - Pytest integration utilities and test artifact handling -- `hcoverage.py` - - Code coverage utilities and test coverage analysis helpers -- `hplayback.py` - - Automatically generate unit tests by recording and replaying function calls -- `htest_logger.py` - - Test logging script template -- `hmoto.py` - - AWS service mocking with moto for unit testing - -## Markdown Processing 
- -- `hmarkdown.py` - - Markdown processing entry point aggregating all hmarkdown_* submodules -- `hmarkdown_bullets.py` - - Markdown bullet point processing and formatting -- `hmarkdown_coloring.py` - - Markdown text coloring utilities for LaTeX and HTML output -- `hmarkdown_comments.py` - - Markdown comment detection, extraction, and removal utilities -- `hmarkdown_div_blocks.py` - - Utilities for handling HTML div blocks within markdown files -- `hmarkdown_fenced_blocks.py` - - Fenced code block parsing and manipulation in markdown -- `hmarkdown_filtering.py` - - Markdown section extraction and content filtering utilities -- `hmarkdown_formatting.py` - - Markdown text formatting and whitespace normalization utilities -- `hmarkdown_headers.py` - - Markdown header manipulation, extraction, and level adjustment -- `hmarkdown_rules.py` - - Markdown rule validation and processing utilities -- `hmarkdown_slides.py` - - Markdown slide extraction, splitting, and processing for presentations -- `hmarkdown_tables.py` - - Markdown table parsing, formatting, and manipulation utilities -- `hmarkdown_toc.py` - - Markdown table of contents generation and YAML frontmatter handling -- `hlint.py` - - Linting utilities for text and code files -- `htext_protect.py` - - Utilities for protecting content regions during text processing - -## External Services and Cloud - -- `haws.py` - - AWS services integration with boto3 client and resource management -- `hs3.py` - - S3 file operations, listing, and S3-backed filesystem utilities -- `hsecrets.py` - - AWS Secrets Manager integration for secret retrieval -- `htranslate.py` - - AWS Translate service wrapper for text translation -- `hgit.py` - - Git repository operations, branch management, and diff utilities -- `hdocker.py` - - Docker container operations, image management, and Docker utilities -- `hdocker_tests.py` - - Utilities for running tests inside Docker containers -- `hdockerized_executables.py` - - Wrappers for Dockerized 
executables: prettier, pandoc, latex, and others -- `hgoogle_drive_api.py` - - Google Drive and Google Sheets API integration utilities -- `hchatgpt.py` - - OpenAI API integration with file management and chat utilities -- `hchatgpt_instructions.py` - - ChatGPT system instructions and prompt templates -- `hllm.py` - - LLM API integration with caching, cost tracking, and response handling -- `hllm_cli.py` - - LLM CLI interaction wrapper and cost estimation utilities -- `hllm_cost.py` - - LLM cost calculation for OpenRouter and other APIs -- `hslack.py` - - Slack notification utilities for sending messages to channels -- `hemail.py` - - Email sending utilities via SMTP -- `hsftp.py` - - SFTP file transfer operations using pysftp -- `hsql.py` - - SQL database operations as a PostgreSQL wrapper -- `hsql_implementation.py` - - Low-level SQL implementation with psycopg2 driver -- `hsql_test.py` - - SQL testing utilities, fixtures, and database test helpers -- `asana_utils.py` - - Enhanced Asana analytics with time estimation and team grouping -- `github_utils.py` - - GitHub API utilities for caching and repository data retrieval - -## Notebooks and Visualization - -- `hnotebook.py` - - Jupyter notebook configuration and display setup utilities -- `hjupyter.py` - - Jupyter notebook execution and output capture utilities -- `hmatplotlib.py` - - Matplotlib utilities, figure management, and plotting helpers -- `hmkdocs.py` - - MkDocs-specific markdown generation and documentation utilities -- `hlatex.py` - - LaTeX conversion utilities using pandoc - -## Miscellaneous - -- `hfile_tree.py` - - Directory tree building and formatted output utilities -- `hcfile.py` - - C file parsing and transformation utilities -- `repo_config_utils.py` - - Repository configuration utilities loaded from YAML -- `stage_linked_file.py` - - Symbolic link staging utility for git operations - -## Task System (`lib_tasks_*.py`) - -- `lib_tasks.py` - - Entry point that aggregates all invoke task 
modules -- `lib_tasks_aws.py` - - Invoke tasks for AWS operations and deployments -- `lib_tasks_bash.py` - - Invoke tasks for bash script execution -- `lib_tasks_docker.py` - - Invoke tasks for Docker build, run, and management operations -- `lib_tasks_docker_release.py` - - Invoke tasks for Docker image release and publishing workflows -- `lib_tasks_find.py` - - Invoke tasks for searching and finding files in the repo -- `lib_tasks_gh.py` - - Invoke tasks for GitHub pull requests and issues -- `lib_tasks_git.py` - - Invoke tasks for git branch, merge, and commit operations -- `lib_tasks_integrate.py` - - Invoke tasks for integrating changes between repositories -- `lib_tasks_lint.py` - - Invoke tasks for linting and code quality checks -- `lib_tasks_perms.py` - - Invoke tasks for managing file permissions -- `lib_tasks_print.py` - - Invoke tasks for printing setup and environment info -- `lib_tasks_pytest.py` - - Invoke tasks for running pytest suites (fast, slow, superslow) -- `lib_tasks_utils.py` - - Shared utilities and helpers used across task modules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py deleted file mode 100644 index 0aa7f7f4b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py +++ /dev/null @@ -1,1156 +0,0 @@ -""" -Enhanced Asana Analytics with Time Estimation and Team Grouping. 
- -Import as: - -import helpers.asana_utils as hasautil -""" - -import datetime as datetime_lib -import json -import logging -import os -from typing import Any, Dict, List, Optional - -import asana -import asana.rest as arest -import dateutil.parser as dateutil_parser -import pandas as pd - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# EnhancedAsanaAnalytics -# ############################################################################# - - -class EnhancedAsanaAnalytics: - def __init__(self, access_token: Optional[str] = None) -> None: - # Get token from parameter or environment variable. - token = access_token or os.getenv("ASANA_ACCESS_TOKEN") - if not token: - raise ValueError( - "Asana access token must be provided or set in ASANA_ACCESS_TOKEN" - ) - # Initialize Asana API client with access token. - configuration = asana.Configuration() - configuration.access_token = token - self.api_client = asana.ApiClient(configuration) - # Initialize API endpoints. - self.workspaces_api = asana.WorkspacesApi(self.api_client) - self.users_api = asana.UsersApi(self.api_client) - self.tasks_api = asana.TasksApi(self.api_client) - self.stories_api = asana.StoriesApi(self.api_client) - self.projects_api = asana.ProjectsApi(self.api_client) - self.custom_fields_api = asana.CustomFieldsApi(self.api_client) - - def get_workspace_gid(self, workspace_name: Optional[str] = None) -> str: - """ - Get the workspace GID by name or return the first available workspace. - - Retrieve the GID (Global ID) for an Asana workspace. If no - workspace name is provided, return the GID of the first - workspace available to the user. - - :param workspace_name: name of the workspace to find. - :return: workspace GID as a string - """ - _LOG.info( - "Fetching workspace GID for workspace: %s", - workspace_name or "first available", - ) - # Fetch all available workspaces. 
- opts: Dict[str, Any] = {} - workspaces = self.workspaces_api.get_workspaces(opts) - # Convert to list if needed. - workspace_list = list(workspaces) if workspaces else [] - _LOG.info("Found %s workspaces", len(workspace_list)) - # Check if any workspaces exist. - if not workspace_list: - raise ValueError("No workspaces found") - result = None - # Search for specific workspace by name if provided. - if workspace_name: - for ws in workspace_list: - if ws["name"].lower() == workspace_name.lower(): - _LOG.info( - "Found workspace '%s' with GID: %s", - workspace_name, - ws["gid"], - ) - result = str(ws["gid"]) - break - if result is None: - raise ValueError(f"Workspace '{workspace_name}' not found") - else: - # Return first workspace if no name specified. - _LOG.info( - "Using first workspace: %s (GID: %s)", - workspace_list[0]["name"], - workspace_list[0]["gid"], - ) - result = str(workspace_list[0]["gid"]) - return result - - def get_team_members(self, workspace_gid: str) -> List[Dict[str, Any]]: - """ - Get all team members in a workspace. - - :param workspace_gid: workspace GID to query for users - :return: user information with keys 'gid','name', and 'email' - """ - _LOG.info("Fetching team members for workspace: %s", workspace_gid) - # Fetch all users in the workspace. - opts: Dict[str, Any] = {} - users = self.users_api.get_users_for_workspace(workspace_gid, opts) - # Convert to list if needed. - users_list = list(users) if users else [] - _LOG.info("Found %s team members", len(users_list)) - # Extract relevant user information. - result = [ - {"gid": u["gid"], "name": u["name"], "email": u.get("email", "N/A")} - for u in users_list - ] - # Log member names. - member_names = [r["name"] for r in result] - _LOG.debug("Team members: %s", ", ".join(member_names)) - return result - - def get_user_by_name( - self, workspace_gid: str, username: str - ) -> Optional[Dict[str, Any]]: - """ - Get a specific user by their name in a workspace. 
- - Search for a user by their display name (case-insensitive - partial match). - - :param workspace_gid: workspace GID to search in - :param username: username or partial name to search for - :return: user with 'gid', 'name', and 'email' - """ - _LOG.info("Searching for user: %s", username) - team_members = self.get_team_members(workspace_gid) - res = None - # Search for exact match first. - for team_member in team_members: - if team_member["name"].lower() == username.lower(): - _LOG.info("Found exact match: %s", team_member["name"]) - res = team_member - # Search for partial match. - for team_member in team_members: - if username.lower() in team_member["name"].lower(): - _LOG.info("Found partial match: %s", team_member["name"]) - res = team_member - if res is None: - _LOG.warning("User '%s' not found in workspace", username) - return res - - def get_user_tasks_detailed( - self, - workspace_gid: str, - user_identifier: str, - *, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - ) -> List[Dict[str, Any]]: - """ - Get detailed task information including estimated time. - - Fetch all tasks for a user with extended fields including custom - fields for time estimates, projects, tags, sections, and dates. - - :param workspace_gid: workspace GID to query - :param user_identifier: user GID or username to retrieve tasks - for - :param start_date: start date for filtering tasks by creation - date. - :param end_date: end date for filtering tasks by creation date. - :return: data with name, completion status, timestamps, custom - fields, and project associations - """ - # Resolve username to GID if needed. 
- if not user_identifier.isdigit(): - _LOG.info("Resolving username '%s' to GID", user_identifier) - user = self.get_user_by_name(workspace_gid, user_identifier) - if not user: - _LOG.error("User '%s' not found", user_identifier) - return [] - user_gid = user["gid"] - _LOG.debug("Resolved '%s' to GID: %s", user_identifier, user_gid) - else: - user_gid = user_identifier - _LOG.info("Fetching detailed tasks for user GID: %s", user_gid) - try: - # Define query parameters for task retrieval with extended fields. - opts = { - "assignee": user_gid, - "workspace": workspace_gid, - "opt_fields": ( - "name,completed,completed_at,created_at,modified_at," - "projects.name,projects.gid,num_subtasks,memberships.section.name," - "custom_fields,custom_fields.name,custom_fields.display_value," - "custom_fields.number_value,due_on,due_at,start_on," - "assignee.name,tags.name" - ), - } - # Fetch all tasks for the user. - _LOG.debug("Querying Asana API for detailed tasks...") - tasks = self.tasks_api.get_tasks(opts) - # Convert to list if generator. - tasks_list = list(tasks) if tasks else [] - _LOG.info( - "Retrieved %d tasks from API for user GID: %s", - len(tasks_list), - user_gid, - ) - # Make start_date and end_date timezone-aware if they aren't already. - if start_date and start_date.tzinfo is None: - start_date = start_date.replace(tzinfo=datetime_lib.timezone.utc) - if end_date and end_date.tzinfo is None: - end_date = end_date.replace(tzinfo=datetime_lib.timezone.utc) - # Filter tasks by date range if specified. - filtered_tasks = [] - for task in tasks_list: - # Parse creation date. - created_at = ( - dateutil_parser.parse(task["created_at"]) - if task.get("created_at") - else None - ) - # Apply start date filter. - if start_date and created_at and created_at < start_date: - continue - # Apply end date filter. - if end_date and created_at and created_at > end_date: - continue - # Add task to filtered results. 
- filtered_tasks.append(task) - _LOG.info( - "Filtered to %d tasks within date range for user GID: %s", - len(filtered_tasks), - user_gid, - ) - return filtered_tasks - except arest.ApiException as e: - _LOG.error("API error fetching detailed tasks: %s", e) - raise - except Exception as e: - _LOG.error("Unexpected error fetching detailed tasks: %s", e) - return [] - - def extract_time_estimate(self, task: Dict[str, Any]) -> Optional[float]: - """ - Extract time estimate from custom fields. - - Search through task custom fields for time estimation values. - Looks for common field names like 'estimated time', 'estimate', - 'hours', etc. - - :param task: tasks data containing custom_fields - :return: estimated hours as float, or None if not found - """ - result = None - if not task.get("custom_fields"): - _LOG.debug( - "No custom fields found for task: %s", task.get("gid", "unknown") - ) - return result - # Common field names for time estimates. - time_field_names = [ - "estimated time", - "estimate", - "time estimate", - "hours", - "estimated hours", - "effort", - ] - for field in task["custom_fields"]: - field_name = field.get("name", "").lower() - # Check if field name matches any time estimation pattern. - if any(time_name in field_name for time_name in time_field_names): - # Try number_value first, then display_value. 
- if field.get("number_value") is not None: - result = float(field["number_value"]) / 60.0 - _LOG.debug( - "Found time estimate %s hours in field '%s' for task: %s", - result, - field.get("name"), - task.get("gid", "unknown"), - ) - break - elif field.get("display_value"): - try: - result = float(field["display_value"]) / 60.0 - _LOG.debug( - "Found time estimate %s hours in field '%s' for task: %s", - result, - field.get("name"), - task.get("gid", "unknown"), - ) - break - except (ValueError, TypeError): - _LOG.warning( - "Could not parse display_value '%s' as float for task: %s", - field.get("display_value"), - task.get("gid", "unknown"), - ) - return result - - def get_task_stories(self, task_gid: str) -> List[Dict[str, Any]]: - """ - Get all stories (comments and activity) for a task. - - Fetch all stories including comments, task updates, and system - activities for a specific task. - - :param task_gid: task GID to fetch stories for - :return: data of type, text, created_at, and creator information - """ - _LOG.info("Fetching stories for task: %s", task_gid) - try: - opts = { - "opt_fields": ( - "type,text,created_at,created_by.name,created_by.email," - "resource_subtype,is_edited" - ) - } - stories = self.stories_api.get_stories_for_task(task_gid, opts) - stories_list = list(stories) if stories else [] - _LOG.debug( - "Found %d stories for task %s", len(stories_list), task_gid - ) - return stories_list - except arest.ApiException as e: - _LOG.error("API error fetching stories for task %s: %s", task_gid, e) - return [] - except Exception as e: - _LOG.error( - "Unexpected error fetching stories for task %s: %s", task_gid, e - ) - return [] - - def extract_comment_metrics(self, task_gid: str) -> Dict[str, Any]: - """ - Extract comment and activity metrics for a task. 
- - Analyze all stories for a task to extract metrics including: - - Total comment count - - Unique commenters - - Activity count (system updates) - - Last activity timestamp - - Comment frequency - - :param task_gid: task GID to analyze - :return: comment metrics - """ - stories = self.get_task_stories(task_gid) - # Initialize counters. - num_comments = 0 - num_activities = 0 - unique_commenters = set() - last_activity_at = None - for story in stories: - # Parse created timestamp. - created_at = ( - dateutil_parser.parse(story["created_at"]) - if story.get("created_at") - else None - ) - # Track last activity. - if created_at: - if last_activity_at is None or created_at > last_activity_at: - last_activity_at = created_at - # Categorize story type. - story_type = story.get("type", "") - if story_type == "comment": - num_comments += 1 - # Track unique commenters. - if story.get("created_by") and story["created_by"].get("name"): - unique_commenters.add(story["created_by"]["name"]) - else: - # System activities (status changes, assignments, etc). - num_activities += 1 - result = { - "num_comments": num_comments, - "num_activities": num_activities, - "total_stories": len(stories), - "unique_commenters": len(unique_commenters), - "unique_commenter_names": list(unique_commenters), - "last_activity_at": last_activity_at, - } - _LOG.debug( - "Task %s metrics: %d comments, %d activities, %d unique commenters", - task_gid, - num_comments, - num_activities, - len(unique_commenters), - ) - return result - - def calculate_activity_rate( - self, - created_at: datetime_lib.datetime, - last_activity_at: Optional[datetime_lib.datetime], - num_comments: int, - num_activities: int, - ) -> Dict[str, float]: - """ - Calculate activity rate metrics for a task. - - Compute various activity rate metrics based on task timeline and - activity counts. 
- - :param created_at: task creation timestamp - :param last_activity_at: timestamp of last activity/comment - :param num_comments: total number of comments - :param num_activities: total number of system activities - :return: activity rate metric - """ - now = datetime_lib.datetime.now(datetime_lib.timezone.utc) - - # Calculate task age in days. - task_age_days = (now - created_at).total_seconds() / 86400 - - # Calculate days since last activity. - days_since_activity = None - if last_activity_at: - days_since_activity = ( - now - last_activity_at - ).total_seconds() / 86400 - - # Calculate activity rates (avoid division by zero). - if task_age_days > 0: - comments_per_day = num_comments / task_age_days - activities_per_day = num_activities / task_age_days - total_activity_per_day = ( - num_comments + num_activities - ) / task_age_days - else: - comments_per_day = 0.0 - activities_per_day = 0.0 - total_activity_per_day = 0.0 - - result = { - "task_age_days": task_age_days, - "comments_per_day": comments_per_day, - "activities_per_day": activities_per_day, - "total_activity_per_day": total_activity_per_day, - "days_since_activity": days_since_activity, - } - - return result - - def get_user_tasks_with_activity( - self, - workspace_gid: str, - user_identifier: str, - *, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - include_comments: bool = True, - ) -> List[Dict[str, Any]]: - """ - Get detailed task information including comments and activity metrics. - - Extended version of get_user_tasks_detailed that also fetches - comment and activity data for each task. - - :param workspace_gid: workspace GID to query - :param user_identifier: user GID or username to retrieve tasks - for - :param start_date: start date for filtering tasks by creation - date - :param end_date: end date for filtering tasks by creation date - :param include_comments: if True, fetch comment/activity data - for each task (default: True). 
Set to False for faster - execution - :return: task data with comment and activity metrics included - """ - # Get detailed tasks first. - tasks = self.get_user_tasks_detailed( - workspace_gid, - user_identifier, - start_date=start_date, - end_date=end_date, - ) - - if not include_comments: - return tasks - - _LOG.info("Fetching comment/activity data for %d tasks", len(tasks)) - - # Enhance each task with comment metrics. - for i, task in enumerate(tasks): - if (i + 1) % 10 == 0: - _LOG.info( - "Processing task %d/%d for comments...", i + 1, len(tasks) - ) - - # Get comment metrics. - comment_metrics = self.extract_comment_metrics(task["gid"]) - - # Add metrics to task. - task["num_comments"] = comment_metrics["num_comments"] - task["num_activities"] = comment_metrics["num_activities"] - task["total_stories"] = comment_metrics["total_stories"] - task["unique_commenters"] = comment_metrics["unique_commenters"] - task["unique_commenter_names"] = comment_metrics[ - "unique_commenter_names" - ] - task["last_activity_at"] = comment_metrics["last_activity_at"] - - # Calculate activity rates if we have created_at. - if task.get("created_at"): - created_at = dateutil_parser.parse(task["created_at"]) - activity_rates = self.calculate_activity_rate( - created_at, - comment_metrics["last_activity_at"], - comment_metrics["num_comments"], - comment_metrics["num_activities"], - ) - task.update(activity_rates) - - _LOG.info("Comment/activity data added to all tasks") - return tasks - - def create_task_dataframe( - self, - workspace_gid: str, - user_identifiers: Optional[List[str]] = None, - *, - project_names: Optional[List[str]] = None, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - team_mapping: Optional[Dict[str, str]] = None, - include_comments: bool = False, - ) -> pd.DataFrame: - """ - Create comprehensive task DataFrame for all users. 
- - Build a detailed DataFrame containing all task information for - specified users, with optional filtering by project and date - range. Includes time estimates, sprint information, and team - assignments. - - :param workspace_gid: workspace GID to query - :param user_identifiers: usernames or GIDs to analyze. - :param project_names: project names to filter by and use - as team names (e.g., ["tech-now", "tech-next"]). If - provided, team will be determined from project name - :param start_date: start date for filtering tasks by creation - date - :param end_date: end date for filtering tasks by creation date - :param team_mapping: username to team name. Only - used if project_names is not provided - - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} - :param include_comments: if True, fetch comment/activity data - (default: False). Set to True to include activity metrics - :return: data with columns including user info, task - details, dates, completion status, time estimates, project, - sprint, section, tags, and subtasks - """ - _LOG.info("Creating comprehensive task DataFrame") - # Get users to analyze. - team_members = [] - if user_identifiers: - for user_id in user_identifiers: - if user_id.isdigit(): - # If GID, fetch user info. - opts = {"opt_fields": "name,email"} - user_info = self.users_api.get_user(user_id, opts) - team_members.append( - { - "gid": user_id, - "name": user_info["name"], - "email": user_info.get("email", "N/A"), - } - ) - else: - # If username, resolve to user. - user = self.get_user_by_name(workspace_gid, user_id) - if user: - team_members.append(user) - else: - # Get all team members if no specific users provided. - team_members = self.get_team_members(workspace_gid) - all_task_data = [] - # Process tasks for each team member. - for member in team_members: - _LOG.info("Processing tasks for: %s", member["name"]) - # Fetch detailed tasks for this user. 
- if include_comments: - tasks = self.get_user_tasks_with_activity( - workspace_gid, - member["gid"], - start_date=start_date, - end_date=end_date, - include_comments=True, - ) - else: - tasks = self.get_user_tasks_detailed( - workspace_gid, - member["gid"], - start_date=start_date, - end_date=end_date, - ) - # Process each task. - for task in tasks: - # Parse dates. - created_at = ( - dateutil_parser.parse(task["created_at"]) - if task.get("created_at") - else None - ) - completed_at = ( - dateutil_parser.parse(task["completed_at"]) - if task.get("completed_at") - else None - ) - due_at = ( - dateutil_parser.parse(task["due_at"]) - if task.get("due_at") - else None - ) - # Check if task is overdue. - is_overdue = False - if not task.get("completed") and due_at: - is_overdue = due_at < datetime_lib.datetime.now( - datetime_lib.timezone.utc - ) - # Extract time estimate from custom fields. - estimated_hours = self.extract_time_estimate(task) - # Calculate actual hours if task is completed. - actual_hours = None - if completed_at and created_at: - actual_hours = ( - completed_at - created_at - ).total_seconds() / 3600 - # Extract projects, tags, and sections. - projects = [p["name"] for p in task.get("projects", [])] - project_gids = [p["gid"] for p in task.get("projects", [])] - tags = [t["name"] for t in task.get("tags", [])] - # Extract sections (sprints in Asana). - sections = [] - sprints = [] - if task.get("memberships"): - for membership in task["memberships"]: - if membership.get("section"): - section_name = membership["section"]["name"] - sections.append(section_name) - # Identify sprint sections using common patterns. - if any( - keyword in section_name.lower() - for keyword in [ - "sprint", - "iteration", - "cycle", - "week", - ] - ): - sprints.append(section_name) - # Build task data dictionary. - task_data = { - # User info. - "user_name": member["name"], - "user_email": member["email"], - "user_gid": member["gid"], - # Task info. 
- "task_name": task.get("name", "Untitled"), - "task_gid": task["gid"], - # Dates. - "created_at": created_at, - "completed_at": completed_at, - "due_on": task.get("due_on"), - "due_at": due_at, - "start_on": task.get("start_on"), - # Status. - "is_completed": task.get("completed", False), - "is_overdue": is_overdue, - # Time tracking. - "estimated_hours": estimated_hours, - "actual_hours": actual_hours, - # Organization. - "project": projects[0] if projects else None, - "all_projects": ", ".join(projects) if projects else None, - "project_gid": project_gids[0] if project_gids else None, - "tags": ", ".join(tags) if tags else None, - "section": sections[0] if sections else None, - "sprint": sprints[0] if sprints else None, - "all_sprints": ", ".join(sprints) if sprints else None, - "num_subtasks": task.get("num_subtasks", 0), - } - # Add comment/activity metrics if included. - if include_comments: - task_data.update( - { - "num_comments": task.get("num_comments", 0), - "num_activities": task.get("num_activities", 0), - "total_stories": task.get("total_stories", 0), - "unique_commenters": task.get( - "unique_commenters", 0 - ), - "last_activity_at": task.get("last_activity_at"), - "task_age_days": task.get("task_age_days", 0), - "comments_per_day": task.get( - "comments_per_day", 0.0 - ), - "activities_per_day": task.get( - "activities_per_day", 0.0 - ), - "total_activity_per_day": task.get( - "total_activity_per_day", 0.0 - ), - "days_since_activity": task.get( - "days_since_activity" - ), - } - ) - # Add team - either from project name or mapping. - if project_names: - # Determine team from project name. - task_data["team"] = task_data["project"] - elif team_mapping: - task_data["team"] = team_mapping.get( - member["name"], "Unassigned" - ) - else: - # No team mapping, use project as team (default). - task_data["team"] = task_data["project"] - all_task_data.append(task_data) - # Create DataFrame. 
- df = pd.DataFrame(all_task_data) - # Filter by project if specified. - if project_names and len(df) > 0: - df = df[df["project"].isin(project_names)] - _LOG.info( - "Filtered to %d tasks from projects: %s", len(df), project_names - ) - _LOG.info("Created DataFrame with %d tasks", len(df)) - result = df - return result - - def create_team_comparison_df( - self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None - ) -> pd.DataFrame: - """ - Create team-level comparison DataFrame from task DataFrame. - - Aggregate task-level data to team-level metrics for comparison - across teams. Requires task DataFrame to have 'team' column. - - :param task_df: data with 'team' column - :param metrics: metrics to calculate. If None, calculate all - :return: data with team-level aggregated metrics - """ - if "team" not in task_df.columns: - _LOG.error( - "task_df missing 'team' column. Available columns: %s", - task_df.columns.tolist(), - ) - raise ValueError( - "task_df must have 'team' column. Pass team_mapping or " - "project_names to create_task_dataframe()" - ) - - _LOG.info("Creating team comparison DataFrame") - _LOG.info("Found %d unique teams in data", task_df["team"].nunique()) - - # Set default metrics if not provided. - if metrics is None: - metrics = [ - "total_tasks", - "completed_tasks", - "in_progress_tasks", - "completion_rate", - "total_estimated_hours", - "avg_estimated_hours", - "total_actual_hours", - "overdue_tasks", - "overdue_rate", - "unique_users", - ] - team_stats = [] - # Calculate metrics for each team. - for team_name in task_df["team"].unique(): - if team_name is None or ( - isinstance(team_name, float) and pd.isna(team_name) - ): - _LOG.warning("Skipping None/NaN team name") - continue - - team_data = task_df[task_df["team"] == team_name] - _LOG.debug( - "Processing team: %s (%d tasks)", team_name, len(team_data) - ) - - stats = {"team": team_name} - # Calculate each requested metric. 
- if "total_tasks" in metrics: - stats["total_tasks"] = len(team_data) - if "completed_tasks" in metrics: - stats["completed_tasks"] = team_data["is_completed"].sum() - if "in_progress_tasks" in metrics: - stats["in_progress_tasks"] = (~team_data["is_completed"]).sum() - if "completion_rate" in metrics: - if len(team_data) > 0: - stats["completion_rate"] = ( - stats["completed_tasks"] / len(team_data) - ) * 100 - else: - stats["completion_rate"] = 0.0 - if "total_estimated_hours" in metrics: - stats["total_estimated_hours"] = team_data[ - "estimated_hours" - ].sum() - if "avg_estimated_hours" in metrics: - stats["avg_estimated_hours"] = team_data[ - "estimated_hours" - ].mean() - if "total_actual_hours" in metrics: - stats["total_actual_hours"] = team_data["actual_hours"].sum() - if "overdue_tasks" in metrics: - stats["overdue_tasks"] = team_data["is_overdue"].sum() - if "overdue_rate" in metrics: - active_tasks = (~team_data["is_completed"]).sum() - if active_tasks > 0: - stats["overdue_rate"] = ( - stats["overdue_tasks"] / active_tasks - ) * 100 - else: - stats["overdue_rate"] = 0.0 - if "unique_users" in metrics: - stats["unique_users"] = team_data["user_name"].nunique() - team_stats.append(stats) - - _LOG.info("Team comparison completed for %d teams", len(team_stats)) - result = pd.DataFrame(team_stats) - return result - - def create_user_comparison_df( - self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None - ) -> pd.DataFrame: - """ - Create user-level comparison DataFrame with aggregated metrics. - - Aggregate task-level data to user-level metrics for individual - performance comparison. - - :param task_df: tasks data - :param metrics: metrics to calculate. If None, calculate all - :return: data with user-level aggregated metrics - """ - # Set default metrics if not provided. 
- if metrics is None: - metrics = [ - "total_tasks", - "completed_tasks", - "completion_rate", - "total_estimated_hours", - "avg_estimated_hours", - "overdue_tasks", - "unique_projects", - ] - user_stats = [] - # Calculate metrics for each user. - for user_name in task_df["user_name"].unique(): - user_data = task_df[task_df["user_name"] == user_name] - stats = { - "user_name": user_name, - "user_email": user_data["user_email"].iloc[0], - } - # Add team if available. - if "team" in task_df.columns: - stats["team"] = user_data["team"].iloc[0] - # Calculate each requested metric. - if "total_tasks" in metrics: - stats["total_tasks"] = len(user_data) - if "completed_tasks" in metrics: - stats["completed_tasks"] = user_data["is_completed"].sum() - if "completion_rate" in metrics: - if len(user_data) > 0: - stats["completion_rate"] = ( - stats["completed_tasks"] / len(user_data) - ) * 100 - else: - stats["completion_rate"] = 0.0 - if "total_estimated_hours" in metrics: - stats["total_estimated_hours"] = user_data[ - "estimated_hours" - ].sum() - if "avg_estimated_hours" in metrics: - stats["avg_estimated_hours"] = user_data[ - "estimated_hours" - ].mean() - if "overdue_tasks" in metrics: - stats["overdue_tasks"] = user_data["is_overdue"].sum() - if "unique_projects" in metrics: - projects = user_data["all_projects"].dropna() - unique_projects = set() - for proj_str in projects: - unique_projects.update(proj_str.split(", ")) - stats["unique_projects"] = len(unique_projects) - user_stats.append(stats) - result = pd.DataFrame(user_stats) - return result - - -# ############################################################################# -# Convenience functions -# ############################################################################# - - -def list_workspace_users( - workspace_name: str, *, access_token: Optional[str] = None -) -> List[str]: - """ - Get all usernames in a workspace. - - Convenience function to quickly see all available users in a - workspace. 
- - :param workspace_name: name of workspace to query - :param access_token: Asana access token - :return: usernames (display names) - """ - # Initialize analytics instance. - analytics_instance = EnhancedAsanaAnalytics(access_token) - # Get workspace GID. - workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) - # Get team members. - team_members = analytics_instance.get_team_members(workspace_gid_local) - # Extract usernames. - result = [member["name"] for member in team_members] - return result - - -def get_user_by_name( - workspace_name: str, - username: str, - *, - access_token: Optional[str] = None, -) -> Optional[Dict[str, Any]]: - """ - Get a specific user by their name in a workspace. - - Convenience function to find a user without instantiating the class. - - :param workspace_name: name of workspace to search in - :param username: username or partial name to search for - :param access_token: Asana access token - :return: user with 'gid', 'name', and 'email', or None if not found - """ - # Initialize analytics instance. - analytics_instance = EnhancedAsanaAnalytics(access_token) - # Get workspace GID. - workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) - # Find user. - result = analytics_instance.get_user_by_name(workspace_gid_local, username) - return result - - -def create_kibana_ready_dataset( - workspace_name: str, - start_date: datetime_lib.datetime, - end_date: datetime_lib.datetime, - *, - project_names: Optional[List[str]] = None, - team_mapping: Optional[Dict[str, str]] = None, - access_token: Optional[str] = None, - user_list: Optional[List[str]] = None, - include_comments: bool = False, -) -> Dict[str, pd.DataFrame]: - """ - Create Kibana-ready datasets with all metrics. - - Generate three DataFrames suitable for Kibana visualization: detailed - task-level data, user-level aggregates, and team-level aggregates. - By default, extracts ALL tasks from ALL users and ALL projects. 
- The 'project' column can be used for filtering in Kibana. - - :param workspace_name: Asana workspace name to analyze - :param start_date: start date for analysis period - :param end_date: end date for analysis period - :param project_names: project names to filter by - (e.g., ["tech-now", "tech-next"]). If None, extract ALL projects. - When provided, also uses project names as team names - :param team_mapping: usernames to team names. - Alternative to project_names. If both are None, uses project as - team - - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} - :param access_token: Asana access token. If None, reads from - environment variable ASANA_ACCESS_TOKEN - :param user_list: specific usernames or GIDs to analyze. If - None, analyze ALL team members - :param include_comments: if True, fetch comment/activity data - (default: False). Set to True to include activity metrics - :return: data with three DataFrames: - - 'tasks': detailed task-level data with sprint/section info - - 'users': user-level aggregated metrics - - 'teams': team-level aggregated metrics - """ - _LOG.info("=" * 70) - _LOG.info("STARTING KIBANA DATASET CREATION") - _LOG.info("=" * 70) - _LOG.info("Workspace: %s", workspace_name) - _LOG.info("Date range: %s to %s", start_date.date(), end_date.date()) - _LOG.info("Project filter: %s", project_names if project_names else "ALL") - _LOG.info("User filter: %s", user_list if user_list else "ALL") - _LOG.info("Include comments: %s", include_comments) - - # Initialize analytics instance. - _LOG.info("Initializing Asana Analytics client...") - analytics = EnhancedAsanaAnalytics(access_token) - - # Get workspace GID. - _LOG.info("Resolving workspace GID for: %s", workspace_name) - workspace_gid = analytics.get_workspace_gid(workspace_name) - _LOG.info("Workspace GID resolved: %s", workspace_gid) - - # Create detailed task DataFrame. 
- _LOG.info("-" * 70) - _LOG.info("STEP 1/3: Creating detailed task DataFrame...") - _LOG.info("-" * 70) - task_df = analytics.create_task_dataframe( - workspace_gid, - user_identifiers=user_list, - project_names=project_names, - start_date=start_date, - end_date=end_date, - team_mapping=team_mapping, - include_comments=include_comments, - ) - _LOG.info("Task DataFrame created with %d rows", len(task_df)) - - # Create user-level comparison DataFrame. - _LOG.info("-" * 70) - _LOG.info("STEP 2/3: Creating user-level aggregates...") - _LOG.info("-" * 70) - user_df = analytics.create_user_comparison_df(task_df) - _LOG.info("User DataFrame created with %d rows", len(user_df)) - - # Create team-level comparison DataFrame. - _LOG.info("-" * 70) - _LOG.info("STEP 3/3: Creating team-level aggregates...") - _LOG.info("-" * 70) - team_df = analytics.create_team_comparison_df(task_df) - _LOG.info("Team DataFrame created with %d rows", len(team_df)) - - _LOG.info("=" * 70) - _LOG.info("DATASET CREATION COMPLETE!") - _LOG.info("=" * 70) - _LOG.info("Summary:") - _LOG.info(" Tasks: %d rows", len(task_df)) - _LOG.info(" Users: %d rows", len(user_df)) - _LOG.info(" Teams: %d rows", len(team_df)) - _LOG.info("=" * 70) - - result = {"tasks": task_df, "users": user_df, "teams": team_df} - return result - - -def save_to_ndjson( - df: pd.DataFrame, filepath: str, index_name: Optional[str] = None -) -> None: - """ - Save DataFrame to NDJSON format for Kibana/OpenSearch bulk upload. - - Convert DataFrame to newline-delimited JSON format suitable for - Elasticsearch/OpenSearch bulk API ingestion. - - :param df: data to save - :param filepath: output file path (e.g., 'asana_tasks.ndjson') - :param index_name: optional index name to include in bulk action - metadata. 
If None, only document data is written - """ - _LOG.info("Saving DataFrame to NDJSON: %s", filepath) - _LOG.info("DataFrame shape: %d rows, %d columns", len(df), len(df.columns)) - - # Convert DataFrame to records (list of dicts). - records = df.to_dict(orient="records") - - # Open file for writing. - with open(filepath, "w") as f: - for record in records: - # Convert timestamps to ISO format strings. - for key, value in record.items(): - if pd.isna(value): - # Convert NaN/None to null. - record[key] = None - elif isinstance(value, pd.Timestamp): - # Convert pandas Timestamp to ISO string. - record[key] = value.isoformat() - - if index_name: - # Write bulk API metadata line. - action = {"index": {"_index": index_name}} - f.write(json.dumps(action) + "\n") - - # Write document data line. - f.write(json.dumps(record) + "\n") - - _LOG.info("Successfully saved %d records to %s", len(records), filepath) - - -def save_datasets_for_kibana( - datasets: Dict[str, pd.DataFrame], - output_dir: str = ".", - *, - use_ndjson: bool = True, - index_prefix: str = "asana", -) -> Dict[str, str]: - """ - Save all datasets to files for Kibana ingestion. - - Save task, user, and team DataFrames to either NDJSON or CSV format - for Kibana/OpenSearch ingestion. - - :param datasets: dictionary with 'tasks', 'users', 'teams' - DataFrames from create_kibana_ready_dataset() - :param output_dir: directory to save files (default: current - directory) - :param use_ndjson: if True, save as NDJSON format. 
If False, save as - CSV (default: True) - :param index_prefix: prefix for index names when using NDJSON - (default: 'asana') - :return: dataset names to saved file paths - """ - _LOG.info("=" * 70) - _LOG.info("SAVING DATASETS FOR KIBANA") - _LOG.info("=" * 70) - _LOG.info("Output directory: %s", output_dir) - _LOG.info("Format: %s", "NDJSON" if use_ndjson else "CSV") - - saved_files = {} - extension = "ndjson" if use_ndjson else "csv" - - for dataset_name, df in datasets.items(): - # Construct file path. - filename = "{}_{}_{}.{}".format( - index_prefix, dataset_name, "kibana", extension - ) - filepath = "{}/{}".format(output_dir, filename) - - _LOG.info("Saving %s dataset (%d rows)...", dataset_name, len(df)) - - if use_ndjson: - # Save as NDJSON with index name. - index_name = "{}-{}".format(index_prefix, dataset_name) - save_to_ndjson(df, filepath, index_name=index_name) - else: - # Save as CSV. - df.to_csv(filepath, index=False) - _LOG.info("Saved to CSV: %s", filepath) - - saved_files[dataset_name] = filepath - - _LOG.info("=" * 70) - _LOG.info("ALL DATASETS SAVED!") - _LOG.info("=" * 70) - for dataset_name, filepath in saved_files.items(): - _LOG.info(" %s: %s", dataset_name, filepath) - _LOG.info("=" * 70) - - result = saved_files - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py deleted file mode 100644 index 318897d3e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py +++ /dev/null @@ -1,2110 +0,0 @@ -""" -Import as: - -import helpers.github_utils as hgitutil -""" - -import collections -import datetime -import functools -import itertools -import json -import logging -import os -import time -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple - 
-import github -import matplotlib.pyplot as plt -import pandas as pd -from tqdm import tqdm - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Why not using helpers.hcache_simple as hcacsimp. -def github_cached(cache_type: str = "json", write_through: bool = True): - """ - Cache decorator specifically for GitHub API functions. - - Automatically excludes the 'client' parameter (first positional arg) - from cache keys since client instances change across sessions. - - :param cache_type: Type of cache ('json' or 'pickle') - :param write_through: If True, write to disk after each cache update - :return: Decorated function with caching - """ - - def decorator(func: Callable) -> Callable: - # Get function name for cache. - func_name = func.__name__ - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Set cache type property. - existing_type = hcacsimp.get_cache_property(func_name, "type") - if not existing_type: - hcacsimp.set_cache_property(func_name, "type", cache_type) - - # Create a cached version that only uses args after client. - @functools.wraps(func) - def wrapper(client, *args, **kwargs): - # Create cache key from everything EXCEPT client. - cache_key = json.dumps( - {"args": args, "kwargs": kwargs}, - sort_keys=True, - default=str, - ) - # Get cache. - cache = hcacsimp.get_cache(func_name) - # Check if we have cached value. - if cache_key in cache: - _LOG.debug("Cache hit for %s", func_name) - return cache[cache_key] - # Cache miss - call the actual function. - _LOG.debug("Cache miss for %s, fetching from API", func_name) - result = func(client, *args, **kwargs) - # Store in cache - cache[cache_key] = result - # Write to disk if enabled. 
- if write_through: - hcacsimp.flush_cache_to_disk(func_name) - return result - - return wrapper - - return decorator - - -# ############################################################################# -# GitHubAPI -# ############################################################################# - - -class GitHubAPI: - """ - Initialize and manage authentication with the GitHub API using PyGithub. - """ - - def __init__( - self, - *, - access_token: Optional[str] = None, - base_url: Optional[str] = None, - ): - """ - Initialize the GitHub API client. - - :param access_token: GitHub personal access token; if not provided, it - is fetched from the environment variable `GITHUB_ACCESS_TOKEN` - :param base_url: optional custom GitHub Enterprise base URL - """ - self.access_token = access_token or os.getenv("GITHUB_ACCESS_TOKEN") - if not self.access_token: - raise ValueError( - "GitHub Access Token is required. Set it as an environment variable or pass it explicitly." - ) - auth = github.Auth.Token(self.access_token) - self.github = ( - github.Github(base_url=base_url, auth=auth) - if base_url - else github.Github(auth=auth) - ) - - def get_client(self) -> github.Github: - """ - Return the authenticated GitHub client. - - :return: an instance of the authenticated PyGithub client - """ - return self.github - - def close_connection(self) -> None: - """ - Close the GitHub API connection. - """ - self.github.close() - - -# ############################################################################# -# Utility APIs -# ############################################################################# - - -def get_repo_names(client: github.Github, org_name: str) -> Dict[str, List[str]]: - """ - Retrieve a list of repositories under a specific organization. 
- - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :return: a dictionary containing: - - owner: name of the organization - - repositories: repository names - """ - owner = client.get_organization(org_name) - hdbg.dassert_is_not( - owner, - None, - "'%s' is not a valid GitHub organization", - org_name, - ) - repos = [repo.name for repo in owner.get_repos()] - result = {"owner": org_name, "repositories": repos} - return result - - -def get_github_contributors( - client: github.Github, repo_names: List[str] -) -> Dict[str, List[str]]: - """ - Retrieve GitHub usernames contributing to specified repositories. - - :param client: authenticated instance of the PyGithub client - :param repo_names: repository names in the format 'owner/repo' to fetch - contributor usernames - :return: a dictionary containing: - - repository: repository name - - contributors: contributor GitHub usernames - """ - result = {} - for repo_name in repo_names: - repo = client.get_repo(repo_name) - hdbg.dassert_is_not(repo, None, "Could not fetch repo: %s", repo_name) - contributors = [ - contributor.login for contributor in repo.get_contributors() - ] - result[repo_name] = contributors - return result - - -def normalize_period_to_utc( - period: Optional[Tuple[datetime.datetime, datetime.datetime]], -) -> Tuple[Optional[datetime.datetime], Optional[datetime.datetime]]: - """ - Convert a datetime period to UTC and ensure both dates are timezone-aware. 
- - :param period: start and end datetime - :return: UTC-aware start and end datetime, or (None, None) if period - is None - """ - - def to_utc(dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: - res = None - if dt is None: - return res - else: - res = ( - dt.replace(tzinfo=datetime.timezone.utc) - if dt.tzinfo is None - else dt.astimezone(datetime.timezone.utc) - ) - return res - - norm = ( - tuple(to_utc(dt) for dt in period) - if period is not None - else (None, None) - ) - return norm - - -# ############################################################################# -# Global Metrics APIs -# ############################################################################# - - -def get_total_commits( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the number of commits made in the repositories of the specified - organization, optionally filtered by GitHub usernames and a specified time - period. 
- - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter commits; if None, fetches for - all users - :param period: start and end datetime for filtering commits - :return: a dictionary containing: - - total_commits (int): total number of commits across all repositories - - period (str): the time range considered - - commits_per_repository (Dict[str, int]): repository names as keys and - commit counts as values - """ - # Retrieve organization repositories - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", - repos_info, - "Missing 'repositories' key in get_repo_names() output", - ) - repositories = repos_info["repositories"] - total_commits = 0 - commits_per_repository = {} - since, until = period if period else (None, None) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not(repo, None, "Could not retrieve repo: %s", repo_name) - repo_commit_count = 0 - if usernames: - for username in usernames: - commits = repo.get_commits( - author=username, since=since, until=until - ) - hdbg.dassert_is_not( - commits, - None, - "Failed to get commits by '%s' in %s", - username, - repo_name, - ) - repo_commit_count += commits.totalCount - else: - commits = repo.get_commits(since=since, until=until) - hdbg.dassert_is_not( - commits, None, "Failed to get commits in %s", repo_name - ) - repo_commit_count = commits.totalCount - commits_per_repository[repo_name] = repo_commit_count - total_commits += repo_commit_count - result = { - "total_commits": total_commits, - "period": f"{since} to {until}" if since and until else "All time", - "commits_per_repository": commits_per_repository, - } - return result - - -def get_total_prs( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: 
Optional[Tuple[datetime.datetime, datetime.datetime]] = None, - state: str = "all", -) -> Dict[str, Any]: - """ - Fetch the number of pull requests made in the repositories of the specified - organization, optionally filtered by GitHub usernames, a specified time - period, and the state of the pull requests. - - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter pull requests; if None, fetches - for all users - :param period: start and end datetime for filtering pull requests - :param state: the state of the pull requests to fetch; can be 'open', 'closed', or 'all' - :return: a dictionary containing: - - total_prs (int): total number of pull requests - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and pull - request counts as values - """ - # Retrieve repositories for the organization - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", repos_info, "Missing 'repositories' key in repo info" - ) - repositories = repos_info["repositories"] - total_prs = 0 - prs_per_repository = {} - since, until = normalize_period_to_utc(period) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not( - repo, None, "Could not retrieve repository: %s", repo_name - ) - repo_pr_count = 0 - pulls = repo.get_pulls(state=state) - for pr in pulls: - hdbg.dassert_is_not( - pr, None, "PR could not be fetched in %s", repo_name - ) - if usernames and pr.user.login not in usernames: - continue - pr_created_at = ( - pr.created_at.replace(tzinfo=datetime.timezone.utc) - if pr.created_at.tzinfo is None - else pr.created_at.astimezone(datetime.timezone.utc) - ) - if since and until and not (since <= pr_created_at <= until): - continue - repo_pr_count += 1 - prs_per_repository[repo_name] = 
repo_pr_count - total_prs += repo_pr_count - result = { - "total_prs": total_prs, - "period": f"{since} to {until}" if since and until else "All time", - "prs_per_repository": prs_per_repository, - } - return result - - -def get_prs_not_merged( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the count of closed but unmerged pull requests in the specified - repositories and by the specified GitHub users within a given period. - - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter pull requests; if None, fetches for all users - :param period: start and end datetime for filtering pull requests - :return: a dictionary containing: - - prs_not_merged (int): total number of closed but unmerged pull requests - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and - unmerged pull request counts as values - """ - # Fetch all repositories in the org. - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", - repos_info, - "Missing 'repositories' in get_repo_names() output", - ) - repositories = repos_info["repositories"] - total_unmerged_prs = 0 - prs_per_repository = {} - since, until = normalize_period_to_utc(period) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - # Fetch repo object. 
- repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not( - repo, - None, - "Could not fetch repo: %s/%s", - org_name, - repo_name, - ) - repo_unmerged_pr_count = 0 - issues = repo.get_issues(state="closed", since=since) - pulls = [] - for issue in issues: - if issue.pull_request: - pull = repo.get_pull(issue.number) - hdbg.dassert_is_not( - pull, - None, - "Could not fetch pull request #%d in %s", - issue.number, - repo_name, - ) - pulls.append(pull) - for pr in pulls: - _LOG.debug("Processing PR #%d from %s", pr.number, repo_name) - pr_created_at = pr.created_at or datetime.datetime.min - pr_created_at = ( - pr_created_at.replace(tzinfo=datetime.timezone.utc) - if pr_created_at.tzinfo is None - else pr_created_at.astimezone(datetime.timezone.utc) - ) - if pr.merged: - continue - if usernames and pr.user.login not in usernames: - continue - if since and until and not (since <= pr_created_at <= until): - continue - repo_unmerged_pr_count += 1 - prs_per_repository[repo_name] = repo_unmerged_pr_count - total_unmerged_prs += repo_unmerged_pr_count - result = { - "prs_not_merged": total_unmerged_prs, - "period": f"{since} to {until}" if since and until else "All time", - "prs_per_repository": prs_per_repository, - } - return result - - -# ############################################################################# -# Individual User Metrics APIs -# ############################################################################# - - -def get_commits_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Retrieve the number of commits made by a specific GitHub user. 
- - :param client: authenticated instance of the PyGithub client - :param username: GitHub username to fetch commit data for - :param org_name: name of the GitHub organization - :param period: start and end datetime for filtering commits - :return: a dictionary containing: - - user (str): GitHub username - - total_commits (int): total number of commits made by the user - - period (str): the time range considered - - commits_per_repository (Dict[str, int]): repository names as keys and - commit counts as values - """ - result = get_total_commits( - client=client, org_name=org_name, usernames=[username], period=period - ) - res_dict = { - "user": username, - "total_commits": result["total_commits"], - "period": result["period"], - "commits_per_repository": result["commits_per_repository"], - } - return res_dict - - -def get_prs_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, - state: str = "all", -) -> Dict[str, Any]: - """ - Fetch the number of pull requests created by a specific GitHub user in the - given repositories and time period. 
- - :param client: authenticated instance of the PyGithub client - :param username: GitHub username to fetch pull request data for - :param org_name: name of the GitHub organization - :param period: start and end datetime for filtering pull requests - :param state: state of the pull requests to fetch; can be 'open', 'closed', - or 'all' - :return: a dictionary containing: - - user (str): GitHub username - - total_prs (int): total number of pull requests created - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and pull - request counts as values - """ - result = get_total_prs( - client=client, - org_name=org_name, - usernames=[username], - period=period, - state=state, - ) - res_dict = { - "user": username, - "total_prs": result["total_prs"], - "period": result["period"], - "prs_per_repository": result["prs_per_repository"], - } - return res_dict - - -def get_prs_not_merged_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the number of closed but unmerged pull requests created by a specific - GitHub user in the given repositories and time period. 
def days_between(
    period: Tuple[datetime.datetime, datetime.datetime],
) -> List[datetime.date]:
    """
    List every calendar date covered by a period, both ends included.

    :param period: start and end datetime
    :return: consecutive dates from the start date through the end date; empty
        when the end date precedes the start date
    """
    first_day = period[0].date()
    last_day = period[1].date()
    # Number of dates in the inclusive span; non-positive when the period is
    # reversed, in which case `range()` yields nothing.
    n_days = (last_day - first_day).days + 1
    days: List[datetime.date] = [
        first_day + datetime.timedelta(days=offset) for offset in range(n_days)
    ]
    _LOG.debug("Generated %d days in period.", len(days))
    return days
@github_cached(cache_type="json", write_through=True)
def get_pr_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Collect creation timestamps of the PRs a user authored in a repo.

    The lookup goes through the GitHub search API, filtered by author and by
    creation date (day granularity).

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: PR created timestamps in ISO format
    """
    # The search qualifier only accepts calendar dates.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    query = f"repo:{org}/{repo} is:pr author:{username} created:{since_date}..{until_date}"
    # Naive creation times are labelled as UTC; aware ones are kept unchanged.
    timestamps: List[str] = [
        (
            created
            if created.tzinfo
            else created.replace(tzinfo=datetime.timezone.utc)
        ).isoformat()
        for created in (issue.created_at for issue in client.search_issues(query))
    ]
    # Log the results summary.
    if timestamps:
        _LOG.info(
            "Found %d PRs for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    else:
        _LOG.debug(
            "No PRs found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since_date,
            until_date,
        )
    return timestamps
@github_cached(cache_type="json", write_through=True)
def get_loc_stats_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[Dict[str, Any]]:
    """
    Fetch per-commit LOC stats for user in repo over period.

    A commit counts for the user when the user is either its author or its
    committer. At most 1000 matching commits are collected per call.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: one entry per matching commit with keys `date` (ISO calendar
        date string), `additions` (int) and `deletions` (int)
    """
    # Safety limit on collected commits, to keep a single call bounded.
    max_commits = 1000
    stats_list: List[Dict[str, Any]] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Retrieve all commits in the specified time period.
    commits = repo_obj.get_commits(since=since, until=until)
    for c in commits:
        # Extract author and committer logins.
        author_login = c.author.login if c.author else None
        committer_login = c.committer.login if c.committer else None
        # Skip commits not by the target user.
        if username not in (author_login, committer_login):
            continue
        # Skip commits with incomplete metadata, as the commit timestamp
        # fetcher does; without this the date lookup below would raise.
        if not c.commit or not c.commit.author or not c.commit.author.date:
            _LOG.debug("Incomplete metadata for commit %s", c.sha)
            continue
        # Fetch commit statistics.
        s = c.stats
        if s is None:
            _LOG.debug("No stats available for commit %s", c.sha)
            continue
        # Label naive commit dates as UTC; aware ones are kept unchanged.
        dt = c.commit.author.date
        dt_utc = dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)
        stats_list.append(
            {
                "date": dt_utc.date().isoformat(),
                "additions": s.additions,
                "deletions": s.deletions,
            }
        )
        # Stop exactly at the limit (the previous `> 1000` check let one
        # extra commit through before warning about 1000).
        if len(stats_list) >= max_commits:
            _LOG.warning(
                "Processed %d commits, stopping to avoid timeout", max_commits
            )
            break
    # Log the results summary.
    if not stats_list:
        _LOG.info(
            "No LOC stats found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched LOC stats for %s/%s user=%s entries=%d.",
            org,
            repo,
            username,
            len(stats_list),
        )
    return stats_list
@github_cached(cache_type="json", write_through=True)
def get_pr_review_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Fetch PR review timestamps for user in repo over period.

    Candidate PRs come from the search API (`reviewed-by:` plus an `updated:`
    date range); each candidate's reviews are then listed and filtered by
    reviewer and by submission time.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime; a naive value is interpreted as UTC
    :param until: end datetime; a naive value is interpreted as UTC
    :return: review timestamps in ISO format
    """

    def _as_utc(dt: datetime.datetime) -> datetime.datetime:
        # Label naive datetimes as UTC; aware ones are returned unchanged.
        return dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)

    timestamps: List[str] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Format dates for GitHub search query.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    # Build search query for PRs reviewed by the user.
    query = f"repo:{org}/{repo} is:pr reviewed-by:{username} updated:{since_date}..{until_date}"
    results = client.search_issues(query)
    # Normalize the bounds once so that naive and aware values never get
    # compared against each other (which raises `TypeError`).
    lower = _as_utc(since)
    upper = _as_utc(until)
    for issue in results:
        # The search API returns issues; fetch the full PR to list reviews.
        pr = repo_obj.get_pull(issue.number)
        for review in pr.get_reviews():
            # The reviewer is missing when the account was deleted.
            if review.user is None or review.user.login != username:
                continue
            # Reviews that were never submitted carry no timestamp.
            if review.submitted_at is None:
                continue
            review_dt_utc = _as_utc(review.submitted_at)
            # Add timestamp if within the specified period.
            if lower <= review_dt_utc <= upper:
                timestamps.append(review_dt_utc.isoformat())
    # Log the results summary.
    if not timestamps:
        _LOG.info(
            "No PR reviews found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched %d PR reviews for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    return timestamps
def build_daily_issue_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Tabulate, per day, how many issues were assigned to and closed for a user
    in one repo.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with columns date, issues_assigned, issues_closed,
        repo, user; one row per day of the period
    """

    def _count_per_day(stamps: List[str], column: str) -> pd.DataFrame:
        # One row per timestamp, each weighted 1, summed by calendar date.
        events = pd.DataFrame({"ts": pd.to_datetime(stamps), column: 1})
        events["date"] = events.ts.dt.date
        return events.groupby("date")[column].sum().reset_index()

    issue_data = get_issue_datetimes_by_repo_intrinsic(
        client, org, repo, username, period
    )
    daily_assigned = _count_per_day(issue_data["assigned"], "issues_assigned")
    daily_closed = _count_per_day(issue_data["closed"], "issues_closed")
    # Left-join onto the full calendar so that quiet days show up as rows.
    calendar = pd.DataFrame({"date": days_between(period)})
    daily = calendar.merge(daily_assigned, on="date", how="left")
    daily = daily.merge(daily_closed, on="date", how="left")
    # Days without events come out of the join as NaN; turn them into 0.
    count_cols = ["issues_assigned", "issues_closed"]
    daily[count_cols] = daily[count_cols].fillna(0).astype(int)
    daily["repo"] = repo
    daily["user"] = username
    _LOG.debug("Built daily issue DataFrame rows=%d.", len(daily))
    return daily
def build_daily_loc_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Build daily LOC additions and deletions for user and repo over period.

    The `additions` and `deletions` columns are returned as sign-prefixed
    strings (e.g., "+12" and "-3"), with "+0" / "-0" on days without commits.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, additions, deletions, repo, user; one row per
        day of the period
    """
    since, until = period
    # Fetch raw LOC stats list.
    stats_list = get_loc_stats_by_repo_period_intrinsic(
        client, org, repo, username, since, until
    )
    # The full calendar guarantees one row per day of the period.
    daily = pd.DataFrame({"date": days_between(period)})
    loc_cols = ["additions", "deletions"]
    if stats_list:
        df = pd.DataFrame(stats_list)
        df["date"] = pd.to_datetime(df["date"]).dt.date
        # Sum per date, then align on the calendar.
        per_day = df.groupby("date")[loc_cols].sum().reset_index()
        daily = daily.merge(per_day, on="date", how="left")
        # Days without commits are NaN after the join.
        daily[loc_cols] = daily[loc_cols].fillna(0).astype(int)
        log_suffix = ""
    else:
        # An empty stats list has no columns to group on, so fill zeros
        # directly.
        for col in loc_cols:
            daily[col] = 0
        log_suffix = " (no data)"
    # Apply sign formatting.
    daily["additions"] = "+" + daily["additions"].astype(str)
    daily["deletions"] = "-" + daily["deletions"].astype(str)
    # Add context.
    daily["repo"] = repo
    daily["user"] = username
    _LOG.debug("Built daily LOC DataFrame rows=%d" + log_suffix + ".", len(daily))
    return daily
def prefetch_periodic_user_repo_data(
    client,
    org: str,
    repos: List[str],
    users: List[str],
    period: Tuple[datetime.datetime, datetime.datetime],
) -> None:
    """
    Warm the cache for every (repo, user) pair over a period.

    For each pair this calls the cached fetchers for commits, PRs, LOC stats
    and issues, and logs the sizes of what came back. Fetching of issue
    comments and PR reviews is currently disabled, so both are logged as 0.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repos: repository names
    :param users: GitHub usernames
    :param period: start and end datetime objects
    :raises ValueError: if `org` is not a string, or `repos` / `users` is not
        a list of strings
    """
    # Validate input types.
    if not isinstance(org, str):
        raise ValueError(f"org must be a string, got {type(org).__name__}")
    repos_ok = isinstance(repos, list) and all(isinstance(r, str) for r in repos)
    if not repos_ok:
        raise ValueError("repos must be a list of strings")
    users_ok = isinstance(users, list) and all(isinstance(u, str) for u in users)
    if not users_ok:
        raise ValueError("users must be a list of strings")
    # Start the timer and enumerate all (repo, user) combinations.
    start = time.time()
    since, until = period
    user_repo_pairs = list(itertools.product(repos, users))
    # Issue comments and PR reviews are not prefetched.
    n_issue_comments = 0
    n_pr_reviews = 0
    count = 0
    progress = tqdm(user_repo_pairs, desc="Prefetching user-repo data")
    for count, (repo, user) in enumerate(progress, start=1):
        commits = get_commit_datetimes_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        prs = get_pr_datetimes_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        locs = get_loc_stats_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        issues = get_issue_datetimes_by_repo_intrinsic(
            client, org, repo, user, period
        )
        _LOG.info(
            "%s/%s: %d commits, %d PRs, %d LOC entries, %d issues assigned, "
            "%d closed, %d issue comments, %d PR reviews",
            repo,
            user,
            len(commits),
            len(prs),
            len(locs),
            len(issues["assigned"]),
            len(issues["closed"]),
            n_issue_comments,
            n_pr_reviews,
        )
    # Report overall prefetch duration.
    elapsed = time.time() - start
    _LOG.info(
        "Prefetched %d user-repo combos in %.2f seconds for period %s to %s.",
        count,
        elapsed,
        period[0],
        period[1],
    )
def build_daily_pr_review_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count, per day, the PR reviews a user submitted in one repo.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, pr_reviews, repo, user; one row per day of the
        period
    """
    since, until = period
    review_stamps = get_pr_review_datetimes_by_repo_period_intrinsic(
        client, org, repo, username, since, until
    )
    # Bucket each review timestamp by its calendar date.
    reviews = pd.DataFrame({"ts": pd.to_datetime(review_stamps)})
    reviews["date"] = reviews.ts.dt.date
    per_day = reviews.groupby("date").size().reset_index(name="pr_reviews")
    # Left-join onto the full calendar so that days without reviews are kept.
    calendar = pd.DataFrame({"date": days_between(period)})
    daily = calendar.merge(per_day, on="date", how="left")
    daily["pr_reviews"] = daily["pr_reviews"].fillna(0).astype(int)
    daily = daily.assign(repo=repo, user=username)
    _LOG.debug("Built daily PR review DataFrame rows=%d.", len(daily))
    return daily
def summarize_user_metrics_for_repo(
    combined: pd.DataFrame, repo: str
) -> pd.DataFrame:
    """
    Summarize total commits, PRs, LOC and issues per user in a specific
    repository.

    :param combined: data with all metrics; `additions` / `deletions` may be
        sign-prefixed strings (e.g., "+12", "-3") or plain numbers
    :param repo: repository name
    :return: data with columns user, commits, prs, additions, deletions,
        issues_assigned, issues_closed (issue comments and PR reviews are not
        part of this summary)
    """
    df = combined[combined["repo"] == repo].copy()
    # Strip the sign prefix literally: "+" is a regex metacharacter, so do not
    # depend on the version-specific default of `regex`. Casting to `str`
    # first keeps this working when the columns are already numeric.
    df["additions"] = (
        df["additions"].astype(str).str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].astype(str).str.replace("-", "", regex=False).astype(int)
    )
    df["issues_assigned"] = df["issues_assigned"].astype(int)
    df["issues_closed"] = df["issues_closed"].astype(int)
    summary = (
        df.groupby("user")
        .agg(
            commits=pd.NamedAgg(column="commits", aggfunc="sum"),
            prs=pd.NamedAgg(column="prs", aggfunc="sum"),
            additions=pd.NamedAgg(column="additions", aggfunc="sum"),
            deletions=pd.NamedAgg(column="deletions", aggfunc="sum"),
            issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"),
            issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"),
        )
        .reset_index()
    )
    return summary
def summarize_users_across_repos(
    combined: pd.DataFrame,
    users: List[str],
    repos: List[str],
) -> pd.DataFrame:
    """
    Aggregate commit / PR / LOC / issue / comment / review totals per-user
    across a repo subset.

    :param combined: output of `collect_all_metrics`; `additions` /
        `deletions` may be sign-prefixed strings (e.g., "+12", "-3") or plain
        numbers
    :param users: GitHub usernames
    :param repos: repository names
    :return: data with columns user, commits, prs, additions, deletions,
        issues_assigned, issues_closed, issue_comments, pr_reviews
    """
    # Filter to requested slice.
    df = combined[
        combined["user"].isin(users) & combined["repo"].isin(repos)
    ].copy()
    # Normalise numeric columns. The sign prefix is stripped literally ("+" is
    # a regex metacharacter, so the `regex` default must not be relied upon),
    # and the cast to `str` keeps already-numeric columns working.
    df["additions"] = (
        df["additions"].astype(str).str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].astype(str).str.replace("-", "", regex=False).astype(int)
    )
    df["issue_comments"] = df["issue_comments"].astype(int)
    df["pr_reviews"] = df["pr_reviews"].astype(int)
    # Aggregate across repos.
    summed_cols = [
        "commits",
        "prs",
        "additions",
        "deletions",
        "issues_assigned",
        "issues_closed",
        "issue_comments",
        "pr_reviews",
    ]
    summary = (
        df.groupby("user")
        .agg(**{col: (col, "sum") for col in summed_cols})
        .reset_index()
    )
    return summary
- default_metrics = [ - "commits", - "prs", - "additions", - "deletions", - "issues_assigned", - "issues_closed", - "issue_comments", - "pr_reviews", - ] - to_plot = metrics if metrics else default_metrics - for m in to_plot: - if m not in default_metrics: - raise ValueError(f"Unsupported metric '{m}'") - # Filter to only metrics that exist in the summary. - to_plot = [m for m in to_plot if m in summary.columns] - # Compute layout parameters. - categories = summary[index_col].tolist() - x = range(len(to_plot)) - n_cat = len(categories) - width = 0.8 / n_cat if n_cat else 0.8 - # Plot bars for each category (user or repo). - fig_width = max(12, len(to_plot) * 1.5) - fig, ax = plt.subplots(figsize=(fig_width, 5)) - for idx, cat in enumerate(categories): - values = ( - summary.loc[summary[index_col] == cat, to_plot].astype(int).iloc[0] - ) - pos = [i + idx * width for i in x] - bars = ax.bar(pos, values, width=width, label=str(cat)) - for b in bars: - ax.text( - b.get_x() + b.get_width() / 2, - b.get_height(), - str(int(b.get_height())), - ha="center", - va="bottom", - fontsize=8, - ) - # Finalize plot aesthetics. - ax.set_xticks([i + width * (n_cat - 1) / 2 for i in x]) - ax.set_xticklabels( - [m.replace("_", " ").title() for m in to_plot], rotation=45, ha="right" - ) - ax.set_ylabel("Count") - ax.set_title(title) - ax.legend(title=index_col.replace("_", " ").title()) - plt.tight_layout() - plt.show() - - -def plot_metrics_by_user( - combined: pd.DataFrame, - repo: str, - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - users: Optional[List[str]] = None, - metrics: Optional[List[str]] = None, -) -> None: - """ - Plot selected metrics for users in one repo. 
- - :param combined: output from `collect_all_metrics` - :param repo: repository name - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param users: optional subset of GitHub usernames to show - :param metrics: list of metrics to plot; defaults to all numeric columns - :return: grouped bar chart where each group = metric, each bar = user - """ - df_period = _filter_period(df=combined, start=start, end=end) - summary = summarize_user_metrics_for_repo(df_period, repo) - if users is not None: - summary = summary[summary["user"].isin(users)] - _plot_grouped_bars( - summary, - index_col="user", - metrics=metrics, - title=f"Metric comparison for {repo} " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})", - ) - - -def plot_metrics_by_repo( - combined: pd.DataFrame, - user: str, - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - repos: Optional[List[str]] = None, - metrics: Optional[List[str]] = None, -) -> None: - """ - Plot specified metrics for repos for a single user as grouped bar chart. 
- - :param combined: data from `collect_all_metrics` - :param user: GitHub username - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param repos: repos to include - :param metrics: metrics to plot; defaults to all numeric columns - :return: grouped bar chart where each group = metric, each bar = repo - """ - df_period = _filter_period(df=combined, start=start, end=end) - summary = summarize_repo_metrics_for_user(df_period, user) - if repos is not None: - summary = summary[summary["repo"].isin(repos)] - _plot_grouped_bars( - summary, - index_col="repo", - metrics=metrics, - title=f"Metric comparison for {user} " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})", - ) - - -def plot_multi_metrics_totals_by_user( - combined: pd.DataFrame, - metrics: List[str], - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - users: Optional[List[str]] = None, - repos: Optional[List[str]] = None, -) -> None: - """ - Plot multiple metrics (summed across repos) per user as grouped bars. - - :param combined: data from `collect_all_metrics` - :param metrics: metrics to plot, e.g. ["commits", "prs", "additions"] - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param users: users to include - :param repos: repos to include - :return: grouped bar chart where each group = user, each bar = one metric - """ - df_period = _filter_period(df=combined, start=start, end=end) - # Aggregate totals for each user across the selected repos. - summary = summarize_users_across_repos( - df_period, - users or df_period["user"].unique().tolist(), - repos or df_period["repo"].unique().tolist(), - ) - if users is not None: - summary = summary[summary["user"].isin(users)] - # Validate metrics exist. - for metric in metrics: - if metric not in summary.columns: - raise ValueError(f"Metric '{metric}' not found in summary columns") - # Set up bar positions and sizing. 
- users_sorted = summary["user"].tolist() - x = range(len(users_sorted)) - width = 0.8 / len(metrics) if metrics else 0.8 - fig_width = max(10, len(users_sorted) * 0.7) - fig, ax = plt.subplots(figsize=(fig_width, 5)) - # Draw bars for each metric across users - for i, metric in enumerate(metrics): - offsets = [pos + i * width for pos in x] - values = ( - summary.set_index("user") - .loc[users_sorted, metric] - .astype(int) - .tolist() - ) - bars = ax.bar( - offsets, values, width=width, label=metric.replace("_", " ").title() - ) - for bar in bars: - ax.text( - bar.get_x() + bar.get_width() / 2, - bar.get_height(), - str(int(bar.get_height())), - ha="center", - va="bottom", - fontsize=8, - ) - # Final plot styling. - ax.set_xticks([pos + width * (len(metrics) - 1) / 2 for pos in x]) - ax.set_xticklabels(users_sorted, rotation=15, ha="right") - ax.set_ylabel("Total count across repos") - ax.set_title( - f"Metric totals across repos by user " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})" - ) - ax.legend() - plt.tight_layout() - plt.show() - - -def get_contributors_for_repo( - client, - org: str, - repo: str, - *, - top_n: Optional[int] = None, -) -> List[str]: - """ - Fetch GitHub usernames of contributors to a repository. - - :param client: authenticated PyGithub client - :param org: GitHub organization name - :param repo: repository name - :param top_n: if specified, return only the top N contributors by - commit count - :return: GitHub usernames - """ - repo_obj = client.get_repo(f"{org}/{repo}") - contributors = repo_obj.get_contributors() - usernames = list() - for idx, user in enumerate(contributors): - if top_n and idx >= top_n: - break - usernames.append(user.login) - _LOG.info("Fetched %d contributors for %s/%s", len(usernames), org, repo) - return usernames - - -def utc_period( - start: str, end: str -) -> Tuple[datetime.datetime, datetime.datetime]: - """ - Construct a UTC datetime period from string inputs. 
- - :param start: start date e.g. '2025-01-01' - :param end: end date e.g. '2025-05-24' - """ - date = ( - datetime.datetime.fromisoformat(start).replace( - tzinfo=datetime.timezone.utc - ), - datetime.datetime.fromisoformat(end).replace( - tzinfo=datetime.timezone.utc - ), - ) - return date - - -def slice_period( - df: pd.DataFrame, - start: datetime.date, - end: datetime.date, -) -> pd.DataFrame: - """ - Filter a DataFrame by date range. - - :param df: data with a `date` column of type `datetime.date` - :param start: start date for the filtering window - :param end: end date for the filtering window - :return: filtered data within the specified date range - """ - req_period = df[(df["date"] >= start) & (df["date"] <= end)] - return req_period - - -def compute_z_scores(summary: pd.DataFrame, metrics: List[str]) -> pd.DataFrame: - """ - Compute z-score (standardized score) for specified metrics across users. - - This helps assess how far a user's metric is from the group mean in units - of standard deviation. - - :param summary: data with users and raw metric values - :param metrics: metric column names to compute z-scores for - :return: data with added z-score columns suffixed with `_z` - """ - z_df = summary.copy() - for metric in metrics: - mean = z_df[metric].mean() - std = z_df[metric].std() - z_df[metric + "_z"] = (z_df[metric] - mean) / std - return z_df - - -def compute_percentile_ranks( - summary: pd.DataFrame, metrics: List[str] -) -> pd.DataFrame: - """ - Compute percentile rank for each user for the specified metrics. - - Percentile rank reflects the relative standing of a user compared to the - group. For example, a percentile of 0.8 means the user is ahead of 80% - of the group for that metric. 
- - :param summary: data with users and raw metric values - :param metrics: metric column names - :return: data with added percentile columns suffixed with `_pctile` - """ - perc_df = summary.copy() - for metric in metrics: - perc_df[metric + "_pctile"] = perc_df[metric].rank(pct=True) - return perc_df - - -def visualize_user_metric_comparison( - stats: pd.DataFrame, - *, - score_type: Literal["z", "percentile"] = "z", - top_n: Optional[int] = None, -) -> None: - """ - Visualize user performance across all available metrics using z-scores or - percentiles. - - :param stats: data with user metrics and their standardized scores - :param score_type: "z" for z-scores or "percentile" for relative - percentiles - :param top_n: number of top users to show in leaderboard bar chart - """ - suffix = "_z" if score_type == "z" else "_pctile" - score_cols = [col for col in stats.columns if col.endswith(suffix)] - if not score_cols: - raise ValueError( - f"No columns ending with '{suffix}' found in input DataFrame." - ) - # Stylized table. - import IPython - - IPython.display.display( - stats[["user"] + score_cols] - .set_index("user") - .style.format("{:.2f}") - .background_gradient( - axis=0, cmap="Greens" if score_type == "percentile" else "RdYlGn" - ) - ) - # Leaderboard chart (by average score). 
- stats["__score_avg__"] = stats[score_cols].mean(axis=1) - if top_n is None: - top_users = stats.sort_values("__score_avg__", ascending=False) - top_n_display = len(top_users) - else: - top_users = stats.sort_values("__score_avg__", ascending=False).head( - top_n - ) - top_n_display = top_n - fig, ax = plt.subplots(figsize=(max(8, 0.5 * len(top_users)), 4)) - ax.bar(top_users["user"], top_users["__score_avg__"], color="skyblue") - ax.set_ylabel( - "Average Score" - + (" (Z-score)" if score_type == "z" else " (Percentile)") - ) - ax.set_title(f"Top {top_n_display} Users by Average {score_type.title()}") - ax.axhline(0 if score_type == "z" else 0.5, color="gray", linestyle="--") - plt.xticks(rotation=15, ha="right") - plt.tight_layout() - plt.show() - stats.drop(columns="__score_avg__", inplace=True) - - -def compute_engagement_score( - summary: pd.DataFrame, - weights: Optional[Dict[str, float]] = None, -) -> pd.DataFrame: - """ - Compute a weighted engagement score for each user based on all metrics. - - :param summary: data with user metrics - :param weights: optional dictionary of metric weights; if None, uses - defaults - :return: summary with an added 'engagement_score' column - """ - # Default weights emphasizing collaboration and code quality. - default_weights = { - "commits": 1.0, - "prs": 2.0, - "additions": 0.001, - "deletions": 0.0005, - "issues_assigned": 0.5, - "issues_closed": 1.5, - "issue_comments": 0.3, - "pr_reviews": 2.5, - } - weights = weights or default_weights - summary = summary.copy() - summary["engagement_score"] = 0 - for metric, weight in weights.items(): - if metric in summary.columns: - summary["engagement_score"] += summary[metric] * weight - # Normalize to 0-100 scale. 
- max_score = summary["engagement_score"].max() - if max_score > 0: - summary["engagement_score"] = ( - summary["engagement_score"] / max_score * 100 - ).round(2) - summary_sorted = summary.sort_values("engagement_score", ascending=False) - return summary_sorted - - -# ############################################################################# -# PR Statistics -# ############################################################################# - - -def count_open_prs_by_author( - repo_obj, -) -> Dict[str, Dict[str, int]]: - """ - Count open PRs grouped by author and draft/ready status. - - :param repo_obj: PyGithub repository object - :return: dict mapping author -> {"ready": int, "draft": int} - """ - stats: Dict[str, Dict[str, int]] = collections.defaultdict( - lambda: {"ready": 0, "draft": 0} - ) - pulls = repo_obj.get_pulls(state="open") - for pr in pulls: - author = pr.user.login - status = "draft" if pr.draft else "ready" - stats[author][status] += 1 - _LOG.debug("Open PR #%d by %s status=%s", pr.number, author, status) - return dict(stats) - - -def count_closed_prs_by_author( - repo_obj, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, int]: - """ - Count closed PRs grouped by author, optionally filtered by period. - - :param repo_obj: PyGithub repository object - :param period: optional (start, end) UTC-aware datetimes for filtering - :return: dict mapping author -> count of closed PRs - """ - stats: Dict[str, int] = collections.defaultdict(int) - since, until = normalize_period_to_utc(period) - pulls = repo_obj.get_pulls(state="closed") - for pr in pulls: - # Normalize the PR closed_at timestamp to UTC. - closed_at = pr.closed_at - if closed_at is None: - continue - if closed_at.tzinfo is None: - closed_at = closed_at.replace(tzinfo=datetime.timezone.utc) - else: - closed_at = closed_at.astimezone(datetime.timezone.utc) - # Filter by period if specified. 
- if since is not None and until is not None: - if not (since <= closed_at <= until): - continue - author = pr.user.login - stats[author] += 1 - _LOG.debug("Closed PR #%d by %s at %s", pr.number, author, closed_at) - return dict(stats) - - -def print_open_pr_stats( - open_stats: Dict[str, Dict[str, int]], -) -> None: - """ - Print open PR statistics by author and draft/ready status. - - :param open_stats: dict mapping author -> {"ready": int, "draft": int} - """ - if not open_stats: - _LOG.info("No open PRs found.") - return - # Sort by total PR count descending. - sorted_authors = sorted( - open_stats.items(), - key=lambda item: item[1]["ready"] + item[1]["draft"], - reverse=True, - ) - total_ready = 0 - total_draft = 0 - header = f"{'Author':<25} {'Ready':>7} {'Draft':>7} {'Total':>7}" - separator = "-" * len(header) - _LOG.info("Open PRs by author:") - _LOG.info(separator) - _LOG.info(header) - _LOG.info(separator) - for author, counts in sorted_authors: - ready = counts["ready"] - draft = counts["draft"] - total = ready + draft - total_ready += ready - total_draft += draft - _LOG.info("%-25s %7d %7d %7d", author, ready, draft, total) - _LOG.info(separator) - _LOG.info( - "%-25s %7d %7d %7d", - "TOTAL", - total_ready, - total_draft, - total_ready + total_draft, - ) - - -def print_closed_pr_stats( - closed_stats: Dict[str, int], - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> None: - """ - Print closed PR statistics by author. - - :param closed_stats: dict mapping author -> count of closed PRs - :param period: optional period used for filtering (for display only) - """ - if not closed_stats: - _LOG.info("No closed PRs found.") - return - # Sort by count descending. 
- sorted_authors = sorted( - closed_stats.items(), key=lambda item: item[1], reverse=True - ) - period_str = "all time" - if period is not None: - since, until = period - period_str = f"{since.date()} to {until.date()}" - header = f"{'Author':<25} {'Closed':>7}" - separator = "-" * len(header) - _LOG.info("Closed PRs by author (%s):", period_str) - _LOG.info(separator) - _LOG.info(header) - _LOG.info(separator) - total = 0 - for author, count in sorted_authors: - total += count - _LOG.info("%-25s %7d", author, count) - _LOG.info(separator) - _LOG.info("%-25s %7d", "TOTAL", total) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py deleted file mode 100644 index 96c8af1da..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py +++ /dev/null @@ -1,508 +0,0 @@ -""" -Import as: - -import helpers.hasyncio as hasynci -""" - -import asyncio -import contextlib -import datetime -import logging -import math -import time -from typing import ( - Any, - Callable, - Coroutine, - Dict, - Iterator, - List, - Optional, - Tuple, - Union, - cast, -) - -import async_solipsism # type: ignore[import-not-found] -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint - -# Avoid dependency from other `helpers` modules, such as `helpers.hsql`, to prevent -# import cycles. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _EventLoop -# ############################################################################# - - -# TODO(gp): We could make this a mixin and add this behavior to both asyncio and -# async_solipsism event loop. 
-# TODO(gp): -> _AsyncSolipsismEventLoop -# TODO(gp): Consider injecting a `get_wall_clock_time: hdatetim.GetWallClockTime` -# in the event loop so we can simplify the interfaces. An event loop always needs -# a function to get the wall clock. -class _EventLoop(async_solipsism.EventLoop): - """ - An `async_solipsism.EventLoop` returning also the wall-clock time. - """ - - # TODO(gp): If we pass an `initial_replayed_timestamp` we could incorporate here also - # the replayed time approach and can remove `ReplayedTime` object. - def __init__(self) -> None: - super().__init__() - self._initial_dt = datetime.datetime.utcnow() - - def get_current_time(self) -> datetime.datetime: - # `loop.time()` returns the number of seconds as `float` from when the event - # loop was created. - try: - num_secs = super().time() - except AttributeError: - # Sometimes we call the logger before `async_solipsism` is fully initialized. - # File "/app/amp/helpers/hdatetime.py", line 255, in get_current_time - # timestamp = event_loop.get_current_time() - # File "/app/amp/helpers/hasyncio.py", line 60, in get_current_time - # num_secs = super().time() - # File "/venv/lib/python3.8/site-packages/async_solipsism/loop.py", line 39, in time - # return self._selector.clock.time() - # AttributeError: 'NoneType' object has no attribute 'clock' - # Call stack: - # File "/app/amp/helpers/hcache.py", line 311, in clear_global_cache - # _LOG.info("After clear_global_cache: %s", info_after) - # Message: 'After clear_global_cache: %s' - # Arguments: ("'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan",) - # To avoid the error above we just set the `num_secs` to 0. - num_secs = 0 - return self._initial_dt + datetime.timedelta(seconds=num_secs) - - -# From https://stackoverflow.com/questions/49555991 -@contextlib.contextmanager -def solipsism_context() -> Iterator: - """ - Context manager to isolate an `asyncio_solipsism` event loop. - """ - # Use the variation of solipsistic `EventLoop` above. 
- event_loop = _EventLoop() - asyncio.set_event_loop(event_loop) - try: - yield event_loop - finally: - asyncio.set_event_loop(None) - - -async def gather_coroutines_with_wall_clock( - event_loop: asyncio.AbstractEventLoop, - *coroutines: Callable[[Any], Coroutine[Any, Any, Any]], -) -> List[Any]: - """ - Inject a wall clock associated to `event_loop` in all the coroutines and - then gathers them in a single coroutine. - """ - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - # Construct the coroutines here by passing the `get_wall_clock_time()` - # function. - coros_list = [coro(get_wall_clock_time) for coro in coroutines] - # - result: List[Any] = await asyncio.gather(*coros_list) - return result - - -# TODO(gp): For some reason `asyncio.run()` doesn't seem to pick up the new event -# loop. So we use a re-implementation of `run` that does that. -def run( - coroutine: Coroutine, - event_loop: Optional[asyncio.AbstractEventLoop], - *, - close_event_loop: bool = True, -) -> Any: - """ - `asyncio.run()` wrapper that allows to use a specified `EventLoop`. - - :param coroutine: the coroutine to run - :param event_loop: the event loop to use. `None` means the standard `asyncio` - event loop - :param close_event_loop: if False the event loop is not closed, so that we can - run multiple times in the same event loop - :return: same output of `run_until_complete()` - """ - if event_loop is None: - # Use a normal `asyncio` EventLoop. - event_loop = asyncio.new_event_loop() - hdbg.dassert_issubclass(event_loop, asyncio.AbstractEventLoop) - hprint.log_frame(_LOG, "asyncio.run") - try: - ret = event_loop.run_until_complete(coroutine) - finally: - if close_event_loop: - event_loop.close() - return ret - - -# ############################################################################# -# Synchronous / asynchronous polling. 
-# ############################################################################# - - -# The result of a polling function in terms of a bool indicating success (which -# when True stops the polling) and a result. -PollOutput = Tuple[bool, Any] - -# A polling function accepts any inputs and returns a `PollOutput` in terms of -# (success, result). Typically polling functions don't accept any inputs and are -# built through lambdas and closures. -PollingFunction = Callable[[], PollOutput] - - -def _get_max_num_iterations( - sleep_in_secs: float, - timeout_in_secs: float, -) -> int: - hdbg.dassert_lt(0, sleep_in_secs) - hdbg.dassert_lt(0, timeout_in_secs) - max_num_iter = int(math.ceil(timeout_in_secs / sleep_in_secs)) - hdbg.dassert_lte(1, max_num_iter) - return max_num_iter - - -# TODO(gp): This is probably better implemented with an iterator. -def _poll_iterate( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - num_iter: int, - max_num_iter: int, - tag: str, -) -> Tuple[int, PollOutput]: - """ - Execute an iteration of the polling loop. - - :return: the number of iterations executed and the output of the - polling function (sucess, return value) - :raises: TimeoutError in case of timeout - """ - _LOG.debug( - "\n## %s: wall clock time=%s: iter=%s/%s", - tag, - get_wall_clock_time(), - num_iter, - max_num_iter, - ) - hdbg.dassert_callable(get_wall_clock_time) - # Poll. - success, value = polling_func() - _LOG.debug("success=%s, value=%s", success, value) - if success: - # If success, then exit. - hprint.log_frame( - _LOG, - "%s: wall clock time=%s: poll done", - tag, - get_wall_clock_time(), - ) - else: - # Otherwise update state. 
- num_iter += 1 - if num_iter > max_num_iter: - msg = "Timeout for " + hprint.to_str( - "polling_func sleep_in_secs timeout_in_secs tag" - ) - _LOG.error(msg) - raise TimeoutError(msg) - return num_iter, (success, value) - - -# TODO(ai_gp): -> async_poll -async def poll( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> Tuple[int, Any]: - """ - Call `polling_func()` every `sleep_in_secs` secs until the polling function - returns success or there is a timeout. A timeout happens if no success is - achieved within `timeout_in_secs` secs. - - :param polling_func: function returning a tuple (success, value) - :return: - - number of iterations before a successful call to `polling_func` - - result from `polling_func` - :raises: TimeoutError in case of timeout - """ - _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag")) - if tag is None: - # Use the function calling this function. - tag = hintros.get_function_name(count=0) - max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs) - num_iter = 1 - while True: - num_iter, (success, value) = _poll_iterate( - polling_func, - sleep_in_secs, - timeout_in_secs, - get_wall_clock_time, - num_iter, - max_num_iter, - tag, - ) - if success: - return num_iter, value - _LOG.debug("sleep for %s secs", sleep_in_secs) - await asyncio.sleep(sleep_in_secs) - - -def sync_poll( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> Tuple[int, Any]: - """ - Same interface and behavior of `poll()` but using a synchronous - implementation. - """ - _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag")) - if tag is None: - # Use the function calling this function. 
- tag = hintros.get_function_name(count=0) - max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs) - num_iter = 1 - while True: - num_iter, (success, value) = _poll_iterate( - polling_func, - sleep_in_secs, - timeout_in_secs, - get_wall_clock_time, - num_iter, - max_num_iter, - tag, - ) - if success: - return success, value - _LOG.debug("sleep for %s secs", sleep_in_secs) - time.sleep(sleep_in_secs) - - -def get_poll_kwargs( - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(ai_gp): Avoid using defaults. - sleep_in_secs: float = 1.0, - timeout_in_secs: float = 10.0, -) -> Dict[str, Any]: - hdbg.dassert_lt(0, sleep_in_secs) - hdbg.dassert_lt(0, timeout_in_secs) - hdbg.dassert_callable(get_wall_clock_time) - poll_kwargs = { - "sleep_in_secs": sleep_in_secs, - "timeout_in_secs": timeout_in_secs, - "get_wall_clock_time": get_wall_clock_time, - } - return poll_kwargs - - -# ############################################################################# -# Wait. -# ############################################################################# - - -# Represent a deterministic, if float, or random delay in [a, b] if a Tuple. -# All values are in seconds. -WaitInSecs = Union[float, Tuple[float, float]] - - -async def sleep( - delay_in_secs: WaitInSecs, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(gp): -> msg - tag: Optional[str] = None, - # TODO(gp): How to handle random seed here? - seed: int = 42, -) -> None: - """ - Wait a deterministic or a randomized delay. - """ - if tag is None: - # Use the name of the function calling this function. - tag = hintros.get_function_name(count=0) - # Extract or compute the delay. - if isinstance(delay_in_secs, (int, float)): - # Deterministic delay. - pass - elif isinstance(delay_in_secs, tuple): - # Randomized delay. 
- hdbg.dassert_eq(len(delay_in_secs), 2) - min_, max_ = delay_in_secs - hdbg.dassert_lte(0, min_) - hdbg.dassert_lte(min_, max_) - delay_in_secs = np.random.rand(min_, max_) - else: - raise ValueError(f"Invalid delay_in_secs='{delay_in_secs}'") - # Wait. - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: started waiting for %s secs", - tag, - get_wall_clock_time(), - delay_in_secs, - ) - hdbg.dassert_lte(0, delay_in_secs) - delay_in_secs = cast(float, delay_in_secs) - await asyncio.sleep(delay_in_secs) - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: done waiting for %s secs", - tag, - get_wall_clock_time(), - delay_in_secs, - ) - - -# ////////////////////////////////////////////////////////////////////////////////// - - -def get_seconds_to_align_to_grid( - bar_duration_in_secs: int, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - add_buffer_in_secs: int = 0, -) -> Tuple[pd.Timestamp, float]: - """ - Given the current time return the amount of seconds to wait to align on a - grid with period `bar_duration_in_secs`. - - E.g., current_time=9:31:02am, bar_duration_in_secs=120 -> return 58 - - :param add_buffer_in_secs: number of seconds to add to make sure we - are right after the grid time - """ - hdbg.dassert_lte(0, add_buffer_in_secs) - current_time = get_wall_clock_time() - _LOG.debug("current_time=%s ...", current_time) - # Align on the time grid. - hdbg.dassert_isinstance(bar_duration_in_secs, int) - hdbg.dassert_lt(0, bar_duration_in_secs) - freq = f"{bar_duration_in_secs}S" - target_time = current_time.ceil(freq) - hdbg.dassert_lte(current_time, target_time) - _LOG.debug("target_time=%s", target_time) - secs_to_wait = (target_time - current_time).total_seconds() - # E.g., for - # target_time=2022-07-11 11:30:00-04:00 - # curr_time=2022-07-11 11:29:15.129365-04:00 - # The difference is 44secs, so we need to add 1 sec to make sure we pass - # the target time. 
- secs_to_wait += add_buffer_in_secs - return target_time, secs_to_wait - - -def _wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> float: - """ - Return amount of seconds to wait for. - - More accurate version of _wait_until, uses total_seconds() which - allows for returning fractional second values. - """ - if tag is None: - # Use the name of the function calling this function. - tag = hintros.get_function_name(count=2) - curr_timestamp = get_wall_clock_time() - _LOG.debug( - "wait_until_timestamp=%s, curr_timestamp=%s", - wait_until_timestamp, - curr_timestamp, - ) - # We can only wait for times in the future. - if curr_timestamp > wait_until_timestamp: - _LOG.warning( - "curr_timestamp=%s, wait_until_timestamp=%s is in the future: " - "continuing ", - curr_timestamp, - wait_until_timestamp, - ) - time_in_secs = 0 - else: - time_in_secs = (wait_until_timestamp - curr_timestamp).total_seconds() - _LOG.debug( - "%s: wall_clock_time=%s: sleep for %s secs", - tag, - get_wall_clock_time(), - time_in_secs, - ) - return time_in_secs - - -def sync_wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, - log_verbosity: int = logging.DEBUG, -) -> None: - """ - Synchronous wait until the wall clock time is `timestamp`. - - More accurate version of sync_wait_until allowing to wait for - fractional seconds. - """ - # Sync wait. - time_in_secs = _wait_until( - wait_until_timestamp, get_wall_clock_time, tag=tag - ) - hdbg.dassert_lte(0, time_in_secs) - # TODO(gp): Consider using part of align_on_time_grid for high-precision clock. 
- time.sleep(time_in_secs) - # - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: done waiting", - tag, - get_wall_clock_time(), - verbosity=log_verbosity, - ) - - -async def async_wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(gp): -> msg - tag: Optional[str] = None, -) -> None: - """ - Asynchronous wait until the wall clock time is `timestamp`. - """ - _LOG.debug(hprint.to_str("wait_until_timestamp")) - time_in_secs = _wait_until( - wait_until_timestamp, get_wall_clock_time, tag=tag - ) - # Async wait. - hdbg.dassert_lte(0, time_in_secs) - await asyncio.sleep(time_in_secs) - # - hprint.log_frame( - _LOG, "%s: wall_clock_time=%s: done waiting", tag, get_wall_clock_time() - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py deleted file mode 100644 index e010f5b08..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py +++ /dev/null @@ -1,266 +0,0 @@ -""" -Import as: - -import helpers.haws as haws -""" - -import logging -from typing import Dict, List, Optional - -import boto3 -import boto3.session -from boto3.resources.base import ServiceResource -from botocore.client import BaseClient - -import helpers.hdbg as hdbg -import helpers.hserver as hserver - -_LOG = logging.getLogger(__name__) - - -# AWS profile is used as a mechanism to differentiate between different AWS accounts. -# See CmampTask12943. -# `test` and `preprod` environments are in the same account using `ck` profile. -# `prod` environment is in the different account using `csfy` profile. 
-AWS_PROFILE = { - "test": "ck", - "preprod": "ck", - "prod": "csfy", -} - -# ############################################################################# -# Utils -# ############################################################################# - - -def get_session( - aws_profile: str, *, region: Optional[str] = None -) -> boto3.session.Session: - """ - Return connected Boto3 session. - - :param aws_profile: AWS profile name to use for the session. - :param region: AWS region, if None get region from AWS credentials. - :return: Boto3 session object. - """ - hdbg.dassert_isinstance(aws_profile, str) - # When deploying jobs via ECS the container obtains credentials based on - # passed task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if aws_profile in ["ck", "csfy"] and hserver.is_inside_ecs_container(): - _LOG.info("Fetching credentials from task IAM role") - session = boto3.session.Session() - else: - # We do not need to extract the credential from the file because - # the credential is already set and `boto3` know where to find them. - if region: - session = boto3.Session(profile_name=aws_profile, region_name=region) - else: - session = boto3.Session(profile_name=aws_profile) - return session - - -def get_service_client( - aws_profile: str, service_name: str, *, region: Optional[str] = None -) -> BaseClient: - """ - Return client to work with desired service in the specific region. - - For params look at `get_session()` - """ - session = get_session(aws_profile, region=region) - client = session.client(service_name=service_name) - return client - - -def get_service_resource(aws_profile: str, service_name: str) -> ServiceResource: - """ - Return resource to work with desired service in the specific region. 
- """ - session = get_session(aws_profile) - resource = session.resource(service_name=service_name) - return resource - - -# ############################################################################# -# ECS -# ############################################################################# - - -# TODO(Toma): Deprecate in favor of `get_service_client`. -def get_ecs_client( - aws_profile: str, *, region: Optional[str] = None -) -> BaseClient: - """ - Return client to work with Elastic Container Service in the specific - region. - - For params look at `get_session()` - """ - session = get_session(aws_profile, region=region) - client = session.client(service_name="ecs") - return client - - -def get_task_definition_image_url( - task_definition_name: str, environment: str, *, region: Optional[str] = None -) -> str: - """ - Get ECS task definition by name and return only image URL. - - :param task_definition_name: The name of the ECS task definition, - e.g., `cmamp-test`. - :param region: AWS region, if None get region from AWS credentials. - :param region: look at `get_session()` - """ - aws_profile = AWS_PROFILE[environment] - service_name = "ecs" - client = get_service_client(aws_profile, service_name, region=region) - # Get the last revision of the task definition. - task_description = client.describe_task_definition( - taskDefinition=task_definition_name - ) - task_definition_json = task_description["taskDefinition"] - image_url = task_definition_json["containerDefinitions"][0]["image"] - return image_url - - -def is_task_definition_exists( - task_definition_name: str, *, region: Optional[str] = None -) -> bool: - """ - Check if a task definition exists in the specified region. 
- - :param task_definition_name: the name of the ECS task definition - :param region: region of the task definition - :return: whether the task definition exists - """ - client = get_ecs_client("ck", region=region) - try: - client.describe_task_definition(taskDefinition=task_definition_name) - return True - except client.exceptions.ClientError as e: - _LOG.warning( - "Failed to describe task definition '%s': %s", - task_definition_name, - e, - ) - return False - - -# TODO(Nikola): Pass a dict config instead, so any part can be updated. -def update_task_definition( - task_definition_name: str, - new_image_url: str, - *, - region: Optional[str] = None, - environment: str, -) -> None: - """ - Create the new revision of specified ECS task definition. - - If region is different then the default one, it is assumed that ECR - replication is enabled from the default region to the target region. - - :param task_definition_name: The name of the ECS task definition for - which an update to container image URL is made, e.g., `cmamp- - test`. - :param new_image_url: New image URL for task definition. e.g., - `***.dkr.ecr.***/cmamp:prod`. - :param region: AWS region, if None get region from AWS credentials. - """ - aws_profile = AWS_PROFILE[environment] - client = get_ecs_client(aws_profile, region=region) - # Get the last revision of the task definition. - task_description = client.describe_task_definition( - taskDefinition=task_definition_name - ) - task_definition_json = task_description["taskDefinition"] - # Set new image. - old_image_url = task_definition_json["containerDefinitions"][0]["image"] - if old_image_url == new_image_url: - _LOG.info( - "New image url `%s` is already set for task definition `%s`!", - new_image_url, - task_definition_name, - ) - return - task_definition_json["containerDefinitions"][0]["image"] = new_image_url - # Register the new revision with the new image. 
- response = client.register_task_definition( - family=task_definition_name, - taskRoleArn=task_definition_json.get("taskRoleArn", ""), - executionRoleArn=task_definition_json["executionRoleArn"], - networkMode=task_definition_json["networkMode"], - containerDefinitions=task_definition_json["containerDefinitions"], - volumes=task_definition_json["volumes"], - placementConstraints=task_definition_json["placementConstraints"], - requiresCompatibilities=task_definition_json["requiresCompatibilities"], - cpu=task_definition_json["cpu"], - memory=task_definition_json["memory"], - ) - updated_image_url = response["taskDefinition"]["containerDefinitions"][0][ - "image" - ] - # Check if the image URL is updated. - hdbg.dassert_eq(updated_image_url, new_image_url) - _LOG.info( - "The image URL of `%s` task definition is updated to `%s`", - task_definition_name, - updated_image_url, - ) - - -def list_all_objects( - s3_client: BaseClient, bucket_name: str, prefix: str -) -> List[Dict]: - """ - List all objects in the specified S3 bucket under the given prefix, - handling pagination. - - :param s3_client: Instance of boto3 S3 client. - :param bucket_name: The name of the S3 bucket e.g., `cryptokaizen-data-test`. - :param prefix: Prefix to filter the S3 objects e.g., `binance/historical_bid_ask/`. - :return: A list of dictionaries containing metadata about each object. 
E.g., - ``` - [ - { - 'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-27/data.tar.gz', - 'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()), - 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"', - 'Size': 0, - 'StorageClass': 'STANDARD' - }, - { - 'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-28/data.tar.gz', - 'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()), - 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"', - 'Size': 0, - 'StorageClass': 'STANDARD' - } - ] - ``` - """ - objects = [] - continuation_token = None - while True: - # If there's a continuation token, include it in the request to fetch - # the next page of results. - if continuation_token: - response = s3_client.list_objects_v2( - Bucket=bucket_name, - Prefix=prefix, - ContinuationToken=continuation_token, - ) - else: - response = s3_client.list_objects_v2( - Bucket=bucket_name, Prefix=prefix - ) - # Extend the objects list with the contents of the current page. - objects.extend(response.get("Contents", [])) - # Check if there are more pages. - if response.get("IsTruncated"): - continuation_token = response.get("NextContinuationToken") - else: - break - return objects diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py deleted file mode 100644 index d72a2f708..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py +++ /dev/null @@ -1,1086 +0,0 @@ -""" -See `docs/coding/all.hcache.explanation.md` for implementation details. 
- -Import as: - -import helpers.hcache as hcache -""" - -import atexit -import copy -import functools -import logging -import os -import time -from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast - -import joblib -import joblib.func_inspect as jfunci -import joblib.memory as jmemor - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hintrospection as hintros -import helpers.hlogging as hloggin -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsystem as hsystem -import helpers.htimer as htimer - -_LOG = hloggin.getLogger(__name__) -# Enable extra verbose debugging. Do not commit. -_TRACE = False - -# ############################################################################# - - -_IS_CACHE_ENABLED: bool = True - - -def enable_caching(val: bool) -> None: - """ - Enable or disable all caching, i.e., global, tagged global, function- - specific. - """ - global _IS_CACHE_ENABLED - if _TRACE: - _LOG.trace("") - _LOG.warning("Setting caching to %s -> %s", _IS_CACHE_ENABLED, val) - _IS_CACHE_ENABLED = val - - -def is_caching_enabled() -> bool: - """ - Check if cache is enabled. - - :return: whether the cache is enabled or not - """ - if _TRACE: - _LOG.trace("") - return _IS_CACHE_ENABLED - - -# Global switch to allow or prevent clearing the cache. -_IS_CLEAR_CACHE_ENABLED: bool = True - - -def enable_clear_cache(val: bool) -> None: - """ - Enable or disable clearing a cache (both global and function-specific). 
- """ - global _IS_CLEAR_CACHE_ENABLED - if _TRACE: - _LOG.trace("") - _LOG.warning( - "Enabling clear cache to %s -> %s", _IS_CLEAR_CACHE_ENABLED, val - ) - _IS_CLEAR_CACHE_ENABLED = val - - -# ############################################################################# -# Global cache interface -# ############################################################################# - - -def _get_cache_types() -> List[str]: - """ - Return the types (aka levels) of the cache. - """ - return ["mem", "disk"] - - -def _dassert_is_valid_cache_type(cache_type: str) -> None: - """ - Assert that `cache_type` is a valid cache type. - """ - hdbg.dassert_in(cache_type, _get_cache_types()) - - -def _get_global_cache_name(cache_type: str, tag: Optional[str] = None) -> str: - """ - Get the canonical cache name for a type of cache and tag, both global and - function-specific. - - E.g., `tmp.cache.{cache_type}.{tag}` like `tmp.cache.mem.unit_tests` - - :param cache_type: type of a cache - :param tag: optional unique tag of the cache - :return: name of the folder for a cache - """ - _dassert_is_valid_cache_type(cache_type) - cache_name = f"tmp.cache.{cache_type}" - if tag is not None: - cache_name += f".{tag}" - return cache_name - - -def _get_global_cache_path(cache_type: str, tag: Optional[str] = None) -> str: - """ - Get path to the directory storing the cache. - - For a memory cache, the path is in a predefined RAM disk. - For a disk cache, the path is on the file system relative to Git root. - - :return: the file system path to the cache - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - # Get the cache name. - cache_name = _get_global_cache_name(cache_type, tag) - # Get the enclosing directory path. 
- if cache_type == "mem": - if hsystem.get_os_name() == "Darwin": - root_path = "/tmp" - else: - root_path = "/mnt/tmpfs" - elif cache_type == "disk": - root_path = hgit.get_client_root(super_module=True) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - # Compute path. - file_name = os.path.join(root_path, cache_name) - file_name = os.path.abspath(file_name) - return file_name - - -def _get_cache_size(path: str, description: str) -> str: - """ - Report information about a cache (global or function) stored at a given - path. - """ - if _TRACE: - _LOG.trace("") - if path is None: - txt = f"'{description}' cache: path='{path}' doesn't exist yet" - else: - if os.path.exists(path): - size_in_bytes = hsystem.du(path) - if isinstance(size_in_bytes, str): - size_as_str = size_in_bytes - else: - size_as_str = hintros.format_size(size_in_bytes) - else: - size_as_str = "nan" - # TODO(gp): Compute number of files. - txt = f"'{description}' cache: path='{path}', size={size_as_str}" - return txt - - -def get_global_cache_info( - tag: Optional[str] = None, add_banner: bool = False -) -> str: - """ - Report information on global cache. - """ - if _TRACE: - _LOG.trace("") - txt = [] - if add_banner: - txt.append(hprint.frame("get_global_cache_info()", char1="<")) - txt.append(f"is global cache enabled={is_caching_enabled()}") - # - cache_types = _get_cache_types() - txt.append(f"cache_types={str(cache_types)}") - for cache_type in cache_types: - path = _get_global_cache_path(cache_type, tag=tag) - description = f"global {cache_type}" - cache_info = _get_cache_size(path, description) - txt.append(cache_info) - txt = "\n".join(txt) - return txt - - -# This is the global memory cache. -_MEMORY_CACHE: Optional[joblib.Memory] = None - - -# This is the global disk cache. 
-_DISK_CACHE: Optional[joblib.Memory] = None - - -def _create_global_cache_backend( - cache_type: str, tag: Optional[str] = None -) -> joblib.Memory: - """ - Create a Joblib memory object storing a cache. - - :return: cache backend object - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - dir_name = _get_global_cache_path(cache_type, tag) - _LOG.debug( - "Creating cache for cache_type='%s' and tag='%s' at '%s'", - cache_type, - tag, - dir_name, - ) - cache_backend = joblib.Memory(dir_name, verbose=0, compress=True) - return cache_backend - - -# TODO(gp): -> _get_global_cache -def get_global_cache( - cache_type: str, tag: Optional[str] = None -) -> joblib.Memory: - """ - Get global cache by cache type. - - :return: caching backend - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - global _MEMORY_CACHE - global _DISK_CACHE - if tag is None: - if cache_type == "mem": - # Create global memory cache if it doesn't exist. - if _MEMORY_CACHE is None: - _MEMORY_CACHE = _create_global_cache_backend(cache_type) - global_cache = _MEMORY_CACHE - elif cache_type == "disk": - # Create global disk cache if it doesn't exist. - if _DISK_CACHE is None: - _DISK_CACHE = _create_global_cache_backend(cache_type) - global_cache = _DISK_CACHE - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - else: - # Build a one-off cache using tag. - global_cache = _create_global_cache_backend(cache_type, tag) - return global_cache - - -def set_global_cache(cache_type: str, cache_backend: joblib.Memory) -> None: - """ - Set global cache by cache type. 
- - :param cache_type: type of a cache - :param cache_backend: caching backend - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - global _MEMORY_CACHE - global _DISK_CACHE - if cache_type == "mem": - _MEMORY_CACHE = cache_backend - elif cache_type == "disk": - _DISK_CACHE = cache_backend - - -def clear_global_cache( - cache_type: str, tag: Optional[str] = None, destroy: bool = False -) -> None: - """ - Reset the global cache by cache type. - - :param cache_type: type of a cache. `None` to clear all the caches. - :param tag: optional unique tag of the cache, empty by default - :param destroy: remove physical directory - """ - if _TRACE: - _LOG.trace("") - if cache_type == "all": - for cache_type_tmp in _get_cache_types(): - clear_global_cache(cache_type_tmp, tag=tag, destroy=destroy) - return - _dassert_is_valid_cache_type(cache_type) - # Clear and / or destroy the cache `cache_type` with the given `tag`. - cache_path = _get_global_cache_path(cache_type, tag) - if not _IS_CLEAR_CACHE_ENABLED: - hdbg.dfatal(f"Trying to delete cache '{cache_path}'") - description = f"global {cache_type}" - try: - # TODO(ShaopengZ): in some test run outside CK infra, the - # _get_cache_size() hangs. - info_before = _get_cache_size(cache_path, description) - except ValueError: - _LOG.warning("Cache has already been deleted by another process.") - return - _LOG.info("Before clear_global_cache: %s", info_before) - _LOG.warning("Resetting 'global %s' cache '%s'", cache_type, cache_path) - if hs3.is_s3_path(cache_path): - # For now we only allow to delete caches under the unit test path. 
- _, abs_path = hs3.split_path(cache_path) - hdbg.dassert( - abs_path.startswith("/tmp/cache.unit_test/"), - "The path '%s' is not valid", - abs_path, - ) - if destroy: - _LOG.warning("Destroying '%s' ...", cache_path) - hio.delete_dir(cache_path) - else: - cache_backend = get_global_cache(cache_type, tag) - try: - cache_backend.clear(warn=True) - except FileNotFoundError as e: - # A race condition can cause: - # FileNotFoundError: [Errno 2] No such file or directory: '/app/tmp.cache.disk/joblib' - _LOG.error("Caught %s: continuing", str(e)) - # Report stats before and after. - try: - info_after = _get_cache_size(cache_path, description) - except ValueError: - _LOG.warning("Cache has already been deleted by another process.") - return - _LOG.info("After clear_global_cache: %s", info_after) - - -# ############################################################################# -# CachedValueException -# ############################################################################# - - -class CachedValueException(RuntimeError): - """ - A cached function is run for a value present in the cache. - - This exception is thrown when the `check_only_if_present` mode is - used. - """ - - -# ############################################################################# -# NotCachedValueException -# ############################################################################# - - -class NotCachedValueException(RuntimeError): - """ - A cached function is run for a value not present in the cache. - - This exception is thrown when the `enable_read_only` mode is used. - """ - - -# ############################################################################# -# _Cached -# ############################################################################# - - -class _Cached: - # pylint: disable=protected-access - """ - Implement a cache in memory and disk for a function. 
- - If the function value was not cached either in memory or on disk, the function - `f()` is executed and the value is stored in both caches for future calls. - - This class uses 2 levels of caching: - - memory cache: useful for caching across multiple executions of a function in - a process or in notebooks without resetting the state - - disk cache: useful for retrieving the state among different executions of a - process or when a notebook is reset - """ - - def _create_function_memory_cache(self) -> joblib.Memory: - """ - Initialize Joblib object storing a memory cache for this function. - """ - if _TRACE: - _LOG.trace("") - _LOG.debug("Create memory cache") - # For memory always use the global cache. - cache_type = "mem" - memory_cache = get_global_cache(cache_type, self._tag) - # Get the Joblib object corresponding to the cached function. - return memory_cache.cache(self._func) - - def _create_function_disk_cache( - self, - ) -> Tuple[joblib.Memory, joblib.memory.MemorizedFunc]: - """ - Initialize Joblib object storing a disk cache for this function. - """ - if _TRACE: - _LOG.trace("") - if self.has_function_cache(): - hdbg.dassert( - not self._use_mem_cache, - "When using function cache the memory cache needs to be disabled", - ) - # Create a function-specific cache. - memory_kwargs: Dict[str, Any] = { - "verbose": 0, - "compress": True, - } - if hs3.is_s3_path(self._disk_cache_path): - import helpers.hjoblib as hjoblib - - # Register the S3 backend. - hjoblib.register_s3fs_store_backend() - s3fs = hs3.get_s3fs(self._aws_profile) - bucket, path = hs3.split_path(self._disk_cache_path) - # Remove the initial `/` from the path that makes the path - # absolute, since `Joblib.Memory` wants a path relative to the - # bucket. 
- hdbg.dassert( - path.startswith("/"), - "The path should be absolute instead of %s", - path, - ) - path = path[1:] - memory_kwargs.update( - { - "backend": "s3", - "backend_options": {"s3fs": s3fs, "bucket": bucket}, - } - ) - else: - path = self._disk_cache_path - _LOG.debug("path='%s'\nmemory_kwargs=\n%s", path, str(memory_kwargs)) - disk_cache = joblib.Memory(path, **memory_kwargs) - else: - # Use the global cache. - cache_type = "disk" - disk_cache = get_global_cache(cache_type, self._tag) - # Get the Joblib object corresponding to the cached function. - disk_cached_func = disk_cache.cache(self._func) - return disk_cache, disk_cached_func - # - - # /////////////////////////////////////////////////////////////////////////// - - def _reset_cache_tracing(self) -> None: - """ - Reset the values used to track which cache we are hitting when - executing the cached function. - """ - if _TRACE: - _LOG.trace("") - # The reset values depend on which caches are enabled. - self._last_used_disk_cache = self._use_disk_cache - self._last_used_mem_cache = self._use_mem_cache - - # TODO(gp): Either allow users to initialize `mem_cache_path` here or with - # `set_function_cache_path()` but not both code paths. It's unclear which option - # is better. On the one side `set_function_cache_path()` is more explicit, but - # it can't be changed. On the other side the wrapper needs to be initialized in - # one shot. - def __init__( - self, - func: Callable, - *, - use_mem_cache: bool = True, - use_disk_cache: bool = True, - verbose: bool = False, - tag: Optional[str] = None, - disk_cache_path: Optional[str] = None, - aws_profile: Optional[str] = "am", - ): - """ - Construct the class. 
- - :param func: function to cache - :param use_mem_cache, use_disk_cache: whether we allow memory and disk caching - :param verbose: print high-level information about the cache - behavior, e.g., - - whether a function was cached or not - - from which level the data was retrieved - - the execution time - - the amount of data retrieved - :param tag: a tag added to the global cache path to make it specific (e.g., - when running unit tests we want to use a different cache) - :param disk_cache_path: path of the function-specific cache - :param aws_profile: the AWS profile to use in case of S3 backend - """ - # Make the class have the same attributes (e.g., `__name__`, `__doc__`, - # `__dict__`) as the called function. - functools.update_wrapper(self, func) - if _TRACE: - _LOG.trace("") - # Save interface parameters. - hdbg.dassert_callable(func) - self._func = func - # TODO(gp): We should use memory cache only inside Jupyter notebooks. - self._use_mem_cache = use_mem_cache - self._use_disk_cache = use_disk_cache - self._is_verbose = verbose - self._tag = tag - self._disk_cache_path = disk_cache_path - self._aws_profile = aws_profile - # - self._reset_cache_tracing() - # Create the memory and disk cache objects for this function. - # TODO(gp): We might simplify the code by using a dict instead of 2 variables. - # Store the Joblib memory cache object for this function. - self._memory_cached_func = self._create_function_memory_cache() - # Store the Joblib memory object and the Joblib memory cache object for - # this function. - ( - self._disk_cache, - self._disk_cached_func, - ) = self._create_function_disk_cache() - # Enable a mode where an exception `NotCachedValueException` is thrown if - # the value is not in the cache. - self._enable_read_only = False - # Enable a mode where an exception `NotCachedValueException` is thrown if - # the value is in the cache, instead of accessing the value. 
- self._check_only_if_present = False - - def get_function_cache_info(self, add_banner: bool = False) -> str: - """ - Return info about the caching properties for this function. - """ - if _TRACE: - _LOG.trace("") - txt = [] - if add_banner: - txt.append(hprint.frame("get_global_cache_info()", char1="<")) - has_func_cache = self.has_function_cache() - txt.append(f"has function-specific cache={has_func_cache}") - if has_func_cache: - # Function-specific cache: print the paths of the local cache. - cache_type = "disk" - txt.append(f"local {cache_type} cache path={self._disk_cache_path}") - txt = "\n".join(txt) - return txt - - def get_last_cache_accessed(self) -> str: - """ - Get the cache used in the latest call of the wrapped function. - - :return: type of cache used in the last call - """ - if _TRACE: - _LOG.trace("") - if self._last_used_mem_cache: - ret = "mem" - elif self._last_used_disk_cache: - # If the disk cache was used, then the memory cache should not been used. - hdbg.dassert(not self._last_used_mem_cache) - ret = "disk" - else: - ret = "no_cache" - return ret - - def enable_read_only(self, val: bool) -> None: - """ - If set to True, the cached function can only read from the cache but - not execute for new values. - - Otherwise a `NotCachedValueException` is thrown. - """ - if _TRACE: - _LOG.trace("") - _LOG.warning( - "Setting enable_read_only to %s -> %s", self._enable_read_only, val - ) - self._enable_read_only = val - - def enable_check_only_if_present(self, val: bool) -> None: - """ - If set to True, the cached function a `CachedValueException` is thrown - if a function invocation was cached, instead of executing it. - - This can be used to check if a value was already cached without - triggering retrieving the value from the cache, e.g., when - probing the content of the cache. 
- """ - _LOG.warning( - "Setting check_only_if_present to %s -> %s", - self._check_only_if_present, - val, - ) - self._check_only_if_present = val - - def _get_memorized_result(self, cache_type: str) -> joblib.MemorizedResult: - """ - Get the instance of a cache by type. - - From https://github.com/joblib/joblib/blob/master/joblib/memory.py - A `MemorizedResult` is an object representing a cached value - - :param cache_type: type of a cache - :return: instance of the Joblib cache - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - if cache_type == "mem": - memorized_result = self._memory_cached_func - elif cache_type == "disk": - memorized_result = self._disk_cached_func - _LOG.debug("memorized_result=%s", memorized_result) - return memorized_result - - def _get_function_specific_code_path(self) -> str: - if _TRACE: - _LOG.trace("") - # Get the store backend. - cache_type = "disk" - memorized_result = self._get_memorized_result(cache_type) - store_backend = memorized_result.store_backend - # Get the function id (which is the full path). - func_id = jmemor._build_func_identifier(self._func) - # Assemble the path. - func_path = os.path.join(store_backend.location, func_id, "func_code.py") - _LOG.debug("func_path='%s'", func_path) - hdbg.dassert( - store_backend._item_exists(func_path), "Can't find '%s'", func_path - ) - return func_path - - def update_func_code_without_invalidating_cache(self) -> None: - """ - Update the Python code stored in the cache. - - This is used when we make changes to the cached function but we don't want - to invalidate the cache. - - NOTE: here the caller must guarantee that the new function yields exactly - the same results than the previous ones. Use carefully. 
- """ - if _TRACE: - _LOG.trace("") - hdbg.dassert( - self.has_function_cache(), - "This is used only for function-specific caches", - ) - # From `store_cached_func_code` in - # https://github.com/joblib/joblib/tree/master/joblib/_store_backends.py - func_path = self._get_function_specific_code_path() - # Archive old code. - new_func_path = ( - func_path + "." + hdateti.get_current_timestamp_as_string(tz="ET") - ) - _LOG.debug("new_func_path='%s'", new_func_path) - # Get the store backend. - cache_type = "disk" - memorized_result = self._get_memorized_result(cache_type) - store_backend = memorized_result.store_backend - hdbg.dassert( - not store_backend._item_exists(new_func_path), - "'%s' already exists", - new_func_path, - ) - store_backend._move_item(func_path, new_func_path) - # Write out function code to the cache. - func_code, _, first_line = jfunci.get_func_code(memorized_result.func) - memorized_result._write_func_code(func_code, first_line) - _LOG.debug("Updated func_path='%s'", func_path) - - # /////////////////////////////////////////////////////////////////////////// - # Function-specific cache. - # /////////////////////////////////////////////////////////////////////////// - - def has_function_cache(self) -> bool: - """ - Return whether this function has a function-specific cache or uses the - global cache. - """ - if _TRACE: - _LOG.trace("") - has_func_cache = self._disk_cache_path is not None - return has_func_cache - - # TODO(gp): Can we reuse the same code for `clear_function_cache` as above? - def clear_function_cache(self, destroy: bool = False) -> None: - """ - Clear a function-specific cache. - """ - if _TRACE: - _LOG.trace("") - hdbg.dassert( - self.has_function_cache(), - "This function has no function-specific cache", - ) - # Get the path for the disk cache. 
- cache_path = self._disk_cache_path - hdbg.dassert_is_not(cache_path, None) - cache_path = cast(str, cache_path) - if not _IS_CLEAR_CACHE_ENABLED: - hdbg.dfatal(f"Trying to delete function cache '{cache_path}'") - # Collect info before. - cache_type = "disk" - description = f"function {cache_type}" - info_before = _get_cache_size(cache_path, description) - _LOG.info("Before clear_function_cache: %s", info_before) - # Clear / destroy the cache. - _LOG.warning( - "Resetting '%s' cache for function '%s' in dir '%s'", - cache_type, - self._func.__name__, - cache_path, - ) - if hs3.is_s3_path(cache_path): - # For now we only allow to delete caches under the unit test path. - _, abs_path = hs3.split_path(cache_path) - hdbg.dassert( - abs_path.startswith("/tmp/"), - "The path '%s' is not valid", - abs_path, - ) - if destroy: - _LOG.warning("Destroying '%s' ...", cache_path) - hio.delete_dir(cache_path) - else: - self._disk_cache.clear() - # Print stats. - info_after = _get_cache_size(cache_path, description) - _LOG.info("After clear_function_cache: %s", info_after) - - def set_function_cache_path(self, cache_path: Optional[str]) -> None: - """ - Set the path for the function-specific cache for a cache type. - - :param cache_path: cache directory or `None` to use global cache - """ - if _TRACE: - _LOG.trace("") - if cache_path: - hdbg.dassert_dir_exists(cache_path) - # We need to disable the memory cache. - if cache_path: - self._use_mem_cache = False - else: - self._use_mem_cache = True - self._disk_cache_path = cache_path - ( - self._disk_cache, - self._disk_cached_func, - ) = self._create_function_disk_cache() - - # /////////////////////////////////////////////////////////////////////////// - - # TODO(gp): We should use the actual stored dir. - def _get_cache_dir(self, cache_type: str, tag: Optional[str]) -> str: - """ - Return the dir of the cache corresponding to `cache_type` and `tag`. 
- """ - if _TRACE: - _LOG.trace("") - if cache_type == "no_cache": - return "no_cache" - if self.has_function_cache(): - hdbg.dassert_eq(cache_type, "disk") - ret = self._disk_cache_path - else: - ret = _get_global_cache_path(cache_type, tag=tag) - ret = cast(str, ret) - return ret - - def _get_identifiers( - self, cache_type: str, *args: Any, **kwargs: Any - ) -> Tuple[str, str]: - """ - Get digests for current function and arguments to be used in cache. - - :param cache_type: type of a cache - :param args: original arguments of the call - :param kwargs: original kw-arguments of the call - :return: digests of the function and current arguments - """ - memorized_result = self._get_memorized_result(cache_type) - _LOG.debug("memorized_result=%s", memorized_result) - hdbg.dassert_is_not( - memorized_result, - None, - "Cache backend not initialized for %s", - cache_type, - ) - # This is needed for joblib >= 1.4.2. - func_id = memorized_result.func_id - args_id = memorized_result._get_args_id(*args, **kwargs) - _LOG.debug("func_id=%s args_id=%s", func_id, args_id) - return func_id, args_id - - def _has_cached_version( - self, cache_type: str, func_id: str, args_id: str - ) -> bool: - """ - Check if a cache contains an entry for a corresponding function and - arguments digests, and that function source has not changed. - - :param cache_type: type of a cache - :param func_id: digest of the function obtained from _get_identifiers - :param args_id: digest of arguments obtained from _get_identifiers - :return: whether there is an entry in a cache - """ - if _TRACE: - _LOG.trace("") - memorized_result = self._get_memorized_result(cache_type) - has_cached_version = memorized_result.store_backend.contains_item( - [func_id, args_id] - ) - _LOG.debug("has_cached_version=%s", has_cached_version) - if has_cached_version: - # We must check that the source of the function is the same, otherwise, - # cache tracing will not be correct. - # First, try faster check via joblib hash. 
- if self._func in jmemor._FUNCTION_HASHES: - func_hash = memorized_result._hash_func() - if func_hash == jmemor._FUNCTION_HASHES[self._func]: - return True - # Otherwise, check the the source of the function is still the same. - func_code, _, _ = jmemor.get_func_code(self._func) - old_func_code_cache = ( - memorized_result.store_backend.get_cached_func_code([func_id]) - ) - old_func_code, _ = jmemor.extract_first_line(old_func_code_cache) - if func_code == old_func_code: - return True - return False - - def _store_cached_version( - self, cache_type: str, func_id: str, args_id: str, obj: Any - ) -> None: - """ - Store returned value from the intrinsic function in the cache. - - :param cache_type: type of a cache - :param func_id: digest of the function obtained from `_get_identifiers()` - :param args_id: digest of arguments obtained from `_get_identifiers()` - :param obj: return value of the intrinsic function - """ - if _TRACE: - _LOG.trace("") - # This corresponds to - # /venv/lib/python3.8/site-packages/joblib/memory.py - # __call__ - if self._enable_read_only: - raise NotCachedValueException - memorized_result = self._get_memorized_result(cache_type) - # Write out function code to the cache. - func_code, _, first_line = jfunci.get_func_code(memorized_result.func) - memorized_result._write_func_code(func_code, first_line) - # Store the returned value into the cache. - memorized_result.store_backend.dump_item([func_id, args_id], obj) - - def _execute_func_from_disk_cache(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - # Get the function signature. 
- func_id, args_id = self._get_identifiers("disk", *args, **kwargs) - if self._has_cached_version("disk", func_id, args_id): - _LOG.debug("There is a disk cached version") - with htimer.TimedScope( - logging.INFO, "Loading cached version from disk" - ): - obj = self._disk_cached_func(*args, **kwargs) - if self._check_only_if_present: - raise CachedValueException(func_info) - else: - # INV: we didn't hit neither memory nor the disk cache. - self._last_used_disk_cache = False - # - _LOG.debug( - "%s: execute the intrinsic function", - func_info, - ) - # If the cache was read-only, then assert. - if self._enable_read_only: - msg = f"{func_info}: trying to execute" - raise NotCachedValueException(msg) - with htimer.TimedScope( - logging.INFO, "Updating cached version on disk" - ): - obj = self._disk_cached_func(*args, **kwargs) - # obj = self._execute_intrinsic_function(*args, **kwargs) - # The function was not cached in disk, so now we need to update the - # memory cache. - # self._store_cached_version("disk", func_id, args_id, obj) - return obj - - def _execute_intrinsic_function(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - with htimer.TimedScope(logging.INFO, "Executing intrinsic function"): - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - _LOG.debug("%s: execute intrinsic function", func_info) - if self._enable_read_only: - msg = f"{func_info}: trying to execute" - raise NotCachedValueException(msg) - obj = self._func(*args, **kwargs) - return obj - - def _execute_func_from_mem_cache(self, *args: Any, **kwargs: Any) -> Any: - """ - Execute the function from memory cache and if not possible try the - lower cache levels. - """ - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - # Get the function signature. 
- func_id, args_id = self._get_identifiers("mem", *args, **kwargs) - if self._has_cached_version("mem", func_id, args_id): - _LOG.debug("There is a mem cached version") - if self._check_only_if_present: - raise CachedValueException(func_info) - # The function execution was cached in the mem cache. - with htimer.TimedScope( - logging.INFO, "Loading cached version from memory" - ): - obj = self._memory_cached_func(*args, **kwargs) - else: - # INV: we know that we didn't hit the memory cache, but we don't know - # about the disk cache. - _LOG.debug("There is not a mem cached version") - self._last_used_mem_cache = False - # - if self._use_disk_cache: - # Try the disk cache. - _LOG.debug( - "Trying to retrieve from disk", - ) - obj = self._execute_func_from_disk_cache(*args, **kwargs) - else: - _LOG.warning("Skipping disk cache") - obj = self._execute_intrinsic_function(*args, **kwargs) - # The function was not cached in memory, so now we need to update the - # memory cache. - self._store_cached_version("mem", func_id, args_id, obj) - return obj - - def _execute_func(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - _LOG.debug( - "%s: use_mem_cache=%s use_disk_cache=%s", - func_info, - self._use_mem_cache, - self._use_disk_cache, - ) - if self._use_mem_cache: - _LOG.debug("Trying to retrieve from memory") - obj = self._execute_func_from_mem_cache(*args, **kwargs) - else: - if self.has_function_cache(): - # For function-specific cache, skipping the memory cache is the - # normal behavior. 
- _LOG.debug( - "Function has function-specific cache: skipping memory cache" - ) - else: - _LOG.warning("Skipping memory cache") - self._last_used_mem_cache = False - if self._use_disk_cache: - obj = self._execute_func_from_disk_cache(*args, **kwargs) - else: - _LOG.warning("Skipping disk cache") - self._last_used_disk_cache = False - obj = self._execute_intrinsic_function(*args, **kwargs) - return obj - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - """ - Execute the wrapped function using the caches, if needed. - - :return: object returned by the wrapped function - """ - if _TRACE: - _LOG.trace("") - perf_counter_start: float - if self._is_verbose: - perf_counter_start = time.perf_counter() - # Execute the cached function. - if not is_caching_enabled(): - # No caching is allowed: execute the function. - _LOG.warning("All caching is disabled") - self._last_used_disk_cache = self._last_used_mem_cache = False - obj = self._func(*args, **kwargs) - else: - # Caching is allowed. - self._reset_cache_tracing() - obj = self._execute_func(*args, **kwargs) - _LOG.debug( - "%s: executed from '%s'", - self._func.__name__, - self.get_last_cache_accessed(), - ) - # TODO(gp): Not sure making a deep copy is a good idea. In the end, - # the client should not modify a cached value. - obj = copy.deepcopy(obj) - # Print caching info. - if self._is_verbose: - # Get time. - elapsed_time = time.perf_counter() - perf_counter_start - # Get memory. - # TODO(gp): This is very slow. 
- # obj_size = hintros.get_size_in_bytes(obj) - # obj_size_as_str = hintros.format_size(obj_size) - obj_size_as_str = "nan" - last_cache = self.get_last_cache_accessed() - cache_dir = self._get_cache_dir(last_cache, self._tag) - _LOG.info( - " --> Cache data for '%s' from '%s' cache " - "(size=%s, time=%.2f s, tag=%s, loc=%s)", - self._func.__name__, - last_cache, - obj_size_as_str, - elapsed_time, - self._tag, - cache_dir, - ) - return obj - - -# ############################################################################# -# Decorator -# ############################################################################# - - -def cache( - use_mem_cache: bool = True, - use_disk_cache: bool = True, - set_verbose_mode: bool = False, - tag: Optional[str] = None, - disk_cache_path: Optional[str] = None, - aws_profile: Optional[str] = None, -) -> Union[Callable, _Cached]: - """ - Decorate a function with a cache. - - The parameters are the same as `hcache._Cached`. - - Usage examples: - ``` - import helpers.hcache as hcache - - @hcache.cache() - def add(x: int, y: int) -> int: - return x + y - - @hcache.cache(use_mem_cache=False) - def add(x: int, y: int) -> int: - return x + y - ``` - """ - - def wrapper(func: Callable) -> _Cached: - return _Cached( - func, - use_mem_cache=use_mem_cache, - use_disk_cache=use_disk_cache, - verbose=set_verbose_mode, - tag=tag, - disk_cache_path=disk_cache_path, - aws_profile=aws_profile, - ) - - return wrapper - - -# ############################################################################# - -# Clean up the memory cache on-exit. -# TODO(gp): Add another function and make it silent. 
-atexit.register(clear_global_cache, cache_type="mem", destroy="true") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py deleted file mode 100644 index afdf5438c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py +++ /dev/null @@ -1,1188 +0,0 @@ -""" -Detailed documentation at. - -- //helpers/docs/tools/helpers/all.hcache_simple.explanation.md -- //helpers/notebooks/hcache_simple.tutorial.ipynb - -Import as: - -import helpers.hcache_simple as hcacsimp -""" - -import functools -import glob -import json -import logging -import os -import pickle -import re -from typing import Any, Callable, Dict, List, Optional, Union, cast - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# Disable tracing for production code. -_LOG.trace = lambda *args, **kwargs: None -# _LOG.trace = _LOG.debug - -# ############################################################################# -# Memory cache. -# ############################################################################# - -# Type for the cache of a single function: key -> value properties. E.g., -# ``` -# { -# "{\"args\": [4], \"kwargs\": {}}": 16 -# } -# ``` -_FunctionCacheType = Dict[str, Any] - -# Basic type for caching data: func_name -> key -> value properties. E.g., -# ``` -# { -# "slow_square": { -# "{\"args\": [4], \"kwargs\": {}}": 16 -# } -# } -# ``` -_CacheType = Dict[str, _FunctionCacheType] - -# Create global variable for the memory cache. 
-if "_CACHE" not in globals(): - _LOG.trace("Creating _CACHE") - _CACHE: _CacheType = {} - -# Process-wide default `cache_mode` applied to every `@simple_cache` function -# when no explicit `cache_mode` is passed at the call site. Used by CLI scripts -# to flip all cached functions into refresh/disable/hit-or-abort mode from a -# single switch (see `hparser.add_cache_control_arg`). -_VALID_CACHE_MODES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") -_GLOBAL_CACHE_MODE: Optional[str] = None - - -def set_global_cache_mode(mode: Optional[str]) -> None: - """ - Set the process-wide default `cache_mode`. - - :param mode: one of `REFRESH_CACHE`, `DISABLE_CACHE`, - `HIT_CACHE_OR_ABORT`, or `None` to clear - """ - global _GLOBAL_CACHE_MODE - if mode is not None: - hdbg.dassert_in(mode, _VALID_CACHE_MODES) - _GLOBAL_CACHE_MODE = mode - - -def get_global_cache_mode() -> Optional[str]: - """ - Return the process-wide default `cache_mode`, or `None` if unset. - """ - return _GLOBAL_CACHE_MODE - - -# When enabled, every `@simple_cache` call emits a WARNING describing whether -# the result came from the cache, was computed on miss, or was recomputed -# because of an active `cache_mode`. -_CACHE_DEBUG: bool = False - - -def set_cache_debug(enabled: bool) -> None: - """ - Enable or disable process-wide cache-decision logging at WARNING level. - """ - global _CACHE_DEBUG - hdbg.dassert_isinstance(enabled, bool) - _CACHE_DEBUG = enabled - - -def get_cache_debug() -> bool: - """ - Return True if cache-decision logging is enabled. - """ - return _CACHE_DEBUG - - -def sanity_check_function_cache( - func_cache_data: _FunctionCacheType, *, assert_on_empty: bool = True -) -> None: - """ - Sanity check the function cache data. - - :param func_cache_data: The function cache data to check. - :param assert_on_empty: If True, assert that the function cache data - is not empty. 
- """ - hdbg.dassert_isinstance(func_cache_data, dict) - if assert_on_empty: - hdbg.dassert_ne(len(func_cache_data), 0, "Function data is empty") - for cache_key, cached_value in func_cache_data.items(): - hdbg.dassert_isinstance(cache_key, str) - hdbg.dassert_ne(cache_key, "", "Cache key is empty") - # cached_value can be any type, so no type check needed. - _ = cached_value - - -def sanity_check_cache( - cache_data: _CacheType, *, assert_on_empty: bool = True -) -> None: - """ - Sanity check the cache data. - - :param cache_data: The cache data to check. - :param assert_on_empty: If True, assert that the cache data is not - empty. - """ - hdbg.dassert_isinstance(cache_data, dict) - if assert_on_empty: - hdbg.dassert_ne(len(cache_data), 0, "Cache data is empty") - for func_name, func_cache_data in cache_data.items(): - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_ne(func_name, "", "Function name is empty") - sanity_check_function_cache( - func_cache_data, assert_on_empty=assert_on_empty - ) - - -def cache_data_to_str(cache_data: _CacheType) -> str: - """ - Convert cache data to a human-readable string. - - :param cache_data: The cache data to convert. - :return: A string representation of the cache data. - """ - txt = [] - txt.append(hprint.frame("Cache data")) - hdbg.dassert_isinstance(cache_data, dict) - for func_name, func_data in cache_data.items(): - txt.append(f"# func_name={func_name}") - hdbg.dassert_isinstance(func_data, dict) - for cache_key, cached_value in func_data.items(): - txt.append(f" cache_key={cache_key} cached_value={cached_value}") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Cache properties. -# ############################################################################# - -# There are several ways to control caching behavior: -# - By passing keyword arguments to the decorated function. 
-# - E.g., `type_` -# - By using a special keyword argument (`force_refresh`, `abort_on_cache_miss`, -# `report_on_cache_miss`) cache_mode`) when calling the decorated function. -# - By setting cache properties -# - E.g., set_cache_property("func_name", "force_refresh", True) - -# - There are two types of properties: -# - `User Properties`: Configurable by the user to alter caching behavior. -# E.g., -# - `abort_on_cache_miss`: Whether to raise an error if a cache miss occurs -# - `report_on_cache_miss`: Whether to return a special value ("_cache_miss_") -# on a cache miss -# - `enable_perf`: Whether to enable performance statistics tracking (hits, -# misses, total calls) -# - `force_refresh`: Whether to bypass the cache and refresh the value -# - `System Properties`: -# - cache type (e.g., "json" or "pickle") -# - write through (e.g., True or False) -# - exclude keys (e.g., ["password", "api_key"]) - -_SYSTEM_PROPERTIES = ["type", "write_through", "exclude_keys"] - - -def get_main_cache_dir() -> str: - """ - Get the main cache directory (git root). - - :return: The absolute path to the main cache directory. - """ - git_dir = hgit.find_git_root() - cache_dir = os.path.abspath(git_dir) - return cache_dir - - -# Create global variable for the cache directory. -if "_CACHE_DIR" not in globals(): - _LOG.trace("Creating _CACHE_DIR") - _CACHE_DIR = get_main_cache_dir() - - -def set_cache_dir(cache_dir: str) -> None: - """ - Set the cache directory. - """ - global _CACHE_DIR - hdbg.dassert_isinstance(cache_dir, str) - _CACHE_DIR = os.path.abspath(cache_dir) - hio.create_dir(_CACHE_DIR, incremental=True) - _LOG.trace("Setting _CACHE_DIR to %s", _CACHE_DIR) - - -def get_cache_dir() -> str: - """ - Get the cache directory. - """ - return _CACHE_DIR - - -# Create global variable for the cache file prefix. 
-if "_CACHE_FILE_PREFIX" not in globals(): - _LOG.trace("Creating _CACHE_FILE_PREFIX") - _CACHE_FILE_PREFIX = "tmp.cache_simple" - - -def set_cache_file_prefix(prefix: str) -> None: - """ - Set the cache file prefix. - - :param prefix: prefix to use for cache files - """ - global _CACHE_FILE_PREFIX - hdbg.dassert_isinstance(prefix, str) - hdbg.dassert_ne(prefix, "", "Cache file prefix cannot be empty") - if prefix.endswith("."): - _LOG.warning( - "Prefix '%s' ends with '.' - cache files will have '..' in names", - prefix, - ) - _CACHE_FILE_PREFIX = prefix - _LOG.trace("Setting _CACHE_FILE_PREFIX to %s", _CACHE_FILE_PREFIX) - - -def get_cache_file_prefix() -> str: - """ - Get the cache file prefix. - - :return: cache file prefix - """ - return _CACHE_FILE_PREFIX - - -def get_cache_property_file() -> str: - """ - Get the cache property file name. - - :return: The cache property file name. - """ - prefix = get_cache_file_prefix() - val = os.path.join(get_cache_dir(), f"{prefix}_property.pkl") - return val - - -def _get_initial_cache_property() -> _CacheType: - """ - Get the initial cache property from disk or create an empty one. - - :return: A dictionary containing cache properties. - """ - file_name_ = get_cache_property_file() - if os.path.exists(file_name_): - _LOG.trace("Loading from %s", file_name_) - # TODO(gp): Use _load_data_from_file, if possible. - with open(file_name_, "rb") as file: - val = pickle.load(file) - else: - # func_name -> key -> value properties. - val = {} - val = cast(_CacheType, val) - return val - - -# Create global variables for the cache properties. -if "_CACHE_PROPERTY" not in globals(): - _LOG.trace("Creating _CACHE_PROPERTY") - _CACHE_PROPERTY = _get_initial_cache_property() - - -def _check_valid_cache_property(property_name: str) -> None: - """ - Verify that a cache property name is valid for the given type. - - :param property_name: The property name to validate. 
- """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(property_name, str) - valid_properties = [ - # Abort if there is a cache miss. This is used to make sure everything - # is cached. - "abort_on_cache_miss", - # Report if there is a cache miss and return `_cache_miss_` instead of - # accessing the real value. - "report_on_cache_miss", - # Enable performance stats (e.g., miss, hit, tot for the cache). - "enable_perf", - # Force to refresh the value. - "force_refresh", - # TODO(gp): "force_refresh_once" - # json or pickle cache type. - "type", - # cache mode. - "mode", - ] - hdbg.dassert_in(property_name, valid_properties) - - -def _save_func_cache_data_to_file( - file_name: str, - cache_type: Optional[str], - func_cache_data: _FunctionCacheType, -) -> None: - """ - Save the function cache data to a file. - - :param file_name: The name of the file. - :param func_cache_data: The function cache data to save. - """ - # Infer cache type from file extension if not set. - if cache_type is None: - if file_name.endswith(".pkl"): - cache_type = "pickle" - else: - cache_type = "json" - hio.create_enclosing_dir(file_name, incremental=True) - _LOG.trace("Saving to '%s'", file_name) - # Save data. - if cache_type == "pickle": - with open(file_name, "wb") as file: - pickle.dump(func_cache_data, file) - elif cache_type == "json": - with open(file_name, "w", encoding="utf-8") as file: - json.dump( - func_cache_data, - file, - indent=4, - sort_keys=True, - ensure_ascii=False, - ) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - - -def set_cache_property(func_name: str, property_name: str, val: Any) -> None: - """ - Set a property for the cache of a given function name. - - :param func_name: The name of the function whose cache property is - to be set. - :param property_name: The name of the property to set. - :param val: The value to set for the property. 
- """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(property_name, str) - _check_valid_cache_property(property_name) - # Assign value. - cache_property = _CACHE_PROPERTY - if func_name not in cache_property: - cache_property[func_name] = {} - dict_ = cache_property[func_name] - dict_[property_name] = val - # Update values on the disk. - file_name = get_cache_property_file() - _LOG.trace("Updating %s", file_name) - # Make sure the dict is well-formed. - for func_name_tmp in cache_property: - hdbg.dassert_isinstance(func_name_tmp, str) - _LOG.trace( - "func_name_tmp='%s' -> %s", - func_name_tmp, - cache_property[func_name_tmp], - ) - hio.create_enclosing_dir(file_name, incremental=True) - _save_func_cache_data_to_file(file_name, "pickle", cache_property) - - -def get_cache_property(func_name: str, property_name: str) -> Union[bool, Any]: - """ - Get the value of a property for the cache of a given function name. - """ - _LOG.trace(hprint.func_signature_to_str()) - _check_valid_cache_property(property_name) - # Read data. - cache_property = _CACHE_PROPERTY - if property_name in _SYSTEM_PROPERTIES: - if func_name not in cache_property: - return None - value = cache_property[func_name].get(property_name) - else: - value = cache_property.get(func_name, {}).get(property_name, False) - return value - - -def reset_cache_property() -> None: - """ - Reset the cache property for the given type. - """ - file_name = get_cache_property_file() - _LOG.warning("Resetting %s", file_name) - # Empty the values. - global _CACHE_PROPERTY - cache_property = _CACHE_PROPERTY - # Empty the values excluding the system properties like `type` and - # `write_through`. - _LOG.trace("before cache_property=%s", cache_property) - # Iterate over a list of keys to avoid modifying the dictionary during iteration. - for func_name_tmp in list(cache_property.keys()): - # Only remove non-system properties from the function's property dict. 
- func_prop = cache_property[func_name_tmp] - for property_name_tmp in list(func_prop.keys()): - if property_name_tmp not in _SYSTEM_PROPERTIES: - del func_prop[property_name_tmp] - _LOG.trace("after cache_property=%s", cache_property) - # Update values on the disk. - _LOG.trace("Updating %s", file_name) - hio.create_enclosing_dir(file_name, incremental=True) - _save_func_cache_data_to_file(file_name, "pickle", cache_property) - - -# ############################################################################# -# Get cache. -# ############################################################################# - -# Functions to retrieve cache (both memory and disk). - - -def get_cache_func_names(type_: str) -> List[str]: - """ - Retrieve the cache function names based on the specified type. - - :param type_: The type of cache to retrieve ('all', 'mem', or - 'disk'). - :return: A list of function names corresponding to the specified - cache type. - """ - if type_ == "all": - mem_func_names = get_cache_func_names("mem") - disk_func_names = get_cache_func_names("disk") - val = sorted(set(mem_func_names + disk_func_names)) - elif type_ == "mem": - mem_func_names = sorted(list(_CACHE.keys())) - val = mem_func_names - elif type_ == "disk": - prefix = get_cache_file_prefix() - disk_func_names = glob.glob(os.path.join(get_cache_dir(), f"{prefix}.*")) - disk_func_names = [os.path.basename(cache) for cache in disk_func_names] - # Exclude the cache property file. 
- property_file_name = os.path.basename(get_cache_property_file()) - disk_func_names = [ - cache for cache in disk_func_names if cache != property_file_name - ] - escaped_prefix = re.escape(prefix) - pattern = rf"{escaped_prefix}\.(.*)\.(json|pkl)" - disk_func_names = [ - re.sub(pattern, r"\1", cache) for cache in disk_func_names - ] - disk_func_names = sorted(disk_func_names) - val = disk_func_names - else: - raise ValueError(f"Invalid type '{type_}'") - return val - - -def cache_property_to_str(func_name: str = "") -> str: - """ - Convert cache properties to a string representation. - - :param func_name: The name of the function whose cache properties - are to be converted. - :return: A string representation of the cache properties. E.g., - ``` - # func_name=slow_square - type: json - write_through: False - exclude_keys: [] - ``` - """ - txt: List[str] = [] - if func_name == "": - func_names = get_cache_func_names("all") - for func_name_tmp in func_names: - txt.append(cache_property_to_str(func_name_tmp)) - result = "\n".join(txt) - return result - # - txt.append(f"# func_name={func_name}") - cache_property = _CACHE_PROPERTY - _LOG.trace("cache_property=%s", cache_property) - if func_name in cache_property: - for k, v in cache_property[func_name].items(): - txt.append(f"{k}: {v}") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Cache performance. -# ############################################################################# - - -# Create global variable for the cache performance. -if "_CACHE_PERF" not in globals(): - _LOG.trace("Creating _CACHE_PERF") - # func_name -> perf properties (such as tot, hits, misses). - _CACHE_PERF: Dict[str, Dict[str, int]] = {} - - -def enable_cache_perf(func_name: str) -> None: - """ - Enable cache performance statistics for a given function. 
- """ - _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0} - - -def disable_cache_perf(func_name: str = "") -> None: - """ - Disable cache performance statistics for a given function. - - If `func_name` is empty, disable cache performance statistics for all - functions. - """ - if func_name == "": - for func_name_tmp in get_cache_func_names("all"): - disable_cache_perf(func_name_tmp) - return - _CACHE_PERF[func_name] = None - - -def reset_cache_perf(func_name: str = "") -> None: - """ - Reset cache performance statistics for a given function. - """ - if func_name == "": - for func_name_tmp in get_cache_func_names("all"): - reset_cache_perf(func_name_tmp) - return - _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0} - - -def get_cache_perf(func_name: str) -> Optional[Dict[str, int]]: - """ - Get the cache performance object for a given function. - """ - if func_name in _CACHE_PERF: - return _CACHE_PERF[func_name] - return None - - -def get_cache_perf_stats(func_name: str) -> str: - """ - Get the cache performance statistics for a given function. - - :param func_name: The name of the function whose cache performance - stats are to be retrieved. - :return: A string with the cache performance statistics. E.g., - `slow_square: hits=2 misses=0 tot=2 hit_rate=1.00`. - """ - perf = get_cache_perf(func_name) - if perf is None: - _LOG.warning("No cache performance stats for '%s'", func_name) - return "" - hits = perf["hits"] - misses = perf["misses"] - tot = perf["tot"] - hit_rate = hits / tot if tot > 0 else 0 - txt = ( - f"{func_name}: hits={hits} misses={misses} tot={tot}" - f" hit_rate={hit_rate:.2f}" - ) - return txt - - -# ############################################################################# -# Disk cache. -# ############################################################################# - -# Functions to save and retrieve cache from disk. -# ``` -# { -# "{\"args\": [\"You are a calculator. 
Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ -# "25", -# 3.195e-05 -# ], -# "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ -# "5", -# 3.195e-05 -# ] -# } -# ``` - - -def _get_cache_file_name(func_name: str) -> str: - """ - Get the cache file name for a given function. - - :param func_name: The name of the function. - :return: The cache file name with appropriate extension. - """ - _LOG.trace("func_name='%s'", func_name) - hdbg.dassert_isinstance(func_name, str) - prefix = get_cache_file_prefix() - file_name = os.path.join(get_cache_dir(), f"{prefix}.{func_name}") - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("cache_type")) - if cache_type == "pickle": - file_name += ".pkl" - elif cache_type == "json": - file_name += ".json" - elif cache_type is None: - # Cache type not set - try to infer from existing files. - if os.path.exists(file_name + ".pkl"): - file_name += ".pkl" - elif os.path.exists(file_name + ".json"): - file_name += ".json" - else: - # Default to json if no file exists. - file_name += ".json" - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - return file_name - - -def _save_cache_dict_to_disk( - func_name: str, func_cache_data: _FunctionCacheType -) -> None: - """ - Save a cache dictionary to the disk cache. - - :param func_name: The name of the function. - :param func_cache_data: The function cache data to save. - """ - # Get the filename for the disk cache. 
- file_name = _get_cache_file_name(func_name) - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("file_name cache_type")) - _save_func_cache_data_to_file(file_name, cache_type, func_cache_data) - - -def _load_func_cache_data_from_file( - file_name: str, cache_type: Optional[str] -) -> _FunctionCacheType: - """ - Load the function cache data from a file. - - :param file_name: The name of the file. - :param cache_type: The type of the cache. - :return: The function cache data. - """ - # Infer cache type from file extension if not set. - if cache_type is None: - if file_name.endswith(".pkl"): - cache_type = "pickle" - else: - cache_type = "json" - # Load data. - _LOG.trace("Loading from '%s'", file_name) - hdbg.dassert_file_exists(file_name) - if cache_type == "pickle": - with open(file_name, "rb") as file: - func_cache_data = pickle.load(file) - elif cache_type == "json": - with open(file_name, "r", encoding="utf-8") as file: - func_cache_data = json.load(file) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - return func_cache_data - - -# TODO(gp): Maybe private? -def get_disk_cache(func_name: str) -> _FunctionCacheType: - """ - Retrieve the disk cache for a given function. - - :param func_name: The name of the function. - :return: A dictionary containing the cache data. - """ - file_name = _get_cache_file_name(func_name) - # If the disk cache doesn't exist, create it. - if not os.path.exists(file_name): - _LOG.trace("No cache from disk") - func_cache_data: _FunctionCacheType = {} - _save_cache_dict_to_disk(func_name, func_cache_data) - # Load data. - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("cache_type")) - func_cache_data = _load_func_cache_data_from_file(file_name, cache_type) - return func_cache_data - - -# ############################################################################# -# Stats. 
-# ############################################################################# - - -def cache_stats_to_str( - func_name: str = "", -) -> Optional["pd.DataFrame"]: # noqa: F821 - """ - Print the cache stats for a function or for all functions. - - E.g., - ``` - find_email: - memory: - - disk: 1044 - - verify_email: - memory: - - disk: 2322 - ``` - """ - # We want to limit the dependency from pandas in the cache. - import pandas as pd - - if func_name == "": - result = [] - for func_name in get_cache_func_names("all"): - result_tmp = cache_stats_to_str(func_name) - result.append(result_tmp) - if result: - result = pd.concat(result) - else: - result = None - return result - result = {} - # Memory cache. - if func_name in _CACHE: - result["memory"] = len(_CACHE[func_name]) - else: - result["memory"] = "-" - # Disk cache. - file_name = _get_cache_file_name(func_name) - if os.path.exists(file_name): - disk_cache = get_disk_cache(func_name) - result["disk"] = len(disk_cache) - else: - result["disk"] = "-" - result = pd.Series(result).to_frame().T - result.index = [func_name] - return result - - -def force_cache_from_disk(func_name: str = "") -> None: - """ - Force loading the cache from disk and update the memory cache. - - :param func_name: The name of the function. If empty, apply to all - cached functions. - """ - if func_name == "": - _LOG.info("Before:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cache_func_names("all"): - force_cache_from_disk(func_name_tmp) - _LOG.info("After:\n%s", cache_stats_to_str()) - return - _LOG.trace("func_name='%s'", func_name) - # Get disk cache. - disk_cache = get_disk_cache(func_name) - _LOG.trace("disk_cache=%s", len(disk_cache)) - # Update the memory cache. - global _CACHE - _CACHE[func_name] = disk_cache - - -def get_mem_cache(func_name: str) -> _CacheType: - """ - Retrieve the memory cache for a given function. - - :param func_name: The name of the function. - :return: A dictionary containing the memory cache data. 
- """ - mem_cache = _CACHE.get(func_name, {}) - return mem_cache - - -def flush_cache_to_disk(func_name: str = "") -> None: - """ - Flush the memory cache to disk and update the memory cache. - - :param func_name: The name of the function. If empty, apply to all - cached functions. - """ - if func_name == "": - _LOG.info("Before:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cache_func_names("all"): - flush_cache_to_disk(func_name_tmp) - _LOG.info("After:\n%s", cache_stats_to_str()) - return - _LOG.trace("func_name='%s'", func_name) - # Get memory cache. - mem_cache = get_mem_cache(func_name) - _LOG.trace("mem_cache=%s", len(mem_cache)) - # Get disk cache. - disk_cache = get_disk_cache(func_name) - _LOG.trace("disk_cache=%s", len(disk_cache)) - # Merge disk cache with memory cache. - disk_cache.update(mem_cache) - # Save merged cache to disk. - _save_cache_dict_to_disk(func_name, disk_cache) - # Update the memory cache. - global _CACHE - _CACHE[func_name] = disk_cache - - -def get_cache(func_name: str) -> _CacheType: - """ - Retrieve the cache for a given function name. - - :param func_name: The name of the function whose cache is to be - retrieved. - :return: A dictionary containing the cache data. - """ - global _CACHE - if func_name in _CACHE: - _LOG.trace("Loading mem cache for '%s'", func_name) - cache = get_mem_cache(func_name) - else: - _LOG.trace("Loading disk cache for '%s'", func_name) - func_cache_data = get_disk_cache(func_name) - _CACHE[func_name] = func_cache_data - cache = func_cache_data - return cache - - -# ############################################################################# -# Reset cache. -# ############################################################################# - -# Functions to reset cache (both memory and disk). - - -def reset_mem_cache(func_name: str = "") -> None: - """ - Reset the memory cache for a given function. - - :param func_name: The name of the function. If empty, reset all - memory caches. 
- """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(func_name, str) - if func_name == "": - _LOG.trace("Before resetting memory cache:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cache_func_names("all"): - reset_mem_cache(func_name=func_name_tmp) - _LOG.trace("After:\n%s", cache_stats_to_str()) - return - _CACHE[func_name] = {} - del _CACHE[func_name] - - -def reset_disk_cache(func_name: str = "", interactive: bool = True) -> None: - """ - Reset the disk cache for a given function name. - - If `func_name` is empty, reset all disk cache files. - :param func_name: The name of the function whose disk cache is to - be reset. If empty, reset all disk cache files. - :param interactive: If True, prompt the user for confirmation before - resetting the disk cache. - """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(interactive, bool) - if interactive: - hsystem.query_yes_no( - f"Are you sure you want to reset the disk cache for func_name={func_name}?" - ) - if func_name == "": - _LOG.trace("Before resetting disk cache:\n%s", cache_stats_to_str()) - prefix = get_cache_file_prefix() - cache_files = glob.glob(os.path.join(get_cache_dir(), f"{prefix}.*")) - _LOG.warning("Resetting disk cache") - for file_name in cache_files: - if os.path.isfile(file_name): - os.remove(file_name) - _LOG.trace("After:\n%s", cache_stats_to_str()) - return - # - file_name = _get_cache_file_name(func_name) - if os.path.exists(file_name): - _LOG.warning("Removing cache file '%s'", file_name) - os.remove(file_name) - - -def reset_cache(func_name: str = "", interactive: bool = True) -> None: - """ - Reset both memory and disk cache for a given function. - - :param func_name: The name of the function. If empty, reset all - caches. - :param interactive: If True, prompt the user for confirmation before - resetting the disk cache. 
- """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(interactive, bool) - reset_mem_cache(func_name=func_name) - reset_disk_cache(func_name=func_name, interactive=interactive) - - -# ############################################################################# -# Mock / unit test cache. -# ############################################################################# - - -def _get_cache_key(args: Any, kwargs: Any) -> str: - cache_key = json.dumps( - {"args": args, "kwargs": kwargs}, - sort_keys=True, - default=str, - ) - _LOG.trace("cache_key=%s", cache_key) - return cache_key - - -def mock_cache(func_name: str, cache_key: str, value: Any) -> None: - """ - Mock the function cache for a given function and cache key. - - :param func_name: The name of the function. - :param cache_key: The cache key. - :param value: The value to store in the cache. - """ - # We should not use the main cache directory for mocking. - hdbg.dassert_ne( - get_cache_dir(), - get_main_cache_dir(), - msg="Do not use the main cache directory for mocking", - ) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_ne(func_name, "", "Function name is empty") - hdbg.dassert_isinstance(cache_key, str) - hdbg.dassert_ne(cache_key, "", "Cache key is empty") - # Get the function cache. - func_cache_data = get_cache(func_name) - # Update the function cache. - func_cache_data[cache_key] = value - - -def mock_cache_from_args_kwargs( - func_name: str, args: Any, kwargs: Any, value: Any -) -> None: - """ - Mock the function cache for a given function and args/kwargs. - - E.g., when testing a cached expensive function (e.g., an LLM call or - downloading data) we can mock the cache to return a fixed value, - instead of calling the function. - - :param func_name: The name of the function. - :param args: The arguments for the function. - :param kwargs: The keyword arguments for the function. - :param value: The value to store in the cache. 
- """ - hdbg.dassert_isinstance(args, tuple, "args is not a tuple: %s", args) - hdbg.dassert_isinstance(kwargs, dict, "kwargs is not a dict: %s", kwargs) - # Get the cache key. - cache_key = _get_cache_key(args, kwargs) - # Mock the cache. - mock_cache(func_name, cache_key, value) - - -def mock_cache_from_disk( - func_name: str, func_cache_data: _FunctionCacheType -) -> None: - """ - Mock the function cache from disk data. - - :param func_name: The name of the function. - :param cache_data: The cache data to mock. - """ - hdbg.dassert_isinstance(func_name, str) - sanity_check_function_cache(func_cache_data, assert_on_empty=True) - for cache_key, cached_value in func_cache_data.items(): - mock_cache(func_name, cache_key, cached_value) - - -# ############################################################################# -# Decorator -# ############################################################################# - -# - Decorated functions accept special keyword arguments to control caching -# behavior: -# - `force_refresh=True`: Bypass cache and recompute the result -# - `abort_on_cache_miss=True`: Raise an exception if cache miss occurs -# - `report_on_cache_miss=True`: Return "_cache_miss_" instead of computing on -# cache miss -# - `cache_mode`: Alternative way to control caching with predefined modes: -# - `"REFRESH_CACHE"`: Force cache refresh (same as `force_refresh=True`) -# - `"HIT_CACHE_OR_ABORT"`: Abort on cache miss (same as -# `abort_on_cache_miss=True`) -# - `"DISABLE_CACHE"`: Completely disable caching for this call - - -# TODO(gp): Not sure that cache_mode is worth having the duplication. -def simple_cache( - *, - cache_type: str = "json", - write_through: bool = True, - exclude_keys: Optional[List[str]] = None, -) -> Callable[..., Any]: - """ - Decorate a function to cache its results. - - The cache is stored in memory and on disk. - :param cache_type: The type of cache to use ('json' or 'pickle'). 
- :param write_through: If True, the cache is written to disk after - each access. - :param exclude_keys: A list of keys to exclude from the cache key. - :return: A decorator that can be applied to a function. - """ - - def decorator(func: Callable[..., Any]) -> Callable[..., Any]: - """ - Decorate a function to cache its results. - """ - hdbg.dassert_in(cache_type, ("json", "pickle")) - func_name = getattr(func, "__name__", "unknown_function") - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Only set cache type if not already set (preserve existing setting). - existing_type = get_cache_property(func_name, "type") - if not existing_type: - set_cache_property(func_name, "type", cache_type) - # Handle mutable default argument. - exclude_keys_list: List[str] = ( - exclude_keys if exclude_keys is not None else [] - ) - - @functools.wraps(func) - def wrapper( - *args: Any, - force_refresh: bool = False, - abort_on_cache_miss: bool = False, - report_on_cache_miss: bool = False, - **kwargs: Any, - ) -> Any: - """ - Cache the results of the decorated function. - - :param args: Positional arguments for the function. - :param force_refresh: If True, the cache is refreshed - regardless of whether the key exists in the cache. - :param abort_on_cache_miss: If True, an exception is raised - if the key is not found in the cache. - :param report_on_cache_miss: If True, a message is logged if - the key is not found in the cache, and the function - returns "_cache_miss_" instead of accessing the real - value. - :param kwargs: Keyword arguments for the function. - :return: The cached value or the result of the function. - """ - # Get the function name. - func_name = getattr(func, "__name__", "unknown_function") - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Get the cache. - cache = get_cache(func_name) - # Remove keys that should not be part of the cache key. 
- # Also exclude cache_mode since it's a control parameter. - excluded_keys = set(exclude_keys_list) | {"cache_mode"} - kwargs_for_cache_key = { - k: v for k, v in kwargs.items() if k not in excluded_keys - } - # Prepare kwargs for the actual function call. - # Keep cache_mode since the wrapped function may need it in its signature. - kwargs_for_func = kwargs.copy() - # Resolve effective cache_mode: explicit kwarg wins, otherwise - # fall back to the process-wide global (set via - # `set_global_cache_mode`). Do NOT inject into kwargs_for_func, as - # the wrapped function may not accept a `cache_mode` parameter. - if "cache_mode" in kwargs: - cache_mode = kwargs.get("cache_mode") - else: - cache_mode = _GLOBAL_CACHE_MODE - # `cache_mode` is a special keyword argument to control caching - # behavior. - if cache_mode is not None: - _LOG.trace("cache_mode=%s", cache_mode) - if cache_mode == "REFRESH_CACHE": - # Force to refresh the cache. - _LOG.trace("Forcing cache refresh") - force_refresh = True - if cache_mode == "HIT_CACHE_OR_ABORT": - # Abort if the cache is not hit. - _LOG.trace("Abort on cache miss") - abort_on_cache_miss = True - if cache_mode == "DISABLE_CACHE": - # Disable the cache. - _LOG.trace("Disabling cache") - if _CACHE_DEBUG: - _LOG.warning( - "cache[%s]: COMPUTE (cache disabled by cache_mode=DISABLE_CACHE)", - func_name, - ) - value = func(*args, **kwargs_for_func) - return value - # Get the key. - cache_key = _get_cache_key(args, kwargs_for_cache_key) - # Get the cache properties. - cache_perf = get_cache_perf(func_name) - _LOG.trace("cache_perf is None=%s", cache_perf is None) - # Update the performance stats. - if cache_perf: - hdbg.dassert_in("tot", cache_perf) - cache_perf["tot"] += 1 - # Handle a forced refresh. 
- force_refresh = force_refresh or get_cache_property( - func_name, "force_refresh" - ) - _LOG.trace("force_refresh=%s", force_refresh) - if cache_key in cache and not force_refresh: - _LOG.trace("Cache hit for key='%s'", cache_key) - if _CACHE_DEBUG: - _LOG.warning("cache[%s]: HIT", func_name) - # Update the performance stats. - if cache_perf: - cache_perf["hits"] += 1 - # Retrieve the value from the cache. - value = cache[cache_key] - else: - _LOG.trace("Cache miss for key='%s'", cache_key) - # Update the performance stats. - if cache_perf: - cache_perf["misses"] += 1 - # Abort on cache miss. - abort_on_cache_miss = abort_on_cache_miss or get_cache_property( - func_name, "abort_on_cache_miss" - ) - _LOG.trace("abort_on_cache_miss=%s", abort_on_cache_miss) - if abort_on_cache_miss: - raise ValueError(f"Cache miss for key='{cache_key}'") - # Report on cache miss. - report_on_cache_miss = ( - report_on_cache_miss - or get_cache_property(func_name, "report_on_cache_miss") - ) - _LOG.trace("report_on_cache_miss=%s", report_on_cache_miss) - if report_on_cache_miss: - _LOG.trace("Cache miss for key='%s'", cache_key) - return "_cache_miss_" - if _CACHE_DEBUG: - if force_refresh: - _LOG.warning( - "cache[%s]: RECOMPUTE (cache_mode=REFRESH_CACHE)", - func_name, - ) - else: - _LOG.warning("cache[%s]: COMPUTE (miss)", func_name) - # Access the intrinsic function. - value = func(*args, **kwargs_for_func) - # Update cache. 
- cache[cache_key] = value - _LOG.trace( - "Updating cache with key='%s' value='%s'", cache_key, value - ) - if write_through: - _LOG.trace("Writing through to disk") - flush_cache_to_disk(func_name) - return value - - return wrapper - - return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py deleted file mode 100644 index e2f54a02c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Import as: - -import helpers.hcfile as hcfile -""" - -import logging -import re -from typing import List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hio as hio - -_LOG = logging.getLogger(__name__) - - -def parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: - """ - Read and parse a cfile. - - :param cfile: path to the cfile - :return: list of tuples, each containing a line number and a transform, e.g., - [(file_name, line_number, transform), ...] - """ - # Read the cfile. - cfile_lines = hio.from_file(cfile) - cfile_lines = cfile_lines.split("\n") - # - ret = [] - # Parse the cfile. - for line in cfile_lines: - _LOG.debug("line=%s", line) - hdbg.dassert_isinstance(line, str) - # Parse the lines of the cfile, like - # ``` - # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: ... - # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for ... - # dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] - # ``` - # extracting the file name, line number, and transform. 
- regex = r"^([^:]+):(\d+):(.*)$" - match = re.match(regex, line) - if match is None: - _LOG.debug("Failed to parse line '%s'", line) - continue - # Extract the file name, line number, and transform. - file_name = match.group(1) - line_number = match.group(2) - transform = match.group(3) - # Add values to the list. - ret.append((file_name, line_number, transform)) - return ret - - -# ############################################################################# - - -def inject_todos_from_cfile( - cfile_txt: str, todo_user: str, comment_prefix: str -) -> None: - """ - Inject the TODOs from a cfile in the corresponding files. - - Given a cfile with the following content: - the function will inject the TODO in the corresponding file and line - - :param cfile_txt: The content of the cfile. - :param todo_user: The user to use in the TODO. - :param comment_prefix: The prefix to use for the comment (e.g., "#") - """ - # For each file, store - # - the current file content - # - the offset (i.e., how many lines we inserted in the file so far, so - # we can inject the TODO at the correct line number) - # - the index of the last line modified to make sure the TODOs are for - # increasing line numbers. - file_content = {} - for todo_line in cfile_txt.split("\n"): - _LOG.debug("\n%s", hprint.frame(f"todo line='{todo_line}'")) - if todo_line.strip() == "": - continue - # dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic for extracting required status checks and pull request reviews is repeated. Consider creating a helper function to handle this extraction to reduce redundancy. - m = re.match(r"^\s*(\S+):(\d+):\s*(.*)$", todo_line) - if not m: - _LOG.warning("Can't parse line='%s': skipping", todo_line) - continue - file_name, todo_line_number, todo = m.groups() - todo_line_number = int(todo_line_number) - _LOG.debug(hprint.to_str("file_name todo_line_number todo")) - # Update the state if needed. 
- if file_name not in file_content: - _LOG.debug("Reading %s", file_name) - hdbg.dassert_path_exists(file_name) - txt = hio.from_file(file_name).split("\n") - offset = 0 - last_line_modified = 0 - file_content[file_name] = (txt, offset, last_line_modified) - # Extract the info for the file to process. - txt, offset, last_line_modified = file_content[file_name] - _LOG.debug(hprint.to_str("offset last_line_modified")) - hdbg.dassert_lt( - last_line_modified, - todo_line_number, - "The TODOs don't look like they are increasing line numbers: " - "TODO at line %d is before the last line modified %d", - todo_line_number, - last_line_modified, - ) - # We subtract 1 from the line number since TODOs count from 1, while - # Python arrays count from 0. - act_line_number = todo_line_number - 1 + offset - hdbg.dassert_lte(0, act_line_number) - hdbg.dassert_lt(act_line_number, len(txt)) - insert_line = txt[act_line_number] - _LOG.debug(hprint.to_str("act_line_number insert_line")) - # Extract how many spaces there are at place where the line to insert - # the TODO. - m = re.match(r"^(\s*)\S", insert_line) - hdbg.dassert(m, "Can't parse insert_line='%s'", insert_line) - spaces = len(m.group(1)) * " " # type: ignore[union-attr] - # Build the new line to insert. - new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}" - _LOG.debug(hprint.to_str("new_line")) - # Insert the new line in txt at the correct position. - txt = txt[:act_line_number] + [new_line] + txt[act_line_number:] - # Update the state. - offset += 1 - file_content[file_name] = (txt, offset, todo_line_number) - # Write updated files back. 
- for file_name, (txt, offset, last_line_modified) in file_content.items(): - _ = last_line_modified - _LOG.info("Writing %d lines in %s", offset, file_name) - txt = "\n".join(txt) - hio.to_file(file_name, txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py deleted file mode 100644 index 675ba557d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py +++ /dev/null @@ -1,549 +0,0 @@ -""" -Import as: - -import helpers.hchatgpt as hchatgp -""" - -import logging -import math -import os -import sys -import time -from typing import Dict, List, Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio - -# import helpers.henv as henv -# henv.install_module_if_not_present("openai") -import openai # noqa: E402 - -_LOG = logging.getLogger(__name__) - -# Setting API as env var in your terminal is the correct approach. -# NEVER upload any OpenAI API key to GitHub, OpenAI will revoke it. - -client = openai.OpenAI() - -# The OpenAI File ID cache will be saved as `prefix_to_root/gpt_id.json` -# Only files under the given root directory may be uploaded to OpenAI. -prefix_to_root = os.path.join(os.path.dirname(__file__), "..") - -# ############################################################################# -# Create/update/delete Assistant. -# ############################################################################# - - -def create_assistant( - assistant_name: str, - instructions: str, - *, - model: str = "gpt-3.5-turbo-1106", - use_retrieval: bool = True, - use_code_interpreter: bool = True, - use_function: Optional[Dict] = None, -) -> str: - """ - Create an OpenAI Assistant for your OpenAI Organization. All configs can - still be updated after creation. 
- - This method should only be used when a new Assistant is needed. - Otherwise, use the Assistant name to retrieve an existing Assistant. - - :param assistant_name: name of the Assistant to be created - :param instructions: instruction string that describes the expected - behavior of assistant - :param model: GPT model used by the assistant - :param use_retrieval: enable the retrieval tool from OpenAI - :param use_code_interpreter: enable the code interpreter tool from - OpenAI - :param use_function: enable the function tool from OpenAI (To be - implemented) - """ - # Create the assistant - tools = [] - if use_retrieval: - tools.append({"type": "retrieval"}) - if use_code_interpreter: - tools.append({"type": "code_interpreter"}) - if use_function: - tools.append(use_function) - if not model: - model = "gpt-3.5-turbo-1106" - assistant = client.beta.assistants.create( - instructions=instructions, - name=assistant_name, - model=model, - tools=tools, - ) - return assistant.id - - -def update_assistant_by_id( - assistant_id: str, - *, - instructions: str = "", - name: str = "", - tools: Optional[List[Dict[str, str]]] = None, - model: str = "", - file_ids: Optional[List[str]] = None, -) -> str: - """ - Update an existing OpenAI Assistant in our OpenAI Organization. 
- - :param assistant_id: Assistant to be updated - :param instructions: instruction string that describes the expected - behavior of assistant - :param name: change the name of assistant, no change when empty - :param tools: change the tools of assistant, no change when empty - :param model: change the model of assistant, no change when empty - :param file_ids: change the files linked to assistant, no change - when empty - """ - if tools is None: - tools = [] - if file_ids is None: - file_ids = [] - update_config = { - "instructions": instructions, - "name": name, - "tools": tools, - "model": model, - "file_ids": file_ids, - } - not_empty_params = {k: v for k, v in update_config.items() if v} - updated_assistant = client.beta.assistants.update( - assistant_id, **not_empty_params - ) - return updated_assistant.id - - -def delete_assistant_by_id(assistant_id: str) -> None: - """ - Delete an Assistant from our OpenAI Organization. - """ - client.beta.assistants.delete(assistant_id) - - -def get_all_assistants() -> List[openai.types.beta.assistant.Assistant]: - """ - Get all available Assistant objects in our OpenAI Organization. - """ - list_assistants_response = client.beta.assistants.list( - order="desc", - limit="100", - ) - assistants = list_assistants_response.data - return assistants - - -def get_all_assistant_names() -> List[str]: - """ - Get all available Assistant names in our OpenAI Organization. - """ - assistants = get_all_assistants() - return [assistant.name for assistant in assistants] - - -def get_assistant_id_by_name(assistant_name) -> str: - """ - Get the id of an Assistant by its name. 
- """ - assistant = None - assistants = get_all_assistants() - for cur_assistant in assistants: - if cur_assistant.name == assistant_name: - assistant = cur_assistant - break - hdbg.dassert_is_not( - assistant, None, f"Assistant '{assistant_name}' not found" - ) - assert assistant is not None - return assistant.id - - -# ############################################################################# -# Create directory structure storing gpt file ids -# ############################################################################# - - -def _path_to_dict(path: str) -> Dict: - """ - Generate a dictionary of all files under a given folder. - """ - for root, dirs, files in os.walk(path): - tree = {d: _path_to_dict(os.path.join(root, d)) for d in dirs} - tree.update({f: {"name": f} for f in files}) - return tree - return {} - - -# TODO(Henry): We use fileIO here to store the directory structure, which may -# not be thread-safe. Should change to use DAO if we have any. -def _dump_gpt_ids(dictionary: Dict) -> None: - """ - Dump a given OpenAI File ID dictionary into a cache file for furture use. - """ - file_path = os.path.join(prefix_to_root, "gpt_id.json") - hio.to_json(file_path, dictionary) - return - - -def _load_gpt_ids() -> Dict: - """ - Load the OpenAI File ID dictionary from the cache file. - """ - file_path = os.path.join(prefix_to_root, "gpt_id.json") - if os.path.exists(file_path) and os.path.isfile(file_path): - return hio.from_json(file_path) - else: - directory_dict = _path_to_dict(prefix_to_root) - _dump_gpt_ids(directory_dict) - return directory_dict - - -# ############################################################################# -# Upload file to OpenAI account -# ############################################################################# - - -def _upload_to_gpt_no_set_id(path_from_root: str) -> str: - """ - Upload a file to OpenAI. - - This method will NOT set File ID to cache. 
- """ - _LOG.info("Uploading file %s to chatgpt", path_from_root) - upload_file_response = client.files.create( - # Must use 'rb' regardless of file type. - file=open(os.path.join(prefix_to_root, path_from_root), "rb"), - purpose="assistants", - ) - gpt_id = upload_file_response.id - return gpt_id - - -def _get_gpt_id_file(dictionary: Dict, path_from_root: str) -> Dict[str, str]: - """ - Get the OpenAI File ID for a given file using a specific cache. - - If this file has not been uploaded to OpenAI, this method will - upload it and generate its OpenAI File ID. - """ - cur = dictionary - path_list = path_from_root.split("/") - for level in path_list: - cur = cur[level] - if "gpt_id" not in cur: - cur["gpt_id"] = _upload_to_gpt_no_set_id(path_from_root) - _dump_gpt_ids(dictionary) - return cur - - -def _set_gpt_id(path_from_root: str, gpt_id: str) -> None: - """ - Manually set the cached OpenAI File ID of a given file. - - This method should ONLY be called if a file manually uploaded to - OpenAI. It will NOT upload the given file to OpenAI. - """ - gpt_id_dict = _load_gpt_ids() - item = _get_gpt_id_file(gpt_id_dict, path_from_root) - item["gpt_id"] = gpt_id - _dump_gpt_ids(gpt_id_dict) - - -def _remove_gpt_id(path_from_root: str): - """ - Remove the cached ID of a given file. - - It does NOT fully remove a file from OpenAI. Use `remove_from_gpt` - to fully remove a file. - """ - gpt_id_dict = _load_gpt_ids() - item = _get_gpt_id_file(gpt_id_dict, path_from_root) - if "gpt_id" in item: - del item["gpt_id"] - _dump_gpt_ids(gpt_id_dict) - - -def get_gpt_id(path_from_root: str) -> str: - """ - Get the OpenAI File ID from cache for a given file. - - If this file has not been uploaded to OpenAI, this method will - upload it and generate its OpenAI File ID. - """ - gpt_id_dict = _load_gpt_ids() - return _get_gpt_id_file(gpt_id_dict, path_from_root)["gpt_id"] - - -def upload_to_gpt(path_from_root: str) -> str: - """ - Upload a file to OpenAI and set its File ID to cache. 
- """ - gpt_id = _upload_to_gpt_no_set_id(path_from_root) - _set_gpt_id(path_from_root, gpt_id) - return gpt_id - - -def remove_from_gpt(path_from_root: str) -> None: - """ - Fully remove a file from OpenAI. - - This method will first delete the file from OpenAI account, then - remove its OpenAI File ID from the cache. - """ - gpt_id = get_gpt_id(path_from_root) - client.files.delete(gpt_id) - _remove_gpt_id(path_from_root) - - -def get_gpt_file_from_id(gpt_id: str) -> openai.types.file_object.FileObject: - """ - Get a OpenAI File Object using its OpenAI File ID. - """ - return client.files.retrieve(gpt_id) - - -def get_gpt_file_from_path( - path_from_root: str, -) -> openai.types.file_object.FileObject: - """ - Get a OpenAI File Object using its file path. - """ - gpt_id = get_gpt_id(path_from_root) - return get_gpt_file_from_id(gpt_id) - - -# ############################################################################# -# Add/Remove files for an assistant -# ############################################################################# - -# Note that files for Assistant means files constantly used by this assistant -# (like guidelines). For one-time used files, add them to a message instead. -# One Assistant can have up to 20 files linked to it. - - -def set_assistant_files_by_name( - assistant_name: str, file_path_list: List[str] -) -> str: - """ - Use the given file list to overwrite the file list linked to an assistant. - """ - assistant_id = get_assistant_id_by_name(assistant_name) - file_ids = [get_gpt_id(path) for path in file_path_list] - return update_assistant_by_id(assistant_id, file_ids=file_ids) - - -def add_files_to_assistant_by_name( - assistant_name: str, file_path_list: List[str] -) -> str: - """ - Link all given files to an assistant. - - An Assistant can hold only 20 files, the oldest files will be - unlinked automatically. 
- """ - assistant_id = get_assistant_id_by_name(assistant_name) - assistant_files = client.beta.assistants.files.list( - assistant_id=assistant_id - ).data - existing_file_ids = [file.id for file in assistant_files] - new_file_ids = [get_gpt_id(path) for path in file_path_list] - file_ids = list(set(existing_file_ids + new_file_ids)) - file_ids = file_ids[-20:] - return update_assistant_by_id(assistant_id, file_ids=file_ids) - - -def delete_file_from_assistant_by_id(assistant_id: str, file_id: str) -> None: - """ - Unlink a file from an Assistant using Assistant id and file id. - - This method does NOT remove the file from OpenAI account. - """ - client.beta.assistants.files.delete( - assistant_id=assistant_id, file_id=file_id - ) - - -def delete_file_from_assistant_by_name( - assistant_name: str, file_path: str -) -> None: - """ - Unlink a file from an Assistant using Assistant name and file path. - - This method does NOT remove the file from OpenAI account. - """ - gpt_id = get_gpt_id(file_path) - assistant_id = get_assistant_id_by_name(assistant_name) - delete_file_from_assistant_by_id(assistant_id, gpt_id) - - -# ############################################################################# -# Create Thread and Message from user input -# ############################################################################# - - -def create_thread() -> str: - message_thread = client.beta.threads.create() - return message_thread.id - - -def create_message_on_thread( - thread_id: str, content: str, file_ids: List[str] -) -> str: - """ - Create a message on a thread, then link files to the message using file id. - - Files linked to a message can only be used by ChatGPT in the thread - that holds this message. - """ - if not content: - _LOG.error( - "Message content must not be empty. This will cause an OpenAI error." 
- ) - if file_ids: - message = client.beta.threads.messages.create( - thread_id=thread_id, - role="user", - content=content, - file_ids=file_ids, - ) - else: - message = client.beta.threads.messages.create( - thread_id=thread_id, - role="user", - content=content, - ) - return message.id - - -def create_message_on_thread_with_file_names( - thread_id: str, content: str, file_names: List[str] -) -> str: - """ - Create a message on a thread, then link files to the message using file - name. - - Files linked to a message can only be used by ChatGPT in the thread - that holds this message. - """ - if file_names: - file_ids = [get_gpt_id(file) for file in file_names] - else: - file_ids = [] - return create_message_on_thread(thread_id, content, file_ids) - - -# ############################################################################# -# Run thread on certain assistant -# ############################################################################# - - -def run_thread_on_assistant(assistant_id, thread_id, model: str = "") -> str: - """ - Run a thread on a given Assistant id. - - This is similar to sending a message to ChatGPT. - """ - if model: - run = client.beta.threads.runs.create( - thread_id=thread_id, assistant_id=assistant_id, model=model - ) - else: - run = client.beta.threads.runs.create( - thread_id=thread_id, assistant_id=assistant_id - ) - return run.id - - -def run_thread_on_assistant_by_name( - assistant_name: str, thread_id: str, model: str = "" -) -> str: - """ - Run a thread on a given Assistant name. - - This is similar to sending a message to ChatGPT. - """ - assistant_id = get_assistant_id_by_name(assistant_name) - if model: - return run_thread_on_assistant(assistant_id, thread_id, model) - else: - return run_thread_on_assistant(assistant_id, thread_id) - - -def wait_for_run_result(thread_id: str, run_id: str, timeout: int = 180) -> List: - """ - Wait for the thread to be processed. - - This is similar to waiting for ChatGPT's typing. 
- """ - finished = False - _LOG.info("Waiting for chatgpt response...") - for i in range(math.ceil(timeout / 5)): - _LOG.info("%s/%s seconds before timeout", i * 5, timeout) - time.sleep(5) - run = client.beta.threads.runs.retrieve( - thread_id=thread_id, run_id=run_id - ) - finished = run.status == "completed" - if finished: - break - if not finished: - raise TimeoutError("Failed to retrieve response from OpenAI.") - messages = client.beta.threads.messages.list(thread_id=thread_id).data - return messages - - -# ############################################################################# -# ChatGPT runner -# ############################################################################# - - -def e2e_assistant_runner( - assistant_name: str, - user_input: str = "", - *, - model: str = "", - input_file_names: Optional[List[str]] = None, - output_file_path: str = "", - vim_mode: bool = False, -) -> str: - """ - Send a message with files to an Assistant and wait for its reply. - - :param assistant_name: Assistant that should process this message - :param user_input: message to be sent to ChatGPT assistant - :param model: change the GPT model used by the assistant, no change - when empty this WILL update the configuration of the assistant - :param input_file_names: files to be used in this conversation - :param output_file_path: redirect ChatGPT's output to the given file - :param vim_mode: if True, take input from stdin and output to stdout - forcely - """ - if input_file_names is None: - input_file_names = [] - if not assistant_name: - _LOG.error("No Assistant name provided.") - return "" - if vim_mode: - user_input = "".join(sys.stdin.readlines()) - thread_id = create_thread() - create_message_on_thread_with_file_names( - thread_id, user_input, input_file_names - ) - if model: - run_id = run_thread_on_assistant_by_name( - assistant_name, thread_id, model - ) - else: - run_id = run_thread_on_assistant_by_name(assistant_name, thread_id) - messages = 
wait_for_run_result(thread_id, run_id) - output = messages[0].content[0].text.value - if vim_mode or not output_file_path: - sys.stdout.write(output) - if output_file_path: - with open(output_file_path, "w", encoding="utf-8") as fp: - fp.write(output) - return output diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py deleted file mode 100644 index 18ce63d7d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Import as: - -import helpers.hchatgpt_instructions as hchainst -""" - -instructions = { - "MarkdownLinter": """ -You are a markdown linter. -If you are given a piece of text under markdown format, treat these text as the -content of the markdown content you need to lint. -If you are given a filename, you should find the file in your linked files, use -it as the markdown content you need to lint. -After get the markdown content, find and fix grammatical errors in that content -with the minimum amount of changes possible and preserve the formatting. -You don't need to add periods at the end of each sentence. -You should not add ```markdown ``` around the output content. -Your only output message should be the linted result of that file, no additional -explanations should be added in your output. - """, - "DocWriter": """ -You are a documentation writer. -If you are given several python code files, try to understand these files and -how they may work. -You should write a markdown document about these files for users that have not -read the codes to know the basic workflow of them, your can use examples to show -the user how they can easily use those codes. 
-For the format of markdown document, you can use files linked to you as -reference. You don't need to strictly follow the format, the goal is to make the -document easy to understand - """, -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py deleted file mode 100644 index 2fd175bf4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Import as: - -import helpers.hcoverage as hcovera -""" - -import glob -import logging -import os -import pathlib -import site -import subprocess -import sysconfig - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def _detect_site_packages() -> pathlib.Path: - """ - Return the Path to the site-packages directory for the active interpreter. - - - Try sysconfig first - - Fall back to site.getsitepackages() or user-site. - """ - try: - purelib = sysconfig.get_path("purelib") - if purelib: - return pathlib.Path(purelib) - except (KeyError, IOError): - _LOG.debug( - "sysconfig.get_path('purelib') failed, falling back to site packages" - ) - try: - sp_dirs = site.getsitepackages() - except AttributeError: - sp_dirs = [] - for d in sp_dirs: - if "site-packages" in d: - return pathlib.Path(d) - return pathlib.Path(site.getusersitepackages()) - - -def inject(coveragerc: str = ".coveragerc") -> None: - """ - Install the coverage startup hook into this env site-packages. 
- """ - rc = pathlib.Path(coveragerc).resolve() - os.environ["COVERAGE_PROCESS_START"] = str(rc) - _LOG.debug("Set COVERAGE_PROCESS_START to %s", rc) - sp = _detect_site_packages() - target = sp / "coverage.pth" - hook_line = "import coverage; coverage.process_startup()" - cmd = f'echo "{hook_line}" | sudo tee "{target}" > /dev/null' - try: - hsystem.system(cmd) - _LOG.debug("Installed coverage hook to %s via sudo tee", target) - except (OSError, subprocess.SubprocessError) as e: - hdbg.dassert(False, f"Failed to install coverage hook via sudo tee: {e}") - - -def remove() -> None: - """ - Remove the coverage startup hook from this env site-packages. - """ - sp = _detect_site_packages() - target = sp / "coverage.pth" - if target.is_file(): - cmd = f'sudo rm -f "{target}"' - try: - hsystem.system(cmd) - _LOG.info("Removed coverage hook from %s via sudo rm", target) - except Exception as e: - _LOG.error("Failed to remove coverage hook via sudo rm: %s", e) - raise - else: - # TODO(Maddy): Is this acceptable? - _LOG.warning("No coverage.pth found in %s", sp) - # Remove coverage environment variables. - try: - if "COVERAGE_PROCESS_START" in os.environ: - del os.environ["COVERAGE_PROCESS_START"] - _LOG.info("Removed COVERAGE_PROCESS_START from environment") - else: - _LOG.debug("COVERAGE_PROCESS_START not found in environment") - except Exception as e: - _LOG.error("Failed to remove COVERAGE_PROCESS_START: %s", e) - raise - - -def generate_coverage_dockerfile() -> str: - """ - Build a Dockerfile string that appends coverage support. - """ - # This requires to: - # - Install coverage, pytest, pytest-cov at build time - # - Create /coverage_data and writes .coveragerc - # - Set ENV COVERAGE_PROCESS_START to /coverage_data/.coveragerc - # - Write a coverage.pth into site-packages so coverage auto-starts - txt = """ - # Install coverage and testing dependencies. 
- RUN pip install --no-cache-dir coverage pytest pytest-cov - - # Create coverage data directory with proper permissions. - RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data - - # Setup coverage configuration. - COPY .coveragerc /app/coverage_data/.coveragerc - ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc - - # Create coverage.pth file for automatic startup. - # This ensures coverage tracking starts automatically when Python runs. - RUN python - < None: - """ - Execute shell commands to run coverage steps in a Docker container. - - Assumes: - - A valid .coveragerc exists in the current working directory. - - coverage_data/ is the mounted folder inside the container. - """ - commands = [ - "mkdir -p coverage_data", - "chmod 777 coverage_data", - "cp .coveragerc coverage_data/.coveragerc", - "chmod 644 coverage_data/.coveragerc", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - - -def coverage_combine() -> None: - """ - Execute shell commands to combine coverage data. - - Assumes: - - .coverage.* files are present in the current directory or coverage_data/. - """ - # Check if there are any coverage files in coverage_data/ and copy them. - if os.path.exists("coverage_data"): - coverage_files_cmd = ( - "find coverage_data -name '.coverage.*' 2>/dev/null | wc -l" - ) - rc = hsystem.system(coverage_files_cmd, abort_on_error=False) - if rc == 0: - # Use a simple existence check instead of parsing command output. - coverage_files = glob.glob("coverage_data/.coverage.*") - if coverage_files: - _LOG.info( - "Found coverage files in coverage_data/, copying to current directory" - ) - commands = [ - "cp coverage_data/.coverage.* . 2>/dev/null || true", - "rm -rf coverage_data/.coverage.* 2>/dev/null || true", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - # Check if there are any .coverage.* files to combine. 
- coverage_files = glob.glob(".coverage.*") - num_files = len(coverage_files) - if num_files > 0: - _LOG.info("Found %d coverage data files to combine", num_files) - commands = [ - "coverage combine", - "coverage report --skip-empty", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - else: - _LOG.warning("No .coverage.* files found to combine") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py deleted file mode 100644 index 6c64659c0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py +++ /dev/null @@ -1,365 +0,0 @@ -""" -Import as: - -import helpers.hcsv as hcsv -""" - -import ast -import logging -import os -from typing import Any, Callable, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hs3 as hs3 - -_LOG = logging.getLogger(__name__) - - -def _append_csv( - df: pd.DataFrame, path: str, *, index: bool = False, **kwargs: Any -) -> None: - """ - Append a df to the CSV file `path` without header. - """ - with open(path, "a") as f: - df.to_csv(f, header=False, index=index, **kwargs) - - -def _read_csv_range( - csv_path: str, from_: int, to: int, **kwargs: Any -) -> pd.DataFrame: - """ - Read a specified row range of a CSV file and convert to a DataFrame. - - This function: - - assumes the CSV file to have header, considered to be row 0. - - reads [from_, to), e.g., (to - from_) lines following list slicing semantics. 
- - :param csv_path: location of CSV file - :param from_: first line to read (header is row 0 and is always read) - :param to: last line to read, not inclusive - :return: DataFrame with columns from CSV line 0 (header) - """ - hdbg.dassert_lt(0, from_, msg="Row 0 assumed to be header row") - hdbg.dassert_lt(from_, to, msg="Empty range requested!") - skiprows = list(range(1, from_)) - nrows = to - from_ - df = pd.read_csv(csv_path, skiprows=skiprows, nrows=nrows, **kwargs) - if df.shape[0] < to: - _LOG.warning("Number of df rows = %i vs requested = %i", df.shape[0], to) - return df - - -# TODO(gp): There is no use of this function. -def build_chunk( - csv_path: str, - col_name: str, - start: int, - *, - nrows_at_a_time: int = 1000, - **kwargs: Any, -) -> pd.DataFrame: - """ - Build a DataFrame from a CSV subset as follows: - - - Names the columns using the header line (row 0) - - Reads the value in (row, col) coordinates (`start`, `col_name`) (if it - exists) as `value` - - Adds row `start` and all subsequent contiguous rows with `value` in - column `col_name` - - For memory efficiency, the CSV is processed in chunks of size `nrows_at_a_time`. - - :param csv_path: location of CSV file - :param col_name: name of column whose values define chunks - :param start: first row to process - :param nrows_at_a_time: size of chunks to process - :return: DataFrame with columns from CSV line 0 - """ - hdbg.dassert_lt(0, start) - stop = False - dfs: List[pd.DataFrame] = [] - init_df = _read_csv_range(csv_path, start, start + 1, **kwargs) - if init_df.shape[0] < 1: - return init_df - val = init_df[col_name].iloc[0] - _LOG.debug("Building chunk for %s", val) - counter = 0 - while not stop: - from_ = start + counter * nrows_at_a_time - df = _read_csv_range(csv_path, from_, from_ + nrows_at_a_time) - # Break if there are no matches. - if df.shape[0] == 0: - break - if not (df[col_name] == val).any(): - break - # Stop if we have run out of rows to read. 
- if df.shape[0] < nrows_at_a_time: - stop = True - idx_max = (df[col_name] == val)[::-1].idxmax() - # Stop if we have reached a new value. - if idx_max < (df.shape[0] - 1): - stop = True - dfs.append(df.iloc[0 : idx_max + 1]) - counter += 1 - if not dfs: - return pd.DataFrame() - return pd.concat(dfs, axis=0).reset_index(drop=True) - - -# TODO(gp): There is no use of this function. -def find_first_matching_row( - csv_path: str, - col_name: str, - val: str, - *, - start: int = 1, - nrows_at_a_time: int = 1000000, - **kwargs: Any, -) -> Optional[int]: - """ - Find first row in CSV where value in column `col_name` equals `val`. - - :param csv_path: location of CSV file - :param col_name: name of column whose values define chunks - :param val: value to match on - :param start: first row (inclusive) to start search on - :param nrows_at_a_time: size of chunks to process - :return: line in CSV of first matching row at or past start - """ - curr = start - while True: - _LOG.debug("Start of current chunk = line %i", curr) - df = _read_csv_range(csv_path, curr, curr + nrows_at_a_time, **kwargs) - if df.shape[0] < 1: - _LOG.info("Value %s not found", val) - break - matches = df[col_name] == val - if matches.any(): - idx_max = matches.idxmax() - return int(curr + idx_max) - curr += nrows_at_a_time - return None - - -# ############################################################################# -# CSV to PQ conversion -# ############################################################################# - - -def _csv_mapreduce( - csv_path: str, - out_dir: str, - key_func: Callable, - chunk_preprocessor: Optional[Callable], - *, - chunk_size: int = 1000000, -) -> None: - """ - Map-reduce-type processing of CSV. 
- - The phases are: - - Read the CSV in chunks as DataFrame - - Key each row of the DataFrame using a `groupby` - - "Reduce" keyed groups by writing and appending to a CSV - - :param csv_path: input CSV path - :param out_dir: output dir for CSV with filenames corresponding to keys - :param key_func: function to apply to each chunk DataFrame to key rows - Should return an iterable with elements like (key, df) - :param chunk_preprocessor: function to apply to each chunk DataFrame before - applying key_func - :param chunk_size: chunk_size of input to process - """ - # Read CSV data in chunks. - chunks = pd.read_csv(csv_path, chunksize=chunk_size) - # Preprocess chunk, if needed. - if chunk_preprocessor is not None: - chunks = map(chunk_preprocessor, chunks) - # Apply key_func to each chunk. - keyed_group_blocks = map(key_func, chunks) - # Append results. - for block in keyed_group_blocks: - for idx, df in block: - file_name = os.path.join(out_dir, idx + ".csv") - _append_csv(df, file_name) - - -def convert_csv_to_pq( - csv_path: str, - pq_path: str, - *, - normalizer: Optional[Callable] = None, - header: Optional[int] = 0, - compression: Optional[str] = "gzip", -) -> None: - """ - Convert CSV file to Parquet file. - - Output of `csv_map_reduce()` is typically header-less to support append mode, - and so `normalizer` may be used to add appropriate headers. Note that Parquet - requires string column names, whereas Pandas by default uses integer column - names. - - :param csv_path: full path of CSV - :param pq_path: full path of parquet - :param header: header specification of CSV - :param normalizer: function to apply to df before writing to PQ - """ - df = pd.read_csv(csv_path, header=header) - # TODO(Paul): Ensure that one of header, normalizer is not None. 
- if normalizer is not None: - df = normalizer(df) - df.to_parquet(pq_path, compression=compression) - - -def convert_csv_dir_to_pq_dir( - csv_dir: str, - pq_dir: str, - *, - normalizer: Optional[Callable] = None, - header: Optional[int] = None, -) -> None: - """ - Apply `convert_csv_to_pq()` to all files in `csv_dir`. - - :param csv_dir: directory storing CSV files on S3 or local - :param pq_dir: target directory to save PQ files (only local - filesystem) - :param header: header specification of CSV - :param normalizer: function to apply to df before writing to PQ - """ - # Get the filenames in `csv_dir`. - if hs3.is_s3_path(csv_dir): - # TODO(gp): Pass aws_profile. - s3fs = hs3.get_s3fs("am") - filenames = s3fs.ls(csv_dir) - else: - # Local filesystem. - hdbg.dassert_dir_exists(csv_dir) - # TODO(Paul): check .endswith(".csv") or do glob(csv_dir + "/*.csv") - filenames = os.listdir(csv_dir) - hdbg.dassert(filenames, "No files in the directory '%s'", csv_dir) - # Process all the filenames. - # TODO(gp): Add tqdm. - # TODO(gp): Consider parallelizing. - for filename in filenames: - # Remove .csv/.csv.gz. - csv_stem = hio.remove_extension( - filename, ".csv", check_file_exists=True, check_has_extension=False - ) - if csv_stem is None: - csv_stem = hio.remove_extension( - filename, - ".csv.gz", - check_file_exists=True, - check_has_extension=False, - ) - if csv_stem is None: - _LOG.warning( - "Skipping filename=%s since it has invalid extension", csv_stem - ) - continue - # Convert file to PQ. - pq_filename = csv_stem + ".pq" - convert_csv_to_pq( - os.path.join(csv_dir, filename), - os.path.join(pq_dir, pq_filename), - normalizer=normalizer, - header=header, - ) - - -# ############################################################################# -# CSV-JSON dict conversion -# ############################################################################# - - -# TODO(gp): convert_csv_to_json_dict? 
-# TODO(gp): path_to_csv -> file_name -def convert_csv_to_dict(path_to_csv: str, remove_nans: bool) -> Dict[Any, Any]: - """ - Convert a CSV file storing a dataframe into a JSON-compatible dict. - - :param path_to_csv: path to the CSV file - :param remove_nans: whether to remove NaNs from the dictionary - :return: a JSON-compatible dict with the dataframe data - """ - hdbg.dassert_file_exists(path_to_csv) - # Load the dataframe from a CSV file. - df = pd.read_csv(path_to_csv) - # Transform the dataframe into a dict. - dict_df = df.to_dict(orient="list") - if remove_nans: - # Remove NaNs from the dict. - for key in dict_df: - dict_df[key] = [x for x in dict_df[key] if not pd.isnull(x)] - return dict_df # type: ignore - - -# TODO(gp): path_to_csv -> file_name -def save_csv_as_json( - path_to_csv: str, remove_nans: bool, path_to_json: Optional[str] = None -) -> None: - """ - Convert the df from a CSV into a dict and save it into a JSON file. - - If the `path_to_json` is not provided, the JSON is saved in the folder where - the CSV file is located. - - :param path_to_csv: path to the CSV file - :param remove_nans: whether to remove NaNs from the dictionary - :param path_to_json: path to save the JSON file - """ - # Convert the df from the CSV into a JSON-compatible dict. - dict_df = convert_csv_to_dict(path_to_csv, remove_nans) - # Determine the JSON destination path. - if path_to_json is None: - path_to_json = hio.change_filename_extension( - path_to_csv, ".csv", ".json" - ) - # Save the dict into a JSON file. - hio.to_json(path_to_json, dict_df) - - -# ############################################################################# -# CSV files with types -# ############################################################################# - - -def to_typed_csv(df: pd.DataFrame, file_name: str) -> str: - """ - Convert df into CSV and creates a file with the dtypes of columns. 
- - This function creates a file containing the types with the same name - and suffix e.g., `foobar.csv.types`. - """ - # Save the types. - dtypes_filename = file_name + ".types" - hio.create_enclosing_dir(dtypes_filename, incremental=True) - dtypes_dict = str(df.dtypes.apply(lambda x: x.name).to_dict()) - # Save the data. - df.to_csv(file_name, index=False) - with open(dtypes_filename, "w") as dtypes_file: - dtypes_file.write(dtypes_dict) - return dtypes_filename - - -def from_typed_csv(file_name: str) -> pd.DataFrame: - """ - Load CSV file as df applying the original types of columns. - - This function uses a file with name `file_name.types` to load - information about the column types. - """ - # Load the types. - dtypes_filename = file_name + ".types" - hdbg.dassert_path_exists(dtypes_filename) - with open(dtypes_filename) as dtypes_file: - dtypes_dict = ast.literal_eval(list(dtypes_file)[0]) - # Load the data, applying the types. - df = pd.read_csv(file_name, dtype=dtypes_dict) - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py deleted file mode 100644 index 2849dfb10..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -Helper functions for processing pandas dataframes. - -Import as: - -import helpers.hdataframe as hdatafr -""" - -# TODO(gp): Consider merging with `helpers/pandas_helpers.py`. 
- -import collections -import functools -import logging -import operator -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -_METHOD_TO_APPLY = Dict[str, Dict[str, Any]] - - -def _combine_masks( - masks: pd.DataFrame, mode: str, info: collections.OrderedDict -) -> pd.Series: - if mode == "and": - combined_mask = masks.all(axis=1) - elif mode == "or": - combined_mask = masks.any(axis=1) - else: - raise ValueError(f"Invalid `mode`='{mode}'") - if combined_mask.sum() == 0: - _LOG.warning("No data remaining after filtering.") - info["nrows_remaining"] = combined_mask.sum() - return combined_mask - - -def filter_data_by_values( - df: pd.DataFrame, - filters: Dict[Union[int, str], Tuple[Any, ...]], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe rows based on column values. - - :param df: dataframe - :param filters: `{col_name: (possible_values)}` - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, vals in filters.items(): - hdbg.dassert_isinstance(vals, tuple) - mask = df[col_name].isin(vals) - info[f"n_{col_name}"] = mask.sum() - info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -def filter_data_by_comparison( - df: pd.DataFrame, - filters: Dict[ - Union[int, str], Union[Tuple[str, Any], Tuple[Tuple[str, Any], ...]] - ], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe by comparing columns to values. - - :param df: dataframe - :param filters: `{col_name: (comparison_method, value)}` or - `{col_name: ((comparison_method_i, value_i))}`. - `comparison_method` is one of the ("eq", "ne", "le", "lt", "ge", "gt") - pandas method names. - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, tuple_ in filters.items(): - if not isinstance(tuple_[0], tuple): - tuple_ = (tuple_,) # type: ignore - for comparison_method, val in tuple_: - hdbg.dassert_in( - comparison_method, ("eq", "ne", "le", "lt", "ge", "gt") - ) - mask = getattr(df[col_name], comparison_method)(val) - info[f"n_{col_name}_{comparison_method}_{val}"] = mask.sum() - info[f"perc_{col_name}_{comparison_method}_{val}"] = hprint.perc( - mask.sum(), df.shape[0] - ) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -def filter_data_by_method( - df: pd.DataFrame, - filters: Dict[Union[int, str], _METHOD_TO_APPLY], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe by calling a method specified for each column. - - :param df: dataframe - :param filters: `{col_name: {method: kwargs}}`, where `method` is the - method called on the dataframe column, e.g. "isin" or "str.contains", - and `kwargs` are the kwargs for this method - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, method_dict in filters.items(): - for method, kwargs in method_dict.items(): - mask = operator.attrgetter(method)(df[col_name])(**kwargs) - info[f"n_{col_name}"] = mask.sum() - info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -# ############################################################################# - - -def apply_nan_mode( - srs: pd.Series, - mode: str = "leave_unchanged", - info: Optional[dict] = None, -) -> pd.Series: - """ - Process NaN values in a series according to the parameters. - - :param srs: pd.Series to process - :param mode: method of processing NaNs - - "leave_unchanged" - no transformation - - "drop" - drop all NaNs - - "ffill" - forward fill not leading NaNs - - "ffill_and_drop_leading" - do ffill and drop leading NaNs - - "fill_with_zero" - fill NaNs with 0 - - "strict" - raise ValueError that NaNs are detected - :param info: information storage - :return: transformed copy of input series - """ - hdbg.dassert_isinstance(srs, pd.Series) - if srs.empty: - _LOG.warning("Empty input series `%s`", srs.name) - if mode == "leave_unchanged": - res = srs.copy() - elif mode == "drop": - res = srs.dropna().copy() - elif mode == "ffill": - res = srs.ffill().copy() - elif mode == "ffill_and_drop_leading": - res = srs.ffill().dropna().copy() - elif mode == "fill_with_zero": - res = srs.fillna(0).copy() - elif mode == "strict": - res = srs.copy() - if srs.isna().any(): - raise ValueError(f"NaNs detected in mode `{mode}`") - else: - raise ValueError(f"Unrecognized mode `{mode}`") - # - if info is not None: - hdbg.dassert_isinstance(info, dict) - # Dictionary should be empty. 
- hdbg.dassert(not info) - info["series_name"] = srs.name - info["num_elems_before"] = len(srs) - info["num_nans_before"] = np.isnan(srs).sum() - info["num_elems_removed"] = len(srs) - len(res) - info["num_nans_imputed"] = ( - info["num_nans_before"] - info["num_elems_removed"] - ) - info["percentage_elems_removed"] = ( - 100.0 * info["num_elems_removed"] / info["num_elems_before"] - ) - info["percentage_elems_imputed"] = ( - 100.0 * info["num_nans_imputed"] / info["num_elems_before"] - ) - return res - - -@functools.lru_cache() -def compute_points_per_year_for_given_freq(freq: str) -> float: - """ - Return the number of index time points per year. - - :param freq: string identifier of date frequency - :return: number of time points per year (approximate) - """ - # `pd.date_range` breaks for zero-period frequencies, so we need to work - # around that. - try: - # Leap years: 2012, 2016. - points_in_span = pd.date_range( - freq=freq, start="2012-01-01", end="2019-12-31" - ).size - span_in_years = 8 - points_per_year: float = points_in_span / span_in_years - return points_per_year - except ZeroDivisionError: - return 0.0 - - -# ############################################################################# - - -def infer_sampling_points_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: - """ - Return the number of index time points per year. - - TODO(*): Consider extending to all frequencies and count points by - explicitly building indices of the given frequency. - - :param df: series or dataframe with non-null `df.index.freq` - :return: number of time points per year (approximate) - """ - hdbg.dassert(hasattr(df.index, "freq") and df.index.freq is not None) - freq = df.index.freq - # TODO(*): Make start, end dates parameters that can be passed in. - return compute_points_per_year_for_given_freq(freq) - - -def compute_count_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: - """ - Return df.count() divided by the length of `df` in years. 
- """ - hdbg.dassert( - hasattr(df.index, "freq") and df.index.freq is not None, - msg="`df` must have a `DatetimeIndex` with a `freq`", - ) - assert hasattr(df.index, "freq") and df.index.freq is not None - freq = df.index.freq - # Calculate the time span of `df` in years. - points_per_year = compute_points_per_year_for_given_freq(freq) - span_in_years = df.size / points_per_year - # Determine the number of non-NaN/inf/etc. data points per year. - count_per_year = df.count() / span_in_years - count_per_year = cast(float, count_per_year) - return count_per_year - - -# ############################################################################# - - -def remove_duplicates( - df: pd.DataFrame, - duplicate_columns: Optional[List[str]], - control_column: Optional[str], -) -> pd.DataFrame: - """ - Remove duplicates from DataFrame. - - :param df: DataFrame to process - :param duplicate_columns: subset of column names, None for all - :param control_column: column max value of which determines the kept - row - :return: DataFrame with removed duplicates - """ - # Fix maximum value of control column at the bottom. - if control_column: - df = df.sort_values(by=control_column) - duplicate_columns = duplicate_columns or df.columns - df = df.drop_duplicates(subset=duplicate_columns) - # Sort by index to return to original view. 
- df = df.sort_index() - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py deleted file mode 100644 index e63152593..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py +++ /dev/null @@ -1,909 +0,0 @@ -""" -Import as: - -import helpers.hdatetime as hdateti -""" - -import asyncio -import calendar -import datetime -import logging -import re -from typing import Callable, Iterable, Optional, Tuple, Union - -# TODO(gp): Use hdbg.WARNING -_WARNING = "\033[33mWARNING\033[0m" - -# Avoid dependency from other `helpers` modules to prevent import cycles. - -import pandas as pd # noqa: E402 # pylint: disable=wrong-import-position - -# TODO(gp): Check if dateutils is equivalent to `pytz` or better so we can simplify -# the dependencies. -try: - import pytz -except ModuleNotFoundError: - _module = "pytz" - print(_WARNING + f": Can't find {_module}: continuing") - - -import helpers.hdbg as hdbg # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hprint as hprint # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hwall_clock_time as hwacltim # noqa: E402 # pylint: disable=wrong-import-position - -_LOG = logging.getLogger(__name__) - -# We use the type `Datetime` to allow flexibility in the interface exposed to client. 
-# The typical pattern is: -# - we call `to_datetime()`, as soon as we enter functions exposed to users, -# to convert the user-provided datetime into a `datetime.datetime` -# - we use only `datetime.datetime` in the private interfaces -# TODO(gp): In practice we are using `pd.Timestamp` -# -# It's often worth to import this file even for just the type `Datetime`, -# since typically as soon as the caller uses this type, they also want to use -# `to_datetime()` and `dassert_*()` functions. -# TODO(gp): It would be better to call this `GeneralDateTime`, `FlexibleDateTime`, -# and rename `StrictDateTime` -> `DateTime`. -Datetime = Union[str, pd.Timestamp, datetime.datetime] - -# The type `StrictDateTime` is for stricter interfaces, although it is a bit of a -# compromise. -# Either one wants to allow everything that can be interpreted as a datetime (and -# then use `Datetime`), or strict (and then use only `datetime.datetime`). -StrictDatetime = Union[pd.Timestamp, datetime.datetime] - - -def dassert_is_datetime(datetime_: Datetime) -> None: - """ - Assert that `datetime_` is of type `Datetime`. - """ - hdbg.dassert_isinstance( - datetime_, - (str, pd.Timestamp, datetime.datetime), - "datetime_='%s' of type '%s' is not a DateTimeType", - datetime_, - str(type(datetime_)), - ) - - -def dassert_is_strict_datetime(datetime_: StrictDatetime) -> None: - """ - Assert that `datetime_` is of type `StrictDatetime`. - """ - hdbg.dassert_isinstance( - datetime_, - (pd.Timestamp, datetime.datetime), - "datetime_='%s' of type '%s' is not a StrictDateTimeType", - datetime_, - str(type(datetime_)), - ) - - -def dassert_str_is_date(date: str) -> None: - """ - Check if an input string is a date. - - :param date: date as string, e.g., "20221101" - """ - hdbg.dassert_isinstance(date, str) - try: - _ = datetime.datetime.strptime(date, "%Y%m%d") - except ValueError as e: - raise ValueError(f"date='{date}' doesn't have the right format: {e}") - - -# TODO(Grisha): also pass timezone. 
-def to_datetime(datetime_: Datetime) -> datetime.datetime: - """ - Convert a `Datetime` into a `datetime.datetime`. - - :return: tz-aware or naive datetime.datetime - """ - dassert_is_datetime(datetime_) - if isinstance(datetime_, str): - datetime_ = pd.Timestamp(datetime_) - if isinstance(datetime_, pd.Timestamp): - datetime_ = datetime_.to_pydatetime() - return datetime_ # type: ignore - - -def to_timestamp(datetime_: Datetime) -> pd.Timestamp: - """ - Convert a `Datetime` into a `pd.Timestamp`. - - :return: tz-aware or naive datetime.datetime - """ - dassert_is_datetime(datetime_) - timestamp = pd.Timestamp(datetime_) - return timestamp - - -# //////////////////////////////////////////////////////////////////////////////////O - - -def dassert_is_tz_naive(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is tz-naive, i.e., doesn't have timezone - info. - """ - hdbg.dassert_is( - datetime_.tzinfo, None, "datetime_='%s' is not tz naive", datetime_ - ) - - -def dassert_has_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp has timezone info. - """ - hdbg.dassert_is_not( - datetime_.tzinfo, - None, - "datetime_='%s' doesn't have timezone info", - datetime_, - ) - - -def dassert_has_specified_tz( - datetime_: StrictDatetime, tz_zones: Iterable[str] -) -> None: - """ - Assert that the passed timestamp has the timezone passed in `tz_zones`. - """ - # Make sure that the passed timestamp has timezone information. - dassert_has_tz(datetime_) - # Get the timezone. - tz_info = datetime_.tzinfo - # Unlike other timezones UTC is a `datetime.timezone` object not a - # `pytz.tzfile`. See CmTask5895 for details. 
- if ( - isinstance(tz_info, datetime.timezone) - and tz_info == datetime.timezone.utc - ): - tz_zone = "UTC" - else: - tz_zone = tz_info.zone # type: ignore - has_expected_tz = tz_zone in tz_zones - hdbg.dassert( - has_expected_tz, - "datetime_=%s (type=%s) tz_info=%s tz_info.zone=%s instead of tz_zones=%s", - datetime_, - type(datetime_), - tz_info, - tz_zone, - tz_zones, - ) - - -def dassert_has_UTC_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is UTC. - """ - tz_zones = (pytz.timezone("UTC").zone,) - dassert_has_specified_tz(datetime_, tz_zones) - - -def dassert_has_ET_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is Eastern Time (ET). - """ - tz_zones = ( - pytz.timezone("US/Eastern").zone, - pytz.timezone("America/New_York").zone, - ) - dassert_has_specified_tz(datetime_, tz_zones) - - -def dassert_tz_compatible( - datetime1: StrictDatetime, datetime2: StrictDatetime -) -> None: - """ - Assert that two timestamps are both naive or both have timezone info. - """ - dassert_is_strict_datetime(datetime1) - dassert_is_strict_datetime(datetime2) - has_tz1 = datetime1.tzinfo is not None - has_tz2 = datetime2.tzinfo is not None - hdbg.dassert_eq( - has_tz1, - has_tz2, - "datetime1='%s' and datetime2='%s' are not compatible", - str(datetime1), - str(datetime2), - ) - - -def dassert_have_same_tz( - datetime1: StrictDatetime, datetime2: StrictDatetime -) -> None: - """ - Assert that both timestamps have the same tz. - - The timezones are compared regardless of a DST mode. - """ - dassert_tz_compatible(datetime1, datetime2) - # Convert to string to remove DST mode info. - tz1_as_str = str(datetime1.tzinfo) - tz2_as_str = str(datetime2.tzinfo) - hdbg.dassert_eq( - tz1_as_str, - tz2_as_str, - "datetime1=%s (datetime1.tzinfo=%s) datetime2=%s (datetime2.tzinfo=%s) ", - datetime1, - tz1_as_str, - datetime2, - tz2_as_str, - ) - - -# TODO(gp): Replace this check with compatibility between series vs scalar. 
-# def dassert_srs_tz_compatible( -# def dassert_srs_has_tz -# def dassert_srs_is_tz_naive -def dassert_tz_compatible_timestamp_with_df( - datetime_: StrictDatetime, - df: pd.DataFrame, - col_name: Optional[str], -) -> None: - """ - Assert that timestamp and a df column are both naive or both have timezone - info. - - :param col_name: col_name. `None` represents the index. - """ - dassert_is_strict_datetime(datetime_) - hdbg.dassert_isinstance(df, pd.DataFrame) - if df.empty: - return - if col_name is None: - # We assume that the first element in the index is representative. - df_datetime = df.index[0] - else: - hdbg.dassert_in(col_name, df.columns) - df_datetime = df[col_name].iloc[0] - dassert_tz_compatible(df_datetime, datetime_) - - -# //////////////////////////////////////////////////////////////////////////////////O - - -def dassert_is_valid_timestamp(timestamp: Optional[pd.Timestamp]) -> None: - """ - Assert that a timestamp is `None` or a `pd.Timestamp` with timezone. - """ - if timestamp is not None: - hdbg.dassert_isinstance(timestamp, pd.Timestamp) - dassert_has_tz(timestamp) - - -def dassert_timestamp_lte( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], -) -> None: - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - if start_timestamp is not None and end_timestamp is not None: - hdbg.dassert_lte(start_timestamp, end_timestamp) - - -def dassert_timestamp_lt( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], -) -> None: - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - if start_timestamp is not None and end_timestamp is not None: - hdbg.dassert_lt(start_timestamp, end_timestamp) - - -def dassert_is_valid_interval( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> None: - """ - Assert that an interval has valid start and end 
timestamps. - """ - _LOG.debug( - hprint.to_str("start_timestamp end_timestamp left_close right_close") - ) - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - # Check the requested interval. - if start_timestamp is not None and end_timestamp is not None: - if left_close and right_close: - # If they are both closed, an interval like [a, a] makes sense, - # otherwise it doesn't. - hdbg.dassert_lte(start_timestamp, end_timestamp) - else: - hdbg.dassert_lt(start_timestamp, end_timestamp) - - -# ############################################################################# - - -def get_UTC_tz() -> datetime.tzinfo: - """ - Return the UTC timezone. - """ - return pytz.timezone("UTC") - - -def get_ET_tz() -> datetime.tzinfo: - """ - Return the US Eastern Time timezone. - """ - # TODO(Grisha): -> `US/Eastern`? - # It appears that "America/New_York" is to be preferred over "US/Eastern". - # https://www.iana.org/time-zones - # https://en.wikipedia.org/wiki/Tz_database - return pytz.timezone("America/New_York") - - -# Function returning the current (true, replayed, simulated) wall-clock time as a -# timestamp. -# TODO(gp): maybe GetWallClockTimeFunc is better to clarify that this is a function -# and not time. We often pass -GetWallClockTime = Callable[[], pd.Timestamp] - - -# TODO(gp): -> get_wall_clock_time -# TODO(gp): tz -> tz_mode since we are not passing neither a timezone or a -# timezone_as_str. -def get_current_time( - tz: str, - # TODO(gp): Add * - # *, - event_loop: Optional[asyncio.AbstractEventLoop] = None, -) -> pd.Timestamp: - """ - Return current time in UTC / ET timezone or as a naive time. - - This should be the only way to get the current wall-clock time, - since it handles both wall-clock time and "simulated" wall-clock - time through asyncio. - - :param tz: how to represent the returned time (e.g., "UTC", "ET", - "naive") - """ - if event_loop is not None: - # We accept only `hasyncio.EventLoop` here. 
If we are using standard asyncio - # EventLoop we rely on wall-clock time instead of `loop.time()`. - hdbg.dassert_isinstance(event_loop, asyncio.AbstractEventLoop) - hdbg.dassert(hasattr(event_loop, "get_current_time")) - timestamp = event_loop.get_current_time() - else: - # Use true real-time. - timestamp = datetime.datetime.utcnow() - # Convert it into the right - timestamp = pd.Timestamp(timestamp, tz=get_UTC_tz()) - if tz == "UTC": - pass - elif tz == "ET": - timestamp = timestamp.tz_convert(get_ET_tz()) - elif tz == "naive_UTC": - timestamp = timestamp.replace(tzinfo=None) - elif tz == "naive_ET": - timestamp = timestamp.tz_convert(get_ET_tz()) - timestamp = timestamp.replace(tzinfo=None) - else: - raise ValueError(f"Invalid tz='{tz}'") - return timestamp - - -def get_current_timestamp_as_string(tz: str) -> str: - """ - Return the current time in the format `YYYYMMDD_HHMMSS` (e.g., - 20210728_221734). - - Note that no information about the timezone is returned. Thus the - same time corresponds to `20210728_171749` for tz="ET" and - `20210728_221749` for tz="UTC". - """ - timestamp = get_current_time(tz) - ret = timestamp.strftime("%Y%m%d-%H%M%S") - return ret - - -def get_current_date_as_string(tz: str) -> str: - """ - Return the current date in the format `YYYYMMDD` (e.g., 20210728). - """ - timestamp = get_current_time(tz) - ret = timestamp.strftime("%Y%m%d") - return ret - - -# ############################################################################# -# Bar-related utilities -# ############################################################################# - - -def convert_seconds_to_minutes(num_secs: int) -> int: - hdbg.dassert_lt(0, num_secs) - hdbg.dassert_eq( - num_secs % 60, - 0, - "num_secs=%s is not an integer number of minutes", - num_secs, - ) - num_mins = int(num_secs / 60) - hdbg.dassert_lt(0, num_mins) - _LOG.debug(hprint.to_str("num_secs num_mins")) - return num_mins - - -# TODO(Dan): Unit test. 
-def convert_seconds_to_pandas_minutes(val: int) -> str: - """ - Convert a number of seconds to its Pandas delay representation in minutes. - - E.g. 300 -> '5T' - - :param val: number of seconds to convert - :return: Pandas delay representation - """ - res = convert_seconds_to_minutes(val) - res = f"{res}T" - return res - - -def convert_minutes_to_seconds(num_minutes: int) -> int: - """ - Convert minutes to seconds. - - E.g., 5 (minutes) -> 300 (seconds). - - :param num_minutes: the number of minutes to convert - :return: the number of seconds - """ - hdbg.dassert_isinstance(num_minutes, int) - hdbg.dassert_lt(0, num_minutes) - num_seconds = num_minutes * 60 - _LOG.debug(hprint.to_str("num_minutes num_seconds")) - return num_seconds - - -# TODO(gp): bar_duration_in_secs -> bar_{length,period}_in_secs -def find_bar_timestamp( - current_timestamp: pd.Timestamp, - bar_duration_in_secs: int, - *, - mode: str = "round", - max_distance_in_secs: int = 10, -) -> pd.Timestamp: - """ - Compute the bar (a, b] with period `bar_duration_in_secs` including - `current_timestamp`. - - :param current_timestamp: current timestamp - :param bar_duration_in_secs: bar duration in seconds - :param mode: how to compute the bar - - `round`: snap to the closest bar extreme - - `floor`: pick timestamp to the bar that includes it, returning the lower - bound. E.g., For `9:13am` and 5 mins bars returns `9:10am` - :param max_distance_in_secs: number of seconds representing the maximal distance - that it's allowed from the start of the bar - """ - _LOG.debug( - hprint.to_str( - "current_timestamp bar_duration_in_secs mode max_distance_in_secs" - ) - ) - hdbg.dassert_isinstance(current_timestamp, pd.Timestamp) - # Align. 
- reference_timestamp = f"{bar_duration_in_secs}S" - if mode == "round": - bar_timestamp = current_timestamp.round(reference_timestamp) - elif mode == "floor": - bar_timestamp = current_timestamp.floor(reference_timestamp) - hdbg.dassert_lte(bar_timestamp, current_timestamp) - else: - raise ValueError(f"Invalid mode='{mode}'") - _LOG.debug( - hprint.to_str("current_timestamp bar_duration_in_secs bar_timestamp") - ) - # Sanity check. - if mode == "round": - hdbg.dassert_lte(1, max_distance_in_secs) - if bar_timestamp >= current_timestamp: - distance_in_secs = (bar_timestamp - current_timestamp).seconds - else: - distance_in_secs = (current_timestamp - bar_timestamp).seconds - hdbg.dassert_lte(0, distance_in_secs) - hdbg.dassert_lte( - distance_in_secs, - max_distance_in_secs, - "current_timestamp=%s is too distant from bar_timestamp=%s", - current_timestamp, - bar_timestamp, - ) - _LOG.debug(hprint.to_str("bar_timestamp")) - return bar_timestamp - - -# This can't go in `helpers.hwall_clock_time` since it has a dependency from -# `find_bar_timestamp()` and might introduce an import loop. -def set_current_bar_timestamp( - current_timestamp: pd.Timestamp, - bar_duration_in_secs: int, -) -> None: - """ - Compute the current bar by snapping the current timestamp to the grid. - """ - mode = "round" - # E.g., `current_timestamp` is 09:26 and the next bar is at 09:30, so - # the distance is 4 minutes, i.e. max distance should be within a bar's - # length. 
- max_distance_in_secs = bar_duration_in_secs - bar_timestamp = find_bar_timestamp( - current_timestamp, - bar_duration_in_secs, - mode=mode, - max_distance_in_secs=max_distance_in_secs, - ) - _LOG.debug(hprint.to_str("current_timestamp bar_timestamp")) - hwacltim.set_current_bar_timestamp(bar_timestamp) - - -# ############################################################################# - - -def str_to_timestamp( - timestamp_as_str: str, tz: str, *, datetime_format: Optional[str] = None -) -> pd.Timestamp: - """ - Convert timestamp as string to `pd.Timestamp`. - - Localize input time to the specified timezone. - - E.g., `timestamp_as_str = "20230523_150513"`: - - `tz = "UTC"` -> "2023-05-23 15:05:13+0000" - - `tz = "US/Eastern"` -> "2023-05-23 15:05:13-0400" - - :param timestamp_as_str: string datetime (e.g., 20230523_150513) - :param tz: timezone info (e.g., "US/Eastern") - :param datetime_format: datetime format (e.g., %Y%m%d_%H%M%S) - If None, infer automatically - :return: pd.Timestamp with a specified timezone - """ - hdbg.dassert_isinstance(timestamp_as_str, str) - hdbg.dassert_isinstance(tz, str) - msg = "timestamp_as_str must be nonempty." - hdbg.dassert_is_not(timestamp_as_str, "", msg=msg) - _LOG.debug(hprint.to_str("timestamp_as_str tz datetime_format")) - if datetime_format is None: - # Try to infer the format automatically. - timestamp = pd.to_datetime(timestamp_as_str, infer_datetime_format=True) - else: - # Convert using the provided format. - timestamp = pd.to_datetime(timestamp_as_str, format=datetime_format) - # Convert to the specified timezone - timestamp = timestamp.tz_localize(tz) - return timestamp - - -def _handle_incorrect_conversions( - date: str, -) -> Optional[Tuple[Optional[str], Callable[[str], str]]]: - """ - Change data pre-processing for cases when `pd.to_datetime` is mistaken. 
- - :param date: string date - :return: date format and a function to apply to string dates before - passing them into `pd.to_datetime()` - """ - if len(date) in [7, 8]: - # "2021-M2" is transformed to '2020-01-01 00:00:01' by - # `pd.to_datetime`. - if date[:4].isdigit() and date[4] in ["-", ".", "/"] and date[5] == "M": - - def modify_monthly_date(x: str) -> str: - year_number = int(x[:4]) - month_number = x[6:] - num_days_in_month = calendar.monthrange( - year_number, int(month_number) - )[1] - modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" - return modified_x - - return "%Y-%m-%d", modify_monthly_date - return None - - -def _shift_to_period_end( # pylint: disable=too-many-return-statements - date: str, -) -> Optional[Callable[[StrictDatetime], StrictDatetime]]: - """ - Get function to shift the dates to the end of period. - - :param date: string date - :return: a function to shift the dates to the end of period. If `None`, no - shift is needed - """ - - def shift_to_month_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.MonthEnd(0) - - def shift_to_quarter_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.QuarterEnd(0) - - def shift_to_year_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.YearEnd(0) - - if date[:4].isdigit(): - if len(date) == 7: - if date[5:].isdigit(): - # "2020-12" format. - return shift_to_month_end - if date[5] == "Q": - # "2021-Q1" format. - return shift_to_quarter_end - elif len(date) == 6: - # "2021Q1" format. - if date[4] == "Q": - return shift_to_quarter_end - elif len(date) == 4: - # "2021" format. - return shift_to_year_end - # "September 2020" or "Sep 2020" format. - # Get a flat list of month aliases. The full month name comes first. - # Since the `calendar` is using the natural month order, we need to - # shift the month aliases by one to get the correct order. 
- # E.g., `calendar.month_name[1:]` is `['January', 'February', ...]` and - # `calendar.month_abbr[1:]` is `['Jan', 'Feb', ...]`. - month_aliases = list(calendar.month_name[1:]) + list(calendar.month_abbr[1:]) - pattern = re.compile("|".join(month_aliases), re.IGNORECASE) - match = pattern.search(date) - if match is None: - return None - span = match.span() - date_without_month = f"{date[: span[0]]}{date[span[1] :]}".strip() - if len(date_without_month) == 4 and date_without_month.isdigit(): - return shift_to_month_end - return None - - -def _determine_date_format( - date: str, date_standard: Optional[str] = None -) -> Optional[Tuple[str, Callable[[str], str]]]: - """ - Determine date format for cases when `pd.to_datetime` fails. - - :param date: date string - :param date_standard: "standard" or "ISO_8601", `None` defaults to - "standard" - :return: date format and a function to transform date strings before - converting them to datetime using `pd.to_datetime` - """ - date_standard = date_standard or "standard" - if date_standard == "standard": - year_format = "%Y" - week_format = "%W" - day_of_week_format = "%w" - elif date_standard == "ISO_8601": - year_format = "%G" - week_format = "%V" - day_of_week_format = "%u" - else: - raise ValueError(f"Invalid `date_standard`='{date_standard}'") - # Determine format and original `date` modification function. - format_ = "" - if date[:4].isdigit(): - format_ += year_format - elif date[0] == "Q" and len(date) == 7 and date[-4:].isdigit(): - # "Q1 2020" format. 
- - def modify_quarterly_data(x: str) -> str: - year_number = x[-4:] - quarter = int(x[1:2]) - last_month_of_quarter = 3 * quarter - last_day_of_quarter = calendar.monthrange( - int(year_number), last_month_of_quarter - )[1] - modified_x = ( - f"{year_number}-{last_month_of_quarter}-{last_day_of_quarter}" - ) - return modified_x - - format_ = f"{year_format}-%m-%d" - return format_, modify_quarterly_data - else: - _LOG.error("This format is not supported: '%s'", date) - return None - next_char = date[4] - if next_char in ["-", ".", "/", " "]: - if len(date) not in [7, 8]: - _LOG.error("This format is not supported: '%s'", date) - return None - format_ += "-" - next_char = date[5] - if next_char == "W": - # "2020-W14" format. - - def modify_weekly_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - return x + "-6" - - date_modification_func = modify_weekly_date - format_ += f"W{week_format}-{day_of_week_format}" - elif next_char == "S": - # "2020-S1" - semi-annual format. - def modify_semiannual_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - return x.replace("S1", "06-30").replace("S2", "12-31") - - date_modification_func = modify_semiannual_date - format_ += "%m-%d" - elif next_char == "B": - # "2020-B1" - bi-monthly format (every other month). - # We'll index by the start of the month starting with January - # based on PiT. - - def modify_bimonthly_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - bimonth_number = x[6] - month_number = int(bimonth_number) * 2 - 1 - modified_x = f"{x[:5]}{month_number}-01" - return modified_x - - date_modification_func = modify_bimonthly_date - format_ += "%m-%d" - else: - _LOG.error("This format is not supported: '%s'", date) - return None - elif next_char == "M" and len(date) == 7: - # "1959M01" format. 
- - def modify_monthly_date(x: str) -> str: - year_number = int(x[:4]) - month_number = x[5:] - num_days_in_month = calendar.monthrange( - year_number, int(month_number) - )[1] - modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" - return modified_x - - date_modification_func = modify_monthly_date - format_ += "-%m-%d" - else: - _LOG.error("This format is not supported: '%s'", date) - return None - return format_, date_modification_func - - -def to_generalized_datetime( - dates: Union[pd.Series, pd.Index], date_standard: Optional[str] = None -) -> Union[pd.Series, pd.Index]: - """ - Convert string dates to datetime. - - This works like `pd.to_datetime`, but supports more date formats and shifts - the dates to the end of period instead of the start. - - :param dates: series or index of dates to convert - :param date_standard: "standard" or "ISO_8601", `None` defaults to - "standard" - :return: datetime dates - """ - # This function doesn't deal with mixed formats. - hdbg.dassert_isinstance(dates, Iterable) - hdbg.dassert(not isinstance(dates, str)) - # Try converting to datetime using `pd.to_datetime`. - format_example_index = -1 - date_example = dates.tolist()[format_example_index] - format_fix = _handle_incorrect_conversions(date_example) - if format_fix is not None: - format_, date_modification_func = format_fix - dates = dates.map(date_modification_func) - date_example = dates.tolist()[format_example_index] - else: - format_ = None - datetime_dates = pd.to_datetime(dates, format=format_, errors="coerce") - # Shift to end of period if conversion has been successful. - # Handle both scalar and array cases for `pd.isna()`. 
- if hasattr(datetime_dates, "all"): - # datetime_dates is a Series or array-like - all_na = pd.isna(datetime_dates).all() - datetime_example = ( - datetime_dates.tolist()[format_example_index] - if hasattr(datetime_dates, "tolist") - else datetime_dates - ) - else: - # datetime_dates is a scalar - all_na = pd.isna(datetime_dates) - datetime_example = datetime_dates - if not all_na: - if ( - not pd.isna(datetime_example) - and hasattr(datetime_example, "strftime") - and datetime_example.strftime("%Y-%m-%d") == date_example - ): - return datetime_dates - shift_func = _shift_to_period_end(date_example) - if shift_func is not None: - if hasattr(datetime_dates, "map"): - datetime_dates = datetime_dates.map(shift_func) - else: - # For scalar case, apply the shift function directly - datetime_dates = shift_func(datetime_dates) - return datetime_dates - # If standard conversion fails, attempt our own conversion. - date_standard = date_standard or "standard" - format_determination_output = _determine_date_format( - date_example, date_standard - ) - if format_determination_output is None: - return datetime_dates - format_, date_modification_func = format_determination_output - dates = dates.map(date_modification_func) - return pd.to_datetime(dates, format=format_) - - -# ############################################################################# -# Unix to epoch conversion -# ############################################################################# - - -def convert_unix_epoch_to_timestamp( - epoch: int, unit: str = "ms", tz: str = "UTC" -) -> pd.Timestamp: - """ - Convert Unix epoch to timestamp. - - :param epoch: Unix time epoch - :param unit: epoch's time unit - :param tz: resulting timestamp timezone - :return: timestamp - """ - timestamp = pd.Timestamp(epoch, unit=unit, tz=tz) - return timestamp - - -def convert_timestamp_to_unix_epoch( - timestamp: pd.Timestamp, unit: str = "ms" -) -> int: - """ - Convert timestamp to Unix epoch. 
- - :param timestamp: timestamp - :param unit: epoch's time unit - :return: Unix time epoch - """ - # Make timestamp tz-naive if it is not. Converted to UTC tz before becoming - # naive automatically. - if timestamp.tz: - timestamp = timestamp.tz_convert(None) - # Convert to epoch. - epoch: int = (timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta( - "1" + unit - ) - return epoch - - -# TODO(Sameep): Reuse this function across the code base (`jackpy strftime`) when -# it doesn't make the import graph too complicated. -# TODO(gp): This seems redundant with get_timestamp() in `hwall_clock_time`. -def timestamp_to_str( - timestamp: pd.Timestamp, *, include_msec: bool = False -) -> str: - """ - Convert timestamp to string. - - :param timestamp: timestamp to convert - :param include_msec: whether to include milliseconds e.g. - `20230727_111057_123` - :return: timestamp in string format e.g. `20230727_111057`. - """ - hdbg.dassert_isinstance(timestamp, pd.Timestamp) - # Convert timestamp to string. - if include_msec: - # %f is the format code for microseconds. We truncate the last 3 digits - # to get milliseconds. - # This results in a string like "20230426_153042_123". 
- timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3] - else: - timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") - return timestamp_str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py deleted file mode 100644 index a11dfb243..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py +++ /dev/null @@ -1,1134 +0,0 @@ -""" -Import as: - -import helpers.hdbg as hdbg -""" - -import functools -import logging -import os -import pprint -import sys -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union - -# This module can depend only on: -# - Python standard modules -# - `helpers/hserver.py` -# See `helpers/dependencies.txt` for more details - -_LOG = logging.getLogger(__name__) - - -# Enforce that certain warnings are disabled. -import helpers.hwarnings as hwarnin # # isort:skip # noqa: E402,F401,F403 # pylint: disable=unused-import - - -# TODO(gp): Make these generate from MAPPING below. -INFO = "\033[36mINFO\033[0m" -WARNING = "\033[33mWARNING\033[0m" -ERROR = "\033[31mERROR\033[0m" - - -# ############################################################################# -# dfatal. -# ############################################################################# - -# Copied from printing.py to avoid cyclical dependencies. - - -def _line(chars: str = "#", num_cols: int = 80) -> str: - line_ = chars * num_cols + "\n" - return line_ - - -def _frame(x: str, chars: str = "#", num_cols: int = 80) -> str: - """ - Return a string with a frame of num_cols chars around the object x. - - :param x: object to print through str() - :param num_cols: number - """ - line_ = _line(chars=chars, num_cols=num_cols) - ret = "" - ret += line_ - ret += str(x) + "\n" - ret += line_ - return ret - - -# End of copy. 
- - -def dfatal(message: str, assertion_type: Optional[Any] = None) -> None: - """ - Print an error message and exits. - """ - ret = "" - message = str(message) - ret = "\n" + _frame(message, "#", 80) - if assertion_type is None: - assertion_type = AssertionError - raise assertion_type(ret) - - -# ############################################################################# -# dassert. -# ############################################################################# - -# TODO(gp): Would be nice to have a way to disable the assertions in certain -# builds, or at least know how much time is spent in the assertions. -# To disable we could have a fake_dbg.py that has all `dassert_*`, `logging` -# defined as `lambda x: 0`. - - -# INVARIANTS: -# - `dassert_COND()` checks that COND is true, and raises if COND is False -# - For this reason the condition inside the `dassert` is typically in the form -# `if not (...):`, even this might annoy the linter or look weird -# - The parameter `only_warning` is to report a problem but keep going. -# This can be used (sparingly) for production when we want to be aware of -# certain conditions without aborting. - - -def _to_msg(msg: Optional[str], *args: Any) -> str: - """ - Format error message `msg` using the params in `args`, like `msg % args`. - """ - if msg is None: - # If there is no message, we should have no arguments to format. - assert not args, f"args={str(args)}" - res = "" - else: - try: - res = msg % args - except TypeError as e: - # The arguments didn't match the format string: report error and - # print the result somehow. - res = f"Caught assertion while formatting message:\n'{str(e)}'" - _LOG.warning(res) - res += "\n" + msg + " " + " ".join(map(str, args)) - # res = "(" + res + ") " - return res - - -def _dfatal( - txt: Union[str, Iterable[str]], - msg: Optional[str], - *args: Any, - only_warning: bool = False, -) -> None: - """ - Abort execution. 
- - :param only_warning: issue a warning instead of aborting - """ - dfatal_txt = "* Failed assertion *\n" - # TODO(gp): This should be an iterable. - if isinstance(txt, list): - dfatal_txt += "\n".join(txt) - else: - dfatal_txt += str(txt) - msg = _to_msg(msg, *args) - if msg: - if not dfatal_txt.endswith("\n"): - dfatal_txt += "\n" - dfatal_txt += msg - if only_warning: - # Only warn. - dfatal_txt += "\nContinuing as per user request with only_warning=True" - _LOG.warning(dfatal_txt) - else: - # Abort. - dfatal(dfatal_txt) - - -def dassert( - cond: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Handle the somehow frequent case of using `dassert` instead of another - # one, e.g., `dassert(y, list)` - if msg is not None: - assert isinstance(msg, str), ( - f"You passed '{msg}' or type '{type(msg)}' instead of str" - ) - if not cond: - txt = f"cond={cond}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_eq( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 == val2 - if not cond: - txt = f"'{val1}'\n==\n'{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_ne( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 != val2 - if not cond: - txt = f"'{val1}'\n!=\n'{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_imply( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = not val1 or val2 - if not cond: - txt = f"'{val1}' implies '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Comparison related. 
-# ############################################################################# - - -def dassert_lt( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 < val2 - if not cond: - txt = f"{val1} < {val2}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_lte( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 <= val2 - if not cond: - txt = f"{val1} <= {val2}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_lgt( - lower_bound: float, - x: float, - upper_bound: float, - lower_bound_closed: bool, - upper_bound_closed: bool, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that `lower_bound <= x <= upper_bound`. - - :param lower_bound_closed, upper_bound_closed: control the open- - ness/close-ness of the interval extremes. - """ - # `lower_bound <= or < x`. - if lower_bound_closed: - dassert_lte(lower_bound, x, msg, *args, only_warning=only_warning) - else: - dassert_lt(lower_bound, x, msg, *args, only_warning=only_warning) - # `x <= or < upper_bound`. - if upper_bound_closed: - dassert_lte(x, upper_bound, msg, *args, only_warning=only_warning) - else: - dassert_lt(x, upper_bound, msg, *args, only_warning=only_warning) - - -def dassert_is_proportion( - x: float, msg: Optional[str] = None, *args: Any, only_warning: bool = False -) -> None: - """ - Assert that `0 <= x <= 1`. - """ - lower_bound_closed = True - upper_bound_closed = True - dassert_lgt( - 0, - x, - 1, - lower_bound_closed, - upper_bound_closed, - msg, - *args, - only_warning=only_warning, - ) - - -# ############################################################################# -# Membership. 
-# ############################################################################# - - -def dassert_in( - value: Any, - valid_values: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = value in valid_values - if not cond: - txt = f"'{value}' in '{valid_values}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_not_in( - value: Any, - valid_values: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = value not in valid_values - if not cond: - txt = f"'{value}' not in '{valid_values}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Type related. -# ############################################################################# - - -def dassert_is( - val1: Optional[str], - val2: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 is val2 - if not cond: - txt = f"'{val1}' is '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_not( - val1: Any, - val2: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 is not val2 - if not cond: - txt = f"'{val1}' is not '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_type_is( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # pylint: disable=unidiomatic-typecheck - cond = type(val1) is val2 - if not cond: - txt = f"Type of '{val1}' is '{type(val1)}' instead of '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): This is redundant with dassert_isinstance(..., (str, float)). 
-def dassert_type_in( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # pylint: disable=unidiomatic-typecheck - cond = type(val1) in val2 - if not cond: - txt = f"Type of '{val1}' is '{type(val1)}' not in '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_isinstance( - val1: Any, - val2: Union[type, Iterable[type]], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = isinstance(val1, val2) # type: ignore[arg-type] - if not cond: - txt = f"Instance of '{val1}' is '{type(val1)}' instead of '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_issubclass( - val1: Any, - val2: Union[type, Iterable[type]], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that an object `val1` is a subclass of `val2`. - """ - cond = issubclass(val1.__class__, val2) # type: ignore[arg-type] - if not cond: - txt = ( - f"Instance '{str(val1)}' of class '{val1.__class__.__name__}' is " - f"not a subclass of '{val2}'" - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_integer( - val: Union[int, float], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that val represents an integer number, independently of the type. - """ - if isinstance(val, int): - pass - elif isinstance(val, float): - cond = val == int(val) - if not cond: - txt = f"Invalid val='{val}' of type '{type(val)}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - else: - txt = f"Invalid val='{val}' of type '{type(val)}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_callable( - func: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that an object `val1` is callable. 
- """ - cond = callable(func) - if not cond: - txt = f"Obj '{str(func)}' of type '{str(type(func))}' is not callable" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Set related. -# ############################################################################# - - -# TODO(gp): A more general solution is to have a function that traverses an obj -# and creates a corresponding obj only with deterministic data structures (e.g., -# converting sets and dicts to sorted lists). Then we can print with `pprint`. -def _set_to_str(set_: Set[Any], thr: Optional[int] = 20) -> str: - """ - Return a string with the ordered content of a set. - - This is useful when printing assertions that we want to be deterministic (e.g., - if we use it inside unit tests like: - ``` - with self.assertRaises(AssertionError) as cm: - ... - actual = str(cm.exception) - expected = r - self.assert_equal(actual, expected, fuzzy_match=True) - ``` - """ - try: - list_ = sorted(list(set_)) - # If sets have less than `thr` elements print them as well, otherwise - # print the beginning / end. - if thr is not None and len(list_) > thr: - txt = f"{len(list_)} [{min(list_)}, ... {max(list_)}]" - else: - txt = str(list_) - except TypeError: - # Sometimes the set has elements of different types and we can't easily - # sort them. In these cases we just skip the sorting. - txt = str(list(set_)) - return txt - - -def dassert_set_eq( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has the same elements as `val2`, raise otherwise. 
- - :param only_warning: issue a warning instead of aborting - """ - val1 = set(val1) - val2 = set(val2) - # pylint: disable=superfluous-parens - if not (val1 == val2): - txt = [] - txt.append("val1 - val2=" + _set_to_str(val1.difference(val2))) - txt.append("val2 - val1=" + _set_to_str(val2.difference(val1))) - txt.append("val1=" + _set_to_str(val1)) - txt.append("set eq") - txt.append("val2=" + _set_to_str(val2)) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): -> dassert_issubset to match Python set function. -def dassert_is_subset( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` is a subset of `val2`, raise otherwise. - """ - val1 = set(val1) - val2 = set(val2) - if not val1.issubset(val2): - txt = [] - txt.append("val1=" + _set_to_str(val1)) - txt.append("issubset") - txt.append("val2=" + _set_to_str(val2)) - txt.append("val1 - val2=" + _set_to_str(val1.difference(val2))) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): -> dassert_no_intersection to match other functions. -def dassert_not_intersection( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has no intersection `val2`, raise otherwise. - """ - val1 = set(val1) - val2 = set(val2) - if val1.intersection(val2): - txt = [] - txt.append("val1=" + _set_to_str(val1)) - txt.append("has no intersection") - txt.append("val2=" + _set_to_str(val2)) - txt.append( - "val1.intersection(val2)=" + _set_to_str(val1.intersection(val2)) - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_iterable( - val: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val` is an iterable (excluding strings, bytes), raise otherwise. 
- """ - cond = isinstance(val, Iterable) and not isinstance( - val, (str, bytes, bytearray) - ) - if not cond: - txt = f"Val '{val}' of type '{type(val)}' is not an iterable" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Array related. -# ############################################################################# - - -def dassert_no_duplicates( - val1: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has no duplicates, raise otherwise. - """ - dassert_is_iterable(val1) - cond = len(set(val1)) == len(val1) - if not cond: - # Count the occurrences of each element of the seq. - v_to_num = [(v, val1.count(v)) for v in set(val1)] - # Build list of elements with duplicates. - dups = [v for v, n in v_to_num if n > 1] - txt = [] - txt.append("val1=\n" + pprint.pformat(val1)) - txt.append("has duplicates") - txt.append(",".join(map(str, dups))) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_sorted( - val1: Union[List, Tuple], - sort_kwargs: Optional[Dict[Any, Any]] = None, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val` is sorted, raise otherwise. - """ - # TODO(gp): Extend for pd.Series using the proper method. 
- dassert_isinstance(val1, (list, tuple)) - sort_kwargs = {} if sort_kwargs is None else sort_kwargs - sorted_val1 = sorted(val1, **sort_kwargs) - cond = sorted_val1 == val1 - if not cond: - txt = [] - txt.append("val1=\n" + pprint.pformat(val1)) - txt.append("is not sorted") - txt.append("sorted(val1)=\n" + pprint.pformat(sorted_val1)) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_eq_all( - val1: Iterable[Any], - val2: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that two iterables `val1` and `val2` are equal, raise otherwise. - """ - dassert_is_iterable(val1) - val1 = list(val1) - dassert_is_iterable(val2) - val2 = list(val2) - cond = val1 == val2 - if not cond: - # mask = val1 != val2 - txt = [] - txt.append(f"val1={len(val1)}\n{val1}") - txt.append(f"val2={len(val2)}\n{val2}") - # txt += "\ndiff=%s" % mask.sum() - # txt += "\n%s" % val1[mask] - # txt += "\n%s" % val2[mask] - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def _get_first_type(obj: Iterable, tag: str) -> Type: - obj_types = {type(v) for v in obj} - dassert_eq( - len(obj_types), - 1, - "More than one type for elem of %s=%s", - tag, - map(str, obj_types), - ) - return list(obj_types)[0] - - -# TODO(gp): IMO a bit overfit to the use case. Move this to the files that are -# using is. -def dassert_all_attributes_are_same( - list_: List[Any], - attribute_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check if all the elements in the list have the same attribute value. 
- - :param list_: list of objects - :param attribute_name: name of the attribute to check - """ - dassert_isinstance(list_, list) - dassert_isinstance(attribute_name, str) - attribute_values = [getattr(element, attribute_name) for element in list_] - if len(set(attribute_values)) != 1: - txt = [] - txt.append("Elements in the list have different values for ") - txt.append(f"attribute {attribute_name}:\n\t{set(attribute_values)}") - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_array_has_same_type_element( - obj1: Any, - obj2: Any, - only_first_elem: bool, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that two objects iterables like arrays (e.g., pd.Index) have elements - of the same type. - - :param only_first_elem: whether to check only the first element or - all the elements of the iterable. - """ - # Get the types to compare. - if only_first_elem: - obj1_first_type = type(obj1[0]) - obj2_first_type = type(obj2[0]) - else: - obj1_first_type = _get_first_type(obj1, "obj1") - obj2_first_type = _get_first_type(obj2, "obj2") - # - if obj1_first_type != obj2_first_type: - txt = [] - num_elems = 5 - txt.append(f"obj1=\n{obj1[:num_elems]}") - txt.append(f"obj2=\n{obj2[:num_elems]}") - txt.append( - f"type(obj1)='{obj1_first_type}' is different from type(obj2)='{obj2_first_type}'" - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_container_type( - obj: Any, - container_type: Optional[Any], - elem_type: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert `obj` is a certain type of container containing certain type of - objects. - - E.g., `obj` is a list of strings. - """ - # Add information about the obj. - if not msg: - msg = "" - msg = msg.rstrip("\n") + f"\nobj='{str(obj)}'" - # Check container. 
- if container_type is not None: - dassert_isinstance( - obj, container_type, msg, *args, only_warning=only_warning - ) - # Check the elements of the container. - if elem_type is not None: - for elem in obj: - dassert_isinstance( - elem, elem_type, msg, *args, only_warning=only_warning - ) - - -# TODO(gp): @all Replace calls to this with calls to `dassert_container_type()`. -def dassert_list_of_strings( - list_: List[str], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # TODO(gp): Allow iterable? - dassert_isinstance(list_, list, msg, *args, only_warning=only_warning) - for elem in list_: - dassert_isinstance(elem, str, msg, *args, only_warning=only_warning) - - -def dassert_all_defined_or_all_None( - vals: List[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that all the values in a list are either all defined or all None. - """ - all_defined_cond = all(val is not None for val in vals) - all_none_cond = all(val is None for val in vals) - cond = all_defined_cond or all_none_cond - if not cond: - txt = f"Some values in list are defined and some are None: '{vals}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# File related. -# ############################################################################# - - -def dassert_path_exists( - path: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - dassert_isinstance(path, str) - path = os.path.abspath(path) - if not os.path.exists(path): - txt = f"Path '{path}' doesn't exist!" 
- _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_path_not_exists( - path: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - dassert_isinstance(path, str) - dassert_ne(path, "") - path = os.path.abspath(path) - if os.path.exists(path): - txt = f"Path '{path}' already exist!" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_file_exists( - file_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert unless `file_name` exists and it's a file and not a directory. - """ - dassert_isinstance(file_name, str) - dassert_ne(file_name, "") - file_name = os.path.abspath(file_name) - # `file_name` exists. - exists = os.path.exists(file_name) - if not exists: - txt = f"File '{file_name}' doesn't exist" - _dfatal(txt, msg, *args, only_warning=only_warning) - # `file_name` is a file. - is_file = os.path.isfile(file_name) - if not is_file: - txt = f"'{file_name}' is not a file" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_dir_exists( - dir_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert unless `dir_name` exists and it's a directory. - """ - dassert_isinstance(dir_name, str) - dassert_ne(dir_name, "") - dir_name = os.path.abspath(dir_name) - # `dir_name` exists. - exists = os.path.exists(dir_name) - if not exists: - txt = f"Dir '{dir_name}' doesn't exist" - _dfatal(txt, msg, *args, only_warning=only_warning) - # `dir_name` is a directory. - is_dir = os.path.isdir(dir_name) - if not is_dir: - txt = f"'{dir_name}' is not a dir" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_file_extension( - file_name: str, - extensions: Union[str, List[str]], - only_warning: bool = False, -) -> None: - """ - Ensure that file has one of the given extensions. 
- - :param extensions: don't need to start with `.`, e.g., use `csv` instead of - `.csv` - """ - # Handle single extension case. - if isinstance(extensions, str): - extensions = [extensions] - # Make sure extension starts with . - extensions = [f".{e}" if not e.startswith(".") else e for e in extensions] - # Check. - name, act_ext = os.path.splitext(file_name) - if act_ext == ".gz": - # Concatenate with the preceding extension, e.g., `.csv.gz`. - ext = os.path.splitext(name)[-1] - act_ext = (ext + act_ext).lower() - dassert_in( - act_ext, - extensions, - "Invalid extension '%s' for file '%s'", - act_ext, - file_name, - only_warning=only_warning, - ) - - -def dassert_is_path_abs(path: str, only_warning: bool = False) -> None: - """ - Assert that `path` is an absolute path. - """ - dassert_isinstance(path, str) - dassert_ne(path, "") - dassert( - os.path.isabs(path), - "Path '%s' is not absolute", - path, - only_warning=only_warning, - ) - - -def dassert_related_params( - params: Dict[str, Any], - mode: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check whether `params` have a certain relationship. - - :params params: dictionary of parameter name, value - :params mode: - - `all_or_none_non_null`: either all params are null (i.e., `bool` evaluate - to false) or are non-null - - `all_or_none_non_None`: either all params are None or all params are not - None. This is useful when passing set of params that are optional - """ - # TODO(gp): Allow iterable? - dassert_isinstance(params, dict, msg, *args, only_warning=only_warning) - if mode == "all_or_none_non_null": - # Find out if at least one value is set. 
- is_non_null = map(bool, params.values()) - one_is_non_null = functools.reduce(lambda x, y: x or y, is_non_null) - for k, v in params.items(): - if bool(v) != one_is_non_null: - txt = f"All or none parameter should be non-null:\n{k}={v}\nparams={pprint.pformat(params)}\n" - _dfatal(txt, msg, *args, only_warning=only_warning) - elif mode == "all_or_none_non_None": - # Find out if at least one value is not None. - is_non_None = map(lambda x: x is not None, params.values()) - one_is_non_None = functools.reduce(lambda x, y: x or y, is_non_None) - for k, v in params.items(): - if (v is not None) != one_is_non_None: - txt = f"All or none parameter should be non-None:\n{k}={v}\nparams={pprint.pformat(params)}\n" - _dfatal(txt, msg, *args, only_warning=only_warning) - else: - raise ValueError(f"Invalid mode='{mode}'") - - -# ############################################################################# -# Command line. -# ############################################################################# - - -# Sample at the beginning of time before we start fiddling with command line -# args. -_CMD_LINE = " ".join(arg for arg in sys.argv) - - -def get_command_line() -> str: - return _CMD_LINE - - -# ############################################################################# -# Logger. -# ############################################################################# - - -# TODO(gp): Move this to helpers/hlogging.py and change all the callers. 
- - -# TODO(gp): maybe replace "force_verbose_format" and "force_print_format" with -# a "mode" in ("auto", "verbose", "print") -def init_logger( - verbosity: int = logging.INFO, - use_exec_path: bool = False, - log_filename: Optional[str] = None, - force_verbose_format: bool = False, - force_print_format: bool = False, - force_white: bool = True, - force_no_warning: bool = False, - in_pytest: bool = False, - report_memory_usage: bool = False, - report_cpu_usage: bool = False, - report_command_line: bool = True, -) -> None: - """ - Send stderr and stdout to logging (optionally teeing the logs to file). - - - Note that: - - logging.DEBUG = 10 - - logging.INFO = 20 - - :param verbosity: verbosity to use - :param use_exec_path: use the name of the executable - :param log_filename: log to that file - :param force_verbose_format: use the verbose format for the logging - :param force_print_format: use the print format for the logging - :param force_white: use white color for printing. This can pollute the - output of a script when redirected to file with echo characters - :param in_pytest: True when we are running through pytest, so that we - can overwrite the default logger from pytest - :param report_memory_usage: turn on reporting memory usage - :param report_cpu_usage: turn on reporting CPU usage - :param report_command_line: turn on reporting command line - """ - # Try to minimize dependencies. - import helpers.hlogging as hloggin - - # TODO(gp): Print the stacktrace every time is called. - if force_white: - sys.stdout.write("\033[0m") - if isinstance(verbosity, str): - # pylint: disable=protected-access - dassert(hasattr(logging, "_checkLevel")) - assert hasattr(logging, "_checkLevel") - verbosity = logging._checkLevel(verbosity) - # From https://stackoverflow.com/questions/14058453 - root_logger = logging.getLogger() - # Set verbosity for all loggers. 
- root_logger.setLevel(verbosity) - # if False: - # eff_level = root_logger.getEffectiveLevel() - # print( - # "effective level= %s (%s)" - # % (eff_level, logging.getLevelName(eff_level)) - # ) - # if False: - # # dassert_eq(root_logger.getEffectiveLevel(), verbosity) - # for handler in root_logger.handlers: - # handler.setLevel(verbosity) - # Exit to avoid to replicate the same output multiple times. - if not in_pytest and root_logger.handlers: - print(WARNING + ": Logger already initialized: skipping") - if False: - # Print info about the caller. - import traceback - - traceback.print_stack() - return - # - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(verbosity) - # Set the formatter. - # formatter = hloggin.set_v1_formatter( - dassert(hasattr(hloggin, "set_v2_formatter")) - assert hasattr(hloggin, "set_v2_formatter") - formatter = hloggin.set_v2_formatter( - ch, - root_logger, - force_no_warning, - force_print_format, - force_verbose_format, - report_memory_usage, - report_cpu_usage, - ) - # Find name of the log file. - if use_exec_path and log_filename is None: - dassert_is(log_filename, None, msg="Can't specify conflicting filenames") - # Use the name of the executable. - import inspect - - frame = inspect.stack()[1] - module = inspect.getmodule(frame[0]) - if not hasattr(module, __file__): - if module is None: - filename = "none" - else: - filename = str(module.__file__) - else: - filename = "unknown_module" - log_filename = os.path.realpath(filename) + ".log" - # Handle teeing to a file. - if log_filename: - # Create a dir (and all its missing parent dirs) if it doesn't exist. - log_dirname = os.path.dirname(log_filename) - if log_dirname != "" and not os.path.exists(log_dirname): - os.makedirs(log_dirname) - # Delete the file since we don't want to append. - if os.path.exists(log_filename): - try: - os.unlink(log_filename) - except FileNotFoundError as e: - print(e) - # Tee to file. 
- file_handler = logging.FileHandler(log_filename) - root_logger.addHandler(file_handler) - file_handler.setFormatter(formatter) - # - _LOG.info("Saving log to file '%s'", log_filename) - # - _LOG.debug("Effective logging level=%s", _LOG.getEffectiveLevel()) - # Shut up chatty modules. - dassert(hasattr(hloggin, "shutup_chatty_modules")) - assert hasattr(hloggin, "shutup_chatty_modules") - hloggin.shutup_chatty_modules(verbose=False) - if report_command_line: - _LOG.info("> cmd='%s'", get_command_line()) - # - # test_logger() - - -def set_logger_verbosity( - verbosity: int, module_name: Optional[str] = None -) -> None: - """ - Change the verbosity of the logging after the initialization. - - Passing a module_name (e.g., matplotlib) one can change the logging - of that specific module. - - E.g., set_logger_verbosity(logging.WARNING, "matplotlib") - """ - logger = logging.getLogger(module_name) - if module_name is None and not logger.handlers: - assert 0, "ERROR: Logger not initialized" - logger.setLevel(verbosity) - eff_level = logger.getEffectiveLevel() - print(f"effective level= {eff_level} ({logging.getLevelName(eff_level)})") - dassert_eq(logger.getEffectiveLevel(), verbosity) - - -def get_logger_verbosity() -> int: - root_logger = logging.getLogger() - if not root_logger.handlers: - assert 0, "ERROR: Logger not initialized" - return root_logger.getEffectiveLevel() - - -# ############################################################################# -# Command line. -# ############################################################################# - - -# Sample at the beginning of time before we start fiddling with command line -# args. 
-_CMD_LINE = " ".join(arg for arg in sys.argv) -_EXEC_NAME = os.path.abspath(sys.argv[0]) - - -def get_command_line() -> str: - return _CMD_LINE - - -def get_exec_name() -> str: - return _EXEC_NAME diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py deleted file mode 100644 index 13d388249..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Import as: - -import helpers.hdict as hdict -""" - -import logging -from typing import ( - Any, - Dict, - Generator, - Iterable, - Mapping, - Optional, - Tuple, - Union, -) - -try: - from collections.abc import Mapping as AbcMapping -except ImportError: - from collections import Mapping as AbcMapping - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -def get_nested_dict_iterator( - nested: Mapping[Any, Any], - path: Optional[Iterable[Any]] = None, -) -> Generator[Tuple[Tuple, Any], None, None]: - """ - Return nested mapping iterator that iterates in a depth-first fashion. - - :param nested: nested dictionary - :param path: path to node to start the visit from or `None` to start from - the root - :return: path to leaf node, value - """ - if path is None: - path = [] - if not isinstance(path, tuple): - path = tuple(path) - if not nested.items(): - yield path, nested - for key, value in nested.items(): - local_path = path + (key,) - if isinstance(value, AbcMapping): - yield from get_nested_dict_iterator(value, local_path) - else: - yield local_path, value - - -def extract_leaf_values(nested: Dict[Any, Any], key: Any) -> Dict[Any, Any]: - """ - Extract leaf values with key matching `key`. 
- - :param nested: nested dictionary - :param key: leaf key value to match - :return: dict with key = path as tuple, value = leaf value - """ - d = {} - for k, v in get_nested_dict_iterator(nested): - if k[-1] == key: - d[k] = v - return d - - -_NO_VALUE_SPECIFIED = "__NO_VALUE_SPECIFIED__" - - -def typed_get( - dict_: Union[Dict, "Config"], # noqa: F821 - key: Any, - default_value: Optional[Any] = _NO_VALUE_SPECIFIED, - *, - expected_type: Optional[Any] = None, -) -> Any: - """ - Equivalent to `dict.get(key, default_val)` and check the type of the - output. - - :param default_value: default value to return if key is not in `config` - :param expected_type: expected type of `value` - :return: config[key] if available, else `default_value` - """ - hdbg.dassert_isinstance(dict_, dict) - if default_value == _NO_VALUE_SPECIFIED: - # No value is specified so check that the key is present with dassert_in - # to report a decent error. - hdbg.dassert_in(key, dict_) - try: - ret = dict_.__getitem__(key) - except KeyError as e: - # No key: use the default val if it was passed or asserts. - _LOG.debug("e=%s", e) - # We can't use None since None can be a valid default value, so we use - # another value. - if default_value != _NO_VALUE_SPECIFIED: - ret = default_value - else: - # No default value found, then raise. - raise e - if expected_type is not None: - hdbg.dassert_isinstance(ret, expected_type) - return ret - - -def checked_get( - dict_: Dict, - key: Any, -) -> Any: - """ - Ensure that the key exists and print a decent error message in case of - error, instead of a generic `TypeError`. 
- """ - hdbg.dassert_in(key, dict_) - return dict_[key] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py deleted file mode 100644 index 44f973a89..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py +++ /dev/null @@ -1,871 +0,0 @@ -""" -Import as: - -import helpers.hdocker as hdocker -""" - -import argparse -import copy -import hashlib -import logging -import os -import platform -import subprocess -import time -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Docker utilities -# ############################################################################# - - -# TODO(gp): This is a function of the architecture. Move to the repo_config.py -# or the config file. -def get_use_sudo() -> bool: - """ - Check if Docker commands should be run with sudo. - - :return: Whether to use sudo for Docker commands. - """ - use_sudo = False - # if hserver.is_inside_docker(): - # use_sudo = True - return use_sudo - - -# TODO(gp): use_sudo should be set to None and the correct value inferred from -# the repo config. -def get_docker_executable(use_sudo: bool) -> str: - """ - Get the Docker executable with / without sudo, if needed. - """ - executable = "sudo " if use_sudo else "" - executable += "docker" - return executable - - -def process_docker_cmd( - docker_cmd: str, container_image: str, dockerfile: str, mode: str -) -> str: - """ - Process a Docker command according to the mode. 
- - :param docker_cmd: The Docker command to process. - :param container_image: The name of the Docker container. - :param dockerfile: The content of the Dockerfile. - :param mode: The mode to process the Docker command. - - "return_cmd": return the command as is. - - "system": execute the command. - - "save_to_file": save the command to a file. - :return: The output of the Docker command. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(docker_cmd, str) - hdbg.dassert_isinstance(container_image, str) - hdbg.dassert_isinstance(dockerfile, str) - if mode == "return_cmd": - ret = docker_cmd - elif mode == "system": - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd, suppress_output=False) - ret = "" - elif mode == "system_without_output": - hsystem.system(docker_cmd, suppress_output=True) - ret = "" - elif mode == "save_to_file": - file_name = f"tmp.process_docker_cmd.{container_image}.txt" - txt = [] - txt.append(f"docker_cmd={docker_cmd}") - txt.append(f"container_image={container_image}") - txt.append(f"dockerfile={dockerfile}") - txt = "\n".join(txt) - hio.to_file(file_name, txt) - ret = "" - else: - raise ValueError(f"Invalid mode='{mode}'") - return ret - - -def container_exists(container_name: str, use_sudo: bool) -> Tuple[bool, str]: - """ - Check if a Docker container is running by executing a command like: - - ``` - > docker container ls --filter=tmp.prettier -aq - aed8a5ce33a9 - ``` - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} container ls --filter name=/{container_name} -aq" - _, container_id = hsystem.system_to_one_line(cmd) - container_id = container_id.rstrip("\n") - exists = container_id != "" - _LOG.debug(hprint.to_str("exists container_id")) - return exists, container_id - - -def image_exists(image_name: str, use_sudo: bool) -> Tuple[bool, str]: - """ - Check if a Docker image already exists by 
executing a command like: - - ``` - > docker images tmp.prettier -aq - aed8a5ce33a9 - ``` - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} image ls --filter reference={image_name} -q" - _, image_id = hsystem.system_to_one_line(cmd) - image_id = image_id.rstrip("\n") - exists = image_id != "" - _LOG.debug(hprint.to_str("exists image_id")) - return exists, image_id - - -def container_rm(container_name: str, use_sudo: bool) -> None: - """ - Remove a Docker container by its name. - - :param container_name: Name of the Docker container to remove. - :param use_sudo: Whether to use sudo for Docker commands. - :raises AssertionError: If the container ID is not found. - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - # Find the container ID from the name. - # Docker filter refers to container names using a leading `/`. - cmd = f"{executable} container ls --filter name=/{container_name} -aq" - _, container_id = hsystem.system_to_one_line(cmd) - container_id = container_id.rstrip("\n") - hdbg.dassert_ne(container_id, "") - # Delete the container. - _LOG.debug(hprint.to_str("container_id")) - cmd = f"{executable} container rm --force {container_id}" - hsystem.system(cmd) - _LOG.debug("docker container '%s' deleted", container_name) - - -def volume_rm(volume_name: str, use_sudo: bool) -> None: - """ - Remove a Docker volume by its name. - - :param volume_name: Name of the Docker volume to remove. - :param use_sudo: Whether to use sudo for Docker commands. 
- """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} volume rm {volume_name}" - hsystem.system(cmd) - _LOG.debug("docker volume '%s' deleted", volume_name) - - -# ############################################################################# - - -def get_current_arch() -> str: - """ - Return the architecture that we are running on (e.g., arm64, aarch64, - x86_64). - """ - cmd = "uname -m" - _, current_arch = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("current_arch")) - return current_arch - - -def _is_compatible_arch(val1: str, val2: str) -> bool: - valid_arch = ["x86_64", "amd64", "aarch64", "arm64"] - hdbg.dassert_in(val1, valid_arch) - hdbg.dassert_in(val2, valid_arch) - if val1 == val2: - return True - compatible_sets = [{"x86_64", "amd64"}, {"aarch64", "arm64"}] - for comp_set in compatible_sets: - if {val1, val2}.issubset(comp_set): - return True - return False - - -def check_image_compatibility_with_current_arch( - image_name: str, - *, - use_sudo: Optional[bool] = None, - pull_image_if_needed: bool = True, - assert_on_error: bool = True, -) -> None: - """ - Check if the Docker image is compatible with the current architecture. - - :param image_name: Name of the Docker image to check. - :param use_sudo: Whether to use sudo for Docker commands. - :param pull_image_if_needed: Whether to pull the image if it doesn't - exist. - :param assert_on_error: Whether to raise an error if the image is - not compatible with the current architecture. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_ne(image_name, "") - if use_sudo is None: - use_sudo = get_use_sudo() - # Get the architecture that we are running on. - current_arch = get_current_arch() - # > docker image inspect \ - # 623860924167.dkr.ecr.eu-north-1.amazonaws.com/helpers:local-saggese-1.1.0 \ - # --format '{{.Architecture}}' - # arm64 - # Check and pull the image if needed. 
- has_image, _ = image_exists(image_name, use_sudo) - if not has_image: - _LOG.warning("Image '%s' not found: trying to pull it", image_name) - if pull_image_if_needed: - cmd = f"docker pull {image_name}" - hsystem.system(cmd) - else: - hdbg.dfatal("Image '%s' not found", image_name) - # Check the image architecture. - executable = get_docker_executable(use_sudo) - cmd = f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'" - _, image_arch = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("image_arch")) - # Check architecture compatibility. - if not _is_compatible_arch(current_arch, image_arch): - msg = f"Running architecture '{current_arch}' != image architecture '{image_arch}'" - if assert_on_error: - hdbg.dfatal(msg) - else: - _LOG.warning(msg) - _LOG.debug( - "Running architecture '%s' and image architecture '%s' are compatible", - current_arch, - image_arch, - ) - - -# ############################################################################# - - -def wait_for_file_in_docker( - container_id: str, - docker_file_path: str, - out_file_path: str, - *, - check_interval_in_secs: float = 0.5, - timeout_in_secs: int = 10, -) -> None: - """ - Wait for a file to be generated inside a Docker container and copy it to - the host. - - This function periodically checks for the existence of a file inside - a Docker container. Once the file is found, it copies the file to - the specified output path on the host. - - :param container_id: ID of the Docker container. - :param docker_file_path: Path to the file inside the Docker - container. - :param out_file_path: Path to copy the file to on the host. - :param check_interval_in_secs: Time in seconds between checks. - :param timeout_in_secs: Maximum time to wait for the file in - seconds. - :raises ValueError: If the file is not found within the timeout - period. 
- """ - _LOG.debug("Waiting for file: %s:%s", container_id, docker_file_path) - start_time = time.time() - while not os.path.exists(out_file_path): - cmd = f"docker cp {container_id}:{docker_file_path} {out_file_path}" - hsystem.system(cmd) - if time.time() - start_time > timeout_in_secs: - raise ValueError( - "Timeout reached. File not found: " - f"{container_id}:{docker_file_path}" - ) - time.sleep(check_interval_in_secs) - _LOG.debug("File generated: %s", out_file_path) - - -def replace_shared_root_path( - path: str, *, replace_ecs_tokyo: Optional[bool] = False -) -> str: - """ - Replace root path of the shared directory based on the mapping. - - :param path: path to replace, e.g., `/data/shared` - :param replace_ecs_tokyo: if True replace `ecs_tokyo` to `ecs` in the path - :return: replaced shared data dir root path, e.g., - - `/data/shared/ecs_tokyo/.../20240522_173000.20240522_182500/` -> - `/shared_data/ecs/.../20240522_173000.20240522_182500/` - - `/data/shared/ecs/.../20240522_173000.20240522_182500` -> - `/shared_data/ecs/.../20240522_173000.20240522_182500` - """ - # Inside ECS, we keep the original shared data path and replace it only when - # running inside Docker on the dev server. - if hserver.is_inside_docker() and not hserver.is_inside_ecs_container(): - shared_data_dirs = hserver.get_shared_data_dirs() - if shared_data_dirs is not None: - if replace_ecs_tokyo: - # Make a copy to avoid modifying the original one. 
- shared_data_dirs = copy.deepcopy(shared_data_dirs) - shared_data_dirs["ecs_tokyo"] = "ecs" - for shared_dir, docker_shared_dir in shared_data_dirs.items(): - path = path.replace(shared_dir, docker_shared_dir) - _LOG.debug( - "Running inside Docker on the dev server, thus replacing %s " - "with %s", - shared_dir, - docker_shared_dir, - ) - else: - _LOG.debug("No replacement found, returning path as-is: %s", path) - return path - - -# ############################################################################# -# Dockerized executable utils. -# ############################################################################# - -# See `docs/tools/docker/all.dockerized_flow.explanation.md` for details -# about the Dockerized flow. - - -def get_docker_base_cmd(use_sudo: bool) -> List[str]: - """ - Get the base command for running a Docker container. - - E.g., - ``` - docker run --rm --user $(id -u):$(id -g) \ - -e CSFY_AWS_PROFILE -e CSFY_ECR_BASE_PATH \ - ... - -e OPENAI_API_KEY - ``` - - :param use_sudo: Whether to use sudo for Docker commands. - :return: The base command for running a Docker container. - """ - docker_executable = get_docker_executable(use_sudo) - # Get the env vars to pass to the Docker container. - vars_to_pass = henv.get_csfy_env_vars() + henv.get_api_key_env_vars() - vars_to_pass = sorted(vars_to_pass) - vars_to_pass_as_str = " ".join(f"-e {v}" for v in vars_to_pass) - # Build the command as a list. - docker_cmd = [ - docker_executable, - "run --rm", - "--user $(id -u):$(id -g)", - vars_to_pass_as_str, - ] - # Handle coverage. - # TODO(gp): Is this env var standard, or should it be - # CSFY_COVERAGE_PROCESS_START? - # if os.environ.get("COVERAGE_PROCESS_START"): - # _LOG.debug("Enabling coverage") - # host_cov_dir = os.path.abspath("coverage_data") - # # TODO(gp): Use `hio.create_dir()` instead. 
- # os.makedirs(host_cov_dir, exist_ok=True) - # os.chmod(host_cov_dir, 0o777) - # coverage_dir_container = "/app/coverage_data" - # docker_cmd.extend( - # [ - # f"-e COVERAGE_FILE={coverage_dir_container}/.coverage", - # f"-e COVERAGE_PROCESS_START={coverage_dir_container}/.coveragerc", - # f"-v {host_cov_dir}:{coverage_dir_container}", - # ] - # ) - return docker_cmd - - -def get_container_image_name( - image_name: str, dockerfile: str -) -> Tuple[str, str]: - """ - Get the name of the container image. - - :param image_name: Name of the Docker container to build. - :param dockerfile: Content of the Dockerfile for building the - container. - :return: Name of the container image. - """ - _LOG.debug(hprint.func_signature_to_str("image_name dockerfile")) - hdbg.dassert_ne(image_name, "") - hdbg.dassert_ne(dockerfile, "") - dockerfile = hprint.dedent(dockerfile) - # if os.environ.get("COVERAGE_PROCESS_START"): - # _LOG.debug("Enabling coverage") - # # Check if this is a Python-based Dockerfile. - # if any( - # keyword in dockerfile.lower() - # for keyword in ["python", "pip", "python3"] - # ): - # coverage_dockerfile = hcovera.generate_coverage_dockerfile() - # _LOG.debug("Coverage Dockerfile content:\n%s", coverage_dockerfile) - # dockerfile = dockerfile.strip() + "\n" + coverage_dockerfile - # _LOG.debug("Coverage support added to Dockerfile") - # else: - # _LOG.warning( - # "Skipping coverage addition - not a Python-based Dockerfile" - # ) - _LOG.debug("Final Dockerfile:\n%s", dockerfile) - # Get the current architecture. - current_arch = get_current_arch() - sha256_hash = hashlib.sha256(dockerfile.encode()).hexdigest() - short_hash = sha256_hash[:8] - # Build the name of the container image. 
- image_name_out = f"{image_name}.{current_arch}.{short_hash}" - return image_name_out, dockerfile - - -def build_container_image( - image_name: str, - dockerfile: str, - force_rebuild: bool, - use_sudo: bool, - *, - use_cache: bool = True, - incremental: bool = True, -) -> str: - """ - Build a Docker image from a Dockerfile. - - :param image_name: Name of the Docker container to build. - :param dockerfile: Content of the Dockerfile for building the - container. - :param force_rebuild: Whether to force rebuild the Docker container. - There are two level of caching. The first level of caching is - our approach of skipping `docker build` if the image already - exists and the Dockerfile hasn't changed. The second level is - the Docker cache itself, which is invalidated by `--no-cache`. - :param use_sudo: Whether to use sudo for Docker commands. - :return: Name of the built Docker container. - :raises AssertionError: If the container ID is not found. - """ - _LOG.debug(hprint.func_signature_to_str("dockerfile")) - # - image_name_out, dockerfile = get_container_image_name(image_name, dockerfile) - # Check if the container already exists. If not, build it. - has_container, _ = image_exists(image_name_out, use_sudo) - coverage_enabled = os.environ.get("COVERAGE_PROCESS_START") - # if coverage_enabled: - # # Add coverage suffix to image name for tracking. - # image_name_out += ".coverage" - # # Force rebuild when coverage is enabled. 
- # has_container = False - # _LOG.debug( - # "Coverage enabled - forcing rebuild of image: {image_name_out}" - # ) - if bool(os.environ.get("CSFY_DOCKER_FORCE_REBUILD", False)): - _LOG.warning( - "CSFY_DOCKER_FORCE_REBUILD forcing to rebuild container without cache" - ) - force_rebuild = True - if force_rebuild: - _LOG.warning( - "Forcing to rebuild of container '%s' without cache", - image_name, - ) - has_container = False - use_cache = False - _LOG.debug(hprint.to_str("has_container use_cache")) - # # Always prepare coverage files when coverage is enabled, regardless of container existence. - # if coverage_enabled: - # # Create build context directory for coverage files. - # build_context_dir = "tmp.docker_build" - # hio.create_dir(build_context_dir, incremental=incremental) - # # Always copy .coveragerc when coverage is enabled. - # coveragerc_src = ".coveragerc" - # coveragerc_dst = os.path.join(build_context_dir, ".coveragerc") - # if os.path.exists(coveragerc_src): - # shutil.copy2(coveragerc_src, coveragerc_dst) - # _LOG.debug( - # "Coverage enabled - copied {coveragerc_src} to {coveragerc_dst}" - # ) - # else: - # _LOG.warning( - # "Coverage enabled but .coveragerc not found at {coveragerc_src}" - # ) - if not has_container: - # Create a temporary Dockerfile. - _LOG.warning("Building Docker container...") - build_context_dir = "tmp.docker_build" - if not coverage_enabled: - # Only create build context if not already created for coverage - hio.create_dir(build_context_dir, incremental=incremental) - temp_dockerfile = os.path.join(build_context_dir, "Dockerfile") - hio.to_file(temp_dockerfile, dockerfile) - # Build the container. 
- docker_executable = get_docker_executable(use_sudo) - cmd = [ - f"{docker_executable} build", - f"-f {temp_dockerfile}", - f"-t {image_name_out}", - # "--platform linux/aarch64", - ] - if not use_cache: - cmd.append("--no-cache") - cmd.append(build_context_dir) - cmd = " ".join(cmd) - hsystem.system(cmd, suppress_output=False) - _LOG.info("Building Docker container... done") - return image_name_out - - -# ############################################################################# - - -def get_host_git_root() -> str: - """ - Get the Git root path on the host machine, when inside a Docker container. - """ - hdbg.dassert_in("CSFY_HOST_GIT_ROOT_PATH", os.environ) - host_git_root_path = os.environ["CSFY_HOST_GIT_ROOT_PATH"] - return host_git_root_path - - -def get_docker_mount_info( - is_caller_host: bool, use_sibling_container_for_callee: bool -) -> Tuple[str, str, str]: - """ - Get the Docker mount information for the current environment. - - This function determines the appropriate source and target paths for - mounting a directory in a Docker container. - - Same inputs as `convert_caller_to_callee_docker_path()`. - - :return: A tuple containing - - caller_mount_path: the mount path on the caller filesystem, e.g., - `/app` or `/Users/.../src/cmamp1` - - callee_mount_path: the mount path inside the called Docker container, - e.g., `/app` - - the mount string, e.g., - `source={caller_mount_path},target={callee_mount_path}` - type=bind,source=/app,target=/app - """ - _LOG.debug(hprint.func_signature_to_str()) - # Compute the mount path on the caller filesystem. - if is_caller_host: - # On the host machine, the mount path is the Git root. - caller_mount_path = hgit.find_git_root() - else: - # Inside a Docker container, the mount path depends on the container - # style. - use_host_git_root = ( - use_sibling_container_for_callee - and not hserver.is_csfy_dind_enabled() - ) - if use_host_git_root: - # For sibling containers, we need to get the Git root on the host. 
- caller_mount_path = get_host_git_root() - else: - # For children containers, we need to get the local Git root on the - # host. - caller_mount_path = hgit.find_git_root() - # The target mount path is always `/app` inside the Docker container. - callee_mount_path = "/app" - # Build the Docker mount string. - mount = f"type=bind,source={caller_mount_path},target={callee_mount_path}" - _LOG.debug(hprint.to_str("caller_mount_path callee_mount_path mount")) - return caller_mount_path, callee_mount_path, mount - - -def get_docker_mount_context() -> Tuple[bool, bool, str, str, str]: - """ - Return Docker mount context for container operations. - - :return: (is_caller_host, use_sibling_container_for_callee, - caller_mount_path, callee_mount_path, mount) - """ - is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = hserver.use_docker_sibling_containers() - caller_mount_path, callee_mount_path, mount = get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - return ( - is_caller_host, - use_sibling_container_for_callee, - caller_mount_path, - callee_mount_path, - mount, - ) - - -def build_and_run_docker_cmd( - use_sudo: bool, - callee_mount_path: str, - mount: str, - container_image: str, - dockerfile: str, - tool_cmd: str, - mode: str, - *, - override_entrypoint: bool = False, - wrap_in_bash: bool = False, -) -> str: - """ - Build and execute a Docker command. - """ - docker_cmd = get_docker_base_cmd(use_sudo) - if override_entrypoint: - docker_cmd.append("--entrypoint ''") - # Check that the container image exists. 
- hdbg.dassert( - image_exists(container_image, use_sudo)[0], - "Container image '%s' does not exist", - container_image, - ) - docker_cmd.extend( - [ - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - ] - ) - if wrap_in_bash: - docker_cmd.append(f'bash -c "{tool_cmd}"') - else: - docker_cmd.append(tool_cmd) - docker_cmd_str = " ".join(docker_cmd) - return process_docker_cmd(docker_cmd_str, container_image, dockerfile, mode) - - -# TODO(gp): Move to helpers.hdbg. -def _dassert_valid_path(file_path: str, is_input: bool) -> None: - """ - Assert that a file path is valid, based on it being input or output. - - For input files, it ensures that the file or directory exists. For - output files, it ensures that the enclosing directory exists. - - :param file_path: The file path to check. - :param is_input: Whether the file path is an input file. - """ - if is_input: - # If it's an input file, then `file_path` must exist as a file or a dir. - hdbg.dassert_path_exists(file_path) - else: - # If it's an output, we might be writing a file that doesn't exist yet, - # but we assume that the including directory is already present. - dir_name = os.path.normpath(os.path.dirname(file_path)) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert( - os.path.exists(file_path) or os.path.exists(dir_name), - "Invalid path: '%s' and '%s' don't exist", - file_path, - dir_name, - ) - - -# TODO(gp): Move to helpers.hdbg. -def _dassert_is_path_included(file_path: str, including_path: str) -> None: - """ - Assert that a file path is included within another path. - - This function checks if the given file path starts with the - specified including path. If not, it raises an assertion error. - - :param file_path: The file path to check. - :param including_path: The path that should include the file path. - """ - # TODO(gp): Maybe we need to normalize the paths. 
- hdbg.dassert( - file_path.startswith(including_path), - "'%s' needs to be underneath '%s'", - file_path, - including_path, - ) - - -def convert_caller_to_callee_docker_path( - caller_file_path: str, - caller_mount_path: str, - callee_mount_path: str, - check_if_exists: bool, - is_input: bool, - is_caller_host: bool, - use_sibling_container_for_callee: bool, -) -> str: - """ - Convert a file path from the (current) caller filesystem to the called - Docker container path. - - :param caller_file_path: The file path on the caller filesystem. - :param caller_mount_path: The source mount path on the host machine. - :param callee_mount_path: The target mount path inside the Docker - container. - :param check_if_exists: Whether to check if the file path exists. - :param is_input: Whether the file path is an input file (used only if - `check_if_exists` is True). - :param is_caller_host: Whether the caller is running on the host - machine or inside a Docker container. - :param use_sibling_container_for_callee: Whether to use a sibling - container or a children container - :return: The converted file path inside the Docker container. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_ne(caller_file_path, "") - hdbg.dassert_ne(caller_mount_path, "") - hdbg.dassert_ne(callee_mount_path, "") - if check_if_exists: - _dassert_valid_path(caller_file_path, is_input) - # Make the path absolute with respect to the (current) caller filesystem. - abs_caller_file_path = os.path.abspath(caller_file_path) - if is_caller_host: - # On the host, the path needs to be underneath the caller mount point. - caller_mount_point = caller_mount_path - else: - # We are inside a Docker container, so the path needs to be under - # the local Git root, since this is the mount point. 
- caller_mount_point = hgit.find_git_root() - _ = use_sibling_container_for_callee - # This is not always possible, e.g., '/var/log/app.log' needs to be - # underneath '/app' - _dassert_is_path_included(abs_caller_file_path, caller_mount_point) - # Make the path relative to the caller mount point. - _LOG.debug(hprint.to_str("caller_file_path caller_mount_point")) - rel_path = os.path.relpath(caller_file_path, caller_mount_point) - docker_path = os.path.join(callee_mount_path, rel_path) - docker_path = os.path.normpath(docker_path) - # - _LOG.debug( - " Converted %s -> %s -> %s", caller_file_path, rel_path, docker_path - ) - return docker_path - - -def is_path(path: str) -> bool: - """ - Check if `path` can be considered a file or a directory using heuristics. - - - return: True if the string looks like a path, False otherwise. - """ - # E.g., - # ``` - # is_path("file.txt") # True, since it has an extension - # is_path("/path/to/file.py") # True, since it has an absolute path - # is_path("/path/to") # True, since it has an absolute path - # is_path("../data.csv") # True, since it has an relative path - # is_path("folder/") # True, since it has a trailing slash - # is_path(".hidden") # True, since it has a leading dot - # is_path("readme") # False, since it has no extension and no path - # ``` - # Check if it has a file extension (e.g., .txt, .csv). - if os.path.splitext(path)[1]: - return True - # Check if it is an absolute or relative path (e.g., starts with "/" or "./" - # or "../") - if path.startswith("/") or path.startswith("./") or path.startswith("../"): - return True - # Check if it ends with a slash. - if path.endswith("/"): - return True - # Check if it has a hidden file. - basename = os.path.basename(path) - if basename.startswith(".") and basename.count(".") == 1: - return True - # Check if it contains a slash. 
- if "/" in path: - return True - return False - - -def convert_all_paths_from_caller_to_callee_docker_path( - cmd_opts: List[str], - caller_mount_path: str, - callee_mount_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, -) -> List[str]: - """ - Convert all the paths from the caller to the callee Docker container path. - - The paths are recognized by checking whether they point to an existing file - or directory. - - The limitation of this approach is that output files are not recognized. To - work around this problem: - - Create output dirs - - Explicitly parse options that are outputs (e.g., `-o `) - - :param cmd_opts: List of command options. - :param caller_mount_path: See `get_docker_mount_info()`. - :param callee_mount_path: See `get_docker_mount_info()`. - :param is_caller_host: See `get_docker_mount_info()`. - :param use_sibling_container_for_callee: See `get_docker_mount_info()`. - :return: List of converted command options. - """ - _LOG.debug(hprint.func_signature_to_str()) - # Converted command options. - cmd_opts_out = [] - # Scan the list of command option. 
- for cmd_opt_in in cmd_opts: - exists = os.path.exists(cmd_opt_in) - is_path_ = is_path(cmd_opt_in) - _LOG.debug(hprint.to_str("cmd_opt_in exists is_path_")) - if exists or is_path_: - check_if_exists = False - is_input = False - cmd_opt_out = convert_caller_to_callee_docker_path( - cmd_opt_in, - caller_mount_path, - callee_mount_path, - check_if_exists, - is_input, - is_caller_host, - use_sibling_container_for_callee, - ) - _LOG.debug(hprint.to_str("cmd_opt_in -> cmd_opt_out")) - cmd_opts_out.append(cmd_opt_out) - else: - _LOG.debug("File does not exist: %s", cmd_opt_in) - cmd_opts_out.append(cmd_opt_in) - _LOG.debug(hprint.to_str("cmd_opts_out")) - return cmd_opts_out - - -# ############################################################################# -# CLI utilities -# ############################################################################# - - -def add_open_arg(parser: argparse.ArgumentParser) -> None: - """ - Add --open option to parser for opening output files on macOS. - - :param parser: ArgumentParser instance to add the option to - """ - parser.add_argument( - "--open", - action="store_true", - default=False, - help="Open the output file on macOS", - ) - - -def open_file_on_macos(file_path: str) -> None: - """ - Open a file on macOS using the 'open' command. 
- - :param file_path: Path to the file to open - :raises subprocess.CalledProcessError: If open command fails - """ - if platform.system() != "Darwin": - _LOG.warning("--open flag only works on macOS") - return - subprocess.run(["open", file_path], check=True) - _LOG.info("Opened file with macOS 'open' command: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py deleted file mode 100644 index 0ab2f2f2f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Utilities for running docker tests. - -Import as: - -import helpers.hdocker_tests as hdoctest -""" - -import glob -import logging -import os -from typing import List - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Constants -# ############################################################################# - - -# Pattern for docker test files. -DOCKER_TEST_PATTERN = "docker_test_*.py" - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def get_docker_test_files(test_dir: str) -> List[str]: - """ - Find all docker test files in the specified directory. 
- - :param test_dir: directory to search for test files - :return: sorted list of test file paths - """ - pattern = os.path.join(test_dir, DOCKER_TEST_PATTERN) - files = sorted(glob.glob(pattern)) - _LOG.info("Found %d docker test files", len(files)) - for file in files: - _LOG.debug(" - %s", file) - return files - - -def _run_docker_pytest_cmd( - test_file: str, *, docker_cmd_script: str = "./docker_cmd.sh" -) -> int: - """ - Run a test file through docker_cmd.sh with pytest. - - :param test_file: path to the test file - :param docker_cmd_script: path to docker_cmd.sh script - :return: return code from the command - """ - hdbg.dassert_file_exists(test_file) - hdbg.dassert_file_exists(docker_cmd_script) - cmd = f'{docker_cmd_script} "pytest {test_file}"' - _LOG.info("Running: %s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - return rc - - -def run_docker_cmd(script_dir: str, *, shell_cmd: str = "ls /git_root") -> None: - """ - Run an arbitrary shell command inside Docker via docker_cmd.sh. - - :param script_dir: directory containing docker_cmd.sh - :param shell_cmd: shell command to run inside the container - """ - hdbg.dassert_path_exists(script_dir) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - hdbg.dassert_file_exists(docker_cmd_script) - cmd = f"cd {script_dir} && bash {docker_cmd_script} '{shell_cmd}'" - hsystem.system(cmd) - - -def run_all_tests( - test_dir: str, *, docker_cmd_script: str = "./docker_cmd.sh" -) -> int: - """ - Find and run all docker test files in the directory. 
- - :param test_dir: directory containing test files - :param docker_cmd_script: path to docker_cmd.sh script - :return: 0 if all tests passed, non-zero otherwise - """ - test_files = get_docker_test_files(test_dir) - if not test_files: - _LOG.warning("No docker test files found in %s", test_dir) - return 0 - failed_tests = [] - for test_file in test_files: - return_code = _run_docker_pytest_cmd( - test_file, docker_cmd_script=docker_cmd_script - ) - if return_code != 0: - failed_tests.append(test_file) - if failed_tests: - _LOG.error("Failed tests: %s", failed_tests) - return 1 - _LOG.info("All tests passed") - return 0 - - -# ############################################################################# -# DockerTestCase -# ############################################################################# - - -# TODO(gp): Can this be used for run_dockerized_* tests? -class DockerTestCase(hunitest.TestCase): - """ - Base test class for Docker tests. - - Subclasses must set `_test_file = __file__` and may add notebook test - methods that call `self._helper(notebook_name)`. - """ - - _test_file: str = "" - - @pytest.mark.slow - def test_docker_build(self) -> None: - """ - Test that docker_build.sh runs without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_build_script = os.path.join(script_dir, "docker_build.sh") - hdbg.dassert_file_exists(docker_build_script) - # Run test. - cmd = f"cd {script_dir} && bash {docker_build_script}" - hsystem.system(cmd) - - @pytest.mark.slow - def test_docker_cmd(self) -> None: - """ - Test that docker_cmd.sh 'ls /git_root' runs without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - hdbg.dassert_file_exists(docker_cmd_script) - # Run test. 
- cmd = f"cd {script_dir} && bash {docker_cmd_script} 'ls /git_root'" - hsystem.system(cmd) - - def test_docker_bash(self) -> None: - """ - Test that docker_bash.sh runs 'ls /git_root' and exits without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_bash_script = os.path.join(script_dir, "docker_bash.sh") - if not os.path.exists(docker_bash_script): - pytest.skip("docker_bash.sh not found in " + script_dir) - # Run test. - shell_cmd = "ls /git_root" - cmd = f"echo '{shell_cmd}' | bash {docker_bash_script}" - hsystem.system(cmd) - - def _run_notebook(self, notebook_name: str) -> None: - """ - Run a single notebook inside Docker. - - :param notebook_name: notebook filename relative to the project dir - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - notebook_path = os.path.join(script_dir, notebook_name) - hdbg.dassert_file_exists(notebook_path) - # Compute the notebook path inside the container via /git_root. 
- git_root = hgit.find_git_root(script_dir) - rel_path = os.path.relpath(script_dir, git_root) - container_notebook_path = f"/git_root/{rel_path}/{notebook_name}" - cmd = ( - f"cd {script_dir} && " - f"bash {docker_cmd_script} " - f"'jupyter nbconvert --execute --to html " - f"--ExecutePreprocessor.timeout=-1 {container_notebook_path}'" - ) - hsystem.system(cmd) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py deleted file mode 100644 index f52fc9230..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Import as: - -import helpers.hemail as hemail -""" - -import email.mime.multipart as emmult -import email.mime.text as emtext -import os -import smtplib -from typing import Optional - - -def send_email( - subject: str, - message: str, - to_adr: str, - email_address: Optional[str] = None, - email_password: Optional[str] = None, - html: bool = False, -) -> None: - """ - Send mail to specified e-mail addresses. 
- - :param message: Message to be sent - :param to_adr: Mail to which to send messages - :type list - :return: None - """ - server = smtplib.SMTP("smtp.gmail.com", 587) - server.starttls() - if email_address is None: - email_address = os.environ["AM_EMAIL_ADDRESS"] - if email_password is None: - email_password = os.environ["AM_EMAIL_PASSWORD"] - server.login(email_address, email_password) - msg = emmult.MIMEMultipart() - msg["From"] = email_address - msg["To"] = ", ".join(to_adr) - msg["Subject"] = subject - if html: - msg.attach(emtext.MIMEText(message, "html")) - else: - msg.attach(emtext.MIMEText(message, "plain")) - - text = msg.as_string() - server.sendmail(email_address, to_adr, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py deleted file mode 100644 index f2e0719bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py +++ /dev/null @@ -1,541 +0,0 @@ -""" -Import as: - -import helpers.henv as henv -""" - -import logging -import os -from typing import Any, List, Tuple, Union - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.repo_config_utils as hrecouti - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - - -_WARNING = "\033[33mWARNING\033[0m" - - -# All printing functions should: -# - Return a string and not a list of strings -# - Add a newline at the end of the string (i.e., the string should end with -# `\n`) - - -# ############################################################################# -# Get env vars info. 
-# ############################################################################# - - -def get_env_var( - env_name: str, - *, - as_bool: bool = False, - default_value: Any = None, - abort_on_missing: bool = True, -) -> Union[str, bool, Any]: - """ - Get an environment variable by name. - - :param env_name: name of the env var - :param as_bool: convert the value into a Boolean - :param default_value: the default value to use in case it's not - defined - :param abort_on_missing: if the env var is not defined aborts, - otherwise use the default value - :return: value of env var - """ - if env_name not in os.environ: - if abort_on_missing: - hdbg.dassert_in( - env_name, - os.environ, - "Can't find env var '%s' in '%s'", - env_name, - str(os.environ), - ) - else: - return default_value - value = os.environ[env_name] - if as_bool: - # Convert the value into a boolean. - if value in ("0", "", "None", "False"): - value = False - else: - value = True - return value - - -def get_csfy_env_vars() -> List[str]: - """ - Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. - """ - # TODO(gp): We should only pass the `CSFY_` vars. - env_var_names = [ - v - for v in os.environ.keys() - if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") - ] - return env_var_names - - -# TODO(gp): Extract all the env vars that start with AM_, CK_, CSFY_ and make -# sure they have a description here. -def get_env_vars() -> List[str]: - """ - Return all the env vars that are expected to be set in Docker. - """ - # Keep in sync with `lib_tasks.py:_generate_compose_file()`. - env_var_names = [ - # Force enabling Docker-in-Docker. - "CSFY_ENABLE_DIND", - # Enable forcing certain unit tests to fail to check that unit test - # failures are caught. - "CSFY_FORCE_TEST_FAIL", - # The name of the host running Docker. - "CSFY_HOST_NAME", - # The OS of the host running Docker. - "CSFY_HOST_OS_NAME", - # The version of the host running Docker. 
- "CSFY_HOST_OS_VERSION", - # The name of the user running the host. - "CSFY_HOST_USER_NAME", - # Whether to check if certain property of the repo are as expected or not. - "CSFY_REPO_CONFIG_CHECK", - # Path to use for `repo_config.py`. E.g., used when running `helpers` - # container to avoid using the `repo_config.py` corresponding to the - # container launching the linter. - "CSFY_REPO_CONFIG_PATH", - "GH_ACTION_ACCESS_TOKEN", - # Whether we are running inside GH Actions. - "CSFY_CI", - # TODO(gp): Difference between amp and cmamp. - # CK AWS credentials. - "CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_DEFAULT_REGION", - "CSFY_AWS_SECRET_ACCESS_KEY", - "CSFY_AWS_SESSION_TOKEN", - # S3 bucket to use for CK. - "CSFY_AWS_S3_BUCKET", - # Path to the ECR for the Docker images for CK. - "CSFY_ECR_BASE_PATH", - ] - # No duplicates. - # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq( - len(set(env_var_names)), - len(env_var_names), - "There are duplicates", - str(env_var_names), - ) - # Sort. - env_var_names = sorted(env_var_names) - return env_var_names - - -def get_secret_env_vars() -> List[str]: - """ - Return the list of env vars that are secrets. - """ - secret_env_var_names = [ - # TODO(gp): Difference between amp and cmamp. - "CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_SECRET_ACCESS_KEY", - "GH_ACTION_ACCESS_TOKEN", - ] - # No duplicates. - # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq( - len(set(secret_env_var_names)), - len(secret_env_var_names), - "There are duplicates", - str(secret_env_var_names), - ) - # Secret env vars are a subset of the env vars. - env_vars = get_env_vars() - # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead. - if not set(secret_env_var_names).issubset(set(env_vars)): - diff = set(secret_env_var_names).difference(set(env_vars)) - cmd = f"Secret vars in `{str(diff)} are not in '{str(env_vars)}'" - assert 0, cmd - # Sort. 
- secret_env_var_names = sorted(secret_env_var_names) - return secret_env_var_names - - -def get_api_key_env_vars() -> List[str]: - """ - Return the list of env vars that are API keys. - """ - # Find all the env vars that end with "_API_KEY". - env_var_names = [ - env_var for env_var in os.environ.keys() if env_var.endswith("_API_KEY") - ] - return env_var_names - - -def check_env_vars() -> None: - """ - Make sure all the expected env vars are defined. - """ - env_vars = get_env_vars() - for env_var in env_vars: - hdbg.dassert_in( - env_var, - os.environ, - "env_var='%s' is not in env_vars='%s'", - env_var, - str(os.environ.keys()), - ) - - -def env_vars_to_string() -> str: - """ - Return a string with the signature of all the expected env vars (including - the secret ones). - """ - txt: List[str] = [] - # Get the expected env vars and the secret ones. - env_vars = get_env_vars() - secret_env_vars = get_secret_env_vars() - # Print a signature. - for env_name in env_vars: - is_defined = env_name in os.environ - is_empty = is_defined and os.environ[env_name] == "" - if not is_defined: - txt.append(f"{env_name}=undef") - else: - if env_name in secret_env_vars: - # Secret env var: print if it's empty or not. - if is_empty: - txt.append(f"{env_name}=empty") - else: - txt.append(f"{env_name}=***") - else: - # Not a secret var: print the value. - txt.append(f"{env_name}='{os.environ[env_name]}'") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Get Git info. -# ############################################################################# - - -# Copied from helpers.hgit to avoid circular dependencies. - - -def _git_log(num_commits: int = 5, my_commits: bool = False) -> str: - """ - Return the output of a pimped version of git log. 
- - :param num_commits: number of commits to report - :param my_commits: True to report only the current user commits - :return: string - """ - cmd = [] - cmd.append("git log --date=local --oneline --graph --date-order --decorate") - cmd.append( - "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" - ) - cmd.append(f"-{num_commits}") - if my_commits: - # This doesn't work in a container if the user relies on `~/.gitconfig` to - # set the user name. - # TODO(gp): We should use `get_git_name()`. - cmd.append("--author $(git config user.name)") - cmd = " ".join(cmd) - data: Tuple[int, str] = hsystem.system_to_string(cmd) - _, txt = data - return txt - - -# End copy. - - -def _get_git_signature(git_commit_type: str = "all") -> str: - """ - Get information about current branch and latest commits. - """ - txt: List[str] = [] - # Get the branch name. - cmd = "git branch --show-current" - _, branch_name = hsystem.system_to_one_line(cmd) - txt.append(f"branch_name='{branch_name}'") - # Get the short Git hash of the current branch. - cmd = "git rev-parse --short HEAD" - _, hash_ = hsystem.system_to_one_line(cmd) - txt.append(f"hash='{hash_}'") - # Add info about the latest commits. - num_commits = 3 - if git_commit_type == "all": - txt.append("# Last commits:") - log_txt = _git_log(num_commits=num_commits, my_commits=False) - txt.append(hprint.indent(log_txt)) - elif git_commit_type == "mine": - txt.append("# Your last commits:") - log_txt = _git_log(num_commits=num_commits, my_commits=True) - txt.append(hprint.indent(log_txt)) - elif git_commit_type == "none": - pass - else: - raise ValueError(f"Invalid value='{git_commit_type}'") - # - result = "\n".join(txt) + "\n" - hdbg.dassert(result.endswith("\n"), "result='%s'", result) - return result - - -# def _get_submodule_signature( -# partial_signature: List[str], *, git_commit_type: str = "all" -# ) -> str: -# """ -# Add git signature for all submodules. 
-# :param partial_signature: the signature to append to -# `git_commit_type` the type of git commit to include in the -# signature -# :return: system signature enhanced by git submodule info -# """ -# # TODO(Juraj): Think of a better generalisation rather listing all the options. -# submodule_options = ["amp", "amp/helpers_root", "helpers_root"] -# signature = partial_signature -# prev_cwd = os.getcwd() -# for submodule in submodule_options: -# if os.path.exists(submodule): -# try: -# # Temporarily descend into submodule. -# os.chdir(submodule) -# signature.append(f"# Git {submodule}") -# git_amp_sig = _get_git_signature(git_commit_type) -# signature = _append(signature, git_amp_sig) -# # In case there is a runtime error we want to end up in a consistent -# # state (the original path). -# finally: -# os.chdir(prev_cwd) -# hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) -# return signature - - -# ############################################################################# -# Get system info. -# ############################################################################# - - -def _get_platform_info() -> str: - """ - Get platform information as a list of strings. - """ - import platform - - txt_tmp: List[str] = [] - uname = platform.uname() - txt_tmp.append(f"system={uname.system}") - txt_tmp.append(f"node name={uname.node}") - txt_tmp.append(f"release={uname.release}") - txt_tmp.append(f"version={uname.version}") - txt_tmp.append(f"machine={uname.machine}") - txt_tmp.append(f"processor={uname.processor}") - # - txt = hprint.to_info("Platform info", txt_tmp) - return txt - - -def _get_psutil_info() -> str: - """ - Get system resource information using psutil. 
- """ - try: - import psutil - - has_psutil = True - except ModuleNotFoundError as e: - _LOG.warning("psutil is not installed: %s", str(e)) - has_psutil = False - txt_tmp = [] - if has_psutil: - txt_tmp.append(f"cpu count={psutil.cpu_count()}") - if hasattr(psutil, "cpu_freq") and psutil.cpu_freq is not None: - txt_tmp.append(f"cpu freq={str(psutil.cpu_freq())}") - else: - txt_tmp.append("cpu freq=unavailable") - # TODO(gp): Report in MB or GB. - txt_tmp.append(f"memory={str(psutil.virtual_memory())}") - txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") - else: - txt_tmp.append("psutil is not installed") - # - txt = hprint.to_info("psutils info", txt_tmp) - return txt - - -# ############################################################################# -# Get package info. -# ############################################################################# - - -def _get_library_version(lib_name: str) -> str: - try: - cmd = f"import {lib_name}" - # pylint: disable=exec-used - exec(cmd) - except ImportError: - version = "?" - else: - cmd = f"{lib_name}.__version__" - version = eval(cmd) - return version - - -def _get_package_info() -> Tuple[str, int]: - """ - Get package version information. 
- - Returns: - Tuple containing: - - List of strings with package info - - Number of failed imports - """ - import platform - - txt_tmp = [] - packages = [] - packages.append(("python", platform.python_version())) - # import sys - # print(sys.version) - libs = [ - "cvxopt", - "cvxpy", - "gluonnlp", - "gluonts", - "joblib", - "mxnet", - "numpy", - "pandas", - "pyarrow", - "scipy", - "seaborn", - "sklearn", - "statsmodels", - ] - libs = sorted(libs) - failed_imports = 0 - for lib in libs: - # This is due to Cmamp4924: - # WARNING: libarmpl_lp64_mp.so: cannot open shared object file: No such - # file or directory - try: - version = _get_library_version(lib) - except OSError as e: - print(_WARNING + ": " + str(e)) - if version.startswith("ERROR"): - failed_imports += 1 - packages.append((lib, version)) - txt_tmp.extend([f"{lib}: {version}" for (lib, version) in packages]) - # - txt = hprint.to_info("Packages", txt_tmp) - return txt, failed_imports - - -# ############################################################################# - - -def _get_git_info(git_commit_type: str) -> str: - txt_tmp: List[str] = [] - try: - txt_tmp.append(_get_git_signature(git_commit_type)) - # If there are any submodules, fetch their git signature. - # txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) - except RuntimeError as e: - _LOG.warning(str(e)) - txt_tmp.append("No git info") - # - txt = hprint.to_info("Git info", txt_tmp) - return txt - - -# ############################################################################# -# Get system signature. -# ############################################################################# - - -def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: - """ - Return a string with the system signature. - - :param git_commit_type: the type of git commit to include in the - signature - :return: the system signature and the number of failed imports - """ - txt: List[str] = [] - # Add container version. 
- txt_tmp = hversio.get_container_version_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add Git signature. - txt_tmp = _get_git_info(git_commit_type) - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add platform info. - txt_tmp = _get_platform_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add psutil info. - txt_tmp = _get_psutil_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add Docker info. - txt_tmp = hserver.get_docker_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add package info. - txt_tmp, failed_imports = _get_package_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # - txt_str: str = hprint.to_info("System signature", txt) - return txt_str, failed_imports - - -# ############################################################################# -# Package all the information into a string. -# ############################################################################# - - -def env_to_str( - repo_config: bool = True, - server_config: bool = True, - system_signature: bool = True, - env_vars: bool = True, -) -> str: - """ - Package all the information into a string. 
- """ - # - msg = "" - # - if repo_config: - repo_config_str = hrecouti.get_repo_config().config_func_to_str() - msg += hprint.to_info("Repo config", repo_config_str) + "\n" - # - if server_config: - server_config_str = hserver.config_func_to_str() - msg += hprint.to_info("Server config", server_config_str) + "\n" - # - if system_signature: - msg += get_system_signature()[0] + "\n" - # - if env_vars: - env_vars_str = env_vars_to_string() - msg += hprint.to_info("Env vars", env_vars_str) + "\n" - return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py deleted file mode 100644 index d758ff16b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Import as: - -import helpers.hfile_tree as hfiltree -""" - -import logging -import os -import pathlib -import re -from typing import Dict, List - -_LOG = logging.getLogger(__name__) - - -def _build_tree_lines( - dir_name: str, - nodes: List[pathlib.Path], - comments: Dict[str, str], -) -> str: - """ - Build the text lines for the directory tree while preserving inline - comments. 
- - :param dir_name: the directory name - :param nodes: relative paths under the given directory - :param comments: inline comments from existing file - :return: a formatted tree - - Example output: - ``` - devops - - __init__.py - - compose - - __init__.py - - tmp.docker-compose.yml - - docker_build - - create_users.sh - - dev.Dockerfile - - dockerignore.dev - - dockerignore.prod - - etc_sudoers - - fstab - - install_cprofile.sh - - install_dind.sh - - install_os_packages.sh - - install_publishing_tools.sh - - install_python_packages.sh - - pip_list.txt - - poetry.lock - - poetry.toml - - prod.Dockerfile - - pyproject.python_data_stack.toml - - pyproject.toml - - update_os.sh - - utils.sh - - docker_run - - bashrc - - docker_setenv.sh - - entrypoint.sh - - run_jupyter_server.sh - - env - - default.env - ``` - """ - lines = [dir_name] - for rel in nodes: - indent = " " * (len(rel.parts) - 1) - key = "/".join(rel.parts) - suffix = comments.get(key, "") - lines.append(f"{indent}- {rel.name}{suffix}".rstrip()) - return "\n".join(lines) - - -def _parse_comments(old_tree: List[str]) -> Dict[str, str]: - """ - Parse existing tree lines to extract inline comments. - - :param old_tree: the existing tree block - :return: inline comments and indentations - """ - comments: Dict[str, str] = {} - stack: List[str] = [] - for line in old_tree: - # Find indents, bullet points, name, and inline comments. - match = re.match(r"^(\s*)-\s+([^\s#]+)(\s*#.*)?$", line) - if not match: - continue - indent, name, suffix = match.groups() - level = len(indent) // 2 - stack = stack[:level] - stack.append(name) - key = "/".join(stack) - comments[key] = suffix or "" - return comments - - -def _get_tree_nodes( - dir_path: pathlib.Path, - depth: int, - include_tests: bool, - include_python: bool, - only_dirs: bool, -) -> List[pathlib.Path]: - """ - Get relative paths under the given directory based on filters. 
- - Filters include: - - Test files and directories - - Python files - - :param dir_path: the directory path - :param depth: maximum depth to traverse - :param include_tests: include test files or directories - :param include_python: only show python files - :param only_dirs: only show directories - :return: all relative paths that match the specified flags - """ - nodes: List[pathlib.Path] = [] - for dirpath, dirnames, filenames in os.walk(dir_path): - rel_dir = pathlib.Path(dirpath).relative_to(dir_path) - level = len(rel_dir.parts) - if 0 < depth <= level: - # Stop pruning on given depth. - dirnames[:] = [] - continue - if not include_tests: - # Prune out test directories. - filtered = [] - for d in dirnames: - dir_lower = d.lower() - if not ( - dir_lower.startswith("test_") - or dir_lower in {"test", "tests"} - ): - filtered.append(d) - dirnames[:] = filtered - candidates = dirnames + filenames - for name in candidates: - full_path = pathlib.Path(dirpath) / name - rel_path = full_path.relative_to(dir_path) - name_lower = name.lower() - is_dir = full_path.is_dir() - is_test_name = name_lower.startswith("test_") or name_lower in { - "test", - "tests", - } - is_test = is_test_name or name_lower.endswith("_test.py") - is_python = full_path.suffix in {".py", ".ipynb"} - if is_dir: - # Always include directories. - nodes.append(rel_path) - continue - # Flag filter to include test or python files. - allowed_by_flag = (include_tests and is_test) or ( - include_python and is_python - ) - if only_dirs: - include_file = allowed_by_flag - else: - include_file = allowed_by_flag or ( - not is_test - and not is_python - and not include_tests - and not include_python - ) - if include_file: - nodes.append(rel_path) - nodes.sort() - return nodes - - -def generate_tree( - path: str, - depth: int, - include_tests: bool, - include_python: bool, - only_dirs: bool, - output: str, -) -> str: - """ - Generate a directory tree, and optionally update or create a markdown file. 
- - :param path: directory path to traverse - :param depth: maximum depth to traverse - :param include_tests: include test files or directories - :param include_python: include show python files - :param only_dirs: only show directories - :param output: path of the markdown file to create or update - """ - dir_path = pathlib.Path(path).resolve() - nodes = _get_tree_nodes( - dir_path, depth, include_tests, include_python, only_dirs - ) - _LOG.debug("Collected %d nodes under '%s'", len(nodes), dir_path) - if output: - output_path = pathlib.Path(output) - start_marker = f"" - end_marker = "" - prefix = [] - suffix = [] - comments = {} - if output_path.exists(): - # Parse inline comments. - file = output_path.read_text(encoding="utf-8") - lines = file.splitlines() - _LOG.debug("Reading existing file '%s' for markers", output_path) - try: - idx_start = lines.index(start_marker) - idx_end = lines.index(end_marker) - _LOG.debug("Markers found at lines %d–%d", idx_start, idx_end) - except ValueError as exc: - raise RuntimeError( - "Couldn't find tree markers in output file." - ) from exc - # Parse existing file. - prefix = lines[:idx_start] - old_tree = lines[idx_start + 1 : idx_end] - suffix = lines[idx_end + 1 :] - comments = _parse_comments(old_tree) - # Build the directory tree. - tree_block = _build_tree_lines(dir_path.name, nodes, comments) - # Build the content of the file. - content = ( - "\n".join(prefix + [start_marker, tree_block, end_marker] + suffix) - + "\n" - ) - output_path.write_text(content, encoding="utf-8") - _LOG.debug("Writing updated tree to '%s'", output_path) - # Return tree without markers. 
- tree_block = _build_tree_lines(dir_path.name, nodes, {}) - return tree_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py deleted file mode 100644 index 14e2f600e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py +++ /dev/null @@ -1,1865 +0,0 @@ -""" -Import as: - -import helpers.hgit as hgit -""" - -import collections -import functools -import logging -import os -import random -import re -import string -from typing import cast, List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.repo_config_utils as hrecouti - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# We refer to "Git" when we talk about the control system (e.g., "in a Git -# repository") and `git` when we refer to implementation of Git as a program -# installed in a computer. - -# TODO(gp): Check -# https://git-scm.com/book/en/v2/Appendix-B%3A-Embedding-Git-in-your-Applications-Dulwich - -# TODO(gp): Avoid "stuttering": the module is already called "git", so no need -# to make reference to git again. - -# TODO(gp): Add mem caching to some functions below. We assume that one doesn't -# change dir (which is a horrible idea) and thus we can memoize. - -# TODO(gp): Spell super_module and sub_module always in the same way in both -# comments and code. For simplicity (e.g., instead of `super_module` in code and -# `super-module` in comment) we might want to spell `supermodule` everywhere. 
- -# ############################################################################# -# Git branch functions -# ############################################################################# - - -def extract_gh_issue_number_from_branch(branch_name: str) -> Optional[int]: - """ - Extract the GitHub issue number from a branch name. - - Example: - CmampTask10725_Add_more_tabs_to_orange_tmux -> 10725 - HelpersTask23_Add_more_tabs_to_orange_tmux -> 23. - - Works only if `invoke gh_branch_create` was used to create the branch. - or the name was retrieved using `invoke gh_issue_title`. - - :param branch_name: the name of the branch - :return: the issue number or None if it can't be extracted - """ - match = re.match(r".*Task_?(\d+)(?:_\w+)?", branch_name) - if match: - # Return the captured number. - return int(match.group(1)) - return None - - -def get_branch_name(dir_name: str = ".") -> str: - """ - Return the name of the Git branch in a directory. - - E.g., `master` or `AmpTask672_Add_script_to_check_and_merge_PR` - - :param dir_name: directory containing the git repository - :return: the name of the current branch - """ - hdbg.dassert_path_exists(dir_name) - # > git rev-parse --abbrev-ref HEAD - # master - cmd = f"cd {dir_name} && git rev-parse --abbrev-ref HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - return output - - -def _get_branch_next_name_via_github_api( - curr_branch_name: str, - *, - max_num_ids: int = 100, -) -> Optional[str]: - """ - Find the next available branch name using GitHub API (fast method). - - Uses `gh pr list` to query merged branches and extract the highest number. - - :param curr_branch_name: current branch name (e.g., "gp_scratch") - :param max_num_ids: maximum number of IDs to check - :return: next available branch name or None if GitHub API is not available - """ - try: - # Query merged PRs and extract branch names matching pattern. 
- cmd = ( - "gh pr list --state merged --json headRefName " - "| jq -r '.[].headRefName | select(test(\"^{branch}_[0-9]+$\"))' " - "| sed 's/.*_//' | sort -rn | head -1" - ).format(branch=re.escape(curr_branch_name)) - _LOG.debug("Running GitHub API query: %s", cmd) - ret, output = hsystem.system_to_one_line(cmd, suppress_output=True) - if ret != 0: - _LOG.debug("GitHub API query failed, falling back to linear scan") - return None - # Extract the highest number from merged branches. - output = output.strip() - if output: - highest_num = int(output) - next_num = highest_num + 1 - new_branch_name = f"{curr_branch_name}_{next_num}" - _LOG.info( - "Found highest number '%s' in merged branches, next is '%s'", - highest_num, - next_num, - ) - return new_branch_name - # No existing numbered branches found. - _LOG.debug("No existing numbered branches found, starting at 1") - return f"{curr_branch_name}_1" - except Exception as e: - _LOG.debug( - "Error querying GitHub API: %s, falling back to linear scan", - e, - ) - return None - - -@functools.lru_cache() -def _get_gh_pr_list() -> str: - """ - Get a cached list of all pull requests from GitHub (merged and open). - - Results are cached via functools.lru_cache to avoid repeated GitHub API calls. - - :return: raw output from `gh pr list` command - """ - cmd = "gh pr list -s all --limit 1000" - rc, txt = hsystem.system_to_string(cmd) - _ = rc - return txt - - -def does_branch_exist( - branch_name: str, - mode: str, - *, - dir_name: str = ".", -) -> bool: - """ - Check if a branch with the given name exists in local git or on GitHub. - - Supports checking in local git repository or on GitHub via the `gh` CLI. 
- - :param branch_name: the name of the branch to check - :param mode: where to check ("all" checks all, "git_local", "git_remote", "github") - :param dir_name: directory containing the git repository - :return: True if the branch exists in the specified location - """ - _LOG.debug(hprint.to_str("branch_name mode dir_name")) - # Handle the "all" case by recursion on all the possible modes. - if mode == "all": - exists = False - for mode_tmp in ("git_local", "git_remote", "github"): - exists_tmp = does_branch_exist( - branch_name, mode_tmp, dir_name=dir_name - ) - exists = exists or exists_tmp - return exists - # - hdbg.dassert_in(mode, ("git_local", "git_remote", "github")) - exists = False - if mode in ("git_local", "git_remote"): - # From https://stackoverflow.com/questions/35941566 - cmd = f"cd {dir_name} && git fetch --prune" - hsystem.system(cmd, abort_on_error=False) - # From https://stackoverflow.com/questions/5167957 - # > git rev-parse --verify LimeTask197_Get_familiar_with_CF2 - # f03bfa0b4577c2524afd6a1f24d06013f8aa9f1a - # > git rev-parse --verify I_dont_exist - # fatal: Needed a single revision - git_branch_name = branch_name - if mode == "git_remote": - git_branch_name = f"origin/{git_branch_name}" - cmd = f"cd {dir_name} && git rev-parse --verify {git_branch_name}" - rc = hsystem.system(cmd, abort_on_error=False) - exists = rc == 0 - _LOG.debug("branch_name='%s' on git: exists=%s", branch_name, exists) - # Check on GitHub. - if mode == "github": - txt = _get_gh_pr_list() - # ``` - # > gh pr list -s all --limit 10000 | grep AmpTask2163 - # 347 AmpTask2163_Implement_tiled_backtesting_1 AmpTask2163 ... MERGED - # ``` - # The text is separated by tabs. - # - # If there are no issues on the GitHub repo, just return. - # ``` - # > gh pr list -s all --limit 1000 - # no pull requests match your search in causify-ai/sports_analytics - # ``` - if txt == "": - return False - for line in txt.split("\n"): - # number, GH branch name, Git branch name, status. 
- fields = line.split("\t") - # fields=['179', - # 'CmTask2914: Add end-to-end unit test for prod reconcile', - # 'CmTask2914_Add_end_to_end_unit_test_around_the_prod_reconciliation', - # 'DRAFT', '2022-09-27 19:56:50 +0000 UTC'] - hdbg.dassert_lte(4, len(fields), "fields=%s", fields) - number, gh_branch_name, git_branch_name = fields[:3] - _ = number, gh_branch_name - if branch_name == git_branch_name: - exists = True - _LOG.debug( - "branch_name='%s' on github: exists=%s", branch_name, exists - ) - return exists - - -def _get_branch_next_name_linear_scan( - dir_name: str, - curr_branch_name: str, - *, - max_num_ids: int = 100, - log_verb: int = logging.DEBUG, -) -> str: - """ - Find the next available branch name using linear scanning (fallback method). - - Tries branch names sequentially until finding one that doesn't exist. - - :param dir_name: directory containing the git repository - :param curr_branch_name: current branch name (e.g., "gp_scratch") - :param max_num_ids: maximum number of IDs to check - :param log_verb: logging verbosity level - :return: next available branch name - """ - for i in range(1, max_num_ids): - new_branch_name = f"{curr_branch_name}_{i}" - _LOG.info("Trying branch name '%s' ...", new_branch_name) - mode = "all" - exists = does_branch_exist(new_branch_name, mode, dir_name=dir_name) - _LOG.log(log_verb, "-> exists=%s", exists) - if not exists: - _LOG.log(log_verb, "new_branch_name='%s'", new_branch_name) - return new_branch_name - raise ValueError( - f"Can't find the next branch name for '{curr_branch_name}' " - f"within {max_num_ids} ids" - ) - - -def get_branch_next_name( - dir_name: str = ".", - *, - curr_branch_name: Optional[str] = None, - log_verb: int = logging.DEBUG, - method: str = "auto", -) -> str: - """ - Return a name derived from the branch so that the branch doesn't exist. 
- - E.g., `AmpTask1903_Implemented_system_Portfolio` -> - `AmpTask1903_Implemented_system_Portfolio_3` - - :param dir_name: directory containing the git repository - :param curr_branch_name: branch name to use (if None, gets current branch) - :param log_verb: logging verbosity level - :param method: method to use ('auto' tries fast first, 'github_api', 'linear_scan') - :return: next available branch name - """ - if curr_branch_name is None: - curr_branch_name = get_branch_name(dir_name=dir_name) - hdbg.dassert_ne( - curr_branch_name, "master", "Cannot get next name for 'master' branch" - ) - _LOG.log(log_verb, "curr_branch_name='%s'", curr_branch_name) - max_num_ids = 100 - hdbg.dassert_in( - method, ["auto", "github_api", "linear_scan"], "Invalid method specified" - ) - # Try GitHub API method first (faster) if requested or on auto mode. - next_name: Optional[str] = None - if method in ("auto", "github_api"): - next_name = _get_branch_next_name_via_github_api( - curr_branch_name, - max_num_ids=max_num_ids, - ) - if next_name is None and method == "github_api": - raise ValueError("GitHub API method requested but failed") - # Fall back to linear scanning if GitHub API failed in auto mode. - if next_name is None and method == "auto": - _LOG.warning("GitHub API method failed, falling back to linear scan") - next_name = _get_branch_next_name_linear_scan( - dir_name, - curr_branch_name, - max_num_ids=max_num_ids, - log_verb=log_verb, - ) - else: - # Fall back to linear scanning method when explicitly requested. - next_name = _get_branch_next_name_linear_scan( - dir_name, - curr_branch_name, - max_num_ids=max_num_ids, - log_verb=log_verb, - ) - hdbg.dassert_ne(next_name, None) - return cast(str, next_name) - - -def get_branch_hash(dir_name: str = ".") -> str: - """ - Return the hash of the commit right before the branch was created. - - This finds the merge-base between the current branch and master, which is - the commit where the branch was created. 
- - :param dir_name: directory containing the git repository - :return: the hash of the commit where the branch diverged from master - """ - curr_branch_name = get_branch_name(dir_name=dir_name) - hdbg.dassert_ne( - curr_branch_name, "master", "Cannot get branch hash for 'master' branch" - ) - _LOG.debug("curr_branch_name=%s", curr_branch_name) - cmd = f"cd {dir_name} && git merge-base master {curr_branch_name}" - _, hash_ = hsystem.system_to_string(cmd) - hash_ = hash_.rstrip("\n").lstrip("\n") - hdbg.dassert_eq( - len(hash_.split("\n")), 1, "Expected single hash line from merge-base" - ) - return hash_ - - -# ############################################################################# - - -@functools.lru_cache() -def is_inside_submodule(git_dir: str = ".") -> bool: - """ - Return whether a dir is inside a Git submodule or a Git supermodule. - - We determine this by checking if the current Git repo is included inside another Git repo. - - :param git_dir: directory to check - :return: True if the directory is inside a submodule - """ - cmd = [] - # Go to the directory. - cmd.append(f"cd {git_dir}") - # > cd im/ - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd.append('cd "$(git rev-parse --show-toplevel)/.."') - # > git rev-parse --is-inside-work-tree - # true - cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") - # Execute the command chain and check the return code. - cmd_as_str = " && ".join(cmd) - rc = hsystem.system(cmd_as_str, abort_on_error=False) - ret: bool = rc == 0 - return ret - - -# ############################################################################# -# Git submodule functions -# ############################################################################# - - -@functools.lru_cache() -def get_client_root(super_module: bool) -> str: - """ - Return the full path of the root of the Git client. - - E.g., `/Users/saggese/src/.../amp`. 
- - :param super_module: if True use the root of the Git super_module, - if we are in a submodule. Otherwise use the Git sub_module root - """ - if super_module and is_inside_submodule(): - # https://stackoverflow.com/questions/957928 - # > cd /Users/saggese/src/.../amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/... - cmd = "git rev-parse --show-superproject-working-tree" - else: - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd = "git rev-parse --show-toplevel" - # TODO(gp): Use system_to_one_line(). - _, out = hsystem.system_to_string(cmd) - out = out.rstrip("\n") - hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") - client_root: str = os.path.realpath(out) - return client_root - - -# TODO(gp): Replace `get_client_root` with this. -# TODO(gp): -> get_client_root2() or get_outermost_supermodule_root() -def find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - import helpers.hio as hio - - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. 
- git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - txt = hio.from_file(git_dir) - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root` (submodule) - # or `gitdir: /path/to/.git/worktrees/name` (worktree). - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # For worktrees, the current path is the root of the worktree. - # The worktree's `.git` file points to the shared git directory - # (e.g., main_repo/.git/worktrees/worktree_name). - if ".git/worktrees/" in git_dir_path: - git_root_dir = path - else: - # For other linked setups (submodules, custom .git directory), - # traverse up to find the root of the target repository. - abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. - hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. 
- path = parent - hdbg.dassert_is_not( - git_root_dir, None, "Git root directory should have been found" - ) - return str(git_root_dir) - - -# ############################################################################# - - -# TODO(gp): There are several functions doing the same work. -# helpers_root/helpers/hgit.py:827:def find_file_in_git_tree( -# helpers_root/helpers/hsystem.py:757:def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: -def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: - """ - Find a file within a directory hierarchy, excluding version control and cache dirs. - - Searches for the file starting from a directory, skipping .git and .mypy_cache - to avoid expensive traversals. - - :param file_name: the name of the file to find - :param dir_path: the directory to start the search from (defaults to git root) - :return: the first absolute path to the file found - """ - if dir_path is None: - dir_path = find_git_root() - _LOG.debug(hprint.to_str("dir_path")) - cmd = ( - rf"find {dir_path} " - + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " - + rf'-o -name "{file_name}" -print' - ) - _LOG.debug(hprint.to_str("cmd")) - _, res = hsystem.system_to_one_line(cmd) - hdbg.dassert_ne(res, "Can't find file '%s' in '%s'", file_name, dir_path) - return res - - -def _is_repo(repo_short_name: str) -> bool: - """ - Check if the current directory is in a repository with the given short name. - - Uses repo config to determine the repository type without relying on directory names. - - :param repo_short_name: the short name of the repository to check (e.g., "helpers", "amp") - :return: True if the current directory is in the specified repository - """ - curr_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - is_repo = bool(curr_repo_short_name == repo_short_name) - return is_repo - - -def is_helpers() -> bool: - """ - Return whether we are inside `helpers` repo. 
- - Either as super module, or a sub module depending on a current - working directory. - """ - return _is_repo("helpers") - - -def find_helpers_root(dir_path: str = ".") -> str: - """ - Find the root directory of the `helpers` repository. - - If the current directory is within the `helpers` repository, the root of the - repository is returned. Otherwise, the function searches for the `helpers_root` - directory starting from the root of the repository. - - :param dir_path: starting directory for the search - :return: absolute path to the `helpers_root` directory - """ - with hsystem.cd(dir_path): - git_root = find_git_root() - if is_helpers(): - # If we are in `helpers` repo as supermodule, its root is the helpers_root. - cmd = "git rev-parse --show-toplevel" - _, helpers_root = hsystem.system_to_one_line(cmd) - else: - # Search for the `helpers_root` directory from the root of the supermodule. - helpers_root = find_file("helpers_root", dir_path=git_root) - helpers_root = os.path.abspath(helpers_root) - # Verify that the directory and `helpers` subdirectory exist. - hdbg.dassert_dir_exists( - helpers_root, "helpers_root directory must exist" - ) - hdbg.dassert_dir_exists( - os.path.join(helpers_root, "helpers"), - "helpers subdirectory must exist within helpers_root", - ) - return helpers_root - - -# ############################################################################# - - -def resolve_git_client_dir(git_client_name: str) -> str: - """ - Resolve the absolute path of the Git client directory. - - Supports both relative names (assumed to be in ~/src/) and absolute paths. - - :param git_client_name: the name of the Git client (e.g., "helpers1" - or "/Users/saggese/src/helpers1") - :return: the absolute path of the Git client directory - """ - if not os.path.isabs(git_client_name): - # Relative names are resolved relative to ~/src/ directory for convenience. 
- git_client_dir = os.path.join(os.environ["HOME"], "src", git_client_name) - else: - # Absolute paths are used as-is. - git_client_dir = git_client_name - _LOG.debug(hprint.to_str("git_client_dir")) - hdbg.dassert_dir_exists(git_client_dir, "Git client directory must exist") - return git_client_dir - - -def project_file_name_in_git_client( - file_name: str, - git_src_dir: str, - git_dst_dir: str, - *, - check_src_file_exists: bool = False, - check_dst_file_exists: bool = False, -) -> str: - """ - Find the file corresponding to `file_name` in `git_src_dir` for the client - `git_dst_dir`. - - This is useful when we want to find the file in a destination Git client - directory corresponding to a file in a source Git client directory. - - E.g., for: - ``` - file_name = '/Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py' - git_src_dir = '/Users/saggese/src/helpers1' - git_dst_dir = '/Users/saggese/src/helpers2' - ``` - the output is - `/Users/saggese/src/helpers2/dev_scripts_helpers/system_tools/path.py` - - :param file_name: the name of the file to find (which is under `git_src_dir`) - :param git_src_dir: the directory of the Git client from which `file_name` is - :param git_dst_dir: the directory of the Git client to which find the - corresponding file - :param check_src_file_exists: if True, check that `file_name` exists in - `git_src_dir` - :param check_dst_file_exists: if True, check that the file in `git_dst_dir` - exists - :return: the absolute path of the file in `git_dst_dir` - """ - if not os.path.isabs(file_name): - file_name = os.path.abspath(file_name) - if check_src_file_exists: - hdbg.dassert_file_exists(file_name) - if not os.path.isabs(git_src_dir): - git_src_dir = os.path.abspath(git_src_dir) - if not os.path.isabs(git_dst_dir): - git_dst_dir = os.path.abspath(git_dst_dir) - # Compute the relative path of the file in the source git client. 
- hdbg.dassert_is_path_abs(file_name) - hdbg.dassert_is_path_abs(git_src_dir) - rel_path = os.path.relpath(file_name, git_src_dir) - # Compute the absolute path of the file in the destination git client. - hdbg.dassert_is_path_abs(git_dst_dir) - dst_file_path = os.path.join(git_dst_dir, rel_path) - dst_file_path = os.path.abspath(dst_file_path) - if check_dst_file_exists: - hdbg.dassert_file_exists(dst_file_path) - return dst_file_path - - -def get_project_dirname(only_index: bool = False) -> str: - """ - Return the name of the project directory (e.g., `/Users/saggese/src/amp1` -> `amp1`). - - NOTE: This works properly only outside Docker. Inside Docker the Git client is - mapped to `/app`, so the result might be incorrect. - - :param only_index: if True, return only the numeric suffix (e.g., "1" from "amp1") - :return: the directory name or numeric index suffix - """ - # git_dir = get_client_root(super_module=True) - git_dir = find_git_root() - _LOG.debug("git_dir=%s", git_dir) - ret = os.path.basename(git_dir) - if only_index: - last_char = ret[-1] - hdbg.dassert( - last_char.isdigit(), - "The last char `%s` of the git dir `%s` is not a digit", - last_char, - git_dir, - ) - ret = last_char - _LOG.debug("ret=%s", ret) - return ret - - -def is_amp() -> bool: - """ - Return whether we are inside `amp` repo. - - Either as super module or a sub module depending on a current - working directory. - """ - return _is_repo("amp") or _is_repo("cmamp") or _is_repo("sorr") - - -def is_in_helpers_as_supermodule() -> bool: - """ - Return whether we are in the `helpers` repo and it's a super-module, i.e., - `helpers` by itself. - """ - return is_helpers() and not is_inside_submodule(".") - - -# TODO(gp): Be consistent with submodule and sub-module in the code. Same for -# supermodule. -def is_in_amp_as_submodule() -> bool: - """ - Return whether we are in the `amp` repo and it's a sub-module, e.g., of - `lm`. 
- """ - return is_amp() and is_inside_submodule(".") - - -def is_in_amp_as_supermodule() -> bool: - """ - Return whether we are in the `amp` repo and it's a super-module, i.e., - `amp` by itself. - """ - return is_amp() and not is_inside_submodule(".") - - -def is_amp_present(*, dir_name: str = ".") -> bool: - """ - Return whether the `amp` dir exists. - - This is a bit of an hacky way of knowing if there is the amp - submodule. - - :param dir_name: path to the directory where we want to - check the existence of `amp`. - """ - amp_path = os.path.join(dir_name, "amp") - return os.path.exists(amp_path) - - -# Using these functions is the last resort to skip / change the tests depending -# on the repo. We should control the tests through what functionalities they -# have, rather than the name of the repo. - - -def is_cmamp() -> bool: - """ - Return whether we are inside `cmamp` repo. - """ - return _is_repo("cmamp") - - -def is_lem() -> bool: - """ - Return whether we are inside `lem` repo. - """ - return _is_repo("lem") - - -def is_lime() -> bool: - """ - Return whether we are inside `lime` repo. - """ - return _is_repo("lime") - - -# ############################################################################# - - -def _get_submodule_hash(dir_name: str) -> str: - """ - Report the Git hash that a submodule is at from the supermodule perspective. - - Uses git ls-tree to get the submodule commit hash from the parent repository. - > git ls-tree master | grep - 160000 commit 0011776388b4c0582161eb2749b665fc45b87e7e amp - - :param dir_name: the name of the submodule directory - :return: the git commit hash of the submodule - """ - hdbg.dassert_path_exists(dir_name) - # Use git ls-tree to get the submodule entry which includes its hash. - cmd = f"git ls-tree master | grep {dir_name}" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - _LOG.debug("output=%s", output) - # Parse the output; format is: "160000 commit ". 
- data: List[str] = output.split() - _LOG.debug("data=%s", data) - # Extract the hash from the third field (index 2). - git_hash = data[2] - return git_hash - - -@functools.lru_cache() -def get_path_from_supermodule() -> Tuple[str, str]: - """ - Return the path to the Git repo including the Git submodule for a submodule. - - Returns the superproject path and submodule path, or empty for a supermodule. - E.g., - - for amp included in another repo returns 'amp' - - for amp without supermodule returns '' - - :return: tuple of (superproject_path, submodule_path) - """ - # Get the superproject working tree path. - cmd = "git rev-parse --show-superproject-working-tree" - # > cd /Users/saggese/src/.../lm/amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/.../lm - # - # > cd /Users/saggese/src/.../lm - # > git rev-parse --show-superproject-working-tree - # (No result) - superproject_path: str = hsystem.system_to_one_line(cmd)[1] - _LOG.debug("superproject_path='%s'", superproject_path) - # Query the .gitmodules file to get the path for the current submodule. - cmd = ( - f"git config --file {superproject_path}/.gitmodules --get-regexp path" - '| grep $(basename "$(pwd)")' - "| awk '{ print $2 }'" - ) - # > git config --file /Users/saggese/src/.../.gitmodules --get-regexp path - # submodule.amp.path amp - submodule_path: str = hsystem.system_to_one_line(cmd)[1] - _LOG.debug("submodule_path='%s'", submodule_path) - return superproject_path, submodule_path - - -@functools.lru_cache() -def get_submodule_paths() -> List[str]: - """ - Return the path of the submodules in this repo. - - :return: list of submodule paths, e.g., ["amp"] or [] - """ - # Query .gitmodules to get submodule paths. 
- # > git config --file .gitmodules --get-regexp path - # submodule.amp.path amp - cmd = "git config --file .gitmodules --get-regexp path | awk '{ print $2 }'" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("txt=%s", txt) - # Convert the output string to a list of paths. - files: List[str] = hsystem.text_to_list(txt) - _LOG.debug("files=%s", files) - return files - - -def has_submodules() -> bool: - """ - Return whether the repository has any submodules configured. - - :return: True if the repository contains submodules - """ - return len(get_submodule_paths()) > 0 - - -# ############################################################################# - - -def _get_hash(git_hash: str, short_hash: bool, num_digits: int = 8) -> str: - """ - Return the git hash, optionally shortened. - - :param git_hash: the full git hash - :param short_hash: if True, return only the first num_digits characters - :param num_digits: number of digits for short hash - :return: the git hash or shortened version - """ - hdbg.dassert_lte(1, num_digits) - # Return shortened hash if requested, otherwise return full hash. - if short_hash: - ret = git_hash[:num_digits] - else: - ret = git_hash - return ret - - -def _group_hashes(head_hash: str, remh_hash: str, subm_hash: str) -> str: - """ - Group multiple hashes and display which ones are equal. - - Transform three hashes into a string that shows which ones are identical. - For example, if head_hash == remh_hash, display "head_hash = remh_hash = ". - - :param head_hash: the head hash - :param remh_hash: the remote head hash - :param subm_hash: the submodule hash - :return: formatted string showing hash equality - """ - # Build a mapping from hash names to their values. - map_ = collections.OrderedDict() - map_["head_hash"] = head_hash - map_["remh_hash"] = remh_hash - if subm_hash: - map_["subm_hash"] = subm_hash - # Invert the mapping to group identical hashes together. 
- inv_map = collections.OrderedDict() - for k, v in map_.items(): - if v not in inv_map: - inv_map[v] = [k] - else: - inv_map[v].append(k) - # Format the output so equal hashes are grouped together. - txt = [] - for k, v in inv_map.items(): - # Transform: - # ('a2bfc704', ['head_hash', 'remh_hash']) - # into - # 'head_hash = remh_hash = a2bfc704' - txt.append(f"{' = '.join(v)} = {k}") - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# GitHub repository name -# ############################################################################# - - -# All functions should take as input `repo_short_name` and have a switch `mode` -# to distinguish full vs short repo name. - -# TODO(gp): Maybe rename full -> long to keep it more symmetric "short vs long". - - -def _parse_github_repo_name(repo_name: str) -> Tuple[str, str]: - """ - Parse a repo name from `git remote`. - - The supported formats are both SSH and HTTPS, e.g., - - `git@github.com:alphamatic/amp` - - `https://github.com/alphamatic/amp` - - For both of these strings the function returns ("github.com", "alphamatic/amp"). - """ - # Try to parse the SSH format, e.g., `git@github.com:alphamatic/amp` - m = re.match(r"^git@(\S+.com):(\S+)$", repo_name) - if not m: - # Try tp parse the HTTPS format, e.g., `https://github.com/alphamatic/amp` - m = re.match(r"^https://(\S+.com)/(\S+)$", repo_name) - hdbg.dassert(m, "Can't parse '%s'", repo_name) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. - assert m is not None - host_name = m.group(1) - repo_name = m.group(2) - _LOG.debug("host_name=%s repo_name=%s", host_name, repo_name) - # We expect something like "alphamatic/amp". - m = re.match(r"^\S+/\S+$", repo_name) - hdbg.dassert(m, "repo_name='%s'", repo_name) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. 
- assert m is not None - # origin git@github.com:.../ORG_....git (fetch) - suffix_to_remove = ".git" - if repo_name.endswith(suffix_to_remove): - repo_name = repo_name[: -len(suffix_to_remove)] - return host_name, repo_name - - -def get_repo_full_name_from_dirname( - dir_name: str, include_host_name: bool -) -> str: - """ - Return the full name of the repo in a directory. - - E.g., "alphamatic/amp" or "github.com/alphamatic/amp" (if hostname included). - - This function relies on `git remote` to extract the origin URL. - - :param dir_name: directory containing the git repository - :param include_host_name: if True, prepend the GitHub hostname (e.g., - "github.com/alphamatic/amp") - :return: the full name of the repo - - E.g., "alphamatic/amp", "github.com/alphamatic/amp". - """ - hdbg.dassert_path_exists(dir_name) - cmd = f"cd {dir_name}; (git remote -v | grep origin | grep fetch)" - _, output = hsystem.system_to_string(cmd) - # > git remote -v - # origin git@github.com:alphamatic/amp (fetch) - # origin git@github.com:alphamatic/amp (push) - data: List[str] = output.split() - _LOG.debug("data=%s", data) - hdbg.dassert_eq(len(data), 3, "Expected 3 fields from git remote output") - # Extract the origin URL (second field). - repo_name = data[1] - # Parse SSH/HTTPS URL into host and org/repo parts. - host_name, repo_name = _parse_github_repo_name(repo_name) - if include_host_name: - res = f"{host_name}/{repo_name}" - else: - res = repo_name - return res - - -# ############################################################################# -# Git hash -# ############################################################################# - - -def get_head_hash(dir_name: str = ".", short_hash: bool = False) -> str: - """ - Return the git commit hash of a repository with submodule/random suffix. - - Gets the HEAD commit hash and appends either the amp submodule hash (if present) - or a random suffix to make the hash unique across different module configurations. 
- - ``` - > git rev-parse HEAD - 4759b3685f903e6c669096e960b248ec31c63b69 - ``` - - :param dir_name: directory containing the git repository - :param short_hash: if True, return abbreviated hash (useful when combined with suffix) - :return: the commit hash with submodule/random suffix (e.g., "4759b36-abc123") - """ - hdbg.dassert_path_exists(dir_name) - # Get the commit hash, optionally abbreviated to 7 characters. - opts = "--short " if short_hash else " " - cmd = f"cd {dir_name} && git rev-parse {opts}HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - # Check whether we are building an orange image. If the condition - # is True, add './amp' hash to the tag as well. - if is_amp_present(dir_name=dir_name): - amp_hash = get_head_hash(os.path.join(dir_name, "amp"), short_hash=True) - output = output + "-" + amp_hash - else: - # Use random suffix when no submodule exists (needed for Docker image tags). - random_string = "".join( - random.choices(string.ascii_lowercase + string.digits, k=3) - ) - output = output + "-" + random_string - return output - - -def get_remote_head_hash(dir_name: str) -> str: - """ - Return the commit hash that the remote repository's HEAD points to. - - Queries the remote origin to get the current HEAD hash without fetching. - - :param dir_name: directory containing the git repository - :return: the remote HEAD commit hash - """ - hdbg.dassert_path_exists(dir_name) - sym_name = get_repo_full_name_from_dirname(dir_name, include_host_name=False) - cmd = f"git ls-remote git@github.com:{sym_name} HEAD 2>/dev/null" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - # > git ls-remote git@github.com:alphamatic/amp HEAD 2>/dev/null - # 921676624f6a5f3f36ab507baed1b886227ac2e6 HEAD - return output - - -def report_submodule_status(dir_names: List[str], short_hash: bool) -> str: - """ - Return a formatted string reporting the status of git repositories. 
- - Reports whether each directory is a submodule, current branch, and commit hashes - (local, remote, and submodule hash if applicable). - - :param dir_names: list of directory paths to report on - :param short_hash: if True, truncate hashes to 8 characters - :return: formatted string with status information for each directory - """ - txt = [] - for dir_name in dir_names: - txt.append(f"dir_name='{dir_name}'") - txt.append(f" is_inside_submodule: {is_inside_submodule(dir_name)}") - # Get branch name, highlighting if not on master (likely indicates incomplete work). - branch_name = get_branch_name(dir_name) - if branch_name != "master": - branch_name = f"!!! {branch_name} !!!" - txt.append(f" branch: {branch_name}") - # Get local and remote commit hashes. - head_hash = get_head_hash(dir_name) - head_hash = _get_hash(head_hash, short_hash) - txt.append(f" head_hash: {head_hash}") - remh_hash = get_remote_head_hash(dir_name) - remh_hash = _get_hash(remh_hash, short_hash) - txt.append(f" remh_hash: {remh_hash}") - # Get submodule hash if this is not the root directory. - if dir_name != ".": - subm_hash = _get_submodule_hash(dir_name) - subm_hash = _get_hash(subm_hash, short_hash) - txt.append(f" subm_hash: {subm_hash}") - txt_as_str = "\n".join(txt) - return txt_as_str - - -def get_repo_full_name_from_client(super_module: bool) -> str: - """ - Return the full name of the repo (e.g., "alphamatic/amp") from a Git - client. - - :param super_module: like in get_client_root() - """ - # Get the Git remote in the dir containing the Git repo. - git_dir = get_client_root(super_module) - repo_name = get_repo_full_name_from_dirname(git_dir, include_host_name=False) - return repo_name - - -def is_cwd_git_repo() -> bool: - """ - Return whether the current directory is a git repository root. - - Checks for the presence of a .git file or directory in the current location. 
- - :return: True if .git exists in the current directory - """ - return os.path.exists(".git") - - -# ############################################################################# -# Git path -# ############################################################################# - - -# TODO(gp): Use find_file -@functools.lru_cache() -def find_file_in_git_tree( - file_name: str, super_module: bool = True, remove_tmp_base: bool = False -) -> str: - """ - Find the path of a file in a Git tree. - - We get the Git root and then search for the file from there. - """ - root_dir = get_client_root(super_module=super_module) - cmd = rf"find {root_dir} -name '{file_name}' -not -path '*/.git/*'" - if remove_tmp_base: - cmd += r" -not -path '*/tmp\.base/*'" - _, file_name_out = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("file_name_out")) - hdbg.dassert_ne( - file_name_out, - "", - "Can't find file '%s' in dir '%s'", - file_name, - root_dir, - ) - file_name_out: str = os.path.abspath(file_name_out) - hdbg.dassert_path_exists(file_name_out) - return file_name_out - - -def get_path_from_git_root( - file_name: str, - super_module: bool, - *, - git_root: Optional[str] = None, -) -> str: - """ - Get the path of `file_name` from the root of the Git client. - - E.g., in Docker: - - `super_module=True` -> git_root=/app - - `super_module=False` -> git_root=/app/amp - - :param super_module: like get_client_root() - """ - # Get the root of the Git client. - if git_root is None: - git_root = get_client_root(super_module) - # - git_root = os.path.normpath(git_root) - _LOG.debug("git_root=%s", git_root) - file_name = os.path.normpath(file_name) - _LOG.debug("file_name=%s", file_name) - if file_name.startswith(git_root): - # Remove the `git_root` from file_name. - ret = os.path.relpath(file_name, git_root) - else: - # If the file is not under the root, we can't normalize it. 
- raise ValueError( - f"Can't normalize file_name='{file_name}' for git_root='{git_root}'" - ) - _LOG.debug( - "file_name=%s, git_root=%s (super_module=%s) -> ret=%s", - file_name, - git_root, - super_module, - ret, - ) - return str(ret) - - -# TODO(gp): Rewrite this function in a better way. -@functools.lru_cache() -def get_amp_abs_path() -> str: - """ - Return the absolute path of `amp` dir. - """ - repo_sym_name = get_repo_full_name_from_client(super_module=False) - _LOG.debug("repo_sym_name=%s", repo_sym_name) - # - repo_sym_names = ["alphamatic/amp"] - extra_amp_repo_sym_name = ( - hrecouti.get_repo_config().get_extra_amp_repo_sym_name() - ) - repo_sym_names.append(extra_amp_repo_sym_name) - _LOG.debug("repo_sym_names=%s", repo_sym_names) - # - if repo_sym_name in repo_sym_names: - # If we are in the amp repo, then the git client root is the amp - # directory. - git_root = get_client_root(super_module=False) - amp_dir = git_root - else: - # If we are not in the amp repo, then look for the amp dir. - amp_dir = find_file_in_git_tree( - "amp", super_module=True, remove_tmp_base=True - ) - git_root = get_client_root(super_module=True) - amp_dir = os.path.join(git_root, amp_dir) - amp_dir = os.path.abspath(amp_dir) - # Sanity check. - hdbg.dassert_dir_exists(amp_dir) - return amp_dir - - -# TODO(gp): Is this needed? -def get_repo_dirs() -> List[str]: - """ - Return the list of the repo repositories, e.g., `[".", "amp", "infra"]`. - """ - dir_names = ["."] - dirs = ["amp"] - for dir_name in dirs: - if os.path.exists(dir_name): - dir_names.append(dir_name) - return dir_names - - -# TODO(gp): It should go in hdocker? -# TODO(gp): There are functions in hdocker.py that might be more general than -# this. 
-def find_docker_file( - file_name: str, - *, - root_dir: str = ".", - dir_depth: int = -1, - mode: str = "return_all_results", - candidate_files: Optional[List[str]] = None, -) -> List[str]: - """ - Convert a file or dir that was generated inside Docker to a file in the - current Git client. - - This operation is best-effort since it might not be able to find the - corresponding file in the current repo. - - E.g., - - A file like '/app/amp/core/dataflow_model/utils.py', in a Docker container - with Git root in '/app' becomes 'amp/core/dataflow_model/utils.py' - - For a file like '/app/amp/core/dataflow_model/utils.py' outside Docker, we - look for the file 'dataflow_model/utils.py' in the current client and - then normalize with respect to the - - :param dir_depth: same meaning as in `find_file_with_dir()` - :param mode: same as `system_interaction.select_result_file_from_list()` - :param candidate_files: list of results from the `find` command for unit - test mocking - :return: the best guess for the file name corresponding to `file_name` - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(file_name, str) - # Clean up file name. - file_name = os.path.normpath(file_name) - _LOG.debug("file_name=%s", file_name) - # Find the file in the dir. - file_names = hsystem.find_file_with_dir( - file_name, - root_dir=root_dir, - dir_depth=dir_depth, - mode=mode, - candidate_files=candidate_files, - ) - # Purify. - _LOG.debug("Purifying file_names=%s", file_names) - file_names = [ - os.path.relpath(file_name, root_dir) for file_name in file_names - ] - return file_names - - -# TODO(gp): Use get_head_hash() and remove this. -def get_current_commit_hash(dir_name: str = ".") -> str: - """ - Return the full SHA-1 hash of the current HEAD commit. 
- - :param dir_name: directory containing the git repository - :return: the full commit hash (e.g., "0011776388b4c0582161eb2749b665fc45b87e7e") - """ - hdbg.dassert_path_exists(dir_name) - cmd = f"cd {dir_name} && git rev-parse HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, sha = data - # 0011776388b4c0582161eb2749b665fc45b87e7e - _LOG.debug("sha=%s", sha) - return sha - - -# ############################################################################# -# Modified files -# ############################################################################# - - -def get_modified_files( - dir_name: str = ".", remove_files_non_present: bool = True -) -> List[str]: - """ - Return the files that are added and modified in the Git client. - - In other words the files that will be committed with a `git commit -am ...`. - Equivalent to `dev_scripts/git_files.sh` - - :param dir_name: directory with Git client - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - # If the client status is: - # > git status -s - # AM dev_scripts/infra/ssh_tunnels.py - # M helpers/git.py - # ?? linter_warnings.txt - # - # The result is: - # > git diff --cached --name-only - # dev_scripts/infra/ssh_tunnels.py - # - # > git ls-files -m - # dev_scripts/infra/ssh_tunnels.py - # helpers/git.py - cmd = "(git diff --cached --name-only; git ls-files -m) | sort | uniq" - files: List[str] = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present - ) - return files - - -# TODO(gp): -> ...previously... -def get_previous_committed_files( - dir_name: str = ".", - num_commits: int = 1, - remove_files_non_present: bool = True, -) -> List[str]: - """ - Return files changed in the Git client in the last `num_commits` commits. 
- - Equivalent to `dev_scripts/git_previous_commit_files.sh` - - :param dir_name: directory with Git client - :param num_commits: how many commits in the past to consider - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - cmd = [] - cmd.append('git show --pretty="" --name-only') - cmd.append(f'$(git log --author "$(git config user.name)" -{num_commits}') - cmd.append(r"""| \grep "^commit " | perl -pe 's/commit (.*)/$1/')""") - cmd_as_str = " ".join(cmd) - files: List[str] = hsystem.system_to_files( - cmd_as_str, dir_name, remove_files_non_present - ) - return files - - -def get_modified_files_in_branch( - dst_branch: str, dir_name: str = ".", remove_files_non_present: bool = True -) -> List[str]: - """ - Return files modified in the current branch with respect to `dst_branch`. - - Equivalent to `git diff --name-only master...` - Please remember that there is a difference between `master` and `origin/master`. - See https://stackoverflow.com/questions/18137175 - - :param dir_name: directory with Git client - :param dst_branch: branch to compare to, e.g., `master`, `HEAD` - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - if dst_branch == "HEAD": - target = dst_branch - else: - target = f"{dst_branch}..." - cmd = f"git diff --name-only {target}" - files: List[str] = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present - ) - return files - - -def get_modified_and_untracked_files( - repo_path: str = ".", *, mode: str = "all" -) -> List[str]: - """ - Get list of modified and untracked files in a git repository. - - Excludes files from submodules and deleted files. 
- - Mode options: - - "all": Both modified and untracked files (default, current behavior) - - "modified": Only files with changes (staged, modified, added, renamed, copied) - - "untracked": Only untracked files - - This includes (when mode="all"): - - Modified files (both staged and unstaged) - - Untracked files - - Cached/staged files - - The function uses `git status --porcelain -u` which shows all changes - including cached (staged) files. - - :param repo_path: Path to the git repository - :param mode: Filter mode: "all", "modified", or "untracked" - :return: List of file paths relative to repo_path - """ - hdbg.dassert_dir_exists(repo_path) - # Validate mode. - valid_modes = ["all", "modified", "untracked"] - hdbg.dassert_in( - mode, - valid_modes, - "Invalid mode '%s'; must be one of: %s", - mode, - ", ".join(valid_modes), - ) - # Get modified and untracked files, excluding submodules. - # The command uses: - # - git status --porcelain -u: Get status in machine-readable format with untracked files - # This includes both cached (staged) and modified files - # Status codes: ?? = untracked, M/A/R/C/D = modified/added/renamed/copied/deleted - cmd = f"cd {repo_path} && git status --porcelain -u" - _, output = hsystem.system_to_string(cmd, abort_on_error=False) - # Get submodule paths to exclude. - submodule_cmd = ( - f"cd {repo_path} && " - "git config -f .gitmodules --get-regexp path 2>/dev/null || true" - ) - _, submodule_output = hsystem.system_to_string( - submodule_cmd, abort_on_error=False - ) - submodule_paths = set() - for line in submodule_output.strip().split("\n"): - if line: - # Format: "submodule..path " - parts = line.split() - if len(parts) >= 2: - submodule_paths.add(parts[-1]) - # Parse output. - files = [] - for line in output.strip().split("\n"): - line = line.strip() - if not line: - continue - # Extract status code (first 2 characters) and filename (from position 3). 
- status_code = line[:2] if len(line) >= 2 else "" - file_name = line[3:].strip() if len(line) > 3 else "" - # Filter by mode. - if mode == "untracked": - # Untracked files have status "??" - if status_code != "??": - continue - elif mode == "modified": - # Modified files have any status other than "??" - if status_code == "??": - continue - # Skip submodule paths. - is_in_submodule = any( - file_name.startswith(subpath + "/") or file_name == subpath - for subpath in submodule_paths - ) - if is_in_submodule: - _LOG.debug("Skipping submodule file: %s", file_name) - continue - # Check if file exists (exclude deleted files). - file_path = os.path.join(repo_path, file_name) - if os.path.exists(file_path) and os.path.isfile(file_path): - files.append(file_name) - else: - _LOG.debug("Skipping non-existent or non-file: %s", file_path) - return files - - -def get_summary_files_in_branch( - dst_branch: str, - *, - dir_name: str = ".", -) -> str: - """ - Report summary of files in the current branch with respect to `dst_branch'. - - Same interface as `get_modified_files_in_branch`. - """ - # File types (from https://git-scm.com/docs/git-diff). - file_types = [ - ("added", "A"), - ("copied", "C"), - ("deleted", "D"), - ("modified", "M"), - ("renamed", "R"), - ("type changed", "T"), - ("unmerged", "U"), - ("unknown", "X"), - ("broken pairing", "B"), - ] - res = "" - for tag, diff_type in file_types: - cmd = f"git diff --diff-filter={diff_type} --name-only {dst_branch}..." - files = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present=False - ) - _LOG.debug("files=%s", "\n".join(files)) - if files: - res += f"# {tag}: {len(files)}\n" - res += hprint.indent("\n".join(files)) + "\n" - res = res.rstrip("\n") - return res - - -# ############################################################################# -# Git commands. 
-# ############################################################################# - - -# TODO(gp): -> get_user_name() -@functools.lru_cache() -def get_git_name() -> str: - """ - Return the configured git user name from git config. - - Caches the result to avoid repeated config lookups. - - :return: the configured git user name (e.g., from user.name setting) - """ - cmd = "git config --get user.name" - # For some reason data is annotated as Any by mypy, instead of - # Tuple[int, str] so we need to cast it to the right value. - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - return output - - -def git_log(num_commits: int = 5, my_commits: bool = False) -> str: - """ - Return a formatted git log with graph, timestamps, and author information. - - Uses a custom pretty format to display commits in a user-friendly layout - with graph visualization, relative time, and author name. - - :param num_commits: number of commits to report - :param my_commits: if True, filter to only commits by the current git user - :return: formatted git log output - """ - cmd = [] - cmd.append("git log --date=local --oneline --graph --date-order --decorate") - cmd.append( - "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" - ) - cmd.append(f"-{num_commits}") - if my_commits: - # This doesn't work in a container if the user relies on `~/.gitconfig` to - # set the user name. - # TODO(gp): We should use `get_git_name()`. - cmd.append("--author $(git config user.name)") - cmd = " ".join(cmd) - data: Tuple[int, str] = hsystem.system_to_string(cmd) - _, txt = data - return txt - - -def git_stash_push( - prefix: str, msg: Optional[str] = None, log_level: int = logging.DEBUG -) -> Tuple[str, bool]: - """ - Stash current changes with a timestamped, labeled message. - - Creates a unique stash name from prefix, username, server, and timestamp to - enable tracking of which changes were stashed when and by whom. 
- - :param prefix: prefix for the stash tag (e.g., "backup", "work") - :param msg: optional message to append to the stash description - :param log_level: logging level for system output - :return: tuple of (stash_tag, was_stashed) indicating success - """ - import helpers.hdatetime as hdateti - - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - timestamp = hdateti.get_current_timestamp_as_string("naive_ET") - # Build unique tag from context to identify who stashed what when. - tag = f"{user_name}-{server_name}-{timestamp}" - tag = prefix + "." + tag - _LOG.debug("tag='%s'", tag) - cmd = "git stash push" - _LOG.debug("msg='%s'", msg) - push_msg = tag[:] - if msg: - push_msg += ": " + msg - cmd += f" -m '{push_msg}'" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - # Verify that something was actually stashed (git stash push is silent on no-op). - cmd = rf"git stash list | \grep '{tag}' | wc -l" - _, output = hsystem.system_to_string(cmd) - was_stashed = int(output) > 0 - if not was_stashed: - msg = "Nothing was stashed" - _LOG.warning(msg) - # raise RuntimeError(msg) - return tag, was_stashed - - -def git_stash_apply(mode: str, log_level: int = logging.DEBUG) -> None: - """ - Apply or pop the most recent git stash. - - Displays the stash list before applying to help the user verify they're applying - the correct stash. - - :param mode: "apply" to keep the stash or "pop" to remove after applying - :param log_level: logging level for system output - """ - _LOG.debug("# Checking stash head ...") - cmd = "git stash list | head -3" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - # Restore the stashed changes, either keeping or removing the stash. 
- _LOG.debug("# Restoring local changes...") - if mode == "pop": - cmd = "git stash pop --quiet" - elif mode == "apply": - cmd = "git stash apply --quiet" - else: - raise ValueError(f"mode='{mode}'") - hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -# TODO(gp): Consider using this everywhere. Maybe it can simplify handling issues -# stemming from the super-module / sub-module repo. -def _get_git_cmd(super_module: bool) -> str: - """ - Build a git command prefix with explicit repository and working tree paths. - - Useful for running git commands from outside the repository or when working - with specific submodules/supermodules. - - :param super_module: if True, use supermodule root; else use current module root - :return: git command prefix (e.g., "git --git-dir=... --work-tree=...") - """ - cmd = [] - cmd.append("git") - client_root = get_client_root(super_module=super_module) - # Set the path to the repository (".git" directory), avoiding Git to search for - # it (from https://git-scm.com/docs/git) - cmd.append(f"--git-dir='{client_root}/.git'") - # Explicitly specify working tree location. - cmd.append(f"--work-tree='{client_root}'") - cmd = " ".join(cmd) - return cmd - - -def git_tag( - tag_name: str, super_module: bool = True, log_level: int = logging.DEBUG -) -> None: - """ - Create a git tag on the current commit (locally, not pushed). - - Overwrites existing tags with the same name (using -f flag). 
- - :param tag_name: the name of the tag to create - :param super_module: if True, tag the supermodule; else tag the current module - :param log_level: logging level for system output - """ - _LOG.debug("# Tagging current commit ...") - git_cmd = _get_git_cmd(super_module) - cmd = f"{git_cmd} tag -f {tag_name}" - _ = hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def git_push_tag( - tag_name: str, - remote: str = "origin", - super_module: bool = True, - log_level: int = logging.DEBUG, -) -> None: - """ - Push a git tag to the remote repository. - - :param tag_name: the name of the tag to push - :param remote: the remote name to push to (default: origin) - :param super_module: if True, tag the supermodule; else tag the current module - :param log_level: logging level for system output - """ - _LOG.debug("# Pushing current commit ...") - git_cmd = _get_git_cmd(super_module) - cmd = f"{git_cmd} push {remote} {tag_name}" - _ = hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def git_describe( - match: Optional[str] = None, log_level: int = logging.DEBUG -) -> str: - """ - Return the most recent git tag, or abbreviated commit hash if no tags exist. - - Useful for version identification and release tracking. - - :param match: optional glob pattern to filter tags (e.g., "cmamp-*") - :param log_level: logging level for system output - :return: the closest tag (e.g., "1.0.0") or short commit hash - """ - _LOG.debug("# Looking for version ...") - cmd = "git describe --tags --always --abbrev=0" - if match is not None: - hdbg.dassert_isinstance(match, str, "match pattern must be a string") - hdbg.dassert_ne(match, "", "match pattern cannot be empty") - cmd = f"{cmd} --match '{match}'" - num, tag = hsystem.system_to_one_line(cmd, log_level=log_level) - _ = num - return tag - - -def git_add_update( - file_list: Optional[List[str]] = None, log_level: int = logging.DEBUG -) -> None: - """ - Add files to the git staging area. 
- - If no file list is provided, adds all modified and deleted files (git add -u). - - :param file_list: list of specific files to add; if None, add all modified files - :param log_level: logging level for system output - """ - _LOG.debug("# Adding all changed files to staging ...") - cmd = f"git add {' '.join(file_list) if file_list is not None else '-u'}" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def fetch_origin_master_if_needed() -> None: - """ - Fetch the master branch from origin if running in a CI environment. - - In CI, master may not be fetched when testing a branch, but it's often needed - for tests that compare against baseline or merge behavior. This ensures master - is available if needed. - """ - if hserver.is_inside_ci(): - _LOG.warning("Running inside CI so fetching master") - cmd = "git branch -a" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("%s=%s", cmd, txt) - cmd = r'git branch -a | egrep "\s+master\s*$" | wc -l' - # * (HEAD detached at pull/1337/merge) - # master - # remotes/origin/master - # remotes/pull/1337/merge - _, num = hsystem.system_to_one_line(cmd) - num = int(num) - _LOG.debug("num=%s", num) - if num == 0: - # See AmpTask1321 and AmpTask1338 for details. - cmd = "git fetch origin master:refs/remotes/origin/master" - hsystem.system(cmd) - cmd = "git branch --track master origin/master" - hsystem.system(cmd) - - -def is_client_clean( - dir_name: str = ".", - abort_if_not_clean: bool = False, -) -> bool: - """ - Return whether there are files modified, added, or removed in a directory. - - Ignores submodule changes (amp, helpers_root) to focus on actual code changes. 
- - :param dir_name: directory containing the git repository - :param abort_if_not_clean: if True and the client is not clean, - abort with a detailed message showing the modified files - :return: True if no files are modified (excluding submodules) - """ - _LOG.debug(hprint.to_str("abort_if_not_clean")) - files = get_modified_files(dir_name) - # Exclude submodule directories from consideration since their changes - # are tracked separately and don't affect code cleanliness. - if "amp" in files: - _LOG.warning("Skipping 'amp' in modified files") - files = [f for f in files if "amp" != f] - elif "helpers_root" in files: - _LOG.warning("Skipping 'helpers_root' in modified files") - files = [f for f in files if "helpers_root" != f] - # A Git client is clean iff there are no files in the index. - is_clean = len(files) == 0 - if abort_if_not_clean: - hdbg.dassert( - is_clean, "The Git client is not clean:\n%s", "\n".join(files) - ) - return is_clean - - -def delete_branches( - dir_name: str, - mode: str, - branches: List[str], - confirm_delete: bool, - abort_on_error: bool = True, -) -> None: - """ - Delete local or remote git branches. - - Optionally prompts the user for confirmation before performing deletion. - - :param dir_name: directory containing the git repository - :param mode: "local" for local branches or "remote" for remote branches - :param branches: list of branch names to delete - :param confirm_delete: if True, prompt user for confirmation before deletion - :param abort_on_error: if True, abort on any deletion error - """ - hdbg.dassert_isinstance( - branches, list, "branches must be a list, got type %s", type(branches) - ) - delete_cmd = f"cd {dir_name} && " - if mode == "local": - delete_cmd += "git branch -d" - elif mode == "remote": - delete_cmd += "git push origin --delete" - else: - raise ValueError(f"Invalid mode='{mode}'") - # Prompt for confirmation to prevent accidental deletion of important branches. 
- if confirm_delete: - branches_as_str = " ".join(branches) - msg = ( - hdbg.WARNING - + f": Delete {len(branches)} {mode} branch(es) '{branches_as_str}'?" - ) - hsystem.query_yes_no(msg, abort_on_no=True) - for branch in branches: - if mode == "remote": - prefix = "origin/" - hdbg.dassert( - branch.startswith(prefix), - "Remote branch '%s' needs to start with '%s'", - branch, - prefix, - ) - branch = branch[len(prefix) :] - cmd = f"{delete_cmd} {branch}" - hsystem.system( - cmd, - suppress_output=False, - log_level="echo", - abort_on_error=abort_on_error, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py deleted file mode 100644 index e796b865f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py +++ /dev/null @@ -1,1183 +0,0 @@ -""" -Use cases for this module are at: -helpers/notebooks/Master_how_to_use_hgoogle_drive_api.ipynb - -Import as: - -import helpers.hgoogle_drive_api as hgodrapi -""" - -import datetime -import importlib -import logging -import os -import re -import sys -from typing import List, Optional, Union - -# Keep try-except to avoid `ModuleNotFoundError` in CI/CD (HelpersTask #1183). -try: - # Authentication for Google API to produce credentials. - import google.oauth2.service_account as goasea - - # Google API client for service objects (e.g., Drive, Sheets, etc.) - import googleapiclient.discovery as godisc - - # Built on top of Google API to simplify interactions with Google Sheets. - import gspread - - _GOOGLE_API_AVAILABLE = True -except ImportError: - # If Google API packages are not installed, set placeholders. 
- _GOOGLE_API_AVAILABLE = False - -import pandas as pd - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hmodule as hmodule -import helpers.hpandas as hpandas - -_LOG = logging.getLogger(__name__) - - -def install_needed_modules( - *, use_sudo: bool = True, venv_path: Optional[str] = None -) -> None: - """ - Install needed modules for Google Drive API. - - :param use_sudo: whether to use sudo to install the module - :param venv_path: path to the virtual environment E.g., - /Users/saggese/src/venv/client_venv.helpers - """ - hmodule.install_module_if_not_present( - "google", - package_name="google-auth", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "googleapiclient", - package_name="google-api-python-client", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "gspread", - package_name="gspread", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - # Reload this module (hgoogle_drive_api) if already imported - this_module_name = __name__ - if this_module_name in sys.modules: - importlib.reload(sys.modules[this_module_name]) - - -# ############################################################################# -# Credentials -# ############################################################################# - - -def get_credentials( - *, - service_key_path: Optional[str] = None, -) -> "goasea.Credentials": - """ - Get credentials for Google API with service account key. - - :param service_key_path: service account key file path. - :return: Google credentials. 
- """ - # service_key_path = "/home/.config/gspread_pandas/google_secret.json" - if not service_key_path: - service_key_path = os.path.join( - os.path.expanduser("~"), - ".config", - "gspread_pandas", - "google_secret.json", - ) - service_key_path = os.path.join(os.path.dirname(__file__), service_key_path) - # Download service.json from Google API, then save it as - # /home/.config/gspread_pandas/google_secret.json - # Instructions: https://gspread-pandas.readthedocs.io/en/latest/getting_started.html#client-credentials" - hdbg.dassert_file_exists( - service_key_path, - "Failed to read service key file: %s", - service_key_path, - ) - # Scopes required for making API calls. - scopes = [ - "https://www.googleapis.com/auth/drive", - "https://www.googleapis.com/auth/spreadsheets", - ] - creds = goasea.Credentials.from_service_account_file( - service_key_path, scopes=scopes - ) - return creds - - -# ############################################################################# -# Google Sheets API -# ############################################################################# - - -# TODO(gp): Extend this to work with v3, v4, etc. -# TODO(ai_gp): Make it private if it's not called by anybody else. -def get_sheets_service(credentials: "goasea.Credentials") -> "godisc.Resource": - """ - Get Google Sheets service with provided credentials. - - :param credentials: Google credentials object. - :return: Google Sheets service instance. - """ - # Ensure credentials are provided. - hdbg.dassert(credentials, "The 'credentials' parameter must be provided") - # Build the Sheets service. - sheets_service = godisc.build( - "sheets", "v4", credentials=credentials, cache_discovery=False - ) - return sheets_service - - -def _get_gsheet_id( - credentials: "goasea.Credentials", - sheet_id: str, - *, - tab_name: Optional[str] = None, -) -> str: - """ - Get the sheet ID from the sheet name in a Google Sheets document. - - :param credentials: Google credentials object. 
- :param sheet_id: ID of the Google Sheet document. - :param tab_name: Name of the sheet (tab) in the Google Sheets - document. - :return: Sheet ID of the sheet with the given name or the first - sheet if the name is not provided. - """ - sheets_service = get_sheets_service(credentials) - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheets = sheet_metadata.get("sheets", []) - if tab_name: - for sheet in sheets: - properties = sheet.get("properties", {}) - if properties.get("title") == tab_name: - return properties.get("sheetId") - raise ValueError(f"Sheet with name '{tab_name}' not found.") - # Return the ID of the first sheet if no sheet name is provided. - first_sheet_id = sheets[0].get("properties", {}).get("sheetId") - return first_sheet_id - - -def get_gsheet_name( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Get the name of a Google Sheet from its URL. - - E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI - -> pitchbook.Outreach_AI_companies - - :param url: URL of the Google Sheets file. - :param credentials: Google credentials object. - :return: Name of the Google Sheet (spreadsheet title). - """ - if credentials is None: - credentials = get_credentials() - # TODO(ai): Should we use the Sheets API instead? - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - tab_name = spreadsheet.title - _LOG.debug("Retrieved sheet name: '%s'", tab_name) - return tab_name - - -def get_tabs_from_gsheet( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> List[str]: - """ - Get all the tabs (worksheets) from a Google Sheet. - - :param url: URL of the Google Sheet. - :param credentials: Google credentials object. - :return: List of tab names. 
- """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - return [sheet.title for sheet in spreadsheet.worksheets()] - - -# ############################################################################# - - -def _extract_file_id_from_url(url: str) -> str: - """ - Extract the file ID from a Google Docs/Sheets/Drive URL. - - E.g., - https://docs.google.com/spreadsheets/d/FILE_ID/... - https://docs.google.com/document/d/FILE_ID/... - https://drive.google.com/file/d/FILE_ID/... - - :param url: URL of the Google Docs/Sheets/Drive file. - :return: File ID extracted from the URL. - """ - # Handle URLs like: - # https://docs.google.com/spreadsheets/d/FILE_ID/... - # https://docs.google.com/document/d/FILE_ID/... - # https://drive.google.com/file/d/FILE_ID/... - pattern = r"/d/([a-zA-Z0-9-_]+)" - match = re.search(pattern, url) - hdbg.dassert(match, "Invalid URL format: %s", url) - file_id = match.group(1) - _LOG.debug("Extracted file ID: '%s' from URL: '%s'", file_id, url) - return file_id - - -def get_gsheet_tab_url( - url: str, - tab_name: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Generate the full URL for a specific tab in a Google Sheet. - - E.g., - - Input URL: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI - - Tab name: Sheet3 - - Output: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=229426446#gid=229426446 - - :param url: URL of the Google Sheets file. - :param tab_name: Name of the tab to generate the URL for. - :param credentials: Google credentials object. - :return: Full URL with the gid parameter for the specified tab. - """ - if credentials is None: - credentials = get_credentials() - hdbg.dassert(tab_name, "tab_name parameter must be provided") - # Extract the spreadsheet ID from the URL. 
- sheet_id = _extract_file_id_from_url(url) - _LOG.debug("Extracted sheet_id: '%s' from URL: '%s'", sheet_id, url) - # Get the gid for the specified tab. - gid = _get_gsheet_id(credentials, sheet_id, tab_name=tab_name) - _LOG.debug("Retrieved gid: '%s' for tab: '%s'", gid, tab_name) - # Construct the full URL with the gid parameter. - full_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/edit?gid={gid}#gid={gid}" - _LOG.debug("Generated full URL: '%s'", full_url) - return full_url - - -def _freeze_rows_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - num_rows_to_freeze: int, - *, - tab_name: Optional[str] = None, - bold: bool = True, -) -> None: - """ - Freeze specified rows in the given sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). - :param num_rows_to_freeze: Number of rows to freeze (starting from - row 0). - :param tab_name: Name of the sheet (tab) to freeze rows in. Defaults - to the first tab if not provided. - :param bold: If True, make the frozen rows bold. - """ - hdbg.dassert_lt(0, num_rows_to_freeze) - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - # Build the batch update request. - requests = [] - # Add freeze rows request. - requests.append( - { - "updateSheetProperties": { - "properties": { - "sheetId": tab_id, - "gridProperties": {"frozenRowCount": num_rows_to_freeze}, - }, - "fields": "gridProperties.frozenRowCount", - } - } - ) - # Add bold formatting request if requested. 
- if bold: - requests.append( - { - "repeatCell": { - "range": { - "sheetId": tab_id, - "startRowIndex": 0, - "endRowIndex": num_rows_to_freeze, - }, - "cell": { - "userEnteredFormat": { - "textFormat": { - "bold": True, - } - } - }, - "fields": "userEnteredFormat.textFormat.bold", - } - } - ) - _LOG.debug( - "Adding bold formatting to %s frozen rows", num_rows_to_freeze - ) - # Execute the batch update. - freeze_request = {"requests": requests} - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=freeze_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def _set_row_height_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - height: int, - *, - start_index: Optional[int] = None, - end_index: Optional[int] = None, - tab_name: Optional[str] = None, -) -> None: - """ - Set the height for rows in the given Google sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). - :param height: Height of the rows in pixels. - :param start_index: Starting index of the rows (zero-based). If - None, applies to all rows. - :param end_index: Ending index of the rows (zero-based). If None, - applies to all rows. - :param tab_name: Name of the sheet (tab) to set row height in. - Defaults to the first tab if not provided. 
- """ - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - if start_index is None and end_index is None: - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - start_index, end_index = 0, grid_properties.get("rowCount", 1000) - elif start_index is None: - start_index = 0 - elif end_index is None: - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - end_index = grid_properties.get("rowCount", 1000) - elif start_index >= end_index: - raise ValueError( - f"Invalid params: start_index ({start_index}) must be less than end_index ({end_index})." - ) - # Create request. - set_row_height_request = { - "requests": [ - { - "updateDimensionProperties": { - "range": { - "sheetId": tab_id, - "dimension": "ROWS", - "startIndex": start_index, - "endIndex": end_index, - }, - "properties": {"pixelSize": height}, - "fields": "pixelSize", - } - } - ] - } - # Get response. - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=set_row_height_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def _set_text_wrapping_clip_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - *, - tab_name: Optional[str] = None, -) -> None: - """ - Set text wrapping to "CLIP" for all columns in the given Google sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). 
- :param tab_name: Name of the sheet (tab) to set text wrapping in. - Defaults to the first tab if not provided. - """ - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - # Get sheet metadata to determine the range. - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - row_count = grid_properties.get("rowCount", 1000) - col_count = grid_properties.get("columnCount", 26) - _LOG.debug( - "Setting text wrapping to CLIP for sheet with %s rows and %s columns", - row_count, - col_count, - ) - # Create request to set text wrapping to CLIP. - set_wrapping_request = { - "requests": [ - { - "repeatCell": { - "range": { - "sheetId": tab_id, - "startRowIndex": 0, - "endRowIndex": row_count, - "startColumnIndex": 0, - "endColumnIndex": col_count, - }, - "cell": { - "userEnteredFormat": { - "wrapStrategy": "CLIP", - } - }, - "fields": "userEnteredFormat.wrapStrategy", - } - } - ] - } - # Execute the batch update. - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=set_wrapping_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def from_gsheet( - url: str, - *, - tab_name: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> pd.DataFrame: - """ - Read data from a Google Sheet. - - :param url: URL of the Google Sheets file. - :param tab_name: Name of the tab to read (default: first sheet if - not specified). - :param credentials: Google credentials object. - :return: pandas DataFrame with the sheet data. 
- """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - if tab_name is None: - # Read the first sheet. - worksheet = spreadsheet.get_worksheet(0) - else: - # Read the specified sheet. - worksheet = spreadsheet.worksheet(tab_name) - data = worksheet.get_all_records() - hdbg.dassert(data, "The sheet '%s' is empty", tab_name) - df = pd.DataFrame(data) - _LOG.debug("Data fetched") - return df - - -def to_gsheet( - df: pd.DataFrame, - url: str, - *, - tab_name: Optional[str] = "new_data", - freeze_rows: bool = False, - set_text_wrapping_clip: bool = False, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Write data to a specified Google Sheet and tab. - - :param df: Data to be written. - :param url: URL of the Google Sheet. - :param tab_name: Name of the tab where the data will be written. - :param freeze_rows: If True, freeze the header row. - :param set_text_wrapping_clip: If True, set text wrapping to CLIP. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - # Try to get existing worksheet or create new one. - try: - worksheet = spreadsheet.worksheet(tab_name) - except gspread.exceptions.WorksheetNotFound: - _LOG.debug( - "Tab '%s' not found, creating a new tab with that name", - tab_name, - ) - worksheet = spreadsheet.add_worksheet( - title=tab_name, rows="100", cols="20" - ) - # - if freeze_rows: - _freeze_rows_in_gsheet( - credentials, - spreadsheet.id, - num_rows_to_freeze=1, - tab_name=tab_name, - ) - # - _set_row_height_in_gsheet( - credentials, - spreadsheet.id, - height=20, - tab_name=tab_name, - ) - # Clear and write data. - worksheet.clear() - # Replace NaN/inf values with empty strings for JSON compatibility. 
- df_clean = df.fillna("").replace([float("inf"), float("-inf")], "") - values = [df_clean.columns.values.tolist()] + df_clean.values.tolist() - worksheet.update("A1", values) - # - if set_text_wrapping_clip: - _set_text_wrapping_clip_in_gsheet( - credentials, - spreadsheet.id, - tab_name=tab_name, - ) - _LOG.info("Data written to:\ntab '%s'\nGoogle Sheet '%s'", tab_name, url) - _LOG.info( - "url=%s", get_gsheet_tab_url(url, tab_name, credentials=credentials) - ) - - -# ############################################################################# -# Google file API -# ############################################################################# - - -def _get_gdrive_service(credentials: "goasea.Credentials") -> "godisc.Resource": - """ - Get Google Drive service with provided credentials. - - :param credentials: Google credentials object. - :return: Google Drive service instance. - """ - # Ensure credentials are provided. - hdbg.dassert(credentials, "The 'credentials' parameter must be provided") - # Build the drive service. - gdrive_service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - return gdrive_service - - -def _create_new_google_document( - credentials: "goasea.Credentials", - doc_name: str, - doc_type: str, -) -> str: - """ - Create a new Google document (Sheet or Doc). - - :param credentials: Google credentials object. - :param doc_name: The name of the new Google document. - :param doc_type: The type of the Google document ('sheets' or - 'docs'). - :return: doc_id. The ID of the created document in Google Drive. - """ - if doc_type not in ["sheets", "docs"]: - raise ValueError("Invalid doc_type. Must be 'sheets' or 'docs'.") - # Build the service for the respective document type. - service = godisc.build( - doc_type, - "v4" if doc_type == "sheets" else "v1", - credentials=credentials, - cache_discovery=False, - ) - # Create the document with the specified name. 
- document = {"properties": {"title": doc_name}} - create_method = ( - service.spreadsheets().create - if doc_type == "sheets" - else service.documents().create - ) - response = create_method( - body=document, - fields="spreadsheetId" if doc_type == "sheets" else "documentId", - ).execute() - # Extract the document ID. - doc_id = response.get( - "spreadsheetId" if doc_type == "sheets" else "documentId" - ) - return doc_id - - -def move_gfile_to_dir( - gfile_id: str, - folder_id: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> dict: - """ - Move a Google file to a specified folder in Google Drive. - - :param gfile_id: The ID of the Google file. - :param folder_id: The ID of the folder. - :param credentials: Google credentials object. - :return: The response from the API after moving the file. - """ - if credentials is None: - credentials = get_credentials() - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - res = ( - service.files() - .update( - fileId=gfile_id, - body={}, - addParents=folder_id, - removeParents="root", - supportsAllDrives=True, - ) - .execute() - ) - return res - - -def share_google_file( - gfile_id: str, - user: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Share a Google file with a user. - - :param gfile_id: The ID of the Google file. - :param user: The email address of the user. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Create the permission. 
- parameters = {"role": "reader", "type": "user", "emailAddress": user} - new_permission = ( - service.permissions().create(fileId=gfile_id, body=parameters).execute() - ) - _LOG.debug( - "The new permission ID of the document is: '%s'", - new_permission.get("id"), - ) - _LOG.debug("The Google file is shared with '%s'", user) - - -def create_empty_google_file( - gfile_type: str, - gfile_name: str, - gdrive_folder_id: str, - *, - user: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create a new Google file (sheet or doc) and move it to a specified folder. - - :param gfile_type: the type of the Google file ('sheet' or 'doc'). - :param gfile_name: the name of the new Google file. - :param gdrive_folder_id: the ID of the Google Drive folder. - :param user: the email address of the user to share the Google file. - :param credentials: Google credentials object for API access. - :return: the ID of the created Google file, or None if an error - occurred. - """ - if credentials is None: - credentials = get_credentials() - # Create the new Google file (either Sheet or Doc). - if gfile_type == "sheet": - gfile_id = _create_new_google_document( - credentials, - doc_name=gfile_name, - doc_type="sheets", - ) - elif gfile_type == "doc": - gfile_id = _create_new_google_document( - credentials, - doc_name=gfile_name, - doc_type="docs", - ) - else: - raise ValueError(f"Invalid gfile_type={gfile_type}") - _LOG.debug("Created a new Google %s '%s'", gfile_type, gfile_name) - # Move the Google file to the specified folder. - if gdrive_folder_id: - move_gfile_to_dir(gfile_id, gdrive_folder_id, credentials=credentials) - # Share the Google file to the user and send an email. - if user: - share_google_file(gfile_id, user, credentials=credentials) - _LOG.debug( - "The new Google '%s': '%s' is shared with '%s'", - gfile_type, - gfile_name, - user, - ) - # Return the file ID. 
- return gfile_id - - -def create_or_overwrite_with_timestamp( - file_name: str, - folder_id: str, - *, - file_type: str = "sheets", - overwrite: bool = False, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create or overwrite a Google Sheet or Google Doc with a timestamp in a - specific Google Drive folder. - - :param file_name: Name for the file (timestamp will be added). - :param folder_id: Google Drive folder ID where the file will be - created or updated. - :param file_type: Type of file to create ('sheets' or 'docs'). - :param overwrite: If True, overwrite an existing file. Otherwise, - create a new file. - :param credentials: Google credentials object. - :return: The ID of the created or overwritten file. - """ - if credentials is None: - credentials = get_credentials() - # Authenticate with Google APIs using the provided credentials. - # TODO(gp): -> get_gdrive_service - drive_service = godisc.build("drive", "v3", credentials=credentials) - if file_type == "sheets": - mime_type = "application/vnd.google-apps.spreadsheet" - elif file_type == "docs": - mime_type = "application/vnd.google-apps.document" - else: - raise ValueError("Invalid file_type. Must be 'sheets' or 'docs'.") - query = ( - f"'{folder_id}' in parents and mimeType = '{mime_type}'" - f" and name contains '{file_name}'" - ) - response = ( - drive_service.files() - .list( - q=query, - fields="files(id, name)", - includeItemsFromAllDrives=True, - supportsAllDrives=True, - ) - .execute() - ) - files = response.get("files", []) - # Check if overwriting or creating new file. - if files and overwrite: - file_id = files[0]["id"] - _LOG.debug("Overwriting existing file '%s'", files[0]["name"]) - else: - # Create new file with timestamp. 
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - new_file_name = f"{file_name}_{timestamp}" - file_metadata = { - "name": new_file_name, - "mimeType": mime_type, - "parents": [folder_id], - } - file = ( - drive_service.files() - .create(body=file_metadata, fields="id", supportsAllDrives=True) - .execute() - ) - file_id = file.get("id") - _LOG.debug( - "New file '%s' created successfully in folder '%s'", - new_file_name, - folder_id, - ) - return file_id - - -# ############################################################################# -# Google folder API -# ############################################################################# - - -def create_google_drive_folder( - folder_name: str, - parent_folder_id: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create a new Google Drive folder inside the given folder. - - :param folder_name: the name of the new Google Drive folder. - :param parent_folder_id: the ID of the parent folder. - :param credentials: Google credentials object. - :return: the ID of the created Google Drive folder. - """ - if credentials is None: - credentials = get_credentials() - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Define the metadata for the new folder. - file_metadata = { - "name": folder_name, - "mimeType": "application/vnd.google-apps.folder", - "parents": [parent_folder_id], - } - # Create the folder in Google Drive. - folder = service.files().create(body=file_metadata, fields="id").execute() - # Log and return the folder ID. - _LOG.debug("Created a new Google Drive folder '%s'", folder_name) - _LOG.debug("The new folder id is '%s'", folder.get("id")) - return folder.get("id") - - -def _get_folders_in_gdrive(*, credentials: "goasea.Credentials") -> list: - """ - Get a list of folders in Google Drive. 
- - :param credentials: Google credentials object. - :return: A list of folders (each containing an ID and name). - """ - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Make the API request to list folders. - response = ( - service.files() - .list( - q="mimeType='application/vnd.google-apps.folder' and trashed=false", - spaces="drive", - fields="nextPageToken, files(id, name)", - ) - .execute() - ) - # Return the list of folders (id and name). - return response.get("files", []) - - -def get_folder_id_by_name( - credentials: "goasea.Credentials", - name: str, -) -> dict: - """ - Get the folder id by the folder name. - - :param credentials: Google credentials object. - :param name: The name of the folder. - :return: Dictionary with folder id and name. - """ - folders = _get_folders_in_gdrive(credentials=credentials) - folder_list = [] - # Find all folders matching the name. - for folder in folders: - if folder.get("name") == name: - folder_list.append(folder) - if len(folder_list) == 1: - _LOG.debug("Found folder: %s", folder_list[0]) - elif len(folder_list) > 1: - for folder in folder_list: - _LOG.debug( - "Found folder: '%s', '%s'", - folder.get("name"), - folder.get("id"), - ) - _LOG.debug( - "Return the first found folder. '%s' '%s' ", - folder_list[0].get("name"), - folder_list[0].get("id"), - ) - _LOG.debug( - "if you want to use another '%s' folder, " - "please change the folder id manually.", - name, - ) - else: - raise ValueError(f"Can't find the folder '{name}'.") - return folder_list[0] - - -def _get_folder_path_list( - service: "godisc.Resource", - file_id: str, -) -> List[str]: - """ - Get the full folder path as a list of folder names. - - :param service: Google Drive service instance. - :param file_id: The ID of the file. - :return: List of folder names from root to immediate parent folder. 
- Returns empty list if file is at root level. - """ - # Get file metadata with parents. - file_metadata = ( - service.files() - .get( - fileId=file_id, - fields="parents", - supportsAllDrives=True, - ) - .execute() - ) - parents = file_metadata.get("parents", []) - # If no parents, file is at root level. - if not parents: - _LOG.debug("File is at root level") - return [] - # Build the path by traversing up the folder hierarchy. - path_list = [] - current_id = parents[0] # Files typically have one parent in Google Drive. - while current_id: - folder_metadata = ( - service.files() - .get( - fileId=current_id, - fields="name,parents", - supportsAllDrives=True, - ) - .execute() - ) - folder_name = folder_metadata.get("name") - path_list.insert(0, folder_name) - parents = folder_metadata.get("parents", []) - current_id = parents[0] if parents else None - _LOG.debug("Folder path: %s", path_list) - return path_list - - -def get_google_path_from_url( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> List[str]: - """ - Get the full folder path from a Google Docs/Sheets/Drive URL. - - E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI - -> ['My Drive', 'Folder1', 'Folder2'] - - :param url: URL of the Google Docs/Sheets/Drive file. - :param credentials: Google credentials object. - :return: List of folder names from root to immediate parent folder. - Returns empty list if file is at root level. - """ - if credentials is None: - credentials = get_credentials() - # Extract file ID from URL. - file_id = _extract_file_id_from_url(url) - # Get Google Drive service. - service = _get_gdrive_service(credentials) - # Get folder path as list. 
- path_list = _get_folder_path_list(service, file_id) - _LOG.debug("Retrieved folder path for URL '%s': %s", url, path_list) - return path_list - - -def print_info_about_google_url( - url: str, - *, - tab_name: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Print information about a Google Sheet URL. - - :param url: URL of the Google Sheets file. - :param tab_name: Optional tab name to display full URL for. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - print("url: '%s'" % url) - print("file name: '%s'" % get_gsheet_name(url, credentials=credentials)) - print("tab names: '%s'" % get_tabs_from_gsheet(url, credentials=credentials)) - if tab_name is not None: - print( - "full url: '%s'" - % get_gsheet_tab_url(url, tab_name, credentials=credentials) - ) - print( - "folder path: '%s'" - % "/".join(get_google_path_from_url(url, credentials=credentials)) - ) - - -# TODO(gp): Add clean up -# TODO(gp): Make url mandatory and when url = "tmp" use the hardcored value. -# TODO(gp): -> save_df_to_gsheet -def save_df_to_tmp_gsheet( - df: pd.DataFrame, - *, - url: str = "", - tab_name: str = "", - remove_empty_columns: bool = False, - remove_stable_columns: bool = False, - verbose: bool = True, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Save a DataFrame to a Google Sheet. - - :param df: The DataFrame to save. - :param url: URL of the Google Sheet (empty means default temp - sheet). - :param tab_name: The name of the tab to save the DataFrame to. - :param remove_empty_columns: Whether to remove empty columns. - :param remove_stable_columns: Whether to remove stable columns. - :param verbose: Whether to print verbose output. - :param credentials: Google credentials object. 
- """ - if credentials is None: - credentials = get_credentials() - if remove_stable_columns: - df = hpandas.remove_stable_columns(df, verbose=verbose) - if remove_empty_columns: - df = hpandas.remove_empty_columns(df, verbose=verbose) - if url == "": - url = "https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=0#gid=0" - if tab_name == "": - # Find the first tab name that is not empty. - tab_names = get_tabs_from_gsheet(url, credentials=credentials) - for i in range(0, 100): - tab_name = "Sheet" + str(i) - if tab_name not in tab_names: - break - hdbg.dassert_ne(tab_name, "No empty tab name found") - to_gsheet( - df, - url, - tab_name=tab_name, - freeze_rows=True, - set_text_wrapping_clip=True, - credentials=credentials, - ) - - -def _get_gsheet_to_df(url: str, tab_name: Optional[str]) -> pd.DataFrame: - credentials = get_credentials() - file_name = get_gsheet_name(url, credentials=credentials) - _LOG.info( - "Reading data:\n url='%s'\n file_name='%s'\n tab_name='%s'" - % (url, file_name, tab_name) - ) - df = from_gsheet(url, tab_name=tab_name, credentials=credentials) - return df - - -get_cached_gsheet_to_df = hcacsimp.simple_cache( - cache_type="pickle", write_through=True -)(_get_gsheet_to_df) - - -# TODO(gp): This is redundant with disable cache. -# TODO(gp): Create a function to normalize the column names. -def get_gsheet_to_df( - url: str, - tab_name: Optional[str], - *, - remove_spaces_in_cols: bool = True, - force_no_cache: bool = False, -) -> pd.DataFrame: - """ - Get a Google Sheet as a DataFrame with optional caching. - - :param url: The URL of the Google Sheet. - :param tab_name: The name of the tab to read - - `None` means the first sheet - :param remove_spaces_in_cols: Whether to remove spaces in the column names. - :param force_no_cache: Whether to bypass the cache and fetch fresh data. - :return: DataFrame containing the sheet data. 
- """ - if force_no_cache: - df = get_gsheet_to_df(url, tab_name) - else: - df = get_cached_gsheet_to_df(url, tab_name) - if remove_spaces_in_cols: - df.columns = df.columns.str.replace(" ", "") - return df - - -def read_all_gsheets( - url: str, *, tab_names: Union[str, List[str]], concat: bool = False -) -> Union[pd.DataFrame, List[pd.DataFrame]]: - """ - Read all the sheets from a Google Sheet. - - :param url: The URL of the Google Sheet. - :param tab_names: The names of the sheets to read. - :param concat: Whether to concatenate the DataFrames. - :return: A list of DataFrames, one for each sheet. - """ - dfs = [] - # TODO(ai_gp): -> _all_ - if tab_names == "all": - tab_names = get_tabs_from_gsheet(url) - for tab_name in tab_names: - df = get_cached_gsheet_to_df(url, tab_name) - dfs.append(df) - if len(dfs) > 1 and concat: - # Assert if the columns are the same. - for df in dfs[1:]: - hdbg.dassert_eq(df.columns, dfs[0].columns) - # Concatenate the DataFrames. - df = pd.concat(dfs) - df.reset_index(drop=True, inplace=True) - return df - return dfs diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py deleted file mode 100644 index fdc7ed66c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Import as: - -import helpers.hintrospection as hintros -""" - -import collections.abc as cabc -import importlib -import inspect -import logging -import pickle -import re -import sys -import types -from typing import Any, Callable, List, Optional, cast - -import helpers.hdbg as hdbg - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - -_LOG = logging.getLogger(__name__) - - -# Copied from 
`hstring` to avoid import cycles. - - -def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str: - if string.startswith(prefix): - res = string[len(prefix) :] - else: - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't start with prefix ='{prefix}'" - ) - return res - - -# End copy. - -# TODO(gp): object -> Any? - - -# ############################################################################# -# Function introspection -# ############################################################################# - - -def get_function_name(count: int = 0) -> str: - """ - Return the name of the function calling this function. - """ - ptr = inspect.currentframe() - # count=0 corresponds to the calling function, so we need to add an extra - # step walking the call stack. - count += 1 - for _ in range(count): - hdbg.dassert_is_not(ptr, None) - ptr = ptr.f_back # type: ignore - func_name = ptr.f_code.co_name # type: ignore - return func_name - - -def get_name_from_function(func: Callable) -> str: - """ - Return the name of the passed function. - - E.g., amp.helpers.test.test_hintrospection.test_function - """ - func_name = func.__name__ - # - module = inspect.getmodule(func) - hdbg.dassert_is_not( - module, None, f"Could not get module for function {func}" - ) - assert module is not None - module_name = module.__name__ - # Remove `app.` if needed from the module name, e.g., - # `app.amp.helpers.test.test_hintrospection`. - prefix = "app." - if module_name.startswith(prefix): - module_name = remove_prefix(module_name, prefix) - return f"{module_name}.{func_name}" - - -def get_function_from_string(func_as_str: str) -> Callable: - """ - Return the function from its name including the import. - - E.g., `import im.scripts.AmpTask317_transform_pq_by_date_to_by_asset` - """ - # Split txt in an import and function name. 
- m = re.match(r"^(\S+)\.(\S+)$", func_as_str) - hdbg.dassert(m, "txt='%s'", func_as_str) - m = cast(re.Match, m) - import_, function = m.groups() - _LOG.debug("import=%s", import_) - _LOG.debug("function=%s", function) - # Import the needed module. - imp = importlib.import_module(import_) - # Force the linter not to remove this import which is needed in the following - # eval. - _ = imp - python_code = f"imp.{function}" - func: Callable = eval(python_code) - _LOG.debug("%s -> func=%s", func_as_str, func) - return func - - -def get_methods(obj: Any, access: str = "all") -> List[str]: - """ - Return list of names corresponding to class methods of an object `obj`. - - :param obj: class or class object - :param access: allows to select private, public or all methods of - the object. - """ - methods = [method for method in dir(obj) if callable(getattr(obj, method))] - if access == "all": - pass - elif access == "private": - methods = [method for method in methods if method.startswith("_")] - elif access == "public": - methods = [method for method in methods if not method.startswith("_")] - else: - raise ValueError(f"Invalid access='{access}'") - return methods - - -# ############################################################################# - - -def is_iterable(obj: object) -> bool: - """ - Return whether obj can be iterated upon or not. - - Note that a string is iterable in Python, but typically we refer to - iterables as lists, tuples, so we exclude strings. - """ - # From https://stackoverflow.com/questions/1952464 - return not isinstance(obj, str) and isinstance(obj, cabc.Iterable) - - -# From https://stackoverflow.com/questions/53225 -def is_bound_to_object(method: object) -> bool: - """ - Return whether a method is bound to an object. 
- """ - _LOG.debug("method=%s", method) - if not hasattr(method, "__self__"): - _LOG.debug("hasattr(im_self)=False") - val = False - else: - # val = method.im_self is not None - val = True - return val - - -# From https://stackoverflow.com/questions/23852423 -def is_lambda_function(method: object) -> bool: - _LOG.debug("type(method)=%s", str(type(method))) - return isinstance(method, types.LambdaType) and method.__name__ == "" - - -def is_pickleable(obj: object, *, mode: str = "try_and_catch") -> bool: - """ - Return if an object is a bound method. - - :param obj: object to process - :param mode: approach to detect non-pikleable objects - - "type_search": detect non-pickleable objects by type, e.g., lambda - functions are not Pickleable - - "try_and_catch": try to pickle an object directly, if it fails, - an object is non-pickleable then - """ - _LOG.debug("obj=%s", obj) - if mode == "type_search": - _LOG.debug("callable=%s", callable(obj)) - if not callable(obj): - return True - # - is_bound = is_bound_to_object(obj) - _LOG.debug("is_bound=%s", is_bound) - if is_bound: - return False - # - is_lambda = is_lambda_function(obj) - _LOG.debug("is_lambda=%s", is_lambda) - if is_lambda: - return False - return True - elif mode == "try_and_catch": - try: - _ = pickle.dumps(obj) - return True - # `AttributeError` is raised when obj is a class with lambda param - # values, and `TypeError`is raised when the class has DB connection - # object as value. 
- except (AttributeError, TypeError) as e: - _LOG.debug("Cannot pickle object=%s, the error is %s", obj, str(e)) - return False - else: - raise ValueError(f"Invalid mode='{mode}'") - - -# ############################################################################# -# Object size -# ############################################################################# - - -# https://code.activestate.com/recipes/577504/ -# https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python - - -def get_size_in_bytes(obj: object, seen: Optional[set] = None) -> int: - """ - Recursively find size of an object `obj` in bytes. - """ - # From https://github.com/bosswissam/pysize - # getsizeof() returns the size in bytes. - size = sys.getsizeof(obj) - if seen is None: - seen = set() - obj_id = id(obj) - if obj_id in seen: - return 0 - # Mark as seen *before* entering recursion to gracefully handle - # self-referential objects. - seen.add(obj_id) - if hasattr(obj, "__dict__"): - for cls in obj.__class__.__mro__: - if "__dict__" in cls.__dict__: - d = cls.__dict__["__dict__"] - if inspect.isgetsetdescriptor(d) or inspect.ismemberdescriptor( - d - ): - size += get_size_in_bytes(obj.__dict__, seen) - break - if isinstance(obj, dict): - size += sum((get_size_in_bytes(v, seen) for v in obj.values())) - size += sum((get_size_in_bytes(k, seen) for k in obj.keys())) - elif isinstance(obj, cabc.Iterable) and not isinstance( - obj, (str, bytes, bytearray) - ): - size += sum((get_size_in_bytes(i, seen) for i in obj)) - if hasattr(obj, "__slots__"): # can have __slots__ with __dict__ - slots = getattr(obj, "__slots__", None) - if slots is not None: - size += sum( - get_size_in_bytes(getattr(obj, s), seen) - for s in slots - if hasattr(obj, s) - ) - return size - - -# TODO(gp): -> move to helpers/hprint.py -def format_size(num: float) -> str: - """ - Return a human-readable string for a filesize (e.g., "3.5 MB"). 
- """ - # From http://stackoverflow.com/questions/1094841 - for x in ["b", "KB", "MB", "GB", "TB"]: - if num < 1024.0: - return f"%3.1f {x}" % num - num /= 1024.0 - assert 0, f"Invalid num='{num}'" - - -# ############################################################################# -# Stacktrace -# ############################################################################# - - -def stacktrace_to_str() -> str: - """ - Print the stack trace. - """ - import traceback - - txt = traceback.format_stack() - txt = "".join(txt) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py deleted file mode 100644 index bc2f71ab7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py +++ /dev/null @@ -1,1046 +0,0 @@ -""" -Functions to handle filesystem operations. - -Import as: - -import helpers.hio as hio -""" - -import datetime -import gzip -import json -import logging -import os -import re -import shlex -import shutil -import time -import uuid -from typing import Any, Dict, List, Optional, Union - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Set logging level of this file. -_LOG.setLevel(logging.INFO) - -# ############################################################################# -# Glob. -# ############################################################################# - - -def purify_file_name(file_name: str) -> str: - """ - Remove non-Linux friendly characters from the basename. 
- """ - basename = os.path.basename(file_name) - for char in (" ", "_", "'", '"', "`", "/"): - basename = basename.replace(char, "_") - # - dir_name = os.path.dirname(file_name) - file_name_out = os.path.join(dir_name, basename) - file_name_out: str = os.path.normpath(file_name_out) - return file_name_out - - -def listdir( - dir_name: str, - pattern: str, - only_files: bool, - use_relative_paths: bool, - *, - exclude_git_dirs: bool = True, - maxdepth: Optional[int] = None, -) -> List[str]: - """ - Find all files and subdirectories under `directory` that match `pattern`. - - :param dir_name: path to the directory where to look for files - :param pattern: pattern to match a filename against (e.g., `*.py`) - :param only_files: look for only files instead of both files and directories - :param use_relative_paths: remove `dir_name` from path - :param exclude_git_dirs: skip `.git` dirs - :param maxdepth: limit the depth of directory traversal - """ - hdbg.dassert_dir_exists(dir_name) - # Escape the directory path. - dir_name = shlex.quote(dir_name) - cmd = [f"find {dir_name}", f'-name "{pattern}"'] - if maxdepth is not None: - cmd.append(f'-maxdepth "{maxdepth}"') - if only_files: - cmd.append("-type f") - if exclude_git_dirs: - cmd.append(r'-not -path "*/\.git/*"') - cmd = " ".join(cmd) - _, output = hsystem.system_to_string(cmd) - # TODO(gp): -> system_to_files - paths = [path for path in output.split("\n") if path != ""] - _LOG.debug("Found %s paths in %s", len(paths), dir_name) - _LOG.debug("\n".join(paths)) - if use_relative_paths: - paths = [os.path.relpath(path, start=dir_name) for path in paths] - return paths - - -def is_valid_filename_extension(ext: str) -> bool: - """ - By convention extensions don't include the initial `.`. - - E.g., "tgz" is valid, but not ".tgz". - """ - valid = not ext.startswith(".") - return valid - - -def change_filename_extension(filename: str, old_ext: str, new_ext: str) -> str: - """ - Change extension of a filename (e.g. 
"data.csv" to "data.json"). - - :param filename: the old filename (including extension) - :param old_ext: the extension of the old filename (e.g., "csv") - - If empty, it is extracted from the filename - :param new_ext: the extension to replace the old extension (e.g., "json") - :return: a filename with the new extension - """ - # If the old extension is empty, extract it from the filename. - if old_ext == "": - _, old_ext = os.path.splitext(filename) - # Remove the leading dot. - old_ext = old_ext.lstrip(".") - hdbg.dassert( - is_valid_filename_extension(old_ext), "Invalid extension '%s'", old_ext - ) - hdbg.dassert( - is_valid_filename_extension(new_ext), "Invalid extension '%s'", new_ext - ) - hdbg.dassert( - filename.endswith(old_ext), - "Extension '%s' doesn't match file '%s'", - old_ext, - filename, - ) - # Remove the old extension. - len_ext = len(old_ext) - new_filename = filename[:-len_ext] - hdbg.dassert(new_filename.endswith("."), "new_filename='%s'", new_filename) - # Add the new extension. - new_filename += new_ext - return new_filename - - -def is_paired_jupytext_python_file(py_filename: str) -> bool: - """ - Return if a Python file has a paired Jupyter notebook. - """ - hdbg.dassert( - py_filename.endswith("py"), "Invalid python filename='%s'", py_filename - ) - hdbg.dassert_file_exists(py_filename) - # Check if a corresponding ipynb file exists. - ipynb_filename = change_filename_extension(py_filename, "py", "ipynb") - is_paired = os.path.exists(ipynb_filename) - _LOG.debug( - "Checking ipynb file='%s' for py file='%s': is_paired=%s", - py_filename, - ipynb_filename, - is_paired, - ) - return is_paired - - -def keep_python_files( - file_names: List[str], exclude_paired_jupytext: bool -) -> List[str]: - """ - Return a list with all Python file names (i.e., with the `py` extension). 
- - :param exclude_paired_jupytext: exclude Python file that are associated to - notebooks (i.e., that have a corresponding `.ipynb` file) - """ - hdbg.dassert_isinstance(file_names, list) - # Check all the files. - py_file_names = [] - for file_name in file_names: - if file_name.endswith(".py"): - if exclude_paired_jupytext: - # Include only the non-paired Python files. - is_paired = is_paired_jupytext_python_file(file_name) - add = not is_paired - else: - # Include all the Python files. - add = True - else: - add = False - _LOG.debug("file_name='%s' -> add='%s'", file_name, add) - if add: - py_file_names.append(file_name) - _LOG.debug("Found %s python files", len(py_file_names)) - return py_file_names - - -def delete_file(file_name: str) -> None: - _LOG.debug("Deleting file '%s'", file_name) - # hs3.dassert_is_not_s3_path(file_name) - if not os.path.exists(file_name) or file_name == "/dev/null": - # Nothing to delete. - return - try: - os.unlink(file_name) - except OSError as e: - # It can happen that we try to delete the file, while somebody already - # deleted it, so we neutralize the corresponding exception. - if e.errno == 2: - # OSError: [Errno 2] No such file or directory. - pass - else: - raise e - - -def _create_dir( - dir_name: str, - incremental: bool, - abort_if_exists: bool = False, - ask_to_delete: bool = False, -) -> None: - """ - Create a directory `dir_name` if it doesn't exist. - - Same interface as `create_dir()` but without handling - `backup_dir_if_exists`. 
- """ - _LOG.debug( - hprint.to_str("dir_name incremental abort_if_exists ask_to_delete") - ) - hdbg.dassert_is_not(dir_name, None) - dir_name = os.path.normpath(dir_name) - if os.path.normpath(dir_name) == ".": - _LOG.debug("Can't create dir '%s'", dir_name) - exists = os.path.exists(dir_name) - is_dir = os.path.isdir(dir_name) - _LOG.debug(hprint.to_str("dir_name exists is_dir")) - if abort_if_exists: - hdbg.dassert_path_not_exists(dir_name) - # dir exists / dir does not exist - # incremental no-op mkdir - # not incremental rm+mkdir mkdir - if exists: - if incremental and is_dir: - # The dir exists and we want to keep it (i.e., incremental), so we - # are done. - # os.chmod(dir_name, 0755) - _LOG.debug( - "The dir '%s' exists and incremental=True: exiting", dir_name - ) - return - if ask_to_delete: - hsystem.query_yes_no( - f"Do you really want to delete dir '{dir_name}'?", - abort_on_no=True, - ) - # The dir exists and we want to create it from scratch (i.e., not - # incremental), so we need to delete the dir. - _LOG.debug("Deleting dir '%s'", dir_name) - if os.path.islink(dir_name): - delete_file(dir_name) - else: - hdbg.dassert_ne(os.path.normpath(dir_name), ".") - shutil.rmtree(dir_name) - _LOG.debug("Creating directory '%s'", dir_name) - # NOTE: `os.makedirs` raises `OSError` if the target directory already exists. - # A race condition can happen when another process creates our target - # directory, while we have just found that it doesn't exist, so we need to - # handle this situation gracefully. - try: - os.makedirs(dir_name) - except OSError as e: - _LOG.error(str(e)) - # It can happen that we try to create the directory while somebody else - # created it, so we neutralize the corresponding exception. - if e.errno == 17: - # OSError: [Errno 17] File exists. 
- pass - else: - raise e - - -def create_dir( - dir_name: str, - incremental: bool, - *, - abort_if_exists: bool = False, - ask_to_delete: bool = False, - backup_dir_if_exists: bool = False, -) -> None: - """ - Create a directory. - - :param incremental: if False then the directory is deleted and re- - created, otherwise the same directory is reused as it is - :param abort_if_exists: abort if the target directory already exists - :param ask_to_delete: if it is not incremental and the dir exists, - asks before deleting. This option is used when we want to start - with a clean dir (i.e., incremental=False) but, at the same - time, we want to make sure that the user doesn't want to delete - the content of the dir. Another approach is to automatically - rename the old dir with backup_dir_if_exists. - :param backup_dir_if_exists: if the target dir already exists, then - rename it using a timestamp (e.g., dir_20231003_080000) and - create a new target dir - """ - if backup_dir_if_exists: - if not os.path.exists(dir_name): - # Create new dir. - _LOG.debug("Creating dir '%s'", dir_name) - _create_dir(dir_name, incremental=True) - else: - _LOG.debug("Dir '%s' already exists", dir_name) - # Get dir timestamp. - dir_timestamp = os.path.getmtime(dir_name) - dir_datetime = datetime.datetime.fromtimestamp(dir_timestamp) - # Build new dir name with timestamp. - dir_name_new = ( - dir_name + "." + dir_datetime.strftime("%Y%m%d_%H%M%S") - ) - # Rename dir. - if not os.path.exists(dir_name_new): - _LOG.warning("Renaming dir '%s' -> '%s'", dir_name, dir_name_new) - os.rename(dir_name, dir_name_new) - else: - _LOG.warning("Dir '%s' already exists", dir_name_new) - # Create new dir. - _LOG.debug("Creating dir '%s'", dir_name) - _create_dir(dir_name, incremental=True) - else: - _create_dir( - dir_name, - incremental, - abort_if_exists=abort_if_exists, - ask_to_delete=ask_to_delete, - ) - - -# ############################################################################# -# Filesystem. 
-# ############################################################################# - - -def create_soft_link(src: str, dst: str) -> None: - """ - Create a soft-link to called (where and are files - or directories as in a Linux ln command). - - This is equivalent to a command like "cp " but creating a - soft link. - """ - _LOG.debug("# CreateSoftLink") - # hs3.dassert_is_not_s3_path(src) - # hs3.dassert_is_not_s3_path(dst) - # Create the enclosing directory, if needed. - enclosing_dir = os.path.dirname(dst) - _LOG.debug("enclosing_dir=%s", enclosing_dir) - create_dir(enclosing_dir, incremental=True) - # Create the link. Note that the link source needs to be an absolute path. - src = os.path.abspath(src) - cmd = f"ln -s {src} {dst}" - hsystem.system(cmd) - - -def delete_dir( - dir_: str, - change_perms: bool = False, - errnum_to_retry_on: int = 16, - num_retries: int = 1, - num_secs_retry: int = 1, -) -> None: - """ - Delete a directory. - - :param change_perms: change permissions to -R rwx before deleting to deal with - incorrect permissions left over - :param errnum_to_retry_on: specify the error to retry on, e.g., - ``` - OSError: [Errno 16] Device or resource busy: - 'gridTmp/.nfs0000000002c8c10b00056e57' - ``` - """ - _LOG.debug("Deleting dir '%s'", dir_) - # hs3.dassert_is_not_s3_path(dir_) - if not os.path.isdir(dir_): - # No directory so nothing to do. - return - if change_perms and os.path.isdir(dir_): - cmd = "chmod -R +rwx " + dir_ - hsystem.system(cmd) - i = 1 - while True: - try: - shutil.rmtree(dir_) - # Command succeeded: exit. - break - except OSError as e: - if errnum_to_retry_on is not None and e.errno == errnum_to_retry_on: - # TODO(saggese): Make it less verbose once we know it's working - # properly. 
- _LOG.warning( - "Couldn't delete %s: attempt=%s / %s", dir_, i, num_retries - ) - i += 1 - if i > num_retries: - hdbg.dfatal( - f"Couldn't delete {dir_} after {num_retries} attempts ({str(e)})" - ) - else: - time.sleep(num_secs_retry) - else: - # Unforeseen error: just propagate it. - raise e - - -def backup_file_or_dir_if_exists(path: str) -> None: - """ - Create a timestamped backup of a file or directory if it exists. - - If the path exists, it is moved to a new location with a timestamp - appended to the name (e.g., path.20231003_080000.backup). - - :param path: path to the file or directory to back up - """ - if not os.path.exists(path): - # Nothing to back up. - return - _LOG.warning("Path '%s' already exists: making a backup", path) - # Get current timestamp. - timestamp = datetime.datetime.now() - timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") - # Build backup path. - backup_path = f"{path}.{timestamp_str}.backup" - # Move the file or directory to backup. - shutil.move(path, backup_path) - _LOG.info("Backed up '%s' -> '%s'", path, backup_path) - - -def dassert_is_valid_file_name(file_name: str) -> None: - hdbg.dassert_isinstance(file_name, str) - hdbg.dassert_ne(file_name, "") - - -# TODO(gp): Don't use default incremental. -def create_enclosing_dir(file_name: str, incremental: bool = False) -> str: - """ - Create the dir enclosing file_name, if needed. 
- - :param incremental: same meaning as in `create_dir()` - """ - _LOG.debug(hprint.to_str("file_name incremental")) - dassert_is_valid_file_name(file_name) - # hs3.dassert_is_not_s3_path(file_name) - # - dir_name = os.path.dirname(file_name) - _LOG.debug(hprint.to_str("dir_name")) - if dir_name != "": - _LOG.debug( - "Creating dir_name='%s' for file_name='%s'", dir_name, file_name - ) - create_dir(dir_name, incremental=incremental) - hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) - return dir_name - - -# ############################################################################# -# File. -# ############################################################################# - - -# TODO(saggese): We should have `lines` first since it is an input param. -# TODO(Nikola): Remove `use_gzip` and use `file_name` extension instead. -def to_file( - file_name: str, - txt: str, - use_gzip: bool = False, - mode: Optional[str] = None, - force_flush: bool = False, -) -> None: - """ - Write the content of txt into file_name, creating the enclosing directory - if needed. - - :param file_name: name of written file - :param txt: content of the file - :param use_gzip: whether the file should be compressed as gzip - :param mode: file writing mode - :param force_flush: whether to forcibly clear the file buffer - """ - _LOG.debug(hprint.to_str("file_name use_gzip mode force_flush")) - dassert_is_valid_file_name(file_name) - hdbg.dassert_isinstance(txt, str) - # Choose default writing mode based on compression. - if mode is None: - if use_gzip: - # Override default binary mode for `gzip`. - mode = "wt" - else: - mode = "w" - # Create the enclosing dir, if needed. - create_enclosing_dir(file_name, incremental=True) - if use_gzip: - # Check if user provided correct file name. - if not file_name.endswith(("gz", "gzip")): - _LOG.warning("The provided file extension is not for a gzip file.") - # Open gzipped file. 
- f = gzip.open(file_name, mode) - else: - # Open regular text file. - # buffering = 0 if mode == "a" else -1 - buffering = 0 if force_flush else -1 - f = open( # pylint: disable=consider-using-with,assignment - file_name, mode, buffering=buffering - ) - # Write file contents. - f.write(txt) # type: ignore - f.close() - # Clear internal buffer of the file. - if force_flush: - f.flush() - os.fsync(f.fileno()) - - -def _raise_file_decode_error(error: Exception, file_name: str) -> None: - """ - Raise UnicodeDecodeError with detailed error message. - - :param error: raised UnicodeDecodeError - :param file_name: name of read file that raised the exception - """ - msg = [] - msg.append(f"error={error}") - msg.append(f"file_name='{file_name}'") - msg_as_str = "\n".join(msg) - _LOG.error(msg_as_str) - raise RuntimeError(msg_as_str) - - -def from_file( - file_name: str, - *, - encoding: Optional[Any] = None, -) -> str: - """ - Read contents of a file as string. - - :param file_name: path to .txt,.gz or .pq file - :param encoding: encoding to use when reading the string - :return: contents of file as string - """ - dassert_is_valid_file_name(file_name) - hdbg.dassert_path_exists(file_name) - data: str = "" - if file_name.endswith((".gz", ".gzip")): - # Open gzipped file. - f = gzip.open(file_name, "rt", encoding=encoding) - else: - # Open regular text file. - f = open( # pylint: disable=consider-using-with - file_name, "r", encoding=encoding - ) - try: - # Read data. - data = f.read() - except UnicodeDecodeError as e: - # Raise unicode decode error message. 
- _raise_file_decode_error(e, file_name) - finally: - f.close() - hdbg.dassert_isinstance(data, str) - return data - - -# TODO(gp): Use hintro.format_size -def get_size_as_str(file_name: str) -> str: - if os.path.exists(file_name): - size_in_bytes = os.path.getsize(file_name) - if size_in_bytes < (1024**2): - size_in_kb = size_in_bytes / 1024.0 - res = "%.1f KB" % size_in_kb - elif size_in_bytes < (1024**3): - size_in_mb = size_in_bytes / (1024.0**2) - res = "%.1f MB" % size_in_mb - else: - size_in_gb = size_in_bytes / (1024.0**3) - res = "%.1f GB" % size_in_gb - else: - res = "nan" - return res - - -def remove_extension( - filename: str, - extension: str, - *, - check_file_exists: bool = False, - check_has_extension: bool = True, -) -> Optional[str]: - """ - Attempt to remove `extension` from `filename`. - - :param filename: str filename - :param extension: file extension starting with a dot. E.g., ".csv" - :return: filename without `extension`, if applicable, else returns `None`. - """ - hdbg.dassert_isinstance(filename, str) - hdbg.dassert(filename) - if check_file_exists: - hdbg.dassert_file_exists(filename) - # - hdbg.dassert_isinstance(extension, str) - hdbg.dassert( - extension.startswith("."), - "Filename extension=`%s` expected to start with `.`", - extension, - ) - # - ret: Optional[str] = None - if check_has_extension: - hdbg.dassert( - filename.endswith(extension), - "Filename '%s' doesn't have extension=`%s`", - filename, - extension, - ) - if filename.endswith(extension): - ret = filename[: -len(extension)] - return ret - - -# TODO(gp): @all Use msg in all uses of this script `jackpyc "create_executable"` -# TODO(gp): `file_name` should go last. -def create_executable_script( - file_name: str, content: str, *, msg: str = "" -) -> None: - # Write the file. - hdbg.dassert_isinstance(content, str) - to_file(file_name, content) - # Make it executable. 
- cmd = "chmod +x " + file_name - hsystem.system(cmd) - if msg: - print(f"# {msg}:\n> {file_name}") - - -def add_suffix_to_filename( - file_name: str, - suffix: Union[int, str], - *, - before_extension: bool = True, - with_underscore: bool = True, -) -> str: - """ - Add a suffix to a file name, with or without changing the extension. - - E.g., {base_name}.{ext} -> {file_name}.{suffix}.{ext} - - :param file_name: file name to modify - :param suffix: index to add to the file name - :param before_extension: whether to insert the index before the file - extension - :param with_underscore: whether to separate the index with an - underscore - :return: modified file name with an index - """ - suffix = str(suffix) - if with_underscore: - suffix = "_" + suffix - _LOG.debug(hprint.to_str("suffix")) - # - if before_extension: - # Add the suffix to the file name before the extension. - data = file_name.rsplit(".", 1) - if len(data) == 1: - # E.g., `system_log_dir` -> `system_log_dir_1` - ret = file_name + suffix - else: - # E.g., `dir/file.txt` -> `dir/file_1.txt`. - hdbg.dassert_eq(len(data), 2, "Invalid file_name='%s'", file_name) - file_name_no_ext, ext = data - ret = file_name_no_ext + suffix + "." + ext - else: - # Add the suffix after the name of the file. - # E.g., `dir/file.txt` -> `dir/file.txt_1`. - ret = file_name + suffix - _LOG.debug(hprint.to_str("ret")) - return ret - - -def rename_file_if_exists( - file_path: str, - suffix: str, - *, - before_extension: bool = True, -) -> None: - """ - Rename a file if it exists using provided suffix. - - Used to avoid overwriting if writing multiple files with the same name. 
- - :param file_path: a file path to modify - :param suffix: index to add to the file name - :param before_extension: whether to insert the suffix before the file extension - - if True, {file_path}.{ext} -> {file_path}.{suffix}.{ext} - - if False, {file_path}.{ext} -> {file_path}.{ext}.{suffix} - """ - if os.path.exists(file_path): - # Add a suffix to a file name. - if before_extension: - # Add a suffix before an extension, e.g., `file.suffix.csv`. - dir_path, file_name = os.path.split(file_path) - file_name, ext = os.path.splitext(file_name) - hdbg.dassert(ext.startswith("."), "Invalid extension='%s'", ext) - new_file_path = f"{file_name}.{suffix}{ext}" - new_file_path = os.path.join(dir_path, new_file_path) - else: - # Add a suffix after an extension, e.g., `file.csv.suffix`. - new_file_path = f"{file_path}.{suffix}" - hdbg.dassert_path_not_exists(new_file_path) - _LOG.debug("renaming %s to %s", file_path, new_file_path) - os.rename(file_path, new_file_path) - - -def change_file_extension(file_path: str, new_extension: str) -> str: - """ - Change the extension of a file path. - - :param file_path: The path of the file to change the extension of. - :param new_extension: The new extension to use, starting with `.` - :return: The new file path with the new extension. - """ - # Make sure the new extension starts with a dot - hdbg.dassert( - new_extension.startswith("."), "Invalid extension='%s'", new_extension - ) - # Split the file path into root and extension - file_name, _ = os.path.splitext(file_path) - # Create the new file path - new_file_path = file_name + new_extension - return new_file_path - - -def wait_for_file( - file_path: str, - *, - check_interval_in_secs: float = 0.5, - timeout_in_secs: int = 10, -) -> None: - """ - Wait until a specified file is generated or until the timeout is reached. - - :param file_path: The path of the file to wait for. 
- :param check_interval_in_secs: Time in seconds between checks - :param timeout_in_secs: Maximum time to wait for the file in seconds - """ - _LOG.debug("Waiting for file: %s", file_path) - start_time = time.time() - while not os.path.exists(file_path): - if time.time() - start_time > timeout_in_secs: - raise ValueError(f"Timeout reached. File not found: {file_path}") - time.sleep(check_interval_in_secs) - _LOG.debug("File generated: %s", file_path) - - -# ############################################################################# -# JSON -# ############################################################################# - - -def serialize_custom_types_for_json_encoder(obj: Any) -> Any: - """ - Serialize DataFrame and other objects for JSON. - - E.g. dataframe {"A": [0, 1], "B": [0, 1]} will go to a list of dictionaries: - [{"A": 0, "B": 0}, {"A": 1, "B": 1}] - each dictionary is for one row. - """ - import numpy as np - import pandas as pd - - result = None - if isinstance(obj, pd.DataFrame): # type: ignore - result = obj.to_dict("records") - elif isinstance(obj, pd.Series): # type: ignore - result = obj.to_dict() - elif isinstance(obj, np.int64): # type: ignore - result = int(obj) - elif isinstance(obj, np.float64): # type: ignore - result = float(obj) - elif isinstance(obj, uuid.UUID): - result = str(obj) - elif isinstance(obj, datetime.date): - result = obj.isoformat() - elif isinstance(obj, type(pd.NaT)): - result = None - elif isinstance(obj, type(pd.NA)): - result = None - else: - raise TypeError(f"Can not serialize {obj} of type {type(obj)}") - return result - - -def to_json(file_name: str, obj: dict, *, use_types: bool = False) -> None: - """ - Write an object into a JSON file. - - :param obj: data for writing - :param file_name: name of file - :param use_types: whether to use jsonpickle to save the file - """ - if not file_name.endswith(".json"): - _LOG.warning("The file '%s' doesn't end in .json", file_name) - # Create dir. 
- dir_name = os.path.dirname(file_name) - if dir_name != "" and not os.path.isdir(dir_name): - create_dir(dir_name, incremental=True) - # Write data as JSON. - with open(file_name, "w") as outfile: - if use_types: - # Use jsonpickle to save types. - import jsonpickle # type: ignore[import-untyped] - - txt = jsonpickle.encode(obj, indent=4) - outfile.write(txt) - else: - json.dump( - obj, - outfile, - indent=4, - default=serialize_custom_types_for_json_encoder, - ) - - -def from_json(file_name: str, *, use_types: bool = False) -> Dict: - """ - Read object from JSON file. - - :param file_name: name of file - :param use_types: whether to use jsonpickle to load the file - :return: dict with data - """ - hdbg.dassert(file_name) - if not file_name.endswith(".json"): - _LOG.warning("The file '%s' doesn't end in .json", file_name) - # Read file as text. - hdbg.dassert_file_exists(file_name) - txt = from_file(file_name) - # Remove comments (which are not supported natively by JSON). - txt_tmp = [] - for line in txt.split("\n"): - if re.match(r"^\s*#", line): - continue - txt_tmp.append(line) - txt_tmp = "\n".join(txt_tmp) - _LOG.debug("txt_tmp=\n%s", txt_tmp) - # Convert text into Python data structures. - data = {} - if use_types: - import jsonpickle # type: ignore - - data = jsonpickle.decode(txt_tmp) - else: - data = json.loads(txt_tmp) - return data - - -# TODO(gp): -> pandas_helpers.py -def load_df_from_json(path_to_json: str) -> "pd.DataFrame": # noqa: F821 # type: ignore - """ - Load a dataframe from a json file. - - :param path_to_json: path to the json file - :return: - """ - import pandas as pd - - # Load the dict with the data. - data = from_json(path_to_json) - # Preprocess the dict to handle arrays with different length. - data = {k: pd.Series(v) for k, v in data.items()} - # Package into a dataframe. 
- df = pd.DataFrame(data) - return df - - -# ############################################################################# -# Directory operations -# ############################################################################# - -# Copied from `hgit.py` to avoid import cycles. - - -def _find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. - git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - # Using the `open()` to avoid import cycles with the `hio` module. - with open(git_dir, "r") as f: - txt = f.read() - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root`. - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # Resolve the relative path to the absolute path of the Git directory. 
- abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. - hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. - path = parent - return git_root_dir - - -# End copy. - - -def safe_rm_file(dir_path: str) -> None: - """ - Safely remove a file after ensuring it's within our Git client. - - This function provides a safety check to prevent accidental deletion - of files outside our Git repository. - - :param dir_path: Path to the directory to delete - :raises AssertionError: If dir_path is not within the Git client - :raises OSError: If directory doesn't exist or can't be deleted - """ - # Convert to absolute path for comparison. - dir_path = os.path.abspath(dir_path) - # Get the Git client root. - git_root = _find_git_root() - git_root = os.path.abspath(git_root) - # Ensure the directory is within our Git client. - hdbg.dassert( - dir_path.startswith(git_root), - "Directory '%s' is not within Git client root '%s'", - dir_path, - git_root, - ) - # Additional safety check: prevent deletion of Git root itself. 
- hdbg.dassert_ne( - dir_path, - git_root, - "Cannot delete Git client root directory '%s'", - git_root, - ) - # Verify directory exists before attempting deletion. - hdbg.dassert( - os.path.exists(dir_path), - "Directory '%s' does not exist", - dir_path, - ) - hdbg.dassert( - os.path.isdir(dir_path), - "Path '%s' is not a directory", - dir_path, - ) - # Perform the deletion. - _LOG.debug("Safely removing directory: %s", dir_path) - shutil.rmtree(dir_path) - _LOG.debug("Successfully removed directory: %s", dir_path) - - -# TODO(ai_gp): Add unit tests. -def is_subdir(dir1: str, dir2: str) -> bool: - """ - Check if `dir1` is a subdirectory of `dir2`. - - :param dir1: First directory - :param dir2: Second directory - :return: True if `dir1` is a subdirectory of `dir2`, False otherwise - """ - # Resolve to absolute and normalized paths. - abs_dir1 = os.path.abspath(dir1) - abs_dir2 = os.path.abspath(dir2) - # Get the common path prefix. - common = os.path.commonpath([abs_dir1, abs_dir2]) - # It's a subdir if they share the same common path as the parent. - return common == abs_dir2 - - -def write_file_back( - file_name: str, txt_old: List[str], txt_new: List[str] -) -> None: - """ - Write new text to file only if it differs from the old text. - - :param file_name: Path to the file to write to - :param txt_old: Original text as a list of strings - :param txt_new: New text as a list of strings - """ - # Process old text. - hdbg.dassert_list_of_strings(txt_old) - txt_as_str = "\n".join(txt_old) - # Process new text. - hdbg.dassert_list_of_strings(txt_new) - txt_new_as_str = "\n".join(txt_new) - # Write file back, if needed. 
- if txt_as_str != txt_new_as_str: - to_file(file_name, txt_new_as_str) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py deleted file mode 100644 index d11ecbafc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py +++ /dev/null @@ -1,880 +0,0 @@ -""" -Import as: - -import helpers.hjoblib as hjoblib -""" - -import concurrent.futures -import logging -import math -import os -import pprint -import random -import sys -import traceback -from functools import wraps -from multiprocessing import Process, Queue -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import joblib -from joblib._store_backends import StoreBackendBase, StoreBackendMixin -from tqdm.autonotebook import tqdm - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.htimer as htimer -import helpers.htqdm as htqdm - -# Avoid dependency from other `helpers` modules, such as `helpers.hcache`, to -# prevent import cycles. 
- - -_LOG = logging.getLogger(__name__) - -# - Assume one wants to execute `n` invocations of a given `func` -# - E.g., `func(param_1), func(param_2), ..., func(param_n)` -# - Each `param` is a tuple of `*args` and `**kwargs` to apply to `func` -# - A `Workload` is composed of: -# - `workload_func`: the function to execute -# - `func_name`: the name / description of the function `func` -# - `tasks`: a list of `n` set of parameters `*args`, `**kwargs` to apply -# to the function (e.g., `param_1`, ..., `param_n`) -# - Each `Task` executes a subset of the functions -# - `Tasks` are a partition of the function invocations, i.e., each function -# invocation is executed by one and only one task -# - The `n` `Tasks` are then executed by `k` threads in parallel or serially -# - Note that a single task can correspond to processing of multiple logical -# chunks of work, because they need to be processed together or because we -# want to enforce that it is executed on a single processor -# - E.g., if we want to concatenate files we can map multiple filenames in a -# single `Task`. In this case the `Task` contains a list of filenames to -# concatenate together - -# ############################################################################# -# Task -# ############################################################################# - -# A `Task` contains the parameters to pass to the function that needs to be -# executed. 
-# A `Task` is represented by a tuple of `*args` and `**kwargs`, e.g., -# ``` -# args=() -# kwargs={ -# 'asset_col_name': 'asset', -# 'dst_dir': './tmp.s3_out', -# 'parquet_file_names': [ -# './tmp.s3/20220110/data.parquet', -# './tmp.s3/20220111/data.parquet', -# './tmp.s3/20220112/data.parquet'] -# } -# ``` -Task = Tuple[Tuple[Any], Dict[str, Any]] - - -# TODO(gp): @Nikola add unit tests -def split_list_in_tasks( - list_in: List[Any], - n: int, - *, - keep_order: bool = False, - num_elems_per_task: Optional[int] = None, -) -> List[List[Any]]: - """ - Split a list in tasks based on the number of threads or elements per - partition. - - :param num_elems_per_task: force each task to have the given number of elements - :param keep_order: split the list so that consecutive elements of the list - are in different tasks. This favors executing the workload in order on `n` - threads - :return: list of lists of elements, where each list can be assigned to an - execution thread - - - E.g., [a, b, c, d, e] executed on 3 threads [1, 2, 3] gives the allocation - for `keep_order=True`: - ``` - 1 -> [a, d] - 2 -> [b, e] - 3 -> [c] - ``` - - For `keep_order=False` the allocation is: - ``` - 1 -> [a, b] - 2 -> [c, d] - 3 -> [e] - ``` - - For `num_elems_per_task=3` the allocation is: - ``` - 1 -> [a, b, c] - 2 -> [d, e] - 3 -> [] - ``` - """ - hdbg.dassert_lte(1, n) - hdbg.dassert_lte(n, len(list_in), "There are fewer tasks than threads") - if keep_order: - hdbg.dassert_is( - num_elems_per_task, - None, - "Can't specify num_elems_per_task with keep_order", - ) - list_out: List[list] = [[] for _ in range(n)] - for i, elem in enumerate(list_in): - _LOG.debug("%s: %s -> %s", i, elem, i % n) - list_out[i % n].append(elem) - else: - if num_elems_per_task is None: - k = int(math.ceil(len(list_in) / n)) - else: - k = num_elems_per_task - hdbg.dassert_lte(1, k) - list_out = [list_in[i : i + k] for i in range(0, len(list_in), k)] - # Ensure that the elements are all distributed. 
- hdbg.dassert_eq(sum(len(l_) for l_ in list_out), len(list_in)) - return list_out - - -def apply_incremental_mode( - src_dst_file_name_map: List[Tuple[str, str]], -) -> List[Tuple[str, str]]: - """ - Apply incremental mode to a map of source to destination files. - - Often the function in a `Workload` corresponds to reading a file, processing it, - and writing the output in a file. In this case, applying the incremental mode - means removing the tuples in the src_file -> dst_file mapping where the dst file - already exists. - - :return: filtered mapping - """ - hdbg.dassert_container_type(src_dst_file_name_map, list, tuple) - # - src_dst_file_name_map_tmp = [] - for src_dst_file_name in src_dst_file_name_map: - # Parse the element of the mapping. - hdbg.dassert_eq(len(src_dst_file_name), 2) - src_file_name, dst_file_name = src_dst_file_name - _LOG.debug("%s -> %s", src_file_name, dst_file_name) - # Discard the mapping element if the destination file already exists. - hdbg.dassert_path_exists(src_file_name) - if os.path.exists(dst_file_name): - _LOG.debug("Skipping %s -> %s", src_file_name, dst_file_name) - else: - src_dst_file_name_map_tmp.append((src_file_name, dst_file_name)) - _LOG.info( - "After applying incremental mode, there are %s / %s files to process", - len(src_dst_file_name_map_tmp), - len(src_dst_file_name_map), - ) - return src_dst_file_name_map_tmp - - -def validate_task(task: Task) -> bool: - """ - Assert if `Task` is malformed, otherwise return True. - - A valid `Task` is a tuple `(*args, **kwargs)`. - """ - # A `Task` is a tuple. - hdbg.dassert_isinstance(task, tuple) - hdbg.dassert_eq(len(task), 2) - # Parse the `Task`. 
- args, kwargs = task - _LOG.debug("task.args=%s", pprint.pformat(args)) - hdbg.dassert_isinstance(args, tuple) - _LOG.debug("task.kwargs=%s", pprint.pformat(kwargs)) - hdbg.dassert_isinstance(kwargs, dict) - return True - - -def task_to_string(task: Task, *, use_pprint: bool = True) -> str: - hdbg.dassert(validate_task(task)) - args, kwargs = task - txt = [] - if use_pprint: - txt.append(f"args={pprint.pformat(args)}") - txt.append(f"kwargs={pprint.pformat(kwargs)}") - else: - txt.append(f"args={str(args)}") - txt.append(f"kwargs={str(kwargs)}") - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# Workload -# ############################################################################# - -# A `Workload` consists of multiple executions of a function with different -# parameters represented by `Tasks`. -# Note: `joblib_helper` can be used together with caching. The workload function -# doesn't have to be the one that is cached, but it can trigger caching of function -# results in the call stack. -Workload = Tuple[ - # `func`: the function representing the workload to execute - Callable, - # `func_name`: the mnemonic name of the function, which is used for debugging - # info and for naming the directory storing the cache - # - E.g., `vltbut.get_cached_bar_data_for_date_interval` - # - Note that the `func_name` can be different than the name of `func` - # - E.g., we can call - # `vltbut.get_cached_bar_data_for_date_interval_for_interval` inside `func`, - # in order to create a cache for - # `vltbut.get_cached_bar_data_for_date_interval`, so the cache name - # should be for `vltbut.get_cached_bar_data_for_date_interval` - str, - # `tasks`: a list of (*args, **kwargs) to pass to `func` - List[Task], -] - - -def validate_workload(workload: Workload) -> bool: - """ - Assert if the `Workload` is malformed, otherwise return True. - - A valid `Workload` is a triple `(func, func_name, List[Task])`. 
- """ - # A valid workload` is a triple. - hdbg.dassert_isinstance(workload, tuple) - hdbg.dassert_eq(len(workload), 3) - # Parse. - workload_func, func_name, tasks = workload - # Check each component. - hdbg.dassert_callable(workload_func) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_container_type(tasks, List, tuple) - hdbg.dassert(all(validate_task(task) for task in tasks)) - return True - - -def randomize_workload( - workload: Workload, *, seed: Optional[int] = None -) -> Workload: - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Randomize `tasks`. - seed = seed or 42 - random.seed(seed) - random.shuffle(tasks) - # Build a new workload. - workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def reverse_workload( - workload: Workload, *, seed: Optional[int] = None -) -> Workload: - """ - Reverse the workload. - - Typically we generate workload in chronological order, but sometimes - we want to run from most recent data to least recent, so that we - have the results about the most recent periods first, which is what - we care most about. - """ - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Reverse. - _LOG.warning("Reversing the workload as per user request") - tasks = list(reversed(tasks)) - # Build a new workload. - workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def truncate_workload( - workload: Workload, - max_num: int, -) -> Workload: - """ - Limit the workload to the first `max_num` tasks. - """ - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Truncate the workload. - _LOG.warning("Considering only the first %d / %d tasks", max_num, len(tasks)) - hdbg.dassert_lte(1, max_num) - hdbg.dassert_lte(max_num, len(tasks)) - tasks = tasks[:max_num] - # Build a new workload. 
- workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def workload_to_string(workload: Workload, *, use_pprint: bool = True) -> str: - """ - Print the workload. - - E.g., - - ``` - workload_func=_LimeTask317_process_chunk - func_name=_LimeTask317_process_chunk - # task 1 / 3 - args=([('./tmp.s3/20220110/data.parquet', - './tmp.s3_out/./tmp.s3/20220110/data.parquet')],) - kwargs={} - # task 2 / 3 - args=([('./tmp.s3/20220111/data.parquet', - './tmp.s3_out/./tmp.s3/20220111/data.parquet')],) - kwargs={} - # task 3 / 3 - args=([('./tmp.s3/20220112/data.parquet', - './tmp.s3_out/./tmp.s3/20220112/data.parquet')],) - kwargs={} - ``` - """ - validate_workload(workload) - workload_func, func_name, tasks = workload - txt = [] - workload_func_str = getattr(workload_func, "__name__", "unknown_function") - txt.append(f"workload_func={workload_func_str}") - txt.append(f"func_name={func_name}") - for i, task in enumerate(tasks): - txt.append(f"# task {i + 1} / {len(tasks)}") - txt.append(task_to_string(task, use_pprint=use_pprint)) - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# Template for functions to execute in parallel. -# ############################################################################# - -# NOTE: the workload function: -# - asserts if there is an error, since the return value is a string with a summary -# of the execution -# - doesn't have to be the function that we intend to cache - - -def _workload_function(*args: Any, **kwargs: Any) -> str: - """ - Execute the function task. 
- - :raises: in case of error - :return: string representing information about the cached function - execution - """ - _ = args - incremental = kwargs.pop("incremental") - num_attempts = kwargs.pop("num_attempts") - _ = incremental, num_attempts - func_output: List[str] = [] - result = "\n".join(func_output) - return result - - -def _get_workload( - # args: argparse.Namespace -) -> None: - """ - Prepare the workload using the parameters from command line. - """ - # _ = args - - -# ############################################################################# -# Layer passing information from `parallel_execute` to the function to execute -# in parallel. -# ############################################################################# - - -def get_num_executing_threads(args_num_threads: Union[str, int]) -> int: - """ - Return the number of executing threads based on the value of - `args.num_threads`. - - E.g., - - `serial` corresponds to 1 - - `-1` corresponds to all available CPUs - """ - if args_num_threads == "serial": - num_executing_threads = 1 - elif args_num_threads == -1: - # All CPUs available. - num_executing_threads = joblib.cpu_count() - else: - # Assume it's an int. - num_executing_threads = int(args_num_threads) - hdbg.dassert_lte(1, num_executing_threads) - return num_executing_threads - - -def _run_in_process(func: Callable, q: Queue, *args: Any, **kwargs: Any) -> None: - """ - Run function as a process and store output in the input Queue. - """ - _LOG.debug("pid after processify=", os.getpid()) - try: - ret = func(*args, **kwargs) - except Exception: - # Store error logs in the queue. - ex_type, ex_value, tb = sys.exc_info() - error = ex_type, ex_value, "".join(traceback.format_tb(tb)) - ret = None - else: - error = None - q.put((ret, error)) - - -# TODO(grisha): Add type hints, add unit test to understand the behavior. 
-# From https://gist.github.com/schlamar/2311116 -# Note that this is not going to work with joblib.parallel with -# backend="multiprocessing" returning an error -# AssertionError: daemonic processes are not allowed to have children -def processify(func): - """ - Decorator to run a function as a process. - - Be sure that every argument and the return value is *pickable*. The - created process is joined, so the code does not run in parallel. - """ - - @wraps(func) - def wrapper(*args, **kwargs): - q = Queue() - p = Process( - target=_run_in_process, args=[func] + [q] + list(args), kwargs=kwargs - ) - p.start() - ret, error = q.get() - p.join() - if error: - ex_type, ex_value, tb_str = error - message = f"{ex_value.message} (in subprocess)\n{tb_str}" - raise ex_type(message) - return ret - - return wrapper - - -def _parallel_execute_decorator( - task_idx: int, - task_len: int, - incremental: bool, - abort_on_error: bool, - num_attempts: int, - log_file: str, - # TODO(gp): Pass these parameters first. - workload_func: Callable, - func_name: str, - processify_func: bool, - task: Task, - enable_file_logging: bool, - verbose_log: bool, -) -> Any: - """ - Parameters have the same meaning as in `parallel_execute()`. - - :param abort_on_error: control whether to abort on `workload_func` function - that is failing and asserting - - If `workload_func` fails: - - if `abort_on_error=True` the exception from `workload_func` is - propagated and the return value is `None` - - if `abort_on_error=False` the exception is not propagated, but the - return value is the string representation of the exception - :param processify_func: switch to enable wrapping a function into a process - :param enable_file_logging: see same parameter in `parallel_execute()` - :param verbose_log: see same parameter in `parallel_execute()` - :return: the return value of the workload function or the exception string - """ - # Validate very carefully all the parameters. 
- hdbg.dassert_lte(0, task_idx) - hdbg.dassert_lt(task_idx, task_len) - hdbg.dassert_isinstance(incremental, bool) - hdbg.dassert_isinstance(abort_on_error, bool) - hdbg.dassert_lte(1, num_attempts) - hdbg.dassert_isinstance(log_file, str) - hdbg.dassert_callable(workload_func) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert(validate_task(task)) - # Redirect the logging output of each task to a different file. - # TODO(gp): This file should go in the `task_dst_dir`. - # log_to_file = True - log_to_file = False - if log_to_file: - dst_dir = os.path.dirname(os.path.abspath(log_file)) - print(dst_dir) - hio.create_dir(dst_dir, incremental=True) - file_name = os.path.join( - dst_dir, f"{func_name}.{task_idx + 1}_{task_len}.log" - ) - _LOG.warning("Logging to %s", file_name) - file_handler = logging.FileHandler(file_name) - root_logger = logging.getLogger() - root_logger.addHandler(file_handler) - # Save information about the function to be executed. - txt = [] - # `start_ts` needs to be before running the function. - start_ts = hdateti.get_current_timestamp_as_string("naive_ET") - tag = f"{task_idx + 1}/{task_len} ({start_ts})" - txt.append("\n" + hprint.frame(tag) + "\n") - txt.append(f"tag={tag}") - workload_func_str = getattr(workload_func, "__name__", "unknown_function") - txt.append(f"workload_func={workload_func_str}") - txt.append(f"func_name={func_name}") - txt.append(task_to_string(task)) - # Run the workload. - args, kwargs = task - kwargs.update({"incremental": incremental, "num_attempts": num_attempts}) - with htimer.TimedScope( - logging.DEBUG, f"Execute '{workload_func_str}'" - ) as ts: - try: - if processify_func: - _LOG.debug("Using processify") - # Wrap the function into a process to enforce de-allocating - # memory at the end of the execution (see - # CmampTask5854: Resolve backtest memory leakage). 
- _LOG.debug("pid before processify=%s", os.getpid()) - workload_func = processify(workload_func) - res = workload_func(*args, **kwargs) - error = False - except Exception as e: # pylint: disable=broad-except - exception = e - txt.append(f"exception='{str(e)}'") - res = None - error = True - _LOG.error("Execution failed") - # Save information about the execution of the function. - elapsed_time = ts.elapsed_time - end_ts = hdateti.get_current_timestamp_as_string("naive_ET") - # TODO(gp): -> func_result - if verbose_log: - txt.append(f"func_res=\n{hprint.indent(str(res))}") - else: - txt.append("func_res=") - txt.append(f"elapsed_time_in_secs={elapsed_time}") - txt.append(f"start_ts={start_ts}") - txt.append(f"end_ts={end_ts}") - txt.append(f"error={error}") - # Update log file. - txt = "\n".join(txt) - _LOG.debug("txt=\n%s", hprint.indent(txt)) - if enable_file_logging: - hio.to_file(log_file, txt, mode="a") - if error: - # The execution wasn't successful. - _LOG.error(txt) - if abort_on_error: - _LOG.error("Aborting since abort_on_error=%s", abort_on_error) - raise exception # noqa: F821 - _LOG.error( - "Continuing execution since abort_on_error=%s", abort_on_error - ) - res = str(exception) - else: - # The execution was successful. - pass - return res - - -# TODO(gp): Pass a `task_dst_dir` to each task so it can write there. -# This is a generalization of `experiment_result_dir` for `run_config_list` and -# `run_notebook`. -def parallel_execute( - workload: Workload, - # Options for the `parallel_execute` framework. - dry_run: bool, - num_threads: Union[str, int], - incremental: bool, - abort_on_error: bool, - num_attempts: int, - log_file: str, - *, - backend: str = "loky", - enable_file_logging: bool = True, - verbose_log: bool = False, -) -> Optional[List[Any]]: - """ - Run a workload in parallel using joblib or asyncio. 
- - Note: - - if `abort_on_error=True` and a task fails early, `joblib` does not return partial results - - use `enable_logging=False` to disable logging entirely (useful for large results) - - use `verbose_log=False` to keep logging enabled but skip verbose output per task - - :param workload: the workload to execute - :param dry_run: if True, print the workload and exit without executing it - :param num_threads: joblib parameter to control how many threads to use - :param incremental: parameter passed to the function to execute to control if - we want to re-execute tasks already executed or not - :param abort_on_error: when True, if one task asserts then stop executing the - workload and return the exception of the failing task - - If False, the execution continues - :param num_attempts: number of times to attempt running a function before - declaring an error - :param log_file: file used to log information about the execution - :param backend: specify the backend type (e.g., joblib `loky` or `asyncio_process_executor`) - :param enable_file_logging: if False, skip writing any log file - :param verbose_log: if True, write detailed task results to the log file - - If False, large outputs will be omitted from the log to reduce file size - :return: results from executing `func` or the exception of the failing function - """ - # Print the parameters. - _LOG.info(hprint.frame("Workload")) - # It's too verbose to print all the workload. - # print(workload_to_string(workload, use_pprint=False)) - _LOG.info( - hprint.to_str( - "dry_run num_threads incremental num_attempts abort_on_error" - ) - ) - # Parse the workload. 
- validate_workload(workload) - workload_func, func_name, tasks = workload - _LOG.info("Saving log info in '%s'", log_file) - _LOG.info( - "Number of executing threads=%s (%s)", - get_num_executing_threads(num_threads), - num_threads, - ) - _LOG.info("Number of tasks=%s", len(tasks)) - # - if dry_run: - file_name = "./tmp.parallel_execute.workload.txt" - workload_as_str = workload_to_string(workload, use_pprint=False) - hio.to_file(file_name, workload_as_str) - _LOG.warning("Workload saved at '%s'", file_name) - _LOG.warning("Exiting without executing workload, as per user request") - return None - # Run. - task_len = len(tasks) - tqdm_out = htqdm.TqdmToLogger(_LOG, level=logging.INFO) - tqdm_iter = tqdm( - enumerate(tasks), - total=task_len, - file=tqdm_out, - desc=f"num_threads={num_threads} backend={backend}", - ) - if backend == "threading": - # Enable wrapping a function into a process for threading backend - # to force memory de-allocation. - # TODO(Grisha): unclear if there are cases when we want to use - # `False` with `threading` backends, consider exposing to the - # interface. - # TODO(Grisha): should we enable the switch for `num_threads="serial"`? will it work? - processify_func = True - else: - processify_func = False - if num_threads == "serial": - # Execute the tasks serially. - res = [] - for task_idx, task in tqdm_iter: - _LOG.debug("\n%s", hprint.frame(f"Task {task_idx + 1} / {task_len}")) - # Execute. - res_tmp = _parallel_execute_decorator( - task_idx, - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - task, - enable_file_logging, - verbose_log, - ) - res.append(res_tmp) - else: - # Execute the tasks in parallel. - num_threads = int(num_threads) - # -1 is interpreted by joblib like for all cores. 
- _LOG.info("Using %d threads, backend='%s'", num_threads, backend) - if backend in ("loky", "threading", "multiprocessing"): - # from joblib.externals.loky import set_loky_pickler - # set_loky_pickler('cloudpickle') - # Removed `verbose` param which causes issues in HelpersTask715. - res = joblib.Parallel(n_jobs=num_threads, backend=backend)( - joblib.delayed(_parallel_execute_decorator)( - task_idx, - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - task, - enable_file_logging, - verbose_log, - ) - # We can't use `tqdm_iter` since this only shows the submission of - # the jobs but not their completion. - for task_idx, task in enumerate(tasks) - ) - elif backend in ("asyncio_threading", "asyncio_multiprocessing"): - if backend == "asyncio_threading": - executor = concurrent.futures.ThreadPoolExecutor - elif backend == "asyncio_multiprocessing": - executor = concurrent.futures.ProcessPoolExecutor - else: - raise ValueError(f"Invalid backend='{backend}'") - func = lambda args_: _parallel_execute_decorator( - args_[0], - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - args_[1], - enable_file_logging, - verbose_log, - ) - args = list(enumerate(tasks)) - use_progress_bar = True - if not use_progress_bar: - # Implementation without progress bar. - with executor(max_workers=num_threads) as executor_: - res = list(executor_.map(func, args)) - else: - # Implementation with progress bar. 
- res = [] - with tqdm_iter as pbar: - with executor(max_workers=num_threads) as executor_: - futures = { - executor_.submit(func, arg): arg for arg in args - } - _LOG.debug("done submitting") - for future in concurrent.futures.as_completed(futures): - res_tmp = future.result() - res.append(res_tmp) - pbar.update(1) - else: - raise ValueError(f"Invalid backend='{backend}'") - _LOG.info("Saved log info in '%s'", log_file) - return res - - -# ############################################################################# -# joblib storage backend for S3. -# ############################################################################# - -# This allows to store a joblib cache on S3. - -# Adapted from https://github.com/aabadie/joblib-s3 - - -# ############################################################################# -# _S3FSStoreBackend -# ############################################################################# - - -class _S3FSStoreBackend(StoreBackendBase, StoreBackendMixin): - """ - A StoreBackend for S3 cloud storage file system. - """ - - def __init__(self) -> None: - super().__init__() - self._objs: List[Any] = [] - - def _flush(self) -> None: - _ = self - - def clear_location(self, location: str) -> None: - """ - Check if object exists in store. - """ - if self.storage.exists(location): - self._flush() - self.storage.rm(location, recursive=True) - - def _mkdirp(self, directory: str) -> None: - """ - Create recursively a directory on the S3 store. - """ - # Remove root cachedir from input directory to create as it should - # have already been created in the configure function. - if directory.startswith(self.location): - directory = directory.replace(self.location + "/", "") - current_path = self.location - for sub_dir in directory.split("/"): - current_path = os.path.join(current_path, sub_dir) - self.storage.mkdir(current_path) - - def create_location(self, location: str) -> None: - """ - Create object location on store. 
- """ - self._mkdirp(location) - - def get_items(self) -> List[Any]: - """ - Return the whole list of items available in cache. - """ - _ = self - return [] - - def configure( - self, - location: str, - backend_options: Dict[str, Any], - verbose: int = 0, - ) -> None: - """ - Configure the store backend. - """ - options = backend_options - hdbg.dassert_in("s3fs", options) - self.storage = options["s3fs"] - hdbg.dassert_in("bucket", options) - bucket = options["bucket"] - # Ensure the given bucket exists. - root_bucket = os.path.join("s3://", bucket) - if not self.storage.exists(root_bucket): - self.storage.mkdir(root_bucket) - if location.startswith("/"): - location.replace("/", "") - self.location = os.path.join(root_bucket, location) - if not self.storage.exists(self.location): - self.storage.mkdir(self.location) - # Computation results can be stored compressed for faster I/O. - self.compress = backend_options["compress"] - # Memory map mode is not supported. - self.mmap_mode = None - # TODO(gp): No need to flush for now. - # for fd in self._objs: - # fd.flush(force=True) - - def _open_item(self, fd: Any, mode: str) -> Any: - self._objs.append(fd) - return self.storage.open(fd, mode) - - def _item_exists(self, path: str) -> bool: - self._flush() - ret: bool = self.storage.exists(path) - return ret - - def _move_item(self, src: str, dst: str) -> None: - self.storage.mv(src, dst) - - -_REGISTER_S3FS_STORE = False - - -def register_s3fs_store_backend() -> None: - """ - Register the S3 store backend for joblib memory caching. 
- """ - global _REGISTER_S3FS_STORE - if not _REGISTER_S3FS_STORE: - joblib.register_store_backend("s3", _S3FSStoreBackend) - _REGISTER_S3FS_STORE = True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py deleted file mode 100644 index 5b8aa72aa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py +++ /dev/null @@ -1,383 +0,0 @@ -""" -Import as: - -import helpers.hjupyter as hjupyte -""" - -import logging -import os -from typing import Dict, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -def run_notebook( - file_name: str, - scratch_dir: str, - *, - pre_cmd: str = "", -) -> None: - """ - Run jupyter notebook. - - Assert if the notebook doesn't complete successfully. - - :param file_name: path to the notebook to run. If this is a .py - file, convert to .ipynb first - :param scratch_dir: temporary dir storing the output - :param pre_cmd: - """ - file_name = os.path.abspath(file_name) - hdbg.dassert_path_exists(file_name) - hio.create_dir(scratch_dir, incremental=True) - # Build command line. - cmd = [] - if pre_cmd: - cmd.append(f"{pre_cmd} &&") - # Convert .py file into .ipynb if needed. - root, ext = os.path.splitext(file_name) - if ext == ".ipynb": - notebook_name = file_name - elif ext == ".py": - cmd.append(f"jupytext --update --to notebook {file_name};") - notebook_name = f"{root}.ipynb" - else: - raise ValueError(f"Unsupported file format for file_name='{file_name}'") - # Execute notebook. 
- cmd.append(f"cd {scratch_dir} &&") - cmd.append(f"jupyter nbconvert {notebook_name}") - cmd.append("--execute") - cmd.append("--to html") - cmd.append("--ExecutePreprocessor.kernel_name=python") - # No time-out. - cmd.append("--ExecutePreprocessor.timeout=-1") - # Execute. - cmd_as_str = " ".join(cmd) - hsystem.system(cmd_as_str, abort_on_error=True, suppress_output=False) - - -def run_notebook_cells( - notebook_path: str, - dst_notebook_path: str, - *, - num_cells: Optional[int] = None, - kernel_name: str = "python3", - timeout: int = 30, -) -> None: - """ - Execute the first N cells of a notebook and save the result. - - :param notebook_path: path to the source notebook to execute - :param dst_notebook_path: path where the executed notebook will be saved - :param num_cells: number of cells to execute from the beginning; if None, - execute all cells - :param kernel_name: name of the Jupyter kernel to use - :param timeout: execution timeout in seconds per cell - """ - import nbformat - from nbconvert.preprocessors import ExecutePreprocessor - - hdbg.dassert_path_exists(notebook_path) - # Read the notebook. - _LOG.info("Reading notebook '%s'", notebook_path) - with open(notebook_path) as f: - nb = nbformat.read(f, as_version=4) - # Truncate to first N cells if requested. - total_cells = len(nb.cells) - if num_cells is not None: - hdbg.dassert_lte(1, num_cells, "num_cells must be >= 1") - hdbg.dassert_lte( - num_cells, - total_cells, - "num_cells=%d exceeds total cells=%d in notebook", - num_cells, - total_cells, - ) - _LOG.info("Executing first %d of %d cells", num_cells, total_cells) - nb.cells = nb.cells[:num_cells] - else: - _LOG.info("Executing all %d cells", total_cells) - # Execute the cells. - ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) - ep.preprocess(nb) - # Save the executed notebook. 
- _LOG.info("Saving executed notebook to '%s'", dst_notebook_path) - with open(dst_notebook_path, "w") as f: - nbformat.write(nb, f) - - -def build_run_notebook_cmd( - config_builder: str, - dst_dir: str, - notebook_path: str, - *, - extra_opts: str = "", -) -> str: - """ - Construct a command string to run dev_scripts/notebooks/run_notebook.py - with specified configurations. - - :param config_builder: the configuration builder to use for the - notebook execution - :param dst_dir: the destination directory where the notebook results - will be saved - :param notebook_path: the path to the notebook that should be - executed - :param extra_opts: options for "run_notebook.py", e.g., "-- - publish_notebook" - """ - # Importing inside func to avoid error while creating dockerized executable. - # TODO(Shaunak): debug why. - import helpers.hgit as hgit - - # TODO(Vlad): Factor out common code with the - # `helpers.lib_tasks_gh.publish_buildmeister_dashboard_to_s3()`. - run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") - cmd_run_txt = [ - run_notebook_script_path, - f"--notebook {notebook_path}", - f"--config_builder '{config_builder}'", - f"--dst_dir '{dst_dir}'", - f"{extra_opts}", - ] - cmd_run_txt = " ".join(cmd_run_txt) - return cmd_run_txt - - -# ############################################################################# - - -def find_paired_files( - directory: str, - *, - pattern: str = "*.py", - exclude_pattern: str = None, -) -> tuple: - """ - Find Python files and paired Jupyter notebooks in a directory. 
- - :param directory: path to the directory to search - :param pattern: glob pattern for Python files (default: "*.py") - :param exclude_pattern: suffix pattern to exclude (e.g., "_utils.py") - :return: tuple of (python_files, paired_notebooks, unpaired_notebooks) - - python_files: list of .py files matching pattern - - paired_notebooks: list of .ipynb files with corresponding .py - - unpaired_notebooks: list of .ipynb files without corresponding .py - """ - hdbg.dassert_path_exists(directory) - # Find Python files matching pattern. - py_files = hio.listdir( - directory, - pattern, - only_files=True, - use_relative_paths=False, - maxdepth=1, - ) - # Exclude files matching exclude_pattern. - if exclude_pattern: - py_files = [f for f in py_files if not f.endswith(exclude_pattern)] - py_files = sorted(py_files) - # Find notebook files. - nb_pattern = pattern.replace(".py", ".ipynb") - nb_files = hio.listdir( - directory, - nb_pattern, - only_files=True, - use_relative_paths=False, - maxdepth=1, - ) - nb_files = sorted(nb_files) - # Build set of base names from Python files. - py_basenames = set() - for py_file in py_files: - basename = os.path.basename(py_file) - basename = os.path.splitext(basename)[0] - py_basenames.add(basename) - # Check which notebooks have corresponding .py files. - paired_notebooks = [] - unpaired_notebooks = [] - for nb_file in nb_files: - basename = os.path.basename(nb_file) - basename = os.path.splitext(basename)[0] - if basename in py_basenames: - paired_notebooks.append(nb_file) - else: - unpaired_notebooks.append(nb_file) - return py_files, paired_notebooks, unpaired_notebooks - - -def execute_file_with_docker( - file_path: str, - *, - working_dir: str, - is_notebook: bool, -) -> Tuple[bool, str, float]: - """ - Execute a Python file or notebook using docker_cmd. 
- - :param file_path: path to the file to execute - :param working_dir: directory to cd into before execution - :param is_notebook: True if file is a notebook, False if Python script - :return: tuple of (success, error_message, elapsed_time) - """ - timer = htimer.Timer() - success = False - error_msg = "" - try: - if is_notebook: - # For notebooks, use hjupyter.run_notebook via docker_cmd. - scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") - # Build Python command to run notebook. - cmd = ( - f'python -c "' - f"import helpers.hjupyter as hjupyte; " - f"import helpers.hio as hio; " - f"hio.create_dir('{scratch_dir}', incremental=True); " - f"hjupyte.run_notebook('{file_path}', '{scratch_dir}')\"" - ) - else: - # For Python scripts, execute directly. - cmd = f"python {file_path}" - # Build invoke docker_cmd command. - docker_cmd = f'invoke docker_cmd --cmd "{cmd}"' - # Execute in the working directory. - hsystem.system( - docker_cmd, - abort_on_error=False, - suppress_output=False, - ) - success = True - except Exception as e: - error_msg = str(e) - elapsed = timer.get_elapsed() - return success, error_msg, elapsed - - -def execute_file_directly( - file_path: str, - *, - working_dir: str, - is_notebook: bool, -) -> Tuple[bool, str, float]: - """ - Execute a Python file or notebook directly (inside container). - - :param file_path: path to the file to execute - :param working_dir: directory to cd into before execution - :param is_notebook: True if file is a notebook, False if Python script - :return: tuple of (success, error_message, elapsed_time) - """ - timer = htimer.Timer() - success = False - error_msg = "" - try: - if is_notebook: - # For notebooks, use hjupyter.run_notebook. - scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") - hio.create_dir(scratch_dir, incremental=True) - run_notebook( - file_path, - scratch_dir, - pre_cmd=f"cd {working_dir}", - ) - else: - # For Python scripts, execute directly. 
- cmd = f"cd {working_dir} && python {file_path}" - hsystem.system( - cmd, - abort_on_error=True, - suppress_output=False, - ) - success = True - except Exception as e: - error_msg = str(e) - elapsed = timer.get_elapsed() - return success, error_msg, elapsed - - -def report_execution_results( - py_results: Dict[str, Tuple[bool, str, float]], - nb_results: Dict[str, Tuple[bool, str, float]], -) -> Tuple[int, str]: - """ - Report execution results and return failure information. - - :param py_results: results from Python file execution - :param nb_results: results from notebook execution - :return: tuple of (total_failures, error_message) - """ - # Collect failures. - py_failures = [f for f, (success, _, _) in py_results.items() if not success] - nb_failures = [f for f, (success, _, _) in nb_results.items() if not success] - # Calculate statistics. - py_total = len(py_results) - py_success = py_total - len(py_failures) - nb_total = len(nb_results) - nb_success = nb_total - len(nb_failures) - total_files = py_total + nb_total - total_success = py_success + nb_success - total_failures = len(py_failures) + len(nb_failures) - # Calculate timing statistics. - py_times = [elapsed for _, _, elapsed in py_results.values()] - nb_times = [elapsed for _, _, elapsed in nb_results.values()] - py_total_time = sum(py_times) if py_times else 0.0 - nb_total_time = sum(nb_times) if nb_times else 0.0 - total_time = py_total_time + nb_total_time - # Report summary. 
- _LOG.info("=" * 80) - _LOG.info("EXECUTION SUMMARY") - _LOG.info("=" * 80) - _LOG.info( - "Python scripts: %d total, %d success, %d failed", - py_total, - py_success, - len(py_failures), - ) - if py_total > 0: - _LOG.info(" Total time: %.2f seconds", py_total_time) - _LOG.info(" Average time: %.2f seconds", py_total_time / py_total) - _LOG.info( - "Notebooks: %d total, %d success, %d failed", - nb_total, - nb_success, - len(nb_failures), - ) - if nb_total > 0: - _LOG.info(" Total time: %.2f seconds", nb_total_time) - _LOG.info(" Average time: %.2f seconds", nb_total_time / nb_total) - _LOG.info("-" * 80) - _LOG.info( - "TOTAL: %d files, %d success, %d failed", - total_files, - total_success, - total_failures, - ) - _LOG.info("Total execution time: %.2f seconds", total_time) - # Build error message if failures exist. - error_message = "" - if total_failures > 0: - _LOG.error("=" * 80) - _LOG.error("FAILURES DETECTED") - _LOG.error("=" * 80) - if py_failures: - _LOG.error("Failed Python scripts:") - for file_path in py_failures: - basename = os.path.basename(file_path) - _, error, _ = py_results[file_path] - _LOG.error(" - %s: %s", basename, error) - if nb_failures: - _LOG.error("Failed notebooks:") - for file_path in nb_failures: - basename = os.path.basename(file_path) - _, error, _ = nb_results[file_path] - _LOG.error(" - %s: %s", basename, error) - _LOG.error("=" * 80) - error_message = ( - f"{total_failures} file(s) failed to execute. See log for details." 
- ) - return total_failures, error_message diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py deleted file mode 100644 index 5e0ec6214..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py +++ /dev/null @@ -1,334 +0,0 @@ -""" -Import as: - -import helpers.hlatex as hlatex -""" - -import logging -import re -from typing import List, Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Consider using `pypandoc` instead of calling `pandoc` directly. -# https://boisgera.github.io/pandoc - - -# TODO(gp): Add a switch to keep the tmp files or delete them. -def convert_pandoc_md_to_latex(txt: str) -> str: - """ - Run pandoc to convert a markdown file to a latex file. - """ - hdbg.dassert_isinstance(txt, str) - # Save to tmp file. - in_file_name = "./tmp.run_pandoc_in.md" - hio.to_file(in_file_name, txt) - # Run Pandoc. - out_file_name = "./tmp.run_pandoc_out.tex" - cmd = ( - f"pandoc {in_file_name} -o {out_file_name} --read=markdown --write=latex" - ) - container_type = "pandoc_only" - - # To minimze the dependency. - import dev_scripts_helpers.dockerize.lib_pandoc as dshdlipa - - dshdlipa.run_dockerized_pandoc(cmd, container_type) - # Read tmp file. - res = hio.from_file(out_file_name) - # Remove lines that contain \tightlist. - res = "\n".join( - [line for line in res.splitlines() if "\\tightlist" not in line] - ) - return res - - -def markdown_list_to_latex(markdown: str) -> str: - """ - Convert a Markdown list to LaTeX format. 
- - :param markdown: The Markdown text to convert - :return: The converted LaTeX text - """ - hdbg.dassert_isinstance(markdown, str) - markdown = hprint.dedent(markdown) - # Remove the first line if it's a title. - markdown_lines = markdown.split("\n") - m = re.match(r"^(\*+ )(.*)", markdown_lines[0]) - if m: - title = m.group(2) - markdown_lines = markdown_lines[1:] - else: - title = "" - markdown = "\n".join(markdown_lines) - # Convert. - txt = convert_pandoc_md_to_latex(markdown) - # Remove `\tightlist` and empty lines. - lines = txt.splitlines() - lines = [line for line in lines if "\\tightlist" not in line] - lines = [line for line in lines if line.strip() != ""] - txt = "\n".join(lines) - # Add the title frame. - if title: - txt = f"\\begin{{frame}}{{{title}}}" + "\n" + txt + "\n" + "\\end{frame}" - return txt - - -def remove_latex_formatting(latex_string: str) -> str: - r""" - Remove LaTeX formatting such as \textcolor{color}{content} and retains only - the content. - """ - cleaned_string = re.sub( - r"\\textcolor\{[^}]*\}\{([^}]*)\}", r"\1", latex_string - ) - return cleaned_string - - -def format_latex(txt: str) -> str: - """ - Format LaTeX text using `prettier`. - - :param txt: input LaTeX text to format - :return: formatted LaTeX text - """ - file_type = "tex" - # To minimize the dependency. - import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr - - txt = dshdlipr.prettier_on_str(txt, file_type) - return txt - - -# ############################################################################# -# Frame Latex sections -# ############################################################################# - - -def _is_latex_line_separator(line: str, *, min_repeats: int = 5) -> bool: - """ - Check if the given line is a LaTeX comment separator. - - This function determines if a line consists of a comment character - `%` followed by repeated characters (`#`, `=`, `-`) that would - indicate a section separator. 
- - :param line: current line of text being processed - :param min_repeats: minimum number of times the characters have to - be repeated to be considered a separator - :return: whether the line is a separator - """ - separator_pattern = rf""" - ^\s*%\s* # % - ([#=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it - # (`min_repeats` - 1) times. - \s*$ # Match only whitespace characters - # until the end of the line. - """ - res = bool(re.match(separator_pattern, line, re.VERBOSE)) - return res - - -def frame_sections(lines: List[str]) -> List[str]: - r""" - Add line separators before LaTeX section commands. - - This function adds comment separators before \section, \subsection, and - \subsubsection commands in LaTeX files. The separators are: - ``` - % #####... - \section - - % =====... - \subsection: - - % -----... - \subsubsection - ``` - - If a separator comment already exists immediately before the section command, - no separator is added. - - :param lines: list of strings representing the LaTeX file content - :return: list of strings with separators added before section commands - """ - hdbg.dassert_isinstance(lines, list) - # Loop 1: Remove existing latex separators. - txt_tmp: List[str] = [] - for line in lines: - if not _is_latex_line_separator(line): - txt_tmp.append(line) - # Loop 2: Remove consecutive empty lines, leaving only one. - txt_tmp2: List[str] = [] - prev_was_empty = False - for line in txt_tmp: - is_empty = line.strip() == "" - if is_empty: - if not prev_was_empty: - txt_tmp2.append(line) - prev_was_empty = True - else: - txt_tmp2.append(line) - prev_was_empty = False - # Loop 3: Add correct LaTeX separator based on section commands. - txt_new: List[str] = [] - # Define the section patterns and their corresponding separators. - # Total line length is 80 characters, "% " is 2 characters, so 78 separator chars. 
- prefix = "% " - section_patterns = [ - (r"^\\section\{", prefix + "#" * 78), - (r"^\\subsection\{", prefix + "=" * 78), - (r"^\\subsubsection\{", prefix + "-" * 78), - ] - for i, line in enumerate(txt_tmp2): - _LOG.debug("line=%d:%s", i, line) - txt_processed = False - # Check if the line matches any section command. - for pattern, separator in section_patterns: - m = re.match(pattern, line.strip()) - if m: - _LOG.debug(" -> Found section command") - txt_new.append(separator) - _LOG.debug(" -> Added separator: %s", separator) - txt_new.append(line) - txt_processed = True - break - if not txt_processed: - txt_new.append(line) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -# ############################################################################# -# LaTeX Header Extraction -# ############################################################################# - - -def _is_latex_comment(line: str) -> bool: - r""" - Check if a line is a LaTeX comment. - - A LaTeX comment line starts with the `%` character. This function - handles the edge case where `%` is escaped (e.g., `\%`), which - should not be treated as a comment. - - :param line: line of text to check - :return: True if the line is a comment, False otherwise - """ - hdbg.dassert_isinstance(line, str) - # Strip leading whitespace to check the first non-whitespace character. - stripped_line = line.lstrip() - # Check if line starts with %. - if not stripped_line.startswith("%"): - return False - # Check if the % is escaped by looking at the character before it in the - # original line. - # Find the position of % in the original line. - percent_pos = line.find("%") - # If there's a character before %, check if it's a backslash. - if percent_pos > 0 and line[percent_pos - 1] == "\\": - # Check if the backslash itself is escaped. - if percent_pos > 1 and line[percent_pos - 2] == "\\": - # Double backslash before %, so % is not escaped. - return True - # Single backslash before %, so % is escaped. 
- return False - # % is at the beginning or has no backslash before it. - return True - - -def _extract_latex_section( - line: str, line_number: int -) -> Optional[hmarhead.HeaderInfo]: - r""" - Parse a LaTeX section command and extract section information. - - This function identifies LaTeX section commands (\section{}, \subsection{}, - \subsubsection{}) and extracts the section title. It handles several edge - cases including: - - Regex parsing of `\section[Short]{Long Title}` (extracts "Long Title") - - Handles nested braces within titles (e.g., `\section{Intro to \textbf{ML}}`) - - Does not handle multi-line section titles - - :param line: line of text to parse - :param line_number: line number in the original file - :return: HeaderInfo object if section found, None otherwise - """ - hdbg.dassert_isinstance(line, str) - hdbg.dassert_isinstance(line_number, int) - # Define section patterns with their corresponding levels. - # Pattern supports optional [short title] before {long title}. - regex = r"(?:\[.*?\])?\{(.*)\}" - section_patterns = [ - (r"\\section" + regex, 1), - (r"\\subsection" + regex, 2), - (r"\\subsubsection" + regex, 3), - ] - line_stripped = line.strip() - # Try to match each section pattern. - for pattern, level in section_patterns: - # Check if line starts with the section command. - match = re.match(pattern, line_stripped) - if match: - # Extract the title from the first capture group. - title = match.group(1) - # Skip sections with empty titles. - if not title: - return None - # Return HeaderInfo with level, title, and line number. - return hmarhead.HeaderInfo(level, title, line_number) - # No section command found. - return None - - -def extract_headers_from_latex( - lines: List[str], max_level: int, *, sanity_check: bool = True -) -> hmarhead.HeaderList: - r""" - Extract headers from a LaTeX file and return a HeaderList. 
- - This function processes a LaTeX file line by line, identifies section - commands (\section, \subsection, \subsubsection), and creates a list - of HeaderInfo objects. It skips commented-out lines (lines starting - with %) and only includes headers up to the specified maximum level. - - :param lines: content of the input LaTeX file as list of strings - :param max_level: maximum header levels to parse (e.g., '3' parses - \section, \subsection, and \subsubsection, but not deeper levels) - :param sanity_check: whether to check that the header list is valid - using the same validation as Markdown headers - :return: list of HeaderInfo objects, each containing (level, title, - line_number), e.g.: - ``` - [ - HeaderInfo(1, "Introduction", 5), - HeaderInfo(2, "Background", 10), - ... - ] - ``` - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_level) - header_list: hmarhead.HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - # Skip LaTeX comment lines. - if _is_latex_comment(line): - continue - # Check if this line contains a section command. - header_info = _extract_latex_section(line, line_number) - if header_info and header_info.level <= max_level: - # Add HeaderInfo to list. - header_list.append(header_info) - # Check the header list. - if sanity_check: - hmarhead.sanity_check_header_list(header_list) - else: - _LOG.debug("Skipping sanity check") - hdbg.dassert_isinstance(header_list, list) - return header_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py deleted file mode 100644 index 8f857d385..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Linting utilities for text and code files. 
- -Import as: - -import helpers.hlint as hlint -""" - -import logging - -import helpers.hgit as hgit -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def lint_file(file_path: str) -> None: - """ - Run lint_txt.py on the file to ensure proper formatting. - - :param file_path: path to the file to lint - """ - _LOG.info("Linting file: %s", file_path) - lint_script = hgit.find_file_in_git_tree("lint_txt.py", super_module=True) - # Run lint_txt.py. - cmd = f"{lint_script} -i {file_path} -v CRITICAL" - _LOG.debug("Running command: %s", cmd) - hsystem.system(cmd, suppress_output=True) - _LOG.info("File linted successfully: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py deleted file mode 100644 index c13ed1255..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Import as: - -import helpers.hlist as hlist -""" - -from typing import Any, List, Optional, Set - -import helpers.hdbg as hdbg - - -# TODO(gp): -> return_single_element, return_single_element_or_assert? -def assert_single_element_and_return(list_: List[Any]) -> Any: - """ - Assert that the passed list has a single element and return that single - element. - - :return: return the unique element in the list - """ - hdbg.dassert_isinstance(list_, list) - hdbg.dassert_eq(len(list_), 1, "List has %d elements!", len(list_)) - return list_[0] - - -def find_duplicates(list_: List[Any]) -> List[Any]: - """ - Find the elements duplicated in a list. - """ - hdbg.dassert_isinstance(list_, list) - # Count the occurrences of each element of the seq. - set_l = set(list_) - v_to_num = [(v, list_.count(v)) for v in set_l] - # Build list of elems with duplicates. 
- res = [v for v, n in v_to_num if n > 1] - return res - - -def remove_duplicates(list_: List[Any]) -> List[Any]: - """ - Remove the elements duplicated in a list, without changing the order. - """ - hdbg.dassert_isinstance(list_, list) - list_out = [] - set_l: Set[Any] = set() - for v in list_: - if v not in set_l: - set_l.add(v) - list_out.append(v) - return list_out - - -def extract( - list_: List[Any], start_idx: Optional[int], end_idx: Optional[int] -) -> List[Any]: - """ - Filter the list using [start_idx, end_idx). - """ - if start_idx is not None: - hdbg.dassert_lte(0, start_idx) - else: - start_idx = 0 - if end_idx is not None: - hdbg.dassert_lte(end_idx, len(list_)) - else: - end_idx = len(list_) - if list_: - hdbg.dassert_lt(start_idx, end_idx) - list_ = list_[start_idx:end_idx] - return list_ - - -def chunk(list_: List[Any], n: int) -> List[Any]: - hdbg.dassert_lte(1, n) - hdbg.dassert_lte(n, len(list_)) - k, m = divmod(len(list_), n) - return [ - list_[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n) - ] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py deleted file mode 100644 index f821d4f76..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py +++ /dev/null @@ -1,680 +0,0 @@ -""" -Import as: - -import helpers.hllm as hllm -""" - -import functools -import logging -import os -import re -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union - -import openai -import tqdm -from pydantic import BaseModel - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hllm_cost as hllmcost -import helpers.hprint as hprint -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -# Create a generic type variable. 
-T = TypeVar("T", bound=BaseModel) - -# ############################################################################# -# Update LLM cache -# ############################################################################# - - -_UPDATE_LLM_CACHE = False - - -def set_update_llm_cache(update: bool) -> None: - """ - Set whether to update the LLM cache. - - :param update: True to update the cache, False otherwise - """ - global _UPDATE_LLM_CACHE - _UPDATE_LLM_CACHE = update - - -def get_update_llm_cache() -> bool: - """ - Get whether to update the LLM cache. - - :return: True if the cache should be updated, False otherwise - """ - return _UPDATE_LLM_CACHE - - -# ############################################################################# -# Utility Functions -# ############################################################################# - - -def _get_llm_provider_and_model(model: str) -> Tuple[str, str]: - """ - Get the provider and model names from a model string. - - The model can be specified as: - - "gpt-4o-mini" - - "openai/gpt-4o-mini" - - "deepseek/deepseek-r1-0528-qwen3-8b:free/" - - :param model: model to use for the completion - :return: tuple of provider name and model name - """ - if "/" in model: - if model.startswith("openai/"): - provider_name = "openai" - model = model.split("/")[1] - else: - provider_name = "openrouter" - else: - provider_name = "openai" - hdbg.dassert_in( - provider_name, - ("openai", "openrouter"), - "Unknown provider: %s", - provider_name, - ) - return provider_name, model - - -def response_to_txt(response: Any) -> str: - """ - Convert an OpenAI API response to a text string. 
- - :param response: API response object - :return: extracted text contents as a string - """ - if isinstance(response, openai.types.chat.chat_completion.ChatCompletion): - ret = response.choices[0].message.content - elif isinstance(response, openai.types.responses.Response): - ret = response.output_text - # elif isinstance(response, openai.pagination.SyncCursorPage): - # ret = response.data[0].content[0].text.value - elif isinstance(response, openai.types.beta.threads.message.Message): - ret = response.content[0].text.value - elif isinstance(response, str): - ret = response - elif isinstance(response, dict): - # Handle Chat Completions dict form. - if "choices" in response and "message" in response["choices"][0]: - ret = response["choices"][0]["message"]["content"] - # Handle Responses API dict form. - elif "output_text" in response: - ret = response["output_text"] - else: - raise ValueError( - f"Unknown dict structure in response: {response.keys()}" - ) - else: - raise ValueError(f"Unknown response type: {type(response)}") - hdbg.dassert_isinstance(ret, str) - return ret - - -def build_chat_completion_messages( - system_prompt: str, - user_prompt: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, -) -> List[Dict[str, Any]]: - """ - Construct the standard messages payload for the Chat Completions API. - - :param system_prompt: system prompt - :param user_prompt: user prompt - :param images_as_base64: base64-encoded images - :return: messages in the format expected by the Chat Completions API - """ - hdbg.dassert_isinstance(system_prompt, str) - hdbg.dassert_isinstance(user_prompt, str) - ret = [{"role": "system", "content": system_prompt}] - # Build user message content. 
- if images_as_base64: - # Multi-modal message with text and images - user_content = [{"type": "text", "text": user_prompt}] - for image_b64 in images_as_base64: - user_content.append( - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}, - } - ) - ret.append({"role": "user", "content": user_content}) - else: - # Text-only message. - ret.append({"role": "user", "content": user_prompt}) - return ret - - -def build_responses_input( - user_prompt: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, -) -> List[Dict[str, Any]]: - """ - Construct the user input payload for the Responses API. - - :param user_prompt: user prompt - :param images_as_base64: base64-encoded images - :return: input in the format expected by the Responses API - """ - hdbg.dassert_isinstance(user_prompt, str) - # Build user message content. - content_blocks = [{"type": "input_text", "text": user_prompt}] - if images_as_base64: - # Add image input. - for image_b64 in images_as_base64: - content_blocks.append( - { - "type": "input_image", - "image_url": f"data:image/jpeg;base64,{image_b64}", - } - ) - responses_input = [ - { - "role": "user", - "content": content_blocks, - } - ] - return responses_input - - -# ############################################################################# - - -@hcacsimp.simple_cache( - write_through=True, exclude_keys=["client", "cache_mode", "cost_tracker"] -) -def _call_api_sync( - # pylint: disable=unused-argument - # This is needed to support caching. - cache_mode: str, - client: openai.OpenAI, - user_prompt: str, - system_prompt: str, - temperature: float, - model: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - use_responses_api: bool = False, - **create_kwargs, -) -> Dict[Any, Any]: - """ - Make a non-streaming API call. - - See `get_completion()` for other parameter descriptions. 
- - :param client: LLM client - :param cost_tracker: LLMCostTracker instance to track costs - :param use_responses_api: whether to use the Responses API instead - of Chat Completions - :return: OpenAI API result as a dictionary - """ - if not use_responses_api: - messages = build_chat_completion_messages( - system_prompt, user_prompt, images_as_base64=images_as_base64 - ) - completion = client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - **create_kwargs, - ) - else: - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - completion = client.responses.create( - model=model, - instructions=system_prompt, - input=user_input, - temperature=temperature, - **create_kwargs, - ) - completion_obj = completion.to_dict() - if isinstance(completion, openai.types.responses.Response): - # Store the output of the Responses API. - completion_obj["output_text"] = completion.output_text - if cost_tracker is not None: - # Calculate the cost of the completion. - hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) - cost = cost_tracker.calculate_cost(completion, model) - cost_tracker.accumulate_cost(cost) - # Store the cost in the completion object. - completion_obj["cost"] = cost - return completion_obj - - -@hcacsimp.simple_cache( - cache_type="pickle", - write_through=True, - exclude_keys=["client", "cache_mode", "cost_tracker"], -) -def _call_structured_api_sync( - # pylint: disable=unused-argument - # This is needed to support caching. - cache_mode: str, - client: openai.OpenAI, - model: str, - user_prompt: str, - system_prompt: str, - temperature: float, - response_format: type[T], - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - print_cost: bool = False, - **create_kwargs, -) -> T: - """ - Make a non-streaming structured API call. - - See `get_structured_completion()` for parameter descriptions. 
- - :param client: LLM client - :param response_format: expected structured output format - :return: parsed output as the specified Pydantic model - """ - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - response = client.responses.parse( - model=model, - instructions=system_prompt, - input=user_input, - temperature=temperature, - text_format=response_format, - **create_kwargs, - ) - # Extract the parsed output. - parsed_output: T = response.output_parsed - # Track costs. - if cost_tracker is not None: - hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) - cost = cost_tracker.calculate_cost(response) - cost_tracker.accumulate_cost(cost) - if print_cost: - _LOG.info("cost=%.6f", cost) - return parsed_output - - -# ############################################################################# -# LLMClient -# ############################################################################# - - -class LLMClient: - """ - Class to handle LLM API client creation and requests. - """ - - def __init__( - self, - model: str, - ) -> None: - """ - Initialize the LLMClient. - - The model can be specified as: - - "gpt-4o-mini" - - "openai/gpt-4o-mini" - - "deepseek/deepseek-r1-0528-qwen3-8b:free/" - - :param model: model to use for the completion. - """ - hdbg.dassert_isinstance(model, str) - if model == "": - provider_name, model = self.get_default_model() - else: - provider_name, model = _get_llm_provider_and_model(model) - - self.provider_name = provider_name - self.model = model - self.client = None - - def get_default_model(self) -> Tuple[str, str]: - """ - Get the default provider and model for the client. - - :return: default provider and model used in the client - """ - provider_name = "openai" - model = self._get_default_model(provider_name) - return provider_name, model - - def create_client(self) -> None: - """ - Create an LLM client. 
- """ - if self.provider_name == "openai": - base_url = "https://api.openai.com/v1" - api_key = os.environ.get("OPENAI_API_KEY") - elif self.provider_name == "openrouter": - base_url = "https://openrouter.ai/api/v1" - api_key = os.environ.get("OPENROUTER_API_KEY") - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - _LOG.debug(hprint.to_str("self.provider_name base_url")) - client = openai.OpenAI(base_url=base_url, api_key=api_key) - self.client = client - - def call_llm( - self, - cache_mode: str, - user_prompt: str, - system_prompt: str, - temperature: float, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - use_responses_api: bool = False, - **create_kwargs, - ) -> Dict[Any, Any]: - """ - Call the LLM API. - - Check `_call_api_sync()` params for more details. - """ - return _call_api_sync( - cache_mode=cache_mode, - client=self.client, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - model=self.model, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - use_responses_api=use_responses_api, - **create_kwargs, - ) - - def _get_default_model(self, provider_name: str) -> str: - """ - Get the default model for a provider. 
- - :return: default model for the provider - """ - if provider_name == "openai": - model = "gpt-4o" - elif provider_name == "openrouter": - model = "openai/gpt-4o" - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - return model - - -# ############################################################################# - - -@functools.lru_cache(maxsize=1024) -def get_completion( - user_prompt: str, - *, - system_prompt: str = "", - model: str = "", - report_progress: bool = False, - print_cost: bool = False, - cache_mode: str = "DISABLE_CACHE", - temperature: float = 0.1, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional["hllmcost.LLMCostTracker"] = None, - use_responses_api: bool = False, - return_raw: bool = False, - **create_kwargs, -) -> Union[str, Dict[Any, Any]]: - """ - Generate a completion using OpenAI's API. - - :param user_prompt: user input message - :param system_prompt: system instruction - :param model: model to use or empty string to use the default model - :param report_progress: whether to report progress running the API - call - :param cache_mode: - - "DISABLE_CACHE": No caching - - "REFRESH_CACHE": Make API calls and save responses to cache - - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache - - "NORMAL": Use cached responses if available, otherwise make API call - :param cache_file: file to save/load completion cache - :param temperature: adjust an LLM's sampling diversity: lower values make it - more deterministic, while higher values foster creative variation. - 0 < temperature <= 2, 0.1 is default value in OpenAI models. 
- :param images_as_base64: base64-encoded images to include in the user message - :param cost_tracker: LLMCostTracker instance to track costs - :param use_responses_api: whether to use the Responses API instead of Chat - Completions - :param return_raw: whether to return the raw API response instead of - extracting the text content - :param create_kwargs: additional params for the API call - :return: API response or its text content - """ - hdbg.dassert_in( - cache_mode, - ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), - ) - update_llm_cache = get_update_llm_cache() - if update_llm_cache: - cache_mode = "REFRESH_CACHE" - # Initialize LLM client. - # Skip client creation for HIT_CACHE_OR_ABORT mode since: - # - If cache hits, we never use the client - # - If cache misses, we abort before calling the function - llm_client = LLMClient(model=model) - if cache_mode != "HIT_CACHE_OR_ABORT": - llm_client.create_client() - if use_responses_api and llm_client.provider_name != "openai": - raise ValueError( - "Responses API is only supported for the 'openai' provider." - ) - if report_progress and return_raw: - raise ValueError( - "Streaming mode is only supported while returning text content." - ) - if report_progress and cache_mode == "HIT_CACHE_OR_ABORT": - raise ValueError( - "Streaming mode (report_progress=True) is not supported with " - "cache_mode='HIT_CACHE_OR_ABORT'." - ) - # Construct messages in OpenAI API request format. - _LOG.info("LLM API call ... 
") - memento = htimer.dtimer_start(logging.DEBUG, "LLM API call") - if not report_progress: - completion = llm_client.call_llm( - cache_mode=cache_mode, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - use_responses_api=use_responses_api, - **create_kwargs, - ) - if not use_responses_api: - txt_response = completion["choices"][0]["message"]["content"] - else: - txt_response = completion["output_text"] - else: - # TODO(gp): This is not working. It doesn't show the progress and it - # doesn't show the cost. - # Stream the output to show progress. - collected_messages = [] - if not use_responses_api: - # Stream Chat Completions API. - messages = build_chat_completion_messages( - system_prompt, user_prompt, images_as_base64=images_as_base64 - ) - completion = llm_client.client.chat.completions.create( - model=model, - messages=messages, - stream=True, - **create_kwargs, - ) - for chunk in tqdm.tqdm( - completion, desc="Generating completion", unit=" chunks" - ): - if chunk.choices[0].delta.content is not None: - collected_messages.append(chunk.choices[0].delta.content) - else: - # Stream Responses API. - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - completion = llm_client.client.responses.create( - model=model, - instructions=system_prompt, - input=user_input, - stream=True, - **create_kwargs, - ) - for event in tqdm.tqdm( - completion, desc="Generating response", unit=" events" - ): - if event.type == "response.output_text.delta": - collected_messages.append(event.delta.value) - txt_response = "".join(collected_messages) - # Report the time taken. - msg, _ = htimer.dtimer_stop(memento) - _LOG.info(msg) - if print_cost and "cost" in completion: - _LOG.info("cost=%.6f", completion["cost"]) - if return_raw: - # Return the full completion/response object. 
- return completion - return txt_response - - -@functools.lru_cache(maxsize=1024) -def get_structured_completion( - user_prompt: str, - response_format: type[T], - *, - system_prompt: str = "", - model: str = "", - cache_mode: str = "DISABLE_CACHE", - temperature: float = 0.1, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - print_cost: bool = False, - **create_kwargs, -) -> T: - """ - Generate a Structured Output using OpenAI's API. - - See `get_completion()` for other parameter descriptions. - - :param response_format: expected structured output format - :param cache_mode: - - "DISABLE_CACHE": No caching - - "REFRESH_CACHE": Make API calls and save responses to cache - - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache - - "NORMAL": Use cached responses if available, otherwise make API call - :return: output parsed into the specified format - """ - hdbg.dassert_in( - cache_mode, - ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), - ) - update_llm_cache = get_update_llm_cache() - if update_llm_cache: - cache_mode = "REFRESH_CACHE" - # Initialize LLM client. - # Skip client creation for HIT_CACHE_OR_ABORT mode since: - # - If cache hits, we never use the client - # - If cache misses, we abort before calling the function - if cache_mode == "HIT_CACHE_OR_ABORT": - # Don't create the client; pass None since it won't be used. - llm_client = LLMClient(model=model) - client = None - model_to_use = llm_client.model - else: - llm_client = LLMClient(model=model) - llm_client.create_client() - if llm_client.provider_name != "openai": - raise ValueError( - "`get_structured_completion()` currently only supports the " - "'openai' provider (Responses API + Structured Outputs). " - f"Got provider_name='{llm_client.provider_name}'." - ) - client = llm_client.client - model_to_use = llm_client.model - # Retrieve a structured response. 
- parsed_output: T = _call_structured_api_sync( - cache_mode=cache_mode, - client=client, - model=model_to_use, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - response_format=response_format, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - print_cost=print_cost, - **create_kwargs, - ) - return parsed_output - - -# ############################################################################# - - -def apply_prompt_to_dataframe( - df, - prompt, - model: str, - input_col, - response_col, - *, - chunk_size=50, - allow_overwrite: bool = False, -): - _LOG.debug(hprint.to_str("prompt model input_col response_col chunk_size")) - hdbg.dassert_in(input_col, df.columns) - if not allow_overwrite: - hdbg.dassert_not_in(response_col, df.columns) - response_data = [] - for start in tqdm.tqdm( - range(0, len(df), chunk_size), desc="Processing chunks" - ): - end = start + chunk_size - chunk = df.iloc[start:end] - _LOG.debug("chunk.size=%s", chunk.shape[0]) - data = chunk[input_col].astype(str).tolist() - data = [f"{i + 1}: {val}" for i, val in enumerate(data)] - user = "\n".join(data) - _LOG.debug("user=\n%s", user) - try: - response = get_completion(user, system_prompt=prompt, model=model) - except Exception as e: - _LOG.error( - f"Error processing column {input} in chunk {start}-{end}: {e}" - ) - raise e - # processed_response = response.split("\n") - processed_response = [ - ln.rstrip() for ln in response.splitlines() if ln.strip() - ] - _LOG.debug(hprint.to_str("processed_response")) - _LOG.debug("len(processed_response)=%s", len(processed_response)) - hdbg.dassert_eq(len(processed_response), chunk.shape[0]) - for i in range(len(processed_response)): - m = re.match(r"\d+: (.*)\s*", processed_response[i]) - hdbg.dassert(m, f"Invalid response: {processed_response[i]}") - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. 
- assert m is not None - processed_response[i] = m.group(1).rstrip().lstrip() - _LOG.debug(hprint.to_str("processed_response")) - response_data.extend(processed_response) - df[response_col] = response_data - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py deleted file mode 100644 index bc42d6816..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py +++ /dev/null @@ -1,840 +0,0 @@ -""" -Import as: - -import helpers.hllm_cli as hllmcli -""" - -import json -import logging -import shlex -import subprocess -import sys -import importlib -import pprint -import time -from typing import Callable, Dict, List, Optional, Tuple, Union - -try: - import llm - import tokencost - - _LLM_AVAILABLE = True -except ImportError: - _LLM_AVAILABLE = False - -import pandas as pd -from tqdm import tqdm - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hmodule as hmodule -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -# _LOG.trace = lambda *args, **kwargs: None -_LOG.trace = _LOG.debug - - -def install_needed_modules( - *, use_sudo: bool = True, venv_path: Optional[str] = None -) -> None: - """ - Install needed modules for LLM CLI. 
- - :param use_sudo: whether to use sudo to install the module - :param venv_path: path to the virtual environment - E.g., /Users/saggese/src/venv/client_venv.helpers - """ - hmodule.install_module_if_not_present( - "llm", - package_name="llm", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "tokencost", - package_name="tokencost", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - # Reload this module if already imported. - this_module_name = __name__ - if this_module_name in sys.modules: - importlib.reload(sys.modules[this_module_name]) - - -def shutup_llm_logging() -> None: - """ - Shut up OpenAI logging. - """ - # OpenAI client logging. - logging.getLogger("openai").setLevel(logging.WARNING) - # Common HTTP logging sources - logging.getLogger("httpx").setLevel(logging.WARNING) - logging.getLogger("httpcore").setLevel(logging.WARNING) - logging.getLogger("urllib3").setLevel(logging.WARNING) - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def _check_llm_executable() -> bool: - """ - Check if the llm command-line executable is available. - - :return: True if llm executable exists, False otherwise - """ - try: - hsystem.system("which llm", suppress_output=True) - _LOG.debug("llm command found") - return True - except Exception: - _LOG.debug("llm command not found") - return False - - -def _apply_llm_via_executable( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply LLM using the llm CLI executable. 
- - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: tuple of (LLM response as string, cost in dollars) - """ - # Build command. - cmd = ["llm"] - if system_prompt: - cmd.extend(["--system", system_prompt]) - if model: - cmd.extend(["--model", model]) - # Add the user prompt. - cmd.append(input_str) - _LOG.debug("Running command: %s", " ".join(cmd)) - # Execute command. - if expected_num_chars: - # Use streaming with progress bar. - proc = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - response_parts = [] - with tqdm(total=expected_num_chars, unit="char") as pbar: - for line in proc.stdout: - response_parts.append(line) - pbar.update(len(line)) - # Wait for process to complete. - proc.wait() - if proc.returncode != 0: - error_msg = proc.stderr.read() if proc.stderr else "" - hdbg.dfatal( - f"llm command failed with return code: {proc.returncode} error: {error_msg}" - ) - response = "".join(response_parts) - else: - # Run without progress bar. - cmd_str = " ".join(shlex.quote(arg) for arg in cmd) - _, response = hsystem.system_to_string(cmd_str) - # Cost calculation not available when using executable. - cost = 0.0 - _LOG.debug("Cost calculation not available when using llm executable") - return response, cost - - -def _calculate_cost_from_usage( - usage: object, - model: str, -) -> float: - """ - Calculate LLM cost from usage object. 
- - :param usage: usage object from LLM result containing input/output token counts - :param model: model name for cost calculation - :return: total cost in dollars - """ - input_tokens = usage.input - output_tokens = usage.output - prompt_cost = tokencost.calculate_cost_by_tokens( - num_tokens=input_tokens, model=model, token_type="input" - ) - completion_cost = tokencost.calculate_cost_by_tokens( - num_tokens=output_tokens, model=model, token_type="output" - ) - cost = float(prompt_cost + completion_cost) - return cost - - -def _apply_llm_via_library( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply LLM using the llm Python library. - - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: tuple of (LLM response as string, cost in dollars) - """ - # Get the model. - if model: - llm_model = llm.get_model(model) - else: - llm_model = llm.get_model() - _LOG.debug("Using model: %s", llm_model.model_id) - # Execute with or without progress bar. - if expected_num_chars: - # Use streaming with progress bar. - response_parts = [] - with tqdm(total=expected_num_chars, unit="char") as pbar: - for chunk in llm_model.prompt( - input_str, system=system_prompt, stream=True - ): - chunk_str = str(chunk) - response_parts.append(chunk_str) - pbar.update(len(chunk_str)) - response = "".join(response_parts) - # Streaming doesn't provide usage info, so we can't calculate cost. - cost = 0.0 - _LOG.debug("Cost calculation not available for streaming mode") - else: - # Run without progress bar. 
- _LOG.trace("system_prompt=\n%s", system_prompt) - _LOG.trace("input_str=\n%s", input_str) - result = llm_model.prompt(input_str, system=system_prompt) - response = result.text() - _LOG.trace("response=\n%s", response) - # Calculate cost. - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=llm_model.model_id, - ) - _LOG.debug( - "Cost: $%.6f (input: %d tokens, output: %d tokens)", - cost, - usage.input, - usage.output, - ) - return response, cost - - -# ############################################################################# -# Main functions -# ############################################################################# - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def apply_llm( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - use_llm_executable: bool = False, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply an LLM to process input text using either CLI executable or library. - - This function provides a unified interface to call LLMs either through the - llm command-line executable or through the llm Python library. It supports - optional system prompts, model selection, and progress bars for long outputs. 
- - :param input_str: the input text to process with the LLM - :param system_prompt: optional system prompt to guide the LLM's behavior - :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") - :param use_llm_executable: if True, use the llm CLI executable; if False, - use the llm Python library - :param expected_num_chars: optional expected number of characters in - output; if provided, displays a progress bar during generation - :return: tuple of (LLM response as string, cost in dollars) - """ - hdbg.dassert_isinstance(input_str, str) - hdbg.dassert_ne(input_str, "", "Input string cannot be empty") - if system_prompt is not None: - hdbg.dassert_isinstance(system_prompt, str) - if model is not None: - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty string") - if expected_num_chars is not None: - hdbg.dassert_isinstance(expected_num_chars, int) - hdbg.dassert_lt(0, expected_num_chars) - _LOG.debug("Applying LLM to input text") - _LOG.debug("use_llm_executable=%s", use_llm_executable) - # Route to appropriate implementation. - if use_llm_executable: - # Check that llm executable exists. - hdbg.dassert( - _check_llm_executable(), - "llm executable not found. Install it using: pip install llm", - ) - response, cost = _apply_llm_via_executable( - input_str, - system_prompt=system_prompt, - model=model, - expected_num_chars=expected_num_chars, - ) - else: - response, cost = _apply_llm_via_library( - input_str, - system_prompt=system_prompt, - model=model, - expected_num_chars=expected_num_chars, - ) - _LOG.debug("LLM processing completed") - return response, cost - - -def apply_llm_with_files( - input_file: str, - output_file: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - use_llm_executable: bool = False, - expected_num_chars: Optional[int] = None, -) -> float: - """ - Apply an LLM to process text from an input file and save to output file. 
- - This is a convenience wrapper around apply_llm() that handles reading from - and writing to files. It reads the input file, processes the content using - the LLM, and writes the result to the output file. - - :param input_file: path to the input file containing text to process - :param output_file: path to the output file where result will be saved - :param system_prompt: optional system prompt to guide the LLM's behavior - :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") - :param use_llm_executable: if True, use the llm CLI executable; if False, - use the llm Python library - :param expected_num_chars: optional expected number of characters in - output; if provided, displays a progress bar during generation - :return: cost in dollars - """ - hdbg.dassert_isinstance(input_file, str) - hdbg.dassert_ne(input_file, "", "Input file cannot be empty") - hdbg.dassert_isinstance(output_file, str) - hdbg.dassert_ne(output_file, "", "Output file cannot be empty") - _LOG.debug("Reading input from file: %s", input_file) - # Read input file. - input_str = hio.from_file(input_file) - _LOG.debug("Read %d characters from input file", len(input_str)) - # Process with LLM. - response, cost = apply_llm( - input_str, - system_prompt=system_prompt, - model=model, - use_llm_executable=use_llm_executable, - expected_num_chars=expected_num_chars, - ) - # Write output file. - _LOG.debug("Writing output to file: %s", output_file) - hio.to_file(output_file, response) - _LOG.debug("Wrote %d characters to output file", len(response)) - return cost - - -# ############################################################################# -# Batch processing -# ############################################################################# - - -def _validate_batch_inputs( - prompt: str, - input_list: List[str], -) -> None: - """ - Validate prompt and input list for batch processing. 
- - :param prompt: System prompt to validate - :param input_list: List of inputs to validate - :raises: Assertion errors if validation fails - """ - hdbg.dassert_isinstance(prompt, str) - hdbg.dassert_isinstance(input_list, list) - hdbg.dassert_lt(0, len(input_list), "Input list cannot be empty") - for idx, input_str in enumerate(input_list): - hdbg.dassert_isinstance( - input_str, - str, - "Input at index %d must be a string", - idx, - ) - hdbg.dassert_ne( - input_str, - "", - "Input at index %d cannot be empty", - idx, - ) - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def _llm( - system_prompt: str, - input_str: str, - model: str, -) -> Tuple[str, float]: - """ - Apply LLM using the llm Python library. - - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: LLM response as string - """ - hdbg.dassert_isinstance(system_prompt, str) - _LOG.trace("system_prompt=\n%s", system_prompt) - # - hdbg.dassert_isinstance(input_str, str) - _LOG.trace("input_str=\n%s", input_str) - # - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty") - llm_model = llm.get_model(model) - _LOG.debug("model=%s", llm_model.model_id) - # Call the LLM. - result = llm_model.prompt(input_str, system=system_prompt) - response = result.text() - _LOG.trace("response=\n%s", response) - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=model, - ) - return response, cost - - -def _call_llm_or_test_functor( - input_str: str, - system_prompt: Optional[str], - model: str, - testing_functor: Optional[Callable[[str], str]], -) -> Tuple[str, float]: - """ - Call LLM or testing functor if provided. 
- - :param input_str: Input text to process - :param system_prompt: System prompt (can be None) - :param model: Model name (required for cost calculation) - :param testing_functor: Optional testing functor - :return: Tuple of (response, cost) where cost is 0.0 if not calculated - """ - if testing_functor is None: - response, cost = _llm(system_prompt, input_str, model) - # # Calculate cost for this call. - # # Build full prompt for cost calculation. - # if system_prompt: - # full_prompt = system_prompt + "\n" + input_str - # else: - # full_prompt = input_str - # cost = _calculate_llm_cost(full_prompt, response, model) - else: - response = testing_functor(input_str) - cost = 0.0 - return response, cost - - -def _calculate_llm_cost( - prompt: str, - completion: str, - model: str, -) -> float: - """ - Calculate the cost of an LLM call using tokencost library. - - :param prompt: the prompt sent to the LLM - :param completion: the completion returned by the LLM - :param model: the model name used - :return: total cost in dollars - """ - prompt_cost = tokencost.calculate_prompt_cost(prompt, model) - completion_cost = tokencost.calculate_completion_cost(completion, model) - total_cost = prompt_cost + completion_cost - # Convert to float to ensure consistent type. - return float(total_cost) - - -def apply_llm_batch_individual( - prompt: str, - input_list: List[str], - *, - model: str, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch of inputs one at the time. - """ - _validate_batch_inputs(prompt, input_list) - _LOG.debug("Processing batch of %d inputs individually", len(input_list)) - # Process each input sequentially with progress bar and error handling. - responses = [] - # Initialize total cost accumulator. 
- total_cost = 0.0 - for input_str in input_list: - response, cost = _call_llm_or_test_functor( - input_str=input_str, - system_prompt=prompt, - model=model, - testing_functor=testing_functor, - ) - total_cost += cost - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - _LOG.debug("Batch processing completed") - _LOG.debug("Total cost for batch with individual prompt: $%.6f", total_cost) - return responses, total_cost - - -def apply_llm_batch_with_shared_prompt( - prompt: str, - input_list: List[str], - *, - model: str, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch of input texts using the same system prompt. - """ - _validate_batch_inputs(prompt, input_list) - _LOG.debug("Processing batch of %d inputs", len(input_list)) - # Process each input sequentially with progress bar. - responses = [] - total_cost = 0.0 - if testing_functor is None: - # TODO(gp): Factor this out and use a cache. 
- llm_model = llm.get_model(model) - conv = llm.Conversation(model=llm_model) - for input_str in input_list: - result = conv.prompt(input_str, system=prompt) - response = result.text() - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=model, - ) - total_cost += cost - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - else: - for input_str in input_list: - response = testing_functor(input_str) - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - _LOG.debug("Batch processing completed") - _LOG.debug("Total cost for batch with shared prompt: $%.6f", total_cost) - return responses, total_cost - - -def apply_llm_batch_combined( - prompt: str, - input_list: List[str], - *, - model: str, - max_retries: int = 3, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch using a single combined prompt. - - This function combines all queries into a single prompt and expects - structured JSON output. It includes retry logic for failed JSON parsing. - """ - _validate_batch_inputs(prompt, input_list) - hdbg.dassert_isinstance(max_retries, int) - hdbg.dassert_lt(0, max_retries) - _LOG.debug( - "Processing batch of %d inputs with combined prompt", len(input_list) - ) - # Build combined prompt. - - combined_prompt = f"{prompt}\n\n" - instruction = """ - Return the results only as a valid JSON object with string values, using - zero-based numeric keys that match the item numbers. - - Output format: - '{"0": "result1", "1": "result2", ...} - - """ - combined_prompt += hprint.dedent(instruction) - for idx, input_str in enumerate(input_list): - combined_prompt += f"{idx}: {input_str}\n" - combined_prompt += "\nReturn ONLY the JSON object, no other text." - _LOG.debug("Combined prompt:\n%s", combined_prompt) - # You are a calculator. 
Return only the numeric result. - # ``` - # Process the following items and return results as JSON in the format: - # {"0": "result1", "1": "result2", ...} - # 0: 2 + 2 - # 1: 3 * 3 - # 2: 10 - 5 - # 3: 20 / 4 - # Return ONLY the JSON object, no other text. - # ``` - # Process with retries for JSON parsing. - total_cost = 0.0 - if testing_functor is None: - for retry_num in range(max_retries): - _LOG.debug( - "Processing batch of %d inputs with combined prompt (attempt %d/%d)", - len(input_list), - retry_num + 1, - max_retries, - ) - system_prompt = combined_prompt - user_prompt = "Process the items listed above." - response, cost = _llm(system_prompt, user_prompt, model) - total_cost += cost - try: - # Parse JSON response. - # E.g., - # ``` - # {"0": "4", "1": "9", "2": "5", "3": "5"} - # ``` - _LOG.debug("Parsing JSON response:\n%s", response) - # Extract JSON from response (handle cases where LLM adds extra text). - response_stripped = response.strip() - # Find JSON object boundaries. - json_start = response_stripped.find("{") - json_end = response_stripped.rfind("}") + 1 - hdbg.dassert_lte(0, json_start) - hdbg.dassert_lt(json_start, json_end) - json_str = response_stripped[json_start:json_end] - result_dict = json.loads(json_str) - # Convert dict to list in order. 
- responses = [] - for idx in range(len(input_list)): - key = str(idx) - if key in result_dict: - responses.append(result_dict[key]) - else: - _LOG.warning("Missing result for index %d", idx) - responses.append("") - _LOG.debug("Successfully parsed JSON response") - if progress_bar_object is not None: - progress_bar_object.update(len(input_list)) - _LOG.debug( - "Total cost for batch with combined prompt: $%.6f", - total_cost, - ) - return responses, total_cost - except (json.JSONDecodeError, ValueError) as e: - _LOG.debug( - "JSON parsing failed (attempt %d/%d): %s", - retry_num + 1, - max_retries, - e, - ) - if retry_num == max_retries - 1: - hdbg.dfatal( - "Failed to parse JSON after %d retries", max_retries - ) - # Add instruction to retry. - combined_prompt += "\n\nPrevious response had invalid JSON format. Please return ONLY a valid JSON object." - else: - responses = [] - for input_str in input_list: - response = testing_functor(input_str) - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - total_cost = 0.0 - return responses, total_cost - # Should not reach here. - raise RuntimeError("Unexpected error in apply_llm_batch_combined") - - -# ############################################################################# - - -# TODO(gp): Move it somewhere else. -def get_tqdm_progress_bar() -> tqdm: - # Use appropriate tqdm for notebook or terminal - try: - from IPython import get_ipython - - if get_ipython() is not None: - from tqdm.notebook import tqdm as notebook_tqdm - - tqdm_progress = notebook_tqdm - else: - tqdm_progress = tqdm - except ImportError: - tqdm_progress = tqdm - return tqdm_progress - - -# TODO(gp): Skip values that already have a value in the target column. 
-# TODO(gp): Parallelize -def apply_llm_prompt_to_df( - prompt: str, - df: pd.DataFrame, - extractor: Callable[[Union[str, pd.Series]], str], - target_col: str, - batch_mode: str, - *, - model: str, - batch_size: int = 50, - dump_every_batch: Optional[str] = None, - tag: str = "Processing", - testing_functor: Optional[Callable[[str], str]] = None, - use_sys_stderr: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, int]]: - """ - Apply an LLM to process a dataframe column using the same system prompt. - - This function processes text from dataframe rows using an extractor function, - applies the LLM to each item in batches, and stores the results in a target - column. It can optionally save progress to a file after each batch. - - :param prompt: system prompt to guide the LLM's behavior - :param df: dataframe to process - :param extractor: callable that extracts text from a row or string - :param target_col: name of column to store results - :param batch_mode: batch mode to use (individual, shared_prompt, combined) - :param model: model name to use (e.g., "gpt-4", "claude-3-opus") - :param batch_size: number of items to process in each batch - :param dump_every_batch: optional file path to dump the dataframe after each batch - :param tag: description tag for progress bar - :param testing_functor: optional functor to use for testing - :return: tuple of (dataframe with results, statistics dict) - """ - start_time = time.time() - hdbg.dassert_isinstance(prompt, str) - hdbg.dassert_ne(prompt, "", "Prompt cannot be empty") - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_lt(0, len(df), "Dataframe cannot be empty") - hdbg.dassert_isinstance(target_col, str) - hdbg.dassert_ne(target_col, "", "Target column cannot be empty") - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty") - hdbg.dassert_isinstance(batch_size, int) - hdbg.dassert_lt(0, batch_size) - if dump_every_batch is not None: - 
hdbg.dassert_isinstance(dump_every_batch, str) - hdbg.dassert_ne(dump_every_batch, "", "Dump file path cannot be empty") - # Create target column if it doesn't exist. - if target_col not in df.columns: - df[target_col] = None - # Process items in batches with progress bar for entire workload. - num_items = len(df) - num_batches = (num_items + batch_size - 1) // batch_size - _LOG.info( - "Processing %d items in %d batches of %d items each", - num_items, - num_batches, - batch_size, - ) - _LOG.info(hprint.to_str("model batch_mode")) - num_skipped = 0 - progress_bar_ctor = get_tqdm_progress_bar() - progress_bar_object = progress_bar_ctor( # type: ignore - total=num_items, - desc=tag, - dynamic_ncols=True, - # Workaround for unit tests. - file=sys.__stderr__ if use_sys_stderr else None, - ) - total_cost = 0.0 - # TODO(gp): Precompute the batch indices that needs to be processed. - for batch_num in range(num_batches): - # Get batch rows. - start_idx = batch_num * batch_size - end_idx = min(start_idx + batch_size, len(df)) - rows = df.iloc[start_idx:end_idx] - # Extract items from rows, filtering out invalid ones. - batch_items = [] - batch_indices = [] - for idx, row in rows.iterrows(): - extracted_text = extractor(row) - # Check if extraction returned valid text (not NaN/None/empty). - if extracted_text != "": - batch_items.append(extracted_text) - batch_indices.append(idx) - else: - # Set NaN for rows with missing company information. - df.at[idx, target_col] = "" - num_skipped += 1 - progress_bar_object.update(1) - # Call LLM only if there are valid items in this batch. 
- if batch_items: - _LOG.debug( - "Processing batch %d/%d (%d items, %d skipped)", - batch_num + 1, - num_batches, - len(batch_items), - len(rows) - len(batch_items), - ) - if batch_mode == "individual": - func = apply_llm_batch_individual - elif batch_mode == "shared_prompt": - func = apply_llm_batch_with_shared_prompt - elif batch_mode == "combined": - func = apply_llm_batch_combined - else: - hdbg.dfatal("Invalid batch mode: %s", batch_mode) - batch_responses, batch_cost = func( - prompt=prompt, - input_list=batch_items, - model=model, - testing_functor=testing_functor, - progress_bar_object=progress_bar_object, - ) - # Update total_cost. - total_cost += batch_cost - # Store results back into dataframe. - for idx, response in zip(batch_indices, batch_responses): - df.at[idx, target_col] = response - else: - _LOG.debug( - "Skipping batch %d/%d (all %d items have missing data)", - batch_num + 1, - num_batches, - len(rows), - ) - # Dump dataframe to file after batch if requested. - if dump_every_batch is not None: - _LOG.debug("Dumping dataframe to file: %s", dump_every_batch) - df.to_csv(dump_every_batch, index=False) - # Calculate elapsed time. 
- elapsed_time = time.time() - start_time - stats = { - "num_items": num_items, - "num_skipped": num_skipped, - "num_batches": num_batches, - "total_cost_in_dollars": total_cost, - "elapsed_time_in_seconds": elapsed_time, - } - _LOG.info("Processing completed:\n%s", pprint.pformat(stats)) - return df, stats diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py deleted file mode 100644 index 3d33b17d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Import as: - -import helpers.hllm_cost as hllmcost -""" - -import logging -import os -from typing import Any - -import requests - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# OpenRouter API Helpers -# ############################################################################# - - -def _get_models_info_file() -> str: - """ - Get the path to the file for storing OpenRouter models info. - """ - dir_path = hgit.get_helpers_root_dir() - file_path = os.path.join( - dir_path, "dev_scripts_helpers/llms", "openrouter_models_info.csv" - ) - return file_path - - -def _retrieve_openrouter_model_info() -> "pd.DataFrame": - """ - Retrieve OpenRouter models info from the OpenRouter API. 
- """ - import pandas as pd - - response = requests.get("https://openrouter.ai/api/v1/models") - # {'architecture': {'input_modalities': ['text', 'image'], - # 'instruct_type': None, - # 'modality': 'text+image->text', - # 'output_modalities': ['text'], - # 'tokenizer': 'Mistral'}, - # 'context_length': 131072, - # 'created': 1746627341, - # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' - # 'language model designed to deliver frontier-level ' - # ... - # 'broad compatibility across cloud environments.', - # 'id': 'mistralai/mistral-medium-3', - # 'name': 'Mistral: Mistral Medium 3', - # 'per_request_limits': None, - # 'pricing': {'completion': '0.000002', - # 'image': '0', - # 'internal_reasoning': '0', - # 'prompt': '0.0000004', - # 'request': '0', - # 'web_search': '0'}, - # 'supported_parameters': ['tools', - # 'tool_choice', - # 'max_tokens', - # 'temperature', - # 'top_p', - # 'stop', - # 'frequency_penalty', - # 'presence_penalty', - # 'response_format', - # 'structured_outputs', - # 'seed'], - # 'top_provider': {'context_length': 131072, - # 'is_moderated': False, - # 'max_completion_tokens': None}} - response_json = response.json() - # There is only one key in the response. - hdbg.dassert_eq(list(response_json.keys()), ["data"]) - response_json = response_json["data"] - model_info_df = pd.DataFrame(response_json) - return model_info_df - - -def _save_models_info_to_csv( - model_info_df: "pd.DataFrame", - file_name: str, -) -> "pd.DataFrame": - """ - Save models info to a CSV file. - """ - hdbg.dassert_isinstance(file_name, str) - hdbg.dassert_ne(file_name, "") - # TODO(*): Save all the data. - # Extract prompt, completion pricing from pricing column. 
- model_info_df["prompt_pricing"] = model_info_df["pricing"].apply( - lambda x: x["prompt"] - ) - model_info_df["completion_pricing"] = model_info_df["pricing"].apply( - lambda x: x["completion"] - ) - required_columns = [ - "id", - "name", - "description", - "prompt_pricing", - "completion_pricing", - "supported_parameters", - ] - # Take only relevant columns. - model_info_df = model_info_df.loc[:, required_columns] - # Save to CSV file. - model_info_df.to_csv(file_name, index=False) - return model_info_df - - -# ############################################################################# -# LLMCostTracker -# ############################################################################# - - -class LLMCostTracker: - """ - Track the costs of LLM API calls through one of the providers. - """ - - def __init__(self, provider_name: str, model: str) -> None: - """ - Initialize the class. - """ - self.current_cost: float = 0.0 - self.provider_name = provider_name - self.model = model - - def end_logging_costs(self) -> None: - """ - End logging costs by resetting the current cost to 0. - """ - self.current_cost = 0.0 - - def accumulate_cost(self, cost: float) -> None: - """ - Accumulate the cost. - - :param cost: The cost to accumulate - """ - self.current_cost += cost - - def get_current_cost(self) -> float: - """ - Get the current accumulated cost. - - :return: The current cost - """ - return self.current_cost - - def calculate_cost( - self, - completion: Any, - *, - models_info_file: str = "", - ) -> float: - """ - Calculate the cost of an API call, based on the provider. - - :param completion: the completion response from API - :return: the calculated cost in dollars - """ - import pandas as pd - - # Get the number of input and output tokens. 
- usage = getattr(completion, "usage", None) - hdbg.dassert( - usage is not None, - "Completion/response object has no 'usage' attribute", - ) - if hasattr(usage, "prompt_tokens") and hasattr( - usage, "completion_tokens" - ): - prompt_tokens = usage.prompt_tokens - completion_tokens = usage.completion_tokens - elif hasattr(usage, "input_tokens") and hasattr(usage, "output_tokens"): - prompt_tokens = usage.input_tokens - completion_tokens = usage.output_tokens - else: - raise ValueError( - f"Unknown usage structure on completion object: {usage}" - ) - # Get the provider and model details. - if self.provider_name == "openai": - # Get the pricing for the selected model. - # TODO(gp): Use pricing from OpenAI or Openrouter API. - # https://openai.com/api/pricing/ - # https://gptforwork.com/tools/openai-chatgpt-api-pricing-calculator - # Cost per 1M tokens. - pricing = { - "gpt-3.5-turbo": {"prompt": 0.5, "completion": 1.5}, - "gpt-4o-mini": {"prompt": 0.15, "completion": 0.60}, - "gpt-4o": {"prompt": 2.5, "completion": 10}, - "gpt-5.2": {"prompt": 1.75, "completion": 14.0}, - "gpt-5.1": {"prompt": 1.25, "completion": 10.0}, - "gpt-5-mini": {"prompt": 0.25, "completion": 2.00}, - } - hdbg.dassert_in(self.model, pricing) - model_pricing = pricing[self.model] - # Calculate the cost. - cost = (prompt_tokens / 1e6) * model_pricing["prompt"] + ( - completion_tokens / 1e6 - ) * model_pricing["completion"] - elif self.provider_name == "openrouter": - # If the model info file doesn't exist, download one. - if models_info_file == "": - models_info_file = _get_models_info_file() - _LOG.debug(hprint.to_str("models_info_file")) - if not os.path.isfile(models_info_file): - model_info_df = _retrieve_openrouter_model_info() - _save_models_info_to_csv(model_info_df, models_info_file) - else: - model_info_df = pd.read_csv(models_info_file) - # Extract pricing for this model. 
- hdbg.dassert_in(self.model, model_info_df["id"].values) - row = model_info_df.loc[model_info_df["id"] == self.model].iloc[0] - prompt_price = row["prompt_pricing"] - completion_price = row["completion_pricing"] - # Compute cost. - cost = ( - prompt_tokens * prompt_price - + completion_tokens * completion_price - ) - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost")) - return cost diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py deleted file mode 100644 index 94738202c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py +++ /dev/null @@ -1,809 +0,0 @@ -""" -Import as: - -import helpers.hlogging as hloggin -""" - -import asyncio -import contextlib -import copy -import datetime -import logging -from typing import Any, Iterable, List, Optional, Tuple, Union - -# Avoid dependency from other helpers modules since this is used when the code -# is bootstrapped. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# Copied from `helpers/hsystem.py` to avoid circular imports. -def _is_running_in_ipynb() -> bool: - try: - _ = get_ipython().config # type: ignore - res = True - except NameError: - res = False - return res - - -# Copied from `helpers/hsystem.py` to avoid circular dependencies. 
-def get_user_name() -> str: - import getpass - - res = getpass.getuser() - return res - - -# ############################################################################# -# Memory usage -# ############################################################################# - -# TODO(gp): Consider moving to hmemory.py - - -MemoryUsage = Tuple[float, float, float] - - -def get_memory_usage(process: Optional[Any] = None) -> MemoryUsage: - """ - Return the memory usage in terms of resident, virtual, and percent of total - used memory. - """ - if process is None: - import psutil - - process = psutil.Process() - rss_in_GB = process.memory_info().rss / (1024**3) - vms_in_GB = process.memory_info().vms / (1024**3) - mem_pct = process.memory_percent() - return (rss_in_GB, vms_in_GB, mem_pct) - - -def memory_to_str(resource_use: MemoryUsage, *, verbose: bool = True) -> str: - (rss_in_GB, vms_in_GB, mem_pct) = resource_use - if verbose: - txt = "rss=%.3fGB vms=%.3fGB mem_pct=%.0f%%" % ( - rss_in_GB, - vms_in_GB, - mem_pct, - ) - else: - txt = "%.3fGB %.3fGB %.0f%%" % (rss_in_GB, vms_in_GB, mem_pct) - return txt - - -def get_memory_usage_as_str(process: Optional[Any] = None) -> str: - """ - Like `get_memory_usage()` but returning a formatted string. - """ - resource_use = get_memory_usage(process) - txt = memory_to_str(resource_use) - return txt - - -# ############################################################################# -# Utils. -# ############################################################################# - -# White: 37. -# Red: 31 -# Green: 32 -# Yellow: 33 -# Blu: 34 -# Cyan: 36 -# White on red background: 41 - -_COLOR_MAPPING = { - # Green. - "TRACE": (32, "TRACE"), - # Blu. - "DEBUG": (34, "DEBUG"), - # Cyan. - "INFO": (36, "INFO "), - # White on red background. 
- "WARNING": (41, "WARN "), - "ERROR": (41, "ERROR"), - "CRITICAL": (41, "CRTCL"), -} - - -def reset_logger() -> None: - import importlib - - print("Resetting logger...") - logging.shutdown() - importlib.reload(logging) - - -def get_all_loggers() -> List: - """ - Return list of all registered loggers. - """ - logger_dict = logging.root.manager.loggerDict # type: ignore # pylint: disable=no-member - loggers = [logging.getLogger(name) for name in logger_dict] - return loggers - - -def get_matching_loggers( - module_names: Union[str, Iterable[str]], verbose: bool -) -> List: - """ - Find loggers that match a name or a name in a set. - """ - if isinstance(module_names, str): - module_names = [module_names] - loggers = get_all_loggers() - if verbose: - print("loggers=\n", "\n".join(map(str, loggers))) - # - sel_loggers = [] - for module_name in module_names: - if verbose: - print(f"module_name={module_name}") - # TODO(gp): We should have a regex. - # str(logger) looks like `` - sel_loggers_tmp = [ - logger - for logger in loggers - if str(logger).startswith(" None: - """ - Reduce the verbosity for external modules that are very chatty. - - :param verbosity: level of verbosity used for chatty modules: the - higher the better - :param verbose: print extra information - """ - module_names = [ - "aiobotocore", - "asyncio", - "boto", - "boto3", - "botocore", - "ccxt", # CCXT also needs to be shut up after the `exchange` is built. 
- "fsspec", - "hooks", - "httpcore", - "httpx", - "invoke", - "matplotlib", - "nose", - "openai", - "s3fs", - "s3transfer", - "urllib3", - # "ib_insync", - ] - # verbose = True - loggers = get_matching_loggers(module_names, verbose) - loggers = sorted(loggers, key=lambda logger: logger.name) - for logger in loggers: - logger.setLevel(verbosity) - if len(loggers) > 0: - logger_names = list({logger.name for logger in loggers}) - _LOG.debug( - "Shut up %d modules: %s", len(loggers), ", ".join(logger_names) - ) - # if _LOG.getEffectiveLevel() < logging.DEBUG: - # print(WARNING + - # " Shutting up %d modules: %s" - # % (len(loggers), ", ".join([logger.name for logger in loggers])) - # ) - - -# ############################################################################# -# _LocalTimeZoneFormatter -# ############################################################################# - - -# From https://stackoverflow.com/questions/32402502 -class _LocalTimeZoneFormatter: - """ - Override logging.Formatter to use an aware datetime object. - """ - - def __init__(self, *args: Any, **kwargs: Any): - super().__init__(*args, **kwargs) # type: ignore[call-arg] - try: - # TODO(gp): Automatically detect the time zone. It might be complicated in - # Docker. - import pytz - - self._tzinfo = pytz.timezone("America/New_York") - except ModuleNotFoundError: - # print(f"Can't import pytz: using UTC\n{str(e)}") - self._tzinfo = None - - def converter(self, timestamp: float) -> datetime.datetime: - # To make the linter happy and respecting the signature of the - # superclass method. - _ = self - # timestamp=1622423570.0147252 - dt = datetime.datetime.utcfromtimestamp(timestamp) - # Convert it to an aware datetime object in UTC time. - dt = dt.replace(tzinfo=datetime.timezone.utc) - if self._tzinfo is not None: - # Convert it to desired timezone. 
- dt = dt.astimezone(self._tzinfo) - return dt - - def formatTime( - self, record: logging.LogRecord, datefmt: Optional[str] = None - ) -> str: - dt = self.converter(record.created) - if datefmt: - s = dt.strftime(datefmt) - else: - try: - s = dt.isoformat(timespec="milliseconds") - except TypeError: - s = dt.isoformat() - return s - - -# ############################################################################# -# _ColoredFormatter -# ############################################################################# - - -# [mypy] error: Definition of "converter" in base class -# "_LocalTimeZoneFormatter" is incompatible with definition in base class -# "Formatter" -class _ColoredFormatter( # type: ignore[misc] - _LocalTimeZoneFormatter, logging.Formatter -): - """ - Logging formatter using colors for different levels. - """ - - _SKIP_DEBUG = True - - def format(self, record: logging.LogRecord) -> str: - colored_record = copy.copy(record) - # `levelname` is the internal name and can't be changed to `level_name` - # as per our conventions. - levelname = colored_record.levelname - if _ColoredFormatter._SKIP_DEBUG and levelname == "DEBUG": - colored_levelname = "" - else: - # Use white as default. - prefix = "\033[" - suffix = "\033[0m" - assert levelname in _COLOR_MAPPING, "Can't find info '%s'" - color_code, tag = _COLOR_MAPPING[levelname] - # Align the level name. - colored_levelname = f"{prefix}{color_code}m{tag}{suffix}" - colored_record.levelname = colored_levelname - return logging.Formatter.format(self, colored_record) - - -# From https://stackoverflow.com/questions/2183233 -def addLoggingLevel(levelName, levelNum, methodName=None): - """ - Comprehensively adds a new logging level to the `logging` module and the - currently configured logging class. - - `levelName` becomes an attribute of the `logging` module with the value - `levelNum`. 
`methodName` becomes a convenience method for both `logging` - itself and the class returned by `logging.getLoggerClass()` (usually just - `logging.Logger`). If `methodName` is not specified, `levelName.lower()` is - used. - - To avoid accidental clobberings of existing attributes, this method will - raise an `AttributeError` if the level name is already an attribute of the - `logging` module or if the method name is already present - - Example - ------- - >>> addLoggingLevel('TRACE', logging.DEBUG - 5) - >>> logging.getLogger(__name__).setLevel("TRACE") - >>> logging.getLogger(__name__).trace('that worked') - >>> logging.trace('so did this') - >>> logging.TRACE - 5 - """ - if not methodName: - methodName = levelName.lower() - - if hasattr(logging, levelName): - raise AttributeError( - "{} already defined in logging module".format(levelName) - ) - if hasattr(logging, methodName): - raise AttributeError( - "{} already defined in logging module".format(methodName) - ) - if hasattr(logging.getLoggerClass(), methodName): - raise AttributeError( - "{} already defined in logger class".format(methodName) - ) - - # This method was inspired by the answers to Stack Overflow post - # http://stackoverflow.com/q/2183233/2988730, especially - # http://stackoverflow.com/a/13638084/2988730 - def logForLevel(self, message, *args, **kwargs): - if self.isEnabledFor(levelNum): - self._log(levelNum, message, args, **kwargs) - - def logToRoot(message, *args, **kwargs): - logging.log(levelNum, message, *args, **kwargs) - - logging.addLevelName(levelNum, levelName) - setattr(logging, levelName, levelNum) - setattr(logging.getLoggerClass(), methodName, logForLevel) - setattr(logging, methodName, logToRoot) - - -addLoggingLevel("TRACE", 5) - - -# Note that this doesn't avoid evaluating the call. -# The only way to be completely sure that there is no evaluation is: -# ``` -# if False: _LOG.debug(...) 
-# ``` -def shut_up_log_debug(logger: logging.Logger) -> None: - logging.disable(logging.DEBUG) - # logger.debug = lambda *_: 0 - # logger.trace = lambda *_: 0 - - -# ############################################################################# -# ResourceUsageFilter -# ############################################################################# - - -# From https://stackoverflow.com/questions/10848342 -# and https://docs.python.org/3/howto/logging-cookbook.html#filters-contextual -class ResourceUsageFilter(logging.Filter): - """ - Add fields to the logger about memory and CPU use. - """ - - def __init__(self, report_cpu_usage: bool): - super().__init__() - import psutil - - self._process = psutil.Process() - self._report_cpu_usage = report_cpu_usage - if self._report_cpu_usage: - # Start sampling the CPU usage. - self._process.cpu_percent(interval=1.0) - - def filter(self, record: logging.LogRecord) -> bool: - """ - Override `logging.Filter()`, adding several fields to the logger. - """ - p = self._process - # Report memory usage. - resource_use = get_memory_usage_as_str(p) - # Report CPU usage. - if self._report_cpu_usage: - # CPU usage since the previous call. - cpu_use = p.cpu_percent(interval=None) - resource_use += " cpu=%.0f%%" % cpu_use - record.resource_use = resource_use # type: ignore - return True - - -# ############################################################################# - - -# TODO(gp): Replace `force_print_format` and `force_verbose_format` with `mode`. -def _get_logging_format( - force_print_format: bool, - force_verbose_format: bool, - force_no_warning: bool, - report_memory_usage: bool, - date_format_mode: str = "time", -) -> Tuple[str, str]: - """ - Compute the logging format depending whether running on notebook or in a - shell. 
- - The logging format can be: - - print: looks like a `print` statement - - :param force_print_format: force to use the non-verbose format - :param force_verbose_format: force to use the verbose format - """ - if _is_running_in_ipynb() and not force_no_warning: - print("WARNING: Running in Jupyter") - verbose_format = not _is_running_in_ipynb() - # - assert not (force_verbose_format and force_print_format), ( - f"Can't use both force_verbose_format={force_verbose_format} " - + f"and force_print_format={force_print_format}" - ) - if force_verbose_format: - verbose_format = True - if force_print_format: - verbose_format = False - # - if verbose_format: - # TODO(gp): We would like to have filename:name:funcName:lineno all - # justified on 15 chars. - # See https://docs.python.org/3/howto/logging-cookbook.html#use-of - # -alternative-formatting-styles - # Something like: - # {{asctime}-5s {{filename}{name}{funcname}{linedo}d}-15s {message} - # - # %(pathname)s Full pathname of the source file where the logging call was - # issued (if available). - # %(filename)s Filename portion of pathname. - # %(module)s Module (name portion of filename). - if True: - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - ) - if report_memory_usage: - # rss=0.3GB vms=2.0GB mem_pct=2% cpu=91% - log_format += " [%(resource_use)-40s]" - log_format += ( - # lib_tasks _delete_branches - " %(module)-20s: %(funcName)-30s:" - # 142: ... - " %(lineno)-4d:" - " %(message)s" - ) - else: - # Super verbose: to help with debugging print more info without trimming. - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - # .../src/lem1/amp/helpers/system_interaction.py - # _system : - " %(pathname)s %(funcName)-20s " - # 199: ... 
- " %(lineno)d:" - " %(message)s" - ) - if date_format_mode == "time": - date_fmt = "%H:%M:%S" - elif date_format_mode == "date_time": - date_fmt = "%m-%d_%H:%M" - elif date_format_mode == "date_timestamp": - date_fmt = "%Y-%m-%d %I:%M:%S %p" - else: - raise ValueError(f"Invalid date_format_mode='{date_format_mode}'") - else: - # Make logging look like a normal print(). - # TODO(gp): We want to still prefix with WARNING and ERROR. - log_format = "%(message)s" - date_fmt = "" - return date_fmt, log_format - - -def set_v1_formatter( - ch: Any, - root_logger: Any, - force_no_warning: bool, - force_print_format: bool, - force_verbose_format: bool, - report_cpu_usage: bool, - report_memory_usage: bool, -) -> _ColoredFormatter: - # Decide whether to use verbose or print format. - date_fmt, log_format = _get_logging_format( - force_print_format, - force_verbose_format, - force_no_warning, - report_memory_usage, - ) - # Use normal formatter. - # formatter = logging.Formatter(log_format, datefmt=date_fmt) - # Use formatter with colors. - formatter = _ColoredFormatter(log_format, date_fmt) - ch.setFormatter(formatter) - root_logger.addHandler(ch) - # Report resource usage. - if report_memory_usage: - # Get root logger. - log = logging.getLogger("") - # Create filter. - f = ResourceUsageFilter(report_cpu_usage) - # The ugly part:adding filter to handler. - log.handlers[0].addFilter(f) - return formatter - - -# ############################################################################# -# CustomFormatter -# ############################################################################# - - -# pylint: disable=line-too-long -class CustomFormatter(logging.Formatter): - """ - Override `format` to implement a completely custom logging formatting. 
- - The logging output looks like: - ``` - 07:37:17 /app/amp/helpers/hunit_test.py setUp 932 - Resetting random.seed to 20000101 - ``` - or for simulated time: - ``` - 07:43:17 @ 2022-01-18 02:43:17 workload /app/amp/helpers/test/test_hlogging.py workload:33 - -> wait - ``` - """ - - def __init__( - self, - *args: Any, - date_format_mode: str = "time", - report_memory_usage: bool = False, - report_cpu_usage: bool = False, - **kwargs: Any, - ): - super().__init__(*args, **kwargs) - self._date_fmt = self._get_date_format(date_format_mode) - # - try: - # TODO(gp): Automatically detect the time zone. It might be complicated - # in Docker. - import pytz - - self._tzinfo = pytz.timezone("America/New_York") - except ModuleNotFoundError: - # print(f"Can't import pytz: using UTC\n{str(e)}") - self._tzinfo = None - # - self._report_memory_usage = report_memory_usage - self._report_cpu_usage = report_cpu_usage - if self._report_memory_usage or self._report_cpu_usage: - import psutil - - self._process = psutil.Process() - if self._report_cpu_usage: - # Start sampling the CPU usage. - self._process.cpu_percent(interval=1.0) - - def format(self, record: logging.LogRecord) -> str: - # record = copy.copy(record) - # print(pprint.pformat(record.__dict__)) - # `record` looks like: - # {'args': (30,), - # 'created': 1642456725.5569131, - # 'exc_info': None, - # 'exc_text': None, - # 'filename': 'logging_main.py', - # 'funcName': 'test_logger', - # 'levelname': 'WARNING', - # 'levelno': 30, - # 'lineno': 105, - # 'module': 'logging_main', - # 'msecs': 556.9131374359131, - # 'msg': 'WARNING=%s', - # 'name': '__main__', - # 'pathname': 'helpers/logging_testing/logging_main.py', - # 'process': 16484, - # 'processName': 'MainProcess', - # 'relativeCreated': 29.956817626953125, - # 'stack_info': None, - # 'thread': 140250120021824, - # 'threadName': 'MainThread'} - msg = "" - # Add the wall clock time. - msg += self._get_wall_clock_time() - # Report memory usage, if needed. 
- # rss=0.240GB vms=1.407GB mem_pct=2% cpu=92% - if self._report_memory_usage: - msg_tmp = get_memory_usage_as_str(self._process) - # Escape the % to avoid confusing for a string to expand. - msg_tmp = msg_tmp.replace("%", "%%") - msg += " " + msg_tmp - # Report CPU usage, if needed. - if self._report_cpu_usage: - # CPU usage since the previous call. - msg_tmp = " cpu=%.0f" % self._process.cpu_percent(interval=None) - # Escape the % to avoid confusing for a string to expand. - msg_tmp += "%%" - msg += msg_tmp - # Get the (typically) simulated wall clock time. - import helpers.hwall_clock_time as hwacltim - - simulated_wall_clock_time = hwacltim.get_wall_clock_time() - if simulated_wall_clock_time is not None: - date_fmt = "%Y-%m-%d %I:%M:%S" - msg += " @ " + self._convert_time_to_string( - simulated_wall_clock_time, date_fmt - ) - # Colorize / shorten the logging level if it's not DEBUG. - if record.levelno != logging.DEBUG: - msg += f" - {self._colorize_level(record.levelname)}" - # Add information about which coroutine we are running in. - try: - asyncio.get_running_loop() - task = asyncio.current_task() - if task is not None: - msg += f" {task.get_name()}" - except (RuntimeError, AttributeError): - pass - # Add information about the caller. - # ``` - # /helpers/hunit_test.py setUp:932 - # ``` - # pathname = record.pathname.replace("/amp", "") - # msg += f" {pathname} {record.funcName}:{record.lineno}" - # ``` - # test_hlogging.py _print_time:28 - # ``` - msg += f" {record.filename} {record.funcName}:{record.lineno}" - # Indent. - if len(msg) < 50: - msg = "%-60s" % msg - else: - msg = "%-80s" % msg - # Add the caller string. 
- msg += f" {record.msg}" - record.msg = msg - return super().format(record) - - @staticmethod - def _get_date_format(date_format_mode: str) -> str: - if date_format_mode == "time": - date_fmt = "%H:%M:%S" - elif date_format_mode == "date_time": - date_fmt = "%m-%d_%H:%M" - elif date_format_mode == "date_timestamp": - date_fmt = "%Y-%m-%d %I:%M:%S %p" - else: - raise ValueError("Invalid date_format") - return date_fmt - - def _convert_time_to_string( - self, now: datetime.datetime, date_fmt: str - ) -> str: - # Convert it to an tz-aware datetime object in UTC time. - dt = now.replace(tzinfo=datetime.timezone.utc) - if self._tzinfo is not None: - # Convert it to desired timezone. - dt = dt.astimezone(self._tzinfo) - time_as_str = dt.strftime(date_fmt) - return time_as_str - - def _get_wall_clock_time(self) -> str: - dt = datetime.datetime.utcnow() - return self._convert_time_to_string(dt, self._date_fmt) - - def _colorize_level(self, level_name: str) -> str: - # Use white as default. - prefix = "\033[" - suffix = "\033[0m" - # Print stacktrace to debug. - if False: - import traceback - - txt = traceback.format_stack() - txt = "".join(txt) - print(txt) - - assert level_name in _COLOR_MAPPING, "Can't find info '%s'" - color_code, tag = _COLOR_MAPPING[level_name] - colored_level_name = f"{prefix}{color_code}m{tag}{suffix}" - return colored_level_name - - -def set_v2_formatter( - ch: Any, - root_logger: Any, - force_no_warning: bool, - force_print_format: bool, - force_verbose_format: bool, - report_memory_usage: bool, - report_cpu_usage: bool, -) -> Union[logging.Formatter, CustomFormatter]: - """ - See params in `init_logger()`. - """ - assert not (force_verbose_format and force_print_format), ( - f"Can't use both force_verbose_format={force_verbose_format} " - + f"and force_print_format={force_print_format}" - ) - # When running in a notebook make logging behave like a `print`. 
- verbose_format = True - if _is_running_in_ipynb(): - verbose_format = False - if not force_no_warning: - print("WARNING: Running in Jupyter") - # - if force_verbose_format: - verbose_format = True - if force_print_format: - verbose_format = False - # - if verbose_format: - # Force to report memory / CPU usage. - # report_memory_usage = report_cpu_usage = True - # print( - # "report_memory_usage=%s report_cpu_usage=%s" - # % (report_memory_usage, report_cpu_usage) - # ) - formatter: Union[logging.Formatter, CustomFormatter] = CustomFormatter( - report_memory_usage=report_memory_usage, - report_cpu_usage=report_cpu_usage, - ) - else: - # Make logging look like a normal `print()`. - log_format = "%(levelname)-5s %(message)s" - date_fmt = "" - formatter = logging.Formatter(log_format, datefmt=date_fmt) - ch.setFormatter(formatter) - root_logger.addHandler(ch) - return formatter - - -# TODO(gp): Not sure it works properly. -@contextlib.contextmanager -def set_level(logger: Any, level: int) -> None: - """ - Context manager changing the verbosity level. - """ - previous_level = logger.getEffectiveLevel() - try: - logger.setLevel(level) - yield - finally: - logger.setLevel(previous_level) - assert logger.getEffectiveLevel() == previous_level - - -# ############################################################################# - - -def getLogger(name: str) -> logging.Logger: - """ - Get logger with custom trace method support. - - This function provides the same functionality as `logging.getLogger()` - but with proper type hints that include the custom trace method. - - Usage: - ``` - # Instead of `import logging`. 
- import helpers.hlogging as hlogging - - _LOG = hlogging.getLogger(__name__) - _LOG.trace("This works without type checker errors") - _LOG.debug("Standard logging methods also work") - ``` - """ - return logging.getLogger(name) - - -def test_logger() -> None: - print("# Testing logger ...") - print("effective level=", _LOG.getEffectiveLevel()) - # - if hasattr(_LOG, "trace"): - if hasattr(logging, "TRACE"): - _LOG.trace("TRACE=%s", logging.TRACE) - else: - _LOG.trace("TRACE level not available") - # - _LOG.debug("DEBUG=%s", logging.DEBUG) - # - _LOG.info("INFO=%s", logging.INFO) - # - _LOG.warning("WARNING=%s", logging.WARNING) - # - _LOG.error("ERROR=%s", logging.ERROR) - # - _LOG.critical("CRITICAL=%s", logging.CRITICAL) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi deleted file mode 100644 index 993f9cc14..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi +++ /dev/null @@ -1,14 +0,0 @@ -""" -Type stub for hlogging module with custom Logger that includes trace method. -""" - -import logging -from typing import Any - -class Logger(logging.Logger): - """ - Custom Logger class that includes trace method. - """ - def trace(self, msg: str, *args: Any, **kwargs: Any) -> None: ... - -def getLogger(name: str) -> Logger: ... 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py deleted file mode 100644 index 07fe8d14f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown as hmarkdo -""" - -from helpers.hmarkdown_bullets import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_coloring import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_comments import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_div_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_fenced_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_filtering import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_formatting import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_tables import * # 
isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py deleted file mode 100644 index 0edb705a4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py +++ /dev/null @@ -1,248 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_bullets as hmarbull -""" - -import logging -import re -from typing import Generator, List, Tuple - -from helpers.hmarkdown_comments import ( - process_comment_block, - process_single_line_comment, -) - -_LOG = logging.getLogger(__name__) - -_TRACE = False - -# ############################################################################# -# Formatting markdown -# ############################################################################# - - -# These are the colors that are supported by Latex / markdown, are readable on -# white, and form an equidistant color palette. -_ALL_COLORS = [ - "red", - "orange", - "brown", - "olive", - "green", - "teal", - "cyan", - "blue", - "violet", - "darkgray", - "gray", -] - - -# TODO(gp): -> hmarkdown_color.py? -# TODO(gp): This seems the same as `_colorize_bullet_points()`. -def colorize_bold_text( - markdown_text: str, color_sequence: bool, *, use_abbreviations: bool = True -) -> str: - r""" - Add colors to bold text in markdown using equidistant colors from an array. - - The function finds all bold text (enclosed in ** or __) and adds - LaTeX color commands while preserving the rest of the markdown - unchanged. 
- - :param markdown_text: Input markdown text - :param color_sequence: Sequence of colors to use - :param use_abbreviations: Use LaTeX abbreviations for colors, - `\red{text}` instead of `\textcolor{red}{text}` - :return: Markdown text with colored bold sections - """ - # Remove any existing color formatting. - # Remove \color{text} format. - markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) - # Remove \textcolor{color}{text} format. - markdown_text = re.sub( - r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text - ) - # Find all bold text (both ** and __ formats). - bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" - # matches will look like: - # For **text**: group(1)='text', group(2)=None. - # For __text__: group(1)=None, group(2)='text'. - matches = list(re.finditer(bold_pattern, markdown_text)) - if not matches: - return markdown_text - result = markdown_text - # Calculate color spacing to use equidistant colors. - if color_sequence == "equidistant": - color_step = len(_ALL_COLORS) / len(matches) - elif color_sequence == "fixed": - color_step = 1 - else: - raise ValueError(f"Invalid color sequence: {color_sequence}") - # Process matches in reverse to not mess up string indices. - for i, match in enumerate(reversed(matches)): - # Get the matched bold text (either ** or __ format). - bold_text = match.group(1) or match.group(2) - # Calculate `color_idx` using equidistant spacing. - color_idx = int((len(matches) - 1 - i) * color_step) % len(_ALL_COLORS) - color = _ALL_COLORS[color_idx] - # Create the colored version. - if use_abbreviations: - # E.g., \red{text} - colored_text = f"\\{color}{{{bold_text}}}" - else: - # E.g., \textcolor{red}{text} - colored_text = f"\\textcolor{{{color}}}{{{bold_text}}}" - # Apply bold. - colored_text = f"**{colored_text}**" - # Replace in the original text. 
- result = result[: match.start()] + colored_text + result[match.end() :] - return result - - -def remove_bullets(markdown_text: str) -> str: - """ - Remove bullet points (dashes) and leading spaces from markdown text. - - This function removes all leading dashes (`-`) from lines and removes - leading whitespace. Empty lines are preserved. - - :param markdown_text: Input markdown text - :return: Markdown text with bullets removed - """ - lines = markdown_text.split("\n") - result = [] - for line in lines: - # Check if line is not empty. - if line.strip(): - # Remove leading whitespace. - stripped_line = line.lstrip() - # Check if line starts with a bullet point. - if stripped_line.startswith("- "): - # Remove the bullet and the space after it. - result.append(stripped_line[2:]) - else: - # Keep the line as is (no leading whitespace). - result.append(stripped_line) - else: - # Preserve empty lines. - result.append("") - return "\n".join(result) - - -def format_first_level_bullets(markdown_text: str) -> str: - """ - Add empty lines only before first level bullets and remove all empty lines - from markdown text. - - :param markdown_text: Input markdown text - :return: Formatted markdown text - """ - # Split into lines and remove empty ones. - lines = [line for line in markdown_text.split("\n") if line.strip()] - # Add empty lines only before first level bullets. - result = [] - for i, line in enumerate(lines): - # Check if current line is a first level bullet (no indentation). - if re.match(r"^- ", line): - # Add empty line before first level bullet if not at start. - if i > 0: - result.append("") - result.append(line) - return "\n".join(result) - - -def process_code_block( - line: str, in_code_block: bool, i: int, lines: List[str] -) -> Tuple[bool, bool, List[str]]: - """ - Process lines of text to handle code blocks that start and end with '```'. 
- - The transformation is to: - - add an empty line before the start/end of the code - - indent the code block with four spaces - - replace '//' with '# ' to comment out lines in Python code - - :param line: The current line of text being processed. - :param in_code_block: A flag indicating if the function is currently - inside a code block. - :param i: The index of the current line in the list of lines. - :param lines: the lines of text to process - :return: tuple containing: - - `do_continue`: whether to continue processing the current line or skip - it - - `in_code_block`: boolean indicating whether the function is currently - inside a code block - - list of processed lines of text - """ - out: List[str] = [] - do_continue = False - # Look for a code block. - if re.match(r"^(\s*)```", line): - _LOG.debug(" -> code block") - in_code_block = not in_code_block - # Add empty line before the start of the code block. - if ( - in_code_block - and (i + 1 < len(lines)) - and re.match(r"\s*", lines[i + 1]) - ): - out.append("\n") - out.append(" " + line) - if ( - not in_code_block - and (i + 1 < len(lines)) - and re.match(r"\s*", lines[i + 1]) - ): - out.append("\n") - do_continue = True - return do_continue, in_code_block, out - if in_code_block: - line = line.replace("// ", "# ") - out.append(" " + line) - # We don't do any of the other post-processing. - do_continue = True - return do_continue, in_code_block, out - return do_continue, in_code_block, out - - -# TODO(gp): -> iterator -# TODO(gp): where is this used? -def process_lines(lines: List[str]) -> Generator[Tuple[int, str], None, None]: - """ - Process lines of text to handle comment blocks, code blocks, and single - line comments. 
- - :param lines: list of all the lines of text being processed - :return: generator of processed lines of text - """ - out: List[str] = [] - in_skip_block = False - in_code_block = False - for i, line in enumerate(lines): - _LOG.debug("%s:line=%s", i, line) - # 1) Remove comment block. - if _TRACE: - _LOG.debug("# 1) Process comment block.") - do_continue, in_skip_block = process_comment_block(line, in_skip_block) - if do_continue: - continue - # 2) Remove code block. - if _TRACE: - _LOG.debug("# 2) Process code block.") - do_continue, in_code_block, out_tmp = process_code_block( - line, in_code_block, i, lines - ) - out.extend(out_tmp) - if do_continue: - continue - # 3) Remove single line comment. - if _TRACE: - _LOG.debug("# 3) Process single line comment.") - do_continue = process_single_line_comment(line) - if do_continue: - continue - out.append(line) - # - yield from enumerate(out) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py deleted file mode 100644 index ba7278726..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Utilities for colorizing markdown and LaTeX text with color commands. 
- -Import as: - -import helpers.hmarkdown_coloring as hmarcolo -""" - -import logging -import re -from typing import Dict, List, Optional - -import helpers.hdbg as hdbg -from helpers.hmarkdown_fenced_blocks import ( - replace_fenced_blocks_with_tags, - replace_tags_with_fenced_blocks, -) -from helpers.hmarkdown_tables import ( - replace_tables_with_tags, - replace_tags_with_tables, -) - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Colorize -# ############################################################################# - -# Mapping of markdown color names to their LaTeX color equivalents for use in -# \textcolor{} commands. -_MD_COLORS_LATEX_MAPPING = { - "red": "red", - "orange": "orange", - "yellow": "yellow", - "lime": "lime", - "green": "darkgreen", - "teal": "teal", - "cyan": "cyan", - "blue": "blue", - "purple": "purple", - "violet": "violet", - "magenta": "magenta", - "pink": "pink", - "brown": "brown", - "olive": "olive", - "gray": "gray", - "darkgray": "darkgray", - "lightgray": "lightgray", - "black": "black", - "white": "white", -} - - -def get_md_colors_latex_mapping() -> Dict[str, str]: - """ - Get a copy of the markdown-to-LaTeX color mapping. - - :return: Dict mapping color names (e.g., 'red', 'blue') to LaTeX color names - """ - return dict(_MD_COLORS_LATEX_MAPPING) - - -# Curated list of colors that are visually distinguishable and work well in -# both markdown and LaTeX contexts (excludes ones which are too light or have -# poor contrast). -_MD_COLORS = [ - "red", - "orange", - # "yellow", - # "lime", - "green", - "teal", - "cyan", - "blue", - # "purple", - "violet", - "magenta", - # "pink", - "brown", - "olive", - "gray", - "darkgray", - # "lightgray", - "black", - # "white", -] - - -def get_md_colors() -> List[str]: - """ - Get a copy of the curated list of markdown colors. 
- - :return: List of color names suitable for colorizing markdown/LaTeX - """ - return list(_MD_COLORS) - - -def process_color_commands(in_line: str) -> str: - r""" - Transform color commands like `\red{xyz}` into valid LaTeX syntax. - - If the content is text (not math), wraps it in `\text{}`. - - E.g.: - - `\red{abc}` -> `\textcolor{red}{\text{abc}}` - - `\blue{x + y}` -> `\textcolor{blue}{x + y}` - - :param in_line: input line to process - :return: line with color commands transformed - """ - for md_color, latex_color in get_md_colors_latex_mapping().items(): - # This regex matches color commands like \red{content}, \blue{content}, - # etc. - pattern = re.compile( - rf""" - \\{md_color} # Match the color command (e.g., \red, \blue, etc.). - \{{ # Match the opening curly brace. - ([^}}]*) # Capture everything inside the curly braces. - \}} # Match the closing curly brace. - """, - re.VERBOSE, - ) - - def _replacement(match: re.Match, latex_color: str) -> str: - """ - Replace a color command with LaTeX \textcolor directive. - """ - content = match.group(1) - # Math expressions (containing operators, brackets, etc.) render - # directly; plain text needs \text{} wrapper for proper LaTeX rendering. - is_math_expr = any(c in content for c in "+-*/=<>{}[]()^_") - if is_math_expr: - ret = rf"\textcolor{{{latex_color}}}{{{content}}}" - else: - ret = rf"\textcolor{{{latex_color}}}{{\text{{{content}}}}}" - return ret - - # Replace the color command with the LaTeX color command. - in_line = re.sub( - pattern, lambda m: _replacement(m, latex_color), in_line - ) - return in_line - - -def has_color_command(text: str) -> bool: - """ - Check if text contains any color commands like `\\red{...}` or `\\blue{...}`. 
- - :param text: text to check - :return: True if text contains at least one color command - """ - hdbg.dassert_isinstance(text, str) - # hdbg.dassert_not_in("\n", line) - for color in _MD_COLORS_LATEX_MAPPING.keys(): - # This regex matches LaTeX color commands like \red{content}, - # \blue{content}, etc. - pattern = re.compile( - rf""" - \\{color} # Match the color command (e.g., \red, \blue, etc.). - \{{ # Match the opening curly brace. - ([^}}]*) # Capture everything inside the curly braces. - \}} # Match the closing curly brace. - """, - re.VERBOSE, - ) - if re.search(pattern, text): - return True - return False - - -# TODO(gp): -> List[str] -# TODO(gp): Use hmarkdown.process_lines() and test it. -def colorize_bullet_points_in_slide( - txt: str, - *, - use_abbreviations: bool = True, - interpolate_colors: bool = False, - all_md_colors: Optional[List[str]] = None, -) -> str: - r""" - Colorize bold markdown items `**text**` with color commands. - - Scans the text line-by-line for bold markdown items and wraps each in a - color command (e.g., `**\red{text}**`). Skips code blocks and tables to - preserve their formatting. Bold items are colored sequentially using the - provided color list. 
- - :param txt: Markdown text containing bold items to colorize - :param use_abbreviations: - - If True, use abbreviated color syntax (e.g., `\red{foo}`) - - If False, use full LaTeX syntax (e.g., `\textcolor{red}{foo}`) - :param interpolate_colors: - - If True, evenly space selected colors across all bold items - - If False, use a predefined sequence for common counts (1-4 items get - fixed color sets, more items cycle through all_md_colors) - :param all_md_colors: List of available colors to cycle through - - Default: curated list from `get_md_colors()` - :return: Markdown text with bold items wrapped in color commands - """ - hdbg.dassert_isinstance(txt, str) - if all_md_colors is None: - all_md_colors = list(get_md_colors()) - # Strip code blocks and tables to avoid colorizing content inside them. - lines = txt.split("\n") - lines, fence_map = replace_fenced_blocks_with_tags(lines) - _LOG.debug("Found %s fenced blocks", len(fence_map)) - lines, table_map = replace_tables_with_tags(lines) - _LOG.debug("Found %s tables", len(table_map)) - # Count bold markers (**) to determine how many bold items exist. - tot_bold = 0 - # Scan the text line by line and count how many bold items there are. - for line in lines: - # Count the number of bold items. - num_bold = len(re.findall(r"\*\*", line)) - tot_bold += num_bold - _LOG.debug("tot_bold=%s", tot_bold) - if tot_bold == 0: - return txt - # Divide by 2 since each bold item is wrapped with ** on both sides. - # hdbg.dassert_eq(tot_bold % 2, 0, "tot_bold=%s needs to be even", tot_bold) - num_bolds = tot_bold // 2 - - def _interpolate_colors(num_bolds: int) -> List[str]: - """ - Sample colors evenly spaced to cover all bold items distinctly. 
- """ - step = len(all_md_colors) // num_bolds - colors = list(all_md_colors)[::step][:num_bolds] - return colors - - if interpolate_colors: - colors = _interpolate_colors(num_bolds) - else: - # Use fixed color sequences for small numbers of bold items; for larger - # counts, cycle through the available colors. - if num_bolds == 1: - colors = ["red"] - elif num_bolds == 2: - colors = ["red", "blue"] - elif num_bolds == 3: - colors = ["red", "green", "blue"] - elif num_bolds == 4: - colors = ["red", "green", "blue", "violet"] - else: - colors = all_md_colors[:num_bolds] - _LOG.debug("colors=%s", colors) - hdbg.dassert_lte( - num_bolds, len(colors), "Number of bold items exceeds available colors" - ) - color_idx = 0 - txt_out = [] - for line in lines: - - def color_replacer(match: re.Match[str]) -> str: - """ - Replace strings like "**foo**" with strings like "**\red{foo}**". - """ - nonlocal color_idx - text = match.group(1) - hdbg.dassert_lte( - color_idx, - len(colors), - "Color index out of bounds; not enough colors assigned", - ) - color_to_use = colors[color_idx] - hdbg.dassert_in( - color_to_use, - get_md_colors_latex_mapping(), - "Selected color is not in the color mapping", - ) - latex_color = get_md_colors_latex_mapping()[color_to_use] - color_idx += 1 - if use_abbreviations: - ret = f"**\\{color_to_use}{{{text}}}**" - else: - ret = f"**\\textcolor{{{latex_color}}}{{{text}}}**" - return ret - - line = re.sub(r"\*\*([^*]+)\*\*", color_replacer, line) - txt_out.append(line) - # Restore code blocks and tables that were temporarily replaced with tags. 
- txt_out = replace_tags_with_fenced_blocks(txt_out, fence_map) - txt_out = replace_tags_with_tables(txt_out, table_map) - txt_out = "\n".join(txt_out) - return txt_out diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py deleted file mode 100644 index 5b626a15a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_comments as hmarcomm -""" - -import logging -import re -from typing import Tuple - -import helpers.hdbg as hdbg -from helpers.hmarkdown_headers import is_markdown_line_separator - -_LOG = logging.getLogger(__name__) - - -def process_single_line_comment(line: str) -> bool: - """ - Handle single line comment. - - We need to do it after the '//' in code blocks have been handled. - - :param line: line of text to process - :return: whether to continue processing the line or skip it - """ - do_continue = False - if line.startswith(r"%%") or line.startswith(r"//"): - do_continue = True - _LOG.debug(" -> do_continue=True") - return do_continue - # Skip frame. - if is_markdown_line_separator(line): - do_continue = True - _LOG.debug(" -> do_continue=True") - return do_continue - # Nothing to do. - return do_continue - - -def process_comment_block(line: str, in_skip_block: bool) -> Tuple[bool, bool]: - """ - Process lines of text to identify blocks that start with '' or '*/'. 
- - :param line: current line of text being processed - :param in_skip_block: flag indicating if the function is currently - inside a comment block - :return: tuple containing: - - `do_continue`: whether to continue processing the current line or skip - it - - `in_skip_block`: boolean indicating whether the function is currently - inside a comment block - """ - do_continue = False - if line.startswith(r"") or re.search(r"^\s*\*\/", line): - # End skipping comments. - in_skip_block = False - # Skip comment. - _LOG.debug(" -> skip") - do_continue = True - return do_continue, in_skip_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py deleted file mode 100644 index 169e06624..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Utilities for handling div blocks in markdown files. - -This module provides functions to add and remove prettier-ignore comments -around div blocks in markdown files. - -Import as: - -import helpers.hmarkdown_div_blocks as hmadiblo -""" - -from typing import List, Tuple - - -def _split_lines_into_chunks( - lines: List[str], -) -> List[Tuple[bool, List[str]]]: - """ - Split lines into chunks of div blocks and non-div blocks. - - A div block starts with a line containing ::: and ends with another - line containing :::. - - :param lines: List of strings representing lines in a markdown file. - :return: List of tuples (is_div_block, chunk_lines) where is_div_block - indicates if the chunk is a div block. - """ - chunks = [] - i = 0 - while i < len(lines): - line = lines[i] - # Check if this line starts a div block. - if line.strip().startswith(":::"): - # Look ahead to find the closing div block. 
- j = i + 1 - while j < len(lines): - if lines[j].strip().startswith(":::"): - # Found the end of the div block. - chunk_lines = lines[i : j + 1] - chunks.append((True, chunk_lines)) - i = j + 1 - break - j += 1 - else: - # No closing div block found, treat as regular line. - chunks.append((False, [line])) - i += 1 - else: - # Start a non-div block chunk. - chunk_lines = [line] - i += 1 - # Continue collecting non-div lines. - while i < len(lines) and not lines[i].strip().startswith(":::"): - chunk_lines.append(lines[i]) - i += 1 - chunks.append((False, chunk_lines)) - return chunks - - -def add_prettier_ignore_to_div_blocks(lines: List[str]) -> List[str]: - """ - Add prettier-ignore comments around div blocks. - - A div block starts with a line containing ::: and has another line - with ::: following it. - - Examples of div blocks: - - :::: - ::::{.column width=40%} - - :::columns - ::::{.column width=60%} - - :::: - ::: - - :param lines: List of strings representing lines in a markdown file. - :return: List of strings with prettier-ignore comments added. - """ - # Step 1: Split into chunks. - chunks = _split_lines_into_chunks(lines) - # Step 2: Process chunks and add prettier-ignore comments. - result = [] - for is_div_block, chunk_lines in chunks: - if is_div_block: - # Add prettier-ignore comments around div blocks. - result.append("") - result.append("") - result.extend(chunk_lines) - result.append("") - result.append("") - else: - # Add non-div block lines as-is. - result.extend(chunk_lines) - return result - - -def remove_prettier_ignore_from_div_blocks(lines: List[str]) -> List[str]: - """ - Remove all prettier-ignore comments from lines. - - This function removes: - - lines - - lines - - Empty lines before prettier-ignore-start - - Empty lines after prettier-ignore-end - - :param lines: List of strings representing lines in a markdown file. - :return: List of strings with prettier-ignore comments removed. 
- """ - result = [] - i = 0 - while i < len(lines): - line = lines[i] - # Check if this is a prettier-ignore-start comment. - if line.strip() == "": - # Remove empty line before prettier-ignore-start if present. - if result and result[-1] == "": - result.pop() - # Skip the prettier-ignore-start line. - i += 1 - continue - # Check if this is a prettier-ignore-end comment. - if line.strip() == "": - # Skip the prettier-ignore-end line. - i += 1 - # Skip empty line after prettier-ignore-end if present. - if i < len(lines) and lines[i] == "": - i += 1 - continue - # Add all other lines. - result.append(line) - i += 1 - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py deleted file mode 100644 index 8d3614b9b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_fenced_blocks as hmafeblo -""" - -import logging -import pprint -import re -from typing import Dict, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Add a decorator like in hprint to process both strings and lists -# of strings. - - -def replace_fenced_blocks_with_tags( - lines: List[str], -) -> Tuple[List[str], Dict[str, str]]: - """ - Replace fenced blocks with a tag and return the mapping from tags to the - fenced block text. 
- - E.g., - ```` - hello - world - ```python - foo - ``` - bye - ```` - is replaced with: - ``` - hello - world - - bye - ``` - - :param lines: list of lines to process - :return: tuple containing: - - list of lines with the fenced blocks replaced by tags - - mapping from tags to the fenced block text - """ - hdbg.dassert_isinstance(lines, list) - result = [] - # True if we are inside a fenced block. - in_fenced_block = False - # Count the number of fenced blocks found. - fenced_block_count = 0 - # Store the mapping between the block number and the fence type. - fence_map = {} - # Store the text of the fenced block. - fence_depth = 0 - fence_text = [] - for i, line in enumerate(lines): - _LOG.debug("%d:line='%s'", i, line) - _LOG.debug( - " " - + hprint.to_str("fenced_block_count in_fenced_block fence_depth") - ) - # Look for the start of a fenced block. - fence_match = re.match(r"^\s*(`{3,})", line) - if fence_match: - _LOG.debug(" -> fence_match") - curr_fence_depth = len(fence_match.group(0)) - if not in_fenced_block: - # Start of a fenced block. - _LOG.debug(" -> start of fenced block") - in_fenced_block = True - fence_depth = curr_fence_depth - fenced_block_count += 1 - fence_text.append(line) - else: - # We are already in a fenced block. - fence_text.append(line) - if curr_fence_depth == fence_depth: - # End of block found. - _LOG.debug(" -> end of fenced block") - in_fenced_block = False - # Replace nested code block markers with tag. - result.append(f"") - fence_map[str(fenced_block_count)] = "\n".join(fence_text) - _LOG.debug(" -> added to fence_map") - # Reset state. - fence_depth = 0 - fence_text = [] - else: - if in_fenced_block: - _LOG.debug(" -> in_fenced_block") - fence_text.append(line) - else: - result.append(line) - return result, fence_map - - -def replace_tags_with_fenced_blocks( - lines: List[str], fence_map: Dict[str, str] -) -> List[str]: - """ - Replace tags with fenced blocks. 
- - :param lines: list of lines to process - :param fence_map: mapping from tags to fenced block text - :return: list of lines with tags replaced by fenced blocks - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(fence_map, dict) - result = [] - for line in lines: - if line.startswith("")[0] - hdbg.dassert_in(tag, fence_map, "Found unmatched tag %s", tag) - result.append(fence_map[tag]) - del fence_map[tag] - else: - result.append(line) - hdbg.dassert_eq( - len(fence_map), - 0, - "Found %s unmatched tags:\n%s", - len(fence_map), - pprint.pformat(fence_map), - ) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py deleted file mode 100644 index 666c3d03b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_filtering as hmarfilt -""" - -import logging -import re -from typing import List, Tuple - -import helpers.hdbg as hdbg -from helpers.hmarkdown_headers import ( - extract_section_from_markdown, -) -from helpers.hmarkdown_slides import extract_slides_from_markdown - -_LOG = logging.getLogger(__name__) - - -def filter_by_header(lines: List[str], header: str) -> List[str]: - """ - Extract a specific header from markdown text. - - :param lines: list of markdown lines to be processed - :param header: header to filter by (e.g., `# Introduction`) - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - # Filter by header. 
- txt_lines = extract_section_from_markdown(lines, header) - hdbg.dassert_isinstance(txt_lines, list) - return txt_lines - - -def _parse_range(range_as_str: str, max_value: int) -> Tuple[int, int]: - """ - Parse a 0-indexed range string like '0:10' into start and end indices. - - :param range_as_str: string in format 'start:end' where start/end - can be numbers or 'None' (None means 0 for start, max_value for end) - :param max_value: maximum value to use when 'None' is specified for end - :return: tuple of '(start_index, end_index)' as 0-indexed integers - """ - m = re.match(r"^(\S+):(\S+)$", range_as_str) - hdbg.dassert(m, "Invalid range_as_str='%s'", range_as_str) - assert m is not None - start_value, end_value = m.groups() - if start_value.lower() == "none": - start_value = 0 - else: - start_value = int(start_value) - if end_value.lower() == "none": - end_value = max_value - else: - end_value = int(end_value) - return start_value, end_value - - -def filter_by_lines(lines: List[str], filter_by_lines: str) -> List[str]: - """ - Filter the lines of text in `[start_line, end_line[` (0-indexed). - - :param lines: list of lines to be processed - :param filter_by_lines: 0-indexed range string like `0:10`, `0:None`, or `None:10` - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - start_line, end_line = _parse_range(filter_by_lines, len(lines)) - hdbg.dassert_lte(start_line, end_line) - txt = lines[start_line:end_line] - _LOG.warning( - "filter_by_lines='%s' -> lines=[%s:%s]", - filter_by_lines, - start_line, - end_line, - ) - hdbg.dassert_isinstance(txt, list) - return txt - - -def filter_by_slides(lines: List[str], filter_by_slides: str) -> List[str]: - """ - Filter the lines of text in `[start_slide, end_slide[` (0-indexed). 
- - :param lines: list of lines to be processed - :param filter_by_slides: 0-indexed range string like `0:10`, `0:None`, or `None:10` - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - slides_info, last_line_number = extract_slides_from_markdown(lines) - _LOG.debug("slides_info=%s\n%s", len(slides_info), slides_info) - start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) - _LOG.debug("start_slide=%s, end_slide=%s", start_slide, end_slide) - hdbg.dassert_lte(start_slide, end_slide) - hdbg.dassert_lte(end_slide, len(slides_info)) - start_line = slides_info[start_slide].line_number - if end_slide == len(slides_info): - end_line = last_line_number - else: - end_line = slides_info[end_slide].line_number - _LOG.warning( - "filter_by_slides='%s' -> lines=[%s:%s]", - filter_by_slides, - start_line, - end_line, - ) - txt = lines[start_line - 1 : end_line - 1] - hdbg.dassert_isinstance(txt, list) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py deleted file mode 100644 index f3fd1b4a9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py +++ /dev/null @@ -1,530 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_formatting as hmarform -""" - -import logging -import re -from typing import List - -import helpers.hdbg as hdbg -import helpers.hmarkdown_headers as hmarhead -import helpers.hmarkdown_slides as hmarslid -import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr - -_LOG = logging.getLogger(__name__) - - -def remove_end_of_line_periods(lines: List[str]) -> List[str]: - """ - Remove periods at the end of each line in the given text. 
- - :param lines: list of input lines to process - :return: lines with end-of-line periods removed - """ - hdbg.dassert_isinstance(lines, list) - txt_out = [line.rstrip(".") for line in lines] - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -def remove_empty_lines(lines: List[str]) -> List[str]: - """ - Remove empty lines from the given text. - - :param lines: list of input lines to process - :return: lines with empty lines removed - """ - hdbg.dassert_isinstance(lines, list) - txt_out = [line for line in lines if line != ""] - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -# def remove_gdoc_artifacts(lines: List[str]) -> List[str]: -# """ -# Remove empty lines from the given text. - -# :param lines: list of input lines to process -# :return: lines with empty lines removed -# """ -# hdbg.dassert_isinstance(lines, list) -# # Remove “” and …. -# lines = re.sub(r"“", '"', lines) -# lines = re.sub(r"”", '"', lines) -# lines = re.sub(r"’", "'", lines) -# lines = re.sub(r"…", "", lines) -# hdbg.dassert_isinstance(lines, list) -# return lines - - -# TODO(gp): Add tests. -def remove_code_delimiters(lines: List[str]) -> List[str]: - """ - Remove ```python and ``` delimiters from a given text. - - :param lines: list of input lines containing code delimiters - :return: lines with the code delimiters removed - """ - hdbg.dassert_isinstance(lines, list) - # Join lines back to text, apply regex logic, then split again. - txt = "\n".join(lines) - # Replace the ```python and ``` delimiters with empty strings. - txt_out = txt.replace("```python", "").replace("```", "") - txt_out = txt_out.strip() - # Remove the numbers at the beginning of the line, if needed - # E.g., `3: """` -> `"""`. - txt_out = re.sub(r"(^\d+: )", "", txt_out, flags=re.MULTILINE) - # Split back into lines. 
- result = txt_out.split("\n") if txt_out else [] - hdbg.dassert_isinstance(result, list) - return result - - -def add_line_numbers(lines: List[str]) -> List[str]: - """ - Add line numbers to each line of text. - - :param lines: list of input lines to process - :return: lines with line numbers added - """ - hdbg.dassert_isinstance(lines, list) - numbered_lines = [] - for i, line in enumerate(lines, 1): - numbered_lines.append(f"{i}: {line}") - hdbg.dassert_isinstance(numbered_lines, list) - return numbered_lines - - -def remove_formatting(txt: str) -> str: - """ - Remove markdown and LaTeX formatting from text. - - :param txt: input text to process - :return: text with formatting removed - """ - # Replace bold markdown syntax with plain text. - txt = re.sub(r"\*\*(.*?)\*\*", r"\1", txt) - # Replace italic markdown syntax with plain text. - txt = re.sub(r"\*(.*?)\*", r"\1", txt) - # Remove \textcolor{red}{ ... }. - txt = re.sub(r"\\textcolor\{(.*?)\}\{(.*?)\}", r"\2", txt) - # Remove \red{ ... }. - txt = re.sub(r"\\\S+\{(.*?)\}", r"\1", txt) - return txt - - -def md_clean_up(txt: str) -> str: - """ - Clean up a Markdown file copy-pasted from Google Docs, ChatGPT. - - :param txt: input text to process - :return: text with the cleaning up applied - """ - # 0) General formatting. - # Remove dot at the end of each line. - txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) - # 1) ChatGPT formatting. - # E.g.,`` • Description Logics (DLs) are a family`` - # Replace `•` with `-` - txt = re.sub(r"•\s+", r"- ", txt) - # Replace `\t` with 2 spaces - txt = re.sub(r"\t", r" ", txt) - # Remove `⋅`. - txt = re.sub(r"⸻", r"", txt) - # “ - txt = re.sub(r"“", r'"', txt) - # ” - txt = re.sub(r"”", r'"', txt) - # ’ - txt = re.sub(r"’", r"'", txt) - # … - txt = re.sub(r"…", r"...", txt) - # 2) Latex formatting. - # Replace \( ... \) math syntax with $ ... $. - txt = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", txt) - # Replace \[ ... \] math syntax with $$ ... 
$$, handling multiline equations. - txt = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", txt, flags=re.DOTALL) - # Replace `P(.)`` with `\Pr(.)`. - txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) - # - txt = re.sub(r"\\left\[", r"[", txt) - txt = re.sub(r"\\right\]", r"]", txt) - # - txt = re.sub(r"\\mid", r"|", txt) - # - txt = re.sub(r"→", r"$\\rightarrow$", txt) - # Remove empty spaces at beginning / end of Latex equations $...$. - # E.g., $ \text{Student} $ becomes $\text{Student}$ - # txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) - # Transform `Example: Training a deep` into `E.g., training a deep`, - # converting the word after `Example:` to lower case. - txt = re.sub(r"\bExample:", "E.g.,", txt) - txt = re.sub(r"\bE.g.,\s+(\w)", lambda m: "E.g., " + m.group(1).lower(), txt) - return txt - - -def remove_empty_lines_from_markdown(lines: List[str]) -> List[str]: - """ - Remove all empty lines from markdown text. - - :param lines: list of input markdown lines - :return: formatted markdown lines - """ - hdbg.dassert_isinstance(lines, list) - # Remove empty lines. - result = [line for line in lines if line.strip()] - hdbg.dassert_isinstance(result, list) - return result - - -def prettier_markdown(txt: str) -> str: - """ - Format markdown text using `prettier`. - - :param txt: input text to format - :return: formatted text - """ - file_type = "md" - txt = dshdlipr.prettier_on_str(txt, file_type) - return txt - - -def format_markdown(txt: str) -> str: - """ - Format markdown text. - - :param txt: input text to format - :return: formatted text - """ - file_type = "md" - txt = dshdlipr.prettier_on_str(txt, file_type) - lines = txt.split("\n") - clean_lines = remove_empty_lines_from_markdown(lines) - txt = "\n".join(clean_lines) - return txt - - -def bold_first_level_bullets( - lines: List[str], *, max_length: int = 30 -) -> List[str]: - """ - Make first-level bullets bold in markdown text. 
- - :param lines: list of input markdown lines - :param max_length: max length of the bullet text to be bolded. The - value '-1' means no limit - :return: formatted markdown lines with first-level bullets in bold - """ - hdbg.dassert_isinstance(lines, list) - result = [] - for line in lines: - # Check if this is a first-level bullet point. - if re.match(r"^\s*- ", line): - # Check if the line has already bold text it in it. - if not re.search(r"\*\*", line): - # Bold first-level bullets. - indentation = len(line) - len(line.lstrip()) - if indentation == 0: - # First-level bullet, add bold markers. - m = re.match(r"^(\s*-\s+)(.*)", line) - hdbg.dassert(m, "Can't parse line='%s'", line) - bullet_text = m.group(2) # type: ignore[union-attr] - if max_length > -1 and len(bullet_text) <= max_length: - spaces = m.group(1) # type: ignore[union-attr] - line = spaces + "**" + bullet_text + "**" - result.append(line) - hdbg.dassert_isinstance(result, list) - return result - - -def format_figures(lines: List[str]) -> List[str]: - """ - Convert markdown slides with figures to use fenced div syntax with column - layout. - - If the input already uses column format or contains no figures, - returns unchanged. - - :param lines: list of input markdown lines - :return: formatted markdown lines with figures in column layout - """ - hdbg.dassert_isinstance(lines, list) - # Check if already in column format. - text = "\n".join(lines) - if "::: columns" in text and ":::: {.column" in text: - return lines - # Find first figure line to split content. - first_figure_idx = -1 - for i, line in enumerate(lines): - if re.match(r"^\s*!\[.*\]\(.*\)\s*$", line.strip()): - first_figure_idx = i - break - # If no figures found, return original lines unchanged. - if first_figure_idx == -1: - return lines - # Split content: slide titles (lines starting with *) stay outside columns, - # other content before first figure goes to left column, - # everything from first figure onwards goes to right column. 
- pre_figure_lines = lines[:first_figure_idx] - figure_content = lines[first_figure_idx:] - # Separate slide titles from other content - slide_titles = [] - text_lines = [] - for line in pre_figure_lines: - if line.strip().startswith("*"): - slide_titles.append(line) - else: - text_lines.append(line) - # Remove empty lines at the beginning and end of text_lines. - while text_lines and not text_lines[0].strip(): - text_lines.pop(0) - while text_lines and not text_lines[-1].strip(): - text_lines.pop() - # Build the column format. - result = [] - # Add slide titles first (outside columns) - result.extend(slide_titles) - result.append("::: columns") - result.append(":::: {.column width=65%}") - result.extend(text_lines) - result.append("::::") - result.append(":::: {.column width=40%}") - result.append("") - result.extend(figure_content) - result.append("::::") - result.append(":::") - hdbg.dassert_isinstance(result, list) - return result - - -def format_md_links_to_latex_format(lines: List[str]) -> List[str]: - r""" - Convert markdown links to formatted links with LaTeX styling. - - Convert markdown links: - - Plain URLs: - http://... or https://... - to the format: - [\textcolor{blue}{\underline{URL}}](URL) - - - Existing formatted links: - [Text](URL) - to the format: - [\textcolor{blue}{\underline{Text}}](URL) - - - Email links: - [](email@domain.com) or [](http://...) or [](https://...) - to the format: - [\textcolor{blue}{\underline{URL}}](URL) - - - Picture links - ![](lectures_source/.../lec_4_1_slide_5_image_1.png) - are left untouched - - :param lines: list of input markdown lines - :return: formatted markdown lines with styled links - """ - hdbg.dassert_isinstance(lines, list) - result = [] - # URL regex pattern. - url_pattern = r"https?://[^\s)}\]`]+" - # Pattern for URLs in backticks. - backtick_url_pattern = r"`(https?://[^\s`]+)`" - # Pattern for existing formatted links that need normalization. 
- # This matches [\textcolor{blue}{\underline{Text}}](URL) where Text != URL. - formatted_link_pattern = ( - r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\((https?://[^)]+)\)" - ) - # Pattern for markdown links: [Text](URL). - # Matches text that can include escaped underscores (\_ ). - markdown_link_pattern = r"\[((?:[^\]\\]|\\[_])+)\]\((https?://[^\)]+)\)" - # Pattern for email links: [email@domain.com](email@domain.com). - email_link_pattern = r"\[([^\]\\]+@[^\]\\]+)\]\(([^)]+@[^)]+)\)" - # Pattern for empty bracket links: [](URL) or [](email). - empty_bracket_pattern = r"\[\]\(([^\)]+)\)" - # Pattern for image links: ![...](...). - image_link_pattern = r"!\[.*?\]\([^\)]+\)" - for line in lines: - # Process the line for all URL patterns. - processed_line = line - # Store image links temporarily to avoid processing them. - image_placeholders = [] - - def store_image_link(match): - placeholder = f"__IMAGE_LINK_{len(image_placeholders)}__" - image_placeholders.append(match.group(0)) - return placeholder - - processed_line = re.sub( - image_link_pattern, store_image_link, processed_line - ) - - # Convert empty bracket links [](URL) or [](email). - def convert_empty_bracket_link(match): - target = match.group(1) - return rf"[\textcolor{{blue}}{{\underline{{{target}}}}}]({target})" - - processed_line = re.sub( - empty_bracket_pattern, convert_empty_bracket_link, processed_line - ) - - # Convert URLs in backticks. - def convert_backtick_url(match): - url = match.group(1) - return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" - - processed_line = re.sub( - backtick_url_pattern, convert_backtick_url, processed_line - ) - - # Normalize existing formatted links to keep existing display text. 
- def normalize_formatted_link(match): - text = match.group(1) - url = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" - - processed_line = re.sub( - formatted_link_pattern, normalize_formatted_link, processed_line - ) - - # Convert markdown links [Text](URL) to formatted links. - def convert_markdown_link(match): - text = match.group(1) - url = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" - - processed_line = re.sub( - markdown_link_pattern, convert_markdown_link, processed_line - ) - - # Convert email links [email@domain.com](email@domain.com) to formatted links. - def convert_email_link(match): - email = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{email}}}}}]({email})" - - processed_line = re.sub( - email_link_pattern, convert_email_link, processed_line - ) - # Convert plain URLs (but avoid converting URLs that are already part - # of formatted links). - # First, temporarily replace formatted links to avoid interfering with - # them. - temp_placeholders = [] - # Store existing correctly formatted links temporarily. - correct_formatted_link_pattern = ( - r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\(([^)]+)\)" - ) - - def store_formatted_link(match): - placeholder = f"__FORMATTED_LINK_{len(temp_placeholders)}__" - temp_placeholders.append(match.group(0)) - return placeholder - - temp_line = re.sub( - correct_formatted_link_pattern, store_formatted_link, processed_line - ) - - # Convert remaining plain URLs. - def convert_plain_url(match): - url = match.group(0) - return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" - - temp_line = re.sub(url_pattern, convert_plain_url, temp_line) - # Restore formatted links. - for i, placeholder in enumerate(temp_placeholders): - temp_line = temp_line.replace(f"__FORMATTED_LINK_{i}__", placeholder) - # Restore image links. 
- for i, image_link in enumerate(image_placeholders): - temp_line = temp_line.replace(f"__IMAGE_LINK_{i}__", image_link) - result.append(temp_line) - hdbg.dassert_isinstance(result, list) - return result - - -# TODO(gp): -> format_first_level_bullets_in_slide -def format_first_level_bullets(lines: List[str]) -> List[str]: - """ - Add empty lines to separate first level bullets and remove all remaining - empty lines. - - This is the formatting we use in the slides. - - :param lines: list of input markdown lines - :return: formatted markdown lines - """ - hdbg.dassert_isinstance(lines, list) - # Remove empty lines. - lines_clean = [line for line in lines if line.strip()] - # Handle special case: if input was only empty lines, preserve structure. - if not lines_clean and lines: - return lines - # Add empty lines only before first level bullets. - result = [] - for i, line in enumerate(lines_clean): - # Check if current line is a first level bullet (no indentation). - if re.match(r"^- ", line): - # Add empty line before first level bullet if not at start. - if i > 0: - result.append("") - result.append(line) - hdbg.dassert_isinstance(result, list) - return result - - -# TODO(gp): Implement and add tests. -def format_column_blocks(lines: List[str]) -> List[str]: - """ - # Make sure that there is a single empty line before and after the following - # block: - # - # 1) - # ``` - # ::: columns - # :::: {.column width=55%} - # ``` - # 2) - # ``` - # :::: - # :::: {.column width=40%} - # ``` - # 3) - # ``` - # :::: - # ::: - # ``` - - # - """ - return lines - - -def format_markdown_slide(lines: List[str]) -> List[str]: - """ - Format markdown text for a slide. - - :param lines: input lines to format - :return: formatted slide text - """ - hdbg.dassert_isinstance(lines, list) - if False: - lines = bold_first_level_bullets(lines) - txt = "\n".join(lines) - # Format the markdown slides. 
- # TODO(gp): Maybe the conversion should be done inside `prettier_on_str` - # passing a marker to indicate that the text is a slide. - lines = hmarslid.convert_slide_to_markdown(lines) - # lines = format_column_blocks() - # - file_type = "md" - txt = "\n".join(lines) - txt = dshdlipr.prettier_on_str(txt, file_type) - # - lines = txt.split("\n") - lines = hmarslid.convert_markdown_to_slide(lines) - # Format the first level bullets. - lines = format_first_level_bullets(lines) - # - lines = hmarhead.capitalize_header(lines) - return lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py deleted file mode 100644 index 532de2aee..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py +++ /dev/null @@ -1,841 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_headers as hmarhead -""" - -import dataclasses -import logging -import re -from typing import List, Optional, Tuple, cast - -import helpers.hdbg as hdbg -import helpers.hparser as hparser -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -_TRACE = False - - -def is_markdown_line_separator(line: str, *, min_repeats: int = 5) -> bool: - """ - Check if the given line is a Markdown separator. - - This function determines if a line consists of repeated characters - (`#`, `/`, `-`, `=`) that would indicate a markdown separator. 
- - :param line: current line of text being processed - :param min_repeats: minimum number of times the characters have to - be repeated to be considered a separator, e.g., if `min_repeats - = 2`, then `##`, `###`, `//` are considered to be line - separators, but `#`, `/` are not - :return: whether the line is a separator - """ - separator_pattern = rf""" - \#*\s* # Optional leading `#` and whitespace. - ([#/=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it - # (`min_repeats` - 1) times. - \s*$ # Match only whitespace characters - # until the end of the line. - """ - res = bool(re.match(separator_pattern, line, re.VERBOSE)) - return res - - -def is_header(line: str) -> Tuple[bool, int, str]: - """ - Check if the given line is a Markdown header. - - :param line: line to check - :return: tuple containing: - - boolean indicating if the line is a header - - level of the header (`0` if not a header) - - title of the header (empty string if not a header) - """ - # hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) - m = re.match(r"(#+)\s+(.*)", line) - is_header_ = bool(m) - if m: - level = len(m.group(1)) - title = m.group(2) - else: - level = 0 - title = "" - return is_header_, level, title - - -# ############################################################################# -# Frame chapters -# ############################################################################# - - -def _has_internal_capitals(word: str) -> bool: - """ - Check if a word has capital letters within it (not just at the start). - - This function detects words like `SimpleFeedForward`, `DeepNPTS` that - should be preserved without title case transformation. - - Note: uppercase letters immediately after an apostrophe are excluded - from this check, since they are not "internal capitals" but rather - normal English capitalization (e.g., "Won'T" has a capital T that is - not internal but rather a grammatical artifact of title case tools). 
- - :param word: word to check - :return: `True` if the word has internal capitals, `False` otherwise - """ - hdbg.dassert_isinstance(word, str) - # A word has internal capitals if it contains at least one uppercase letter - # after the first character, excluding uppercase letters immediately after - # an apostrophe. - if len(word) <= 1: - return False - for i in range(1, len(word)): - if word[i].isupper() and word[i - 1] != "'": - return True - return False - - -def frame_chapters(lines: List[str], *, max_lev: int = 4) -> List[str]: - """ - Add the frame around each chapter. - """ - hdbg.dassert_isinstance(lines, list) - txt_new: List[str] = [] - # _LOG.debug("lines=%s", lines) - for i, line in enumerate(lines): - _LOG.debug("line=%d:%s", i, line) - m = re.match(r"^(\#+) ", line) - txt_processed = False - if m: - comment = m.group(1) - lev = len(comment) - _LOG.debug(" -> lev=%s", lev) - if lev < max_lev: - sep = comment + " " + "#" * (80 - 1 - len(comment)) - txt_new.append(sep) - txt_new.append(line) - txt_new.append(sep) - txt_processed = True - else: - _LOG.debug( - " -> Skip formatting the chapter frame: lev=%d, max_lev=%d", - lev, - max_lev, - ) - if not txt_processed: - txt_new.append(line) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -def has_mixed_case(word: str) -> bool: - """ - Check if a word has capital letters in positions other than the first. - - This detects words like "SimpleFeedForward", "DeepNPTS", etc. that should - be preserved as-is. - - :param word: word to check - :return: True if the word has capital letters after the first position - """ - if len(word) <= 1: - return False - # Check if any character after the first position is uppercase. - return any(c.isupper() for c in word[1:]) - - -def _capitalize_title_word(word: str) -> str: - """ - Capitalize the first letter of a word without capitalizing after apostrophes. 
- - Python's `str.title()` capitalizes the first letter after ANY non-alphanumeric - character, including apostrophes. For example, `"won't".title()` returns - `"Won'T"` instead of the expected `"Won't"`. - - This function instead capitalizes only the first letter of the word and - lowercases any uppercase letters that follow an apostrophe. - - :param word: word to capitalize - :return: word with proper title case (first letter capitalized, no capitals - after apostrophes) - """ - if not word: - return word - chars = list(word) - chars[0] = chars[0].upper() - for i in range(1, len(chars)): - if chars[i - 1] == "'": - chars[i] = chars[i].lower() - return "".join(chars) - - -def capitalize_header(lines: List[str]) -> List[str]: - """ - Improve the header and slide titles. - - - Headers start with one or more `#`s - - Slide titles start with one `*` - - - The title is transformed to title case as below: - - ML theory -> ML Theory - - A map of machine learning -> A Map of Machine Learning - - Business strategists -> - Business Strategists - - Establish a phased, collaborative approach -> - Establish a Phased, Collaborative Approach - - - Strings inside backticks, single quotes, and double quotes are preserved, - with careful handling to avoid matching apostrophes in contractions. - - Words with internal capital letters are preserved (e.g., SimpleFeedForward, - DeepNPTS). - - Contractions and words with apostrophes are properly capitalized - (e.g., "won't" becomes "Won't", not "Won'T"). - - Headers inside fenced code blocks are not processed. - """ - import helpers.hmarkdown_fenced_blocks as hmafeblo - - hdbg.dassert_isinstance(lines, list) - # Replace fenced blocks with tags to prevent processing headers inside them. - lines_without_fenced, fence_map = hmafeblo.replace_fenced_blocks_with_tags( - lines - ) - txt_new: List[str] = [] - for i, line in enumerate(lines_without_fenced): - # Parse header (starting with `#`) and slide title (starting with `*`). 
- m = re.match(r"^(\#+|\*) (.*)$", line) - if m: - # Parse the title. - title = m.group(2) - # Transform to title case, leaving words that are all capitalized - # and conjunctions as is, while preserving quoted strings. - non_cap_words = { - "a", - "an", - "and", - "as", - "at", - "but", - "by", - "for", - "in", - "of", - "on", - "or", - "the", - "to", - "vs", - "with", - } - # Find and temporarily replace quoted strings to preserve them. - quoted_strings = [] - placeholders = [] - # Pattern to match strings inside backticks, single quotes, or double quotes. - # Single quotes are matched only when not preceded or followed by word - # characters, to avoid matching apostrophes in contractions like "don't". - # Backtick and double-quote patterns are simpler since they're less likely - # to be used in natural text. - quote_pattern = r""" - ( # Start of alternation - `[^`]*` # Backtick-quoted string - | # OR - (? str: - quoted_strings.append(match.group(0)) - placeholder = f"__QUOTED_{len(quoted_strings) - 1}__" - placeholders.append(placeholder) - return placeholder - - # Replace quoted strings with placeholders. - title_with_placeholders = re.sub( - quote_pattern, replace_quoted, title, flags=re.VERBOSE - ) - # Split into words. - words = title_with_placeholders.split() - # Find the first non-numeric word index to always capitalize it, - # even if it's in non_cap_words (e.g., "4.4 the Victim" -> "4.4 The Victim"). - first_text_word_idx = None - for j, word in enumerate(words): - if word.startswith("__QUOTED_") and word.endswith("__"): - continue - # Skip numeric/punctuation-only prefixes like "4.4", "1.", "1.2.3". - if not re.match(r"^[\d\.\-]+$", word): - first_text_word_idx = j - break - # If all words are numeric, fall back to index 0. - if first_text_word_idx is None and words: - first_text_word_idx = 0 - # Process each word. 
- for i, word in enumerate(words): - if word.startswith("__QUOTED_") and word.endswith("__"): - # Skip placeholder words, they will be restored later. - continue - elif i == first_text_word_idx and not word.isupper(): - # Capitalize the first text word (may follow numeric prefix - # like "4.4") even if it's in non_cap_words. - if _has_internal_capitals(word): - # Preserve words with internal capitals. - pass - else: - words[i] = _capitalize_title_word(word) - elif word.isupper(): - # Skip words that are all caps (e.g. ML, API). - continue - elif _has_internal_capitals(word): - # Preserve words with internal capitals (e.g., SimpleFeedForward). - pass - elif word.lower() in non_cap_words: - # Don't capitalize conjunctions and other minor words. - words[i] = word.lower() - else: - # Capitalize other words. - words[i] = _capitalize_title_word(word) - title = " ".join(words) - # Restore quoted strings. - for i, placeholder in enumerate(placeholders): - title = title.replace(placeholder, quoted_strings[i]) - # Reconstruct the line. - line = m.group(1) + " " + title - txt_new.append(line) - else: - txt_new.append(line) - # Restore fenced blocks. - txt_new = hmafeblo.replace_tags_with_fenced_blocks(txt_new, fence_map) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -# ############################################################################# -# Header processing -# ############################################################################# - - -# TODO(gp): This could be done by processing `HeaderList`. -def extract_section_from_markdown( - lines: List[str], header_name: str -) -> List[str]: - """ - Extract a section of text from a Markdown document based on the header - name. - - The function identifies a section by locating the specified header - and captures all lines until encountering another header of the same - or higher level. Headers are identified by the '#' prefix, and their - level is determined by the number of '#' characters. 
- - :param lines: markdown content as a list of strings - :param header_name: exact header name to extract (excluding `#` - symbols) - :return: extracted section as a list of strings, including the header line - itself and all lines until the next header of the same or higher - level - """ - hdbg.dassert_isinstance(lines, list) - _LOG.debug(hprint.to_str("lines")) - extracted_lines = [] - # Level of the current header being processed. - current_level: Optional[int] = None - # Flag to indicate if we're inside the desired section. - inside_section: bool = False - found = False - # Process each line in the markdown content. - for line in lines: - _LOG.debug(hprint.to_str("line")) - # Check if the line is a markdown header. - if line.strip().startswith("#"): - # Determine the level of the header by counting leading '#' - # characters. - header_level = len(line) - len(line.lstrip("#")) - # Extract the actual header text by stripping '#' and surrounding - # whitespace. - header_text = line.strip("#").strip() - _LOG.debug(hprint.to_str("header_level, header_text")) - # Handle the end of the desired section when encountering another - # header. - if inside_section: - hdbg.dassert_is_not(current_level, None) - current_level = cast(int, current_level) - if header_level <= current_level: - break - # Check if the current line is the desired header. - if header_text == header_name: - found = True - # Set the level of the matched header. - current_level = header_level - # Mark that we are now inside the desired section. - inside_section = True - # Add the line to the output if inside the desired section. 
- if inside_section: - extracted_lines.append(line) - _LOG.debug(hprint.to_str("extracted_lines")) - if not found: - raise ValueError(f"Header '{header_name}' not found") - hdbg.dassert_isinstance(extracted_lines, list) - return extracted_lines - - -# ############################################################################# -# HeaderInfo -# ############################################################################# - - -@dataclasses.dataclass -class HeaderInfo: - """ - Store the header level, the description, and the line number in the - original file. - - E.g., `(1, "Chapter 1", 5)` and `(2, "Section 1.1", 10)` - """ - - level: int - description: str - line_number: int - - def __init__(self, level: int, description: str, line_number: int): - hdbg.dassert_isinstance(level, int) - hdbg.dassert_lte(1, level) - self.level = level - # - hdbg.dassert_isinstance(description, str) - hdbg.dassert_ne( - description, - "", - "Invalid HeaderInfo: %s, %s, %s", - level, - description, - line_number, - ) - self.description = description - # - hdbg.dassert_isinstance(line_number, int) - hdbg.dassert_lte(1, line_number) - self.line_number = line_number - # - self.children: List[HeaderInfo] = [] - - def as_tuple(self) -> Tuple[int, str, int]: - return (self.level, self.description, self.line_number) - - def __repr__(self) -> str: - return ( - f"HeaderInfo({self.level}, '{self.description}', {self.line_number})" - ) - - -HeaderList = List[HeaderInfo] - - -def header_list_to_str(header_list: HeaderList) -> str: - """ - Convert a list of headers into a string. - - :param header_list: list of headers - :return: string representation of the header list - """ - return "\n".join([str(header) for header in header_list]) - - -def sanity_check_header_list(header_list: HeaderList) -> None: - """ - Check that the header list is valid. - - 1) The first header should be level 1. - 2) All level 1 headers are unique. 
- 3) Check that consecutive elements in the header list only increase by at - most one level at a time (even if it can decrease by multiple levels). - - E.g., the following is valid: - ``` - # Header 1 - # Header 2 - ## Header 2.1 - ## Header 2.2 - # Header 3 - ``` - - E.g., the following is valid: - ``` - # Header1 - ## Header 1.1 - ### Header 1.1.1 - # Header 2 - ``` - - E.g., the following is not valid: - ``` - # Header 1 - ### Header 1.0.1 - # Header 2 - ``` - - :param header_list: list of headers to validate - """ - # 1) The first header should be level 1. - if header_list and header_list[0].level > 1: - _LOG.warning( - "First header '%s' at line %s is not level 1, but %s", - header_list[0].description, - header_list[0].line_number, - header_list[0].level, - ) - # 2) All level 1 headers are unique. - level_1_headers = [ - header.description for header in header_list if header.level == 1 - ] - hdbg.dassert_no_duplicates(level_1_headers) - # 3) Check that consecutive elements in the header list only increase by at - # most one level at a time (even if it can decrease by multiple levels). - if len(header_list) > 1: - for i in range(1, len(header_list)): - hdbg.dassert_isinstance(header_list[i - 1], HeaderInfo) - hdbg.dassert_isinstance(header_list[i], HeaderInfo) - if header_list[i].level - header_list[i - 1].level > 1: - msg = [] - msg.append( - "Consecutive headers increase by more than one level:" - ) - msg.append(f" {header_list[i - 1]}") - msg.append(f" {header_list[i]}") - msg = "\n".join(msg) - raise ValueError(msg) - - -# TODO(gp): Move sanity check outside? -def extract_headers_from_markdown( - lines: List[str], max_level: int, *, sanity_check: bool = True -) -> HeaderList: - """ - Extract headers from Markdown file and return an `HeaderList`. 
- - :param lines: content of the input Markdown file as list of strings - :param max_level: maximum header levels to parse (e.g., '3' parses all levels - included `###`, but not `####`) - :param sanity_check: whether to check that the header list is valid - :return: generated `HeaderList`, e.g., - ``` - [ - (1, "Chapter 1", 5), - (2, "Section 1.1", 10), ...] - ``` - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_level) - header_list: HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - # TODO(gp): Use the iterator. - # Skip the visual separators. - if is_markdown_line_separator(line): - continue - # Get the header level and title. - is_header_, level, title = is_header(line) - if is_header_ and level <= max_level: - header_info = HeaderInfo(level, title, line_number) - header_list.append(header_info) - # Check the header list. - if sanity_check: - sanity_check_header_list(header_list) - else: - _LOG.debug("Skipping sanity check") - hdbg.dassert_isinstance(header_list, list) - return header_list - - -def header_list_to_vim_cfile( - markdown_file: str, header_list: HeaderList -) -> List[str]: - """ - Convert a list of headers into a Vim cfile format. - - Use the generated file in Vim as: - `:cfile ` - Use `:cnext` and `:cprev` to navigate between headers. - - :param markdown_file: path to the input Markdown file - :param header_list: list of headers, where each header is a tuple containing - the line number, level, and title - :return: generated cfile content as a list of strings in the format: - ``` - ... - ::
- ... - ``` - """ - hdbg.dassert_isinstance(markdown_file, str) - hdbg.dassert_isinstance(header_list, list) - _LOG.debug(hprint.to_str("header_list")) - output_lines = [ - f"{markdown_file}:{header_info.line_number}:{header_info.description}" - for header_info in header_list - ] - hdbg.dassert_isinstance(output_lines, list) - return output_lines - - -def header_list_to_markdown(header_list: HeaderList, mode: str) -> List[str]: - """ - Convert a list of headers into a Markdown format. - - :param header_list: list of headers, where each header is a tuple - containing the level, title, and line number - :param mode: format of the output: - - `list`: indents headers to create a nested list - - `headers`: uses Markdown header syntax (e.g., '#', '##', '###') - :return: generated Markdown content as a list of strings - """ - hdbg.dassert_isinstance(header_list, list) - _LOG.debug(hprint.to_str("header_list mode")) - output_lines = [] - for header_info in header_list: - level, title, line_number = header_info.as_tuple() - _ = line_number - if mode == "list": - header_prefix = " " * (level - 1) + "-" - elif mode == "headers": - header_prefix = "#" * level - else: - raise ValueError(f"Invalid mode '{mode}'") - output_lines.append(f"{header_prefix} {title}") - hdbg.dassert_isinstance(output_lines, list) - return output_lines - - -# ############################################################################# -# Process headers. -# ############################################################################# - - -def format_headers(lines: List[str], out_file_name: str, max_lev: int) -> None: - """ - Format the headers in the input lines and write the formatted text to the - output file. 
- - :param lines: list of input lines to process - :param out_file_name: name of the output file to write the formatted - text to - :param max_lev: maximum level of headings to include in the - formatted text - """ - hdbg.dassert_isinstance(lines, list) - txt = lines[:] - # - for line in txt: - m = re.search(r"max_level=(\d+)", line) - if m: - max_lev = int(m.group(1)) - _LOG.warning("Inferred max_level=%s", max_lev) - break - hdbg.dassert_lte(1, max_lev) - # Remove all headings. - txt_tmp = [] - for line in txt: - # Keep the comments. - if not is_markdown_line_separator(line): - txt_tmp.append(line) - txt = txt_tmp[:] - # Add proper heading of the correct length. - txt_tmp = [] - for line in txt: - # Keep comments. - found = False - for i in range(1, max_lev + 1): - if line.startswith("#" * i + " "): - row = "#" * i + " " + "#" * (79 - 1 - i) - txt_tmp.append(row) - txt_tmp.append(line) - txt_tmp.append(row) - found = True - if not found: - txt_tmp.append(line) - # TODO(gp): Remove all empty lines after a heading. - # TODO(gp): Format title (first line capital and then small). - hparser.to_file(txt_tmp, out_file_name) - - -def modify_header_level(lines: List[str], level: int) -> List[str]: - """ - Increase or decrease the level of headings by the specified amount. - - :param lines: input lines to modify - :param level: amount to adjust header levels (positive increases, - negative decreases) - :return: modified lines with header levels adjusted - """ - hdbg.dassert_isinstance(lines, list) - txt_tmp = [] - for line in lines: - # TODO(gp): Use the iterator. - line = line.rstrip(r"\n") - is_header_, current_level, title = is_header(line) - if is_header_: - modified_level = current_level + level - # Ensure modified level is within valid range (1-6 for markdown headers). 
- hdbg.dassert_lte(1, modified_level) - hdbg.dassert_lte(modified_level, 6) - line = "#" * modified_level + " " + title - txt_tmp.append(line) - hdbg.dassert_isinstance(txt_tmp, list) - return txt_tmp - - -# ############################################################################# -# _HeaderTreeNode -# ############################################################################# - - -# This is a different representation of the data than the one in `HeaderList` -# because it is a tree structure. So we use a different type hint. -_HeaderTree = List[HeaderInfo] - - -def build_header_tree(header_list: HeaderList) -> _HeaderTree: - """ - Build a tree (list of Node objects) from the flat list. - - We assume that the level changes never jump by more than 1. - - :param header_list: flat list of headers - :return: tree structure of headers - """ - tree: _HeaderTree = [] - stack: _HeaderTree = [] - for node in header_list: - if node.level == 1: - tree.append(node) - stack = [node] - else: - # Pop until we find the proper parent: one with level < current - # level. - while stack and stack[-1].level >= node.level: - stack.pop() - if stack: - stack[-1].children.append(node) - else: - tree.append(node) - stack.append(node) - # hdbg.dassert_eq(len(header_list), len(tree)) - # hdbg.dassert_eq(len(stack), 0) - return tree - - -def _find_header_tree_ancestry( - tree: _HeaderTree, level: int, description: str -) -> Optional[_HeaderTree]: - """ - Recursively search for the node matching (level, description). - - If found, return the ancestry as a list from the root down to that - node. Otherwise return None. 
- - :param tree: header tree to search - :param level: header level to match - :param description: header description to match - :return: ancestry list from root to matching node, or None if not - found - """ - for node in tree: - if node.level == level and node.description == description: - return [node] - result = _find_header_tree_ancestry(node.children, level, description) - if result: - return [node] + result - return None - - -def header_tree_to_str( - tree: _HeaderTree, - ancestry: Optional[_HeaderTree], - *, - open_modifier: str = "**", - close_modifier: str = "**", - indent: int = 0, -) -> str: - """ - Return the tree as a string. - - Only expand (i.e. recursively include children) for a node if it is part of - the ancestry of the selected node. - - :param tree: tree to convert to a string - :param ancestry: ancestry of the selected node - :param open_modifier: modifier to use for the open of the selected node - :param close_modifier: modifier to use for the close of the selected node - :param indent: indent of the tree - :return: string representation of the tree - - - Nodes not in the ancestry are included on one line (even if they have - children). - - The selected node (last in the ancestry) is included highlighted. - """ - prefix = " " * indent + "- " - result = [] - for node in tree: - _LOG.debug(hprint.to_str("node")) - # Check if this node is the next expected one in the ancestry branch. - if ancestry and node is ancestry[0]: - # If this is the last in the ancestry, it is the selected node. - val = prefix - if len(ancestry) == 1: - val += open_modifier + node.description + close_modifier - else: - val += node.description - _LOG.debug("-> %s", hprint.to_str("val")) - if val: - result.append(val) - # Expand this node’s children using the rest of the ancestry. 
- val = header_tree_to_str( - node.children, - ancestry[1:], - indent=indent + 1, - open_modifier=open_modifier, - close_modifier=close_modifier, - ) - else: - # For nodes not on the selected branch, include them without - # expanding. - val = prefix + node.description - _LOG.debug("-> %s", hprint.to_str("val")) - if val: - result.append(val) - return "\n".join(result) - - -def selected_navigation_to_str( - tree: _HeaderTree, - level: int, - description: str, - *, - open_modifier: str = "**", - close_modifier: str = "**", -) -> str: - """ - Given a level and description for the selected node, print the navigation. - - :param tree: header tree - :param level: level of the selected node - :param description: description of the selected node - :param open_modifier: modifier for opening the selected node - :param close_modifier: modifier for closing the selected node - :return: navigation string with selected node highlighted - """ - ancestry = _find_header_tree_ancestry(tree, level, description) - hdbg.dassert_ne( - ancestry, - None, - "Node (%s, '%s') not found", - level, - description, - ) - _LOG.debug(hprint.to_str("ancestry")) - txt = header_tree_to_str( - tree, - ancestry, - open_modifier=open_modifier, - close_modifier=close_modifier, - ) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py deleted file mode 100644 index a471a44cc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py +++ /dev/null @@ -1,367 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_rules as hmarrule -""" - -import logging -import re -from typing import Dict, List - -import helpers.hdbg as hdbg -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -from 
helpers.hmarkdown_headers import ( - extract_headers_from_markdown, - sanity_check_header_list, -) - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Add a decorator like in hprint to process both strings and lists -# of strings. - -# ############################################################################# -# Rules processing. -# ############################################################################# - -# Rules are organized in 4 levels of a markdown file: -# -# 1) Rule sets (level 1) -# - E.g., `General`, `Python`, `Notebooks`, `Markdown` -# - Level 1 is a set of rules determined mainly by the type of the file we -# are processing -# - Several sets of rules can be applied to a given file type -# - E.g., rules in `Python` and `Notebooks` apply to all Python files -# 2) Sections (level 2) -# - E.g., `Naming`, `Comments`, `Code_design`, `Imports`, `Type_annotations` -# 3) Targets (level 3) -# - E.g., LLM vs Linter -# 4) Atomic rules (level 4) -# - This is the set of rules that are applied to the file -# ``` -# - Spell commands in lower case and programs with the first letter in upper case -# - E.g., `git` as a command, `Git` as a program -# - E.g., capitalize the first letter of `Python` -# ``` - -# Extract the rules from the markdown file: -# ``` -# > extract_toc_from_txt.py \ -# -i docs/code_guidelines/all.coding_style_guidelines.reference.md \ -# --max_level 2 -# - General -# - Spelling -# - LLM -# - Linter -# - Python -# - Naming -# - LLM -# - Linter -# - Docstrings -# - ... 
-# - Comments -# - Code_implementation -# - Code_design -# - Imports -# - Type_annotations -# - Functions -# - Scripts -# - Logging -# - Misc -# - Unit_tests -# - All -# - Notebooks -# - General -# - Plotting -# - Jupytext -# - Markdown -# - Naming -# - General -# ``` - -# - The rules to apply to a Python file are automatically extractedas: -# `([`General:*`, `Python:*`], `LLM`)` -# - The rules to apply to a Notebook file are automatically extracted as: -# `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` -# - A user can specify to apply a subset of rules like -# `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` -# - Atomic rules are the first-level bullets of the markdown file, e.g., -# ``` -# - Spell commands in lower case and programs with the first letter in upper case -# - E.g., `git` as a command, `Git` as a program -# - E.g., capitalize the first letter of `Python` -# ``` - - -def sanity_check_rules(lines: List[str]) -> None: - """ - Sanity check the rules. - - :param lines: list of text lines to check - """ - header_list = extract_headers_from_markdown(lines, max_level=5) - # 1) Start with level 1 headers. - # 2) All level 1 headers are unique. - # 3) Header levels are increasing / decreasing by at most 1. - sanity_check_header_list(header_list) - # 4) Level 3 headers are always `LLM` or `Linter`. - # for header in header_list: - # if header.level != 3: - # hdbg.dassert_in(header.description, ["LLM", "Linter"]) - # TODO(gp): Implement this. - # 5) All headers have no spaces. - # TODO(gp): Implement this. 
- - -# A `Rule` is a string separated by `:` characters, where each part can be: -# - `*` (which means "match any string") -# - a `string` (e.g., `Spelling`) -# - a list of strings separated by `|` (e.g., `LLM|Linter`) -# -# E.g., valid rules are: -# - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` -# - For a Python file -> `General|Python:*:LLM` -# - For a Notebook file -> `General|Python|Notebooks:*:LLM` -# - `Python:Naming|Docstrings|Comments:LLM` -SelectionRule = str - - -# A `Guidelines`` is a header list with only level 1 headers storing the full -# hierarchy of the rules as a description, e.g., -# `(1, "Spelling:All:LLM", xyz)` -# TODO(gp): Make Guidelines descend from HeaderList. - -HeaderInfo = hmarhead.HeaderInfo -HeaderList = hmarhead.HeaderList -Guidelines = HeaderList - - -def convert_header_list_into_guidelines( - header_list: HeaderList, -) -> Guidelines: - """ - Convert the header list into a `Guidelines` object with only level 1 - headers and full hierarchy of the rules as description. - - Expand a header list like: - ``` - - General - - Spelling - - LLM - - Linter - - Python - - Naming - - LLM - - Linter - ``` - represented internally as: - ``` - (1, "General", xyz), - (2, "Spelling", xyz), - (3, "LLM", xyz), - (3, "Linter", xyz), - (1, "Python", xyz), - (2, "Naming", xyz), - (3, "LLM", xyz), - (3, "Linter", xyz), - ``` - into: - ``` - [ - (1, "Spelling:All:LLM", xyz), - (1, "Spelling:All:Linter", xyz), - (1, "Python:Naming:LLM", xyz), - (1, "Python:Naming:Linter", xyz), - ] - ``` - - :param header_list: input header list to convert - :return: guidelines with flattened hierarchy - """ - hdbg.dassert_isinstance(header_list, list) - # Store the last level headers. - level_1 = "" - level_2 = "" - # Accumulate the last level headers. - level_3_headers = [] - # Scan the header list. - for header_info in header_list: - level, description, line_number = header_info.as_tuple() - # Store the headers found at each level. 
- if level == 1: - level_1 = description - elif level == 2: - level_2 = description - elif level == 3: - # Store the level 3 header. - hdbg.dassert_ne(level_1, "") - hdbg.dassert_ne(level_2, "") - full_level_3 = f"{level_1}:{level_2}:{description}" - header_info_tmp = HeaderInfo(1, full_level_3, line_number) - level_3_headers.append(header_info_tmp) - else: - raise ValueError(f"Invalid header info={header_info}") - return level_3_headers - - -def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: - r""" - Convert a rule into an actual regular expression. - - E.g., - - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` - - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` - - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` - - `Python:*:Linter` -> `Python:(\S*):Linter` - - :param selection_rule: rule to convert to regex - :return: regex pattern string - """ - hdbg.dassert_isinstance(selection_rule, SelectionRule) - # Parse the rule into tokens. - selection_rule_parts = selection_rule.split(":") - hdbg.dassert_eq(len(selection_rule_parts), 3) - # Process each part of the rule regex. - rule_parts_out = [] - for rule_part_in in selection_rule_parts: - hdbg.dassert_not_in(" ", rule_part_in) - if rule_part_in == "*": - # Convert `*` into `\S*`. - rule_part_out = r"(\S*)" - elif "|" in rule_part_in: - # Convert `LLM|Linter` into `(LLM|Linter)`. - rule_part_out = "(" + rule_part_in + ")" - else: - # Keep the string as is. - rule_part_out = rule_part_in - rule_parts_out.append(rule_part_out) - # Join the parts of the rule back together. - rule_out = ":".join(rule_parts_out) - return rule_out - - -def extract_rules( - guidelines: Guidelines, selection_rules: List[SelectionRule] -) -> Guidelines: - """ - Extract the set of rules from the `guidelines` that match the rule regex. 
- - :param guidelines: guidelines to extract the rules from - :param selection_rules: selection rules to use to extract the rules - :return: extracted rules - """ - hdbg.dassert_isinstance(guidelines, list) - hdbg.dassert_isinstance(selection_rules, list) - # A rule regex is a string separated by `:` characters, where each part is - # - `*` (meaning "any string") - # - a `string` (e.g., `Spelling`) - # - a list of strings separated by `|` (e.g., `LLM|Linter`) - # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. - # Convert each rule regex into a regular expression. - rule_regex_map: Dict[str, str] = {} - for rule_regex_str in selection_rules: - hdbg.dassert_isinstance(rule_regex_str, SelectionRule) - regex = _convert_rule_into_regex(rule_regex_str) - _LOG.debug(hprint.to_str("rule_regex_str regex")) - hdbg.dassert_not_in(rule_regex_str, rule_regex_map) - rule_regex_map[rule_regex_str] = regex - # Extract the set of rules from the `guidelines` that match the rule regex. - rule_sections = [] - for guideline in guidelines: - # A guideline description is a string separated by `:` characters, where each part is - # (1, "Python:Naming:Linter", xyz), - for k, v in rule_regex_map.items(): - if re.match(v, guideline.description): - _LOG.debug("%s matches %s", k, guideline.description) - if guideline not in rule_sections: - rule_sections.append(guideline) - # Select the rules. - _LOG.debug( - "Selected %s sections:\n%s", - len(rule_sections), - "\n".join([r.description for r in rule_sections]), - ) - return rule_sections - - -# TODO(gp): This seems private? -def parse_rules_from_txt(lines: List[str]) -> List[str]: - """ - Parse rules from a chunk of markdown text. - - - Extract first-level bullet point list items from text until the next one. - - Sub-lists nested under first-level items are extracted together with the - first-level items. 
- - :param lines: list of text lines to process - ``` - - Item 1 - - Item 2 - - Item 3 - - Item 4 - ``` - :return: extracted bullet points - """ - hdbg.dassert_isinstance(lines, list) - # Store the first-level bullet points. - bullet_points = [] - # Store the current item including the first level bullet point and all - # its sub-items. - current_item = "" - for line in lines: - line = line.rstrip() - if not line: - continue - if re.match(r"^- ", line): - # Match first-level bullet point item. - if current_item: - # Store the previous item, if any. - bullet_points.append(current_item) - # Start a new first-level bullet point item. - current_item = line - elif re.match(r"^\s+- ", line): - # Match a sub-item (non first-level bullet point item). - # Append a sub-item to the current item. - current_item += "\n" + line - elif len(line.strip()) != 0 and current_item: - # Append a line to the current item. - current_item += "\n" + line - # Add the last item if there is one. - if current_item: - bullet_points.append(current_item) - hdbg.dassert_isinstance(bullet_points, list) - return bullet_points - - -def extract_rules_from_section( - lines: List[str], start_line_number: int -) -> List[str]: - """ - Extract rules from a section of a markdown file. - - :param lines: list of markdown text lines to extract the rules from - :param start_line_number: line number of the section to start extracting - the rules from - :return: extracted rules - """ - hdbg.dassert_isinstance(lines, list) - # Find the line number of the next header. - end_line_number = start_line_number - while True: - hdbg.dassert_lt(end_line_number, len(lines)) - line = lines[end_line_number] - if line.startswith("#"): - break - end_line_number += 1 - _LOG.debug("end_line_number=%s", end_line_number) - # Parse the markdown text into a list of bullet points. - bullet_points = parse_rules_from_txt( - lines[start_line_number:end_line_number] - ) - # Extract the rules from the bullet points. 
- rules = [] - for bullet_point in bullet_points: - rules.append(bullet_point) - hdbg.dassert_isinstance(rules, list) - return rules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py deleted file mode 100644 index 2cefec7a8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py +++ /dev/null @@ -1,201 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_slides as hmarslid -""" - -import logging -import re -from typing import Any, Callable, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -from helpers.hmarkdown_comments import process_comment_block -from helpers.hmarkdown_headers import ( - HeaderInfo, - HeaderList, - is_markdown_line_separator, -) - -_LOG = logging.getLogger(__name__) - - -_TRACE = True - - -def extract_slides_from_markdown( - lines: List[str], -) -> Tuple[HeaderList, int]: - """ - Extract slides (i.e., sections prepended by `*`) from Markdown file and - return an `HeaderList`. - - :param lines: content of the input Markdown file as list of strings - :return: tuple containing: - - generated `HeaderList` - ``` - [ - (1, "Slide 1", 5), - (1, "Slide 2", 10), ...] - ``` - - last line number of the file, e.g., '100' - """ - hdbg.dassert_isinstance(lines, list) - header_list: HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - _LOG.debug("%d: %s", line_number, line) - # TODO(gp): Use the iterator. - # Skip the visual separators. - if is_markdown_line_separator(line): - continue - # Get the header level and title. 
- m = re.match(r"^\* (.*)$", line) - if m: - title = m.group(1) - header_info = HeaderInfo(1, title, line_number) - header_list.append(header_info) - last_line_number = len(lines) - # Return results. - hdbg.dassert_isinstance(header_list, list) - return header_list, last_line_number - - -# TODO(gp): Consider passing and returning List[str] -def process_slides(txt: str, transform: Callable[..., Any]) -> str: - """ - Process markdown text by applying a transform function to each slide. - - - Slides are sections prepended by `*` - - The text is processed by: - - Extracting the slides one by one - - Calling a `transform()` function on each slide (defined by the user) - - Joining the transformed slides back together - - Comments are left untouched. - - :param txt: markdown text to process - :param transform: function to transform each slide - :return: transformed text - """ - hdbg.dassert_isinstance(txt, str) - # Text of the current slide. - slide_txt: List[str] = [] - # Store all the transformed slides. - transformed_txt: List[str] = [] - # True inside a block to skip. - in_skip_block = False - # True inside a slide. - in_slide = False - # Track line number where slide started. - slide_start_line = 0 - lines = txt.splitlines() - for i, line in enumerate(lines): - _LOG.debug("%s:line='%s'", i, line) - # 1) Remove comment block. - do_continue, in_skip_block = process_comment_block(line, in_skip_block) - if _TRACE: - _LOG.debug(" -> %s", hprint.to_str("do_continue in_skip_block")) - if do_continue: - transformed_txt.append(line) - continue - # 2) Process slide. - if _TRACE: - _LOG.debug(" -> %s", hprint.to_str("in_slide")) - if line.startswith("* ") or line.startswith("#### "): - _LOG.debug("### Found slide") - # Found a slide or the end of the file. - if slide_txt: - _LOG.debug("# Transform slide") - # Transform the slide. 
- slide_title = slide_txt[0] - transformed_slide = transform( - slide_txt, - slide_title=slide_title, - slide_line_number=slide_start_line, - ) - hdbg.dassert_isinstance(transformed_slide, list) - transformed_txt.extend(transformed_slide) - else: - _LOG.debug("# First slide") - # Start a new slide. - slide_txt = [] - slide_txt.append(line) - slide_start_line = i - in_slide = True - elif in_slide: - _LOG.debug("# Accumulate slide") - slide_txt.append(line) - else: - _LOG.debug("# Accumulate txt outside slide") - transformed_txt.append(line) - # Process the last slide, if needed. - if slide_txt: - hdbg.dassert(in_slide) - in_slide = False - # Transform the slide. - slide_title = slide_txt[0] - transformed_slide = transform( - slide_txt, - slide_title=slide_title, - slide_line_number=slide_start_line, - ) - hdbg.dassert_isinstance(transformed_slide, list) - transformed_txt.extend(transformed_slide) - # - hdbg.dassert( - not in_skip_block, - "Found end of file while still parsing a comment block", - ) - hdbg.dassert(not in_slide, "Found end of file while still parsing a slide") - # Join the transformed slides back together. - result = "\n".join(transformed_txt) - return result - - -# ############################################################################# -# Slides conversion to markdown and back -# ############################################################################# - - -def convert_slide_to_markdown(lines: List[str], *, level: int = 5) -> List[str]: - """ - Convert slide to standard markdown. - - - Handle * bullets to markdown headers level 5 - - :param lines: list of lines to convert - :param level: level of the markdown headers to convert to - :return: list of converted lines - """ - hdbg.dassert_isinstance(lines, list) - converted_lines = [] - for line in lines: - if line.startswith("* "): - # Convert slide bullet to markdown header level 5. 
- converted_line = "#" * level + " " + line[2:] - converted_lines.append(converted_line) - else: - converted_lines.append(line) - return converted_lines - - -def convert_markdown_to_slide(lines: List[str], *, level: int = 5) -> List[str]: - """ - Convert standard markdown back to slide. - - - Handle markdown headers level 5 to * bullets - - :param lines: list of lines to convert - :param level: level of the markdown headers to convert to - :return: list of converted lines - """ - hdbg.dassert_isinstance(lines, list) - converted_lines = [] - for line in lines: - if line.startswith("#" * level + " "): - # Convert markdown header level 5 back to slide bullet. - converted_line = "* " + line[6:] - converted_lines.append(converted_line) - else: - converted_lines.append(line) - return converted_lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py deleted file mode 100644 index becc00b09..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_tables as hmartabl -""" - -import logging -from typing import Dict, List, Tuple - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -def replace_tables_with_tags( - lines: List[str], -) -> Tuple[List[str], Dict[str, str]]: - """ - Replace markdown tables with tag and return mapping from tags to the table. 
- - E.g., - ``` - Some text before - | Column 1 | Column 2 | - |----------|----------| - | Value 1 | Value 2 | - | Value 3 | Value 4 | - More text after - ``` - is replaced with: - ``` - Some text before - - More text after - ``` - - :param lines: list of lines to process - :return: tuple containing: - - list of lines with the tables replaced by tags - - mapping from tags to the table text - """ - hdbg.dassert_isinstance(lines, list) - result = [] - table_map = {} - table_count = 0 - i = 0 - while i < len(lines): - line = lines[i].strip() - # Check if this line starts a table (contains |). - if "|" in line and line.strip(): - # Look ahead to see if next line is a separator. - if i + 1 < len(lines): - next_line = lines[i + 1].strip() - # Check if next line is a table separator (contains --- and |). - if "|" in next_line and "-" in next_line: - # Found a table, collect all table lines. - table_lines = [] - # Add header line. - table_lines.append(lines[i]) - i += 1 - # Add separator line. - table_lines.append(lines[i]) - i += 1 - # Add data rows (continue while lines contain |). - while ( - i < len(lines) - and "|" in lines[i].strip() - and lines[i].strip() - ): - table_lines.append(lines[i]) - i += 1 - # Store the table. - table_count += 1 - table_text = "\n".join(table_lines) - table_map[str(table_count)] = table_text - result.append(f"") - continue - # Not a table line, add as-is. - result.append(lines[i]) - i += 1 - return result, table_map - - -def replace_tags_with_tables( - lines: List[str], table_map: Dict[str, str] -) -> List[str]: - """ - Replace tags with markdown tables. - - :param lines: list of lines to process - :param table_map: mapping from tags to table text - :return: list of lines with tags replaced by tables - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(table_map, dict) - # Initialize output. - result = [] - table_map_copy = table_map.copy() - # Parse data. 
- for line in lines: - if line.startswith(""): - # Extract table number from tag like . - tag_match = line[6:-1] # Remove '' - hdbg.dassert_in( - tag_match, table_map_copy, f"Found unmatched tag {tag_match}" - ) - # Split table text into lines and add them. - table_text = table_map_copy[tag_match] - table_lines = table_text.split("\n") - result.extend(table_lines) - # Remove used tag from map. - del table_map_copy[tag_match] - else: - result.append(line) - # Ensure all tags were used. - hdbg.dassert_eq( - len(table_map_copy), - 0, - f"Found {len(table_map_copy)} unmatched tags: {list(table_map_copy.keys())}", - ) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py deleted file mode 100644 index 7d8cb8d75..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_toc as hmartoc -""" - -import logging -import os -import re -import tempfile -from typing import Any, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import dev_scripts_helpers.dockerize.lib_markdown_toc as dshdlmato - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# YAML preamble -# ############################################################################# - - -def extract_yaml_frontmatter(lines: List[str]) -> Tuple[List[str], List[str]]: - """ - Extract YAML front matter from the beginning of the file. - - YAML front matter is delimited by `---` at the beginning and end. 
- Example: - ``` - --- - title: My Document - date: 2024-01-01 - --- - ``` - - :param lines: The lines to be processed. - :return: A tuple of (frontmatter_lines, remaining_lines). - """ - _LOG.debug("lines=%s", lines) - # Check if file starts with YAML front matter. - if len(lines) < 3: - # Not enough lines for front matter. - return [], lines - if not re.match(r"^---\s*$", lines[0]): - # No front matter marker at the beginning. - return [], lines - # Find the closing --- marker. - for i in range(1, len(lines)): - if re.match(r"^---\s*$", lines[i]): - # Found closing marker. - frontmatter = lines[: i + 1] - remaining = lines[i + 1 :] - _LOG.debug("Found YAML front matter: %d lines", len(frontmatter)) - return frontmatter, remaining - # No closing marker found, treat as no front matter. - _LOG.debug("No closing YAML front matter marker found") - return [], lines - - -def reattach_yaml_frontmatter( - yaml_frontmatter: List[str], lines: List[str] -) -> List[str]: - """ - Reattach YAML front matter to the beginning of the content lines. - - :param yaml_frontmatter: The YAML front matter lines to reattach. - :param lines: The content lines to prepend the front matter to. - :return: Combined lines with YAML front matter reattached. - """ - if not yaml_frontmatter: - return lines - # Add an empty line after the front matter if the remaining content doesn't - # start with one. - if lines and lines[0] != "": - return yaml_frontmatter + [""] + lines - return yaml_frontmatter + lines - - -# ############################################################################# -# TOC -# ############################################################################# - - -def refresh_toc( - lines: List[str], - *, - use_dockerized_markdown_toc: bool = True, - # TODO(gp): Remove this. - **kwargs: Any, -) -> List[str]: - """ - Refresh the table of contents (TOC) in the given text. - - :param lines: The lines to be processed. 
- :param use_dockerized_markdown_toc: if True, run markdown-toc in a - Docker container - :return: The lines with the updated TOC. - """ - _LOG.debug("lines=%s", lines) - # Check whether there is a TOC otherwise add it. - # Add `` comment in the doc to generate the TOC after that - # line. By default, it will generate at the top of the file. - # This workaround is useful to generate the TOC after the heading of the doc - # at the top and not include it in the TOC. - if "" not in lines: - _LOG.warning("No tags for table of content in md file: adding it") - lines = [""] + lines - txt = "\n".join(lines) - # Write file. - curr_dir = os.getcwd() - tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name - hio.to_file(tmp_file_name, txt) - # Process TOC. - cmd_opts: List[str] = [] - if use_dockerized_markdown_toc: - # Run `markdown-toc` in a Docker container. - use_sudo = hdocker.get_use_sudo() - force_rebuild = False - dshdlmato.run_dockerized_markdown_toc( - tmp_file_name, - cmd_opts, - use_sudo=use_sudo, - force_rebuild=force_rebuild, - ) - else: - # Run `markdown-toc` installed on the host directly. - executable = "markdown-toc" - cmd = [executable] + cmd_opts - cmd.append("-i " + tmp_file_name) - # - cmd_as_str = " ".join(cmd) - _, output_tmp = hsystem.system_to_string(cmd_as_str, abort_on_error=True) - _LOG.debug("output_tmp=%s", output_tmp) - # Read file. - txt = hio.from_file(tmp_file_name) - # Clean up. - os.remove(tmp_file_name) - # Remove empty lines introduced by `markdown-toc`. - txt = hprint.remove_lead_trail_empty_lines(txt) - ret = txt.split("\n") - hdbg.dassert_isinstance(ret, list) - return ret - - -def remove_table_of_contents(txt: str) -> str: - """ - Remove the table of contents from the text of a markdown file. - - The table of contents is stored between - ``` - - ... 
- - ``` - - :param txt: Input markdown text - :return: Text with table of contents removed - """ - txt = re.sub(r".*?", "", txt, flags=re.DOTALL) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py deleted file mode 100644 index b8087b9fd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Matplotlib utilities and plotting helpers. - -Import as: - -import helpers.hmatplotlib as hmatplo -""" - -import logging -import math -from typing import Any, Optional, Tuple - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np - -import helpers.hdbg as hdbg -import helpers.hio as hio - -_LOG = logging.getLogger(__name__) - -# Default figure size for plots. -# TODO(gp): Is this used? -FIG_SIZE = (20, 5) - - -def get_multiple_plots( - num_plots: int, - num_cols: int, - y_scale: Optional[float] = None, - *args: Any, - **kwargs: Any, -) -> Tuple[mpl.figure.Figure, np.array]: - """ - Create figure to accommodate `num_plots` plots. - - The figure is arranged in rows with `num_cols` columns. - - :param num_plots: number of plots - :param num_cols: number of columns to use in the subplot - :param y_scale: the height of each plot. If `None`, the size of the whole - figure equals the default `figsize` - :return: figure and array of axes - """ - hdbg.dassert_lte(1, num_plots) - hdbg.dassert_lte(1, num_cols) - # Heuristic to find the dimension of the fig. 
- if y_scale is not None: - hdbg.dassert_lt(0, y_scale) - ysize = math.ceil(num_plots / num_cols) * y_scale - figsize: Optional[Tuple[float, float]] = (20, ysize) - else: - figsize = None - if "tight_layout" not in kwargs and not kwargs.get( - "constrained_layout", False - ): - kwargs["tight_layout"] = True - fig, ax = plt.subplots( - math.ceil(num_plots / num_cols), - num_cols, - figsize=figsize, - *args, - **kwargs, - ) - if isinstance(ax, np.ndarray): - ax = ax.flatten() - else: - ax = np.array([ax]) - # Remove extra axes that can appear when `num_cols` > 1. - empty_axes = ax[num_plots:] - for empty_ax in empty_axes: - empty_ax.remove() - return fig, ax[:num_plots] - - -def save_fig( - fig: Optional[mpl.figure.Figure], - file_name: str, - *, - print_markdown: bool = False, - path_prefix: Optional[str] = None, -) -> None: - """ - Save matplotlib figure to file and optionally print markdown reference. - - :param fig: Matplotlib figure. If None, uses the active figure. - :param file_name: Output filename - :param print_markdown: If True, print markdown image reference - :param path_prefix: Path prefix for markdown reference (e.g., "msml610/lectures_source") - """ - if fig is None: - fig = plt.gcf() - hdbg.dassert_isinstance(fig, mpl.figure.Figure) - hdbg.dassert_isinstance(file_name, str) - hio.create_enclosing_dir(file_name, incremental=True) - fig.savefig(file_name, dpi=300, bbox_inches="tight") - # Use print instead of _LOG.info. 
- print(f"Saved figure to '{file_name}'") - # - if print_markdown: - if path_prefix: - markdown_path = f"{path_prefix}/{file_name}" - else: - markdown_path = file_name - markdown_ref = f"![]({markdown_path})" - print(markdown_ref) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py deleted file mode 100644 index 27e5130ca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Import as: - -import helpers.hmkdocs as hmkdocs -""" - -import re - -import helpers.hdbg as hdbg -import helpers.hmarkdown as hmarkdo - -# TODO(ai): Make function private. -# TODO(ai): Convert str to List[str] -# TODO(ai): Add unit tests. - - -# TODO(gp): -> hmarkdown_?.py -def dedent_python_code_blocks(txt: str) -> str: - """ - Dedent Python code blocks so they are aligned to column 0. - - This is needed by mkdocs to render a Python code block correctly. - - :param txt: Input markdown text - :return: Text with Python code blocks dedented - """ - import textwrap - - lines = txt.split("\n") - result = [] - # Store whether the parser is inside a code block. - in_python_block = False - # Store the current Python code block. - code_block_lines = [] - # Track whether current block is indented (inside a list item). - block_is_indented = False - for line in lines: - if line.strip() == "```python": - in_python_block = True - # Only dedent top-level blocks (fence at column 0). - block_is_indented = line != line.lstrip() - result.append(line) - elif line.strip() == "```" and in_python_block: - if code_block_lines and not block_is_indented: - # Dedent only top-level code blocks. 
- code_text = "\n".join(code_block_lines) - dedented_code = textwrap.dedent(code_text) - result.extend(dedented_code.split("\n")) - code_block_lines = [] - elif code_block_lines: - # Indented block: pass through unchanged. - result.extend(code_block_lines) - code_block_lines = [] - result.append(line) - in_python_block = False - block_is_indented = False - elif in_python_block: - code_block_lines.append(line) - else: - result.append(line) - return "\n".join(result) - - -def replace_indentation(txt: str, input_spaces: int, output_spaces: int) -> str: - """ - Replace indentation from input_spaces to output_spaces. - - :param txt: Input markdown text - :param input_spaces: Number of spaces to detect as one indentation - level - :param output_spaces: Number of spaces to replace each indentation - level with - :return: Text with indentation replaced - """ - hdbg.dassert_lte(1, input_spaces) - hdbg.dassert_lte(1, output_spaces) - lines = txt.split("\n") - result = [] - for line in lines: - # Count leading spaces. - leading_spaces = len(line) - len(line.lstrip()) - if leading_spaces > 0 and leading_spaces % input_spaces == 0: - # Calculate indentation level and convert to output spaces. - indentation_level = leading_spaces // input_spaces - new_indentation = " " * (indentation_level * output_spaces) - result.append(new_indentation + line.lstrip()) - else: - result.append(line) - return "\n".join(result) - - -def replace_indentation_with_four_spaces(txt: str) -> str: - """ - Replace 2 spaces indentation with 4 spaces since this is what mkdocs needs. - - :param txt: Input markdown text - :return: Text with 2-space indentation replaced with 4-space - indentation - """ - return replace_indentation(txt, input_spaces=2, output_spaces=4) - - -def convert_slides_to_markdown(txt: str, level: int) -> str: - """ - Convert strings storing "slides", i.e., `* ...` to markdown headers. 
- - E.g., - ``` - * Tools for Vision component - ``` - to: - ``` - #### Tools for Vision component - ``` - """ - lines = txt.split("\n") - result = [] - for line in lines: - if line.startswith("* "): - result.append("#" * level + " " + line[2:]) - else: - result.append(line) - return "\n".join(result) - - -def rewrite_absolute_doc_links(txt: str) -> str: - """ - Rewrite absolute /docs/ markdown links to root-relative HTML links. - - MkDocs only converts relative `.md` links to `.html`. Absolute links - like `/docs/path/file.md` are left unchanged and 404 at serve time. - This converts them to `/path/file.html` so they resolve correctly. - - :param txt: Input markdown text - :return: Text with absolute /docs/ links rewritten - """ - - def _replace(m: re.Match) -> str: - path = m.group(1) - # Strip /docs/ prefix and convert .md → .html. - path = re.sub(r"^/docs/", "/", path) - path = re.sub( - r"\.md(#[^)]*)?$", lambda h: ".html" + (h.group(1) or ""), path - ) - return f"({path})" - - # Match markdown links: ([text](/docs/...md)) including optional anchors. - txt = re.sub(r"\((/docs/[^)]+\.md(?:#[^)]*)?)\)", _replace, txt) - return txt - - -def preprocess_mkdocs_markdown(txt: str) -> str: - """ - Preprocess markdown text for mkdocs. - - This function applies the following transformations: - 1. Remove table of contents - 2. Dedent Python code blocks - 3. Replace 2 spaces indentation with 4 spaces - 4. 
Rewrite absolute /docs/ links to root-relative HTML links - - :param txt: Input markdown text - :return: Preprocessed markdown text - """ - txt = hmarkdo.remove_table_of_contents(txt) - txt = dedent_python_code_blocks(txt) - txt = replace_indentation_with_four_spaces(txt) - txt = convert_slides_to_markdown(txt, level=4) - txt = rewrite_absolute_doc_links(txt) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py deleted file mode 100644 index 66ed59b39..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Import as: - -import helpers.hmodule as hmodule -""" - -import logging -import os -import subprocess -import textwrap -from typing import Any, Dict, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hserver as hserver - -_LOG = logging.getLogger(__name__) - -_WARNING = "\033[33mWARNING\033[0m" - - -# Use this to avoid extra dependencies from `hsystem`. -def _system_to_string(cmd: str) -> Tuple[int, str]: - """ - Run a command and return the output and the return code. - - :param cmd: command to run - :return: tuple of (return code, output) - """ - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, - shell=True, - text=True, - ) - rc = result.returncode - output = result.stdout - output = output.strip() - return rc, output - - -def has_module(module: str) -> bool: - """ - Return whether a Python module can be imported or not. - """ - if module == "gluonts" and hserver.is_host_mac(): - # Gluonts and mxnet modules are not properly supported on the ARM - # architecture yet, see CmTask4886 for details. 
- return False - code = f""" - try: - import {module} - has_module_ = True - except ImportError as e: - _LOG.warning("%s: %s", _WARNING, str(e)) - has_module_ = False - """ - code = textwrap.dedent(code) - # To make the linter happy. - has_module_ = True - locals_: Dict[str, Any] = {} - # Need to explicitly declare and pass `locals_`: - # https://docs.python.org/3/library/functions.html#exec - # `Pass an explicit locals dictionary if you need to see effects - # of the code on locals after function exec() returns.` - exec(code, globals(), locals_) - has_module_ = locals_["has_module_"] - return has_module_ - - -def install_module_if_not_present( - import_name: str, - *, - package_name: Optional[str] = None, - use_sudo: bool = True, - use_activate: bool = False, - venv_path: Optional[str] = None, - quiet: bool = True, -) -> None: - """ - Install a Python module if it is not already installed. - - :param import_name: name used to import the module (e.g., "openai") - :param package_name: name of the package on PyPI (if different from `import_name`) - :param use_sudo: whether to use sudo to install the module - :param use_activate: whether to use the activate script to install the module - (e.g., "source /venv/bin/activate; pip install --quiet --upgrade openai") - :param venv_path: path to the virtual environment - E.g., /Users/saggese/src/venv/client_venv.helpers - :param quiet: whether to install the module quietly - """ - _has_module = has_module(import_name) - if _has_module: - print(f"Module '{import_name}' is already installed.") - return - print(f"Installing module '{import_name}'...") - # Sometime the package name is different from the import name. - # E.g., we import using `import dash_bootstrap_components` but the package - # name is `dash-bootstrap-components`. - if package_name is None: - package_name = import_name - # Sometime the package name is different from the import name. 
- # E.g., we import using `import dash_bootstrap_components` but the package - # name is `dash-bootstrap-components`. - if quiet: - quiet_flag = "--quiet" - else: - quiet_flag = "" - if venv_path is None: - venv_path = "/venv" - venv_path = os.path.join(venv_path, "bin/activate") - hdbg.dassert_file_exists(venv_path, "Can't find venv_path='{venv_path}'") - if use_activate: - cmd = f'/bin/bash -c "(source {venv_path}; pip install {quiet_flag} --upgrade {package_name})"' - else: - cmd = f"pip install {quiet_flag} {package_name}" - if use_sudo: - cmd = f"sudo {cmd}" - _, output = _system_to_string(cmd) - print(output) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py deleted file mode 100644 index 525673032..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Import as: - -import helpers.hmoto as hmoto -""" - -import unittest.mock as umock -from typing import Generator, Union - -import pytest # isort:skip # noqa: E402 # pylint: disable=wrong-import-position - -# Equivalent to `import moto`, but skip this module if the module is not present. -# `moto` must be imported before `boto3` to properly mock it. -moto = pytest.importorskip("moto") - -# It is necessary that boto3 is imported after moto. -# If not, boto3 will access real AWS. 
-import boto3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - -import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hs3 as hs3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hunit_test as hunitest # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - - -# ############################################################################# -# S3Mock_TestCase -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class S3Mock_TestCase(hunitest.TestCase): - # Mocked AWS credentials. - mock_aws_credentials_patch = umock.patch.dict( - hs3.os.environ, - { - "MOCK_AWS_ACCESS_KEY_ID": "mock_key_id", - "MOCK_AWS_SECRET_ACCESS_KEY": "mock_secret_access_key", - "MOCK_AWS_DEFAULT_REGION": "us-east-1", - }, - ) - mock_aws_credentials = None - mock_aws_profile = "__mock__" - # Mocked bucket. - mock_s3 = moto.mock_aws() - bucket_name = "mock_bucket" - # TODO(Nikola): Temporary here to ensure it is called only once. - # Used in some tests that are obtaining data from 3rd party providers. - binance_secret = None - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def _mock_get_s3fs( - self, aws_profile: Union[str, hs3.S3FileSystem] - ) -> hs3.S3FileSystem: - """ - Mock implementation of `get_s3fs` to use the mocked environment - variables from `moto`. 
- """ - from s3fs import S3FileSystem - - hdbg.dassert_isinstance(aws_profile, (str, S3FileSystem)) - aws_profile = S3FileSystem(anon=False) - return aws_profile - - def set_up_test(self) -> None: - # Getting necessary secret before boto3 is mocked. - if self.binance_secret is None: - import helpers.hsecrets as hsecret - - self.binance_secret = hsecret.get_secret("binance.preprod.trading.1") - # Start boto3 mock. - self.mock_s3.start() - # Start AWS credentials mock. Must be started after moto mock, - # or it will be overridden by moto with `foobar` values. - self.mock_aws_credentials = self.mock_aws_credentials_patch.start() - # Initialize boto client and create bucket for testing. - s3_client = boto3.client("s3") - s3_client.create_bucket(Bucket=self.bucket_name) - # Precaution to ensure that we are using mocked botocore. - s3_test_client = boto3.client("s3") - buckets = s3_test_client.list_buckets()["Buckets"] - self.assertEqual(len(buckets), 1) - self.assertEqual(buckets[0]["Name"], self.bucket_name) - # Patch `get_s3fs` that uses the mocked environment variables. - self.mock_get_s3fs = umock.patch.object( - hs3, "get_s3fs", side_effect=self._mock_get_s3fs - ) - self.mock_get_s3fs.start() - - def tear_down_test(self) -> None: - # Empty the bucket otherwise deletion will fail. - s3_client = boto3.resource("s3") - hdbg.dassert_eq(self.bucket_name, "mock_bucket") - bucket = s3_client.Bucket(self.bucket_name) - bucket.objects.all().delete() - # Delete bucket. - bucket.delete() - # Stop mocked `get_s3fs`. - if hasattr(self, "mock_get_s3fs"): - self.mock_get_s3fs.stop() - # Stop moto. 
- self.mock_aws_credentials_patch.stop() - self.mock_s3.stop() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py deleted file mode 100644 index 13ae41c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Import as: - -import helpers.hnetwork as hnetwor -""" - -import logging -import os -import re -from typing import Optional, Tuple - -import requests - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def check_url(url: str) -> None: - """ - Check that an URL responds. - """ - try: - request = requests.get(url) - exists = request.status_code == 200 - # pylint: disable=broad-except - except Exception: - # TODO(gp): RuntimeError doesn't seem to catch. Find a narrower - # exception to catch. - exists = False - if not exists: - _LOG.warning("url '%s' doesn't exist", url) - - -def get_prefixes(jupyter_port: Optional[int] = None) -> Tuple[str, str]: - """ - Return the prefixes that a file should have under a GitHub repo and a - Jupyter notebook. - """ - hsystem.get_user_name() - if jupyter_port is None: - jupyter_port = 10001 - _LOG.warning( - "jupyter_port not available: using the default one %s", jupyter_port - ) - repo_name = hgit.get_repo_full_name_from_client(super_module=False) - _LOG.debug("repo_name=%s", repo_name) - github_prefix = f"https://github.com/{repo_name}/blob/master" - jupyter_prefix = f"http://localhost:{jupyter_port}/tree" - return github_prefix, jupyter_prefix - - -# TODO(gp): -> get_canonical_file_name_from_url -def get_file_name(url: str) -> str: - """ - Given an URL from GitHub or from Jupyter server extract the path - corresponding to the file. 
- - E.g., - - http://localhost:10001/notebooks/research/... - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - -> - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - - - https://github.com/.../.../blob/master/... - oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb - -> - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - """ - # "http://localhost:10001/notebooks/... - # oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb" - ret = None - if ret is None: - m = re.search(r"http.*://localhost:\d+/(.*)", url) - if m: - ret = m.group(1) - to_remove = "notebooks/" - idx = ret.index(to_remove) - if idx >= 0: - end_idx = idx + len(to_remove) - ret = ret[end_idx:] - if ret is None: - # https://github.com/.../.../blob/master/... - # oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb - m = re.search(r"http.*://.*github.com/(.*)", url) - if m: - ret = m.group(1) - # Remove ".../.../blob/master" - ret = "/".join(ret.split("/")[4:]) - if ret is None: - if os.path.exists(url): - ret = url - if ret is None: - hdbg.dassert_is_not(ret, None, "url=%s", url) - return ret # type: ignore diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py deleted file mode 100644 index 75ecabfe4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Import as: - -import helpers.hnotebook as hnotebo -""" - -import logging - - -def config_notebook(sns_set: bool = True) -> None: - """ - Configure the notebook for plotting. - """ - import helpers.hmodule as hmodule - - # Matplotlib. - module = "matplotlib" - if hmodule.has_module(module): - # Matplotlib. 
- import matplotlib.pyplot as plt - - # plt.rcParams - plt.rcParams["figure.figsize"] = (20, 5) - plt.rcParams["legend.fontsize"] = 14 - plt.rcParams["font.size"] = 14 - plt.rcParams["image.cmap"] = "rainbow" - if False: - # Tweak the size of the plots to make it more readable when embedded in - # documents or presentations. - # font = {'family' : 'normal', - # #'weight' : 'bold', - # 'size' : 32} - # matplotlib.rc('font', **font) - scale = 3 - small_size = 8 * scale - medium_size = 10 * scale - bigger_size = 12 * scale - # Default text sizes. - plt.rc("font", size=small_size) - # Fontsize of the axes title. - plt.rc("axes", titlesize=small_size) - # Fontsize of the x and y labels. - plt.rc("axes", labelsize=medium_size) - # Fontsize of the tick labels. - plt.rc("xtick", labelsize=small_size) - # Fontsize of the tick labels. - plt.rc("ytick", labelsize=small_size) - # Legend fontsize. - plt.rc("legend", fontsize=small_size) - # Fontsize of the figure title. - plt.rc("figure", titlesize=bigger_size) - else: - print("No module '{module}'") - # Seaborn. - module = "seaborn" - if hmodule.has_module(module): - import seaborn as sns - - if sns_set: - sns.set() - else: - print("No module '{module}'") - # Pandas. - module = "pandas" - if hmodule.has_module(module): - import pandas as pd - - pd.set_option("display.max_rows", 500) - pd.set_option("display.max_columns", 500) - pd.set_option("display.width", 1000) - else: - print("No module '{module}'") - # Warnings. - import helpers.hwarnings as hwarnin - - # Force the linter to keep this import. - _ = hwarnin - - -def _info_print(msg: str, *args, **kwargs) -> None: - """ - Print a message with optional formatting arguments. - """ - if args: - msg = msg % args - print(msg) - - -def set_logger_to_print(log) -> None: - """ - Replace logger.info method with a print function. 
- - :param log: logger object to modify - """ - log.info = _info_print - - -def set_all_loggers_to_print() -> None: - """ - Replace all loggers' info method with a print function. - """ - for name in logging.root.manager.loggerDict: - logger = logging.getLogger(name) - # print("Setting logger %s to print" % name) - set_logger_to_print(logger) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py deleted file mode 100644 index 47fc37975..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Import as: - -import helpers.hnumba as hnumba -""" - -import logging -from typing import Any, Callable, TypeVar - -try: - import numba - - numba_available = True -except ImportError: - numba_available = False - -_LOG = logging.getLogger(__name__) - -# Switch to enable numba at run-time. -# For using in notebooks you need to force a reload of the library, like: -# import importlib -# importlib.reload(numba_) -# numba_.USE_NUMBA = False - -USE_NUMBA = True -RT = TypeVar("RT") # Return type for decorator. 
- - -def jit(f: Callable[..., RT]) -> Callable[..., RT]: - if USE_NUMBA and not numba_available: - _LOG.warning("numba is not installed") - use_numba = USE_NUMBA and numba_available - - if use_numba: - _LOG.debug("Using numba!") - wrapper: Callable[..., RT] = numba.jit(f) - else: - - def wrapper(*args: Any, **kwargs: Any) -> RT: - _LOG.debug("Not using numba!") - return f(*args, **kwargs) - - return wrapper diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py deleted file mode 100644 index 4cd0e8c4d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Import as: - -import helpers.hnumpy as hnumpy -""" - -import contextlib -from typing import Iterator - -import numpy as np - -import helpers.hdbg as hdbg - - -# From https://stackoverflow.com/questions/49555991 -@contextlib.contextmanager -def random_seed_context(seed: int) -> Iterator: - """ - Context manager to isolate a numpy random seed. - """ - state = np.random.get_state() - np.random.seed(seed) - try: - yield - finally: - np.random.set_state(state) - - -# TODO(Juraj): unit test in CmTask5092. -def floor_with_precision(value: float, amount_precision: int) -> float: - """ - Floor a value using desired precision. - - The invariant for this function is that negative number are floored based - on their absolute value: e.g floor_with_precision(-4.6, 0) == -4. This is - useful for calculating share size where there are decimal precision - limitations. The desired behavior is to rather round down than overfill. 
- - Other examples: - floor_with_precision(0.125, 2) == 0.12 - floor_with_precision(0.4, 0) == 0.0 - - :param value: value to floor with desire - :param amount_precision: number of decimal points to floor to - :return: value floored using desired precision. - """ - # Custom solution to allow flooring using precision. - # https://stackoverflow.com/questions/58065055/floor-and-ceil-with-number-of-decimals/58065394#58065394 - # Precision < 0 does not make sense. - hdbg.dassert_lte(0, amount_precision) - # Store sign and get absolute value to get the desire - sign = -1 if value < 0 else 1 - value_abs = abs(value) - value_floored = np.true_divide( - np.floor(value_abs * 10**amount_precision), 10**amount_precision - ) - return value_floored * sign diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py deleted file mode 100644 index e9424b8cc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py +++ /dev/null @@ -1,500 +0,0 @@ -""" -Methods to introspect and print the state of an object. - -Import as: - -import helpers.hobject as hobject -""" - -import abc -import logging -import pprint -from typing import Any, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hstring as hstring - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - -# ############################################################################# -# _to_skip* -# ############################################################################# - - -def _to_skip(is_: bool, mode: str) -> bool: - """ - Return whether to skip the attribute. 
- - :param is_: if `True` the attribute is of the type we are checking - :param mode: how to handle the attribute - :return: whether to skip the attribute - """ - hdbg.dassert_in(mode, ("skip", "only", "all")) - skip = False - if mode == "skip": - if is_: - # Skip everything. - skip = True - elif mode == "only": - if not is_: - # Keep only the callables. - skip = True - elif mode == "all": - # Keep everything. - skip = False - else: - raise ValueError(f"Invalid mode='{mode}'") - return skip - - -def _to_skip_callable_attribute(attr_name: Any, mode: str) -> bool: - """ - Decide whether to skip a callable attribute. - """ - # Check whether the attribute is callable. - is_callable = callable(attr_name) - skip = _to_skip(is_callable, mode) - return skip - - -def _to_skip_private_attribute(attr_name: str, mode: str) -> bool: - """ - Decide whether to skip a private attribute. - """ - # _Object__hello - # TODO(gp): This can be improved by passing the name of the object. - is_dunder = attr_name.startswith("_") and "__" in attr_name - # We assume that private attributes start with `_` and are not dunder. - is_private = not is_dunder and attr_name.startswith("_") - skip = _to_skip(is_private, mode) - return skip - - -def _to_skip_dunder_attribute(attr_name: str, mode: str) -> bool: - """ - Decide whether to skip a double under attribute. - """ - # Check if it is a dunder (i.e., double under method). E.g., `__hello__`. - is_dunder = attr_name.startswith("_") and "__" in attr_name - skip = _to_skip(is_dunder, mode) - return skip - - -def _to_skip_attribute( - attr_name: Any, - attr_value: Any, - callable_mode: str, - private_mode: str, - dunder_mode: str, - attr_names_to_skip: Optional[List[str]], -) -> bool: - """ - Decide whether to skip an attribute. 
- - :param attr_name: name of the attribute - :param attr_value: value of the attribute - :param callable_mode: how to handle attributes that are callable methods - :param private_mode: how to handle attributes that are private (e.g., - `_hello`) - :param dunder_mode: how to handle attributes that are dunder (e.g., - `__hello`) - :param attr_names_to_skip: a list of attributes (e.g., private, callable, dunder) - to skip. `None` to skip nothing. - :return: whether to skip the attribute - """ - # Check whether the attribute is one that was requested explicitly to skip. - if attr_names_to_skip is not None: - if attr_name in attr_names_to_skip: - skip = True - return skip - # Handle callable methods. - skip = _to_skip_callable_attribute(attr_value, callable_mode) - if skip: - _LOG.debug("Skip callable") - return skip - # Handle private methods. - skip = _to_skip_private_attribute(attr_name, private_mode) - if skip: - _LOG.debug("Skip private") - return skip - # Handle dunder methods. - skip = _to_skip_dunder_attribute(attr_name, dunder_mode) - if skip: - _LOG.debug("Skip dunder") - return skip - return False - - -# ############################################################################# -# obj_to_str -# ############################################################################# - - -def _type_to_str(attr_value: Any) -> str: - """ - Print the attribute value together with its type. - - E.g., `a=False , b=hello , c=3.14 ` - """ - type_as_str = str(type(attr_value)) - # Convert from `` to `str`. - type_as_str = hstring.remove_prefix(type_as_str, "") - # Add `<` and `>` around the type. - type_as_str = f"<{type_as_str}>" - return type_as_str - - -def _attr_to_str(attr_value: Any, print_type: bool) -> str: - """ - Print the attribute value handling different types. 
- """ - _LOG.debug("type(attr_value)=%s", type(attr_value)) - if isinstance(attr_value, pd.DataFrame): - res = f"pd.df({attr_value.shape}" - elif isinstance(attr_value, pd.Series): - res = f"pd.srs({attr_value.shape}" - elif isinstance(attr_value, dict): - res = str(attr_value) - else: - res = str(attr_value) - # Add the type, if needed. - if print_type: - res += " " + _type_to_str(attr_value) - return res - - -def obj_to_str( - obj: Any, - *, - attr_mode: str = "__dict__", - sort: bool = False, - print_type: bool = False, - callable_mode: str = "skip", - private_mode: str = "skip", - dunder_mode: str = "skip", - attr_names_to_skip: Optional[List[str]] = None, -) -> str: - """ - Print the attributes of an object. - - An object is printed as name of its class and its attributes, e.g., - ``` - _Object1 at 0x...=(a=False, b=hello, c=3.14) - ``` - - :param attr_mode: use `__dict__` or `dir()` - - It doesn't seem to make much difference - :sort: sort the attributes in order of name, or not - :param print_type: print the type of the attribute - :param callable_mode: how to handle attributes that are callable (i.e., - methods) - - `skip`: skip the callable methods - - `only`: print only the callable methods - - `all`: always print - :param private_mode: how to handle private attributes. Same params as - `callable_mode` - :param dunder_mode: how to handle double under attributes. Same params as - `callable_mode` - :param attr_names_to_skip: a list of attributes (e.g., private, callable, - dunder) to skip. This is used to avoid to print data that is redundant - (e.g., a cached value) - """ - ret = [] - if attr_mode == "__dict__": - # Use `__dict__` to get the attributes of the object. - values = obj.__dict__ - elif attr_mode == "dir": - # Use `dir()` to get the attributes of the object. - values = dir(obj) - elif attr_mode == "config": - # Use object method to get the attributes to print info for. 
- values = obj.get_config_attributes() - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - if sort: - values = sorted(values) - for attr_name in values: - if attr_mode == "__dict__": - attr_value = obj.__dict__[attr_name] - elif attr_mode in ["dir", "config"]: - attr_value = getattr(obj, attr_name) - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - skip = _to_skip_attribute( - attr_name, - attr_value, - callable_mode, - private_mode, - dunder_mode, - attr_names_to_skip, - ) - # `attr_value` can be callable object and needs to be properly handled - # for string conversion and formatting. - _LOG.debug(hprint.to_str("attr_name attr_value skip")) - if skip: - continue - # - out = f"{attr_name}=" + _attr_to_str(attr_value, print_type) - ret.append(out) - # - txt = hprint.to_object_str(obj) + "=" - txt += "(" + ", ".join(ret) + ")" - return txt - - -# ############################################################################# -# obj_to_repr -# ############################################################################# - - -def _attr_to_repr(attr_name: Any, attr_value: Any, print_type: bool) -> str: - """ - Print an object as name of its class and its attributes. 
- - E.g., - ``` - : - a='False' - b='hello' - c='3.14' - ``` - """ - _LOG.debug("type(attr_value)=%s", type(attr_value)) - if isinstance(attr_value, (pd.DataFrame, pd.Series)): - attr_value_as_str = hpandas.df_to_str(attr_value) - elif isinstance(attr_value, dict): - attr_value_as_str = pprint.pformat(attr_value) - else: - attr_value_as_str = repr(attr_value) - # - if len(attr_value_as_str.split("\n")) > 1: - # The string representing the attribute value spans multiple lines, so - # print like: - # ``` - # attr_name= (type) - # attr_value - # ``` - out = f"{attr_name}=" - if print_type: - out += " " + _type_to_str(attr_value) - out += "\n" + hprint.indent(attr_value_as_str) - else: - # The string representing the attribute value is a single line, so print - # like: - # ``` - # attr_name='attr_value' (type) - # ``` - out = f"{attr_name}='{str(attr_value)}'" - if print_type: - out += " " + _type_to_str(attr_value) - return out - - -# TODO(gp): Merge the code with obj_to_repr() using a switch for the different -# code. -def obj_to_repr( - obj: Any, - *, - attr_mode: str = "__dict__", - sort: bool = False, - print_type: bool = False, - callable_mode: str = "skip", - private_mode: str = "skip", - dunder_mode: str = "skip", - attr_names_to_skip: Optional[List[str]] = None, -) -> str: - """ - Same interface and behavior as `obj_to_str()`. - - Use `_attr_to_repr()` instead of a simple `attr_name = attr_value` - like in `obj_to_str()`. - """ - ret = [] - # TODO(Grisha): factor out the logic in a function `get_class_attributes(attr_mode)`. 
- if attr_mode == "__dict__": - values = obj.__dict__ - elif attr_mode == "dir": - values = dir(obj) - elif attr_mode == "config": - values = obj.get_config_attributes() - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - if sort: - values = sorted(values) - for attr_name in values: - if attr_mode == "__dict__": - attr_value = obj.__dict__[attr_name] - elif attr_mode in ["dir", "config"]: - attr_value = getattr(obj, attr_name) - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - skip = _to_skip_attribute( - attr_name, - attr_value, - callable_mode, - private_mode, - dunder_mode, - attr_names_to_skip, - ) - # `attr_value` can be callable object and needs to be properly handled - # for string conversion and formatting. - _LOG.debug(hprint.to_str("attr_name attr_value skip")) - if skip: - continue - # - out = _attr_to_repr(attr_name, attr_value, print_type) - ret.append(out) - # - txt = [] - txt.append(hprint.to_object_repr(obj) + ":") - txt.append(hprint.indent("\n".join(ret))) - return "\n".join(txt) - - -# ############################################################################# -# PrintableMixin -# ############################################################################# - - -class PrintableMixin: - """ - Implement `__str__()` and `__repr__()` to print the state of an object. - - These methods can be overridden with more specific methods by - derived classes. - """ - - @staticmethod - @abc.abstractmethod - def get_config_attributes() -> List[str]: - """ - Get list of attributes that are relevant to the configuration of each - block. - """ - ... - - # TODO(Grisha): decide if we need this method: what are the use-cases? - # Ideally we should just save `SystemConfig` and load it when needed. - def to_config_dict(self) -> Dict[str, Any]: - """ - Get class configuration as dict. - """ - res_dict = {} - # Get class attribute names to print. 
- attributes = self.get_config_attributes() - hdbg.dassert_is_subset(attributes, self.__dict__.keys()) - # Iterate over attributes and add their state to the dict. - for attr in attributes: - value = getattr(self, attr) - # Get a list of types the value class is derived from. - value_parent_classes = value.__class__.__mro__ - if any( - "helpers.hobject.PrintableMixin" in str(parent_class) - for parent_class in value_parent_classes - ): - # Call the function recursively if value is also - # a `PrintableMixin` descendant. - dict_val = value.to_config_dict() - else: - # Get attribute value representation. - dict_val = _attr_to_repr(attr, value, print_type=True) - # Put value in the result dict. - res_dict[attr] = dict_val - return res_dict - - def to_config_str(self) -> str: - """ - Get class configuration as string. - """ - ret = [] - attributes = self.get_config_attributes() - hdbg.dassert_is_subset(attributes, self.__dict__.keys()) - # Iterate over attributes and add their state to the dict. - for attr in attributes: - value = getattr(self, attr) - if isinstance(value, PrintableMixin): - # Call the function recursively if value is also - # a `PrintableMixin` descendant. - dict_val = value.to_config_str() - # Add attribute name for string representation. - dict_val = f"{attr}={dict_val}" - else: - dict_val = _attr_to_repr(attr, value, print_type=True) - # Put value in the result dict. - ret.append(dict_val) - txt = [] - txt.append(hprint.to_object_repr(self) + ":") - txt.append(hprint.indent("\n".join(ret))) - txt = "\n".join(txt) - return txt - - def __repr__( - self, - *, - attr_names_to_skip: Optional[List[str]] = None, - ) -> str: - """ - Used for debugging and development and need to be unambiguous. 
- """ - txt = obj_to_repr( - self, - print_type=True, - private_mode="all", - attr_names_to_skip=attr_names_to_skip, - ) - return txt - - def __str__( - self, - *, - attr_names_to_skip: Optional[List[str]] = None, - ) -> str: - """ - Used for creating output for end user and need to be readable. - """ - txt = obj_to_str( - self, - print_type=True, - private_mode="all", - attr_names_to_skip=attr_names_to_skip, - ) - return txt - - -# ############################################################################# - - -# TODO(gp): CleanUp. This is for testing and should be in hobject_test.py. -# TODO(gp): -> check_object_signature -def test_object_signature( - self_: Any, obj: Any, *, remove_lines_regex: Optional[str] = None -) -> None: - """ - Print a string representation of an object using both `str()` and `repr()`. - - :param obj: the object to print - :param remove_lines_regex: a regex to remove certain lines from the - output - """ - txt = [] - # - txt.append(hprint.frame("str:")) - txt.append(str(obj)) - # - txt.append(hprint.frame("repr:")) - txt.append(repr(obj)) - # - txt = "\n".join(txt) - # Remove certain lines, if needed. - if remove_lines_regex: - txt = hprint.filter_text(remove_lines_regex, txt) - # - self_.check_string(txt, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py deleted file mode 100644 index 2c6d9c729..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Support opening a file. 
- -Import as: - -import helpers.hopen as hopen -""" - -# TODO(gp): -> open_file or move it to system_interaction.py - -import logging -import os -from typing import Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - - -def _cmd_open_html(file_name: str, os_name: str) -> Optional[str]: - """ - Get OS-specific command to open an HTML file. - """ - # Retrieve the executable. - os_cmds = { - "Darwin": "open", - "Windows": "start", - "Linux": "xdg-open", - } - hdbg.dassert_in(os_name, os_cmds) - exec_name = os_cmds[os_name] - if not hsystem.check_exec(exec_name): - _LOG.warning( - "Can't execute the command '%s' on this platform", exec_name - ) - return None - # Build the command. - full_cmd = f"{exec_name} {file_name}" - if os_name == "Linux": - _LOG.warning( - "To open files faster launch in background '%s &'", exec_name - ) - return full_cmd - - -def _cmd_open_pdf(file_name: str, os_name: str) -> Optional[str]: - """ - Get OS-specific command to open a PDF file. - """ - os_cmds = { - "Darwin": ( - "/usr/bin/osascript << EOF\n" - f'set theFile to POSIX file "{file_name}" as alias\n' - 'tell application "Skim"\n' - "activate\n" - "set theDocs to get documents whose path is " - "(get POSIX path of theFile)\n" - "if (count of theDocs) > 0 then revert theDocs\n" - "open theFile\n" - "end tell\n" - "EOF\n" - ) - } - if os_name not in os_cmds: - _LOG.warning("Opening PDF files on '%s' is not supported yet", os_name) - full_cmd = None - else: - full_cmd = os_cmds[os_name] - return full_cmd - - -def open_file(file_name: str) -> None: - """ - Open file locally if its extension is supported. - """ - # Detect file format by the (last) extension. - # E.g., 'hello.html.txt' is considered a txt file. 
- extension = os.path.split(file_name)[-1].split(".")[-1] - extension = extension.lower() - # Make sure file exists. - _LOG.info( - "\n%s", - hprint.frame( - f"Opening {extension} file '{file_name}'", char1="<", char2=">" - ), - ) - hdbg.dassert_path_exists(file_name) - # Get opening command. - os_name = hsystem.get_os_name() - cmd: Optional[str] - if extension == "pdf": - cmd = _cmd_open_pdf(file_name, os_name) - elif extension == "html": - cmd = _cmd_open_html(file_name, os_name) - else: - hdbg.dfatal(f"Opening '{extension}' files is not supported yet") - # Run command. - if cmd is not None: - _LOG.info("%s", cmd) - hio.to_file("open_file_cmd.sh", cmd) - hsystem.system("source open_file_cmd.sh", suppress_output=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py deleted file mode 100644 index 535e7f081..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Import as: - -import helpers.hpandas as hpandas -""" - -from helpers.hpandas_analysis import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_check_summary import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_clean import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_compare import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_conversion import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_dassert import * # isort:skip # 
noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_display import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_io import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_multiindex import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_stats import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_transform import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_utils import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old deleted file mode 100644 index 5be1b281a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old +++ /dev/null @@ -1,2684 +0,0 @@ -""" -Import as: - -import helpers.hpandas as hpandas -""" - -import csv -import dataclasses -import logging -import helpers.hlogging as hlogging -import random -import re -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd - -# Handle different versions of s3fs where core module may be at different -# locations. -try: - import s3fs - - # Try to access s3fs.core to check if it exists - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module. 
- try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ImportError: - # If s3fs is not available, define dummy classes for type hints. - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -# Avoid the following dependency from other `helpers` modules to prevent import -# cycles: -# import helpers.hs3 as hs3 -# import helpers.hsql as hsql -# import helpers.hunit_test as hunitest - - -_LOG = hlogging.getLogger(__name__) - -# Enable extra verbose debugging. Do not commit. -_TRACE = False - -RowsValues = List[List[str]] - - -# ############################################################################# - - -def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: - """ - Convert a pd.DataFrame with a single column into a pd.Series. The problem - is that empty df or df with a single row are not converted correctly to a - pd.Series. - - :param df: dataframe with a single column to convert to a series - :param series_dtype: dtype of the desired series in case a DataFrame - is empty, otherwise inherit dtype from a DataFrame - """ - # See https://stackoverflow.com/questions/33246771 - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) - if df.empty: - srs = pd.Series(dtype=series_dtype) - elif df.shape[0] > 1: - srs = df.squeeze() - else: - srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) - srs.name = df.index.name - hdbg.dassert_isinstance(srs, pd.Series) - return srs - - -def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: - """ - Convert a single-column dataframe to a series or no-op if already a series. 
- """ - if isinstance(data, pd.Series): - return data - return to_series(data) - - -def dassert_is_days( - timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None -) -> None: - hdbg.dassert( - (timedelta / pd.Timedelta(days=1)).is_integer(), - "timedelta='%s' is not an integer number of days", - timedelta, - ) - if min_num_days is not None: - hdbg.dassert_lte(1, timedelta.days) - - -# ############################################################################# - - -def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index: - """ - Return the index of a Pandas object. - """ - if isinstance(obj, pd.Index): - index = obj - else: - hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame)) - index = obj.index - return index - - -# TODO(gp): Maybe for symmetry with the other functions, rename to -# dassert_datetime_index -def dassert_index_is_datetime( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the dataframe has an index containing datetimes. - - It works for both single and multi-indexed dataframes. - """ - index = _get_index(obj) - if isinstance(index, pd.MultiIndex): - # In case of multi index check that at least one level is a datetime. - is_any_datetime = any( - isinstance(level, pd.DatetimeIndex) for level in index.levels - ) - hdbg.dassert(is_any_datetime, msg, *args) - else: - hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args) - - -def dassert_unique_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a unique index. - """ - index = _get_index(obj) - if not index.is_unique: - dup_indices = index.duplicated(keep=False) - df_dup = obj[dup_indices] - dup_msg = f"Duplicated rows are:\n{df_to_str(df_dup)}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - hdbg.dassert(index.is_unique, msg=msg, *args) - - -# TODO(gp): @all Add unit tests. 
-def dassert_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has an increasing index. - """ - index = _get_index(obj) - if not index.is_monotonic_increasing: - # Print information about the problematic indices like: - # ``` - # Not increasing indices are: - # full_symbol open high - # timestamp - # 2018-08-17 01:39:00+00:00 binance::BTC_USDT 6339.250000 6348.910000 - # 2018-08-17 00:01:00+00:00 kucoin::ETH_USDT 286.712987 286.712987 - # ``` - # Find the problematic indices. - mask = np.diff(index) <= pd.Timedelta(seconds=0) - mask = np.insert(mask, 0, False) - # TODO(gp): We might want to specify an integer with how many rows before - # after we want to show. - # Shift back to get the previous index that was creating the issue. - mask_shift = np.empty_like(mask) - mask_shift[: len(mask) - 1] = mask[1 : len(mask)] - mask_shift[len(mask) - 1] = False - # - mask = mask | mask_shift - dup_msg = f"Not increasing indices are:\n{df_to_str(obj[mask])}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - # Dump the data to file for further inspection. - # obj.to_csv("index.csv") - hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args) - - -# TODO(gp): @all Add more info in case of failures and unit tests. -def dassert_strictly_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a strictly increasing index. - """ - dassert_unique_index(obj, msg, *args) - dassert_increasing_index(obj, msg, *args) - - -# TODO(gp): Not sure it's used or useful? -def dassert_monotonic_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a monotonic (i.e., strictly increasing or - decreasing index). 
- """ - dassert_unique_index(obj, msg, *args) - index = _get_index(obj) - cond = index.is_monotonic_increasing or index.is_monotonic_decreasing - hdbg.dassert(cond, msg=msg, *args) - - -# TODO(Paul): @gp -> dassert_datetime_indexed_df -def dassert_time_indexed_df( - df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool -) -> None: - """ - Validate that input dataframe is time indexed and well-formed. - - It works for both single and multi-indexed dataframes. - - :param df: dataframe to validate - :param allow_empty: allow empty data frames - :param strictly_increasing: if True the index needs to be strictly - increasing, instead of just increasing - """ - # Verify that Pandas dataframe is passed as input. - hdbg.dassert_isinstance(df, pd.DataFrame) - if not allow_empty: - # Verify that a non-empty dataframe is passed as input. - hdbg.dassert_lt(0, df.shape[0]) - # Verify that the dataframe has at least 1 column. - hdbg.dassert_lte(1, len(df.columns)) - # Verify that the index is increasing. - if strictly_increasing: - dassert_strictly_increasing_index(df) - else: - dassert_increasing_index(df) - # Check that the index is in datetime format. - dassert_index_is_datetime(df) - # Check that the passed timestamp has timezone info. - index_item = df.index[0] - if isinstance(index_item, tuple): - # In case of multi index assume that the first level is a datetime. - index_item = index_item[0] - hdateti.dassert_has_tz(index_item) - - -def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: - """ - Ensure that remapping rows / columns is valid. - """ - hdbg.dassert_isinstance(to_remap, list) - hdbg.dassert_isinstance(remap_dict, dict) - # All the rows / columns to remap, should exist. - hdbg.dassert_is_subset( - remap_dict.keys(), - to_remap, - "Keys to remap should be a subset of existing columns", - ) - # The mapping is invertible. 
- hdbg.dassert_no_duplicates(remap_dict.keys()) - hdbg.dassert_no_duplicates(remap_dict.values()) - # Rows / columns should not be remapped on existing rows / columns. - hdbg.dassert_not_intersection(remap_dict.values(), to_remap) - - -def dassert_series_type_is( - srs: pd.Series, - type_: type, - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is `type_`. - - Examples of valid series types are - - np.float64 - - np.int64 - - pd.Timestamp - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_isinstance(type_, type) - hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) - - -def dassert_series_type_in( - srs: pd.Series, - types: List[type], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is one of the types in `types`. - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_container_type(types, list, type) - hdbg.dassert_in(srs.dtype.type, types, msg, *args) - - -def dassert_indices_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - allow_series: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` share a common index. - - Print the symmetric difference of indices if equality does not hold. - """ - if allow_series: - if isinstance(df1, pd.Series): - df1 = df1.to_frame() - if isinstance(df2, pd.Series): - df2 = df2.to_frame() - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert( - df1.index.equals(df2.index), - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1.index.difference(df2.index), - df2.index.difference(df1.index), - only_warning=only_warning, - ) - - -def dassert_columns_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - sort_cols: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` have the same columns. 
- - Print the symmetric difference of columns if equality does not hold. - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - if sort_cols: - _LOG.debug("Sorting dataframe columns.") - df1 = df1.sort_index(axis=1) - df2 = df2.sort_index(axis=1) - hdbg.dassert( - df1.columns.equals(df2.columns), - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - only_warning=only_warning, - ) - - -def dassert_axes_equal( - df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False -) -> None: - """ - Ensure that `df1` and `df2` have the same index and same columns. - """ - dassert_indices_equal(df1, df2) - dassert_columns_equal(df1, df2, sort_cols=sort_cols) - - -# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. -def dassert_approx_eq( - val1: Any, - val2: Any, - rtol: float = 1e-05, - atol: float = 1e-08, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Approximate comparison is not applicable for strings. - hdbg.dassert_is_not(type(val1), str) - hdbg.dassert_is_not(type(val2), str) - # Convert iterable inputs to list in order to comply with numpy. - if isinstance(val1, Iterable): - val1 = list(val1) - if isinstance(val2, Iterable): - val2 = list(val2) - cond = np.allclose( - np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True - ) - if not cond: - txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" - hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore - - -# ############################################################################# - - -def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex: - """ - Resample `DatetimeIndex`. 
- - :param index: `DatetimeIndex` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DatetimeIndex` - """ - _LOG.debug(hprint.to_str("index frequency")) - hdbg.dassert_isinstance(index, pd.DatetimeIndex) - dassert_unique_index(index, msg="Index must have only unique values") - min_date = index.min() - max_date = index.max() - _LOG.debug("min_date=%s max_date=%s", min_date, max_date) - # TODO(gp): Preserve the index name. - # index_name = index.name - resampled_index = pd.date_range( - start=min_date, - end=max_date, - freq=frequency, - ) - # Enable detailed debugging. - if False: - if len(resampled_index) > len(index): - # Downsample. - _LOG.debug( - "Index length increased by %s = %s - %s", - len(resampled_index) - len(index), - len(resampled_index), - len(index), - ) - elif len(resampled_index) < len(index): - # Upsample. - _LOG.debug( - "Index length decreased by %s = %s - %s", - len(index) - len(resampled_index), - len(index), - len(resampled_index), - ) - else: - _LOG.debug("Index length=%s has not changed", len(index)) - # resampled_index.name = index_name - return resampled_index - - -def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame: - """ - Resample `DataFrame` by placing NaN in missing locations in the index. - - :param df: `DataFrame` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DataFrame` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Preserve the index name. - index_name = df.index.name - resampled_index = resample_index(df.index, frequency) - df_reindex = df.reindex(resampled_index) - df_reindex.index.name = index_name - return df_reindex - - -def find_gaps_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Find data present in one dataframe and missing in the other one. 
- - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: two dataframes with missing data - """ - # Get data present in first, but not present in second dataframe. - first_missing_indices = df2.index.difference(df1.index) - first_missing_data = df2.loc[first_missing_indices] - # Get data present in second, but not present in first dataframe. - second_missing_indices = df1.index.difference(df2.index) - second_missing_data = df1.loc[second_missing_indices] - return first_missing_data, second_missing_data - - -# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. -def apply_index_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the index mode. - - :param df1: first input df - :param df2: second input df - :param mode: method of processing indices - - "assert_equal": check that both indices are equal, assert otherwise - - "intersect": restrict both dfs to a common index - - "leave_unchanged": ignore any indices mismatch and return dfs as-is - :return: transformed copy of the inputs - """ - _LOG.debug("mode=%s", mode) - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - dassert_indices_equal(df1_copy, df2_copy) - elif mode == "intersect": - # TODO(Grisha): Add sorting on demand. 
- common_index = df1_copy.index.intersection(df2_copy.index) - df1_copy = df1_copy[df1_copy.index.isin(common_index)] - df2_copy = df2_copy[df2_copy.index.isin(common_index)] - elif mode == "leave_unchanged": - _LOG.debug( - "Ignoring any index missmatch as per user's request.\n" - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1_copy.index.difference(df2_copy.index), - df2_copy.index.difference(df1_copy.index), - ) - else: - raise ValueError(f"Unsupported index_mode={mode}") - return df1_copy, df2_copy - - -def apply_columns_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the column mode. - - :param df1: first input df - :param df2: second input df - :param mode: method of processing columns - - "assert_equal": check that both dfs have equal columns, assert otherwise - - "intersect": restrict both dfs to only include common columns - - "leave_unchanged": ignore any column mismatches and return dfs as-is - :return: transformed copy of the inputs - """ - _LOG.debug("mode=%s", mode) - # Input validation. - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - # Check if columns are equal or not. - dassert_columns_equal(df1_copy, df2_copy) - elif mode == "intersect": - # Filter dataframes based on its common columns. - common_columns = df1_copy.columns.intersection(df2_copy.columns) - df1_copy = df1_copy[common_columns] - df2_copy = df2_copy[common_columns] - # Log the string representation of 2 dfs. - _LOG.debug("df1 after filtering=\n%s", df_to_str(df1)) - _LOG.debug("df2 after filtering=\n%s", df_to_str(df2)) - elif mode == "leave_unchanged": - # Ignore mismatch. 
- _LOG.debug( - "Ignoring any column missmatch as per user's request.\n" - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - ) - else: - raise ValueError(f"Unsupported column mode: {mode}") - return df1_copy, df2_copy - - -def find_gaps_in_time_series( - time_series: pd.Series, - start_timestamp: pd.Timestamp, - end_timestamp: pd.Timestamp, - freq: str, -) -> pd.Series: - """ - Find missing points on a time interval specified by [start_timestamp, - end_timestamp], where point distribution is determined by . - - If the passed time series is of a unix epoch format. It is - automatically tranformed to pd.Timestamp. - - :param time_series: time series to find gaps in - :param start_timestamp: start of the time interval to check - :param end_timestamp: end of the time interval to check - :param freq: distance between two data points on the interval. - Aliases correspond to pandas.date_range's freq parameter, i.e. - "S" -> second, "T" -> minute. - :return: pd.Series representing missing points in the source time - series. - """ - _time_series = time_series - if str(time_series.dtype) in ["int32", "int64"]: - _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) - correct_time_series = pd.date_range( - start=start_timestamp, end=end_timestamp, freq=freq - ) - return correct_time_series.difference(_time_series) - - -def check_and_filter_matching_columns( - df: pd.DataFrame, required_columns: List[str], filter_data_mode: str -) -> pd.DataFrame: - """ - Check that columns are the required ones and if not filter data depending - on `filter_data_mode`. 
- - :param df: data to check columns for - :param required_columns: columns to return, skipping columns that are not required - :param filter_data_mode: control behaviour with respect to extra or missing columns - - "assert": raise an error if required columns do not match received columns - - "warn_and_trim": return the intersection of required and received columns and - issue a warning - :return: input data as it is if required columns match received columns otherwise - processed data, see `filter_data_mode` - """ - received_columns = df.columns.to_list() - hdbg.dassert_lte(1, len(received_columns)) - # - if filter_data_mode == "assert": - # Raise an assertion. - only_warning = False - elif filter_data_mode == "warn_and_trim": - # Just issue a warning. - only_warning = True - # Get columns intersection while preserving the order of the columns. - columns_intersection = [ - col_name - for col_name in required_columns - if col_name in received_columns - ] - hdbg.dassert_lte(1, len(columns_intersection)) - df = df[columns_intersection] - else: - raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") - hdbg.dassert_set_eq( - required_columns, - received_columns, - only_warning=only_warning, - msg="Received columns do not match required columns.", - ) - return df - - -def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: - """ - Compare contents of rows with same indices. - - Index is set to default sequential integer values because compare is - sensitive to multi index (probably because new multi indexes are created - for each difference in `compare`). Multi index columns are regular columns now. - Excess columns are removed so both dataframes are always same shape because - `compare` expects identical dataframes (same number of rows, columns, etc.). 
- - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: dataframe with data with same indices and different contents - """ - # Get rows on which the two dataframe indices match. - idx_intersection = df1.index.intersection(df2.index) - # Remove excess columns and reset indexes. - trimmed_second = df2.loc[idx_intersection].reset_index() - trimmed_first = df1.loc[idx_intersection].reset_index() - # Get difference between second and first dataframe. - data_difference = trimmed_second.compare(trimmed_first) - # Update data difference with original dataframe index names - # for easier identification. - index_names = tuple(df2.index.names) - # If index or multi index is named, it will be visible in data difference. - if index_names != (None,): - for index in data_difference.index: - for column in index_names: - data_difference.loc[index, column] = trimmed_second.loc[index][ - column - ] - data_difference = data_difference.convert_dtypes() - return data_difference - - -def drop_duplicates( - data: Union[pd.Series, pd.DataFrame], - use_index: bool, - column_subset: Optional[List[str]] = None, - *args: Any, - **kwargs: Any, -) -> Union[pd.Series, pd.DataFrame]: - """ - Wrap `pandas.drop_duplicates()`. - - See the official docs: - - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html - - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html - - :param use_index: - - if `True`, use index values together with a column subset for - identifying duplicates - - if `False`, duplicated rows are with the exact same values in a subset - and different indices - :param column_subset: a list of columns to consider for identifying duplicates - :return: data without duplicates - """ - _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) - num_rows_before = data.shape[0] - # Get all columns list for subset if no subset is passed. 
- if column_subset is None: - column_subset = data.columns.tolist() - else: - hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") - if use_index: - # Add dummy index column to use it for duplicates detection. - index_col_name = "use_index_col" - hdbg.dassert_not_in(index_col_name, data.columns.tolist()) - column_subset.insert(0, index_col_name) - data[index_col_name] = data.index - # - data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) - # - if use_index: - # Remove dummy index column. - data_no_dups = data_no_dups.drop([index_col_name], axis=1) - # Report the change. - num_rows_after = data_no_dups.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return data_no_dups - - -def dropna( - df: pd.DataFrame, - *args: Any, - drop_infs: bool = False, - report_stats: bool = False, - **kwargs: Any, -) -> pd.DataFrame: - """ - Create a wrapper around pd.dropna() reporting information about the removed - rows. - - :param df: dataframe to process - :param drop_infs: if +/- np.inf should be considered as nans - :param report_stats: if processing stats should be reported - :return: dataframe with nans dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - num_rows_before = df.shape[0] - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - df = df.dropna(*args, **kwargs) - if report_stats: - num_rows_after = df.shape[0] - pct_removed = hprint.perc( - num_rows_before - num_rows_after, num_rows_before - ) - _LOG.info("removed rows with nans: %s", pct_removed) - return df - - -def drop_axis_with_all_nans( - df: pd.DataFrame, - drop_rows: bool = True, - drop_columns: bool = False, - drop_infs: bool = False, - report_stats: bool = False, -) -> pd.DataFrame: - """ - Remove columns and rows not containing information (e.g., with only nans). - - The operation is not performed in place and the resulting df is - returned. 
Assume that the index is timestamps. - - :param df: dataframe to process - :param drop_rows: remove rows with only nans - :param drop_columns: remove columns with only nans - :param drop_infs: remove also +/- np.inf - :param report_stats: report the stats of the operations - :return: dataframe with specific nan axis dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - if drop_columns: - # Remove columns with all nans, if any. - cols_before = df.columns[:] - df = df.dropna(axis=1, how="all") - if report_stats: - # Report results. - cols_after = df.columns[:] - removed_cols = set(cols_before).difference(set(cols_after)) - pct_removed = hprint.perc( - len(cols_before) - len(cols_after), len(cols_after) - ) - _LOG.info( - "removed cols with all nans: %s %s", - pct_removed, - hprint.list_to_str(removed_cols), - ) - if drop_rows: - # Remove rows with all nans, if any. - rows_before = df.index[:] - df = df.dropna(axis=0, how="all") - if report_stats: - # Report results. - rows_after = df.index[:] - removed_rows = set(rows_before).difference(set(rows_after)) - if len(rows_before) == len(rows_after): - # Nothing was removed. - min_ts = max_ts = None - else: - # TODO(gp): Report as intervals of dates. - min_ts = min(removed_rows) - max_ts = max(removed_rows) - pct_removed = hprint.perc( - len(rows_before) - len(rows_after), len(rows_after) - ) - _LOG.info( - "removed rows with all nans: %s [%s, %s]", - pct_removed, - min_ts, - max_ts, - ) - return df - - -def reindex_on_unix_epoch( - df: pd.DataFrame, in_col_name: str, unit: str = "s" -) -> pd.DataFrame: - """ - Transform the column `in_col_name` into a datetime index. `in_col_name` - contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. - - :param df: dataframe with a unix epoch - :param in_col_name: column containing unix epoch - :param unit: the unit of unix epoch - """ - # Convert. 
- temp_col_name = in_col_name + "_tmp" - hdbg.dassert_in(in_col_name, df.columns) - hdbg.dassert_not_in(temp_col_name, df.columns) - # Save. - df[temp_col_name] = pd.to_datetime(df[in_col_name], unit=unit, utc=True) - df.set_index(temp_col_name, inplace=True, drop=True) - df.index.name = None - return df - - -def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: - """ - Compute a simple signature of a dataframe in string format. - - The signature contains metadata about dataframe size and certain - amount of rows from start and end of a dataframe. It is used for - testing purposes. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - text: List[str] = [f"df.shape={str(df.shape)}"] - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - # If dataframe size exceeds number of rows, show only subset in form of - # first and last rows. Otherwise, whole dataframe is shown. - if len(df) > num_rows: - text.append(f"df.head=\n{df.head(num_rows // 2)}") - text.append(f"df.tail=\n{df.tail(num_rows // 2)}") - else: - text.append(f"df.full=\n{df}") - text: str = "\n".join(text) - return text - - -# ############################################################################# - - -def trim_df( - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> pd.DataFrame: - """ - Trim the dataframe using values in `ts_col_name`. - - The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. - - :param df: the dataframe to trim - :param ts_col_name: the name of the column; `None` means index - :param start_ts: the start boundary for trimming - :param end_ts: the end boundary for trimming - :param left_close: whether to include the start boundary of the interval - - True: [start_ts, ... - - False: (start_ts, ... 
- :param right_close: whether to include the end boundary of the interval - - True: ..., end_ts] - - False: ..., end_ts) - :return: the trimmed dataframe - """ - if _TRACE: - _LOG.trace( - df_to_str(df, print_dtypes=True, print_shape_info=True, tag="df") - ) - _LOG.debug( - hprint.to_str("ts_col_name start_ts end_ts left_close right_close") - ) - if _TRACE: - _LOG.trace("df=\n%s", df_to_str(df)) - if df.empty: - # If the df is empty, there is nothing to trim. - return df - if start_ts is None and end_ts is None: - # If no boundaries are specified, there are no points of reference to trim - # to. - return df - num_rows_before = df.shape[0] - if start_ts is not None and end_ts is not None: - # Confirm that the interval boundaries are valid. - hdateti.dassert_tz_compatible(start_ts, end_ts) - hdbg.dassert_lte(start_ts, end_ts) - # Get the values to filter by. - if ts_col_name is None: - values_to_filter_by = pd.Series(df.index, index=df.index) - else: - hdbg.dassert_in(ts_col_name, df.columns) - values_to_filter_by = df[ts_col_name] - if values_to_filter_by.is_monotonic_increasing: - _LOG.trace("df is monotonic") - # The values are sorted; using the `pd.Series.searchsorted()` method. - # Find the index corresponding to the left boundary of the interval. - if start_ts is not None: - side = "left" if left_close else "right" - left_idx = values_to_filter_by.searchsorted(start_ts, side) - else: - # There is nothing to filter, so the left index is the first one. - left_idx = 0 - _LOG.debug(hprint.to_str("start_ts left_idx")) - # Find the index corresponding to the right boundary of the interval. - if end_ts is not None: - side = "right" if right_close else "left" - right_idx = values_to_filter_by.searchsorted(end_ts, side) - else: - # There is nothing to filter, so the right index is None. 
- right_idx = df.shape[0] - _LOG.debug(hprint.to_str("end_ts right_idx")) - # - hdbg.dassert_lte(0, left_idx) - hdbg.dassert_lte(left_idx, right_idx) - hdbg.dassert_lte(right_idx, df.shape[0]) - _LOG.debug(hprint.to_str("start_ts left_idx")) - if right_idx < df.shape[0]: - _LOG.debug(hprint.to_str("end_ts right_idx")) - df = df.iloc[left_idx:right_idx] - else: - _LOG.trace("df is not monotonic") - # The values are not sorted; using the `pd.Series.between` method. - if left_close and right_close: - inclusive = "both" - elif left_close: - inclusive = "left" - elif right_close: - inclusive = "right" - else: - inclusive = "neither" - epsilon = pd.DateOffset(minutes=1) - if start_ts is None: - start_ts = values_to_filter_by.min() - epsilon - if end_ts is None: - end_ts = values_to_filter_by.max() + epsilon - df = df[ - values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) - ] - # Report the changes. - num_rows_after = df.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return df - - -# TODO(Nina): Add `filter_data_mode`. -def merge_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - threshold_col_name: str, - *, - threshold: float = 0.9, - intersecting_columns: Optional[List[str]] = None, - **pd_merge_kwargs: Any, -) -> pd.DataFrame: - """ - Wrap `pd.merge`. - - :param threshold_col_name: a column's name to check the minimum - overlap on - :param threshold: minimum overlap of unique values in a specified - column to perform the merge - :param intersecting_columns: allow certain columns to appear in both - dataframes; store both in the resulting df with corresponding - suffixes - """ - _LOG.debug( - hprint.to_str( - "threshold_col_name threshold intersecting_columns pd_merge_kwargs" - ) - ) - # Sanity check column types. 
- threshold_col1 = df1[threshold_col_name] - threshold_col2 = df2[threshold_col_name] - only_first_elem = False - hdbg.dassert_array_has_same_type_element( - threshold_col1, threshold_col2, only_first_elem - ) - # TODO(Grisha): @Dan Implement asserts for each asset id. - # Check that an overlap of unique values is above the specified threshold. - threshold_unique_values1 = set(threshold_col1) - threshold_unique_values2 = set(threshold_col2) - threshold_common_values = set(threshold_unique_values1) & set( - threshold_unique_values2 - ) - threshold_common_values_share1 = len(threshold_common_values) / len( - threshold_unique_values1 - ) - threshold_common_values_share2 = len(threshold_common_values) / len( - threshold_unique_values2 - ) - hdbg.dassert_lte(threshold, threshold_common_values_share1) - hdbg.dassert_lte(threshold, threshold_common_values_share2) - # Use an empty set instead of None to perform set difference further. - intersecting_columns_set = ( - set() if intersecting_columns is None else set(intersecting_columns) - ) - # Check that there are no common columns except for the ones in `intersecting_columns`. - df1_cols = ( - set(df1.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - df2_cols = ( - set(df2.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - hdbg.dassert_not_intersection(df1_cols, df2_cols) - # - res_df = df1.merge(df2, **pd_merge_kwargs) - return res_df - - -# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? -def drop_duplicated( - df: pd.DataFrame, *, subset: Optional[List[str]] = None -) -> pd.DataFrame: - """ - Implement `df.duplicated` but considering also the index and ignoring nans. - """ - _LOG.debug("before df=\n%s", df_to_str(df)) - # Move the index to the df. 
- old_index_name = df.index.name - new_index_name = "_index.tmp" - hdbg.dassert_not_in(new_index_name, df.columns) - df.index.name = new_index_name - df.reset_index(drop=False, inplace=True) - # Remove duplicates by ignoring nans. - if subset is not None: - hdbg.dassert_isinstance(subset, list) - subset = [new_index_name] + subset - duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") - # Report the result of the operation. - if duplicated.sum() > 0: - num_rows_before = df.shape[0] - _LOG.debug("Removing duplicates df=\n%s", df_to_str(df.loc[duplicated])) - df = df.loc[~duplicated] - num_rows_after = df.shape[0] - _LOG.warning( - "Removed repeated rows num_rows=%s", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - _LOG.debug("after removing duplicates df=\n%s", df_to_str(df)) - # Set the index back. - df.set_index(new_index_name, inplace=True) - df.index.name = old_index_name - _LOG.debug("after df=\n%s", df_to_str(df)) - return df - - -# ############################################################################# - - -def infer_column_types(col: pd.Series): - """ - Determine which data type is most prevalent in a column. - - Examine the values in the given pandas Series and decides whether the - majority of entries are strings, numeric values, or booleans. - - :param col: The column to inspect. - :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, representing - the predominant type. 
- """ - vals = { - "is_numeric": pd.to_numeric(col, errors="coerce").notna(), - #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - "is_bool": col.map(lambda x: isinstance(x, bool)), - "is_string": col.map(lambda x: isinstance(x, str)), - } - vals = {k: float(v.mean()) for k, v in vals.items()} - # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", - # (vals["is_numeric"] >= vals["is_string"], "is_numeric", - # "is_string")) - if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): - type_ = "is_bool" - elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): - type_ = "is_numeric" - else: - type_ = "is_string" - vals["type"] = type_ - return vals - - -def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Identify the predominant data type for each column in a DataFrame. - - :param df: The DataFrame whose columns will be analyzed. - :return: A DataFrame with two columns: - - `column`: the name of each original column. - - `predominant_type`: the most frequent type in that column, - one of `"string"`, `"numeric"`, or `"bool"`. - """ - return df.apply(lambda x: pd.Series(infer_column_types(x))).T - - -def convert_to_type(col: pd.Series, type_: str) -> pd.Series: - """ - Convert a pandas Series to a specified data type. - - :param col: The input column to be converted. - :param type_: The target data type. Expected values include: - - `"is_bool"`: convert values to booleans. - - `"is_int"`: convert values to integers. - - `"is_numeric"`: convert values to float. - - `"is_string"`: convert values to strings. - :return: A new Series with the same index as `col`, cast to the requested - type. 
- """ - if type_ == "is_bool": - return col.map( - lambda x: ( - True - if x in ["True", 1, "1", "true", True] - else False - if x in [0, "0", "False", False, "false"] - else None - ) - ) - elif type_ == "is_int": - return pd.to_numeric(col, errors="coerce", downcast="integer") - elif type_ == "is_numeric": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_string": - return col.astype(str) - else: - raise ValueError(f"Unknown column type: {type_}") - - -def convert_col_to_int( - df: pd.DataFrame, - col: str, -) -> pd.DataFrame: - """ - Convert a column to an integer column. - - Example use case: Parquet uses categoricals. If supplied with a - categorical-type column, this function will convert it to an integer - column. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(col, str) - hdbg.dassert_in(col, df.columns) - # Attempt the conversion. - df[col] = df[col].astype("int64") - # Trust, but verify. - dassert_series_type_is(df[col], np.int64) - return df - - -def cast_series_to_type( - series: pd.Series, series_type: Optional[type] -) -> pd.Series: - """ - Convert a Pandas series to a given type. - - :param series: the input series - :param series_type: the type to convert the series into - - if None, then the series values are turned into Nones - :return: the series in the required type - """ - if series_type is None: - # Turn the series values into None. - series[:] = None - elif series_type is pd.Timestamp: - # Convert to timestamp. - series = pd.to_datetime(series) - elif series_type is dict: - # Convert to dict. - series = series.apply(eval) - else: - # Convert to the specified type. - series = series.astype(series_type) - return series - - -def _display(log_level: int, df: pd.DataFrame) -> None: - """ - Display a df in a notebook at the given log level. - - The behavior is similar to a command like `_LOG.log(log_level, ...)` but - for a notebook `display` command. 
- - :param log_level: log level at which to display a df. E.g., if `log_level = - logging.DEBUG`, then we display the df only if we are running with - `-v DEBUG`. If `log_level = logging.INFO` then we don't display it - """ - from IPython.display import display - - if ( - hsystem.is_running_in_ipynb() - and log_level >= hdbg.get_logger_verbosity() - ): - display(df) - - -def _df_to_str( - df: pd.DataFrame, - num_rows: Optional[int], - max_columns: int, - max_colwidth: int, - max_rows: int, - precision: int, - display_width: int, - use_tabulate: bool, - log_level: int, -) -> str: - is_in_ipynb = hsystem.is_running_in_ipynb() - out = [] - # Set dataframe print options. - with pd.option_context( - "display.max_colwidth", - max_colwidth, - # "display.height", 1000, - "display.max_rows", - max_rows, - "display.precision", - precision, - "display.max_columns", - max_columns, - "display.width", - display_width, - ): - if use_tabulate: - import tabulate - - out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) - # TODO(Grisha): Add an option to display all rows since if `num_rows` - # is `None`, only first and last 5 rows are displayed. Consider using - # `df.to_string()` instead of `str(df)`. - if num_rows is None or df.shape[0] <= num_rows: - # Print the entire data frame. - if not is_in_ipynb: - out.append(str(df)) - else: - # Display dataframe. - _display(log_level, df) - else: - nr = num_rows // 2 - if not is_in_ipynb: - # Print top and bottom of df. - out.append(str(df.head(nr))) - out.append("...") - tail_str = str(df.tail(nr)) - # Remove index and columns from tail_df. - skipped_rows = 1 - if df.index.name: - skipped_rows += 1 - tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) - out.append(tail_str) - else: - # TODO(gp): @all use this approach also above and update all the - # unit tests. 
- df = [ - df.head(nr), - pd.DataFrame( - [["..."] * df.shape[1]], index=[" "], columns=df.columns - ), - df.tail(nr), - ] - df = pd.concat(df) - # Display dataframe. - _display(log_level, df) - if not is_in_ipynb: - txt = "\n".join(out) - else: - txt = "" - return txt - - -# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that -# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. -# TODO(gp): We should make sure this works properly in a notebook, although -# it's not easy to unit test. -def df_to_str( - df: Union[pd.DataFrame, pd.Series, pd.Index], - *, - # TODO(gp): Remove this hack in the integration. - # handle_signed_zeros: bool = False, - handle_signed_zeros: bool = True, - num_rows: Optional[int] = 6, - print_dtypes: bool = False, - print_shape_info: bool = False, - print_nan_info: bool = False, - print_memory_usage: bool = False, - memory_usage_mode: str = "human_readable", - tag: Optional[str] = None, - max_columns: int = 10000, - max_colwidth: int = 2000, - max_rows: int = 500, - precision: int = 6, - display_width: int = 10000, - use_tabulate: bool = False, - log_level: int = logging.DEBUG, -) -> str: - """ - Print a dataframe to string reporting all the columns without trimming. - - Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works - properly when called from outside a notebook, i.e., the dataframe is printed - But it won't display the dataframe in a notebook, since the default level at - which the dataframe is displayed is `logging.DEBUG`. - - In this case to get the correct behavior one should do: - - ``` - log_level = ... 
- _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) - ``` - - :param: handle_signed_zeros: convert `-0.0` to `0.0` - :param: num_rows: max number of rows to print (half from the top and half from - the bottom of the dataframe) - - `None` to print the entire dataframe - :param print_dtypes: report dataframe types and information about the type of - each column by looking at the first value - :param print_shape_info: report dataframe shape, index and columns - :param print_memory_usage: report memory use for each - """ - if df is None: - return "" - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - elif isinstance(df, pd.Index): - df = df.to_frame(index=False) - hdbg.dassert_isinstance(df, pd.DataFrame) - # For some reason there are so-called "negative zeros", but we consider - # them equal to `0.0`. - df = df.copy() - if handle_signed_zeros: - for col_name in df.select_dtypes(include=[np.float64, float]).columns: - df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) - out = [] - # Print the tag. - if tag is not None: - out.append(f"# {tag}=") - if not df.empty: - # Print information about the shape and index. - # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info - if print_shape_info: - # TODO(gp): Unfortunately we can't improve this part of the output - # since there are many golden inside the code that would need to be - # updated. Consider automating updating the expected values in the code. - txt = f"index=[{df.index.min()}, {df.index.max()}]" - out.append(txt) - txt = f"columns={','.join(map(str, df.columns))}" - out.append(txt) - txt = f"shape={str(df.shape)}" - out.append(txt) - # Print information about the types. - if print_dtypes: - out.append("* type=") - - table = [] - - def _report_srs_stats(srs: pd.Series) -> List[Any]: - """ - Report dtype, the first element, and its type of series. 
- """ - row: List[Any] = [] - first_elem = srs.values[0] - num_unique = srs.nunique() - num_nans = srs.isna().sum() - row.extend( - [ - srs.dtype, - hprint.perc(num_unique, len(srs)), - hprint.perc(num_nans, len(srs)), - first_elem, - type(first_elem), - ] - ) - return row - - row = [] - col_name = "index" - row.append(col_name) - row.extend(_report_srs_stats(df.index)) - row = map(str, row) - table.append(row) - for col_name in df.columns: - row_: List[Any] = [] - row_.append(col_name) - row_.extend(_report_srs_stats(df[col_name])) - row_ = map(str, row_) - table.append(row_) - # - columns = [ - "col_name", - "dtype", - "num_unique", - "num_nans", - "first_elem", - "type(first_elem)", - ] - df_stats = pd.DataFrame(table, columns=columns) - stats_num_rows = None - df_stats_as_str = _df_to_str( - df_stats, - stats_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(df_stats_as_str) - # Print info about memory usage. - if print_memory_usage: - out.append("* memory=") - mem_use_df = pd.concat( - [df.memory_usage(deep=False), df.memory_usage(deep=True)], - axis=1, - keys=["shallow", "deep"], - ) - # Add total row. - mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) - mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) - # Convert into the desired format. - if memory_usage_mode == "bytes": - pass - elif memory_usage_mode == "human_readable": - import helpers.hintrospection as hintros - - mem_use_df = mem_use_df.applymap(hintros.format_size) - else: - raise ValueError( - f"Invalid memory_usage_mode='{memory_usage_mode}'" - ) - memory_num_rows = None - memory_usage_as_txt = _df_to_str( - mem_use_df, - memory_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(memory_usage_as_txt) - # Print info about nans. 
- if print_nan_info: - num_elems = df.shape[0] * df.shape[1] - num_nans = df.isna().sum().sum() - txt = f"num_nans={hprint.perc(num_nans, num_elems)}" - out.append(txt) - # - num_zeros = df.isnull().sum().sum() - txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" - out.append(txt) - # TODO(gp): np can't do isinf on objects like strings. - # num_infinite = np.isinf(df).sum().sum() - # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) - # out.append(txt) - # - num_nan_rows = df.dropna().shape[0] - txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" - out.append(txt) - # - num_nan_cols = df.dropna(axis=1).shape[1] - txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" - out.append(txt) - if hsystem.is_running_in_ipynb(): - if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): - print("\n".join(out)) - txt = None - # Print the df. - df_as_str = _df_to_str( - df, - num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - if not hsystem.is_running_in_ipynb(): - out.append(df_as_str) - txt = "\n".join(out) - return txt - - -def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: - """ - Organize dataframe values into a column-row structure. - - - Indentation artifacts are removed - - The index placement is handled, i.e. - - if the index is named, the name is located and moved to the same - row as the column names - - if the index is not named, the row with the column names receives - a placeholder empty value in its place - - Empty columns are dropped - - :param rows_values: row values extracted from a string df representation - :return: row values assembled into a valid column-row structure - """ - # Clean up indentation artifacts. - if all(row[0] == "" for row in rows_values): - # Remove the first empty cell in each row. 
- for row in rows_values: - del row[0] - # If the index is named, its name is located in the second row, - # with an optional extra empty value cell value next to it. - if len(rows_values[1]) == 1 or ( - len(rows_values[1]) == 2 and rows_values[1][1] == "" - ): - # Move the index name to the row with all the column names. - if rows_values[0][0] == "": - rows_values[0][0] = rows_values[1][0] - else: - rows_values[0].insert(0, rows_values[1][0]) - # Drop the former index name row. - del rows_values[1] - else: - # Add an empty cell for the absent index name. - rows_values[0].insert(0, "") - # Identify and remove empty columns. - min_len_row = min(len(row) for row in rows_values) - idxs_to_delete = [] - for i in range(min_len_row): - if all(row[i] == "" for row in rows_values): - idxs_to_delete.append(i) - for idx in idxs_to_delete: - for row in rows_values: - del row[idx] - # Confirm that all the rows have the same number of values. - hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) - return rows_values - - -def str_to_df( - df_as_str: str, - col_to_type: Dict[str, Optional[type]], - col_to_name_type: Dict[str, type], -) -> pd.DataFrame: - """ - Convert a string representation of a dataframe into a Pandas df. - - :param df_as_str: a df as a string - - the format of the string is the same as the output of - `hpandas.df_to_str()` on a pd.DataFrame, e.g. - ``` - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05 - ``` - - values (including column names) that contain spaces need - to be enclosed in double quotation marks, e.g. - "2023-03-15 16:35:41.205000+00:00" - :param col_to_type: a mapping between the column names and the - types of the values in these columns - - if a column is not present in the mapping, its values will - remain strings - - to indicate the type of index values, use {"__index__": ...} - mapping, e.g. 
{"__index__": pd.Timestamp} - :param col_to_name_type: a mapping between the column names and - the required types of these column names - - same conventions apply as for `col_to_type` (see above) - :return: a converted Pandas dataframe - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - # Remove the placeholder ["..."] row. - rows_values = [row for row in rows_values if row != ["..."]] - # Organize values into a proper column-row structure. - rows_values = _assemble_df_rows(rows_values) - # Get the column names. - column_names = rows_values[0][1:] - # Get the index. - index_values = [row[0] for row in rows_values[1:]] - index_name = rows_values[0][0] - # Construct the df. - df = pd.DataFrame( - [row[1:] for row in rows_values[1:]], - columns=column_names, - index=index_values, - ) - if index_name != "": - df.index.name = index_name - # Cast the columns into appropriate types. - for col, col_type in col_to_type.items(): - if col == "__index__": - df.index = cast_series_to_type(df.index, col_type) - else: - df[col] = cast_series_to_type(df[col], col_type) - # Cast the column names into appropriate types. - for col, col_name_type in col_to_name_type.items(): - if col == "__index__": - df.index = df.index.rename(col_name_type(df.index.name)) - else: - df = df.rename(columns={col: col_name_type(col)}) - return df - - -def convert_df_to_json_string( - df: pd.DataFrame, - n_head: Optional[int] = 10, - n_tail: Optional[int] = 10, - columns_order: Optional[List[str]] = None, -) -> str: - """ - Convert dataframe to pretty-printed JSON string. - - To select all rows of the dataframe, pass `n_head` as None. 
- - :param df: dataframe to convert - :param n_head: number of printed top rows - :param n_tail: number of printed bottom rows - :param columns_order: order for the KG columns sort - :return: dataframe converted to JSON string - """ - # Append shape of the initial dataframe. - shape = f"original shape={df.shape}" - # Reorder columns. - if columns_order is not None: - hdbg.dassert_set_eq(columns_order, df.cols) - df = df[columns_order] - # Select head. - if n_head is not None: - head_df = df.head(n_head) - else: - # If no n_head provided, append entire dataframe. - head_df = df - # Transform head to json. - head_json = head_df.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - if n_tail is not None: - # Transform tail to json. - tail = df.tail(n_tail) - tail_json = tail.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - else: - # If no tail specified, append an empty string. - tail_json = "" - # Join shape and dataframe to single string. - output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) - return output_str - - -def convert_df( - df: pd.DataFrame, *, print_invalid_values: bool = False -) -> pd.DataFrame: - """ - Convert each DataFrame column to its predominant type. - - This function inspects every column in `df`, determines whether the - majority of its values are boolean, numeric, or string, and then - casts the column to that type using `convert_to_type`. - - :param df: The input DataFrame whose columns will be converted. - :param print_invalid_values: If True, print any original values that could - not be converted (they become NaN after conversion) - :return: a new DataFrame with each column cast to its detected predominant - type. - """ - df_out = pd.DataFrame(index=df.index) - for col in df.columns: - series = df[col] - # Determine the dominant datatype. 
- col_type = infer_column_types(series)["type"] - hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) - # Convert the column to dominant datatype. - converted = convert_to_type(series, col_type) - if print_invalid_values: - invalid_mask = series.notna() & converted.isna() - if invalid_mask.any(): - invalid = series[invalid_mask].tolist() - print(f"Column {col} dropped invalid values: {invalid}") - df_out[col] = converted - return df_out - - -# ############################################################################# - - -def read_csv_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a CSV file into a `pd.DataFrame`. - """ - # Gets filename from stream if it is not already a string, - # so it can be inspected for extension type. - file_name = stream if isinstance(stream, str) else vars(stream)["path"] - # Handle zipped files. - if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "gzip" - elif file_name.endswith(".zip"): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "zip" - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_csv(stream, *args, **kwargs) - return df - - -def read_parquet_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a Parquet file into a `pd.DataFrame`. - """ - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_parquet(stream, *args, **kwargs) - return df - - -# ############################################################################# - - -# TODO(Paul): Add unit tests. -def compute_weighted_sum( - dfs: Dict[str, pd.DataFrame], - weights: pd.DataFrame, - *, - index_mode: str = "assert_equal", -) -> Dict[str, pd.DataFrame]: - """ - Compute weighted sums of `dfs` using `weights`. 
- - :param dfs: dataframes keyed by id; all dfs should have the same cols, - indices are handled based on the `index_mode` - :param weights: float weights indexed by id with unique col names - :param index_mode: same as `mode` in `apply_index_mode()` - :return: weighted sums keyed by weight col names - """ - hdbg.dassert_isinstance(dfs, dict) - hdbg.dassert(dfs, "dictionary of dfs must be nonempty") - # Get a dataframe from the dictionary and record its index and columns. - id_ = list(dfs)[0] - hdbg.dassert_isinstance(id_, str) - df = dfs[id_] - hdbg.dassert_isinstance(df, pd.DataFrame) - cols = df.columns - # Sanity-check dataframes in dictionary. - for key, value in dfs.items(): - hdbg.dassert_isinstance(key, str) - hdbg.dassert_isinstance(value, pd.DataFrame) - # The reference df is not modified. - _, value = apply_index_mode(df, value, index_mode) - hdbg.dassert( - value.columns.equals(cols), - "Column equality fails for keys=%s, %s", - id_, - key, - ) - # Sanity-check weights. - hdbg.dassert_isinstance(weights, pd.DataFrame) - hdbg.dassert_eq(weights.columns.nlevels, 1) - hdbg.dassert(not weights.columns.has_duplicates) - hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) - # Create a multiindexed dataframe to facilitate computing the weighted sums. - weighted_dfs = {} - combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) - # TODO(Paul): Consider relaxing the NaN-handling. - for col in weights.columns: - weighted_combined_df = combined_df.multiply(weights[col], level=0) - weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( - min_count=len(dfs) - ) - weighted_dfs[col] = weighted_sums - return weighted_dfs - - -def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: - """ - Remove N rows from the input data and shuffle the remaining ones. 
- - :param df: input data - :param nrows: the number of rows to remove from the original data - :param seed: see `random.seed()` - :return: shuffled data with removed rows - """ - hdbg.dassert_lte(1, nrows) - hdbg.dassert_lte(nrows, df.shape[0]) - idx = list(range(df.shape[0])) - random.seed(seed) - random.shuffle(idx) - idx = sorted(idx[nrows:]) - return df.iloc[idx] - - -def remap_obj( - obj: Union[pd.Series, pd.Index], - map_: Dict[Any, Any], - **kwargs: Any, -) -> pd.Series: - """ - Substitute each value of an object with another value from a dictionary. - - :param obj: an object to substitute value in - :param map_: values to substitute with - :return: remapped pandas series - """ - hdbg.dassert_lte(1, obj.shape[0]) - # TODO(Grisha): consider extending for other mapping types supported by - # `pd.Series.map`. - hdbg.dassert_isinstance(map_, dict) - # Check that every element of the object is in the mapping. - hdbg.dassert_is_subset(obj, map_.keys()) - new_srs = obj.map(map_, **kwargs) - return new_srs - - -def get_random_df( - num_cols: int, - seed: Optional[int] = None, - date_range_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - Compute df with random data with `num_cols` columns and index obtained by - calling `pd.date_range(**kwargs)`. 
- - :param num_cols: the number of columns in a DataFrame to generate - :param seed: see `random.seed()` - :param date_range_kwargs: kwargs for `pd.date_range()` - """ - if seed: - np.random.seed(seed) - dt = pd.date_range(**date_range_kwargs) - df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) - return df - - -# ############################################################################# - -# TODO(gp): -> AxisNameSet -ColumnSet = Optional[Union[str, List[str]]] - - -# TODO(gp): -> _resolve_axis_names -def _resolve_column_names( - column_set: ColumnSet, - columns: Union[List[str], pd.Index], - *, - keep_order: bool = False, -) -> List[str]: - """ - Change format of the columns and perform some sanity checks. - - :param column_set: columns to proceed - :param columns: all columns available - :param keep_order: preserve the original order or allow sorting - """ - # Ensure that `columns` is well-formed. - if isinstance(columns, pd.Index): - columns = columns.to_list() - hdbg.dassert_isinstance(columns, list) - hdbg.dassert_lte(1, len(columns)) - # - if column_set is None: - # Columns were not specified, thus use the list of all the columns. - column_set = columns - else: - if isinstance(column_set, str): - column_set = [column_set] - hdbg.dassert_isinstance(column_set, list) - hdbg.dassert_lte(1, len(column_set)) - hdbg.dassert_is_subset(column_set, columns) - if keep_order: - # Keep the selected columns in the same order as in the original - # `columns`. - column_set = [c for c in columns if c in column_set] - return column_set - - -# TODO(Grisha): finish the function. -# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? -def remove_outliers( - df: pd.DataFrame, - lower_quantile: float, - *, - column_set: ColumnSet, - # TODO(Grisha): the params are not used. 
- fill_value: float = np.nan, - mode: str = "remove_outliers", - axis: Any = 0, - upper_quantile: Optional[float] = None, -) -> pd.DataFrame: - hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") - # - hdbg.dassert_lte(0.0, lower_quantile) - if upper_quantile is None: - upper_quantile = 1.0 - lower_quantile - hdbg.dassert_lte(lower_quantile, upper_quantile) - hdbg.dassert_lte(upper_quantile, 1.0) - # - df = df.copy() - if axis == 0: - all_columns = df.columns - columns = _resolve_column_names(column_set, all_columns) - hdbg.dassert_is_subset(columns, df.columns) - for column in all_columns: - if column in columns: - df[column] = df[column].quantile( - [lower_quantile, upper_quantile] - ) - elif axis == 1: - all_rows = df.rows - rows = _resolve_column_names(column_set, all_rows) - hdbg.dassert_is_subset(rows, df.rows) - for row in all_rows: - if row in rows: - df[row] = df[row].quantile([lower_quantile, upper_quantile]) - else: - raise ValueError(f"Invalid axis='{axis}'") - return df - - -# ############################################################################# - - -# TODO(Grisha): add assertions/logging. -def get_df_from_iterator( - iter_: Iterator[pd.DataFrame], - *, - sort_index: bool = True, -) -> pd.DataFrame: - """ - Concat all the dataframes in the iterator in one dataframe. - - :param iter_: dataframe iterator - :param sort_index: whether to sort output index or not - :return: combined iterator data - """ - # TODO(gp): @all make a copy of `iter_` so we don't consume it. - dfs = list(iter_) - df_res = pd.concat(dfs) - if sort_index: - df_res = df_res.sort_index() - return df_res - - -def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> pd.DataFrame: - """ - Colorize a df with a heatmap depending on the numeric values. - - :param axis: along which axis to compute the heatmap - - 0 colorize along rows - - 1 colorize along columns - - None: colorize everything - """ - # Keep it here to avoid long start up times. 
- import seaborn as sns - - cm = sns.diverging_palette(5, 250, as_cmap=True) - df = df.style.background_gradient(axis=axis, cmap=cm) - return df - - -def compare_nans_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> pd.DataFrame: - """ - Compare equality of DataFrames in terms of NaNs. - - For example: - - `5 vs np.nan` is a mismatch - - `np.nan vs 5` is a mismatch - - `np.nan vs np.nan` is a match - - `np.nan vs np.inf` is a mismatch - - :param df1: dataframe to compare - :param df2: dataframe to compare with - :return: dataframe that shows the differences stacked side by side, see - `pandas.DataFrame.compare()` for an example - """ - dassert_axes_equal(df1, df2) - # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. - mask1 = df1.isna() & ~df2.isna() - mask2 = ~df1.isna() & df2.isna() - mask3 = mask1 | mask2 - # Compute a dataframe with the differences. - nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) - return nan_diff_df - - -# TODO(Grisha): -> `compare_dataframes()`? -def compare_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - row_mode: str = "equal", - column_mode: str = "equal", - # TODO(Grisha): should be True by default? - compare_nans: bool = False, - diff_mode: str = "diff", - assert_diff_threshold: float = 1e-3, - close_to_zero_threshold: float = 1e-6, - zero_vs_zero_is_zero: bool = True, - remove_inf: bool = True, - log_level: int = logging.DEBUG, - only_warning: bool = True, -) -> pd.DataFrame: - """ - Compare two dataframes. - - This works for dataframes with and without multi-index. 
- - :param row_mode: control how the rows are handled - - "equal": rows need to be the same for the two dataframes - - "inner": compute the common rows for the two dataframes - :param column_mode: same as `row_mode` - :param compare_nans: include NaN comparison if True otherwise just - compare non-NaN values - :param diff_mode: control how the dataframes are compared in terms of - corresponding elements - - "diff": use the difference - - "pct_change": use the percentage difference - :param assert_diff_threshold: maximum allowed total difference - - do not assert if `None` - - works when `diff_mode` is "pct_change" - :param close_to_zero_threshold: round numbers below the threshold to 0 - :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 - if True, otherwise keep the actual result - :param remove_inf: replace +-inf with `np.nan` - :param log_level: logging level - :param only_warning: when `True` the function issues a warning instead of aborting - :return: a singe dataframe with differences as values - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - # Check value of `assert_diff_threshold`, if it was passed. - if assert_diff_threshold: - hdbg.dassert_lte(assert_diff_threshold, 1.0) - hdbg.dassert_lte(0.0, assert_diff_threshold) - # TODO(gp): Factor out this logic and use it for both compare_visually_dfs - # and - if row_mode == "equal": - dassert_indices_equal(df1, df2) - elif row_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - same_rows = list((set(df1.index)).intersection(set(df2.index))) - df1 = df1[df1.index.isin(same_rows)] - df2 = df2[df2.index.isin(same_rows)] - else: - raise ValueError(f"Invalid row_mode='{row_mode}'") - # - if column_mode == "equal": - hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) - elif column_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. 
- col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) - df1 = df1[col_names] - df2 = df2[col_names] - else: - raise ValueError(f"Invalid column_mode='{column_mode}'") - # Round small numbers to 0 to exclude them from the diff computation. - close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold - df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( - 0 - ) - df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( - 0 - ) - # Compute the difference df. - if diff_mode == "diff": - # Test and convert the assertion into a boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - df1, df2, check_like=True, check_dtype=False - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise an assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n are not equal.", - df_to_str(df1, log_level=log_level), - df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Calculate the difference. - df_diff = df1 - df2 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - elif diff_mode == "pct_change": - # Compare NaN values in dataframes. - nan_diff_df = compare_nans_in_dataframes(df1, df2) - _LOG.debug("Dataframe with NaN differences=\n%s", df_to_str(nan_diff_df)) - msg = "There are NaN values in one of the dataframes that are not in the other one." - hdbg.dassert_eq( - 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning - ) - # Compute pct_change. - df_diff = 100 * (df1 - df2) / df2.abs() - if zero_vs_zero_is_zero: - # When comparing 0 to 0 set the diff (which is NaN by default) to 0. - df1_mask = df1 == 0 - df2_mask = df2 == 0 - zero_vs_zero_mask = df1_mask & df2_mask - df_diff[zero_vs_zero_mask] = 0 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - # Check if `df_diff` values are less than `assert_diff_threshold`. 
- if assert_diff_threshold is not None: - nan_mask = df_diff.isna() - within_threshold = ( - df_diff.abs() <= assert_diff_threshold - ) | nan_mask - expected = pd.DataFrame( - True, - index=within_threshold.index, - columns=within_threshold.columns, - ) - # Test and convert the assertion into boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - within_threshold, expected, check_exact=True - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", - df_to_str(df1, log_level=log_level), - df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Report max diff. - max_diff = df_diff.abs().max().max() - _LOG.log( - log_level, - "Maximum percentage difference between the two dataframes = %s", - max_diff, - ) - else: - raise ValueError(f"diff_mode={diff_mode}") - df_diff = df_diff.add_suffix(f".{diff_mode}") - return df_diff - - -# ############################################################################# -# Multi-index dfs -# ############################################################################# - - -# TODO(Grisha): should be a more elegant way to add a column. -def add_multiindex_col( - df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str -) -> pd.DataFrame: - """ - Add column to a multiindex DataFrame. - - Note: each column in a multiindex DataFrame is a DataFrame itself. - - :param df: multiindex df - :param multiindex_col: column (i.e. 
singleindex df) of a multiindex df - :param col_name: name of a new column - :return: a multiindex DataFrame with a new column - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) - hdbg.dassert_isinstance(col_name, str) - hdbg.dassert_not_in(col_name, df.columns) - for col in multiindex_col.columns: - df[col_name, col] = multiindex_col[col] - return df - - -def list_to_str( - vals: List[Any], - *, - sep_char: str = ", ", - enclose_str_char: str = "'", - max_num: Optional[int] = 10, -) -> str: - """ - Convert a list of values into a formatted string representation. - - E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" - - :param vals: values to be converted - :param sep_char: separator to use between elements - :param enclose_str_char: character to enclose each element's string - representation; if empty, elements are not enclosed - :param max_num: maximum number of elements to display in the output - :return: the formatted string representing the list - """ - vals_as_str = list(map(str, vals)) - # Add a str around. - if enclose_str_char: - vals_as_str = [ - enclose_str_char + v + enclose_str_char for v in vals_as_str - ] - # - ret = f"{len(vals)} [" - if max_num is not None and len(vals) > max_num: - hdbg.dassert_lt(1, max_num) - ret += sep_char.join(vals_as_str[: int(max_num / 2)]) - ret += sep_char + "..." + sep_char - ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) - else: - ret += sep_char.join(vals_as_str) - ret += "]" - return ret - - -def multiindex_df_info( - df: pd.DataFrame, - *, - log_level: int = logging.INFO, - **list_to_str_kwargs: Dict[str, Any], -) -> str: - """ - Report information about a multi-index df. 
- """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - columns_level0 = df.columns.levels[0] - columns_level1 = df.columns.levels[1] - rows = df.index - ret = [] - ret.append( - f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" - ) - ret.append( - "columns_level0=" + list_to_str(columns_level0, **list_to_str_kwargs) - ) - ret.append( - "columns_level1=" + list_to_str(columns_level1, **list_to_str_kwargs) - ) - ret.append("rows=" + list_to_str(rows, **list_to_str_kwargs)) - if isinstance(df.index, pd.DatetimeIndex): - # Display timestamp info. - start_timestamp = df.index.min() - end_timestamp = df.index.max() - frequency = df.index.freq - if frequency is None: - # Try to infer frequency. - frequency = pd.infer_freq(df.index) - ret.append(f"start_timestamp={start_timestamp}") - ret.append(f"end_timestamp={end_timestamp}") - ret.append(f"frequency={frequency}") - ret = "\n".join(ret) - _LOG.log(log_level, ret) - return ret - - -def subset_multiindex_df( - df: pd.DataFrame, - *, - # TODO(gp): Consider passing trim_df_kwargs as kwargs. - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, - columns_level0: ColumnSet = None, - columns_level1: ColumnSet = None, - keep_order: bool = False, -) -> pd.DataFrame: - """ - Filter multi-index DataFrame by timestamp index and column levels. - - :param start_timestamp: see `trim_df()` - :param end_timestamp: see `trim_df()` - :param columns_level0: column names that corresponds to `df.columns.levels[0]` - - `None` means no filtering - :param columns_level1: column names that corresponds to `df.columns.levels[1]` - - `None` means no filtering - :param keep_order: see `_resolve_column_names()` - :return: filtered DataFrame - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - # Filter by timestamp. 
- allow_empty = False - strictly_increasing = False - dassert_time_indexed_df(df, allow_empty, strictly_increasing) - df = trim_df( - df, - ts_col_name=None, - start_ts=start_timestamp, - end_ts=end_timestamp, - left_close=True, - right_close=True, - ) - # Filter level 0. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level0 = df.columns.levels[0] - columns_level0 = _resolve_column_names( - columns_level0, all_columns_level0, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) - df = df[columns_level0] - # Filter level 1. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level1 = df.columns.levels[1] - columns_level1 = _resolve_column_names( - columns_level1, all_columns_level1, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) - df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) - return df - - -# ############################################################################# - - -def compare_multiindex_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, - compare_dfs_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - - Subset both multi-index dfs, if needed - - Compare dfs - - :param subset_multiindex_df: params for `subset_multiindex_df()` - :param compare_dfs_kwargs: params for `compare_dfs()` - :return: df with differences as values - """ - # Subset dfs. - if subset_multiindex_df_kwargs is None: - subset_multiindex_df_kwargs = {} - subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) - subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) - # Compare dfs. 
- if compare_dfs_kwargs is None: - compare_dfs_kwargs = {} - diff_df = compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) - return diff_df - - -# ############################################################################# - - -def compute_duration_df( - tag_to_df: Dict[str, pd.DataFrame], - *, - intersect_dfs: bool = False, - valid_intersect: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: - """ - Compute a df with some statistics about the time index. - - E.g., - ``` - min_index max_index min_valid_index max_valid_index - tag1 - tag2 - ``` - - :param intersect_dfs: return a transformed dict with the intersection of - indices of all the dfs if True, otherwise return the input data as is - :param valid_intersect: intersect indices without NaNs if True, otherwise - intersect indices as is - :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param - """ - hdbg.dassert_isinstance(tag_to_df, Dict) - # Create df and assign columns. - data_stats = pd.DataFrame() - min_col = "min_index" - max_col = "max_index" - min_valid_index_col = "min_valid_index" - max_valid_index_col = "max_valid_index" - # Collect timestamp info from all dfs. - for tag in tag_to_df.keys(): - # Check that the passed timestamp has timezone info. - hdateti.dassert_has_tz(tag_to_df[tag].index[0]) - dassert_index_is_datetime(tag_to_df[tag]) - # Compute timestamp stats. - data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() - data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() - data_stats.loc[tag, min_valid_index_col] = ( - tag_to_df[tag].dropna().index.min() - ) - data_stats.loc[tag, max_valid_index_col] = ( - tag_to_df[tag].dropna().index.max() - ) - # Make a copy so we do not modify the original data. - tag_to_df_updated = tag_to_df.copy() - # Change the initial dfs with intersection. - if intersect_dfs: - if valid_intersect: - # Assign start, end date column according to specs. 
- min_col = min_valid_index_col - max_col = max_valid_index_col - # The start of the intersection will be the max value amongt all start dates. - intersection_start_date = data_stats[min_col].max() - # The end of the intersection will be the min value amongt all end dates. - intersection_end_date = data_stats[max_col].min() - for tag in tag_to_df_updated.keys(): - df = trim_df( - tag_to_df_updated[tag], - ts_col_name=None, - start_ts=intersection_start_date, - end_ts=intersection_end_date, - left_close=True, - right_close=True, - ) - tag_to_df_updated[tag] = df - return data_stats, tag_to_df_updated - - -# ############################################################################# - - -# TODO(gp): Remove this since it's in Google API. -def to_gsheet( - df: pd.DataFrame, - gsheet_name: str, - gsheet_sheet_name: str, - overwrite: bool, -) -> None: - """ - Save a dataframe to a Google sheet. - - :param df: the dataframe to save to a Google sheet - :param gsheet_name: the name of the Google sheet to save the df - into; the Google sheet with this name must already exist on the - Google Drive - :param gsheet_sheet_name: the name of the sheet in the Google sheet - :param overwrite: if True, the contents of the sheet are erased - before saving the dataframe into it; if False, the dataframe is - appended to the contents of the sheet - """ - import gspread_pandas - - spread = gspread_pandas.Spread( - gsheet_name, sheet=gsheet_sheet_name, create_sheet=True - ) - if overwrite: - spread.clear_sheet() - else: - sheet_contents = spread.sheet_to_df(index=None) - combined_df = pd.concat([sheet_contents, df]) - df = combined_df.drop_duplicates() - spread.df_to_sheet(df, index=False) - - -# ############################################################################# -# _SummaryRow -# ############################################################################# - - -@dataclasses.dataclass -class _SummaryRow: - """ - Output of a check corresponding to a row of the summary df. 
- """ - - # Description of the check. - description: str - # Description of the output. - comment: str - # Whether the check was successful or not. - is_ok: bool - - -# ############################################################################# -# CheckSummary -# ############################################################################# - - -class CheckSummary: - """ - Collect and report the results of several checks performed in a notebook. - """ - - def __init__(self, *, title: Optional[str] = ""): - self.title = title - # - self._array: List[_SummaryRow] = [] - - def add(self, description: str, comment: str, is_ok: bool) -> None: - """ - Add the result of a single check. - """ - summary_row = _SummaryRow(description, comment, is_ok) - self._array.append(summary_row) - - def is_ok(self) -> bool: - """ - Compute whether all the checks were succesfull or not. - """ - is_ok = all(sr.is_ok for sr in self._array) - return is_ok - - def report_outcome( - self, *, notebook_output: bool = True, assert_on_error: bool = True - ) -> Optional[str]: - """ - Report the result of the entire check. - - :param notebook_output: report the result of the checks for a - notebook or as a string - :param assert_on_error: assert if one check failed - """ - df = pd.DataFrame(self._array) - - # Compute result as a string. - result = [] - if self.title: - result.append("# " + self.title) - result.append(str(df)) - is_ok = self.is_ok() - result.append(f"is_ok={is_ok}") - result = "\n".join(result) - # Display on a notebook, if needed. - if notebook_output: - if self.title: - print(self.title) - - # Convert DataFrame to HTML with colored rows based on 'is_ok' column. - def _color_rows(row: bool) -> str: - """ - Apply red/green color based on boolean value in `row["is_ok"]`. 
- """ - is_ok = row["is_ok"] - color = "#FA6B84" if not is_ok else "#ACF3AE" - return [f"background-color: {color}"] * len(row) - - df_html = df.style.apply(_color_rows, axis=1) - from IPython.display import display - - display(df_html) - print(f"is_ok={is_ok}") - # Assert if at least one of the check failed. - if not is_ok and assert_on_error: - raise ValueError("The checks have failed:\n" + result) - # For notebooks, we want to return None, since the outcome was - # already displayed. - if notebook_output: - result = None - return result - - -# ############################################################################# - - -def add_end_download_timestamp( - obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" -) -> Union[pd.DataFrame, Dict]: - """ - Add a column 'end_download_timestamp' to the DataFrame with the current - time. - - :param obj: The DataFrame to which the column will be added. - :param timezone: The timezone for the current time. Defaults to - 'UTC'. - """ - # Get current timestamp. - current_ts = hdateti.get_current_time(timezone) - # Set value of end_download_timestamp. - obj["end_download_timestamp"] = current_ts - return obj - - -def filter_df( - df: pd.DataFrame, - col_name: str, - value: Any, - *, - invert: bool = False, - check_value: bool = True, - print_info: bool = True, -) -> pd.DataFrame: - hdbg.dassert_in(col_name, df.columns) - if isinstance(value, list): - mask = df[col_name].isin(value) - else: - if check_value: - hdbg.dassert_in(value, df[col_name].unique()) - mask = df[col_name] == value - if invert: - mask = ~mask - if print_info: - _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0])) - return df[mask] - - -def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Dict[str, Any]) -> str: - """ - Report percentage of True for a list / series. 
- """ - if isinstance(vals, list): - vals = pd.Series(vals) - ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) - return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py deleted file mode 100644 index 54ca04c93..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py +++ /dev/null @@ -1,628 +0,0 @@ -""" -Statistical analysis and ML functions for pandas DataFrames. - -Import as: - -import helpers.hpandas_analysis as hpananal -""" - -import datetime -import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -# Lazy imports to avoid slow module loading. -# When a type checker analyzes the code: it pretends the imports exist, so you -# can use those names in type annotations without “unknown name” errors. -# These heavy dependencies are only imported when functions are actually called. -if TYPE_CHECKING: - import matplotlib as mpl - -_LOG = logging.getLogger(__name__) - - -def _get_num_pcs_to_plot(num_pcs_to_plot: int, max_pcs: int) -> int: - """ - Get the number of principal components to plot. - - :param num_pcs_to_plot: requested number of PCs to plot, use -1 for - all - :param max_pcs: maximum number of available principal components - :return: validated number of PCs to plot - """ - if num_pcs_to_plot == -1: - num_pcs_to_plot = max_pcs - hdbg.dassert_lte(0, num_pcs_to_plot) - hdbg.dassert_lte(num_pcs_to_plot, max_pcs) - return num_pcs_to_plot - - -def rolling_corr_over_time( - df: pd.DataFrame, com: float, nan_mode: str -) -> pd.DataFrame: - """ - Compute rolling correlation over time. 
- - :return: corr_df is a multi-index df storing correlation matrices - with labels - """ - import helpers.hpandas_dassert as hpandass - - hpandass.dassert_strictly_increasing_index(df) - # Handle NaNs based on mode. - if nan_mode == "drop": - df = df.dropna(how="any") - elif nan_mode == "fill_with_zero": - df = df.fillna(0.0) - elif nan_mode == "abort": - num_nans = np.isnan(df).sum().sum() - if num_nans > 0: - raise ValueError("df has %d nans\n%s" % (num_nans, df)) - else: - raise ValueError("Invalid nan_mode='%s'" % nan_mode) - corr_df = df.ewm(com=com, min_periods=3 * com).corr() - return corr_df - - -def _get_eigvals_eigvecs( - df: pd.DataFrame, dt: datetime.date, sort_eigvals: bool -) -> Tuple[np.array, np.array]: - """ - Compute eigenvalues and eigenvectors for a correlation matrix at a specific - date. - - :param df: correlation matrix dataframe with multiindex (date, - columns) - :param dt: date for which to compute eigenvalues/eigenvectors - :param sort_eigvals: whether to sort eigenvalues in descending order - :return: tuple of (eigenvalues array, eigenvectors array) - """ - hdbg.dassert_isinstance(dt, datetime.date) - df_tmp = df.loc[dt].copy() - # Compute rolling eigenvalues and eigenvectors. - # TODO(gp): Count and report inf and nans as warning. - df_tmp.replace([np.inf, -np.inf], np.nan, inplace=True) - df_tmp.fillna(0.0, inplace=True) - eigval, eigvec = np.linalg.eigh(df_tmp) - # Sort eigenvalues, if needed. 
- if not (sorted(eigval) == eigval).all(): - _LOG.debug("eigvals not sorted: %s", eigval) - if sort_eigvals: - _LOG.debug( - "Before sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec - ) - _LOG.debug("eigvals: %s", eigval) - idx = eigval.argsort()[::-1] - eigval = eigval[idx] - eigvec = eigvec[:, idx] - _LOG.debug( - "After sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec - ) - # - if (eigval == 0).all(): - eigvec = np.nan * eigvec - return eigval, eigvec - - -def rolling_pca_over_time( - df: pd.DataFrame, com: float, nan_mode: str, sort_eigvals: bool = True -) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """ - Compute rolling PCAs over time. - - :param sort_eigvals: sort the eigenvalues in descending orders - :return: - - eigval_df stores eigenvalues for the different components indexed by - timestamps - - eigvec_df stores eigenvectors as multiindex df - """ - import tqdm.autonotebook as tauton - - import helpers.hpandas_dassert as hpandass - - # Compute rolling correlation. - corr_df = rolling_corr_over_time(df, com, nan_mode) - # Compute eigvalues and eigenvectors. - timestamps = corr_df.index.get_level_values(0).unique() - eigval = np.zeros((timestamps.shape[0], df.shape[1])) - eigvec = np.zeros((timestamps.shape[0], df.shape[1], df.shape[1])) - for i, dt in tauton.tqdm( - enumerate(timestamps), - total=timestamps.shape[0], - desc="Computing rolling PCA", - ): - eigval[i], eigvec[i] = _get_eigvals_eigvecs(corr_df, dt, sort_eigvals) - # Package results. - eigval_df = pd.DataFrame(eigval, index=timestamps) - hdbg.dassert_eq(eigval_df.shape[0], len(timestamps)) - hpandass.dassert_strictly_increasing_index(eigval_df) - # Normalize by sum. - # TODO(gp): Move this up. 
- eigval_df = eigval_df.multiply(1 / eigval_df.sum(axis=1), axis="index") - # - # pylint ref: github.com/PyCQA/pylint/issues/3139 - eigvec = eigvec.reshape((-1, eigvec.shape[-1])) # pylint: disable=unsubscriptable-object - idx = pd.MultiIndex.from_product( - [timestamps, df.columns], names=["datetime", None] - ) - eigvec_df = pd.DataFrame(eigvec, index=idx, columns=range(df.shape[1])) # pylint: disable=unsubscriptable-object - hdbg.dassert_eq( - len(eigvec_df.index.get_level_values(0).unique()), len(timestamps) - ) - return corr_df, eigval_df, eigvec_df - - -def plot_pca_over_time( - eigval_df: pd.DataFrame, - eigvec_df: pd.DataFrame, - num_pcs_to_plot: int = 0, - num_cols: int = 2, -) -> None: - """ - Similar to plot_pca_analysis() but over time. - """ - import helpers.hmatplotlib as hmatplo - - # Plot eigenvalues. - eigval_df.plot(title="Eigenvalues over time", ylim=(0, 1)) - # Plot cumulative variance. - eigval_df.cumsum(axis=1).plot( - title="Fraction of variance explained by top PCs over time", ylim=(0, 1) - ) - # Plot eigenvalues. - max_pcs = eigvec_df.shape[1] - num_pcs_to_plot = _get_num_pcs_to_plot(num_pcs_to_plot, max_pcs) - _LOG.info("num_pcs_to_plot=%s", num_pcs_to_plot) - if num_pcs_to_plot > 0: - _, axes = hmatplo.get_multiple_plots( - num_pcs_to_plot, - num_cols=num_cols, - y_scale=4, - sharex=True, - sharey=True, - ) - for i in range(num_pcs_to_plot): - eigvec_df[i].unstack(1).plot( - ax=axes[i], ylim=(-1, 1), title="PC%s" % i - ) - - -def plot_time_distributions( - dts: List[Union[datetime.datetime, pd.Timestamp]], - mode: str, - density: bool = True, -) -> "mpl.axes.Axes": - """ - Compute distribution for an array of timestamps `dts`. 
- - - mode: see below - """ - hdbg.dassert_type_in(dts[0], (datetime.datetime, pd.Timestamp)) - hdbg.dassert_in( - mode, - ( - "time_of_the_day", - "weekday", - "minute_of_the_hour", - "day_of_the_month", - "month_of_the_year", - "year", - ), - ) - if mode == "time_of_the_day": - # Convert in minutes from the beginning of the day. - data = [dt.time() for dt in dts] - data = [t.hour * 60 + t.minute for t in data] - # 1 hour bucket. - step = 60 - bins = np.arange(0, 24 * 60 + step, step) - vals = pd.cut( - data, - bins=bins, - include_lowest=True, - right=False, - retbins=False, - labels=False, - ) - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = [ - "%02d:%02d" % (bins[k] / 60, bins[k] % 60) for k in count.index - ] - elif mode == "weekday": - data = [dt.date().weekday() for dt in dts] - bins = np.arange(0, 7 + 1) - vals = pd.cut( - data, - bins=bins, - include_lowest=True, - right=False, - retbins=False, - labels=False, - ) - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = "Mon Tue Wed Thu Fri Sat Sun".split() - elif mode == "minute_of_the_hour": - vals = [dt.time().minute for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = list(map(str, list(range(1, 60 + 1)))) - elif mode == "day_of_the_month": - vals = [dt.date().day for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = list(map(str, list(range(1, 31 + 1)))) - elif mode == "month_of_the_year": - vals = [dt.date().month for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split() - elif mode == "year": - vals = [dt.date().year for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. 
- yticks = pd.Series(vals).unique().tolist() - else: - raise ValueError("Invalid mode='%s'" % mode) - hdbg.dassert_eq(count.sum(), len(dts)) - # - if density: - count /= count.sum() - label = "num points=%s" % len(dts) - ax = count.plot(kind="bar", label=label, figsize=(20, 7)) - ax.set_xticklabels(yticks) - if density: - ax.set_ylabel("Probability") - else: - ax.set_ylabel("Count") - ax.legend(loc="best") - return ax - - -# TODO(gp): It can't accept ax. Remove this limitation. -def jointplot( - df: pd.DataFrame, - predicted_var: str, - predictor_var: str, - height: Optional[int] = None, - *args: Any, - **kwargs: Any, -) -> None: - """ - Perform a scatterplot of two columns of a dataframe using - seaborn.jointplot(). - - :param df: dataframe - :param predicted_var: y-var - :param predictor_var: x-var :param args, kwargs: arguments passed to - seaborn.jointplot() - """ - import seaborn as sns - - hdbg.dassert_in(predicted_var, df.columns) - hdbg.dassert_in(predictor_var, df.columns) - df = df[[predicted_var, predictor_var]] - # Remove non-finite values. - # TODO(gp): Use explore.dropna(). - mask = np.all(np.isfinite(df.values), axis=1) - df = df[mask] - # Plot. - sns.jointplot( - x=predictor_var, y=predicted_var, data=df, height=height, *args, **kwargs - ) - - -def _preprocess_regression( - df: pd.DataFrame, - intercept: bool, - predicted_var: str, - predicted_var_delay: int, - predictor_vars: Union[str, List[str]], - predictor_vars_delay: int, -) -> Optional[Tuple[pd.DataFrame, List[str], List[str]]]: - """ - Preprocess data in dataframe form in order to perform a regression. - """ - # Sanity check vars. - hdbg.dassert_type_is(df, pd.DataFrame) - hdbg.dassert_lte(1, df.shape[0]) - if isinstance(predictor_vars, str): - predictor_vars = [predictor_vars] - hdbg.dassert_type_is(predictor_vars, list) - # hdbg.dassert_type_is(predicted_var, str) - hdbg.dassert_not_in(predicted_var, predictor_vars) - if not predictor_vars: - # No predictors. 
- _LOG.warning("No predictor vars: skipping") - return None - # - col_names = [predicted_var] + predictor_vars - hdbg.dassert_is_subset(col_names, df.columns) - df = df[col_names].copy() - num_rows = df.shape[0] - # Shift. - if predicted_var_delay != 0: - df[predicted_var] = df[predicted_var].shift(predicted_var_delay) - _LOG.warning("Shifting predicted_var=%s", predicted_var_delay) - if predictor_vars_delay != 0: - df[predictor_vars] = df[predictor_vars].shift(predictor_vars_delay) - _LOG.warning("Shifting predictor_vars=%s", predictor_vars_delay) - # Remove non-finite values. - # TODO(gp): Use the function. - df.dropna(how="all", inplace=True) - num_rows_after_drop_nan_all = df.shape[0] - if num_rows_after_drop_nan_all != num_rows: - _LOG.info( - "Removed %s rows with all nans", - hprint.perc(num_rows - num_rows_after_drop_nan_all, num_rows), - ) - # - df.dropna(how="any", inplace=True) - num_rows_after_drop_nan_any = df.shape[0] - if num_rows_after_drop_nan_any != num_rows_after_drop_nan_all: - _LOG.warning( - "Removed %s rows with any nans", - hprint.perc(num_rows - num_rows_after_drop_nan_any, num_rows), - ) - # Prepare data. - if intercept: - if "const" not in df.columns: - df.insert(0, "const", 1.0) - predictor_vars = ["const"] + predictor_vars[:] - param_names = predictor_vars[:] - hdbg.dassert(np.all(np.isfinite(df[predicted_var].values))) - hdbg.dassert( - np.all(np.isfinite(df[predictor_vars].values)), - msg="predictor_vars=%s" % predictor_vars, - ) - # Perform regression. 
- if df.shape[0] < 1: - return None - return df, param_names, predictor_vars - - -def ols_regress( - df: pd.DataFrame, - predicted_var: str, - predictor_vars: str, - intercept: bool, - print_model_stats: bool = True, - tsplot: bool = False, - tsplot_figsize: Optional[Any] = None, - jointplot_: bool = True, - jointplot_height: Optional[Any] = None, - predicted_var_delay: int = 0, - predictor_vars_delay: int = 0, - max_nrows: float = 1e4, -) -> Optional[Dict[str, Any]]: - """ - Perform OLS on columns of a dataframe. - - :param df: dataframe - :param predicted_var: y variable - :param predictor_vars: x variables - :param intercept: - :param print_model_stats: print or return the model stats - :param tsplot: plot a time-series if possible - :param tsplot_figsize: - :param jointplot_: plot a scatter plot - :param jointplot_height: - :param predicted_var_delay: - :param predictor_vars_delay: - :param max_nrows: do not plot if there are too many rows, since - notebook can be slow or hang - :return: - """ - import statsmodels.api - - import helpers.hmatplotlib as hmatplo - - obj = _preprocess_regression( - df, - intercept, - predicted_var, - predicted_var_delay, - predictor_vars, - predictor_vars_delay, - ) - if obj is None: - return None - df, param_names, predictor_vars = obj - hdbg.dassert_lte(1, df.shape[0]) - model = statsmodels.api.OLS( - df[predicted_var], df[predictor_vars], hasconst=intercept - ).fit() - regr_res = { - "param_names": param_names, - "coeffs": model.params, - "pvals": model.pvalues, - # pylint: disable=no-member - "rsquared": model.rsquared, - "adj_rsquared": model.rsquared_adj, - "model": model, - } - if print_model_stats: - # pylint: disable=no-member - _LOG.info(model.summary().as_text()) - if tsplot or jointplot_: - if max_nrows is not None and df.shape[0] > max_nrows: - _LOG.warning( - "Skipping plots since df has %d > %d rows", - df.shape[0], - max_nrows, - ) - else: - predictor_vars = [p for p in predictor_vars if p != "const"] - if 
len(predictor_vars) == 1: - if tsplot: - # Plot the data over time. - if tsplot_figsize is None: - tsplot_figsize = hmatplo.FIG_SIZE - df[[predicted_var, predictor_vars[0]]].plot( - figsize=tsplot_figsize - ) - if jointplot_: - # Perform scatter plot. - if jointplot_height is None: - jointplot_height = hmatplo.FIG_SIZE[1] - jointplot( - df, - predicted_var, - predictor_vars[0], - height=jointplot_height, - ) - else: - _LOG.warning( - "Skipping plots since there are too many predictors" - ) - if print_model_stats: - return None - return regr_res - - -def ols_regress_series( - srs1: pd.Series, - srs2: pd.Series, - intercept: bool, - srs1_name: Optional[Any] = None, - srs2_name: Optional[Any] = None, - convert_to_dates: bool = False, - **kwargs: Any, -) -> Dict[str, Any]: - """ - Regress two series against each other. - - Wrapper around regress() to regress series against each other. - """ - # Validate inputs are Series. - hdbg.dassert_isinstance(srs1, pd.Series) - hdbg.dassert_isinstance(srs2, pd.Series) - srs1 = srs1.copy() - srs2 = srs2.copy() - # - if convert_to_dates: - _LOG.warning("Sampling to date") - srs1.index = [pd.to_datetime(dt).date() for dt in srs1.index] - srs2.index = [pd.to_datetime(dt).date() for dt in srs2.index] - # - hdbg.dassert_array_has_same_type_element(srs1, srs2, only_first_elem=True) - # Check common indices. - common_idx = srs1.index.intersection(srs2.index) - hdbg.dassert_lte(1, len(common_idx)) - # Merge series into a dataframe. 
- if srs1_name is None: - srs1_name = srs1.name if srs1.name is not None else "" - if srs2_name is None: - srs2_name = srs2.name if srs2.name is not None else "" - if srs1_name == srs2_name: - srs1_name += "_1" - srs2_name += "_2" - _LOG.warning("Series have the same name: adding suffix to distinguish") - df = pd.concat([srs1, srs2], axis=1, join="outer") - df.columns = [srs1_name, srs2_name] - # - val = ols_regress(df, srs1_name, srs2_name, intercept=intercept, **kwargs) - val = cast(Dict[str, Any], val) - return val - - -def robust_regression( - df: pd.DataFrame, - predicted_var: str, - predictor_vars: str, - intercept: bool, - jointplot_: bool = True, - jointplot_figsize: Optional[Any] = None, - predicted_var_delay: int = 0, - predictor_vars_delay: int = 0, -) -> None: - """ - Perform robust regression using RANSAC algorithm to handle outliers. - - :param df: dataframe with data - :param predicted_var: dependent variable column name - :param predictor_vars: independent variable column name(s) - :param intercept: whether to include intercept in regression - :param jointplot_: whether to create a scatter plot - :param jointplot_figsize: size of the joint plot - :param predicted_var_delay: shift predicted variable by this many - periods - :param predictor_vars_delay: shift predictor variables by this many - periods - """ - import matplotlib.pyplot as plt - import sklearn.linear_model - - import helpers.hmatplotlib as hmatplo - - obj = _preprocess_regression( - df, - intercept, - predicted_var, - predicted_var_delay, - predictor_vars, - predictor_vars_delay, - ) - if obj is None: - return - # From http://scikit-learn.org/stable/auto_examples/linear_model/ - # plot_robust_fit.html#sphx-glr-auto-examples-linear-model-plot-robust-fit-py - # TODO(gp): Add also TheilSenRegressor and HuberRegressor. - - hdbg.dassert_eq(len(predictor_vars), 1) - y = df[predicted_var] - X = df[predictor_vars] - # Fit line using all data. 
- lr = sklearn.linear_model.LinearRegression() - lr.fit(X, y) - # Robustly fit linear model with RANSAC algorithm. - ransac = sklearn.linear_model.RANSACRegressor() - ransac.fit(X, y) - inlier_mask = ransac.inlier_mask_ - outlier_mask = np.logical_not(inlier_mask) - # Predict data of estimated models. - line_X = np.linspace(X.min().values[0], X.max().values[0], num=100)[ - :, np.newaxis - ] - line_y = lr.predict(line_X) - line_y_ransac = ransac.predict(line_X) - # Compare estimated coefficients - _LOG.info("Estimated coef for linear regression=%s", lr.coef_) - _LOG.info("Estimated coef for RANSAC=%s", ransac.estimator_.coef_) - if jointplot_: - if jointplot_figsize is None: - jointplot_figsize = hmatplo.FIG_SIZE - plt.figure(figsize=jointplot_figsize) - plt.scatter( - X[inlier_mask], - y[inlier_mask], - color="red", - marker="o", - label="Inliers", - ) - plt.scatter( - X[outlier_mask], - y[outlier_mask], - color="blue", - marker="o", - label="Outliers", - ) - plt.plot(line_X, line_y, color="green", linewidth=2, label="OLS") - plt.plot( - line_X, line_y_ransac, color="black", linewidth=3, label="RANSAC" - ) - plt.legend(loc="best") - plt.xlabel(", ".join(predictor_vars)) - plt.ylabel(predicted_var) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py deleted file mode 100644 index 0604afd67..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Import as: - -import helpers.hpandas_check_summary as hpachsum -""" - -import dataclasses -from typing import List, Optional - -import pandas as pd - -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - - -# 
############################################################################# -# _SummaryRow -# ############################################################################# - - -@dataclasses.dataclass -class _SummaryRow: - """ - Output of a check corresponding to a row of the summary df. - """ - - # Description of the check. - description: str - # Description of the output. - comment: str - # Whether the check was successful or not. - is_ok: bool - - -# ############################################################################# -# CheckSummary -# ############################################################################# - - -class CheckSummary: - """ - Collect and report the results of several checks performed in a notebook. - """ - - def __init__(self, *, title: Optional[str] = ""): - self.title = title - # Initialize the array for storing summary rows. - self._array: List[_SummaryRow] = [] - - def add(self, description: str, comment: str, is_ok: bool) -> None: - """ - Add the result of a single check. - """ - summary_row = _SummaryRow(description, comment, is_ok) - self._array.append(summary_row) - - def is_ok(self) -> bool: - """ - Compute whether all the checks were successful or not. - """ - is_ok = all(sr.is_ok for sr in self._array) - return is_ok - - def report_outcome( - self, *, notebook_output: bool = True, assert_on_error: bool = True - ) -> Optional[str]: - """ - Report the result of the entire check. - - :param notebook_output: report the result of the checks for a - notebook or as a string - :param assert_on_error: assert if one check failed - """ - df = pd.DataFrame(self._array) - - # Compute result as a string. - result = [] - if self.title: - result.append("# " + self.title) - result.append(str(df)) - is_ok = self.is_ok() - result.append(f"is_ok={is_ok}") - result = "\n".join(result) - # Display on a notebook, if needed. 
- if notebook_output: - if self.title: - print(self.title) - - # Convert DataFrame to HTML with colored rows based on 'is_ok' column. - def _color_rows(row: bool) -> str: - """ - Apply red/green color based on boolean value in `row["is_ok"]`. - """ - is_ok = row["is_ok"] - color = "#FA6B84" if not is_ok else "#ACF3AE" - return [f"background-color: {color}"] * len(row) - - df_html = df.style.apply(_color_rows, axis=1) - from IPython.display import display - - display(df_html) - print(f"is_ok={is_ok}") - # Assert if at least one of the check failed. - if not is_ok and assert_on_error: - raise ValueError("The checks have failed:\n" + result) - # For notebooks, we want to return None, since the outcome was - # already displayed. - if notebook_output: - result = None - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py deleted file mode 100644 index c421095a3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py +++ /dev/null @@ -1,282 +0,0 @@ -""" -Import as: - -import helpers.hpandas_clean as hpanclea -""" - -from typing import Any, List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_utils as hpanutil -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - - -def drop_duplicates( - data: Union[pd.Series, pd.DataFrame], - use_index: bool, - column_subset: Optional[List[str]] = None, - *args: Any, - **kwargs: Any, -) -> Union[pd.Series, pd.DataFrame]: - """ - Wrap `pandas.drop_duplicates()` with additional index handling. 
- - See the official docs: - - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html - - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html - - :param data: input series or dataframe - :param use_index: whether to consider index values when identifying duplicates - - if `True`, use index values together with a column subset for - identifying duplicates - - if `False`, duplicated rows are with the exact same values in a subset - and different indices - :param column_subset: a list of columns to consider for identifying duplicates - :param args: additional arguments passed to pandas.drop_duplicates() - :param kwargs: additional keyword arguments passed to pandas.drop_duplicates() - :return: data without duplicates - """ - _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) - num_rows_before = data.shape[0] - # Get all columns list for subset if no subset is passed. - if column_subset is None: - column_subset = data.columns.tolist() - else: - hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") - if use_index: - # Add dummy index column to use it for duplicates detection. - index_col_name = "use_index_col" - hdbg.dassert_not_in(index_col_name, data.columns.tolist()) - column_subset.insert(0, index_col_name) - data[index_col_name] = data.index - # Drop duplicates based on the column subset. - data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) - # Clean up the temporary index column if it was added. - if use_index: - # Remove dummy index column. - data_no_dups = data_no_dups.drop([index_col_name], axis=1) - # Report the change. 
- num_rows_after = data_no_dups.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return data_no_dups - - -def dropna( - df: pd.DataFrame, - *args: Any, - drop_infs: bool = False, - report_stats: bool = False, - **kwargs: Any, -) -> pd.DataFrame: - """ - Create a wrapper around pd.dropna() reporting information about the removed - rows. - - :param df: dataframe to process - :param drop_infs: if +/- np.inf should be considered as nans - :param report_stats: if processing stats should be reported - :return: dataframe with nans dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - num_rows_before = df.shape[0] - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - df = df.dropna(*args, **kwargs) - if report_stats: - num_rows_after = df.shape[0] - pct_removed = hprint.perc( - num_rows_before - num_rows_after, num_rows_before - ) - _LOG.info("removed rows with nans: %s", pct_removed) - return df - - -def drop_axis_with_all_nans( - df: pd.DataFrame, - drop_rows: bool = True, - drop_columns: bool = False, - drop_infs: bool = False, - report_stats: bool = False, -) -> pd.DataFrame: - """ - Remove columns and rows not containing information (e.g., with only nans). - - The operation is not performed in place and the resulting df is - returned. Assume that the index is timestamps. - - :param df: dataframe to process - :param drop_rows: remove rows with only nans - :param drop_columns: remove columns with only nans - :param drop_infs: remove also +/- np.inf - :param report_stats: report the stats of the operations - :return: dataframe with specific nan axis dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - if drop_columns: - # Remove columns with all nans, if any. - cols_before = df.columns[:] - df = df.dropna(axis=1, how="all") - if report_stats: - # Report results. 
- cols_after = df.columns[:] - removed_cols = set(cols_before).difference(set(cols_after)) - pct_removed = hprint.perc( - len(cols_before) - len(cols_after), len(cols_after) - ) - _LOG.info( - "removed cols with all nans: %s %s", - pct_removed, - hprint.list_to_str(removed_cols), - ) - if drop_rows: - # Remove rows with all nans, if any. - rows_before = df.index[:] - df = df.dropna(axis=0, how="all") - if report_stats: - # Report results. - rows_after = df.index[:] - removed_rows = set(rows_before).difference(set(rows_after)) - if len(rows_before) == len(rows_after): - # Nothing was removed. - min_ts = max_ts = None - else: - # TODO(gp): Report as intervals of dates. - min_ts = min(removed_rows) - max_ts = max(removed_rows) - pct_removed = hprint.perc( - len(rows_before) - len(rows_after), len(rows_after) - ) - _LOG.info( - "removed rows with all nans: %s [%s, %s]", - pct_removed, - min_ts, - max_ts, - ) - return df - - -def drop_duplicated( - df: pd.DataFrame, *, subset: Optional[List[str]] = None -) -> pd.DataFrame: - """ - Implement `df.duplicated` but considering also the index and ignoring nans. - """ - _LOG.debug("before df=\n%s", hpanutil.df_to_str(df)) - # Move the index to the df. - old_index_name = df.index.name - new_index_name = "_index.tmp" - hdbg.dassert_not_in(new_index_name, df.columns) - df.index.name = new_index_name - df.reset_index(drop=False, inplace=True) - # Remove duplicates by ignoring nans. - if subset is not None: - hdbg.dassert_isinstance(subset, list) - subset = [new_index_name] + subset - duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") - # Report the result of the operation. 
- if duplicated.sum() > 0: - num_rows_before = df.shape[0] - _LOG.debug( - "Removing duplicates df=\n%s", - hpanutil.df_to_str(df.loc[duplicated]), - ) - df = df.loc[~duplicated] - num_rows_after = df.shape[0] - _LOG.warning( - "Removed repeated rows num_rows=%s", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - _LOG.debug("after removing duplicates df=\n%s", hpanutil.df_to_str(df)) - # Set the index back. - df.set_index(new_index_name, inplace=True) - df.index.name = old_index_name - _LOG.debug("after df=\n%s", hpanutil.df_to_str(df)) - return df - - -def impute_nans(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame: - """ - Assign `value` to the `column` of `df` where the value is "nan". - - :param df: The DataFrame to modify. - :param column: The column in which to replace "nan" values. - :param value: The value to assign to "nan" entries. - :return: The DataFrame with the "nan" values assigned. - """ - df[column] = df[column].astype(str) - mask = df[column] == "nan" - # Assign the new value or keep the original value. - df[column] = np.where(mask, value, df[column]) - # There should be no more nans. - mask = df[column] == "nan" - hdbg.dassert_eq(mask.sum(), 0) - # - return df - - -# ############################################################################# - - -def remove_outliers( - df: pd.DataFrame, - lower_quantile: float, - *, - column_set: hpanutil.ColumnSet, - # TODO(Grisha): the params are not used. - fill_value: float = np.nan, - mode: str = "remove_outliers", - axis: Any = 0, - upper_quantile: Optional[float] = None, -) -> pd.DataFrame: - """ - Remove outliers from a dataframe based on quantile thresholds. 
- - :param df: input dataframe - :param lower_quantile: lower quantile threshold (0.0 to 1.0) - :param column_set: columns to apply outlier removal to - :param fill_value: value to use for filling outliers (currently unused) - :param mode: outlier removal mode (currently unused) - :param axis: axis along which to compute quantiles (0 for columns, 1 for rows) - :param upper_quantile: upper quantile threshold, defaults to 1 - lower_quantile - :return: dataframe with outliers removed based on quantile thresholds - """ - hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") - # Validate quantile parameters. - hdbg.dassert_lte(0.0, lower_quantile) - if upper_quantile is None: - upper_quantile = 1.0 - lower_quantile - hdbg.dassert_lte(lower_quantile, upper_quantile) - hdbg.dassert_lte(upper_quantile, 1.0) - # Create a copy of the dataframe to avoid modifying the original. - df = df.copy() - if axis == 0: - all_columns = df.columns - columns = hpanutil.resolve_column_names(column_set, all_columns) - hdbg.dassert_is_subset(columns, df.columns) - for column in all_columns: - if column in columns: - df[column] = df[column].quantile( - [lower_quantile, upper_quantile] - ) - elif axis == 1: - all_rows = df.rows - rows = hpanutil.resolve_column_names(column_set, all_rows) - hdbg.dassert_is_subset(rows, df.rows) - for row in all_rows: - if row in rows: - df[row] = df[row].quantile([lower_quantile, upper_quantile]) - else: - raise ValueError(f"Invalid axis='{axis}'") - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py deleted file mode 100644 index b40308daa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py +++ /dev/null @@ -1,289 +0,0 @@ -""" -Import as: - -import 
helpers.hpandas_compare as hpancomp -""" - -import logging -from typing import List - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_dassert as hpandass -import helpers.hpandas_utils as hpanutil - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - - -def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: - """ - Compare contents of rows with same indices. - - Index is set to default sequential integer values because compare is - sensitive to multi index (probably because new multi indexes are created - for each difference in `compare`). Multi index columns are regular columns now. - Excess columns are removed so both dataframes are always same shape because - `compare` expects identical dataframes (same number of rows, columns, etc.). - - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: dataframe with data with same indices and different contents - """ - # Get rows on which the two dataframe indices match. - idx_intersection = df1.index.intersection(df2.index) - # Remove excess columns and reset indexes. - trimmed_second = df2.loc[idx_intersection].reset_index() - trimmed_first = df1.loc[idx_intersection].reset_index() - # Get difference between second and first dataframe. - data_difference = trimmed_second.compare(trimmed_first) - # Update data difference with original dataframe index names - # for easier identification. - index_names = tuple(df2.index.names) - # If index or multi index is named, it will be visible in data difference. 
- if index_names != (None,): - for index in data_difference.index: - for column in index_names: - data_difference.loc[index, column] = trimmed_second.loc[index][ - column - ] - data_difference = data_difference.convert_dtypes() - return data_difference - - -def compare_nans_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> pd.DataFrame: - """ - Compare equality of DataFrames in terms of NaNs. - - For example: - - `5 vs np.nan` is a mismatch - - `np.nan vs 5` is a mismatch - - `np.nan vs np.nan` is a match - - `np.nan vs np.inf` is a mismatch - - :param df1: dataframe to compare - :param df2: dataframe to compare with - :return: dataframe that shows the differences stacked side by side, see - `pandas.DataFrame.compare()` for an example - """ - hpandass.dassert_axes_equal(df1, df2) - # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. - mask1 = df1.isna() & ~df2.isna() - mask2 = ~df1.isna() & df2.isna() - mask3 = mask1 | mask2 - # Compute a dataframe with the differences. - nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) - return nan_diff_df - - -# TODO(Grisha): -> `compare_dataframes()`? - - -def compare_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - row_mode: str = "equal", - column_mode: str = "equal", - # TODO(Grisha): should be True by default? - compare_nans: bool = False, - diff_mode: str = "diff", - assert_diff_threshold: float = 1e-3, - close_to_zero_threshold: float = 1e-6, - zero_vs_zero_is_zero: bool = True, - remove_inf: bool = True, - log_level: int = logging.DEBUG, - only_warning: bool = True, -) -> pd.DataFrame: - """ - Compare two dataframes. - - This works for dataframes with and without multi-index. 
- - :param row_mode: control how the rows are handled - - "equal": rows need to be the same for the two dataframes - - "inner": compute the common rows for the two dataframes - :param column_mode: same as `row_mode` - :param compare_nans: include NaN comparison if True otherwise just - compare non-NaN values - :param diff_mode: control how the dataframes are compared in terms of - corresponding elements - - "diff": use the difference - - "pct_change": use the percentage difference - :param assert_diff_threshold: maximum allowed total difference - - do not assert if `None` - - works when `diff_mode` is "pct_change" - :param close_to_zero_threshold: round numbers below the threshold to 0 - :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 - if True, otherwise keep the actual result - :param remove_inf: replace +-inf with `np.nan` - :param log_level: logging level - :param only_warning: when `True` the function issues a warning instead of aborting - :return: a singe dataframe with differences as values - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - # Check value of `assert_diff_threshold`, if it was passed. - if assert_diff_threshold: - hdbg.dassert_lte(assert_diff_threshold, 1.0) - hdbg.dassert_lte(0.0, assert_diff_threshold) - # TODO(gp): Factor out this logic and use it for both compare_visually_dfs - # and - if row_mode == "equal": - hpandass.dassert_indices_equal(df1, df2) - elif row_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - same_rows = list((set(df1.index)).intersection(set(df2.index))) - df1 = df1[df1.index.isin(same_rows)] - df2 = df2[df2.index.isin(same_rows)] - else: - raise ValueError(f"Invalid row_mode='{row_mode}'") - # Handle column comparison mode. 
- if column_mode == "equal": - hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) - elif column_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) - df1 = df1[col_names] - df2 = df2[col_names] - else: - raise ValueError(f"Invalid column_mode='{column_mode}'") - # Round small numbers to 0 to exclude them from the diff computation. - close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold - df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( - 0 - ) - df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( - 0 - ) - # Compute the difference df. - if diff_mode == "diff": - # Test and convert the assertion into a boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - df1, df2, check_like=True, check_dtype=False - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise an assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n are not equal.", - hpanutil.df_to_str(df1, log_level=log_level), - hpanutil.df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Calculate the difference. - df_diff = df1 - df2 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - elif diff_mode == "pct_change": - # Compare NaN values in dataframes. - nan_diff_df = compare_nans_in_dataframes(df1, df2) - _LOG.debug( - "Dataframe with NaN differences=\n%s", - hpanutil.df_to_str(nan_diff_df), - ) - msg = "There are NaN values in one of the dataframes that are not in the other one." - hdbg.dassert_eq( - 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning - ) - # Compute pct_change. - df_diff = 100 * (df1 - df2) / df2.abs() - if zero_vs_zero_is_zero: - # When comparing 0 to 0 set the diff (which is NaN by default) to 0. 
- df1_mask = df1 == 0 - df2_mask = df2 == 0 - zero_vs_zero_mask = df1_mask & df2_mask - df_diff[zero_vs_zero_mask] = 0 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - # Check if `df_diff` values are less than `assert_diff_threshold`. - if assert_diff_threshold is not None: - nan_mask = df_diff.isna() - within_threshold = ( - df_diff.abs() <= assert_diff_threshold - ) | nan_mask - expected = pd.DataFrame( - True, - index=within_threshold.index, - columns=within_threshold.columns, - ) - # Test and convert the assertion into boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - within_threshold, expected, check_exact=True - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", - hpanutil.df_to_str(df1, log_level=log_level), - hpanutil.df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Report max diff. - max_diff = df_diff.abs().max().max() - _LOG.log( - log_level, - "Maximum percentage difference between the two dataframes = %s", - max_diff, - ) - else: - raise ValueError(f"diff_mode={diff_mode}") - df_diff = df_diff.add_suffix(f".{diff_mode}") - return df_diff - - -def find_common_columns( - names: List[str], dfs: List[pd.DataFrame] -) -> pd.DataFrame: - """ - Find common columns across multiple dataframes. 
- - :param names: list of names for each dataframe - :param dfs: list of dataframes to compare - :return: dataframe showing common columns between each pair of dataframes - """ - df = [] - for i, df1 in enumerate(dfs): - df1 = dfs[i].columns - for j in range(i + 1, len(dfs)): - df2 = dfs[j].columns - common_cols = [c for c in df1 if c in df2] - df.append( - ( - names[i], - len(df1), - names[j], - len(df2), - len(common_cols), - ", ".join(common_cols), - ) - ) - df = pd.DataFrame( - df, - columns=[ - "table1", - "num_cols1", - "num_cols2", - "table2", - "num_comm_cols", - "common_cols", - ], - ) - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py deleted file mode 100644 index c9443c888..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -Import as: - -import helpers.hpandas_conversion as hpanconv -""" - -from typing import List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - -# ############################################################################# -# DataFrame/Series Conversion -# ############################################################################# - - -def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: - """ - Convert a pd.DataFrame with a single column into a pd.Series. The problem - is that empty df or df with a single row are not converted correctly to a - pd.Series. 
- - :param df: dataframe with a single column to convert to a series - :param series_dtype: dtype of the desired series in case a DataFrame - is empty, otherwise inherit dtype from a DataFrame - """ - # See https://stackoverflow.com/questions/33246771 - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) - if df.empty: - srs = pd.Series(dtype=series_dtype) - elif df.shape[0] > 1: - srs = df.squeeze() - else: - srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) - srs.name = df.index.name - hdbg.dassert_isinstance(srs, pd.Series) - return srs - - -def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: - """ - Convert a single-column dataframe to a series or no-op if already a series. - """ - if isinstance(data, pd.Series): - return data - return to_series(data) - - -# ############################################################################# -# Infer type -# ############################################################################# - - -def infer_column_types(col: pd.Series): - """ - Determine which data type is most prevalent in a column. - - Examine the values in the given pandas Series and decides whether - the majority of entries are strings, numeric values, or booleans. - - :param col: The column to inspect. - :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, - representing the predominant type. 
- """ - vals = { - "is_numeric": pd.to_numeric(col, errors="coerce").notna(), - #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - "is_bool": col.map(lambda x: isinstance(x, bool)), - "is_string": col.map(lambda x: isinstance(x, str)), - } - vals = {k: float(v.mean()) for k, v in vals.items()} - # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", - # (vals["is_numeric"] >= vals["is_string"], "is_numeric", - # "is_string")) - if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): - type_ = "is_bool" - elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): - type_ = "is_numeric" - else: - type_ = "is_string" - vals["type"] = type_ - return vals - - -def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Identify the predominant data type for each column in a DataFrame. - - :param df: The DataFrame whose columns will be analyzed. - :return: A DataFrame with two columns: - - `column`: the name of each original column. - - `predominant_type`: the most frequent type in that column, - one of `"string"`, `"numeric"`, or `"bool"`. - """ - return df.apply(lambda x: pd.Series(infer_column_types(x))).T - - -def convert_to_type(col: pd.Series, type_: str) -> pd.Series: - """ - Convert a pandas Series to a specified data type. - - :param col: The input column to be converted. - :param type_: The target data type. Expected values include: - - `"is_bool"`: convert values to booleans. - - `"is_int"`: convert values to integers. - - `"is_numeric"`: convert values to float. - - `"is_string"`: convert values to strings. - :return: A new Series with the same index as `col`, cast to the requested - type. 
- """ - if type_ == "is_bool": - return col.map( - lambda x: ( - True - if x in ["True", 1, "1", "true", True] - else False - if x in [0, "0", "False", False, "false"] - else None - ) - ) - elif type_ == "is_int": - return pd.to_numeric(col, errors="coerce", downcast="integer") - elif type_ == "is_numeric": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_string": - return col.astype(str) - else: - raise ValueError(f"Unknown column type: {type_}") - - -def convert_col_to_int( - df: pd.DataFrame, - col: str, -) -> pd.DataFrame: - """ - Convert a column to an integer column. - - Example use case: Parquet uses categoricals. If supplied with a - categorical-type column, this function will convert it to an integer - column. - """ - import helpers.hpandas_dassert as hpandass - - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(col, str) - hdbg.dassert_in(col, df.columns) - # Attempt the conversion. - df[col] = df[col].astype("int64") - # Trust, but verify. - hpandass.dassert_series_type_is(df[col], np.int64) - return df - - -def cast_series_to_type( - series: pd.Series, series_type: Optional[type] -) -> pd.Series: - """ - Convert a Pandas series to a given type. - - :param series: the input series - :param series_type: the type to convert the series into - - if None, then the series values are turned into Nones - :return: the series in the required type - """ - if series_type is None: - # Turn the series values into None. - series[:] = None - elif series_type is pd.Timestamp: - # Convert to timestamp. - series = pd.to_datetime(series) - elif series_type is dict: - # Convert to dict. - series = series.apply(eval) - else: - # Convert to the specified type. - series = series.astype(series_type) - return series - - -def convert_df( - df: pd.DataFrame, *, print_invalid_values: bool = False -) -> pd.DataFrame: - """ - Convert each DataFrame column to its predominant type. 
- - This function inspects every column in `df`, determines whether the - majority of its values are boolean, numeric, or string, and then - casts the column to that type using `convert_to_type`. - - :param df: The input DataFrame whose columns will be converted. - :param print_invalid_values: If True, print any original values that could - not be converted (they become NaN after conversion) - :return: a new DataFrame with each column cast to its detected predominant - type. - """ - df_out = pd.DataFrame(index=df.index) - for col in df.columns: - series = df[col] - # Determine the dominant datatype. - col_type = infer_column_types(series)["type"] - hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) - # Convert the column to dominant datatype. - converted = convert_to_type(series, col_type) - if print_invalid_values: - invalid_mask = series.notna() & converted.isna() - if invalid_mask.any(): - invalid = series[invalid_mask].tolist() - _LOG.info("Column %s dropped invalid values: %s", col, invalid) - df_out[col] = converted - return df_out - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py deleted file mode 100644 index 7d62b84b3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py +++ /dev/null @@ -1,371 +0,0 @@ -""" -Import as: - -import helpers.hpandas_dassert as hpandass -""" - -from typing import Any, Dict, Iterable, List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - - -RowsValues = List[List[str]] - -# 
############################################################################# -# Index/Axis Validation & Assertions -# ############################################################################# - - -def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index: - """ - Return the index of a Pandas object. - - :param obj: pandas Index, DataFrame, or Series - :return: the index of the object - """ - if isinstance(obj, pd.Index): - index = obj - else: - hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame)) - index = obj.index - return index - - -# TODO(gp): Maybe for symmetry with the other functions, rename to -# dassert_datetime_index - - -def dassert_index_is_datetime( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the dataframe has an index containing datetimes. - - It works for both single and multi-indexed dataframes. - """ - index = _get_index(obj) - if isinstance(index, pd.MultiIndex): - # In case of multi index check that at least one level is a datetime. - is_any_datetime = any( - isinstance(level, pd.DatetimeIndex) for level in index.levels - ) - hdbg.dassert(is_any_datetime, msg, *args) - else: - hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args) - - -def dassert_unique_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a unique index. - """ - import helpers.hpandas_utils as hpanutil - - index = _get_index(obj) - if not index.is_unique: - dup_indices = index.duplicated(keep=False) - df_dup = obj[dup_indices] - df_dup_as_str = hpanutil.df_to_str(df_dup) - dup_msg = f"Duplicated rows are:\n{df_dup_as_str}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - hdbg.dassert(index.is_unique, msg=msg, *args) - - -# TODO(gp): @all Add unit tests. 
- - -def dassert_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has an increasing index. - """ - import helpers.hpandas_utils as hpanutil - - index = _get_index(obj) - if not index.is_monotonic_increasing: - # Print information about the problematic indices like: - # ``` - # Not increasing indices are: - # full_symbol open high - # timestamp - # 2018-08-17 01:39:00+00:00 binance::BTC_USDT 6339.250000 6348.910000 - # 2018-08-17 00:01:00+00:00 kucoin::ETH_USDT 286.712987 286.712987 - # ``` - # Find the problematic indices. - mask = np.diff(index) <= pd.Timedelta(seconds=0) - mask = np.insert(mask, 0, False) - # TODO(gp): We might want to specify an integer with how many rows before - # after we want to show. - # Shift back to get the previous index that was creating the issue. - mask_shift = np.empty_like(mask) - mask_shift[: len(mask) - 1] = mask[1 : len(mask)] - mask_shift[len(mask) - 1] = False - # - mask = mask | mask_shift - df_dup_as_str = hpanutil.df_to_str(obj[mask]) - dup_msg = f"Not increasing indices are:\n{df_dup_as_str}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - # Dump the data to file for further inspection. - # obj.to_csv("index.csv") - hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args) - - -# TODO(gp): @all Add more info in case of failures and unit tests. - - -def dassert_strictly_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a strictly increasing index. - """ - dassert_unique_index(obj, msg, *args) - dassert_increasing_index(obj, msg, *args) - - -# TODO(gp): Not sure it's used or useful? 
- - -def dassert_monotonic_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a monotonic (i.e., strictly increasing or - decreasing index). - """ - dassert_unique_index(obj, msg, *args) - index = _get_index(obj) - cond = index.is_monotonic_increasing or index.is_monotonic_decreasing - hdbg.dassert(cond, msg=msg, *args) - - -# TODO(Paul): @gp -> dassert_datetime_indexed_df - - -def dassert_time_indexed_df( - df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool -) -> None: - """ - Validate that input dataframe is time indexed and well-formed. - - It works for both single and multi-indexed dataframes. - - :param df: dataframe to validate - :param allow_empty: allow empty data frames - :param strictly_increasing: if True the index needs to be strictly - increasing, instead of just increasing - """ - # Verify that Pandas dataframe is passed as input. - hdbg.dassert_isinstance(df, pd.DataFrame) - if not allow_empty: - # Verify that a non-empty dataframe is passed as input. - hdbg.dassert_lt(0, df.shape[0]) - # Verify that the dataframe has at least 1 column. - hdbg.dassert_lte(1, len(df.columns)) - # Verify that the index is increasing. - if strictly_increasing: - dassert_strictly_increasing_index(df) - else: - dassert_increasing_index(df) - # Check that the index is in datetime format. - dassert_index_is_datetime(df) - # Check that the passed timestamp has timezone info. - index_item = df.index[0] - if isinstance(index_item, tuple): - # In case of multi index assume that the first level is a datetime. - index_item = index_item[0] - hdateti.dassert_has_tz(index_item) - - -def dassert_indices_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - allow_series: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` share a common index. - - Print the symmetric difference of indices if equality does not hold. 
- """ - if allow_series: - if isinstance(df1, pd.Series): - df1 = df1.to_frame() - if isinstance(df2, pd.Series): - df2 = df2.to_frame() - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert( - df1.index.equals(df2.index), - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1.index.difference(df2.index), - df2.index.difference(df1.index), - only_warning=only_warning, - ) - - -def dassert_columns_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - sort_cols: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` have the same columns. - - Print the symmetric difference of columns if equality does not hold. - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - if sort_cols: - _LOG.debug("Sorting dataframe columns.") - df1 = df1.sort_index(axis=1) - df2 = df2.sort_index(axis=1) - hdbg.dassert( - df1.columns.equals(df2.columns), - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - only_warning=only_warning, - ) - - -def dassert_axes_equal( - df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False -) -> None: - """ - Ensure that `df1` and `df2` have the same index and same columns. - """ - dassert_indices_equal(df1, df2) - dassert_columns_equal(df1, df2, sort_cols=sort_cols) - - -# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. - - -def dassert_series_type_is( - srs: pd.Series, - type_: type, - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is `type_`. 
- - Examples of valid series types are - - np.float64 - - np.int64 - - pd.Timestamp - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_isinstance(type_, type) - hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) - - -def dassert_series_type_in( - srs: pd.Series, - types: List[type], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is one of the types in `types`. - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_container_type(types, list, type) - hdbg.dassert_in(srs.dtype.type, types, msg, *args) - - -def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: - """ - Ensure that remapping rows / columns is valid. - """ - hdbg.dassert_isinstance(to_remap, list) - hdbg.dassert_isinstance(remap_dict, dict) - # All the rows / columns to remap, should exist. - hdbg.dassert_is_subset( - remap_dict.keys(), - to_remap, - "Keys to remap should be a subset of existing columns", - ) - # The mapping is invertible. - hdbg.dassert_no_duplicates(remap_dict.keys()) - hdbg.dassert_no_duplicates(remap_dict.values()) - # Rows / columns should not be remapped on existing rows / columns. - hdbg.dassert_not_intersection(remap_dict.values(), to_remap) - - -def dassert_approx_eq( - val1: Any, - val2: Any, - rtol: float = 1e-05, - atol: float = 1e-08, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Approximate comparison is not applicable for strings. - hdbg.dassert_is_not(type(val1), str) - hdbg.dassert_is_not(type(val2), str) - # Convert iterable inputs to list in order to comply with numpy. 
- if isinstance(val1, Iterable): - val1 = list(val1) - if isinstance(val2, Iterable): - val2 = list(val2) - cond = np.allclose( - np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True - ) - if not cond: - txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" - hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore - - -# ############################################################################# - - -def dassert_is_days( - timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None -) -> None: - """ - Assert that a timedelta represents an integer number of days. - - :param timedelta: the timedelta to check - :param min_num_days: optional minimum number of days to enforce - """ - hdbg.dassert( - (timedelta / pd.Timedelta(days=1)).is_integer(), - "timedelta='%s' is not an integer number of days", - timedelta, - ) - if min_num_days is not None: - hdbg.dassert_lte(1, timedelta.days) - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py deleted file mode 100644 index 6c73c8988..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py +++ /dev/null @@ -1,302 +0,0 @@ -""" -Import as: - -import helpers.hpandas_display as hpandisp -""" - -import logging -import os -from typing import List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hlogging as hloggin -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - - -# Invariant: -# - When we are in a notebook we want to: -# - Convert `_LOG.info()` in `print()` using `hnotebo.set_logger_to_print()` -# - Display any dataframe 
using the `hpandas.display` function -# - Do not return any value -# -# - When we are not in a notebook we want to: -# - Use `_LOG.info()` and `_LOG.debug()` to log messages -# - Print the dataframe with `_LOG.debug()` -# - Return the result through a `return` statement -# -# - Each function should have a `log_level` parameter to control the logging level. -# - If `log_level` is not provided, it should be set to `logging.DEBUG` if we are not in a notebook, -# and `logging.INFO` if we are in a notebook. - - -def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: - """ - Compute a simple signature of a dataframe in string format. - - The signature contains metadata about dataframe size and certain - amount of rows from start and end of a dataframe. It is used for - testing purposes. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - text: List[str] = [f"df.shape={str(df.shape)}"] - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - # If dataframe size exceeds number of rows, show only subset in form of - # first and last rows. Otherwise, whole dataframe is shown. - if len(df) > num_rows: - text.append(f"df.head=\n{df.head(num_rows // 2)}") - text.append(f"df.tail=\n{df.tail(num_rows // 2)}") - else: - text.append(f"df.full=\n{df}") - text: str = "\n".join(text) - return text - - -# ############################################################################# - - -def convert_df_to_json_string( - df: pd.DataFrame, - n_head: Optional[int] = 10, - n_tail: Optional[int] = 10, - columns_order: Optional[List[str]] = None, -) -> str: - """ - Convert dataframe to pretty-printed JSON string. - - To select all rows of the dataframe, pass `n_head` as None. 
- - :param df: dataframe to convert - :param n_head: number of printed top rows - :param n_tail: number of printed bottom rows - :param columns_order: order for the KG columns sort - :return: dataframe converted to JSON string - """ - # Append shape of the initial dataframe. - shape = f"original shape={df.shape}" - # Reorder columns. - if columns_order is not None: - hdbg.dassert_set_eq(columns_order, df.columns) - df = df[columns_order] - # Select head. - if n_head is not None: - head_df = df.head(n_head) - else: - # If no n_head provided, append entire dataframe. - head_df = df - # Transform head to json. - head_json = head_df.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - if n_tail is not None: - # Transform tail to json. - tail = df.tail(n_tail) - tail_json = tail.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - else: - # If no tail specified, append an empty string. - tail_json = "" - # Join shape and dataframe to single string. - output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) - return output_str - - -# ############################################################################# - - -def convert_df_to_png( - df: pd.DataFrame, - file_path: str, - index: bool = True, - table_conversion: str = "kaleido", - dpi: int = 300, - print_markdown: bool = False, - markdown_path_prefix: Optional[str] = None, -) -> None: - """ - Convert a dataframe to a PNG image file. - - Uses the dataframe_image library to render the DataFrame as an image - with HTML styling. 
- - :param df: dataframe to convert - :param file_path: path where the PNG image will be saved - :param index: whether to include the index in the image - :param table_conversion: conversion method ('kaleido', 'chrome', or 'playwright') - :param dpi: resolution in dots per inch (default: 300 for print quality, - higher values = higher resolution and larger file size) - :param print_markdown: if True, print markdown image reference like - ![](path/to/image.png) - :param markdown_path_prefix: optional path to prepend to the image path in - the markdown reference (e.g., '../figures/' or 'assets/') - """ - # Keep this import here since it's an optional one. - import dataframe_image as dfi - - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(file_path, str) - # Ensure the output directory exists. - hio.create_enclosing_dir(file_path, incremental=True) - # Prepare dataframe for export, handling index parameter. - export_df = df - if not index: - # Reset index to exclude it from the image. - export_df = df.reset_index(drop=True) - dfi.export(export_df, file_path, table_conversion=table_conversion, dpi=dpi) - # Use print instead of _LOG.info. - print(f"PNG image saved to: '{file_path}'") - if print_markdown: - # Construct the markdown path. - markdown_path = file_path - if markdown_path_prefix: - markdown_path = os.path.join(markdown_path_prefix, file_path) - markdown_ref = f"![]({markdown_path})" - # Use print instead of _LOG.info. - print(markdown_ref) - - -# ############################################################################# - - -def print_or_display( - df: pd.DataFrame, - *, - index: bool = True, - as_txt: bool = False, - log_level: int = logging.INFO, -) -> None: - """ - Print or display a dataframe in a notebook at the given log level. 
- - :param df: dataframe to print - :param index: whether to show the index or not - :param as_txt: print if True, otherwise render as usual HTML table - :param log_level: log level at which to print the dataframe - """ - # print(_LOG.getEffectiveLevel()) - # print(log_level) - # print(_LOG.isEnabledFor(log_level)) - if hsystem.is_running_in_ipynb() and not as_txt: - from IPython.display import display, HTML - - if _LOG.isEnabledFor(log_level): - display(HTML(df.to_html(index=index))) - else: - _LOG.log(log_level, "%s", df.to_string(index=index)) - - -def display_df( - df: pd.DataFrame, - *, - index: bool = True, - inline_index: bool = False, - max_lines: Optional[int] = 5, - tag: Optional[str] = None, - mode: Optional[str] = None, - as_txt: bool = False, - log_level: int = logging.INFO, -) -> None: - """ - Display a Pandas object (series, df, panel) in a better way than the - ipython display, e.g., by printing head and tail of the dataframe, and - other formatting options. - - :param index: whether to show the index or not - :param inline_index: make the index part of the dataframe. This is used - when cutting and pasting to other applications, which are not happy - with the output pandas HTML form - :param max_lines: number of lines to print - :param mode: use different formats temporarily overriding the default, e.g., - - "all_rows": print all the rows - - "all_cols": print all the columns - - "all": print the entire df (it could be huge) - :param as_txt: print if True, otherwise render as usual html table - :param log_level: log level at which to print the dataframe - """ - # Convert Series to DataFrame if needed. 
- if isinstance(df, pd.Series): - df = pd.DataFrame(df) - # - hdbg.dassert_type_is(df, pd.DataFrame) - hdbg.dassert_eq( - hlist.find_duplicates(df.columns.tolist()), - [], - msg="Find duplicated columns", - ) - if tag is not None: - _LOG.log(log_level, "tag=%s", tag) - # Shrink the dataframe to the number of lines specified by `max_lines`, - # if needed. - if max_lines is not None: - hdbg.dassert_lte(1, max_lines) - if df.shape[0] > max_lines: - # log.error("Printing only top / bottom %s out of %s rows", - # max_lines, df.shape[0]) - ellipses = pd.DataFrame( - [["..."] * len(df.columns)], columns=df.columns, index=["..."] - ) - df = pd.concat( - [ - df.head(int(max_lines / 2)), - ellipses, - df.tail(int(max_lines / 2)), - ], - axis=0, - ) - # Inline the index, if needed. - if inline_index: - df = df.copy() - # Copy the index to a column and don't print the index. - if df.index.name is None: - col_name = "." - else: - col_name = df.index.name - df.insert(0, col_name, df.index) - df.index.name = None - index = False - # Print or display the dataframe. 
- if mode is None: - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all_rows": - with pd.option_context( - "display.max_rows", None, "display.max_columns", 3 - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all_cols": - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all": - with pd.option_context( - "display.max_rows", - int(1e6), - "display.max_columns", - 3, - "display.max_colwidth", - int(1e6), - "display.max_columns", - None, - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - else: - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - raise ValueError("Invalid mode=%s" % mode) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py deleted file mode 100644 index a1049d77f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py +++ /dev/null @@ -1,128 +0,0 @@ -""" -Import as: - -import helpers.hpandas_io as hpanio -""" - -from typing import Any, Union - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hprint as hprint - -# Handle different versions of s3fs where core module may be at different -# locations. -try: - import s3fs - - # Try to access s3fs.core to check if it exists - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module. 
- try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ImportError: - # If s3fs is not available, define dummy classes for type hints. - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -_LOG = hloggin.getLogger(__name__) - - -def read_csv_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a CSV file into a `pd.DataFrame`. - - :param stream: file path, S3File, or S3FileSystem object - :param args: additional arguments passed to pd.read_csv() - :param kwargs: additional keyword arguments passed to pd.read_csv() - :return: dataframe with CSV contents - """ - # Gets filename from stream if it is not already a string, - # so it can be inspected for extension type. - file_name = stream if isinstance(stream, str) else vars(stream)["path"] - # Handle zipped files. - if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "gzip" - elif file_name.endswith(".zip"): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "zip" - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_csv(stream, *args, **kwargs) - return df - - -def read_parquet_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a Parquet file into a `pd.DataFrame`. - - :param stream: file path, S3File, or S3FileSystem object - :param args: additional arguments passed to pd.read_parquet() - :param kwargs: additional keyword arguments passed to pd.read_parquet() - :return: dataframe with Parquet contents - """ - # Read. 
- _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_parquet(stream, *args, **kwargs) - return df - - -# ############################################################################# - - -# TODO(Paul): Remove this since it's a dup of hgoogle_drive_api.py. - - -def to_gsheet( - df: pd.DataFrame, - tab_name: str, - gsheet_tab_name: str, - overwrite: bool, -) -> None: - """ - Save a dataframe to a Google sheet. - - :param df: the dataframe to save to a Google sheet - :param tab_name: the name of the Google sheet to save the df - into; the Google sheet with this name must already exist on the - Google Drive - :param gsheet_tab_name: the name of the sheet in the Google sheet - :param overwrite: if True, the contents of the sheet are erased - before saving the dataframe into it; if False, the dataframe is - appended to the contents of the sheet - """ - import gspread_pandas - - spread = gspread_pandas.Spread( - tab_name, sheet=gsheet_tab_name, create_sheet=True - ) - if overwrite: - spread.clear_sheet() - else: - sheet_contents = spread.sheet_to_df(index=None) - combined_df = pd.concat([sheet_contents, df]) - df = combined_df.drop_duplicates() - spread.df_to_sheet(df, index=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py deleted file mode 100644 index f139a3ba9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Import as: - -import helpers.hpandas_multiindex as hpanmult -""" - -import logging -from typing import Any, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_compare as hpancomp -import helpers.hpandas_dassert as hpandass -import 
helpers.hpandas_transform as hpantran -import helpers.hpandas_utils as hpanutil -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - -# ############################################################################# -# Functions -# ############################################################################# - - -def add_multiindex_col( - df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str -) -> pd.DataFrame: - """ - Add column to a multiindex DataFrame. - - Note: each column in a multiindex DataFrame is a DataFrame itself. - - :param df: multiindex df - :param multiindex_col: column (i.e. singleindex df) of a multiindex df - :param col_name: name of a new column - :return: a multiindex DataFrame with a new column - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) - hdbg.dassert_isinstance(col_name, str) - hdbg.dassert_not_in(col_name, df.columns) - for col in multiindex_col.columns: - df[col_name, col] = multiindex_col[col] - return df - - -def multiindex_df_info( - df: pd.DataFrame, - *, - log_level: int = logging.INFO, - **list_to_str_kwargs: Dict[str, Any], -) -> str: - """ - Report information about a multi-index df. - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - columns_level0 = df.columns.levels[0] - columns_level1 = df.columns.levels[1] - rows = df.index - ret = [] - ret.append( - f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" - ) - ret.append( - "columns_level0=" - + hprint.list_to_str2(columns_level0, **list_to_str_kwargs) - ) - ret.append( - "columns_level1=" - + hprint.list_to_str2(columns_level1, **list_to_str_kwargs) - ) - ret.append("rows=" + hprint.list_to_str2(rows, **list_to_str_kwargs)) - if isinstance(df.index, pd.DatetimeIndex): - # Display timestamp info. 
- start_timestamp = df.index.min() - end_timestamp = df.index.max() - frequency = df.index.freq - if frequency is None: - # Try to infer frequency. - frequency = pd.infer_freq(df.index) - ret.append(f"start_timestamp={start_timestamp}") - ret.append(f"end_timestamp={end_timestamp}") - ret.append(f"frequency={frequency}") - ret = "\n".join(ret) - _LOG.log(log_level, ret) - return ret - - -def subset_multiindex_df( - df: pd.DataFrame, - *, - # TODO(gp): Consider passing trim_df_kwargs as kwargs. - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, - columns_level0: hpanutil.ColumnSet = None, - columns_level1: hpanutil.ColumnSet = None, - keep_order: bool = False, -) -> pd.DataFrame: - """ - Filter multi-index DataFrame by timestamp index and column levels. - - :param start_timestamp: see `trim_df()` - :param end_timestamp: see `trim_df()` - :param columns_level0: column names that corresponds to `df.columns.levels[0]` - - `None` means no filtering - :param columns_level1: column names that corresponds to `df.columns.levels[1]` - - `None` means no filtering - :param keep_order: see `hpandas_utils.resolve_column_names()` - :return: filtered DataFrame - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - # Filter by timestamp. - allow_empty = False - strictly_increasing = False - hpandass.dassert_time_indexed_df(df, allow_empty, strictly_increasing) - df = hpantran.trim_df( - df, - ts_col_name=None, - start_ts=start_timestamp, - end_ts=end_timestamp, - left_close=True, - right_close=True, - ) - # Filter level 0. 
- hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level0 = df.columns.levels[0] - columns_level0 = hpanutil.resolve_column_names( - columns_level0, all_columns_level0, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) - df = df[columns_level0] - # Filter level 1. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level1 = df.columns.levels[1] - columns_level1 = hpanutil.resolve_column_names( - columns_level1, all_columns_level1, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) - df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) - return df - - -# ############################################################################# - - -def compare_multiindex_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, - compare_dfs_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - - Subset both multi-index dfs, if needed - - Compare dfs - - :param subset_multiindex_df: params for `subset_multiindex_df()` - :param compare_dfs_kwargs: params for `compare_dfs()` - :return: df with differences as values - """ - # Subset dfs. - if subset_multiindex_df_kwargs is None: - subset_multiindex_df_kwargs = {} - subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) - subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) - # Compare dfs. 
- if compare_dfs_kwargs is None: - compare_dfs_kwargs = {} - diff_df = hpancomp.compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) - return diff_df - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py deleted file mode 100644 index b0a6bf9d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py +++ /dev/null @@ -1,527 +0,0 @@ -""" -Import as: - -import helpers.hpandas_stats as hpanstat -""" - -import logging -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_dassert as hpandass -import helpers.hpandas_transform as hpantran -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - - -def compute_duration_df( - tag_to_df: Dict[str, pd.DataFrame], - *, - intersect_dfs: bool = False, - valid_intersect: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: - """ - Compute a df with some statistics about the time index. - - E.g., - ``` - min_index max_index min_valid_index max_valid_index - tag1 2022-01-01 21:00:00+00:00 ... - tag2 2022-01-01 21:02:00+00:00 ... - tag3 2022-01-01 21:01:00+00:00 ... 
- ``` - - :param intersect_dfs: return a transformed dict with the intersection of - indices of all the dfs if True, otherwise return the input data as is - :param valid_intersect: intersect indices without NaNs if True, otherwise - intersect indices as is - :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param - """ - hdbg.dassert_isinstance(tag_to_df, Dict) - # Create df and assign columns. - data_stats = pd.DataFrame() - min_col = "min_index" - max_col = "max_index" - min_valid_index_col = "min_valid_index" - max_valid_index_col = "max_valid_index" - # Collect timestamp info from all dfs. - for tag in tag_to_df.keys(): - # Check that the passed timestamp has timezone info. - first_idx = tag_to_df[tag].index[0] - hdateti.dassert_has_tz(cast(pd.Timestamp, first_idx)) - hpandass.dassert_index_is_datetime(tag_to_df[tag]) - # Compute timestamp stats. - data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() - data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() - data_stats.loc[tag, min_valid_index_col] = ( - tag_to_df[tag].dropna().index.min() - ) - data_stats.loc[tag, max_valid_index_col] = ( - tag_to_df[tag].dropna().index.max() - ) - # Make a copy so we do not modify the original data. - tag_to_df_updated = tag_to_df.copy() - # Change the initial dfs with intersection. - if intersect_dfs: - if valid_intersect: - # Assign start, end date column according to specs. - min_col = min_valid_index_col - max_col = max_valid_index_col - # The start of the intersection will be the max value amongt all start dates. - intersection_start_date = cast(pd.Timestamp, data_stats[min_col].max()) - # The end of the intersection will be the min value amongt all end dates. 
- intersection_end_date = cast(pd.Timestamp, data_stats[max_col].min()) - for tag in tag_to_df_updated.keys(): - df = hpantran.trim_df( - tag_to_df_updated[tag], - ts_col_name=None, - start_ts=intersection_start_date, - end_ts=intersection_end_date, - left_close=True, - right_close=True, - ) - tag_to_df_updated[tag] = df - return data_stats, tag_to_df_updated - - -# ############################################################################# - - -# TODO(gp): Remove this since it's in Google API. - - -def compute_weighted_sum( - dfs: Dict[str, pd.DataFrame], - weights: pd.DataFrame, - *, - index_mode: str = "assert_equal", -) -> Dict[str, pd.DataFrame]: - """ - Compute weighted sums of `dfs` using `weights`. - - :param dfs: dataframes keyed by id; all dfs should have the same cols, - indices are handled based on the `index_mode` - :param weights: float weights indexed by id with unique col names - :param index_mode: same as `mode` in `apply_index_mode()` - :return: weighted sums keyed by weight col names - """ - hdbg.dassert_isinstance(dfs, dict) - hdbg.dassert(dfs, "dictionary of dfs must be nonempty") - # Get a dataframe from the dictionary and record its index and columns. - id_ = list(dfs)[0] - hdbg.dassert_isinstance(id_, str) - df = dfs[id_] - hdbg.dassert_isinstance(df, pd.DataFrame) - cols = df.columns - # Sanity-check dataframes in dictionary. - for key, value in dfs.items(): - hdbg.dassert_isinstance(key, str) - hdbg.dassert_isinstance(value, pd.DataFrame) - # The reference df is not modified. - _, value = hpantran.apply_index_mode(df, value, index_mode) - hdbg.dassert( - value.columns.equals(cols), - "Column equality fails for keys=%s, %s", - id_, - key, - ) - # Sanity-check weights. 
- hdbg.dassert_isinstance(weights, pd.DataFrame) - hdbg.dassert_eq(weights.columns.nlevels, 1) - hdbg.dassert(not weights.columns.has_duplicates) - hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) - # Create a multiindexed dataframe to facilitate computing the weighted sums. - weighted_dfs = {} - combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) - # TODO(Paul): Consider relaxing the NaN-handling. - for col in weights.columns: - weighted_combined_df = combined_df.multiply(weights[col], level=0) - weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( - min_count=len(dfs) - ) - weighted_dfs[col] = weighted_sums - return weighted_dfs - - -def remap_obj( - obj: Union[pd.Series, pd.Index], - map_: Dict[Any, Any], - **kwargs: Any, -) -> pd.Series: - """ - Substitute each value of an object with another value from a dictionary. - - :param obj: a Series or Index to remap values in - :param map_: dictionary mapping old values to new values - :param kwargs: additional keyword arguments passed to pd.Series.map() - :return: remapped pandas series - """ - hdbg.dassert_lte(1, obj.shape[0]) - # TODO(Grisha): consider extending for other mapping types supported by - # `pd.Series.map`. - hdbg.dassert_isinstance(map_, dict) - # Check that every element of the object is in the mapping. - hdbg.dassert_is_subset(obj, map_.keys()) - new_srs = obj.map(map_, **kwargs) - return cast(pd.Series, new_srs) - - -def get_random_df( - num_cols: int, - seed: Optional[int] = None, - date_range_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - Compute df with random data with `num_cols` columns and index obtained by - calling `pd.date_range(**kwargs)`. 
- - :param num_cols: the number of columns in a DataFrame to generate - :param seed: see `random.seed()` - :param date_range_kwargs: kwargs for `pd.date_range()` - """ - if seed: - np.random.seed(seed) - if date_range_kwargs is None: - date_range_kwargs = {} - dt = pd.date_range(**date_range_kwargs) - df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) - return df - - -# ############################################################################# - - -def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> Any: - """ - Colorize a df with a heatmap depending on the numeric values. - - :param axis: along which axis to compute the heatmap - - 0 colorize along rows - - 1 colorize along columns - - None: colorize everything - """ - # Keep it here to avoid long start up times. - import seaborn as sns - - cm = sns.diverging_palette(5, 250, as_cmap=True) - return df.style.background_gradient(axis=axis, cmap=cm) - - -def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Any) -> str: - """ - Report percentage of True values in a list or series. - - :param vals: list or series of boolean values - :param perc_kwargs: additional keyword arguments passed to hprint.perc() - :return: formatted percentage string - """ - if isinstance(vals, list): - vals = pd.Series(vals) - ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) - return cast(str, ret) - - -def add_end_download_timestamp( - obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" -) -> Union[pd.DataFrame, Dict]: - """ - Add a column 'end_download_timestamp' to the DataFrame with the current - time. - - :param obj: The DataFrame to which the column will be added. - :param timezone: The timezone for the current time. Defaults to - 'UTC'. - """ - # Get current timestamp. - current_ts = hdateti.get_current_time(timezone) - # Set value of end_download_timestamp. 
- obj["end_download_timestamp"] = current_ts - return obj - - -def get_value_counts_stats_df( - df: pd.DataFrame, col_name: str, *, num_rows: int = 10 -) -> pd.DataFrame: - """ - Get the value counts of `col_name` in `df`. - - :param df: The DataFrame to get the value counts of `col_name` from. - :param col_name: The column name to get the value counts of. - :param num_rows: The number of rows to return. - :return: A DataFrame with the value counts of `col_name` in `df`. E.g., - ``` - count pct [%] - Venture Fund 1004 25.100 - Financial Services 274 6.850 - Venture Capital & Private Equity 176 4.400 - Computer Software 163 4.075 - Higher Education 133 3.325 - Information Technology & Services 73 1.825 - ``` - """ - hdbg.dassert_in(col_name, df.columns) - stats_df = df[col_name].value_counts().to_frame() - stats_df["pct [%]"] = stats_df["count"] / len(df) * 100 - if num_rows > 0: - stats_df = stats_df.head(num_rows) - return stats_df - - -def display_value_counts_stats_df( - df: pd.DataFrame, col_names: Union[str, List[str]], *, num_rows: int = 10 -) -> None: - if isinstance(col_names, list): - for col_name in col_names: - display_value_counts_stats_df(df, col_name, num_rows=num_rows) - return - import IPython.display - - hdbg.dassert_isinstance(col_names, str) - _LOG.info("# %s", col_names) - stats_df = get_value_counts_stats_df(df, col_names, num_rows=num_rows) - IPython.display.display(stats_df) - - -# ############################################################################# -# Functions moved from core/explore.py -# ############################################################################# - - -def report_zero_nan_inf_stats( - df: pd.DataFrame, - *, - zero_threshold: float = 1e-9, - verbose: bool = False, - as_txt: bool = False, - dbg_log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Report count and percentage about zeros, nans, infs for a df. 
- - :param df: dataframe to report the stats of - :param zero_threshold: threshold for classifying values as "zero" - :param verbose: if True, print the stats - :param as_txt: if True, print the stats as text - :param dbg_log_level: log level at which to print the debug info - :return: a DataFrame with the stats - """ - # Convert Series to DataFrame if needed. - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - # Print stats about the input dataframe. - _LOG.log(dbg_log_level, "index in [%s, %s]", df.index.min(), df.index.max()) - num_rows = df.shape[0] - _LOG.log(dbg_log_level, "num_rows=%s", hprint.thousand_separator(num_rows)) - _LOG.log(dbg_log_level, "data=") - import helpers.hpandas_display as hpandisp - - hpandisp.display_df(df, as_txt=as_txt, log_level=dbg_log_level) - # Compute date-based stats only if index is datetime. - if isinstance(df.index, pd.DatetimeIndex): - # TODO(gp): Can we do this faster? - dates = [d.date() for d in df.index] - num_days = len(set(dates)) - _LOG.log(dbg_log_level, "num_days=%s", num_days) - num_weekdays = len(set(d for d in dates if d.weekday() < 5)) - _LOG.log(dbg_log_level, "num_weekdays=%s", num_weekdays) - # - stats_df = pd.DataFrame(None, index=df.columns) - if False: - # Find the index of the first non-nan value. - df = df.applymap(lambda x: not np.isnan(x)) - min_idx = df.idxmax(axis=0) - min_idx.name = "min_idx" - # Find the index of the last non-nan value. 
- max_idx = df.reindex(index=df.index[::-1]).idxmax(axis=0) - max_idx.name = "max_idx" - stats_df["num_rows"] = num_rows - # - num_zeros = (np.abs(df) < zero_threshold).sum(axis=0) - if verbose: - stats_df["num_zeros"] = num_zeros - stats_df["zeros [%]"] = (100.0 * num_zeros / num_rows).apply( - hprint.round_digits - ) - # - num_nans = np.isnan(df).sum(axis=0) - if verbose: - stats_df["num_nans"] = num_nans - stats_df["nans [%]"] = (100.0 * num_nans / num_rows).apply( - hprint.round_digits - ) - # - num_infs = np.isinf(df).sum(axis=0) - if verbose: - stats_df["num_infs"] = num_infs - stats_df["infs [%]"] = (100.0 * num_infs / num_rows).apply( - hprint.round_digits - ) - # - num_valid = df.shape[0] - num_zeros - num_nans - num_infs - if verbose: - stats_df["num_valid"] = num_valid - stats_df["valid [%]"] = (100.0 * num_valid / num_rows).apply( - hprint.round_digits - ) - # - _LOG.log(dbg_log_level, "stats_df=\n%s", stats_df) - return stats_df - - -def pvalue_to_stars(pval: Optional[float]) -> str: - """ - Convert p-value to star notation for statistical significance. - - :param pval: p-value to convert - :return: star notation (* to ****) or ? for non-significant, NA for NaN - """ - if pval is None or np.isnan(pval): - stars = "NA" - else: - hdbg.dassert_lte(0.0, pval) - hdbg.dassert_lte(pval, 1.0) - if pval < 0.005: - # More than 99.5% confidence. - stars = "****" - elif pval < 0.01: - # More than 99% confidence. - stars = "***" - elif pval < 0.05: - # More than 95% confidence. - stars = "**" - elif pval < 0.1: - # More than 90% confidence. - stars = "*" - else: - stars = "?" - return stars - - -def format_ols_regress_results(regr_res: Optional[pd.DataFrame]) -> pd.DataFrame: - """ - Format OLS regression results into a readable DataFrame. - - :param regr_res: regression results dictionary with coeffs, pvals, rsquared, etc. 
- :return: formatted DataFrame with coefficients and statistics - """ - if regr_res is None: - _LOG.warning("regr_res=None: skipping") - df = pd.DataFrame(None) - return df - row: List[Union[float, str]] = [ - "%.3f (%s)" % (coeff, pvalue_to_stars(pval)) - for (coeff, pval) in zip(regr_res["coeffs"], regr_res["pvals"]) - ] - row.append(float("%.2f" % (regr_res["rsquared"] * 100.0))) - row.append(float("%.2f" % (regr_res["adj_rsquared"] * 100.0))) - col_names = regr_res["param_names"] + ["R^2 [%]", "Adj R^2 [%]"] - df = pd.DataFrame([row], columns=col_names) - return df - - -# ############################################################################# -# Exploratory analysis functions -# ############################################################################# - - -def _get_unique_values_stats(df: pd.DataFrame) -> pd.DataFrame: - """ - Get unique values count and percentage for each column. - - :param df: dataframe to analyze - :return: DataFrame with num_unique and unique [%] columns - """ - stats_df = pd.DataFrame(None, index=df.columns) - num_unique = df.nunique() - stats_df["num_unique"] = num_unique - stats_df["unique [%]"] = (100.0 * num_unique / df.shape[0]).apply( - hprint.round_digits - ) - return stats_df - - -def explore_dataframe( - df: pd.DataFrame, - *, - show_distributions: bool = False, - show_correlations: bool = False, - zero_threshold: float = 1e-9, - dbg_log_level: int = logging.DEBUG, -) -> Optional[pd.DataFrame]: - """ - Perform comprehensive exploratory analysis of a DataFrame. - - Computes data quality metrics (zeros, NaNs, infinities, valid data), - optionally plots distributions of high-variability columns, and - optionally displays a correlation matrix. 
- - :param df: Input dataframe to analyze - :param show_distributions: If True, plots distributions of top-variability - columns in a 3-column grid - :param show_correlations: If True, displays correlation matrix as a heatmap - :param zero_threshold: Threshold for classifying values as "zero" in - quality report - :return: Statistics DataFrame from report_zero_nan_inf_stats with columns: - num_rows, zeros [%], nans [%], infs [%], valid [%] - """ - import matplotlib.pyplot as plt - from IPython.display import display - - hdbg.dassert_lt(0, len(df), "Dataframe is empty") - # Compute and display data quality statistics. - stats_df = report_zero_nan_inf_stats( - df, zero_threshold=zero_threshold, dbg_log_level=dbg_log_level - ) - # Add information about the number of unique values and percentage of unique values for each column. - unique_stats_df = _get_unique_values_stats(df) - stats_df = pd.concat([stats_df, unique_stats_df], axis=1) - if hsystem.is_running_in_ipynb(): - _LOG.info("stats_df=") - display(stats_df) - _LOG.debug("stats_df=\n%s", stats_df) - # Plot distributions if requested. - if hsystem.is_running_in_ipynb(): - if show_distributions: - _LOG.info("Univariate distributions:") - numeric_cols = df.select_dtypes(include="number").columns.tolist() - if len(numeric_cols) > 0: - # Compute standard deviation and select top columns. - std_vals = df[numeric_cols].std().sort_values(ascending=False) - num_to_plot = len(numeric_cols) - top_cols = std_vals.head(num_to_plot).index.tolist() - # Create grid of subplots. 
- import helpers.hmatplotlib as hmatplo - - fig, axes = hmatplo.get_multiple_plots( - num_to_plot, 3, y_scale=3.5 - ) - _ = fig - for i, col in enumerate(top_cols): - ax = axes[i] - col_data = df[col].dropna() - weights = np.ones_like(col_data) / len(col_data) * 100 - ax.hist(col_data, bins=30, weights=weights, edgecolor="k") - ax.set_title(col) - ax.set_xlabel("Value") - ax.set_ylabel("Percentage [%]") - plt.tight_layout() - plt.show() - # Display correlation matrix if requested. - if show_correlations: - numeric_df = df.select_dtypes(include="number") - if len(numeric_df.columns) >= 2: - corr_matrix = numeric_df.corr() - _LOG.info("Correlation matrix:") - # TODO(gp): Improve the plot changing the number of digits. - corr_heatmap = heatmap_df(corr_matrix) - display(corr_heatmap) - if hsystem.is_running_in_ipynb(): - return None - return stats_df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py deleted file mode 100644 index 6eae1fa57..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py +++ /dev/null @@ -1,1023 +0,0 @@ -""" -Import as: - -import helpers.hpandas_transform as hpantran -""" - -import csv -import logging -import math -import random -import re -from typing import ( - Any, - Callable, - Collection, - Dict, - Iterator, - List, - Optional, - Tuple, - Union, -) - -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -# TODO(ai_gp): Import the file and not the package to avoid cyclic imports. -import helpers.hpandas_conversion as hpanconv -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - -# Enable extra verbose debugging. Do not commit. 
-_TRACE = False - -RowsValues = List[List[str]] - -# ############################################################################# -# Resampling & Time Series Operations -# ############################################################################# - - -def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex: - """ - Resample `DatetimeIndex`. - - :param index: `DatetimeIndex` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DatetimeIndex` - """ - # Import locally to avoid cyclic import. - import helpers.hpandas_dassert as hpandass - - _LOG.debug(hprint.to_str("index frequency")) - hdbg.dassert_isinstance(index, pd.DatetimeIndex) - hpandass.dassert_unique_index( - index, msg="Index must have only unique values" - ) - min_date = index.min() - max_date = index.max() - _LOG.debug("min_date=%s max_date=%s", min_date, max_date) - # TODO(gp): Preserve the index name. - # index_name = index.name - resampled_index = pd.date_range( - start=min_date, - end=max_date, - freq=frequency, - ) - # Enable detailed debugging. - if False: - if len(resampled_index) > len(index): - # Downsample. - _LOG.debug( - "Index length increased by %s = %s - %s", - len(resampled_index) - len(index), - len(resampled_index), - len(index), - ) - elif len(resampled_index) < len(index): - # Upsample. - _LOG.debug( - "Index length decreased by %s = %s - %s", - len(index) - len(resampled_index), - len(index), - len(resampled_index), - ) - else: - _LOG.debug("Index length=%s has not changed", len(index)) - # resampled_index.name = index_name - return resampled_index - - -def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame: - """ - Resample `DataFrame` by placing NaN in missing locations in the index. 
- - :param df: `DataFrame` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DataFrame` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Preserve the index name. - index_name = df.index.name - resampled_index = resample_index(df.index, frequency) - df_reindex = df.reindex(resampled_index) - df_reindex.index.name = index_name - return df_reindex - - -def reindex_on_unix_epoch( - df: pd.DataFrame, in_col_name: str, unit: str = "s" -) -> pd.DataFrame: - """ - Transform the column `in_col_name` into a datetime index. `in_col_name` - contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. - - :param df: dataframe with a unix epoch - :param in_col_name: column containing unix epoch - :param unit: the unit of unix epoch - """ - # Convert. - temp_col_name = in_col_name + "_tmp" - hdbg.dassert_in(in_col_name, df.columns) - hdbg.dassert_not_in(temp_col_name, df.columns) - # Save. - df[temp_col_name] = pd.to_datetime(df[in_col_name], unit=unit, utc=True) - df.set_index(temp_col_name, inplace=True, drop=True) - df.index.name = None - return df - - -def find_gaps_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Find data present in one dataframe and missing in the other one. - - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: two dataframes with missing data - """ - # Get data present in first, but not present in second dataframe. - first_missing_indices = df2.index.difference(df1.index) - first_missing_data = df2.loc[first_missing_indices] - # Get data present in second, but not present in first dataframe. - second_missing_indices = df1.index.difference(df2.index) - second_missing_data = df1.loc[second_missing_indices] - return first_missing_data, second_missing_data - - -# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. 
- - -def find_gaps_in_time_series( - time_series: pd.Series, - start_timestamp: pd.Timestamp, - end_timestamp: pd.Timestamp, - freq: str, -) -> pd.Series: - """ - Find missing points on a time interval specified by [start_timestamp, - end_timestamp], where point distribution is determined by . - - If the passed time series is of a unix epoch format. It is - automatically tranformed to pd.Timestamp. - - :param time_series: time series to find gaps in - :param start_timestamp: start of the time interval to check - :param end_timestamp: end of the time interval to check - :param freq: distance between two data points on the interval. - Aliases correspond to pandas.date_range's freq parameter, i.e. - "S" -> second, "T" -> minute. - :return: pd.Series representing missing points in the source time - series. - """ - _time_series = time_series - if str(time_series.dtype) in ["int32", "int64"]: - _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) - correct_time_series = pd.date_range( - start=start_timestamp, end=end_timestamp, freq=freq - ) - return correct_time_series.difference(_time_series) - - -# ############################################################################# -# DataFrame Transformation -# ############################################################################# - - -def apply_index_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the index mode. 
- - :param df1: first input df - :param df2: second input df - :param mode: method of processing indices - - "assert_equal": check that both indices are equal, assert otherwise - - "intersect": restrict both dfs to a common index - - "leave_unchanged": ignore any indices mismatch and return dfs as-is - :return: transformed copy of the inputs - """ - # Import locally to avoid cyclic import - import helpers.hpandas_dassert as hpandass - - _LOG.debug("mode=%s", mode) - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - hpandass.dassert_indices_equal(df1_copy, df2_copy) - elif mode == "intersect": - # TODO(Grisha): Add sorting on demand. - common_index = df1_copy.index.intersection(df2_copy.index) - df1_copy = df1_copy[df1_copy.index.isin(common_index)] - df2_copy = df2_copy[df2_copy.index.isin(common_index)] - elif mode == "leave_unchanged": - _LOG.debug( - "Ignoring any index missmatch as per user's request.\n" - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1_copy.index.difference(df2_copy.index), - df2_copy.index.difference(df1_copy.index), - ) - else: - raise ValueError(f"Unsupported index_mode={mode}") - return df1_copy, df2_copy - - -def apply_columns_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the column mode. 
- - :param df1: first input df - :param df2: second input df - :param mode: method of processing columns - - "assert_equal": check that both dfs have equal columns, assert otherwise - - "intersect": restrict both dfs to only include common columns - - "leave_unchanged": ignore any column mismatches and return dfs as-is - :return: transformed copy of the inputs - """ - # Import locally to avoid cyclic import - import helpers.hpandas_dassert as hpandass - import helpers.hpandas_utils as hpanutil - - _LOG.debug("mode=%s", mode) - # Input validation. - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - # Check if columns are equal or not. - hpandass.dassert_columns_equal(df1_copy, df2_copy) - elif mode == "intersect": - # Filter dataframes based on its common columns. - common_columns = df1_copy.columns.intersection(df2_copy.columns) - df1_copy = df1_copy[common_columns] - df2_copy = df2_copy[common_columns] - # Log the string representation of 2 dfs. - _LOG.debug("df1 after filtering=\n%s", hpanutil.df_to_str(df1)) - _LOG.debug("df2 after filtering=\n%s", hpanutil.df_to_str(df2)) - elif mode == "leave_unchanged": - # Ignore mismatch. - _LOG.debug( - "Ignoring any column missmatch as per user's request.\n" - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - ) - else: - raise ValueError(f"Unsupported column mode: {mode}") - return df1_copy, df2_copy - - -def trim_df( - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> pd.DataFrame: - """ - Trim the dataframe using values in `ts_col_name`. 
- - The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. - - :param df: the dataframe to trim - :param ts_col_name: the name of the column; `None` means index - :param start_ts: the start boundary for trimming - :param end_ts: the end boundary for trimming - :param left_close: whether to include the start boundary of the interval - - True: [start_ts, ... - - False: (start_ts, ... - :param right_close: whether to include the end boundary of the interval - - True: ..., end_ts] - - False: ..., end_ts) - :return: the trimmed dataframe - """ - if _TRACE: - # Import locally to avoid cyclic import - import helpers.hpandas_utils as hpanutil - - _LOG.trace( - hpanutil.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - ) - _LOG.debug( - hprint.to_str("ts_col_name start_ts end_ts left_close right_close") - ) - if _TRACE: - # Import locally to avoid cyclic import - import helpers.hpandas_utils as hpanutil - - _LOG.trace("df=\n%s", hpanutil.df_to_str(df)) - if df.empty: - # If the df is empty, there is nothing to trim. - return df - if start_ts is None and end_ts is None: - # If no boundaries are specified, there are no points of reference to trim - # to. - return df - num_rows_before = df.shape[0] - if start_ts is not None and end_ts is not None: - # Confirm that the interval boundaries are valid. - hdateti.dassert_tz_compatible(start_ts, end_ts) - hdbg.dassert_lte(start_ts, end_ts) - # Get the values to filter by. - if ts_col_name is None: - values_to_filter_by = pd.Series(df.index, index=df.index) - else: - hdbg.dassert_in(ts_col_name, df.columns) - values_to_filter_by = df[ts_col_name] - if values_to_filter_by.is_monotonic_increasing: - _LOG.trace("df is monotonic") - # The values are sorted; using the `pd.Series.searchsorted()` method. - # Find the index corresponding to the left boundary of the interval. 
- if start_ts is not None: - side = "left" if left_close else "right" - left_idx = values_to_filter_by.searchsorted(start_ts, side) - else: - # There is nothing to filter, so the left index is the first one. - left_idx = 0 - _LOG.debug(hprint.to_str("start_ts left_idx")) - # Find the index corresponding to the right boundary of the interval. - if end_ts is not None: - side = "right" if right_close else "left" - right_idx = values_to_filter_by.searchsorted(end_ts, side) - else: - # There is nothing to filter, so the right index is None. - right_idx = df.shape[0] - _LOG.debug(hprint.to_str("end_ts right_idx")) - # - hdbg.dassert_lte(0, left_idx) - hdbg.dassert_lte(left_idx, right_idx) - hdbg.dassert_lte(right_idx, df.shape[0]) - _LOG.debug(hprint.to_str("start_ts left_idx")) - if right_idx < df.shape[0]: - _LOG.debug(hprint.to_str("end_ts right_idx")) - df = df.iloc[left_idx:right_idx] - else: - _LOG.trace("df is not monotonic") - # The values are not sorted; using the `pd.Series.between` method. - if left_close and right_close: - inclusive = "both" - elif left_close: - inclusive = "left" - elif right_close: - inclusive = "right" - else: - inclusive = "neither" - epsilon = pd.DateOffset(minutes=1) - if start_ts is None: - start_ts = values_to_filter_by.min() - epsilon - if end_ts is None: - end_ts = values_to_filter_by.max() + epsilon - df = df[ - values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) - ] - # Report the changes. - num_rows_after = df.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return df - - -def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: - """ - Organize dataframe values into a column-row structure. - - - Indentation artifacts are removed - - The index placement is handled, i.e. 
- - if the index is named, the name is located and moved to the same - row as the column names - - if the index is not named, the row with the column names receives - a placeholder empty value in its place - - Empty columns are dropped - - :param rows_values: row values extracted from a string df representation - :return: row values assembled into a valid column-row structure - """ - # Clean up indentation artifacts. - if all(row[0] == "" for row in rows_values): - # Remove the first empty cell in each row. - for row in rows_values: - del row[0] - # If the index is named, its name is located in the second row, - # with an optional extra empty value cell value next to it. - if len(rows_values[1]) == 1 or ( - len(rows_values[1]) == 2 and rows_values[1][1] == "" - ): - # Move the index name to the row with all the column names. - if rows_values[0][0] == "": - rows_values[0][0] = rows_values[1][0] - else: - rows_values[0].insert(0, rows_values[1][0]) - # Drop the former index name row. - del rows_values[1] - else: - # Add an empty cell for the absent index name. - rows_values[0].insert(0, "") - # Identify and remove empty columns. - min_len_row = min(len(row) for row in rows_values) - idxs_to_delete = [] - for i in range(min_len_row): - if all(row[i] == "" for row in rows_values): - idxs_to_delete.append(i) - for idx in idxs_to_delete: - for row in rows_values: - del row[idx] - # Confirm that all the rows have the same number of values. - hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) - return rows_values - - -# TODO(Nina): Add `filter_data_mode`. - - -def str_to_df( - df_as_str: str, - col_to_type: Dict[str, Optional[type]], - col_to_name_type: Dict[str, type], -) -> pd.DataFrame: - """ - Convert a string representation of a dataframe into a Pandas df. - - :param df_as_str: a df as a string - - the format of the string is the same as the output of - `hpandas_utils.df_to_str()` on a pd.DataFrame, e.g. 
- ``` - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05 - ``` - - values (including column names) that contain spaces need - to be enclosed in double quotation marks, e.g. - "2023-03-15 16:35:41.205000+00:00" - :param col_to_type: a mapping between the column names and the - types of the values in these columns - - if a column is not present in the mapping, its values will - remain strings - - to indicate the type of index values, use {"__index__": ...} - mapping, e.g. {"__index__": pd.Timestamp} - :param col_to_name_type: a mapping between the column names and - the required types of these column names - - same conventions apply as for `col_to_type` (see above) - :return: a converted Pandas dataframe - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - # Remove the placeholder ["..."] row. - rows_values = [row for row in rows_values if row != ["..."]] - # Organize values into a proper column-row structure. - rows_values = _assemble_df_rows(rows_values) - # Get the column names. - column_names = rows_values[0][1:] - # Get the index. - index_values = [row[0] for row in rows_values[1:]] - index_name = rows_values[0][0] - # Construct the df. - df = pd.DataFrame( - [row[1:] for row in rows_values[1:]], - columns=column_names, - index=index_values, - ) - if index_name != "": - df.index.name = index_name - # Cast the columns into appropriate types. - # Import locally to avoid cyclic import - import helpers.hpandas_conversion as hpanconv - - for col, col_type in col_to_type.items(): - if col == "__index__": - df.index = hpanconv.cast_series_to_type(df.index, col_type) - else: - df[col] = hpanconv.cast_series_to_type(df[col], col_type) - # Cast the column names into appropriate types. 
- for col, col_name_type in col_to_name_type.items(): - if col == "__index__": - df.index = df.index.rename(col_name_type(df.index.name)) - else: - df = df.rename(columns={col: col_name_type(col)}) - return df - - -# ############################################################################# -# Column Operations -# ############################################################################# - - -def check_and_filter_matching_columns( - df: pd.DataFrame, required_columns: List[str], filter_data_mode: str -) -> pd.DataFrame: - """ - Check that columns are the required ones and if not filter data depending - on `filter_data_mode`. - - :param df: data to check columns for - :param required_columns: columns to return, skipping columns that are not required - :param filter_data_mode: control behaviour with respect to extra or missing columns - - "assert": raise an error if required columns do not match received columns - - "warn_and_trim": return the intersection of required and received columns and - issue a warning - :return: input data as it is if required columns match received columns otherwise - processed data, see `filter_data_mode` - """ - received_columns = df.columns.to_list() - hdbg.dassert_lte(1, len(received_columns)) - # - if filter_data_mode == "assert": - # Raise an assertion. - only_warning = False - elif filter_data_mode == "warn_and_trim": - # Just issue a warning. - only_warning = True - # Get columns intersection while preserving the order of the columns. - columns_intersection = [ - col_name - for col_name in required_columns - if col_name in received_columns - ] - hdbg.dassert_lte(1, len(columns_intersection)) - df = df[columns_intersection] - else: - raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") - hdbg.dassert_set_eq( - required_columns, - received_columns, - only_warning=only_warning, - msg="Received columns do not match required columns.", - ) - return df - - -# TODO(Grisha): finish the function. 
-# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? - - -# ############################################################################# -# Merge -# ############################################################################# - - -def merge_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - threshold_col_name: str, - *, - threshold: float = 0.9, - intersecting_columns: Optional[List[str]] = None, - **pd_merge_kwargs: Any, -) -> pd.DataFrame: - """ - Wrap `pd.merge`. - - :param threshold_col_name: a column's name to check the minimum - overlap on - :param threshold: minimum overlap of unique values in a specified - column to perform the merge - :param intersecting_columns: allow certain columns to appear in both - dataframes; store both in the resulting df with corresponding - suffixes - """ - _LOG.debug( - hprint.to_str( - "threshold_col_name threshold intersecting_columns pd_merge_kwargs" - ) - ) - # Sanity check column types. - threshold_col1 = df1[threshold_col_name] - threshold_col2 = df2[threshold_col_name] - only_first_elem = False - hdbg.dassert_array_has_same_type_element( - threshold_col1, threshold_col2, only_first_elem - ) - # TODO(Grisha): @Dan Implement asserts for each asset id. - # Check that an overlap of unique values is above the specified threshold. - threshold_unique_values1 = set(threshold_col1) - threshold_unique_values2 = set(threshold_col2) - threshold_common_values = set(threshold_unique_values1) & set( - threshold_unique_values2 - ) - threshold_common_values_share1 = len(threshold_common_values) / len( - threshold_unique_values1 - ) - threshold_common_values_share2 = len(threshold_common_values) / len( - threshold_unique_values2 - ) - hdbg.dassert_lte(threshold, threshold_common_values_share1) - hdbg.dassert_lte(threshold, threshold_common_values_share2) - # Use an empty set instead of None to perform set difference further. 
- intersecting_columns_set = ( - set() if intersecting_columns is None else set(intersecting_columns) - ) - # Check that there are no common columns except for the ones in `intersecting_columns`. - df1_cols = ( - set(df1.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - df2_cols = ( - set(df2.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - hdbg.dassert_not_intersection(df1_cols, df2_cols) - # - res_df = df1.merge(df2, **pd_merge_kwargs) - return res_df - - -# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? - - -def get_df_from_iterator( - iter_: Iterator[pd.DataFrame], - *, - sort_index: bool = True, -) -> pd.DataFrame: - """ - Concat all the dataframes in the iterator in one dataframe. - - :param iter_: dataframe iterator - :param sort_index: whether to sort output index or not - :return: combined iterator data - """ - # TODO(gp): @all make a copy of `iter_` so we don't consume it. - dfs = list(iter_) - df_res = pd.concat(dfs) - if sort_index: - df_res = df_res.sort_index() - return df_res - - -# ############################################################################# -# Filter -# ############################################################################# - - -def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: - """ - Remove N rows from the input data and shuffle the remaining ones. 
- - :param df: input data - :param nrows: the number of rows to remove from the original data - :param seed: see `random.seed()` - :return: shuffled data with removed rows - """ - hdbg.dassert_lte(1, nrows) - hdbg.dassert_lte(nrows, df.shape[0]) - idx = list(range(df.shape[0])) - random.seed(seed) - random.shuffle(idx) - idx = sorted(idx[nrows:]) - return df.iloc[idx] - - -def filter_df( - df: pd.DataFrame, - col_name: str, - value: Any, - *, - invert: bool = False, - check_value: bool = True, - # TODO(gp): -> verbose - print_info: bool = True, -) -> pd.DataFrame: - """ - Filter a dataframe based on a column value. - - :param df: dataframe to filter - :param col_name: column name to filter on - :param value: value to filter on - :param invert: whether to invert the filter - :param check_value: whether to check that the value is in the column - :param print_info: whether to print information about the filter - :return: filtered dataframe - """ - hdbg.dassert_in(col_name, df.columns) - if isinstance(value, list): - mask = df[col_name].isin(value) - else: - if check_value: - hdbg.dassert_in(value, df[col_name].unique()) - mask = df[col_name] == value - if invert: - mask = ~mask - if print_info: - _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0])) - return df[mask] - - -def remove_empty_columns( - df: pd.DataFrame, *, verbose: bool = True -) -> pd.DataFrame: - """ - Remove empty columns from a dataframe. 
- - :param df: dataframe to remove empty columns from - :return: dataframe with empty columns removed - """ - mask = df.apply(lambda col: col.notna() & (col != "")).any() - non_empty_columns = df.columns[mask] - empty_columns = df.columns[~mask] - if verbose: - _LOG.info( - "kept %s columns: %s", - hprint.perc(len(non_empty_columns), len(df.columns)), - hprint.list_to_str(non_empty_columns), - ) - _LOG.info( - "removed %s columns: %s", - hprint.perc(len(empty_columns), len(df.columns)), - hprint.list_to_str(empty_columns), - ) - df = df[non_empty_columns] - return df - - -def remove_stable_columns( - df: pd.DataFrame, *, threshold: float = 0.9, verbose: bool = True -) -> pd.DataFrame: - """ - Remove columns from a dataframe that have less than threshold unique - values. - - :param df: dataframe to remove stable columns from - :param threshold: threshold for the percentage of stable columns to - remove - :return: dataframe with stable columns removed - """ - high_variability_columns = [] - for col in df.columns: - unique_values = df[col].unique() - if len(unique_values) / len(df) >= threshold: - high_variability_columns.append(col) - # Compute the columns to remove. - columns_to_remove = df.columns[~df.columns.isin(high_variability_columns)] - if verbose: - _LOG.info( - "kept %s columns: %s", - hprint.perc(len(high_variability_columns), len(df.columns)), - hprint.list_to_str(high_variability_columns), - ) - _LOG.info( - "removed %s columns: %s", - hprint.perc(len(columns_to_remove), len(df.columns)), - hprint.list_to_str(columns_to_remove), - ) - df = df[high_variability_columns] - return df - - -def adapt_to_series(f: Callable) -> Callable: - """ - Extend a function working on dataframes so that it can work on series. - """ - - def wrapper( - obj: Union[pd.Series, pd.DataFrame], *args: Any, **kwargs: Any - ) -> Any: - # Convert a pd.Series to a pd.DataFrame. 
- was_series = False - if isinstance(obj, pd.Series): - obj = pd.DataFrame(obj) - was_series = True - hdbg.dassert_isinstance(obj, pd.DataFrame) - # Apply the function. - res = f(obj, *args, **kwargs) - # Transform the output, if needed. - if was_series: - if isinstance(res, tuple): - res_obj, res_tmp = res[0], res[1:] - res_obj_srs = hpanconv.to_series(res_obj) - res_obj_srs = [res_obj_srs] - res_obj_srs.extend(res_tmp) - res = tuple(res_obj_srs) - else: - res = hpanconv.to_series(res) - return res - - return wrapper - - -# ############################################################################# - - -def add_pct( - df: pd.DataFrame, - col_name: str, - total: int, - dst_col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Add to df a column "dst_col_name" storing the percentage of values in - column "col_name" with respect to "total". The rest of the parameters are - the same as hprint.round_digits(). - - :return: updated df - """ - # Add column with percentage right after col_name. - pos_col_name = df.columns.tolist().index(col_name) - df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) - # Format. - df[col_name] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in df[col_name] - ] - df[dst_col_name] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in df[dst_col_name] - ] - return df - - -# ############################################################################# - - -def remove_columns( - df: pd.DataFrame, cols: Collection[str], log_level: int = logging.DEBUG -) -> pd.DataFrame: - """ - Remove specified columns from a dataframe. 
- - :param df: dataframe to remove columns from - :param cols: collection of column names to remove - :param log_level: logging level for reporting removed columns - :return: dataframe with specified columns removed - """ - to_remove = set(cols).intersection(set(df.columns)) - _LOG.log(log_level, "to_remove=%s", hprint.list_to_str(to_remove)) - df.drop(to_remove, axis=1, inplace=True) - _LOG.debug("df=\n%s", df.head(3)) - _LOG.log(log_level, hprint.list_to_str(df.columns)) - return df - - -def filter_with_df( - df: pd.DataFrame, filter_df: pd.DataFrame, log_level: int = logging.DEBUG -) -> pd.Series: - """ - Compute a mask for DataFrame df using common columns and values in - "filter_df". - """ - mask = None - for c in filter_df: - hdbg.dassert_in(c, df.columns) - vals = filter_df[c].unique() - if mask is None: - mask = df[c].isin(vals) - else: - mask &= df[c].isin(vals) - mask: pd.DataFrame - _LOG.log(log_level, "after filter=%s", hprint.perc(mask.sum(), len(mask))) - return mask - - -def filter_by_time( - df: pd.DataFrame, - lower_bound: hdateti.StrictDatetime, - upper_bound: hdateti.StrictDatetime, - inclusive: str, - ts_col_name: Optional[str], - log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Filter data by time between `lower_bound` and `upper_bound`. - - Pass `None` to `ts_col_name` to filter by `DatetimeIndex`. - - :param df: data to filter - :param lower_bound: left limit point of the time interval - :param upper_bound: right limit point of the time interval - :param inclusive: include boundaries - - "both": `[lower_bound, upper_bound]` - - "neither": `(lower_bound, upper_bound)` - - "right": `(lower_bound, upper_bound]` - - "left": `[lower_bound, upper_bound)` - :param ts_col_name: name of a timestamp column to filter with, or None to - use the DatetimeIndex - :param log_level: the level of logging, e.g. 
`DEBUG` - :return: dataframe filtered by time - """ - hdateti.dassert_is_strict_datetime(lower_bound) - hdateti.dassert_is_strict_datetime(upper_bound) - # Time filtering is not working if timezones are different. - hdateti.dassert_tz_compatible_timestamp_with_df(lower_bound, df, ts_col_name) - hdateti.dassert_tz_compatible_timestamp_with_df(upper_bound, df, ts_col_name) - # - if ts_col_name is None: - # Filter data by index. - hdbg.dassert_isinstance(df.index, pd.DatetimeIndex) - # Cast index to `pd.Series` to use the `between` method. - mask = df.index.to_series().between(lower_bound, upper_bound, inclusive) - else: - # Filter data by a specified column. - hdbg.dassert_in(ts_col_name, df.columns) - mask = df[ts_col_name].between(lower_bound, upper_bound, inclusive) - # - _LOG.log( - log_level, - "Filtering between %s and %s with inclusive=`%s`, selected rows=%s", - lower_bound, - upper_bound, - inclusive, - hprint.perc(mask.sum(), df.shape[0]), - ) - return df[mask] - - -def filter_by_val( - df: pd.DataFrame, - col_name: str, - min_val: float, - max_val: float, - use_thousands_separator: bool = True, - log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Filter out rows of df where df[col_name] is not in [min_val, max_val]. - """ - # TODO(gp): If column is ordered, this can be done more efficiently with - # binary search. 
- num_rows = df.shape[0] - if min_val is not None and max_val is not None: - hdbg.dassert_lte(min_val, max_val) - mask = None - if min_val is not None: - mask = min_val <= df[col_name] - if max_val is not None: - mask2 = df[col_name] <= max_val - if mask is None: - mask = mask2 - else: - mask &= mask2 - res = df[mask] - hdbg.dassert_lt(0, res.shape[0]) - _LOG.log( - log_level, - "Rows kept %s, removed %s rows", - hprint.perc( - res.shape[0], - num_rows, - use_thousands_separator=use_thousands_separator, - ), - hprint.perc( - num_rows - res.shape[0], - num_rows, - use_thousands_separator=use_thousands_separator, - ), - ) - return res - - -# ############################################################################# -# PCA -# ############################################################################# - - -def sample_rolling_df( - rolling_df: pd.DataFrame, periods: int -) -> Tuple[pd.DataFrame, pd.DatetimeIndex]: - """ - Given a rolling metric stored as multiindex (e.g., correlation computed by - pd.ewm) sample `periods` equispaced samples. 
- - :return: sampled df, array of timestamps selected - """ - timestamps = rolling_df.index.get_level_values(0) - ts = timestamps[:: math.ceil(len(timestamps) / periods)] - _LOG.debug("timestamps=%s", str(ts)) - # rolling_df_out = rolling_df.unstack().reindex(ts).stack(dropna=False) - rolling_df_out = rolling_df.loc[ts] - return rolling_df_out, ts diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py deleted file mode 100644 index aaacb290a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py +++ /dev/null @@ -1,649 +0,0 @@ -""" -Import as: - -import helpers.hpandas_utils as hpanutil -""" - -import logging -from typing import Any, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd -import tqdm.autonotebook as tauton - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - -# Import add_pct for use in this module. - - -# TODO(gp): -> AxisNameSet -ColumnSet = Optional[Union[str, List[str]]] - - -# ############################################################################# - - -# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that -# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. -# TODO(gp): We should make sure this works properly in a notebook, although -# it's not easy to unit test. - - -def _display(log_level: int, df: pd.DataFrame) -> None: - """ - Display a dataframe in a notebook at the given log level. - - The behavior is similar to a command like `_LOG.log(log_level, ...)` but - for a notebook `display` command. - - :param log_level: log level at which to display a df. 
E.g., if `log_level = - logging.DEBUG`, then we display the df only if we are running with - `-v DEBUG`. If `log_level = logging.INFO` then we don't display it - :param df: dataframe to display - """ - from IPython.display import display - - if ( - hsystem.is_running_in_ipynb() - and log_level >= hdbg.get_logger_verbosity() - ): - display(df) - - -def _df_to_str( - df: pd.DataFrame, - num_rows: Optional[int], - max_columns: int, - max_colwidth: int, - max_rows: int, - precision: int, - display_width: int, - use_tabulate: bool, - log_level: int, -) -> str: - """ - Convert a DataFrame to a string representation. - - :param df: The DataFrame to convert to a string. - :param num_rows: The number of rows to display. - :param max_columns: The maximum number of columns to display. - :param max_colwidth: The maximum width of each column. - :param max_rows: The maximum number of rows to display. - :param precision: The precision of the numbers. - :param display_width: The width of the display. - :param use_tabulate: Whether to use the tabulate library to format - the DataFrame. - :param log_level: The log level to use. - :return: A string representation of the DataFrame. - """ - is_in_ipynb = hsystem.is_running_in_ipynb() - out = [] - # Set dataframe print options. - with pd.option_context( - "display.max_colwidth", - max_colwidth, - # "display.height", 1000, - "display.max_rows", - max_rows, - "display.precision", - precision, - "display.max_columns", - max_columns, - "display.width", - display_width, - ): - if use_tabulate: - import tabulate - - out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) - # TODO(Grisha): Add an option to display all rows since if `num_rows` - # is `None`, only first and last 5 rows are displayed. Consider using - # `df.to_string()` instead of `str(df)`. - if num_rows is None or df.shape[0] <= num_rows: - # Print the entire data frame. - if not is_in_ipynb: - out.append(str(df)) - else: - # Display dataframe. 
- _display(log_level, df) - else: - nr = num_rows // 2 - if not is_in_ipynb: - # Print top and bottom of df. - out.append(str(df.head(nr))) - out.append("...") - tail_str = str(df.tail(nr)) - # Remove index and columns from tail_df. - skipped_rows = 1 - if df.index.name: - skipped_rows += 1 - tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) - out.append(tail_str) - else: - # TODO(gp): @all use this approach also above and update all the - # unit tests. - df = [ - df.head(nr), - pd.DataFrame( - [["..."] * df.shape[1]], index=[" "], columns=df.columns - ), - df.tail(nr), - ] - df = pd.concat(df) - # Display dataframe. - _display(log_level, df) - if not is_in_ipynb: - txt = "\n".join(out) - else: - txt = "" - return txt - - -def _report_srs_stats(srs: pd.Series) -> List[Any]: - """ - Report dtype, the first element, and its type of series. - - :param srs: The series to report the stats of. - :return: A list of the stats. - """ - row: List[Any] = [] - first_elem = srs.values[0] - num_unique = srs.nunique() - num_nans = srs.isna().sum() - row.extend( - [ - srs.dtype, - hprint.perc(num_unique, len(srs)), - hprint.perc(num_nans, len(srs)), - first_elem, - type(first_elem), - ] - ) - return row - - -def df_to_str( - df: Union[pd.DataFrame, pd.Series, pd.Index], - *, - # TODO(gp): Remove this hack in the integration. - # handle_signed_zeros: bool = False, - handle_signed_zeros: bool = True, - num_rows: Optional[int] = 6, - print_dtypes: bool = False, - print_shape_info: bool = False, - print_nan_info: bool = False, - print_memory_usage: bool = False, - memory_usage_mode: str = "human_readable", - tag: Optional[str] = None, - max_columns: int = 10000, - max_colwidth: int = 2000, - max_rows: int = 500, - precision: int = 6, - display_width: int = 10000, - use_tabulate: bool = False, - log_level: int = logging.DEBUG, -) -> str: - """ - Print a dataframe to string reporting all the columns without trimming. 
- - Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works - properly when called from outside a notebook, i.e., the dataframe is printed - But it won't display the dataframe in a notebook, since the default level at - which the dataframe is displayed is `logging.DEBUG`. - - In this case to get the correct behavior one should do: - ``` - log_level = ... - _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) - ``` - - :param: handle_signed_zeros: convert `-0.0` to `0.0` - :param: num_rows: max number of rows to print (half from the top and half from - the bottom of the dataframe) - - `None` to print the entire dataframe - :param print_dtypes: report dataframe types and information about the type of - each column by looking at the first value - :param print_shape_info: report dataframe shape, index and columns - :param print_memory_usage: report memory use for each - """ - if df is None: - return "" - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - elif isinstance(df, pd.Index): - df = df.to_frame(index=False) - hdbg.dassert_isinstance(df, pd.DataFrame) - # Convert "negative zeros" to `0.0`. - df = df.copy() - if handle_signed_zeros: - for col_name in df.select_dtypes(include=[np.float64, float]).columns: - df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) - out = [] - # Print the tag. - if tag is not None: - out.append(f"# {tag}=") - if not df.empty: - # Print information about the shape and index. - # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info - if print_shape_info: - # TODO(gp): Unfortunately we can't improve this part of the output - # since there are many golden inside the code that would need to be - # updated. Consider automating updating the expected values in the code. 
- txt = f"index=[{df.index.min()}, {df.index.max()}]" - out.append(txt) - txt = f"columns={','.join(map(str, df.columns))}" - out.append(txt) - txt = f"shape={str(df.shape)}" - out.append(txt) - # Print information about the types. - if print_dtypes: - out.append("* type=") - table = [] - row = [] - col_name = "index" - row.append(col_name) - row.extend(_report_srs_stats(df.index)) - row = map(str, row) - table.append(row) - for col_name in df.columns: - row_: List[Any] = [] - row_.append(col_name) - row_.extend(_report_srs_stats(df[col_name])) - row_ = map(str, row_) - table.append(row_) - # - columns = [ - "col_name", - "dtype", - "num_unique", - "num_nans", - "first_elem", - "type(first_elem)", - ] - df_stats = pd.DataFrame(table, columns=columns) - stats_num_rows = None - df_stats_as_str = _df_to_str( - df_stats, - stats_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(df_stats_as_str) - # Print info about memory usage. - if print_memory_usage: - out.append("* memory=") - mem_use_df = pd.concat( - [df.memory_usage(deep=False), df.memory_usage(deep=True)], - axis=1, - keys=["shallow", "deep"], - ) - # Add total row. - mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) - mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) - # Convert into the desired format. - if memory_usage_mode == "bytes": - pass - elif memory_usage_mode == "human_readable": - import helpers.hintrospection as hintros - - mem_use_df = mem_use_df.applymap(hintros.format_size) - else: - raise ValueError( - f"Invalid memory_usage_mode='{memory_usage_mode}'" - ) - memory_num_rows = None - memory_usage_as_txt = _df_to_str( - mem_use_df, - memory_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(memory_usage_as_txt) - # Print info about nans. 
- if print_nan_info: - num_elems = df.shape[0] * df.shape[1] - num_nans = df.isna().sum().sum() - txt = f"num_nans={hprint.perc(num_nans, num_elems)}" - out.append(txt) - # - num_zeros = df.isnull().sum().sum() - txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" - out.append(txt) - # TODO(gp): np can't do isinf on objects like strings. - # num_infinite = np.isinf(df).sum().sum() - # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) - # out.append(txt) - # - num_nan_rows = df.dropna().shape[0] - txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" - out.append(txt) - # - num_nan_cols = df.dropna(axis=1).shape[1] - txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" - out.append(txt) - if hsystem.is_running_in_ipynb(): - if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): - print("\n".join(out)) - txt = None - # Print the df. - df_as_str = _df_to_str( - df, - num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - if not hsystem.is_running_in_ipynb(): - out.append(df_as_str) - txt = "\n".join(out) - return txt - - -# ############################################################################# - - -def head( - df: pd.DataFrame, - *, - print_columns: bool = False, - num_rows: int = 2, - seed: Union[int, None] = None, -) -> str: - """ - Display a sample of rows from a DataFrame. - - By default shows the first `num_rows` rows. When a seed is provided, - randomly samples `num_rows` rows instead. - - :param df: The DataFrame to sample from. - :param num_rows: Number of rows to display. - :param seed: Optional random seed for reproducible sampling. If None, shows - first rows. 
- """ - txt = "" - if print_columns: - txt += "columns=%s\n" % ",".join(df.columns.tolist()) - txt += "shape=%s\n" % str(df.shape) - # - if seed is not None: - np.random.seed(seed) - index = np.random.choice(df.index, num_rows, replace=False) - index = sorted(index) - df = df.loc[index] - else: - df = df.head(num_rows) - with pd.option_context( - "display.width", - 200, - "display.max_columns", - None, - "display.max_colwidth", - None, - ): - txt += "\n" + str(df) - return txt - - -# ############################################################################# - - -def resolve_column_names( - column_set: ColumnSet, - columns: Union[List[str], pd.Index], - *, - keep_order: bool = False, -) -> List[str]: - """ - Change format of the columns and perform some sanity checks. - - :param column_set: columns to proceed - :param columns: all columns available - :param keep_order: preserve the original order or allow sorting - """ - # Ensure that `columns` is well-formed. - if isinstance(columns, pd.Index): - columns = columns.to_list() - hdbg.dassert_isinstance(columns, list) - hdbg.dassert_lte(1, len(columns)) - # - if column_set is None: - # Columns were not specified, thus use the list of all the columns. - column_set = columns - else: - if isinstance(column_set, str): - column_set = [column_set] - hdbg.dassert_isinstance(column_set, list) - hdbg.dassert_lte(1, len(column_set)) - hdbg.dassert_is_subset(column_set, columns) - if keep_order: - # Keep the selected columns in the same order as in the original - # `columns`. - column_set = [c for c in columns if c in column_set] - return column_set - - -def _get_unique_elements_in_column(df: pd.DataFrame, col_name: str) -> List[Any]: - """ - Get unique elements in a column, handling unhashable types. 
- - :param df: dataframe containing the column - :param col_name: name of the column to get unique elements from - :return: list of unique elements - """ - try: - vals = df[col_name].unique() - except TypeError: - # TypeError: unhashable type: 'list' - _LOG.error("Column '%s' has unhashable types", col_name) - vals = list(set(map(str, df[col_name]))) - cast(List[Any], vals) - return vals - - -def _get_variable_cols( - df: pd.DataFrame, threshold: int = 1 -) -> Tuple[List[str], List[str]]: - """ - Return columns of a df that contain less than unique values. - - :return: (variable columns, constant columns) - """ - var_cols = [] - const_cols = [] - for col_name in df.columns: - unique_elems = _get_unique_elements_in_column(df, col_name) - num_unique_elems = len(unique_elems) - if num_unique_elems <= threshold: - const_cols.append(col_name) - else: - var_cols.append(col_name) - return var_cols, const_cols - - -def remove_columns_with_low_variability( - df: pd.DataFrame, threshold: int = 1, log_level: int = logging.DEBUG -) -> pd.DataFrame: - """ - Remove columns of a df that contain less than unique values. - - :return: df with only columns with sufficient variability - """ - var_cols, const_cols = _get_variable_cols(df, threshold=threshold) - _LOG.log(log_level, "# Constant cols") - for col_name in const_cols: - unique_elems = _get_unique_elements_in_column(df, col_name) - _LOG.log( - log_level, - " %s: %s", - col_name, - hprint.list_to_str(list(map(str, unique_elems))), - ) - _LOG.log(log_level, "# Var cols") - _LOG.log(log_level, hprint.list_to_str(var_cols)) - return df[var_cols] - - -# Start copy-paste From helpers/hpandas_transform.py - - -def add_pct( - df: pd.DataFrame, - col_name: str, - total: int, - dst_col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Add to df a column "dst_col_name" storing the percentage of values in - column "col_name" with respect to "total". 
The rest of the parameters are - the same as hprint.round_digits(). - - :return: updated df - """ - # Add column with percentage right after col_name. - pos_col_name = df.columns.tolist().index(col_name) - df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) - # Format. - df[col_name] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in df[col_name] - ] - df[dst_col_name] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in df[dst_col_name] - ] - return df - - -# End copy-paste. - - -def print_column_variability( - df: pd.DataFrame, - max_num_vals: int = 3, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Print statistics about the values in each column of a data frame. - - This is useful to get a sense of which columns are interesting. - """ - print(("# df.columns=%s" % hprint.list_to_str(df.columns))) - res = [] - for c in tauton.tqdm(df.columns, desc="Computing column variability"): - vals = _get_unique_elements_in_column(df, c) - try: - min_val = min(vals) - except TypeError as e: - _LOG.debug("Column='%s' reported %s", c, e) - min_val = "nan" - try: - max_val = max(vals) - except TypeError as e: - _LOG.debug("Column='%s' reported %s", c, e) - max_val = "nan" - if len(vals) <= max_num_vals: - txt = ", ".join(map(str, vals)) - else: - txt = ", ".join(map(str, [min_val, "...", max_val])) - row = ["%20s" % c, len(vals), txt] - res.append(row) - res = pd.DataFrame(res, columns=["col_name", "num", "elems"]) - res.sort_values("num", inplace=True) - # TODO(gp): Fix this. 
- # res = add_count_as_idx(res) - res = add_pct( - res, - "num", - df.shape[0], - "[diff %]", - num_digits=num_digits, - use_thousands_separator=use_thousands_separator, - ) - res.reset_index(drop=True, inplace=True) - return res - - -def breakdown_table( - df: pd.DataFrame, - col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, - verbosity: bool = False, -) -> pd.DataFrame: - """ - Create a breakdown table showing value counts and percentages for a column. - - :param df: dataframe to analyze - :param col_name: column name to create breakdown for - :param num_digits: number of decimal digits for percentages - :param use_thousands_separator: whether to use thousands separator - in counts - :param verbosity: whether to print additional details - :return: breakdown table with counts and percentages - """ - if isinstance(col_name, list): - for c in col_name: - print(("\n" + hprint.frame(c).rstrip("\n"))) - res = breakdown_table(df, c) - print(res) - return None - # - if verbosity: - print(("# col_name=%s" % col_name)) - first_col_name = df.columns[0] - res = df.groupby(col_name)[first_col_name].count() - res = pd.DataFrame(res) - res.columns = ["count"] - res.sort_values(["count"], ascending=False, inplace=True) - res = pd.concat( - [res, pd.DataFrame([df.shape[0]], index=["Total"], columns=["count"])] - ) - res["pct"] = (100.0 * res["count"]) / df.shape[0] - # Format. 
- res["count"] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in res["count"] - ] - res["pct"] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in res["pct"] - ] - if verbosity: - for k, df_tmp in df.groupby(col_name): - print((hprint.frame("%s=%s" % (col_name, k)))) - cols = [col_name, "description"] - with pd.option_context( - "display.max_colwidth", 100000, "display.width", 130 - ): - print((df_tmp[cols])) - return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py deleted file mode 100644 index 319c6cf44..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py +++ /dev/null @@ -1,1309 +0,0 @@ -""" -Import as: - -import helpers.hparquet as hparque -""" - -import collections -import datetime -import glob -import logging -import os -from typing import Any, Callable, Iterator, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import pyarrow as pa -import pyarrow.dataset as ds -import pyarrow.fs as pafs -import pyarrow.parquet as pq - -# Check if S3FileSystem is available in `pyarrow.fs`. -if hasattr(pafs, "S3FileSystem"): - S3FileSystemAvailable = True - PyArrowS3FileSystem = pafs.S3FileSystem -else: - S3FileSystemAvailable = False - - # Define a dummy class for type hints when S3FileSystem is not available. 
- class PyArrowS3FileSystem: - def __init__(self, *args, **kwargs): - raise ImportError( - "S3FileSystem is not available in this version of pyarrow.fs" - ) - - -from tqdm.autonotebook import tqdm - -import helpers.hdataframe as hdatafr -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# ParquetDataFrameGenerator -# ############################################################################# - - -class ParquetDataFrameGenerator: - # Allowed types. - OUTPUT_TYPES = ("basic", "verbose_open", "cm_task_1103") - - @staticmethod - def _wrap_all_assets_df(df: List[pd.DataFrame]) -> pd.DataFrame: - # Create a single dataframe for all the assets. - df = pd.concat(df) - _LOG.debug(hpandas.df_to_str(df, print_shape_info=True, tag="df")) - return df - - def _get_core_dataframes(self) -> List[pd.DataFrame]: - """ - Create core dataframes that are updated according to the output type. - - :return: list of core dataframes for specified assets with string values - Example: - - ``` - asset - 2000-01-01 A - 2000-01-02 A - 2000-01-03 A - ``` - """ - # Generate core dataframe for each asset. - df = [] - for asset in self._assets: - asset_df = pd.DataFrame( - {self._asset_col_name: asset}, - index=self._dataframe_index, - ) - _LOG.debug( - hpandas.df_to_str( - asset_df, print_shape_info=True, tag="asset_df" - ) - ) - df.append(asset_df) - return df - - def _get_daily_basic_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. 
- - :return: updated core dataframe as presented below - Example: - - ``` - idx asset val1 val2 - 2000-01-01 0 A 00 00 - 2000-01-02 0 A 01 01 - 2000-01-03 0 A 02 02 - ``` - """ - asset_dataframes = self._get_core_dataframes() - for idx, asset_dataframe in enumerate(asset_dataframes): - # Positioned left from `asset` column. - asset_dataframe.insert(loc=0, column="idx", value=idx) - # Positioned right from `asset` column. - asset_dataframe.insert( - loc=2, - column="val1", - value=list(range(len(self._dataframe_index))), - ) - asset_dataframe.insert( - loc=3, - column="val2", - value=list(range(len(self._dataframe_index))), - ) - return self._wrap_all_assets_df(asset_dataframes) - - def _get_verbose_open_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. - - :return: update core dataframe as presented below - Example: - - ``` - vendor_date interval start_time end_time ticker currency open id - 2021-11-24 60 1637762400 1637762460 A USD 100 1 - 2021-11-24 60 1637762400 1637762460 A USD 200 2 - ``` - """ - interval = self._dataframe_index[1] - self._dataframe_index[0] - interval = interval.seconds - asset_dataframes = self._get_core_dataframes() - for id_, asset_dataframe in enumerate(asset_dataframes): - start_time = ( - asset_dataframe.index - pd.Timestamp("1970-01-01") - ) // pd.Timedelta("1s") - end_time = start_time + interval - # Positioned left from `ticker` column. - asset_dataframe.insert( - loc=0, - column="vendor_date", - value=asset_dataframe.index.date.astype(str), - ) - asset_dataframe.insert(loc=1, column="interval", value=interval) - asset_dataframe.insert(loc=2, column="start_time", value=start_time) - asset_dataframe.insert(loc=3, column="end_time", value=end_time) - # Positioned right from `ticker` column. 
- asset_dataframe.insert(loc=5, column="currency", value="USD") - asset_dataframe.insert( - loc=6, - column="open", - value=list(range(len(self._dataframe_index))), - ) - asset_dataframe.insert(loc=7, column="id", value=id_) - return self._wrap_all_assets_df(asset_dataframes) - - # TODO(Dan): CmTask1490. - def _get_cm_task_1103_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. - - :return: updated core dataframe as presented below - Example: - - ``` - full_symbol close - 2000-01-01 10689 100 - 2000-01-02 10689 200 - 2000-01-03 10689 300 - ``` - """ - asset_dataframes = self._get_core_dataframes() - for asset_dataframe in asset_dataframes: - # Positioned right from asset column. - asset_dataframe.insert( - loc=1, - column="close", - value=list(range(len(self._dataframe_index))), - ) - return self._wrap_all_assets_df(asset_dataframes) - - def __init__( - self, - start_date: str, - end_date: str, - output_type: str, - assets: List[Union[str, int]], - asset_col_name: str, - freq: str, - ) -> None: - """ - Constructor. 
- - :param start_date: start of date range including start_date - :param end_date: end of date range excluding end_date - :param output_type: type of data that is generated - :param assets: list of desired assets that can be names or ids - :param asset_col_name: name of the column that stores assets - :param freq: frequency of steps between start and end date - """ - self._start_date = start_date - self._end_date = end_date - self._output_type = output_type - self._assets = assets - self._asset_col_name = asset_col_name - self._freq = freq - self._dataframe_index = pd.date_range( - self._start_date, - self._end_date, - freq=self._freq, - inclusive="left", - tz="UTC", - ) - self._OUTPUT_TYPE_FUNCTION_MAP = { - "basic": self._get_daily_basic_dataframe, - "verbose_open": self._get_verbose_open_dataframe, - "cm_task_1103": self._get_cm_task_1103_dataframe, - } - - @property - def output_type_function(self) -> Callable: - """ - Return proper function for data generation depending on output type. - """ - return self._OUTPUT_TYPE_FUNCTION_MAP[self._output_type] - - def generate(self) -> pd.DataFrame: - """ - Generate specific dataframe based on inputs provided in instance - creation. - """ - if self._output_type not in self.OUTPUT_TYPES: - raise ValueError(f"Unsupported data type `{self._output_type}`!") - return self.output_type_function() - - -def add_date_partition_columns( - df: pd.DataFrame, partition_mode: str -) -> Tuple[pd.DataFrame, List[str]]: - """ - Add partition columns like year, month, day from datetime index. 
- - :param df: dataframe indexed by timestamp - :param partition_mode: - - "by_date": extract the date from the index - - E.g., an index like `2022-01-10 14:00:00+00:00` is transform to a - column `20220110` - - "by_year_month_day": split the index in year, month, day columns - - "by_year_month": split by year and month - - "by_year_week": split by year and week of the year - - "by_year": split by year - :return: - - df with additional partitioning columns - - list of partitioning columns - """ - with htimer.TimedScope(logging.DEBUG, "# add_date_partition_cols"): - if partition_mode == "by_date": - df["date"] = df.index.strftime("%Y%m%d") - partition_columns = ["date"] - else: - if partition_mode == "by_year_month_day": - partition_columns = ["year", "month", "day"] - elif partition_mode == "by_year_month": - partition_columns = ["year", "month"] - elif partition_mode == "by_year_week": - partition_columns = ["year", "weekofyear"] - elif partition_mode == "by_year": - partition_columns = ["year"] - elif partition_mode == "by_month": - partition_columns = ["month"] - else: - raise ValueError(f"Invalid partition_mode='{partition_mode}'") - # Add date columns chosen by partition mode. - for column_name in partition_columns: - # Extract data corresponding to `column_name` (e.g., - # `df.index.year`). - if column_name == "weekofyear": - # The `weekofyear` attribute has been deprecated in Pandas - # 2.1.0, so weeks are extracted using a function instead of - # the attribute name. - df["weekofyear"] = df.index.isocalendar().week - else: - df[column_name] = getattr(df.index, column_name) - return df, partition_columns - - -def to_partitioned_parquet( - df: pd.DataFrame, - partition_columns: List[str], - dst_dir: str, - *, - aws_profile: hs3.AwsProfile = None, - basename_template: str = None, -) -> None: - """ - Save the given dataframe as Parquet file partitioned along the given - columns. 
- - :param df: dataframe - :param partition_columns: partitioning columns - :param dst_dir: location of partitioned dataset - :param aws_profile: the name of an AWS profile or a s3fs filesystem - - E.g., in case of partition using `date`, the file layout looks like: - ``` - dst_dir/ - date=20211230/ - data.parquet - date=20211231/ - data.parquet - date=20220101/ - data.parquet - ``` - - In case of multiple columns like `asset`, `year`, `month`, the file layout - looks like: - ``` - dst_dir/ - asset=A/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... - asset=B/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - """ - # Use either S3 or local filesystem. - filesystem = None - if aws_profile is not None: - filesystem = hs3.get_s3fs(aws_profile) - # ParquetDataset appends an extra "/", creating an empty-named folder - # when saving on S3. - dst_dir = dst_dir.rstrip("/") - with htimer.TimedScope(logging.DEBUG, "# partition_dataset"): - # Read. - table = pa.Table.from_pandas(df) - # Write using partition. - # TODO(gp): add this logic to hparquet.to_parquet as a possible option. - _LOG.debug(hprint.to_str("partition_columns dst_dir")) - hdbg.dassert_is_subset(partition_columns, df.columns) - # TODO(gp): We would like to avoid overriding existing tiles. It's not clear - # how to do it. Either setting permissions to read-only before writing. - # Or having a list of files that will be written and ensure that none of - # those files already existing. 
- pq.write_to_dataset( - table, - dst_dir, - partition_cols=partition_columns, - filesystem=filesystem, - basename_template=basename_template, - ) - - -def generate_parquet_files( - start_date: str, - end_date: str, - assets: List[Union[str, int]], - asset_col_name: str, - dst_dir: str, - *, - freq: str = "1H", - output_type: str = "basic", - partition_mode: str = "by_date", - custom_partition_cols: Optional[str] = None, - reset_index: bool = False, -) -> None: - """ - Generate parquet files for testing. - - :param start_date: date from which the data is generated, value - included - :param end_date: date until which the data is generated, value - excluded - :param assets: list of assets that can be either names or ids - :param asset_col_name: name of the column that stores assets - :param dst_dir: destination dir for generated data - :param freq: frequency of data generation - :param output_type: type of data that is generated - :param partition_mode: Partition mode for parquet DataFrame, default - by date - :param custom_partition_cols: overrides default partition by time - :param reset_index: reset dataframe index to default sequential - integer values - """ - # Generate timespan. - hdbg.dassert_lt(start_date, end_date) - timespan = pd.date_range(start_date, end_date) - hdbg.dassert_lt(2, len(timespan)) - # Run dataframe generation. - pdg = ParquetDataFrameGenerator( - start_date, end_date, output_type, assets, asset_col_name, freq - ) - parquet_df = pdg.generate() - # Add partition columns to the dataframe. - df, partition_cols = add_date_partition_columns(parquet_df, partition_mode) - if custom_partition_cols: - # If custom partition is provided, it will override date partition. - # Sample: `["asset", "year", "month"]` - custom_partition_cols = custom_partition_cols.split(",") - # Ensure that date partition columns are present. 
- hdbg.dassert_is_subset(partition_cols, custom_partition_cols) - partition_cols = custom_partition_cols - # Partition and write dataset. - if reset_index: - df = df.reset_index(drop=True) - # TODO(Nikola): When direct run is possible, expose usage of `aws_profile` - # so generator can be used in conjunction with `helpers.hmoto.S3Mock_TestCase`. - # Will probably be part of CMTask #1490. - to_partitioned_parquet(df, partition_cols, dst_dir) - - -def get_pyarrow_s3fs(*args: Any, **kwargs: Any) -> PyArrowS3FileSystem: - """ - Return an Pyarrow S3Fs object from a given AWS profile. - - Same as `hs3.get_s3fs`, used specifically for accessing Parquet - datasets. - """ - # Check if S3FileSystem is available - hdbg.dassert( - S3FileSystemAvailable, - "S3FileSystem is not available in this version of pyarrow.fs", - ) - # When deploying jobs via ECS the container obtains credentials based on passed - # task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if hserver.is_inside_ecs_container(): - _LOG.info("Fetching credentials from task IAM role") - s3fs_ = PyArrowS3FileSystem() - else: - aws_credentials = hs3.get_aws_credentials(*args, **kwargs) - s3fs_ = PyArrowS3FileSystem( - access_key=aws_credentials["aws_access_key_id"], - secret_key=aws_credentials["aws_secret_access_key"], - session_token=aws_credentials["aws_session_token"], - region=aws_credentials["aws_region"], - ) - return s3fs_ - - -def _get_parquet_tiles_from_file_path(file_path: str) -> List[Tuple[str, Any]]: - """ - Hacky function to help get tile values from parquet file path. - - Used by from_parquet when loading first n rows of a dataset only. 
- - Example - input: ...ccxt/binance/v1_0_0/currency_pair=CTK_USDT/ - year=2023/month=3/26dc59f62b87403d9a3e9f04c7c21382-0.parquet - output: [("currency_pair", "CTK_USDT"), ("year", 2023), ("month", 3)] - """ - path_parts = file_path.split("/") - tiles = [] - for part in path_parts: - if "=" in part: - col, value = part.split("=") - value = int(value) if value.isdigit() else value - tiles.append((col, value)) - return tiles - - -# TODO(Dan): Add mode to allow querying even when some non-existing columns are passed. -def from_parquet( - file_name: str, - *, - columns: Optional[List[str]] = None, - filters: Optional[List[Any]] = None, - n_rows: Optional[int] = None, - schema: Optional[List[Tuple[str, pa.DataType]]] = None, - log_level: int = logging.DEBUG, - report_stats: bool = False, - aws_profile: hs3.AwsProfile = None, -) -> pd.DataFrame: - """ - Load a dataframe from a Parquet file. - - The difference with `pd.read_pq` is that here we use Parquet - Dataset. - - :param file_name: path to a Parquet dataset - :param columns: columns to return, skipping reading columns that are not requested - - `None` means return all available columns - :param filters: Parquet query - :param n_rows: the number of rows to load, load all data if `None` - :param schema: see `pyarrow.Schema`, e.g., `schema = - [("int_col", pa.int32()), ("str_col", pa.string())]` - :param log_level: logging level to execute at - :param report_stats: whether to report Parquet file size or not - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - :return: data from Parquet dataset - """ - _LOG.debug(hprint.to_str("file_name columns filters schema")) - hdbg.dassert_isinstance(file_name, str) - hs3.dassert_is_valid_aws_profile(file_name, aws_profile) - if hs3.is_s3_path(file_name): - if isinstance(aws_profile, str): - filesystem = get_pyarrow_s3fs(aws_profile) - else: - # Note: `s3fs` filesystem is only to be used on exact file path - # as 
`pq.ParquetDataset` is not properly handling directory path. - filesystem = aws_profile - # Pyarrow S3FileSystem does not have `exists` method. - s3_filesystem = hs3.get_s3fs(aws_profile) - hs3.dassert_path_exists(file_name, s3_filesystem) - file_name = file_name.lstrip("s3://") - else: - filesystem = None - hdbg.dassert_path_exists(file_name) - # Load data. - with htimer.TimedScope( - logging.DEBUG, f"# Reading Parquet file '{file_name}'" - ) as ts: - if n_rows: - # Get the latest parquet file in the directory. - hdbg.dassert_isinstance( - aws_profile, - str, - "aws_profile must be a string for S3 operations", - ) - last_pq_file = hs3.get_latest_pq_in_s3_dir(file_name, aws_profile) - file = s3_filesystem.open(last_pq_file, "rb") - # Load the data. - parquet_file = pq.ParquetFile(file) - # Get the head of the data. - df = ( - parquet_file.read_row_group(0, columns=parquet_file.schema.names) - .to_pandas() - .head(n_rows) - ) - if columns: - # Note: `schema.names` also includes and index. - hdbg.dassert_is_subset(columns, parquet_file.schema.names) - df = df[columns] - # Hacky way to append tile values lost when obtaining particular .pq file. - tiles = _get_parquet_tiles_from_file_path(last_pq_file) - for col, value in tiles: - df[col] = value - else: - if schema is not None: - # Pass partition columns types explicitly. - schema = pa.schema(schema) - partitioning = ds.partitioning(schema, flavor="hive") - dataset = pq.ParquetDataset( - # Replace URI with path. - file_name, - filesystem=filesystem, - filters=filters, - partitioning=partitioning, - ) - if columns: - # Note: `schema.names` also includes and index. - hdbg.dassert_is_subset(columns, dataset.schema.names) - # To read also the index we need to use `read_pandas()`, instead of - # `read_table()`. - # See https://arrow.apache.org/docs/python/parquet.html#reading-and-writing-single-files. 
- table = dataset.read_pandas(columns=columns) - # Convert the Pandas Dataframe timestamp columns and index to `ns` - # resolution. The general approach is to preserve the time unit - # information after reading data back from Parquet files. - # Currently, it's challenging to resolve this issue since Parquet - # data is mixed with data from CSV files, which convert the time - # unit to `ns` by default. Refer to CmampTask7331 for details. - # https://github.com/cryptokaizen/cmamp/issues/7331 - df = table.to_pandas(coerce_temporal_nanoseconds=True) - if isinstance(df.index, pd.DatetimeIndex): - df.index = df.index.as_unit("ns") - # Report stats about the df. - _LOG.debug("df.shape=%s", str(df.shape)) - mem = df.memory_usage().sum() - _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) - # Report stats about the Parquet file size. - if report_stats: - file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) - _LOG.log( - log_level, - "Loaded '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - return df - - -# Copied from `hio.create_enclosing_dir()` to avoid circular dependencies. -def _create_enclosing_dir(file_name: str) -> Optional[str]: - dir_name = os.path.dirname(file_name) - if dir_name != "": - _LOG.debug( - "Creating dir_name='%s' for file_name='%s'", dir_name, file_name - ) - hdbg.dassert_is_not(dir_name, None) - dir_name = os.path.normpath(dir_name) - if os.path.normpath(dir_name) == ".": - _LOG.debug("Can't create dir '%s'", dir_name) - if os.path.exists(dir_name): - # The dir exists and we want to keep it, so we are done. - _LOG.debug("The dir '%s' exists: exiting", dir_name) - return None - _LOG.debug("Creating directory '%s'", dir_name) - try: - os.makedirs(dir_name) - except OSError as e: - _LOG.error(str(e)) - # It can happen that we try to create the directory while somebody else - # created it, so we neutralize the corresponding exception. - if e.errno == 17: - # OSError: [Errno 17] File exists. 
- pass - else: - raise e - hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) - return dir_name - - -def to_parquet( - df: pd.DataFrame, - file_name: str, - *, - log_level: int = logging.DEBUG, - report_stats: bool = False, - aws_profile: hs3.AwsProfile = None, -) -> None: - """ - Save a dataframe as Parquet. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(file_name, str) - hs3.dassert_is_valid_aws_profile(file_name, aws_profile) - if hs3.is_s3_path(file_name): - filesystem = hs3.get_s3fs(aws_profile) - hs3.dassert_path_not_exists(file_name, filesystem) - file_name = file_name.lstrip("s3://") - else: - filesystem = None - hdbg.dassert_path_not_exists(file_name) - hdbg.dassert_file_extension(file_name, ["parquet", "pq"]) - # There is no concept of directory on S3. - # Only applicable to local filesystem. - if aws_profile is None: - _create_enclosing_dir(file_name) - # Report stats about the df. - _LOG.debug("df.shape=%s", str(df.shape)) - mem = df.memory_usage().sum() - _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) - # Save data. - with htimer.TimedScope( - logging.DEBUG, f"# Writing Parquet file '{file_name}'" - ) as ts: - table = pa.Table.from_pandas(df) - # This is needed to handle: - # ``` - # pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=America/New_York] - # to timestamp[us] would lose data: 1663595160000000030 - # ``` - # No need to cast to `us` since pyarrow >= 15.0.0. - # See - # https://github.com/cryptokaizen/cmamp/blob/master/docs/infra/all.parquet.explanation.md#time-unit-conversion-when-writing-to-parquet - # for details. - # parquet_args = { - # "coerce_timestamps": "us", - # "allow_truncated_timestamps": True, - # } - # pq.write_table(table, file_name, filesystem=filesystem, **parquet_args) - pq.write_table(table, file_name, filesystem=filesystem) - # Report stats about the Parquet file size. 
- if report_stats: - file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) - _LOG.log( - log_level, - "Saved '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - - -# ############################################################################# - - -def _yield_parquet_tile( - file_name: str, - columns: Optional[List[str]], - filters: List[Any], - asset_id_col: str, -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in a single tile given the filters. - - It is assumed that data is partitioned by asset_id, year and month, i.e. - the file layout is: - - ``` - file_name/ - asset_id=1032127330/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... - asset_id=2133227690/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - - :param file_name: see `from_parquet()` - :param columns: see `from_parquet()` - :param filters: see `from_parquet()` - :param asset_id_col: name of the column with asset ids - :return: a generator of `from_parquet()` dataframe - """ - # Without the schema being provided `pyarrow` incorrectly infers - # type of the asset id column, i.e. `pyarrow` reads assets as - # strings instead of integers. See the related discussion at - # `https://issues.apache.org/jira/browse/ARROW-6114`. - int_type = np.int64 - pyarrow_int_type = pa.from_numpy_dtype(int_type) - schema = [ - (asset_id_col, pyarrow_int_type), - # TODO(Grisha): consider passing year and month column names as params. 
- ("year", pyarrow_int_type), - ("month", pyarrow_int_type), - ] - tile = from_parquet( - file_name, - columns=columns, - filters=filters, - schema=schema, - ) - hpandas.dassert_series_type_is(tile[asset_id_col], int_type) - yield tile - - -def build_asset_id_filter( - asset_ids: List[int], - asset_id_col: str, -) -> List[List[Tuple[str, str, int]]]: - filters = [] - for asset_id in asset_ids: - filters.append([(asset_id_col, "==", asset_id)]) - return filters - - -def build_year_month_filter( - start_date: datetime.date, - end_date: datetime.date, -) -> list: - """ - Use the year/months to build a Parquet filter. - - If `start_date.year == end_date.year`, then return a list of - three tuples (to be "ANDed" together) based on the year and months. - Else, return a list of list of tuples: - - the inner lists consist of AND filters; the inner lists are ORed - together if used as a single filter - - each inner list filter represents a calendar year or part thereof - - One use case of this function is to generate a filter whose OR - components can be processed one-by-one. For example, if memory constraints - prevent loading an entire tile at once, then one could instead attempt to - load one-year tiles one at a time. - - NOTE: `start_date.day` and `end_date.day` are ignored. - - TODO(Paul): Consider adding a switch to support smaller AND filter chunks - (e.g., at monthly instead of yearly granularity). 
- """ - hdbg.dassert_isinstance(start_date, datetime.date) - hdbg.dassert_isinstance(end_date, datetime.date) - hdbg.dassert_lte(start_date, end_date) - start_year = start_date.year - end_year = end_date.year - filter_ = [] - # - if start_year == end_year: - filter_.append(("year", "==", start_year)) - filter_.append(("month", ">=", start_date.month)) - filter_.append(("month", "<=", end_date.month)) - else: - start_year_filter = [] - start_year_filter.append(("year", "==", start_year)) - start_year_filter.append(("month", ">=", start_date.month)) - end_year_filter = [] - end_year_filter.append(("year", "==", end_year)) - end_year_filter.append(("month", "<=", end_date.month)) - filter_.append(start_year_filter) - filter_.append(end_year_filter) - for year in range(start_year + 1, end_year): - year_filter = [] - year_filter.append(("year", "==", year)) - filter_.append(year_filter) - return filter_ - - -def yield_parquet_tiles_by_year( - file_name: str, - start_date: datetime.date, - end_date: datetime.date, - cols: List[Union[int, str]], - *, - asset_ids: Optional[List[int]] = None, - asset_id_col: str = "asset_id", -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in tiles up to one year in length. - - :param file_name: as in `from_parquet()` - :param start_date: first date to load; day is ignored - :param end_date: last date to load; day is ignored - :param cols: if an `int` is supplied, it is cast to a string before reading - :param asset_ids: asset ids to load - :param asset_id_col: see `_yield_parquet_tile()` - :return: a generator of `from_parquet()` dataframes - """ - time_filters = build_year_month_filter(start_date, end_date) - hdbg.dassert_isinstance(time_filters, list) - # The list should not be empty. 
- hdbg.dassert(time_filters) - if not isinstance(time_filters[0], list): - time_filters = [time_filters] - columns = [str(col) for col in cols] - if asset_ids is None: - asset_ids = [] - asset_id_filter = build_asset_id_filter(asset_ids, asset_id_col) - for time_filter in time_filters: - if asset_id_filter: - combined_filter = [ - id_filter + time_filter for id_filter in asset_id_filter - ] - else: - combined_filter = time_filter - yield from _yield_parquet_tile( - file_name, columns, combined_filter, asset_id_col - ) - - -# TODO(Paul): Add additional time-restriction filter. -def yield_parquet_tiles_by_assets( - file_name: str, - asset_ids: List[int], - asset_id_col: str, - asset_batch_size: int, - cols: Optional[List[Union[int, str]]], -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in tiles batched by asset ids. - - :param file_name: as in `from_parquet()` - :param asset_ids: asset ids to load - :param asset_id_col: see `_yield_parquet_tile()` - :param asset_batch_size: the number of asset to load in a single batch - :param cols: if an `int` is supplied, it is cast to a string before reading - :return: a generator of `from_parquet()` dataframes - """ - hdbg.dassert_isinstance(asset_id_col, str) - hdbg.dassert(asset_id_col, "`asset_id_col` must be nonempty") - batches = [ - asset_ids[i : i + asset_batch_size] - for i in range(0, len(asset_ids), asset_batch_size) - ] - columns: Optional[List[str]] = None - if cols: - columns = [str(col) for col in cols] - for batch in tqdm(batches): - _LOG.debug("assets=%s", batch) - filter_ = build_asset_id_filter(batch, asset_id_col) - yield from _yield_parquet_tile(file_name, columns, filter_, asset_id_col) - - -def build_filter_with_only_equalities( - start_timestamp: pd.Timestamp, end_timestamp: pd.Timestamp -) -> list: - """ - Build a list of Parquet filters based on equality conditions for partition - columns. 
- - This function creates a filter for each partition column (year, month, day) based on the - equality conditions between components of the timestamp arguments when possible. - - Example: - Input args: - start_timestamp: 2022-08-31T00:01:00+00:00 - end-timestamp: 2022-08-31T23:59:59+00:00 - Output: - [("year", "=", 2022), ("month", "=", 8), ("day", "=", 31)] - - These filters enhance performance by allowing to load data quicker when used in tandem with timestamp filters. - Less memory will be used because less `.pq` need to be loaded. - - :param start_timestamp: start of the interval. - :param end_timestamp: end of the interval: - """ - hdbg.dassert_isinstance(start_timestamp, pd.Timestamp) - hdbg.dassert_isinstance(end_timestamp, pd.Timestamp) - filters = [] - if start_timestamp.year == end_timestamp.year: - filters.append(("year", "==", start_timestamp.year)) - if start_timestamp.month == end_timestamp.month: - filters.append(("month", "==", start_timestamp.month)) - if start_timestamp.day == end_timestamp.day: - filters.append(("day", "==", start_timestamp.day)) - return filters - - -# TODO(Paul): The `int` assumption is baked in. We can generalize to strings -# if needed, but if we do, then we should continue to handle string ints as -# ints as we do here (e.g., there are sorting advantages, among others). -def _process_walk_triple( - triple: tuple, start_depth: int -) -> Tuple[Tuple[str, ...], Tuple[int, ...]]: - """ - Process a triple returned by `os.walk()` - - :param triple: (dirpath: str, dirnames: List[str], filenames: List[str]) - :param start_depth: the "depth" of `path` used in the call - `os.walk(path)` - :return: tuple(lhs_vals), tuple(rhs_vals) - """ - lhs_vals: List[str] = [] - rhs_vals: List[int] = [] - # If there are subdirectories, do not process. 
- if triple[1]: - return tuple(lhs_vals), tuple(rhs_vals) - depth = len(triple[0].split("/")) - rel_depth = depth - start_depth - key = tuple(triple[0].split("/")[start_depth:]) - if len(key) == 0: - return tuple(lhs_vals), tuple(rhs_vals) - hdbg.dassert_eq(len(key), rel_depth) - lhs_vals = [] - rhs_vals = [] - for string in key: - lhs, rhs = string.split("=") - lhs_vals.append(lhs) - rhs_vals.append(int(rhs)) - hdbg.dassert_eq(len(lhs_vals), len(rhs_vals)) - return tuple(lhs_vals), tuple(rhs_vals) - - -def collate_parquet_tile_metadata( - path: str, -) -> pd.DataFrame: - """ - Report stats in a dataframe on Parquet file partitions. - - The directories should be of the form `lhs=rhs` where "rhs" is a string - representation of an `int`. - - :param path: path to top-level Parquet directory - :return: dataframe with two file size columns and a multiindex reflecting - the Parquet path structure. - """ - hdbg.dassert_dir_exists(path) - # Remove the trailing slash to simplify downstream accounting. - if path.endswith("/"): - path = path[:-1] - hdbg.dassert(not path.endswith("/")) - # Walk the path. - # os.walk() yields a 3-tuple of the form - # (dirpath: str, dirnames: List[str], filenames: List[str]) - start_depth = len(path.split("/")) - headers_set = set() - dict_ = collections.OrderedDict() - for triple in os.walk(path): - # If the walk has taken us to, e.g., - # asset_id=100/year=2010/month=1/data.parquet - # then we expect - # lhs = ("asset_id", "year", "month") - # rhs = (100, 2010, 1) - lhs, rhs = _process_walk_triple(triple, start_depth) - # If the walkabout has not yet taken us to a file, continue. - if not lhs: - continue - # The tuple `lhs` is to become the index headers. We check later - # for uniqueness. - headers_set.add(lhs) - # Get the file name and full path. - file_name = triple[2][0] - file_path = os.path.join(triple[0], file_name) - # Record the size of the file. We keep this in bytes for easy - # join aggregations. 
- size_in_bytes = os.path.getsize(file_path) - dict_[rhs] = size_in_bytes - # Ensure that headers are unambiguous. - hdbg.dassert_eq(len(headers_set), 1) - # Convert to a multiindexed dataframe. - df = pd.DataFrame(dict_.values(), index=dict_.keys()) - df.rename(columns={0: "file_size_in_bytes"}, inplace=True) - headers = headers_set.pop() - df.index.names = headers - df.sort_index(inplace=True) - # Add a more human-readable file size column. Keep the original numerical - # one for downstream aggregations. - file_size = df["file_size_in_bytes"].apply(hintros.format_size) - df["file_size"] = file_size - return df - - -# ############################################################################# - -# A Parquet filtering condition. e.g., `("year", "=", year)` -ParquetFilter = Tuple[str, str, Any] -# The AND of Parquet filtering conditions, e.g., -# `[("year", "=", year), ("month", "=", month)]` -ParquetAndFilter = List[ParquetFilter] -# A OR-AND Parquet filtering condition, e.g., -# ``` -# [[('year', '=', 2020), ('month', '=', 1)], -# [('year', '=', 2020), ('month', '=', 2)], -# [('year', '=', 2020), ('month', '=', 3)]] -# ``` -ParquetOrAndFilter = List[ParquetAndFilter] - - -# TODO(gp): @Nikola add light unit tests for `by_year_week` and for additional_filter. -# TODO(gp): Can we return a single type? -def get_parquet_filters_from_timestamp_interval( - partition_mode: str, - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], - *, - additional_filters: Optional[List[ParquetFilter]] = None, -) -> Union[ParquetOrAndFilter, ParquetAndFilter]: - """ - Convert a constraint on a timestamp [start_timestamp, end_timestamp] into a - Parquet filters expression, based on the passed partitioning / tiling - criteria. - - :param partition_mode: control filtering of Parquet datasets. It needs to be - in sync with the way the data was saved - :param start_timestamp: start of the interval. 
`None` means no bound - :param end_timestamp: end of the interval. `None` means no bound - :param additional_filters: AND conditions to add to the final filter. - E.g., if we want to constraint also on `exchange_id` and 'currency_pair`, - we can specify - `[("exchange_id", "in", (...)),("currency_pair", "in", (...))]` - :return: list of OR-AND predicates - """ - # Check timestamp interval. - left_close = True - right_close = True - hdateti.dassert_is_valid_interval( - start_timestamp, - end_timestamp, - left_close=left_close, - right_close=right_close, - ) - or_and_filter = [] - if partition_mode == "by_year_month": - # Handle the first and last year of the interval. - if start_timestamp: - # `[('year', '==', 2020), ('month', '>=', 6)]` - and_filter = [ - ("year", "==", start_timestamp.year), - ("month", ">=", start_timestamp.month), - ] - or_and_filter.append(and_filter) - if end_timestamp: - # `[('year', '==', 2021), ('month', '<=', 3)]` - and_filter = [ - ("year", "==", end_timestamp.year), - ("month", "<=", end_timestamp.month), - ] - or_and_filter.append(and_filter) - if start_timestamp and end_timestamp: - number_of_years = len( - range(start_timestamp.year, end_timestamp.year + 1) - ) - if number_of_years == 1: - # For a one-year range, we overwrite the result with a single AND - # statement, e.g., `[Jan 2020, Mar 2020]` corresponds to - # `[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 3)]]`. - # Note that this interval is different from and OR-AND form as - # `[[('year', '==', 2020), ('month', '>=', 1)], - # [('year', '==', 2020), ('month', '<=', 3)]]` - # since the first AND clause include months <= 3 and the second one - # include months >= 1, and the OR corresponds to the entire year, - # instead of the interval `[Jan 2020, Mar 2020]`. 
- and_filter = [ - ("year", "==", start_timestamp.year), - ("month", ">=", start_timestamp.month), - ("month", "<=", end_timestamp.month), - ] - or_and_filter = [and_filter] - elif number_of_years > 2: - # For ranges over two years, one OR statement is necessary to bridge - # the gap between first and last AND statement. - # `[('year', '>', 2020), ('year', '<', 2023)]` - # Inserted in middle as bridge between AND statements. - and_filter = [ - ("year", ">", start_timestamp.year), - ("year", "<", end_timestamp.year), - ] - or_and_filter.insert(1, and_filter) - else: - # For intervals of exactly two years the two AND conditions are - # enough to select the desired period of time. - pass - elif len(or_and_filter) == 1: - # Handle the case when exactly one of the interval bounds is passed, - # e.g., [June 2020, None]. - # In this case the first year was covered by the code above (i.e., - # `year >= 2020 and month == 6`) and we need to specify the rest of - # the years (i.e., `year > 2020`). - operator = ">" if start_timestamp else "<" - timestamp = start_timestamp if start_timestamp else end_timestamp - hdbg.dassert_is_not(timestamp, None, "timestamp should not be None") - extra_filter = [("year", operator, timestamp.year)] - or_and_filter.append(extra_filter) - else: - # If there is no interval provided, leave empty `or_and_filter` as is. - pass - elif partition_mode == "by_year_week": - # TODO(gp): Consider using the same approach above for months also here. - # Partition by year and week. - hdbg.dassert_is_not( - end_timestamp, - None, - "Parquet backend can't determine the boundaries of the data", - ) - # Include last week in the interval. - end_timestamp += pd.DateOffset(weeks=1) - # Get all weeks in the interval. 
- hdbg.dassert_is_not( - start_timestamp, - None, - "start_timestamp should not be None for by_year_week partition mode", - ) - dates = pd.date_range( - start_timestamp.date(), end_timestamp.date(), freq="W" - ) - for date in dates: - year = date.year - # https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar - weekofyear = date.isocalendar().week - and_filter = [("year", "=", year), ("weekofyear", "=", weekofyear)] - or_and_filter.append(and_filter) - else: - raise ValueError(f"Unknown partition mode `{partition_mode}`!") - if additional_filters: - hdbg.dassert_isinstance(additional_filters, list) - if or_and_filter: - # Append additional filters for every present timestamp filter. - or_and_filter = [ - additional_filters + and_filter for and_filter in or_and_filter - ] - else: - # If no timestamp filters are provided, use additional filters. - or_and_filter = additional_filters - _LOG.debug("or_and_filter=%s", str(or_and_filter)) - if len(or_and_filter) == 0: - # Empty list is not acceptable value for pyarrow dataset. - # Only logical expression or `None`. - or_and_filter = None - return or_and_filter - - -def list_and_merge_pq_files( - root_dir: str, - *, - file_name: str = "data.parquet", - aws_profile: hs3.AwsProfile = None, - drop_duplicates_mode: Optional[str] = None, -) -> None: - """ - Merge all files of the Parquet dataset. - - Can be generalized to any used partition. - - The standard partition (also known as "by-tile") assumed is: - - ``` - root_dir/ - currency_pair=ADA_USDT/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... 
- currency_pair=EOS_USDT/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - - :param root_dir: root directory of Parquet dataset - :param file_name: name of the single resulting file - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - if aws_profile is not None: - filesystem = hs3.get_s3fs(aws_profile) - else: - filesystem = None - # Get full paths to each Parquet file inside root dir. - if filesystem: - # Use specialized S3 filesystem function to list Parquet files efficiently. - # since glob.glob() is very slow as it does a lot of accesses to S3. - # The extra `**/*` is needed by `pyarrow` >= 17. - parquet_files = filesystem.glob(f"{root_dir}/**/*.parquet") - else: - # For local filesystem, use glob.glob - parquet_files = glob.glob(f"{root_dir}/**/*.parquet", recursive=True) - _LOG.debug("Parquet files: '%s'", parquet_files) - # Get paths only to the lowest level of dataset folders. - dataset_folders = {f.rsplit("/", 1)[0] for f in parquet_files} - for folder in dataset_folders: - # Get files per folder and merge if there are multiple ones. - if filesystem: - # Use specialized S3 filesystem function to list Parquet files efficiently. - folder_files = filesystem.ls(folder) - else: - # For local filesystem, use os.listdir - folder_files = [os.path.join(folder, f) for f in os.listdir(folder)] - hdbg.dassert_ne( - len(folder_files), 0, msg=f"Empty folder `{folder}` detected!" - ) - if len(folder_files) == 1 and folder_files[0].endswith("/data.parquet"): - # If there is already single `data.parquet` file, no action is required. - continue - # Read all files in target folder. - # `partitioning=None` is required to read the dataset without - # partitioning columns. See CmTask7324 for details. 
- # https://github.com/cryptokaizen/cmamp/issues/7324 - data = pq.ParquetDataset( - folder_files, filesystem=filesystem, partitioning=None - ).read() - data = data.to_pandas() - # Drop duplicates on all non-metadata columns. - # TODO(gp): hparquet is general and we should pass the columns to remove - # or perform the transform after. - if drop_duplicates_mode is None: - duplicate_columns = data.columns.to_list() - for col_name in ["knowledge_timestamp", "end_download_timestamp"]: - if col_name in duplicate_columns: - duplicate_columns.remove(col_name) - control_column = None - elif drop_duplicates_mode == "bid_ask": - # Drop duplicates on timestamp index. - duplicate_columns = ["timestamp", "exchange_id"] - control_column = None - elif drop_duplicates_mode == "ohlcv": - # Drop duplicates on timestamp and keep one with largest volume. - duplicate_columns = ["timestamp", "exchange_id"] - control_column = "volume" - else: - hdbg.dfatal("Supported drop duplicates modes: ohlcv, bid_ask") - data = hdatafr.remove_duplicates(data, duplicate_columns, control_column) - # Remove all old files and write the new, merged one. - if filesystem: - filesystem.rm(folder, recursive=True) - pq.write_table( - pa.Table.from_pandas(data), - folder + "/" + file_name, - filesystem=filesystem, - ) - else: - # Use os.remove for local filesystem to remove files. - for file_path in folder_files: - os.remove(file_path) - data.to_parquet(os.path.join(folder, file_name)) - - -def maybe_cast_to_int(string: str) -> Union[str, int]: - """ - Return `string` as an `int` if convertible, otherwise a no-op. - - This is useful for parsing mixed-type dataframe columns that may - contain strings and ints. For example, a dataframe with columns - `feature1, feature2, 1, 2, 3` will be written and read back with - columns `1`, `2`, `3` as the strings "1", "2", "3" rather than the - ints. This function can be used to rectify that in a post-processing - column rename. 
- """ - hdbg.dassert_isinstance(string, str) - try: - val = int(string) - except ValueError: - val = string - return val diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py deleted file mode 100644 index 0ba179142..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py +++ /dev/null @@ -1,1151 +0,0 @@ -""" -Import as: - -import helpers.hparser as hparser -""" - -import argparse -import logging -import os -import sys -from typing import Any, Dict, List, Optional, Tuple, Union - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# TODO(gp): arg -> args - - -# ############################################################################# - - -def add_bool_arg( - parser: argparse.ArgumentParser, - name: str, - *, - default_value: bool = False, - help_: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add options to a parser like `--xyz` and `--no_xyz`, controlled by - `args.xyz`. - - E.g., `add_bool_arg(parser, "run_diff_script", default_value=True)` adds - two options: - ``` - --run_diff_script Run the diffing script or not - --no_run_diff_script - ``` - corresponding to `args.run_diff_script`, where the default behavior is to have - that value equal to True unless one specifies `--no_run_diff_script`. 
- """ - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument("--" + name, dest=name, action="store_true", help=help_) - group.add_argument("--no_" + name, dest=name, action="store_false") - parser.set_defaults(**{name: default_value}) - return parser - - -# ############################################################################# - - -def add_verbosity_arg( - parser: argparse.ArgumentParser, *, log_level: str = "INFO" -) -> argparse.ArgumentParser: - parser.add_argument( - "-v", - dest="log_level", - default=log_level, - # TRACE=5 - # DEBUG=10 - # INFO=20 - # WARNING=30 - # CRITICAL=50 - choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - help="Set the logging level", - ) - parser.add_argument( - "--no_report_command_line", - action="store_true", - help="Disable printing of executed commands", - ) - return parser - - -# TODO(gp): Use this everywhere. -def parse_verbosity_args( - args: argparse.Namespace, *args_: Any, **kwargs: Any -) -> None: - if hasattr(args, "no_report_command_line") and args.no_report_command_line: - report_command_line = False - else: - report_command_line = True - kwargs["report_command_line"] = report_command_line - # if args.log_level == "VERB_DEBUG": - # args.log_level = 5 - hdbg.init_logger(verbosity=args.log_level, *args_, **kwargs) - - -# ############################################################################# -# Command line for `@hcache_simple.simple_cache` functions. -# ############################################################################# - - -# TODO(gp): Use the ones from hcache_simple.py for DRY. -_CACHE_MODE_CHOICES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") - - -def add_cache_control_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add `--cache_mode` switch controlling every - `@hcache_simple.simple_cache`-decorated function in the process. 
- - The resolved mode is applied globally via - `hcache_simple.set_global_cache_mode` in `parse_cache_control_args()`. - """ - parser.add_argument( - "--cache_mode", - action="store", - default=None, - choices=list(_CACHE_MODE_CHOICES), - help=( - "Override cache behavior for all @simple_cache functions. " - "REFRESH_CACHE repopulates, DISABLE_CACHE bypasses, " - "HIT_CACHE_OR_ABORT raises on miss." - ), - ) - parser.add_argument( - "--cache_debug", - action="store_true", - help=( - "Log at WARNING level for every @simple_cache call whether the " - "result was served from cache, computed on miss, or recomputed " - "because of `cache_mode`" - ), - ) - return parser - - -def parse_cache_control_args(args: argparse.Namespace) -> None: - """ - Apply `--cache_mode`, `--cache_debug` by setting the `hcache_simple` - process-wide globals. - """ - # Import lazily to avoid a circular dependency at module load time. - import helpers.hcache_simple as hcacsimp - - mode = getattr(args, "cache_mode", None) - if mode is not None: - _LOG.info("Setting global cache_mode=%s", mode) - hcacsimp.set_global_cache_mode(mode) - cache_debug = bool(getattr(args, "cache_debug", False)) - if cache_debug: - _LOG.info("Enabling cache_debug logging") - hcacsimp.set_cache_debug(cache_debug) - - -# ############################################################################# -# Command line options for handling the destination dir. -# ############################################################################# - - -def add_dst_dir_arg( - parser: argparse.ArgumentParser, - dst_dir_required: bool, - dst_dir_default: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add command line options related to destination dir. - - E.g., `--dst_dir`, `--clean_dst_dir` - """ - # TODO(gp): Add unit test to check this. - # A required dst_dir implies no default dst_dir. 
- hdbg.dassert_imply( - dst_dir_required, - not dst_dir_default, - "Since dst_dir_required='%s', you need to specify a default " - "destination dir, instead of dst_dir_default='%s'", - dst_dir_required, - dst_dir_default, - ) - # If dst_dir is not required, then a default dst_dir must be specified. - hdbg.dassert_imply( - not dst_dir_required, - dst_dir_default, - "Since dst_dir_required='%s', you can't specify a default " - "destination dir, dst_dir_default='%s'", - dst_dir_required, - dst_dir_default, - ) - parser.add_argument( - "--dst_dir", - action="store", - default=dst_dir_default, - required=dst_dir_required, - help="Directory storing the results", - ) - parser.add_argument( - "--clean_dst_dir", - action="store_true", - help="Delete the destination dir before running", - ) - parser.add_argument( - "--no_confirm", - action="store_true", - help="Do not confirm before deleting dst dir", - ) - return parser - - -def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: - """ - Process the command line options related to destination dir. - - :return: a tuple (dst_dir, clean_dst_dir) - - dst_dir: the destination dir - - clean_dst_dir: whether to clean the destination dir or not - """ - dst_dir = args.dst_dir - _LOG.debug("dst_dir=%s", dst_dir) - # TODO(Dan): Fix `clean_dst_dir` usage since it is always `False` now. - clean_dst_dir = False - if args.clean_dst_dir: - _LOG.info("Cleaning dst_dir='%s'", dst_dir) - if os.path.exists(dst_dir): - _LOG.warning("Dir '%s' already exists", dst_dir) - if not args.no_confirm: - hsystem.query_yes_no( - f"Do you want to delete the dir '{dst_dir}'", - abort_on_no=True, - ) - hio.create_dir(dst_dir, incremental=False) - hio.create_dir(dst_dir, incremental=True) - _LOG.debug("clean_dst_dir=%s", clean_dst_dir) - return dst_dir, clean_dst_dir - - -# ############################################################################# -# Command line options related to selection actions. 
-# ############################################################################# - - -def add_action_arg( - parser: argparse.ArgumentParser, - valid_actions: List[str], - default_actions: Optional[List[str]], -) -> argparse.ArgumentParser: - """ - Add command line options to select actions to execute, skip, or enable. - - The function creates a mutually exclusive group with three options: - - `-a/--action`: specify exact actions to execute - - `-sa/--skip_action`: skip specific actions from default set - - `-e/--enable`: enable additional actions on top of defaults - - Available actions are listed once in the help epilog to avoid repetition. - - :param parser: parser to add the option to - :param valid_actions: list of valid actions - :param default_actions: list of default actions to execute - :return: parser with the option added - """ - # Add epilog with list of available actions to avoid repeating them. - actions_list = ", ".join(valid_actions) - if parser.epilog: - parser.epilog += f"\n\nAvailable actions: {actions_list}" - else: - parser.epilog = f"Available actions: {actions_list}" - # Create mutually exclusive group for action selection. 
- group = parser.add_mutually_exclusive_group(required=False) - group.add_argument( - "-a", - "--action", - action="append", - dest="action", - help="Actions to execute (see available actions below)", - ) - group.add_argument( - "-sa", - "--skip_action", - action="append", - dest="skip_action", - help="Actions to skip from default set (see available actions below)", - ) - group.add_argument( - "-e", - "--enable", - action="append", - dest="enable_action", - help="Enable additional actions on top of defaults (see available actions below)", - ) - if default_actions is not None: - hdbg.dassert_is_subset(default_actions, valid_actions) - parser.add_argument( - "--all", - action="store_true", - help=f"Run all the actions ({' '.join(default_actions)})", - ) - return parser - - -def actions_to_string( - actions: List[str], valid_actions: List[str], add_frame: bool -) -> str: - """ - Convert a list of actions to a string. - - :param actions: list of actions to convert - :param valid_actions: list of valid actions - :param add_frame: if `True`, add a frame around the actions - :return: string of the actions - """ - space = max(len(a) for a in valid_actions) + 2 - format_ = "%" + str(space) + "s: %s" - actions = [ - format_ % (a, "Yes" if a in actions else "-") for a in valid_actions - ] - actions_as_str = "\n".join(actions) - if add_frame: - ret = hprint.frame("# Action selected:") + "\n" - ret += hprint.indent(actions_as_str) - else: - ret = actions_as_str - return ret # type: ignore - - -def select_actions( - args: argparse.Namespace, - valid_actions: List[str], - default_actions: List[str], -) -> List[str]: - """ - Select actions based on the command line arguments. 
- - Supports three mutually exclusive modes: - - `--action`: run only specified actions - - `--skip_action`: run default actions minus specified ones - - `--enable`: run default actions plus specified additional ones - - :param args: command line arguments - :param valid_actions: list of valid actions - :param default_actions: list of default actions to execute - :return: list of selected actions - """ - hdbg.dassert( - not (args.action and args.all), - "You can't specify together --action and --all", - ) - hdbg.dassert( - not (args.action and args.skip_action), - "You can't specify together --action and --skip_action", - ) - # TODO(ai_gp): Is this still needed? - # Check for enable_action attribute (added for backward compatibility). - has_enable = hasattr(args, "enable_action") - if has_enable: - hdbg.dassert( - not (args.action and args.enable_action), - "You can't specify together --action and --enable", - ) - hdbg.dassert( - not (args.skip_action and args.enable_action), - "You can't specify together --skip_action and --enable", - ) - # Select actions. - if not args.action or args.all: - if default_actions is None: - default_actions = valid_actions[:] - hdbg.dassert_is_subset(default_actions, valid_actions) - # Convert it into list since through some code paths it can be a tuple. - actions = list(default_actions) - else: - # Validate actions specified by user. - for action in args.action: - hdbg.dassert_in( - action, - valid_actions, - "Invalid action '%s'", - action, - ) - actions = args.action[:] - hdbg.dassert_isinstance(actions, list) - hdbg.dassert_no_duplicates(actions) - # Remove actions, if needed. - if args.skip_action: - hdbg.dassert_isinstance(args.skip_action, list) - for skip_action in args.skip_action: - # Validate that skip_action is a valid action. - hdbg.dassert_in( - skip_action, - valid_actions, - "Invalid action '%s'", - skip_action, - ) - # Validate that skip_action is in the current action list. 
- if skip_action not in actions: - _LOG.warning( - "Skipping action '%s' since it's already not in actions='%s'", - skip_action, - actions, - ) - actions = [a for a in actions if a != skip_action] - # Add enabled actions on top of defaults. - if has_enable and args.enable_action: - hdbg.dassert_isinstance(args.enable_action, list) - for enable_action in args.enable_action: - hdbg.dassert_in( - enable_action, - valid_actions, - "Invalid action '%s'", - enable_action, - ) - if enable_action not in actions: - actions.append(enable_action) - # Reorder actions according to 'valid_actions'. - actions = [action for action in valid_actions if action in actions] - return actions - - -def mark_action( - action: str, actions: Optional[List[str]] -) -> Tuple[bool, Optional[List[str]]]: - """ - Mark an action as to be executed or skipped. - - :param action: action to mark - :param actions: list of actions, or None to execute all actions - :return: tuple of (to_execute, actions) - """ - if actions is None: - # If actions is None, execute all actions. - to_execute = True - else: - to_execute = action in actions - _LOG.debug("\n%s", hprint.frame(f"action={action}")) - if to_execute: - if actions is not None: - actions = [a for a in actions if a != action] - else: - _LOG.warning("Skip action='%s'", action) - return to_execute, actions - - -# ############################################################################# -# Command line options for input/output processing. -# ############################################################################# - -# For non-dockerized scripts the following idiom is used: -# -# ```python -# # Add input/output arguments to parser. -# hparser.add_input_output_args(parser) -# # Handle input/output arguments, including stdin/stdout. -# in_file_name, out_file_name = hparser.parse_input_output_args(args) -# ... -# # Read input file, handling stdin. -# in_lines = hparser.from_file(in_file_name) -# ... -# # Write output, handling stdout. 
-# hparser.to_file(txt, out_file_name) -# ``` -# See helpers_root/dev_scripts_helpers/coding_tools/transform_template.py as an -# example. - -# For dockerized scripts the following idiom is used inside the wrapper, which -# calls the dockerized script: -# -# ```python -# # Add input/output arguments to parser. -# hparser.add_input_output_args(parser) -# # Handle input/output arguments, including stdin/stdout. -# in_file_name, out_file_name = hparser.parse_input_output_args(args) -# tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( -# in_file_name, "llm_transform") -# ... -# # For stdin/stdout, suppress the output of the container. -# suppress_output = in_file_name == "-" or out_file_name == "-" -# _run_dockerized_llm_transform( -# tmp_in_file_name, -# cmd_line_opts, -# tmp_out_file_name, -# mode="system", -# force_rebuild=args.dockerized_force_rebuild, -# use_sudo=args.dockerized_use_sudo, -# suppress_output=suppress_output, -# ) -# ... -# # Write output, handling stdout. -# hparser.to_file(txt, out_file_name) -# ``` -# -# See helpers_root/dev_scripts_helpers/llms/llm_transform.py as an example. - - -def add_input_output_args( - parser: argparse.ArgumentParser, - *, - in_default: Optional[str] = None, - in_required: bool = True, - out_default: Optional[str] = None, - out_required: bool = False, -) -> argparse.ArgumentParser: - """ - Add options to parse input and output file name, and handle stdin / stdout. 
- - :param in_default: default file to be used for input - - If `None`, it must be specified by the user - :param in_required: whether the input file is required - :param out_default: default file to be used for output - - If `None`, it must be specified by the user - :param out_required: whether the output file is required - """ - parser.add_argument( - "-i", - "--input", - dest="input", - required=in_required, - type=str, - default=in_default, - help="Input file or `-` for stdin", - ) - parser.add_argument( - "-o", - "--output", - dest="output", - required=out_required, - type=str, - default=out_default, - help="Output file or `-` for stdout", - ) - return parser - - -def parse_input_output_args( - args: argparse.Namespace, *, clear_screen: bool = False -) -> Tuple[str, str]: - """ - Parse input and output file name, handling stdin / stdout. - - :return input and output file name. - """ - in_file_name = args.input - out_file_name = args.output - if out_file_name is None: - # If the output file is not specified, use the input file name, i.e., - # in place. - out_file_name = in_file_name - # Print summary. If we are using stdin / stdout, don't print anything since - # we don't want to pollute the output. - if in_file_name != "-": - if clear_screen: - os.system("clear") - _LOG.info(hprint.to_str("in_file_name")) - _LOG.info(hprint.to_str("out_file_name")) - - return in_file_name, out_file_name - - -def init_logger_for_input_output_transform( - args: argparse.Namespace, *, verbose: bool = True -) -> None: - """ - Initialize the logger when input/output transformation is used. - - :param verbose: if `False`, set the log level to `CRITICAL` so that no - output is printed and avoid to print: - ``` - 09:34:24 - INFO hdbg.py init_logger:1013 Saving log to file '/User... - 09:34:24 - INFO hdbg.py init_logger:1018 > cmd='/Users/saggese/src... - 09:34:24 - INFO hparser.py parse_input_output_args:368 in_file_name='lectures_source/Les... 
- 09:34:24 - INFO hparser.py parse_input_output_args:369 out_file_name='-' - ``` - """ - verbosity = args.log_level - if not verbose: - # Unless user has specified DEBUG level, set the log level to `CRITICAL` - # so that no output is printed. - if args.log_level == "INFO": - verbosity = "CRITICAL" - else: - # If the input is stdin, we don't want to print the command line or any - # other log messages, unless the user specified a more verbose log level. - if args.input == "-": - if args.log_level == "INFO": - verbosity = "CRITICAL" - else: - print("cmd line: " + hdbg.get_command_line()) - hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) - - -def from_file(file_name: str) -> List[str]: - """ - Read file or stdin (represented by `-`), returning an array of lines. - - If file_name is "pb" and the platform is macOS, read from clipboard. - """ - if file_name == "-": - _LOG.info("Reading from stdin") - # Read. - txt = [] - for line in sys.stdin: - txt.append(line.rstrip("\n")) - elif file_name == "pb": - # Read from clipboard (macOS only). - if hserver.is_host_mac(): - _LOG.info("Reading from clipboard") - cmd = "pbpaste" - rc, txt_str = hsystem.system_to_string(cmd) - txt = txt_str.split("\n") - else: - hdbg.dfatal("Reading from clipboard (pb) only works on macOS") - else: - txt = hio.from_file(file_name) - txt = txt.split("\n") - return txt - - -def to_file(txt: Union[str, List[str]], file_name: str) -> None: - """ - Write txt in a file or stdout (represented by `-`). - - If file_name is "pb" and the platform is macOS, write to clipboard. - """ - if isinstance(txt, str): - txt = [txt] - if file_name == "-": - _LOG.debug("Saving to stdout") - print("\n".join(txt)) - elif file_name == "pb": - # Write to clipboard (macOS only). - if hserver.is_host_mac(): - _LOG.info("Writing to clipboard") - txt_str = "\n".join(txt) - # Use echo with pbcopy, escaping single quotes. 
- txt_str_escaped = txt_str.replace("'", "'\\''") - cmd = f"echo -n '{txt_str_escaped}' | pbcopy" - hsystem.system(cmd) - _LOG.info("Written to clipboard") - else: - hdbg.dfatal("Writing to clipboard (pb) only works on macOS") - else: - _LOG.debug("Saving to file") - with open(file_name, "w") as f: - f.write("\n".join(txt)) - _LOG.info("Written file '%s'", file_name) - - -def adapt_input_output_args_for_dockerized_scripts( - in_file_name: str, tag: str -) -> Tuple[str, str]: - """ - Adapt input and output file name for dockerized scripts. - - Since we need to call a container and passing stdin/stdout is tricky, - we read the input and save it in a temporary file. - - :param tag: tag to be used for the temporary file name (e.g., `llm_transform`) - """ - # Since we need to call a container and passing stdin/stdout is tricky, - # we read the input and save it in a temporary file. - in_lines = from_file(in_file_name) - if in_file_name == "-": - tmp_in_file_name = f"tmp.{tag}.in.txt" - in_txt = "\n".join(in_lines) - hio.to_file(tmp_in_file_name, in_txt) - else: - tmp_in_file_name = in_file_name - # - tmp_out_file_name = f"tmp.{tag}.out.txt" - return tmp_in_file_name, tmp_out_file_name - - -# ############################################################################# -# Command line options for parallel processing. -# ############################################################################# - - -# pylint: disable=line-too-long -# TODO(gp): These should go in hjoblib.py -def add_parallel_processing_arg( - parser: argparse.ArgumentParser, - *, - num_threads_default: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add parallel processing args. - - The "incremental idiom" means skipping processing computation that has - already been performed. E.g., if we need to transform files from one dir to - another we skip the files already processed (assuming that a file present - in the destination dir is an indication that it has already been - processed). 
- - The default behavior should always be incremental since "incremental mode" - is not destructive like the non-incremental, i.e., delete and restart - - The incremental behavior is disabled with `--no_incremental`. This implies - performing the computation in any case - - It is often implemented by deleting the destination dir and then running - again, even in incremental mode - - If the destination dir already exists, then we require the user to - explicitly use `--force` to confirm that the user knows what is doing - """ - parser.add_argument( - "--dry_run", - action="store_true", - help="Print the workload and exit without running it", - ) - parser.add_argument( - "--no_incremental", - action="store_true", - help="Skip workload already performed", - ) - parser.add_argument( - "--force", - action="store_true", - help="Confirm that one wants to remove the previous results. It works only together with --no_incremental", - ) - # - help = """ - Number of threads to use: - - '-1' to use all CPUs; - - '1' to use one-thread at the time but using the parallel execution (mainly used - for debugging) - - 'serial' to serialize the execution without using parallel execution""" - if num_threads_default is None: - parser.add_argument( - "--num_threads", - action="store", - help=help, - required=True, - ) - else: - parser.add_argument( - "--num_threads", - action="store", - help=help, - default=num_threads_default, - ) - parser.add_argument("--no_keep_order", action="store_true", help="") - parser.add_argument( - "--num_func_per_task", - action="store", - type=int, - default=None, - help="Number of function execute in a (parallel) task of the workload. 
`None` means automatically decided by the function", - ) - parser.add_argument( - "--skip_on_error", - action="store_true", - help="Continue execution after encountering an error", - ) - parser.add_argument( - "--num_attempts", - default=1, - type=int, - help="Repeat running an experiment up to `num_attempts` times", - required=False, - ) - return parser - - -def create_incremental_dir(dst_dir: str, args: argparse.Namespace) -> None: - """ - Create a dir using the "incremental idiom". - - If the dir already exists and the user requested the not - incremental, we require `--force` to confirm deleting the dir. - """ - if args.force: - hdbg.dassert( - args.no_incremental, "--force only works with --no_incremental" - ) - _LOG.debug(hprint.to_str("dst_dir args")) - if args.no_incremental: - # Create the dir from scratch. - _LOG.debug("No incremental mode") - if os.path.exists(dst_dir): - _LOG.debug("Dir '%s' already exists", dst_dir) - hdbg.dassert_dir_exists(dst_dir, "'%s' must be a directory") - if not args.force: - _LOG.warning( - "The directory '%s' already exists. To confirm deleting it use --force", - dst_dir, - ) - sys.exit(-1) - _LOG.warning("Deleting %s", dst_dir) - hio.create_dir(dst_dir, incremental=False) - else: - _LOG.debug("Incremental mode") - hio.create_dir(dst_dir, incremental=True) - - -# ############################################################################# -# Command line options for metadata output. -# ############################################################################# - - -def add_json_output_metadata_args( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add arguments related to storing the output metadata from a script. - - This data can be read / used by other scripts to post-process a - script results. 
- """ - parser.add_argument( - "--json_output_metadata", - type=str, - action="store", - help="File storing the output metadata of this script in JSON format", - ) - return parser - - -# Store the metadata about the output of a script. -OutputMetadata = Dict[str, str] - - -def process_json_output_metadata_args( - args: argparse.Namespace, - output_metadata: OutputMetadata, -) -> Optional[str]: - """ - Save the output metadata according to the command line options. - - :return: file name with the output metadata - """ - hdbg.dassert_isinstance(output_metadata, dict) - if args.json_output_metadata is None: - return None - file_name: str = args.json_output_metadata - _LOG.info("Saving output metadata into file '%s'", file_name) - if not file_name.endswith(".json"): - _LOG.warning( - "The output metadata file '%s' doesn't end in .json: adding it", - file_name, - ) - file_name += ".json" - hio.to_json(file_name, output_metadata) - _LOG.info("Saved output metadata into file '%s'", file_name) - return file_name - - -def read_output_metadata(output_metadata_file: str) -> OutputMetadata: - """ - Read the output metadata. - """ - output_metadata: OutputMetadata = hio.from_json(output_metadata_file) - return output_metadata - - -def str_to_bool(value: str) -> bool: - """ - Convert string representing true or false to the corresponding bool. - """ - if value.lower() == "true": - ret = True - elif value.lower() == "false": - ret = False - else: - raise argparse.ArgumentTypeError( - f"Invalid boolean value {value}. Use 'true' or 'false'." - ) - return ret - - -# ############################################################################# -# Command line options for dockerized scripts. -# ############################################################################# - - -def add_dockerized_script_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add common command line arguments for dockerized scripts. 
- """ - parser.add_argument( - "--dockerized_force_rebuild", - action="store_true", - help="Force to rebuild the Docker container", - ) - parser.add_argument( - "--dockerized_use_sudo", - action="store_true", - help="Use sudo inside the container", - ) - return parser - - -def add_llm_prompt_arg( - parser: argparse.ArgumentParser, - *, - default_prompt: str = "", - is_required: bool = True, -) -> argparse.ArgumentParser: - """ - Add common command line arguments for `*llm_transform.py` scripts. - - :param default_prompt: default prompt to use - :param is_required: whether the prompt is required - :return: parser with the option added - """ - parser.add_argument( - "--debug", - action="store_true", - help="Print before/after the transform", - ) - if default_prompt != "": - is_required = False - parser.add_argument( - "-p", - "--prompt", - required=is_required, - type=str, - help="Prompt to apply", - default=default_prompt, - ) - parser.add_argument( - "-f", - "--fast_model", - action="store_true", - help="Use a fast LLM model vs a high-quality one", - ) - return parser - - -# ############################################################################# -# Command line options for limit range processing. -# ############################################################################# - - -def add_limit_range_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add argument for limiting processing to a range of items. - - The range format is X:Y where X and Y are 1-indexed integers. - """ - parser.add_argument( - "--limit", - action="store", - help="Limit processing to item range X:Y (integers >= 1, inclusive)", - ) - return parser - - -def parse_limit_range(limit_str: str) -> Tuple[int, int]: - """ - Parse limit string in format "X:Y" and return tuple (start, end). 
- - :param limit_str: string in format "X:Y" where X and Y are integers >= 1 - :return: tuple in [start_index, end_index] - """ - hdbg.dassert( - ":" in limit_str, "Limit format must be X:Y, got: %s", limit_str - ) - parts = limit_str.split(":") - hdbg.dassert_eq( - len(parts), 2, "Limit format must be X:Y, got: %s", limit_str - ) - try: - start = int(parts[0]) - end = int(parts[1]) - except ValueError as e: - hdbg.dfatal("Invalid limit format, must be integers: %s" % str(e)) - hdbg.dassert_lte(1, start, "Start index must be >= 1, got: %s", start) - hdbg.dassert_lte(1, end, "End index must be >= 1, got: %s", end) - hdbg.dassert_lte( - start, end, "Start index must be <= end index, got: %s:%s", start, end - ) - return start, end - - -def parse_limit_range_args( - args: argparse.Namespace, -) -> Optional[Tuple[int, int]]: - """ - Parse limit range from command line arguments and log the result. - - :param args: parsed command line arguments containing 'limit' - attribute - :return: tuple of (start_index, end_index) as 0-indexed integers, or - None if no limit - """ - limit_range = None - if args.limit: - limit_range = parse_limit_range(args.limit) - _LOG.warning( - "Using limit range: [%s:%s]", limit_range[0], limit_range[1] - ) - return limit_range - - -def apply_limit_range( - items: List[Any], - limit_range: Optional[Tuple[int, int]] = None, - *, - item_name: str = "items", -) -> List[Any]: - """ - Apply limit range filtering to a list of items. 
- - :param items: list of items to filter - :param limit_range: optional tuple (start, end) for 0-indexed range - filtering - :param item_name: name of items for logging purposes - :return: filtered list of items - """ - if limit_range is not None: - start_idx, end_idx = limit_range - total_items = len(items) - hdbg.dassert_lt( - start_idx, - total_items, - "Start index %s exceeds available %s %s", - start_idx, - item_name, - total_items, - ) - hdbg.dassert_lt( - end_idx, - total_items, - "End index %s exceeds available %s %s", - end_idx, - item_name, - total_items, - ) - items = items[start_idx : end_idx + 1] - _LOG.warning( - "Found %s %s, limited to range %s:%s (%s %s)", - total_items, - item_name, - start_idx, - end_idx, - len(items), - item_name, - ) - else: - _LOG.info("Found %s %s to process", len(items), item_name) - # Print the items that will be processed. - _LOG.debug("Items to process:") - for i, item in enumerate(items): - _LOG.debug(" [%s]: %s", i, item) - return items - - -# ############################################################################# -# Command line options for multiple file input. -# ############################################################################# - - -def add_multi_file_args( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add command line options for specifying multiple input files. - - Three mutually exclusive methods are supported: - - `--files="file1,file2,..."`: comma-separated list of files - - `--from_files="file.txt"`: file containing one file per line - - `--input file1 --input file2`: repeated argument - - These options work alongside the existing `-i/--input` for backward - compatibility. 
- - :param parser: parser to add the options to - :return: parser with the options added - """ - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument( - "--files", - type=str, - help="Comma-separated list of files to process (e.g., 'file1.txt,file2.txt,file3.txt')", - ) - group.add_argument( - "--from_files", - type=str, - help="Path to file containing one file path per line", - ) - group.add_argument( - "-i", - "--input", - action="append", - help="File to process (can be specified multiple times)", - ) - return parser - - -def parse_multi_file_args( - args: argparse.Namespace, -) -> List[str]: - """ - Parse multi-file command line arguments and return list of file paths. - - Handles three input methods: - - `--files="file1,file2,..."`: comma-separated list - - `--from_files="file.txt"`: file containing one file per line - - `--input file1 --input file2`: repeated argument - - If none of the multi-file options are specified, falls back to the single - `-i/--input` argument for backward compatibility. - - :param args: parsed command line arguments - :return: list of file paths to process - """ - file_list: List[str] = [] - # Check which multi-file option was specified. - if hasattr(args, "files") and args.files: - # Parse comma-separated list. - _LOG.debug("Using --files option") - file_list = [f.strip() for f in args.files.split(",")] - # Remove empty strings. - file_list = [f for f in file_list if f] - elif hasattr(args, "from_files") and args.from_files: - # Read file containing list of files. - _LOG.debug("Using --from_files option") - hdbg.dassert_path_exists(args.from_files) - content = hio.from_file(args.from_files) - lines = content.split("\n") - for line in lines: - # Strip whitespace. - line = line.strip() - # Skip empty lines and comments. 
- if line and not line.startswith("#"): - file_list.append(line) - elif hasattr(args, "input") and args.input: - # Check if args.input is a list (from --input repeated argument) or a string (from -i/--input single file). - if isinstance(args.input, list): - # Use repeated argument from add_multi_file_args. - _LOG.debug("Using --input option (repeated argument)") - file_list = args.input - else: - # Backward compatibility: support single file via -i/--input from add_input_output_args. - _LOG.debug( - "Using -i/--input option (single file, backward compatibility)" - ) - file_list = [args.input] - else: - # No file specified. - hdbg.dfatal("No input files specified") - # Validate that we have at least one file. - hdbg.dassert_isinstance(file_list, list) - hdbg.dassert_lt( - 0, len(file_list), "No input files specified after parsing arguments" - ) - # Validate that all files exist. - for file_path in file_list: - hdbg.dassert_path_exists(file_path) - _LOG.info("Found %s file(s) to process", len(file_list)) - return file_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py deleted file mode 100644 index e46fc8143..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py +++ /dev/null @@ -1,253 +0,0 @@ -""" -Pickle and JSON serialization/deserialization routines. - -Import as: - -import helpers.hpickle as hpickle -""" - -import gzip -import json -import logging -import marshal -import os -import pickle -import types -from typing import Any, Callable, Optional - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hio as hio - -# TODO(Grisha): Can this module depend on hs3? 
-import helpers.hs3 as hs3 -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -def to_pickleable(obj: Any, force_values_to_string: bool) -> Any: - """ - Convert an object into an object with the same nested structure (e.g., - lists and dicts), but where all values are replaced with their pickleable - representations. - - :param obj: object to convert - :param force_values_to_string: if True, store all the object values - as strings - :return: pickleable object - """ - if isinstance(obj, list): - # Process list values recursively. - out = [to_pickleable(k, force_values_to_string) for k in obj] - elif isinstance(obj, tuple): - # Process tuple values recursively. - out = tuple([to_pickleable(k, force_values_to_string) for k in obj]) - elif isinstance(obj, dict): - # Process dict keys and values recursively. - out = {} - for k, v in obj.items(): - k = to_pickleable(k, force_values_to_string) - v = to_pickleable(v, force_values_to_string) - out[k] = v - elif hintros.is_iterable(obj): - # TODO(Grisha): is it ok that we convert any Iterable (e.g., set) to list? - # This means that input and output data types do not match. - # Process other iterable values recursively. - out = [to_pickleable(v, force_values_to_string) for v in obj] - else: - # We need to use try_and_catch mode because of CmTask7713. - if hintros.is_pickleable(obj, mode="try_and_catch"): - # Store a pickleable object. - if force_values_to_string: - # Store as string if specified. - out = str(obj) - else: - out = obj - else: - # Store a string representation of an unpickleable object. 
- out = str(obj) - return out - - -# ############################################################################# -# pickle -# ############################################################################# - - -def to_pickle( - obj: Any, - file_name: str, - *, - backend: str = "pickle", - log_level: int = logging.DEBUG, - aws_profile: Optional[hs3.AwsProfile] = None, -) -> None: - """ - Pickle object `obj` into file `file_name`. - - :param file_name: the file_name is not changed, but it is checked for - consistency with the backend (e.g., `pickle_gzip` needs a `.pkl.gz` - extension) - :param backend: pickle, dill, pickle_gzip - """ - hdbg.dassert_type_is(file_name, str) - hio.create_enclosing_dir(file_name, incremental=True) - with htimer.TimedScope(logging.DEBUG, f"Pickling to '{file_name}'") as ts: - # We assume that the user always specifies a .pkl extension and then we - # change the extension based on the backend. - if backend in ("pickle", "dill"): - hdbg.dassert_file_extension(file_name, "pkl") - if backend == "pickle": - # Use S3 file system. - if hs3.is_s3_path(file_name): - s3fs_ = hs3.get_s3fs(aws_profile) - with s3fs_.open(file_name, "wb") as s3_file: - pickler = pickle.Pickler( - s3_file, pickle.HIGHEST_PROTOCOL - ) - pickler.fast = True - pickler.dump(obj) - # Use local file system. - else: - with open(file_name, "wb") as fd: - pickler = pickle.Pickler(fd, pickle.HIGHEST_PROTOCOL) - pickler.fast = True - pickler.dump(obj) - elif backend == "dill": - import dill - - with open(file_name, "wb") as fd: - dill.dump(obj, fd) - else: - raise ValueError(f"Invalid backend='{backend}'") - elif backend == "pickle_gzip": - hdbg.dassert_file_extension(file_name, "pkl.gz") - with gzip.open(file_name, "wb") as zfd: - pickler = pickle.Pickler(zfd, pickle.HIGHEST_PROTOCOL) - pickler.fast = True - pickler.dump(obj) - else: - raise ValueError(f"Invalid backend='{backend}'") - # Report time and size. 
- if hs3.is_s3_path(file_name): - file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True) - else: - file_size = hintros.format_size(os.path.getsize(file_name)) - _LOG.log( - log_level, - "Saved '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - - -def from_pickle( - file_name: str, - backend: str = "pickle", - *, - log_level: int = logging.DEBUG, - aws_profile: Optional[hs3.AwsProfile] = None, -) -> Any: - """ - Unpickle and return object stored in `file_name`. - """ - hdbg.dassert_isinstance(file_name, str) - with htimer.TimedScope( - logging.DEBUG, f"Unpickling from '{file_name}'" - ) as ts: - # We assume that the user always specifies a .pkl extension and then we - # change the extension based on the backend. - if backend in ("pickle", "dill"): - hdbg.dassert_file_extension(file_name, "pkl") - if backend == "pickle": - # Use S3 file system. - if hs3.is_s3_path(file_name): - s3fs_ = hs3.get_s3fs(aws_profile) - with s3fs_.open(file_name) as s3_file: - unpickler = pickle.Unpickler(s3_file) - obj = unpickler.load() - else: - with open(file_name, "rb") as fd: - unpickler = pickle.Unpickler(fd) - obj = unpickler.load() - elif backend == "dill": - import dill - - with open(file_name, "rb") as fd: - obj = dill.load(fd) - else: - raise ValueError(f"Invalid backend='{backend}'") - elif backend == "pickle_gzip": - hdbg.dassert_file_extension(file_name, "pkl.gz") - with gzip.open(file_name, "rb") as zfd: - unpickler = pickle.Unpickler(zfd) - obj = unpickler.load() - else: - raise ValueError(f"Invalid backend='{backend}'") - # Report time and size. 
- if hs3.is_s3_path(file_name): - file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True) - else: - file_size = hintros.format_size(os.path.getsize(file_name)) - _LOG.log( - log_level, - "Read '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - return obj - - -# ############################################################################# - - -# TODO(gp): -> to_pickle_function -def pickle_function(func: Callable) -> str: - """ - Pickle a function into bytecode stored into a string. - - - return: string - """ - hdbg.dassert_callable(func) - hdbg.dassert(hasattr(func, "__code__")) - assert hasattr(func, "__code__") - code_as_bytes = marshal.dumps(func.__code__) - return code_as_bytes.decode() - - -# TODO(gp): -> from_pickle_function -def unpickle_function(code_as_str: str, func_name: str) -> Callable: - """ - Unpickle a function saved into string . The function is - injected in the global namespace as . - - - return: function - """ - hdbg.dassert_isinstance(code_as_str, str) - code = marshal.loads(code_as_str.encode()) - func = types.FunctionType(code, globals(), name=func_name) - return func - - -# ############################################################################# -# JSON -# ############################################################################# - -# TODO(gp): Maybe move helpers/hjson.py? - - -# TODO(gp): Switch file_name and obj to be consistent with the pickle functions. 
-def to_json(file_name: str, obj: object) -> None: - hdbg.dassert_file_extension(file_name, "json") - with open(file_name, "w") as outfile: - json.dump(obj, outfile) - - -def from_json(file_name: str) -> object: - hdbg.dassert_path_exists(file_name) - hdbg.dassert_file_extension(file_name, "json") - obj = json.loads(hio.from_file(file_name)) - return obj diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py deleted file mode 100644 index 5e1df13c8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py +++ /dev/null @@ -1,495 +0,0 @@ -""" -Code to automatically generate unit tests for functions. - -Import as: - -import helpers.hplayback as hplayba -""" - -import inspect -import json -import logging -import os -from typing import Any, Callable, List, Optional - -import jsonpickle # type: ignore -import jsonpickle.ext.pandas as jepand # type: ignore -import pandas as pd - -import config_root.config as cconfig -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint - -jepand.register_handlers() - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Use repr to serialize: -# >>> a = {"hello": [1, 2, (3, 4)]} -# >>> repr(a) -# "{'hello': [1, 2, (3, 4)]}" -# TODO(gp): Add more types. -# TODO(gp): -> _to_python_code -def to_python_code(obj: Any) -> str: - """ - Serialize an object into a string of Python code. - - :param obj: an object to serialize - :return: a string of Python code building the object - """ - output = [] - if isinstance(obj, (int, float)): - # Float 2.5 -> "2.5". - output.append(str(obj)) - elif isinstance(obj, str): - # String test -> '"test"'. - # Use jsonpickle to handle double quotes. 
- output.append(jsonpickle.encode(obj)) - elif isinstance(obj, list): - # List ["a", 1] -> '["a", 1]'. - output_tmp = "[" - for el in obj: - output_tmp += to_python_code(el) + ", " - output_tmp = output_tmp.rstrip(", ") + "]" - output.append(output_tmp) - elif isinstance(obj, tuple): - # Tuple ["a", 1] -> '["a", 1]'. - output_tmp = "(" - for el in obj: - output_tmp += to_python_code(el) + ", " - output_tmp = output_tmp.rstrip(", ") + ")" - output.append(output_tmp) - elif isinstance(obj, dict): - # Dict {"a": 1} -> '{"a": 1}'. - output_tmp = "{" - for key in obj: - output_tmp += ( - to_python_code(key) + ": " + to_python_code(obj[key]) + ", " - ) - output_tmp = output_tmp.rstrip(", ") + "}" - output.append(output_tmp) - elif isinstance(obj, pd.DataFrame): - # Dataframe with a column "a" and row values 1, 2 -> - # "pd.DataFrame.from_dict({'a': [1, 2]})". - vals = obj.to_dict(orient="list") - output.append(f"pd.DataFrame.from_dict({vals})") - elif isinstance(obj, pd.Series): - # Series init as pd.Series([1, 2]) - output.append( - f'pd.Series(data={obj.tolist()}, index={obj.index}, name="{obj.name}", ' - f"dtype={obj.dtype})" - ) - elif isinstance(obj, cconfig.Config): - # Config -> python_code -> "cconfig.Config.from_python(python_code)" - val = obj.to_python() - output.append(f'cconfig.Config.from_python("{val}")') - else: - # Use `jsonpickle` for serialization. - _LOG.warning( - "Type %s not found in serialization function: using jsonpickle.", - type(obj), - ) - output.append(f"r'{jsonpickle.encode(obj)}'") - output = "\n".join(output) - return output - - -# ############################################################################# -# Playback -# ############################################################################# - - -class Playback: - @staticmethod - def _get_test_file_name(file_with_code: str) -> str: - """ - Construct the test file name based on the file with the code to test. - - :param file_with_code: path to file with code to test. 
- :return: path to the file with generated test. - """ - # Get directory and filename of the testing code. - dirname_with_code, filename_with_code = os.path.split(file_with_code) - dirname_with_test = os.path.join(dirname_with_code, "test") - # Construct test file. - test_file = os.path.join( - dirname_with_test, f"test_by_playback_{filename_with_code}" - ) - return test_file - - def _update_code_to_existing(self) -> None: - """ - Get existing content from the file with test. - - If the file doesn't exist - creates it. - """ - # Create test file if it doesn't exist. - if not os.path.exists(self._test_file): - hio.create_enclosing_dir(self._test_file, True) - hio.to_file(self._test_file, "", mode="w") - else: - # Get already existing content in the test file. - self._code = hio.from_file(self._test_file).split("\n") - self._file_exists = True - - def _append(self, string: str, num_tabs: int = 0) -> None: - """ - Add indented line to the code. - """ - num_spaces = num_tabs * 4 - self._code.append(hprint.indent(string, num_spaces=num_spaces)) - - def __init__( - self, - mode: str, - to_file: Optional[bool] = None, - max_tests: Optional[int] = None, - ) -> None: - """ - Initialize the class variables. - - :param mode: the type of unit test to be generated (e.g. "assert_equal") - :param to_file: save playback output to the file - test/test_by_playback_.py - :param max_tests: limit a number of generated tests for the testing - function. Can be useful if the function is called a lot of times - during the execution. - """ - _LOG.debug(hprint.to_str("mode to_file max_tests")) - hdbg.dassert_in(mode, ("check_string", "assert_equal")) - self.mode = mode - # TODO(gp): Factor out in a function but need to discard one more level - # in the stack trace. - cur_frame = inspect.currentframe() - self._func_name = cur_frame.f_back.f_code.co_name # type: ignore - # We can use kw arguments for all args. Python supports this. 
- self._kwargs = cur_frame.f_back.f_locals.copy() # type: ignore - # It treats all arguments defined before itself as arguments. If this - # is done, it will mess up the function call that will be created in - # `Playback.run`. - expected_arg_count = cur_frame.f_back.f_code.co_argcount # type: ignore - if "kwargs" in self._kwargs: - expected_arg_count += 1 - _LOG.debug(hprint.to_str("expected_arg_count")) - # TODO(gp): Is this necessary? - # hdbg.dassert_eq( - # expected_arg_count, - # len(cur_frame.f_back.f_locals), # type: ignore - # msg="the Playback class should be the first thing instantiated in" - # " a function.", - # ) - # If the function is a method, store the parent class so we can also - # create that in the test. - if "self" in self._kwargs: - x = self._kwargs.pop("self") - self._parent_class = x - self._code = [ - f"# Test created for {cur_frame.f_back.f_globals['__name__']}" # type: ignore - f".{x.__class__.__name__}.{self._func_name}." - ] - else: - self._parent_class = None - self._code = [ - # pylint: disable=line-too-long - f"# Test created for {cur_frame.f_back.f_globals['__name__']}.{self._func_name}." # type: ignore - ] - self._append("") - # Check if need to write the code directly to file. - self._to_file = to_file if to_file is not None else False - # Find filename to write the code. - file_with_code = cur_frame.f_back.f_code.co_filename # type: ignore - self._test_file = self._get_test_file_name(file_with_code) - # Check if file exists, need to keep code already here. - self._file_exists = False - if self._to_file: - self._update_code_to_existing() - # Limit number of tests per tested function. - self._max_tests = max_tests or float("+inf") - - @staticmethod - def test_code(output: str) -> None: - # Try to execute in a fake environment. 
- # ``` - # local_env = {} - # _ = exec(output, local_env) - # ``` - _ = exec(output) # pylint: disable=exec-used - - def _check_code(self, func_output: Any) -> None: - """ - Generate test code that makes an assertion. - """ - if self.mode == "check_string": - if isinstance(func_output, (pd.DataFrame, pd.Series, str)): - if not isinstance(func_output, str): - self._append( - "actual = hpandas.df_to_str(actual, num_rows=None)", 2 - ) - if not isinstance(func_output, (str, bytes)): - self._append("actual = str(actual)", 2) - self._append("# Check output.", 2) - self._append("self.check_string(actual)", 2) - elif self.mode == "assert_equal": - self._append("# Define expected output.", 2) - func_output_as_code = to_python_code(func_output) - self._append(f"expected = {func_output_as_code}", 2) - if not isinstance( - func_output, (int, float, str, list, dict, pd.DataFrame) - ): - self._append("expected = jsonpickle.decode(expected)", 2) - - if isinstance(func_output, (pd.DataFrame, pd.Series)): - self._append( - "actual = hpandas.df_to_str(actual, num_rows=None)", 2 - ) - self._append( - "expected = hpandas.df_to_str(expected, num_rows=None)", 2 - ) - self._append("# Compare actual and expected output.", 2) - self._append("self.assertEqual(actual, expected)", 2) - else: - raise ValueError(f"Invalid mode='{self.mode}'") - - def _add_imports(self, additional: Optional[List[str]] = None) -> None: - """ - Add the code with imports. - """ - # Add imports. - self._append("import helpers.hpandas as hpandas") - self._append("import helpers.hunit_test as hunitest") - self._append("import jsonpickle") - self._append("import pandas as pd") - self._append("import config_root.config as cconfi") - for a in additional or []: - self._append(a) - self._code.extend(["", ""]) - - def _get_class_name_string(self) -> str: - """ - Get a string for the test code with the name of the test class. - - I.e. "class TestMyMethod(hunitest.TestCase):". 
- """ - test_name = ( - self._parent_class.__class__.__name__ - if self._parent_class is not None - else "" - ) - test_name += "".join( - [x.capitalize() for x in self._func_name.split("_")] - ) - class_string = f"class Test{test_name}(hunitest.TestCase):" - return class_string - - def _get_class_count(self) -> int: - """ - Find a number of already generated tests for the method. - """ - class_string = self._get_class_name_string() - count = 0 - for line in self._code: - count += line == class_string - return count - - def _add_test_class(self) -> None: - """ - Add the code with the test class definition and the test method - definition. - """ - # Add test class and test method. - class_string = self._get_class_name_string() - # Find how many times method was tested. - count = self._get_class_count() - if count >= self._max_tests: - # If it was already tested enough times, raise. - raise IndexError(f"{self._max_tests} tests already generated") - # Otherwise, continue to create a test code. - self._append(class_string) - self._append(f"def test{count + 1}(self) -> None:", 1) - - def _add_function_call(self) -> None: - """ - Add a call of the function to test to the test code. - """ - self._append("# Call function to test.", 2) - if self._parent_class is None: - fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] - self._append(f"actual = {self._func_name}({', '.join(fnc_call)})", 2) - else: - var_code = to_python_code(self._parent_class) - # Re-create the parent class. - self._append(f"cls = {var_code}", 2) - self._append("cls = jsonpickle.decode(cls)", 2) - fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] - # Call the method as a child of the parent class. - self._append( - f"actual = cls.{self._func_name}({', '.join(fnc_call)})", 2 - ) - - def _add_var_definitions(self) -> None: - """ - Add variables definitions for the function to test. 
- """ - if self._kwargs: - self._append("# Define input variables.", 2) - for key in self._kwargs: - as_python = to_python_code(self._kwargs[key]) - self._append(f"{key} = {as_python}", 2) - # Decode back to an actual Python object, if necessary. - if not isinstance( - self._kwargs[key], - ( - int, - float, - str, - list, - dict, - pd.DataFrame, - pd.Series, - cconfig.Config, - ), - ): - self._append(f"{key} = jsonpickle.decode({key})", 2) - - def _gen_code(self) -> str: - """ - Construct string with all generated test code. - """ - code = "\n".join(self._code) + "\n" - _LOG.debug("code=\n%s", code) - if self._to_file: - hio.to_file(self._test_file, code) - return code - - def run(self, func_output: Any) -> str: - """ - Generate a unit test for the function. - - The unit test compares the actual function output with the expected - `func_output`. - - :param func_output: the expected function output - :return: the code of the unit test - """ - if self._to_file and self._file_exists: - # Imports were added before, so skip. - pass - else: - # Start with imports. - self._add_imports() - # Count if we reached max number of tests generated for a single function. - try: - self._add_test_class() - except IndexError as exception: - # If there are already enough tests, not add anything. - _LOG.warning(str(exception)) - return "" - self._add_var_definitions() - self._add_function_call() - self._check_code(func_output) - return self._gen_code() - - -# ############################################################################# - - -def json_pretty_print(parsed: Any) -> str: - """ - Pretty print a JSON object. 
- - :param parsed: a JSON object - :return: a prettified JSON object - """ - if isinstance(parsed, str): - parsed = json.loads(parsed) - # `ret = pprint.pformat(parsed) - ret = json.dumps(parsed, indent=4, sort_keys=True) - return ret - - -def round_trip_convert(obj1: Any, log_level: int) -> Any: - """ - Encode and decode with `jsonpickle` ensuring the object remains the same. - - :param obj1: the initial object - :param log_level: the level of logging - :return: the object after encoding and decoding - """ - _LOG.log(log_level, "# obj1=\n%s", obj1) - _LOG.log(log_level, "class=%s", type(obj1)) - # Encode. - frozen = jsonpickle.encode(obj1) - _LOG.log(log_level, "# frozen=\n%s", json_pretty_print(frozen)) - # Decode. - obj2 = jsonpickle.decode(frozen) - _LOG.log(log_level, "# obj2=\n%s", obj2) - _LOG.log(log_level, "class=%s", type(obj1)) - # Check whether the decoded version is the same as the initial object. - if str(type(obj1)).startswith(" Callable: - def wrapper(*args: Any, **kwargs: Any) -> Any: - import helpers.hplayback as hplayba - - playback = hplayba.Playback("assert_equal") - res = func(*args, **kwargs) - code = playback.run(res) - print(code) - return res - - return wrapper(func) - - -# Inline the decorator as: -# -# 1) Rename `target_func` -> `target_func_tmp` -# ``` -# def target_function_tmp(...): -# ... -# ``` -# -# 2) Add wrapper: -# ``` -# def target_function_tmp(...): -# ... 
-# -# from typing import Any -# -# def target_function(*args: Any, **kwargs: Any) -> Any: -# import helpers.hplayback as hplayb -# playback = hplayb.Playback("assert_equal") -# res = target_func_tmp(*args, **kwargs) -# code = playback.run(res) -# print(code) -# return res -# ``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py deleted file mode 100644 index 29a504226..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py +++ /dev/null @@ -1,1076 +0,0 @@ -""" -Import as: - -import helpers.hprint as hprint -""" - -import functools -import inspect -import logging -import pprint -import re -import sys -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union - -import helpers.hdbg as hdbg - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - - -# ############################################################################# -# Debug output -# ############################################################################# - -_COLOR_MAP = { - "bold": 1, - # Colors. - "blue": 94, - "green": 92, - "white": 0, - "purple": 95, - "red": 91, - "yellow": 33, - # Blue. - "DEBUG": 34, - # Cyan. - "INFO": 36, - # Yellow. - "WARNING": 33, - # Red. - "ERROR": 31, - # White on red background. - "CRITICAL": 41, -} - - -def color_highlight(text: str, color: str) -> str: - """ - Return a colored string. 
- """ - prefix = "\033[" - suffix = "\033[0m" - hdbg.dassert_in(color, _COLOR_MAP) - color_code = _COLOR_MAP[color] - txt = f"{prefix}{color_code}m{text}{suffix}" - return txt - - -def clear_screen() -> None: - print((chr(27) + "[2J")) - - -def line(char: Optional[str] = None, num_chars: Optional[int] = None) -> str: - """ - Return a line with the desired character. - """ - char = "#" if char is None else char - num_chars = 80 if num_chars is None else num_chars - return char * num_chars - - -def pprint_pformat(obj: Any, *, sort_dicts: bool = False) -> str: - """ - Pretty-print in color. - """ - from pygments import highlight - from pygments.formatters import Terminal256Formatter - from pygments.lexers import PythonLexer - - txt = pprint.pformat(obj, sort_dicts=sort_dicts) - txt = highlight(txt, PythonLexer(), Terminal256Formatter()) - txt = txt.rstrip() - return txt - - -def pprint_color(obj: Any, *, tag: Optional[str] = None, sep: str = "") -> None: - """ - Pretty-print in color. - """ - txt = "" - if tag is not None: - txt += tag + "= " + sep - txt += pprint_pformat(obj) - print(txt) - - -# TODO(gp): -> Use *args instead of forcing to build a string to simplify the caller. -def frame( - message: str, - *, - char1: Optional[str] = None, - num_chars: Optional[int] = None, - char2: Optional[str] = None, - thickness: int = 1, - level: int = 0, -) -> str: - """ - Print a frame around a message. - - :param message: message to print - :param char1: char for top line of the frame - :param num_chars: how many chars in each line (by default 80 chars) - :param char2: char for bottom line of the frame - :param thickness: how many overlapping lines - - E.g., thickness = 2 - ``` - # #######... - # #######... - # hello - # #######... - # #######... - ``` - :param level: level of framing indent based on `#` char: - - E.g., level = 0 - ``` - #######... - hello - #######... - ``` - - E.g., level = 1 - ``` - # #######... - # hello - # #######... 
- ``` - """ - hdbg.dassert_isinstance(message, str) - # Fill in the default values. - if char1 is None: - # User didn't specify any char. - char1 = char2 = "#" - elif char1 is not None and char2 is None: - # User specified only one char. - char2 = char1 - elif char1 is None and char2 is not None: - # User specified the second char, but not the first one. - hdbg.dfatal(f"Invalid char1='{char1}' char2='{char2}'") - else: - # User specified both chars. Nothing to do. - pass - num_chars = 80 if num_chars is None else num_chars - # Sanity check. - hdbg.dassert_eq(len(char1), 1) - hdbg.dassert_lte(1, num_chars) - hdbg.dassert_eq(len(char2), 1) - hdbg.dassert_lte(1, thickness) - hdbg.dassert_lte(0, level) - # Build the return value. - prefix = "" - if level: - prefix = "#" * level + " " - ret = ( - (prefix + (line(char1, num_chars) + "\n") * thickness) - + (prefix + message + "\n") - + (prefix + (line(char2, num_chars) + "\n") * thickness) - ).rstrip("\n") - return ret - - -# ############################################################################# - - -StrOrList = Union[str, List[str]] - - -# TODO(gp): Use this everywhere in the codebase to avoid back-and-forth -# transforms between strings and lists of strings. -def split_lines(func: Callable) -> Callable: - """ - A decorator that splits a string input into lines before passing it to the - decorated function which expects a list of lines. - """ - - @functools.wraps(func) - def wrapper(txt: StrOrList, *args: Any, **kwargs: Any) -> StrOrList: - if isinstance(txt, str): - # Split the txt into lines. - lines = txt.splitlines() - is_str = True - else: - # The txt is already a list of lines: pass it as is. - hdbg.dassert_isinstance(txt, list) - lines = txt - is_str = False - # Call the function. - lines = func(lines, *args, **kwargs) - if is_str: - # Join the lines back together. - out = "\n".join(lines) - else: - # The output is already a list of lines. 
- hdbg.dassert_isinstance(lines, list) - out = lines - return out - - return wrapper - - -@split_lines -def prepend(lines: List[str], prefix: str) -> List[str]: - """ - Add `prefix` before each line of the string `txt`. - """ - hdbg.dassert_isinstance(lines, list) - lines_out = [prefix + curr_line for curr_line in lines] - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -@split_lines -def indent(lines: List[str], *, num_spaces: int = 2) -> List[str]: - """ - Add `num_spaces` spaces before each line of the passed string. - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(num_spaces, int) - hdbg.dassert_lte(0, num_spaces) - spaces = " " * num_spaces - txt_out = [] - for curr_line in lines: - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def strict_split(lines: List[str], max_length: int) -> List[str]: - """ - Split a string into chunks of `max_length` characters. - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_length) - lines_out = [] - for line in lines: - for i in range(0, len(line), max_length): - lines_out.append(line[i : i + max_length]) - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -@split_lines -def remove_lead_trail_empty_lines(lines: List[str]) -> List[str]: - """ - Remove consecutive empty lines only at the beginning / end of a string. - """ - hdbg.dassert_isinstance(lines, list) - # Remove leading empty lines. - while lines and not lines[0].strip(): - lines.pop(0) - # Remove trailing empty lines. 
- while lines and not lines[-1].strip(): - lines.pop() - hdbg.dassert_isinstance(lines, list) - return lines - - -@split_lines -def dedent( - lines: List[str], *, remove_lead_trail_empty_lines_: bool = True -) -> List[str]: - """ - Remove from each line the minimum number of spaces to align the text on the - left. - - It is the opposite of `indent()`. - - :param txt: multi-line string - :param txt: multi-line string - :param remove_lead_trail_empty_lines_: if True, remove all the empty - lines at the beginning and at the end - """ - if remove_lead_trail_empty_lines_: - lines = remove_lead_trail_empty_lines(lines) - # Find the minimum number of leading spaces. - min_num_spaces = None - for curr_line in lines: - _LOG.debug( - "min_num_spaces=%s: curr_line='%s'", min_num_spaces, curr_line - ) - # Skip empty lines. - if curr_line.lstrip().rstrip() == "": - _LOG.debug(" -> Skipping empty line") - continue - m = re.search(r"^(\s*)", curr_line) - hdbg.dassert(m) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. - assert m is not None - curr_num_spaces = len(m.group(1)) - _LOG.debug(" -> curr_num_spaces=%s", curr_num_spaces) - if min_num_spaces is None or curr_num_spaces < min_num_spaces: - min_num_spaces = curr_num_spaces - _LOG.debug("min_num_spaces=%s", min_num_spaces) - # Process each line and remove the minimum indentation. - txt_out = [] - for curr_line in lines: - _LOG.debug("curr_line='%s'", curr_line) - # Skip empty lines. - if curr_line.lstrip().rstrip() == "": - txt_out.append("") - continue - hdbg.dassert_lte(min_num_spaces, len(curr_line)) - txt_out.append(curr_line[min_num_spaces:]) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def align_on_left(lines: List[str]) -> List[str]: - """ - Remove all leading/trailing spaces for each line. 
- """ - hdbg.dassert_isinstance(lines, list) - txt_out = [] - for curr_line in lines: - curr_line = curr_line.rstrip(" ").lstrip(" ") - txt_out.append(curr_line) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def remove_empty_lines( - lines: List[str], *, mode: str = "no_empty_lines" -) -> List[str]: - """ - Remove empty lines from a multi-line string. - - :param lines: list of input lines to process - :param mode: - - no_empty_lines: remove all empty lines - - no_consecutive_empty_lines: remove consecutive empty lines - :return: lines with empty lines removed - """ - hdbg.dassert_isinstance(lines, list) - if mode == "no_empty_lines": - lines_out = [line for line in lines if line.rstrip().lstrip() != ""] - elif mode == "no_consecutive_empty_lines": - # If there are two or more consecutive empty lines, remove all but the last one. - lines_out = [] - prev_empty = False - for line in lines: - if re.search(r"^\s*$", line): - if prev_empty: - continue - prev_empty = True - else: - prev_empty = False - lines_out.append(line) - else: - raise ValueError(f"Invalid mode='{mode}'") - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -def vars_to_debug_string(vars_as_str: List[str], locals_: Dict[str, Any]) -> str: - """ - Create a string with var name -> var value. - - E.g., ["var1", "var2"] is converted into: ``` var1=... var2=... ``` - """ - txt = [] - for var in vars_as_str: - txt.append(var + "=") - txt.append(indent(str(locals_[var]))) - return "\n".join(txt) - - -# ############################################################################# -# Pretty print data structures. 
-# ############################################################################# - - -def to_object_str(obj: Any) -> str: - class_name = str(obj.__class__.__name__) - hex_str = str(hex(id(obj))) - return f"{class_name} at {hex_str}" - - -def to_object_repr(obj: Any) -> str: - class_module = str(obj.__class__.__module__) - class_name = str(obj.__class__.__name__) - hex_str = str(hex(id(obj))) - return f"<{class_module}.{class_name} at {hex_str}>" - - -def thousand_separator(v: float) -> str: - v = "{0:,}".format(v) - return v - - -# TODO(gp): -> to_percentage -def perc( - a: float, - b: float, - *, - invert: bool = False, - num_digits: int = 2, - only_perc: bool = False, - use_float: bool = False, - only_fraction: bool = False, - use_thousands_separator: bool = False, -) -> Union[str, float]: - """ - Calculate percentage a / b as a string. - - Asserts 0 <= a <= b. If true, returns a/b to `num_digits` decimal places. - - :param a: numerator - :param b: denominator - :param invert: assume the fraction is (b - a) / b - This is useful when we want to compute the complement of a count. - :param num_digits: number of digits to represent the percentage - :param only_perc: return only the percentage, without the fraction - - E.g., "50.00%" vs "10 / 20 = 50.00%" - :param use_float: return the percentage as a float. It requires - `only_perc = True` - :param only_fraction: return only the fraction, without the percentage - - E.g., "10 / 20" vs "10 / 20 = 50.00%" - :param use_thousands_separator: report the numbers using thousands separator - :return: string with a/b - """ - hdbg.dassert_lte(0, a) - hdbg.dassert_lte(a, b) - if invert: - a = b - a - if use_thousands_separator: - a_str = str("{0:,}".format(a)) - b_str = str("{0:,}".format(b)) - else: - a_str = str(a) - b_str = str(b) - # Validate and format the percentage. - hdbg.dassert_lte(0, num_digits) - if only_perc: - fmt = "%." 
+ str(num_digits) + "f" - ret = fmt % (float(a) / b * 100.0) - if use_float: - # 57.27 - ret = float(ret) - else: - # 57.27% - hdbg.dassert_isinstance(ret, str) - ret += "%" - elif only_fraction: - # 4225 / 7377 - ret = f"{a_str} / {b_str}" - else: - # 4225 / 7377 = 57.27% - fmt = "%s / %s = %." + str(num_digits) + "f%%" - ret = fmt % (a_str, b_str, float(a) / b * 100.0) - return ret - - -def round_digits( - v: float, *, num_digits: int = 2, use_thousands_separator: bool = False -) -> str: - """ - Round digit returning a string representing the formatted number. - - :param v: value to convert - :param num_digits: number of digits to represent v on None is - (Default value = 2) - :param use_thousands_separator: use "," to separate thousands - (Default value = False) - :return: str with formatted value - """ - if (num_digits is not None) and isinstance(v, float): - fmt = "%0." + str(num_digits) + "f" - res = float(fmt % v) - else: - res = v - if use_thousands_separator: - res = "{0:,}".format(res) # type: ignore - res_as_str = str(res) - return res_as_str - - -# ############################################################################# -# Logging helpers -# ############################################################################# - - -# TODO(gp): Move this to hdbg.hlogging, but there are dependencies from this file. - -# https://stackoverflow.com/questions/2749796 has some solutions to find the -# name of variables from the caller. - - -_VarNamesType = Optional[Union[str, List[str]]] - - -def _to_var_list(expression: _VarNamesType) -> List[str]: - if isinstance(expression, List): - return expression - hdbg.dassert_isinstance(expression, str) - # If expression is a list of space-separated expressions, convert each in a - # string. - exprs = [v.lstrip().rstrip() for v in expression.split(" ")] - # Remove empty var names. 
- exprs = [v for v in exprs if v.strip().rstrip() != ""] - hdbg.dassert_isinstance(exprs, list) - hdbg.dassert_lte(1, len(exprs)) - return exprs - - -def to_str( - expression: str, - *, - frame_level: int = 1, - print_lhs: bool = True, - char_separator: str = ",", - mode: str = "repr", -) -> str: - """ - Return a string with the value of a variable / expression / multiple - variables. - - If expression is a space-separated compound expression, convert it into - `exp1=val1, exp2=val2, ...`. - - This is similar to Python 3.8 f-string syntax `f"{foo=} {bar=}"`. - We don't want to force to use Python 3.8 just for this feature. - ``` - > x = 1 - > to_str("x+1") - x+1=2 - ``` - - :param expression: the variable / expression to evaluate and print. - E.g., `to_str("exp1")` is converted into `exp1=val1`. - If expression is a space-separated compound expression, e.g., - `to_str("exp1 exp2 ...")`, it is converted into `exp1=val1, exp2=val2, ...` - :param frame_level: level of the frame to inspect - :param print_lhs: whether we want to print the left hand side (i.e., `exp1`) - :param char_separator: separator between the values of the expressions - when printed (e.g., `,`) - :param mode: select how to print the value of the expressions (e.g., `str`, - `repr`, `pprint`, `pprint_color`) - """ - # TODO(gp): If we pass an object it would be nice to find the name of it. - # E.g., https://github.com/pwwang/python-varname - hdbg.dassert_isinstance(expression, str) - if " " in expression: - exprs = _to_var_list(expression) - # Convert each expression into a value. - _to_str = lambda x: to_str(x, frame_level=frame_level + 2) - values = list(map(_to_str, exprs)) - # Assemble in a return value. - hdbg.dassert_lte(len(char_separator), 1) - sep = char_separator + " " - txt = sep.join(values) - return txt - # Certain expressions are evaluated as literals. - if expression in ("", "->", ":", "=", "\n"): - return expression - # Evaluate the expression. 
- frame_ = sys._getframe(frame_level) # pylint: disable=protected-access - ret = "" - if print_lhs: - ret += expression + "=" - try: - eval_ = eval(expression, frame_.f_globals, frame_.f_locals) - except Exception as e: - print("expression=''", expression) - raise e - if mode == "str": - ret += str(eval_) - elif mode == "repr": - ret += repr(eval_) - elif mode == "pprint": - ret += "\n" + indent(pprint.pformat(eval_)) - elif mode == "pprint_color": - ret += "\n" + indent(pprint_pformat(eval_)) - else: - raise ValueError(f"Invalid mode='{mode}'") - return ret - - -# TODO(gp): Extend this to work on class methods, static and not. -def _func_signature_to_str( - skip_vars: _VarNamesType, - assert_on_skip_vars_error: bool, - frame_level: int, -) -> Tuple[str, str]: - """ - Return the variables of the caller function as a string. - - Same params as `func_signature_to_str()`. - :return: function name and string with the variables of the caller function - as `var1 var2 ...` - """ - if skip_vars is not None: - skip_vars = _to_var_list(skip_vars) - # Get the caller's frame (i.e., the function that called this function). - caller_frame = inspect.currentframe() - for _ in range(frame_level): - hdbg.dassert_is_not( - caller_frame, None, "caller_frame should not be None" - ) - caller_frame = caller_frame.f_back - hdbg.dassert_is_not( - caller_frame, - None, - "caller_frame should not be None after traversing frames", - ) - caller_function_name = caller_frame.f_code.co_name - # _LOG.debug("caller_function_name=%s", caller_function_name) - # Retrieve the function object from the caller's frame. 
- caller_function = caller_frame.f_globals.get(caller_function_name, None) - if caller_function: - # Get the function's signature - sig = inspect.signature(caller_function) - var_names = list(sig.parameters.keys()) - if skip_vars: - if assert_on_skip_vars_error: - hdbg.dassert_is_subset(skip_vars, var_names) - var_names = [ - var_name for var_name in var_names if var_name not in skip_vars - ] - vars_str = " ".join(var_names) - else: - raise ValueError("Unable to determine caller function") - return caller_function_name, vars_str - - -def func_signature_to_str( - # We don't use * since we want to keep it simple to call this function. - skip_vars: _VarNamesType = None, - *, - assert_on_skip_vars_error: bool = True, - frame_level: int = 2, -) -> str: - r""" - Return the variables of the caller function as a string. - - Use like: - ``` - _LOG.debug("\n%s", hprint.func_signature_to_str()) - ``` - - :param skip_vars: list of variables to skip - :param assert_on_skip_vars_error: whether to assert if the variables to skip - are not found in the function signature - :param frame_level: level of the frame to inspect. By default we need to - access the frame of the caller of the caller, so frame_level = 2 - """ - # Get the variables. - func_name, func_signature = _func_signature_to_str( - skip_vars, - assert_on_skip_vars_error, - frame_level, - ) - # Get the value of the variables. - val = to_str(func_signature, frame_level=frame_level) - val = f"# {func_name}: {val}" - return val - - -# ############################################################################# - - -def log(logger: logging.Logger, verbosity: int, *vals: Any) -> None: - """ - Log at a certain verbosity. 
- - `log(_LOG, logging.DEBUG, "ticker", "exchange")` - - is equivalent to statements like: - - ``` - _LOG.debug("%s, %s", to_str("ticker"), to_str("exchange")) - _LOG.debug("ticker=%s, exchange=%s", ticker, exchange) - ``` - """ - logger_verbosity = hdbg.get_logger_verbosity() - # print("verbosity=%s logger_verbosity=%s" % (verbosity, logger_verbosity)) - # We want to avoid the overhead of converting strings, so we evaluate the - # expressions only if we are going to print. - if verbosity >= logger_verbosity: - # We need to increment frame_lev since we are 2 levels deeper in the stack. - _to_str = lambda x: to_str(x, frame_level=3) - num_vals = len(vals) - if num_vals == 1: - fstring = "%s" - vals = _to_str(vals[0]) # type: ignore - else: - fstring = ", ".join(["%s"] * num_vals) - vals = list(map(_to_str, vals)) # type: ignore - logger.log(verbosity, fstring, vals) - - -# TODO(gp): Replace calls to `_LOG.debug("\n%s", hprint.frame(...)` with this. -# TODO(gp): Consider changing the signature from -# _log_frame(_LOG, "hello", verbosity=logger.INFO)) -# to -# _log_frame(_LOG.info, "hello", ...) -# by using the first element as a Callable -def log_frame( - logger: logging.Logger, - fstring: str, - *args: Any, - level: int = 1, - char: str = "#", - verbosity: int = logging.DEBUG, -) -> None: - """ - Log using a frame around the text with different number of leading `#` (or - `char`) to organize the log visually. 
- - The logging output looks like: - _log_frame(_LOG, "hello", verbosity=logger.INFO)) - ``` - 07:44:51 printing : log_frame : 390 : - # ######################################################################### - # hello - # ######################################################################### - ``` - - :param txt: text to print in a frame - :param level: number of `#` (or `char`) to prepend the logged text - :param char: char to prepend the logged text with - :param verbosity: logging verbosity - """ - hdbg.dassert_isinstance(logger, logging.Logger) - hdbg.dassert_isinstance(fstring, str) - msg = fstring % args - msg = msg.rstrip().lstrip() - msg = frame(msg) - # Prepend a `# `, if needed. - if level > 0: - prefix = level * char + " " - msg = prepend(msg, prefix=prefix) - # Add an empty space. - msg = "\n" + msg - logger.log(verbosity, "%s", msg) - - -# ############################################################################# - - -def type_to_string(type_as_str: str) -> str: - """ - Return a short string representing the type of an object, e.g., - "dataflow.Node" (instead of "class <'dataflow.Node'>") - """ - if isinstance(type_as_str, type): - type_as_str = str(type_as_str) - hdbg.dassert_isinstance(type_as_str, str) - # Remove the extra string from: - # - prefix = " str: - ret = f"({type(obj)}) {obj}" - return ret - - -# ############################################################################# - - -def format_list( - list_: List[Any], - *, - sep: str = " ", - max_n: Optional[int] = None, - tag: Optional[str] = None, -) -> str: - # sep = ", " - if max_n is None: - max_n = 10 - hdbg.dassert_lte(1, max_n) - n = len(list_) - txt = "" - if tag is not None: - txt += f"{tag}: " - txt += f"({n}) " - if n < max_n: - txt += sep.join(map(str, list_)) - else: - num_elems = int(max_n / 2) - hdbg.dassert_lte(1, num_elems) - txt += sep.join(map(str, list_[:num_elems])) - txt += " ... 
" - # pylint: disable=invalid-unary-operand-type - txt += sep.join(map(str, list_[-num_elems:])) - return txt - - -# TODO(gp): Use format_list(). -def list_to_str( - list_: List, - *, - tag: str = "", - sort: bool = False, - axis: int = 0, - to_string: bool = False, -) -> str: - """ - Print list / index horizontally or vertically. - """ - # TODO(gp): Fix this. - _ = to_string - txt = "" - if axis == 0: - if list_ is None: - txt += f"{tag}: (0) None\n" - else: - # hdbg.dassert_in(type(l), (list, pd.Index, pd.Int64Index)) - vals = list(map(str, list_)) - if sort: - vals = sorted(vals) - txt += f"{tag}: ({len(list_)}) {' '.join(vals)}\n" - elif axis == 1: - txt += f"{tag} ({len(list_)}):\n" - vals = list(map(str, list_)) - if sort: - vals = sorted(vals) - txt += "\n".join(vals) + "\n" - else: - raise ValueError(f"Invalid axis='{axis}'") - return txt - - -def list_to_str2( - vals: List[Any], - *, - sep_char: str = ", ", - enclose_str_char: str = "'", - max_num: Optional[int] = 10, -) -> str: - """ - Convert a list of values into a formatted string representation. - - E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" - - :param vals: values to be converted - :param sep_char: separator to use between elements - :param enclose_str_char: character to enclose each element's string - representation; if empty, elements are not enclosed - :param max_num: maximum number of elements to display in the output - :return: the formatted string representing the list - """ - vals_as_str = list(map(str, vals)) - # Add a str around. - if enclose_str_char: - vals_as_str = [ - enclose_str_char + v + enclose_str_char for v in vals_as_str - ] - # Build the output string with optional truncation. - ret = f"{len(vals)} [" - if max_num is not None and len(vals) > max_num: - hdbg.dassert_lt(1, max_num) - ret += sep_char.join(vals_as_str[: int(max_num / 2)]) - ret += sep_char + "..." 
+ sep_char - ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) - else: - ret += sep_char.join(vals_as_str) - ret += "]" - return ret - - -def set_diff_to_str( - obj1: Iterable, - obj2: Iterable, - *, - obj1_name: str = "obj1", - obj2_name: str = "obj2", - sep_char: str = " ", - add_space: bool = False, -) -> str: - """ - Compute the difference between two sequences of data and return a formatted - string. - - :param obj1: The first iterable object. - :param obj2: The second iterable object. - :param obj1_name: The name to use for the first object in the output string. - :param obj2_name: The name to use for the second object in the output string. - :param sep_char: The character to use for separating elements in the output - string. - :param add_space: Whether to add empty lines to make the output more readable. - :return: A formatted string showing the differences between the two objects. - - Example: - ``` - >>> obj1 = [1, 2, 3, 4] - >>> obj2 = [3, 4, 5, 6] - >>> set_diff_to_str(obj1, obj2, obj1_name="list1", obj2_name="list2") - * list1: (4) 1 2 3 4 - * list2: (4) 3 4 5 6 - * intersect=(2) 3 4 - * list1-list2=(2) 1 2 - * list2-list1=(2) 5 6 - ``` - """ - - def _to_string(obj: Iterable) -> str: - obj = sorted(list(obj)) - if sep_char == "\n": - txt = indent("\n" + sep_char.join(map(str, obj))) - else: - txt = sep_char.join(map(str, obj)) - return txt - - res: List[str] = [] - # obj1. - obj1 = set(obj1) - hdbg.dassert_lte(1, len(obj1)) - res.append(f"* {obj1_name}: ({len(obj1)}) {_to_string(obj1)}") - if add_space: - res.append("") - # obj2. - obj2 = set(obj2) - hdbg.dassert_lte(1, len(obj2)) - res.append(f"* {obj2_name}: ({len(obj2)}) {_to_string(obj2)}") - if add_space: - res.append("") - # obj1 intersect obj2. - intersection = obj1.intersection(obj2) - res.append(f"* intersect=({len(intersection)}) {_to_string(intersection)}") - if add_space: - res.append("") - # obj1 - obj2. 
- diff = obj1 - obj2 - res.append(f"* {obj1_name}-{obj2_name}=({len(diff)}) {_to_string(diff)}") - if add_space: - res.append("") - # obj2 - obj1. - diff = obj2 - obj1 - res.append(f"* {obj2_name}-{obj1_name}=({len(diff)}) {_to_string(diff)}") - if add_space: - res.append("") - # Join all result lines. - result = "\n".join(res) - return result - - -# ############################################################################# - - -def remove_non_printable_chars(txt: str) -> str: - # From https://stackoverflow.com/questions/14693701 - # 7-bit and 8-bit C1 ANSI sequences - ansi_escape = re.compile( - r""" - \x1B # ESC - (?: # 7-bit C1 Fe (except CSI) - [@-Z\\-_] - | # or [ for CSI, followed by a control sequence - \[ - [0-?]* # Parameter bytes - [ -/]* # Intermediate bytes - [@-~] # Final byte - ) - """, - re.VERBOSE, - ) - txt = ansi_escape.sub("", txt) - return txt - - -# TODO(gp): Maybe move to helpers/hpython.py since it's not about printing. -def sort_dictionary(dict_: Dict) -> Dict: - """ - Sort a dictionary recursively using nested OrderedDict. - """ - import collections - - res = collections.OrderedDict() - for k, v in sorted(dict_.items()): - if isinstance(v, dict): - res[k] = sort_dictionary(v) - else: - res[k] = v - return res - - -def to_pretty_str(obj: Any) -> str: - if isinstance(obj, dict): - res = pprint.pformat(obj) - # import json - # res = json.dumps(obj, indent=4, sort_keys=True) - else: - res = str(obj) - return res - - -# TODO(gp): GSI -> rename remove_lines()? -def filter_text(regex: str, txt: str) -> str: - """ - Remove lines in `txt` that match the regex `regex`. - """ - _LOG.debug("Filtering with '%s'", regex) - if regex is None: - return txt - txt_out = [] - txt_as_arr = txt.split("\n") - for line_ in txt_as_arr: - if re.search(regex, line_): - _LOG.debug("Skipping line='%s'", line_) - continue - txt_out.append(line_) - # We can only remove lines. 
- hdbg.dassert_lte( - len(txt_out), - len(txt_as_arr), - "txt_out=\n'''%s'''\ntxt=\n'''%s'''", - "\n".join(txt_out), - "\n".join(txt_as_arr), - ) - txt = "\n".join(txt_out) - return txt - - -def dassert_one_trailing_newline(txt: str) -> None: - match = re.search(r"\n*$", txt) - hdbg.dassert(match) - assert match is not None - num_newlines = len(match.group()) - hdbg.dassert_eq( - num_newlines, 0, "num_newlines='%s' txt='%s'", num_newlines, txt - ) - - -def to_info(tag: str, txt: Union[str, List[str]]) -> str: - """ - Return a string with a tag and the text indented. - - :param tag: the tag to add to the text - :param txt: the text to indent - :return: the string with the tag and the text indented - """ - hdbg.dassert_isinstance(tag, str) - hdbg.dassert_isinstance(txt, (str, list)) - txt_tmp = "" - txt_tmp += "# " + tag + "\n" - # Indent the text. - if not isinstance(txt, str): - for t in txt: - hdbg.dassert_isinstance(t, str) - txt = "\n".join(txt) - txt_tmp += indent(txt) - # Ensure that there is a single trailing newline. 
- txt_tmp = txt_tmp.rstrip("\n") - # txt_tmp += "\n" - # _dassert_one_trailing_newline(txt_tmp) - _LOG.debug("'%s'", txt_tmp) - return txt_tmp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py deleted file mode 100644 index c9cdd7be4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py +++ /dev/null @@ -1,266 +0,0 @@ -""" -Import as: - -import helpers.hpytest as hpytest -""" - -import logging -import os -import shutil -import sys -from typing import List, Optional - -import junitparser - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def _pytest_show_artifacts( - dir_name: str, *, tag: Optional[str] = None -) -> List[str]: - hdbg.dassert_ne(dir_name, "") - hdbg.dassert_dir_exists(dir_name) - cd_cmd = f"cd {dir_name} && " - # There might be no pytest artifacts. - abort_on_error = False - file_names: List[str] = [] - # Find pytest artifacts. - cmd = 'find . -name ".pytest_cache" -type d' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # - cmd = 'find . -name "__pycache__" -type d' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # Find .pyc artifacts. - cmd = 'find . -name "*.pyc" -type f' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # Remove empty lines. 
- file_names = hprint.remove_empty_lines(file_names) - # - if tag is not None: - num_files = len(file_names) - _LOG.info("%s: %d", tag, num_files) - _LOG.debug("\n%s", hprint.indent("\n".join(file_names))) - return file_names # type: ignore - - -def pytest_clean(dir_name: str, preview: bool = False) -> None: - """ - Clean pytest artifacts. - """ - _LOG.warning("Cleaning pytest artifacts") - hdbg.dassert_ne(dir_name, "") - hdbg.dassert_dir_exists(dir_name) - if preview: - _LOG.warning("Preview only: nothing will be deleted") - # Show before cleaning. - file_names = _pytest_show_artifacts(dir_name, tag="Before cleaning") - # Clean. - for f in file_names: - exists = os.path.exists(f) - _LOG.debug("%s -> exists=%s", f, exists) - if exists: - if not preview: - if os.path.isdir(f): - shutil.rmtree(f) - elif os.path.isfile(f): - os.remove(f) - else: - raise ValueError(f"Can't delete {f}") - else: - _LOG.debug("rm %s", f) - # Show after cleaning. - file_names = _pytest_show_artifacts(dir_name, tag="After cleaning") - hdbg.dassert_eq(len(file_names), 0) - - -# ############################################################################# -# JUnitReporter -# ############################################################################# - - -class JUnitReporter: - def __init__(self, xml_file: str): - self.xml_file = xml_file - self.xml_data = None - self.overall_stats = { - "passed": 0, - "failed": 0, - "error": 0, - "skipped": 0, - "total_time": 0.0, - "total_tests": 0, - } - - def _load(self) -> None: - """ - Load the JUnit XML file. - """ - self.xml_data = junitparser.JUnitXml.fromfile(self.xml_file) - - def parse(self): - """ - Parse the JUnit XML file. - """ - try: - self._load() - # Calculate overall statistics. 
- for suite in self.xml_data: - if isinstance(suite, junitparser.TestSuite): - self.overall_stats["total_time"] += suite.time or 0 - self.overall_stats["total_tests"] += suite.tests or 0 - self.overall_stats["passed"] += ( - (suite.tests or 0) - - (suite.failures or 0) - - (suite.errors or 0) - - (suite.skipped or 0) - ) - self.overall_stats["failed"] += suite.failures or 0 - self.overall_stats["error"] += suite.errors or 0 - self.overall_stats["skipped"] += suite.skipped or 0 - except Exception as e: - print(hprint.color_highlight(f"Error parsing XML file: {e}", "red")) - sys.exit(1) - - def _get_colored_status(self, case: junitparser.TestCase) -> str: - """ - Get the colored status representation of test case. - """ - if not case.result or len(case.result) == 0: - return hprint.color_highlight("PASSED", "green") - result_type = case.result[0].__class__.__name__ - if result_type == "Failure": - return hprint.color_highlight("FAILED", "red") - elif result_type == "Error": - return hprint.color_highlight("ERROR", "red") - elif result_type == "Skipped": - return hprint.color_highlight("SKIPPED", "yellow") - else: - return hprint.color_highlight("PASSED", "green") - - def _print_detailed_results(self): - print(hprint.color_highlight("=" * 70, "bold")) - print( - hprint.color_highlight( - f"collected {self.overall_stats['total_tests']} items", "bold" - ) - ) - for _, suite in enumerate(self.xml_data): - if not isinstance(suite, junitparser.TestSuite): - continue - # Print suite header. - print(f"\n{hprint.color_highlight('=' * 70, 'blue')}") - print(hprint.color_highlight(f"Test: {suite.name}", "bold")) - print( - hprint.color_highlight( - f"Timestamp: {getattr(suite, 'timestamp', 'Unknown')}", - "bold", - ) - ) - print(hprint.color_highlight("-" * 70, "blue")) - # Print each test case. 
- for case in suite: - if isinstance(case, junitparser.TestCase): - status_display = self._get_colored_status(case) - test_time = getattr(case, "time", 0) or 0 - print( - f" {case.classname}::{case.name} {status_display} ({test_time:.3f}s)" - ) - # Print suite summary. - suite_passed = ( - (suite.tests or 0) - - (suite.failures or 0) - - (suite.errors or 0) - - (suite.skipped or 0) - ) - summary_parts = [] - if suite_passed > 0: - summary_parts.append( - hprint.color_highlight(f"{suite_passed} passed", "green") - ) - if suite.failures and suite.failures > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.failures} failed", "red") - ) - if suite.errors and suite.errors > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.errors} error", "red") - ) - if suite.skipped and suite.skipped > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.skipped} skipped", "WARNING") - ) - suite_summary = ( - ", ".join(summary_parts) if summary_parts else "no tests" - ) - suite_time = getattr(suite, "time", 0) or 0 - print( - hprint.color_highlight( - f"Summary: {suite_summary} in {suite_time:.3f}s", "INFO" - ) - ) - - def _print_final_summary(self): - summary_parts = [] - if self.overall_stats["passed"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['passed']} passed", "green" - ) - ) - if self.overall_stats["failed"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['failed']} failed", "red" - ) - ) - if self.overall_stats["error"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['error']} error", "red" - ) - ) - if self.overall_stats["skipped"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['skipped']} skipped", "yellow" - ) - ) - summary_text = ", ".join(summary_parts) if summary_parts else "no tests" - time_text = "in " + hprint.color_highlight( - f"{self.overall_stats['total_time']:.2f}s", "bold" - ) - # Determine overall 
status - if self.overall_stats["failed"] > 0 or self.overall_stats["error"] > 0: - status_indicator = hprint.color_highlight("FAILED", "red") - elif ( - self.overall_stats["skipped"] > 0 - and self.overall_stats["passed"] == 0 - ): - status_indicator = hprint.color_highlight("SKIPPED", "yellow") - else: - status_indicator = hprint.color_highlight("PASSED", "green") - # Print summary. - print(f"\n{hprint.color_highlight('=' * 70, 'bold')}") - print( - hprint.color_highlight( - f"Summary: {summary_text} {time_text}", "INFO" - ) - ) - print(hprint.color_highlight(f"Result: {status_indicator}", "INFO")) - - def print_summary(self): - self._print_detailed_results() - self._print_final_summary() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py deleted file mode 100644 index 2ee2166f9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Import as: - -import helpers.hretry as hretry -""" - -import asyncio -import functools -import logging -import time -from typing import Any, Tuple - -_LOG = logging.getLogger(__name__) - - -def sync_retry( - num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 -) -> object: - """ - Decorator retrying the wrapped function/method num_attempts times if the - `exceptions` listed in exceptions are thrown. - - :param num_attempts: the number of times to repeat the wrapped function/method - - The function will be called `num_attempts` times. 
- :param exceptions: list of exceptions that trigger a retry attempt - :param retry_delay_in_sec: the number of seconds to wait between retry attempts - :return: the result of the wrapped function/method - """ - - def decorator(func) -> object: - @functools.wraps(func) - def retry_wrapper(*args, **kwargs): - attempts_count = 1 - last_exception = None - while attempts_count < num_attempts + 1: - try: - return func(*args, **kwargs) - except exceptions as e: - last_exception = e - _LOG.warning( - "Exception %s thrown when attempting to run %s, attempt " - "%d of %d", - e, - func, - attempts_count, - num_attempts, - ) - attempts_count += 1 - time.sleep(retry_delay_in_sec) - _LOG.error( - "Function %s failed after %d attempts", func, num_attempts - ) - raise last_exception - - return retry_wrapper - - return decorator - - -def async_retry( - num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 -) -> object: - """ - Same as `sync_retry` decorator but for `async` functions. - """ - - def decorator(func) -> object: - @functools.wraps(func) - async def retry_wrapper(*args, **kwargs): - attempts_count = 1 - last_exception = None - while attempts_count < num_attempts + 1: - try: - return await func(*args, **kwargs) - except exceptions as e: - last_exception = e - _LOG.warning( - "Exception %s thrown when attempting to run %s, attempt " - "%d of %d", - e, - func, - attempts_count, - num_attempts, - ) - attempts_count += 1 - await asyncio.sleep(retry_delay_in_sec) - _LOG.error( - "Function %s failed after %d attempts", func, num_attempts - ) - raise last_exception - - return retry_wrapper - - return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py deleted file mode 100644 index a28914cb7..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py +++ /dev/null @@ -1,1129 +0,0 @@ -""" -Import as: - -import helpers.hs3 as hs3 -""" - -import argparse -import configparser -import copy -import functools -import gzip -import logging -import os -import pathlib -import re -from typing import Any, Dict, List, Optional, Tuple, Union - -_WARNING = "\033[33mWARNING\033[0m" - -try: - import s3fs - - # Handle different versions of s3fs where core module may be at different locations - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module - try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ModuleNotFoundError: - _module = "s3fs" - print(_WARNING + f": Can't find {_module}: continuing") - # Define dummy classes for type hints when s3fs is not available - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -# Avoid the following dependency from other `helpers` modules to prevent import cycles. -# import helpers.hpandas as hpandas -# import helpers.hsql as hsql -# import helpers.hunit_test as hunitest - -# To enforce this order of the imports we use the directive for the linter below. 
-import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hintrospection as hintros # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hio as hio # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hprint as hprint # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hserver as hserver # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hsystem as hsystem # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.htimer as htimer # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - -_LOG = logging.getLogger(__name__) - -# AWS Region global constants -# Moved to hs3.py from haws.py due to cyclic imports detected in -# build https://github.com/cryptokaizen/cmamp/actions/runs/10729983412/job/29757600889 -AWS_EUROPE_REGION_1 = "eu-north-1" -AWS_TOKYO_REGION_1 = "ap-northeast-1" -AWS_US_REGION_1 = "us-east-1" -AWS_REGIONS = [AWS_EUROPE_REGION_1, AWS_TOKYO_REGION_1, AWS_US_REGION_1] - -# TODO(gp): @all separate S3 code in `helpers/hs3.py` from authentication and -# AWS profile code in `helpers/aws_authentication.py`. - -# ############################################################################# -# Basic utils. -# ############################################################################# - -AwsProfile = Optional[Union[str, S3FileSystem]] - - -def is_s3_path(s3_path: str) -> bool: - """ - Return whether a path is on an S3 bucket, i.e., if it starts with `s3://`. 
- """ - hdbg.dassert_isinstance(s3_path, str) - valid = s3_path.startswith("s3://") - if s3_path.startswith("s3://s3://"): - valid = False - return valid - - -def dassert_is_s3_path(s3_path: str) -> None: - """ - Assert if a file is not a S3 path. - """ - hdbg.dassert( - is_s3_path(s3_path), - "Invalid S3 file='%s'", - s3_path, - ) - - -def dassert_is_not_s3_path(s3_path: str) -> None: - """ - Assert if a file is a S3 path. - """ - hdbg.dassert( - not is_s3_path(s3_path), - "Passed an S3 file='%s' when it was not expected", - s3_path, - ) - - -def dassert_is_valid_aws_profile(path: str, aws_profile: AwsProfile) -> None: - """ - Check that the value of `aws_profile` is compatible with the S3 or local - file `path`. - - :param path: S3 or local path - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - """ - if is_s3_path(path): - hdbg.dassert_is_not( - aws_profile, None, "path=%s aws_profile=%s", path, aws_profile - ) - else: - hdbg.dassert_is( - aws_profile, None, "path=%s aws_profile=%s", path, aws_profile - ) - - -# /////////////////////////////////////////////////////////////////////////////// - - -def get_s3fs(aws_profile: AwsProfile) -> S3FileSystem: - """ - Return a `s3fs` object from a given AWS profile. - - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - if hserver.is_ig_prod(): - # On IG prod machines we let the Docker container infer the right AWS - # account. - _LOG.warning("Not using AWS profile='%s'", aws_profile) - s3fs_ = S3FileSystem() - else: - if isinstance(aws_profile, str): - # When deploying jobs via ECS the container obtains credentials - # based on passed task role specified in the ECS task-definition, - # refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if ( - # TODO(heanh): Centralize the list of supported profiles. 
- aws_profile in ["ck", "csfy"] - and hserver.is_inside_ecs_container() - ): - _LOG.info("Fetching credentials from task IAM role") - s3fs_ = S3FileSystem() - else: - # TODO(heanh): Make this manual extraction of credentials - # code obsoleted. - # From https://stackoverflow.com/questions/62562945 - # aws_credentials = get_aws_credentials(aws_profile) - # _LOG.debug("%s", pprint.pformat(aws_credentials)) - # s3fs_ = S3FileSystem( - # anon=False, - # key=aws_credentials["aws_access_key_id"], - # secret=aws_credentials["aws_secret_access_key"], - # token=aws_credentials["aws_session_token"], - # client_kwargs={"region_name": aws_credentials["aws_region"]}, - # ) - # - # We do not need to extract the credential from the file because - # the config (`~/.aws/config`) and credential - # (`~/.aws/credentials`) are already set. - s3fs_ = S3FileSystem(anon=False, profile=aws_profile) - elif isinstance(aws_profile, S3FileSystem): - s3fs_ = aws_profile - else: - raise ValueError(f"Invalid aws_profile='{aws_profile}'") - return s3fs_ - - -def dassert_path_exists( - path: str, aws_profile: Optional[AwsProfile] = None -) -> None: - """ - Assert if S3 or local path doesn't exist. `aws_profile` is specified if and - only if path is an S3 path. - - :param path: S3 or local path - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - hdbg.dassert(s3fs_.exists(path), f"S3 path '{path}' doesn't exist!") - else: - hdbg.dassert_path_exists(path) - - -def dassert_path_not_exists( - path: str, aws_profile: Optional[AwsProfile] = None -) -> None: - """ - Assert if S3 or local path exist. `aws_profile` is specified if and only if - path is an S3 path. 
- - :param path: S3 or local path - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - hdbg.dassert(not s3fs_.exists(path), f"S3 path '{path}' already exist!") - else: - hdbg.dassert_path_not_exists(path) - - -# TODO(gp): Consider using `s3fs.split_path`. -def split_path(s3_path: str) -> Tuple[str, str]: - """ - Separate an S3 path in the bucket and the rest of the path as absolute from - the root. - - E.g., for `s3://alphamatic-data/tmp/hello` returns (`alphamatic- - data`, /tmp/hello`) - """ - dassert_is_s3_path(s3_path) - # Remove the s3 prefix. - prefix = "s3://" - hdbg.dassert(s3_path.startswith(prefix)) - s3_path = s3_path[len(prefix) :] - # Break the path into dirs. - dirs = s3_path.split("/") - bucket = dirs[0] - abs_path = os.path.join("/", *dirs[1:]) - hdbg.dassert( - abs_path.startswith("/"), - "The path should be absolute instead of %s", - abs_path, - ) - return bucket, abs_path - - -def _replace_star_with_double_star(pattern_to_modify: str) -> str: - """ - Replace a single star with a double star in a pattern. - - Originally we simply used to do `pattern.replace("*", "**")`. - but in the newer versions of `s3fs` this is not allowed: - `ValueError: Invalid pattern: '**' can - only be an entire path component` - - We also need to take care of special such as: - *.csv* -> **/*.csv* - - Examples: - s3://bucket/*/path/* -> s3://bucket/**/*/path/**/* - s3://bucket/*/path/csv* -> s3://bucket/**/*/path/csv* - - :param pattern_to_modify: pattern to replace wildcards in - :return: pattern with wildcards replaced - """ - append_wildcard = False - # Handle the special case of ending with wildcard - # (e.g.: *.csv*). 
- if re.match(r"(?=.*[a-zA-Z0-9]).*\*$", pattern_to_modify): - pattern_to_modify = pattern_to_modify[:-1] - append_wildcard = True - new_pattern = pattern_to_modify.replace("*", "**/*") - new_pattern = new_pattern + "*" if append_wildcard else new_pattern - return new_pattern - - -def listdir( - dir_name: str, - pattern: str, - only_files: bool, - use_relative_paths: bool, - *, - exclude_git_dirs: bool = True, - aws_profile: Optional[AwsProfile] = None, - maxdepth: Optional[int] = None, -) -> List[str]: - """ - Counterpart to `hio.listdir` with S3 support. - - :param dir_name: S3 or local path - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - :param maxdepth: limit the depth of directory traversal - """ - dassert_is_valid_aws_profile(dir_name, aws_profile) - _LOG.debug("pattern=%s", pattern) - if is_s3_path(dir_name): - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(dir_name, s3fs_) - # Ensure that there are no multiple stars in pattern. - hdbg.dassert_not_in("**", pattern) - # `hio.listdir` is using `find` which looks for files and directories - # descending recursively in the directory. - # One star in glob will use `maxdepth=1`. - pattern = _replace_star_with_double_star(pattern) - _LOG.debug("pattern=%s", pattern) - # Detailed S3 objects in dict form with metadata. - path_objects = s3fs_.glob( - f"{dir_name}/{pattern}", detail=True, maxdepth=maxdepth - ) - if only_files: - # Original `path_objects` must not be changed during loop. - temp_path_objects = copy.deepcopy(list(path_objects.values())) - # Use metadata to distinguish files from directories without - # calling `s3fs_.isdir/isfile`. 
- for path_object in temp_path_objects: - if path_object["type"] != "file": - path_objects.pop(path_object["Key"]) - paths = list(path_objects.keys()) - if exclude_git_dirs: - paths = [ - path for path in paths if ".git" not in pathlib.Path(path).parts - ] - bucket, absolute_path = split_path(dir_name) - # Basically the goal is to remove `s3://` from the full S3 path. - root_path = f"{bucket}{absolute_path}" - # Remove redundant separators. - paths = {os.path.normpath(path) for path in paths} - # Remove special entries such as `.` (`root_path` in this case) and - # bucket name to keep the same return format as in `hio.listdir()`. - paths_to_exclude = [bucket, root_path] - paths = [path for path in paths if path not in paths_to_exclude] - if use_relative_paths: - paths = [os.path.relpath(path, start=root_path) for path in paths] - else: - paths = hio.listdir( - dir_name, - pattern, - only_files, - use_relative_paths, - exclude_git_dirs=exclude_git_dirs, - maxdepth=maxdepth, - ) - return paths - - -def du( - path: str, - *, - human_format: bool = False, - aws_profile: Optional[AwsProfile] = None, -) -> Union[int, str]: - """ - Counterpart to `hsystem.du` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(path, s3fs_) - size: Union[int, str] = s3fs_.du(path) - if human_format: - size = hintros.format_size(size) - else: - size = hsystem.du(path, human_format=human_format) - return size - - -def to_file( - lines: str, - file_name: str, - *, - mode: Optional[str] = None, - force_flush: bool = False, - aws_profile: Optional[AwsProfile] = None, -) -> None: - """ - Counterpart to `hio.to_file` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. 
- """ - dassert_is_valid_aws_profile(file_name, aws_profile) - if is_s3_path(file_name): - # Ensure that `bytes` is used. - if mode is not None and "b" not in mode: - raise ValueError("S3 only allows binary mode!") - hdbg.dassert_isinstance(lines, str) - # Convert lines to bytes, only supported mode for S3. - # Also create a list of new lines as raw bytes is not supported. - os_sep = os.linesep - lines_lst = [f"{line}{os_sep}".encode() for line in lines.split(os_sep)] - # Inspect file name and path. - hio.dassert_is_valid_file_name(file_name) - s3fs_ = get_s3fs(aws_profile) - mode = "wb" if mode is None else mode - # Open S3 file. `rb` is the default mode for S3. - with s3fs_.open(file_name, mode) as s3_file: - if file_name.endswith((".gz", ".gzip")): - # Open and decompress gzipped file. - with gzip.GzipFile(fileobj=s3_file) as gzip_file: - gzip_file.writelines(lines_lst) - else: - # Any other file. - s3_file.writelines(lines_lst) - if force_flush: - # TODO(Nikola): Investigate S3 alternative for `os.fsync(f.fileno())`. - s3_file.flush() - else: - use_gzip = file_name.endswith((".gz", ".gzip")) - hio.to_file( - file_name, - lines, - mode=mode, - use_gzip=use_gzip, - force_flush=force_flush, - ) - - -def from_file( - file_name: str, - encoding: Optional[Any] = None, - aws_profile: Optional[AwsProfile] = None, -) -> str: - """ - Counterpart to `hio.from_file` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. - """ - dassert_is_valid_aws_profile(file_name, aws_profile) - if is_s3_path(file_name): - if encoding: - raise ValueError("Encoding is not supported when reading from S3!") - # Inspect file name and path. - hio.dassert_is_valid_file_name(file_name) - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(file_name, s3fs_) - # Open s3 file. - with s3fs_.open(file_name) as s3_file: - if file_name.endswith((".gz", ".gzip")): - # Open and decompress gzipped file. 
- with gzip.GzipFile(fileobj=s3_file) as gzip_file: - data = gzip_file.read().decode() - else: - # Any other file. - data = s3_file.read().decode() - else: - data = hio.from_file(file_name, encoding=encoding) - return data - - -# TODO(Nina): consider adding support for handling dirs. -# TODO(Grisha): consider extending for the regular file system. -def copy_file_to_s3( - file_path: str, - s3_dst_file_path: str, - aws_profile: str, -) -> None: - """ - Copy a local file to S3. - - :param file_path: path to a file to copy - :param s3_dst_file_path: S3 path to copy to - :param aws_profile: aws profile - """ - hdbg.dassert_file_exists(file_path) - dassert_is_s3_path(s3_dst_file_path) - dassert_is_valid_aws_profile(s3_dst_file_path, aws_profile) - aws_s3_cp_cmd = f"aws s3 cp {file_path} {s3_dst_file_path}" - if not hserver.is_inside_ecs_container(): - # There is no `~/.aws/credentials` file inside an ECS container - # but the AWS credentials are received via a task role. So - # no need to pass the profile option. - aws_s3_cp_cmd += f" --profile {aws_profile}" - _LOG.info("Copying from %s to %s", file_path, s3_dst_file_path) - hsystem.system(aws_s3_cp_cmd, suppress_output=False) - - -def get_local_or_s3_stream( - file_name: str, **kwargs: Any -) -> Tuple[Union[S3FileSystem, str], Any]: - """ - Get S3 stream for desired file or simply returns file name. - - :param file_name: file name or full path to file - """ - _LOG.debug(hprint.to_str("file_name kwargs")) - # Handle the s3fs param, if needed. - if is_s3_path(file_name): - # For S3 files we need to have an `s3fs` parameter. 
- hdbg.dassert_in( - "s3fs", - kwargs, - "Credentials through s3fs are needed to access an S3 path", - ) - s3fs_ = kwargs.pop("s3fs") - hdbg.dassert_isinstance(s3fs_, S3FileSystem) - dassert_path_exists(file_name, s3fs_) - stream = s3fs_.open(file_name) - else: - if "s3fs" in kwargs: - _LOG.warning("Passed `s3fs` without an S3 file: ignoring it") - _ = kwargs.pop("s3fs") - hdbg.dassert_file_exists(file_name) - stream = file_name - return stream, kwargs - - -# ############################################################################# -# AWS. -# ############################################################################# - - -def _get_aws_config(file_name: str) -> configparser.RawConfigParser: - """ - Return a parser to the config in `~/.aws/{file_name}`. - """ - file_name = os.path.join(os.path.expanduser("~"), ".aws", file_name) - hdbg.dassert_file_exists(file_name) - # Read the config. - config = configparser.RawConfigParser() - config.read(file_name) - _LOG.debug("config.sections=%s", config.sections()) - return config - - -# ############################################################################# -# Authentication. 
-# ############################################################################# - -# Architecture of the AWS authentication -# -# - There can be two or more AWS S3 systems with different credentials, paths to -# bucket, and other properties -# - Some code needs to refer always and only to a specific S3 bucket -# - E.g., AM S3 bucket for Kibot data -# - Other code needs to work with different AWS S3 systems -# - E.g., `publish_notebooks`, saving / retrieving experiments, caching -# -# - The desired AWS S3 systems are selected through an `aws_profile` parameter -# (e.g., `ck`) -# - The value of AWS profile is obtained from -# - the `--aws_profile` command line option; or -# - a client specifying the needed `aws_profile` -# -# - The AWS profile is then used to access the `~/.aws` files and extract: -# - the credentials (e.g., `aws_access_key_id`, `aws_secret_access_key`, -# `aws_region`) -# - other variables (e.g., `aws_s3_bucket`) -# - The variables that are extracted from the files are passed through env vars -# directly for GitHub Actions CI -# - One can specify env vars conditioned to different profiles using the AWS -# profile -# - E.g., `ck` profile for `AWS_ACCESS_KEY_ID` corresponds to -# `CSFY_AWS_ACCESS_KEY_ID` - - -@functools.lru_cache() -def get_aws_credentials( - aws_profile: str, -) -> Dict[str, Optional[str]]: - """ - Read the AWS credentials for a given profile from `~/.aws` or from env - vars. - - :return: a dictionary with `access_key_id`, `aws_secret_access_key`, - `aws_region` and optionally `aws_session_token` - """ - _LOG.debug("Getting credentials for aws_profile='%s'", aws_profile) - if aws_profile == "__mock__": - # `mock` profile is artificial construct used only in tests. - aws_profile = aws_profile.strip("__") - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. 
- # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - result: Dict[str, Optional[str]] = {} - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - key_to_env_var: Dict[str, str] = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", - # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant - # that the var is simply the capitalized version of the key. - "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", - } - else: - key_to_env_var: Dict[str, str] = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant - # that the var is simply the capitalized version of the key. - "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", - } - # If all the AWS credentials are passed through env vars, they override the - # config file. - env_var_override = False - set_env_vars = [ - (env_var in os.environ and os.environ[env_var] != "") - for env_var in sorted(key_to_env_var.values()) - ] - if any(set_env_vars): - if not all(set_env_vars): - _LOG.warning( - "Some but not all AWS env vars are set (%s): ignoring", - str(set_env_vars), - ) - else: - env_var_override = True - if env_var_override: - _LOG.debug("Using AWS credentials from env vars") - # If one variable is defined all should be defined. 
- for key, env_var in key_to_env_var.items(): - _LOG.debug("'%s' in env vars=%s", env_var, env_var in os.environ) - _LOG.debug( - "'%s' != ''=%s", env_var, os.environ.get(env_var, None) != "" - ) - hdbg.dassert_in(env_var, os.environ) - result[key] = os.environ[env_var] - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - result["aws_session_token"] = os.environ[ - f"{profile_prefix}_AWS_SESSION_TOKEN" - ] - else: - result["aws_session_token"] = None - else: - _LOG.debug("Using AWS credentials from files") - # > more ~/.aws/credentials - # [am] - # aws_access_key_id=AKI... - # aws_secret_access_key=mhg.. - # aws_session_token = Fwo... - file_name = "credentials" - config = _get_aws_config(file_name) - # - key = "aws_access_key_id" - result[key] = config.get(aws_profile, key) - # - key = "aws_secret_access_key" - result[key] = config.get(aws_profile, key) - # - key = "aws_session_token" - if config.has_option(aws_profile, key): - result[key] = config.get(aws_profile, key) - else: - result[key] = None - # - key = "aws_s3_bucket" - if config.has_option(aws_profile, key): - result[key] = config.get(aws_profile, key) - else: - result[key] = None - # > more ~/.aws/config - # [am] - # region = us-east-1 - file_name = "config" - config = _get_aws_config(file_name) - key = "aws_region" - # For ~/.aws/config the tag is `profile am` instead of `am`. - result[key] = config.get(f"profile {aws_profile}", "region") - # - hdbg.dassert_is_subset(key_to_env_var.keys(), result.keys()) - return result - - -# ############################################################################# -# Bucket -# ############################################################################# - - -# TODO(Nikola): CmTask #1810 "Increase test coverage in helpers/hs3.py" -def get_s3_bucket_path(aws_profile: str, add_s3_prefix: bool = True) -> str: - """ - Return the S3 bucket from environment variable corresponding to a given - `aws_profile`. 
- - E.g., `aws_profile="am"` uses the value in `AM_AWS_S3_BUCKET` which - is usually set to `s3://alphamatic-data`. - """ - hdbg.dassert_type_is(aws_profile, str) - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - prefix = aws_profile.upper() - prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - env_var = f"{prefix}_AWS_S3_BUCKET" - if env_var in os.environ: - _LOG.debug("No env var '%s'", env_var) - s3_bucket = os.environ[env_var] - else: - # Fall-back to local credentials. - _LOG.debug("Checking credentials") - aws_credentials = get_aws_credentials(aws_profile) - _LOG.debug("%s", aws_credentials) - s3_bucket = aws_credentials.get("aws_s3_bucket", "") - hdbg.dassert_ne(s3_bucket, "") - hdbg.dassert( - not s3_bucket.startswith("s3://"), - "Invalid %s value '%s'", - env_var, - s3_bucket, - ) - if add_s3_prefix: - s3_bucket = "s3://" + s3_bucket - return s3_bucket - - -# TODO(sonaal): Do we really need aws profile as argument or -# we can use default? Ref. https://github.com/cryptokaizen/cmamp/pull/6045#discussion_r1380392748 -def get_s3_bucket_path_unit_test( - aws_profile: str, *, add_s3_prefix: bool = True -) -> str: - if aws_profile == "ck": - s3_bucket = "cryptokaizen-unit-test" - else: - hdbg.dfatal(f"Invalid aws_profile={aws_profile}") - if add_s3_prefix: - s3_bucket = "s3://" + s3_bucket - return s3_bucket - - -def get_latest_pq_in_s3_dir(s3_path: str, aws_profile: str) -> str: - """ - Get the latest Parquet file in the specified directory. - - :param s3_path: the path to s3 directory, e.g. - `cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance` - :param aws_profile: AWS profile to use - :return: the path to the latest Parquet file in the directory, - E.g. 
`cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance/ - currency_pair=ETH_USDT/year=2022/month=12/data.parquet` - """ - hdbg.dassert_type_is(aws_profile, str) - s3fs_ = get_s3fs(aws_profile) - dir_name = f"{s3_path}/**/*.parquet" - pq_files = s3fs_.glob(dir_name, detail=True) - hdbg.dassert_lte(1, len(pq_files), "dir_name=%s", dir_name) - _LOG.debug("pq_files=%s", pq_files) - # Sort the files by the date they were modified for the last time. - sorted_files = sorted( - pq_files.items(), key=lambda t: t[1]["LastModified"], reverse=True - ) - # Get the path to the latest file. - latest_file_path = sorted_files[0][0] - return latest_file_path - - -# ############################################################################# -# Parser. -# ############################################################################# - - -def add_s3_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: - """ - Add the command line options for the AWS credentials. - """ - parser.add_argument( - "--aws_profile", - action="store", - type=str, - help="The AWS profile to use for `.aws/credentials` or for env vars", - ) - parser.add_argument( - "--s3_path", - action="store", - type=str, - default=None, - help="Full S3 dir path to use (e.g., `s3://alphamatic-data/foobar/`), " - "overriding any other setting", - ) - return parser - - -def _dassert_all_env_vars_set(key_to_env_var: Dict[str, str]) -> None: - """ - Check that the required AWS env vars are set and are not empty strings. - """ - for v in key_to_env_var.values(): - hdbg.dassert_in(v, os.environ) - hdbg.dassert_ne(v, "") - - -def _get_aws_file_text(key_to_env_var: Dict[str, str]) -> List[str]: - """ - Generate text from env vars for AWS files. 
- - E.g.: - ``` - aws_access_key_id=*** # gitleaks:allow - aws_secret_access_key=*** # gitleaks:allow - aws_s3_bucket=*** - ``` - :param key_to_env_var: aws settings names to the corresponding env - var names mapping - :return: AWS file text - """ - txt = [] - for k, v in key_to_env_var.items(): - line = f"{k}={os.environ[v]}" - txt.append(line) - return txt - - -def _get_aws_config_text(aws_profile: str) -> str: - """ - Generate text for the AWS config file, i.e. ".aws/config". - """ - # Set which env vars we need to get. - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - region_env_var = f"{profile_prefix}_AWS_DEFAULT_REGION" - key_to_env_var = {"region": region_env_var} - # Check that env vars are set. - _dassert_all_env_vars_set(key_to_env_var) - text = _get_aws_file_text(key_to_env_var) - text.insert(0, f"[profile {aws_profile}]") - text = "\n".join(text) - return text - - -def _get_aws_credentials_text(aws_profile: str) -> str: - """ - Generate text for the AWS credentials file, i.e. ".aws/credentials". - """ - # Set which env vars we need to get. - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - # Check if AWS session token is set in environment variable. - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - key_to_env_var = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", - # TODO(heanh): Is this needed? 
- "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", - } - else: - key_to_env_var = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - # TODO(heanh): Is this needed? - "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", - } - # Check that env vars are set. - _dassert_all_env_vars_set(key_to_env_var) - text = _get_aws_file_text(key_to_env_var) - text.insert(0, f"[{aws_profile}]") - text = "\n".join(text) - return text - - -def generate_aws_files( - home_dir: str = "~", - aws_profiles: Optional[List[str]] = None, -) -> None: - """ - Generate AWS configuration files. - - This is needed to use the AWS CLI and the `boto3` library when we are in CI. - """ - if home_dir == "~": - home_dir = os.path.expanduser(home_dir) - config_file_name = os.path.join(home_dir, ".aws", "config") - credentials_file_name = os.path.join(home_dir, ".aws", "credentials") - # Check if the files already exist. - if os.path.exists(credentials_file_name) and os.path.exists( - config_file_name - ): - _LOG.info( - "Both files exist: %s and %s; exiting", - credentials_file_name, - config_file_name, - ) - return - if aws_profiles is None: - aws_profiles = ["ck"] - config_file_text = [] - credentials_file_text = [] - # Get text with settings for both files. - for profile in aws_profiles: - current_config_text = _get_aws_config_text(profile) - config_file_text.append(current_config_text) - current_credentials_text = _get_aws_credentials_text(profile) - credentials_file_text.append(current_credentials_text) - # Create both files. 
- config_file_text = "\n\n".join(config_file_text) - hio.to_file(config_file_name, config_file_text) - _LOG.debug("Saved AWS config to %s", config_file_name) - - # - credentials_file_text = "\n\n".join(credentials_file_text) - hio.to_file(credentials_file_name, credentials_file_text) - _LOG.debug("Saved AWS credentials to %s", credentials_file_name) - - -# ############################################################################# -# Archive and retrieve data from S3. -# ############################################################################# - - -# TODO(gp): -> helpers/aws_utils.py - - -def archive_data_on_s3( - src_dir: str, s3_path: str, aws_profile: Optional[str], tag: str = "" -) -> str: - """ - Compress dir `src_dir` and save it on AWS S3 under `s3_path`. - - A timestamp and a tag is added to make the name more informative. - The tgz is created so that when expanded a dir with the name `src_dir` is - created. - - :param src_dir: directory that will be compressed - :param s3_path: full S3 path starting with `s3://` - :param aws_profile: the profile to use. We use a string and not an - `AwsProfile` since this is typically the outermost caller in the stack, - and it doesn't reuse an S3 fs object - :param tag: a tag to add to the name of the file - """ - _LOG.info( - "# Archiving '%s' to '%s' with aws_profile='%s'", - src_dir, - s3_path, - aws_profile, - ) - hdbg.dassert_dir_exists(src_dir) - dassert_is_s3_path(s3_path) - _LOG.info( - "The size of '%s' is %s", - src_dir, - hsystem.du(src_dir, human_format=True), - ) - # Add a timestamp if needed. - dst_path = hsystem.append_timestamp_tag(src_dir, tag) + ".tgz" - # Compress the dir. - # > (cd .../TestRunExperimentArchiveOnS3.test_serial1; \ - # tar cvzf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz experiment.RH1E) - # experiment.RH1E/ - # experiment.RH1E/log.20210802-123758.txt - # experiment.RH1E/output_metadata.json - # ... 
- _LOG.debug("Destination path is '%s'", dst_path) - with htimer.TimedScope(logging.INFO, "Compressing"): - dir_name = os.path.dirname(src_dir) - base_name = os.path.basename(src_dir) - hdbg.dassert_ne(base_name, "", "src_dir=%s", src_dir) - cmd = "" - if dir_name != "": - cmd += f"cd {dir_name} && " - cmd += f"tar czf {dst_path} {base_name}" - hsystem.system(cmd) - _LOG.info( - "The size of '%s' is %s", - dst_path, - hsystem.du(dst_path, human_format=True), - ) - # Test expanding the tgz. The package should expand to the original dir. - # > tar tf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz - # experiment.RH1E/ - # experiment.RH1E/log.20210802-123758.txt - # experiment.RH1E/output_metadata.json - _LOG.info("Testing archive") - cmd = f"tar tvf {dst_path}" - hsystem.system(cmd, log_level=logging.INFO, suppress_output=False) - # Copy to S3. - s3_file_path = os.path.join(s3_path, os.path.basename(dst_path)) - _LOG.info("Copying '%s' to '%s'", dst_path, s3_file_path) - hdbg.dassert_file_exists(dst_path) - s3fs_ = get_s3fs(aws_profile) - # TODO(gp): Make sure the S3 dir exists. - s3fs_.put(dst_path, s3_file_path) - _LOG.info("Data archived on S3 to '%s'", s3_file_path) - return s3_file_path - - -def copy_data_from_s3_to_local_dir( - src_s3_dir: str, dst_local_dir: str, aws_profile: str -) -> None: - """ - Copy data from S3 to a local dir. - - :param src_s3_dir: path on S3 storing the data to copy - :param scratch_space_path: local path on scratch space - :param aws_profile: AWS profile to use - """ - _LOG.debug( - "Copying input data from %s to %s", - src_s3_dir, - dst_local_dir, - ) - cmd = f"aws s3 sync {src_s3_dir} {dst_local_dir} --profile {aws_profile}" - hsystem.system(cmd, suppress_output=False, log_level="echo") - - -def retrieve_archived_data_from_s3( - s3_file_path: str, - dst_dir: str, - aws_profile: Optional[str] = None, - incremental: bool = True, -) -> str: - """ - Retrieve tgz file from S3, unless it's already present (incremental mode). 
- - :param s3_file_path: path to the S3 file with the archived data. E.g., - `s3://.../experiment.20210802-121908.tgz` - :param dst_dir: destination directory where to save the data - :param aws_profile: the profile to use. We use a string and not an - `AwsProfile` since this is typically the outermost caller in the stack, - and it doesn't reuse an S3 fs object - :param incremental: skip if the tgz file is already present locally - :return: path with the local tgz file - """ - _LOG.info( - "# Retrieving archive from '%s' to '%s' with aws_profile='%s'", - s3_file_path, - dst_dir, - aws_profile, - ) - dassert_is_s3_path(s3_file_path) - # Download the tgz file. - hio.create_dir(dst_dir, incremental=True) - dst_file = os.path.join(dst_dir, os.path.basename(s3_file_path)) - _LOG.debug(hprint.to_str("s3_file_path dst_dir dst_file")) - if incremental and os.path.exists(dst_file): - _LOG.warning("Found '%s': skipping downloading", dst_file) - else: - # Download. - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(s3_file_path, s3fs_) - _LOG.debug("Getting from s3: '%s' -> '%s", s3_file_path, dst_file) - s3fs_.get(s3_file_path, dst_file) - _LOG.info("Saved to '%s'", dst_file) - return dst_file - - -def expand_archived_data(src_tgz_file: str, dst_dir: str) -> str: - """ - Expand an S3 tarball storing results of an experiment. - - E.g., - - given a tgz file like `s3://.../experiment.20210802-121908.tgz` (which is the - result of compressing a dir like `/app/.../experiment.RH1E`) - - expand it into a dir `{dst_dir}/experiment.RH1E` - - :param src_tgz_file: path to the local file with the archived data. E.g., - `/.../experiment.20210802-121908.tgz` - :param dst_dir: directory where expand the archive tarball - :return: dir with the expanded data (e.g., `{dst_dir/experiment.RH1E`) - """ - _LOG.debug("Expanding '%s'", src_tgz_file) - # Get the name of the including dir, e.g., `experiment.RH1E`. 
- cmd = f"cd {dst_dir} && tar tzf {src_tgz_file} | head -1" - rc, enclosing_tgz_dir_name = hsystem.system_to_one_line(cmd) - _ = rc - _LOG.debug(hprint.to_str("enclosing_tgz_dir_name")) - tgz_dst_dir = os.path.join(dst_dir, enclosing_tgz_dir_name) - if os.path.exists(tgz_dst_dir): - hdbg.dassert_dir_exists(dst_dir) - _LOG.info( - "While expanding '%s' dst dir '%s' already exists: skipping", - src_tgz_file, - tgz_dst_dir, - ) - else: - # Expand the tgz file. - # The output should be the original compressed dir under `{dst_dir}`. - # E.g., - # > tar tzf /app/.../experiment.20210802-133901.tgz - # experiment.RH1E/ - # experiment.RH1E/log.20210802-133859.txt - # experiment.RH1E/result_0/ - with htimer.TimedScope(logging.INFO, "Decompressing"): - hdbg.dassert_file_exists(src_tgz_file) - cmd = f"cd {dst_dir} && tar xzf {src_tgz_file}" - hsystem.system(cmd) - hdbg.dassert_dir_exists(tgz_dst_dir) - # Return `{dst_dir}/experiment.RH1E`. - return tgz_dst_dir - - -def get_s3_bucket_from_stage( - stage: str, *, add_suffix: Optional[str] = None -) -> str: - """ - Retrieve the S3 bucket name based on the provided deployment stage. - - :param stage: the deployment stage, which can be 'test', 'preprod', - or 'prod'. - :param add_suffix: optional suffix to append to the bucket name. - :return: return corresponding S3 bucket name. - """ - # Mapping of stages to their respective S3 bucket names. - _S3_BUCKET_BY_STAGE = { - "test": "cryptokaizen-data-test", - "preprod": "cryptokaizen-data.preprod", - "prod": "cryptokaizen-data", - } - # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. - # Retrieve the region from the environment variable or use the default region 'eu-north-1'. - region = os.environ.get("CSFY_AWS_DEFAULT_REGION", "eu-north-1") - # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. - if region == "ap-northeast-1": - _S3_BUCKET_BY_STAGE["preprod"] = "cryptokaizen-data-tokyo.preprod" - # Ensure the provided stage is valid. 
- hdbg.dassert_in(stage, _S3_BUCKET_BY_STAGE) - s3_bucket = _S3_BUCKET_BY_STAGE[stage] - # Append the suffix to the bucket name if provided. - if add_suffix: - s3_bucket = os.path.join(s3_bucket, add_suffix) - return s3_bucket diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py deleted file mode 100644 index f86f50342..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Import as: - -import helpers.hsecrets as hsecret -""" - -import atexit -import json -import sys -import warnings -from typing import Any, Dict, Optional - -from botocore.client import BaseClient -from botocore.exceptions import ClientError - -import helpers.hdbg as hdbg - - -def get_secrets_client(aws_profile: str) -> BaseClient: - """ - Return client to work with AWS Secrets Manager in the specified region. - """ - import helpers.haws as haws - - session = haws.get_session(aws_profile) - client = session.client(service_name="secretsmanager") - return client - - -def _get_flag_value(flag: str) -> str: - """ - Return flag value with concatenated date string. - - E.g., for flag = 'pytest' return 'pytest_20240619'. - """ - # Import here to avoid import extra dependencies in the thin environment. - import helpers.hdatetime as hdateti - - timestamp = hdateti.get_current_date_as_string("naive_ET") - updated_flag = "_".join([flag, timestamp]) - return updated_flag - - -def update_usedby( - secret_name: str, - secret_value: Dict[str, Any], - usedBy: str, - *, - remove: bool = False, -) -> Dict[str, Any]: - """ - Update the value of `usedBy` attribute from `secret_value` in AWS secrets - manager to lock the key. Unlock the key at the end of process using default - value of `usedBy`. 
- - :param secret_name: SecretId of record to be updated. - :param secret_value: Current value of SecretString. - :param usedBy: value of `usedBy` to be updated. Used to remove from - list on deallocation of resource, i.e., when remove is True. - :param remove: Boolean to decide addition or removal of `usedBy` value - in the secret value list of scripts. Default is False. - :return secret_value: SecretString with updated `usedBy` script. - """ - hdbg.dassert_isinstance(secret_name, str) - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # Modify value of used by in secret value. - if not remove: - try: - secret_value["usedBy"].append(usedBy) - except KeyError: - secret_value["usedBy"] = [usedBy] - else: - secret_value["usedBy"].remove(usedBy) - # Update the modified secret value in AWS secret manager. - client.update_secret( - SecretId=secret_name, SecretString=json.dumps(secret_value) - ) - return secret_value - - -def lock_secret( - secret_name: str, secret_value: Dict[str, Any] -) -> Optional[Dict[str, Any]]: - """ - Lock access to a secret to the current script. - - Lock access to secret key with trading keyword in `secret_name`, for a - runtime instance of a script, to avoid parallel run. - Add the script name to `usedBy` list in the AWS secret manager. - Raise error if the same script tries to access a locked key. - - :param secret_name: SecretId of record to be updated. - :param secret_value: Current value of SecretString. - :return secret_value: SecretString with updated `usedBy` script if not - already locked. - """ - current_script = sys.argv[0].split("/")[-1] - # Check if the current script is already using this secret. - current_usedBy = list( - filter(lambda x: current_script in x, secret_value.get("usedBy", [])) - ) - # Check current value of usedBy to determine further action. - if not current_usedBy: - # Fetch and update value of usedBy if not locked. 
- usedBy = _get_flag_value(current_script) - secret_value = update_usedby(secret_name, secret_value, usedBy) - # Release secret key lock on termination. - atexit.register( - update_usedby, secret_name, secret_value, usedBy, remove=True - ) - else: - # Raise warning of locked resource with current use info. - # raise RuntimeError() - warnings.warn( - f"Secret key is already in use by {current_usedBy[0]}", - RuntimeWarning, - ) - return secret_value - - -# TODO(Juraj): add support to access secrets for different profiles, not important rn -def get_secret(secret_name: str) -> Optional[Dict[str, Any]]: - """ - Fetch secret values(s) from AWS secrets manager. - - :return a dictionary of key-value pairs. E.g., `get_secret('binance')` returns - ``` - { - 'apiKey': '', - 'secret': '' - } - ``` - """ - # TODO(Juraj): This assertion can't be applied universally. - # Check if the secret name format is valid. - # dassert_valid_secret(secret_name) - hdbg.dassert_isinstance(secret_name, str) - # Create a AWS Secrets Manager client. - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html - # for the full list of exceptions. - # Define access key to check the entity requesting for secret key. - access_key = "trading" - try: - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - secret_string = get_secret_value_response["SecretString"] - hdbg.dassert_isinstance(secret_string, str) - secret_val = json.loads(secret_string) - # Check access entity value to lock secret key to avoid parallel run. - if access_key in secret_name: - # TODO(Juraj): Temporarily disabled in #Cmtask10068. - # secret_val = lock_secret(secret_name, secret_val) - pass - except ClientError as e: - if e.response["Error"]["Code"] == "ResourceNotFoundException": - # Let user know the secret does not exist. 
- raise ValueError(f"No such secret: {secret_name}") from e - # If not yet implemented handler then just re-raise. - raise e - return secret_val - - -# TODO(Juraj): add support to store secrets in different regions, not important rn. -def store_secret( - secret_name: str, secret_value: Dict[str, str], *, description: str = "" -) -> Optional[bool]: - """ - Store secret values(s) into AWS secrets manager, specify secret as a dict - of key-value pairs. - - :return: bool representing whether writing was successful or not - """ - hdbg.dassert_isinstance(secret_name, str) - # Create a AWS Secrets Manager client. - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # See - # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_CreateSecret.html - # for the full list of exceptions. - try: - create_secret_value_response = client.create_secret( - Name=secret_name, - Description=description, - SecretString=json.dumps(secret_value), - ) - # If no exception was thrown and we get back the name we passed in the - # response then the secret was stored successfully. - return_name = create_secret_value_response["Name"] - hdbg.dassert_isinstance(return_name, str) - res: bool = create_secret_value_response["Name"] == secret_name - return res - except ClientError as e: - if e.response["Error"]["Code"] == "ResourceExistsException": - # Let user know the secret with this name already exists. - raise ValueError( - "Secret with this name already exists:", secret_name - ) from e - # If not yet implemented handler then just re-raise. - raise e - # If we did not return inside try block then something went wrong. - return False - - -# TODO(Juraj): this might be deprecated since this is only fit for exchange API keys -def dassert_valid_secret(secret_id: str) -> None: - """ - Enforce that the valid format is `exchange_id.stage.account_type.num`. 
- """ - values = secret_id.split(".") - hdbg.dassert_eq(len(values), 4) - hdbg.dassert_in( - values[0], - [ - "binance", - "bitfinex", - "coinbase", - "coinbaseprime", - "coinbasepro", - "ftx", - "gateio", - "huobi", - "kraken", - "kucoin", - "test", - ], - ) - hdbg.dassert_in(values[1], ["local", "preprod"]) - hdbg.dassert_in(values[2], ["trading", "sandbox"]) - hdbg.dassert( - values[3].isnumeric(), "values[3] should be numeric, got: %s", values[3] - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py deleted file mode 100644 index 5aa297e5d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py +++ /dev/null @@ -1,1160 +0,0 @@ -""" -Identify on which server we are running. - -Import as: - -import helpers.hserver as hserver -""" - -import functools -import logging -import os -import shutil -import subprocess -from typing import Dict, List, Optional, Tuple - -# This module should depend only on: -# - Python standard modules -# See `helpers/dependencies.txt` for more details - -_LOG = logging.getLogger(__name__) - -_WARNING = "\033[33mWARNING\033[0m" - - -def _print(msg: str) -> None: - _ = msg - # _LOG.info(msg) - if False: - print(msg) - - -# Copied from hprint to avoid import cycles. -def _indent(txt: str, *, num_spaces: int = 2) -> str: - """ - Add `num_spaces` spaces before each line of the passed string. - """ - spaces = " " * num_spaces - txt_out = [] - for curr_line in txt.split("\n"): - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - res = "\n".join(txt_out) - return res - - -# We can't use `hsystem` to avoid import cycles. 
-def _system_to_string(cmd: str) -> Tuple[int, str]: - """ - Run a command and return the output and the return code. - - :param cmd: command to run - :return: tuple of (return code, output) - """ - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, - shell=True, - text=True, - ) - rc = result.returncode - output = result.stdout - output = output.strip() - return rc, output - - -# ############################################################################# -# Host -# ############################################################################# - - -# We can't rely only on the name / version of the host to infer where we are -# running, since inside Docker the name of the host is like `01a7e34a82a5`. Of -# course, there is no way to know anything about the host for security reason, -# so we pass this value from the external environment to the container, through -# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_OS_VERSION`). - - -# Sometimes we want to know if: -# - The processor is x86_64 or arm64 -# - The host is Mac or Linux -# - We are running on a Causify machine or on an external machine -# - We are inside CI or not -# TODO(gp): Grep all the use cases in the codebase and use the right function. - - -def get_host_user_name() -> Optional[str]: - """ - Return the name of the user running the host. - """ - return os.environ.get("CSFY_HOST_USER_NAME", None) - - -def get_dev_csfy_host_names() -> Tuple[str]: - """ - Return the names of the Causify dev servers. - """ - host_names = ("dev1", "dev2", "dev3") - return list(host_names) - - -# TODO(gp): -> is_inside_docker_container() -def is_inside_docker() -> bool: - """ - Return whether we are inside a container or not. 
- """ - # From https://stackoverflow.com/questions/23513045 - ret = os.path.exists("/.dockerenv") - return ret - - -def _get_host_name() -> str: - """ - Return the name of the host (not the machine) on which we are running. - - If we are inside a Docker container, we use the name of the host passed - through the `CSFY_HOST_NAME` env var. - """ - if is_inside_docker(): - host_name = os.environ["CSFY_HOST_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_name = os.uname()[1] - _LOG.debug("host_name=%s", host_name) - return host_name - - -def _get_host_os_name() -> str: - """ - Return the name of the OS on which we are running (e.g., "Linux", - "Darwin"). - - If we are inside a Docker container, we use the name of the OS passed - through the `CSFY_HOST_OS_NAME` env var. - """ - if is_inside_docker(): - host_os_name = os.environ["CSFY_HOST_OS_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_name = os.uname()[0] - _LOG.debug("host_os_name=%s", host_os_name) - return host_os_name - - -def _get_host_os_version() -> str: - """ - Return the version of the OS on which we are running. - - If we are inside a Docker container, we use the version of the OS passed - through the `CSFY_HOST_OS_VERSION` env var. - """ - if is_inside_docker(): - host_os_version = os.environ["CSFY_HOST_OS_VERSION"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_version = os.uname()[2] - _LOG.debug("host_os_version=%s", host_os_version) - return host_os_version - - -def is_host_csfy_server() -> bool: - """ - Return whether we are running on a Causify dev server. 
- """ - host_name = _get_host_name() - ret = host_name in get_dev_csfy_host_names() - return ret - - -_MAC_OS_VERSION_MAPPING = { - "Catalina": "19.", - "Monterey": "21.", - "Ventura": "22.", - "Sequoia": "24.", -} - - -def get_host_mac_version() -> str: - """ - Get the macOS version (e.g., "Catalina", "Monterey", "Ventura"). - """ - host_os_version = _get_host_os_version() - for version, tag in _MAC_OS_VERSION_MAPPING.items(): - if tag in host_os_version: - return version - raise ValueError(f"Invalid host_os_version='{host_os_version}'") - - -def is_host_mac_version(version: str) -> bool: - """ - Return whether we are running on a Mac with a specific version (e.g., - "Catalina", "Monterey", "Ventura"). - """ - assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'" - host_mac_version = get_host_mac_version() - ret = version.lower() == host_mac_version.lower() - return ret - - -def is_host_gp_mac() -> bool: - """ - Return whether we are running on a Mac owned by GP. - - This is used to check if we can use a specific feature before - releasing it to all the users. - """ - host_name = _get_host_name() - ret = host_name.startswith("gpmac.") - return ret - - -# ############################################################################# -# Detect server. -# ############################################################################# - - -def is_inside_ci() -> bool: - """ - Return whether we are running inside the Continuous Integration flow. - """ - if "CSFY_CI" not in os.environ: - ret = False - else: - ret = os.environ["CSFY_CI"] != "" - return ret - - -def is_inside_unit_test() -> bool: - """ - Return whether we are running code insider the regressions. - """ - ret = "PYTEST_CURRENT_TEST" in os.environ - return ret - - -# TODO(gp): Remove! 
-def is_dev_csfy() -> bool: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws', - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', - # machine='x86_64' - host_name = os.uname()[1] - host_names = ("dev1", "dev2", "dev3") - csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names - return is_dev_csfy_ - - -# TODO(gp): This is obsolete and should be removed. -def is_dev4() -> bool: - """ - Return whether it's running on dev4. - """ - host_name = os.uname()[1] - csfy_host_name = os.environ.get("CSFY_HOST_NAME", None) - dev4 = "cf-spm-dev4" - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev4_ = dev4 in (host_name, csfy_host_name) - # - if not is_dev4_: - dev4 = "cf-spm-dev8" - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev4_ = dev4 in (host_name, csfy_host_name) - return is_dev4_ - - -def is_host_mac(*, version: Optional[str] = None) -> bool: - """ - Return whether we are running on macOS and, optionally, on a specific - version. - - :param version: check whether we are running on a certain macOS version (e.g., - `Catalina`, `Monterey`) - """ - _LOG.debug("version=%s", version) - host_os_name = os.uname()[0] - _LOG.debug("os.uname()=%s", str(os.uname())) - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - _LOG.debug( - "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name - ) - is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" - if version is None: - # The user didn't request a specific version, so we return whether we - # are running on a Mac or not. - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - else: - # The user specified a version: if we are not running on a Mac then we - # return False, since we don't even have to check the macOS version. 
- if not is_mac_: - _LOG.debug("is_mac_=%s", is_mac_) - return False - # Check the macOS version we are running. - if version == "Catalina": - # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: - # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 - macos_tag = "19.6" - elif version == "Monterey": - # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: - # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 - macos_tag = "21." - elif version == "Ventura": - macos_tag = "22." - elif version == "Sequoia": - # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: - # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 - macos_tag = "24." - else: - raise ValueError(f"Invalid version='{version}'") - _LOG.debug("macos_tag=%s", macos_tag) - host_os_version = os.uname()[2] - # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; - # root:xnu-6153.141.2~1/RELEASE_X86_64' - csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") - _LOG.debug( - "host_os_version=%s csfy_host_os_version=%s", - host_os_version, - csfy_host_os_version, - ) - is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - - -def is_prod_csfy() -> bool: - """ - Detect whether we are running in a Causify production container. - - This env var is set inside `devops/docker_build/prod.Dockerfile`. - """ - # TODO(gp): CK -> CSFY - return bool(os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", False)) - - -# TODO(gp): Obsolete. -def is_ig_prod() -> bool: - """ - Detect whether we are running in an IG production container. - - This env var is set inside `//lime/devops_cf/setenv.sh` - """ - # CF sets up `DOCKER_BUILD` so we can use it to determine if we are inside - # a CF container or not. - # print("os.environ\n", str(os.environ)) - return bool(os.environ.get("DOCKER_BUILD", False)) - - -# TODO(Grisha): consider adding to `setup_to_str()`. 
-def is_inside_ecs_container() -> bool: - """ - Detect whether we are running in an ECS container. - """ - # When deploying jobs via ECS the container obtains credentials based - # on passed task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - ret = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ - return ret - - -# ############################################################################# - - -def is_external_linux() -> bool: - """ - Detect whether we are running on a non-server/non-CI Linux machine. - - This returns true when we run on the machine of an intern, or a non- - CSFY contributor. - """ - if is_host_csfy_server() or is_inside_ci(): - # Dev servers and CI are not external Linux systems. - ret = False - else: - # We need to check if the host is Linux. - host_os_name = _get_host_os_name() - ret = host_os_name == "Linux" - return ret - - -def is_external_dev() -> bool: - """ - Detect whether we are running on an system outside of Causify. - - E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non- - CSFY machine. - """ - ret = is_host_mac() or is_external_linux() - return ret - - -# ############################################################################# -# Set up consistency. -# ############################################################################# - - -# TODO(gp): Update this. -def _get_setup_signature() -> str: - """ - Dump all the variables that are used to make a decision about the values of - the functions in `_get_setup_settings()`. - - This function is used to mock the state of the system for testing - purposes. 
- """ - cmds = [] - # is_prod_csfy() - cmds.append('os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", "*undef*")') - # is_dev4() - # is_dev_csfy() - # is_ig_prod() - cmds.append('os.environ.get("CSFY_HOST_NAME", "*undef*")') - # is_inside_ci() - cmds.append('os.environ.get("CSFY_CI", "*undef*")') - # is_mac() - cmds.append("os.uname()[0]") - cmds.append("os.uname()[2]") - # is_external_linux() - cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "*undef*")') - # Build an array of strings with the results of executing the commands. - results = [] - for cmd in cmds: - result_tmp = cmd + "=" + str(eval(cmd)) - results.append(result_tmp) - # Join the results into a single string. - result = "\n".join(results) - return result - - -# The valid set ups are: -# - Running on a Causify server (e.g., `dev1`, `dev2`, `dev3`) -# - Container -# - Host -# - External Mac (GP, Paul, interns, contributors) -# - Container -# - Host -# - External Linux (interns, contributors) -# - Container -# - Host -# - Prod container on Linux -# - Container -# - CI -# - Container - - -def is_inside_docker_container_on_csfy_server() -> bool: - """ - Return whether we are running on a Docker container on a Causify server. - """ - ret = is_inside_docker() and is_host_csfy_server() - return ret - - -def is_outside_docker_container_on_csfy_server() -> bool: - """ - Return whether we are running outside a Docker container on a Causify - server. - """ - ret = not is_inside_docker() and is_host_csfy_server() - return ret - - -def is_inside_docker_container_on_host_mac() -> bool: - """ - Return whether we are running on a Docker container on a Mac host. - """ - ret = is_inside_docker() and is_host_mac() - return ret - - -def is_outside_docker_container_on_host_mac() -> bool: - """ - Return whether we are running outside of a Docker container on a Mac host. 
- """ - ret = not is_inside_docker() and is_host_mac() - return ret - - -def is_inside_docker_container_on_external_linux() -> bool: - """ - Return whether we are running on a Docker container on an external Linux. - """ - ret = is_inside_docker() and is_external_linux() - return ret - - -def is_outside_docker_container_on_external_linux() -> bool: - """ - Return whether we are outside of a Docker container on an external Linux. - """ - ret = not is_inside_docker() and is_external_linux() - return ret - - -def _get_setup_settings() -> List[Tuple[str, bool]]: - """ - Return a list of tuples with the name and value of the current server - setup. - - E.g., - ```bash - is_inside_docker_container_on_csfy_server=True - is_outside_docker_container_on_csfy_server=False - is_inside_docker_container_on_host_mac=False - is_outside_docker_container_on_host_mac=True - is_inside_docker_container_on_external_linux=False - is_outside_docker_container_on_external_linux=True - is_dev4=False - is_ig_prod=False - is_prod_csfy=False - is_inside_ci=False - ``` - """ - func_names = [ - "is_inside_docker_container_on_csfy_server", - "is_outside_docker_container_on_csfy_server", - # - "is_inside_docker_container_on_host_mac", - "is_outside_docker_container_on_host_mac", - # - "is_inside_docker_container_on_external_linux", - "is_outside_docker_container_on_external_linux", - # - "is_dev4", - "is_ig_prod", - "is_prod_csfy", - "is_inside_ci", - ] - # Store function name / value pairs as tuples. - setups = [] - for func_name in func_names: - val = eval(f"{func_name}()") - setups.append((func_name, val)) - return setups - - -def _setup_to_str(setups: List[Tuple[str, bool]]) -> str: - """ - Return a string representation of the current server setup configuration. - - :return: string with each setting on a new line, aligned with - padding - """ - # Find maximum length of setting names. - max_len = max(len(name) for name, _ in setups) + 1 - # Format each line with computed padding. 
- txt = [] - for name, value in setups: - txt.append(f"{name:<{max_len}}{value}") - return "\n".join(txt) - - -def _dassert_setup_consistency() -> None: - """ - Check that one and only one setup configuration is true. - - This is used to ensure that the setup configuration is one of the - expected ones and uniquely defined. - """ - setups = _get_setup_settings() - # One and only one set-up should be true. - sum_ = sum([value for _, value in setups]) - if sum_ != 1: - msg = "One and only one set-up config should be true:\n" - msg += _setup_to_str(setups) + "\n" - msg += "_get_setup_signature() returns:\n" - msg += _indent(_get_setup_signature()) - raise ValueError(msg) - - -# If the env var is not defined then we want to check. The only reason to skip -# it's if the env var is defined and equal to False. -check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False" -_is_called = False -if check_repo: - # The repo check is executed at import time, before the logger is initialized. - # To debug the repo check, enable the following block. - if False: - import helpers.hdbg as hdbg - - hdbg.init_logger(verbosity=logging.DEBUG) - # Compute and cache the result. - if not _is_called: - _dassert_setup_consistency() - _is_called = True -else: - _LOG.warning("Skipping repo check in %s", __file__) - - -# ############################################################################# -# Detect Docker functionalities. -# ############################################################################# - - -# Each function below should run without asserting. E.g., when we check if -# docker supports privileged mode, we should check if `docker` is available, -# and then if docker supports privileged mode, instead of asserting if `docker` -# doesn't exist on the system. - - -@functools.lru_cache() -def has_docker() -> bool: - """ - Return whether we have Docker installed. 
- """ - return shutil.which("docker") is not None - - -@functools.lru_cache() -def docker_needs_sudo() -> bool: - """ - Return whether Docker commands need to be run with sudo. - """ - if not has_docker(): - return False - # This check is required to ensure it does not cause issues when running on ECS - # Fargate through Airflow, since ECS Fargate does not support either DinD - # or sibling containers. - # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/fargate-security-considerations.html - # TODO(heanh): Check if we can use `is_inside_ecs_container()` to check if - # we are inside Airflow. - if not has_dind_support() and not use_docker_sibling_containers(): - return False - # Another way to check is to see if your user is in the docker group: - # > groups | grep docker - rc = os.system("docker run hello-world 2>&1 >/dev/null") - if rc == 0: - return False - # - rc = os.system("sudo docker run hello-world 2>&1 >/dev/null") - if rc == 0: - return True - assert False, "Failed to run docker" - - -def get_docker_executable() -> str: - """ - Return the docker executable, wrapper with `sudo` if needed. - """ - docker_needs_sudo_ = docker_needs_sudo() - executable = "docker" - if docker_needs_sudo_: - executable = "sudo " + executable - return executable - - -@functools.lru_cache() -def has_docker_privileged_mode() -> bool: - """ - Return whether the current container supports privileged mode. - - Docker privileged mode gives containers nearly all the same capabilities as - the host system's kernel. 
- - Privileged mode allows to: - - run Docker-in-Docker - - mount filesystems - """ - if not has_docker(): - return False - docker_executable = get_docker_executable() - cmd = f"{docker_executable} run --privileged hello-world 2>&1 >/dev/null" - rc = os.system(cmd) - _print(f"cmd={cmd} -> rc={rc}") - has_privileged_mode = rc == 0 - return has_privileged_mode - - -def has_docker_sibling_containers_support() -> bool: - """ - Return whether the current container supports running sibling containers. - """ - # We need to be inside a container to run sibling containers. - if not is_inside_docker(): - return False - # We assume that if the socket exists then we can run sibling containers. - if os.path.exists("/var/run/docker.sock"): - return True - return False - - -def has_docker_children_containers_support() -> bool: - """ - Return whether the current container supports Docker-in-Docker. - """ - # We need to be inside a container to run docker-in-docker. - if not is_inside_docker(): - return False - # We assume that if we have privileged mode then we can run docker-in-docker. - return has_docker_privileged_mode() - - -def is_csfy_dind_enabled() -> bool: - """ - Return whether `CSFY_ENABLE_DIND` is enabled (e.g. users opt-in to use - Docker-in-Docker). - """ - val = os.environ.get("CSFY_ENABLE_DIND", "0") - return val == "1" or val.lower() in ("true", "yes") - - -def can_run_docker_from_docker() -> bool: - """ - Return whether we can run docker from docker, either as children or sibling - container. 
- """ - return ( - has_docker_children_containers_support() - or has_docker_sibling_containers_support() - ) - - -def get_docker_info() -> str: - txt_tmp: List[str] = [] - # - has_docker_ = has_docker() - txt_tmp.append(f"has_docker={has_docker_}") - # - cmd = r"docker version --format '{{.Server.Version}}'" - _, docker_version = _system_to_string(cmd) - txt_tmp.append(f"docker_version='{docker_version}'") - # - docker_needs_sudo_ = docker_needs_sudo() - txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo_}") - # - has_privileged_mode_ = has_docker_privileged_mode() - txt_tmp.append(f"has_privileged_mode={has_privileged_mode_}") - # - is_inside_docker_ = is_inside_docker() - txt_tmp.append(f"is_inside_docker={is_inside_docker_}") - # - if is_inside_docker_: - has_docker_sibling_containers_support_ = ( - has_docker_sibling_containers_support() - ) - has_docker_children_containers_support_ = ( - has_docker_children_containers_support() - ) - else: - has_docker_sibling_containers_support_ = "*undef*" - has_docker_children_containers_support_ = "*undef*" - txt_tmp.append( - f"has_docker_sibling_containers_support={has_docker_sibling_containers_support_}" - ) - txt_tmp.append( - f"has_docker_children_containers_support={has_docker_children_containers_support_}" - ) - # Format as title with indented items. - txt = "Docker info" + "\n" + _indent("\n".join(txt_tmp)) - return txt - - -def _is_mac_version_with_sibling_containers() -> bool: - if not is_host_mac(): - return False - mac_version = get_host_mac_version() - return mac_version in ("Monterey", "Ventura", "Sequoia") - - -# ############################################################################# -# Detect Docker functionalities, based on the set-up. -# ############################################################################# - - -# TODO(gp): These approach is sub-optimal. We deduce what we can do based on the -# name of the set-up. We should base our decisions on the actual capabilities of -# the system. 
- - -# TODO(gp): -> has_docker_privileged_mode -@functools.lru_cache() -def has_dind_support() -> bool: - """ - Return whether the current container supports privileged mode. - - This is needed to use Docker-in-Docker. - """ - _print(f"is_inside_docker()={is_inside_docker()}") - if not is_inside_docker(): - # Outside Docker there is no privileged mode. - _print("-> ret = False") - return False - # TODO(gp): Not sure this is really needed since we do this check - # after enable_privileged_mode controls if we have dind or not. - if _is_mac_version_with_sibling_containers(): - return False - # TODO(gp): This part is not multi-process friendly. When multiple - # processes try to run this code they interfere. A solution is to run `ip - # link` in the entrypoint and create a `has_docker_privileged_mode` file - # which contains the value. - # We rely on the approach from https://stackoverflow.com/questions/32144575 - # to check if there is support for privileged mode. - # Sometimes there is some state left, so we need to clean it up. - # TODO(Juraj): this is slow and inefficient, but works for now. - cmd = "sudo docker run hello-world" - rc = os.system(cmd) - _print(f"cmd={cmd} -> rc={rc}") - has_dind = rc == 0 - # dind is supported on both Mac and GH Actions. - # TODO(Juraj): HelpersTask16. - # if check_repo: - # if hserver.is_inside_ci(): - # # Docker-in-docker is needed for GH actions. For all other builds is optional. 
- # assert has_dind, ( - # f"Expected privileged mode: has_dind={has_dind}\n" - # + hserver.setup_to_str() - # ) - # else: - # only_warning = True - # _raise_invalid_host(only_warning) - # return False - # else: - # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") - # print( - # _WARNING - # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" - # + f"'{csfy_repo_config}'" - # ) - return has_dind - - -def _raise_invalid_host(only_warning: bool) -> None: - host_os_name = os.uname()[0] - am_host_os_name = os.environ.get("AM_HOST_OS_NAME", None) - msg = ( - f"Don't recognize host: host_os_name={host_os_name}, " - f"am_host_os_name={am_host_os_name}" - ) - if only_warning: - _LOG.warning(msg) - else: - raise ValueError(msg) - - -# TODO(gp): -> use_docker_in_docker_support -def enable_privileged_mode() -> bool: - """ - Return whether a host supports privileged mode for its containers. - """ - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//dev_tools",): - ret = False - else: - # Keep this in alphabetical order. - if is_dev_csfy(): - ret = True - elif is_inside_ci(): - ret = True - elif is_external_linux(): - ret = True - elif is_host_mac(): - mac_version = get_host_mac_version() - if mac_version == "Catalina": - # Docker for macOS Catalina supports dind. - ret = True - elif mac_version in ("Monterey", "Ventura", "Sequoia"): - # Docker doesn't seem to support dind for these versions of macOS. - ret = False - else: - raise ValueError(f"Invalid version='{mac_version}'") - # Docker doesn't seem to support dind for these versions of macOS. - ret = False - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): -> use_docker_sudo_in_commands -def has_docker_sudo() -> bool: - """ - Return whether Docker commands should be run with `sudo` or not. 
- """ - # Keep this in alphabetical order. - if is_dev_csfy(): - ret = True - elif is_external_linux(): - ret = True - elif is_inside_ci(): - ret = False - elif is_host_mac(): - # macOS runs Docker with sudo by default. - # TODO(gp): This is not true. - ret = True - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): -> use_docker_sibling_container_support -def use_docker_sibling_containers() -> bool: - """ - Return whether to use Docker sibling containers. - - Using sibling containers requires that all Docker containers are in - the same network so that they can communicate with each other. - """ - return has_docker_sibling_containers_support() - # if is_dev_csfy(): - # val = True - # else: - # val = is_dev4() or _is_mac_version_with_sibling_containers() - # return val - - -# TODO(gp): -> use_docker_main_network -def use_main_network() -> bool: - # TODO(gp): Replace this. - return use_docker_sibling_containers() - - -# TODO(gp): -> get_docker_shared_data_dir_map -def get_shared_data_dirs() -> Optional[Dict[str, str]]: - """ - Get path of dir storing data shared between different users on the host and - Docker. - - E.g., one can mount a central dir `/data/shared`, shared by multiple - users, on a dir `/shared_data` in Docker. - """ - # TODO(gp): Keep this in alphabetical order. 
- if is_dev4(): - shared_data_dirs = { - "/local/home/share/cache": "/cache", - "/local/home/share/data": "/data", - } - elif is_dev_csfy(): - shared_data_dirs = { - "/data/shared": "/shared_data", - "/data/shared2": "/shared_data2", - "/data/shared_k8s": "/shared_k8s", - "/data/shared_test": "/shared_test", - } - elif is_external_dev() or is_inside_ci() or is_prod_csfy(): - shared_data_dirs = None - else: - shared_data_dirs = None - only_warning = True - _raise_invalid_host(only_warning) - return shared_data_dirs - - -def use_docker_network_mode_host() -> bool: - # TODO(gp): Not sure this is needed any more, since we typically run in - # bridge mode. - ret = is_host_mac() or is_dev_csfy() - ret = False - if ret: - assert use_docker_sibling_containers() - return ret - - -def use_docker_db_container_name_to_connect() -> bool: - """ - Connect to containers running DBs just using the container name, instead of - using port and localhost / hostname. - """ - if _is_mac_version_with_sibling_containers(): - # New Macs don't seem to see containers unless we connect with them - # directly with their name. - ret = True - else: - ret = False - if ret: - # This implies that we are using Docker sibling containers. - assert use_docker_sibling_containers() - return ret - - -# TODO(gp): This seems redundant with use_docker_sudo_in_commands -def run_docker_as_root() -> bool: - """ - Return whether Docker should be run with root user. - - I.e., adding `--user $(id -u):$(id -g)` to docker compose or not. - """ - # Keep this in alphabetical order. - if is_dev4() or is_ig_prod(): - # //lime runs on a system with Docker remap which assumes we don't - # specify user credentials. - ret = True - elif is_dev_csfy(): - # On dev1 / dev2 we run as users specifying the user / group id as - # outside. 
- ret = False - elif is_external_linux(): - ret = False - elif is_inside_ci(): - # When running as user in GH action we get an error: - # ``` - # /home/.config/gh/config.yml: permission denied - # ``` - # see https://github.com/alphamatic/amp/issues/1864 - # So we run as root in GH actions. - ret = True - elif is_host_mac(): - ret = False - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): Probably obsolete -def get_docker_user() -> str: - """ - Return the user that runs Docker, if any. - """ - if is_dev4(): - val = "spm-sasm" - else: - val = "" - return val - - -# TODO(gp): Probably obsolete -def get_docker_shared_group() -> str: - """ - Return the group of the user running Docker, if any. - """ - if is_dev4(): - val = "sasm-fileshare" - else: - val = "" - return val - - -# TODO(gp): -> repo_config.yaml -def skip_submodules_test() -> bool: - """ - Return whether the tests in the submodules should be skipped. - - E.g. while running `i run_fast_tests`. - """ - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Why do we want to skip running tests? - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//dev_tools",): - # Skip running `amp` tests from `dev_tools`. - return True - return False - - -# ############################################################################# -# S3 buckets. -# ############################################################################# - - -def is_AM_S3_available() -> bool: - # AM bucket is always available. - val = True - _LOG.debug("val=%s", val) - return val - - -def is_CK_S3_available() -> bool: - val = True - if is_inside_ci(): - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//amp", "//dev_tools"): - # No CK bucket. 
- val = False - # TODO(gp): We might want to enable CK tests also on lemonade. - if repo_name in ("//lemonade",): - # No CK bucket. - val = False - elif is_dev4(): - # CK bucket is not available on dev4. - val = False - _LOG.debug("val=%s", val) - return val - - -# ############################################################################# -# Functions. -# ############################################################################# - - -def config_func_to_str() -> str: - """ - Print the value of all the config functions. - """ - ret: List[str] = [] - # Get the functions with: - # grep "def " helpers/hserver.py | sort | awk '{ print $2 }' | perl -i -ne 'print "$1\n" if /^([^\(]+)/' - function_names = [ - "enable_privileged_mode", - "get_docker_shared_group", - "get_docker_user", - "get_host_user_name", - "get_shared_data_dirs", - "has_dind_support", - "has_docker_sudo", - "is_AM_S3_available", - "is_CK_S3_available", - "is_csfy_dind_enabled", - "is_dev4", - "is_dev_csfy", - "is_external_linux", - "is_host_mac", - "is_ig_prod", - "is_inside_ci", - "is_inside_docker", - "is_inside_ecs_container", - "is_inside_unit_test", - "is_prod_csfy", - "run_docker_as_root", - "skip_submodules_test", - "use_docker_db_container_name_to_connect", - "use_docker_network_mode_host", - "use_docker_sibling_containers", - "use_main_network", - ] - for func_name in sorted(function_names): - try: - _LOG.debug("func_name=%s", func_name) - func_value = eval(f"{func_name}()") - except NameError: - func_value = "*undef*" - msg = f"{func_name}='{func_value}'" - ret.append(msg) - # Package. 
- result = "\n".join(ret) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py deleted file mode 100644 index b960bd8bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Import as: - -import helpers.hsftp as hsftp -""" - -import logging -import os -import subprocess -import sys -from io import BytesIO -from typing import List - -import helpers.haws as haws -import helpers.hmodule as hmodule -import helpers.hsecrets as hsecret - -hmodule.install_module_if_not_present("pysftp") - -import pysftp # noqa: E402 - -# Create a logger instance. -_LOG = logging.getLogger(__name__) - - -def install_lftp(): - """ - Install `lftp` using the system package manager. - """ - try: - subprocess.run(["sudo", "apt-get", "update"], check=True) - subprocess.run(["sudo", "apt-get", "install", "-y", "lftp"], check=True) - _LOG.info("`lftp` successfully installed using `apt`.") - except Exception as e: - _LOG.error("Failed to install `lftp`: %s", e) - sys.exit(1) - - -def check_lftp_connection(): - """ - Check if `lftp` is installed. - - If not, install it using the package manager. - """ - try: - # Check if `lftp` is available by trying to run it. - subprocess.run( - ["lftp", "--version"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - _LOG.info("`lftp` is already installed.") - except subprocess.CalledProcessError: - _LOG.error("Error occurred while checking `lftp` version.") - sys.exit(1) - except FileNotFoundError: - _LOG.warning("`lftp` is not installed. 
Attempting to install it...") - install_lftp() - - -def download_file_using_lftp( - remote_data_path: str, save_path: str, hostname: str, secret_name: str -) -> None: - """ - Download files from a remote SFTP server using `lftp` and a private SSH - key. - - :param remote_data_path: path to the remote directory on the SFTP - server from which files should be downloaded. - :param save_path: local directory where the downloaded files will be - saved. - :param hostname: hostname of the SFTP server. - :param secret_name: Name of the secret in AWS Secrets Manager that - stores the SFTP credentials, including the username and private - key. - :return: None. - """ - # Fetch the private key from AWS Secrets Manager - secret_dict = hsecret.get_secret(secret_name) - username = secret_dict["username"] - private_key = secret_dict["private_key"] - # Write the private key to a temporary file - with open("/tmp/temp_key.pem", "w") as temp_key_file: - temp_key_file.write(private_key) - # Ensure the key file has the correct permissions - os.chmod("/tmp/temp_key.pem", 0o600) - private_key_path = "/tmp/temp_key.pem" - # Construct the lftp command. - # The 'set sftp:connect-program' allows specifying custom SSH options for the SFTP connection. - # -o GSSAPIAuthentication=no: Disables GSSAPI to avoid unnecessary authentication mechanisms. - # -o StrictHostKeyChecking=no: Bypasses the host key verification prompt for new hosts. - # -a: Enables SSH agent forwarding for more seamless authentication. - # -x: Disables X11 forwarding (not needed for file transfer). - # -i {private_key_path}: Specifies the private key for SSH authentication. - # 'mirror --parallel=10': Downloads files from the remote server, with 10 parallel downloads to speed up the process. 
- lftp_cmd = ( - f"lftp -u {username}, -e \"set sftp:connect-program 'ssh -o GSSAPIAuthentication=no " - f"-o StrictHostKeyChecking=no -a -x -i {private_key_path}'; " - f'mirror --parallel=10 {remote_data_path} {save_path}; quit" ' - f"sftp://{hostname}" - ) - try: - _LOG.info("Executing lftp command: %s", lftp_cmd) - subprocess.run( - lftp_cmd, - shell=True, - check=True, - capture_output=True, - text=True, - ) - except subprocess.CalledProcessError as e: - _LOG.error( - "lftp command failed with error: %s", - e.stderr, - ) - - -def get_sftp_connection(hostname: str, secret_name: str) -> pysftp.Connection: - """ - Return SFTP connection object using a private key stored in AWS Secrets - Manager. - - :param hostname: hostname of the SFTP server. - :param secret_name: name of the secret in AWS Secrets Manager - containing the private key. - :return: active SFTP connection object. - """ - # Fetch the private key from AWS Secrets Manager - secret_dict = hsecret.get_secret(secret_name) - username = secret_dict["username"] - private_key = secret_dict["private_key"] - # Write the private key to a temporary file - with open("/tmp/temp_key.pem", "w") as temp_key_file: - temp_key_file.write(private_key) - # Ensure the key file has the correct permissions - os.chmod("/tmp/temp_key.pem", 0o600) - # Ensure pysftp is installed before attempting connection. - cnopts = pysftp.CnOpts() - # Disable host key checking. - cnopts.hostkeys = None - sftp = pysftp.Connection( - hostname, - username=username, - private_key="/tmp/temp_key.pem", - cnopts=cnopts, - ) - # Remove the temporary key file after establishing the connection - os.remove("/tmp/temp_key.pem") - return sftp - - -def download_file_to_s3( - sftp: pysftp.Connection, - s3_client: haws.BaseClient, - remote_dir: str, - filename: str, - s3_bucket: str, - s3_prefix: str, -) -> None: - """ - Download data from an SFTP server and upload it to an S3 bucket. - - :param sftp: An active SFTP Connection object. 
- :param s3_client: An AWS Base client object to interact with S3. - :param remote_dir: The directory on the SFTP server where the file - is located. - :param filename: The name of the file to download from the SFTP - server. - :param s3_bucket: The name of the S3 bucket to upload the file to. - :param s3_prefix: The prefix (path) in the S3 bucket where the file - will be stored. - :return: None. - """ - remote_path = f"{remote_dir}/{filename}" - s3_key = f"{s3_prefix}/{filename}" - with sftp.open(remote_path) as file_obj: - # Download data from sftp server. - file_data = file_obj.read() - try: - # Upload data to S3. - s3_client.upload_fileobj(BytesIO(file_data), s3_bucket, s3_key) - _LOG.info( - "Uploaded: %s to s3://%s/%s", remote_path, s3_bucket, s3_key - ) - except Exception as e: - _LOG.error("Failed to upload file to S3. Error: %s", str(e)) - raise e - - -def get_file_names(sftp: pysftp.Connection, sftp_remote_dir: str) -> List[str]: - """ - Retrieve all file names from a specified directory on a remote SFTP server. - - :param sftp: An active SFTP Connection object. - :param sftp_remote_dir: The directory on the SFTP server from which - to list file names. - :return: A list of file names present in the specified directory on - the SFTP server. - """ - file_names = [] - for item in sftp.listdir_attr(sftp_remote_dir): - file_names.append(item.filename) - return file_names diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py deleted file mode 100644 index 41c4cf571..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Slack notification utilities for sending messages to Slack channels. 
- -Import as: - -import helpers.hslack as hslack -""" - -import logging -import os -from typing import Optional - -import requests - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# SlackNotifier -# ############################################################################# - - -class SlackNotifier: - """ - Send notifications to Slack channels using bot tokens. - """ - - def __init__(self, bot_token: Optional[str] = None) -> None: - """ - Initialize Slack notifier. - - :param bot_token: Slack bot token (starts with 'xoxb-') - """ - self.bot_token = bot_token or os.environ.get("SLACK_BOT_TOKEN") - if not self.bot_token: - raise ValueError( - "No bot token provided via parameter or SLACK_BOT_TOKEN env var" - ) - - def send_message( - self, - channel: str, - message: str, - ) -> None: - """ - Send a message to a Slack channel. - - :param channel: Slack channel ID (e.g., 'C1234567890') or - channel name (e.g., '#notifications') - :param message: Message text to send - """ - URL = "https://slack.com/api/chat.postMessage" - headers = { - "Authorization": f"Bearer {self.bot_token}", - "Content-Type": "application/json", - } - payload = { - "channel": channel, - "text": message, - } - response = requests.post(URL, headers=headers, json=payload, timeout=30) - response.raise_for_status() - result = response.json() - if not result.get("ok"): - raise ValueError(f"Slack API error: {result.get('error')}") - _LOG.info("Message sent successfully to %s", channel) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py deleted file mode 100644 index 4c3f6a748..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Import as: - 
-import helpers.hsql as hsql -""" - -import helpers.hmodule as hmodule - -# The problem here is that part of the code base end up including `hsql` which -# requires `psycopg2` even though it's not called at run-time. -# To simplify the dependency management we include the code of `hsql` only if -# `psycopg2` is present. If not, we just create a stub for the needed type hints. -if hmodule.has_module("psycopg2"): - from helpers.hsql_implementation import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import - -else: - from typing import Any, List - - DbConnection = Any - - -def create_in_operator(values: List[str], column_name: str) -> str: - """ - Transform a list of possible values into an IN operator clause. - - :param values: a list of possible values for the given column, e.g. `["binance", "ftx"]` - :param column_name: the name of the column, e.g. 'exchange_id' - :return: IN operator clause with specified values, - e.g. `"exchange_id IN ('binance', 'ftx')"` - """ - in_operator = ( - f"{column_name} IN (" - + ",".join([f"'{value}'" for value in values]) - + ")" - ) - return in_operator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py deleted file mode 100644 index ddd48d1e4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py +++ /dev/null @@ -1,954 +0,0 @@ -""" -Import as: - -import helpers.hsql_implementation as hsqlimpl -""" - -import collections -import io -import logging -import os -import re -import time -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd -import psycopg2 as psycop -import psycopg2.extras as extras -import psycopg2.sql as psql - -import 
helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsecrets as hsecret -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Connection -# ############################################################################# - -DbConnection = Any - -# Invariant: keep the arguments in the interface in the same order as: -# host, dbname, port, user, password. -DbConnectionInfo = collections.namedtuple( - "DbConnectionInfo", ["host", "dbname", "port", "user", "password"] -) - - -def get_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, - autocommit: bool = True, -) -> DbConnection: - """ - Create a connection and cursor for a SQL database. - """ - _LOG.debug(hprint.to_str("host dbname port user")) - connection = psycop.connect( - host=host, dbname=dbname, port=port, user=user, password=password - ) - if autocommit: - connection.autocommit = True - return connection - - -def get_connection_from_aws_secret( - aws_region: str, - *, - stage: str = "prod", -) -> DbConnection: - """ - Create an SQL connection using credentials obtained from AWS - SecretsManager. - - The function uses `ck` AWS profile on the backend. - The intended usage is obtaining connection to a DB on RDS instances. - - :param aws_region: AWS DB region, e.g. "eu-north-1", "ap-northeast-1" - :param stage: DB stage to connect to. For "prod" stage it is only possible to obtain a read-only connection via this method. 
- """ - hdbg.dassert_in(stage, ["prod", "preprod", "test"]) - hdbg.dassert_in(aws_region, hs3.AWS_REGIONS) - dbname = f"{stage}.im_data_db" - if stage == "prod": - secret_name = f"{dbname}.read_only" - else: - secret_name = ( - dbname - if aws_region == hs3.AWS_EUROPE_REGION_1 - else f"{dbname}.{aws_region}" - ) - _LOG.info("Fetching secret: %s", secret_name) - db_creds = hsecret.get_secret(secret_name) - connection = get_connection( - host=db_creds["host"], - dbname=dbname, - port=db_creds["port"], - user=db_creds["username"], - password=db_creds["password"], - ) - return connection - - -def get_connection_from_env_vars() -> DbConnection: - """ - Create a SQL connection with the information from the environment - variables. - """ - # Get values from the environment variables. - host = os.environ["POSTGRES_HOST"] - dbname = os.environ["POSTGRES_DB"] - port = int(os.environ["POSTGRES_PORT"]) - user = os.environ["POSTGRES_USER"] - password = os.environ["POSTGRES_PASSWORD"] - # Build the - connection = get_connection( - host=host, - dbname=dbname, - port=port, - user=user, - password=password, - ) - return connection - - -def get_connection_from_string( - conn_as_str: str, - autocommit: bool = True, -) -> DbConnection: - """ - Create a connection from a string. - - E.g., `host=localhost dbname=im_db_local port=5432 user=... - password=...` - """ - regex = r"host=\w+ dbname=\w+ port=\d+ user=\w+ password=\w+" - m = re.match(regex, conn_as_str) - hdbg.dassert(m, "Invalid connection string: '%s'", conn_as_str) - connection = psycop.connect(conn_as_str) - if autocommit: - connection.autocommit = True - return connection - - -def get_connection_info_from_env_file(env_file_path: str) -> DbConnectionInfo: - """ - Get connection parameters from environment file. 
- - :param env_file_path: path to an environment file that contains db - connection parameters - """ - import dotenv - - db_config = dotenv.dotenv_values(env_file_path) - params = { - "host": db_config["POSTGRES_HOST"], - "dbname": db_config["POSTGRES_DB"], - "user": db_config["POSTGRES_USER"], - "password": db_config["POSTGRES_PASSWORD"], - } - key = "POSTGRES_PORT" - if key in db_config: - params["port"] = int(db_config[key]) - else: - params["port"] = 5432 - # The parameters' names are fixed and cannot be changed, see - # `https:://hub.docker.com/_/postgres`. - connection_parameters = DbConnectionInfo(**params) - return connection_parameters - - -def check_db_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, -) -> Tuple[bool, Optional[psycop.OperationalError]]: - """ - Check whether a connection to a DB exists, in a non-blocking way. - """ - try: - get_connection( - host=host, dbname=dbname, port=port, user=user, password=password - ) - connection_exist = True - error = None - except psycop.OperationalError as e: - connection_exist = False - error = e - return connection_exist, error - - -def wait_db_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, - *, - timeout_in_secs: int = 30, -) -> None: - """ - Wait until the database is available. - - :param timeout_in_secs: secs before timing out with `RuntimeError`. 
- """ - hdbg.dassert_lte(1, timeout_in_secs) - _LOG.debug("dbname=%s, port=%s, host=%s", dbname, port, host) - elapsed_secs = 0 - while True: - _LOG.info("Waiting for PostgreSQL to become available...") - conn_exists = check_db_connection(host, dbname, port, user, password) - if conn_exists[0]: - _LOG.info("PostgreSQL is available (after %s seconds)", elapsed_secs) - break - if elapsed_secs > timeout_in_secs: - raise psycop.OperationalError( - f"Cannot connect to db host={host} dbname={dbname} port={port} " - f"due to timeout={timeout_in_secs} seconds" - f"\n{conn_exists[1]}" - ) - elapsed_secs += 1 - time.sleep(1) - - -def db_connection_to_tuple(connection: DbConnection) -> DbConnectionInfo: - """ - Get database connection details using connection. Connection details - include: - - - Host - - Database name - - Port - - Username - - Password - - :param connection: a database connection - :return: database connection details - """ - info = connection.info - ret = DbConnectionInfo( - host=info.host, - dbname=info.dbname, - port=info.port, - user=info.user, - password=info.password, - ) - return ret - - -# ############################################################################# -# State of the whole DB -# ############################################################################# - - -def get_engine_version(connection: DbConnection) -> str: - """ - Report information on the SQL engine. - - E.g., ``` PostgreSQL 11.5 on x86_64-pc-linux-gnu compiled by gcc - (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit ``` - """ - query = "SELECT version();" - df = pd.read_sql_query(query, connection) - # pylint: disable=no-member - info: str = df.iloc[0, 0] - return info - - -# ############################################################################# -# Tables -# ############################################################################# - - -def get_table_names(connection: DbConnection) -> List[str]: - """ - Report the name of the tables. 
- - E.g., tables=['entities', 'events', 'stories', 'taxonomy'] - """ - query = """ - SELECT table_name - FROM information_schema.tables - WHERE table_type = 'BASE TABLE' - AND table_schema = 'public' - """ - cursor = connection.cursor() - cursor.execute(query) - tables = [x[0] for x in cursor.fetchall()] - return tables - - -# TODO(gp): Test / fix this. -def get_indexes(connection: DbConnection) -> pd.DataFrame: - res = [] - tables = get_table_names(connection) - cursor = connection.cursor() - for table in tables: - query = f"""SELECT * FROM pg_indexes WHERE tablename = '{table}' """ - cursor.execute(query) - z = cursor.fetchall() - res.append(pd.DataFrame(z)) - tmp: pd.DataFrame = pd.concat(res) - tmp["index_type"] = tmp[4].apply( - lambda w: w.split("USING")[1].lstrip().split(" ")[0] - ) - tmp.columns = [ - "type: public/private", - "table_name", - "key_name", - "None", - "Statement", - "index_type", - ] - tmp["columns"] = tmp["Statement"].apply(lambda w: w.split("(")[1][:-1]) - - return tmp - - -def disconnect_all_clients(connection: DbConnection) -> None: - # From https://stackoverflow.com/questions/36502401 - # Not sure this will work in our case, since it might kill our own connection. - dbname = connection.info.host - query = f""" - SELECT pg_terminate_backend(pid) - FROM pg_stat_activity - WHERE datname = '{dbname}';""" - connection.cursor().execute(query) - - -# ############################################################################# -# Database -# ############################################################################# - - -def get_db_names(connection: DbConnection) -> List[str]: - """ - Return the names of the available DBs. 
- - E.g., ['postgres', 'rdsadmin', 'template0', 'template1'] - """ - query = "SELECT datname FROM pg_database;" - cursor = connection.cursor() - cursor.execute(query) - dbs = list(zip(*cursor.fetchall()))[0] - dbs = sorted(dbs) - return dbs - - -def create_database( - connection: DbConnection, - dbname: str, - *, - overwrite: Optional[bool] = None, -) -> None: - """ - Create empty database. - - :param connection: database connection - :param dbname: database to create - :param overwrite: overwrite existing database - """ - _LOG.debug("connection=%s", connection) - with connection.cursor() as cursor: - if overwrite: - cursor.execute( - psql.SQL("DROP DATABASE IF EXISTS {} WITH (FORCE);").format( - psql.Identifier(dbname) - ) - ) - else: - if dbname in get_table_names(connection): - raise ValueError(f"Database {dbname} already exists") - cursor.execute( - psql.SQL("CREATE DATABASE {};").format(psql.Identifier(dbname)) - ) - - -def remove_database(connection: DbConnection, dbname: str) -> None: - """ - Remove database in current environment. - - :param connection: a database connection - :param dbname: database name to drop, e.g. `im_db_local` - """ - # Drop database. - # From https://stackoverflow.com/questions/36502401 - connection.cursor().execute( - psql.SQL("DROP DATABASE {} WITH (FORCE);").format( - psql.Identifier(dbname) - ) - ) - - -def get_tables_size( - connection: DbConnection, - only_public: bool = True, - summary: bool = True, -) -> pd.DataFrame: - """ - Report the size of each table. 
- - E.g., - - ``` - table_name row_estimate total index toast table - 0 events 0.0 26 GB 0 bytes 192 bytes 26 GB - 1 stories 0.0 15 GB 43 GB 192 bytes 12 GB - 2 entities 10823400.0 76 MB 0 bytes 192 bytes 76 MB - 3 taxonomy 20691.0 690 kB 0 bytes 192 bytes 652 kB - ``` - """ - q = """SELECT *, pg_size_pretty(total_bytes) AS total - , pg_size_pretty(index_bytes) AS INDEX - , pg_size_pretty(toast_bytes) AS toast - , pg_size_pretty(table_bytes) AS TABLE - FROM ( - SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( - SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME - , c.reltuples AS row_estimate - , pg_total_relation_size(c.oid) AS total_bytes - , pg_indexes_size(c.oid) AS index_bytes - , pg_total_relation_size(reltoastrelid) AS toast_bytes - FROM pg_class c - LEFT JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE relkind = 'r' - ) a - ) a - ORDER by total_bytes DESC""" - df = pd.read_sql_query(q, connection) - if only_public: - df = df[df["table_schema"] == "public"] - if summary: - cols = "table_name row_estimate total index toast table".split() - df = df[cols] - return df - - -# ############################################################################# -# Query -# ############################################################################# - - -# TODO(gp): -> as_df -def execute_query_to_df( - connection: DbConnection, - query: str, - limit: Optional[int] = None, - offset: Optional[int] = None, - use_timer: bool = False, - profile: bool = False, - verbose: bool = False, -) -> pd.DataFrame: - """ - Execute a query. - """ - if False: - # Ask the user before executing a query. - print(f"query=\n{query}") - import helpers.hsystem as hsystem - - hsystem.query_yes_no("Ok to execute?") - if limit is not None: - query += f" LIMIT {limit}" - if offset is not None: - query += f" OFFSET {offset}" - if profile: - query = "EXPLAIN ANALYZE " + query - if verbose: - _LOG.info("> %s", query) - # Compute. 
- if use_timer: - idx = htimer.dtimer_start(0, "Sql time") - cursor = connection.cursor() - try: - df = pd.read_sql_query(query, connection) - except psycop.OperationalError: - # Catch error and execute query directly to print error. - try: - cursor.execute(query) - except psycop.Error as e: - print(e.pgerror) - raise e - if use_timer: - htimer.dtimer_stop(idx) - if profile: - _LOG.info("df=%s", df) - return df - - -def head_table( - connection: DbConnection, - table: str, - limit: int = 5, -) -> str: - """ - Report the head of the table as str. - """ - txt = [] - query = f"SELECT * FROM {table} LIMIT {limit} " - df = execute_query_to_df(connection, query) - # pd.options.display.max_columns = 1000 - # pd.options.display.width = 130 - txt.append(str(df)) - txt = "\n".join(txt) - return txt - - -def head_tables( - connection: DbConnection, - tables: Optional[List[str]] = None, - limit: int = 5, -) -> str: - txt = [] - if tables is None: - tables = get_table_names(connection) - for table in tables: - txt.append("\n" + "#" * 80 + "\n" + table + "\n" + "#" * 80) - txt_tmp = head_table(connection, table, limit=limit) - txt.append(txt_tmp) - txt = "\n".join(txt) - return txt - - -def get_table_columns(connection: DbConnection, table_name: str) -> List[str]: - """ - Get column names for given table. 
- """ - query = f""" - SELECT column_name - FROM information_schema.columns - WHERE TABLE_NAME = '{table_name}'""" - cursor = connection.cursor() - cursor.execute(query) - columns = [x[0] for x in cursor.fetchall()] - return columns - - -def find_tables_common_columns( - connection: DbConnection, - tables: List[str], - as_df: bool = False, -) -> Optional[pd.DataFrame]: - limit = 5 - df = [] - for i, table in enumerate(tables): - table = tables[i] - query = f"SELECT * FROM {table} LIMIT {limit} " - df1 = execute_query_to_df(connection, query, verbose=False) - if df1 is None: - continue - for j in range(i + 1, len(tables)): - table = tables[j] - query = f"SELECT * FROM {table} LIMIT {limit} " - df2 = execute_query_to_df(connection, query, verbose=False) - if df2 is None: - continue - common_cols = [c for c in df1 if c in df2] - if as_df: - df.append( - ( - tables[i], - tables[j], - len(common_cols), - " ".join(common_cols), - ) - ) - else: - print(f"'{tables[i]}' vs '{tables[j]}'") - print(f" ({len(common_cols)}): {' '.join(common_cols)}") - obj = None - if as_df: - obj = pd.DataFrame( - df, columns=["table1", "table2", "num_comm_cols", "common_cols"] - ) - return obj - - -def remove_table( - connection: DbConnection, table_name: str, cascade: bool = False -) -> None: - """ - Remove a table from a database. - - :param connection: database connection - :param table_name: table name - :param cascade: whether to drop the objects dependent on the table - """ - query = f"DROP TABLE IF EXISTS {table_name}" - if cascade: - query = " ".join([query, "CASCADE"]) - connection.cursor().execute(query) - - -def remove_all_tables(connection: DbConnection, cascade: bool = False) -> None: - """ - Remove all the tables from a database. 
- - :param connection: database connection - :param cascade: whether to drop the objects dependent on the tables - """ - table_names = get_table_names(connection) - _LOG.warning("Deleting all the tables: %s", table_names) - for table_name in table_names: - _LOG.warning("Deleting %s ...", table_name) - remove_table(connection, table_name, cascade) - - -# ############################################################################# -# Insert -# ############################################################################# - - -def csv_to_series(csv_as_txt: str, sep: str = ",") -> pd.Series: - """ - Convert a text with (key, value) separated by `sep` into a `pd.Series`. - - :param csv_as_txt: a string containing csv data - E.g., - ``` - tradedate,2021-11-12 - targetlistid,1 - ``` - :param sep: csv separator, e.g. `,` - :return: series - """ - lines = hprint.dedent(csv_as_txt).split("\n") - tuples = [tuple(line.split(sep)) for line in lines] - # Remove empty tuples. - tuples = [t for t in tuples if t[0] != ""] - # Build series. - index, data = zip(*tuples) - # _LOG.debug("index=%s", index) - # _LOG.debug("data=%s", data) - srs = pd.Series(data, index=index) - return srs - - -def copy_rows_with_copy_from( - connection: DbConnection, df: pd.DataFrame, table_name: str -) -> None: - """ - Copy dataframe contents into DB directly from buffer. - - This function works much faster for large dataframes (>10000 rows). - - :param connection: DB connection - :param df: data to insert - :param table_name: name of the table for insertion - """ - # The target table needs to exist. - hdbg.dassert_in(table_name, get_table_names(connection)) - # Read the data. - buffer = io.StringIO() - df.to_csv(buffer, index=False, header=False) - buffer.seek(0) - # Copy the data to the DB. - cur = connection.cursor() - cur.copy_from(buffer, table_name, sep=",") - # TODO(gp): CmampTask413, is this still needed because the autocommit. 
- connection.commit() - - -# TODO(gp): -> table_name, df -def create_insert_query(df: pd.DataFrame, table_name: str) -> str: - """ - Create an INSERT query to insert data into a DB. - - :param df: data to insert into DB - :param table_name: name of the table for insertion - :return: sql query, e.g., - ``` - INSERT INTO ccxt_ohlcv_spot(timestamp,open,high,low,close) VALUES %s - ``` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - columns = ",".join(list(df.columns)) - query = f"INSERT INTO {table_name}({columns}) VALUES %s" - _LOG.debug("query=%s", query) - return query - - -# TODO(gp): -> table_name, df -def create_insert_on_conflict_do_nothing_query( - df: pd.DataFrame, table_name: str, unique_columns: List[str] -) -> str: - """ - Create an INSERT query to insert data into a DB. If a unique constraint is - violated for a provided set of columns, duplicates are not inserted. - - :param df: data to insert into DB - :param table_name: name of the table for insertion - :param unique_columns: set of columns which should be unique record-wise. - :return: sql query, e.g., - ``` - INSERT INTO ccxt_bid_ask(timestamp,bid_size,bid_price,ask_size, - ask_price,exchange_id,currency_pair) VALUES %s - ON CONFLICT (timestamp, exchange_id, currency_pair) DO NOTHING; - ``` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Check that the constraint is actually applied to columns - # of the DataFrame. - hdbg.dassert_is_subset(unique_columns, list(df.columns)) - columns = ",".join(list(df.columns)) - unique_columns_str = ",".join(unique_columns) - query = f"INSERT INTO {table_name}({columns}) VALUES %s ON CONFLICT ({unique_columns_str}) \ - DO NOTHING" - _LOG.debug("query=%s", query) - return query - - -# TODO(gp): -> connection, table_name, obj -def execute_insert_query( - connection: DbConnection, - obj: Union[pd.DataFrame, pd.Series], - table_name: str, -) -> None: - """ - Insert a DB as multiple rows into the database. 
- - :param connection: connection to the DB - :param obj: data to insert - :param table_name: name of the table for insertion - """ - if isinstance(obj, pd.Series): - df = obj.to_frame().T - else: - df = obj - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_in(table_name, get_table_names(connection)) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Ensure the DataFrame has compatible types with - # downstream consumers (e.g., database). - df = df.applymap(lambda x: float(x) if isinstance(x, np.float64) else x) - # Transform dataframe into list of tuples. - values = [tuple(v) for v in df.to_numpy()] - # Generate a query for multiple rows. - query = create_insert_query(df, table_name) - # Execute query for each provided row. - cur = connection.cursor() - extras.execute_values(cur, query, values) - connection.commit() - - -# TODO(gp): -> connection, table_name, obj -def execute_insert_on_conflict_do_nothing_query( - connection: DbConnection, - obj: Union[pd.DataFrame, pd.Series], - table_name: str, - unique_columns: List[str], -) -> None: - """ - Insert a DB as multiple rows into the database. If a a UNIQUE constraint is - violated for a provided set of columns, duplicates are not inserted. - - :param connection: connection to the DB - :param obj: data to insert - :param table_name: name of the table for insertion - :param unique_columns: set of columns which should be unique record-wise. - If unique_columns is an empty list, a regular DB insert is executed - without the UNIQUE constraint. - """ - if isinstance(obj, pd.Series): - df = obj.to_frame().T - else: - df = obj - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_in(table_name, get_table_names(connection)) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Transform dataframe into list of tuples. - values = [tuple(v) for v in df.to_numpy()] - # Generate a query for multiple rows. 
- if not unique_columns: - # If unique_columns is an empty list, currently used when saving - # bid/ask RT data, to experiment with using no uniqueness constraints. - query = create_insert_query(df, table_name) - else: - query = create_insert_on_conflict_do_nothing_query( - df, table_name, unique_columns - ) - # Execute query for each provided row. - cur = connection.cursor() - try: - extras.execute_values(cur, query, values) - connection.commit() - except Exception as e: - _LOG.error( - "Failed to insert data with the '%s'. Query %s. Values: %s", - str(e), - query, - values, - ) - raise e - - -def execute_query(connection: DbConnection, query: str) -> List[tuple]: - """ - Use for generic simple operations. - - :param connection: connection to the DB - :param query: generic query that can be: insert, update, delete, etc. - :return: list of tuples with the results of the query - """ - _LOG.debug(hprint.to_str("query")) - with connection.cursor() as cursor: - cursor.execute(query) - if not connection.autocommit: - connection.commit() - try: - result = cursor.fetchall() - except psycop.ProgrammingError: - result = [()] - return result - - -# ############################################################################# -# Build more complex SQL queries. -# ############################################################################# - - -# Invariants for functions with SQL queries -# -# - Functions creating tables -# - accept a parameter `incremental that has the same behavior as in -# `hio.create_dir(..., incremental)` -# - It controls the behavior of this function if the target table already exists. -# If `incremental` is True, then skip creating it and reuse it as it is; if -# False delete it and create it from scratch. 
-# -# - Function creating / execution SQL queries -# - We prefer functions that directly perform SQL queries implementing a given -# functionality (e.g., `get_num_rows()`) -# - Use `get_..._query()` returning the query text only when we want to freeze -# the query in a test, e.g., because it is complex - - -def get_remove_duplicates_query( - table_name: str, id_col_name: str, column_names: List[str] -) -> str: - """ - Get a query to remove duplicates from table, keeping last duplicated row. - - :param table_name: name of table - :param id_col_name: name of unique id column - :param column_names: names of columns to compare on - :return: query to execute duplicate removal - """ - # TODO(*): Add a "limit" parameter if possible, to check only in top N rows. - remove_statement = [] - remove_statement.append(f"DELETE FROM {table_name} a USING {table_name} b") - remove_statement.append(f"WHERE a.{id_col_name} < b.{id_col_name}") - for c in column_names: - remove_statement.append(f"AND a.{c} = b.{c}") - remove_statement = " ".join(remove_statement) - return remove_statement - - -def get_num_rows(connection: DbConnection, table_name: str) -> int: - """ - Return the number of rows in a DB table. - """ - cursor = connection.cursor() - query = f"SELECT COUNT(*) FROM {table_name}" - cursor.execute(query) - vals = cursor.fetchall() - # The return value is like: vals=[(0,)] - hdbg.dassert_eq(len(vals), 1) - return vals[0][0] # type: ignore[no-any-return] - - -# ############################################################################# -# Polling functions -# ############################################################################# - - -def is_row_with_value_present( - connection: DbConnection, - table_name: str, - field_name: str, - target_value: str, - *, - show_db_state: bool = True, -) -> hasynci.PollOutput: - """ - Check with a polling function if a row with `field_name` == `target_value` - is present in the table `table_name` of the DB. 
- - E.g., this can be used with polling to wait for the target value - "hello_world.txt" in the "filename" field of the table "table_name" to appear - - :return: - - success if the value is present - - result: None - """ - _LOG.debug(hprint.to_str("connection table_name field_name target_value")) - # Print the state of the DB, if needed. - if show_db_state: - query = f"SELECT * FROM {table_name} ORDER BY filename" - df = execute_query_to_df(connection, query) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Check if the required row is available. - query = f"SELECT {field_name} FROM {table_name} WHERE {field_name}='{target_value}'" - df = execute_query_to_df(connection, query) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Package results. - success = df.shape[0] > 0 - result = None - return success, result - - -# TODO(gp): Add unit test. -async def wait_for_change_in_number_of_rows( - get_wall_clock_time: hdateti.GetWallClockTime, - db_connection: DbConnection, - table_name: str, - poll_kwargs: Dict[str, Any], - *, - tag: Optional[str] = None, -) -> int: - """ - Wait until the number of rows in a table changes. - - :param get_wall_clock_time: a function to get current time - :param db_connection: connection to the target DB - :param table_name: name of the table to poll - :param poll_kwargs: a dictionary with the kwargs for `poll()` - :param tag: name of the caller function - :return: number of new rows found - """ - num_rows = get_num_rows(db_connection, table_name) - - def _is_number_of_rows_changed() -> hasynci.PollOutput: - new_num_rows = get_num_rows(db_connection, table_name) - _LOG.debug("new_num_rows=%s num_rows=%s", new_num_rows, num_rows) - success = new_num_rows != num_rows - diff_num_rows = new_num_rows - num_rows - return success, diff_num_rows - - # Poll. - if tag is None: - # Use name of the caller function. 
- tag = hintros.get_function_name(count=0) - if poll_kwargs is None: - poll_kwargs = hasynci.get_poll_kwargs(get_wall_clock_time) - num_iters, diff_num_rows = await hasynci.poll( - _is_number_of_rows_changed, - tag=tag, - **poll_kwargs, - ) - _ = num_iters - diff_num_rows = cast(int, diff_num_rows) - return diff_num_rows diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py deleted file mode 100644 index 2aeff7c6c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py +++ /dev/null @@ -1,273 +0,0 @@ -""" -Import as: - -import helpers.hsql_test as hsqltest -""" - -import abc -import logging -import os - -import pytest - -import helpers.hdocker as hdocker -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsql as hsql -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestDbHelper -# ############################################################################# - - -@pytest.mark.requires_docker_in_docker -@pytest.mark.skipif( - not hserver.can_run_docker_from_docker(), - reason="Need docker children / sibling support", -) -class TestDbHelper(hunitest.TestCase, abc.ABC): - """ - Allow testing code that interacts with a DB. - - It creates / destroys a test DB during setup / teardown of the class. This means - that the same DB is reused for multiple test methods of the same class. - - The invariant is that each test method should: - - (ideally) find a clean DB to work with - - not assume that the DB is clean. 
If the DB is not clean, tests should clean it - before starting, or work around it - - E.g., if a test needs to write a table, but the table is already present and - partially filled as a leftover from a previous test, the new test should - delete the table and create it again - - clean the DB after themselves, i.e., undo the work that has been done - - E.g., if a test creates a table, then the test should delete the table at - the end of the test - - - An existing DB can be reused - - A user can create a persistent local DB in the Docker container, e.g. for OMS: - ``` - docker> (cd oms; sudo docker-compose \ - --file /app/oms/devops/compose/tmp.docker-compose.yml up \ - -d \ - oms_postgres) - ``` - or - ``` - docker> invoke oms_docker_up - ``` - - Then this class skips creating / destructing the DB, making the tests faster - and allowing easier debugging. - """ - - @classmethod - def setUpClass(cls) -> None: - """ - Initialize the test database inside test container. - """ - _LOG.info("\n%s", hprint.frame("setUpClass")) - cls._create_docker_files() - # Read the connection parameters from the env file. - cls.db_env_file = cls._get_db_env_path() - connection_info = hsql.get_connection_info_from_env_file(cls.db_env_file) - _LOG.debug("connection_info=%s", connection_info) - conn_exists = hsql.check_db_connection(*connection_info)[0] - if conn_exists: - _LOG.warning("DB is already up: skipping docker compose") - # Since we have found the DB already up, we assume that we need to - # leave it running after the tests - cls.bring_down_db = False - else: - # Start the service. - cls.docker_compose_file_path = os.path.join( - hgit.get_amp_abs_path(), cls._get_compose_file() - ) - # TODO(Grisha): use invoke task CMTask #547. 
- cmd = ( - "sudo docker-compose " - f"--file {cls.docker_compose_file_path} " - f"--env-file {cls.db_env_file} " - f"up -d {cls._get_service_name()}" - ) - _LOG.debug("cmd=%s", cmd) - hsystem.system(cmd, suppress_output=False) - # Wait for the DB to be available. - hsql.wait_db_connection(*connection_info) - cls.bring_down_db = True - # Save connection info. - # TODO(gp): -> db_connection - cls.connection = hsql.get_connection(*connection_info, autocommit=True) - - # TODO(Grisha): difference between cmamp and kaizenflow. - @classmethod - def tearDownClass(cls) -> None: - """ - Bring down the test container. - """ - _LOG.info("\n%s", hprint.frame("tearDown")) - docker_compose_cleanup = cls.bring_down_db - if docker_compose_cleanup: - if hserver.use_main_network(): - # When using sibling containers `docker-compose down` tries to shut - # down also the `main_network`, while it is attached to the Docker - # container running the tests - # So we clean up the containers and volumes directly. - # TODO(gp): This could become an invoke target. - # Remove the container, e.g., `compose-oms_postgres7482-1`. - service_name = cls._get_service_name() - container_name = f"compose-{service_name}-1" - use_sudo = hdocker.get_use_sudo() - hdocker.container_rm(container_name, use_sudo) - # Remove the volume, e.g., `compose_oms_postgres7482_data`. - volume_name = f"compose_{service_name}_data" - hdocker.volume_rm(volume_name, use_sudo) - else: - # TODO(Grisha): use invoke task CMTask #547. - cmd = ( - "sudo docker-compose " - f"--file {cls.docker_compose_file_path} " - f"--env-file {cls.db_env_file} " - "down -v" - ) - hsystem.system(cmd, suppress_output=False) - else: - _LOG.warning("Leaving DB up") - if not hunitest.get_incremental_tests(): - os.unlink(cls._get_compose_file()) - os.unlink(cls._get_db_env_path()) - - @classmethod - @abc.abstractmethod - def get_id(cls) -> int: - """ - Return a unique ID to create an OMS instance. 
- - This ID is used to generate Docker compose / env files and - services, so that we can avoid collisions in case of parallel - execution. - - This function is specified by the unit test in a way that is - unique to each test. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_compose_file(cls) -> str: - """ - Get path to Docker compose file. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_service_name(cls) -> str: - """ - Get service name. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_db_env_path(cls) -> str: - """ - Get path to env file that contains DB connection parameters. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _create_docker_files(cls) -> str: - """ - Create the compose and env file for the DB run. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_postgres_db(cls) -> str: - """ - Return the name of the postgres DB to use (e.g., im_postgres_db_local). - """ - raise NotImplementedError - - -# ############################################################################# -# TestImOmsDbHelper -# ############################################################################# - - -class TestImOmsDbHelper(TestDbHelper, abc.ABC): - # TODO(gp): Rewrite building a YAML with a package. - @classmethod - def _create_docker_files(cls) -> None: - # Create compose file. - service_name = cls._get_service_name() - idx = cls.get_id() - host_port = 5432 + idx - txt = f"""version: '3.5' -services: - # Docker container running Postgres DB. - {service_name}: - image: postgres:13 - restart: "no" - environment:""" - if not hserver.use_docker_db_container_name_to_connect(): - # Use the port to connect. 
- txt += f""" - - POSTGRES_HOST=${{POSTGRES_HOST}} - - POSTGRES_DB=${{POSTGRES_DB}} - - POSTGRES_PORT=${{POSTGRES_PORT}} - - POSTGRES_USER=${{POSTGRES_USER}} - - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} - volumes: - - {service_name}_data:/var/lib/postgresql/data - ports: - - {host_port}:5432""" - else: - # Do not use the port to connect. - txt += f""" - - POSTGRES_HOST=${{POSTGRES_HOST}} - - POSTGRES_DB=${{POSTGRES_DB}} - - POSTGRES_USER=${{POSTGRES_USER}} - - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} - volumes: - - {service_name}_data:/var/lib/postgresql/data""" - # - txt += f""" -volumes: - {service_name}_data: {{}} - -networks: - default: - #name: {service_name}_network - name: main_network""" - compose_file_name = cls._get_compose_file() - hio.to_file(compose_file_name, txt) - # Create env file. - txt = [] - if not hserver.use_docker_db_container_name_to_connect(): - if hserver.is_dev4(): - host = "cf-spm-dev4" - else: - # host = os.environ["CSFY_HOST_NAME"] - host = "localhost" - else: - # Use the service name, e.g., `im_postgres...`. 
- host = service_name - postgres_db = cls._get_postgres_db() - txt.append(f"POSTGRES_HOST={host}") - txt.append(f"POSTGRES_DB={postgres_db}") - if not hserver.use_docker_db_container_name_to_connect(): - txt.append(f"POSTGRES_PORT={host_port}") - txt.append("POSTGRES_USER=aljsdalsd") - txt.append("POSTGRES_PASSWORD=alsdkqoen") - txt = "\n".join(txt) - env_file_name = cls._get_db_env_path() - hio.to_file(env_file_name, txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py deleted file mode 100644 index a56f9b0a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py +++ /dev/null @@ -1,176 +0,0 @@ -""" -Import as: - -import helpers.hstring as hstring -""" - -import logging -import os -import re -import tempfile -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str: - if string.startswith(prefix): - res = string[len(prefix) :] - else: - res = string - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't start with prefix ='{prefix}'" - ) - return res - - -def remove_suffix(string: str, suffix: str, assert_on_error: bool = True) -> str: - if string.endswith(suffix): - res = string[: -len(suffix)] - else: - res = string - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't end with suffix='{suffix}'" - ) - return res - - -def diff_strings( - txt1: str, - txt2: str, - txt1_descr: Optional[str] = None, - txt2_descr: Optional[str] = None, - width: int = 130, -) -> str: - # Write file. 
- def _to_file(txt: str, txt_descr: Optional[str]) -> str: - file_name = tempfile.NamedTemporaryFile().name - if txt_descr is not None: - txt = "# " + txt_descr + "\n" + txt - hio.to_file(file_name, txt) - return file_name - - file_name1 = _to_file(txt1, txt1_descr) - file_name2 = _to_file(txt2, txt2_descr) - # Get the difference between the files. - cmd = f"sdiff --width={width} {file_name1} {file_name2}" - _, txt = hsystem.system_to_string( - cmd, - # We don't care if they are different. - abort_on_error=False, - ) - return txt - - -# TODO(gp): GFI. Move to hpython_code.py -def get_docstring_line_indices(lines: List[str]) -> List[int]: - """ - Get indices of lines of code that are inside (doc)strings. - - :param lines: the code lines to check - :return: the indices of docstrings - """ - docstring_line_indices = [] - quotes = {'"""': False, "'''": False, "```": False} - for i, line in enumerate(lines): - # Determine if the current line is inside a (doc)string. - for quote in quotes: - quotes_matched = re.findall(quote, line) - for q in quotes_matched: - # Switch the docstring flag. - # pylint: disable=modified-iterating-dict - quotes[q] = not quotes[q] - if q in ('"""', "'''") and not quotes[q]: - # A triple-quote has just been closed. - # Reset the triple backticks flag. - quotes["```"] = False - if any(quotes.values()): - # Store the index if the quotes have been opened but not closed yet. - docstring_line_indices.append(i) - return docstring_line_indices - - -def get_docstrings(lines: List[str]) -> List[List[int]]: - """ - Get line indices grouped together by the docstring they belong to. - - :param lines: lines from the file to process - :return: grouped lines within docstrings - """ - # Get indices of lines that are within docstrings. - doc_indices = get_docstring_line_indices(lines) - # Group these indices into consecutive docstrings. 
- docstrings = [] - if doc_indices: - current_docstring = [doc_indices[0]] - for idx in doc_indices[1:]: - if idx == current_docstring[-1] + 1: - current_docstring.append(idx) - else: - docstrings.append(current_docstring) - current_docstring = [idx] - docstrings.append(current_docstring) - return docstrings - - -# TODO(gp): GFI. Move to hpython_code.py -def get_code_block_line_indices(lines: List[str]) -> List[int]: - """ - Get indices of lines that are inside code blocks. - - Code blocks are lines surrounded by triple backticks, e.g., - ``` - This line. - ``` - Note that the backticks need to be the leftmost element of their line. - - :param lines: the lines to check - :return: the indices of code blocks - """ - code_block_line_indices = [] - quotes = {"```": False} - for i, line in enumerate(lines): - # Determine if the current line is inside a code block. - for quote in quotes: - quotes_matched = re.findall(rf"^\s*({quote})", line) - for q in quotes_matched: - # Switch the flag. - # pylint: disable=modified-iterating-dict - quotes[q] = not quotes[q] - if any(quotes.values()): - # Store the index if the quotes have been opened but not closed yet. - code_block_line_indices.append(i) - return code_block_line_indices - - -def extract_version_from_file_name(file_name: str) -> Tuple[int, int]: - """ - Extract version number from filename_vXX.json file. - - E.g. - - 'universe_v3.1.json' -> (3, 1) - - 'universe_v1.json' -> (1, 0) - - 'dataset_schema_v3.json' -> (3, 0) - - Currently only JSON file extension is supported. - - :param file_name: file to extract version part from - :return: file version tuple in format (major, minor) - """ - basename = os.path.basename(file_name).rstrip(".json") - m = re.search(r"v(\d+(\.\d+)?)$", basename) - hdbg.dassert( - m, - "Can't parse file '%s', correct format is e.g. 'universe_v03.json'.", - basename, - ) - # Groups return tuple. 
- version = m.groups(1)[0].split(".") # type: ignore[arg-type, union-attr] - major, minor = int(version[0]), 0 if len(version) == 1 else int(version[1]) - return major, minor diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py deleted file mode 100644 index b63bd34f4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py +++ /dev/null @@ -1,1097 +0,0 @@ -""" -Contain all the code needed to interact with the outside world, e.g., through -system commands, env vars, ... - -Import as: - -import helpers.hsystem as hsystem -""" - -import contextlib -import datetime -import getpass -import glob -import logging -import os -import re -import signal -import subprocess -import sys -import time -from typing import ( - Any, - Callable, - Generator, - List, - Match, - Optional, - Tuple, - Union, - cast, -) - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint -import helpers.hserver as hserver - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Set logging level of this file higher to avoid too much chatter. -_LOG.setLevel(logging.INFO) - -# ############################################################################# - - -# TODO(gp): Move to hdatetime.py and maybe merge with `timestamp_to_str()`. 
-def get_timestamp() -> str: - timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - return timestamp - - -# TODO(gp): Maybe move to hserver.py -def is_running_in_ipynb() -> bool: - # From https://stackoverflow.com/questions/15411967 - try: - _ = get_ipython().config # type: ignore - res = True - except NameError: - res = False - return res - - -# ############################################################################# - -_USER_NAME = None - - -def set_user_name(user_name: str) -> None: - """ - To impersonate a user. - - To use only in rare cases for testing or back-door. - """ - _LOG.warning("Setting user to '%s'", user_name) - global _USER_NAME - _USER_NAME = user_name - - -def get_user_name() -> str: - if _USER_NAME is None: - res = getpass.getuser() - else: - res = _USER_NAME - hdbg.dassert_ne(res, "") - return res - - -def get_server_name() -> str: - res = os.uname() - # posix.uname_result( - # sysname='Darwin', - # nodename='gpmac.lan', - # release='18.2.0', - # version='Darwin Kernel Version 18.2.0: Mon Nov 12 20:24:46 PST 2018; - # root:xnu-4903.231.4~2/RELEASE_X86_64', - # machine='x86_64') - # This is not compatible with python2.7 - # return res.nodename - return res[1] - - -def get_os_name() -> str: - res = os.uname() - # This is not compatible with python2.7 - # return res.sysname - return res[0] - - -def get_env_var(env_var_name: str) -> str: - if env_var_name not in os.environ: - msg = f"Can't find '{env_var_name}': re-run dev_scripts/setenv.sh?" 
- _LOG.error(msg) - raise RuntimeError(msg) - return os.environ[env_var_name] - - -# ############################################################################# -# system(), system_to_string() -# ############################################################################# - - -# pylint: disable=too-many-branches,too-many-statements,too-many-arguments,too-many-locals -def _system( - cmd: str, - print_command: bool, - abort_on_error: bool, - suppress_error: Optional[Any], - suppress_output: Union[bool, str], - blocking: bool, - wrapper: Optional[Any], - output_file: Optional[Any], - num_error_lines: Optional[int], - tee: bool, - dry_run: bool, - log_level: Union[int, str], -) -> Tuple[int, str]: - """ - Execute a shell command. - - To print the command and see the output call this as: - ``` - _system(cmd, suppress_output=False, log_level="echo") - ``` - - See `system()` for options. - """ - _LOG.debug(hprint.func_signature_to_str()) - _LOG.debug("##> %s", cmd) - orig_cmd = cmd[:] - _LOG.debug("orig_cmd=%s", orig_cmd) - # Handle `suppress_output`. - hdbg.dassert_in(suppress_output, ("ON_DEBUG_LEVEL", True, False)) - if suppress_output == "ON_DEBUG_LEVEL": - # Show the output if we are at (or lower than) DEBUG level, since - # logging.DEBUG=10 and logging.INFO=20. - show_output = _LOG.getEffectiveLevel() <= logging.DEBUG - suppress_output = not show_output - _LOG.debug(hprint.to_str("suppress_output")) - # Prepare the command line. - cmd = f"({cmd})" - hdbg.dassert_imply(tee, output_file is not None) - if output_file is not None: - # Redirect to a file. - dir_name = os.path.dirname(output_file) - if not dir_name: - dir_name = "." - if not os.path.exists(dir_name): - _LOG.debug("Dir '%s' doesn't exist: creating", dir_name) - hdbg.dassert(bool(dir_name), "dir_name='%s'", dir_name) - os.makedirs(dir_name) - if tee: - cmd += f" 2>&1 | tee -a {output_file};" - cmd += " exit ${PIPESTATUS[0]}" - else: - cmd += f" 2>&1 >{output_file}" - else: - # Do not redirect to a file. 
- cmd += " 2>&1" - # Handle `wrapper`. - if wrapper: - cmd = wrapper + " && " + cmd - # Handle `log_level`. - # TODO(gp): Make it "ECHO" or "PRINT". - if isinstance(log_level, str): - hdbg.dassert_in(log_level, ("echo", "echo_frame")) - if log_level == "echo_frame": - print(hprint.frame(f"> {cmd}")) - elif log_level == "echo": - print(f"> {cmd}") - else: - raise ValueError(f"Invalid log_level='{log_level}'") - _LOG.debug("> %s", cmd) - else: - _LOG.log(log_level, "> %s", cmd) - output = "" - # Handle `dry_run`. - if dry_run: - _LOG.warning("As per user request, not executing command:\n%s", cmd) - rc = 0 - return rc, output - # Execute the command. - try: - stdout = subprocess.PIPE - stderr = subprocess.STDOUT - if print_command: - _LOG.info("> %s", cmd) - with subprocess.Popen( - cmd, - shell=True, - executable="/bin/bash", - stdout=stdout, - stderr=stderr, - ) as p: - output = "" - if blocking: - # Blocking call: get the output. - while True: - line = p.stdout.readline().decode("utf-8", errors="replace") # type: ignore - if not line: - break - if not suppress_output: - # print(" ==> " + line.rstrip("\n")) - print(" ... " + line.rstrip("\n")) - output += line - p.stdout.close() # type: ignore - rc = p.wait() - else: - # Not blocking. - # Wait until process terminates (without using p.wait()). - max_cnt = 20 - cnt = 0 - while p.poll() is None: - # Process hasn't exited yet, let's wait some time. - time.sleep(0.1) - cnt += 1 - _LOG.debug("cnt=%s, rc=%s", cnt, p.returncode) - if cnt > max_cnt: - break - if cnt > max_cnt: - # Timeout: we assume it worked. - rc = 0 - else: - rc = p.returncode - if suppress_error is not None: - hdbg.dassert_isinstance(suppress_error, set) - if rc in suppress_error: - rc = 0 - except OSError as e: - rc = -1 - _LOG.error("error=%s", str(e)) - _LOG.debug(" ==> rc=%s", rc) - if abort_on_error and rc != 0: - # Report the last `num_error_lines` of the output. 
- num_error_lines = num_error_lines or 30 - output_error = "\n".join(output.split("\n")[-num_error_lines:]) - msg = [] - msg.append("\n" + hprint.frame("_system() failed", thickness=2)) - msg.append(hprint.func_signature_to_str()) - msg.append(hprint.frame(f"cmd='{cmd}'", char1="%", thickness=1)) - msg.append(f"- rc='{rc}'") - msg.append(f"- output='\n{output_error}'") - # Save the output in a file. - file_name = "tmp.system_output.txt" - with open(file_name, "w") as f: - f.write(output) - msg.append(f"- Output saved in '{file_name}'") - # Save the command in an executable file. - file_name = "tmp.system_cmd.sh" - msg.append(f"- Command saved in '{file_name}'") - with open(file_name, "w") as f: - f.write(cmd) - os.chmod(file_name, 0o755) - # - msg = "\n".join(msg) - raise RuntimeError(msg) - # hdbg.dassert_type_in(output, (str, )) - return rc, output - - -# pylint: disable=too-many-arguments -def system( - cmd: str, - *, - print_command: bool = False, - abort_on_error: bool = True, - suppress_error: Optional[Any] = None, - suppress_output: Union[str, bool] = "ON_DEBUG_LEVEL", - blocking: bool = True, - wrapper: Optional[Any] = None, - output_file: Optional[Any] = None, - num_error_lines: Optional[int] = None, - tee: bool = False, - dry_run: bool = False, - log_level: Union[int, str] = logging.DEBUG, -) -> int: - """ - Execute a shell command, without capturing its output. 
- - :param cmd: string with command to execute - :param print_command: whether to print the command using `_LOG.info()` - :param abort_on_error: whether we should assert in case of error or not - :param suppress_error: set of error codes to suppress - :param suppress_output: whether to print the output or not - - If "ON_DEBUG_LEVEL" then print the output if the log level is DEBUG - :param blocking: blocking system call or not - :param wrapper: another command to prepend the execution of cmd - :param output_file: redirect stdout and stderr to this file - :param num_error_lines: number of lines of the output to display when - raising `RuntimeError` - :param tee: if True, tee append (i.e., `tee -a`) stdout and stderr to - `output_file` - :param dry_run: print the final command but not execute it - :param log_level: print the command to execute at level "log_level". - - If `echo` then print the command line to screen as `print()` and not - logging - :return: - - return code as int - - output of the command as str - """ - # print("cmd=", cmd) - # print("suppress_output=", suppress_output) - cmd = hprint.dedent(cmd) - rc, _ = _system( - cmd, - print_command=print_command, - abort_on_error=abort_on_error, - suppress_error=suppress_error, - suppress_output=suppress_output, - blocking=blocking, - wrapper=wrapper, - output_file=output_file, - num_error_lines=num_error_lines, - tee=tee, - dry_run=dry_run, - log_level=log_level, - ) - return rc - - -# def _system_to_string(cmd): -# py_ver = sys.version_info[0] -# if py_ver == 2: -# txt = subprocess.check_output(cmd) -# elif py_ver == 3: -# txt = subprocess.getoutput(cmd) -# else: -# raise RuntimeError("Invalid py_ver=" + py_ver) -# txt = [f for f in txt.split("\n") if f] -# hdbg.dassert_eq(len(txt), 1) -# return txt[0] - - -def system_to_string( - cmd: str, - *, - print_command: bool = False, - abort_on_error: bool = True, - suppress_output: Union[bool, str] = "ON_DEBUG_LEVEL", - wrapper: Optional[Any] = None, - dry_run: bool 
= False, - log_level: Union[int, str] = logging.DEBUG, -) -> Tuple[int, str]: - """ - Execute a shell command and capture its output. - - See _system() for options. - """ - rc, output = _system( - cmd, - print_command=print_command, - abort_on_error=abort_on_error, - suppress_error=None, - suppress_output=suppress_output, - # If we want to see the output the system call must be blocking. - blocking=True, - wrapper=wrapper, - output_file=None, - num_error_lines=None, - tee=False, - dry_run=dry_run, - log_level=log_level, - ) - output = output.rstrip("\n") - return rc, output - - -# ############################################################################# -# system_to_one_line() -# ############################################################################# - - -def get_first_line(output: str) -> str: - """ - Return the first (and only) line from a string. - - This is used when calling system_to_string() and expecting a single - line output. - """ - output = hprint.remove_empty_lines(output) - output_as_arr: List[str] = output.split("\n") - # Remove the annoying spurious matches under `tmp.base`. - output_as_arr = [line for line in output_as_arr if "/tmp.base/" not in line] - hdbg.dassert_eq(len(output_as_arr), 1, "output='%s'", output) - output = output_as_arr[0] - output = output.rstrip().lstrip() - return output - - -# TODO(gp): Move it to a more general file, e.g., `helpers/printing.py`? -def text_to_list(txt: str) -> List[str]: - """ - Convert a string (e.g., from system_to_string) into a list of lines. - """ - res = [line.rstrip().lstrip() for line in txt.split("\n")] - res = [line for line in res if line != ""] - return res - - -def system_to_one_line(cmd: str, *args: Any, **kwargs: Any) -> Tuple[int, str]: - """ - Execute a shell command, capturing its output (expected to be a single - line). - - This is a thin wrapper around system_to_string(). 
- """ - rc, output = system_to_string(cmd, *args, **kwargs) - output = get_first_line(output) - return rc, output - - -# ############################################################################# -# system_to_files() -# ############################################################################# - - -def to_normal_paths(files: List[str]) -> List[str]: - files = list(map(os.path.normpath, files)) - return files - - -def to_absolute_paths(files: List[str]) -> List[str]: - files = list(map(os.path.abspath, files)) - return files - - -def _remove_files_non_present(files: List[str]) -> List[str]: - """ - Return list of files from `files` excluding the files that don't exist. - """ - files_tmp = [] - for f in files: - if os.path.exists(f): - files_tmp.append(f) - else: - _LOG.warning("File '%s' doesn't exist: skipping", f) - return files_tmp - - -def remove_dirs(files: List[str]) -> List[str]: - """ - Return list of files from `files` excluding the files that are directories. - """ - files_tmp: List[str] = [] - dirs_tmp: List[str] = [] - for file in files: - if os.path.isdir(file): - _LOG.debug("file='%s' is a dir: skipping", file) - dirs_tmp.append(file) - else: - files_tmp.append(file) - if dirs_tmp: - _LOG.warning("Removed dirs: %s", ", ".join(dirs_tmp)) - return files_tmp - - -def select_result_file_from_list( - files: List[str], mode: str, file_name: str -) -> List[str]: - """ - Select a file from a list according to various approaches encoded in - `mode`. - - :param files: list of files to select from - :param file_name: name of the file we are looking for - :param mode: - - "return_all_results": return the list of files, whatever it is - - "assert_unless_one_result": assert unless there is a single file and return - the only file. Note that we still return a list to keep the interface - simple. - """ - res: List[str] = [] - if mode == "assert_unless_one_result": - # Expect to have a single result and return that. 
- if len(files) == 0: - hdbg.dfatal(f"mode={mode}: didn't find file {file_name}") - elif len(files) > 1: - hdbg.dfatal( - f"mode={mode}: found multiple files:\n" + "\n".join(files) - ) - res = [files[0]] - elif mode == "return_all_results": - # Return all files. - res = files - else: - hdbg.dfatal(f"Invalid mode='{mode}'") - return res - - -def system_to_files( - cmd: str, - dir_name: Optional[str] = None, - remove_files_non_present: bool = False, - mode: str = "return_all_results", -) -> List[str]: - """ - Execute command `cmd` in `dir_name` and return the output as a list of - strings. - - :param remove_files_non_present: remove files that don't exist on - the filesystem - :param mode: like in `select_result_file_from_list()` - """ - if dir_name is None: - dir_name = "." - hdbg.dassert_dir_exists(dir_name) - cmd = f"cd {dir_name} && {cmd}" - _, output = system_to_string(cmd) - # Remove empty lines. - _LOG.debug("output=\n%s", output) - files = output.split("\n") - files = [line.rstrip().rstrip() for line in files] - files = [line for line in files if line != ""] - _LOG.debug("files=%s", " ".join(files)) - # Convert to normalized paths. - files = [os.path.join(dir_name, f) for f in files] - files: List[str] = list(map(os.path.normpath, files)) # type: ignore - _LOG.debug(hprint.to_str("files")) - # Remove non-existent files, if needed. - if remove_files_non_present: - files = _remove_files_non_present(files) - # Process output. - files = select_result_file_from_list(files, mode, cmd) - return files - - -# ############################################################################# -# Functions handling processes -# ############################################################################# - - -def get_process_pids( - keep_line: Callable[[str], bool], -) -> Tuple[List[int], List[str]]: - """ - Find all the processes corresponding to `ps ax` filtered line by line with - `keep_line()`. 
- - :return: list of pids and filtered output of `ps ax` - """ - cmd = "ps ax" - rc, txt = system_to_string(cmd, abort_on_error=False) - _LOG.debug("txt=\n%s", txt) - pids: List[int] = [] - txt_out: List[str] = [] - if rc == 0: - for line in txt.split("\n"): - _LOG.debug("line=%s", line) - # PID TT STAT TIME COMMAND - if "PID" in line and "TT" in line and "STAT" in line: - txt_out.append(line) - continue - keep = keep_line(line) - _LOG.debug(" keep=%s", keep) - if not keep: - continue - # > ps ax | grep 'ssh -i' | grep localhost - # 19417 ?? Ss 0:00.39 ssh -i /Users/gp/.ssh/id_rsa -f -nNT \ - # -L 19999:localhost:19999 gp@54.172.40.4 - fields = line.split() - try: - pid = int(fields[0]) - except ValueError as e: - _LOG.error( - "Can't parse fields '%s' from line '%s'", fields, line - ) - raise e - _LOG.debug("pid=%s", pid) - pids.append(pid) - txt_out.append(line) - return pids, txt_out - - -def kill_process( - get_pids: Callable[[], Tuple[List[int], str]], - timeout_in_secs: int = 5, - polltime_in_secs: float = 0.1, -) -> None: - """ - Kill all the processes returned by the function `get_pids()`. 
- - :param timeout_in_secs: how many seconds to wait at most before - giving up - :param polltime_in_secs: how often to check for dead processes - """ - import tqdm - - pids, txt = get_pids() - _LOG.info("Killing %d pids (%s)\n%s", len(pids), pids, "\n".join(txt)) - if not pids: - return - for pid in pids: - try: - os.kill(pid, signal.SIGKILL) - except ProcessLookupError as e: - _LOG.warning(str(e)) - # - _LOG.info("Waiting %d processes (%s) to die", len(pids), pids) - for _ in tqdm.tqdm( - range(int(timeout_in_secs / polltime_in_secs)), desc="Polling process" - ): - time.sleep(polltime_in_secs) - pids, _ = get_pids() - if not pids: - break - pids, txt = get_pids() - hdbg.dassert_eq(len(pids), 0, "Processes are still alive:%s", "\n".join(txt)) - _LOG.info("Processes dead") - - -# ############################################################################# -# User interaction -# ############################################################################# - - -def query_yes_no(question: str, *, abort_on_no: bool = True) -> bool: - """ - Ask a yes/no question via `input()` and return their answer. - - :param question: string with the question presented to the user - :param abort_on_no: exit if the user answers "no" - :return: True for "yes" or False for "no" - """ - hdbg.dassert_isinstance(question, str) - hdbg.dassert_isinstance(abort_on_no, bool) - valid = { - "yes": True, - "y": True, - # - "no": False, - "n": False, - } - prompt = " [y/n] " - while True: - sys.stdout.write(question + prompt) - choice = input().lower() - if choice in valid: - ret = valid[choice] - break - _LOG.debug("ret=%s", ret) - if abort_on_no: - if not ret: - print("You answer no: exiting") - sys.exit(-1) - return ret - - -def press_enter_to_continue(prompt: str = "") -> None: - hdbg.dassert_isinstance(prompt, str) - if not prompt: - prompt = "Press Enter to continue..." 
- sys.stdout.write(prompt) - _ = input() - - -# ############################################################################# -# Functions similar to Linux commands. -# ############################################################################# - - -def check_exec(tool: str) -> bool: - """ - Check if an executable can be executed. - - :return: True if the executables "tool" can be executed. - """ - suppress_output = _LOG.getEffectiveLevel() > logging.DEBUG - cmd = f"which {tool}" - abort_on_error = False - rc = system( - cmd, - abort_on_error=abort_on_error, - suppress_output=suppress_output, - log_level=logging.DEBUG, - ) - return rc == 0 - - -def to_pbcopy(txt: str, pbcopy: bool) -> None: - """ - Save the content of txt in the system clipboard. - """ - txt = txt.rstrip("\n") - if not pbcopy: - print(txt) - return - if not txt: - print("Nothing to copy") - return - if hserver.is_host_mac(): - # -n = no new line - cmd = f"echo -n '{txt}' | pbcopy" - system(cmd) - _LOG.warning("\n# Copied to system clipboard:\n%s", txt) - else: - _LOG.warning("pbcopy works only on macOS") - print(txt) - - -# ############################################################################# - -# Copied from hgit to avoid import cycles. - - -def _find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. 
Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. - git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - # Using the `open()` to avoid import cycles with the `hio` module. - with open(git_dir, "r") as f: - txt = f.read() - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root`. - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # Resolve the relative path to the absolute path of the Git directory. - abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. 
- hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. - path = parent - return git_root_dir - - -# End copy. - - -def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: - """ - Find file in the repo. - """ - if root_dir is None: - root_dir = _find_git_root() - _, file_name_out = system_to_one_line( - rf"find {root_dir} -name {file_name} -not -path '*/\.git/*'" - ) - hdbg.dassert_ne(file_name_out, "", "File not found in repo: '%s'", file_name) - return file_name_out - - -# TODO(gp): Use find_file -def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: - """ - Find a file in a directory and report its absolute path. - - :param filename: the name of the file to find (e.g., "helpers_root") - :param search_path: the directory to search in (e.g., "/Users/saggese/src/helpers1") - :return: the absolute path of the file - """ - # Recursive glob. - search_path = os.path.join(search_path, "**", filename) - files = glob.glob(search_path, recursive=True) - if len(files) == 1: - return files[0] - elif len(files) > 1: - msg = f"Found multiple files with basename '{filename}' in directory '{search_path}':\n" - msg += "\n".join(files) - raise RuntimeError(msg) - else: - return None - - -# TODO(gp): -> find_path_greedily -def find_path( - path: str, *, dir_name: str = ".", abort_on_error: bool = False -) -> str: - """ - Find a path in a directory and report its absolute path. - - :param path: the path to find (e.g., "system_tools/path.py") - :param dir_name: the directory to search in (e.g., "/Users/saggese/src/helpers1") - :param abort_on_error: if True, raise an error if the path doesn't exist - :return: the absolute path of the path - """ - # Make the path absolute. - path_out = os.path.abspath(path) - # If the path exists, return it. - if os.path.exists(path_out): - return path_out - # If the path doesn't exist, abort. 
- if abort_on_error: - msg = f"path '{path}' doesn't exist in '{dir_name}'" - raise RuntimeError(msg) - # Look for a file with the same basename in ``dir_name``. - dir_name = os.path.abspath(dir_name) - basename = os.path.basename(path) - path_out = _find_file(basename, search_path=dir_name) - # If the file doesn't exist, abort. - if path_out is None: - msg = f"path '{path}' doesn't exist in '{dir_name}'" - raise RuntimeError(msg) - return path_out - - -# TODO(Nikola): Use filesystem's `du` and move to `hio` instead? -def du(path: str, human_format: bool = False) -> Union[int, str]: - """ - Return the size in bytes of a file or a directory (recursively). - - :param human_format: represent the size in KB, MB, ... instead of bytes - using `hintrospection.format_size()` - """ - hdbg.dassert_path_exists(path) - cmd = f"du -d 0 {path}" + " | awk '{print $1}'" - # > du -d 0 core - # 20 core - _, txt = system_to_one_line(cmd) - _LOG.debug("txt=%s", txt) - # `du` returns size in KB. - size_in_bytes = int(txt) * 1024 - size: Union[int, str] - if human_format: - size = hintros.format_size(size_in_bytes) - else: - size = size_in_bytes - return size - - -def _compute_file_signature(file_name: str, dir_depth: int) -> Optional[List]: - """ - Compute a signature for files using basename and `dir_depth` enclosing - dirs. 
- - :return: tuple of extracted enclosing dirs - - E.g., `("core", "dataflow_model", "utils.py")` - """ - # Split a file like: - # /app/amp/core/test/TestCheckSameConfigs.test_check_same_configs_error/output/test.txt - # into - # ['', 'app', 'amp', 'core', 'test', - # 'TestCheckSameConfigs.test_check_same_configs_error', 'output', 'test.txt'] - path = os.path.normpath(file_name) - paths = path.split(os.sep) - hdbg.dassert_lte(1, dir_depth) - if dir_depth > len(paths): - _LOG.warning( - "Can't compute signature of file_name='%s' with" - " dir_depth=%s, len(paths)=%s", - file_name, - dir_depth, - len(paths), - ) - signature = None - else: - signature = paths[-(dir_depth + 1) :] - return signature - - -# TODO(gp): -> hio.py -def find_file_with_dir( - file_name: str, - *, - root_dir: str = ".", - dir_depth: int = -1, - mode: str = "return_all_results", - candidate_files: Optional[List[str]] = None, -) -> List[str]: - """ - Find a file matching basename and several enclosing dir name starting from - `root_dir`. - - E.g., find a file matching `amp/core/dataflow_model/utils.py` with `dir_depth=1` - means looking for a file with basename 'utils.py' under a dir 'dataflow_model'. - - :param dir_depth: how many enclosing dirs in order to declare a match. - - `-1` to use as many enclosing dirs as possible. E.g., - `/app/amp/core/dataflow/utils.py` will use 3 levels, since `/app` is - removed - :param mode: control the returned list of files, like in - `select_result_file_from_list()` - :param candidate_files: list of results from the `find` command for unit test - mocking - :return: list of files found - """ - _LOG.debug(hprint.func_signature_to_str()) - # Find all the files in the dir with the same basename. - if candidate_files is None: - base_name = os.path.basename(file_name) - cmd = rf"find . -name '{base_name}' -not -path '*/\.git/*'" - # > find . 
-name "utils.py" - # ./amp/core/dataflow/utils.py - # ./amp/core/dataflow_model/utils.py - # ./amp/im/common/test/utils.py - mode_ = "return_all_results" - candidate_files = system_to_files(cmd, dir_name=root_dir, mode=mode_) - _LOG.debug("candidate files=\n%s", "\n".join(candidate_files)) - # - if dir_depth == -1: - # Remove "/app" if present. - prefix = "/app/" - if file_name.startswith(prefix): - file_name = file_name[len(prefix) :] - # Remove "amp" if present. - prefix = "amp/" - if file_name.startswith(prefix): - file_name = file_name[len(prefix) :] - # Count how many dirs levels there are. - dir_depth = len(os.path.normpath(file_name).split("/")) - 1 - _LOG.debug( - "inferred dir_depth=%s for file_name=%s", dir_depth, file_name - ) - # Check the matching files. - matching_files = [] - for candidate_file_name in sorted(candidate_files): - signature1 = _compute_file_signature(candidate_file_name, dir_depth) - signature2 = _compute_file_signature(file_name, dir_depth) - is_equal = signature1 == signature2 - _LOG.debug("found_file=%s -> is_equal=%s", candidate_file_name, is_equal) - if is_equal: - matching_files.append(candidate_file_name) - _LOG.debug( - "Found %d files:\n%s", len(matching_files), "\n".join(matching_files) - ) - # Select the result based on mode. - res = select_result_file_from_list(matching_files, mode, file_name) - _LOG.debug("-> res=%s", str(res)) - return res - - -# https://stackoverflow.com/questions/169070 -@contextlib.contextmanager -def cd(dir_name: str) -> Generator[None, None, None]: - """ - Context manager managing changing directory. 
- """ - hdbg.dassert_dir_exists(dir_name) - current_dir = os.getcwd() - _LOG.debug("Entering ctx manager: " + hprint.to_str("current_dir")) - try: - os.chdir(dir_name) - _LOG.debug("Switched to dir '%s'", os.getcwd()) - yield - finally: - _LOG.debug("Switching back to dir '%s'", current_dir) - os.chdir(current_dir) - _LOG.debug("Exiting ctx manager") - - -# ############################################################################# -# File timestamping. -# ############################################################################# - - -def has_timestamp(file_name: str) -> bool: - """ - Check whether `file_name` contains a timestamp. - - The timestamp is in the format `%Y%m%d-%H_%M_%S` (e.g., - 20210724-12_45_51). E.g., this function for - `experiment.RH1E.5T.20210724-12_45_51` returns True. - """ - file_name = os.path.basename(file_name) - # E.g., %Y%m%d-%H_%M_%S - # The separator is _, -, or nothing. - sep = "[-_]?" - regex = sep.join( - [r"\d{4}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}"] - ) - _LOG.debug("regex=%s", regex) - occurrences = re.findall(regex, file_name) - hdbg.dassert_lte( - len(occurrences), 1, "Found more than one timestamp", str(occurrences) - ) - m = re.search("(" + regex + ")", file_name) - has_timestamp_ = m is not None - if has_timestamp_: - m = cast(Match[str], m) - _LOG.debug("Found a timestamp '%s' in '%s'", m.group(1), file_name) - return has_timestamp_ - - -def append_timestamp_tag(file_name: str, tag: str) -> str: - """ - Add a tag and the current timestamp to a filename, before the extension. - - :return: new filename - """ - dir_name = os.path.dirname(file_name) - base_name = os.path.basename(file_name) - name, extension = os.path.splitext(base_name) - tag_ = "" - # E.g., 20210723-20_52_00 - if not has_timestamp(file_name): - import helpers.hdatetime as hdateti - - tag_ += "." + hdateti.get_current_timestamp_as_string(tz="ET") - # Add tag, if specified. - if tag: - # If the tag is specified prepend a `.` in the filename. 
- tag_ += "." + tag - new_file_name = os.path.join(dir_name, "".join([name, tag_, extension])) - _LOG.debug(hprint.to_str("file_name new_file_name")) - return new_file_name - - -def tee( - cmd: str, executable: str, abort_on_error: bool -) -> Tuple[int, List[str]]: - """ - Execute command and return its exit code and output lines. - - Captures output, removes empty lines, and optionally aborts on error. - - :param cmd: Command string to execute - :param executable: Executable to use for running the command - :param abort_on_error: Whether to abort execution if command fails - :return: Tuple of (exit code, list of non-empty output lines) - """ - _LOG.debug("cmd=%s executable=%s", cmd, executable) - rc, output = system_to_string(cmd, abort_on_error=abort_on_error) - hdbg.dassert_isinstance(output, str) - output1 = output.split("\n") - _LOG.debug("output1= (%d)\n'%s'", len(output1), "\n".join(output1)) - output2 = hprint.remove_empty_lines(output1) - _LOG.debug("output2= (%d)\n'%s'", len(output2), "\n".join(output2)) - hdbg.dassert_list_of_strings(output2) - return rc, output2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py deleted file mode 100644 index 5278e3984..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py +++ /dev/null @@ -1,180 +0,0 @@ -""" -Import as: - -import helpers.htable as htable -""" - -import copy -import csv -import logging -from typing import Any, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -TableType = List[List[str]] - - -# ############################################################################# -# Table -# ############################################################################# - - -class Table: - """ - 
A simple (rectangular) table without introducing a dependency from Pandas. - - The element in the table can be anything. - """ - - @staticmethod - def _check_table(table: TableType, column_names: List[str]) -> None: - """ - Check that the table is well-formed (e.g., the list of lists is - rectangular). - """ - hdbg.dassert_isinstance(table, list) - hdbg.dassert_isinstance(column_names, list) - hdbg.dassert_no_duplicates(column_names) - # Columns have no leading or trailing spaces. - for column_name in column_names: - hdbg.dassert_eq(column_name, column_name.rstrip().lstrip()) - # Check that the list of lists is rectangular. - for row in table: - hdbg.dassert_isinstance(table, list) - hdbg.dassert_eq( - len(row), - len(column_names), - "Invalid row='%s' for cols='%s'", - row, - column_names, - ) - - def __repr__(self) -> str: - res = "" - res += f"cols={str(self._column_names)}" - res += "\ntable=\n" + "\n".join(map(str, self._table)) - res += "\n" + f"size={str(self.size())}" - return res - - def __init__(self, table: TableType, column_names: List[str]) -> None: - # Check that the inputs are well-formed. - self._check_table(table, column_names) - # Save state. - self._table = table - self._column_names = column_names - _LOG.debug("%s", self.__repr__()) - # Map a column name to the index of the corresponding column, to allow - # indexing by column. - self._col_to_idx = { - col: idx for idx, col in enumerate(self._column_names) - } - _LOG.debug("col_to_idx=%s", str(self._col_to_idx)) - - @classmethod - def from_text(cls, cols: List[str], txt: str, delimiter: str) -> "Table": - """ - Build a table from a list of columns and the body of a CSV file. - """ - hdbg.dassert_isinstance(txt, str) - table = list(csv.reader(txt.split("\n"), delimiter=delimiter)) - return cls(table, cols) - - def size(self) -> Tuple[int, int]: - """ - Return the size of the table. 
- - :return: number of rows x columns (i.e., numpy / Pandas convention) - """ - return len(self._table), len(self._column_names) - - def filter_rows(self, column_name: str, value: str) -> "Table": - """ - Return a Table filtered with rows filtered by the criteria "field == - value". - """ - _LOG.debug("self=\n%s", repr(self)) - # Filter the rows. - hdbg.dassert_in(column_name, self._col_to_idx.keys()) - rows_filter = [ - row - for row in self._table - if row[self._col_to_idx[column_name]] == value - ] - _LOG.debug(hprint.to_str("rows_filter")) - # Build the resulting table. - table_filter = Table(rows_filter, self._column_names) - _LOG.debug("table_filter=\n%s", repr(table_filter)) - return table_filter - - def get_column(self, column_name: str) -> List[Any]: - """ - Return the list of unique values for a row / field. - """ - hdbg.dassert_in(column_name, self._column_names) - column_idx = self._col_to_idx[column_name] - # Scan the rows to extract the column. - vals = [] - for row in self._table: - vals.append(row[column_idx]) - return vals - - def unique(self, column_name: str) -> List[Any]: - """ - Return a list of unique values for a field. - """ - vals = self.get_column(column_name) - vals = sorted(list(set(vals))) - return vals - - def remove_column(self, column_name: str) -> "Table": - """ - Return a new Table with the specified column removed. - - :param column_name: name of the column to remove - :return: new Table without the specified column - """ - hdbg.dassert_in(column_name, self._column_names) - # Find the index of the column to remove. - column_idx = self._col_to_idx[column_name] - # Create new column names list without the removed column. - new_column_names = [ - col for col in self._column_names if col != column_name - ] - # Create new table rows without the removed column. - new_table = [ - [val for idx, val in enumerate(row) if idx != column_idx] - for row in self._table - ] - # Build and return the new table. 
- return Table(new_table, new_column_names) - - def __str__(self) -> str: - """ - Return a string representing the table with columns aligned. - """ - table = copy.deepcopy(self._table) - table.insert(0, self._column_names) - # Convert the cells to strings. - table_as_str = [[str(cell) for cell in row] for row in table] - # Find the length of each columns. - lengths = [max(map(len, col)) for col in zip(*table_as_str)] - _LOG.debug(hprint.to_str("lengths")) - # Compute format for the columns. - fmt = " ".join(f"{{:{x}}} |" for x in lengths) - _LOG.debug(hprint.to_str("fmt")) - # Add the row separating the column names. - row_sep = ["-" * length for length in lengths] - table.insert(1, row_sep) - table_as_str = [[str(cell) for cell in row] for row in table] - # Format rows. - rows_as_str = [fmt.format(*row) for row in table_as_str] - # Remove trailing spaces. - rows_as_str = [row.rstrip() for row in rows_as_str] - # Create string. - res = "\n".join(rows_as_str) - # res += "\nsize=" + str(self.size()) - return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py deleted file mode 100644 index 8ef0e3a4f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -""" -Add a description of what the script does and examples of command lines. - -Check dev_scripts/linter.py to see an example of a script using this -template. 
- -Import as: - -import dev_scripts_helpers.script_template as dscscske -""" - -import argparse -import logging - -import helpers.hlogging as hloggin -import helpers.hparser as hparser - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("positional", nargs="*", help="...") - parser.add_argument("--dst_dir", action="store", help="Destination dir") - hparser.add_verbosity_arg(parser) - return parser - - -def _main(parser: argparse.ArgumentParser) -> None: - args = parser.parse_args() - hparser.parse_verbosity_args(args, use_exec_path=True) - hloggin.test_logger() - # - # logging.disable(logging.WARNING) - hloggin.shut_up_log_debug(_LOG) - hloggin.test_logger() - - -if __name__ == "__main__": - _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py deleted file mode 100644 index 7b6506ce6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py +++ /dev/null @@ -1,262 +0,0 @@ -""" -Utilities for protecting content during text processing. - -Extract and restore content that should not be modified by formatters and text -transformations (code blocks, comments, etc.). 
- -Import as: - -import helpers.htext_protect as htexprot -""" - -import logging -import re -from typing import Dict, List, Optional, Tuple - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def _is_fenced_block_delimiter(line: str) -> bool: - """ - Check if line is a fenced block delimiter (```). - - :param line: Line to check - :return: True if line matches fenced block delimiter pattern - """ - return bool(re.match(r"^\s*```", line)) - - -def _is_math_block_delimiter(line: str) -> bool: - """ - Check if line is a math block delimiter ($$). - - :param line: Line to check - :return: True if line matches math block delimiter pattern - """ - return bool(re.match(r"^\s*\$\$\s*$", line)) - - -def _extract_single_line_html_comment(line: str) -> Optional[str]: - """ - Extract single-line HTML comment from line if present. - - Skips TOC markers ( and ) as they need to be - processed by the TOC generation logic. - - :param line: Line to check - :return: Full comment string if found, None otherwise - """ - # Skip TOC markers: they are processed by `refresh_toc`. - if "" in line or "" in line: - return None - # Match on single line. - m = re.match(r"^(\s*\s*)$", line) - if m: - return m.group(1) - return None - - -def _is_html_comment_start(line: str) -> bool: - """ - Check if line starts an HTML comment. - - Skips TOC markers as they need to be processed by TOC generation logic. - - :param line: Line to check - :return: True if line contains - """ - # Skip TOC markers. - if "" in line or "" in line: - return False - return "" not in line - - -def _is_html_comment_end(line: str) -> bool: - """ - Check if line ends an HTML comment. 
- - :param line: Line to check - :return: True if line contains --> without opening " in line and ") for .md and .txt files - - LaTeX comments (% ...) for .tex files - - :param lines: The lines to be processed - :param file_type: File extension ('md', 'txt', or 'tex') - :return: Tuple of (lines with placeholders, mapping of placeholders to - original content) - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_in(file_type, ["md", "txt", "tex"]) - _LOG.debug("Extracting protected content for file_type=%s", file_type) - # - protected_map: Dict[str, str] = {} - counter = 1 - lines_new: List[str] = [] - # State tracking. - in_fenced_block = False - in_math_block = False - in_html_comment = False - fenced_block_lines: List[str] = [] - math_block_lines: List[str] = [] - html_comment_lines: List[str] = [] - # Process each line. - for line in lines: - # Handle fenced blocks (for .md and .txt files). - if file_type in ["md", "txt"] and _is_fenced_block_delimiter(line): - if not in_fenced_block: - # Opening delimiter. - in_fenced_block = True - lines_new.append(line) - fenced_block_lines = [] - else: - # Closing delimiter: protect only content, keep delimiters visible. - placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(fenced_block_lines) - counter += 1 - lines_new.append(placeholder) - lines_new.append(line) - in_fenced_block = False - fenced_block_lines = [] - continue - # Inside fenced block: accumulate. - if in_fenced_block: - fenced_block_lines.append(line) - continue - # Handle math blocks (for all file types). - if _is_math_block_delimiter(line): - if not in_math_block: - # Opening delimiter. - in_math_block = True - lines_new.append(line) - math_block_lines = [] - else: - # Closing delimiter: protect only content, keep delimiters visible. 
- placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(math_block_lines) - counter += 1 - lines_new.append(placeholder) - lines_new.append(line) - in_math_block = False - math_block_lines = [] - continue - # Inside math block: accumulate. - if in_math_block: - math_block_lines.append(line) - continue - # Handle HTML comments (for .md and .txt files). - if file_type in ["md", "txt"]: - # Single-line HTML comment. - single_line_comment = _extract_single_line_html_comment(line) - if single_line_comment: - placeholder = f"<<>>" - protected_map[placeholder] = single_line_comment - counter += 1 - lines_new.append(placeholder) - continue - # Multi-line HTML comment start. - if _is_html_comment_start(line): - in_html_comment = True - html_comment_lines = [line] - continue - # Multi-line HTML comment end. - if in_html_comment and _is_html_comment_end(line): - html_comment_lines.append(line) - placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(html_comment_lines) - counter += 1 - lines_new.append(placeholder) - in_html_comment = False - html_comment_lines = [] - continue - # Inside multi-line HTML comment: accumulate. - if in_html_comment: - html_comment_lines.append(line) - continue - # Handle LaTeX comments (for .tex files). - if file_type == "tex" and _is_latex_comment(line): - placeholder = f"<<>>" - protected_map[placeholder] = line - counter += 1 - lines_new.append(placeholder) - continue - # Regular line: keep as-is. - lines_new.append(line) - # Check for unclosed blocks. 
- if in_fenced_block: - _LOG.warning("Unclosed fenced block detected") - if in_math_block: - _LOG.warning("Unclosed math block detected") - if in_html_comment: - _LOG.warning("Unclosed HTML comment detected") - _LOG.debug("Extracted %d protected content blocks", len(protected_map)) - return lines_new, protected_map - - -def restore_protected_content( - lines: List[str], - protected_map: Dict[str, str], -) -> List[str]: - """ - Restore protected content by replacing placeholders with original text. - - :param lines: Lines containing placeholders - :param protected_map: Mapping of placeholders to original content - :return: Lines with restored content - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(protected_map, dict) - _LOG.debug("Restoring %d protected content blocks", len(protected_map)) - # - lines_new: List[str] = [] - for line in lines: - # Check if line contains any placeholder. - restored = False - for placeholder, original in protected_map.items(): - if placeholder in line: - if line.strip() == placeholder: - # Placeholder is entire line: replace with multi-line content. - lines_new.extend(original.split("\n")) - restored = True - break - else: - # Placeholder embedded in line: replace inline. - line = line.replace(placeholder, original) - if not restored: - lines_new.append(line) - return lines_new diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py deleted file mode 100644 index 31cd642cf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -""" -`timeout` decorator which is used to limit function execution time. 
- -Import as: - -import helpers.hthreading as hthread -""" - -import _thread -import sys -import threading -from typing import Any - - -def _timeout_handler() -> None: - sys.stderr.flush() - # Raise KeyboardInterrupt. - _thread.interrupt_main() - - -def timeout(timeout_sec: int) -> Any: - """ - Exit process if its execution takes longer than timeout_sec seconds. This - is a decorator that issue a KeyboardInterrupt, that will be raised if time - limit is exceed. - - :param timeout_sec: time limit - """ - - def outer(fn: Any) -> Any: - def inner(*args: Any, **kwargs: Any) -> Any: - timer = threading.Timer(timeout_sec, _timeout_handler) - timer.start() - try: - result = fn(*args, **kwargs) - finally: - timer.cancel() - return result - - return inner - - return outer diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py deleted file mode 100644 index c3aed5e80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py +++ /dev/null @@ -1,275 +0,0 @@ -""" -Import as: - -import helpers.htimer as htimer -""" - -import logging -import time -from typing import Any, Callable, Optional, Tuple, cast - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -# Avoid dependency from other `helpers` modules to prevent import cycles. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Timer -# ############################################################################# - - -class Timer: - """ - Measure time elapsed in one or more intervals. - """ - - def __init__(self, *, start_on_creation: bool = True): - """ - Create a timer. - - If "start_on_creation" is True start automatically the timer. 
- """ - self._stop: Optional[float] = None - # Store the time for the last elapsed interval. - self._last_elapsed: Optional[float] = None - # Store the total time for all the measured intervals. - self._total_elapsed = 0.0 - if start_on_creation: - # For better accuracy start the timer as last action, after all the - # bookkeeping. - self._start: Optional[float] = time.time() - else: - self._start = None - - def stop(self) -> None: - """ - Stop the timer and accumulate the interval. - """ - # Timer must have not been stopped before. - hdbg.dassert(self.is_started() and not self.is_stopped()) - # For better accuracy stop the timer as first action. - self._stop = time.time() - # Update the total elapsed time. - # Sometimes we get numerical error tripping this assertion - # (e.g., '1619552498.813126' <= '1619552498.805193') so we give - # a little slack to the assertion. - # hdbg.dassert_lte(self._start, self._stop + 1e-2) - self._last_elapsed = cast(float, self._stop) - cast(float, self._start) - self._total_elapsed += self._last_elapsed - # Stop. - self._start = None - self._stop = None - - def get_elapsed(self) -> float: - """ - Stop if not stopped already, and return the elapsed time. - """ - if not self.is_stopped(): - self.stop() - hdbg.dassert_is_not(self._last_elapsed, None) - return cast(float, self._last_elapsed) - - # ///////////////////////////////////////////////////////////////////////// - - def resume(self) -> None: - """ - Resume the timer after a stop. - """ - # Timer must have been stopped before. - hdbg.dassert(self.is_started() or self.is_stopped()) - self._stop = None - # Start last for better accuracy. - self._start = time.time() - - def is_started(self) -> bool: - return ( - self._start is not None and self._start >= 0 and self._stop is None - ) - - def is_stopped(self) -> bool: - return self._start is None and self._stop is None - - def get_total_elapsed(self) -> float: - """ - Stop if not stopped already, and return the total elapsed time. 
- """ - if not self.is_stopped(): - self.stop() - return self._total_elapsed - - def accumulate(self, *, timer: "Timer") -> None: - """ - Accumulate the value of a timer to the current object. - """ - # Both timers must be stopped. - hdbg.dassert(timer.is_stopped()) - hdbg.dassert(self.is_stopped()) - hdbg.dassert_lte(0.0, timer.get_total_elapsed()) - self._total_elapsed += timer.get_total_elapsed() - - def __repr__(self) -> str: - """ - Return string with the intervals measured so far. - """ - measured_time = self._total_elapsed - if self.is_started() and not self.is_stopped(): - # Timer still running. - measured_time += time.time() - cast(float, self._start) - ret = "%.3f secs" % measured_time - return ret - - -# ############################################################################# - - -_TimerMemento = Tuple[int, str, Timer] - - -def dtimer_start(log_level: int, message: str) -> _TimerMemento: - """ - Start measuring time. - - :return: memento of the timer. - """ - _LOG.log(log_level, "%s ...", message) - memento = log_level, message, Timer() - return memento - - -def dtimer_stop(memento: _TimerMemento) -> Tuple[str, float]: - """ - End measuring time. - - :return: - - message as as string - - time in seconds (int) - """ - log_level, message, timer = memento - timer.stop() - elapsed_time = round(timer.get_elapsed(), 3) - msg = f"{message} done (%.3f s)" % elapsed_time - _LOG.log(log_level, msg) - return msg, elapsed_time - - -# TODO(gp): Is this useful / used? -def stop_timer(timer: Timer) -> str: - timer.stop() - elapsed_time = round(timer.get_elapsed(), 3) - msg = "%.3f s" % elapsed_time - return msg - - -# ############################################################################# -# TimedScope -# ############################################################################# - - -class TimedScope: - """ - Measure the execution time of a block of code. - - ``` - with htimer.TimedScope(logging.INFO, "Work") as ts: - ... work work work ... 
- ``` - """ - - def __init__( - self, log_level: int, message: str, *, profile_memory: bool = False - ): - self._log_level = log_level - self._message = message - # TODO(gp): Implement profiling also memory using dmemory_start/end. - # State. - self._memento: Optional[_TimerMemento] = None - self.elapsed_time = None - - def get_result(self) -> str: - msg: str = f"{self._message} done (%.3f s)" % self.elapsed_time - return msg - - def __enter__(self) -> "TimedScope": - self._memento = dtimer_start(self._log_level, self._message) - return self - - def __exit__(self, *args: Any) -> None: - if self._memento is not None: - msg, self.elapsed_time = dtimer_stop(self._memento) - _ = msg - - -# ############################################################################# -# Decorator. -# ############################################################################# - - -def timed(f: Callable) -> Callable: - """ - Add a timer around the invocation of a function. - """ - - def wrapper(*args: Any, **kwargs: Any) -> Any: - func_name = getattr(f, "__name__", "unknown_function") - # - timer = dtimer_start(0, func_name) - v = f(*args, **kwargs) - dtimer_stop(timer) - return v - - return wrapper - - -# TODO(gp): Add an object that accumulates the times from multiple timers. -# E.g., use a dict for message -> time - - -# ############################################################################# - - -_MemoryMemento = Tuple[int, str, hloggin.MemoryUsage] - - -def dmemory_start(log_level: int, message: str) -> _MemoryMemento: - """ - Start measuring memory. - - :return: memento of the memory profile - """ - _LOG.log(log_level, "%s ...", message) - memory_usage = hloggin.get_memory_usage() - memento = (log_level, message, memory_usage) - return memento - - -def dmemory_stop(memento: _MemoryMemento, *, mode: str = "all") -> str: - """ - Stop measuring memory. 
- - :return: message as as string - """ - log_level, message, start_memory_usage = memento - end_memory_usage = hloggin.get_memory_usage() - verbose = False - start_mem = hloggin.memory_to_str(start_memory_usage, verbose=verbose) - end_mem = hloggin.memory_to_str(end_memory_usage, verbose=verbose) - diff_mem = tuple(x - y for x, y in zip(end_memory_usage, start_memory_usage)) - diff_mem = hloggin.memory_to_str(diff_mem, verbose=verbose) - # Package the output. - msg = [] - msg.append(f"{message} done:") - if mode == "all": - msg.append(f"start=({start_mem})") - msg.append(f"end=({end_mem})") - msg.append(f"diff=({diff_mem})") - elif mode == "only_diff": - msg.append(f"diff=({diff_mem})") - else: - raise ValueError(f"Invalid mode='{mode}'") - msg = " ".join(msg) - _LOG.log(log_level, msg) - return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py deleted file mode 100644 index bb16ad381..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Import as: - -import helpers.htqdm as htqdm -""" - -import io -import logging -from typing import Any, Optional - -# Avoid dependency from other `helpers` modules, such as `helpers.hjoblib`, to -# prevent import cycles. - - -# ############################################################################# -# TqdmToLogger -# ############################################################################# - - -# From https://github.com/tqdm/tqdm/issues/313 -class TqdmToLogger(io.StringIO): - """ - Output stream for `tqdm` which will output to logger module instead of the - `stdout`. - - Use as: - ``` - from tqdm.autonotebook import tqdm - - tqdm_out = TqdmToLogger(_LOG, level=logging.INFO) - for ... 
tqdm(..., file=tqdm_out): - ``` - """ - - logger = None - level = None - buf = "" - - def __init__(self, logger: Any, level: Optional[int] = None): - super().__init__() - self.logger = logger - self.level = level or logging.INFO - - def write(self, buf: str) -> None: - self.buf = buf.strip("\r\n\t ") - - def flush(self) -> None: - self.logger.log(self.level, self.buf) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py deleted file mode 100644 index 03de65ce1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py +++ /dev/null @@ -1,228 +0,0 @@ -""" -Import as: - -import helpers.htraceback as htraceb -""" - -import logging -import os -import re -from typing import Any, List, Match, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hgit as hgit - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Move some code to `hcfile.py`. - -# Store elements parsed from a line of a traceback: -# (file_name, line_num, text) -# E.g., -# ("test/test_lib_tasks.py", -# 27, -# "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)" -# ) -CfileRow = Tuple[str, int, str] - - -def cfile_row_to_str(cfile_row: CfileRow) -> str: - # helpers/git.py:295:def get_repo_long_name_from_client(super_module - hdbg.dassert_isinstance(cfile_row, tuple) - return ":".join(list(map(str, cfile_row))) - - -def cfile_to_str(cfile: List[CfileRow]) -> str: - hdbg.dassert_isinstance(cfile, list) - return "\n".join(map(cfile_row_to_str, cfile)) - - -def parse_traceback( - txt: str, *, purify_from_client: bool = True -) -> Tuple[List[CfileRow], Optional[str]]: - """ - Parse a string containing text including a Python traceback. 
- - :param txt: the text to parse - :param purify_from_client: express the files with respect to the Git root - :return: - - a list of `CFILE_ROW`, e.g., - ``` - ("test/test_lib_tasks.py", - 27, - "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)") - - a string storing the traceback, like: - ``` - Traceback (most recent call last): - File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - ``` - - A `None` value means that no traceback was found. - """ - # TODO(gp): Horrible hack to get the tests to pass. IMO this whole function - # needs to be rewritten using a proper parser or library. Now it's full - # of weird handling of edge cases. - txt += "\n" - # - lines = txt.split("\n") - # pylint: disable=line-too-long - # Remove the artifacts of a GH run. E.g., - # "Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined" -> - # -> "NameError: name 'cofinanc' is not defined". - lines = [ - re.split( - r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+Z ", - line, - )[-1] - for line in lines - ] - state = "look_for" - cfile: List[CfileRow] = [] - i = 0 - start_idx = end_idx = 0 - while i < len(lines): - line = lines[i] - _LOG.debug("state=%-10s i=%d: line='%s'", state, i, line) - if state == "look_for": - if line.startswith("Traceback (most recent call last):"): - start_idx = i - # Update the state. 
- state = "parse" - i += 1 - continue - elif state == "parse": - # The file looks like: - # File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh - # actual = ltasks._get_gh_issue_title(issue_id, repo) - regex = r"^\s*File \"(.+)\", line (\d+), in (\S+)$" - m = re.match(regex, line) - hdbg.dassert(m, "Can't parse '%s'", line) - m: Match[Any] - file_name = m.group(1) - line_num = int(m.group(2)) - func_name = m.group(3) - _LOG.debug(" -> %s %d %s", file_name, line_num, func_name) - # - # Parse the next line until the next `File...`. - _LOG.debug("Search end of snippet") - j = i + 1 - hdbg.dassert_lte(j, len(lines)) - while j < len(lines): - _LOG.debug(" j=%d: line='%s'", j, lines[j]) - if lines[j].startswith(' File "') or not lines[j].startswith( - " " - ): - _LOG.debug(" Found end of snippet") - break - j += 1 - # Concatenate the lines into a single line. - code = lines[i + 1 : j] - _LOG.debug(" -> code: [%d, %d]\n%s", i, j, "\n".join(code)) - code = map(lambda x: x.rstrip().lstrip(), code) - code_as_single_line = "/".join(code) - _LOG.debug(" -> code_as_single_line=\n%s", code_as_single_line) - # Assemble the result. - file_name = os.path.normpath(file_name) - cfile_row = ( - file_name, - line_num, - func_name + ":" + code_as_single_line, - ) - _LOG.debug(" => cfile_row='%s'", cfile_row_to_str(cfile_row)) - cfile.append(cfile_row) - # Update the state. - if not lines[j].startswith(" "): - _LOG.debug(" Found end of traceback") - end_idx = j - state = "end" - break - state = "parse" - i = j - continue - # - i += 1 - # - if state == "look_for": - # We didn't find a traceback. - cfile = [] - traceback = None - elif state == "end": - if ( - end_idx < len(lines) - 1 - and "Error:" not in lines[end_idx - 1] - and "Error:" in lines[end_idx] - ): - # Extend the traceback to the lines with the error description. 
- # E.g., for the snippet below: - # ``` - # if repo_short_name == "amp": - # NameError: name 'repo_short_name' is not defined - # ``` - # If the parsed traceback stops at 'if repo_short_name == "amp":', - # and thus, its last line does not include the error description - # ("NameError:..."), and the following line does include the error - # description, then the traceback will be extended to include the - # following line, making the parsed traceback end with the following - # two lines: - # ``` - # if repo_short_name == "amp": - # NameError: name 'repo_short_name' is not defined - # ``` - to_break = False - while end_idx < len(lines) - 1 and not to_break: - end_idx += 1 - line = lines[end_idx] - _LOG.debug( - "Extend traceback: to_break=%s, end_idx=%s, line='%s'", - to_break, - end_idx, - line, - ) - if ( - "________ Test" in line - or "====== slowest 3 durations" in line - ): - # Stop if we have reached the next traceback or the end of the - # pytest report. - to_break = True - hdbg.dassert_lte(0, start_idx) - hdbg.dassert_lte(start_idx, end_idx) - hdbg.dassert_lt(end_idx, len(lines)) - _LOG.debug("start_idx=%d end_idx=%d", start_idx, end_idx) - traceback = "\n".join(lines[start_idx:end_idx]) - else: - raise ValueError(f"Invalid state='{state}'") - _LOG.debug("traceback=\n%s", traceback) - _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) - # Purify filenames from client so that refer to files in this client. - if cfile and purify_from_client: - _LOG.debug("# Purifying from client") - cfile_tmp = [] - for cfile_row in cfile: - file_name, line_num, text = cfile_row - # Leave the files relative to the current dir. 
- root_dir = hgit.get_client_root(super_module=False) - mode = "return_all_results" - file_names = hgit.find_docker_file( - file_name, root_dir=root_dir, mode=mode - ) - if len(file_names) == 0: - _LOG.warning("Can't find file corresponding to '%s'", file_name) - elif len(file_names) > 1: - _LOG.warning( - "Found multiple potential files corresponding to '%s'", - file_name, - ) - else: - file_name = file_names[0] - cfile_tmp.append((file_name, line_num, text)) - cfile = cfile_tmp - _LOG.debug("# After purifying from client") - _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) - return cfile, traceback diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py deleted file mode 100644 index d706292ed..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python - -""" -Allow translating text using AWS Translate. It can be used as a module or CLI -tool. - -Supported languages and languages codes: -https://docs.aws.amazon.com/translate/latest/dg/what-is.html - -Import as: - -import helpers.htranslate as htransl -""" - -import argparse -import configparser -import logging -import pathlib -import sys -from typing import Optional, Tuple - -import boto3 - -_LOG = logging.getLogger(__name__) - - -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "lang", - help=( - "source language code. 
" - "https://docs.aws.amazon.com/translate/latest/dg/what-is.html" - ), - ) - parser.add_argument("text", help="string to translate") - parser.add_argument( - "--aws", - type=pathlib.Path, - dest="credentials", - default=pathlib.Path().home() / ".aws/credentials", - help="Path to the aws credentials file.", - ) - return parser.parse_args() - - -def _load_credentials(conf_path: pathlib.Path) -> Tuple[str, str]: - """ - Load aws credentilas from config file. - - :param conf_path:credentials file path. - :return: A tuple consist of aws_access and aws_secret keys. - """ - config = configparser.ConfigParser() - config.read(conf_path) - try: - access = config.get("default", "aws_access_key_id") - secret = config.get("default", "aws_secret_access_key") - except configparser.NoOptionError as err: - _LOG.error("Unable to read option for: %s", err.args) - sys.exit(1) - else: - return access, secret - - -# ############################################################################# -# TranslateAPI -# ############################################################################# - - -class TranslateAPI: - def __init__( - self, - aws_access_key: str, - aws_secret_key: str, - region: Optional[str] = "us-east-2", - ) -> None: - self._translate = boto3.client( - service_name="translate", - region_name=region, - use_ssl=True, - aws_access_key_id=aws_access_key, - aws_secret_access_key=aws_secret_key, - ) - - def translate_text(self, text: str, lang_code: str) -> str: - """ - Translate given text into English. Amazon has a limit on text size: - 5,000 bytes. - - :param text: Foreing language text. - :param lang_code: Language code in accordance with supported - languages and code of Amazon. - :return: English text. 
- """ - tr = self._translate.translate_text( - Text=text, SourceLanguageCode=lang_code, TargetLanguageCode="en" - ) - return str(tr.get("TranslatedText")) - - -if __name__ == "__main__": - args = _parse_args() - aws_access, aws_secret = _load_credentials(args.credentials) - api = TranslateAPI(aws_access, aws_secret) - result = api.translate_text(args.text, args.lang) - print(result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py deleted file mode 100644 index 1bb3472d7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Contain general types based on standard Python libraries. - -Import as: - -import helpers.htypes as htypes -""" - -from typing import Any, Dict - -Kwargs = Dict[str, Any] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py deleted file mode 100644 index d585faeef..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py +++ /dev/null @@ -1,1876 +0,0 @@ -""" -Enhanced unit testing framework built on top of unittest and pytest. 
- -This module provides: -- TestCase base class with golden file testing capabilities -- Utilities for comparing strings, dataframes, and other outputs -- Test outcome management with update and incremental modes -- Directory management for input, output, and scratch space -- Integration with Git for managing test outcomes - -Import as: - -import helpers.hunit_test as hunitest -""" - -import abc -import collections -import inspect -import logging -import os -import pprint -import random -import re -import sys -import traceback -import unittest -from typing import Any, Dict, List, Mapping, Optional, Tuple - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htimer as htimer -import helpers.hunit_test_purification as huntepur -import helpers.hwall_clock_time as hwacltim -import helpers.repo_config_utils as hrecouti - -# We use strings as type hints (e.g., 'pd.DataFrame') since we are not sure -# we have the corresponding libraries installed. - - -# Minimize dependencies from installed packages. - -# TODO(gp): Use `hprint.color_highlight`. -_WARNING = "\033[33mWARNING\033[0m" - -try: - import numpy as np - - _HAS_NUMPY = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_NUMPY = False -try: - import pandas as pd - - _HAS_PANDAS = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_PANDAS = False - -try: - import matplotlib.pyplot as plt - - _HAS_MATPLOTLIB = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_MATPLOTLIB = False - - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - -# ############################################################################# - -# Global setter / getter for updating test. - -# This controls whether the output of a test is updated or not. 
-# Set by `conftest.py`. -_UPDATE_TESTS = False - - -# TODO(gp): -> ..._update_outcomes. -def set_update_tests(val: bool) -> None: - """ - Set the global flag for updating test outcomes. - - :param val: True to enable updating test outcomes, False otherwise - """ - global _UPDATE_TESTS - _UPDATE_TESTS = val - - -def get_update_tests() -> bool: - """ - Get the current state of the update tests flag. - - :return: True if test outcomes should be updated, False otherwise - """ - return _UPDATE_TESTS - - -# ############################################################################# - -# Global setter / getter for incremental mode. - -# This is useful when a long test wants to reuse some data already generated. -# Set by conftest.py. -_INCREMENTAL_TESTS = False - - -def set_incremental_tests(val: bool) -> None: - """ - Set the global flag for incremental test mode. - - :param val: True to enable incremental mode, False otherwise - """ - global _INCREMENTAL_TESTS - _INCREMENTAL_TESTS = val - - -def get_incremental_tests() -> bool: - """ - Get the current state of the incremental tests flag. - - :return: True if incremental mode is enabled, False otherwise - """ - return _INCREMENTAL_TESTS - - -# ############################################################################# - -_CONFTEST_IN_PYTEST = False - - -# TODO(gp): Use https://stackoverflow.com/questions/25188119 -# TODO(gp): -> is_in_unit_test() -def in_unit_test_mode() -> bool: - """ - Return True if we are inside a pytest run. - - This is set by `conftest.py`. - """ - return _CONFTEST_IN_PYTEST - - -# ############################################################################# - - -# Set by `conftest.py`. -_GLOBAL_CAPSYS = None - - -def pytest_print(txt: str) -> None: - """ - Print bypassing `pytest` output capture. - """ - with _GLOBAL_CAPSYS.disabled(): # type: ignore - sys.stdout.write(txt) - - -def pytest_warning(txt: str, prefix: str = "") -> None: - """ - Print a warning bypassing `pytest` output capture. 
- - :param prefix: prepend the message with a string - """ - txt_tmp = "" - if prefix: - txt_tmp += prefix - txt_tmp += hprint.color_highlight("WARNING", "yellow") + f": {txt}" - pytest_print(txt_tmp) - - -# ############################################################################# -# Generation and conversion functions. -# ############################################################################# - - -# TODO(gp): Is this dataflow Info? If so it should go somewhere else. -def convert_info_to_string(info: Mapping) -> str: - """ - Convert info to string for verifying test results. - - Info often contains `pd.Series`, so pandas context is provided to print all rows - and all contents. - - :param info: info to convert to string - :return: string representation of info - """ - output = [] - # Provide context for full representation of `pd.Series` in info. - with pd.option_context( - "display.max_colwidth", - int(1e6), - "display.max_columns", - None, - "display.max_rows", - None, - ): - output.append(hprint.frame("info")) - output.append(pprint.pformat(info)) - output_str = "\n".join(output) - return output_str - - -# TODO(gp): This seems the python3.9 version of `to_str`. Remove if possible. -def to_string(var: str) -> str: - """ - Generate an f-string expression for debugging variable values. - - :param var: the variable name to create an f-string for - :return: an f-string expression that will print the variable name and value - """ - return f"""f"{var}={{{var}}}""" - - -# ############################################################################# - - -def diff_files( - file_name1: str, - file_name2: str, - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", - error_msg: str = "", -) -> None: - """ - Compare the passed filenames and create script to compare them with - vimdiff. 
- - :param tag: add a banner the tag - :param abort_on_exit: whether to assert or not - :param dst_dir: dir where to save the comparing script - """ - _LOG.debug(hprint.func_signature_to_str()) - file_name1 = os.path.relpath(file_name1, os.getcwd()) - file_name2 = os.path.relpath(file_name2, os.getcwd()) - msg = [] - # Add tag. - if tag is not None: - msg.append("\n" + hprint.frame(tag, char1="-")) - # Diff to screen. - _, res = hsystem.system_to_string( - f"echo; sdiff --expand-tabs -l -w 150 {file_name1} {file_name2}", - abort_on_error=False, - log_level=logging.DEBUG, - ) - msg.append(res) - # Save a script to diff. - diff_script = os.path.join(dst_dir, "tmp_diff.sh") - vimdiff_cmd = f""" - #!/bin/bash - if [[ $1 == "wrap" ]]; then - cmd='vimdiff -c "windo set wrap"' - else - cmd='vimdiff' - fi; - cmd="$cmd {file_name1} {file_name2}" - eval $cmd - """ - vimdiff_cmd = hprint.dedent(vimdiff_cmd) - # TODO(gp): Use hio.create_executable_script(). - hio.to_file(diff_script, vimdiff_cmd) - cmd = "chmod +x " + diff_script - hsystem.system(cmd) - # Report how to diff. - msg.append("Diff with:") - msg.append("> " + diff_script) - msg_as_str = "\n".join(msg) - # Append also error_msg to the current message. - if error_msg: - msg_as_str += "\n" + error_msg - # Add also the stack trace to the logging error. - if False: - log_msg_as_str = ( - msg_as_str - + "\n" - + hprint.frame("Traceback", char1="-") - + "\n" - + "".join(traceback.format_stack()) - ) - _LOG.error(log_msg_as_str) - # Assert. - if abort_on_exit: - raise RuntimeError(msg_as_str) - - -# ############################################################################# - - -def _remove_spaces(txt: str) -> str: - """ - Remove leading / trailing spaces and empty lines. - - This is used to implement fuzzy matching. - """ - txt = txt.replace("\\n", "\n").replace("\\t", "\t") - # Convert multiple empty spaces (but not newlines) into a single one. - txt = re.sub(r"[^\S\n]+", " ", txt) - # Remove insignificant crap. 
- lines = [] - for line in txt.split("\n"): - # Remove leading and trailing spaces. - line = re.sub(r"^\s+", "", line) - line = re.sub(r"\s+$", "", line) - # Skip empty lines. - if line != "": - lines.append(line) - txt = "\n".join(lines) - return txt - - -def _remove_banner_lines(txt: str) -> str: - """ - Remove lines of separating characters long at least 20 characters. - """ - txt_tmp: List[str] = [] - for line in txt.split("\n"): - if re.match(r"^\s*[\#\-><=]{20,}\s*$", line): - continue - txt_tmp.append(line) - txt = "\n".join(txt_tmp) - return txt - - -def _fuzzy_clean(txt: str) -> str: - """ - Remove irrelevant artifacts to make string comparison less strict. - """ - hdbg.dassert_isinstance(txt, str) - # Ignore spaces. - txt = _remove_spaces(txt) - # Ignore separation lines. - txt = _remove_banner_lines(txt) - return txt - - -def _ignore_line_breaks(txt: str) -> str: - """ - Replace all line breaks with spaces for loose comparison. - - :param txt: the input text - :return: text with line breaks replaced by spaces - """ - # Ignore line breaks. - txt = txt.replace("\n", " ") - return txt - - -def _sort_lines(txt: str) -> str: - """ - Sort the lines in alphabetical order. - - This is used when we want to perform a comparison of equality but - without order. Of course there are false negatives, since the - relative order of lines might matter. - """ - lines = txt.split("\n") - lines.sort() - lines = "\n".join(lines) - return lines - - -def _save_diff( - actual: str, - expected: str, - tag: str, - test_dir: str, -) -> None: - """ - Save actual and expected strings to temporary files for comparison. - - :param actual: the actual test output - :param expected: the expected test output - :param tag: identifier tag for the files - :param test_dir: directory to save files in - """ - if tag != "": - tag += "." - # Save expected strings to dir. 
- for dst_dir in (".", test_dir): - act_file_name = f"{dst_dir}/tmp.{tag}actual.txt" - hio.to_file(act_file_name, actual) - exp_file_name = f"{dst_dir}/tmp.{tag}expected.txt" - hio.to_file(exp_file_name, expected) - - -def assert_equal( - actual: str, - expected: str, - full_test_name: str, - test_dir: str, - *, - check_string: bool = False, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - purify_expected_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - abort_on_error: bool = True, - dst_dir: str = ".", - error_msg: str = "", -) -> bool: - """ - See interface in `TestCase.assert_equal()`. - - :param full_test_name: e.g., `TestRunNotebook1.test2` - :param check_string: if it was invoked by `check_string()` or directly - """ - _LOG.debug(hprint.func_signature_to_str("actual expected")) - # Store a mapping tag after each transformation (e.g., original, sort, ...) to - # (actual, expected). - values: Dict[str, str] = collections.OrderedDict() - - def _append(tag: str, actual: str, expected: str) -> None: - _LOG.debug( - "tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected - ) - hdbg.dassert_not_in(tag, values) - values[tag] = (actual, expected) - - # - _LOG.debug("Before any transformation:") - tag = "original" - _append(tag, actual, expected) - # 1) Remove white spaces. - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_white_spaces(actual) - expected = text_purifier.purify_white_spaces(expected) - tag = "purify_white_spaces" - _append(tag, actual, expected) - # Remove empty leading / trailing lines. 
- if remove_lead_trail_empty_lines: - tag = "remove_lead_trail_empty_lines" - actual = hprint.remove_lead_trail_empty_lines(actual) - expected = hprint.remove_lead_trail_empty_lines(expected) - _append(tag, actual, expected) - # Dedent only expected since we often align it to make it look more readable - # in the Python code, if needed. - if dedent: - tag = "dedent" - expected = hprint.dedent(expected) - _append(tag, actual, expected) - # Purify text, if needed. - if purify_text: - tag = "purify_text" - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - if purify_expected_text: - expected = text_purifier.purify_txt_from_client(expected) - _append(tag, actual, expected) - # Ensure that there is a single `\n` at the end of the strings. - actual = actual.rstrip("\n") + "\n" - expected = expected.rstrip("\n") + "\n" - # Sort the lines. - if sort: - tag = "sort" - actual = _sort_lines(actual) - expected = _sort_lines(expected) - _append(tag, actual, expected) - # Fuzzy match, if needed. - if fuzzy_match: - tag = "fuzzy_match" - actual = _fuzzy_clean(actual) - expected = _fuzzy_clean(expected) - _append(tag, actual, expected) - # Ignore line breaks, if needed. - if ignore_line_breaks: - tag = "ignore_line_breaks" - actual = _ignore_line_breaks(actual) - expected = _ignore_line_breaks(expected) - _append(tag, actual, expected) - # Split the strings into lines of at most `split_max_len` characters. - if split_max_len: - tag = "split_max_len" - actual = hprint.strict_split(actual, split_max_len) - expected = hprint.strict_split(expected, split_max_len) - _append(tag, actual, expected) - # Check. 
- tag = "final" - _append(tag, actual, expected) - # - is_equal = expected == actual - _LOG.debug(hprint.to_str("is_equal")) - if is_equal: - return is_equal - _LOG.error( - "%s", - "\n" - + hprint.frame( - f"Test '{full_test_name}' failed", char1="=", num_chars=80 - ), - ) - if not check_string: - # If this is a `self.assert_equal()` and not a `self.check_string()`, - # then print the correct output, like: - # expected = r'""" - # 2021-02-17 09:30:00-05:00 - # 2021-02-17 10:00:00-05:00 - # 2021-02-17 11:00:00-05:00 - # """ - txt = [] - txt.append(hprint.frame(f"ACTUAL VARIABLE: {full_test_name}", char1="-")) - # TODO(gp): Switch to expected or expected_result. - exp_var = "expected = r" - # We always return the variable exactly as this should be, even if we - # could make it look better through indentation in case of fuzzy match. - actual_orig = values["original"][0] - if actual_orig.startswith('"'): - sep = "'''" - else: - sep = '"""' - exp_var += sep - if fuzzy_match: - # We can print in a more readable way since spaces don't matter. - exp_var += "\n" - exp_var += actual_orig - if fuzzy_match: - # We can print in a more readable way since spaces don't matter. - exp_var += "\n" - exp_var += sep - # Save the expected variable to files. - exp_var_file_name = f"{test_dir}/tmp.exp_var.txt" - hio.to_file(exp_var_file_name, exp_var) - # - exp_var_file_name = "tmp.exp_var.txt" - hio.to_file(exp_var_file_name, exp_var) - _LOG.info("Saved exp_var in %s", exp_var_file_name) - # - txt.append(exp_var) - txt = "\n".join(txt) - error_msg += txt - # Save all the values after the transformations. - debug = False - if debug: - for idx, key in enumerate(values.keys()): - actual_tmp, expected_tmp = values[key] - tag = f"{idx}.{key}" - _save_diff(actual_tmp, expected_tmp, tag, test_dir) - else: - key = "final" - actual_tmp, expected_tmp = values[key] - _save_diff(actual_tmp, expected_tmp, key, test_dir) - # Compare the last values. 
- act_file_name = f"{test_dir}/tmp.final.actual.txt" - exp_file_name = f"{test_dir}/tmp.final.expected.txt" - if fuzzy_match: - msg = "FUZZY ACTUAL vs FUZZY EXPECTED" - else: - msg = "ACTUAL vs EXPECTED" - msg += f": {full_test_name}" - diff_files( - act_file_name, - exp_file_name, - tag=msg, - abort_on_exit=abort_on_error, - dst_dir=dst_dir, - error_msg=error_msg, - ) - return is_equal - - -# TODO(gp): @all move to hpandas -def compare_df(df1: "pd.DataFrame", df2: "pd.DataFrame") -> None: - """ - Compare two dfs including their metadata. - """ - if not df1.equals(df2): - print(df1.compare(df2)) - raise ValueError("Dfs are different") - - def _compute_df_signature(df: "pd.DataFrame") -> str: - txt = [] - txt.append(f"df1=\n{str(df)}") - txt.append(f"df1.dtypes=\n{str(df.dtypes)}") - if hasattr(df.index, "freq"): - txt.append(f"df1.index.freq=\n{str(df.index.freq)}") - return "\n".join(txt) - - full_test_name = "dummy" - test_dir = "." - assert_equal( - _compute_df_signature(df1), - _compute_df_signature(df2), - full_test_name, - test_dir, - ) - - -# ############################################################################# - - -def create_test_dir( - dir_name: str, incremental: bool, file_dict: Dict[str, str] -) -> None: - """ - Create a directory `dir_name` with the files from `file_dict`. - - `file_dict` is interpreted as pair of files relative to `dir_name` - and content. - """ - hdbg.dassert_no_duplicates(file_dict.keys()) - hio.create_dir(dir_name, incremental=incremental) - for file_name in file_dict: - dst_file_name = os.path.join(dir_name, file_name) - _LOG.debug("file_name=%s -> %s", file_name, dst_file_name) - hio.create_enclosing_dir(dst_file_name, incremental=incremental) - file_content = file_dict[file_name] - hio.to_file(dst_file_name, file_content) - - -# TODO(gp): Make remove_dir_name=True default. 
-def get_dir_signature( - dir_name: str, - include_file_content: bool, - *, - remove_dir_name: bool = False, - num_lines: Optional[int] = None, -) -> str: - """ - Compute a string with the content of the files in `dir_name`. - - :param include_file_content: include the content of the files, besides the - name of files and directories - :param remove_dir_name: use paths relative to `dir_name` - :param num_lines: number of lines to include for each file - - The output looks like: - ``` - # Dir structure - $GIT_ROOT/.../tmp.scratch - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1 - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet - ... - - # File signatures - len(file_names)=3 - file_names=$GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet, - $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet, ... - # $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet - num_lines=13 - ''' - original shape=(1, 1) - Head: - { - "0":{ - "dummy_value_3":0 - } - } - Tail: - { - "0":{ - "dummy_value_3":0 - } - } - ''' - # $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet - ``` - """ - - def _remove_dir_name(file_name: str) -> str: - if remove_dir_name: - res = os.path.relpath(file_name, dir_name) - else: - res = file_name - return res - - txt: List[str] = [] - # Find all the files under `dir_name`. - _LOG.debug("dir_name=%s", dir_name) - hdbg.dassert_path_exists(dir_name) - cmd = f'find {dir_name} -name "*"' - remove_files_non_present = False - dir_name_tmp = None - file_names = hsystem.system_to_files( - cmd, dir_name_tmp, remove_files_non_present - ) - file_names = sorted(file_names) - # Save the directory / file structure. - txt.append("# Dir structure") - txt.append("\n".join(map(_remove_dir_name, file_names))) - # - if include_file_content: - txt.append("# File signatures") - # Remove the directories. 
- file_names = hsystem.remove_dirs(file_names) - # Scan the files. - txt.append(f"len(file_names)={len(file_names)}") - txt.append(f"file_names={', '.join(map(_remove_dir_name, file_names))}") - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt.append("# " + _remove_dir_name(file_name)) - # Read file. - txt_tmp = hio.from_file(file_name) - # This seems unstable on different systems. - # txt.append("num_chars=%s" % len(txt_tmp)) - txt_tmp = txt_tmp.split("\n") - # Filter lines, if needed. - txt.append(f"num_lines={len(txt_tmp)}") - if num_lines is not None: - hdbg.dassert_lte(1, num_lines) - txt_tmp = txt_tmp[:num_lines] - txt.append("'''\n" + "\n".join(txt_tmp) + "\n'''") - else: - hdbg.dassert_is(num_lines, None) - # Concat everything in a single string. - result = "\n".join(txt) - return result - - -# TODO(gp): GSI. Use the copy in helpers/hprint.py -def filter_text(regex: str, txt: str) -> str: - """ - Remove lines in `txt` that match the regex `regex`. - """ - _LOG.debug("Filtering with '%s'", regex) - if regex is None: - return txt - txt_out = [] - txt_as_arr = txt.split("\n") - for line in txt_as_arr: - if re.search(regex, line): - _LOG.debug("Skipping line='%s'", line) - continue - txt_out.append(line) - # We can only remove lines. - hdbg.dassert_lte( - len(txt_out), - len(txt_as_arr), - "txt_out=\n'''%s'''\ntxt=\n'''%s'''", - "\n".join(txt_out), - "\n".join(txt_as_arr), - ) - txt = "\n".join(txt_out) - return txt - - -def diff_strings( - string1: str, - string2: str, - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", -) -> None: - """ - Compare two strings using the diff_files() flow by creating a script to - compare with vimdiff. - - :param dst_dir: where to save the intermediatary files - """ - _LOG.debug(hprint.to_str("tag abort_on_exit dst_dir")) - # Save the actual and expected strings to files. 
- file_name1 = f"{dst_dir}/tmp.string1.txt" - hio.to_file(file_name1, string1) - # - file_name2 = f"{dst_dir}/tmp.string2.txt" - hio.to_file(file_name2, string2) - # Compare with diff_files. - if tag is None: - tag = "string1 vs string2" - diff_files( - file_name1, - file_name2, - tag=tag, - abort_on_exit=abort_on_exit, - dst_dir=dst_dir, - ) - - -def diff_df_monotonic( - df: "pd.DataFrame", - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", -) -> None: - """ - Check for a dataframe to be monotonic using the vimdiff flow from - diff_files(). - """ - _LOG.debug(hprint.to_str("abort_on_exit dst_dir")) - if not df.index.is_monotonic_increasing: - df2 = df.copy() - df2.sort_index(inplace=True) - diff_strings( - df.to_csv(), - df2.to_csv(), - tag=tag, - abort_on_exit=abort_on_exit, - dst_dir=dst_dir, - ) - - -# ############################################################################# - - -# pylint: disable=protected-access -def get_pd_default_values() -> "pd._config.config.DictWrapper": - """ - Get a deep copy of the current pandas default options. - - :return: a copy of pandas configuration options - """ - import copy - - vals = copy.deepcopy(pd.options) - return vals - - -def set_pd_default_values() -> None: - """ - Set pandas display options to standard default values for testing. - - This ensures consistent output across different test environments. 
- """ - # 'display': - default_pd_values = { - "chop_threshold": None, - "colheader_justify": "right", - "date_dayfirst": False, - "date_yearfirst": False, - "encoding": "UTF-8", - "expand_frame_repr": True, - "float_format": None, - "html": {"border": 1, "table_schema": False, "use_mathjax": True}, - "large_repr": "truncate", - "latex": { - "escape": True, - "longtable": False, - "multicolumn": True, - "multicolumn_format": "l", - "multirow": False, - "repr": False, - }, - "max_categories": 8, - "max_columns": 20, - "max_colwidth": 50, - "max_info_columns": 100, - "max_info_rows": 1690785, - "max_rows": 60, - "max_seq_items": 100, - "memory_usage": True, - "min_rows": 10, - "multi_sparse": True, - "notebook_repr_html": True, - "pprint_nest_depth": 3, - "precision": 6, - "show_dimensions": "truncate", - "unicode": {"ambiguous_as_wide": False, "east_asian_width": False}, - "width": 80, - } - section = "display" - for key, new_val in default_pd_values.items(): - if isinstance(new_val, dict): - continue - full_key = f"{section}.{key}" - old_val = pd.get_option(full_key) - if old_val != new_val: - _LOG.debug( - "-> Assigning a different value: full_key=%s, " - "old_val=%s, new_val=%s", - full_key, - old_val, - new_val, - ) - pd.set_option(full_key, new_val) - - -# If a golden outcome is missing asserts (instead of updating golden and adding -# it to Git repo, corresponding to "update"). -_ACTION_ON_MISSING_GOLDEN = "assert" - - -# ############################################################################# -# TestCase -# ############################################################################# - - -# TODO(gp): Remove all the calls to `dedent()` and use the `dedent` switch. -class TestCase(unittest.TestCase): - """ - Add some functions to compare actual results to a golden outcome. - """ - - def setUp(self) -> None: - """ - Execute before any test method. 
- """ - # Set up the base class in case it does something, current - # implementation does nothing, see - # https://docs.python.org/3/library/unittest.html#unittest.TestCase.setUp. - super().setUp() - # Print banner to signal the start of a new test. - func_name = f"{self.__class__.__name__}.{self._testMethodName}" - _LOG.info("\n%s", hprint.frame(func_name)) - # Set the random seed. - random_seed = 20000101 - _LOG.debug("Resetting random.seed to %s", random_seed) - random.seed(random_seed) - if _HAS_NUMPY: - _LOG.debug("Resetting np.random.seed to %s", random_seed) - np.random.seed(random_seed) - # Disable matplotlib plotting by overwriting the `show` function. - if _HAS_MATPLOTLIB: - plt.show = lambda: 0 - # Name of the dir with artifacts for this test. - self._scratch_dir: Optional[str] = None - # The base directory is the one including the class under test. - self._base_dir_name = os.path.dirname(inspect.getfile(self.__class__)) - _LOG.debug("base_dir_name=%s", self._base_dir_name) - # Store whether a test needs to be updated or not. - self._update_tests = get_update_tests() - self._overriden_update_tests = False - # Store whether the golden outcome of this test was updated. - self._test_was_updated = False - # Store whether the output files need to be added to hgit. - self._git_add = True - # Error message printed when comparing actual and expected outcome. - self._error_msg = "" - # Set the default pandas options (see AmpTask1140). - if _HAS_PANDAS: - self._old_pd_options = get_pd_default_values() - set_pd_default_values() - # Reset the timestamp of the current bar. - hwacltim.reset_current_bar_timestamp() - # Start the timer to measure the execution time of the test. - self._timer = htimer.Timer() - - def tearDown(self) -> None: - """ - Execute after each test method completes. - - Handles cleanup, timing, and restoration of default settings. - """ - # Stop the timer to measure the execution time of the test. 
- self._timer.stop() - pytest_print("(%.2f s) " % self._timer.get_total_elapsed()) - # Report if the test was updated - if self._test_was_updated: - if not self._overriden_update_tests: - pytest_warning("Test was updated) ", prefix="(") - else: - # We forced an update from the unit test itself, so no need - # to report an update. - pass - # Recover the original default pandas options. - if _HAS_PANDAS: - pd.options = self._old_pd_options - # Force matplotlib to close plots to decouple tests. - if _HAS_MATPLOTLIB: - plt.close() - plt.clf() - # Delete the scratch dir, if needed. - if self._scratch_dir and os.path.exists(self._scratch_dir): - if False: - # We want to keep this if the test failed, as an alternative - # to just re-running with --incremental. - result = self._outcome.result - # From https://stackoverflow.com/questions/4414234/getting-pythons-unittest-results-in-a-teardown-method - # https://github.com/pytest-dev/pytest/issues/10631 - # This doesn't work any longer. - # has_error = test_result.failures or test_result.errors - has_error = result._excinfo is not None - else: - # TODO(gp): The problem is that when there is a failure during - # the regressions, having artifacts in the scratch dir causes - # more tests to fail (especially the ones in the cycle detector). - # We need to make tests more robust to this and then we can enable - # the logic to keep files for the failed tests in the scratch dir. - has_error = False - if has_error or get_incremental_tests(): - _LOG.warning("Skipping deleting %s", self._scratch_dir) - else: - _LOG.debug("Deleting %s", self._scratch_dir) - hio.delete_dir(self._scratch_dir) - # Tear down the base class in case it does something, current - # implementation does nothing, see - # https://docs.python.org/3/library/unittest.html#unittest.TestCase.tearDown. - super().tearDown() - - def set_base_dir_name(self, base_dir_name: str) -> None: - """ - Set the base directory for the input, output, and scratch directories. 
- - This is used to override the standard location of the base - directory which is close to the class under test. - """ - self._base_dir_name = base_dir_name - _LOG.debug("Setting base_dir_name to '%s'", self._base_dir_name) - hio.create_dir(self._base_dir_name, incremental=True) - - def mock_update_tests(self) -> None: - """ - When unit testing the unit test framework we want to test updating the - golden outcome. - """ - self._update_tests = True - self._overriden_update_tests = True - self._git_add = False - - def _get_current_path( - self, - use_only_class_name: bool, - test_class_name: Optional[str], - test_method_name: Optional[str], - use_absolute_path: bool, - ) -> str: - """ - Return the name of the directory containing the input / output data. - - E.g., - ``` - ./core/dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare - ``` - - The parameters have the same meaning as in `get_input_dir()`. - """ - if test_class_name is None: - test_class_name = self.__class__.__name__ - if use_only_class_name: - # Use only class name. - dir_name = test_class_name - else: - # Use both class and test method. - if test_method_name is None: - test_method_name = self._testMethodName - dir_name = f"{test_class_name}.{test_method_name}" - if use_absolute_path: - # E.g., `.../dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare`. - dir_name = os.path.join(self._base_dir_name, "outcomes", dir_name) - else: - # E.g., `outcomes/TestContinuousSarimaxModel.test_compare`. - dir_name = os.path.join("outcomes", dir_name) - return dir_name - - def get_input_dir( - self, - *, - use_only_test_class: bool = False, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = True, - ) -> str: - """ - Return the path of the directory storing input data for this test - class. - - E.g., `TestLinearRegression1.test1`. - - :param use_only_test_class: use only the name on the test class and not of - the method. 
E.g., when one wants all the test methods to use a single - file for testing - :param test_class_name: `None` uses the current test class name - :param test_method_name: `None` uses the current test method name - :param use_absolute_path: use the path from the file containing the test - :return: dir name - """ - # Get the dir of the test. - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `input` to the dir. - dir_name = os.path.join(dir_name, "input") - return dir_name - - def get_output_dir( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> str: - """ - Return the path of the directory storing output data for this test - class. - - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: dir name - """ - # The output dir is specific of this dir. - use_only_test_class = False - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `output` to the dir. - dir_name = os.path.join(dir_name, "output") - return dir_name - - # TODO(gp): -> get_scratch_dir(). - def get_scratch_space( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = True, - ) -> str: - """ - Return the path of the directory storing scratch data for this test. - - The directory is also created and cleaned up based on whether - the incremental behavior is enabled or not. - """ - if self._scratch_dir is None: - # Create the dir on the first invocation on a given test. - use_only_test_class = False - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `tmp.scratch` to the dir. 
- dir_name = os.path.join(dir_name, "tmp.scratch") - # On the first invocation create the dir. - incremental = get_incremental_tests() - hio.create_dir(dir_name, incremental=incremental) - # Store the value. - self._scratch_dir = dir_name - return self._scratch_dir - - def get_s3_scratch_dir( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> str: - """ - Return the path of a directory storing scratch data on S3 for this - test. - - E.g., - s3://alphamatic-data/tmp/cache.unit_test/ - root.98e1cf5b88c3.amp.TestTestCase1.test_get_s3_scratch_dir1 - """ - # Make the path unique for the test. - use_only_test_class = False - use_absolute_path = False - test_path = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Make the path unique for the current user. - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - project_dirname = hgit.get_project_dirname() - dir_name = f"{user_name}.{server_name}.{project_dirname}" - # Assemble everything in a single path. - import helpers.hs3 as hs3 - - aws_profile = "ck" - s3_bucket = hs3.get_s3_bucket_path_unit_test(aws_profile) - scratch_dir = f"{s3_bucket}/tmp/cache.unit_test/{dir_name}.{test_path}" - return scratch_dir - - def get_s3_input_dir( - self, - *, - use_only_test_class: bool = False, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = False, - ) -> str: - """ - Return the S3 path for storing input data for this test. 
- - :param use_only_test_class: use only the test class name, not method - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :param use_absolute_path: use the path from the file containing the test - :return: S3 path for test input data - """ - s3_bucket = hrecouti.get_repo_config().get_unit_test_bucket_path() - hdbg.dassert_isinstance(s3_bucket, str) - # Make the path unique for the test. - test_path = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - use_absolute_path=use_absolute_path, - ) - hdbg.dassert_isinstance(test_path, str) - # Assemble everything in a single path. - input_dir = os.path.join(s3_bucket, test_path) - return input_dir - - def _get_test_name(self) -> str: - """ - Return the full test name as `class.method`. - """ - return f"{self.__class__.__name__}.{self._testMethodName}" - - # /////////////////////////////////////////////////////////////////////// - - def assert_equal( - self, - actual: str, - expected: str, - *, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - purify_expected_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - abort_on_error: bool = True, - dst_dir: str = ".", - ) -> bool: - """ - Return if `actual` and `expected` are different and report the - difference. - - Implement a better version of `self.assertEqual()` that reports - mismatching strings with sdiff and save them to files for - further analysis with vimdiff. - - The interface is similar to `check_string()`. 
- """ - _LOG.debug(hprint.to_str("fuzzy_match abort_on_error dst_dir")) - hdbg.dassert_in(type(actual), (bytes, str), "actual=%s", str(actual)) - hdbg.dassert_in( - type(expected), (bytes, str), "expected=%s", str(expected) - ) - # Get the current dir name. - use_only_test_class = False - test_class_name = None - test_method_name = None - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - _LOG.debug("dir_name=%s", dir_name) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert_path_exists(dir_name) - # - test_name = self._get_test_name() - is_equal = assert_equal( - actual, - expected, - test_name, - dir_name, - check_string=False, - remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, - dedent=dedent, - purify_text=purify_text, - purify_expected_text=purify_expected_text, - fuzzy_match=fuzzy_match, - ignore_line_breaks=ignore_line_breaks, - split_max_len=split_max_len, - sort=sort, - abort_on_error=abort_on_error, - dst_dir=dst_dir, - ) - return is_equal - - def assert_dfs_close( - self, - actual: "pd.DataFrame", - expected: "pd.DataFrame", - **kwargs: Any, - ) -> None: - """ - Assert dfs have same indexes and columns and that all values are close. - - This is a more robust alternative to `compare_df()`. In - particular, it is less sensitive to floating point round-off - errors. - """ - self.assertEqual(actual.index.to_list(), expected.index.to_list()) - self.assertEqual(actual.columns.to_list(), expected.columns.to_list()) - # Often the output of a failing assertion is difficult to parse - # so we resort to our special `assert_equal()`. 
- if not np.allclose(actual, expected, **kwargs): - import helpers.hpandas as hpandas - - self.assert_equal( - hpandas.df_to_str(actual), hpandas.df_to_str(expected) - ) - np.testing.assert_allclose(actual, expected, **kwargs) - - # /////////////////////////////////////////////////////////////////////// - - # TODO(gp): This needs to be moved to `helper.git` and generalized. - def _git_add_file(self, file_name: str) -> None: - """ - Add to git repo `file_name`, if needed. - """ - _LOG.debug(hprint.to_str("file_name")) - if self._git_add: - # Find the file relative to here. - mode = "assert_unless_one_result" - # The problem is that when we run from an included repo, we look - # for files like: - # ``` - # helpers_root/helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt - # ``` - # but in our directory we find files like: - # ``` - # helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt - # ``` - # so we need to make the file relative to the innermost repo. - git_root = hgit.get_client_root(super_module=False) - rel_file_name = os.path.relpath(file_name, git_root) - _LOG.debug(hprint.to_str("rel_file_name")) - file_names_tmp = hgit.find_docker_file(rel_file_name, mode=mode) - hdbg.dassert_eq(len(file_names_tmp), 1) - file_name_tmp = file_names_tmp[0] - _LOG.debug(hprint.to_str("file_name_tmp")) - cmd = f"cd amp; git add -u {file_name_tmp}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc: - pytest_warning( - f"Can't git add file\n'{file_name}' -> '{file_name_tmp}'\n" - "You need to git add the file manually\n", - prefix="\n", - ) - pytest_print(f"> {cmd}\n") - - def _check_string_update_outcome( - self, file_name: str, actual: str, use_gzip: bool - ) -> None: - """ - Update the golden outcome file with actual test output. 
- - :param file_name: path to the golden outcome file - :param actual: the actual test output to save - :param use_gzip: whether to compress the file with gzip - """ - _LOG.debug(hprint.to_str("file_name")) - hio.to_file(file_name, actual, use_gzip=use_gzip) - # Add to git repo. - self._git_add_file(file_name) - - # /////////////////////////////////////////////////////////////////////// - - def _get_golden_outcome_file_name( - self, - tag: str, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> Tuple[str, str]: - """ - Get the directory and file name for the golden outcome file. - - :param tag: identifier tag for the golden outcome file - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: tuple of (directory_path, file_path) - """ - # Get the current dir name. - use_only_test_class = False - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - _LOG.debug("dir_name=%s", dir_name) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert_path_exists(dir_name) - # Get the expected outcome. - file_name = ( - self.get_output_dir( - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - + f"/{tag}.txt" - ) - return dir_name, file_name - - # TODO(gp): There is a lot of similarity between `check_string()` and - # `check_df_string()` that can be factored out if we extract the code that - # reads and saves the golden file. 
- def check_string( - self, - actual: str, - *, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - use_gzip: bool = False, - tag: str = "test", - abort_on_error: bool = True, - action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> Tuple[bool, bool, Optional[bool]]: - """ - Check the actual outcome of a test against the expected outcome - contained in the file. If `--update_outcomes` is used, updates the - golden reference file with the actual outcome. - - :param actual: actual outcome of the test - :param remove_lead_trail_empty_lines: remove leading and trailing empty - :param dedent: call `dedent` on the expected string to align it to the - beginning of the row - :param purify_text: remove some artifacts (e.g., usernames, - directories, reference to Git client) - :param fuzzy_match: ignore differences in spaces - :param ignore_line_breaks: ignore difference due to line breaks - :param split_max_len: split the string into lines of at most this length - :param sort: sort the text and then compare it. In other terms we check - whether the lines are the same although in different order - :param use_gzip: use gzip to compress/decompress the golden outcome - :param tag: tag to identify the golden outcome file - :param abort_on_error: whether to raise an exception if the outcome is - different from the golden outcome - :param action_on_missing_golden: what to do (e.g., "assert" or "update" - when the golden outcome is missing) - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: outcome_updated, file_exists, is_equal - :raises: `RuntimeError` if there is a mismatch. 
If `abort_on_error` is False - (which should be used only for unit testing) return the result but do not - assert - """ - _LOG.debug( - hprint.to_str( - "remove_lead_trail_empty_lines dedent purify_text fuzzy_match " - "ignore_line_breaks split_max_len sort use_gzip tag " - "abort_on_error action_on_missing_golden test_class_name " - "test_method_name" - ) - ) - hdbg.dassert_in(type(actual), (bytes, str), "actual='%s'", actual) - # - dir_name, file_name = self._get_golden_outcome_file_name( - tag, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - if use_gzip: - file_name += ".gz" - _LOG.debug("file_name=%s", file_name) - # Remove reference from the current environment. - # TODO(gp): Not sure why we purify here and not delegate to `assert_equal`. - if purify_text: - _LOG.debug("Purifying actual outcome") - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - _LOG.debug("actual=\n%s", actual) - outcome_updated = False - file_exists = os.path.exists(file_name) - _LOG.debug("file_exists=%s", file_exists) - is_equal: Optional[bool] = None - if self._update_tests: - _LOG.debug("# Update golden outcomes") - # Determine whether outcome needs to be updated. - if file_exists: - expected = hio.from_file(file_name) - is_equal = expected == actual - if not is_equal: - outcome_updated = True - else: - # The golden outcome doesn't exist. - outcome_updated = True - _LOG.debug("outcome_updated=%s", outcome_updated) - if outcome_updated: - # Update the golden outcome. - self._check_string_update_outcome(file_name, actual, use_gzip) - else: - # Check the test result. - _LOG.debug("# Check golden outcomes") - if file_exists: - # Golden outcome is available: check the actual outcome against - # the golden outcome. 
- expected = hio.from_file(file_name) - test_name = self._get_test_name() - is_equal = assert_equal( - actual, - expected, - test_name, - dir_name, - check_string=True, - remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, - dedent=dedent, - # We have handled the purification of the output earlier. - purify_text=False, - fuzzy_match=fuzzy_match, - ignore_line_breaks=ignore_line_breaks, - split_max_len=split_max_len, - sort=sort, - abort_on_error=abort_on_error, - ) - else: - # No golden outcome available. - _LOG.warning("Can't find golden outcome file '%s'", file_name) - if action_on_missing_golden == "assert": - # Save the result to a temporary file and assert. - file_name += ".tmp" - hio.to_file(file_name, actual, use_gzip=use_gzip) - msg = ( - "The golden outcome doesn't exist: saved the actual " - f"output in '{file_name}'" - ) - _LOG.error(msg) - if abort_on_error: - hdbg.dfatal(msg) - elif action_on_missing_golden == "update": - # Create golden file and add it to the repo. - _LOG.warning("Creating the golden outcome") - outcome_updated = True - self._check_string_update_outcome( - file_name, actual, use_gzip - ) - is_equal = None - else: - hdbg.dfatal( - "Invalid action_on_missing_golden=" - + f"'{action_on_missing_golden}'" - ) - self._test_was_updated = outcome_updated - _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) - return outcome_updated, file_exists, is_equal - - # /////////////////////////////////////////////////////////////////////// - - def _check_df_update_outcome( - self, - file_name: str, - actual: "pd.DataFrame", - ) -> None: - """ - Update the golden outcome file with actual dataframe output. - - :param file_name: path to the golden outcome file - :param actual: the actual dataframe to save - """ - _LOG.debug(hprint.to_str("file_name")) - hio.create_enclosing_dir(file_name) - actual.to_csv(file_name) - pytest_warning(f"Update golden outcome file '{file_name}'", prefix="\n") - # Add to git repo. 
- self._git_add_file(file_name) - - def _to_error(self, msg: str) -> None: - """ - Append error message to the accumulated error log. - - :param msg: error message to log and accumulate - """ - self._error_msg += msg + "\n" - _LOG.error(msg) - - def _check_df_compare_outcome( - self, file_name: str, actual: "pd.DataFrame", err_threshold: float - ) -> Tuple[bool, "pd.DataFrame"]: - """ - Compare actual dataframe with golden outcome from file. - - :param file_name: path to the golden outcome file - :param actual: the actual dataframe to compare - :param err_threshold: relative error threshold for numerical comparison - :return: tuple of (is_equal, expected_dataframe) - """ - _LOG.debug(hprint.to_str("file_name")) - _LOG.debug("actual_=\n%s", actual) - hdbg.dassert_lte(0, err_threshold) - hdbg.dassert_lte(err_threshold, 1.0) - # Load the expected df from file. - expected = pd.read_csv(file_name, index_col=0) - _LOG.debug("expected=\n%s", expected) - hdbg.dassert_isinstance(expected, pd.DataFrame) - ret = True - # Compare columns. - if actual.columns.tolist() != expected.columns.tolist(): - msg = f"Columns are different:\n{str(actual.columns)}\n{str(expected.columns)}" - self._to_error(msg) - ret = False - # Compare the values. 
- _LOG.debug("actual.shape=%s", str(actual.shape)) - _LOG.debug("expected.shape=%s", str(expected.shape)) - # From https://numpy.org/doc/stable/reference/generated/numpy.allclose.html - # absolute(a - b) <= (atol + rtol * absolute(b)) - # absolute(a - b) / absolute(b)) <= rtol - is_close = np.allclose( - actual, expected, rtol=err_threshold, equal_nan=True - ) - if not is_close: - _LOG.error("Dataframe values are not close") - if actual.shape == expected.shape: - close_mask = np.isclose(actual, expected, equal_nan=True) - # - msg = f"actual=\n{actual}" - self._to_error(msg) - # - msg = f"expected=\n{expected}" - self._to_error(msg) - # - actual_masked = np.where(close_mask, np.nan, actual) - msg = f"actual_masked=\n{actual_masked}" - self._to_error(msg) - # - expected_masked = np.where(close_mask, np.nan, expected) - msg = f"expected_masked=\n{expected_masked}" - self._to_error(msg) - # - err = np.abs((actual_masked - expected_masked) / expected_masked) - msg = f"err=\n{err}" - self._to_error(msg) - max_err = np.nanmax(np.nanmax(err)) - msg = "max_err=%.3f" % max_err - self._to_error(msg) - else: - msg = ( - "Shapes are different:\n" - f"actual.shape={str(actual.shape)}\nexpected.shape={str(expected.shape)}" - ) - self._to_error(msg) - ret = False - _LOG.debug("ret=%s", ret) - return ret, expected - - def check_dataframe( - self, - actual: "pd.DataFrame", - *, - err_threshold: float = 0.05, - dedent: bool = False, - tag: str = "test_df", - abort_on_error: bool = True, - action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, - ) -> Tuple[bool, bool, Optional[bool]]: - """ - Like `check_string()` but for pandas dataframes, instead of strings. 
- """ - _LOG.debug(hprint.to_str("err_threshold tag abort_on_error")) - hdbg.dassert_isinstance(actual, pd.DataFrame) - # - dir_name, file_name = self._get_golden_outcome_file_name(tag) - _LOG.debug("file_name=%s", file_name) - outcome_updated = False - file_exists = os.path.exists(file_name) - _LOG.debug(hprint.to_str("file_exists")) - is_equal: Optional[bool] = None - if self._update_tests: - _LOG.debug("# Update golden outcomes") - # Determine whether outcome needs to be updated. - if file_exists: - is_equal, _ = self._check_df_compare_outcome( - file_name, actual, err_threshold - ) - _LOG.debug(hprint.to_str("is_equal")) - if not is_equal: - outcome_updated = True - else: - # The golden outcome doesn't exist. - outcome_updated = True - _LOG.debug("outcome_updated=%s", outcome_updated) - if outcome_updated: - # Update the golden outcome. - self._check_df_update_outcome(file_name, actual) - else: - # Check the test result. - _LOG.debug("# Check golden outcomes") - if file_exists: - # Golden outcome is available: check the actual outcome against - # the golden outcome. - is_equal, expected = self._check_df_compare_outcome( - file_name, actual, err_threshold - ) - # If not equal, report debug information. - if not is_equal: - test_name = self._get_test_name() - assert_equal( - str(actual), - str(expected), - test_name, - dir_name, - check_string=True, - remove_lead_trail_empty_lines=False, - dedent=dedent, - purify_text=False, - fuzzy_match=False, - ignore_line_breaks=False, - split_max_len=None, - sort=False, - abort_on_error=abort_on_error, - error_msg=self._error_msg, - ) - else: - # No golden outcome available. - _LOG.warning("Can't find golden outcome file '%s'", file_name) - if action_on_missing_golden == "assert": - # Save the result to a temporary file and assert. 
- file_name += ".tmp" - hio.create_enclosing_dir(file_name) - actual.to_csv(file_name) - msg = ( - "The golden outcome doesn't exist: saved the actual " - f"output in '{file_name}'" - ) - _LOG.error(msg) - if abort_on_error: - hdbg.dfatal(msg) - elif action_on_missing_golden == "update": - # Create golden file and add it to the repo. - _LOG.warning("Creating the golden outcome") - outcome_updated = True - self._check_df_update_outcome(file_name, actual) - is_equal = None - else: - hdbg.dfatal( - "Invalid action_on_missing_golden=" - + f"'{action_on_missing_golden}'" - ) - self._test_was_updated = outcome_updated - # TODO(gp): Print the file with the updated test. - _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) - return outcome_updated, file_exists, is_equal - - def check_df_output( - self, - actual_df: "pd.DataFrame", - expected_length: Optional[int], - expected_column_names: Optional[List[str]], - expected_column_unique_values: Optional[Dict[str, List[Any]]], - expected_signature: str, - ) -> None: - """ - Verify that actual outcome dataframe matches the expected one. - - :param actual_df: actual outcome dataframe - :param expected_length: expected outcome dataframe length - - If `None`, skip the check - :param expected_column_names: expected outcome dataframe column names - - If `None`, skip the check - :param expected_column_unique_values: dict of column names and unique values - that they should contain - - If `None`, skip the check - :param expected_signature: expected outcome dataframe as string - - If `__CHECK_STRING__` use the value in `self.check_string()` - """ - # TODO(Grisha): get rid of `hpandas` dependency. - import helpers.hpandas as hpandas - - hdbg.dassert_isinstance(actual_df, pd.DataFrame) - if expected_length: - # Verify that the output length is correct. - actual_length = actual_df.shape[0] - self.assert_equal(str(actual_length), str(expected_length)) - if expected_column_names: - # Verify that the column names are correct. 
- self.assert_equal( - str(sorted(actual_df.columns)), - str(sorted(expected_column_names)), - ) - if expected_column_unique_values: - hdbg.dassert_is_subset( - list(expected_column_unique_values.keys()), actual_df.columns - ) - # Verify that the unique values in specified columns are correct. - for column in expected_column_unique_values: - actual_one_column_unique_values = sorted( - list(actual_df[column].unique()) - ) - self.assert_equal( - str(actual_one_column_unique_values), - str(sorted(expected_column_unique_values[column])), - ) - # Build signature. - actual_signature = hpandas.df_to_str( - actual_df, - print_shape_info=True, - tag="df", - ) - _LOG.debug("\n%s", actual_signature) - # Check signature. - if expected_signature == "__CHECK_STRING__": - self.check_string(actual_signature, dedent=True, fuzzy_match=True) - else: - hdbg.dassert_isinstance(expected_signature, str) - self.assert_equal( - actual_signature, - expected_signature, - dedent=True, - fuzzy_match=True, - ) - - def check_srs_output( - self, - actual_srs: "pd.Series", - expected_length: Optional[int], - expected_unique_values: Optional[List[Any]], - expected_signature: str, - ) -> None: - """ - Verify that actual outcome series matches the expected one. - - :param actual_srs: actual outcome series - :param expected_length: expected outcome series length - - If `None`, skip the check - :param expected_unique_values: list of expected unique values in series - - If `None`, skip the check - :param expected_signature: expected outcome series as string - """ - # Import `hpandas` dynamically to exclude `pandas` from the thin client - # requirements. See CmTask6613 for details. - import helpers.hpandas as hpandas - - hdbg.dassert_isinstance(actual_srs, pd.Series) - if expected_length: - # Verify that output length is correct. - self.assert_equal(str(actual_srs.shape[0]), str(expected_length)) - if expected_unique_values: - # Verify that unique values in series are correct. 
- self.assert_equal( - str(sorted(list(actual_srs.unique()))), - str(sorted(expected_unique_values)), - ) - # Build signature. - actual_signature = hpandas.df_to_str(actual_srs, num_rows=None) - _LOG.debug("\n%s", actual_signature) - # Check signature. - if expected_signature == "__CHECK_STRING__": - self.check_string(actual_signature, dedent=True, fuzzy_match=True) - else: - hdbg.dassert_isinstance(expected_signature, str) - self.assert_equal( - actual_signature, - expected_signature, - dedent=True, - fuzzy_match=True, - ) - - -# ############################################################################# -# QaTestCase -# ############################################################################# - - -@pytest.mark.qa -@pytest.mark.skipif( - hserver.is_inside_docker(), reason="Test needs to be run outside Docker" -) -class QaTestCase(TestCase, abc.ABC): - """ - Use for QA to test functionalities (e.g., invoke tasks) that run the dev / - prod container. - """ - - # TODO(Grisha): Linter should not remove `pass` statement from an empty class - # DevToolsTask #476. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py deleted file mode 100644 index cf429b5ac..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py +++ /dev/null @@ -1,450 +0,0 @@ -""" -Import as: - -import helpers.hunit_test_purification as huntepur -""" - -import datetime -import logging -import os -import re -from typing import List, Tuple - -import helpers.hgit as hgit -import helpers.hintrospection as hintros -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. 
-_LOG.setLevel(logging.INFO) - - -# ############################################################################# -# TextPurifier -# ############################################################################# - - -# TODO(gp): Not sure the class is really needed since now it's in a separate -# file. -class TextPurifier: - """ - A class to purify text by removing environment-specific information and - standardizing output for test comparisons. - """ - - def purify_txt_from_client(self, txt: str) -> str: - """ - Apply all purification steps to the input text. - - :param txt: input text to purify - :return: purified text - """ - # The order of substitutions is important. We want to start from the "most - # specific" (e.g., `amp/helpers/test/...`) to the "least specific" (e.g., - # `amp`). - txt = self.purify_directory_paths(txt) - txt = self.purify_from_environment(txt) - # Correct order: -> `app` -> `amp` -> - # Start with `app.amp.helpers_root.helpers...` - # After purifying app references -> `amp.helpers_root.helpers...` - # After purifying amp references -> `helpers_root.helpers...` - # - # Incorrect order: -> `amp` -> `app` -> - # Start with `amp.helpers_root.helpers...` - # After purifying `amp` references -> `app.amp.helpers_root.helpers...` - # After purifying `app` references -> `amp.helpers_root.helpers...` - # - txt = self.purify_app_references(txt) - txt = self.purify_amp_references(txt) - txt = self.purify_from_env_vars(txt) - txt = self.purify_object_representation(txt) - txt = self.purify_today_date(txt) - txt = self.purify_white_spaces(txt) - txt = self.purify_parquet_file_names(txt) - txt = self.purify_helpers(txt) - txt = self.purify_docker_image_name(txt) - return txt - - def purify_directory_paths(self, txt: str) -> str: - """ - Replace known directory paths with standardized placeholders. - - Apply replacements in this order: - 1. Replace Git root paths with `$GIT_ROOT`. - 2. Replace `CSFY_HOST_GIT_ROOT_PATH` with `$CSFY_HOST_GIT_ROOT_PATH`. - 3. 
Replace current working directory with `$PWD`. - - :param txt: input text that needs to be purified - :return: purified text - """ - _LOG.debug("Before: txt='\n%s'", txt) - # Collect all paths to replace with their priorities. - replacements = [] - # 1. Git root paths. - # Remove references to Git modules starting from the innermost one. - for super_module in [False, True]: - # Replace the git root path with `$GIT_ROOT`. - git_root = hgit.get_client_root(super_module=super_module) - if git_root and git_root != "/": - replacements.append((git_root, "$GIT_ROOT")) - _LOG.debug("Added git root '%s' for replacement", git_root) - else: - # Skip git root path if it is `/`. - pass - # 2. CSFY_HOST_GIT_ROOT_PATH environment variable. - # Replace the CSFY_HOST_GIT_ROOT_PATH with `$CSFY_HOST_GIT_ROOT_PATH`. - csfy_git_root = os.environ.get("CSFY_HOST_GIT_ROOT_PATH") - if csfy_git_root: - replacements.append((csfy_git_root, "$CSFY_HOST_GIT_ROOT_PATH")) - _LOG.debug( - "Added CSFY_HOST_GIT_ROOT_PATH '%s' for replacement", - csfy_git_root, - ) - # 3. Current working directory. - # Replace the path of current working directory with `$PWD`. - pwd = os.getcwd() - if pwd and pwd != "/": - replacements.append((pwd, "$PWD")) - _LOG.debug("Added PWD '%s' for replacement", pwd) - # Apply replacements in order of priority. - for path, replacement in replacements: - # Use word boundaries to avoid replacing path fragments. - # E.g., To avoid replacing `app` in `application.py`. - pattern = rf"(? str: - """ - Replace environment-specific values with placeholders. - - Perform these transformations: - 1. Replace directory paths with standardized placeholders. - 2. Replace the current user name with $USER_NAME. - 3. Handle special cases like usernames in paths and commands. - - :param txt: input text that needs to be purified - :return: purified text - """ - # Replace current username with `$USER_NAME`. 
- user_name = hsystem.get_user_name() - # Set a regex pattern that finds a user name surrounded by dot, dash or space. - # E.g., `IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0`, - # `--name $USER_NAME.amp_test.app.app`, `run --rm -l user=$USER_NAME`. - regex = rf"([\s\n\-\.\=]|^)+{user_name}+([.\s/-]|$)" - # Use `\1` and `\2` to preserve specific characters around `$USER_NAME`. - target = r"\1$USER_NAME\2" - txt = re.sub(regex, target, txt) - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def _apply_regex_replacements( - self, txt: str, regex_patterns: List[Tuple[str, str]] - ) -> str: - """ - Apply a series of regex replacements to text. - - :param txt: input text to process - :param regex_patterns: list of (pattern, replacement) tuples to - apply in order - :return: text with all regex replacements applied - """ - # Apply regex replacements in order. - txt_out = txt - for regex_pattern, replacement in regex_patterns: - txt_out = re.sub(regex_pattern, replacement, txt_out) - _LOG.debug( - "Applying %s -> %s: before=%s, after=%s", - regex_pattern, - replacement, - txt, - txt_out, - ) - return txt_out - - def purify_amp_references(self, txt: str) -> str: - """ - Remove references to amp from text by applying a series of regex - substitutions. - - Handle these patterns: - 1. Replace path references - - E.g., "amp/helpers/test/..." -> "helpers/test/..." - 2. Replace class references - - E.g., "" -> "" - 3. Replace comment references - - E.g., "# Test created for amp.helpers.test" -> "# Test created for helpers.test" - 4. Replace module references - - E.g., "amp.helpers.test.TestClass" -> "helpers.test.TestClass" - - :param txt: input text containing amp references - :return: text with amp references removed - """ - amp_patterns = [ - # Remove 'amp/' prefix from quoted paths. - (r"'amp/", "'"), - # Remove 'amp/' prefix from path segments. 
- (r"(?m)(^\s*|\s+)amp/", r"\1"), - # Replace '/amp/' with '/' and '/amp:' with ':' in paths. - (r"(?m)/amp/", "/"), - (r"(?m)/amp:", ":"), - # Remove 'amp.' prefix from class representations and tracebacks. - (r" str: - """ - Remove references to `/app` from text by applying a series of regex - substitutions. - - :param txt: input text containing app references - :return: text with app references removed - """ - app_patterns = [ - # Remove trailing '/app/' references. - (r"(? str: - """ - Replace environment variable values with their variable names. - - :param txt: input text containing environment variable values - :return: text with environment variable values replaced - """ - for env_var in [ - "CSFY_AWS_S3_BUCKET", - "CSFY_ECR_BASE_PATH", - ]: - if env_var in os.environ: - val = os.environ[env_var] - if val == "": - _LOG.debug("Env var '%s' is empty", env_var) - else: - txt = txt.replace(val, f"${env_var}") - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def purify_object_representation(self, txt: str) -> str: - """ - Remove references like `at 0x7f43493442e0`. - - :param txt: input text containing object representations - :return: text with object representations standardized - """ - object_patterns = [ - (r"at 0x[0-9A-Fa-f]+", "at 0x"), - (r" id='\d+'>", " id='xxx'>"), - (r"port=\d+", "port=xxx"), - (r"host=\S+ ", "host=xxx "), - ( - r"wall_clock_time=Timestamp\('.*?',", - r"wall_clock_time=Timestamp('xxx',", - ), - ] - txt = self._apply_regex_replacements(txt, object_patterns) - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def purify_today_date(self, txt: str) -> str: - """ - Remove today's date like `20220810`. - - :param txt: input text containing dates - :return: text with dates standardized - """ - today_date = datetime.date.today() - today_date_as_str = today_date.strftime("%Y%m%d") - # Replace predict.3.compress_tails.df_out.20220627_094500.YYYYMMDD_171106.csv.gz. 
- txt = re.sub( - today_date_as_str + r"_\d{6}", - "YYYYMMDD_HHMMSS", - txt, - flags=re.MULTILINE, - ) - txt = re.sub(today_date_as_str, "YYYYMMDD", txt, flags=re.MULTILINE) - return txt - - def purify_white_spaces(self, txt: str) -> str: - """ - Remove trailing white spaces. - - :param txt: input text with whitespace - :return: text with standardized whitespace - """ - txt_new = [] - for line in txt.split("\n"): - line = line.rstrip() - txt_new.append(line) - txt = "\n".join(txt_new) - return txt - - def purify_line_number(self, txt: str) -> str: - """ - Replace line number with `$LINE_NUMBER`. - - :param txt: input text containing line numbers - :return: text with line numbers standardized - """ - txt = re.sub(r"\.py::\d+", ".py::$LINE_NUMBER", txt, flags=re.MULTILINE) - return txt - - def purify_parquet_file_names(self, txt: str) -> str: - """ - Replace UUIDs file names to `data.parquet` in the golden outcomes. - - :param txt: input text containing parquet file names - :return: text with standardized parquet file names - """ - pattern = r""" - [0-9a-f]{32}-[0-9].* # GUID pattern. - (?=\.parquet) # positive lookahead assertion that matches a - # position followed by ".parquet" without - # consuming it. - """ - # TODO(Vlad): Need to change the replacement to `$FILE_NAME` as in the - # `purify_from_environment()` function. For now, some tests are expecting - # `data.parquet` files. - replacement = "data" - # flags=re.VERBOSE allows us to use whitespace and comments in the pattern. - txt = re.sub(pattern, replacement, txt, flags=re.VERBOSE) - return txt - - def purify_helpers(self, txt: str) -> str: - """ - Replace the path `helpers_root.helpers` with `helpers`. 
- - :param txt: input text containing helper references - :return: text with standardized helper references - """ - txt = re.sub( - r"helpers_root\.helpers\.", "helpers.", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/helpers/", "helpers/", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root\.config_root", "config_root", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/config_root/", "config_root/", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/dev_scripts_helpers/", - "dev_scripts_helpers/", - txt, - flags=re.MULTILINE, - ) - return txt - - def purify_docker_image_name(self, txt: str) -> str: - """ - Remove temporary docker image name. - - :param txt: input text containing docker image names - :return: text with standardized docker image names - """ - # Purify command like: - # > docker run --rm ... tmp.latex.edb567be .. - # > ... tmp.latex.aarch64.2f590c86.2f590c86 - pattern = r""" - ^ # Start of line - ( # Start capture group 1 - .*docker.* # Any text containing "docker" - \s+ # One or more whitespace - tmp\.\S+\. # tmp.something. - ) # End capture group 1 - [a-z0-9]{8} # 8 character hex hash - ( # Start capture group 2 - \s+ # One or more whitespace - .* # Rest of the line - ) # End capture group 2 - $ # End of line - """ - txt = re.sub( - pattern, - r"\1xxxxxxxx\2", - txt, - flags=re.MULTILINE | re.VERBOSE, - ) - # Handle patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. - pattern = r""" - ^ # Start of line - ( # Start capture group 1 - .*docker.* # Any text containing "docker" - \s+ # One or more whitespace - tmp\.\S+\.\S+\. # tmp.something.something. - ) # End capture group 1 - [a-z0-9]{8} # 8 character hex hash - \. 
# Literal dot - [a-z0-9]{8} # Another 8 character hex hash - ( # Start capture group 2 - \s+ # One or more whitespace - .* # Rest of the line - ) # End capture group 2 - $ # End of line - """ - txt = re.sub( - pattern, - r"\1xxxxxxxx\2", - txt, - flags=re.MULTILINE | re.VERBOSE, - ) - return txt - - def purify_file_names(self, file_names: List[str]) -> List[str]: - """ - Express file names in terms of the root of git repo, removing reference - to `amp`. - """ - git_root = hgit.get_client_root(super_module=True) - file_names = [os.path.relpath(f, git_root) for f in file_names] - # Apply amp reference purification to file paths. - file_names = list(map(self.purify_amp_references, file_names)) - return file_names - - -def purify_text(txt: str) -> str: - """ - Purify text by removing environment-specific information and standardizing - output for test comparisons. - """ - purifier = TextPurifier() - return purifier.purify_txt_from_client(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py deleted file mode 100644 index 4848ea094..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py +++ /dev/null @@ -1,586 +0,0 @@ -""" -Import as: - -import helpers.hunit_test_utils as hunteuti -""" - -import abc -import contextlib -import glob -import logging -import os -import re -from typing import Any, Dict, Generator, List, Optional, Tuple -import unittest.mock as mock - -import pytest - -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hserver as hserver -import helpers.hstring as hstring -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def 
get_test_directories(root_dir: str) -> List[str]: - """ - Get paths of all the directories that contain unit tests. - - :param root_dir: the dir to start the search from, e.g. - `/src/cmamp1/helpers` - :return: paths of test directories - """ - paths = [] - for path, _, _ in os.walk(root_dir): - # Iterate over the paths to find the test directories. - if path.endswith("/test"): - paths.append(path) - hdbg.dassert_lte(1, len(paths)) - return paths - - -# ############################################################################# -# UnitTestRenamer -# ############################################################################# - - -class UnitTestRenamer: - """ - Rename a unit test in Python code and the corresponding directories - containing the inputs and the expected outputs. - """ - - @staticmethod - def _check_names(old_test_name: str, new_test_name: str) -> None: - """ - Check if the test names are valid. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - """ - # Assert if the classname does not start with `Test`. - for name in [old_test_name, new_test_name]: - hdbg.dassert( - name.startswith("Test"), - "Invalid test_class_name='%s'. A test class should start with `Test`", - name, - ) - # Assert if the names are the same. - hdbg.dassert_ne(old_test_name, new_test_name) - - @staticmethod - def _process_parameters( - old_test_name: str, - new_test_name: str, - ) -> Dict[str, str]: - """ - Build the processing config with the renaming parameters. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - :return: config for renaming process, i.e. a dictionary which includes the fields: - - `old_class`: old name of the class - - `new_class`: new name of the class - - `old_method`: new name of the method. If empty, only class should be renamed - - `new_method`: new name of the method - """ - # Build the processing config. - config: Dict[str, str] = {} - # Split by "." 
to separate class name and method name. - split_old_name = old_test_name.split(".") - split_new_name = new_test_name.split(".") - # Check the consistency of the names - they should have the same length. - hdbg.dassert_eq( - len(split_old_name), - len(split_new_name), - "The test names are not consistent; one has a method and the other does not.", - ) - # Check the format of the test name. - hdbg.dassert_in( - len(split_old_name), - [1, 2], - msg="Wrong test name format: it must contain no more than 1 dot", - ) - if len(split_old_name) == 1: - # Class name split by `.` is one element array, e.g. `["TestClassName"]`. - old_class_name, old_method_name = split_old_name[0], "" - new_class_name, new_method_name = split_new_name[0], "" - _LOG.debug( - "Trying to change the name of `{old_test_name}` unit test class to `%s`.", - new_test_name, - ) - else: - # Method name split by `.` is 2 element array, e.g. - # TestClassName.test2` - >`["TestClassName", "test2"]`. - old_class_name, old_method_name = split_old_name - new_class_name, new_method_name = split_new_name - hdbg.dassert_eq( - old_class_name, - new_class_name, - "To change the name of the method, specify the methods of the \ - same class. E.g. `--old TestCache.test1 --new TestCache.new_test1`", - ) - _LOG.debug( - "Trying to change the name of `%s` method of `%s` class to `%s`.", - old_method_name, - old_class_name, - new_method_name, - ) - # Fill the processing parameters. - config["old_class"] = old_class_name - config["old_method"] = old_method_name - config["new_class"] = new_class_name - config["new_method"] = new_method_name - return config - - def __init__( - self, old_test_name: str, new_test_name: str, root_dir: str - ) -> None: - """ - Construct the UnitTestRenamer. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - :param root_dir: the directory to start the search from - """ - # Check if the names of the test are valid. 
- self._check_names(old_test_name, new_test_name) - # Get the directories containing tests. - self.test_dirs = get_test_directories(root_dir) - # Construct the renaming config. - self.cfg = self._process_parameters(old_test_name, new_test_name) - - def _rename_class( - self, - content: str, - ) -> Tuple[str, int]: - """ - Rename a class in a Python file. - - :param content: the content of the file - :return: the content of the file with the class name replaced, - the number of substitutions replaced - """ - lines = content.split("\n") - docstring_line_indices = hstring.get_docstring_line_indices(lines) - num_replaced = 0 - for ind, line in enumerate(lines): - # Skip if the line is inside a docstring. - if ind not in docstring_line_indices: - # Rename the class. - new_line, num_replaced = re.subn( - rf"class {self.cfg['old_class']}\(", - rf"class {self.cfg['new_class']}(", - line, - ) - if num_replaced != 0: - lines[ind] = new_line - break - content = "\n".join(lines) - return content, num_replaced - - def _rename_method( - self, - content: str, - ) -> Tuple[str, int]: - """ - Rename the method of the class. - - :param content: the content of the file - :return: content of the file with the method renamed, the number - of substitutions made - """ - lines = content.split("\n") - # Flag that informs if the class border was found. - class_found = False - # The number of substitutions made in the content of the file. - num_replaced = 0 - class_pattern = rf"class {self.cfg['old_class']}\(" - method_pattern = rf"def {self.cfg['old_method']}\(" - docstring_line_indices = hstring.get_docstring_line_indices(lines) - for ind, line in enumerate(lines): - # Iterate over the lines of the file to find the specific method of the - # class that should be renamed. - # Skip if the line is inside a docstring. - if class_found and ind not in docstring_line_indices: - if line.startswith("class"): - # Break if the next class started and the method was not found. 
- break - # Rename the method. - new_line, num_replaced = re.subn( - method_pattern, f"def {self.cfg['new_method']}(", line - ) - if num_replaced != 0: - # Replace the line with method definition. - lines[ind] = new_line - break - else: - if re.search(class_pattern, line): - class_found = True - new_content = "\n".join(lines) - return new_content, num_replaced - - def _rename_in_file( - self, - test_dir: str, - file_path: str, - ) -> None: - """ - Process the file: - - - check if the content of the file contains target class - - change the class name, e.g. `TestClassName` -> `TestClassNameNew` - / change the method name `TestClassName.test2` -> `TestClassName.test_new` - - rename the outcomes if they exist - - :param test_dir: the path to the test directory containing the file, e.g. - `/src/cmamp1/helpers/test` - :param file_path: the path to the file, `/src/cmamp1/helpers/test/test_lib_tasks.py` - """ - content = hio.from_file(file_path) - if not re.search(rf"class {self.cfg['old_class']}\(", content): - # Return if target test class does not appear in file content. - return - if self.cfg["old_method"] == "": - # Rename the class. - content, n_replaced = self._rename_class(content) - if n_replaced != 0: - _LOG.info( - "%s: class `%s` was renamed to `%s`.", - file_path, - self.cfg["old_class"], - self.cfg["new_class"], - ) - else: - # Rename the method of the class. - content, n_replaced = self._rename_method(content) - if n_replaced != 0: - _LOG.info( - "%s: method `%s` of `%s` class was renamed to `%s`.", - file_path, - self.cfg["old_method"], - self.cfg["old_class"], - self.cfg["new_method"], - ) - # Rename the directories that contain target test outcomes. - self.rename_outcomes( - test_dir, - ) - # Write processed content back to file. - hio.to_file(file_path, content) - - def run(self) -> None: - """ - Run the renamer tool on the files under `root_dir`. - """ - # Iterate over test directories. 
- for path in self.test_dirs: - # Get all Python test files from this directory. - _LOG.debug("Scanning `%s` directory.", path) - search_pattern = os.path.join(path, "test_*.py") - files = glob.glob(search_pattern) - for test_file in files: - self._rename_in_file( - path, - test_file, - ) - - @staticmethod - def _rename_directory(outcome_path_old: str, outcome_path_new: str) -> None: - """ - Rename the outcomes directory and add it to git. - - :param outcome_path_old: the old name of outcome directory, e.g. - `/src/cmamp1/helpers/test/outcomes/TestRename.test_old` - :param outcome_path_new: the new name of outcome directory, e.g. - `/src/cmamp1/helpers/test/outcomes/TestRename.test_new` - """ - cmd = f"mv {outcome_path_old} {outcome_path_new}" - # Rename the directory. - rc = hsystem.system(cmd, abort_on_error=True, suppress_output=False) - _LOG.info( - "Renaming `%s` directory to `%s`. Output log: %s", - outcome_path_old, - outcome_path_new, - rc, - ) - # Add to git new outcome directory and remove the old one. - # The sequence of commands is used because `git mv` does not work - # properly while unit testing. - cmd = f"git add {outcome_path_new} && git rm -r {outcome_path_old}" - hsystem.system(cmd, abort_on_error=True, suppress_output=False) - - def _process_outcomes_dir( - self, outcome_dir: str, outcomes_path: str - ) -> bool: - """ - Process the directory containing target test outcomes. - - The stages of processing are: - - generate the new name of the directory - - rename and add it to git - - :param outcome_dir: the name of the directory containing the outcomes - :param outcomes_path: the path to the outcomes directory - :return: if the outcomes were renamed - """ - # Contruct the path to outcomes directory. - outcome_path_old = os.path.join(outcomes_path, outcome_dir) - # Construct old and new target dir names, e.g. 
- # `TestOldName.` and `TestNewName.` if class should be renamed or - # `TestOldName.test_old` and `TestOldName.test_new` if method should be renamed. - old_target = ".".join([self.cfg["old_class"], self.cfg["old_method"]]) - new_target = ".".join([self.cfg["new_class"], self.cfg["new_method"]]) - if self.cfg["old_method"] == "" and outcome_dir.startswith(old_target): - # Check if the class should be renamed, e.g. - # if `outcome_dir` is `TestOld.test1` and `old_target` is `TestOld.`. - # Split old directory name - the part before "." is the class name. - class_method = outcome_dir.split(".") - # Replace old class name with the new one, `["TestOld", "test1"]` - # -> `["TestNew", "test1"]`. - class_method[0] = self.cfg["new_class"] - # Construct the new outcome directory name -> `TestNew.test1`. - outcome_name_new = ".".join(class_method) - outcome_path_new = os.path.join(outcomes_path, outcome_name_new) - elif self.cfg["old_method"] != "" and outcome_dir == old_target: - # Check if the dir should be renamed. E.g. given that `old_target` - # is `TestOld.test1_new`, then if `outcome_dir` is `TestOld.test1`, - # it should not be renamed, and if `outcome_dir` is `TestOld.test1_new`, - # it should be renamed. - outcome_path_new = os.path.join(outcomes_path, new_target) - else: - return False - # Rename the directory and add it to git. - self._rename_directory(outcome_path_old, outcome_path_new) - return True - - def rename_outcomes( - self, - path: str, - ) -> None: - """ - Rename the directory that contains test outcomes. - - :param path: the path to the test directory, e.g. - `cmamp1/helpers/test/` - """ - outcomes_path = os.path.join(path, "outcomes") - dir_items = os.listdir(outcomes_path) - # Get the list of outcomes directories. 
- outcomes = [ - dir_name - for dir_name in dir_items - if os.path.isdir(os.path.join(outcomes_path, dir_name)) - ] - renamed = False - for outcome_dir in outcomes: - renamed = self._process_outcomes_dir(outcome_dir, outcomes_path) - if not renamed: - _LOG.info( - "No outcomes for `%s` were found in `%s`.", - self.cfg["old_class"], - outcomes_path, - ) - - -# ############################################################################# -# Obj_to_str_TestCase -# ############################################################################# - - -class Obj_to_str_TestCase(abc.ABC): - """ - Test case for testing `obj_to_str()` and `obj_to_repr()`. - """ - - def helper(self, obj: Any, method_name: str, expected_str: str) -> None: - """ - Common method for testing `__repr__` and `__str__`. - """ - hdbg.dassert_is_not(obj, None) - actual_str = getattr(obj, method_name)() - self.assert_equal( # type: ignore - actual_str, expected_str, purify_text=True, fuzzy_match=True - ) - - def run_test_repr(self, obj: Any, expected_str: str) -> None: - """ - Check that `__repr__` is printed correctly. - """ - method_name = "__repr__" - self.helper(obj, method_name, expected_str) - - def run_test_str(self, obj: Any, expected_str: str) -> None: - """ - Check that `__str__` is printed correctly. - """ - method_name = "__str__" - self.helper(obj, method_name, expected_str) - - def run_test_to_config_str(self, obj: Any, expected_str: str) -> None: - """ - Check that `to_config_str()` is printed correctly. - """ - method_name = "to_config_str" - self.helper(obj, method_name, expected_str) - - -# ############################################################################# - - -def _get_repo_short_name() -> str: - dir_name = "." 
- include_host_name = False - repo_name = hgit.get_repo_full_name_from_dirname(dir_name, include_host_name) - _LOG.debug("repo_name=%s", repo_name) - # ck/cmamp - short_repo_name = repo_name.split("/")[1] - _LOG.debug("short_repo_name=%s", short_repo_name) - return short_repo_name - - -def execute_only_in_target_repo(target_name: str) -> None: - repo_short_name = _get_repo_short_name() - if repo_short_name != target_name: - pytest.skip(f"Only run on {target_name} and not {repo_short_name}") - - -# TODO(gp): Remove and use pytest.skipif(). -def execute_only_on_ci() -> None: - is_inside_ci_ = hserver.is_inside_ci() - if not is_inside_ci_: - pytest.skip("Only run in CI") - - -def execute_only_on_dev4() -> None: - is_dev4_ = hserver.is_dev4() - if not is_dev4_: - pytest.skip("Only run on dev4") - - -def execute_only_on_dev_csfy() -> None: - is_dev_csfy_ = hserver.is_dev_csfy() - if not is_dev_csfy_: - pytest.skip("Only run on dev CSFY") - - -def execute_only_on_mac(*, version: Optional[str] = None) -> None: - is_host_mac_ = hserver.is_host_mac() - if version: - is_host_mac_ = hserver.is_host_mac_version(version) - if not is_host_mac_: - pytest.skip(f"Only run on Mac with version={version}") - - -def check_env_to_str( - self_: Any, expected: str, *, skip_secrets_vars: bool = False -) -> None: - actual = henv.env_to_str(system_signature=False) - actual = hunitest.filter_text("get_name", actual) - actual = hunitest.filter_text("get_repo_map", actual) - actual = hunitest.filter_text("CSFY_HOST_", actual) - if skip_secrets_vars: - # TODO(gp): Difference between amp and cmamp. - actual = hunitest.filter_text( - "AM_AWS_|CSFY_AWS_|GH_ACTION_ACCESS_TOKEN", actual - ) - self_.assert_equal(actual, expected, fuzzy_match=True, purify_text=True) - - -def get_test_file_for_source(source_file: str) -> Optional[str]: - """ - Map a source Python file to its corresponding test file. 
- - E.g., helpers/hdbg.py -> helpers/test/test_hdbg.py - - :param source_file: path to a source Python file - :return: path to corresponding test file if it exists and source is not - already a test file; None otherwise - """ - base_name = os.path.basename(source_file) - is_test = ( - "test" in source_file.split("/") - and base_name.startswith("test_") - and source_file.endswith(".py") - ) - if is_test: - return None - dir_name = os.path.dirname(source_file) - test_file = os.path.join(dir_name, "test", f"test_{base_name}") - if os.path.exists(test_file): - return test_file - return None - - -# ############################################################################# -# System call capture utilities -# ############################################################################# - - -@contextlib.contextmanager -def capture_system_calls( - side_effect: Optional[Any] = None, -) -> Generator[List[Dict[str, Any]], None, None]: - """ - Context manager that captures all system calls to `subprocess.run()` and - `hsystem._system()`, returning them as a list of invocations. - - Each invocation is a dict with 'function', 'args', and 'kwargs' keys. - - :param side_effect: Exception or return value to use for mocked calls - :return: List of invocations, each as {'function': str, 'args': tuple, - 'kwargs': dict} - - Example: - ``` - with capture_system_calls() as invocations: - my_function() - # Check captured calls. 
- assert len(invocations) == 1 - assert invocations[0]['function'] == 'subprocess.run' - ``` - """ - invocations: List[Dict[str, Any]] = [] - - def mock_subprocess_run(*args: Any, **kwargs: Any) -> Any: - invocations.append( - { - "function": "subprocess.run", - "args": args, - "kwargs": kwargs, - } - ) - if side_effect is not None: - if isinstance(side_effect, type) and issubclass( - side_effect, BaseException - ): - raise side_effect() - elif isinstance(side_effect, BaseException): - raise side_effect - return None - - def mock_hsystem(*args: Any, **kwargs: Any) -> Any: - invocations.append( - { - "function": "hsystem._system", - "args": args, - "kwargs": kwargs, - } - ) - if side_effect is not None: - if isinstance(side_effect, type) and issubclass( - side_effect, BaseException - ): - raise side_effect() - elif isinstance(side_effect, BaseException): - raise side_effect - return (0, "") # Return code and output - - with mock.patch("subprocess.run", side_effect=mock_subprocess_run): - with mock.patch("helpers.hsystem._system", side_effect=mock_hsystem): - yield invocations diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py deleted file mode 100644 index 18aea68c5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py +++ /dev/null @@ -1,300 +0,0 @@ -""" -Import as: - -import helpers.hversion as hversio -""" - -# This code implements version control for code -# The code version is used in two circumstances: -# 1) when any code using `hdbg.py` (which is included everywhere) starts in -# order to verify that the running code and the container in which the code -# is running are compatible -# 2) when a container is built to know what version of the code was used to build -# it - -import functools -import 
logging -import os -import re -from typing import List, Optional, cast - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - -_LOG = logging.getLogger(__name__) - - -_INFO = "\033[36mINFO\033[0m" -_WARNING = "\033[33mWARNING\033[0m" -_ERROR = "\033[31mERROR\033[0m" -# -_VERSION_RE = r"\d+\.\d+\.\d+" - - -# Copied from helpers.hgit to avoid circular dependencies. - - -@functools.lru_cache() -def _is_inside_submodule(git_dir: str = ".") -> bool: - """ - Return whether a dir is inside a Git submodule or a Git supermodule. - - We determine this checking if the current Git repo is included - inside another Git repo. - """ - cmd = [] - # - Find the git root of the current directory - # - Check if the dir one level up is a valid Git repo - # Go to the dir. - cmd.append(f"cd {git_dir}") - # > cd im/ - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd.append('cd "$(git rev-parse --show-toplevel)/.."') - # > git rev-parse --is-inside-work-tree - # true - cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") - cmd_as_str = " && ".join(cmd) - rc = hsystem.system(cmd_as_str, abort_on_error=False) - ret: bool = rc == 0 - return ret - - -@functools.lru_cache() -def _get_client_root(super_module: bool) -> str: - """ - Return the full path of the root of the Git client. - - E.g., `/Users/saggese/src/.../amp`. - - :param super_module: if True use the root of the Git super_module, - if we are in a submodule. Otherwise use the Git sub_module root - """ - if super_module and _is_inside_submodule(): - # https://stackoverflow.com/questions/957928 - # > cd /Users/saggese/src/.../amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/... 
- cmd = "git rev-parse --show-superproject-working-tree" - else: - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd = "git rev-parse --show-toplevel" - # TODO(gp): Use system_to_one_line(). - _, out = hsystem.system_to_string(cmd) - out = out.rstrip("\n") - hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") - client_root: str = os.path.realpath(out) - return client_root - - -# End copy. - - -def get_changelog_version( - container_dir_name: str, *, file_name: str = None -) -> Optional[str]: - """ - Return latest version from changelog.txt file. - - :param container_dir_name: container directory relative to the root - directory - :param file_name: changelog file name - """ - version: Optional[str] = None - supermodule = True - root_dir = _get_client_root(supermodule) - # Note: for `amp` as submodule one should pass `container_dir_name` relative - # to the root, e.g., `amp/optimizer` and not just `optimizer`. - hdbg.dassert_ne(container_dir_name, "") - if file_name is None: - file_name = "changelog.txt" - changelog_file = os.path.join(root_dir, container_dir_name, file_name) - hdbg.dassert_file_exists(changelog_file) - changelog = hio.from_file(changelog_file) - match = re.search(_VERSION_RE, changelog) - if match: - version = match.group() - return version - - -def get_container_version() -> Optional[str]: - """ - Return the container version. - - :return: container code version from the env var - """ - container_version: Optional[str] = None - if hserver.is_inside_docker(): - env_var = "AM_CONTAINER_VERSION" - if env_var not in os.environ: - # This can happen when GH Actions pull the image using invoke - # inside their container (but not inside ours), thus there is no - # AM_CONTAINER_VERSION. - print( - _WARNING - + f": The env var {env_var} should be defined when running inside a" - " container" - ) - else: - # We are running inside a container. 
- # Keep the code and the container in sync by versioning both and - # requiring to be the same. - container_version = os.environ["AM_CONTAINER_VERSION"] - return container_version - - -def _check_version(code_version: str, container_version: str) -> bool: - """ - Check whether the code version and the container version are the same. - - :param code_version: code version from the changelog - :param container_version: container code version from the env var - :return: whether the versions are the same or not - """ - # Since the code version from the changelog is extracted with the - # `_VERSION_RE` regex, we apply the same regex to the container version - # to keep the representations comparable. - match = re.search(_VERSION_RE, container_version) - hdbg.dassert( - match, - ( - "Invalid format of the container code version '%s'; " - "it should contain a number like '1.0.0'" - ), - container_version, - ) - container_version = match.group() # type: ignore - # Check if the versions are the same. - is_ok = container_version == code_version - if not is_ok: - msg = f""" - ----------------------------------------------------------------------------- - This code is not in sync with the container: - code_version='{code_version}' != container_version='{container_version}' - ----------------------------------------------------------------------------- - You need to: - - merge origin/master into your branch with `invoke git_merge_master` - - pull the latest container with `invoke docker_pull` - """ - msg = hprint.dedent(msg) - # Highlight in red. - # TODO(gp): Use the proper function, if dependencies allow it. - msg = f"\033[31m{msg}\033[0m" - print(msg) - if False: - raise RuntimeError(msg) - return is_ok - - -def check_version(container_dir_name: str) -> None: - """ - Check that the code and container code have compatible version, otherwise - raises `RuntimeError`. 
- - :param container_dir_name: container directory relative to the root - directory - """ - # TODO(gp): -> CK_SKIP_VERSION_CHECK. - if "SKIP_VERSION_CHECK" in os.environ: - # Skip the check altogether. - return - # Get code version. - code_version = get_changelog_version(container_dir_name) - container_version = get_container_version() - # Check version, if possible. - if container_version is None: - # No need to check. - return - code_version = cast(str, code_version) - _check_version(code_version, container_version) - - -def get_latest_changelog_entry( - changelog_path: str, -) -> dict: - """ - Parse the latest changelog entry from a changelog file. - - :param changelog_path: path to the changelog.txt file - :return: dict with keys: 'version', 'date', 'changes' (list of - change lines) - """ - hdbg.dassert_file_exists(changelog_path) - changelog = hio.from_file(changelog_path) - lines = changelog.split("\n") - version = None - date = None - changes = [] - in_entry = False - for line in lines: - line = line.rstrip() - # Check for version header (e.g., "# csfy-2.2.0"). - version_match = re.match(r"^#\s+(.+)$", line) - if version_match: - if version is None: - # This is the first (latest) entry. - version = version_match.group(1) - in_entry = True - else: - # We've reached the next entry, stop. - break - elif in_entry: - # Check for date (e.g., "- 2025-10-06"). - date_match = re.match(r"^-\s+(\d{4}-\d{2}-\d{2})$", line) - if date_match and date is None: - date = date_match.group(1) - # Collect change lines. - elif line.startswith("- ") and not date_match: - changes.append(line) - return {"version": version, "date": date, "changes": changes} - - -def bump_version(version: str, *, bump_type: str = "minor") -> str: - """ - Bump a semantic version number. 
- - :param version: version string in format X.Y.Z (e.g., "2.2.0") - :param bump_type: type of version bump - "major", "minor", or "patch" - :return: bumped version string - """ - hdbg.dassert_in(bump_type, ("major", "minor", "patch")) - # Parse version using regex. - match = re.match(r"^(\d+)\.(\d+)\.(\d+)$", version) - hdbg.dassert( - match, - f"Invalid version format: '{version}'. Expected X.Y.Z format.", - ) - major, minor, patch = map(int, match.groups()) - # Bump according to type. - if bump_type == "major": - major += 1 - minor = 0 - patch = 0 - elif bump_type == "minor": - minor += 1 - patch = 0 - else: # patch - patch += 1 - return f"{major}.{minor}.{patch}" - - -def get_container_version_info() -> str: - txt_tmp: List[str] = [] - # - container_version = str(get_container_version()) - txt_tmp.append(f"container_version='{container_version}'") - # - container_dir_name = "." - changelog_version = str(get_changelog_version(container_dir_name)) - txt_tmp.append(f"changelog_version='{changelog_version}'") - # - txt = hprint.to_info("Container version", txt_tmp) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py deleted file mode 100644 index ea8392f6e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Import as: - -import helpers.hwall_clock_time as hwacltim -""" - -# This should have no dependencies besides Python standard libraries since it's used -# in `helpers/hlogging.py`. 
- -import datetime -import logging -from typing import Callable, Optional, Union - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Simulated real time -# ############################################################################# - -# Copied from `helpers/hdatetime.py` -# -# Function returning the current (true, replayed, simulated) wall-clock time as a -# timestamp. -_GetWallClockTime = Callable[[], "pd.Timestamp"] # noqa: F821 - -_get_wall_clock_time_func: Optional[_GetWallClockTime] = None - - -def set_wall_clock_time(get_wall_clock_time_func_: _GetWallClockTime) -> None: - """ - Set the global function to retrieve the wall clock time. - """ - assert callable(get_wall_clock_time_func_) - global _get_wall_clock_time_func - _get_wall_clock_time_func = get_wall_clock_time_func_ - - -def get_wall_clock_time_func() -> Optional[_GetWallClockTime]: - """ - Retrieve the global function retrieve the wall clock time. - """ - return _get_wall_clock_time_func - - -# We don't want to import `Pandas` just for a type. -def get_wall_clock_time() -> Optional["pd.Timestamp"]: # noqa: F821 - """ - Return the wall clock time (according to the set function) or `None` if no - function was set. - """ - func = _get_wall_clock_time_func - if func is None: - timestamp = None - else: - timestamp = func() - return timestamp - - -# ############################################################################# -# Real-world / machine real time. -# ############################################################################# - - -# TODO(Sameep): Redundant fuction replace by `hdatetime.timestamp_to_str()`. -def to_timestamp_str( - timestamp: "pd.Timestamp", # noqa: F821 - include_msec: bool = False, -) -> str: - if include_msec: - # Chop the last 4 miliseconds digits. This is needed for CcxtBroker_v2. 
- return timestamp.strftime("%Y%m%d_%H%M%S%f")[:-4] - else: - return timestamp.strftime("%Y%m%d_%H%M%S") - - -# This is redundant with `hdatetime.get_current_time()` and -# `hdateti.get_current_timestamp_as_string()` but we keep them to simplify -# dependencies. -def get_machine_wall_clock_time( - *, - as_str: bool = False, - include_msec: bool = False, -) -> Union[str, datetime.datetime]: - ret = datetime.datetime.utcnow() - if as_str: - ret = to_timestamp_str(ret, include_msec) - return ret - - -# ############################################################################# -# Current bar being processed. -# ############################################################################# - - -_CURR_BAR_TIMESTAMP: Optional["pd.Timestamp"] = None # noqa: F821 - - -def reset_current_bar_timestamp() -> None: - global _CURR_BAR_TIMESTAMP - _LOG.debug("Reset") - _CURR_BAR_TIMESTAMP = None - - -def set_current_bar_timestamp(timestamp: "pd.Timestamp") -> None: # noqa: F821 - _LOG.debug("timestamp=%s", timestamp) - global _CURR_BAR_TIMESTAMP - if _CURR_BAR_TIMESTAMP is not None: - # TODO(Grisha): should we relax the check by using - # `<=` instead of `<`? 
- assert _CURR_BAR_TIMESTAMP < timestamp, ( - "Bar timestamp can only move forward: " - + f"{_CURR_BAR_TIMESTAMP} <= {timestamp}" - ) - _CURR_BAR_TIMESTAMP = timestamp - - -def get_current_bar_timestamp( - *, - as_str: bool = False, - include_msec: bool = False, -) -> Optional[Union[str, "pd.Timestamp"]]: # noqa: F821 - ret = _CURR_BAR_TIMESTAMP - if _CURR_BAR_TIMESTAMP and as_str: - ret = to_timestamp_str(ret, include_msec=include_msec) - return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py deleted file mode 100644 index 4f740f572..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -When this module is imported certain annoying warnings are disabled. - -Import as: - -import helpers.hwarnings as hwarnin -""" - -if False: - _WARNING = "\033[33mWARNING\033[0m" - print(f"{_WARNING}: Disabling annoying warnings") - -# Avoid dependency from other `helpers` modules, such as `helpers.hprint`, to -# prevent import cycles. - -import warnings - -# From https://docs.python.org/3/library/warnings.html - -# TODO(gp): For some reason "once" doesn't work, so we ignore all of the warnings. -action = "ignore" - -try: - import statsmodels # noqa: F401 - - _HAS_STATSMODELS = True -except ImportError: - _HAS_STATSMODELS = False - - -if _HAS_STATSMODELS: - # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1910: - # InterpolationWarning: The test statistic is outside of the range of p-values - # available in the look-up table. The actual p-value is greater than the - # p-value returned. 
- from statsmodels.tools.sm_exceptions import InterpolationWarning - - # warnings.simplefilter("ignore", category=InterpolationWarning) - - # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1906: - # InterpolationWarning: The test statistic is outside of the range of p-values - # available in the look-up table. The actual p-value is smaller than the - # p-value returned. - warnings.filterwarnings( - action, - category=InterpolationWarning, - module=".*statsmodels.*", - lineno=1906, - append=False, - ) - - warnings.filterwarnings( - action, - category=InterpolationWarning, - module=".*statsmodels.*", - lineno=1910, - append=False, - ) - - -# /venv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: -# DeprecationWarning: `should_run_async` will not call `transform_cell` -# automatically in the future. Please pass the result to `transformed_cell` -# argument and any exception that happen during thetransform in -# `preprocessing_exc_tuple` in IPython 7.17 and above. -# and should_run_async(code) -warnings.filterwarnings( - action, - category=DeprecationWarning, - module=".*ipykernel.*", - lineno=283, - append=False, -) - - -# TODO(gp): Add this TqdmExperimentalWarning - -try: - import pandas as pd - - _HAS_PANDAS = True -except ImportError: - _HAS_PANDAS = False - - -if _HAS_PANDAS: - pd.set_option("mode.chained_assignment", None) - # TODO(gp): We should fix the issues and re-enable. 
- # See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - # row["net_cost"] -= cost - # /app/amp/oms/order_processing/order_processor.py:376: SettingWithCopyWarning: - # A value is trying to be set on a copy of a slice from a DataFrame - - # /venv/lib/python3.8/site-packages/pandas/io/sql.py:761: UserWarning: pandas - # only support SQLAlchemy connectable(engine/connection) ordatabase string URI or - # sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider - # using SQLAlchemy - # - # This seems a false alarm: - # https://github.com/pandas-dev/pandas/issues/45660#issuecomment-1077355514 - warnings.filterwarnings( - action, - category=UserWarning, - module=".*pandas.*", - lineno=761, - append=False, - ) - - # run_leq_node: 38%|███▊ | 3/8 [00:05<00:09, 1.98s/it]/app/amp/helpers/hdbg.py:309: PerformanceWarning: indexing past lexsort depth may impact performance. - # cond = value in valid_values - warnings.filterwarnings( - action, - category=pd.errors.PerformanceWarning, - module=".*hdbg.py.*", - lineno=309, - append=False, - ) - - # run_leq_node: 0%| | 0/8 [00:00 str: - """ - Get the shared configs S3 bucket. - - :param environment: environment to get the shared configs for - :return: shared configs S3 bucket - """ - hdbg.dassert_in(environment, ["prod", "preprod", "test"]) - bucket_name = hrecouti.get_repo_config().get_shared_configs_bucket_name( - environment - ) - hdbg.dassert_is_not( - bucket_name, - None, - f"Shared configs bucket is not defined in `repo_config.yaml` for environment: {environment}", - ) - return bucket_name - - -def _get_ecs_task_definition_template(environment: str) -> Dict[str, Any]: - """ - Get the ECS task definition template. 
- - :return: ECS task definition template - """ - s3_bucket = _get_shared_configs_s3_bucket(environment) - s3_path = f"{s3_bucket}/{environment}/templates/ecs/ecs_task_definition_template.json" - hs3.dassert_is_s3_path(s3_path) - task_definition_config = hs3.from_file( - s3_path, aws_profile=haws.AWS_PROFILE[environment] - ) - task_definition_config = json.loads(task_definition_config) - return task_definition_config - - -def _get_efs_mount_config_template(environment: str) -> Dict[str, Any]: - """ - Get the EFS mount config template. - - :return: EFS mount config template - """ - s3_bucket = _get_shared_configs_s3_bucket(environment) - s3_path = ( - f"{s3_bucket}/{environment}/templates/efs/efs_mount_config_template.json" - ) - hs3.dassert_is_s3_path(s3_path) - efs_config = hs3.from_file( - s3_path, aws_profile=haws.AWS_PROFILE[environment] - ) - efs_config = json.loads(efs_config) - return efs_config - - -def _set_task_definition_config( - task_definition_config: Dict, - task_definition_name: str, - region: str, - environment: str, -) -> Dict[str, Any]: - """ - Update template of ECS task definition with concrete values. - - :param task_definition_config: task definition config template - :param task_definition_name: name of the task definition - :param region: region to create the task definition in - :return: full formed task definition config dictionary - """ - # Replace placeholder values inside container definition - # from the template with concrete values. - # We use single container inside our task definition and - # the convention is to set the same name as the task - # definition itself. - task_definition_config["containerDefinitions"][0]["name"] = ( - task_definition_name - ) - # Set placeholder image URL. - # Get the base registry URL in the base region. - base_registry_url = hrecouti.get_repo_config().get_container_registry_url() - # Build the region-specific ECR registry URL for the target region. 
- # ECR registry URL format: `{account_id}.dkr.ecr.{region}.amazonaws.com`. - account_id = base_registry_url.split(".")[0] - registry_url = f"{account_id}.dkr.ecr.{region}.amazonaws.com" - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - # Make sure that the ECR replication is configured for the target region, - # so images are available in any new regions. - task_definition_config["containerDefinitions"][0]["image"] = ( - _IMAGE_URL_TEMPLATE.format(registry_url, image_name) - ) - # Set log configuration options. - log_config_opts = copy.deepcopy(_TASK_DEFINITION_LOG_OPTIONS_TEMPLATE) - log_config_opts["awslogs-group"] = log_config_opts["awslogs-group"].format( - task_definition_name - ) - log_config_opts["awslogs-region"] = region - task_definition_config["containerDefinitions"][0]["logConfiguration"][ - "options" - ] = log_config_opts - # Index is based on the order of the environment variables in the template. - # Set environment variable `CSFY_ECR_BASE_PATH`. - task_definition_config["containerDefinitions"][0]["environment"][0][ - "value" - ] = registry_url - # Set environment variable `CSFY_AWS_DEFAULT_REGION`. - task_definition_config["containerDefinitions"][0]["environment"][1][ - "value" - ] = region - # Configure access to EFS. - efs_config = _get_efs_mount_config_template(environment) - task_definition_config["volumes"] = efs_config[region]["volumes"] - task_definition_config["containerDefinitions"][0]["mountPoints"] = ( - efs_config[region]["mountPoints"] - ) - return task_definition_config - - -def _register_task_definition( - task_definition_name: str, region: str, environment: str -) -> None: - """ - Register a new ECS task definition. - - :param task_definition_name: name of the new task definition. - :param config_file: path to the JSON file containing the task - definition configuration. 
- :param region: region to create the task definition in - :param environment: environment to create the task definition in - """ - task_definition_config = _get_ecs_task_definition_template(environment) - client = haws.get_ecs_client(haws.AWS_PROFILE[environment], region=region) - # Prevent overwriting existing task definition if it exists. - if haws.is_task_definition_exists(task_definition_name, region=region): - _LOG.info( - "Task definition %s already exists in region %s", - task_definition_name, - region, - ) - return - # - task_definition_config = _set_task_definition_config( - task_definition_config, task_definition_name, region, environment - ) - client.register_task_definition( - family=task_definition_name, - taskRoleArn=task_definition_config.get("taskRoleArn", ""), - executionRoleArn=task_definition_config["executionRoleArn"], - networkMode=task_definition_config["networkMode"], - containerDefinitions=task_definition_config["containerDefinitions"], - volumes=task_definition_config.get("volumes", []), - placementConstraints=task_definition_config.get( - "placementConstraints", [] - ), - requiresCompatibilities=task_definition_config[ - "requiresCompatibilities" - ], - cpu=task_definition_config["cpu"], - memory=task_definition_config["memory"], - ) - _LOG.info( - "Registered new task definition: %s in region %s", - task_definition_name, - region, - ) - - -def aws_update_ecs_task_definition( - *, - task_definition: str, - image_tag: str, - region: str, - environment: str, -) -> None: - """ - Update an existing ECS task definition. - - :param task_definition: the name of the ECS task definition for - which an update to container image URL is made, e.g. cmamp-test - :param image_tag: the hash of the new candidate image, e.g. 
- 13538588e - :param region: region to update the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - old_image_url = haws.get_task_definition_image_url( - task_definition, environment=environment, region=region - ) - # Edit container version, e.g. cmamp:prod-12a45 - > cmamp:prod-12b46`. - new_image_url = re.sub("prod-(.+)$", f"prod-{image_tag}", old_image_url) - haws.update_task_definition( - task_definition, new_image_url, region=region, environment=environment - ) - - -@task -def aws_create_test_task_definition( - ctx, - issue_id: Optional[int] = None, - region: str = hs3.AWS_EUROPE_REGION_1, -) -> None: - """ - Create a new ECS task definition. - - :param issue_id: issue ID to create the task definition for - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - # Check if the `issue_id` provided is valid. - hdbg.dassert_is_not(issue_id, None, "issue_id is required") - is_valid_issue_id = str(issue_id).isdigit() - hdbg.dassert(is_valid_issue_id, f"issue_id '{issue_id}' must be an integer") - # Check if the `region` provided is valid. - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-test-{issue_id}" - # Register task definition. - _register_task_definition( - task_definition_name, region=region, environment="test" - ) - - -@task -def aws_create_preprod_task_definition( - ctx, - region: str = hs3.AWS_EUROPE_REGION_1, -) -> None: - """ - Create a new ECS task definition for preprod environment. - - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-preprod" - # Register task definition. 
- _register_task_definition( - task_definition_name, region=region, environment="preprod" - ) - - -@task -def aws_create_prod_task_definition( - ctx, - region: str = hs3.AWS_US_REGION_1, -) -> None: - """ - Create a new ECS task definition. - - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-prod" - # Register task definition. - _register_task_definition( - task_definition_name, region=region, environment="prod" - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py deleted file mode 100644 index 111fa2815..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_bash as hlitabas -""" - -import logging -import os - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hfile_tree as hfiltree -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): GFI: Unit test. -@task -def bash_print_path(ctx): # type: ignore - """ - Print the bash path. - """ - _ = ctx - cmd = r"echo $PATH | sed 's/:/\n/g'" - _, ret = hsystem.system_to_string(cmd) - paths = ret.split("\n") - paths.sort() - # - all_paths = [] - # Remove empty lines. 
- for path in paths: - if path.strip() == "": - _LOG.error("Empty path: '%s'", path) - continue - if not os.path.exists(path): - _LOG.error("Dir doesn't exist: '%s'", path) - continue - if not os.path.isdir(path): - _LOG.error("Not a dir: '%s'", path) - continue - # TODO(gp): Make it efficient. - if paths.count(path) > 1: - _LOG.error("Duplicate path: '%s'", path) - continue - all_paths.append(path) - # Print the paths. - _LOG.info("Valid paths:") - for path in all_paths: - print(path) - - -@task -def bash_print_tree( # type: ignore - ctx, - path=".", - depth=0, - clean=False, - include_tests=False, - include_python=False, - only_dirs=False, - output="", -): - """ - Print a directory tree, and optionally update or create a markdown file. - - ``` - # To print tree for current directory: - > i bash_print_tree - - # Limit depth to 2 and include test files: - > i bash_print_tree --path="devops" --depth=2 --include-tests - - # Include python files: - > i bash_print_tree --path="devops" --include-python - - # Only show directories: - > i bash_print_tree --path="devops" --only-dirs - - # Write the tree to file, preserving comments: - > i bash_print_tree --path="devops" --output="README.md" - ``` - - :param path: directory path to traverse - :param depth: maximum depth to traverse - :param clean: clean untracked files in directory - :param include_tests: include test files or directories - :param include_python: include python files - :param only_dirs: only show directories - :param output: path of the markdown file to create or update - """ - _ = ctx - hdbg.dassert_lte(0, depth, "Depth must be non-negative: %s", depth) - if clean: - cmd = "git clean -fd" - hlitauti.run(ctx, cmd) - tree = hfiltree.generate_tree( - path, depth, include_tests, include_python, only_dirs, output - ) - print(tree) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py deleted file mode 100644 index f7dcadc54..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py +++ /dev/null @@ -1,1590 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_docker as hlitadoc -""" - -import functools -import getpass -import logging -import os -import re -from typing import Any, Dict, List, Optional, Union, cast - -# TODO(gp): We should use `pip install types-PyYAML` to get the mypy stubs. -import yaml -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hdict as hdict -import helpers.hdocker as hdocker -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsecrets as hsecret -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Basic Docker commands. 
-# ############################################################################# - - -def _get_docker_exec(sudo: bool) -> str: - docker_exec = "docker" - if sudo: - docker_exec = "sudo " + docker_exec - return docker_exec - - -# //////////////////////////////////////////////////////////////////////////// -# Docker login -# //////////////////////////////////////////////////////////////////////////// - - -@functools.lru_cache() -def _get_aws_cli_version() -> int: - # > aws --version - # aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49 - # aws-cli/1.20.1 Python/3.9.5 Darwin/19.6.0 botocore/1.20.106 - cmd = "aws --version" - res = hsystem.system_to_one_line(cmd)[1] - # Parse the output. - m = re.match(r"aws-cli/((\d+)\.\d+\.\d+)\s", res) - hdbg.dassert_is_not(m, None, "Can't parse '%s'", res) - assert m is not None - version = m.group(1) - _LOG.debug("version=%s", version) - major_version = int(m.group(2)) - _LOG.debug("major_version=%s", major_version) - return major_version - - -def _check_docker_login(repo_name: str) -> bool: - """ - Check if we are already logged in to the Docker registry `repo_name`. - """ - file_name = os.path.join(os.environ["HOME"], ".docker/config.json") - json_data = hio.from_json(file_name) - # > more ~/.docker/config.json - # ``` - # { - # "auths": { - # "623860924167.dkr.ecr.eu-north-1.amazonaws.com": {}, - # "665840871993.dkr.ecr.us-east-1.amazonaws.com": {}, - # "https://index.docker.io/v1/": {} - # }, - # ``` - _LOG.debug("json_data=%s", json_data) - is_logged = any(repo_name in val for val in json_data["auths"].keys()) - return is_logged - - -def _docker_login_dockerhub() -> None: - """ - Log into the Docker Hub which is a public Docker image registry. - """ - # Check if we are already logged in to the target registry. 
- # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20 - use_cache = False - if use_cache: - is_logged = _check_docker_login("623860924167.dkr.ecr") - if is_logged: - _LOG.warning("Already logged in to the target registry: skipping") - return - _LOG.info("Logging in to the target registry") - secret_id = "causify_dockerhub" - secret = hsecret.get_secret(secret_id) - username = hdict.typed_get(secret, "username", expected_type=str) - password = hdict.typed_get(secret, "password", expected_type=str) - cmd = f"docker login -u {username} -p {password}" - hsystem.system(cmd, suppress_output=False) - - -def _docker_login_ecr() -> None: - """ - Log in the AM Docker repo_short_name on AWS. - """ - hlitauti.report_task() - if hserver.is_inside_ci(): - _LOG.warning("Running inside GitHub Action: skipping `docker_login`") - return - # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20 - use_cache = False - if use_cache: - # Check if we are already logged in to the target registry. - is_logged = _check_docker_login("623860924167.dkr.ecr") - if is_logged: - _LOG.warning("Already logged in to the target registry: skipping") - return - _LOG.info("Logging in to the target registry") - # Log in the target registry. - major_version = _get_aws_cli_version() - # docker login \ - # -u AWS \ - # -p eyJ... \ - # -e none \ - # https://*****.dkr.ecr.us-east-1.amazonaws.com - # TODO(gp): Move this to var in repo_config.py. - # TODO(gp): Hack - profile = "ck" - region = hs3.AWS_EUROPE_REGION_1 - cmd = "" - if major_version == 1: - cmd = f"eval $(aws ecr get-login --profile {profile} --no-include-email --region {region})" - elif major_version == 2: - if profile == "ck": - env_var = "CSFY_ECR_BASE_PATH" - else: - env_var = f"{profile.upper()}_ECR_BASE_PATH" - ecr_base_path = hlitauti.get_default_param(env_var) - # TODO(Nikola): Remove `_get_aws_cli_version()` and use only `aws ecr get-login-password` - # as it is present in both versions of `awscli`. 
- cmd = ( - "docker login -u AWS -p " - f"$(aws ecr get-login-password --profile {profile}) " - f"https://{ecr_base_path}" - ) - else: - NotImplementedError( - f"Docker login for awscli v{major_version} is not implemented!" - ) - # TODO(Grisha): fix properly. We pass `ctx` despite the fact that we do not - # need it with `use_system=True`, but w/o `ctx` invoke tasks (i.e. ones - # with `@task` decorator) do not work. - hsystem.system(cmd, suppress_output=False) - - -@task -def docker_login(ctx, target_registry="aws_ecr.ck"): # type: ignore - """ - Log in the target registry and skip if we are in kaizenflow. - - :param ctx: invoke context - :param target_registry: target Docker image registry to log in to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - """ - _ = ctx - hlitauti.report_task() - # No login required as the `helpers` and `tutorials` images are accessible - # on the public DockerHub registry. - if not hserver.is_dev_csfy() and hrecouti.get_repo_config().get_name() in [ - "//helpers", - "//tutorials", - ]: - _LOG.warning("Skipping Docker login process for Helpers or Tutorials") - return - # We run everything using `hsystem.system(...)` but `ctx` is needed - # to make the function work as an invoke target. - if target_registry == "aws_ecr.ck": - _docker_login_ecr() - elif target_registry == "dockerhub.causify": - _docker_login_dockerhub() - else: - raise ValueError(f"Invalid Docker image registry='{target_registry}'") - - -@task -def docker_images_ls_repo(ctx, sudo=False): # type: ignore - """ - List images in the logged in repo_short_name. - """ - hlitauti.report_task() - docker_login(ctx) - # TODO(gp): Move this to a var ECR_BASE_PATH="CSFY_ECR_BASE_PATH" in repo_config.py. 
- ecr_base_path = hlitauti.get_default_param("CSFY_ECR_BASE_PATH") - docker_exec = _get_docker_exec(sudo) - hlitauti.run(ctx, f"{docker_exec} image ls {ecr_base_path}") - - -# //////////////////////////////////////////////////////////////////////////////// -# Version. -# //////////////////////////////////////////////////////////////////////////////// - - -_IMAGE_VERSION_RE = r"\d+\.\d+\.\d+" - - -def _dassert_is_version_valid(version: str) -> None: - """ - Check that the version is valid, i.e. looks like `1.0.0`. - """ - hdbg.dassert_isinstance(version, str) - hdbg.dassert_ne(version, "") - regex = rf"^({_IMAGE_VERSION_RE})$" - _LOG.debug("Testing with regex='%s'", regex) - m = re.match(regex, version) - hdbg.dassert(m, "Invalid version: '%s'", version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Image. -# //////////////////////////////////////////////////////////////////////////////// - - -# This pattern aims to match the full image name including -# both registry and image path. -# Examples of valid matches include: -# - '623860924167.dkr.ecr.eu-north-1.amazonaws.com/cmamp' -# - 'ghcr.io/cryptokaizen/cmamp' -# This change is introduced to match the GHCR registry path, -# since it already includes `/` in the registry name itself. -_FULL_IMAGE_NAME_RE = r"([a-z0-9]+(-[a-z0-9]+)*\.)*[a-z]{2,}(\/[a-z0-9_-]+){1,2}" -_IMAGE_USER_RE = r"[a-z0-9_-]+" -# For candidate prod images which have added hash for easy identification. -_IMAGE_HASH_RE = r"[a-z0-9]{9}" -_IMAGE_STAGE_RE = rf"(local(?:-{_IMAGE_USER_RE})?|dev|prod|prod(?:-{_IMAGE_USER_RE})(?:-{_IMAGE_HASH_RE})?|prod(?:-{_IMAGE_HASH_RE})?)" - - -# TODO(Grisha): call `_dassert_is_base_image_name_valid()` and a separate -# function that validates an image tag. -def dassert_is_image_name_valid(image: str) -> None: - """ - Check whether an image name is valid. 
- - Invariants: - - Local images contain a username and a version - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0` - - `dev` and `prod` images have an instance with a version and one without - to indicate the latest - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0` - and `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` - - `prod` candidate image has an optional tag (e.g., a username) and - a 9 character hash identifier corresponding Git commit - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-4rf74b83a` - - and `*****.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-saggese-4rf74b83a` - - An image should look like: - - *****.dkr.ecr.us-east-1.amazonaws.com/amp:dev - *****.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0 - *****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0 - ghcr.io/cryptokaizen/cmamp:dev - """ - regex = "".join( - [ - # E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/cmamp` - # or `sorrentum/cmamp` or ghcr.io/cryptokaizen/cmamp. - rf"^{_FULL_IMAGE_NAME_RE}", - # E.g., `:local-saggese`. - rf"(:{_IMAGE_STAGE_RE})?", - # E.g., `-1.0.0`. - rf"(-{_IMAGE_VERSION_RE})?$", - ] - ) - _LOG.debug("Testing with regex='%s'", regex) - m = re.match(regex, image) - hdbg.dassert(m, "Invalid image: '%s'", image) - - -def _dassert_is_base_image_name_valid(base_image: str) -> None: - """ - Check that the base image is valid, i.e. looks like below. - - *****.dkr.ecr.us-east-1.amazonaws.com/amp ghcr.io/cryptokaizen/cmamp - """ - regex = rf"^{_FULL_IMAGE_NAME_RE}$" - _LOG.debug("regex=%s", regex) - m = re.match(regex, base_image) - hdbg.dassert(m, "Invalid base_image: '%s'", base_image) - - -# TODO(Grisha): instead of using `base_image` which is Docker registry address -# + image name, use those as separate parameters. See CmTask5074. -def _get_base_image(base_image: str) -> str: - """ - :return: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - """ - if base_image == "": - # TODO(gp): Use os.path.join. 
- base_image = ( - hlitauti.get_default_param("CSFY_ECR_BASE_PATH") - + "/" - + hlitauti.get_default_param("BASE_IMAGE") - ) - _dassert_is_base_image_name_valid(base_image) - return base_image - - -# This code path through Git tag was discontinued with CmTask746. -# def get_git_tag( -# version: str, -# ) -> str: -# """ -# Return the tag to be used in Git that consists of an image name and -# version. -# :param version: e.g., `1.0.0`. If None, the latest version is used -# :return: e.g., `amp-1.0.0` -# """ -# hdbg.dassert_is_not(version, None) -# _dassert_is_version_valid(version) -# base_image = hlibtaskut.get_default_param("BASE_IMAGE") -# tag_name = f"{base_image}-{version}" -# return tag_name - - -# TODO(gp): Consider using a token "latest" in version, so that it's always a -# string and we avoid a special behavior encoded in None. -def get_image( - base_image: str, - stage: str, - version: Optional[str], -) -> str: - """ - Return the fully qualified image name. - - For local stage, it also appends the username to the image name. - - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param stage: e.g., `local`, `dev`, `prod` - :param version: e.g., `1.0.0`, if None empty, the latest version is used - :return: e.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local` or - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local-1.0.0` - """ - # Docker refers the default image as "latest", although in our stage - # nomenclature we call it "dev". - hdbg.dassert_in(stage, "local dev prod".split()) - # Get the base image. - base_image = _get_base_image(base_image) - _dassert_is_base_image_name_valid(base_image) - # Get the full image name. - image = [base_image] - # Handle the stage. - image.append(f":{stage}") - if stage == "local": - user = hsystem.get_user_name() - image.append(f"-{user}") - # Handle the version. 
- if version is not None and version != "": - _dassert_is_version_valid(version) - image.append(f"-{version}") - # - image = "".join(image) - dassert_is_image_name_valid(image) - return image - - -@task -def docker_remove_image(ctx, base_image="") -> None: # type: ignore - """ - Delete the current dev image to free up disk space. - - :param base_image: base name of the image (e.g., `*****.dkr.ecr.us- - east-1.amazonaws.com/amp`) - """ - # Display disk space before cleanup. - _LOG.info("Disk space before cleanup:") - hsystem.system("df -h", suppress_output=False) - # Handle the image. - stage = "dev" - version = "" - image = get_image(base_image, stage, version) - _LOG.info("Deleting Docker image: %s", image) - # Get Docker executable configuration. - use_sudo = hdocker.get_use_sudo() - docker_exec = hdocker.get_docker_executable(use_sudo) - # Delete the specific image. - cmd = f"{docker_exec} rmi -f {image}" - _LOG.info("Running: %s", cmd) - try: - result = hsystem.system(cmd, abort_on_error=False, suppress_output=False) - if result != 0: - _LOG.warning( - "Docker image deletion failed with exit code %s for image: %s", - result, - image, - ) - else: - _LOG.info("Successfully deleted Docker image: %s", image) - except Exception as e: - _LOG.error("Error during Docker image deletion: %s", e) - # Display disk space after cleanup. - _LOG.info("Disk space after cleanup:") - hsystem.system("df -h", suppress_output=False) - - -@task -def docker_ps(ctx, sudo=False): # type: ignore - # pylint: disable=line-too-long - """ - List all the running containers. 
- - ``` - > docker_ps - CONTAINER ID user IMAGE COMMAND CREATED STATUS PORTS service - 2ece37303ec9 gp *****....:latest "./docker_build/entry.sh" 5 seconds ago Up 4 seconds user_space - ``` - """ - hlitauti.report_task() - # pylint: enable=line-too-long - fmt = ( - r"""table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}""" - + r"\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}" - + r'\t{{.Label "com.docker.compose.service"}}' - ) - docker_exec = _get_docker_exec(sudo) - cmd = f"{docker_exec} ps --format='{fmt}'" - cmd = hlitauti._to_single_line_cmd(cmd) - hlitauti.run(ctx, cmd) - - -def _get_last_container_id(sudo: bool) -> str: - docker_exec = _get_docker_exec(sudo) - # Get the last started container. - cmd = f"{docker_exec} ps -l | grep -v 'CONTAINER ID'" - # CONTAINER ID IMAGE COMMAND CREATED - # 90897241b31a eeb33fe1880a "/bin/sh -c '/bin/bash ... - _, txt = hsystem.system_to_one_line(cmd) - # Parse the output: there should be at least one line. - hdbg.dassert_lte(1, len(txt.split(" ")), "Invalid output='%s'", txt) - container_id: str = txt.split(" ")[0] - return container_id - - -@task -def docker_stats( # type: ignore - ctx, - all=False, # pylint: disable=redefined-builtin - sudo=False, -): - # pylint: disable=line-too-long - """ - Report last started Docker container stats, e.g., CPU, RAM. 
- - ``` - > docker_stats - CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS - 2ece37303ec9 ..._user_space_run_30 0.00% 15.74MiB / 31.07GiB 0.05% 351kB / 6.27kB 34.2MB / 12.3kB 4 - ``` - - :param all: report stats for all the containers - """ - # pylint: enable=line-too-long - hlitauti.report_task(txt=hprint.to_str("all")) - _ = ctx - fmt = ( - r"table {{.ID}}\t{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" - + r"\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" - ) - docker_exec = _get_docker_exec(sudo) - cmd = f"{docker_exec} stats --no-stream --format='{fmt}'" - _, txt = hsystem.system_to_string(cmd) - if all: - output = txt - else: - # Get the id of the last started container. - container_id = _get_last_container_id(sudo) - print(f"Last container id={container_id}") - # Parse the output looking for the given container. - txt = txt.split("\n") - output = [] - # Save the header. - output.append(txt[0]) - for line in txt[1:]: - if line.startswith(container_id): - output.append(line) - # There should be at most two rows: the header and the one corresponding to - # the container. - hdbg.dassert_lte( - len(output), 2, "Invalid output='%s' for '%s'", output, txt - ) - output = "\n".join(output) - print(output) - - -@task -def docker_kill( # type: ignore - ctx, - all=False, # pylint: disable=redefined-builtin - sudo=False, -): - """ - Kill the last Docker container started. - - :param all: kill all the containers (be careful!) - :param sudo: use sudo for the Docker commands - """ - hlitauti.report_task(txt=hprint.to_str("all")) - docker_exec = _get_docker_exec(sudo) - # Last container. - opts = "-l" - if all: - _LOG.warning("Killing all the containers") - # TODO(gp): Ask if we are sure and add a --just-do-it option. - opts = "-a" - # Print the containers that will be terminated. - cmd = f"{docker_exec} ps {opts}" - hlitauti.run(ctx, cmd) - # Kill. 
- cmd = f"{docker_exec} rm -f $({docker_exec} ps {opts} -q)" - hlitauti.run(ctx, cmd) - - -# docker system prune -# docker container ps -f "status=exited" -# docker container rm $(docker container ps -f "status=exited" -q) -# docker rmi $(docker images --filter="dangling=true" -q) - -# pylint: disable=line-too-long -# Remove the images with hash -# > docker image ls -# REPOSITORY TAG IMAGE ID CREATED SIZE -# *****.dkr.ecr.us-east-2.amazonaws.com/im 07aea615a2aa9290f7362e99e1cc908876700821 d0889bf972bf 6 minutes ago 684MB -# *****.dkr.ecr.us-east-2.amazonaws.com/im rc d0889bf972bf 6 minutes ago 684MB -# python 3.7-slim-buster e7d86653f62f 14 hours ago 113MB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp 415376d58001e804e840bf3907293736ad62b232 e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp dev e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp local e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp 9586cc2de70a4075b9fdcdb900476f8a0f324e3e c75d2447da79 18 hours ago 1.65GB -# pylint: enable=line-too-long - - -# ############################################################################# -# Docker development. -# ############################################################################# - -# TODO(gp): We might want to organize the code in a base class using a Command -# pattern, so that it's easier to generalize the code for multiple repos. -# -# class DockerCommand: -# def pull(): -# ... -# def cmd(): -# ... -# -# For now we pass the customizable part through the default params. - - -# //////////////////////////////////////////////////////////////////////////// -# Docker pull. -# //////////////////////////////////////////////////////////////////////////// - - -def _docker_pull( - ctx: Any, base_image: str, stage: str, version: Optional[str] -) -> None: - """ - Pull images from the registry. 
- """ - docker_login(ctx) - # - image = get_image(base_image, stage, version) - _LOG.info("image='%s'", image) - dassert_is_image_name_valid(image) - cmd = f"docker pull {image}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -def docker_pull(ctx, stage="dev", version=None, skip_pull=False): # type: ignore - """ - Pull latest dev image corresponding to the current repo from the registry. - - :param skip_pull: if True skip pulling the docker image - """ - hlitauti.report_task() - if stage == "local": - _LOG.warning("Setting skip_pull to True for local stage") - skip_pull = True - if skip_pull: - _LOG.warning("Skipping pulling docker image as per user request") - return - # - base_image = "" - _docker_pull(ctx, base_image, stage, version) - - -@task -def docker_pull_helpers(ctx, stage="prod", version=None): # type: ignore - """ - Pull latest prod image of `helpers` from the registry. - - :param ctx: invoke context - :param stage: stage of the Docker image - :param version: version of the Docker image - """ - base_image = hlitauti.get_default_param("CSFY_ECR_BASE_PATH") + "/helpers" - _LOG.debug("base_image=%s", base_image) - _docker_pull(ctx, base_image, stage, version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Compose files. -# //////////////////////////////////////////////////////////////////////////////// - -# TODO(gp): All this code can become `DockerComposeFileGenerator`. - -# There are several combinations to consider: -# - whether the Docker host can run with / without privileged mode -# - amp as submodule / as supermodule -# - different supermodules for amp - -# TODO(gp): use_privileged_mode -> use_docker_privileged_mode -# use_sibling_container -> use_docker_containers_containers - -DockerComposeServiceSpec = Dict[str, Union[str, List[str]]] - - -def _get_linter_service(stage: str) -> DockerComposeServiceSpec: - """ - Get the linter service specification for the `tmp.docker-compose.yml` file. 
- - :return: linter service specification - """ - superproject_path, submodule_path = hgit.get_path_from_supermodule() - if superproject_path: - # We are running in a Git submodule. - work_dir = f"/src/{submodule_path}" - repo_root = superproject_path - else: - work_dir = "/src" - repo_root = os.getcwd() - # TODO(gp): To avoid linter getting confused between `Sequence[str]` and - # `List[str]`, we should assign one element at the time. - linter_service_spec = { - "extends": "base_app", - "volumes": [ - f"{repo_root}:/src", - ], - "working_dir": work_dir, - "environment": [ - "MYPYPATH", - ], - } - if stage != "prod": - # When we run a development Linter container, we need to mount the - # Linter repo under `/app`. For prod container instead we copy / freeze - # the repo code in `/app`, so we should not mount it. - volumes = cast(List[str], linter_service_spec["volumes"]) - if superproject_path: - # When running in a Git submodule we need to go one extra level up. - # TODO(*): Clean up the indentation, #2242 (also below). - volumes.append("../../../:/app") - else: - volumes.append("../../:/app") - if stage == "prod": - # Use the `repo_config.py` inside the helpers container instead of - # the one in the calling repo. - environment = cast(List[str], linter_service_spec["environment"]) - environment.append("CSFY_REPO_CONFIG_PATH=/app/repo_config.py") - return linter_service_spec - - -# TODO(gp): Remove mount_as_submodule -def _generate_docker_compose_file( - stage: str, - use_privileged_mode: bool, - use_sibling_container: bool, - shared_data_dirs: Optional[Dict[str, str]], - mount_as_submodule: bool, - use_network_mode_host: bool, - use_main_network: bool, - file_name: Optional[str], -) -> str: - """ - Generate `tmp.docker-compose.yml` file and save it. - - :param shared_data_dirs: data directory in the host filesystem to mount - inside the container. 
`None` means no dir sharing - :param use_main_network: use `main_network` as default network - """ - _LOG.debug( - hprint.to_str( - "use_privileged_mode " - "use_sibling_container " - "shared_data_dirs " - "mount_as_submodule " - "use_network_mode_host " - "use_main_network " - "file_name " - ) - ) - # We could pass the env var directly, like: - # ``` - # - CSFY_ENABLE_DIND=$CSFY_ENABLE_DIND - # ``` - # but we prefer to inline it. - if use_privileged_mode: - CSFY_ENABLE_DIND = 1 - else: - CSFY_ENABLE_DIND = 0 - # ``` - # sysname='Linux' - # nodename='cf-spm-dev4' - # release='3.10.0-1160.53.1.el7.x86_64' - # version='#1 SMP Fri Jan 14 13:59:45 UTC 2022' - # machine='x86_64' - # ``` - csfy_host_os_name = os.uname()[0] - csfy_host_name = os.uname()[1] - csfy_host_os_version = os.uname()[2] - csfy_host_user_name = getpass.getuser() - # We assume that we don't use this code inside a container, since otherwise - # we would need to distinguish the container style (see - # docs/work_tools/docker/all.dockerized_flow.explanation.md) to find the - # outermost Git root. - if not hserver.is_inside_unit_test(): - hdbg.dassert(not hserver.is_inside_docker()) - else: - # We call this function as part of the unit tests, which we run insider - # the container. - pass - git_host_root_path = hgit.find_git_root() - # Find git root path in the container. - # The Git root is always mounted in the container at `/app`. So we need to - # use that as starting point. - # E.g. For CSFY_GIT_ROOT_PATH, we need to use `/app`, rather than - # `/data/dummy/src/cmamp1`. - # E.g. For CSFY_HELPERS_ROOT_PATH, we need to use `/app/helpers_root`. - # rather than `/data/dummy/src/cmamp1/helpers_root`. - git_root_path = "/app" - # Find helpers root path in the container. 
- helper_dir = hgit.find_helpers_root() - helper_relative_path = os.path.relpath(helper_dir, git_host_root_path) - helper_root_path = os.path.normpath( - os.path.join(git_root_path, helper_relative_path) - ) - # A super repo is a repo that contains helpers as a submodule and - # is not a helper itself. - use_helpers_as_nested_module = ( - 0 if hgit.is_in_helpers_as_supermodule() else 1 - ) - # We could do the same also with IMAGE for symmetry. - # Keep the env vars in sync with what we print in `henv.get_env_vars()`. - # Configure `base_app` service. - # TODO(gp): Use henv.get_env_vars() to get the env vars. - environment = [ - f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}", - "CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", - f"CSFY_HOST_NAME={csfy_host_name}", - f"CSFY_HOST_OS_NAME={csfy_host_os_name}", - f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", - f"CSFY_HOST_USER_NAME={csfy_host_user_name}", - "CSFY_REPO_CONFIG_CHECK=True", - # Use inferred path for `repo_config.py`. - "CSFY_REPO_CONFIG_PATH=", - "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION", - "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE", - "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET", - "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY", - "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN", - "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH", - # The path of the outermost Git root on the host. - f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}", - # The path of the outermost Git root in the Docker container. - f"CSFY_GIT_ROOT_PATH={git_root_path}", - # The path of the helpers dir in the Docker container (e.g., - # `/app`, `/app/helpers_root`) - f"CSFY_HELPERS_ROOT_PATH={helper_root_path}", - f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}", - "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN", - # This env var is used by GH Action to signal that we are inside the - # CI. It's set up by default by the GH Action runner. 
See: - # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables - "CSFY_CI=$CSFY_CI", - # TODO(Vlad): consider removing, locally we use our personal tokens - # from files and inside GitHub actions we use the `GH_TOKEN` - # environment variable. - ] - environment.extend( - [ - "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", - # Inside GitHub Actions we use `GH_TOKEN` environment variable, - # see https://cli.github.com/manual/gh_auth_login. - "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", - ] - ) - api_key_env_vars = henv.get_api_key_env_vars() - environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars]) - # - base_app_spec = { - "cap_add": ["SYS_ADMIN"], - "environment": environment, - "image": "${IMAGE}", - "restart": "no", - "volumes": [ - # TODO(gp): We should pass the value of $HOME from dev.Dockerfile to here. - # E.g., we might define $HOME in the env file. - "~/.aws:/home/.aws", - "~/.config/gspread_pandas/:/home/.config/gspread_pandas/", - "~/.config/gh:/home/.config/gh", - "~/.ssh:/home/.ssh", - ], - } - if use_privileged_mode: - # This is needed: - # - for Docker-in-docker (dind) - # - to mount fstabs - base_app_spec["privileged"] = use_privileged_mode - if shared_data_dirs: - # Mount shared dirs. - shared_volumes = [ - f"{host}:{container}" for host, container in shared_data_dirs.items() - ] - # Mount all dirs that are specified. - base_app_spec["volumes"].extend(shared_volumes) - if False: - # No need to mount file systems. - base_app_spec["volumes"].append("../docker_build/fstab:/etc/fstab") - if use_sibling_container: - # Use sibling-container approach. - base_app_spec["volumes"].append( - "/var/run/docker.sock:/var/run/docker.sock" - ) - if False: - base_app_spec["deploy"] = { - "resources": { - "limits": { - # This should be passed from command line depending on how much - # memory is available. 
- "memory": "60G", - }, - }, - } - if use_network_mode_host: - # Default network mode set to host so we can reach e.g. - # a database container pointing to localhost:5432. - # In tests we use dind so we need set back to the default "bridge". - # See CmTask988 and https://stackoverflow.com/questions/24319662 - base_app_spec["network_mode"] = "${NETWORK_MODE:-host}" - # Configure `app` service. - # Mount `amp` when it is used as submodule. In this case we need to - # mount the super project in the container (to make git work with the - # supermodule) and then change dir to `amp`. - app_spec = { - "extends": "base_app", - } - # Use absolute path of the dir to mount the volume and set working dir. - # The `app_dir` dir points to the root of the repo. - # The `working_dir` points to the path of the runnable dir. - # - If the runnable dir is the root of the repo, then `working_dir` is `/app`. - # - If the runnable dir is a subdirectory of the repo, then `working_dir` is `/app/subdir`. - curr_dir = os.getcwd() - rel_dir1 = os.path.relpath(curr_dir, git_host_root_path) - rel_dir2 = os.path.relpath(git_host_root_path, curr_dir) - app_dir = os.path.abspath(os.path.join(curr_dir, rel_dir2)) - working_dir = os.path.normpath(os.path.join("/app", rel_dir1)) - app_spec["volumes"] = [f"{app_dir}:/app"] - app_spec["working_dir"] = working_dir - # Configure `linter` service. - linter_spec = _get_linter_service(stage) - # Configure `jupyter_server` service. - # For Jupyter server we cannot use "host" network_mode because - # it is incompatible with the port bindings. - jupyter_server = { - "command": "devops/docker_run/run_jupyter_server.sh", - "environment": [ - "PORT=${PORT}", - ], - "extends": "app", - "network_mode": "${NETWORK_MODE:-bridge}", - # TODO(gp): Rename `AM_PORT`. - "ports": [ - "${PORT}:${PORT}", - ], - } - # Configure `jupyter_server_test` service. - # TODO(gp): For some reason the following doesn't work. 
- # jupyter_server_test: - # command: jupyter notebook -h 2>&1 >/dev/null - # extends: - # jupyter_server - jupyter_server_test = { - "command": "jupyter notebook -h 2>&1 >/dev/null", - "environment": [ - "PORT=${PORT}", - ], - "extends": "app", - "network_mode": "${NETWORK_MODE:-bridge}", - "ports": [ - "${PORT}:${PORT}", - ], - } - # Specify structure of the docker-compose file. - docker_compose = { - "version": "3", - "services": { - "base_app": base_app_spec, - "app": app_spec, - "linter": linter_spec, - "jupyter_server": jupyter_server, - "jupyter_server_test": jupyter_server_test, - }, - } - # Configure networks. - if use_main_network: - docker_compose["networks"] = {"default": {"name": "main_network"}} - - class _Dumper(yaml.Dumper): - """ - A custom YAML Dumper class that adjusts indentation. - """ - - def increase_indent(self_: Any, flow=False, indentless=False) -> Any: - """ - Override the method to modify YAML indentation behavior. - """ - return super().increase_indent(flow=False, indentless=False) - - # Convert the dictionary to YAML format. - yaml_str = yaml.dump( - docker_compose, - Dumper=_Dumper, - default_flow_style=False, - indent=2, - sort_keys=False, - ) - yaml_str = cast(str, yaml_str) - # Save YAML to file if file_name is specified. - if file_name: - if os.path.exists(file_name) and hserver.is_inside_ci(): - # Permission error is raised if we try to overwrite existing file. - # See CmTask #2321 for detailed info. - compose_directory = os.path.dirname(file_name) - hsystem.system(f"sudo rm -rf {compose_directory}") - hio.to_file(file_name, yaml_str) - return yaml_str - - -def get_base_docker_compose_path() -> str: - """ - Return the absolute path to the Docker compose file. - - E.g., `devops/compose/tmp.docker-compose.yml`. - """ - # Add the default path. - dir_name = "devops/compose" - # TODO(gp): Factor out the piece below. 
- docker_compose_path = "tmp.docker-compose.yml" - docker_compose_path = os.path.join(dir_name, docker_compose_path) - docker_compose_path = os.path.abspath(docker_compose_path) - return docker_compose_path - - -def _get_docker_compose_files( - stage: str, - generate_docker_compose_file: bool, - service_name: str, - extra_docker_compose_files: Optional[List[str]], -) -> List[str]: - """ - Generate the Docker compose file and return the list of Docker compose - paths. - - :return: list of the Docker compose paths - """ - docker_compose_files = [] - # Get the repo short name (e.g., `amp`). - repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - _LOG.debug("repo_short_name=%s", repo_short_name) - # Check submodule status, if needed. - mount_as_submodule = False - if repo_short_name in ("amp", "cmamp"): - # Check if `amp` is a submodule. - path, _ = hgit.get_path_from_supermodule() - if path != "": - _LOG.warning("amp is a submodule") - mount_as_submodule = True - # Write Docker compose file. - file_name = get_base_docker_compose_path() - if service_name == "linter": - # Since we are running the prod `helpers` container we need to use the - # settings from the `repo_config` from that container, and not the settings - # launch the container corresponding to this repo. - enable_privileged_mode = False - use_docker_sibling_containers = False - get_shared_data_dirs = None - use_docker_network_mode_host = False - use_main_network = False - else: - # Use the settings from the `repo_config` corresponding to this container. 
- enable_privileged_mode = hserver.enable_privileged_mode() - use_docker_sibling_containers = hserver.use_docker_sibling_containers() - get_shared_data_dirs = hserver.get_shared_data_dirs() - use_docker_network_mode_host = hserver.use_docker_network_mode_host() - use_main_network = hserver.use_main_network() - # - if generate_docker_compose_file: - _generate_docker_compose_file( - stage, - enable_privileged_mode, - use_docker_sibling_containers, - get_shared_data_dirs, - mount_as_submodule, - use_docker_network_mode_host, - use_main_network, - file_name, - ) - else: - _LOG.warning("Skipping generating Docker compose file '%s'", file_name) - docker_compose_files.append(file_name) - # Add the compose files from command line. - if extra_docker_compose_files: - hdbg.dassert_isinstance(extra_docker_compose_files, list) - docker_compose_files.extend(extra_docker_compose_files) - # Add the compose files from the global params. - key = "DOCKER_COMPOSE_FILES" - if hlitauti.has_default_param(key): - docker_compose_files.append(hlitauti.get_default_param(key)) - # - _LOG.debug(hprint.to_str("docker_compose_files")) - for docker_compose in docker_compose_files: - hdbg.dassert_path_exists(docker_compose) - return docker_compose_files - - -_IMAGE_VERSION_FROM_CHANGELOG = "FROM_CHANGELOG" - - -def resolve_version_value( - version: str, - *, - container_dir_name: str = ".", -) -> str: - """ - Pass a version (e.g., 1.0.0) or a symbolic value (e.g., FROM_CHANGELOG) and - return the resolved value of the version. - - :return: full version with patch for prod (e.g., 1.3.2) - """ - hdbg.dassert_isinstance(version, str) - if version == _IMAGE_VERSION_FROM_CHANGELOG: - version = hversio.get_changelog_version(container_dir_name) - _dassert_is_version_valid(version) - prod_version = version - return prod_version - - -def to_dev_version(prod_version: str) -> str: - """ - Pass a prod version (e.g., 1.1.1) and strip the patch value. 
- - :return: stripped version without patch for dev (e.g., 1.1.0) - """ - hdbg.dassert_isinstance(prod_version, str) - _dassert_is_version_valid(prod_version) - # Strip patch value from the version. - dev_version = prod_version.split(".")[:-1] - dev_version = ".".join(dev_version) + ".0" - return dev_version - - -def dassert_is_subsequent_version( - version: str, - *, - container_dir_name: str = ".", -) -> None: - """ - Check that `version` is bigger than the current one as specified in the - changelog. - """ - if version != _IMAGE_VERSION_FROM_CHANGELOG: - current_version = hversio.get_changelog_version(container_dir_name) - hdbg.dassert_lte(current_version, version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Misc. -# //////////////////////////////////////////////////////////////////////////////// - - -def _run_docker_as_user(as_user_from_cmd_line: bool) -> bool: - as_root = hserver.run_docker_as_root() - as_user = as_user_from_cmd_line - if as_root: - as_user = False - _LOG.debug( - "as_user_from_cmd_line=%s as_root=%s -> as_user=%s", - as_user_from_cmd_line, - as_root, - as_user, - ) - return as_user - - -def _get_container_name(service_name: str) -> str: - """ - Create a container name based on various information. - - E.g., `grisha.cmamp.app.cmamp1.20220317_232120` - - The information used to build a container is: - - Linux username - - Base Docker image name - - Service name - - Project directory that was used to start a container - - Container start timestamp - - :param service_name: `docker-compose` service name, e.g., `app` - :return: container name - """ - hdbg.dassert_ne(service_name, "", "You need to specify a service name") - # Get linux username. - linux_user = hsystem.get_user_name() - # Get dir name. - project_dir = hgit.get_project_dirname() - # Get Docker image base name. - image_name = hlitauti.get_default_param("BASE_IMAGE") - # Get current timestamp. 
- current_timestamp = hlitauti.get_ET_timestamp() - # Build container name. - container_name = f"{linux_user}.{image_name}.{service_name}.{project_dir}.{current_timestamp}" - _LOG.debug( - "get_container_name: container_name=%s", - container_name, - ) - return container_name - - -def _get_docker_base_cmd( - base_image: str, - stage: str, - version: str, - service_name: str, - # Params from `_get_docker_compose_cmd()`. - generate_docker_compose_file: bool, - extra_env_vars: Optional[List[str]], - extra_docker_compose_files: Optional[List[str]], - skip_docker_image_compatibility_check: bool, -) -> List[str]: - r""" - Get base `docker-compose` command encoded as a list of strings. - - It can be used as a base to build more complex commands, e.g., `run`, `up`, - `down`. - - E.g., - ``` - ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', - '\n docker-compose', - '\n --file amp/devops/compose/tmp.docker-compose.yml', - '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', - '\n --env-file devops/env/default.env'] - ``` - :param generate_docker_compose_file: whether to generate or reuse the existing - Docker compose file - :param extra_env_vars: represent vars to add, e.g., `["PORT=9999", "DRY_RUN=1"]` - :param extra_docker_compose_files: `docker-compose` override files - :param skip_docker_image_compatibility_check: if True, skip checking image - architecture compatibility - """ - _LOG.debug(hprint.func_signature_to_str()) - docker_cmd_: List[str] = [] - # - Handle the image. - image = get_image(base_image, stage, version) - _LOG.debug("base_image=%s stage=%s -> image=%s", base_image, stage, image) - dassert_is_image_name_valid(image) - # The check is mainly for developers to avoid using the wrong image (e.g., - # an x86 vs ARM architecture). - # We can skip the image compatibility check during the CI or when - # explicitly skipped. 
- if not (hserver.is_inside_ci() or skip_docker_image_compatibility_check): - hdocker.check_image_compatibility_with_current_arch(image) - else: - _LOG.warning("Skipping docker image compatibility check") - docker_cmd_.append(f"IMAGE={image}") - # - Handle extra env vars. - if extra_env_vars: - hdbg.dassert_isinstance(extra_env_vars, list) - for env_var in extra_env_vars: - docker_cmd_.append(f"{env_var}") - # - docker_cmd_.append(r""" - docker compose""") - docker_compose_files = _get_docker_compose_files( - stage, - generate_docker_compose_file, - service_name, - extra_docker_compose_files, - ) - file_opts = " ".join([f"--file {dcf}" for dcf in docker_compose_files]) - _LOG.debug(hprint.to_str("file_opts")) - # TODO(gp): Use something like `.append(rf"{space}{...}")` - docker_cmd_.append(rf""" - {file_opts}""") - # - Handle the env file. - env_file = "devops/env/default.env" - docker_cmd_.append(rf""" - --env-file {env_file}""") - return docker_cmd_ - - -def _get_docker_compose_cmd( - base_image: str, - stage: str, - version: str, - cmd: str, - *, - # TODO(gp): make these params mandatory. - extra_env_vars: Optional[List[str]] = None, - extra_docker_compose_files: Optional[List[str]] = None, - extra_docker_run_opts: Optional[List[str]] = None, - service_name: str = "app", - use_entrypoint: bool = True, - generate_docker_compose_file: bool = True, - as_user: bool = True, - print_docker_config: bool = False, - use_bash: bool = False, - skip_docker_image_compatibility_check: bool = False, -) -> str: - """ - Get `docker-compose` run command. 
- - E.g., - ``` - IMAGE=*****..dkr.ecr.us-east-1.amazonaws.com/amp:dev \ - docker-compose \ - --file /amp/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name grisha.cmamp.app.cmamp1.20220317_232120 \ - --user $(id -u):$(id -g) \ - app \ - bash - ``` - :param cmd: command to run inside Docker container - :param extra_docker_run_opts: additional `docker-compose` run options - :param service_name: service to use to run a command - :param use_entrypoint: whether to use the `entrypoint.sh` or not - :param generate_docker_compose_file: generate the Docker compose file or not - :param as_user: pass the user / group id or not - :param print_docker_config: print the docker config for debugging purposes - :param use_bash: run command through a shell - :param skip_docker_image_compatibility_check: if True, skip checking image architecture compatibility - """ - _LOG.debug(hprint.func_signature_to_str()) - # - Get the base Docker command. - docker_cmd_ = _get_docker_base_cmd( - base_image, - stage, - version, - service_name, - generate_docker_compose_file, - extra_env_vars, - extra_docker_compose_files, - skip_docker_image_compatibility_check, - ) - # - Add the `config` command for debugging purposes. - docker_config_cmd: List[str] = docker_cmd_[:] - # TODO(gp): Use yaml approach like done for other parts of the code. - docker_config_cmd.append(r""" - config""") - # - Add the `run` command. - docker_cmd_.append(r""" - run \ - --rm""") - # - Add a name to the container. - container_name = _get_container_name(service_name) - docker_cmd_.append(rf""" - --name {container_name}""") - # - Handle the user. - as_user = _run_docker_as_user(as_user) - if as_user: - docker_cmd_.append(r""" - --user $(id -u):$(id -g)""") - # - Handle the extra docker options. 
- if extra_docker_run_opts: - hdbg.dassert_isinstance(extra_docker_run_opts, list) - extra_opts = " ".join(extra_docker_run_opts) - docker_cmd_.append(rf""" - {extra_opts}""") - # - Handle entrypoint. - if use_entrypoint: - docker_cmd_.append(rf""" - {service_name}""") - if cmd: - if use_bash: - cmd = f"bash -c '{cmd}'" - docker_cmd_.append(rf""" - {cmd}""") - else: - # No entrypoint. - docker_cmd_.append(rf""" - --entrypoint bash \ - {service_name}""") - # Print the config for debugging purpose. - if print_docker_config: - docker_config_cmd_as_str = hlitauti.to_multi_line_cmd(docker_config_cmd) - _LOG.debug("docker_config_cmd=\n%s", docker_config_cmd_as_str) - _LOG.debug( - "docker_config=\n%s", - hsystem.system_to_string(docker_config_cmd_as_str)[1], - ) - # Print the config for debugging purpose. - docker_cmd_: str = hlitauti.to_multi_line_cmd(docker_cmd_) - return docker_cmd_ - - -# //////////////////////////////////////////////////////////////////////////////// -# bash and cmd. -# //////////////////////////////////////////////////////////////////////////////// - - -def _docker_cmd( - ctx: Any, - docker_cmd_: str, - *, - skip_pull: bool = False, - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - Print and execute a Docker command. - - :param kwargs: kwargs for `ctx.run()` - """ - if hserver.is_inside_ci(): - import helpers.hs3 as hs3 - - # Generate files with the AWS settings that are missing when running - # inside CI. - hs3.generate_aws_files() - docker_pull(ctx, skip_pull=skip_pull) - _LOG.debug("cmd=%s", docker_cmd_) - rc: Optional[int] = hlitauti.run( - ctx, docker_cmd_, pty=True, **ctx_run_kwargs - ) - return rc - - -@task -def docker_bash( # type: ignore - ctx, - base_image="", - stage="dev", - version="", - use_entrypoint=True, - as_user=True, - generate_docker_compose_file=True, - container_dir_name=".", - skip_pull=False, - skip_docker_image_compatibility_check=False, -): - """ - Start a bash shell inside the container corresponding to a stage. 
- - :param use_entrypoint: whether to use the `entrypoint.sh` or not - :param as_user: pass the user / group id or not - :param generate_docker_compose_file: generate the Docker compose file or not - :param skip_pull: if True skip pulling the docker image - """ - _LOG.debug(hprint.func_signature_to_str("ctx")) - hlitauti.report_task(container_dir_name=container_dir_name) - # - cmd = "bash" - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - generate_docker_compose_file=generate_docker_compose_file, - use_entrypoint=use_entrypoint, - as_user=as_user, - skip_docker_image_compatibility_check=skip_docker_image_compatibility_check, - ) - _LOG.debug("docker_cmd_=%s", docker_cmd_) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -@task -def docker_cmd( # type: ignore - ctx, - base_image="", - stage="dev", - version="", - cmd="", - as_user=True, - generate_docker_compose_file=True, - use_bash=False, - container_dir_name=".", - skip_pull=False, -): - """ - Execute the command `cmd` inside a container corresponding to a stage. - - :param as_user: pass the user / group id or not - :param generate_docker_compose_file: generate or reuse the Docker - compose file - :param use_bash: run command through a shell - """ - hlitauti.report_task(container_dir_name=container_dir_name) - hdbg.dassert_ne(cmd, "") - # TODO(gp): Do we need to overwrite the entrypoint? - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - generate_docker_compose_file=generate_docker_compose_file, - as_user=as_user, - use_bash=use_bash, - ) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -# //////////////////////////////////////////////////////////////////////////////// -# Jupyter. 
-# //////////////////////////////////////////////////////////////////////////////// - - -def _get_docker_jupyter_cmd( - base_image: str, - stage: str, - version: str, - port: int, - self_test: bool, - *, - use_entrypoint: bool = True, - print_docker_config: bool = False, -) -> str: - cmd = "" - extra_env_vars = [f"PORT={port}"] - extra_docker_run_opts = ["--service-ports"] - service_name = "jupyter_server_test" if self_test else "jupyter_server" - # - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - extra_docker_run_opts=extra_docker_run_opts, - service_name=service_name, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - return docker_cmd_ - - -@task -def docker_jupyter( # type: ignore - ctx, - stage="dev", - version="", - base_image="", - auto_assign_port=True, - use_entrypoint=True, - port=None, - self_test=False, - container_dir_name=".", - skip_pull=False, -): - """ - Run Jupyter notebook server. - - :param auto_assign_port: use the UID of the user and the inferred - number of the repo (e.g., 4 for `~/src/amp4`) to get a unique - port - :param skip_pull: if True skip pulling the docker image - """ - hlitauti.report_task(container_dir_name=container_dir_name) - if port is None: - if auto_assign_port: - uid = os.getuid() - _LOG.debug("uid=%s", uid) - git_repo_idx = hgit.get_project_dirname(only_index=True) - git_repo_idx = int(git_repo_idx) - _LOG.debug("git_repo_idx=%s", git_repo_idx) - # We assume that there are no more than `max_idx_per_users` clients. 
- max_idx_per_user = 10 - hdbg.dassert_lte(git_repo_idx, max_idx_per_user) - port = (uid * max_idx_per_user) + git_repo_idx - else: - port = 9999 - _LOG.info("Assigned port is %s", port) - # - print_docker_config = False - docker_cmd_ = _get_docker_jupyter_cmd( - base_image, - stage, - version, - port, - self_test, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -def _get_docker_dash_app_cmd( - base_image: str, - stage: str, - version: str, - port: int, - *, - print_docker_config: bool = False, -) -> str: - cmd = "" - extra_env_vars = [f"PORT={port}"] - extra_docker_run_opts = ["--service-ports"] - service_name = "dash_app" - # - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - extra_docker_run_opts=extra_docker_run_opts, - service_name=service_name, - print_docker_config=print_docker_config, - ) - return docker_cmd_ - - -@task -def docker_dash_app( # type: ignore - ctx, - stage="dev", - version="", - base_image="", - auto_assign_port=True, - port=None, - container_dir_name=".", -): - """ - Run dash app. - - :param auto_assign_port: use the UID of the user and the inferred - number of the repo (e.g., 4 for `~/src/amp4`) to get a unique - port - """ - hlitauti.report_task(container_dir_name=container_dir_name) - if port is None: - if auto_assign_port: - uid = os.getuid() - _LOG.debug("uid=%s", uid) - git_repo_idx = hgit.get_project_dirname(only_index=True) - git_repo_idx = int(git_repo_idx) - _LOG.debug("git_repo_idx=%s", git_repo_idx) - # We assume that there are no more than `max_idx_per_users` clients. 
- max_idx_per_user = 10 - hdbg.dassert_lte(git_repo_idx, max_idx_per_user) - port = (uid * max_idx_per_user) + git_repo_idx - else: - port = 9999 - # - _LOG.info("Assigned port is %s", port) - print_docker_config = False - docker_cmd_ = _get_docker_dash_app_cmd( - base_image, - stage, - version, - port, - print_docker_config=print_docker_config, - ) - _docker_cmd(ctx, docker_cmd_) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py deleted file mode 100644 index 4c2149f52..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py +++ /dev/null @@ -1,1890 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_docker_release as hltadore -""" - -import datetime -import logging -import os -from operator import attrgetter -from typing import Any, Optional - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.lib_tasks_aws as hlitaaws -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_pytest as hlitapyt -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_DEFAULT_TARGET_REGISTRY = "aws_ecr.ck" -_LOG = logging.getLogger(__name__) -_AUTO_RELEASE_LABEL = "Automated release" - -# pylint: disable=protected-access - - -# ############################################################################# -# Docker image workflows. 
-# ############################################################################# - - -def _to_abs_path(filename: str) -> str: - filename = os.path.abspath(filename) - hdbg.dassert_path_exists(filename) - return filename - - -def _prepare_docker_ignore( - ctx: Any, - docker_ignore: str, - *, - copy_to_git_root: bool = True, -) -> None: - """ - Copy the target `docker_ignore` in the proper position for `docker build`. - - :param ctx: invoke context - :param docker_ignore: path to the `.dockerignore` file - :param copy_to_git_root: if True, copy the `.dockerignore` file to the - git root directory; otherwise, copy it to the current directory - """ - # Currently there is no built-in way to control which `.dockerignore` to - # use (https://stackoverflow.com/questions/40904409). - hdbg.dassert_path_exists(docker_ignore) - # Since all the runnable dirs copy the entire repo content, we use - # the Git root dir as a docker context so we need to copy the `.dockerignore` - # file to the Git root dir. - if copy_to_git_root: - dest_docker_ignore = os.path.join(hgit.find_git_root(), ".dockerignore") - else: - dest_docker_ignore = ".dockerignore" - cmd = f"cp -f {docker_ignore} {dest_docker_ignore}" - hlitauti.run(ctx, cmd) - - -def _get_dev_version(version: str, container_dir_name: str) -> str: - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - dev_version = hlitadoc.to_dev_version(prod_version) - _LOG.debug("prod_version=%s -> dev_version=%s", prod_version, dev_version) - return dev_version - - -def _create_multiarch_builder( - ctx: Any, -) -> None: - """ - Create a multi-arch builder for Docker buildx. - - :param ctx: invoke context - """ - # Create a multi-arch builder. - platform_builder_name = "multiarch_builder" - cmd = rf""" - docker buildx rm {platform_builder_name} - """ - # We do not abort on error since the platform builder might be present - # or not from previous executions. 
- hsystem.system(cmd, abort_on_error=False) - cmd = rf""" - docker buildx create \ - --name {platform_builder_name} \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use {platform_builder_name} - """ - hlitauti.run(ctx, cmd) - - -# ############################################################################# -# Local/Dev image flow -# ############################################################################# -# - A "local" image (which is a release candidate for the DEV image) is built -# with: -# ``` -# > i docker_build_local_image -# ``` -# - This creates a local image like `helpers:local.saggese-1.0.0` -# - A qualification process (e.g., running all unit tests and the QA tests) is -# performed on the local image (e.g., locally or through GitHub actions) -# - If the qualification process is passed, the image is released as `dev` on -# the registries - - -# Use Docker buildkit or not. -# DOCKER_BUILDKIT = 1 -DOCKER_BUILDKIT = 0 - - -def _build_multi_arch_image( - ctx: Any, - opts: str, - multi_arch: str, - build_args: str, - build_image: str, - dockerfile: str, -) -> None: - """ - Build a multi-architecture Docker image in a remote Docker registry. - - :param ctx: invoke context - :param opts: build options (e.g., --no-cache) - :param multi_arch: target architectures to build for (e.g., - `linux/amd64,linux/arm64`) - :param build_args: build arguments for the Docker build command - :param build_image: name of the image to build - :param dockerfile: path to the Dockerfile to use for building - """ - # Build the multi-arch image. - # Compress the current directory (in order to dereference symbolic - # links) into a tar stream and pipes it to the `docker build` command. - # See HelpersTask197. - cmd = rf""" - tar -czh . 
| DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker buildx build \ - {opts} \ - --push \ - --platform {multi_arch} \ - {build_args} \ - --tag {build_image} \ - --file {dockerfile} \ - - - """ - hlitauti.run(ctx, cmd) - - -def _list_image(ctx: Any, image: str) -> None: - """ - List Docker image. - - :param ctx: invoke context - :param image: docker image reference in REPOSITORY[:TAG] format - Examples: - - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0` - - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` - - `sorrentum/cmamp:dev-1.0.0` - - `ghcr.io/cryptokaizen/cmamp:prod` - """ - cmd = f"docker image ls {image}" - hlitauti.run(ctx, cmd) - - -def _run_tests( - ctx: Any, - stage: str, - version: str, - *, - skip_tests: Optional[bool] = False, - fast_tests: Optional[bool] = True, - slow_tests: Optional[bool] = True, - superslow_tests: Optional[bool] = True, - qa_tests: Optional[bool] = True, -) -> None: - """ - Run tests for a given stage and version. - - :param ctx: invoke context - :param stage: image stage (must be one of `local`, `dev`, or `prod`) - :param version: version to test - :param skip_tests: skip all tests if True - :param fast_tests: run fast tests - :param slow_tests: run slow tests - :param superslow_tests: run superslow tests - :param qa_tests: run QA tests - """ - hdbg.dassert_in(stage, ("local", "dev", "prod")) - if skip_tests: - _LOG.warning("Skipping all tests") - return - if fast_tests: - hlitapyt.run_fast_tests(ctx, stage=stage, version=version) - if slow_tests: - hlitapyt.run_slow_tests(ctx, stage=stage, version=version) - if superslow_tests: - hlitapyt.run_superslow_tests(ctx, stage=stage, version=version) - if qa_tests: - hlitapyt.run_qa_tests(ctx, stage=stage, version=version) - - -# TODO(sandeep): Consider promoting this to an invoke target and removing the callers. -# Reason: the caller invoke targets only contain this helper call. 
-def _docker_tag_and_push_multi_arch_image( - ctx: Any, - version: str, - base_image: str, - target_registry: str, - container_dir_name: str, - source_stage: str, - target_stage: str, -) -> None: - """ - Tag and push a multi-arch image to the target registry using `docker buildx - imagetools`. - - :param ctx: invoke context - :param version: version to tag the image with - :param base_image: base name of the image (e.g., - `*****.dkr.ecr.us-east-1.amazonaws.com/amp`) - :param target_registry: target Docker registry to push to (e.g., - `aws_ecr.ck` or `dockerhub.causify`) - :param container_dir_name: directory where Dockerfile is located - :param source_stage: source stage of the image (must be one of `local` or - `prod`) - :param target_stage: target stage to push the image as (must be one - of `dev` or `prod`) - """ - hdbg.dassert_in(source_stage, ("local", "prod")) - hdbg.dassert_in(target_stage, ("dev", "prod")) - # - hlitadoc.docker_login(ctx, target_registry) - # Get source version string. - if source_stage == "local": - source_stage_version = _get_dev_version(version, container_dir_name) - elif source_stage == "prod": - source_stage_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - else: - raise ValueError( - f"Invalid source stage='{source_stage}' for tagging and pushing" - ) - source_image_versioned = hlitadoc.get_image( - base_image, source_stage, source_stage_version - ) - _LOG.info( - "Pushing the %s image %s to the target_registry %s ", - source_stage, - source_image_versioned, - target_registry, - ) - if target_registry == "aws_ecr.ck": - # Use AWS Docker registry. - target_base_image = "" - elif target_registry == "dockerhub.causify": - # Use public GitHub Docker registry. 
- target_base_image_name = ( - hrecouti.get_repo_config().get_docker_base_image_name() - ) - target_base_image = f"causify/{target_base_image_name}" - else: - raise ValueError( - f"Invalid target Docker image registry='{target_registry}'" - ) - # Only create a versioned image for the 'dev' stage or for the - # `dockerhub.causify` registry. - if target_stage == "dev" or target_registry == "dockerhub.causify": - # Tag and push the source image as versioned target image. - target_versioned_image = hlitadoc.get_image( - target_base_image, target_stage, source_stage_version - ) - cmd = f"docker buildx imagetools create -t {target_versioned_image} {source_image_versioned}" - hlitauti.run(ctx, cmd) - # Tag and push the source image as target image. - target_latest_version = None - target_latest_image = hlitadoc.get_image( - target_base_image, target_stage, version=target_latest_version - ) - cmd = f"docker buildx imagetools create -t {target_latest_image} {source_image_versioned}" - hlitauti.run(ctx, cmd) - - -@task -def docker_push_dev_image( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Push the "dev" image to ECR. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # - dev_version = _get_dev_version(version, container_dir_name) - # - hlitadoc.docker_login(ctx) - # Push Docker versioned tag. - image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version) - cmd = f"docker push {image_versioned_dev}" - hlitauti.run(ctx, cmd, pty=True) - # Push Docker tag. 
- latest_version = None - image_dev = hlitadoc.get_image(base_image, "dev", latest_version) - cmd = f"docker push {image_dev}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -def docker_push_prod_image( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Push the "prod" image to ECR. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # - hlitadoc.docker_login(ctx) - # Push versioned tag. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version) - cmd = f"docker push {image_versioned_prod}" - hlitauti.run(ctx, cmd, pty=True) - # - latest_version = None - image_prod = hlitadoc.get_image(base_image, "prod", latest_version) - cmd = f"docker push {image_prod}" - hlitauti.run(ctx, cmd, pty=True) - - -# TODO(gp): We moved away from versioning of the prod image because we release -# continuously and so it's easier to track the hash. -def _docker_rollback_image( - ctx: Any, - base_image: str, - stage: str, - version: str, - push_to_repo: bool, -) -> None: - """ - Rollback the versioned image for a particular stage and optionally push it - to ECR. - - :param ctx: invoke context - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param stage: select a specific stage for the Docker image (must be - one of `dev` or `prod`) - :param version: version to tag the image and code with - :param push_to_repo: whether to push the rolled back image to ECR - """ - hdbg.dassert_in(stage, ("dev", "prod")) - # TODO(sandeep): Consider removing the redundant pull-push step. Instead of - # pulling the versioned image and pushing it back to ECR, directly push - # the local image. 
However, note that this may not work for multi-arch images - # since local images are arch-specific, while remote tags include all architectures. - # 1) Ensure that version of the image exists locally. - hlitadoc._docker_pull( - ctx, base_image=base_image, stage=stage, version=version - ) - # 2) Promote requested image to target stage. - image_versioned = hlitadoc.get_image(base_image, stage, version) - latest_version = None - image_latest = hlitadoc.get_image(base_image, stage, latest_version) - cmd = f"docker tag {image_versioned} {image_latest}" - hlitauti.run(ctx, cmd) - # 3) Push the image to ECR. - if push_to_repo: - if stage == "dev": - docker_push_dev_image(ctx, version=version) - elif stage == "prod": - docker_push_prod_image(ctx, version=version) - else: - raise ValueError(f"Invalid stage='{stage}' for rollback") - else: - _LOG.warning("Skipping pushing %s image to ECR, as requested", stage) - - -@task -def docker_build_local_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - poetry_mode="update", - container_dir_name=".", - just_do_it=False, - multi_arch="", - cleanup_installation=True, -): - """ - Build a local image, i.e., a release candidate "dev" image. - - :param ctx: invoke context - :param version: version to tag the image with - :param cache: use the cache - :param base_image: the name for the base image - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp`. - For base_image, we use "" as default instead None since `invoke` can - only infer a single type. - :param poetry_mode: - - `update`: run `poetry lock` to update the packages - - `no_update`: it uses the current `poetry.lock` file, if it is valid - according to the constraints. 
This is useful when the goal is to - remove / add / update only a single package without updating - everything - :param container_dir_name: directory where the Dockerfile is located - :param just_do_it: execute the action ignoring the checks - :param multi_arch: - - if not specified, build for the current architecture - - if specified, build for the specified multiple architectures. E.g., - `linux/amd64,linux/arm64` - :param cleanup_installation: force clean up Docker installation. This can - be disabled to speed up the build process - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # For poetry_mode="update", the `poetry.lock` file is updated and saved as - # `/install/poetry.lock.out` to the container. - # For poetry_mode="no_update", the `poetry.lock` file from the repo is used, - # and it's passed as `/install/poetry.lock.in` to the container. - hdbg.dassert_in(poetry_mode, ("update", "no_update")) - if just_do_it: - _LOG.warning("Skipping subsequent version check") - else: - hlitadoc.dassert_is_subsequent_version( - version, container_dir_name=container_dir_name - ) - dev_version = _get_dev_version(version, container_dir_name) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.dev" - _prepare_docker_ignore(ctx, docker_ignore) - # Build the local image. - stage = "local" - image_local = hlitadoc.get_image(base_image, stage, dev_version) - # - dockerfile = "devops/docker_build/dev.Dockerfile" - # Keep the relative path instead of an absolute path to ensure it matches - # files inside the tar stream and avoids file not found errors. - # dockerfile = _to_abs_path(dockerfile) - opts = "--no-cache" if not cache else "" - build_args = [ - ("AM_CONTAINER_VERSION", dev_version), - ("INSTALL_DIND", True), - ("POETRY_MODE", poetry_mode), - ("CLEAN_UP_INSTALLATION", cleanup_installation), - ] - build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_args) - # Build for both a single arch or multi-arch. 
- if multi_arch: - # Login to AWS ECR because for multi-arch we need to build the local - # image remotely. - hlitadoc.docker_login(ctx) - _create_multiarch_builder(ctx) - _build_multi_arch_image( - ctx, opts, multi_arch, build_args, image_local, dockerfile - ) - # TODO(sandeep): If possible, switch to using hlitadoc._docker_pull(). - # Pull the image from registry after building. - cmd = f"docker pull {image_local}" - hlitauti.run(ctx, cmd) - else: - # Build for a single architecture using `docker build`. - # Compress the current directory (in order to dereference symbolic - # links) into a tar stream and pipes it to the `docker build` command. - # See HelpersTask197. - cmd = rf""" - tar -czh . | DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker build \ - {opts} \ - {build_args} \ - --tag {image_local} \ - --file {dockerfile} \ - - - """ - hlitauti.run(ctx, cmd) - # Retrieve the package files, if present. - if poetry_mode == "update": - # TODO(gp): Not sure it works properly for multi-arch build, since on - # different platforms the generated poetry.lock might be different. - # TODO(gp): For some reason we can't use more than one bash command in - # docker_cmd. - cmd = "cp -f /install/poetry.lock.out /install/pip_list.txt ." - opts = [ - "--stage local", - f"--version {version}", - f"--cmd '{cmd}'", - ] - opts.append("--skip-pull") - cmd = "invoke docker_cmd " + " ".join(opts) - hlitauti.run(ctx, cmd) - # The destination dir is always in the same relative position. - dst_dir = "./devops/docker_build" - hdbg.dassert_dir_exists(dst_dir) - cmd = f"cp -f poetry.lock.out {dst_dir}/poetry.lock" - hlitauti.run(ctx, cmd) - cmd = f"cp -f pip_list.txt {dst_dir}/pip_list.txt" - hlitauti.run(ctx, cmd) - # Check image and report stats. - _list_image(ctx, image_local) - - -@task -def docker_tag_local_image_as_dev( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Mark the "local" image as "dev". 
- - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # Get the version. - dev_version = _get_dev_version(version, container_dir_name) - # Tag local image as versioned dev image (e.g., `dev-1.0.0`). - image_versioned_local = hlitadoc.get_image(base_image, "local", dev_version) - image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version) - cmd = f"docker tag {image_versioned_local} {image_versioned_dev}" - hlitauti.run(ctx, cmd) - # Tag local image as dev image. - latest_version = None - image_dev = hlitadoc.get_image(base_image, "dev", latest_version) - cmd = f"docker tag {image_versioned_local} {image_dev}" - hlitauti.run(ctx, cmd) - - -@task -def docker_release_dev_image( # type: ignore - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - push_to_repo=True, - poetry_mode="update", - container_dir_name=".", -): - """ - Build, test, and release to ECR the latest "dev" image. - - This can be used to test the entire flow from scratch by building an image, - running the tests, and pushing if needed. 
- - Phases: - 1) Build local image - 2) Run the unit tests (e.g., fast, slow, superslow) on the local image - 3) Mark local as dev image - 4) Run the QA tests on the dev image - 5) Push dev image to the repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param push_to_repo: push the image to the repo_short_name - :param poetry_mode: same as - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # 1) Build "local" image. - docker_build_local_image( - ctx, - version, - cache=cache, - poetry_mode=poetry_mode, - container_dir_name=container_dir_name, - ) - # Run resolve after `docker_build_local_image` so that a proper check - # for subsequent version can be made in case `FROM_CHANGELOG` token - # is used. - dev_version = _get_dev_version(version, container_dir_name) - # 2) Run tests for the "local" image. - stage = "local" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=False, - ) - # 3) Promote the "local" image to "dev". - docker_tag_local_image_as_dev( - ctx, dev_version, container_dir_name=container_dir_name - ) - # 4) Run QA tests for the (local version) of the dev image. - stage = "dev" - _run_tests( - ctx, - stage, - dev_version, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=qa_tests, - ) - # 5) Push the "dev" image to ECR. 
- if push_to_repo: - docker_push_dev_image( - ctx, dev_version, container_dir_name=container_dir_name - ) - else: - _LOG.warning( - "Skipping pushing dev image to repo_short_name, as requested" - ) - _LOG.info("==> SUCCESS <==") - - -# ///////////////////////////////////////////////////////////////////////////// -# Multi-arch build flow -# ///////////////////////////////////////////////////////////////////////////// - - -# TODO(gp): multi_build -> multi_arch - - -@task -def docker_tag_push_multi_build_local_image_as_dev( # type: ignore - ctx, - version, - local_base_image="", - target_registry=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Mark the multi-arch "local" image as "dev" and push it. - - `base_image` and `target_registry` both contain information about the target - Docker registry. Docker image registry address in `local_base_image` name - is ignored when pushing, instead the `target_registry` param provides a - Docker image registry address to push to. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param local_base_image: base name of a local image, - e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param target_registry: target Docker image registry to push the image to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - source_stage = "local" - target_stage = "dev" - _docker_tag_and_push_multi_arch_image( - ctx, - version, - local_base_image, - target_registry, - container_dir_name, - source_stage, - target_stage, - ) - - -# TODO(gp): This needs to be merged with docker_release_dev_image. 
-@task -def docker_release_multi_build_dev_image( # type: ignore - ctx, - version, - cache=True, - poetry_mode="update", - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - # TODO(Grisha): use iterable values, see - # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values - # target_registries=... - target_registries=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Build, test, and release the latest multi-arch "dev" image. - - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests - skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param poetry_mode: update package dependencies using poetry - :param target_registries: comma separated list of target Docker - image registries to push the image to. E.g., - "aws_ecr.ck,dockerhub.causify". See `docker_login()` for - details. - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - target_registries = target_registries.split(",") - # 1) Build "local" image remotely in the CK AWS ECR registry and pull once - # it is built. - docker_build_local_image( - ctx, - version, - cache=cache, - poetry_mode=poetry_mode, - container_dir_name=container_dir_name, - multi_arch="linux/amd64,linux/arm64", - ) - # Run resolve after `docker_build_local_image` so that a proper check - # for subsequent version can be made in case `FROM_CHANGELOG` token - # is used. - dev_version = _get_dev_version(version, container_dir_name) - # 2) Run tests for the "local" image. - # 3) Run QA tests using the local version of an image. 
- # Use the local image because it is not possible to tag a multi-arch - # image as dev without releasing (pushing) it. - # The difference between a local and a dev image is just a tag. - stage = "local" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=qa_tests, - ) - # 4) Tag the image as dev image and push it to the target registries. - for target_registry in target_registries: - docker_tag_push_multi_build_local_image_as_dev( - ctx, - version=dev_version, - target_registry=target_registry, - container_dir_name=container_dir_name, - ) - _LOG.info("==> SUCCESS <==") - - -# ############################################################################# -# Prod image flow: -# ############################################################################# -# - Prod image has no release candidate -# - Start from a Dev image already built and qualified -# - The prod image is created from the dev image by copying the code inside the -# image -# - The prod image is tagged as "prod" -# The prod flow doesn't support multi-arch because we only run on x86 in prod. - - -@task -def docker_build_prod_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - candidate=False, - user_tag="", - container_dir_name=".", - tag=None, -): - """ - Build a prod image from a dev image. 
- - :param version: version to tag the image and code with - :param cache: note that often the prod image is just a copy of the - dev image so caching makes no difference - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param candidate: build a prod image with a tag format: prod-{hash} - where hash is the output of `hgit.get_head_hash()` - :param user_tag: the name of the user building the candidate image - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.prod" - _prepare_docker_ignore(ctx, docker_ignore) - # TODO(gp): We should do a `i git_clean` to remove artifacts and check that - # the client is clean so that we don't release from a dirty client. - # Build prod image. - if candidate: - # For candidate prod images which need to be tested on the AWS infra add - # a hash identifier. - latest_version = None - image_versioned_prod = hlitadoc.get_image( - base_image, "prod", latest_version - ) - if not tag: - head_hash = hgit.get_head_hash(short_hash=True) - else: - head_hash = tag - # Add username to the prod image name. - if user_tag: - image_versioned_prod += f"-{user_tag}" - # Add head hash to the prod image name. - image_versioned_prod += f"-{head_hash}" - - else: - image_versioned_prod = hlitadoc.get_image( - base_image, "prod", prod_version - ) - # - dockerfile = "devops/docker_build/prod.Dockerfile" - dockerfile = _to_abs_path(dockerfile) - # - # TODO(gp): Use to_multi_line_cmd() - opts = "--no-cache" if not cache else "" - # Use dev version for building prod image. 
- dev_version = hlitadoc.to_dev_version(prod_version) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - hdbg.dassert( - not hgit.is_inside_submodule(), - "The build should be run from a super repo, not a submodule.", - ) - git_root_dir = hgit.find_git_root() - # TODO(heanh): Expose the build context to the interface and use `git_root_dir` by default. - cmd = rf""" - DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker build \ - {opts} \ - --tag {image_versioned_prod} \ - --file {dockerfile} \ - --build-arg VERSION={dev_version} \ - --build-arg ECR_BASE_PATH={os.environ["CSFY_ECR_BASE_PATH"]} \ - --build-arg IMAGE_NAME={image_name} \ - {git_root_dir} - """ - hlitauti.run(ctx, cmd) - if candidate: - _LOG.info("Head hash: %s", head_hash) - _list_image(ctx, image_versioned_prod) - else: - # Tag versioned image as latest prod image. - latest_version = None - image_prod = hlitadoc.get_image(base_image, "prod", latest_version) - cmd = f"docker tag {image_versioned_prod} {image_prod}" - hlitauti.run(ctx, cmd) - # - _list_image(ctx, image_prod) - - -@task -def docker_build_multi_arch_prod_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - user_tag="", - container_dir_name=".", - tag=None, - multi_arch="linux/amd64,linux/arm64", -): - """ - Build a multi arch. versioned prod image from a dev image. For e.g.: we - have the dev image `helpers:dev-1.0.0` and we want to build a prod image - `helpers:prod-1.0.0`. - - :param version: version to tag the image and code with - :param cache: note that often the prod image is just a copy of the - dev image so caching makes no difference - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param user_tag: the name of the user building the candidate image - :param container_dir_name: directory where the Dockerfile is located - :param multi_arch: comma separated list of target architectures to - build the image for. 
E.g., `linux/amd64,linux/arm64` - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.prod" - _prepare_docker_ignore(ctx, docker_ignore) - # TODO(gp): We should do a `i git_clean` to remove artifacts and check that - # the client is clean so that we don't release from a dirty client. - # Build prod image. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version) - # Prepare the build. - dockerfile = "devops/docker_build/prod.Dockerfile" - # Keep the relative path instead of an absolute path to ensure it matches - # files inside the tar stream and avoids file not found errors. - # dockerfile = _to_abs_path(dockerfile) - # - opts = "--no-cache" if not cache else "" - # Use dev version for building prod image. - dev_version = hlitadoc.to_dev_version(prod_version) - build_args = [ - ("VERSION", dev_version), - ("ECR_BASE_PATH", os.environ["CSFY_ECR_BASE_PATH"]), - ] - build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_args) - # Login to AWS ECR because for multi-arch we need to build the local - # image remotely. - hlitadoc.docker_login(ctx) - _create_multiarch_builder(ctx) - _build_multi_arch_image( - ctx, opts, multi_arch, build_args, image_versioned_prod, dockerfile - ) - # TODO(sandeep): If possible, switch to hlitadoc._docker_pull(). - # Pull the image from registry after building. - cmd = f"docker pull {image_versioned_prod}" - hlitauti.run(ctx, cmd) - _list_image(ctx, image_versioned_prod) - - -@task -def docker_tag_push_multi_arch_prod_image( # type: ignore - ctx, - version, - base_image="", - target_registry=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Mark the multi-arch versioned "prod" image as "prod" and push them to the - target registry. 
- - `base_image` and `target_registry` both contain information about the target - Docker registry. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param target_registry: target Docker image registry to push the image to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - source_stage = "prod" - target_stage = "prod" - _docker_tag_and_push_multi_arch_image( - ctx, - version, - base_image, - target_registry, - container_dir_name, - source_stage, - target_stage, - ) - - -# TODO(gp): Can we merge this with docker_push_prod_image? -@task -def docker_push_prod_candidate_image( # type: ignore - ctx, - candidate, - base_image="", - container_dir_name=".", -): - """ - (ONLY CI/CD) Push the "prod" candidate image to ECR. - - :param ctx: invoke context - :param candidate: hash of the candidate prod image to push - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # - hlitadoc.docker_login(ctx) - # Push image with tagged with a hash ID. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", None) - cmd = f"docker push {image_versioned_prod}-{candidate}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -# TODO(Vlad): Add the release flow with the multi-arch support. -# See HelpersTask339. -def docker_release_prod_image( # type: ignore - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - push_to_repo=True, - container_dir_name=".", -): - """ - Build, test, and release to ECR the prod image. 
- - - Build prod image - - Run the tests - - Push the prod image repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param push_to_repo: push the image to the repo_short_name - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # 1) Build prod image. - docker_build_prod_image( - ctx, - cache=cache, - version=prod_version, - container_dir_name=container_dir_name, - ) - # 2) Run tests. - if skip_tests: - _LOG.warning("Skipping all tests and releasing") - fast_tests = slow_tests = superslow_tests = False - stage = "prod" - if fast_tests: - hlitapyt.run_fast_tests(ctx, stage=stage, version=prod_version) - if slow_tests: - hlitapyt.run_slow_tests(ctx, stage=stage, version=prod_version) - if superslow_tests: - hlitapyt.run_superslow_tests(ctx, stage=stage, version=prod_version) - # 3) Run QA tests using the local version of the prod image before pushing - # it to ECR. - if qa_tests: - hlitapyt.run_qa_tests(ctx, stage=stage, version=prod_version) - # 4) Push prod image. 
- if push_to_repo: - docker_push_prod_image( - ctx, version=prod_version, container_dir_name=container_dir_name - ) - else: - _LOG.warning("Skipping pushing image to repo_short_name as requested") - _LOG.info("==> SUCCESS <==") - - -@task(iterable=["docker_registry"]) -def docker_release_multi_arch_prod_image( - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - docker_registry=None, - container_dir_name=".", -): - """ - Build, test, and release to Docker registries the multi-arch prod image. - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param docker_registry: list of Docker image registries to push the image to - :param container_dir_name: directory where the Dockerfile is located - Example usage: - > invoke docker_release_multi_arch_prod_image \ - --version 1.2.0 - --docker-registry dockerhub.causify \ - --docker-registry aws_ecr.ck - """ - hlitauti.report_task() - # The default value for iterative task parameter will be an empty list. - # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values - if len(docker_registry) == 0: - docker_registry = [_DEFAULT_TARGET_REGISTRY] - _LOG.warning( - "No Docker registries provided, using default: %s", docker_registry - ) - # 1) Build prod image. - docker_build_multi_arch_prod_image( - ctx, - version, - cache=cache, - container_dir_name=container_dir_name, - multi_arch="linux/amd64,linux/arm64", - ) - # 2) Run tests. 
- stage = "prod" - _run_tests( - ctx, - stage, - version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=qa_tests, - ) - # 3) Push prod image. - for registry in docker_registry: - docker_tag_push_multi_arch_prod_image( - ctx, - version=version, - target_registry=registry, - container_dir_name=container_dir_name, - ) - _LOG.info("==> SUCCESS <==") - - -# # TODO(gp): Useless IMO. -@task -def docker_release_all(ctx, version, container_dir_name="."): # type: ignore - """ - (ONLY CI/CD) Release both dev and prod image to ECR. - - This includes: - - docker_release_dev_image - - docker_release_prod_image - - :param version: version to tag the image and code with - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task() - docker_release_dev_image(ctx, version, container_dir_name=container_dir_name) - docker_release_prod_image( - ctx, version, container_dir_name=container_dir_name - ) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_rollback_dev_image( # type: ignore - ctx, - version, - push_to_repo=True, -): - """ - Rollback the version of the dev image. - - Phases: - 1) Ensure that version of the image exists locally - 2) Promote versioned image as dev image - 3) Push dev image to the repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param push_to_repo: push the image to the ECR repo - """ - hlitauti.report_task() - stage = "dev" - _docker_rollback_image( - ctx, - base_image="", - stage=stage, - version=version, - push_to_repo=push_to_repo, - ) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_rollback_prod_image( # type: ignore - ctx, - version, - push_to_repo=True, -): - """ - Rollback the version of the prod image. - - Same as parameters and meaning as `docker_rollback_dev_image`. 
- """ - hlitauti.report_task() - stage = "prod" - _docker_rollback_image( - ctx, - base_image="", - stage=stage, - version=version, - push_to_repo=push_to_repo, - ) - _LOG.info("==> SUCCESS <==") - - -def _check_workspace_dir_sizes() -> None: - """ - Check if user doesn't have large files/directories in their workspace. - - Use-case is running the function before building a candidate image. - Large files significanty slow dwon image creation and subsequent - pulling. Overtime it also increases costs of ECR usage. - """ - # Execute system command and split into a list of tuples [size, dir]. - # Threshold is chosen heuristically according to current repo dir sizes. - git_root = hgit.find_git_root() - with hsystem.cd(git_root): - fs_item_max_threshold = "200M" - directory_size_list = hsystem.system_to_string( - f"du --threshold {fs_item_max_threshold} -hs $(ls -A) | sort -hr" - )[1].split("\n") - # Filter out directories ignored by `dockerignore.prod` + "amp/" - # as submodule. - ignored_dirs = [ - "amp", - "ck.infra", - "amp/ck.infra", - "docs", - ".git", - "amp/.git", - ] - offending_items = [ - it.replace("\t", " ") - for it in directory_size_list - if it.split("\t")[1] not in ignored_dirs - ] - hdbg.dassert( - len(offending_items) == 0, - ( - "Your workspace contains one or more files/directories " - f"larger than {fs_item_max_threshold} move " - f"or delete the items:\n\t {offending_items}" - ), - ) - - -@task -def docker_create_candidate_image(ctx, container_dir_name=".", user_tag=""): # type: ignore - """ - Create new prod candidate image and update the specified ECS task - definition such that the Image URL specified in container definition points - to the new candidate image. - - :param task_definition: the name of the ECS task definition for - which an update to container image URL is made, e.g. cmamp-test - :param container_dir_name: the runnable dir path (e.g. 
- `./ck.infra/`) - :param user_tag: the name of the user creating the image, empty - parameter means the command was run via gh actions - :param region: AWS Region, for Tokyo region specify 'ap-northeast-1' - :return: the tag used for the image - """ - _check_workspace_dir_sizes() - # Get the hash of the image. - tag = hgit.get_head_hash(".", short_hash=True) - if user_tag: - # Add user name to the candidate tag. - tag = f"{user_tag}-{tag}" - # Create new prod image. - docker_build_prod_image( - ctx, - container_dir_name=container_dir_name, - version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, - candidate=True, - tag=tag, - ) - # Push candidate image. - docker_push_prod_candidate_image(ctx, tag) - return tag - - -# ############################################################################# -# ECS task definition workflows. -# ECS task definition is a wrapper around a container definition. -# ############################################################################# - - -@task -def docker_release_test_task_definition( - ctx, - task_definition: Optional[str] = None, - user_tag: Optional[str] = None, - region: str = hs3.AWS_EUROPE_REGION_1, -): # type: ignore - """ - Release candidate image to test ECS task definition. - - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Verify that task definition is provided. - hdbg.dassert_is_not(task_definition, None, "task definition is required") - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir, user_tag) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition, - image_tag=image_tag, - region=region, - environment="test", - ) - - -@task -def docker_release_preprod_task_definition( - ctx, region: str = hs3.AWS_EUROPE_REGION_1 -): # type: ignore - """ - Release candidate image to preprod ECS task definition. 
- - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Preprod release should be done from master branch and the client should be - # clean. - curr_branch = hgit.get_branch_name() - hdbg.dassert_eq( - curr_branch, "master", msg="You should release from master branch" - ) - _ = hgit.is_client_clean(abort_if_not_clean=True) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-preprod" - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition_name, - image_tag=image_tag, - region=region, - environment="preprod", - ) - - -@task -def docker_release_prod_task_definition(ctx, region: str = hs3.AWS_US_REGION_1): # type: ignore - """ - Release candidate image to prod ECS task definition. - - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Prod release should be done from master branch and the client should be - # clean. - curr_branch = hgit.get_branch_name() - hdbg.dassert_eq( - curr_branch, "master", msg="You should release from master branch" - ) - _ = hgit.is_client_clean(abort_if_not_clean=True) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-prod" - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition_name, - image_tag=image_tag, - region=region, - environment="prod", - ) - - -@task -def copy_ecs_task_definition_image_url(ctx, src_task_def, dst_task_def): # type: ignore - """ - Copy image URL from one task definition to another. 
- - Currently the implementation assumes the source region is Stockholm - and destination #TODO(Juraj): Because this is the configuration we - need at the moment. - - :param src_task_def: source ECS task definition (located in eu- - north-1) - :param dst_task_def: destination ECS task definition (located in ap- - northeast-1) - """ - # TODO(Vlad): Import locally to avoid redundant dependencies. - # See for detals: https://github.com/cryptokaizen/cmamp/issues/8086. - import helpers.haws as haws - - # - _ = ctx - src_image_url = haws.get_task_definition_image_url( - src_task_def, region=hs3.AWS_EUROPE_REGION_1 - ) - # We have cross-region replication enabled in ECR, all images live in both regions. - dst_image_url = src_image_url.replace( - hs3.AWS_EUROPE_REGION_1, hs3.AWS_TOKYO_REGION_1 - ) - haws.update_task_definition( - dst_task_def, dst_image_url, region=hs3.AWS_TOKYO_REGION_1 - ) - - -# TODO(gp): This might become obsolete. -@task -def docker_update_prod_task_definition( - ctx, version, preprod_tag, airflow_dags_s3_path, task_definition -): # type: ignore - """ - Update image in prod task definition to the desired version. - - :param version: latest version from `changelog.txt` or custom one (e.g., `1.1.1`) - :param preprod_tag: image that will be re-tagged with prod version - e.g., `preprod-d8sf76s` -> `prod-1.1.1` - :param airflow_dags_s3_path: S3 bucket from which airflow will load DAGs - :param task_definition: which ECS task definition to use - currently our prod ECS task definitions match short name of repos. - """ - # TODO(Nikola): Convert `haws` part to script so it can be called via `docker_cmd`. - # https://github.com/cryptokaizen/cmamp/pull/2594/files#r948551787 - import helpers.haws as haws - - # - # TODO(Nikola): Use env var for CK profile. - s3fs_ = hs3.get_s3fs(aws_profile="ck") - super_module = not hgit.is_inside_submodule() - # Prepare params for listing DAGs. 
- root_dir = hgit.get_client_root(super_module) - dags_path = [root_dir, "datapull", "airflow", "dags"] - if super_module and hgit.is_amp_present(): - # Main DAGs location is always in `cmamp`. - dags_path.insert(1, "amp") - dir_name = os.path.join(*dags_path) - pattern = "preprod.*.py" - only_files = True - use_relative_paths = False - # List preprod DAGs. - dag_paths = hs3.listdir(dir_name, pattern, only_files, use_relative_paths) - for dag_path in dag_paths: - # Abort in case one of the preprod DAGs is out of sync. - _, dag_name = os.path.split(dag_path) - hdbg.dassert_eq( - hs3.from_file(dag_path), - s3fs_.cat(airflow_dags_s3_path + dag_name).decode(), - msg=f"Preprod file `{dag_name}` is out of sync with `{airflow_dags_s3_path}`!", - ) - # Prepare params to compose new prod image url. - prod_version = hlitadoc.resolve_version_value(version) - base_image = "" - stage = "prod" - # Compose new prod image url. - new_prod_image_url = hlitadoc.get_image(base_image, stage, prod_version) - version = None - new_prod_image_url_no_version = hlitadoc.get_image( - base_image, stage, version - ) - # Check if preprod tag exist in preprod task definition as precaution. - preprod_task_definition_name = f"{task_definition}-preprod" - preprod_image_url = haws.get_task_definition_image_url( - preprod_task_definition_name - ) - preprod_tag_from_image = preprod_image_url.split(":")[-1] - msg = ( - f"Preprod tag is different in the image url `{preprod_tag_from_image}`!" - ) - hdbg.dassert_eq(preprod_tag_from_image, preprod_tag, msg=msg) - # Pull preprod image for re-tag. - hlitadoc.docker_login(ctx) - cmd = f"docker pull {preprod_image_url}" - hlitauti.run(ctx, cmd) - # Re-tag preprod image to prod. 
- cmd = f"docker tag {preprod_image_url} {new_prod_image_url}" - hlitauti.run(ctx, cmd) - cmd = f"docker tag {preprod_image_url} {new_prod_image_url_no_version}" - hlitauti.run(ctx, cmd) - cmd = f"docker rmi {preprod_image_url}" - hlitauti.run(ctx, cmd) - # Get original prod image for potential rollback. - original_prod_image_url = haws.get_task_definition_image_url(task_definition) - # Track successful uploads for potential rollback. - successful_uploads = [] - try: - # Update prod task definition to the latest prod tag. - haws.update_task_definition( - task_definition, new_prod_image_url, environment="prod" - ) - # Add prod DAGs to airflow s3 bucket after all checks are passed. - for dag_path in dag_paths: - # Update prod DAGs. - _, dag_name = os.path.split(dag_path) - prod_dag_name = dag_name.replace("preprod.", "prod.") - dag_s3_path = airflow_dags_s3_path + prod_dag_name - s3fs_.put(dag_path, dag_s3_path) - _LOG.info("Successfully uploaded `%s`!", dag_s3_path) - successful_uploads.append(dag_s3_path) - # Upload new tag to ECS. - docker_push_prod_image(ctx, prod_version) - except Exception as ex: - _LOG.info("Rollback started!") - # Rollback prod task definition image URL. - haws.update_task_definition( - task_definition, original_prod_image_url, environment="prod" - ) - _LOG.info( - "Reverted prod task definition image url to `%s`!", - original_prod_image_url, - ) - # Notify for potential rollback for airflow S3 bucket, if any. - if successful_uploads: - _LOG.warning("Starting S3 rollback!") - # Prepare bucket resource. - s3 = haws.get_service_resource(aws_profile="ck", service_name="s3") - bucket_name, _ = hs3.split_path(airflow_dags_s3_path) - if hasattr(s3, "Bucket"): - bucket = s3.Bucket(bucket_name) - else: - # We'll need to handle this differently since client doesn't - # have object_versions. 
- raise NotImplementedError( - "S3 resource Bucket attribute not available, fallback implementation needed" - ) - for successful_upload in successful_uploads: - # TODO(Nikola): Maybe even Telegram notification? - # Rollback successful upload. - _, prefix = hs3.split_path(successful_upload) - prefix = prefix.lstrip(os.sep) - versions = sorted( - bucket.object_versions.filter(Prefix=prefix), - key=attrgetter("last_modified"), - reverse=True, - ) - latest_version = versions[0] - latest_version.delete() - _LOG.info("Deleted version `%s`.", latest_version.version_id) - if len(versions) > 1: - rollback_version = versions[1] - _LOG.info( - "Active version is now `%s`!", - rollback_version.version_id, - ) - elif len(versions) == 1: - _LOG.info( - "Deleted version was also the only version. Nothing to rollback." - ) - else: - # TODO(Nikola): Do we need custom exception? - raise NotImplementedError - s3_rollback_message = ( - f"S3 uploads reverted: {successful_uploads}" - if successful_uploads - else "No S3 uploads." - ) - _LOG.info("Rollback completed! %s", s3_rollback_message) - raise ex - - -@task -def docker_build_frontend_feature_image( - ctx, - stage, - dev_image_version=None, - app_version=None, -): - """ - Build frontend image for releasing the features. - - :param stage: stage to release the image - :param dev_image_version: base dev image version to use - :param app_version: app version for feature releases - """ - hdbg.dassert_in(stage, ["test", "preprod", "prod"]) - # Get changelog paths. - current_dir = os.getcwd() - # Get image and app version. - if not dev_image_version: - dev_image_version = hversio.get_changelog_version(current_dir) - if not app_version: - errors = [] - # Here we assume FE has its own runnable dir or the app changelog file - # is inside `app` dir of a parent runnable dir. 
- for file_name in [ - "app_changelog.txt", - os.path.join("app", "app_changelog.txt"), - ]: - try: - app_version = hversio.get_changelog_version( - current_dir, file_name=file_name - ) - break - except AssertionError as e: - errors.append(str(e)) - else: - raise FileNotFoundError( - f"App changelog file not found. Provide app version explicitly. Errors: {errors}" - ) - # Set ECR base path. - if stage in ("test", "preprod"): - ecr_base_path = "623860924167.dkr.ecr.eu-north-1.amazonaws.com" - else: - ecr_base_path = "726416904550.dkr.ecr.us-east-1.amazonaws.com" - # Set prod docker file name. - dockerfile = "devops/docker_build/prod.Dockerfile" - dockerfile = _to_abs_path(dockerfile) - # Set image tag. - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - image_tag = f"{ecr_base_path}/{image_name}:{stage}-{app_version}" - git_root_dir = hgit.find_git_root() - # Docker build command. - cmd = rf""" - docker build --no-cache \ - --file {dockerfile} \ - --build-arg VERSION={dev_image_version} \ - --build-arg ECR_BASE_PATH={ecr_base_path} \ - --build-arg IMAGE_NAME={image_name} \ - --tag {image_tag} \ - {git_root_dir} - """ - hlitauti.run(ctx, cmd) - _list_image(ctx, image_tag) - - -# ############################################################################# -# Test dev image flow -# ############################################################################# - - -@task -def docker_build_test_dev_image( # type: ignore - ctx, - assignee="", - reviewers="", - container_dir_name=".", -): - """ - Automate the complete periodic release workflow for the dev image. 
- - This task performs: - 1) Bump version (e.g., 2.2.0 -> 2.3.0) - 2) Get release team members - 3) Create branch with date-based name - 4) Build image locally with the bumped version number - 5) Run tests (fast, slow, superslow) - 6) Add changelog entry for the release - 7) Stage poetry.lock and pip_list.txt files - 8) Commit changes with versioned message - 9) Push changes - 10) Create PR - 11) Tag and push image to GHCR - - :param ctx: invoke context - :param assignee: GitHub username to assign the PR to - :param reviewers: GitHub username(s) to request PR review. If not - specified, uses the release team members from GitHub team - configured in repo_config.yaml - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # 1) Bump version. - _LOG.info("Step 1: Bumping version") - current_version = hversio.get_changelog_version(container_dir_name) - hdbg.dassert(current_version, "Could not find current version in changelog") - _LOG.info("Current version: %s", current_version) - version = hversio.bump_version(current_version, bump_type="minor") - _LOG.info("Bumped version: %s -> %s", current_version, version) - # 2) Get release team members. - _LOG.info("Step 2: Getting release team members") - if not reviewers: - release_team_name = hrecouti.get_repo_config().get_release_team() - # Get team members from GitHub team. - team_members = hlitagh.gh_get_team_member_names(release_team_name) - reviewers = ",".join(team_members) - _LOG.info("Release team '%s' members: %s", release_team_name, reviewers) - # 3) Create branch with date-based name. - _LOG.info("Step 3: Creating branch with date-based name") - issue_prefix = hrecouti.get_repo_config().get_issue_prefix() - # Get current date in YYYYMMDD format. 
- today = datetime.date.today().strftime("%Y%m%d") - branch_name = f"{issue_prefix}_Periodic_image_release_{today}" - _LOG.info("Branch name: %s", branch_name) - cmd = f"git checkout -b {branch_name}" - hlitauti.run(ctx, cmd) - # 4) Build image locally. - _LOG.info("Step 4: Building local image with version %s", version) - docker_build_local_image( - ctx, - version=version, - cache=True, - poetry_mode="update", - container_dir_name=container_dir_name, - ) - # 5) Run tests. - _LOG.info("Step 5: Running tests") - dev_version = _get_dev_version(version, container_dir_name) - stage = "dev" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=True, - qa_tests=False, - ) - # 6) Add changelog entry. - _LOG.info("Step 6: Adding changelog entry") - supermodule = True - root_dir = hversio._get_client_root(supermodule) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - changelog_file = os.path.join(root_dir, container_dir_name, "changelog.txt") - hdbg.dassert_file_exists(changelog_file) - # Read the current changelog. - changelog_content = hio.from_file(changelog_file) - # Prepare new entry. - today = datetime.date.today().strftime("%Y-%m-%d") - new_entry = f"""# {image_name}-{version} -- {today} -- Periodic release: {today} - -""" - # Prepend new entry to changelog. - updated_changelog = new_entry + changelog_content - # Write back to file. - hio.to_file(changelog_file, updated_changelog) - _LOG.info("Added changelog entry for version %s", version) - # 7) Stage files. - _LOG.info("Step 7: Staging files") - # Fix git permissions in CI to avoid "insufficient permission" errors. 
- if hserver.is_inside_ci(): - _LOG.info("Running in CI, fixing git permissions") - cmd = "sudo chmod -R 777 .git/objects/" - hlitauti.run(ctx, cmd) - files_to_stage = [ - "devops/docker_build/poetry.lock", - "devops/docker_build/pip_list.txt", - "changelog.txt", - ] - for file_path in files_to_stage: - full_path = os.path.join(root_dir, container_dir_name, file_path) - if os.path.exists(full_path): - cmd = f"git add {full_path}" - hlitauti.run(ctx, cmd) - _LOG.info("Staged %s", full_path) - else: - _LOG.warning("File not found, skipping: %s", full_path) - # 8) Commit changes. - _LOG.info("Step 8: Committing changes") - commit_message = f"Poetry output from the v{version} build" - # --no-verify to skip pre-commit checks since the `poetry.lock` file is - # too big and the `check_file_size` is failed. - cmd = f'git commit -m "{commit_message}" --no-verify' - hlitauti.run(ctx, cmd) - # 9) Push changes. - _LOG.info("Step 9: Pushing changes") - cmd = f"git push origin {branch_name}" - hlitauti.run(ctx, cmd) - # 10) Create PR. - _LOG.info("Step 10: Creating pull request") - pr_body = f"- Periodic release of {image_name} dev image version {version}" - label = _AUTO_RELEASE_LABEL - hlitagh.gh_create_pr( - ctx, - body=pr_body, - draft=False, - reviewer=reviewers, - labels=label, - assignee=assignee, - ) - _LOG.info("PR submitted for branch %s", branch_name) - # 11) Tag and push to GHCR. - _LOG.info("Step 11: Tagging and pushing image to GHCR") - # Get GHCR base image path from repo config. - ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") - ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() - ghcr_base_image = f"{ghcr_base}/{ghcr_image_name}" - _LOG.info("GHCR base image: %s", ghcr_base_image) - # Get local image name. - local_stage = "local" - image_local = hlitadoc.get_image("", local_stage, dev_version) - # Tag local image as versioned GHCR dev image (e.g., ghcr.io/causify-ai/csfy:dev-2.3.0). 
- ghcr_image_versioned = f"{ghcr_base_image}:dev-{version}" - cmd = f"docker tag {image_local} {ghcr_image_versioned}" - hlitauti.run(ctx, cmd) - _LOG.info("Tagged as versioned GHCR dev image: %s", ghcr_image_versioned) - # Push versioned GHCR dev image. - cmd = f"docker push {ghcr_image_versioned}" - hlitauti.run(ctx, cmd, pty=True) - _LOG.info("Pushed versioned GHCR dev image: %s", ghcr_image_versioned) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_tag_push_dev_image( - ctx, - version="", - base_image="", - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=False, -): - """ - Pulls a versioned dev image from a base registry, then tags and pushes - it to the specified target registries (both as versioned and latest). - - :param ctx: invoke context - :param version: version to tag the image and code with. If empty, reads - from changelog - :param base_image: base image path to pull from (e.g., - ghcr.io/causify-ai/csfy). If empty, uses GHCR from repo config - :param target_registries: comma separated list of target Docker - image registries to push the image to. E.g., "ghcr,ecr". - See the `helpers.repo_config_utils.RepoConfig.get_container_registry_url()` - for supported registry names - :param container_dir_name: directory where the Dockerfile is located - :param dry_run: if True, only print the commands without executing - them - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # Get version. - if not version: - version = hversio.get_changelog_version(container_dir_name) - # Get base image if not provided. - if not base_image: - ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") - ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() - base_image = f"{ghcr_base}/{ghcr_image_name}" - # Pull the image. 
- stage = "dev" - source_dev_image_versioned = hlitadoc.get_image(base_image, stage, version) - cmd = f"docker pull {source_dev_image_versioned}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) - # Tag and push to target registries. - for registry in target_registries.split(","): - # Strip whitespace from registry name. - registry = registry.strip() - # Tag and push the image to the target registry as latest dev image. - target_base = hrecouti.get_repo_config().get_container_registry_url( - registry - ) - target_image_name = ( - hrecouti.get_repo_config().get_docker_base_image_name() - ) - target_base_image = f"{target_base}/{target_image_name}" - latest_version = None - target_dev_image_latest = hlitadoc.get_image( - target_base_image, stage, latest_version - ) - cmd = ( - f"docker tag {source_dev_image_versioned} {target_dev_image_latest}" - ) - hlitauti.run(ctx, cmd, dry_run=dry_run) - cmd = f"docker push {target_dev_image_latest}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) - # Tag and push versioned dev image to target registry. 
- target_dev_image_versioned = hlitadoc.get_image( - target_base_image, stage, version - ) - cmd = f"docker tag {source_dev_image_versioned} {target_dev_image_versioned}" - hlitauti.run(ctx, cmd, dry_run=dry_run) - cmd = f"docker push {target_dev_image_versioned}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py deleted file mode 100644 index 7c1c360a6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py +++ /dev/null @@ -1,606 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_find as hlitafin -""" - -import functools -import glob -import logging -import os -import re -from typing import Iterator, List, Optional, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Find test. -# ############################################################################# - - -def _find_test_files( - dir_name: Optional[str] = None, use_absolute_path: bool = False -) -> List[str]: - """ - Find all the files containing test code in `abs_dir`. - """ - dir_name = dir_name or "." - hdbg.dassert_dir_exists(dir_name) - _LOG.debug("abs_dir=%s", dir_name) - # Find all the file names containing test code. 
- _LOG.info("Searching from '%s'", dir_name) - path = os.path.join(dir_name, "**", "test_*.py") - _LOG.debug("path=%s", path) - file_names = glob.glob(path, recursive=True) - _LOG.debug("Found %d files: %s", len(file_names), str(file_names)) - hdbg.dassert_no_duplicates(file_names) - # Test files should always under a dir called `test`. - for file_name in file_names: - if "/old/" in file_name: - continue - if "/compute/" in file_name: - continue - hdbg.dassert_eq( - os.path.basename(os.path.dirname(file_name)), - "test", - "Test file '%s' needs to be under a `test` dir ", - file_name, - ) - hdbg.dassert_not_in( - "notebook/", - file_name, - "Test file '%s' should not be under a `notebook` dir", - file_name, - ) - # Make path relatives, if needed. - if use_absolute_path: - file_names = [os.path.abspath(file_name) for file_name in file_names] - # - file_names = sorted(file_names) - _LOG.debug("file_names=%s", file_names) - hdbg.dassert_no_duplicates(file_names) - return file_names - - -# TODO(gp): -> find_class since it works also for any class. -def _find_test_class( - class_name: str, file_names: List[str], exact_match: bool = False -) -> List[str]: - """ - Find test file containing `class_name` and report it in pytest format. - - E.g., for "TestLibTasksRunTests1" return - "test/test_lib_tasks.py::TestLibTasksRunTests1" - - :param exact_match: find an exact match or an approximate where `class_name` - is included in the class name - """ - # > jackpy TestLibTasksRunTests1 - # test/test_lib_tasks.py:60:class TestLibTasksRunTests1(hut.TestCase): - regex = r"^\s*class\s+(\S+)\s*\(" - _LOG.debug("regex='%s'", regex) - res: List[str] = [] - # Scan all the files. - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt = hio.from_file(file_name) - # Search for the class in each file. 
- for i, line in enumerate(txt.split("\n")): - # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line) - # TODO(gp): We should skip ```, """, ''' - m = re.match(regex, line) - if m: - found_class_name = m.group(1) - _LOG.debug(" %s:%d -> %s", line, i, found_class_name) - if exact_match: - found = found_class_name == class_name - else: - found = class_name in found_class_name - if found: - res_tmp = f"{file_name}::{found_class_name}" - _LOG.debug("-> res_tmp=%s", res_tmp) - res.append(res_tmp) - res = sorted(list(set(res))) - return res - - -# TODO(gp): Extend this to accept only the test method. -# TODO(gp): Have a single `find` command with multiple options to search for different -# things, e.g., class names, test names, pytest_mark, ... -@task -def find_test_class( - ctx, class_name, dir_name=".", pbcopy=True, exact_match=False -): # type: ignore - """ - Report test files containing `class_name` in a format compatible with - pytest. - - :param class_name: the class to search - :param dir_name: the dir from which to search (default: .) - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - hlitauti.report_task(txt="class_name abs_dir pbcopy") - hdbg.dassert_ne(class_name, "", "You need to specify a class name") - _ = ctx - file_names = _find_test_files(dir_name) - res = _find_test_class(class_name, file_names, exact_match) - res = " ".join(res) - # Print or copy to clipboard. - hsystem.to_pbcopy(res, pbcopy) - - -# ////////////////////////////////////////////////////////////////////////////////// - - -@functools.lru_cache() -def _get_python_files(subdir: str) -> List[str]: - pattern = "*.py" - only_files = False - use_relative_paths = False - python_files = hio.listdir(subdir, pattern, only_files, use_relative_paths) - # Remove tmp files. 
- python_files = [f for f in python_files if not f.startswith("tmp")] - return python_files - - -# File, line number, line, info1, info2 -_FindResult = Tuple[str, int, str, str, str] -_FindResults = List[_FindResult] - - -def _scan_files(python_files: List[str]) -> Iterator: - for file_ in python_files: - _LOG.debug("file=%s", file_) - txt = hio.from_file(file_) - for line_num, line in enumerate(txt.split("\n")): - # TODO(gp): Skip commented lines. - # _LOG.debug("%s:%s line='%s'", file_, line_num, line) - yield file_, line_num, line - - -def _find_short_import(iterator: Iterator, short_import: str) -> _FindResults: - """ - Find imports in the Python files with the given short import. - - E.g., for dtfcodarun dataflow/core/test/test_builders.py:9:import - dataflow.core.dag_runner as dtfcodarun returns - """ - # E.g., - # `import dataflow.core.dag_runner as dtfcodarun` - regex = rf"import\s+(\S+)\s+as\s+({short_import})" - regex = re.compile(regex) - # - results: _FindResults = [] - for file_, line_num, line in iterator: - m = regex.search(line) - if m: - # E.g., - # dataflow/core/test/test_builders.py:9:import dataflow.core.dag_runner as dtfcodarun - _LOG.debug(" --> line:%s=%s", line_num, line) - long_import_txt = m.group(1) - short_import_txt = m.group(2) - full_import_txt = f"import {long_import_txt} as {short_import_txt}" - res = (file_, line_num, line, short_import_txt, full_import_txt) - # E.g., - _LOG.debug(" => %s", str(res)) - results.append(res) - return results - - -def _find_func_class_uses(iterator: Iterator, regex: str) -> _FindResults: - regexs = [] - # E.g., - # `dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)` - regexs.append(rf"\s+(\w+)\.(\w*{regex})\(") - # `dag_builder: dtfcodabui.DagBuilder` - regexs.append(rf":\s*(\w+)\.(\w*{regex})") - # - _LOG.debug("regexs=%s", str(regexs)) - regexs = [re.compile(regex_) for regex_ in regexs] - # - results: _FindResults = [] - for file_, line_num, line in iterator: - _LOG.debug("line='%s'", 
line) - m = None - for regex_ in regexs: - m = regex_.search(line) - if m: - # _LOG.debug("--> regex matched") - break - if m: - _LOG.debug(" --> line:%s=%s", line_num, line) - short_import_txt = m.group(1) - obj_txt = m.group(2) - res = (file_, line_num, line, short_import_txt, obj_txt) - # E.g., - # ('./helpers/lib_tasks.py', 10226, 'dtfsys', 'RealTimeDagRunner') - # ('./dataflow/core/test/test_builders.py', 70, 'dtfcodarun', 'FitPredictDagRunner') - # ('./dataflow/core/test/test_builders.py', 157, 'dtfcodarun', 'FitPredictDagRunner') - _LOG.debug(" => %s", str(res)) - results.append(res) - return results - - -def _process_find_results(results: _FindResults, how: str) -> List: - filtered_results: List = [] - if how == "remove_dups": - # Remove duplicates. - for result in results: - (_, _, _, info1, info2) = result - filtered_results.append((info1, info2)) - filtered_results = hlist.remove_duplicates(filtered_results) - filtered_results = sorted(filtered_results) - elif how == "all": - filtered_results = sorted(results) - else: - raise ValueError(f"Invalid how='{how}'") - return filtered_results - - -@task -def find(ctx, regex, mode="all", how="remove_dups", subdir="."): # type: ignore - """ - Find symbols, imports, test classes and so on. 
- - Example: - ``` - > i find DagBuilder - ('dtfcodabui', 'DagBuilder') - ('dtfcore', 'DagBuilder') - ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') - ('dtfcore', 'import dataflow.core as dtfcore') - ``` - - :param regex: function or class use to search for - :param mode: what to look for - - `symbol_import`: look for uses of function or classes - E.g., `DagRunner` - returns - ``` - ('cdataf', 'PredictionDagRunner') - ('cdataf', 'RollingFitPredictDagRunner') - ``` - - `short_import`: look for the short import - E.g., `'dtfcodabui' - returns - ``` - ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') - ``` - :param how: how to report the results - - `remove_dups`: report only imports and calls that are the same - """ - hlitauti.report_task(txt=hprint.to_str("regex mode how subdir")) - _ = ctx - # Process the `where`. - python_files = _get_python_files(subdir) - iter_ = _scan_files(python_files) - # Process the `what`. - if mode == "all": - for mode_tmp in ("symbol_import", "short_import"): - find(ctx, regex, mode=mode_tmp, how=how, subdir=subdir) - return - if mode == "symbol_import": - results = _find_func_class_uses(iter_, regex) - filtered_results = _process_find_results(results, "remove_dups") - print("\n".join(map(str, filtered_results))) - # E.g., - # ('cdataf', 'PredictionDagRunner') - # ('cdataf', 'RollingFitPredictDagRunner') - # Look for each short import. - results = [] - for short_import, _ in filtered_results: - iter_ = _scan_files(python_files) - results.extend(_find_short_import(iter_, short_import)) - elif mode == "short_import": - results = _find_short_import(iter_, regex) - else: - raise ValueError(f"Invalid mode='{mode}'") - # Process the `how`. - filtered_results = _process_find_results(results, how) - print("\n".join(map(str, filtered_results))) - - -# ############################################################################# -# Find test decorator. 
-# ############################################################################# - - -# TODO(gp): decorator_name -> pytest_mark -def _find_test_decorator( - decorator_name: str, file_names: List[str] -) -> List[str]: - """ - Find test files containing tests with a certain decorator - `@pytest.mark.XYZ`. - """ - hdbg.dassert_isinstance(file_names, list) - # E.g., - # @pytest.mark.slow(...) - # @pytest.mark.qa - string = f"@pytest.mark.{decorator_name}" - regex = rf"^\s*{re.escape(string)}\s*[\(]?" - _LOG.debug("regex='%s'", regex) - res: List[str] = [] - # Scan all the files. - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt = hio.from_file(file_name) - # Search for the class in each file. - for i, line in enumerate(txt.split("\n")): - # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line) - # TODO(gp): We should skip ```, """, '''. We can add a function to - # remove all the comments, although we need to keep track of the - # line original numbers. - m = re.match(regex, line) - if m: - _LOG.debug(" -> found: %d:%s", i, line) - res.append(file_name) - # - res = sorted(list(set(res))) - return res - - -@task -def find_test_decorator(ctx, decorator_name="", dir_name="."): # type: ignore - """ - Report test files containing `class_name` in pytest format. - - :param decorator_name: the decorator to search - :param dir_name: the dir from which to search - """ - hlitauti.report_task() - _ = ctx - hdbg.dassert_ne(decorator_name, "", "You need to specify a decorator name") - file_names = _find_test_files(dir_name) - res = _find_test_decorator(decorator_name, file_names) - res = " ".join(res) - print(res) - - -# ############################################################################# -# Find / replace `check_string`. 
-# ############################################################################# - - -@task -def find_check_string_output( # type: ignore - ctx, class_name, method_name, as_python=True, fuzzy_match=False, pbcopy=True -): - """ - Find output of `check_string()` in the test running - class_name::method_name. - - E.g., for `TestResultBundle::test_from_config1` return the content of the file - `./core/dataflow/test/TestResultBundle.test_from_config1/output/test.txt` - - :param as_python: if True return the snippet of Python code that replaces the - `check_string()` with a `assert_equal` - :param fuzzy_match: if True return Python code with `fuzzy_match=True` - :param pbcopy: save the result into the system clipboard (only on macOS) - """ - hlitauti.report_task() - _ = ctx - hdbg.dassert_ne(class_name, "", "You need to specify a class name") - hdbg.dassert_ne(method_name, "", "You need to specify a method name") - # Look for the directory named `class_name.method_name`. - cmd = f"find . -name '{class_name}.{method_name}' -type d" - # > find . -name "TestResultBundle.test_from_config1" -type d - # ./core/dataflow/test/TestResultBundle.test_from_config1 - _, txt = hsystem.system_to_string(cmd, abort_on_error=False) - file_names = txt.split("\n") - if not txt: - hdbg.dfatal(f"Can't find the requested dir with '{cmd}'") - if len(file_names) > 1: - hdbg.dfatal(f"Found more than one dir with '{cmd}':\n{txt}") - dir_name = file_names[0] - # Find the only file underneath that dir. - hdbg.dassert_dir_exists(dir_name) - cmd = f"find {dir_name} -name 'test.txt' -type f" - _, file_name = hsystem.system_to_one_line(cmd) - hdbg.dassert_file_exists(file_name) - # Read the content of the file. - _LOG.info("Found file '%s' for %s::%s", file_name, class_name, method_name) - txt = hio.from_file(file_name) - if as_python: - # Package the code snippet. - if not fuzzy_match: - # Align the output at the same level as 'expected = r...'. 
- num_spaces = 8 - txt = hprint.indent(txt, num_spaces=num_spaces) - output = f""" - actual = - expected = r\"\"\" -{txt} - \"\"\".lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match={fuzzy_match}) - """ - else: - output = txt - # Print or copy to clipboard. - hsystem.to_pbcopy(output, pbcopy=pbcopy) - return output - - -# ############################################################################# -# Find module dependencies. -# ############################################################################# - - -standard_libs = [ - "abc", - "argparse", - "datetime", - "importlib", - "logging", - "os", - "pandas", - "pytest", - "re", - "unittest", -] - - -@task -def find_dependency( # type: ignore - ctx, - module_name, - mode="print_deps", - only_module="", - ignore_standard_libs=True, - ignore_helpers=True, - remove_dups=True, -): - """ - E.g., ``` - - # Find all the dependency of a module from itself - > i find_dependency --module-name "amp.dataflow.model" --mode "find_lev2_deps" --ignore-helpers --only-module dataflow - amp/dataflow/model/stats_computer.py:16 dataflow.core - amp/dataflow/model/model_plotter.py:4 dataflow.model - ``` - - :param module_name: the module path to analyze (e.g., `amp.dataflow.model`) - :param mode: - - `print_deps`: print the result of grepping for imports - - `find_deps`: find all the dependencies - - `find_lev1_deps`, `find_lev2_deps`: find all the dependencies - :param only_module: keep only imports containing a certain module (e.g., `dataflow`) - :param ignore_standard_libs: ignore the Python standard libs (e.g., `os`, `...`) - :param ignore_helpers: ignore the `helper` lib - :param remove_dups: remove the duplicated imports - """ - _ = ctx - # (cd amp/dataflow/model/; jackpy "import ") | grep -v notebooks | grep -v test | grep -v __init__ | grep "import dataflow" - src_dir = module_name.replace(".", "/") - hdbg.dassert_dir_exists(src_dir) - # Find all the imports. 
- cmd = f'find {src_dir} -name "*.py" | xargs grep -n -r "^import "' - _, txt = hsystem.system_to_string(cmd) - # - if mode == "print_deps": - print(txt) - return - # Parse the output. - _LOG.debug("\n" + hprint.frame("Parse")) - lines = txt.split("\n") - lines_out = [] - for line in lines: - # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas - # import helpers.hunit_test as hunitest # pylint: disable=no-name-in-module' - data = line.split(":") - hdbg.dassert_lte(3, len(data), "Invalid line='%s'", line) - file, line_num, import_code = data[:3] - _LOG.debug(hprint.to_str("file line_num import_code")) - lines_out.append((file, line_num, import_code)) - lines = lines_out - _LOG.debug("Found %d imports", len(lines)) - # Remove irrelevant files and imports. - _LOG.debug("\n" + hprint.frame("Remove irrelevant entries")) - lines_out = [] - for line in lines: - file, line_num, import_code = line - _LOG.debug("# " + hprint.to_str("file line_num import_code")) - if "__init__.py" in file: - _LOG.debug("Remove because init") - continue - if "/test/" in file: - _LOG.debug("Remove because test") - continue - if "notebooks/" in file: - _LOG.debug("Remove because notebook") - continue - if "from typing import" in import_code: - _LOG.debug("Remove because typing") - continue - lines_out.append(line) - lines = lines_out - _LOG.debug("After removal %d imports", len(lines)) - # Process. - _LOG.debug("\n" + hprint.frame("Process entries")) - lines_out = [] - for line in lines: - # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas - file, line_num, import_code = line - _LOG.debug("# " + hprint.to_str("file line_num import_code")) - # Parse import code. 
- m = re.match(r"^import\s+(\S+)(\s+as)?", import_code) - hdbg.dassert(m, "Can't parse line='%s'", import_code) - assert m is not None - import_name = m.group(1) - _LOG.debug("import_name='%s'", import_name) - lev1_import = import_name.split(".")[0] - if ignore_standard_libs: - if lev1_import in standard_libs: - _LOG.debug("Ignoring standard lib '%s'", lev1_import) - continue - if ignore_helpers: - if lev1_import.startswith("helpers"): - _LOG.debug("Ignoring helpers '%s'", lev1_import) - continue - if only_module: - if only_module not in import_name: - _LOG.debug( - "Ignoring '%s' since it doesn't contain %s", - import_name, - only_module, - ) - continue - # - if mode == "find_deps": - dep = import_name - elif mode == "find_lev1_deps": - deps = import_name.split(".") - if len(deps) > 1: - dep = deps[0] - else: - dep = import_name - elif mode == "find_lev2_deps": - deps = import_name.split(".") - if len(deps) > 1: - dep = ".".join(deps[:2]) - else: - dep = import_name - else: - raise ValueError(f"Invalid mode='{mode}'") - lines_out.append((file, line_num, dep)) - lines = lines_out - # Remove repeated tuples. - if remove_dups: - _LOG.debug("\n" + hprint.frame("Remove repeated tuples")) - import_names = set() - lines_out = [] - for line in lines: - if line[2] in import_names: - continue - lines_out.append(line) - import_names.add(line[2]) - lines = lines_out - else: - _LOG.warning("Remove dups skipped") - # Sort. - _LOG.debug("\n" + hprint.frame("Sort tuples")) - lines = sorted(lines, key=lambda x: x[2]) - # Print and save. 
- print(hprint.frame("Results")) - _LOG.debug("\n" + hprint.frame("Print")) - txt = "\n".join([":".join(line) for line in lines]) - file_name = "cfile" - hio.to_file(file_name, txt) - _LOG.info("%s saved", file_name) - # - txt = "\n".join(["%s:%s\t\t\t%s" % line for line in lines]) - print(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py deleted file mode 100644 index 53c9600af..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py +++ /dev/null @@ -1,1252 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_gh as hlitagh -""" - -import datetime -import json -import logging -import os -import re -from typing import Any, Dict, List, Optional, Tuple - -import invoke.exceptions as invexc -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htable as htable -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# GitHub CLI. -# ############################################################################# - - -@task -def gh_login( # type: ignore - ctx, - account="", - print_status=False, -): - hlitauti.report_task() - # - if not account: - # Retrieve the name of the repo, e.g., "alphamatic/amp". 
- full_repo_name = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=False - ) - _LOG.debug(hprint.to_str("full_repo_name")) - account = full_repo_name.split("/")[0] - _LOG.info(hprint.to_str("account")) - # - ssh_filename = os.path.expanduser(f"~/.ssh/id_rsa.{account}.github") - _LOG.debug(hprint.to_str("ssh_filename")) - if os.path.exists(ssh_filename): - cmd = f"export GIT_SSH_COMMAND='ssh -i {ssh_filename}'" - print(cmd) - else: - _LOG.warning("Can't find file '%s'", ssh_filename) - # - if print_status: - cmd = "gh auth status" - hlitauti.run(ctx, cmd) - # - github_pat_filename = os.path.expanduser(f"~/.ssh/github_pat.{account}.txt") - if os.path.exists(github_pat_filename): - cmd = f"gh auth login --with-token <{github_pat_filename}" - hlitauti.run(ctx, cmd) - else: - _LOG.warning("Can't find file '%s'", github_pat_filename) - # - if print_status: - cmd = "gh auth status" - hlitauti.run(ctx, cmd) - - -# ############################################################################# - - -def _get_branch_name(branch_mode: str) -> Optional[str]: - if branch_mode == "current_branch": - branch_name: Optional[str] = hgit.get_branch_name() - elif branch_mode == "master": - branch_name = "master" - elif branch_mode == "all": - branch_name = None - else: - raise ValueError(f"Invalid branch='{branch_mode}'") - return branch_name - - -def _get_org_name(org_name: str) -> str: - """ - Get organization name, inferring from current repo if not provided. - - :param org_name: organization name or empty string - :return: organization name - """ - if not org_name: - # Infer organization from current repo. - full_repo_name = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=False - ) - org_name = full_repo_name.split("/")[0] - return org_name - - -def _get_workflow_table() -> htable.TableType: - """ - Get a table with the status of the GH workflow for the current repo. - """ - # Get the workflow status from GH. 
- cmd = "export NO_COLOR=1; gh run list" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug(hprint.to_str("txt")) - # pylint: disable=line-too-long - # > gh run list - # STATUS TITLE WORKFLOW BRANCH EVENT ID ELAPSED AGE - # * AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 4m49s 4m - # > gh run list | more - # completed success AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 7m17s 10m - # in_progress AmpTask1786_Integrate_20230518_2 Slow tests AmpTask1786_Integrate_20230518_2 pull_request 5027911518 10m9s 10m - # pylint: enable=line-too-long - # The output is tab separated, so convert it into CSV. - first_line = txt.split("\n")[0] - _LOG.debug("first_line=%s", first_line.replace("\t", ",")) - num_cols = len(first_line.split("\t")) - _LOG.debug(hprint.to_str("first_line num_cols")) - cols = [ - # E.g., completed, in_progress. - "completed", - # E.g., success, failure. - "status", - # Aka title: parse but don't use. - "name", - "workflow", - "branch", - "event", - "id", - "elapsed", - "age", - ] - hdbg.dassert_eq(num_cols, len(cols)) - # Build the table. - table = htable.Table.from_text(cols, txt, delimiter="\t") - _LOG.debug(hprint.to_str("table")) - # Remove the "name" column as it's redundant with "workflow". - table = table.remove_column("name") - return table - - -def _print_table(table: htable.TableType) -> None: - table_str = str(table) - # Colorize the table. - color_map = {"success": "green", "failure": "red", "in progress": "yellow"} - for status, color in color_map.items(): - table_str = table_str.replace( - status, hprint.color_highlight(status, color) - ) - # Report the full status. - print(table_str) - - -# TODO(Grisha): seems like GH changed the output format, we should update accordingly, -# see CmTask #4672 "Slow tests fail (9835540316)" for details. 
-@task -def gh_workflow_list( # type: ignore - ctx, - filter_by_branch="current_branch", - filter_by_completed="all", - report_only_status=True, - show_stack_trace=False, - print_table=True, -): - """ - Report the status of the GH workflows. - - :param filter_by_branch: name of the branch to check - - `current_branch` for the current Git branch - - `master` for master branch - - `all` for all branches - :param filter_by_completed: filter table by the status of the workflow - - E.g., "failure", "success" - :param report_only_status: if True, report only the status of the workflows - :param show_stack_trace: in case of error run `pytest_repro` reporting also - the stack trace - :param print_table: if True, print the table with the status of the workflows - """ - hlitauti.report_task( - txt=hprint.to_str("filter_by_branch filter_by_completed") - ) - # Login. - gh_login(ctx) - # Get the table. - table = _get_workflow_table() - # Filter table based on the branch. - if filter_by_branch != "all": - field = "branch" - value = _get_branch_name(filter_by_branch) - print(f"Filtering table by {field}={value}") - table = table.filter_rows(field, value) - # Filter table by the workflow status. - if filter_by_completed != "all": - field = "completed" - value = filter_by_completed - print(f"Filtering table by {field}={value}") - table = table.filter_rows(field, value) - if ( - filter_by_branch not in ("current_branch", "master") - or not report_only_status - ): - _print_table(table) - return - # For each workflow find the last success. - branch_name = hgit.get_branch_name() - workflows = table.unique("workflow") - print(f"workflows={workflows}") - for workflow in workflows: - table_tmp = table.filter_rows("workflow", workflow) - if print_table: - print(hprint.frame(workflow)) - _print_table(table_tmp) - # Find the first success. 
- num_rows = table.size()[0] - _LOG.debug("num_rows=%s", num_rows) - for i in range(num_rows): - status_column = table_tmp.get_column("status") - _LOG.debug("status_column=%s", str(status_column)) - hdbg.dassert_lt( - i, len(status_column), "status_column=", status_column - ) - status = status_column[i] - if status == "success": - print(f"Workflow '{workflow}' for '{branch_name}' is ok") - break - if status == "failure": - _LOG.error( - "Workflow '%s' for '%s' is broken", workflow, branch_name - ) - # Get the output of the broken run. - # > gh run view 1477484584 --log-failed - workload_id = table_tmp.get_column("id")[i] - log_file_name = f"tmp.failure.{workflow}.{branch_name}.txt" - log_file_name = log_file_name.replace(" ", "_").lower() - cmd = f"gh run view {workload_id} --log-failed >{log_file_name}" - hsystem.system(cmd) - # Remove non-printable chars. - # TODO(heanh): Consider adding all the helpers util scripts - # to the `PATH` (when inside the container) so we can just use - # them without specifying the full path. 
- helpers_root_dir = hgit.find_helpers_root() - file_path = ( - f"{helpers_root_dir}/dev_scripts_helpers/system_tools" - ) - cmd = f"{file_path}/remove_escape_chars.py -i {log_file_name}" - hsystem.system(cmd) - print(f"# Log is in '{log_file_name}'") - # Run_fast_tests Run fast tests 2021-12-19T00:19:38.3394316Z FAILED data - # cmd = rf"grep 'Z FAILED ' {log_file_name}" - workflow_as_str = workflow.lower().replace(" ", "_") - script_name = f"./tmp.pytest_repro.{workflow_as_str}.sh" - cmd = f"invoke pytest_repro --file-name {log_file_name} --script-name {script_name}" - if show_stack_trace: - cmd += " -s" - hsystem.system(cmd, suppress_output=False, abort_on_error=False) - break - if status in ("startup_failure", "cancelled", "skipped"): - _LOG.debug( - "Workflow '%s' for '%s' has status '%s', skipping", - workflow, - branch_name, - status, - ) - break - if status == "": - if i == (len(status_column) - 1): - # If all the runs in the table are in progress, i.e. there is no - # failed or succesful run, issue a warning and exit. E.g., - # ######################################################### - # Superslow tests - # ######################################################### - # completed | status | workflow | branch | event | id | elapsed | age | - # ----------- | ------ | --------------- | ------ | ----------------- | ---------- | ------- | --- | - # in_progress | | Superslow tests | master | workflow_dispatch | 5421740561 | 13m25s | 13m | - _LOG.warning( - "No failed/successful run found for workflow=%s for branch=%s, all runs are in progress, exiting.", - workflow, - branch_name, - ) - else: - _LOG.debug( - "Workflow=%s for branch %s is in progress, skipping further checks", - workflow, - branch_name, - ) - break - else: - raise ValueError(f"Invalid status='{status}'") - - -@task -def gh_workflow_run(ctx, branch="current_branch", workflows="all"): # type: ignore - """ - Run GH workflows in a branch. 
- """ - hlitauti.report_task(txt=hprint.to_str("branch workflows")) - # Login. - gh_login(ctx) - # Get the branch name. - if branch == "current_branch": - branch_name = hgit.get_branch_name() - elif branch == "master": - branch_name = "master" - else: - raise ValueError(f"Invalid branch='{branch}'") - _LOG.debug(hprint.to_str("branch_name")) - # Get the workflows. - if workflows == "all": - gh_tests = ["fast_tests", "slow_tests"] - else: - gh_tests = [workflows] - _LOG.debug(hprint.to_str("workflows")) - # Run. - for gh_test in gh_tests: - gh_test += ".yml" - # gh workflow run fast_tests.yml --ref AmpTask1251_Update_GH_actions_for_amp - cmd = f"gh workflow run {gh_test} --ref {branch_name}" - hlitauti.run(ctx, cmd) - - -# ############################################################################# - - -# TODO(gp): Remove repo_short_name. -def _get_repo_full_name_from_cmd(repo_short_name: str) -> Tuple[str, str]: - """ - Convert the `repo_short_name` from command line (e.g., "current", "amp", - "lm") to the repo_short_name full name without host name. - """ - repo_full_name_with_host: str - if repo_short_name == "current": - # Get the repo name from the current repo. - repo_full_name_with_host = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=True - ) - hdbg.dassert_eq( - repo_full_name_with_host, - hrecouti.get_repo_config().get_repo_full_name_with_hostname(), - ) - ret_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - else: - hdbg.dfatal("This code path is obsolete") - _LOG.debug( - "repo_short_name=%s -> repo_full_name_with_host=%s ret_repo_short_name=%s", - repo_short_name, - repo_full_name_with_host, - ret_repo_short_name, - ) - return repo_full_name_with_host, ret_repo_short_name - - -def _get_gh_issue_title(issue_id: int, repo_short_name: str) -> Tuple[str, str]: - """ - Get the title of a GitHub issue. 
- - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - """ - # TODO(gp): I don't see applications where we need to pass the repo_short_name. - # One should always operate in the dir corresponding to a repo. - hdbg.dassert_eq(repo_short_name, "current") - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - # > (export NO_COLOR=1; gh issue view 1251 --json title) - # {"title":"Update GH actions for amp"} - hdbg.dassert_lte(1, issue_id) - cmd = f"gh issue view {issue_id} --repo {repo_full_name_with_host} --json title,url" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("txt=\n%s", txt) - # Parse json. - dict_ = json.loads(txt) - _LOG.debug("dict_=\n%s", dict_) - title = dict_["title"] - _LOG.debug("title=%s", title) - url = dict_["url"] - _LOG.debug("url=%s", url) - # Remove some annoying chars. - for char in ": + ( ) / ` *".split(): - title = title.replace(char, "") - # Replace multiple spaces with one. - title = re.sub(r"\s+", " ", title) - title = title.replace(" ", "_") - # Remove some annoying chars. - for char in "- ' ` \"".split(): - title = title.replace(char, "_") - # Add the prefix `AmpTaskXYZ_...` - task_prefix = hrecouti.get_repo_config().get_issue_prefix() - # task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - _LOG.debug("task_prefix=%s", task_prefix) - title = f"{task_prefix}{issue_id}_{title}" - return title, url - - -@task -def gh_issue_title(ctx, issue_id, repo_short_name="current", pbcopy=True): # type: ignore - """ - Print the title that corresponds to the given issue and repo_short_name. - E.g., AmpTask1251_Update_GH_actions_for_amp. - - Before running the invoke, one must check their login status on GH - by running `gh auth status`. - - :param issue_id: id number of the issue to create the branch for - :param repo_short_name: short name of the repo to use for the branch - name building. 
"current" refers to the repo where the call is - implemented - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - hlitauti.report_task(txt=hprint.to_str("issue_id repo_short_name")) - # Login. - gh_login(ctx) - # - issue_id = int(issue_id) - hdbg.dassert_lte(1, issue_id) - title, url = _get_gh_issue_title(issue_id, repo_short_name) - # Print or copy to clipboard. - msg = f"{title}: {url}" - hsystem.to_pbcopy(msg, pbcopy=pbcopy) - - -@task -def gh_issue_create( # type: ignore - ctx, - title="", - body="", - labels="", - assignees="", - project="", - repo_short_name="current", -): - """ - Create a new GitHub issue in the specified repository. - - ``` - # Create a simple issue - > invoke gh_issue_create --title "Fix bug in parser" - - # Create an issue with body and labels - > invoke gh_issue_create --title "Add new feature" --body "Description here" --labels "enhancement,priority-high" - - # Create an issue with assignees - > invoke gh_issue_create --title "Review PR" --assignees "user1,user2" - - # Create an issue and add to a project - > invoke gh_issue_create --title "Implement feature" --project "Development Board" - ``` - - :param title: title of the issue (required) - :param body: body/description of the issue - :param labels: comma-separated list of labels to apply - :param assignees: comma-separated list of GitHub usernames to assign - :param project: GitHub project name or number to add the issue to - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - :return: issue ID (integer) of the created issue - """ - hlitauti.report_task(txt=hprint.to_str("title repo_short_name")) - # Login. 
- gh_login(ctx) - # - hdbg.dassert(title, "Title is required") - hdbg.dassert_eq(repo_short_name, "current") - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - _LOG.info( - "Creating issue with title '%s' in %s", - title, - repo_full_name_with_host, - ) - # Build the command. - cmd = ( - "gh issue create" - + f" --repo {repo_full_name_with_host}" - + f' --title "{title}"' - ) - if body: - cmd += f' --body "{body}"' - if labels: - cmd += f' --label "{labels}"' - if assignees: - cmd += f' --assignee "{assignees}"' - if project: - cmd += f' --project "{project}"' - # Execute the command and capture output. - # gh issue create outputs the URL of the created issue, e.g., - # https://github.com/cryptokaizen/csfy/issues/7572 - _, output = hsystem.system_to_string(cmd) - _LOG.debug("gh issue create output: %s", output) - # Extract the issue ID from the URL. - # The URL format is: https://github.com/org/repo/issues/123 - match = re.search(r"/issues/(\d+)", output) - hdbg.dassert(match, f"Could not extract issue ID from output: {output}") - issue_id = int(match.group(1)) - _LOG.info("Created issue #%s", issue_id) - return issue_id - - -# ############################################################################# - - -def _check_if_pr_exists(title: str) -> bool: - """ - Return whether a PR exists or not. - """ - # > gh pr diff AmpTask1955_Lint_20211219 - # no pull requests found for branch "AmpTask1955_Lint_20211219" - cmd = f"gh pr diff {title}" - rc = hsystem.system(cmd, abort_on_error=False) - pr_exists: bool = rc == 0 - return pr_exists - - -@task -def gh_create_pr( # type: ignore - ctx, - body="", - draft=True, - auto_merge=False, - repo_short_name="current", - title="", - reviewer="", - labels="", - assignee="", -): - """ - Create a draft PR for the current branch in the corresponding - repo_short_name. 
- - ``` - # To open a PR in the web browser - > gh pr view --web - - # To see the status of the checks - > gh pr checks - ``` - - :param body: the body of the PR - :param draft: draft or ready-to-review PR - :param auto_merge: enable auto merging PR - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - :param title: title of the PR or the branch name, if title is empty - :param reviewer: GitHub username to request review from - :param labels: comma-separated list of labels to apply - :param assignee: GitHub username to assign the PR to - """ - hlitauti.report_task() - # Login. - gh_login(ctx) - # - branch_name = hgit.get_branch_name() - if not title: - # Use the branch name as title. - title = branch_name - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - _LOG.info( - "Creating PR with title '%s' for '%s' in %s", - title, - branch_name, - repo_full_name_with_host, - ) - if auto_merge: - hdbg.dassert( - not draft, "The PR can't be a draft in order to auto merge it" - ) - pr_exists = _check_if_pr_exists(title) - _LOG.debug(hprint.to_str("pr_exists")) - if pr_exists: - _LOG.warning("PR '%s' already exists: skipping creation", title) - else: - # Link the PR automatically to the branch, if possible. 
- issue_id = hgit.extract_gh_issue_number_from_branch(branch_name) - _LOG.debug(hprint.to_str("issue_id")) - if issue_id and str(issue_id) not in body: - body += f"\n\n#{issue_id}" - _LOG.info("Added issue id %s to the PR body", issue_id) - cmd = ( - "gh pr create" - + f" --repo {repo_full_name_with_host}" - + (" --draft" if draft else "") - + f' --title "{title}"' - + f' --body "{body}"' - ) - if reviewer: - cmd += f" --reviewer {reviewer}" - _LOG.info("Added reviewer %s to the PR", reviewer) - if labels: - cmd += f' --label "{labels}"' - _LOG.info("Added labels %s to the PR", labels) - if assignee: - cmd += f" --assignee {assignee}" - # TODO(gp): Use _to_single_line_cmd - hlitauti.run(ctx, cmd) - if auto_merge: - cmd = f"gh pr ready {title}" - hlitauti.run(ctx, cmd) - cmd = f"gh pr merge {title} --auto --delete-branch --squash" - hlitauti.run(ctx, cmd) - - -# TODO(gp): Add gh_open_pr to jump to the PR from this branch. - -# TODO(Grisha): probably the section deserves a separate lib. -# ############################################################################# -# Buildmeister dashboard -# ############################################################################# - - -# TODO(Grisha): consider moving to cmamp as we run the workflow from cmamp. -@task -def gh_publish_buildmeister_dashboard_to_s3(ctx, mark_as_latest=True): # type: ignore - """ - Run the buildmeister dashboard notebook and publish it to S3. - - :param mark_as_latest: if True, mark the dashboard as `latest`, otherwise - just publish a timestamped copy - """ - hlitauti.report_task() - # Login to GH CLI. - if hserver.is_inside_ci(): - _LOG.info("Skipping login since running inside CI") - else: - gh_login(ctx) - # Run and publish the Buildmeister dashboard Jupyter notebook locally. 
- run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") - amp_abs_path = hgit.get_amp_abs_path() - notebook_path = os.path.join( - amp_abs_path, "devops/notebooks/Master_buildmeister_dashboard.ipynb" - ) - dst_local_dir = os.path.join(amp_abs_path, "tmp.notebooks") - cmd_run_txt = [ - run_notebook_script_path, - f"--notebook {notebook_path}", - # The notebook does not require a config, so using a random dummy config. - # TODO(Grisha): consider creating a separate config builder for the notebook. - "--config_builder 'datapull.optima.common.qa.qa_check.build_dummy_data_reconciliation_config()'", - f"--dst_dir '{dst_local_dir}'", - "--publish", - "--num_threads serial", - ] - cmd_run_txt = " ".join(cmd_run_txt) - hsystem.system(cmd_run_txt) - # To avoid the dependency on `helpers.hs3`. - import helpers.hs3 as hs3 - - # Get HTML file name. - tmp_local_dir_name = os.path.join(amp_abs_path, "tmp.notebooks") - pattern = "Master_buildmeister_dashboard.0*.html" - only_files = True - use_relative_paths = False - local_html_files = hio.listdir( - tmp_local_dir_name, - pattern, - only_files=only_files, - use_relative_paths=use_relative_paths, - ) - # Assert if more than 1 file is returned. - hdbg.dassert_eq( - len(local_html_files), - 1, - f"Found more than one file in {tmp_local_dir_name} - {local_html_files}", - ) - local_html_file = local_html_files[0] - s3_build_path = os.path.join( - hrecouti.get_repo_config().get_html_bucket_path(), - "build/buildmeister_dashboard", - ) - aws_profile = "ck" - if mark_as_latest: - # Copy the dashboard notebook to S3 as latest build. - s3_latest_build_path = os.path.join( - s3_build_path, "Master_buildmeister_dashboard.latest.html" - ) - hs3.copy_file_to_s3(local_html_file, s3_latest_build_path, aws_profile) - # Copy the timestamped version of the dashboard notebook to S3. - # Need to add a trailing slash to the path to copy the file into the folder. 
- # https://docs.python.org/3/library/os.path.html#os.path.join - s3_build_path_folder = os.path.join(s3_build_path, "") - hs3.copy_file_to_s3(local_html_file, s3_build_path_folder, aws_profile) - - -def _gh_run_and_get_json(cmd: str) -> List[Dict[str, Any]]: - """ - Run a `gh` command and remove colors when running inside a notebook. - - :param cmd: `gh` command to run - :return: parsed JSON output of a command - """ - _, _txt = hsystem.system_to_string(cmd) - if hsystem.is_running_in_ipynb(): - # Remove the colors from the text. - _txt = re.sub(r"\x1b\[((1;)*[0-9]{2})*m", "", _txt) - _LOG.debug(hprint.to_str("_txt")) - ret: List[Dict[str, Any]] = json.loads(_txt) - return ret - - -def gh_get_open_prs(repo: str) -> List[Dict[str, Any]]: - """ - Return a list of open PRs. - - :param repo: repo name in the format "organization/repo", e.g., - "cryptokaizen/cmamp" - """ - cmd = f"gh pr list --state 'open' --json id --repo {repo}" - pull_requests = _gh_run_and_get_json(cmd) - return pull_requests - - -def _get_best_workflow_run( - workflow_name: str, - workflow_runs: List[Dict[str, Any]], - *, - preferred_event: Optional[str] = None, -) -> Optional[Dict[str, Any]]: - """ - Pick the best available workflow run: - - If `preferred_event` is specified (e.g., "schedule"), try that first. - - Otherwise, return the most recent success/failure run. 
- - :param workflow_name: GitHub Actions workflow name - :param workflow_runs: run metadata, sorted most-recent-first - :param preferred_event: trigger type to prioritize (e.g., "schedule") - :return: best-matching run - e.g., - ``` - { - 'conclusion': 'success', - 'status': 'completed', - 'url': 'https://github.com/cryptokaizen/cmamp/actions/runs/8714881296', - 'workflowName': 'Allure fast tests' - } - """ - run_status = None - if preferred_event: - for run in workflow_runs: - if run.get("event") == preferred_event and run["conclusion"] in [ - "success", - "failure", - ]: - run_status = run - break - if run_status is None: - _LOG.warning( - "No '%s' run found for workflow '%s'", - preferred_event, - workflow_name, - ) - if run_status is None: - for run in workflow_runs: - if run["conclusion"] in ["success", "failure"]: - run_status = run - break - return run_status - - -def gh_get_workflows( - repo_name: str, *, sort: bool = True -) -> List[Dict[str, str]]: - """ - Get a list of workflows for a given repo. - - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param sort: if True, sort the list of workflow names - :return: list of workflows, e.g., [{"id": "12520125", "name": "Fast - tests"}, {"id": "12520124", "name": "Slow tests"}] - """ - hdbg.dassert_isinstance(repo_name, str) - _LOG.debug(hprint.to_str("repo_name")) - # Get the workflow list. - cmd = f"gh workflow list --json id,name --repo {repo_name}" - workflows = _gh_run_and_get_json(cmd) - workflows = [ - {"id": str(workflow["id"]), "name": workflow["name"]} - for workflow in workflows - ] - # sort workflow by name - if sort: - workflows = sorted(workflows, key=lambda workflow: workflow["name"]) - return workflows - - -def gh_get_workflow_details( - repo_name: str, workflow_id: str, fields: List[str], limit: int -) -> List[Dict[str, Any]]: - """ - Return the stats for a given workflow. 
- - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param workflow_id: workflow id, e.g., "12520125" - :param fields: list of fields to return, e.g., ["workflowName", "status"] - :param limit: number of runs to return - :return: workflow stats - Example output: - ``` - [ - { - "conclusion": "success", - "status": "completed", - "url": "https://github.com/cryptokaizen/cmamp/actions/runs/7757345960", - "workflowName": "Slow tests" - } - ] - ``` - """ - hdbg.dassert_isinstance(repo_name, str) - hdbg.dassert_isinstance(workflow_id, str) - hdbg.dassert_container_type(fields, List, str) - _LOG.debug(hprint.to_str("repo_name workflow_id fields")) - # Fetch the latest `limit` runs for status calculation. - cmd = f""" - gh run list \ - --json {",".join(fields)} \ - --repo {repo_name} \ - --branch master \ - --limit {limit} \ - --workflow "{workflow_id}" - """ - workflow_statuses = _gh_run_and_get_json(cmd) - # We still want to return the statuses even there are less runs than requested. E.g., there is a new workflow with a few runs or there is a workflow that was never run. - hdbg.dassert_eq(len(workflow_statuses), limit, only_warning=True) - _LOG.debug("workflow_statuses=\n%s", workflow_statuses) - return workflow_statuses - - -def gh_get_details_for_all_workflows( - repo_list: List[str], -) -> "pd.DataFrame": # noqa: F821 - """ - Get status for all the workflows. - - :param repo_list: list of repos to get the status for e.g., - ["cryptokaizen/cmamp", "cryptokaizen/orange"] - :return: a table with the status of all the workflows, e.g., - ``` - Repo workflowName url status - cryptokaizen/cmamp Allure fast tests https://github.com/cryptokaizen/cmamp/actions/... completed - cryptokaizen/cmamp Allure slow tests https://github.com/cryptokaizen/cmamp/actions/... completed - ``` - """ - import pandas as pd - - # TODO(Grisha): expose cols to the interface, i.e. a caller decides what to do. 
- gh_cols = ["workflowName", "url", "status", "conclusion", "event"] - # Import locally in order not to introduce external dependencies to the lib. - repo_dfs = [] - for repo_name in repo_list: - # Get all workflows for the given repo. - workflows = gh_get_workflows(repo_name) - # For each workflow find the last run. - for workflow in workflows: - # Get at least a few runs to compute the status; this is useful when - # the latest run is not completed, in this case the run before the - # latest one tells the status for a workflow. - limit = 10 - workflow_id = workflow["id"] - workflow_name = workflow["name"] - workflow_statuses = gh_get_workflow_details( - repo_name, workflow_id, gh_cols, limit - ) - if len(workflow_statuses) < limit: - # TODO(Grisha): should we just insert empty rows as placeholders so that - # we know that such workflows exist? - _LOG.warning( - "Not enough runs to compute status for '%s', repo '%s', skipping the workflow", - workflow_name, - repo_name, - ) - continue - # Get the latest successful or failed workflow run (prioritize scheduled run if available). - SCHEDULED_WORKFLOWS = { - "Gitleaks Scan", - } - preferred_event = ( - "schedule" if workflow_name in SCHEDULED_WORKFLOWS else None - ) - workflow_status = _get_best_workflow_run( - workflow_name, workflow_statuses, preferred_event=preferred_event - ) - if workflow_status is None: - _LOG.warning( - "No successful or failed runs found for '%s', repo '%s', skipping the workflow", - workflow_name, - repo_name, - ) - continue - # Access the info of latest workflow run. - workflow_status = pd.DataFrame([workflow_status]) - workflow_status["repo_name"] = repo_name - repo_dfs.append(workflow_status) - # Collect per-repo tables into a single DataFrame. - df = pd.concat(repo_dfs, ignore_index=True) - # Rename the columns. 
- df = df.drop(columns=["status"]) - df = df.rename(columns={"workflowName": "workflow_name"}) - return df - - -def gh_get_overall_build_status_for_repo( - repo_df: "pd.Dataframe", # noqa: F821 - *, - use_colors: bool = True, -) -> str: - """ - Return the overall status of the workflows for a repo. - - :param repo_df: table with the status of the workflows for a repo - :param use_colors: if True, return the status with colors - :return: overall status of the build for a repo - """ - if use_colors: - hdbg.dassert( - hsystem.is_running_in_ipynb(), - msg="The use_colors option is applicable only when running inside a Jupyter notebook", - ) - # See: https://stackoverflow.com/questions/19746350/how-to-change-color-in-markdown-cells-ipython-jupyter-notebook - failed_status = 'Failed' - success_status = 'Success' - else: - failed_status = "Failed" - success_status = "Success" - if "failure" in repo_df["conclusion"].values: - # The build is failed if at least one workflow is failed. - overall_status = failed_status - else: - overall_status = success_status - return overall_status - - -def gh_get_workflow_type_names( - repo_name: str, *, sort: bool = True -) -> List[str]: - """ - Get a list of workflow names for a given repo. - - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param sort: if True, sort the list of workflow names - :return: list of workflow names, e.g., ["Fast tests", "Slow tests"] - """ - hdbg.dassert_isinstance(repo_name, str) - _LOG.debug(hprint.to_str("repo_name")) - # Get the workflow list. - cmd = f"gh workflow list --json name --repo {repo_name}" - workflow_types = _gh_run_and_get_json(cmd) - workflow_names = [workflow["name"] for workflow in workflow_types] - if sort: - workflow_names = sorted(workflow_names) - # Check for duplicate workflow names. 
- hdbg.dassert_no_duplicates( - workflow_names, - f"Found duplicate workflow names in repo '{repo_name}'", - ) - return workflow_names - - -def gh_get_org_team_names(org_name: str = "", *, sort: bool = True) -> List[str]: - """ - Get a list of team names for a GitHub organization. - - :param org_name: organization name, e.g., "causify-ai". If empty, - infers from the current repo - :param sort: if True, sort team names alphabetically - :return: list of team names (slugs) - Example output: - ``` - ["dev_system", "dev_frontend", "qa_team"] - ``` - """ - org_name = _get_org_name(org_name) - _LOG.debug(hprint.to_str("org_name")) - # Get the team list using GitHub API. - cmd = f"gh api /orgs/{org_name}/teams --paginate" - teams_data = _gh_run_and_get_json(cmd) - # Extract team slugs from the response. - team_names = [team["slug"] for team in teams_data] - # Sort team names if requested. - if sort: - team_names = sorted(team_names) - _LOG.debug("Found %s teams for org '%s'", len(team_names), org_name) - return team_names - - -def gh_get_team_member_names(team_slug: str, *, org_name: str = "") -> List[str]: - """ - Get a list of member usernames for a specific team in a GitHub - organization. - - :param team_slug: team slug (URL-friendly team name), e.g., "dev_system" - :param org_name: organization name, e.g., "causify-ai". If empty, - infers from the current repo - :return: list of member usernames (login names) - Example output: - ``` - ["username1", "username2", "username3"] - ``` - """ - org_name = _get_org_name(org_name) - hdbg.dassert_isinstance(team_slug, str) - _LOG.debug(hprint.to_str("org_name team_slug")) - # Get the team members using GitHub API. - cmd = f"gh api /orgs/{org_name}/teams/{team_slug}/members --paginate" - members_data = _gh_run_and_get_json(cmd) - # Extract usernames from the response. 
- usernames = [member["login"] for member in members_data] - _LOG.debug( - "Found %s members in team '%s' (org: '%s')", - len(usernames), - team_slug, - org_name, - ) - return usernames - - -def make_clickable(url: str) -> str: - """ - Wrap a URL as an HTML anchor tag. - - :param url: URL to wrap (e.g., "https://github.com/causify-ai/cmamp/actions/...") - :return: HTML anchor string that makes the URL clickable in rendered Markdown - """ - anchor = f'{url}' - return anchor - - -def color_format(val: str, status_color_mapping: Dict[str, str]) -> str: - """ - Return a background-color style for DataFrame.style.map based on status. - - :param val: value to evaluate for status-based styling (e.g., - "success" or "failure") - :param status_color_mapping: map status strings to color values, - e.g.: { "success": "green", "failure": "red" } - :return: CSS string to apply as a style, e.g., "background-color: - green" - """ - color = status_color_mapping.get(val, "grey") - style = f"background-color: {color}" - return style - - -def render_repo_workflow_status_table( - workflow_df: "pd.DataFrame", # noqa: F821 - status_color_mapping: Dict[str, str], - timezone: str = "America/New_York", -) -> None: - """ - Render a dashboard summary of workflow statuses grouped by repo. 
- - :param workflow_df: data with columns ["repo_name", "workflow_name", - "conclusion", "url"] - :param status_color_mapping: color for outcomes {"success": "green", - "failure": "red"} - :param timezone: timezone for timestamp display - """ - import pandas as pd - from IPython.display import Markdown, display - - workflow_df["url"] = workflow_df["url"].apply(make_clickable) - repos = workflow_df["repo_name"].unique() - display(Markdown("## Overall Status")) - current_timestamp = pd.Timestamp.now(tz=timezone) - display(Markdown(f"**Last run: {current_timestamp}**")) - for repo in repos: - repo_df = workflow_df[workflow_df["repo_name"] == repo] - overall_status = gh_get_overall_build_status_for_repo(repo_df) - display(Markdown(f"## {repo}: {overall_status}")) - repo_df = repo_df.drop(columns=["repo_name"]) - display( - repo_df.style.map( - color_format, - status_color_mapping=status_color_mapping, - subset=["conclusion"], - ) - ) - - -def get_workflow_run_ids( - repo_path: str, workflow_id: str, *, older_than_days: Optional[int] = None -) -> List[str]: - """ - Get workflow run IDs, optionally filtering by age. - - :param repo_path: repository path in format "org/repo" - :param workflow_id: GitHub workflow ID - :param older_than_days: if specified, only return runs older than - this many days - :return: list of run IDs - """ - # See GitHub CLI API documentation: https://cli.github.com/manual/gh_api - # We use the -q/--jq option to filter results using jq syntax. - if older_than_days is not None: - # Use jq to filter runs by age directly in the gh api command. 
- # jq date filtering breakdown: - # - `fromdateiso8601` converts ISO 8601 date to Unix timestamp (seconds since epoch) - # - `now` returns current Unix timestamp - # - Days are converted to seconds (days * 86400 seconds/day) - # - Example: if older_than_days=30, cutoff = now - (30 * 86400) - # Only runs where created_at timestamp < cutoff are selected - cutoff_seconds = older_than_days * 86400 - # Log the cutoff date for debugging. - cutoff_date = datetime.datetime.now( - datetime.timezone.utc - ) - datetime.timedelta(days=older_than_days) - _LOG.debug("Filtering runs created before: %s", cutoff_date.isoformat()) - jq_filter = ( - f".workflow_runs[] | " - f"select((.created_at | fromdateiso8601) < (now - {cutoff_seconds})) | " - f".id" - ) - # WARNING: Using --paginate to fetch all workflow runs can be slow - # for workflows with a large number of runs (e.g., 1000+ runs). - # The GitHub API paginates results, and jq filters each page. - cmd = ( - f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " - f"--paginate -q '{jq_filter}'" - ) - else: - # Get all run IDs without date filtering. - # Example API output (one ID per line): - # 11758293857 - # 11758293856 - # 11758293855 - cmd = ( - f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " - "--paginate -q '.workflow_runs[].id'" - ) - # Execute command and parse output. - _, run_ids_output = hsystem.system_to_string(cmd) - run_ids = [ - run_id.strip() - for run_id in run_ids_output.strip().split("\n") - if run_id.strip() - ] - return run_ids - - -@task -def gh_delete_workflow_runs( # type: ignore - ctx, workflow_name, older_than_days=None, dry_run=False, confirmation=True -): - """ - Delete all workflow runs for a given workflow. - - :param workflow_name: name of the workflow to delete runs for - :param older_than_days: only delete runs older than this many days - (optional). If None, delete all runs. 
Example: - older_than_days=30 deletes runs created more than 30 days ago - :param dry_run: if True, show what would be deleted without actually - deleting - :param confirmation: if True, prompt user for confirmation before - deletion (default: True) - """ - hlitauti.report_task( - txt=hprint.to_str("workflow_name older_than_days dry_run confirmation") - ) - # Convert older_than_days to int if provided (invoke passes strings). - if older_than_days is not None: - older_than_days = int(older_than_days) - hdbg.dassert_lte(1, older_than_days) - # Login. - gh_login(ctx) - # - repo_full_name_with_host, _ = _get_repo_full_name_from_cmd("current") - # Get workflow ID by name. - repo_path = repo_full_name_with_host.replace("github.com/", "") - workflows = gh_get_workflows(repo_path) - workflow_id = None - for workflow in workflows: - if workflow["name"] == workflow_name: - workflow_id = workflow["id"] - break - if not workflow_id: - available_workflows = [w["name"] for w in workflows] - raise ValueError( - f"Workflow '{workflow_name}' not found. " - f"Available workflows: {available_workflows}" - ) - _LOG.info("Found workflow '%s' with ID: %s", workflow_name, workflow_id) - # Get all run IDs for this workflow, optionally filtering by date. - run_ids = get_workflow_run_ids( - repo_path, workflow_id, older_than_days=older_than_days - ) - # Check if any runs were found. - age_filter_msg = ( - f" older than {older_than_days} days" - if older_than_days is not None - else "" - ) - if not run_ids: - _LOG.info( - "No workflow runs%s found for '%s'", age_filter_msg, workflow_name - ) - return - _LOG.info("Found %d workflow runs%s to delete", len(run_ids), age_filter_msg) - # Prompt for confirmation if required. 
- if confirmation and not dry_run: - confirmation_msg = ( - f"\nAre you sure you want to delete {len(run_ids)} workflow run(s)" - f"{age_filter_msg} for '{workflow_name}'?\n" - f"Repository: {repo_full_name_with_host}\n" - f"Type 'yes' or 'y' to confirm: " - ) - user_input = input(confirmation_msg).strip().lower() - if user_input not in ("yes", "y"): - _LOG.info("Deletion cancelled by user") - return - _LOG.info("User confirmed deletion, proceeding...") - # Delete each run. - deleted_count = 0 - failed_count = 0 - for run_id in run_ids: - try: - cmd = f"gh api -X DELETE /repos/{repo_path}/actions/runs/{run_id}" - _LOG.info("Deleting run %s", run_id) - hlitauti.run(ctx, cmd, dry_run=dry_run) - deleted_count += 1 - except (invexc.UnexpectedExit, RuntimeError) as e: - _LOG.error("Failed to delete run %s: %s", run_id, str(e)) - failed_count += 1 - _LOG.info( - "Deletion complete: %d successful, %d failed out of %d total runs", - deleted_count, - failed_count, - len(run_ids), - ) - - -# ############################################################################# - -# def gh_get_pr_title(pr_url: str) -> str: -# > gh pr view https://github.com/causify-ai/helpers/pull/754 --json title -q .title -# HelpersTask705_Extend_coverage_in_pytest_to_cover_when_we_run_through_system diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py deleted file mode 100644 index b77bb125d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py +++ /dev/null @@ -1,1500 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_git as hlitagit -""" - -import logging -import os -import re -import stat -import subprocess -import time -from typing import Any, List - -from invoke.tasks import task - -import helpers.hdbg as hdbg -import 
helpers.hsystem as hsystem - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# Bits matching `chmod a+w` / `chmod a-w` on the symlink inode (not the target). -_SYMLINK_WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH - - -def _collect_symlinks(dir: str) -> List[str]: - """ - Collect symlink paths under a given directory. - - :param dir: directory to walk - :return: symlink paths under `dir` - """ - out: List[str] = [] - for dirpath, dirnames, filenames in os.walk(dir, topdown=True): - # Skips `.git` directories. Does not follow symlinked directories. - if ".git" in dirnames: - dirnames.remove(".git") - for name in filenames: - path = os.path.join(dirpath, name) - if os.path.islink(path): - out.append(path) - for name in dirnames: - path = os.path.join(dirpath, name) - if os.path.islink(path): - out.append(path) - return out - - -def _add_write_perm_to_symlink(dir: str) -> None: - """ - Add write permission for all on each symlink under the given directory. - - :param dir: directory to walk - """ - _LOG.info("Adding write permission for all on each symlink under %s", dir) - for path in _collect_symlinks(dir): - try: - mode = os.lstat(path).st_mode - os.chmod( - path, - mode | _SYMLINK_WRITE_BITS, - ) - except OSError: - hdbg.dassert( - False, - "Failed to add write permissions to symlink; manual intervention may be needed", - ) - - -def _remove_write_perm_from_symlink(dir: str) -> None: - """ - Remove write permission for all on each symlink under a given directory. 
- - :param dir: directory to walk - """ - _LOG.info("Removing write permission for all on each symlink under %s", dir) - for path in _collect_symlinks(dir): - if not os.path.exists(path): - _LOG.warning("Skipping broken symlink: %s", path) - continue - try: - mode = os.lstat(path).st_mode - os.chmod( - path, - mode & ~_SYMLINK_WRITE_BITS, - ) - except OSError: - hdbg.dassert( - False, - "Failed to remove write permissions from symlink; manual intervention may be needed", - ) - - -def run_git_recursively(ctx: Any, cmd_: str) -> None: - """ - Execute a git command in the main repository and all submodules. - - :param ctx: Invoke context - :param cmd_: Git command to execute - """ - cmd = cmd_ - hlitauti.run(ctx, cmd) - # Run the same command on all submodules. - cmd = f"git submodule foreach '{cmd_}'" - hlitauti.run(ctx, cmd) - - -@task -def git_pull(ctx): # type: ignore - """ - Pull latest changes from remote for main repo and all submodules. - - Temporarily enables write permissions on symlinks to allow pull operations. - """ - hlitauti.report_task() - # Temporarily grant write access to symlinks needed for pulling. - root_dir = hgit.get_client_root(super_module=False) - _add_write_perm_to_symlink(root_dir) - try: - # Pull with autostash to preserve local changes during pull. - cmd = "git pull --autostash" - run_git_recursively(ctx, cmd) - finally: - # Restore restricted permissions on symlinks after pull completes. - _remove_write_perm_from_symlink(root_dir) - - -@task -def git_fetch_master(ctx): # type: ignore - """ - Fetch master branch from remote without switching to it. - - Updates the local master branch to track the latest remote master without - affecting the current branch. - """ - hlitauti.report_task() - # Fetch remote master directly into local master ref (colon syntax). 
- cmd = "git fetch origin master:master" - run_git_recursively(ctx, cmd) - - -@task -def git_merge_master( - ctx, - abort_if_not_ff=False, - abort_if_not_clean=True, - skip_fetch=False, - auto_merge=False, # type: ignore -): - """ - Merge `origin/master` into the current branch. - - :param abort_if_not_ff: abort if fast-forward is not possible - :param abort_if_not_clean: abort if the client is not clean - :param skip_fetch: skip fetching master - :param auto_merge: automatically commit and push if merge is - successful - """ - hlitauti.report_task() - # Verify working directory is clean before merging to avoid losing changes. - hgit.is_client_clean(dir_name=".", abort_if_not_clean=abort_if_not_clean) - # Fetch latest master from remote to ensure we merge the latest changes. - if not skip_fetch: - git_fetch_master(ctx) - # Perform merge, optionally restricting to fast-forward only to maintain linear history. - cmd = "git merge master" - if abort_if_not_ff: - cmd += " --ff-only" - hlitauti.run(ctx, cmd) - # Commit and push automatically if merge succeeded and user requested it. - if auto_merge: - _LOG.info("Auto-merge enabled: committing and pushing changes") - cmd = 'git commit -am "Merge master" && git push' - hlitauti.run(ctx, cmd) - - -@task -def git_clean(ctx, fix_perms_=False, dry_run=False): # type: ignore - """ - Clean the repo_short_name and its submodules from artifacts. - - Run `git status --ignored` to see what it's skipped. - """ - hlitauti.report_task(txt=hprint.to_str("dry_run")) - - def _run_all_repos(cmd: str) -> None: - # Use `run(ctx, cmd)` instead of `hsystem.system()` so unit tests can easily mock context. - hlitauti.run(ctx, cmd) - # Also clean submodules to ensure they're included in cleanup. - cmd = f"git submodule foreach '{cmd}'" - hlitauti.run(ctx, cmd) - - # Remove untracked files and directories from main repo and submodules. 
- git_clean_cmd = "git clean -fd" - if dry_run: - git_clean_cmd += " --dry-run" - # Suppress errors since git clean reports non-fatal warnings. - git_clean_cmd += " >/dev/null 2>&1" - _run_all_repos(git_clean_cmd) - # TODO(*): Add "are you sure?" or a `--force switch` to avoid to cancel by - # mistake. - # Fix permissions on symlinks if requested, then clean any temporary files created. - if fix_perms_: - cmd = "invoke fix_perms" - hlitauti.run(ctx, cmd) - # Remove temporary files that may have been created during permission fix. - _run_all_repos(git_clean_cmd) - # Remove common build artifacts and cache directories. - to_delete = [ - r"*\.pyc", - r"*\.pyo", - r".coverage", - r".DS_Store", - r".ipynb_checkpoints", - r".mypy_cache", - r".pytest_cache", - r".ruff_cache", - r".venv", - r"__pycache__", - r"cfile", - r"tmp.*", - r"*.tmp", - r".*_cache", - "htmlcov", - ] - opts = [f"-name '{opt}'" for opt in to_delete] - opts = " -o ".join(opts) - cmd = f"find . {opts} | sort" - if not dry_run: - cmd += " | xargs rm -rf" - hlitauti.run(ctx, cmd) - - -@task -def git_add_all_untracked(ctx): # type: ignore - """ - Add all untracked files to Git. - """ - hlitauti.report_task() - # cmd = "git add $(git ls-files -o --exclude-standard)" - cmd = "git ls-files -o --exclude-standard -z | xargs -0 git add" - hlitauti.run(ctx, cmd) - - -@task -def git_patch_create( # type: ignore - ctx, mode="diff", modified=False, branch=False, last_commit=False, files="" -): - """ - Create a patch file for the entire repo_short_name client from the base - revision. This script accepts a list of files to package, if specified. - - The parameters `modified`, `branch`, `last_commit` have the same meaning as - in `_get_files_to_process()`. 
- - :param mode: what kind of patch to create - - "diff": (default) creates a patch with the diff of the files - - "tar": creates a tar ball with all the files - """ - hlitauti.report_task( - txt=hprint.to_str("mode modified branch last_commit files") - ) - _ = ctx - # TODO(gp): Check that the current branch is up to date with master to avoid - # failures when we try to merge the patch. - hdbg.dassert_in( - mode, - ("tar", "diff"), - "Patch mode must be either 'tar' for archives or 'diff' for patches", - ) - # Currently only handles the current submodule (not parent repos). - # TODO(gp): Extend this to handle also nested repos. - super_module = False - git_client_root = hgit.get_client_root(super_module) - hash_ = hgit.get_head_hash(git_client_root, short_hash=True) - # Use timestamp and hash to ensure unique patch filenames across time. - timestamp = hlitauti.get_ET_timestamp() - tag = os.path.basename(git_client_root) - dst_file = f"patch.{tag}.{hash_}.{timestamp}" - if mode == "tar": - dst_file += ".tgz" - elif mode == "diff": - dst_file += ".patch" - else: - hdbg.dfatal("Invalid code path") - _LOG.debug("dst_file=%s", dst_file) - # Show what changes will be included in the patch. - _LOG.info( - "Difference between HEAD and master:\n%s", - hgit.get_summary_files_in_branch("master", dir_name="."), - ) - # Determine which files to include in the patch. - all_ = False - # Allow optional user-specified file subset (can be combined with other selectors). - mutually_exclusive = False - # Filter out directories; patches only work with files. - remove_dirs = True - files_as_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - _LOG.info("Files to save:\n%s", hprint.indent("\n".join(files_as_list))) - if not files_as_list: - _LOG.warning("Nothing to patch: exiting") - return - files_as_str = " ".join(files_as_list) - # Choose command based on patch format: archive vs diff. 
- cmd = "" - if mode == "tar": - # Create compressed tar archive of the selected files. - cmd = f"tar czvf {dst_file} {files_as_str}" - cmd_inv = "tar xvzf" - elif mode == "diff": - # Generate diff against various targets for different merge strategies. - opts: str - if modified: - # Only uncommitted changes in working tree. - opts = "HEAD" - elif branch: - # All changes since branch point (includes commits on current branch). - opts = "master..." - elif last_commit: - # Only changes in the most recent commit. - opts = "HEAD^" - else: - raise ValueError( - "You need to specify one among -modified, --branch, " - "--last-commit" - ) - cmd = f"git diff {opts} --binary {files_as_str} >{dst_file}" - cmd_inv = "git apply" - else: - raise ValueError(f"Invalid cmd='{cmd}'") - # Execute the patch creation command. - _LOG.info("Creating the patch into %s", dst_file) - hdbg.dassert_ne( - cmd, - "", - "Patch creation command must not be empty", - ) - _LOG.debug("cmd=%s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - if not rc: - _LOG.warning("Command failed with rc=%d", rc) - # Provide instructions for applying the patch on different environments. - remote_file = os.path.basename(dst_file) - abs_path_dst_file = os.path.abspath(dst_file) - msg = f""" - # To apply the patch and execute: - > git checkout {hash_} - > {cmd_inv} {abs_path_dst_file} - - # To apply the patch to a remote client: - > export SERVER="server" - > export CLIENT_PATH="~/src" - > scp {dst_file} $SERVER: - > ssh $SERVER 'cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}'" - """ - msg = hprint.dedent(msg) - print(msg) - - -def _filter_git_files_by_type( - file_paths: List[str], - file_extensions: List[str], -) -> List[str]: - """ - Filter files by type for git_files task. - - Unlike linters2 version, this returns a flat list (not a tuple) - and does not separate paired jupytext files. 
- - :param file_paths: files to filter - :param file_extensions: list of file extensions to include (e.g., ["py", "ipynb", "md"]) - :return: filtered list of files - """ - hdbg.dassert_isinstance(file_extensions, list) - filtered = [] - for f in file_paths: - for ext in file_extensions: - if f.endswith(f".{ext}"): - filtered.append(f) - break - return filtered - - -@task -def git_files( # type: ignore - ctx, - modified=False, - branch=False, - last_commit=False, - file_types="", - pbcopy=False, - only_print_files=False, -): - """ - Report which files are changed in the current branch with respect to master. - - The params have the same meaning as in `_get_files_to_process()`. - - Examples: - > invoke git_files --modified - > invoke git_files --branch --file_types "py,ipynb" - > invoke git_files --last_commit --file_types "py" - - :param file_types: comma-separated list of file extensions to include - - E.g., "py,ipynb,md" - :param only_print_files: only print files without logging headers/footers (default: False) - """ - if not only_print_files: - hlitauti.report_task() - _ = ctx - all_ = False - files = "" - # Use mutually_exclusive=True to enforce exactly one filter mode. - mutually_exclusive = True - remove_dirs = True - files_as_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - # Parse file_types into a list of extensions. - if file_types: - file_extensions = [ext.strip() for ext in file_types.split(",")] - # Filter by file type. - files_as_list = _filter_git_files_by_type(files_as_list, file_extensions) - else: - # file_types="" means every file, so don't filter. - pass - print("\n".join(sorted(files_as_list))) - # Optionally copy the file list to clipboard for easy pasting. 
- if not only_print_files: - res = " ".join(files_as_list) - hsystem.to_pbcopy(res, pbcopy) - - -@task -def git_last_commit_files(ctx, pbcopy=True): # type: ignore - """ - Print the status of the files in the previous commit. - - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - # Display the raw git log output for the latest commit. - cmd = 'git log -1 --name-status --pretty=""' - hlitauti.run(ctx, cmd) - # Parse the files that were actually committed (filtering out deletions if needed). - files = hgit.get_previous_committed_files(".") - txt = "\n".join(files) - print(f"\n# The files modified are:\n{txt}") - # Optionally copy the file list to clipboard for easy pasting into commands. - res = " ".join(files) - hsystem.to_pbcopy(res, pbcopy) - - -@task -def git_roll_amp_forward(ctx): # type: ignore - """ - Update amp submodule pointer to the latest master commit. - - Checks out master in amp, pulls latest changes, updates the parent repo's - submodule pointer, and commits the change. - """ - hlitauti.report_task() - AMP_DIR = "amp" - if os.path.exists(AMP_DIR): - # Update amp submodule to point to the latest master. - cmds = [ - f"cd {AMP_DIR} && git checkout master", - f"cd {AMP_DIR} && git pull", - # Stage the submodule pointer change in the parent repository. 
- f"git add {AMP_DIR}", - f"git commit -m 'Roll {AMP_DIR} pointer forward'", - "git push", - ] - for cmd in cmds: - hlitauti.run(ctx, cmd) - else: - _LOG.warning("%s does not exist, aborting", AMP_DIR) - - -# TODO(gp): Add git_co(ctx) -# Reuse hgit.git_stash_push() and hgit.stash_apply() -# git stash save your-file-name -# git checkout master -# # do whatever you had to do with master -# git checkout staging -# git stash pop - - -# ############################################################################# -# Branches workflows -# ############################################################################# - - -# TODO(gp): Consider renaming the commands as `git_branch_*` - - -@task -def git_branch_files(ctx): # type: ignore - """ - Report which files were added, changed, and modified in the current branch - with respect to master. - - This is a more detailed version of `invoke git_files --branch`, showing file - statuses (added, modified, deleted) rather than just the file list. - """ - hlitauti.report_task() - _ = ctx - # Display the detailed summary of changes made on this branch. - print( - "Difference between HEAD and master:\n" - + hgit.get_summary_files_in_branch("master", dir_name=".") - ) - - -@task -def git_branch_create( # type: ignore - ctx, - branch_name="", - issue_id=0, - repo_short_name="current", - suffix="", - only_branch_from_master=True, - check_branch_name=True, -): - """ - Create and push upstream branch `branch_name` or the one corresponding to - `issue_id` in repo_short_name `repo_short_name`. 
- - E.g., - ``` - > git checkout -b LemTask169_Get_GH_actions - > git push --set- upstream origin LemTask169_Get_GH_actions - ``` - - :param branch_name: name of the branch to create (e.g., - `LemTask169_Get_GH_actions`) - :param issue_id: use the canonical name for the branch corresponding to that - issue - :param repo_short_name: name of the GitHub repo_short_name that the `issue_id` - belongs to - - "current" (default): the current repo_short_name - - short name (e.g., "amp", "lm") of the branch - :param suffix: suffix (e.g., "02") to add to the branch name when using issue_id - :param only_branch_from_master: only allow to branch from master - :param check_branch_name: make sure the name of the branch is valid like - `{Amp,...}TaskXYZ_...` - """ - hlitauti.report_task() - if issue_id > 0: - # Convert GitHub issue ID to branch name. - hdbg.dassert_eq( - branch_name, - "", - "Cannot specify both --issue and --branch_name; choose one", - ) - title, _ = hlitagh._get_gh_issue_title(issue_id, repo_short_name) - branch_name = title - _LOG.info( - "Issue %d in %s repo_short_name corresponds to '%s'", - issue_id, - repo_short_name, - branch_name, - ) - if suffix != "": - # Add the suffix. - _LOG.debug("Adding suffix '%s' to '%s'", suffix, branch_name) - if suffix[0] in ("-", "_"): - _LOG.warning( - "Suffix '%s' should not start with '%s': removing", - suffix, - suffix[0], - ) - suffix = suffix.rstrip("-_") - branch_name += "_" + suffix - _LOG.info("branch_name='%s'", branch_name) - hdbg.dassert_ne( - branch_name, - "", - "Branch name cannot be empty", - ) - if check_branch_name: - # Reject numeric-only branch names to avoid confusion with commit SHAs. - m = re.match(r"^\d+$", branch_name) - hdbg.dassert( - not m, - "Branch names with only numbers are invalid", - ) - # Enforce naming convention `{RepoPrefix}TaskXYZ_Description` for consistency. - # The valid format of a branch name is `AmpTask1903_Implemented_system_...`. 
- m = re.match(r"^\S+Task\d+_\S+$", branch_name) - hdbg.dassert( - m, - "Branch name must follow convention: '{RepoPrefix,Amp,...}TaskXYZ_...'", - ) - # Prevent accidental duplicate branches. - hdbg.dassert( - not hgit.does_branch_exist(branch_name, mode="all"), - "Branch '%s' already exists", - branch_name, - ) - # Make sure we are branching from `master`, unless that's what the user wants. - # TODO(Vlad): Remove before merging - temporarily allowing branching from non-master. - curr_branch = hgit.get_branch_name() - if curr_branch != "master": - if only_branch_from_master: - _LOG.warning( - f"Branching from '{curr_branch}' instead of 'master'. " - "This is temporarily allowed but should be reviewed before merging." - ) - # hdbg.dfatal( - # f"You should branch from master and not from '{curr_branch}'" - # ) - # Fetch master. - cmd = "git pull --autostash --rebase" - hlitauti.run(ctx, cmd) - # git checkout -b LmTask169_Get_GH_actions_working_on_lm - cmd = f"git checkout -b {branch_name}" - hlitauti.run(ctx, cmd) - cmd = f"git push --set-upstream origin {branch_name}" - hlitauti.run(ctx, cmd) - - -# TODO(gp): @all Move to hgit. -def _delete_branches(ctx: Any, tag: str, confirm_delete: bool) -> None: - """ - Delete branches that have been merged into master. - - :param ctx: Invoke context - :param tag: Either "local" for local branches or "remote" for remote branches - :param confirm_delete: If True, ask user for confirmation before deleting - """ - if tag == "local": - # Delete local branches that are already merged into master. - # > git branch --merged - # * AmpTask1251_Update_GH_actions_for_amp_02 - find_cmd = r"git branch --merged master | grep -v master | grep -v \*" - delete_cmd = "git branch -d" - elif tag == "remote": - # Get the branches to delete. 
- find_cmd = ( - "git branch -r --merged origin/master" - + r" | grep -v master | sed 's/origin\///'" - ) - delete_cmd = "git push origin --delete" - else: - raise ValueError(f"Invalid tag='{tag}'") - # TODO(gp): Use system_to_lines - _, txt = hsystem.system_to_string(find_cmd, abort_on_error=False) - branches = hsystem.text_to_list(txt) - # Print info. - _LOG.info( - "There are %d %s branches to delete:\n%s", - len(branches), - tag, - "\n".join(branches), - ) - if not branches: - # No branch to delete, then we are done. - return - # Ask whether to continue. - if confirm_delete: - hsystem.query_yes_no( - hdbg.WARNING + f": Delete these {tag} branches?", abort_on_no=True - ) - for branch in branches: - cmd_tmp = f"{delete_cmd} {branch}" - hlitauti.run(ctx, cmd_tmp) - - -@task -def git_branch_delete_merged(ctx, confirm_delete=True): # type: ignore - """ - Remove (both local and remote) branches that have been merged into master. - """ - hlitauti.report_task() - # Ensure user is on master since we're deleting branches merged into master. - hdbg.dassert_eq( - hgit.get_branch_name(), - "master", - "Must be on master branch to safely delete merged branches", - ) - # - cmd = "git fetch --all --prune" - hlitauti.run(ctx, cmd) - # Delete local and remote branches that are already merged into master. - _delete_branches(ctx, "local", confirm_delete) - _delete_branches(ctx, "remote", confirm_delete) - # - cmd = "git fetch --all --prune" - hlitauti.run(ctx, cmd) - - -@task -def git_branch_rename(ctx, new_branch_name): # type: ignore - """ - Rename current branch both locally and remotely. - """ - hlitauti.report_task() - old_branch_name = hgit.get_branch_name(".") - # Ensure new branch name is actually different to avoid no-op rename. 
- hdbg.dassert_ne( - old_branch_name, - new_branch_name, - "New branch name must be different from current branch name", - ) - msg = ( - f"Do you want to rename the current branch '{old_branch_name}' to " - f"'{new_branch_name}'" - ) - hsystem.query_yes_no(msg, abort_on_no=True) - # https://stackoverflow.com/questions/30590083 - # Rename the local branch to the new name. - # > git branch -m - cmd = f"git branch -m {new_branch_name}" - hlitauti.run(ctx, cmd) - # Delete the old branch on remote. - # > git push --delete - cmd = f"git push origin --delete {old_branch_name}" - hlitauti.run(ctx, cmd) - # Prevent Git from using the old name when pushing in the next step. - # Otherwise, Git will use the old upstream name instead of . - # > git branch --unset-upstream - cmd = f"git branch --unset-upstream {new_branch_name}" - hlitauti.run(ctx, cmd) - # Push the new branch to remote. - # > git push - cmd = f"git push origin {new_branch_name}" - hlitauti.run(ctx, cmd) - # Reset the upstream branch for the new_name local branch. - # > git push -u - cmd = f"git push origin u {new_branch_name}" - hlitauti.run(ctx, cmd) - print("Done") - - -@task -def git_branch_next_name(ctx, branch_name=None, method="auto"): # type: ignore - """ - Return a name derived from the current branch so that the branch doesn't - exist. 
- - :param branch_name: if `None` use the current branch name, otherwise specify it - :param method: method to use ('auto', 'github_api', 'linear_scan') - - 'auto' (default): tries GitHub API first, falls back to linear scan - - 'github_api': use only GitHub API method (fast) - - 'linear_scan': use only linear scan method (always works) - - E.g., `AmpTask1903_Implemented_system_Portfolio` -> - `AmpTask1903_Implemented_system_Portfolio_3` - """ - hlitauti.report_task() - _ = ctx - branch_next_name = hgit.get_branch_next_name( - curr_branch_name=branch_name, method=method, log_verb=logging.INFO - ) - print(f"branch_next_name='{branch_next_name}'") - - -@task -def git_branch_copy( # type: ignore - ctx, - new_branch_name="", - skip_git_merge_master=False, - use_patch=False, - check_branch_name=True, -): - """ - Create a new branch with the same content of the current branch. - - :param new_branch_name: name for the new branch - :param skip_git_merge_master: skip merging master into current branch - :param use_patch: apply patching instead of merging - :param check_branch_name: enforce branch naming convention like - `{Amp,...}TaskXYZ_...` - """ - # Patch-based copying is not yet implemented. - hdbg.dassert( - not use_patch, - "Patch-based branch copying is not yet implemented", - ) - # Remove untracked files to ensure clean state when copying branch. - cmd = "git clean -fd" - hlitauti.run(ctx, cmd) - curr_branch_name = hgit.get_branch_name() - # Cannot copy master branch since it would be copying the source to itself. - hdbg.dassert_ne( - curr_branch_name, - "master", - "Cannot copy master branch", - ) - # Sync with master first to ensure new branch includes latest changes (if requested). - if not skip_git_merge_master: - cmd = "invoke git_merge_master --abort-if-not-ff" - hlitauti.run(ctx, cmd) - else: - _LOG.warning("Skipping git_merge_master as requested") - if use_patch: - # TODO(gp): Create a patch or do a `git merge`. 
- pass - # Generate unique branch name if not provided. - if new_branch_name is None or new_branch_name == "": - new_branch_name = hgit.get_branch_next_name() - _LOG.info("new_branch_name='%s'", new_branch_name) - hdbg.dassert_ne( - new_branch_name, - None, - "Branch name must not be None after generation", - ) - # Allow scratch branches to bypass naming convention. - if new_branch_name.startswith("gp_scratch"): - check_branch_name = False - # Create or checkout the target branch. - mode = "all" - new_branch_exists = hgit.does_branch_exist(new_branch_name, mode) - if new_branch_exists: - # Switch to existing branch to copy changes into it. - cmd = f"git checkout {new_branch_name}" - else: - # Create new branch from master as base. - cmd = f"git checkout master && invoke git_branch_create -b '{new_branch_name}'" - if not check_branch_name: - cmd += " --no-check-branch-name" - hlitauti.run(ctx, cmd) - if use_patch: - # TODO(gp): Apply the patch. - pass - # Squash merge copies all commits as a single change without creating a merge commit. - cmd = f"git merge --squash --ff {curr_branch_name} && git reset HEAD" - hlitauti.run(ctx, cmd) - - -# /////////////////////////////////////////////////////////////////////////////// - - -def _git_diff_with_branch( - ctx: Any, - hash_: str, - tag: str, - # - dir_name: str, - subdir: str, - # - diff_type: str, - keep_extensions: str, - skip_extensions: str, - file_name: str, - # - only_print_files: bool, - dry_run: bool, -) -> None: - """ - Diff files from this client against files in a branch using vimdiff. - - Same parameters as `git_branch_diff_with`. - """ - _LOG.debug( - hprint.to_str( - "hash_ tag dir_name diff_type subdir keep_extensions skip_extensions" - " file_name only_print_files dry_run" - ) - ) - # Diff only works on non-master branches to avoid comparing with itself. 
- curr_branch_name = hgit.get_branch_name() - hdbg.dassert_ne( - curr_branch_name, - "master", - "Cannot diff master branch against itself", - ) - # Retrieve the list of changed files between current state and the given hash. - cmd = [] - cmd.append("git diff") - if diff_type: - cmd.append(f"--diff-filter={diff_type}") - cmd.append(f"--name-only HEAD {hash_}") - cmd = " ".join(cmd) - files = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present=False - ) - files = sorted(files) - _LOG.debug("%s", "\n".join(files)) - # Filter to a single specific file if requested. - if file_name: - _LOG.debug("Filter by file_name") - _LOG.info("Before filtering files=%s", len(files)) - files_tmp = [] - for f in files: - if f == file_name: - files_tmp.append(f) - hdbg.dassert_eq( - 1, - len(files_tmp), - "Can't find file_name='%s' in\n%s", - file_name, - "\n".join(files), - ) - files = files_tmp - _LOG.info("After filtering by file_name: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Keep only files with specified extensions (useful for focusing on code vs docs). - if keep_extensions: - _LOG.debug("# Filter by keep_extensions") - _LOG.debug("Before filtering files=%s", len(files)) - extensions_lst = keep_extensions.split(",") - _LOG.warning( - "Keeping files with %d extensions: %s", - len(extensions_lst), - extensions_lst, - ) - files_tmp = [] - for f in files: - if any(f.endswith(ext) for ext in extensions_lst): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by keep_extensions: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Exclude files with specified extensions (useful for skipping config or build files). 
- if skip_extensions: - _LOG.debug("# Filter by skip_extensions") - _LOG.debug("Before filtering files=%s", len(files)) - extensions_lst = skip_extensions.split(",") - _LOG.warning( - "Skipping files with %d extensions: %s", - len(extensions_lst), - extensions_lst, - ) - files_tmp = [] - for f in files: - if not any(f.endswith(ext) for ext in extensions_lst): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by skip_extensions: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Limit diff to files within a specific subdirectory. - if subdir != "": - _LOG.debug("# Filter by subdir") - _LOG.debug("Before filtering files=%s", len(files)) - files_tmp = [] - for f in files: - if f.startswith(subdir): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by subdir: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Summary of what will be diffed. - _LOG.info("\n" + hprint.frame(f"# files={len(files)}")) - _LOG.info("\n" + "\n".join(files)) - if len(files) == 0: - _LOG.warning("No files match the filter criteria: exiting") - return - if only_print_files: - _LOG.warning("Exiting as per user request with --only-print-files") - return - # Create temporary directory to store base versions for comparison. - root_dir = hgit.get_repo_full_name_from_client(super_module=True) - # TODO(gp): We should get a temp dir. - dst_dir = f"/tmp/{root_dir}/tmp.{tag}" - hio.create_dir(dst_dir, incremental=False) - # Build vimdiff commands for each file, retrieving base version from source hash. - script_txt = [] - for branch_file in files: - _LOG.debug("\n%s", hprint.frame(f"branch_file={branch_file}")) - # Use current file as right side (what the branch currently has). - if os.path.exists(branch_file): - right_file = branch_file - else: - # For deleted files, use /dev/null as the right side. - right_file = "/dev/null" - # Flatten directory structure to avoid naming conflicts in temp directory. 
- tmp_file = branch_file - tmp_file = tmp_file.replace("/", "_") - tmp_file = os.path.join(dst_dir, tmp_file) - _LOG.debug( - "Extracting base version of %s to %s", - branch_file, - tmp_file, - ) - # Extract the base version from the specified hash/branch. - cmd = f"git show {hash_}:{branch_file} >{tmp_file}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc != 0: - # File is new in the branch (didn't exist in base hash). - _LOG.debug("File '%s' is new (doesn't exist in base)", branch_file) - left_file = "/dev/null" - else: - left_file = tmp_file - # Generate vimdiff command to compare base and current versions. - cmd = f"vimdiff {left_file} {right_file}" - _LOG.debug("-> %s", cmd) - script_txt.append(cmd) - script_txt = "\n".join(script_txt) - # Display the diff commands that will be executed. - _LOG.info("\n%s" % hprint.frame("Diffing script")) - _LOG.info(script_txt) - # Create executable script for easy manual re-running. - script_file_name = f"./tmp.vimdiff_branch_with_{tag}.sh" - msg = f"To diff against {tag} run" - hio.create_executable_script(script_file_name, script_txt, msg=msg) - hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) - # Clean up temporary files. - cmd = f"rm -rf {dst_dir}" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -def _git_diff_with_branch_wrapper( - ctx: Any, - hash_: str, - tag: str, - # - dir_name: str, - subdir: str, - include_submodules: bool, - # - diff_type: str, - keep_extensions: str, - skip_extensions: str, - python: bool, - file_name: str, - # - only_print_files: bool, - dry_run: bool, -) -> None: - """ - Wrapper for _git_diff_with_branch that handles Python-specific filtering and submodules. - - Applies Python-specific extension filter if requested, then delegates to _git_diff_with_branch. - If include_submodules is True, also runs the diff for the amp submodule if present. 
- - Parameters are the same as _git_diff_with_branch with the addition of: - :param include_submodules: if True, also diff the amp submodule - :param python: if True, only diff Python files (overrides extension filters) - """ - hdbg.dassert_eq(dir_name, ".") - # If Python mode is enabled, override all extension filters to only diff Python files. - if python: - hdbg.dassert_eq( - diff_type, - "", - "Cannot specify diff_type with python mode", - ) - hdbg.dassert_eq( - keep_extensions, - "", - "Cannot specify keep_extensions with python mode", - ) - hdbg.dassert_eq( - skip_extensions, - "", - "Cannot specify skip_extensions with python mode", - ) - hdbg.dassert_eq( - file_name, - "", - "Cannot specify file_name with python mode", - ) - keep_extensions = "py" - # Diff files in the main repository. - _git_diff_with_branch( - ctx, - hash_, - tag, - dir_name, - subdir, - diff_type, - keep_extensions, - skip_extensions, - file_name, - only_print_files, - dry_run, - ) - # Also diff the amp submodule if it exists and was requested. - if include_submodules: - if hgit.is_amp_present(): - with hsystem.cd("amp"): - _git_diff_with_branch( - ctx, - hash_, - tag, - dir_name, - subdir, - diff_type, - keep_extensions, - skip_extensions, - file_name, - only_print_files, - dry_run, - ) - - -@task -def git_branch_diff_with( # type: ignore - ctx, - target="base", - hash_value="", - # Where to diff. - subdir="", - include_submodules=False, - # What files to diff. - diff_type="", - keep_extensions="", - skip_extensions="", - python=False, - file_name="", - # What actions. - only_print_files=False, - dry_run=False, -): - """ - Diff files of the current branch with master at the branching point. 
- - :param subdir: subdir to consider for diffing, instead of `.` - :param target: - - `base`: diff with respect to the branching point - - `master`: diff with respect to `origin/master` - - `head`: diff modified files - - `hash`: diff with respect to hash specified in `hash` - :param hash_value: the hash to use with target="hash" - :param include_submodules: run recursively on all submodules - :param diff_type: files to diff using git `--diff-filter` options - :param keep_extensions: a comma-separated list of extensions to check, e.g., - 'csv,py'. An empty string means keep all the extensions - :param skip_extensions: a comma-separated list of extensions to skip, e.g., - 'txt'. An empty string means do not skip any extension - :param only_print_files: print files to diff and exit - :param dry_run: execute diffing script or not - """ - # Determine the comparison target based on user preference. - dir_name = "." - hdbg.dassert_in(target, ("base", "master", "head", "hash"), "Invalid target") - # Resolve target to a specific git hash for consistent diffing. - if target == "base": - # Compare against the point where this branch diverged from master. - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'base'", - ) - hash_value = hgit.get_branch_hash(dir_name=dir_name) - tag = "base" - elif target == "master": - # Compare against the current state of the remote master branch. - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'master'", - ) - hash_value = "origin/master" - tag = "origin_master" - elif target == "head": - # Compare working directory against HEAD (uncommitted changes). - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'head'", - ) - hash_value = "" - tag = "head" - elif target == "hash": - # Compare against a user-specified commit hash. 
- hdbg.dassert_ne( - hash_value, - "", - "Must provide hash_value when target is 'hash'", - ) - tag = f"hash@{hash_value}" - else: - raise ValueError(f"Invalid target='{target}") - _git_diff_with_branch_wrapper( - ctx, - hash_value, - tag, - # - dir_name, - subdir, - include_submodules, - # - diff_type, - keep_extensions, - skip_extensions, - python, - file_name, - # - only_print_files, - dry_run, - ) - - -@task -def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore - """ - Copy the code from the src Git client to the dst Git client. - - :param file_name: the name of the file to copy (which is under - `src_git_dir`) - :param src_git_dir: the directory of the source Git client (e.g., - "/Users/saggese/src/helpers1") - :param dst_git_dir: the directory of the destination Git client (e.g., - "/Users/saggese/src/helpers2") - """ - _ = ctx - src_git_dir = hgit.resolve_git_client_dir(src_git_dir) - dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) - # Map source file path to equivalent path in destination repository. - dst_file_path = hgit.project_file_name_in_git_client( - file_name, - src_git_dir, - dst_git_dir, - check_src_file_exists=True, - check_dst_file_exists=False, - ) - _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) - # Perform the file copy operation. - hsystem.system_to_string(f"cp {file_name} {dst_file_path}") - - -# ############################################################################# - - -def _get_submodule_paths() -> List[str]: - """ - Get list of submodule paths from .gitmodules file. - - :return: List of submodule directory paths, empty if no submodules - found - """ - gitmodules_path = ".gitmodules" - if not os.path.exists(gitmodules_path): - _LOG.info("No .gitmodules file found") - return [] - # Extract submodule paths from git config using the .gitmodules file. 
- cmd = "git config --file .gitmodules --get-regexp path" - _, output = hsystem.system_to_string(cmd) - submodule_paths = [] - for line in output.strip().split("\n"): - if line: - # Parse format: "submodule..path " to extract path. - path = line.split(" ", 1)[1] - submodule_paths.append(path) - return submodule_paths - - -def _get_branch_name(submodule_path: str) -> str: - """ - Get the current branch name for a git repository. - - :param submodule_path: Path to the git repository directory - :return: Branch name or error message - """ - hdbg.dassert_dir_exists(submodule_path) - hdbg.dassert_path_exists(os.path.join(submodule_path, ".git")) - # Query git to get the symbolic name of the current HEAD. - cmd = f"cd {submodule_path} && git rev-parse --abbrev-ref HEAD" - _, branch_name = hsystem.system_to_string(cmd) - return branch_name.strip() - - -@task -def git_branches(ctx): # type: ignore - """ - Print the branch name for the main repository and each git submodule - directory. - - Example usage:: - > dev_scripts_helpers/git/print_git_branches.py - . (main): master - submodule1: feature/new-feature - submodule2: develop - submodule3: main - """ - _ = ctx - # Display main repository branch first for clarity. - main_branch = _get_branch_name(".") - print(f". -> {main_branch}") - # List submodule branches to detect if any are out of sync. - submodule_paths = _get_submodule_paths() - if not submodule_paths: - _LOG.debug("No git submodules found in this repository") - return - # Report branch for each submodule. - for path in submodule_paths: - branch_name = _get_branch_name(path) - print(f"{path} -> {branch_name}") - - -@task -def git_branch_is_merged(ctx): # type: ignore - """ - Check if the current branch was merged into master using GitHub API and git. - - Uses GitHub API to check for open/closed PRs and git to verify branch presence on remote. 
- """ - _ = ctx - hlitauti.report_task() - branch_name = hgit.get_branch_name() - print(f"branch_name='{branch_name}'") - # Check for PRs targeting master from the current branch on GitHub. - cmd = f"gh pr list --base master --head {branch_name}" - ctx.run(cmd, pty=True) - # Verify if the branch still exists on the remote repository. - cmd = f"git ls-remote --heads origin {branch_name}" - ctx.run(cmd, pty=True) - - -@task -def git_backup( - ctx, - file_mode="all", - backup_dir=None, - include_subrepos=True, - dry_run=False, -): # type: ignore - """ - Create a zip file with modified and/or untracked files from the current - repository and optionally its submodules. - - The zip file is created with a timestamp-based name in the specified - backup directory (default: $HOME/src/backups). - Example: `modified_files.helpers_root.20251119_130034.zip` - - :param file_mode: which files to include: "all" (default), "modified", or - "untracked" - :param backup_dir: directory where to save the zip file (default: - $HOME/src/backups) - :param include_subrepos: whether to include submodule files (default: True) - :param dry_run: if True, only print the files that would be included - without creating the zip - """ - hlitauti.report_task( - txt=hprint.to_str("file_mode, backup_dir, include_subrepos, dry_run") - ) - _ = ctx - # Validate backup scope to ensure user intent is clear. - valid_modes = ["all", "modified", "untracked"] - hdbg.dassert_in( - file_mode, - valid_modes, - "Invalid file_mode '%s'; must be one of: %s", - file_mode, - ", ".join(valid_modes), - ) - # Use default backup location if not specified. - if backup_dir is None: - backup_dir = os.path.join(os.path.expanduser("~"), "src", "backups") - hio.create_dir(backup_dir, incremental=True) - # Determine repository name for readable backup file naming. - super_module = False - git_client_root = hgit.get_client_root(super_module) - # Include timestamp to avoid overwriting previous backups. 
- timestamp = hlitauti.get_ET_timestamp() - repo_name = os.path.basename(git_client_root) - zip_file_name = f"modified_files.{repo_name}.{timestamp}.zip" - # Collect files from the main repository. - _LOG.info("Collecting %s files from main repository...", file_mode) - main_repo_files = hgit.get_modified_and_untracked_files(".", mode=file_mode) - _LOG.info("Found %d files in main repository", len(main_repo_files)) - all_files = [] - for file_path in main_repo_files: - all_files.append((".", file_path)) - # Also include submodule files if requested to ensure complete backup. - if include_subrepos: - submodule_paths = _get_submodule_paths() - if submodule_paths: - _LOG.info( - "Found %d submodule(s), collecting files...", - len(submodule_paths), - ) - for submodule_path in submodule_paths: - hdbg.dassert_dir_exists( - submodule_path, - msg=f"Submodule path does not exist: {submodule_path}", - ) - _LOG.info("Checking submodule: %s", submodule_path) - submodule_files = hgit.get_modified_and_untracked_files( - submodule_path, mode=file_mode - ) - _LOG.info( - "Found %d files in submodule %s", - len(submodule_files), - submodule_path, - ) - for file_path in submodule_files: - all_files.append((submodule_path, file_path)) - else: - _LOG.info("No submodules found") - else: - _LOG.info("Skipping submodules (include_subrepos=False)") - # Verify there's content to backup before proceeding. - if not all_files: - _LOG.warning("No %s files found. Nothing to zip.", file_mode) - return - # Display summary of what will be backed up. - _LOG.info( - "\n%s\nFound %d total files to include:\n%s", - hprint.frame("Files to include in zip"), - len(all_files), - hprint.indent( - "\n".join( - [ - ( - os.path.join(repo_path, file_path) - if repo_path != "." - else file_path - ) - for repo_path, file_path in all_files - ] - ) - ), - ) - if dry_run: - _LOG.warning("Dry-run mode: not creating zip file") - return - # Create zip file with all collected files. 
- zip_file_path = os.path.join(backup_dir, zip_file_name) - _LOG.info("Creating zip file: %s", zip_file_path) - import zipfile - - with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf: - for repo_path, file_path in all_files: - full_path = os.path.join(repo_path, file_path) - # Maintain directory hierarchy in archive for easy restoration. - arcname = ( - os.path.join(repo_path, file_path) - if repo_path != "." - else file_path - ) - try: - zipf.write(full_path, arcname=arcname) - _LOG.debug("Added to zip: %s", arcname) - except Exception as e: - _LOG.warning("Failed to add %s to zip: %s", full_path, e) - _LOG.info("Successfully created zip file: %s", zip_file_path) - # Display location for easy access. - abs_zip_path = os.path.abspath(zip_file_path) - print(f"\nZip file created at: {abs_zip_path}") - - -@task -def gh_watch(ctx, *, interval=60): # type: ignore - """ - Watch GitHub workflow status with periodic updates. - - Runs `invoke gh_workflow_list` every N seconds using the `watch` command. - If running in tmux, temporarily renames the window to "*GH_WATCH*" for - visibility and restores it on exit. - - :param interval: Update interval in seconds - """ - hlitauti.report_task() - # Check if running inside tmux and save original window name. - old_pane_title = None - if os.environ.get("TMUX"): - _LOG.info("Running in tmux, saving window name") - _, old_pane_title = hsystem.system_to_one_line( - "tmux display-message -p '#W'" - ) - _LOG.info("Original window name: %s", old_pane_title) - # Rename window to indicate we're watching workflows. - hsystem.system("tmux rename-window '*GH_WATCH*'") - try: - # Watch workflows by repeatedly running gh_workflow_list. - while True: - # Clear screen before displaying updated workflow status. - subprocess.run("clear; invoke gh_workflow_list", shell=True) - _LOG.info("Sleeping for %d seconds before next update", interval) - time.sleep(interval) - finally: - # Restore original tmux window name if it was changed. 
- if old_pane_title is not None: - _LOG.info("Restoring window name: %s", old_pane_title) - hsystem.system(f"tmux rename-window '{old_pane_title}'") - - -# TODO(gp): Add the following scripts: -# dev_scripts/git/gcl -# dev_scripts/git/git_branch.sh -# dev_scripts/git/git_branch_point.sh -# dev_scripts/create_class_diagram.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py deleted file mode 100644 index 7f437780b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py +++ /dev/null @@ -1,837 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_integrate as hlitaint -""" - -import datetime -import logging -import os -from typing import List, Optional, Set, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -DEFAULT_SRC_DIR_BASENAME = "cmamp1" -DEFAULT_DST_DIR_BASENAME = "kaizenflow1" - -# DEFAULT_SRC_DIR_BASENAME="amp1" -# DEFAULT_DST_DIR_BASENAME="cmamp1" - - -def _dassert_current_dir_matches(expected_dir_basename: str) -> None: - """ - Ensure that the name of the current dir is the one expected. - - E.g., `/Users/saggese/src/cmamp1` is a valid dir for an integration - branch for `cmamp1`. - """ - _LOG.debug(hprint.to_str("expected_dir_basename")) - # Get the basename of the current dir. 
- curr_dir_basename = os.path.basename(os.getcwd()) - # Check that it's what is expected. - hdbg.dassert_eq( - curr_dir_basename, - expected_dir_basename, - "The current dir '%s' doesn't match the expected dir '%s'", - curr_dir_basename, - expected_dir_basename, - ) - - -# TODO(gp): -> _dassert_is_integration_dir -def _dassert_is_integration_branch(abs_dir: str) -> None: - """ - Ensure that the branch in `abs_dir` is a valid integration or lint branch. - - E.g., `AmpTask1786_Integrate_20220402` is a valid integration - branch. - """ - _LOG.debug(hprint.to_str("abs_dir")) - branch_name = hgit.get_branch_name(dir_name=abs_dir) - hdbg.dassert_ne(branch_name, "master") - hdbg.dassert( - ("_Integrate_" in branch_name) or ("_Lint_" in branch_name), - "Invalid branch_name='%s' in abs_dir='%s'", - branch_name, - abs_dir, - ) - - -def _clean_both_integration_dirs(abs_dir1: str, abs_dir2: str) -> None: - """ - Run `i git_clean` on the passed dirs. - - :param abs_dir1, abs_dir2: full paths of the dirs to clean - """ - _LOG.debug(hprint.to_str("abs_dir1 abs_dir2")) - # - cmd = f"cd {abs_dir1} && invoke git_clean" - hsystem.system(cmd) - # - cmd = f"cd {abs_dir2} && invoke git_clean" - hsystem.system(cmd) - - -@task -def integrate_create_branch(ctx, dir_basename, dry_run=False): # type: ignore - """ - Create the branch for integration of `dir_basename` (e.g., amp1) in the - current dir. - - :param dir_basename: specify the dir name (e.g., `amp1`) to ensure the set-up is - correct. - """ - hlitauti.report_task() - # Check that the current dir has the name `dir_basename`. - _dassert_current_dir_matches(dir_basename) - # Login in GitHub. - hlitagh.gh_login(ctx) - # Create the integration branch with the current date, e.g., - # `AmpTask1786_Integrate_20211231`. 
- date = datetime.datetime.now().date() - date_as_str = date.strftime("%Y%m%d") - branch_name = f"AmpTask1786_Integrate_{date_as_str}" - # query_yes_no("Are you sure you want to create the branch ") - _LOG.info("Creating branch '%s'", branch_name) - cmd = f"invoke git_branch_create -b '{branch_name}'" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -# ############################################################################# - - -def _resolve_src_dst_names( - src_dir_basename: str, - dst_dir_basename: str, - subdir: str, - *, - check_exists: bool = True, -) -> Tuple[str, str]: - """ - Return the full path of `src_dir_basename` and `dst_dir_basename`. - - :param src_dir_basename: the current dir (e.g., `amp1`) - :param dst_dir_basename: a dir parallel to the current one (`cmamp1`) - :param check_exists: check that the dst dir exists - - :return: absolute paths of both directories - """ - curr_parent_dir = os.path.dirname(os.getcwd()) - # - abs_src_dir = os.path.join(curr_parent_dir, src_dir_basename, subdir) - abs_src_dir = os.path.normpath(abs_src_dir) - hdbg.dassert_dir_exists(abs_src_dir) - # - abs_dst_dir = os.path.join(curr_parent_dir, dst_dir_basename, subdir) - abs_dst_dir = os.path.normpath(abs_dst_dir) - if check_exists: - hdbg.dassert_dir_exists(abs_dst_dir) - return abs_src_dir, abs_dst_dir - - -@task -def integrate_diff_dirs( # type: ignore - ctx, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - reverse=False, - subdir="", - copy=False, - use_linux_diff=False, - check_branches=True, - clean_branches=True, - remove_usual=False, - run_diff_script=True, - dry_run=False, -): - """ - Integrate repos from dirs `src_dir_basename` to `dst_dir_basename` by diffing - or copying all the files with differences. - - ``` - # Use the default values for src / dst dirs to represent the usual set-up. - > i integrate_diff_dirs \ - --src-dir-basename amp1 \ - --dst-dir-basename cmamp1 \ - --subdir . 
- ``` - - :param src_dir_basename: dir with the source branch (e.g., amp1) - :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) - :param reverse: switch the roles of the default source and destination branches - :param subdir: filter to the given subdir for both dirs (e.g., - `src_dir_basename/subdir` and `dst_dir_basename/subdir`) - :param copy: copy the files instead of diffing - :param use_linux_diff: use Linux `diff` instead of `diff_to_vimdiff.py` - :param remove_usual: remove the usual mismatching files (e.g., `.github`) - :param run_diff_script: run the diff script - :param dry_run: do not execute the commands - """ - _ = ctx - hlitauti.report_task() - if reverse: - src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename - _LOG.warning( - "Reversing dirs: %s", - hprint.to_str2(src_dir_basename, dst_dir_basename), - ) - # Check that the integration branches are in the expected state. - # _dassert_current_dir_matches(src_dir_basename) - # When we integrate a dir that doesn't exist in the dst branch, we need to - # skip the check for existence. - check_exists = False - abs_src_dir, abs_dst_dir = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists - ) - hio.create_dir(abs_dst_dir, incremental=True) - if check_branches: - _dassert_is_integration_branch(abs_src_dir) - _dassert_is_integration_branch(abs_dst_dir) - else: - _LOG.warning("Skipping integration branch check") - # Clean branches if needed. - if clean_branches: - # We can clean up only the root dir. - if subdir == "": - _clean_both_integration_dirs(abs_src_dir, abs_dst_dir) - else: - _LOG.warning("Skipping integration branch cleaning") - # Copy or diff dirs. - _LOG.info("abs_src_dir=%s", abs_src_dir) - _LOG.info("abs_dst_dir=%s", abs_dst_dir) - hdbg.dassert_ne(abs_src_dir, abs_dst_dir) - if copy: - # Copy the files. 
- if dry_run: - cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" - else: - rsync_opts = "--delete -a" - cmd = f"rsync {rsync_opts} {abs_src_dir}/ {abs_dst_dir}" - else: - # Diff the files. - if use_linux_diff: - cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" - else: - cmd = "diff_to_vimdiff.py" - if run_diff_script: - cmd += " --run_diff_script" - else: - cmd += " --no_run_diff_script" - _LOG.warning("Skipping running diff script") - cmd += f" --dir1 {abs_src_dir} --dir2 {abs_dst_dir}" - if remove_usual: - vals = [ - r"\/\.github\/", - ] - regex = "|".join(vals) - cmd += f" --ignore_files='{regex}'" - # We need to use `system` to get vimdiff to connect to stdin and stdout. - if not dry_run: - # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) - os.system(cmd) - - -# ############################################################################# - - -# TODO(gp): Allow to pass the hash of the last integration to consider. -# Factor out the logic to find the hash - -# Sometimes we want to see the changes in one dir since an integration point - -# E.g., find all the changes in `datapull` since the last integration -# -# > git log --oneline datapull -# 77f612f75 SorrIssue244 CCXT timestamp representation unit test (#317) -# 6b981b1f6 Sorrtask298 rename get docker cmd to get docker run cmd (#331) -# bd33a5fb9 SorrTask267_Parquet_to_CSV (#267) -# 9819fd117 AmpTask1786_Integrate_20230518_im (#273) <==== -# d530ed561 Update (#272) -# b75eab7ad AmpTask1786_Integrate_20230518_3 (#271) -# -# > git difftool 9819fd117.. datapull -# ... -# -# > git diff --name-only 9819fd117.. 
datapull -# datapull/ccxt/data/extract/test/test_ccxt_extractor.py -# datapull/common/data/transform/convert_pq_to_csv.py -# datapull/im_lib_tasks.py -# datapull/test/test_im_lib_tasks.py -# -# for file in datapull/ccxt/data/extract/test/test_ccxt_extractor.py datapull/common/data/transform/convert_pq_to_csv.py datapull/im_lib_tasks.py datapull/test/test_im_lib_tasks.py; do -# vimdiff ~/src/cmamp1/$file ~/src/kaizenflow1/$file -# done - - -def _find_files_touched_since_last_integration( - abs_dir: str, subdir: str -) -> List[str]: - """ - Return the list of files modified since the last integration for `abs_dir`. - - :param abs_dir: directory to cd before executing this script - :param subdir: consider only the files under `subdir` - """ - _LOG.debug(hprint.to_str2(abs_dir)) - dir_basename = os.path.basename(abs_dir) - # TODO(gp): dir_basename can be computed from abs_dir_name to simplify the - # interface. - # Change the dir to the desired one. - old_dir = os.getcwd() - try: - os.chdir(abs_dir) - # Find the hash of all integration commits. - cmd = "git log --date=local --oneline --date-order | grep AmpTask1786_Integrate" - # Remove integrations like "'... Merge branch 'master' into - # AmpTask1786_Integrate_20220113'" - cmd += " | grep -v \"Merge branch 'master' into \"" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("integration commits=\n%s", txt) - txt = txt.split("\n") - # > git log --date=local --oneline --date-order | grep AmpTask1786_Integrate - # 72a1a101 AmpTask1786_Integrate_20211218 (#1975) - # 2acfd6d7 AmpTask1786_Integrate_20211214 (#1950) - # 318ab0ff AmpTask1786_Integrate_20211210 (#1933) - hdbg.dassert_lte(2, len(txt)) - print(f"# last_integration: '{txt[0]}'") - last_integration_hash = txt[0].split()[0] - print("* " + hprint.to_str("last_integration_hash")) - # Find the first commit after the commit with the last integration. 
- cmd = f"git log --oneline --reverse --ancestry-path {last_integration_hash}^..master" - _, txt = hsystem.system_to_string(cmd) - print(f"* commits after last integration=\n{txt}") - txt = txt.split("\n") - # > git log --oneline --reverse --ancestry-path 72a1a101^..master - # 72a1a101 AmpTask1786_Integrate_20211218 (#1975) - # 90e90353 AmpTask1955_Lint_20211218 (#1976) - # 4a2b45c6 AmpTask1858_Implement_buildmeister_workflows_in_invoke (#1860) - hdbg.dassert_lte(2, len(txt)) - first_commit_hash = txt[1].split()[0] - _LOG.debug("first_commit: '%s'", txt[1]) - _LOG.debug(hprint.to_str("first_commit_hash")) - # Find all the files touched in each branch. - cmd = f"git diff --name-only {first_commit_hash}..HEAD" - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - finally: - os.chdir(old_dir) - _LOG.debug("Files modified since the integration=\n%s", "\n".join(files)) - # Filter files by subdir, if needed. - if subdir: - filtered_files = [] - for file in files: - if file.startswith(subdir): - filtered_files.append(file) - files = filtered_files - # Reorganize the files. - hdbg.dassert_no_duplicates(files) - files = sorted(files) - # Save to file for debugging. - file_name = os.path.join( - f"tmp.integrate_find_files_touched_since_last_integration.{dir_basename}.txt" - ) - hio.to_file(file_name, "\n".join(files)) - _LOG.debug("Saved file to '%s'", file_name) - return files - - -@task -def integrate_find_files_touched_since_last_integration( # type: ignore - ctx, - subdir="", -): - """ - Print the list of files modified since the last integration for this dir. - """ - hlitauti.report_task() - abs_dir = os.getcwd() - _ = ctx - files = _find_files_touched_since_last_integration(abs_dir, subdir) - # Print the result. 
- tag = "Files modified since the integration" - print(hprint.frame(tag)) - print("\n".join(files)) - - -# ############################################################################# - - -def _integrate_files( - files: Set[str], - abs_left_dir: str, - abs_right_dir: str, - only_different_files: bool, -) -> List[Tuple[str, str, str]]: - """ - Build a list of files to compare based on the pattern. - - :param files: relative path of the files to compare :param - abs_left_dir, abs_right_dir: path of the left / right dir - :param only_different_files: include in the script only the files - that are different - :return: list of files to compare - """ - _LOG.debug(hprint.to_str("abs_left_dir abs_right_dir only_different_files")) - files_to_diff: List[Tuple[str, str, str]] = [] - for file in sorted(list(files)): - _LOG.debug(hprint.to_str("file")) - left_file = os.path.join(abs_left_dir, file) - right_file = os.path.join(abs_right_dir, file) - # Check if both the files exist and are the same. - both_exist = os.path.exists(left_file) and os.path.exists(right_file) - if not both_exist: - # Both files don't exist: nothing to do. - equal: Optional[bool] = False - skip: Optional[bool] = True - else: - # They both exist. - if only_different_files: - # We want to check if they are the same. - try: - equal = hio.from_file(left_file) == hio.from_file(right_file) - except RuntimeError as e: - # RuntimeError: error='utf-8' codec can't decode byte 0xd0 in - # position 10: invalid continuation byte - _LOG.error("Caught error:\n%s", e) - equal = True - skip = equal - else: - # They both exist, and we want to process even if they are the - # same. - equal = None - skip = False - _ = left_file, right_file, both_exist, equal, skip - _LOG.debug(hprint.to_str("left_file right_file both_exist equal skip")) - # Execute the action on the 2 files. 
- if skip: - _LOG.debug(" Skip %s", file) - else: - _LOG.debug(" -> (%s, %s)", left_file, right_file) - files_to_diff.append((file, left_file, right_file)) - return files_to_diff - - -@task -def integrate_files( # type: ignore - ctx, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - reverse=False, - subdir="", - mode="vimdiff", - file_direction="", - only_different_files=True, - check_branches=True, -): - """ - Find and copy the files that are touched only in one branch or in both. - - :param ctx: invoke ctx - :param src_dir_basename: dir with the source branch (e.g., amp1) - :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) - :param reverse: switch the roles of the default source and destination branches - :param subdir: directory to select - :param mode: - - "print_dirs": print the directories - - "vimdiff": diff the files - - "copy": copy the files - :param file_direction: which files to diff / copy: - - "common_files": files touched in both branches - - "union_files": files touched in either branch - - "only_files_in_src": files touched only in the src dir - - "only_files_in_dst": files touched only in the dst dir - :param only_different_files: consider only the files that are different among - the branches - :param check_branches: ensure that the current branches are for integration - and not `master` - """ - hlitauti.report_task() - _ = ctx - if reverse: - src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename - _LOG.warning( - "Reversing dirs: %s", - hprint.to_str2(src_dir_basename, dst_dir_basename), - ) - # Check that the integration branches are in the expected state. - _dassert_current_dir_matches(src_dir_basename) - # We want to stay at the top level dir, since the subdir is handled by - # `integrate_find_files_touched_since_last_integration`. 
- abs_src_dir, abs_dst_dir = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir="" - ) - if check_branches: - _dassert_is_integration_branch(abs_src_dir) - _dassert_is_integration_branch(abs_dst_dir) - else: - _LOG.warning("Skipping integration branch check") - # Find the files touched in each branch since the last integration. - src_files = set( - _find_files_touched_since_last_integration(abs_src_dir, subdir) - ) - dst_files = set( - _find_files_touched_since_last_integration(abs_dst_dir, subdir) - ) - # - if file_direction == "common_files": - files = src_files.intersection(dst_files) - elif file_direction == "only_files_in_src": - files = src_files - dst_files - elif file_direction == "only_files_in_dst": - files = dst_files - src_files - elif file_direction == "union_files": - files = src_files.union(dst_files) - else: - raise ValueError(f"Invalid file_direction='{file_direction}'") - # - files_to_diff = _integrate_files( - files, - abs_src_dir, - abs_dst_dir, - only_different_files, - ) - # Print the files. - print(hprint.frame(file_direction)) - _LOG.debug(hprint.to_str("files_to_diff")) - files_set = list(zip(*files_to_diff)) - if not files_set: - _LOG.warning("No file found: skipping") - return - files_set = sorted(list(files_set[0])) - txt = "\n".join(files_set) - print(hprint.indent(txt)) - # Process the files touched. - if mode == "print_dirs": - files_lst = [] - for file, left_file, right_file in files_to_diff: - dir_name = os.path.dirname(file) - # Skip empty dir, e.g., for `pytest.ini`. - if dir_name != "": - files_lst.append(dir_name) - files_lst = sorted(list(set(files_lst))) - print(hprint.frame("Dirs changed")) - print("\n".join(files_lst)) - else: - # Build the script with the operations to perform. 
- if mode == "copy" and file_direction == "only_files_in_dst": - raise ValueError("Can't copy files from destination") - script_txt = [] - for file, left_file, right_file in files_to_diff: - if mode == "copy": - cmd = f"cp -f {left_file} {right_file}" - elif mode == "vimdiff": - cmd = f"vimdiff {left_file} {right_file}" - else: - raise ValueError(f"Invalid mode='{mode}'") - _LOG.debug(" -> %s", cmd) - script_txt.append(cmd) - script_txt = "\n".join(script_txt) - # Execute / save the script. - if mode == "copy": - for cmd in script_txt.split("\n"): - hsystem.system(cmd) - elif mode == "vimdiff": - # Save the diff script. - script_file_name = f"./tmp.vimdiff.{file_direction}.sh" - hio.create_executable_script(script_file_name, script_txt) - print(f"# To diff run:\n> {script_file_name}") - else: - raise ValueError(f"Invalid mode='{mode}'") - - -@task -def integrate_find_files( # type: ignore - ctx, - subdir="", -): - """ - Find the files that are touched in the current branch since last - integration. - """ - hlitauti.report_task() - _ = ctx - # - abs_src_dir = "." - abs_src_dir = os.path.normpath(abs_src_dir) - hdbg.dassert_dir_exists(abs_src_dir) - # Find the files touched in each branch since the last integration. - src_files = sorted( - _find_files_touched_since_last_integration(abs_src_dir, subdir) - ) - print("* Files touched:\n" + "\n".join(src_files)) - - -@task -def integrate_diff_overlapping_files( # type: ignore - ctx, src_dir_basename, dst_dir_basename, subdir="" -): - """ - Find the files modified in both branches `src_dir_basename` and - `dst_dir_basename` Compare these files from HEAD to master version before - the branch point. - - This is used to check what changes were made to files modified by - both branches. - """ - hlitauti.report_task() - _ = ctx - # Check that the integration branches are in the expected state. 
- _dassert_current_dir_matches(src_dir_basename) - # When we integrate a dir that doesn't exist in the dst branch, we need to - # skip the check for existence. - check_exists = False - src_dir_basename, dst_dir_basename = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists - ) - _dassert_is_integration_branch(src_dir_basename) - _dassert_is_integration_branch(dst_dir_basename) - _clean_both_integration_dirs(src_dir_basename, dst_dir_basename) - # Find the files modified in both branches. - src_hash = hgit.get_branch_hash(src_dir_basename) - _LOG.info("src_hash=%s", src_hash) - dst_hash = hgit.get_branch_hash(dst_dir_basename) - _LOG.info("dst_hash=%s", dst_hash) - diff_files1 = os.path.abspath("./tmp.files_modified1.txt") - diff_files2 = os.path.abspath("./tmp.files_modified2.txt") - cmd = f"cd {src_dir_basename} && git diff --name-only {src_hash} HEAD >{diff_files1}" - hsystem.system(cmd) - cmd = f"cd {dst_dir_basename} && git diff --name-only {dst_hash} HEAD >{diff_files2}" - hsystem.system(cmd) - common_files = "./tmp.common_files.txt" - cmd = f"comm -12 {diff_files1} {diff_files2} >{common_files}" - hsystem.system(cmd) - # Get the base files to diff. - files = hio.from_file(common_files).split("\n") - files = [f for f in files if f != ""] - _LOG.info("Found %d files to diff:\n%s", len(files), "\n".join(files)) - # Retrieve the original file and create the diff command. - script_txt = [] - for src_file in files: - hdbg.dassert_file_exists(src_file) - # TODO(gp): Add function to add a suffix to a name, using - # os.path.dirname(), os.path.basename(), os.path.split_extension(). - dst_file = src_file.replace(".py", ".base.py") - # Save the base file. - cmd = f"git show {src_hash}:{src_file} >{dst_file}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc == 0: - # The file was created: nothing to do. - pass - elif rc == 128: - # Note that the file potentially could not exist, i.e., it was added - # in the branch. 
In this case Git returns: - # ``` - # rc=128 fatal: path 'dataflow/pipelines/real_time/test/ - # test_dataflow_pipelines_real_time_pipeline.py' exists on disk, but - # not in 'ce54877016204315766e90df7c45192bec1fbf20' - src_file = "/dev/null" - else: - raise ValueError(f"cmd='{cmd}' returned {rc}") - # Update the script to diff. - script_txt.append(f"vimdiff {dst_file} {src_file}") - # Save the script to compare. - script_file_name = "./tmp.vimdiff_overlapping_files.sh" - script_txt = "\n".join(script_txt) - hio.create_executable_script(script_file_name, script_txt) - print(f"# To diff against the base run:\n> {script_file_name}") - - -# ############################################################################# - - -def _infer_dst_file_path( - src_file_path: str, - *, - default_src_dir_basename: str = DEFAULT_SRC_DIR_BASENAME, - default_dst_dir_basename: str = DEFAULT_DST_DIR_BASENAME, - check_exists: bool = True, -) -> Tuple[str, str]: - """ - Convert a file path across two dirs with the same data structure. - - E.g., - `.../src/cmamp1/.../test_data_snapshots/alpha_numeric_data_snapshots` - is converted into - `.../src/amp1/.../test_data_snapshots/alpha_numeric_data_snapshots` - """ - _LOG.debug(hprint.to_str("src_file_path")) - src_file_path = os.path.normpath(src_file_path) - if check_exists: - hdbg.dassert_path_exists(src_file_path) - # Extract the repo dir name, by looking for one of the default basenames. 
- target_dir = f"/{default_dst_dir_basename}/" - idx = src_file_path.find(target_dir) - if idx >= 0: - src_dir_basename = default_dst_dir_basename - dst_dir_basename = default_src_dir_basename - subdir = src_file_path[idx + len(target_dir) :] - else: - target_dir = f"/{default_src_dir_basename}/" - idx = src_file_path.find(target_dir) - if idx >= 0: - src_dir_basename = default_src_dir_basename - dst_dir_basename = default_dst_dir_basename - subdir = src_file_path[idx + len(target_dir) :] - else: - raise ValueError( - f"Can't find either '{default_src_dir_basename}' or " - f"'{default_dst_dir_basename}' in file_path=" - f"'{src_file_path}'" - ) - # Replace src dir (e.g., `cmamp1`) with dst dir (e.g., `amp1`). - dst_file_path = src_file_path.replace( - f"/{src_dir_basename}/", f"/{dst_dir_basename}/" - ) - _LOG.debug(hprint.to_str("dst_file_path subdir")) - if check_exists: - hdbg.dassert_path_exists(dst_file_path) - return dst_file_path, subdir - - -@task -def integrate_rsync( # type: ignore - ctx, - src_dir, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - dst_dir="", - check_dir=True, - dry_run=False, -): - """ - Use `rsync` to bring two dirs to sync. - - E.g., - ``` - > invoke integrate_diff_dirs - ... - ... Only in .../cmamp1/.../alpha_numeric_data_snapshots: alpha - ... Only in .../amp1/.../alpha_numeric_data_snapshots: latest - - # Accept the `cmamp1` side vs the `amp1` side with: - > invoke integrate_rsync .../cmamp1/.../alpha_numeric_data_snapshots/ - ``` - - :param src_dir: dir to be used. If empty, it is inferred from file_name - :param dst_dir: dir to be used. 
If empty, it is inferred from file_name - :param check_dir: force checking that src_dir and dst_dir are valid - integration dirs - :param dry_run: print the system command instead of executing them - """ - hlitauti.report_task() - _ = ctx - src_dir = os.path.normpath(src_dir) - hdbg.dassert_path_exists(src_dir) - _LOG.info(hprint.to_str("src_dir")) - if check_dir: - _dassert_is_integration_branch(src_dir) - # Resolve the dst dir. - if dst_dir == "": - dst_dir, _ = _infer_dst_file_path( - src_dir, - default_src_dir_basename=src_dir_basename, - default_dst_dir_basename=dst_dir_basename, - ) - if check_dir: - _dassert_is_integration_branch(dst_dir) - dst_dir = os.path.normpath(dst_dir) - hdbg.dassert_path_exists(dst_dir) - _LOG.info(hprint.to_str("dst_dir")) - # - _LOG.info("Syncing:\n'%s'\nto\n'%s'", src_dir, dst_dir) - cmd = f"rsync --delete -a -r {src_dir}/ {dst_dir}/" - hsystem.system(cmd, log_level=logging.INFO, dry_run=dry_run) - - -@task -def integrate_file( # type: ignore - ctx, - file_name, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - dry_run=False, -): - """ - Diff corresponding files in two different repos. - - ``` - # The path is assumed referred to current dir. - > i integrate_file --file-name helpers/lib_tasks_integrate.py - - > i integrate_file --file-name /Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py - - > i integrate_file \ - --file-name helpers/lib_tasks_integrate.py \ - --src-dir-name cmamp1 - --dst-dir-name kaizenflow1 - ``` - - :param file_name: it can be a full path (e.g., - `/Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py`) - or a relative path to the root of the Git repo (e.g., - `helpers/lib_tasks_integrate.py) - :param dst_dir: dir to be used. 
If empty, it is inferred from file_name - :param check_dir: force checking that src_dir and dst_dir are valid - integration dirs - :param dry_run: print the system command instead of executing them - """ - hlitauti.report_task() - _ = ctx - file_name = os.path.normpath(file_name) - hdbg.dassert_file_exists(file_name) - # If the file is in the current dir, we need to prepend the dir name. - if not file_name.startswith("/"): - file_name = os.path.join(os.getcwd(), file_name) - _LOG.info(hprint.to_str("file_name")) - # Resolve the src / dst dir, if needed. - dst_file_name, _ = _infer_dst_file_path( - file_name, - default_src_dir_basename=src_dir_basename, - default_dst_dir_basename=dst_dir_basename, - ) - _LOG.info(hprint.to_str("file_name dst_file_name")) - # - _LOG.info("Syncing:\n'%s'\nto\n'%s'", file_name, dst_file_name) - cmd = f"vimdiff {file_name} {dst_file_name}" - # We need to use `system` to get vimdiff to connect to stdin and stdout. - if not dry_run: - # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) - os.system(cmd) - - -# Compare the timestamp of last modification of a file. 
-# FILE=helpers/lib_tasks_git.py; (cd ~/src/cmamp1; git log -1 $FILE); (cd ~/src/kaizenflow1; git log -1 $FILE) - -# > git log --pretty=format:"%h - %an, %ad : %s" --date=short | grep _Integrate_ | head -5 -# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) -# 5a05a0c94 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_6 (#365) -# 6c3ad7d87 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_5 (#364) -# 36abfd8b3 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_3 (#361) -# 65fe42d38 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_2 (#360) - -# In Sorr -# GIT_INTEGR_HASH=fffa1c8b2 -# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) - -# In cmamp -# 20526ed09 - GP Saggese, 2023-08-10 : AmpTask1786_Integrate_20230810_2 (#5011) - -# Show files changed since an integration point -# > git diff --name-only $GIT_INTEGR_HASH dataflow_amp -# dataflow_amp/system/mock1/test/test_mock1_forecast_system.py - -# Show the difference since an integration point -# git difftool $GIT_INTEGR_HASH.. dataflow_amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py deleted file mode 100644 index 8fe792c97..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py +++ /dev/null @@ -1,444 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_lint as hlitalin -""" - -import datetime -import filecmp -import logging -import os - -from invoke.tasks import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. 
-import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Linter. -# ############################################################################# - - -@task -def lint_check_python_files_in_docker( # type: ignore - ctx, - python_compile=True, - python_execute=True, - modified=False, - branch=False, - last_commit=False, - all_=False, - files="", -): - """ - Compile and execute Python files checking for errors. - - This is supposed to be run inside Docker. - - The params have the same meaning as in `_get_files_to_process()`. - """ - hlitauti.report_task() - _ = ctx - # We allow to filter through the user specified `files`. - mutually_exclusive = False - remove_dirs = True - file_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - _LOG.debug("Found %d files:\n%s", len(file_list), "\n".join(file_list)) - # Filter keeping only Python files. - _LOG.debug("Filtering for Python files") - exclude_paired_jupytext = True - file_list = hio.keep_python_files(file_list, exclude_paired_jupytext) - _LOG.debug("file_list=%s", "\n".join(file_list)) - _LOG.info("Need to process %d files", len(file_list)) - if not file_list: - _LOG.warning("No files were selected") - # Scan all the files. 
- failed_filenames = [] - for file_name in file_list: - _LOG.info("Processing '%s'", file_name) - if python_compile: - import compileall - - success = compileall.compile_file(file_name, force=True, quiet=1) - _LOG.debug("file_name='%s' -> python_compile=%s", file_name, success) - if not success: - msg = f"'{file_name}' doesn't compile correctly" - _LOG.error(msg) - failed_filenames.append(file_name) - # TODO(gp): Add also `python -c "import ..."`, if not equivalent to `compileall`. - if python_execute: - cmd = f"python {file_name}" - rc = hsystem.system(cmd, abort_on_error=False, suppress_output=False) - _LOG.debug("file_name='%s' -> python_compile=%s", file_name, rc) - if rc != 0: - msg = f"'{file_name}' doesn't execute correctly" - _LOG.error(msg) - failed_filenames.append(file_name) - hprint.log_frame( - _LOG, - f"failed_filenames={len(failed_filenames)}", - verbosity=logging.INFO, - ) - _LOG.info("\n".join(failed_filenames)) - error = len(failed_filenames) > 0 - return error - - -@task -def lint_check_python_files( # type: ignore - ctx, - python_compile=True, - python_execute=True, - modified=False, - branch=False, - last_commit=False, - all_=False, - files="", -): - """ - Compile and execute Python files checking for errors. - - The params have the same meaning as in `_get_files_to_process()`. - """ - _ = ( - python_compile, - python_execute, - modified, - branch, - last_commit, - all_, - files, - ) - # Execute the same command line but inside the container. E.g., - # /Users/saggese/src/venv/amp.client_venv/bin/invoke lint_docker_check_python_files --branch - cmd_line = hdbg.get_command_line() - # Replace the full path of invoke with just `invoke`. 
- cmd_line = cmd_line.split() - cmd_line = ["/venv/bin/invoke lint_check_python_files_in_docker"] + cmd_line[ - 2: - ] - docker_cmd_ = " ".join(cmd_line) - cmd = f'invoke docker_cmd --cmd="{docker_cmd_}"' - hlitauti.run(ctx, cmd) - - -def _get_lint_docker_cmd( - base_image: str, - docker_cmd_: str, - stage: str, - version: str, - *, - use_entrypoint: bool = True, -) -> str: - """ - Create a command to run in Linter service. - - :param docker_cmd_: command to run - :param stage: the image stage to use - :return: the full command to run - """ - if base_image == "": - base_path = os.environ["CSFY_ECR_BASE_PATH"] - # Get an image to run the linter on. - linter_image = f"{base_path}/helpers" - else: - linter_image = base_image - _LOG.debug(hprint.to_str("linter_image")) - # Execute command line. - cmd: str = hlitadoc._get_docker_compose_cmd( - linter_image, - stage, - version, - docker_cmd_, - use_entrypoint=use_entrypoint, - ) - return cmd - - -@task -def lint_detect_cycles( # type: ignore - ctx, - dir_name=".", - stage="prod", - version="", - out_file_name="lint_detect_cycles.output.txt", - debug_tool=False, -): - """ - Detect cyclic imports in the directory files. - - For param descriptions, see `lint()`. - - :param dir_name: the name of the dir to detect cyclic imports in - - By default, the check will be carried out in the dir from where - the task is run - :param debug_tool: print the output of the cycle detector - """ - hlitauti.report_task() - # Remove the log file. - if os.path.exists(out_file_name): - cmd = f"rm {out_file_name}" - hlitauti.run(ctx, cmd) - # Prepare the command line. - docker_cmd_opts = [dir_name] - if debug_tool: - docker_cmd_opts.append("-v DEBUG") - docker_cmd_ = ( - "$(find -wholename '*import_check/detect_import_cycles.py') " - + hlitauti._to_single_line_cmd(docker_cmd_opts) - ) - # Execute command line. 
- base_image = "" - cmd = _get_lint_docker_cmd(base_image, docker_cmd_, stage, version) - # Use `PIPESTATUS` otherwise the exit status of the pipe is always 0 - # because writing to a file succeeds. - cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}; exit $PIPESTATUS" - # Run. - hlitauti.run(ctx, cmd) - - -# pylint: disable=line-too-long -@task -def lint( # type: ignore - ctx, - base_image="", - stage="prod", - version="", - files="", - from_file="", - skip_files="", - dir_name="", - modified=False, - last_commit=False, - branch=False, - # It needs to be a string to allow the user to specify "serial". - num_threads="serial", - only_format=False, - only_check=False, -): - """ - Lint files. - - ``` - # To lint specific files: - > i lint --files="dir1/file1.py dir2/file2.md" - - # To lint the files changed in the last commit, excluding specific files: - > i lint --last-commit --skip-files="dir1/file1.py dir2/file2.md" - - # To lint all the files in the current dir using only formatting actions: - > i lint --dir-name . --only-format - - # To lint the files modified in the current git client: - > i lint --modified - - # To exclude certain paths from linting: - > i lint --files="$(find . -name '*.py' -not -path './compute/*' -not -path './amp/*')" - ``` - - :param stage: the image stage to use (e.g., "prod", "dev", "local") - :param version: the version of the container to use - :param files: specific files to lint (e.g. "dir1/file1.py dir2/file2.md") - :param from_file: specific file storing files to lint - :param skip_files: specific files to skip during linting (e.g. "dir1/file1.py dir2/file2.md") - :param dir_name: name of the dir where all files should be linted - :param modified: lint the files modified in the current git client - :param last_commit: lint the files modified in the previous commit - :param branch: lint the files modified in the current branch w.r.t. master - :param num_threads: number of threads to use ("serial", -1, 0, 1, 2, ...) 
- :param only_format: run only the modifying actions of Linter (e.g., black) - :param only_check: run only the non-modifying actions of Linter (e.g., pylint) - """ - # Check if the user is in a repo root. - hdbg.dassert( - hgit.is_cwd_git_repo(), - msg="Linter should run from repo root", - ) - hlitauti.report_task() - # Prepare the command line. - lint_cmd_opts = [] - # Add the file selection argument. - hdbg.dassert_eq( - int(len(files) > 0) - + int(len(from_file) > 0) - + int(len(dir_name) > 0) - + int(modified) - + int(last_commit) - + int(branch), - 1, - msg="Specify exactly one among --files, --from_file, --dir-name, " - "--modified, --last-commit, --branch", - ) - if len(files) > 0: - lint_cmd_opts.append(f"--files {files}") - elif len(from_file) > 0: - lint_cmd_opts.append(f"--from_file {from_file}") - elif len(dir_name) > 0: - lint_cmd_opts.append(f"--dir_name {dir_name}") - elif modified: - lint_cmd_opts.append("--modified") - elif last_commit: - lint_cmd_opts.append("--last_commit") - elif branch: - lint_cmd_opts.append("--branch") - else: - raise ValueError("No file selection arguments are specified") - if len(skip_files) > 0: - lint_cmd_opts.append(f"--skip_files {skip_files}") - # - lint_cmd_opts.append(f"--num_threads {num_threads}") - # Add the action selection argument, if needed. - hdbg.dassert_lte( - int(only_format) + int(only_check), - 1, - msg="Specify only one among --only-format, --only-check", - ) - if only_format: - lint_cmd_opts.append("--only_format") - elif only_check: - lint_cmd_opts.append("--only_check") - else: - _LOG.info("All Linter actions selected") - # Compose the command line. - if hserver.is_host_mac(): - find_cmd = "$(find . -path '*linters/base.py')" - else: - find_cmd = "$(find -wholename '*linters/base.py')" - lint_cmd_ = find_cmd + " " + hlitauti._to_single_line_cmd(lint_cmd_opts) - docker_cmd_ = _get_lint_docker_cmd( - base_image, lint_cmd_, stage=stage, version=version - ) - # Run. 
- hlitauti.run(ctx, docker_cmd_) - - -@task -def lint_check_if_it_was_run(ctx): # type: ignore - """ - Check if the linter was run in the current branch. - - - abort the task with error if the files were modified - """ - hlitauti.report_task() - # Check if the files were modified. - hgit.is_client_clean(abort_if_not_clean=True) - - -@task -def lint_create_branch(ctx, dry_run=False): # type: ignore - """ - Create the branch for linting in the current dir. - - The dir needs to be specified to ensure the set-up is correct. - """ - hlitauti.report_task() - # - date = datetime.datetime.now().date() - date_as_str = date.strftime("%Y%m%d") - branch_name = f"AmpTask1955_Lint_{date_as_str}" - # query_yes_no("Are you sure you want to create the branch '{branch_name}'") - _LOG.info("Creating branch '%s'", branch_name) - cmd = f"invoke git_branch_create -b '{branch_name}'" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -@task -def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # type: ignore - """ - Sync code needed to run linter / ai_review from a Git client to the current one. - - :param git_client_name: the name of the Git client to sync from. It can be - something like "helpers1" and it will be used from "$HOME/src" or can - be a full path. - :param revert_to_original: if `True`, revert the changes to the original - """ - _ = ctx - hlitauti.report_task() - # Copy the code from the src Git client to the current one. - src_git_dir = hgit.resolve_git_client_dir(git_client_name) - # - files_to_copy = [ - # "hgit.py", - # "hmarkdown.py", - "llm_prompts.py", - "llm_transform.py", - "inject_todos.py", - "all.coding_style_guidelines.reference.md", - ] - # Revert the files in the current git client to the original code. 
- if revert_to_original: - _LOG.debug("Reverting to original code ...") - for file_name in files_to_copy: - _LOG.debug("Reverting %s to original code", file_name) - src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) - git_root_dir = hgit.find_git_root(src_git_dir) - src_file_path = os.path.relpath(src_file_path, git_root_dir) - cmd = "git checkout -- " + src_file_path - hsystem.system(cmd) - _LOG.info("Done") - return - # Get the path to the helpers repo. - src_helpers_dir = hgit.find_helpers_root(src_git_dir) - hdbg.dassert_ne(src_helpers_dir, "") - hdbg.dassert_dir_exists(src_helpers_dir) - # - dst_helpers_dir = hgit.find_helpers_root() - hdbg.dassert_dir_exists(dst_helpers_dir) - _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir")) - # - _LOG.info( - "Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir - ) - # Find the files to copy. - for file_name in files_to_copy: - _LOG.debug(hprint.to_str("file_name")) - # Get the path to the file in the src Git client. - src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) - src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path)) - _LOG.debug(hprint.to_str("src_file_path")) - hdbg.dassert_file_exists(src_file_path) - # Get the path to the file in the dst Git client. - dst_file_path = hgit.project_file_name_in_git_client( - src_file_path, src_helpers_dir, dst_helpers_dir - ) - _LOG.debug(hprint.to_str("dst_file_path")) - # Copy the file. - _LOG.debug(hprint.to_str("src_file_path dst_file_path")) - dir_name = os.path.dirname(dst_file_path) - # Check that the files are different. - if os.path.exists(src_file_path) and os.path.isdir(dst_file_path): - if filecmp.cmp(src_file_path, dst_file_path, shallow=False): - _LOG.info( - "File '%s' is identical to '%s', skipping", - src_file_path, - dst_file_path, - ) - continue - # Copy the file. 
- hio.create_dir(dir_name, incremental=True) - cmd = f"cp -f {src_file_path} {dst_file_path}" - _LOG.debug(hprint.to_str("cmd")) - _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path) - hsystem.system(cmd) - _LOG.info("Done") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py deleted file mode 100644 index 215820d4d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py +++ /dev/null @@ -1,380 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_perms as hlitaper -""" - -import grp -import logging -import os -import pwd -import stat -from typing import Dict, List, Tuple - -import tqdm -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. 
-import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Fix permission -# ############################################################################# - - -# The desired invariants are that all files -# 1) are owned by our user or by Docker user -# 2) have the shared group as group -# 3) have the same user and group permissions - -# E.g., -# -rw-rw-r-- 1 sasm sasm-fileshare 21877 Nov 3 18:11 pytest_logger.log - -# The possible problems are: -# -r--r--r-- 1 sasm sasm-fileshare ./.git/objects/02/4df16f66c87bdfb -# -rw-r--r-- 1 265533 sasm-fileshare ./core_lime/dataflow/nodes/test/te -# -rw-rw-r-- 1 265533 sasm-fileshare ./research/real_time/notebooks/Lim - -# drwxr-sr-x 2 gsaggese sasm-fileshare 35 Oct 12 21:51 test -# chmod g=u amp/dev_scripts/git/git_hooks/test - - -def _save_dir_status(dir_name: str, filename: str) -> None: - cmd = f'find {dir_name} -name "*" | sort | xargs ls -ld >{filename}' - hsystem.system(cmd) - _LOG.info("Saved dir status in %s", filename) - - -# From https://stackoverflow.com/questions/1830618 -def _get_user_group(filename: str) -> Tuple[str, str]: - """ - Return the symbolic name of user and group of a file. - """ - uid = os.stat(filename).st_uid - try: - user = pwd.getpwuid(uid).pw_name - except KeyError as e: - # _LOG.warning("Error: ", str(e)) - _ = e - user = str(uid) - # - gid = os.stat(filename).st_gid - try: - group = grp.getgrgid(gid).gr_name - except KeyError as e: - _ = e - group = str(gid) - return user, group - - -def _find_files_for_user(dir_name: str, user: str, is_equal: bool) -> List[str]: - """ - Find all the files under `abs_dir` that are owned or not by `user`. - """ - _LOG.debug("") - mode = "\\!" 
if not is_equal else "" - cmd = f'find {dir_name} -name "*" {mode} -user "{user}"' - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - return files - - -def _find_files_for_group( - dir_name: str, group: str, is_equal: bool -) -> List[str]: - """ - Find all the files under `abs_dir` that are owned by a group `group`. - """ - _LOG.debug("") - mode = "\\!" if not is_equal else "" - cmd = f'find {dir_name} -name "*" {mode} -group "{group}"' - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - return files - - -def _compute_stats_by_user_and_group(dir_name: str) -> Tuple[Dict, Dict, Dict]: - """ - Scan all the files reporting statistics in terms of users and groups. - - It also compute a mapping from file to user and group. - """ - _LOG.debug("") - # Find all files. - cmd = f'find {dir_name} -name "*"' - _, txt = hsystem.system_to_string(cmd) - files = txt.split("\n") - # Get the user of each file. - user_to_files: Dict[str, List[str]] = {} - group_to_files: Dict[str, List[str]] = {} - file_to_user_group: Dict[str, Tuple[str, str]] = {} - for file in files: - user, group = _get_user_group(file) - # Update mapping from user to files. - if user not in user_to_files: - user_to_files[user] = [] - user_to_files[user].append(file) - # Update mapping from group to files. - if group not in group_to_files: - group_to_files[group] = [] - group_to_files[group].append(file) - # Update the mapping from file to (user, group). - hdbg.dassert_not_in(file, file_to_user_group) - file_to_user_group[file] = (user, group) - # Print stats. 
- txt1 = "" - for user, files in user_to_files.items(): - txt1 += f"{user}({len(files)}), " - _LOG.info("user=%s", txt1) - # - txt2 = "" - for group, files in group_to_files.items(): - txt2 += f"{group}({len(files)}), " - _LOG.info("group=%s", txt2) - return user_to_files, group_to_files, file_to_user_group - - -def _ls_l(files: List[str], size: int = 100) -> str: - """ - Run `ls -l` on the files using chunks of size `size`. - """ - txt = [] - for pos in range(0, len(files), size): - files_tmp = files[pos : pos + size] - files_tmp = [f"'{f}'" for f in files_tmp] - cmd = f"ls -ld {' '.join(files_tmp)}" - _, txt_tmp = hsystem.system_to_string(cmd) - txt.append(txt_tmp) - return "\n".join(txt) - - -def _exec_cmd_by_chunks( - cmd: str, files: List[str], abort_on_error: bool, size: int = 100 -) -> None: - """ - Execute `cmd` on files using chunks of size `size`. - """ - for pos in range(0, len(files), size): - files_tmp = files[pos : pos + size] - files_tmp = [f"'{f}'" for f in files_tmp] - cmd = f"{cmd} {' '.join(files_tmp)}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -def _print_problems(dir_name: str = ".") -> None: - """ - Do `ls -l` on files that are not owned by the current user and its group. - - This function is used for debugging. 
- """ - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - user = hsystem.get_user_name() - docker_user = hserver.get_docker_user() - # user_group = f"{user}_g" - # shared_group = hserver.get_docker_shared_group() - files_with_problems = [] - for file, (curr_user, curr_group) in file_to_user_group.items(): - _ = curr_user, curr_group - # Files owned by our user and - # if curr_user == user and curr_group == user_group: - # continue - if curr_user in (user, docker_user): - continue - # if curr_group == shared_group: - # continue - files_with_problems.append(file) - # - txt = _ls_l(files_with_problems) - print(txt) - - -def _change_file_ownership(file: str, abort_on_error: bool) -> None: - """ - Change ownership of files with an invalid user (e.g., 265533) by copying - and deleting. - """ - # pylint: disable=line-too-long - # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # -rw-r--r-- 1 265533 sasm-fileshare 14327 Nov 3 14:01 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # - # > mv ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{,.OLD} - # - # > cp ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{.OLD,} - # - # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # -rw-r--r-- 1 gsaggese sasm-fileshare 14327 Nov 5 17:58 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # - # > rm -rf ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py.OLD - # pylint: enable=line-too-long - hdbg.dassert_file_exists(file) - tmp_file = file + ".OLD" - # - cmd = f"mv {file} {tmp_file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - # - cmd = f"cp {tmp_file} {file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - # - cmd = f"rm -rf {tmp_file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -def _fix_invalid_owner(dir_name: str, fix: bool, abort_on_error: bool) -> None: - """ - Fix files that are owned by a user 
that is not the current user or the - Docker one. - """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - # - _LOG.info("Before fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - # - user = hsystem.get_user_name() - docker_user = hserver.get_docker_user() - for file, (curr_user, _) in tqdm.tqdm(file_to_user_group.items()): - if curr_user not in (user, docker_user): - _LOG.info("Fixing file '%s'", file) - hdbg.dassert_file_exists(file) - cmd = f"ls -l {file}" - hsystem.system( - cmd, abort_on_error=abort_on_error, suppress_output=False - ) - if fix: - _change_file_ownership(file, abort_on_error) - # - _LOG.info("After fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - - -def _fix_group(dir_name: str, fix: bool, abort_on_error: bool) -> None: - """ - Ensure that all files are owned by the shared group. - """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - _LOG.info("Before fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - if fix: - # Get the user and the group. - user = hsystem.get_user_name() - user_group = f"{user}_g" - shared_group = hserver.get_docker_shared_group() - # - for file, (curr_user, curr_group) in file_to_user_group.items(): - # If the group is the shared group there is nothing to do. - if curr_group == shared_group: - continue - cmd = f"chgrp {shared_group} {file}" - if curr_user == user: - # This is a paranoia check. - hdbg.dassert_eq(curr_group, user_group) - else: - # For files not owned by the current user, we need to `sudo`. - cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - _LOG.info("After fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - else: - _LOG.warning("Skipping fix") - - -def _fix_group_permissions(dir_name: str, abort_on_error: bool) -> None: - """ - Ensure that all files are owned by the shared group. 
- """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - user = hsystem.get_user_name() - # docker_user = get_default_param("DOCKER_USER") - for file, (curr_user, curr_group) in tqdm.tqdm(file_to_user_group.items()): - _ = curr_group - st_mode = os.stat(file).st_mode - perms = oct(st_mode & 0o777) - # perms=0o775 - if perms[2] != perms[3]: - _LOG.debug("%s -> %s, %s", file, oct(st_mode), perms) - cmd = f"chmod g=u {file}" - if curr_user != user: - # For files not owned by the current user, we need to `sudo`. - cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - is_dir = os.path.isdir(file) - if is_dir: - # pylint: disable=line-too-long - # From https://www.gnu.org/software/coreutils/manual/html_node/Directory-Setuid-and-Setgid.html - # If a directory - # inherit the same group as the directory, - # pylint: enable=line-too-long - has_set_group_id = st_mode & stat.S_ISGID - if not has_set_group_id: - cmd = f"chmod g+s {file}" - if curr_user != user: - # For files not owned by the current user, we need to `sudo`. 
- cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -@task -def fix_perms( # type: ignore - ctx, dir_name=".", action="all", fix=True, abort_on_error=True -): - """ - :param action: - - `all`: run all the fixes - - `print_stats`: print stats about file users and groups - - `print_problems`: - - `fix_invalid_owner`: fix the files with an invalid owner (e.g., mysterious - 265533) - - `fix_group`: ensure that shared group owns all the files - - `fix_group_permissions`: ensure that the group permissions are the same - as the owner ones - """ - _ = ctx - hlitauti.report_task() - # - if hserver.is_dev4(): - if action == "all": - action = ["fix_invalid_owner", "fix_group", "fix_group_permissions"] - else: - action = [action] - # - file_name1 = "./tmp.fix_perms.before.txt" - _save_dir_status(dir_name, file_name1) - # - if "print_stats" in action: - _compute_stats_by_user_and_group(dir_name) - if "print_problems" in action: - _print_problems(dir_name) - if "fix_invalid_owner" in action: - _fix_invalid_owner(dir_name, fix, abort_on_error) - if "fix_group" in action: - _fix_group(dir_name, fix, abort_on_error) - if "fix_group_permissions" in action: - _fix_group_permissions(dir_name, abort_on_error) - # - file_name2 = "./tmp.fix_perms.after.txt" - _save_dir_status(dir_name, file_name2) - # - cmd = f"To compare run:\n> vimdiff {file_name1} {file_name2}" - print(cmd) - elif hserver.is_dev_csfy(): - user = hsystem.get_user_name() - group = user - cmd = f"sudo chown -R {user}:{group} *" - hsystem.system(cmd) - cmd = f"sudo chown -R {user}:{group} .pytest_cache" - hsystem.system(cmd, abort_on_error=False) - elif hserver.is_external_dev(): - # Nothing to do. 
- pass - else: - raise ValueError(f"Invalid machine {os.uname()[1]}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py deleted file mode 100644 index 512c09a60..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_print as hlitapri -""" - -import logging -import os -import re - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Set-up. -# ############################################################################# - - -@task -def print_setup(ctx): # type: ignore - """ - Print some configuration variables. - """ - hlitauti.report_task() - _ = ctx - var_names = "CSFY_ECR_BASE_PATH BASE_IMAGE".split() - for v in var_names: - print(f"{v}={hlitauti.get_default_param(v)}") - - -@task -def print_tasks(ctx, as_code=False): # type: ignore - """ - Print all the available tasks in `lib_tasks.py`. - - These tasks might be exposed or not by different. 
- - :param as_code: print as python code so that it can be embed in a - `from helpers.lib_tasks import ...` - """ - hlitauti.report_task() - _ = ctx - func_names = [] - lib_tasks_file_name = os.path.join( - hgit.get_amp_abs_path(), "helpers/lib_tasks.py" - ) - hdbg.dassert_file_exists(lib_tasks_file_name) - # TODO(gp): Use __file__ instead of hardwiring the file. - cmd = rf'\grep "^@task" -A 1 {lib_tasks_file_name} | grep def' - # def print_setup(ctx): # type: ignore - # def git_pull(ctx): # type: ignore - # def git_fetch_master(ctx): # type: ignore - _, txt = hsystem.system_to_string(cmd) - for line in txt.split("\n"): - _LOG.debug("line=%s", line) - m = re.match(r"^def\s+(\S+)\(", line) - if m: - func_name = m.group(1) - _LOG.debug(" -> %s", func_name) - func_names.append(func_name) - func_names = sorted(func_names) - if as_code: - print("\n".join([f"{fn}," for fn in func_names])) - else: - print("\n".join(func_names)) - - -@task -def print_env( - ctx, - repo_config=True, - server_config=True, - system_signature=True, - env_vars=True, -): # type: ignore - """ - Print the repo configuration. 
- """ - _ = ctx - print( - henv.env_to_str( - repo_config=repo_config, - server_config=server_config, - system_signature=system_signature, - env_vars=env_vars, - ) - ) - - -# TODO(gp): -# Print a CSV -# cat /share/data/cf_production/20221005/system_log_dir/process_forecasts/target_positions/20221005_153006.csv | column -t -s, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py deleted file mode 100644 index 98a9b203e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py +++ /dev/null @@ -1,1743 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_pytest as hlitapyt -""" - -import json -import logging -import os -import re -import sys -from typing import Any, List, Optional, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hcoverage as hcovera -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htraceback as htraceb -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_lint as hlitalin -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Run tests. 
-# ############################################################################# - - -_COV_PYTEST_OPTS = [ - # Only compute coverage for current project and not venv libraries. - "--cov=.", - "--cov-branch", - # Report the missing lines. - # Name Stmts Miss Cover Missing - # ------------------------------------------------------------------------- - # myproj/__init__ 2 0 100% - # myproj/myproj 257 13 94% 24-26, 99, 149, 233-236, 297-298 - "--cov-report term-missing", - # Report data in the directory `htmlcov`. - "--cov-report html", - # "--cov-report annotate", -] - - -_TEST_TIMEOUTS_IN_SECS = { - "fast_tests": 5, - "slow_tests": 30, - "superslow_tests": 60 * 60, -} - - -_NUM_TIMEOUT_TEST_RERUNS = { - "fast_tests": 2, - "slow_tests": 1, - "superslow_tests": 1, -} - - -@task -def run_blank_tests(ctx, stage="dev", version=""): # type: ignore - """ - (ONLY CI/CD) Test that pytest in the container works. - """ - hlitauti.report_task() - _ = ctx - base_image = "" - cmd = '"pytest -h >/dev/null"' - docker_cmd_ = hlitadoc._get_docker_compose_cmd( - base_image, stage, version, cmd - ) - hsystem.system(docker_cmd_, abort_on_error=False, suppress_output=False) - - -def _select_tests_to_skip(test_list_name: str) -> str: - """ - Generate text for pytest specifying which tests to deselect. - """ - if test_list_name == "fast_tests": - skipped_tests = "not slow and not superslow" - elif test_list_name == "slow_tests": - skipped_tests = "slow and not superslow" - elif test_list_name == "superslow_tests": - skipped_tests = "not slow and superslow" - else: - raise ValueError(f"Invalid `test_list_name`={test_list_name}") - return skipped_tests - - -def _build_run_command_line( - test_list_name: str, - custom_marker: str, - pytest_opts: str, - skip_submodules: bool, - coverage: bool, - collect_only: bool, - tee_to_file: bool, - n_threads: str, - *, - allure_dir: Optional[str] = None, -) -> str: - """ - Build the pytest run command. 
- - E.g., - - ``` - pytest -m "optimizer and not slow and not superslow" \ - . \ - -o timeout_func_only=true \ - --timeout 5 \ - --reruns 2 \ - --only-rerun "Failed: Timeout" - ``` - - The rest of params are the same as in `run_fast_tests()`. - - The invariant is that we don't want to duplicate pytest options that can be - passed by the user through `-p` (unless really necessary). - - :param test_list_name: "fast_tests", "slow_tests" or - "superslow_tests" - :param custom_marker: specify a space separated list of - `pytest` markers to skip (e.g., `optimizer` for the optimizer - tests, see `pytest.ini`). Empty means no marker to skip - :param allure_dir: directory to save allure results to. If specified, allure - plugin will be installed on-the-fly and results will be generated - and saved to the specified directory - """ - hdbg.dassert_in( - test_list_name, _TEST_TIMEOUTS_IN_SECS, "Invalid test_list_name" - ) - pytest_opts = pytest_opts or "." - pytest_opts_tmp = [] - # Select tests to skip based on the `test_list_name` (e.g., fast tests) - # and on the custom marker, if present. - skipped_tests = _select_tests_to_skip(test_list_name) - timeout_in_sec = _TEST_TIMEOUTS_IN_SECS[test_list_name] - # Detect if we are running on a CK dev server / inside CI - # or a laptop outside the CK infra. - is_outside_ck_infra = ( - not hserver.is_dev_csfy() and not hserver.is_inside_ci() - ) - if is_outside_ck_infra: - timeout_multiplier = 10 - _LOG.warning( - f"Tests are running outside the CK server and CI, timeout increased {timeout_multiplier} times." - ) - # Since we are running outside the CK server we increase the duration - # of the timeout, since the thresholds are set for the CK server. 
- timeout_in_sec *= timeout_multiplier - if custom_marker != "": - pytest_opts_tmp.append(f'-m "{custom_marker} and {skipped_tests}"') - else: - pytest_opts_tmp.append(f'-m "{skipped_tests}"') - if pytest_opts: - pytest_opts_tmp.append(pytest_opts) - # Adding `timeout_func_only` is a workaround for - # https://github.com/pytest-dev/pytest-rerunfailures/issues/99. Because of - # it, we limit only run time, without setup and teardown time. - pytest_opts_tmp.append("-o timeout_func_only=true") - pytest_opts_tmp.append(f"--timeout {timeout_in_sec}") - num_reruns = _NUM_TIMEOUT_TEST_RERUNS[test_list_name] - pytest_opts_tmp.append( - f'--reruns {num_reruns} --only-rerun "Failed: Timeout"' - ) - if hserver.skip_submodules_test(): - # For some repos submodules should be skipped - # regardless of the passed value. - skip_submodules = True - if skip_submodules: - submodule_paths = hgit.get_submodule_paths() - _LOG.warning( - "Skipping %d submodules: %s", len(submodule_paths), submodule_paths - ) - pytest_opts_tmp.append( - " ".join([f"--ignore {path}" for path in submodule_paths]) - ) - if coverage: - pytest_opts_tmp.append(" ".join(_COV_PYTEST_OPTS)) - if collect_only: - _LOG.warning("Only collecting tests as per user request") - pytest_opts_tmp.append("--collect-only") - # Indicate the number of threads for parallelization. - if n_threads != "serial": - pytest_opts_tmp.append(f"-n {str(n_threads)}") - if allure_dir is not None: - pytest_opts_tmp.append(f"--alluredir={allure_dir}") - # Generate test report. - pytest_opts_tmp.append("--junit-xml=tmp.junit.xml") - # Add runnable dir image name to the test report. - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - pytest_opts_tmp.append(f'-o junit_suite_name="{image_name}"') - # Concatenate the options. 
- _LOG.debug("pytest_opts_tmp=\n%s", str(pytest_opts_tmp)) - pytest_opts_tmp = [po for po in pytest_opts_tmp if po != ""] - # TODO(gp): Use to_multi_line_cmd() - pytest_opts = " ".join([po.rstrip().lstrip() for po in pytest_opts_tmp]) - cmd = f"pytest {pytest_opts}" - if allure_dir is not None: - # Install the `allure-pytest` before running the tests. This is needed - # to generate Allure results which serve as an input for generating - # Allure HTML reports. - # Excluding the command `"source /venv/bin/activate"` because post-activation, - # the `PATH` variable lacks necessary values, causing a failure in a test - # associated with `publish_notebook.py`. - cmd = f"sudo /venv/bin/pip install allure-pytest && {cmd}" - if tee_to_file: - cmd += f" 2>&1 | tee tmp.pytest.{test_list_name}.log" - return cmd - - -def _run_test_cmd( - ctx: Any, - stage: str, - version: str, - cmd: str, - coverage: bool, - collect_only: bool, - skip_pull: bool, - start_coverage_script: bool, - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - See params in `run_fast_tests()`. - """ - if collect_only: - # Clean files. - hlitauti.run(ctx, "rm -rf ./.coverage*") - # Run. - base_image = "" - # We need to add some " to pass the string as it is to the container. - cmd = f"'{cmd}'" - # We use "host" for the app container to allow access to the database - # exposing port 5432 on localhost (of the server), when running dind we - # need to switch back to bridge. See CmTask988. - extra_env_vars = ["NETWORK_MODE=bridge"] - docker_cmd_ = hlitadoc._get_docker_compose_cmd( - base_image, stage, version, cmd, extra_env_vars=extra_env_vars - ) - _LOG.info("cmd=%s", docker_cmd_) - # We can't use `hsystem.system()` because of buffering of the output, - # losing formatting and so on, so we stick to executing through `ctx`. - rc: Optional[int] = hlitadoc._docker_cmd( - ctx, docker_cmd_, skip_pull=skip_pull, **ctx_run_kwargs - ) - # Print message about coverage. 
- if coverage: - msg = """ - - The coverage results in textual form are above - - - To browse the files annotate with coverage, start a server (not from the - container): - > (cd ./htmlcov; python -m http.server 33333) - - Then go with your browser to `localhost:33333` to see which code is - covered - """ - msg = hprint.dedent(msg) - print(msg) - if start_coverage_script: - # Create and run a script to show the coverage in the browser. - script_txt = """ - (sleep 2; open http://localhost:33333) & - (cd ./htmlcov; python -m http.server 33333) - """ - script_txt = hprint.dedent(script_txt) - script_name = "./tmp.coverage.sh" - hio.create_executable_script(script_name, script_txt) - coverage_rc = hsystem.system(script_name) - if coverage_rc != 0: - _LOG.warning( - "Setting `rc` to `0` even though the coverage script fails." - ) - rc = 0 - return rc - - -def _run_tests( - ctx: Any, - test_list_name: str, - stage: str, - version: str, - custom_marker: str, - pytest_opts: str, - skip_pull: bool, - skip_submodules: bool, - coverage: bool, - collect_only: bool, - tee_to_file: bool, - n_threads: str, - git_clean_: bool, - *, - start_coverage_script: bool = False, - allure_dir: Optional[str] = None, - # TODO(Grisha): do we need to expose ctx kwargs to the invoke targets? - # E.g., to `run_fast_tests`. See CmTask3602 "All tests fail". - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - See params in `run_fast_tests()`. - """ - if git_clean_: - cmd = "invoke git_clean --fix-perms" - hlitauti.run(ctx, cmd) - # Build the command line. - cmd = _build_run_command_line( - test_list_name, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - allure_dir=allure_dir, - ) - # Execute the command line. - rc = _run_test_cmd( - ctx, - stage, - version, - cmd, - coverage, - collect_only, - skip_pull, - start_coverage_script, - **ctx_run_kwargs, - ) - return rc - - -# TODO(Grisha): "Unit tests run_*_tests invokes" CmTask #1652. 
-@task -def run_tests( # type: ignore - ctx, - test_lists, - abort_on_first_error=False, - stage="dev", - version="", - custom_marker="", - pytest_opts="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, - **kwargs, -): - """ - :param test_lists: comma separated list with test lists to run (e.g., `fast_test,slow_tests`) - :param abort_on_first_error: stop after the first test list failing - """ - results = [] - for test_list_name in test_lists.split(","): - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - warn=True, - allure_dir=allure_dir, - **kwargs, - ) - if rc != 0: - _LOG.error("'%s' tests failed", test_list_name) - if abort_on_first_error: - sys.exit(-1) - results.append((test_list_name, rc)) - # - rc = any(result[1] for result in results) - # Summarize the results. - _LOG.info("# Tests run summary:") - for test_list_name, rc in results: - if rc != 0: - _LOG.error("'%s' tests failed", test_list_name) - else: - _LOG.info("'%s' tests succeeded", test_list_name) - return rc - - -def _get_custom_marker( - *, - run_only_test_list: str = "", - skip_test_list: str = "", -) -> str: - """ - Get a custom pytest marker from comma-separated string representations of - test lists to run or skip. - - :param run_only_test_list: a string of comma-separated markers to - run, e.g. `run_only_test_list = - "requires_ck_infra,requires_aws"` - :param skip_test_list: a string of comma-separated markers to skip - :return: custom pytest marker - """ - # If we are running outside the CK server / CI, tests requiring CK infra - # should be automatically skipped. - is_outside_ck_infra = ( - not hserver.is_dev_csfy() and not hserver.is_inside_ci() - ) - # Skip tests that requires CK infra. 
- if is_outside_ck_infra: - _LOG.warning( - "Skipping the tests that require CK " - "infra when running outside the CK server / CI." - ) - if skip_test_list: - skip_test_list = "requires_ck_infra," + skip_test_list - else: - skip_test_list = "requires_ck_infra" - # Convert string representations of lists to actual lists. - if run_only_test_list: - # This works as expected when there is a single test in the list. - run_only_test_list_items = run_only_test_list.split(",") - _LOG.warning("Running only tests inside %s.", run_only_test_list_items) - else: - run_only_test_list_items = [] - if skip_test_list: - # This works as expected when there is a single test in the list. - skip_test_list_items = skip_test_list.split(",") - _LOG.warning("Skipping the tests inside %s.", skip_test_list_items) - else: - # The list can be empty when running inside CK infra. - skip_test_list_items = [] - # Convert marker strings for `pytest -m` using `and` and `not`. - run_only_marker_string = " and ".join(run_only_test_list_items) - skip_marker_string = " and ".join( - [("not " + item) for item in skip_test_list_items] - ) - if run_only_marker_string: - if skip_marker_string: - custom_marker = run_only_marker_string + " and " + skip_marker_string - else: - custom_marker = run_only_marker_string - else: - custom_marker = skip_marker_string - return custom_marker - - -# TODO(gp): Pass a test_list in fast, slow, ... instead of duplicating all the code CmTask #1571. -@task -def run_fast_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast tests. check `gh auth status` before invoking to avoid auth - errors. - - :param stage: select a specific stage for the Docker image - :param pytest_opts: additional options for `pytest` invocation. 
It can be empty - :param run_only_test_list: select markers to run. Takes comma-separated tokens, - e.g. `--run_only_test_list = requires_ck_infra,requires_aws` - :param skip_test_list: select markers to skip. Takes comma-separated tokens. - :param skip_submodules: ignore all the dir inside a submodule - :param coverage: enable coverage computation - :param collect_only: do not run tests but show what will be executed - :param tee_to_file: save output of pytest in `tmp.pytest.log` - :param n_threads: the number of threads to run the tests with - - "auto": distribute the tests across all the available CPUs - :param git_clean_: run `invoke git_clean --fix-perms` before running the tests - :param allure_dir: directory to save allure results to. If specified, allure - plugin will be installed on-the-fly and results will be generated - and saved to the specified directory - """ - hlitauti.report_task() - hdbg.dassert( - not (run_only_test_list and skip_test_list), - "You can't specify both --run_only_test_list and --skip_test_list", - ) - test_list_name = "fast_tests" - # Convert cmd line marker lists to a pytest marker list. - custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_slow_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run slow tests. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - test_list_name = "slow_tests" - # Convert cmd line marker lists to a pytest marker list. 
- custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_superslow_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run superslow tests. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - test_list_name = "superslow_tests" - # Convert cmd line marker lists to a pytest marker list. - custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_fast_slow_tests( # type: ignore - ctx, - abort_on_first_error=False, - stage="dev", - version="", - pytest_opts="", - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast and slow tests back-to-back. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - # Run fast tests but do not fail on error. 
- test_lists = "fast_tests,slow_tests" - custom_marker = "" - rc = run_tests( - ctx, - test_lists, - abort_on_first_error, - stage, - version, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir, - ) - return rc - - -@task -def run_fast_slow_superslow_tests( # type: ignore - ctx, - abort_on_first_error=False, - stage="dev", - version="", - pytest_opts="", - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast, slow, superslow tests back-to-back. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - # Run fast tests but do not fail on error. - test_lists = "fast_tests,slow_tests,superslow_tests" - custom_marker = "" - rc = run_tests( - ctx, - test_lists, - abort_on_first_error, - stage, - version, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir, - ) - return rc - - -@task -def run_qa_tests( # type: ignore - ctx, - stage="dev", - version="", -): - """ - Run QA tests independently. - - :param version: version to tag the image and code with - :param stage: select a specific stage for the Docker image - """ - hlitauti.report_task() - # - qa_test_fn = hlitauti.get_default_param("QA_TEST_FUNCTION") - # Run the call back function. - rc = qa_test_fn(ctx, stage, version) - if not rc: - msg = "QA tests failed" - _LOG.error(msg) - raise RuntimeError(msg) - - -# ############################################################################# -# Coverage report -# ############################################################################# - - -def _publish_html_coverage_report_on_s3(aws_profile: str) -> None: - """ - Publish HTML coverage report on S3 so that it can be accessed via browser. - - Target S3 dir is constructed from linux user and Git branch name, e.g. 
- `s3://...-html/html_coverage/grisha_CmTask1047_fix_tests`. - """ - # Build the dir name from user and branch name. - user = hsystem.get_user_name() - branch_name = hgit.get_branch_name() - _LOG.debug("User='%s', branch_name='%s'", user, branch_name) - s3_html_coverage_dir = f"{user}_{branch_name}" - # Get the full path to the dir. - s3_html_base_dir = "html_coverage" - s3_html_bucket_path = hrecouti.get_repo_config().get_html_bucket_path() - s3_html_coverage_path = os.path.join( - s3_html_bucket_path, s3_html_base_dir, s3_html_coverage_dir - ) - # Copy HTML coverage data from the local dir to S3. - local_coverage_path = "./htmlcov" - # TODO(Nikola): Revert to `s3fs_.put` after `s3fs` is updated to latest - # version. See CmTask #2400. - use_aws_copy = True - if use_aws_copy: - sudo_prefix = "" - if hserver.is_inside_ci(): - # There is no AWS config in GH action, thus create default one from - # chosen profile. To bypass permission errors, `sudo` is used. - sudo_prefix = "sudo " - aws_set_param_cmd = "sudo aws configure set" - aws_set_profile_cmd = f"--profile {aws_profile}" - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" - if aws_profile.upper() in ["AM", "CK"] - else aws_profile.upper() - ) - # Check if AWS session token is set in environment variable. 
- if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - aws_set_value_pairs = [ - f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - f"aws_session_token ${profile_prefix}_AWS_SESSION_TOKEN", - f"region ${profile_prefix}_AWS_DEFAULT_REGION", - ] - else: - aws_set_value_pairs = [ - f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - f"region ${profile_prefix}_AWS_DEFAULT_REGION", - ] - aws_config_cmds = [ - f"{aws_set_param_cmd} {aws_set_value_pair} {aws_set_profile_cmd}" - for aws_set_value_pair in aws_set_value_pairs - ] - aws_config_pipe_cmd = " && ".join(aws_config_cmds) - hsystem.system(aws_config_pipe_cmd) - cp_cmd = ( - f"{sudo_prefix}aws s3 cp {local_coverage_path} {s3_html_coverage_path} " - f"--recursive --profile {aws_profile}" - ) - hsystem.system(cp_cmd) - else: - # Use `s3fs` to copy data to AWS S3. - s3fs_ = hs3.get_s3fs(aws_profile) - s3fs_.put(local_coverage_path, s3_html_coverage_path, recursive=True) - _LOG.info( - "HTML coverage report is published on S3: path=`%s`", - s3_html_coverage_path, - ) - - -@task -def run_coverage_report( # type: ignore - ctx, - target_dir, - generate_html_report=False, - publish_html_on_s3=True, - aws_profile="ck", -): - """ - Compute test coverage stats. - - The flow is: - - Run tests and compute coverage stats for each test type - - Combine coverage stats in a single file - - Generate a text report - - Generate a HTML report (optional) - - Post it on S3 (optional) - - :param target_dir: directory to compute coverage stats for. The value '.' 
- uses all the dirs in the current working directory - :param generate_html_report: whether to generate HTML coverage report or not - :param publish_html_on_s3: whether to publish HTML coverage report or not - :param aws_profile: the AWS profile to use for publishing HTML report - """ - # TODO(Grisha): allow user to specify which tests to run. - # Run fast tests for the target dir and collect coverage results. - fast_tests_cmd = f"invoke run_fast_tests --coverage -p {target_dir}" - hlitauti.run(ctx, fast_tests_cmd, use_system=False) - fast_tests_coverage_file = ".coverage_fast_tests" - create_fast_tests_file_cmd = f"mv .coverage {fast_tests_coverage_file}" - hsystem.system(create_fast_tests_file_cmd) - # Run slow tests for the target dir and collect coverage results. - slow_tests_cmd = f"invoke run_slow_tests --coverage -p {target_dir}" - hlitauti.run(ctx, slow_tests_cmd, use_system=False) - slow_tests_coverage_file = ".coverage_slow_tests" - create_slow_tests_file_cmd = f"mv .coverage {slow_tests_coverage_file}" - hsystem.system(create_slow_tests_file_cmd) - # Check that coverage files are present for both fast and slow tests. - hdbg.dassert_file_exists(fast_tests_coverage_file) - hdbg.dassert_file_exists(slow_tests_coverage_file) - # - report_cmd: List[str] = [] - # Clean the previous coverage results. For some docker-specific reasons - # command which combines stats does not work when being run first in - # the chain `bash -c "cmd1 && cmd2 && cmd3"`. So `erase` command which - # does not affect the coverage results was added as a workaround. - report_cmd.append("coverage erase") - # Merge stats for fast and slow tests into single dir. - report_cmd.append( - f"coverage combine --keep {fast_tests_coverage_file} {slow_tests_coverage_file}" - ) - # Specify the dirs to include and exclude in the report. - exclude_from_report = None - if target_dir == ".": - # Include all dirs. - include_in_report = "*" - if hserver.skip_submodules_test(): - # Exclude submodules. 
- submodule_paths = hgit.get_submodule_paths() - exclude_from_report = ",".join( - path + "/*" for path in submodule_paths - ) - else: - # Include only the target dir. - include_in_report = f"*/{target_dir}/*" - # Generate text report with the coverage stats. - report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report is not None: - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - if generate_html_report: - # Generate HTML report with the coverage stats. - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report is not None: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Execute commands above one-by-one inside docker. Coverage tool is not - # installed outside docker. - full_report_cmd = " && ".join(report_cmd) - docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" - hlitauti.run(ctx, docker_cmd_) - if publish_html_on_s3: - # Publish HTML report on S3. - _publish_html_coverage_report_on_s3(aws_profile) - - -def _get_inclusion_settings(target_dir: str) -> Tuple[str, Optional[str]]: - """ - Determine include/omit glob patterns for the coverage report for both text - and HTML coverage reports. - - :param target_dir: directory for coverage stats; use "." to indicate all directories - :return: glob pattern to include and a comma-separated glob pattern to omit - - Examples: - 1. Cover everything (no submodules to omit): - `_get_inclusion_settings(".")` -> `("*", "")` - - 2. Only cover code under a specific directory: - `_get_inclusion_settings("helpers")` -> `("*/helpers/*", None)` - - In `_run_coverage`: - - To cover the entire repo coverage (e.g. 
`helpers` project root): - `_get_inclusion_settings(".")` corresponds to - ``` - > coverage report --include=* --sort=Cover - > coverage html --include=* [--omit=submodule1/*,submodule2/*] - ``` - - - To cover a single-directory: - ` _get_inclusion_settings("helpers")` corresponds to: - ``` - > coverage report --include=*/helpers/* --sort=Cover - > coverage html --include=*/helpers/* [--omit=...] - ``` - """ - if target_dir == ".": - include_in_report = "*" - exclude_from_report = "" - if hserver.skip_submodules_test(): - submodule_paths: List[str] = hgit.get_submodule_paths() - exclude_from_report = ",".join( - f"{path}/*" for path in submodule_paths - ) - else: - include_in_report = f"*/{target_dir}/*" - exclude_from_report = None - return include_in_report, exclude_from_report - - -@task -def run_coverage(ctx, suite, target_dir=".", generate_html_report=False): # type: ignore - """ - Task to run coverage for any test suite. - - :param ctx: invoke context - :param suite: suite to run ("fast", "slow", "superslow") - :param target_dir: directory to measure coverage - """ - hdbg.dassert_in(suite, ("fast", "slow", "superslow")) - # Build the command line. - test_cmd_parts = [ - # Invoke the "_tests" task. - "invoke", - f"run_{suite}_tests", - # Enable coverage computation. - "--coverage", - # Specify which directory to test. - "-p", - target_dir, - ] - test_cmd = hlitauti.to_multi_line_cmd(test_cmd_parts) - # Run the tests under coverage. - hlitauti.run(ctx, test_cmd, use_system=False) - hdbg.dassert_file_exists(".coverage") - # Compute which files/dirs to include and omit in the report. - include_in_report, exclude_from_report = _get_inclusion_settings(target_dir) - report_cmd: List[str] = [ - # Reset any previous coverage data to avoid contamination. - "coverage erase" - ] - # Generate a text report, including only our target paths. 
- report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report: - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - # Produce HTML output for interactive browsing. - if generate_html_report: - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Export XML coverage report to integrate with Codecov. - report_cmd.append("coverage xml -o coverage.xml") - full_report_cmd = " && ".join(report_cmd) - docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" - hlitauti.run(ctx, docker_cmd_) - - -@task -def run_coverage_subprocess(ctx, target_dir=".", generate_html_report=False): # type: ignore - """ - Run comprehensive coverage using subprocess mode with hcoverage injection - and direct coverage run. This function runs all tests (fast, slow, - superslow) to generate complete coverage. - - :param ctx: invoke context - :param target_dir: directory to measure coverage - :param generate_html_report: whether to generate HTML coverage - report or not - """ - _LOG.info("Running comprehensive test coverage with subprocess injection...") - # Inject coverage hooks. - hcovera.inject() - try: - # Setup coverage environment for subprocess. - hcovera.coverage_commands_subprocess() - # Clean any existing coverage data. - erase_cmd = "coverage erase" - hsystem.system(erase_cmd, abort_on_error=True) - # Build the coverage command with parallel mode - run all tests. - coverage_cmd = ["coverage", "run", "--parallel-mode", "-m", "pytest"] - # Add target directory. - coverage_cmd.append(target_dir) - test_cmd = hlitauti.to_multi_line_cmd(coverage_cmd) - _LOG.debug("About to run command: {test_cmd}") - # Run tests with coverage tracking directly. - hsystem.system(test_cmd, abort_on_error=True) - # Combine coverage data from subprocesses directly. 
- hcovera.coverage_combine() - hdbg.dassert_file_exists(".coverage") - include_in_report, exclude_from_report = _get_inclusion_settings( - target_dir - ) - include_in_report = include_in_report.replace("/./", "/").replace( - "//", "/" - ) - report_cmd: List[str] = [] - # Generate a text report, including only our target paths. - report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report: - exclude_from_report = exclude_from_report.replace( - "/./", "/" - ).replace("//", "/") - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - if generate_html_report: - # Generate HTML report with the coverage stats. - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Export XML coverage report to integrate with Codecov. - report_cmd.append("coverage xml -o coverage.xml") - full_report_cmd = " && ".join(report_cmd) - # Run coverage report commands directly (avoid Docker-in-Docker issues). - hsystem.system(full_report_cmd, abort_on_error=True) - except Exception as e: - _LOG.error("Coverage with subprocess failed: %s", e) - raise - finally: - # Always cleanup coverage hooks. - hcovera.remove() - - -# ############################################################################# -# Traceback. -# ############################################################################# - - -# TODO(gp): Consolidate the code from dev_scripts_helpers/testing here. - - -@task -def traceback(ctx, log_name="tmp.pytest_script.txt", purify=True): # type: ignore - """ - Parse the traceback from Pytest and navigate it with vim. - - ``` - # Run a unit test. 
- > pytest helpers/test/test_traceback.py 2>&1 | tee tmp.pytest.log - > pytest.sh helpers/test/test_traceback.py - # Parse the traceback - > invoke traceback -i tmp.pytest.log - ``` - - :param log_name: the file with the traceback - :param purify: purify the filenames from client (e.g., from running inside Docker) - """ - hlitauti.report_task() - # - dst_cfile = "cfile" - hio.delete_file(dst_cfile) - # Convert the traceback into a cfile. - cmd = [] - cmd.append("traceback_to_cfile.py") - if log_name: - cmd.append(f"-i {log_name}") - cmd.append(f"-o {dst_cfile}") - # Purify the file names. - if purify: - cmd.append("--purify_from_client") - else: - cmd.append("--no_purify_from_client") - cmd = " ".join(cmd) - hlitauti.run(ctx, cmd) - # Read and navigate the cfile with vim. - if os.path.exists(dst_cfile): - cmd = 'vim -c "cfile cfile"' - hlitauti.run(ctx, cmd, pty=True) - else: - _LOG.warning("Can't find %s", dst_cfile) - - -# ############################################################################# -# pytest_clean -# ############################################################################# - - -@task -def pytest_clean(ctx): # type: ignore - """ - Clean pytest artifacts. - """ - hlitauti.report_task() - _ = ctx - import helpers.hpytest as hpytest - - hpytest.pytest_clean(".") - - -# ############################################################################# -# pytest_repro -# ############################################################################# - - -def _get_failed_tests_from_file(file_name: str) -> List[str]: - hdbg.dassert_file_exists(file_name) - txt = hio.from_file(file_name) - if file_name.endswith("/cache/lastfailed"): - # Decode the json-style string. 
- # { - # "vendors/test/test_vendors.py::Test_gp::test1": true, - # "vendors/test/test_vendors.py::Test_kibot_utils1::...": true, - # } - vals = json.loads(txt) - hdbg.dassert_isinstance(vals, dict) - tests = [k for k, v in vals.items() if v] - else: - # Extract failed tests from the regular text output. - tests = re.findall(r"FAILED (\S+\.py::\S+::\S+)\b", txt) - return tests - - -@task -def pytest_repro( # type: ignore - ctx, - mode="tests", - file_name="./.pytest_cache/v/cache/lastfailed", - show_stacktrace=False, - create_script=True, - script_name="./tmp.pytest_repro.sh", -): - """ - Generate commands to reproduce the failed tests after a `pytest` run. - - The workflow is: - ``` - # Run a lot of tests, e.g., the entire regression suite. - server> i run_fast_slow_tests 2>&1 | log pytest.txt - docker> pytest ... 2>&1 | log pytest.txt - - # Run the `pytest_repro` to summarize test failures and to generate - # commands to reproduce them. - server> i pytest_repro - ``` - - :param mode: the granularity level for generating the commands - - "tests" (default): failed test methods, e.g., - ``` - pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 - pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching2 - ``` - - "classes": classes of the failed tests, e.g., - ``` - pytest helpers/test/test_cache.py::TestCachingOnS3 - pytest helpers/test/test_cache.py::TestCachingOnS3_2 - ``` - - "files": files with the failed tests, e.g., - :param file_name: the name of the file containing the pytest output file to parse - :param show_stacktrace: whether to show the stacktrace of the failed tests - - only if it is available in the pytest output file - :param create_script: create a script to run the tests - :return: commands to reproduce pytest failures at the requested granularity level - """ - hlitauti.report_task() - _ = ctx - # Read file. 
- _LOG.info("Reading file_name='%s'", file_name) - hdbg.dassert_file_exists(file_name) - _LOG.info("Reading failed tests from file '%s'", file_name) - # E.g., vendors/test/test_vendors.py::Test_gp::test1 - tests = _get_failed_tests_from_file(file_name) - if len(tests) == 0: - _LOG.info("Found 0 failed tests") - return "" - _LOG.debug("tests=%s", str(tests)) - # Process the tests. - targets = [] - for test in tests: - data = test.split("::") - hdbg.dassert_lte(len(data), 3, "Can't parse '%s'", test) - # E.g., dev_scripts/testing/test/test_run_tests.py - # E.g., helpers/test/helpers/test/test_list.py::Test_list_1 - # E.g., core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5 - test_file_name = test_class = test_method = "" - if len(data) >= 1: - test_file_name = data[0] - if len(data) >= 2: - test_class = data[1] - if len(data) >= 3: - test_method = data[2] - _LOG.debug( - "test=%s -> (%s, %s, %s)", - test, - test_file_name, - test_class, - test_method, - ) - if mode == "tests": - targets.append(test) - elif mode == "files": - if test_file_name != "": - targets.append(test_file_name) - else: - _LOG.warning( - "Skipping test='%s' since test_file_name='%s'", - test, - test_file_name, - ) - elif mode == "classes": - if test_file_name != "" and test_class != "": - targets.append(f"{test_file_name}::{test_class}") - else: - _LOG.warning( - "Skipping test='%s' since test_file_name='%s', test_class='%s'", - test, - test_file_name, - test_class, - ) - else: - hdbg.dfatal(f"Invalid mode='{mode}'") - # Package the output. - # targets is a list of tests in the format - # `helpers/test/test_env.py::Test_env1::test_get_system_signature1`. 
- hdbg.dassert_isinstance(targets, list) - targets = hlist.remove_duplicates(targets) - targets = sorted(targets) - failed_test_output_str = ( - f"Found {len(targets)} failed pytest '{mode}' target(s); " - "to reproduce run:\n" - ) - res = [f"pytest {t}" for t in targets] - res = "\n".join(res) - failed_test_output_str += res - # - if show_stacktrace: - # Get the stacktrace block from the pytest output. - txt = hio.from_file(file_name) - if ( - "====== FAILURES ======" in txt - and "====== slowest 3 durations ======" in txt - ): - failures_blocks = txt.split("====== FAILURES ======")[1:] - failures_blocks = [ - x.split("====== slowest 3 durations ======")[0] - for x in failures_blocks - ] - txt = "\n".join([x.rstrip("=").lstrip("=") for x in failures_blocks]) - # Get the classes and names of the failed tests, e.g. - # "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5" -> - # -> "TestSmaModel.test5". - failed_test_names = [ - test.split("::")[1] + "." + test.split("::")[2] for test in tests - ] - tracebacks = [] - for name in failed_test_names: - # Get the stacktrace for the individual test failure. - # Its start is marked with the name of the test, e.g. - # "___________________ TestSmaModel.test5 ___________________". - start_block = "__ " + name + " __" - traceback_block = txt.rsplit(start_block, maxsplit=1)[-1] - end_block_options = [ - "__ " + n + " __" for n in failed_test_names if n != name - ] - for end_block in end_block_options: - # The end of the traceback for the current failed test is the - # start of the traceback for the next failed test. - if end_block in traceback_block: - traceback_block = traceback_block.split(end_block)[0] - _, traceback_ = htraceb.parse_traceback( - traceback_block, purify_from_client=False - ) - traceback_text = ( - traceback_.strip() if traceback_ is not None else "" - ) - tracebacks.append("\n".join(["# " + name, traceback_text, ""])) - # Combine the stacktraces for all the failures. 
- full_traceback = "\n\n" + "\n".join(tracebacks) - failed_test_output_str += full_traceback - res += full_traceback - _LOG.info("%s", failed_test_output_str) - if create_script: - # pytest \ - # amp/oms/test/test_portfolio.py::TestDatabasePortfolio2::test1 \ - # ... - # $* - script_txt = [] - # pytest or pytest_log - script_txt.append("pytest_log \\") - script_txt.extend([f" {t} \\" for t in targets]) - script_txt.append(" $*") - script_txt = "\n".join(script_txt) - msg = "To run the tests" - hio.create_executable_script(script_name, script_txt, msg=msg) - return res - - -# ############################################################################# -# pytest_rename_test -# ############################################################################# - - -@task -def pytest_rename_test(ctx, old_test_class_name, new_test_class_name): # type: ignore - """ - Rename the test and move its golden outcome. - - E.g., to rename a test class and all the test methods: - - :param old_test_class_name: old class name - :param new_test_class_name: new class name - """ - hlitauti.report_task() - _ = ctx - root_dir = os.getcwd() - # `lib_tasks` is used from outside the Docker container in the thin dev - # environment and we want to avoid pulling in too many dependencies, unless - # necessary, so we import dynamically. - import helpers.hunit_test_utils as hunteuti - - renamer = hunteuti.UnitTestRenamer( - old_test_class_name, new_test_class_name, root_dir - ) - renamer.run() - - -# ############################################################################# -# pytest_find_ununsed_goldens -# ############################################################################# - - -@task -def pytest_find_unused_goldens( # type: ignore - ctx, - dir_name=".", - stage="prod", - version="", - out_file_name="pytest_find_unused_goldens.output.txt", -): - """ - Detect mismatches between tests and their golden outcome files. 
- - - When goldens are required by the tests but the corresponding files - do not exist - - When the existing golden files are not actually required by the - corresponding tests - - :param dir_name: the head dir to start the check from - """ - hlitauti.report_task() - # Remove the log file. - if os.path.exists(out_file_name): - cmd = f"rm {out_file_name}" - hlitauti.run(ctx, cmd) - # Prepare the command line. - amp_abs_path = hgit.get_amp_abs_path() - amp_path = amp_abs_path.replace( - os.path.commonpath([os.getcwd(), amp_abs_path]), "" - ) - script_path = os.path.join( - amp_path, "dev_scripts/find_unused_golden_files.py" - ).lstrip("/") - docker_cmd_opts = [f"--dir_name {dir_name}"] - docker_cmd_ = f"{script_path} " + hlitauti._to_single_line_cmd( - docker_cmd_opts - ) - # Execute command line. - base_image = "" - cmd = hlitalin._get_lint_docker_cmd(base_image, docker_cmd_, stage, version) - cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}" - # Run. - hlitauti.run(ctx, cmd) - - -# ############################################################################# -# pytest_compare_logs -# ############################################################################# - - -def _purify_log_file( - file_name: str, remove_line_numbers: bool, grep_regex: str -) -> str: - txt = hio.from_file(file_name) - # Remove leading `16:34:27`. - txt = re.sub(r"^\d\d:\d\d:\d\d ", "", txt, flags=re.MULTILINE) - # Remove references like `at 0x7f43493442e0`. - txt = re.sub(r"at 0x\S{12}", "at 0x", txt, flags=re.MULTILINE) - # Remove `done (0.014 s)`. - txt = re.sub(r"(done) \(\d+\.\d+ s\)", "\\1", txt, flags=re.MULTILINE) - # Remove wall_clock_time='2022-06-17 04:36:56.062645-04:00'. - txt = re.sub(r"(wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) - # Remove `real_wall_clock_time = '2022-06-17 04:33:19.946025-04:00'`. - txt = re.sub(r"(real_wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) - # Remove `tqdm [00:00<00:00, 4.05it/s]`. 
- txt = re.sub(r"(htqdm.py.*)\[.*\]", "\\1", txt, flags=re.MULTILINE) - # Remove `Task-3`. - txt = re.sub(r"(Task-)\d+", "\\1", txt, flags=re.MULTILINE) - # Remove line number, e.g., - # `htqdm.py abstract_market_data.py get_data_for_interval:259` - if remove_line_numbers: - txt = re.sub( - r"(\.py [a-zA-Z_][a-zA-Z0-9_]*):\d+ ", - "\\1:0 ", - txt, - flags=re.MULTILINE, - ) - # - if grep_regex: - lines = [] - for line in txt.split("\n"): - if re.search(grep_regex, line): - lines.append(line) - txt = "\n".join(lines) - return txt - - -@task -def pytest_compare_logs( # type: ignore - ctx, file1, file2, remove_line_numbers=False, grep_regex="", dry_run=False -): - """ - Diff two log files removing the irrelevant parts (e.g., timestamps, object - pointers). - - :param remove_line_numbers: remove line numbers from function calls - (e.g., `abstract_market_data.py get_data_for_interval:259` - :param grep_regex: select lines based on a regex - """ - suffix = "tmp" - # - txt = _purify_log_file(file1, remove_line_numbers, grep_regex) - file1_tmp = hio.add_suffix_to_filename(file1, suffix) - hio.to_file(file1_tmp, txt) - # - txt = _purify_log_file(file2, remove_line_numbers, grep_regex) - file2_tmp = hio.add_suffix_to_filename(file2, suffix) - hio.to_file(file2_tmp, txt) - # Save the script to compare. 
- script_file_name = "./tmp.vimdiff_log.sh" - script_txt = f"vimdiff {file1_tmp} {file2_tmp}" - msg = "To diff run:" - hio.create_executable_script(script_file_name, script_txt, msg=msg) - hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) - - -# ############################################################################# -# pytest_buildmeister -# ############################################################################# - - -def _run( - cmd: str, - *, - abort_on_error: bool = False, - output_file: Optional[str] = None, - tee: bool = False, -) -> int: - rc = hsystem.system( - cmd, - abort_on_error=abort_on_error, - suppress_output=False, - log_level="echo_frame", - output_file=output_file, - tee=tee, - ) - return rc - - -def _get_invoke_cmd_line(target: str, opts: str, pytest_opts: str) -> str: - """ - - :param opts: options to pass to invoke - """ - cmd = ["invoke"] - cmd.append(target) - if opts: - cmd.append(opts) - if pytest_opts: - cmd.append("--pytest-opts " + pytest_opts) - cmd.append("2>&1") - return " ".join(cmd) - - -def _run_cmd_and_tg(cmd: str, *args: Any, **kwargs: Any) -> None: - rc = _run(cmd, *args, **kwargs) - if rc != 0: - # pytest returns 5, if there are no tests to run. - # On error, send Telegram message. - cmd = "tg.py" - _run(cmd, abort_on_error=False) - - -@task -def pytest_buildmeister_check(ctx, print_output=False): # type: ignore - """ - - :param print_output: print content of the file with the output of the - buildmeister run - """ - _ = ctx - # Concat the files generated by `invoke pytest_...` - log_file = "bm.log.txt" - if os.path.exists(log_file): - cmd = f"rm -rf {log_file}" - _run(cmd) - log_file = "bm.log.txt" - cmd = 'cat $(find . -name "bm.log*.txt" | sort) >' + log_file - _run(cmd) - # - if print_output: - print(hprint.frame("Print output")) - cmd = f"cat {log_file}" - _run(cmd) - # Report failures using `invoke pytest_repro`. 
- print(hprint.frame("Failures")) - # "> sudo -u sasm rm ./tmp.pytest_repro.sh; i pytest_repro -f {log_file}" - if os.path.exists("./tmp.pytest_repro.sh"): - cmd = "sudo -u sasm rm ./tmp.pytest_repro.sh" - _run(cmd) - # - cmd = f"invoke pytest_repro -f {log_file}" - _run(cmd) - # Report failures using `grep`. - print(hprint.frame("grep Failures")) - cmd = f"grep '^FAILED' {log_file} | sort" - _run(cmd) - - -@task -def pytest_buildmeister( # type: ignore - ctx, opts="", pytest_opts="", docker_clean=False, test=False -): - """ - Run the regression tests. - - - Run updating all the tests - - :param docker_clean: remove all dead Docker instances - :param opts: options to pass to the invoke (e.g., `--version 1.2.0` to test - a specific version of the Docker container) - :param pytest_opts: options to pass to pytest - :param test: just run a single quick test to verify functionality of this - script - """ - _ = ctx - if test: - # For testing. - pytest_opts = "amp/dataflow/backtest/test/test_dataflow_backtest_utils.py::Test_get_configs_from_command_line_Amp1::test1" - if docker_clean: - cmd = "dev_scripts_lime/docker_clean.sh" - _run(cmd) - # Clean and sync. 
- cmd = "invoke git_clean -f" - _run(cmd) - # - cmd = "invoke git_pull" - _run(cmd) - # - log_file = "bm.log*txt" - if os.path.exists(log_file): - cmd = f"rm -rf {log_file}" - _run(cmd) - # - files_to_merge = [] - # - target = "run_fast_tests" - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - cmd = "invoke fix_perms" - hsystem.system(cmd) - # - target = "run_slow_tests" - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - cmd = "invoke fix_perms" - _run(cmd) - # - target = "run_superslow_tests" - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - pytest_buildmeister_check(ctx) - - -# ############################################################################# -# pytest_collect_only -# ############################################################################# - - -@task -def pytest_collect_only(ctx): # type: ignore - _ = ctx - cmd = 'invoke docker_cmd --cmd "pytest --collect-only 2>&1"' - hsystem.system(cmd, suppress_output=False) - - -# ############################################################################# -# pytest_add_untracked_golden_outcomes -# ############################################################################# - - -@task -def pytest_add_untracked_golden_outcomes(ctx): # type: ignore - """ - Add the golden outcomes files that are not tracked under git. - """ - _ = ctx - cmd = 'git add $(git ls-files . 
--exclude-standard --others | grep "output" | grep -v tmp)' - hsystem.system(cmd, suppress_output=False) - - -# ############################################################################# -# pytest_failed -# ############################################################################# - - -def _parse_failed_tests( - txt: str, only_file: bool, only_class: bool -) -> Tuple[List[str], int, int]: - """ - Parse the failed tests from the pytest output. - - :param only_file: return only the file name - :param only_class: return only the class name - :return: - - failed_tests: list of failed tests - - num_failed: number of failed tests - - num_passed: number of passed tests - """ - hdbg.dassert_lte(only_file + only_class, 1) - failed_tests = [] - num_failed = num_passed = 0 - for line in txt.split("\n"): - # Remove non printable characters. - line = re.sub(r"[^\x20-\x7E]", "", line) - # FAILED oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - RuntimeError: - m = re.search(r"^(FAILED|ERROR) (\S+) -", line) - if m: - test_name = m.group(2) - _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) - failed_tests.append(test_name) - # helpers_root/helpers/test/test_hserver.py::Test_hserver1::test_gp1 (0.00 s) PASSED [ 36%] - m = re.search(r"(\S+) \(\S+ s\) (FAILED|ERROR)", line) - if m: - test_name = m.group(1) - _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) - failed_tests.append(test_name) - # ============ 11 failed, 917 passed, 113 skipped in 64.57s (0:01:04) ============ - # ======================== 4 failed, 43 passed in 40.48s ========================= - m = re.search(r"=+\s+(\d+)\s+failed,\s+(\d+)\s+passed.*", line) - if m: - num_failed = int(m.group(1)) - num_passed = int(m.group(2)) - failed_tests = sorted(list(set(failed_tests))) - # - if num_failed and num_passed and num_failed != len(failed_tests): - _LOG.warning( - "n_failed=%s len(failed_tests)=%s", num_failed, len(failed_tests) - ) - print(f"Failed 
tests: {num_failed}/{num_passed}") - # Filter, if needed. - if only_file or only_class: - failed_tests_tmp = [] - for test in failed_tests: - # oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - m = re.match(r"(\S+)::(\S+)::\S+$", test) - hdbg.dassert(m, f"Can't parse '{test}'") - if only_file: - failed_tests_tmp.append(m.group(1)) - elif only_class: - failed_tests_tmp.append(m.group(1) + "::" + m.group(2)) - else: - raise RuntimeError("Unexpected") - failed_tests = sorted(list(set(failed_tests_tmp))) - return failed_tests, num_failed, num_passed - - -@task -def pytest_failed( - ctx, only_file=False, only_class=False, file_name="tmp.pytest_script.txt" -): # type: ignore - _ = ctx - hlitauti.report_task() - # Read file. - txt = hio.from_file(file_name) - # Extract info. - failed_tests, _, _ = _parse_failed_tests(txt, only_file, only_class) - print("\n".join(failed_tests)) - # Write the repro in a file. - repro_file_name = "tmp.pytest_failed.sh" - repro_txt = "pytest_log " + " ".join(failed_tests) + " $*" - hio.to_file(repro_file_name, repro_txt) - # - hio.create_executable_script(repro_file_name, repro_txt) - _LOG.warning("To run the failed tests run: %s", repro_file_name) - # Save to clipboard. 
- txt = " ".join(failed_tests) - hsystem.to_pbcopy(txt, pbcopy=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py deleted file mode 100644 index 8039a1b07..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py +++ /dev/null @@ -1,395 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_utils as hlitauti -""" - -import datetime -import glob -import logging -import os -import pprint -import re -import sys -from typing import Any, Dict, List, Optional, Union - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Default params. -# ############################################################################# - -# This is used to inject the default params. -# TODO(gp): Using a singleton here is not elegant but simple. -_DEFAULT_PARAMS = {} - - -def set_default_params(params: Dict[str, Any]) -> None: - global _DEFAULT_PARAMS - _DEFAULT_PARAMS = params - _LOG.debug("Assigning:\n%s", pprint.pformat(params)) - - -def has_default_param(key: str) -> bool: - hdbg.dassert_isinstance(key, str) - return key in _DEFAULT_PARAMS - - -def get_default_param(key: str, *, override_value: Any = None) -> Any: - """ - Return the value from the default parameters dictionary, optionally - overriding it. 
- """ - hdbg.dassert_isinstance(key, str) - value = None - if has_default_param(key): - value = _DEFAULT_PARAMS[key] - if override_value: - _LOG.info("Overriding value %s with %s", value, override_value) - value = override_value - hdbg.dassert_is_not( - value, None, "key='%s' not defined from %s", key, _DEFAULT_PARAMS - ) - return value - - -def reset_default_params() -> None: - params: Dict[str, Any] = {} - set_default_params(params) - - -# ############################################################################# -# Utils. -# ############################################################################# - - -def parse_command_line() -> None: - # Since it's not easy to add global command line options to invoke, we - # piggy back the option that already exists. - # If one uses the debug option for `invoke` we turn off the code - # debugging. - # TODO(gp): Check http://docs.pyinvoke.org/en/1.0/concepts/library.html# - # modifying-core-parser-arguments - if ("-d" in sys.argv) or ("--debug" in sys.argv): - verbosity = logging.DEBUG - else: - verbosity = logging.INFO - # Suppress command line logging if only_print_files is requested. - report_command_line = "--only-print-files" not in sys.argv - hdbg.init_logger(verbosity=verbosity, report_command_line=report_command_line) - - -# NOTE: We need to use a `# type: ignore` for all the @task functions because -# pyinvoke infers the argument type from the code and mypy annotations confuse -# it (see https://github.com/pyinvoke/invoke/issues/357). - -# In the following, when using `lru_cache`, we use functions from `hsyste` -# instead of `ctx.run()` since otherwise `lru_cache` would cache `ctx`. - -# We prefer not to cache functions running `git` to avoid stale values if we -# call git (e.g., if we cache Git hash and then we do a `git pull`). 
- -# pyinvoke `ctx.run()` is useful for unit testing, since it allows to: -# - mock the result of a system call -# - register the issued command line (to create the expected outcome of a test) -# On the other side `system_interaction.py` contains many utilities that make -# it easy to interact with the system. -# Once AmpPart1347 is implemented we can replace all the `ctx.run()` with calls -# to `system_interaction.py`. - - -_WAS_FIRST_CALL_DONE = False - - -# TODO(gp): This can be part of the @task -def report_task(txt: str = "", container_dir_name: str = ".") -> None: - """ - Print the task description. - - Each task should call this function at the beginning to print the - task name. - """ - # On the first invocation check the version of the container. - global _WAS_FIRST_CALL_DONE - if not _WAS_FIRST_CALL_DONE: - _WAS_FIRST_CALL_DONE = True - hversio.check_version(container_dir_name) - # Print the name of the function. - msg = hprint.func_signature_to_str( - skip_vars="ctx", assert_on_skip_vars_error=False, frame_level=3 - ) - print(hprint.color_highlight(msg, color="purple")) - - -# TODO(gp): Move this to helpers.system_interaction and allow to add the switch -# globally. -def _to_single_line_cmd(cmd: Union[str, List[str]]) -> str: - """ - Convert a multiline command (as a string or list of strings) into a single - line. - - E.g., convert - ``` - IMAGE=.../amp:dev \ - docker-compose \ - --file devops/compose/tmp.docker-compose.yml \ - --file devops/compose/tmp.docker-compose_as_submodule.yml \ - --env-file devops/env/default.env - ``` - into - ``` - IMAGE=.../amp:dev docker-compose --file ... - ``` - """ - if isinstance(cmd, list): - cmd = " ".join(cmd) - hdbg.dassert_isinstance(cmd, str) - cmd = cmd.rstrip().lstrip() - # Remove `\` at the end of the line. - cmd = re.sub(r" \\\s*$", " ", cmd, flags=re.MULTILINE) - # Use a single space between words in the command. 
- # TODO(gp): This is a bit dangerous if there are multiple spaces in a string - # that for some reason are meaningful. - cmd = " ".join(cmd.split()) - return cmd - - -def to_multi_line_cmd(docker_cmd_: List[str]) -> str: - r""" - Convert a command encoded as a list of strings into a single command - separated by `\`. - - E.g., convert - ``` - ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', - '\n docker-compose', - '\n --file amp/devops/compose/tmp.docker-compose.yml', - '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', - '\n --env-file devops/env/default.env'] - ``` - into - ``` - IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ - docker-compose \ - --file devops/compose/tmp.docker-compose.yml \ - --file devops/compose/tmp.docker-compose_as_submodule.yml \ - --env-file devops/env/default.env - ``` - """ - # Expand all strings into single lines. - _LOG.debug("docker_cmd=%s", docker_cmd_) - docker_cmd_tmp = [] - for dc in docker_cmd_: - # Add a `\` at the end of each string. - hdbg.dassert(not dc.endswith("\\"), "dc='%s'", dc) - dc += " \\" - docker_cmd_tmp.extend(dc.split("\n")) - docker_cmd_ = docker_cmd_tmp - # Remove empty lines. - docker_cmd_ = [cmd for cmd in docker_cmd_ if cmd.rstrip().lstrip() != ""] - # Package the command. - result = "\n".join(docker_cmd_) - # Remove a `\` at the end, since it is not needed. - result = result.rstrip("\\") - _LOG.debug("docker_cmd=%s", result) - return result - - -# TODO(gp): Pass through command line using a global switch or an env var. 
-use_one_line_cmd = False - - -def run( - ctx: Any, - cmd: str, - *args: Any, - dry_run: bool = False, - use_system: bool = False, - print_cmd: bool = False, - **ctx_run_kwargs: Any, -) -> Optional[int]: - cmd = hprint.dedent(cmd) - _LOG.debug(hprint.to_str("cmd dry_run")) - if use_one_line_cmd: - cmd = _to_single_line_cmd(cmd) - _LOG.debug("cmd=%s", cmd) - if dry_run: - print(f"Dry-run: > {cmd}") - _LOG.warning("Skipping execution of '%s'", cmd) - res = None - else: - if print_cmd: - print(f"> {cmd}") - if use_system: - # TODO(gp): Consider using only `hsystem.system()` since it's more - # reliable. - res = hsystem.system(cmd, suppress_output=False) - else: - result = ctx.run(cmd, *args, **ctx_run_kwargs) - res = result.return_code - return res - - -# TODO(gp): -> system_interaction.py ? -def _to_pbcopy(txt: str, pbcopy: bool) -> None: - """ - Save the content of txt in the system clipboard. - """ - txt = txt.rstrip("\n") - if not pbcopy: - print(txt) - return - if not txt: - print("Nothing to copy") - return - if hserver.is_host_mac(): - # -n = no new line - cmd = f"echo -n '{txt}' | pbcopy" - hsystem.system(cmd) - print(f"\n# Copied to system clipboard:\n{txt}") - else: - _LOG.warning("pbcopy works only on macOS") - print(txt) - - -def _filter_existing_paths(paths_from_user: List[str]) -> List[str]: - """ - Filter out the paths to non-existent files. - - :param paths_from_user: paths passed by user - :return: existing paths - """ - paths = [] - for user_path in paths_from_user: - if user_path.endswith("/*"): - # Get the files according to the "*" pattern. - dir_files = glob.glob(user_path) - if dir_files: - # Check whether the pattern matches files. 
- paths.extend(dir_files) - else: - _LOG.error( - ( - "'%s' pattern doesn't match any files: " - "the directory is empty or path does not exist" - ), - user_path, - ) - elif os.path.exists(user_path): - paths.append(user_path) - else: - _LOG.error("'%s' does not exist", user_path) - return paths - - -# TODO(gp): We should factor out the meaning of the params in a string and add it -# to all the tasks' help. -def _get_files_to_process( - modified: bool, - branch: bool, - last_commit: bool, - # TODO(gp): Pass abs_dir, instead of `all_` and remove the calls from the - # outer clients. - all_: bool, - files_from_user: str, - mutually_exclusive: bool, - remove_dirs: bool, -) -> List[str]: - """ - Get a list of files to process. - - The files are selected based on the switches: - - `branch`: changed in the branch - - `modified`: changed in the client (both staged and modified) - - `last_commit`: part of the previous commit - - `all`: all the files in the repo - - `files_from_user`: passed by the user - - :param modified: return files modified in the client (i.e., changed with - respect to HEAD) - :param branch: return files modified with respect to the branch point - :param last_commit: return files part of the previous commit - :param all: return all repo files - :param files_from_user: return files passed to this function - :param mutually_exclusive: ensure that all options are mutually exclusive - :param remove_dirs: whether directories should be processed - :return: paths to process - """ - _LOG.debug( - hprint.to_str( - "modified branch last_commit all_ files_from_user " - "mutually_exclusive remove_dirs" - ) - ) - if mutually_exclusive: - # All the options are mutually exclusive. 
- hdbg.dassert_eq( - int(modified) - + int(branch) - + int(last_commit) - + int(all_) - + int(len(files_from_user) > 0), - 1, - msg="Specify only one among --modified, --branch, --last-commit, " - "--all_files, and --files", - ) - else: - # We filter the files passed from the user through other the options, - # so only the filtering options need to be mutually exclusive. - hdbg.dassert_eq( - int(modified) + int(branch) + int(last_commit) + int(all_), - 1, - msg="Specify only one among --modified, --branch, --last-commit", - ) - dir_name = "." - if modified: - files = hgit.get_modified_files(dir_name) - elif branch: - files = hgit.get_modified_files_in_branch("master", dir_name) - elif last_commit: - files = hgit.get_previous_committed_files(dir_name) - elif all_: - pattern = "*" - only_files = True - use_relative_paths = True - files = hio.listdir(dir_name, pattern, only_files, use_relative_paths) - if files_from_user: - # If files were passed, filter out non-existent paths. - files = _filter_existing_paths(files_from_user.split()) - # Convert into a list. - hdbg.dassert_isinstance(files, list) - files_to_process = [f for f in files if f != ""] - # We need to remove `amp` to avoid copying the entire tree. - files_to_process = [f for f in files_to_process if f != "amp"] - _LOG.debug("files_to_process='%s'", str(files_to_process)) - # Remove dirs, if needed. - if remove_dirs: - files_to_process = hsystem.remove_dirs(files_to_process) - _LOG.debug("files_to_process='%s'", str(files_to_process)) - # Ensure that there are files to process. - if not files_to_process: - _LOG.warning("No files were selected") - return files_to_process - - -# Copied from helpers.datetime_ to avoid dependency from pandas. - - -def get_ET_timestamp() -> str: - # The timezone depends on how the shell is configured. - timestamp = datetime.datetime.now() - return timestamp.strftime("%Y%m%d_%H%M%S") - - -# End copy. 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py deleted file mode 100644 index 631a68e5f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -""" -Import as: - -import helpers.logging_testing.logging_main as hlteloma -""" - -import logging -import sys -from typing import Union - -import helpers.hlogging as hloggin - -_LOG = logging.getLogger(__name__) -print(f"_LOG={_LOG}") - - -def install_basic_formatter() -> None: - # The output looks like - # ``` - # DEBUG:__main__: message - # ``` - logging.basicConfig() - - -def _install_formatter( - formatter: Union[hloggin.CustomFormatter, logging.Formatter], -) -> None: - root_logger_ = logging.getLogger() - ch = logging.StreamHandler(sys.stdout) - ch.setFormatter(formatter) - root_logger_.addHandler(ch) - - -def install_current_formatter() -> None: - date_fmt = "%m-%d_%H:%M" - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - ) - log_format += ( - # lib_tasks _delete_branches - " %(module)-20s: %(funcName)-30s:" - # 142: ... 
- " %(lineno)-4d:" - " %(message)s" - ) - formatter = logging.Formatter(log_format, datefmt=date_fmt) - # - _install_formatter(formatter) - - -def install_custom_formatter() -> None: - formatter = hloggin.CustomFormatter() - _install_formatter(formatter) - - -if __name__ == "__main__": - # - print("\n# Installing formatter") - # install_basic_formatter() - # install_current_formatter() - install_custom_formatter() - # - print("\n# Loggers before setLevel") - root_logger = logging.getLogger() - print(f"root_logger={root_logger}") - # Show the loggers that have registered. - print(f"loggers={hloggin.get_all_loggers()}") - # - verbosity = logging.DEBUG - # verbosity = logging.ERROR - print(f"\n# Loggers after setLevel {verbosity}") - root_logger.setLevel(verbosity) - # Setting the verbosity for the root logger sets the verbosity for all the - # children ones. - print(f"root_logger={root_logger}") - print(f"loggers={hloggin.get_all_loggers()}") - # - hloggin.test_logger() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py deleted file mode 100644 index ad88346fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Import as: - -import helpers.logging_testing.logging_module as hltelomo -""" - -import logging - -_LOG = logging.getLogger(__name__) -print(f"_LOG={_LOG}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py deleted file mode 100644 index 5b0445a31..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py +++ /dev/null @@ -1,17 +0,0 @@ -import pathlib -from typing import Any, Optional - - -def pytest_ignore_collect( # type: ignore - collection_path: pathlib.Path, path: Any, config: Any -) -> Optional[bool]: - """ - Skip all tests in this directory. - - :param collection_path: path to analyze - :param path: path to analyze (deprecated) - :param config: pytest config object - :return: True if the path should be ignored - """ - # Ignore this directory and all its subdirectories. - return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb deleted file mode 100644 index 7df18640d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb +++ /dev/null @@ -1,638 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Test Cache in Jupyter Notebook" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:23:59.696680Z", - "start_time": "2021-08-16T16:23:58.792511Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING\u001b[0m: Disabling annoying warnings\n", - "\u001b[0m\u001b[36mINFO\u001b[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-89e1d81f-7cff-47ee-9790-af936835f517.json'\n", - "\u001b[33mWARNING\u001b[0m: Running in Jupyter\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import logging\n", - "\n", 
- "import joblib\n", - "\n", - "import helpers.hcache as hcache\n", - "import helpers.hdbg as hdbg\n", - "import helpers.hs3 as hs3\n", - "\n", - "hnotebook.config_notebook()\n", - "\n", - "# hdbg.init_logger(verbosity=logging.DEBUG)\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "# hdbg.test_logger()\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2, - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Define computation function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-15T12:07:59.739169Z", - "start_time": "2021-08-15T12:07:59.714831Z" - } - }, - "outputs": [], - "source": [ - "def func(a, b):\n", - " # hello\n", - " # assert 0\n", - " out = a * b\n", - " print(f\"Multiplication: {a} * {b} = {out}\")\n", - " return out\n", - "\n", - "\n", - "inputs = (1, 2)\n", - "exp_output = 2\n", - "\n", - "func(*inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:32:30.476809Z", - "start_time": "2021-08-14T23:32:30.202040Z" - } - }, - "outputs": [], - "source": [ - "!ls hello/joblib/__main__*/f/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:42:14.671491Z", - "start_time": "2021-08-14T23:42:13.356163Z" - } - }, - "outputs": [], - "source": [ - "!pip install https://github.com/aabadie/joblib-s3.git" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:46:59.749548Z", - "start_time": "2021-08-14T23:46:54.455947Z" - } - }, - "outputs": [], - "source": [ - "#!git clone git://github.com/aabadie/joblib-s3.git\n", - "# !(cd joblib-s3 && pip install -r requirements.txt .)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - 
"end_time": "2021-08-14T23:56:02.954013Z", - "start_time": "2021-08-14T23:56:02.793451Z" - }, - "scrolled": false - }, - "outputs": [], - "source": [ - "# import joblibs3\n", - "\n", - "# joblibs3.register_s3fs_store_backend()\n", - "\n", - "# # dict(compress=False, bucket=None, anon=False,\n", - "# #key=None, secret=None, token=None, use_ssl=True)\n", - "# dict2 = {\n", - "# \"bucket\": \"alphamatic-data\",\n", - "# \"key\": dict_[\"aws_access_key_id\"],\n", - "# \"secret\": dict_[\"aws_secret_access_key\"],\n", - "# }\n", - "# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True,\n", - "# backend_options=dict2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T13:24:11.630748Z", - "start_time": "2021-08-16T13:24:10.983061Z" - } - }, - "outputs": [], - "source": [ - "# hjoblib.register_s3fs_store_backend()\n", - "\n", - "s3fs = hs3.get_s3fs(\"am\")\n", - "\n", - "dict2 = {\n", - " \"bucket\": \"alphamatic-data\",\n", - " # \"key\": dict_[\"aws_access_key_id\"],\n", - " # \"secret\": dict_[\"aws_secret_access_key\"],\n", - " \"s3fs\": s3fs,\n", - "}\n", - "\n", - "mem = joblib.Memory(\n", - " \"joblib_cache\",\n", - " backend=\"s3\",\n", - " verbose=100,\n", - " compress=True,\n", - " backend_options=dict2,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-17T15:51:32.654896Z", - "start_time": "2021-08-17T15:51:32.258447Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "ename": "PermissionError", - "evalue": "Access Denied", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, 
delimiter)\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0mdircache\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0;32masync\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0mdircache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CommonPrefixes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/paginate.py\u001b[0m in \u001b[0;36m__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0mparsed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extract_parsed_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 154\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0ms3fs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m#mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mcoro\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoro\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mcoro\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsbuckets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 721\u001b[0;31m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter)\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Key\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClientError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mtranslate_boto_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdelimiter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mPermissionError\u001b[0m: Access Denied" - ] - } - ], - "source": [ - "# hjoblib.register_s3fs_store_backend()\n", - "\n", - "s3fs = hs3.get_s3fs(\"am\")\n", - "dict_ = {}\n", - "\n", - "dict2 = {\n", - " \"bucket\": \"alphamatic-data\",\n", - " # \"key\": dict_[\"aws_access_key_id\"],\n", - " # \"secret\": dict_[\"aws_secret_access_key\"],\n", - " \"s3fs\": s3fs,\n", - "}\n", - "path = \"/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1\"\n", - "\n", - "\n", - "s3fs.ls(path)\n", - "\n", - "# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:54:50.021139Z", - "start_time": "2021-08-14T23:54:50.017180Z" - } - }, - "outputs": [], - "source": [ - "print(dict_)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:51:25.440705Z", - "start_time": "2021-08-14T23:51:25.419214Z" - } - }, - "outputs": [], - "source": [ - "# dict_[\"bucket\"] = \"alphamatic-data/tmp\"\n", - "\n", - "print(dict_)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def dec(func=None, val=5):\n", - " if func is not None:\n", - " return" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68549a47", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:53:08.985727Z", - "start_time": "2021-08-14T23:53:08.795065Z" - } - }, - "outputs": [], - "source": [ - "dict_ = hs3.get_aws_credentials(\"am\")\n", - "print(dict_)\n", - "# s3fs = hs3.get_s3fs(\"am\")\n", - "# s3fs.ls(\"s3://alphamatic-data/tmp\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T13:25:34.841885Z", - "start_time": "2021-08-16T13:25:34.820510Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "s3fs.clear_instance_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:23:17.621301Z", - "start_time": "2021-08-16T16:23:16.722753Z" - } - }, - "outputs": [], - "source": [ - "# import joblib\n", - "\n", - "# cachedir = \"./hello\"\n", - "# memory = joblib.Memory(cachedir, verbose=0)\n", - "\n", - "\n", - "@mem.cache()\n", - "def f(x):\n", - " # hello\n", - " print(f\"Running f({x})\")\n", - " return x\n", - "\n", - "\n", - "f(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:26:31.661915Z", - "start_time": "2021-08-16T16:26:31.640938Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'hello'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hcache.cache(set_verbose_mode=True)\n", - "\n", - "\n", - "def hello():\n", - " return \"hello\"\n", - "\n", - "\n", - "hello()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Memory cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T22:19:51.526004Z", - "start_time": "2021-08-14T22:19:51.259763Z" - } - }, - "outputs": [], - "source": [ - "!ls /app/tmp.cache.disk/joblib/" - ] - }, - 
{ - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T22:19:25.253342Z", - "start_time": "2021-08-14T22:19:24.986513Z" - } - }, - "outputs": [], - "source": [ - "!ls /mnt/tmpfs/tmp.cache.mem/joblib/lib" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:21:32.636049Z", - "start_time": "2021-08-14T23:21:32.479710Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "memory_cached_func = hcache._Cached(\n", - " func, use_mem_cache=True, use_disk_cache=False\n", - ")\n", - "\n", - "print(memory_cached_func.get_function_cache_info())\n", - "\n", - "# cache_type = None\n", - "# memory_cached_func.clear_function_cache(cache_type)\n", - "\n", - "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", - "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", - "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"memory caching checks passed\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:56:58.620662Z", - "start_time": "2020-09-03T19:56:58.610337Z" - } - }, - "outputs": [], - "source": [ - "def computation_function(a, b):\n", - " # hello\n", - " # assert 0\n", - " out = a * b\n", - " print(f\"Multiplication: {a} * {b} = {out}\")\n", - " return out\n", - "\n", - "\n", - "inputs = (1, 2)\n", - "exp_output = 2\n", - "\n", - "# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output)\n", - "# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Disk cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": 
"2020-09-03T19:45:20.999548Z", - "start_time": "2020-09-03T19:45:20.987298Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "disk_cached_computation = hcache._Cached(\n", - " computation_function, use_mem_cache=False, use_disk_cache=True\n", - ")\n", - "\n", - "disk_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"disk\")\n", - "\n", - "print(\"disk caching checks passed\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Full cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.851944Z", - "start_time": "2020-09-03T19:34:54.839379Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "fully_cached_computation = hcache._Cached(\n", - " computation_function, use_mem_cache=True, use_disk_cache=True\n", - ")\n", - "\n", - "fully_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"Clear mem cache\")\n", - "fully_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - 
"hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"disk\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"full caching checks passed\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.856369Z", - "start_time": "2020-09-03T19:34:54.853563Z" - } - }, - "outputs": [], - "source": [ - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.995926Z", - "start_time": "2020-09-03T19:34:54.859279Z" - } - }, - "outputs": [], - "source": [ - "# This should fail all the times, because we clear the memory cache.\n", - "fully_cached_computation.clear_function_cache()\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py deleted file mode 100644 index 3469f42b7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py +++ /dev/null @@ -1,274 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] pycharm={"name": "#%% md\n"} -# # Test Cache in Jupyter Notebook - -# %% -# %load_ext autoreload -# %autoreload 2 - -import logging - -import joblib - -import helpers.hcache as hcache -import helpers.hdbg as hdbg -import helpers.hs3 as hs3 - -hnotebook.config_notebook() - -# hdbg.init_logger(verbosity=logging.DEBUG) -hdbg.init_logger(verbosity=logging.INFO) -# hdbg.test_logger() -_LOG = logging.getLogger(__name__) - - -# %% [markdown] pycharm={"name": "#%% md\n"} -# # Define computation function - - -# %% -def func(a, b): - # hello - # assert 0 - out = a * b - print(f"Multiplication: {a} * {b} = {out}") - return out - - -inputs = (1, 2) -exp_output = 2 - -func(*inputs) - -# %% -# !ls hello/joblib/__main__*/f/ - -# %% -# !pip install https://github.com/aabadie/joblib-s3.git - -# %% -# #!git clone git://github.com/aabadie/joblib-s3.git -# !(cd joblib-s3 && pip install -r requirements.txt .) 
- -# %% -# import joblibs3 - -# joblibs3.register_s3fs_store_backend() - -# # dict(compress=False, bucket=None, anon=False, -# #key=None, secret=None, token=None, use_ssl=True) -# dict2 = { -# "bucket": "alphamatic-data", -# "key": dict_["aws_access_key_id"], -# "secret": dict_["aws_secret_access_key"], -# } -# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True, -# backend_options=dict2) - -# %% -# hjoblib.register_s3fs_store_backend() - -s3fs = hs3.get_s3fs("am") - -dict2 = { - "bucket": "alphamatic-data", - # "key": dict_["aws_access_key_id"], - # "secret": dict_["aws_secret_access_key"], - "s3fs": s3fs, -} - -mem = joblib.Memory( - "joblib_cache", - backend="s3", - verbose=100, - compress=True, - backend_options=dict2, -) - -# %% -# hjoblib.register_s3fs_store_backend() - -s3fs = hs3.get_s3fs("am") -dict_ = {} - -dict2 = { - "bucket": "alphamatic-data", - # "key": dict_["aws_access_key_id"], - # "secret": dict_["aws_secret_access_key"], - "s3fs": s3fs, -} -path = "/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1" - - -s3fs.ls(path) - -# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2) - - -# %% -print(dict_) - -# %% -# dict_["bucket"] = "alphamatic-data/tmp" - -print(dict_) - - -# %% -def dec(func=None, val=5): - if func is not None: - return - - -# %% - -# %% -dict_ = hs3.get_aws_credentials("am") -print(dict_) -# s3fs = hs3.get_s3fs("am") -# s3fs.ls("s3://alphamatic-data/tmp") - -# %% -s3fs.clear_instance_cache() - - -# %% -# import joblib - -# cachedir = "./hello" -# memory = joblib.Memory(cachedir, verbose=0) - - -@mem.cache() -def f(x): - # hello - print(f"Running f({x})") - return x - - -f(1) - -# %% -hcache.cache(set_verbose_mode=True) - - -def hello(): - return "hello" - - -hello() - -# %% [markdown] pycharm={"name": "#%% md\n"} -# ## Memory cache - -# %% -# !ls /app/tmp.cache.disk/joblib/ - -# %% -# !ls /mnt/tmpfs/tmp.cache.mem/joblib/lib - -# %% 
pycharm={"name": "#%%\n"} -memory_cached_func = hcache._Cached( - func, use_mem_cache=True, use_disk_cache=False -) - -print(memory_cached_func.get_function_cache_info()) - -# cache_type = None -# memory_cached_func.clear_function_cache(cache_type) - -hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) -hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) -hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "mem") - -print("memory caching checks passed") - - -# %% -def computation_function(a, b): - # hello - # assert 0 - out = a * b - print(f"Multiplication: {a} * {b} = {out}") - return out - - -inputs = (1, 2) -exp_output = 2 - -# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output) -# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), "mem") - -# %% [markdown] -# ## Disk cache - -# %% pycharm={"name": "#%%\n"} -disk_cached_computation = hcache._Cached( - computation_function, use_mem_cache=False, use_disk_cache=True -) - -disk_cached_computation.clear_function_cache() - -hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "disk") - -print("disk caching checks passed") - -# %% [markdown] -# ## Full cache - -# %% pycharm={"name": "#%%\n"} -fully_cached_computation = hcache._Cached( - computation_function, use_mem_cache=True, use_disk_cache=True -) - -fully_cached_computation.clear_function_cache() - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - 
-hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -print("Clear mem cache") -fully_cached_computation.clear_function_cache() - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "disk") - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -print("full caching checks passed") - -# %% -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -# %% -# This should fail all the times, because we clear the memory cache. -fully_cached_computation.clear_function_cache() -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb deleted file mode 100644 index 3b3c5ae1e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb +++ /dev/null @@ -1,653 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9ad5fd70-a4de-4671-86c4-9f3e87c32df1", - "metadata": {}, - "source": [ - "# Using hcache_simple for Caching in Python\n", - "\n", - "This tutorial provides a detailed walkthrough of the `hcache_simple` module,\n", - "which implements a lightweight caching mechanism." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "aa084398-eba9-4e8f-aad9-6348d62f8fc1", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d20a80f4-b837-487f-96df-ebb9e8202cfc", - "metadata": {}, - "outputs": [], - "source": [ - "# Import necessary modules.\n", - "import logging\n", - "import time\n", - "\n", - "import helpers.hcache_simple as hcacsimp\n", - "import helpers.hdbg as hdbg" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5d76eda3-044b-47c9-bf5f-eb09aad51ad1", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n", - "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-4f3ae573-f3ef-4865-b9b0-386ca4221989.json'\n" - ] - } - ], - "source": [ - "hdbg.init_logger(verbosity=logging.INFO)\n", - "\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3d440b77-178a-4e3e-9bb9-0d508f1948a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Force a reload.\n", - "import importlib\n", - "\n", - "importlib.reload(hcacsimp)" - ] - }, - { - "cell_type": "markdown", - "id": "90aa14ab-f441-468c-a114-77cf9c6baff1", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "## Setting up caching\n", - "\n", - "The `@hcsi.simple_cache` decorator enables caching for a function and supports both memory- and disk-based storage (json or pickle format).\n", - "\n", - "We'll demonstrate this with a function that simulates a slow computation." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "dbc25952-6587-4bb9-a5e9-064ed0317550", - "metadata": {}, - "outputs": [], - "source": [ - "# cache_type=\"json\": The cache will be stored in JSON format on disk.\n", - "# write_through=True: Any changes to the cache will be written to disk immediately.\n", - "@hcacsimp.simple_cache(cache_type=\"json\", write_through=True)\n", - "def slow_square(x):\n", - " \"\"\"\n", - " Simulate a slow function that computes the square of a number.\n", - "\n", - " The `@hcsi.simple_cache` decorator caches the results of this\n", - " function to avoid recomputation for the same input.\n", - " \"\"\"\n", - " # Simulate a time-consuming computation.\n", - " print(\"Computing ...\")\n", - " time.sleep(2)\n", - " return x**2" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "bce58692-fd3f-49fe-ab7c-fb07357697e6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# func_name=slow_square\n", - "type: json\n" - ] - } - ], - "source": [ - "print(hcacsimp.cache_property_to_str(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "738a112b-3eac-4488-bd6b-8cba124d3f2d", - "metadata": {}, - "source": [ - "## Demonstration: First and Subsequent Calls\n", - "\n", - "Let's see how caching works:\n", - "\n", - "- On the first call with a specific input, the function takes time to compute.\n", - "- On subsequent calls with the same input, the result is retrieved instantly from the cache." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "aa1f2b9d-bdd4-4714-a5e1-ebafe05632f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING Resetting disk cache\n", - "WARNING Resetting /app/tmp.cache_simple_property.pkl\n" - ] - } - ], - "source": [ - "cache_file = hcacsimp._get_cache_file_name(\"slow_square\")\n", - "hdbg.dassert_eq(cache_file, \"/app/tmp.cache_simple.slow_square.json\")\n", - "\n", - "hcacsimp.reset_cache(interactive=False)\n", - "hcacsimp.reset_cache_property()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "66ff027f-a6d2-438e-bded-7d631b2faace", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '/app/tmp.cache_simple.*': No such file or directory\n" - ] - } - ], - "source": [ - "!ls /app/tmp.cache_simple.*" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7b081f89-b5a2-4757-8936-ed567eaa049c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '/app/tmp.cache_simple.slow_square.json': No such file or directory\n" - ] - } - ], - "source": [ - "# There should be no cache file yet.\n", - "!ls -l $cache_file" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "789c978c-c25a-48ba-a8fc-34c9fc0b6243", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# First call (expected delay):\n", - "Computing ...\n", - "Result: 16\n" - ] - } - ], - "source": [ - "# First call is slow: the result is computed and cached.\n", - "print(\"# First call (expected delay):\")\n", - "result = slow_square(4)\n", - "print(f\"Result: {result}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "0836b391-30ca-443e-bfe8-7794fb91151c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " 
\"{\\\"args\\\": [4], \\\"kwargs\\\": {}}\": 16\n", - "}" - ] - } - ], - "source": [ - "# The cache file is created and stores the content.\n", - "!cat $cache_file" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "af3af183-d293-45b4-9d60-c826e382a786", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Second call (retrieved from cache):\n", - "Result: 16\n" - ] - } - ], - "source": [ - "# Second call is fast: the result is retrieved from the cache.\n", - "print(\"# Second call (retrieved from cache):\")\n", - "result = slow_square(4)\n", - "print(f\"Result: {result}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "f0cbe4e0-b87e-4ed8-b373-c2114647076d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computing ...\n" - ] - }, - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: '/app/tmp.cache_simple.slow_square.json'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Call another value -> cache miss.\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mslow_square\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:888\u001b[0m, in \u001b[0;36msimple_cache..decorator..wrapper\u001b[0;34m(force_refresh, abort_on_cache_miss, report_on_cache_miss, *args, 
**kwargs)\u001b[0m\n\u001b[1;32m 886\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m write_through:\n\u001b[1;32m 887\u001b[0m _LOG\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWriting through to disk\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 888\u001b[0m \u001b[43mflush_cache_to_disk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m value\n", - "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:593\u001b[0m, in \u001b[0;36mflush_cache_to_disk\u001b[0;34m(func_name)\u001b[0m\n\u001b[1;32m 591\u001b[0m disk_cache\u001b[38;5;241m.\u001b[39mupdate(mem_cache)\n\u001b[1;32m 592\u001b[0m \u001b[38;5;66;03m# Save merged cache to disk.\u001b[39;00m\n\u001b[0;32m--> 593\u001b[0m \u001b[43m_save_cache_dict_to_disk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdisk_cache\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[38;5;66;03m# Update the memory cache.\u001b[39;00m\n\u001b[1;32m 595\u001b[0m \u001b[38;5;28;01mglobal\u001b[39;00m _CACHE\n", - "File \u001b[0;32m/app/helpers_root/helpers/hcache_simple.py:447\u001b[0m, in \u001b[0;36m_save_cache_dict_to_disk\u001b[0;34m(func_name, data)\u001b[0m\n\u001b[1;32m 445\u001b[0m pickle\u001b[38;5;241m.\u001b[39mdump(data, file)\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m cache_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjson\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 447\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 448\u001b[0m json\u001b[38;5;241m.\u001b[39mdump(data, file, indent\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m, sort_keys\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, ensure_ascii\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/app/tmp.cache_simple.slow_square.json'" - ] - } - ], - "source": [ - "# Call another value -> cache miss.\n", - "result = slow_square(3)\n", - "print(f\"Result: {result}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b7c6e693-05a3-4e2c-9159-892887eb91de", - "metadata": {}, - "outputs": [], - "source": [ - "!cat $cache_file" - ] - }, - { - "cell_type": "markdown", - "id": "124703fe-bf5d-4756-aa63-12dbed57db12", - "metadata": {}, - "source": [ - "## Monitoring Cache Performance\n", - "\n", - "The `hcache_simple` module provides utilities to track cache performance metrics,\n", - "such as the total number of calls, cache hits, and cache misses." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "879c8230-656d-455c-9370-1cd5afb8f59a", - "metadata": {}, - "outputs": [], - "source": [ - "# Enable cache performance monitoring for the function `slow_square`.\n", - "hcacsimp.enable_cache_perf(\"slow_square\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4dcf4f5d-e4ee-490e-a62e-a152af9c0e62", - "metadata": {}, - "outputs": [], - "source": [ - "# Retrieve and display cache performance statistics.\n", - "print(\"# Cache Performance Stats:\")\n", - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "8a0e90b3-4fa7-43c7-8251-c7b713e33073", - "metadata": {}, - "source": [ - "Explanation of Performance Metrics\n", - "\n", - "- Total Calls (tot): The total number of times the function was invoked.\n", - "- Cache Hits (hits): The number of times the result was retrieved from the cache.\n", - "- Cache Misses (misses): The number of times the function had to compute the result due to a cache miss.\n", - "- Hit Rate: The percentage of calls where the result was retrieved from the cache." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e4b46455-314f-4ff3-9f8a-93c91dcab334", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "hcacsimp.reset_cache(interactive=False)\n", - "hcacsimp.reset_cache_perf(\"slow_square\")\n", - "\n", - "print(\"# First call (expected delay):\")\n", - "result = slow_square(4) # This call will be recorded as a cache miss.\n", - "print(f\"Result: {result}\")\n", - "\n", - "print(\"\\n# Second call (retrieved from cache):\")\n", - "result = slow_square(4) # This call will be recorded as a cache hit.\n", - "print(f\"Result: {result}\")\n", - "\n", - "print(\"\\n# Cache performance stats:\")\n", - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "3d614729-924d-4285-b5bc-4ed16006ba12", - "metadata": {}, - "source": [ - "## Flush Cache to Disk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21ecd2ea-812b-4d67-8fb2-0d2aba944175", - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "# The following cell writes the current in‑memory cache to disk. This is useful\n", - "# if you want persistence across sessions.\n", - "print(\"# Flushing cache to disk for 'slow_square'...\")\n", - "hcacsimp.flush_cache_to_disk(\"slow_square\")\n", - "\n", - "# The `hcsi.cache_stats_to_str` function provides a summary of the current cache\n", - "# state, including the number of items stored in memory and on disk.\n", - "print(\"\\n# Cache stats:\")\n", - "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "39b16e4a-c007-4cee-8566-0ad9057c54ea", - "metadata": {}, - "source": [ - "## Reset In‑Memory Cache\n", - "\n", - "Now reset the in‑memory cache. After this, the in‑memory cache will be empty until reloaded from disk." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd4374f3-8f89-422a-923a-3bf4bd01f8a1", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"# Resetting in-memory cache for 'slow_square'...\")\n", - "hcacsimp.reset_mem_cache(\"slow_square\")\n", - "\n", - "print(\"\\n# Cache stats:\")\n", - "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "04677407-f3d4-46de-b818-100eafb2bf1d", - "metadata": {}, - "source": [ - "## Force Cache from Disk\n", - "\n", - "Now we force the in‑memory cache to update from disk. This should repopulate our\n", - "cache based on the disk copy." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a42de2d9-05d8-4dd8-947b-71d6751108fb", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"# Forcing cache from disk for 'slow_square'...\")\n", - "hcacsimp.force_cache_from_disk(\"slow_square\")\n", - "\n", - "print(\"\\n# Cache stats:\")\n", - "print(hcacsimp.cache_stats_to_str(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "0516463d-d450-4071-9284-c1e839cec62a", - "metadata": {}, - "source": [ - "## Attempt to Reset Disk Cache\n", - "\n", - "The `reset_disk_cache` function is currently not implemented (it contains an assertion).\n", - "We'll catch the expected error to confirm its behavior." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59bbf120-4d21-430a-9fad-d4c68a1e4af5", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " print(\n", - " \"\\nAttempting to reset disk cache for 'slow_square' (expected to fail)...\"\n", - " )\n", - " hcacsimp.reset_disk_cache(\"slow_square\")\n", - "except AssertionError:\n", - " print(\"reset_disk_cache raised an AssertionError as expected.\")" - ] - }, - { - "cell_type": "markdown", - "id": "74c97383-d444-4760-a2cd-25d7c2bbbf27", - "metadata": {}, - "source": [ - "# Dynamic parameters" - ] - }, - { - "cell_type": "markdown", - "id": "865b8e7d-ebb3-4bca-8edd-784c71f142b5", - "metadata": {}, - "source": [ - "## force_refresh" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "103b9d4d-0a6b-439e-b722-5c3125baef3e", - "metadata": {}, - "outputs": [], - "source": [ - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))\n", - "hcacsimp.reset_cache_perf(\"slow_square\")\n", - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5919331a-f6b9-4b7b-ac8d-354ea2bd5226", - "metadata": {}, - "outputs": [], - "source": [ - "slow_square(4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55e1679d-b529-4f3f-a27a-5a7173180ab8", - "metadata": {}, - "outputs": [], - "source": [ - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8e17839-fdeb-4950-b662-e04cdcd73406", - "metadata": {}, - "outputs": [], - "source": [ - "# Force a recompute.\n", - "slow_square(4, force_refresh=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11c745bf-28e4-4ec4-bdb7-c6e73956fbf4", - "metadata": {}, - "outputs": [], - "source": [ - "print(hcacsimp.get_cache_perf_stats(\"slow_square\"))" - ] - }, - { - "cell_type": "markdown", - "id": "f0119e5a-74c2-47d6-a80c-7b2e6d70f8be", - "metadata": {}, - 
"source": [ - "## abort_on_cache_miss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b8b63dd-c80e-4a26-a242-5a09ddc3d76c", - "metadata": {}, - "outputs": [], - "source": [ - "hcacsimp.reset_cache(interactive=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e643920-6d2b-4055-9dd3-50745feb2373", - "metadata": {}, - "outputs": [], - "source": [ - "# This call doesn't abort since it's not a cache miss.\n", - "slow_square(4, abort_on_cache_miss=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5de5d0fe-b331-427e-bfa1-95bcf8efd40a", - "metadata": {}, - "outputs": [], - "source": [ - "# This call aborts since it's a cache miss.\n", - "try:\n", - " slow_square(16, abort_on_cache_miss=True)\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff84291a-1827-4c6b-b2d7-5386b7e127a6", - "metadata": {}, - "outputs": [], - "source": [ - "slow_square(16, report_on_cache_miss=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "281adfc6-38cf-4e66-935e-95760e5fa5cf", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py deleted file mode 100644 index 5acf042c0..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py +++ /dev/null @@ -1,257 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Using hcache_simple for Caching in Python -# -# This tutorial provides a detailed walkthrough of the `hcache_simple` module, -# which implements a lightweight caching mechanism. - -# %% -# %load_ext autoreload -# %autoreload 2 - -# %% -# Import necessary modules. -import logging -import time - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg - -# %% -hdbg.init_logger(verbosity=logging.INFO) - -_LOG = logging.getLogger(__name__) - - -# %% -# Force a reload. -import importlib - -importlib.reload(hcacsimp) - -# %% [markdown] -# ## Setting up caching -# -# The `@hcsi.simple_cache` decorator enables caching for a function and supports both memory- and disk-based storage (json or pickle format). -# -# We'll demonstrate this with a function that simulates a slow computation. - - -# %% -# cache_type="json": The cache will be stored in JSON format on disk. -# write_through=True: Any changes to the cache will be written to disk immediately. -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def slow_square(x): - """ - Simulate a slow function that computes the square of a number. - - The `@hcsi.simple_cache` decorator caches the results of this - function to avoid recomputation for the same input. - """ - # Simulate a time-consuming computation. 
- print("Computing ...") - time.sleep(2) - return x**2 - - -# %% -print(hcacsimp.cache_property_to_str("slow_square")) - -# %% [markdown] -# ## Demonstration: First and Subsequent Calls -# -# Let's see how caching works: -# -# - On the first call with a specific input, the function takes time to compute. -# - On subsequent calls with the same input, the result is retrieved instantly from the cache. - -# %% -cache_file = hcacsimp._get_cache_file_name("slow_square") -hdbg.dassert_eq(cache_file, "/app/tmp.cache_simple.slow_square.json") - -hcacsimp.reset_cache(interactive=False) -hcacsimp.reset_cache_property() - -# %% -# !ls /app/tmp.cache_simple.* - -# %% -# There should be no cache file yet. -# !ls -l $cache_file - -# %% -# First call is slow: the result is computed and cached. -print("# First call (expected delay):") -result = slow_square(4) -print(f"Result: {result}") - -# %% -# The cache file is created and stores the content. -# !cat $cache_file - -# %% -# Second call is fast: the result is retrieved from the cache. -print("# Second call (retrieved from cache):") -result = slow_square(4) -print(f"Result: {result}") - -# %% -# Call another value -> cache miss. -result = slow_square(3) -print(f"Result: {result}") - -# %% -# !cat $cache_file - -# %% [markdown] -# ## Monitoring Cache Performance -# -# The `hcache_simple` module provides utilities to track cache performance metrics, -# such as the total number of calls, cache hits, and cache misses. - -# %% -# Enable cache performance monitoring for the function `slow_square`. -hcacsimp.enable_cache_perf("slow_square") - -# %% -# Retrieve and display cache performance statistics. -print("# Cache Performance Stats:") -print(hcacsimp.get_cache_perf_stats("slow_square")) - -# %% [markdown] -# Explanation of Performance Metrics -# -# - Total Calls (tot): The total number of times the function was invoked. -# - Cache Hits (hits): The number of times the result was retrieved from the cache. 
-# - Cache Misses (misses): The number of times the function had to compute the result due to a cache miss. -# - Hit Rate: The percentage of calls where the result was retrieved from the cache. - -# %% -hcacsimp.reset_cache(interactive=False) -hcacsimp.reset_cache_perf("slow_square") - -print("# First call (expected delay):") -result = slow_square(4) # This call will be recorded as a cache miss. -print(f"Result: {result}") - -print("\n# Second call (retrieved from cache):") -result = slow_square(4) # This call will be recorded as a cache hit. -print(f"Result: {result}") - -print("\n# Cache performance stats:") -print(hcacsimp.get_cache_perf_stats("slow_square")) - - -# %% [markdown] -# ## Flush Cache to Disk - -# %% -# The following cell writes the current in‑memory cache to disk. This is useful -# if you want persistence across sessions. -print("# Flushing cache to disk for 'slow_square'...") -hcacsimp.flush_cache_to_disk("slow_square") - -# The `hcsi.cache_stats_to_str` function provides a summary of the current cache -# state, including the number of items stored in memory and on disk. -print("\n# Cache stats:") -print(hcacsimp.cache_stats_to_str("slow_square")) - - -# %% [markdown] -# ## Reset In‑Memory Cache -# -# Now reset the in‑memory cache. After this, the in‑memory cache will be empty until reloaded from disk. - -# %% -print("# Resetting in-memory cache for 'slow_square'...") -hcacsimp.reset_mem_cache("slow_square") - -print("\n# Cache stats:") -print(hcacsimp.cache_stats_to_str("slow_square")) - -# %% [markdown] -# ## Force Cache from Disk -# -# Now we force the in‑memory cache to update from disk. This should repopulate our -# cache based on the disk copy. 
- -# %% -print("# Forcing cache from disk for 'slow_square'...") -hcacsimp.force_cache_from_disk("slow_square") - -print("\n# Cache stats:") -print(hcacsimp.cache_stats_to_str("slow_square")) - -# %% [markdown] -# ## Attempt to Reset Disk Cache -# -# The `reset_disk_cache` function is currently not implemented (it contains an assertion). -# We'll catch the expected error to confirm its behavior. - -# %% -try: - print( - "\nAttempting to reset disk cache for 'slow_square' (expected to fail)..." - ) - hcacsimp.reset_disk_cache("slow_square") -except AssertionError: - print("reset_disk_cache raised an AssertionError as expected.") - -# %% [markdown] -# # Dynamic parameters - -# %% [markdown] -# ## force_refresh - -# %% -print(hcacsimp.get_cache_perf_stats("slow_square")) -hcacsimp.reset_cache_perf("slow_square") -print(hcacsimp.get_cache_perf_stats("slow_square")) - -# %% -slow_square(4) - -# %% -print(hcacsimp.get_cache_perf_stats("slow_square")) - -# %% -# Force a recompute. -slow_square(4, force_refresh=True) - -# %% -print(hcacsimp.get_cache_perf_stats("slow_square")) - -# %% [markdown] -# ## abort_on_cache_miss - -# %% -hcacsimp.reset_cache(interactive=False) - -# %% -# This call doesn't abort since it's not a cache miss. -slow_square(4, abort_on_cache_miss=True) - -# %% -# This call aborts since it's a cache miss. 
-try: - slow_square(16, abort_on_cache_miss=True) -except ValueError as e: - print(e) - -# %% -slow_square(16, report_on_cache_miss=True) - -# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb deleted file mode 100644 index 7b505f87e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb +++ /dev/null @@ -1,424 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "7fb27b941602401d91542211134fc71a", - "metadata": {}, - "source": [ - "CONTENTS:\n", - "- [hgoogle_file_api.py](#hgoogle_file_api.py)\n", - " - [Get Credentials for your drive](#get-credentials-for-your-drive)\n", - " - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet)\n", - " - [Freeze Rows](#freeze-rows)\n", - " - [Change the height of certin rows](#change-the-height-of-certin-rows)\n", - " - [Read some nice data](#read-some-nice-data)\n", - " - [Write this nice data](#write-this-nice-data)" - ] - }, - { - "cell_type": "markdown", - "id": "982ab891-de0a-47d5-946a-0f4fd3f16307", - "metadata": {}, - "source": [ - "\n", - "# hgoogle_file_api.py" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6b997caf-4bfc-47bc-b7e1-584f02da328f", - "metadata": {}, - "outputs": [], - "source": [ - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade google-api-python-client)\"\n", - "# !sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade pip install oauth2client)\"\n", - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade gspread)\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": 
"0fdf8a01-00ed-4e40-8b8b-3e4ecfe37d45", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import importlib\n", - "import helpers.hgoogle_drive_api as hgodrapi\n", - "\n", - "importlib.reload(hgodrapi)" - ] - }, - { - "cell_type": "markdown", - "id": "f9733115-f65b-43fb-8b56-32be7588c617", - "metadata": {}, - "source": [ - "\n", - "## Get Credentials for your drive" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0f3eb12a-bd7e-4846-a8f0-331ece997137", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "google_creds = hgodrapi.get_credentials()\n", - "print(google_creds)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "feb74dae-ff52-44ce-b698-4c04cc2bc8f3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "service = hgodrapi.get_sheets_service(google_creds)\n", - "print(service)" - ] - }, - { - "cell_type": "markdown", - "id": "9e1c8840-c759-4bd6-a2c5-f30d94daf72b", - "metadata": {}, - "source": [ - "\n", - "## Get Tab/Sheet id of a particular google sheet" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "67fe7cc1-0f90-4b45-b93d-c6eaecd25028", - "metadata": {}, - "outputs": [], - "source": [ - "tab_name = \"cleaned_profiles_1\"\n", - "url = \"https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260\"\n", - "sheet_id = \"1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA\"\n", - "credentials = google_creds" - ] - }, - { - "cell_type": "markdown", - "id": "f18db947-8170-4cba-8799-dfe792e1c732", - "metadata": {}, - "source": [ - "\n", - "## Freeze Rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "232a1ee0-83d2-4449-a8c0-a8e8eca02fc5", 
- "metadata": {}, - "outputs": [], - "source": [ - "row_indices = [0, 1, 2]\n", - "hgodrapi.freeze_rows(\n", - " credentials,\n", - " sheet_id=sheet_id,\n", - " row_indices=row_indices,\n", - " tab_name=tab_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "dd9b9b7d-2dc6-416d-bd9c-a8039fadaba2", - "metadata": {}, - "source": [ - "\n", - "## Change the height of certin rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50db6e3d-8d05-47ea-9ace-dc79ce131f37", - "metadata": {}, - "outputs": [], - "source": [ - "hgodrapi.set_row_height(\n", - " google_creds,\n", - " sheet_id=sheet_id,\n", - " height=20,\n", - " start_index=0,\n", - " end_index=2,\n", - " tab_name=tab_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3569d048-d69e-4e4b-ab53-a93b6f4a41d1", - "metadata": {}, - "source": [ - "\n", - "## Read some nice data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e63bede3-2948-4a37-b444-36b4dba81c6d", - "metadata": {}, - "outputs": [], - "source": [ - "nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "59233081-ac03-4ac7-96b1-4de1b07fae75", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameTitleFirmLocationPersonal Investment FocusGeographic FocusTypical Deal SizePreferred StagesNotable Personal AI InvestmentsPotential Fit with KaizenfirstNamelastName
0Michael MoritzManaging PartnerSequoia CapitalMenlo ParkAI/MLGlobal$10MEarly to GrowthGoogleHigh'''MichaelMoritz
1Navid AlipourManaging PartnerAnalytics VenturesSan DiegoAI/MLSan Diego$5MSeedCureMetrixHigh''NavidAlipour
2Aaref HilalyPartnerBain Capital VenturesPalo AltoReal-time AnalyticsBurlingameSeedSeed/Early StageRubrikMediumAarefHilaly
3Aaron FleishmanPrincipalTola CapitalSeattle WAEnterprise AIPNW$5M-$20MSeries ADatabricksHighAaronFleishman
4Aaron JacobsonPartnerNew Enterprise AssociatesMenlo ParkMLOpsNorth AmericaSeries A$10M-$30MDatabricksHighAaronJacobson
\n", - "
" - ], - "text/plain": [ - " Name Title Firm Location \\\n", - "0 Michael Moritz Managing Partner Sequoia Capital Menlo Park \n", - "1 Navid Alipour Managing Partner Analytics Ventures San Diego \n", - "2 Aaref Hilaly Partner Bain Capital Ventures Palo Alto \n", - "3 Aaron Fleishman Principal Tola Capital Seattle WA \n", - "4 Aaron Jacobson Partner New Enterprise Associates Menlo Park \n", - "\n", - " Personal Investment Focus Geographic Focus Typical Deal Size \\\n", - "0 AI/ML Global $10M \n", - "1 AI/ML San Diego $5M \n", - "2 Real-time Analytics Burlingame Seed \n", - "3 Enterprise AI PNW $5M-$20M \n", - "4 MLOps North America Series A \n", - "\n", - " Preferred Stages Notable Personal AI Investments Potential Fit with Kaizen \\\n", - "0 Early to Growth Google High''' \n", - "1 Seed CureMetrix High'' \n", - "2 Seed/Early Stage Rubrik Medium \n", - "3 Series A Databricks High \n", - "4 $10M-$30M Databricks High \n", - "\n", - " firstName lastName \n", - "0 Michael Moritz \n", - "1 Navid Alipour \n", - "2 Aaref Hilaly \n", - "3 Aaron Fleishman \n", - "4 Aaron Jacobson " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nice_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "30dcc791-cbdb-45f1-9298-a74e0a7babab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100, 12)" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nice_data.shape" - ] - }, - { - "cell_type": "markdown", - "id": "5c4cafb4-fe5f-4f6e-b594-759b199acb7e", - "metadata": {}, - "source": [ - "\n", - "## Write this nice data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b1f4a89-cb96-417a-86f4-ebc513c18510", - "metadata": {}, - "outputs": [], - "source": [ - "hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name=\"testing_tab\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": 
"Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py deleted file mode 100644 index a76ac9e94..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py +++ /dev/null @@ -1,107 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# CONTENTS: -# - [hgoogle_file_api.py](#hgoogle_file_api.py) -# - [Get Credentials for your drive](#get-credentials-for-your-drive) -# - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet) -# - [Freeze Rows](#freeze-rows) -# - [Change the height of certin rows](#change-the-height-of-certin-rows) -# - [Read some nice data](#read-some-nice-data) -# - [Write this nice data](#write-this-nice-data) - -# %% [markdown] -# -# # hgoogle_file_api.py - -# %% -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade google-api-python-client)" -# # !sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade pip install oauth2client)" -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade gspread)" - -# %% -import importlib -import 
helpers.hgoogle_drive_api as hgodrapi - -importlib.reload(hgodrapi) - -# %% [markdown] -# -# ## Get Credentials for your drive - -# %% -google_creds = hgodrapi.get_credentials() -print(google_creds) - -# %% -service = hgodrapi.get_sheets_service(google_creds) -print(service) - -# %% [markdown] -# -# ## Get Tab/Sheet id of a particular google sheet - -# %% -tab_name = "cleaned_profiles_1" -url = "https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260" -sheet_id = "1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA" -credentials = google_creds - -# %% [markdown] -# -# ## Freeze Rows - -# %% -row_indices = [0, 1, 2] -hgodrapi.freeze_rows( - credentials, - sheet_id=sheet_id, - row_indices=row_indices, - tab_name=tab_name, -) - -# %% [markdown] -# -# ## Change the height of certin rows - -# %% -hgodrapi.set_row_height( - google_creds, - sheet_id=sheet_id, - height=20, - start_index=0, - end_index=2, - tab_name=tab_name, -) - -# %% [markdown] -# -# ## Read some nice data - -# %% -nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name) - -# %% -nice_data.head() - -# %% -nice_data.shape - -# %% [markdown] -# -# ## Write this nice data - -# %% -hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name="testing_tab") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb deleted file mode 100644 index 3bb70bdef..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb +++ /dev/null @@ -1,13040 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CONTENTS:\n", - "- [Description](#description)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ 
- "\n", - "# Description\n", - "\n", - "This notebook examines ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", - "#!jupyter labextension enable" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:14.828251Z", - "start_time": "2021-04-02T18:11:14.514771Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import logging\n", - "\n", - "import helpers.hdbg as hdbg\n", - "import helpers.henv as henv" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:24.635995Z", - "start_time": "2021-04-02T18:11:18.239237Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# System signature\n", - " # Container version\n", - " container_version='1.2.0'\n", - " changelog_version='2.0.0'\n", - " # Git info\n", - " branch_name='CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI'\n", - " hash='0ca93d8c'\n", - " # Last commits:\n", - " * 0ca93d8c GP Saggese Merge ( 5 minutes ago) Fri May 9 22:09:03 2025 (HEAD -> CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI, origin/CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI)\n", - " |\\ \n", - " * | 99cbbf22 GP Saggese Lint ( 6 minutes ago) Fri May 9 22:08:07 2025 \n", - " | * 27b38c48 GP Saggese CmampTask12067_Read_docs_about_DataPull_4 (#698) ( 8 minutes ago) Fri May 9 22:06:25 2025 (origin/master, origin/HEAD, master)\n", - " # Platform info\n", - " system=Linux\n", - " node name=0f79e8b845ee\n", - " release=6.10.14-linuxkit\n", - " version=#1 SMP Thu Mar 20 16:32:56 UTC 2025\n", - " machine=aarch64\n", - " processor=aarch64\n", - " # psutils info\n", - " cpu count=8\n", - " cpu freq=None\n", - " 
memory=svmem(total=16749285376, available=14575529984, percent=13.0, used=1910644736, free=9673363456, active=2843516928, inactive=3252117504, buffers=490647552, cached=4674629632, shared=1093632, slab=694362112)\n", - " disk usage=sdiskusage(total=270233210880, used=102272610304, free=154199986176, percent=39.9)\n", - " # Docker info\n", - " has_docker=True\n", - " docker_version='28.0.4'\n", - " docker_needs_sudo=False\n", - " has_privileged_mode=True\n", - " is_inside_docker=True\n", - " has_docker_sibling_containers_support=True\n", - " has_docker_children_containers_support=True\n", - " # Packages\n", - " python: 3.12.3\n", - " cvxopt: ?\n", - " cvxpy: ?\n", - " gluonnlp: ?\n", - " gluonts: ?\n", - " joblib: 1.4.2\n", - " mxnet: ?\n", - " numpy: 2.2.3\n", - " pandas: 2.2.3\n", - " pyarrow: 19.0.1\n", - " scipy: 1.15.2\n", - " seaborn: 0.13.2\n", - " sklearn: 1.6.1\n", - " statsmodels: 0.14.4\n" - ] - } - ], - "source": [ - "print(henv.get_system_signature()[0])\n", - "\n", - "hnotebook.config_notebook()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:24.668793Z", - "start_time": "2021-04-02T18:11:24.638503Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n", - "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-0f2f4a10-7f18-4858-af02-b60808101345.json'\n" - ] - } - ], - "source": [ - "# hdbg.init_logger(verbosity=logging.DEBUG)\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "# hdbg.test_logger()\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet openai requests)\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - 
"source": [ - "import helpers.hllm as hllm\n", - "import helpers.hpandas as hpandas" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "val = hllm.get_model_stats()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'architecture': {'input_modalities': ['text', 'image'],\n", - " 'instruct_type': None,\n", - " 'modality': 'text+image->text',\n", - " 'output_modalities': ['text'],\n", - " 'tokenizer': 'Mistral'},\n", - " 'context_length': 131072,\n", - " 'created': 1746627341,\n", - " 'description': 'Mistral Medium 3 is a high-performance enterprise-grade '\n", - " 'language model designed to deliver frontier-level '\n", - " 'capabilities at significantly reduced operational cost. It '\n", - " 'balances state-of-the-art reasoning and multimodal '\n", - " 'performance with 8× lower cost compared to traditional large '\n", - " 'models, making it suitable for scalable deployments across '\n", - " 'professional and industrial use cases.\\n'\n", - " '\\n'\n", - " 'The model excels in domains such as coding, STEM reasoning, '\n", - " 'and enterprise adaptation. It supports hybrid, on-prem, and '\n", - " 'in-VPC deployments and is optimized for integration into '\n", - " 'custom workflows. 
Mistral Medium 3 offers competitive '\n", - " 'accuracy relative to larger models like Claude Sonnet '\n", - " '3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining '\n", - " 'broad compatibility across cloud environments.',\n", - " 'id': 'mistralai/mistral-medium-3',\n", - " 'name': 'Mistral: Mistral Medium 3',\n", - " 'per_request_limits': None,\n", - " 'pricing': {'completion': '0.000002',\n", - " 'image': '0',\n", - " 'internal_reasoning': '0',\n", - " 'prompt': '0.0000004',\n", - " 'request': '0',\n", - " 'web_search': '0'},\n", - " 'supported_parameters': ['tools',\n", - " 'tool_choice',\n", - " 'max_tokens',\n", - " 'temperature',\n", - " 'top_p',\n", - " 'stop',\n", - " 'frequency_penalty',\n", - " 'presence_penalty',\n", - " 'response_format',\n", - " 'structured_outputs',\n", - " 'seed'],\n", - " 'top_provider': {'context_length': 131072,\n", - " 'is_moderated': False,\n", - " 'max_completion_tokens': None}}\n" - ] - } - ], - "source": [ - "import pprint\n", - "\n", - "pprint.pprint(val[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreateddescriptioncontext_lengthper_request_limitssupported_parametersarchitecture_modalityarchitecture_input_modalitiesarchitecture_output_modalitiesarchitecture_tokenizerarchitecture_instruct_typepricing_promptpricing_completionpricing_requestpricing_imagepricing_web_searchpricing_internal_reasoningtop_provider_context_lengthtop_provider_max_completion_tokenstop_provider_is_moderatedpricing_input_cache_readpricing_input_cache_write
0mistralai/mistral-medium-3Mistral: Mistral Medium 31746627341Mistral Medium 3 is a high-performance enterpr...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.00000040.0000020000131072.0NaNFalseNaNNaN
1google/gemini-2.5-pro-previewGoogle: Gemini 2.5 Pro Preview1746578513Gemini 2.5 Pro is Google’s state-of-the-art AI...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0.000001250.0000100.00516001048576.065535.0False0.000000310.000001625
2arcee-ai/caller-largeArcee AI: Caller Large1746487869Caller Large is Arcee's specialist \"function‑c...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.000000550.00000085000032768.0NaNFalseNaNNaN
3arcee-ai/spotlightArcee AI: Spotlight1746481552Spotlight is a 7‑billion‑parameter vision‑lang...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000180.000000180000131072.065537.0FalseNaNNaN
4arcee-ai/maestro-reasoningArcee AI: Maestro Reasoning1746481269Maestro Reasoning is Arcee's flagship analysis...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000090.00000330000131072.032000.0FalseNaNNaN
5arcee-ai/virtuoso-largeArcee AI: Virtuoso Large1746478885Virtuoso‑Large is Arcee's top‑tier general‑pur...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000750.00000120000131072.064000.0FalseNaNNaN
6arcee-ai/coder-largeArcee AI: Coder Large1746478663Coder‑Large is a 32 B‑parameter offspring of Q...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.0000008000032768.0NaNFalseNaNNaN
7arcee-ai/virtuoso-medium-v2Arcee AI: Virtuoso Medium V21746478434Virtuoso‑Medium‑v2 is a 32 B model distilled f...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.00000080000131072.032768.0FalseNaNNaN
8arcee-ai/arcee-blitzArcee AI: Arcee Blitz1746470100Arcee Blitz is a 24 B‑parameter dense model di...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000450.00000075000032768.0NaNFalseNaNNaN
9microsoft/phi-4-reasoning-plus:freeMicrosoft: Phi 4 Reasoning Plus (free)1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
10microsoft/phi-4-reasoning-plusMicrosoft: Phi 4 Reasoning Plus1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.000000070.00000035000032768.0NaNFalseNaNNaN
11microsoft/phi-4-reasoning:freeMicrosoft: Phi 4 Reasoning (free)1746121275Phi-4-reasoning is a 14B parameter dense decod...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
12qwen/qwen3-0.6b-04-28:freeQwen: Qwen3 0.6B (free)1746043526Qwen3-0.6B is a lightweight, 0.6 billion param...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
13inception/mercury-coder-small-betaInception: Mercury Coder Small Beta1746033880Mercury Coder Small is the first diffusion lar...32000None[max_tokens, frequency_penalty, presence_penal...text->text[text][text]OtherNone0.000000250.000001000032000.0NaNFalseNaNNaN
14qwen/qwen3-1.7b:freeQwen: Qwen3 1.7B (free)1746031388Qwen3-1.7B is a compact, 1.7 billion parameter...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
15qwen/qwen3-4b:freeQwen: Qwen3 4B (free)1746031104Qwen3-4B is a 4 billion parameter dense langua...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None000000128000.0NaNFalseNaNNaN
16opengvlab/internvl3-14b:freeOpenGVLab: InternVL3 14B (free)1746021355The 14b version of the InternVL3 series. An ad...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
17opengvlab/internvl3-2b:freeOpenGVLab: InternVL3 2B (free)1746019807The 2b version of the InternVL3 series, for an...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
18deepseek/deepseek-prover-v2:freeDeepSeek: DeepSeek Prover V2 (free)1746013094DeepSeek Prover V2 is a 671B parameter model, ...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
19deepseek/deepseek-prover-v2DeepSeek: DeepSeek Prover V21746013094DeepSeek Prover V2 is a 671B parameter model, ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.00000050.000002180000131072.0NaNFalseNaNNaN
20meta-llama/llama-guard-4-12bMeta: Llama Guard 4 12B1745975193Llama Guard 4 is a Llama 4 Scout-derived multi...163840None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000050.000000050000163840.0NaNFalseNaNNaN
21qwen/qwen3-30b-a3b:freeQwen: Qwen3 30B A3B (free)1745878604Qwen3, the latest generation in the Qwen large...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
22qwen/qwen3-30b-a3bQwen: Qwen3 30B A3B1745878604Qwen3, the latest generation in the Qwen large...40960None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwen3None0.00000010.0000003000040960.040960.0FalseNaNNaN
23qwen/qwen3-8b:freeQwen: Qwen3 8B (free)1745876632Qwen3-8B is a dense 8.2B parameter causal lang...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.040960.0FalseNaNNaN
24qwen/qwen3-8bQwen: Qwen3 8B1745876632Qwen3-8B is a dense 8.2B parameter causal lang...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.0000000350.0000001380000128000.0NaNFalseNaNNaN
25qwen/qwen3-14b:freeQwen: Qwen3 14B (free)1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
26qwen/qwen3-14bQwen: Qwen3 14B1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000070.00000024000040960.040960.0FalseNaNNaN
27qwen/qwen3-32b:freeQwen: Qwen3 32B (free)1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
28qwen/qwen3-32bQwen: Qwen3 32B1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.00000010.0000003000040960.0NaNFalseNaNNaN
29qwen/qwen3-235b-a22b:freeQwen: Qwen3 235B A22B (free)1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
30qwen/qwen3-235b-a22bQwen: Qwen3 235B A22B1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000140.000002000040960.040960.0FalseNaNNaN
31tngtech/deepseek-r1t-chimera:freeTNG: DeepSeek R1T Chimera (free)1745760875DeepSeek-R1T-Chimera is created by merging Dee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
32thudm/glm-z1-rumination-32bTHUDM: GLM Z1 Rumination 32B1745601495THUDM: GLM Z1 Rumination 32B is a 32B-paramete...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
33thudm/glm-z1-9b:freeTHUDM: GLM Z1 9B (free)1745601140GLM-Z1-9B-0414 is a 9B-parameter language mode...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032000.0NaNFalseNaNNaN
34thudm/glm-4-9b:freeTHUDM: GLM 4 9B (free)1745601023GLM-4-9B-0414 is a 9 billion parameter languag...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032000.0NaNFalseNaNNaN
35microsoft/mai-ds-r1:freeMicrosoft: MAI DS R1 (free)1745194100MAI-DS-R1 is a post-trained variant of DeepSee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
36thudm/glm-z1-32b:freeTHUDM: GLM Z1 32B (free)1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
37thudm/glm-z1-32bTHUDM: GLM Z1 32B1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
38thudm/glm-4-32b:freeTHUDM: GLM 4 32B (free)1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
39thudm/glm-4-32bTHUDM: GLM 4 32B1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000240.00000024000032000.0NaNFalseNaNNaN
40google/gemini-2.5-flash-previewGoogle: Gemini 2.5 Flash Preview1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000000600.0006192001048576.065535.0False0.00000003750.0000002333
41google/gemini-2.5-flash-preview:thinkingGoogle: Gemini 2.5 Flash Preview (thinking)1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000003500.0006192001048576.065535.0False0.00000003750.0000002333
42openai/o4-mini-highOpenAI: o4 Mini High1744824212OpenAI o4-mini-high is the same model as [o4-m...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
43openai/o3OpenAI: o31744823457o3 is a well-rounded and powerful model across...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.000010.0000400.0076500200000.0100000.0True0.0000025NaN
44openai/o4-miniOpenAI: o4 Mini1744820942OpenAI o4-mini is a compact reasoning model in...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
45shisa-ai/shisa-v2-llama3.3-70b:freeShisa AI: Shisa V2 Llama 3.3 70B (free)1744754858Shisa V2 Llama 3.3 70B is a bilingual Japanese...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None00000032768.0NaNFalseNaNNaN
46qwen/qwen2.5-coder-7b-instructQwen: Qwen2.5 Coder 7B Instruct1744734887Qwen2.5-Coder-7B-Instruct is a 7B parameter in...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]QwenNone0.000000010.00000003000032768.0NaNFalseNaNNaN
47openai/gpt-4.1OpenAI: GPT-4.11744651385GPT-4.1 is a flagship large language model opt...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.0000020.00000800001047576.032768.0True0.0000005NaN
48openai/gpt-4.1-miniOpenAI: GPT-4.1 Mini1744651381GPT-4.1 Mini is a mid-sized model delivering p...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000040.000001600001047576.032768.0True0.0000001NaN
49openai/gpt-4.1-nanoOpenAI: GPT-4.1 Nano1744651369For tasks that demand low latency, GPT‑4.1 nan...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000010.000000400001047576.032768.0True0.000000025NaN
50eleutherai/llemma_7bEleutherAI: Llemma 7b1744643225Llemma 7B is a language model for mathematics....4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Othercode-llama0.00000080.000001200004096.04096.0FalseNaNNaN
51alfredpros/codellama-7b-instruct-solidityAlfredPros: CodeLLaMa 7B Instruct Solidity1744641874A finetuned 7 billion parameters Code LLaMA - ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otheralpaca0.00000080.000001200004096.04096.0FalseNaNNaN
52arliai/qwq-32b-arliai-rpr-v1:freeArliAI: QwQ 32B RpR v1 (free)1744555982QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
53agentica-org/deepcoder-14b-preview:freeAgentica: Deepcoder 14B Preview (free)1744555395DeepCoder-14B-Preview is a 14B parameter code ...96000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000096000.0NaNFalseNaNNaN
54moonshotai/kimi-vl-a3b-thinking:freeMoonshot AI: Kimi VL A3B Thinking (free)1744304841Kimi-VL is a lightweight Mixture-of-Experts vi...131072None[max_tokens, temperature, top_p, reasoning, in...text+image->text[image, text][text]OtherNone000000131072.0NaNFalseNaNNaN
55x-ai/grok-3-mini-betaxAI: Grok 3 Mini Beta1744240195Grok 3 Mini is a lightweight, smaller thinking...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.00000030.00000050000131072.0NaNFalseNaNNaN
56x-ai/grok-3-betaxAI: Grok 3 Beta1744240068Grok 3 is the latest model from xAI. It's thei...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000030.0000150000131072.0NaNFalseNaNNaN
57nvidia/llama-3.3-nemotron-super-49b-v1:freeNVIDIA: Llama 3.3 Nemotron Super 49B v1 (free)1744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
58nvidia/llama-3.3-nemotron-super-49b-v1NVIDIA: Llama 3.3 Nemotron Super 49B v11744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000130.00000040000131072.0NaNFalseNaNNaN
59nvidia/llama-3.1-nemotron-ultra-253b-v1:freeNVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)1744115059Llama-3.1-Nemotron-Ultra-253B-v1 is a large la...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None000000131072.0NaNFalseNaNNaN
60meta-llama/llama-4-maverick:freeMeta: Llama 4 Maverick (free)1743881822Llama 4 Maverick 17B Instruct (128E) is a high...256000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000256000.0NaNFalseNaNNaN
61meta-llama/llama-4-maverickMeta: Llama 4 Maverick1743881822Llama 4 Maverick 17B Instruct (128E) is a high...1048576None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000170.000000600.0006684001048576.016384.0FalseNaNNaN
62meta-llama/llama-4-scout:freeMeta: Llama 4 Scout (free)1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...512000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000512000.0NaNFalseNaNNaN
63meta-llama/llama-4-scoutMeta: Llama 4 Scout1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...1048576None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]OtherNone0.000000080.000000300001048576.01048576.0FalseNaNNaN
64all-hands/openhands-lm-32b-v0.1OpenHands LM 32B V0.11743613013OpenHands LM v0.1 is a 32B open-source coding ...16384None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000260.0000034000016384.04096.0FalseNaNNaN
65mistral/ministral-8bMistral: Ministral 8B1743430021Ministral 8B is a state-of-the-art language mo...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000131072.0NaNFalseNaNNaN
66deepseek/deepseek-v3-base:freeDeepSeek: DeepSeek V3 Base (free)1743272023Note that this is a base model mostly meant fo...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
67scb10x/llama3.1-typhoon2-8b-instructTyphoon2 8B Instruct1743196511Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000180.0000001800008192.0NaNFalseNaNNaN
68scb10x/llama3.1-typhoon2-70b-instructTyphoon2 70B Instruct1743196170Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000880.0000008800008192.0NaNFalseNaNNaN
69allenai/molmo-7b-d:freeAllenAI: Molmo 7B D (free)1743023247Molmo is a family of open vision-language mode...4096None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0000004096.0NaNFalseNaNNaN
70bytedance-research/ui-tars-72b:freeBytedance: UI-TARS 72B (free)1743020065UI-TARS 72B is an open-source multimodal AI mo...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone00000032768.0NaNFalseNaNNaN
71qwen/qwen2.5-vl-3b-instruct:freeQwen: Qwen2.5 VL 3B Instruct (free)1743014573Qwen2.5 VL 3B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.0NaNFalseNaNNaN
72google/gemini-2.5-pro-exp-03-25Google: Gemini 2.5 Pro Experimental1742922099Gemini 2.5 Pro is Google’s state-of-the-art AI...1000000None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0000001000000.065535.0FalseNaNNaN
73qwen/qwen2.5-vl-32b-instruct:freeQwen: Qwen2.5 VL 32B Instruct (free)1742839838Qwen2.5-VL-32B is a multimodal vision-language...8192None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0000008192.0NaNFalseNaNNaN
74qwen/qwen2.5-vl-32b-instructQwen: Qwen2.5 VL 32B Instruct1742839838Qwen2.5-VL-32B is a multimodal vision-language...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000090.00000090000128000.0NaNFalseNaNNaN
75deepseek/deepseek-chat-v3-0324:freeDeepSeek: DeepSeek V3 0324 (free)1742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
76deepseek/deepseek-chat-v3-0324DeepSeek: DeepSeek V3 03241742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]DeepSeekNone0.00000030.000000880000163840.0NaNFalseNaNNaN
77featherless/qwerky-72b:freeQwerky 72B (free)1742481597Qwerky-72B is a linear-attention RWKV variant ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.04096.0FalseNaNNaN
78openai/o1-proOpenAI: o1-pro1742423211The o1 series of models are trained with reinf...200000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]GPTNone0.000150.000600.2167500200000.0100000.0TrueNaNNaN
79mistralai/mistral-small-3.1-24b-instruct:freeMistral: Mistral Small 3.1 24B (free)1742238937Mistral Small 3.1 24B Instruct is an upgraded ...96000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone00000096000.096000.0FalseNaNNaN
80mistralai/mistral-small-3.1-24b-instructMistral: Mistral Small 3.1 24B1742238937Mistral Small 3.1 24B Instruct is an upgraded ...131072None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]MistralNone0.000000050.000000150000131072.0NaNFalseNaNNaN
81open-r1/olympiccoder-32b:freeOlympicCoder 32B (free)1742077228OlympicCoder-32B is a high-performing open-sou...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
82google/gemma-3-1b-it:freeGoogle: Gemma 3 1B (free)1741963556Gemma 3 1B is the smallest of the new Gemma 3 ...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000032768.08192.0FalseNaNNaN
83google/gemma-3-4b-it:freeGoogle: Gemma 3 4B (free)1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
84google/gemma-3-4b-itGoogle: Gemma 3 4B1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000020.000000040000131072.0NaNFalseNaNNaN
85ai21/jamba-1.6-largeAI21: Jamba 1.6 Large1741905173AI21 Jamba Large 1.6 is a high-performance hyb...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.0000020.0000080000256000.04096.0FalseNaNNaN
86ai21/jamba-1.6-miniAI21: Jamba Mini 1.61741905171AI21 Jamba Mini 1.6 is a hybrid foundation mod...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000020.00000040000256000.04096.0FalseNaNNaN
87google/gemma-3-12b-it:freeGoogle: Gemma 3 12B (free)1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
88google/gemma-3-12b-itGoogle: Gemma 3 12B1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000050.00000010000131072.0NaNFalseNaNNaN
89cohere/command-aCohere: Command A1741894342Command A is an open-weights 111B parameter mo...256000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000250.000010000256000.08192.0FalseNaNNaN
90openai/gpt-4o-mini-search-previewOpenAI: GPT-4o-mini Search Preview1741818122GPT-4o mini Search Preview is a specialized mo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.000000150.00000060.02750.00021700128000.016384.0TrueNaNNaN
91openai/gpt-4o-search-previewOpenAI: GPT-4o Search Preview1741817949GPT-4o Search Previewis a specialized model fo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.00000250.000010.0350.00361300128000.016384.0TrueNaNNaN
92rekaai/reka-flash-3:freeReka: Flash 3 (free)1741812813Reka Flash 3 is a general-purpose, instruction...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
93google/gemma-3-27b-it:freeGoogle: Gemma 3 27B (free)1741756359Gemma 3 introduces multimodality, supporting v...96000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000096000.08192.0FalseNaNNaN
94google/gemma-3-27b-itGoogle: Gemma 3 27B1741756359Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.00000010.000000200.000025600131072.016384.0FalseNaNNaN
95thedrummer/anubis-pro-105b-v1TheDrummer: Anubis Pro 105B V11741642290Anubis Pro 105B v1 is an expanded and refined ...131072None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000080.0000010000131072.0131072.0FalseNaNNaN
96thedrummer/skyfall-36b-v2TheDrummer: Skyfall 36B V21741636566Skyfall 36B v2 is an enhanced iteration of Mis...32768None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000050.0000008000032768.032768.0FalseNaNNaN
97microsoft/phi-4-multimodal-instructMicrosoft: Phi 4 Multimodal Instruct1741396284Phi-4 Multimodal Instruct is a versatile 5.6B ...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000050.000000100.0001768500131072.0NaNFalseNaNNaN
98perplexity/sonar-reasoning-proPerplexity: Sonar Reasoning Pro1741313308Note: Sonar Pro pricing includes Perplexity se...128000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]Otherdeepseek-r10.0000020.000008000.0050128000.0NaNFalseNaNNaN
99perplexity/sonar-proPerplexity: Sonar Pro1741312423Note: Sonar Pro pricing includes Perplexity se...200000None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000030.000015000.0050200000.08000.0FalseNaNNaN
100perplexity/sonar-deep-researchPerplexity: Sonar Deep Research1741311246Sonar Deep Research is a research-focused mode...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000020.000008000.0050.000003128000.0NaNFalseNaNNaN
101deepseek/deepseek-r1-zero:freeDeepSeek: DeepSeek R1 Zero (free)1741297434DeepSeek-R1-Zero is a model trained via large-...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r1000000163840.0NaNFalseNaNNaN
102qwen/qwq-32b:freeQwen: QwQ 32B (free)1741208814QwQ is the reasoning model of the Qwen series....40000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq00000040000.040000.0FalseNaNNaN
103qwen/qwq-32bQwen: QwQ 32B1741208814QwQ is the reasoning model of the Qwen series....131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq0.000000150.00000020000131072.0NaNFalseNaNNaN
104moonshotai/moonlight-16b-a3b-instruct:freeMoonshot AI: Moonlight 16B A3B Instruct (free)1740719801Moonlight-16B-A3B-Instruct is a 16B-parameter ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0000008192.0NaNFalseNaNNaN
105nousresearch/deephermes-3-llama-3-8b-preview:freeNous: DeepHermes 3 Llama 3 8B Preview (free)1740719372DeepHermes 3 Preview is the latest version of ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
106openai/gpt-4.5-previewOpenAI: GPT-4.5 (Preview)1740687810GPT-4.5 (Preview) is a research preview of Ope...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.0000750.0001500.10837500128000.016384.0True0.0000375NaN
107google/gemini-2.0-flash-lite-001Google: Gemini 2.0 Flash Lite1740506212Gemini 2.0 Flash Lite offers a significantly f...1048576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.0000000750.000000300001048576.08192.0FalseNaNNaN
108anthropic/claude-3.7-sonnetAnthropic: Claude 3.7 Sonnet1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
109anthropic/claude-3.7-sonnet:thinkingAnthropic: Claude 3.7 Sonnet (thinking)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
110anthropic/claude-3.7-sonnet:betaAnthropic: Claude 3.7 Sonnet (self-moderated)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[max_tokens, temperature, stop, reasoning, inc...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.0128000.0False0.00000030.00000375
111perplexity/r1-1776Perplexity: R1 17761740004929R1 1776 is a version of DeepSeek-R1 that has b...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.0000020.0000080000128000.0NaNFalseNaNNaN
112mistralai/mistral-sabaMistral: Saba1739803239Mistral Saba is a 24B-parameter language model...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
113cognitivecomputations/dolphin3.0-r1-mistral-24...Dolphin3.0 R1 Mistral 24B (free)1739462498Dolphin 3.0 R1 is the next generation of the D...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
114cognitivecomputations/dolphin3.0-mistral-24b:freeDolphin3.0 Mistral 24B (free)1739462019Dolphin 3.0 is the next generation of the Dolp...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
115meta-llama/llama-guard-3-8bLlama Guard 3 8B1739401318Llama Guard 3 is a Llama-3.1-8B pretrained mod...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.000000020.000000060000131072.0NaNFalseNaNNaN
116openai/o3-mini-highOpenAI: o3 Mini High1739372611OpenAI o3-mini-high is the same model as [o3-m...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
117deepseek/deepseek-r1-distill-llama-8bDeepSeek: R1 Distill Llama 8B1738937718DeepSeek R1 Distill Llama 8B is a distilled la...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.000000040.00000004000032000.032000.0FalseNaNNaN
118google/gemini-2.0-flash-001Google: Gemini 2.0 Flash1738769413Gemini Flash 2.0 offers a significantly faster...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.00000010.000000400.0000258001000000.08192.0False0.0000000250.0000001833
119qwen/qwen-vl-plusQwen: Qwen VL Plus1738731255Qwen's Enhanced Large Visual Language Model. S...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.000000210.0000006300.0002688007500.01500.0FalseNaNNaN
120aion-labs/aion-1.0AionLabs: Aion-1.01738697557Aion-1.0 is a multi-model system designed for ...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.0000040.0000080000131072.032768.0FalseNaNNaN
121aion-labs/aion-1.0-miniAionLabs: Aion-1.0-Mini1738697107Aion-1.0-Mini 32B parameter model is a distill...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.00000070.00000140000131072.032768.0FalseNaNNaN
122aion-labs/aion-rp-llama-3.1-8bAionLabs: Aion-RP 1.0 (8B)1738696718Aion-RP-Llama-3.1-8B ranks the highest in the ...32768None[max_tokens, temperature, top_p]text->text[text][text]OtherNone0.00000020.0000002000032768.032768.0FalseNaNNaN
123qwen/qwen-vl-maxQwen: Qwen VL Max1738434304Qwen VL Max is a visual understanding model wi...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.00000080.000003200.001024007500.01500.0FalseNaNNaN
124qwen/qwen-turboQwen: Qwen-Turbo1738410974Qwen-Turbo, based on Qwen2.5, is a 1M context ...1000000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.000000050.000000200001000000.08192.0FalseNaNNaN
125qwen/qwen2.5-vl-72b-instruct:freeQwen: Qwen2.5 VL 72B Instruct (free)1738410311Qwen2.5-VL is proficient in recognizing common...131072None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone000000131072.02048.0FalseNaNNaN
126qwen/qwen2.5-vl-72b-instructQwen: Qwen2.5 VL 72B Instruct1738410311Qwen2.5-VL is proficient in recognizing common...32000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.000000250.00000075000032000.0NaNFalseNaNNaN
127qwen/qwen-plusQwen: Qwen-Plus1738409840Qwen-Plus, based on the Qwen2.5 foundation mod...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000040.00000120000131072.08192.0FalseNaNNaN
128qwen/qwen-maxQwen: Qwen-Max1738402289Qwen-Max, based on Qwen2.5, provides the best ...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000160.0000064000032768.08192.0FalseNaNNaN
129openai/o3-miniOpenAI: o3 Mini1738351721OpenAI o3-mini is a cost-efficient language mo...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
130deepseek/deepseek-r1-distill-qwen-1.5bDeepSeek: R1 Distill Qwen 1.5B1738328067DeepSeek R1 Distill Qwen 1.5B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000180.000000180000131072.032768.0FalseNaNNaN
131mistralai/mistral-small-24b-instruct-2501:freeMistral: Mistral Small 3 (free)1738255409Mistral Small 3 is a 24B-parameter language mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone00000032768.0NaNFalseNaNNaN
132mistralai/mistral-small-24b-instruct-2501Mistral: Mistral Small 31738255409Mistral Small 3 is a 24B-parameter language mo...28000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone0.000000060.00000012000028000.014000.0FalseNaNNaN
133deepseek/deepseek-r1-distill-qwen-32b:freeDeepSeek: R1 Distill Qwen 32B (free)1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...16000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000016000.016000.0FalseNaNNaN
134deepseek/deepseek-r1-distill-qwen-32bDeepSeek: R1 Distill Qwen 32B1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000120.000000180000131072.016384.0FalseNaNNaN
135deepseek/deepseek-r1-distill-qwen-14b:freeDeepSeek: R1 Distill Qwen 14B (free)1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000064000.0NaNFalseNaNNaN
136deepseek/deepseek-r1-distill-qwen-14bDeepSeek: R1 Distill Qwen 14B1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000150.00000015000064000.064000.0FalseNaNNaN
137perplexity/sonar-reasoningPerplexity: Sonar Reasoning1738131107Sonar Reasoning is a reasoning model provided ...127000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000010.0000050.005000127000.0NaNFalseNaNNaN
138perplexity/sonarPerplexity: Sonar1738013808Sonar is lightweight, affordable, fast, and si...127072None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000010.0000010.005000127072.0NaNFalseNaNNaN
139liquid/lfm-7bLiquid: LFM 7B1737806883LFM-7B, a new best-in-class language model. LF...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000010.00000001000032768.0NaNFalseNaNNaN
140liquid/lfm-3bLiquid: LFM 3B1737806501Liquid's LFM 3B delivers incredible performanc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000020.00000002000032768.0NaNFalseNaNNaN
141deepseek/deepseek-r1-distill-llama-70b:freeDeepSeek: R1 Distill Llama 70B (free)1737663169DeepSeek R1 Distill Llama 70B is a distilled l...8192None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10000008192.04096.0FalseNaNNaN
142deepseek/deepseek-r1-distill-llama-70bDeepSeek: R1 Distill Llama 70B1737663169DeepSeek R1 Distill Llama 70B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.00000010.00000040000131072.016384.0FalseNaNNaN
143deepseek/deepseek-r1:freeDeepSeek: R1 (free)1737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, reasoning, include_reasoning, tem...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
144deepseek/deepseek-r1DeepSeek: R11737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.00000050.000002180000163840.0163840.0FalseNaNNaN
145minimax/minimax-01MiniMax: MiniMax-011736915462MiniMax-01 is a combines MiniMax-Text-01 for t...1000192None[max_tokens, temperature, top_p]text+image->text[text, image][text]OtherNone0.00000020.000001100001000192.01000192.0FalseNaNNaN
146mistralai/codestral-2501Mistral: Codestral 25011736895522[Mistral](/mistralai)'s cutting-edge language ...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000030.00000090000262144.0NaNFalseNaNNaN
147microsoft/phi-4Microsoft: Phi 41736489872[Microsoft Research](/microsoft) Phi-4 is desi...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000070.00000014000016384.016384.0FalseNaNNaN
148deepseek/deepseek-chat:freeDeepSeek: DeepSeek V3 (free)1735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
149deepseek/deepseek-chatDeepSeek: DeepSeek V31735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.000000380.000000890000163840.0163840.0FalseNaNNaN
150sao10k/l3.3-euryale-70bSao10K: Llama 3.3 Euryale 70B1734535928Euryale L3.3 70B is a model focused on creativ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
151openai/o1OpenAI: o11734459999The latest and strongest model family from Ope...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[text, image][text]GPTNone0.0000150.0000600.02167500200000.0100000.0True0.0000075NaN
152eva-unit-01/eva-llama-3.33-70bEVA Llama 3.33 70B1734377303EVA Llama 3.33 70b is a roleplay and storywrit...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.000006000016384.04096.0FalseNaNNaN
153x-ai/grok-2-vision-1212xAI: Grok 2 Vision 12121734237338Grok 2 Vision 1212 advances image-based AI wit...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000020.0000100.00360032768.0NaNFalseNaNNaN
154x-ai/grok-2-1212xAI: Grok 2 12121734232814Grok 2 1212 introduces significant enhancement...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000020.000010000131072.0NaNFalseNaNNaN
155cohere/command-r7b-12-2024Cohere: Command R7B (12-2024)1734158152Command R7B (12-2024) is a small, fast update ...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.00000003750.000000150000128000.04000.0FalseNaNNaN
156google/gemini-2.0-flash-exp:freeGoogle: Gemini 2.0 Flash Experimental (free)1733937523Gemini Flash 2.0 offers a significantly faster...1048576None[max_tokens, temperature, top_p, stop]text+image->text[text, image][text]GeminiNone0000001048576.08192.0FalseNaNNaN
157meta-llama/llama-3.3-70b-instruct:freeMeta: Llama 3.3 70B Instruct (free)1733506137The Meta Llama 3.3 multilingual large language...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30000008000.08000.0FalseNaNNaN
158meta-llama/llama-3.3-70b-instructMeta: Llama 3.3 70B Instruct1733506137The Meta Llama 3.3 multilingual large language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000090.000000350000131000.0131000.0FalseNaNNaN
159amazon/nova-lite-v1Amazon: Nova Lite 1.01733437363Amazon Nova Lite 1.0 is a very low-cost multim...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.000000060.0000002400.0000900300000.05120.0TrueNaNNaN
160amazon/nova-micro-v1Amazon: Nova Micro 1.01733437237Amazon Nova Micro 1.0 is a text-only model tha...128000None[tools, max_tokens, temperature, top_p, top_k,...text->text[text][text]NovaNone0.0000000350.000000140000128000.05120.0TrueNaNNaN
161amazon/nova-pro-v1Amazon: Nova Pro 1.01733436303Amazon Nova Pro 1.0 is a capable multimodal mo...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.00000080.000003200.001200300000.05120.0TrueNaNNaN
162qwen/qwq-32b-preview:freeQwen: QwQ 32B Preview (free)1732754541QwQ-32B-Preview is an experimental research mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r100000016384.0NaNFalseNaNNaN
163qwen/qwq-32b-previewQwen: QwQ 32B Preview1732754541QwQ-32B-Preview is an experimental research mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r10.000000090.00000027000032768.0NaNFalseNaNNaN
164google/learnlm-1.5-pro-experimental:freeGoogle: LearnLM 1.5 Pro Experimental (free)1732216551An experimental version of [Gemini 1.5 Pro](/g...40960None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone00000040960.08192.0FalseNaNNaN
165eva-unit-01/eva-qwen-2.5-72bEVA Qwen2.5 72B1732210606EVA Qwen2.5 72B is a roleplay and storywriting...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
166openai/gpt-4o-2024-11-20OpenAI: GPT-4o (2024-11-20)1732127594The 2024-11-20 version of GPT-4o offers a leve...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
167mistralai/mistral-large-2411Mistral Large 24111731978685Mistral Large 2 2411 is an update of [Mistral ...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
168mistralai/mistral-large-2407Mistral Large 24071731978415This is Mistral AI's flagship model, Mistral L...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
169mistralai/pixtral-large-2411Mistral: Pixtral Large 24111731977388Pixtral Large is a 124B parameter, open-weight...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.0000020.00000600.00288800131072.0NaNFalseNaNNaN
170x-ai/grok-vision-betaxAI: Grok Vision Beta1731976624Grok Vision Beta is xAI's experimental languag...8192None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000050.00001500.009008192.0NaNFalseNaNNaN
171infermatic/mn-inferor-12bInfermatic: Mistral Nemo Inferor 12B1731464428Inferor 12B is a merge of top roleplay models,...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000080.0000012000016384.04096.0FalseNaNNaN
172qwen/qwen-2.5-coder-32b-instruct:freeQwen2.5 Coder 32B Instruct (free)1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
173qwen/qwen-2.5-coder-32b-instructQwen2.5 Coder 32B Instruct1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000060.00000015000032768.016384.0FalseNaNNaN
174raifle/sorcererlm-8x22bSorcererLM 8x22B1731105083SorcererLM is an advanced RP and storytelling ...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralvicuna0.00000450.0000045000016000.0NaNFalseNaNNaN
175eva-unit-01/eva-qwen-2.5-32bEVA Qwen2.5 32B1731104847EVA Qwen2.5 32B is a roleplaying/storywriting ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000260.0000034000016384.04096.0FalseNaNNaN
176thedrummer/unslopnemo-12bUnslopnemo 12B1731103448UnslopNemo v4.1 is the latest addition from th...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000450.00000045000032000.016000.0FalseNaNNaN
177anthropic/claude-3.5-haiku:betaAnthropic: Claude 3.5 Haiku (self-moderated)1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
178anthropic/claude-3.5-haikuAnthropic: Claude 3.5 Haiku1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
179anthropic/claude-3.5-haiku-20241022:betaAnthropic: Claude 3.5 Haiku (2024-10-22) (self...1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
180anthropic/claude-3.5-haiku-20241022Anthropic: Claude 3.5 Haiku (2024-10-22)1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
181neversleep/llama-3.1-lumimaid-70bNeverSleep: Lumimaid v0.2 70B1729555200Lumimaid v0.2 70B is a finetune of [Llama 3.1 ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000150.00000225000016384.02048.0FalseNaNNaN
182anthracite-org/magnum-v4-72bMagnum v4 72B1729555200This is a series of models designed to replica...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000150.00000225000016384.01024.0FalseNaNNaN
183anthropic/claude-3.5-sonnet:betaAnthropic: Claude 3.5 Sonnet (self-moderated)1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
184anthropic/claude-3.5-sonnetAnthropic: Claude 3.5 Sonnet1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
185x-ai/grok-betaxAI: Grok Beta1729382400Grok Beta is xAI's experimental language model...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000050.0000150000131072.0NaNFalseNaNNaN
186mistralai/ministral-8bMistral: Ministral 8B1729123200Ministral 8B is an 8B parameter model featurin...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000128000.0NaNFalseNaNNaN
187mistralai/ministral-3bMistral: Ministral 3B1729123200Ministral 3B is a 3B parameter model optimized...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000040.000000040000131072.0NaNFalseNaNNaN
188qwen/qwen-2.5-7b-instruct:freeQwen2.5 7B Instruct (free)1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.032768.0FalseNaNNaN
189qwen/qwen-2.5-7b-instructQwen2.5 7B Instruct1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000050.0000001000032768.016384.0FalseNaNNaN
190nvidia/llama-3.1-nemotron-70b-instructNVIDIA: Llama 3.1 Nemotron 70B Instruct1728950400NVIDIA's Llama 3.1 Nemotron 70B is a language ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000120.00000030000131072.0131072.0FalseNaNNaN
191inflection/inflection-3-productivityInflection: Inflection 3 Productivity1728604800Inflection 3 Productivity is optimized for fol...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
192inflection/inflection-3-piInflection: Inflection 3 Pi1728604800Inflection 3 Pi powers Inflection's [Pi](https...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
193google/gemini-flash-1.5-8bGoogle: Gemini 1.5 Flash 8B1727913600Gemini Flash 1.5 8B is optimized for speed and...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.00000003750.0000001500001000000.08192.0False0.000000010.0000000583
194thedrummer/rocinante-12bRocinante 12B1727654400Rocinante 12B is designed for engaging storyte...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000250.0000005000032768.0NaNFalseNaNNaN
195anthracite-org/magnum-v2-72bMagnum v2 72B1727654400From the maker of [Goliath](https://openrouter...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000030.000003000032768.0NaNFalseNaNNaN
196liquid/lfm-40bLiquid: LFM 40B MoE1727654400Liquid's 40.3B Mixture of Experts (MoE) model....32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000150.00000015000032768.0NaNFalseNaNNaN
197meta-llama/llama-3.2-3b-instruct:freeMeta: Llama 3.2 3B Instruct (free)1727222400Llama 3.2 3B is a 3-billion-parameter multilin...20000None[max_tokens, temperature, top_p]text->text[text][text]Llama3llama300000020000.020000.0FalseNaNNaN
198meta-llama/llama-3.2-3b-instructMeta: Llama 3.2 3B Instruct1727222400Llama 3.2 3B is a 3-billion-parameter multilin...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000010.000000020000131072.016384.0FalseNaNNaN
199meta-llama/llama-3.2-1b-instruct:freeMeta: Llama 3.2 1B Instruct (free)1727222400Llama 3.2 1B is a 1-billion-parameter language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131000.0NaNFalseNaNNaN
200meta-llama/llama-3.2-1b-instructMeta: Llama 3.2 1B Instruct1727222400Llama 3.2 1B is a 1-billion-parameter language...131072None[max_tokens, temperature, top_p, top_k, stop, ...text->text[text][text]Llama3llama30.0000000050.000000010000131072.0NaNFalseNaNNaN
201meta-llama/llama-3.2-90b-vision-instructMeta: Llama 3.2 90B Vision Instruct1727222400The Llama 90B Vision model is a top-tier, 90-b...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.00000120.000001200.00173400131072.02048.0FalseNaNNaN
202meta-llama/llama-3.2-11b-vision-instruct:freeMeta: Llama 3.2 11B Vision Instruct (free)1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama3000000131072.02048.0FalseNaNNaN
203meta-llama/llama-3.2-11b-vision-instructMeta: Llama 3.2 11B Vision Instruct1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.0000000490.00000004900.0000794800131072.016384.0FalseNaNNaN
204qwen/qwen-2.5-72b-instruct:freeQwen2.5 72B Instruct (free)1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
205qwen/qwen-2.5-72b-instructQwen2.5 72B Instruct1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwenchatml0.000000120.00000039000032768.016384.0FalseNaNNaN
206qwen/qwen-2.5-vl-72b-instructQwen: Qwen2.5-VL 72B Instruct1726617600Qwen2.5 VL 72B is a multimodal LLM from the Qw...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000060.000000600.0005780032768.0NaNFalseNaNNaN
207neversleep/llama-3.1-lumimaid-8bNeverSleep: Lumimaid v0.2 8B1726358400Lumimaid v0.2 8B is a finetune of [Llama 3.1 8...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000032768.02048.0FalseNaNNaN
208openai/o1-previewOpenAI: o1-preview1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
209openai/o1-preview-2024-09-12OpenAI: o1-preview (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
210openai/o1-miniOpenAI: o1-mini1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
211openai/o1-mini-2024-09-12OpenAI: o1-mini (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
212mistralai/pixtral-12bMistral: Pixtral 12B1725926400The first multi-modal, text+image-to-text mode...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]MistralNone0.00000010.000000100.00014450032768.0NaNFalseNaNNaN
213cohere/command-r-plus-08-2024Cohere: Command R+ (08-2024)1724976000command-r-plus-08-2024 is an update of the [Co...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000250.000010000128000.04000.0FalseNaNNaN
214cohere/command-r-08-2024Cohere: Command R (08-2024)1724976000command-r-08-2024 is an update of the [Command...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.000000150.00000060000128000.04000.0FalseNaNNaN
215qwen/qwen-2.5-vl-7b-instruct:freeQwen: Qwen2.5-VL 7B Instruct (free)1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.064000.0FalseNaNNaN
216qwen/qwen-2.5-vl-7b-instructQwen: Qwen2.5-VL 7B Instruct1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000020.000000200.00014450032768.0NaNFalseNaNNaN
217sao10k/l3.1-euryale-70bSao10K: Llama 3.1 Euryale 70B v2.21724803200Euryale L3.1 70B v2.2 is a model focused on cr...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
218google/gemini-flash-1.5-8b-expGoogle: Gemini 1.5 Flash 8B Experimental1724803200Gemini Flash 1.5 8B Experimental is an experim...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GeminiNone0000001000000.08192.0FalseNaNNaN
219microsoft/phi-3.5-mini-128k-instructMicrosoft: Phi-3.5 Mini 128K Instruct1724198400Phi-3.5 models are lightweight, state-of-the-a...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.000000030.000000090000131072.0NaNFalseNaNNaN
220nousresearch/hermes-3-llama-3.1-70bNous: Hermes 3 70B Instruct1723939200Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.000000120.00000030000131072.0131072.0FalseNaNNaN
221nousresearch/hermes-3-llama-3.1-405bNous: Hermes 3 405B Instruct1723766400Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.00000080.00000080000131072.0131072.0FalseNaNNaN
222openai/chatgpt-4o-latestOpenAI: ChatGPT-4o1723593600OpenAI ChatGPT 4o is continually updated by Op...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GPTNone0.0000050.00001500.00722500128000.016384.0TrueNaNNaN
223sao10k/l3-lunaris-8bSao10K: Llama 3 8B Lunaris1723507200Lunaris 8B is a versatile generalist and rolep...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.0000000500008192.0NaNFalseNaNNaN
224aetherwiing/mn-starcannon-12bAetherwiing: Starcannon 12B1723507200Starcannon 12B v2 is a creative roleplay and s...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
225openai/gpt-4o-2024-08-06OpenAI: GPT-4o (2024-08-06)1722902400The 2024-08-06 version of GPT-4o offers improv...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
226meta-llama/llama-3.1-405b:freeMeta: Llama 3.1 405B (base) (free)1722556800Meta's latest class of model (Llama 3.1) launc...64000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none00000064000.0NaNFalseNaNNaN
227meta-llama/llama-3.1-405bMeta: Llama 3.1 405B (base)1722556800Meta's latest class of model (Llama 3.1) launc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.0000020.000002000032768.0NaNFalseNaNNaN
228nothingiisreal/mn-celeste-12bMistral Nemo 12B Celeste1722556800A specialized story writing and roleplaying mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
229perplexity/llama-3.1-sonar-small-128k-onlinePerplexity: Llama 3.1 Sonar 8B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.00000020.00000020.005000127072.0NaNFalseNaNNaN
230perplexity/llama-3.1-sonar-large-128k-onlinePerplexity: Llama 3.1 Sonar 70B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.0000010.0000010.005000127072.0NaNFalseNaNNaN
231meta-llama/llama-3.1-8b-instruct:freeMeta: Llama 3.1 8B Instruct (free)1721692800Meta's latest class of model (Llama 3.1) launc...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131072.04096.0FalseNaNNaN
232meta-llama/llama-3.1-8b-instructMeta: Llama 3.1 8B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.00000003000016384.016384.0FalseNaNNaN
233meta-llama/llama-3.1-405b-instructMeta: Llama 3.1 405B Instruct1721692800The highly anticipated 400B class of Llama3 is...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000080.0000008000032768.016384.0FalseNaNNaN
234meta-llama/llama-3.1-70b-instructMeta: Llama 3.1 70B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000010.000000280000131072.016384.0FalseNaNNaN
235mistralai/codestral-mambaMistral: Codestral Mamba1721347200A 7.3B parameter Mamba-based model designed fo...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.000000250000262144.0NaNFalseNaNNaN
236mistralai/mistral-nemo:freeMistral: Mistral Nemo (free)1721347200A 12B parameter model with a 128k token contex...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral000000128000.0128000.0FalseNaNNaN
237mistralai/mistral-nemoMistral: Mistral Nemo1721347200A 12B parameter model with a 128k token contex...98304None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000030.00000007000098304.049152.0FalseNaNNaN
238openai/gpt-4o-miniOpenAI: GPT-4o-mini1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00021700128000.016384.0True0.000000075NaN
239openai/gpt-4o-mini-2024-07-18OpenAI: GPT-4o-mini (2024-07-18)1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00722500128000.016384.0True0.000000075NaN
240google/gemma-2-27b-itGoogle: Gemma 2 27B1720828800Gemma 2 27B by Google is an open model built f...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.00000010.000000300008192.0NaNFalseNaNNaN
241alpindale/magnum-72bMagnum 72B1720656000From the maker of [Goliath](https://openrouter...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
242google/gemma-2-9b-it:freeGoogle: Gemma 2 9B (free)1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0000008192.08192.0FalseNaNNaN
243google/gemma-2-9b-itGoogle: Gemma 2 9B1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.000000020.0000000600008192.0NaNFalseNaNNaN
24401-ai/yi-large01.AI: Yi Large1719273600The Yi Large model was designed by 01.AI with ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]YiNone0.0000030.000003000032768.04096.0FalseNaNNaN
245ai21/jamba-instructAI21: Jamba Instruct1719273600The Jamba-Instruct model, introduced by AI21 L...256000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000050.00000070000256000.04096.0FalseNaNNaN
246anthropic/claude-3.5-sonnet-20240620:betaAnthropic: Claude 3.5 Sonnet (2024-06-20) (sel...1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
247anthropic/claude-3.5-sonnet-20240620Anthropic: Claude 3.5 Sonnet (2024-06-20)1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
248sao10k/l3-euryale-70bSao10k: Llama 3 Euryale 70B v2.11718668800Euryale 70B v2.1 is a model focused on creativ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000001480.0000014800008192.08192.0FalseNaNNaN
249cognitivecomputations/dolphin-mixtral-8x22bDolphin 2.9.2 Mixtral 8x22B 🐬1717804800Dolphin 2.9 is designed for instruction follow...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000090.0000009000016000.0NaNFalseNaNNaN
250qwen/qwen-2-72b-instructQwen 2 72B Instruct1717718400Qwen2 72B is a transformer-based model that ex...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000090.0000009000032768.04096.0FalseNaNNaN
251mistralai/mistral-7b-instruct:freeMistral: Mistral 7B Instruct (free)1716768000A high-performing, industry-standard 7.3B para...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral00000032768.016384.0FalseNaNNaN
252mistralai/mistral-7b-instructMistral: Mistral 7B Instruct1716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
253nousresearch/hermes-2-pro-llama-3-8bNousResearch: Hermes 2 Pro - Llama-3 8B1716768000Hermes 2 Pro is an upgraded, retrained version...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.0000000250.000000040000131072.0131072.0FalseNaNNaN
254mistralai/mistral-7b-instruct-v0.3Mistral: Mistral 7B Instruct v0.31716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
255microsoft/phi-3-mini-128k-instructMicrosoft: Phi-3 Mini 128K Instruct1716681600Phi-3 Mini is a powerful 3.8B parameter model ...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000010000128000.0NaNFalseNaNNaN
256microsoft/phi-3-medium-128k-instructMicrosoft: Phi-3 Medium 128K Instruct1716508800Phi-3 128K Medium is a powerful 14-billion par...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000030000131072.0NaNFalseNaNNaN
257neversleep/llama-3-lumimaid-70bNeverSleep: Llama 3 Lumimaid 70B1715817600The NeverSleep team is back, with a Llama 3 70...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.00000600008192.04096.0FalseNaNNaN
258deepseek/deepseek-coderDeepSeek-Coder-V21715644800DeepSeek-Coder-V2, an open-source Mixture-of-E...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000040.000000120000128000.0NaNFalseNaNNaN
259google/gemini-flash-1.5Google: Gemini 1.5 Flash1715644800Gemini 1.5 Flash is a foundation model that pe...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.0000000750.000000300.00004001000000.08192.0False0.000000018750.0000001583
260openai/gpt-4oOpenAI: GPT-4o1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
261openai/gpt-4o:extendedOpenAI: GPT-4o (extended)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000060.00001800.00722500128000.064000.0TrueNaNNaN
262meta-llama/llama-guard-2-8bMeta: LlamaGuard 2 8B1715558400This safeguard model has 8B parameters and is ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.00000020.000000200008192.0NaNFalseNaNNaN
263openai/gpt-4o-2024-05-13OpenAI: GPT-4o (2024-05-13)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000050.00001500.00722500128000.04096.0TrueNaNNaN
264allenai/olmo-7b-instructOLMo 7B Instruct1715299200OLMo 7B Instruct by the Allen Institute for AI...2048None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherzephyr0.000000080.0000002400002048.0NaNFalseNaNNaN
265neversleep/llama-3-lumimaid-8b:extendedNeverSleep: Llama 3 Lumimaid 8B (extended)1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
266neversleep/llama-3-lumimaid-8bNeverSleep: Llama 3 Lumimaid 8B1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
267sao10k/fimbulvetr-11b-v2Fimbulvetr 11B v21713657600Creative writing model, routed with permission...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000080.000001200004096.04096.0FalseNaNNaN
268meta-llama/llama-3-8b-instructMeta: Llama 3 8B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, top_k, seed, ...text->text[text][text]Llama3llama30.000000030.0000000600008192.016384.0FalseNaNNaN
269meta-llama/llama-3-70b-instructMeta: Llama 3 70B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000030.000000400008192.016384.0FalseNaNNaN
270mistralai/mixtral-8x22b-instructMistral: Mixtral 8x22B Instruct1713312000Mistral's official instruct fine-tuned version...65536None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.00000040.0000012000065536.0NaNFalseNaNNaN
271microsoft/wizardlm-2-8x22bWizardLM-2 8x22B1713225600WizardLM-2 8x22B is Microsoft AI's most advanc...65536None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]Mistralvicuna0.00000050.0000005000065536.016384.0FalseNaNNaN
272google/gemini-pro-1.5Google: Gemini 1.5 Pro1712620800Google's latest multimodal model, supports ima...2000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.000001250.00000500.0006575002000000.08192.0FalseNaNNaN
273openai/gpt-4-turboOpenAI: GPT-4 Turbo1712620800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.000010.0000300.0144500128000.04096.0TrueNaNNaN
274cohere/command-r-plusCohere: Command R+1712188800Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
275cohere/command-r-plus-04-2024Cohere: Command R+ (04-2024)1712016000Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
276sophosympatheia/midnight-rose-70bMidnight Rose 70B1711065600A merge with a complex family tree, this model...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000080.000000800004096.0NaNFalseNaNNaN
277cohere/commandCohere: Command1710374400Command is an instruction-following conversati...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.0000010.00000200004096.04000.0FalseNaNNaN
278cohere/command-rCohere: Command R1710374400Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
279anthropic/claude-3-haiku:betaAnthropic: Claude 3 Haiku (self-moderated)1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0False0.000000030.0000003
280anthropic/claude-3-haikuAnthropic: Claude 3 Haiku1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0True0.000000030.0000003
281anthropic/claude-3-opus:betaAnthropic: Claude 3 Opus (self-moderated)1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0False0.00000150.00001875
282anthropic/claude-3-opusAnthropic: Claude 3 Opus1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0True0.00000150.00001875
283anthropic/claude-3-sonnet:betaAnthropic: Claude 3 Sonnet (self-moderated)1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0False0.00000030.00000375
284anthropic/claude-3-sonnetAnthropic: Claude 3 Sonnet1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0True0.00000030.00000375
285cohere/command-r-03-2024Cohere: Command R (03-2024)1709341200Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
286mistralai/mistral-largeMistral Large1708905600This is Mistral AI's flagship model, Mistral L...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000128000.0NaNFalseNaNNaN
287openai/gpt-3.5-turbo-0613OpenAI: GPT-3.5 Turbo (older v0613)1706140800GPT-3.5 Turbo is OpenAI's fastest model. It ca...4095None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.00000200004095.04096.0FalseNaNNaN
288openai/gpt-4-turbo-previewOpenAI: GPT-4 Turbo Preview1706140800The preview GPT-4 model with improved instruct...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
289nousresearch/nous-hermes-2-mixtral-8x7b-dpoNous: Hermes 2 Mixtral 8x7B DPO1705363200Nous Hermes 2 Mixtral 8x7B DPO is the new flag...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000060.0000006000032768.02048.0FalseNaNNaN
290mistralai/mistral-mediumMistral Medium1704844800This is Mistral AI's closed-source, medium-sid...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000002750.0000081000032768.0NaNFalseNaNNaN
291mistralai/mistral-smallMistral Small1704844800With 22 billion parameters, Mistral Small v24....32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
292mistralai/mistral-tinyMistral Tiny1704844800Note: This model is being deprecated. Recommen...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.00000025000032768.0NaNFalseNaNNaN
293mistralai/mistral-7b-instruct-v0.2Mistral: Mistral 7B Instruct v0.21703721600A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000020.0000002000032768.0NaNFalseNaNNaN
294mistralai/mixtral-8x7b-instructMistral: Mixtral 8x7B Instruct1702166400Mixtral 8x7B Instruct is a pretrained generati...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000080.00000024000032768.0NaNFalseNaNNaN
295neversleep/noromaid-20bNoromaid 20B1700956800A collab between IkariDev and Undi. This merge...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.000000750.000001500008192.02048.0FalseNaNNaN
296anthropic/claude-2.1:betaAnthropic: Claude v2.1 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
297anthropic/claude-2.1Anthropic: Claude v2.11700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
298anthropic/claude-2:betaAnthropic: Claude v2 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
299anthropic/claude-2Anthropic: Claude v21700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
300undi95/toppy-m-7bToppy M 7B1699574400A wild 7B parameter model that merges several ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralalpaca0.00000080.000001200004096.04096.0FalseNaNNaN
301alpindale/goliath-120bGoliath 120B1699574400A large LLM created by combining two fine-tune...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000656250.00000937500006144.0512.0FalseNaNNaN
302openrouter/autoAuto Router1699401600Your prompt will be processed by a meta-model ...2000000None[]text->text[text][text]RouterNone-1-1NaNNaNNaNNaNNaNNaNFalseNaNNaN
303openai/gpt-3.5-turbo-1106OpenAI: GPT-3.5 Turbo 16k (older v1106)1699228800An older GPT-3.5 Turbo model with improved ins...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.000002000016385.04096.0TrueNaNNaN
304openai/gpt-4-1106-previewOpenAI: GPT-4 Turbo (older v1106)1699228800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
305jondurbin/airoboros-l2-70bAiroboros 70B1698537600A Llama 2 70B fine-tune using synthetic data (...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000050.000000500004096.0NaNFalseNaNNaN
306openai/gpt-3.5-turbo-instructOpenAI: GPT-3.5 Turbo Instruct1695859200This model is a variant of GPT-3.5 Turbo tuned...4095None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]GPTchatml0.00000150.00000200004095.04096.0TrueNaNNaN
307mistralai/mistral-7b-instruct-v0.1Mistral: Mistral 7B Instruct v0.11695859200A 7.3B parameter model that outperforms Llama ...2824None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000110.0000001900002824.0NaNFalseNaNNaN
308pygmalionai/mythalion-13bPygmalion: Mythalion 13B1693612800A blend of the new Pygmalion-13b and MythoMax....8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500008192.01024.0FalseNaNNaN
309openai/gpt-3.5-turbo-16kOpenAI: GPT-3.5 Turbo 16k1693180800This model offers four times the context lengt...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000030.000004000016385.04096.0TrueNaNNaN
310openai/gpt-4-32kOpenAI: GPT-4 32k1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
311openai/gpt-4-32k-0314OpenAI: GPT-4 32k (older v0314)1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
312mancer/weaverMancer: Weaver (alpha)1690934400An attempt to recreate Claude-style verbosity,...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000011250.00000112500008000.01000.0FalseNaNNaN
313anthropic/claude-2.0:betaAnthropic: Claude v2.0 (self-moderated)1690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0FalseNaNNaN
314anthropic/claude-2.0Anthropic: Claude v2.01690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0TrueNaNNaN
315undi95/remm-slerp-l2-13bReMM SLERP 13B1689984000A recreation trial of the original MythoMax-L2...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500006144.01024.0FalseNaNNaN
316gryphe/mythomax-l2-13bMythoMax 13B1688256000One of the highest performing and most popular...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000000650.00000006500004096.04096.0FalseNaNNaN
317meta-llama/llama-2-70b-chatMeta: Llama 2 70B Chat1687219200The flagship, 70 billion parameter language mo...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2llama20.00000090.000000900004096.0NaNFalseNaNNaN
318openai/gpt-3.5-turboOpenAI: GPT-3.5 Turbo1685232000GPT-3.5 Turbo is OpenAI's fastest model. It ca...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
319openai/gpt-3.5-turbo-0125OpenAI: GPT-3.5 Turbo 16k1685232000The latest GPT-3.5 Turbo model with improved i...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
320openai/gpt-4OpenAI: GPT-41685232000OpenAI's flagship model, GPT-4 is a large-scal...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
321openai/gpt-4-0314OpenAI: GPT-4 (older v0314)1685232000GPT-4-0314 is the first version of GPT-4 relea...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
\n", - "
" - ], - "text/plain": [ - " id name created description context_length per_request_limits supported_parameters architecture_modality architecture_input_modalities architecture_output_modalities architecture_tokenizer architecture_instruct_type pricing_prompt pricing_completion pricing_request pricing_image pricing_web_search pricing_internal_reasoning top_provider_context_length top_provider_max_completion_tokens top_provider_is_moderated pricing_input_cache_read pricing_input_cache_write\n", - "0 mistralai/mistral-medium-3 Mistral: Mistral Medium 3 1746627341 Mistral Medium 3 is a high-performance enterpr... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.0000004 0.000002 0 0 0 0 131072.0 NaN False NaN NaN\n", - "1 google/gemini-2.5-pro-preview Google: Gemini 2.5 Pro Preview 1746578513 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0.00000125 0.00001 0 0.00516 0 0 1048576.0 65535.0 False 0.00000031 0.000001625\n", - "2 arcee-ai/caller-large Arcee AI: Caller Large 1746487869 Caller Large is Arcee's specialist \"function‑c... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.00000055 0.00000085 0 0 0 0 32768.0 NaN False NaN NaN\n", - "3 arcee-ai/spotlight Arcee AI: Spotlight 1746481552 Spotlight is a 7‑billion‑parameter vision‑lang... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000018 0.00000018 0 0 0 0 131072.0 65537.0 False NaN NaN\n", - "4 arcee-ai/maestro-reasoning Arcee AI: Maestro Reasoning 1746481269 Maestro Reasoning is Arcee's flagship analysis... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.0000009 0.0000033 0 0 0 0 131072.0 32000.0 False NaN NaN\n", - "5 arcee-ai/virtuoso-large Arcee AI: Virtuoso Large 1746478885 Virtuoso‑Large is Arcee's top‑tier general‑pur... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000075 0.0000012 0 0 0 0 131072.0 64000.0 False NaN NaN\n", - "6 arcee-ai/coder-large Arcee AI: Coder Large 1746478663 Coder‑Large is a 32 B‑parameter offspring of Q... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 NaN False NaN NaN\n", - "7 arcee-ai/virtuoso-medium-v2 Arcee AI: Virtuoso Medium V2 1746478434 Virtuoso‑Medium‑v2 is a 32 B model distilled f... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "8 arcee-ai/arcee-blitz Arcee AI: Arcee Blitz 1746470100 Arcee Blitz is a 24 B‑parameter dense model di... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000045 0.00000075 0 0 0 0 32768.0 NaN False NaN NaN\n", - "9 microsoft/phi-4-reasoning-plus:free Microsoft: Phi 4 Reasoning Plus (free) 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "10 microsoft/phi-4-reasoning-plus Microsoft: Phi 4 Reasoning Plus 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.00000007 0.00000035 0 0 0 0 32768.0 NaN False NaN NaN\n", - "11 microsoft/phi-4-reasoning:free Microsoft: Phi 4 Reasoning (free) 1746121275 Phi-4-reasoning is a 14B parameter dense decod... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "12 qwen/qwen3-0.6b-04-28:free Qwen: Qwen3 0.6B (free) 1746043526 Qwen3-0.6B is a lightweight, 0.6 billion param... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "13 inception/mercury-coder-small-beta Inception: Mercury Coder Small Beta 1746033880 Mercury Coder Small is the first diffusion lar... 32000 None [max_tokens, frequency_penalty, presence_penal... text->text [text] [text] Other None 0.00000025 0.000001 0 0 0 0 32000.0 NaN False NaN NaN\n", - "14 qwen/qwen3-1.7b:free Qwen: Qwen3 1.7B (free) 1746031388 Qwen3-1.7B is a compact, 1.7 billion parameter... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "15 qwen/qwen3-4b:free Qwen: Qwen3 4B (free) 1746031104 Qwen3-4B is a 4 billion parameter dense langua... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 128000.0 NaN False NaN NaN\n", - "16 opengvlab/internvl3-14b:free OpenGVLab: InternVL3 14B (free) 1746021355 The 14b version of the InternVL3 series. An ad... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "17 opengvlab/internvl3-2b:free OpenGVLab: InternVL3 2B (free) 1746019807 The 2b version of the InternVL3 series, for an... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "18 deepseek/deepseek-prover-v2:free DeepSeek: DeepSeek Prover V2 (free) 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "19 deepseek/deepseek-prover-v2 DeepSeek: DeepSeek Prover V2 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.0000005 0.00000218 0 0 0 0 131072.0 NaN False NaN NaN\n", - "20 meta-llama/llama-guard-4-12b Meta: Llama Guard 4 12B 1745975193 Llama Guard 4 is a Llama 4 Scout-derived multi... 163840 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000005 0.00000005 0 0 0 0 163840.0 NaN False NaN NaN\n", - "21 qwen/qwen3-30b-a3b:free Qwen: Qwen3 30B A3B (free) 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "22 qwen/qwen3-30b-a3b Qwen: Qwen3 30B A3B 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "23 qwen/qwen3-8b:free Qwen: Qwen3 8B (free) 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "24 qwen/qwen3-8b Qwen: Qwen3 8B 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.000000035 0.000000138 0 0 0 0 128000.0 NaN False NaN NaN\n", - "25 qwen/qwen3-14b:free Qwen: Qwen3 14B (free) 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "26 qwen/qwen3-14b Qwen: Qwen3 14B 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000007 0.00000024 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "27 qwen/qwen3-32b:free Qwen: Qwen3 32B (free) 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "28 qwen/qwen3-32b Qwen: Qwen3 32B 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 NaN False NaN NaN\n", - "29 qwen/qwen3-235b-a22b:free Qwen: Qwen3 235B A22B (free) 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "30 qwen/qwen3-235b-a22b Qwen: Qwen3 235B A22B 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000014 0.000002 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "31 tngtech/deepseek-r1t-chimera:free TNG: DeepSeek R1T Chimera (free) 1745760875 DeepSeek-R1T-Chimera is created by merging Dee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "32 thudm/glm-z1-rumination-32b THUDM: GLM Z1 Rumination 32B 1745601495 THUDM: GLM Z1 Rumination 32B is a 32B-paramete... 32000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "33 thudm/glm-z1-9b:free THUDM: GLM Z1 9B (free) 1745601140 GLM-Z1-9B-0414 is a 9B-parameter language mode... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "34 thudm/glm-4-9b:free THUDM: GLM 4 9B (free) 1745601023 GLM-4-9B-0414 is a 9 billion parameter languag... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "35 microsoft/mai-ds-r1:free Microsoft: MAI DS R1 (free) 1745194100 MAI-DS-R1 is a post-trained variant of DeepSee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "36 thudm/glm-z1-32b:free THUDM: GLM Z1 32B (free) 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "37 thudm/glm-z1-32b THUDM: GLM Z1 32B 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "38 thudm/glm-4-32b:free THUDM: GLM 4 32B (free) 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "39 thudm/glm-4-32b THUDM: GLM 4 32B 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "40 google/gemini-2.5-flash-preview Google: Gemini 2.5 Flash Preview 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000006 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", - "41 google/gemini-2.5-flash-preview:thinking Google: Gemini 2.5 Flash Preview (thinking) 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000035 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", - "42 openai/o4-mini-high OpenAI: o4 Mini High 1744824212 OpenAI o4-mini-high is the same model as [o4-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", - "43 openai/o3 OpenAI: o3 1744823457 o3 is a well-rounded and powerful model across... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.00001 0.00004 0 0.00765 0 0 200000.0 100000.0 True 0.0000025 NaN\n", - "44 openai/o4-mini OpenAI: o4 Mini 1744820942 OpenAI o4-mini is a compact reasoning model in... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", - "45 shisa-ai/shisa-v2-llama3.3-70b:free Shisa AI: Shisa V2 Llama 3.3 70B (free) 1744754858 Shisa V2 Llama 3.3 70B is a bilingual Japanese... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "46 qwen/qwen2.5-coder-7b-instruct Qwen: Qwen2.5 Coder 7B Instruct 1744734887 Qwen2.5-Coder-7B-Instruct is a 7B parameter in... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen None 0.00000001 0.00000003 0 0 0 0 32768.0 NaN False NaN NaN\n", - "47 openai/gpt-4.1 OpenAI: GPT-4.1 1744651385 GPT-4.1 is a flagship large language model opt... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.000002 0.000008 0 0 0 0 1047576.0 32768.0 True 0.0000005 NaN\n", - "48 openai/gpt-4.1-mini OpenAI: GPT-4.1 Mini 1744651381 GPT-4.1 Mini is a mid-sized model delivering p... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000004 0.0000016 0 0 0 0 1047576.0 32768.0 True 0.0000001 NaN\n", - "49 openai/gpt-4.1-nano OpenAI: GPT-4.1 Nano 1744651369 For tasks that demand low latency, GPT‑4.1 nan... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000001 0.0000004 0 0 0 0 1047576.0 32768.0 True 0.000000025 NaN\n", - "50 eleutherai/llemma_7b EleutherAI: Llemma 7b 1744643225 Llemma 7B is a language model for mathematics.... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other code-llama 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "51 alfredpros/codellama-7b-instruct-solidity AlfredPros: CodeLLaMa 7B Instruct Solidity 1744641874 A finetuned 7 billion parameters Code LLaMA - ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "52 arliai/qwq-32b-arliai-rpr-v1:free ArliAI: QwQ 32B RpR v1 (free) 1744555982 QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "53 agentica-org/deepcoder-14b-preview:free Agentica: Deepcoder 14B Preview (free) 1744555395 DeepCoder-14B-Preview is a 14B parameter code ... 96000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 96000.0 NaN False NaN NaN\n", - "54 moonshotai/kimi-vl-a3b-thinking:free Moonshot AI: Kimi VL A3B Thinking (free) 1744304841 Kimi-VL is a lightweight Mixture-of-Experts vi... 131072 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [image, text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "55 x-ai/grok-3-mini-beta xAI: Grok 3 Mini Beta 1744240195 Grok 3 Mini is a lightweight, smaller thinking... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.0000003 0.0000005 0 0 0 0 131072.0 NaN False NaN NaN\n", - "56 x-ai/grok-3-beta xAI: Grok 3 Beta 1744240068 Grok 3 is the latest model from xAI. It's thei... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000003 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "57 nvidia/llama-3.3-nemotron-super-49b-v1:free NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free) 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "58 nvidia/llama-3.3-nemotron-super-49b-v1 NVIDIA: Llama 3.3 Nemotron Super 49B v1 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000013 0.0000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free) 1744115059 Llama-3.1-Nemotron-Ultra-253B-v1 is a large la... 
131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "60 meta-llama/llama-4-maverick:free Meta: Llama 4 Maverick (free) 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 256000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 256000.0 NaN False NaN NaN\n", - "61 meta-llama/llama-4-maverick Meta: Llama 4 Maverick 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 1048576 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000017 0.0000006 0 0.0006684 0 0 1048576.0 16384.0 False NaN NaN\n", - "62 meta-llama/llama-4-scout:free Meta: Llama 4 Scout (free) 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 512000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 512000.0 NaN False NaN NaN\n", - "63 meta-llama/llama-4-scout Meta: Llama 4 Scout 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 1048576 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Other None 0.00000008 0.0000003 0 0 0 0 1048576.0 1048576.0 False NaN NaN\n", - "64 all-hands/openhands-lm-32b-v0.1 OpenHands LM 32B V0.1 1743613013 OpenHands LM v0.1 is a 32B open-source coding ... 16384 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "65 mistral/ministral-8b Mistral: Ministral 8B 1743430021 Ministral 8B is a state-of-the-art language mo... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "66 deepseek/deepseek-v3-base:free DeepSeek: DeepSeek V3 Base (free) 1743272023 Note that this is a base model mostly meant fo... 
163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "67 scb10x/llama3.1-typhoon2-8b-instruct Typhoon2 8B Instruct 1743196511 Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000018 0.00000018 0 0 0 0 8192.0 NaN False NaN NaN\n", - "68 scb10x/llama3.1-typhoon2-70b-instruct Typhoon2 70B Instruct 1743196170 Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000088 0.00000088 0 0 0 0 8192.0 NaN False NaN NaN\n", - "69 allenai/molmo-7b-d:free AllenAI: Molmo 7B D (free) 1743023247 Molmo is a family of open vision-language mode... 4096 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 4096.0 NaN False NaN NaN\n", - "70 bytedance-research/ui-tars-72b:free Bytedance: UI-TARS 72B (free) 1743020065 UI-TARS 72B is an open-source multimodal AI mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "71 qwen/qwen2.5-vl-3b-instruct:free Qwen: Qwen2.5 VL 3B Instruct (free) 1743014573 Qwen2.5 VL 3B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "72 google/gemini-2.5-pro-exp-03-25 Google: Gemini 2.5 Pro Experimental 1742922099 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1000000 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0 0 0 0 0 0 1000000.0 65535.0 False NaN NaN\n", - "73 qwen/qwen2.5-vl-32b-instruct:free Qwen: Qwen2.5 VL 32B Instruct (free) 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 
8192 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", - "74 qwen/qwen2.5-vl-32b-instruct Qwen: Qwen2.5 VL 32B Instruct 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000009 0.0000009 0 0 0 0 128000.0 NaN False NaN NaN\n", - "75 deepseek/deepseek-chat-v3-0324:free DeepSeek: DeepSeek V3 0324 (free) 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "76 deepseek/deepseek-chat-v3-0324 DeepSeek: DeepSeek V3 0324 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] DeepSeek None 0.0000003 0.00000088 0 0 0 0 163840.0 NaN False NaN NaN\n", - "77 featherless/qwerky-72b:free Qwerky 72B (free) 1742481597 Qwerky-72B is a linear-attention RWKV variant ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "78 openai/o1-pro OpenAI: o1-pro 1742423211 The o1 series of models are trained with reinf... 200000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] GPT None 0.00015 0.0006 0 0.21675 0 0 200000.0 100000.0 True NaN NaN\n", - "79 mistralai/mistral-small-3.1-24b-instruct:free Mistral: Mistral Small 3.1 24B (free) 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 96000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0 0 0 0 0 0 96000.0 96000.0 False NaN NaN\n", - "80 mistralai/mistral-small-3.1-24b-instruct Mistral: Mistral Small 3.1 24B 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 
131072 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Mistral None 0.00000005 0.00000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "81 open-r1/olympiccoder-32b:free OlympicCoder 32B (free) 1742077228 OlympicCoder-32B is a high-performing open-sou... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "82 google/gemma-3-1b-it:free Google: Gemma 3 1B (free) 1741963556 Gemma 3 1B is the smallest of the new Gemma 3 ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 32768.0 8192.0 False NaN NaN\n", - "83 google/gemma-3-4b-it:free Google: Gemma 3 4B (free) 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "84 google/gemma-3-4b-it Google: Gemma 3 4B 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000002 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "85 ai21/jamba-1.6-large AI21: Jamba 1.6 Large 1741905173 AI21 Jamba Large 1.6 is a high-performance hyb... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.000002 0.000008 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "86 ai21/jamba-1.6-mini AI21: Jamba Mini 1.6 1741905171 AI21 Jamba Mini 1.6 is a hybrid foundation mod... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000002 0.0000004 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "87 google/gemma-3-12b-it:free Google: Gemma 3 12B (free) 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "88 google/gemma-3-12b-it Google: Gemma 3 12B 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000005 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "89 cohere/command-a Cohere: Command A 1741894342 Command A is an open-weights 111B parameter mo... 256000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 256000.0 8192.0 False NaN NaN\n", - "90 openai/gpt-4o-mini-search-preview OpenAI: GPT-4o-mini Search Preview 1741818122 GPT-4o mini Search Preview is a specialized mo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.00000015 0.0000006 0.0275 0.000217 0 0 128000.0 16384.0 True NaN NaN\n", - "91 openai/gpt-4o-search-preview OpenAI: GPT-4o Search Preview 1741817949 GPT-4o Search Previewis a specialized model fo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.0000025 0.00001 0.035 0.003613 0 0 128000.0 16384.0 True NaN NaN\n", - "92 rekaai/reka-flash-3:free Reka: Flash 3 (free) 1741812813 Reka Flash 3 is a general-purpose, instruction... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "93 google/gemma-3-27b-it:free Google: Gemma 3 27B (free) 1741756359 Gemma 3 introduces multimodality, supporting v... 96000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 96000.0 8192.0 False NaN NaN\n", - "94 google/gemma-3-27b-it Google: Gemma 3 27B 1741756359 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0.0000001 0.0000002 0 0.0000256 0 0 131072.0 16384.0 False NaN NaN\n", - "95 thedrummer/anubis-pro-105b-v1 TheDrummer: Anubis Pro 105B V1 1741642290 Anubis Pro 105B v1 is an expanded and refined ... 131072 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000008 0.000001 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "96 thedrummer/skyfall-36b-v2 TheDrummer: Skyfall 36B V2 1741636566 Skyfall 36B v2 is an enhanced iteration of Mis... 32768 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "97 microsoft/phi-4-multimodal-instruct Microsoft: Phi 4 Multimodal Instruct 1741396284 Phi-4 Multimodal Instruct is a versatile 5.6B ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000005 0.0000001 0 0.00017685 0 0 131072.0 NaN False NaN NaN\n", - "98 perplexity/sonar-reasoning-pro Perplexity: Sonar Reasoning Pro 1741313308 Note: Sonar Pro pricing includes Perplexity se... 128000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0 128000.0 NaN False NaN NaN\n", - "99 perplexity/sonar-pro Perplexity: Sonar Pro 1741312423 Note: Sonar Pro pricing includes Perplexity se... 200000 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000003 0.000015 0 0 0.005 0 200000.0 8000.0 False NaN NaN\n", - "100 perplexity/sonar-deep-research Perplexity: Sonar Deep Research 1741311246 Sonar Deep Research is a research-focused mode... 128000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0.000003 128000.0 NaN False NaN NaN\n", - "101 deepseek/deepseek-r1-zero:free DeepSeek: DeepSeek R1 Zero (free) 1741297434 DeepSeek-R1-Zero is a model trained via large-... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "102 qwen/qwq-32b:free Qwen: QwQ 32B (free) 1741208814 QwQ is the reasoning model of the Qwen series.... 40000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0 0 0 0 0 0 40000.0 40000.0 False NaN NaN\n", - "103 qwen/qwq-32b Qwen: QwQ 32B 1741208814 QwQ is the reasoning model of the Qwen series.... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0.00000015 0.0000002 0 0 0 0 131072.0 NaN False NaN NaN\n", - "104 moonshotai/moonlight-16b-a3b-instruct:free Moonshot AI: Moonlight 16B A3B Instruct (free) 1740719801 Moonlight-16B-A3B-Instruct is a 16B-parameter ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", - "105 nousresearch/deephermes-3-llama-3-8b-preview:free Nous: DeepHermes 3 Llama 3 8B Preview (free) 1740719372 DeepHermes 3 Preview is the latest version of ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "106 openai/gpt-4.5-preview OpenAI: GPT-4.5 (Preview) 1740687810 GPT-4.5 (Preview) is a research preview of Ope... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.000075 0.00015 0 0.108375 0 0 128000.0 16384.0 True 0.0000375 NaN\n", - "107 google/gemini-2.0-flash-lite-001 Google: Gemini 2.0 Flash Lite 1740506212 Gemini 2.0 Flash Lite offers a significantly f... 1048576 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image, file] [text] Gemini None 0.000000075 0.0000003 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", - "108 anthropic/claude-3.7-sonnet Anthropic: Claude 3.7 Sonnet 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", - "109 anthropic/claude-3.7-sonnet:thinking Anthropic: Claude 3.7 Sonnet (thinking) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", - "110 anthropic/claude-3.7-sonnet:beta Anthropic: Claude 3.7 Sonnet (self-moderated) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [max_tokens, temperature, stop, reasoning, inc... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 128000.0 False 0.0000003 0.00000375\n", - "111 perplexity/r1-1776 Perplexity: R1 1776 1740004929 R1 1776 is a version of DeepSeek-R1 that has b... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.000002 0.000008 0 0 0 0 128000.0 NaN False NaN NaN\n", - "112 mistralai/mistral-saba Mistral: Saba 1739803239 Mistral Saba is a 24B-parameter language model... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", - "113 cognitivecomputations/dolphin3.0-r1-mistral-24... Dolphin3.0 R1 Mistral 24B (free) 1739462498 Dolphin 3.0 R1 is the next generation of the D... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "114 cognitivecomputations/dolphin3.0-mistral-24b:free Dolphin3.0 Mistral 24B (free) 1739462019 Dolphin 3.0 is the next generation of the Dolp... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "115 meta-llama/llama-guard-3-8b Llama Guard 3 8B 1739401318 Llama Guard 3 is a Llama-3.1-8B pretrained mod... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.00000002 0.00000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "116 openai/o3-mini-high OpenAI: o3 Mini High 1739372611 OpenAI o3-mini-high is the same model as [o3-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", - "117 deepseek/deepseek-r1-distill-llama-8b DeepSeek: R1 Distill Llama 8B 1738937718 DeepSeek R1 Distill Llama 8B is a distilled la... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.00000004 0.00000004 0 0 0 0 32000.0 32000.0 False NaN NaN\n", - "118 google/gemini-2.0-flash-001 Google: Gemini 2.0 Flash 1738769413 Gemini Flash 2.0 offers a significantly faster... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.0000001 0.0000004 0 0.0000258 0 0 1000000.0 8192.0 False 0.000000025 0.0000001833\n", - "119 qwen/qwen-vl-plus Qwen: Qwen VL Plus 1738731255 Qwen's Enhanced Large Visual Language Model. S... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.00000021 0.00000063 0 0.0002688 0 0 7500.0 1500.0 False NaN NaN\n", - "120 aion-labs/aion-1.0 AionLabs: Aion-1.0 1738697557 Aion-1.0 is a multi-model system designed for ... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.000004 0.000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "121 aion-labs/aion-1.0-mini AionLabs: Aion-1.0-Mini 1738697107 Aion-1.0-Mini 32B parameter model is a distill... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.0000007 0.0000014 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "122 aion-labs/aion-rp-llama-3.1-8b AionLabs: Aion-RP 1.0 (8B) 1738696718 Aion-RP-Llama-3.1-8B ranks the highest in the ... 32768 None [max_tokens, temperature, top_p] text->text [text] [text] Other None 0.0000002 0.0000002 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "123 qwen/qwen-vl-max Qwen: Qwen VL Max 1738434304 Qwen VL Max is a visual understanding model wi... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.0000008 0.0000032 0 0.001024 0 0 7500.0 1500.0 False NaN NaN\n", - "124 qwen/qwen-turbo Qwen: Qwen-Turbo 1738410974 Qwen-Turbo, based on Qwen2.5, is a 1M context ... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.00000005 0.0000002 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", - "125 qwen/qwen2.5-vl-72b-instruct:free Qwen: Qwen2.5 VL 72B Instruct (free) 1738410311 Qwen2.5-VL is proficient in recognizing common... 131072 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", - "126 qwen/qwen2.5-vl-72b-instruct Qwen: Qwen2.5 VL 72B Instruct 1738410311 Qwen2.5-VL is proficient in recognizing common... 32000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.00000025 0.00000075 0 0 0 0 32000.0 NaN False NaN NaN\n", - "127 qwen/qwen-plus Qwen: Qwen-Plus 1738409840 Qwen-Plus, based on the Qwen2.5 foundation mod... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen None 0.0000004 0.0000012 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "128 qwen/qwen-max Qwen: Qwen-Max 1738402289 Qwen-Max, based on Qwen2.5, provides the best ... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000016 0.0000064 0 0 0 0 32768.0 8192.0 False NaN NaN\n", - "129 openai/o3-mini OpenAI: o3 Mini 1738351721 OpenAI o3-mini is a cost-efficient language mo... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", - "130 deepseek/deepseek-r1-distill-qwen-1.5b DeepSeek: R1 Distill Qwen 1.5B 1738328067 DeepSeek R1 Distill Qwen 1.5B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000018 0.00000018 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "131 mistralai/mistral-small-24b-instruct-2501:free Mistral: Mistral Small 3 (free) 1738255409 Mistral Small 3 is a 24B-parameter language mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "132 mistralai/mistral-small-24b-instruct-2501 Mistral: Mistral Small 3 1738255409 Mistral Small 3 is a 24B-parameter language mo... 28000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0.00000006 0.00000012 0 0 0 0 28000.0 14000.0 False NaN NaN\n", - "133 deepseek/deepseek-r1-distill-qwen-32b:free DeepSeek: R1 Distill Qwen 32B (free) 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 16000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16000.0 16000.0 False NaN NaN\n", - "134 deepseek/deepseek-r1-distill-qwen-32b DeepSeek: R1 Distill Qwen 32B 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000012 0.00000018 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "135 deepseek/deepseek-r1-distill-qwen-14b:free DeepSeek: R1 Distill Qwen 14B (free) 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "136 deepseek/deepseek-r1-distill-qwen-14b DeepSeek: R1 Distill Qwen 14B 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000015 0.00000015 0 0 0 0 64000.0 64000.0 False NaN NaN\n", - "137 perplexity/sonar-reasoning Perplexity: Sonar Reasoning 1738131107 Sonar Reasoning is a reasoning model provided ... 127000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000001 0.000005 0.005 0 0 0 127000.0 NaN False NaN NaN\n", - "138 perplexity/sonar Perplexity: Sonar 1738013808 Sonar is lightweight, affordable, fast, and si... 127072 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "139 liquid/lfm-7b Liquid: LFM 7B 1737806883 LFM-7B, a new best-in-class language model. LF... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000001 0.00000001 0 0 0 0 32768.0 NaN False NaN NaN\n", - "140 liquid/lfm-3b Liquid: LFM 3B 1737806501 Liquid's LFM 3B delivers incredible performanc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000002 0.00000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "141 deepseek/deepseek-r1-distill-llama-70b:free DeepSeek: R1 Distill Llama 70B (free) 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 
8192 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0 0 0 0 0 0 8192.0 4096.0 False NaN NaN\n", - "142 deepseek/deepseek-r1-distill-llama-70b DeepSeek: R1 Distill Llama 70B 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.0000001 0.0000004 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "143 deepseek/deepseek-r1:free DeepSeek: R1 (free) 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, reasoning, include_reasoning, tem... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "144 deepseek/deepseek-r1 DeepSeek: R1 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.0000005 0.00000218 0 0 0 0 163840.0 163840.0 False NaN NaN\n", - "145 minimax/minimax-01 MiniMax: MiniMax-01 1736915462 MiniMax-01 is a combines MiniMax-Text-01 for t... 1000192 None [max_tokens, temperature, top_p] text+image->text [text, image] [text] Other None 0.0000002 0.0000011 0 0 0 0 1000192.0 1000192.0 False NaN NaN\n", - "146 mistralai/codestral-2501 Mistral: Codestral 2501 1736895522 [Mistral](/mistralai)'s cutting-edge language ... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000003 0.0000009 0 0 0 0 262144.0 NaN False NaN NaN\n", - "147 microsoft/phi-4 Microsoft: Phi 4 1736489872 [Microsoft Research](/microsoft) Phi-4 is desi... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000007 0.00000014 0 0 0 0 16384.0 16384.0 False NaN NaN\n", - "148 deepseek/deepseek-chat:free DeepSeek: DeepSeek V3 (free) 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "149 deepseek/deepseek-chat DeepSeek: DeepSeek V3 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.00000038 0.00000089 0 0 0 0 163840.0 163840.0 False NaN NaN\n", - "150 sao10k/l3.3-euryale-70b Sao10K: Llama 3.3 Euryale 70B 1734535928 Euryale L3.3 70B is a model focused on creativ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "151 openai/o1 OpenAI: o1 1734459999 The latest and strongest model family from Ope... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [text, image] [text] GPT None 0.000015 0.00006 0 0.021675 0 0 200000.0 100000.0 True 0.0000075 NaN\n", - "152 eva-unit-01/eva-llama-3.33-70b EVA Llama 3.33 70B 1734377303 EVA Llama 3.33 70b is a roleplay and storywrit... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "153 x-ai/grok-2-vision-1212 xAI: Grok 2 Vision 1212 1734237338 Grok 2 Vision 1212 advances image-based AI wit... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000002 0.00001 0 0.0036 0 0 32768.0 NaN False NaN NaN\n", - "154 x-ai/grok-2-1212 xAI: Grok 2 1212 1734232814 Grok 2 1212 introduces significant enhancement... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000002 0.00001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "155 cohere/command-r7b-12-2024 Cohere: Command R7B (12-2024) 1734158152 Command R7B (12-2024) is a small, fast update ... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Cohere None 0.0000000375 0.00000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "156 google/gemini-2.0-flash-exp:free Google: Gemini 2.0 Flash Experimental (free) 1733937523 Gemini Flash 2.0 offers a significantly faster... 1048576 None [max_tokens, temperature, top_p, stop] text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", - "157 meta-llama/llama-3.3-70b-instruct:free Meta: Llama 3.3 70B Instruct (free) 1733506137 The Meta Llama 3.3 multilingual large language... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 8000.0 8000.0 False NaN NaN\n", - "158 meta-llama/llama-3.3-70b-instruct Meta: Llama 3.3 70B Instruct 1733506137 The Meta Llama 3.3 multilingual large language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009 0.00000035 0 0 0 0 131000.0 131000.0 False NaN NaN\n", - "159 amazon/nova-lite-v1 Amazon: Nova Lite 1.0 1733437363 Amazon Nova Lite 1.0 is a very low-cost multim... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.00000006 0.00000024 0 0.00009 0 0 300000.0 5120.0 True NaN NaN\n", - "160 amazon/nova-micro-v1 Amazon: Nova Micro 1.0 1733437237 Amazon Nova Micro 1.0 is a text-only model tha... 128000 None [tools, max_tokens, temperature, top_p, top_k,... text->text [text] [text] Nova None 0.000000035 0.00000014 0 0 0 0 128000.0 5120.0 True NaN NaN\n", - "161 amazon/nova-pro-v1 Amazon: Nova Pro 1.0 1733436303 Amazon Nova Pro 1.0 is a capable multimodal mo... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.0000008 0.0000032 0 0.0012 0 0 300000.0 5120.0 True NaN NaN\n", - "162 qwen/qwq-32b-preview:free Qwen: QwQ 32B Preview (free) 1732754541 QwQ-32B-Preview is an experimental research mo... 
16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16384.0 NaN False NaN NaN\n", - "163 qwen/qwq-32b-preview Qwen: QwQ 32B Preview 1732754541 QwQ-32B-Preview is an experimental research mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0.00000009 0.00000027 0 0 0 0 32768.0 NaN False NaN NaN\n", - "164 google/learnlm-1.5-pro-experimental:free Google: LearnLM 1.5 Pro Experimental (free) 1732216551 An experimental version of [Gemini 1.5 Pro](/g... 40960 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 40960.0 8192.0 False NaN NaN\n", - "165 eva-unit-01/eva-qwen-2.5-72b EVA Qwen2.5 72B 1732210606 EVA Qwen2.5 72B is a roleplay and storywriting... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "166 openai/gpt-4o-2024-11-20 OpenAI: GPT-4o (2024-11-20) 1732127594 The 2024-11-20 version of GPT-4o offers a leve... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "167 mistralai/mistral-large-2411 Mistral Large 2411 1731978685 Mistral Large 2 2411 is an update of [Mistral ... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "168 mistralai/mistral-large-2407 Mistral Large 2407 1731978415 This is Mistral AI's flagship model, Mistral L... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "169 mistralai/pixtral-large-2411 Mistral: Pixtral Large 2411 1731977388 Pixtral Large is a 124B parameter, open-weight... 
131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.000002 0.000006 0 0.002888 0 0 131072.0 NaN False NaN NaN\n", - "170 x-ai/grok-vision-beta xAI: Grok Vision Beta 1731976624 Grok Vision Beta is xAI's experimental languag... 8192 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000005 0.000015 0 0.009 0 0 8192.0 NaN False NaN NaN\n", - "171 infermatic/mn-inferor-12b Infermatic: Mistral Nemo Inferor 12B 1731464428 Inferor 12B is a merge of top roleplay models,... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "172 qwen/qwen-2.5-coder-32b-instruct:free Qwen2.5 Coder 32B Instruct (free) 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "173 qwen/qwen-2.5-coder-32b-instruct Qwen2.5 Coder 32B Instruct 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000006 0.00000015 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "174 raifle/sorcererlm-8x22b SorcererLM 8x22B 1731105083 SorcererLM is an advanced RP and storytelling ... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral vicuna 0.0000045 0.0000045 0 0 0 0 16000.0 NaN False NaN NaN\n", - "175 eva-unit-01/eva-qwen-2.5-32b EVA Qwen2.5 32B 1731104847 EVA Qwen2.5 32B is a roleplaying/storywriting ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "176 thedrummer/unslopnemo-12b Unslopnemo 12B 1731103448 UnslopNemo v4.1 is the latest addition from th... 
32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000045 0.00000045 0 0 0 0 32000.0 16000.0 False NaN NaN\n", - "177 anthropic/claude-3.5-haiku:beta Anthropic: Claude 3.5 Haiku (self-moderated) 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", - "178 anthropic/claude-3.5-haiku Anthropic: Claude 3.5 Haiku 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", - "179 anthropic/claude-3.5-haiku-20241022:beta Anthropic: Claude 3.5 Haiku (2024-10-22) (self... 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", - "180 anthropic/claude-3.5-haiku-20241022 Anthropic: Claude 3.5 Haiku (2024-10-22) 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", - "181 neversleep/llama-3.1-lumimaid-70b NeverSleep: Lumimaid v0.2 70B 1729555200 Lumimaid v0.2 70B is a finetune of [Llama 3.1 ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000015 0.00000225 0 0 0 0 16384.0 2048.0 False NaN NaN\n", - "182 anthracite-org/magnum-v4-72b Magnum v4 72B 1729555200 This is a series of models designed to replica... 16384 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.0000015 0.00000225 0 0 0 0 16384.0 1024.0 False NaN NaN\n", - "183 anthropic/claude-3.5-sonnet:beta Anthropic: Claude 3.5 Sonnet (self-moderated) 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", - "184 anthropic/claude-3.5-sonnet Anthropic: Claude 3.5 Sonnet 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", - "185 x-ai/grok-beta xAI: Grok Beta 1729382400 Grok Beta is xAI's experimental language model... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000005 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "186 mistralai/ministral-8b Mistral: Ministral 8B 1729123200 Ministral 8B is an 8B parameter model featurin... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", - "187 mistralai/ministral-3b Mistral: Ministral 3B 1729123200 Ministral 3B is a 3B parameter model optimized... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000004 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "188 qwen/qwen-2.5-7b-instruct:free Qwen2.5 7B Instruct (free) 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "189 qwen/qwen-2.5-7b-instruct Qwen2.5 7B Instruct 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.00000005 0.0000001 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "190 nvidia/llama-3.1-nemotron-70b-instruct NVIDIA: Llama 3.1 Nemotron 70B Instruct 1728950400 NVIDIA's Llama 3.1 Nemotron 70B is a language ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "191 inflection/inflection-3-productivity Inflection: Inflection 3 Productivity 1728604800 Inflection 3 Productivity is optimized for fol... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", - "192 inflection/inflection-3-pi Inflection: Inflection 3 Pi 1728604800 Inflection 3 Pi powers Inflection's [Pi](https... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", - "193 google/gemini-flash-1.5-8b Google: Gemini 1.5 Flash 8B 1727913600 Gemini Flash 1.5 8B is optimized for speed and... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.0000000375 0.00000015 0 0 0 0 1000000.0 8192.0 False 0.00000001 0.0000000583\n", - "194 thedrummer/rocinante-12b Rocinante 12B 1727654400 Rocinante 12B is designed for engaging storyte... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000025 0.0000005 0 0 0 0 32768.0 NaN False NaN NaN\n", - "195 anthracite-org/magnum-v2-72b Magnum v2 72B 1727654400 From the maker of [Goliath](https://openrouter... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000003 0.000003 0 0 0 0 32768.0 NaN False NaN NaN\n", - "196 liquid/lfm-40b Liquid: LFM 40B MoE 1727654400 Liquid's 40.3B Mixture of Experts (MoE) model.... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other chatml 0.00000015 0.00000015 0 0 0 0 32768.0 NaN False NaN NaN\n", - "197 meta-llama/llama-3.2-3b-instruct:free Meta: Llama 3.2 3B Instruct (free) 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 20000 None [max_tokens, temperature, top_p] text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 20000.0 20000.0 False NaN NaN\n", - "198 meta-llama/llama-3.2-3b-instruct Meta: Llama 3.2 3B Instruct 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000001 0.00000002 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "199 meta-llama/llama-3.2-1b-instruct:free Meta: Llama 3.2 1B Instruct (free) 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131000.0 NaN False NaN NaN\n", - "200 meta-llama/llama-3.2-1b-instruct Meta: Llama 3.2 1B Instruct 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131072 None [max_tokens, temperature, top_p, top_k, stop, ... text->text [text] [text] Llama3 llama3 0.000000005 0.00000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "201 meta-llama/llama-3.2-90b-vision-instruct Meta: Llama 3.2 90B Vision Instruct 1727222400 The Llama 90B Vision model is a top-tier, 90-b... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.0000012 0.0000012 0 0.001734 0 0 131072.0 2048.0 False NaN NaN\n", - "202 meta-llama/llama-3.2-11b-vision-instruct:free Meta: Llama 3.2 11B Vision Instruct (free) 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", - "203 meta-llama/llama-3.2-11b-vision-instruct Meta: Llama 3.2 11B Vision Instruct 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.000000049 0.000000049 0 0.00007948 0 0 131072.0 16384.0 False NaN NaN\n", - "204 qwen/qwen-2.5-72b-instruct:free Qwen2.5 72B Instruct (free) 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "205 qwen/qwen-2.5-72b-instruct Qwen2.5 72B Instruct 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen chatml 0.00000012 0.00000039 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "206 qwen/qwen-2.5-vl-72b-instruct Qwen: Qwen2.5-VL 72B Instruct 1726617600 Qwen2.5 VL 72B is a multimodal LLM from the Qw... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000006 0.0000006 0 0.000578 0 0 32768.0 NaN False NaN NaN\n", - "207 neversleep/llama-3.1-lumimaid-8b NeverSleep: Lumimaid v0.2 8B 1726358400 Lumimaid v0.2 8B is a finetune of [Llama 3.1 8... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 32768.0 2048.0 False NaN NaN\n", - "208 openai/o1-preview OpenAI: o1-preview 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", - "209 openai/o1-preview-2024-09-12 OpenAI: o1-preview (2024-09-12) 1726099200 The latest and strongest model family from Ope... 
128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", - "210 openai/o1-mini OpenAI: o1-mini 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", - "211 openai/o1-mini-2024-09-12 OpenAI: o1-mini (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", - "212 mistralai/pixtral-12b Mistral: Pixtral 12B 1725926400 The first multi-modal, text+image-to-text mode... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Mistral None 0.0000001 0.0000001 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", - "213 cohere/command-r-plus-08-2024 Cohere: Command R+ (08-2024) 1724976000 command-r-plus-08-2024 is an update of the [Co... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000025 0.00001 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "214 cohere/command-r-08-2024 Cohere: Command R (08-2024) 1724976000 command-r-08-2024 is an update of the [Command... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.00000015 0.0000006 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "215 qwen/qwen-2.5-vl-7b-instruct:free Qwen: Qwen2.5-VL 7B Instruct (free) 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 64000.0 False NaN NaN\n", - "216 qwen/qwen-2.5-vl-7b-instruct Qwen: Qwen2.5-VL 7B Instruct 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Qwen None 0.0000002 0.0000002 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", - "217 sao10k/l3.1-euryale-70b Sao10K: Llama 3.1 Euryale 70B v2.2 1724803200 Euryale L3.1 70B v2.2 is a model focused on cr... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "218 google/gemini-flash-1.5-8b-exp Google: Gemini 1.5 Flash 8B Experimental 1724803200 Gemini Flash 1.5 8B Experimental is an experim... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", - "219 microsoft/phi-3.5-mini-128k-instruct Microsoft: Phi-3.5 Mini 128K Instruct 1724198400 Phi-3.5 models are lightweight, state-of-the-a... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.00000003 0.00000009 0 0 0 0 131072.0 NaN False NaN NaN\n", - "220 nousresearch/hermes-3-llama-3.1-70b Nous: Hermes 3 70B Instruct 1723939200 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "221 nousresearch/hermes-3-llama-3.1-405b Nous: Hermes 3 405B Instruct 1723766400 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.0000008 0.0000008 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "222 openai/chatgpt-4o-latest OpenAI: ChatGPT-4o 1723593600 OpenAI ChatGPT 4o is continually updated by Op... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 16384.0 True NaN NaN\n", - "223 sao10k/l3-lunaris-8b Sao10K: Llama 3 8B Lunaris 1723507200 Lunaris 8B is a versatile generalist and rolep... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000005 0 0 0 0 8192.0 NaN False NaN NaN\n", - "224 aetherwiing/mn-starcannon-12b Aetherwiing: Starcannon 12B 1723507200 Starcannon 12B v2 is a creative roleplay and s... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "225 openai/gpt-4o-2024-08-06 OpenAI: GPT-4o (2024-08-06) 1722902400 The 2024-08-06 version of GPT-4o offers improv... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "226 meta-llama/llama-3.1-405b:free Meta: Llama 3.1 405B (base) (free) 1722556800 Meta's latest class of model (Llama 3.1) launc... 64000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "227 meta-llama/llama-3.1-405b Meta: Llama 3.1 405B (base) 1722556800 Meta's latest class of model (Llama 3.1) launc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.000002 0.000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "228 nothingiisreal/mn-celeste-12b Mistral Nemo 12B Celeste 1722556800 A specialized story writing and roleplaying mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "229 perplexity/llama-3.1-sonar-small-128k-online Perplexity: Llama 3.1 Sonar 8B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 
127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.0000002 0.0000002 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "230 perplexity/llama-3.1-sonar-large-128k-online Perplexity: Llama 3.1 Sonar 70B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "231 meta-llama/llama-3.1-8b-instruct:free Meta: Llama 3.1 8B Instruct (free) 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 4096.0 False NaN NaN\n", - "232 meta-llama/llama-3.1-8b-instruct Meta: Llama 3.1 8B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000003 0 0 0 0 16384.0 16384.0 False NaN NaN\n", - "233 meta-llama/llama-3.1-405b-instruct Meta: Llama 3.1 405B Instruct 1721692800 The highly anticipated 400B class of Llama3 is... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000008 0.0000008 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "234 meta-llama/llama-3.1-70b-instruct Meta: Llama 3.1 70B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000001 0.00000028 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "235 mistralai/codestral-mamba Mistral: Codestral Mamba 1721347200 A 7.3B parameter Mamba-based model designed fo... 262144 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 262144.0 NaN False NaN NaN\n", - "236 mistralai/mistral-nemo:free Mistral: Mistral Nemo (free) 1721347200 A 12B parameter model with a 128k token contex... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 128000.0 128000.0 False NaN NaN\n", - "237 mistralai/mistral-nemo Mistral: Mistral Nemo 1721347200 A 12B parameter model with a 128k token contex... 98304 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000003 0.00000007 0 0 0 0 98304.0 49152.0 False NaN NaN\n", - "238 openai/gpt-4o-mini OpenAI: GPT-4o-mini 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.000217 0 0 128000.0 16384.0 True 0.000000075 NaN\n", - "239 openai/gpt-4o-mini-2024-07-18 OpenAI: GPT-4o-mini (2024-07-18) 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.007225 0 0 128000.0 16384.0 True 0.000000075 NaN\n", - "240 google/gemma-2-27b-it Google: Gemma 2 27B 1720828800 Gemma 2 27B by Google is an open model built f... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.0000001 0.0000003 0 0 0 0 8192.0 NaN False NaN NaN\n", - "241 alpindale/magnum-72b Magnum 72B 1720656000 From the maker of [Goliath](https://openrouter... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "242 google/gemma-2-9b-it:free Google: Gemma 2 9B (free) 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Gemini gemma 0 0 0 0 0 0 8192.0 8192.0 False NaN NaN\n", - "243 google/gemma-2-9b-it Google: Gemma 2 9B 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.00000002 0.00000006 0 0 0 0 8192.0 NaN False NaN NaN\n", - "244 01-ai/yi-large 01.AI: Yi Large 1719273600 The Yi Large model was designed by 01.AI with ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Yi None 0.000003 0.000003 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "245 ai21/jamba-instruct AI21: Jamba Instruct 1719273600 The Jamba-Instruct model, introduced by AI21 L... 256000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000005 0.0000007 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "246 anthropic/claude-3.5-sonnet-20240620:beta Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel... 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", - "247 anthropic/claude-3.5-sonnet-20240620 Anthropic: Claude 3.5 Sonnet (2024-06-20) 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", - "248 sao10k/l3-euryale-70b Sao10k: Llama 3 Euryale 70B v2.1 1718668800 Euryale 70B v2.1 is a model focused on creativ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000148 0.00000148 0 0 0 0 8192.0 8192.0 False NaN NaN\n", - "249 cognitivecomputations/dolphin-mixtral-8x22b Dolphin 2.9.2 Mixtral 8x22B 🐬 1717804800 Dolphin 2.9 is designed for instruction follow... 
16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000009 0.0000009 0 0 0 0 16000.0 NaN False NaN NaN\n", - "250 qwen/qwen-2-72b-instruct Qwen 2 72B Instruct 1717718400 Qwen2 72B is a transformer-based model that ex... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000009 0.0000009 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "251 mistralai/mistral-7b-instruct:free Mistral: Mistral 7B Instruct (free) 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "252 mistralai/mistral-7b-instruct Mistral: Mistral 7B Instruct 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "253 nousresearch/hermes-2-pro-llama-3-8b NousResearch: Hermes 2 Pro - Llama-3 8B 1716768000 Hermes 2 Pro is an upgraded, retrained version... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.000000025 0.00000004 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "254 mistralai/mistral-7b-instruct-v0.3 Mistral: Mistral 7B Instruct v0.3 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "255 microsoft/phi-3-mini-128k-instruct Microsoft: Phi-3 Mini 128K Instruct 1716681600 Phi-3 Mini is a powerful 3.8B parameter model ... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Other phi3 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", - "256 microsoft/phi-3-medium-128k-instruct Microsoft: Phi-3 Medium 128K Instruct 1716508800 Phi-3 128K Medium is a powerful 14-billion par... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000003 0 0 0 0 131072.0 NaN False NaN NaN\n", - "257 neversleep/llama-3-lumimaid-70b NeverSleep: Llama 3 Lumimaid 70B 1715817600 The NeverSleep team is back, with a Llama 3 70... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 8192.0 4096.0 False NaN NaN\n", - "258 deepseek/deepseek-coder DeepSeek-Coder-V2 1715644800 DeepSeek-Coder-V2, an open-source Mixture-of-E... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000004 0.00000012 0 0 0 0 128000.0 NaN False NaN NaN\n", - "259 google/gemini-flash-1.5 Google: Gemini 1.5 Flash 1715644800 Gemini 1.5 Flash is a foundation model that pe... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.000000075 0.0000003 0 0.00004 0 0 1000000.0 8192.0 False 0.00000001875 0.0000001583\n", - "260 openai/gpt-4o OpenAI: GPT-4o 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "261 openai/gpt-4o:extended OpenAI: GPT-4o (extended) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000006 0.000018 0 0.007225 0 0 128000.0 64000.0 True NaN NaN\n", - "262 meta-llama/llama-guard-2-8b Meta: LlamaGuard 2 8B 1715558400 This safeguard model has 8B parameters and is ... 
8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.0000002 0.0000002 0 0 0 0 8192.0 NaN False NaN NaN\n", - "263 openai/gpt-4o-2024-05-13 OpenAI: GPT-4o (2024-05-13) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 4096.0 True NaN NaN\n", - "264 allenai/olmo-7b-instruct OLMo 7B Instruct 1715299200 OLMo 7B Instruct by the Allen Institute for AI... 2048 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other zephyr 0.00000008 0.00000024 0 0 0 0 2048.0 NaN False NaN NaN\n", - "265 neversleep/llama-3-lumimaid-8b:extended NeverSleep: Llama 3 Lumimaid 8B (extended) 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", - "266 neversleep/llama-3-lumimaid-8b NeverSleep: Llama 3 Lumimaid 8B 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", - "267 sao10k/fimbulvetr-11b-v2 Fimbulvetr 11B v2 1713657600 Creative writing model, routed with permission... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "268 meta-llama/llama-3-8b-instruct Meta: Llama 3 8B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, top_k, seed, ... 
text->text [text] [text] Llama3 llama3 0.00000003 0.00000006 0 0 0 0 8192.0 16384.0 False NaN NaN\n", - "269 meta-llama/llama-3-70b-instruct Meta: Llama 3 70B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000003 0.0000004 0 0 0 0 8192.0 16384.0 False NaN NaN\n", - "270 mistralai/mixtral-8x22b-instruct Mistral: Mixtral 8x22B Instruct 1713312000 Mistral's official instruct fine-tuned version... 65536 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.0000004 0.0000012 0 0 0 0 65536.0 NaN False NaN NaN\n", - "271 microsoft/wizardlm-2-8x22b WizardLM-2 8x22B 1713225600 WizardLM-2 8x22B is Microsoft AI's most advanc... 65536 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Mistral vicuna 0.0000005 0.0000005 0 0 0 0 65536.0 16384.0 False NaN NaN\n", - "272 google/gemini-pro-1.5 Google: Gemini 1.5 Pro 1712620800 Google's latest multimodal model, supports ima... 2000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.00000125 0.000005 0 0.0006575 0 0 2000000.0 8192.0 False NaN NaN\n", - "273 openai/gpt-4-turbo OpenAI: GPT-4 Turbo 1712620800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.00001 0.00003 0 0.01445 0 0 128000.0 4096.0 True NaN NaN\n", - "274 cohere/command-r-plus Cohere: Command R+ 1712188800 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "275 cohere/command-r-plus-04-2024 Cohere: Command R+ (04-2024) 1712016000 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... 
text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "276 sophosympatheia/midnight-rose-70b Midnight Rose 70B 1711065600 A merge with a complex family tree, this model... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000008 0.0000008 0 0 0 0 4096.0 NaN False NaN NaN\n", - "277 cohere/command Cohere: Command 1710374400 Command is an instruction-following conversati... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.000001 0.000002 0 0 0 0 4096.0 4000.0 False NaN NaN\n", - "278 cohere/command-r Cohere: Command R 1710374400 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "279 anthropic/claude-3-haiku:beta Anthropic: Claude 3 Haiku (self-moderated) 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 False 0.00000003 0.0000003\n", - "280 anthropic/claude-3-haiku Anthropic: Claude 3 Haiku 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 True 0.00000003 0.0000003\n", - "281 anthropic/claude-3-opus:beta Anthropic: Claude 3 Opus (self-moderated) 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 False 0.0000015 0.00001875\n", - "282 anthropic/claude-3-opus Anthropic: Claude 3 Opus 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 
200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 True 0.0000015 0.00001875\n", - "283 anthropic/claude-3-sonnet:beta Anthropic: Claude 3 Sonnet (self-moderated) 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 False 0.0000003 0.00000375\n", - "284 anthropic/claude-3-sonnet Anthropic: Claude 3 Sonnet 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 True 0.0000003 0.00000375\n", - "285 cohere/command-r-03-2024 Cohere: Command R (03-2024) 1709341200 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "286 mistralai/mistral-large Mistral Large 1708905600 This is Mistral AI's flagship model, Mistral L... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 128000.0 NaN False NaN NaN\n", - "287 openai/gpt-3.5-turbo-0613 OpenAI: GPT-3.5 Turbo (older v0613) 1706140800 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 4095 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 4095.0 4096.0 False NaN NaN\n", - "288 openai/gpt-4-turbo-preview OpenAI: GPT-4 Turbo Preview 1706140800 The preview GPT-4 model with improved instruct... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", - "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo Nous: Hermes 2 Mixtral 8x7B DPO 1705363200 Nous Hermes 2 Mixtral 8x7B DPO is the new flag... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000006 0.0000006 0 0 0 0 32768.0 2048.0 False NaN NaN\n", - "290 mistralai/mistral-medium Mistral Medium 1704844800 This is Mistral AI's closed-source, medium-sid... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000275 0.0000081 0 0 0 0 32768.0 NaN False NaN NaN\n", - "291 mistralai/mistral-small Mistral Small 1704844800 With 22 billion parameters, Mistral Small v24.... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", - "292 mistralai/mistral-tiny Mistral Tiny 1704844800 Note: This model is being deprecated. Recommen... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 32768.0 NaN False NaN NaN\n", - "293 mistralai/mistral-7b-instruct-v0.2 Mistral: Mistral 7B Instruct v0.2 1703721600 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000002 0.0000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "294 mistralai/mixtral-8x7b-instruct Mistral: Mixtral 8x7B Instruct 1702166400 Mixtral 8x7B Instruct is a pretrained generati... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000008 0.00000024 0 0 0 0 32768.0 NaN False NaN NaN\n", - "295 neversleep/noromaid-20b Noromaid 20B 1700956800 A collab between IkariDev and Undi. This merge... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.00000075 0.0000015 0 0 0 0 8192.0 2048.0 False NaN NaN\n", - "296 anthropic/claude-2.1:beta Anthropic: Claude v2.1 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", - "297 anthropic/claude-2.1 Anthropic: Claude v2.1 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", - "298 anthropic/claude-2:beta Anthropic: Claude v2 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", - "299 anthropic/claude-2 Anthropic: Claude v2 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", - "300 undi95/toppy-m-7b Toppy M 7B 1699574400 A wild 7B parameter model that merges several ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "301 alpindale/goliath-120b Goliath 120B 1699574400 A large LLM created by combining two fine-tune... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000065625 0.000009375 0 0 0 0 6144.0 512.0 False NaN NaN\n", - "302 openrouter/auto Auto Router 1699401600 Your prompt will be processed by a meta-model ... 
2000000 None [] text->text [text] [text] Router None -1 -1 NaN NaN NaN NaN NaN NaN False NaN NaN\n", - "303 openai/gpt-3.5-turbo-1106 OpenAI: GPT-3.5 Turbo 16k (older v1106) 1699228800 An older GPT-3.5 Turbo model with improved ins... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "304 openai/gpt-4-1106-preview OpenAI: GPT-4 Turbo (older v1106) 1699228800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", - "305 jondurbin/airoboros-l2-70b Airoboros 70B 1698537600 A Llama 2 70B fine-tune using synthetic data (... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000005 0.0000005 0 0 0 0 4096.0 NaN False NaN NaN\n", - "306 openai/gpt-3.5-turbo-instruct OpenAI: GPT-3.5 Turbo Instruct 1695859200 This model is a variant of GPT-3.5 Turbo tuned... 4095 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] GPT chatml 0.0000015 0.000002 0 0 0 0 4095.0 4096.0 True NaN NaN\n", - "307 mistralai/mistral-7b-instruct-v0.1 Mistral: Mistral 7B Instruct v0.1 1695859200 A 7.3B parameter model that outperforms Llama ... 2824 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000011 0.00000019 0 0 0 0 2824.0 NaN False NaN NaN\n", - "308 pygmalionai/mythalion-13b Pygmalion: Mythalion 13B 1693612800 A blend of the new Pygmalion-13b and MythoMax.... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 8192.0 1024.0 False NaN NaN\n", - "309 openai/gpt-3.5-turbo-16k OpenAI: GPT-3.5 Turbo 16k 1693180800 This model offers four times the context lengt... 16385 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.000003 0.000004 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "310 openai/gpt-4-32k OpenAI: GPT-4 32k 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", - "311 openai/gpt-4-32k-0314 OpenAI: GPT-4 32k (older v0314) 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", - "312 mancer/weaver Mancer: Weaver (alpha) 1690934400 An attempt to recreate Claude-style verbosity,... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.000001125 0.000001125 0 0 0 0 8000.0 1000.0 False NaN NaN\n", - "313 anthropic/claude-2.0:beta Anthropic: Claude v2.0 (self-moderated) 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 False NaN NaN\n", - "314 anthropic/claude-2.0 Anthropic: Claude v2.0 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 True NaN NaN\n", - "315 undi95/remm-slerp-l2-13b ReMM SLERP 13B 1689984000 A recreation trial of the original MythoMax-L2... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 6144.0 1024.0 False NaN NaN\n", - "316 gryphe/mythomax-l2-13b MythoMax 13B 1688256000 One of the highest performing and most popular... 4096 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.000000065 0.000000065 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "317 meta-llama/llama-2-70b-chat Meta: Llama 2 70B Chat 1687219200 The flagship, 70 billion parameter language mo... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 llama2 0.0000009 0.0000009 0 0 0 0 4096.0 NaN False NaN NaN\n", - "318 openai/gpt-3.5-turbo OpenAI: GPT-3.5 Turbo 1685232000 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "319 openai/gpt-3.5-turbo-0125 OpenAI: GPT-3.5 Turbo 16k 1685232000 The latest GPT-3.5 Turbo model with improved i... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "320 openai/gpt-4 OpenAI: GPT-4 1685232000 OpenAI's flagship model, GPT-4 is a large-scal... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN\n", - "321 openai/gpt-4-0314 OpenAI: GPT-4 (older v0314) 1685232000 GPT-4-0314 is the first version of GPT-4 relea... 8191 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Normalize the nested JSON\n", - "df = pd.json_normalize(val, sep=\"_\")\n", - "df\n", - "# View the resulting DataFrame\n", - "# print(df.T) # Transpose just for readable vertical inspection" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id mistralai/mistral-medium-3\n", - "name Mistral: Mistral Medium 3\n", - "created 1746627341\n", - "description Mistral Medium 3 is a high-performance enterpr...\n", - "context_length 131072\n", - "per_request_limits None\n", - "supported_parameters [tools, tool_choice, max_tokens, temperature, ...\n", - "architecture_modality text+image->text\n", - "architecture_input_modalities [text, image]\n", - "architecture_output_modalities [text]\n", - "architecture_tokenizer Mistral\n", - "architecture_instruct_type None\n", - "pricing_prompt 0.0000004\n", - "pricing_completion 0.000002\n", - "pricing_request 0\n", - "pricing_image 0\n", - "pricing_web_search 0\n", - "pricing_internal_reasoning 0\n", - "top_provider_context_length 131072.0\n", - "top_provider_max_completion_tokens NaN\n", - "top_provider_is_moderated False\n", - "pricing_input_cache_read NaN\n", - "pricing_input_cache_write NaN\n", - "Name: 0, dtype: object" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0].T" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "col_names = [\"id\", \"context_length\", \"pricing_prompt\", \"pricing_completion\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id object\n", - "name object\n", - "created int64\n", - "description object\n", - 
"context_length int64\n", - "per_request_limits object\n", - "supported_parameters object\n", - "architecture_modality object\n", - "architecture_input_modalities object\n", - "architecture_output_modalities object\n", - "architecture_tokenizer object\n", - "architecture_instruct_type object\n", - "pricing_prompt object\n", - "pricing_completion object\n", - "pricing_request object\n", - "pricing_image object\n", - "pricing_web_search object\n", - "pricing_internal_reasoning object\n", - "top_provider_context_length float64\n", - "top_provider_max_completion_tokens float64\n", - "top_provider_is_moderated bool\n", - "pricing_input_cache_read object\n", - "pricing_input_cache_write object\n", - "dtype: object" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.dtypes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 
0.0, 'is_string': 0.38819875776397517, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.5962732919254659, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 1.0, 'is_bool': 1.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.13043478260869565, 'is_bool': 0.0, 'is_string': 0.13043478260869565, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.07142857142857142, 'is_bool': 0.0, 'is_string': 0.07142857142857142, 'type': 'is_numeric'}\n" - ] - } - ], - "source": [ - "for col in df.columns:\n", - " print(hpandas.infer_column_types(df[col]))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_numericis_boolis_string
id0.0000000.01.000000
name0.0000000.01.000000
created1.0000000.00.000000
description0.0000000.01.000000
context_length1.0000000.00.000000
per_request_limits0.0000000.00.000000
supported_parameters0.0000000.00.000000
architecture_modality0.0000000.01.000000
architecture_input_modalities0.0000000.00.000000
architecture_output_modalities0.0000000.00.000000
architecture_tokenizer0.0000000.01.000000
architecture_instruct_type0.0000000.00.388199
pricing_prompt1.0000000.01.000000
pricing_completion1.0000000.01.000000
pricing_request0.9968940.00.996894
pricing_image0.9968940.00.996894
pricing_web_search0.9968940.00.996894
pricing_internal_reasoning0.9968940.00.996894
top_provider_context_length0.9968940.00.000000
top_provider_max_completion_tokens0.5962730.00.000000
top_provider_is_moderated1.0000001.00.000000
pricing_input_cache_read0.1304350.00.130435
pricing_input_cache_write0.0714290.00.071429
\n", - "
" - ], - "text/plain": [ - " is_numeric is_bool is_string\n", - "id 0.000000 0.0 1.000000\n", - "name 0.000000 0.0 1.000000\n", - "created 1.000000 0.0 0.000000\n", - "description 0.000000 0.0 1.000000\n", - "context_length 1.000000 0.0 0.000000\n", - "per_request_limits 0.000000 0.0 0.000000\n", - "supported_parameters 0.000000 0.0 0.000000\n", - "architecture_modality 0.000000 0.0 1.000000\n", - "architecture_input_modalities 0.000000 0.0 0.000000\n", - "architecture_output_modalities 0.000000 0.0 0.000000\n", - "architecture_tokenizer 0.000000 0.0 1.000000\n", - "architecture_instruct_type 0.000000 0.0 0.388199\n", - "pricing_prompt 1.000000 0.0 1.000000\n", - "pricing_completion 1.000000 0.0 1.000000\n", - "pricing_request 0.996894 0.0 0.996894\n", - "pricing_image 0.996894 0.0 0.996894\n", - "pricing_web_search 0.996894 0.0 0.996894\n", - "pricing_internal_reasoning 0.996894 0.0 0.996894\n", - "top_provider_context_length 0.996894 0.0 0.000000\n", - "top_provider_max_completion_tokens 0.596273 0.0 0.000000\n", - "top_provider_is_moderated 1.000000 1.0 0.000000\n", - "pricing_input_cache_read 0.130435 0.0 0.130435\n", - "pricing_input_cache_write 0.071429 0.0 0.071429" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.apply(lambda x: pd.Series(hpandas.infer_column_types(x))).T" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_numericis_boolis_stringtype
id0.00.01.0is_bool
name0.00.01.0is_bool
created1.00.00.0is_numeric
description0.00.01.0is_bool
context_length1.00.00.0is_numeric
per_request_limits0.00.00.0is_bool
supported_parameters0.00.00.0is_bool
architecture_modality0.00.01.0is_bool
architecture_input_modalities0.00.00.0is_bool
architecture_output_modalities0.00.00.0is_bool
architecture_tokenizer0.00.01.0is_bool
architecture_instruct_type0.00.00.388199is_bool
pricing_prompt1.00.01.0is_numeric
pricing_completion1.00.01.0is_numeric
pricing_request0.9968940.00.996894is_numeric
pricing_image0.9968940.00.996894is_numeric
pricing_web_search0.9968940.00.996894is_numeric
pricing_internal_reasoning0.9968940.00.996894is_numeric
top_provider_context_length0.9968940.00.0is_numeric
top_provider_max_completion_tokens0.5962730.00.0is_numeric
top_provider_is_moderated1.01.00.0is_bool
pricing_input_cache_read0.1304350.00.130435is_numeric
pricing_input_cache_write0.0714290.00.071429is_numeric
\n", - "
" - ], - "text/plain": [ - " is_numeric is_bool is_string type\n", - "id 0.0 0.0 1.0 is_bool\n", - "name 0.0 0.0 1.0 is_bool\n", - "created 1.0 0.0 0.0 is_numeric\n", - "description 0.0 0.0 1.0 is_bool\n", - "context_length 1.0 0.0 0.0 is_numeric\n", - "per_request_limits 0.0 0.0 0.0 is_bool\n", - "supported_parameters 0.0 0.0 0.0 is_bool\n", - "architecture_modality 0.0 0.0 1.0 is_bool\n", - "architecture_input_modalities 0.0 0.0 0.0 is_bool\n", - "architecture_output_modalities 0.0 0.0 0.0 is_bool\n", - "architecture_tokenizer 0.0 0.0 1.0 is_bool\n", - "architecture_instruct_type 0.0 0.0 0.388199 is_bool\n", - "pricing_prompt 1.0 0.0 1.0 is_numeric\n", - "pricing_completion 1.0 0.0 1.0 is_numeric\n", - "pricing_request 0.996894 0.0 0.996894 is_numeric\n", - "pricing_image 0.996894 0.0 0.996894 is_numeric\n", - "pricing_web_search 0.996894 0.0 0.996894 is_numeric\n", - "pricing_internal_reasoning 0.996894 0.0 0.996894 is_numeric\n", - "top_provider_context_length 0.996894 0.0 0.0 is_numeric\n", - "top_provider_max_completion_tokens 0.596273 0.0 0.0 is_numeric\n", - "top_provider_is_moderated 1.0 1.0 0.0 is_bool\n", - "pricing_input_cache_read 0.130435 0.0 0.130435 is_numeric\n", - "pricing_input_cache_write 0.071429 0.0 0.071429 is_numeric" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hpandas.infer_column_types_df(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 True\n", - "1 True\n", - "2 True\n", - "3 True\n", - "4 True\n", - "5 True\n", - "6 True\n", - "7 True\n", - "8 True\n", - "9 True\n", - "10 True\n", - "11 True\n", - "12 True\n", - "13 True\n", - "14 True\n", - "15 True\n", - "16 True\n", - "17 True\n", - "18 True\n", - "19 True\n", - "20 True\n", - "21 True\n", - "22 True\n", - "23 True\n", - "24 True\n", - "25 True\n", - "26 True\n", - "27 True\n", - "28 True\n", - "29 True\n", 
- "30 True\n", - "31 True\n", - "32 True\n", - "33 True\n", - "34 True\n", - "35 True\n", - "36 True\n", - "37 True\n", - "38 True\n", - "39 True\n", - "40 True\n", - "41 True\n", - "42 True\n", - "43 True\n", - "44 True\n", - "45 True\n", - "46 True\n", - "47 True\n", - "48 True\n", - "49 True\n", - "50 True\n", - "51 True\n", - "52 True\n", - "53 True\n", - "54 True\n", - "55 True\n", - "56 True\n", - "57 True\n", - "58 True\n", - "59 True\n", - "60 True\n", - "61 True\n", - "62 True\n", - "63 True\n", - "64 True\n", - "65 True\n", - "66 True\n", - "67 True\n", - "68 True\n", - "69 True\n", - "70 True\n", - "71 True\n", - "72 True\n", - "73 True\n", - "74 True\n", - "75 True\n", - "76 True\n", - "77 True\n", - "78 True\n", - "79 True\n", - "80 True\n", - "81 True\n", - "82 True\n", - "83 True\n", - "84 True\n", - "85 True\n", - "86 True\n", - "87 True\n", - "88 True\n", - "89 True\n", - "90 True\n", - "91 True\n", - "92 True\n", - "93 True\n", - "94 True\n", - "95 True\n", - "96 True\n", - "97 True\n", - "98 True\n", - "99 True\n", - "100 True\n", - "101 True\n", - "102 True\n", - "103 True\n", - "104 True\n", - "105 True\n", - "106 True\n", - "107 True\n", - "108 True\n", - "109 True\n", - "110 True\n", - "111 True\n", - "112 True\n", - "113 True\n", - "114 True\n", - "115 True\n", - "116 True\n", - "117 True\n", - "118 True\n", - "119 True\n", - "120 True\n", - "121 True\n", - "122 True\n", - "123 True\n", - "124 True\n", - "125 True\n", - "126 True\n", - "127 True\n", - "128 True\n", - "129 True\n", - "130 True\n", - "131 True\n", - "132 True\n", - "133 True\n", - "134 True\n", - "135 True\n", - "136 True\n", - "137 True\n", - "138 True\n", - "139 True\n", - "140 True\n", - "141 True\n", - "142 True\n", - "143 True\n", - "144 True\n", - "145 True\n", - "146 True\n", - "147 True\n", - "148 True\n", - "149 True\n", - "150 True\n", - "151 True\n", - "152 True\n", - "153 True\n", - "154 True\n", - "155 True\n", - "156 True\n", - "157 True\n", - "158 True\n", - 
"159 True\n", - "160 True\n", - "161 True\n", - "162 True\n", - "163 True\n", - "164 True\n", - "165 True\n", - "166 True\n", - "167 True\n", - "168 True\n", - "169 True\n", - "170 True\n", - "171 True\n", - "172 True\n", - "173 True\n", - "174 True\n", - "175 True\n", - "176 True\n", - "177 True\n", - "178 True\n", - "179 True\n", - "180 True\n", - "181 True\n", - "182 True\n", - "183 True\n", - "184 True\n", - "185 True\n", - "186 True\n", - "187 True\n", - "188 True\n", - "189 True\n", - "190 True\n", - "191 True\n", - "192 True\n", - "193 True\n", - "194 True\n", - "195 True\n", - "196 True\n", - "197 True\n", - "198 True\n", - "199 True\n", - "200 True\n", - "201 True\n", - "202 True\n", - "203 True\n", - "204 True\n", - "205 True\n", - "206 True\n", - "207 True\n", - "208 True\n", - "209 True\n", - "210 True\n", - "211 True\n", - "212 True\n", - "213 True\n", - "214 True\n", - "215 True\n", - "216 True\n", - "217 True\n", - "218 True\n", - "219 True\n", - "220 True\n", - "221 True\n", - "222 True\n", - "223 True\n", - "224 True\n", - "225 True\n", - "226 True\n", - "227 True\n", - "228 True\n", - "229 True\n", - "230 True\n", - "231 True\n", - "232 True\n", - "233 True\n", - "234 True\n", - "235 True\n", - "236 True\n", - "237 True\n", - "238 True\n", - "239 True\n", - "240 True\n", - "241 True\n", - "242 True\n", - "243 True\n", - "244 True\n", - "245 True\n", - "246 True\n", - "247 True\n", - "248 True\n", - "249 True\n", - "250 True\n", - "251 True\n", - "252 True\n", - "253 True\n", - "254 True\n", - "255 True\n", - "256 True\n", - "257 True\n", - "258 True\n", - "259 True\n", - "260 True\n", - "261 True\n", - "262 True\n", - "263 True\n", - "264 True\n", - "265 True\n", - "266 True\n", - "267 True\n", - "268 True\n", - "269 True\n", - "270 True\n", - "271 True\n", - "272 True\n", - "273 True\n", - "274 True\n", - "275 True\n", - "276 True\n", - "277 True\n", - "278 True\n", - "279 True\n", - "280 True\n", - "281 True\n", - "282 True\n", - "283 True\n", - 
"284 True\n", - "285 True\n", - "286 True\n", - "287 True\n", - "288 True\n", - "289 True\n", - "290 True\n", - "291 True\n", - "292 True\n", - "293 True\n", - "294 True\n", - "295 True\n", - "296 True\n", - "297 True\n", - "298 True\n", - "299 True\n", - "300 True\n", - "301 True\n", - "302 False\n", - "303 True\n", - "304 True\n", - "305 True\n", - "306 True\n", - "307 True\n", - "308 True\n", - "309 True\n", - "310 True\n", - "311 True\n", - "312 True\n", - "313 True\n", - "314 True\n", - "315 True\n", - "316 True\n", - "317 True\n", - "318 True\n", - "319 True\n", - "320 True\n", - "321 True\n", - "Name: pricing_request, dtype: bool" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.to_numeric(df[\"pricing_request\"], errors=\"coerce\").notna()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.000002\n", - "1 0.00001\n", - "2 0.00000085\n", - "3 0.00000018\n", - "4 0.0000033\n", - "5 0.0000012\n", - "6 0.0000008\n", - "7 0.0000008\n", - "8 0.00000075\n", - "9 0\n", - "10 0.00000035\n", - "11 0\n", - "12 0\n", - "13 0.000001\n", - "14 0\n", - "15 0\n", - "16 0\n", - "17 0\n", - "18 0\n", - "19 0.00000218\n", - "20 0.00000005\n", - "21 0\n", - "22 0.0000003\n", - "23 0\n", - "24 0.000000138\n", - "25 0\n", - "26 0.00000024\n", - "27 0\n", - "28 0.0000003\n", - "29 0\n", - "30 0.000002\n", - "31 0\n", - "32 0.00000024\n", - "33 0\n", - "34 0\n", - "35 0\n", - "36 0\n", - "37 0.00000024\n", - "38 0\n", - "39 0.00000024\n", - "40 0.0000006\n", - "41 0.0000035\n", - "42 0.0000044\n", - "43 0.00004\n", - "44 0.0000044\n", - "45 0\n", - "46 0.00000003\n", - "47 0.000008\n", - "48 0.0000016\n", - "49 0.0000004\n", - "50 0.0000012\n", - "51 0.0000012\n", - "52 0\n", - "53 0\n", - "54 0\n", - "55 0.0000005\n", - "56 0.000015\n", - "57 0\n", - "58 0.0000004\n", - "59 0\n", - "60 0\n", - "61 
0.0000006\n", - "62 0\n", - "63 0.0000003\n", - "64 0.0000034\n", - "65 0.0000001\n", - "66 0\n", - "67 0.00000018\n", - "68 0.00000088\n", - "69 0\n", - "70 0\n", - "71 0\n", - "72 0\n", - "73 0\n", - "74 0.0000009\n", - "75 0\n", - "76 0.00000088\n", - "77 0\n", - "78 0.0006\n", - "79 0\n", - "80 0.00000015\n", - "81 0\n", - "82 0\n", - "83 0\n", - "84 0.00000004\n", - "85 0.000008\n", - "86 0.0000004\n", - "87 0\n", - "88 0.0000001\n", - "89 0.00001\n", - "90 0.0000006\n", - "91 0.00001\n", - "92 0\n", - "93 0\n", - "94 0.0000002\n", - "95 0.000001\n", - "96 0.0000008\n", - "97 0.0000001\n", - "98 0.000008\n", - "99 0.000015\n", - "100 0.000008\n", - "101 0\n", - "102 0\n", - "103 0.0000002\n", - "104 0\n", - "105 0\n", - "106 0.00015\n", - "107 0.0000003\n", - "108 0.000015\n", - "109 0.000015\n", - "110 0.000015\n", - "111 0.000008\n", - "112 0.0000006\n", - "113 0\n", - "114 0\n", - "115 0.00000006\n", - "116 0.0000044\n", - "117 0.00000004\n", - "118 0.0000004\n", - "119 0.00000063\n", - "120 0.000008\n", - "121 0.0000014\n", - "122 0.0000002\n", - "123 0.0000032\n", - "124 0.0000002\n", - "125 0\n", - "126 0.00000075\n", - "127 0.0000012\n", - "128 0.0000064\n", - "129 0.0000044\n", - "130 0.00000018\n", - "131 0\n", - "132 0.00000012\n", - "133 0\n", - "134 0.00000018\n", - "135 0\n", - "136 0.00000015\n", - "137 0.000005\n", - "138 0.000001\n", - "139 0.00000001\n", - "140 0.00000002\n", - "141 0\n", - "142 0.0000004\n", - "143 0\n", - "144 0.00000218\n", - "145 0.0000011\n", - "146 0.0000009\n", - "147 0.00000014\n", - "148 0\n", - "149 0.00000089\n", - "150 0.0000008\n", - "151 0.00006\n", - "152 0.000006\n", - "153 0.00001\n", - "154 0.00001\n", - "155 0.00000015\n", - "156 0\n", - "157 0\n", - "158 0.00000035\n", - "159 0.00000024\n", - "160 0.00000014\n", - "161 0.0000032\n", - "162 0\n", - "163 0.00000027\n", - "164 0\n", - "165 0.000006\n", - "166 0.00001\n", - "167 0.000006\n", - "168 0.000006\n", - "169 0.000006\n", - "170 0.000015\n", - "171 
0.0000012\n", - "172 0\n", - "173 0.00000015\n", - "174 0.0000045\n", - "175 0.0000034\n", - "176 0.00000045\n", - "177 0.000004\n", - "178 0.000004\n", - "179 0.000004\n", - "180 0.000004\n", - "181 0.00000225\n", - "182 0.00000225\n", - "183 0.000015\n", - "184 0.000015\n", - "185 0.000015\n", - "186 0.0000001\n", - "187 0.00000004\n", - "188 0\n", - "189 0.0000001\n", - "190 0.0000003\n", - "191 0.00001\n", - "192 0.00001\n", - "193 0.00000015\n", - "194 0.0000005\n", - "195 0.000003\n", - "196 0.00000015\n", - "197 0\n", - "198 0.00000002\n", - "199 0\n", - "200 0.00000001\n", - "201 0.0000012\n", - "202 0\n", - "203 0.000000049\n", - "204 0\n", - "205 0.00000039\n", - "206 0.0000006\n", - "207 0.00000075\n", - "208 0.00006\n", - "209 0.00006\n", - "210 0.0000044\n", - "211 0.0000044\n", - "212 0.0000001\n", - "213 0.00001\n", - "214 0.0000006\n", - "215 0\n", - "216 0.0000002\n", - "217 0.0000008\n", - "218 0\n", - "219 0.00000009\n", - "220 0.0000003\n", - "221 0.0000008\n", - "222 0.000015\n", - "223 0.00000005\n", - "224 0.0000012\n", - "225 0.00001\n", - "226 0\n", - "227 0.000002\n", - "228 0.0000012\n", - "229 0.0000002\n", - "230 0.000001\n", - "231 0\n", - "232 0.00000003\n", - "233 0.0000008\n", - "234 0.00000028\n", - "235 0.00000025\n", - "236 0\n", - "237 0.00000007\n", - "238 0.0000006\n", - "239 0.0000006\n", - "240 0.0000003\n", - "241 0.000006\n", - "242 0\n", - "243 0.00000006\n", - "244 0.000003\n", - "245 0.0000007\n", - "246 0.000015\n", - "247 0.000015\n", - "248 0.00000148\n", - "249 0.0000009\n", - "250 0.0000009\n", - "251 0\n", - "252 0.000000054\n", - "253 0.00000004\n", - "254 0.000000054\n", - "255 0.0000001\n", - "256 0.0000003\n", - "257 0.000006\n", - "258 0.00000012\n", - "259 0.0000003\n", - "260 0.00001\n", - "261 0.000018\n", - "262 0.0000002\n", - "263 0.000015\n", - "264 0.00000024\n", - "265 0.00000075\n", - "266 0.00000075\n", - "267 0.0000012\n", - "268 0.00000006\n", - "269 0.0000004\n", - "270 0.0000012\n", - "271 
0.0000005\n", - "272 0.000005\n", - "273 0.00003\n", - "274 0.000015\n", - "275 0.000015\n", - "276 0.0000008\n", - "277 0.000002\n", - "278 0.0000015\n", - "279 0.00000125\n", - "280 0.00000125\n", - "281 0.000075\n", - "282 0.000075\n", - "283 0.000015\n", - "284 0.000015\n", - "285 0.0000015\n", - "286 0.000006\n", - "287 0.000002\n", - "288 0.00003\n", - "289 0.0000006\n", - "290 0.0000081\n", - "291 0.0000006\n", - "292 0.00000025\n", - "293 0.0000002\n", - "294 0.00000024\n", - "295 0.0000015\n", - "296 0.000024\n", - "297 0.000024\n", - "298 0.000024\n", - "299 0.000024\n", - "300 0.0000012\n", - "301 0.000009375\n", - "302 -1\n", - "303 0.000002\n", - "304 0.00003\n", - "305 0.0000005\n", - "306 0.000002\n", - "307 0.00000019\n", - "308 0.000001125\n", - "309 0.000004\n", - "310 0.00012\n", - "311 0.00012\n", - "312 0.000001125\n", - "313 0.000024\n", - "314 0.000024\n", - "315 0.000001125\n", - "316 0.000000065\n", - "317 0.0000009\n", - "318 0.0000015\n", - "319 0.0000015\n", - "320 0.00006\n", - "321 0.00006\n", - "Name: pricing_completion, dtype: object" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[\"pricing_completion\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcontext_lengthpricing_promptpricing_completion
302openrouter/auto2000000-1-1
133deepseek/deepseek-r1-distill-qwen-32b:free1600000
59nvidia/llama-3.1-nemotron-ultra-253b-v1:free13107200
113cognitivecomputations/dolphin3.0-r1-mistral-24...3276800
57nvidia/llama-3.3-nemotron-super-49b-v1:free13107200
114cognitivecomputations/dolphin3.0-mistral-24b:free3276800
54moonshotai/kimi-vl-a3b-thinking:free13107200
53agentica-org/deepcoder-14b-preview:free9600000
52arliai/qwq-32b-arliai-rpr-v1:free3276800
231meta-llama/llama-3.1-8b-instruct:free13107200
226meta-llama/llama-3.1-405b:free6400000
125qwen/qwen2.5-vl-72b-instruct:free13107200
45shisa-ai/shisa-v2-llama3.3-70b:free3276800
87google/gemma-3-12b-it:free13107200
92rekaai/reka-flash-3:free3276800
131mistralai/mistral-small-24b-instruct-2501:free3276800
81open-r1/olympiccoder-32b:free3276800
60meta-llama/llama-4-maverick:free25600000
236mistralai/mistral-nemo:free12800000
62meta-llama/llama-4-scout:free51200000
83google/gemma-3-4b-it:free13107200
93google/gemma-3-27b-it:free9600000
79mistralai/mistral-small-3.1-24b-instruct:free9600000
251mistralai/mistral-7b-instruct:free3276800
77featherless/qwerky-72b:free3276800
75deepseek/deepseek-chat-v3-0324:free16384000
242google/gemma-2-9b-it:free819200
73qwen/qwen2.5-vl-32b-instruct:free819200
135deepseek/deepseek-r1-distill-qwen-14b:free6400000
72google/gemini-2.5-pro-exp-03-25100000000
70bytedance-research/ui-tars-72b:free3276800
69allenai/molmo-7b-d:free409600
101deepseek/deepseek-r1-zero:free16384000
102qwen/qwq-32b:free4000000
66deepseek/deepseek-v3-base:free16384000
104moonshotai/moonlight-16b-a3b-instruct:free819200
105nousresearch/deephermes-3-llama-3-8b-preview:free13107200
71qwen/qwen2.5-vl-3b-instruct:free6400000
218google/gemini-flash-1.5-8b-exp100000000
82google/gemma-3-1b-it:free3276800
156google/gemini-2.0-flash-exp:free104857600
204qwen/qwen-2.5-72b-instruct:free3276800
21qwen/qwen3-30b-a3b:free4096000
38thudm/glm-4-32b:free3276800
157meta-llama/llama-3.3-70b-instruct:free800000
18deepseek/deepseek-prover-v2:free16384000
17opengvlab/internvl3-2b:free3200000
23qwen/qwen3-8b:free4096000
16opengvlab/internvl3-14b:free3200000
14qwen/qwen3-1.7b:free3200000
202meta-llama/llama-3.2-11b-vision-instruct:free13107200
12qwen/qwen3-0.6b-04-28:free3200000
11microsoft/phi-4-reasoning:free3276800
162qwen/qwq-32b-preview:free1638400
9microsoft/phi-4-reasoning-plus:free3276800
15qwen/qwen3-4b:free12800000
164google/learnlm-1.5-pro-experimental:free4096000
148deepseek/deepseek-chat:free16384000
199meta-llama/llama-3.2-1b-instruct:free13100000
36thudm/glm-z1-32b:free3276800
35microsoft/mai-ds-r1:free16384000
34thudm/glm-4-9b:free3200000
33thudm/glm-z1-9b:free3200000
188qwen/qwen-2.5-7b-instruct:free3276800
172qwen/qwen-2.5-coder-32b-instruct:free3276800
25qwen/qwen3-14b:free4096000
197meta-llama/llama-3.2-3b-instruct:free2000000
141deepseek/deepseek-r1-distill-llama-70b:free819200
29qwen/qwen3-235b-a22b:free4096000
143deepseek/deepseek-r1:free16384000
27qwen/qwen3-32b:free4096000
215qwen/qwen-2.5-vl-7b-instruct:free6400000
31tngtech/deepseek-r1t-chimera:free16384000
200meta-llama/llama-3.2-1b-instruct1310720.0000000050.00000001
198meta-llama/llama-3.2-3b-instruct1310720.000000010.00000002
139liquid/lfm-7b327680.000000010.00000001
46qwen/qwen2.5-coder-7b-instruct327680.000000010.00000003
243google/gemma-2-9b-it81920.000000020.00000006
232meta-llama/llama-3.1-8b-instruct163840.000000020.00000003
84google/gemma-3-4b-it1310720.000000020.00000004
140liquid/lfm-3b327680.000000020.00000002
115meta-llama/llama-guard-3-8b1310720.000000020.00000006
223sao10k/l3-lunaris-8b81920.000000020.00000005
253nousresearch/hermes-2-pro-llama-3-8b1310720.0000000250.00000004
254mistralai/mistral-7b-instruct-v0.3327680.0000000280.000000054
252mistralai/mistral-7b-instruct327680.0000000280.000000054
268meta-llama/llama-3-8b-instruct81920.000000030.00000006
219microsoft/phi-3.5-mini-128k-instruct1310720.000000030.00000009
237mistralai/mistral-nemo983040.000000030.00000007
160amazon/nova-micro-v11280000.0000000350.00000014
24qwen/qwen3-8b1280000.0000000350.000000138
193google/gemini-flash-1.5-8b10000000.00000003750.00000015
155cohere/command-r7b-12-20241280000.00000003750.00000015
187mistralai/ministral-3b1310720.000000040.00000004
117deepseek/deepseek-r1-distill-llama-8b320000.000000040.00000004
258deepseek/deepseek-coder1280000.000000040.00000012
203meta-llama/llama-3.2-11b-vision-instruct1310720.0000000490.000000049
80mistralai/mistral-small-3.1-24b-instruct1310720.000000050.00000015
20meta-llama/llama-guard-4-12b1638400.000000050.00000005
189qwen/qwen-2.5-7b-instruct327680.000000050.0000001
97microsoft/phi-4-multimodal-instruct1310720.000000050.0000001
88google/gemma-3-12b-it1310720.000000050.0000001
124qwen/qwen-turbo10000000.000000050.0000002
132mistralai/mistral-small-24b-instruct-2501280000.000000060.00000012
173qwen/qwen-2.5-coder-32b-instruct327680.000000060.00000015
159amazon/nova-lite-v13000000.000000060.00000024
316gryphe/mythomax-l2-13b40960.0000000650.000000065
10microsoft/phi-4-reasoning-plus327680.000000070.00000035
147microsoft/phi-4163840.000000070.00000014
26qwen/qwen3-14b409600.000000070.00000024
259google/gemini-flash-1.510000000.0000000750.0000003
107google/gemini-2.0-flash-lite-00110485760.0000000750.0000003
63meta-llama/llama-4-scout10485760.000000080.0000003
294mistralai/mixtral-8x7b-instruct327680.000000080.00000024
264allenai/olmo-7b-instruct20480.000000080.00000024
163qwen/qwq-32b-preview327680.000000090.00000027
158meta-llama/llama-3.3-70b-instruct1310000.000000090.00000035
266neversleep/llama-3-lumimaid-8b245760.000000093750.00000075
207neversleep/llama-3.1-lumimaid-8b327680.000000093750.00000075
265neversleep/llama-3-lumimaid-8b:extended245760.000000093750.00000075
212mistralai/pixtral-12b327680.00000010.0000001
28qwen/qwen3-32b409600.00000010.0000003
49openai/gpt-4.1-nano10475760.00000010.0000004
186mistralai/ministral-8b1280000.00000010.0000001
118google/gemini-2.0-flash-00110000000.00000010.0000004
234meta-llama/llama-3.1-70b-instruct1310720.00000010.00000028
65mistral/ministral-8b1310720.00000010.0000001
240google/gemma-2-27b-it81920.00000010.0000003
256microsoft/phi-3-medium-128k-instruct1310720.00000010.0000003
255microsoft/phi-3-mini-128k-instruct1280000.00000010.0000001
94google/gemma-3-27b-it1310720.00000010.0000002
22qwen/qwen3-30b-a3b409600.00000010.0000003
142deepseek/deepseek-r1-distill-llama-70b1310720.00000010.0000004
307mistralai/mistral-7b-instruct-v0.128240.000000110.00000019
205qwen/qwen-2.5-72b-instruct327680.000000120.00000039
190nvidia/llama-3.1-nemotron-70b-instruct1310720.000000120.0000003
134deepseek/deepseek-r1-distill-qwen-32b1310720.000000120.00000018
220nousresearch/hermes-3-llama-3.1-70b1310720.000000120.0000003
58nvidia/llama-3.3-nemotron-super-49b-v11310720.000000130.0000004
30qwen/qwen3-235b-a22b409600.000000140.000002
90openai/gpt-4o-mini-search-preview1280000.000000150.0000006
136deepseek/deepseek-r1-distill-qwen-14b640000.000000150.00000015
214cohere/command-r-08-20241280000.000000150.0000006
238openai/gpt-4o-mini1280000.000000150.0000006
103qwen/qwq-32b1310720.000000150.0000002
196liquid/lfm-40b327680.000000150.00000015
41google/gemini-2.5-flash-preview:thinking10485760.000000150.0000035
40google/gemini-2.5-flash-preview10485760.000000150.0000006
239openai/gpt-4o-mini-2024-07-181280000.000000150.0000006
61meta-llama/llama-4-maverick10485760.000000170.0000006
67scb10x/llama3.1-typhoon2-8b-instruct81920.000000180.00000018
130deepseek/deepseek-r1-distill-qwen-1.5b1310720.000000180.00000018
3arcee-ai/spotlight1310720.000000180.00000018
216qwen/qwen-2.5-vl-7b-instruct327680.00000020.0000002
262meta-llama/llama-guard-2-8b81920.00000020.0000002
293mistralai/mistral-7b-instruct-v0.2327680.00000020.0000002
86ai21/jamba-1.6-mini2560000.00000020.0000004
122aion-labs/aion-rp-llama-3.1-8b327680.00000020.0000002
229perplexity/llama-3.1-sonar-small-128k-online1270720.00000020.0000002
145minimax/minimax-0110001920.00000020.0000011
291mistralai/mistral-small327680.00000020.0000006
112mistralai/mistral-saba327680.00000020.0000006
119qwen/qwen-vl-plus75000.000000210.00000063
39thudm/glm-4-32b320000.000000240.00000024
32thudm/glm-z1-rumination-32b320000.000000240.00000024
37thudm/glm-z1-32b320000.000000240.00000024
13inception/mercury-coder-small-beta320000.000000250.000001
292mistralai/mistral-tiny327680.000000250.00000025
126qwen/qwen2.5-vl-72b-instruct320000.000000250.00000075
194thedrummer/rocinante-12b327680.000000250.0000005
279anthropic/claude-3-haiku:beta2000000.000000250.00000125
235mistralai/codestral-mamba2621440.000000250.00000025
280anthropic/claude-3-haiku2000000.000000250.00000125
55x-ai/grok-3-mini-beta1310720.00000030.0000005
269meta-llama/llama-3-70b-instruct81920.00000030.0000004
146mistralai/codestral-25012621440.00000030.0000009
76deepseek/deepseek-chat-v3-03241638400.00000030.00000088
149deepseek/deepseek-chat1638400.000000380.00000089
270mistralai/mixtral-8x22b-instruct655360.00000040.0000012
0mistralai/mistral-medium-31310720.00000040.000002
48openai/gpt-4.1-mini10475760.00000040.0000016
127qwen/qwen-plus1310720.00000040.0000012
8arcee-ai/arcee-blitz327680.000000450.00000075
176thedrummer/unslopnemo-12b320000.000000450.00000045
278cohere/command-r1280000.00000050.0000015
19deepseek/deepseek-prover-v21310720.00000050.00000218
285cohere/command-r-03-20241280000.00000050.0000015
7arcee-ai/virtuoso-medium-v21310720.00000050.0000008
6arcee-ai/coder-large327680.00000050.0000008
271microsoft/wizardlm-2-8x22b655360.00000050.0000005
96thedrummer/skyfall-36b-v2327680.00000050.0000008
144deepseek/deepseek-r11638400.00000050.00000218
305jondurbin/airoboros-l2-70b40960.00000050.0000005
318openai/gpt-3.5-turbo163850.00000050.0000015
319openai/gpt-3.5-turbo-0125163850.00000050.0000015
245ai21/jamba-instruct2560000.00000050.0000007
2arcee-ai/caller-large327680.000000550.00000085
308pygmalionai/mythalion-13b81920.00000056250.000001125
315undi95/remm-slerp-l2-13b61440.00000056250.000001125
206qwen/qwen-2.5-vl-72b-instruct327680.00000060.0000006
289nousresearch/nous-hermes-2-mixtral-8x7b-dpo327680.00000060.0000006
150sao10k/l3.3-euryale-70b1310720.00000070.0000008
121aion-labs/aion-1.0-mini1310720.00000070.0000014
217sao10k/l3.1-euryale-70b1310720.00000070.0000008
5arcee-ai/virtuoso-large1310720.000000750.0000012
295neversleep/noromaid-20b81920.000000750.0000015
221nousresearch/hermes-3-llama-3.1-405b1310720.00000080.0000008
233meta-llama/llama-3.1-405b-instruct327680.00000080.0000008
224aetherwiing/mn-starcannon-12b163840.00000080.0000012
179anthropic/claude-3.5-haiku-20241022:beta2000000.00000080.000004
95thedrummer/anubis-pro-105b-v11310720.00000080.000001
180anthropic/claude-3.5-haiku-202410222000000.00000080.000004
51alfredpros/codellama-7b-instruct-solidity40960.00000080.0000012
50eleutherai/llemma_7b40960.00000080.0000012
267sao10k/fimbulvetr-11b-v240960.00000080.0000012
276sophosympatheia/midnight-rose-70b40960.00000080.0000008
123qwen/qwen-vl-max75000.00000080.0000032
161amazon/nova-pro-v13000000.00000080.0000032
171infermatic/mn-inferor-12b163840.00000080.0000012
300undi95/toppy-m-7b40960.00000080.0000012
177anthropic/claude-3.5-haiku:beta2000000.00000080.000004
178anthropic/claude-3.5-haiku2000000.00000080.000004
228nothingiisreal/mn-celeste-12b163840.00000080.0000012
68scb10x/llama3.1-typhoon2-70b-instruct81920.000000880.00000088
74qwen/qwen2.5-vl-32b-instruct1280000.00000090.0000009
249cognitivecomputations/dolphin-mixtral-8x22b160000.00000090.0000009
4arcee-ai/maestro-reasoning1310720.00000090.0000033
317meta-llama/llama-2-70b-chat40960.00000090.0000009
250qwen/qwen-2-72b-instruct327680.00000090.0000009
230perplexity/llama-3.1-sonar-large-128k-online1270720.0000010.000001
287openai/gpt-3.5-turbo-061340950.0000010.000002
277cohere/command40960.0000010.000002
138perplexity/sonar1270720.0000010.000001
137perplexity/sonar-reasoning1270000.0000010.000005
303openai/gpt-3.5-turbo-1106163850.0000010.000002
42openai/o4-mini-high2000000.00000110.0000044
210openai/o1-mini1280000.00000110.0000044
211openai/o1-mini-2024-09-121280000.00000110.0000044
44openai/o4-mini2000000.00000110.0000044
129openai/o3-mini2000000.00000110.0000044
116openai/o3-mini-high2000000.00000110.0000044
312mancer/weaver80000.0000011250.000001125
201meta-llama/llama-3.2-90b-vision-instruct1310720.00000120.0000012
272google/gemini-pro-1.520000000.000001250.000005
1google/gemini-2.5-pro-preview10485760.000001250.00001
248sao10k/l3-euryale-70b81920.000001480.00000148
181neversleep/llama-3.1-lumimaid-70b163840.00000150.00000225
306openai/gpt-3.5-turbo-instruct40950.00000150.000002
182anthracite-org/magnum-v4-72b163840.00000150.00000225
128qwen/qwen-max327680.00000160.0000064
169mistralai/pixtral-large-24111310720.0000020.000006
286mistralai/mistral-large1280000.0000020.000006
85ai21/jamba-1.6-large2560000.0000020.000008
154x-ai/grok-2-12121310720.0000020.00001
47openai/gpt-4.110475760.0000020.000008
100perplexity/sonar-deep-research1280000.0000020.000008
227meta-llama/llama-3.1-405b327680.0000020.000002
153x-ai/grok-2-vision-1212327680.0000020.00001
168mistralai/mistral-large-24071310720.0000020.000006
98perplexity/sonar-reasoning-pro1280000.0000020.000008
111perplexity/r1-17761280000.0000020.000008
167mistralai/mistral-large-24111310720.0000020.000006
166openai/gpt-4o-2024-11-201280000.00000250.00001
225openai/gpt-4o-2024-08-061280000.00000250.00001
260openai/gpt-4o1280000.00000250.00001
192inflection/inflection-3-pi80000.00000250.00001
91openai/gpt-4o-search-preview1280000.00000250.00001
213cohere/command-r-plus-08-20241280000.00000250.00001
191inflection/inflection-3-productivity80000.00000250.00001
89cohere/command-a2560000.00000250.00001
64all-hands/openhands-lm-32b-v0.1163840.00000260.0000034
175eva-unit-01/eva-qwen-2.5-32b163840.00000260.0000034
290mistralai/mistral-medium327680.000002750.0000081
195anthracite-org/magnum-v2-72b327680.0000030.000003
284anthropic/claude-3-sonnet2000000.0000030.000015
283anthropic/claude-3-sonnet:beta2000000.0000030.000015
309openai/gpt-3.5-turbo-16k163850.0000030.000004
184anthropic/claude-3.5-sonnet2000000.0000030.000015
183anthropic/claude-3.5-sonnet:beta2000000.0000030.000015
275cohere/command-r-plus-04-20241280000.0000030.000015
274cohere/command-r-plus1280000.0000030.000015
109anthropic/claude-3.7-sonnet:thinking2000000.0000030.000015
110anthropic/claude-3.7-sonnet:beta2000000.0000030.000015
99perplexity/sonar-pro2000000.0000030.000015
24401-ai/yi-large327680.0000030.000003
246anthropic/claude-3.5-sonnet-20240620:beta2000000.0000030.000015
247anthropic/claude-3.5-sonnet-202406202000000.0000030.000015
56x-ai/grok-3-beta1310720.0000030.000015
108anthropic/claude-3.7-sonnet2000000.0000030.000015
152eva-unit-01/eva-llama-3.33-70b163840.0000040.000006
257neversleep/llama-3-lumimaid-70b81920.0000040.000006
241alpindale/magnum-72b163840.0000040.000006
165eva-unit-01/eva-qwen-2.5-72b163840.0000040.000006
120aion-labs/aion-1.01310720.0000040.000008
174raifle/sorcererlm-8x22b160000.00000450.0000045
263openai/gpt-4o-2024-05-131280000.0000050.000015
222openai/chatgpt-4o-latest1280000.0000050.000015
170x-ai/grok-vision-beta81920.0000050.000015
185x-ai/grok-beta1310720.0000050.000015
261openai/gpt-4o:extended1280000.0000060.000018
301alpindale/goliath-120b61440.00000656250.000009375
313anthropic/claude-2.0:beta1000000.0000080.000024
297anthropic/claude-2.12000000.0000080.000024
299anthropic/claude-22000000.0000080.000024
298anthropic/claude-2:beta2000000.0000080.000024
314anthropic/claude-2.01000000.0000080.000024
296anthropic/claude-2.1:beta2000000.0000080.000024
304openai/gpt-4-1106-preview1280000.000010.00003
43openai/o32000000.000010.00004
273openai/gpt-4-turbo1280000.000010.00003
288openai/gpt-4-turbo-preview1280000.000010.00003
151openai/o12000000.0000150.00006
282anthropic/claude-3-opus2000000.0000150.000075
281anthropic/claude-3-opus:beta2000000.0000150.000075
208openai/o1-preview1280000.0000150.00006
209openai/o1-preview-2024-09-121280000.0000150.00006
321openai/gpt-4-031481910.000030.00006
320openai/gpt-481910.000030.00006
311openai/gpt-4-32k-0314327670.000060.00012
310openai/gpt-4-32k327670.000060.00012
106openai/gpt-4.5-preview1280000.0000750.00015
78openai/o1-pro2000000.000150.0006
\n", - "
" - ], - "text/plain": [ - " id context_length pricing_prompt pricing_completion\n", - "302 openrouter/auto 2000000 -1 -1\n", - "133 deepseek/deepseek-r1-distill-qwen-32b:free 16000 0 0\n", - "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free 131072 0 0\n", - "113 cognitivecomputations/dolphin3.0-r1-mistral-24... 32768 0 0\n", - "57 nvidia/llama-3.3-nemotron-super-49b-v1:free 131072 0 0\n", - "114 cognitivecomputations/dolphin3.0-mistral-24b:free 32768 0 0\n", - "54 moonshotai/kimi-vl-a3b-thinking:free 131072 0 0\n", - "53 agentica-org/deepcoder-14b-preview:free 96000 0 0\n", - "52 arliai/qwq-32b-arliai-rpr-v1:free 32768 0 0\n", - "231 meta-llama/llama-3.1-8b-instruct:free 131072 0 0\n", - "226 meta-llama/llama-3.1-405b:free 64000 0 0\n", - "125 qwen/qwen2.5-vl-72b-instruct:free 131072 0 0\n", - "45 shisa-ai/shisa-v2-llama3.3-70b:free 32768 0 0\n", - "87 google/gemma-3-12b-it:free 131072 0 0\n", - "92 rekaai/reka-flash-3:free 32768 0 0\n", - "131 mistralai/mistral-small-24b-instruct-2501:free 32768 0 0\n", - "81 open-r1/olympiccoder-32b:free 32768 0 0\n", - "60 meta-llama/llama-4-maverick:free 256000 0 0\n", - "236 mistralai/mistral-nemo:free 128000 0 0\n", - "62 meta-llama/llama-4-scout:free 512000 0 0\n", - "83 google/gemma-3-4b-it:free 131072 0 0\n", - "93 google/gemma-3-27b-it:free 96000 0 0\n", - "79 mistralai/mistral-small-3.1-24b-instruct:free 96000 0 0\n", - "251 mistralai/mistral-7b-instruct:free 32768 0 0\n", - "77 featherless/qwerky-72b:free 32768 0 0\n", - "75 deepseek/deepseek-chat-v3-0324:free 163840 0 0\n", - "242 google/gemma-2-9b-it:free 8192 0 0\n", - "73 qwen/qwen2.5-vl-32b-instruct:free 8192 0 0\n", - "135 deepseek/deepseek-r1-distill-qwen-14b:free 64000 0 0\n", - "72 google/gemini-2.5-pro-exp-03-25 1000000 0 0\n", - "70 bytedance-research/ui-tars-72b:free 32768 0 0\n", - "69 allenai/molmo-7b-d:free 4096 0 0\n", - "101 deepseek/deepseek-r1-zero:free 163840 0 0\n", - "102 qwen/qwq-32b:free 40000 0 0\n", - "66 deepseek/deepseek-v3-base:free 163840 0 
0\n", - "104 moonshotai/moonlight-16b-a3b-instruct:free 8192 0 0\n", - "105 nousresearch/deephermes-3-llama-3-8b-preview:free 131072 0 0\n", - "71 qwen/qwen2.5-vl-3b-instruct:free 64000 0 0\n", - "218 google/gemini-flash-1.5-8b-exp 1000000 0 0\n", - "82 google/gemma-3-1b-it:free 32768 0 0\n", - "156 google/gemini-2.0-flash-exp:free 1048576 0 0\n", - "204 qwen/qwen-2.5-72b-instruct:free 32768 0 0\n", - "21 qwen/qwen3-30b-a3b:free 40960 0 0\n", - "38 thudm/glm-4-32b:free 32768 0 0\n", - "157 meta-llama/llama-3.3-70b-instruct:free 8000 0 0\n", - "18 deepseek/deepseek-prover-v2:free 163840 0 0\n", - "17 opengvlab/internvl3-2b:free 32000 0 0\n", - "23 qwen/qwen3-8b:free 40960 0 0\n", - "16 opengvlab/internvl3-14b:free 32000 0 0\n", - "14 qwen/qwen3-1.7b:free 32000 0 0\n", - "202 meta-llama/llama-3.2-11b-vision-instruct:free 131072 0 0\n", - "12 qwen/qwen3-0.6b-04-28:free 32000 0 0\n", - "11 microsoft/phi-4-reasoning:free 32768 0 0\n", - "162 qwen/qwq-32b-preview:free 16384 0 0\n", - "9 microsoft/phi-4-reasoning-plus:free 32768 0 0\n", - "15 qwen/qwen3-4b:free 128000 0 0\n", - "164 google/learnlm-1.5-pro-experimental:free 40960 0 0\n", - "148 deepseek/deepseek-chat:free 163840 0 0\n", - "199 meta-llama/llama-3.2-1b-instruct:free 131000 0 0\n", - "36 thudm/glm-z1-32b:free 32768 0 0\n", - "35 microsoft/mai-ds-r1:free 163840 0 0\n", - "34 thudm/glm-4-9b:free 32000 0 0\n", - "33 thudm/glm-z1-9b:free 32000 0 0\n", - "188 qwen/qwen-2.5-7b-instruct:free 32768 0 0\n", - "172 qwen/qwen-2.5-coder-32b-instruct:free 32768 0 0\n", - "25 qwen/qwen3-14b:free 40960 0 0\n", - "197 meta-llama/llama-3.2-3b-instruct:free 20000 0 0\n", - "141 deepseek/deepseek-r1-distill-llama-70b:free 8192 0 0\n", - "29 qwen/qwen3-235b-a22b:free 40960 0 0\n", - "143 deepseek/deepseek-r1:free 163840 0 0\n", - "27 qwen/qwen3-32b:free 40960 0 0\n", - "215 qwen/qwen-2.5-vl-7b-instruct:free 64000 0 0\n", - "31 tngtech/deepseek-r1t-chimera:free 163840 0 0\n", - "200 meta-llama/llama-3.2-1b-instruct 131072 
0.000000005 0.00000001\n", - "198 meta-llama/llama-3.2-3b-instruct 131072 0.00000001 0.00000002\n", - "139 liquid/lfm-7b 32768 0.00000001 0.00000001\n", - "46 qwen/qwen2.5-coder-7b-instruct 32768 0.00000001 0.00000003\n", - "243 google/gemma-2-9b-it 8192 0.00000002 0.00000006\n", - "232 meta-llama/llama-3.1-8b-instruct 16384 0.00000002 0.00000003\n", - "84 google/gemma-3-4b-it 131072 0.00000002 0.00000004\n", - "140 liquid/lfm-3b 32768 0.00000002 0.00000002\n", - "115 meta-llama/llama-guard-3-8b 131072 0.00000002 0.00000006\n", - "223 sao10k/l3-lunaris-8b 8192 0.00000002 0.00000005\n", - "253 nousresearch/hermes-2-pro-llama-3-8b 131072 0.000000025 0.00000004\n", - "254 mistralai/mistral-7b-instruct-v0.3 32768 0.000000028 0.000000054\n", - "252 mistralai/mistral-7b-instruct 32768 0.000000028 0.000000054\n", - "268 meta-llama/llama-3-8b-instruct 8192 0.00000003 0.00000006\n", - "219 microsoft/phi-3.5-mini-128k-instruct 131072 0.00000003 0.00000009\n", - "237 mistralai/mistral-nemo 98304 0.00000003 0.00000007\n", - "160 amazon/nova-micro-v1 128000 0.000000035 0.00000014\n", - "24 qwen/qwen3-8b 128000 0.000000035 0.000000138\n", - "193 google/gemini-flash-1.5-8b 1000000 0.0000000375 0.00000015\n", - "155 cohere/command-r7b-12-2024 128000 0.0000000375 0.00000015\n", - "187 mistralai/ministral-3b 131072 0.00000004 0.00000004\n", - "117 deepseek/deepseek-r1-distill-llama-8b 32000 0.00000004 0.00000004\n", - "258 deepseek/deepseek-coder 128000 0.00000004 0.00000012\n", - "203 meta-llama/llama-3.2-11b-vision-instruct 131072 0.000000049 0.000000049\n", - "80 mistralai/mistral-small-3.1-24b-instruct 131072 0.00000005 0.00000015\n", - "20 meta-llama/llama-guard-4-12b 163840 0.00000005 0.00000005\n", - "189 qwen/qwen-2.5-7b-instruct 32768 0.00000005 0.0000001\n", - "97 microsoft/phi-4-multimodal-instruct 131072 0.00000005 0.0000001\n", - "88 google/gemma-3-12b-it 131072 0.00000005 0.0000001\n", - "124 qwen/qwen-turbo 1000000 0.00000005 0.0000002\n", - "132 
mistralai/mistral-small-24b-instruct-2501 28000 0.00000006 0.00000012\n", - "173 qwen/qwen-2.5-coder-32b-instruct 32768 0.00000006 0.00000015\n", - "159 amazon/nova-lite-v1 300000 0.00000006 0.00000024\n", - "316 gryphe/mythomax-l2-13b 4096 0.000000065 0.000000065\n", - "10 microsoft/phi-4-reasoning-plus 32768 0.00000007 0.00000035\n", - "147 microsoft/phi-4 16384 0.00000007 0.00000014\n", - "26 qwen/qwen3-14b 40960 0.00000007 0.00000024\n", - "259 google/gemini-flash-1.5 1000000 0.000000075 0.0000003\n", - "107 google/gemini-2.0-flash-lite-001 1048576 0.000000075 0.0000003\n", - "63 meta-llama/llama-4-scout 1048576 0.00000008 0.0000003\n", - "294 mistralai/mixtral-8x7b-instruct 32768 0.00000008 0.00000024\n", - "264 allenai/olmo-7b-instruct 2048 0.00000008 0.00000024\n", - "163 qwen/qwq-32b-preview 32768 0.00000009 0.00000027\n", - "158 meta-llama/llama-3.3-70b-instruct 131000 0.00000009 0.00000035\n", - "266 neversleep/llama-3-lumimaid-8b 24576 0.00000009375 0.00000075\n", - "207 neversleep/llama-3.1-lumimaid-8b 32768 0.00000009375 0.00000075\n", - "265 neversleep/llama-3-lumimaid-8b:extended 24576 0.00000009375 0.00000075\n", - "212 mistralai/pixtral-12b 32768 0.0000001 0.0000001\n", - "28 qwen/qwen3-32b 40960 0.0000001 0.0000003\n", - "49 openai/gpt-4.1-nano 1047576 0.0000001 0.0000004\n", - "186 mistralai/ministral-8b 128000 0.0000001 0.0000001\n", - "118 google/gemini-2.0-flash-001 1000000 0.0000001 0.0000004\n", - "234 meta-llama/llama-3.1-70b-instruct 131072 0.0000001 0.00000028\n", - "65 mistral/ministral-8b 131072 0.0000001 0.0000001\n", - "240 google/gemma-2-27b-it 8192 0.0000001 0.0000003\n", - "256 microsoft/phi-3-medium-128k-instruct 131072 0.0000001 0.0000003\n", - "255 microsoft/phi-3-mini-128k-instruct 128000 0.0000001 0.0000001\n", - "94 google/gemma-3-27b-it 131072 0.0000001 0.0000002\n", - "22 qwen/qwen3-30b-a3b 40960 0.0000001 0.0000003\n", - "142 deepseek/deepseek-r1-distill-llama-70b 131072 0.0000001 0.0000004\n", - "307 
mistralai/mistral-7b-instruct-v0.1 2824 0.00000011 0.00000019\n", - "205 qwen/qwen-2.5-72b-instruct 32768 0.00000012 0.00000039\n", - "190 nvidia/llama-3.1-nemotron-70b-instruct 131072 0.00000012 0.0000003\n", - "134 deepseek/deepseek-r1-distill-qwen-32b 131072 0.00000012 0.00000018\n", - "220 nousresearch/hermes-3-llama-3.1-70b 131072 0.00000012 0.0000003\n", - "58 nvidia/llama-3.3-nemotron-super-49b-v1 131072 0.00000013 0.0000004\n", - "30 qwen/qwen3-235b-a22b 40960 0.00000014 0.000002\n", - "90 openai/gpt-4o-mini-search-preview 128000 0.00000015 0.0000006\n", - "136 deepseek/deepseek-r1-distill-qwen-14b 64000 0.00000015 0.00000015\n", - "214 cohere/command-r-08-2024 128000 0.00000015 0.0000006\n", - "238 openai/gpt-4o-mini 128000 0.00000015 0.0000006\n", - "103 qwen/qwq-32b 131072 0.00000015 0.0000002\n", - "196 liquid/lfm-40b 32768 0.00000015 0.00000015\n", - "41 google/gemini-2.5-flash-preview:thinking 1048576 0.00000015 0.0000035\n", - "40 google/gemini-2.5-flash-preview 1048576 0.00000015 0.0000006\n", - "239 openai/gpt-4o-mini-2024-07-18 128000 0.00000015 0.0000006\n", - "61 meta-llama/llama-4-maverick 1048576 0.00000017 0.0000006\n", - "67 scb10x/llama3.1-typhoon2-8b-instruct 8192 0.00000018 0.00000018\n", - "130 deepseek/deepseek-r1-distill-qwen-1.5b 131072 0.00000018 0.00000018\n", - "3 arcee-ai/spotlight 131072 0.00000018 0.00000018\n", - "216 qwen/qwen-2.5-vl-7b-instruct 32768 0.0000002 0.0000002\n", - "262 meta-llama/llama-guard-2-8b 8192 0.0000002 0.0000002\n", - "293 mistralai/mistral-7b-instruct-v0.2 32768 0.0000002 0.0000002\n", - "86 ai21/jamba-1.6-mini 256000 0.0000002 0.0000004\n", - "122 aion-labs/aion-rp-llama-3.1-8b 32768 0.0000002 0.0000002\n", - "229 perplexity/llama-3.1-sonar-small-128k-online 127072 0.0000002 0.0000002\n", - "145 minimax/minimax-01 1000192 0.0000002 0.0000011\n", - "291 mistralai/mistral-small 32768 0.0000002 0.0000006\n", - "112 mistralai/mistral-saba 32768 0.0000002 0.0000006\n", - "119 qwen/qwen-vl-plus 7500 
0.00000021 0.00000063\n", - "39 thudm/glm-4-32b 32000 0.00000024 0.00000024\n", - "32 thudm/glm-z1-rumination-32b 32000 0.00000024 0.00000024\n", - "37 thudm/glm-z1-32b 32000 0.00000024 0.00000024\n", - "13 inception/mercury-coder-small-beta 32000 0.00000025 0.000001\n", - "292 mistralai/mistral-tiny 32768 0.00000025 0.00000025\n", - "126 qwen/qwen2.5-vl-72b-instruct 32000 0.00000025 0.00000075\n", - "194 thedrummer/rocinante-12b 32768 0.00000025 0.0000005\n", - "279 anthropic/claude-3-haiku:beta 200000 0.00000025 0.00000125\n", - "235 mistralai/codestral-mamba 262144 0.00000025 0.00000025\n", - "280 anthropic/claude-3-haiku 200000 0.00000025 0.00000125\n", - "55 x-ai/grok-3-mini-beta 131072 0.0000003 0.0000005\n", - "269 meta-llama/llama-3-70b-instruct 8192 0.0000003 0.0000004\n", - "146 mistralai/codestral-2501 262144 0.0000003 0.0000009\n", - "76 deepseek/deepseek-chat-v3-0324 163840 0.0000003 0.00000088\n", - "149 deepseek/deepseek-chat 163840 0.00000038 0.00000089\n", - "270 mistralai/mixtral-8x22b-instruct 65536 0.0000004 0.0000012\n", - "0 mistralai/mistral-medium-3 131072 0.0000004 0.000002\n", - "48 openai/gpt-4.1-mini 1047576 0.0000004 0.0000016\n", - "127 qwen/qwen-plus 131072 0.0000004 0.0000012\n", - "8 arcee-ai/arcee-blitz 32768 0.00000045 0.00000075\n", - "176 thedrummer/unslopnemo-12b 32000 0.00000045 0.00000045\n", - "278 cohere/command-r 128000 0.0000005 0.0000015\n", - "19 deepseek/deepseek-prover-v2 131072 0.0000005 0.00000218\n", - "285 cohere/command-r-03-2024 128000 0.0000005 0.0000015\n", - "7 arcee-ai/virtuoso-medium-v2 131072 0.0000005 0.0000008\n", - "6 arcee-ai/coder-large 32768 0.0000005 0.0000008\n", - "271 microsoft/wizardlm-2-8x22b 65536 0.0000005 0.0000005\n", - "96 thedrummer/skyfall-36b-v2 32768 0.0000005 0.0000008\n", - "144 deepseek/deepseek-r1 163840 0.0000005 0.00000218\n", - "305 jondurbin/airoboros-l2-70b 4096 0.0000005 0.0000005\n", - "318 openai/gpt-3.5-turbo 16385 0.0000005 0.0000015\n", - "319 openai/gpt-3.5-turbo-0125 
16385 0.0000005 0.0000015\n", - "245 ai21/jamba-instruct 256000 0.0000005 0.0000007\n", - "2 arcee-ai/caller-large 32768 0.00000055 0.00000085\n", - "308 pygmalionai/mythalion-13b 8192 0.0000005625 0.000001125\n", - "315 undi95/remm-slerp-l2-13b 6144 0.0000005625 0.000001125\n", - "206 qwen/qwen-2.5-vl-72b-instruct 32768 0.0000006 0.0000006\n", - "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo 32768 0.0000006 0.0000006\n", - "150 sao10k/l3.3-euryale-70b 131072 0.0000007 0.0000008\n", - "121 aion-labs/aion-1.0-mini 131072 0.0000007 0.0000014\n", - "217 sao10k/l3.1-euryale-70b 131072 0.0000007 0.0000008\n", - "5 arcee-ai/virtuoso-large 131072 0.00000075 0.0000012\n", - "295 neversleep/noromaid-20b 8192 0.00000075 0.0000015\n", - "221 nousresearch/hermes-3-llama-3.1-405b 131072 0.0000008 0.0000008\n", - "233 meta-llama/llama-3.1-405b-instruct 32768 0.0000008 0.0000008\n", - "224 aetherwiing/mn-starcannon-12b 16384 0.0000008 0.0000012\n", - "179 anthropic/claude-3.5-haiku-20241022:beta 200000 0.0000008 0.000004\n", - "95 thedrummer/anubis-pro-105b-v1 131072 0.0000008 0.000001\n", - "180 anthropic/claude-3.5-haiku-20241022 200000 0.0000008 0.000004\n", - "51 alfredpros/codellama-7b-instruct-solidity 4096 0.0000008 0.0000012\n", - "50 eleutherai/llemma_7b 4096 0.0000008 0.0000012\n", - "267 sao10k/fimbulvetr-11b-v2 4096 0.0000008 0.0000012\n", - "276 sophosympatheia/midnight-rose-70b 4096 0.0000008 0.0000008\n", - "123 qwen/qwen-vl-max 7500 0.0000008 0.0000032\n", - "161 amazon/nova-pro-v1 300000 0.0000008 0.0000032\n", - "171 infermatic/mn-inferor-12b 16384 0.0000008 0.0000012\n", - "300 undi95/toppy-m-7b 4096 0.0000008 0.0000012\n", - "177 anthropic/claude-3.5-haiku:beta 200000 0.0000008 0.000004\n", - "178 anthropic/claude-3.5-haiku 200000 0.0000008 0.000004\n", - "228 nothingiisreal/mn-celeste-12b 16384 0.0000008 0.0000012\n", - "68 scb10x/llama3.1-typhoon2-70b-instruct 8192 0.00000088 0.00000088\n", - "74 qwen/qwen2.5-vl-32b-instruct 128000 0.0000009 0.0000009\n", 
- "249 cognitivecomputations/dolphin-mixtral-8x22b 16000 0.0000009 0.0000009\n", - "4 arcee-ai/maestro-reasoning 131072 0.0000009 0.0000033\n", - "317 meta-llama/llama-2-70b-chat 4096 0.0000009 0.0000009\n", - "250 qwen/qwen-2-72b-instruct 32768 0.0000009 0.0000009\n", - "230 perplexity/llama-3.1-sonar-large-128k-online 127072 0.000001 0.000001\n", - "287 openai/gpt-3.5-turbo-0613 4095 0.000001 0.000002\n", - "277 cohere/command 4096 0.000001 0.000002\n", - "138 perplexity/sonar 127072 0.000001 0.000001\n", - "137 perplexity/sonar-reasoning 127000 0.000001 0.000005\n", - "303 openai/gpt-3.5-turbo-1106 16385 0.000001 0.000002\n", - "42 openai/o4-mini-high 200000 0.0000011 0.0000044\n", - "210 openai/o1-mini 128000 0.0000011 0.0000044\n", - "211 openai/o1-mini-2024-09-12 128000 0.0000011 0.0000044\n", - "44 openai/o4-mini 200000 0.0000011 0.0000044\n", - "129 openai/o3-mini 200000 0.0000011 0.0000044\n", - "116 openai/o3-mini-high 200000 0.0000011 0.0000044\n", - "312 mancer/weaver 8000 0.000001125 0.000001125\n", - "201 meta-llama/llama-3.2-90b-vision-instruct 131072 0.0000012 0.0000012\n", - "272 google/gemini-pro-1.5 2000000 0.00000125 0.000005\n", - "1 google/gemini-2.5-pro-preview 1048576 0.00000125 0.00001\n", - "248 sao10k/l3-euryale-70b 8192 0.00000148 0.00000148\n", - "181 neversleep/llama-3.1-lumimaid-70b 16384 0.0000015 0.00000225\n", - "306 openai/gpt-3.5-turbo-instruct 4095 0.0000015 0.000002\n", - "182 anthracite-org/magnum-v4-72b 16384 0.0000015 0.00000225\n", - "128 qwen/qwen-max 32768 0.0000016 0.0000064\n", - "169 mistralai/pixtral-large-2411 131072 0.000002 0.000006\n", - "286 mistralai/mistral-large 128000 0.000002 0.000006\n", - "85 ai21/jamba-1.6-large 256000 0.000002 0.000008\n", - "154 x-ai/grok-2-1212 131072 0.000002 0.00001\n", - "47 openai/gpt-4.1 1047576 0.000002 0.000008\n", - "100 perplexity/sonar-deep-research 128000 0.000002 0.000008\n", - "227 meta-llama/llama-3.1-405b 32768 0.000002 0.000002\n", - "153 x-ai/grok-2-vision-1212 32768 
0.000002 0.00001\n", - "168 mistralai/mistral-large-2407 131072 0.000002 0.000006\n", - "98 perplexity/sonar-reasoning-pro 128000 0.000002 0.000008\n", - "111 perplexity/r1-1776 128000 0.000002 0.000008\n", - "167 mistralai/mistral-large-2411 131072 0.000002 0.000006\n", - "166 openai/gpt-4o-2024-11-20 128000 0.0000025 0.00001\n", - "225 openai/gpt-4o-2024-08-06 128000 0.0000025 0.00001\n", - "260 openai/gpt-4o 128000 0.0000025 0.00001\n", - "192 inflection/inflection-3-pi 8000 0.0000025 0.00001\n", - "91 openai/gpt-4o-search-preview 128000 0.0000025 0.00001\n", - "213 cohere/command-r-plus-08-2024 128000 0.0000025 0.00001\n", - "191 inflection/inflection-3-productivity 8000 0.0000025 0.00001\n", - "89 cohere/command-a 256000 0.0000025 0.00001\n", - "64 all-hands/openhands-lm-32b-v0.1 16384 0.0000026 0.0000034\n", - "175 eva-unit-01/eva-qwen-2.5-32b 16384 0.0000026 0.0000034\n", - "290 mistralai/mistral-medium 32768 0.00000275 0.0000081\n", - "195 anthracite-org/magnum-v2-72b 32768 0.000003 0.000003\n", - "284 anthropic/claude-3-sonnet 200000 0.000003 0.000015\n", - "283 anthropic/claude-3-sonnet:beta 200000 0.000003 0.000015\n", - "309 openai/gpt-3.5-turbo-16k 16385 0.000003 0.000004\n", - "184 anthropic/claude-3.5-sonnet 200000 0.000003 0.000015\n", - "183 anthropic/claude-3.5-sonnet:beta 200000 0.000003 0.000015\n", - "275 cohere/command-r-plus-04-2024 128000 0.000003 0.000015\n", - "274 cohere/command-r-plus 128000 0.000003 0.000015\n", - "109 anthropic/claude-3.7-sonnet:thinking 200000 0.000003 0.000015\n", - "110 anthropic/claude-3.7-sonnet:beta 200000 0.000003 0.000015\n", - "99 perplexity/sonar-pro 200000 0.000003 0.000015\n", - "244 01-ai/yi-large 32768 0.000003 0.000003\n", - "246 anthropic/claude-3.5-sonnet-20240620:beta 200000 0.000003 0.000015\n", - "247 anthropic/claude-3.5-sonnet-20240620 200000 0.000003 0.000015\n", - "56 x-ai/grok-3-beta 131072 0.000003 0.000015\n", - "108 anthropic/claude-3.7-sonnet 200000 0.000003 0.000015\n", - "152 
eva-unit-01/eva-llama-3.33-70b 16384 0.000004 0.000006\n", - "257 neversleep/llama-3-lumimaid-70b 8192 0.000004 0.000006\n", - "241 alpindale/magnum-72b 16384 0.000004 0.000006\n", - "165 eva-unit-01/eva-qwen-2.5-72b 16384 0.000004 0.000006\n", - "120 aion-labs/aion-1.0 131072 0.000004 0.000008\n", - "174 raifle/sorcererlm-8x22b 16000 0.0000045 0.0000045\n", - "263 openai/gpt-4o-2024-05-13 128000 0.000005 0.000015\n", - "222 openai/chatgpt-4o-latest 128000 0.000005 0.000015\n", - "170 x-ai/grok-vision-beta 8192 0.000005 0.000015\n", - "185 x-ai/grok-beta 131072 0.000005 0.000015\n", - "261 openai/gpt-4o:extended 128000 0.000006 0.000018\n", - "301 alpindale/goliath-120b 6144 0.0000065625 0.000009375\n", - "313 anthropic/claude-2.0:beta 100000 0.000008 0.000024\n", - "297 anthropic/claude-2.1 200000 0.000008 0.000024\n", - "299 anthropic/claude-2 200000 0.000008 0.000024\n", - "298 anthropic/claude-2:beta 200000 0.000008 0.000024\n", - "314 anthropic/claude-2.0 100000 0.000008 0.000024\n", - "296 anthropic/claude-2.1:beta 200000 0.000008 0.000024\n", - "304 openai/gpt-4-1106-preview 128000 0.00001 0.00003\n", - "43 openai/o3 200000 0.00001 0.00004\n", - "273 openai/gpt-4-turbo 128000 0.00001 0.00003\n", - "288 openai/gpt-4-turbo-preview 128000 0.00001 0.00003\n", - "151 openai/o1 200000 0.000015 0.00006\n", - "282 anthropic/claude-3-opus 200000 0.000015 0.000075\n", - "281 anthropic/claude-3-opus:beta 200000 0.000015 0.000075\n", - "208 openai/o1-preview 128000 0.000015 0.00006\n", - "209 openai/o1-preview-2024-09-12 128000 0.000015 0.00006\n", - "321 openai/gpt-4-0314 8191 0.00003 0.00006\n", - "320 openai/gpt-4 8191 0.00003 0.00006\n", - "311 openai/gpt-4-32k-0314 32767 0.00006 0.00012\n", - "310 openai/gpt-4-32k 32767 0.00006 0.00012\n", - "106 openai/gpt-4.5-preview 128000 0.000075 0.00015\n", - "78 openai/o1-pro 200000 0.00015 0.0006" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"df.sort_values(\"pricing_prompt\")[col_names]" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAABqwAAAHJCAYAAADwyhjGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl0VFXWt58aMlZSGSAEQoAMShImIQgKwYi2imCLLWqD2gqKiC1qK+DYyiAo6CvtAMqkyCCCoiKDjAoSgwwKUQhhzCBDIASSVCWVqZKq7498uVBGEAJHbjjnWetddm6d+9Rv34vd683mnG1wu91uFAqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLhPFSB1AoFAqFQqFQKBQKhUKhUCgUCoVCoVDIjWpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLimpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLivlSB1BcXrjdblwu96WOoUuMRsNFfTYX2yfCKWNGGWsW4dS7T4RTZZTDJ8KpMsrhE+GUMaOMNYtwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjPn2XA0ajAYPBcE5rVcNKcVFxudwUFDgudQzdYTYbCQmxYLeXUlXl0p1PhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4dS7T4RTZdRnRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzXg5EBpqwWQ6t4aVOhJQoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUV3DavMzEweeughOnbsSFJSEm+++SaVlZV/ep/b7WbGjBn07NmTDh060L9/f3755Zc66/Ly8njyySfp1KkTXbt25b///S8lJSV11q1bt46+ffvSvn17evXqxZdffllnTWVlJW+88QZJSUl07NiRhx56iKysrHrXtGjRInr16kX79u3p27cv69evr7OmuLiYl156ia5du9KpUyeeeuopjh8/Xmedy+Vi9uzZ3HrrrbRr146kpCRGjBjhseaBBx4gLi6uzv9lZmbW8SkUCoVCoVAoFAqFQqFQKBQKhUKhUIjCfKkDnI7NZmPgwIFERUUxefJk8vLymDhxIuXl5YwaNeqs986cOZP33nuPkSNHEhcXx/z583n44YdZsmQJLVq0AMDp
dPLII48AMGnSJMrLy3njjTcYMWIE06dP11w///wzTzzxBHfffTcvvfQSmzdv5r///S8Wi4Vbb71VWzd+/HhWrFjBCy+8QHh4ONOmTWPQoEF88803BAYGnldN33zzDa+88gqPPfYY1157LStWrOCJJ55g/vz5dOzYUVv39NNPc+DAAcaMGYOPjw/vvPMOQ4YM4csvv8RsPvU6R40axfr163n88ce58soryc/PZ9u2bXWeW2JiIs8//7zHtcjIyD97VQqFQqFQKBQKhUKhUCgUCoVCoVA0WI6edJB5rASLt5HGVt9LHUeBzhpWCxcuxOFwMGXKFIKDgwGorq5m7NixDB06lPDw8D+8r6KigunTp/Pwww8zaNAgADp37sytt97KRx99xJgxYwBYvXo1+/fvZ8WKFcTExABgtVoZPHgwO3bsoEOHDgBMnTqVDh068OqrrwJw7bXXcujQId577z2tYXXs2DG++OILRo8ezd133w1A+/btueGGG1i4cCFDhgw5r5ree+89brvtNp5++mntO/ft28f777/PzJkzAUhLSyM1NZWPPvqIHj16ABAdHU2fPn1Ys2YNffr0AWDTpk0sXryYr776iri4OO053XbbbXWendVq9WiIKRQKhUKhUCgUCoVCoVAoFAqFQnG5UlLmZMbSXaRnF2jX2kWHMvSOtlh8vS5hMoWujgRMSUmhW7duWmMHoHfv3rhcLjZu3HjG+7Zv305JSQm9e/fWrnl7e3PzzTeTkpLi4Y+Li9OaVQBJSUkEBwezYcMGoOaYvy1btnjspALo06cPmZmZHD58GIDU1FRcLpfHuuDgYJKSkup855/VdOjQIXJycjzy137npk2btOMDU1JSsFqtJCUlaWtiYmJISEjw+M7PP/+crl27ejSrFAqFQqFQKBQKhUKhUCgUCoVCoZCdGUt3kZFT4HEtI6eA6Ut2XaJEilp0tcMqKyuLu+66y+Oa1WolLCzsD2dDnX4f4NGIAoiNjWXOnDmUl5fj6+tLVlZWnTUGg4Ho6GjNcfDgQZxO5x+6AJ544glycnIA8PX1xc/Pr866L774ok5NM2bM4NNPP6WgoICEhASCg4O176z9p9Vq5cknnyQ1NRUvLy86dOiA0+nk0KFDxMbGkpWVRXR0NOvXr+edd94hOzubiIgIgoKCPJ7Pr7/+SqNGjejZsydHjx4FoGPHjkycOJHo6Ght3cmTJ9m2bZvW2PLz8+P+++9n5MiRGAyGMz7vP8Ns1lUfVBeYTEaPf+rNJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zChjzSKcF8t39KTDY2dVLS43pGcXcMJeTtNQ/0uaUWZ01bCy2+1YrdY614OCgrDZbGe9z9vbGx8fH4/rVqsVt9uNzWbD19cXu92uzZY6k7/2n7/PUdvAqaioYPLkyUybNo3t27czceJEj1lUVqvVI6vdbmf37t1s2bLFY77Wr7/+ypEjRzy+c/To0ZjNZm2+1vjx4z0+t9vtuN3uOvO1pk6dSlhYmPad+fn55Obm4u/vT2JiItu3b+fEiRMMHjyYlStXas+ppKSEtm3bkpycTHV1NV988QUffvghRUVFvPbaa2d83mfDaDQQEmKp170yYLX6/fmiS+gT4ZQxo4w1i3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoxw+EU4ZMmYeKznr545K1wX/flvEc5QFXTWs9MzixYsBGDRoENdddx2rV68mJyeHhQsXnnW+FsDWrVvrzNfq1KkTe/fu9ViXlZXFypUrtd1dBQUFjBs3jszMTBITE4GaHWC/n6+1YsUKcnNzNY/b7cbb25s1a9aQmZnJgw8+yLPPPsszzzzDsmXLtJlbX3/9NaGhodp9jz76KElJSSxevJhx48ZhNJ5/J9jlcmO3l573fZc7JpMR
q9UPu72M6mqX7nwinDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYMsCu7gEMnHLQMs9AmKvTPb/gTLlZGf6+znyxm8TZSWOiol1vEc7wcsFr9znnXma4aVlarleLi4jrXbTYbQUFBZ72vsrKSiooKj11Wdrsdg8Gg3Wu1WikpqdtBtdlsNGvWDEBb+/scP/zwA4C2rnb3Vu0sqn79+mnfeXpWPz8/iouL68zX8vb2Ji8vz+M7o6OjPY4ijIqKAmD37t0ABAQEYLPZ6szXatKkCb/99huHDx8mMjISq9VK06ZNady4MZmZmQA0b96cpk2bcuDAAe2+05tVAP7+/rRp04aff/6Z0tJSAgIC6jyrc6GqSv3LeCaqq10X9flcbJ8Ip4wZZaxZhFPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcF4MX15hKa/N/ZmSsirtWoCfmVcGXk1YcP2O2judC80YFuRHu+hQMnIKcLlPXTcaoE1UKI2tvhf8DES8a1nQ1WGKMTExdWZVFRcXk5+fX2em1O/vA8jOzva4npWVRUREBL6+vmf0u91usrOzNUfLli3x8vKqs652blXtupiYGAoKCmjUqJHH2t/PyQoJCfG4r7amsrIySkpKKC8v1z77fQMpOzsbg8HAyZMntc/dbrfHHCqAwsJC7bsBrrjiijM8qZojDc9Gfn4+RqOx3s0qhUKhUCgUCoVCoVAoFAqFQqFQyMnvm1UAJWVVjJvz8yVKVJehd7Sts+urTVQoQ+9oe4kSKWrR1Q6r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSko6432JiYkEBATw+uuvc/DgQQoKCoiPjyc3N5ebbrrJw7906VIefvhh0tLS8PLyokOHDhQVFXH99dcDNbufrrnmGj777DO+/PJLsrOziYiIoKSkhNDQUCIjIwHo0aMHRqMRh8PB3Llz+eSTT2jfvj07d+5k2LBh2ndGRkZy8OBBHnnkEY+aDAaDNl+rRYsWmEwmdu/eTXJyMnl5eTz33HN8++23hISEaLu9rrzySgDGjRvHf/7zH8rKyvjss8+0XVS1s66Cg4P56aef6NatG6WlNcfzrVy5kmPHjtG27al/6W688UZtjtbpnK05eC6Yzbrqg+oCvQ4ZFOmUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSp5h2ZJ+o0q2opKati98FC2sc0qpf7YtYcFODDc/cnkl9Uhr28GquvibDgC587JeJdy4bB7Xa7/3zZX8OhQ4e45ZZbsFgsPPLII/z222989dVXtG7dmmXLlmnrBg4cSG5uLmvXrtWuDR48mNTUVG666SYSExOZPXs2x48fZ+7cuVxzzTUAlJaWav/5kUceoaSkhE8++YTg4GA2bdqkuebPn8+rr75KbGws99xzD2vXrmXbtm0kJiayYMECbV2fPn3IzMwkKiqKO++8k5kzZ1JaWsq6deu0owMnTpzIxx9/TGBgoEdNzZo14+jRo6SkpBAeHk779u2prKykbdu27Nq1i44dO5Kenk779u3x9/dn1qxZ/PDDDzzyyCN4eXkRGxvLnj17iI2NxWg0sn//ft566y1uv/12kpOTKS4uplGjRlx99dXa/K3GjRvz7bff4ufnx88//8zgwYO54ooruPPOOzl06BDz5s3D5XKxYMECOnXqVK936Ha7MRjOfg6oQqFQKBQKhUKhUCgUCoVCoVAoLi8WrNnDp6v3nvHz+3rFce8t8X9hIkVDQ1c7rFasWIG3tzfx8fFMnToVi8VCUlISmzZtIi8vj/DwcABcLhfV1dXafRUVFaSlpdG1a1fS09NJSUkhPj4el8vFypUrtSbVunXrqKysJCkpidmzZ2M2
m+nevTupqans2LGDDh06aOtiYmIwmUxMmjSJiIgITCaTx9F/x44dIzs7Gy8vL/Ly8pg6dSodOnRgx44dLF++nCFDhgDw22+/AdC6dWuPmjZu3OgxXysuLg6omYEFcOTIEaZMmcL06dO1NbVNsGuuuYaff67ZQtmiRQseffRR7rvvPm3d119/jcPh4LXXXmPFihVAzZyswMBA/PxqOsVhYWG43W4OHDjAhAkTcLlc+Pn58f7779e7WVXzbtzY7aX1vv9yxWTS/yBElVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM2NDqHlXdgGHTjhoGWapc1Te+RARevZdSs0b+VNY6KiX+3hRGcUXcTcUNIx3fTlgtfqd864zXTWsUlJSSEpK4oMPPtCu2e12unbtysaNG+nXrx8A8+bN87hv+/btOBwOXnrpJRISErTrEyZM8NiFVdvImjVrlnbN7XZz7bXXsmHDBjp06EBlZSVbtmxh5MiRDBo0SFt32223ceDAAQ4fPkxkZCSpqam4XC7cbjevvvqqlu2JJ54gJSVFa1gdPnwYgFGjRhEfH6/V1KVLF4KCgjzma+3bt481a9YQFxfHQw89RM+ePXnhhRe04xBr52slJydz22238eKLLzJhwgR++eUXzQE1s65CQ0OZNm0aW7Zs4cEHH6Rnz55s2LBBq6dVq1Y0btyYHj16sH//fo4ePcpnn32mNQUvBDVQ7szocRCiaKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU8aMMtYswiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwXQxnXmFpnZlTAX5mXhl4NWHB/ufta9MqlAA/8x8eCxjgZyahZch55y0pczJj6S7Sswu0a+2ia+ZNWXy9zjvjH9EQ3rUs6KphlZWVxV133eVxzWq1EhYW5rG7CSAzM5Px48eTlpaG0VjTnaudL1VLbGwsc+bMoby8HF9fX7KysoiJiWHGjBl8+umnFBQUkJCQQJMmTTT/wYMHcTqdhIaG8uSTT5KamoqXlxdNmjQBID09ncjISLKysggICKC0tJQPP/yQ0aNHExERQVRUlEfW/Px8vLy8GDVqFEeOHMHhcHDVVVdhNBq1HVNwar7WgAEDAJgyZQo7duz4w/la8+fPp6SkBIB//etfNGrUiNjYWK3+yspK3nnnHX799Vd27Nih5f79bCq3282iRYtwuWr+5bnlllt48MEHGTFixHm9N4VCoVAoFAqFQqFQKBQKhUKhUDQsft+sgppZU+Pm/Mx7/0mul/OVgVczbs4fN8Hqw4ylu8jIKfC4lpFTwPQluxjev2O9nAr9oquGld1ux2q11rkeFBSEzWbTfrbZbAwcOJCoqCgmT57MwoUL+fbbb3n77bcZNWqUts5qteJ2u7HZbPj6+mK32zl8+DBr165l5MiRxMXFMX/+fNatW0dgYKDmBnjvvffw8/Nj0qRJlJeX89prrwHw7rvvEhAQwK+//orDUbN98eqrr2bUqFFs3ryZqVOnemQvLi6mcePG/Prrr9x2223aDi+Xy0WLFi20dd26dcNoNLJnzx4ArrzySlavXk1ERIR2VCFAx44dSU1NJSIiAgCz2czWrVt5+umntTXl5eUsWLCAFi1a0LRpUw4ePEhmZiaDBw9m586dtG/fHqg5WtHlchEXF0fr1q1Zt24dM2bMoKCggNGjR+Pt7X2eb5D/n0kNlfs9eh2EKNIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zKjXmndknvjDnVBQ07TafbCQ9jGNztvbrHEAH4zoSUZOAQfzL+yYwaMnHR47q2pxuSE9u4AT9nKahp7/TrBaGsK7lg1dNazOlYULF+JwOJgyZQrBwcGkp6fz/fffs3DhQoYOHXrGY+3cbjcZGRkM
HjxYO+6vc+fOXH311Rw5csRj7aFDh1i5cqW2K8lgMPD000/jdDoZNmwYbrcbk8lE27ZtefXVVwG49tprmTNnDqWlpR7feezYMW666Sa2bdvG2rVrufLKKyksLOTYsWPaui+++AIvLy+uueYavv/+e/bs2UOnTp345ZdfPOZ3LV++nKuvvlo7atDhcNCyZUu2b9+uuaxWK23btuWnn37yqOmjjz6ioKCAiRMnAnDixAkA9u7dy969p4bhffHFFwwZMoSoqKhzeyGnYTQaCAmxnPd9smC1XpzzVUX5RDhlzChjzSKceveJcKqMcvhEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinynj5+47kl7Bvdx4RjS1EhAXUy5FbcOjs33GylOTOLevlBkgKsZBU77tryDxWctbPHZWui/K7aD2/a9nQVcPKarVSXFxc57rNZiMoKEj7OSUlhW7duhEcHKzdV1VV0w0+fdaV3W7HYDBo95pMJqqqqujdu7fm8vb29tjBVbs2MjLS4wi92vlTHTt25K233mLixIl8/PHH9OnTxyPrddddx+rVq7VZV76+vjgcDl5//XWPGjp06EB+fr5HTbXzu+Li4vjPf/7DPffc4zG/69ChQ+Tk5PDss89it9t58cUXWbRoEcuXL+fNN9+ksrJS2xX1ySefcOTIEf7xj39gt9v54YcftGMNAcrKyqiurmbs2LHaMYQAd911F+np6eTn59erYeVyubHbS/98oWSYTPof4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mvJi+kjInUxfvZGfWqV1H7WNCefzO9lj8zm+eU0To2ZsqzRv5U1joqFdOuDh1+3sZzvq5xdt4yTOK9F0uWK1+57zrTFcNq5iYmDqzqoqLi8nPz/doHv1+1lXtZyEhIR73Z2VlERERga+vL4B27N/pLrfbTUlJCWVlZZSXl9OyZUsMBoO2tpbs7GwtD9Q0yYA6u7lqZ0tlZWURGRlJQEAATqfTo1lVXFxMRUWFR3PuXOZ31f4zOjqaX3/9VVsXGxuL0+nk0KFDxMbGAlBQUMDgwYOxWCzY7XbMZs9XXVlZidvtrnPsX+08sMzMTLp06UJ9UAPlzkxDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzXgzfB1/trDPPaVd2Ae9/tfO85zm1aRVKgJ/5D48FDPAzk9Ay5KLUfyF1hwX50S46lIycAlzuU9eNBmgTFUpjq+8lz/hX+GRCVw2r5ORkpk2b5jHLatWqVRiNRpKSTm0g/P2sq8TERAICarY+1u6UcjqdrFmzhuTkZG1dREQEO3bs4OjRo9ruoU2bNmlH+NlsNsLDw/Hx8fHY/QSwYsUK/P39cTqdAFxxxRUAZGRkaDu2bDYbaWlpHjnCwsI4fvx4nZoMBgPl5eVnrKmW03d/1f7z9+tqf6793OFwMGTIEJxOJ4MGDWLChAl/6PXz82Py5Mm8+eabFBYW8uqrr2oztE6fGXa+qBlWdWkI56GqjPrziXDKmFHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVGfGS+WT8Q8pzEPd2XMrK0eTasAPzNjHu56wb/jvVh1D+vXng9+t6usbXTNrjK9ZBTlkxFdNawGDBjAvHnzGDZsGEOHDiUvL48333yTAQMGeOxkqq6u5qOPPuLRRx8FwMfHh6FDh/K///2Pffv2sWnTJhYsWEBRURGDBw/W7rvyyitZvXo1Tz75JMOHD6esrIw333yTNm3akJGRoa0LCgri+PHjjBkzht69e7NlyxaWL19OQkKCtqZRo5qBc/PnzycqKorw8HCmT5+OxWLxmGEVGxvL7t2769TUsWNH0tPT
tXVut5t9+/axatUqAO0/Oxx1tzSuX79eayytX7+ekydPenz+5JNPsmfPHl577TVycnIASE9Px2q10rFjR6BmFpbb7SY3N5fWrVtTWFjIW2+9pTXkDIazb7c8E2qG1dlpCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjBfqEzHPKSTEwoLxt5G29zh7fisgvlUoneKa/PmN58GF1h0SAq8Pu47c/BJyTzguaG7XmdDbu5YZXTWsgoKCmDNnDuPGjWPYsGFYLBbuvvtunnnmGY91tbOoTmfIkCFMmzaN/fv38+ijj5KQkMBHH31EixYttDUhISG43W5atGjB8OHDMZvN3HzzzcTHx7N7927t2L4mTZrQokULtm3bxhdffEFERATjx4/niy++0NbU/jMpKYlJkybhcDhITEzk9ddfZ8iQIdrnjRs3xmq1YjKZPGoymUwcOnRqsJ2XlxfLli1j2bJlAHz99dd8/fXXGI3GOt/5yiuvaPe99NJLHs8PauZ4ATz//PMezwdg7969QM2Mrnbt2rFjxw727dsH1Bxn+O9//5sPPviAsLCwc3pnv0fNsPpjTCb9n4eqMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZjxeVEZxeTVWXxNhwfVvZIic5xTbLJBOcU2w28suaCbU6Vzs5xjgY+LqhHBdZxTx5/tywGptoDOsoGZH0uzZs8+6pmPHjgQHB3tcKykpobS0lNdff51+/fr94X21s6ueeuopPvjgA+36xIkTPWZdxcTEsG/fPq15BDU7oP7v//5PO5qwZcuWeHl5cfXVVzN58mRt3bp16zy+KyYmhqKiIt59912POVZPPvmkxyytdu3aERwczPvvv69dKy4upkuXLh4ugPfff5+bbrpJWzdv3jzeeOMNrTlX25QC+Oqrr3jxxRfZtGkToaGhHs9w/vz5QM3uq7vuuouJEycSHR3NBx98wFVXXfWHz/BcUOdznpmGcB6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2WUwyfCqTLK4RPhVBnl8IlwyphRxppFOGXIWFLmZMbSXR7H+LWLDmXoHW2x+Hqdt++vmOckw3v5K5wiMspCgzxMMTk5mR9//BG73a5d+6NZV7+ndtbVypUrtWt/NOsqOTmZPXv2aMfpQc2sq6KiIq6//noAvL29ueaaa1i9erXHd6xYsYLY2FgiIyMB6NGjB0ajkTVr1mhrbDYbqampdb7zz2pq0aIFUVFR2rGBp39nt27d8Pb2PvNDOwu1zT8fHx/mz5/P1Vdf7dFMUygUCoVCoVAoFAqFQqFQKBQKxbkzY+kuMnIKPK5l5BQwfcmuejuH3tGWNlGhHtfaRNU0wRSKywHd7bDKzMxk/PjxpKWlYbFYuOOOO3j66ac9mjFnmnXVvn17+vfvT0FBAQkJCVRVVWG321m7di1Q05C57777mDFjBh999BHe3t4EBwfXmXXVq1cvJk2axO23347L5SIkJITKykp69uxJhw4dtHVDhgxh0KBBXHXVVbhcLpo0acKRI0d4++23tTVNmzbllltuYfTo0YwdOxY/Pz/8/f0JCAhgwIAB2rp+/foxbdo0unfvDtQcS1hYWOgxv6u4uJjAwECWLVvGihUr8PPzIyAggPz8fG23FMALL7zA4sWLPZ5rt27dABg0aBAvvvii9p83bdqkrfnPf/4DwJgxY+rx5k5xocPuLkcawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqJ+PRkw6PnVW1uNyQnl3ACXs5TUP9z9sbFODDc/cnkl9Uhv0iHDNYiyzvRbRTREbZ0FXDymazMXDgQKKiopg8eTJ5eXlMnDiR8vJy
Ro0apa37o1lXrVu35pdffuHZZ58lLi6O+fPn891333nMYnI6naxfv57Q0FCqq6spLi7m+PHjtGvXzmPW1a+//sqxY8eIiIjgxIkT2Gw2Kisr6d27t0feFStW4Ovri9Vq5eTJk+Tn5xMYGEiPHj08avrpp59o3LgxpaWllJWV4XA46N27N4GBgdq6999/H5fLRfPmzcnNzSU/P5+qqir69u2rramsrKRVq1bExsayadMmTp48SXV1NWaz2eOIxMcff5zU1FTy8/PrPOPS0lKqq6sxmUwYjUa8vLwwGAxUVlYSGxvL448/ru0iqw9Go+G8h/vJREMY4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwiXCqjHL4RDhVRjl8IpwyZpSxZhHOyz1j5rGSs37uqHRd0O9QRf3+9XJ/L3+VU0RGWdBVw2rhwoU4HA6mTJmiNWCqq6sZO3YsQ4cO1XYageesq4qKCrp3787gwYMZNGgQAJ07d+bWW2/1OHZv9erVHDhwgBUrVmhH3qWmpjJ48GB27Nih7Z6aOnUqV111FQsXLtTuHTFiBDNmzOAf//gHAMeOHeOLL75g9OjR9O/fH4CioiJuuOEGFi5cyJAhQ7SaSktLWb9+vVbTZ599xtixY3nuuecIDw8nLy+Pzz//nBdffJEHHngAqJmZ1bdvX6ZPn87UqVMBaNSoEZMmTfJ4Zg6HQzua8LHHHgNq5mulpqZqa7Zs2cKDDz4IwOeff05OTg7z5s2jSZMmREVFMW3aNP72t7/x1FNPceutt57nW/PE5XJjt5dekONyxGTS/wA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTJeHOfxojKKL3D3kr+X4ayfW7yNFBY66uUGOd+LrBkvB6xWv3PedaarhlVKSgrdunXz2C3Uu3dvRo8ezcaNG+nXr98f3rd9+3ZKSko8dkB5e3tz8803a8cB1vrj4uI85jMlJSURHBzMhg0b6NChA5WVlWzZsoWRI0d6fEefPn1Yvnw5hw8fJjIyktTUVFwul0eDJzg4mKSkJFJSUrSG1bnUtGfPHqqrqz3mbxkMBnr06MEnn3xCZWXlGedT+fv74+Pjg9PpPONzbdKkCQaDgQEDBnDXXXdhsYjdAaUGyp2ZhjDAT2XUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqkyyuET4VQZ5fCJcKqMcvhEOGXMKGPNIpx6zFhS5mTG0l0eR/m1i66ZD2Xx9TovV1iQH+2iQ8nIKcDlPnXdaKiZOdXY6ntR6pfhvYj2iXCKyCgLujpMMSsry6OZBGC1WgkLCyMrK+us9wF17o2NjSU3N5fy8vIz+g0GA82aNWPBggV07NiR6667DqfTScuWLeu4Tv+urKwsGjVqxGeffabNturfvz8BAQEeWbOysggPD+fJJ5+kU6dOdO3alTfeeIPGjRtr6yorKwHYtm0bffv2pX379vTq1YusrCwqKys5fPiw5qusrGTixIl0796dq666ihtuuAFA2/kFsHLlSv7973+TnJxMx44deeCBB3C73QwePJj27dt7PIPffvuN22+/HaiZYdWzZ88zPmeFQqFQKBQKhUKhUCgUCoVCobgcmbF0Fxk5BR7XMnIKmL5kV718Q+9oS5uoUI9rbaJqGmAKheKP0dUOK7vdjtVqrXM9KCgIm8121vu8vb3x8fHxuG61WnG73dhsNnx9fbHb7R5zo6BmxlRmZia+vr5MnjyZrVu3MmPGDL7++mtuvPFGD1ft+trvrK6u5r333mPkyJHa3KxvvvkGl+tU99Rut7NmzRpCQ0OZNGkS5eXlvPHGGzgcDs3VqlUrAF555RX++c9/8tJLL7F582btKMDTax8/fjyLFy/Wmly1c7NO38E1e/ZsmjdvzgsvvEBISAgjRowAYMmSJTzxxBPauuDgYPr27YuPjw/z
58/HaDRy9OhRpkyZ4rHufDGbddUH1QUNYYCfyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnr7zx60uGxs6oWlxvSsws4YS+naaj/eTmDAnx47v5E8ovKsF/gEYO/R5b3ItInwikio2zoqmF1KVi4cCFVVVXEx8dz3XXX4e/vz4wZM1i7di15eXkec7NOp7q6mqKiIh599FGPuVlJSUmUlJwaqudyuThx4gTz5s3TdjZZrVYGDx7MyZMnAWjdujVWq5Xy8nLuvPNOoqKi2LVrFwaDAbfbjcFQc+Zp7dysZ555hmuvvZb8/Hw+/fRTfvjhB959911efvlloGYGV2hoKACZmZmcPHmSTp068fHHH/P4449jNNb8C1NUVMTixYs9sgLMnDmz3g0ro9EgbOjf5UBDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRzovpe3vBNtIzT9Lhisb8Z0BivRyZx0rO+rmj0lXv33mK/F2pnt+LKKesGWVBVw0rq9VKcXFxnes2m42goKCz3ldZWUlFRYXHLiu73Y7BYNDutVqtHs0kqJkxFRgYSOPGjQG0tS6Xy2Nult1u9/i8rKwMt9tdZ25WVFQU6enp2jWz2UxAQECduVkGg4HCwkKg5pi/0tJSGjduzIABAwBo3rw5t912G8uXL6e6uhpAm5v1z3/+U8vRs2dPunbtytKlS7WGVW2zCmDZsmWYzWb+9re/kZaWRmlpKQEBAQBMnDiRxx9/nL59+7Jw4UJmz57NDz/8wIkTJygvL8fX1/eMz/xMuFxu7PbS877vcsdk0v8AP5VRfz4RThkzylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYswiljRhlrFuGUMaOMNYtw6t0nwilbxtQdR5ixdLf287c/HeLbnw7x2B1t6N4+4rxc/l6Gs35u8TZSWOioV07Z3osop6wZLwesVr9z3nWmq4ZVTExMnVlVxcXF5Ofn15k99fv7ALKzs4mPj9euZ2VlERERoTVeYmJi2Ldvn8e9mZmZlJeXa46WLVvi5eWFj49PnVlUp3+X2Vzz6GobXbVUVVXhcrm0ho/ZbMbLy3MoX0lJCW63m6qqKgAOHjxIVVUV48aNIyYmhvLycqKjo/nf//4HgMPh0DI0atTIo3lnNBpp3rw5+/fv/8Nn880339CtWzf27NlDeHi41qyq5bXXXuOOO+7weG4Xihood2YawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPhlDGjjDWLcF4M3+nNqtOZtiSDrglNz8sVFuRHu+hQMnIKcLlPXTcaauZONbb6XnBeWd6LaKesGWVBVw2r5ORkpk2b5jHLatWqVRiNRpKSks54X2JiIgEBAaxcuVJrvDidTlasWAFAx44dsVgstGvXjj179pCTk0NUVBRQs3vL5XJx/fXXAzW7pLp27cqWLVv45JNPmDt3LgkJCQQEBBAbG0tkZCQATZo0AeDRRx8lJycHLy8vrr/+eg4cOKB5fX198fb25sSJE9x2220cPHiQiIgIOnXqpH1X7VqomTG1efNmHA4HHTp04NChQx6f2+12fH19eeihh0hLS8NisXD77bdz4sQJj7lZZWVlfPDBByxevJj8/HxsNht2u53nnntOW1NZWcnw4cP54Ycf8PLyYuHChfTu3Zvi4mKuvPLKeu2uqkXNsKpLQzgPVWXUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRThlzChjzSKceveJcMqUcfqSnWf9/OMVGQzp2+68nMP6teeDxTvZmVWgXWsbHcrjd7a/oN93yvReRDplzSgbumpYDRgwgHnz5jFs2DCGDh1KXl4eb775JgMGDPCYJTVw
4EByc3NZu3YtAD4+PgwdOpTJkycTGhpK69atmTt3LsePH6ddu3a89tpr5OXlMWHCBKxWK08++STDhw+nrKwMl8tFTEwMHTp00PyRkZFs3LiRoKAgHnnkERYtWsQvv/zCK6+8oq3x968Zsrdnzx7uv/9+/P39mTVrFm73aS14wMvLi+rqamw2G0899RTbtm1j8eLFhISEeBxfCLBmzRoeeOABXC4XCxcupLKy0uPzffv2ceTIEby9vXniiSc4ePAgc+fOxeVyaXOpAF599VXWrFlDmzZtKCoqwul04na7KSoq0tbs3buXtWvX0qhRI5o1a0Z6ejpbt26loqKC//znP/V8g2qG1Z/REM5DVRn15xPhlDGjjDWLcMqYUcaaRThlzChjzSKceveJcKqMcvhEOFVGOXwinCqjHD4RThkzylizCOeF+vYftp/1872HbOf9O8qQEHh92HXk5peQe8JBRGMLEWEBf37jOSLDe/krnLJmlAVdNayCgoKYM2cO48aNY9iwYVgsFu6++26eeeYZj3Uul4vauU61DBkyBLfbzaxZsygoKCA0NBRvb28+/PBDgoODAaiurmbMmDFcddVVDB8+HLPZjLe3N8nJyZqnoqKCb775Bm9vb9xuN//73/9o1qwZwcHB2u4pgCNHjgDQt29fVqxYgcPh4MorryQjI8NjblZZWRmBgYFcccUVTJkyBYvFQkxMDEeOHNHWOJ1OoGbH1dy5cwkODua2225j2bJlVFdXa+vKy8txu90UFhbyzjvvEBYWRlxcHBkZGR6zt1auXMlDDz3EZ599ho+PDxEREVx55ZWsWLGC4cOHA7Bu3Tr8/PwwmUzs3r1bu7dZs2Zcc801VFZWajvAzgc1w+qPMZn0fx6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhbAgZjxeVUVxejdXXRFhw/X+pf2WkleOFZWf8PK5FUL1nTgX4mLg6IRy7vazejtNpCO9FZdRvxssBq7WBzrACiI2NZfbs2WddM2/evDrXDAYDQ4cOZejQoQDcf//9tG3bVmtWAfTu3ZvRo0fTu3dvZs6cqa07fPiwtmb79u2UlJRgMBh49tln6devHwATJkzQdnQBHDt2DICHHnqIN954AwC3281VV12Ft7c3vr6+VFZW4nA4aNKkiUdN3377LcOGDdPmX+Xk5AA1xwvW5gc4cOAAv/zyizY3q6KiAoDVq1drDSq73U6XLl20n2tnY/n7+9OqVSuOHj3Khx9+yMyZMz12fx09epSysjLKyk79F3thYSEAXbp0YcyYMdx7771neQtnRp3PeWYawnmoKqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWNGGWu+GM6SMiczlu4iPbtAu9YuOpShd7TF4ut13r7Bt7Vl4868M37+UJ82ups5pcf3ItonwilrRlnQXcPqYpGVlcVdd93lcc1qtRIWFkZWVpZ2LTk5malTp/LAAw+wc+dO7Wi938/Nio2NZc6cOZSXl+Pr64vNZsNsNvP6669z8OBBCgoKiI+Pp7q6mpCQEAAOHjyI2+3m+PHjPPzww6SlpeHl5cWVV14JoM3DOnjwIF5eXixZsoRvvvmG7OxsIiIiKC0txWQyaeuKioowGAw888wz7N27F4fDQfv27QEICwsDwGQyERQUxFtvveVRYy3ffPMNt912G0OGDGH37t3s2bOnzrObOHEi3bt3r+eTVygUCoVCoVAoFAqFQqFQKBSKU8xYuouMnAKPaxk5BUxfsovh/TvWy/lo3wRmLN39h9cVCkXD5LJtWNntdqxWa53rQUFB2Gw27ec+ffrwzjvvsHv3bh577DFWr15NRkYGsbGxHnOz5s6di9vtxmaz4evrS0lJCY0bN2bLli3cdNNNJCYmMnv2bKqqqrTvrf0es9nMTz/9xCOPPEJJSQmf
fPIJAKGhoR5ZMzMziY2NZfjw4axdu5acnByP2VQlJSWEhISwceNG+vXrR6tWrbSdYk2aNNHWJSYmsmbNGo+6mzdvzrFjx+jcuTNQ04CzWq0kJiZyzTXXMHXqVLp168ahQ4fo06dPnfla58OFDCG8XGkIA/xURv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinjBllrFmEU8aMMtYswql3nwinXjMePenw2FlVi8sN6dkFnLCX0zTU/7y9PTo0p0eH5ny0LJ3dB20ktAxi8O3t6p2zFlnei0ifCKesGWXjsm1YnSsrVqzA29ub+Ph4pk6ditFoxGAwcODAAfLy8rSm1enH6dX+nJ+fT9euXUlPTyclJYX4+HgKCws5ceKEx1qn00lSUhKzZ8/GbDbTpUsXtmzZwsGDB7U1paWlxMTEYDKZmDRpEhERETRr1oyjR496fGdBQQHdu3dnw4YNOBwOOnTowNatWz1cP/74Y506a2duVVVVeVy3Wq20bNkSgJCQEE6cOHFBzSqj0XDeAw1loiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRn35tu89zt7NvxHfKpROcU3+/IbzQK81i3TKmFHGmkU49e4T4dRbxsxjJWf93FHpuqDfJ4588Jp633s2Lvf38lf4RDhlzSgLl23Dymq1UlxcXOe6zWbT5j0BpKSkkJSUxAcffADA/PnzefXVV3G73dpOJoCBAwcyatQo7V6TyUR1dTUvvfQSCQmntpled9112s6q2rWRkZHMmjVLW5Odnc2tt95KdnY2AAEBAZSVldG/f38GDRqkrXvqqac4evQohw8fJjIyEl9fXxwOB++8845HDR06dCA/Px+Affv2UVJSwtSpU7nxxhuBmrlc9957L2az2WOm1+955ZVXtF1f9cXlcmO3l16Q43LEZNL/AD+VUX8+EU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8a8glLGfryVkrJTf2E0wM/MmIe70iTk/HcQiMgoyifCKWNGGWsW4dS7T4RTrxn9vQxn/dzibaSw0FEvN+j/Oer1vYj0iXDKmvFywGr1O+ddZ5dtwyomJsZjVhVAcXEx+fn5xMTEaNd+P+uq9rOQkBCP+7OysoiIiMDX1xeAwMBAj/VQswOqpKSEsrIyysvLadmyJQaDQVtbS22jqrahVnuE4OlHEELNEYC13x0ZGUlAQABOp9OjWVVcXExFRYXmOnDgAIBHE2358uX4+PhQUVFBXl4eAQEB2mdbt25l48aNAPz73/9m5MiRdOnSpe4DPQ/UQLkz0xAG+KmM+vOJcMqYUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRjl8Ipwqoz58v29WAZSUVTFm1lbe+0/yGe46P/RW81/hlDGjjDWLcOrdJ8Kpt4xhQX60iw4lI6cA12mHWBkN0CYqlMZW34uSV+/PUW/v5a/wiXDKmlEWGmzDKjMzk/Hjx5OWlobFYuGOO+7g6aefxtvbG4Dk5GSmTZvmMctq1apVGI1GcnJy6NmzJwUFBVRUVGiNIaiZ/xQQEIDL5WLFihXMnz8fs9lMVVUVffr00dZFRESwY8cOFi9ezKeffkp2djYhISGUltbsLrLZbISHh+Pj40N+fj5vvPEGS5cuxeFwYLFY8PX1xel0AnDFFVcANUf5ff7556SlpeHn54fdbtdcAGFhYRw/fpzHHnuMjIwM8vLy6NWrFwaDgfLycqBmVhXACy+8QG5uLnl5eVqTq7KykoiICK0Gt9ut5QX45Zdf+Ne//sXgwYN57rnn6v1u1AyrujSE81BVRv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinLBl3ZJ6o06yqpaSsit0HC2kf06jefj3WLNopY0YZaxbh1LtPhFPPGYf1a88Hi3eyM6tAu9Y2OpTH72x/wb9L1Ptz1PN7
EeUT4ZQ1o2wY3L8fztQAsNls3HbbbURFRTF06FDy8vKYOHEiffv2ZdSoUR5roqOjPdbExMSwa9cuRo4cSVxcnHYE37fffkuLFi0AmDp1Ku+88w6+vr4MHTqU9evXs3PnTrp27crcuXMBmDJlCpMnTwZqjgFs3749c+fO1ZpfKSkphIeHk5ycTF5eHl5eXjz44IMcP36cZcuWYTab6dy5M3PnzmXbtm3cd999GAwGWrVqxZ133snXX39NTk4Obrebt956i9tvv53nnnuOpUuX4ufnR5cuXdiwYQM+Pj60adOG9PR00tPTqa6uJjk5maKiIvr06YOPjw+LFi0Cahpeqamp2nMcOXIkq1at4sYbb2T16tUMHjyYRYsWER0dzeeff16vd+N2uzEYzr7NV6FQKBQKhUKhUCgUisuRBWv28OnqvWf8/L5ecdx7S/xfmEihUCj0RW5+CbknHEQ0thARFvDnNygUCqlokDusFi5ciMPhYMqUKdpMpurqasaOHcvQoUMJDw8nKCiIOXPmMG7cOIYNG4bFYuHOO+/kiy++4OGHH9YaVWazmerqaj766CPGjBkD1MycAjAYDEydOpWEhAReeOEFJkyYwI4dO+jQoQMhISFAzZyqbdu28euvv9KrVy92795NRkaGdmxfSEgIeXl5hIaGMnfuXCIiIvjvf//LhAkTKCoq0hwARqMRm83G1KlTSUxMpEePHsybN4/anmLjxo0JCgoiISGBLVu2ADXzqzp27MihQ4cAMJlMzJ49mzlz5vDjjz9y7NgxjEYjN954I99++y3p6em0a9cOgO+//x6n08nq1asB+Oijj4CaOVj1Rc2w+mNMJv2fh6oy6s8nwtkQMu7KLuDQCQctwyy0iQrVXT4RTpVRnxllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpS8aI0LMPWW/eyP+yntMiwiljRhlrFuHUu0+EsyFkDPAxcXVCOHZ72QX99+Hp6P05NoT3ojLqN+PlwGU/wyolJYVu3bppzSqA3r17M3r0aDZu3Ei/fv0AiI2NZfbs2dqaTZs2MWfOHHr37q1d69ixI8ePHyclJUW7tm7dOgBGjRqludxuN1OnTmXDhg106NCBli1bAnDXXXfx/PPPa/c+/vjjZGRkcOLECSIjI7WZV8uWLdMaU263mzfffFObO1U76yomJobly5drruXLlzNv3jzy8/OBmnlZNpuNd999l6CgIOLi4rjhhhv45ZdfPGZpXXnllYwfP57y8nK6detG3759efDBB/n22285fvy4tm7gwIHMmjWLtLQ07drYsWO1BlZ9UedznpmGcB6qyqg/nwinHjPmFZby2tyf6wynfmXg1YQFX9hw6ouR769wqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCOflnrFNq1AC/Mx/eCxggJ+ZhJYhUsxpEeGUMaOMNYtw6t0nwqkyyuET4VQZ9emTiQZ5mGJWVpZHgwbAarUSFhZGVlbWWe8DPO5NTk7m2LFjHDlyRJsD9euvv2IwGEhKStLWGQwGoqOjNUejRjVnTh87dkxb43Q6SU9P9/iu2qZaYWGhtm7Tpk04nU4cjpq/ReDt7Y3JZNJmVtXy/fffYzKZtHt79OiB0WhkzZo12pry8nJSU1NJTk6uU++6desoLS3l9ttvZ9u2bXVqr73/2muvpU2bNtx666188803tG/f/ozPUKFQKETx+2YV1JzzP27Oz5cokUKhUCgUCoVCcf68MvBqAvw8/35w7V/EUigUCoVCoVCcmQa5w8put2O1WutcDwoKwmazaT9nZmYyfvx40tLSsFgstGrVCm9vb3x8fLQ1AwYM4MMPP6SyspK1a9dSWVnJ0aNHCQ0NpX///hQUFJCQkEBVVRUHDhzA37/mb/mXlZUBsHLlStauXYu3tzfBwcHarqnaHI0aNcJsNnP77bfjcrkICQmhsrKSmJgY7Rg/qNl1lZeXx1VXXYXL5aJJkyYcOXKE8PBwzdW0aVNu
ueUWRo8ezdixYwH4+OOPCQgIYMCAAZprxYoVrFy5kg0bNgDw448/smjRIv72t78RFRWlrZkyZQpwqpmWnZ0NoM3yqi8XOijxcqQhDPBTGfXnE+HUa0aRw6n1WrNInwinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8ZmjQP4YERPMnIKOJh/8Y66Pj2b3moW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyppRNhpkw+pcsNlsDBw4kKioKCZPnkxeXh5jx46lqsrzF6JBQUEMGzaMCRMm8N///peAgAC8vLwoKCjghRdeIC4ujvnz5/Pdd99hNp96XLUeq9WK0WikuLiY48ePEx8fz86dO7V1J0+epKqqisjISE6cOIHNZqOyspKkpCSPhpXL5cJsNhMUFMTJkyfJz88nMDBQa5DV1vTTTz/RuHFjSktLcTqdlJSUcP311xMYGKitW7VqFTk5OTidTgDtmMHaGV1waoeYxWKhvLwcX19fWrduTXV1NV9//TXPP/88Xl5e5/3cjUYDISGW875PFqzWs59nfql9IpwyZpSx5gt15hYcOuvnR06Wkty5Zb39oL+a/wqfCKeMGWWsWYRTxowy1izCqXefCKfKKIdPhFNl1JcvKcRC0p8vqxd6rVmkU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPKarVqO5lOx2azaXOiFi5ciMPhYMqUKdqxfJs2bWL58uUcPHhQm0EF4O/vj8FgYOvWrRgMBjp27EhMTAyDBg0CoHPnztx6661UVFRo/tqm1LPPPss999wDQGpqKoMHDwbQ1u3fvx+z2cx3332nfd+IESNITU3V1hw7dgy32023bt348MMPASgqKuKGG27g+PHjHjWVlpayfv16goODiYuL4+abb2blypU899xzhIeHA/DOO+9gNBpxu93Ex8dz9OhRFi1aRJMmTbQMzZs3B+D111/n1ltv1a6vXLmSp59+moMHDxIbG3t+LwZwudzY7aXnfd/ljsmk/wF+MmY8XlRGcXk1Vl8TYcEX/j8kyzZms+dgEW1aBXNb9+gL9oE870XkcGq91izSJ8IpY0YZaxbhlDGjjDWLcOrdJ8KpMuozo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpyyZrwcsFr9znnXWYNsWMXExNSZVVVcXEx+fr42oyklJYVu3bppzSqAPn36sHz5cpYtW8awYcO061lZWURERODr68umTZtwuVy4XKf+QHl7e3PTTTcxb948zb97924ASktPNWeSkpKwWCw4HA5iYmKorKzk2LFjVFdXezTTanNcddVVQE2jC8BoPPXSgoOD6dq1K99///1Za4qPj2ft2rVs3LiRfv36eXjeeOMNAO68807i4+PP6xlfCGqg3JlpCAP8ZMhYUuZkxtJdpGcXaNfaRYcy9I62WHzPf2dhRk4Bby38Rft5Z+ZJPluXyXP3dSS+5cU5+uNyfy9/xXBqvdX8V/hEOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpa0ZZaJCHKSYnJ/Pjjz9it9u1a6tWrcJoNJKUVLPhPisrS2v01NKjRw8MBgMpKSnaNafTyZo1a0hOTtbuA8jJySEnJ0dbZ7PZcLlczJgxg6SkJDZv3kyjRo1YvXq1tsZgMODt7U1AQACRkZEcPHiQ6upqDAYDN998Mx06dKB///4UFRUBcMUVV2jf6efnR2pqKh07dqRr167897//1Y70O70mk8lE3759ad++PVCzgyssLMyjgVdZWcl9993Hxx9/DMDmzZvrNPi2bNkCwNNPP01cXBwJCQn8/e9/58svv8RqtXrsQFMoLjdmLN1FRk6Bx7WMnAKmL9lVL9/pzarTefPTP76u+GPUcGqFQqFQKBQKhUKhUCgUCoVCXhrkDqsBAwYwb948hg0bxtChQ8nLy+PNN99kwIAB2rF4drud1atXs3r1atau
XQuAj48PoaGh7Nixgzlz5tC6dWsWLFhAUVGRdpSf3W7Hy8uL6OhonnzySYYPH87JkydZsmQJUHOEXkVFBS+//DIRERH88ssvjBkzht69e7NlyxYKCwuJi4sDappctVRUVPCvf/2LtLQ0Xn75ZQA6duwI1Bz/V9ucioyM5MYbb2TBggWUlJRgNBq1mmw2G2vWrKF79+7cfPPNTJkyhZUrVxIcHMyePXu07xoyZAjbtm2jY8eO/PLLL5SWlnLffffxv//9j/j4eEJDQyktLcXHx4dOnToRFRVFRkYGO3bsYP/+/fWeX1WL2dwg+6BCaQgD/GTJePSkw2NnVS0uN6RnF3DCXk7TUP8/uPOPWfJD1lk/X7n5N27vUf/jAWV5LyBuOLWeaxblE+GUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mlLFmEU5ZM8qGwe12uy91iPqQmZnJuHHjSEtLw2KxcMcdd/DMM8/g7e0NQNu2bWnatClut5t169Zp99122234+fmRn59PQUEBCQkJvPjii3Tq1AmAqVOn8sEHH/Dtt98yfvx4UlNTqa6uxul04nK5SElJITw8nG7dulFYWMhrr73G7Nmzyc7O1o4VbNy4MbNmzWLz5s0MHDiQ22+/nbCwMJYsWYLD4cDlclFZWclbb73F7bffzoMPPsiWLVuYPn06s2fPJi0tDW9vb+x2O0ajUTt+MD4+HovFQklJSZ3n4e/vT1paGseOHaNnz56c6bVOmDBBOzrw6aefZseOHZw4cQKDwYDFYuHkyZOMGTOGe++9t17vxe12YzAY6nWvQvFX8PPuPMZ+uPmMn49+5FquTgg/Z99LH6SyM/PkGT9vH9uI1x/vcV4ZFQqFPliz5Td2HsjnqivDuKlrq0sdR6FQKBQKhUKhUCgUCoXisqZB7rACiI2NZfbs2Wf83Gq10qdPH0aMGOFx3W63c8MNNzBy5Mgz3ldZWUlwcDCTJ08G4P7776ekpIS9e/dqc6iaNm1KQUEBBoOBZcuWafcPGDBAW3P48GEAEhMTue+++3j++ecBePHFF/nqq6+0dSdPnsRkMtGzZ0969uwJ1DR+rrrqKkwmE1BzzJ/b7aZ9+/YedX/33Xc8/vjj9O3bFzg1D2vr1q0EBQURFxfHc889R1paGjabTWtWAbzzzjseta9evZqnnnpKa5DVB5fLjd1e+ucLJcNk0v8AP1ky+nudvaFq8TZSWOg4Z1/ryKCzNqziWwSfl+/3yPJeGpJPhFNl1FfG7Fwbr87+iVrF99uPMGXRL4x+qCtRzay6yCjKJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxppFOGXNeDlgtfqd866zBtuw+jNiYmLqzG0qLi4mPz+/zmyr398HkJ2dTXx8PFAzO6p58+baDiqAK6+8kn379nl8h9vtJjs7W5s55XDU/JK6rKzM4zv8/PwAaN68ufZ5dXU1NptNa2IZDAZ8fX21htXBgwf/0NW0aVMALVdWVhaNGjXSPLXExsbyxRdf1Km3urqaqqoqDhw4wPvvv4/JZCIwMPCMz+dcUAPlzkxDGOB3uWcMC/KjXXQoGTkFuE7biGg0QJuoUBpbfc/LfVu3KL7ccOZjAXtf2+qi1H+5v5eG6BPhVBn14Tu9WaU5XTD2463MfO7GC0z3/306q/mvcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCKesGWXhsm1YJScnM23aNOx2O1Zrzd+GXrVqFUajUWso1ZKZmcn48eNJS0vD398fLy8vli9frjWsbDYbVVVV3HbbbR7+JUuWsGHDBpYvX05BQQGRkZEUFRVx/fXXA1BaWorBYGDVqlX88ssvpKam4uXlpc2HCggI0HwGg4F3332Xn3/+mezsbMLDw7Hb7bRs2VLLALBnzx5effVVVq9ejcPh
0OZbtWjRAqjZQebr68uoUaP49ddfAZg1axYPP/ywx0wtqGl+JSUlaY01b29vqquradOmzQU9ezXDqi4N4TxUmTIO69eeDxbvZGdWgXatbXQoj9/Zvl5/fl/8VyITPtn+h9cv9N8Hmd5LQ/GJcKqM+sn4fdrhOs2qWqpd8GP6UZI7Nq+3X481i3bKmFHGmkU49e4T4VQZ9ZlRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRTlkzysZl27AaMGAA8+bNY9iwYQwdOpS8vDzefPNNBgwYoDV5oOa4v+3bt9O5c2cmT55MXl4eY8eO5aOPPiIsLIzWrVvjcrkoLy9n8ODB2n29evXiueeeY9++fdx99900adKEWbNmYTKZCAkJ0daZTCZ27NjBb7/9xtChQ9m1axdr1qzxyGo2m2nWrBnz58+nS5cuPPXUU3z66ae43W6tuVWLy+Vi4cKFPPDAAwDMmzcPAB8fH21NWVkZq1evplWrmnkbFRUV7N27F5fr1G/gJk6cSGpqKk6nk169enHixAm2bdsGQEZGhkdz7nwwGg2EhFjqda8MWK1+uvaJcOoxY0gIvD7sOnLzS8g94SCisYWIsIA/v/EMdA+xsKxTCxZ9t4+0vcfpFNeEe/7W+oIy/h4Z3ktD84lwqoyX3pd1tPisnx/ItXPHDRf+77eeav6rnDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOlVEOnwinjBllrFmEU8aMMtYswilrRlm4bBtWQUFBzJkzh3HjxjFs2DAsFgt33303zzzzjMe6Y8eO4Xa7mTJlCsHBwQBUVVUxZswYZs6cic1mw2g00rt3b20XE9Q0jqqrqwkKCmLFihWYzWZ69+7N5s2b+eijjxgzZgxWq5WqqioAQkJCmDJlChEREXTr1o1NmzZx8OBBwsPDsVqt5Obm0qRJE7Kysti5cyeJiYmUlJRw7NgxrR4Ap9NJdHQ0CxcuxGKx0KdPH5YtW0ZaWhr33HMPVquV0tJSysrKKCoqAmqOQlyyZAlG46nObkxMDLNnz8bLy4vvvvuO8PBwbr/9dpYtW8bixYt59tln6/Xc1QyrP8Zk0v95qDJmDPAxcXVCOHZ72QXNmaqlV5cW3PO31hfNB3K+F737RDhVRv1kjGkWyPdn+fyKCKuaS3eJfSKceveJcMqYUcaaRThlzChjzSKcMmaUsWYRThkzylizCKfefSKcIjIeLyqjuLwaq6+JsOAL/yW8jM9RxppFOGXNeDlgtaoZVkDN3KbZs2efdU3Tpk2Ji4vTmlUAffr0YcyYMQwfPpx+/fpx//33a8fm1ZKamgrAgw8+yBNPPKFdnzBhAmvXrgVOzcOKiopi9erVHms2b97M5s2b6dKlC1FRUezcuZMXX3yRQYMGATXzsDp37ozD4eDw4cO0bNkSk8lEdXU1Cxcu1BpY69atY9myZRw4cED7zvLycrZu3UpQUBAvvPAC6enpREdHaw0sgLvuuotXX32VESNGeHznN998g9PpPLcHfAbU+ZxnpiGch6oy6s8nwiljRhlrFuG83DP2aB/BnJV7/vBYQJMRurdrpubS6cQnwql3nwinjBllrFmEU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwnkxfCVlTmYs3UV6doF2rV10KEPvaIvF1+ssd54bsjxHkT4RTpVRnz6Z0F3D6vR5UhaLhTvuuIOnn34ab2/vs97ndruZOXMmn376KQUFBSQkJPDiiy/SsWNHj3V5eXmMHz9emydVXl7OgAEDPNZYrVasViv/93//x+jRo/H396e0tNRjHtaKFSsAOHr0qDYHqlOnTlx99dXk5uZSXl5OYmIiRqOR6upqHnroIW1GVkVFBaGhoWRlZQEQHx/PsmXL2L17N7169dJ2W9U2ybKysoiMjKRp06bk5eXxxhtv8O233+J0OgkKCiIoKIgjR44A0KNHDwC6du3qUdP+/fu56aabtJ9NJhP9+vXjk08+ITExkc2b
NzNp0iQAbrjhhnN+XwqFQqFQXG68PPBqxs/52aNpZTLWXFcoFAqFQqFQKBQKGZixdBcZOQUe1zJyCpi+ZBfD+3e8NKEUCsVlj64aVjabjYEDBxIVFaXNk5o4cSLl5eWMGjXqrPfOnDmT9957j5EjRxIXF8f8+fN5+OGHWbJkiXaUn9Pp5JFHHgFg0qRJlJeX88wzz5CSksJLL72kuX7++WdsNhsxMTG8/fbbbNiwgVmzZnHvvffy4osvkpeXp+2iWr16NS+88ALh4eE888wzbN68Gbfbjc1mIzw8nMDAQA4dOgTA448/zqpVq9i1axf+/v7YbDYA2rdvD8DXX39N79696dOnj8fOsNp1rVu35siRI6xYsYKHH36Y7OxsVqxYgdVq1XZPNW3alDZt2rBr1y4ee+wxNm7cyN69ewkICOC5557TnE888QRt2rShZcuW3HPPPdp1Ly8vxo8ff97v7nTMZjVU7vc0hAF+KqP+fCKcMmaUsWYRTpkyxjYP5uOXbiJ1Ry77DttoHRlEjw4RFyOibmsW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwXizf0ZMOj51VtbjckJ5dwAl7OU1D/S9pRpFOvftEOFVG/WaUDYPb7XZf6hC1TJ8+nWnTprF+/XrtiL7PPvuMsWPHsn79esLDw//wvoqKCrp3787999/P8OHDAaisrOTWW28lOTmZMWPGALB8+XJGjhzJihUrtOP62rRpQ3V1NYsWLaJDhw4ADB48mJ9//pm+ffsybtw4AB599FG2bt2K2+3GYrEQERHBzp07efXVV+nfvz8A9957L9u3bwcgJSWF8PBwunbtSnFxMY0bN6aoqIiEhAS6dOnChx9+yNVXX838+fPZtm0b9913H40aNaKsrAyz2czNN9/M7t27ycjI4K233uL222/n3//+N+vWrSMyMpK8vDwiIiK48847eeeddzCZTGRkZADwzjvvMHPmTIKCgigqKsLb25svv/yS2NhY7ZnNmDGDjz/+mMLCQgwGA76+vpjNZoqLixk5cqTW2Dtf3G43BoOhXvcqFAqFQqFQKBQKhUKhUCgUsrB973H2/lZAfKtQOsU1udRxNH7encfYDzef8fPRj1zL1Ql//HtahUKhuBB0tcMqJSWFbt26ecyT6t27N6NHj2bjxo3069fvD+/bvn07JSUl9O7dW7vm7e3NzTffrO2EqvXHxcVpzSqAoKAgHA4HGzZsoEOHDlRWVrJlyxZ8fHy0OVEA/fv3Z8OGDXz33XdERkYycuRIdu7c6XF83oIFC/jHP/7B7t27tXudTidhYWGkpKRo6+x2Ox9++CGVlZUAlJWVATBgwACeeuopbd0777xDRkYGFosFgIKCAgwGA99++61HU+iTTz6huLhY+9lkMuHt7c2PP/6ozbA6vVkF0LNnTyZNmsSIESOYNm0aq1atYsSIEeTn5/Puu+8yYMAAAgIC/vB5nw2Xy43dXnre913umEz6H+CnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU49+/IKShn78VZKyqq0awF+ZsY83JUmIfXbuXQxM/p7nf0vo1u8jRQWOurlVn925KhZhFPWjJcDVqvfOe8601XDKisri7vuusvjmtVqJSwsTJv3dKb7AI9GFEBsbCxz5syhvLwcX19fsrKy6qyJiYlh//79muPgwYM4nU6qqqo81tY2fGrnSVVV1fwPSkFBAU2anPobEF5eXhiNRnx9fQGoqqrS1tYSGBiIwWDAbK55/OXl5UBNc+t0vLxqBhie7nK73djtdo9mmtls1ly1lJeXc+2111JUVITZbObzzz/nn//8p/b5gQMHgJpjCB977DGthoCAACorK8nLy6tXw6omp/qX8Uw0hAF+KqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfC
KWPGi+H7fbMKoKSsijGztvLef5IvyA0XnjEsyI920aFk5BTgOu1sLqMB2kSF0tjqe8HPQP3Z0adTZdSnTyZ01bCy2+1YrdY614OCgrQ5TrVkZmYyfvx40tLSMBgMmEymOkfRWa1WbZ6Ur68vdrudwMBAZsyYwaeffkpBQQGhoaEUFxdz8uRJ4NS8KIPBwMqVKxk3bhxeXl4kJyd7fO7n54fBYGDq1KlkZ2eTnZ1Ns2bNOH78uEeGqqoqTp48yUsvvcSGDRtwOBxER0fjdrsJCQkBoLS0ZkfS999/T3p6OmlpaVgsFnx8fIBTDavanwcNGsTJkyfJy8vjqaee4sSJEwQGBmrf2bJlS0aOHEmbNm2YPn06W7du5ZVXXuG7775j+vTpADRv3hyA48ePs3TpUj744AOqqqrw8fHBYDAQEVH/WR1qhlVdGsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhFPGjDLWLMIpY0YZaxbh1KtvR+aJOs2qWkrKqth9sJD2MY3q5b6YNQ/r154PFu9kZ1aBdq1tdCiP39n+gn73p/7sXBxUxotDQ8goG7pqWJ0rNpuNgQMHEhUVxeTJk1m4cCHffvstEydOZNSoUWe9d/fu3Xz11VeMHDmSuLg4Zs+ezdGjR9m1axepqals2bIFAH9/f44dO8akSZMoLy/nueee8/CYTCYCAgJYtWoVnTp14plnnmHRokWUlZVhNJ76A1k7H2rx4sXcf//9+Pv78/HHHwPUac7t27ePwsJChg0bxrZt21i/fr3H5z4+Pvj4+LBnzx46d+5MXl4eixcvxsvLy6Nhdcstt7BhwwZsNhsnT56kdkxZamoqeXl5hIeH06xZMwwGA8XFxXTo0IE777yTmTNnUlhYSKNGjfDz8zvPt1KD0WggJMRSr3tlwGqt33P9q3winDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOGTPKWLMIp4wZZaxZhFPGjDLWLMIpY0YZaxbh1Jsvt+DQWT8/crKU5M4tL+g7LkbNISHw+rDryM0vIfeEg4jGFiLC6nca0x+h/uzo06ky6tMnE7pqWFmtVo9ZTLXYbDaPI/AWLlyIw+FgypQpBAcHc/DgQb799lsWLFjA0KFDCQ+vGfpnt9sxGAzavYGBgWRkZDB48GAGDRoEQOfOnUlMTMTpdDJs2DBtN1NJSQnvvvuudizge++9R3Z2NgUFBVrWsrIymjVrxtGjR3n77bdJSEigefPmHDt2TMsaGBhIUVERbdq04csvv8RsNtOrVy+WLVvGkSNHALR8Xl5eBAUF8e677xIREUG3bt3YtGmTdqSg1WqldevWdOnShSVLlgBgNBq55pprKCws1L7z5MmT/Oc//6nzHKuqqvjxxx+58847mTJliraLKysri59//hmA0NBQTp48yaFDh2jRosV5vT9QM6zOhMmk//NQVUb9+UQ4ZcwoY80inDJmlLFmEU4ZM8pYswin3n0inCqjPjPKWLMIp4wZZaxZhFPGjDLWLMJ5sX3Hi8ooLq/G6msiLLj+v4yOCD37vc0b+etqPlSAj4mrE8Kx28vqnet0ZPyzI2PNIpyyZrwcsFob6AyrmJiYOrOqiouLyc/P95gnlZKSQrdu3QgODtbuA3C5XGzcuJF+/foBNfOmIiIitCaU1WqlqqqK3r17a67amVPe3t78/PPPVFZW0qFDB8LCwjy+89lnn+Xxxx/n8OHDALRq1Yqqqir69+/Pv//9b21d//79OXLkCIcPHyYyMpLg4GCKioqYPXu21pgqLi5m2bJl5Ofne+RPSEhg0aJFmmvmzJls2rSJnJwcunbtSkxMDJs2bWLRokU8//zzxMXF0b9/f7755htat26t3RcZGcnevXuZOnUq3333HYsWLSI+Ph6ADh06AJCdnU1JSQklJSUez7uiogKAbdu21athBWqG1dloCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFO
vftEOFVGOXwinCqjHD4RTpVRDp8IpwwZS8qczFi6i/TsAu1au+hQht7RFouv13n72rQKJcDP/IfHAgb4mUloGaLmQ+nEqXefCKfKqE+fTOjqMMXk5GR+/PFH7Ha7dm3VqlUYjUaSkpK0a1lZWR7NpMTERAICAvD399caXk6nkzVr1mizpwCaNm0K1BznV8umTZuoqKiguLiY8vJyvL29sVgsOJ1Oj2wrV67E19eXEydOADUNK0DbcQU1O8H27t2rZQRo3LgxBoPBY77WqlWrMBgMFBUVAdCiRQuMRqM2y6qWdevW4e3tzcGDB7XnY7PZ2LRpk7amoKCAjIwMjzoBcnNzmTFjBi+//LL23T4+PrRs2RKAl156icceewyTycQTTzzB9OnTiY6Oxs/Pj44dO9KzZ08UCoVCoVAoFAqFQqFQKBQKmZmxdBcZOQUe1zJyCpi+ZFe9na8MvJoAP899BAF+Zl4ZeHW9nQqFQnE5oKsdVgMGDGDevHkMGzaMoUOHkpeXx5tvvsmAAQO0Y/4ACgsLWbRoESNHjgRqGjFDhw7lf//7Hz/99BObNm1iwYIFFBQUsHfvXjp27IjFYqFly5YYDAZGjBjB8OHDKSsr480336RNmzZkZGRgs9nw9fXFYrGQl5dHp06dcDqdhIaGcvz4cRISErDZbEDNziyA+fPns3DhQsxmM/7+/lgsFsrKyrR1zZs3Z/v27SQnJ+N0OgkKCsLhcNCxY0fS09M96j9w4AAdO3akurqakJAQTpw4QbNmzTRXp06d6Ny5M0OHDtXumT9/Pq1bt+aWW27RrvXr148TJ05QXV3N/fffr82jSkpK0nJ/++23TJs2DYApU6Z45IiNjdV2r9WHCxm8eLnSEAb4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRThkzylizCKeMGWWsWYTzYviOnnR47KyqxeWG9OwCTtjLaRrqf97eZo0D+GBETzJyCjiY76BlmIU2UaH1zlmLLO9FtFPvPhFOlVG/GWVDVw2roKAg5syZw7hx4xg2bBgWi4W7776bZ555xmOd2+3G5fLcUjdkyBBmz57N3r17efTRR2ndujU+Pj6YTCYmT55MXl4eY8eOBSAqKorhw4djNpu5+eab6dq1K88//7zmqqiowGAwYLFYKCwsxG634+XlRWBgoLamdq6Uv3/N/yiVl5djt9tp06aNtgsLanZduVwurFYrhYWFlJaWUl5eTmRkpEfDyu12YzKZ8Pf3x2azYbfbCQwMxNvb28OVk5OD1WqltLSUyspKKioqaNOmDWbzqVdZVlZGXl4eJpMJk8mkNakiIiK0Nffccw/e3t5MnTqV22+/nYSEBCZPnqw1+dxut8eusHPFaDQQEmI57/tkoSEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRjl8Ipwqoxw+Ec7LPWPmsZKzfu6odF3Q78GSQiwk/fmy8+Zyfy9/lVPvPhFOlVGfPpnQVcMKanb3zJ49+6xrQkNDufvuuz2uGQwGTCYT//rXvxg5ciTTp09n2rRpTJkyRdsttGnTJpYvX86zzz7L5MmTtXs///xzDAYDQUFBVFRUYLfbiYmJYcWKFQBUVlZy6623kpmZSdeuXQHYuXMnAM8//zz33HMPAKmpqQwePBhAm1e1f/9+zGYzKSkp2veNGDGCH374QVtz7Ngx3G433bt358MPPwSgqKiIG264gePHj2vrFi5cSFlZGevXryc4OJi4uDhuuukmli5dyjPPPKPtQjt48CBXXHEFCxYs0L6zS5cu/PDDD9jtdqxWK+Hh4cyePZsBAwbwwgsvALBjxw7WrFlDeno6GzdupEePHn/6vn6Py+XGbi/984WSYTLpf4CfyqifAayno/eaRTj17hPhVBn1mVHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMI58Xw+Xud/S9zW7yN
FBY66uUGfdYs2iljRhlrFuGUNePlgNXqd867znTXsDoXYmJitBlRtRQXF5Ofn6/NtkpJSaFbt24eR9v16dOH5cuXs2zZMoYNG6Zdz8rKIiIiAl9fXzZt2oTL5fLYweXt7c1NN93EvHnzNP/u3bsBPOZOJSUlYbFYcDgcxMTEUFlZybFjx6iursZms2mNp9ocV111FVDT6AIwGk+9tODgYLp27cr3339/1pri4+NZu3YtGzdupF+/fhw6dIiqqioOHDhAly5dPJ7Rb7/9RpcuXdixYwclJSUUFBQQHx8P1OwqW7t2Lb169eKrr77S5mbVBzVQ7sw0hAF+KuP5c7EHsP4Reqv5r3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMI54X4woL8aBcdSkZOAS73qetGA7SJCqWx1feiZNVTzX+VU8aMMtYswilrRllokA2r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSqrZSJuVlcVdd93lcV+PHj0wGAwsWrSIRYsWaQ2b3NxcbrrpJu0+gJycHB5++GHS0tLw8vIiLCwMl8vFtddeC9Q0fxo3bsxnn33Gl19+SXZ2NhEREbjdbgICAoiMjOTAgQNUV1djNBp55pln2Lt3Lw6HgyuuuAJA+2dWVhb+/v5s3brVoyZfX1+AOjUtWrRI24m1YMECrFarlrv2n2PGjGHdunVs27aNqqoqKioqAJg4cSJeXl6EhoZiMBh4/vnnPY5D/OqrrwC09QqF4s852wDW4f07XppQCoVCoVAoFAqFQqFQKC6YoXe0ZfoSz7+k2iaq5i+pKhQKheLi0iAbVn369OGdd95h9+7dPPbYY/z222989dVXtG7dWjsWz263s3r1alavXs3atWsB8PHxwc/Pj6NHj3LTTTeRmJjI7Nmzyc/Pp3fv3tp9Xl5eGAwGfvrpJx555BFKSkqYN28eAC1bttTWtWzZku3btxMbG8vw4cNZu3YtOTk52qwom80GQLNmzbQdUK1atWLGjBkAJCQkaK7Q0FByc3Pr1AR41LR3715mzpzJLbfcQk5ODhaLhfz8fLZu3erxnStWrODgwYOMHz8eHx8fHn/8cQAiIyO1nVx///vfWbFiBf/4xz9IT0/n8OHDOJ1Oqqur6d+/f73fj9mshsr9noYwwE9lrB+iBrBezIwifSKceveJcKqM+swoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhPNi+YICfHju/kTyi8qwCxgDcPo/9eYT4ZQxo4w1i3DKmlE2GmTDasWKFXh7exMfH8/UqVOxWCwkJSWxadMm8vLytAaPy+XC7T61X7eiooKysjLCwsJIT08nJSWF+Ph4XC4XK1eu5JprrgHA7XbjdDpJSkpi9uzZmM1m4uLi2LNnD7t379b8ubm5xMTEYDKZmDRpEhEREQQGBlJYWOiRNzc3l+7du7NhwwYcDgcJCQmkpaWRlpbGAw88AIDD4ahTU6tWrfjtt988atqyZQsAa9asAWp2gsGpmVq1bN26lY8++qjOHKqtW7dqRwW+/vrrXHHFFSxevJicnBz8/f2prq7m9ttvx9+/fr9gNxoNFzRs8nKnIQzwUxnPD9EDWGvRU81/lVPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmi+m80h+Cft25xHR2EJEWMAF+0T+vkum9yLKJ8Kpd58Ip8qoT59MNMiGVUpKCklJSXzwwQfaNbvdTteuXbWdTFarlT59+jBixAhtzfbt23G73Vx33XVMmDBBuz5hwgRtF5bVaqWqqorWrVsza9Ysbc1nn33GqFGjSEtLo2fPngQGBnLw4EFeeOEFBg0apK3r3bs3WVlZHD58WJtZ5Xa7eeedd7Sfc3Jy6NWrF/v379e+s6SkhOTkZI+aJk6cyMcff6zVFBAQQFFREe+//752hCFA586dKS0tpbKyUvuO
2iZeLbNmzeLhhx8mIyNDu+bt7c1jjz1Go0aNePnllxk6dChvv/32Be2ucrnc2O2lf75QMkwm/Q/wUxnVAFa9OPXuE+FUGfWZUcaaRThlzChjzSKceveJcKqM+swoY80inDJmlLFmEU4ZM8pY88V0lpQ5mbp4JzuzTp2G0j4mlMfvbI/Fr/5zpvVcsyifCKeMGWWsWYRT1oyXA1ar3znvOmuQDas/mk9ltVoJCwvTZjjFxMRo/7mW2mZNp06dPK7HxsYyZ84cysvLiYmJAaBJkyYea7Kzs/H29ubgwYPa5+np6dp6qGlMnThxQst47bXXYjQa8fPz0xpJtZ8B5Ofna1mdTifNmzf3+M4jR47g5eWlrQ8LC6OoqIjo6GhtTXFxMQ6HA7fbzaFDh7Q8YWFhGAynfpGelZWF0WgkLy+vzvNcvnw5MTExpKWl0bx5cxITE+usOR/UQLkz0xAG+KmM54cawCrOqXefCKfKKIdPhFNllMMnwiljRhlrFuGUMaOMNYtwyphRxppFOGXMKGPNF8P5wVc768yZ3pVdwPtf7bwoc6b1WLNonwinjBllrFmEU9aMstAgG1Z2ux2n08lDDz1EWloaFouFO+64A6vVqs1wSk5OZtq0adjtdqxWK1Czwwrg8OHD9OzZk4KCAhISErjhhhtwu93YbDYSExMxGAzk5eXx5JNPkpqaitlspqqqitDQUM3fpk0b1q1bx5YtW3jrrbfIzs4mJCQEu90O1MyS8vb21ppMb7zxBkuXLsXhcGCxWAgODqakpOYosdpj+w4cOKDV5OfnR3FxsUdNV1xxBfv372f16tUsWbKE3NxcQkJCtOdSm9/b25ujR49y8803c/z4ccLDwykvLyc8PJzi4mJt/eTJk5kyZYr2c21jbMGCBdx77731fj9qhlVdGsJ5qCpj/RnWrz0f/O5vb7WNrvnbWxf674Neaxbp1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNF8spcs60XmsW6RPhlDGjjDWLcMqaUTYaZMPK7XazaNEi2rRpw+TJk8nLy2PixIkYjaf+IAwYMIB58+YxbNgwhg4dSl5eHj/88AMGg4FZs2YxcuRI4uLiGD58OG+//bZ2n4+PD1arlf3791NUVMSQIUNYv349O3fuxMvr1Lbhrl27AvDhhx9y3XXX8be//Y25c+fWydqmTRvWr1/PvHnzePDBBzl+/DjLli3D19dXm6/VtGlTDAYDP/74I1FRUTz22GN8/fXXFBYW4nQ6Nde1117LypUreffdd/n73/9+xu/09/enqKiImJgY7r//ftasWcO2bdsICQnBx8fHY21tM+6ee+5h0aJF/N///Z/HUYLni5phdXYawnmoKuP5ExICrw+7jtz8EnJPOC7a+dino7ea/wqn3n0inCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpY0YZaxbhlDGjjDVfqPOvmDOtt5r/Cp8Ip4wZZaxZhFPWjLLQIBtW3t7eVFRUMGXKFIKDgwGorq5m1KhRmM01JQUFBTFnzhzGjRvHsGHDsFgsdOzYkS1btvDggw9qc6diYmIoLCzE7XZrx/YFBgZis9lwu91MnTqVhIQEXnjhBSZMmKA1mRo1agTUzIratm0bv/76K7169SIvL4/U1FTNFR4eDkBoaChz584lIiKC//73v7zxxht4e3trNfn4+FBRUYHNZmPq1KkkJiby97//ncmTJ2s1NWvWDIDg4GC+/fZbLBYLDzzwAN9//z1ZWVnadyYmJrJ//36ys7PZuHEj0dHRPPzww8yaNYvGjRt7PEuXy0WHDh3IysoiLi6Ovn37XtC7UTOs/hiTSf/noaqMF+4L8DFxdUI4dnvZBc2tOh291yzCuSu7gEMnHLQMs9AmKlR3+UQ4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1nyxnCLnTOu1ZpE+EU4ZM8pY
swinrBkvB6zWy3yGlclkIiAgQGtWAVx33XUAVFZWatdiY2OZPXu29vNHH33Eli1baNeunXZt/vz59OvXjz179uDr66td9/HxYePGjdrPLpeLiRMnajuemjZtCkC3bt14//33tXVvv/02qamp+PvXbC+uqKjQvqdFixbaupkzZ1JWVqb9bDabCQwMJDU1VbuWm5vL5MmTtZpq8/3zn/9kxIgR2rrDhw+TlZWlZYqPj2fbtm1s2bJFm2OVmZnJrFmztEZbLb6+vrz77rvceOONDB8+nIuBOp/zzDSE81BVRv35RDj1mDGvsJTX5v5MSVmVdi3Az8wrA68mLLh+RzZczHx/hVNllMMnwqkyyuET4ZQxo4w1i3DKmFHGmkU4ZcwoY80inDJmlLHmC3X+FXOm9VbzX+ET4ZQxo4w1i3DKmlEWGmTDqrq6msLCQh544AF27tyJxWIhPj4ewGPX0u+p3an06aef8uabb1JQUEB8fDwHDx6kurqa8vJyfH19MRgMVFRU8PDDD5OWloaXlxcdOnTA7XZrxwIeO3YMgF27dtG3b1+ys7OJiIjQmkqlpTW7jGqP4Bs9ejR79+7F4XDQvn17CgoKPLKeS03l5eUAbNiwgTVr1pCbm0t0dLQ2N+vYsWPExsYSERGBzWYjOTkZu91OeHi4trPqxhtv9HgmZWVl3Hjjjbjdbt5++22+//57Jk+eXKexpVAoFKL5fbMKoKSsinFzfua9/yRfolQKhUKhUCgUCoVCoWhIDL2jLdOX7PKYZdUmKpShd7S9hKkUCoVCcS40yIZVRUUFbreb3bt389hjj/Hbb7/x1Vdf4e3tTVXVqV92Dhw4kNzcXNauXQvUNJEMBgM//fQTN910E4mJicyePZvi4mIAbDab1nAyGo389NNPPPLII5SUlPDJJ5/g5eWl7Viy2WwAHD16FH9/f4YPH87atWvZtm2bx+fV1dV4eXmxceNG+vXrR6tWrZg5cybV1dXa7qtzranWuXfvXjp37syAAQNYtGgRR48e9fg8KysLq9VKRUUFjzzyCEeOHGHx4sWYTCbuv/9+7Tvz8/Nxu90EBgYSGBhIWFgY27Zt4+6772b9+vX1fj9msxoq93sawgA/lVF/PhFOvWbckXmiTrOqlpKyKnYfLKR9TP0a6XqtWaRPhFPGjDLWLMIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmi+mMyjAh+fuTyS/qAx7eTVWXxNhwRc+T0bPNYvyiXDKmFHGmkU4Zc0oGw2yYQVgMpmIj49n6tSpWCwWkpKS2Lhxo8cxey6Xi+rqau3nqqoq3G43Xbt2JT09nZSUFOLj4ykrK9OaVlCz68jlctGlSxdmz56N2Wyme/fupKameqwDiIiIwGQyMWnSJCIiImjbti27du3SPi8tLcXpdNK9e3c2bNiAw+GgQ4cObN++HZfLc1vgudQEkJCQwIkTJ5g0aRLR0dE0b96cI0eOaJ8PGTKExx9/nAkTJjB37lxKSkowm81UVVWxZ88e7UjEI0eO0L59e3bu3Mnw4cO57777uOuuu0hPT+fQoUMeRxieK0aj4YKHV17ONIQBfiqj/nwinHrLmFtw6KyfHzlZSnLnlvX2w8Wt+Uh+Cft25xHR2EJEWMBF8+rtvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtZ8MZ2ifj+l55pF+UQ4ZcwoY80inLJmlIUG2bAyGo00b96cTz75RLtmt9vp0qWLdjwewLx58zzuq92BNHLkSK666irt+kMPPcSPP/5IUFAQUNPYCgwMZNasWdoat9tN27ZtteZR7Yyq6667jldffVVbt2DBAnbt2qXNuqrN884772h+gFtvvZXDhw+fV021s6zuuecej51SY8aMYcGCBVqm0NBQAF577TWcTiffffcdb7zxBk888QTHjx/X7quqqiIsLIy9e/dq12qbeYcPH65Xw8rlcmO3l573fZc7
JpP+B/ipjPrziXDqNWNE6Nn/h7x5I39dDMYtKXMydfFOdmadOlqifUwoj9/ZHoufV729en0vIn0inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinrBkvB6xWv3PeddYgG1Z/RO1RfRdr7ZnWuN3uP7xenxz1df3+vjOte+ONN1i5ciUzZ87k0KGa3QsxMTHa53fffTcvvvgiq1atokePHhw9epQ1a9YA0KRJk3Ou4/eogXJnpiEM8FMZ9ecT4dRbxjatQgnwM//hsYABfmYSWoboYjDuB1/tJCOnwOParuwC3v9qJ8P7d7wgN+jvvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPK5XJx5MgRHnjgAXbu3InFYiE+Ph4Aq9V6xvtqdzi99dZbHDx4kIKCAuLj48nOzgZOzbAym82cOHGChx9+mLS0NLy8vOjQoQPV1dXaLqbS0ppdRKmpqfTt25fs7GwiIiIICKg5GsrLy8sjzzPPPMPevXtxOBy0b9+ew4cPexwJeC41eXt7A/Dll18yb948cnNziY6OxuFweGQCmDx5Mh9//DF+fn78+9//xuVy0aNHD6KiorQ1t99+Oxs3buQ///mPds3f35/AwEBatmx57i/kd6gZVnVpCOehqoz684lw6jnjmIe7MmbWVo+mVYCfmTEPd72g/165WPmOnnR4DO2txeWG9OwCTtjLaRrqf0kzinTKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKesGWWjQTasAKqrq9m9ezePPfYYv/32G1999RXe3t74+Z06VmrgwIHk5uaydu1aAMxmMwaDgS1btnDTTTeRmJjI7NmzPY4RBPDz88NoNPLTTz/xyCOPUFJSwieffIKXlxeBgYEea48cOUJsbCzDhw9n7dq1bNu2zeNzf39/vLy82LhxI/369aNVq1bMnDmTqqoqjEbPP7jnUhNARkYGnTt3ZsCAASxatMjjaEGAZcuWMWXKFHx9fXnkkUdYsmQJhw8fJicnh+PHj2u7p7p3787JkyeJiYmhY8eOpKamcvz4cRo3bozZXL8/GmqG1dlpCOehqoz684lw6jFjSIiFBeNvI23vcfb8VkB8q1A6xdV/t+fvudB8mcdKzvq5o9J1wf/9p8f3Itonwql3nwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcKqM+vTJRINsWPn4+FBZWUl8fDxTp07FYrGQlJTExo0bPRotLpeL6upq7Wd/f3/cbjdXX3016enppKSkEB8fT2lpKSUlJR4zplwuF126dGH27NmYzWa6d+9Oamqqdhxf7dqmTZtiMpmYNGkSERERxMXFsXfvXu1zk8mE0+mke/fubNiwAYfDQYcOHdi2bRs+Pj7nVVOtMy4ujhMnTjBp0iSio6Np2rQpx44d0z5funQpAOXl5UyePFn7jsOHD/PBBx8wZswY3G43J0+exGQyceTIEXJzc2ndujXh4eHs3LmTDRs20LNnz/N+N2qG1R9jMun/PFSVUX8+Ec6GkDG2WSCd4ppgt5fVe26ViHz+Xmc/8tXibdTFnC1RThkzylizCKeMGWWsWYRT7z4RTpVRnxllrFmEU8aMMtYswiljRhlrFuE8XlRGcXk1Vl8TYcEX/svjhlCzyqjPjDLWLMIpa8bLAav1Mp9hZTKZCA0N5ZNPPtGu5ebmcsMNN1BZWaldmzdvnsd9VVU1x0zdf//99OnTR7ver18/9uzZg6+vL1AzI8rHx4dZs2Zpa1wuF23atMHpdAI1jSqAdu3a8f7772vr3n77bfbu3asdHVhRUQHAq6++SosWLbR11113HWVlZedVU22+66+/nhEjRmjrnnzySY4dO+aR
6ZdffqFfv37Mnz+fmTNn0q1bN+68807Ky8s1N8Add9zBhAkTNNfq1at56qmn2LRpU70aVqBmWJ2NhnAeqsqoP58Ip4wZL9QXFuRHu+hQMnIKcJ02StBogDZRoTS2+upizpZop4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPh1GPGkjInM5bu8jjSvV10KEPvaIvF1+uS5/srnCqjHD4RTpVRnz6ZaJCHKVZXV1NUVORxlN/GjRuBU3Oe/ojanUoZGRnaNafTyZEjR6iurtaaOQaDgYqKCnJycrR1mzdvxu12a7Opjh07BuCxBmDfvn3AqXlStbuoNm/erK2x2WwUFhZy+u6vc6mpNt/+/fs9vjMrK8sjU1ZWFv7+/syZM4eJEyfSrVs3AGJiYrS1FkvNsVV5eXkertpGVm1zT6FQKBSnGHpHW9pEhXpcaxNV8//4KBQKhUKhUCgUCoXi0jNj6S4ycgo8rmXkFDB9ya5LlEihUCgU50qD3GFVUVGBwWDgmmuuISAggKuuuopffvmFoKAgj0bL72dYlZaWYjAYmDlzJrNmzSIqKorGjRtrO51sNhu+vr5UV1djNpu59dZb8fPzo3379uTk5BAaGorBYNDWAhw4cICEhATCwsJo27Yt69ev9/jc6XTi7e3NK6+8wtixY7nyyisxmUz4+PhoDahzranWuX79ehISEmjevDnR0dFkZ2drnx86dIgffvgBh8OB0Whk3LhxfP755wwYMACn00lBQc3/YAcHB2M2m9m4cSMJCQkEBQXRqVMnfvrpJwBCQz1/IXs+mM0Nsg8qlIYwwE9l1J9PhFPGjBfTFxTgw3P3J5JfVIb9Ih8tcfo/LwZ6fo6inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzyljzxXIePenw2FlVi8sN6dkFnLCX0zTU/5LlE+1UGfWZUcaaRThlzSgbDa5hZbPZcLlchIeHExoayoEDB0hNTSU+Pl47rq+W38+w+umnn3C73Vx//fXs3LmT7OxssrOzGThwIB9//DFQ02DKz8/Hz8+P2NhYdu/ezdatWwkPD6dNmzbaDKs9e/YA0KFDBwoKCsjNzWXdunXceeedLF68WPvOtLQ0nE4nSUlJ/Prrr+zZswez2Uz//v1ZuHDhedX0yy+/ANC9e3cyMzM5cuQIhw4d4sEHH2Tu3LkAOBwOrWaXy0VRURFbtmxhy5YtADRq1EhzVVVVYTAYsFqtFBcXs27dOozGmn+ZaneSnS9Go4GQEEu97pWBhjDAT2XUn0+EU8aMF9Mn6r/nZHwvIpx694lwqoxy+EQ4ZcwoY80inDJmlLFmEU4ZM8pYswinjBllrPlCnZnHSs76uaPSdcH//5zeav4rfCKcMmaUsWYRTlkzykKDa1jVNnl69erFf//7XwA+++wzxo4dS0hICEFBQdra02dYVVRU8PPPPwMwefJkfHx8qKys5NZbbyU9PR2DwUBQUBCrV6/G6XTSvXt3ZsyYAUBqaiqDBw8mKCiI2NhYAJYvXw7AU089xXXXXQfAiBEjtO8ICgri2LFjZGdnY7FY+OijjwAoKirihhtuYN++fVrWc62pdqfYK6+8QkxMDAADBgwgLS1N+87Y2Fh69uzJsWPH+Oyzz4CaYwt79epFp06dcLlc2jNISEggNjaWb775RjvusLbZFRYWVq/343K5sdtL63Xv5YzJpP8Bfiqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhPN4URnFF3GHOuj/OTaE96Iy6jOjjDVfLKe/l+Gsn1u8jRQWOurl1mvNIn0inDJmlLFmEU5ZM14OWK1+57zrrME1rFJSUggODtZmLQH07t2bUaNGcfLkSa2R83u2b99ORUUFANnZ2cTHx+Pt7c3NN9/MF198QUREBL6+vqSkpGC1Wjl+/Lh2b1JSEkFBQfz222/cfPPNVFZWsmPHDoxGI1lZWVrD
qk+fPlojKyYmhtTUVNxuNw6HA5vNRlBQEMHBwSQlJfHzzz9z5ZVXnnNNhw4d0jJlZWVpdfbp04cJEybg5eVFixYttO/etGkTbrcbg8FAcHAwUDOv6pprrgFg9+7d3HXXXYwYMYL//ve/5OfnU1FRwT333APAVVddVe93pAbKnZmGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLK4RPh1GPGkjInM5bu8jiiq110zQxQi2/9Tvb4PXp/jnp8L6J9IpwyZpSx5gt1hgX50S46lIycAlzuU9eNhpr5w42tvhecV281/xU+EU4ZM8pYswinrBllQXeHKWZmZvLQQw/RsWNHkpKSePPNN6msrNQ+z8rK4sorr+THH3/EbrcDYLVaCQwMxO128/bbb9OhQwf69++vHaFXex+Av78/Tz/9NJ06daJr1678+uuvlJSUkJSUpK2LiYlhz5493HrrrbRv355bb71Vmzl1/fXXc/DgQaqqqoiLi+Ojjz4iKSmJjh07MnPmTACaNWtGZGQkWVlZhISEYDAYuPfee7WacnNzKSwsJDk52aOmlJQUbrrpJtq3b8+//vUv/Pxq/tZZUlKSlj8iIoKJEyfStWtXOnXqxKpVq3C5XFx11VV4e3sDkJycjM1m44cffiAtLY3u3bsDcPToUe07KyoqcLvdvPLKK/Tu3Zt//vOfjBs3DoCmTZuesfGnUCgUCoVCoVAoFArFuTJj6S4ycgo8rmXkFDB9ya5LlEihUFzuDL2jLW2iQj2utYmqaZQrFAqFQt/oaoeVzWZj4MCBREVFMXnyZPLy8pg4cSLl5eWMGjUKALvdTteuXcnJyWHYsGEMHTqUvLw8SkpqzqgdPHgwcXFxzJ8/n3vvvZemTZuyfv167HY73t7e+Pv7k5OTw913301YWJjWZOrXr5/mj46Oxu12U1BQwLBhw0hPT2ft2rVYLBY6dOjAtm3bgJrm1+7du+natSvJycnaEYTXX3+95rJarZSVlZGTk8MDDzwAwJw5c4Ca4/xq14WEhFBZWUl1dTVPPfUUGzZsoKysjMjISMLDw7UZVH5+fmRmZnLLLbfQpk0bPvzwQwBuu+027Tl26tSJsLAwhgwZAoDBULMd+oorruCWW24BoFWrVixYsACXy8WAAQMwm83Mnz8fgPj4+At6j2az7vqgl5yGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNV8s59GTDo+dVbW43JCeXcAJezlNQ/0vacaG5BPhVBn1mVHGmi+mMyjAh+fuTyS/qAz7RTyKVM81i/KJcMqYUcaaRThlzSgbumpYLVy4EIfDwZQpU7Rj7Kqrqxk7dixDhw4lPDwcAF9fX+bMmcO4ceMYNmwY/v7+uN1u4uLiGDRoEACdO3emc+fO2Gw2ze9yuThx4gSDBg1i1apVFBQUEBISwvHjxz3W7dmzh7Zt29K8eXOmT5+O2WzG19eX2vlOtaSlpXHffffx888/8+6772pzn4qLi7U1drsdg8FA//79WbZsGQ6Hg5CQEAoKCigtLSUwMBCArVu3cv3111NZWcmUKVOwWCyYTCacTqfHd2ZmZjJo0CDWr1/P+vXrady4MSUlJRQWFnqsmzlzJmPGjPHYZfbCCy9gNte88uuuu45du3YRFBTEp59+itFo1Oo7cuTIeb230zEaDRc8vPJypiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDj1ljHzWMlZP3dUui7K//+o9+eot/fyV/hEOGXMeDF9R/JL2Lc7j4jGFiLCAi6aV8/vRdTvp/RcsyifCKeMGWWsWYRT1oyyoKuGVUpKCt26ddOaVVAzy2n06NFs3LiRfv36YbVaKS4uJjY2ltmzZwOwadMmBg0aRNu2p7b2ent7c99997F27Vqg5tjAqqoqWrduzYsvvsiLL74IwGeffcaoUaNIS0ujZ8+eBAYGcvDg
QR566CGt+VWbIysri8OHDxMUFATUNMCefvpp7eecnBx69erF/v37te8sKSkhOTmZ0aNHM3r0aAAmTpzIxx9/rNUUEBBAUVER//znP7npppu07+zcuTP5+flUVlZq32GxWHjhhRe0/Bs3buThhx8mIyPD41leccUVFBcX88orr2A2mxk9ejQrVqygR48eADRp0gQAh8NBVVUVXl5ePPHEE7z//vvk5eXV8w2Cy+XGbi+t9/2XKyaT/gf4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUseaL5fT3Mpz1c4u3kcJCR73coP/nqNf3ItInwiljxovpKylzMnXxTnZmndrt2D4mlMfvbI/Fr/5z5NR7kaNmEU4ZM8pYswinrBkvB6xWv3PedaarhlVWVhZ33XWXxzWr1UpYWJg2wykmJkb7z7XUNms6derkcT02NpY5c+ZQXl6uzWSqbdTUkp2djbe3NwcPHtQ+T09P95jh5Ha7OXHihJbx2muvxWg04ufnpzWSaj8DyM/P17I6nU6aN2/u8Z1HjhzBy8tLWx8WFkZRURHR0dHamuLiYhwOB263m0OHDml5wsLCtCP+ar/TaDTWaTLNnTsXk8nEvffey5IlSwA4fPiw9rnT6cRkMpGamkpeXh4RERFUVVXx9ttvU1lZSXl5Ob6+vtQHNVDuzDSEAX4qo/58IpwyZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVEOnwin3jKGBfnRLjqUjJwCXO5T142Gmnkyja2+FyWv3p+j3t7LX+ET4ZQx48XwffDVzjpz5HZlF/D+VzsZ3r/jBblBvRe9OlVGOXwinCqjPn0yoauGVe3Mp98TFBSkHdmXnJzMtGnTPNZu374dODU7qhar1Yrb7cZms5GYmIjBYKCg4NT/SDudTtasWUNQUBAbN26kY8eO2menHxG4adMm7Ha7dt3b21trMs2YMYNPP/2UgoIC/P39CQoK0uZp1e5mysnJ4cknnyQ1NRWz2UxpaSlWq1X7jiuuuIL9+/ezZcsWnnnmGbKzs7WjAmu/MzExEV9fXxwOB2+88QZLly7F4XBgNptp3LixxzGEeXl5TJ48mZiYGDp37ozRWNO9bNasmbamVatWVFdXc/fdd1NQUIDBYMDf3x+j0YjL5cJut9e7YaVmWNWlIZyHqjLqzyfCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxpovpnNYv/Z88LvdHW2ja3Z3XOj/76j356jn9yLKJ8IpY8aL5RM5R069l4uDynhx0HtGGWsW4ZQ1o2zoqmF1LgwYMIB58+YxbNgwhg4dSl5eHj/88ANGo1GbcQUwcOBADhw4oP3s4+NDUFAQe/bsYc6cObRu3ZoFCxZQVFREZWUlfn5+TJ48mc2bN/Phhx8yZswYAgICKCsr480336R79+78+OOPmq9NmzasX7+et99+mwEDBuBwOFiyZIk2IwqgadOmGAwGfvjhB5o0acLgwYNZsWIF2dnZlJWVaeuuvfZaVq5cydixY7n++uvp0aMH8+bNw+0+7a+gAS1atGD//v188sknPPDAAxw4cICUlBQqKiq05tLkyZNZunQpTqeTqqoqBgwYwOLFiwEoLT11VJ/VasXb25uSkhL69u1LdnY2W7du1b7z9F1c54OaYXV2GsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUQ6fCKceM4aEwOvDriM3v4TcE46LPj8H9P8c9fheRPtEOGXMeKG+v2KOnHov+nSqjHL4RDhVRn36ZEJXDava+VS/x2azaUfvBQUFMWfOHMaNG8ewYcOwWCwkJiayadMmKioq8PHxAWrmS1VVVWEwGLR7IyMjcTqdzJo1i4KCAhISErj99tv5/PPP6dq1K9dddx3NmjXjww8/pKSkhGeeeQYvLy9uvvlm7r//fvr166e5WrVqBUBwcDCLFi0iIiKCV199lTfffJOqqiot
u7+/Pw6Hg4qKCmbOnEliYiIPPfQQL7/8MhUVFcCpnU/+/v5s2bKF9PR0/vWvf7Fz505++ukn7TubN2/O/v37CQgIYO7cuURHR/PWW2/x3HPP4XQ6AfD19dWON8zKyqKkpIRWrVpRVFTEt99+y2+//UarVq3w9/enVatWHDhwgIULFwI1DbGKigry8/M95oidD2qG1R9jMun/PFSVUX8+EU4ZM8pYswinjBllrFmEU0TGXdkFHDrhoGWYhTZRoRfsawg1q4z684lwqoz6zChjzSKcAT4mrk4Ix24vu6C5Vaej9+fYEN6LyqjPjBfLJ3KOnHovctQswiljRhlrFuGUNePlgNXaQGdY/dF8quLiYvLz8z1mSsXGxjJ79mzt502bNrFp0yays7OJj48HYN68eUycOJE1a9Zou49iY2PZt28fGzZs0O697777MJlM2n0tW7bEbDZTVVXFqFGj6NevHwDr1q3TMgLaMXvvvfceXbp00XwfffQRR48e1X729fXFYDCwZcsW7Zrdbufll1/Wjg6MjIwEoHfv3rz++uvaupdffpmffvpJ+67aXU8rV670aCiNGTNGa1g1atRIu+50Ojly5AhHjhwBapp4I0eOZNGiRSQkJLB8+XLcbje//fYbbrebqKgo/va3v+Hv74+XV/0Hb6rzOc9MQzgPVWXUn0+EU8aMMtYswiljRhlrFuG8GL68wlJem/szJWWn/nJQgJ+ZVwZeTVhw/Y6zOR091izaKWNGGWsW4ZQxo4w1i3DKmFHGmkU4Zcx4ob6/Yo6cei/6dKqMcvhEOFVGffpk4qI0rA4cOMChQ4c85j6dzj/+8Y9z8iQn151PtWrVKoxGI0lJSR5rMzMzGT9+PGlpaVqDZfny5VrjqXY+VXJysod/6dKlvPHGG6xcuZKCggKcTicul0ubf+Xt7c211/4/9s48Lqp6///PmYFhGRwWRQx3MAEXRE3NjRat1G56M7tZplbmpSK7V/O2+C3TLDNvZmVqWpq7Vje7LmlmlpFmVmqK+wK4gCDIMjAsAzPz+2N+HJ1QbzLzsYOf83o8eiTnfM7zvF7ngzjDez6f98389NNPzJs3jylTpuDr60twcDAtW7ZUikvV/tavX8+UKVNIT08nIiKCs2fPUlVVRXl5Of7+/vj4+FBYWMirr77Kpk2bsFqtREZGAijb71X//9ixYzz66KPs2bMHk8mkHD99+jQtW7ZUCm9Dhw4lLy8Po9FIq1at3Fal9e7dm8DAQMLCwrDZbBQUFBAQEKA809atWytjZ82axfvvv19jHm666aY/NF+aNGnSpEmTJk3XUr8vVgGUlFUxZfGvvPePxMtcpUmTJk2aNGnSJK+SBrVl3poDbr2s2rQII2lQ2z/RlSZNmjRp0nRpeVSwOnXqFP/617/Yt29fjX5L1dLpdH+4YHWp/lTTp09n6NChbv2phg0bxu7du+ncuTOzZs0iJyeHyZMns2DBAsLDw936U40aNUq57q677uKNN95g4cKFDBkyhIYNGzJnzhx0Oh2hoaHKuL///e9s27aNrKwskpOTOXDgAF9//TVt2rRx86vX61m1ahVdunThmWeeYcWKFcp2gEVFRfj7+ysrlVatWsXw4cMB1+ovvV6P1WpVxgLs27ePyMhIkpOTSUlJ4eeff3Y7b7fbAcjNzeXhhx8GYPHixcCF/lTh4eGUlZXhdDp5+umnadGiBf/5z39Yu3YtNptNWa0F8Ouvv2IwGBQuuLYdfOedd/7QfF1OnjbOvR5VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUxv8fadyKtRrKpWSVkVh04V0D6q/iXP/y+pNbNIpoweZcwsgimjRxkzi2DK6FHGzCKYMnr0Ji84yI/nhnUit7AMS7kds7+B8BDPe6to8+IdaR69I7V7lDGzCKasHmWTRwWriRMncvToUSZMmMBNN92krDqqrS7Vn2rIkCGMHTvWbVx2djZOp5P3339f2RqvqqqKSZMm8eGHH1JU
VERcXBwLFiygadOmynUOh4PS0lJatmzJhg0b8PHxQafTERQUxIIFC5g0aRLgKgiBa7XV+++/T2RkJI888giLFi1i3759xMfHK7yGDRuSlpZGamoqnTp1IiQkhIMHDyr3dDqd2O12oqKiWLVqFSaTiaFDh7JixQqys7Pdcvn4+KDX63n33Xdp2bIl9913H59//rlSsDKZTDRr1owuXbooxbHu3buzfft2ZUtAcBXShg4dygMPPABAZmYma9eupby8nPz8C5+oKS8vx+Fw4OPjQ3BwME2aNOHAgQNs3bqV+++/v1ZzqNfrPG7YeT2rLjTw0zyqjyeCKaNHGTOLYMroUcbMIpie8rLyT1/xfOb5UhI7N/PoHmrLfC2YMnqUMbMIpoweZcwsgimjRxkzi2B6k7f7yDmO/HSS2OZhdIxp6DWumjOL+l2NbN87IngimJpHOXgimJpHdfJkkkcFq927d5OUlKSsHPKGft+f6lJq1KgRMTExbn2cBgwYwKRJkxg3bpzSd+pSfktLS5k5cyZxcXEAdO/encaNG5OSkqKMS0lJwcfHhwcffJDx48cDrsLTf//7X77//nvi4+MxmVz/0I8cOZLHH39cuXby5MkcPHiQkpISt1Vhq1atIjg4WPl6/fr1ylZ+1cfbtWvHJ598oozZv38/n3/+OadPu35BExYWRllZGVOnTnXrddW7d28KCwuVr4ODg922CRw8eDAdOnRgwIABbiusevXqxdGjR9mzZ49ybOrUqUybNo3BgwdjMBgu+RyvJIfDicVSetXXXe8yGNTfwE/zqD6eCKaMHmXMLIIpo0cZM4tgeosXGXblF/yN6weqpmm4TPMikql2ngim5lGdHmXMLIIpo0cZM4tgepOXk1/K5I9/rtEPc9JjXWkYWvt+mGrOLIopo0cZM4tgyuhRxswimLJ6vB5kNgf84VVnHhWsQkNDqVevnieIWiktLY377rvP7ZjZbCY8PJy0tLQrXgcQFRWlHIuKisJqtZKVlaX0nTp27BhVVVVu43Q6HS1btlQY1QWrgAD3X56Ul5cDrlVN0dHRBAQEYDAY3IpVTqeTiooKdDodAM2aNbsk69y5c27MqKgo8vLyKCoqUngWi4W8vDy34lhUVFSN57B9+3YA2rdvX8PvzTffjMVioUWLFiQkJFBSUkJ+fj7h4eGXfZZXktZQ7vKqCw38NI/q44lgyuhRxswimDJ6lDGzCKanvDbNwwgK8LnktoBBAT7ENQtVXdNwGeblWjDVzhPB1DzKwRPB1DzKwRPBlMXj74tV4NpaeNLCn73SD1ONmUUzZfQoY2YRTBk9yphZBFNWj7LIo4LV0KFDWbt2LcOGDavVapzaymKxcKntB4ODg5Xt8y53ndFoxM/PTzmWmJjI+++/j9PppFu3bgQFBVFSUoJOp6Nnz56X5UdGRgLw+eef8+GHH5Kfn09sbCxnzpwBLvSdql+/PsePH+exxx5jz549+Pr6Eh8fT3l5ufLMjEYjOp2Oo0ePMnDgQNLT04mMjCQkJASj0YjD4frm7tWrF3q9nrFjx3LkyBGsVitBQUE4HA4GDhzolmnu3LkMHz6c1NRUAgIClEyDBg1SxjVr1ozx48fTpk0bdu7cydy5czlx4gRGo9Gtp9fVSuthVVN1YT9UzaP6eCKYMnqUMbMIpoweZcwsgulN3qTHujJp4aU/Fe3J6w81ZxbFlNGjjJlFMGX0KGNmEUwZPcqYWQTTWzytH6bmUW08EUzNozo9yphZBFNWj7LJo4JVixYtcDgcDBo0iPvuu49GjRpxqcLVnXfe6clthGrAgAHMnDkTgOHDh3P+/HlWr15NcHCw26qlkSNH8ttvv9G5c2fAVWQCOHDgAH379qVTp04sWrSI8+fPu/GrC1u//PILjz/+OCUlJSxbtozAwEC3vlM6nY7z588TEhLCuHHj2Lx5M7t27XLb9rBRo0a0aNGC7du3M3jwYIqLi9m8eTM6nY4HH3zQLdM777zDoUOH
GD16NCtXrsRmsxEVFaVkOnz4MOvWraNfv37YbDY++eQT/Pz8qKioAFyrwGojrYfVlVUX9kPVPKqPJ4Ipo0cZM4tgyuhRxswimN7ghYaaWPna3ew5co7DJ/Ol6jshiimjRxkzi2DK6FHGzCKYMnqUMbMIpqc8rR+mGKaMHmXMLIIpo0cZM4tgyupRFnlUsBo7dqzy5zfffPOSY3Q6HYcOHfLkNjVkNpvdejRV6+Kt8i53nc1mo6KiQllltWHDBnx8fKisrGTp0qWYTCaCgoIoKioiJydHKfA4HA4cDofCr96+r3Xr1uzfv5+UlBRiY2OprKykoKBAGVdQUABAly5dWLRoET4+PvTo0YNt27a5eTUYDNSrVw+DwcCMGTOIjIwkPj6eAwcOKOOys7NJT0+nR48ebN68meLiYiIjI8nPz2f9+vWMHj1ayWQ0GomJiWH27NnY7XYSEhLYt2+fkqlBgwaYzWbmzJlDdnY2Op2O4OBgAgICKCws5NSpU0RHR1/13Gg9rC4tg0H9+6FqHtXHE8GU0aOMmUUwZfQoY2aAddvTOXyqkDbNQ7i7R0uPeSI8Bhr13Ng0FJNRX+u+VRerLsyL5lF9PBFMzaM6PcqYWQRTRo8yZhbB9BZP64epeVQbTwRT86hOjzJmFsGU1eP1ILP5GvWwWrJkiSeX11qX6tFUXFxMbm6uW9+pS10HkJ6eTmxsLAApKSnccMMN2O12vv32W8BViNuwYYOykglcWW+++WaFkZeXB0Dv3r157rnnlHs89dRTbNmyRRlXWFgIwMyZM5XCk9PppH379kofLJvNRlVVFREREaxbt05hrV+/nmeffVZZZbVt2zacTiejRo3i6aefZsiQIbz++us8/fTTpKSkKAWrlJQUevbsSbNmzdi7dy8LFiygbdu2dO3aVcnUoEED3n77bU6dOsXAgQNZtWoVixYtYseOHVczFZeUtj/n5VUX9kPVPKqPJ4Ipo0cZM4tgyuhRlswHM/J5a9VvytepJ87zybcneO6hBGKbhXno0DseS8oqmb/2APvT85Vj7VqGkTSoLSZ/X08tqnJeRDNl9ChjZhFMGT3KmFkEU0aPMmYWwfSUp/XDFMOU0aOMmUUwZfQoY2YRTFk9yiKPNlPs2rXrH/rP20pMTOTHH3/EYrEox7766iv0en2NvlMXq1OnTgQFBbFx40bl2IkTJ8jPzycxMVE51qdPHwD27NmjHNuxYweFhYXccsstAJw+7VpKvnv3brd75ObmAtCgQQMAysvL0el0fP3118oYi8WC3W6nXr16AJw6dQqn08nJkyfdMlXfo2FD11Y3aWlpBAcH8+yzz3LzzTczefJkAKKjo90KeGlpaVgsFhYtWsS0adPo3r07ZrOZ8PDwGoW+119/nUGDBikFvOLiYsxmM82aNbvsc9SkSZMmTZo0XX+6uFh1saavuPTxP0Pz1x7gYEa+27GDGfnMW3PgT3KkSZMmTZo0aapLennkTQQFuH92OyjAh5dH3vQnOdKkSZMmTZo0XSyPVlhdrOPHj5OZmQlA48aNadWqlbfQNTR06FAWLVpEnz59KC8vx2g0YrPZuP/++2v0ncrKymLz5s0A+Pn58fe//513332XFStWUFZWRmVlJT4+PowaNUq57q677uKFF15g9erVrFmzBr3eVdfr1asX8fHxgKvo5OPjw969e+nWrRslJSWYTCaKiooA1/aE/v7+lJWV0bJlS1599VWmTZuGzWbD398fg8GgrJyqvsbPz69GJnBtqwhw7tw5iouLMZlMHDx4kA4dOhAZGUlUVJSykgtcq7p++eUXBg4cSJMmTRg2bBi//vorwcHB5OTkKOO6dOmiFMhWrVqlHO/Xrx++vrX/lLInTc+vV9WFBn6aR/XxRDBl9ChjZhFMGT3KlHnND2lXPL/xp5Pc06t22wN6y+PZ81a3lVXVcjhhf3o+eZZyGoUF/qkeRfFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9CbvhgZB
zHn2Vg5m5HMq10qzcBNtWni+klzNmUUxZfQoY2YRTBk9yphZBFNWj7LJ44LVN998w7Rp05RiVbWaNGnCCy+8oKxW8racTqfbny/+uloOhwO73X7Jay++5lLX/hFVF5L+yPV/5H5Op9ON+ftx1SuzLBaLUmg6deoUp06dqsEBWLt2LWvXrlWOFxUVcfbsWQAqKiooLXX1mvL19UWn0+Hn54evry8TJ078n3kuJ71eR2ioqdbXX++qCw38NI/q44lgyuhRxswimDJ6lCHz0TNFVzx/+HQhIzz8991TjyeyS6543mpzePwaRG3zci2YMnqUMbMIpoweZcwsgimjRxkzi2B6k9cz1MTl9+epvdScWRRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9//33PPPMM0RGRjJ27Fiio6MB1zZ7n376KWPGjOGDDz5w227PG1q1ahUVFRV89913yiqlTz75hMmTJ/PUU08pq6yWLl3qdl1FRQXz589n9OjRjBs3DoDu3btjs9lYsGABkyZNAmDTpk1UVlZy//3389prrwGu/lGjRo1i3759xMfHYzabqayspEOHDnz66afKPYYMGUJqaqrSryogIIAjR44wefJkHnjgAcC1Aqp79+7KqqjqsTabjZSUFCXTnDlzePfdd5UCVKtWrdi2bRv9+/dnxowZyj0TExPJz7/wiePQ0FCGDBnCmDFj+Mtf/kJSUhITJkwgKCiIhIQEABYvXozJZFLuCTB58mQOHTqEr68vNpsNo9F41XPjcDixWEqv+rrrXQaD+hv4aR7VxwM4V1hGcbkds7+B8BDP/7HT5kWOzCKYMnqUKXPrJsGknjh/2fOxTUP+9Cbkgb66K543GfV/ukdRPBFMGT3KmFkEU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9Xg8ymwP+8KozjwpWc+bMISYmhuXLlxMYeGELlj59+vDwww/z0EMPMXv2bK8XrFJSUujevbtS2AHo378/r7zyCtu3b2fw4MGXvG737t2UlJTQv39/5VhUVBTnzp1TijYAW7ZsAVw9r6rVs2dPQkJC+P7774mPj1d6PHXu3NntHtX9pvLy8mjSpAn+/v44nU769eunjAkODsZgMFBcXAxAs2bN0Ol0NGvWzC1T9T2q+2KFhIRgt9uVPlrVCgkJ4dy5c0qRKSoqirS0NBYsWIDZbGbw4MFMmDCBkpISoqKiAFefq+qtCLt06eLG69KlC5MmTeLBBx+85HP8X9Iayl1edaGBn+ZRHbySskrmrz3gtv1Vu5ZhJA1qi8m/9lt2VkubF3UyNY9y8EQwPeXd3b0Fn39/+W0B+9/c/E9vQh4eHEC7lmEczMjHcdEidL0O2rQIo4HZ/0/3KJongimjRxkzi2DK6FHGzCKYMnqUMbMIpoweZcwsgql2ngim5lEOngim5lGdPJnk0WaKR44c4a9//atbsapagYGB3HvvvRw5csSTW1xSaWlpSuGlWmazmfDwcNLSLvyy5cSJEzz66KMkJCTQs2dP5s2bB+B2bWJiItnZ2WRmZlJeXg7A3r17AZg5cybx8fE88MAD7N27l5YtWyr8+vXrA7BhwwY6duxI165defHFF0lNTVU8AkoB6oEHHqB9+/bcddddTJ8+ncrKSqxW16eAjUYjBoOBzMxMevbsSUJCAo8++ijr16/HYDBQUFAAXCiGvffee0qmKVOmkJGRgdPp5PTp00qm7du3M2/ePO666y5iY2MB1xaGPXu6Fr0PHDjwks/WYDCwZMkSbr/99quYEU2aNHlb89ce4GBGvtuxgxn5zFtz4E9ypEmTputdzz2UcFXH/wwlDWpbo89EmxauYr4mTZo0adKkSZMmTZo0adKkqW7LoxVWfn5+yiqdS6moqAg/Pz9PbnFJWSwWzGZzjePBwcGKn6KiIkaOHEmLFi2YNWsWOTk5TJ48Gb1e7+Zp6NChfPTRR9hsNjZv3ozNZiMrKwuAUaNGKSvIHnzw
QXx9fZXiXPXqqOzsbO6//37Cw8NZsGABVVVVyv3B1UcLID8/n+TkZPbv38/ChQuJiIhw28avqqqKqqoq2rVrR2JiIsuXL+fs2bM0bNhQYVVUVACQlZXF8OHDAde2hz4+Pm73HDp0KLNnz8ZkMvHxxx8TFBRESUkJCQkJynaJ1au3EhMTSUxMpLKyko8//phz586xf/9+unXrVuv58fHRmsr9XnWhgZ/mUT28s+etbiurquVwwv70fPIs5TQKq/lBgWvpUSRT7TwRTM2jOj3KlrldVAOWvNSXL39M5+DJQto0D+HuHi095nrTY3CQH88N60RuYRkWL2+XevH/1cYTwZTRo4yZRTBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT86hOjzJmFsGU1aNs8qhg1a1bN5YsWULv3r3p2LGj27m9e/eydOlSZUXPtdaqVauwWq28//77yiqnb7/9li1btpCTk6MUboKDg0lOTuaNN97g//7v/zCZTDidTmJjY3nkkUcA17Z/nTt3VopRAD///DMAAwYM4IcffiA/P58mTZq4rfAC1yo0o9FIt27dmDdvHj4+PjRt2pSSkguNw7OzswHo0KEDhYWFvPvuu0RERGA0GrHb7cq4HTt2ADBo0CDWrVuH1WqlefPmpKenu90zNTUVnU6Hn58fOTk5+Pq6tg+7/fbbazyn++67T9mu8OjRo2zevJm5c+cyYsQI5bqrkV6v87jh+fWsutDAT/P45/NOZJdc8bzV5vD475k2L+pkah7l4IlgepP38N3tvMa6WN70KOq1hprnRRRTRo8yZhbBlNGjjJlFMGX0KGNmbzIzc0s4eiiHyAYmIsODvMKsltqfo5rnRRRPBFPtPBFMzaMcPBFMzaM6eTLJo4LVv/71L4YOHcpDDz1EfHw8LVu6PoWbnp7Ovn37qF+/PuPHj/eK0YtlNpuVFU4Xq6ioiODgYODSfa46d+7Mli1b2Lp1Kw888IByPDAwEJ1Ox88//8yePXt45JFHCA8PV84bjUYeeughVqxYofAPHHBtyzV48GBmzpwJgNPppEuXLhQXFxMcHIzNZiM3N5eAgABmzZql8LZs2cJTTz1FaGgoANu2bQMgISGBCRMmKOOefvpptm7dqtzz2LFjAIwePZo33ngDcK02q+5BVT3utdde49577+WLL75g6dKlrFixgi+//JKqqqrLrk4DmDZtGrfccgv//Oc/OXXqFNHR0VeYhUvL4XBisZRe9XXXuwwG9Tfw0zyqhxfoq7vieZNRT0GBtVZsbV7kyCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYMnqUMbM3mSVllcz9IpXUtAu7T7SPCuOpe9tjCvCsr6/an6Oa50UUTwRT7TwRTM2jOj3KmFkEU1aP14PM5oA/vOrMo4JV06ZNWbt2LfPmzSMlJYUNGzYAEBkZyYgRI/j73/+u9HrypqKiomqsZCouLiY3N1fpT5WWlsZ9993nNqZNmzYA/Pbbb24Fq7S0NCIjI/H391e4ubm5Ne5ps9mUrfTOnTuHTqcjLS2N3r17A64eUeHh4RQXFxMVFcWpU6dwOByUlpa6FdOqC0HVRbG0tDR8fHzIzMx0u2eTJk2orKxUMlV7uriHl9lsJigoiNLSUpo2bQq4CobVq66GDRum8N59913effdd9u3bpxybNGkSY8eOJSQkhD59+tRYKVcbaQ3lLq+60MBP8/jn88KDA2jXMoyDGfk4nBeO63WuXi0NzP4e+9XmRZ1MzaMcPBFMzaMcPBFMGT3KmFkEU0aPMmYWwZTRo4yZvcGcszq1Rl/fA+n5zF6dyrgHEjx055Lan6Ma50U0TwRT7TwRTM2jHDwRTM2jOnkyyaOCFUD9+vWZMGGC28og0UpMTGTu3LkMHz6c1NRUTCYTsbGx6PV6ZQvCS60k6tSpE3q9nh9++IFbb72V/Px8YmNjycrKom/fvsp1BoOBI0eO8Nhjj7Fnzx58fX2JjIwEXKug
AEpKSoiIiOCTTz7h888/Jz09ncjISM6fP09AQABNmjRh165dgKuQNXbsWI4cOYLVaiU2NhaAG2+8UblnvXr12L59u1umoCDXUvfqTFarldDQUObPn8+///1vsrKyaNmyJTabjYYNG2I0GgF48MEH+fTTTwkNDaWoqAiDwUB5eTlDhgxh4MCB+Pr6YjQaMZlMFBQUAK4eW5999hmfffYZgHK8NtJ6WNVUXdgPVfOoLl7y4PbM+d2nCdu2dH2a0JO/Y9q8eEeaR+9I7R5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzN5iiuzr6y2PdYkngimjRxkzi2DK6FHGzCKYsnqUTR4XrP4MDRgwgHfeeYdDhw7xxBNPcPLkSVavXk3r1q2V3lSAUoDZvHkzAH5+fvj7+5Obm0vfvn3p1KkTixYtIjc3l/79+yvX6fV6DAYDv/zyC48//jglJSUsXboUuLBKC1wryXbv3k10dDTjxo1j8+bNZGRkKMWtat1www1s376dwYMH07x5c+bPnw/gtprJZDJRVFRUIxPglql58+b89ttvdO7cmaFDh/LZZ59hs9lo0qQJABUVFaxfv57IyEhGjx5NixYtmDFjBnv37uXnn3/m9ddfB6Bhw4ZERESQmJhIZGQkNpuNdevWKT23qgtzVyuth9WVVRf2Q9U8qoMXGgpTk3uTlVtCVp7V6/u1a/OiTqbmUQ6eCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyuhRxsyeMq9FX19Q/3NU27xcC54Iptp5IpiaRzl4IpiaR3XyZNJVFaxefPFFdDodU6ZMwWAw8OKLL/7Pa3Q6HVOnTq21wUtpw4YNGI1GYmNjmTt3LiaTiZ49e7Jjxw5ycnKIiIjAbDZTWVmJXn+hmllRUUFpaSk33HAD+/fvJyUlhdjYWBwOBxs3bqRbt27KdeBa2bRo0SJ8fHyIjo7m+PHjnDp1SuGfOnWKqKgoDAYDM2bMIDIyksDAQCwWC3Chp1RWVhY9evTg+++/x2q1cuONN5KamsqhQ4cA17Z+BQUFNTI1adKEM2fOuGVKT08nPj6evLw8ZsyYQcuWLTEYDOTk5ACwePFigoOD+fTTT/HxcU1v06ZN2bt3L6dOnWLPnj1KoSwmJobNmzeTl5eHTqejVatWgGvLwuprr1ZaD6tLy2BQ/36omkf18QCC/AzcFBeBxVJW675VF0ubFzkyi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKaMHutC5nOFZRSX2zH7GwgP8c4v17zhUWRfX1D/XNeF7x0ZPcqYWQRTRo8yZhbBlNXj9SCzWVAPq507d6LT6XA4HBgMBnbu3Pk/r9HprvwiozZKSUmhZ8+ezJkzRzlmsVjo2rWrspIpKiqKkJAQZs+erYzZtm0bAEOGDOHpp59Wjr/xxhvKKqzq3lAtWrRg4cKFbmNOnDjBTz/9RJcuXWjRogWpqamMHj2aRx55BACn00nnzp0pKSnhzJkzNGvWDIPBgN1u55133lEKWN9++y1PPvkkx48fV+5ptVpJTEzkww8/VO75xBNPcObMGSVT48aNSU1NJSkpSdnCsLi4mJtuuomsrCxsNhtpaWmcOXOG7t27X/LZrVmzRilYvfPOO27n1q5dy7/+9S+lcFVbaftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXmUg+cNZklZJfPXHnDbdq9dyzCSBrXF5O/rDYseebwWfX099VgXeSKYMnqUMbMIpoweZcwsgimrR1l0VQWrb7/99opfXyulpaVx3333uR0zm82Eh4eTlpYGuPpcffDBB269rDZs2ADAoEGDalybmZlJhw4dMJlM6HQ6t5VZlZWVbN68mfr16yv82NhY1q1bx65du1i0aBH5+fk0adIEq9WqeGzSpAmNGjUiJyeHl156iW3btuHr60twcDAhISFkZmYC0KtXLwDy8vIYOHAg6enpRERE
kJOTg8lkUu7ZqlUrUlNT2bhxI6+88gpWq5XIyEj0ej12u53Tp08zevRo7r33XrKysli6dCnHjh3D6XSi1+upqKjgnnvucctusVh47733+Oqrr8jNzQVwy65JkyZNmjRp0qRJkyZNmjRpur40f+0BDmbkux07mJHPvDUHGPdAwp9j6ndKGtSWeWvci2ptWriKapo0adKkSZOm61Me9bDKysoiLCwMf3//S54vLy8nPz+/Rk8nT3VxEepiBQcHU1RUBMDQoUNZunQpycnJJCUlkZOTw+bNm9Hr9TRt2lS5ZtiwYezatQuA1157jYqKCv7v//6P9PR0Fi9eTOvWrVm5ciWFhYW0a9dO4bdv3x6Ar7/+miFDhtCwYUMWLlyIXq/H4XAo41q1akVmZiY7d+4kKSmJAwcO8PXXX9OwYUMKCgoAaNSoETqdjoMHD9KlSxeeeeYZVqxYgc1mIyQkRGElJCTwxRdfsGHDBkaMGAHA0qVL8fX1paKigqKiIjp16kSDBg149tlnadGiBXPmzOHdd98lNTWVJk2a0LlzZwCmTZuG3W7nm2++wdfXl4SEBDZv3kxQUBA33nijR/Pj46MVvH6vutDAT/OoPp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimDJ6VGvms+etbkWgajmcsD89nzxLOY3CAv9UjwDBQX48N6wTuYVlWARsW3jx/693ngimjB5lzCyCKaNHGTOLYMrqUTZ5VLDq06cP06dPr7Fqp1rffvstzz77rNKr6VoqODiYxYsXM2XKFJKTkzGZTLRr1459+/a5jcvOzsbpdK0v79q1KxEREbz11lsUFBTw4YcfUlRURFxcHAsWLOCDDz5Q+ltVX3PDDTewYcMGfHx86N+/P9u3b+fcuXMKv7y8HIDQ0FDef/99IiMjeeSRR1i0aBEGg0EZ53Q6ldVUqampdOrUidatW7N9+3ZlTPW9mzRpwqpVqzCZTAwdOpTPPvvMLdOqVauwWq28//77mEwmXnzxRfR6PVlZWUo/rOjoaN555x3Onz+PwWCgsLAQgAULFpCQkFDr567X67zS+PR6VV1o4Kd5VB9PBFNGjzJmFsGU0aOaM3/yzRH2Hs2lY0xD7u/T2ivMasn0HOsKTwRTRo8yZhbBlMmj9rNW86g2nqfME9klVzxvtTm88r7eW7lF/o5B7XOttu+da8ETwVQ7TwRT8ygHTwRT86hOnkzyqGBVXbS5nCorK4VsL2c2mykuLq5xvKioSOkTBRAdHc2iRYuUr5cvX86uXbuoqKjAz88PcK1uCgoK4siRI8q1N9xwAwUFBYwbN47Bgwe78W+44QYAzpw5A8Df//53HnroIWXMiy++yOrVqxVWdUFo06ZNyhin08nKlSuVgpXNZgMgPj7eze+WLVvYunWr8gxPnToFwIwZM4iPj1fGHTp0iN27dyv3TElJoXv37gQHB/Pcc89RWlrK0qVLefjhh5V+WPfffz/vvvsu//jHP3jyySd56KGHKCkp8ahYBeBwOLFYSj1iXI8yGNTfwE/zqD6eCKaMHmXMLIIpo0c1Zz6Qfp43l+9Rvk49cZ4lGw7x4sOdiGsRpgqPongimGrniWDK6FHGzCKYMnnUftZqHtXG8xYz0PfKvcZNRj0FBdZasUGe51iXeCKYMnqUMbMIpoweZcwsgimrx+tBZnPAH151dtUFq5KSEiwWi/J1YWEhWVlZNcZZLBY2bNhAeHj41d7ifyoqKkrp61St4uJicnNziYqKuuJ1AOnp6cTGxgKuXlONGzcmMjJS2drwxhtv5OjRo273cDqdpKen07NnTwClV1VZWZnbPQICXNXTxo0bK+ftdrtbMU2n0+Hv768UrKoLUb9nNWrUCEDxVX3+4hVcF9+zeqvD6h5fb775Jhs3buTDDz/kpptucuvxdebMGXJzcwkNDeWRRx5h165d+Pv789JLL/Hiiy9iMtX+E0xaQ7nLqy408NM8qo8ngimjRxkz
i2DK6FGNmS/+BerFemPZbha+cHutuRdLhudY13gimDJ6lDGzCKYMHrWftWKYMnpUW+bw4ADatQzjYEY+jos+h6zXuXpENTD7e8Xv9f4c6yJPBFNGjzJmFsGU0aOMmUUwZfUoi666YLVo0SJmz54NuAovU6dOZerUqZcc63Q6+ec//+mRwUspMTGRDz74wK2X1VdffYVer1cKSpdSp06dCAoKYuPGjUrBqqioiKqqKu6++243/po1a5RVVACff/45hYWFfPjhh6xatYrmzZuj0+nYsmULo0aNUsYdPnwYgKCgILd733HHHZSXlxMXF8eYMWMoKSmhSZMmigeAffv2kZCQgNFo5I477qBZs2YAbj23DAYDL7zwAhUVFURGRvL3v/+djIwMdDodRqMRuFAszMzMxNfXl/nz5xMREeHW4ysvLw+A119/XVnB5XQ6Wb16NcXFxbz77rt/bDIuIa2HVU3Vhf1QNY/q44lgyuhRxswimDJ6VGvmNT+kXfH8xp9Ock+vlrXmy/Ic6xJPBFNGjzJmFsGUxaP2s1bzqEaeN5nJg9sz54tUUtPylWNtW4bx1L3tPX5PL9NzrCs8EUwZPcqYWQRTRo8yZhbBlNWjbLrqglXPnj0JDAzE6XTy73//m7vvvpu2bdu6jdHpdAQEBNC2bVvat2/vNbPVGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOJSIiQhk3cuRIsrKy2Lx5MwB+fn4kJSUxa9YswsLCaN26NQ6Hg/Lycrei01133cWLL77Ijz/+yHfffcf58+d5+eWXCQ4OZsaMGeTk5DB58mQAfvvtNyZNmkT//v3ZuXMnu3fvdvNavRKroqKChx9+mD179jB69GgMBoOygqqqqkoZ36RJE26//XZWrlypXFu9Mis3Nxe73U5xcTGDBg3C6XQyYcIEdDqd29aLVVVVZGZmkpCQQP/+/fniiy946KGHCAgIUHpqORwOZWz9+vVp1qwZTz/9NMuWLeOrr77i9OnTboWyPyqth9WVVRf2Q9U8qo8ngimjRxkzi2DK6FFtmY+eKbri+cOnCxmhor4TongimGrniWDK6FHGzCKY17tH7WetOKaMHtWYOTQUpib3Jiu3hKw8K5ENTESGB/3vC69CMjzHusYTwZTRo4yZRTBl9ChjZhFMWT3KoqsuWHXs2JGOHTsCri3q7rzzTlq39m7j2f+l4OBgFi9ezJQpU0hOTsZkMjFkyBDGjh3rNs7hcGC3292OjR49GqfTycKFC8nPz0ev19O/f3+34oyvry/16tWjXr16jBs3Drvdjk6nY/Xq1cqqqB07drB+/XomTpzI8uXL+c9//kNkZCSDBg1izZo1BAcHU1FRgcViISoqiltvvZX//ve/WK1WfH19CQgIoH79+gCkpqYC8PTTT/PLL7+wePFijEaj4r26YHXs2DF8fHyYNGkSH374IVlZWdSrVw+Hw6FsC5idna3k+O233/jtt9+UrwsKCpQVVtXbOnbr1o0dO3bw1FNPcdddd9GmTRv69u3LsWPHalWw0npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGtmVs3CSb1xPnLno9tGqL1nbjOeCKYMnqUMbMIpiwetZ+1mkdv8H7Ym8WxzCJaNwmmV3ykxzzwvscgPwM3xUVgsZR59D19sdQ+LyKYaueJYMroUcbMIpgyepQxswhmXfB4ID2f03lWmoWbaONhz9PrSWazwB5WF+vpp592+7q4uJjAwECqezOJVHR0NIsWLbrimKVLl9Y4ptPpSEpKIikpCYBhw4YpK5mqVVxcTH5+PuPHj2fw4MEMGzaM4OBgpVgFMGDAANavX8/58+dZt26dcnzatGlKP6wdO3bgcDhwOp08//zzPP/88wBMnTqVpUuXKj21Dh06BLi2EazO5HQ66dy5M1arlaioKGw2G9nZ2djtdu68807uv/9+ALZs2cJTTz1Fq1atANi2bRsAt9xyC/Pnz1d8JSUlsXXrVvr16wfA
zp07AYiLi7vkc6yoqLjis72StP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPast8d/cWfP795beq6n9zc63vxHXKE8GU0aOMmUUwr3eP2s9acUwZPKaftTB16a9U/95r6+5MFqw/yEsjb6J5hFkVHkXzRDBl9ChjZhFMtfNEMDWPcvBEMDWPtVNOQSmvL/mVkrILO6kFBfjw8sibCA8J9IZFaeTxZoqpqamMGjWKDh060K1bN37++WcA8vPzefLJJ5XCiFqVmJjIjz/+qKw4gpr9sNLS0pTiUrV69eqFTqcjJSVFOVZZWcnXX39NYmKich1ARkYGGRkZyjidTofD4eDmm28G4OTJkzRo0IBNmza5jTEajQQFBdGkSRNOnTqF3W5Hr9fz9ddfK+MaNmwIoBSs0tLSCAwM5JdffnHL5HS6OqlWZ9q/fz/h4eGsW7eO7t27065dO4YOHcqnn34KUGObR02aNGnSpEmTevTcQwlXdVyTJk2aNF29tJ+1mmqri4tV1bI74LXFv/45hjRp0qRJkyZNQvX7YhVASVkVU7R/+69aHq2w2r17NyNHjiQiIoKBAwfy2WefKefCwsIoKSnhk08+oVu3bh4b/b1OnDjBa6+9xp49ezCZTAwaNIh//vOfGI3GK17ndDr58MMPWbFiBfn5+dx44434+fnV6Ic1cOBAXnvtNbZt20ZpaSkrVqxg48aNbNmyBXD1wwoLC2Pv3r307t2b/Px8ZRu/6n5YFosFX19fmjdvztChQ7Hb7ZSVlSnFo2bNminj4uPj2bp1K4mJiRQUFGAwGCgrK1O2W6zeyu/222/ntddeY+bMmRQVFeHj45rChIQEhdWgQQNKSkro27cvdrudqqoqKioq0Ol0So+v3NxciouLKS8vx8/PD7vdTmpqKnv27OHOO+9UvNVGnjZovR5VFxr4aR7VxxPBlNGjjJlFMGX0qObM7aIasOSlvnz5YzoHTxbSpnkId/do6Q2LUj3HusITwZTRo4yZRTBl8qj9rNU81kZb95ypUayqlt0BP+4/S2JC41rz1ZhZNFNGjzJmFsFUO08EU/OoTo8yZhbBVKvHfSfyahSrqlVSVsWhUwW0j6pfa75s8qhgNXPmTKKjo/n0008pKSlxK1iBqz/SF1984ZHBS6moqIiRI0fSokULZs2aRU5ODtOmTaO8vJyJEyde8doPP/yQ9957j/HjxxMTE8Py5cs5ceIElZWVSj+swYMHs337dnQ6HTNmzCA5OZmysjKqqty/8fz9/XE6nRQXF6PT6QgKCuLcuXMcOHBA6f+k0+mIi4tj48aN6PV6fH19MRgMFBcXu21FGBwcTFBQEBaLBYfDQb169SgvL3dbJQXQunVrvvnmG3Q6HXq9Xhl37tw5t3F2ux273U5FRQX+/v74+flRVFTEkSNHiImJwel0Ul5eDri2IiwqKsLX15eqqqoaWyRejfR6HaFeaD58vaouNPDTPKqPJ4Ipo0cZM4tgyuhRzZkfvrudVziXkkzPsa7wRDBl9ChjZhFMmTxqP2s1j1ejtLPFVzx/PMvCoNs87wOupszXiimjRxkzi2CqnSeCqXmUgyeCqXm8emXln77i+czzpSR2rv3iENnkUcEqNTWVcePGYTQa0el0Nc5HRESQl5fnyS0uqVWrVmG1Wnn//fcJCQkBXAWayZMnk5SUpKwi+r0qKiqYN28ejz32GI888ggAnTt3pl+/fsTGxrJq1SoA1q9fz+LFi9mwYQNRUVGEhIRw8803s2HDBvbt20d8fDzgWqXUsGFDfvjhB+Uezz77LO+99x79+vXDbDZjs9n48ssvmTRpEg888AAAixYt4o033uCrr77iqaeewmw2c/DgQaqqqti6dauSqW/fvpw5c4acnByCg4MB+Pzzz/nLX/7CjBkzANd2g3fddRffffcdTz/9NGazmYKCAoqLi9myZYvSd+vf//43H330Ed988w0xMTGYzWb0ej19+/Zl1qxZiv/evXvz888/U1lZ
ia+v71XPjcPhxGIpverrrncZDPI1GZTRo4yZRTDVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEcy64PFcYRnF5XbM/gbCQ2r3i6uoG+qx9QrnW0WaKSio/YdEZZwXGT3KmFkEU+08EUzNozo9yphZBFOtHiPDrvyaoXH9QI/+7b8eZDYH/OFVbB4VrHx8fHA4Lj+ROTk5BAZ6v6lYSkoK3bt3Vwo7AP379+eVV15h+/btDB48+JLX7d69m5KSEvr3768cMxqN3HHHHWzevNmNHxMTo/StioqKwmazERISwvfff098fDznz5/HZrPRpUsXt3sMGDCA9evXc+bMGeV6h8NBv379lDHZ2dn4+/uzY8cOnnrqKaKiovj666/p0aOHksnpdGKxWHA6nWzfvp2//OUv+Pj4kJOT4+a/uk/W4cOHsdlsREVFUVJSAkC9evWUcSdPnkSv1yvbEbZs2ZJ9+/YpPa2qFRwczLlz5/jtt99qZPuj8nbTu+tJMjQZFM0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY9y8EQwNY+1U0lZJfPXHmB/er5yrF3LMJIGtcXkf3Uf5OzVPpLFGw9fcltAgx56tLvBK/llmBfRPBFMtfNEMGX0KGNmEUwZPcqYWQRTbR7bNA8jKMDnktsCBgX4ENcsVPt9+VXIow0fO3TowKZNmy55rrS0lNWrV9e66HElpaWlERYWxqOPPkpCQgI9e/bkgw8+IDw8XCngXO46gO+++45bb72V+Ph4HnjgAYxGI1lZWcoWeWlpaURGRjJmzBg6duxIamoqW7dupUmTJgqjejVWeHg4AwcOpH379tx1110cP35cYXTq1AlfX18CAgL44IMP6NmzJx06dGDFihU0btxYYSUmJlJWVoavr6+SqWvXrhQVFREaGkpaWhpGo1HpZ3X06FHuuusu2rdvz3PPPUdoaChVVVWcPn2aXr16KVsP9u/fnw4dOhATE8N3332Hj48PgwYNAqBHjx4Aygq47OxsEhISOHbsGODqEaZJkyZNmjRp0qRJkyZNmjRd75q/9gAHM/Ldjh3MyGfemgO14r008iZ+/yFig951XJMmTZo0adJ0/enlkTcRFOC+NigowIeXtX/7r1oerbB65plnePjhh/n73//O3XffDcCRI0c4c+YMCxYsID8/n6eeesorRi9WUVERGzZsoE2bNm49rPR6PUVFRZe9zmKxYDAYmDNnjlsPq8WLF+N0OikqKsLf35+ioiJOnTpFREQEM2bMID8/n5dffpnDhw/jcDj4/PPP+eijjwBYsmQJ999/PxMmTGDChAm8/fbbikc/Pz9iY2NJTU1l+fLlDBs2jL1797Jnzx7Onj2LzWYD4K677uLZZ59l06ZNtGrVihEjRrBq1SoMBgMOh0PJdOutt3Lw4EHeffddBg4cSIcOHVizZg16vV65Z3R0NIMGDWL16tUUFRUpfbfsdjvz589XemsNHDiQCRMm8PHHHxMVFcXSpUuprKx0e8a1lY+P9xrfXS+SpcmgSJ4Iptp5IpgyepQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2Cq1ePZ81a3lVXVcjhhf3o+eZZyGoVd3c4x0Y1D+HhCX7bty+LomSJaNwmmV3xkrT1eLFnmRSRPBFPtPBFMGT3KmFkEU0aPMmYWwVSzxxsaBDHn2Vs5mJHPqVwrzcJNtGkR5g2L0smjglWHDh2YP38+kyZN4vnnnwdg2rRpADRr1oz58+cTGxvrucvfyel0YrPZavSwmjhxImVlZZe9rqqqCrvdzuOPP+7Ww+qWW26hoqJCGVdaWkpRURGrVq1y29bv5Zdf5tChQ8yYMYPbbruNL7/8klatWvHqq68C0LhxY86fP6+s1AJo2rQpqamp+Pn5sWzZMuLi4pg/fz5PPvmksp2ir6+v0gMsMzOTlStXcscddxATE8PUqVOVTC1btgQgKCiIjRs3EhkZyeuv
v86SJUs4cuSIcs9nnnmGb7/9FqvVisFgwG6306VLF5599lmWL19OdHQ0er2eESNGsHjxYp5//nlsNhsNGzbk3LlzAJfsSfZHpNfrCA011epaGXS9Nxm8FjwRTLXzRDC9ycvMLeHooRwiG5iIDA/yGlfNmb3NFPUMQa7nWFd4IpiaRzl4IpgyepQxswimjB5lzCyCqTaPJ7JLrnjeanPU+j3uPbfcWKvr/oiu93m5FjwRTLXzRDBl9ChjZhFMGT3KmFkEU80ee4aa6Pm/h2m6gjwqWAF0796dTZs2cejQITIyMnA6nTRt2pR27drVuujxv6TX64mMjKzRw2rixIlYLJbLXle9aqhPnz7KMaPRSGxsLD/++CPBwcGAq7BVr149pVgFcP/99zNp0iSioqJYv349hw4d4ssvv6Rjx47KmKVLl7Jy5UomTZqkrFYqLi4G4JtvvlH4AE2aNOHMmTPK1waDgcaNG/P1118rxywWC1OnTlUyVa/IGjduHMOGDVPG7d+/nyNHjij9wpYsWYLRaOSHH35g/fr1vPjii7z99tsMHz6cOXPmMGPGDAD++c9/cu7cOb788ksACgsLFWZ4ePhln+OV5HA4sVhKa3Xt9SyDQY4mgyJ5Iphq54lgepNXUlbJ3C9SSU278InU9lFhPHVve0wBV7fXvyiPInjeZIp6ht70KIongql2ngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCqVaPgb5X/r2FyaivdaN0tWYWyRPBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOr2DwuWFUrLi6OuLg4b+GuWldTHPsjYy83xul0es1HbVm/v+73444fP05UVBRGo1E5ZjAYiImJ4dSpU8oxf39/2rZty6FDh3j77bfZu3cvr7zyCuBaPVdbaU3kLq/rvcngteCJYKqdJ4LpDd6c1ak19vo/kJ7P7NWpjHsgwSM2qDOzt5minyHI8RzrGk8EU/MoB08EU0aPMmYWwZTRo4yZRTDV5jE8OIB2LcM4mJGP46K32nodtGkRRgOzv8d+1Zb5WvBEMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkqypY/fLLL7W6SZcuXWp13eXkcDjIysrCYrFgNpsB+OqrrwCUrwFOnDjBa6+9xp49ezCZTEr/pi1bthAfHw9AZWUlhw8fBlB6WPn4+JCXl0evXr2wWCzExcVx9913Y7fblVVMpaWuVURffvkl69atw9fXlzvuuIOCggLAtc3fxX7uueceCgoKiIyMZPjw4Zw5c0bZErA60+nTp+nevTtlZWV07NhReW7VjOoC1OzZs3nrrbcwmUwMGjSIXbt2uXmqqqpiz5499O7dW9niz263c/jwYaWoaLPZeP311/n000/x8fHhr3/9Ky+99BIACQkJbqvLrlZaD6uakmnPVlE8EUy180QwvcUTsde/tz2K4nmLKfIZesujSJ4Iptp5IpiaR3V6lDGzCKbaeSKYmkd1epQxswimmj0mD27PnN+tem/b0rXq3ZP3t2rOLIongimjRxkzi2CqnSeCqXlUp0cZM4tgyupRNl1VwWr48OFXvYJIp9Nx6NChqzZ2Jel0OoxGI8nJySQlJZGTk8P06dMJDg4mIMC132RRURGDBg1Cr9cze/ZscnJymDx5MjqdjoULFxIWFkbr1q1ZuXIlVqv78v7q7fwAnnzySbZs2cLUqVMJCQmhXr16gKsoBFBSUkJiYiLt27dn0aJFNVjVX+fn5zNixAhycnKYMmUK/v7+biulqotXwcHBjBw5ki+++IL33nsPs9msZKruZZWfn88999xDw4YNWbJkieLlYlZFRQV6/YW/GBMmTODkyZO89tprAJSXl/Ppp5/i7+9PVFQU+/fv57PPPgPgX//611XPSbW0HlZXlkx7toriiWCqnSeC6SlP5F7/1VJbZm8zr8UzhOv/OdZFngim5lEOngimjB5lzCyCKaNHGTOLYKrRY2goTE3uTVZuCVl5VtX3
ZhXB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiqypYLVmyRJSPq1JwcDB9+vThzJkzJCcnYzKZGDJkCOvWrVP6RK1atQq73U6DBg3o3bs3ADt27GD9+vU8/PDDLFy4kPz8fOLi4hgxYgQfffQRwcHBVFRUUFxcTPPmzYmJiWH+/PkYDAYCAgLQ6/UKPzU1FYBhw4bxyy+/sGPHDkJDQ5UCVfW4Y8eOYTAYGD58OP/973+xWq00aNCAiooKgoJcL4Czs7MB6Ny5M0ajkblz5xIQEIDBYMButyus3bt3A/DEE0+wceNGsrKyCAsLU1ZRVY/7+OOP2blzJ3PmzFHYhYWFzJ8/X1m1deLECXQ6HQ0bNuTIkSOAaxUWQEREBGVlZUqh7Gqk9bC6tAwG9e+HqnlUH08E01u8urTXv1rnReQzBHmeY13iiWBqHtXpUcbMIphq5wGcKyyjuNyO2d9AeIjnb0plnBcRTLXzRDA1j95hBvkZuCkuAoulzKPXYaL8iWBqHtXpUcbMIphq54lgah7V6VHGzCKYsnq8HmQ2C+ph1bVr11oZ8raioqIoKChg0aJFyrHi4mI+/vhjZSu7lJQUbrvtNubMmaOMGTBgAOvXrycoKIjvv/9eOT5t2jQiIyPx9/dnx44dOBwO9Ho9s2bNUsZMnTqVpUuXKvzqVWPNmzdn4sSJgGtFWefOnbFarURFRWGz2cjOzsZut/PEE0/w/PPPA64tCZ966imFtW3bNgCCgoKYP3++cs+kpCS2bt2qjDtx4gQA7du3Z+zYsQBYLBa6dOmCwWBQtjzU6/V0796d7t27ExMTA8C8efMICwtT2Onp6djtdjIyMpRjx48fB6Bv374MGDCAmTNn/tEpcZO2P+flVRf2Q9U8qo8ngukpry7u9a+2ebkWz9BTj9eCJ4Kpdp4IpuZRDp4IpowevcErKatk/toDblu7tmsZRtKgtpj8fT21KOW8iGCqnSeCqXmUgyeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFV1WwupLOnz9PZmYmAI0bN6Z+/freQtdQYmIic+fOZfjw4aSmpmIymYiNjUWv19OzZ08A0tLSuO+++9yu69WrFzqdjs8++4zPPvuM/Px8YmNjycrKom/fvsp1ABkZGTz22GPs2bMHX19fwsPDcTgc3HzzzQCcPHmSBg0a8Mknn/D555+Tnp5OZGQkTqeToKAgmjRpwvHjx7Hb7ej1esaOHcuRI0ewWq20atUKQPl/WloagYGB/Pzzz26Z/P39AZRMmZmZBAcHM3/+fP7973+TlZVFy5YtMRgMREZGKj2uwFXAe+ONN5SvJ0yYwKuvvkrDhg0B6N27N4888gi//vorx48fp7y8nAYNGpCXl8fs2bNp0aKFV+dMkyZN15+SBrVl3hr3X9a1aeH6ZZ2mPybtGWrSpElT3dT8tQc4mJHvduxgRj7z1hxg3AMJf44pTZo0adKkSZMmTZo0aarj8rhgtWPHDv7973/X6FMVFxfH+PHj6dGjh6e3qKEBAwbwzjvvcOjQIZ544glOnjzJ6tWrad26NREREYBr5dGmTZvYtGkTmzdvBsDPz4+AgADOnj1L37596dSpE4sWLSI3N5f+/fsr1/n6+qLT6fjll194/PHHKSkpYenSpQA0a9ZMGdesWTN2795NdHQ048aNY/PmzWRkZBAZGQm4+mgB3HDDDWzfvp3BgwfTvHlzZRVVXFycwgoLCyMrK6tGJsAtU7t27fjtt9/o3LkzQ4cO5bPPPsNutytjqjVixAiysrKUrw8ePMgDDzzA3LlziY2NJTw8nE2bNtGrVy8SEhJYtmwZTZs2JS8vj+zsbKWAVxt50pT2elVdaOCneVQfTwTTm7zgID+eG9aJ3MIyLF7eDuni/6uN502mqGd4sTcZnmNd4Ylgah7V6VHGzCKYauWdPW91+6BBtRxO2J+eT56lnEZhgX+qR5FMGT3KmFkEU0aPMmYWwZTRo4yZRTDVzhPB1Dyq
06OMmUUwZfUomzwqWG3evJl//OMf1K9fn8cff1xZlZOens6aNWsYPXo077zzDnfccYc3vCrasGEDRqOR2NhY5s6di8lkomfPnuzYsYOcnByleONwOHA6L+yzVFFRQVlZGeHh4ezfv5+UlBRiY2NxOBxs3LiRbt26Aa6t/SorK+nZsyeLFi3Cx8eHmJgYDh8+zKFDhxR+VlYWUVFRGAwGZsyYQWRkJPXq1aOgoMDNb1ZWFj169OD777/HarUSFxfHnj172LNnD8OHDwfAarXWyNS8eXNOnjzplikjI4P4+Hjy8vKYMWMGLVu2xNfXlzNnzij327NnDwcPHnTzkJOTA8CcOXN47733AFi9ejVhYWFKYezuu+9mz549rFixgocffrhWc6PX6wgNNdXqWhlUFxr4aR7VxxPB9CZP1N95NWf2NlPkz02ZnmNd4GXmlnD0UI7WzF2lTLXzRDBl8uitv38nskuueN5qc3j8c12meRHJVDtPBFPzKAdPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWL3zzjvceOONLF++nKAg9zd9TzzxBA8++KCQglVKSgo9e/Z0609lsVjo2rWrspLJbDYzYMAAnn32WWXM7t27cTqd9O7d2227vDfeeENZhWU2m6mqqqJ169YsXLhQGfPJJ58wceJE9uzZw6233kq9evU4deoUL7zwAo888ogyrn///qSlpXHmzBmCg4MBVwHsnXfeUb7OyMjgrrvu4tixY8o9S0pKSExMdMs0bdo0Pv74YyVTUFAQhYWFvP76624roDp37sy5c+ew2WwYjUZSUlIwm838/PPPxMbG8txzzzFq1CjuvfdeAgMvfNrz4p5W4CpYZWdn85///OfqJ+X/y+FwYrGU1vr661UGg/ob+Gke1ccTwZTRo4yZRTBl9OhNXklZJXO/SCU17cKqjPZRYTx1b3tMAbXvdyPjvIhgqp0ngimTR2///Qv01V3xvMmop6DAetVckGteRDLVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEUxZPV4PMpsD/vCqM48KVqdPn+bZZ5+tUawCCAoKYsiQIbz99tue3OKSulR/KrPZTHh4uNKDKioqSvlztapXHXXs2NHteHR0NIsXL6a8vJyoqCgApddTtdLT0zEajZw6dUo5v3//fmU8uApTeXl5isebb74ZvV5PQECAUqyqPgeQm5ureK2srKRx48Zu98zMzMTX11cZHx4eTmFhIS1btlTGFBcXY7VacTqdnD59mujoaNLS0mjZsiU6nfub6Us9k99r165dbplqI62h3OVVFxr4aR7VxxPBlNGjjJlFMGX06A3enNWpNfrdHEjPZ/bqVK/0u5FxXkQw1c4TwZTBo7f//oUHB9CuZRgHM/JxXNjMAb3O1Yewgdnf4/wyzMu1YKqdJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUeFayioqLIz8+/7Pnz588r2wR6UxaLhcrKSh599FH27NmDyWRi0KBBmM1mpW9UYmIiH3zwARaLBbPZDLhWWAGcOXOGW2+9lfz8fOLi4rjttttwOp0UFRXRqVMndDodOTk5jBkzhm3btuHj40NVVRVhYWEKv02bNnz77bfs3LmTt956i/T0dEJDQ7FYLICrf5XRaFSKTG+++SZr167FarViMpkICQmhpMS1nUivXr0AOH78uJIpICCA4uJit0ytWrXi2LFjbNq0iTVr1pCVlUVoaKjyXKrHWSwWAgMDmTBhAgAzZ85k7969mEwmZQxAamoqK1as4IcffgBcfa+OHTvG7NmzPZofrYdVTdWF/VA1j+rjiWDK6FHGzCKYMnr0Fk/rdyOfRxkzi2B6gyfq71/y4PbM+d2qrbYtXau2PHktLMu8iGaqnSeCqXlUp0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNHhWs/vWvfzFu3Djat2/vtkUduPpbffLJJ8ycOdMjg5eS
0+nks88+o02bNsyaNYucnBymTZuGXn/hG2Ho0KEsXbqU5ORkkpKSyMnJ4YcffkCn07Fw4ULGjx9PTEwM48aNc/Po5+eH2Wzm2LFjFBYWMnr0aL777jtSU1Px9b2wZUjXrl0B+Oijj+jduzd9+vRhyZIlNby2adOG7777jqVLlzJixAjOnTvHunXr8Pf3V/prNWrUCJ1Ox48//kiLFi144okn+O9//0tBQQGVlZUK6+abb2bjxo28++67/OUvf7nsPUtLSzl69CiHDh0CoFOnTuzbtw+LxUKDBg2UcZ999hkpKSmYTK499o8fP063bt08KjJqPayurLqwH6rmUX08EUwZPcqYWQRTRo+e8rR+N2J4Iphq54lgXu8eRf39Cw2Fqcm9ycotISvPqvWlUylT7TwRTM2jHDwRTM2jHDwRTBk9yphZBFNGjzJmFsGU1aMs8qhgtXTpUkJDQxkzZgwNGzakWbNmAJw6dYpz587RokULlixZ4lZU0el0zJ071yPTRqORiooK3n//fUJCQgCw2+1MnDgRHx9XpODgYBYvXsyUKVNITk7GZDKRkJDAzp07GTFihNJ3KioqioKCApxOp7JtX7169SgqKsLpdDJ37lzi4uJ44YUXeOONN5QiU/369QEwmUzs2rWLvXv3ctddd5GTk8O2bdsUVkREBODqF7VkyRIiIyP5v//7P958802MRqOSyc/Pj4qKCoqKipg7dy6dOnXiL3/5C7NmzVIy3XDDDQCEhITwzTffYDKZGD58OFu3biUtLU25Z25uLqWlpZSWunpJ7dy5U7nPxQWrH374gby8PGUbQ6fTyc6dO9m4cSNjxoyp1dxoPawuLYNB/fuhah7VxxPBlNGjjJlFMGX06C2e1u9GPo8yZhbB9AZP5N8/gCA/AzfFRWCxlHnEqZYs8yKaqXaeCKbmUZ0eZcwsglkXPJ4rLKO43I7Z30B4iOe/pKwLmWX0KGNmEUwZPcqYWQRTVo/Xg8zma9TD6ujRo8CFQkpmZiYABoOBG264gYqKCmVMtX7fV6k2MhgMBAUFKcUqgN69ewNgs9mUY9HR0SxatEj5esGCBezcuZN27dopx5YvX87gwYM5fPgw/v7+ynE/Pz+2b9+ufO1wOJg2bZqy4qlRo0YAdO/e3W0LvZkzZ7Jt2zYCA11bi1RUVCj3adq0qTLuww8/pKysTPnax8eHevXqsW3bNuVYVlYWs2bNUjJV+/vb3/7Gs88+q4w7c+YMaWlpiqcbbriBzMxMDh065LbqrEOHDkrBDeC7774jPz+fhx56iJycHDp16sSCBQvwVNr+nJdXXdgPVfOoPp4IpoweZcwsgimjR0954cEBxDYL4fCpwhrn4pqHaP1uVMRUO08E83r3eC36TXnq8VrwRDBl9ChjZhFMGT3KmFkEU40eS8oqmb/2gNv2s+1ahpE0qC0mf98rXHlt/F0LpoweZcwsgimjRxkzi2DK6lEWeVSw+vbbb73l46pkt9spLCx0609VXVy6eNXS71W9UungwYMMGDAAgMrKSjIzM7Hb7ZSXl+Pv749Op6OiooKMjAxle7yffvoJp9OpbAuYnZ0NQEZGhts9qgt01aub/Pz8lOurC1ZFRUUUFBS4bTH4RzKVl5cDcOzYMbd7pqWlKZ6io6MxGAzKPXv06AFAeno65eXlbgVDq9XK6NGjqayspHXr1srz0aRJkyZNmjR5V5f9uI7zcic0adLkLSUNasu8Ne6/TGzTwvXLRE2aNGnSpMkTzV97gIMZ+W7HDmbkM2/NAcY9kPDnmNKkSZMmTZrqsOpkhcJms+Hv7+/Wn2r69OkEBwdTVVWljBs5ciRZWVls3rwZcBWRDAYDixcvJjw8nNatW7Ny5UqluNStWzeCgoKoqKjAbDYzZswYxo0bR1lZGdOnTycsLEwp+BQVFQFw4sQJOnbsSGVlJWFhYZw7d87tvN1uJygoiEmTJvHqq6/i4+NDYGAgfn5+SgGqOpOvry+JiYlUVlYS
HByM1Wp1y1TN/O6770hISMButxMaGkpubq7beYPBgNlsJikpCXAVzXx8fNxWpAHceuutWCwWwLVKCyAmJoa3336bu+++u9bz40mj6etVdaGBn+ZRfTwRTBk9yphZBFNGj97inT1v5dAlVlcBHDpVSJ6lnEZhgbViyzgvIphq54lgyuQxOMiP54Z1IrewDIsXt2u62JvaMotkyuhRxswimDJ6lDGzCKZaPZ49b3X7MES1HE7Yn56vvcZTAVPtPBFMzaM6PcqYWQRTVo+yySsFq8rKSnJycrBYLG5bzlWrbVvvfnpRp9Nx//33c+TIEaU/1ZAhQ0hJSXEb53A4sNvtbsf0ej1PP/00CxcuJD8/X1lZZLPZeO2116ioqOCll16iefPmtGjRgnHjxuHj48Mdd9xBTk5OjXw6nQ6TyURBQQEWiwVfX1+3bQkdDgfl5eXUq1ePqqoqysvLsVgsNGzYkJycHDdWeXk5ERERFBQUUFpaSnl5udKX6mIZjUYCAwMpKirCYrEQGBhIScmFhtJVVVWUlpYSEhJCaWkpFRUVlJWV0axZMxyOC0sRq4tVv9e4ceNqXbDS63UeN46/nlUXGvhpHtXHE8GU0aOMmUUwZfToKe9EdskVz1ttDo//7ZRxXkQw1c4TwZTJo8jXqGrNLJIpo0cZM4tgyuhRxswimGrzqL3GE8MTwVQ7TwRT8ygHTwRT86hOnkzyqGBlsVh48803WbdundLb6WI5nU50Oh2HDh3y5DY1ZDabMRqNbv2pANavX+9W4Fm6dGmN6yorK3nkkUeU1Ufz5s3j/fffR6fTcccdd+Dv78+cOXM4efIkS5YsISIiQrl+6NChSr+ugADXN12/fv2YOXMm4Fol1bdvX3JychQf58+fp6qqihUrVhAVFQXAtm3bGDVqlJtXvV5PgwYN3Ipuzz77LBs3blTGVRebnnjiCZKTkwEoLCwkMTERQBlXWFiIw+Hgyy+/VFZVffLJJ7zyyivExcUp/K5duxIYGMi8efMYPny48mdP5HA4sVhKPWJcjzIY1N3QFbzv0ds8EUy180QwZfQoY2YRTBk9eosX6Hvl/p0mo56CAmut2DLOiwim2nkimDJ6lDGzCKaMHmXMLIIpwqO33xfVhcyaR/V41F7jyedRxswimDJ6lDGzCKasHq8Hmc0Bf3jVmUcFqxdeeIHvvvuOAQMG0KFDB+rVq+cJ7g8rKipK6dtUreLiYnJzc5Wi0OWuA1c/p9jYWABSUlJo1KgRdrsdf39/ADp06EBmZibbt29n8ODBgKv4lp6eTs+ePQHIy8sDUApY4Fr51K5dO3JycpR7FRYWAlC/fn1lXM+ePfH19cVkcn3SxmazUVVVVWM11W233cb69euVolP1doPVvbAAQkJCaNGiBUePHlWOFxcXo9fr3Xj9+/dn4sSJSkaR0hrKXV5qb+gKdaPJoNo9yphZBFPtPBFMzeP1yQsPDqBdyzAOZuTjuGihtl7n6qPTwOzvsV8Z50UEU+08EUwZPcqYWQRTRo8yZhbB9AZP9PsiNWYWzdQ8Xr2013hieCKYaueJYGoe5eCJYGoe1cmTSR4VrLZv387w4cOZMGGCt/z8ISUmJjJ37lyGDx9OamoqJpOJ2NhY9Hq9UlC6lDp16kRQUBBTp07l1KlT5OfnU1VVhZ+fH4MGDVLG9enThw0bNvD+++8zZcoUfH19iY+Pp7CwkFtuuQWA06dPA65+Utu2bSM9PZ3IyEiqtyBs0KAB4NrmT6fTMXbsWI4cOYLVaqV9+/bY7XalwHfq1CmcTicZGRlumZo3bw5Aw4YNAVfxy2AwsGzZMmbPnk1WVhYtW7YkJycHHx8fjEYjACUlJVRVVTFw4EBOnz5NWVkZn3zyCYBbH6tz585x6tQpYmJiADAYDMydO5cnn3zSswnSJExaQ1dNmjRpqptKGtSWeWvcf7HWpoXrF2uaNGnSpEmTpquT9r5Ik1qkvcbTpEmTJk2avCuPClYhISFKUeVa
asCAAbzzzjscOnSIJ554gpMnT7J69Wpat27ttoXfyJEjycrKYvPmzQD4+fmRkJDAtm3b6Nu3L506dWL69OmUlpbSv39/5brbb78dnU7H2bNneeKJJygpKWHZsmWEhYURHx8PuLZDNBgMpKWlER0dzbhx49i8eTO7du0CoKioCH9/f8rKyqhXr56yWqt58+Z8+OGHOBwOgoKClLHg6gX2+0zg6pNVfU+TycTevXvp3LkzQ4cO5bPPPqOwsBC9/sKSutLSUnQ6HRkZGURFRXH48GEmTJiA0WgkLCxMGZebm0uLFi2Ii4vjxx9/xGKx8M4773Dw4EGmT5+ubHt4tfLx0ZrK/V5qb+jqLY8ieSKYaueJYMroUcbMIpgyevQmLzjIj+eGdSK3sAyLl7cuuvj/3pCan6Moptp5IpgyepQxswimjB5lzCyC6S2eyPdFas0skql59IypvcaTy6OMmUUwZfQoY2YRTFk9yiaPClZ/+9vf+PLLL3nwwQfdCiaitWHDBoxGI7GxscydOxeTyUTPnj3ZsWMHOTk5StHK4XAoK54AKioq2LNnD127dmX//v1Kv6jAwEA2btxIt27dAPj2229xOp00bNiQRYsW4ePjQ48ePdi2bRv79u1TilYOh4OoqCgMBgMzZswgMjKSZs2acerUKeWeVVVVWCwWevTowffff4/VaiU+Pp5ffvlF2VawWj4+Pm6Zunbtys8//6wUtMBVjIqPjycvL48ZM2bQsmVLwsLCKCgoUMbodDqeeuopsrOz+fLLLwHXNoIOh8NtnqZPn05ycnKN7RW//vprnn/+eZo0aXLVc6PX64Q2tK7rUntDV6gbTQbV7lHGzCKYaueJYGoer3+eqH8jZZwXEUy180QwZfQoY2YRTG/yMnNLOHooh8gGJiLDg7zGVXNmUUwZ5uVavC9S87yIYmoePZP2Gs+7UrtHGTOLYMroUcbMIpiyepRFHhWskpOTsdls3HfffQwaNIiIiAgMBkONcXfeeacnt6mhlJQUevbsyZw5c5RjFouFrl27uvWdWrp0qdt1u3fvxmq1MmHCBOLi4gDo3r07jRs3VopX1XwfHx8GDhzI+PHjAVcPq5tvvpnvv/+e+Ph4TCYTTqeT++67j8cff1y5dvLkyaxYsYKSkhK31V7vvPOOW0+prl27UlxcDKAcb9u2LcuWLVPG7N+/n/vuu0/ZflCn01FVVUVSUhJ9+/ZVxj366KP8+OOP2Gw2jEYjZrOZyspKpk6dyk033cSLL77IG2+8waBBg9w89O3blyNHjrg9o7/+9a8cPny4VsUqAIfDicVSWqtrr2cZDOpu6Are8SiSJ4Kpdp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYMrmsaSskrlfpJKadmG1TPuoMJ66tz2mgNr3IVJzZlFMmeZF5PsiNc+LKKbmUZ0eZcwsgql2ngim5lGdHmXMLIIpq8frQWZzwB9edeZRwSonJ4edO3dy6NAhDh06dMkxOp3usudqq7S0NO677z63Y2azmfDw8BqrhX5/HUBUVJRyLCoqCqvVSlZWFuXl5fj7+3Ps2DGqqqrcxul0Olq2bKkwTCbXp2d+v21eeXk5AJmZmURHRxMQEIDBYHArFDmdTioqKpSt/po1a3ZJ1rlz59yY/v7+wIX+WNWqqKgAXH21oqOjiYqKqvEcSkpKyM3Ndct0KeXm5l6y6Hg10hrKXV5qb+jqqcdrwRPBVDtPBFNGjzJmFsGU0aOMmUUwZfQoY2YRTLXzRDBl8ThndWqNPkQH0vOZvTrVK32I1JhZNFOGebkW74vUOC+imZpHOXgimDJ6lDGzCKaMHmXMLIIpq0dZ5FHBasKECRw4cICkpCTi4+OpV6+et3xdURaLBbPZXON4cHCw2/Z5J06c4LXXXmPPnj2YTCaaN2+O0WjEz89PGZOYmMjs2bNxOp1K36ns7GwAZs6cyaRJk4iLi+PFF19040dGRgLw0Ucf8dZbb+Hr60ufPn34
8ccfgQt9qerXr8/x48fp168fmZmZREZGcvvtt1NeXq4UhoxGIzqdjtTUVHr27InVaqVjx474+flhNBpxOFzf3E2bNgXgueee49y5c5hMJvr168eBAwfc7pmYmMgHH3zAxIkT2bBhAwBDhgwBoGfPnpd8pq+//jpLliwBoF27dlcxGzWl9bCqKW/tX5o8uD1zfvdpx7YtXZ929PS514U9W9XuUcbMIphq54lgah7V6VHGzCKYMnqUMbMIptp5IpgyedT6EKnTY12ZF1Hvi9Q6LyKZmkd1epQxswim2nkimJpHdXqUMbMIpqweZZNHBatdu3YxevRonnnmGW/58ZqKiooYOXIkLVq0YNasWeTk5DB58mSqqqrcxg0dOpSPPvqIiooKfv75Z2w2m9IPatSoUcTExLB8+XIefPBBfH19uemmmwCUXlBZWVncf//9hIeHs2DBghr86uJYfn4+ycnJ7N+/n4ULFxIREUF+/oUX106nk5KSEtq0aUNiYiLLly/n7NmzNGzYsAbrzJkzDB8+HHBte+jj4z6NgwYN4r333mPdunV07dqVrVu3Ul5eTkJCgrJN4a+//spHH33EHXfcgcPhYOXKlcr1r7zySi2futbD6n/J0/1LQ0NhanJvsnJLyMqzen0/eagbe7aq3aOMmUUw1c4TwdQ8ysETwdQ8ysETwZTRo4yZRTC1PkTqZMoyL6LfF6ltXq4FU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYNWjQwG2ru2sls9ms9H+6WEVFRYqfVatWYbVaef/99wkJCQFgx44drF+/nlOnTinb8AUHBzNy5EhmzZrFSy+9RGBgIE6nk6ioKB555BEAOnfuTOfOnamsrFT4qampANxxxx388MMP5Ofn06RJE2UrvupxJ06cwGAw0K1bN+bNm4ePjw9NmzbFYrEoY6pXdLVp04bCwkLeffddIiIiMBqNlJaWKuN2794NQP/+/Vm3bh1Wq5XmzZuTnp7uds9Vq1bRoEEDmjVrxvbt2wG47777ePHFF5VnFR4eTmVlJTNnziQvLw+n04ler6dv377Ex8fXem60HlaXlsHg3f1Lg/wM3BQXgcVS5lHfqovlbY/e5olgqp0ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpgyedT6EHmXea6wjOJyO2Z/A+Ehtf8FSV2aF/D++yIZv3c0j+r0KGNmEUy180QwNY/q9ChjZhFMWT1eDzKbr1EPq0cffZRVq1YxZMgQpafTtdClejQVFxe79WhKSUmhe/fuSrEKYMCAAaxfv55169aRnJysHC8pKaFx48Z8++237Nixg0ceeQSn88Jm2EajkQcffJClS5cq/Oq+XF26dOH9998HXKukOnfujNVqJSoqCpvNRnZ2Nna7nddee00pKG3ZsoWnnnqKDh06ALBt2zbAVUSaP3++ct+kpCS2bt2q3PPEiROAq2A1Y8YMwLU9YpcuXTAYDMqWgf/5z38YNmwYTz75JKtXr+bFF19kzJgxGI1Ghd28eXMWLFjAypUrmTJlCq1atcJisRAeHn71E/I7aftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKYNHrQ+Rd5glZZXMX3vAbRu/di3DSBrUFpO/71Xz6uK8iGCqnSeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFHhWsbDYbPj4+3HnnnfTv359GjRpR3ZepWjqdTlmp5C0lJrp6NF3cy+qrr75Cr9crPZrS0tLo06cPjz76qNLD6u6770an05GSkqIUrCorK/n6669JTExUrgNIT0+nV69eWCwW4uLiiIyMxOFwcPPNNwNw8uRJwsLCmDNnDu+88w6+vr7ccccd+Pr6EhQURJMmTTh+/Dh2ux2dTsc999xDQUEBkZGRDBgwAIBWrVop9wwMDGT79u10796dsrIyOnbsiM1mAy70ncrMzCQoKIhXXnmF8ePHYzKZGDRoEL6+vjRq1Aij
0ciZM2fIzc1l+fLlzJo1C7vdDkBycjIfffSRUlhMS0tj/vz5fPHFF4BrdZrFYqG8vNyrc6VJkyZNmjRp0qRJkyZNl1LSoLbMW+NebGnTwlVs0fTHNH/tAQ5m5LsdO5iRz7w1Bxj3QEKtmNq8aNKkSZMmTZo0afqz5FHB6s0331T+vGzZskuOEVGwGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOVXo0FRUV8fnnn+Pr68vs2bPJyclh2rRpGI1G9u3bx+LFi2ndujUrV66ksLCQUaNGAa4VS3q9HofDVQF98skn2bJlCxs2bABQthIsKiqioqICq9VKYmIi7du3Z9GiRVitVmJiYpQx4Fp5lZ+fz4gRI8jJyWHOnDkAJCQkuN3TbrcrWxR+8cUXZGRkoNPp3DL5+PhQUlLCPffcQ8OGDVmyZAmVlZXceOONAOTl5QFQUFBAdHQ0gYGB/Pbbb+zevZvBgwcze/ZsWrVqxY8//si6desA10oug8HABx98wMaNG7n33nvp0qVLrefHkya316vqQgM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKZvH4CA/nhvWidzCMixe2M6uWmrO7E3m2fNWt6JStRxO2J+eT56lnEZhgVfNrSvzIoKpdp4IpuZRnR5lzCyCqXaeCKbmUZ0eZcwsgimrR9nkUcFqy5Yt3vJxVQoODmbx4sVMmTKF5ORkTCYTQ4YMYezYscoYp9OJ0+kkLCyM3r17A2C325k4cSKxsbEsXLiQ/Px84uLiWLBggbKdXlVVFQ6Hg4cffphz584xf/58DAYDRqNRWfEEUFpaitVq5ZVXXmHlypXs2LGD0NBQrFYrfn5+bn5btWpFYmIi//3vf7FarYSFhZGfn09AQIDCKikpYcyYMfz666/MnTuXgIAA9Hq929aETqcTh8PB//3f/7Fs2TKysrIICwsjJydHWTlVXWjT6XQcPXrUzUdGRgarVq3ipZdeIi4ujqqqKgDmzZvnluvhhx/myJEjtZobvV7ncRPe61l1oYGf5lF9PBFMGT3KmFkEU0aPsmXefeQcR346SWzzMDrGNPQaV7bnKIIngimjRxkzi2B6kyfq/YOaM3uDeSK75IrnrTaHR8+2rsyLCKbaeSKYmkc5eCKYMnqUMbMIpoweZcwsgimrR1nkUcGqcePG3vJx1YqOjmbRokWXPa/X62ncuDFff/21cqx///5MnDiRiIgI1qxZc8nrqldFDRw4UOkxBa5+XT/++KPSh6qqqop69erx0EMP8dBDDwGuglLbtm0pKysDIDDQ9Wm2zp078/zzz/P8888DsHLlSiZNmkRlZSXgWmEFMHz4cJ5++mnlnv369ePMmTNumSIjIxkxYgQjRoxQru3SpYvCqPY3fPhw5X4AI0eO5KeffqJHjx4AzJkzh4EDB/Lyyy8rY+655x6ys7MZP348DocDvf7qK8EOhxOLpfSqr7veZTCov4Gf5lF9PBFMGT3KmFkEU0aPsmXOyS9l8sc/U1JWpRwLCvBh0mNdaRh69Z/QF+FRFFPtPBFMGT3KmFkEU0aPas0c6Ku74nmTUU9BgbVWbJDnOdYlngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCKavH60Fmc8AfXnXmUcGqWqWlpfzyyy9kZmYCrkJWly5dlIKNWqTTXfkF/dWOvdyYi1dFeerjallNmzbFaDTidDqpqqqitLSUb7/9ll27dgFQUVEBuHp0bdu2jbVr19ZgvvXWW9x+++1ER0f/YZ8XS2sod3nVhQZ+mkf18UQwZfQoY2YRTBk9ypL598UqgJKyKiYt/Jn3/pHoERvkeY4ieSKYMnqUMbMIpowe1ZY5PDiAdi3DOJiRj+Oit416navnVAOzv1f8Xu/PsS7yRDA1j3LwRDBl9ChjZhFMGT3KmFkEU1aPssjjgtXSpUt55513KC0tdSuumEwmxo4dy8MPP+zpLWolh8NBZmYmw4cPJzU1FZPJRGxsLABm
s/my11WvUHrrrbc4deoU+fn5xMbGkp6eDrhWYPn7++Pj40NeXh6PPfYYe/bswdfXl/j4eOx2u1KoKy11rTTatm0bAwcOJD09ncjISIKCggDw9fV18zN27FiOHDmC1Wqlffv2nDlzRtni749mMhqN3HjjjaxatYqPP/4YcK3Muvvuu1m3bh1t27oa5b799ttK8QpgxowZ7N27F6PRyIcffkhkZGStn73Ww6qm6sJ+qJpH9fFEMGX0KGNmEUwZPcqUed+JvBrFqmqVlFVx6FQB7aPq14ot03MUxRPBlNGjjJlFMGX0qObMyYPbM+eLVFLT8pVjbVuG8dS97T1+XybTc6wrPBFMzaM6PcqYWQRT7TwRTM2jOj3KmFkEU1aPssmjgtV///tfXn/9dRISEhgxYgRRUVEApKWlsXTpUl5//XWCgoL461//6g2vVy273c6hQ4d44oknOHnyJKtXr8ZoNCq9o8C1VV5WVhabN28GwMfHB51Ox86dO+nbty+dOnVi0aJFypZ71aruMfXLL7/w+OOPU1JSwrJly/D19aVevXpuYzMzM4mOjmbcuHFs3rxZWe1UrcDAQHx9fdm+fTuDBw+mefPmfPjhh1RVVdXYlu+PZKpXrx7l5eU0atSI7OxsmjRpwrp16+jQoQPNmjUDICEhQRn//fffc/jwYQCioqK4+eaba/nEtR5W/0t1YT9UzaP6eCKYMnqUMbMIpoweZciclX/6iuczz5eS2LmZR/eQ4TmK5olgyuhRxswimDJ6VGPm0FCYmtybrNwSsvKsRDYwERke5CV3LsnwHOsaTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9/PHHdOnShUWLFmEwGJTjsbGx3HXXXTzyyCN8/PHHf0rBys/PD5vNRmxsLHPnzsVkMtGzZ0+2b9+Oj8+F2A6HA7vdrnwdGBiI0+nkpptuYv/+/aSkpBAbG0tpaSklJSXKCqzqa6vz+/j40KNHD7Zt26asNKse26hRIwwGAzNmzCAyMpKYmBiOHDminDcYDFRWVtKjRw++//57rFYr8fHx7Nq1Cz8/v6vO9PHHH7Nz507eeustsrOzOX/+PG3atOH48ePY7Xa3ubLZbIwfP17pp3Vx4as20npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGmzJFhV/73v3H9wFr3QZHpOYriiWDK6FHGzCKYMnoUkflcYRnF5XbM/gbCQzz/hUaQn4Gb4iKwWMo86lt1serCc1S7Rxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vB5kNl+jHlbp6ek8//zzbgWQahkMBvr168ebb77pyS1qLYPBQFhYGMuWLVOOZWVlcdttt2Gz2ZRjS5cudbuuqsq1Hc6wYcMYMGCAcnzw4MEcPnwYf39/wNVbys/Pj4ULFypjHA4Hbdq0UYo/jRo1AqBdu3bMnj1bGTdz5kyOHDmibB1YvTXfq6++StOmTZVxvXv3pqys7Koz6fV6unfvzueff05MTAzJyck0aNCA5557jvz8fMLDw5WxY8aMwWKxMG3aNF544QXuuOOO//Fk/7e0/Tkvr7qwH6rmUX08EUwZPcqYWQRTRo8yZG7TPIygAJ9LbgsYFOBDXLNQj/3K8BxF80QwZfQoY2YRTBk9eoNXUlbJ/LUH2J9+YQu/di3DSBrUFpO/r6cWpZwXEUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LIo80U69Wrx5kzZy57/syZM0q/pmstu91OYWGh21Z+27dvB1x9ni6n6pVKBw8eVI5VVlaSmZmJ3W6nvLwcAJ1OR0VFBRkZGcq4n376CafTqfSmys7OBnAbA3D06FHgQo+r6lVUP/30kzKmqKiIgoICLl79VdtMALt27SIoKIjQ0FDl2NKlS9m6dSsPPfQQ99577xWv16RJkyZNmjRd33p55E0EBbh/likowIeXR970JznSpEmTputP89ce4GBGvtuxgxn5zFtz4E9y
pEmTJk2aNGnSpEmTeuTRCqtbbrmFZcuW0a5dO+6++263cxs2bGD58uXcc889Hhm8lE6cOMFrr73Gnj17MJlMDBo0iH/+859uRRubzYa/vz/JyckkJSWRk5PD9OnT8fPz44svvuDzzz8nLi6OqqoqLBaL0sOqtLQUg8HAggULWLRoEUajkZCQEGWlU1FREf7+/uh0OgIDA7nnnntwOByEhoZis9kICwtDp9MpY6v9dujQAYfDQcOGDcnMzHQ7b7fbMZlMvPLKK0yePJmAgAACAwPx8/NTCmTVmQwGA7169cJut9OgQQMsFgvBwcHKyrDDhw/z1ltv0bZtW1JSUgB46623cDgcjBgxQinIzZ8/nxkzZqDT6fjkk0+U/BkZGWRnZyurw2ojT5v7Xo+qCw38NI/q44lgyuhRxswimDJ6lC3zDQ2CmPPsrRzMyOdUrpVm4SbatAjzmCvbcxTBE8GU0aOMmUUwZfToLd7Z81a3lVXVcjhhf3o+eZZyGoUF/qkeRTJl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWj7LJo4LV+PHj+e233xg/fjzTpk2jRYsWgKvokZeXR1RUFM8++6w3fCoqKipi5MiRtGjRglmzZpGTk8O0adMoLy9n4sSJyjidTsf999/PkSNHSE5OxmQy0bp1a3799VcSEhL45z//yfLly9myZYvbFnl2u10pBtntdoqLizl37hyNGzd2WylVUVFBaWkpTZo0IS8vj6KiImw2GzExMTU8+/r6EhwczPnz58nNzcVkMlFSUqKct9lslJWVER4eTmlpKWVlZVitVlq3bs3x48eVcU6nE5vNRuPGjcnNzaWgoACbzea2aqpBgwbYbDY++OAD9HrXX4ywsDC6du1K//79lXHffvutwrTb7eTm5gLw6aef0rBhQ8aMGVOr+dHrdYSGmmp1rQyqCw38NI/q44lgyuhRxswimDJ6lC1zz1ATPb1GuyDZnqMIngimjB5lzCyCKaNHT3knskuueN5qc3j8XkrGeRHBVDtPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWIWFhfHFF1+watUqUlJSyMrKAqB169aMHj2aBx54QNnuzltatWoVVquV999/n5CQEMBVZJo8eTJJSUlEREQAYDabMRqNLFq0CHAVmHr06EFgYCBdunShe/fudO7cmX79+pGYmKjwq1c/LViwgNjYWAC2bdvGqFGj0Ol0BAcHA2C1WgkJCWHLli3Ktc8++yzffPMN0dHRAEovqwceeICXXnoJgMLCQm655RYAhZWZmYnD4WDt2rVKpk8++YRXXnnFrRgFEB0dzYYNG5Svhw4dyoEDBxRWSEgIZ86c4fHHH+df//oXMTExPPbYY4waNcqNU1hYyBNPPOF2vEuXLgBuha2rlcPhxGIprfX116sMBvU38NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR+8wzxWWUVxux+xvIDzE818WeMtfoK/uiudNRj0FBdZasevCvMjoUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwerweZzQF/eNWZRwUrcPVfGjlyJCNHjvQU9YeUkpJC9+7dlcIOuAosr7zyCtu3b2fw4MEAREVFkZaWpozZvXs3JSUl6HQ6oqKiAFffpzvuuEPZDg8u9J26WD179sTPzw+j0Yi/vz82mw2r1UrDhg3dxvXv35/169fToEED4ELvqotXcIWEhBAbG8tvv/2m+MjLywNQthKsZk2cOFEpRJ0+fRqHw1GjJ9jtt9/Onj17aNasGQA//vgjmZmZjBgx4orPMT09nQ8++IAPPvigxrm7776bffv21brYqDWUu7zqQr2YNyQAAQAASURBVAM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmsnUrKKpm/9oDbtnvtWoaRNKgtJn/fP91feHAA7VqGcTAjH4fzwnG9Dtq0CKOB2d/jZ6rGeRHNE8FU
O08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYFRYWkp2draxE+r2OHDlCo0aNlKKLN5SWlsZ9991Xo49VQEAAx44dU8YlJibywQcfYLFYMJvNSvFKr9eTkZHBrbfeSn5+Pg0bNiQrK4vy8nL8/f0pKirCx8eH//znP+Tk5LBt2zZ8fHyorKxUCk+nTp3C6XRy7tw5Vq1axYoVK0hPT6devXoANGnSRBnn6+vL1q1bKSwsZO3atVitVsVH9bjCwkJ0Oh3Lli3j119/Zc+ePQQEuD4JWL9+fSU3wNGjR1myZAnLly8nKysLk8m1ZUT1dox79+4lJCSEn3/+mUmTJgEwffp0NmzYwNy5c5Ui25IlSwCoqqpi06ZNfPPNN5w/fx69Xk/Xrl3x9fX8DZ0mTZo0adKkSZMmTZo0XSvNX3uAgxn5bscOZuQzb80Bxj2Q8OeY+p2SBrVl3hr3olqbFq6imiZNmjRp0qRJkyZNssujgtUbb7xBeno6n3766SXPv/LKK0RFRTF16lRPbuMmi8WC0Wis0cfq5ZdfJiUlheeffx5wbZW3dOlSkpOTSUpKYseOHQC0b9+ehQsXMn78eGJiYnj66adxOp0cOnSIjh07UlJSQlxcHMuWLaNBgwaMHj2a7777jn379mGxWABXHy1wrZx65ZVX6N27N3369GHx4sWAazVUtdcGDRqwZ88eUlNTGTFiBOfOnWPdunUAFBcXU69ePUpKSmjTpg2zZs2iefPmPPHEE/z3v/8lPz+f8+fPu93TYDDw+uuv85e//MXtnunp6XTt2pXc3FysVivPPfccRqNR8bl//37++te/kpKSgo+PD926dQMgISGBsrIy5fneddddhIWFKf2vaiMfH62p3O9VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM1j7Zlnz1vdikDVcjhhf3o+eZZyGoUF/mn+qhUc5MdzwzqRW1iGxcvbFl78f29Ilu+dusQTwdQ8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9SibPCpY/fTTTzz44IOXPX/bbbexatUqT25xSe3du7dGH6t3332XEydOkJOTQ0REBMHBwSxevJgpU6aQnJyMXq9Hr9dz7NgxHnvsMR555BEAIiIiKCkpYeXKlXTs2BEAk8mE0+nE6XQyd+5c4uLiiI6O5sSJE+zbt0/xUb9+fSoqKti1axd79+7ltttuY/369WzatEnpWeXr64tOpyMsLIwlS5YQGRlJnz592LJlC6tWrWL06NGAq5+VwWCgqKiIuXPn0qlTJ4qKisjIyCAnJ0e5p8lkwt/fn2+++QaTycSAAQNYu3Ytn3/+OQ888ABOp1PpnVVeXg5Abm4uAOfPn+frr79mwIABAOzYsQObzcb06dOx2Wy89NJLHDhwgK+++qrWc6PX6zxuFHw9qy408NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIprd4u4+c48hPJ4ltHkbHmIb/+4KrkFozi2SqzeOJ7JIrnrfaHB6/T/FmZlHvmdQ2L9eCJ4Kpdp4IpuZRDp4Ippo9ZuaWcPRQDpENTESGB/3vC/6g1JxZFE8EU0aPMmYWwZTVoyzyqGCVn59PaGjoZc+HhIQoK4S8JbPZzPHjx2v0saouMF3cxyo6OppFixYBsHz5cl599VWsViv9+/dXrnvkkUd4+eWX+eWXXxT+6dOniY2NZc2aNcq4oUOH4uvry/fff69cf/ToUZ577jml+JWRkcH69es5d+4cZ86cwWw2k5+fj9PpZN26dcrWiDNnzuSHH34gJSWF0aNHK1sW3nLLLcyZM0e5Z69evZRM1VsD5uTkMHv2bPr27QvA9u3bWbt2LQcOHMBms2E2mwFXYWvXrl1KX6z33nuPDz74gJSUFKVg9emnn9KtWzcGDRoEuFafDRo0iM2bN9OvX79azY/D4cRiKa3VtdezDAb1N/DTPKqPJ4Ipo0cZM4tgyuhRxswimDJ6lDGzCKa3
eDn5pUz++GdKyqqUY0EBPkx6rCsNQ2u34sbbHkXxRDDV6jHQV3fF8yajnoICa63Yas0skieCKaNHGTOLYMroUcbM3mSWlFUy94tUUtMurLxtHxXGU/e2xxRQ+xYYas4siieCKaNHGTOLYMrq8XqQ2Rzwh1edeVSwCg8P5+DBg5c9f+DAAcLCwjy5RQ1FRUWxd+9eoqKilGPFxcXk5eVRr149pdfTpa671J/T0tIICQnh7NmzlJeXExUVxbFjx+jQoYMyxul0kp6eTv369UlLS6NZs2b4+PhQVVVVg3Xxn6OioigpKSEsLMytj1daWprC+qOZunTpopxr2bKlG6vay+nTp7nxxhsBaNy4sVKsulgXe6xeFfb666/zxRdfUFFRgU6n47fffqt1wQrQGspdQXWhgZ/mUX08EUwZPcqYWQRTRo8yZhbBlNGjjJlFMD3l/b5YBVBSVsWkhT/z3j8SPbUHqC/ztWCqzWN4cADtWoZxMCMfh/PCcb3O1SOqgdnfY79qy3wteCKYMnqUMbMIpoweZczsDeac1ak1ehoeSM9n9upUr/Q0VGNm0TwRTBk9yphZBFNWj7LIo4JV3759WbFiBYmJifTp08ft3DfffMPq1asZOnSoRwZ/r8TERH799VelPxPAV199hV6vp0GDBkqvp9+rU6dOGI1Gqqqq8PPzA6CyspKvv/6atm3bsn37doqKikhMTHRbWQWurfMKCwuJi4tj+/btdO3aVTkXEHBhed+GDRto0aIFGRkZFBUV0atXL3Q6HXq9nvnz57NixQrOnz9PVVUVHTp0YP/+/W6ZbDYbY8aMYdu2bTidrndZYWFhFBUV0bRpU8LDw8nNzWXfvn2MHTuW9PR0dDodkZGRnDp1yu2e586d49FHHyU1NZXi4mJat26tsKqVk5PDsmXLajyrFStWMHbsWOU5Xa20HlY1VRf2Q9U8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIpjd4+07k1ShWVaukrIpDpwpoH1W/1nw1ZhbNVLPH5MHtmfO7T9S3ben6RL0n71HUnFkUTwRTRo8yZhbBlNGjjJm9xawrPQ1FMTWP6vQoY2YRTFk9yiads7oyUgsVFxfz0EMPcfz4cWJjY5XVPceOHePw4cNER0ezYsUKZZs6b6ioqIiuXbvSpEkTJk+eTE5ODtOmTeOee+7h559/pmPHjkyZMoWRI0eSlZXF5s2blWsfe+wxtm/fzoQJE2jdujUrV65k27ZtjBs3jilTppCSkkJYWBgdO3YkMDCQN998k7KyMqZPn05UVBS//PIL/v7+vP3226xdu5a1a9fStGlTXn/9dXbu3MmcOXOYMmUKL730Em+99Rb33HMP/fv3Jz09Hb1ez5AhQ/jtt9/IyMjA4XDgdDo5cOCAksnHx4eIiAhuv/12/vOf/ygrpP7yl78wZcoUJk+ezIoVKwBXfzCA77//HofDVa1duXIlnTp14rbbbiMrK4umTZvSsGFDZWvA2267jbS0NDZt2sTTTz/NN998g9PpJCkpidLSUtatW0dgYCBnz57ltddeY8iQIVc9P06n85IruzRp0qRJkyZNmjRputZa+fVhVmw6ctnzD90Vw4N3xl5DR5quhbJyS8jKs3q9Z4kmTZo0aVK/fj2Uw+SPfrrs+Vcev5mb4iKuoSNNmjRp0nQ18miFVb169fjkk0/46KOP2Lx5M5s2bQKgWbNmPPXUU4waNYrAQM/2hf+9goODCQ4Oxul0kpycjMlkYsiQIYwdO5Y+ffooW+85HA7sdrvbtbfffjvbt29nwYIFFBQUEBcXx4IFCzh27Bg6nY7g4GB8fX2Jjo6msLCQcePG4ePjwx133EGjRo3YsWMHXbt2pXfv3txwww2sXbuW06dPM2rUKCIjI3nttde46aabFJ8APXv2JC0tDaPRyJo1a+jUqRPTp09n2LBhVFVVKWMDAwMpLS0lLy+PDRs28OCDD9KtWzeSkpKoqKgA4NZbb2XFihX4+vqy
bds2WrZsyZw5c/j444/ZuXOncs927dpht9ux2Wzs3bsXgOeee44DBw4oY+Lj49myZQtOp5MlS5bQpEkTHnroIUaNGsU999zD8ePHazU/Wg+rS8tgUP9+qJpH9fFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9AYvMuzKzY4b1w+sdV8jUGfm3+tcYRnF5XbM/gbCQzxv/lwXvneC/AzcFBeBxVLm0fyK8ieCqXlUp0cZM4tgyuhRxszeYmo9DTWPavQoY2YRTFk9Xg+6Zj2sAAIDA3nmmWd45pln/ufYyspKfvvtN2JjY6lXr16t73njjTcSEhLC7NmzlWPFxcXk5uYqfaCWLl1a47ro6GgA5s+fT2zshU9Sbtq0icjISPz9/QGIiYnh6NGjfP/998qYhx56CIPBoFx3cR+rV199lcGDBwPw7bffAhf6ZPn6upo5fvjhh259qOrXr8/Zs2eVrwMCAtDr9ezatUs5ZrFYACgpKQGgSZMmAAwcOJCpU6cq47Zs2cLOnTvR612T3qpVK3bu3MnOnTv5+eefGTFiBF26dGH9+vW0bt0agL///e988cUXpKWl8e2339boNVZdJKuNtP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPMmYWwZTRo4yZRTA94bVpHkZQgM8ltwUMCvAhrlmoV7yqKXO1Ssoqmb/2gNu2SO1ahpE0qC0m/9o3nK/W9f69cy14IpiaRzl4IpiaRzl4Iphq86j1NBTDE8GU0aOMmUUwZfUoi67pZopFRUWMGDFC6d1UWyUmJvLjjz8qBR240MeqZ8+el72uU6dOBAUFsXHjRuVYdR+rxMREN/7hw4fJyMhQjh09epTKykpuueUWAIxGIzfffDO+vr6kpaUp4zZs2EB0dLRSXKreDvHYsWPKmKKiIs6ePYvNZqO8vBwAHx8fysrK3DJVr1ir3rWx+v9nzpxxy3Xw4EEATp8+rfgvKipix44dypisrCwOHjzolrO6+DZgwADi4uLo06cPb7zxBtnZ2bRt2/ayz1GTJk2aNGnSpEmTprqil0feRFCA++f0ggJ8eHnkTX+So2uj+WsP1Gg4fzAjn3lrDvxJjjRp0qRJk6Zro6RBbWnTwv2D2W1auD60oUmTJk2a1C2PV1hdrTxomaVo6NChLF26lOTkZJKSksjJyWH69OkMHTqUiIgL+9D+7W9/48iRI+h0OkwmE4MGDWLUqFHMnTuXsLAwpY9VYWEho0aNUq678847qV+/PnfffTcAjRs3pri4mKioKOLj4918bNu2jY8++ogVK1bQrFkzDh8+zMyZM9386nQ6pkyZwmuvvUaDBg0IDAwkICAAm81GUVER/v7+ykqs7t27o9fradq0KTk5OURGRiqrnYqKigDYuXMnbdu2JTAwkMjISKUYVn2+Y8eO3HjjjTz++OPK854yZQoxMTHceeedAOzbt4/z58+j0+koLCxUCmuLFi2iXr16SvbayJOGxter6kIDP82j+ngimDJ6lDGzCKaMHmXMLIIpo0cZM4tgeot3Q4Mg5jx7Kwcz8jmVa6VZuKnGL7FqK7Vm1hrOq5sngql5VKdHGTOLYMroUcbM3mQGB/nx3LBO5BaWYfHytrje8CeSqXlUp0cZM4tgyupRNl3zgpU3FBwczOLFi5kyZUqNPlbVKioq4uDBg+h0OubMmUNOTg7Tpk3jnnvu4emnn2bhwoXk5+crfayaNm2qXLto0SIKCwuJjo7m5MmTZGZmArit3qqsrOS9997Dx8cHo9FIeXk5R48epXXr1vTv318Zl5mZidPppHXr1pw9e5bz58+Tk5PDvffeyxdffKGMKywsRKfT0bx5c06fPs3JkycxGo20aNFCGWO1uvbYveGGG7Db7Zw/f56jR49y88038+OPPyrjvvzyS44dO0ZsbCzp6elUVFSQm5vLq6++io+Pa8o3btxIVVUV//znP9m6dSv79+9XimdWq5Xi4mICAq7+H3O9XkdoqOmqr5NFZrPnL5BE
8kQwZfQoY2YRTLXzRDA1j3LwRDA1j3LwRDBl8tgz1MTl92LwTGrLfCK75IrnrTaHx6/ZZfreEcUTwdQ8ysETwdQ8ysETwVSzR1G/n1JzZlE8EUwZPcqYWQRTVo+yqE4WrMDVj2rRokWXPb9q1Sp8fX357rvvCAkJAcButzN58mS+++47kpKSLnldRUUF8+bNY9SoUYwbNw4Am81Ghw4d+PXXX5VxmzZt4tixY4SFhTF48GDGjx/Ptm3bGDVqFPv27VNWYu3evRuAzz77DD8/PwCeffZZtm3bhk6nIzg4mOzsbEpKSmjfvj3/+c9/AFcB67bbbiMjI4OEhAQAtm3bBsALL7xAv379APjkk0+YNGkS4CrkAbz33nv85S9/YcaMGezcuZMRI0YQExPDypUruf322wEYPXq00rfqiSeeAGDdunWMHz8egEOHDtGwYcP/NQ015HA4sVhKr/q6610Gg/ob+Gke1ccTwZTRo7ebzYtgyjgvoP7nKOu8aB7VxxPBlNGjiMwrNh/h8MlC2rQIYWjfGI953vKoNZxXN08EU/OoTo8yZhbBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOrzupswep/KSUlhe7duyvFKoD+/fvzyiuvsH37dgYPHnzJ63bv3k1JSYnbKimj0Uh4eDjp6elu/FatWnH8+HGioqIA1wqskJAQvv/+e+Lj47HZbEofrPT0dLeeUevXryciIgJ/f3+lEFW99R9ASEgIPXr0YOvWrQo/NTUVnU5HTk6OW6aJEycCEBUVxenTp8nIyOBf//qXW64ePXqwbNkybDYbRqNRKVZdrDZt2lz5of5BaQ3lLq+60MBP86g+ngimDB5FNJvXGth7h1fXnqMs8yKaKaNHGTOLYKqRt+vIOWZ/caEvb0Z2MRt+Os0zQ9qR0OrqP/T1e3nqUWs4Xzd4IpiaRzl4IpiaRzl4IpgyepQxswimjB5lzCyCKatHWVRnN1M8ceIEjz76KAkJCfTs2ZPp06djs9mU82lpaUqhp1pms5nw8HBWr17NrbfeSnx8PA888AC//fab23UAJpOJMWPG0LFjR7p27YrBYKC8vJxz584p4/z9/dHpdHz00Ue0b9+efv36YTabFcapU6ew2+34+/szceJEevbsSUJCAvPnzwcgLi5OYdWrV49jx44xdOhQJVNaWhpVVVXccsstAGRkZBAZGcny5cu56667aN++PQ8//DC+vr6EhITQpEkT5d4RERFMmDCBJ598EoAdO3ZQWVnJ6dOnL/k8s7Oz+etf/wqAXq/3WvFKkyZNmv4siWg2rzWw946056hJk6a6rouLVRfrvf9c+vifIa3hvCZNmjRp0qRJkyZNmuqa6uQKq6KiIkaOHEmLFi2YNWuW0p+qvLxcWW1ksVgwm801rrXb7fz666+88MILxMTEsHz5ch577DHWrFlD06ZNsVgsGI1GkpOTAZgxYwbl5eW88sorAIwZM4YxY8aQlZVFUVERTqeTm266iYkTJ/LTTz8xd+5czp49q/gEaNy4MXv37uXuu+8mNjaWjz76CIBOnTopXkNDQyktLeXw4cM8/vjj5ObmsmrVKgBle0GLxUJMTAw7duygbdu2jBs3jnXr1lFZWUmTJk3c7vnSSy+RnZ1Nr1692LRpE9nZ2QD8+uuvREdHAzBq1Ci6detGTEwMs2fPVgp+999/P+Hh4bWeHx+fOlsHFaa60MBP86g+ngimLB5FNJvXGth7h1eXnqNM8yKSKaNHGTOLYKqVt2zT4Sue//TbYzx0Z+22B/RmZq3hvHp5IpiaR3V6lDGzCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2VQnC1arVq3CarXy/vvv1+hPlZSURERExCWvq6iooKCggJiYGB555BEAOnfuTL9+/ViwYIHSC8rhcHDs2DE2bNigrNI6evQoc+fOpby8nOTkZGw2G/7+/tx44428+uqrANx888189NFHVFVVud03LS2Nvn37
smvXLjZv3kxUVBRFRUUcOXJEGVNcXIyvry/dunVjwYIF+Pj40LhxYzIzM8nJyVEyHTp0iJtuugmLxcKMGTOIjIzEx8eHvLw8t3sePux6I71p0ybA1RMLYOXKlTzwwAMAtGzZks8//5ysrCxsNht6vR6Hw8E//vGPq50SRXq9TlhTy+tBdaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9BYvM7eEo4dyiGxgIjI8qNaco6eLrnj+8KlCVf0cqwsN53cfOceRn04S2zyMjjGeb6lYLbV+L4pkah7l4Ilgah7l4IlgyuhRzZm99XrnUpLpOdYVngim5lGdPJl0TQtW9erV44033uDGG2/0iPNH+lOZzWaKi4vdrtu9ezcOh8Ntuzuj0cgdd9zB5s2bAde2gVVVVbRu3dptS8EbbrgBgFtvvZWxY8dy3333ceDAAQYMGOB2j6ZNm5KWlsaZM2cIDg4GwOl0MnXqVOXrjIwM7rrrLo4dO6bcs6SkhMTERObMmaOwpk2bxscff6xkCgoKorCwkEcffZS+ffsq4zp37sy5c+ew2WzKPUwmE7t27UKnczVc3r59O4899hiNGzdWrnvppZew2Wx0794dgCeeeIL33ntPuaY2cjicWCyltb7+epXBoP4GfppH9fFEMGXxKKLZvNbA3ju8uvQcZZoXkUwZPcqYWQTTW7ySskrmfpFKatqF1Z3to8J46t72mAKuvm9e66bBZGQXX/Z8bLMQ1fwcE8H0Ji8nv5TJH/9MSdmFD/wFBfgw6bGuNAyt3Wpbb3sUwRPB1Dyq06OMmUUwZfQoY2YRTLXzvMn09usdER5F8UQw1c4TwdQ8qtfj9SCzOeAPrzrzqGD1yy+/XPG8TqfDaDTSqFEjGjZsiJ+fH/fee68ntwRcK5buu+8+t2PV/amqezhFRUUpf67WwYMHAejYsaPb8ejoaBYvXkx5eblSpGrY0P2Tfenp6RiNRk6dOqWc379/v1tRy+l0Kiud0tLSuPnmm9Hr9QQEBCiFpOpzALm5uYrXyspKt2ISQGZmJr6+vsr48PBwCgsLadmypTKmuLgYq9WK0+nk9OnTip/w8HC3wlNaWhp6vZ6cnBy3ezz66KNYrVY++ugjpT+Xp9Iayl1edaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9JQ3Z3Vqjb55B9Lzmb06lXEPJFw1b2if1nz9y5nLnv/b7Teq7ueYCKY3eL8vVgGUlFUxaeHPvPePRI/YoM7MopmaRzl4IpiaRzl4IpgyelRjZm+/3rmUZHiOdY0ngql5VCdPJnlUsBo+fPgfXo3TvHlznnnmmRorkmoji8VCZWUljz76KHv27MFkMjFo0CDMZrPSwykxMZEPPvjArZfV7t27AThz5gy33nor+fn5xMXFcdttt+F0OikqKqJTp07odDpycnIYM2YM27Ztw8fHh6qqKsLCwhR+mzZt+Pbbb9m5cydvvfUW6enphIaGYrFYAFcvKaPRqBSZ3nzzTdauXYvVasVkMhESEkJJiWtbpF69egFw/PhxJVNAQADFxcVumVq1asWxY8fYtGkTa9asISsri9DQUOW5VPv39/cnJyeHYcOGceDAAcrKyoiPj6dRo0Zuq84efPBB5ZmMGjVKOb569Woef/zxWs+P1sOqpurCfqiaR/XxRDBl8pg8uD1zfvcJs7YtXZ8wq+3PKRFMkGteoO48R9nmRRRTRo8yZhbB9AZPVN+8f94fzzuf7bvkcTX9HBPB9BZv34m8GsWqapWUVXHoVAHto+rXiq3WzCKZmkd1epQxswimjB5lzCyCqXaet5gi+wR7y6NIngim2nkimJpH9XqUTR4VrD766CPeeustbDYbf/vb32jWrBkAJ0+e5LPPPsPf358nn3ySzMxMPvnkE5599ln0ej39+vXzyLTT6eSzzz6jTZs2zJo1i5ycHKZNm4Zef+EbYejQoSxdupTk5GSSkpLI
ycnhhx9+QKfTsXDhQsaPH09MTAzjxo1j5syZynV+fn6YzWaOHTtGYWEho0eP5rvvviM1NRVf3wtLaLt27ao8g969e9OnTx+WLFlSw2ubNm34f+yde1xUdf7/n3NhuAxyk4siKkIqqJhSWmpSpl3UzI10czVvWV8qat1ca7VatXLT3G2r1aQ0De92WU0tzeymoWZ5SRHvgoKgqAwwMHKdmd8f/Dg6oW7CfOzg57wej31snPM5z/N6nc/AjLz5fN7fffcdS5YsYdSoUZw9e5Z169bh5eWF01nzp+XNmjVDp9Oxbds2IiMjefLJJ/nss88oLCykqqpKYd1+++1s2LCBd955hwceeOCK92zRogXHjx8nPz+f6Oho9u/fT0ZGBn379uXIkSMArFu3TilWvfbaa0BNQW/16tXEx8fXe260HlZXV2PYD1XzqD6eCKYMHgMD4fXk3uSdKyXvvM0te3iLYF4qGeYFGt9zlGVeRDNl9ChjZhHMhvBE9c3re3sb+t7ehgVr9/PL4bN0aR/KuAc71ddmHd3o8wKQZ8m56vncggsk3NKqQfdQW+brwdQ8ysETwdQ8ysETwZTRo9oyX48+wXDjP8fGyBPB1DyqkyeTGlSw+uGHH/D09OTjjz/GZDK5nBs+fDgjR47kl19+4fnnn+dPf/oTDz/8MPPnz29wwcpkMlFRUcGcOXOUPlZ2u50pU6ZgNNZE8vf3Z9GiRbz22mskJydjNpvp0qULO3bsYNSoUYwZMwao2Y6vsLAQp9OpbNvXpEkTiouLcTqdpKSkEBsby6RJk5gxY4ZSZGratOav/Wp7Re3du5f77ruP/Px80tLSFFZYWBgAQUFBLF68mPDwcF566SXeeOMNl2fm6elJRUUFxcXFpKSkEB8fzwMPPMDs2bOVTLV9tAICAvj6668xm82MHDmS77//nszMTOWebdu2paKiAr1ez8GDB4GaflibN29Wxqxdu1a599///neX57t7925iY2Px9r72byyth9Xl5e79S93NE8GU0aOMmUUw3c0rKS0HoLS0nEJj/Xv0XSpfTwO3xoZhtZbVu0/JpZJxXgAycwrJOW+jorwSbzfMTWPIrHmUw6OMmUUw3cET2TcPYOhd0Yx7sJP2flAPhQdd/d8aLZr6qKYXmEzzcqnOFpVRUm7Hz8tASEDDf+ki43MUkTkjy0LOeRutQsx0iAxqME/GeRHBVDtPBFNGj2rNLPrzjizPsTHxRDA1j+r1eCPouvWwWrduHU899VSdYhXUFGAGDRrEe++9x/PPP4+npycPPvggc+fObcgtATAYDPj6+irFKoDevXsDUFlZqRyLjo4mNTVV+XrBggXs2LGDTp0u/vXjsmXLSExM5NChQ3h5ebn437p1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWloaPj41S20rKiqU+7Rs2VIZN3/+fMrKypSvjUYjTZo0IS0tTTmWl5fH7NmzlUy1/v74xz/y17/+VRl36tQpMjMzFU9RUVFs376dHTt2sHr1aiZPnswdd9xBamoq7dq1A6B///5s2bLlss/3n//8JxkZGS4rz65F2v6cV1Zj2A9V86g+ngimDB5Ly6qYtzbDZWuETm2CSBrcEbNXw5rO1kptma8H0x28/MIL/GPxTpetoHy9jfx99K2EBNR/q4paqTGzaKbmUQ6eCOaN7vF69M1rqMfrwRPBbCivQ+sgfL2Nl90W0NfbSGyrQNX1ApNhXkD8ZyhZnqO7eY3t85MIpoweZcwsgql2XkOZ2ucdcUy180QwNY/q5MmkBm2mWFZWxvnz5694/ty5c1y4cHG1TZMmTVy27auv7HY7RUVFSr8oQCkuXa54VqvalUoHDhxQjlVVVZGbm4vdbqe8vOav8HU6HRUVFZw4cUIZ9+OPP+J0OpVtAc+cOQPgMgZgz549AIwdO5ZevXopK5x+/PFHZUxxcTGFhYXY7XaXTAUFBSQkJNC5c2ceeeQRVqxY4ZKp1t/q
1avp2rUr3bt356WXXuLYsWMunhISEiguLqZfv368/PLLACxatIgDBw6QkJAA1BT4/vCHP6DT6TAYDC4Znn32WZKTk6/4HDVp0qSpMWje2ow6TWcPnLDw/pqM38mRplr9+pctUNOv5LVFO38nR5o0abqRlTS4Y51VCB0ia375run31d9H34qvt+vfUNb+Al7T7yftM5Q6pX1+0qRJ09Wkfd7RpEnTjaIGrbC67bbbWLx4MV26dKFPnz4u57799lsWL17M7bffrhw7ePAgLVq0aMgtgZpVVF5eXi79qWbNmoW/vz/V1Rc/wI0ePZq8vDw2bdoEwIULFzAYDCxatIiQkBDatWvHihUrlJVOxcXFeHl5odPp8Pf359lnn2XChAmUlZUxa9YsgoKC0Ol0yliAzMxMpk2bpqxY2rFjh3LvyMhIpk2bhoeHB7NmzUKv1xMWFsb777+Pp6enUoCCmmKU0+nE09OTZ599lvXr1zNv3jyaNGmiZLJYav7RcO7cOQYMGECbNm1YuHChi39AKYRZLBbuvPNOvv32W9577z1atGjBvffeC0BISAjR0dGYTCY6d+7Mbbfdxvr168nMzOT48ePcdNNN9Z6fhjSavlHVGBr4aR7VxxPBlMWj1nRWvR73HT9/2b+mh5pfuhzMLiQuqmm92GrNLJKpeVSnRxkzi2C6i+fv68kLI+I5V1SG1Y3bm13qTW2ZRTLdyWse7Mvcv97FgRMWss+5d4uzS/9fbTwRTHfxRH6Gkuk5upvXmD4/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lE2NahgNWXKFEaNGsXTTz9NWFiYsuVdTk4O+fn5hIeHK/2RKioqOH36NEOHDm2waZ1Ox9ChQzl8+LDSn2rIkCF1trhzOBwuq5gA9Ho9zzzzDAsXLsRisRAbG8tTTz3F22+/7cJPSEigoqKCCRMmYDQaueeee8jPz1d6WNXqr3/9K2vWrOHTTz/FbDbj4eFBVVUV7du3Z9CgQaxatYpdu3aRmJjIm2++ic1mIz4+nkceeYTFixcrz8bpdBIXF4evry9z5szBx8cHX19fpUAGsG/fPgCeeOIJvvrqKzZt2kRYWBinTp1y8ZSSkkLnzp1p27YtX3zxBQChoaEYjUZllRnA999/z913361kb9GiBZMnT2bz5s1UVVUpq8muRXq9zi2NHG9UNYYGfppH9fFEMG90j1rTWXHMhvLyLDlXPZ9bcIGEW1o16B5qy3w9mJpHOXgimGr2mHuulCMH8wkPNhMe4ttgnsjPqDLNiwher0AzvdxGuyg1ZxbFbCjvenyGkuE5upvXGD8/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lEWNahgFR4ezrp161i5ciVpaWnk5uYCNb2jRo8ezSOPPKL0cvL09GT+/PkNdwz4+flhMplc+lMBfP755/j7+ytfL1mypM51VVVVjBkzhqSkJOX4xx9/rKyqqh3ncDiYPXu2y/XDhg2jefPmAMrY9u3bs27dOgBGjBiBh4cH27dvV8536NCBXbt20aVLF1599VWF9dZbbyljdu/eDUC7du14/fXXlTEzZsxg8eLFyrjMzEwAEhMTmThxIgBOp5NbbrkFm82Gv78/lZWV7Nixg4kTJzJmzBhuvfVWJk+ezMSJE3nhhRc4deoUERERHD9+nF27drF8+XLlfomJiXh7e/OXv/yF7OxsoqOj/8dM1JXD4cRqvfC/B0omg0H9Dfw0j+rjiWDK4lFrOqtej+FBV//Q1qKpT73nRq2ZRTI1j+r0KGNmdzJLy6pIWZ1OeubFVR5xUUE8/VAcZu/6989Rc2ZRPBFMGT3KlFnkZyiZnqO7eY3p85MIpoweZcwsgql2ngim5lGdHmXMLIIpq8cbQX5+3r951VmDClYA3t7ejB07lrFjxzYU9ZsVFRWlFG9qVVJSwrlz54iKirrqdQBZWVnExMQoxzMzMwkPD8fLy0sZd+TIEZdrnU4nWVlZ9OpV87d/
rVq1wsPDg8zMTHr37q1wbr31Vpd71d6ntpfVpfesHVObpbCw0GVMixYtcDgcysq12vOXXqvT6QgMDKSsrIyWLVuSnZ1NVVVVnecQGRmpXBsREcHevXuBmm0SH3roIQ4fPkxoaCjdunW74vP7rdIayl1ZjaGBn+ZRfTwRzBvdo9Z0VhyzobwOrYPw9TZedlsbX28jsa0CG+xXbZmvB1PzKAdPBFONHueuSq/TPycjy8K7q9KZ8EiXBrpTZ2bRPBFMGT3KkPl6fIaS4Tm6m9cYPz+JYMroUcbMIphq54lgah7l4Ilgah7VyZNJDS5Y/R5KSEjgvffew2q14ufnB8CXX36JXq9XCkoAx48fZ/r06ezZswez2czAgQPx9fVlw4YNSiGpqqqKr776ioSEBOW63r17s2bNGu644w6sViuxsbEMHDiQoqIi7rzzTgBMJhNdunRh7ty5vP3223h4eGC1Wjlx4gTR0dFEREQAcMcddwDwySefsHLlSsLDwxk5ciRpaWk8/fTTAFitVgwGA1u2bKFHjx6UlZXRtWtXAgICAOjUqRNQ0+fKbDYzdepUJk6ciNlsZvDgwVitVvz9/TGZTEofq507d/KPf/yDnJyarQNqC2a158+fPw/Ak08+CdSsgDObzaxduxYvLy9atWpV7/nReljVVWPYD1XzqD6eCKZMHpMT45j7q7/Q79im5i/0G/pzSq2ZRTLdyZv2WHemLfzJ5Zcuvt5Gpj3WvUFzo+bMopiaR3V6lDGzu5ha/xzNoww8EUx38kR9hpLtObqb11g+P4lgyuhRxswimGrniWBqHtXpUcbMIpiyepRNDS5Y/fDDD3z66afk5ORgtVrr9HjS6XR8/fXXDb2Ni4YNG8aSJUtITk4mKSmJ/Px8Zs2axbBhwwgLCwNqCjODBw9Gr9fz7rvvkp+fz8yZM7nppptYuHAhQUFBtGvXjhUrVlBUVMS4ceMU/qU9oZ566im++eYbXn/9dW677TY6d+4M1BS6zpw5Q1FREQkJCcTFxfHuu+9y5MgRl35YtayKigrGjh1Lfn4+r732Gv7+/gwbNkwZ53DUVFz9/f0ZPXo0q1evZtu2bQAEBwcrYyorK7HZbAwaNIjQ0FAWL15MVVUVcXFxLs/o/fff59577yU6OppvvvmGadOmAXDmzBmgpvgFNYW3xx57DJvNxsqVK5Vj9elfBVoPq/+lxrAfquZRfTwRTBk8BgbC68m9yTtXSt55m9t6oFwqtWW+Hkx38AIDzayYPpA9h89y6KSFmNZBdG0f6gZ3NVJjZtFMzaMcPBFMtXnU+ueI4YlgyuhRlsyiP0PJ8hzdzWtsn59EMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkBhWsPvjgA958802aNm1K586dad++vbt8XVX+/v4sWrSI1157jeTkZMxmM0OGDOG5555TxqxcuRK73U5wcLCyZZ/dbmfatGk8/vjjLFy4EIvFQmxsLAsWLFC23auoqOCDDz7g0Ucf5ezZs8ybNw+DwYC3t7cyBmDjxo2cOnWKqVOnsmLFCrZv345Op8PpdNKiRQtlXEpKCkajkdjYWD777DNsNhvBwcF4e3vTpEkTZZzT6eTJJ59k7969pKSk4O3tjV6vx+FwKD2sKisrcTgcvPTSSyxdupS8vDyCgoLIz88nJCREeTZQU3T66quvXPgAhw8fBuDQoUPAxd5iOp2OiIgIjh07htVqpbS0FF/fa/9HidbD6vIyGNS/H2pj8LhuaxaHsovo0DqAgT3bNJjXGDJrHhvO8/U0cGtsGFZrWYP6Vl0qtWcWwRThMbp5E7q2D3Xb3DSGzDJ6PFtURkm5HT8vAyEB7vnQrvbn2BjmRa0etf45mkc1epQxM7j/M5SMz1HGz08imDJ6lDGzCKbaeSKYmkd1epQxswimrB5vBPn5XaceVosXL+b2229n3rx59V6RU19FR0eTmpp6xfNbtmyhT58+zJ07VznWv39/pk6dSps2bdi8efNl
r9u9ezelpaUMGTKE2NhY5fiMGTPYtGmTC799+/YMHz6c4cOHAzBixAj27t3L5s2b6dy5M5WVlfz4449UV1czfPhwEhMTAfjmm294+umnOXXqFBEREVitVqBm+8BLi2733HMPZ86cUXprVVdX4+3tzahRoxg1ahRQs5Kse/fu6HQ1/7iv/f8HH3yQf/zjHwrr5Zdf5pNPPiE5ORmA0tJSJVefPn2UcV26dKGsrIzKysorPtv/JW1/ziurMeyHqkaPB05Y+NfKX5Sv048X8NG3x3lheBdiWgU10KE6M4tmyuhRxswimDJ6lDGzO5ilZVXMW5vhsr1bpzZBJA3uiNnLPZ8b1f4c1TgvonkNZWr9c8TwRDBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT8ygHTwRT86hOnkxq0GaKVquV++6777oXq36LMjMziYqKcjnm5+dHSEgImZmZV70OqHNtdHQ0eXl5ylZ6l+MnJCRgt9uVVUzZ2dlUV1fX6a0VHR3tci+73Y5OpyMtLU0ZU1VVRWFhIXr9xSkqLy+ntLSUEydOKMcyMjIAlNVQtVsQ1vasqlVtL6varQe9vb0xGo3KtoO1cjgceHh4EBTU8CKAJk3u0qXFqks1a/nlj2vSpEmTpt9f89ZmcOCExeXYgRMW3l+T8Ts50tRYlDS4Ix0iXT+LdoisKXZq0qRJkyZNmjRp0qRJk6YbVw1aYRUXF0dWVpa7vFyTjh8/zvTp09mzZw9ms5nBgwfzl7/8BZPJBNQU0/z8/Opc5+/vz48//shdd92lbAk4efJkunTpolxnMpkoKipi+vTppKWl4eHhQWxsLE6nk+LiYry8vLBarTRp0oRvv/2Wt99+m6ysLMLCwtDpdOzYsYO0tDR27NgBQN++fUlNTWXt2rXYbDaMxprHXlxcDMCFCxcICAhgwYIFfPnll+Tl5QE1K6pqV0xBTcEqODiYUaNG4XQ6KSgoQKfT4enpiaenpwvz4MGD9OnTB6vVSmVlpbJiqvZ8VFQUO3bsYOnSpXzxxReUlJRgMBioqKhocLGqIQ1fb1Q1hgZ+avW45ocrF5gBNvx4kkF31G97QLVmFsmU0aOMmUUwZfQoY2Z3MU8X2FxWVtXK4YT9WRbOW8tpFuTzu3psTDwRTDV79Pf15IUR8ZwrKsPqxu0k1ZxZFE8EU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9yqYGFaymTZvGE088QadOnRg0aJC7PP1PFRcXM3r0aCIjI5k9ezb5+fnMnDmT8vJypkyZctVrLRYLFouFSZMm0b59e5YtW8Zjjz3GmjVrXHpUPf744wC8+eablJeX88orr9RhnTt3jmeeeYYhQ4bw4osv8uOPP5KSkkJ1dTXJyclK8ay6uppPPvmESZMmERYWxvjx4wGU1VoATZo0obKyktzcXJxOJ82bN+fMmTPY7XZljE6n4+abb+abb77Bw8MDT09PwsLCyMrKoqCgwMWbt7c3FouF6upqgoKC8PLyIjs7m+zsbOLj4xk6dCipqak4HA7Ky8ux2+1Knyubrf77YOv1ugY3wr6R1Rga+KnN45FTxVc9fyiniFEqa74uw7xcD6baeSKYmkc5eCKYavN4/EzpVc/bKh1u+byg9ueotnm5Hjx3MkV9plRzZlE8EUwZPcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLD6y1/+QnV1NS+88ALTpk2jWbNmLlvYQU2RZe3atQ0y+WutXLkSm83GnDlzCAgIAGq21XvllVdISkoiLCwMPz8/SkpKXK6rqKjAYrFw8803M2bMGABuueUW7r//fhYsWMC0adPw8/OjsrKSI0eOsGHDBmXbvz179rB48WKys7MV/r59++jcuTOvvvoqALfffjsrV66kqqqKPXv2cOzYMQYOHMjmzZuZNm0aQ4YMASA1NZWhQ4eyZ88ehg4dip+fHwUFBTidTtLS0pRMTzzx
BFu2bCE/P1+5586dO3nggQd48803lVydOnXiyJEjQM0KMqgpOn3zzTdEREQA8N133/Hkk0+Snp7OH/7wB6KiooiOjiYzM1MpUPXt25fs7GwOHTrE6dOnad68+TXPjcPhxGq9cM3X3egyGNTfwE+tHttF+JN+vOCK52NaBqim+bpM8yKSqXaeCKbmUZ0eZczsLqaPh+6q580mfYMaxav9Oap1XkTyAM4WlVHi5hVRas+seZTDo4yZRTBl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWjzeC/Py8f/OqswYVrAICAggICKB169YNwVyztmzZQo8ePZTCDkD//v2ZOnUqW7duJTExkaioqDq9qtLS0nA6nfTu3Vs5ZjKZuOeee9i0aRNwsXdVZGSkS48qvV6PTqfjxx9/pFu3bkRGRpKens4TTzyhjHE6nVRWVmKz2Th16hStWrXCYDBgt9u5//77lXHnz58H4NixY8o9bTYbCQkJLpkMBgOAkqlFixakp6fTv39/ZUxJSQlVVVXk5eVRWVnp4rlJkybKf9f2sLp0m8SmTZvSokUL/vrXv+Lv709YWBh33nmnkqW+0hrKXVmNoYGf2jwO7BHJfzdfeVvA/re3Vl3zdRnm5Xow1c4TwdQ8ysETwVSbxxB/b2JaBXAou6jOudjWAQT7ebnFr9qfo9rmRRSvtKyKeWszXLaB7NSmpueU2avhvW7VmFk0U/MoB08EU/MoB08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLGlSwWrJkibt8XJMyMzN5+OGHXY75+fkREhKiFKkSEhJISUlh5MiRpKenYzablWLN4MGDXa6Njo5m0aJFlJeXEx8fj16vx2q1Kn2uYmJiyMvLo2nTpgo/JiaGdevWsWbNGt555x08PDzo3LmzslopMzOTiIgImjVrxpkzZxg5ciRZWVmEh4cTEBBAYGAgubm5ANxxxx1ATV+uXr16YbPZiIuLU3zX3vOmm24iPT2dlJQUJk6ciNlsJiYmBr1ej91uJycnh+joaFq1akVeXh533303FRUVtGrVioKCAgwGA4mJiQBUVlbi5eXF5s2b2bZtG5WVlbz44oucOXOG2267jfDwcBFTp0lTvfTC8C7MWv7LZY9r0qRJkyZ16oprrOr/NzGaVKp5azM4cMLicuzACQvvr8lgwiNdfh9TmjRp0qRJkyZNmjRp0qSp0alBBavfS1ar1WWlUK38/f0pLq7pdzNgwADefvttDh48yJNPPsnJkydZtWoVgEuvqtGjR3Ps2DGcTifFxcWEhYVhMpmwWCz069eP+Ph4UlNTOXfuHDfffLPCj42NBeDo0aM88cQTlJaWsnTpUgIDAyksLFTGhYSEkJubS3V1NRMmTGDTpk3s2rWLW2+9lb179wLQrFkzAHJzc0lMTKR169bMnz9fKTbVsmJiYgDIyspyydSqVSuys7OVcQkJCSxdupSqqiqqqqo4fvw4ULPFYG32wsJCtm3bRlhYGGfOnAHg9ddfp1WrVrz//vsNmh+jUWsq92s1hgZ+avbYKSqYxS/344ttWRw4WUSH1gEM7NmmwVw1ZxbFlNGjjJlFMGX0KGNmdzFPF9g4eJnVVQAHs4s4by2nWZBPvflqf45qnRcRvNMFNpeVVbVyOGF/lqVBc63WzCKZmkd1epQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6lE2XVPB6ueffwagW7duLl//L9WOv55av349JpOJmJgYUlJSMJvNSmGnticUgMPhwOG4uDyvoqKC8vJyQkJC2L9/P1u2bCEmJgaHw8G5c+fw9fUFYPfu3UBNESk1NRWj0UjPnj1JS0tz8XH69GmMRiMGg4E333yT8PBwOnfuTFZWljKmtmDUqlUrNm/ejM1mo3Pnzuzbt8+lD9cvv/wCQNu2bZVMvXr1Ytu2bS733Lx5M56enuh0OoxGI82aNaOwsJDU1FQeeughoqOj8ff3JyoqikOH
DqHX63E4HDz44IPs2rWLv/zlL7z33nvodFfvP3E56fU6YQ2ybwQ1hgZ+avb46MBObmNdKjVnFsWUyeNXO06SfuwcN7cNoV9392xh+9HXh9l75Bxd24cytG87tzBBrnkRyVQ7TwRTbR6Pnym96nlbpcMtnxfU/hzVNi8ieNdjrtWW+XowNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LomgpWI0eORKfTsXfvXkwmk/L1leR0OtHpdBw8eLDBRi+Vn5+fSyGnVsXFxfj7+wM1fa569erF3LlzlfMLFixg1qxZfP/99zzyyCNAzbaGH3/8MVOmTMHf39+lEPXBBx8o186YMYPly5fTpUsXADIyMgAYP3680hPL6XTSrVs3SkpK8Pf3p7KyknPnzuHt7c26desU1jfffMPTTz9NYGAggFLk6tOnDy+++KIy7plnnuH7779XMh09elTxUturymq1KgVBf39/cnJyyMnJwd/fn7S0NEwmEwCpqanMmDGD2bNn8/bbb+Pp6cnZs2cZO3Ys7dq1Y/LkyUyePJmsrCyGDx/O1q1bla0Kr0UOhxOr9cI1X3ejy2BQfwM/zaP6eCKYMnnMyivm1dSfqUV8vzuXOZ/8wtSx3YlsXneV7m9RRlYBbyzbo3ydfryAxesPMvnReGIjg+rtVaZ5EclUO08EU60efTyu/kcvZpOewkJbvdig/ueo1nkRwRM512rNLJKpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ9dUsFq8eDGAUgSp/fp6KyoqSunrVKuSkhLOnTunFHIu1+eqQ4cOQM1KpdqCVe3Y8PBwvLy8FO65c+fq3LOyspJWrVoBcPbsWXQ6HZmZmUrBSqfTERISQklJCVFRUWRnZ+NwOLhw4YJLMS06Ohqo2S6w9v5Go1HpaVWriIgIqqqqlEy1njIzM5Vjfn5++Pr6cuHCBVq2bMn27dsBiIyMVOYJalZlAcr2gBaLRenPdekKs9pnlJ2dfblH/5ukNZS7shpDAz/No/p4IpgyeLy0WKUwHfDKhz8x/4W768W8tFh1qWYs3c3CSfVjXioZ5uV6MNXOE8FUm8cQf286tQniwAkLjkt6Vul10CEyiGA/L7f4VftzVNu8iOBdj7lWW+brwdQ8ysETwdQ8ysETwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KomsqWHXv3v2qX18vJSQk8N5777n0svryyy/R6/X06tULqFl5VFVVxdixY9mzZw9ms5mBAwei1+tdVnxVVVXx1VdfkZCQoFxnMBg4dOgQd9xxB1arldjYWKVAVLvCqrS0lODgYObOncvbb7+Nh4cH99xzDyUlJXh7exMREcGuXbuU+wwaNIjCwkLCw8NJTEwELhaRrFYrvr6+fP/99/To0YOysjK6du2KXl9TdazNZLPZ8PPzY+rUqUycOBGz2czgwYOprq4mNDQUk8mk9LHav38/Xbp0Qa/X06ZNG/r27QtAUFCQ8v/e3t4cOHBA6Y3Vo0cP7rnnHgBatGhR7/nReljVVWPYD1XzqD6eCKYsHr/fc6pOsapWdgds23+ahC7X9nNuzQ+ZVz2/4ceTDLqjfr3VZJkX0Uy180Qw1ewxOTGOuavTSc+0KMc6tgni6YfiGvxZQe3PUc3zIoInaq7VnFkUU/OoTo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY/q9ChjZhFMWT3KpmsqWP1a1dXVlJeXK32dfq3S0lK8vLwwGht0mzoaNmwYS5YsITk5maSkJPLz85k1axbDhg1TelM5nU4WL16Mh4cH7777Lvn5+cycORMPDw8OHz7MokWLaNeuHStWrKCoqIhx48YpfKfz4p+HPvXUU3zzzTesWrUKuLgCyel0cuHCBWw2GwkJCcTFxZGamorNZqN9+/Yufp1OJxaLhVGjRpGfn8+///1vAG6//XZlTHV1NXa7HX9/f0aP
Hs3q1as5ceIEOp3OJVNFRQVWq5VBgwYRGhrK4sWLqaqqUnxdes82bdpw991388svv/DOO+8AcO+99wI1q8H++Mc/snz5crp27QrUrJzbunUrbdu2pUePHvWaG62H1dXVGPZD1TyqjyeCeaN7zDxdd9vYS3Usz8rgPtfWe+rIqeKrnj+UU8QolfVpEcGU0aOMmd3BDAyE15N7k3eulLzzNsKDzYSHXP4zY32l9ueoxnkRwRM912rMLJqpeZSDJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUNqiRNnz6dnTt38vnnn1/2/J/+9Cduu+02Xn755Ybcpo78/f1ZtGgRr732GsnJyZjNZoYMGcJzzz2njDGZTJSVlREUFKRs2We325kyZQo333wzCxcuxGKxEBsby4IFC2jZsiUAPj4+OBwOhg0bhsViYd68eRgMBkwmE5WVlcq2fk6nE5vNxtSpU1mxYgXbt28nMDAQm81G06ZNFZ9Qs53gXXfdxWeffYbNZiMwMJDCwkKaNWsGgMFgoLS0lGeffZadO3eSkpKCt7c3Op0OLy8vl0xVVVW89NJLLF26lLy8PIKCgsjPz1dYtff8xz/+werVq1m6dCkOhwMvLy/Ky8tdimQTJ04kKCiIOXPmADVFsxYtWvDBBx+4bCd4LdJ6WF1eBoP690PVPKqPJ4Ipi8eo5k34/irnbwr3u+aeKu0i/Ek/XnDF8zEtA1TTp0UEU0aPMmYWwfT1NHBrbBhWa1mD+lZdKrU/x8YwLxlZFnLO22gVYqZDA3rwXSp3z7WM86J5VKdHGTOLYMroUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwebwT5+QnqYfVr/fDDD/zhD3+44vn77ruPtWvXNuQWV1R0dDSpqalXPG8wGAgJCeH7779XjtUWrtq2bcvHH3982euqq6sBuO222xgwYIByPDExkUOHDikFJKfTiaenJ8OHD2f48OEAOBwOOnToQFVVFYBSRIqKiuJvf/sbf/vb3wB46623eO+99/Dx8QGgoqICgMGDB/PMM8+4+C0rK3PJ5Ovry6hRoxg1ahQAeXl59OnTh8rKSuVeUNPbasmSJcq1DzzwAMeOHVMKc1BTAAsPD8fHx4fi4mJCQ0Pp2bOn4ru+0vbnvLIaw36omkf18UQwb3SPd8SFs2jDoctuC2jQQ89Oza+ZPbBHJP/dfOVtAfvf3lp1fVpEMGX0KGNmEUwZPaoxc37hBf6xeCelZdXKMV9vI38ffSshAT7usKj656jGeRHNE8GU0aOMmUUwZfQoY2YRTBk9yphZBFPtPBFMzaMcPBFMzaM6eTKpQQWrs2fPKtvVXU6hoaHk5+c35Bb1lt1up6ioyKXP1datWwHqrB46fvw406dPZ8+ePeh0OgDS09OVglVVVRW5ubnY7XbKy8vx8vJCp9NRUVHBG2+8wYYNG7BYLEREROB0OvHw8ADgzJkzCv/ZZ58lLS0NDw8PpVB14ULNSiRPT08APvzwQ3bu3ElWVhZhYWEUFBQo52ozFRYW8uqrr7Jx40ZsNhvh4eEumVq2bElERARvvPEG//nPfzh69CghISGcPXuWmJgYl+zDhw936bN15swZli1bxogRI4iOjm7wHGjSpEnT76mXR9/K9EU7XYpWBn3N8frqheFdmLX8l8se16RJk6bGoF8XqwBKy6p5bdFO/jM+4XdypUmTJk2aNGnSpEmTJk2aNDWwYBUQEEBWVtYVzx8/fvyK/a1Eq7KyEi8vrzp9rvz9/ZVVVAAjRoxg9+7d3HLLLcyePZuVK1fy9ddfk5qaSrNmzZQ+V7UrnYqLi5WClZeXFwsXLmTIkCGEhoaycOFC4OKKqeLimn4nWVlZWCwWkpKSyMjI4KuvvnI5b7fb8fHxYdmyZXTr1o0///nPLF++XCmQXZpJp9OxcuVKRo4cCcCSJUvQ6XQuK7HuuecePvzwQ+Xr/Px89Ho9U6dOdXlGmZmZeHl5ceed
d7Jx40aaNGlCVFSU4r++amgj9RtRjaGBn+ZRfTwRTJk8RrcI4MMX+5G2L48jp4ppF+HPHZ3DG8TsFBXM4pf78cW2LA6cLKJD6wAG9mzTICbINS8imWrniWBqHtXpUa2Z9x0/X6dYVavSsmoOZhcSF9W03ny1P0e1zotIngimjB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnqUTQ0qWPXu3ZuVK1cyaNAgOnTo4HIuIyODjz/+mPvvv79BBusrnU7H0KFDOXz4sEufqy1btriMO3PmDE6nkzlz5hAQEMD+/fv57rvvcDgczJ8/n+LiYmJjY3nqqad4++23Xa6tqqqiTZs2rF+/HqPRSP/+/fniiy/Izc2t4ycwMJA5c+YQHh7OsGHDWLlyJdnZ2cp5h8NBaGgomZmZpKenEx8fj5eXF5mZrttPVVdXExUVxcqVKzGbzQwbNoxly5Zx/PhxZcwLL7xAREQE8+fPJz8/H51Oh06n49ixY3Tt2hWAo0ePUlhYCMDGjRsBKCkpYe/evXzzzTd15vO3Sq/XERhorte1MqgxNPDTPKqPJ4Ipk8dBd7Z1C+dSPTqwk9uZINe8iGSqnSeCKZvHMa9+SUFxBcEBnnz4d/d91lRzZncw8yw5Vz2fW3CBhFta1ZtfK7U/R7XNy/XgiWDK6FHGzCKYMnqULfNHXx9m75FzdG0fytC+7dzGle05iuCJYMroUcbMIpgyepQxswimrB5lUYMKVuPHj+eHH35g6NCh3H333dx0001ATTHku+++IygoiPHjx7vF6LXKz88Pk8lUp8/V559/jr+/v/J1s2bNaN++PQEBAcp1drsdgAkTJpCYmAjAxx9/jE6nU641GAzY7XbeeustYmNjFd7WrVuVlVO1YyMiIpSiENSsuFq5cqWyOs3X15fy8nKee+45xowZo4z785//TGZmJqdOnSIiIgIvLy9sNhsrV650yfDpp59y7tw55Wu9Xs+jjz7Ko48+yqRJk9i/fz89e/Zk5syZJCYmYjAYmDlzJk2bNiUmJkYpxD344IP07t2b0aNH43A40OuvvRLscDixWi9c83U3ugwG9Tfw0zyqjyeCKaNHGTOLYMroUcbMIpju5L332T627T+rfH2+qIJBf11DQudmPP5g/QvJas7sTmZ40NX/0dSiqQ+FhbZ6sUH9z1Gt8yKSJ4Ipo0cZM4tgyuhRtswZWQW8sWyP8nX68QIWrz/I5EfjiY0MUoVHUUy180QwZfQoY2YRTBk9yphZBFNWjzeC/Py8f/OqswYVrMLCwvjvf//Lm2++yTfffMOmTZuAmgLMoEGDeO65567a40qkoqKi6qxOKikp4dy5c0RFRSnHMjMzefjhh12ug5oVUZden5mZSXh4OF5eXgA0adLEZTyA0+mktLSUsrIyysvLadWqFTqdThlbq9pCVUlJCVBTJAPqPKvS0lLl3hEREfj6+lJVVeVSrCopKaGiokJhXUkdO3Zk0aJFWCwWQkJCyMrKoqCggK1bt9KtWzdl3Mcff8zHH3/M+vXr693HSmsod2U1hgZ+mkf18UQwZfQoY2YRTBk9yphZBNMdvEuLVZdqy74zjBlQv9Xhl0qNmd3J7NA6CF9v42W3BfT1NhLbKtAtftX+HNU2L9eDJ4Ipo0cZM4tgyuhRlsyXFqsu1Yylu1k46e4GsUGe5yiSJ4Ipo0cZM4tgyuhRxswimLJ6lEUNKlgBhIaG8sYbb+B0OrFYLAAEBQWh0+kabK4hSkhIICUlhZEjR5Keno7ZbCYmJga9Xk+vXr2UcVarVSkYAcTHxysrnpYuXcrixYuJiYkhLy+Pfv36KePCw8PZt28fTz31FHv27MHDw4POnTtz4ULN6qLi4mLCwsLw9PQkNzeXBx98kKysLMLDwwkICMDHx4eqqioAZWXawoULmT59Ojabjbi4OPbt26ewAEJCQjh79mydTDqdzqXXFcAnn3zCBx98QHZ2NkajkXXr1uHr60tg
YCAAkyZN4tlnn73ss/v73/9OeHj9+7xoPazqqjHsh6p5VB9PBFNGjzJmFsGU0aOMmUUw3cUb//bmq56f+O4PvD3+znqx1ZpZBHPaY92ZtvAnl6KVr7eRaY91b/BnOLU/RzXPiyieCKaMHmXMLIIpo0eZMq/5IfOq5zf8eJJBd9Sv96tMz1EUTwRTRo8yZhbBlNGjjJlFMGX1KJsaXLCqlU6no2nT+jdpdrcGDBjA22+/zcGDB3nyySc5efIkq1atol27di4rmex2OwsWLOD//u//APD09KRLly6kpaXRokULRowYQWpqKufOnaN///7KdbUrq37++Wcef/xxSktLWbp0Kb6+vsrKKABvb28KCwsJCQlhwoQJbNq0iV27dhEREaGMqX1u+/btIzExkdatWzN//nwqKytdMrVs2ZL9+/fXyRQcHKwUtQ4dOsTf/vY3Dh06xKBBg/D29ubYsWP88MMPDB8+HKOxZspre1RNmDCB2267DYDk5GRsNhubN2/m0Ucfrddz13pYXV2NYT9UzaP6eCKYMnqUMbMIpoweZcwsgtlQXmFp1VXPW0qqGvwZRG2ZRTADA82smD6QPYfPcuikhZjWQXRtH+omdzVS+3NU47yI5olgyuhRxswimDJ6lCHzkVPFVz1/KKeIUSp7nxbBVDtPBFNGjzJmFsGU0aOMmUUwZfUoi66pYDVnzhx0Oh1PPfUUer2eOXPm/M9rdDodycnJ9TZYX61fvx6TyURMTAwpKSmYzWZ69erF9u3byc/PV4pWBoOB6uqLf2FaUVGhrJgqKiri7bffJiYmBofDwYYNG5TizpkzZwC45ZZbSE1NxWg00rNnT9LS0lx6XVVVVWE2mzEYDLz55puEh4fTuXNnjhw5QufOnZUxUFNE2rx5Mzabjc6dO/PLL79QWVmpsGr7VP0609atWwkKCgIgODiYnJwcvL292bhxIzqdDqPRSEREBKdOnarznFq3bk2XLl2AmmJdYGAgGRkZ9X7uWg+ry8tgUP9+qJrHhvMysizknLfRKsRMhwbs0X6p1J5ZBPNsURkl5Xb8vAyEBDT8Db4xZNY8qtOjjJlFMN31PR3o63HVolVQE49691+ScV6imzeha/tQrNayBvWtulRqf46NYV40j+r0KGNmEUwZPcqUuV2EP+nHC654PqZlgGrep0Uw1c4TwZTRo4yZRTBl9ChjZhFMWT3eCPLzE9TDqrZg9cQTT2AymVRdsNqyZQu9evVi7ty5yjGr1Ur37t3ZunUriYmJAHTp0oWAgABlzO7du7HZbOh0Ol599VVl3IwZM5QeXXCxYDVp0iRiYmKAmh5WN998MyaTCS8vLyorK7HZbISGhrJu3Trl2q+//prk5GSCg4MBOHHiBAD3338/SUlJyrhHHnmEX375RVnNdf78eQDeffddpYhltVrp1q2b8nVZWRk2m413332Xfv36MWnSJPbv388f//hHZs2aRWVlJSaT6bLP7Ntvv+WVV15h48aN1/Ko60jbn/PKagz7oWoer135hRf4x+KddbZX+vvoWwkJ8HGHRdVlFsEsLati3toM9mdZlGOd2gSRNLgjZi+P393f9WBqHuXgiWCq0aO7v6fffKY3j8389orn/5Xcu8HPQIZ5Ec0TwVQ7TwRT8ygHTwRT8ygHTwSzobyBPSL57+YrbwvY//bWqnufFsFUO08EU0aPMmYWwZTRo4yZRTBl9SiLrmkzxUOHDnHw4EGl4HHo0KH/+b+DBw8KMf6/lJmZqRR6auXn50dISAiZmRc/RCUkJLBt2zasVqtyHVCn11V0dDR5eXlKr6ji4mKMRiMbNmxQxlRXV+N0OpU+UdnZ2TidTs6ePasUpQDlXrXbAmZnZ+Ph4cHmza69GWw2GwaDQRlXVFSETqfjq6++UsY4nU50Oh0hISEu/tu0cd0bOjo6mqqqKnJyclyOT5s2jdjYWHr06MGkSZP49ttviYuLu8JT1aRJ0+X062IVQGlZNa8t2vk7OWqc
mrc2gwMnLC7HDpyw8P6a+q/61KRJ0+8nEd/Td8SFXdNxTZo0adKkSdP10wvDu1zTcU2aNGnSpEmTpl+r3j2sKisr+eGHH2jRooWywkhNslqtVFVVMXbsWPbs2YPZbGbw4MH4+fkp/Z4Ahg0bxpIlS0hOTiYpKYnt27cDEBcXxyOPPILFYiE2NpaCggKcTifFxcV4eXlRWlpK27ZtmTdvHgsWLMBkMhEQEIDdbleKR7X3adq0KYMGDcLhcBAYGEhFRQWAso2f1WqladOm7N69m5tvvhmHw0FoaCinTp3CYDAoXmvvOXXqVF555RW8vb3x8fFBr9cTHh7ucs/333+fbdu2cf78eQwGg7LCKycnh+joaA4ePEiLFi3o06cPLVu2ZOHChaxevRqAt956q0HPvqENu29ENYYGfprH+mnf8fN1ilW1Ki2r5mB2IXFR9e/vp8bMIpinC2wuqzBq5XDC/iwL563lNAuq32o1tWYWyRPBlNGjjJndxRT1Pf1/g+P4v8FxPPefzRRYq2jq58Fbf76z3j5rJcu8iOSJYKqdJ4KpeVSnRxkzi2DK6FG2zJ2igln8cj++2JbFgZNFdGgdwMCebf73hf9Dsj1HETwRTBk9yphZBFNGjzJmFsGU1aNsqnfBysPDg/Hjx/PSSy+psmDldDr55JNP6NChA7NnzyY/P5+ZM2ei17u+WPz9/Vm0aBGvvfYaycnJ6PV6dDod+/fv5/nnn6d9+/YsW7aMvXv31uHn5uYSFBSE3W6npKSEs2fP4uPjg5eXl8vYgoICWrRowfnz5ykuLqaysrKOX5vNhpeXF35+fhQUFHDu3Dk8PT2x2+0u9zx16hTBwcFcuHBB2f7P29u7zjZ/a9asUf67urpaKUbV9rFq164dbdq0YePGjVgsFhyOi0sUa7c7rI/0el2DG57fyGoMDfw0j9emPEvOVc/nFlwg4ZZW9ebXSk2ZRTCPnym96nlbpaPBP1vUlvl68EQwZfQoY+aGMkV/T6dOHVDva6+mG31ergdPBFPtPBFMzaMcPBFMzaMcPBFMd/IeHdjJbaxLJdtzFMETwZTRo4yZRTBl9ChjZhFMWT3KonoXrHQ6HZGRkRQWFrrTj9tkMpmoqKhgzpw5So8qu93OlClTMBpdY0dHR5OamgrAokWLeP311xk9ejRjxowB4JZbbqF3794UFRUpvaKcTidWq5UNGzYoWw+mpaUxbtw4nE4ngDI2KiqK9evXK/dLSkri+++/V84bDAZKSkp49dVXeeSRR4Ca7f969erlUogymUxUVlaydu1aJdNHH33kkqmWedddd/H+++8r1w4YMIDjx4/To0cPAFq3bs2CBQuorKzkgQceICkpiRdffBEPDw8yMjIYMKB+vwhyOJxYrRfqde2NLINB/Q38NI/144UHXf0NqEVTnwY1s1djZhFMHw/dVc+bTXpVNWn+YW8eR3OLaRfhzx2dwxvMU+u8iOSJYKqdJ4KpVo8iv6fB/bnPFpVRUm7Hz8tASEDD/2Gh1nkRyRPBVDsPtNeOLB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53Vu2AFNYWXmTNncv/999fpF/V7y2Aw4OvrqxR2AHr37g1w2RVOtaqurtnaq1Oni38RZDKZaNGiBSUlJcrqKafTiaenp0vunj17otPpqKqqAqBZs2ZA3X5SMTExfP/99/j41GyFU7tFYM+ePZUxAQEBBAUFUVZWdk2Zav21a9fO5Z7R0dEcP35c8VSrBQsW4OfnR2JiIi+++OIVn8u1SGsod2U1hgZ+msdrU4fWQfh6Gy+7LaCvt5HYVoFu8aqmzCKYIf7edGoTxIETFhzOi8f1OugQGUSwn5cqmjRnnbby+pKd1H7m+H53Lgs+P8DLo2+ldZhfg9ju8iiaKaNHGTM3lBni701MqwAOZRfVORfbKsAt39PQ8NylZVXMW5vhsn1hpzZBJA3uiNnL43f3dz2YMnp0
B0977cjpUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdSggtXevXsJCAhg0KBBdO/enRYtWtTZDg/g5Zdfbsht6iW73U5hYSEjR44kPT0ds9msbF346+3zLlXtSqXly5cza9YsLBYLMTExZGdnY7fbKS8vx8vLC51OR0VFBY899hh79uzBw8ODzp0743Q68fCo+Udq7dZ6GRkZPPjgg2RlZREeHq48owsXalYieXp6AjB16lQOHz6MzWYjLi4Oi8Xi4vW3ZCovLwdg9erVrFmzhvz8fF544QUyMzMVT9HR0QDk5eWRkpJC27ZtiY+PB6CqqkrpraVJk6bfpr+PvpXXFu10KVr5ehv5++hbf0dXjU9Jgzvy/hrXX/51iKz55Z9adGmxqlZ2B0xftJP5L9z9+5jSpEml0l1pkdXVF19dV81bm8GBExaXYwdOWHh/TQYTHuny+5jS1CikvXY0adKkSZMmTZo0adKkyf1qUMFq6dKlyn9v3779smN0Ot3vUrCqqKjA6XRy8OBBnnzySU6ePMmqVaswmUzKKiqA0aNHk5eXx6ZNm4CaIpJOp+Pnn3+mX79+xMfHk5qaSklJCQDFxcVKwUmv1/Pzzz/z+OOPU1paytKlS/Hw8ED3/39DU1xcDMDp06fx8fFhwoQJbNq0iV27drmct9vteHh4sHXrVhITE2ndujXz58/Hbrcrq69+a6Za5rlz5+jYsSP5+fl89dVXZGVluZyfOXMm69evp7KykpCQENq3b89///tfzGYznTt3btCzNxq1pnK/VmNo4Kd5rL+aB/sy9693ceCEhexzNlqFmOkQ6Z7Cr1ozi2D6+3rywoh4zhWVYXXz9kru8Pf9nlN1ilW1sjtg2/7TJHRpUS+2mudFFE8EU+08EUy1ejxdYOPgyaLLnjt4sojz1nKaBfnUm+8uj5cWyGvlcML+LEuDPKp1XkTyRDDVytNeO/J5lDGzCKaMHmXMLIIpo0cZM4tgqp0ngql5VKdHGTOLYMrqUTY1qGB16NAhd/kQIoPBQExMDCkpKZjNZnr16sXWrVtdttlzOBzY7Xbl6+rqapxOJ927d2f//v1s2bKFmJgYysrKlKIVQFlZGQ6Hg27dupGamorRaKRnz56kpaW5jAMIDw/HYDDw5ptvEh4eTseOHcnIyFDOX7hwgaqqKnr27MnmzZux2Wx07tyZ3bt343C4/nb0t2QCmDRpEitWrAAgNzeX559/npkzZyrn7XY7+fn5eHh48MMPPxAWFgbA448/Trdu3er7yNHrdQ1qon6jKvdcKUcO5hMebCY8xNdtXFmbDKrVY69AM73cQqortWYWwRT1M6Sh/jJPl1z1/LE8K4P7tLvqmP8lNc+LKJ4Iptp5Iphq83j8TOlVz9sqHW75Xle7R7XNy/XgiWCqjae9dsTwRDDVzhPB1DzKwRPB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiBhWsanXkyBE2b95Mbm4uABERESQkJNTpo3Q9pdfradGihcsqMKvVSrdu3bBarcqxJUuWuFxXuwJp4sSJ3HzzzcrxsWPHsm3bNvz9/YGawlaTJk1YuHChMsbpdNKxY0eleFTbo6p37968+uqryrgVK1aQkZGh9Lqq9fP2228rfID777+fU6dOXVOm2uvvvPNOxo4dS/v27Rk7dqwyF7Xnv/rqKwA+++wzQkNDAZRCldVqxc+vfr1YHA4nVuuFel17I6q0rIqU1emkZ178K9y4qCCefigOs3f9+xsYDHI2GVS7Rxkzi2CqlRfVvAnfX+X8TeF+FBba6sWWcV5EMNXOE8FUq0cfj6vv+2c26ev9/QLq96jWeRHJE8FUK0977cjnUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUZ0eZcwsgimrxxtBfn7ev3nVWYMKVpWVlUyZMoU1a9bgdDrR62tu6nA4ePPNNxk0aBDTp0+/as+o6yndFZsp1G/slcY4nU63+bhWVlRUFACZ
mZnKf9d+7eHhQcuWLYGL/bUGDhzocv0777zDO++8w759+5TeWtcqraHcRc1dlV6nv0FGloV3V6W7pb+BrE0G1e5RxswimGrj3REXzqINhy67LaBBDz07NW+wXxnnRQRT7TwRTLV5DPH3plObIA6csOC45KOMXlfTmy7Yz8stftXuUW3zcj14Iphq42mvHTE8EUy180QwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoQQWrf/7zn3z22WcMHz6cRx99lFatWqHT6Th58iRLlixhxYoV+Pv789JLL7nL72+Ww+EgLy/PZbXQl19+CVBn9dDx48eZPn06e/bsUY599dVXSi+nqqoqZfvD2h5WRqOR8+fP88Ybb7BhwwYsFgsRERHY7XZlZdWFCzUrjXbu3Mmzzz5LWloaHh4e+PrWbAnn4eHh4uedd95h586dZGVlERYWRl5enkvBqjbTq6++ysaNG7HZbISHh7swWrZsSWRkJJ9++inLli0DYM6cOTRp0oTbbrtNKR726dOHtLQ0AEwmE+Xl5dRujThhwgTFW32k9bCqkdbfQD6PMmYWwVQzb+rY7rzy4U8uRSuDvuZ4Q372yTgvIphq54lgqtljcmIcc3+1yrhjm5pVxg39rKB2j2qeF1E8EUw187TXjlweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6lE0NKlitXbuWwYMHM2XKFJfjUVFRTJ06ldLSUtauXfu7FKx0Oh0mk4nk5GSSkpLIz89n1qxZ+Pv74+19cQ/JESNGsHv3bm655RZmz57NypUr+frrr1mwYAEhISG0a9eOFStWYLO5buvh4+ODl5cXCxcuZMiQIYSGhirbA/662HP8+HHOnz9PUlISGRkZynZ8l7J8fHxYtmwZ3bp1489//jPLly/Hbrcrq9ZqMzmdTlauXMnIkSOBmi0NdTodRuPFqRw3bhx///vflWJW06ZNycnJcdniMCQkhKqqKkwmE3/961+Jjo5m9OjRQM2WhUlJSfV67loPq4vS+huI4Ylgqp0ngimjR3fwAgPNfPbPwXz900n2Hj3HzW1D6Ne9tRvc1UjGeRHBVDtPBFONHgMD4fXk3uSdKyXvvM3tfRxB/R7VOC+ieSKYauRprx05PcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLCqrq52KYL8Wl27duW7775ryC3qLX9/f/r27cupU6dITk7GbDYzZMgQ1q1b59In6syZMzidTubMmUNAQADZ2dl8/fXXOJ1O5s+fT3FxMbGxsYwaNYoPPvhAudbPz4/c3FzatGnD+vXrMRqN9O/fn88//5xz584pHmoVGBjInDlzCA8PJzExkVWrVmGxWBRWZWUloaGhZGZmkp6eTnx8PEajUekLBtCkSROKioqIiopi5cqVmM1mhg0bxrJly1zGFRYWYjKZlGJXeXk5I0aMYOXKleTn5xMWFkaTJk0AePjhhxkxYoRybUhICPn5+fV+7loPq4vS+hvI51HGzCKYaucBdI8JpV/31litZQ3qw1MrGedFBFPtPBHM5ZsOc+hkER0iAxjWr70bHLrfo6+ngVtjw9z2/QLq99gYXjsyehSRWXvtyOHxbFEZJeV2/LwMhAS455cPas8sgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vBHk53edeljdcccdpKWlMXz48Mue/+GHH+jVq1dDblFvRUVFUVhYSGpqqnKspKSEDz/80KW3U7NmzWjfvj0BAQHKdVDTO2rChAkkJiYCMHPmTMLDw/Hy8gJqikx2u5233nqL2NhY5Zr169dTWFgIoGyRGBISwsaNG5V7fvPNN6xatYpTp04B0Lp1a6qrqxk+fDhPPfWUMu6RRx4hOzubU6dOERERQUBAAEVFRaxcuVIphpWUlLBs2TKlSAawZcsWevfuzdy5c2nfvj1jx45l6NChLF++nK1bt5KYmMhNN90E
gNl8cYXP4cOHGTp0KOfPn2/Ak9d6WNVK628ghieCqXaeCKaMHmXMLIIpo0c1Zt51+Czvrt6vfH3iTAnrf8zhz0M60eWmUHdYlOI5NjaeCKaMHmXMLIIpg8fSsirmrc1w2ea7U5sgkgZ3xOxV/y3UL5XaMl8PpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNWgzxfHjx3Pq1CmeeeYZtm/fTm5uLrm5uWzbto3k5GTy8vIYP348RUVFLv9zh44fP87YsWPp0qULvXr1YtasWVRWVirnExIS2LZtG1arVTn25ZdfotfrOXHiBHfddRedO3fml19+UXpKAcTHx+Pr64u3tzfvv/8+Xbt2pVu3bnz00Uf06NFDGdesWTOgpj/Vgw8+SFxcHHfeeScVFRWUlJRQXl6OyWTCbDZTVVXFG2+8Qa9evejSpQtTpkzBZDIphaHWrWu2k8rKylIy9ejRg/37a34RlZmZCUBwcDA6nY41a9Zw3333ERcXx8CBA9HpdC7PNTMzk4iICF588UUA3nrrLV5++WWaNm2qsBISEtDpdHz44YfEx8fTqVMnEhISSE9Pp0WLFm6ZI02QNLgjHSKDXI51iKz5R64mTZo0adJUX11arLpU//n08sc1adKkSdO1a97aDA6csLgcO3DCwvtrMn4nR5o0adKkSZMmTZo03dhq0AqrAQMGAHDkyBG++eYbl3NOZ82SkoEDB9a57uDBgw25LcXFxYwePZrIyEhmz55Nfn4+M2fOpLy8XOmnNWzYMJYsWVKnh1VcXBwLFy5k4sSJtG/fnjFjxrBmzRqeffZZWrZsiaenJ48//jhvv/02Z86cISkpie+++4709HRycnIUD7VFnenTp9O7d2/69u3L4sWLXTx6eXlhNpvJz89nyZIljBo1irNnz7Ju3TqMRiMFBQXAxZ5Xa9eupXXr1jz55JN89tlnykqt4uJi5Z67d+/mH//4Bw888IByT6fT6VKYKy4u5osvvqC6uhqoKcLt27ePoqIihRkcHMytt97Kzp07lf5ctVsBXroCrT5qaCP1G0n+vp68MCKec0VlWN24jYisTQbV7lHGzCKYaueJYGoe1elRrZmXbjx01fMff3uU4ffWf3tAWZ5jY+KJYMroUcbMIpiyeDxdYHNZWVUrhxP2Z1k4by2nWZDP7+pRJE8EU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomxpUsEpOTkanu3qfHhFauXIlNptN6TsFYLfbeeWVV0hKSiIsLAx/f38WLVrEa6+9pvSweuihh/j000957LHHGDNmjMLT6XQsWLCAadOmARAREaGcS0lJITY2lkmTJjFjxgz27dtH586dMRgMQE2fql27drF3717uu+8+jh07xt69e5XrjcaaRxwUFMTixYsJDw/npZdeYsaMGZw5c8Yll16vp7i4mJSUFOLj4+nbty8ffPCBUrAymUzodDqCg4P5+uuvMZvNjBw5kg0bNriwHA6Hy7Z+O3bsUP67dhtCgI4dO5KRkYHRaOTChQsEBgZSUFDAtm3bsNvtSsZrkV6vIzDQ/L8HSiZRz0TWJoNq9yhjZhFMtfNEMDWPNz4v91wpRw7mEx5sJjzE939fcBkdySm+6vlD2UVued9R83MUxVQ7TwRTJo/u+P67ktSaWSTzRvd4/EzpVc/bKh3az1qV8EQw1c4TwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KogYVrJ599ll3+bgmbdmyhR49eijFKoD+/fszdepUpUcTQHR0tEsPq+3bt7No0SL69++vHAsKCqJFixZs2bJFOfbDDz9gNBoZOXIkEydOBGpWjKWkpLB582Y6d+6s9H76v//7Px5//HHl2ldeeYW9e/dSWlpKWFiYcnzdunVK3ymAOXPmUFJSAqAcj4uL46OPPlLG7N+/nw8++EBZ2aXT6ZTCXL9+/ZRxhw4d4vTp01RWVmIymfDy8sJut7Nv3z6XgmLHjh2VVV1H
jhwhNTWVlJQU7r77bmXME088wZYtWygpKXF5vr9VDocTq/XCNV93o8tgUH8DP82j+ngimDJ6lDGzCKaMHt3JKy2rImV1OumZF/9aPy4qiKcfisPsfW19UNq19OfEmZIrno9pFUBhoa3eXtX8HEUx1c4TwZTJozu//0R5FMUTwZTFo4/H1f8w02zSaz9rf2eeCKbaeSKYmkd1epQxswim2nkimJpHdXqUMbMIpqwebwT5+Xn/5lVnDSpY/V7KzMzk4Ycfdjnm5+dHSEiI0qPpSteB65Z3UVFR2Gw28vLyKC8vx8vLi6NHj1JdXe0yTqfT0aZNG4VRW7Dy9natlpaXlwOQm5tLdHQ03t7eGAwGl2KV0+mkoqJCKSa1atXqsqyzZ8+6ML28vICa7fwuVUVFBQA5OTlER0crBatLi1UlJSVUV1crrGPHjgEQGxtLdXU1VVVVZGRkkJ6eDkBBQUG9ClaA1lDuKmoMDfw0j+rjiWDK6FHGzCKYMnp0B2/uqvQ6fVAysiy8uyqdCY90uSbWsL7t+OrnU1c8/8e727olvxqfo2im2nkimDJ4dOf335WktszXg3mjewzx96ZTmyAOnLDgcF48rtfV9KQN9vPSftaqhCeCqXaeCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpXVasXPz6/OcX9/f2X7vCtdZzKZ8PT0VI4lJCTw7rvv4nQ6lb5TZ86cQafT0atXL5frjUYjW7dupUuXLkrfqb179zJixAgAqqqq2L59O3Cx71TTpk05duwYb7zxBhs2bMBisRAREUF5ebmy5V7tVn9ZWVk8++yzpKWl4eHhgb+/Px4eHjgcNS/uli1bAvDRRx8xZcoUsrKyCAsLU7YDrL1nkyZNOHXqFK+++iobN27EZrMRHh6OTqdTeovV9uBKT0+vs1JOp9MRHh5+9Um4irQeVnXVGPZD1TyqjyeCKaNHGTOLYMro0V08EX1Q/jK0M29/su+yxxv6PqzW5yiSqXaeCKYsHrU+RJrHhig5MY65v1qd17FNzeo87Wft788TwVQ7TwRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBSt3atiwYXzwwQdUVFTw008/UVlZSVFREdHR0S5b+o0YMYKdO3fi5+fH7Nmz+emnn5g3bx7r1q2jY8eOtGvXjhUrVtQpmEVERPDzzz+zcOFChgwZQmhoKAsXLkSn07msgNLr9Zw5c4aysjKSkpLIyMjgq6++wmQyKWNqVzytWrWKbt268ec//5nly5dTVVXlcs9mzZpx6tQpVq5cyciRIwFYsmQJer0evb7mm6VTp0506tRJ6fu1d+9edu3aRVVVFWFhYXVWe/1WaT2srq7GsB+q5lF9PBFMGT3KmFkEU0aPDeWJ6IPS9/Y29L29DQvW7ueXw2fp0j6UcQ92aojNOlLbc7weTLXzRDBvdI9aHyJxTBk8BgbC68m9yTtXSt55m9b/TKU8EUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoURas/Pz8lP5Pl6q4uNhl673LXVdZWUlFRYWyysrf35/Ro0cze/ZsXn75ZcxmM0FBQbRt29bl2tpVTN27d6d37940b96cefPm4XQ6mT9/PsXFxcTGxvKPf/yD5557TvHRpEkTHA4Hbdq0Yf369RiNRvr3789XX32F3W5X+N7e3pSWlhIYGMicOXMIDw9nzJgxpKamKlv+1TKDgoLIzMwkPT2d+Ph4goOD2bdvn3Lex8cHh8NBVFQUK1euxGw2M2zYMFasWKEUtwwGA++99x7vvPMOq1ev5uzZszRt2pSCggLOnDlDeno6cXFx1zw3Wg+ry8tgUP9+qJpH9fFEMGX0KGNmEUwZPZ4tKqOk3I6fl4GQgPp/2BTZB2XoXdGMe7ATVmtZg3qpXCq1z4sIptp5IpiyeNT6EGke3cHz9TRwa2yY9rNWZTwRTLXzRDA1j+r0KGNmEUy180QwNY/q9ChjZhFMWT3e
CPLzu8F7WEVFRdXpVVVSUsK5c+dc+k5d7jqArKwsYmJilOOlpaW0aNGCb7/9FoAXXniBI0eOuFwbFhZGfn6+cl2rVq0wGo1UV1czYcIEEhMTARRG7b1qVzS99tprdOvWTeHt3r2b06dPK1/XFtA2btyoHLNaraSmplJaWvOXoREREQD06dOH119/XRn38ssvs2/fPuVetSu3VqxY4dKHau3atS6rsUJCQpg+fTpjx47lgQce4K677mLUqFEAZGdn16tgBVoPq6upMeyHqnlUH08EU0aPMmYWwZTBY2lZFfPWZrhsI9apTRBJgzti9vK4Zt716IMiw7xcD6baeSKYN7pHrQ+ROKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHuXgiWBqHtXJk0mNcjPFhIQEtm3bhtVqVY59+eWX6PX6On2nLlV8fDy+vr5s2LBBOVZVVcVXX31FQkKCC//QoUOcOHFCOXbkyBGqqqq48847gZq+U7fffjseHh4uxbP169cTHR2tFJdqe20dPXpUGVNcXMzp06eprKykvLwcqOmPVVZW5pKptnhV23eq9v9PnXJttH7gwAEAcnJyADCbzS7HoaZIV1JSoqzWuvS5HTlyhOTkZJfjtf2yNGnSpEmTJtk0b20GB05YXI4dOGHh/TUZ9WYmDe5Ih8ggl2MdImuKYJo0aRIr7ftPkyZNmjRp0qRJkyZNmhqHGuUKq2HDhrFkyRKSk5NJSkoiPz+fWbNmMWzYMJe+U3/84x85fPgwOp0Os9nM4MGDGTduHCkpKQQFBSl9p4qKihg3bpxy3b333kvTpk0ZOHAgAC1atKCkpISoqCg6d+7s4iMtLY0PPviA5cuX06pVKw4dOsRbb73l4len0/Haa68xffp0goOD8fHxwdvbm8rKSoqLi/Hy8sLDo+Yvtnv06IFer6dly5bk5+cTHh6uFJlq+2Pt2LGDjh074uPjQ3h4uFIMqz3v7e2Nt7c3L774IgMGDGDBggWYTCZCQ0OxWC7+Au5Pf/oTR48eJTExkXHjxvHLL78A0KVLF5ec16qGNiC+EdUYGvhpHtXHE8GU0aOMmUUwZfF4usDmsrKqVg4n7M+ycN5aTrMgn2vm+vt68sKIeM4VlWF1wzaDtZJlXkQz1c4TwZTJo6jvv0u9qS2zSKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNjbJg5e/vz6JFi3jttddITk7GbDYzZMgQnnvuOWVMcXExBw4cQKfTMXfuXPLz85k5cyaDBg3imWeeYeHChVgsFmJjY1mwYIHLiqLU1FSKioqIjo7m5MmT5ObmAris3qqqquI///kPRqMRk8lEeXk5R44coV27dvTv318Zl5ubi9PppF27dpw+fZqCggLy8/N56KGHWL16tTKuqKgInU5H69atycnJ4eTJk5hMJiIjI5UxNlvNfunNmzfHbrdTUFDAkSNHuP3229m2bZvLMwoNDaVr164sXLgQnU6Hh4cHDz/8MB988IFLhsrKSpfeVgB/+9vf6js16PU6tzSuvlHVGBr4aR7VxxPBlNGjjJlFMG90j8fPlF71vK3S0aD3OVHvkTf6vFwvptp5IpgyeRT5GVWtmUUyZfQoY2YRTBk9yphZBFNGjzJmFsFUO08EU/MoB08EU/OoTp5MapQFK4Do6GhSU1OveH7lypV4eHjw3XffKX2c7HY7r7zyCt999x1JSUmXva6iooL333+fcePGMWHCBAAqKyu5+eab2blzpzJu48aNHD16lKCgIBITE5k4cSJpaWmMGzeOffv2KSuUdu/eDcAnn3yi9Kn661//SlpaGjqdDn9/f86cOUNpaSlxcXF8+umnQE0Bq0+fPpw4cYIuXboAkJaWBsCkSZO4//77Afjoo4+YNm0aUFPIg5ptCC9cuEB4eDi33norERER7N+/H6fTqYzJzc3l0KFDvP3228ycOZMxY8bw2muvATWFLJvNpmwteC1yOJxY
rReu+bobXQaD+hv4aR7VxxPBlNGjjJlFMBuDx/lr93M4p5jYVv6MG9SpXgwfD91Vz5tNegoLbfVig5zzIqNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ422YPW/tGXLFnr06KEUqwD69+/P1KlT2bp1K4mJiZe9bvfu3ZSWlrqskjKZTISEhJCVleXCv+mmmzh27BhRUVFAzQqsgIAANm/eTOfOnamsrFT6YGVlZRETEwPAgAED+PzzzwkLC8PLy0spRF3aXyogIICePXvy/fffK/z09HR0Oh35+fkumaZMmQKgjIuKiuL8+fMsXLiQjz76SCnsZWZmKmNOnTpFVVWV0ruqtlgFMGrUKG6++WY+/vjj3/Ko60hrKHdlNYYGfppH9fFEMGX0KGNmEUw1etyanseCLw4pX58tLGPz3jP834Ox3N6h+TWxQvy96dQmiAMnLDicF4/rdTU9b4L9vNySX4Z5Ec0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jHDwRTM2jOnky6YYtWGVmZtK3b1/Gjh3Lnj17lB5WISEhZGZmXvU6gO+++46nnnpK2TawVatW/Pzzz5w9e5bQ0FAyMzMxGAwAvPLKK8ycOZN77rmH1q1bK4zs7Gzsdjsmk4mxY8dSWlpKeHg4gwcPBiA2Nla5p6+vr7K9X3l5OV27dsXDw4Pq6mruvPNOAE6cOEFoaCizZ8/mrbfeUjJ5enri7e1NREQEAPv27cPpdFJZWcnw4cPx8PDAZDKRlpbG008/7XLvy+mVV14hLi6uIY9fkyZNmjRpum66tFh1qeatPXjNBSuApMEdeX9Nhksvqw6RQSQN7lhvj5o0adKkSZMmTZo0adKkSZMmTZqurhu2YFVcXMz69evp0KEDs2fPVnpY6fV6iouLr3id1WrFYDAwd+5cJk6cSPv27Vm2bBmbN28G4Nlnn+XZZ58lNzcXi8WCv78/M2fOpLy8nDfeeIPz589z+PBhxQPUbLFXVFTEn/70J6xWK++88w4APXr0UO4JoNfrMZlMPProo3zxxRdkZWWh0+mU7QWLi4vx8PCgvLycu+66i+joaJYsWUJlZSXx8fFKhtzcXAwGAyaTiaFDh7J+/XrOnj2Lv78/w4YNA2q2DYSa1VgzZswA4JFHHgGgY8eOdOxY/1/KGY1aU7lfqzE08NM8qo8ngimjRxkzi2Cq1eP7a9Kvev7D9Qd44sFr2x7Q39eTF0bEc66oDGu5HT8vAyEB7tl/WpZ5EckTwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsumGLVjVrjCaM2eOSw+rKVOmUFZWdsXrqqursdvtPP7444wZMwaAW265hTvvvBOLxYLBYCA5OVnZvm/JkiW0b98eqCkCjRs3Dp3Otf/FTTfdxKBBg1i+fLlS5CouLqZp06YAXLhwgdLSUiZOnMi+fftYsGABtau3fp3Jbrfzz3/+k/nz57N161aaNGmCxWIhODgYqNlW8OTJkzz//POcPXuWNWvWUFRUhF6vJyoqiiZNmgAX+2Hdd999So8sd0iv1wltaN3Y1Rga+Gke1ccTwZTRo4yZRTDdycs9V8qRg/mEB5sJD/GtF+PoKetVzx/OKa73+5LI9zM1z4sopoweZcwsgql2ngim5lEOngim5lEOngim5lEOngimjB5lzCyCKaNHGTOLYMrqURbdsAUrvV5PeHh4nR5WU6ZMUVY0XU61q6L69u2rHDOZTMTExLBt2zYWLlyIl5cXt912G3a7XSlWQU0PK4PBQGRkJAA+Pj4AxMfHk5SURFJSEgArVqxg2rRpVFVVARdXWP3xj3/kiSeeUHj3338/p06dqpPpwQcf5MEHH1Su7datm8JYtGgRer2ewYMHYzQaeeqpp3jllVf49ttv0el0VFZWYjKZ
mD59OgAeHh4uz2P8+PG0bt36tzziy8rhcGK1Xqj39TeqDAb1N/DTPKqPJ4Ipo0cZM4tgupNXWlZFyup00jMvbrkXFxXE0w/FYfb2uCZW2wg/zhZe+Q9R2rf0p7DQVi+fss2LKKaMHmXMLIKpdp4IpuZRnR5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53dsAWry+nXK58aOvZKY5xO52WP18fHtbIyMzM5efKkst3gpdq9ezf//e9/+dOf/kRWVhYA//nPf/jPf/6jjHnnnXd455132LdvH56enr/Z56XSGspdWY2hgZ/mUX08EUwZPcqYWQTTHby5q9I5cMLiciwjy8K7q9KZ8EiXa2KNG9iRren5Vzw/dkCHBvuVZV5EM2X0KGNmEUy180QwNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7Lohi1YORwOcnNzGTlyJOnp6ZjNZmJiYoCL/ZsuJ39/fwD+9a9/kZ2djcViISYmRinwFBcX4+XlhdFo5Pz58zz22GPs2bMHDw8POnfujN1uV1ZWXbhQs9IoLS2NBx98kKysLMLDw/H1rdn2yMPDw8XPc889x+HDh7HZbMTFxXHq1Ckcjosv7N+S6YknnuChhx5i2bJlbNy4UbnWbDaTkpKirP5avHgx77//Ps2aNWPXrl3k5ORgt9vR6/VMmjRJ8VYfaT2s6qox7IeqeVQfTwRTRo8yZhbBdBfvdIGN/VmWOscdTtifZeG8tZxmQT7XxHxycAfeW3Pgsscb8p4k07yIZMroUcbMIphq54lgah7V6VHGzCKYMnqUMbMIpoweZcwsgql2ngim5lGdHmXMLIIpq0fZdMMWrKCmZ9XBgwd58sknOXnyJKtWrcJkMuHtfXEPydGjR5OXl8emTZsAMBqN6HQ6duzYQb9+/YiPjyc1NbXONoLe3t7o9Xp+/vlnHn/8cUpLS1m6dCkeHh5Kn6ha5ebmEh0dzYQJE9i0aRO7du1yOe/j44OHhwdbt24lMTGR1q1bM3/+fKqrq9HrXV/c/ytTdHQ0hw4dYuPGjTz88MNER0fz3nvvYbVa2bp1K7fddhsAt912Gw6Hg//7v//j4YcfZsqUKTz22GM4HA6ysrLq3Pe3SuthdXU1hv1QNY/q44lgyuhRxswimA3lHT9TetXztkrHNb+PDExoy8CEtryzcjf7jp2n803BjB8W3xCbLpJhXq4HU0aPMmYWwVQ7TwRT8ygHTwRT8ygHTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KItu2IKVp6cnlZWVxMTEkJKSgtlsplevXmzduhWj8WJsh8OB3W5Xvvbx8cHpdHLrrbeyf/9+tmzZQkxMDBcuXKC0tFRZgVV7bbdu3UhNTcVoNNKzZ0/S0tKUbfxqxzZr1gyDwcCbb75JeHg47du35/Dhw8p5g8FAVVUVPXv2ZPPmzdhsNjp37syuXbtctuX7rZn+85//8MADD/D6668DcPToUdauXcsHH3zA+PHjMRgMVFdX89JLLzFq1Cief/555dqmTZtSVFRU7+eu9bC6vAwG9e+HqnlUH08EU0aPMmYWwXQXz8fj6tvimk36evecGjsgVvFYX8alkmleRDJl9Hi2qIyScjt+XgZCAtzzDxW1ZxbBVDsParYzzTlvo1WImQ6RQQ3myTgvIphq54lgah7V6VHGzCKYMnpsDJnd/R4I6n+OjWFeNI/q9ChjZhFMWT3eCPLz03pYYTAYCAoKYunSpcqxvLw8+vTpQ2VlpXJsyZIlLtdVV1cDMGLECAYMGKAcT0xM5NChQ3h5eQE1vaU8PT1ZuHChMsbhcNChQweqqqqAmkIVQKdOnXj33XeVcW+99RaHDx9Wtg6sqKgA4NVXX6Vly5bKuN69e1NWdrGR/G/JlJOTw4kTJ1yKUDNnzsRut7N27Vry8/MJDw9n27Zt5ObmMmrUKGXc4cOHGThw4NUe62+Stj/n
ldUY9kPVPKqPJ4Ipo0cZM4tgNpQX4u9NRIiZU+fqFpRahpoJ9vNSXc8pGeblejBl8FhaVsW8tRku2152ahNE0uCOmL3qv93ypVJb5uvBVCMvv/AC/1i8k9KyauWYr7eRv4++lZCAa9vW9HKScV5EMNXOE8HUPMrBE8HUPMrBcwdT9HsgqP85qnFeRPNEMGX0KGNmEUxZPcqiRluwOn78ONOnT2fPnj2YzWYGDx7MX/7yF0wmE1CzdV5RURFWq5Xa/k5bt24FalYc3XXXXVgsFmJjY5k8eTJdunQBUFYq/fzzz2zYsIG0tDSMRiPl5eXY7XbKy8vx8vJCp9NRUVHBypUrWb58OVlZWQQGBuJ0OpX+T2fOnAEgKyuLN954g7Vr12Kz2ZSiV22Pq9pVVGvXrmXnzp3s2bMHb29vioqKlLGXZlq8eDHLli0jLy+PwMBAACV3ZmYmAJ999hlvvPEGZ8+eJSwsTOmFVVpasxXT3r17CQgIID09nVGjRik9rADuv/9+902UJk2aNGnS9CtdrlgFkHO24auiNGn6PTVvbQYHTlhcjh04YeH9NRlMeKTL72NKkxD9+hd1AKVl1by2aCf/GZ/wO7nSpEmTJk2axEt7D9SkSZMmTSLVKAtWxcXFjB49msjISGbPnk1+fj4zZ86kvLycKVOmAFBZWYmXlxfJyckkJSWRn5/PrFmz8PT0ZN++fUyaNIn27dszYcIEhg0bxqZNm2jZsiUXLlzAYDCwYsUKgoODeeKJJ/juu+/Yt2+fcu/agpXZbGbq1Kn07t2bvn37snjxYgAKCgqUsVBTRMrOzmbUqFGcPXuWdevWAZCfnw/UFKJ8fX2ZPXs2rVu35sknn+Szzz7DYrG4rLCqrKzEaDTyj3/8gwceeMDlnrWsgwcPKv/fr18/AgIC2LJlC7t37wYuFqx++OEHrFYrEydOZMiQIWzevJns7GwAwsPDGzQ/DWlwf6OqMTTw0zyqjyeCKaNHGTOLYLqLt3Tjoaue//jbowy/t3292GrNLJKpeVSPx9MFNpeVVbVyOGF/loXz1nKaBdX/r47VmFk0U628fcfP1/lFXa1Ky6o5mF1IXFTTerFlnBcRTLXzRDA1j+r0KGNmEUwZPao1s8j3QFD/c1TrvIjkiWDK6FHGzCKYsnqUTY2yYLVy5UpsNhtz5swhICAAqCn6vPLKKyQlJREWFoZOp2Po0KEcPnyY5ORkzGYzDz30EIsXL6Z9+/aMGTMGgKioKPbs2cOCBQuYNm2acg+n04nT6SQlJYXY2FgeeughVq9ezcGDBxW+j48PRqORXbt2sXfvXu677z6Xws+lCgoKYvHixYSHhzNhwgT+/e9/s337doYOHQrUrOwyGAwUFxeTkpJCfHw84eHhbN26lfz8fOWeJpOJJk2a8PXXX2M2mxk5ciTLli0jIyMDgCZNmgA1WxsuX76cJk2aEBUVxQMPPMDnn39OZmYm8fHxNGnSBIfDgdPpZPny5TgcDuLi4khPT+fjjz/m4Ycfrtfc6PU6AgPN9bpWBjWGBn6aR/XxRDDdycs9V8qRg/mEB5sJD/F1G1fNmUUx1ezxqx0nST92jpvbhtCve+t6c47kFF/1/KHsoga/j8g0L6J4Ipg3usfjZ0qvet5W6WjQa3v34bMc/vEkMa2D6No+tN6cX+tGnxcRvDxLzlXP5xZcIOGWVg26h4zzIoKpdp4Ipoyf8UQw1c4TwdQ8ysFrKPN6vAeC+p+j2ublevBEMGX0KGNmEUxZPcqiRlmw2rJlCz169FCKVQD9+/dn6tSpbN26lcTERPz8/DCZTKSmpipjtm/fzqJFi+jQoYNybNmyZcyYMYNNmzYB4Ofnh91up127dspKKICPPvqI1atXs2fPHu666y6aNGlCdnY2kyZNUopftT7Onz/PqVOn8Pf3B2qKX+vWrVO+PnHiBP/+9785evSock+bzcadd97J3LlzFdbMmTPZunWrksnX15eioiJmzpxJv379lHEf
f/wxZ8+epbKykoiICAA+/PBDoqKilDGffPIJn3/+udLrKiYmhrS0NFasWMHEiRMZPnw4Pj4+TJ48WdlWsD5yOJxYrRfqff2NKoNB/Q38NI/q44lgupNXWlZFyup00jMvriiIiwri6YfiMHvXv1eLmjOLYqrZY1ZeMa+m/kwt4vvducz55Bemju1OZHO/a+a1a+nPiTMlVzwf0yqAwsL6bQ0o07yI4olgyuLRx0N31fNmk75er+18ywVe+fCnOn0ipj3WndDAhq3YkmFeRPDCg67+j88WTX1U83NMBFNGj7Jlbiyf8UQw1c4TwdQ8qtOjWjOLfA8E9T9Htc6LSJ4IpoweZcwsgimrxxtBfn7ev3nVWaMsWGVmZtZZAeTn50dISIhSbImKiqpTeDlw4AAAXbt2dTkeHR3NokWLKC8vV4o8oaGuf7malZWFyWRSVk+Fhoayf/9+l6KQ0+nk/Pnzisfbb78dvV6Pt7e3UqyqPQdw7tw5xWtVVRUtWrRwuWdubi4eHh7K+JCQEIqKimjTpo0ypqSkBJvNhtPpJCcnR/GTmZnp4m379u0AdOvWDYC2bdsCNX2zDAYDf/rTn1izZg2AUtSqr7SGcldWY2jgp3lUH08E0x28uavS6/Rqyciy8O6qdLf0alFjZtFMNXq8tFilMB3wyoc/Mf+Fu6+ZN6xvO776+dQVz//x7rYNfgYyzItongjmje4xxN+bTm2COHDCgsN58bheBx0igwj286oX+9fFKqjZcmfawp/c0ifiRp8XEbwOrYPw9TZedkskX28jsa0CVfdzTARTRo+yZG5sn/FEMNXOE8HUPMrBayjzerwHgvqfo9rm5XrwRDBl9ChjZhFMWT3KItUVrI4fP8706dPZs2cPZrOZwYMH85e//AWTyaSMsVqt+Pm5/lW30+nEbrezdOlSFi9eTFBQEPv373cZW9vHadOmTcyYMQMPDw/uueceunXrhtPppLi4mPj4eHQ6HSdPnuTBBx8kKyuL5s2bY7VaCQ4OVvpSdejQgW+//ZZly5YxefJkbDYbbdq0wWq1AjX9q0wmEyEhIRQWFjJ27Fglk7+/P0FBQZSU1PyF+R133AHAzz//zH333UdeXh6tWrUiJycHPz8/5Z5RUVEcPXqUF154gePHj1NWVsbkyZPR6XQu/oODg3nppZd48cUXKSsrIywsjNzcXIKCgpRC1R133IHRaGTRokUEBQURHx+P2VyzTU379vXrHVIrrYdVXTWG/VA1j+rjiWC6iyeyV4taM4tkqtXj93tO1SlW1crugG37T5PQpcXlB1xFfxnambc/2XfZ4w15D5FlXkTyRDBl8picGMfcX61K6NimZlVCfV7bWq8k9fKmPdadaQsvv/JNTT/HRDBl9ChT5sb0GU8EU+08EUzNozo9qjmzqPdAd3psLDwRTM2jOj3KmFkEU1aPsklVBavi4mJGjx5NZGQks2fPJj8/n5kzZ1JeXs6UKVOueu38+fMpKCigS5cu/OUvfyE1NZXNmzczbtw4xo8fT35+Pj/88AMAZ86c4c0336S8vJy//e1vysoiAE9PT8xmMzk5OXTt2pXnnnuOTz75hMLCQpo2vfgLge7duwOwefNmHn30UXx8fPjwww8xmUwuK5Tatm1LWloaR48eJTk5mV27dvHdd98RGRmpFKyaNWsGwMGDB+nTpw9Dhgxh0aJFVFRUuNwzPj6ejRs3cvLkSaKiosjIyGDOnDnce++9fPnll8q4nj17snbtWvr370+nTp1ISUnB4XAQGxurjAkODsbLy4vS0lJuueUW4uLiWL58OVCzkqu+0npYXV2NYT9UzaP6eCKYDeWJ7tUC6st8PZhq85h5+spb9wEcy7MyuE+7a+b2vb0NfW9vw4K1+/nl8Fm6tA9l3IOd6muzjm70ebkePBFMGTwGBsLryb3JO1dK3nlbg/u+aL2S1MsLDDSzYvpA9hw+y6GTFq23mEqZaueJYMr4GU8EU+08EUzNoxw8dzBF
vweC+p+jGudFNE8EU0aPMmYWwZTVoyxSVcFq5cqV2Gw25syZo/SnstvtvPLKKyQlJREWFgbUbP9XW+wBqKio4P3338fHx4du3brRo0cPbrnlFvr27Ut+fj7JycmYzWZatWrF0aNH+ec//0lMTAwA7733HocPH0an07n0nPLy8uL06dO89dZbxMbG0qtXL3bt2kW7du2UMQAtW7bkv//9L0ajkfvuu4+vvvoKQGGVl5crX7/zzjuEh4fzhz/8gTVr1hAYGKhk0Ol0+Pn5sW/fPrZv366slDp+/LjCqt0KMDo6moyMDAAGDRrEXXfdxZdffqmM++c//0n37t2ZP38+GzduxOl00rNnT3bt2oXdbsdgMLBr1y5KS0vp2LEje/bs4dtvvyUoKAiAjIwMysrK8Pa+9m8srYfV5dUY9kPVPKqPJ4LpLp6oXi2g3swimWeLyigpt+PnZSAkwD0fatzhMap5E76/yvmbwv0atEf90LuiGfdgJ6zWsgZxaiXja0fzqE6Pvp4Gbo0Na/BrW+uVpG4eQHTzJnRtH6ran2MimDJ6lClzY/qMJ4Kpdp4IpuZRnR4bQ2Z3vweC+p9jY5gXzaM6PcqYWQRTVo83ghptD6stW7bQo0cPpVgF0L9/f6ZOncrWrVtJTEwE6van2r17N6Wlpeh0OqVvk8lkYsCAAWzatIm9e/cCMHbsWI4ePepyzzVr1nDzzTdjMpnw8vKisrKSCxcuEBoayubNm5VxX3/9NVu3biU4OBiAEydOADBkyBCSkpKUcY888gi//PKL4qO2p9Xy5cuVgpLVauWzzz5Tvs7JycHpdBIZGcnHH3+ssObNm8ebb75Jq1atlNwATzzxBFarlcmTJ/Pss8/yxRdf4OHhQcuWLZVrhw4dyvHjx1m6dCnz58/n0KFD/PTTTzgcDgwGA1lZWQBK4QsgPz8fgNOnT/Piiy/y1ltvXXW+riRtf84rqzHsh6p5VB9PBLOhPFG9WtzpUTTPHczSsirmrc1w2XqnU5sgkgZ3xOxV/6bml6ohHu+IC2fRhkOX3RbQoIeenZpre9SrhKl5vDF5Wq+kxsETwdQ8ysETwZTxM54Iptp5IpiaRzl4IpgyepQxswimjB5lzCyCKatHWaSqzRQzMzOVokyt/Pz8CAkJcSlQJSQksG3bNqVfVO05vV5Pr169lHHR0dHk5eUpq5yKi4sxGo1s2LBBGVNdXY3T6VRWO2VnZ+N0Ojl79qxSlAKUe0VERCjjPDw8XIpaADabDYPBoIwrKipCp9MpK6+gZnWWTqdTtt6r9X/kyBHlPgAFBQUAREZGAjWruSIjI122/wNYv349PXr0cOnzNW/ePFJTU3n11Vfx8PBg0aJF/OlPf8LDo+YXoHfeeSd9+vQhJCSEadOmMW/ePAYMGABAYmIiycnJaNKkSdPVlDS4Ix0ig1yOdYisKbZo+m2atzajTlPzAycsvL8m4wpXXH+9PPpWfv1HMAZ9zXFNmjSJ199H34qvt+vfmPl6G/m79j2oSZMmQdI+42nSpEmTJk2aNGn6vaSqFVZWqxU/P786x/39/SkuLla+HjZsGEuWLCE5OZmkpCS2b9+uHK/dNhDggw8+wOl0ctttt+Hr60tlZSWxsbEsXLiQoKAg2rVrx4oVK7Db7UrxqPY+fn5+DBw4EIAWLVooWxDWbptntVoJDAxk9+7ddOzYEU9PT2XLQYPBoHgoLS2ldevWTJkyhalTpxIcHIyPjw8Gg4Hw8HCXezqdTnr06IFer6dly5bk5eUBYDRenKY//vGPzJo1iy+++AKAUaNGkZWVxdKlS5Ux69at48033wRg8uTJAHTs2JEBAwZgsVgICgqivLwch8NBaWkp06ZNc3nezz//vJKzPmpok80bUY2hgZ/mUX08EUx38vx9PXlhRDznisqwunE7OzVndidTZFNzd3kEiG4RwIcv9iNtXx5HThXTLsKfOzqHN4hZK7XPtVpfOyJ5IpgyenQnr3mwL3P/ehcH
TljIPmejVYi5zi+S6yNtXtwjzaN7pHaPsmVuLJ/xRDDVzhPB1Dyq06OMmUUw1c4TwdQ8qtOjjJlFMGX1KJtUVbD6rfL392fRokW89tprJCcno9fr0ev1TJo0SRlTXFzMqVOnAJg+fToVFRW8/PLLXLhwgWeeeYaFCxdisViIjY2lc+fOeHl5udyjpKSEtm3bcvLkSXJzc5WeVbVyOBwUFhbSvHlzdDodZ86c4ciRIwQHB7sU15xOJydOnKB9+/acPn2agoIC8vPzCQ4OdlkRBTV9rFq3bk1OTg4nT550KVTVZvrwww+JjIykoKCAkpISjh07RkJCAl27dlXGffTRR3WeWUZGBn/605+YMWMGiYmJ2Gw2Tp8+jdFoZNy4cXh6erJ48WJKS0uZP38+f/vb365xVmqk1+sa3IT3RlZjaOCneVQfTwTTnTxR3/NqzuwO5vVoag7uyz3ozrZu4VxOap9rtb12rgdPBFNGj+7k9Qo00+t/D7tmafOiTqbmUQ6eCKaMn/FEMNXOE8HUPMrBE8GU0aOMmUUwZfQoY2YRTFk9yiJVFaz8/PyUlUyXqri4WOn3VKvo6GhSU1MBWLZsGa+++qpLUWnlypUYDAYcDgf33HMPXl5ezJ07l8zMTP7whz+49J0aNmyYwvf2rnkx3X///UoPp8rKSvr160d+fr4yrqCggKqqKhYsWKBsY5iWlsa4ceNcvOr1eoKDg1m7dq1y7K9//SsbNmxQxjkcNftZPvHEE8pWfEVFRSQkJAAo41auXInNZuPzzz/n22+/ZfLkybzwwgv861//Ij8/X1lddu7cOR544AFllVVtnqysLNq2bav4OnLkCCkpKdx9990ADB48mPvuu4/FixeTnJyMr6/vlSfrCnI4nFitF675uhtdBoP6G/hpHtXHE8GU0aOIzOu2ZnEou4gOrQMY2LNNvRgim5pD43iOavcoY2YRTBk9ish8tqiMEjevdNDmpeGS0WNGloWc8+5b7Qfqf46NYV4ag0fttaPOedE8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9XgjyM/P+zevOlNVwSoqKsqlVxXUrHQ6d+5cnd5Wv74OICsri5iYGAC2bNlCs2bNsNvtyuqpm2++mdzcXLZu3UpiYiJQswIqKytL6X11/vx5AJo3b67wTSYTnTp1Ij8/X7lXUVERAE2bNlXG9erVCw8PD8zmmr9Eq6yspLq6uk6xrU+fPnz++ecEBAQAcPbsWaCmR1WtAgICiIyM5MiRI8rxLVu20KNHD+U6gH79+jFr1iwlU05ODidOnOD55593uecDDzzA7NmzyczMJC4ujmPHjgEQGxvrck+o6euVn59fr4JVzfXaN+OV1Bga+Gke1ccTwZTRozt4B05Y+NfKX5Sv048X8NG3x3lheBdiWl3bL16uR1NzUOdzFM1UO08EU/N44/JKy6qYtzbDZQvRTm1qesmYvTwaalGbF5Uy1egxv/AC/1i8k9KyauVYbT+1kID6b2F7qdT+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08maSqglVCQgLvvfeeSy+rL7/8Er1erxSUanX8+HGmT5/Onj178PHxwcPDg88//1wpWB0/fpyqqioGDRqkXNO3b1/Wr1/P8uXL+c9//oPFYiEiIoKioiLuvPNOAHJycgDYsWMHzz77LGlpaXh4eKDX11QAg4ODASgvL0en0/HOO++wc+dOsrKyCAsLo7q6miZNmgCQnZ2N0+nk5MmTvPrqq2zcuBGbzaYwQkNDgZril8FgYP369axevZo9e/ZgNpu5cOECRqNR2TowMzOThx9+mE8++URZ/fXkk0/i5+enFPpq/z8sLIwXX3yRr7/+mqqqKmX1lYdHzS82WrRoAcBdd9112bmo7a+lSZMmTZou6tJi1aWatfwXFk66+5p5SYM78v4a119Ea03NNWnSdKnmrc3gwAmLy7EDJyy8vyaDCY90+X1MaZJSvy44
AJSWVfPaop38Z3zC7+RKU2OQ9trRpEmTJk2aNGnS9FulqoLVsGHDWLJkCcnJySQlJZGfn8+sWbMYNmyYUnABGDFiBLt37+aWW25h9uzZ5Ofn88orr7BgwQJCQkJo164dRUVFeHh4MG7cOOW6++67jxdeeIH09HSGDBlCaGgoCxcuxGAwEBgYCIDVasVoNLJ//35ycnJISkoiIyODr776CqjZntDLy4uysjKaN2/OsmXL6NatG3/+859Zvnw5TqcTu92ujIWaVVwrV65k5MiRACxZsgSAiooK5Z6BgYF89913hIeHk5yczJYtW/jpp5/Q6S5uGWW1Wvnll1+YP38+nTp1oqCggKCgII4fP86+fftc7jlhwgRKSkoYNmwYubm5fPHFFwCEhIQA0KlTJ0JDQ5XVXQC+vr5UVFRw//33K1sj1kdGo9ZU7tdqDA38NI/q44lgyujRXbw1P2Re9fyGH08y6I5r2x5QVFNzUO9zFMlUO08EU/OoTo/u4p0usLkUtGvlcML+LAvnreU0C6rf6gRtXtwjWTzuO36+TsGhVqVl1RzMLiQuqullz/8Wqf05qnVeRPLcxdReO+qcF5E8EUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsklVBSt/f38WLVrEa6+9RnJyMmazmSFDhvDcc8+5jDtz5gxOp5M5c+a4bGM3bdo05s+fT3FxMU6nk4cffthlmz2Hw4HD4cDX15f169djNBrp378/P/74IwsWLGDatGku9wkMDGTOnDmEh4dz11138f3333Pw4EGleGa32wkNDSUzM5P09HTi4+MpLy8nNzfXhVNVVUWbNm1YuXIlZrOZhx56iE8//ZTt27czdOhQhWUymdDr9bzzzju0adOGrl27smfPHpf+VDt37gRg//79APz8888uX9cqOzsbk8nE8uXLiYiIYPDgwXz22Wf89NNPdOvWDYPBwMCBA1m8eDFBQUEUFRVRVVWFp6en0kerPtLrdcKa894IagwN/DSP6uOJYMrkMfdcKUcO5hMebCY8pH5bnQIcOVV81fOHcooYVc+ffyJ/bqp1XkQy1c4TwdQ83pi842dKr3reVulo8M8PmebFXe8Hl5NaM7uLmWfJuer53IILJNzSqt78Wqn9OaptXq4Hr6FM7bUjhieCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFqipYAURHR5OamnrVMc2aNaN9+/YuvZwGDBjAtGnTmDBhAomJifTo0UPZmq9Wu3fvxul0cu+99zJjxgzl+IwZM9i0aRMAfn5+VFdX065dO9atW6eM+eijj/j+++/Zs2cPd911F02aNCE7O5tJkyYxZswYZVz//v2xWCycOnVK6V1Vu8Kq9usTJ07w6aefcvToUeWepaWlJCQkMHfuXIU1c+ZM9uzZo/Sn8vX1paioiHfffZd+/fop42655RYuXLhAZWWlcg+z2cyuXbuUFVpbt27ls88+48CBA8p1ZrMZT09P0tLSACgrK+Pee+9lyZIlTJky5apzcCU5HE6s1gv1uvZGlsGg/gZ+mkf18UQwZfJYWlZFyup00jMvrk6Iiwri6YfiMHtfe9+XdhH+pB8vuOL5mJYBFBba6uVVpnkRyVQ7TwRT86hOj+7i+XjornrebNJrP3d+g9z9fiDCoyieu5jhQVf/B3eLpj71fi2C+p+jWudFJM9dTO21o855EckTwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfV4I8jPz/s3rzpTXcHqt6i2l9Ol8vPzIyQkROnhFBUVpfx3rWqLNV27dnU5Hh0dzaJFiygvLycqKgq42F+qVllZWZhMJrKzs5Xz+/fvV8ZDTWHq/Pnzisfbb78dvV6Pt7e3UkiqPQdw7tw5xWtVVZXSV6pWubm5eHh4KONDQkIoKiqiTZuLW06VlJRgs9lwOp3k5OQofkJCQly2E8zMzESv15Ofn+9yj/Lycm6//XasViuRkZH4+vpy8uRJGiKtodyV1Rga+Gke1ccT
wZTB49xV6XX6vmRkWXh3VXq9+r4M7BHJfzdfeVvA/re3bnB+GeblejDVzhPB1DzemLwQf286tQniwAkLDufF43pdTb+7YD8v7efOb5C73w8uJ7VldjezQ+sgfL2Nl93azdfbSGyrQLf4VftzVNu8XA9eQ5naa0cMTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIsaZcHKarVSVVXF2LFj2bNnD2azmcGDB+Pn56f0cEpISOC9997DarXi5+cH1KywAjh16hR33XUXFouF2NhY+vTpg9PppLi4mPj4eHQ6Hfn5+Tz77LOkpaVhNBqprq4mKChI4Xfo0IFvv/2WHTt28K9//YusrCwCAwOxWq1ATS8pk8mkFJneeOMN1q5di81mw2w2ExAQQGlpzTYvd9xxBwDHjh1TMnl7e1NSUuKSqXXr1hw9epTx48dz5swZTCYTwcHB6HQ6F/9eXl7k5+czYsQIMjIyKCsro3PnzjRr1oySkhLlObZq1Yp7772X/fv3Y7PZOHXqFBUVFfj41K8XQq20HlZ11Rj2Q9U8qo8ngimLR1F9XyY/Gs+Mpbsve7whP/tkmRfRTLXzRDA1j+r06E5ecmIcc3+1Oqhjm5rVQdrPnf8tkX3A3OVRJM+dzGmPdWfawp9cCg++3kamPda9wZ//1f4c1TwvonjuZGqvHXXOiyieCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2dQoC1ZOp5NPPvmEDh06MHv2bPLz85k5cyZ6/cUXwrBhw1iyZAnJyckkJSWRn5/PDz/8gE6nY+HChUycOJH27dszYcIE3nrrLeU6T09P/Pz8OHr0KEVFRTzxxBN89913pKen4+FxccuQ7t27A/DBBx/Qu3dv+vbty+LFi+t47dChA9999x1Llixh1KhRnD17lnXr1uHl5YXTWfOnss2aNUOn07Ft2zYiIyN58skn+eyzzygsLKSqqkphtW/fnq+//hqr1crjjz/O+fPnWb58eZ17tmjRguPHj5Ofn090dDT79+8nIyODvn37cuTIEQBmz57NL7/8QlpaGgMGDCA8PJz169eTl5fHgQMHqKqqcsn7W6X1sLq6GsN+qJpH9fFEMG90j6L6vvQMNLOua0s++eYIew6fpWv7UIb2bVdfm3V0o8/L9WKqnSeCqXm8cXmBgfB6cm/yzpWSd97m9v5LN/q8XI8+YKCuzKKYgYFmVkwfyJ7DZzl00kJM6yC6tg/93xdeg9T+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdQoC1Ymk4mKigrmzJmj9LGy2+1MmTIFo7Emkr+/P4sWLeK1114jOTkZs9lMly5d2LFjB6NGjVL6TkVFRVFYWIjT6VS27WvSpAnFxcU4nU5SUlKIjY1l0qRJzJgxQykyNW3aFLjYK2rv3r3cd9995Ofnk5aWprDCwsIACAoKYvHixYSHh/PSSy/xxhtvYDKZlEyenp5UVFRQXFxMSkoK8fHxPPDAA8yePVvJ1L59ewCaN29OSkoKZrOZ4cOH89///pcLFy4o92zbti0VFRXo9XoOHjwI1PTD2rx5szKmQ4cOLF68GKPRyNdff01YWBi33347VVVVrFu3jl27dnH77bdf89xoPawuL4NB/fuhah4bzsvIspBz3karEDMdIoMazAP1ZxbBdAdPZN8XgPu6tWRo33ZYrWUN4tRKlnkRzVQ7TwRT86hOjyIy+3oauDU2TPu5c40S/X6gxsyimdHNm9C1fajbXoug/ufYGOalMXjUXjvqnBfNozo9yphZBFPtPBFMzaM6PcqYWQRTVo83gm74HlYGgwFfX1+lWAXQu3dvACorK5Vj0dHRpKamKl8vWLCAHTt20KlTJ+XYsmXLSExM5NChQ3h5eSnHPT092bp1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWlqasq1eRUWFcp+WLVsq4+bPn09ZWZnytdFopEmTJqSlpSnH
8vLymD17tpKpQ4cOADzxxBP069dPGffNN99QVlam8KOioti+fTs7duxg9erVTJ48mTvuuIPU1FTatatZCdC3b186dOiAj48PKSkpCuuNN94AUApz9ZG2P+eV1Rj2Q9U8XrvyCy/wj8U762xz8vfRtxIS0LAtNmultszXg9kQ3vXo+9JQj9eDJ4Ipo0cZM4tgyuhRxswimNr7gTqZMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNcqCld1up6ioyKU/VW1x6dJVS79W7UqlAwcOMGDAAACqqqrIycnBbrdz88034+vrS0VFBRUVFZw4cYLIyEgAfvzxR5xOp7JN3pkzZwDYs2ePSz8sg8EAwIULNauMPD09Afjzn//MiRMn8PDw4M4778Risbh4tdvtWCwWBg4cSHZ2NuHh4XTt2tUlU8uWLYmMjOTf//43U6dOxWazERcXx+nTp4mIiFDGJSQkMHfuXBITEzl69CgAr7/+OgcOHODxxx9X7nnTTTexbNkyunfvTnFxMY899hiffPIJRqORW2+99donRpMmSfXrYhVAaVk1ry3ayX/GJ/xOrjQlDe7I+2syXHqXdIgMImlwx9/RlSZNmjRput7S3g80adKkSZMmTZo0adKkqXGoURasKisr8fLyculPNWvWLPz9/amuvvhL49GjR5OXl8emTZuAmiKSwWBg0aJFhISE0K5dOxYvXozVagVg+vTpVFRU8NJLL2EymXj22WeZMGECZWVlzJo1i6CgIHS6mm1FiouLASgoKKBr166MGjWKTz75hMzMTJfzVVVV6PV6Dh06xIgRI/Dx8WHhwoU4nU6X1WAVFRU4HA6Ki4v585//zK5du1i9ejU+Pj4umZo1a8aPP/5Inz59uOWWW5gzZw5Op5Px48crY6KiovDw8ODYsWP06tWL77//ns8//5yAgADuvfdeZdzatWvx9fVV8i9cuBCA559/vl79q2rV0Ma5N6IaQwM/zWP9tO/4+TrFqlqVllVzMLuQuKim9earMbNoprt4/r6evDAinnNFZVjL7fh5GQgJcM8ewmrNLJIpo0cZM4tgyuhRxswimNr7gXukeVQfTwRT86hOjzJmFsGU0aOMmUUw1c4TwdQ8qtOjjJlFMGX1KJsaZcFKp9MxdOhQDh8+rPSnGjJkCFu2bHEZ53A4sNvtLsf0ej3PPPMMCxcuxGKxEBQUhNFopLq6mu7duxMWFsabb76JxWKhWbNmTJgwAaPRyD333EN+fr6yVV7t1oC33XYbhYWF/Pvf/6Z58+b4+fkpBSCAU6dO4XA4SExMZP369dhsNtq2bcuBAweoXY0FNVvwBQcHc9NNNzFnzhzMZjNRUVFkZ2crY86cOcPPP//M4MGD+eWXX9iyZQt2ux2j0ais+AJYuXIlRqORfv36KcW6tm3bcuzYMQoKCpS+WtHR0fzyyy/o9XrsdjsBAQHodDp+/vlnxo0bpxTnrkV6vc4tjatvVDWGBn6ax2tTniXnqudzCy6QcEurevNrpabMopm550o5cjCf8GAz4SG+DeaJ/Jkk07yI4olgqp0ngulOnru/B2sl23MUwRPBlMmj9n6geVQbTwRTNo+7D5/l8I8niWkdRNf2oW7jqjmzKKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpWfnx8mk8mlPxXA559/jr+/v/L1kiVL6lxXVVXFmDFjSEpKAmDEiBEEBARw+PBh5dpmzZphsVjo378/8+fPV64fNmwYzZs3B2oKUQD3338/w4cPV8ZMnjyZVatWKayCggIMBgMzZsxQxjidTm6++WalYFVZWYnT6aRt27Yumb755huefvpp9PqaimxaWhoOh4OXXnqJX375haeffpqnn36ao0ePsmXLFp544gkAtmzZQs+ePfnXv/7FqlWrmDx5Mu+++y733nsvW7duJTExEafTycmTJxkzZgyTJk2iffv2/N///R9dunRh+PDhbN26lTvuuOMa
ZwYcDidW64Vrvu5Gl8Gg/gZ+msf68cKDrv4G1KKpj9bM/TeqtKyKlNXppGde3LIpLiqIpx+Kw+xd/1Wfas4siieCKaNH2TJr34Pq5YlgyuhRxswimDJ6lDGzCKY7efmWC7zy4U91eshOe6w7oYH17yGr5syimJpHdXqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ42yYBUVFaVsvVerkpISzp07R1RU1FWvA8jKyiImJgaAzMxMWrRoQXh4OF5eXkDNaqQjR4643MPpdJKVlUWvXr0AsNlqfgFdVlbmcg9v75pfXrdo0UI5b7fbKS4uVopYOp0OLy8vpWBVu4rq16xmzZoBKL4yMzNp2rQpWVlZjB8/nj/84Q+MHz+et956i08//VS5LjMzk4cfftiF5evrS0hIiJLJYrFgsViU51CrDh06uHiqj7SGcldWY2jgp3m8NnVoHYSvt/Gy2wL6ehuJbRWoNXP/jZq7Kp0DJywuxzKyLLy7Kp0Jj3RpoDt1ZhbNE8GU0aMsmbXvQfXzRDBl9ChjZhFMGT3KmFkE0x28XxeroGY77mkLf3JLD1k1ZhbN1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiRlmwSkhIICUlhZEjR5Keno7ZbCYmJga9Xq8UlC6n+Ph4fH19ef3118nOzsZisVBRUUFFRQUPPvigC3/NmjWsXbuWZcuW4eHhQefOnSkqKuLOO+8Eavph6XQ6/vvf/7JmzRqysrIIDw9Xtgz09b24hY5Op+O5557j8OHD2Gw24uLiKCkpoWXLlsDFflcHDx50yRQdHQ2gjLNarXh6ejJmzBgA1qxZQ3p6Oh07dlQYtbyvv/6atWvXcv78eQBefPFFvL29lXFBQUF4eHjwt7/9jb/97W8AzJo1i1mzZgEXC271kdbDqq4aw36omsf6a9pj3Zm28PJ/4dnQ7we1ZnY383SBjf1ZljrHHU7Yn2XhvLWcZkH1+2tZtWYWyRPBlNGjTJm170F180QwZfQoY2YRTBk9yphZBNNdPJE9ZNWaWSRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBasBAwbw9ttvc/DgQZ588klOnjzJqlWraNeundKfCWD06NHk5eUpfZw8PT3p0qULaWlp9OvXj/j4eGbNmoXNZqN///7KdXfffTc6nY5z587x5JNPUlpaytKlSwkKCqJz587KOL1ez/Hjx4mOjmbChAls2rSJXbt2uXg1Go00adJE2YqvdevWzJ8/H4fDQXBwsMvYysrKOpkAZWVWeXk5p0+fxuFwMGjQILp06cK2bdtYvXp1nX5TDoeDfv36kZ+fz9dff82BAwdcVqDpdDq6d+/O1q1bGTRoEOvWraNHjx4cPXoUX19fevToUa+50XpYXV2NYT9UzeO1KzDQzIrpA9lz+CyHTlrcvoc+qC+zu5nHz5Re9byt0tHgny1qy3w9eCKYMnqUIbP2Pdg4eCKYMnqUMbMIpoweZcwsgtlQ3vXoIau2zNeDqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFjbJgtX79ekwmEzExMaSkpGA2m+nVqxfbt28nPz9fKVo5HA7sdrtyXUVFBXv27KF79+7s37+fLVu2YDAYMJlMbNiwgdtuuw2Ab7/9FqfTSXh4OKmpqRiNRnr27ElaWhr79u2jc+fO+Pn5YbfbiYyMxGAw8Oabb/L/2Hvv8KiK9v//tTVlk00nEFpIgCR0kB5AaUoTfBAU9YvYUdGPCioWQFRQHhRFsSGCFBUriiAWBCEU6UgnlBAgJCSBlE02ZTe7+/sjvzPu0h4pRxfPvK7LC7N79n3ue2bOzJwp98TFxVG7dm1OnDghJpmCgoI4evQonTt3ZvXq1djtdlq0aMHmzZspLa0eHFKuNRqNPj61adOGbdu2iV1bLpcLt7t6K+GSJUtYsmTJOdMnLCyM06dP8+mnn4rPcnNzgepwiAp9+vRh3bp17Ny5E4Ddu3fTs2dPnnji
Ccxm8yXljTzD6twYDP4fD1XaePl6ibVCaZ1UA5ut/LLOrfLG330GyCsqp6TChTXQQEz4pTXIwSbdBb+3mPWXnKZaLItqaGrRRi35LJ9B/9ZTQ1OLNmrRZzU0tWijFn1WQ/NK6al5hqy/+qym5pXoy5+JFtPR3/XU0NSijVr0WQ1NLdqoRZ/V0NSqjf8GrNZ/+RlWaWlppKam8t5774nPbDab2DE0ePBgABYsWODzu23btmG323nuuedISUkB4I477iAvL4+0tDRx3YoVKwB45JFHhJbH46Fjx46sXr2aFi1aUK9e9YqtHj16iJB6AA8//DAnTpzg1KlT1KlTR5w/NX36dDEx5fF4RFhAgHr16qHT6YiPj+eTTz4RWkuXLmXbtm3k5+cDkJKSwrJly3jttdd8QhgOHDiQAwcO4HA4MJvNJCQkEB4ezrvvviuuycrKomfPnrRt21Z8ZjRWZ//nn39Op06deOihh7j33nv/ajacFxmf8/xcDfFQpY3+p6eG5pXQKy138uH3e3zCiDVrEMnIQU2xBJouSismLIhmDSLZm1mA2/Pn53odNImPJNoaeNn2aiVf1NbUoo1a8Fk+g1eHnhqaWrRRiz6roalFG7Xosxqal6v3d5wh628+q6F5Jfvy50ML6Xi16amhqUUbteizGppatFGLPquhqVUbtcJVGUwxIyNDhLZTsFqtxMTEkJGRccHfAT6/7datGydPnuTEiRNUVFQAsGPHDnQ6nc95WDqdjgYNGgiNqKjqmNgnT54U1zidTnbv3u1zr/DwcAAKCwvFdb///jtOpxO7vXrVl9lsxmAwYLPZfOxdtWoVBoNB/LZGjeoQY8eP/xkCobi4mMzMTDwej/i8W7durF+/nqKiIpxOJ1lZWTzxxBMA55yQGjBgAAAzZ85k5syZeO9Kk0gkkvPx4fd72JtZ4PPZ3swCZi7ec0l6Iwc1pUl8pM9nTeKrX5olEon6yGdQIpFIJFcL40e0JSTId/1tSJCR8SPanucXkjO50n15iUQikUgkkivBVbnDymazYbVaz/o8LCyM4uLiC/7ObDYTEBAgPhs2bBgfffQRDoeD5cuX43A4yMnJITEx8azzsPbt20dwcPWB4+Xl5QAsX76cefPm0bhxYxYuXCh2TSl2REVFYTKZePTRRxk9ejTl5eVMnTqVhIQEn4knj8dDXl4eEydOpG/fvmzcuJGlS5cSGxsrtPT66vnFOXPmULNmTWJjY5k5cyYWi4XKykpx3bBhw1iwYAH9+vXj9OnTQPWE24033ugzWed0OunatauYXCspKeGNN95gy5YtzJo16y/lxbkwGq/KeVBVuRoO8JM2+p+eGppXSi/ntN1nNaaC2wO7jxRwylZBzcjgi9IMCwng6TvakF9Uju0KhiXRUr6oqalFG7Xms3wG/VdPDU0t2qhFn9XQ1KKNWvRZDc0rqVcrOoT3xlzH3swCjuXbqRdjOWvRxaXgzz5fSU01+vJX2kY19dTQ9Hc9NTS1aKMWfVZDU4s2atFnNTS1aqPWuConrK4kYWFhjBo1ildffZXnn3+ekJAQQkJCaNWqlc91brdbnCXlzc0338ycOXMoKCggJSWF6dOn88ADD4jv9Xo9NWvWJD4+ntGjR2M0Gunduzd16tTxCWmo0+kYOHAgW7du5euvvyYuLo5JkyYxd+7cs+45YMAApk2bht1up02bNrzwwgs89thjPj7NmzeP559/npKSEgICAggODmbz5s1kZ2cTFxcHwLFjx1izZo2Pj1AdcjEvL0/s6LoY9HrdZR/K/m/majjAT9rof3pqaF6u3uGTpRf83u5wX3JdoFYdooV8+Ts0tWij1nyWz6D/6qmhqUUbteizGppatFGLPquheSX1UiMspP7vyy4af/b5Smiq2Zf35t+ejlejnhqaWrRRiz6roalFG7XosxqaWrVRK1yVE1ZWq1XsZPKmuLhYnBN1vt85HA4qKyt9dlkFBwej0+nYtGkTgYGBDBkyROyg
UliwYAHDhg0T+sq/vXr14sUXXxTXZWZm+nxvtVqpqKhgxowZPnpvvvmmj61Wq5XY2FimTp3qc91bb7111j1HjBjhc89169b5fA+QmJjI559/Lv4uLy/n+uuv56OPPmLChAninsHBwfz2228idOEbb7zBzJkz+f333xk0aNB50/J8uN0ebLayi/7dvx2Dwf8P8JM2+p+eGpp3TvpV/P/8cb0uWSfYpLvg9xazXh52/Q/qqaGpRRu16LMamlq0UYs+q6Hp73pqaEob/dNGLfqshqYWbfRXn9Xsy4N20vFq0lNDU4s2atFnNTS1aKMWfVZDU6s2/huwWoP+8q6zq3LCKiEh4ayzqkpKSsjPzz/rbKszfwdw5MgRkpOTxecZGRnExcURGBgorjtw4IDPbz0eD0eOHBHnWtWrVw+TyURGRgZdu3b10fK+V0JCAqdOnTprMu3Mc7j+ik/Kv2f+NiMjA5PJRN26dc/re1BQEImJiRw9elR8lpaWRqdOncRkFUCnTp2YOXMme/fuvaQJK0AeKHcBroYD/KSN/qd3JTTvmbLyrM+Uyas5z/S4aL2YsCCaNYhkb2YBbq/Np3pd9Zk30dZAedi1H+ipoalFG7XosxqaWrRRiz6roenvempoShu1oaeGprRRG3qXq/l39OUv18a/Q08NTX/XU0NTizZq0Wc1NLVooxZ9VkNTqzZqhatywqpbt268//77DB8+nF27dmGxWEhOTkav14sJpXPRpk0bQkJCeOWVVzh27BgFBQUkJyeTnZ1Nr169fPS///577rnnHrZv347JZKJFixYUFRVx7bXXAmA2m+nQoQNffPEF33zzDUeOHCEuLo7w8HASExOpU6cOAF26dEGv1/PEE0+Qnp6O3W6nefPm7Nq1i1GjRl2UT3Xr1iU+Pp4PP/yQ1157jezsbBo0aEBVVRWdOnXCbDYDMGPGDN55551zpkFiYqL4/z179lBeXk5SUtJZ1zmdzr+aHRKJRKOMHNSUmYv3+MS/bxIfychBTf9BqyQSiUQikUgkEsn/QvblJRKJRCKR+CNX5YRVv379mD59Ovv27ePBBx/k6NGjLFq0iMaNGxMbGyuuGzFiBNnZ2SxfvhyAgIAAWrVqxdq1a+nVqxdt2rRh7ty55Ofn07dvX/G7Hj16YDKZ2Lx5M/fddx+lpaV88sknREZG0qJFC5/rXnrpJRITExk9ejTLly9n69at3HXXXeIa5fyqdevWMXjwYOrXr8+sWbOorKxkwIABF+1Tly5d+OSTT7jmmmsYNmwYX331FYcPH/a5Z2lpKf369SMpKQmr1Up+fj7ffPMNubm53HHHHeK68vJy9Ho9Tz/9NABbtmxhxYoVhIaGXtaEldEoD5U7k6vhAD9po//pXSlN7zCA5+KeKSsvKTxgWEgAT9/RhvyicmwVLqyBBmLCLz9Gr1byRU09NTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0NSijf7ss1p9eW/btJCOV4ueGppatFGLPquhqUUbteizGppatVFr6Dwej+d/X+ZfzJw5k/fee0/sVFJ2I/3++++sWrVKTPAMHz6cEydOsHJldSisyspKOnXqRNOmTc/aYdW7d28mTpwIwNKlSxkzZgypqals374do9FIixYtWLt2LV999ZWYtLr33nvJzs7GaDSKHVZhYWHY7XaWLVsGwMmTJ+nevTsdO3YUO6xatGjBzp07eeSRR7j//vsvyqcbbrgBq9VKcXGxzw6rOnXqMGvWLABWrFjB3LlzOXDgAGVlZcTGxqLT6SgsLGT9+vViJ5aysyowMBC32018fDxDhw7liy++oE2bNrz88ssXnTcejwed7sLxsCUSyV/nRH4pOafsxEVbiIsJuSSNG8cs/p/XLJl2aSFA1eBK+CyRSCQSiUQikUgkkquLXzYeZdehfFo2iqFX+/r/tDkSiUQi+Qe4KndYpaWlkZqaynvvvSc+s9lstG/fXuxkAliwYIHP77Zt24bd
bue5554jJSVFfP7qq6+KXViKfnJyMnPmzBGfeTweOnbsyOrVq2nRogUOh4ONGzfy5JNP+uxuWrFiBQ8//DBZWVnUqVOHtWvX4vF4mD59us8ZVo888ghpaWliwuqv+HT8+HEyMzN59913fUIYzp8/n6lTp+JwODCbzfTs2ZOePXuK7ysrK+ncuTP9+vUTk1VQfa6V0+lkx44dPuk0a9YsH1svBrfbg81Wdkm//TdjMPj/AX7SRv/SKy138v63u9iV8WeIjuYJkTz8n+ZYgkyXa+pZ+MOhymr5rMWyqIamFm3Uos9qaGrRRi36rIamv+upoSlt9E8bteizGppatFGLPquhqUUbtebzkexiXpq7GUVm1bYTvPPVH7xwd3via1n9wkY19NTQlDb6p41a9FkNTa3a+G/Aag36y7vOrsoJq4yMDG6++Wafz6xWKzExMWRkZFzwdwAJCQk+nycmJjJv3jwqKioIDAwkIyPjrGt0Oh0NGjQQGseOHcPpdJ5TS7lXnTp1yMjIICoq6qwJoMTERL7++uuL8kn5t0GDBmdpOZ1Ojh8/7nNGlcJvv/1GaWmpTwhCgIiICLKzs+nYsSM2m434+HiGDRtGfn7+WX5dDPJAufNzNRzgJ230D733Fu1ib2aBz2d7jhTw7qJdjL611UVpzXmmB/dMWXnB7/3hUOUr6fO50GJZVENTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1pozb01NCUNmpDTw3NK6HnPVkldN3w4sebmPV0j8vSBv/0WW1NaaM29NTQlDb6p56WuConrGw2G1br2SsswsLCKC4uFn8fPnyYSZMmsX37diwWC/Xr18dsNhMQEODzO6vVisfjobi4mMDAQGw2G1lZWVx33XUUFBSQkpLCs88+66Ov/Dtr1iwee+wxTCYTvXv35sEHH/T53mazYTAYGDhwoAgb+MADD4iwft4+bdu2jdTUVOx2O61bt2b8+PHnvOeECRPYs2cPFouFQYMG0bt3b5/vAb766is++ugjsrOzMZvNhIeH065dOx+/ExISyMnJETuzCgsLmTx5MjqdjtTU1EvImWrkGVZnczXEQ5U2+o9ezmm7z+HHCm4P7D5SwClbBTUjgy/rHt5c7jN7JfxW02ctlkU1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KKNWvRZDU0t2qgln1dtzzprskrB5Yb1u3Po1qr2JWn7q89qakob/dNGLfqshqZWbdQaV+WE1V+huLiYESNGEB8fz4wZM8jNzeXFF1+kqqrqL/32+PHjjB07lqSkJD799FPuuecemjVrhtFYnWSKTk5ODtOmTaOiooL//ve/ZGdn+2jl5+eTm5vLddddx3PPPceGDRt4/vnnGThwoM91LpeLXbt28cILLxAbG8sHH3zAXXfdhcViEdeUl5eLeys+TZkyhRMnTvho/fDDD4wfP54HH3yQ5s2b88gjj+DxeNi5cyetWrUCoKCggD179mAwGIiLi6N///4sW7aMgoICDAYDkZGRF5fg/z96vY6ICMv/vlCjWK1X5hBbtfTU0NSijZerd/hk6QW/tzvcF/2cKWdUeZ9ndaXPrbocv9Xw+Uy0WBbV0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NKWN2tBTQ/Ny9TJySi74/aFsG4O6N76se/ibz3+HprRRG3pqaEob/VNPS1yVE1ZWq5WSkrMbtOLiYhF67/PPP8dut/POO+8QHh4OwO+//87SpUs5duwY9erVE7+z2WzodDrCwsKorKzEZrPRoEEDcTbVNddcQ58+fTh8+DDt27cHYNeuXQCMHDmSHj16CLvuvfdeAGHHwYMHMRqNvPTSSwB07NiR48ePs3r1anHNyZMn8Xg8dOjQgSFDhgDQvHlzunfvjt1uF9dt27YNgOeee46WLVsC1RNdEydO9Lnn22+/Tf/+/Xn88cf56quvcLvdJCUl8e677zJr1iyRFoWF
hcyZM4dZs2bxwQcfEBwcjE6no6qqimPHjp0zvOD/Qp5hdW4MBv+PhyptvHy9NTuyOXiimMZ1wujSIu6SdYJNugt+bzHrL/nMqU9fuF74fDnnVnlzJdJRTZ+1WBbV0NSijVr0Ga5cXaagxXTUos9qaPq7nhqa0kb/tFENn/ccKeD4KTv1Yiw0ib+0xYLeaDFf1ND0dz01NKWN/mmjlnxOqBXKqgt83zDO6jfvglrKFzU1tWijFn1WQ1OrNv4bsFr/5WdYJSQknHVWVUlJic/ZS2lpaXTq1ElMVgH069ePpUuXsmTJEkaNGiU+z8jIIC4ujsDAQH7//Xfcbjdu958Fymw206tXLxYsWCD09+3bB0BZ2Z+TM6mpqVgsFux2OwkJCTgcDk6ePInL5fKZTFPsUCad1q5dC4Be/2emhYeH0759e1atWiXuefjwYaB615ZC3759mTBhAgaDgbp163L8+HEyMzN56qmnAFi6dCkJCQkMGTKEqVOnivB/TqcTgGbNmjF37lwAPB4PrVq1oqKi4q9nxjmQ8TnPz9UQD1XaePEcybHxyoItPgfEzl66l3Ej2lI/9uzwpf+LmLAgmjWIZG9mAW7Pn5/rddAkPpJoa+Bl++9v+aJFn/8OPTU0tWijVny+0nWZGjaqrenvempoatFGLfqshqYWbbwSermFZUyev4XS8j8jf4QEGRk/oi0x4Zcf8lmL+aKGpr/rqaEpbdSGnhqal6vXpXkc837cf86wgAY9dG5Wy+/eBbWQL3+HphZt1KLPamhq1UatcFUGU+zWrRvr16/HZrOJz3766Sf0er04eykjI4PIyEjuvvtuWrVqRWpqKhs3bkSn05GWliZ+53Q6+eWXX+jWrZv4HcCRI0fo0qULLVq04NZbbyU/Px+3203Hjh0BOHr0KJGRkbz33nu0bt2a9u3bM27cOEwmEyEhIdSpU4djx47hcrnQ6XTceOONNG/enBtuuIHdu3cD0LBhQ3HP4OBg1q1bR6dOnWjVqhV33303paXVIbIUn06cOEFISAgvvPCC8OmDDz7AZDIRFxeH2WwW9n/22WekpqayYcMG4uLiSExMxOl0cvz4cQBq1KhBQEAA3bt3p0WLFvTs2ZNbb70Vp9NJSEiIzw40iURyYbwHeBVcbpg0b8sla44c1PSs1bZN4iMZOajpJWv6O1r0WSLxJ9SoyyQSiUTiy5mTVQCl5VW8LOtaiUSiYcaNaMuZC+8N+urPJRKJRKItrsodVsOGDWPBggWMGjWKkSNHkpuby9SpUxk2bBixsbFAdXjAb775BpPJxLvvvivOezKbzezcuZN58+bRuHFjFi5cSFFRkQjlZ7PZ0Ov1YofVQw89xIoVK1i2bBmAmMgpLi6msrISu91Ot27daN68OXPnzsVut5OUlCSugeqdSwUFBdx5553k5uby3nvvAYjzpJR7ulwuwsLCGDFiBN9++y2ZmZnodDofn4xGI6Wlpdx4443UqFGD+fPn43Q6adSokc898/LyqFOnDqdOnSIlJQWr1erz/Zo1a9DpdHg8HiorK8nKyiIrKwuA//u//8NkMl1y/hiNV+U8qKpcDQf4SRsvDbUOiA0LCeDpO9qQX1SOrcKFNdBATPjlx7/153zRos9q6amhqUUbteTz1XTYtRqa/q6nhqYWbdSiz2poatHGK6W38/CpsyarFErLq9h3rJDmCVGXpK3FfFFD09/11NCUNvqnjVrzObF2OB8/14u1O7M5kHVlQ1N7/+tvempoShv900Yt+qyGplZt1BpX5YRVWFgY8+bN4+WXX2bUqFFYLBaGDBnCE088Ia7xeDx4PB4iIyPp2rUrUH3e04QJE0hOTmbOnDkUFBSQkpLC7NmzqVu3LgBVVVW43W7+3//7f+Tl5fHhhx9iMBgwm804HA6hX1ZWht1u54UXXmDhwoX8/vvvREREYLfbCQgI8LG3YcOGdOvWje+++w673U5kZCQFBQUEBQUJrdLSUh599FG2bNnC+++/
T1BQEHq9Ho/nz9hYHo8Ht9vN888/zyeffEJ2djaRkZHk5uZisVh87vnxxx/z4IMPAhAREXFWGt588818+eWXOJ1OTCYTOp0Og8FAeXm5mCC7FPR6HRERlv99ocbYlp5H+oajJNePpHVSjSumq9VDBq+U5on8Ug7syyUu2kJcTMglaah9QKxaz5M/54sWfVZLTw1NLdqoBZ+vxsOu1dD0dz01NLVooxZ9VkNTizZerl52wfELfn/idBndrrm8SBdazBc1NP1dTw1NaaN/6V2Jd9Xz4a8+A9x4baMrpuWNP/uslqa0URt6amhKG/1TT0tclRNWAImJieLspXOh1+upXbs2v/zyi/hMOe8pNjaWxYsXn/N3yg6kgQMHijOmAO6++27Wr18vzqGqqqoiNDSU22+/ndtvvx2onlBq2rQp5eXlAAQHV8cgv+aaaxg7dixjx44FYOHChUycOFGcI6WENhw+fDiPPPKIuGefPn3ErifFp7i4OO68807uvPNO8dt27doJDcW+kpISvvnmG7Hb68zvv/nmG4KDg1mxYgVmsxmAAwcOcOONN7JgwQL69Olz3rS9EG63B5ut7H9fqBFyC8p48eNNZ8Won3hPe2pEXHqMeoNBm4cMXinN0nIn73+7i10ZBeKz5gmRPPyf5liCLm53oZoHxIK28uVq0VNDU9ronzZqyeer6bBrNTT9XU8NTS3aqEWf1dDUoo1XSi8u8sIDF7WjgmVd+w9r+rueGprSRv+y8Uq+q6plo1p6amj6u54amtJG/7RRiz6roalVG/8NWK1Bf3nX2VU7YXUp6HS6K3rt+a7x3hV1uXZcrFZCQgJQfS6W8v/K3yaTSewkO3ToEAkJCWKyCuCPP/4Aqie7Lgd5oNyfnDlZBdXhPibO2cTbj3W7bH2tHjJ4uZrvLdrF3swCn8/2HCng3UW7GH1rq4vS+jsOiAVt5MvVpqeGprRRG3pqaF6Jw67n/LD/vN/742HXamj6u54amlq0UYs+q6GpRRsvV69J/UhCgoznDAsYEmQkpV6ErGv9RNPf9dTQlDb6h96VfFc9H/7m89+h6e96amhKG7Whp4amtNE/9bTEv3bCyu12c+LECYYPH86uXbuwWCwkJycDiPOczoWyA+n111/n2LFjFBQUkJyczJEjR4DqHViBgYEYjUZOnTrFPffcw/bt2zGZTLRo0QKXyyV2VpWVVe80Wrt2LQMHDuTIkSPExcURElK9nVs5J0qx54knniA9PR273U7z5s3JysoSZ2n9VZ/q1q2L1Wpl7Nix4rfz588nKCiITp06iQmqmJgYli5dSvfu3Tl9+jSBgYGUlZURGBjoM9F1KcgzrKqRMer908ac03Z2Hyk463O3B3YfKeCUrYKakRe3++2Fu9vz4sebfCatDPrqzy/3edBKvlxNempoShv900Yt+Tz9y+0X/P69b3fyf0NbXZK2ltJRLT01NLVooxZ9VkNTizZeSb2J97Rn4pxzR2C4nH6jFvNFDU1/11NDU9roPzaq8a56pW1UU08NTX/XU0NT2uifNmrRZzU0tWqj1vjXTlhB9ZlV+/bt48EHH+To0aMsWrQIs9kszo4CGDFiBNnZ2SxfvhwAo9GITqdj48aN9OrVizZt2jB37lwRUk9BOWNq8+bN3HfffZSWlvLJJ59gMpkIDQ31ufbEiRMkJiYyevRoli9fztatW32+Dw4OxmQysW7dOgYPHkz9+vWZNWsWVVVV6PW+hfuv+NSkSRM2bNjAgAEDWLp0KQ6Hg5MnT9K/f39xTf/+/Vm0aBHBwcEMHjyYb7/9FpvNhtPppEePHpec5vIMqz+RMerV0btczcMnSy/4vd3hvugyHBFh4bvXBvHrpqPsOJhPy0Yx9Gpf/5JtPBf/9ny5GvXU0JQ2akNPDc3L1TucfeHd1QdP2C67fddCOqqtp4amFm3Uos9qaGrRxiuhFxFhYeGk/mxPz2P/0QJ5xq2favq7nhqa0sZ/Xk+Nd9Vz4U8+/12a
/q6nhqa0URt6amhKG/1TT0v8ayesAgICcDgcJCcn8/7772OxWEhNTWXdunUYjX+67Xa7cblc4u/g4GA8Hg9t27Zl9+7dpKWlkZycTFlZGaWlpWIHlvLbdu3aMXfuXIxGI507d2bt2rUijJ9ybc2aNTEYDEybNo24uDiSkpJIT08X3xsMBpxOJ507d2b16tXY7XZatGjB1q1bCQgIuGif5s2bx1dffcWsWbOEfkJCAtu3/7l6OjU1lY8//ph3332XmTNn4nQ6SUpK4tChQ2RnZ19yusszrP5Exqi/8jbmFZVTUuHCGmggJvzSKv5g04VDclrM+kvOl/bJNejVvj42W/llnVvlzdWQL/5uoxZ9VkNTizZqyefEuFC2HTh93u8b1ZZnWP2TempoatFGLfqshqYWbVTD58RaobROqnHF+o1azBc1NP1dTw1NaaP/2Kjmuyr4p89qa/q7nhqa0kb/tFGLPquhqVUb/w3IM6yonqSJjIzkk08+EZ9lZ2fTvXt3HA6H+GzBggU+v6uqqg7NcMcdd9CvXz/x+eDBg9m/fz+BgYFA9dlSAQEBzJkzR1zjdrtp0qQJTqcTqJ6oAmjWrBnvvvuuuO7NN98kPT1dhA6srKwE4KWXXhJnTAF07dqV8vLyi/YJYOjQoQwdOpSkpCTuvvtuNm/ejN3u27Hp1KkTq1ev5o8//mDu3Ll07NiRtm3bCvsvFRmfsxoZo/7K6ZWWO/nw+z0+4RGaNYhk5KCmWAIv7uDZmLAgmjWIZG9mAW6vI+L0OmgSH0m0NVDmi59o+rueGprSRm3oqaF5uXqPDG7JPVNWnvf7h//TQtaNfqCnhqYWbdSiz2poatFGLfqshqYWbdSiz2po/ttt/DveVS/Xxr9DTw1Nf9dTQ1PaqA09NTSljf6ppyX+tcEUXS4XRUVFPqH81q1bByDOcToXyk6lvXv3is+cTicnTpzA5XJRUVEBgE6no7KykszMTHHdhg0b8Hg84myqkydPAvhcA3DgwAHgzzOulF1UGzZsENcUFxdTWFiI9+6vi/HJ4/GIybfdu3ezbt067rjjDp9rPvzwQ+bOncvkyZNJTExkypQp6PV6brrppvOmj+TiGD+iLSFBvvPCIUFGxo9o+w9ZdHXy4fd7zjp4dm9mATMX77kkvZGDmtIkPtLnsybx1RNgEolEolVG9Gl8UZ9LJBKJRCKRSK4s8l1VIpFIJFrnqt1hdfjwYSZNmsT27duxWCwMGjSIxx9/XEzcOBwOAgMDGTVqFCNHjiQ3N5epU6cSFhbGrl27uO666ygoKMBkMmGxWEhLSwOqJ5EMBgPz5s0jLS2NzMxMXC6XCPNXXFxMYGAgOp2OsLAw7rnnHuDPySmLxYJOpxPXAmRkZDBo0CCys7MpKysTE0nK9y6Xi9DQUKZMmcLHH3/M8ePHcbvdYlJMQfFp6NChlJeXc+rUKTweD8HBwUITYN++fUycOJE//vgDgGXLljFo0CCfc66WLFnCtGnTqFGjBk8//TRQPQnXpk0bn3teCpdzWPC/jVrRIbw35jr2ZhZwLN9OvRjLWZ3PS0FLhwyqcfBsWEgAT9/RhvyicmyXGWLQGy3li5qa/q6nhqa00T9t1JrPPdvWo2fbesz4+g8OZNloXMfKo0NaXbau1tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaq9a7qbZu/+aympr/rqaEpbfRPG7XosxqaWrVRa1yVE1bFxcWMGDGC+Ph4ZsyYQW5uLlOmTKGiooIJEyYA1ZMvQ4cOJT09nVGjRmGxWBgyZAjfffcdu3fv5plnniEpKYknnniCvLw8jh8/LsLx6fV6rFYrhw8fBqB27dqcPn2a0tI/D8DU6XQ0a9ZMnB8VEBBAvXr12L9/P6dOnfKxt1mzZuzevRudTkdUVBRms5msrCyxWwsgIiKC/Px8jh49il6vp379+hw7dkxMlCn3vOaaa0hLS8NoNGKxWKhVqxbp6emcPv3nuRM/
/PCDmKxSWLx4MatXr2bjxo3Anzuz8vLyxDUej4etW7dy7733snr16kvKG71ed0UOAf23kRphIVUFXS0cMqjmwbNqlVUt5MvfoenvempoShu1oaeG5pXUm3C/Gi2W9tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaqOa7irz6rqenvempoShu1oaeGprTRP/W0xFU5YfX5559jt9t55513CA8PB6p3Kb344ouMHDmS2NhYrFYrZrOZuXPnit9VVlby8ccf07JlS+666y4A0tLS6NOnD7Nnz2bixIlYrVacTienTp3ixx9/JCEhAYDJkyczf/58jh07JvTT09Np3bo1n3/+ubhHx44dOXHiBABhYWFAdUi+iRMncuuttwKwc+dOhg4dyvbt2xk6dChWq5XTp0+j0+lYt26d8On+++8nLS2N3Nxccc8dO3YwYMAApk2bJu7ZrFkzEWYQ4Mknn2TMmDHodDoWLVrEs88+y6233soPP/yAy+XCYDAwZcoU8vPzcTgc4hwvt9tNz549yc7OJicnh1q1al103rjdHmy2sov+3b8dg8H/D/C70pprdmRz8EQxjeuE0aVF3CXrqHnwrBbzRYs2atFnNTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0MwrKqdEhR0j/pyOV0O+SBv900Yt+qyGpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dlRNWaWlpdOrUSUzsAPTt25cXXniBdevWMXjwYBISEsjIyPD53dq1a/F4PHTt2lV8Zjab6d27N8uXLwcQE1Tx8fHi/6F615VOp2PDhg20a9eO+Ph4du3axf333y+u8Xg8OBwO7HY7WVlZ1KtXD4PBgMvlok+fPuI6ZQfWoUOHxD3tdjvdunXz8clgMAAIn2rXrs2uXbvo27evuKakpASn00l2djYOh0OERFTCEiokJyfzxRdfUFBQQExMDABVVVWEhIT4+Fi/fn2ys7N9dnZdLPJAufNzNRzgd7maR3JsvLJgC0qdvGrbCWYv3cu4EW2pH2u9aL2/4+BZLeSL2npqaPq7nhqa0kZt6KmhKW3Uhp4amlq0UYs+q6GpRRu16POV0Cwtd/Lh93t8wnw3a1B9Jo8l0HQlTPT7dPTHfFFbTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9ron3pa4qqcsMrIyKBnz57cfffdPmdYxcTEiEmqbt268cEHH2Cz2bBaqwfJly1bBlSfU6WcYZWSkkL79u3Jzs6moqKCNm3aoNfr8Xg8PProo6xduxaj0UhVVRWRkZFCPzk5mSVLlnD8+HEGDhzIkSNHiIiIwG63Cxvr1KlDzZo1yc3N5YMPPuD777/HbrdjsViwWq1iJ1aXLl0AsNvtwqegoCBKSkqwWCzing0bNmTXrl3s2rWL1157jezsbCIiItDpdLhcLo4fP05iYiIABw4cYNq0aWzatAmAd955h6CgICIiIkQ6hoaGsnz5cpKSknzSNzg4mLi4S98RI9E23pNVCi43TJq3hVlP97gkzZGDmjJzse9Lrjx4ViKRSCQSiUQi8X8+/H4PezMLfD7bm1nAzMV7GH1rq3/GKIlEIpFIJBKJX3JVTlgVFxezbNkymjRp4nOGlV6vp7i4GIBhw4axYMECRo0axciRI8nNzWX58uXodDrmz5/Pk08+SVJSEqNHjxbnPRUXFxMbG0toaChHjx6lrKyM+++/n99++41du3ZhMpmEfvPmzQH45JNP6Nq1Kz179mT+/Pk+NgI0btyYEydOsGDBAu68807y8vJYsmQJgYGBlJVVh86rWbMmOp2OrVu3Eh8fz4MPPsh3331HYWEhQUFBQqtVq1Z8++23fPDBBwwYMOCc99y/fz+vvvoqu3btombNmrRv355Vq1Zx+vRpDAYDhw8fJikpiS+++IITJ06g11dvxXO7q2cYdDodr7zyymXlj9EoD5U7k6vhAL8roblqe9ZZk1UKLjes351Dt1a1L1pX
rYNntZIvauqpoenvempoShv900Yt+qyGphZt1KLPamj6u54amtJG/7RRiz5fKc2c03afRWcKbg/sPlLAKVsFNSOD/1EbryY9NTSljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJKyX03plnWE2YMIHy8nKg+vyoefPm8fLLLzNq1CgsFgtNmjRh+/bt3HPPPeIMq4SEBIqKisSEDYDRaBT3ef/990lJSeGZZ57h1VdfpaSkxOeakJAQtm7dyo4dO7jhhhsoLCxk5cqVQis4uLrzHRkZyfz584mLi+P5559n6tSpPvcExITb+++/T5s2bRgwYAAzZswQPnlr/frrr1gsFoYPH87atWvFGVbR0dFUVlZit9s5duwYubm5AEyaNImXX36ZX3/9laSkJBo2bEheXh5utxu9Xk9UVBS1atUiLy+P7777jj59+pwVVvCvoNfrVD0c9GrHnw/wO5FfyoF9ucRFW4iLCfnfPzgHGTklF/z+ULaNQd0bX5I2qHfwrD/ni1qaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShu1oaeGpr/ZePhk6QW/tzvcV6SP7+/p6G/58nfoqaGpRRu16LMamv6up4amtFEbempoShv9U09LXJUTVnq9nri4uLPOsJowYQI2m018lpiYyNy5c8XfkyZNYvv27fTs2VN89umnn3L33Xezfv16wsLCgOrJr9DQUNatWyeu83g8TJ069azJo/79+/PSSy+J6xYuXMjKlStxOp0Awp4lS5YIfYDPPvuMrKws8bfBYKB27dr88ssv4jObzcaMGTOEhsPhAOCRRx7hjjvuENeVl5dz4MABgoODiY6OZtiwYWzfvp1169axYsUKnn32WXr27MmUKVPE2VRt2rTBbrdjNBrZs2eP0Nq6dSu3334769atE6EKLwa324PNVnbRv/u3YzD47wF+peVO3v92F7sy/lz52Dwhkof/0xxL0MXFlE+oFcqqC3zfMM5KYaH90gzFv9NRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2+qeNWvT5SmkGmy68CNJi1sv3g39YU9ronzZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KqN/was1qC/vOvsqpywOhcXsxvor1x7vmuUCZ8rYcelap35uzOv6969O9HR0UyZMkWcT/Xee++h0+kYNGgQAAUFBZSXl6PT6ejYsSM2m434+Hhuv/12AI4dO/aX/TgTeaDc+fHHA/zeW7TrrJjye44U8O6iXRcdU75L8zjm/bj/nGEBDXro3KzWFfHfH9NRTT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1/szEmLIhmDSLZm1mA2+s1Vq+rPpM22hoo3w/8RFPaqA09NTS1aKMWfVZDU4s2atFnNTS1aqNWuConrNxuN9nZ2dhsNqxWKwA//fQTgPj7XCg7nFasWEGLFi0AcDqd7N+/H6g+AyowMBCj0cipU6fIzMwkPj4egN9//x2Xy0VOTg6tWrXCbDYDsH37dp97rF+/HgCTyeRjz/PPP8/u3bspKCigUaNGHD9+3CckoOLTgw8+yMaNGzGZTDRs2NBHQ7nnkiVL+PLLLzly5AhxcXFUVFQAiDOxwsLCmDt3LrfffjuLFi0Cqnd0vfbaa9StWxeoDitoMpkICwsT52Tl5eXx8ssvA1C79sWfM6Qgz7A6G3+Nh6pGTPkX7m7Pix9v8pm0MuirP7/csuGv6aiWnhqaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShv900Yt+nwlNUcNbs57Z0R0aNqgOqKDfD/45zWljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJK51Oh9lsZtSoUYwcOZLc3FymTp0qJl8URowYQXZ2NsuXLweqz50yGAzMmTOHyMhIGjduzMKFC7HbfUMQBAcHExYWxqOPPsro0aMpLy9nypQp6HQ6DAYDb731
Fps2beLDDz/k4MGDTJw4kb59+7Jx40afkH6KltlsZvny5fTv35+UlBRmz55NVVUVBoPBxyeXy8WGDRu47777yM/P54svvsBoNPr4BPDHH3/QtGlTRo8ezffff09mZqbP96dPn2bo0KFUVlaSlJREeno6AQEBjB49GqgOY6jT6ejSpQu//fYbiYmJxMXF4XA4cDqdOBwO2rZte0l5I8+wujD+Fg9VjZjyEREWvnttEL9uOsqOg/m0bBRDr/b1L8fMs/C3dFRbTw1NLdqoRZ/V0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FND0x9tjIiAV0Z1JTu/lOxT9ss6M/d8+Hs6+mO+qK2nhqYWbdSiz2po+rueGprSRm3oqaEpbfRPPS1xVU5YhYWF0bNnT7Kyshg1ahQWi4UhQ4acdU6U2+3G5XKJv61WKy6Xi1GjRjFnzhwKCgpISUnhzjvv5KOPPhK/DQsLo1GjRhgMBkaPHo3RaKRu3brk5eXRvn17unbtSq1atfjwww8B2LhxI19//TVxcXE88cQTvPHGG0LLYrHgcDho0aIFGzZsYMWKFbRq1YqdO3f67LAKCgqitLSUpKQkZs2ahcVi4frrr+fnn3+msrJS2AVQq1YtSktLmTZtGg0aNKBx48YcOHBAfP/WW2+Js7bS09OBP3dfTZ48mf79+wMwffp05s6dy+LFi8nJyUGn09G6dWs2b97MyZMnSUxMvOi8kWdYnRuD4crGL80rKqekwoU10EBM+KVXgGrGlG+fXINe7etjs5VfVlx6b650Ovq7nhqaWrRRiz6roalFG68Gn/ccKeD4KTv1Yiw0iY+8AhZqMx39XU8NTS3aeDX4/N9Pt3Ikp4TEuFCeuv2aK2ChNtPR3/XU0LwabAwJMNA2JVa+H/iZprTRP23Uos9qaPq7nhqa0kb/tFGLPquhqVUb/w1Yrf/yM6wSEhIoLCxk7ty54rOSkhI+/vhjEhISxGcLFiw463cAPXv2ZNSoUeLzKVOmEBcXR2BgoLjuwIEDfPfdd+Ka22+/HaPRSHJyMgD16tXDaDRSVVXF/fffz+DBgwFYuXKlz730+uqMePrpp2nXrp3Qu/7668nJyRF/BwQEAPDFF1+Iz2w2Gz///DOlpdW7YOrUqQNA586deeWVV8R148aN48CBA+JeSpjCTZs2+UzgdevWTWgBBAYG8uCDD/Lggw/yzDPPsHv3bu644w42b97M5SDjc56fy41fWlru5MPv9/iE8WvWIJKRg5piCTRdtN7fEVNei3FlteizGpr+rqeGprRRG3pXQjO3sIzJ87dQWl4lPgsJMjJ+RFtiwi8ulOv50EI6Xm16amhq0UZ/9PnHDZl8tSpD/L0ro5A7J/3KsJ6JXN/uyuxU10I6Xm16amhKG7Whp4amtFEbempoatFGLfqshqYWbdSiz2poatVGrXBVBlPs1q0b69evx2azic9++ukn9Ho9qamp5/1dmzZtCAkJ4ccffxSfOZ1OfvnlF7p16+ajv3//fp9QewcOHMDpdHLttdcC1edJdezYEZPJREbGny+Xy5YtIzExUUwuKedPHTx4UFxTXFxMTk4ODodDnD9lNBopLy/38ennn38GwOPx+PyblZXl49fevXsBOH78OFA9SabX68UEHIDL5aKiogKn03ne9FHst1qt1KtX74LXSf4ZPvx+D3szC3w+25tZwMzFey5Zc+Sgpmetym8SXz0JJpFIJBL/5MzJKoDS8ipenrflH7JIIpFcDt6TVd58vuLw32yJRCKRSCQSiUQikfxzXJU7rIYNG8aCBQvOOsNq2LBhxMbGiuvOPMMqICCAkSNHMmPGDJ8zrAoKCkhPT6dVq1ZYLBZuvPFGEhMTfc6wKikpISEhgRYtWgj9hx56iLVr1zJnzhzmzp1LZGQkeXl5vPnmmz726vV6Jk+ezKuvvorRaCQ4OJjAwEAcDgfFxcUEBgZiMpkwGAx069YNp9NJWFgYdruduLg4ERKwuLgY
qA5B2KpVK1wuFxEREZw6dcrn+zp16rB//366detGWVkZgYGBhIeHU1xcLM7NKi0tpU+fPhiNRoqKinA6nXg8Hg4ePMhzzz2HyXTxu3UULvfg3H8jV+LAvZzTdp+dVQpuD+w+UsApWwU1Iy9+VX1YSABP39GG/KJybFcgzKCCFg9C1KLPamj6u54amtJG/7TRX33eefjUWZNVCqXlVew7VkjzhKhL1tdKOl5NempoatFGf/V5yicXnmh+44vtPH3HpYcH1Eo6Xk16amhKG/3TRi36rIamFm3Uos9qaPq7nhqa0kb/tFGLPquhqVUbtcZVOWEVFhbGvHnzePnll33OsHriiSd8rjvzDCuA+++/H4/HI86waty4MQEBARgMBmbMmEFubi5TpkyhV69e2O12cYaVTqejX79+Plrbtm0DwGQyUVVVhc1mw2Qy0axZM3GNy+XC7XYTFhZGVVUVFRUV2Gw26tat67Obyul04nA4iI2NpbCwkLKyMioqKnxC+imYzWaCg4MpLi7GZrNhsVh8tCwWC3q9HpfLhclkwuFwcOLECRITEzl27BgA2dnZ2Gw2zGYzVVVVuN1uPB4PJpOJLl26XGLOgF6vIyLCcsm//7dyIr+UA/tyL+uA4cMnSy/4vd3hvqy0VyvftHgQohZ9VkPT3/XU0LySelei3jkXWktHNfQuVzO74PgFvz9xuoxu11z+Tul/ezpejXpqaGrRRn/zOfN/9PEyckquSD/t356OV6OeGppas/GXjUfZdSiflo1i6NX+yoTPBP/2WS1Nrdl4tfSVtZYvamn6u54amtJGbeipoSlt9E89LXFVTlgBJCYm+pxhdS7OPMMKQKfTMXLkSEaOHAnAzJkz+eCDD3jnnXcIDw8HqieZXnzxRX777TexY6tTp044HA6hU1lZycyZMwkODuaOO+7gySefxOFw0KdPH2bPns3EiRMBOHHiBABz584V51+tXbuWe++9F51OJyak7HY74eHhpKWliXuMGTOGX3/9lQYNGgCIcH633nor48aNA6CoqEiEKVS0Tpw4gdvt5tdffxU+ffHFF7zwwgtEREQA1buwNm7cSFBQ9cPzzDPPsHPnTk6fPs1nn33G+PHjL5i258Pt9mCzlV3Sb/+NlJY7ef/bXezK+HNnVPOESB7+T3MsQRe3iy3YpLvg9xaz/rIOLzYY/P+QQX+3UYs+q6Hp73pqaF5JvStZ76hlo1qa/q53pTTjIi/c8a0dFSzbg3+ZnhqaWrTRX32OrxnC3syi836fUCtUPtP/Mj01NLVm45HsYl6auxlFZtW2E7zz1R+8cHd74mtZ/cJGNfTU0NSajVdLX1lr+aKWpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dtRNWV4q0tDQ6deokJnYA+vbtywsvvMC6desYPHgwAAkJCT5nVW3bto3S0lJ0Oh0JCQlA9c6n3r17ixCEACdPnjzrnqmpqQQEBGA2m0VoQLvdTo0aNXyu69u3L0uXLiU6OhpAnKkVExMjrgkPDyc5OZk//vhD2KGECNTpdD5aEyZMEJNawcFnh47T6/XUq1ePvLy8/5FqF0YeKPcn7y3addaZU3uOFPDuol2MvrXVRWnFhAXRrEEkezMLcHv+/Fyvqz5zKtoaeEXS/mo4ZNDfbdSiz2po+rueGppXQu9K1jvnQivpqKbe5Wo2qR+JQQ/n6vsa9JBSL0K2B/9SPTU0tWijv/n85LA23DNl5Xm/H31ra/lM/0v11NDUio3ek1VC1w0vfryJWU/3uCxt8E+f1dbUio1XW19ZK/mitqa/66mhKW3Uhp4amtJG/9TTEpqfsMrIyODmm2/2+cxqtRITE+MzQdWtWzc++OADbDYbVqtVfKfX60lNTRXXJSYmMm/ePCoqKggMDKS4uBij0ciPP/4odlhVVVXh8XjEbqdjx47h8XjIy8sjMzOT+Ph4ABHmr06dOuI6k8nE6tWrxQ4xqN6dZTAYxHVFRUXodDp++eUX
hg4dCoDH40Gn0/lMdp2Jy+Xi4MGDdO7c+eIT0gt5hlU1apw5NWpwc947YzVY0wbVq8EuN92vhpit/m6jFn1WQ9Pf9dTQvFJ6ap11dyVtVFPT3/WulGbOafs5J6ugerDucvIZtJOOV5OeGppatNGffb69V0M++/XQOT+Xfbx/n54amlqycdX2rAu2g+t359CtVe1L0vZXn9XU1JKNV1NfWUv5oqamv+upoSlt9E8bteizGppatVFraH7CSpmAOpOwsDCKi4vF38OGDWPBggWMGjWKkSNH8vvvv4vPlbCBAPPnz8fj8VBcXExgYCClpaU0adKEOXPmEBkZSePGjVm4cCEul0tMHin3qVOnDo8++iijR4+mvLyc6dOnAxAZGSlsjY6O5o8//mDixIn07duXjRs3cvDgQQwGg7ChtLSU5s2bM3XqVPR6PbGxscycORODwUBcXJy4rry8nNWrVwPVYQRPnjyJy+WiVq1aFBQUiPteDPIMqz9R48ypiAh4ZVRXsvNLyT5lv+LxtuHqiNnq7zZq0Wc1NP1dTw3Ny9VT+6w70EY6qq13uZp/Rz7Dvz8dr0Y9NTS1aKM/+nxb36bc1rcpE2auI/1YEUn1wnlpZOr//uFFoIV0vNr01NDUgo0ZOSUX/P5Qto1B3Rtf1j38zee/Q1MLNl6NfWUt5MvfoenvempoShu1oaeGprTRP/W0hOYnrP4qYWFhzJs3j5dffplRo0ah1+vR6/U888wzPtd5PJ6zfpucnEyvXr2YM2cOBQUFpKSk0KJFCwIDA32ue+655/j2228ZPXo0RqORbt26sXTpUp9rAgMDmTFjBtOnT+frr78mLi6OPn36sGLFCp/runfvTklJCdOmTcNut9OmTRtq166N2WwW15w+fZrHHnvsLHtfeOEFGjRoQIcOHS46neQZVn+i5plTIQEG2qbEYrOVX9aZBt5c6RirV1pPDU1/11NDU4s2aslnNesdLaWjWnpXSlOeaej/NmrRZzU0/V1PDc0nb2sj9GQf79+rp4amlmxMqBXKqgt83zDO6jf9HS3li5qaWuwraylf1NT0dz01NKWN/mmjFn1WQ1OrNv4bkGdYXQRWq5WSkrNXaBUXF4vznhQSExOZO3cuAJ9++ikvvfTSWRNUI0aM8Dkrymq1YrfbGTlypE8Yv2HDholrlH9NJhMzZswQ12RmZrJ06VIfrdLSUnr27EnPnj3FdW+++aaPrVarlfLycsaOHcvYsWPF5127dvW5rk6dOqSnp7N69WoefvhhHnjggXNOYF0sMj5nNX/HmVNajdnq7zZq0Wc1NP1dTw3Ny9WT9c7VoXe5mvJMQ/U0/V1PDU0t2qhFn9XQ1KKNWvRZDc3L1evSPI55P+4/71mOnZvV8rv+jhby5e/Q1GJfWQv58ndo+rueGprSRm3oqaEpbfRPPS2h+WCKCQkJPmdVAZSUlJCfn09CQsIFfwdw5MgRn88zMjKIi4sTu6fOpe/xeDhy5IjQqFevHiaT6azrlL+V6xISEjh16pRPqELlOm9bL8anP/74g8cee4ybbrrpikxWSXwZOagpTeJ9Qys2iY9k5KCm/5BFEonk346sd7SBzGeJRCKRaJlxI9py5iJdg776c4nkQsg+lEQikUgk/o3md1h169aNDz74wOcsq59++gm9Xk9q6vnjxrdp04aQkBB+/PFHkpOTAXA6nfzyyy9069bNR//7778nMzOT+Ph4AH7//XeKioq49tprATCbzXTo0IGff/6ZESNGiN8uW7aMxMRE6tSpA0CXLl3Q6/X88ssvDB06FKjeCbZ27Voefvjhi/bp0KFDjBw5ko4dO/Liiy9echpKzo8l0MToW1txylaB3eHGYtYTbQ383z+USCSSS0TWO9pA5rNEIpFItEz9WCuznu7B+t05HMq20TDOSudmtf5psyRXAbIPJZFIJBKJf6P5Cathw4axYMECRo0axciRI8nNzWXq1KkMGzaM2NhYcd2IESPIzs5m+fLl
AAQEBDBy5EhmzJhBZGQkjRs3ZuHChRQVFXHvvfeK391www3MnDmTRx99lNGjR1NeXs7UqVO57rrraNGihbjuoYce4s4772TixIn07duXjRs3snTpUt58801xTc2aNRkyZAhTp05Fr9cTGxvLzJkzCQ0NZdiwYRfl0+nTp7n33nsJCAhgxIgR7N69W/w+JCSEhg0bXvnE1jA1I4OJiLBQWGiX20ElEsnfgqx3tIHMZ4lEIpFomW6tajOoe2PZDkouGtmHkkgkEonEP9H8hFVYWBjz5s3j5ZdfZtSoUVgsFoYMGcITTzzhc53b7cblcvl8dv/99+PxeJgzZw4FBQWkpKQwe/Zs6tatK64xmUx89NFHTJo0idGjR2M0GunduzfPPfecj1bbtm2ZMWMG06dP5+uvvyYuLo5JkybRt29fn+vGjRuHxWJh2rRp2O122rRpw8cff0xoaOhF+XTo0CFOnjwJwF133eVzj/bt27NgwYKLT0yJRCKRSCQSiUQikUgkEolEIpFIJJJLQPMTVgCJiYnMnTv3gtecawJHp9MxcuRIRo4cecHfxsbGMmPGjP9pR8+ePenZs+cFrzGbzYwdO5axY8de8Lr/5VOHDh1IT0//nzZJJBKJRCKRSCQSiUQikUgkEolEIpGojf5/XyKRSCQSiUQikUgkEolEIpFIJBKJRCKRqIecsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/ipywkkgkEolEIpFIJBKJRCKRSCQSiUQikfyjyAkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KnLCSSCQSiUQikUgkEolEIpFIJBKJRCKR/KPICSuJRCKRSCQSiUQikUgkEolEIpFIJBLJP4qcsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/is7j8Xj+aSMk/x48Hg9utyxS58Jg0ONyuf1WTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaM29NTQlDb6p96/Ab1eh06n+0vXygkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KDAkokUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8UOWElkUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8U4z9tgERyJTl8+DCTJk1i+/btWCwWBg0axOOPP47ZbL7g7zweD7NmzWL+/PmcOnUKnU5HaGgoQ4YM8fl9bm4ukyZNYu3atZhMJnr37s2zzz6LxWJh1qxZfPbZZxQUFBAXF4fL5SInJweDwYDL5SIsLIzevXuTn5/P+vXrMRqN1KxZk1OnTlFeXk7r1q0ZP348CQkJrFixggkTJnDq1CkAAgICAAgKCiI4OJjCwkKCg4OJiYnh8OHDOJ1O9Ho9gYGBOJ1OEhISeOKJJ+jevTuHDx/miSeeID09Xfhbp04datasyZYtW3j66aepVasWP/zwA2vXrqWiogIAnU6HXq8nPDycm266iZ49e/L111/z+++/c/LkSTweDwBWq5Unn3ySW2+9FY/Hw7Bhw/jjjz/OSuPY2Fjmzp1LQkIC
ubm5PP/886xZs+as6/R6PSaTiZSUFHr37s2cOXM4ffr0WdfpdDo8Hg8BAQEkJiZy8uRJbDYbQUFBOBwOKisradiwIceOHSMuLo6bbrqJjRs3snnzZqqqqny0DAYDRqORa665hnbt2vHNN99w4sQJkeZdunShqKiInTt3otfr8Xg8VFZWijTQ6XTUrl2btm3bsn37dk6cOEFAQABOp5OAgACRzzab7ax7KwQHB1OrVi2ys7PR6XSUlZWddY3RaKR+/fpUVFSQn59PgwYNfPJ50qRJbN26lcrKyrN+Gx8fT2FhIcXFxeIzi8VCVVUVgYGBoiyHhIT4/G7IkCHs2rVL+FmnTh1ef/111qxZwzvvvHNOX4xGIz179uSVV17x0Tt8+DCjR49m//79Qs9gMNCsWTOeffZZWrVqBcDChQt54403sNlsProBAQHceuutPPXUU1RWVvLss8/y22+/UVVVhU6nE9coz5vNZsPhcJzTxubNmzN16lQsFgtPPvkkW7Zswe12C7u8y/7jjz/O2rVree655ygsLDxLq0aNGrz00kukpqby5ptv8tVXX1FaWgpUlx+lnNSpU4fi4mKKioowm82Ul5fjcrnOaZ/ZbMZgMIiyoNRJQ4cO5fHHH2fNmjU+dYT37+Li4ggMDOTo0aN4PB4cDgdutxuDwUDNmjUpKSmhqqqKrl27Mm7cOEpKSnzyRUGn0xEbG8stt9zC/fffzzfffHPOfPHm5ptvZv/+/ezbt0/cU/F95MiRDB48WNSV+fn56HQ6nE4nACaTiXbt2jF+/HgsFgtPPfUUW7duFfliMpmoqqoSz2BYWBht2rTh6NGjHDlyBIPBgNPpFGmamJjIU089RWpqKm+88QZffPEF5eXleDwegoKCcLvdVFZW8tBDD/Hdd99x+vRpTCYTZWVlIs/OxGis7jbp9XocDgd6vR6z2SzKXOvWrcnMzCQjIwOj0YjT6cTtdhMYGEibNm0YP348derUEfac+ZxHRESQlpaG2Wzm8OHDTJgwgW3btuHxeDCZTAwYMIDnn3/ep83Jz88HEHWLyWQCOKvNUdJGyZfWrVvz8ssvizbnueeeo6ioyCf/o6KiRDsKMHnyZL755huRZ1D9zHXs2JExY8aQlJQknvP09HQ8Hg8GgwG9Xk9YWBjx8fFkZWVRWFhITEwMBQUFonwnJSXx5ptvYrFYePnll1m1ahVVVVU+eWE0GuncuTMOh4Nt27bhcrlEfptMJgICAqisrBRtoPczWVJSclZeXnPNNdx33328++677Ny5U5Q1JY+Dg4Pp06cPzz77LO+++y5paWlkZWWJZ0pph7zrRqUe++qrr5g1axbHjh3z8cFsNjNs2DAef/xx3n33XXbs2MGuXbuorKwUekq7esstt/D444/z66+/nrPtSE5OFmmm9E2qqqoIDg6muLhYlHW9Xk9VVZXomxw7duyc5Ruq24XatWuLvknz5s0JDw9n48aNlJeXYzQaKS8vF2mk0+kICwujfv367Nu3T3ynPPtKHRgQEEB4eDg2m42SkhIaNGiAyWTi4MGD6PV68TwqZQ+q28WkpCTy8/PJycnBZDKJZ9hkMmGxWLDb7cTExBAaGkpeXp7ow1RVVWGxWIiKiiI3N5fS0lJCQkKw2+0iH9xu91llzLtcX3vttRQVFbFr1y6fPqV3/avT6QgPDycgIICCggLRLqempjJ58mQWLVok2iHFL4PBQPfu3RkxYgSvv/46O3bsEDbo9XqCgoKA6jqsQYMGbNq0ifz8fDwejyjvMTExANhsNho2bIjH4+HAgQO4XC6hFRgYSEREBLm5ubjdbkwmE8HBwZSUlGA0GtHr9VgsFuLi4igpKSE7O5tatWpRt25ddu/e7VMfKCh92vPRokULAgMD2bVrF1VVVT51hTcRERH/M8282yyHwyHyOygoCJ1OR0FBwXntALjvvvv44osvznr2vVm3bh0TJ05k5cqVwi+z2XxJde2hQ4fYvn07brdb5IHB
YOCaa65h4sSJ561rofo5ufXWWxkzZgyATzoofarevXvz5JNPEhUVhdls5vTp0+j1elHmveta77rf7XbjcrlwOp0+da1iz5NPPnnOPidU9wFfeOEFH3t0Oh1ms5nKykruvfde9u3bx/bt2wkMDPTJF4vFQmxsLCdPnhR1ZGBgIHv37vV57hSbXnzxRVF/lpSU8Oyzz/rkC1Q/H02bNmXcuHG0atUKt9vNmDFj+Omnn3zq76CgIPr378+zzz5LVlYW9913n8g/b2rXrs2oUaO4+eab+fzzz/nll1/Ys2ePqD+90el0NGzYkEmTJpGdnc2PP/7I1q1bz3pHMZlM9OvXjwkTJoj3xs2bN4v2VyEqKooxY8Zw8803c/z4cSZNmsTevXvFs+59X+/0WbhwIW+//fZZ5d+73IaEhPj0372JjIzkySef5OabbxaflZSUMHnyZH744Qef+srj8WA0Gunbty8dOnTgq6++4vDhw9jtdtG+63Q6goODuf766xk+fDiDBg06R0mqJjAwEJfLJdqoM9HpdJhMJoKCgqioqCAgIACDwUBRUZFPedHr9URERDBo0CBGjRrFO++8w6JFi86ZbwozZ85k3rx5bN68WdRLer2eNm3aMHPmTJFX27dvJzg4GL1eL8pMYGAgDz30EA8++CBZWVn07NnzvD4mJSXx0ksvibK8bNkyvv76azZt2uRTH3qX5ejo6PNqepfl3Nxcxo4de8589S7LANu2bePFF188q2+vpGHDhg0pKio653OhoJTlwYMHs3jxYrZs2UJ2djZut9unLAOEhIRw66238vjjj1NZWcnkyZNZunSpT1q3bNmSV155hQYNGoh69fTp0xgMBtF3MJvNREdHU1hYSFBQEBaLhezs7LPandq1azN+/Hi6d+/OypUrefPNNzl06NBZdlmtVqZPn067du148803+fbbb88qJ3FxcTRu3Jj9+/dz+vRpzGYzFRUVPu0pVNexERERoh9hMBjE+975UJ6j82E0Gn36d8r1Y8aM4YEHHhDv91u2bKGqqkr0PS0WC5GRkRw7doynn34aq9XKrFmzOH78OIBPOgQHB9OgQQPy8vJEfeXxeER78Z///IfHH3+cgoIChgwZ4lMmlGc8MTFRjDksWrSISZMmiXRQ7DYajVgsFmw2G263W/TtoDr/AwIC0Ov1NGzYkAceeIBevXqxf/9+HnzwQXJycs6bRjt37iQ9PZ2HH374LNsGDBjAhAkTAPj4449ZvXo1hw4dorKbDUD5AACB/0lEQVSyUtzbu66dMWPGeccvoqKi+OSTT0hISDjvdUajkeHDhzN69GgxPlheXs4rr7zCd9995zPuoNfr0ev1PuMcSUlJ57y3TqdDp9P5tG/ffvst8+bN4+DBg6Is6nQ6AgMD6devH3Fxcfz++++i3AcHB2Oz2cSzFBcXx6hRo3juuefOm7bKvc8c91i8eDHvvfeeyBdlDOKxxx5j3rx5fPLJJ+Tl5QGI8m00GkU7n5mZKfoe5+sztmzZkgcffJDp06dz6NAhPB6PyDOj0YjH4yEiIoL27dtTs2ZNFi9eTGFhoU/ZNpvNdOjQgaeeeoqkpCS+/fZbPvzwQzIyMs55z6SkJKKiotiyZQuAeA/wHoNISEgAwOFwMH36dDZs2MDevXvFM6Pk0/PPP0+rVq1E32HHjh2iPtDpdAQFBdGvXz8xvtajRw8xvujNmXW3YtfHH3/MzJkzRVsZHR3NggULhH3Dhw9n06ZNZ+nddNNNjBs3jtDQ0HOmwdWGznOhGlQiuYooLi6mf//+xMfHM3LkSHJzc5kyZQoDBw4UDdn5+PDDD3nrrbcIDAykdu3aBAcHs2fPHkwmEzfddBMTJkzA6XQyePBgAJ544gkqKir473//S3JyMtdccw1vv/02Tz75JACvvvoqBoMBi8WCxWIhJyeHW2+9la+++orQ0FCmTJnC/Pnz2bBhA40bN+app57igw8+4Pjx40yePJn7778fnU7Hf/7z
H5YuXUplZSVGo5GAgAAqKiro1q0bJSUlbNmyhaCgILp168bPP/8sKtCmTZvyzTffMHPmTMaMGUNRURE6nY727duzdetWn47R008/zY4dO9iwYYOoEE0mEwaDgcrKShITE8nNzaVOnTqYzWYOHDhAeXm5mAxyOp14PB7eeustDhw4wLvvvntW+kZERFBYWEhkZCTLli3jzjvvxOl0cuTIETHBYLVaKSoqwmg00rVrV4qLi9myZQuNGjUiIyNDNHahoaGUlpbi8Xi46aab+OGHH3A6nbRt25a9e/diNBopKyujqqqKm266if/85z+sXr2aOXPmEBkZSWFhoWhYTSaT6EhbrVaio6PJyMggIiICi8VCfHw869atE9/37NmTxYsX43K5CAoKory8HIPBgNVqxeVyYbPZROerpKSEkpISmjRpwp49e2jcuDH33Xcfs2fPJi8vj5EjR/Lf//6X5s2bs2PHDjHQ+uCDD7Jnzx7WrFlDREQE8fHx7NmzB4AGDRqQnp6OxWLh9ddfZ9WqVSKfn3nmGeLj44mKiuKnn34SA9I6nY6dO3ficDhE+WncuDEHDhxAr6/eZPvoo4/yxRdfkJyczMyZM0W+ffTRR7z22msAtGrVirp167JkyRICAgIYNGgQX375JSkpKZjNZnbs2IFOpyM5OZnKykoyMzPp3Lkzs2fPFs/n9ddfT1FREQ0aNODo0aPodDpcLhctWrTg8OHDLF68mJ07dzJ69GgxsGaz2cRgZ0BAAHa7nWHDhnH48GE2bdpEzZo1ycnJEb4FBwdTVlZGixYt2LlzJ7Vq1SInJ0d8rvybnJxMQUEBoaGhZGdnU15eTrNmzdi9e7foYCsTTB07dmTFihUYjUYcDodP3ns8HurXr8/x48e57rrrWLduHZWVlXTt2pXNmzeLwfi2bduyZcsWoqKicDqdlJeXiwE5l8tFSEgIbreb8vJyLBYLAwcO5LPPPhMDAddccw1r164lMDCQ1NRUfv31VzHhFxgYKAbkxo4dy1tvvUVFRQUtW7Zk586dADRr1oxDhw5RXl5OUlISjz32GNOnT8fj8ZCfny/yJTMzE6jufCYkJJCVlQVAmzZt2LBhg8gXpa4wm80YjUYqKyu59tpr2bRpE5WVlWLip3Xr1uzfv5/ExET27NlD//79+fnnn+nXrx+LFy/2eVHT6/U0btxY5EtBQQGRkZE0a9aM77//HrPZLMqxTqejQYMG7N27lwYNGoiJxKKiIpGmDRo0EPmSlpaGy+Vi4MCBLF68WHQ6XS4XJpOJUaNGMWfOHFG3NG7cmPT0dAIDA0lJSRETg+Hh4eTk5Ii/lQHhnJwcevbsyYoVK0hISBADdMrL3Jtvvsmnn37K8ePHSU1NZenSpVRVVYlJNu+Jj9tvv53HHnuMfv36YbfbiYyMpHv37nz99dfo9Xrat28v2hwlHZX0U/QAnzYnLi6OQ4cO4XQ6qV27NgAFBQVYrVbR5kD1S215ebl4IR49ejQffvghAwcOxOFw8PXXXwN/dsTtdjt6vZ6oqChKS0uZO3cuI0eOpKioiM6dO/PHH3+ICQblmWzTpg3t27fngw8+AGDo0KHk5uaSlpZGREQEkZGRFBcXU1hYiMFgEM+JwWAgNjaW48ePi3YgMDCQiooKoqKiOHXqFAEBASQnJ5OcnMw333zj80zWqlWLEydOiPJRr1498WKvtNd2u12UR6vVKl4MU1JSiI2NpVatWnz88cdYrVbxW71eT7169cjMzCQgIIAffviBnTt3MmbMGJKTk9m3b5+oU5U2SK/XM3jwYH7++WdSUlJEu9ypUyeaNGnCggULcDgcDBgwgGXLlolBAO8Jc6UuU9LMYDAwcuRInn/+eSoqKsSzpSzIaNq0Kdu2bcNoNPLII4+wbt06Nm/eTLt27cjMzKSgoACXy0WzZs1Em/XUU08xevRoSktLue+++5g1a5ZoO3U6HZWVlWKSPDMzk+DgYBo2bCgGpAFq1apFRUUFnTp1
4scff8RoNDJ27FimTp1KVVUVvXv3Zvny5bjdbuLi4igoKKCyspLrrrsOs9nMzz//TL169aiqqiInJwePx0PXrl1Zs2YNOp1O1LXl5eUivWJjY8nPzxcTXDVr1uTkyZMA9O3bl82bN4vJJmViWRkgtVgsPPzwwwC8/vrrhIaGMm3aNNGn7Nixo6h/b731VtLT09m2bRsGg4EpU6awZcsWvvnmG7p168aqVauIiorCbrdTUVGB2+2mZs2aFBcXYzKZRP3gcrlEu6zw8MMPs2bNGnbt2kXr1q3Zvn27sDc2NpasrCyCg4P573//y7PPPktpaSm1a9fm9OnTVFVVERMTIwYawsLC6NatG0uWLBEDws2aNWP79u3079+fpUuXYjAYmDRpEgsWLGDv3r0kJyezf/9+wsLCcLvdBAQEcOrUKYKCgqhRowYul4uCggLRZiUlJbFt2zaxuGLatGlMnjyZY8eOcc0117B582YAevXqRX5+Pn/88cd50ywmJoahQ4cye/Zs0d/Mz8/H4XCI/mR5eTlxcXG0a9eOH374gaqqKp544gk6duzI66+/zt69e7Hb7RgMBp8JJGUAyWQy0a1bN4qLi9m4cSMmk0m0w8XFxQQFBZ23rj0Tpa41mUzUqVMHj8fDkSNHiImJEXVkcHCwqGuVut77FTwgIAC3280tt9yCw+Hgm2++EemwbNkyMjMzRd+8vLycxx57jFmzZgl9ZQLJ4/H41P1DhgwR/bC6devSoEED0tLSiIyMZOrUqeJ949prr2Xt2rU4nU6MRiOdOnVizZo1dOvWjdjYWGFPSEgIx44dExNOFouFJk2a8P/+3//jmWeeEfnSsmVLfvzxR1GWGzVqxPPPP094eLhYmKVMzttsNkwmE2azmcWLF1O3bl1GjBgh8sV7MDsgIIDQ0FDKy8tZvHgxzz//PBs3bhTtsbKgTGmvmzZtyr59+ygoKPCZbNXpdFitVqqqqigrK2P69OlMmTKF9u3b89tvv6HT6UT/U9FWJpwCAgK45ppryM3N5ejRo1RWVqLX67FardjtdjHw2LZtWw4cOEBoaKjP4JlSdyt2TJ8+nfj4eBYsWMDx48fFQh2lHdbr9RiNRoxGIy+++CJPPfUUAQEBVFVViUHMkJAQsaCnffv2TJ06ldTUVPR6vUgPg8FAZGSk6L9Mnz6dPn36AHDvvfeydetWPB4PDRs2ZPfu3aL/WbduXfbv30/Tpk3p1KkTX331lZiItNlsGAwGOnbsyOHDhwkMDCQrK0uUn6ioKJ8JvejoaE6dOkVoaCglJSUEBgaKPqSy8Ke4uJjg4GAAUUd6PB5iY2PJzc0V7249evRg8+bNwhaA0tJSTCYTLpdL5Pf06dOxWCw888wzREZGkp6eTlBQEC6XS/QpkpKSKCgoEGMIr732Gunp6eh0OiIjIzGbzeTk5DBlyhSSk5O56aabiIuLIy8vT7R1kZGRYiI6KChIlOWHHnqI1atXi7pe6bsbDAZRlj/44AOGDx8u0ikwMBBATEYrZXn//v1iolKn0/lMBgQFBYmynJKSwqBBg8T7aklJibjWuyy73W4iIyMJDg7m5MmToj8aFhbmU5Zr1KhBjRo1SEhI4KuvvjpnPaj4PXjwYI4ePcrWrVvFwi6LxSIWg0VERHDLLbcwc+ZM0fc+cyGcUift27eP3Nxcn4kPk8kkFm/o9XomTJjASy+9RFJSEnv37hUaShkzm83UqlWLDh06sGzZMiwWC06nk4KCAsxmsyhrRUVFXHfddWzfvh2Xy0VpaSnh4eEUFRWJd3Sl7MXFxZGdnY3RaMTtdpOUlER6ejp169aloKBAvEso9ir1alVVlah3IiMjxaIDZRGc0t+tqqqiRYsWfPTRR/Tv35+oqCjS09MxGo24XC7q1q3L0aNHha833ngjS5cuJSUlhSNHjuBwOHzeLR0OB1VVVQQEBBAUFERpaSkul4saNWqId5UBAwawfPlyTp06RXx8PFarVbxDAnTo0IGt
W7fy3HPP8dJLL2EwGMT7mN1uF31oqH43rFGjBllZWaLfoix2uvfeeykpKeHLL79kwoQJTJ06VdRR3pPSSl/ltddeo0WLFvTs2VM818p7OCDaqzFjxnDPPffQuXNnvv/+e7GwUSn7Sl3bqlUrTp48yaxZs1i1ahV6vZ7mzZuzdetWDAYDUVFRLFu2jA8++ICPPvpIvOt7axkMBm655RYxvjhmzBh++OEHjEYjkZGR6HQ6Tp48idVqpW/fvhQUFLB+/XoWL15Mr169GD58OOvXr+f48eN4PB4xjnX48GHR3t1yyy18+eWXDBgwgC+//FIs1OncuTPLly8Xz+gdd9xB586d+fTTT1m3bp0YV6moqODEiRPiuRs+fDjXXXcdTz75pBizUvpTyvPVtm1b0tPTadGiBevWrRP1f2xsrBiDaNy4MXv27KF9+/asW7dOvJvDnwvsKysriY6ORq/Xi0ktpZ4yGAw0b96csWPHkpmZyXPPPce1117Lb7/9JsqA8qyYTCaefvppvvjiC3Jzc2ncuLFYxGk0GomKiiIvL4/Q0FCcTidDhw7l66+/xu12i7ozOjqaoqIiqqqqeOihh/jiiy9ITEzE7Xazd+9eUY6GDh3KkSNHOH78OD/88AOhoaHYbDZ69OhBRUWFaEt79erFunXriI6OpqCggMWLFzN8+HDq1avHxo0bCQ0Nxe12i/ozMjKS5s2bM3PmTKHldDopKSmhRYsWYgFAQUGBT5s8btw4lixZIvJOr9fz7bffotfrhX3Dhw8nNzeXFi1a0KhRI0JCQjh+/DjffvstTZs2Zc6cOeetq68mZEhAyb+Gzz//HLvdzjvvvEPXrl0ZMmQITz31FJ9//jm5ubnn/V1lZSUzZ86kTZs2uN1u5s+fz/z584mJiaFJkybi9z///DMHDx7krbfeokePHvTr14/JkyezatUq3nvvPe655x7uuusu1qxZQ8uWLUXn7LvvvmPAgAH89ttvuN1uSkpKqFGjBps2bWL48OGkp6cTFhbGu+++S0lJCS+99BI6nY7777+f+vXrYzAY6NWrl1h9/NRTT7F69Wq2bt0q7N+zZw/XX389ZrOZ3bt3M2TIEJo3b87LL79McXExer2eBx54gPnz54tVFspkBcAzzzyDzWYTq38BMUB86NAhbrvtNg4cOECHDh2orKykRYsW/PDDD7zyyiuiczFt2jRmzZoldHU6HRMmTBCdveTkZIqKinj55Zc5ePAgL7zwAlDdME2ZMoWCggIGDRrEuHHjWLVqFR6PB7PZLF7iBwwYQOvWrSkpKSEgIIDmzZuTk5MjXj63bdvGjz/+yPLly8VnY8eOpWPHjoSHh6PX6yksLCQ0NBS9Xs/DDz+M0+nEarXSoEEDbDabaFwKCwuZNWsWs2fPpmbNmkD1hMuPP/7INddcIzpdTZo0oWbNmrRv3140/Dt37uTEiRN8/vnnpKSksHv3bpHP8fHxzJs3j4qKChYuXEjLli1JSEgQK46uv/56li9fLgaclV1d48aNY9y4caSnp9OtWzc8Hg+HDx/mpZdeEvlst9uZNm2a2LU2ZMgQtm/fzpQpU0Q6R0ZGMmDAAEaOHAlUr95o1KgR27dvF2VZ6Zw6HA7efvttUSauv/56Xn/9dfr164fL5RIvX1OnTmXnzp3odDoeeeQRDhw4wNtvv43JZGLt2rVC7/PPP8dms9GsWTPy8/O5//77eeGFF9DpdBQVFREeHs7s2bN544030Ol0DBo0iOLiYgICAkhJScHpdHL99dfjcrlYuHChmDypW7curVq1YtiwYUD1Kqfu3buze/duunbtSk5ODl26dKGiooKwsDAcDgfR0dFERERQVFTE4cOHARg5ciTXX3+9KG+hoaFkZWVx++23s3z5cuLi4kSHLDo6mri4OGrUqIHb7ebo0aMkJCTw66+/ioGahx56iLKyMgICAmjUqBFbt27llltu4dSpU2JCVRlcUSZ3lMnfsrIy0tLSSE5Oxmw2Y7fb
efTRR2nZsiW1a9dm+fLloo7YtWsXUVFRhIWFAbB06VL0ej1JSUns2LEDgEceeYQ9e/ZQXl6OyWQiPT2dmJgY3nrrLQ4ePEhxcbHIlwceeIAXX3wRnU4nBj0bNmzIhg0bfPJFr9eLARKlTKxatYrbbrsNk8kknpv33nuPZ599ln379nHdddexbNky7rnnHnbs2EHr1q1FOgA0adLEJ1/ef/99li1bRmJiohiMMxqNVFRUcOedd7J3714xUK4MnCu7HQCOHTsm8kWn03HfffcxZcoU1q9fLwZJAG677TaxSt3tdhMTEyMGzxwOB9u3bycrK4sPP/yQL7/8kuTkZBITE4HqlfZKHb9+/XrxEjl//nwqKiqoU6cOAB07duTdd9+luLiYRYsWicEsk8nEL7/8QmBgICaTicDAQD7//HM++ugjbDYbFRUVfPTRR4wfP57nnnuOiooKnzZnx44dxMTE0LJlS0JCQvB4PDRv3pyYmBifNmffvn24XC7uvPNOTpw4wVNPPUV5eTmFhYU+bc7IkSMJDAwUaZidnc1TTz3FwoUL+frrr8Xk8W+//cbKlSvF86LsTnv11Vex2Wy0aNGCjh07inoYYNeuXXTt2pUdO3aIgd/w8HCMRiOzZs2iefPmIu/Lysq47rrrcDgcYvApLCxM2FVSUkLt2rWpqKjgkUce4dSpU8TGxhIZGcmOHTsYMmQISUlJ4pls3rw5xcXFjBgxQkxqKZNAShtWWlrKTTfdJNovZVdH165dWbVqFUOGDEGv1+N0OqlZsyZ6vZ7Y2FgxMdqjRw9cLhezZ8/m7bffpkePHmJlc2BgIM2bNycpKUnsyPvmm29YsmQJqampos164403ePrppxk3bhwAP/zwA23btsVoNIpdN+Hh4dSqVYuysjKSkpJEmr311lsMGDCAhx56SAxmAtxxxx08//zzbNu2jSZNmohBv/z8fOLi4jh69CinTp3CZDIRERHh02YpA5JKOa1ZsyYBAQE4HA48Hg/XX389YWFhZGZmotPpKC8vZ8+ePUyYMIEBAwYA8Morr1BRUcGvv/7KXXfdhcViERMkNWrUYOXKlQQHB9OlSxdycnKoqKggJiaGmjVrkp6eTocOHTh27BjZ2dmi7d+9ezeJiYl4PB7WrFlDeXk5PXr0YMWKFbRs2VIs2rjvvvvo27ev6AM2adKEZ555hoKCArEbRGlvlQH4srIyBgwYIF6Wi4uLCQsLE31K7/p34sSJFBQUUKtWLXQ6Hdu2bRMDZytXrsTj8dC/f38xOGgymTh58iTDhw+npKRErDxt0aIFS5YsYezYsaIPtXTpUo4cOUKdOnXYtWsXOp2O8ePHExMTQ25uLjExMVRUVPDTTz9RWlpKdHS0GCCYNm2az2plm83GyZMnRZpVVVUxffp0+vfvL9IsNDSUTZs2kZ6eTtOmTcWu/LFjx+J0Ojl16pTYxf/ll1/yzTff+KRZdnY28fHxPmkWExNDYmIimzZtQqfT8cADD/DOO+9QVFR0wTSbO3cuXbp0EW3WoUOHqKioYN68eWRnZ1NRUUHt2rXJzc1lzJgxTJgwAZ1Ox6JFi8RgioLRaKRdu3aibfB4PAQHB9OsWTOWL1/Ohg0bxET/p59+ysKFC/F4PJSXl59V14aFhYlV4lC9w9G7rnU6nUydOpWjR48yfPhw8vLyxOBccXGxqGvbtWsnnunw8HB0Oh0tWrTA5XLx2Wefibp27ty5PProoyxcuBCo7u9XVlaKPq2y0EZpi8eOHYtOp2PFihWi7l+2bBk6nY4777yT48eP8+ijj4q61rvub926NSaTiV69egGQlZVFUFAQa9eu9bFn6dKlrF27Vqwsr6ys5J133hETN0q+bN++nS5duqDT6fjhhx/EBL+C2Wyma9eu2O12kpOTxeT67Nmz2b59u8gXZUfXrbfeClT3TU+dOkVwcDCvvPIKGzduJCIigvr161NZWcl3331H69atiYmJISgoiHXr1lFcXCz6WzVq
1BA2KM9gUlISb7/9NosWLSIxMRGHw0FJSYmYmA8ODhYDycHBwSKqwoABA0T74Xa7WbhwIePHj6eqqorKykrWrVuHzWbD6XSKXaB169alsrKSZs2aERMTg8Vi4e233yY5OZlHH31U7MQymUwicoWyuMZgMPD+++8TFxfHQw89JHab16hRg9LSUu644w7RR5g+fbqYHImJiaF169a88MILFBQU0LRpU6Kjo3n77bcB2L59O2vXrhXvmAcOHBCr251OJ3379iU8PJy9e/eKNvD//b//R0lJCcOGDUOn07FhwwYee+wxMjMzufHGGzl+/DgDBgwgKSlJ9LV0Oh2NGjUS73JQPSHz+OOPizqyc+fOuN1u/u///o+ysjJq1qwp0njQoEEYDAbi4uKwWCysXbuW++67j0OHDtGlSxdKS0u59dZbad68OVA9iO92u1m6dCl79uzxWZCyaNEixo0bR3Z2Nq1btyY9PR2bzcY777xDo0aNxOQAVC9e+e677zAYDEydOpUlS5YA0LBhQzEJoCy0fOihh8TguDJR3LJlS5/J0g4dOoiFsEpZViaBQkNDRVn+9ttvzyrL3jvNGjRoQN26dcXElcPhEGV59uzZmEwm9Hq9z45a70VAykRNZGQkQ4cOFe97Ho/nrLJ8/PhxnnnmGTZv3izKstIfa9iwoZjMDgkJYeHChSJii8fjYfz48SxcuFC8QxUUFPDhhx9yzz33oNfrxSSHshtAmZBavXq1GOx2u90EBQXRoUMHnE4nwcHBREdHY7FYmD59uliUp9ChQwdiYmIIDw/H6XRy9OhRvv76a/r06UNeXh5BQUF0794do9FI9+7dKSoqIiEhgQ0bNuBwOMRizHr16mE2m3E6nTRr1kz0/7Kzs+nSpYuY9HvppZdo2bIlcXFxYvJG4cYbb8RgMIj3NJvNRvfu3cnKyqJHjx6UlZXx5ZdfsmrVKoKCgkQ/dM+ePXz22WfY7XbCwsKoWbOmGMBX6mcliklaWho9evQgPT2d1NRUsZigefPmeDweUWYqKyvp1KmTeE9r27Ytubm59OnTh4ULF3Lq1CkGDRrEzz//jM1mo127dkD1zqPCwkKaN2/OtGnTAMQ7hd1uJykpSeSjMvmWlZVFRESEuParr77CYDDw7bff8vLLL5OamsqMGTOoqKjAZDKJRSNt2rQR0QGMRiO///47Y8eOFc+P2+3mq6++4oUXXhALFFatWkVRURHLly/n9OnThISE0Lp1a3777TesVivx8fGirq1ZsyY1a9Zk5cqVuN1uxo0bR7169QgNDcVkMlFYWMjnn3/O7t27Rb0VEBDATz/9JCaonE6nGB90u9389NNPoq5dvHgxpaWl1KhRA5vNxqhRo3jjjTfEOAdUL+R66KGHxA5qqN4t8/rrr3Pq1CnMZjNz587lzTff5MSJE9SrV4/y8nK+/vpr3nnnHQYMGEBQUBBVVVUMGDCA5ORk1q1bR3x8PAEBAURFRbFgwQI8Ho/Y0VWrVi0xFqBM5A4bNgyz2SzGIrZu3crIkSNZt26deM+dP38+s2bNomXLltSqVYs//viDIUOGsGPHDh544AFq1aoldpsr72t16tTh1KlT3H333aJMKHlXv359tm3bhl6vZ8mSJWJyTafT0b9/f3Q6HbVq1aJfv354PB52797NkSNHeOCBB8Rk1YIFCzCbzQwZMgSz2SzGMBcsWMANN9yAw+EgOTkZgPHjx4s+4sKFC6moqOCdd97h9ddfx+FwMHDgQKB6bEcZi/38889F3X/fffeJPAJ4+eWXefbZZzlx4gShoaHMnj2bRYsWkZubS+vWrSktLeXrr78mNTWVgIAAatWqJcbXKisrOX36tBi7+vLLL5k/fz4FBQW0bt1atMm///672NH+/PPP89RTTzFmzBiWLl3qY5/SDrz++uuMHDmSO+64g2eeeYYxY8awbt26C45/X03ICSvJv4a0tDQ6deokOlBQvZLW7XaLXTLnYtu2bZSWllJaWip+
bzab6d27N1lZWeL3aWlpJCUliW2YAKmpqVgsFsrLy+nbty8Oh4ONGzfSr18/0dCGh4fTr18/8vLyiI+Px+Px8Nlnn+F2uxk1ahTh4eGsXr2a8PBwOnXqxLFjx3C73fTt21f4NHjwYLGi4+abbxarRRMSEvB4PGRlZTFo0CC6du2KwWBg9erV9OvXj8zMTLG9tm/fvgA+nT+F9evXi1V1APXr16d+/fpCT9H4+eefxQAMVA/+KBw7dgyHwyHC1AAMGDCAvn37Cm2Px8OGDRtISkqibt26QPXW3OjoaKD6BbZv3764XC7++OMPWrZsyZEjR/B4PPTt21c0iG3atGHgwIEijJuymmH9+vWiA35m2VDS3ul0EhkZSVRUFAApKSli1Xpubi7h4eEEBweLfFZe2pR8NplMNG7cGKhuJHr37i1CIMbExJCXl0dCQgIJCQnEx8cDcP/995+Vz0ePHqVXr14sX76c0NBQOnfuzE033cThw4fJyMgQL6Jut5s+ffrQokULAOrVq0dqaippaWkA9OvXj6NHj9KhQwcyMjJEh1EJlbJp0yaxLT87O1uUK8W/wMBAfv/9d9q1aydshOrQGd5hD73ztKqqivz8fAwGAzt37hT5e+edd+J2u8XAtFIWATEJ2bx5c0pLS+nbt68oG8eOHaNjx46sXLlS7OhRwv6kpqYyePBgsWLZarWKl8HOnTuzbds2+vTpw+7du8WAvbKi+tprrwX+HIhs0qQJVVVV9O/fny1bthAZGUlAQIB4ftPS0khNTSU8PJy4uDgAkU7Kah69Xs/x48e58cYbycnJEavXlEmXo0eP0qdPH9LS0rBarXTr1g273Y7b7aZHjx4EBwcTHByMx+MRk6dKGQsODhbhs7KysnA6ncIe5ZlWVukqz3RWVhaZmZki9MiBAwfo1KmTeJYTExNFvgQGBnLttdeKfElISBD3u1C+eIcPU/LFarWKgQTvfFm+fDkdOnTwWfmn1MNKvii7UZTJwpYtWwLQu3dvkS/BwcG0bt1aPL+pqalih413vjRr1gyPxyNWunbu3Fms5vXOl8rKSlEHRkRE0KVLFxGyrGfPnmLFeXJysnimFHuVFXcJCQlUVVWxceNGhg4dClR3ZsPDw+nduzfl5eVcf/31YuDK7XaTmpoKQE5ODuHh4cLuyspK0ebUr19f2KOEmvvll1+IjIz0aXOUdFTCSSrhBwsKCnzanIEDB1JQUCDaHLfbLcqY0uYcOnSIJk2aiHAibrebfv36iTYnJiYGj8fDypUrxX09Ho/Y4VezZk3Cw8Pp2rWrWJlntVpFfd2/f3+hpYQ48Hg8oi3LyclBp9OJ5w5g4MCBYqFCWVkZzZo1AxCLDuLi4khPTxf5pryIKWF0UlJSyMnJITQ0lNWrV4v69+jRozRp0oTS0lL+85//iAmijIwMMegaExMjVsQ1atRI6CckJHD06FHxDKalpdGhQwe2bduG2+2mbt26YgdK7dq1qaqqYvny5WRmZornQ0kzpc1SVr0rbZZSN3qjlFWPx0NBQQGtWrUSuxL69u0rQk4oExBBQUGinKxZs4bU1FThW8+ePUUaKW3WypUrxcBiXl6emPxISUkB/myzvvvuO6xWK126dOHo0aMYjUZSU1MJDg4mKiqKm266SYRnUeo1pc3q2rUrAPv376dJkyY4HA4GDRoknv/OnTvTsmVLnE4npaWlPPvss3g8HvGiqtg4fPhwTCaTuHe/fv0oLCxk8ODBWK1WkXbXX3895eXldOrUiYCAANFfaNy4sbgmODiYtWvXivpXKUtQPWin1IdKn8+7/lXSUqlb+vbty/Hjx8nMzGTgwIFUVVWxcuVKAFH2EhMT2b17N3FxcVitVlH/KgNIygCh0qcaMmSIsPXYsWOUlpaSkpIiBlkHDBhA8+bNcTqd4nnZsGEDycnJDBgwgH379tGpUyduuOEGsTNFKUfbtm1j6NChoq5et26dqLdSU1OpV68eBw4cwO1206VLF2GH
xWIRZSs0NFT0a8PDw4mKihJppkyQnZlmSl/rYtIsISFBtKFKO64sBlHud8sttwg/lDbr6NGjfPXVV5SVlYm+kFLXKuGjoXoXgNK/1+l0REdHi7o2ISGBpKQkURd517XFxcX06tVLTFgpK6cv1L9XdrM0btxY1LXKgGbnzp1FXav015X/GjZsKNJd2emjTFyEhoaKeuPGG28Udf/NN9+Mx+MRK+S988X7fWPgwIG43e7zvm9UVVWRmZnJDTfcIOp+xR4lbJDyfCttoGKPki8nT54Uz/TRo0dZvny5qMeUfBk5ciRut1v0V4KCgkhLSyMtLQ2DwUB0dDR169alrKyM2267TeRLUFAQcXFx4t3OZrMRGhoq8rBfv34UFBSIsOxGo5EGDRrgdrtFfijpnJiYiMvlEgsl0tLSxLXKxLyyGywxMVG8Y6xZs0b0GwCx+0RpL5UJ8ZiYGI4fP87p06ex2+0MGTIEt9tNfHy82Ilx+PBhsrKyWLt2rUjvyspKMenfuHFjQkNDCQoKEv3ONWvWEBkZSVhYmCiP7du3FwP7q1atIiQkhIqKCgoKCujTp4+wTbm3ct+0tDQxoVezZk0cDgdt2rShSZMmREVF+Txjv/zyC506dWLr1q0kJSWJcuZ2u8WEgcPhIDMzk169erF582YxaN2wYUO2bNniM/gHiBDSVquV7du3Exoayu7du8U7m8Ivv/xC06ZNOXr0KC1btsThcIj+nrI79bbbbhO7HI4fP05AQAC///67aLszMjLOyqumTZuKPAwPD2fRokUA4t2jrKyM8PBwmjVrRkFBAWvXrhV2n1mWlfpOr9eLvo2SV97vvcq9lbK8fv16oLqPfaGyrGgrz5V3mxEcHCzK8sqVKwkMDPQpy0p4rjPLcnZ29v8sywEBAXz//fdkZmaKsqz0r1u3bi3Kcn5+vk+f0+Px0KdPHxISEmjSpAnR0dEEBgbicDjo27cvq1evxuOpjmqgvOe2bdtWPKPeIa3Ky8sZPnw44eHhxMTEiInlwsJCkpOTxYRYSEgII0aMICMjg65du+LxeEQ/TenjZWdnc91115Gamip2ZcXFxVFRUUFycrIYg9i/fz+dOnUS5QAQYxZKeD6r1Sre0TZv3uxzrcVi4dixYzRt2lSUVaPRKCbwld19CQkJREZGkpqaKt6fXC4Xa9asEX1OZSfRzTffLMJqKn3B4uJiatasKeq4sLAw4uLi2LJlC8nJyT4h2bKysujcuTM33HADf/zxBykpKT4hYe+++27RTiuLvEpKSsjLy+P666/HbreLcp6ZmYnVauX//u//AMTuPGXR4rBhw0Q52LVrl3iGsrKySElJETsSld1lQUFBop1WyntaWhrbtm0jJCQEq9Uq0uvM8rlx40YMBgMbNmzAbrfTp08fLBYL9erVIzw8/Lx1bY8ePVi+fDl9+/ala9euhISEkJaWJt59Q0JC6Ny5M+Hh4aIeBET/w+PxiMUBt9xyC/v376e0tFS8lyu7kHr37i3qBKVeqFu3rtjhA3/2n5TJ5k6dOoldO97vg/369ePkyZPifWft2rW4XC6OHj0qQj+Gh4f7LIgrLi4W7ZvH46Fjx45s3ryZTp06AX++Tyi73Ww2m8+4ar9+/Thy5Ih4FktLS0VYfu++ldFoFHX+3r17adSokUgzZTIzPDyclStXsnHjRnr16sXGjRtxu90UFRWRlJTE8ePH6dSpE1VVVWJBkNL3a9SoEe3btyc1NZXNmzeL8Sbl/spxCD169BDpquRbUVERrVq1Ijw8XIRLfuyxx0RZVtLszHxSogedqZeYmEhaWhp2u13sglfyKSUlBZ1OJyIVrF69WuzSPHDggNhJpVyrHAWQlZXFl19+KdpW5TrgnPadCyXPzheS+2pDTlhJ/jVkZGT4TCZB9YtUTEzMeeOYKr+D6gFF798r5yIpYeLOpa/T6USlnJCQIELkJSQkUFpaSkVFBRUVFaIij4mJISYmhoMHDxIVFUV4eDgNGjQQ
NigdAEVPuafye2UXgbLCLzk5WazYadCgAQ0bNkSn05GRkSF+462XnZ3N/PnzsVqt4nOn0ynOW1EG6hQ7FL0TJ04QExMjzq5S0kHZ5eU9Sei9qqhTp078+uuv4j5KY+GdjgcOHBArMNatWyfCE7hcLpKTk8Wgw7PPPivitBYXF4uXzIiICBo2bEhAQAAZGRlnnRei5LFiozIAuG7dOrG6WglFqOSpd8hEpTOlrGIyGo2iYXS5XCQmJopJQO+wEMr3gDivwzufPR4PJSUllJaWijNXlDwrKioSE4oej4fbbruNb775RpSBxMREoaV0MJRyqjB16lQ8Hg8vvviiOEcHEHYkJyeLlXdOp5OsrCzxXXZ2Nh999BHnQrFRefF56aWXgOrn56effhLPm3dZhOrzq7wH6BISErBarSKtgoODRTqGh4eTlZWFy+US6eLxeES5VlbqKOH1lGclNjYWo9EowmV4/+sdlqRTp044nU4RIkGxR9Fv0KCBWP2plGclvIOSjh9++CFQ3TlSOvNKGVC0GjRoQGJiorDD5XJRVVWFw+EgKipK7LRStq0XFxdTUVEhXgJPnTol7FFs837pUO6j5CtUP2f16tUT1ykDpUoYnTPzRfH/fPmiTOafmS9BQUEilIESvkbZPRAVFeVjp1IPK+ngHb85KipKTA7GxcWdlS+A8F2n04m0UupdJZa0on3nnXf6aHufXXJm/a683NasWVOEHVXKW0lJiVjx5XK5KCoq4ssvvxR1/Jn1q7L7JSQkxKeOV3ZYKaEWlUF08G1zEhMTRXif6OhoTp48KezxTscaNWqIwSkljZXrlDanbt26PufcKIP3Z7Y5CQkJPrHWvdscxZ+8vDyfNsftdvvY1LBhQ7Hj4OTJkyL8n7eW1WoVAyMBAQGizQoPD6dJkyZix4T3rjWoHrzX6XQiJK3yQquU1dLSUhISEkS9oTw3NWrU8Akj633GnTIgrUyyKPW7MvielZVFo0aNiImJITg4GJPJxJEjR0SaKSFjFe0dO3YwYsQIYmJixMuMMiCqhOVT0iwxMVGstFfSLCMjQ0zceuOdZkpYEagO7aacQeR9Pt65nhelzapfv77w88w2SxkYUnZGKPdR2iylXVDaLKX9rqqqoqqqyuc5sNvtYld0Xl4e8+fPx2w2U1BQIMpTdHQ0Bw8epKKigoSEBPE8WK1W8Yzk5OSwZMkSsTJQuU5JR8Wv0NBQEhMTRYib77//HqiuA6dNmyZ2hHrXRdu3b2fFihVERkZSWlqK0+kUC1Dy8vIoLy8XO3W82wOlvvQ+d8i7/lXSUTlDy7v+9a6jvftU3iHhlGdKqSuVsgxw8OBBoLqMhoWFiTRr1aqVT54odVq9evVEKB2lvQoLCxN5r6RZRkaGyBe3283BgwdFiBXvemrixIliQK+wsFD0a5Vy53K5xI7VoqKis9Js+/btQuvll1/mp59++p9pBog2VJnMUJ5hpaw3adJE9De826wlS5aIPqziX05ODkajkX379uHxeMjNzRVnogFn1bWNGjUSYbiV7xVuvvlm4b9yJhycv3+v7DJXQrEq9ijPgGLjqVOniI6Oxmg0ip0oCpMnTxaTyVDdjih9Ku+632w2i3SIiYnxqWu96/5zvR+c+b7h8XgYMGCAqPu97QFEvigo9jRp0oTg4GACAgLIzMwU/bXdu3eL9ljxuUWLFsTExIhBdOUctYMHD4q2V6mTEhISRL4oYbiVOtflcrFnzx4OHDjAvffei8ViEfmiTHqYTCbCw8PFMxEdHU1QUBAmk0nU2Uodr5yP510elJ3Xdrsdu91OdnY2hw8fFvdR0l1pp5W0V8qrshNDKbdnnuuh3Fs5swfgs88+o1atWrRv356Kigrsdjvl5eUcOnSIvXv3ilCmp0+fpkmTJnTt2pUaNWpgtVo5deqUz7vKa6+9xsMPP0x4eLjPvZX7hoaGivP+oHoxUEJCgljcoSxQzMnJIT4+nkOHDhEeHs68efO4/fbbiYmJ
ETuKlUlSZUemEkI6ISEBp9N51plJ33zzjaiz8vPzqVGjhjgDUykzoaGhnDx5UqyaV9pHJby50qYpz3FRURGbN2+msrKSkpIS0tPTRTtWv359kVfKxAf8Obj3xRdfANWLqJTJFUA8g8rCOm8/Fi1aRHR0tAhBXl5eLvo2Sn9EKctRUVGinChl2Xuh1e7du8nIyGDcuHHExsb6lGWXyyXOg6yqqqJJkybUqFFD7BJVynJ+fr4Ij+hdlqG6n6qUZY/Hg91uZ8uWLSKsonK9d1kODg4WE3RKWVbaw9atW/uUZWW3TFBQkE8ECKU8Kc9tQkKCqJO7desmdj/Gx8dTo0YNEbrVOyJMQkICDRo0oLy83OccKmWsQFmAMHnyZKB6saSSdkajkaysLOrXry8WJCQmJor89D7D2+l0ikmwhIQEn3cYpT0sKCggOjpavJMr/Xjvd0WlblXKLVTXCUq7cPjwYfH+VlVVJRaOQPV7zpEjR8S7rtLWl5aWotPpxISIghIe+9ixY7Rq1YqMjAyf93+drvrswRMnToi6Pjs7m/r165OVlSXS2btvk5CQQI0aNXA4HMTHx4v+V61atejSpQvr16+nVq1aot32eDwiEg1ULzCuUaMGQUFBZGRkiGcoIyNDjCF59zdMJhM///wzgIh8k52dLZ7NsrIy0tPTRdhTpZ0LDg4WY0DKRGJCQgI2m42DBw/6pL93XRsVFcXWrVspLS1lwIABJCYmimfWO9T8ypUr6devnxjnUBaYZmRkYDAYxM6isrIyJk2aBFRPsBuNRrFbXUlvqB5DWLx48VnnGSnh7pWQjZMnT8bpdLJ7925OnjwpIqco7bTyvqOMc1VVVYmF08p1Sp3wySefsGXLFmFP7dq1ycjIoHHjxqLfqdfrRZ/TZrOd9d6slCWlrlCee+VdV+mLKCE+09PTadSokWgflYUypaWlfPvttzidTkJCQnzaI6VPrqSjzWYjLCxM9OOUvojy3Cn96rKyMmJjY9mzZw8ej0eEnR83bhwdOnQQfij9j4yMDJ/3dOX59h5jU647czG80nYoi8GVts97jHPr1q1iIaPyLqD0mZ1OJ2+88Qb3338/6enpJCQkiPo0IyNDHBUSEBBAz549adGiBffeey9Hjhw5y75NmzbRqlUrmjVrxu23386XX37Ju+++S48ePYRvVztywkryr8Fms/lMxCgo8bAv9Duz2UxJSYnP75WVuyEhIRQXF4sVfGeihIwICAgQ97FarWLFRHFxsdBVtoSXlJQILW/7lAZeOTxe8cnbruLiYnFdaGioeKFWrnO5XD739NZ79dVX6d27t1gdBPg0LIpN3pqKnhJOTfm8oKCAGTNm0LNnT9EQwZ8TOzExMbz33ntiRZnSQCqrQ5XwXX369OHmm29Gp9ORk5PDXXfd5TO4pfDSSy+Jv/fs2SNi3QYFBREWFobBYKC4uPiceW2z2USaORwOevbsyW+//SZWHcOfk27KpIKCsupIGRhPTEwUK0yUkIJKh/nMw+aVDpaSfmfm89atW4mNjaW8vNwnn51OJ926dRMdopSUFBYsWCDyy2q1+pQ15Z5KWb7tttuYNGkStWvXFg2/gnJegLJSWnlx8Lbx1VdfFYNJ3h1J7/tBdciFDh06iLj448ePx+12i/KnlB1ATHq4XC4xqOmd7vBn7GolbrCSvso9lU6Lcq13uA7FJ6XTAn8OLirnkigDw0qnXOnIK/Yoz1tQUJB4aVcGyZTVicrWeiUsgLKa0Hvljbc9VqtVfKeE/VOeAaW8PPjgg8TFxYkVicrnpaWlWK1WkS/eaa8MeCvp652O3mezKfmnxEg/M1+U67zzpaqqSnTclBeHM/Olfv363HPPPUJbecFTwvadSVhYmMgXpcPepEkTkd5n5ov3M6jki3JmSs+ePUU94z0RDr6Dgt5pr9SB3t8pHeTKykrhl5Jn3vmi7IIbP348n376qfg9/LlT1ftf7zpe8U2pY5Uyfmabo8S0h+o6tLKy
UthzZjoqE7/eKyK92xzvl3uDwSD+PrPNCQsL8ykn3s+A984M7zZHeSbPTEflJd37/CfvNllJeyVMpmKPUn96l2/lmVQGLPLy8ti9e7cYLFFCDTocDkJDQ0VZ9k7b4uJin5Vl3uV7zZo1orx4LwxQyndoaChhYWFissm7blRCOiqMGzeOPn36EBYW5lMHKPZ5p5nin3IGgdJmne/Abu80UygqKhIhRWrWrOkTzk5BSXcl373riTPbrFq1aonfDBgw4Kw2q7y8XLQVii1WqxWn03lWWahduzYOh4OKigoGDBhAWFgYdevWpbi4WNSjSpjZM+2oqKjgueeew2AwUKtWLZ/BceU6ZSWzd+hipe2/7rrrxOr+WbNm8dVXXzFr1izq1q3rUw7uv/9+fvvtN06fPi2el7vuuotHH30Ui8Uizl/avHkzhYWFPvWvki5wdv2rpKNSlr2fK5vNJup87/rXe2GNdzp6T0rp9Xoxkac8v8rvvCdTvOstZWedsno4MTFR/FbxRyl7SvlPS0tDp9NRp04dcRYAVC/umDRpkngBV8qZMriqlAOHw0FcXJyo05Q0a9eunVhEoNNVn3n25ptv/s80A3zaUMVP5Z7evpzZBu7du1fs2jWbzaKubdSoEffdd5+wRdFSJjS869qwsDAxoaj0TRWUEFNK/iicq38fFBREeno6PXv2FIssTCaTCDd6Zl0bEhIibFM0Vq5cyfbt2+nRo4fIL71eL+qNM983lHRQzhfx1jpXX+JC7xvKBJG3hsKZbaC3Pcrg+XPPPSf8huqwsN75oixgUNJTCSFWWFgoBnWU87mUa5Uznc5cOaxEL8jJyRFlTLFPeRaU9lixU6njlUkR5V1PGcxW8vf+++8XOzSUtPZ4PKLfAPj0L5TQkWfuEFLu6+2zgnJvp9MpBt/37NnDxx9/LBZEKAtCJk+eTFlZGadPnxaLW2bNmiXKoJI+3osgR40aJcIAe0+0KPfV6/WiDYTqwXLFTu8y43A4mD17Nnv27GHDhg107NiR5557TpQtJZyud9lQ2kJl0E9p05TFVLVq1RLhoJUBTOW5Vn6rnB+i+KT44H3WpVIvt2vXjl69emEwGMSAXUlJiWj3lfcI5f+VtlvZ8aloW61WMRANf76TlpeX+9QxBoOBX375BZvNRmFhIcHBwaJP5J2vSnlSninvvPJuDyIjI0lJSeHHH3/kjTfeEJ8r6amc4eNdnpRzj7wXaSnPk2KjsvjIbDb7lGUlfZX8OXr0qNgBpNiolBVvlIVGZ75nKnWYckaXd1p79++LiopEWQgLCxM+mUwmkUfeCy8Uf5UxCG+8n8usrCwRJl6JyqCUI5vNRmRkJOHh4ezcuVPUn8riKPiznVPyOy8vj7y8PFF+rrnmGuG/0l88s15V0iEsLAybzSbKrbKLTJkcsNvtHDx4kKqqKpo2bcrs2bN93u+9xy+U/terr74qwi96o/SJbTYbbdq0EWfTKpMcyiJr77re46mOAqDc81zjWUr6e4eNDAwMZMaMGRgMBtLT0/nPf/4j7GjZsqX4jVI+jUYjxcXFIk1/++03tm/fjsfjEee5mc1mevbsyfjx432eUeXZUNqYa6+9lieeeIKffvqJ8vJyn/LpXUatViuvvfYaOp2O2267TXyuPJNK+Vy6dCmxsbG0a9dOvDcWFxdTWVkpztK66aabSElJEeMcyplcyv2UOuLDDz8UfdGuXbvidrtZsWKFsMfj8XDDDTcwceJEYmNjxU40JX8Bn/ZB+W1ERARGo5F77rmH06dP+/T3FH+U56eoqIi77rpL3FOpEwYPHuwTqWTp0qWi3Crtm9vtFhN1Sh/FOz0VSktLRTQM+LN/4D3eAb79OGVXZ2hoKPXq1ROLDJS0UxaPLF++HKhetKig7KT0vpcyFub9/nzy5Elhw0MPPUR0dDROp5O3335b9EU2bdqE0+k8a1xXqUO8x9gUH4CzxjbCwsJEPa9M4JWVlREaGsqSJUvYvn07gwcP
BhDPWFhYmBinuPfeezl27Bi33347BoNBtPvFxcXk5+eLRRuvv/66CPN97733+pwj3q5dO55//nk++ugjgoOD2bp1K+PHjycgIECE7vw3ICesJBKNsHbtWtauXcuYMWMuW6uqqorRo0cD1atvvVEq4sDAQK677jpGjBgBcFb8emVFWMOGDcULRePGjdm3b59PJ1qhbdu2YtWxcgD0peDxeFi1ahVNmjTBaDSKVaJnHvh6PoYOHSrsq6ys9Hnx8x5I+ysoB56fifcZB0ajkWnTpomY7GfuHjsXEydOpFevXgQHB/usKvorFBYWsnbtWp/O3flo2bIlsbGx6HQ6EYaooKDA5+XCX1BWK/0vXC6Xz6G9Z6IM3Clxvb0H9y6V2267jeDgYDGp5D3w8U9gNBrFajll4vJMUlNTRXgsZaWf946BC6GsuPcOH/q/UFZgw9l1DuCzI+FKoeSL8tJ5ww038O23317x+1ztKKt+r7nmmouuAy/E6dOncblchIeH07x5c9FObNy48bJ0u3Tp4jPQDBcXNqF3795A9QDPpEmTLngA+ZVCSdfY2FgxMHDmKvXLQdlNcTnk5OSI3U7//e9/OXr0KDk5OT51gnLI9pnlxOFw8Mgjj4iD4/9K++Nt+7p160R9MnDgQDp06MBDDz101u61uXPn0qRJE3F+ElTvyBk1apTY2VKjRg0RRvB8k4l/F263+7x18IVQJpEAcSbRhdi7dy8TJkw4axXpzTffTK9evcTAmJJmZ4Yjcblc1KtX7yzd//u//+O6664Tv12wYME5F5ZdSVwulwgn602PHj3o1q2bsOViWLp06UXb4XQ6RV/iXG3WX6GyspJXXnmFRx999JzhrtXAu8xfTn3udrt55JFHxMCj9/mSahAQEECdOnV45513xIryy0E5U0mv1/+lZ+hKoeyGNxqNxMTEcPfdd5/1fjJp0iR0Op3Y0e3xeHjggQfEDhEF74n/zp07s2DBAoxGo1h0d6nccccdxMbGijNinn322Qtef76yGxsbi8FgIDAwkLCwsP+vvfOOr7q+/v/r7tzkJiEJGWSQkAAJK8jeCSsqIkNAoiB0WmmlCmKp8KXQAoKigKyCVFtBFBSihBE2DkgAQYYIBLJDhtn7Zt3c/P7I4xw+nyTgaJWf7Xk+Hn1Ybu79jPc47/N+n8VRXMpIVuCOQ54yXRU5sLQ2Rp977jmEh4ezkxZBNWnuRmNjI1asWMERL/di9OjRbDzV6XRYvXo1amtrOVrn38FoNCIsLAyrV69mh4UfG4q+Icgx5tv4d+bGqlWr2CBwNygKhigpKUFKSkqLsa6kffv2fNhN6c6U2QQAYNq0afjoo49U+z2lY4ISLy8vBAQE8D7jwoUL3/Jmd4cMnTRuKFq2uLgYe/bswbRp01g/bU2Opaen4/Tp0yqH3ta4desW3nnnHQBQRc3Sof93JTk5mduF0sASCxYsQHV1NUJDQ7Fp0yb+nKJN7kVMTAwmTZrUou8XLVqE8ePHszO4EpIV3t7eePLJJ/HnP/8ZlZWVKoctJZ988gnrnMrahUrsdjs+++wzjB07tsW+gJwxNRoNgoODsXr1aj7naO06QFPU9eOPPw6dTocLFy7A0dGR0yvSGcSCBQswZswYODg4wMPDgw0hlBZUCRkdqU5eY2MjduzY0eq7kGycPHkyp1RX0rNnT45ABO5kjvkp0Ov16NOnD6f3bt++PTu0kywH7uwnAagM4Q0NDaryBErIEcZgMKgMmBERETh48CB0Oh2uX7/OBquKigo2iv0YlJaWYsmSJZg0aRL69++v+ltwcDA/R2RkJDujNz93ojSa3t7eGD58OKKiorBx40bk5uaqvvvcc89hypQp6Nu3L7Zv34533nkHbdq0QWJiImbNmvWdzr1+DojBSvivQelFoIQs2vf6HXmlKH9PBfwqKyvh6uoKFxeXVg8vyDOqtraW70PpegCwdw0A9qx1dnZW5R1unjqCIhXonZSbFbLoA+B0cnRP
8hhR3pOut3TpUsycORNms1mlsGi1WlbC6JmU+WvpemVlZazMbtiwAV999RX+8Y9/wMvLS7W5UKbBo2soP9fpdCpPTPLIbWxsRPv27eHo6MieGUVFRaywVFRU8KaHCs4D4LQADQ0NcHV1bbWvld4XjY2NCAsLw8CBA2G32/ngipRZyndP0D3JQ9PPz4/z3Z4+fZpDloE70WVKbxHqs9b62WazYdy4cS362cnJCRUVFZx2CbijdNtsNlWkkbJ9aSw3j5pSQu1NSh4ZR+gZMzIyMHPmTFZo6CCXolCU48pms/FmkOofUU0I5dhRto1er1c9o3LsKPuaPBuV7aL0eqF0McrvV1RUoLGxkfuMDkfMZjPKysrYuEaKJaWaoOdxcXHBgQMHUF5ejg4dOnDhT6BpA0mFgIE74fB0L6Wiq2zj8vJybmNKMabX61FZWamSERTFokw5Sam/qB+VbU9RXdS+Sm8gm83GY0+ZjoY8gpX9Qt9r3i+0GVSmISotLb1rv1RWVnIbtKYglZWVcVuR0knjhK5DhmBqI4L6BWjaDCjT7ZBHEkV5KsPklTKLZCpB7QA0bQTovajPyEhFHsNarRZjxozhZyT5Ru+s/K9Sxis9tuidAbRYc5RelFRvhZ6neTuSYVB5CKRcc5QGYzp0o7WMrkHvRv3ffM2hPqR2oHFEfU/QRkGn06Fnz548z5VzErizpjg6OrZYs5ReqNSGdXV12LJlCzQaDUaNGsV1c6i/gKY5Setz87ZVRqkA6vGt1Wr5N3StkpISjmisrKxEWVkZbDYbf5fazNXVlZ/DbrcjOjoar7zyCsrKyloYmunfzeeLzWbjAuyurq4tvGQJZZuR3KHaItSftA42ny8VFRU8DpRygiJ+qK1IjpnNZnTu3LnFmmU2m1UHAiSTDAYDFzomPDw8eE5NnDgRr732GqxWKyorKzki53e/+x0ee+wxfkblgcTAgQM5okV5XfoepbClttBoNCgrK+MUos8++yyAptR7a9euRZs2bfD3v/9dNQ6ioqIwcOBA2Gw2nj9nzpxBfHw8y52ysjKWexkZGap1lP7bXP5SO9JYVspfijgh2ULyV/nuyjlF70TMmDFD9R36HY0P5bVpjCxcuJDnvbKeBq0nNPauXbsGoKn2DfULpZGi/qZ7ajQa/rcyxSn9LTQ0lPv1bm2m0+nYw/xebQZAtYbSe9I9le9C1yddwt/fnyOhKY0oyVq6FtUXIrndXNZSBBy1H6VvAppqndIzKJ2rmuv3CxcuRHl5OQYMGAAvLy+Vfk9ZBprLWuUBUmVlJbZt2watVouxY8eioKCAn9dms3Harub7DWX6T6WspfdqbX+glP0HDx7kvzXfbyhRymsAKtlPvxk4cCA/j5OTEx88U7+QJzu1J/WLm5sbH+ZS/R36LkVbNo/+d3Bw4DSxyqgucgRSphSm5ycZr4xAowhSZZQQ3ZsM6tRfpDcAUOkXFL1Iae6U0Lht/jndm3Q1m82Gv//971zHhpwBgKbsBm5ubpyS2s/PD9evX0dsbCzfW6mv0n1JB1XKG7ovGehJVyktLeXnVI4Zo9EIJycneHl5wdfXFy+//DL27t2LoqIijgCn9qT9AxmzSQ6RHKG2pjlD65hyXptMJo6QBoB//vOfAMDp0CgFNUUNKfevFPFIz00Hzcrxr1y7KUqLHP3y8vL4s/Lyco4IoD0aPVtdXR0GDhzIke9Wq5UdIKh9lZFCND+UfaVcD2gsR0ZGqvQKmmt2u53HslKm6/V6VcSA2WzmiGZ6TqvVyu90t7FsNpt5bVBG4jbfT5Ju0tygSrKzoaGhhVxV6veHDx9mWVhYWKjaC5Dc0Gq1eOaZZ/ga48aNQ2FhITp06KCqQamMniQ9R4ky4qeyshLPPPMMoqKicODAAZ5zZOClNie9haKDadyS0Yn6qTW5SvOPopmaGzmo3piTkxMaGxtx48YN9OjRAy4ud9LbVlVV8ffoN8ePH8fMmTM5LaASWkNcXFxw7Ngx
dtCgfiM92Gw2q/TvmpoaVfYSpW7zwgsvqPRCkikFBQU4fPgw+vTpA7PZrEoR+8knn/BYpfFps9ng6urK9bk6dOiApUuX8hyi8amM0Gke0UKRgjSmKNsCyZnmZ0Bbt27FH/7wBzz22GMtztEo2r2oqAh1dXUYN24cAPCenb6jjNIG7tQSJr3T1dUVt27d4nZ5+OGH0aVLFzQ0NOA3v/kNP+tDDz3Ejt4vvvgigDvRayQPyLFbmdWCHJYoNWnXrl2RnJys0vcoIovG2EMPPcTvqowioncinY0i36iPaK2k9VP53nQtwmKxoK6ujucEfU953kHvqJwjJKfKysp4XSCDKK0F9LlyfivXb2pTiio7fPgwR4i2adOG37eiooJTzycnJ7NM0el0uHbtWotzXWVGFOU4ojWxuWGU+kmj0XCknMFgwKefforw8HAsXbqU24zmGJ3T0fN5eXmhT58+KCgoYJ2Ovufm5qbSMYODg+Hj44OcnJxWzzrDwsIwaNAgPPLII3BwcMC5c+d+VMPcT4kYrIT/GpQ5d4mKigoUFBS0qD3V/HdAkxGkec5SHx8fFBYWct2L5tdXequkpaWhffv2MBgMSE1NhcVigYODAxwcHPh3hYWFKCgoQKdOnVBYWIjS0lKkpaXxMygVGvqcctMCTQtBfX09rFYrGhsbkZiYyAfVyloYrT1rRkYGtmzZgn79+qkMVps2bcK7774Lm83GeWvpOeh6fn5+KCgoYIH8xRdfYNOmTQgLC0NFRYXquUmhp82O0tOGPCyVuZGVtcHatGmDxsZG1NTUQKfTITExkb1iqHYIAFXqK/K2ojpQVL+heR8rDSN2ux2+vr6w2+2or69Xed4oU83RMwF3Fsi0tDT23IqIiMDUqVM5vJoWaGpfZSqV1vrZw8MDXbt2bdHPQUFBuHXrFqxWK2pqalSpBoxGo6rNUlNTodFoUFRUxJ+lpaWpxr5yY0y/TUxM5BRLBoMB/v7+SEtLQ1VVFbZs2YLo6GhVG65btw79+vXj1I4GgwGZmZlczyQpKYn7xd/fXzUWAXANIlLq6BmpLaqrq7kfSktL4e/vD51Ox+1CHk6pqam88SgqKuL5FhwcjLy8PNhsNj4Uof9Seg4fHx/4+voiKyuLD1ypj9LS0mAwGJCcnAxHR0dYLBb4+vrymLdYLNDpdKyUkYcdzT9SNPR6PT9PWloaUlJSePNBBikKe3d0dOQc/wUFBTAYDNDpdLypd3NzQ0pKikoWKBU45Zgij1nqF3qvnJwcVFRUcB5/SlVAv6Pv3atfyHhZXl5+z35paGiAVqtFUVGR6jlpLFJ/0L327duHoqIiNkYtWrSI0/8o5yD1C/2N2ga4My8pDzgV/qXxoZR1Sq/i1NRUvlZeXh6Cg4NV70X1tPLz81uk9dHr9S0iN5SKtFLGk7dtUFAQALXCq1xzqI6gTqdDYWEhfHx8+HmU7Zifn8/KLrURfY/WnKysLG7/wsJCTi/YfM1JS0tTHYYoxxnNZS8vL9Wao3ymy5cvcz0aZf0vjUbDcyA1NRUVFRU8puvq6njOlJaW4saNG/D19VWtk/ReX3/9NSwWC9LT05GWlga9Xg+j0cgOFhaLhQuoK/uA5B5dh1Jt0jumpqbymFamsSgsLIS/vz+SkpJQUFDAKTw7dOjAbRMcHKwa323btuU+p+eia1N6TeW4orlPhwLKmjFKlG3Wrl07ljvUZhRNRO+snC/BwcFISUlhY25mZibPF5JP1GaUWpfWruZrFq0L5DxC6zeNVeX4JNlFaxZ5T1ZWVrIHJUUU09hQzquqqirYbDZYrVakpqayTE1JSeF0SqmpqfxeFRUVLHd8fHxUBgGdTofQ0FBkZmaqZFFQUBCCg4NZ/tL4J5mp0+lQUFDAkeK5ubkqOas8CFd+TuuBl5cX13MCmuSvUkaT/PXz81PVdKR2JFmpPPymcUU6FbXZV199peoTkmknTpxAXFwcv8+tW7dQXFwMnU6HlJQUpKamcupW
yvFPhyLUNtSW9B6U9qT5ONHpdHzY17FjRx5792oz2mjfq83oWZRtRtD3rl+/znNduWY9/PDDfG+SYyRrac1ydnbmAvX0HsqxnJyczPUmAbUTwHPPPcefL1q0iPtQqd8XFBTg4MGDcHR05IN1pZ7crl07lnn0jG3btuW2b2xsRFJSElJTU5GRkYFBgwbhX//6Fz9HWloaH4QoZb/y0KqwsFAla5Wyv/n+QPn5oUOHADRFvyhlP+0PiOZrIMn+69ev8ztVVVXx8wQFBXG70d+//vprFBQU8KGPs7MzfH19uUi7UueisUBpz0iXov6j9QBQ1y2kyJH6+nqUlJSooheqq6tRX1/PMpv2EKmpqSpjDz0npasj/TAkJITlC70nrdPUP83TMtO4VTon0b2Dg4O5mD3QZNSlesoODg5sFOzWrRvrLNQWbm5uSE5ORn5+PioqKtC2bVtuOxrfFRUVLVKX030rKipQWFjI+5mvv/6a113aM9DYVa7v5MlfUlLChh5y9KK6w5Tui4rS0/pFdR1pzlD/VVVVITQ0lA0pQJPuEBAQwMZoaovw8HDY7XZe00jPoxp4+fn5nLaNjIh0aE7yVpleNSMjg3XS5557DrW1tSgtLUW/fv04AiIoKIjPCEgef/3115zW02q1wmw2s26j7CsaKzROqP/o+ZuPZXpmGss6nQ41NTUoKSmBXq/H9evXkZ+fz0Y7Gsuenp5wcnJCfX19C2Mz1W+821imd1KO5erqapUDm1arhYeHB8tV5VimVLHV1dUqOUTjidqbHDBramqwbds2bvfdu3dzunWr1aqKUli8eDEcHBzg5OQEHx8fbhul4092djbLp+ZOlv7+/khNTYWDgwNWr16N4cOHc00ziuKn2lmJiYm85y4oKODzAIrgdXZ25nFL84Geh76r3HMDTbLPYDDwGYQyeoy+r4zwa9u2Le9127Rpg7KyMmzZsgUFBQXYu3ev6rf79u1DYWEhfH19uR6ycv9P/erm5qba62RmZiIgIEC1vtC+lWqlOjs7w8HBgfUv0oN79uypWqf1ej3XZwaaMmpQfVBvb29OM//aa6/BYDCo5hDdm/Rfs9kMi8XC46W5vkTjmmr7kfwmGd+jRw92MFauec1lbWBgIEd30tig7zQ/xyP5WVNTw/ekd6fnI/1DaUR94YUXMG7cOHh6emLZsmX8veaO9o2NjUhLS2PnjU6dOsFgMKhkQm1tLf//goICuLu7s2GCdHxCud+hdyJH+7q6Ojg5OeHWrVsoKChATU0N7HY7Rza5uLi0uBa1Lcl40sdoD066CMnt0NBQ3lN16NABNTU1MJvNSEtLQ7du3QA0yXaSH76+vqwn0BpMDhW0pyJd5OLFi6isrES7du2g1Wr5mZT7UEKpi9B1Sd+lZ1c6virPjWmsKKG1o7GxEb6+vrz2kfFt48aNfD6l1+u5ZnVwcDBycnL4b0RtbS3LjODgYHTs2BEmk0klP4mSkpJ7nmsDYIefu0Wl/dwQg5XwX0NERAQSEhJUyglZ3YcMGXLX3/Xu3ZuVNvp9fX09jh49ioCAAP59REQEEhMTVeH5Z86cQVVVFcxmMw4dOgSj0YgBAwZwXl1SxuLi4uDl5YW0tDRoNBoO+d68eTNKS0sRGRmJsrIyJCQk8GJ76NAhfqePP/4YWq0WDQ0N/P81Gg1SUlK47sC+ffvw+eefw2azITIyEnFxcQgKCmIv0vHjx2P79u2qujMA8MQTT+CNN96ARqPhRSA9PR1ZWVl8PaqDQgp1QEAAR/wcPnwYGo0G7du3h9FoxLVr1zid2KFDh9hbk55j4MCB3I70fnSonJmZyeG8DzzwAC5fvoyRI0ciKCgIhw8fZqUmKSkJWq0Wffv2Zc836qfW0glGREQgPT2dn+HmzZtswLpx4wYvUu7u7pxTm/qZDuGV/Xzy5EkATYtGfHw8L7q5ubnw8vJCSkoK0tPT+Rpvv/22qp/j4+MB3PFEoXaIjY1FSEgIoqKikJCQwM97
9OhRxMXFAWg6eDh9+jR7TsXFxSEwMBBnz55Fx44dYbFYcOjQIR77ykKT7u7uOHz4MCIiIliZr66uxqBBg/Dll19yGPP27duxfft2dO3aVTVOtm/fjiNHjkCv16NLly5ISEjgEPOkpCS89dZbAJrCnJVjEQCGDx8OjUbDh9D0jDR2zpw5oyoQabFYUF1djfj4eB7zwcHB7AXU2NiIhIQE9OnTB0eOHEH37t3Z44z6jNIWKY3KERERiIuLQ9++fVFcXMxecytWrGDP6crKSmRnZyMiIoL7WqvVorq6GhcvXkRAQAD279+Pdu3a8YExKUBBQUE4cuQIIiIiUFZWhlOnTsFisUCr1eLkyZOwWq2cL508kT/++GP2zKTxq9FoYDabcfr0aR47cXFxrKSQjAgICEBQUBAuX74MoCmEPiEhgfs3OTkZO3bs4Of/7LPPuF/S0tL4We7VL8qw/Hv1C9B0EHv27FneAJKMIPml0Wgwfvx4+Pj4cAo5mgfBwcEIDw9HcXExz8GtW7dyv9jtduTk5KC8vJz7pbCwkK/h5eWFtm3bstKn1Wp5bptMJj6Io36hzdnJkycRERGB4uJi3LhxA3FxcYiIiFA9d1FREXbv3g0XFxf0798fe/bsAQCOEDt27BjMZjOOHz8OrVbLMp4OONq1a4eysjLuF5PJxGtOVlYWTp06xQcOWq0WDz74IIqLi1VrDj1PbW0tzGYzLl26hKCgIJ7XtObs27cPHh4evOZQjQSNRsNrTqdOnXDt2jUUFxe3uuYUFBRAo9Fg5MiRfF+NRsN9f/nyZTz99NO8QSP5S/Oc2pDWLxrTNGbatWuHxsZG/h7QtNmmaFdHR0fePNEcyM7ORlhYGKdJ0Wq1SExM5KLaiYmJaNeuHcrLyxEZGcntFhQUhBs3bsBisWDv3r04ffo0H8KRYYDWHYvFwgWvacMTFBTEczAiIgJnz55Fnz59WDabTCZotVpkZ2dDr9cjKioKQUFByMvLU7XZvn370LdvX5w+fZpllLLNlJAhkKINLl26BF9fX24zMrQVFxfzPeh9IyIicPr0aX63kydP8nzJyMjA0aNHeV3dv38/X6e1NWvixIm8ZgUGBqKhoQHx8fGwWq0oLi5GbGwsHz7m5+er1qxt27YBAM6fP8+HxjQHIyIiEB8fj6+++gp6vR5arRbbtm2DRtNULyguLo6jB3bs2MF1L+Lj4xEXFwc3Nzd8/PHHHHFYWlqKI0eOwGw249y5c2hoaEBiYiL8/PyQlJSkOugYOnSoSv4CTYdLVNeKHEA0Gg1u3LjBfU9tSWNPKX/37dsHvV6PkSNHArgTgZScnIwePXogNzcXZWVlLH91Op3KgYPW99jYWP7Mz88PFosFly5d4nXx0KFDuHr1KgwGA2JjY1mnIrnl7u6OzMxMPPXUUxyhHR8fD61Wi969e2PPnj28hr733ntwd3fnNgPupMuk31y8eFElt0hvCgkJQVlZmWoukp6sXLOat9n+/ftx4cIFmM3me7YZ6YfUZkCTIevq1avcR7t37+b70nwBmtKVBQQEcJspZS2ttXR4RhQVFbGsTUtLQ2JiInvVOzg48JrVpk0bdOrUiZ0dAgMD4ebmptLvycA4ZMgQjpJort9bLBZUVVUhPj6eZS1FddP/kpKS8Mgjj2D79u3YvHmzyvDh7OyMxx9/HFqtFgcPHmTZT+/Xtm1bfh46xFHuN/bt2wedTtdC9sfHx+PmzZvQarUtZH9SUhL3UVlZGetXtAaSHKN+0ev1PKcDAwMRFRWF7Oxsfg+TyYQ333wTWq0WycnJ3C8RERGIiIjgdTczMxOOjo7YuXMn94vVakV2dja6d+/ORtuqqiokJibi888/R3l5OSwWC9q1a8c1FtPT06HVavnwjto5JSUFer0eISEhcHNzQ25uLurq6lhmGY1GvPnmm/zdqqoqNDY28nPSIVh+fj7S09O5zajeVEFBAQICAuDh4QGLxYKYmBhotVqkpaXxZyEhIfD3
9+f5RzrNvn37WAbRIa6LiwuuX7/O9y4rK4PVakVJSQkb5KxWK4YPH45Lly4hICAA7u7uOHLkCGJjYwE0Ob55eHjwfSMiInhe5efnw2g04ssvv8S1a9dQVFSEIUOG4ODBg9BoNHjwwQeRkJCAfv36ITExUbVWka5oMpkQFBTE62NSUhLCwsKQlJSEfv36qaIKgSYDHEWnA00H6+Hh4dBoNCoDx5gxY5CcnIygoCCcPXsWnp6ebKggQ9auXbtQVlaG06dPo3379qitrUX//v25nlNISAjy8vJUfUXRRA0NDdi8eTPeeOMNAHccWnx8fLB582bk5ubC3d2d92iku5hMJqxYsQIAVJEq9HfqKzo8vHnzJt+bxjKtye7u7jyWP/jgA446oLEM3DFiBQUFISYmhtuRolJCQkIwcuRI1NTUsA5IuiWtPfcay3V1dQgMDFSN5ZqaGr6/s7MzLBYLGhoakJCQwDonpahU6pwajQZHjx5FWloarl27hsLCQtTU1MBoNGL8+PF4/PHH2RlJmYmDxsc333zDOp7ZbMbRo0dRWlqK9PR0jihzc3PDzZs3WbZUVlZi+/btCA4O5vWDZKfFYmFZX1ZWhi+++IJrUt28eRN+fn64evUqevXqhcuXLyMsLIz34+SwQc4WZrOZx21kZCQOHjzI89dgMHCEemBgIL7++msEBQXh1KlTXAeY+pLalMYtObgBTVFEpHM2NDTAwcEB06dPh1arRZ8+fXi8OTk5sV5KjpYJCQno27cvbty4oTIo+vv7Iz4+HkeOHEGvXr1w/fp1dhACgB07duCvf/0rG1Lr6ur4zOfo0aNwcnLiNPuBgYEoKyvDxo0beQ4BdyKod+7cyeNg165dqKurg5ubG8sKSrFNsvbQoUO8Z6CaSpRBhjLqUP/961//AtAkL2pqahAZGYnMzExef5TvHBcX16qsBe6kTKS9YWVlJc+JqqoqVFZW8vkgpXCj8TRkyBB2cNJoNDh9+jSfKyrThUdFReHixYsYPXo07+MjIiKQmZnJ8i0wMBBnzpxBaWkpp8FPSUnBgAEDWCZcvXoV3bp1Q1xcHO93jh49ys6kdAZB70P7HaWsJ93AarXy+AbAZ4qkw7i4uKjOVePi4tiYSsbES5cuITAwkHXEmpoa2Gw21hW7deuGW7du8XVtNhuMRiNKS0t57BYWFmLAgAHQaptSuGZlZbGM1+v17FDVtm1b1kXi4uLwxRdfICAgAEVFRbDZbBg7dixKS0vRtWtX3oeWlJTg2rVrHMXq5OQEu92OkJAQHgPr168H0LRXpDYj2U39dOPGDZWDJ52TJCcnIyIiAu7u7hzlW1dXx7pxXFwcunTpotpDlpeXo3Pnzjhy5Ajy8vJw/vx5dp6hfhoxYgQKCwt5P0X9k5ubi7y8PNXzKbFarfj0008RGBiI+vp61oN/7mgaf6rklYLwI1NWVoaxY8eiQ4cOeOaZZ5CXl4dXXnkF48aNw+LFi/l7v/jFL5CTk6MKk9y6dSvWr1/PqRUsFguuXr0Ko9GICRMmYPHixaivr8ekSZNw+/ZtWCwWLFy4EKtWrUJoaCj69OmDDRs2cJjvihUroNPp4OTkBIvFgpycHERHR2P37t1wdnbGq6++im3btuHs2bMs9IOCgpCRkYGXX36ZDwInT56Mffv2sTesyWTiRbm8vJzz4w4bNgxHjhxhhYpCZ7ds2YIXX3wRpaWl0Gg0GDBgAC5cuMCp4GprazF//nxERkZi2bJlOHfuHEcYURqBTp06IS8vD927d0dCQgIMBgMr0FQUub6+Hm+88QaSkpKwadMmODk5sReOEnd3d8TFxWHEiBGora3FsGHDuPghhdZrNBoMHz4clZWVOH/+PFxcXNCjRw828tD9gaa0Q3FxcZyOYcCAATh48CAyMzNRV1cHk8mEp59+Grm5uYiJiYGHhweKi4vZs1mv1/O1HB0d4ePjw5FcVquVDyyApoV79OjR7NEUFBSElJQU9pqkBTsqKgo3
btxARUUFKioq0K1bN3z99dfo3Lkz5s+fjzfffJP/rtVqER0djYiICMybNw9WqxWzZs3C3r178c0336BNmzbw8/PDjRs3YLfb4e3tjby8PDg5OWH16tVYvHgx8vPz8dZbb2HBggXo0KEDSkpKkJycDL1ez0YcMkQ6ODigpqYGoaGhfCgBAH/84x/x4Ycforq6mlMJAE1K+VNPPYXGxkb06tUL7du3R2xsLM+T/Px8+Pn5cX5goOlg3tHREWlpaRg8eDBsNhtycnKwZ88ePPjggygtLUWHDh1449bQ0ICePXsiOTkZsbGxuHLlCubNmwej0Qiz2cx5600mE0wmE6xWK6Kjo5GSkoIvvviCw6MJSinZs2dPXLlyhaMjKAWXxWJBZWUlunTpgqKiIjg7O7OXo6+vL3JycrgA7JQpU7B//34MHDgQJ06cUBW7r6mp4QPHtm3borS0FMOHD0dCQgKsVisX1KVx1LdvX1y4cAEeHh7seUiHIvTcyiLIAwYMwLlz51jRNxqNnMph8ODBOH78ODQaDSIjI5GRkcGeOhMnTsTBgwdRX1+Pnj17ciqjHj16cBQcbezpML6goID7hRTZxsZGBAcH4/bt2zAYDAgICMDNmzd5U0GebZTegw78n3/+ebz99tvQarUoLS3lw+jAwEAkJydj7NixOHr0KMaMGYPY2FguKgw0Kf+hoaHcL1arFbm5ufweJNucnZ35ea9fv47g4GA+uC8pKeG2Cg4ORmZmJoYPH45Tp06xN1mbNm14M2Wz2aDX6/Hwww/j008/ZW9yFxcXWK1WaLVadOzYERkZGaisrMSDDz4IrVbLhzTk+VZSUoLRo0fj+PHj6NSpE8aPH48PPviAI6ycnZ3h7e2NkpIS3thS6kZKCUHja+jQoXjkkUfw+uuvw2q1wm63w2g0std4//79ec2hdqT2o2L3AFRrjp+fH5KTk1FfX8/XMhqNcHFxUa05AwYMwPnz5/lejz76KI4fP47x48fDbrdjz549KuMKpfkaPXo0fv3rX6OxsRGzZs1CaWkpTCYTdDodH9zRnOzTpw/69evH0R1eXl7sZevm5gZ3d3eUlZWhpKRElUpWp9PB19cX6enpfOBLMs3DwwOFhYVwcHBAaGgocnNzkZ+fj9GjRyMhIQE1NTVwcXFBaWkpy4KAgABkZWXxmufk5MRRVcCdgycHBwf+/MUXX8TGjRthsVi4b2l8U8TawYMHceXKFbz44ovo0qWLKt84rUFarRZPPPEEIiMjUVRUhCVLlqCurg4GgwHDhg3jQ41HH30UBw4c4MMcZZQGreHUZjk5ObBYLBgzZgzeffddGAwGTv2j0+nQuXNnXLt2DXq9Hq+88gqOHDmCY8eOwdXVlVOmtbZmzZ07F5WVlXj66aexdetWHve0PhqNRrRp0wb5+flwcnJCSEgIrl69yuOZ5sioUaMQExMDi8WC559/HqtWrYLNZsODDz6II0eOcEqe27dvsyPAqFGjcOLECbRv3x42m41rYg0dOhSnT5+GRtNU7/H8+fNch+vAgQPcx0OHDkV8fDw6derEm+Y+ffpwehAay+7u7hwdRu02ePBgnD17FhqNBh4eHpgzZw5eeeUVDBgwgOXvk08+icTERHz55ZfQ6XR49dVXceHCBezZswcRERH49NNP0bZtW/YAttvt8PHx4dRIpCc1NDTAbDarHG7mzJmDvLw87Nq1i9cc6ktvb29kZWXB0dERq1atwksvvcSHj3TYFxgYiKKiIk7lOmLECMTGxvJ6Qp7QU6ZMwe7du2E2m7F48WLs2LED165dg6enJ4qKiuDu7s73Juegjh078mad0gu++uqrKCoqwl/+8heW0z4+PkhPT0e3bt1w8eJF1r1CQ0ORmJh41zbz8vLC1KlT8Y9//APV1dXo2LEjCgoK+LCrpKQE1dXV8PX1ZaNOXV0dHBwc4OXlhWPHjuHAgQOYN28eR4EBTYfIdKBIqbi6du2KK1eusCymiDGz2XxXWas0
NiplLckK8oo3Go1wdHREu3btUFBQgJUrV3JNCqX+SfPEbrdjypQpsNvtiImJgV6vh9lsZgPU4MGD2bFi7ty5ePPNN1FVVcUH0rW1tWhsbFTJ/ilTpnA9GpPJxPsDd3d3rFq1SrXf+Oijjzhlz5w5c7B161aMGzcOdXV1iImJgZeXF0aPHo1jx45x9LGDgwP8/f0xatQobN++nfvF3d2dawb5+fmhpKQEVqsV7dq1Q25uLrch6QAU9d65c2cUFRXB19cX586dYx2DIk6pxlN1dTXef/99/PKXv2SnJJPJhLq6Ou6bNm3aoFu3bkhMTOQ1idYjcgqirA4dO3ZESkoK7HY715BSGrXNZjNsNhvsdjtMJhPWr1+PgoICrFixgtOWUspbGmN9+/bFrVu34OLioopIU6Y+12g06NixI/Lz8zFx4kQkJSXhiy++4LFIawy1w8yZM7F582YMGjQIly9fZkOMMr3zgAEDMG3aNK4h1rzeIcm9tWvXYteuXcjJyUH79u15jnbs2BFXr17lAz5/f38kJibC3d0dM2fOxL/+9S94e3sjJSWFnS8GDx6MlJQUODg4sAHm8OHDrHP7+/sjKyuLIwkp6o2MAEqDCNCkMzU2NqKqqorna5cuXfjwvaGhAc7OztBomuoO5uXlcVpDMihTPZTg4GCkp6ezHpaYmAiz2cyRWbdv30ZoaCiKi4v5DOG1115DYmIiR/OYTCbk5OSwwYsie2/fvq2KSKU+ozS7RUVFPM9qa2tZ79doNJwetrq6GmPGjEFBQQEbWEiPJZRjmfQAahcaI2azGVarlXVW0vccHBxUaYJpLJMT2a9+9SsAwLvvvst7XrPZzJHNGk1TJoUbN26gd+/euHjxIj+XVqtVRaCazWZMmjQJGRkZuHjxImpra9nARtHubm5umDp1KrZu3YrZs2fjrbfe4kgTZcrfyMhI3LhxA/n5+fD19eVICNp3k/PHX/7yFyxbtgxhYWFsfKQxRNclR9tDhw6htraWs3GQkY+iSWfOnInY2FjY7XZUVFTw+CVdk84gPD09OTMG7S1pL0P7l+7du+Orr75inUIp8+rr6zl1ITkhmEwmNtrW1tbCy8sLc+fOxSuvvAJfX18kJiZCr9dzzcj09HTWGx599FEcPHgQXbt2xfXr11XRKRQtTHsOMjhR+3l6esJqtWLs2LE4duwYOwJ27doV165dg0ajwezZs5GYmMg165YtWwaNRsPp6KqqqritSBbT3NJomurtUV9Q1h43NzesWLECzz77LK9dzWUU0OQouWTJErz44os8Ho1GIxvZjUYjBg0ahBUrVmDSpEkcFQkAffr0gaOjIz7//HNoNBqsXbsWY8aMAdDkiHvp0iWYTCZMmzYNCQkJfAYUFxeHp556ivVRkrG1tbVskHz88cexePFizJw5ExcvXuR9XFhYGMrLy/lsYtCgQXBxccHp06c5deSyZctQXFyMdevW8bihiFC9Xs/RgNXV1ZgwYQI2btzIc23UqFE4fvw42rRpwzrE66+/jpiYGHZW69OnDyorK3H79m1UVVVh8ODBePzxx2EymTB//nzuJ9KnSKbSmkXnbqRzOjk5obS0FE5OTujUqROuX7+OwYMH49SpU7wnJxlI54eenp7QaDTslENnTqSH1dfXIygoCLdv38aIESNw4sQJ/juNa4PBAG9vb9bXQ0ND8eWXX/JY8fHxQX5+Pnr06IGXXnoJS5YsYYc6q9XKRiQAvO+12+3o1asXxo8fj3feeYfX5sGDByM/Px+FhYV89nns2DHExcVhyZIlaGhoQFVVFcLDw3k9rKioQGxsLJYsWYIzZ85w5iIHBwf4+vriypUrcHd3R48ePTBu3Dh88sknuHXrFnJzczn9P0VXlZaW8visrKzE+PHjeV8yceJEnDx5kg19cXFxuHnzJt566y3k5eWha9eucHZ2xokTJ5CVlQUXFxd4e3tjz549qtqEP1fEYCX8V5GSkoJly5bh0qVLcHJywoQJ
EzB37lzVZJ0xYways7PZ6xNoEo5bt27F9u3bOWexi4sLJk+erPp9Xl4eJkyYgNLSUjg7OyMqKgoLFy6Ek5MTtm7divfffx/FxcXw9fVFQ0MDcnNzWblwcXFBVFQUCgsLkZCQAJ1OBx8fH6SlpcFms2Hw4MFYtGgRQkJCcPz4cSxZsoQt9CaTiQW4o6MjSkpK4OjoCE9PT06ro9VqebOs1+uxceNGjBgxAikpKZgzZw57jQNN3jWvv/46nnjiCcyfPx9Wq5W9Y5rj7u6OiRMnoqioiDfrzZk4cSJeffVVNDY24qGHHuJ0C0rCwsKwZs0ahISEIDo6GteuXeP6KgR5WdlsNnTp0gVRUVF48803W4RMGwwGVgxMJhOngyspKWm1gKSrqyt+9atf4ezZs2ywU0KHoX379kW/fv2wZ88eVowdHR0xdOhQlJSU4KuvvoJW21RfRVmnym63w9/fnxX57OxsTs9iNBo5tWR1dTV69+6NgoICmM1mzJo1C2+88QbS0tLg6ekJs9mM7Oxs2O12NgIojWtarZYVGEopYjabcebMGR7758+fZ88cer7Ro0fjoYcewpIlS1ShxZQb22QyISoqCunp6fjmm29Uc+P48eNcGwRoUvpfe+01bNu2DV9++SWHRJNySWlcRo0ahZUrV+L3v/89z7eUlBTMnTuX0wrShq1bt25YsGABp1nYuXMn1qxZ0yIHuclkwtSpUzF//nzU1tZiwYIFOHnypMpbk5QRFxcXBAUFIT09nVMX0EEUAJ5vFosF0dHRdy22O2zYMPz973/HqVOn8Kc//alVQ2ynTp0wb948DBkyBGvXrsW2bdt4I+jg4MDt4+fnh7KyMpSWlrKyrRz/QNN8o0M+KvhNBxvkVT137lx8/vnnKhnRHA8PDz7ArK+vZ8OCt7c3KioqUFVVBaPRiKNHj6KyslLVL4RWq0VAQACmTZuGadOmISYmptV+AZrC+F9//XX86U9/4rmjxNXVFfPnz8fkyZNZVpL3EG3KDQYD+vXrx/3yxBNPqIyRzZ+tTZs26N27NzIyMjjVCOXqB5oitubPn48hQ4ZgzZo12LZtW4v2bn5NBwcHnnf0mcFg4JQuN2/eVB0yNm/zXr16ISMjo0X6JGL69Ol46aWXsGbNGnzwwQfct62xcuVK7N69mw8HjEYjxo4di0WLFqnWHBoDyjpTAFqsOdQ21AZ9+/bF0qVLec1ZuHBhi9QDxOHDh+Hn54fly5djz549dy3k2r9/f/z1r3/F5MmTVamHyHMuKCgIWVlZKCkpgaenJx9iAnfWCIvFgmXLluHTTz9VGfKApjWif//+aGhowMWLF1XvQweetbW17N362WefYe3atdi9e3eLdYQ2uTNnzuS6jMrxodU25UR/6KGHkJSUhKSkJLi7u3PaW/Igpucj2UhybPfu3di6dStu376tegej0Yjo6GjMnz8fDz/8cKvzxWQy4f/+7/8wdepUTJs2DZcvX2517CrbjHQTAK2ugzqdDgEBAWhoaMA333wDT09P5OTkwN3dHeXl5Xdds8LDw+Hq6opz586hpqaG1yZqI6BprFGqqHsVQyfIG99gMCApKYkNrTS36DDRZDKxsSI3N5flJh0eODo6oqqqCh4eHqwblZeX8xqq0+lU6fWUODg4sNMNQYd+5EQzcOBAnDlzBhUVFfDw8GCdsrn8dXNzg8lkQlFRETp06IAXXngBQ4YMwfLly/Hxxx+zjKN1UqfTYfjw4Zg5cybWrFmDK1eutNpnzfsPuOPB7OnpyTUy6MD7busBjVODwQCTydRqPVb6XkBAAB9eK+WTwWBgw4Oyvk9YWBj8/f2RkJAAvV6vajM6ZK+vr2ddWJmW6NvaTLlm0eE2RbuTgwIAdpIKDAyEzWZjHWbnzp1YvXp1i7lP0Qnr1q2Dr68vFixYgBMnTqhkyaOPPvq9Ze1nn312V11i7ty5mDVr1veStR9++GELnWrZsmU4deoUDAYD6zY055SyVin7yUud
2l6pjzffb9BhnLOzM493AC3Gcmtjh2qzKQ1I5OCi0+mwYMECHD9+vIWeSvLgb3/7G9asWYPs7GzExsa26BegaY527doVixYtQq9evXD79m08++yzLfQXBwcHjB07FgsXLuS9W2trt7OzM1566SXExsZyCru7odFoEBISguXLl+P06dN33TcZDAY88sgjWLx4MfLy8rBs2TLeeyjnuYeHB1544QXExsYiOTkZHTt2xM2bN9lRS3lfap8HHngAmzdvxrp161q999ChQ7Fu3ToUFRXhpZdewtWrV1u8t6enJ+bMmYMpU6bwnjg2NhbLly9nJ0C6LxnBH374Ybi5uXFUOBk8APBe5MEHH8SCBQvw/vvv46233mJDJBlBGxoa+N908N/82eierq6uHIlDhiiC9HmNRoNf/vKXePbZZ7Fp0ybExMS0aDtHR0f069cPs2fPhpOTE++TaA5rNBr07t2bo/npDMHR0RHOzs68n3VwcMCsWbOQkJCAW7dusRG5NV2oc+fOWLp0KY/lxx577K5jpXv37li0aBGSk5Oxc+dOJCcnq9YsirZTjuU///nPXD9Uia+vL5599lnExsbi8uXLcHFxgYeHR4u5Qdf18/Pj9L93W7dpLDc0NHDKvrvh5OSE6OhozJ07F7W1tVi+fDk70AFNczc8PBwrVqxAcHAwy1VKeUfrDUUAlpSUwGw2o6GhodV9h06nw6ZNm/iwe+3atUhOTm6xloaHh2Pjxo1wc3NT7dGU7zho0CB06NABR44cQVFRERu3lftGAJx+k+oNUipk5Z577969yMjIwNq1a5GSkoLGxsYWz6TVatGhQwe0bdsWly9fvqueAjQ5dYWEhODChQuqurSOjo6Ijo7GP//5T8yfPx8uLi7YunVrq2nAHB0dERgYiIKCAjZ4klx1c3PDY489hrlz56K4uJijo1tjy5YtGDFiBGJiYrB8+XKVjkBnFY6OjjwPmxs0WyM2NhbPPPMMOxIq26h///5YsmQJgoODERMTg2XLlqkce5Sy9tq1a5g5c2ar99Dr9fjb3/7GJR0A4LHHHkNeXh6qqqrYKYfGZ0hICObMmaM65yBMJhOefPJJdq6dMWMGMjMz0bNnT5w8eVI13qlGV5cuXbBgwQL85S9/QXp6uqqNmuPk5IS3334bgYGBWLlyJT755BMulUHjxGQyYfjw4aro8uZQhPWoUaNw9epVLvvg7OwMnU6H4uJi7hsyKLm6umLixImYO3cuYmNjsXTpUtV68Ktf/Qpz5szBO++8g/feew95eXmt3tvDw4PnCNUoa419+/YhKysLb7zxBqdWVD6TyWRCZGQkXnzxRezcuRN79+6961kf0JSuMyAgAMePH2+xv27Tpg0mTJiAcePGYfXq1ap1QImXlxeCgoL47GrkyJGt7tO8vLywfv169OrVix2BW2Py5MlYuHAhkpOTsXr1aty6dYvrcdJ7+vr64g9/+INqfN6+fRvLli1DfHw819vs1asXli1bhpCQEGRkZGDp0qW4dOkSn01ptVr4+flh3Lhx+M1vfnPXGsk/N8RgJQiCIAiCIAiCIAiCIAiCIAiCINxXpIaVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIws+A0NBQbNiw4Xv95ty5cwgNDcW5c+d+pKcSBEEQBEEQBEH4zyAGK0EQBEEQBEEQBEH4AVy8eBEbNmxAeXn5/X4UQRAEQRAEQfjZo2lsbGy83w8hCIIgCIIgCIIg3Jva2lrodDro9frv/Bu73Y76+noYDAZoteKv
+J/m7bffxqpVq3DixAn4+/vf78cRBEEQBEEQhJ81smMRBEEQBEEQBEH4/xS73Y7a2loAgMlk+l7GKgDQarUwmUw/a2NVY2Mjampq7vdjCIIgCIIgCILwI/Pz3bUIgiAIgiAIgiD8TNiwYQNCQ0ORkpKC559/Hr1798aAAQOwfPlyNkgBTXWqli5din379mHs2LHo0aMHTp06xX9rXsMqLy8PCxcuxNChQ9G9e3eMHDkSS5YsQV1dHYDWa1jNmDEDjz76KJKTkzFjxgz07NkTw4YNwz/+8Y8Wz52dnY1Zs2bhgQcewKBBg7BixQqcOnXqe9fF+uijjxAaGorz589j8eLFGDBgAHr37o358+ejrKxM9d2RI0fimWeewalTpzBp0iSEh4dj165dAIDbt2/jueeeQ//+/dGzZ09MnToVn376qer39M5xcXHYuHEjhg0bhl69euG5555DRUUF6urq8PLLL2PQoEHo1asXFixYwO3VWj889NBD6NGjByZNmoTz58/zdzZs2IBVq1YBAEaNGoXQ0FCEhoYiKyvrO7eLIAiCIAiCIAh3+H7ueYIgCIIgCIIgCMIPZs6cOfDz88O8efNw+fJlvPvuuygvL2fDBwCcPXsWhw4dwvTp0+Hm5gY/P79Wr5WXl4cpU6agoqICU6dORXBwMPLy8nDkyBHU1NTAaDTe9TnKysrw29/+FlFRURgzZgyOHDmC119/HZ07d0ZkZCQAwGq14he/+AUKCgowc+ZMtG3bFgcOHPhehqrmLF26FC4uLpg9ezbS0tKwc+dO5OTk4N1334VGo+HvpaWlYd68eYiOjsbUqVPRoUMHFBYW4oknnkB1dTVmzJgBNzc3fPzxx/j973+P9evXIyoqSnWvrVu3wsHBAb/73e+QkZGBHTt2QK/XQ6PRoLy8HLNnz8aVK1fw0Ucfwc/PD7Nnz1b9/vz584iLi8OMGTNgNBqxc+dO/Pa3v8Xu3bvRuXNnREVFIT09HQcOHMCCBQvg5uYGAHB3d//B7SMIgiAIgiAI/8uIwUoQBEEQBEEQBOEnwt/fH5s3bwYATJ8+HRaLBe+//z5+/etfIywsDECTsWb//v3o2LHjPa+1Zs0aFBYW4sMPP0SPHj348+effx7fVqo4Pz8fr776KiZOnAgAmDJlCkaOHImYmBg2WH3wwQe4ffs2Nm3ahNGjRwMAnnjiCf7ND8FgMOCdd96BwWAAAPj6+uK1117DyZMnMWrUKP5eRkYG3nrrLQwbNow/W7FiBQoLC/Hee++hb9++AIDHH38c48ePx8qVKzFq1ChV6sOGhga8++67fK+SkhIcPHhQFU02ffp0ZGZm4qOPPmphsLp16xZiYmLQvXt3AMDYsWPx8MMPY/369di4cSPCwsLQtWtXHDhwAKNHj5YaVoIgCIIgCILwbyIpAQVBEARBEARBEH4ipk+frvr3U089BQD4/PPP+bN+/fp9q7HKbrfj+PHjGDFihMpYRSijlVrD0dEREyZM4H8bjUb06NEDt2/f5s9OnToFb29vlSHJZDJh6tSp97z2vYiOjmYDEgA8+eST0Ov1+Oyzz1Tf8/f3VxmrAOCzzz5DeHg4G6sAwMnJCdHR0cjOzkZycrLq+xMmTFDdKzw8HI2NjZg8ebLqe+Hh4cjNzYXNZlN93qtXLzZWAU3GtVGjRuH06dNoaGj4nm8uCIIgCIIgCMK3IQYrQRAEQRAEQRCEn4jAwEDVv9u3bw+tVquqe/RdInWKi4tRWVmJTp06/aDn8PHxaWHUcnV1VdWTys7ORvv27Vt8r3379j/onkDL93dycoKnpyeys7NVn7fWBjk5OejQoUOLz4ODg/nvSnx9fVX/dnZ2BgC0a9euxed2ux0VFRX3fFYACAoKQnV1NYqLi1v8TRAEQRAEQRCEfw8xWAmCIAiCIAiCINwnWouEcnBw+NHvq9PpfvR7/Dv8J9pAmR7wu3z+bWkUBUEQBEEQBEH4cRGDlSAIgiAIgiAIwk9ERkZG
i3/b7fbvXf/I3d0dFosFSUlJ/8nHU+Hn54fMzMwWhpzMzMwffM3m719VVYWCggL4+fl96299fX2RlpbW4vPU1FT++3+S5s8KAOnp6TCbzXB3dwfw7akXBUEQBEEQBEH47ojBShAEQRAEQRAE4SfivffeU/17x44dAICIiIjvdR2tVovRo0fjk08+wdWrV1v8/T8RLTR06FDk5eXhxIkT/FltbS0+/PDDH3zNDz74APX19fzvnTt3wmazfaf3j4yMxFdffYVLly7xZ1arFR9++CH8/Py+te7X9+XSpUu4du0a/zs3NxcnTpzAkCFDOELNbDYDQIt0goIgCIIgCIIgfH/09/sBBEEQBEEQBEEQ/lfIysrCrFmzMGzYMFy+fBn79u3Do48+irCwsO99rRdeeAHx8fGYMWMGpk6dipCQEBQUFODw4cN4//334eLi8m89a3R0NHbs2IF58+Zh5syZ8PT0xP79+2EymQD8sOii+vp6/PKXv8SYMWOQlpaG999/H3369MGoUaO+9be/+93vcPDgQTz99NOYMWMGXF1dsXfvXmRlZWHDhg13TfX3Q+ncuTN+85vfYMaMGTAajdi5cycA4I9//CN/p1u3bgCAtWvX4pFHHoHBYMCIESPg6Oj4H30WQRAEQRAEQfhfQAxWgiAIgiAIgiAIPxFvvPEG1q1bh9WrV0Ov1+Opp57C/Pnzf9C1vL298eGHH2LdunXYv38/Kisr4e3tjYiIiP9IDSgnJyds27YNy5cvx/bt2+Ho6IiJEyeiV69e+OMf/8iGq+/D4sWLsX//fqxfvx719fUYO3YsFi1a9J2MX23btsWuXbvw2muvYceOHaitrUVoaCi2bNmC4cOH/4A3vDf9+vXDAw88gE2bNiEnJwcdO3bEypUrVcbF8PBwPP/889i1axdOnToFu92OEydOiMFKEARBEARBEH4AmkapLCsIgiAIgiAIgvCjsmHDBmzcuBFnzpzh+kc/V9555x2sXLkSn3/+Oby9vb/Tbz766CMsWLAAe/bsQY8ePX7kJ/z3CQ0NxfTp07F48eL7/SiCIAiCIAiC8D+D1LASBEEQBEEQBEEQWqWmpkb179raWnzwwQcICgr6zsYqQRAEQRAEQRCE74KkBBQEQRAEQRAEQRBaZfbs2fD19UVYWBgqKyuxb98+pKam4vXXXwfQZNCqqKi45zVcXV1/ikcVBEEQBEEQBOFnjhisBEEQBEEQBEEQhFYZOnQo9uzZg/3796OhoQEdO3bE2rVr8cgjjwAA4uLisGDBgnteY/v27T/FowqCIAiCIAiC8DNHalgJgiAIgiAIgiAIP4j8/HwkJyff8zvdunWTKCtBEARBEARBEL4VMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xXt/X4AQRAEQRAEQRAEQRAEQRAEQRAE4X8bMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI95X/B/V7Nk7OqfW/AAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df[[\"pricing_prompt\", \"pricing_completion\"]].plot.scatter(\n", - " x=\"pricing_prompt\", y=\"pricing_completion\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "unsupported operand type(s) for /: 'str' and 'str'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_ratio\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_prompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/arraylike.py:210\u001b[0m, in \u001b[0;36mOpsMixin.__truediv__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__truediv__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__truediv__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruediv\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/base.py:1382\u001b[0m, in 
\u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: 
ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[0;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[0;34m(x, y, op)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'" - ] - } - ], - "source": [ - "df[\"price_ratio\"] = df[\"pricing_completion\"] / df[\"pricing_prompt\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# df[\"total_price\"] =" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py deleted file mode 100644 index c94786208..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py +++ /dev/null @@ -1,118 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# CONTENTS: -# - [Description](#description) - -# %% [markdown] -# -# # Description -# -# This notebook examines ... - -# %% -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" -# #!jupyter labextension enable - -# %% -# %load_ext autoreload -# %autoreload 2 - -import logging - -import helpers.hdbg as hdbg -import helpers.henv as henv - -# %% -print(henv.get_system_signature()[0]) - -hnotebook.config_notebook() - -# %% -# hdbg.init_logger(verbosity=logging.DEBUG) -hdbg.init_logger(verbosity=logging.INFO) -# hdbg.test_logger() -_LOG = logging.getLogger(__name__) - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet openai requests)" - -# %% -import helpers.hllm as hllm -import helpers.hpandas as hpandas - -# %% -val = hllm.get_model_stats() - -# %% -import pprint - -pprint.pprint(val[0]) - -# %% -import pandas as pd - -# %% -# Normalize the nested JSON -df = pd.json_normalize(val, sep="_") -df -# View the resulting DataFrame -# print(df.T) # Transpose just for readable vertical inspection - -# %% -df.iloc[0].T - -# %% -col_names = ["id", "context_length", "pricing_prompt", "pricing_completion"] - -# %% -df.dtypes - -# %% [markdown] -# # - -# %% -for col in df.columns: - print(hpandas.infer_column_types(df[col])) - -# %% -df.apply(lambda x: 
pd.Series(hpandas.infer_column_types(x))).T - -# %% -hpandas.infer_column_types_df(df) - - -# %% -pd.to_numeric(df["pricing_request"], errors="coerce").notna() - -# %% -df["pricing_completion"] - -# %% -df.sort_values("pricing_prompt")[col_names] - -# %% -df[["pricing_prompt", "pricing_completion"]].plot.scatter( - x="pricing_prompt", y="pricing_completion" -) - -# %% -df["price_ratio"] = df["pricing_completion"] / df["pricing_prompt"] - -# %% - -# %% -# df["total_price"] = diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb deleted file mode 100644 index 60491a1c6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb +++ /dev/null @@ -1,993 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-07T22:25:23.663978Z", - "start_time": "2020-06-07T22:25:23.661756Z" - } - }, - "source": [ - "# Description" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:40.920362Z", - "start_time": "2020-06-09T19:30:40.864535Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "%matplotlib inline\n", - "\n", - "import json\n", - "import logging\n", - "\n", - "import jsonpickle\n", - "import jsonpickle.ext.pandas as jsonpickle_pandas\n", - "\n", - "jsonpickle_pandas.register_handlers()\n", - "\n", - "import pandas as pd # noqa: E402\n", - "\n", - "import helpers.hdbg as hdbg # noqa: E402\n", - "import helpers.henv as henv # noqa: E402\n", - "import helpers.hplayback as hplayba # noqa: E402" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:43.871255Z", - "start_time": "2020-06-09T19:30:43.739350Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Logger already initialized: skipping\n", - "# Packages\n", - " python: 3.7.6\n", - " gluonnlp: 0.9.1\n", - " gluonts: 0.5.0\n", - " joblib: 0.15.1\n", - " mxnet: 1.6.0\n", - " numpy: 1.18.4\n", - " pandas: 1.0.3\n", - " pyarrow: 0.17.1\n", - " scipy: 1.4.1\n", - " seaborn: 0.10.1\n", - " sklearn: 0.23.1\n", - " statsmodels: 0.11.1\n", - "# Last commits:\n", - " * 268f2f1 saggese PTask2231: Checkpoint ( 2 days ago) Sun Jun 7 20:58:52 2020 (HEAD -> PTask2231_Playback_approach_for_unit_testing, origin/PTask2231_Playback_approach_for_unit_testing)\n", - " * 7025106 pavel-... PTask2291: Add args, kwargs. 
New tests ( 6 days ago) Wed Jun 3 11:38:56 2020 \n", - " * 60e0b11 saggese PTask2291: Add leftover files ( 10 days ago) Sat May 30 10:06:29 2020 \n" - ] - } - ], - "source": [ - "hdbg.init_logger(verbosity=logging.INFO)\n", - "\n", - "_LOG = logging.getLogger(__name__)\n", - "\n", - "_LOG.info(\"%s\", henv.get_system_signature()[0])\n", - "\n", - "hnotebook.config_notebook()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:53:12.564104Z", - "start_time": "2020-06-09T19:53:12.513350Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n" - ] - } - ], - "source": [ - "data = {\n", - " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", - " \"Price\": [700, 250, 800, 1200],\n", - "}\n", - "\n", - "df = pd.DataFrame(data, columns=[\"Product\", \"Price\"])\n", - "df.index.name = \"hello\"\n", - "print(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:56:24.324137Z", - "start_time": "2020-06-09T19:56:24.279767Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Product': hello\n", - " 0 Desktop Computer\n", - " 1 Tablet\n", - " 2 iPhone\n", - " 3 Laptop\n", - " Name: Product, dtype: object,\n", - " 'Price': hello\n", - " 0 700\n", - " 1 250\n", - " 2 800\n", - " 3 1200\n", - " Name: Price, dtype: int64}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# df.to_json(orient=\"\")\n", - "df.to_dict(orient=\"series\")" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:58:05.848188Z", - "start_time": "2020-06-09T19:58:05.747808Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - 
"\"pd.DataFrame({'Product': ['Desktop Computer', 'Tablet', 'iPhone', 'Laptop'], 'Price': [700, 250, 800, 1200]})\"" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.to_python_code(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:58:31.870465Z", - "start_time": "2020-06-09T19:58:31.822189Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ProductPrice
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", - "
" - ], - "text/plain": [ - " Product Price\n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(\n", - " {\n", - " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", - " \"Price\": [700, 250, 800, 1200],\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T20:14:52.983985Z", - "start_time": "2020-06-09T20:14:52.861966Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Initialize values for unit test.\n", - "dummy_0 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "dummy_0 = jsonpickle.decode(dummy_0)\n", - "dummy_1 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "dummy_1 = jsonpickle.decode(dummy_1)\n", - "# Call function.\n", - "act = F(dummy_0, dummy_1)\n", - "# Create expected value of function output.\n", - "exp = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop ComputerDesktop 
Computer,1400\\nTabletTablet,500\\niPhoneiPhone,1600\\nLaptopLaptop,2400\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "exp = jsonpickle.decode(exp)\n", - "# Check.\n", - "assert act.equals(exp)\n" - ] - } - ], - "source": [ - "use_playback = True\n", - "\n", - "\n", - "def F(a, b):\n", - " if use_playback:\n", - " playback = Playback(\"assert_equal\", \"F\", a, b)\n", - " playback.start()\n", - " c = a + b\n", - " if use_playback:\n", - " output = playback.end(c)\n", - " res = output\n", - " else:\n", - " res = c\n", - " return res\n", - "\n", - "\n", - "a = df\n", - "b = df\n", - "print(F(a, b))" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T20:20:24.981307Z", - "start_time": "2020-06-09T20:20:24.839197Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'[3, 3, ]'" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.to_python_code([\"3\", 3])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:54.111194Z", - "start_time": "2020-06-09T19:30:54.046499Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# obj1=\n", - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n", - "class=\n", - "# frozen=\n", - "{\n", - " \"meta\": {\n", - " \"dtypes\": {\n", - " \"Price\": \"int64\",\n", - " \"Product\": \"object\"\n", - " },\n", - " \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, 
\\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"\n", - " },\n", - " \"py/object\": \"pandas.core.frame.DataFrame\",\n", - " \"txt\": true,\n", - " \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\"\n", - "}\n", - "# obj2=\n", - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n", - "class=\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ProductPrice
hello
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", - "
" - ], - "text/plain": [ - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.round_trip_convert(df, logging.INFO)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-07T22:32:12.623139Z", - "start_time": "2020-06-07T22:32:12.577435Z" - } - }, - "outputs": [], - "source": [ - "hplayba.round_trip_convert(\"hello\", logging.INFO)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:45:27.442281Z", - "start_time": "2020-06-09T19:45:27.380299Z" - } - }, - "outputs": [], - "source": [ - "def F(a, b):\n", - " return a + b" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:45:36.907940Z", - "start_time": "2020-06-09T19:45:36.861549Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "dummy_0 = r\"3\"\n", - "dummy_0 = jsonpickle.decode(dummy_0)\n", - "dummy_1 = r\"2\"\n", - "dummy_1 = jsonpickle.decode(dummy_1)\n", - "# Call function.\n", - "act = F(dummy_0, dummy_1)\n", - "# Create expected value of function output.\n", - "exp = r\"5\"\n", - "exp = jsonpickle.decode(exp)\n", - "# Check.\n", - "assert act == exp\n", - "\n", - "\n", - "# #############################################################################\n", - "# Playback\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:31:00.704146Z", - "start_time": "2020-05-29T18:31:00.695276Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "class Playback:\n", - " # def __init__(self, 
file_name, mode, *args, **kwargs):\n", - " # self.args = args\n", - " # self.kwargs = kwargs\n", - " def __init__(self, file_name, mode, func_name, a, b):\n", - " self.a = a\n", - " self.b = b\n", - "\n", - " def start(self):\n", - " self.a_json = jsonpickle.encode(self.a)\n", - " self.b_json = jsonpickle.encode(self.b)\n", - "\n", - " def end(self, ret):\n", - " self.ret_json = jsonpickle.encode(ret)\n", - " output = []\n", - " output.append(\"# Initialize values for unit test.\")\n", - " output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", - " output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", - " output.append(\"# Apply values.\")\n", - " output.append(\"act = F(a, b)\")\n", - " output.append(\"exp = %s\" % jsonpickle.decode(self.ret_json))\n", - " # output.append(\"self.assertEqual(act, exp)\")\n", - " # output.append(\"assert act == exp\")\n", - " output = \"\\n\".join(output)\n", - " print(\"output=\", output)\n", - "\n", - "\n", - "# def F(a: int, b: int):\n", - "# c = {}\n", - "# c[\"pavel\"] = a + b\n", - "# return c\n", - "\n", - "\n", - "def F(a: int, b: int):\n", - " playback = Playback(\"\", \"\", \"F\", a, b)\n", - " playback.start()\n", - " c = {}\n", - " c[\"pavel\"] = a + b\n", - " playback.end(c)\n", - " return c\n", - "\n", - "\n", - "res = F(3, 4)\n", - "print(res)\n", - "\n", - "\n", - "# #############################################################################\n", - "# Playback\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:55:14.350318Z", - "start_time": "2020-05-29T18:55:14.319820Z" - } - }, - "outputs": [], - "source": [ - "class Playback: # noqa: F811\n", - " # def __init__(self, file_name, mode, *args, **kwargs):\n", - " # self.args = args\n", - " # self.kwargs = kwargs\n", - " def __init__(self, file_name, mode, func_name, a, b):\n", - " self.a = 
a\n", - " self.b = b\n", - "\n", - " def start(self):\n", - " self.a_json = jsonpickle.encode(self.a)\n", - " self.b_json = jsonpickle.encode(self.b)\n", - "\n", - " def end(self, ret):\n", - " self.ret_json = jsonpickle.encode(ret)\n", - " output = []\n", - " output.append(\"# Initialize values for unit test.\")\n", - " # output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", - " # output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", - " output.append(f\"a = r'{self.a_json}'\")\n", - " output.append(\"a = jsonpickle.decode(a)\")\n", - " output.append(f\"b = r'{self.b_json}'\")\n", - " output.append(\"b = jsonpickle.decode(b)\")\n", - " output.append(\"# Apply values.\")\n", - " # output.append(\"act = F(a, b)[1]\")\n", - " output.append(\"act = F(a, b)\")\n", - " output.append(f\"exp = r'{self.ret_json}'\")\n", - " output.append(\"exp = jsonpickle.decode(exp)\")\n", - " # output.append(\"self.assertEqual(act, exp)\")\n", - " output.append(\"assert act.equals(exp)\")\n", - " # output.append(\"assert act == exp\")\n", - " output = \"\\n\".join(output)\n", - " return output\n", - "\n", - "\n", - "# def F(a: int, b: int):\n", - "# c = {}\n", - "# c[\"pavel\"] = a + b\n", - "# return c\n", - "\n", - "use_playback = True\n", - "\n", - "\n", - "def F(a: pd.DataFrame, b: pd.DataFrame):\n", - " if use_playback:\n", - " playback = Playback(\"\", \"\", \"F\", a, b)\n", - " playback.start()\n", - " # c = {}\n", - " # c[\"pavel\"] = a + b\n", - " c = a + b\n", - " if use_playback:\n", - " output = playback.end(c)\n", - " res = output, c\n", - " else:\n", - " res = c\n", - " return res\n", - "\n", - "\n", - "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", - "b = pd.DataFrame({\"Price\": [1, 1, 1, 1]})\n", - "\n", - "res = F(a, b)\n", - "output = res[0]\n", - "print(output)\n", - "exec(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:51:02.968918Z", - "start_time": 
"2020-05-29T18:51:02.964513Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", - "a = jsonpickle.decode(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:44:10.790106Z", - "start_time": "2020-05-29T18:44:10.779459Z" - } - }, - "outputs": [], - "source": [ - "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", - "\n", - "# round_trip(a)\n", - "frozen = jsonpickle.encode(a)\n", - "print(frozen)\n", - "print(f\"frozen2 = '{frozen}'\")\n", - "# print(\"frozen = '%s'\" % frozen)\n", - "assert 0\n", - "#\n", - "print(\"frozen=\")\n", - "print(json_pretty_print(frozen)) # noqa: F821\n", - "#\n", - "obj2 = jsonpickle.decode(frozen)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:49:44.390404Z", - "start_time": "2020-05-29T18:49:44.384524Z" - } - }, - "outputs": [], - "source": [ - "frozen2 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", - "print(frozen2)\n", - "# print(\"\\n\")\n", - "# print(frozen)\n", - "if False and isinstance(frozen2, str):\n", - " # print(frozen2[61])\n", - " # assert 0\n", - " frozen2 = json.loads(frozen2)\n", - " print(frozen2)\n", - "frozen2 = 
jsonpickle.decode(frozen2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:40:34.682031Z", - "start_time": "2020-05-29T18:40:34.668987Z" - } - }, - "outputs": [], - "source": [ - "a = \"\"\"{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}\"\"\"\n", - "a = jsonpickle.decode(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:39:18.728676Z", - "start_time": "2020-05-29T18:39:18.711958Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "a = jsonpickle.decode(a)\n", - "b = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n1\\n1\\n1\\n1\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "b = jsonpickle.decode(b)\n", - "# Apply values.\n", - "act = F(a, b)\n", - "exp = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n701\\n251\\n801\\n1201\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 
1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "exp = jsonpickle.decode(exp)\n", - "assert act == exp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:27:30.781670Z", - "start_time": "2020-05-29T18:27:30.777539Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = 3\n", - "b = 4\n", - "# Apply values.\n", - "act = F(a, b)\n", - "exp = {\"pavel\": 7}\n", - "assert act == exp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:10:03.802405Z", - "start_time": "2020-05-29T18:10:03.790642Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "df2 = round_trip(df) # noqa: F821\n", - "\n", - "\n", - "# #############################################################################\n", - "# Thing\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T21:48:14.394447Z", - "start_time": "2020-05-11T21:48:14.384307Z" - } - }, - "outputs": [], - "source": [ - "class Thing:\n", - " def __init__(self, name):\n", - " self.name = name\n", - "\n", - "\n", - "obj = Thing(\"Awesome\")\n", - "\n", - "round_trip(obj) # noqa: F821" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T19:58:38.314059Z", - "start_time": "2020-05-11T19:58:38.309331Z" - } - }, - "outputs": [], - "source": [ - "def test(a: int, b: int):\n", - " print(round_trip(a)) # noqa: F821\n", - "\n", - "\n", - "test(\"strunz\", 6)\n", - "test(4, 6)\n", - "test([\"hello\"], 6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T14:03:41.315868Z", - "start_time": 
"2020-05-11T14:03:41.311264Z" - } - }, - "outputs": [], - "source": [ - "df.index.dtype #" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T14:03:00.632566Z", - "start_time": "2020-05-11T14:03:00.623714Z" - } - }, - "outputs": [], - "source": [ - "df.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:16:34.748252Z", - "start_time": "2020-05-29T18:16:34.736249Z" - } - }, - "outputs": [], - "source": [ - "# import io\n", - "# import io.StringIO\n", - "# from io import StringIO\n", - "\n", - "# output = StringIO.StringIO()\n", - "\n", - "orient = \"columns\"\n", - "# orient = \"split\"\n", - "# orient = \"records\"\n", - "# orient = \"table\"\n", - "df_as_str = df.to_json(orient=orient)\n", - "\n", - "# split\n", - "# records\n", - "# index\n", - "# values\n", - "# table\n", - "# columns (the default format)\n", - "\n", - "python_code = []\n", - "target_var = \"df_as_str\"\n", - "python_code.append(f\"{target_var} = {df_as_str}\")\n", - "python_code.append(f\"{target_var}.index.name = '{df.index.name}'\")\n", - "python_code = \"\\n\".join(python_code)\n", - "print(python_code)\n", - "\n", - "exec(python_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T13:42:52.575973Z", - "start_time": "2020-05-11T13:42:52.568178Z" - } - }, - "outputs": [], - "source": [ - "arr = eval(df_as_str)\n", - "df2 = pd.DataFrame.from_dict(arr, orient=\"columns\")\n", - "df2.index.name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:.conda-develop] *", - "language": "python", - "name": "conda-env-.conda-develop-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "165px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py deleted file mode 100644 index 22176ce52..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py +++ /dev/null @@ -1,374 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python [conda env:.conda-develop] * -# language: python -# name: conda-env-.conda-develop-py -# --- - -# %% [markdown] -# # Description - -# %% [markdown] -# # Imports - -# %% -# %load_ext autoreload -# %autoreload 2 -# %matplotlib inline - -import json -import logging - -import jsonpickle -import jsonpickle.ext.pandas as jsonpickle_pandas - -jsonpickle_pandas.register_handlers() - -import pandas as pd # noqa: E402 - -import helpers.hdbg as hdbg # noqa: E402 -import helpers.henv as henv # noqa: E402 -import helpers.hplayback as hplayba # noqa: E402 - -# %% -hdbg.init_logger(verbosity=logging.INFO) - -_LOG = logging.getLogger(__name__) - -_LOG.info("%s", henv.get_system_signature()[0]) - 
-hnotebook.config_notebook() - -# %% -data = { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], -} - -df = pd.DataFrame(data, columns=["Product", "Price"]) -df.index.name = "hello" -print(df) - -# %% -# df.to_json(orient="") -df.to_dict(orient="series") - -# %% -hplayba.to_python_code(df) - -# %% -pd.DataFrame.from_dict( - { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], - } -) - -# %% -use_playback = True - - -def F(a, b): - if use_playback: - playback = Playback("assert_equal", "F", a, b) - playback.start() - c = a + b - if use_playback: - output = playback.end(c) - res = output - else: - res = c - return res - - -a = df -b = df -print(F(a, b)) - -# %% -hplayba.to_python_code(["3", 3]) - -# %% -hplayba.round_trip_convert(df, logging.INFO) - -# %% -hplayba.round_trip_convert("hello", logging.INFO) - - -# %% -def F(a, b): - return a + b - - -# %% -# Initialize values for unit test. -dummy_0 = r"3" -dummy_0 = jsonpickle.decode(dummy_0) -dummy_1 = r"2" -dummy_1 = jsonpickle.decode(dummy_1) -# Call function. -act = F(dummy_0, dummy_1) -# Create expected value of function output. -exp = r"5" -exp = jsonpickle.decode(exp) -# Check. 
-assert act == exp - - -# ############################################################################# -# Playback -# ############################################################################# - - -# %% -class Playback: - # def __init__(self, file_name, mode, *args, **kwargs): - # self.args = args - # self.kwargs = kwargs - def __init__(self, file_name, mode, func_name, a, b): - self.a = a - self.b = b - - def start(self): - self.a_json = jsonpickle.encode(self.a) - self.b_json = jsonpickle.encode(self.b) - - def end(self, ret): - self.ret_json = jsonpickle.encode(ret) - output = [] - output.append("# Initialize values for unit test.") - output.append("a = %s" % jsonpickle.decode(self.a_json)) - output.append("b = %s" % jsonpickle.decode(self.b_json)) - output.append("# Apply values.") - output.append("act = F(a, b)") - output.append("exp = %s" % jsonpickle.decode(self.ret_json)) - # output.append("self.assertEqual(act, exp)") - # output.append("assert act == exp") - output = "\n".join(output) - print("output=", output) - - -# def F(a: int, b: int): -# c = {} -# c["pavel"] = a + b -# return c - - -def F(a: int, b: int): - playback = Playback("", "", "F", a, b) - playback.start() - c = {} - c["pavel"] = a + b - playback.end(c) - return c - - -res = F(3, 4) -print(res) - - -# ############################################################################# -# Playback -# ############################################################################# - - -# %% -class Playback: # noqa: F811 - # def __init__(self, file_name, mode, *args, **kwargs): - # self.args = args - # self.kwargs = kwargs - def __init__(self, file_name, mode, func_name, a, b): - self.a = a - self.b = b - - def start(self): - self.a_json = jsonpickle.encode(self.a) - self.b_json = jsonpickle.encode(self.b) - - def end(self, ret): - self.ret_json = jsonpickle.encode(ret) - output = [] - output.append("# Initialize values for unit test.") - # output.append("a = %s" % jsonpickle.decode(self.a_json)) - # 
output.append("b = %s" % jsonpickle.decode(self.b_json)) - output.append(f"a = r'{self.a_json}'") - output.append("a = jsonpickle.decode(a)") - output.append(f"b = r'{self.b_json}'") - output.append("b = jsonpickle.decode(b)") - output.append("# Apply values.") - # output.append("act = F(a, b)[1]") - output.append("act = F(a, b)") - output.append(f"exp = r'{self.ret_json}'") - output.append("exp = jsonpickle.decode(exp)") - # output.append("self.assertEqual(act, exp)") - output.append("assert act.equals(exp)") - # output.append("assert act == exp") - output = "\n".join(output) - return output - - -# def F(a: int, b: int): -# c = {} -# c["pavel"] = a + b -# return c - -use_playback = True - - -def F(a: pd.DataFrame, b: pd.DataFrame): - if use_playback: - playback = Playback("", "", "F", a, b) - playback.start() - # c = {} - # c["pavel"] = a + b - c = a + b - if use_playback: - output = playback.end(c) - res = output, c - else: - res = c - return res - - -a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) -b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - -res = F(a, b) -output = res[0] -print(output) -exec(output) - -# %% -# Initialize values for unit test. 
-a = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' -a = jsonpickle.decode(a) - -# %% -a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - -# round_trip(a) -frozen = jsonpickle.encode(a) -print(frozen) -print(f"frozen2 = '{frozen}'") -# print("frozen = '%s'" % frozen) -assert 0 -# -print("frozen=") -print(json_pretty_print(frozen)) # noqa: F821 -# -obj2 = jsonpickle.decode(frozen) - -# %% -frozen2 = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' -print(frozen2) -# print("\n") -# print(frozen) -if False and isinstance(frozen2, str): - # print(frozen2[61]) - # assert 0 - frozen2 = json.loads(frozen2) - print(frozen2) -frozen2 = jsonpickle.decode(frozen2) - -# %% -a = """{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}""" -a = jsonpickle.decode(a) - -# %% -# Initialize values for unit test. 
-a = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -a = jsonpickle.decode(a) -b = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n1\n1\n1\n1\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -b = jsonpickle.decode(b) -# Apply values. -act = F(a, b) -exp = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n701\n251\n801\n1201\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -exp = jsonpickle.decode(exp) -assert act == exp - -# %% -# Initialize values for unit test. -a = 3 -b = 4 -# Apply values. 
-act = F(a, b) -exp = {"pavel": 7} -assert act == exp - -# %% -df2 = round_trip(df) # noqa: F821 - - -# ############################################################################# -# Thing -# ############################################################################# - - -# %% -class Thing: - def __init__(self, name): - self.name = name - - -obj = Thing("Awesome") - -round_trip(obj) # noqa: F821 - - -# %% -def test(a: int, b: int): - print(round_trip(a)) # noqa: F821 - - -test("strunz", 6) -test(4, 6) -test(["hello"], 6) - -# %% -df.index.dtype # - -# %% -df.dtypes - -# %% -# import io -# import io.StringIO -# from io import StringIO - -# output = StringIO.StringIO() - -orient = "columns" -# orient = "split" -# orient = "records" -# orient = "table" -df_as_str = df.to_json(orient=orient) - -# split -# records -# index -# values -# table -# columns (the default format) - -python_code = [] -target_var = "df_as_str" -python_code.append(f"{target_var} = {df_as_str}") -python_code.append(f"{target_var}.index.name = '{df.index.name}'") -python_code = "\n".join(python_code) -print(python_code) - -exec(python_code) - -# %% -arr = eval(df_as_str) -df2 = pd.DataFrame.from_dict(arr, orient="columns") -df2.index.name - -# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb deleted file mode 100644 index 4516033f2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb +++ /dev/null @@ -1,1774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "895cb286", - "metadata": {}, - "source": [ - "Show Parquet / Pyarrow API." 
- ] - }, - { - "cell_type": "markdown", - "id": "b068d525", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "8f46ec68", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:53:22.684558Z", - "start_time": "2021-06-16T20:53:22.645267Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0m\u001b[33mWARNING\u001b[0m: Logger already initialized: skipping\n" - ] - } - ], - "source": [ - "import logging\n", - "import os\n", - "import random\n", - "\n", - "import pandas as pd\n", - "import pyarrow as pa\n", - "import pyarrow.dataset as ds\n", - "import pyarrow.parquet as pq\n", - "from pyarrow.dataset import DirectoryPartitioning\n", - "\n", - "import helpers.hdbg as hdbg\n", - "import helpers.hio as hio\n", - "\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "215ff89e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:19:03.323062Z", - "start_time": "2021-06-15T11:19:03.303632Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "2000-01-04 0 A 97 62\n", - "2000-01-05 0 A 12 25\n" - ] - } - ], - "source": [ - "def get_df() -> pd.DataFrame:\n", - " \"\"\"\n", - " Create pandas random data, like:\n", - "\n", - " ```\n", - " idx instr val1 val2\n", - " 2000-01-01 0 A 99 30\n", - " 2000-01-02 0 A 54 46\n", - " 2000-01-03 0 A 85 86\n", - " ```\n", - " \"\"\"\n", - " instruments = \"A B C D E\".split()\n", - " \"id stock val1 val2\".split()\n", - " df_idx = pd.date_range(\n", - " pd.Timestamp(\"2000-01-01\"), pd.Timestamp(\"2000-01-15\"), freq=\"1D\"\n", - " )\n", - " # print(df_idx)\n", - " random.seed(1000)\n", - "\n", - " df = []\n", - " for idx, inst in 
enumerate(instruments):\n", - " df_tmp = pd.DataFrame(\n", - " {\n", - " \"idx\": idx,\n", - " \"instr\": inst,\n", - " \"val1\": [random.randint(0, 100) for k in range(len(df_idx))],\n", - " \"val2\": [random.randint(0, 100) for k in range(len(df_idx))],\n", - " },\n", - " index=df_idx,\n", - " )\n", - " # print(df_tmp)\n", - " df.append(df_tmp)\n", - " df = pd.concat(df)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "8e8235d0", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:35:16.903580Z", - "start_time": "2021-06-15T11:35:16.895316Z" - } - }, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "EOL while scanning string literal (, line 4)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m4\u001b[0m\n\u001b[0;31m txt += \"# df=\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m EOL while scanning string literal\n" - ] - } - ], - "source": [ - "def df_to_str(df: pd.DataFrame) -> str:\n", - " txt = \"\"\n", - " txt += \"# df=\\n%s\" % df.head(3)\n", - " txt += \"\\n# df.shape=\\n%s\" % str(df.shape)\n", - " txt += \"\\n# df.dtypes=\\n%s\" % str(df.dtypes)\n", - " return txt" - ] - }, - { - "cell_type": "markdown", - "id": "17cc474b", - "metadata": {}, - "source": [ - "# Save and load all data in one file" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "cb399156", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:27.514505Z", - "start_time": "2021-06-15T11:25:27.496811Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = 
get_df()\n", - "# print(df.head())\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "940dc7d2", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:34.893472Z", - "start_time": "2021-06-15T11:25:34.886977Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "table=\n", - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "__index_level_0__: timestamp[ns]\n" - ] - } - ], - "source": [ - "table = pa.Table.from_pandas(df)\n", - "\n", - "print(\"table=\\n%s\" % table)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "93df67fc", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:38.560269Z", - "start_time": "2021-06-15T11:25:38.533905Z" - } - }, - "outputs": [], - "source": [ - "# Save.\n", - "file_name = \"df_in_one_file.pq\"\n", - "pq.write_table(table, file_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "155e36c0", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:51.016044Z", - "start_time": "2021-06-15T11:25:51.001034Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "__index_level_0__: timestamp[us]\n", - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Load.\n", - "df2 = pq.read_table(file_name)\n", - "print(df2)\n", - "\n", - "df2 = df2.to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "1098757c", - "metadata": {}, - "source": [ - "## Read a subset of columns" - ] - }, - { - "cell_type": "code", - 
"execution_count": 27, - "id": "6f4a652f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:27:11.924350Z", - "start_time": "2021-06-15T11:27:11.910680Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pyarrow.Table\n", - "idx: int64\n", - "val1: int64\n", - "# df=\n", - " idx val1\n", - "0 0 99\n", - "1 0 54\n", - "2 0 85\n", - "# df.shape=\n", - "(75, 2)\n", - "# df.dtypes=\n", - "idx int64\n", - "val1 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df2 = pq.read_table(file_name, columns=[\"idx\", \"val1\"])\n", - "print(df2)\n", - "\n", - "df2 = df2.to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "012cebdb", - "metadata": {}, - "source": [ - "## Partitioned dataset\n", - "\n", - "from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data\n", - "\n", - "- A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir\n", - "- E.g., \"Hive\" patitioning scheme \"key=vale\" dir names" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ca26642e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:11.964993Z", - "start_time": "2021-06-15T11:30:11.947282Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = get_df()\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "7cae349f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:27.617064Z", - "start_time": "2021-06-15T11:30:27.541418Z" - } - }, - "outputs": [], - "source": [ - "base = 
\".\"\n", - "dir_name = os.path.join(base, \"parquet_dataset_partitioned\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "fd57116d", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:30.672054Z", - "start_time": "2021-06-15T11:30:30.389512Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls parquet_dataset_partitioned" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "ac82b5ad", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:31:29.322947Z", - "start_time": "2021-06-15T11:31:29.298883Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "./parquet_dataset_partitioned/idx=0/cab9de6eff0c47bcb688a1ce437c7f89.parquet\n", - "./parquet_dataset_partitioned/idx=1/56813e569097420cae892720d3bb0789.parquet\n", - "./parquet_dataset_partitioned/idx=2/5c9a17d2e1294dd58c7d8695868c2cb5.parquet\n", - "./parquet_dataset_partitioned/idx=3/b28576eb22d54999980a313a24511497.parquet\n", - "./parquet_dataset_partitioned/idx=4/8ee3f0d7585b48959a560c954562add8.parquet\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "64394b7f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:32:01.839074Z", - "start_time": "2021-06-15T11:32:01.822727Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 A 99 30 0\n", - "2000-01-02 A 54 46 0\n", - "2000-01-03 A 85 86 0\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 
int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read everything.\n", - "df2 = dataset.to_table().to_pandas()\n", - "\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "df96e1db", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:33:05.171630Z", - "start_time": "2021-06-15T11:33:05.147040Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 B 18 22 1\n", - "2000-01-02 B 59 89 1\n", - "2000-01-03 B 91 90 1\n", - "# df.shape=\n", - "(15, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n", - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 A 99 30 0\n", - "2000-01-02 A 54 46 0\n", - "2000-01-03 A 85 86 0\n", - "# df.shape=\n", - "(45, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Load part of the data.\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") == 1).to_pandas()\n", - "print(df_to_str(df2))\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") < 3).to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "b3c27848", - "metadata": {}, - "source": [ - "## Add year-month partitions" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "69d2ea15", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:36:11.106142Z", - "start_time": "2021-06-15T11:36:11.087701Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2 year month\n", - "2000-01-01 0 A 99 30 2000 1\n", - "2000-01-02 0 A 54 46 2000 1\n", - "2000-01-03 0 A 85 86 2000 1\n", - "# df.shape=\n", - "(75, 6)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - 
"val2 int64\n", - "year int64\n", - "month int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = get_df()\n", - "df[\"year\"] = df.index.year\n", - "df[\"month\"] = df.index.month\n", - "\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "1a2f8c3a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:01.738085Z", - "start_time": "2021-06-15T11:37:01.730748Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "table=\n", - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "year: int64\n", - "month: int64\n", - "__index_level_0__: timestamp[ns]\n" - ] - } - ], - "source": [ - "table = pa.Table.from_pandas(df)\n", - "\n", - "print(\"table=\\n%s\" % table)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "9112ed65", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:04.832037Z", - "start_time": "2021-06-15T11:37:04.702121Z" - } - }, - "outputs": [], - "source": [ - "base = \".\"\n", - "dir_name = os.path.join(base, \"pq_partitioned2\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\", \"year\", \"month\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "844913cc", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:17.553902Z", - "start_time": "2021-06-15T11:37:17.276875Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls $dir_name" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "e5ba8be3", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:07.695235Z", - "start_time": "2021-06-15T11:37:07.433612Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"bc6b2314c7f640a38c62029280f6f65e.parquet\r\n" - ] - } - ], - "source": [ - "!ls $dir_name/idx=0/year=2000/month=1" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "2d93f116", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:26.153218Z", - "start_time": "2021-06-15T11:37:26.109040Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "./pq_partitioned2/idx=0/year=2000/month=1/bc6b2314c7f640a38c62029280f6f65e.parquet\n", - "./pq_partitioned2/idx=1/year=2000/month=1/bb178ff0bdd344ca8328f9d67398b322.parquet\n", - "./pq_partitioned2/idx=2/year=2000/month=1/16081eea25fd4da6bd802037b541766c.parquet\n", - "./pq_partitioned2/idx=3/year=2000/month=1/1557b3c461054eadba16e3072fbd3a8a.parquet\n", - "./pq_partitioned2/idx=4/year=2000/month=1/07a0c7fcf054450296b35452b57236ef.parquet\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "21148afd", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:39:19.396955Z", - "start_time": "2021-06-15T11:39:19.374534Z" - }, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx year month\n", - "2000-01-01 C 99 37 2 2000 1\n", - "2000-01-02 C 98 48 2 2000 1\n", - "2000-01-03 C 70 58 2 2000 1\n", - "# df.shape=\n", - "(15, 6)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "year int32\n", - "month int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") == 2).to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - 
"id": "d9e4e596", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:54:34.478646Z", - "start_time": "2021-06-16T20:54:34.250254Z" - }, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "year: int64\n", - "month: int64\n", - "__index_level_0__: timestamp[ns]\n", - "-- schema metadata --\n", - "pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975\n" - ] - } - ], - "source": [ - "# We could scan manually and create the dirs manually if we don't want to add\n", - "# add a new dir.\n", - "base = \".\"\n", - "dir_name = os.path.join(base, \"parquet_dataset_partitioned2\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "schemas = []\n", - "\n", - "schema = pa.Table.from_pandas(df).schema\n", - "print(schema)\n", - "# assert 0\n", - "# idx: int64\n", - "# instr: string\n", - "# val1: int64\n", - "# val2: int64\n", - "# year: int64\n", - "# month: int64\n", - "\n", - "# grouped = df.groupby(lambda x: x.day)\n", - "group_by_idx = df.groupby(\"idx\")\n", - "for idx, df_tmp in group_by_idx:\n", - " _LOG.debug(\"idx=%s -> df.shape=%s\", idx, str(df_tmp.shape))\n", - " #\n", - " group_by_year = df_tmp.groupby(lambda x: x.year)\n", - " for year, df_tmp2 in group_by_year:\n", - " _LOG.debug(\"year=%s -> df.shape=%s\", year, str(df_tmp2.shape))\n", - " #\n", - " group_by_month = df_tmp2.groupby(lambda x: x.month)\n", - " for month, df_tmp3 in group_by_month:\n", - " _LOG.debug(\"month=%s -> df.shape=%s\", month, str(df_tmp3.shape))\n", - " # file_name = \"df_in_one_file.pq\"\n", - " # pq.write_table(table, file_name)\n", - " # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", - " subdir_name = os.path.join(\n", - " dir_name, f\"idx={idx}\", f\"year={year}\", f\"month={month}\"\n", - " )\n", - " table = pa.Table.from_pandas(df_tmp3, schema=schema)\n", - " 
schemas.append(table.schema)\n", - " # print(df_tmp3)\n", - " # print(table.schema)\n", - " # pq.write_to_dataset(table,\n", - " # subdir_name, schema=schema)\n", - " file_name = os.path.join(subdir_name, \"df_out.pq\")\n", - " hio.create_enclosing_dir(file_name)\n", - " pq.write_table(table, file_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "id": "8309de4a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:41:14.320037Z", - "start_time": "2021-06-16T20:41:14.314354Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schemas[0] == schemas[4]" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "f0e49f46", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:42:26.864001Z", - "start_time": "2021-06-16T20:42:26.856395Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " 
__index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975]" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schemas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1130cbc2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "e5bdcdd8", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:53:50.373825Z", - "start_time": "2021-06-16T20:53:50.099251Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df_out.pq\r\n" - ] - } - ], - "source": [ - "!ls $dir_name/idx=0/year=2000/month=1" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "id": "aaf67ae6", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:55:02.764098Z", - "start_time": "2021-06-16T20:55:02.717192Z" - } - }, - "outputs": [ - { - "ename": "ArrowInvalid", - "evalue": "Unable to merge: Field month has incompatible types: int64 vs int32", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mArrowInvalid\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msrc_dir\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34mf\"{dir_name}/idx=0/year=2000\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m dataset = ds.dataset(src_dir,\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"parquet\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m partitioning=\"hive\")\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36mdataset\u001b[0;34m(source, schema, format, filesystem, partitioning, partition_base_dir, exclude_invalid_files, ignore_prefixes)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_filesystem_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36m_filesystem_dataset\u001b[0;34m(source, schema, filesystem, partitioning, format, partition_base_dir, exclude_invalid_files, selector_ignore_prefixes)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mfactory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFileSystemDatasetFactory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpaths_or_selector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfactory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DatasetFactory.finish\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.pyarrow_internal_check_status\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mArrowInvalid\u001b[0m: Unable to merge: Field month has incompatible types: int64 vs int32" - ] - } - ], - "source": [ - "# Read data back.\n", - "# https://github.com/dask/dask/issues/4194\n", - "# src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\n", - "src_dir = f\"{dir_name}/idx=0/year=2000\"\n", - "dataset = ds.dataset(src_dir, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "df2 = dataset.to_table().to_pandas()\n", - "# 
print(df_to_str(df2))\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "markdown", - "id": "98f4111d", - "metadata": {}, - "source": [ - "## Partition manually" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "id": "f0b33d85", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T00:57:11.260871Z", - "start_time": "2021-06-15T00:57:11.235982Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(((year == 2009) and (month == 11)) and (day == 3))\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Neither field_names nor schema was passed; cannot infer field_names", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/2009/11/3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiscover\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DirectoryPartitioning.discover\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Neither field_names nor schema was passed; cannot infer field_names" - ] - } - ], - "source": [ - "partitioning = DirectoryPartitioning(\n", - " pa.schema([(\"year\", pa.int16()), (\"month\", pa.int8()), (\"day\", pa.int8())])\n", - ")\n", - 
"print(partitioning.parse(\"/2009/11/3\"))\n", - "\n", - "# partitioning.discover()" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "ad70cbee", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:03:31.809969Z", - "start_time": "2021-06-16T11:03:31.526597Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls /app/data" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "b19d1189", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:18:31.838549Z", - "start_time": "2021-06-16T11:18:31.821223Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", - "/app/data/idx=0/year=2000/month=1/0435eeb615b14155bdc26266b91a4b1b.parquet\n", - "/app/data/idx=0/year=2000/month=1/05cc8a039ec746acb5668fde0a372028.parquet\n", - "/app/data/idx=0/year=2000/month=1/06039c8c6e9e4d54be7dcbef2bcdfa78.parquet\n", - "/app/data/idx=0/year=2000/month=1/0bb8e349594445a08fca4e337a7922d4.parquet\n", - "/app/data/idx=0/year=2000/month=1/115df7cedef540469cec56ee40ac19bd.parquet\n", - "/app/data/idx=0/year=2000/month=1/1174a70ffe614f4a9875b680e255902b.parquet\n", - "/app/data/idx=0/year=2000/month=1/122bdd75109c4fc7918d6db00f7bed41.parquet\n", - "/app/data/idx=0/year=2000/month=1/1543e41a14234c279fdfaa8656e8a71d.parquet\n", - "/app/data/idx=0/year=2000/month=1/17bd91618d5240fe83309608e91cb1ef.parquet\n", - "/app/data/idx=0/year=2000/month=1/18461c0ee57845768a503cfc865e323b.parquet\n", - "/app/data/idx=0/year=2000/month=1/1994694468184272a388fb8b40f03d5b.parquet\n", - "/app/data/idx=0/year=2000/month=1/1eb5f1adbe57418fa5d866d35902c39a.parquet\n", - "/app/data/idx=0/year=2000/month=1/2bd3c3ae435b489bb194ef7b2a715d9a.parquet\n", - "/app/data/idx=0/year=2000/month=1/2ded0d292def4e1186653d90852295f6.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/2ff4a2fa54664e67bab85a76324738ed.parquet\n", - "/app/data/idx=0/year=2000/month=1/378e55b8faf24033abf1c275741a88e8.parquet\n", - "/app/data/idx=0/year=2000/month=1/37a96e8834af4a87bc64ec3d1199ad54.parquet\n", - "/app/data/idx=0/year=2000/month=1/3a24331d6e51402d9a86c974f8a3bd05.parquet\n", - "/app/data/idx=0/year=2000/month=1/3ae93c6a21cc4a88bbaf90219f275563.parquet\n", - "/app/data/idx=0/year=2000/month=1/3b5f35d2add64a738cec5061659e35a2.parquet\n", - "/app/data/idx=0/year=2000/month=1/3d16749690f840c49facce0e37461a7e.parquet\n", - "/app/data/idx=0/year=2000/month=1/44bf70580b9a43829addb2a9e8f89dc6.parquet\n", - "/app/data/idx=0/year=2000/month=1/46237f338cee47c69f33b15fcb83817a.parquet\n", - "/app/data/idx=0/year=2000/month=1/46f19733b2d642c29adb58bf9499b6ca.parquet\n", - "/app/data/idx=0/year=2000/month=1/485f7d3d06b3486ca4bc8b35420f997a.parquet\n", - "/app/data/idx=0/year=2000/month=1/4cd8551c6c8f4daab7313732b9c1cea8.parquet\n", - "/app/data/idx=0/year=2000/month=1/4eca6951932d47d8a5678422da4a3d70.parquet\n", - "/app/data/idx=0/year=2000/month=1/512a55d6f91c412f951ba66728bfe118.parquet\n", - "/app/data/idx=0/year=2000/month=1/521193be72e1465ca33034cfd8e93ac9.parquet\n", - "/app/data/idx=0/year=2000/month=1/59aa37cc4792493899e812215b3bb822.parquet\n", - "/app/data/idx=0/year=2000/month=1/5aaa32a61b614d65a91747336c8028f0.parquet\n", - "/app/data/idx=0/year=2000/month=1/5cf9306e97ae42fdae53369710a5d0b6.parquet\n", - "/app/data/idx=0/year=2000/month=1/5fb97e575a9c4ff282293e9810040594.parquet\n", - "/app/data/idx=0/year=2000/month=1/613e6048f8434fccafed8c9d457fddc1.parquet\n", - "/app/data/idx=0/year=2000/month=1/61d608f23a69494eaef248d79a776ede.parquet\n", - "/app/data/idx=0/year=2000/month=1/62bf226aebb641229b33f7e3bf9f5cb1.parquet\n", - "/app/data/idx=0/year=2000/month=1/62c09d56d67d4c738568fed318152ca9.parquet\n", - "/app/data/idx=0/year=2000/month=1/652129318d7a4d5b83e256a94803ecdc.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/67607b3708e44233927974a861972a99.parquet\n", - "/app/data/idx=0/year=2000/month=1/69915fb955c24302a905e4520a76f547.parquet\n", - "/app/data/idx=0/year=2000/month=1/6cc812431ef44bd195e5baf9715095a6.parquet\n", - "/app/data/idx=0/year=2000/month=1/6ce2493e58b34b53ae42da84ee0ef165.parquet\n", - "/app/data/idx=0/year=2000/month=1/6e88cf1910bc4a71bcea865ed1605363.parquet\n", - "/app/data/idx=0/year=2000/month=1/70c1ba22a85f4b489096f80eacd5855c.parquet\n", - "/app/data/idx=0/year=2000/month=1/7705f37eac7e40ceb2fba4c9fd2cb81d.parquet\n", - "/app/data/idx=0/year=2000/month=1/7bdb4feb6a874697b8c2f9a6cb03a6e6.parquet\n", - "/app/data/idx=0/year=2000/month=1/7fd82496e8274e999d217df302fd46b0.parquet\n", - "/app/data/idx=0/year=2000/month=1/8130570ceae44ca69ce7b2cd9865c3ec.parquet\n", - "/app/data/idx=0/year=2000/month=1/83f8e04fd5ac49ec80ac7b98e8221278.parquet\n", - "/app/data/idx=0/year=2000/month=1/8469e01698bf47f28fda41a3935eeb64.parquet\n", - "/app/data/idx=0/year=2000/month=1/874aa31290804dd0abca1a8f40dc4875.parquet\n", - "/app/data/idx=0/year=2000/month=1/888ef49654f241df8cae8454a5cd3f07.parquet\n", - "/app/data/idx=0/year=2000/month=1/8aa4e41d00fc438c9de0906ecc66bbb9.parquet\n", - "/app/data/idx=0/year=2000/month=1/90e662712235472ebae79fd64eaae094.parquet\n", - "/app/data/idx=0/year=2000/month=1/91e7dcbfc57a495a943bad2400690bc1.parquet\n", - "/app/data/idx=0/year=2000/month=1/9394c04aef64432fb94219d0e8b50286.parquet\n", - "/app/data/idx=0/year=2000/month=1/9740961302bc40b192d20715c52d6ef6.parquet\n", - "/app/data/idx=0/year=2000/month=1/99e92f5585514ed4bd43b5bf50bdaaa8.parquet\n", - "/app/data/idx=0/year=2000/month=1/9bf5c3598f69411fb1acdc30779b25bd.parquet\n", - "/app/data/idx=0/year=2000/month=1/9d81c342203c4396ac2d9efcbb0cae7a.parquet\n", - "/app/data/idx=0/year=2000/month=1/9d8b2486e80f40468cf4ae50a41fda41.parquet\n", - "/app/data/idx=0/year=2000/month=1/a170565f336f4b3b99994c8d83012a4d.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/a50138dae90f478781bf032908703ef4.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5ab58aa310e47669e9d3604bf94f155.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5bd118e999e4df6ab3306e52671228e.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5c0a7da693147b98f68811b4af7c79e.parquet\n", - "/app/data/idx=0/year=2000/month=1/a84afce396eb4afa91de3b08129e2ab7.parquet\n", - "/app/data/idx=0/year=2000/month=1/a8c1f364a7c944bb89d59d354059e596.parquet\n", - "/app/data/idx=0/year=2000/month=1/aa3bb180eda948c4aab93428ece443a8.parquet\n", - "/app/data/idx=0/year=2000/month=1/aa868fa8e11a4a838c19a1a260dcf6f6.parquet\n", - "/app/data/idx=0/year=2000/month=1/b01aa53c572d492f9667f157455742fc.parquet\n", - "/app/data/idx=0/year=2000/month=1/b6a7fc9dd14a4af6a3635cd138abdfe2.parquet\n", - "/app/data/idx=0/year=2000/month=1/b740e474de9f4b5497877c14f688faed.parquet\n", - "/app/data/idx=0/year=2000/month=1/b81d3d9c4045498c9deb3968b935e422.parquet\n", - "/app/data/idx=0/year=2000/month=1/b8c5a9f58500424785e4c83520931127.parquet\n", - "/app/data/idx=0/year=2000/month=1/b9176233e3934efebb0b12e1a780a3b1.parquet\n", - "/app/data/idx=0/year=2000/month=1/ba3d62351b7745f5a4e18f27159d5820.parquet\n", - "/app/data/idx=0/year=2000/month=1/bb9f583ed63840b39ada7bb0f45b9d57.parquet\n", - "/app/data/idx=0/year=2000/month=1/c55358bb09194e7aad9828678b5eaa61.parquet\n", - "/app/data/idx=0/year=2000/month=1/c5e31c9f04a6491dbf068fa889095e27.parquet\n", - "/app/data/idx=0/year=2000/month=1/c70308ef1a954ccea429f0de60c41fb3.parquet\n", - "/app/data/idx=0/year=2000/month=1/cf1e928b55ba4dd09bfa2765dadffb76.parquet\n", - "/app/data/idx=0/year=2000/month=1/d08715970c714455b7b9fbf18a86e8c0.parquet\n", - "/app/data/idx=0/year=2000/month=1/d27b68dc839f47e2a25814d805b9d759.parquet\n", - "/app/data/idx=0/year=2000/month=1/d46043c1511647a5b3b96450580ce6e1.parquet\n", - "/app/data/idx=0/year=2000/month=1/d592794fbc7f4ed0877d5a350fabf8d4.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/d8b05ee145d046a1ac321708b68e91de.parquet\n", - "/app/data/idx=0/year=2000/month=1/d9163626e55f40bb88142c43eb4b9fab.parquet\n", - "/app/data/idx=0/year=2000/month=1/dcb0cd8bc9084246955a6090f643a43d.parquet\n", - "/app/data/idx=0/year=2000/month=1/dd0db6d0e040442bb0b950efa6ac6e6a.parquet\n", - "/app/data/idx=0/year=2000/month=1/dd306d9fd65a459fbbf1e32fc9260ae3.parquet\n", - "/app/data/idx=0/year=2000/month=1/e05e535b8969470680658f6c2924bb68.parquet\n", - "/app/data/idx=0/year=2000/month=1/e3aff8e0f7094609b4de8bacac5faa4c.parquet\n", - "/app/data/idx=0/year=2000/month=1/e439d12c5539461da2b12a54d7dbb1c3.parquet\n", - "/app/data/idx=0/year=2000/month=1/e51258868c044644a708c74ff4c2ca46.parquet\n", - "/app/data/idx=0/year=2000/month=1/ea632843bd34467496837fea693443ff.parquet\n", - "/app/data/idx=0/year=2000/month=1/ecf1306aadb04ecdabb50803116eb0fa.parquet\n", - "/app/data/idx=0/year=2000/month=1/ef2355b80a7346afbabd33743d7e69a2.parquet\n", - "/app/data/idx=0/year=2000/month=1/ef7d760f2a2245e08f8c038bdf554edd.parquet\n", - "/app/data/idx=0/year=2000/month=1/f4ca5d31138248eca2beb467548461ed.parquet\n", - "/app/data/idx=0/year=2000/month=1/fba715c8fda84ad88d370f71b2408c12.parquet\n", - "/app/data/idx=0/year=2000/month=1/fe435999dba9476baec1b3009d529d32.parquet\n", - "/app/data/idx=0/year=2000/month=1/fe53414bfef84cb39ca04b48c8e8332c.parquet\n", - "/app/data/idx=0/year=2000/month=1/ff75b3e1006f42c9ba9deb689324ee3e.parquet\n", - "/app/data/idx=1/year=2000/month=1/056b4d30021044298d7fde4cdd296561.parquet\n", - "/app/data/idx=1/year=2000/month=1/0c138f0939f347928f5c2d1c92207d57.parquet\n", - "/app/data/idx=1/year=2000/month=1/0cb27647424c4302b7a1cd47369b4e6d.parquet\n", - "/app/data/idx=1/year=2000/month=1/1064ed9fc62a450890a19bd906d7953a.parquet\n", - "/app/data/idx=1/year=2000/month=1/14f3b6e2235c4a2eabf23840c82059ec.parquet\n", - "/app/data/idx=1/year=2000/month=1/1541e4cf70a048b88c7f8296456b8437.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/19e5b00a91f64342be20a2faee8ef69c.parquet\n", - "/app/data/idx=1/year=2000/month=1/2512f9bc30c04375bd71f270e1901050.parquet\n", - "/app/data/idx=1/year=2000/month=1/2641066820c74d5fadd5d1a42b40d23f.parquet\n", - "/app/data/idx=1/year=2000/month=1/2b1c634e1ded48a2887abbb539f1ea41.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bc577092b964473943428b8c04f6414.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bc84c76804345c581c00b8e0ad59752.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bd2238465b1416a8870494b579fae42.parquet\n", - "/app/data/idx=1/year=2000/month=1/2d5c13231ffc48aeb76bdb071663ceff.parquet\n", - "/app/data/idx=1/year=2000/month=1/2e48508ad08c4154813996117b6a833a.parquet\n", - "/app/data/idx=1/year=2000/month=1/3ca7d082ede544aab9f1f564acbffc14.parquet\n", - "/app/data/idx=1/year=2000/month=1/3d1f61cf39764307bbf39762d9c38af7.parquet\n", - "/app/data/idx=1/year=2000/month=1/40a2f2b0bd8c49be95aafc319ffd4a69.parquet\n", - "/app/data/idx=1/year=2000/month=1/4201c94937bc44f3809d9bf883b49cd7.parquet\n", - "/app/data/idx=1/year=2000/month=1/422474d1c6934fd298944ef7c9f21bfe.parquet\n", - "/app/data/idx=1/year=2000/month=1/444a6621429443c8b6550c6c04b27a24.parquet\n", - "/app/data/idx=1/year=2000/month=1/4940c21244274606bd6b543df4738ccf.parquet\n", - "/app/data/idx=1/year=2000/month=1/4b87781720884af7ae79d3f59fd69cd3.parquet\n", - "/app/data/idx=1/year=2000/month=1/4dd866c257864005a62854991f666b25.parquet\n", - "/app/data/idx=1/year=2000/month=1/4f06000c93bb45f18edfa84eeb89a1b9.parquet\n", - "/app/data/idx=1/year=2000/month=1/50716e5b2e004ba38d414a101ae09427.parquet\n", - "/app/data/idx=1/year=2000/month=1/50fc4338cf41483091d11a2616eb6221.parquet\n", - "/app/data/idx=1/year=2000/month=1/563109ba1ed647ef9518393a9d1ddb2e.parquet\n", - "/app/data/idx=1/year=2000/month=1/586e3969f1084af2bf28cee6f721cdc6.parquet\n", - "/app/data/idx=1/year=2000/month=1/5a1ba9682db3414ea33666e64d055535.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/5e2241ecbf364a0784626be86e38d6eb.parquet\n", - "/app/data/idx=1/year=2000/month=1/6300ef1b3beb44f0937dc8f890e845ce.parquet\n", - "/app/data/idx=1/year=2000/month=1/64aeabc396ba42ada56c695a32ed12a7.parquet\n", - "/app/data/idx=1/year=2000/month=1/664ca39e99134dabbe6d4c7402f626aa.parquet\n", - "/app/data/idx=1/year=2000/month=1/68efc5543f394005bb82c0dc63a3b01f.parquet\n", - "/app/data/idx=1/year=2000/month=1/6c51260b47964705a3dcfa1cf25ca106.parquet\n", - "/app/data/idx=1/year=2000/month=1/6f9ad552153244679f73a058dfc5b42e.parquet\n", - "/app/data/idx=1/year=2000/month=1/718ffd8c75a14cde953e8e3275341d31.parquet\n", - "/app/data/idx=1/year=2000/month=1/728984a554734a25a69f0eb1f32f842f.parquet\n", - "/app/data/idx=1/year=2000/month=1/75296fd97a724c74bc09e9d64b528f50.parquet\n", - "/app/data/idx=1/year=2000/month=1/76ca85d0dfd849829f105ee6fddb6439.parquet\n", - "/app/data/idx=1/year=2000/month=1/77ac6bd92e7f4a46bbc7634de174bbf3.parquet\n", - "/app/data/idx=1/year=2000/month=1/79a48d3eb0c144ccb13fa4baf944c92b.parquet\n", - "/app/data/idx=1/year=2000/month=1/7a1ae42ab80b4cbf9c00a5b7f213a12c.parquet\n", - "/app/data/idx=1/year=2000/month=1/7af9fe9698494063a751f9a8f5a317dc.parquet\n", - "/app/data/idx=1/year=2000/month=1/7cd226f5679b4cae9af7b881fa1787b7.parquet\n", - "/app/data/idx=1/year=2000/month=1/7fed9a3f251c44209ce0933cfe60ec98.parquet\n", - "/app/data/idx=1/year=2000/month=1/842f90063cbb44b4ae1e7d6b9b4aa59e.parquet\n", - "/app/data/idx=1/year=2000/month=1/84dceabacd264c82981347142463feb9.parquet\n", - "/app/data/idx=1/year=2000/month=1/85d7b8fa841e42b097e34dcd8f13beca.parquet\n", - "/app/data/idx=1/year=2000/month=1/878a1b363a0a48c3b0af294e9f885d72.parquet\n", - "/app/data/idx=1/year=2000/month=1/887e26b6f1004e4fb2a5e373b4d9c5f3.parquet\n", - "/app/data/idx=1/year=2000/month=1/88bc144aa2ed4334b077b19f702a9a99.parquet\n", - "/app/data/idx=1/year=2000/month=1/88fe979886ee453789ca1b1083300618.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/8b75d58338e64ae1bc694bb0d7044597.parquet\n", - "/app/data/idx=1/year=2000/month=1/8cf24285a4a5450ca5c56c731f5c87a0.parquet\n", - "/app/data/idx=1/year=2000/month=1/8d873dde8103478ba44283b5c90e5060.parquet\n", - "/app/data/idx=1/year=2000/month=1/8e25293517d8490b9f12892f63f35b3a.parquet\n", - "/app/data/idx=1/year=2000/month=1/92bbf16c4b7f4888ae4f93efcec6d40a.parquet\n", - "/app/data/idx=1/year=2000/month=1/9443d531d13f41b491771f22caa9d5a4.parquet\n", - "/app/data/idx=1/year=2000/month=1/94b871d36d384a24a6f42f34d56f822c.parquet\n", - "/app/data/idx=1/year=2000/month=1/9543cef54d3340ba9c8a2dca154947b8.parquet\n", - "/app/data/idx=1/year=2000/month=1/985415e78a0c4abcb42a96c44bdef44b.parquet\n", - "/app/data/idx=1/year=2000/month=1/9b501f9c98c3455ab37f13dc32d4836e.parquet\n", - "/app/data/idx=1/year=2000/month=1/9f0ac6f2e23242b1afb424389a8a1f08.parquet\n", - "/app/data/idx=1/year=2000/month=1/a20bfc0770454e1185f3d1b91efed93c.parquet\n", - "/app/data/idx=1/year=2000/month=1/a31f4026dbab4ef9807081ad9be5e5cc.parquet\n", - "/app/data/idx=1/year=2000/month=1/a472f43a45da4357b63cb0b5535e3237.parquet\n", - "/app/data/idx=1/year=2000/month=1/a74453d72e364b0f819ecf238d9b53fd.parquet\n", - "/app/data/idx=1/year=2000/month=1/a94d3fce611243d29a21b612f01e5a18.parquet\n", - "/app/data/idx=1/year=2000/month=1/a990f67b865f4e599ffa926341915ae2.parquet\n", - "/app/data/idx=1/year=2000/month=1/aa28c2d20ed140b18ddead5b11b96a0b.parquet\n", - "/app/data/idx=1/year=2000/month=1/aa724649481e4f7aa95b78cfe333c72d.parquet\n", - "/app/data/idx=1/year=2000/month=1/ac4487b08071423481580622be8d9914.parquet\n", - "/app/data/idx=1/year=2000/month=1/ad2a3795a1ad46f0b7b509a6ebdc85f4.parquet\n", - "/app/data/idx=1/year=2000/month=1/afa56f8175ed41a8b34bac4ac6786cf3.parquet\n", - "/app/data/idx=1/year=2000/month=1/b6c7cee2c50642bbaacf29e16dbbece5.parquet\n", - "/app/data/idx=1/year=2000/month=1/b9c0158311a04c3fa9c594d6db280053.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/bbbd7a1b72b645ed8afdada3a0fd9fac.parquet\n", - "/app/data/idx=1/year=2000/month=1/bbce481ce9fc404684db9578007edd4b.parquet\n", - "/app/data/idx=1/year=2000/month=1/bbf2ea53874d4bb49b7ebf959c24b060.parquet\n", - "/app/data/idx=1/year=2000/month=1/bd054b89ad8a46f29968468a4fd6d34d.parquet\n", - "/app/data/idx=1/year=2000/month=1/c1a395d1127240c1b9d7ebcb0d63842f.parquet\n", - "/app/data/idx=1/year=2000/month=1/c27376832ccd439685bdc3b11cdcec0f.parquet\n", - "/app/data/idx=1/year=2000/month=1/c5c55b01bbe1494e9297385e99e9f0d3.parquet\n", - "/app/data/idx=1/year=2000/month=1/c872faa9a863454cadc603827abd3f6c.parquet\n", - "/app/data/idx=1/year=2000/month=1/c9528d72e8574a279c0995c3de171de3.parquet\n", - "/app/data/idx=1/year=2000/month=1/cb7475b11c924a689515ade22ec7b134.parquet\n", - "/app/data/idx=1/year=2000/month=1/cb9a2e526b7845daaaf8f3ced61d8597.parquet\n", - "/app/data/idx=1/year=2000/month=1/cd356e54f63c483ea4792e842667c1ac.parquet\n", - "/app/data/idx=1/year=2000/month=1/cdd3925db9ae44a0ba2760031b229219.parquet\n", - "/app/data/idx=1/year=2000/month=1/d118c630c6194befaae2217985c9073c.parquet\n", - "/app/data/idx=1/year=2000/month=1/d68ddf28bd144430a5dc2c4437f37472.parquet\n", - "/app/data/idx=1/year=2000/month=1/d7adfebd0e9249f989f41e10ca61bf59.parquet\n", - "/app/data/idx=1/year=2000/month=1/d9b7947e9c6b400080d2226093fcc571.parquet\n", - "/app/data/idx=1/year=2000/month=1/d9f610ef03c748619ee5ef2ddcde2634.parquet\n", - "/app/data/idx=1/year=2000/month=1/dcbf892a4231404c90139ee3adfc6815.parquet\n", - "/app/data/idx=1/year=2000/month=1/e083fc488a7446bbbdad82c37f8fca29.parquet\n", - "/app/data/idx=1/year=2000/month=1/e5f84abccb0d407898e892f78dcb9ce1.parquet\n", - "/app/data/idx=1/year=2000/month=1/e74ca84dac2e4d53977a54d9daeb7adc.parquet\n", - "/app/data/idx=1/year=2000/month=1/e85272be7a1c411a886bc856c6012396.parquet\n", - "/app/data/idx=1/year=2000/month=1/ec83d2e5ff534be1b28b4cf511b67e0d.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/ef6709a1008c43cc994cf01278474c94.parquet\n", - "/app/data/idx=1/year=2000/month=1/f7249440aa6f403f934e5018d34a583c.parquet\n", - "/app/data/idx=1/year=2000/month=1/fc96559adfd2419a9a1cf883b4d521fb.parquet\n", - "/app/data/idx=2/year=2000/month=1/0210672cfa44441bbcf4c07a2bd3c467.parquet\n", - "/app/data/idx=2/year=2000/month=1/0259160641d446518dffe477c5265240.parquet\n", - "/app/data/idx=2/year=2000/month=1/04cce58d49ba4c3982dd0823f43f29a9.parquet\n", - "/app/data/idx=2/year=2000/month=1/058031e2ce2d4bd99cbe7297756dd547.parquet\n", - "/app/data/idx=2/year=2000/month=1/06918413b55f43a19fb7f4e13712c396.parquet\n", - "/app/data/idx=2/year=2000/month=1/07a8bd5cb80140a48f709d86fe3e00aa.parquet\n", - "/app/data/idx=2/year=2000/month=1/0bca80d1ee444038871e5fbb1ccc4d21.parquet\n", - "/app/data/idx=2/year=2000/month=1/0bd86024c6234346b739be5af1a49ed2.parquet\n", - "/app/data/idx=2/year=2000/month=1/0c2d3de1afda4b8f82f43cf658a09fb8.parquet\n", - "/app/data/idx=2/year=2000/month=1/0d4d954eab7043a0a8d7bd751897deb5.parquet\n", - "/app/data/idx=2/year=2000/month=1/0d976fec817b4dd88d3082fe39e6f2b6.parquet\n", - "/app/data/idx=2/year=2000/month=1/12255adedd3948d4b8ced88001a61e04.parquet\n", - "/app/data/idx=2/year=2000/month=1/1406843e1322465e8384ba8685a9eb9d.parquet\n", - "/app/data/idx=2/year=2000/month=1/15ab3cbd13ce4fc7ab69d5c2b1672ca2.parquet\n", - "/app/data/idx=2/year=2000/month=1/18b68b427e2947bbaee4122bc2b0fbf8.parquet\n", - "/app/data/idx=2/year=2000/month=1/1a883ab5889441578fbf5f0a2c822c07.parquet\n", - "/app/data/idx=2/year=2000/month=1/1cc7612ae5e34455a716fc38b84427bb.parquet\n", - "/app/data/idx=2/year=2000/month=1/1e188269ac30443fa796a8bdbea70e46.parquet\n", - "/app/data/idx=2/year=2000/month=1/1e803b9281ef4d4289f8a207de2fd2a2.parquet\n", - "/app/data/idx=2/year=2000/month=1/2099fd988d544989b1117a45cd92e2c5.parquet\n", - "/app/data/idx=2/year=2000/month=1/245b2e48c3d442f990dfd9f3f18f5544.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/2cc8cd4af02e48728683551df1d9b517.parquet\n", - "/app/data/idx=2/year=2000/month=1/2e389e9f7c4f43ea8ff96d1fa13f0347.parquet\n", - "/app/data/idx=2/year=2000/month=1/2edcd33c70704b64b80987aba03d724e.parquet\n", - "/app/data/idx=2/year=2000/month=1/3170732421924aeaa451ca82a4b77131.parquet\n", - "/app/data/idx=2/year=2000/month=1/3227923c1dab4e7fbe07511111e76f67.parquet\n", - "/app/data/idx=2/year=2000/month=1/3607d6e90ab64fff84b4f2c9477540ce.parquet\n", - "/app/data/idx=2/year=2000/month=1/3b733f19c98f44ebb6ab31e93d18f09b.parquet\n", - "/app/data/idx=2/year=2000/month=1/3d79e3dd93d44a208aadd899a9632005.parquet\n", - "/app/data/idx=2/year=2000/month=1/3e2d5106997b4d2a8a4aaaada70b5c34.parquet\n", - "/app/data/idx=2/year=2000/month=1/3ea888ba5f0c4c46aaa55795799c8614.parquet\n", - "/app/data/idx=2/year=2000/month=1/4065fbfffe364f5b8f661dd0caff5c00.parquet\n", - "/app/data/idx=2/year=2000/month=1/4438f729a59e4bee856e9766a7866777.parquet\n", - "/app/data/idx=2/year=2000/month=1/489418f859104268b59905195289b433.parquet\n", - "/app/data/idx=2/year=2000/month=1/4a07e13d3bab4ee4bed09868f4d0ae6a.parquet\n", - "/app/data/idx=2/year=2000/month=1/4ade79216a6f42ffbfa7ee5c2949d904.parquet\n", - "/app/data/idx=2/year=2000/month=1/508e221eeacc4624977761af65fdf95f.parquet\n", - "/app/data/idx=2/year=2000/month=1/520f6ff1dee6468099730664d5bea3de.parquet\n", - "/app/data/idx=2/year=2000/month=1/537a5c5b6d2949eca8c35db48dcc123f.parquet\n", - "/app/data/idx=2/year=2000/month=1/552fbc5a37494e7bb792e3c225cd4021.parquet\n", - "/app/data/idx=2/year=2000/month=1/589b0598f3eb4f178125912219919413.parquet\n", - "/app/data/idx=2/year=2000/month=1/62c0c2448a5d49889e2d2b8421264798.parquet\n", - "/app/data/idx=2/year=2000/month=1/6312935db784424a957645de2de4a4c2.parquet\n", - "/app/data/idx=2/year=2000/month=1/64f32e163bed483b860f21c6666b0a7d.parquet\n", - "/app/data/idx=2/year=2000/month=1/66414c74b1ab4c3cb155b440359b1705.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/6f0e1508809f47efba9fe398311b711c.parquet\n", - "/app/data/idx=2/year=2000/month=1/724d5c288c834e34846ad8871a94ee10.parquet\n", - "/app/data/idx=2/year=2000/month=1/740e15b45d2745a997e81672fc58481e.parquet\n", - "/app/data/idx=2/year=2000/month=1/75d5db2fb8404493bd6f6ebbeee50e91.parquet\n", - "/app/data/idx=2/year=2000/month=1/765f6b9e1260430680f79e9c4b8de8a1.parquet\n", - "/app/data/idx=2/year=2000/month=1/7893a366f6fd4770ac34af71a74af552.parquet\n", - "/app/data/idx=2/year=2000/month=1/7d6b206a0cdc4c7baefb675350602e10.parquet\n", - "/app/data/idx=2/year=2000/month=1/7d7c7ec0eaf04cf386ce6d93c5107246.parquet\n", - "/app/data/idx=2/year=2000/month=1/7e5eb92603774185bce487436db2af8f.parquet\n", - "/app/data/idx=2/year=2000/month=1/7f393857790e43da9549ed4c69797d18.parquet\n", - "/app/data/idx=2/year=2000/month=1/7f72ff606a804972a50960d0efcebcae.parquet\n", - "/app/data/idx=2/year=2000/month=1/8415983fe0a549c89ea28b25db102138.parquet\n", - "/app/data/idx=2/year=2000/month=1/86cf478f40914946b5b86106be97f7d8.parquet\n", - "/app/data/idx=2/year=2000/month=1/86f1de6e862141be8bd612465486fd16.parquet\n", - "/app/data/idx=2/year=2000/month=1/895fb45b8f554034a79ebd9c8eff9cad.parquet\n", - "/app/data/idx=2/year=2000/month=1/896bad5a081440b582d71fbb5baa4998.parquet\n", - "/app/data/idx=2/year=2000/month=1/8c2163530eef4b7b9e22fc1d4d99d6d5.parquet\n", - "/app/data/idx=2/year=2000/month=1/8c3b5f112ddf48e1a165bcad69f7e548.parquet\n", - "/app/data/idx=2/year=2000/month=1/8f54037c274c424fa2e13e83afe6a983.parquet\n", - "/app/data/idx=2/year=2000/month=1/9267bc6aecba4d66952bc7778a97bbb0.parquet\n", - "/app/data/idx=2/year=2000/month=1/978623e40a264ecbb8e3e7afee4a9221.parquet\n", - "/app/data/idx=2/year=2000/month=1/9b501c10edd94539b8147571202e7dfe.parquet\n", - "/app/data/idx=2/year=2000/month=1/a2cbd94909a7409cb233cc388fcd53be.parquet\n", - "/app/data/idx=2/year=2000/month=1/a570b6d3b72d4c8090c4efcb2eeb2d70.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/a88b8e956c104202a8f2d279c7e58741.parquet\n", - "/app/data/idx=2/year=2000/month=1/a97573410ce04706ac3d5c88f9cd285e.parquet\n", - "/app/data/idx=2/year=2000/month=1/a9c31f330c2d454a8911627eaafe7e31.parquet\n", - "/app/data/idx=2/year=2000/month=1/aa941bd2b9574ce294967019aa4cd515.parquet\n", - "/app/data/idx=2/year=2000/month=1/af86ac06c6f7484c8bbb8215a408ce73.parquet\n", - "/app/data/idx=2/year=2000/month=1/b35d48ff673541559bf27f4c3e1feab6.parquet\n", - "/app/data/idx=2/year=2000/month=1/b5b85036b2c540f9add4b86012873462.parquet\n", - "/app/data/idx=2/year=2000/month=1/b8b4abc89c824a17a263d898f4bca476.parquet\n", - "/app/data/idx=2/year=2000/month=1/bb6a1df466d84085bc0900641233cbc3.parquet\n", - "/app/data/idx=2/year=2000/month=1/bb95334225ce41768c1175ccabad174b.parquet\n", - "/app/data/idx=2/year=2000/month=1/bca9c21e480249eebb26aeed167b1293.parquet\n", - "/app/data/idx=2/year=2000/month=1/bf49382a8e024ffe9c17e4849ce4127f.parquet\n", - "/app/data/idx=2/year=2000/month=1/c06c38062a2b4e13b4e1ee1eaf03bfa2.parquet\n", - "/app/data/idx=2/year=2000/month=1/c1f40b6256444001af06dc2fb98f5e5c.parquet\n", - "/app/data/idx=2/year=2000/month=1/c4968d0cbcd54c83a0dd3e57039f0578.parquet\n", - "/app/data/idx=2/year=2000/month=1/c6afa57132184a71becf083d1b553473.parquet\n", - "/app/data/idx=2/year=2000/month=1/c87a24c747984bf58745b666dac98323.parquet\n", - "/app/data/idx=2/year=2000/month=1/cc34429087f54f7aaf1e84bc12517c26.parquet\n", - "/app/data/idx=2/year=2000/month=1/cc839cdd3fbe465abc78861a4cc11acf.parquet\n", - "/app/data/idx=2/year=2000/month=1/db6c45d7e8234bc1949ddd8973010d7f.parquet\n", - "/app/data/idx=2/year=2000/month=1/dbb0a2e2bdbc4319a07d04af0d9356fc.parquet\n", - "/app/data/idx=2/year=2000/month=1/dbde0aee2a4647939d6f027a99e37cc4.parquet\n", - "/app/data/idx=2/year=2000/month=1/ddd0738116b5496391991ad6d3e781b9.parquet\n", - "/app/data/idx=2/year=2000/month=1/e52fd781bd78475789d4160624a6e34a.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/e9c5c04f931f4fd4b6afb51db34cda54.parquet\n", - "/app/data/idx=2/year=2000/month=1/eee841a6139a4fe19620045f04c2f908.parquet\n", - "/app/data/idx=2/year=2000/month=1/ef42e36ceb794730ac25dad68f73294d.parquet\n", - "/app/data/idx=2/year=2000/month=1/efe26f73b0494f828fcf2686b6874c71.parquet\n", - "/app/data/idx=2/year=2000/month=1/f15094f2f10748e59573fecb5435ecc4.parquet\n", - "/app/data/idx=2/year=2000/month=1/f1e37026291c41c5ae698956baa6bf39.parquet\n", - "/app/data/idx=2/year=2000/month=1/f1f56b07a73646e4a5219a2623b04489.parquet\n", - "/app/data/idx=2/year=2000/month=1/f25704c4b00a418c9fa2385f9018adc7.parquet\n", - "/app/data/idx=2/year=2000/month=1/f60540924a1641de9d64f66c1af980dd.parquet\n", - "/app/data/idx=2/year=2000/month=1/f62eada23e1d430dacb69eeff0d5ba59.parquet\n", - "/app/data/idx=2/year=2000/month=1/f9b43fe646ec4607baa500b1360a6e1c.parquet\n", - "/app/data/idx=2/year=2000/month=1/fc3a31bc82ba4f17a93a18138887d9d5.parquet\n", - "/app/data/idx=3/year=2000/month=1/00b291e6d0d2494a8652e6ffcf1746c5.parquet\n", - "/app/data/idx=3/year=2000/month=1/01b6882837054cc4801c6929a630abd7.parquet\n", - "/app/data/idx=3/year=2000/month=1/09ebeae420f348c28a365f607978aeda.parquet\n", - "/app/data/idx=3/year=2000/month=1/0c41010bec604c93b974e72fa35cc2c7.parquet\n", - "/app/data/idx=3/year=2000/month=1/0cb995ed168f4829a38db4f75d4ed14b.parquet\n", - "/app/data/idx=3/year=2000/month=1/0cf1a660ee984efcaabe1d1bb9263a9a.parquet\n", - "/app/data/idx=3/year=2000/month=1/0d0bbc2ee628424f8204240680f44389.parquet\n", - "/app/data/idx=3/year=2000/month=1/0f72553d38cb47f095fdf35e03507dd3.parquet\n", - "/app/data/idx=3/year=2000/month=1/0ff3e55ae9464e369302d1fb2abaec40.parquet\n", - "/app/data/idx=3/year=2000/month=1/1165cf18728c41edb7bb8a765ae7854d.parquet\n", - "/app/data/idx=3/year=2000/month=1/12a3b4dadd4f43389c269f4b736278c2.parquet\n", - "/app/data/idx=3/year=2000/month=1/1a204362f488461da026ee347c817e2e.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/1c306421662241b48b85f24d033898fc.parquet\n", - "/app/data/idx=3/year=2000/month=1/22155eaaf5ce4e36bbb36b162dadae9e.parquet\n", - "/app/data/idx=3/year=2000/month=1/229cb1d3321f4660866b414f3a647fff.parquet\n", - "/app/data/idx=3/year=2000/month=1/280b6ca59e1f4312b872fd23d96ed6df.parquet\n", - "/app/data/idx=3/year=2000/month=1/2859c7dccfe54951a955941fa23a33b1.parquet\n", - "/app/data/idx=3/year=2000/month=1/2a17999c98294f38ac3e60af45779214.parquet\n", - "/app/data/idx=3/year=2000/month=1/2e3b411a5a3a48aba5e52053e54dbe9f.parquet\n", - "/app/data/idx=3/year=2000/month=1/2eb295d22ddd4ca9801d7b0a6a950261.parquet\n", - "/app/data/idx=3/year=2000/month=1/313a5fc7ea2c49009cd68f31ce030eb3.parquet\n", - "/app/data/idx=3/year=2000/month=1/319b8c873aba46d9a39aaed1d7ade697.parquet\n", - "/app/data/idx=3/year=2000/month=1/36c17affd08e450ba034d29818f6c94f.parquet\n", - "/app/data/idx=3/year=2000/month=1/37170fb9855d47f0871cbf1b3c4a5763.parquet\n", - "/app/data/idx=3/year=2000/month=1/3772fba9cef64744a8aa5ad999a1d48d.parquet\n", - "/app/data/idx=3/year=2000/month=1/3d68d10aee3b46e9ab4c2341f395e9f8.parquet\n", - "/app/data/idx=3/year=2000/month=1/3da7295cc0ee4953aad41cddb746c0ec.parquet\n", - "/app/data/idx=3/year=2000/month=1/401a2d5e38ee4581ac5950131e7739ed.parquet\n", - "/app/data/idx=3/year=2000/month=1/40bb809ba5824fa48218e2543e1317d8.parquet\n", - "/app/data/idx=3/year=2000/month=1/42c11bbbec28471d818c4eda7ffa0316.parquet\n", - "/app/data/idx=3/year=2000/month=1/430d92d720ef40aca2043cdd9a4216a7.parquet\n", - "/app/data/idx=3/year=2000/month=1/4344d9475f474d4289c16c14e3d76205.parquet\n", - "/app/data/idx=3/year=2000/month=1/4965043c1c58485fb9a81ca502c9704c.parquet\n", - "/app/data/idx=3/year=2000/month=1/4c954d56c1f040f8adcb92a116fc3e4a.parquet\n", - "/app/data/idx=3/year=2000/month=1/4cb7c012e50c4e45988d6c73f931babf.parquet\n", - "/app/data/idx=3/year=2000/month=1/4d11aa2de91047638fd1fbb49180b828.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/59de1ba8fd7b41d7819849137f7b9817.parquet\n", - "/app/data/idx=3/year=2000/month=1/5a31ef5acc2340b7a575b1d77e9e9917.parquet\n", - "/app/data/idx=3/year=2000/month=1/5b14185275384ee5ae5839b6d69c714e.parquet\n", - "/app/data/idx=3/year=2000/month=1/5b35b2943a7c476aa5dc3a2af08f13fe.parquet\n", - "/app/data/idx=3/year=2000/month=1/5e6bb9eceb2d4a4ebddd39e06db86d67.parquet\n", - "/app/data/idx=3/year=2000/month=1/5f8372dbc36a4681bdebfaa9f3328eec.parquet\n", - "/app/data/idx=3/year=2000/month=1/6317cb7958d2459595a28bdca41f42d5.parquet\n", - "/app/data/idx=3/year=2000/month=1/67ba93ec02b44b0593c0ff37aa3db5b7.parquet\n", - "/app/data/idx=3/year=2000/month=1/69be17b95a9046c2a4553f5c077f5fff.parquet\n", - "/app/data/idx=3/year=2000/month=1/6ac05cada45b48b89ec15b0f76df21ac.parquet\n", - "/app/data/idx=3/year=2000/month=1/6ce38fe0d6a54853a757745eb148960a.parquet\n", - "/app/data/idx=3/year=2000/month=1/7000686e11b34200ae44dfe294dc8c8e.parquet\n", - "/app/data/idx=3/year=2000/month=1/70f44eb7513c4100aa2cd5779e3c5d67.parquet\n", - "/app/data/idx=3/year=2000/month=1/7421bdc2222640b38ada8d94e10e5865.parquet\n", - "/app/data/idx=3/year=2000/month=1/78f4a6251bb7423e800ada3444bb54c1.parquet\n", - "/app/data/idx=3/year=2000/month=1/874eb82772844f269bc5360ef1971245.parquet\n", - "/app/data/idx=3/year=2000/month=1/87baf01b30ce467ca976e26ad5bec1e2.parquet\n", - "/app/data/idx=3/year=2000/month=1/8a31ab99c92a4a8b829f37561cc99956.parquet\n", - "/app/data/idx=3/year=2000/month=1/8aa9003415c649288a13560a1352805b.parquet\n", - "/app/data/idx=3/year=2000/month=1/8ae3a6e6214f4816b469f09b01c2e955.parquet\n", - "/app/data/idx=3/year=2000/month=1/8ff02b303fca4f86a129197874e8e6fe.parquet\n", - "/app/data/idx=3/year=2000/month=1/94c27fe8b6084f7b8606cef710bab753.parquet\n", - "/app/data/idx=3/year=2000/month=1/94c4de33006f424e8cb424accfad8a2c.parquet\n", - "/app/data/idx=3/year=2000/month=1/9c9b600151fb47e5a073e51a735e1537.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/9e59161660e140209e94cab5f7ea5098.parquet\n", - "/app/data/idx=3/year=2000/month=1/9fadcdc1ab7a4b9783128af7b744d705.parquet\n", - "/app/data/idx=3/year=2000/month=1/9fd3848ab9c54869b34c3a5d8e79be9a.parquet\n", - "/app/data/idx=3/year=2000/month=1/a2c45c983d5b469997c55c4e2ad72427.parquet\n", - "/app/data/idx=3/year=2000/month=1/a3f1f0a5cca84c4eaa7f2a1bef1f88b0.parquet\n", - "/app/data/idx=3/year=2000/month=1/a43049d78c9341668d77a63fc3b4d57f.parquet\n", - "/app/data/idx=3/year=2000/month=1/aa89184d32ca40c28f44109c97cee774.parquet\n", - "/app/data/idx=3/year=2000/month=1/ab3cf71e9caa44ec90adc43a56867162.parquet\n", - "/app/data/idx=3/year=2000/month=1/acab0d093d9a4bca854719e790512a25.parquet\n", - "/app/data/idx=3/year=2000/month=1/acf77747edbf4df5b457cfc8a77e0dc0.parquet\n", - "/app/data/idx=3/year=2000/month=1/b5672b45b393472986217241b378742f.parquet\n", - "/app/data/idx=3/year=2000/month=1/b7fd4df9bc9440ff94d713a7e43959d2.parquet\n", - "/app/data/idx=3/year=2000/month=1/b81af51b094e457faa6c786d1fffc470.parquet\n", - "/app/data/idx=3/year=2000/month=1/bbedc33b622c46b7af6af9c62e139163.parquet\n", - "/app/data/idx=3/year=2000/month=1/befaac43d5fa49f0a118ffaac6b5c4d3.parquet\n", - "/app/data/idx=3/year=2000/month=1/c0a4a83a65d94f2281b2039cac0e2c9e.parquet\n", - "/app/data/idx=3/year=2000/month=1/c4f44bc2181f45a3866cc232d80f2e46.parquet\n", - "/app/data/idx=3/year=2000/month=1/c63bff60ba67488d8ce536aa47774b53.parquet\n", - "/app/data/idx=3/year=2000/month=1/c74c114cc7e34985aeb20e14c2b26f3c.parquet\n", - "/app/data/idx=3/year=2000/month=1/c7eb09b4b0cf44eab86d88f11d00c222.parquet\n", - "/app/data/idx=3/year=2000/month=1/ce3160350479478da1a327405dc4cbe8.parquet\n", - "/app/data/idx=3/year=2000/month=1/cfb6a5a4bdbb4bb0a6afa699aa2e100a.parquet\n", - "/app/data/idx=3/year=2000/month=1/d137ffa9eeeb418491e792c7871334c6.parquet\n", - "/app/data/idx=3/year=2000/month=1/d43ffbf42b694713ae6e4b1e408529f9.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/d7f91f13f3444032995bc7c6c0bcd1cd.parquet\n", - "/app/data/idx=3/year=2000/month=1/d9aa67eaa7f144fc8613ce81bd072167.parquet\n", - "/app/data/idx=3/year=2000/month=1/ddeb24d5cdb043f380654ff98d83adc9.parquet\n", - "/app/data/idx=3/year=2000/month=1/e1e0b2ae05154f459914dad148a7779f.parquet\n", - "/app/data/idx=3/year=2000/month=1/e74ecdc304164cd8b953c808a1353bfd.parquet\n", - "/app/data/idx=3/year=2000/month=1/e7eb8d26146c423eaa1a77343d16920b.parquet\n", - "/app/data/idx=3/year=2000/month=1/e937a5e6dd0241c1a50b24a1c9b4ea7a.parquet\n", - "/app/data/idx=3/year=2000/month=1/e94bfddc06704799a2699d3a90d9843b.parquet\n", - "/app/data/idx=3/year=2000/month=1/f08328e844ab486ca07eda98bf1ca9ba.parquet\n", - "/app/data/idx=3/year=2000/month=1/f154e97e55b0428185553c4acb9ce227.parquet\n", - "/app/data/idx=3/year=2000/month=1/f38d8f30947f4bd08fb1c10bc81d8ee7.parquet\n", - "/app/data/idx=3/year=2000/month=1/f3fb7ade438a4929aba0109858f4abe4.parquet\n", - "/app/data/idx=3/year=2000/month=1/f458cf905d5845f1ac64183bba7a4826.parquet\n", - "/app/data/idx=3/year=2000/month=1/f6ce7accff3e4eb8b601078583655865.parquet\n", - "/app/data/idx=3/year=2000/month=1/f7d729c528904fd182207989fef04050.parquet\n", - "/app/data/idx=3/year=2000/month=1/f9d5734d70c542a3bf5ba9e004cb2e95.parquet\n", - "/app/data/idx=3/year=2000/month=1/fbeb2f31e5784074a90d737fb8c4e047.parquet\n", - "/app/data/idx=3/year=2000/month=1/fc1fb4ad31c448eeb8724a3069e760f0.parquet\n", - "/app/data/idx=3/year=2000/month=1/fde846fa6d8649c9b1770638786fb18c.parquet\n", - "/app/data/idx=3/year=2000/month=1/fe9c940d68fd4759a90408a1245022a6.parquet\n", - "/app/data/idx=3/year=2000/month=1/ffebea86d7fe4a64a973415ab3b6eccf.parquet\n", - "/app/data/idx=4/year=2000/month=1/01a585864dc644b6a4a7b13ae97c1f85.parquet\n", - "/app/data/idx=4/year=2000/month=1/0251c252cf544dc49285c7e4fcbf9784.parquet\n", - "/app/data/idx=4/year=2000/month=1/026b7ed2f32a4a4d9b1fe4bf2e2c45ce.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/03343bb5f29d42f19ce58caddb755df7.parquet\n", - "/app/data/idx=4/year=2000/month=1/04f9e581b08c424595f85fa85f87cb2c.parquet\n", - "/app/data/idx=4/year=2000/month=1/05751ecfd2734eedb17546ca81f8344a.parquet\n", - "/app/data/idx=4/year=2000/month=1/05f08cd7531f42a792e243c617b344f1.parquet\n", - "/app/data/idx=4/year=2000/month=1/061bd006ae35412eb8e5b758c50102c4.parquet\n", - "/app/data/idx=4/year=2000/month=1/06ba2d68586e4088921c99eddd5a5d86.parquet\n", - "/app/data/idx=4/year=2000/month=1/06df2daa4186437791d71a6b8e23519d.parquet\n", - "/app/data/idx=4/year=2000/month=1/07369c0250b5496bbac305aa1909eaa1.parquet\n", - "/app/data/idx=4/year=2000/month=1/0beb1321d8304074994a90b3a7eb94c5.parquet\n", - "/app/data/idx=4/year=2000/month=1/0f0e0602ffe5408a82d5265b2dc5ec18.parquet\n", - "/app/data/idx=4/year=2000/month=1/0fc5d753f2184cb0868ae28fc84c227e.parquet\n", - "/app/data/idx=4/year=2000/month=1/135fcc4c1e5a4823ae050c1e89fa413c.parquet\n", - "/app/data/idx=4/year=2000/month=1/156b561654924ad1b111bd5c965a46c2.parquet\n", - "/app/data/idx=4/year=2000/month=1/168d6922b1824cedb14d5654d75ba284.parquet\n", - "/app/data/idx=4/year=2000/month=1/1827f11f108341ccb48a0bb6ab694a64.parquet\n", - "/app/data/idx=4/year=2000/month=1/18e1c91f8c724d30a77bdd47e665c571.parquet\n", - "/app/data/idx=4/year=2000/month=1/19016c157bce43e394b117e8e0ed2557.parquet\n", - "/app/data/idx=4/year=2000/month=1/1a2c4e9d435f4c5faf83efbbb559118b.parquet\n", - "/app/data/idx=4/year=2000/month=1/1d3c8ecb9804470c87bfd7c25a3dab28.parquet\n", - "/app/data/idx=4/year=2000/month=1/1ee5f78eb54548278ae0a857c616e84c.parquet\n", - "/app/data/idx=4/year=2000/month=1/1ff311b87ba74e998ff7a5267ba52832.parquet\n", - "/app/data/idx=4/year=2000/month=1/285e2e6ef8c34d45b73916b4bfe1a2bf.parquet\n", - "/app/data/idx=4/year=2000/month=1/288d2d389b1e4a7695454e12fc442592.parquet\n", - "/app/data/idx=4/year=2000/month=1/2bfb7829ce324e1bb182159d8a6e7966.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/2cb8084772654371bc4aab66bab3d5fc.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d0d1ce706fe41feadf69279c0290101.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d4a13244f154d278d237535e957d174.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d73ffb2b7314b48b25c924dad691fa1.parquet\n", - "/app/data/idx=4/year=2000/month=1/30c6048fdac04824831e0a984445c238.parquet\n", - "/app/data/idx=4/year=2000/month=1/3b3610138fd84568b3f6b20ccce2b296.parquet\n", - "/app/data/idx=4/year=2000/month=1/3d33b2adeb0c406aafda7296398833d2.parquet\n", - "/app/data/idx=4/year=2000/month=1/3fb3450af6ed4ddc996b10c7316018af.parquet\n", - "/app/data/idx=4/year=2000/month=1/4384e6f19b984984a0e583891fab8200.parquet\n", - "/app/data/idx=4/year=2000/month=1/4499b3a4074d42ad87a6a74f031bad48.parquet\n", - "/app/data/idx=4/year=2000/month=1/578cd70733f54818812b7fee342f7922.parquet\n", - "/app/data/idx=4/year=2000/month=1/5a42dc9b52a845b394f570bc7e233637.parquet\n", - "/app/data/idx=4/year=2000/month=1/5e3f996936cd466c8f182e4925b457b9.parquet\n", - "/app/data/idx=4/year=2000/month=1/6171f6c076d442ce9ee9b2223a1c9e29.parquet\n", - "/app/data/idx=4/year=2000/month=1/637fabc040bd4139901780de2f98df24.parquet\n", - "/app/data/idx=4/year=2000/month=1/649b57f24c1c49e7aa025d1a111f31a6.parquet\n", - "/app/data/idx=4/year=2000/month=1/65dac4a30aba4d3e9a18e731bef42800.parquet\n", - "/app/data/idx=4/year=2000/month=1/6759ad29fa9a416498d408a97082da2d.parquet\n", - "/app/data/idx=4/year=2000/month=1/682c068895b54404aa02c22ec59d98d7.parquet\n", - "/app/data/idx=4/year=2000/month=1/6a2c44eebd7c447ab0eac8b5596612ce.parquet\n", - "/app/data/idx=4/year=2000/month=1/6c36185edd4a41bc8869406a3bc9b533.parquet\n", - "/app/data/idx=4/year=2000/month=1/6ce5ab2e0fce43c9be58cd6ca0ab1b0c.parquet\n", - "/app/data/idx=4/year=2000/month=1/6e7dfa62c7ab4743bd5b47c2d65fcd3f.parquet\n", - "/app/data/idx=4/year=2000/month=1/7137092484b641e3a41226810acbe2b7.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/727c5b50be444555bb0c8cb3493f136c.parquet\n", - "/app/data/idx=4/year=2000/month=1/761ed2925727400586b3f95bebe32b12.parquet\n", - "/app/data/idx=4/year=2000/month=1/779bc731cfdc4eb582b7d45275f45f7d.parquet\n", - "/app/data/idx=4/year=2000/month=1/79ee8145c2814549a38530b2c506544e.parquet\n", - "/app/data/idx=4/year=2000/month=1/7b59b765fa454ce0a9fbd88628d6f604.parquet\n", - "/app/data/idx=4/year=2000/month=1/8292f989424444f6aa18bbcfc68f1734.parquet\n", - "/app/data/idx=4/year=2000/month=1/83b3730a855b494487dd6728a517ee3b.parquet\n", - "/app/data/idx=4/year=2000/month=1/84ebbce76a7a4107b939b685da66b5f4.parquet\n", - "/app/data/idx=4/year=2000/month=1/855921d6f64644a38bd2be5d9669fe0a.parquet\n", - "/app/data/idx=4/year=2000/month=1/85ab57ea6d0e48efac390b6047a6f435.parquet\n", - "/app/data/idx=4/year=2000/month=1/8b974e69e33e41cdb5bde25a6a422fd6.parquet\n", - "/app/data/idx=4/year=2000/month=1/8dd5278b54e9413ebd42286dea00c4a3.parquet\n", - "/app/data/idx=4/year=2000/month=1/93128598152643a297db72dec38a07b5.parquet\n", - "/app/data/idx=4/year=2000/month=1/95448aeaacdc40fe97d207b2c80ca784.parquet\n", - "/app/data/idx=4/year=2000/month=1/9571568631184e1386c3528b8ce9ed26.parquet\n", - "/app/data/idx=4/year=2000/month=1/95f8e19b3af344db98dcc5c5f9546c3a.parquet\n", - "/app/data/idx=4/year=2000/month=1/9921911b40d041f6ac72c4d44578c5cf.parquet\n", - "/app/data/idx=4/year=2000/month=1/9b4d80c840c14d3b9c67da4c9877b628.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e259ea36fbb4c0ba9b6535a3f34544e.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e3706d0ded44106bf8e0dee8900cd28.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e67c71850a54fe1aa354c43d2cd9c38.parquet\n", - "/app/data/idx=4/year=2000/month=1/9ef278bcdc3b41e89059c309bcbb005e.parquet\n", - "/app/data/idx=4/year=2000/month=1/a33e6304bb1b47daa86853f19b009366.parquet\n", - "/app/data/idx=4/year=2000/month=1/a9bc812dc596492eafcc73f01d0e53a3.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/abf0e405806744df9ea3e9908eb0451f.parquet\n", - "/app/data/idx=4/year=2000/month=1/acd468d1addc4d75944766e48c3eb324.parquet\n", - "/app/data/idx=4/year=2000/month=1/adbdc6fec62c463aa94e0ce707ae1768.parquet\n", - "/app/data/idx=4/year=2000/month=1/b24807369dfc461e92eb8a56a7931070.parquet\n", - "/app/data/idx=4/year=2000/month=1/b2f3d43c99f44131969e0fcf27cfbf3c.parquet\n", - "/app/data/idx=4/year=2000/month=1/b461c7cfd0f4483f8309f670f4f4265d.parquet\n", - "/app/data/idx=4/year=2000/month=1/b584791f45f74432a067632281285b9a.parquet\n", - "/app/data/idx=4/year=2000/month=1/b7ca6973a34c4f92831f16216beb33f2.parquet\n", - "/app/data/idx=4/year=2000/month=1/b7f16808e8e4491e8f86d3ae9766f2b9.parquet\n", - "/app/data/idx=4/year=2000/month=1/b9a2d05a74a84d71a1b65a0f05895011.parquet\n", - "/app/data/idx=4/year=2000/month=1/bc3036cc653e4584893f8b36e33c8f85.parquet\n", - "/app/data/idx=4/year=2000/month=1/be1318c7564d48be8435c11344627932.parquet\n", - "/app/data/idx=4/year=2000/month=1/bf90009dc7b14cfaab939f435d975a0b.parquet\n", - "/app/data/idx=4/year=2000/month=1/c0105d7e54fc42dc93d5140782960815.parquet\n", - "/app/data/idx=4/year=2000/month=1/c450cbe2674e488d8e30953252bc7a4b.parquet\n", - "/app/data/idx=4/year=2000/month=1/c82d37b18d65434ca1fe1b9cf4d29ccb.parquet\n", - "/app/data/idx=4/year=2000/month=1/cb53085f9145493b9a171d31b682e75f.parquet\n", - "/app/data/idx=4/year=2000/month=1/cc14bf7a74c9498889bc52e29f83edff.parquet\n", - "/app/data/idx=4/year=2000/month=1/ce3c90dd7e7a4f5a862580c14aa22c28.parquet\n", - "/app/data/idx=4/year=2000/month=1/d47149e3e1e34123a48f623ca121e8a8.parquet\n", - "/app/data/idx=4/year=2000/month=1/d9e0e3e786a942f5892c6ce17b37eb4a.parquet\n", - "/app/data/idx=4/year=2000/month=1/dbb82450694e4e76ab34f3e650d36594.parquet\n", - "/app/data/idx=4/year=2000/month=1/dc67b56f0c814648b9ebf8e1c483b923.parquet\n", - "/app/data/idx=4/year=2000/month=1/de2e16496bcd405b8d48aec4da4d5ae4.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/de37409ac14b49c38c9c0da26d6c721f.parquet\n", - "/app/data/idx=4/year=2000/month=1/e8072594944141a5b078b74e739307d3.parquet\n", - "/app/data/idx=4/year=2000/month=1/e9b36b985eb44b44a5436af438f7ceb0.parquet\n", - "/app/data/idx=4/year=2000/month=1/eefdd2cdde1d4085964d1469a11f462c.parquet\n", - "/app/data/idx=4/year=2000/month=1/f89525bf20e540f29b021ce5f4d9eb3c.parquet\n", - "/app/data/idx=4/year=2000/month=1/fab7e098a4c8489785225a74b71ec2ef.parquet\n" - ] - } - ], - "source": [ - "dir_name = \"/app/data\"\n", - "\n", - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "ba4d7dc4", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:17:50.865185Z", - "start_time": "2021-06-16T11:17:50.378460Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx year month\n", - "2000-01-09 00:00:00-05:00 A 99 54 0 2000 1\n", - "2000-01-13 09:30:00-05:00 A 99 62 0 2000 1\n", - "2000-01-13 09:35:00-05:00 A 54 76 0 2000 1\n", - "# df.shape=\n", - "(18075, 6)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "year int32\n", - "month int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read everything.\n", - "df2 = dataset.to_table().to_pandas()\n", - "\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "68e84388", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:05:43.018220Z", - "start_time": "2021-06-16T11:05:43.007510Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['A' 'B' 'C' 'D' 'E']\n", - "DatetimeIndex(['2000-01-06 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", - " '2000-01-01 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " 
'2000-01-08 00:00:00-05:00', '2000-01-12 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-15 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-14 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-13 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-08 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-08 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-03 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", - " '2000-01-04 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-10 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-10 
00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", - " '2000-01-06 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-01 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", - " '2000-01-15 00:00:00-05:00'],\n", - " dtype='datetime64[ns, America/New_York]', freq=None)\n" - ] - } - ], - "source": [ - "print(df2[\"instr\"].unique())\n", - "print(df2.index)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "205.6px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py deleted file mode 100644 index d7d5f9e56..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py +++ /dev/null @@ -1,304 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# 
display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# Show Parquet / Pyarrow API. - -# %% [markdown] -# ## Imports - -# %% -import logging -import os -import random - -import pandas as pd -import pyarrow as pa -import pyarrow.dataset as ds -import pyarrow.parquet as pq -from pyarrow.dataset import DirectoryPartitioning - -import helpers.hdbg as hdbg -import helpers.hio as hio - -hdbg.init_logger(verbosity=logging.INFO) -_LOG = logging.getLogger(__name__) - - -# %% -def get_df() -> pd.DataFrame: - """ - Create pandas random data, like: - - ``` - idx instr val1 val2 - 2000-01-01 0 A 99 30 - 2000-01-02 0 A 54 46 - 2000-01-03 0 A 85 86 - ``` - """ - instruments = "A B C D E".split() - "id stock val1 val2".split() - df_idx = pd.date_range( - pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-15"), freq="1D" - ) - # print(df_idx) - random.seed(1000) - - df = [] - for idx, inst in enumerate(instruments): - df_tmp = pd.DataFrame( - { - "idx": idx, - "instr": inst, - "val1": [random.randint(0, 100) for k in range(len(df_idx))], - "val2": [random.randint(0, 100) for k in range(len(df_idx))], - }, - index=df_idx, - ) - # print(df_tmp) - df.append(df_tmp) - df = pd.concat(df) - return df - - -# %% -def df_to_str(df: pd.DataFrame) -> str: - txt = "" - txt += "# df=\n%s" % df.head(3) - txt += "\n# df.shape=\n%s" % str(df.shape) - txt += "\n# df.dtypes=\n%s" % str(df.dtypes) - return txt - - -# %% [markdown] -# # Save and load all data in one file - -# %% -df = get_df() -# print(df.head()) -print(df_to_str(df)) - -# %% -table = pa.Table.from_pandas(df) - -print("table=\n%s" % table) - -# %% -# Save. -file_name = "df_in_one_file.pq" -pq.write_table(table, file_name) - -# %% -# Load. 
-df2 = pq.read_table(file_name) -print(df2) - -df2 = df2.to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Read a subset of columns - -# %% -df2 = pq.read_table(file_name, columns=["idx", "val1"]) -print(df2) - -df2 = df2.to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Partitioned dataset -# -# from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data -# -# - A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir -# - E.g., "Hive" patitioning scheme "key=vale" dir names - -# %% -df = get_df() -print(df_to_str(df)) - -# %% -base = "." -dir_name = os.path.join(base, "parquet_dataset_partitioned") -os.system("rm -rf %s" % dir_name) - -pq.write_to_dataset(table, dir_name, partition_cols=["idx"]) - -# %% -# !ls parquet_dataset_partitioned - -# %% -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read everything. -df2 = dataset.to_table().to_pandas() - -print(df_to_str(df2)) - -# %% -# Load part of the data. - -df2 = dataset.to_table(filter=ds.field("idx") == 1).to_pandas() -print(df_to_str(df2)) - -df2 = dataset.to_table(filter=ds.field("idx") < 3).to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Add year-month partitions - -# %% -df = get_df() -df["year"] = df.index.year -df["month"] = df.index.month - -print(df_to_str(df)) - -# %% -table = pa.Table.from_pandas(df) - -print("table=\n%s" % table) - -# %% -base = "." -dir_name = os.path.join(base, "pq_partitioned2") -os.system("rm -rf %s" % dir_name) - -pq.write_to_dataset(table, dir_name, partition_cols=["idx", "year", "month"]) - -# %% -# !ls $dir_name - -# %% -# !ls $dir_name/idx=0/year=2000/month=1 - -# %% -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read data back. 
-dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -df2 = dataset.to_table(filter=ds.field("idx") == 2).to_pandas() -print(df_to_str(df2)) - -# %% -# We could scan manually and create the dirs manually if we don't want to add -# add a new dir. -base = "." -dir_name = os.path.join(base, "parquet_dataset_partitioned2") -os.system("rm -rf %s" % dir_name) - -schemas = [] - -schema = pa.Table.from_pandas(df).schema -print(schema) -# assert 0 -# idx: int64 -# instr: string -# val1: int64 -# val2: int64 -# year: int64 -# month: int64 - -# grouped = df.groupby(lambda x: x.day) -group_by_idx = df.groupby("idx") -for idx, df_tmp in group_by_idx: - _LOG.debug("idx=%s -> df.shape=%s", idx, str(df_tmp.shape)) - # - group_by_year = df_tmp.groupby(lambda x: x.year) - for year, df_tmp2 in group_by_year: - _LOG.debug("year=%s -> df.shape=%s", year, str(df_tmp2.shape)) - # - group_by_month = df_tmp2.groupby(lambda x: x.month) - for month, df_tmp3 in group_by_month: - _LOG.debug("month=%s -> df.shape=%s", month, str(df_tmp3.shape)) - # file_name = "df_in_one_file.pq" - # pq.write_table(table, file_name) - # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet - subdir_name = os.path.join( - dir_name, f"idx={idx}", f"year={year}", f"month={month}" - ) - table = pa.Table.from_pandas(df_tmp3, schema=schema) - schemas.append(table.schema) - # print(df_tmp3) - # print(table.schema) - # pq.write_to_dataset(table, - # subdir_name, schema=schema) - file_name = os.path.join(subdir_name, "df_out.pq") - hio.create_enclosing_dir(file_name) - pq.write_table(table, file_name) - -# %% -schemas[0] == schemas[4] - -# %% -schemas - -# %% - -# %% -# !ls $dir_name/idx=0/year=2000/month=1 - -# %% -# Read data back. 
-# https://github.com/dask/dask/issues/4194 -# src_dir = f"{dir_name}/idx=0/year=2000/month=1" -src_dir = f"{dir_name}/idx=0/year=2000" -dataset = ds.dataset(src_dir, format="parquet", partitioning="hive") - -df2 = dataset.to_table().to_pandas() -# print(df_to_str(df2)) -print("\n".join(dataset.files)) - -# %% [markdown] -# ## Partition manually - -# %% -partitioning = DirectoryPartitioning( - pa.schema([("year", pa.int16()), ("month", pa.int8()), ("day", pa.int8())]) -) -print(partitioning.parse("/2009/11/3")) - -# partitioning.discover() - -# %% -# !ls /app/data - -# %% -dir_name = "/app/data" - -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read everything. -df2 = dataset.to_table().to_pandas() - -print(df_to_str(df2)) - -# %% -print(df2["instr"].unique()) -print(df2.index) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb deleted file mode 100644 index 6dcf8078c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb +++ /dev/null @@ -1,210 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "81a273af", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:42.864614Z", - "start_time": "2021-06-16T11:41:42.860710Z" - } - }, - "outputs": [], - "source": [ - "# https://s3fs.readthedocs.io/en/latest/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8fef0639", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:42.888158Z", - "start_time": "2021-06-16T11:41:42.869135Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 3" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "id": "37fe11a3", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.140014Z", - "start_time": "2021-06-16T11:41:42.890655Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import helpers.hs3 as hs3" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a4130a2c", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.145271Z", - "start_time": "2021-06-16T11:41:43.141535Z" - } - }, - "outputs": [], - "source": [ - "aws_profile = \"am\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a49a28ff", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.158474Z", - "start_time": "2021-06-16T11:41:43.148428Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "aws_region=%s us-east-1\n" - ] - } - ], - "source": [ - "# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key)\n", - "\n", - "s3 = hs3.get_s3fs(aws_profile)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1795133f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.819759Z", - "start_time": "2021-06-16T11:41:43.160432Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bucket=alphamatic-data\n" - ] - }, - { - "data": { - "text/plain": [ - "['alphamatic-data/README.md', 'alphamatic-data/data']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False)\n", - "print(\"bucket=\" + bucket)\n", - "s3.ls(bucket)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9bc9623e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.828493Z", - "start_time": "2021-06-16T11:41:43.822315Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['alphamatic-data/README.md', 
'alphamatic-data/data']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s3.ls(bucket)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "65f95a8a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.839153Z", - "start_time": "2021-06-16T11:41:43.832520Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(os.environ[\"AWS_DEFAULT_REGION\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py deleted file mode 100644 index 65aa9d9f8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py +++ /dev/null @@ -1,44 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 -# language: python -# name: python3 -# --- - -# %% -# 
https://s3fs.readthedocs.io/en/latest/ - -# %% -# %load_ext autoreload -# %autoreload 3 - -# %% -import os - -import helpers.hs3 as hs3 - -# %% -aws_profile = "am" - -# %% -# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key) - -s3 = hs3.get_s3fs(aws_profile) - -# %% -bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False) -print("bucket=" + bucket) -s3.ls(bucket) - -# %% -s3.ls(bucket) - -# %% -print(os.environ["AWS_DEFAULT_REGION"]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb deleted file mode 100644 index 9f3df144d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ae351077", - "metadata": {}, - "source": [ - "# Maple\n", - "\n", - "https://www.sagemath.org/" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "67b105e6", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:38.281663Z", - "start_time": "2022-11-24T08:59:32.166395Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sagemath\n", - " Downloading sagemath-1.3.0.tar.gz (9.4 kB)\n", - "Collecting cython>=0.26\n", - " Downloading Cython-0.29.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)\n", - "\u001b[K |████████████████████████████████| 1.9 MB 3.2 MB/s eta 0:00:01\n", - "\u001b[?25hBuilding wheels for collected packages: sagemath\n", - " Building wheel for sagemath (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for sagemath: filename=sagemath-1.3.0-py3-none-any.whl size=9330 sha256=eb8efd936116026e66a021d4bdd88dc4d9ce207fd633706229625d26878de267\n", - " Stored in directory: /root/.cache/pip/wheels/da/63/1f/6dc0b464e0fec31a0d318d11748e11be903fe893fd6fb713fe\n", - "Successfully built sagemath\n", - "Installing collected packages: cython, sagemath\n", - "Successfully installed cython-0.29.32 sagemath-1.3.0\n" - ] - } - ], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sagemath)\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70f1c613", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "8dd49c0c", - "metadata": {}, - "source": [ - "# Sympy" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bab397f4", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:31.082906Z", - "start_time": "2022-11-24T08:59:08.303577Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sympy\n", - " Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\n", - "\u001b[K |████████████████████████████████| 6.5 MB 4.4 MB/s eta 0:00:01\n", - "\u001b[?25hCollecting mpmath>=0.19\n", - " Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)\n", - "\u001b[K |████████████████████████████████| 532 kB 6.2 MB/s eta 0:00:01\n", - "\u001b[?25hInstalling collected packages: mpmath, sympy\n", - "Successfully installed mpmath-1.2.1 sympy-1.11.1\n" - ] - } - ], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sympy)\"" - ] - }, - { - "cell_type": "markdown", - "id": "c32a78b2", - "metadata": {}, - "source": [ - "## Features\n", - "\n", - "https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html#" - ] - }, - { - "cell_type": "markdown", - "id": "547104ae", - "metadata": {}, - "source": [ - "## Logic\n", - "\n", - 
"https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "016ffec6", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:48.097485Z", - "start_time": "2022-11-24T08:59:47.660109Z" - } - }, - "outputs": [], - "source": [ - "import sympy\n", - "from sympy import * # noqa: F403" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "15a65c7c", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:01:58.628860Z", - "start_time": "2022-11-24T09:01:58.614742Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle y \\vee \\left(x \\wedge y\\right)$" - ], - "text/plain": [ - "y | (x & y)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x, y = sympy.symbols(\"x,y\")\n", - "y | (x & y)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c016e526", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:02:16.425181Z", - "start_time": "2022-11-24T09:02:16.418742Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle x \\Rightarrow y$" - ], - "text/plain": [ - "Implies(x, y)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x >> y" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "961ab5b7", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:02:36.687945Z", - "start_time": "2022-11-24T09:02:36.681518Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\text{True}$" - ], - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Evaluate an expression.\n", - "(y & x).subs({x: True, y: True})" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d36a6df4", - "metadata": { - 
"ExecuteTime": { - "end_time": "2022-11-24T09:03:53.122377Z", - "start_time": "2022-11-24T09:03:53.108926Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\left(x \\wedge \\neg w\\right) \\vee \\left(y \\wedge z \\wedge \\neg x\\right)$" - ], - "text/plain": [ - "(x & ~w) | (y & z & ~x)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "w, x, y, z = sympy.symbols(\"w x y z\")\n", - "minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}]\n", - "sympy.SOPform([w, x, y, z], minterms)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "351f8a29", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:04:52.260031Z", - "start_time": "2022-11-24T09:04:52.244286Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\neg x \\wedge \\neg y$" - ], - "text/plain": [ - "~x & ~y" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "b = (~x & ~y & ~z) | (~x & ~y & z)\n", - "sympy.simplify_logic(b)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "6997a50b", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:07:01.070407Z", - "start_time": "2022-11-24T09:07:01.063092Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 0] -> True\n", - "[0, 1] -> True\n", - "[1, 0] -> False\n", - "[1, 1] -> True\n" - ] - } - ], - "source": [ - "# Compute truth table.\n", - "from sympy.logic.boolalg import truth_table # noqa: E402\n", - "\n", - "table = truth_table(x >> y, [x, y])\n", - "for t in table:\n", - " print(f\"{t[0]} -> {t[1]}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "c70e51cf", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:08:01.433951Z", - "start_time": "2022-11-24T09:08:01.298800Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - 
"execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sympy.satisfiable(x & ~x)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "f9d0eda7", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:08:14.379803Z", - "start_time": "2022-11-24T09:08:14.364702Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{y: True, x: True}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sympy.satisfiable((x | y) & (x | ~y) & (~x | y))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "782bd93c", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:28:42.188931Z", - "start_time": "2022-11-24T09:28:42.124276Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{L: True, Q: True, B: False, N: False}" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# - (not L => Q and B and N)\n", - "# - (N => not L)\n", - "# - not Q => B\n", - "# - not B\n", - "\n", - "L, N, Q, B = sympy.symbols(\"L N Q B\")\n", - "\n", - "C = (\n", - " sympy.Implies(~L, Q & B & N)\n", - " & sympy.Implies(N, ~L)\n", - " & sympy.Implies(~Q, B)\n", - " & ~B\n", - ")\n", - "sympy.satisfiable(C)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1298f34b", - "metadata": {}, - "outputs": [], - "source": [ - "## Stats\n", - "\n", - "# https://docs.sympy.org/latest/modules/stats.html#" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": 
true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py deleted file mode 100644 index bd5b8a5aa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py +++ /dev/null @@ -1,98 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Maple -# -# https://www.sagemath.org/ - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sagemath)" - -# %% - -# %% [markdown] -# # Sympy - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sympy)" - -# %% [markdown] -# ## Features -# -# https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html# - -# %% [markdown] -# ## Logic -# -# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation - -# %% -import sympy -from sympy import * # noqa: F403 - -# %% -x, y = sympy.symbols("x,y") -y | (x & y) - -# %% -x >> y - -# %% -# Evaluate an expression. -(y & x).subs({x: True, y: True}) - -# %% -w, x, y, z = sympy.symbols("w x y z") -minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}] -sympy.SOPform([w, x, y, z], minterms) - -# %% -b = (~x & ~y & ~z) | (~x & ~y & z) -sympy.simplify_logic(b) - -# %% -# Compute truth table. 
-from sympy.logic.boolalg import truth_table # noqa: E402 - -table = truth_table(x >> y, [x, y]) -for t in table: - print(f"{t[0]} -> {t[1]}") - -# %% -sympy.satisfiable(x & ~x) - -# %% -sympy.satisfiable((x | y) & (x | ~y) & (~x | y)) - -# %% -# - (not L => Q and B and N) -# - (N => not L) -# - not Q => B -# - not B - -L, N, Q, B = sympy.symbols("L N Q B") - -C = ( - sympy.Implies(~L, Q & B & N) - & sympy.Implies(N, ~L) - & sympy.Implies(~Q, B) - & ~B -) -sympy.satisfiable(C) - -# %% -## Stats - -# https://docs.sympy.org/latest/modules/stats.html# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py deleted file mode 100644 index 7550952ca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py +++ /dev/null @@ -1,192 +0,0 @@ -""" -Import as: - -import helpers.old.conda as holdcond -""" - -import json -import logging -import os -from typing import Any, Dict, List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hsystem as hsystem -import helpers.old.user_credentials as holuscre - -_LOG = logging.getLogger(__name__) - - -def conda_system(cmd: str, *args: Any, **kwargs: Any) -> int: - """ - When running a conda command we need to execute a script to configure - conda. This script is typically executed in .bashrc but here we create a - new bash shell every time to execute a command, so we need to re-initialize - the shell before any conda command. 
- - :param cmd: - :param args: - :param kwargs: - :return: - """ - # TODO(gp): Pass conda_env_name as done in get_conda_list() - path = holuscre.get_credentials()["conda_sh_path"] - hdbg.dassert_path_exists(path) - hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) - cmd = f"source {path} && {cmd}" - output: int = hsystem.system(cmd, *args, **kwargs) - return output - - -def conda_system_to_string( - cmd: str, *args: Any, **kwargs: Any -) -> Tuple[int, str]: - path = holuscre.get_credentials()["conda_sh_path"] - hdbg.dassert_path_exists(path) - hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) - cmd = f"source {path} && {cmd}" - output: Tuple[int, str] = hsystem.system_to_string(cmd, *args, **kwargs) - return output - - -def get_conda_envs_dirs() -> List[str]: - """ - :return: list of the env dirs from conda - """ - _, ret = conda_system_to_string(r"conda config --show envs_dirs --json") - _LOG.debug("ret=%s", ret) - envs = json.loads(ret) - hdbg.dassert_in("envs_dirs", envs) - envs = envs["envs_dirs"] - hdbg.dassert_isinstance(envs, list) - return list(envs) - - -def set_conda_env_root(conda_env_path: str) -> None: - """ - Set conda env dirs so that it matches what specified in. - - > conda config --show envs_dirs --json - { - "envs_dirs": [ - "/Users/gp/.conda/envs", - ] - } - - > conda config --prepend envs_dirs /data/gp_wd/anaconda2/envs2 - """ - envs = get_conda_envs_dirs() - # - if not envs or envs[0] != conda_env_path: - _LOG.warning( - "%s is not the first env dir in %s", conda_env_path, str(envs) - ) - # Reset the list of conda envs. - _LOG.debug("Resetting envs_dir %s", str(envs)) - for env in envs: - _LOG.debug("Deleting %s", env) - cmd = f"conda config --remove envs_dirs {env}" - # We don't abort because of a bug in conda not deleting the key - # when asked for. 
- # CondaKeyError: 'envs_dirs': u'/data/shared/anaconda2/envs' is not - # in the u'envs_dirs' key of the config file - conda_system(cmd, abort_on_error=False) - envs = get_conda_envs_dirs() - _LOG.debug("Current envs: %s", str(envs)) - # Add the conda env. - cmd = f"conda config --prepend envs_dirs {conda_env_path}" - conda_system(cmd) - # Check. - envs = get_conda_envs_dirs() - hdbg.dassert( - envs or envs[0] != conda_env_path, - msg=f"{conda_env_path} is not first env dir in {envs}", - ) - else: - _LOG.debug( - "Nothing to do, since %s is already in %s", conda_env_path, envs - ) - - -def get_conda_info_envs() -> Tuple[dict, None]: - """ - :return: (env_dict, active_env) - - env_dict: map 'conda env name -> conda env path' - - active_env: name of the active conda env - """ - # > conda info --envs - # # conda environments: - # # - # aws /Users/gp/.conda/envs/aws - # bbg /Users/gp/.conda/envs/bbg - # deeplearning /Users/gp/.conda/envs/deeplearning - # jupyter /Users/gp/.conda/envs/jupyter - # test_conda /Users/gp/.conda/envs/test_conda - # TODO(gp): Use --json but we need to parse the json without any module. 
- ret = conda_system_to_string(r"conda info --envs")[1] - _LOG.debug("Parsing conda info\n%s", ret) - ret = ret.split("\n") - env_dict = {} - active_env = None - for line in ret: - line = line.rstrip().lstrip() - if line == "": - continue - if line.startswith("#"): - continue - vals = line.split() - if len(vals) == 2: - env_name, env_path = vals - env_dict[env_name] = env_path - elif len(vals) == 3: - env_name, star, env_path = vals - hdbg.dassert_eq(star, "*") - env_dict[env_name] = env_path - else: - _LOG.debug("Can't parse line='%s'", line) - return env_dict, active_env - - -def get_conda_list(conda_env_name: str) -> Dict[str, Dict[str, str]]: - """ - :return: env_dict mapping package name to their info - - env_dict: map 'conda env name -> conda env path' - - active_env: name of the active conda env - """ - # > conda list - # # packages in environment at /Users/gp/.conda/envs/: - # # - # # Name Version Build Channel - # absl-py 0.5.0 py_0 conda-forge - # agate 1.6.0 py_3 conda-forge - # agate-dbf 0.2.0 py27_0 conda-forge - # agate-excel 0.2.2 py_0 conda-forge - # TODO(gp): Use --json but we need to parse the json without any module. 
- cmd = rf"(conda activate {conda_env_name} 2>&1) >/dev/null && conda list" - ret = conda_system_to_string(cmd)[1] - ret = ret.split("\n") - env_dict = {} - labels = {1: "version", 2: "build", 3: "channel"} - for line in ret: - line = line.rstrip().lstrip() - _LOG.debug("line='%s'", line) - if line == "": - continue - if line.startswith("#"): - continue - vals = line.split() - env_dict[vals[0]] = {labels[k]: vals[k] for k in range(1, len(vals[:4]))} - return env_dict - - -_CONDA_PATH = None - - -def get_conda_path() -> Optional[str]: - global _CONDA_PATH - if not _CONDA_PATH: - rc, txt = conda_system_to_string("which conda", abort_on_error=False) - if rc == 0: - _CONDA_PATH = str(txt) - else: - _CONDA_PATH = "n/a" - return _CONDA_PATH diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py deleted file mode 100644 index 5b0445a31..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py +++ /dev/null @@ -1,17 +0,0 @@ -import pathlib -from typing import Any, Optional - - -def pytest_ignore_collect( # type: ignore - collection_path: pathlib.Path, path: Any, config: Any -) -> Optional[bool]: - """ - Skip all tests in this directory. - - :param collection_path: path to analyze - :param path: path to analyze (deprecated) - :param config: pytest config object - :return: True if the path should be ignored - """ - # Ignore this directory and all its subdirectories. 
- return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py deleted file mode 100644 index f51cb5d8d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Import as: - -import helpers.old.env2 as holdenv2 -""" - -import logging -import os -from typing import Tuple - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.old.conda as holdcond - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -def get_system_info(add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("System info") + "\n" - msg += f"user name={hsystem.get_user_name()}\n" - msg += f"server name={hsystem.get_server_name()}\n" - msg += f"os name={hsystem.get_os_name()}\n" - msg += f"conda path={holdcond.get_conda_path()}\n" - msg += f"conda env root={str(holdcond.get_conda_envs_dirs())}\n" - return msg - - -def get_package_summary(conda_env_name: str, add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("Package summary") + "\n" - conda_list = holdcond.get_conda_list(conda_env_name) - msg = "" - for package in ["pandas", "numpy", "scipy", "arrow-cpp"]: - ver = conda_list[package]["version"] if package in conda_list else "None" - line = f"{package}: {ver}" - msg += line + "\n" - return msg - - -def get_conda_export_list(conda_env_name: str, add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("Package summary") + "\n" - cmd = rf"(conda activate {conda_env_name} 2>&1 >/dev/null) && conda list --export" - _, msg_tmp = holdcond.conda_system_to_string(cmd) - msg += msg_tmp - return msg - - -def 
save_env_file(conda_env_name: str, dir_name: str) -> Tuple[str, str]: - msg = "" - msg += get_system_info(add_frame=True) - msg += get_package_summary(conda_env_name, add_frame=True) - msg += get_conda_export_list(conda_env_name, add_frame=True) - # Save results. - if dir_name is not None: - file_name = ( - f"{conda_env_name}.{hsystem.get_user_name()}.{hsystem.get_os_name()}." - f"{hsystem.get_server_name()}.txt" - ) - dst_file = os.path.join(dir_name, file_name) - dst_file = os.path.abspath(dst_file) - hio.create_enclosing_dir(dst_file, incremental=True) - _LOG.info("Saving conda env signature to '%s'", dst_file) - hio.to_file(dst_file, msg) - else: - dst_file = None - return msg, dst_file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py deleted file mode 100644 index a9d6b4f46..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py +++ /dev/null @@ -1,267 +0,0 @@ -""" -Import as: - -import helpers.old.tunnels as holdtunn -""" - -import logging -import os -from typing import Any, Dict, List, Tuple, Union, cast - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.old.user_credentials as holuscre - -_LOG = logging.getLogger(__name__) - - -def _get_services_info() -> list: - # Server ports. - services = [ - # service name, server public IP, local port, remote port. - ("MongoDb", hsystem.get_env_var("OLD_DEV_SERVER"), 27017, 27017), - ("Jenkins", hsystem.get_env_var("JENKINS_SERVER"), 8080, 8080), - # ("Reviewboard", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8000, 8000), - # ("Doc server", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8001, 80), - # Netdata to Jenkins and Dev server. 
- # ("Dev system performance", DEV_SERVER, 19999), - # ("Jenkins system performance", DEV_SERVER, 19999), - ] - return services - - -# ############################################################################# - - -def get_tunnel_info() -> Tuple[list, str]: - credentials = holuscre.get_credentials() - # - tunnel_info = credentials["tunnel_info"] - hdbg.dassert_is_not(tunnel_info, None) - # Add tunnels for standard services. - services = _get_services_info() - tunnel_info.extend(services) - # - ssh_key_path = credentials["ssh_key_path"] - hdbg.dassert_is_not(ssh_key_path, None) - # TODO(gp): Add check to make sure that the source ports are all different. - return tunnel_info, ssh_key_path - - -def tunnel_info_to_string(tunnel_info: list) -> str: - ret = "\n".join(map(str, tunnel_info)) - ret = hprint.indent(ret) - return ret - - -def parse_service( - service: Tuple[str, str, int, int], -) -> Dict[str, Union[str, int]]: - hdbg.dassert_eq(len(service), 4, "service=%s", service) - service_name, server, local_port, remote_port = service - return { - "service_name": service_name, - "server": server, - "local_port": local_port, - "remote_port": remote_port, - } - - -def find_service( - service_name: str, tunnel_info: list -) -> Tuple[str, str, int, int]: - found_service = False - for service in tunnel_info: - if service_name == parse_service(service)["service_name"]: - hdbg.dassert(not found_service) - found_service = True - ret: Tuple[str, str, int, int] = service - hdbg.dassert(found_service) - return ret - - -def get_server_ip(service_name: str) -> str: # pylint: disable=unused-argument - tunnel_info, _ = get_tunnel_info() - _LOG.debug("tunnels=\n%s", tunnel_info_to_string(tunnel_info)) - service = find_service("Doc server", tunnel_info) - server = parse_service(service)["server"] - server = cast(str, server) - return server - - -def _get_tunnel_info() -> Tuple[Any, str]: - credentials = holuscre.get_credentials() - # - tunnel_info = credentials["tunnel_info"] - 
hdbg.dassert_is_not(tunnel_info, None) - # Add tunnels for standard services. - services = _get_services_info() - tunnel_info.extend(services) - # - ssh_key_path = credentials["ssh_key_path"] - hdbg.dassert_is_not(ssh_key_path, None) - # TODO(gp): Add check to make sure that the source ports are all different. - return tunnel_info, ssh_key_path - - -def _tunnel_info_to_string(tunnel_info: list) -> str: - ret = "\n".join(map(str, tunnel_info)) - ret = hprint.indent(ret) - return ret - - -def _service_to_string(service: Tuple[str, str, str, str]) -> str: - service_name, server, local_port, remote_port = service - ret = ( - f"tunnel for service '{service_name}'" - + f" server='{server}'" - + f" port='{local_port}->{remote_port}'" - ) - return ret - - -# ############################################################################# - - -def _get_ssh_tunnel_process( - local_port: int, remote_port: int, fuzzy_match: bool -) -> Tuple[List[int], str]: - """ - Return the pids of the processes attached to a given port. - """ - - def _keep_line(line: str) -> bool: - keep = "ssh -i" in line - if keep: - if fuzzy_match: - keep = (f" {local_port}:localhost " in line) or ( - f" localhost:{remote_port} " in line - ) - else: - keep = f" {local_port}:localhost:{remote_port} " in line - return keep - - _LOG.debug("local_port=%d -> remote_port=%d", local_port, remote_port) - pids, txt = hsystem.get_process_pids(_keep_line) - _LOG.debug("pids=%s", pids) - _LOG.debug("txt=\n%s", txt) - return pids, txt - - -def _create_tunnel( - server_name: str, - local_port: int, - remote_port: int, - user_name: str, - ssh_key_path: str, -) -> None: - """ - Create tunnel from localhost to 'server' for the ports `local_port -> - remote_port` and `user_name`. 
- """ - ssh_key_path = os.path.expanduser(ssh_key_path) - _LOG.debug("ssh_key_path=%s", ssh_key_path) - hdbg.dassert_path_exists(ssh_key_path) - # - cmd = ( - "ssh -i {ssh_key_path} -f -nNT -L {local_port}:localhost:{remote_port}" - + " {user_name}@{server}" - ) - cmd = cmd.format( - user_name=user_name, - ssh_key_path=ssh_key_path, - local_port=local_port, - remote_port=remote_port, - server=server_name, - ) - hsystem.system(cmd, blocking=False) - # Check that the tunnel is up and running. - pids = _get_ssh_tunnel_process(local_port, remote_port, fuzzy_match=True) - hdbg.dassert_lte(1, len(pids)) - - -def _kill_ssh_tunnel_process(local_port: int, remote_port: int) -> None: - """ - Kill all the processes attached to either local or remote port. - """ - get_pids = lambda: _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=True - ) - hsystem.kill_process(get_pids) - - -# ############################################################################# - - -def start_tunnels(user_name: str) -> None: - """ - Start all the tunnels for the given user. - """ - _LOG.debug("user_name=%s", user_name) - # Get tunnel info. - tunnel_info, ssh_key_path = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, server, local_port, remote_port = service - pids, _ = _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=False - ) - if not pids: - _LOG.info("Starting %s", _service_to_string(service)) - _create_tunnel( - server, local_port, remote_port, user_name, ssh_key_path - ) - else: - _LOG.warning( - "%s already exists: skipping", _service_to_string(service) - ) - - -def stop_tunnels() -> None: - """ - Stop all the tunnels for the given user. - """ - # Get the tunnel info. 
- tunnel_info, _ = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, _, local_port, remote_port = service - _LOG.info("Stopping %s", _service_to_string(service)) - _kill_ssh_tunnel_process(local_port, remote_port) - - -def check_tunnels() -> None: - """ - Check the status of the tunnels for the given user. - """ - # Get the tunnel info. - tunnel_info, _ = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, _, local_port, remote_port = service - pids, _ = _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=False - ) - if pids: - msg = f"exists with pid={pids}" - else: - msg = "doesn't exist" - _LOG.info("%s -> %s", _service_to_string(service), msg) - - -def kill_all_tunnel_processes() -> None: - """ - Kill all the processes that have `ssh -i ...:localhost:...". - """ - - # cmd = "ps ax | grep 'ssh -i' | grep localhost: | grep -v grep" - def _keep_line(line: str) -> bool: - keep = ("ssh -i" in line) and (":localhost:" in line) - return keep - - get_pids = lambda: hsystem.get_process_pids(_keep_line) - hsystem.kill_process(get_pids) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py deleted file mode 100755 index 5faded15d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py +++ /dev/null @@ -1,208 +0,0 @@ -#!/usr/bin/env python -""" -Import as: - -import helpers.old.user_credentials as holuscre -""" - -import argparse -import logging -import os -import pprint -from typing import Any, Dict, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hparser as hparser -import 
helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def get_dev_server_ip() -> str: - """ - Get the dev server name from the user environment. - """ - env_var_name = "" - if env_var_name not in os.environ: - _LOG.error( - "Can't find '%s': re-run dev_scripts/setenv.sh?", env_var_name - ) - raise RuntimeError - dev_server = os.environ[env_var_name] - return dev_server - - -# pylint: disable=too-many-statements -def get_credentials() -> Dict[str, Any]: - """ - Report information about a user set-up as a function of: 1) user name 2) - server name 3) git repository name. - - The mandatory information are: - 1) git_user_name - 2) git_user_email - 3) conda_sh_path: the path of the script bootstrapping conda - - To find "conda_sh_path": - > which conda - /data/root/anaconda3/bin/conda - > find /data/root/anaconda3 -name "conda.sh" - - In one instruction: - > CONDA_DIR=$(dirname $(which conda))"/.."; find $CONDA_DIR -name "conda.sh" - - If there are multiple ones you want to pick the one under - `profile.d`, e.g., `/anaconda3/etc/profile.d/conda.sh` - 4) conda_env_path: the path of the dir storing the conda environments - - To find "conda_env_path" - > conda info - ... 
- envs directories : /data/saggese/.conda/envs - - The optional information are: - 5) ssh_key_path: the path of the ssh key to use - 6) tunnel_info: list of "personal" ports to forward - - This is an advanced behavior that allows to specify in your user - config a set of ports to forward from one computer (typically your - laptop) to a set of services that are specific of your set-up (e.g., - started through `run_jupyter_server.py`) - - E.g., - ```python - if server_name in ("gpmac.local", "gpmac.lan"): - if git_repo_name == "": - service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) - ``` - when GP runs `ssh_tunnels.py` from his laptop in a - `` client, a tunnel is open to the dev - server where `run_jupyter_server.py` will have started a notebook server - 7) jupyter_port: on which port to start a jupyter server on a specific server - - It's a good idea for everybody to have a different port to avoid port - collisions - 8) notebook_html_path: the path where to save html of notebooks - 9) notebook_backup_path: the path where to backup the source .ipynb code of - notebooks - """ - # - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - _LOG.debug("user_name='%s'", user_name) - _LOG.debug("server_name='%s'", server_name) - git_repo_name = hgit.get_repo_full_name_from_client(super_module=True) - # Values to assign. - git_user_name = "" - git_user_email = "" - conda_sh_path = "" - ssh_key_path = "~/.ssh/id_rsa" - tunnel_info: List[Tuple[str, str, str, str]] = [] - jupyter_port = -1 - notebook_html_path = "" - notebook_backup_path = "" - # - conda_env_path = "~/.conda/envs" - conda_env_path = os.path.expanduser(conda_env_path) - if server_name in (): - conda_sh_path = "/anaconda3/etc/profile.d/conda.sh" - if user_name == "saggese": - # GP. - git_user_name = "saggese" - git_user_email = "abc@xyz.com" - if server_name.startswith("gpmac") or server_name.startswith( - "giacintos-mbp" - ): - # Laptop. 
- conda_sh_path = "/Users/saggese/opt/anaconda3/etc/profile.d/conda.sh" - conda_env_path = "/Users/saggese/.conda/envs" - if git_repo_name == "": - # Forward port 10003 to the notebook server that is started by - # `run_jupyter_server.py` when executed on the dev server. - # service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) - # tunnel_info.append(service) - # jupyter_port = 10001 - pass - elif server_name == "": - if git_repo_name == "": - jupyter_port = 10003 - else: - hdbg.dassert_ne(conda_sh_path, "") - elif user_name == "paul": - # Paul. - git_user_name = "paul" - git_user_email = "abc@xyz.com" - if server_name in ("Pauls-MacBook-Pro.local", "Pauls-MBP"): - conda_sh_path = "/Users/paul/anaconda3/etc/profile.d/conda.sh" - conda_env_path = "/Users/paul/.conda/envs" - # Check. - for var_name, val_name in [ - ("git_user_name", git_user_name), - ("git_user_email", git_user_email), - ("conda_sh_path", conda_sh_path), - ("conda_env_path", conda_env_path), - # We allow the rest of the variables (e.g., ssh_key_path, tunnel_info) to - # be empty since in some configurations they can be undefined. - ]: - hdbg.dassert_is_not( - val_name, - None, - "Undefined '%s': add your credentials for user_name='%s' and " - "server_name='%s' to '%s'", - var_name, - user_name, - server_name, - __file__, - ) - conda_sh_path = os.path.expanduser(conda_sh_path) - conda_sh_path = os.path.abspath(conda_sh_path) - hdbg.dassert_path_exists(conda_sh_path) - # - conda_env_path = os.path.abspath(os.path.expanduser(conda_env_path)) - # Not necessarily the conda_env_path exists. - if not os.path.exists(conda_env_path): - _LOG.warning("The dir '%s' doesn't exist: creating it", conda_env_path) - hio.create_dir(conda_env_path, incremental=True) - hdbg.dassert_path_exists(os.path.dirname(conda_env_path)) - # - for service in tunnel_info: - # TODO(gp): We should call in ssh_tunnels.py to keep this encapsulated. 
- hdbg.dassert_eq(len(service), 4) - service_name, server, local_port, remote_port = service - _ = service_name, server, local_port, remote_port - ret = { - "git_user_name": git_user_name, - "git_user_email": git_user_email, - "conda_sh_path": conda_sh_path, - "conda_env_path": conda_env_path, - "ssh_key_path": ssh_key_path, - "tunnel_info": tunnel_info, - "jupyter_port": jupyter_port, - "notebook_html_path": notebook_html_path, - "notebook_backup_path": notebook_backup_path, - } - _LOG.debug("Credentials: %s", ret) - return ret - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--user", action="store", default=None, help="Impersonate a user" - ) - hparser.add_verbosity_arg(parser) - return parser - - -def _main(parser: argparse.ArgumentParser) -> None: - args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) - if args.user: - hsystem.set_user_name(args.user) - usc = get_credentials() - pprint.pprint(usc) - - -if __name__ == "__main__": - _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh deleted file mode 100644 index 45acd8194..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/sh - -# NOTE TO MAINTAINERS: this must be updated each time a new texlive is -# released! 
-default_version=2024 -tlversion=${1:-"$default_version"} -installer_archive=install-tl-unx.tar.gz - -usage () -{ - printf 'Install TeXLive\n' - printf 'Usage: %s [OPTIONS]\n\n' "$0" - printf 'Options:\n' - printf ' -t: TeXLive version (default %s)\n' "$default_version" - printf ' -m: mirror URL\n' -} - -if ! args=$(getopt 't:m:' "$@"); then - usage && exit 1 -fi -# The variable is intentionally left unquoted. -# shellcheck disable=SC2086 -set -- $args - -tlversion= -mirror_url= - -while true; do - case "$1" in - (-t) - tlversion="${2}" - shift 2 - ;; - (-m) - mirror_url="${2}" - shift 2 - ;; - (--) - shift - break - ;; - (*) - printf 'Unknown option: %s\n' "$1" - usage - exit 1 - ;; - esac -done - -[ -n "$tlversion" ] || tlversion="$default_version" - -if [ -z "$mirror_url" ] && [ "$tlversion" != "$default_version" ]; then - # Default mirror for historic releases - mirror_url="ftp://tug.org/historic/" -fi - -if [ -z "$mirror_url" ]; then - # Get the mirror URL from the redirect. Otherwise, if we were to - # always use the mirror URL, we'd run into problems whenever we get - # installer and signatures from different mirrors that are not 100% - # in sync. - mirror_url=$(wget -4 --quiet --output-document=/dev/null \ - --server-response \ - http://mirror.ctan.org/ \ - 2>&1 | \ - sed -ne 's/.*Location: \(.*\)$/\1/p' | head -n 1) -fi - -# Trim trailing slash(es) -mirror_url=$(echo "$mirror_url" | sed -e 's/\/*$//') - -if [ "$tlversion" = "$default_version" ]; then - installer_url="$mirror_url/systems/texlive/tlnet/" - repository= -else - installer_url="$mirror_url/systems/texlive/$tlversion/tlnet-final/" - repository=$installer_url -fi - -# Log the installer and repository url -printf 'installer URL: %s\n' "${installer_url}" -printf 'repository: %s\n' "${repository}" - -# Download the install-tl perl script. The archive integrity and signature is -# verified later, so it's ok if we use an insecure connection. 
-wget -4 --no-verbose --no-check-certificate \ - "$installer_url/$installer_archive" \ - "$installer_url/$installer_archive".sha512 \ - "$installer_url/$installer_archive".sha512.asc \ - || exit 1 - -## Verifiy installer integrity -# get current signing key -gpg --keyserver keyserver.ubuntu.com \ - --receive-key 0xC78B82D8C79512F79CC0D7C80D5E5D9106BAB6BC || exit 5 -gpg --verify "$installer_archive".sha512.asc || exit 5 -sha512sum "$installer_archive".sha512 || exit 5 - -## Proceed with installation -# Extract installer -mkdir -p ./install-tl -tar --strip-components 1 -zvxf "$installer_archive" -C "$PWD/install-tl" \ - || exit 1 - -# Run the default installation with the specified profile. -./install-tl/install-tl ${repository:+-repository "$repository"} \ - --profile=/root/texlive.profile - -# Cleanup installation artifacts. -rm -rf ./install-tl \ - "$installer_archive" \ - "$installer_archive.sha512" \ - "$installer_archive.sha512.asc" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt deleted file mode 100644 index 9e4ccf64f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt +++ /dev/null @@ -1,115 +0,0 @@ -# Packages listed in https://pandoc.org/MANUAL.html#creating-a-pdf - -######################################################################### -### Packages required by pandoc -amsfonts # math fonts -amsmath # math commands -babel # required when pandoc is used with lang -beamer # for presentations -bidi # used by xelatex if the `dir` variable is set -bookmark # bookmarks with hyperref -booktabs # nicer-looking tables -caption # customize captions in floating envs; required for beamer -csquotes # used for typography if the `csquotes` 
variable is set to true -euler # use AMS Euler fonts for math -eurosym # Metafont and macros for Euro sign -fancyvrb # Verbatim environments for code blocks -framed # Needed with certain `--highlight-style` options -geometry # required if the `geometry` variable set -graphics # required if the document contains images -hyperref # hyperlinks -listings # if the `--listing` option is used -lm # Latin modern fonts -lm-math # Latin modern fonts for math -memoir # frequently used document class -multirow # Tabular cells spanning multiple rows -pgf # for TikZ and beamer -setspace # required if the `linestretch` variable is used -soul # required for underlined text -subfig # Figures broken into subfigures -tools # the LaTeX standard tools bundle; e.g., calc, longtable -xcolor # colors - -# Deprecated! Only used by older pandoc versions before 3.0. -ulem - -######################################################################### -### Semi-optional packages -# -# The following packages will be used to improve output quality if -# present, but pandoc does not require them to be present: -footnotehyper # to allow footnotes in tables -microtype # for better spacing adjustments -parskip # for better inter-paragraph spaces -upquote # for straight quotes in verbatim environments -xurl # for better line breaks in URLs - -######################################################################### -### Intentionally **NOT** installed due to size constraints. -# -#xeCJR # If CJKmainfont is set, xeCJK is needed. - -######################################################################### -### Required when using pandoc-crossref -cleveref # Intelligent cross-referencing -float # Improved interface for floating objects - -######################################################################### -### Extra engines and packages for XeLaTeX and LuaLaTeX. 
-fontspec # required with xelatex or lualatex -ifmtarg # if-then-else commands used in the default template -iftex # Checks for the specific LaTeX engine being used -latexmk -lua-ul # LuaLaTeX replacement of soul -luacode -luacolor -lualatex-math # LuaTeX specific math patches -luatexbase -mathspec # used by xelatex if the `mathspec` variable is set -selnolig # Used with LuaLaTeX to disable illegal typographic ligatures -unicode-math # Unicode math support for XeTeX and LuaTeX -xetex - -######################################################################### -### Reference management tools -biber -biblatex -bibtex -natbib - -######################################################################### -### I18n and languages -# -# The choice of selected languages is historic, those were the ones -# installed by TeXLive by default for a long time. -bidi -babel-basque -babel-czech -babel-danish -babel-dutch -babel-english -babel-finnish -babel-french -babel-german -babel-hungarian -babel-italian -babel-norsk -babel-polish -babel-portuges -babel-spanish -babel-swedish -hyphen-basque -hyphen-czech -hyphen-danish -hyphen-dutch -hyphen-english -hyphen-finnish -hyphen-french -hyphen-german -hyphen-hungarian -hyphen-italian -hyphen-norwegian -hyphen-polish -hyphen-portuguese -hyphen-spanish -hyphen-swedish diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile deleted file mode 100644 index dd5364e87..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile +++ /dev/null @@ -1,32 +0,0 @@ -# texlive.profile written on Tue Feb 5 09:43:07 2019 UTC -# It will NOT be updated and reflects only the -# installation profile at installation time. 
-# -# NOTE: see also alpine/latex.Dockerfile which appends -# `binary_x86_64-linuxmusl 1` to this file, use for non-glibc distributions. -selected_scheme scheme-basic -TEXDIR /opt/texlive/texdir -TEXMFLOCAL /opt/texlive/texmf-local -TEXMFSYSVAR /opt/texlive/texdir/texmf-var -TEXMFSYSCONFIG /opt/texlive/texdir/texmf-config -TEXMFVAR ~/.texlive/texmf-var -TEXMFCONFIG ~/.texlive/texmf-config -TEXMFHOME ~/texmf -instopt_adjustpath 0 -instopt_adjustrepo 1 -instopt_letter 0 -instopt_portable 0 -instopt_write18_restricted 1 -tlpdbopt_autobackup 1 -tlpdbopt_backupdir tlpkg/backups -tlpdbopt_create_formats 1 -tlpdbopt_desktop_integration 1 -tlpdbopt_file_assocs 1 -tlpdbopt_generate_updmap 0 -tlpdbopt_install_docfiles 0 -tlpdbopt_install_srcfiles 0 -tlpdbopt_post_code 1 -tlpdbopt_sys_bin /usr/local/bin -tlpdbopt_sys_info /usr/local/share/info -tlpdbopt_sys_man /usr/local/share/man -tlpdbopt_w32_multi_user 1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py deleted file mode 100644 index d8807f46b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py +++ /dev/null @@ -1,411 +0,0 @@ -""" -Import as: - -import helpers.repo_config_utils as hrecouti -""" - -import logging -import os -from typing import Any, Dict, List, Optional, Union - -import yaml - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - -# Copied from hprint to avoid import cycles. - - -# TODO(gp): It should use *. -def indent(txt: str, num_spaces: int = 2) -> str: - """ - Add `num_spaces` spaces before each line of the passed string. 
- """ - spaces = " " * num_spaces - txt_out = [] - for curr_line in txt.split("\n"): - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - res = "\n".join(txt_out) - return res - - -# End copy. - - -# ############################################################################# - - -def _find_config_file(file_name: str) -> str: - """ - Find recursively the dir of config file. - - This function traverses the directory hierarchy upward from a - specified starting path to find the directory that contains the - config file. - - :param file_name: name of the file to find - :return: path to the file - """ - curr_dir = os.getcwd() - while True: - path = os.path.join(curr_dir, file_name) - if os.path.exists(path): - break - parent = os.path.dirname(curr_dir) - if parent == curr_dir: - # We cannot use helpers since it creates circular import. - raise FileNotFoundError( - f"Could not find '{file_name}' in current directory or any parent directories" - ) - curr_dir = parent - return path - - -def _get_env_var( - env_name: str, - as_bool: bool = False, - default_value: Any = None, - abort_on_missing: bool = True, -) -> Union[str, bool]: - """ - Get an environment variable by name. - - :param env_name: name of the env var - :param as_bool: convert the value into a Boolean - :param default_value: the default value to use in case it's not - defined - :param abort_on_missing: if the env var is not defined aborts, - otherwise use the default value - :return: value of env var - """ - if env_name not in os.environ: - if abort_on_missing: - assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" - else: - return default_value - value = os.environ[env_name] - if as_bool: - # Convert the value into a boolean. 
- if value in ("0", "", "None", "False"): - value = False - else: - value = True - return value - - -# ############################################################################# -# RepoConfig -# ############################################################################# - - -class RepoConfig: - def __init__(self, data: Dict) -> None: - """ - Set the data to be used by the module. - """ - self._data = data - - def set_repo_config_data(self, data: Dict) -> None: - self._data = data - - @classmethod - def from_file(cls, file_name: Optional[str] = None) -> "RepoConfig": - """ - Return the text of the code stored in `repo_config.yaml`. - """ - if file_name is None: - file_name = RepoConfig._get_repo_config_file() - assert os.path.exists(file_name), f"File '{file_name}' doesn't exist" - _LOG.debug("Reading file_name='%s'", file_name) - try: - with open(file_name, "r") as file: - # Use `safe_load()` to avoid executing arbitrary code. - data = yaml.safe_load(file) - assert isinstance(data, dict), ( - "data=\n%s\nis not a dict but %s", - str(data), - type(data), - ) - except Exception as e: - raise ValueError(f"Error reading YAML file {file_name}: {e}") - return cls(data) - - # TODO(gp): -> __str__? - def config_func_to_str(self) -> str: - """ - Return the string representation of the config function. - """ - ret: List[str] = [] - ret.append(f"get_host_name='{self.get_host_name()}'") - ret.append( - f"get_html_dir_to_url_mapping='{self.get_html_dir_to_url_mapping()}'" - ) - ret.append(f"get_invalid_words='{self.get_invalid_words()}'") - ret.append( - f"get_docker_base_image_name='{self.get_docker_base_image_name()}'" - ) - ret.append(f"get_release_team='{self.get_release_team()}'") - txt = "\n".join(ret) - return txt - - # repo_info - - # TODO(gp): -> get_repo_name - def get_name(self) -> str: - """ - Return the name of the repo, e.g., in `//amp`. 
- """ - value = self._data["repo_info"]["repo_name"] - return f"//{value}" - - def get_github_repo_account(self) -> str: - """ - Return the account name of the repo on GitHub, e.g., `causify-ai`, - `gpsaggese`. - """ - value = self._data["repo_info"]["github_repo_account"] - return value - - def get_repo_short_name(self) -> str: - """ - Return the short name of the repo, e.g., `amp`. - """ - value = self._data["repo_info"]["repo_name"] - return value - - def get_repo_full_name(self) -> str: - """ - Return the full name of the repo, e.g., `causify-ai/amp`, - `gpsaggese/notes`. - """ - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_name = self._data["repo_info"]["repo_name"] - value = f"{github_repo_account}/{repo_name}" - return value - - def get_repo_full_name_with_hostname(self) -> str: - """ - Return the full name of the repo, e.g., `github.com/causify-ai/amp`. - """ - repo_full_name = self.get_repo_full_name() - host_name = self.get_host_name() - value = f"{host_name}/{repo_full_name}" - return value - - # TODO(gp): We should replace this with `get_full_repo_name()`, since - # the mapping is not needed. - def get_repo_map(self) -> Dict[str, str]: - """ - Return a mapping of short repo name -> long repo name. - - E.g., - ``` - {"amp": "causify-ai/amp"} - {"helpers": "causify-ai/helpers"} - ``` - """ - repo_name = self._data["repo_info"]["repo_name"] - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_map = {repo_name: f"{github_repo_account}/{repo_name}"} - return repo_map - - # TODO(gp): Is this needed? 
- def get_extra_amp_repo_sym_name(self) -> str: - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_name = self._data["repo_info"]["repo_name"] - if repo_name in ["orange", "lemonade"]: - # TODO(Grisha): it should return cmamp name, not the current - return f"{github_repo_account}/cmamp" - else: - return f"{github_repo_account}/{repo_name}" - - # TODO(gp): -> get_github_host_name - def get_host_name(self) -> str: - """ - Return the host name of the repo, e.g., `github.com`. - """ - value = self._data["repo_info"]["github_host_name"] - return value - - def get_invalid_words(self) -> List[str]: - """ - Return a list of words that are considered invalid in the repo. - """ - values = self._data["repo_info"]["invalid_words"] - if values is None: - invalid_words = [] - else: - invalid_words = values.split(",") - return invalid_words - - def get_issue_prefix(self) -> str: - """ - Return the prefix for the issue, e.g., `CmampTask`, `HelpersTask`. - """ - value = self._data["repo_info"]["issue_prefix"] - return value - - # docker_info - - def get_docker_base_image_name(self) -> str: - """ - Return a base name for docker image. - - E.g., `helpers`. - """ - value = self._data["docker_info"]["docker_image_name"] - return value - - def get_release_team(self) -> str: - """ - Return the release team name for docker image. - - E.g., `dev_system`. - """ - value = self._data["docker_info"].get("release_team") - return value - - # s3_bucket_info - - def get_unit_test_bucket_path(self) -> str: - """ - Return the path to the unit test bucket. - """ - value = self._data["s3_bucket_info"]["unit_test_bucket_name"] - return value - - def get_html_bucket_path(self) -> str: - """ - Return the path to the bucket where published HTMLs are stored. - """ - value = self._data["s3_bucket_info"]["html_bucket_name"] - return value - - def get_html_bucket_path_v2(self) -> str: - """ - Return the path to the bucket with published HTMLs. 
- - "v2" version allows for the published HTMLs to be browsed. - """ - html_bucket = self.get_html_bucket_path() - html_bucket_path = os.path.join(html_bucket, "v2") - return html_bucket_path - - def get_html_ip(self) -> str: - """ - Return the IP of the bucket where published HTMLs are stored. - """ - value = self._data["s3_bucket_info"]["html_ip"] - return value - - def get_html_ip_v2(self) -> str: - """ - Return the IP of the bucket with published HTMLs. - - "v2" version allows for the published HTMLs to be browsed. - """ - ip = self.get_html_ip() - ip_v2 = f"{ip}/v2" - return ip_v2 - - def get_html_dir_to_url_mapping(self) -> Dict[str, str]: - """ - Return a mapping between directories mapped on URLs. - - This is used when we have web servers serving files from - specific directories. - """ - dir_to_url = { - self.get_html_bucket_path(): self.get_html_ip(), - self.get_html_bucket_path_v2(): self.get_html_ip_v2(), - } - return dir_to_url - - def get_shared_configs_bucket_name(self, environment: str) -> str: - """ - Return the name of the shared configs bucket. - """ - if "shared_configs_bucket_name" not in self._data["s3_bucket_info"]: - return None - value: Dict[str, str] = self._data["s3_bucket_info"][ - "shared_configs_bucket_name" - ] - bucket_name = value.get(environment, None) - return bucket_name - - def get_dir_suffix(self) -> str: - """ - Return the suffix of the dev_scripts_{dir_suffix} dir for the repo. - - E.g., `helpers` for `dev_scripts_helpers` in //helpers repo. - """ - value = self._data["runnable_dir_info"]["dir_suffix"] - return value - - def use_helpers_as_nested_module(self) -> bool: - """ - Return whether the helpers repo is used as a nested module. - """ - value = bool( - self._data["runnable_dir_info"]["use_helpers_as_nested_module"] - ) - return value - - # TODO(gp): Add functions for container_registry_info. - - def get_container_registry_url(self, registry: str = "ecr") -> str: - """ - Return the URL of the container registry. 
- - :param registry: the name of the container registry (e.g., `ecr`, `ghcr`) - :return: the URL of the container registry - """ - return self._data["container_registry_info"][registry] - - # Utils. - - @staticmethod - def _get_repo_config_file() -> str: - """ - Return the absolute path to `repo_config.yml` that should be used. - - The `repo_config.yml` is determined based on an overriding env var or - based on the root of the Git path. - """ - env_var = "CSFY_REPO_CONFIG_PATH" - file_path = _get_env_var(env_var, abort_on_missing=False) - if file_path: - _LOG.warning( - "Using value '%s' for %s from env var", file_path, env_var - ) - else: - # client_root = _find_git_root() - # We cannot use git root here because the config file doesn't always - # reside in the root of the repo (e.g., it can be in subdir such as - # //cmamp/ck.infra for runnable dir). - file_path = _find_config_file("repo_config.yaml") - file_path = os.path.abspath(file_path) - _LOG.debug("Reading file_name='%s'", file_path) - # Check if path exists. - # We can't use helpers since it creates circular import. - if not os.path.exists(file_path): - raise FileNotFoundError(f"File '{file_path}' doesn't exist") - return file_path - - -_REPO_CONFIG = None - - -def get_repo_config() -> RepoConfig: - """ - Return the repo config object. 
- """ - global _REPO_CONFIG - if _REPO_CONFIG is None: - _REPO_CONFIG = RepoConfig.from_file() - return _REPO_CONFIG diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py deleted file mode 100644 index cd24fecf1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Import as: - -import helpers.stage_linked_file as hstlifil -""" - -import argparse -import logging -import os -import shutil -from typing import List - -_LOG = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - - -def find_symlinks(dst_dir: str) -> List[str]: - """ - Find all symbolic links in the destination directory. - - :param dst_dir: Directory to search for symbolic links. - :return: List of paths to symbolic links. - """ - symlinks = [] - for root, _, files in os.walk(dst_dir): - for file in files: - file_path = os.path.join(root, file) - if os.path.islink(file_path): - symlinks.append(file_path) - return symlinks - - -def stage_links(symlinks: List[str]) -> None: - """ - Replace symbolic links with writable copies of the linked files. - - :param symlinks: List of symbolic links to replace. - """ - for link in symlinks: - # Resolve the original file the symlink points to. - target_file = os.readlink(link) - if not os.path.exists(target_file): - _LOG.warning( - f"Warning: Target file does not exist for link {link} -> {target_file}" - ) - continue - # Replace the symlink with a writable copy of the target file. - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable. 
- os.chmod(link, 0o644) - _LOG.info("Staged: %s -> %s", link, target_file) - except Exception as e: - _LOG.error("Error staging link %s: %s", link, e) - - -def main(): - parser = argparse.ArgumentParser( - description="Stage symbolic links for modification." - ) - parser.add_argument( - "--dst_dir", required=True, help="Destination directory." - ) - args = parser.parse_args() - symlinks = find_symlinks(args.dst_dir) - if not symlinks: - _LOG.info("No symbolic links found to stage.") - return - stage_links(symlinks) - _LOG.info("Staged %s files for modification.", len(symlinks)) - - -if __name__ == "__main__": - main() - -""" -Usage - - - python3 stage_linked_file.py --dst_dir /path/to/dst - -""" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py deleted file mode 100644 index 27344070d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Import as: - -import helpers.telegram_notify.config as htenocon -""" - -import getpass -import os -from typing import Tuple - -import helpers.hdbg as hdbg - -NOTIFY_JUPYTER_TOKEN = os.environ["CSFY_TELEGRAM_TOKEN"] - - -def get_info() -> Tuple[str, str]: - user = getpass.getuser() - # telegram_token is the token of your bot - # - You can use @NotifyJupyterBot, its token is - # '***REMOVED***' - # chat_id: To get it, start messaging with the bot. 
Then go to - # https://api.telegram.org/bot/getUpdates and get your chat id. - # (If you are using @NotifyJupyterBot, go to - # https://api.telegram.org/bot***REMOVED***/getUpdates ) - if user in ("saggese", "gsaggese", "root"): - telegram_token = NOTIFY_JUPYTER_TOKEN - chat_id = "967103049" - else: - hdbg.dfatal(f"User `{user}` is not in the config.py") - return telegram_token, chat_id diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py deleted file mode 100644 index e90c3968d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python - -""" -Import as: - -import helpers.telegram_notify.get_chat_id as htngchid -""" - -import argparse -import json -import logging -from typing import Dict, cast - -import requests - -import helpers.telegram_notify.config as htenocon -import helpers.telegram_notify.telegram_notify as htnoteno - -_LOG = logging.getLogger(__name__) -_LOG.setLevel(logging.INFO) - - -def _get_updates_dict(token: str) -> dict: - updates_cont = requests.post( - f"https://api.telegram.org/bot{token}/getUpdates" - ).content - updates_dict = json.loads(updates_cont) - assert updates_dict["ok"], updates_dict - return cast(dict, updates_dict) - - -def _get_username_id(updates_dict: dict) -> Dict[str, str]: - return { - result["message"]["from"]["username"]: result["message"]["from"]["id"] - for result in updates_dict["result"] - } - - -def _get_chat_id_updates_dict(username: str, updates_dict: dict) -> str: - username_id = _get_username_id(updates_dict) - assert username in username_id.keys(), ( - "Either the username is wrong or you" - " have not sent a message to the bot yet" - ) - return 
username_id[username] - - -def send_chat_id(token: str, username: str) -> str: - updates_dict = _get_updates_dict(token) - chat_id = _get_chat_id_updates_dict(username, updates_dict) - htnoteno.TelegramNotify.send( - text=f"Your chat id is: {chat_id}", token=token, chat_id=chat_id - ) - return chat_id - - -def _main() -> None: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--username", required=True, action="store", type=str) - parser.add_argument("--token", required=False, action="store", type=str) - args = parser.parse_args() - username = args.username - if args.token: - token_ = args.token - else: - _LOG.info("Using default token for NotifyJupyterBot.") - token_ = htenocon.NOTIFY_JUPYTER_TOKEN - chat_id_ = send_chat_id(token_, username) - print(f"Your chat id is: {chat_id_}") - - -if __name__ == "__main__": - _main() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py deleted file mode 100644 index 6e0e3eb16..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Import as: - -import helpers.telegram_notify.telegram_notify as htnoteno -""" - -import json -import logging -import os -import os.path -import re -import sys -from typing import Optional - -import requests - -# Alternative that works for both Python 2 and 3: -import requests.compat as rcompa - -import helpers.telegram_notify.config as htenocon - -_LOG = logging.getLogger(__name__) - - -def _get_launcher_name() -> str: - """ - Return the name of jupyter notebook or path to python file you are running. 
- """ - import ipykernel - - try: # Python 3 (see Edit2 below for why this may not work in Python 2) - import notebook.notebookapp as ihnb - except ImportError: # Python 2 - import warnings - - import IPython.utils.shimmodule as iush - - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=iush.ShimWarning) - import IPython.html.notebookapp as ihnb - launcher = sys.argv[0] - if os.path.basename(launcher) == "ipykernel_launcher.py": - match = re.search( - "kernel-(.*).json", ipykernel.connect.get_connection_file() - ) - if match is None: - return launcher - kernel_id = match.group(1) - servers = ihnb.list_running_servers() - for ss in servers: - response = requests.get( - rcompa.urljoin(ss["url"], "api/sessions"), # type: ignore - params={"token": ss.get("token", "")}, - ) - for nn in json.loads(response.text): - if nn["kernel"]["id"] == kernel_id: - relative_path = nn["notebook"]["path"] - return str(os.path.basename(relative_path)) - return launcher - - -# ############################################################################# -# TelegramNotebookNotify -# ############################################################################# - - -class TelegramNotebookNotify: - """ - Sends notifications. - """ - - def __init__(self) -> None: - self.launcher_name = _get_launcher_name() - self.token, self.chat_id = htenocon.get_info() - - @staticmethod - def send( - text: str, token: Optional[str], chat_id: Optional[str] - ) -> Optional[bytes]: - if chat_id is None or token is None: - _LOG.warning( - "Not sending notifications. To send notifications, both " - "`chat_id` and `token` need to be specified. Go to README.md" - "for more information." - ) - return None - payload = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{token}/sendMessage", - data=payload, - ).content - - def notify(self, message: str) -> None: - msg = f"
{self.launcher_name}
: {message}" - self.send(msg, self.token, self.chat_id) - - -# ############################################################################# -# _RequestsHandler -# ############################################################################# - - -class _RequestsHandler(logging.Handler): - def emit(self, record: logging.LogRecord) -> bytes: # type: ignore - token, chat_id = htenocon.get_info() - log_entry = self.format(record) - payload = {"chat_id": chat_id, "text": log_entry, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{token}/sendMessage", - data=payload, - ).content - - -# ############################################################################# -# _LogFormatter -# ############################################################################# - - -class _LogFormatter(logging.Formatter): - def format(self, record: logging.LogRecord) -> str: - launcher_name = _get_launcher_name() - return f"
{launcher_name}
: {record.msg}" - - -def init_tglogger(log_level: int = logging.DEBUG) -> None: - """ - Send notifications using logging. - """ - _tg_log = logging.getLogger("telegram_notify") - _tg_log.setLevel(log_level) - handler = _RequestsHandler() - formatter = _LogFormatter() - handler.setFormatter(formatter) - _tg_log.handlers = [handler] - - -# ############################################################################# -# TelegramNotify -# ############################################################################# - - -class TelegramNotify: - """ - Send notifications. - """ - - def __init__(self) -> None: - self.token, self.chat_id = htenocon.get_info() - - def send(self, text: str) -> Optional[bytes]: - payload = {"chat_id": self.chat_id, "text": text, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{self.token}/sendMessage", - data=payload, - ).content diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): 
trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 
-1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt deleted file mode 100644 index 00529190c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ - a b c -0 0 2 2 -1 3 4 5 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 
-1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt deleted file mode 100644 index 95d09f2b1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt deleted file mode 100644 index efbdde823..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt +++ /dev/null @@ -1,31 +0,0 @@ -original shape=(7, 2) -Head: -{ - "0":{ - "col_1":1.0, - "col_2":1 - }, - "1":{ - "col_1":2.0, - "col_2":2 - }, - "2":{ - "col_1":3.0, - "col_2":3 - } -} -Tail: -{ - "4":{ - "col_1":5.0, - "col_2":5 - }, - "5":{ - "col_1":6.0, - "col_2":6 - }, - "6":{ - "col_1":7.0, - "col_2":7 - } -} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt deleted file mode 100644 index cab20a014..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", - "col_2":1 - }, - "1":{ - "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", - "col_2":2 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt deleted file mode 100644 index 3c50fde31..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt +++ /dev/null @@ -1,31 +0,0 @@ -original shape=(7, 2) -Head: -{ - "0":{ - "col_1":1.0, - "col_2":1 - }, - "1":{ - "col_1":2.0, - "col_2":2 - }, - "2":{ - "col_1":3.0, - "col_2":3 - } -} -Tail: -{ - "4":{ - "col_1":5.0, - "col_2":5 - }, - "5":{ - "col_1":6.0, - "col_2":6 - }, - "6":{ - "col_1":7.0, - "col_2":7 - } -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt deleted file mode 100644 index cab20a014..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", - "col_2":1 - }, - "1":{ - "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", - "col_2":2 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt deleted file mode 100644 index 9c8c2a07e..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -## docker_images_ls_repo: -## docker_login: -eval $(aws ecr get-login --profile am --no-include-email --region us-east-1) -docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt deleted file mode 100644 index e2df28b1f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## docker_kill: all=True -docker ps -a -docker rm -f $(docker ps -a -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt deleted file mode 100644 index 44a4748dc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## docker_kill: all=False -docker ps -l -docker rm -f $(docker ps -l -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt deleted file mode 100644 index 613a41c2d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -docker ps --format='table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}\t{{.Label "com.docker.compose.service"}}' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt deleted file mode 100644 index 0c262d7ea..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_clean: dry_run=False -find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt deleted file mode 100644 index e8a2a8473..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_fetch_master: -git fetch origin master:master diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt deleted file mode 100644 index 36f22574b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_pull: -git pull --autostash -git submodule foreach 'git pull --autostash' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt deleted file mode 100644 index 06d15ab26..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## print_setup: -ECR_BASE_PATH=665840871993.dkr.ecr.us-east-1.amazonaws.com -BASE_IMAGE=amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt deleted file mode 100644 index 265ef5fcf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('eval $(aws ecr get-login --no-include-email --region us-east-1)') -call('docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt deleted file mode 100644 index 202366437..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('docker ps -a') -call('docker rm -f $(docker ps -a -q)') diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt deleted file mode 100644 index 4ee19d730..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('docker ps -l') -call('docker rm -f $(docker ps -l -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt deleted file mode 100644 index c8b46747d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('eval $(aws ecr get-login --profile am --no-include-email --region us-east-1)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt deleted file mode 100644 index 614c9318f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('docker ps --format=\'table {{.ID}}\\t{{.Label 
"user"}}\\t{{.Image}}\\t{{.Command}}\\t{{.RunningFor}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Label "com.docker.compose.service"}}\'') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt deleted file mode 100644 index 029e8a64f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('eval $(aws ecr get-login --no-include-email --region us-east-1)') -call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev', pty=True) -call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/helpers:prod', pty=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt deleted file mode 100644 index 7d238de7e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call("docker stats --no-stream --format='table {{.ID}}\\t{{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}\\t{{.PIDs}}'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt deleted file mode 100644 index dc7c8a671..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo alphamatic/amp --draft --title "AmpTask1310_Implement_RH1E" --body ""') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt deleted file mode 100644 index 1aa1034a0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt deleted file mode 100644 index 
d93250129..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "hello_world\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt deleted file mode 100644 index a7010f356..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt deleted file mode 100644 index 7e38db5a7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('gh workflow run fast_tests.yml --ref AmpTask1310_Implement_RH1E') -call('gh workflow run slow_tests.yml --ref AmpTask1310_Implement_RH1E') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt deleted file mode 100644 index e79742c64..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash') -call('git checkout -b test') -call('git push --set-upstream origin test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt deleted file mode 100644 index 25c178bb7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash --rebase') -call('git checkout -b AmpTask123_test') -call('git 
push --set-upstream origin AmpTask123_test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt deleted file mode 100644 index 72eb80ddc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash --rebase') -call('git checkout -b CmampTask1_fix_amp_tmux_session_script') -call('git push --set-upstream origin CmampTask1_fix_amp_tmux_session_script') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt deleted file mode 100644 index b7c58a3d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git clean -fd >/dev/null 2>&1') -call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") -call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt deleted file mode 100644 index b7c58a3d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git clean -fd >/dev/null 2>&1') -call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") -call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt deleted file mode 100644 index 0241acc2e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('git fetch origin master:master') -call("git submodule foreach 'git fetch origin master:master'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt deleted file mode 100644 index d9d3fc510..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git fetch origin master:master') -call("git submodule foreach 'git fetch origin master:master'") -call('git merge master') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt deleted file mode 100644 index 78883f1ba..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('git pull --autostash') -call("git submodule foreach 'git pull --autostash'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt deleted file mode 100644 index 70a06c388..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files ./helpers/lib_tasks.py ./helpers/test/TestDryRunTasks2.test_git_branch_create/output/test.txt ./helpers/test/TestDryRunTasks2.test_git_merge_master/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint1/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint2/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint3/output/test.txt ./helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt deleted file mode 100644 index 28b088e72..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files core/dataflow/builders.py core/dataflow/core.py core/dataflow/dataflow_design.md core/dataflow/runners.py core/dataflow/visualization.py core/test/test_core.py dev_scripts/client_setup/build.sh devops/docker_build/install_packages.sh devops/docker_build/install_requirements.sh devops/docker_build/poetry.lock devops/docker_build/pyproject.toml documentation/general/workflows.txt helpers/datetime_.py helpers/git.py helpers/lib_tasks.py helpers/test/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks1.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks1.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks1.test_git_clean/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks1.test_print_setup/output/test.txt helpers/test/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks2.test_docker_login/output/test.txt helpers/test/TestDryRunTasks2.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks2.test_docker_pull/output/test.txt helpers/test/TestDryRunTasks2.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks2.test_gh_create_pr/output/test.txt helpers/test/TestDryRunTasks2.test_gh_issue_title/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_list/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
helpers/test/TestDryRunTasks2.test_git_branch_files/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean2/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks2.test_print_setup/output/test.txt helpers/test/test_cache.py helpers/test/test_lib_tasks.py im/kibot/data/load/kibot_s3_data_loader.py im/kibot/data/load/test/test_s3_data_loader.py tasks.py test/test_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt deleted file mode 100644 index 9fac068a3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files /app/amp/helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt deleted file mode 100644 index ac6627a2e..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt +++ /dev/null @@ -1,101 +0,0 @@ -,Name,Frequency,Country,Unit,Start Date,End Date,Commodity,Contracts,Business Category,is_alive,source_code,dataset_code,series_code,original_name,extracted_frequency,is_downloaded,WIND Commodity,Update,id_is_broken -0,Coal and coke CO2 emissions – Aruba – million metric tonnes carbon dioxide,Annual,Aruba,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ABW-MMTCD.A,"Coal and coke CO2 emissions, Aruba, Annual — million metric tonnes carbon dioxide",Annual,success,,, -1,Coal and coke CO2 emissions – Albania – million metric tonnes carbon dioxide,Annual,Albania,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ALB-MMTCD.A,"Coal and coke CO2 emissions, Albania, Annual — million metric tonnes carbon dioxide",Annual,success,,, -2,Coal and coke CO2 emissions – United Arab Emirates – 
million metric tonnes carbon dioxide,Annual,United Arab Emirates,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARE-MMTCD.A,"Coal and coke CO2 emissions, United Arab Emirates, Annual — million metric tonnes carbon dioxide",Annual,success,,, -3,Coal and coke CO2 emissions – Argentina – million metric tonnes carbon dioxide,Annual,Argentina,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARG-MMTCD.A,"Coal and coke CO2 emissions, Argentina, Annual — million metric tonnes carbon dioxide",Annual,success,,, -4,Coal and coke CO2 emissions – Armenia – million metric tonnes carbon dioxide,Annual,Armenia,"kw, in 
millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARM-MMTCD.A,"Coal and coke CO2 emissions, Armenia, Annual — million metric tonnes carbon dioxide",Annual,success,,, -5,Germany: Term Structure of Interest Rate on Listed Federal Securities: 1,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,G0008063,,,success,Gold,2019-12-20,False -6,Germany: Term Structure of Interest Rate on Listed Federal Securities: 10,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,SG000S6E,,,not_attempted,Gold,2019-12-20,True -7,France: Treasury Bills Reference Rate: 1Y,Daily,France,%,1989-01-03,2019-12-19,,,Upstream,True,WIND,Banque de France,G0008146,,,success,Gold,2019-12-20,False -8,France: Treasury Bills Reference Rate: 10Y,Daily,France,%,1987-01-02,2019-12-19,,,Upstream,True,WIND,Banque de France,G1400003,,,success,Gold,2019-12-20,False -9,Spain: Government Securities Yields: 12M,Daily,Spain,%,1987-07-01,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700068,,,success,Gold,2019-12-20,False -10,Spain: Government Securities Yields: 10Y,Daily,Spain,%,1989-07-18,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700075,,,success,Gold,2019-12-20,False -11,Italy: Government Securities Yields: 3Y_,Daily,Italy,%,1989-07-24,2019-12-19,,,Upstream,True,WIND,Bank of Italy,G1700018,,,success,Gold,2019-12-20,False -12,Italy: Government Securities Yields: 10,Daily,Italy,%,1991-03-05,2019-12-19,,,Upstream,True,WIND,Bank of 
Italy,G1700020,,,success,Gold,2019-12-20,False -13,Futures Closing Price (Active Contract): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0066358,,,success,Gold,2019-12-20,False -14,Futures Settlement Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0068142,,,success,Gold,2019-12-20,False -15,Futures Closing Price (Continuous): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0147027,,,success,Gold,2019-12-20,False -16,Futures Closing Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0031868,,,success,Gold,2019-12-20,False -17,Futures Settlement Price (Active Contract): 
Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0181376,,,success,Gold,2019-12-20,False -18,Futures Trading Volume: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6409,,,not_attempted,Gold,2019-12-20,True -19,Futures Turnover: Gold,Daily,China,"CNY, in 10,000s",2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6410,,,not_attempted,Gold,2019-12-20,True -20,Futures Position: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6411,,,not_attempted,Gold,2019-12-20,True -21,Futures Trading Volume (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0096581,,,success,Gold,2019-12-20,False -22,Futures Position (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00096614,,,not_attempted,Gold,2019-12-20,True -23,Closing Stock on Warrant: Gold,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049497,,,success,Gold,2019-12-20,False -24,Duplicate) Closing Stock on Warrant: Gold: Total,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049505,,,success,Gold,2019-12-20,False -25,Futures Closing Price (Continuous): COMEX Gold,Daily,United States,USD/ounce,1975-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0069669,,,success,Gold,2019-12-20,False -26,Futures Closing Price (Active Contract}: COMEX 
Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0180903,,,success,Gold,2019-12-20,False -27,Futures Closing Price (Continuous): COMEX Mini Gold,Daily,United States,USD/ounce,2010-12-06,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0147014,,,success,Gold,2019-12-19,False -28,Futures Settlement Price (Active Contract}: COMEX Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0945,,,not_attempted,Gold,2019-12-20,True -29,Futures Closing Price (Active Contract: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0906,,,not_attempted,Gold,2019-12-20,True -30,Futures Settlement Price (Active Contract}: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G094E,,,not_attempted,Gold,2019-12-20,True -31,Futures Trading Volume (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M00096642,,,not_attempted,Gold,2019-12-20,True -32,Futures Position (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M0096645,,,success,Gold,2019-12-20,False -33,COMEX: Silver: Inventory,Daily,United States,ozt,1992-09-01,2019-12-19,Silver,"COMEX:6Q,COMEX:QI,COMEX:SI,COMEX:SIL,COMEX:SIT,COMEX:SSP,COMEX:SV,COMEX:XY,COMEX:YV,DGCX:DS,ICEUS:YI,ICEUS:ZI,IFUS:HIO,IFUS:YI,IFUS:ZI,LME:AG,MCX:SILVER,SHFE:AG,TCE:12",Midstream,True,WIND,CME,S0114145,,,success,Gold,2019-12-20,False -34,SGE Gold: Closing Price: Au9995,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035818,,,success,Gold,2019-12-20,False -35,SGE Gold: Closing Price: 
Au9999,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035819,,,success,Gold,2019-12-20,False -36,SGE Gold: Closing Price: Au100G,Daily,China,yuan/g,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035820,,,success,Gold,2019-12-20,False -37,SGE Gold: Closing Price: AuT+D,Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035821,,,success,Gold,2019-12-20,False -38,SGE Gold: Settlement Price: Au (T+D),Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0202645,,,success,Gold,2019-12-20,False -39,SGE Gold: Volume: Au9995,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035824,,,success,Gold,2019-12-20,False -40,SGE Gold: Volume: Au9999,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035825,,,success,Gold,2019-12-20,False -41,SGE Gold: Volume: Au100g,Daily,China,kg,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035826,,,success,Gold,2019-12-20,False -42,SGE Gold: Volume: AuT+D,Daily,China,kg,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035827,,,success,Gold,2019-12-20,False -43,SGE Gold: Position: Au (T+D),Daily,China,kg,2008-08-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S5806075,,,success,Gold,2019-12-20,False -44,SGE Gold: Deferred Payment of Direction: Au 
(T+D),Daily,China,,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0182163,,,success,Gold,2019-12-20,False -45,SGE Gold: Delivery Volume: Au (T+D),Daily,China,kg,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0181748,,,success,Gold,2019-12-20,False -46,Loco London Gold: In USD,Daily,United Kingdom,USD/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031645,,,success,Gold,2019-12-20,False -47,Loco Londen Gold: In EUR,Daily,United Kingdom,EUR/ounce,1999-01-04,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031646,,,success,Gold,2019-12-20,False -48,Loco Londen Gold: In GBP,Daily,United Kingdom,GBP/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031647,,,success,Gold,2019-12-20,False -49,Closing Price: Paper Gold: Bank of China,Daily,China,yuan/g,2011-01-20,2014-10-31,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Company Website,S5806366,,,not_attempted,Gold,2014-11-03,False -50,Closing Price: Paper Gold: China Construction Bank,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806367,,,success,Gold,2019-12-20,False -51,Closing Price: Paper Gold: Industrial and Commercial Bank of China,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806365,,,success,Gold,2019-12-20,False -52,Price: Gold: 99.95,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801701,,,success,Gold,2019-12-20,False -53,Price: Gold: 
99.99,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801702,,,success,Gold,2019-12-20,False -54,SPDR Gold Shares: Total Net Asset Value Qunces in the Trust,Daily,United States,ozt,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105520,,,success,Gold,2019-12-20,False -55,SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105521,,,success,Gold,2019-12-20,False -56,SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United States,USD,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105522,,,success,Gold,2019-12-20,False -57,iShares: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United 
States,USD,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807688,,,success,Gold,2019-12-20,False -58,iShares: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United States,ozt,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807690,,,success,Gold,2019-12-20,False -59,iShares: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807691,,,success,Gold,2019-12-20,False -60,GBS: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2004-04-01,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807692,,,success,Gold,2019-12-20,False -61,GBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807693,,,success,Gold,2019-12-20,False -62,GBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807694,,,success,Gold,2019-12-20,False -63,PHAU: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2007-04-25,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807695,,,success,Gold,2019-12-20,False -64,PHAU: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807696,,,success,Gold,2019-12-20,False -65,PHAU: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807697,,,success,Gold,2019-12-20,False -66,SGBS: SPDR Gold Shares: Total 
Net Asset Value in the Trust,Daily,Switzerland,USD,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808213,,,success,Gold,2019-12-20,False -67,SGBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808214,,,success,Gold,2019-12-20,False -68,SGBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808215,,,success,Gold,2019-12-20,False -69,GOLD: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,AUD,2004-01-09,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807698,,,success,Gold,2019-12-20,False -70,GOLD: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807699,,,success,Gold,2019-12-20,False -71,GOLD: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807700,,,success,Gold,2019-12-20,False -72,SGOL: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,Switzerland,USD,2009-09-04,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807701,,,success,Gold,2019-10-31,False -73,SGOL: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807702,,,success,Gold,2019-10-31,False -74,SGOL: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807703,,,success,Gold,2019-10-31,False -75,Price: Chinese Major Ports FOB: 
Silicon: 98.5,Daily,United Kingdom,USD/ton,2006-06-02,2014-05-30,,,Upstream,False,WIND,According to the Press Finishing,S0149035,,,not_attempted,,2014-06-03,False -76,"Price: Silicon Powder: -200 Mesh,-300 Mesh: Shanghai-made",Daily,China,yuan/kg,2005-01-04,2019-12-24,,,Upstream,True,WIND,According to the Press Finishing,S5801759,,,success,,2019-12-24,False -77,Market Price: Secondary Metallurgical Coke: National,Daily,China,yuan/ton,2013-12-31,2019-12-20,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,National Bureau of Statistics of China,S5914487,,,success,,2019-12-24,False -78,"Ex-factory Price (Tax-inclusive): Metallurgical Coke Grade 3 (A15%,0.6%): Yunng",Daily,China,yuan/ton,2004-10-22,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S0033511,,,success,,2019-12-24,False -79,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Rizhao,Daily,China,yuan/ton,2012-03-08,2015-02-11,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,False,WIND,Wind,S5118432,,,success,,2015-02-11,False -80,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Zibo,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118277,,,success,,2019-12-24,False -81,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Yinchuan,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118276,,,success,,2019-12-24,False -82,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Xinjiang County,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118275,,,success,,2019-12-24,False -83,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Xuzhou,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118273,,,success,,2019-12-24,False -84,Exit Price (Tax-inclusive): Secondary Metallurgical Coke: Tianjin,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118270,,,success,,2019-12-24,False -85,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Shuangyashan,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118267,,,success,,2019-12-24,False -86,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Shijiazhuang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118266,,,success,,2019-12-24,False -87,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Shanghai,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118264,,,success,,2019-12-24,False -88,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Qigihar,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118263,,,success,,2019-12-24,False -89,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Panzhihua,Daily,China,yuan/ton,2010-04-13,2019-12-06,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118260,,,success,,2019-12-06,False -90,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Mudanjlang,Daily,China,yuan/ton,2011-09-01,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118259,,,success,,2019-12-24,False -91,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Lvliang,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118258,,,success,,2019-12-24,False -92,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Linyt,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118255,,,success,,2019-12-24,False -93,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Linfen,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118254,,,success,,2019-12-24,False -94,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Jinzhong,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118252,,,success,,2019-12-24,False -95,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Hancheng,Daily,China,yuan/ton,2012-03-06,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118247,,,success,,2019-12-24,False -96,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Fukang,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118246,,,success,,2019-12-24,False -97,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Fushun,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118245,,,success,,2019-12-24,False -98,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118243,,,success,,2019-12-24,False -99,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Anyang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118241,,,success,,2019-12-24,False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt deleted file mode 100644 index 2de8022c8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - Name Frequency Country Unit Start Date End Date Commodity Contracts Business Category is_alive source_code dataset_code series_code original_name extracted_frequency is_downloaded WIND Commodity Update id_is_broken -5 Germany: Term Structure of Interest Rate on Listed Federal Securities: 1 Daily Germany % 1997-08-07 2019-12-19 NaN NaN Upstream True WIND Deutsche Bundesbank G0008063 NaN NaN success Gold 2019-12-20 False -7 France: Treasury Bills Reference Rate: 1Y Daily France % 1989-01-03 2019-12-19 NaN NaN Upstream True WIND Banque de France G0008146 NaN NaN success Gold 2019-12-20 False -8 France: Treasury Bills Reference Rate: 10Y Daily France % 1987-01-02 2019-12-19 NaN NaN Upstream True WIND Banque de France G1400003 NaN NaN success Gold 2019-12-20 False -... 
-97 Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Fushun Daily China yuan/ton 2011-09-09 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118245 NaN NaN success NaN 2019-12-24 False -98 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos Daily China yuan/ton 2010-04-20 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118243 NaN NaN success NaN 2019-12-24 False -99 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Anyang Daily China yuan/ton 2010-04-13 2019-12-24 Coal 
CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118241 NaN NaN success NaN 2019-12-24 False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt deleted file mode 100644 index 8c6bdf3cf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt +++ /dev/null @@ -1,18 +0,0 @@ -def func1(): - """ - First function. - - ``` - foo - ``` - """ - - -def func2(): - """ - Second function. - - ``` - foo - ``` - """ \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt deleted file mode 100644 index 3f4d616bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Test created for __main__.plbck_sum. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. - a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test3(self) -> None: - # Define input variables. - a = 2 - b = 3 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test4(self) -> None: - # Define input variables. - a = 3 - b = 4 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt deleted file mode 100644 index 3f4d616bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Test created for __main__.plbck_sum. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. 
- a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. - a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test3(self) -> None: - # Define input variables. - a = 2 - b = 3 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test4(self) -> None: - # Define input variables. - a = 3 - b = 4 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt deleted file mode 100644 index 1a2ceab1a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -# Test created for __main__.plbck_sum. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. 
- a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt deleted file mode 100644 index b5439e39d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 3 - b = 2 - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = 5 - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt deleted file mode 100644 index 6631e9e27..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": 2} - b = {"3": 4} - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt deleted file mode 100644 index 80e85048a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) - b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = hpandas.df_to_str(actual, num_rows=None) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt deleted file mode 100644 index 1d91a4a88..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": ["a", 2]} - b = {"3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = {"1": ["a", 2], "3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt deleted file mode 100644 index badcab6f7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) - b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = hpandas.df_to_str(actual, num_rows=None) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt deleted file mode 100644 index 6b92491e1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) - b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = pd.Series(data=[20.0, 20.0, 20.5], index=RangeIndex(start=0, stop=3, step=1), name="None", dtype=float64) - expected = jsonpickle.decode(expected) - actual = hpandas.df_to_str(actual, num_rows=None) - expected = hpandas.df_to_str(expected, num_rows=None) - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt deleted file mode 100644 index 403295821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") - b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt deleted file mode 100644 index 5a0f6c938..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") - b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = cconfig.Config.from_python("Config({'meta': 'meta value 2', 'list': [1, 2]})") - expected = jsonpickle.decode(expected) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt deleted file mode 100644 index 1884fe5bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string_none. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckStringNone(hunitest.TestCase): - def test1(self) -> None: - # Call function to test. - actual = get_result_check_string_none() - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt deleted file mode 100644 index 710587bb8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal_none. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqualNone(hunitest.TestCase): - def test1(self) -> None: - # Call function to test. - actual = get_result_assert_equal_none() - # Define expected output. - expected = "Some string." - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt deleted file mode 100644 index 40dc558c5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = "test" - b = "case" - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = "testcase" - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt deleted file mode 100644 index 68b93d84d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = [1, 2, 3] - b = [4, 5, 6] - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = [1, 2, 3, 4, 5, 6] - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt deleted file mode 100644 index faa6861c2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": 2} - b = {"3": 4} - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = {"1": 2, "3": 4} - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt deleted file mode 100644 index abfa197bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) - b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = pd.DataFrame.from_dict({'Price': [701, 251, 801, 1201]}) - actual = hpandas.df_to_str(actual, num_rows=None) - expected = hpandas.df_to_str(expected, num_rows=None) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt deleted file mode 100644 index f7fa7c8c9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}' - a = jsonpickle.decode(a) - b = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B9wBAQ=="]]}' - b = jsonpickle.decode(b) - # Call function to test. 
- actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = r'{"py/reduce": [{"py/type": "datetime.timedelta"}, {"py/tuple": [1096, 0, 0]}]}' - expected = jsonpickle.decode(expected) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt deleted file mode 100644 index 25588d901..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 3 - b = 2 - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt deleted file mode 100644 index cd51f2ced..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = "test" - b = "case" - # Call function to test. - actual = get_result_check_string(a=a, b=b) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt deleted file mode 100644 index c42805818..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = [1, 2, 3] - b = [4, 5, 6] - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - "column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt deleted file mode 100644 index 4f0f96902..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -INSERT INTO test_table(id,column_1,column_2) VALUES %s diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt deleted file mode 100644 index c5faf0358..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -original shape=(3, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":4, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "2":{ - "id":5, - "column_1":1001.0, - "column_2":"test_string_2" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - 
"column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - "column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt deleted file mode 100644 index cd2308af6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt +++ /dev/null @@ -1,65 +0,0 @@ -# Dir structure -. 
-dummy_value_1=1 -dummy_value_1=1/dummy_value_2=A -dummy_value_1=1/dummy_value_2=A/data.parquet -dummy_value_1=2 -dummy_value_1=2/dummy_value_2=B -dummy_value_1=2/dummy_value_2=B/data.parquet -dummy_value_1=3 -dummy_value_1=3/dummy_value_2=C -dummy_value_1=3/dummy_value_2=C/data.parquet -# File signatures -len(file_names)=3 -file_names=dummy_value_1=1/dummy_value_2=A/data.parquet, dummy_value_1=2/dummy_value_2=B/data.parquet, dummy_value_1=3/dummy_value_2=C/data.parquet -# dummy_value_1=1/dummy_value_2=A/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' -# dummy_value_1=2/dummy_value_2=B/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' -# dummy_value_1=3/dummy_value_2=C/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt deleted file mode 100644 index ca3ab848c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ - description comment is_ok -0 hello Number of not submitted OMS child orders=0 / 7... 
True -1 hello2 ok True -is_ok=True \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt deleted file mode 100644 index b0e7738bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ - description comment is_ok -0 hello Number of not submitted OMS child orders=0 / 7... True -1 hello2 not_ok False -is_ok=False \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt deleted file mode 100644 index 393449cf4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -Some text before - - - -:::: -::::{.column width=40%} - - - -Middle text - - - -:::columns -::::{.column width=60%} - - - -Some text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt deleted file mode 100644 index f3bdbccbf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - - -:::: -::: - diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt deleted file mode 100644 index d5e54b365..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -df1.columns.difference(df2.columns)= -Index(['B'], dtype='object') -df2.columns.difference(df1.columns)= -Index(['C'], dtype='object') -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt deleted file mode 100644 index 464343e55..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - 
-################################################################################ -* Failed assertion * -cond=False -df1.index.difference(df2.index)= -Index([1, 4], dtype='int64') -df2.index.difference(df1.index)= -Index([5, 6], dtype='int64') -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json deleted file mode 100644 index 1e4b47491..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ - "25", - 3.195e-05 - ], - "{\"args\": [\"You are a calculator. 
Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ - "5", - 3.195e-05 - ] -} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt deleted file mode 100644 index dbd21a9a0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 NaN -2010-02-28 NaN -2010-03-31 NaN -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 NaN -2010-07-31 NaN -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 NaN -2013-03-31 NaN -2013-04-30 NaN diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt deleted file mode 100644 index 6e33e1427..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt +++ /dev/null @@ -1,33 +0,0 @@ - 0 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt deleted file mode 100644 index 3a043159d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 NaN -2010-02-28 NaN -2010-03-31 NaN -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.146756 -2010-07-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 
-2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.686501 -2013-03-31 0.686501 -2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt deleted file mode 100644 index 200d35c7a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt +++ /dev/null @@ -1,38 +0,0 @@ - 0 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.146756 -2010-07-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.686501 -2013-03-31 0.686501 -2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt deleted file mode 100644 index 
590e9e5f7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 0.000000 -2010-02-28 0.000000 -2010-03-31 0.000000 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.000000 -2010-07-31 0.000000 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.000000 -2013-03-31 0.000000 -2013-04-30 0.000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt deleted file mode 100644 index 9f8585df5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -Consecutive headers increase by more than one level: - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt deleted file mode 100644 index ce0136250..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv deleted file mode 100644 index 0ddcc75ab..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv +++ /dev/null @@ -1,5 +0,0 @@ -col1,col2,col3 -a,a,a -b,b,b -c,,c -d,, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt deleted file mode 
100644 index 4f8eb6107..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt deleted file mode 100644 index b31ec5ee2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt +++ /dev/null @@ -1,6 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -hello -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt deleted file mode 100644 index 134e5b23c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt +++ /dev/null @@ -1,6 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -hello world -################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt deleted file mode 100644 index f99e55fe0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -Caught assertion while formatting message: -'not all arguments converted during string formatting' -hello %s world too_many -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt deleted file mode 100644 index 5ebc30e5f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -Caught assertion while formatting message: -'not enough arguments for format string' -hello %s -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt deleted file mode 100644 index c941ca91b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -You passed '['hello']' or type '' instead of str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt deleted file mode 100644 index 41b8447e3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -hello world -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt deleted file mode 100644 index 41b8447e3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -hello world -################################################################################ diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt deleted file mode 100644 index 3bdf77365..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt +++ /dev/null @@ -1,10 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -Caught assertion while formatting message: -'not enough arguments for format string' -hello %s -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt deleted file mode 100644 index 5e9f4aa95..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -issubclass() arg 2 must be a class, a tuple of classes, or a union \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt deleted file mode 100644 index 3eeaf0ce1..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance '' of class '_Man' is not a subclass of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt deleted file mode 100644 index e5b23c85f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance '' of class '_Man' is not a subclass of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt deleted file mode 100644 index 69b3f64e9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -val1=3 
-[1, 2, 3] -val2=3 -[1, 2, 4] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt deleted file mode 100644 index 11a472589..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -'a' in '['xyz']' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt deleted file mode 100644 index bb58d202b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -'a' is 'None' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt deleted file mode 100644 index fca016604..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance of 'a' is '' instead of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt deleted file mode 100644 index b377f94fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance of 'a' is '' instead of '(, )' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt deleted file mode 100644 index 1c61bf06a..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -val1= -[1, 2, 4, 3] -is not sorted -sorted(val1)= -[1, 2, 3, 4] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt deleted file mode 100644 index a13f9d582..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -val1= -[1, 2, 4, 3] -is not sorted -sorted(val1)= -[4, 3, 2, 1] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt deleted file mode 100644 index 9fe19e631..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* 
Failed assertion * -val1= -[1, 3, 3] -has duplicates -3 -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt deleted file mode 100644 index a1f1fdce9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -date='2022-11-01' doesn't have the right format: time data '2022-11-01' does not match format '%Y%m%d' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt deleted file mode 100644 index 48cd44539..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt +++ /dev/null @@ -1,28 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -2 7 8 9 -3 10 11 12 -################################################################################ -filters -################################################################################ -{'col_0': (('gt', 1), ('lt', 7)), 'col_1': ('eq', 5)} 
-################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -1 4 5 6 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 4), - ('n_col_0_gt_1', np.int64(3)), - ('perc_col_0_gt_1', '3 / 4 = 75.00%'), - ('n_col_0_lt_7', np.int64(2)), - ('perc_col_0_lt_7', '2 / 4 = 50.00%'), - ('n_col_1_eq_5', np.int64(1)), - ('perc_col_1_eq_5', '1 / 4 = 25.00%'), - ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt deleted file mode 100644 index c935f88e6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt +++ /dev/null @@ -1,28 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -2 7 8 9 -3 10 11 12 -################################################################################ -filters -################################################################################ -{'col_0': ('gt', 2), 'col_1': ('eq', 5)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -1 4 5 6 -2 7 8 9 -3 
10 11 12 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 4), - ('n_col_0_gt_2', np.int64(3)), - ('perc_col_0_gt_2', '3 / 4 = 75.00%'), - ('n_col_1_eq_5', np.int64(1)), - ('perc_col_1_eq_5', '1 / 4 = 25.00%'), - ('nrows_remaining', np.int64(3))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt deleted file mode 100644 index 456d06923..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt +++ /dev/null @@ -1,26 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -filters -################################################################################ -{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 2), - ('n_col_0', np.int64(1)), - ('perc_col_0', '1 / 2 = 50.00%'), - ('n_col_1', np.int64(1)), - 
('perc_col_1', '1 / 2 = 50.00%'), - ('n_col_2', np.int64(2)), - ('perc_col_2', '2 / 2 = 100.00%'), - ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt deleted file mode 100644 index ae70053b9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt +++ /dev/null @@ -1,27 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -filters -################################################################################ -{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 2), - ('n_col_0', np.int64(1)), - ('perc_col_0', '1 / 2 = 50.00%'), - ('n_col_1', np.int64(1)), - ('perc_col_1', '1 / 2 = 50.00%'), - ('n_col_2', np.int64(2)), - ('perc_col_2', '2 / 2 = 100.00%'), - ('nrows_remaining', np.int64(2))]) \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt deleted file mode 100644 index a947c3402..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -A fake check_string output to use for test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt deleted file mode 100644 index 62b216ee4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -A fake check_string output to use for test2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv deleted file mode 100644 index abc3dac80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C,D,E -1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types deleted file mode 100644 index 81816c1d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types +++ /dev/null @@ -1 +0,0 @@ -{'A': 'int64', 'B': 'float64', 'C': 'object', 'D': 'object', 'E': 'int64'} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt deleted file mode 100644 index 4a3a582fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt +++ /dev/null @@ -1,58 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - 
CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt deleted file mode 100644 index 47371468a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt +++ /dev/null @@ -1,58 +0,0 @@ -stage='prod', 
use_privileged_mode=False, use_sibling_container=False, shared_data_dirs={'/data/shared': '/shared_data'}, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - - /data/shared:/shared_data - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt deleted file mode 100644 index 5ebe91b26..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt +++ /dev/null @@ -1,60 +0,0 @@ -stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: 
${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} -networks: - default: - name: main_network diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt deleted file mode 100644 index eb8d4824a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt +++ /dev/null @@ -1,57 +0,0 @@ -stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - - ../../:/app - environment: - - MYPYPATH - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - 
PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt deleted file mode 100644 index 2c9d5ecf0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt +++ /dev/null @@ -1,56 +0,0 @@ -stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - - CSFY_CI=$CSFY_CI - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: 
base_app - volumes: - - /app:/src - - ../../../:/app - environment: - - MYPYPATH - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt deleted file mode 100644 index 9ba5c60c9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - 
- GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt deleted file mode 100644 index 91e37ffc3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - 
CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app - - CSFY_USE_HELPERS_AS_NESTED_MODULE=0 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/helpers1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/helpers1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt deleted file mode 100644 index a16d2f133..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, 
shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/app - working_dir: /app/ck.infra - linter: - extends: base_app - volumes: - - /data/dummy/src/cmamp1/ck.infra:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt deleted file mode 100644 index b4afb6c80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/amp/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/orange1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/orange1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - 
network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl deleted file mode 100644 index 25ffea79afb3dad6014da937fd8ff7c64cfbb55f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 405 zcmbu(u}TCn5C-5=POmty^Br!R#KqQ5tptU`K4g&`x>)b*xwLl zAPnC>A16QGM~haxHX(s9!SC>iPp{kDcc1UPQuAlsa1IP%@R`>S$wC5qgNF9qcEih^ zM)$?XXzv!h9D($~l)kwaw@T#&&7%2;aOs%O@yz+ry2;oJv-TCg^Z#rJt zOv+;7YpG+DdW`ZPBmWrrRNJ=rhZ*Uc+e;dwsBxOiGhf16goGn_QVDW#6MA: - a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt deleted file mode 100644 index 7aad26473..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14) -################################################################################ -repr: -################################################################################ -: - 
a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt deleted file mode 100644 index d491215bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False , b=hello , c=3.14 ) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt deleted file mode 100644 index b5e297083..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14, hello=. 
at 0x>) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - hello='. at 0x>' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt deleted file mode 100644 index b69634f84..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14, _hello=under) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - _hello='under' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt deleted file mode 100644 index 332cd0a1d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 
0x=(a=False, b=hello, c=3.14, _Object1__hello=double_dunder) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - _Object1__hello='double_dunder' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt deleted file mode 100644 index fccd31195..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ 
-################################################################################ -str: -################################################################################ -_Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) -################################################################################ -repr: -################################################################################ -: - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - p='p' - q='q' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt deleted file mode 100644 index 08aebee19..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt deleted file mode 100644 index 14a9380bb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -xdg-open a.html diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., 
to hide utility functions in the scope of the function that uses them - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt deleted file mode 100644 index 38f3146a7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - - - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` - - -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - ```python - def 
print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt deleted file mode 100644 index dacb761b7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -0:- Functions can be declared in the body of another function -1:- E.g., to hide utility functions in the scope of the function that uses them -2: - -3: ```python -4: def print_integers(values): -5: -6: def _is_integer(value): -7: try: -8: return value == int(value) -9: except: -10: return False -11: -12: for v in values: -13: if _is_integer(v): -14: print(v) -15: ``` -16: - -17:- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt deleted file mode 100644 index 52f34afc3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -$AM_AWS_S3_BUCKET = $AM_AWS_S3_BUCKET -$CSFY_AWS_S3_BUCKET = $CSFY_AWS_S3_BUCKET diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed deleted file mode 100644 index 0850990c3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dev_scripts/testing/test/test_run_tests.py": true, - "dev_scripts/testing/test/test_run_tests2.py": true, - "helpers/test/test_printing.py::Test_dedent1::test1": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true, - "helpers/test/test_printing.py::Test_dedent2::test1": true, - "documentation/scripts/test/test_all.py": true, - "documentation/scripts/test/test_render_md.py": true, - "helpers/test/helpers/test/test_list.py::Test_list_1": true, - "helpers/test/helpers/test/test_list.py::Test_list_2": true, - "helpers/test/test_cache.py::TestAmpTask1407": true -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt deleted file mode 100644 index 61323668a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' -HH:MM:SS - INFO 
lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 10 failed pytest 'tests' target(s); to reproduce run: -pytest dev_scripts/testing/test/test_run_tests.py -pytest dev_scripts/testing/test/test_run_tests2.py -pytest documentation/scripts/test/test_all.py -pytest documentation/scripts/test/test_render_md.py -pytest helpers/test/helpers/test/test_list.py::Test_list_1 -pytest helpers/test/helpers/test/test_list.py::Test_list_2 -pytest helpers/test/test_cache.py::TestAmpTask1407 -pytest helpers/test/test_printing.py::Test_dedent1::test1 -pytest helpers/test/test_printing.py::Test_dedent1::test2 -pytest helpers/test/test_printing.py::Test_dedent2::test1 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt deleted file mode 100644 index 9e66e81bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt +++ /dev/null @@ -1,325 +0,0 @@ -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, timeout-2.0.2, rerunfailures-10.2, cov-3.0.0, instafail-0.4.2, xdist-2.5.0, forked-1.4.0 -collecting ... 
>>ENV<<: is_inside_container=True: code_version=1.0.6, container_version=1.0.6, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True -# Git - branch_name='CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests' - hash='ca2dbf510' - # Last commits: - * ca2dbf510 Sonya Nikiforova Merge branch 'master' into CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests ( 2 hours ago) Mon Feb 14 16:25:29 2022 (HEAD -> CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests, origin/CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests) - |\ - * | 63a471cca sonniki CmTask695: Update for reproducibility ( 2 hours ago) Mon Feb 14 16:15:14 2022 - | * 0d236ad57 Nikola Jašek CMTask1103: Add tests for HistoricalPqByTileClient (#1176) ( 2 hours ago) Mon Feb 14 16:01:56 2022 (origin/master, origin/HEAD) -# Machine info - system=Linux - node name=61ceebd0998a - release=5.11.0-1028-aws - version=#31~20.04.1-Ubuntu SMP Fri Jan 14 14:37:50 UTC 2022 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=33295769600, available=23499386880, percent=29.4, used=9048117248, free=17212899328, active=2693218304, inactive=12081451008, buffers=651313152, cached=6383439872, shared=286130176, slab=934486016) - disk usage=sdiskusage(total=104021790720, used=40223850496, free=63781163008, percent=38.7) -# Packages - python: 3.8.10 - gluonnlp: ? 
- gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.9.0 - numpy: 1.22.0 - pandas: 1.3.5 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.2 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest datapull/common/data/client/test/test_historical_pq_clients.py' -INFO: Saving log to file 'tmp.pytest.log' -collected 9 items - -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 (1.14 s) PASSED [ 11%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 (1.05 s) PASSED [ 22%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 (0.00 s) PASSED [ 33%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 (1.26 s) FAILED [ 44%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 (1.44 s) FAILED [ 55%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 (1.09 s) FAILED [ 66%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 (0.95 s) FAILED [ 77%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 (0.86 s) FAILED [ 88%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 (1.05 s) PASSED [100%] - -=================================== FAILURES =================================== -________________ TestHistoricalPqByTileClient1.test_read_data1 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 89, in test_read_data1 - self._test_read_data1( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 44, in 
_test_read_data1 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data1 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(4320, 4) | df.shape=(4320, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -2021-12-30 00:02:00+00:00 1467591036 2 2021 12 ( -... 
( -2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 ( -2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data1 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(4320, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -2021-12-30 00:02:00+00:00 1467591036 2 2021 12 -... -2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 -2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data2 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 132, in test_read_data2 - self._test_read_data2( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 61, in _test_read_data2 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data2 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(8640, 4) | df.shape=(8640, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -... ( -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data2 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(8640, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -... 
-2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data3 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 176, in test_read_data3 - self._test_read_data3( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 79, in _test_read_data3 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data3 --------------------------------------------------------------------------------- - -# df= ( -index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(2640, 4) | df.shape=(2640, 4) -full_symbol close year month ( -timestamp ( -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( -2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 ( -... 
( -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data3 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(2640, 4) - full_symbol close year month -timestamp -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 -2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 -... -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data4 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 221, in test_read_data4 - self._test_read_data4( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 97, in _test_read_data4 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data4 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(6002, 4) | df.shape=(6002, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -... ( -2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 ( -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data4 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] -columns=full_symbol,close,year,month -shape=(6002, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -... 
-2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data5 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 267, in test_read_data5 - self._test_read_data5( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 114, in _test_read_data5 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data5 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] | df.index in [2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(242, 4) | df.shape=(242, 4) -full_symbol close year month ( -timestamp ( -2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 ( -2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 ( -2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 ( -... 
( -2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 ( -2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 ( -2022-01-01 01:00:00+00:00 1508924190 2940 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data5 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] -columns=full_symbol,close,year,month -shape=(242, 4) - full_symbol close year month -timestamp -2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 -2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 -2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 -... -2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 -2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 -2022-01-01 01:00:00+00:00 1508924190 2940 2022 1""" -============================= slowest 3 durations ============================== -1.44s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -1.26s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -1.14s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 -=========================== short test summary info ============================ -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -FAILED 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 -========================= 5 failed, 4 passed in 10.94s ========================= diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt deleted file mode 100644 index c297aad27..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt +++ /dev/null @@ -1,10 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 5 failed pytest 'tests' target(s); to reproduce run: -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 -pytest 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt deleted file mode 100644 index 8c9d7793d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175206Z SKIPPED [1] core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175722Z SKIPPED [1] config_root/config/test/test_config.py:325: See AmpTask1573 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176275Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176859Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1177550Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1178650Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1179474Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 - Na... 
-Run_fast_tests Run fast tests 2022-02-19T16:53:07.1180384Z ^[[31m= ^[[31m^[[1m3 failed^[[0m, ^[[32m1511 passed^[[0m, ^[[33m155 skipped^[[0m, ^[[33m60 deselected^[[0m, ^[[33m2 xfailed^[[0m, ^[[33m1 rerun^[[0m^[[31m in 211.15s (0:03:31)^[[0m^[[31m =^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1367972Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[36mINFO ^[[0m hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=28.0 KB -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1381857Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[33mWARN ^[[0m hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt deleted file mode 100644 index e16188c74..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 -pytest 
dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt deleted file mode 100644 index 58f583b0e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt +++ /dev/null @@ -1,61 +0,0 @@ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0521158Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [ 99%]^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0932903Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [100%]^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933619Z -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933865Z =================================== FAILURES =================================== -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0934800Z ^[[31m^[[1m_____________________ TestRealTimeMvnReturnsWithOms1.test1 _____________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0935555Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0936347Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0937188Z market_data = self.get_market_data(event_loop) -Run_fast_tests Run fast tests 
2022-02-19T16:53:07.0938027Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939155Z df = self.get_market_data_df() -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939988Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0940754Z df = node.fit()["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941392Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941905Z self._lazy_load(fit=True) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0942562Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943252Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943957Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0944732Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0959832Z ^[[31m^[[1m____________________ TestMultivariateNormalDataSource.test1 ____________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0961700Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0968475Z File "/app/dataflow/core/nodes/test/test_sources.py", line 175, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0970838Z df = node.fit()["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0972952Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0973577Z self._lazy_load(fit=True) -Run_fast_tests Run fast 
tests 2022-02-19T16:53:07.0974176Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0976810Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0977529Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0978880Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0981739Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0982702Z ^[[31m^[[1m_________________________ TestMvnReturnsBuilder.test1 __________________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985191Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985837Z File "/app/dataflow/core/test/test_builders.py", line 74, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0986469Z result_bundle = dag_runner.fit() -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987113Z File "/app/dataflow/core/dag_runner.py", line 170, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987711Z return self._run_dag(method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988321Z File "/app/dataflow/core/dag_runner.py", line 181, in _run_dag -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988936Z df_out, info = self._run_dag_helper(method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0989566Z File "/app/dataflow/core/dag_runner.py", line 110, in _run_dag_helper -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0990221Z df_out = self.dag.run_leq_node(nid, method)["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0991397Z File "/app/dataflow/core/dag.py", line 428, in run_leq_node -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0992521Z self._run_node(id_, pred_nid, method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993298Z File 
"/app/dataflow/core/dag.py", line 593, in _run_node -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993800Z output = getattr(node, method)(**kwargs) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994361Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994834Z self._lazy_load(fit=True) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995336Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995859Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0996779Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0997405Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0998205Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048290Z ============================= slowest 3 durations ============================== -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048893Z 26.48s setup oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1049478Z 8.44s call helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1050189Z 5.32s setup dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1116212Z =========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z SKIPPED [1] test/test_tasks.py:68: Test needs to be run outside Docker -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119804Z SKIPPED [1] test/test_tasks.py:60: Test needs to be run outside Docker -Run_fast_tests Run fast tests 
2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt deleted file mode 100644 index bc2ab8612..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt +++ /dev/null @@ -1,61 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 -pytest dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 - -# TestRealTimeMvnReturnsWithOms1.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 - market_data = 
self.get_market_data(event_loop) - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data - df = self.get_market_data_df() - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df - df = node.fit()["df_out"] - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined -^[[31m^[[1m__________________ - -# TestMultivariateNormalDataSource.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/core/nodes/test/test_sources.py", line 175, in test1 - df = node.fit()["df_out"] - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined -^[[31m^[[1m_______________________ - -# TestMvnReturnsBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/core/test/test_builders.py", line 74, in test1 - result_bundle = dag_runner.fit() - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 170, in fit - return self._run_dag(method) - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 181, in _run_dag - df_out, info = self._run_dag_helper(method) - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 110, in _run_dag_helper - df_out = self.dag.run_leq_node(nid, method)["df_out"] - File "$GIT_ROOT/dataflow/core/dag.py", line 428, in run_leq_node - 
self._run_node(id_, pred_nid, method) - File "$GIT_ROOT/dataflow/core/dag.py", line 593, in _run_node - output = getattr(node, method)(**kwargs) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt deleted file mode 100644 index b0f4950ce..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt +++ /dev/null @@ -1,36 +0,0 @@ -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] - -=================================== FAILURES =================================== -__________________________ TestE8c_ModelBuilder.test1 __________________________ -Traceback (most recent call last): - File "/app/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 - self.check_string(actual) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't 
exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -__________________________ TestE8a_ModelBuilder.test1 __________________________ -Traceback (most recent call last): - File "/app/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 - self.check_string(actual) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -============================= slowest 3 durations ============================== -10.36s call dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 -7.77s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -7.31s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -=========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt deleted 
file mode 100644 index 063e0af62..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt +++ /dev/null @@ -1,36 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 -pytest dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 - -# TestE8a_ModelBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 - self.check_string(actual) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -# TestE8c_ModelBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 - self.check_string(actual) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: 
-################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -________________________ - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt deleted file mode 100644 index a2ee5ad54..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt +++ /dev/null @@ -1,2533 +0,0 @@ -INFO: > cmd='/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke run_fast_slow_superslow_tests' ->>ENV<<: is_inside_container=False: code_version=1.0.3, container_version=None, is_inside_docker=False, is_inside_ci=False, CI_defined=False, CSFY_CI='nan' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=False -## run_fast_slow_superslow_tests:  -## run_fast_tests:  -15:12:49 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and not superslow" . 
-o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION 
-CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 5.0s -timeout method: signal -timeout func_only: True -collecting ...  
-collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 622 items  -collecting 801 items  -collecting 1084 items  -collecting 1419 items  -collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint ( 3 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint ( 3 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=5f6da4732626 - release=3.10.0-1160.36.2.el7.x86_64 - version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51710918656, percent=22.3, used=11804581888, free=14433091584, active=30353010688, inactive=18354896896, buffers=0, cached=40310579200, shared=2491396096, slab=2053443584) - disk usage=sdiskusage(total=107362627584, used=32545419264, free=74817208320, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? 
- gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m not slow and not superslow . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 81 deselected / 1793 selected  - -amp/dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.02 s) PASSED [ 0%] -amp/core/finance/test/test_prediction_processing.py::TestStackPredictionDf::test1 (0.03 s) PASSED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call1 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call2 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call3 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call4 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser SKIPPED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_get_df1 (0.01 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_read_with_filter1 (0.03 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_everything1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_one_column1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_two_columns1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_merge1 (0.08 s) PASSED [ 0%] 
-amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read1 (0.05 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read2 (0.06 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read3 (0.03 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read4 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_full1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_over_two_years1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_two_years1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns3 (0.00 s) PASSED [ 1%] 
-amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns4 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_get_test_data1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset SKIPPED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset_wrong_column (0.00 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test1 (0.03 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test2 (0.02 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test3 (0.02 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test4 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test5 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test6 (0.02 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test7 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test8 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test1 (0.07 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test2 (0.10 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test1 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test2 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.21 s) PASSED [ 2%] 
-amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_unadjusted_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_metadata1 (0.11 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 3%] 
-amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_expiry_data5 (0.09 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data5 (0.07 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_expiry_data5 (0.07 s) PASSED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_mixed_constraints SKIPPED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_only_gmv_constraint SKIPPED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_restrictions SKIPPED [ 4%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_short_ban SKIPPED [ 4%] 
-amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer2::test1 SKIPPED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse1 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse2 (0.38 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse3 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse4 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse_empty_traceback1 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestGhLogin1::test_gh_login (0.23 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_images_ls_repo (0.56 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_all SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_last SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_ps (0.21 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_stats SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean (0.22 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_fetch_master (0.22 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_pull (0.21 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_images_ls_repo (0.36 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_all SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_last SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_login (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_ps (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_pull (0.00 s) PASSED [ 5%] 
-amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_stats SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_find_test_class1 (0.14 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr1 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr2 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr3 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_issue_title (0.42 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_workflow_list SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_files (0.15 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean2 (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create3 (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_fetch_master (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_merge_master (0.08 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_pull (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint2 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_print_setup (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title1 (0.47 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title4 (0.44 s) PASSED [ 6%] 
-amp/helpers/test/test_lib_tasks.py::TestLibTasksRemoveSpaces1::test1 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash2 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash4 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash5 (0.02 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_jupyter1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests1 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests2 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests4 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests5 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class1 (0.14 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class2 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class3 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator2 SKIPPED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files1 (0.09 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files2 (0.09 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_diff_files_abort1 (0.16 s) PASSED [ 7%] 
-amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_branch1 (0.33 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_files1 (0.23 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_last_commit1 (0.37 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_modified1 (0.44 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test2 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test1 (0.15 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2 (0.14 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert3 (0.07 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_branch1 SKIPPED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files3 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_last_commit1 (0.03 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_modified1 (0.07 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files1 (0.00 s) PASSED [ 8%] 
-amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test1 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test2 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test3 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test4 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test5 (0.22 s) PASSED [ 9%] -amp/helpers/test/test_lib_tasks.py::TestFailing::test_failing (0.00 s) PASSED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data2 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input1 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input2 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input3 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input4 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book_invalid_input1 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_get_exchange_currency_pairs SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_initialize_class SKIPPED [ 9%] 
-amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_1 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_2 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_3 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_1 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_2 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_3tiles_1 (0.00 s) PASSED [ 9%] -amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique1 (0.00 s) PASSED [ 9%] -amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique2 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test1 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test2 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test3 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df1 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df2 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df3 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df4 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types1 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types2 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types3 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str1 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str2 (0.00 s) PASSED [ 
10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str3 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str4 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str5 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_datetime (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_timestamp (0.00 s) PASSED [ 11%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_uuid (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_universe1 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data2 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data3 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data4 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data5 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data6 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] 
-amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_universe1 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data2 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data3 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data4 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data5 (0.07 s) PASSED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data6 (0.00 s) PASSED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_end_ts_for_symbol1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_start_ts_for_symbol1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_universe1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data2 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data3 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data4 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data5 SKIPPED [ 12%] 
-amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data6 SKIPPED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_infs (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_false (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_true (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test1 (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test_heaviside1 (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_compute_weighted_sum1::test1 (0.00 s) PASSED [ 12%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal1 (0.00 s) PASSED [ 12%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal5 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal_fuzzy_match1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal1 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal2 (0.04 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir3 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir4 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_output_dir1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir1 (0.00 s) PASSED [ 13%] 
-amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space3 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal1 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal_debug SKIPPED [ 13%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string1 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing1 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing2 (0.00 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing3 (0.15 s) (WARNING: Test was updated) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal1 (0.04 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal2 (0.03 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal3 (0.04 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal1 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal2 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal3 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing1 -WARNING: Update golden outcome file 
'/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing1/output/test_df.txt'(0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing2 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing3 -WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing3/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal1 (0.06 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal2 (0.05 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal3 -WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt'(0.03 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal4 (0.05 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test1 (0.16 s) (WARNING: Test was updated) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test2 -WARNING: Update golden outcome file '/app/amp/helpers/test/Test_check_string_debug1.test2/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::TestSubsetDf1::test1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test1 (0.02 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test2 (0.02 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test1 (0.00 s) PASSED [ 15%] 
-amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test3 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test3 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_amp_reference1::test1 (0.00 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeReturnPipeline1::test1 (0.47 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimePipelineWithOms1::test1 (0.98 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 SKIPPED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms2::test1 SKIPPED [ 15%] -amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py::TestCsvToPq::test_csv_to_pq_script SKIPPED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 (0.00 s) FAILED [ 16%] -research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (6.31 s) RERUN [ 16%] -research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (1.61 s) PASSED [ 16%] -research/RH2E/test/test_RH2E_pipeline.py::TestRH2E_DagBuilder::test1 (4.68 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ec_pipeline.py::TestRH2Ec_DagBuilder::test1 (0.18 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ed_pipeline.py::TestRH2Ed_DagBuilder::test1 (0.19 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ee_pipeline.py::TestRH2Ee_DagBuilder::test1 (0.39 s) PASSED [ 16%] 
-research/RH2E/test/test_RH2Ef_pipeline.py::TestRH2Ef_DagBuilder::test1 (4.61 s) PASSED [ 16%] -research/RH2E/test/test_RH2Eg_pipeline.py::TestRH2Eg_DagBuilder::test1 (3.68 s) PASSED [ 16%] -research/RH1E/test/test_RH1E_pipeline.py::TestRH1E_DagBuilder::test1 (2.02 s) PASSED [ 16%] -research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test1 (0.17 s) PASSED [ 16%] -research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test2 (1.97 s) PASSED [ 16%] -oms_lime/test/test_eg_broker.py::TestEgBroker1::test_place_order1 (0.94 s) PASSED [ 16%] -oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example1 (1.74 s) PASSED [ 16%] -oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example2 (0.06 s) PASSED [ 16%] -oms_lime/test/test_eg_restrictions.py::TestEgRestrictions1::test_get_trading_restrictions (0.02 s) PASSED [ 16%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test_save_data SKIPPED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_DagBuilder::test1 (0.47 s) PASSED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_DagBuilder::test1 (4.82 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (5.13 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (6.41 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (2.90 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_predict (0.79 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (6.48 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (4.11 s) PASSED [ 17%] 
-dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag1 (0.01 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag2 (0.01 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder4::test_fit (2.90 s) PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove1 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove2 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove3 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove4 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove5 PASSED [ 17%] -amp/oms/test/test_oms_db.py::TestOmsDbRemoveAllTables1::test1 SKIPPED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio1::test_state (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics1 (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics2 (0.09 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics3 (0.01 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_cash1 (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_holdings1 (0.08 s) PASSED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test1 SKIPPED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test2 SKIPPED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio2::test1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestSimulatedProcessForecasts1::test_initialization1 (0.63 s) 
PASSED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts1::test_mocked_system1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system2 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system3 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system4 SKIPPED [ 18%] -amp/oms/test/test_restrictions.py::TestRestrictions1::test1 SKIPPED [ 18%] -amp/oms/test/test_restrictions.py::TestRestrictions1::test2 SKIPPED [ 18%] -amp/dataflow/system/test/test_real_time_dag_adapter.py::TestRealtimeDagAdapter1::testMvnReturnsBuilder1 (0.05 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes1 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes2 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes3 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes4 (0.02 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes5 (0.13 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes1 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes10 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes2 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes3 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes4 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes5 (0.00 s) PASSED [ 19%] 
-amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes6 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes7 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes8 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes9 (0.01 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks2 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks3 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test2 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_from_config1 (0.01 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_columns_for_tag1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_tags_for_column1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_pickle1 (0.05 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_dict_and_back (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_feature_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags2 (0.00 s) PASSED [ 20%] 
-amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags3 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_targets_and_predictions_for_tags1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_prediction_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_target_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_runners.py::TestRollingFitPredictDagRunner1::test1 (0.43 s) PASSED [ 20%] -amp/dataflow/core/test/test_runners.py::TestIncrementalDagRunner1::test1 (0.47 s) PASSED [ 20%] -amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test2 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw_to_file1 (0.01 s) PASSED [ 20%] -amp/core/plotting/test/test_portfolio_stats.py::Test_plot_portfolio_stats1::test1 PASSED [ 20%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test3 (0.00 s) PASSED [ 21%] -amp/config_root/config/test/test_config_builders.py::TestGetConfigsFromBuilder1::test1 (0.00 s) PASSED [ 21%] -amp/config_root/config/test/test_config_builders.py::TestGetConfigFromEnv::test_no_env_variables (0.00 s) PASSED [ 21%] 
-amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test1 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test2 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test4 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test1 (0.02 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test4 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestComputeTurn1::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestMaximizeWeightEntropy1::test1 (0.13 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestFindNearestAffinePoint1::test1 (0.01 s) PASSED [ 22%] -research/returns/test/test_dataflow_lime_returns_pipeline.py::TestReturnsPipeline::test1 (0.11 s) PASSED [ 22%] -im_lime/eg/test/test_eg_transform_pq_by_date_to_by_asset.py::TestEgTransformByDateToByTile1::test_transform1 (4.30 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test1 (3.12 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache1 SKIPPED [ 22%] 
-core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache2 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache3 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache4 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache5 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache6 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_historical1 (0.70 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_real_time1 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_replayed_time1 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_save_data SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgMultipleInstrumentDataReader1::test_historical1 (0.75 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_incorrect_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_integer_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_string_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_index_already_present (0.00 s) PASSED [ 23%] 
-amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_milliseconds (0.00 s) PASSED [ 23%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_seconds (0.00 s) PASSED [ 23%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_wrong_column (0.00 s) PASSED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_command_line SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call1 SKIPPED [ 23%] 
-amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call2 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_process_chunk SKIPPED [ 23%] -amp/helpers/test/test_lib_tasks_find.py::Test_find_short_import1::test1 (0.00 s) PASSED [ 23%] -amp/helpers/test/test_lib_tasks_find.py::Test_find_func_class_uses1::test1 (0.00 s) PASSED [ 24%] -amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_replayed_time1 SKIPPED [ 24%] -amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_simulated_replayed_time1 (0.46 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test1 (0.16 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 (0.15 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit1 (1.13 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit_dag1 (1.07 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_csv1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_parquet1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_csv1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_parquet1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates1 (0.01 s) PASSED [ 24%] 
-amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates_open_boundary1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestArmaDataSource::test1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test1 (0.07 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test2 (0.02 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test3 (0.07 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test4 (0.08 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test5 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3 (0.16 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test01 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test02 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test03 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test04 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test05 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test06 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test07 (0.19 s) PASSED [ 25%] 
-amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test08 SKIPPED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test09 (0.37 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test10 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test11 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test12 (0.13 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test13 (0.16 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1 (0.20 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2 (0.25 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3 (0.34 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test2 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test3 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test4 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test5 (0.00 s) PASSED [ 26%] 
-amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test2 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test3 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test4 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test5 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test1 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test10 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test11 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test12 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test2 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test3 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test4 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test5 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test6 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test7 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test8 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test9 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test1 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test2 (0.00 s) PASSED [ 27%] 
-amp/core/statistics/test/test_t_test.py::TestTTest1samp::test3 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test4 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_high_sample_count (0.02 s) PASSED [ 27%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_moderate_sample_count (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_high_sample_count (0.41 s) PASSED [ 28%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_moderate_sample_count (0.20 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test1 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test2 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test3 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test4 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test5 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test6 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test7 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test1 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test2 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test3 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test4 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test5 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test6 
(0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test1 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test2 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test4 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test5 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test6 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeNumFiniteSamples::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeNumUniqueValues::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeDenominatorAndPackage::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test1 (0.01 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test2 (0.01 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test2 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test4 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test5 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test6 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_2dof (0.00 s) PASSED [ 29%] 
-amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_4dof (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_almost_normal (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test1 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test10 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test11 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test12 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test2 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test3 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test5 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test6 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test7 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test8 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test9 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test1 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test2 (0.01 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test3 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test4 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test5 (0.00 s) PASSED [ 30%] 
-amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test6 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test7 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_drawdown::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test1 (0.01 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test2 (0.01 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed3 (0.00 s) PASSED [ 
32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_scale_invariance1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test4 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test5 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test3 (0.00 s) PASSED [ 
33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test4 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test5 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test6 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_df (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_series (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_user_supplied_pi0 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test0 SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test1 (0.38 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test_generate_input_data SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test0 SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test1 (0.02 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test2 (0.05 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test3 (0.04 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test_generate_input_data SKIPPED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test1 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test2 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test1 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 (0.00 s) XFAIL [ 34%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test3 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test1 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test2 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test3 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test4 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test5 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test6 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 (0.01 s) XFAIL [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test1 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test2 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test3 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test4 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test5 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test6 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test7 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test8 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test1 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test3 (0.00 s) PASSED [ 35%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test4 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test5 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test6 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test7 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test8 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test1 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test3 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test4 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test5 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test6 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test7 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test8 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test9 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test1 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test4 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test5 (0.00 s) PASSED [ 36%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test6 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test7 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test8 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_nan (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_smoke (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test1 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test2 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test3 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test1 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test2 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test1 (0.01 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test2 (0.01 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatio::test1 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatioStandardError::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test1 (0.02 s) PASSED [ 37%] 
-amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test2 (0.09 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test3 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test1 (0.02 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test2 (0.09 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_summarize_sharpe_ratio::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test2 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test3 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test4 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_nans1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_oos_not_from_interval1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_zeros1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test2 (0.00 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_derivative1::test1 (0.03 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_moving_average1::test1 (0.01 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test1 (0.00 s) PASSED [ 38%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test2 (0.00 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test3 (0.00 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test4 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test5 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test6 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test7 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_moment1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_norm1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_var1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_std1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_demean1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_skew1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_kurtosis1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_sharpe_ratio1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_corr1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zcorr1::test1 (0.03 s) PASSED [ 38%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_atol1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_clean1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_atol1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_clean1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan1 (0.01 s) PASSED [ 39%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_atol1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_clean1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_annualized_sharpe_ratio::test1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test1 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test2 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test3 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test4 (0.17 s) PASSED [ 
40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test5 (0.16 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test6 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test3 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test4 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test5 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan1 (0.14 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero1 (0.10 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize1 (0.11 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test1 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test2 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test3 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_clean1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_depth (0.01 s) PASSED [ 41%] 
-amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode3 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode3 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test1 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test2 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test3 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test_lag_1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_inverse::test1 (0.00 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_presudoinverse::test1 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_non_ath_to_nan1::test1 (0.01 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_bypass (0.00 s) PASSED [ 42%] 
-amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_remove (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test1 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test2 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_bypass (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_remove (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_ask_value (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_bid_value (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_centered_order_book_imbalance (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_geometric_mid (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_log_relative_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid_value (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_order_book_imbalance (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_quoted_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_relative_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_weighted_mid (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test1 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test2 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test1 (0.02 s) PASSED [ 43%] 
-amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test2 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test3 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans1 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans2 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans2 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_offset (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_daily (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_intraday (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_invariance (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_month1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_week1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_year1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_minute1 (0.01 s) PASSED [ 44%] 
-amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_month_to_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_month1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_week1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_year1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_business_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_minute1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_month_to_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::TestComputeOvernightReturns::test1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test1 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test2 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test3 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test4 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test5 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test6 (0.00 s) PASSED 
[ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_function (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_object (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_roundtrip_transform1 (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_set1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key4 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_in1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_not_in1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key4 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] 
-amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key2 SKIPPED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key3 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key4 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_not_existing_key1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_print1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_to_python1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_roundtrip_transform1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in3 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in4 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update3 
(0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestFromEnvVar1::test1 (0.44 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test_check_same_configs_error (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test1 (0.00 s) PASSED [ 48%] 
-amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_convert_to_dataframe1::test1 (0.01 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test1 (0.00 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test2 (0.00 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test3 (0.00 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test1 (0.47 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test2 (0.26 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test3 (0.27 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_log_portfolio_read_portfolio (0.10 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_multiday_overnight_returns_injected (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_floating_gmv (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_floating_gmv (0.03 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_targeted_gmv (0.03 s) PASSED [ 49%] 
-amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_demean (0.04 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_side_preserving (0.04 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_multiday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] -im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data1 (1.75 s) PASSED [ 49%] -im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data2 (1.82 s) PASSED [ 49%] -market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_get_data_at_timestamp1 (2.07 s) PASSED [ 50%] -market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_should_be_online1 (0.00 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_at_timestamp1 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval1 SKIPPED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval2 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval3 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval4 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval5 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period1 (0.16 s) PASSED [ 50%] 
-amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period2 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period3 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period4 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period5 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period6 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period7 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_end_time1 (0.06 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_price1 (0.23 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_twap_price1 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_is_online1 (0.06 s) PASSED [ 51%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_should_be_online1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test3 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test4 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test5 (0.00 s) PASSED [ 51%] 
-amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test6 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test7 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/universe/test/test_universe_utils.py::TestStringToNumericalId::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/universe/test/test_universe_utils.py::TestBuildNumericalToStringIdMapping::test1 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates1 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates2 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates3 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates4 (0.00 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_get_available_dates1 (0.00 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test1 (1.39 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test2 (2.44 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test3 (2.81 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla1 (1.61 s) PASSED [ 52%] 
-vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla2 (1.05 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_convert_string_to_timestamp1 (0.02 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_generate_raw_eg_data SKIPPED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_process_bar_data1 (0.02 s) PASSED [ 52%] -vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data1 (0.82 s) PASSED [ 52%] -vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data2 (0.74 s) PASSED [ 52%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_replayed_time1 SKIPPED [ 52%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_save_data SKIPPED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_tiny1 (0.00 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v1 (0.00 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_all (0.20 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_top100 (0.08 s) PASSED [ 52%] -oms_lime/test/test_eg_portfolio.py::TestEgPortfolio1::test_send_orders1 SKIPPED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData1::test_should_be_online1 (0.02 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data1 (0.04 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data3 (0.05 s) PASSED [ 53%] 
-market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time1 (0.01 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_is_online1 (0.02 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_sql_get_query1 (0.01 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData1::test_save_market_data1 SKIPPED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data1 (0.19 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp1 (0.19 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp2 (0.17 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_for_interval1 (0.18 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_print_info_for_serialized_data1 SKIPPED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_round_trip1 (0.16 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData3::test_get_data1 (0.50 s) PASSED [ 53%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period1 (0.00 s) SKIPPED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data1 (0.08 s) PASSED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data2 (0.08 s) PASSED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data3 (0.12 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data4 (0.12 s) 
PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data5 (0.12 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_0 (0.02 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_1 (0.07 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_3 (0.08 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_6 (0.08 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_63 (0.11 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp1 (0.04 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp2 (0.02 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval1 (0.03 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval2 (0.04 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_get_last_end_time1 (0.03 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available1 (0.07 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available2 (0.05 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available3 (0.84 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData4::test_is_last_bar_available1 (0.08 s) PASSED [ 54%] 
-amp/dataflow/model/test/test_stats_computer.py::TestStatsComputer1::test_compute_portfolio_stats1 (0.04 s) PASSED [ 54%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_bash SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_cmd1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_images_ls_repo1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_jupyter1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_login1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_ps SKIPPED (T...) [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_stats SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_list SKIPPED (Test n...) [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_print_setup1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only2 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_local_image SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_prod_image SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_jupyter1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_pull1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_lint1 SKIPPED (Test ...) 
[ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_blank_tests1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests_failed SKIPPED [ 55%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order2 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout1 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test1 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test3 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test4 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test1 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test3 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestImDockerCmd::test1 SKIPPED [ 56%] -amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe1 (0.00 s) PASSED [ 57%] 
-amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe2 (0.00 s) PASSED [ 57%] -amp/datapull/ccxt/universe/test/test_universe.py::TestGetVendorUniverse::test1 (0.00 s) PASSED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data3 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data4 SKIPPED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test1 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test2 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test3 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test4 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test1 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test10 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test11 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test12 (0.00 s) 
PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test13 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test14 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test15 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test16 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test17 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test18 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test2 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test3 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test4 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test5 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test6 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test7 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test8 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test9 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_config1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_dataseries1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_df1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_dict1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int1 (0.00 s) 
PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_list1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFilePath1::test1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test1 (0.50 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test2 (0.50 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test3 (0.49 s) PASSED [ 59%] -amp/helpers/test/test_printing.py::Test_printing1::test_color_highlight1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test3 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test4 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test5 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test6 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test3 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test4 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_sort_dictionary::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_indent1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test2 (0.00 
s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test_roundtrip1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_align_on_left1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame3 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test1 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test2 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test3 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test4 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test5 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test6 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test7 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_os_name (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_server_name (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_user_name (0.10 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test1 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test2 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test3 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test1 (0.17 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test2 (0.18 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test3 (0.18 s) 
PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test4 (0.18 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test5 (0.18 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_Linux_commands1::test_du1 (0.19 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_not_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp3 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp4 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp5 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestCacheFunctions::test_get_cache_name1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_changed_function (0.12 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_redefined_function (0.12 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching1 (0.30 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching2 (0.30 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching3 (0.30 s) PASSED [ 62%] 
-amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching4 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching5 (0.29 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_disk_reset (0.39 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset (0.40 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset2 (0.43 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_without_caching1 (0.00 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching1 (0.70 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching2 (0.64 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_dataframe (0.17 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_series (0.16 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function_no_mem (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestAmpTask1407::test1 (0.10 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestAmpTask1407::test2 (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 SKIPPED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_disk_cache1 (0.33 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_cache1 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_disk_cache1 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheUpdateFunction1::test1 (0.01 s) PASSED [ 64%] -amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_disk_cache1 (0.32 s) PASSED [ 64%] 
-amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_cache1 (0.32 s) PASSED [ 64%] -amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_disk_cache1 (0.33 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test3 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test4 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test5 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test6 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test7 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test3 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test4 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test5 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance3 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance4 (0.00 s) 
PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance5 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted3 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted4 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert3 (0.00 s) PASSED [ 66%] 
-amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert4 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_callable1::test1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_callable1::test_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_branch_name1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_path_from_supermodule1 (0.11 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_project_dirname1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_submodule_paths1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_is_amp (0.11 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_is_inside_submodule1 (0.00 s) PASSED [ 67%] 
-amp/helpers/test/test_git.py::Test_git_submodule2::test_get_head_hash1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_get_remote_head_hash1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes3 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names1 (0.10 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names2 (0.11 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client1 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client2 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname1 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname2 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name4 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name_rountrip1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_task_prefix_from_repo_short_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name3 (0.00 s) PASSED [ 68%] 
-amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name4 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root1 SKIPPED [ 68%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root2 SKIPPED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root3 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root4 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root5 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 (0.11 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files_in_branch1 (0.05 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_previous_committed_files1 (0.07 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_summary_files_in_branch1 (0.47 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_git_log1 (0.07 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test1 (0.13 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test2 (0.13 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test3 (0.17 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test4 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test5 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_docker_base_image_name1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_host_name1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_repo_map1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_has_didn_support1 (0.00 s) PASSED [ 69%] 
-amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_real_time1 (1.00 s) PASSED [ 70%] -amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_logging1::test_logging_levels1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_real_time1 (1.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_find_all_files1::test1 (0.20 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_change_filename_extension1::test1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_load_df_from_json::test1 (0.01 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name2 (0.00 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_run1 (0.36 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_env1::test_get_system_signature1 (0.16 s) PASSED [ 70%] -amp/dev_scripts/infra/test/test_all.py::Test_ssh_tunnel::test1 SKIPPED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_caesar1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_author1 SKIPPED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_file_size1 (0.28 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_master1 (0.05 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_words_in_text1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex1 (0.00 s) PASSED [ 71%] 
-amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex2 (0.00 s) PASSED [ 71%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex3 (0.00 s) PASSED [ 71%] -amp/dataflow/model/test/test_forecast_mixer.py::TestForecastMixer1::test_generate_portfolio_bar_metrics_df (0.05 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_model_selection1 (1.77 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_model_return_correlation1 (0.28 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_multiple_tests_adjustment1 (0.18 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_positions1 (0.43 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_and_vol1 (0.71 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_signal_analysis1 (0.58 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_return_correlation1 (0.32 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_returns_and_predictions1 (1.13 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_sharpe_ratio_panel1 (0.40 s) PASSED [ 71%] -amp/dataflow/model/test/test_regression_analyzer.py::TestRegressionAnalyzer1::test_compute_moments (0.06 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_replayed_time1 (0.00 s) PASSED [ 71%] -amp/core/test/test_real_time.py::TestReplayedTime1::test1 (0.00 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_real_time1 (3.03 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_replayed_time1 (4.01 s) PASSED [ 72%] 
-amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_replayed_time1 (0.00 s) PASSED [ 72%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_time1 (0.00 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test1 (0.40 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test2 (0.39 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test3 (0.39 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test4 (0.39 s) PASSED [ 72%] -amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_precision_equivalency (0.00 s) PASSED [ 72%] -amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_toy_case (0.00 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_data1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_twap_price1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread2 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread3 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread4 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint2 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint3 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread1 (0.01 s) PASSED [ 73%] 
-amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread2 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread3 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread4 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread5 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread6 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price1 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price2 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price3 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test1 (0.06 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random1 (0.06 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random2 (0.08 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random3 (0.10 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test1 (0.03 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test2 (0.03 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test3 (0.07 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test_perf1 SKIPPED [ 73%] -amp/oms/test/test_api.py::Test_Contract1::test1 (0.00 s) PASSED [ 73%] -amp/oms/test/test_api.py::Test_Contract1::test_cmp1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Contract1::test_cmp2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Order1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_OrderStatus1::test1 (0.00 s) PASSED [ 74%] 
-amp/oms/test/test_api.py::Test_Trade1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_cmp1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_cmp2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff3 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_OMS1::test1 SKIPPED (unconditional skip) [ 74%] -amp/oms/test/test_api.py::Test_OMS1::test2 SKIPPED (unconditional skip) [ 74%] -amp/oms/test/test_broker.py::TestSimulatedBroker1::test_submit_and_fill1 (0.05 s) PASSED [ 74%] -amp/oms/test/test_broker.py::TestMockedBroker1::test1 SKIPPED (Need ...) [ 74%] -amp/oms/test/test_order.py::TestOrder1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_order.py::TestOrders1::test1 (0.00 s) PASSED [ 74%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 74%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 75%] 
-amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract2 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract3 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol2 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol3 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol4 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract4 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract5 (0.00 s) PASSED [ 76%] 
-amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract6 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract7 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract1 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract1 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract4 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contract_slow1 (0.66 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts1 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts2 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts3 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures1 (0.05 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures3 (0.05 s) PASSED [ 77%] 
-amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures4 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures5 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures6 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow1 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow2 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata1 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata2 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata3 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata4 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata5 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow1 (0.38 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow2 (0.40 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow3 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element1 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element2 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer1 (0.00 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer2 (0.00 s) 
PASSED [ 77%] -amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_parsing_logic (0.00 s) PASSED [ 78%] -amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_real_call SKIPPED [ 78%] -amp/im/kibot/metadata/test/test_load.py::TestAdjustmentsLoader::test_real_call SKIPPED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_etfs (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_forex (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_futures (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_stocks (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_sp500 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_daily (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_minutely (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_tick (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_continuous (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_expiry (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test1 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test10 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test11 (0.00 s) PASSED [ 78%] 
-amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test12 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test13 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test14 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test2 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test3 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test4 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test5 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test6 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test7 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test8 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test9 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test1 (0.17 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test_read_data_with_start_end_ts SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_1 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_2 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_1 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_2 SKIPPED [ 79%] 
-amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol1 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol2 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol3 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol4 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name1 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name2 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name3 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name4 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name5 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file1 (0.00 s) PASSED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 80%] 
-amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 80%] -amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_dataset_links (0.03 s) PASSED [ 81%] -amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_payload_links (1.53 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path1 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path2 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path3 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_get_latest_symbols_file1 (0.03 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_dtypes1 (0.04 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data1 (0.10 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data2 (0.04 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data3 (0.10 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_check_date_type (0.05 s) PASSED [ 81%] 
-amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_with_start_end_ts (1.51 s) PASSED [ 81%] -amp/im/eoddata/test/test_read_symbol_list.py::Test_read_symbols_from_file::test1 (0.00 s) PASSED [ 81%] -amp/im/ib/connect/test/test_im_tasks.py::TestImTwsStartIbInterface::test1 SKIPPED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table1 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table2 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table3 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_from_text1 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_repr1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_str1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_unique1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_unique2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_timer.py::TestTimedScope::test_1 (1.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version1 SKIPPED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_check_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_get_changelog_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_get_container_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_find_duplicates1::test1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_find_duplicates1::test2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test1 
(0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test3 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_extract1::test1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_extract1::test2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test4 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test5 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test6 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test7 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test4 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test5 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_find_duplicates1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_find_duplicates2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_extension1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_os1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_html::test_linux1 SKIPPED (...) [ 84%] -amp/helpers/test/test_open.py::Test_open_html::test_mac1 SKIPPED (Se...) 
[ 84%] -amp/helpers/test/test_open.py::Test_open_html::test_windows1 SKIPPED [ 84%] -amp/helpers/test/test_open.py::Test_open_pdf::test_mac1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_get_credentials1::test1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_functions1::test_extract_bucket_from_path1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists1 (0.01 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists2 (0.05 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists3 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_glob1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_ls1 (0.01 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_dry_run1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading2 (0.03 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky2 (1.97 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_serial1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading1 (0.02 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading2 (0.02 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky1 (1.34 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky2 (1.19 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial1 (0.02 s) PASSED [ 85%] 
-amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial2 (0.02 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading1 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading2 (0.04 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading3 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading4 (0.04 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky1 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky2 (1.38 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky3 PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial1 (0.07 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial2 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_joblib_example1::test1 SKIPPED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime1 (0.00 s) PASSED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime_fail1 (0.00 s) PASSED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime_fail1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime_assert1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_datetime_conversions (0.00 s) PASSED [ 86%] 
-amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime3 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_ET (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_UTC (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_ET (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_UTC (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_annual1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_bimonthly1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_daily1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_index1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly3 (0.00 s) PASSED [ 87%] 
-amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly4 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly5 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly3 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_srs1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_weekly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test3 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test3 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test3 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test4 (0.00 s) PASSED [ 88%] 
-amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test5 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test6 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test7 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test8 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test9 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_env.py::Test_env1::test_get_system_signature1 (0.17 s) PASSED [ 88%] -amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_convert_csv_to_dict::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_from_typed_csv::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_to_typed_csv::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_conjunction1 (0.01 s) PASSED [ 88%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_disjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_conjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_disjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::TestFilterDataByMethod::test1 (0.02 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test2 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test3 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test4 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test5 (0.00 s) PASSED [ 89%] 
-amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test6 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test1 (0.01 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test2 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test3 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test4 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test5 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test6 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test7 (0.00 s) PASSED [ 89%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md1::test_uml_file_names1 (0.00 s) PASSED [ 89%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command1 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command2 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml1 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml2 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml3 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml4 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml_playback1 (0.01 s) PASSED [ 90%] -amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test1 (0.00 s) PASSED [ 90%] -amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test2 (0.00 s) PASSED [ 90%] 
-amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test3 (0.00 s) PASSED [ 90%] -amp/dataflow/pipelines/features/test/test_feature_pipeline.py::TestFeaturePipeline::test1 (0.23 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_with_oos (0.02 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_without_oos (0.03 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_with_oos (0.04 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_without_oos (0.00 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json3 (0.00 s) PASSED [ 91%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_merge (0.01 s) PASSED [ 91%] -amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_aggregate_models1 (0.26 s) PASSED [ 91%] -amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_calculate_stats1 (1.41 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer1::test_column_arithmetic (0.03 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer2::test_resampling (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing1 (0.02 s) PASSED [ 91%] 
-amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing2 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans_without_reindexing (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer1::test1 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_then_join (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer1::test1 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test1 (0.04 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test2 (0.51 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_then_join (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 92%] 
-amp/dataflow/core/nodes/test/test_transformers.py::TestFunctionWrapper::test1 (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test1 (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test2 (0.03 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test1 (0.08 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test2 (0.13 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test1 (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test2 (0.03 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test3 (0.05 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test1 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test2 (0.06 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test3 (0.08 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test1 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test2 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test3 (0.06 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_local_level_model.py::TestLocalLevelModel::test1 (0.01 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test0 SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test1 (0.06 s) PASSED [ 93%] 
-amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test2 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test3 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test4 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test5 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test6 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test7 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test_generate_input_data SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1 (1.04 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2 (1.07 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_step_one1 SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_with_constant1 (1.25 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict2 (1.07 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals1 (1.46 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals_no_x1 (1.28 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_with_nan (1.08 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test1 (0.02 s) PASSED [ 94%] 
-amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_0_zscoring1 (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_3_zscoring1 (0.02 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through_no_writing (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_write (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through_no_writing (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_write (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_linearize_eigval_eigvec (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval1 (0.00 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval2 (0.00 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec1 (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec2 (0.02 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test1 (0.23 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test2 (0.35 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestTimeSeriesDailyStudy::test_usual_case (0.29 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestTimeSeriesMinutelyStudy::test_usual_case (0.58 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test1 (0.13 s) PASSED [ 95%] 
-amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test2 (0.13 s) PASSED [ 95%] -amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test3 (0.13 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_shape1 (0.00 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_truncate1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_local_ts (0.28 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_none_x_vars (0.00 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_series_target (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness_local_ts SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_artificial_ts SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_none_x_vars SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluonForecasts::test_transform1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform_none_x_vars1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromSklean::test_transform1 (0.01 s) PASSED [ 96%] -amp/core/test/test_explore.py::Test_explore1::test_ols_regress_series (0.20 s) PASSED [ 96%] 
-amp/core/test/test_explore.py::Test_explore1::test_rolling_pca_over_time1 SKIPPED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column1 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column2 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column3 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column4 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index1 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index2 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index3 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index4 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_no_intersection (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pairs::test1 (0.01 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference1 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference2 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_mean (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference1 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference2 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference_of_logs (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_mean (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_mean_of_logs (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference1 (0.00 s) PASSED [ 97%] 
-amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compare_subspaces::test1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_effective_rank::test1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_effective_rank::test2 (0.00 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test1 (0.02 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test2 SKIPPED [ 98%] 
-amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test3 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test4 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test2 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test3 (0.01 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test4 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestResampleIndex1::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test2 (0.09 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test3 (0.08 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test4 (0.13 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test5 (0.02 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_csv1 (0.10 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_parquet1 (1.08 s) PASSED [ 98%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test1 (0.00 s) PASSED [ 98%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test2 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test3 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test1 (0.01 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test2 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::Test_generate_arima_signal_and_response::test1 (0.00 s) PASSED [ 99%] 
-amp/core/test/test_artificial_signal_generators.py::TestGenerateRecipeDataset::test1 (0.01 s) PASSED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test1 SKIPPED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test2 SKIPPED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test3 SKIPPED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_dollar_bars (0.07 s) PASSED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_tick_bars (0.02 s) PASSED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_volume_bars (0.07 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily_shift_freq1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_minutely1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_multiple_responses_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] - -=================================== FAILURES =================================== -__________________ Test_get_configs_from_command_line1.test1 ___________________ -Traceback (most recent call last): - File "/app/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in 
import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked -ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' -============================= slowest 3 durations ============================== -6.49s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution -6.41s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -6.31s call research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit -=========================== short test summary info ============================ -SKIPPED [5] amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py: Need dind support -SKIPPED [1] amp/helpers/test/test_hparquet.py:741: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file -SKIPPED [5] amp/optimizer/test/test_single_period_optimization.py: Requires special docker container. 
-SKIPPED [1] amp/helpers/test/test_lib_tasks.py:200: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:192: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:184: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:263: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:271: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:287: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:298: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:307: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:316: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:332: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:390: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:399: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:408: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:481: CmampTask #683. 
-SKIPPED [1] amp/helpers/test/test_lib_tasks.py:536: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:571: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:600: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:635: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:698: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:792: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1003: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1343: This test makes sense for a branch -SKIPPED [9] amp/datapull/ccxt/data/extract/test/test_exchange_class.py: Enable after CMTask1292 is resolved. -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:789: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:769: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:809: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:530: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:573: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:620: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:666: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:711: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:757: Need dind support -SKIPPED [1] amp/helpers/test/test_unit_test.py:335: This is only used to debug the debugging the infrastructure -SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:380: Need dind support -SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:534: Need dind support -SKIPPED [1] 
amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py:60: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file -SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:130: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:291: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:320: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:412: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:119: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:238: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:243: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:248: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:253: Need dind support -SKIPPED [1] amp/oms/test/test_restrictions.py:18: Need dind support -SKIPPED [1] amp/oms/test/test_restrictions.py:45: Need dind support -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:57: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:75: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:93: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:124: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:150: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:198: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:274: Next PR will rewrite this -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:325: LimeTask296: Break 2022-01-06 -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:311: Run 
manually -SKIPPED [9] amp/datapull/common/data/client/test/test_historical_pq_clients.py: Some tests are returning an empty df -SKIPPED [4] amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py: TODO(gp): Need to update this tests after transform v1.3 -SKIPPED [1] amp/dataflow/system/test/test_real_time_runner.py:39: Too slow for real time -SKIPPED [1] amp/dataflow/core/nodes/test/test_volatility_models.py:423: unconditional skip -SKIPPED [1] amp/core/statistics/test/test_regression.py:46: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/core/statistics/test/test_regression.py:17: This test generates the input data -SKIPPED [1] amp/core/statistics/test/test_regression.py:137: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/core/statistics/test/test_regression.py:108: This test generates the input data -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:270: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:283: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:296: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:303: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. -SKIPPED [1] amp/config_root/config/test/test_config.py:325: See AmpTask1573 -SKIPPED [1] amp/market_data/test/test_market_data_im_client.py:134: CmTask882. 
-SKIPPED [1] vendors_lime/taq_bars/test/test_taq_bars_utils.py:304: This is used to generate the frozen input -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:52: LimeTask222 Use volume for volume everywhere -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:39: Run manually -SKIPPED [1] oms_lime/test/test_eg_portfolio.py:14: Finish this -SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:26: Run manually -SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:110: Run manually -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:36: Skip on Mondays -SKIPPED [1] amp/test/test_tasks.py:68: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:60: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:44: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:64: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:56: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:48: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:52: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:36: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:40: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:122: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:95: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:102: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:85: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:89: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:142: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:112: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:129: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:134: 
Test needs to be run outside Docker -SKIPPED [1] amp/oms/test/test_order_processor.py:70: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:78: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:86: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:96: Need dind support -SKIPPED [7] amp/datapull/test/test_im_lib_tasks.py: CMTask #789. -SKIPPED [1] amp/datapull/test/test_im_lib_tasks.py:240: amp #1189 -SKIPPED [10] amp/im/kibot/data/load/test/test_sql_data_loader.py: CmTask666 -SKIPPED [1] amp/helpers/test/test_cache.py:731: See CMTask #952. -SKIPPED [1] amp/helpers/test/test_git.py:217: Run only in amp as super-module -SKIPPED [1] amp/helpers/test/test_git.py:229: Run only in amp as sub-module -SKIPPED [1] amp/dev_scripts/infra/test/test_all.py: unconditional skip -SKIPPED [1] amp/dev_scripts/git/git_hooks/test/test_install_hooks.py:21: There are no Git credentials inside Docker -SKIPPED [1] amp/oms/test/test_pnl_simulator.py:432: For performance measurement -SKIPPED [1] amp/oms/test/test_api.py:162: unconditional skip -SKIPPED [1] amp/oms/test/test_api.py:191: unconditional skip -SKIPPED [1] amp/oms/test/test_broker.py:55: Need dind support -SKIPPED [11] amp/im/kibot/test/test_kibot_sql_writer_backend.py: CmTask666 -SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:47: Disabled waiting for PTask4139 -SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:66: Disabled waiting for PTask4139 -SKIPPED [1] amp/im/kibot/data/load/test/test_s3_data_loader.py:23: Not implemented yet -SKIPPED [4] amp/im/ib/data/transform/test/test_transform.py: CmTask666 -SKIPPED [11] amp/im/ib/test/test_ib_sql_writer_backend.py: CmTask666 -SKIPPED [1] amp/im/ib/connect/test/test_im_tasks.py: unconditional skip -SKIPPED [1] amp/helpers/test/test_versioning.py:23: CmampTask570 -SKIPPED [3] amp/helpers/test/test_open.py: See cryptomtc/cmamp#321 -SKIPPED [1] amp/helpers/test/test_joblib_helpers.py: Just for experimenting with joblib -SKIPPED 
[1] amp/dataflow/core/nodes/test/test_regression_models.py:35: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/dataflow/core/nodes/test/test_regression_models.py:18: This test generates the input data -SKIPPED [1] amp/dataflow/core/nodes/test/test_sarimax_models.py:39: cmamp #654. -SKIPPED [1] amp/core/test/test_data_adapters.py:146: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:161: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:118: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:177: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:132: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_explore.py:25: https://github.com/.../.../issues/3676 -SKIPPED [1] amp/core/test/test_features.py:510: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:517: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:524: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:556: Apparent instability -SKIPPED [1] amp/core/test/test_backtest.py:27: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_backtest.py:69: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_backtest.py:111: Disabled because of PTask2440 -XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 -XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 -FAILED dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 -= 1 failed, 1581 passed, 209 skipped, 81 deselected, 2 xfailed, 4 rerun in 200.01s (0:03:20) = -15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=120.0 KB -15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' 
-15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... -15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -ERROR: 1 -15:16:15 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3720 Fast tests failed -## run_slow_tests:  -15:16:15 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... 
done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION -CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "slow and not superslow" . 
-o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 30.0s -timeout method: signal -timeout func_only: True -collecting ...  -collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 562 items  -collecting 794 items  -collecting 1037 items  -collecting 1375 items  -collecting 1424 items  -collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint ( 6 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint ( 7 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=d232c57e32e2 - 
release=3.10.0-1160.36.2.el7.x86_64 - version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51706417152, percent=22.3, used=11809091584, free=14425956352, active=30357913600, inactive=18355712000, buffers=0, cached=40313204736, shared=2491396096, slab=2054676480) - disk usage=sdiskusage(total=107362627584, used=32545501184, free=74817126400, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? - gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m slow and not superslow . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 1803 deselected / 71 selected  - -amp/helpers/test/test_sql.py::TestSql1::test_copy_rows_with_copy_from1 SKIPPED [ 1%] -amp/helpers/test/test_sql.py::TestSql1::test_create_database SKIPPED [ 2%] -amp/helpers/test/test_sql.py::TestSql1::test_create_insert_query SKIPPED [ 4%] -amp/helpers/test/test_sql.py::TestSql1::test_db_connection_to_tuple SKIPPED [ 5%] -amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal1 SKIPPED [ 7%] -amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal2 SKIPPED [ 8%] -amp/helpers/test/test_sql.py::TestSql1::test_execute_insert_query1 SKIPPED [ 9%] -amp/helpers/test/test_sql.py::TestSql1::test_remove_database1 SKIPPED [ 11%] -amp/helpers/test/test_sql.py::TestSql1::test_remove_database_invalid SKIPPED [ 12%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create1 (0.84 s) PASSED [ 14%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create2 (0.47 s) PASSED [ 15%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_portfolio 
SKIPPED [ 16%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_vs_dataframe_portfolio SKIPPED [ 18%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_portfolio SKIPPED [ 19%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_vs_dataframe_portfolio SKIPPED [ 21%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_portfolio SKIPPED [ 22%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data1 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_extract_data_from_db.py::TestExtractDataFromDb1::test_extract_data_from_db SKIPPED [ 25%] -dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 (1.29 s) FAILED [ 26%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test1 SKIPPED [ 28%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 (19.17 s) PASSED [ 29%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 (19.22 s) PASSED [ 30%] -research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_slow1 SKIPPED [ 32%] -amp/oms/test/test_oms_db.py::TestOmsDbSubmittedOrdersTable1::test_create_table1 SKIPPED [ 33%] -amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_create_table1 SKIPPED [ 35%] -amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_insert1 SKIPPED [ 36%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table1 SKIPPED [ 38%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table2 SKIPPED [ 39%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table3 SKIPPED [ 40%] 
-amp/oms/test/test_oms_db.py::TestOmsDbCurrentPositionsTable1::test_create_table1 SKIPPED [ 42%] -amp/oms/test/test_oms_db.py::TestOmsDbRestrictionsTable1::test_create_table1 SKIPPED [ 43%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_parallel1 (8.08 s) PASSED [ 45%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_serial1 (11.31 s) PASSED [ 46%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel1 (11.48 s) PASSED [ 47%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel2 (11.42 s) PASSED [ 49%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial1 (14.94 s) PASSED [ 50%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial2 (15.19 s) PASSED [ 52%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentArchiveOnS3::test_serial1 (8.62 s) PASSED [ 53%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_parallel1 (11.59 s) PASSED [ 54%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_serial1 (11.41 s) PASSED [ 56%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel1 (15.41 s) PASSED [ 57%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel2 (15.34 s) PASSED [ 59%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial1 (11.61 s) PASSED [ 60%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial2 (12.15 s) PASSED [ 61%] -im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data_for_multiple_symbols1 (1.92 s) PASSED [ 63%] -amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_equities1 (1.31 s) PASSED [ 64%] -amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 (16.72 s) PASSED [ 66%] 
-amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_fit_dag1 (6.20 s) PASSED [ 67%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_predict_dag1 (4.90 s) PASSED [ 69%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test1 (13.29 s) PASSED [ 70%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test2 (7.24 s) PASSED [ 71%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline2::test_real_time1 SKIPPED [ 73%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time2 (0.03 s) PASSED [ 74%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period2 (0.00 s) SKIPPED [ 76%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period3 (0.00 s) SKIPPED [ 77%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period_compare1 (0.00 s) SKIPPED [ 78%] -amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only1 SKIPPED [ 80%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_all_tables1 SKIPPED [ 81%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_im_database SKIPPED [ 83%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_up1 SKIPPED [ 84%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time1 (3.62 s) PASSED [ 85%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time2 (3.99 s) PASSED [ 87%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file2 (11.92 s) PASSED [ 88%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky4 (1.33 s) PASSED 
[ 90%] -amp/dataflow/system/test/test_source_nodes.py::TestKibotEquityReader::test1 (7.63 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1 (1.90 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1 (1.48 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict1 (1.49 s) PASSED [ 95%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_no_x1 (1.57 s) PASSED [ 97%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_summary (2.32 s) PASSED [ 98%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test4 SKIPPED [100%] - -=================================== FAILURES =================================== -_________________ Test_TiledBacktest_E8d.test_end_to_end_slow1 _________________ -Traceback (most recent call last): - File "/app/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 - self._test(config_builder, experiment_builder, run_model_extra_opts) - File "/app/amp/dataflow/model/run_prod_model_flow.py", line 175, in _test - self.check_string(configs_signature, fuzzy_match=True, tag=tag) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '/app/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' -################################################################################ - -============================= slowest 3 durations ============================== -19.22s call 
dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 -19.17s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 -16.72s call amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 -=========================== short test summary info ============================ -SKIPPED [1] amp/helpers/test/test_sql.py:95: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:36: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:46: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:21: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:111: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:131: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:79: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:58: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:71: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:126: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:210: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:162: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:226: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:200: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/extract/test/test_exchange_class.py:35: Enable after CMTask1292 is resolved. 
-SKIPPED [1] amp/datapull/common/data/transform/test/test_extract_data_from_db.py:38: Need dind support -SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:250: Need dind support -SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:61: Disabled since cache was invalidated -SKIPPED [1] amp/oms/test/test_oms_db.py:46: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:127: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:136: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:192: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:203: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:223: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:292: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:310: Need dind support -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:131: LimeTask222 Use volume for volume everywhere -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:59: Skip on Mondays -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:81: Skip on Mondays -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:105: Skip on Mondays -SKIPPED [1] amp/test/test_tasks.py:116: Test needs to be run outside Docker -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:20: Need dind support -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:49: Need dind support -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:12: Need dind support -SKIPPED [1] amp/core/test/test_backtest.py:153: Disabled because of PTask2440 -FAILED dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 -==== 1 failed, 35 passed, 35 skipped, 1803 deselected in 297.23s (0:04:57) ===== -15:21:15 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=35.7 MB -15:21:15 - WARN  hcache.py 
clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' -15:21:15 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... -15:21:15 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -ERROR: 1 -15:21:18 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3737 Slow tests failed -## run_superslow_tests:  -15:21:18 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... 
done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION -CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "not slow and superslow" . 
-o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 3600.0s -timeout method: signal -timeout func_only: True -collecting ...  -collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 641 items  -collecting 801 items  -collecting 1084 items  -collecting 1391 items  -collecting 1671 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint (11 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint (12 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=61bb36f6d969 - release=3.10.0-1160.36.2.el7.x86_64 - 
version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51712106496, percent=22.3, used=11803402240, free=14392971264, active=30350835712, inactive=18393743360, buffers=0, cached=40351879168, shared=2491396096, slab=2055942144) - disk usage=sdiskusage(total=107362627584, used=32546025472, free=74816602112, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? - gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m not slow and superslow . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 1864 deselected / 10 selected  - -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_vs_dataframe_portfolio SKIPPED [ 10%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 - - -(462.17 s) PASSED [ 20%] -research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_superslow1 SKIPPED [ 30%] -im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data1 (30.67 s) PASSED [ 40%] -amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 (47.16 s) PASSED [ 50%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 (47.27 s) PASSED [ 60%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_1 (14.17 s) PASSED [ 70%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_2 (0.21 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_3 (0.26 s) PASSED [ 90%] 
-amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_4 (0.21 s) PASSED [100%] - -============================= slowest 3 durations ============================== -462.17s call dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 -47.27s call dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 -47.16s call amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 -=========================== short test summary info ============================ -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:239: Need dind support -SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:88: Disabled since cache was invalidated -========== 8 passed, 2 skipped, 1864 deselected in 610.66s (0:10:10) =========== -15:31:32 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=0.0 b -15:31:32 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' -15:31:32 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... 
-15:31:32 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3758 Fast tests failed -15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3763 Slow tests failed -15:31:34 - INFO  lib_tasks.py run_fast_slow_superslow_tests:3770 Superslow tests passed -Traceback (most recent call last): - File "/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke", line 8, in - sys.exit(program.run()) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 384, in run - self.execute() - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 566, in execute - executor.execute(*self.tasks) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/executor.py", line 129, in execute - result = call.task(*args, **call.kwargs) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "/local/home/gsaggese/src/sasm-lime4/amp/helpers/lib_tasks.py", line 3772, in run_fast_slow_superslow_tests - raise RuntimeError("Some tests failed") -RuntimeError: Some tests failed diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt deleted file mode 100644 index 4168d0576..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py 
pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 -pytest dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 - -# Test_get_configs_from_command_line1.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked -ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' - - -_______________ - -# Test_TiledBacktest_E8d.test_end_to_end_slow1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 - self._test(config_builder, experiment_builder, run_model_extra_opts) - File "$GIT_ROOT/dataflow/model/run_prod_model_flow.py", line 175, in _test - self.check_string(configs_signature, fuzzy_match=True, tag=tag) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - 
hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' -################################################################################ - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt deleted file mode 100644 index 955be2326..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt +++ /dev/null @@ -1,396 +0,0 @@ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532280Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532780Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533026Z =================================== FAILURES =================================== -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533724Z _______________________ TestDryRunTasks1.test_git_clean ________________________ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4534485Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535021Z File "/app/helpers/test/test_lib_tasks.py", line 189, in test_git_clean -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4535516Z self.dry_run(target) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535998Z File "/app/helpers/test/test_lib_tasks.py", line 170, in dry_run -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536460Z self.check_string(act) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536939Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537409Z is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537889Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538319Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538746Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539220Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539617Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540266Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540856Z ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541568Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542194Z report_memory_usage=False report_cpu_usage=False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542753Z ## git_clean: dry_run=False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4543426Z > git clean -fd >/dev/null 2>&1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544154Z > git submodule foreach 'git clean -fd >/dev/null 2>&1' -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544817Z > git clean -fd -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4545492Z > git submodule foreach 'git clean -fd' -Run_fast_tests 
Run fast tests 2022-07-10T12:40:40.4546194Z find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4546664Z Diff with: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547307Z > vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547931Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548273Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548887Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4549484Z EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550166Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550742Z exp = r"""report_memory_usage=False report_cpu_usage=False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4551201Z ## git_clean: dry_run=False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4552300Z find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4553032Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4637180Z ____ Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 ____ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4638716Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4641551Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642218Z self._test_fit_over_backtest_period1(system, output_col_name) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642836Z File "/app/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4643452Z self.check_string(actual, fuzzy_match=True, purify_text=True) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644017Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644483Z is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644949Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645544Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645997Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646473Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646868Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4647564Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4648290Z FUZZY ACTUAL vs 
EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649091Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649668Z system_config ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650153Z dag_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650634Z filter_ath: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651136Z col_mode: replace_all ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651652Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652148Z start_time: 09:30:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652605Z end_time: 16:00:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653072Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653690Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654208Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654696Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655175Z rule: 5T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655784Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656444Z vwap_groups: [('close', 'volume', 'vwap')] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656971Z reindex_like_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657465Z join_output_with_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657953Z compute_ret_0: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4658550Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4659057Z out_col_group: () ( -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4659540Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660015Z mode: log_rets ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660489Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660968Z close: close.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661457Z vwap: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661945Z twap: twap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4662415Z compute_vol: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663124Z in_col_group: ('vwap.ret_0',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663731Z out_col_group: ('vwap.ret_0.vol',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664238Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664863Z permitted_exceptions: (,) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665372Z adjust_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665979Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666481Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666971Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4667587Z term1_col: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668076Z term2_col: vwap.ret_0.vol ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668576Z out_col: vwap.ret_0.vol_adj ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669067Z term2_delay: 2 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669552Z operation: div ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670017Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670500Z compress_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671111Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671611Z out_col_group: () ( -Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4672094Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4672591Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673126Z dag_builder_object: nid_prefix= ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673669Z dag_builder_class: Example1_DagBuilder < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674415Z system_class: Example1_ForecastSystem < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674950Z dag_config_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675420Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675903Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676387Z rule: 1T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676933Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4695757Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696094Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696806Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4697507Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698303Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698867Z exp = r"""################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699274Z system_config -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699701Z 
################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700106Z dag_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700449Z filter_ath: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700810Z col_mode: replace_all -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701206Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701589Z start_time: 09:30:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701975Z end_time: 16:00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702320Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702851Z in_col_groups: [('close',), ('volume',), ('feature1',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703292Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703670Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704018Z rule: 5T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704700Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705360Z vwap_groups: [('close', 'volume', 'vwap')] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705811Z reindex_like_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4706221Z join_output_with_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707219Z compute_ret_0: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707770Z in_col_groups: [('close',), ('vwap',), ('twap',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708195Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708558Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708947Z mode: log_rets -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709311Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709685Z close: close.ret_0 -Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4710049Z vwap: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710416Z twap: twap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710775Z compute_vol: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711241Z in_col_group: ('vwap.ret_0',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711723Z out_col_group: ('vwap.ret_0.vol',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712125Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712643Z permitted_exceptions: (,) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713077Z adjust_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713595Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714124Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714543Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714941Z term1_col: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715350Z term2_col: vwap.ret_0.vol -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715752Z out_col: vwap.ret_0.vol_adj -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716142Z term2_delay: 2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716723Z operation: div -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717096Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717445Z compress_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717947Z in_col_groups: [('vwap.ret_0.vol_adj',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718433Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718799Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719194Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719637Z dag_builder_object: nid_prefix= -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4720674Z dag_builder_class: Example1_DagBuilder 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721179Z system_class: Example1_ForecastSystem -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721575Z dag_config_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721934Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722300Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722662Z rule: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4723350Z dag_runner_object: > -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724172Z market_data_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724575Z asset_id_col_name: asset_id -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724989Z asset_ids: [1467591036, 3303714233] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725381Z backtest_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725884Z universe_str: example1_v1-top2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726310Z trading_period_str: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726712Z time_interval_str: Jan2000 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727275Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727822Z end_timestamp: 2000-01-31 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728377Z market_object: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728897Z dag_object: name=None -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4729261Z mode=strict -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4732047Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4734910Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 
'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4735787Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736220Z vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736654Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737097Z 1467591036 3303714233 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737459Z end_ts -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737924Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738406Z 2000-01-01 10:05:00-05:00 0.98 0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738892Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739261Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739878Z ________ Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 _________ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4740430Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741028Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741597Z self._test_fit_over_period1( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742128Z File "/app/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742734Z self.check_string(actual, fuzzy_match=True, purify_text=True) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743389Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743841Z 
is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744328Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744767Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745216Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745672Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746068Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746708Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4747409Z FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748182Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748848Z system_config ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749340Z dag_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749833Z filter_ath: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750343Z col_mode: replace_all ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750846Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751336Z start_time: 09:30:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751805Z end_time: 16:00:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752323Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752956Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4753459Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754106Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754581Z rule: 5T ( -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4755213Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4755868Z vwap_groups: [('close', 'volume', 'vwap')] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756379Z reindex_like_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756884Z join_output_with_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757379Z compute_ret_0: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757975Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758480Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758952Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759435Z mode: log_rets ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759916Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4760412Z close: close.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4786435Z vwap: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787169Z twap: twap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787693Z compute_vol: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4788439Z in_col_group: ('vwap.ret_0',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789095Z out_col_group: ('vwap.ret_0.vol',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789615Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790296Z permitted_exceptions: (,) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790847Z adjust_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4791745Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792292Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792798Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4793324Z term1_col: vwap.ret_0 ( 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794102Z term2_col: vwap.ret_0.vol ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794647Z out_col: vwap.ret_0.vol_adj ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795177Z term2_delay: 2 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795669Z operation: div ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796310Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796825Z compress_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4797500Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798031Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798529Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799069Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799632Z dag_builder_object: nid_prefix= ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800220Z dag_builder_class: Example1_DagBuilder < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800794Z system_class: Example1_ForecastSystem < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801362Z dag_config_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801873Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802385Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802893Z rule: 1T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4803458Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4817745Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818089Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818751Z 
-------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4819543Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820358Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820978Z exp = r"""################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821394Z system_config -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821833Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822241Z dag_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822588Z filter_ath: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822959Z col_mode: replace_all -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823370Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823780Z start_time: 09:30:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824182Z end_time: 16:00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824541Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825093Z in_col_groups: [('close',), ('volume',), ('feature1',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825551Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825951Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4826312Z rule: 5T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827017Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827800Z vwap_groups: [('close', 'volume', 'vwap')] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4828262Z reindex_like_input: False -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4828701Z join_output_with_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829101Z compute_ret_0: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829632Z in_col_groups: [('close',), ('vwap',), ('twap',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830079Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830488Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830867Z mode: log_rets -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831247Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831634Z close: close.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832038Z vwap: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832422Z twap: twap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832798Z compute_vol: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833272Z in_col_group: ('vwap.ret_0',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833972Z out_col_group: ('vwap.ret_0.vol',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834393Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834954Z permitted_exceptions: (,) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835410Z adjust_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835964Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836403Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836795Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837206Z term1_col: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837629Z term2_col: vwap.ret_0.vol -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838047Z out_col: vwap.ret_0.vol_adj -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838451Z term2_delay: 2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838829Z operation: div -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4839211Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4839571Z compress_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840090Z in_col_groups: [('vwap.ret_0.vol_adj',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840527Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840906Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841320Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841792Z dag_builder_object: nid_prefix= -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842257Z dag_builder_class: Example1_DagBuilder -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842739Z system_class: Example1_ForecastSystem -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843303Z dag_config_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843654Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844032Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844412Z rule: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845150Z dag_runner_object: > -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845882Z market_data_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846282Z asset_id_col_name: asset_id -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846700Z asset_ids: [1467591036, 3303714233] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847096Z backtest_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847559Z universe_str: example1_v1-top2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847978Z trading_period_str: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848377Z time_interval_str: Jan2000 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848941Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4849581Z end_timestamp: 2000-01-31 00:00:00+00:00 -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4850153Z market_object: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4850683Z dag_object: name=None -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4851051Z mode=strict -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4853864Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4856657Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857531Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857948Z vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858372Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858854Z 1467591036 3303714233 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859226Z end_ts -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859697Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860188Z 2000-01-01 10:05:00-05:00 0.98 0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860695Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4861053Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4862316Z ============================= slowest 3 durations ============================== -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4866438Z 26.87s setup oms/test/test_broker.py::TestDatabaseBroker1::test1 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4867269Z 6.46s setup datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868189Z 6.24s setup datapull/talos/data/client/test/test_talos_clients.py::TestTalosSqlRealTimeImClient1::test_build_numerical_to_string_id_mapping -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868892Z =========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4956618Z FAILED helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - Run... -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4957400Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4958274Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt deleted file mode 100644 index 94e600076..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt +++ /dev/null @@ -1,399 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file 
'$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 -pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 -pytest helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - -# TestDryRunTasks1.test_git_clean -Traceback (most recent call last): - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 189, in test_git_clean - self.dry_run(target) - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 170, in dry_run - self.check_string(act) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z -report_memory_usage=False report_cpu_usage=False ( -## git_clean: dry_run=False ( - > git clean -fd >/dev/null 2>&1 - > git submodule foreach 'git clean -fd >/dev/null 2>&1' - > git clean -fd - > git submodule foreach 'git clean -fd' -find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( -Diff with: -> vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean --------------------------------------------------------------------------------- -exp = r"""report_memory_usage=False report_cpu_usage=False -## git_clean: dry_run=False -find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf -""" -__ - -# Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 - self._test_fit_over_backtest_period1(system, output_col_name) - File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 - self.check_string(actual, fuzzy_match=True, purify_text=True) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z -system_config ( -dag_config: ( -filter_ath: ( -col_mode: replace_all ( -transformer_kwargs: ( -start_time: 
09:30:00 ( -end_time: 16:00:00 ( -resample: ( -in_col_groups: [('close',), ('volume',), ('feature1',)] ( -out_col_group: () ( -transformer_kwargs: ( -rule: 5T ( -resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -vwap_groups: [('close', 'volume', 'vwap')] ( -reindex_like_input: False ( -join_output_with_input: False ( -compute_ret_0: ( -in_col_groups: [('close',), ('vwap',), ('twap',)] ( -out_col_group: () ( -transformer_kwargs: ( -mode: log_rets ( -col_mapping: ( -close: close.ret_0 ( -vwap: vwap.ret_0 ( -twap: twap.ret_0 ( -compute_vol: ( -in_col_group: ('vwap.ret_0',) ( -out_col_group: ('vwap.ret_0.vol',) ( -drop_nans: True ( -permitted_exceptions: (,) ( -adjust_rets: ( -in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -out_col_group: () ( -transformer_kwargs: ( -term1_col: vwap.ret_0 ( -term2_col: vwap.ret_0.vol ( -out_col: vwap.ret_0.vol_adj ( -term2_delay: 2 ( -operation: div ( -drop_nans: True ( -compress_rets: ( -in_col_groups: [('vwap.ret_0.vol_adj',)] ( -out_col_group: () ( -col_mapping: ( -vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -dag_builder_object: nid_prefix= ( -dag_builder_class: Example1_DagBuilder < -system_class: Example1_ForecastSystem < -dag_config_config: ( -resample: ( -transformer_kwargs: ( -rule: 1T ( -dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 --------------------------------------------------------------------------------- -exp = r"""################################################################################ -system_config 
-################################################################################ -dag_config: - filter_ath: - col_mode: replace_all - transformer_kwargs: - start_time: 09:30:00 - end_time: 16:00:00 - resample: - in_col_groups: [('close',), ('volume',), ('feature1',)] - out_col_group: () - transformer_kwargs: - rule: 5T - resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] - vwap_groups: [('close', 'volume', 'vwap')] - reindex_like_input: False - join_output_with_input: False - compute_ret_0: - in_col_groups: [('close',), ('vwap',), ('twap',)] - out_col_group: () - transformer_kwargs: - mode: log_rets - col_mapping: - close: close.ret_0 - vwap: vwap.ret_0 - twap: twap.ret_0 - compute_vol: - in_col_group: ('vwap.ret_0',) - out_col_group: ('vwap.ret_0.vol',) - drop_nans: True - permitted_exceptions: (,) - adjust_rets: - in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] - out_col_group: () - transformer_kwargs: - term1_col: vwap.ret_0 - term2_col: vwap.ret_0.vol - out_col: vwap.ret_0.vol_adj - term2_delay: 2 - operation: div - drop_nans: True - compress_rets: - in_col_groups: [('vwap.ret_0.vol_adj',)] - out_col_group: () - col_mapping: - vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -dag_builder_object: nid_prefix= -dag_builder_class: Example1_DagBuilder -system_class: Example1_ForecastSystem -dag_config_config: - resample: - transformer_kwargs: - rule: 1T -dag_runner_object: > -market_data_config: - asset_id_col_name: asset_id - asset_ids: [1467591036, 3303714233] -backtest_config: - universe_str: example1_v1-top2 - trading_period_str: 1T - time_interval_str: Jan2000 - start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 - end_timestamp: 2000-01-31 00:00:00+00:00 -market_object: -dag_object: name=None -mode=strict -nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), 
('read_data', {'stage': })] -edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -################################################################################ -vwap.ret_0.vol_adj.c -################################################################################ - 1467591036 3303714233 -end_ts -2000-01-01 10:00:00-05:00 -0.98 -0.98 -2000-01-01 10:05:00-05:00 0.98 0.98 -2000-01-01 10:10:00-05:00 -0.98 -0.98 -""" -______ - -# Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 - self._test_fit_over_period1( - File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 - self.check_string(actual, fuzzy_match=True, purify_text=True) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z -system_config ( -dag_config: ( -filter_ath: ( -col_mode: replace_all ( -transformer_kwargs: ( -start_time: 09:30:00 ( -end_time: 16:00:00 ( -resample: ( -in_col_groups: [('close',), ('volume',), ('feature1',)] ( -out_col_group: () ( -transformer_kwargs: ( -rule: 5T ( -resampling_groups: 
[({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -vwap_groups: [('close', 'volume', 'vwap')] ( -reindex_like_input: False ( -join_output_with_input: False ( -compute_ret_0: ( -in_col_groups: [('close',), ('vwap',), ('twap',)] ( -out_col_group: () ( -transformer_kwargs: ( -mode: log_rets ( -col_mapping: ( -close: close.ret_0 ( -vwap: vwap.ret_0 ( -twap: twap.ret_0 ( -compute_vol: ( -in_col_group: ('vwap.ret_0',) ( -out_col_group: ('vwap.ret_0.vol',) ( -drop_nans: True ( -permitted_exceptions: (,) ( -adjust_rets: ( -in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -out_col_group: () ( -transformer_kwargs: ( -term1_col: vwap.ret_0 ( -term2_col: vwap.ret_0.vol ( -out_col: vwap.ret_0.vol_adj ( -term2_delay: 2 ( -operation: div ( -drop_nans: True ( -compress_rets: ( -in_col_groups: [('vwap.ret_0.vol_adj',)] ( -out_col_group: () ( -col_mapping: ( -vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -dag_builder_object: nid_prefix= ( -dag_builder_class: Example1_DagBuilder < -system_class: Example1_ForecastSystem < -dag_config_config: ( -resample: ( -transformer_kwargs: ( -rule: 1T ( -dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 --------------------------------------------------------------------------------- -exp = r"""################################################################################ -system_config -################################################################################ -dag_config: - filter_ath: - col_mode: replace_all - transformer_kwargs: - start_time: 09:30:00 - end_time: 16:00:00 - resample: - in_col_groups: [('close',), ('volume',), 
('feature1',)] - out_col_group: () - transformer_kwargs: - rule: 5T - resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] - vwap_groups: [('close', 'volume', 'vwap')] - reindex_like_input: False - join_output_with_input: False - compute_ret_0: - in_col_groups: [('close',), ('vwap',), ('twap',)] - out_col_group: () - transformer_kwargs: - mode: log_rets - col_mapping: - close: close.ret_0 - vwap: vwap.ret_0 - twap: twap.ret_0 - compute_vol: - in_col_group: ('vwap.ret_0',) - out_col_group: ('vwap.ret_0.vol',) - drop_nans: True - permitted_exceptions: (,) - adjust_rets: - in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] - out_col_group: () - transformer_kwargs: - term1_col: vwap.ret_0 - term2_col: vwap.ret_0.vol - out_col: vwap.ret_0.vol_adj - term2_delay: 2 - operation: div - drop_nans: True - compress_rets: - in_col_groups: [('vwap.ret_0.vol_adj',)] - out_col_group: () - col_mapping: - vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -dag_builder_object: nid_prefix= -dag_builder_class: Example1_DagBuilder -system_class: Example1_ForecastSystem -dag_config_config: - resample: - transformer_kwargs: - rule: 1T -dag_runner_object: > -market_data_config: - asset_id_col_name: asset_id - asset_ids: [1467591036, 3303714233] -backtest_config: - universe_str: example1_v1-top2 - trading_period_str: 1T - time_interval_str: Jan2000 - start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 - end_timestamp: 2000-01-31 00:00:00+00:00 -market_object: -dag_object: name=None -mode=strict -nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 
'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -################################################################################ -vwap.ret_0.vol_adj.c -################################################################################ - 1467591036 3303714233 -end_ts -2000-01-01 10:00:00-05:00 -0.98 -0.98 -2000-01-01 10:05:00-05:00 0.98 0.98 -2000-01-01 10:10:00-05:00 -0.98 -0.98 -""" - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt deleted file mode 100644 index d0b931699..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt +++ /dev/null @@ -1,7 +0,0 @@ - -```python - -def check_empty_lines(): - print("Check empty lines are present!") - -``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses 
them - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt deleted file mode 100644 index de229ba17..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt deleted file mode 100644 index fb18a0a9c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt +++ /dev/null @@ -1,9 +0,0 @@ -```python -def no_start_python(): - print("No mention of python at the start")``` -``` - -``` - A markdown paragraph contains - delimiters that needs to be removed. 
-``` \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt deleted file mode 100644 index 6c1304cfb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt +++ /dev/null @@ -1,7 +0,0 @@ -Text before -:::: -::::{.column width=40%} -Middle text -:::columns -::::{.column width=60%} -Text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt deleted file mode 100644 index 0ac895652..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -:::: -::: \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt deleted file mode 100644 index 
9f8585df5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -Consecutive headers increase by more than one level: - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt deleted file mode 100644 index ab5bbf048..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt +++ /dev/null @@ -1,71 +0,0 @@ -################################################################################ -level=1, description='Chapter 1' -################################################################################ -- **Chapter 1** - - Section 1.1 - - Section 1.2 -- Chapter 2 -################################################################################ -level=2, description='Section 1.1' -################################################################################ -- Chapter 1 - - **Section 1.1** - - Subsection 1.1.1 - - Subsection 1.1.2 - - Section 1.2 -- Chapter 2 -################################################################################ -level=3, description='Subsection 1.1.1' -################################################################################ -- Chapter 1 - - Section 1.1 - - **Subsection 1.1.1** - - Subsection 1.1.2 - - Section 1.2 -- Chapter 2 -################################################################################ -level=3, description='Subsection 1.1.2' 
-################################################################################ -- Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - **Subsection 1.1.2** - - Section 1.2 -- Chapter 2 -################################################################################ -level=2, description='Section 1.2' -################################################################################ -- Chapter 1 - - Section 1.1 - - **Section 1.2** -- Chapter 2 -################################################################################ -level=1, description='Chapter 2' -################################################################################ -- Chapter 1 -- **Chapter 2** - - Section 2.1 - - Section 2.2 -################################################################################ -level=2, description='Section 2.1' -################################################################################ -- Chapter 1 -- Chapter 2 - - **Section 2.1** - - Subsection 2.1.1 - - Section 2.2 -################################################################################ -level=3, description='Subsection 2.1.1' -################################################################################ -- Chapter 1 -- Chapter 2 - - Section 2.1 - - **Subsection 2.1.1** - - Section 2.2 -################################################################################ -level=2, description='Section 2.2' -################################################################################ -- Chapter 1 -- Chapter 2 - - Section 2.1 - - **Section 2.2** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt deleted file mode 100644 index df89fcd63..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt +++ /dev/null @@ -1,40 +0,0 @@ -################################################################################ -level=1, description='Models' -################################################################################ -- **Models** - - Naive Bayes - - Decision trees - - Random forests - - Linear models -################################################################################ -level=2, description='Naive Bayes' -################################################################################ -- Models - - **Naive Bayes** - - Decision trees - - Random forests - - Linear models -################################################################################ -level=2, description='Decision trees' -################################################################################ -- Models - - Naive Bayes - - **Decision trees** - - Random forests - - Linear models -################################################################################ -level=2, description='Random forests' -################################################################################ -- Models - - Naive Bayes - - Decision trees - - **Random forests** - - Linear models -################################################################################ -level=2, description='Linear models' -################################################################################ -- Models - - Naive Bayes - - Decision trees - - Random forests - - **Linear models** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt deleted 
file mode 100644 index 1c6176761..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt +++ /dev/null @@ -1,40 +0,0 @@ -OrderedDict([('build-system', - OrderedDict([('build-backend', 'poetry.masonry.api'), - ('requires', ['poetry>=0.12'])])), - ('tool', - OrderedDict([('poetry', - OrderedDict([('authors', ['']), - ('dependencies', - OrderedDict([('awscli', '*'), - ('boto3', '*'), - ('bs4', '*'), - ('flaky', '*'), - ('fsspec', '*'), - ('gluonts', '*'), - ('invoke', '*'), - ('jsonpickle', '*'), - ('jupyter', '*'), - ('lxml', '*'), - ('matplotlib', '*'), - ('mxnet', '*'), - ('networkx', '*'), - ('pandas', '^1.1.0'), - ('psycopg2', '*'), - ('pyarrow', '*'), - ('pytest', '^6.0.0'), - ('pytest-cov', '*'), - ('pytest-instafail', - '*'), - ('pytest-xdist', '*'), - ('python', '^3.7'), - ('pywavelets', '*'), - ('requests', '*'), - ('s3fs', '*'), - ('seaborn', '*'), - ('sklearn', '*'), - ('statsmodels', '*'), - ('tqdm', '*')])), - ('description', ''), - ('dev-dependencies', OrderedDict()), - ('name', 'lm'), - ('version', '0.1.0')]))]))]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt deleted file mode 100644 index 66475c930..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -time data "28-07-2023 15:05:13" doesn't match format "%Y%m%d_%H%M%S", at position 0. 
You might want to try: - - passing `format` if your strings have a consistent format; - - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format; - - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this. \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt deleted file mode 100644 index 41895df11..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -Unknown datetime string format, unable to parse: qwe28abc07-201234, at position 0 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt deleted file mode 100644 index 0498168e2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt +++ /dev/null @@ -1,16 +0,0 @@ - -################################################################################ -################################################################################ -_system() failed -################################################################################ -################################################################################ -# _system: cmd='(ls this_file_doesnt_exist) 
2>&1', abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -cmd='(ls this_file_doesnt_exist) 2>&1' -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- rc='' -- output=' -ls: cannot access 'this_file_doesnt_exist': No such file or directory -' -- Output saved in 'tmp.system_output.txt' -- Command saved in 'tmp.system_cmd.sh' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv deleted file mode 100644 index abc3dac80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C,D,E -1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py deleted file mode 100644 index 7b0473b8a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py +++ /dev/null @@ -1,136 +0,0 @@ -import filecmp -import os -import pathlib -import shutil -from typing import List, Tuple - -import dev_scripts_helpers.system_tools.create_links as dshstcrli -import helpers.hio as hio -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Test_create_links -# 
############################################################################# - - -class Test_create_links(hunitest.TestCase): - """ - Unit tests for the `create_links.py` script. - """ - - def create_file( - self, dir_path: pathlib.Path, file_name: str, content: str - ) -> pathlib.Path: - """ - Create a file with the given content in the specified directory. - - This helper function ensures the directory exists before - creating the file and writing the specified content into it. - - :param dir_path: path to the directory where the file will be - created - :param file_name: name of the file to create - :param content: content to write into the file - :return: full path to the created file - """ - dir_path = pathlib.Path(dir_path) - file_path = dir_path / file_name - hio.to_file(file_name=str(file_path), txt=content) - return file_path - - def test__find_common_files(self) -> None: - """ - Test identifying common files between two directories. - - Create two directories, each containing identical files, - and checks that the `_find_common_files` function identifies these files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - src_dir.mkdir(parents=True, exist_ok=True) - dst_dir.mkdir(parents=True, exist_ok=True) - file1_src: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - file1_dst: pathlib.Path = shutil.copy(file1_src, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - str(src_dir), str(dst_dir) - ) - self.assertEqual(len(common_files), 1) - self.assertEqual(common_files[0], (str(file1_src), str(file1_dst))) - - def test__replace_with_links_absolute(self) -> None: - """ - Test replacing common files with absolute symbolic links. 
- - Create identical files in two directories and replace the files - in the destination directory with absolute symbolic links - pointing to the source files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - shutil.copy(file1, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - str(src_dir), str(dst_dir) - ) - dshstcrli._replace_with_links(common_files, use_relative_paths=False) - for _, dst_file in common_files: - self.assertTrue(os.path.islink(dst_file)) - self.assert_equal(os.readlink(dst_file), str(file1)) - - def test__replace_with_links_relative(self) -> None: - """ - Test replacing common files with relative symbolic links. - - Create identical files in two directories and replace the files - in the destination directory with relative symbolic links - pointing to the source files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - shutil.copy(file1, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - src_dir, dst_dir - ) - dshstcrli._replace_with_links(common_files, use_relative_paths=True) - for src_file, dst_file in common_files: - self.assertTrue(os.path.islink(dst_file)) - expected_link: str = os.path.relpath( - src_file, os.path.dirname(dst_file) - ) - self.assert_equal(os.readlink(dst_file), expected_link) - - def test__stage_links(self) -> None: - """ - Test replacing symbolic links with writable file copies. - - Create symbolic links in a directory and then stage them by - replacing each link with a copy of the original file it points - to. 
- """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - src_dir.mkdir(parents=True, exist_ok=True) - dst_dir.mkdir(parents=True, exist_ok=True) - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - link1: pathlib.Path = dst_dir / "file1.txt" - os.symlink(file1, link1) - symlinks: List[str] = dshstcrli._find_symlinks(dst_dir) - dshstcrli._stage_links(symlinks) - for link in symlinks: - self.assertFalse(os.path.islink(link)) - self.assertTrue(os.path.isfile(link)) - self.assertTrue(filecmp.cmp(link, file1, shallow=False)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py deleted file mode 100644 index 98994cb5a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py +++ /dev/null @@ -1,96 +0,0 @@ -import asyncio -import logging -from typing import Optional - -import helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_hasyncio1 -# ############################################################################# - - -class Test_hasyncio1(hunitest.TestCase): - """ - Execute a workload using different time semantics: - - - real time - - simulated time - """ - - @staticmethod - async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: - """ - Coroutine simulating a workload waiting for 1s. 
- """ - - def _print_time() -> None: - true_wall_clock_time = hdateti.get_current_time("ET") - _LOG.debug("wall_clock_time=%s", true_wall_clock_time) - event_loop_time = get_wall_clock_time() - _LOG.debug("event_loop_time=%s", event_loop_time) - - _print_time() - # The execution here is just waiting. - _LOG.debug(" -> execute") - await asyncio.sleep(1.0) - # - _print_time() - - def run_test( - self, - event_loop: Optional[asyncio.AbstractEventLoop], - get_wall_clock_time: hdateti.GetWallClockTime, - ) -> None: - coroutine = self.workload(get_wall_clock_time) - hasynci.run(coroutine, event_loop=event_loop) - - def test_real_time1(self) -> None: - """ - Use real-time semantic. - - In this case: - ``` - wall_clock_time=2021-09-27 20:40:43.775683-04:00 - event_loop_time=2021-09-27 20:40:43.799074-04:00 - -> execute - wall_clock_time=2021-09-27 20:40:44.808990-04:00 - event_loop_time=2021-09-27 20:40:44.812472-04:00 - ``` - - - the wall clock time and the event loop time both advance - """ - # Use the wall clock time with no special event loop. - get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") - event_loop = None - # Run. - self.run_test(event_loop, get_wall_clock_time) - - def test_simulated_time1(self) -> None: - """ - Use simulated time semantic. - - In this case: - ``` - wall_clock_time=2021-09-27 20:38:47.843501-04:00 - event_loop_time=2021-09-27 20:38:47.841555-04:00 - -> execute - wall_clock_time=2021-09-27 20:38:47.868272-04:00 - event_loop_time=2021-09-27 20:38:48.841555-04:00 - ``` - - - the wall_clock time doesn't advance since the execution is instantaneous - - the event loop time moves forward 1 sec - """ - # Use the solipsistic event loop to simulate the real-time faster. - with hasynci.solipsism_context() as event_loop: - # Use the simulated wall clock time. - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - # Run. 
- self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py deleted file mode 100644 index 5469e009e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py +++ /dev/null @@ -1,276 +0,0 @@ -import os -import unittest.mock as umock -from typing import Optional - -import boto3 -import pytest -from botocore.client import BaseClient -from moto import mock_aws - -import helpers.haws as haws -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Haws_test_case -# ############################################################################# - - -class Haws_test_case(hunitest.TestCase): - @pytest.fixture(autouse=True, scope="class") - def aws_credentials(self) -> None: - """ - Mocked AWS credentials for moto. - """ - os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "testing" - os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["MOCK_AWS_SECURITY_TOKEN"] = "testing" - os.environ["MOCK_AWS_SESSION_TOKEN"] = "testing" - os.environ["MOCK_AWS_DEFAULT_REGION"] = "us-east-1" - - -# ############################################################################# -# Test_get_session -# ############################################################################# - - -class Test_get_session(Haws_test_case): - @pytest.fixture(autouse=True) - def set_up_test(self) -> None: - os.environ["MOCK_AWS_S3_BUCKET"] = "mock_aws_bucket" - - @mock_aws - @umock.patch("boto3.Session") - def test_get_session1(self, mock_boto3_session: umock.Mock) -> None: - """ - Test that `haws.get_session` correctly return a session without region - parameter. - """ - aws_profile = "__mock__" - # Create a mock session. 
- mock_session = umock.MagicMock() - mock_boto3_session.return_value = mock_session - # Test that get_session returns a session object. - session = haws.get_session(aws_profile) - self.assertEqual(session, mock_session) - # Verify that `boto3.Session` was called with the correct profile. - mock_boto3_session.assert_called_once_with(profile_name=aws_profile) - - @mock_aws - @umock.patch("boto3.Session") - def test_get_session2(self, mock_boto3_session: umock.Mock) -> None: - """ - Test that `haws.get_session` correctly return a session with region - parameter. - """ - aws_profile = "__mock__" - region = "us-east-1" - # Create a mock session - mock_session = umock.MagicMock() - mock_boto3_session.return_value = mock_session - # Test that `get_session` returns a session object with the specified region. - session = haws.get_session(aws_profile, region=region) - self.assertEqual(session, mock_session) - # Verify that `boto3.Session` was called with the correct profile and region. - mock_boto3_session.assert_called_once_with( - profile_name=aws_profile, region_name=region - ) - - -# ############################################################################# -# Test_get_service_client -# ############################################################################# - - -class Test_get_service_client(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test `haws.get_service_client()` returns a client for S3. - """ - aws_profile = "__mock__" - service_name = "s3" - region = "us-east-1" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name=region, - ) - mock_get_session.return_value = mock_session - # Create mock client for S3. 
- client = haws.get_service_client( - aws_profile=aws_profile, service_name=service_name, region=region - ) - # Check that the returned client is for the S3 service. - self.assert_equal(client.meta.service_model.service_name, "s3") - # Check for region. - self.assert_equal(client.meta.region_name, region) - - -# ############################################################################# -# Test_get_service_resource -# ############################################################################# - - -class Test_get_service_resource(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_service_resource()` correctly retrieves a S3 - resource. - """ - aws_profile = "__mock__" - service_name = "s3" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name="us-east-1", - ) - mock_get_session.return_value = mock_session - # Create mock S3 bucket. - s3 = boto3.resource("s3") - s3.create_bucket(Bucket="my-test-bucket") - s3_resource = haws.get_service_resource( - aws_profile=aws_profile, service_name=service_name - ) - # Get all `S3` buckets. - buckets = list(s3_resource.buckets.all()) - bucket_names = [bucket.name for bucket in buckets] - # Check. - self.assertIn("my-test-bucket", bucket_names) - - -# ############################################################################# -# Test_get_task_definition_image_url -# ############################################################################# - - -class Test_get_task_definition_image_url(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_service_client") - def test1(self, mock_get_service_client: umock.Mock) -> None: - """ - Test that `get_task_definition_image_url` retrieves correct image URL. - """ - # Mock data. 
- task_definition_name = "my-task-definition" - mock_image_url = "old_image_url" - region = "us-east-1" - # Mock the return value of `get_service_client`. - mock_client = boto3.client("ecs", region_name=region) - mock_get_service_client.return_value = mock_client - # Create a mock task definition. - mock_client.register_task_definition( - family=task_definition_name, - # The following are required parameters. - containerDefinitions=[ - {"name": "my-container", "image": mock_image_url, "memory": 512} - ], - ) - image_url = haws.get_task_definition_image_url( - task_definition_name, environment="test" - ) - self.assertEqual(image_url, mock_image_url) - - -# ############################################################################# -# Test_update_task_definition -# ############################################################################# - - -class Test_update_task_definition(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_ecs_client") - def test1(self, mock_get_ecs_client: BaseClient) -> None: - """ - Test updating a task definition with a new image URL. - """ - # Mock data. - task_definition_name = "my-task-definition" - old_image_url = "old_image_url" - new_image_url = "new_image_url" - region = "us-east-1" - # Mock the return value of `get_ecs_client`. - mock_client = boto3.client("ecs", region_name=region) - mock_get_ecs_client.return_value = mock_client - # Create a mock task definition. - mock_client.register_task_definition( - family=task_definition_name, - containerDefinitions=[ - {"name": "my-container", "image": old_image_url} - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - # Update task definition. - haws.update_task_definition( - task_definition_name, - new_image_url, - region=region, - environment="test", - ) - # Check if the task definition is updated. 
- task_description = mock_client.describe_task_definition( - taskDefinition=task_definition_name - ) - updated_image_url = task_description["taskDefinition"][ - "containerDefinitions" - ][0]["image"] - self.assertEqual(updated_image_url, new_image_url) - - -# ############################################################################# -# Test_get_ecs_client -# ############################################################################# - - -class Test_get_ecs_client(Haws_test_case): - def mock_aws_client( - self, mock_get_session: umock.Mock, *, region: Optional[str] = None - ) -> None: - aws_profile = "__mock__" - test_cluster_name = "test-cluster" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name=region or "us-east-1", - ) - mock_get_session.return_value = mock_session - # Create mock ECS client. - ecs_client = boto3.client("ecs", region_name="us-east-1") - ecs_client.create_cluster(clusterName=test_cluster_name) - # Get ECS client. - if region: - test_client = haws.get_ecs_client(aws_profile, region=region) - else: - test_client = haws.get_ecs_client(aws_profile) - # Get the created cluster. - cluster_name = test_client.list_clusters()["clusterArns"][0] - # Check cluster name. - self.assertIn(test_cluster_name, cluster_name) - - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_ecs_client()` correctly return a client to work - with ECS within a specified region. - """ - self.mock_aws_client(mock_get_session, region="us-east-1") - - @mock_aws - @umock.patch("helpers.haws.get_session") - def test2(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_ecs_client()` correctly return a client to work - with ECS without a specified region. 
- """ - self.mock_aws_client(mock_get_session) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py deleted file mode 100644 index 1699e7bcd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py +++ /dev/null @@ -1,1002 +0,0 @@ -import logging -import tempfile -import time -from typing import Any, Callable, Generator, Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hcache as hcache -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Do not commit this. -# _LOG.debug = _LOG.info - - -# TODO(gp): Use hprint.log_frame -def _LOG_frame(txt: str) -> None: - _LOG.debug("\n%s", hprint.frame(txt)) - - -# ############################################################################# - - -def _get_add_function() -> Callable: - """ - Return a function with the ability to track state, used for testing. - """ - - def func(x: int, y: int) -> int: - func.executed = True # type: ignore[attr-defined] - return x + y - - func.executed = False # type: ignore[attr-defined] - return func - - -def _reset_add_function(func: Callable) -> None: - """ - Reset the function before another execution, so we can verify if it was - executed or not. - - We should do this every time we run the cached version of the - function. 
- """ - func.executed = False # type: ignore[attr-defined] - hdbg.dassert(not func.executed) # type: ignore[attr-defined] - - -# ############################################################################# - - -# ############################################################################# -# _ResetGlobalCacheHelper -# ############################################################################# - - -class _ResetGlobalCacheHelper(hunitest.TestCase): - """ - Create a global cache for each test method and resets it at every test - method invocation. - """ - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def _remove_all_caches(self) -> None: - """ - Clean and remove all the caches for this test. - """ - cache_type = "all" - hcache.clear_global_cache(cache_type, tag=self.cache_tag, destroy=True) - - def set_up_test(self) -> None: - # Create a tag like "TestCacheFeatures::test_without_caching1". - self.cache_tag = f"{self.__class__.__name__}::{self._testMethodName}" - # Clean all the caches before this test method is run. - self._remove_all_caches() - - def tear_down_test(self) -> None: - # Clean and remove all the caches after the test method is run. - self._remove_all_caches() - - def _get_f_cf_functions( - self, **cached_kwargs: Any - ) -> Tuple[Callable, hcache._Cached]: - """ - Create the intrinsic function `f` and its cached version `cf`. - """ - # Make sure that we are using the unit test cache. - # disk_cache_name = hcache._get_global_cache_name("disk", self.cache_tag) - # _LOG.debug("disk_cache_name=%s", disk_cache_name) - # _LOG.debug( - # "disk_cache_path=%s", hcache._get_global_cache_path("disk", self.cache_tag) - # ) - # TODO(gp): Add an assertion. - # Create the intrinsic function. - f = _get_add_function() - # Create the cached function. 
- cf = hcache._Cached(f, tag=self.cache_tag, **cached_kwargs) - # Reset all the caches. - hcache.clear_global_cache("all", self.cache_tag) - cf._reset_cache_tracing() - return f, cf - - def _execute_and_check_state( - self, - f: Callable, - cf: hcache._Cached, - val1: int, - val2: int, - exp_cf_state: str, - ) -> None: - """ - Call the function `f(val1, val2) and its cached function `cf(val1, - val2)` and check whether the intrinsic function was executed and what - caches were used, according to `exp_f_state` and `exp_cf_state`. - """ - # If there was no caching then we must have executed the function. - exp_f_state = exp_cf_state == "no_cache" - _LOG.debug( - "\n%s", - hprint.frame( - f"val1={val1}, val2={val2}, exp_f_state={exp_f_state}, " - f"exp_cf_state={exp_cf_state}", - char1="<", - ), - ) - # Reset the intrinsic function since we want to verify if it was called - # or not when we call the cached function. - _reset_add_function(f) - # Call the cached function. - actual = cf(val1, val2) - expected = val1 + val2 - # Check the result. - self.assertEqual(actual, expected) - # Check which function was executed and what caches were used. - _LOG.debug( - "f.executed=%s vs %s", - f.executed, # type: ignore[attr-defined] - exp_f_state, - ) - _LOG.debug( - "cf.get_last_cache_accessed=%s vs %s", - cf.get_last_cache_accessed(), - exp_cf_state, - ) - self.assertEqual(f.executed, exp_f_state) # type: ignore[attr-defined] - self.assertEqual(cf.get_last_cache_accessed(), exp_cf_state) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheFunctions -# ############################################################################# - - -class TestCacheFunctions(hunitest.TestCase): - def test_get_cache_name1(self) -> None: - """ - Make sure we are using the unit test cache and not the development - cache, by checking the name of the disk cache. 
- """ - cache_tag = "unittest" - disk_cache_name = hcache._get_global_cache_name("disk", cache_tag) - _LOG.debug("disk_cache_name=%s", disk_cache_name) - self.assertIn(cache_tag, disk_cache_name) - - -# ############################################################################# - - -# ############################################################################# -# TestGlobalCache1 -# ############################################################################# - - -class TestGlobalCache1(_ResetGlobalCacheHelper): - def test_without_caching1(self) -> None: - """ - If we execute two times without caching, we get two executions of the - intrinsic function. - """ - f = _get_add_function() - self.assertFalse(f.executed) # type: ignore[attr-defined] - # Execute. - actual = f(3, 4) - self.assertEqual(actual, 7) - # The function was executed. - self.assertTrue(f.executed) # type: ignore[attr-defined] - # Reset. - _reset_add_function(f) - self.assertFalse(f.executed) # type: ignore[attr-defined] - # Execute again. - actual = f(3, 4) - self.assertEqual(actual, 7) - # Check that the function is executed again, since there is no caching. - self.assertTrue(f.executed) # type: ignore[attr-defined] - - def test_with_caching1(self) -> None: - """ - - Leave the caches enabled - - Show that the memory cache is used - """ - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions() - # 1) Execute and verify that it is executed, since it was not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Execute and verify that it is not executed, since it's cached. 
- _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - - def test_with_caching2(self) -> None: - """ - - Leave the caches enabled - - Cache different values - """ - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions() - # 1) Execute and verify that it is executed, since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Use a different workload. - _LOG.debug("\n%s", hprint.frame("Execute")) - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 3) Execute the second time: verify that it is not executed, since cached. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 4) Use a different workload: not executed since cached. - _LOG.debug("\n%s", hprint.frame("Execute")) - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") - - def test_with_caching3(self) -> None: - """ - - Disable both mem and disk cache - - Cache a single value - """ - # Disable both memory and disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=False - ) - # 1) Execute the first time: executed since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Execute the second time: executed since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - - def test_with_caching4(self) -> None: - """ - - Disable only the disk cache - - Cache different values - """ - # Use only memory cache. 
- f, cf = self._get_f_cf_functions( - use_mem_cache=True, use_disk_cache=False - ) - # 1) Execute and verify that it is executed since not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Execute the second time: verify that it was cached from memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") - - def test_with_caching5(self) -> None: - """ - - Disable only the memory cache - - Cache different values - """ - # Use only disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=True - ) - # 1) Verify that it is executed since there is no cache. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Verify that it is executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="disk") - - # //////////////////////////////////////////////////////////////////////////// - - def test_with_caching_mem_reset(self) -> None: - """ - - Use only the memory cache - - Execute and cache - - Reset the mem cache - - Execute again - - Check that the cached function is recomputed - """ - # Use only memory cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=True, use_disk_cache=False - ) - # 1) Verify that it is executed, since it's not cached. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Reset memory cache. - _LOG.debug("\n%s", hprint.frame("Reset memory cache")) - hcache.clear_global_cache("mem", self.cache_tag) - # 4) Verify that it is executed, since the cache was emptied. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - - def test_with_caching_disk_reset(self) -> None: - """ - Same as `test_with_caching_mem_reset()` but using the disk cache. - """ - # Use only disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=True - ) - # 1) Verify that it is executed, since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since cached in disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Reset disk cache. - _LOG.debug("\n%s", hprint.frame("Reset memory cache")) - hcache.clear_global_cache("disk", self.cache_tag) - # 4) Verify that it is executed, since the cache was emptied. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - - def test_with_caching_mem_reset2(self) -> None: - """ - - Use both caches - - Execute and cache - - Reset the mem cache - - Execute again - - Check that the cached value is found in the disk cache - """ - # Use both memory and disk cache - f, cf = self._get_f_cf_functions(use_mem_cache=True, use_disk_cache=True) - # 1) Verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Reset memory cache. - hcache.clear_global_cache("mem", self.cache_tag) - # 4) Verify that it is not executed, since it's in the disk cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - # //////////////////////////////////////////////////////////////////////////// - - def test_redefined_function(self) -> None: - """ - If the cached function is redefined, but it's still the same, then the - intrinsic function should not be recomputed. - """ - # Define the function inline imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - add = _get_add_function() - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 3) Redefine the function inline. - _LOG.debug("\n%s", hprint.frame("Redefine function")) - add = _get_add_function() - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 4) Execute the third time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 5) Execute the fourth time. Should still use memory cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 6) Check that call with other arguments miss the cache. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state( - add, cached_add, 3, 4, exp_cf_state="no_cache" - ) - - def test_changed_function(self) -> None: - """ - If the function is redefined, but the code is not the same, then the - intrinsic function should be recomputed. - """ - # Define the function imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - - def add(x: int, y: int) -> int: - add.executed = True # type: ignore[attr-defined] - return x + y - - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 3) Redefine the function with different code. - _LOG.debug("\n%s", hprint.frame("Redefine function")) - - # pylint: disable=function-redefined - def add(x: int, y: int) -> int: # type: ignore[no-redef] - add.executed = True # type: ignore[attr-defined] - z = x + y - return z - - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 4) Execute the third time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 5) Execute the fourth time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 6) Check that call with other arguments miss the cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state( - add, cached_add, 3, 4, exp_cf_state="no_cache" - ) - - -# ############################################################################# - - -# ############################################################################# -# _ResetFunctionSpecificCacheHelper -# ############################################################################# - - -class _ResetFunctionSpecificCacheHelper(_ResetGlobalCacheHelper): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test2() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test2(self) -> None: - self.set_up_test() - # Create temp directories to store the cache. - self.disk_cache_dir = tempfile.mkdtemp() - # Clear global cache. - hcache.clear_global_cache("all", tag=self.cache_tag) - - -# ############################################################################# -# TestFunctionSpecificCache1 -# ############################################################################# - - -class TestFunctionSpecificCache1(_ResetFunctionSpecificCacheHelper): - def test_with_caching1(self) -> None: - """ - - Test using the function-specific disk cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - # Use a global cache and - _LOG.debug("\n%s", hprint.frame("Starting")) - _LOG.debug( - "# get_global_cache_info()=\n%s", - hcache.get_global_cache_info(tag=self.cache_tag), - ) - f, cf = self._get_f_cf_functions( - use_mem_cache=False, - use_disk_cache=True, - disk_cache_path=self.disk_cache_dir, - ) - _LOG.debug( - "# cf.get_function_cache_info()=\n%s", cf.get_function_cache_info() - ) - # 1) Execute and verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("clear_global_cache")) - hcache.clear_global_cache("all") - # 4) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - def test_with_caching2(self) -> None: - """ - - Test using the function-specific disk cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - # Use only per-function disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, disk_cache_path=self.disk_cache_dir - ) - # 1) Execute and verify that it is executed. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("clear_global_cache")) - hcache.clear_global_cache("all") - # 3) Execute and verify that it is not executed. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 4) Use the global cache. - _LOG.debug( - "\n%s", hprint.frame("Disable function cache and use global cache") - ) - cf.set_function_cache_path(None) - # 5) Execute and verify that function is executed with global cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 6) Execute. Now we get the value from the memory cache since disabling - # the function cache means enabling the memory cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 7) Restore back specific cache. - _LOG.debug("\n%s", hprint.frame("Restore function cache")) - cf.set_function_cache_path(self.disk_cache_dir) - # Verify that it is *NOT* executed with specific cache. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCachePerformance -# ############################################################################# - - -class TestCachePerformance(_ResetGlobalCacheHelper): - @staticmethod - # pylint: disable=unused-argument - def _computation(*args: Any) -> None: - """ - Simulate work. - - :param args: throw away arguments - """ - # Emulate small quantity of work. - time.sleep(0.01) - - @staticmethod - def _timeit(func: Callable, *args: Any) -> float: - """ - Get performance measure of the call to fn with args. - - :param fn: callable function - :param args: any arguments to pass to the function fn - :return: precise time in seconds - """ - perf_start = time.perf_counter() - func(*args) - perf_diff = time.perf_counter() - perf_start - return perf_diff - - def _test_performance(self, val: Any) -> None: - """ - Test performance of the cache over some argument val. - - :param val: any hashable argument - """ - # Create cached versions of the computation function. - _mem_cached_computation = hcache._Cached( - self._computation, - tag=self.cache_tag, - use_mem_cache=True, - use_disk_cache=False, - ) - _disk_cached_computation = hcache._Cached( - self._computation, - tag=self.cache_tag, - use_mem_cache=False, - use_disk_cache=True, - ) - # First step: no cache. 
- no_cache_ct = self._timeit(lambda: self._computation(val)) - print(f"no cache run time={no_cache_ct}") - # Second step: memory cache. - memory_no_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) - print(f"empty memory cache run time={memory_no_cache_ct}") - print(f"empty memory cache overhead={memory_no_cache_ct - no_cache_ct}") - memory_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) - print(f"hot memory cache run time={memory_cache_ct}") - print(f"hot memory cache benefit={no_cache_ct - memory_cache_ct}") - # Third step: disk cache. - disk_no_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) - print(f"empty disk cache run time={disk_no_cache_ct}") - print(f"empty disk cache overhead={disk_no_cache_ct - no_cache_ct}") - disk_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) - print(f"hot disk cache run time={disk_cache_ct}") - print(f"hot disk cache benefit={no_cache_ct - disk_cache_ct}") - - def test_performance_dataframe(self) -> None: - """ - Test performance of the cache over pandas DataFrame. - """ - # Create a somewhat big DataFrame with random data. - df = pd.DataFrame( - np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD") - ) - print("testing pandas dataframe, with sample size", df.shape) - self._test_performance(df) - - def test_performance_series(self) -> None: - """ - Test performance of the cache over pandas Series. - """ - # Create a somewhat big DataFrame with random data. 
- s = pd.Series(np.random.randint(0, 100, size=100)) - print("testing pandas series, with sample size", s.shape) - self._test_performance(s) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheDecorator -# ############################################################################# - - -class TestCacheDecorator(_ResetGlobalCacheHelper): - def test_decorated_function(self) -> None: - """ - Test decorator with both caches enabled. - """ - - # Define the function inline imitating working in a notebook. - @hcache.cache(tag=self.cache_tag) - def add(x: int, y: int) -> int: - add.__wrapped__.executed = True - return x + y - - # Execute the first time. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" - ) - # Execute the second time. Must use memory cache. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="mem" - ) - - def test_decorated_function_no_mem(self) -> None: - """ - Test decorator with only disk cache. - """ - - # Define the function inline imitating working in a notebook. - @hcache.cache(tag=self.cache_tag, use_mem_cache=False) - def add(x: int, y: int) -> int: - add.__wrapped__.executed = True - return x + y - - # Execute the first time. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" - ) - # Execute the second time. Must use disk cache. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="disk" - ) - - -# ############################################################################# - - -# ############################################################################# -# TestAmpTask1407 -# ############################################################################# - - -class TestAmpTask1407(_ResetGlobalCacheHelper): - def test1(self) -> None: - """ - A class method can't be cached. 
- """ - - class _AmpTask1407Class: - def __init__(self, string: str) -> None: - self._string = string - - @hcache.cache(tag=self.cache_tag) - def print(self, n: int) -> str: - string = "" - for _ in range(n): - string += "hello" + ("o" * len(self._string)) + " " - return string - - obj = _AmpTask1407Class("test") - with self.assertRaises(ValueError): - obj.print(5) - - def test2(self) -> None: - """ - A static method can be cached. - """ - - class _AmpTask1407Class: - def __init__(self, string: str) -> None: - self._string = string - - @staticmethod - @hcache.cache(tag=self.cache_tag) - def static_print(n: int) -> str: - print("--> hello: ", n) - string = "" - for _ in range(n): - string += "hello" + ("o" * len("world")) + " " - return string - - @hcache.cache(tag=self.cache_tag) - def print(self, n: int) -> str: - string = "" - for _ in range(n): - string += "hello" + ("o" * len(self._string)) + " " - return string - - obj = _AmpTask1407Class("test") - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") - # - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - # - obj.static_print(6) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") - obj.static_print(6) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - - -# ############################################################################# - - -# ############################################################################# -# TestCachingOnS3 -# ############################################################################# - - -class TestCachingOnS3(_ResetFunctionSpecificCacheHelper): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test3() - yield - # Run after each test. 
- self.tear_down_test() - - def set_up_test3(self) -> None: - self.set_up_test2() - # Get a directory to store the cache on S3. - self.disk_cache_dir = self.get_s3_scratch_dir() - self.aws_profile = "am" - # Clear global cache. - hcache.clear_global_cache("all", tag=self.cache_tag) - - @pytest.mark.skip(reason="See CMTask #952.") - def test_with_caching1(self) -> None: - """ - - Test using the function-specific cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - _LOG.debug("\n%s", hprint.frame("Starting")) - _LOG.debug( - "\n%s", - hcache.get_global_cache_info(tag=self.cache_tag, add_banner=True), - ) - f, cf = self._get_f_cf_functions( - use_mem_cache=False, - disk_cache_path=self.disk_cache_dir, - aws_profile=self.aws_profile, - ) - _LOG.debug("\n%s", cf.get_function_cache_info(add_banner=True)) - cf.clear_function_cache(destroy=False) - # 1) Execute and verify that it is executed, since the value is not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("Clear global cache")) - hcache.clear_global_cache("all") - # 4) Verify that it is *NOT* executed, since the S3 cache is used. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 5) Clear the function cache. - _LOG.debug("\n%s", hprint.frame("Clear function cache")) - cf.clear_function_cache() - # 6) Clear the function cache. - _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 7) Verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCacheEnableReadOnly1 -# ############################################################################# - - -class TestCacheEnableReadOnly1(_ResetGlobalCacheHelper): - def _helper(self, cache_from: str, **kwargs: Any) -> None: - """ - Test that when enabling read-only mode we get an assertion only if the - function invocation was not cached. - """ - # Both memory and disk cache enabled, although we use only memory. - f, cf = self._get_f_cf_functions(**kwargs) - # Execute and verify that it is executed, since it was not cached. - _LOG_frame("Execute the 1st time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # Execute and verify that it is not executed, since it's cached in memory. - _LOG_frame("Execute the 2nd time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - _LOG_frame("Execute the 3rd time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - # - # Enable the read-only mode. - # - _LOG_frame("Enable read-only mode") - cf.enable_read_only(True) - # This is cached so it doesn't raise. - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - # This is not cached so it should raise. - with self.assertRaises(hcache.NotCachedValueException) as cm: - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - actual = str(cm.exception) - self.check_string(actual) - # - # Disable the read-only mode. - # - _LOG_frame("Disable read-only mode") - cf.enable_read_only(False) - # Now this doesn't assert even if it's not in the cache. 
- self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - - def test_mem_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) - - def test_disk_cache1(self) -> None: - self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) - - def test_mem_disk_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheUpdateFunction1 -# ############################################################################# - - -class TestCacheUpdateFunction1(_ResetGlobalCacheHelper): - def test1(self) -> None: - # Define the function imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - - def add(x: int, y: int) -> int: - add.executed = True # type: ignore[attr-defined] - return x + y - - disk_cache_dir = self.get_scratch_space() - _LOG.debug("disk_cache_dir=%s", disk_cache_dir) - cached_add = hcache._Cached( - add, - use_mem_cache=False, - use_disk_cache=True, - disk_cache_path=disk_cache_dir, - ) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - func_path = cached_add._get_function_specific_code_path() - code_before = hio.from_file(func_path) - _LOG.debug("code_before=\n%s", code_before) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") - # 3) Redefine the function with different code while running. - _LOG.debug("\n%s", hprint.frame("Update function")) - - # This function is redefined on purpose to test the code. 
- def add(x: int, y: int) -> int: # type: ignore[no-redef] - add.executed = True # type: ignore[attr-defined] - return x * y - - cached_add._func = add - cached_add._disk_cached_func.func = add - cached_add.update_func_code_without_invalidating_cache() - # - code_after = hio.from_file(func_path) - _LOG.debug("code_after=\n%s", code_after) - self.assertNotEqual(code_before, code_after) - # 4) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCacheEnableCheckOnlyIfPresent1 -# ############################################################################# - - -class TestCacheEnableCheckOnlyIfPresent1(_ResetGlobalCacheHelper): - def _helper(self, cache_from: str, **kwargs: Any) -> None: - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions(**kwargs) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state="no_cache") - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - # 3) Enable the `check_only_if_present` mode. - _LOG.debug("\n%s", hprint.frame("Enable check_only_if_present")) - cf.enable_check_only_if_present(True) - # Since the value was cached, we should get an assertion. - with self.assertRaises(hcache.CachedValueException) as cm: - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - actual = str(cm.exception) - self.check_string(actual) - # 4) Try with a new value. 
- _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 2, 2, exp_cf_state="no_cache") - # 5) Disable the `check_only_if_present` mode. - _LOG.debug("\n%s", hprint.frame("Disable check_only_if_present")) - cf.enable_check_only_if_present(False) - # 6) Execute a value: we should get a cache hit. - _LOG.debug("\n%s", hprint.frame("Execute the 4rd time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - # 7) Execute a value: we should get a cache hit. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 2, 2, exp_cf_state=cache_from) - - # TODO(gp): Add a test for verbose mode in __call__ - # TODO(gp): get_function_cache_info - def test_mem_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) - - def test_disk_cache1(self) -> None: - self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) - - def test_mem_disk_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py deleted file mode 100644 index 15ae65c1a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py +++ /dev/null @@ -1,1815 +0,0 @@ -import logging -import os -from typing import Any, Dict - -import pandas as pd -import pytest - -import helpers.hcache_simple as hcacsimp -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_json_double(x: int) -> int: - """ - Return double the input and cache it using JSON. 
- - :param x: input integer to be doubled - :return: doubled value (x * 2) - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache(cache_type="pickle") -def _cached_pickle_square(x: int) -> int: - """ - Return the square of the input and cache it using pickle. - - :param x: input integer to be squared - :return: squared value (x**2) - """ - res = x**2 - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_multi_arg_sum(a: int, b: int) -> int: - """ - Return the sum of two numbers. - - :param a: first number - :param b: second number - :return: sum of a and b. - """ - res = a + b - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_refreshable_func(x: int) -> int: - """ - Return x multiplied by 10 and update the call count. - - :param x: The input integer - :return: The result of multiplying x by 10 - """ - _cached_refreshable_func.call_count += 1 - res = x * 10 - return res - - -# Initialize the call counter for the refreshable function. -_cached_refreshable_func.call_count = 0 - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_kwarg_diff(a: int, b: int = 0) -> int: - """ - Return the difference between a and b. - - :param a: The minuend - :param b: The subtrahend (defaults to 0) - :return: The difference (a - b) - """ - res = a - b - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_add_100(x: int) -> int: - """ - Return x plus 100. Used primarily for testing cache statistics. - - :param x: The input integer - :return: value (x + 100) - """ - res = x + 100 - return res - - -# ############################################################################# -# _BaseCacheTest -# ############################################################################# - - -class _BaseCacheTest(hunitest.TestCase): - """ - Base test class to provide common setup and teardown functionality. 
- - Instead of using setUp/tearDown, we use set_up_test/tear_down_test along - with a pytest fixture that ensures these methods run before and after each - test. - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self): - # Run common setup before each test. - self.set_up_test() - yield - # Run common teardown after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - """ - Setup operations to run before each test: - - - Set specific cache properties needed for the tests. - """ - _LOG.debug("set_up_test") - super().setUp() - # - self._cache_dir = hcacsimp.get_cache_dir() - hcacsimp.set_cache_dir(self.get_scratch_space()) - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test: - - Reset cache(in-memory, disk). - - Reset cache properties. - """ - _LOG.debug("tear_down_test") - hcacsimp.reset_cache("", interactive=False) - hcacsimp.reset_cache_property() - hcacsimp.set_cache_dir(self._cache_dir) - - -# ############################################################################# -# Test_get_cache -# ############################################################################# - - -class Test_get_cache(_BaseCacheTest): - """ - Test get_cache functionality for retrieving cached values. - """ - - def test1(self) -> None: - """ - Verify that get_cache returns a cache with the expected key and value. - """ - # Populate the cache by calling _cached_json_double. - _cached_json_double(2) - # Retrieve the in-memory cache for _cached_json_double. - cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") - # Assert that the key '{"args": [2], "kwargs": {}}' is in the cache and - # its value is 4. 
- self.assertIn('{"args": [2], "kwargs": {}}', cache) - self.assertEqual(cache['{"args": [2], "kwargs": {}}'], 4) - - -# ############################################################################# -# Test_flush_cache_to_disk -# ############################################################################# - - -class Test_flush_cache_to_disk(_BaseCacheTest): - """ - Test flush_cache_to_disk functionality for persisting cache to disk. - """ - - def test1(self) -> None: - """ - Verify that flushing creates a cache file on disk. - """ - # Call _cached_json_double to populate the cache. - _cached_json_double(3) - # Flush the cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Define expected cache file name. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - # Assert that the cache file now exists on disk. - self.assertTrue( - os.path.exists(cache_file), - f"Cache file {cache_file} should exist on disk.", - ) - - def test2(self) -> None: - """ - Verify that the disk cache file contains the expected key and value. - """ - # Populate cache and flush to disk. - _cached_json_double(3) - # Flush the cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Define the expected cache file name. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - # # Open and load the disk cache file. - disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") - # Assert that the disk cache contains the key '{"args": [3], "kwargs": - # {}}' with the correct value. - self.assertIn('{"args": [3], "kwargs": {}}', disk_cache) - # Assert that the value for key '{"args": [3], "kwargs": {}}' is 6. 
- self.assertEqual(disk_cache['{"args": [3], "kwargs": {}}'], 6) - - -# ############################################################################# -# Test_reset_mem_cache -# ############################################################################# - - -class Test_reset_mem_cache(_BaseCacheTest): - """ - Test reset_mem_cache functionality for clearing in-memory cache. - """ - - def test1(self) -> None: - """ - Verify that the cache is empty after `reset_mem_cache` is called. - """ - # Populate the in-memory cache. - _cached_json_double(5) - # Reset the in-memory cache. - hcacsimp.reset_mem_cache("_cached_json_double") - # Retrieve the memory cache after reset. - cache_after: Dict[str, Any] = hcacsimp.get_mem_cache( - "_cached_json_double" - ) - # Verify that the key '{"args": [5], "kwargs": {}}' is no longer in the cache. - self.assertNotIn('{"args": [5], "kwargs": {}}', cache_after) - - -# ############################################################################# -# Test_force_cache_from_disk -# ############################################################################# - - -class Test_force_cache_from_disk(_BaseCacheTest): - """ - Test force_cache_from_disk functionality for loading cache from disk. - """ - - def test1(self) -> None: - """ - Verify that the memory cache is empty after a reset. - """ - # Populate cache and flush to disk. - _cached_json_double(7) - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Reset in-memory cache. - hcacsimp.reset_mem_cache("_cached_json_double") - mem_cache: Dict[str, Any] = hcacsimp.get_mem_cache("_cached_json_double") - # Ensure that the in-memory cache is empty. - self.assertNotIn( - '{"args": [7], "kwargs": {}}', - mem_cache, - "Memory cache should be empty after reset.", - ) - - def test2(self) -> None: - """ - Populate disk cache, reset memory, force reload, and verify that the - key appears. - """ - # Populate cache, flush to disk, and then reset in-memory cache. 
- _cached_json_double(7) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.reset_mem_cache("_cached_json_double") - _LOG.debug("Force reload disk cache for '_cached_json_double'") - # Force reload cache from disk. - hcacsimp.force_cache_from_disk("_cached_json_double") - full_cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") - # Assert that the key is restored in the in-memory cache. - self.assertIn( - '{"args": [7], "kwargs": {}}', - full_cache, - "After forcing, disk key should appear in memory.", - ) - - -# ############################################################################# -# Test_get_cache_perf -# ############################################################################# - - -class Test_get_cache_perf(_BaseCacheTest): - """ - Test cache performance tracking functionality. - """ - - def test1(self) -> None: - """ - Verify that performance tracking records hits and misses correctly. - """ - # Enable performance tracking. - hcacsimp.enable_cache_perf("_cached_json_double") - _LOG.debug("Call _cached_json_double(8) twice") - # First call should be a miss. - _LOG.debug("# First call should be a miss") - _cached_json_double(8) - # Second call should be a hit. - _LOG.debug("# Second call should be a hit") - _cached_json_double(8) - # Retrieve performance statistics. - stats: str = hcacsimp.get_cache_perf_stats("_cached_json_double") - # Verify that one hit and one miss are recorded. - self.assertIn("hits=1", stats) - self.assertIn("misses=1", stats) - - def test2(self) -> None: - """ - Verify that disabling performance tracking returns None. - """ - # Disable performance tracking. - hcacsimp.disable_cache_perf("_cached_json_double") - # Assert that performance data is no longer available. 
- self.assertIsNone(hcacsimp.get_cache_perf("_cached_json_double")) - - -# ############################################################################# -# Test_set_cache_property -# ############################################################################# - - -class Test_set_cache_property(_BaseCacheTest): - """ - Test set_cache_property and get_cache_property functionality. - """ - - def test1(self) -> None: - """ - Verify that setting a valid cache property works and can be retrieved. - """ - # Set a valid cache property. - hcacsimp.set_cache_property( - "_cached_json_double", "report_on_cache_miss", True - ) - # Retrieve and verify the property. - val: bool = hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - self.assertTrue(val) - - def test2(self) -> None: - """ - Verify that resetting cache properties clears previously set - properties. - """ - # Set and verify the cache property. - hcacsimp.set_cache_property( - "_cached_json_double", "report_on_cache_miss", True - ) - self.assertTrue( - hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - ) - # Reset all cache properties. - hcacsimp.reset_cache_property() - # Verify that the property is no longer True. - self.assertFalse( - hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - ) - - def test3(self) -> None: - """ - Verify that setting an invalid cache property raises an error. - """ - # Verify that setting an invalid property raises an error. - with self.assertRaises(AssertionError): - hcacsimp.set_cache_property( - "_cached_json_double", "invalid_prop", True - ) - - def test4(self) -> None: - """ - Verify return of a string containing the property value. - """ - # Set force_refresh property and verify that it appears in the properties string. - hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) - prop_str: str = hcacsimp.cache_property_to_str("_cached_json_double") - # Check output. 
- self.assertIn("force_refresh: True", prop_str) - - -# ############################################################################# -# Test_get_cache_func_names -# ############################################################################# - - -class Test_get_cache_func_names(_BaseCacheTest): - """ - Test get_cache_func_names functionality for retrieving cached function names. - """ - - def test1(self) -> None: - """ - Verify that memory cache function names include `_cached_json_double`. - """ - # Populate in-memory cache. - _cached_json_double(9) - # Retrieve function names from the memory cache. - mem_funcs = hcacsimp.get_cache_func_names("mem") - # Check output. - self.assertIn("_cached_json_double", mem_funcs) - - def test2(self) -> None: - """ - Verify that all cache function names include both JSON and pickle - functions. - """ - # Populate and flush caches for JSON and pickle functions. - _cached_json_double(2) - # Flush _cached_json_double cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Call _cached_pickle_square with input 2. - _cached_pickle_square(2) - # Flush _cached_pickle_square cache to disk. - hcacsimp.flush_cache_to_disk("_cached_pickle_square") - # Retrieve all cache function names (both memory and disk). - all_funcs = hcacsimp.get_cache_func_names("all") - # Check output. - self.assertIn("_cached_json_double", all_funcs) - self.assertIn("_cached_pickle_square", all_funcs) - - def test3(self) -> None: - """ - Verify that disk cache function names include `_cached_json_double` after - flushing. - """ - # Flush JSON cache to disk and verify disk cache function names. - _cached_json_double(2) - # Flush _cached_json_double cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Retrieve function names from the disk cache. - disk_funcs = hcacsimp.get_cache_func_names("disk") - # Check output. 
- self.assertIn("_cached_json_double", disk_funcs) - - -# ############################################################################# -# Test_cache_stats_to_str -# ############################################################################# - - -class Test_cache_stats_to_str(_BaseCacheTest): - """ - Test cache_stats_to_str functionality for generating cache statistics. - """ - - def test1(self) -> None: - """ - Verify that cache_stats_to_str returns a DataFrame with 'memory' and - 'disk' columns. - """ - # Populate cache. - _cached_add_100(1) - stats_df: pd.DataFrame = hcacsimp.cache_stats_to_str("_cached_add_100") - # Assert that the returned object is a DataFrame. - self.assertIsInstance(stats_df, pd.DataFrame) - # Verify that it contains the 'memory' and 'disk' columns. - self.assertIn("memory", stats_df.columns) - self.assertIn("disk", stats_df.columns) - - -# ############################################################################# -# Test__cached_kwarg_diff -# ############################################################################# - - -class Test__cached_kwarg_diff(_BaseCacheTest): - """ - Test caching behavior with keyword arguments. - """ - - def test1(self) -> None: - """ - Test that verifies keyword arguments are handled correctly by the - cache. - """ - # Call with different keyword argument values. - res1: int = _cached_kwarg_diff(5, b=3) - res2: int = _cached_kwarg_diff(5, b=10) - # Both calls should return the different result as both args, kwargs are used for caching. - self.assertNotEqual(res1, res2) - - -# ############################################################################# -# Test__cached_multi_arg_sum -# ############################################################################# - - -class Test__cached_multi_arg_sum(_BaseCacheTest): - """ - Test caching behavior with multiple positional arguments. - """ - - def test1(self) -> None: - """ - Verify that the cache for _cached_multi_arg_sum contains the correct key. 
- """ - # Populate the cache. - _cached_multi_arg_sum(1, 2) - cache: Dict[str, Any] = hcacsimp.get_cache("_cached_multi_arg_sum") - _LOG.debug("cache=%s", cache) - # Verify that the cache key is formatted as '{"args": [1, 2], "kwargs": {}}'. - self.assertIn('{"args": [1, 2], "kwargs": {}}', cache) - - -# ############################################################################# -# Test__cached_pickle_square -# ############################################################################# - - -class Test__cached_pickle_square(_BaseCacheTest): - """ - Test caching with pickle serialization. - """ - - def test1(self) -> None: - """ - Ensure that _cached_pickle_square returns the correct value and disk - file. - """ - # Call the function to square the input. - res: int = _cached_pickle_square(4) - # Flush the cache to disk. - hcacsimp.flush_cache_to_disk("_cached_pickle_square") - cache_file = hcacsimp._get_cache_file_name("_cached_pickle_square") - # Open and load the pickle cache file. - func_cache_data = hcacsimp._load_func_cache_data_from_file( - cache_file, "pickle" - ) - _LOG.debug("func_cache_data=%s", func_cache_data) - # Verify the result and cache contents. - self.assertEqual(res, 16) - self.assertIn('{"args": [4], "kwargs": {}}', func_cache_data) - self.assertEqual(func_cache_data['{"args": [4], "kwargs": {}}'], 16) - - -# ############################################################################# -# Test__cached_refreshable_func -# ############################################################################# - - -class Test__cached_refreshable_func(_BaseCacheTest): - """ - Test force_refresh cache property functionality. - """ - - def test1(self) -> None: - """ - Verify that `_cached_refreshable_func` is called only once initially. - """ - # Reset call counter. - _cached_refreshable_func.call_count = 0 - # Call the function twice with the same input. 
- _cached_refreshable_func(3) - _cached_refreshable_func(3) - # Verify that the function was only called once (cache hit on the second - # call). - self.assertEqual( - _cached_refreshable_func.call_count, - 1, - "Function should be called only once initially.", - ) - - def test2(self) -> None: - """ - Verify that enabling `force_refresh` causes `_cached_refreshable_func` to - be re-called. - """ - # Call the function normally. - res: int = _cached_refreshable_func(3) - # Enable force_refresh so that the function will be re-called. - hcacsimp.set_cache_property( - "_cached_refreshable_func", "force_refresh", True - ) - # Verify that the function returns the correct value (3 * 10 = 30). - self.assertEqual(res, 30) - # Verify that the function's call count has incremented, indicating it - # was re-called. - self.assertEqual( - _cached_refreshable_func.call_count, - 2, - "Function should be re-called when force_refresh is enabled.", - ) - - -# ############################################################################# -# Test_reset_cache_perf -# ############################################################################# - - -class Test_reset_cache_perf(_BaseCacheTest): - """ - Test reset_cache_perf functionality for resetting performance statistics. - """ - - def test1(self) -> None: - """ - Verify that reset_cache_perf resets stats for a single function. - """ - # Prepare inputs. - hcacsimp.enable_cache_perf("_cached_json_double") - _cached_json_double(5) - _cached_json_double(5) - # Run test. - hcacsimp.reset_cache_perf("_cached_json_double") - # Check outputs. - perf = hcacsimp.get_cache_perf("_cached_json_double") - self.assertEqual(perf["tot"], 0) - self.assertEqual(perf["hits"], 0) - self.assertEqual(perf["misses"], 0) - - def test2(self) -> None: - """ - Verify that reset_cache_perf with empty func_name resets all - functions. - """ - # Prepare inputs. 
- hcacsimp.enable_cache_perf("_cached_json_double") - hcacsimp.enable_cache_perf("_cached_multi_arg_sum") - _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - # Run test. - hcacsimp.reset_cache_perf("") - # Check outputs. - perf1 = hcacsimp.get_cache_perf("_cached_json_double") - perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") - self.assertEqual(perf1["tot"], 0) - self.assertEqual(perf2["tot"], 0) - - -# ############################################################################# -# Test_disable_cache_perf -# ############################################################################# - - -class Test_disable_cache_perf(_BaseCacheTest): - """ - Test disable_cache_perf functionality for disabling performance tracking. - """ - - def test1(self) -> None: - """ - Verify that disable_cache_perf with empty func_name disables all - functions. - """ - # Prepare inputs. - hcacsimp.enable_cache_perf("_cached_json_double") - hcacsimp.enable_cache_perf("_cached_multi_arg_sum") - _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - # Run test. - hcacsimp.disable_cache_perf("") - # Check outputs. - perf1 = hcacsimp.get_cache_perf("_cached_json_double") - perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") - # After disabling, perf should be None. - self.assertIsNone(perf1) - self.assertIsNone(perf2) - - -# ############################################################################# -# Test_get_cache_perf_stats -# ############################################################################# - - -class Test_get_cache_perf_stats(_BaseCacheTest): - """ - Test get_cache_perf_stats for retrieving performance statistics. - """ - - def test1(self) -> None: - """ - Verify that get_cache_perf_stats returns empty string when no stats - exist. - """ - # Prepare inputs. - # Ensure no perf stats exist for a non-tracked function. - hcacsimp.disable_cache_perf("_cached_json_double") - # Run test. 
- stats = hcacsimp.get_cache_perf_stats("_cached_json_double") - # Check outputs. - self.assertEqual(stats, "") - - -# ############################################################################# -# Test_cache_property_to_str -# ############################################################################# - - -class Test_cache_property_to_str(_BaseCacheTest): - """ - Test cache_property_to_str for converting properties to string. - """ - - def test1(self) -> None: - """ - Verify that cache_property_to_str with empty func_name returns all - functions. - """ - # Prepare inputs. - # Call functions to ensure they are cached. - _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) - hcacsimp.set_cache_property("_cached_multi_arg_sum", "enable_perf", True) - # Run test. - result = hcacsimp.cache_property_to_str("") - # Check outputs. - self.assertIn("_cached_json_double", result) - self.assertIn("_cached_multi_arg_sum", result) - self.assertIn("force_refresh: True", result) - self.assertIn("enable_perf: True", result) - - -# ############################################################################# -# Test_reset_mem_cache_all -# ############################################################################# - - -class Test_reset_mem_cache_all(_BaseCacheTest): - """ - Test reset_mem_cache with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that reset_mem_cache with empty func_name resets all caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - hcacsimp.reset_mem_cache("") - # Check outputs. 
- cache1 = hcacsimp.get_mem_cache("_cached_json_double") - cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") - self.assertEqual(len(cache1), 0) - self.assertEqual(len(cache2), 0) - - -# ############################################################################# -# Test_reset_disk_cache_all -# ############################################################################# - - -class Test_reset_disk_cache_all(_BaseCacheTest): - """ - Test reset_disk_cache with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that reset_disk_cache with empty func_name removes all cache - files. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") - # Run test. - hcacsimp.reset_disk_cache("", interactive=False) - # Check outputs. - cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertFalse(os.path.exists(cache_file1)) - cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") - self.assertFalse(os.path.exists(cache_file2)) - - -# ############################################################################# -# Test_force_cache_from_disk_all -# ############################################################################# - - -class Test_force_cache_from_disk_all(_BaseCacheTest): - """ - Test force_cache_from_disk with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that force_cache_from_disk with empty func_name loads all - caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") - hcacsimp.reset_mem_cache("") - # Run test. - hcacsimp.force_cache_from_disk("") - # Check outputs. 
- cache1 = hcacsimp.get_mem_cache("_cached_json_double") - cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") - self.assertGreater(len(cache1), 0) - self.assertGreater(len(cache2), 0) - - -# ############################################################################# -# Test_flush_cache_to_disk_all -# ############################################################################# - - -class Test_flush_cache_to_disk_all(_BaseCacheTest): - """ - Test flush_cache_to_disk with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that flush_cache_to_disk with empty func_name flushes all - caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - hcacsimp.flush_cache_to_disk("") - # Check outputs. - cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertTrue(os.path.exists(cache_file1)) - # - cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") - self.assertTrue(os.path.exists(cache_file2)) - - -# ############################################################################# -# Test_cache_stats_to_str_all -# ############################################################################# - - -class Test_cache_stats_to_str_all(_BaseCacheTest): - """ - Test cache_stats_to_str with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that cache_stats_to_str with empty func_name returns stats for - all functions. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - result = hcacsimp.cache_stats_to_str("") - # Check outputs. 
- self.assertIsNotNone(result) - self.assertIn("_cached_json_double", result.index) - self.assertIn("_cached_multi_arg_sum", result.index) - - -# ############################################################################# -# Test_get_cache_func_names_invalid -# ############################################################################# - - -class Test_get_cache_func_names_invalid(_BaseCacheTest): - """ - Test get_cache_func_names with invalid type parameter. - """ - - def test1(self) -> None: - """ - Verify that get_cache_func_names raises ValueError for invalid type. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp.get_cache_func_names("invalid_type") - self.assertIn("Invalid type", str(cm.exception)) - - -# ############################################################################# -# Test__get_cache_file_name -# ############################################################################# - - -class Test__get_cache_file_name(_BaseCacheTest): - """ - Test _get_cache_file_name for invalid cache type. - """ - - def test1(self) -> None: - """ - Verify that _get_cache_file_name raises ValueError for invalid cache - type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp._get_cache_file_name("_cached_json_double") - self.assertIn("Invalid cache type", str(cm.exception)) - # Reset type to valid value for teardown. - hcacsimp.set_cache_property("_cached_json_double", "type", "json") - - -# ############################################################################# -# Test__save_cache_dict_to_disk -# ############################################################################# - - -class Test__save_cache_dict_to_disk(_BaseCacheTest): - """ - Test _save_cache_dict_to_disk for invalid cache type. 
- """ - - def test1(self) -> None: - """ - Verify that _save_cache_dict_to_disk raises ValueError for invalid - cache type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - data = {"key": "value"} - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp._save_cache_dict_to_disk("_cached_json_double", data) - self.assertIn("Invalid cache type", str(cm.exception)) - # Reset type to valid value for teardown. - hcacsimp.set_cache_property("_cached_json_double", "type", "json") - - -# ############################################################################# -# Test_get_disk_cache_invalid -# ############################################################################# - - -class Test_get_disk_cache_invalid(_BaseCacheTest): - """ - Test get_disk_cache for invalid cache type. - """ - - def test1(self) -> None: - """ - Verify that get_disk_cache raises ValueError for invalid cache type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp.get_disk_cache("_cached_json_double") - self.assertIn("Invalid cache type", str(cm.exception)) - # Reset type to valid value for teardown. - hcacsimp.set_cache_property("_cached_json_double", "type", "json") - - -@hcacsimp.simple_cache(cache_type="json") -def _cache_mode_function(x: int) -> int: - """ - Test function to verify cache_mode parameter. - - :param x: input integer - :return: x * 5 - """ - _cache_mode_function.call_count += 1 - res = x * 5 - return res - - -_cache_mode_function.call_count = 0 - - -# ############################################################################# -# Test_cache_mode -# ############################################################################# - - -class Test_cache_mode(_BaseCacheTest): - """ - Test cache_mode parameter functionality. 
- """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_cache_mode_function", "type", "json") - _cache_mode_function.call_count = 0 - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. - """ - super().tear_down_test() - hcacsimp.reset_cache("_cache_mode_function", interactive=False) - - def test1(self) -> None: - """ - Verify that setting force_refresh property forces cache refresh. - """ - # Prepare inputs. - _cache_mode_function(10) - initial_count = _cache_mode_function.call_count - # Set force_refresh property. - hcacsimp.set_cache_property( - "_cache_mode_function", "force_refresh", True - ) - # Run test. - result = _cache_mode_function(10) - # Check outputs. - self.assertEqual(result, 50) - self.assertEqual(_cache_mode_function.call_count, initial_count + 1) - - def test2(self) -> None: - """ - Verify that setting abort_on_cache_miss property aborts on cache miss. - """ - # Prepare inputs. - hcacsimp.set_cache_property( - "_cache_mode_function", "abort_on_cache_miss", True - ) - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _cache_mode_function(99) - self.assertIn("Cache miss", str(cm.exception)) - - def test3(self) -> None: - """ - Verify that calling with different arguments bypasses cache. - """ - # Prepare inputs. - _cache_mode_function(15) - initial_count = _cache_mode_function.call_count - # Run test. - result1 = _cache_mode_function(16) - result2 = _cache_mode_function(17) - # Check outputs. - self.assertEqual(result1, 80) - self.assertEqual(result2, 85) - self.assertEqual(_cache_mode_function.call_count, initial_count + 2) - - -@hcacsimp.simple_cache(cache_type="json") -def _abort_test_function(x: int) -> int: - """ - Test function to verify abort_on_cache_miss parameter. 
- - :param x: input integer - :return: x * 7 - """ - res = x * 7 - return res - - -# ############################################################################# -# Test_abort_on_cache_miss -# ############################################################################# - - -class Test_abort_on_cache_miss(_BaseCacheTest): - """ - Test abort_on_cache_miss functionality. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_abort_test_function", "type", "json") - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. - """ - super().tear_down_test() - hcacsimp.reset_cache("_abort_test_function", interactive=False) - - def test1(self) -> None: - """ - Verify that abort_on_cache_miss=True raises error on cache miss. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _abort_test_function(100, abort_on_cache_miss=True) - self.assertIn("Cache miss", str(cm.exception)) - - -@hcacsimp.simple_cache(cache_type="json") -def _report_test_function(x: int) -> int: - """ - Test function to verify report_on_cache_miss parameter. - - :param x: input integer - :return: x * 8 - """ - res = x * 8 - return res - - -# ############################################################################# -# Test_report_on_cache_miss -# ############################################################################# - - -class Test_report_on_cache_miss(_BaseCacheTest): - """ - Test report_on_cache_miss functionality. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_report_test_function", "type", "json") - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. 
- """ - super().tear_down_test() - hcacsimp.reset_cache("_report_test_function", interactive=False) - - def test1(self) -> None: - """ - Verify that report_on_cache_miss=True returns '_cache_miss_' on miss. - """ - # Run test. - result = _report_test_function(200, report_on_cache_miss=True) - # Check outputs. - self.assertEqual(result, "_cache_miss_") - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def _write_through_function(x: int) -> int: - """ - Test function to verify write_through parameter. - - :param x: input integer - :return: x * 9 - """ - res = x * 9 - return res - - -# ############################################################################# -# Test_write_through -# ############################################################################# - - -class Test_write_through(_BaseCacheTest): - """ - Test write_through functionality for automatic disk caching. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_write_through_function", "type", "json") - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. - """ - super().tear_down_test() - hcacsimp.reset_cache("_write_through_function", interactive=False) - - def test1(self) -> None: - """ - Verify that write_through=True automatically writes to disk. - """ - # Run test. - _write_through_function(11) - # Check outputs. - cache_file = hcacsimp._get_cache_file_name("_write_through_function") - self.assertTrue(os.path.exists(cache_file)) - # - disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") - self.assertIn('{"args": [11], "kwargs": {}}', disk_cache) - self.assertEqual(disk_cache['{"args": [11], "kwargs": {}}'], 99) - - -@hcacsimp.simple_cache(cache_type="json") -def _test_cache_mode_kwarg(x: int, **kwargs) -> int: - """ - Test function that accepts kwargs to test cache_mode parameter. 
- - :param x: input integer - :param kwargs: additional keyword arguments - :return: x * 3 - """ - _test_cache_mode_kwarg.call_count += 1 - res = x * 3 - return res - - -_test_cache_mode_kwarg.call_count = 0 - - -# ############################################################################# -# Test_cache_mode_parameter -# ############################################################################# - - -class Test_cache_mode_parameter(_BaseCacheTest): - """ - Test cache_mode parameter as a keyword argument. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_test_cache_mode_kwarg", "type", "json") - _test_cache_mode_kwarg.call_count = 0 - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. - """ - super().tear_down_test() - hcacsimp.reset_cache("_test_cache_mode_kwarg", interactive=False) - - def test1(self) -> None: - """ - Verify that cache_mode='REFRESH_CACHE' keyword forces refresh. - """ - # Prepare inputs. - _test_cache_mode_kwarg(20) - initial_count = _test_cache_mode_kwarg.call_count - # Run test. - result = _test_cache_mode_kwarg(20, cache_mode="REFRESH_CACHE") - # Check outputs. - self.assertEqual(result, 60) - self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 1) - - def test2(self) -> None: - """ - Verify that cache_mode='HIT_CACHE_OR_ABORT' raises error on miss. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _test_cache_mode_kwarg(88, cache_mode="HIT_CACHE_OR_ABORT") - self.assertIn("Cache miss", str(cm.exception)) - - def test3(self) -> None: - """ - Verify that cache_mode='DISABLE_CACHE' bypasses cache. - """ - # Prepare inputs. - _test_cache_mode_kwarg(30) - initial_count = _test_cache_mode_kwarg.call_count - # Run test. 
- result1 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") - result2 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") - # Check outputs. - self.assertEqual(result1, 90) - self.assertEqual(result2, 90) - self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 2) - - -# ############################################################################# -# Module-level helpers for new tests. -# ############################################################################# - - -@hcacsimp.simple_cache(cache_type="json") -def _test_intrinsic_func_intrinsic(x: int) -> int: - """ - Return x times 3. Named with `_intrinsic` suffix to test suffix stripping. - - :param x: input integer - :return: x * 3 - """ - res = x * 3 - return res - - -@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) -def _test_exclude_keys_func(x: int, *, session_id: str = "") -> int: - """ - Return x times 2, ignoring session_id for caching purposes. - - :param x: input integer - :param session_id: session identifier (excluded from cache key) - :return: x * 2 - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache(cache_type="json", write_through=False) -def _test_no_write_through(x: int) -> int: - """ - Return x plus 1, with write_through disabled. - - :param x: input integer - :return: x + 1 - """ - res = x + 1 - return res - - -# ############################################################################# -# Test_sanity_check_function_cache -# ############################################################################# - - -class Test_sanity_check_function_cache(_BaseCacheTest): - """ - Test sanity_check_function_cache for validating function cache dicts. - """ - - def test1(self) -> None: - """ - Verify that sanity_check_function_cache passes for valid cache data. - """ - # Prepare inputs. - func_cache_data = {'{"args": [1], "kwargs": {}}': 2} - # Run test. 
- hcacsimp.sanity_check_function_cache(func_cache_data) - # Check outputs (no exception raised). - - def test2(self) -> None: - """ - Verify that sanity_check_function_cache passes for empty dict when - assert_on_empty=False. - """ - # Prepare inputs. - func_cache_data: dict = {} - # Run test. - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=False - ) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_sanity_check_cache -# ############################################################################# - - -class Test_sanity_check_cache(_BaseCacheTest): - """ - Test sanity_check_cache for validating nested cache dicts. - """ - - def test1(self) -> None: - """ - Verify that sanity_check_cache passes for valid nested cache data. - """ - # Prepare inputs. - cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} - # Run test. - hcacsimp.sanity_check_cache(cache_data) - # Check outputs (no exception raised). - - def test2(self) -> None: - """ - Verify that sanity_check_cache passes for empty dict when - assert_on_empty=False. - """ - # Prepare inputs. - cache_data: dict = {} - # Run test. - hcacsimp.sanity_check_cache(cache_data, assert_on_empty=False) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_cache_data_to_str -# ############################################################################# - - -class Test_cache_data_to_str(_BaseCacheTest): - """ - Test cache_data_to_str for converting cache data to a string. - """ - - def test1(self) -> None: - """ - Verify that cache_data_to_str returns a string with the function name - and cache key. - """ - # Prepare inputs. - cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} - # Run test. - result = hcacsimp.cache_data_to_str(cache_data) - # Check outputs. 
- self.assertIn("my_func", result) - self.assertIn('{"args": [1], "kwargs": {}}', result) - self.assertIn("42", result) - - -# ############################################################################# -# Test_get_cache_property_system -# ############################################################################# - - -class Test_get_cache_property_system(_BaseCacheTest): - """ - Test get_cache_property for system properties on unknown functions. - """ - - def test1(self) -> None: - """ - Verify that get_cache_property returns None for a system property when - the function is not in the cache property dict. - """ - # Run test. - val = hcacsimp.get_cache_property("_nonexistent_func_xyz", "type") - # Check outputs. - self.assertIsNone(val) - - -# ############################################################################# -# Test_set_cache_property_new_func -# ############################################################################# - - -class Test_set_cache_property_new_func(_BaseCacheTest): - """ - Test set_cache_property for a brand new function not yet in cache property. - """ - - def test1(self) -> None: - """ - Verify that set_cache_property creates a new entry for a function that - was not previously registered. - """ - # Run test. - hcacsimp.set_cache_property("_brand_new_func_xyz", "force_refresh", True) - # Check outputs. - val = hcacsimp.get_cache_property("_brand_new_func_xyz", "force_refresh") - self.assertTrue(val) - - -# ############################################################################# -# Test_cache_property_to_str_no_props -# ############################################################################# - - -class Test_cache_property_to_str_no_props(_BaseCacheTest): - """ - Test cache_property_to_str for a function with no properties in the cache. - """ - - def test1(self) -> None: - """ - Verify that cache_property_to_str returns the function name header even - when the function has no registered cache properties. 
- """ - # Run test with a function name not in _CACHE_PROPERTY. - result = hcacsimp.cache_property_to_str("_nonexistent_func_xyz") - # Check outputs. - self.assertIn("_nonexistent_func_xyz", result) - - -# ############################################################################# -# Test__get_cache_file_name_auto_detect -# ############################################################################# - - -class Test__get_cache_file_name_auto_detect(_BaseCacheTest): - """ - Test _get_cache_file_name when cache type is None (auto-detect from disk). - """ - - def test1(self) -> None: - """ - Verify that _get_cache_file_name infers .pkl extension when a .pkl - file exists on disk. - """ - # Prepare inputs: create a valid .pkl file in the cache dir. - cache_dir = hcacsimp.get_cache_dir() - func_name = "_auto_detect_pkl_func" - pkl_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.pkl") - hcacsimp._save_func_cache_data_to_file(pkl_path, "pickle", {}) - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. - self.assertTrue(file_name.endswith(".pkl")) - - def test2(self) -> None: - """ - Verify that _get_cache_file_name infers .json extension when a .json - file exists on disk. - """ - # Prepare inputs: create a valid .json file in the cache dir. - cache_dir = hcacsimp.get_cache_dir() - func_name = "_auto_detect_json_func" - json_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.json") - hcacsimp._save_func_cache_data_to_file(json_path, "json", {}) - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. - self.assertTrue(file_name.endswith(".json")) - - def test3(self) -> None: - """ - Verify that _get_cache_file_name defaults to .json when no file exists. - """ - # Prepare inputs: use a brand new function name with no disk file. - func_name = "_no_file_func_xyz" - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. 
- self.assertTrue(file_name.endswith(".json")) - - -# ############################################################################# -# Test__save_func_cache_data_to_file_infer -# ############################################################################# - - -class Test__save_func_cache_data_to_file_infer(_BaseCacheTest): - """ - Test _save_func_cache_data_to_file when cache_type is None (inferred from - file extension). - """ - - def test1(self) -> None: - """ - Verify that _save_func_cache_data_to_file infers pickle format from - .pkl extension when cache_type is None. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - file_name = os.path.join(scratch_dir, "tmp_test_infer.pkl") - data = {'{"args": [1], "kwargs": {}}': 42} - # Run test. - hcacsimp._save_func_cache_data_to_file(file_name, None, data) - # Check outputs. - self.assertTrue(os.path.exists(file_name)) - loaded = hcacsimp._load_func_cache_data_from_file(file_name, "pickle") - self.assertEqual(loaded, data) - - -# ############################################################################# -# Test__load_func_cache_data_from_file_infer -# ############################################################################# - - -class Test__load_func_cache_data_from_file_infer(_BaseCacheTest): - """ - Test _load_func_cache_data_from_file when cache_type is None (inferred - from file extension). - """ - - def test1(self) -> None: - """ - Verify that _load_func_cache_data_from_file infers pickle format from - .pkl extension when cache_type is None. - """ - # Prepare inputs: save a pickle file. - scratch_dir = self.get_scratch_space() - file_name = os.path.join(scratch_dir, "tmp_test_load_infer.pkl") - data = {'{"args": [5], "kwargs": {}}': 25} - hcacsimp._save_func_cache_data_to_file(file_name, "pickle", data) - # Run test with None cache_type (should infer from .pkl). - result = hcacsimp._load_func_cache_data_from_file(file_name, None) - # Check outputs. 
- self.assertEqual(result, data) - - -# ############################################################################# -# Test_reset_disk_cache_no_file -# ############################################################################# - - -class Test_reset_disk_cache_no_file(_BaseCacheTest): - """ - Test reset_disk_cache when the target function has no disk cache file. - """ - - def test1(self) -> None: - """ - Verify that reset_disk_cache does not raise when the function has no - cache file on disk. - """ - # Prepare inputs: use a function that has never been cached to disk. - func_name = "_cached_json_double" - # Ensure no disk file exists. - hcacsimp.reset_disk_cache(func_name, interactive=False) - cache_file = hcacsimp._get_cache_file_name(func_name) - self.assertFalse(os.path.exists(cache_file)) - # Run test: reset again when no file exists (should not raise). - hcacsimp.reset_disk_cache(func_name, interactive=False) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_mock_cache -# ############################################################################# - - -class Test_mock_cache(_BaseCacheTest): - """ - Test mock_cache for inserting values directly into the cache. - """ - - def test1(self) -> None: - """ - Verify that mock_cache inserts a value into the function cache that can - be retrieved as a cache hit. - """ - # Prepare inputs. - func_name = "_cached_json_double" - cache_key = '{"args": [99], "kwargs": {}}' - value = 198 - # Run test. - hcacsimp.mock_cache(func_name, cache_key, value) - # Check outputs. - cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache[cache_key], value) - - def test2(self) -> None: - """ - Verify that a mocked cache value causes a cache hit when the decorated - function is called. - """ - # Prepare inputs. - func_name = "_cached_json_double" - cache_key = '{"args": [77], "kwargs": {}}' - value = 154 - # Run test. 
- hcacsimp.mock_cache(func_name, cache_key, value) - result = _cached_json_double(77, abort_on_cache_miss=True) - # Check outputs. - self.assertEqual(result, value) - - -# ############################################################################# -# Test_mock_cache_from_args_kwargs -# ############################################################################# - - -class Test_mock_cache_from_args_kwargs(_BaseCacheTest): - """ - Test mock_cache_from_args_kwargs for inserting values via args/kwargs. - """ - - def test1(self) -> None: - """ - Verify that mock_cache_from_args_kwargs inserts the correct value into - the cache for the given args and kwargs. - """ - # Prepare inputs. - func_name = "_cached_json_double" - args = (55,) - kwargs: dict = {} - value = 110 - # Run test. - hcacsimp.mock_cache_from_args_kwargs(func_name, args, kwargs, value) - # Check outputs. - expected_key = '{"args": [55], "kwargs": {}}' - cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache[expected_key], value) - - -# ############################################################################# -# Test_mock_cache_from_disk -# ############################################################################# - - -class Test_mock_cache_from_disk(_BaseCacheTest): - """ - Test mock_cache_from_disk for bulk-inserting cache data from a dict. - """ - - def test1(self) -> None: - """ - Verify that mock_cache_from_disk populates the cache from a dict of - pre-computed values. - """ - # Prepare inputs. - func_name = "_cached_json_double" - func_cache_data = { - '{"args": [33], "kwargs": {}}': 66, - '{"args": [44], "kwargs": {}}': 88, - } - # Run test. - hcacsimp.mock_cache_from_disk(func_name, func_cache_data) - # Check outputs. 
- cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache['{"args": [33], "kwargs": {}}'], 66) - self.assertEqual(cache['{"args": [44], "kwargs": {}}'], 88) - - -# ############################################################################# -# Test_simple_cache_intrinsic -# ############################################################################# - - -class Test_simple_cache_intrinsic(_BaseCacheTest): - """ - Test simple_cache decorator with a function whose name ends in _intrinsic. - """ - - def tear_down_test(self) -> None: - """ - Teardown including reset of the intrinsic function cache. - """ - super().tear_down_test() - hcacsimp.reset_cache("_test_intrinsic_func", interactive=False) - - def test1(self) -> None: - """ - Verify that the _intrinsic suffix is stripped and the cache key uses - the base function name. - """ - # Run test. - result = _test_intrinsic_func_intrinsic(5) - # Check outputs. - self.assertEqual(result, 15) - # Cache should be stored under the base name (without _intrinsic). - cache = hcacsimp.get_cache("_test_intrinsic_func") - self.assertIn('{"args": [5], "kwargs": {}}', cache) - - -# ############################################################################# -# Test_simple_cache_existing_type -# ############################################################################# - - -class Test_simple_cache_existing_type(_BaseCacheTest): - """ - Test that simple_cache preserves a pre-existing cache type setting. - """ - - def test1(self) -> None: - """ - Verify that applying simple_cache with cache_type='json' does not - override an existing 'pickle' type already set for the function. - """ - # Prepare inputs: set the type before decoration. - hcacsimp.set_cache_property("_inline_type_func", "type", "pickle") - - def _inline_type_func(x: int) -> int: - return x - - # Apply decorator with a different cache_type. - hcacsimp.simple_cache(cache_type="json")(_inline_type_func) - # Check outputs: type should remain 'pickle'. 
- val = hcacsimp.get_cache_property("_inline_type_func", "type") - self.assertEqual(val, "pickle") - - -# ############################################################################# -# Test_simple_cache_exclude_keys -# ############################################################################# - - -class Test_simple_cache_exclude_keys(_BaseCacheTest): - """ - Test simple_cache decorator with exclude_keys parameter. - """ - - def tear_down_test(self) -> None: - """ - Teardown including reset of the exclude_keys test function cache. - """ - super().tear_down_test() - hcacsimp.reset_cache("_test_exclude_keys_func", interactive=False) - - def test1(self) -> None: - """ - Verify that calls with the same primary arg but different excluded - kwargs produce a single cache entry (the excluded key is ignored). - """ - # Run test: two calls with same x but different session_id. - result1 = _test_exclude_keys_func(5, session_id="abc") - result2 = _test_exclude_keys_func(5, session_id="xyz") - # Check outputs. - self.assertEqual(result1, 10) - self.assertEqual(result2, 10) - # Only one cache entry should exist. - cache = hcacsimp.get_cache("_test_exclude_keys_func") - self.assertEqual(len(cache), 1) - - -# ############################################################################# -# Test_simple_cache_no_write_through -# ############################################################################# - - -class Test_simple_cache_no_write_through(_BaseCacheTest): - """ - Test simple_cache decorator with write_through=False. - """ - - def tear_down_test(self) -> None: - """ - Teardown including reset of the no-write-through test function cache. - """ - super().tear_down_test() - hcacsimp.reset_cache("_test_no_write_through", interactive=False) - - def test1(self) -> None: - """ - Verify that with write_through=False the computed value is not - automatically persisted to disk after a function call. - """ - # Run test. 
- result = _test_no_write_through(7) - self.assertEqual(result, 8) - # Reset memory cache so that reading goes to disk. - hcacsimp.reset_mem_cache("_test_no_write_through") - # Check outputs: disk cache should not contain the computed value. - disk_cache = hcacsimp.get_disk_cache("_test_no_write_through") - self.assertNotIn('{"args": [7], "kwargs": {}}', disk_cache) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py deleted file mode 100644 index 4ab1219a4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py +++ /dev/null @@ -1,335 +0,0 @@ -import logging -import os -from typing import Any, List - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hcfile as hcfile - -_LOG = logging.getLogger(__name__) - - -def _create_test_file(self_: Any, filename: str, content: str) -> str: - """ - Create a test file with given content in the scratch directory. - - :param scratch_dir: Directory to create file in - :param filename: Name of file to create - :param content: Content to write to file - :return: Full path to created file - """ - scratch_dir = self_.get_scratch_space() - file_path = os.path.join(scratch_dir, filename) - content = hprint.dedent(content) - hio.to_file(file_path, content) - return file_path - - -def _create_cfile(self_: Any, cfile_content: List[str]) -> str: - """ - Create a cfile with TODOs in the scratch directory. 
- - :param scratch_dir: Directory to create file in - :param cfile_content: List of TODO lines to write - :return: Full path to created cfile - """ - content = "\n".join(cfile_content) - return _create_test_file(self_, "cfile.txt", content) - - -# ############################################################################# -# Test_parse_cfile1 -# ############################################################################# - - -class Test_parse_cfile1(hunitest.TestCase): - def helper(self, cfile_content: str, expected: str) -> None: - """ - Helper function to test parsing a cfile. - - :param cfile_content: Content to write to the test cfile - :param expected: Expected output from parse_cfile - """ - # Prepare inputs. - cfile_path = _create_test_file(self, "cfile.txt", cfile_content) - # Run function under test. - actual = hcfile.parse_cfile(cfile_path) - actual = "\n".join(map(str, actual)) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test parsing a cfile with valid entries. - """ - cfile_content = r""" - file1.py:10: Add docstring - file2.py:20: Add type hints - file3.py:30: Fix formatting - """ - expected = r""" - ('file1.py', '10', ' Add docstring') - ('file2.py', '20', ' Add type hints') - ('file3.py', '30', ' Fix formatting') - """ - self.helper(cfile_content, expected) - - def test2(self) -> None: - """ - Test parsing a cfile with valid entries. 
- """ - cfile_content = r""" - dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] - dev_scripts_helpers/llms/llm_cli.py:23: [C0301(line-too-long), ] Line too long (109/100) [pylint] - helpers/hio.py: 'pandas' is imported multiple times [normalize_imports] - helpers/hmarkdown.py:770:38: W605 invalid escape sequence '\S' [flake8] - """ - expected = r""" - ('dev_scripts_helpers/llms/llm_transform.py', '63', "33: F821 undefined name '_extract_bullet_points' [flake8]") - ('dev_scripts_helpers/llms/llm_cli.py', '23', ' [C0301(line-too-long), ] Line too long (109/100) [pylint]') - ('helpers/hmarkdown.py', '770', "38: W605 invalid escape sequence '\\S' [flake8]") - """ - self.helper(cfile_content, expected) - - def test_empty_file(self) -> None: - """ - Test parsing an empty cfile. - """ - self.helper("", "") - - def test_invalid_entries(self) -> None: - """ - Test parsing a cfile with invalid entries that should be skipped. - """ - cfile_content = r""" - file1.py:10: Valid entry - Invalid line without proper format - file2.py:20: Another valid entry - :30: Missing filename - file3.py:: Missing line number - """ - expected = r""" - ('file1.py', '10', ' Valid entry') - ('file2.py', '20', ' Another valid entry') - (' ', '30', ' Missing filename') - """ - self.helper(cfile_content, expected) - - -# ############################################################################# -# Test_inject_todos_from_cfile1 -# ############################################################################# - - -class Test_inject_todos_from_cfile1(hunitest.TestCase): - def _inject_todos(self, cfile_content: str) -> None: - """ - Helper to inject TODOs with standard parameters. - """ - todo_user = "user" - comment_prefix = "#" - hcfile.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) - - def test1(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file. - """ - # Create a test file. 
- test_file_content = """ - def hello(msg): - print(msg) - - def world(): - print("world") - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with TODOs. - cfile_content = [ - f"{file_path}:1: Add type hints.", - f"{file_path}:4: Add docstring.", - ] - _create_cfile(self, cfile_content) - # Run the function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add type hints. - def hello(msg): - print(msg) - - # TODO(user): Add docstring. - def world(): - print("world") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_one_line_file(self) -> None: - """ - Test injecting TODOs into an empty file. - """ - # Create an empty test file - test_file_content = """ - print("hello") - """ - file_path = _create_test_file(self, "empty.py", test_file_content) - # Create cfile with TODOs - cfile_content = [f"{file_path}:1: Add content to empty file."] - _create_cfile(self, cfile_content) - # Run the function under test - self._inject_todos("\n".join(cfile_content)) - # Check output - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add content to empty file. - print("hello") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_invalid_line_numbers(self) -> None: - """ - Test handling of TODOs with invalid line numbers. - """ - # Create a test file - test_file_content = """ - line1 - line2 - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with invalid line numbers - cfile_content = [ - f"{file_path}:999: This line number doesn't exist.", - ] - _create_cfile(self, cfile_content) - # This should raise an assertion error due to invalid line numbers - with self.assertRaises(AssertionError) as err: - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- expected = """ - ################################################################################ - * Failed assertion * - 998 < 2 - ################################################################################ - """ - self.assert_equal( - str(err.exception), expected, dedent=True, fuzzy_match=True - ) - - def test2(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file with a complex - class. - """ - # Create a test file. - test_file_content = """ - import logging - from typing import List, Optional - - class DataProcessor: - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - def process_batch(self, items): - for item in items: - self.data.append(self._transform(item)) - - def _transform(self, item): - return item.upper() - - def get_results(self): - return self.data - - def clear(self): - self.data = [] - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with TODOs. - cfile_content = [ - f"{file_path}:4: Add class docstring explaining purpose and usage", - f"{file_path}:5: Add type hints for instance variables", - f"{file_path}:9: Add type hints for items parameter", - f"{file_path}:10: Consider adding batch size validation", - f"{file_path}:13: Add error handling for non-string inputs", - f"{file_path}:16: Add return type hint and docstring", - f"{file_path}:19: Add docstring explaining clear behavior", - ] - _create_cfile(self, cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- actual = hio.from_file(file_path) - expected = """ - import logging - from typing import List, Optional - - # TODO(user): Add class docstring explaining purpose and usage - class DataProcessor: - # TODO(user): Add type hints for instance variables - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - # TODO(user): Add type hints for items parameter - def process_batch(self, items): - # TODO(user): Consider adding batch size validation - for item in items: - self.data.append(self._transform(item)) - - # TODO(user): Add error handling for non-string inputs - def _transform(self, item): - return item.upper() - - # TODO(user): Add return type hint and docstring - def get_results(self): - return self.data - - # TODO(user): Add docstring explaining clear behavior - def clear(self): - self.data = [] - """ - self.assert_equal(actual, expected, dedent=True) - - def test3(self) -> None: - """ - Test injecting TODOs from a cfile into multiple Python files. - """ - # Create first test file. - test_file1_content = """ - def foo(): - pass - """ - file_path1 = _create_test_file(self, "test1.py", test_file1_content) - # Create second test file. - test_file2_content = """ - def bar(): - return None - """ - file_path2 = _create_test_file(self, "test2.py", test_file2_content) - # Create cfile. - cfile_content = [ - f"{file_path1}:1: Add docstring for foo.", - f"{file_path2}:1: Add docstring for bar.", - f"{file_path2}:2: Add type hint for return.", - ] - _create_cfile(self, cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual1 = hio.from_file(file_path1) - expected1 = """ - # TODO(user): Add docstring for foo. - def foo(): - pass - """ - self.assert_equal(actual1, expected1, dedent=True) - # - actual2 = hio.from_file(file_path2) - expected2 = """ - # TODO(user): Add docstring for bar. - def bar(): - # TODO(user): Add type hint for return. 
- return None - """ - self.assert_equal(actual2, expected2, dedent=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py deleted file mode 100644 index d8f2c19e2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py +++ /dev/null @@ -1,81 +0,0 @@ -import logging -import os - -import pandas as pd - -import helpers.hcsv as hcsv -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_convert_csv_to_dict -# ############################################################################# - - -class Test_convert_csv_to_dict(hunitest.TestCase): - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - actual_result = hcsv.convert_csv_to_dict(test_csv_path, remove_nans=True) - expected_result = { - "col1": ["a", "b", "c", "d"], - "col2": ["a", "b"], - "col3": ["a", "b", "c"], - } - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_from_typed_csv -# ############################################################################# - - -class Test_from_typed_csv(hunitest.TestCase): - """ - Check the opportunity to load correctly. - - .csv file with dtype param, which exist in .types prefix file. And - finally it checks that dtypes of loaded dataframe didn't change - compared with the original one. 
- """ - - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - os.path.join(dir_name, "test.csv.types") - actual_result = ( - hcsv.from_typed_csv(test_csv_path) - .dtypes.apply(lambda x: x.name) - .to_dict() - ) - expected_result = { - "A": "int64", - "B": "float64", - "C": "object", - "D": "object", - "E": "int64", - } - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_to_typed_csv -# ############################################################################# - - -class Test_to_typed_csv(hunitest.TestCase): - """ - Check whether the function 'to_typed_csv' create file with '.types' prefix - or not. - """ - - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - test_csv_types_path = os.path.join(dir_name, "test.csv.types") - df = pd.read_csv(test_csv_path) - hcsv.to_typed_csv(df, test_csv_path) - self.assertTrue(os.path.exists(test_csv_types_path)) - os.remove(test_csv_types_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py deleted file mode 100644 index aaa5c0c9e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py +++ /dev/null @@ -1,299 +0,0 @@ -""" -Import as: - -import helpers.test.test_dataframe as httdat -""" - -import collections -import logging -import os - -import numpy as np -import pandas as pd - -import helpers.hdataframe as hdatafr -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# 
############################################################################# -# Test_filter_data_by_values1 -# ############################################################################# - - -class Test_filter_data_by_values1(hunitest.TestCase): - def test_conjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - data = data.add_prefix("col_") - filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_values(data, filters, "and", info) - # TODO(gp): Factor out the common code. - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}\n" - f"{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - def test_disjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - data = data.add_prefix("col_") - filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_values(data, filters, "or", info) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}" - f"\n{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - -# ############################################################################# -# Test_filter_data_by_comparison -# ############################################################################# - - -class Test_filter_data_by_comparison(hunitest.TestCase): - def test_conjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - data = data.add_prefix("col_") - filters = {"col_0": (("gt", 1), ("lt", 7)), "col_1": ("eq", 5)} - info: 
collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_comparison( - data, filters, "and", info - ) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}\n" - f"{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - def test_disjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - data = data.add_prefix("col_") - filters = {"col_0": ("gt", 2), "col_1": ("eq", 5)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_comparison( - data, filters, "or", info - ) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}" - f"\n{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - -# ############################################################################# -# TestFilterDataByMethod -# ############################################################################# - - -class TestFilterDataByMethod(hunitest.TestCase): - """ - Test was generated automatically with Playback. - """ - - def test1(self) -> None: - # Define input variables. - input_path = os.path.join(self.get_input_dir(), "test.txt") - data = pd.read_csv(input_path, index_col=0) - filters = { - "Frequency": {"isin": {"values": ["Monthly", "Weekly", "Daily"]}}, - "source_code": {"isin": {"values": ["WIND"]}}, - "is_downloaded": {"isin": {"values": ["success"]}}, - } - mode = "and" - info: collections.OrderedDict = collections.OrderedDict() - # Call function to test. - actual = hdatafr.filter_data_by_method( - df=data, filters=filters, mode=mode, info=info - ) - actual = hpandas.df_to_str(actual, precision=3) - # Check output. 
- self.check_string(actual, fuzzy_match=True) - - -# ############################################################################# -# Test_apply_nan_mode -# ############################################################################# - - -class Test_apply_nan_mode(hunitest.TestCase): - @staticmethod - def _get_series_with_nans(seed: int) -> pd.Series: - date_range = {"start": "1/1/2010", "periods": 40, "freq": "M"} - series = hpandas.get_random_df( - num_cols=1, - seed=seed, - date_range_kwargs=date_range, - )[0] - series[:3] = np.nan - series[-3:] = np.nan - series[5:7] = np.nan - return series - - def test1(self) -> None: - """ - Test for `mode=leave_unchanged`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series) - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test2(self) -> None: - """ - Test for `mode="drop"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="drop") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test3(self) -> None: - """ - Test for `mode="ffill"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="ffill") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test4(self) -> None: - """ - Test for `mode="ffill_and_drop_leading"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="ffill_and_drop_leading") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test5(self) -> None: - """ - Test for `mode="fill_with_zero"`. 
- """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="fill_with_zero") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - # Smoke test for empty input. - def test6(self) -> None: - series = pd.Series(dtype="float64") - hdatafr.apply_nan_mode(series) - - -# ############################################################################# -# Test_compute_points_per_year_for_given_freq -# ############################################################################# - - -class Test_compute_points_per_year_for_given_freq(hunitest.TestCase): - def test1(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("T") - np.testing.assert_equal(actual, 525780.125) - - def test2(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("B") - np.testing.assert_equal(actual, 260.875) - - def test3(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("D") - np.testing.assert_equal(actual, 365.25) - - def test4(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("W") - np.testing.assert_equal(actual, 52.25) - - def test5(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("M") - np.testing.assert_equal(actual, 12.0) - - def test6(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("Y") - np.testing.assert_equal(actual, 1.0) - - def test7(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("0D") - np.testing.assert_equal(actual, 0.0) - - -# ############################################################################# -# TestRemoveDuplicates -# ############################################################################# - - -class TestRemoveDuplicates(hunitest.TestCase): - def test_remove_duplicates1(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": 
[3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = ["dummy_value_1", "dummy_value_2"] - control_column = None - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 0 1 A 3 3 - 1 2 A 2 2""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_remove_duplicates2(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = None - control_column = "knowledge_timestamp" - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 0 1 A 3 3 - 1 2 A 2 2 - 2 1 A 1 1""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_remove_duplicates3(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = ["dummy_value_1", "dummy_value_2"] - control_column = "knowledge_timestamp" - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 1 2 A 2 2 - 2 1 A 1 1""" - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py deleted file mode 100644 index fac073570..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py +++ /dev/null @@ -1,932 +0,0 @@ -import datetime -import logging - -import pandas as pd -import pytz - -import helpers.hdatetime as hdateti -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - -_STR_TS_NAIVE = "2021-01-04 09:30:00" -_STR_TS_UTC = "2021-01-04 09:30:00-00:00" -_STR_TS_ET = "2021-01-04 09:30:00-05:00" - -_PD_TS_NAIVE = pd.Timestamp("2021-01-04 09:30:00") -_PD_TS_UTC = pd.Timestamp("2021-01-04 09:30:00-00:00", tz="UTC") -_PD_TS_ET = pd.Timestamp("2021-01-04 09:30:00-05:00", tz="America/New_York") - -_DT_DT_NAIVE = datetime.datetime(2021, 1, 4, 9, 30, 0) -_DT_DT_UTC = pytz.timezone("UTC").localize(_DT_DT_NAIVE) -_DT_DT_ET = pytz.timezone("America/New_York").localize(_DT_DT_NAIVE) - - -# ############################################################################# -# Test_dassert_is_datetime1 -# ############################################################################# - - -class Test_dassert_is_datetime1(hunitest.TestCase): - def test_is_datetime1(self) -> None: - """ - Test valid datetime objects. - """ - objs = [ - _STR_TS_NAIVE, - _STR_TS_UTC, - _STR_TS_ET, - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - hdateti.dassert_is_datetime(obj) - - def test_is_datetime_fail1(self) -> None: - """ - Test invalid datetime objects. - """ - objs = [0, 0.0] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - with self.assertRaises(AssertionError): - hdateti.dassert_is_datetime(obj) - - def test_is_strict_datetime1(self) -> None: - """ - Test valid datetime objects. 
- """ - objs = [ - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - hdateti.dassert_is_strict_datetime(obj) - - def test_is_strict_datetime_fail1(self) -> None: - """ - Test invalid datetime objects. - """ - objs = [0, _STR_TS_NAIVE, _STR_TS_UTC, _STR_TS_ET, "hello"] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - with self.assertRaises(AssertionError): - hdateti.dassert_is_strict_datetime(obj) - - -# ############################################################################# -# Test_dassert_tz1 -# ############################################################################# - - -class Test_dassert_tz1(hunitest.TestCase): - def test_datetime_conversions(self) -> None: - # Get a tz-naive datetime. - dt = datetime.datetime(2020, 1, 5, 9, 30, 0) - hdateti.dassert_is_tz_naive(dt) - # Localize it to UTC. - dt_utc = pytz.timezone("UTC").localize(dt) - hdateti.dassert_has_tz(dt_utc) - hdateti.dassert_has_UTC_tz(dt_utc) - # Convert to ET. - dt_et = dt_utc.astimezone(pytz.timezone("US/Eastern")) - hdateti.dassert_has_tz(dt_et) - hdateti.dassert_has_ET_tz(dt_et) - # Convert it back to UTC. - dt_utc2 = dt_et.astimezone(pytz.timezone("UTC")) - hdateti.dassert_has_tz(dt_utc2) - hdateti.dassert_has_UTC_tz(dt_utc2) - self.assertEqual(dt_utc, dt_utc2) - # Make it naive. 
- dt2 = dt_utc2.replace(tzinfo=None) - hdateti.dassert_is_tz_naive(dt2) - self.assertEqual(dt, dt2) - - def test_dassert_is_datetime1(self) -> None: - for obj in [ - _STR_TS_NAIVE, - _STR_TS_UTC, - _STR_TS_ET, - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ]: - hdateti.dassert_is_datetime(obj) - - def test_dassert_is_datetime_assert1(self) -> None: - datetime_ = 5 - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_datetime(datetime_) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = r""" - * Failed assertion * - Instance of '5' is '' instead of '(, , )' - datetime_='5' of type '' is not a DateTimeType - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_to_datetime1(self) -> None: - """ - Apply `to_datetime` to a naive datetime. - """ - for obj in [ - _STR_TS_NAIVE, - _PD_TS_NAIVE, - _DT_DT_NAIVE, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_NAIVE - self.assertEqual(actual, expected) - # Check the tz info. - hdateti.dassert_is_tz_naive(actual) - with self.assertRaises(AssertionError): - hdateti.dassert_has_tz(actual) - hdateti.dassert_has_UTC_tz(actual) - hdateti.dassert_has_ET_tz(actual) - - def test_to_datetime2(self) -> None: - """ - Apply `to_datetime` to a UTC datetime. - """ - for obj in [ - _STR_TS_UTC, - _PD_TS_UTC, - _DT_DT_UTC, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_UTC - self.assertEqual(actual, expected) - # Check the tz info. - hdateti.dassert_has_tz(actual) - hdateti.dassert_has_UTC_tz(actual) - with self.assertRaises(AssertionError): - hdateti.dassert_is_tz_naive(actual) - hdateti.dassert_has_ET_tz(actual) - - def test_to_datetime3(self) -> None: - """ - Apply `to_datetime` to an ET datetime. 
- """ - for obj in [ - _STR_TS_ET, - _PD_TS_ET, - _DT_DT_ET, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_ET - self.assertEqual(str(actual), str(expected)) - - -# ############################################################################# -# Test_dassert_tz_compatible1 -# ############################################################################# - - -class Test_dassert_tz_compatible1(hunitest.TestCase): - def test_dassert_compatible_timestamp1(self) -> None: - """ - Both datetimes are naive. - """ - for datetime1 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: - for datetime2 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: - hdateti.dassert_tz_compatible(datetime1, datetime2) - - def test_dassert_compatible_timestamp2(self) -> None: - """ - Both datetimes have tz info. - """ - for datetime1 in [_PD_TS_UTC, _PD_TS_ET]: - for datetime2 in [_DT_DT_UTC, _DT_DT_ET]: - hdateti.dassert_tz_compatible(datetime1, datetime2) - - def test_dassert_compatible_timestamp_assert1(self) -> None: - """ - Test a single not compatible pair of datetimes and check the raised - exception. - """ - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_tz_compatible(_PD_TS_NAIVE, _DT_DT_UTC) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = """ - * Failed assertion * - 'False' - == - 'True' - datetime1='2021-01-04 09:30:00' and datetime2='2021-01-04 09:30:00+00:00' are not compatible - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_dassert_compatible_timestamp_assert2(self) -> None: - """ - Test a pairs of non-compatible datetimes making sure the assertion is - raised. 
- """ - for datetime1 in [ - _PD_TS_NAIVE, - _DT_DT_NAIVE, - _PD_TS_NAIVE, - _DT_DT_NAIVE, - ]: - for datetime2 in [_PD_TS_UTC, _PD_TS_ET, _DT_DT_UTC, _DT_DT_ET]: - with self.assertRaises(AssertionError): - hdateti.dassert_tz_compatible(datetime1, datetime2) - - -# ############################################################################# -# Test_dassert_have_same_tz1 -# ############################################################################# - - -class Test_dassert_have_same_tz1(hunitest.TestCase): - """ - Test an assertion that checks that timezones are equal for input - timestamps. - """ - - def test1(self) -> None: - """ - Timezones are equal. - """ - hdateti.dassert_have_same_tz(_DT_DT_ET, _PD_TS_ET) - - def test2(self) -> None: - """ - Both timestamps are tz-naive. - """ - hdateti.dassert_have_same_tz(_PD_TS_NAIVE, _DT_DT_NAIVE) - - def test3(self) -> None: - """ - Different timezones. - """ - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_have_same_tz(_DT_DT_ET, _DT_DT_UTC) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = """ - * Failed assertion * - 'America/New_York' - == - 'UTC' - datetime1=2021-01-04 09:30:00-05:00 (datetime1.tzinfo=America/New_York) datetime2=2021-01-04 09:30:00+00:00 (datetime2.tzinfo=UTC) - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Same timezone but different DST mode (i.e. EST vs EDT). 
- """ - ts_est = pd.Timestamp("2023-03-12 01:55:00-05:00", tz="America/New_York") - ts_edt = pd.Timestamp("2023-03-12 03:00:00-04:00", tz="America/New_York") - hdateti.dassert_have_same_tz(ts_est, ts_edt) - - -# ############################################################################# -# Test_get_current_time1 -# ############################################################################# - - -class Test_get_current_time1(hunitest.TestCase): - def test_get_current_time_UTC(self) -> None: - tz = "UTC" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_has_UTC_tz(dt) - - def test_get_current_time_ET(self) -> None: - tz = "ET" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_has_ET_tz(dt) - - def test_get_current_time_naive_UTC(self) -> None: - tz = "naive_UTC" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_is_tz_naive(dt) - - def test_get_current_time_naive_ET(self) -> None: - tz = "naive_ET" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_is_tz_naive(dt) - - -# ############################################################################# -# Test_to_generalized_datetime -# ############################################################################# - - -class Test_to_generalized_datetime(hunitest.TestCase): - def test_srs1(self) -> None: - srs = pd.Series(["2010-01-01", "2010-01-02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_index1(self) -> None: - idx = pd.Index(["2010-01-01", "2010-01-02"]) - actual = hdateti.to_generalized_datetime(idx) - expected = pd.Index( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_index_equal(actual, expected) - - def test_daily1(self) -> None: - srs = pd.Series(["1 Jan 
2010", "2 Jan 2010"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_weekly1(self) -> None: - srs = pd.Series(["2021-W14", "2021-W15"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-04-10"), pd.Timestamp("2021-04-17")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_semiannual1(self) -> None: - srs = pd.Series(["2021-S1", "2021-S2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_semiannual2(self) -> None: - srs = pd.Series(["2021/S1", "2021/S2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_bimonthly1(self) -> None: - srs = pd.Series(["2021-B1", "2021-B2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-01-01"), pd.Timestamp("2021-03-01")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly1(self) -> None: - srs = pd.Series(["2020-M1", "2020-M2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly2(self) -> None: - srs = pd.Series(["2020M01", "2020M02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly3(self) -> None: - srs = pd.Series(["2020-01", "2020-02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), 
pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly4(self) -> None: - srs = pd.Series(["2020 Jan", "2020 Feb"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly5(self) -> None: - srs = pd.Series(["January 2020", "February 2020"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly1(self) -> None: - srs = pd.Series(["2020-Q1", "2020-Q2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly2(self) -> None: - srs = pd.Series(["2020Q1", "2020Q2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly3(self) -> None: - srs = pd.Series(["Q1 2020", "Q2 2020"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_annual1(self) -> None: - srs = pd.Series(["2021", "2022"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-12-31"), pd.Timestamp("2022-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - -# ############################################################################# -# Test_find_bar_timestamp1 -# ############################################################################# - - -class Test_find_bar_timestamp1(hunitest.TestCase): - """ - Use mode="round". 
- """ - - def helper1(self, current_timestamp: pd.Timestamp) -> None: - bar_duration_in_secs = 15 * 60 - max_distance_in_secs = 10 - actual = hdateti.find_bar_timestamp( - current_timestamp, - bar_duration_in_secs, - max_distance_in_secs=max_distance_in_secs, - ) - expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test1(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.helper1(current_timestamp) - - def test2(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:00:05", tz="UTC") - self.helper1(current_timestamp) - - def test3(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") - self.helper1(current_timestamp) - - def test4(self) -> None: - current_timestamp = pd.Timestamp( - "2021-09-09 08:01:59.500000+0000", tz="UTC" - ) - bar_duration_in_secs = 1 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="round" - ) - expected = pd.Timestamp("2021-09-09T08:02:00+0000", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - # /////////////////////////////////////////////////////////////////////////// - - def test5(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:20", tz="UTC") - with self.assertRaises(AssertionError) as cm: - self.helper1(current_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 40 <= 10 - current_timestamp=2021-09-09 07:59:20+00:00 is too distant from bar_timestamp=2021-09-09 08:00:00+00:00 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test6(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:10:20", tz="UTC") - with self.assertRaises(AssertionError) as cm: - self.helper1(current_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 280 <= 10 - current_timestamp=2021-09-09 08:10:20+00:00 is too distant from bar_timestamp=2021-09-09 08:15:00+00:00 - 
""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_find_bar_timestamp2 -# ############################################################################# - - -class Test_find_bar_timestamp2(hunitest.TestCase): - """ - Use mode="floor". - """ - - def test1(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") - bar_duration_in_secs = 15 * 60 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T07:45:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:01:55", tz="UTC") - bar_duration_in_secs = 15 * 60 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - current_timestamp = pd.Timestamp( - "2021-09-09 08:01:59.500000+0000", tz="UTC" - ) - bar_duration_in_secs = 1 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T08:01:59+0000", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_convert_seconds_to_minutes -# ############################################################################# - - -class Test_convert_seconds_to_minutes(hunitest.TestCase): - def test1(self) -> None: - """ - Check that conversion is implemented correcty. - """ - num_secs = 300 - actual = hdateti.convert_seconds_to_minutes(num_secs) - expected = int(num_secs / 60) - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Check that an error is raised when input is not an integer number of - minutes. 
- """ - num_secs = 10 - with self.assertRaises(AssertionError) as cm: - hdateti.convert_seconds_to_minutes(num_secs) - actual = str(cm.exception) - expected = """ - * Failed assertion * - '10' - == - '0' - num_secs=10 is not an integer number of minutes - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_convert_unix_epoch_to_timestamp -# ############################################################################# - - -class Test_convert_unix_epoch_to_timestamp(hunitest.TestCase): - def test1(self) -> None: - """ - Test with default parameter values. - """ - epoch = 1631145600000 - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch) - expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Test with specified unit. - """ - epoch = 1631145600 - unit = "s" - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, unit=unit) - expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Test with specified timezone. - """ - epoch = 1631145600000 - tz = "US/Pacific" - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, tz=tz) - expected = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_convert_timestamp_to_unix_epoch -# ############################################################################# - - -class Test_convert_timestamp_to_unix_epoch(hunitest.TestCase): - def test1(self) -> None: - """ - Test with default parameter values. 
- """ - timestamp = pd.Timestamp("2021-09-09") - actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) - expected = 1631145600000 - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Test with specified unit. - """ - timestamp = pd.Timestamp("2021-09-09") - unit = "s" - actual = hdateti.convert_timestamp_to_unix_epoch( - timestamp=timestamp, unit=unit - ) - expected = 1631145600 - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Test for a timestamp with specified timezone. - """ - timestamp = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") - actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) - expected = 1631145600000 - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_str_to_timestamp1 -# ############################################################################# - - -class Test_str_to_timestamp1(hunitest.TestCase): - """ - Test if string representation of datetime is converted correctly. 
- """ - - def test1(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has a valid pattern for `datetime_str` - """ - datetime_str = "20230728_150513" - timezone_info = "US/Eastern" - datetime_format = "%Y%m%d_%H%M%S" - actual = hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=datetime_format - ) - expected = pd.Timestamp("2023-07-28 15:05:13-0400", tz="US/Eastern") - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has an valid pattern for `datetime_str` - - `timezone_info` is UTC - """ - datetime_str = "20230728_150513" - timezone_info = "UTC" - format = "%Y%m%d_%H%M%S" - actual = hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=format - ) - expected = pd.Timestamp("2023-07-28 15:05:13+0000", tz="UTC") - self.assertEqual(actual, expected) - - def test3(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has an invalid pattern for `datetime_str` - """ - datetime_str = "28-07-2023 15:05:13" - timezone_info = "US/Eastern" - datetime_format = "%Y%m%d_%H%M%S" - # The datetime format does not match the string representation of datetime. - with self.assertRaises(ValueError) as err: - hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=datetime_format - ) - actual = str(err.exception) - self.check_string(actual) - - def test4(self) -> None: - """ - - `datetime_str` has an invalid format - - `datetime_format` is not defined - """ - datetime_str = "qwe28abc07-201234" - timezone_info = "US/Eastern" - # Invalid datetime, should raise a ValueError. 
- with self.assertRaises(ValueError) as err: - hdateti.str_to_timestamp(datetime_str, timezone_info) - actual = str(err.exception) - self.check_string(actual) - - -# ############################################################################# -# Test_dassert_str_is_date -# ############################################################################# - - -class Test_dassert_str_is_date(hunitest.TestCase): - """ - Test that the function checks a string representation of date correctly. - """ - - def test1(self) -> None: - """ - - date has a valid format - """ - date_str = "20221101" - hdateti.dassert_str_is_date(date_str) - - def test2(self) -> None: - """ - - date has an invalid format - """ - date = "2022-11-01" - with self.assertRaises(ValueError) as err: - hdateti.dassert_str_is_date(date) - actual = str(err.exception) - self.check_string(actual) - - -# ############################################################################# -# Test_dassert_is_valid_timestamp -# ############################################################################# - - -class Test_dassert_is_valid_timestamp(hunitest.TestCase): - def test1(self) -> None: - """ - Test should not raise an exception when timestamp has a timezone. - """ - timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - hdateti.dassert_is_valid_timestamp(timestamp) - - def test2(self) -> None: - """ - Test should raise an exception when timestamp is without timezone info. - """ - # Set inputs. - timestamp = pd.Timestamp("2021-01-04 09:30:00") - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_valid_timestamp(timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 'None' is not 'None' - datetime_='2021-01-04 09:30:00' doesn't have timezone info - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test should not raise an exception when timestamp is none. 
- """ - timestamp = None - hdateti.dassert_is_valid_timestamp(timestamp) - - def test4(self) -> None: - """ - Test should raise an exception when timestamp is of type string. - """ - # Set input. - timestamp = "2021-01-04 09:30:00" - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_valid_timestamp(timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '2021-01-04 09:30:00' is '' instead of '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_timestamp_lt -# ############################################################################# - - -class Test_dassert_timestamp_lt(hunitest.TestCase): - def test1(self) -> None: - """ - Test with valid timestamps where start is less than end. - """ - start_timestamp = pd.Timestamp("2021-01-02 09:30:00-00:00", tz="UTC") - end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test2(self) -> None: - """ - Test with equal timestamps, this is should raise an exception. - """ - # Set inputs. - start_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2021-02-02 09:30:00+00:00 < 2021-02-02 09:30:00+00:00 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test with start timestamp greater than end timestamp, this is should - raise an exception. - """ - # Set inputs. 
- start_timestamp = pd.Timestamp( - "2021-02-04 09:30:00-05:00", tz="America/New_York" - ) - end_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2021-02-04 09:30:00-05:00 < 2021-01-04 09:30:00-05:00 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test with start timestamp as None. - """ - start_timestamp = None - end_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test5(self) -> None: - """ - Test with end timestamp as None. - """ - start_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - end_timestamp = None - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test6(self) -> None: - """ - Test with both timestamps as None. - """ - start_timestamp = None - end_timestamp = None - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py deleted file mode 100644 index 9dd38d00e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py +++ /dev/null @@ -1,934 +0,0 @@ -import collections -import logging -from typing import List, Tuple - -import helpers.hdbg as hdbg -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Make sure the coverage is 100%. 
- -# ############################################################################# - - -# ############################################################################# -# Test_dassert1 -# ############################################################################# - - -# TODO(gp): Use a self.assert_equal() instead of a check_string() since this -# code needs to be stable. -class Test_dassert1(hunitest.TestCase): - """ - Test `dassert()`. - """ - - def test1(self) -> None: - """ - An assertion that is verified. - """ - hdbg.dassert(True) - - def test2(self) -> None: - """ - An assertion that is not verified. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False) - self.check_string(str(cm.exception)) - - def test3(self) -> None: - """ - An assertion with a message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, msg="hello") - self.check_string(str(cm.exception)) - - def test4(self) -> None: - """ - An assertion with a message to format. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s", "world") - self.check_string(str(cm.exception)) - - def test5(self) -> None: - """ - Too many parameters. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s", "world", "too_many") - self.check_string(str(cm.exception)) - - def test6(self) -> None: - """ - Not enough parameters. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s") - self.check_string(str(cm.exception)) - - def test7(self) -> None: - """ - Common error of calling `dassert()` instead of `dassert_eq()`. - - According to the user's intention the assertion should trigger, - but, because of using `dassert()` instead of `dassert_eq()`, the - assertion will not trigger. We notice that the user passed a - list instead of a string as `msg` and raise. 
- """ - with self.assertRaises(AssertionError) as cm: - y = ["world"] - hdbg.dassert(y, ["hello"]) - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_eq1 -# ############################################################################# - - -class Test_dassert_eq1(hunitest.TestCase): - def test1(self) -> None: - hdbg.dassert_eq(1, 1) - - def test2(self) -> None: - hdbg.dassert_eq(1, 1, msg="hello world") - - def test3(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, msg="hello world") - self.check_string(str(cm.exception)) - - def test4(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, "hello %s", "world") - self.check_string(str(cm.exception)) - - def test5(self) -> None: - """ - Raise assertion with incorrect message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, "hello %s") - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_misc1 -# ############################################################################# - - -# TODO(gp): Break it in piece. 
-class Test_dassert_misc1(hunitest.TestCase): - # dassert_in - - def test_in1(self) -> None: - hdbg.dassert_in("a", "abc") - - def test_in2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_in("a", "xyz".split()) - self.check_string(str(cm.exception)) - - # dassert_is - - def test_is1(self) -> None: - a = None - hdbg.dassert_is(a, None) - - def test_is2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is("a", None) - self.check_string(str(cm.exception)) - - # dassert_isinstance - - def test_is_instance1(self) -> None: - hdbg.dassert_isinstance("a", str) - - def test_is_instance2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_isinstance("a", int) - self.check_string(str(cm.exception)) - - def test_is_instance3(self) -> None: - hdbg.dassert_isinstance("a", (str, int)) - - def test_is_instance4(self) -> None: - hdbg.dassert_isinstance(5.0, (float, int)) - - def test_is_instance5(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_isinstance("a", (float, int)) - # TODO(gp): Replace all check_string with assert_equal - self.check_string(str(cm.exception)) - - # dassert_set_eq - - def test_set_eq1(self) -> None: - a = [1, 2, 3] - b = [2, 3, 1] - hdbg.dassert_set_eq(a, b) - - def test_set_eq2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [2, 2, 1] - hdbg.dassert_set_eq(a, b) - # Check. - actual = str(cm.exception) - expected = """ - * Failed assertion * - val1 - val2=[3] - val2 - val1=[] - val1=[1, 2, 3] - set eq - val2=[1, 2] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_is_subset - - def test_is_subset1(self) -> None: - a = [1, 2] - b = [2, 1, 3] - hdbg.dassert_is_subset(a, b) - - def test_is_subset2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [4, 2, 1] - hdbg.dassert_is_subset(a, b) - # Check. 
- actual = str(cm.exception) - expected = """ - * Failed assertion * - val1=[1, 2, 3] - issubset - val2=[1, 2, 4] - val1 - val2=[3] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_not_intersection - - def test_not_intersection1(self) -> None: - a = [1, 2, 3] - b = [4, 5] - hdbg.dassert_not_intersection(a, b) - - def test_not_intersection2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [4, 2, 1] - hdbg.dassert_not_intersection(a, b) - actual = str(cm.exception) - expected = """ - * Failed assertion * - val1=[1, 2, 3] - has no intersection - val2=[1, 2, 4] - val1.intersection(val2)=[1, 2] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_no_duplicates - - def test_no_duplicates1(self) -> None: - a = [1, 2, 3] - hdbg.dassert_no_duplicates(a) - - def test_no_duplicates2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 3, 3] - hdbg.dassert_no_duplicates(a) - self.check_string(str(cm.exception)) - - # dassert_is_sorted - - def test_is_sorted1(self) -> None: - a = [1, 2, 3] - hdbg.dassert_is_sorted(a) - - def test_is_sorted2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 4, 3] - hdbg.dassert_is_sorted(a) - self.check_string(str(cm.exception)) - - def test_is_sorted3(self) -> None: - """ - Test an array that is sorted descending. - """ - a = [3, 2, 2] - hdbg.dassert_is_sorted(a, sort_kwargs={"reverse": True}) - - def test_is_sorted4(self) -> None: - """ - Test an array that is not sorted descending. 
- """ - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 4, 3] - sort_kwargs = {"reverse": True} - hdbg.dassert_is_sorted(a, sort_kwargs=sort_kwargs) - self.check_string(str(cm.exception)) - - # dassert_eq_all - - def test_eq_all1(self) -> None: - a = [1, 2, 3] - b = [1, 2, 3] - hdbg.dassert_eq_all(a, b) - - def test_eq_all2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [1, 2, 4] - hdbg.dassert_eq_all(a, b) - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_lgt1 -# ############################################################################# - - -class Test_dassert_lgt1(hunitest.TestCase): - def test1(self) -> None: - """ - No assertion raised since `0 <= 0 <= 3`. - """ - hdbg.dassert_lgt( - 0, 0, 3, lower_bound_closed=True, upper_bound_closed=True - ) - - def test2(self) -> None: - """ - Raise assertion since it is not true that `0 < 0 <= 3`. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lgt( - 0, 0, 3, lower_bound_closed=False, upper_bound_closed=True - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 0 < 0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Raise assertion since it is not true that `0 < 100 <= 3`. - - The formatting of the assertion is correct. 
- """ - with self.assertRaises(AssertionError) as cm: - lower_bound_closed = False - upper_bound_closed = True - hdbg.dassert_lgt( - 0, - 100, - 3, - lower_bound_closed, - upper_bound_closed, - "hello %s", - "world", - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 100 <= 3 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_proportion1 -# ############################################################################# - - -class Test_dassert_is_proportion1(hunitest.TestCase): - def test1(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(0.1, "hello %s", "world") - - def test2(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(0.0, "hello %s", "world") - - def test3(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(1.0, "hello %s", "world") - - def test_assert1(self) -> None: - """ - Failing assertion with correct message and format. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello %s", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Failing assertion with correct message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Failing assertion with incorrect message formatting. 
- """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - Caught assertion while formatting message: - 'not all arguments converted during string formatting' - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert4(self) -> None: - """ - Failing assertion with incorrect message formatting. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello %s %s", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - Caught assertion while formatting message: - 'not enough arguments for format string' - hello %s %s world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_container_type1 -# ############################################################################# - - -class Test_dassert_container_type1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c".split() - hdbg.dassert_container_type(list_, List, str) - - def test_assert1(self) -> None: - """ - Check that assertion fails since a list is not a tuple. - """ - list_ = "a b c".split() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type(list_, Tuple, str) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '['a', 'b', 'c']' is '' instead of 'typing.Tuple' - obj='['a', 'b', 'c']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Check that assertion fails since a list contains strings and ints. 
- """ - list_ = ["a", 2, "c", "d"] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type(list_, list, str) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '2' is '' instead of '' - obj='['a', 2, 'c', 'd']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Like `test_assert3()` but with a message. - """ - list_ = ["a", 2, "c", "d"] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type( - list_, list, str, "list_ is %s homogeneous", "not" - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '2' is '' instead of '' - list_ is not homogeneous - obj='['a', 2, 'c', 'd']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# _Animal -# ############################################################################# - - -class _Animal: - pass - - -# ############################################################################# -# _Man -# ############################################################################# - - -class _Man(_Animal): - pass - - -# ############################################################################# -# _Vegetable -# ############################################################################# - - -class _Vegetable: - pass - - -# ############################################################################# -# Test_dassert_issubclass1 -# ############################################################################# - - -class Test_dassert_issubclass1(hunitest.TestCase): - def test_man1(self) -> None: - """ - An instance of `_Man` descends from `_Animal`. - """ - man = _Man() - hdbg.dassert_issubclass(man, _Man) - - def test_man2(self) -> None: - """ - An instance of `_Man` descends from object. 
- """ - man = _Man() - hdbg.dassert_issubclass(man, object) - - def test_man_fail1(self) -> None: - """ - An instance of `_Man` doesn't descends from `_Vegetable`. - """ - man = _Man() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_issubclass(man, _Vegetable) - # We need to purify from object references. - self.check_string(str(cm.exception), purify_text=True) - - def test_man_fail2(self) -> None: - """ - An instance of `_Man` doesn't descends from `int`. - """ - man = _Man() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_issubclass(man, int) - self.check_string(str(cm.exception), purify_text=True) - - def test1(self) -> None: - """ - In Python everything is an object. - """ - hdbg.dassert_issubclass(5, object) - hdbg.dassert_issubclass(int, object) - hdbg.dassert_issubclass(int, (object, int)) - - def test_fail1(self) -> None: - """ - `issubclass` only accepts classes and not instances as second argument. - """ - with self.assertRaises(Exception) as cm: - hdbg.dassert_issubclass(int, 5.0) - self.check_string(str(cm.exception), purify_text=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_callable1 -# ############################################################################# - - -class Test_dassert_callable1(hunitest.TestCase): - def test1(self) -> None: - func = lambda x: x - hdbg.dassert_callable(func) - - def test_fail1(self) -> None: - func = 4 - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_callable(func) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Obj '4' of type '' is not callable - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# 
Test_dassert_all_defined_or_all_None -# ############################################################################# - - -class Test_dassert_all_defined_or_all_None(hunitest.TestCase): - def test1(self) -> None: - """ - Verify that test passes when all the values are defined. - """ - vals = [1, 2, 3] - hdbg.dassert_all_defined_or_all_None(vals) - - def test2(self) -> None: - """ - Verify that assertion is raised when at least one of the values is not - defined. - """ - vals = [1, 2, None, None] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_defined_or_all_None(vals) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Some values in list are defined and some are None: '[1, 2, None, None]' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Verify that test passes when all the values are not defined. - """ - vals = [None, None, None] - hdbg.dassert_all_defined_or_all_None(vals) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_related_params1 -# ############################################################################# - - -class Test_dassert_related_params1(hunitest.TestCase): - def test1(self) -> None: - obj = {"val1": 1, "val2": 1, "val3": "hello"} - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test2(self) -> None: - obj = {"val1": 0, "val2": None, "val3": ""} - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test3(self) -> None: - obj = {"val1": 1, "val2": 0, "val3": "hello"} - with self.assertRaises(Exception) as cm: - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - actual = str(cm.exception) - expected = """ - * Failed assertion * - All or none parameter 
should be non-null: - val2=0 - params={'val1': 1, 'val2': 0, 'val3': 'hello'} - message 'hello world' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_related_params2 -# ############################################################################# - - -class Test_dassert_related_params2(hunitest.TestCase): - def test1(self) -> None: - obj = {"val1": 1, "val2": 1, "val3": "hello"} - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test2(self) -> None: - obj = { - "val1": None, - "val2": None, - "val3": None, - } - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test3(self) -> None: - obj = {"val1": None, "val2": None, "val3": "hello"} - with self.assertRaises(Exception) as cm: - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - actual = str(cm.exception) - expected = """ - * Failed assertion * - All or none parameter should be non-None: - val1=None - params={'val1': None, 'val2': None, 'val3': 'hello'} - message 'hello world' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_all_attributes_are_same1 -# ############################################################################# - - -class Test_dassert_all_attributes_are_same1(hunitest.TestCase): - def test1(self) -> None: - """ - Wrong type of object. 
- """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same(5, "a") - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '5' is '' instead of '' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test2(self) -> None: - """ - Wrong type of attribute. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same([1, 2, 3], 1) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '1' is '' instead of '' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test3(self) -> None: - """ - Attribute with different values. - """ - Obj = collections.namedtuple("Obj", ["a", "b"]) - list_ = [Obj(1, 2), Obj(1, 3)] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same(list_, "b") - actual = str(cm.exception) - expected = """ - * Failed assertion * - Elements in the list have different values for - attribute b: - {2, 3} - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test4(self) -> None: - """ - Attribute with same values. - """ - Obj = collections.namedtuple("Obj", ["a", "b"]) - list_ = [Obj(1, 2), Obj(1, 2)] - hdbg.dassert_all_attributes_are_same(list_, "b") - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_lt -# ############################################################################# - - -class Test_dassert_lt(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the function doesn't raise an exception if first value is - less than second value. - """ - val1 = 1 - val2 = 2 - hdbg.dassert_lt(val1, val2) - - def test2(self) -> None: - """ - Test that the function raises an exception if first value is equal to - second value. - """ - # Set inputs. 
- val1 = 2 - val2 = 2 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2 < 2 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test that the function raises an exception if first value is greater - than second value. - """ - # Set inputs. - val1 = 3 - val2 = 2 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 3 < 2 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test that the function doesn't raise an exception when we pass string - inputs. - """ - val1 = "a" - val2 = "b" - hdbg.dassert_lt(val1, val2) - - def test5(self) -> None: - """ - Test that the function raises an exception where first value is greater - than second value with floats. - """ - # Set inputs. - val1 = 2.0 - val2 = 1.0 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2.0 < 1.0 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_is_integer -# ############################################################################# - - -class Test_dassert_is_integer(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the function do not raise the exception with integer values. - """ - val = 5 - hdbg.dassert_is_integer(val) - - def test2(self) -> None: - """ - Test that the function do not raise the exception with float values - that represent an integer. - """ - val = 5.0 - hdbg.dassert_is_integer(val) - - def test3(self) -> None: - """ - Test that the function raises an exception for float values that do not - represent an integer. 
- """ - # Set inputs. - val = 5.5 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_integer(val) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Invalid val='5.5' of type '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test that the function raises an exception for non-integer and non- - float types. - """ - # Set inputs. - val = "5" - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_integer(val) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Invalid val='5' of type '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py deleted file mode 100644 index b3f6d7f04..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py +++ /dev/null @@ -1,107 +0,0 @@ -import logging - -import config_root.config as cconfig -import helpers.hdict as hdict -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_get_nested_dict_iterator -# ############################################################################# - - -class Test_get_nested_dict_iterator(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic case with no nesting. - """ - dict_ = {"key0": "value0", "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), "value0"), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test2(self) -> None: - """ - Test simple nested case. 
- """ - dict_ = { - "key0": {"key00": "value00", "key01": "value01"}, - "key1": "value1", - } - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [ - (("key0", "key00"), "value00"), - (("key0", "key01"), "value01"), - (("key1",), "value1"), - ] - self.assertListEqual(actual_result, expected_result) - - def test3(self) -> None: - """ - Test multilevel nested case. - """ - dict_ = {"key0": {"key00": {"key000": "value000"}}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [ - (("key0", "key00", "key000"), "value000"), - (("key1",), "value1"), - ] - self.assertListEqual(actual_result, expected_result) - - def test4(self) -> None: - """ - Test flat case with `None` value. - """ - dict_ = {"key0": "value0", "key1": None} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), "value0"), (("key1",), None)] - self.assertListEqual(actual_result, expected_result) - - def test5(self) -> None: - """ - Test nested case with `None` value. - """ - dict_ = {"key0": {"key00": None}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), None), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test6(self) -> None: - """ - Test flat case with empty dict value. - """ - dict_ = {"key0": {}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), {}), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test7(self) -> None: - """ - Test nested case with empty dict value. 
- """ - dict_ = {"key0": {"key00": {}}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), {}), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test8(self) -> None: - """ - Test flat case with empty Config value. - """ - config = cconfig.Config() - dict_ = {"key0": config, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), config), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test9(self) -> None: - """ - Test nexted case with empty Config value. - """ - config = cconfig.Config() - dict_ = {"key0": {"key00": config}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), config), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py deleted file mode 100644 index 7220d1474..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py +++ /dev/null @@ -1,624 +0,0 @@ -import logging -import os -import unittest.mock as umock -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_shared_root_path1 -# 
############################################################################# - - -class Test_replace_shared_root_path1(hunitest.TestCase): - def test1(self) -> None: - """ - Test replacing shared root path. - """ - # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. - mock_mapping = { - "/data/shared1": "/shared_folder1", - "/data/shared2": "/shared_folder2", - } - with umock.patch.object( - hserver, "get_shared_data_dirs", return_value=mock_mapping - ): - # Test replacing shared root path. - path1 = "/data/shared1/asset1" - act1 = hdocker.replace_shared_root_path(path1) - exp1 = "/shared_folder1/asset1" - self.assertEqual(act1, exp1) - # - path2 = "/data/shared2/asset2" - act2 = hdocker.replace_shared_root_path(path2) - exp2 = "/shared_folder2/asset2" - self.assertEqual(act2, exp2) - # - path3 = 'object("/data/shared2/asset2/item")' - act3 = hdocker.replace_shared_root_path(path3) - exp3 = 'object("/shared_folder2/asset2/item")' - self.assertEqual(act3, exp3) - - def test2(self) -> None: - """ - Test replacing shared root path with the `replace_ecs_tokyo` parameter. - """ - # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. - mock_mapping = { - "/data/shared": "/shared_folder", - } - with umock.patch.object( - hserver, "get_shared_data_dirs", return_value=mock_mapping - ): - # Test if `ecs_tokyo` is replaced if `replace_ecs_tokyo = True`. - path1 = 'object("/data/shared/ecs_tokyo/asset2/item")' - replace_ecs_tokyo = True - act1 = hdocker.replace_shared_root_path( - path1, replace_ecs_tokyo=replace_ecs_tokyo - ) - exp1 = 'object("/shared_folder/ecs/asset2/item")' - self.assertEqual(act1, exp1) - # Test if `ecs_tokyo` is not replaced if `replace_ecs_tokyo` is not - # defined. 
- path2 = 'object("/data/shared/ecs_tokyo/asset2/item")' - act2 = hdocker.replace_shared_root_path(path2) - exp2 = 'object("/shared_folder/ecs_tokyo/asset2/item")' - self.assertEqual(act2, exp2) - - -# ############################################################################# -# Test_convert_to_docker_path1 -# ############################################################################# - - -class Test_convert_to_docker_path1(hunitest.TestCase): - @staticmethod - def convert_caller_to_callee_docker_path( - in_file_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, - check_if_exists: bool, - ) -> Tuple[str, str]: - """ - Prepare inputs and call the function to convert a file name to Docker - paths. - - :return: A tuple containing - - docker_file_path: the Docker file path - - mount: the Docker mount string - """ - ( - source_host_path, - callee_mount_path, - mount, - ) = hdocker.get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - docker_file_path = hdocker.convert_caller_to_callee_docker_path( - in_file_path, - source_host_path, - callee_mount_path, - check_if_exists=check_if_exists, - is_input=True, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - return docker_file_path, mount - - def helper( - self, - in_file_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, - check_if_exists: bool, - exp_docker_file_path: str, - exp_mount: str, - ) -> None: - """ - Test converting a file name to Docker paths. - """ - # Run test. - docker_file_path, mount = self.convert_caller_to_callee_docker_path( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - ) - # Check output. - self.assert_equal(docker_file_path, exp_docker_file_path) - self.assert_equal(mount, exp_mount) - - def test1(self) -> None: - """ - Test converting a file name to Docker paths. - """ - # - Prepare inputs. 
- dir_name = self.get_input_dir() - in_file_path = os.path.join(dir_name, "tmp.llm_transform.in.txt") - is_caller_host = True - use_sibling_container_for_callee = True - check_if_exists = False - # - Prepare outputs. - helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = os.path.join( - helpers_root_path, - "helpers/test/outcomes", - "Test_convert_to_docker_path1.test1/input", - "tmp.llm_transform.in.txt", - ) - exp_mount = "type=bind,source=/app,target=/app" - self.helper( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - exp_docker_file_path, - exp_mount, - ) - - def test2(self) -> None: - """ - Test converting a file name of an existing file to a Docker path. - """ - # - Prepare inputs. - dir_name = self.get_input_dir() - # Create a file. - # E.g., in_file_path='/app/helpers/test/outcomes/Test_convert_to_docker_path1.test2/input/input.md' - in_file_path = os.path.join(dir_name, "tmp.input.md") - hio.to_file(in_file_path, "empty") - _LOG.debug(hprint.to_str("in_file_path")) - is_caller_host = True - use_sibling_container_for_callee = True - check_if_exists = True - # - Prepare outputs. - helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = os.path.join( - helpers_root_path, - "helpers/test/outcomes", - "Test_convert_to_docker_path1.test2/input", - "tmp.input.md", - ) - exp_mount = "type=bind,source=/app,target=/app" - self.helper( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - exp_docker_file_path, - exp_mount, - ) - - -# ############################################################################# -# Test_is_path1 -# ############################################################################# - - -class Test_is_path1(hunitest.TestCase): - def helper(self, path: str, expected: bool) -> None: - """ - Test helper for `is_path()` function. - """ - # Run test. - actual = hdocker.is_path(path) - # Check outputs. 
- _LOG.debug(hprint.to_str("path actual expected")) - self.assertEqual(actual, expected) - - def test_file_with_extension(self) -> None: - """ - Test paths with file extensions. - """ - # Prepare inputs. - test_cases = [ - ("file.txt", True), - ("document.pdf", True), - ("script.py", True), - ("data.csv", True), - ("image.jpg", True), - ("config.json", True), - ("readme.md", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_absolute_paths(self) -> None: - """ - Test absolute paths. - """ - # Prepare inputs. - test_cases = [ - ("/path/to/file.py", True), - ("/usr/bin/python", True), - ("/etc/config", True), - ("/home/user", True), - ("/", True), - ("/data/shared", True), - ] - # Check outputs. - for path, expected in test_cases: - self.helper(path, expected) - - def test_relative_paths(self) -> None: - """ - Test relative paths starting with ./ or ../. - """ - # Prepare inputs and run tests. - test_cases = [ - ("./file.txt", True), - ("../data.csv", True), - ("./folder/subfolder", True), - ("../parent/file", True), - ("./", True), - ("../", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_trailing_slash_paths(self) -> None: - """ - Test paths ending with slash (indicating directories). - """ - # Prepare inputs and run tests. - test_cases = [ - ("folder/", True), - ("data/", True), - ("my_directory/", True), - ("nested/folder/", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_non_path_strings(self) -> None: - """ - Test strings that should not be considered paths. - """ - # Prepare inputs and run tests. - test_cases = [ - ("readme", False), - ("hello", False), - ("command", False), - ("data", False), - ("test", False), - ("python", False), - ("docker", False), - ("", False), - ] - # Run tests. 
- for path, expected in test_cases: - self.helper(path, expected) - - def test_edge_cases(self) -> None: - """ - Test edge cases and complex scenarios. - """ - # Prepare inputs and run tests. - test_cases = [ - # - Files with multiple extensions. - ("file.tar.gz", True), - ("backup.sql.bz2", True), - # - Hidden files. - (".hidden", True), - (".gitignore", True), - # - Complex paths. - ("./nested/folder/file.txt", True), - ("../parent/folder/", True), - ("/absolute/path/file.py", True), - # - Files without extension in paths. - # True because it contains a slash. - ("folder/README", True), - # True because starts with "./". - ("./config", True), - # True because starts with "/". - ("/usr/bin/python", True), - # - Strings that might be confused with paths. - # True because has extension. - ("folder.name", True), - # False because no extension, slash, or path prefix. - ("file-name", False), - # False because no extension, slash, or path prefix. - ("under_score", False), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - -# ############################################################################# -# Test_convert_all_paths_from_caller_to_callee_docker_path1 -# ############################################################################# - - -class Test_convert_all_paths_from_caller_to_callee_docker_path1( - hunitest.TestCase -): - def helper( - self, - cmd_opts: List[str], - expected_str: str, - *, - is_caller_host: bool = True, - use_sibling_container_for_callee: bool = True, - create_files: Optional[List[str]] = None, - ) -> None: - """ - Helper for `convert_all_paths_from_caller_to_callee_docker_path()`. - """ - hdbg.dassert_isinstance(cmd_opts, list) - hdbg.dassert_isinstance(expected_str, str) - # Prepare inputs. - if create_files: - # Create temporary files for testing existing file paths. 
- for file_path in create_files: - dir_name = os.path.dirname(file_path) - if dir_name: - hio.create_dir(dir_name, incremental=True) - hio.to_file(file_path, "test content") - # Get docker mount info for the test. - ( - caller_mount_path, - callee_mount_path, - _, - ) = hdocker.get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - # Run test. - actual = hdocker.convert_all_paths_from_caller_to_callee_docker_path( - cmd_opts, - caller_mount_path, - callee_mount_path, - is_caller_host, - use_sibling_container_for_callee, - ) - _LOG.debug("actual=\n%s", str(actual)) - # Check outputs. - actual_str = "\n".join(actual) - actual_str = huntepur.purify_text(actual_str) - expected_str = huntepur.purify_text(expected_str) - self.assert_equal(actual_str, expected_str, dedent=True) - - # ///////////////////////////////////////////////////////////////////////////// - - def test_mixed_options_with_paths_and_non_paths(self) -> None: - """ - Test converting mixed command options with paths and non-paths. - """ - # Prepare inputs. - cmd_opts = [ - "--verbose", - "file.txt", # Path-like (has extension) - "--output", - "./output.log", # Path-like (relative path) - "command", # Not a path - # "/absolute/path", # Path-like (absolute) - "--flag", - "folder/", # Path-like (trailing slash) - ] - expected_output = [ - "--verbose", - "/app/file.txt", # Converted - "--output", - "/app/output.log", # Converted - "command", # Not converted - # "/app/absolute/path", # Converted - "--flag", - "/app/folder", # Converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_existing_files_get_converted(self) -> None: - """ - Test that existing files are converted even without path-like - appearance. - """ - # Prepare inputs. 
- temp_dir = self.get_scratch_space() - existing_file = os.path.join(temp_dir, "testfile") - cmd_opts = [ - "--input", - existing_file, # Will exist, should be converted - "nonexistent", # Doesn't exist and not path-like, won't be converted - ] - expected_output = [ - "--input", - f"/app/{os.path.relpath(existing_file, hgit.find_git_root())}", # Converted - "nonexistent", # Not converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output, create_files=[existing_file]) - - def test_path_like_strings_without_existing_files(self) -> None: - """ - Test that path-like strings are converted even if files don't exist. - """ - # Prepare inputs. - cmd_opts = [ - "script.py", # Path-like (extension) but doesn't exist - "./config.json", # Path-like (relative) but doesn't exist - # "/usr/bin/tool", # Path-like (absolute) but doesn't exist - "plain_word", # Not path-like and doesn't exist - ] - expected_output = [ - "/app/script.py", # Converted (has extension) - "/app/config.json", # Converted (relative path) - # "/app/usr/bin/tool", # Converted (absolute path) - "plain_word", # Not converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_empty_command_options(self) -> None: - """ - Test handling of empty command options list. - """ - # Prepare inputs. - cmd_opts = [] - expected_output = [] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_only_non_path_options(self) -> None: - """ - Test command options with no paths. - """ - # Prepare inputs. - cmd_opts = [ - "--verbose", - "--debug", - "command", - "argument", - "--flag", - ] - expected_output = [ - "--verbose", - "--debug", - "command", - "argument", - "--flag", - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. 
- self.helper(cmd_opts, expected_output) - - def test_only_path_options(self) -> None: - """ - Test command options with only paths. - """ - # Prepare inputs. - cmd_opts = [ - "input.txt", - "./config.yaml", - # "/var/log/app.log", - "data/", - "./output.json", - ] - expected_output = [ - "/app/input.txt", - "/app/config.yaml", - # "/app/var/log/app.log", - "/app/data", - "/app/output.json", - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_complex_paths_with_extensions(self) -> None: - """ - Test complex paths with multiple extensions and special cases. - """ - # Prepare inputs. - cmd_opts = [ - "archive.tar.gz", # Multiple extensions - ".hidden", # Hidden file - "backup.sql.bz2", # Multiple extensions - ".gitignore", # Hidden config file - ] - expected_output = """ - $GIT_ROOT/archive.tar.gz - $GIT_ROOT/.hidden - $GIT_ROOT/backup.sql.bz2 - $GIT_ROOT/.gitignore - """ - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_sibling_vs_child_container_modes(self) -> None: - """ - Test different container modes (sibling vs child). - """ - # Prepare inputs. - cmd_opts = ["input.txt", "output/"] - # Test sibling container mode. - expected_output = ["/app/input.txt", "/app/output"] - expected_output = "\n".join(expected_output) - self.helper( - cmd_opts, - expected_output, - is_caller_host=True, - use_sibling_container_for_callee=True, - ) - # Test child container mode. 
- expected_output = ["/app/input.txt", "/app/output"] - expected_output = "\n".join(expected_output) - self.helper( - cmd_opts, - expected_output, - is_caller_host=True, - use_sibling_container_for_callee=False, - ) - - -# ############################################################################# -# Test_get_docker_mount_info1 -# ############################################################################# - - -class Test_get_docker_mount_info1(hunitest.TestCase): - def test1(self) -> None: - """ - With CSFY_ENABLE_DIND, sibling-style docker.sock must still bind the - repo root inside this container, not CSFY_HOST_GIT_ROOT_PATH. - """ - # - Prepare inputs. - git_root = hgit.find_git_root() - env = { - "CSFY_ENABLE_DIND": "1", - "CSFY_HOST_GIT_ROOT_PATH": "/path/only/on/outer/host", - } - # - Prepare outputs. - exp_target = "/app" - exp_mount = f"type=bind,source={git_root},target=/app" - # Run test. - with umock.patch.dict(os.environ, env, clear=False): - source, target, mount = hdocker.get_docker_mount_info( - is_caller_host=False, - use_sibling_container_for_callee=True, - ) - # Check outputs. - self.assert_equal(source, git_root) - self.assert_equal(target, exp_target) - self.assert_equal(mount, exp_mount) - - def test2(self) -> None: - """ - Without DinD, sibling mode uses CSFY_HOST_GIT_ROOT_PATH for bind - source. - """ - # - Prepare inputs. - host_root = "/tmp/explicit_host_git_root_for_test" - env = { - "CSFY_ENABLE_DIND": "0", - "CSFY_HOST_GIT_ROOT_PATH": host_root, - } - # - Prepare outputs. - exp_target = "/app" - exp_mount = f"type=bind,source={host_root},target=/app" - # Run test. - with umock.patch.dict(os.environ, env, clear=False): - source, target, mount = hdocker.get_docker_mount_info( - is_caller_host=False, - use_sibling_container_for_callee=True, - ) - # Check outputs. 
- self.assert_equal(source, host_root) - self.assert_equal(target, exp_target) - self.assert_equal(mount, exp_mount) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py deleted file mode 100644 index 203ae012e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -Unit tests for hdocker_tests.py -""" - -import logging -import os - -import helpers.hdocker_tests as hdoctest -import helpers.hio as hio -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_get_docker_test_files -# ############################################################################# - - -class Test_get_docker_test_files(hunitest.TestCase): - """ - Test the get_docker_test_files function. - """ - - def test1(self) -> None: - """ - Test finding docker test files in a directory. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_2.py"), "") - hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 2) - self.assertTrue(any("docker_test_1.py" in f for f in actual)) - self.assertTrue(any("docker_test_2.py" in f for f in actual)) - - def test2(self) -> None: - """ - Test with no matching files. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create non-matching files. 
- hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") - hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 0) - - def test3(self) -> None: - """ - Test with single docker test file. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_single.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 1) - self.assertTrue("docker_test_single.py" in actual[0]) - - def test4(self) -> None: - """ - Test that files are returned in sorted order. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_z.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_a.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_m.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 3) - basenames = [os.path.basename(f) for f in actual] - self.assertEqual( - basenames, - ["docker_test_a.py", "docker_test_m.py", "docker_test_z.py"], - ) - - -# ############################################################################# -# Test_run_docker_cmd -# ############################################################################# - - -class Test_run_docker_cmd(hunitest.TestCase): - """ - Test the run_docker_cmd function. - """ - - def test1(self) -> None: - """ - Test that error is raised when docker_cmd.sh does not exist in - script_dir. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Run test and check output. - with self.assertRaises(AssertionError): - hdoctest.run_docker_cmd(scratch_dir) - - def test2(self) -> None: - """ - Test that error is raised when script_dir does not exist. - """ - # Prepare inputs. 
- nonexistent_dir = "/nonexistent_dir_that_does_not_exist" - # Run test and check output. - with self.assertRaises(AssertionError): - hdoctest.run_docker_cmd(nonexistent_dir) - - -# ############################################################################# -# Test_run_all_tests -# ############################################################################# - - -class Test_run_all_tests(hunitest.TestCase): - """ - Test the run_all_tests function. - """ - - def test1(self) -> None: - """ - Test with no docker test files returns 0. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create non-matching files. - hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") - # Run test. - actual = hdoctest.run_all_tests(scratch_dir) - # Check outputs. - self.assertEqual(actual, 0) - - def test2(self) -> None: - """ - Test with docker test files when docker_cmd_script doesn't exist. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") - nonexistent_docker_cmd = os.path.join( - scratch_dir, "nonexistent_docker_cmd.sh" - ) - # Run test and check output. 
- with self.assertRaises(AssertionError): - hdoctest.run_all_tests( - scratch_dir, docker_cmd_script=nonexistent_docker_cmd - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py deleted file mode 100644 index d1f229435..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -import helpers.henv as henv -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_env1 -# ############################################################################# - - -class Test_env1(hunitest.TestCase): - def test_get_system_signature1(self) -> None: - txt = henv.get_system_signature() - _LOG.debug(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py deleted file mode 100644 index f50f79994..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py +++ /dev/null @@ -1,347 +0,0 @@ -import logging -import pathlib - -import helpers.hfile_tree as hfiltree -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_generate_tree -# ############################################################################# - - -class Test_generate_tree(hunitest.TestCase): - def test1(self) -> None: - """ - Test generating default tree. - """ - # Prepare inputs. 
- path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - " - create_users.sh", - " - pip_list.txt", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Test generating default tree with depth. - """ - # Prepare inputs. - path = self.devops_dir - depth = 1 - include_tests = False - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test3(self) -> None: - """ - Test generating tree including test files and dirs. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = True - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - "- test", - " - test_docker.py", - ] - ) - self.assertEqual(actual, expected) - - def test4(self) -> None: - """ - Test generating tree including python files. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = False - include_python = True - only_dirs = False - output = "" - # Call tested function. 
- actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- __init__.py", - "- compose", - "- docker_build", - "- docker_run", - " - execute.py", - "- user_credentials.py", - ] - ) - self.assertEqual(actual, expected) - - def test5(self) -> None: - """ - Test generating tree with only directories. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = True - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test6(self) -> None: - """ - Test generating tree including tests, python files, and only - directories. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = True - include_python = True - only_dirs = True - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- __init__.py", - "- compose", - "- docker_build", - "- docker_run", - " - execute.py", - "- test", - " - test_docker.py", - "- user_credentials.py", - ] - ) - self.assertEqual(actual, expected) - - def test7(self) -> None: - """ - Test writing tree to file. - """ - # Prepare inputs. - scratch = pathlib.Path(self.get_scratch_space()) - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = scratch / "TREE.md" - # Call tested function. 
- _ = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - actual = output.read_text(encoding="utf-8") - # Check output. - expected = ( - "\n".join( - [ - "", - "devops", - "- compose", - "- docker_build", - " - create_users.sh", - " - pip_list.txt", - "- docker_run", - "", - ] - ) - + "\n" - ) - self.assertEqual(actual, expected) - - def test8(self) -> None: - """ - Test updating tree on existing file, preserving comments. - """ - # Prepare inputs. - scratch = pathlib.Path(self.get_scratch_space()) - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = scratch / "TREE.md" - # Create existing file. - content = ( - "\n".join( - [ - "", - "devops", - "- compose # compose-comment", - "- docker_build", - " - pip_list.txt # pip-comment", - "", - ] - ) - + "\n" - ) - output.write_text(content, encoding="utf-8") - # Call tested function. - _ = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - actual = output.read_text(encoding="utf-8") - # Check output. - expected = ( - "\n".join( - [ - "", - "devops", - "- compose # compose-comment", - "- docker_build", - " - create_users.sh", - " - pip_list.txt # pip-comment", - "- docker_run", - "", - ] - ) - + "\n" - ) - self.assertEqual(actual, expected) - - def setUp(self) -> None: - """ - Create a `devops` directory in scratch space. 
- - Scratch directory layout: - ``` - devops - - __init__.py - - user_credentials.py - - compose - - docker_run - - execute.py - - docker_build - - create_users.sh - - pip_list.txt - - test - - TestDocker - - test_docker.py - ``` - """ - super().setUp() - scratch = self.get_scratch_space() - self.devops_dir = pathlib.Path(scratch) / "devops" - self.devops_dir.mkdir() - structure = { - "": ["__init__.py", "user_credentials.py"], - "compose": [], - "docker_run": ["execute.py"], - "docker_build": ["create_users.sh", "pip_list.txt"], - "test": ["TestDocker", "test_docker.py"], - } - # Create empty dirs and files. - for subdir, files in structure.items(): - folder = self.devops_dir / subdir if subdir else self.devops_dir - if subdir: - folder.mkdir() - for name in files: - (folder / name).touch() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py deleted file mode 100644 index 8a7135578..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py +++ /dev/null @@ -1,822 +0,0 @@ -import logging -import os -import tempfile -from typing import Generator, List, Optional - -import pytest - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# Unfortunately we can't check the outcome of some of these functions since we -# don't know in which dir we are running. Thus we just test that the function -# completes and visually inspect the outcome, if possible. 
- - -# ############################################################################# -# Test_git_submodule1 -# ############################################################################# - - -class Test_git_submodule1(hunitest.TestCase): - def test_get_client_root1(self) -> None: - actual = hgit.get_client_root(super_module=True) - _LOG.debug("actual=%s", actual) - - def test_get_client_root2(self) -> None: - actual = hgit.get_client_root(super_module=False) - _LOG.debug("actual=%s", actual) - - def test_get_project_dirname1(self) -> None: - actual = hgit.get_project_dirname() - _LOG.debug("actual=%s", actual) - - def test_get_branch_name1(self) -> None: - actual = hgit.get_branch_name() - _LOG.debug("actual=%s", actual) - - def test_is_inside_submodule1(self) -> None: - actual = hgit.is_inside_submodule() - _LOG.debug("actual=%s", actual) - - # Outside CK infra, the following call hangs, so we skip it. - # TODO(gp): I don't see why it requires our infra. - @pytest.mark.requires_ck_infra - def test_is_amp(self) -> None: - actual = hgit.is_amp() - _LOG.debug("actual=%s", actual) - - def test_get_path_from_supermodule1(self) -> None: - actual = hgit.get_path_from_supermodule() - _LOG.debug("actual=%s", actual) - - def test_get_submodule_paths1(self) -> None: - actual = hgit.get_submodule_paths() - _LOG.debug("actual=%s", actual) - - -# ############################################################################# -# Test_git_submodule2 -# ############################################################################# - - -class Test_git_submodule2(hunitest.TestCase): - # def test_get_submodule_hash1(self) -> None: - # dir_name = "amp" - # _ = hgit._get_submodule_hash(dir_name) - - def test_get_remote_head_hash1(self) -> None: - dir_name = "." 
- actual = hgit.get_head_hash(dir_name) - _LOG.debug("actual=%s", actual) - - # def test_report_submodule_status1(self) -> None: - # dir_names = ["."] - # short_hash = True - # _ = hgit.report_submodule_status(dir_names, short_hash) - - def test_get_head_hash1(self) -> None: - dir_name = "." - actual = hgit.get_head_hash(dir_name) - _LOG.debug("actual=%s", actual) - - def _helper_group_hashes( - self, - head_hash: str, - remh_hash: str, - subm_hash: Optional[str], - expected: str, - ) -> None: - actual = hgit._group_hashes(head_hash, remh_hash, subm_hash) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_group_hashes1(self) -> None: - head_hash = "a2bfc704" - remh_hash = "a2bfc704" - subm_hash = None - expected = "head_hash = remh_hash = a2bfc704" - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - def test_group_hashes2(self) -> None: - head_hash = "22996772" - remh_hash = "92167662" - subm_hash = "92167662" - expected = """ - head_hash = 22996772 - remh_hash = subm_hash = 92167662 - """ - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - def test_group_hashes3(self) -> None: - head_hash = "7ea03eb6" - remh_hash = "7ea03eb6" - subm_hash = "7ea03eb6" - expected = "head_hash = remh_hash = subm_hash = 7ea03eb6" - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - -# ############################################################################# -# Test_git_repo_name1 -# ############################################################################# - - -class Test_git_repo_name1(hunitest.TestCase): - def test_parse_github_repo_name1(self) -> None: - repo_name = "git@github.com:alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name2(self) -> None: - repo_name = "https://github.com/alphamatic/amp" - 
hgit._parse_github_repo_name(repo_name) - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name3(self) -> None: - repo_name = "git@github.fake.com:alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.fake.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name4(self) -> None: - repo_name = "https://github.fake.com/alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.fake.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_get_repo_full_name_from_dirname1(self) -> None: - actual = hgit.get_repo_full_name_from_dirname( - dir_name=".", include_host_name=False - ) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_dirname2(self) -> None: - actual = hgit.get_repo_full_name_from_dirname( - dir_name=".", include_host_name=True - ) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_client1(self) -> None: - actual = hgit.get_repo_full_name_from_client(super_module=True) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_client2(self) -> None: - actual = hgit.get_repo_full_name_from_client(super_module=False) - _LOG.debug("actual=%s", actual) - - -# ############################################################################# -# Test_git_path1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. 
-@pytest.mark.requires_ck_infra -class Test_git_path1(hunitest.TestCase): - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - def test_get_path_from_git_root1(self) -> None: - file_name = "/app/helpers/test/test_hgit.py" - actual = hgit.get_path_from_git_root(file_name, super_module=True) - _LOG.debug("get_path_from_git_root()=%s", actual) - # Check. - expected = "helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), - reason="Run only in amp as sub-module", - ) - def test_get_path_from_git_root2(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - actual = hgit.get_path_from_git_root(file_name, super_module=True) - _LOG.debug("get_path_from_git_root()=%s", actual) - # Check. - expected = "amp/helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root3(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - git_root = "/app" - actual = hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - # Check. - expected = "amp/helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root4(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - git_root = "/app/amp" - actual = hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - # Check. 
- expected = "helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root5(self) -> None: - file_name = "helpers/test/test_hgit.py" - git_root = "/app/amp" - with self.assertRaises(ValueError): - hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - - -# ############################################################################# -# Test_git_modified_files1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. -@pytest.mark.requires_ck_infra -@pytest.mark.slow(reason="Around 7s") -@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class Test_git_modified_files1(hunitest.TestCase): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - - def set_up_test(self) -> None: - """ - All these tests need a reference to Git master branch. 
- """ - hgit.fetch_origin_master_if_needed() - - def test_get_modified_files1(self) -> None: - actual = hgit.get_modified_files() - _LOG.debug("actual=%s", actual) - - def test_get_previous_committed_files1(self) -> None: - actual = hgit.get_previous_committed_files() - _LOG.debug("actual=%s", actual) - - def test_get_modified_files_in_branch1(self) -> None: - actual = hgit.get_modified_files_in_branch("master") - _LOG.debug("actual=%s", actual) - - def test_get_summary_files_in_branch1(self) -> None: - actual = hgit.get_summary_files_in_branch("master") - _LOG.debug("actual=%s", actual) - - def test_git_log1(self) -> None: - actual = hgit.git_log() - _LOG.debug("actual=%s", actual) - - -# ############################################################################# - - -# ############################################################################# -# Test_find_docker_file1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. -@pytest.mark.requires_ck_infra -class Test_find_docker_file1(hunitest.TestCase): - def test1(self) -> None: - """ - Test for a file `amp/helpers/test/test_hgit.py` that is not from Docker - (i.e., it doesn't start with `/app`) and exists in the repo. - """ - amp_dir = hgit.get_amp_abs_path() - # Use this file since `find_docker_file()` needs to do a `find` in the - # repo, and we need to have a fixed file structure. - file_name = hgit.find_file_in_git_tree("test_hgit.py") - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - ) - expected = ["helpers/test/test_hgit.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test2(self) -> None: - """ - Test for a file `/app/amp/helpers/test/test_hgit.py` that is from - Docker (i.e., it starts with `/app`) and exists in the repo. 
- """ - amp_dir = hgit.get_amp_abs_path() - # Use this file since `find_docker_file()` needs to do a `find` in the - # repo, and we need to have a fixed file structure. - file_name = hgit.find_file_in_git_tree("test_hgit.py") - expected = ["helpers/test/test_hgit.py"] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - ) - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test3(self) -> None: - """ - Test for a file `/venv/lib/python3.8/site-packages/invoke/tasks.py` - that is from Docker (e.g., it starts with `/app`), but doesn't exist in - the repo. - """ - file_name = "/venv/lib/python3.8/site-packages/invoke/tasks.py" - actual = hgit.find_docker_file(file_name) - expected: List[str] = [] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test4(self) -> None: - """ - Test for a file `./core/dataflow/utils.py` that is from Docker (i.e., - it starts with `/app`), but has multiple copies in the repo. - """ - amp_dir = hgit.get_amp_abs_path() - file_name = "/app/amp/core/dataflow/utils.py" - dir_depth = 1 - candidate_files = [ - "core/dataflow/utils.py", - "core/foo/utils.py", - "core/bar/utils.py", - ] - candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - dir_depth=dir_depth, - candidate_files=candidate_files, - ) - # Only one candidate file matches basename and one dirname. 
- expected = ["core/dataflow/utils.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test5(self) -> None: - amp_dir = hgit.get_amp_abs_path() - file_name = "/app/amp/core/dataflow/utils.py" - dir_depth = -1 - candidate_files = [ - "core/dataflow/utils.py", - "bar/dataflow/utils.py", - "core/foo/utils.py", - ] - candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - dir_depth=dir_depth, - candidate_files=candidate_files, - ) - # Only one file matches `utils.py` using all the 3 dir levels. - expected = ["core/dataflow/utils.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_extract_gh_issue_number_from_branch -# ############################################################################# - - -class Test_extract_gh_issue_number_from_branch(hunitest.TestCase): - def test_extract_gh_issue_number_from_branch1(self) -> None: - """ - Tests extraction from a branch name with a specific format. - """ - branch_name = "CmampTask10725_Add_more_tabs_to_orange_tmux" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "10725" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch2(self) -> None: - """ - Tests extraction from another branch name format. - """ - branch_name = "HelpersTask23_Add_more_tabs_to_orange_tmux" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "23" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch3(self) -> None: - """ - Tests extraction from a short branch name format. 
- """ - branch_name = "CmTask3434" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "3434" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch4(self) -> None: - """ - Tests behavior when no issue number is present in the branch name. - """ - branch_name = "NoTaskNumberHere" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "None" - self.assert_equal(str(actual), expected) - - -# ############################################################################# -# Test_find_git_root1 -# ############################################################################# - - -class Test_find_git_root1(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a super repo (e.g. //orange) - - the repo contains another super repo (e.g. //amp) as submodule (first level) - - the first level submodule contains another submodule (e.g. //helpers) (second level) - - Directory structure: - orange/ - |-- .git/ - `-- amp/ - |-- .git (points to ../.git/modules/amp) - |-- ck.infra/ - `-- helpers_root/ - `-- .git (points to ../../.git/modules/amp/modules/helpers_root) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `orange` repo. - self.repo_dir = os.path.join(temp_dir, "orange") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create `amp` submodule under `orange`. - self.submodule_dir = os.path.join(self.repo_dir, "amp") - hio.create_dir(self.submodule_dir, incremental=False) - submodule_git_file = os.path.join(self.submodule_dir, ".git") - txt = "gitdir: ../.git/modules/amp" - hio.to_file(submodule_git_file, txt) - submodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "amp" - ) - hio.create_dir(submodule_git_file_dir, incremental=False) - # Create `helpers_root` submodule under `amp`. 
- self.subsubmodule_dir = os.path.join(self.submodule_dir, "helpers_root") - hio.create_dir(self.subsubmodule_dir, incremental=False) - subsubmodule_git_file = os.path.join(self.subsubmodule_dir, ".git") - txt = "gitdir: ../../.git/modules/amp/modules/helpers_root" - hio.to_file(subsubmodule_git_file, txt) - subsubmodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "amp", "modules", "helpers_root" - ) - hio.create_dir(subsubmodule_git_file_dir, incremental=False) - # Create `ck.infra` runnable dir under `amp`. - self.runnable_dir = os.path.join(self.submodule_dir, "ck.infra") - hio.create_dir(self.runnable_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in the super repo (e.g. //orange) - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in first level submodule (e.g. //amp) - """ - self.set_up_test() - with hsystem.cd(self.submodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test3(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in second level submodule (e.g. //helpers) - """ - self.set_up_test() - with hsystem.cd(self.subsubmodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test4(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in a runnable dir (e.g. ck.infra) under the - first level submodule (e.g. 
//amp) - """ - self.set_up_test() - with hsystem.cd(self.runnable_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root2 -# ############################################################################# - - -class Test_find_git_root2(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a super repo (e.g. //cmamp) - - the repo contains //helpers as submodule - - Directory structure: - cmamp/ - |-- .git/ - |-- ck.infra/ - `-- helpers_root/ - `-- .git (points to ../.git/modules/helpers_root) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `cmamp` repo. - self.repo_dir = os.path.join(temp_dir, "cmamp") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create `helpers_root` submodule under `cmamp`. - self.submodule_dir = os.path.join(self.repo_dir, "helpers_root") - hio.create_dir(self.submodule_dir, incremental=False) - submodule_git_file = os.path.join(self.submodule_dir, ".git") - txt = "gitdir: ../.git/modules/helpers_root" - hio.to_file(submodule_git_file, txt) - submodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "helpers_root" - ) - hio.create_dir(submodule_git_file_dir, incremental=False) - # Create `ck.infra` runnable dir under `cmamp`. - self.runnable_dir = os.path.join(self.repo_dir, "ck.infra") - hio.create_dir(self.runnable_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in the super repo (e.g. 
//cmamp) - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the submodule (e.g. //helpers) - """ - self.set_up_test() - with hsystem.cd(self.submodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test3(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in a runnable dir (e.g. ck.infra) - """ - self.set_up_test() - with hsystem.cd(self.runnable_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root3 -# ############################################################################# - - -class Test_find_git_root3(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is //helpers - - Directory structure: - helpers/ - |-- .git/ - `-- arbitrary1/ - `-- arbitrary1a/ - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `helpers` repo. - self.repo_dir = os.path.join(temp_dir, "helpers") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create arbitrary directory under `helpers`. 
- self.arbitrary_dir = os.path.join( - self.repo_dir, "arbitrary1", "arbitrary1a" - ) - hio.create_dir(self.arbitrary_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the root of repo - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in an arbitrary directory under the repo - """ - self.set_up_test() - with hsystem.cd(self.arbitrary_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root4 -# ############################################################################# - - -class Test_find_git_root4(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a linked repo - - Directory structure: - repo/ - `-- .git/ - linked_repo/ - `-- .git (points to /repo/.git) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create repo. - self.repo_dir = os.path.join(temp_dir, "repo") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create linked repo. - self.linked_repo_dir = os.path.join(temp_dir, "linked_repo") - hio.create_dir(self.linked_repo_dir, incremental=False) - # Create pointer from linked repo to the actual repo. 
- linked_git_file = os.path.join(self.linked_repo_dir, ".git") - txt = f"gitdir: {self.git_dir}\n" - hio.to_file(linked_git_file, txt) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the linked repo - """ - self.set_up_test() - with hsystem.cd(self.linked_repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root5 -# ############################################################################# - - -class Test_find_git_root5(hunitest.TestCase): - """ - Check that the error is raised when no .git directory is found. - - Directory structure: - arbitrary_dir/ - broken_repo/ - `-- .git (points to /nonexistent/path/to/gitdir) - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self): - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - # `self.get_scratch_space()` does not work in the case as it creates - # a temp directory within the repo where `.git` exists by default - # (e.g. /app/helpers/test/outcomes/Test_find_git_root5.test1/tmp.scratch) - # This preventing the exception from being raised. - # We need a structure without `.git` for this test. - self.temp_dir = tempfile.TemporaryDirectory() - # Create arbitrary directory that is not a git repo. - self.arbitrary_dir = os.path.join(self.temp_dir.name, "arbitrary_dir") - hio.create_dir(self.arbitrary_dir, incremental=False) - # Create arbitrary directory that is a submodule or linked repo that - # point to non existing super repo. - self.repo_dir = os.path.join(self.temp_dir.name, "broken_repo") - hio.create_dir(self.repo_dir, incremental=False) - # Create an invalid `.git` file with a non-existent `gitdir`. 
- invalid_git_file = os.path.join(self.repo_dir, ".git") - txt = "gitdir: /nonexistent/path/to/gitdir" - hio.to_file(invalid_git_file, txt) - - def tear_down_test(self) -> None: - self.temp_dir.cleanup() - - def test1(self) -> None: - """ - Check that the error is raised when the caller is in a directory that - is not either a git repo or a submodule. - """ - with ( - hsystem.cd(self.arbitrary_dir), - self.assertRaises(AssertionError) as cm, - ): - _ = hgit.find_git_root(".") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '/' - != - '/' - No .git directory or file found in any parent directory. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test2(self) -> None: - """ - Check that the error is raised when the caller is in a submodule or - linked repo that points to non existing super repo. - """ - with hsystem.cd(self.repo_dir), self.assertRaises(AssertionError) as cm: - _ = hgit.find_git_root(".") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '/' - != - '/' - Top-level .git directory not found. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# -# Test_find_git_root6 -# ############################################################################# - - -class Test_find_git_root6(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a worktree - - Directory structure: - main_repo/ - `-- .git/ - |-- config - `-- worktrees/ - `-- csfy2/ - |-- HEAD - `-- config - csfy2/ (worktree) - `-- .git (points to /main_repo/.git/worktrees/csfy2) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create main repo with a .git directory. 
- self.main_repo_dir = os.path.join(temp_dir, "main_repo") - hio.create_dir(self.main_repo_dir, incremental=False) - self.git_dir = os.path.join(self.main_repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create worktree git metadata directory. - self.worktree_git_dir = os.path.join(self.git_dir, "worktrees", "csfy2") - hio.create_dir(self.worktree_git_dir, incremental=False) - # Create worktree directory. - self.worktree_dir = os.path.join(temp_dir, "csfy2") - hio.create_dir(self.worktree_dir, incremental=False) - # Create pointer from worktree to the git directory. - worktree_git_file = os.path.join(self.worktree_dir, ".git") - txt = f"gitdir: {self.worktree_git_dir}\n" - hio.to_file(worktree_git_file, txt) - - def test1(self) -> None: - """ - Check that the function returns the worktree root when called from a worktree. - """ - self.set_up_test() - with hsystem.cd(self.worktree_dir): - git_root = hgit.find_git_root(".") - # For worktrees, the function should return the worktree root, - # not the main repository root. 
- self.assert_equal(git_root, self.worktree_dir) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py deleted file mode 100644 index 2e4a97ca4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py +++ /dev/null @@ -1,406 +0,0 @@ -import logging -import os -import re -from typing import Any, Callable - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpickle as hpickle -import helpers.hstring as hstring -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_is_pickleable -# ############################################################################# - - -def hello() -> bool: - return False - - -# ############################################################################# -# _ClassPickleable -# ############################################################################# - - -class _ClassPickleable: - """ - Class with pickleable param values. - """ - - def __init__(self) -> None: - self._arg1 = 1 - self._arg2 = ["2", 3] - - @staticmethod - def say2(self) -> None: - print("Hello") - - def say(self) -> None: - print("Hello") - - -# ############################################################################# -# _ClassNonPickleable -# ############################################################################# - - -class _ClassNonPickleable: - """ - Class with non-pickleable param values. 
- """ - - def __init__(self) -> None: - self._arg1 = lambda x: x - self._arg2 = 2 - - -# ############################################################################# -# Test_is_pickleable1 -# ############################################################################# - - -class Test_is_pickleable1(hunitest.TestCase): - def helper( - self, - obj: Any, - exp_str: str, - exp_bound: bool, - exp_lambda: bool, - exp_pickled: bool, - ) -> None: - _LOG.debug("obj=%s", obj) - # - act_str = str(obj) - _LOG.debug("act_str=%s", act_str) - _LOG.debug("exp_str=%s", exp_str) - self.assert_equal(act_str, exp_str, purify_text=True) - # - act_bound = hintros.is_bound_to_object(obj) - _LOG.debug("act_bound=%s", act_bound) - _LOG.debug("exp_bound=%s", exp_bound) - self.assertEqual(act_bound, exp_bound) - # - act_lambda = hintros.is_lambda_function(obj) - _LOG.debug("act_lambda=%s", act_lambda) - _LOG.debug("exp_lambda=%s", exp_lambda) - self.assertEqual(act_lambda, exp_lambda) - # Try to pickle. - try: - file_name = os.path.join(self.get_scratch_space(), "obj.pkl") - hpickle.to_pickle(obj, file_name) - act_pickled = True - except AttributeError as e: - _LOG.error("e=%s", e) - act_pickled = False - _LOG.debug("act_pickled=%s", act_pickled) - _LOG.debug("exp_pickled=%s", exp_pickled) - self.assertEqual(act_pickled, exp_pickled) - - def test_lambda1(self) -> None: - # Local lambda. - lambda_ = lambda: 0 - func = lambda_ - exp_str = r". at 0x>" - # A lambda is not bound to an object. - exp_bound = False - exp_lambda = True - # A lambda is not pickleable. - exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_lambda2(self) -> None: - lambda_ = lambda x: x - func = lambda_ - exp_str = r". at 0x>" - # A lambda is not bound to an object. - exp_bound = False - exp_lambda = True - # A lambda is not pickleable. 
- exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_func1(self) -> None: - def _hello() -> bool: - return False - - # - func = _hello - exp_str = ( - r"._hello at 0x>" - ) - exp_bound = False - exp_lambda = False - # A local object is not pickleable. - exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_func2(self) -> None: - # Global function. - func = hello - exp_str = r"" - exp_bound = False - exp_lambda = False - # A global function is pickleable since it's not bound locally or - # to an object. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method1(self) -> None: - # A class method but unbound to an object. - func = _ClassPickleable.say - exp_str = r"" - exp_bound = False - exp_lambda = False - # A unbound class method is actually pickleable. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method2(self) -> None: - # A static class method. - func = _ClassPickleable.say2 - exp_str = r"" - exp_bound = False - exp_lambda = False - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method3(self) -> None: - # A bound method. - class_instance = _ClassPickleable() - func = class_instance.say - exp_str = r">" - exp_bound = True - exp_lambda = False - # A method bound to an object is just a function, so it's pickleable. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method4(self) -> None: - # A static class method. 
- class_instance = _ClassPickleable() - func = class_instance.say2 - exp_str = r"" - exp_bound = False - exp_lambda = False - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - -# ############################################################################# -# Test_is_pickleable2 -# ############################################################################# - - -class Test_is_pickleable2(hunitest.TestCase): - def helper( - self, - obj: Any, - mode: str, - expected: bool, - ) -> None: - """ - Check that picklebility is detected correctly for specified mode. - """ - _LOG.debug("obj=%s", obj) - actual = hintros.is_pickleable(obj, mode=mode) - _LOG.debug("actual=%s", actual) - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_non_callable1(self) -> None: - obj = [1, "2", 0.3] - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_non_callable2(self) -> None: - obj = [1, "2", 0.3] - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_lambda1(self) -> None: - obj = lambda x: x - mode = "type_search" - expected = False - self.helper(obj, mode, expected) - - def test_lambda2(self) -> None: - obj = lambda x: x - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - def test_local_object1(self) -> None: - def _hello() -> bool: - return False - - obj = _hello - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_local_object2(self) -> None: - def _hello() -> bool: - return False - - obj = _hello - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - def test_global_object1(self) -> None: - obj = hello - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_global_object2(self) -> None: - obj = hello - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def 
test_unbound_class_method1(self) -> None: - obj = _ClassPickleable.say - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_unbound_class_method2(self) -> None: - obj = _ClassPickleable.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_static_class_method1(self) -> None: - obj = _ClassPickleable.say - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_static_class_method2(self) -> None: - obj = _ClassPickleable.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_bound_to_object_method1(self) -> None: - class_instance = _ClassPickleable() - obj = class_instance.say - mode = "type_search" - expected = False - self.helper(obj, mode, expected) - - def test_bound_to_object_method2(self) -> None: - class_instance = _ClassPickleable() - obj = class_instance.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_pickleable_class1(self) -> None: - obj = _ClassPickleable() - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_pickleable_class2(self) -> None: - obj = _ClassPickleable() - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_nonpickleable_class1(self) -> None: - obj = _ClassNonPickleable() - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_nonpickleable_class2(self) -> None: - obj = _ClassNonPickleable() - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - -# ############################################################################# -# Test_get_function_name1 -# ############################################################################# - - -def test_function() -> None: - pass - - -# ############################################################################# -# Test_get_function_name1 -# 
############################################################################# - - -class Test_get_function_name1(hunitest.TestCase): - def test1(self) -> None: - actual = hintros.get_function_name() - expected = "test1" - self.assert_equal(actual, expected, purify_text=True) - - -# ############################################################################# -# Test_get_name_from_function1 -# ############################################################################# - - -class Test_get_name_from_function1(hunitest.TestCase): - def test1(self) -> None: - actual = hintros.get_name_from_function(test_function) - actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) - expected = "helpers.test.test_hintrospection.test_function" - self.assert_equal(actual, expected, purify_text=True) - - -# ############################################################################# -# Test_get_function_from_string1 -# ############################################################################# - - -def dummy_function() -> None: - pass - - -# ############################################################################# -# Test_get_function_from_string1 -# ############################################################################# - - -class Test_get_function_from_string1(hunitest.TestCase): - def test1(self) -> None: - """ - Test that function is correctly extracted from a string. - """ - func_str = "helpers.test.test_hintrospection.dummy_function" - # Compute the actual value. - act_func = hintros.get_function_from_string(func_str) - actual = hintros.get_name_from_function(act_func) - actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) - # Compute the expected value. - exp_func = dummy_function - expected = hintros.get_name_from_function(exp_func) - expected = hstring.remove_prefix(expected, "amp.", assert_on_error=False) - # Run. 
- hdbg.dassert_isinstance(act_func, Callable) - # The function can have different names depending on whether `helpers` - # is a sub-repo or a super-repo: - # helpers.test.test_hintrospection.dummy_function - # helpers_root.helpers.test.test_hintrospection.dummy_function - # - actual = re.sub( - r"helpers_root\.helpers\.", "helpers.", actual, flags=re.MULTILINE - ) - expected = re.sub( - r"helpers_root\.helpers\.", "helpers.", expected, flags=re.MULTILINE - ) - self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py deleted file mode 100644 index cbf1f16f3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py +++ /dev/null @@ -1,225 +0,0 @@ -import logging -import os - -import numpy as np -import pandas as pd - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_rename_file_if_exists -# ############################################################################# - - -class Test_rename_file_if_exists(hunitest.TestCase): - """ - Test that the function renames existing files correctly. - """ - - def check_file( - self, - file_to_rename: str, - before_extension: bool, - expected_file_name: str, - ) -> None: - """ - Check that file is renamed correctly. - """ - # Create a target file to rename. - scratch_dir = self.get_scratch_space() - file_name = "test_file.txt" - file_path = os.path.join(scratch_dir, file_name) - lines = "" - hio.to_file(file_path, lines) - # Rename the file. 
- file_to_rename = os.path.join(scratch_dir, file_to_rename) - suffix = "suffix" - hio.rename_file_if_exists( - file_to_rename, suffix, before_extension=before_extension - ) - # Check that file is renamed. - expected_file_path = os.path.join(scratch_dir, expected_file_name) - self.assertTrue(os.path.exists(expected_file_path)) - - def test1(self) -> None: - """ - Test that suffix is added before an extension. - """ - file_to_rename = "test_file.txt" - before_extension = True - expected_file_name = "test_file.suffix.txt" - self.check_file(file_to_rename, before_extension, expected_file_name) - - def test2(self) -> None: - """ - Test that suffix is added after an extension. - """ - file_to_rename = "test_file.txt" - before_extension = False - expected_file_name = "test_file.txt.suffix" - self.check_file(file_to_rename, before_extension, expected_file_name) - - def test3(self) -> None: - """ - Test that non-existing file is not renamed. - """ - file_to_rename = "not_exist.txt" - before_extension = False - expected_file_name = "not_exist.txt" - with self.assertRaises(AssertionError): - self.check_file(file_to_rename, before_extension, expected_file_name) - - -# ############################################################################# -# Test_find_all_files1 -# ############################################################################# - - -class Test_find_all_files1(hunitest.TestCase): - def test1(self) -> None: - dir_name = hgit.get_client_root(super_module=False) - # Check that there are files. - pattern = "*" - only_files = True - use_relative_paths = True - all_files = hio.listdir( - dir_name, pattern, only_files, use_relative_paths - ) - self.assertGreater(len(all_files), 0) - # Check that there are more files than Python files. 
- exclude_paired_jupytext = False - py_files = hio.keep_python_files(all_files, exclude_paired_jupytext) - self.assertGreater(len(py_files), 0) - self.assertGreater(len(all_files), len(py_files)) - # Check that there are more Python files than not paired Python files. - exclude_paired_jupytext = True - not_paired_py_files = hio.keep_python_files( - all_files, exclude_paired_jupytext - ) - self.assertGreater(len(not_paired_py_files), 0) - self.assertGreater(len(py_files), len(not_paired_py_files)) - - -# ############################################################################# -# Test_change_filename_extension1 -# ############################################################################# - - -class Test_change_filename_extension1(hunitest.TestCase): - def test1(self) -> None: - file_name = "./core/dataflow_model/notebooks/Master_experiment_runner.py" - actual = hio.change_filename_extension(file_name, "py", "ipynb") - expected = ( - "./core/dataflow_model/notebooks/Master_experiment_runner.ipynb" - ) - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_load_df_from_json -# ############################################################################# - - -class Test_load_df_from_json(hunitest.TestCase): - def test1(self) -> None: - test_json_path = os.path.join(self.get_input_dir(), "test.json") - actual_result = hio.load_df_from_json(test_json_path) - expected_result = pd.DataFrame( - { - "col1": ["a", "b", "c", "d"], - "col2": ["a", "b", np.nan, np.nan], - "col3": ["a", "b", "c", np.nan], - } - ) - actual_result = hpandas.df_to_str(actual_result) - expected_result = hpandas.df_to_str(expected_result) - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_safe_rm_file -# ############################################################################# - - -class Test_safe_rm_file(hunitest.TestCase): 
- def test_successful_removal_within_git_client(self) -> None: - """ - Test successful removal of directory within Git client. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - test_dir = os.path.join(scratch_dir, "test_dir_to_remove") - os.makedirs(test_dir) - # Create a test file in the directory to ensure it has content - test_file = os.path.join(test_dir, "test_file.txt") - hio.to_file(test_file, "test content") - # Verify directory exists before removal - self.assertTrue(os.path.exists(test_dir)) - # Run test. - hio.safe_rm_file(test_dir) - # Check output. - self.assertFalse(os.path.exists(test_dir)) - - def test_removal_of_nested_directory(self) -> None: - """ - Test removal of deeply nested directory structure. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - nested_dir = os.path.join(scratch_dir, "parent", "child", "grandchild") - os.makedirs(nested_dir) - # Create files at different levels - hio.to_file(os.path.join(nested_dir, "file1.txt"), "content1") - hio.to_file( - os.path.join(os.path.dirname(nested_dir), "file2.txt"), "content2" - ) - parent_dir = os.path.join(scratch_dir, "parent") - # Verify directory exists - self.assertTrue(os.path.exists(parent_dir)) - # Run test. - hio.safe_rm_file(parent_dir) - # Check output. - self.assertFalse(os.path.exists(parent_dir)) - - def test_directory_does_not_exist(self) -> None: - """ - Test that function raises assertion error for non-existent directory. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - non_existent_dir = os.path.join(scratch_dir, "non_existent_directory") - # Ensure directory doesn't exist - self.assertFalse(os.path.exists(non_existent_dir)) - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(non_existent_dir) - self.assertIn("does not exist", str(cm.exception)) - - def test_cannot_delete_git_root(self) -> None: - """ - Test that function prevents deletion of Git client root directory. 
- """ - # Prepare inputs. - git_root = hgit.find_git_root() - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(git_root) - self.assertIn("Cannot delete Git client root", str(cm.exception)) - - def test_directory_outside_git_client_rejected(self) -> None: - """ - Test that function rejects directories outside Git client. - """ - # Prepare inputs. - # Use /tmp which should be outside any Git client - outside_dir = "/tmp" - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(outside_dir) - self.assertIn("is not within Git client root", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py deleted file mode 100644 index 70450e943..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py +++ /dev/null @@ -1,665 +0,0 @@ -""" -Unit tests for hlatex module. 
- -This module tests LaTeX text processing utilities including: -- Removing LaTeX formatting commands -- Detecting LaTeX line separators -- Framing sections with separator lines -- Detecting LaTeX comments -- Extracting section headers and their hierarchy -""" - -import logging - -import helpers.hlatex as hlatex -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# ############################################################################# -# Test_remove_latex_formatting1 -# ############################################################################# - - -class Test_remove_latex_formatting1(hunitest.TestCase): - """ - Test the remove_latex_formatting function. - """ - - def test1(self) -> None: - """ - Test removal of textcolor commands from LaTeX text. - """ - # Prepare inputs. - txt = r""" - - If there is \textcolor{red}{no pattern}, we can try learning: - - Measure if \textcolor{blue}{learning works}. - - In the \textcolor{orange}{worst case}, conclude that it - \textcolor{green}{does not work}. - - If we can find the \textcolor{purple}{solution in one step} or - \textcolor{cyan}{program the solution}: - - \textcolor{brown}{Machine learning} is not the \textcolor{teal}{recommended - technique}, but it still works. - - Without \textcolor{magenta}{data}, we cannot do anything: - \textcolor{violet}{data is all that matters}. - """ - txt = hprint.dedent(txt) - # Prepare outputs. - expected = r""" - - If there is no pattern, we can try learning: - - Measure if learning works. - - In the worst case, conclude that it - does not work. - - If we can find the solution in one step or - program the solution: - - Machine learning is not the recommended - technique, but it still works. 
- - Without data, we cannot do anything: - data is all that matters.""" - expected = hprint.dedent(expected) - # Run test. - actual = hlatex.remove_latex_formatting(txt) - # Check outputs. - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_is_latex_line_separator1 -# ############################################################################# - - -class Test_is_latex_line_separator1(hunitest.TestCase): - """ - Test the _is_latex_line_separator function. - """ - - def test1(self) -> None: - """ - Test that a line with repeated # characters is recognized as separator. - """ - # Prepare inputs. - line = "% ##########" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test2(self) -> None: - """ - Test that a line with repeated = characters is recognized as separator. - """ - # Prepare inputs. - line = "% ==========" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test3(self) -> None: - """ - Test that a line with repeated - characters is recognized as separator. - """ - # Prepare inputs. - line = "% ----------" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test4(self) -> None: - """ - Test that a line with too few repeated characters is not a separator. - """ - # Prepare inputs. - line = "% ####" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertFalse(actual) - - def test5(self) -> None: - """ - Test that a regular comment is not recognized as separator. - """ - # Prepare inputs. - line = "% This is a regular comment" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. 
- self.assertFalse(actual) - - -# ############################################################################# -# Test_frame_sections1 -# ############################################################################# - - -class Test_frame_sections1(hunitest.TestCase): - """ - Test the frame_sections function. - """ - - def helper(self, input_txt: str, expected: str) -> None: - """ - Helper method to test frame_sections function. - - :param input_txt: Input LaTeX text - :param expected: Expected output after processing - """ - # Prepare inputs. - lines = hprint.dedent(input_txt) - lines = lines.split("\n") - # Run test. - actual = hlatex.frame_sections(lines) - actual = "\n".join(actual) - # Prepare outputs. - expected = hprint.dedent(expected) - # Check outputs. - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test adding separator before a single section command. - """ - # Prepare inputs. - input_txt = r""" - \section{Introduction} - This is the introduction. - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - This is the introduction. - """ - # Run test. - self.helper(input_txt, expected) - - def test2(self) -> None: - """ - Test adding separators before section, subsection, and subsubsection. - """ - # Prepare inputs. - input_txt = r""" - \section{Proposed framework} - - \subsection{Combining Physics-Informed and Data-Driven Approaches} - - \subsubsection{Detailed Analysis} - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Proposed framework} - - % ============================================================================== - \subsection{Combining Physics-Informed and Data-Driven Approaches} - - % ------------------------------------------------------------------------------ - \subsubsection{Detailed Analysis} - """ - # Run test. 
- self.helper(input_txt, expected) - - def test3(self) -> None: - """ - Test that existing separators are removed and replaced with correct ones. - """ - # Prepare inputs. - input_txt = r""" - % ============== - \section{Introduction} - - % ############## - \subsection{Background} - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - - % ============================================================================== - \subsection{Background} - """ - # Run test. - self.helper(input_txt, expected) - - def test4(self) -> None: - """ - Test that multiple consecutive empty lines are reduced to one. - """ - # Prepare inputs. - input_txt = r""" - \section{Introduction} - - - - This is text after multiple empty lines. - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - - This is text after multiple empty lines. - """ - # Run test. - self.helper(input_txt, expected) - - def test5(self) -> None: - """ - Test with mixed content including text, sections, and empty lines. - """ - # Prepare inputs. - input_txt = r""" - This is some introductory text. - - \section{Methods} - - We describe the methods here. - - - \subsection{Data Collection} - - Details about data collection. - - \subsubsection{Sampling Strategy} - - Sampling details here. - """ - # Prepare outputs. - expected = r""" - This is some introductory text. - - % ############################################################################## - \section{Methods} - - We describe the methods here. - - % ============================================================================== - \subsection{Data Collection} - - Details about data collection. - - % ------------------------------------------------------------------------------ - \subsubsection{Sampling Strategy} - - Sampling details here. - """ - # Run test. 
- self.helper(input_txt, expected) - - def test6(self) -> None: - """ - Test that lines without section commands are left unchanged. - """ - # Prepare inputs. - input_txt = r""" - This is regular text. - No sections here. - Just content. - """ - # Prepare outputs. - expected = r""" - This is regular text. - No sections here. - Just content. - """ - # Run test. - self.helper(input_txt, expected) - - -# ############################################################################# -# Test_is_latex_comment -# ############################################################################# - - -class Test_is_latex_comment(hunitest.TestCase): - """ - Test the _is_latex_comment function. - """ - - def test1(self) -> None: - """ - Test that a line starting with % is recognized as a comment. - """ - # Prepare inputs. - line = "% This is a comment" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - def test2(self) -> None: - """ - Test that a line with leading whitespace and % is a comment. - """ - # Prepare inputs. - line = " % This is a comment" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - def test3(self) -> None: - """ - Test that a regular line is not recognized as a comment. - """ - # Prepare inputs. - line = "This is regular text" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertFalse(actual) - - def test4(self) -> None: - """ - Test that a line with escaped % character is not a comment. - """ - # Prepare inputs. - line = r"The value is \% of the total" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertFalse(actual) - - def test5(self) -> None: - """ - Test that a line with % in the middle is not a comment. - """ - # Prepare inputs. - line = r"Text before \% and after" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. 
- self.assertFalse(actual) - - def test6(self) -> None: - """ - Test that a line with only % is a comment. - """ - # Prepare inputs. - line = "%" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - -# ############################################################################# -# Test_extract_latex_section -# ############################################################################# - - -class Test_extract_latex_section(hunitest.TestCase): - """ - Test the _extract_latex_section function. - """ - - def helper( - self, line: str, expected_level: int, expected_title: str - ) -> None: - """ - Helper method to test extraction of LaTeX section commands. - - :param line: LaTeX line to parse - :param expected_level: Expected section level (0 if no section) - :param expected_title: Expected title (empty string if no section) - """ - # Prepare inputs - line_number is arbitrary for testing. - line_number = 1 - # Run test. - header_info = hlatex._extract_latex_section(line, line_number) - # Check outputs. - if expected_level == 0: - # No section expected. - self.assertIsNone(header_info) - else: - # Section expected. - self.assertIsNotNone(header_info) - self.assert_equal(str(header_info.level), str(expected_level)) - self.assert_equal(header_info.description, expected_title) - - def test1(self) -> None: - """ - Test extraction of basic section command. - """ - line = r"\section{Introduction}" - self.helper(line, 1, "Introduction") - - def test2(self) -> None: - """ - Test extraction of basic subsection command. - """ - line = r"\subsection{Background}" - self.helper(line, 2, "Background") - - def test3(self) -> None: - """ - Test extraction of basic subsubsection command. - """ - line = r"\subsubsection{Details}" - self.helper(line, 3, "Details") - - def test4(self) -> None: - """ - Test extraction of section with nested LaTeX commands. 
- """ - line = r"\section{Introduction to \textbf{Machine Learning}}" - self.helper(line, 1, r"Introduction to \textbf{Machine Learning}") - - def test5(self) -> None: - """ - Test extraction of section with optional short title. - """ - line = r"\section[Short Title]{Long Title for Table of Contents}" - # Should extract the long title (in curly braces). - self.helper(line, 1, "Long Title for Table of Contents") - - def test6(self) -> None: - """ - Test extraction of section with escaped special characters. - """ - line = r"\section{Cost Analysis: \$100 \& More}" - self.helper(line, 1, r"Cost Analysis: \$100 \& More") - - def test7(self) -> None: - """ - Test extraction of section with leading whitespace. - """ - line = r" \section{Methods}" - self.helper(line, 1, "Methods") - - def test8(self) -> None: - """ - Test that a regular line is not recognized as a section. - """ - line = "This is regular text" - self.helper(line, 0, "") - - def test9(self) -> None: - """ - Test that section with empty title is not extracted. - """ - line = r"\section{}" - # Sections with empty titles should not be extracted. - self.helper(line, 0, "") - - -# ############################################################################# -# Test_extract_headers_from_latex -# ############################################################################# - - -class Test_extract_headers_from_latex(hunitest.TestCase): - """ - Test the extract_headers_from_latex function. - """ - - def helper(self, lines: str, expected: str, *, max_level: int = 3) -> None: - """ - Helper method to test header extraction from LaTeX documents. - - :param lines: LaTeX document content as a string - :param expected: Expected string representation of header list - :param max_level: Maximum header level to extract (default: 3) - """ - # Prepare inputs. - lines_list = hprint.dedent(lines).split("\n") - # Run test. 
- actual = hlatex.extract_headers_from_latex( - lines_list, max_level, sanity_check=False - ) - actual_str = hmarhead.header_list_to_str(actual) - # Prepare outputs. - expected = hprint.dedent(expected) - # Check outputs. - self.assert_equal(actual_str, expected) - - def test1(self) -> None: - """ - Test extraction from a basic LaTeX document with multiple section levels. - """ - # Prepare inputs. - lines = r""" - \section{Introduction} - This is the introduction. - - \subsection{Background} - Background information here. - - \section{Methods} - Methods description. - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Introduction', 1) - HeaderInfo(2, 'Background', 4) - HeaderInfo(1, 'Methods', 7)""" - # Run test. - self.helper(lines, expected) - - def test2(self) -> None: - """ - Test that commented-out sections are skipped. - """ - # Prepare inputs. - lines = r""" - \section{Introduction} - % \section{Old Section} - \subsection{Current Subsection} - % \subsection{Old Subsection} - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Introduction', 1) - HeaderInfo(2, 'Current Subsection', 3)""" - # Run test. - self.helper(lines, expected) - - def test3(self) -> None: - """ - Test that only headers up to max_level are extracted. - """ - # Prepare inputs. - lines = r""" - \section{Chapter 1} - \subsection{Section 1.1} - \subsubsection{Section 1.1.1} - """ - # Prepare outputs. - # Should only get section and subsection, not subsubsection. - expected = """ - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(2, 'Section 1.1', 2)""" - # Run test. - self.helper(lines, expected, max_level=2) - - def test4(self) -> None: - """ - Test extraction with nested LaTeX commands in titles. - """ - # Prepare inputs. - lines = r""" - \section{Introduction to \textbf{ML}} - \subsection{Using \emph{Neural Networks}} - """ - # Prepare outputs. - expected = r""" - HeaderInfo(1, 'Introduction to \textbf{ML}', 1) - HeaderInfo(2, 'Using \emph{Neural Networks}', 2)""" - # Run test. 
- self.helper(lines, expected) - - def test5(self) -> None: - """ - Test that line numbers are correctly recorded. - """ - # Prepare inputs. - lines = r""" - Some text here. - - \section{First Section} - More text. - - \subsection{First Subsection} - Even more text. - """ - # Prepare outputs. - # Line numbers should be 3 and 6 (1-indexed). - expected = """ - HeaderInfo(1, 'First Section', 3) - HeaderInfo(2, 'First Subsection', 6)""" - # Run test. - self.helper(lines, expected) - - def test6(self) -> None: - """ - Test extraction from document with no sections. - """ - # Prepare inputs. - lines = """ - This is just regular text. - No sections here. - """ - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, expected) - - def test7(self) -> None: - """ - Test extraction with all three section levels. - """ - # Prepare inputs. - lines = r""" - \section{Chapter 1} - Introduction to chapter. - - \subsection{Section 1.1} - Section content. - - \subsubsection{Subsection 1.1.1} - Detailed content. - - \subsection{Section 1.2} - More content. - - \section{Chapter 2} - Second chapter. - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(2, 'Section 1.1', 4) - HeaderInfo(3, 'Subsection 1.1.1', 7) - HeaderInfo(2, 'Section 1.2', 10) - HeaderInfo(1, 'Chapter 2', 13)""" - # Run test. 
- self.helper(lines, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py deleted file mode 100644 index f8d9b237d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py +++ /dev/null @@ -1,176 +0,0 @@ -import logging -from typing import List, Optional - -import helpers.hlist as hlist -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_list_find_duplicates1 -# ############################################################################# - - -class Test_list_find_duplicates1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(list_out, []) - - def test2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(set(list_out), set("a f".split())) - - -# ############################################################################# -# Test_list_remove_duplicates1 -# ############################################################################# - - -class Test_list_remove_duplicates1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d".split()) - - def test2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d e f".split()) - - def test3(self) -> None: - list_ = "a b c a d e f f".split() - list_ = list(reversed(list_)) - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "f e d a c b".split()) - - -# 
############################################################################# -# Test_list_extract1 -# ############################################################################# - - -class Test_list_extract1(hunitest.TestCase): - def _helper( - self, - start_idx: Optional[int], - end_idx: Optional[int], - expected_list: List[str], - ) -> None: - list_ = "a b c d".split() - actual_list = hlist.extract(list_, start_idx, end_idx) - self.assertEqual(actual_list, expected_list) - - def test1(self) -> None: - start_idx = 0 - end_idx = 1 - expected_list = "a".split() - self._helper(start_idx, end_idx, expected_list) - - def test2(self) -> None: - start_idx = 1 - end_idx = None - expected_list = "b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test3(self) -> None: - start_idx = None - end_idx = None - expected_list = "a b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test4(self) -> None: - start_idx = None - end_idx = 2 - expected_list = "a b".split() - self._helper(start_idx, end_idx, expected_list) - - def test5(self) -> None: - start_idx = None - end_idx = 2 - expected_list = "a b".split() - self._helper(start_idx, end_idx, expected_list) - - def test6(self) -> None: - start_idx = 0 - end_idx = 4 - expected_list = "a b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test7(self) -> None: - start_idx = 0 - end_idx = 3 - expected_list = "a b c".split() - self._helper(start_idx, end_idx, expected_list) - - -# ############################################################################# -# Test_list_chunk1 -# ############################################################################# - - -class Test_list_chunk1(hunitest.TestCase): - def _helper(self, n: int, expected_list: List[List[str]]) -> None: - list_ = "a b c d e f".split() - actual_list = hlist.chunk(list_, n) - self.assertEqual(actual_list, expected_list) - - def test1(self) -> None: - n = 1 - expected_list = ["a b c d e f".split()] - 
self._helper(n, expected_list) - - def test2(self) -> None: - n = 2 - expected_list = [["a", "b", "c"], ["d", "e", "f"]] - self._helper(n, expected_list) - - def test3(self) -> None: - n = 3 - expected_list = [["a", "b"], ["c", "d"], ["e", "f"]] - self._helper(n, expected_list) - - def test4(self) -> None: - n = 4 - expected_list = [["a", "b"], ["c", "d"], ["e"], ["f"]] - self._helper(n, expected_list) - - def test5(self) -> None: - n = 6 - expected_list = [["a"], ["b"], ["c"], ["d"], ["e"], ["f"]] - self._helper(n, expected_list) - - -# ############################################################################# -# Test_list1 -# ############################################################################# - - -class Test_list1(hunitest.TestCase): - def test_find_duplicates1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(list_out, []) - - def test_find_duplicates2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(set(list_out), set("a f".split())) - - def test_remove_duplicates1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d".split()) - - def test_remove_duplicates2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d e f".split()) - - def test_remove_duplicates3(self) -> None: - list_ = "a b c a d e f f".split() - list_ = list(reversed(list_)) - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "f e d a c b".split()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py deleted file mode 100644 index 820d21519..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py +++ /dev/null @@ -1,361 +0,0 @@ -import os -import types -import unittest.mock as umock -from typing import Any, Dict - -import pandas as pd -import pytest - -pytest.importorskip("openai") # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hdbg as hdbg # noqa: E402 -import helpers.hllm as hllm # noqa: E402 -import helpers.hunit_test as hunitest # noqa: E402 - -_USER_PROMPT1 = "what is machine learning?" -_USER_PROMPT2 = _USER_PROMPT1.upper() - -_SYSTEM_PROMPT1 = "You are a helpful AI assistant." -_SYSTEM_PROMPT2 = ( - "You are a helpful AI assistant and excellent in explaining things." -) - -_TEMPERATURE1 = 0.1 -_TEMPERATURE2 = 0.2 - -_TOP_P1 = 0.5 - -_MODEL1 = "gpt-4o-mini" -_MODEL2 = "gpt-3.5-turbo" -_MODEL3 = "deepseek/deepseek-r1-0528-qwen3-8b:free" -_MODEL4 = "openai/gpt-4o-mini" - - -# Test functions for the unit tests. -def _get_completion_parameters1() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT1, - "system_prompt": _SYSTEM_PROMPT1, - "temperature": _TEMPERATURE1, - "model": _MODEL1, - } - return data - - -def _get_completion_parameters2() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT2, - "system_prompt": _SYSTEM_PROMPT2, - "temperature": _TEMPERATURE2, - "model": _MODEL2, - "top_p": _TOP_P1, - } - return data - - -def _get_completion_parameters3() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT2, - "system_prompt": _SYSTEM_PROMPT2, - "temperature": _TEMPERATURE2, - "model": _MODEL3, - "top_p": _TOP_P1, - } - return data - - -def _get_completion_parameters4() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT1, - "system_prompt": _SYSTEM_PROMPT1, - "temperature": _TEMPERATURE1, - "model": _MODEL4, - } - return data - - -# ############################################################################# -# Test_get_completion -# 
############################################################################# - - -class Test_get_completion(hunitest.TestCase): - def test1(self) -> None: - """ - Verify that get_completion() returns response from cache with the - expected response. - """ - parameters1 = _get_completion_parameters1() - actual_response = hllm.get_completion( - **parameters1, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test2(self) -> None: - """ - Verify with different openai models. - """ - parameters2 = _get_completion_parameters2() - actual_response = hllm.get_completion( - **parameters2, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test3(self) -> None: - """ - Verify if hllm.get_completion() support openrouter models. - """ - parameters3 = _get_completion_parameters3() - actual_response = hllm.get_completion( - **parameters3, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test4(self) -> None: - """ - Verify with OpenAI-prefixed models. - """ - parameters4 = _get_completion_parameters4() - actual_response = hllm.get_completion( - **parameters4, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - -# ############################################################################# -# Test_response_to_txt -# ############################################################################# - - -class Test_response_to_txt(hunitest.TestCase): - # Dummy classes to satisfy `isinstance` checks. 
- - class DummyChatCompletion: - def __init__(self, text: str = "") -> None: - msg = types.SimpleNamespace(content=text) - choice = types.SimpleNamespace(message=msg) - self.choices = [choice] - - class DummyThreadMessage: - def __init__(self, text: str = "") -> None: - # mimic .content[0].text.value - value_obj = types.SimpleNamespace(value=text) - text_obj = types.SimpleNamespace(text=value_obj) - self.content = [text_obj] - - @umock.patch( - "openai.types.chat.chat_completion.ChatCompletion", - new=DummyChatCompletion, - ) - def test_chat_completion_branch(self) -> None: - resp = Test_response_to_txt.DummyChatCompletion("hello chat") - actual = hllm.response_to_txt(resp) - expected = "hello chat" - self.assert_equal(actual, expected) - - @umock.patch( - "openai.types.beta.threads.message.Message", - new=DummyThreadMessage, - ) - def test_thread_message_branch(self) -> None: - resp = Test_response_to_txt.DummyThreadMessage("thread reply") - actual = hllm.response_to_txt(resp) - expected = "thread reply" - self.assert_equal(actual, expected) - - def test_str_pass_through(self) -> None: - actual = hllm.response_to_txt("just a string") - expected = "just a string" - self.assert_equal(actual, expected) - - def test_unknown_type_raises(self) -> None: - with self.assertRaises(ValueError) as cm: - hllm.response_to_txt(12345) - self.assertIn("Unknown response type", str(cm.exception)) - - -# ############################################################################# -# Test_retrieve_openrouter_model_info -# ############################################################################# - - -class Test_retrieve_openrouter_model_info(hunitest.TestCase): - @umock.patch("requests.get") - def test_retrieve_success(self, mock_get) -> None: - # Prepare dummy JSON data. 
- data = [ - {"id": "model1", "name": "Model One"}, - {"id": "model2", "name": "Model Two"}, - ] - mock_response = umock.Mock() - mock_response.json.return_value = {"data": data} - mock_get.return_value = mock_response - # Call the function under test. - df = hllm._retrieve_openrouter_model_info() - # Build expected DataFrame. - expected_df = pd.DataFrame(data) - # Verify DataFrame content. - self.assertEqual( - df.to_dict(orient="records"), expected_df.to_dict(orient="records") - ) - # Ensure the correct URL was requested. - mock_get.assert_called_once_with("https://openrouter.ai/api/v1/models") - - @umock.patch("requests.get") - def test_missing_data_key_raises(self, mock_get) -> None: - # JSON missing the 'data' key. - mock_response = umock.Mock() - mock_response.json.return_value = {"wrong": []} - mock_get.return_value = mock_response - # Expect an assertion from hdbg.dassert_eq. - with self.assertRaises(AssertionError): - hllm._retrieve_openrouter_model_info() - - -# ############################################################################# -# Test_save_models_info_to_csv -# ############################################################################# - - -class Test_save_models_info_to_csv(hunitest.TestCase): - def get_temp_path(self) -> str: - """ - Helper function for creating temporary directory. - """ - self.tmp_dir = self.get_scratch_space() - tmp_file_name = "tmp.models_info.csv" - self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) - return self.tmp_path - - def test_save_models_info(self) -> None: - """ - Save Dataframe as a CSV and check. - """ - # Prepare a DataFrame with extra columns. 
- data = [ - { - "id": "m1", - "name": "Model1", - "description": "desc1", - "pricing": {"prompt": 0.1, "completion": 0.2}, - "supported_parameters": ["a", "b"], - "extra_col": 123, - }, - { - "id": "m2", - "name": "Model2", - "description": "desc2", - "pricing": {"prompt": 0.3, "completion": 0.4}, - "supported_parameters": ["c"], - "extra_col": 456, - }, - ] - df = pd.DataFrame(data) - output_file: str = self.get_temp_path() - # Call the function under test. - returned_df = hllm._save_models_info_to_csv(df, output_file) - # The returned DataFrame should have only the selected columns. - expected_columns = [ - "id", - "name", - "description", - "prompt_pricing", - "completion_pricing", - "supported_parameters", - ] - hdbg.dassert_eq(list(returned_df.columns), expected_columns) - # Verify pricing values are extracted correctly. - self.assert_equal( - str(returned_df["prompt_pricing"]), - str(pd.Series([0.1, 0.3], name="prompt_pricing", dtype=float)), - ) - self.assert_equal( - str(returned_df["completion_pricing"]), - str(pd.Series([0.2, 0.4], name="completion_pricing", dtype=float)), - ) - # File should be created and readable. - hdbg.dassert_file_exists(output_file) - saved_df = pd.read_csv(output_file) - self.assert_equal( - str(returned_df["completion_pricing"]), - str(saved_df["completion_pricing"]), - ) - self.assert_equal( - str(returned_df["prompt_pricing"]), str(saved_df["prompt_pricing"]) - ) - - -# ############################################################################# -# Test_calculate_cost -# ############################################################################# - - -class Test_calculate_cost(hunitest.TestCase): - def get_tmp_path(self) -> str: - """ - Return temporary file path. 
- """ - self.tmp_dir = self.get_scratch_space() - tmp_file_name: str = "tmp.models_info.csv" - self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) - return self.tmp_path - - def test_openai_cost(self) -> None: - """ - Known OpenAI model and token counts produce expected cost. - """ - comp = types.SimpleNamespace( - usage=types.SimpleNamespace( - prompt_tokens=1000000, completion_tokens=2000000 - ) - ) - llm_cost_tracker = hllm.LLMCostTracker() - cost = llm_cost_tracker.calculate_cost( - comp, model="gpt-3.5-turbo", models_info_file="" - ) - # 1000000*(0.5/1000000) + 20000000*(1.5/1000000) = 3.5 - self.assertAlmostEqual(cost, 3.5) - - def test_openai_unknown_model(self) -> None: - """ - Passing an unknown OpenAI model should raise an assertion or - ValueError. - """ - comp = types.SimpleNamespace( - usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) - ) - llm_cost_tracker = hllm.LLMCostTracker() - with pytest.raises(AssertionError): - llm_cost_tracker.calculate_cost( - comp, model="nonexistent-model", models_info_file="" - ) - - def test_openrouter_load_existing_csv(self) -> None: - """ - Assume that the CSV file exists for OpenRouter. - - Then we should load CSV and calculate cost without fetching. 
- """ - # Write a tiny CSV: id,prompt_pricing,completion_pricing - temp_csv_file = self.get_tmp_path() - pd.DataFrame( - { - "id": ["deepseek/m1"], - "prompt_pricing": [0.1], - "completion_pricing": [0.2], - } - ).to_csv(temp_csv_file, index=False) - comp = types.SimpleNamespace( - usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) - ) - llm_cost_tracker = hllm.LLMCostTracker() - cost = llm_cost_tracker.calculate_cost( - comp, - model="deepseek/m1", - models_info_file=temp_csv_file, - ) - # 1*0.1 + 1*0.2 = 0.1 + 0.2 = 0.3 - self.assertAlmostEqual(cost, 0.3) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py deleted file mode 100644 index fc684420b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py +++ /dev/null @@ -1,1403 +0,0 @@ -import logging -import os -import time -from typing import Callable, Dict, Optional - -import pandas as pd -import pytest - -import helpers.hcache_simple as hcacsimp -import helpers.hio as hio -import helpers.hllm_cli as hllmcli -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -from helpers.test.test_hcache_simple import _BaseCacheTest - -_LOG = logging.getLogger(__name__) - -# Disable calling LLM when testing. -_RUN_REAL_LLM = False -# _RUN_REAL_LLM = True - -# ############################################################################# -# Test_apply_llm_with_files -# ############################################################################# - -# Test cases shared across both library and executable tests. -# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. 
-_TEST_CASES = [ - # llm_cli.py --input_file input.txt --output_file output.txt - ( - "Basic usage with input file", - {}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful math assistant. Solve the problem step by step." - ( - "With custom system prompt", - { - "system_prompt": "You are a helpful math assistant. Solve the problem step by step." - }, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --model gpt-4 - ( - "With specific model selection", - {"model": "gpt-4"}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --expected_num_chars 500 - ( - "With progress bar (expected character count)", - {"expected_num_chars": 500}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful assistant that provides concise answers" --model gpt-4o-mini --expected_num_chars 1000 - ( - "Complete example with all options", - { - "system_prompt": "You are a helpful assistant that provides concise answers", - "model": "gpt-4o-mini", - "expected_num_chars": 1000, - }, - ), -] - -# Test cases for input_text functionality. -# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. -_TEST_CASES_INPUT_TEXT = [ - # llm_cli.py --input_text "2+2=" --output_file output.txt - ( - "Basic usage with input text", - { - "input_text": "2+2=", - }, - ), - # llm_cli.py --input_text "What is Python?" --output_file output.txt --system_prompt "You are a helpful assistant" - ( - "With input text and system prompt", - { - "input_text": "What is Python?", - "system_prompt": "You are a helpful assistant", - }, - ), - # llm_cli.py --input_text "Explain recursion" --output_file output.txt --model gpt-4o-mini - ( - "With input text and specific model", - { - "input_text": "Explain recursion", - "model": "gpt-4o-mini", - }, - ), -] - -# Test cases for print_only functionality. 
-# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. -_TEST_CASES_PRINT_ONLY = [ - # llm_cli.py --input_text "2+2=" --output_file - - ( - "Print to screen with input text", - { - "input_text": "2+2=", - "print_only": True, - }, - ), -] - - -# ############################################################################# -# TestApplyLlmBase -# ############################################################################# - - -class TestApplyLlmBase(_BaseCacheTest): - """ - Base class with helper methods for testing apply_llm functions. - - Provides common helper methods used across different test classes to - reduce code duplication and maintain consistency. - """ - - def _run_test_cases(self, use_llm_executable: bool) -> None: - """ - Helper method to run test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Get scratch space for test files. - scratch_dir = self.get_scratch_space() - # Create input file. - input_file = os.path.join(scratch_dir, "input.txt") - hio.to_file(input_file, "2+2=") - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES, 1): - _LOG.info("Running test case %d: %s", idx, description) - output_file = os.path.join(scratch_dir, f"output_{idx}.txt") - # Run test. - hllmcli.apply_llm_with_files( - input_file=input_file, - output_file=output_file, - use_llm_executable=use_llm_executable, - **kwargs, - ) - # Check that output file was created. - self.assertTrue(os.path.exists(output_file)) - # Check that output file is not empty. - output_content = hio.from_file(output_file) - self.assertGreater(len(output_content), 0) - - def _run_test_cases_input_text(self, use_llm_executable: bool) -> None: - """ - Helper method to run input_text test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Get scratch space for test files. 
- scratch_dir = self.get_scratch_space() - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES_INPUT_TEXT, 1): - _LOG.info("Running test case %d: %s", idx, description) - output_file = os.path.join(scratch_dir, f"output_text_{idx}.txt") - # Extract input_text from kwargs. - kwargs_copy = kwargs.copy() - input_text = kwargs_copy.pop("input_text") - # Run test using apply_llm directly. - response = hllmcli.apply_llm( - input_text, - use_llm_executable=use_llm_executable, - **kwargs_copy, - ) - # Write output to file. - hio.to_file(output_file, response) - # Check that output file was created. - self.assertTrue(os.path.exists(output_file)) - # Check that output file is not empty. - output_content = hio.from_file(output_file) - self.assertGreater(len(output_content), 0) - - -# ############################################################################# -# Test_apply_llm_with_files1 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_with_files1(TestApplyLlmBase): - """ - Test apply_llm_with_files using both library and executable interfaces. - - Tests run various command-line configurations to ensure they execute - without errors. Does not verify output correctness. - """ - - def test_library(self) -> None: - """ - Test multiple command-line configurations using library interface. - - Tests various command-line argument combinations to ensure they - execute without errors. Does not verify output correctness. - """ - self._run_test_cases(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test_executable(self) -> None: - """ - Test multiple command-line configurations using executable interface. - - Tests various command-line argument combinations to ensure they - execute without errors. Does not verify output correctness. 
- """ - self._run_test_cases(use_llm_executable=True) - - -# ############################################################################# -# Test_apply_llm_with_files2 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_with_files2(TestApplyLlmBase): - def test1_library(self) -> None: - """ - Test input_text parameter using library interface. - - Tests that input_text parameter works correctly when text is provided - directly instead of from a file. Does not verify output correctness. - """ - self._run_test_cases_input_text(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test1_executable(self) -> None: - """ - Test input_text parameter using executable interface. - - Tests that input_text parameter works correctly when text is provided - directly instead of from a file. Does not verify output correctness. - """ - self._run_test_cases_input_text(use_llm_executable=True) - - # ////////////////////////////////////////////////////////////////////////// - - def _run_test_cases_print_only(self, use_llm_executable: bool) -> None: - """ - Helper method to run print_only test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES_PRINT_ONLY, 1): - _LOG.info("Running test case %d: %s", idx, description) - # Extract parameters from kwargs. - kwargs_copy = kwargs.copy() - input_text = kwargs_copy.pop("input_text") - kwargs_copy.pop("print_only") # Not needed for apply_llm - # Run test using apply_llm directly - this should print to stdout. - response = hllmcli.apply_llm( - input_text, - use_llm_executable=use_llm_executable, - **kwargs_copy, - ) - # Print response to stdout (simulating print_only behavior). 
- print(response) - - def test2_library(self) -> None: - """ - Test print_only parameter using library interface. - - Tests that print_only parameter works correctly when output should be - printed to screen instead of written to file. Does not verify output - correctness. - """ - self._run_test_cases_print_only(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test2_executable(self) -> None: - """ - Test print_only parameter using executable interface. - - Tests that print_only parameter works correctly when output should be - printed to screen instead of written to file. Does not verify output - correctness. - """ - self._run_test_cases_print_only(use_llm_executable=True) - - -# ############################################################################# -# Test_llm1 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_llm1(hunitest.TestCase): - """ - Test _llm() function with different models and prompt lengths. - - Tests verify that _llm() correctly processes prompts of varying lengths - across different models, and tracks timing and cost information. - """ - - @staticmethod - def get_short_prompt() -> str: - """ - Get a short test prompt. - - :return: short system prompt string - """ - prompt = "You are a helpful assistant. Answer concisely." - return prompt - - @staticmethod - def get_medium_prompt() -> str: - """ - Get a medium-length test prompt. - - :return: medium-length system prompt string - """ - prompt = """ - You are a helpful assistant. Your task is to provide clear and - accurate answers to questions. Always be concise but thorough in - your explanations. If you don't know something, acknowledge it. - Use simple language that anyone can understand. 
- """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def get_long_prompt() -> str: - """ - Get a long test prompt. - - :return: long system prompt string - """ - prompt = """ - You are a highly knowledgeable AI assistant with expertise across - multiple domains including technology, science, mathematics, and - general knowledge. Your primary objectives are: - - 1. Provide accurate and well-researched information - 2. Explain concepts clearly and thoroughly - 3. Use examples when they help clarify complex topics - 4. Cite sources or acknowledge uncertainty when appropriate - 5. Adapt your language to the user's level of understanding - 6. Break down complex problems into manageable steps - 7. Verify calculations and logical reasoning before responding - 8. Consider multiple perspectives when discussing controversial topics - - When answering questions: - - Start with a direct answer to the question - - Follow with supporting details and context - - Use bullet points or numbered lists for clarity - - Provide examples when helpful - - Suggest follow-up resources if relevant - - Always maintain a professional, helpful, and respectful tone. - """ - prompt = hprint.dedent(prompt) - return prompt - - def test1(self) -> None: - """ - Test _llm() with multiple models and prompt lengths. - - Tests short, medium, and long prompts across different models to - verify proper handling and cost calculation. Reports results in a - comprehensive table with time, cost, and cost-per-character metrics. - """ - hcacsimp.set_cache_property("_test_llm", "mode", "DISABLE_CACHE") - # Define test configurations with model-specific inputs. - # Questions are designed to elicit longer responses for more accurate cost - # comparisons. 
- test_configs = [ - ( - "gpt-5-nano", - "Explain the concept of machine learning and provide examples of its applications in real-world scenarios.", - ), - ( - "gpt-4o-mini", - "Describe the history and culture of Paris, France, including its major landmarks and contributions to art and literature.", - ), - ( - "gpt-4o", - "Explain what recursion is in computer science, provide multiple examples with code, and discuss when to use recursion versus iteration.", - ), - ] - # Store results for tabular reporting. - results = [] - # Run tests for each model and prompt type combination. - for model, input_str in test_configs: - for prompt_type, prompt_getter in [ - ("short", self.get_short_prompt), - ("medium", self.get_medium_prompt), - ("long", self.get_long_prompt), - ]: - _LOG.info("Testing model=%s with %s prompt", model, prompt_type) - system_prompt = prompt_getter() - # Run test. - start_time = time.time() - response, cost = hllmcli._llm(system_prompt, input_str, model) - elapsed_time = time.time() - start_time - # Check outputs. - self.assertIsInstance(response, str) - self.assertGreater(len(response), 0) - self.assertIsInstance(cost, float) - self.assertGreaterEqual(cost, 0.0) - # Calculate cost per character and cost per 1M characters. - response_len = len(response) - cost_per_char = cost / response_len if response_len > 0 else 0.0 - cost_per_1m_chars = ( - cost_per_char * 1_000_000 if response_len > 0 else 0.0 - ) - # Store results. - results.append( - { - "Model": model, - "Prompt Type": prompt_type, - "Time (s)": elapsed_time, - "Cost ($)": cost, - "Response Length": response_len, - "Cost/Char ($)": cost_per_char, - "Cost/1M Chars ($)": cost_per_1m_chars, - } - ) - # Create DataFrame for tabular display. - results_df = pd.DataFrame(results) - # Format numeric columns. 
- results_df["Time (s)"] = results_df["Time (s)"].round(2) - results_df["Cost ($)"] = results_df["Cost ($)"].round(6) - results_df["Cost/Char ($)"] = results_df["Cost/Char ($)"].round(8) - results_df["Cost/1M Chars ($)"] = results_df["Cost/1M Chars ($)"].round( - 2 - ) - # Log results table. - _LOG.info("\n%s", hprint.frame("LLM Test Results")) - with pd.option_context( - "display.max_columns", - None, - "display.max_rows", - None, - "display.width", - None, - "display.max_colwidth", - None, - ): - _LOG.info("\n%s", results_df.to_string(index=False)) - - -# ############################################################################# -# Test_apply_llm_batch1 -# ############################################################################# - - -def _eval_functor(input_str: str, *, delay: float = 0.0) -> str: - """ - Evaluate the input string using eval and return the result as a string. - - :param input_str: mathematical expression to evaluate - :return: result of evaluation as a string - """ - _LOG.debug("input_str='%s'", input_str) - if delay > 0.0: - time.sleep(delay) - result = eval(input_str) - result_str = str(result) - _LOG.debug("-> result_str='%s'", result_str) - return result_str - - -# ############################################################################# -# Test_apply_llm_batch1 -# ############################################################################# - - -class Test_apply_llm_batch1(hunitest.TestCase): - """ - Test and compare three batch processing approaches. - - Tests: - - apply_llm_batch_individual() - - apply_llm_batch_with_shared_prompt() - - apply_llm_batch_combined() - to verify they return consistent results using a testing functor that uses - eval. - """ - - @staticmethod - def get_test_prompt() -> str: - """ - Get a simple test prompt for batch processing. - - :return: system prompt string - """ - prompt = "You are a calculator. Return only the numeric result." 
- return prompt - - def helper( - self, - model: str, - func: Callable, - testing_functor: Optional[Callable[[str], str]], - ) -> None: - """ - Helper function to run a batch processing function with test inputs. - - :param func: batch processing function to test - :param testing_functor: optional testing functor for mocking - """ - _LOG.trace(hprint.to_str("model func testing_functor")) - # Create test inputs. - prompt = self.get_test_prompt() - input_list = ["2 + 2", "3 * 3", "10 - 5", "20 / 4"] - expected_responses = ["4", "9", "5", "5"] - # Run the function. - responses, cost = func( - prompt=prompt, - input_list=input_list, - model=model, - testing_functor=testing_functor, - ) - # Check basic properties. - responses = [str(int(float(r))) for r in responses] - self.assertEqual(responses, expected_responses) - if testing_functor is None: - self.assertGreater(cost, 0.0) - else: - self.assertEqual(cost, 0.0) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_individual1(self) -> None: - """ - Test apply_llm_batch_individual without testing_functor. - - This test uses the real LLM API. - """ - model = "gpt-5-nano" - func = hllmcli.apply_llm_batch_individual - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_individual2(self) -> None: - """ - Test apply_llm_batch_individual with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_individual - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_shared1(self) -> None: - """ - Test apply_llm_batch_with_shared_prompt without testing_functor. - - This test uses the real LLM API. 
- """ - model = "gpt-5-nano" - func = hllmcli.apply_llm_batch_with_shared_prompt - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_shared2(self) -> None: - """ - Test apply_llm_batch_with_shared_prompt with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_with_shared_prompt - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_combined1(self) -> None: - """ - Test apply_llm_batch_combined without testing_functor. - - This test uses the real LLM API. - """ - model = "gpt-5-nano" - # model = "gpt-4o-mini" - func = hllmcli.apply_llm_batch_combined - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_combined2(self) -> None: - """ - Test apply_llm_batch_combined with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_combined - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - -# ############################################################################# -# Test_apply_llm_prompt_to_df1 -# ############################################################################# - - -class Test_apply_llm_prompt_to_df1(hunitest.TestCase): - """ - Test apply_llm_prompt_to_df with testing_functor. - - This is used to test the logic around `apply_llm_batch_*()` functions. - """ - - @staticmethod - def _extract_expression(obj) -> str: - """ - Extract mathematical expression from a DataFrame row or string. - - :param obj: either a string or a pandas Series - :return: extracted string for evaluation - """ - if isinstance(obj, pd.Series): - # Extract from DataFrame row. - if "expression" in obj.index: - expr = obj["expression"] - # Handle None, NaN, or empty string. 
- if pd.isna(expr) or expr == "": - return "" - return str(expr) - return "" - else: - # Already a string. - if pd.isna(obj) or obj == "": - return "" - return str(obj) - - def helper( - self, - df: pd.DataFrame, - batch_size: int, - expected_df: pd.DataFrame, - expected_stats: Dict[str, int], - ) -> None: - """ - Test apply_llm_prompt_to_df with testing_functor that uses eval. - """ - # Prepare inputs. - prompt = "Dummy" - extractor = self._extract_expression - # To test the progress bar. - # delay = 0.5 - delay = 0.0 - testing_functor = lambda input_str: _eval_functor(input_str, delay=delay) - # Run test. - result_df, stats = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df, - extractor=extractor, - target_col="result", - batch_mode="individual", - batch_size=batch_size, - model="gpt-5-nano", - testing_functor=testing_functor, - use_sys_stderr=True, - ) - # Check outputs. - self.assert_equal(str(result_df), str(expected_df)) - elapsed_time = stats.pop("elapsed_time_in_seconds") - self.assertGreater(elapsed_time, 0.0) - self.assertEqual(stats, expected_stats) - - def helper_test1(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with testing_functor that uses eval. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"], - "result": ["5", "50", "75", "5.0"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test2(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with larger dataframe and batch_size > 1. - """ - # Prepare inputs. 
- df = pd.DataFrame( - { - "expression": [ - "1 + 1", - "2 * 3", - "10 - 5", - "20 / 4", - "3 ** 2", - "100 // 3", - "15 % 4", - ], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": [ - "1 + 1", - "2 * 3", - "10 - 5", - "20 / 4", - "3 ** 2", - "100 // 3", - "15 % 4", - ], - "result": ["2", "6", "5", "5.0", "9", "33", "3"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test3(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with pre-filled target column values. - - This test verifies that all rows are processed and pre-filled values - are overwritten with computed results from the testing_functor. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": [ - "5 + 5", - "3 * 4", - "20 - 8", - "16 / 2", - "2 ** 3", - ], - } - ) - # Pre-fill some values in the target column. - df["result"] = [None, "12", None, None, "8"] - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": [ - "5 + 5", - "3 * 4", - "20 - 8", - "16 / 2", - "2 ** 3", - ], - "result": ["10", "12", "12", "8.0", "8"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test4(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with rows that have empty extraction results. - - This test verifies that rows with empty or None expressions are skipped - and marked with empty string in the result column. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"], - } - ) - # Prepare outputs. 
- expected_df = pd.DataFrame( - { - "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"], - "result": ["10", "", "20", "", "30"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 2, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test5(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with batch where all items have missing data. - - This test verifies that batches with all empty/None items are skipped - entirely and the else branch is executed. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["1 + 1", "", None, "", "5 + 5"], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": ["1 + 1", "", None, "", "5 + 5"], - "result": ["2", "", "", "", "10"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 3, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. 
- self.helper(df, batch_size, expected_df, expected_stats) - - # batch_size=1 - - def test1_num_batch1(self) -> None: - self.helper_test1(batch_size=1) - - def test2_num_batch1(self) -> None: - self.helper_test2(batch_size=1) - - def test3_num_batch1(self) -> None: - self.helper_test3(batch_size=1) - - def test4_num_batch1(self) -> None: - self.helper_test4(batch_size=1) - - def test5_num_batch1(self) -> None: - self.helper_test5(batch_size=1) - - # batch_size=2 - - def test1_num_batch2(self) -> None: - self.helper_test1(batch_size=2) - - def test2_num_batch2(self) -> None: - self.helper_test2(batch_size=2) - - def test3_num_batch2(self) -> None: - self.helper_test3(batch_size=2) - - def test4_num_batch2(self) -> None: - self.helper_test4(batch_size=2) - - def test5_num_batch2(self) -> None: - self.helper_test5(batch_size=2) - - # batch_size=3 - - def test1_num_batch3(self) -> None: - self.helper_test1(batch_size=3) - - def test2_num_batch3(self) -> None: - self.helper_test2(batch_size=3) - - def test3_num_batch3(self) -> None: - self.helper_test3(batch_size=3) - - def test4_num_batch3(self) -> None: - self.helper_test4(batch_size=3) - - def test5_num_batch3(self) -> None: - self.helper_test5(batch_size=3) - - # batch_size=10 - - def test1_num_batch10(self) -> None: - self.helper_test1(batch_size=10) - - def test2_num_batch10(self) -> None: - self.helper_test2(batch_size=10) - - def test3_num_batch10(self) -> None: - self.helper_test3(batch_size=10) - - def test4_num_batch10(self) -> None: - self.helper_test4(batch_size=10) - - def test5_num_batch10(self) -> None: - self.helper_test5(batch_size=10) - - -# ############################################################################# -# Test_apply_llm_prompt_to_df2 -# ############################################################################# - - -# TODO(gp): Convert this into a unit test for apply_llm_prompt. 
-class Test_apply_llm_prompt_to_df2(_BaseCacheTest): - """ - Test apply_llm_prompt_to_df with mocked cache. - """ - - @staticmethod - def get_test_prompt() -> str: - """ - Get a simple test prompt for LLM. - - This prompt asks the LLM to sum two numbers, providing a simple - and predictable test case. - - :return: system prompt string - """ - prompt = """ - You are a calculator. Given input in the format "a + b", return only - the sum as a number. - - Return ONLY the numeric result, nothing else. - """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def extract_test_fields(obj) -> str: - """ - Extract test fields from a DataFrame row or string. - - :param obj: either a string or a pandas Series - :return: extracted string for LLM processing - """ - if isinstance(obj, pd.Series): - # Extract from DataFrame row. - if "num1" in obj.index and "num2" in obj.index: - num1 = obj["num1"] - num2 = obj["num2"] - return f"{num1} + {num2}" - return "" - else: - # Already a string. - return obj - - def create_test_df(self) -> pd.DataFrame: - """ - Create a minimal DataFrame with test data (2 rows). - """ - df = pd.DataFrame( - { - "num1": [2, 10], - "num2": [3, 15], - } - ) - return df - - def run_cached_apply_llm_prompt_to_df(self) -> None: - prompt = self.get_test_prompt() - df = self.create_test_df() - prompt = self.get_test_prompt() - extractor = self.extract_test_fields - result_df, _ = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df, - extractor=extractor, - target_col="sum", - batch_mode="individual", - model="gpt-5-nano", - batch_size=10, - use_sys_stderr=True, - ) - _LOG.debug("result_df=%s", result_df) - # Check outputs. 
- expected_df = pd.DataFrame( - { - "num1": [2, 10], - "num2": [3, 15], - "sum": ["5", "25"], - } - ) - self.assert_equal(str(result_df), str(expected_df)) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test1(self) -> None: - """ - Warm up cache by calling apply_llm and save cache to file. - - This test creates a cache by calling apply_llm with test data, - then saves the cache to a file for use in subsequent tests. - """ - # Create a file with the cache content for test2 in the input directory. - input_dir = self.get_input_dir( - test_class_name=self.__class__.__name__, - test_method_name="test2", - ) - hcacsimp.set_cache_dir(input_dir) - # Call apply_llm to warm up the cache for both inputs. - self.run_cached_apply_llm_prompt_to_df() - # Flush the cache to disk to ensure it's saved. - hcacsimp.flush_cache_to_disk("_llm") - func_cache_data = hcacsimp.get_disk_cache("_llm") - # Check that the cache file exists and is not empty. - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=True - ) - - def test2(self) -> None: - """ - Test apply_llm_prompt_to_df with mocked cache. - - This test - - loads the cache file created in test1 - - mocks the cache with the data from the cache file - - verifies that apply_llm_prompt_to_df uses the cached values without - hitting the LLM API. - """ - # Prepare inputs. - # # Set up temporary cache directory. - scratch_dir = self.get_scratch_space() - hcacsimp.set_cache_dir(scratch_dir) - # Load the saved cache file from test2's input directory. - input_dir = self.get_input_dir() - # Load the cache data from the cache file. 
- cache_file = os.path.join(input_dir, "tmp.cache_simple._llm.json") - _LOG.debug("cache_file=%s", cache_file) - func_cache_data = hcacsimp._load_func_cache_data_from_file( - cache_file, "json" - ) - _LOG.debug("func_cache_data=%s", func_cache_data) - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=True - ) - _LOG.debug("Loaded func_cache_data=\n%s", func_cache_data) - hcacsimp.mock_cache_from_disk("_llm", func_cache_data) - try: - # Set abort_on_cache_miss to ensure we don't hit the LLM API. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True) - # Run apply_llm_prompt_to_df with mocked cache. - self.run_cached_apply_llm_prompt_to_df() - finally: - # Reset the cache property. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False) - - def test3(self) -> None: - """ - Test apply_llm_prompt_to_df without mocked cache. - - This test verifies that apply_llm_prompt_to_df raises an error when the - cache is missed and abort_on_cache_miss=True. - """ - # Set up temporary cache directory. - scratch_dir = self.get_scratch_space() - hcacsimp.set_cache_dir(scratch_dir) - try: - # Set abort_on_cache_miss to ensure we don't hit the LLM API. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True) - with self.assertRaises(ValueError) as fail: - # Run apply_llm_prompt_to_df without mocked cache. - self.run_cached_apply_llm_prompt_to_df() - self.assertIn("Cache miss", str(fail.exception)) - finally: - # Reset the cache property. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False) - - -# ############################################################################# -# Test_apply_llm_batch_cost_comparison -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_batch_cost_comparison(hunitest.TestCase): - """ - Test and compare costs of different batch processing approaches. 
- - Tests both direct batch function calls and apply_llm_prompt_to_df with - different batch modes. - """ - - @staticmethod - def get_person_industry_prompt() -> str: - """ - Get the industry classification prompt for testing. - - :return: system prompt string - """ - prompt = """ - Given the following list of industries with examples, classify the text into the - corresponding industry: - - Industrial & Built Environment - - Transportation & Logistics - - Consumer & Retail - - Technology & Digital Services - - Health & Life Sciences - - Finance & Professional Services - - Public & Social Sector - - Media, Marketing & Experiences - - You MUST report the industry exactly as one of the options above. Do not - include any other text. - If you are not sure about the industry, return "unknown". - """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def get_test_industries() -> list: - """ - Get a list of test company descriptions for industry classification. - - :return: list of company descriptions - """ - industries = [ - "A company that sells fresh produce and operates farms", - "A car manufacturer that produces electric vehicles", - "A construction company specializing in residential buildings", - "A company that manufactures consumer electronics and appliances", - "An online learning platform providing courses for students", - "An electric utility company providing power generation services", - "A civil engineering firm providing infrastructure design", - "A company organizing corporate events and conferences", - "A bank providing retail banking and investment services", - "A nonprofit organization focused on environmental conservation", - "A hospital providing emergency and surgical medical services", - "A staffing agency providing recruitment and temp worker services", - "A data center company providing server hardware and infrastructure", - "A software development company creating enterprise resource planning systems", - "A cybersecurity firm 
providing threat detection and penetration testing", - "A cloud infrastructure provider offering scalable computing resources", - "An IT company providing network management and server maintenance", - "A consulting firm helping businesses integrate SAP and Oracle systems", - "A help desk company providing 24/7 technical support services", - "A data analytics company building business intelligence dashboards", - "A DevOps company providing CI/CD pipeline automation tools", - "A law firm specializing in corporate mergers and acquisitions", - "A shipping company providing international freight and logistics", - "A factory manufacturing industrial machinery and equipment", - "An advertising agency creating brand campaigns for consumer products", - "A streaming service providing movies and TV shows online", - "A pharmaceutical company developing new drugs and vaccines", - "A commercial real estate firm managing office building portfolios", - "An online retailer selling clothing and accessories through eCommerce", - "A sports equipment manufacturer producing gear for athletes", - "A telecommunications company providing mobile and internet services", - "A hotel chain operating luxury resorts and vacation properties", - ] - return industries - - def helper(self, model: str, batch_size: int) -> None: - """ - Compare costs and time of different batch modes in apply_llm_prompt_to_df. - - This test compares the performance of three batch modes: - 1. individual: processes each query separately - 2. shared_prompt: uses shared prompt context - 3. combined: combines all queries into single API call - """ - # Reset cache before each batch mode to ensure fair comparison. - hcacsimp.set_cache_dir(self.get_scratch_space()) - _LOG.info("Cache directory: %s", hcacsimp.get_cache_dir()) - hcacsimp.reset_cache("", interactive=False) - # Prepare inputs. 
- prompt = self.get_person_industry_prompt() - industries = self.get_test_industries() - testing_functor = None - # Create DataFrame from test data. - df = pd.DataFrame({"description": industries}) - - # Extractor function to get text from DataFrame row. - def extractor(obj): - if isinstance(obj, pd.Series): - return obj["description"] - return str(obj) - - # Test each batch mode. - batch_modes = ["individual", "shared_prompt", "combined"] - results = [] - # Store result DataFrames to compare across batch modes. - result_dfs = {} - for batch_mode in batch_modes: - _LOG.info( - "\n%s", hprint.frame("Testing batch mode: %s" % batch_mode) - ) - # Create a copy of the DataFrame for this batch mode. - df_copy = df.copy() - # Call apply_llm_prompt_to_df with the current batch mode. - result_df, stats = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df_copy, - extractor=extractor, - target_col="industry", - batch_mode=batch_mode, - model=model, - batch_size=batch_size, - testing_functor=testing_functor, - use_sys_stderr=True, - ) - # Get elapsed time from stats. - elapsed_time = stats["elapsed_time_in_seconds"] - # Print time and cost for this batch mode. - _LOG.info( - "Batch mode '%s': Time=%.2fs, Cost=$%.6f", - batch_mode, - elapsed_time, - stats["total_cost_in_dollars"], - ) - # Store results. - results.append( - { - "Batch Mode": batch_mode, - "Time (s)": elapsed_time, - "Num Items": stats["num_items"], - "Num Skipped": stats["num_skipped"], - "Num Batches": stats["num_batches"], - "Total Cost ($)": stats["total_cost_in_dollars"], - } - ) - # Store result DataFrame for comparison. - result_dfs[batch_mode] = result_df - # Verify results. - self.assertEqual(len(result_df), len(industries)) - self.assertIn("industry", result_df.columns) - # Check that all batch modes produce the same results. - # Compare each batch mode's results with the first batch mode. 
- first_batch_mode = batch_modes[0] - first_result_df = result_dfs[first_batch_mode]["industry"].reset_index( - drop=True - ) - for batch_mode in batch_modes[1:]: - compare_result_df = result_dfs[batch_mode]["industry"].reset_index( - drop=True - ) - # Create a comparison DataFrame between the two batch modes. - match_df = pd.DataFrame( - { - first_batch_mode: first_result_df, - batch_mode: compare_result_df, - } - ) - # Add a column with whether they match or not. - match_df["Match"] = ( - match_df[first_batch_mode] == match_df[batch_mode] - ) - all_match = match_df["Match"].all() - if not all_match: - _LOG.error( - "Results mismatch between '%s' and '%s':\n%s", - first_batch_mode, - batch_mode, - match_df, - ) - _LOG.info( - "Results match between '%s' and '%s'", - first_batch_mode, - batch_mode, - ) - # Create comparison DataFrame. - comparison_df = pd.DataFrame(results) - # Add relative metrics compared to individual mode. - individual_time = comparison_df.loc[ - comparison_df["Batch Mode"] == "individual", "Time (s)" - ].iloc[0] - individual_cost = comparison_df.loc[ - comparison_df["Batch Mode"] == "individual", "Total Cost ($)" - ].iloc[0] - comparison_df["Time Ratio"] = comparison_df["Time (s)"] / individual_time - comparison_df["Cost Ratio"] = ( - comparison_df["Total Cost ($)"] / individual_cost - ) - # Format the DataFrame for better readability. - comparison_df["Time (s)"] = comparison_df["Time (s)"].round(2) - comparison_df["Total Cost ($)"] = comparison_df["Total Cost ($)"].round( - 6 - ) - comparison_df["Time Ratio"] = comparison_df["Time Ratio"].round(2) - comparison_df["Cost Ratio"] = comparison_df["Cost Ratio"].round(2) - # Print comparison_df without truncation. 
- with pd.option_context( - "display.max_columns", - None, - "display.max_rows", - None, - "display.width", - None, - "display.max_colwidth", - None, - ): - _LOG.info("Batch mode comparison:\n%s", comparison_df) - - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 17.98 32 4 0.000653 1.00 1.00 - # shared_prompt 17.60 32 4 0.000998 0.98 1.53 - # combined 8.42 32 4 0.000330 0.47 0.51 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 19.27 32 2 0.000651 1.00 1.00 - # shared_prompt 19.34 32 2 0.001385 1.00 2.13 - # combined 7.45 32 2 0.000277 0.39 0.43 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 16.38 32 1 0.000651 1.00 1.00 - # shared_prompt 17.51 32 1 0.002148 1.07 3.30 - # combined 6.15 32 1 0.000251 0.38 0.39 - def test1(self) -> None: - model = "gpt-4o-mini" - batch_size = 8 - self.helper(model, batch_size) - # - batch_size = 16 - self.helper(model, batch_size) - # - batch_size = 32 - self.helper(model, batch_size) - - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 68.57 32 4 0.002711 1.00 1.00 - # shared_prompt 53.07 32 4 0.002638 0.77 0.97 - # combined 29.30 32 4 0.001654 0.43 0.61 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 68.40 32 2 0.002788 1.00 1.00 - # shared_prompt 53.88 32 2 0.002809 0.79 1.01 - # combined 25.99 32 2 0.001643 0.38 0.59 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 59.38 32 1 0.002610 1.00 1.00 - # shared_prompt 52.61 32 1 0.002482 0.89 0.95 - # combined 15.79 32 1 0.001118 0.27 0.43 - def test2(self) -> None: - model = "gpt-5-nano" - batch_size = 8 - self.helper(model, batch_size) - # - batch_size = 16 - self.helper(model, batch_size) - # - batch_size = 32 - self.helper(model, batch_size) diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py deleted file mode 100644 index a7e567679..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py +++ /dev/null @@ -1,103 +0,0 @@ -import asyncio -import logging -from typing import Optional - -import helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hlogging as hloggin -import helpers.hunit_test as hunitest -import helpers.hwall_clock_time as hwacltim - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# ############################################################################# -# Test_logging1 -# ############################################################################# - - -class Test_logging1(hunitest.TestCase): - def test_logging_levels1(self) -> None: - hloggin.test_logger() - - -# ############################################################################# - - -# ############################################################################# -# Test_hlogging_asyncio1 -# ############################################################################# - - -class Test_hlogging_asyncio1(hunitest.TestCase): - @staticmethod - async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: - """ - Coroutine simulating a workload waiting for 1s. - """ - # Set the coroutine name. 
- task = asyncio.current_task() - task.set_name("workload") - - def _print_time() -> None: - true_wall_clock_time = hdateti.get_current_time("ET") - _LOG.debug("wall_clock_time=%s", true_wall_clock_time) - event_loop_time = get_wall_clock_time() - _LOG.debug("event_loop_time=%s", event_loop_time) - - _print_time() - _LOG.debug(" -> wait") - await asyncio.sleep(1.0) - _print_time() - - def run_test( - self, - event_loop: Optional[asyncio.AbstractEventLoop], - get_wall_clock_time: hdateti.GetWallClockTime, - ) -> None: - coroutine = self.workload(get_wall_clock_time) - hasynci.run(coroutine, event_loop=event_loop) - - # pylint: disable=line-too-long - def test_real_time1(self) -> None: - """ - Use the logger. - - The output is like: - - ``` - 07:55:54 hunit_test.py setUp:932 Resetting random.seed to 20000101 - 07:55:54 hunit_test.py setUp:935 Resetting np.random.seed to 20000101 - 07:55:54 hunit_test.py setUp:944 base_dir_name=/app/amp/helpers/test - ``` - """ - # Use the wall clock time with no special event loop. - get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") - event_loop = None - # Run. - self.run_test(event_loop, get_wall_clock_time) - - # pylint: disable=line-too-long - def test_simulated_time1(self) -> None: - """ - Use the logger with event_loop and asyncio. - - The output is like: - - ``` - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:28 wall_clock_time=2022-01-18 07:52:55.337574-05:00 - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:30 event_loop_time=2022-01-18 07:52:55.310587-05:00 - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py workload:33 -> wait - ``` - """ - with hasynci.solipsism_context() as event_loop: - # Use the simulate wall clock time. - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - hwacltim.set_wall_clock_time(get_wall_clock_time) - # Run. 
- self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py deleted file mode 100644 index 2f1653c79..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py +++ /dev/null @@ -1,716 +0,0 @@ -import logging -import os -from typing import List - -import helpers.hio as hio -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_remove_bullets -# ############################################################################# - - -class Test_remove_bullets(hunitest.TestCase): - """ - Test the remove_bullets function. - """ - - def helper(self, text: str, expected: str) -> None: - """ - Helper to test remove_bullets function. - - :param text: Input text with bullets - :param expected: Expected output with bullets removed - """ - # Run test. - text = hprint.dedent(text) - actual = hmarkdo.remove_bullets(text) - # Check outputs. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic bullet removal. - """ - # Prepare inputs. - text = """ - - First item - - Second item - - Third item - """ - # Prepare outputs. - expected = """ - First item - Second item - Third item - """ - # Run test. - self.helper(text, expected) - - def test2(self) -> None: - """ - Test nested bullets removal. - """ - # Prepare inputs. - text = """ - - First item - - Nested item - - Another nested - - Second item - """ - # Prepare outputs. - expected = """ - First item - Nested item - Another nested - Second item - """ - # Run test. 
- self.helper(text, expected) - - def test3(self) -> None: - """ - Test mixed content with bullets and non-bullets. - """ - # Prepare inputs. - text = """ - - Bullet item - Regular text line - - Another bullet - More regular text - """ - # Prepare outputs. - expected = """ - Bullet item - Regular text line - Another bullet - More regular text - """ - # Run test. - self.helper(text, expected) - - def test4(self) -> None: - """ - Test empty lines preservation. - """ - # Prepare inputs. - text = """ - - First item - - - Second item - - - Third item - """ - # Prepare outputs. - expected = """ - First item - - Second item - - Third item - """ - # Run test. - self.helper(text, expected) - - -# ############################################################################# -# Test_bold_first_level_bullets1 -# ############################################################################# - - -class Test_bold_first_level_bullets1(hunitest.TestCase): - def helper(self, text: str, expected: str) -> None: - """ - Helper to test bold_first_level_bullets function. - """ - text = hprint.dedent(text) - lines = text.split("\n") - actual_lines = hmarkdo.bold_first_level_bullets(lines) - actual = "\n".join(actual_lines) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic first-level bullet bolding. - """ - text = r""" - - First item - - Sub item - - Second item - """ - expected = r""" - - **First item** - - Sub item - - **Second item** - """ - self.helper(text, expected) - - def test2(self) -> None: - """ - Test with mixed content including non-bullet text. - """ - text = r""" - Some text here - - First bullet - More text - - Second bullet - - Nested bullet - Final text - """ - expected = r""" - Some text here - - **First bullet** - More text - - **Second bullet** - - Nested bullet - Final text - """ - self.helper(text, expected) - - def test3(self) -> None: - """ - Test with multiple levels of nesting. 
- """ - text = r""" - - Top level - - Second level - - Third level - - Back to second - - Another top - """ - expected = r""" - - **Top level** - - Second level - - Third level - - Back to second - - **Another top** - """ - self.helper(text, expected) - - def test4(self) -> None: - """ - Test with empty lines between bullets. - """ - text = r""" - - First item - - - Second item - - Sub item - - - Third item - """ - expected = r""" - - **First item** - - - **Second item** - - Sub item - - - **Third item** - """ - self.helper(text, expected) - - def test5(self) -> None: - """ - Test with text that already contains some bold markers. - """ - text = r""" - - First **important** point - - Sub point - - Second point with emphasis - """ - expected = r""" - - First **important** point - - Sub point - - **Second point with emphasis** - """ - self.helper(text, expected) - - -# ############################################################################# -# Test_colorize_bold_text1 -# ############################################################################# - - -class Test_colorize_bold_text1(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic case with single bold text. - """ - text = "This is **bold** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"This is **\red{bold}** text" - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Test multiple bold sections get different colors. - """ - text = "**First** normal **Second** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** normal **\teal{Second}** text" - self.assert_equal(actual, expected) - - def test3(self) -> None: - """ - Test underscore style bold text. 
- """ - text = "This is __bold__ text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"This is **\red{bold}** text" - self.assert_equal(actual, expected) - - def test4(self) -> None: - """ - Test text with no bold sections returns unchanged. - """ - text = "This is plain text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = "This is plain text" - self.assert_equal(actual, expected) - - def test5(self) -> None: - """ - Test mixed bold styles in same text. - """ - text = "**First** and __Second__ bold" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** and **\teal{Second}** bold" - self.assert_equal(actual, expected) - - def test6(self) -> None: - """ - Test with abbreviations=False uses full \textcolor syntax. - """ - text = "This is **bold** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=False - ) - expected = r"This is **\textcolor{red}{bold}** text" - self.assert_equal(actual, expected) - - def test7(self) -> None: - """ - Test with multiple bullet lists and different colors. 
- """ - text = """ - **List 1:** - - First item - - Second item - - **List 2:** - - Another item - - Final item - """ - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r""" - **\red{List 1:}** - - First item - - Second item - - **\teal{List 2:}** - - Another item - - Final item - """ - self.assert_equal(actual, expected) - - def test8(self) -> None: - text = hprint.dedent( - r""" - - **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **\orange{Key Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **\blue{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **\violet{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - - - **\pink{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - - - **\olive{Example}** - - A thermostat estimates room temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - - - **\darkgray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = hprint.dedent( - r""" - - **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **\orange{Key 
Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **\olive{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **\green{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - - - **\cyan{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - - - **\blue{Example}** - - A thermostat estimates room temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - - - **\darkgray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - self.assert_equal(actual, expected) - - def test9(self) -> None: - """ - Test basic case with single bold text. 
- """ - text = "**First** normal **Second** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** normal **\teal{Second}** text" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_format_first_level_bullets1 -# ############################################################################# - - -class Test_format_first_level_bullets1(hunitest.TestCase): - # TODO(ai): Rename -> helper - def format_and_compare_markdown(self, text: str, expected: str) -> None: - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # - lines = text.split("\n") - actual_lines = hmarkdo.format_first_level_bullets(lines) - actual = "\n".join(actual_lines) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test basic case with single first level bullet. - """ - text = """ - Some text - - First bullet - More text""" - expected = """ - Some text - - - First bullet - More text""" - self.format_and_compare_markdown(text, expected) - - def test2(self) -> None: - """ - Test multiple first level bullets. - """ - text = """ - - First bullet - - Second bullet - - Third bullet""" - expected = """ - - First bullet - - - Second bullet - - - Third bullet""" - self.format_and_compare_markdown(text, expected) - - def test3(self) -> None: - """ - Test mixed first level and indented bullets. - """ - text = """ - - First level - - - Second level - - Another second - - Back to first""" - expected = """ - - First level - - Second level - - Another second - - - Back to first""" - self.format_and_compare_markdown(text, expected) - - def test4(self) -> None: - """ - Test mixed content with text and bullets. 
- """ - text = """ - Some initial text - - First bullet - Some text in between - - Second bullet - Final text""" - expected = """ - Some initial text - - - First bullet - Some text in between - - - Second bullet - Final text""" - self.format_and_compare_markdown(text, expected) - - def test5(self) -> None: - """ - Test nested bullets with multiple levels. - """ - text = """ - - Level 1 - - Level 2 - - Level 3 - - Another level 1 - - Level 2 again""" - expected = """ - - Level 1 - - Level 2 - - Level 3 - - - Another level 1 - - Level 2 again""" - self.format_and_compare_markdown(text, expected) - - def test6(self) -> None: - """ - Test empty lines handling. - """ - text = """ - - First bullet - - - Second bullet - - - Third bullet""" - expected = """ - - First bullet - - - Second bullet - - - Third bullet""" - self.format_and_compare_markdown(text, expected) - - def test7(self) -> None: - """ - Test mixed content with bullets and text. - """ - text = """ - Some text here - - First bullet - More text - - Second bullet - - Nested bullet - Final paragraph - - Last bullet""" - expected = """ - Some text here - - - First bullet - More text - - - Second bullet - - Nested bullet - Final paragraph - - - Last bullet""" - self.format_and_compare_markdown(text, expected) - - def test8(self) -> None: - """ - Test bullets with inline formatting. - """ - text = """ - - **Bold bullet** point - - *Italic nested* bullet - - `Code bullet` here - - **_Mixed_** formatting""" - expected = """ - - **Bold bullet** point - - *Italic nested* bullet - - - `Code bullet` here - - **_Mixed_** formatting""" - self.format_and_compare_markdown(text, expected) - - def test9(self) -> None: - """ - Test bullets with special characters. 
- """ - text = """ - - Bullet with (parentheses) - - Bullet with [brackets] - - Bullet with {braces} - - Bullet with $math$""" - expected = """ - - Bullet with (parentheses) - - Bullet with [brackets] - - - Bullet with {braces} - - Bullet with $math$""" - self.format_and_compare_markdown(text, expected) - - def test10(self) -> None: - text = hprint.dedent( - r""" - - **Objective** - - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **Key Components** - - - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - **Utility update**: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **Learning Process** - - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **Use Case** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - expected = hprint.dedent( - r""" - - **Objective** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **Key Components** - - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - **Utility update**: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **Learning Process** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **Use Case** - - Suitable when environment dynamics are stationary and can be learned from - interaction - 
""" - ) - self.format_and_compare_markdown(text, expected) - - -# ############################################################################# -# Test_process_lines1 -# ############################################################################# - - -class Test_process_lines1(hunitest.TestCase): - # TODO(gp): This doesn't seem correct. - def test1(self) -> None: - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - txt_in = hio.from_file(input_file_path) - txt_in = hprint.dedent(txt_in) - lines = txt_in.split("\n") - out = [] - for i, line in hmarkdo.process_lines(lines): - _LOG.debug(hprint.to_str("line")) - out.append(f"{i}:{line}") - actual = "\n".join(out) - self.check_string( - actual, dedent=True, remove_lead_trail_empty_lines=True - ) - - -# ############################################################################# -# Test_process_code_block1 -# ############################################################################# - - -class Test_process_code_block1(hunitest.TestCase): - def helper(self, txt: str) -> str: - out: List[str] = [] - in_code_block = False - lines = txt.split("\n") - for i, line in enumerate(lines): - _LOG.debug("%s:line=%s", i, line) - # Process the code block. - do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( - line, in_code_block, i, lines - ) - out.extend(out_tmp) - if do_continue: - continue - # - out.append(line) - return "\n".join(out) - - def test1(self) -> None: - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - txt_in = hio.from_file(input_file_path) - txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) - # Run function. - actual = self.helper(txt_in) - # Check output. 
- self.check_string( - actual, dedent=True, remove_lead_trail_empty_lines=True - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py deleted file mode 100644 index e33c04dc8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py +++ /dev/null @@ -1,205 +0,0 @@ -import helpers.hmarkdown as hmarkdo -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Test_process_color_commands1 -# ############################################################################# - - -class Test_process_color_commands1(hunitest.TestCase): - def test_text_content1(self) -> None: - """ - Test with plain text content. - """ - txt_in = r"\red{Hello world}" - expected = r"\textcolor{red}{\text{Hello world}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_math_content1(self) -> None: - """ - Test color command with mathematical content. - """ - txt_in = r"\blue{x + y = z}" - expected = r"\textcolor{blue}{x + y = z}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_multiple_colors1(self) -> None: - """ - Test multiple color commands in the same line. - """ - txt_in = r"The \red{quick} \blue{fox} \green{jumps}" - expected = r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_mixed_content1(self) -> None: - """ - Test color commands with both text and math content. 
- """ - txt_in = r"\red{Result: x^2 + y^2}" - expected = r"\textcolor{red}{Result: x^2 + y^2}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_nested_braces1(self) -> None: - """ - Test color command with nested braces. - """ - txt_in = r"\blue{f(x) = {x + 1}}" - expected = r"\textcolor{blue}{f(x) = {x + 1}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_colorize_bullet_points_in_slide1 -# ############################################################################# - - -class Test_colorize_bullet_points_in_slide1(hunitest.TestCase): - def test1(self) -> None: - # Prepare inputs. - text = r""" - - **VC Theory** - - Measures model - - - **Bias-Variance Decomposition** - - Prediction error - - **Bias** - - **Variance** - - - **Computation Complexity** - - Balances model - - Related to - - E.g., Minimum - - - **Bayesian Approach** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **Problem in ML Theory:** - - Assumptions may not align with practical problems - """ - # Run function. - all_md_colors = [ - "red", - "orange", - "yellow", - "lime", - "green", - "teal", - "cyan", - "blue", - "purple", - "violet", - "magenta", - "pink", - "brown", - "olive", - "gray", - "darkgray", - "lightgray", - "black", - "white", - ] - - actual = hmarkdo.colorize_bullet_points_in_slide( - text, all_md_colors=all_md_colors - ) - # Check output. 
- expected = r""" - - **\red{VC Theory}** - - Measures model - - - **\orange{Bias-Variance Decomposition}** - - Prediction error - - **\yellow{Bias}** - - **\lime{Variance}** - - - **\green{Computation Complexity}** - - Balances model - - Related to - - E.g., Minimum - - - **\teal{Bayesian Approach}** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **\cyan{Problem in ML Theory:}** - - Assumptions may not align with practical problems - """ - self.assert_equal(actual, expected) - - def test2(self) -> None: - # Prepare inputs. - text = r""" - * Machine Learning Flow - - ::: columns - :::: {.column width=90%} - - Question - - E.g., "How can we predict house prices?" - - Input data - - E.g., historical data of house sales - - - _"If I were given one hour to save the planet, I would spend 59 minutes - defining the problem and one minute resolving it"_ (Albert Einstein) - - - **Not all phases are equally important!** - - Question $>$ Data $>$ Features $>$ Algorithm - - Clarity of the question impacts project success - - Quality and relevance of data are crucial for performance - - Proper feature selection simplifies the model and improves accuracy - - Algorithm is often less important (contrary to popular belief!) - :::: - :::: {.column width=5%} - - ```graphviz[height=90%] - digraph BayesianFlow { - rankdir=TD; - splines=true; - ... - } - ``` - :::: - ::: - """ - # Run function. - actual = hmarkdo.colorize_bullet_points_in_slide(text) - # Check output. - expected = r""" - * Machine Learning Flow - - ::: columns - :::: {.column width=90%} - - Question - - E.g., "How can we predict house prices?" 
- - Input data - - E.g., historical data of house sales - - - _"If I were given one hour to save the planet, I would spend 59 minutes - defining the problem and one minute resolving it"_ (Albert Einstein) - - - **\red{Not all phases are equally important!}** - - Question $>$ Data $>$ Features $>$ Algorithm - - Clarity of the question impacts project success - - Quality and relevance of data are crucial for performance - - Proper feature selection simplifies the model and improves accuracy - - Algorithm is often less important (contrary to popular belief!) - :::: - :::: {.column width=5%} - - ```graphviz[height=90%] - digraph BayesianFlow { - rankdir=TD; - splines=true; - ... - } - ``` - :::: - ::: - """ - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py deleted file mode 100644 index 8d47a3966..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py +++ /dev/null @@ -1,355 +0,0 @@ -import logging -from typing import List, Tuple - -import helpers.hprint as hprint -import helpers.hmarkdown_div_blocks as hmadiblo -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _prepare_div_block_inputs(txt: str, expected: str) -> Tuple[List[str], str]: - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) - if expected.startswith("\n"): - expected = expected[1:] - if expected.endswith("\n"): - expected = expected[:-1] - lines = txt.split("\n") - return lines, expected - - -# ############################################################################# -# Test_add_prettier_ignore_to_div_blocks -# 
############################################################################# - - -class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): - """ - Test the function to add prettier-ignore comments around div blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. - lines, expected = _prepare_div_block_inputs(txt, expected) - # Run test. - actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assert_equal(actual, expected) - - def test_simple_div_block(self) -> None: - """ - Test a simple div block with two colons. - """ - txt = """ - :::: - ::: - """ - # Add a leading empty line in expected since function adds it. - expected = """ - - - :::: - ::: - - - """ - self.helper(txt, expected) - - def test_div_block_with_attributes(self) -> None: - """ - Test a div block with column attributes. - """ - txt = """ - :::: - ::::{.column width=40%} - """ - expected = """ - - - :::: - ::::{.column width=40%} - - - """ - self.helper(txt, expected) - - def test_multiple_div_blocks(self) -> None: - """ - Test multiple div blocks in the same content. - """ - txt = """ - Some text before - - :::: - ::::{.column width=40%} - - Middle text - - :::columns - ::::{.column width=60%} - - Some text after - """ - expected = """ - Some text before - - - - :::: - ::::{.column width=40%} - - - - Middle text - - - - :::columns - ::::{.column width=60%} - - - - Some text after - """ - self.helper(txt, expected) - - def test_no_div_blocks(self) -> None: - """ - Test content with no div blocks. - """ - txt = """ - Some normal text - with no div blocks - at all - """ - expected = """ - Some normal text - with no div blocks - at all - """ - self.helper(txt, expected) - - def test_unclosed_div_block(self) -> None: - """ - Test a div block that is not closed. 
- """ - txt = """ - Some text - - :::: - - More text - """ - expected = """ - Some text - - :::: - - More text - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_remove_prettier_ignore_from_div_blocks -# ############################################################################# - - -class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): - """ - Test the function to remove prettier-ignore comments from div blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. - lines, expected = _prepare_div_block_inputs(txt, expected) - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assert_equal(actual, expected) - - def test_remove_simple_block(self) -> None: - """ - Test removing prettier-ignore from a simple div block. - """ - txt = """ - - - :::: - ::: - - - """ - expected = """ - :::: - ::: - """ - self.helper(txt, expected) - - def test_remove_block_with_content(self) -> None: - """ - Test removing prettier-ignore from a div block with content. - """ - txt = """ - Some text before - - - :::: - ::::{.column width=40%} - - - Some text after - """ - expected = """ - Some text before - :::: - ::::{.column width=40%} - Some text after - """ - self.helper(txt, expected) - - def test_remove_multiple_blocks(self) -> None: - """ - Test removing prettier-ignore from multiple div blocks. - """ - txt = """ - Text before - - - :::: - ::::{.column width=40%} - - - Middle text - - - :::columns - ::::{.column width=60%} - - - Text after - """ - expected = """ - Text before - :::: - ::::{.column width=40%} - Middle text - :::columns - ::::{.column width=60%} - Text after - """ - self.helper(txt, expected) - - def test_no_prettier_ignore_comments(self) -> None: - """ - Test content with no prettier-ignore comments. 
- """ - txt = """ - Some normal text - with no prettier-ignore comments - at all - """ - expected = """ - Some normal text - with no prettier-ignore comments - at all - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_add_remove_prettier_ignore_roundtrip -# ############################################################################# - - -class Test_add_remove_prettier_ignore_roundtrip(hunitest.TestCase): - """ - Test that adding and removing prettier-ignore comments is a roundtrip. - """ - - def helper(self, txt: str) -> None: - # Prepare inputs. - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - # Add prettier-ignore comments. - lines_with_comments = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - # Remove prettier-ignore comments. - lines_restored = hmadiblo.remove_prettier_ignore_from_div_blocks( - lines_with_comments - ) - actual = "\n".join(lines_restored) - expected = txt - # Check outputs. - self.assert_equal(actual, expected) - - def test_roundtrip_simple(self) -> None: - """ - Test that add and remove operations are inverses for simple div block. - """ - txt = """ - :::: - ::: - """ - self.helper(txt) - - def test_roundtrip_complex1(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. - """ - txt = """ - Text1 - - :::: - ::::{.column width=40%} - - Text2 - - :::columns - ::::{.column width=60%} - - Text3 - """ - self.helper(txt) - - def test_roundtrip_complex2(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. - """ - txt = """ - Text1 - ::: - ::::{.column width=40%} - Text2 - :::: - ::::{.column width=40%} - Text3 - :::columns - ::::{.column width=60%} - Text4 - """ - self.helper(txt) - - def test_roundtrip_complex3(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. 
- """ - txt = """ - Text1 - - ::: - ::::{.column width=40%} - - Text2 - :::: - ::::{.column width=40%} - - Text3 - :::columns - ::::{.column width=60%} - Text4 - """ - self.helper(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py deleted file mode 100644 index c8ccc96b8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py +++ /dev/null @@ -1,218 +0,0 @@ -import logging -import pprint -from typing import Dict, List - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_fenced_blocks_with_tags1 -# ############################################################################# - - -class Test_replace_fenced_blocks_with_tags1(hunitest.TestCase): - def helper( - self, text: str, expected_lines: List[str], expected_map: Dict[str, str] - ) -> None: - """ - Test replacing fenced code blocks with tags. - """ - lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) - lines = lines.split("\n") - # Call function. - actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) - # Check output. - fence_map_as_str = pprint.pformat(fence_map) - expected_map_as_str = pprint.pformat(expected_map) - self.assert_equal(fence_map_as_str, expected_map_as_str) - # - actual_lines = "\n".join(actual_lines) - expected_lines = hprint.dedent( - expected_lines, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual_lines, expected_lines) - - def helper_round_trip(self, text: str) -> None: - """ - Test the round trip. 
- """ - # Do the round trip. - lines = text.split("\n") - actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) - act_text = hmarkdo.replace_tags_with_fenced_blocks( - actual_lines, fence_map - ) - # Check output. - act_text = "\n".join(act_text) - self.assert_equal(act_text, text) - - def test1(self) -> None: - """ - Test replacing fenced code blocks with tags. - """ - # Prepare inputs. - text = """ - Some text before - ```python - def foo(): - return 42 - ``` - Text between blocks - ```` - Plain code block - ```` - Some text after - """ - # Prepare outputs. - expected_lines = """ - Some text before - - Text between blocks - - Some text after - """ - # Check fence map. - expected_map = { - "1": "```python\ndef foo():\n return 42\n```", - "2": "````\nPlain code block\n````", - } - self.helper(text, expected_lines, expected_map) - - def test2(self) -> None: - """ - Test nested fenced blocks. - """ - text = """ - ```` - Outer block - ```python - def nested(): - pass - ``` - Still outer - ```` - """ - expected_lines = """ - - """ - expected_map = { - "1": "````\nOuter block\n```python\ndef nested():\n pass\n```\nStill outer\n````" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test3(self) -> None: - """ - Test empty fenced blocks. - """ - text = """ - Before - ``` - ``` - After - ```python - ``` - End - """ - expected_lines = """ - Before - - After - - End - """ - expected_map = {"1": "```\n```", "2": "```python\n```"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test4(self) -> None: - """ - Test blocks with different fence lengths. 
- """ - text = """ - Start - ``` - Three - ``` - Middle - ````` - Five - ````` - End - """ - expected_lines = """ - Start - - Middle - - End - """ - expected_map = {"1": "```\nThree\n```", "2": "`````\nFive\n`````"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test5(self) -> None: - """ - Test blocks with language specifiers. - """ - text = """ - ```python - def foo(): pass - ``` - ```bash - echo hello - ``` - ```javascript - console.log('hi'); - ``` - """ - expected_lines = """ - - - - """ - expected_map = { - "1": "```python\ndef foo(): pass\n```", - "2": "```bash\necho hello\n```", - "3": "```javascript\nconsole.log('hi');\n```", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test6(self) -> None: - """ - Test blocks with indentation. - """ - text = """ - Outside - ``` - Indented block - More indent - ``` - ```python - def foo(): - pass - ``` - End - """ - expected_lines = """ - Outside - - - End - """ - expected_map = { - "1": " ```\n Indented block\n More indent\n ```", - "2": " ```python\n def foo():\n pass\n ```", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py deleted file mode 100644 index 91efef1f4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py +++ /dev/null @@ -1,449 +0,0 @@ -import logging - -import helpers.hmarkdown_filtering as hmarfilt -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# 
Test_filter_by_header1 -# ############################################################################# - - -class Test_filter_by_header1(hunitest.TestCase): - def test_basic_header_extraction(self) -> None: - """ - Test basic header extraction functionality. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction section. - Some content here. - - ## Section 1 - Content for section 1. - - # Conclusion - Final thoughts here. - """ - test_content = hprint.dedent( - test_content, remove_lead_trail_empty_lines_=False - ) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_header(lines, "Introduction") - result_content = "\n".join(result_lines) - # Check outputs. - expected = """ - # Introduction - This is the introduction section. - Some content here. - - ## Section 1 - Content for section 1. - """ - self.assert_equal(result_content, expected, dedent=True) - - def test_header_not_found(self) -> None: - """ - Test behavior when header is not found. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction section. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(ValueError): - hmarfilt.filter_by_header(lines, "NonExistent") - - -# ############################################################################# -# Test_parse_range1 -# ############################################################################# - - -class Test_parse_range1(hunitest.TestCase): - def test_numeric_range(self) -> None: - """ - Test parsing numeric range (0-indexed). - """ - # Run test. - start, end = hmarfilt._parse_range("0:10", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 10) - - def test_none_start(self) -> None: - """ - Test range with None start (defaults to 0). - """ - # Run test. - start, end = hmarfilt._parse_range("None:10", 20) - # Check outputs. 
- self.assertEqual(start, 0) - self.assertEqual(end, 10) - - def test_none_end(self) -> None: - """ - Test range with None end (defaults to max_value). - """ - # Run test. - start, end = hmarfilt._parse_range("0:None", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - def test_both_none(self) -> None: - """ - Test range with both None (0:max_value). - """ - # Run test. - start, end = hmarfilt._parse_range("None:None", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - def test_invalid_range(self) -> None: - """ - Test invalid range format. - """ - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("invalid", 20) - - def test_case_insensitive_none(self) -> None: - """ - Test case insensitive None parsing. - """ - # Run test. - start, end = hmarfilt._parse_range("NONE:none", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - -# ############################################################################# -# Test_filter_by_lines1 -# ############################################################################# - - -class Test_filter_by_lines1(hunitest.TestCase): - def test_basic_line_filtering(self) -> None: - """ - Test basic line filtering functionality (0-indexed). - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - Line 4 - Line 5 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (indices 1:3 = Line 2 and Line 3). - result_lines = hmarfilt.filter_by_lines(lines, "1:3") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 2\nLine 3" - self.assertEqual(result_content, expected) - - def test_line_filtering_with_none(self) -> None: - """ - Test line filtering with None start (defaults to 0). - """ - # Prepare inputs. 
- test_content = """ - Line 1 - Line 2 - Line 3 - Line 4 - Line 5 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (None:2 = indices 0:2 = Line 1 and Line 2). - result_lines = hmarfilt.filter_by_lines(lines, "None:2") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 1\nLine 2" - self.assertEqual(result_content, expected) - - def test_line_filtering_to_end(self) -> None: - """ - Test line filtering from start to end. - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (1:None = indices 1:3 = Line 2 and Line 3). - result_lines = hmarfilt.filter_by_lines(lines, "1:None") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 2\nLine 3" - self.assertEqual(result_content, expected) - - def test_invalid_range_order(self) -> None: - """ - Test that start line <= end line is enforced. - """ - # Prepare inputs. - test_content = "Line 1\nLine 2\nLine 3" - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_lines(lines, "2:1") - - -# ############################################################################# -# Test_filter_by_slides1 -# ############################################################################# - - -class Test_filter_by_slides1(hunitest.TestCase): - def test_basic_slide_filtering(self) -> None: - """ - Test basic slide filtering functionality. - """ - # Prepare inputs. - test_content = """ - # Header 1 - - - - - * Slide 1 - Content for slide 1. - - * Slide 2 - Content for slide 2. - - * Slide 3 - Content for slide 3. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_slides(lines, "0:1") - result_content = "\n".join(result_lines) - # Check outputs. 
- self.assertIn("Slide 1", result_content) - self.assertNotIn("Slide 2", result_content) - - def test_slide_filtering_with_none_end(self) -> None: - """ - Test slide filtering to the end. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - - * Slide 2 - Content 2. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_slides(lines, "0:None") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Slide 1", result_content) - self.assertIn("Slide 2", result_content) - - def test_slide_filtering_invalid_range(self) -> None: - """ - Test that invalid slide ranges raise errors. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "1:0") - - def test_slide_filtering_beyond_slides(self) -> None: - """ - Test filtering with end beyond available slides. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "0:5") - - def test_no_slides_content(self) -> None: - """ - Test behavior with content that has no slides. - """ - # Prepare inputs. - test_content = """ - # Header 1 - Just regular content without slides. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs (should fail validation since there are no slides). - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "0:1") - - def test_slide_filtering_single_slide(self) -> None: - """ - Test filtering a single slide when there's only one slide (0-indexed). - """ - # Prepare inputs. 
- test_content = """ - * Only Slide - This is the only content. - Additional content after the slide. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:1 = only slide at index 0). - result_lines = hmarfilt.filter_by_slides(lines, "0:1") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Only Slide", result_content) - self.assertIn("This is the only content.", result_content) - - def test_slide_end_boundary(self) -> None: - """ - Test filtering to the end of slides (0-indexed). - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - - * Slide 2 - Content 2. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:2 = slides 0 and 1). - result_lines = hmarfilt.filter_by_slides(lines, "0:2") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Slide 1", result_content) - self.assertIn("Slide 2", result_content) - - -# ############################################################################# -# Test_additional_edge_cases1 -# ############################################################################# - - -class Test_additional_edge_cases1(hunitest.TestCase): - def test_filter_by_header_with_subsection(self) -> None: - """ - Test extracting a subsection header. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction. - - ## Subsection 1 - Content for subsection 1. - - ## Subsection 2 - Content for subsection 2. - - # Conclusion - Final thoughts. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_header(lines, "Subsection 1") - result_content = "\n".join(result_lines) - # Check outputs. 
- self.assertIn("## Subsection 1", result_content) - self.assertIn("Content for subsection 1.", result_content) - - def test_parse_range_edge_cases(self) -> None: - """ - Test edge cases for range parsing (0-indexed). - """ - # Run test. - start, end = hmarfilt._parse_range("0:0", 1) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 0) - # Run test. - start, end = hmarfilt._parse_range("None:None", 1000) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 1000) - - def test_filter_lines_single_line(self) -> None: - """ - Test filtering with empty range (0:0). - """ - # Prepare inputs. - test_content = "Single line content" - lines = test_content.split("\n") - # Run test (0:0 = empty range). - result_lines = hmarfilt.filter_by_lines(lines, "0:0") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertEqual(result_content, "") - - def test_filter_lines_exact_range(self) -> None: - """ - Test filtering with exact boundaries (0-indexed). - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:2 = indices 0 and 1 = Line 1 and Line 2). - result_lines = hmarfilt.filter_by_lines(lines, "0:2") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 1\nLine 2" - self.assertEqual(result_content, expected) - - def test_parse_range_invalid_formats(self) -> None: - """ - Test various invalid range formats. - """ - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("5", 10) - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("", 10) - # Run test. 
- with self.assertRaises(ValueError): - hmarfilt._parse_range("1:2:3", 10) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py deleted file mode 100644 index abf2faf66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py +++ /dev/null @@ -1,1403 +0,0 @@ -import logging -import os - -import helpers.hio as hio -import helpers.hmarkdown_div_blocks as hmadiblo -import helpers.hmarkdown_formatting as hmarform -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_remove_end_of_line_periods1 -# ############################################################################# - - -class Test_remove_end_of_line_periods1(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - input_text = hprint.dedent(input_text).strip() - expected_text = hprint.dedent(expected_text).strip() - lines = input_text.split("\n") - # Run test. - actual_lines = hmarform.remove_end_of_line_periods(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assertEqual(actual, expected_text) - - def test_standard_case(self) -> None: - input_text = """ - Hello. - World. - This is a test. - """ - expected_text = """ - Hello - World - This is a test - """ - self.helper(input_text, expected_text) - - def test_no_periods(self) -> None: - input_text = """ - Hello - World - This is a test - """ - expected_text = """ - Hello - World - This is a test - """ - self.helper(input_text, expected_text) - - def test_multiple_periods(self) -> None: - input_text = """ - Line 1..... 
- Line 2..... - End. - """ - expected_text = """ - Line 1 - Line 2 - End - """ - self.helper(input_text, expected_text) - - def test_empty_string(self) -> None: - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test_leading_and_trailing_periods(self) -> None: - input_text = """ - .Line 1. - .Line 2. - ..End.. - """ - expected_text = """ - .Line 1 - .Line 2 - ..End - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_md_clean_up1 -# ############################################################################# - - -class Test_md_clean_up1(hunitest.TestCase): - def test1(self) -> None: - # Prepare inputs. - txt = r""" - **States**: - - \( S = \{\text{Sunny}, \text{Rainy}\} \) - **Observations**: - - \( O = \{\text{Yes}, \text{No}\} \) (umbrella) - - ### Initial Probabilities: - \[ - P(\text{Sunny}) = 0.6, \quad P(\text{Rainy}) = 0.4 - \] - - ### Transition Probabilities: - \[ - \begin{aligned} - P(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad P(\text{Sunny} \to \text{Rainy}) = 0.3 \\ - P(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad P(\text{Rainy} \to \text{Rainy}) = 0.6 - \end{aligned} - \] - - ### Observation (Emission) Probabilities: - \[ - \begin{aligned} - P(\text{Yes} \mid \text{Sunny}) &= 0.1, \quad P(\text{No} \mid \text{Sunny}) = 0.9 \\ - P(\text{Yes} \mid \text{Rainy}) &= 0.8, \quad P(\text{No} \mid \text{Rainy}) = 0.2 - \end{aligned} - \] - """ - txt = hprint.dedent(txt) - actual = hmarform.md_clean_up(txt) - actual = hprint.dedent(actual) - expected = r""" - **States**: - - $S = \{\text{Sunny}, \text{Rainy}\}$ - **Observations**: - - $O = \{\text{Yes}, \text{No}\}$ (umbrella) - - ### Initial Probabilities: - $$ - \Pr(\text{Sunny}) = 0.6, \quad \Pr(\text{Rainy}) = 0.4 - $$ - - ### Transition Probabilities: - $$ - \begin{aligned} - \Pr(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad \Pr(\text{Sunny} \to \text{Rainy}) = 0.3 \\ - 
\Pr(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad \Pr(\text{Rainy} \to \text{Rainy}) = 0.6 - \end{aligned} - $$ - - ### Observation (Emission) Probabilities: - $$ - \begin{aligned} - \Pr(\text{Yes} | \text{Sunny}) &= 0.1, \quad \Pr(\text{No} | \text{Sunny}) = 0.9 \\ - \Pr(\text{Yes} | \text{Rainy}) &= 0.8, \quad \Pr(\text{No} | \text{Rainy}) = 0.2 - \end{aligned} - $$""" - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_remove_code_delimiters1 -# ############################################################################# - - -class Test_remove_code_delimiters1(hunitest.TestCase): - def test1(self) -> None: - """ - Test a basic example. - """ - # Prepare inputs. - content = r""" - ```python - def hello_world(): - print("Hello, World!") - ``` - """ - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def hello_world(): - print("Hello, World!") - """ - self.assert_equal(actual, expected, dedent=True) - - def test2(self) -> None: - """ - Test an example with empty lines at the start and end. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def check_empty_lines(): - print("Check empty lines are present!") - """ - self.assert_equal(actual, expected, dedent=True) - - def test3(self) -> None: - """ - Test a markdown with headings, Python and yaml blocks. - """ - # Prepare inputs. - content = r""" - # Section 1 - - This section contains comment and python code. 
- - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - ```python - def greet(name): - return f"Hello, {name}!" - print(greet("World")) - ``` - - # Section 2 - - Key points below. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - ```yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - ``` - """ - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - # Section 1 - - This section contains comment and python code. - - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - - def greet(name): - return f"Hello, {name}!" - print(greet("World")) - - - # Section 2 - - Key points below. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - - """ - self.assert_equal(actual, expected, dedent=True) - - def test4(self) -> None: - """ - Test another markdown with headings and multiple indent Python blocks. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - self.check_string(actual, dedent=True) - - def test5(self) -> None: - """ - Test an empty string. - """ - # Prepare inputs. - content = "" - lines = content.split("\n") if content else [] - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. 
- expected = "" - self.assert_equal(actual, expected, dedent=True) - - def test6(self) -> None: - """ - Test a Python and immediate markdown code block. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def no_start_python(): - print("No mention of python at the start") - - - - A markdown paragraph contains - delimiters that needs to be removed. - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_format_markdown_slide -# ############################################################################# - - -class Test_format_markdown_slide(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarform.format_markdown_slide(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - _LOG.debug("actual=\n%s", actual) - _LOG.debug("expected=\n%s", expected) - self.assert_equal(str(actual), str(expected)) - - def test1(self) -> None: - """ - Test formatting a simple slide with bullets. - """ - input_text = """ - * Slide title - - First bullet - - Second bullet - """ - expected_text = """ - * Slide Title - - - First bullet - - - Second bullet - """ - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test formatting multiple slides. 
- """ - input_text = """ - * First slide - - Point A - - Point B - * Second slide - - Point X - - Point Y - """ - expected_text = """ - * First Slide - - - Point A - - - Point B - * Second Slide - - - Point X - - - Point Y - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test formatting slides with nested bullets. - """ - input_text = """ - * Main slide - - First level - - Nested point - - Another nested - - Second level - """ - expected_text = """ - * Main Slide - - - First level - - Nested point - - Another nested - - - Second level - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test formatting empty input. - """ - # Prepare inputs. - input_text = """ - """ - # Check outputs. - expected_text = """ - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test formatting slide title capitalization. - """ - input_text = """ - * mixed case slide title - - Point one - """ - expected_text = """ - * Mixed Case Slide Title - - - Point one - """ - self.helper(input_text, expected_text) - - def test6(self) -> None: - """ - Test formatting slide with only title, no bullet points. - """ - input_text = """ - * Solo slide title - """ - expected_text = """ - * Solo Slide Title - """ - self.helper(input_text, expected_text) - - def test7(self) -> None: - """ - Test formatting slide with deeply nested bullets. - """ - input_text = """ - * Main slide - - Level 1 - - Level 2 - - Level 3 - - Level 4 - - Back to level 1 - """ - expected_text = """ - * Main Slide - - - Level 1 - - Level 2 - - Level 3 - - Level 4 - - - Back to level 1 - """ - self.helper(input_text, expected_text) - - def test8(self) -> None: - """ - Test formatting slide with nested bullets and special formatting. - """ - input_text = r""" - * What Are Data Analytics? 
- - **Collections of data** - - - Aggregated, organized data sets for analysis - - - E.g., customer purchase histories in a CRM system - - **Dashboards** - - - Visual displays of key metrics for insights - - E.g., dashboard showing quarterly revenue, expenses - - - **Descriptive statistics** - - Summary metrics: mean, median, mode, standard deviation - - E.g., average sales per quarter to understand trends - - **Historical reports** - - - Examination of past performance - - E.g., monthly sales reports for past fiscal year - - **Models** - - Statistical representations to forecast, explain phenomena - - - E.g., predictive model to anticipate customer churn based on behavioral data - """ - expected_text = r""" - * What Are Data Analytics? - - - **Collections of data** - - Aggregated, organized data sets for analysis - - E.g., customer purchase histories in a CRM system - - - **Dashboards** - - Visual displays of key metrics for insights - - E.g., dashboard showing quarterly revenue, expenses - - - **Descriptive statistics** - - Summary metrics: mean, median, mode, standard deviation - - E.g., average sales per quarter to understand trends - - - **Historical reports** - - Examination of past performance - - E.g., monthly sales reports for past fiscal year - - - **Models** - - Statistical representations to forecast, explain phenomena - - E.g., predictive model to anticipate customer churn based on behavioral data - """ - self.helper(input_text, expected_text) - - def test9(self) -> None: - """ - This reproduces a broken behavior of prettier with fenced divs. 
- """ - input_text = r""" - * Incremental vs Iterative - ::: columns - :::: {.column width=55%} - - - **Incremental Development** - - Each increment adds functional components - - Require upfront planning to divide features meaningfully - - Integration of increments can be complex - - - **Iterative Development** - - Each increment delivers usable system - - Refine and improve product through repeated cycles - - Get feedback - - Uncover and adjust for unknown requirements - - - **Incremental $\gg$ Iterative** - - :::: - :::: {.column width=40%} - - ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} - - \small _Incremental - - \vspace{0.5cm} - - ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} - - \small _Iterative_ - - \vspace{0.5cm} - - ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} - - \small _Incremental vs Iterative_ - :::: - ::: - """ - expected_text = r""" - * Incremental vs Iterative - ::: columns - :::: {.column width=55%} - - - **Incremental Development** - - Each increment adds functional components - - Require upfront planning to divide features meaningfully - - Integration of increments can be complex - - - **Iterative Development** - - Each increment delivers usable system - - Refine and improve product through repeated cycles - - Get feedback - - Uncover and adjust for unknown requirements - - - **Incremental $\gg$ Iterative** - :::: - :::: {.column width=40%} - ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} - \small \_Incremental - \vspace{0.5cm} - ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} - \small _Iterative_ - \vspace{0.5cm} - ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} - \small _Incremental vs Iterative_ - :::: - ::: - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# 
Test_format_figures -# ############################################################################# - - -class Test_format_figures(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual_lines = hmarform.format_figures(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test_basic_text_with_figures(self) -> None: - """ - Test converting basic text with figures to column format. - """ - input_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Read / write large amounts of data infrequently - - Analytics requires a few columns - - Better data compression - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Read / write large amounts of data infrequently - - Analytics requires a few columns - - Better data compression - :::: - :::: {.column width=40%} - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_no_figures_no_change(self) -> None: - """ - Test that text without figures remains unchanged. 
- """ - input_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Better data compression - """ - expected_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Better data compression - """ - self.helper(input_text, expected_text) - - def test_already_in_columns_format_no_change(self) -> None: - """ - Test that text already in columns format remains unchanged. - """ - input_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - :::: - :::: {.column width=40%} - ![](some_image.png) - :::: - ::: - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - :::: - :::: {.column width=40%} - ![](some_image.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_single_figure(self) -> None: - """ - Test converting text with a single figure. - """ - input_text = """ - - **Important concept** - - This is the main point - - Supporting detail - - ![](path/to/image.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Important concept** - - This is the main point - - Supporting detail - :::: - :::: {.column width=40%} - - ![](path/to/image.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_mixed_content_with_figures(self) -> None: - """ - Test converting mixed content including text and figures. - """ - input_text = """ - ## Section header - - Some introductory text here. - - - **Point one** - - Detail A - - Detail B - - **Point two** - - Detail X - - Detail Y - - ![](image1.png) - - Additional text between figures. - - ![](image2.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - ## Section header - - Some introductory text here. 
- - - **Point one** - - Detail A - - Detail B - - **Point two** - - Detail X - - Detail Y - :::: - :::: {.column width=40%} - - ![](image1.png) - - Additional text between figures. - - ![](image2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_empty_input(self) -> None: - """ - Test that empty input returns empty output. - """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test_with_slide_title(self) -> None: - """ - Test that slide title is left unchanged. - """ - input_text = """ - * VCS: How to Track Data - - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - """ - expected_text = """ - * VCS: How to Track Data - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - :::: - :::: {.column width=40%} - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_format_md_links_to_latex_format -# ############################################################################# - - -class Test_format_md_links_to_latex_format(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual_lines = hmarform.format_md_links_to_latex_format(lines) - actual = "\n".join(actual_lines) - # Check outputs. 
- expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - # ========================================================================= - # Edge cases. - # ========================================================================= - - def test_empty_input(self) -> None: - """ - Test empty input. - """ - # Prepare inputs. - input_text = "" - expected_text = "" - # Run test. - self.helper(input_text, expected_text) - - def test_no_links(self) -> None: - """ - Test content without any links. - """ - # Prepare inputs. - input_text = """ - # Important Notes - - - This is regular text - - No links here - - Just plain content - """ - expected_text = """ - # Important Notes - - - This is regular text - - No links here - - Just plain content - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Plain URL conversion: http://... or https://... - # ========================================================================= - - def test_plain_http_url(self) -> None: - """ - Test converting single plain HTTP URL. - """ - # Prepare inputs. - input_text = """ - Visit http://example.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{http://example.com}}](http://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_https_url(self) -> None: - """ - Test converting single plain HTTPS URL. - """ - # Prepare inputs. - input_text = """ - Visit https://example.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_path(self) -> None: - """ - Test converting plain URLs with paths. - """ - # Prepare inputs. 
- input_text = """ - Check out https://ubuntu.com/tutorials/command-line-for-beginners - """ - expected_text = r""" - Check out [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_query_parameters(self) -> None: - """ - Test converting plain URL with query parameters. - """ - # Prepare inputs. - input_text = """ - Search: https://example.com/search?q=python&page=1 - """ - expected_text = r""" - Search: [\textcolor{blue}{\underline{https://example.com/search?q=python&page=1}}](https://example.com/search?q=python&page=1) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_fragment(self) -> None: - """ - Test converting plain URL with fragment. - """ - # Prepare inputs. - input_text = """ - Docs: https://docs.python.org/3/tutorial/index.html#tutorial-index - """ - expected_text = r""" - Docs: [\textcolor{blue}{\underline{https://docs.python.org/3/tutorial/index.html#tutorial-index}}](https://docs.python.org/3/tutorial/index.html#tutorial-index) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_at_line_start(self) -> None: - """ - Test plain URL at beginning of line. - """ - # Prepare inputs. - input_text = """ - https://example.com is a good site - """ - expected_text = r""" - [\textcolor{blue}{\underline{https://example.com}}](https://example.com) is a good site - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_at_line_end(self) -> None: - """ - Test plain URL at end of line. - """ - # Prepare inputs. - input_text = """ - Check this link https://example.com - """ - expected_text = r""" - Check this link [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. 
- self.helper(input_text, expected_text) - - # ========================================================================= - # URL in backticks conversion: `http://...` or `https://...` - # ========================================================================= - - def test_backtick_url(self) -> None: - """ - Test converting single URL in backticks. - """ - # Prepare inputs. - input_text = """ - Visit `https://example.com` for details - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) for details - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Markdown link conversion: [Text](URL) - # ========================================================================= - - def test_markdown_link_simple(self) -> None: - """ - Test converting simple markdown link [Text](URL). - """ - # Prepare inputs. - input_text = """ - Check out [this tutorial](https://example.com/tutorial) - """ - expected_text = r""" - Check out [\textcolor{blue}{\underline{this tutorial}}](https://example.com/tutorial) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_markdown_link_preserves_text(self) -> None: - """ - Test that markdown link preserves the display text. - """ - # Prepare inputs. - input_text = """ - See [documentation](https://docs.example.com) here - """ - expected_text = r""" - See [\textcolor{blue}{\underline{documentation}}](https://docs.example.com) here - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Email link conversion: [email@domain.com](email@domain.com) - # ========================================================================= - - def test_email_link_simple1(self) -> None: - """ - Test converting simple email link. - """ - # Prepare inputs. 
- input_text = """ - Contact: [support@example.com](support@example.com) - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_email_link_simple2(self) -> None: - """ - Test converting simple email link. - """ - # Prepare inputs. - input_text = """ - Contact: [](support@example.com) - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Multiple URLs. - # ========================================================================= - - def test_multiple_urls_same_line(self) -> None: - """ - Test converting multiple URLs on same line. - """ - # Prepare inputs. - input_text = """ - Visit https://example.com and https://another.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) and [\textcolor{blue}{\underline{https://another.com}}](https://another.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_multiple_urls_different_lines(self) -> None: - """ - Test converting multiple URLs on different lines. - """ - # Prepare inputs. - input_text = """ - Tutorial: https://ubuntu.com/tutorials/command-line-for-beginners - - Documentation: https://docs.python.org/3/ - """ - expected_text = r""" - Tutorial: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - - Documentation: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Mixed link types. 
- # ========================================================================= - - def test_mixed_plain_and_backtick_urls(self) -> None: - """ - Test handling mixed plain and backtick URLs. - """ - # Prepare inputs. - input_text = """ - Plain: https://example.com - Backtick: `https://docs.example.com` - """ - expected_text = r""" - Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - Backtick: [\textcolor{blue}{\underline{https://docs.example.com}}](https://docs.example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_mixed_plain_and_markdown_links(self) -> None: - """ - Test handling mixed plain URLs and markdown links. - """ - # Prepare inputs. - input_text = """ - Plain: https://example.com - Markdown: [Click here](https://docs.example.com) - """ - expected_text = r""" - Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - Markdown: [\textcolor{blue}{\underline{Click here}}](https://docs.example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_mixed_all_types(self) -> None: - """ - Test handling all link types in same content. - """ - # Prepare inputs. 
- input_text = r""" - ## Resources - - - Plain URL: https://ubuntu.com/tutorials/command-line-for-beginners - - Backtick URL: `https://docs.python.org/3/` - - Markdown link: [Click here](https://github.com) - - Email: [support@example.com](support@example.com) - - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) - """ - expected_text = r""" - ## Resources - - - Plain URL: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - - Backtick URL: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) - - Markdown link: [\textcolor{blue}{\underline{Click here}}](https://github.com) - - Email: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Complex scenarios. - # ========================================================================= - - def test_url_with_file_extension(self) -> None: - """ - Test URL pointing to file with extension. - """ - # Prepare inputs. - input_text = """ - Download: https://cdn.example.com/files/document.pdf - """ - expected_text = r""" - Download: [\textcolor{blue}{\underline{https://cdn.example.com/files/document.pdf}}](https://cdn.example.com/files/document.pdf) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_already_formatted_link_preserved(self) -> None: - """ - Test that already formatted links are preserved. - """ - # Prepare inputs. - input_text = r""" - Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) - """ - expected_text = r""" - Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) - """ - # Run test. 
- self.helper(input_text, expected_text) - - # ========================================================================= - # Image/picture links should be left untouched. - # ========================================================================= - - def test_filter_image_simple(self) -> None: - """ - Test that simple image links are left untouched. - """ - # Prepare inputs. - input_text = """ - Check this image: ![](path/to/image.png) - """ - expected_text = """ - Check this image: ![](path/to/image.png) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_jpg_images(self) -> None: - """ - Test that JPG image links are left untouched. - """ - # Prepare inputs. - input_text = """ - ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) - """ - expected_text = """ - ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_mixed_images_and_emails(self) -> None: - """ - Test that image links are not processed while email links are. - """ - # Prepare inputs. - input_text = """ - Contact: [](support@example.com) - Image: ![](path/to/image.png) - Link: https://example.com - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - Image: ![](path/to/image.png) - Link: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_image_with_alt_text(self) -> None: - """ - Test that image links with alt text are left untouched. - """ - # Prepare inputs. - input_text = """ - ![Alt text](path/to/image.png) - """ - expected_text = """ - ![Alt text](path/to/image.png) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_multiple_images(self) -> None: - """ - Test that multiple image links are left untouched. - """ - # Prepare inputs. 
- input_text = """ - ![](image1.png) - ![](image2.jpg) - ![](image3.gif) - """ - expected_text = """ - ![](image1.png) - ![](image2.jpg) - ![](image3.gif) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_markdown_link_with_escaped_underscores(self) -> None: - """ - Test markdown link with escaped underscores in the text. - """ - # Prepare inputs. - input_text = r""" - [tutorial\_docker\_compose](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) - """ - expected_text = r""" - [\textcolor{blue}{\underline{tutorial\_docker\_compose}}](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) - """ - # Run test. - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_add_prettier_ignore_to_div_blocks -# ############################################################################# - - -class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): - """ - Test the function to add prettier-ignore comments around div blocks. - """ - - def test_simple_div_block(self) -> None: - """ - Test a simple div block with two colons. - """ - # Prepare inputs. - txt = """ - :::: - ::: - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - def test_multiple_div_blocks(self) -> None: - """ - Test multiple div blocks in the same content. - """ - # Prepare inputs. - txt = """ - Some text before - - :::: - ::::{.column width=40%} - - Middle text - - :::columns - ::::{.column width=60%} - - Some text after - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. 
- actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - -# ############################################################################# -# Test_remove_prettier_ignore_from_div_blocks -# ############################################################################# - - -class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): - """ - Test the function to remove prettier-ignore comments from div blocks. - """ - - def test_remove_simple_block(self) -> None: - """ - Test removing prettier-ignore from a simple div block. - """ - # Prepare inputs. - txt = """ - - - :::: - ::: - - - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - def test_remove_multiple_blocks(self) -> None: - """ - Test removing prettier-ignore from multiple div blocks. - """ - # Prepare inputs. - txt = """ - Text before - - - :::: - ::::{.column width=40%} - - - Middle text - - - :::columns - ::::{.column width=60%} - - - Text after - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py deleted file mode 100644 index 34ea20964..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py +++ /dev/null @@ -1,2002 +0,0 @@ -import logging -import os -import pprint -from typing import Any, List, Tuple, cast - -import helpers.hio as hio -import helpers.hmarkdown as hmarkdo -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: - res = [ - hmarkdo.HeaderInfo(level, text, 5 * i + 1) - for i, (level, text) in enumerate(data) - ] - return res - - -def get_header_list1() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (2, "Section 1.1"), - (3, "Subsection 1.1.1"), - (3, "Subsection 1.1.2"), - (2, "Section 1.2"), - (1, "Chapter 2"), - (2, "Section 2.1"), - (3, "Subsection 2.1.1"), - (2, "Section 2.2"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list2() -> hmarkdo.HeaderList: - data = [ - (1, "Module Alpha"), - (2, "Lesson Alpha-1"), - (3, "Topic Alpha-1.a"), - (3, "Topic Alpha-1.b"), - (2, "Lesson Alpha-2"), - (3, "Topic Alpha-2.a"), - (1, "Module Beta"), - (2, "Lesson Beta-1"), - (3, "Topic Beta-1.a"), - (2, "Lesson Beta-2"), - (1, "Module Gamma"), - (2, "Lesson Gamma-1"), - (3, "Topic Gamma-1.a"), - (3, "Topic Gamma-1.b"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list3() -> hmarkdo.HeaderList: - data = [ - (1, "Topic A"), - (2, "Subtopic A.1"), - (3, "Detail A.1.i"), - (3, "Detail A.1.ii"), - (2, "Subtopic 
A.2"), - (1, "Topic B"), - (2, "Subtopic B.1"), - (3, "Detail B.1.i"), - (2, "Subtopic B.2"), - (3, "Detail B.2.i"), - (3, "Detail B.2.ii"), - (2, "Subtopic B.3"), - (1, "Topic C"), - (2, "Subtopic C.1"), - (3, "Detail C.1.i"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list4() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (3, "Subsection 1.1.1"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list5() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (2, "Section 1.1"), - (3, "Subsection 1.1.1"), - (1, "Chapter 2"), - ] - header_list = _to_header_list(data) - return header_list - - -def _get_markdown_example1() -> str: - content = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - # Header3 - Content under header 3. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example2() -> str: - content = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_no_header_example1() -> str: - content = r""" - This is some content without any headers. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example4() -> str: - content = r""" - # Chapter 1 - - Welcome to the first chapter. This chapter introduces fundamental concepts and - lays the groundwork for further exploration. - - ## Section 1.1 - - This section discusses the initial principles and key ideas that are crucial for - understanding the topic. - - ### Subsection 1.1.1 - - The first subsection dives deeper into the details, providing examples and - insights that help clarify the concepts. - - Example: - ```python - def greet(name): - return f"Hello, {name}!" 
- print(greet("World")) - ``` - - ### Subsection 1.1.2 - - Here, we examine alternative perspectives and additional considerations that - were not covered in the previous subsection. - - - Key Point 1: Understanding different viewpoints enhances comprehension. - - Key Point 2: Practical application reinforces learning. - - ## Section 1.2 - - This section introduces new frameworks and methodologies that build upon the - foundation established earlier. - - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - # Chapter 2 - - Moving forward, this chapter explores advanced topics and real-world - applications. - - ## Section 2.1 - - This section provides an in-depth analysis of core mechanisms that drive the - subject matter. - - ### Subsection 2.1.1 - - A deep dive into specific case studies and empirical evidence that support - theoretical claims. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - ## Section 2.2 - - The final section of this chapter presents summary conclusions, key takeaways, - and potential future developments. - - ```yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - ``` - - Stay curious and keep exploring! - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example5() -> hmarkdo.HeaderList: - content = r""" - # Models - test - ## Naive Bayes - test2 - ## Decision trees - test3 - ## Random forests - ## Linear models - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_slides_example1() -> str: - content = r""" - # Header1 - - * Slide 1 - Content 1. - - ## Header2 - - * Slide 2 - Content 2. - - * Slide 3 - Content 3. 
- """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_slides_example2() -> str: - content = r""" - # Header1 - - * Slide1 - Content 1. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _test_navigation_flow( - self_: Any, - txt: str, - header_list_exp: str, - header_tree_exp: str, - level: int, - description: str, - nav_str_exp: str, -) -> None: - # 1) Extract headers. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - actual = pprint.pformat(header_list) - self_.assert_equal( - actual, header_list_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - # 2) Build header tree. - tree = hmarkdo.build_header_tree(header_list) - actual = hmarkdo.header_tree_to_str(tree, ancestry=None) - self_.assert_equal( - actual, header_tree_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - # 3) Compute the navigation bar for a specific header. - actual = hmarkdo.selected_navigation_to_str(tree, level, description) - self_.assert_equal( - actual, nav_str_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - - -def _test_full_navigation_flow(self_: Any, txt: str) -> None: - res: List[str] = [] - # Extract headers. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - # Build header tree. - tree = hmarkdo.build_header_tree(header_list) - # Create a navigation map for any header. - for node in header_list: - level, description, _ = node.as_tuple() - res_tmp = hprint.frame(hprint.to_str("level description")) - res.append(res_tmp) - # - res_tmp = hmarkdo.selected_navigation_to_str(tree, level, description) - res.append(res_tmp) - # Check. 
- actual = "\n".join(res) - self_.check_string(actual) - - -# ############################################################################# -# Test_header_list_to_vim_cfile1 -# ############################################################################# - - -class Test_header_list_to_vim_cfile1(hunitest.TestCase): - def test1(self) -> None: - """ - Test conversion of header list to vim cfile format with multiple - levels. - """ - # Prepare inputs. - markdown_file = "test.py" - headers = get_header_list1() - # Call function. - actual_lines = hmarkdo.header_list_to_vim_cfile(markdown_file, headers) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - test.py:1:Chapter 1 - test.py:6:Section 1.1 - test.py:11:Subsection 1.1.1 - test.py:16:Subsection 1.1.2 - test.py:21:Section 1.2 - test.py:26:Chapter 2 - test.py:31:Section 2.1 - test.py:36:Subsection 2.1.1 - test.py:41:Section 2.2 - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_header_list_to_markdown1 -# ############################################################################# - - -class Test_header_list_to_markdown1(hunitest.TestCase): - def helper( - self, headers: hmarkdo.HeaderList, mode: str, expected: str - ) -> None: - """ - Helper method to test header_list_to_markdown function. - - :param headers: list of HeaderInfo objects - :param mode: conversion mode ("list" or "headers") - :param expected: expected output string - """ - # Call function. - actual_lines = hmarkdo.header_list_to_markdown(headers, mode) - actual = "\n".join(actual_lines) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test conversion of header list to markdown list format with - indentation. - """ - # Prepare inputs. - headers = get_header_list1() - mode = "list" - # Prepare outputs. 
- expected = r""" - - Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - Subsection 1.1.2 - - Section 1.2 - - Chapter 2 - - Section 2.1 - - Subsection 2.1.1 - - Section 2.2 - """ - # Run test. - self.helper(headers, mode, expected) - - def test2(self) -> None: - """ - Test conversion of header list to markdown headers format with - proper heading levels. - """ - # Prepare inputs. - headers = get_header_list1() - mode = "headers" - # Prepare outputs. - expected = r""" - # Chapter 1 - ## Section 1.1 - ### Subsection 1.1.1 - ### Subsection 1.1.2 - ## Section 1.2 - # Chapter 2 - ## Section 2.1 - ### Subsection 2.1.1 - ## Section 2.2 - """ - # Run test. - self.helper(headers, mode, expected) - - -# ############################################################################# -# Test_is_markdown_line_separator1 -# ############################################################################# - - -class Test_is_markdown_line_separator1(hunitest.TestCase): - def helper(self, line: str, expected: bool) -> None: - """ - Helper method to test is_markdown_line_separator function. - - :param line: input line to test - :param expected: expected boolean result - """ - # Call function. - actual = hmarkdo.is_markdown_line_separator(line) - # Check output. - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test that a line with only dashes is recognized as a separator. - """ - # Prepare inputs. - line = "-----------------------" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test2(self) -> None: - """ - Test that a line with hash prefix and dashes is a valid separator. - """ - # Prepare inputs. - line = "# ------" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test3(self) -> None: - """ - Test that a line with hash prefix and hash characters is a valid - separator. - """ - # Prepare inputs. - line = "# #########" - # Prepare outputs. - expected = True - # Run test. 
- self.helper(line, expected) - - def test4(self) -> None: - """ - Test that a line with triple hash prefix and equals is a valid - separator. - """ - # Prepare inputs. - line = "### =====" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test5(self) -> None: - """ - Test that a line with hash and slashes is a valid separator. - """ - # Prepare inputs. - line = "#//////" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test6(self) -> None: - """ - Test that a line with hash, spaces, and slashes is a valid - separator. - """ - # Prepare inputs. - line = "# //////" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test7(self) -> None: - """ - Test that plain text is not recognized as a separator. - """ - # Prepare inputs. - line = "Not a separator" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test8(self) -> None: - """ - Test that a short dash line is not a valid separator. - """ - # Prepare inputs. - line = "# --" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test9(self) -> None: - """ - Test that mixed separator characters are not valid. - """ - # Prepare inputs. - line = "# ###---" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test10(self) -> None: - """ - Test that two equals signs alone are not a valid separator. - """ - # Prepare inputs. - line = "==" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test11(self) -> None: - """ - Test that dash prefix with slashes is not a valid separator. - """ - # Prepare inputs. - line = "- //////" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test12(self) -> None: - """ - Test that separators with trailing text are not valid. - """ - # Prepare inputs. 
- line = "=== Not a seperator" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test13(self) -> None: - """ - Test that separators with surrounding text are not valid. - """ - # Prepare inputs. - line = "--- Not a seperator ---" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - -# ############################################################################# -# Test_extract_section_from_markdown1 -# ############################################################################# - - -class Test_extract_section_from_markdown1(hunitest.TestCase): - def helper(self, content: str, header_name: str, expected: str) -> None: - """ - Helper method to test extract_section_from_markdown function. - - :param content: markdown content to extract from - :param header_name: name of header to extract - :param expected: expected output string - """ - # Call function. - lines = content.split("\n") - actual_lines = hmarkdo.extract_section_from_markdown(lines, header_name) - actual = "\n".join(actual_lines) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - # TODO(gp): This doesn't seem correct. - def test1(self) -> None: - """ - Test extracting a section that includes a subheader. - """ - # Prepare inputs. - content = _get_markdown_example1() - # Prepare outputs. - expected = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header1", expected) - - def test2(self) -> None: - """ - Test extracting a subheader section only. - """ - # Prepare inputs. - content = _get_markdown_example1() - content = hprint.dedent(content) - # Prepare outputs. - expected = r""" - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header2", expected) - - def test3(self) -> None: - """ - Test extracting the last header section in the document. - """ - # Prepare inputs. 
- content = _get_markdown_example1() - content = hprint.dedent(content) - # Prepare outputs. - expected = r""" - # Header3 - Content under header 3. - """ - # Run test. - self.helper(content, "Header3", expected) - - def test4(self) -> None: - """ - Test extracting a header that spans to the end of document. - """ - # Prepare inputs. - content = _get_markdown_example2() - # Prepare outputs. - expected = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header1", expected) - - def test5(self) -> None: - # Prepare inputs. - content = _get_markdown_no_header_example1() - # Call tested function. - with self.assertRaises(ValueError) as fail: - lines = content.split("\n") - hmarkdo.extract_section_from_markdown(lines, "Header4") - # Check output. - actual = str(fail.exception) - expected = r"Header 'Header4' not found" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_extract_headers_from_markdown1 -# ############################################################################# - - -class Test_extract_headers_from_markdown1(hunitest.TestCase): - def helper(self, content: str, max_level: int, expected: str) -> None: - """ - Helper method to test extract_headers_from_markdown function. - - :param content: markdown content to extract headers from - :param max_level: maximum header level to extract - :param expected: expected output string representation - """ - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_headers_from_markdown( - lines, max_level=max_level - ) - # Check output. - self.assert_equal(str(actual), expected) - - def test1(self) -> None: - """ - Test extracting multiple headers with different levels from markdown - content. - """ - # Prepare inputs. - content = _get_markdown_example1() - max_level = 3 - # Prepare outputs. 
- expected = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3), HeaderInfo(1, 'Header3', 5)]""" - # Run test. - self.helper(content, max_level, expected) - - def test2(self) -> None: - """ - Test extracting headers from a simple two-level structure. - """ - # Prepare inputs. - content = _get_markdown_example2() - max_level = 3 - # Prepare outputs. - expected = ( - r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" - ) - # Run test. - self.helper(content, max_level, expected) - - def test3(self) -> None: - # Prepare inputs. - content = r""" - This is some content without any headers. - """ - content = hprint.dedent(content) - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - # Check output. - expected: List[str] = [] - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_extract_slides_from_markdown1 -# ############################################################################# - - -class Test_extract_slides_from_markdown1(hunitest.TestCase): - def helper(self, content: str, expected: str) -> None: - """ - Helper method to test extract_slides_from_markdown function. - - :param content: markdown content to extract slides from - :param expected: expected output string representation - """ - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_slides_from_markdown(lines) - # Check output. - self.assert_equal(str(actual), expected) - - def test1(self) -> None: - """ - Test extracting multiple slides from markdown presentation format. - """ - # Prepare inputs. - content = _get_markdown_slides_example1() - # Prepare outputs. - expected = r"""([HeaderInfo(1, 'Slide 1', 3), HeaderInfo(1, 'Slide 2', 8), HeaderInfo(1, 'Slide 3', 11)], 12)""" - # Run test. 
- self.helper(content, expected) - - def test2(self) -> None: - """ - Test extracting a single slide from markdown presentation format. - """ - # Prepare inputs. - content = _get_markdown_slides_example2() - # Prepare outputs. - expected = r"""([HeaderInfo(1, 'Slide1', 3)], 4)""" - # Run test. - self.helper(content, expected) - - def test3(self) -> None: - # Prepare inputs. - content = _get_markdown_no_header_example1() - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_slides_from_markdown(lines) - # Check output. - expected = r"""([], 1)""" - self.assert_equal(str(actual), expected) - - -# ############################################################################# -# Test_selected_navigation_to_str1 -# ############################################################################# - - -class Test_selected_navigation_to_str1(hunitest.TestCase): - def test1(self) -> None: - """ - Create navigation bar from Markdown text `_get_markdown_example4()`. - """ - txt = _get_markdown_example4() - header_list_exp = """ - [HeaderInfo(1, 'Chapter 1', 1), - HeaderInfo(2, 'Section 1.1', 6), - HeaderInfo(3, 'Subsection 1.1.1', 11), - HeaderInfo(3, 'Subsection 1.1.2', 23), - HeaderInfo(2, 'Section 1.2', 31), - HeaderInfo(1, 'Chapter 2', 38), - HeaderInfo(2, 'Section 2.1', 43), - HeaderInfo(3, 'Subsection 2.1.1', 48), - HeaderInfo(2, 'Section 2.2', 56)] - """ - header_tree_exp = """ - - Chapter 1 - - Chapter 2 - """ - level = 3 - description = "Subsection 1.1.2" - nav_str_exp = """ - - Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - **Subsection 1.1.2** - - Section 1.2 - - Chapter 2 - """ - _test_navigation_flow( - self, - txt, - header_list_exp, - header_tree_exp, - level, - description, - nav_str_exp, - ) - - def test2(self) -> None: - txt = _get_markdown_example4() - _test_full_navigation_flow(self, txt) - - -# ############################################################################# -# Test_selected_navigation_to_str2 -# 
############################################################################# - - -class Test_selected_navigation_to_str2(hunitest.TestCase): - def test1(self) -> None: - """ - Create navigation bar from Markdown text `_get_markdown_example5()`. - """ - txt = _get_markdown_example5() - header_list_exp = r""" - [HeaderInfo(1, 'Models', 1), - HeaderInfo(2, 'Naive Bayes', 3), - HeaderInfo(2, 'Decision trees', 5), - HeaderInfo(2, 'Random forests', 7), - HeaderInfo(2, 'Linear models', 8)] - """ - header_tree_exp = """ - - Models - """ - level = 2 - description = "Decision trees" - nav_str_exp = """ - - Models - - Naive Bayes - - **Decision trees** - - Random forests - - Linear models - """ - _test_navigation_flow( - self, - txt, - header_list_exp, - header_tree_exp, - level, - description, - nav_str_exp, - ) - - def test2(self) -> None: - txt = _get_markdown_example5() - _test_full_navigation_flow(self, txt) - - -# ############################################################################# -# Test_modify_header_level1 -# ############################################################################# - - -class Test_modify_header_level1(hunitest.TestCase): - def helper( - self, input_lines: List[str], level: int, expected_lines: List[str] - ) -> None: - """ - Helper method to test `modify_header_level` function. - - :param input_lines: list of input text lines - :param level: level adjustment to apply - :param expected_lines: list of expected output lines - """ - # Prepare inputs. - input_text = "\n".join(input_lines) - # Call tested function. - actual_lines = hmarkdo.modify_header_level(input_lines, level) - actual = "\n".join(actual_lines) - # Check output. - expected = "\n".join(expected_lines) - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test the inputs to increase headings. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "# Chapter 1", - "## Section 1.1", - "### Subsection 1.1.1", - "#### Sub-subsection 1.1.1.1", - ] - level = 1 - expected_lines = [ - "## Chapter 1", - "### Section 1.1", - "#### Subsection 1.1.1", - "##### Sub-subsection 1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test2(self) -> None: - """ - Test inputs to increase headings with level 5 becoming level 6. - """ - # Prepare inputs and outputs. - input_lines = ["# Chapter 1", "##### Sub-sub-subsection 1.1.1.1.1"] - level = 1 - expected_lines = ["## Chapter 1", "###### Sub-sub-subsection 1.1.1.1.1"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test3(self) -> None: - """ - Test inputs to increase headings including a paragraph which remains - unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["# Chapter 1", "Paragraph 1"] - level = 1 - expected_lines = ["## Chapter 1", "Paragraph 1"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test4(self) -> None: - """ - Test inputs of paragraphs which remain unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["Paragraph 1", "Paragraph 2"] - level = 1 - expected_lines = ["Paragraph 1", "Paragraph 2"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test5(self) -> None: - """ - Test to increase headings with mixed levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "# Chapter 1", - "##### Sub-sub-subsection 1.1.1.1.1", - "# Chapter 2", - "### Subsection 2.1", - "# Chapter 3", - ] - level = 1 - expected_lines = [ - "## Chapter 1", - "###### Sub-sub-subsection 1.1.1.1.1", - "## Chapter 2", - "#### Subsection 2.1", - "## Chapter 3", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test6(self) -> None: - """ - Test the inputs to decrease headings. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "## Section 1.1", - "### Subsection 1.1.1", - "#### Sub-subsection 1.1.1.1", - "##### Sub-sub-subsection 1.1.1.1.1", - ] - level = -1 - expected_lines = [ - "# Section 1.1", - "## Subsection 1.1.1", - "### Sub-subsection 1.1.1.1", - "#### Sub-sub-subsection 1.1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test7(self) -> None: - """ - Test inputs to decrease headings by one level. - """ - # Prepare inputs and outputs. - input_lines = [ - "## Chapter 1", - "##### Sub-subsection 1.1.1.1", - ] - level = -1 - expected_lines = [ - "# Chapter 1", - "#### Sub-subsection 1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test8(self) -> None: - """ - Test inputs of paragraphs which remain unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] - level = -1 - expected_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test9(self) -> None: - """ - Test increasing headers by 2 levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "# Chapter 1", - "## Section 1.1", - "### Subsection 1.1.1", - ] - level = 2 - expected_lines = [ - "### Chapter 1", - "#### Section 1.1", - "##### Subsection 1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test10(self) -> None: - """ - Test decreasing headers by 2 levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "### Chapter 1", - "#### Section 1.1", - "##### Subsection 1.1.1", - ] - level = -2 - expected_lines = [ - "# Chapter 1", # 3-2=1 - "## Section 1.1", # 4-2=2 - "### Subsection 1.1.1", # 5-2=3 - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test11(self) -> None: - """ - Test increasing headers by 2 levels. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "### Level 3", - "#### Level 4", - ] - level = 2 - expected_lines = [ - "##### Level 3", # 3+2=5 - "###### Level 4", # 4+2=6 - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - -# ############################################################################# -# Test_format_headers1 -# ############################################################################# - - -class Test_format_headers1(hunitest.TestCase): - def helper( - self, input_text: List[str], expected: List[str], max_lev: int - ) -> None: - """ - Process the given text with a specified maximum level and compare the - result with the expected output. - - :param input_text: the text to be processed - :param expected: the expected output after processing the text - :param max_lev: the maximum heading level to be formatted - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - write_file = os.path.join(scratch_dir, "write_file.txt") - # Call tested function. - hmarkdo.format_headers(input_text, write_file, max_lev=max_lev) - # Check output. - actual = hio.from_file(write_file) - self.assertEqual(actual, "\n".join(expected)) - - def test1(self) -> None: - """ - Test the inputs to check the basic formatting of headings. - """ - input_text = [ - "# Chapter 1", - "section text", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "section text", - ] - self.helper(input_text, expected, max_lev=1) - - def test2(self) -> None: - """ - Test inputs with headings beyond the maximum level to ensure they are - ignored during formatting. 
- """ - input_text = [ - "# Chapter 1", - "## Section 1.1", - "### Section 1.1.1", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "## ############################################################################", - "## Section 1.1", - "## ############################################################################", - "### Section 1.1.1", - ] - self.helper(input_text, expected, max_lev=2) - - def test3(self) -> None: - """ - Test the inputs to check that markdown line separators are removed. - """ - input_text = [ - "# Chapter 1", - "-----------------", - "Text", - "############", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "Text", - ] - self.helper(input_text, expected, max_lev=1) - - def test4(self) -> None: - """ - Test inputs where max_level is inferred from the file content. - """ - input_text = [ - "# Chapter 1", - "max_level=1", - "## Section 1.1", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "max_level=1", - "## Section 1.1", - ] - self.helper(input_text, expected, max_lev=2) - - def test5(self) -> None: - """ - Test inputs with no headers to ensure they remain unchanged. 
- """ - input_text = [ - "Only text", - "No headings", - ] - expected = [ - "Only text", - "No headings", - ] - self.helper(input_text, expected, max_lev=3) - - -# ############################################################################# -# Test_sanity_check_header_list1 -# ############################################################################# - - -class Test_sanity_check_header_list1(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the header list with valid level increase is accepted. - """ - # Prepare inputs. - header_list = get_header_list1() - # Call function. - hmarkdo.sanity_check_header_list(header_list) - - def test2(self) -> None: - """ - Test that the header list with an increase of more than one level - raises an error. - """ - # Prepare inputs. - header_list = get_header_list4() - # Call function. - with self.assertRaises(ValueError) as err: - hmarkdo.sanity_check_header_list(header_list) - # Check output. - actual = str(err.exception) - self.check_string(actual) - - def test3(self) -> None: - """ - Test that the header list is accepted when heading levels decrease by - more than one. - """ - # Prepare inputs. - header_list = get_header_list5() - # Call function. - hmarkdo.sanity_check_header_list(header_list) - - -# ############################################################################# -# Test__has_internal_capitals1 -# ############################################################################# - - -class Test__has_internal_capitals1(hunitest.TestCase): - """ - Test `_has_internal_capitals` function. - """ - - def helper(self, word: str, expected: bool) -> None: - """ - Test helper for `_has_internal_capitals`. - - :param word: word to test - :param expected: expected result - """ - # Run test. - actual = hmarhead._has_internal_capitals(word) - # Check outputs. - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test word with internal capital letters. - """ - # Prepare inputs. 
- word = "SimpleFeedForward" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test2(self) -> None: - """ - Test word with multiple internal capital letters. - """ - # Prepare inputs. - word = "DeepNPTS" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test3(self) -> None: - """ - Test word with capital only at the start. - """ - # Prepare inputs. - word = "Machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test4(self) -> None: - """ - Test all lowercase word. - """ - # Prepare inputs. - word = "learning" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test5(self) -> None: - """ - Test all uppercase word. - """ - # Prepare inputs. - word = "ML" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test6(self) -> None: - """ - Test single lowercase character. - """ - # Prepare inputs. - word = "a" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test7(self) -> None: - """ - Test single uppercase character. - """ - # Prepare inputs. - word = "A" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test8(self) -> None: - """ - Test empty string. - """ - # Prepare inputs. - word = "" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test9(self) -> None: - """ - Test camelCase word. - """ - # Prepare inputs. - word = "camelCase" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - -# ############################################################################# -# Test_capitalize_header1 -# ############################################################################# - - -class Test_capitalize_header1(hunitest.TestCase): - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. 
- txt = hprint.dedent(txt) - # Run function. - lines = txt.split("\n") - actual_lines = hmarkdo.capitalize_header(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test capitalizing a short two-word title. - """ - txt = r""" - * ML theory - """ - expected = r""" - * ML Theory - """ - self.helper(txt, expected) - - def test2(self) -> None: - """ - Test capitalizing a longer multi-word title. - """ - txt = r""" - * A map of machine learning - """ - expected = r""" - * A Map of Machine Learning - """ - self.helper(txt, expected) - - def test3(self) -> None: - """ - Test that strings inside backticks are preserved. - """ - txt = r""" - # Using `python` for Machine Learning - """ - expected = r""" - # Using `python` for Machine Learning - """ - self.helper(txt, expected) - - def test4(self) -> None: - """ - Test that strings inside single quotes are preserved. - """ - txt = r""" - * Working with 'machine learning' algorithms - """ - expected = r""" - * Working with 'machine learning' Algorithms - """ - self.helper(txt, expected) - - def test5(self) -> None: - """ - Test that strings inside double quotes are preserved. - """ - txt = r""" - # Understanding "deep learning" concepts - """ - expected = r""" - # Understanding "deep learning" Concepts - """ - self.helper(txt, expected) - - def test6(self) -> None: - """ - Test mixed usage of quotes and backticks. - """ - txt = r""" - * Using `python` and "machine learning" for 'data science' - """ - expected = r""" - * Using `python` and "machine learning" for 'data science' - """ - self.helper(txt, expected) - - def test7(self) -> None: - """ - Test complex title with various quote types. 
- """ - txt = r""" - # Introduction to `sklearn` and "data preprocessing" in 'python' - """ - expected = r""" - # Introduction to `sklearn` and "data preprocessing" in 'python' - """ - self.helper(txt, expected) - - def test8(self) -> None: - """ - Test that words with internal capitals are preserved. - """ - txt = r""" - # SimpleFeedForward model - """ - expected = r""" - # SimpleFeedForward Model - """ - self.helper(txt, expected) - - def test9(self) -> None: - """ - Test multiple words with internal capitals. - """ - txt = r""" - * DeepNPTS and SimpleFeedForward models - """ - expected = r""" - * DeepNPTS and SimpleFeedForward Models - """ - self.helper(txt, expected) - - def test10(self) -> None: - """ - Test mixed normal words and words with internal capitals. - """ - txt = r""" - # Using SimpleFeedForward for machine learning - """ - expected = r""" - # Using SimpleFeedForward for Machine Learning - """ - self.helper(txt, expected) - - def test11(self) -> None: - """ - Test that headers inside fenced code blocks are not processed. - """ - txt = r""" - # Main header - - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - - ## Another header - """ - expected = r""" - # Main Header - - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - - ## Another Header - """ - self.helper(txt, expected) - - def test12(self) -> None: - """ - Test headers inside multiple fenced code blocks are not processed. - """ - txt = r""" - # First header - - ```python - # comment in code - x = 1 - ``` - - ## Second header - - ```bash - # shell comment - echo "hello" - ``` - """ - expected = r""" - # First Header - - ```python - # comment in code - x = 1 - ``` - - ## Second Header - - ```bash - # shell comment - echo "hello" - ``` - """ - self.helper(txt, expected) - - def test13(self) -> None: - """ - Test that the first word after a numeric prefix is capitalized. 
- """ - txt = r""" - ## 4.4 the Victim Triangle - """ - expected = r""" - ## 4.4 The Victim Triangle - """ - self.helper(txt, expected) - - def test14(self) -> None: - """ - Test that "of", "a", "an" after a numeric prefix are capitalized. - """ - txt = r""" - ## 1.1 of mice and men - """ - expected = r""" - ## 1.1 Of Mice and Men - """ - self.helper(txt, expected) - - def test15(self) -> None: - """ - Test that "of", "a", "an" are capitalized. - """ - txt = r""" - ## of mice and men - """ - expected = r""" - ## Of Mice and Men - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_capitalize_header2 -# ############################################################################# - - -class Test_capitalize_header2(hunitest.TestCase): - """ - Test enhanced capitalize_header functionality for mixed case words and - fenced blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - """ - Helper method to test capitalize_header function. - - :param txt: input text to process - :param expected: expected output after processing - """ - # Prepare inputs. - txt = hprint.dedent(txt) - # Run function. - lines = txt.split("\n") - actual_lines = hmarkdo.capitalize_header(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test that SimpleFeedForward is preserved as-is. - """ - txt = r""" - # using SimpleFeedForward for predictions - """ - expected = r""" - # Using SimpleFeedForward for Predictions - """ - self.helper(txt, expected) - - def test2(self) -> None: - """ - Test that DeepNPTS is preserved as-is. - """ - txt = r""" - # training with DeepNPTS model - """ - expected = r""" - # Training with DeepNPTS Model - """ - self.helper(txt, expected) - - def test3(self) -> None: - """ - Test multiple mixed case words in the same header. 
- """ - txt = r""" - # comparing SimpleFeedForward and DeepNPTS models - """ - expected = r""" - # Comparing SimpleFeedForward and DeepNPTS Models - """ - self.helper(txt, expected) - - def test4(self) -> None: - """ - Test mixed case words combined with all caps words. - """ - txt = r""" - # using API with SimpleFeedForward for ML tasks - """ - expected = r""" - # Using API with SimpleFeedForward for ML Tasks - """ - self.helper(txt, expected) - - def test5(self) -> None: - """ - Test mixed case word as the first word in header. - """ - txt = r""" - # SimpleFeedForward network architecture - """ - expected = r""" - # SimpleFeedForward Network Architecture - """ - self.helper(txt, expected) - - def test6(self) -> None: - """ - Test that headers inside fenced blocks are not capitalized. - """ - txt = r""" - # Main header - Some text - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - """ - expected = r""" - # Main Header - Some text - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - """ - self.helper(txt, expected) - - def test7(self) -> None: - """ - Test that multiple headers inside fenced blocks are not capitalized. 
- """ - txt = r""" - # introduction to forecasting - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - q75 = forecast.quantile(0.75) - - # 90% confidence interval - q05 = forecast.quantile(0.05) - q95 = forecast.quantile(0.95) - - # mean and median - mean = forecast.mean - median = forecast.quantile(0.5) - ``` - # conclusion - """ - expected = r""" - # Introduction to Forecasting - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - q75 = forecast.quantile(0.75) - - # 90% confidence interval - q05 = forecast.quantile(0.05) - q95 = forecast.quantile(0.95) - - # mean and median - mean = forecast.mean - median = forecast.quantile(0.5) - ``` - # Conclusion - """ - self.helper(txt, expected) - - def test8(self) -> None: - """ - Test that headers in fenced blocks with language specifier are not - capitalized. - """ - txt = r""" - # data processing - ```bash - # run the script - python script.py - ``` - """ - expected = r""" - # Data Processing - ```bash - # run the script - python script.py - ``` - """ - self.helper(txt, expected) - - def test9(self) -> None: - """ - Test mixed case words inside fenced blocks are preserved. - """ - txt = r""" - # using SimpleFeedForward model - ```python - # SimpleFeedForward implementation - class SimpleFeedForward: - pass - ``` - """ - expected = r""" - # Using SimpleFeedForward Model - ```python - # SimpleFeedForward implementation - class SimpleFeedForward: - pass - ``` - """ - self.helper(txt, expected) - - def test10(self) -> None: - """ - Test multiple fenced blocks in the same document. 
- """ - txt = r""" - # first section - ```python - # code block 1 - x = 1 - ``` - # second section - ```python - # code block 2 - y = 2 - ``` - """ - expected = r""" - # First Section - ```python - # code block 1 - x = 1 - ``` - # Second Section - ```python - # code block 2 - y = 2 - ``` - """ - self.helper(txt, expected) - - def test11(self) -> None: - """ - Test that slide titles (starting with *) also preserve mixed case. - """ - txt = r""" - * using SimpleFeedForward for predictions - """ - expected = r""" - * Using SimpleFeedForward for Predictions - """ - self.helper(txt, expected) - - def test12(self) -> None: - """ - Test mixed case words with punctuation. - """ - txt = r""" - # SimpleFeedForward: a neural network approach - """ - expected = r""" - # SimpleFeedForward: a Neural Network Approach - """ - self.helper(txt, expected) - - def test13(self) -> None: - """ - Test that normal words without mixed case are still capitalized - properly. - """ - txt = r""" - # introduction to machine learning - """ - expected = r""" - # Introduction to Machine Learning - """ - self.helper(txt, expected) - - def test14(self) -> None: - """ - Test empty fenced blocks don't cause issues. - """ - txt = r""" - # header before - ``` - ``` - # header after - """ - expected = r""" - # Header Before - ``` - ``` - # Header After - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_has_mixed_case1 -# ############################################################################# - - -class Test_has_mixed_case1(hunitest.TestCase): - """ - Test the _has_mixed_case helper function. - """ - - def helper(self, word: str, expected: bool) -> None: - """ - Test helper for has_mixed_case. - - :param word: word to test - :param expected: expected result - """ - # Call function. - actual = hmarkdo.has_mixed_case(word) - # Check output. 
- self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test SimpleFeedForward has mixed case. - """ - # Prepare inputs. - word = "SimpleFeedForward" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test2(self) -> None: - """ - Test DeepNPTS has mixed case (all caps after first). - """ - # Prepare inputs. - word = "DeepNPTS" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test3(self) -> None: - """ - Test Machine does not have mixed case (only first char capital). - """ - # Prepare inputs. - word = "Machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test4(self) -> None: - """ - Test lowercase word has no mixed case. - """ - # Prepare inputs. - word = "machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test5(self) -> None: - """ - Test all caps word has mixed case (caps after first position). - """ - # Prepare inputs. - word = "API" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test6(self) -> None: - """ - Test single character has no mixed case. - """ - # Prepare inputs. - word = "A" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test7(self) -> None: - """ - Test two character word with first capital has no mixed case. - """ - # Prepare inputs. - word = "At" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test8(self) -> None: - """ - Test two character word with both caps has mixed case. - """ - # Prepare inputs. - word = "ML" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test9(self) -> None: - """ - Test camelCase word has mixed case. - """ - # Prepare inputs. - word = "camelCase" - # Prepare outputs. - expected = True - # Run test. 
- self.helper(word, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py deleted file mode 100644 index f12ae2d5a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py +++ /dev/null @@ -1,377 +0,0 @@ -import logging -from typing import List, Tuple, cast - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: - res = [ - hmarkdo.HeaderInfo(level, text, 5 * i + 1) - for i, (level, text) in enumerate(data) - ] - return res - - -def get_header_list6() -> hmarkdo.HeaderList: - """ - - Spelling - - All - - LLM - - Linter - - Python - - Naming - - LLM - - Linter - - Docstrings - - LLM - - Linter - - Unit_tests - - All - - LLM - - Linter - """ - data = [ - (1, "Spelling"), - (2, "All"), - (3, "LLM"), - (3, "Linter"), - (1, "Python"), - (2, "Naming"), - (3, "LLM"), - (3, "Linter"), - (2, "Docstrings"), - (3, "LLM"), - (3, "Linter"), - (1, "Unit_tests"), - (2, "All"), - (3, "LLM"), - (3, "Linter"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_guidelines_txt1() -> str: - txt = r""" - # General - - ## Spelling - - ### LLM - - ### Linter - - - Spell commands in lower case and programs with the first letter in upper case - - E.g., `git` as a command, `Git` as a program - - E.g., capitalize the first letter of `Python` - - Capitalize `JSON`, `CSV`, `DB` and other abbreviations - - # Python - - ## Naming - - ### LLM - - - Name functions using verbs and verbs/actions - - Good: `download_data()`, `process_input()`, `calculate_sum()` - - Good: Python internal functions 
as `__repr__`, `__init__` are valid - - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid - - Name classes using nouns - - Good: `Downloader()`, `DataProcessor()`, `User()` - - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` - - ### Linter - - - Name executable Python scripts using verbs and actions - - E.g., `download.py` and not `downloader.py` - - # Unit_tests - - ## Rules - - ### LLM - - - A test class should test only one function or class to help understanding - test failures - - A test method should only test a single case to ensures clarity and - precision in testing - - E.g., "for these inputs the function responds with this output" - """ - txt = hprint.dedent(txt) - txt = cast(str, txt) - return txt - - -# ############################################################################# -# Test_convert_header_list_into_guidelines1 -# ############################################################################# - - -class Test_convert_header_list_into_guidelines1(hunitest.TestCase): - def test1(self) -> None: - """ - Test converting a header list into guidelines. - """ - # Prepare inputs. - header_list = get_header_list6() - # Call function. - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Check output. 
- actual = "\n".join(map(str, guidelines)) - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - HeaderInfo(1, 'Python:Naming:LLM', 31) - HeaderInfo(1, 'Python:Naming:Linter', 36) - HeaderInfo(1, 'Python:Docstrings:LLM', 46) - HeaderInfo(1, 'Python:Docstrings:Linter', 51) - HeaderInfo(1, 'Unit_tests:All:LLM', 66) - HeaderInfo(1, 'Unit_tests:All:Linter', 71) - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_extract_rules1 -# ############################################################################# - - -class Test_extract_rules1(hunitest.TestCase): - def helper(self, selection_rules: List[str], expected: str) -> None: - """ - Test extracting rules from a markdown file. - """ - # Prepare inputs. - guidelines = get_header_list6() - guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) - # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) - # Check output. - actual = "\n".join(map(str, selected_guidelines)) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:*:LLM"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - """ - self.helper(selection_rules, expected) - - def test2(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:NONE:LLM"] - expected = """ - """ - self.helper(selection_rules, expected) - - def test3(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:All:*"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - """ - self.helper(selection_rules, expected) - - def test4(self) -> None: - """ - Test extracting rules from a markdown file. 
- """ - selection_rules = ["Spelling:All:*", "Python:*:*"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - HeaderInfo(1, 'Python:Naming:LLM', 31) - HeaderInfo(1, 'Python:Naming:Linter', 36) - HeaderInfo(1, 'Python:Docstrings:LLM', 46) - HeaderInfo(1, 'Python:Docstrings:Linter', 51) - """ - self.helper(selection_rules, expected) - - -# ############################################################################# -# Test_parse_rules_from_txt1 -# ############################################################################# - - -class Test_parse_rules_from_txt1(hunitest.TestCase): - def helper(self, text: str, expected: List[str]) -> None: - # Prepare inputs. - text = hprint.dedent(text) - lines = text.split("\n") - # Call function. - actual = hmarkdo.parse_rules_from_txt(lines) - # Check output. - actual = str(actual) - expected = str(expected) - self.assert_equal(actual, expected, dedent=True) - - def test_basic_list1(self) -> None: - """ - Test extracting simple first-level bullet points. - """ - text = """ - - Item 1 - - Item 2 - - Item 3 - """ - expected = ["- Item 1", "- Item 2", "- Item 3"] - self.helper(text, expected) - - def test_nested_list1(self) -> None: - """ - Test extracting bullet points with nested sub-items. - """ - text = """ - - Item 1 - - Item 2 - - Sub-item 2.1 - - Sub-item 2.2 - - Item 3 - """ - expected = [ - "- Item 1", - "- Item 2\n - Sub-item 2.1\n - Sub-item 2.2", - "- Item 3", - ] - self.helper(text, expected) - - def test_empty_list1(self) -> None: - """ - Test handling empty input. - """ - text = "" - expected = [] - self.helper(text, expected) - - -# ############################################################################# -# Test_end_to_end_rules1 -# ############################################################################# - - -class Test_end_to_end_rules1(hunitest.TestCase): - def test_get_header_list1(self) -> None: - """ - Test extracting headers from a markdown file. 
- """ - # Prepare inputs. - txt = get_guidelines_txt1() - max_level = 4 - # Run function. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) - # Check output. - actual = "\n".join(map(str, header_list)) - expected = """ - HeaderInfo(1, 'General', 1) - HeaderInfo(2, 'Spelling', 3) - HeaderInfo(3, 'LLM', 5) - HeaderInfo(3, 'Linter', 7) - HeaderInfo(1, 'Python', 14) - HeaderInfo(2, 'Naming', 16) - HeaderInfo(3, 'LLM', 18) - HeaderInfo(3, 'Linter', 28) - HeaderInfo(1, 'Unit_tests', 33) - HeaderInfo(2, 'Rules', 35) - HeaderInfo(3, 'LLM', 37) - """ - self.assert_equal(actual, expected, dedent=True) - # Run function. - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Check output. - actual = "\n".join(map(str, guidelines)) - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.assert_equal(actual, expected, dedent=True) - - def helper_extract_rules( - self, selection_rules: List[str], expected: str - ) -> None: - """ - Helper function to test extracting rules from a markdown file. - """ - # Prepare inputs. - txt = get_guidelines_txt1() - max_level = 4 - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) - # Check output. - actual = "\n".join(map(str, selected_guidelines)) - self.assert_equal(actual, expected, dedent=True) - - def test_extract_rules1(self) -> None: - """ - Test extracting rules from a markdown file. 
- """ - selection_rules = ["General:*:LLM"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules2(self) -> None: - selection_rules = ["General:NONE:LLM"] - expected = """ - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules3(self) -> None: - selection_rules = ["*:*:LLM"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules4(self) -> None: - selection_rules = ["*:*:LLM", "General:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules5(self) -> None: - selection_rules = ["*:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules6(self) -> None: - selection_rules = ["*:*:*", "General:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py deleted file mode 
100644 index 39137551e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py +++ /dev/null @@ -1,399 +0,0 @@ -import logging -from typing import List - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_process_slides -# ############################################################################# - - -class Test_process_slides(hunitest.TestCase): - @staticmethod - def transform( - slide_text: List[str], - *, - slide_title: str = "", - slide_line_number: int = 0, - ) -> str: - """ - Example adding a `@` to the beginning of each line of the slide. - - :param slide_text: List of lines in the slide - :param slide_title: Title of the slide - :param slide_line_number: Line number of the slide - :return: Transformed text - """ - _LOG.debug("input=\n%s", "\n".join(slide_text)) - # Transform. - text_out = [f"@{line}" for line in slide_text] - _LOG.debug("output=\n%s", "\n".join(text_out)) - return text_out - - def helper(self, text: str, expected: str) -> None: - """ - Test helper for process_slides. - - :param text: Input text with slides - :param expected: Expected output after transformation - """ - # Prepare inputs. - text = hprint.dedent(text, remove_lead_trail_empty_lines_=False) - # Process. - actual = hmarkdo.process_slides(text, self.transform) - # Check output. - expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test multiple slides. 
- """ - text = """ - * Slide 1 - - Point 1 - - Point 2 - - * Slide 2 - - Point A - - Point B - """ - expected = """ - @* Slide 1 - @ - Point 1 - @ - Point 2 - @ - @* Slide 2 - @ - Point A - @ - Point B - """ - self.helper(text, expected) - - def test2(self) -> None: - """ - Test single line slide. - """ - text = """ - * Single line slide - """ - expected = """ - @* Single line slide - """ - self.helper(text, expected) - - def test3(self) -> None: - """ - Test slide with inline comment. - """ - text = """ - * Slide with comment - # This is a comment - - Point 1 - """ - expected = """ - @* Slide with comment - @ # This is a comment - @ - Point 1 - """ - self.helper(text, expected) - - def test4(self) -> None: - """ - Test slide with comment block. - """ - text = """ - * Slide with block - - - Point 1 - """ - expected = """ - @* Slide with block - @ - @ - Point 1 - """ - self.helper(text, expected) - - def test5(self) -> None: - text = """ - * Slide 1 - * Slide 2 - """ - expected = """ - @* Slide 1 - @* Slide 2 - """ - self.helper(text, expected) - - def test6(self) -> None: - text = """ - - * Slide 1 - * Slide 2 - """ - expected = """ - - @* Slide 1 - @* Slide 2 - """ - self.helper(text, expected) - - def test7(self) -> None: - text = """ - - * Slide 1 - * Slide 2 - - """ - expected = """ - - @* Slide 1 - @* Slide 2 - @ - """ - self.helper(text, expected) - - def test8(self) -> None: - text = """ - //* Slide 1 - * Slide 2 - - """ - expected = """ - //* Slide 1 - @* Slide 2 - @ - """ - self.helper(text, expected) - - -# ############################################################################# -# Test_convert_slide_to_markdown -# ############################################################################# - - -class Test_convert_slide_to_markdown(hunitest.TestCase): - """ - Test converting slide bullets to markdown headers. - """ - - def helper(self, input_text, expected_text) -> None: - """ - Test helper for convert_slide_to_markdown. 
- - :param input_text: Input text with slide bullets - :param expected_text: Expected output with markdown headers - """ - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarkdo.convert_slide_to_markdown(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test converting a simple slide bullet to markdown header. - """ - input_text = """* This is a slide title""" - expected_text = """##### This is a slide title""" - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test converting multiple slide bullets. - """ - input_text = """ - * First slide - - Some content - * Second slide - - More content - """ - expected_text = """ - ##### First slide - - Some content - ##### Second slide - - More content - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test converting slides mixed with other content. - """ - input_text = """ - Some intro text - * Slide title - - Point 1 - - Point 2 - Regular markdown text - * Another slide - """ - expected_text = """ - Some intro text - ##### Slide title - - Point 1 - - Point 2 - Regular markdown text - ##### Another slide - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test converting text with no slide bullets. - """ - input_text = """ - Regular text - More text - - Regular bullet point - """ - expected_text = """ - Regular text - More text - - Regular bullet point - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test converting empty input. 
- """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_convert_markdown_to_slide -# ############################################################################# - - -class Test_convert_markdown_to_slide(hunitest.TestCase): - """ - Test converting markdown headers to slide bullets. - """ - - def helper(self, input_text: str, expected_text: str) -> None: - """ - Test helper for convert_markdown_to_slide. - - :param input_text: Input text with markdown headers - :param expected_text: Expected output with slide bullets - """ - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarkdo.convert_markdown_to_slide(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test converting a simple h5 header to slide bullet. - """ - input_text = """ - ##### This is a slide title - """ - expected_text = """ - * This is a slide title - """ - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test converting multiple h5 headers. - """ - input_text = """ - ##### First slide - - Some content - ##### Second slide - - More content - """ - expected_text = """ - * First slide - - Some content - * Second slide - - More content - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test converting headers mixed with other content. - """ - input_text = """ - Some intro text - ##### Slide title - - Point 1 - - Point 2 - Regular markdown text - ##### Another slide - """ - expected_text = """ - Some intro text - * Slide title - - Point 1 - - Point 2 - Regular markdown text - * Another slide - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test converting text with no h5 headers. 
- """ - input_text = """ - Regular text - # H1 header - ## H2 header - #### H4 header - """ - expected_text = """ - Regular text - # H1 header - ## H2 header - #### H4 header - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test converting empty input. - """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test6(self) -> None: - """ - Test that converting slide to markdown and back gives original result. - """ - # Prepare inputs. - input_text = """ - * First slide - - Some content - * Second slide - Regular text - """ - original_lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - markdown_lines = hmarkdo.convert_slide_to_markdown(original_lines) - roundtrip_lines = hmarkdo.convert_markdown_to_slide(markdown_lines) - # Check outputs. - self.assert_equal(str(roundtrip_lines), str(original_lines)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py deleted file mode 100644 index f651aa3bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py +++ /dev/null @@ -1,196 +0,0 @@ -import logging -import pprint -from typing import Dict, List - -import helpers.hmarkdown_tables as hmartabl -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_tables_with_tags1 -# ############################################################################# - - -class Test_replace_tables_with_tags1(hunitest.TestCase): - def helper( - self, text: str, expected_lines: List[str], expected_map: Dict[str, str] - ) -> None: - """ - Test replacing 
markdown tables with tags. - """ - lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) - lines = lines.split("\n") - # Call function. - actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) - # Check output. - table_map_as_str = pprint.pformat(table_map) - expected_map_as_str = pprint.pformat(expected_map) - self.assert_equal(table_map_as_str, expected_map_as_str) - # - actual_lines = "\n".join(actual_lines) - expected_lines = hprint.dedent( - expected_lines, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual_lines, expected_lines) - - def helper_round_trip(self, text: str) -> None: - """ - Test the round trip. - """ - # Do the round trip. - lines = text.split("\n") - actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) - act_text = hmartabl.replace_tags_with_tables(actual_lines, table_map) - # Check output. - act_text = "\n".join(act_text) - self.assert_equal(act_text, text) - - def test1(self) -> None: - """ - Test replacing simple markdown table with tags. - """ - # Prepare inputs. - text = """ - Some text before - | Column 1 | Column 2 | - |----------|----------| - | Value 1 | Value 2 | - | Value 3 | Value 4 | - Text between tables - | Name | Age | City | - |------|-----|------| - | John | 25 | NYC | - Some text after - """ - # Prepare outputs. - expected_lines = """ - Some text before - - Text between tables - - Some text after - """ - # Check table map. - expected_map = { - "1": "| Column 1 | Column 2 |\n|----------|----------|\n| Value 1 | Value 2 |\n| Value 3 | Value 4 |", - "2": "| Name | Age | City |\n|------|-----|------|\n| John | 25 | NYC |", - } - self.helper(text, expected_lines, expected_map) - - def test2(self) -> None: - """ - Test table with alignment indicators. 
- """ - text = """ - | Left | Center | Right | - |:-----|:------:|------:| - | L1 | C1 | R1 | - | L2 | C2 | R2 | - """ - expected_lines = """ - - """ - expected_map = { - "1": "| Left | Center | Right |\n|:-----|:------:|------:|\n| L1 | C1 | R1 |\n| L2 | C2 | R2 |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test3(self) -> None: - """ - Test table with minimal structure. - """ - text = """ - Before - | A | B | - |---|---| - | 1 | 2 | - After - """ - expected_lines = """ - Before - - After - """ - expected_map = {"1": "| A | B |\n|---|---|\n| 1 | 2 |"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test4(self) -> None: - """ - Test table with empty cells. - """ - text = """ - | Col1 | Col2 | Col3 | - |------|------|------| - | A | | C | - | | B | | - """ - expected_lines = """ - - """ - expected_map = { - "1": "| Col1 | Col2 | Col3 |\n|------|------|------|\n| A | | C |\n| | B | |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test5(self) -> None: - """ - Test multiple tables with different column counts. - """ - text = """ - First table: - | A | B | - |---|---| - | 1 | 2 | - - Second table: - | X | Y | Z | W | - |---|---|---|---| - | a | b | c | d | - | e | f | g | h | - """ - expected_lines = """ - First table: - - - Second table: - - """ - expected_map = { - "1": "| A | B |\n|---|---|\n| 1 | 2 |", - "2": "| X | Y | Z | W |\n|---|---|---|---|\n| a | b | c | d |\n| e | f | g | h |", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test6(self) -> None: - """ - Test table with indentation. 
- """ - text = """ - Outside - | Col1 | Col2 | - |------|------| - | Val1 | Val2 | - End - """ - expected_lines = """ - Outside - - End - """ - expected_map = { - "1": " | Col1 | Col2 |\n |------|------|\n | Val1 | Val2 |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py deleted file mode 100644 index fc88b62a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py +++ /dev/null @@ -1,228 +0,0 @@ -import logging - -import helpers.hmarkdown as hmarkdo -import helpers.hmarkdown_toc as hmartoc -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_extract_yaml_frontmatter1 -# ############################################################################# - - -class Test_extract_yaml_frontmatter1(hunitest.TestCase): - """ - Test the extract_yaml_frontmatter function. - """ - - def helper( - self, - txt: str, - expected_frontmatter: list, - expected_remaining: list, - ) -> None: - """ - Test helper for extract_yaml_frontmatter. - - :param txt: Input text to process - :param expected_frontmatter: Expected front matter lines - :param expected_remaining: Expected remaining lines - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - frontmatter, remaining = hmartoc.extract_yaml_frontmatter(lines) - # Check outputs. 
- self.assertEqual(frontmatter, expected_frontmatter) - self.assertEqual(remaining, expected_remaining) - - def test1(self) -> None: - """ - Test extracting YAML front matter from a file. - """ - # Prepare inputs. - txt = """ - --- - title: My Document - date: 2024-01-01 - --- - # Content - This is the main content. - """ - # Prepare outputs. - expected_frontmatter = [ - "---", - "title: My Document", - "date: 2024-01-01", - "---", - ] - expected_remaining = ["# Content", "This is the main content."] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test2(self) -> None: - """ - Test processing a file without YAML front matter. - """ - # Prepare inputs. - txt = """ - # Content - This is the main content. - """ - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = ["# Content", "This is the main content."] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test3(self) -> None: - """ - Test handling incomplete YAML front matter (missing closing delimiter). - """ - # Prepare inputs. - txt = """ - --- - title: My Document - # Content without closing delimiter - """ - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = lines - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test4(self) -> None: - """ - Test extracting empty YAML front matter. - """ - # Prepare inputs. - txt = """ - --- - --- - # Content - """ - # Prepare outputs. - expected_frontmatter = ["---", "---"] - expected_remaining = ["# Content"] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test5(self) -> None: - """ - Test that separators not at the beginning are not treated as front matter. - """ - # Prepare inputs. 
- txt = """ - # Content - --- - More content - """ - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = lines - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - -# ############################################################################# -# Test_remove_table_of_contents1 -# ############################################################################# - - -class Test_remove_table_of_contents1(hunitest.TestCase): - def test1(self) -> None: - """ - Test removing table of contents from markdown text. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - - - [Section 1](#section-1) - - [Section 2](#section-2) - - - ## Section 1 - - Content of section 1. - """ - expected = """ - # Introduction - - This is an introduction. - - - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. - actual = hmarkdo.remove_table_of_contents(text) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Test text without table of contents remains unchanged. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. - actual = hmarkdo.remove_table_of_contents(text) - # Check output. - self.assert_equal(actual, text) - - def test3(self) -> None: - """ - Test removing multi-line table of contents. - """ - # Prepare inputs. - text = """ - # Introduction - - - - [Section 1](#section-1) - - [Subsection 1.1](#subsection-11) - - [Section 2](#section-2) - - [Subsection 2.1](#subsection-21) - - [Subsection 2.2](#subsection-22) - - - ## Section 1 - """ - expected = """ - # Introduction - - - - ## Section 1 - """ - text = hprint.dedent(text) - # Run test. 
- actual = hmarkdo.remove_table_of_contents(text) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py deleted file mode 100644 index 16f0f097a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py +++ /dev/null @@ -1,394 +0,0 @@ -import logging - -import helpers.hmkdocs as hmkdocs -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_dedent_python_code_blocks1 -# ############################################################################# - - -class Test_dedent_python_code_blocks1(hunitest.TestCase): - def test_simple_code_block(self) -> None: - """ - Test dedenting a simple Python code block. - """ - # Prepare inputs. - text = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - expected = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, expected) - - def test_multiple_code_blocks(self) -> None: - """ - Test dedenting multiple Python code blocks. - """ - # Prepare inputs. 
- text = """ - # Example 1 - - ```python - def hello(): - print("Hello") - ``` - - # Example 2 - - ```python - def goodbye(): - print("Goodbye") - ``` - """ - expected = """ - # Example 1 - - ```python - def hello(): - print("Hello") - ``` - - # Example 2 - - ```python - def goodbye(): - print("Goodbye") - ``` - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, expected) - - def test_no_python_blocks(self) -> None: - """ - Test text without Python code blocks remains unchanged. - """ - # Prepare inputs. - text = """ - # Example - - This is just text. - - ```javascript - console.log("Hello"); - ``` - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, text) - - def test_already_aligned_code(self) -> None: - """ - Test code that is already aligned. - """ - # Prepare inputs. - text = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, text) - - -# ############################################################################# -# Test_replace_indentation1 -# ############################################################################# - - -class Test_replace_indentation1(hunitest.TestCase): - def test_two_to_four_spaces(self) -> None: - """ - Test replacing 2-space indentation with 4-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. 
- expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_four_to_two_spaces(self) -> None: - """ - Test replacing 4-space indentation with 2-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=4, output_spaces=2 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_two_to_eight_spaces(self) -> None: - """ - Test replacing 2-space indentation with 8-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=8 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_three_to_six_spaces(self) -> None: - """ - Test replacing 3-space indentation with 6-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=3, output_spaces=6 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_no_indentation(self) -> None: - """ - Test text without indentation remains unchanged. - """ - # Prepare inputs. - text = """ - - Item 1 - - Item 2 - - Item 3 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. 
- self.assert_equal(actual, text) - - def test_same_input_output_spaces(self) -> None: - """ - Test that using same input and output spaces leaves text unchanged. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=2 - ) - # Check output. - self.assert_equal(actual, text) - - def test_empty_text(self) -> None: - """ - Test empty text handling. - """ - # Prepare inputs. - text = "" - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. - self.assert_equal(actual, text) - - def test_zero_to_four_spaces(self) -> None: - """ - Test converting zero indentation to 4 spaces (edge case). - """ - # Prepare inputs. - text = """ - Item 1 - Item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=1, output_spaces=4 - ) - # Check output. - self.assert_equal(actual, text) - - -# ############################################################################# -# Test_preprocess_mkdocs_markdown1 -# ############################################################################# - - -class Test_preprocess_mkdocs_markdown1(hunitest.TestCase): - def test_full_preprocessing(self) -> None: - """ - Test the complete preprocessing pipeline. - """ - # Prepare inputs. 
- text = """ - # Introduction - - - - [Section 1](#section-1) - - [Section 2](#section-2) - - - ## Section 1 - - Here is some Python code: - - ```python - def example(): - print("Hello") - if True: - print("World") - ``` - - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - """ - expected = """ - # Introduction - - - - ## Section 1 - - Here is some Python code: - - ```python - def example(): - print("Hello") - if True: - print("World") - ``` - - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. - self.assert_equal(actual, expected) - - def test_empty_text(self) -> None: - """ - Test preprocessing empty text. - """ - # Prepare inputs. - text = "" - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. - self.assert_equal(actual, text) - - def test_text_without_preprocessing_needs(self) -> None: - """ - Test text that doesn't need any preprocessing. - """ - # Prepare inputs. - text = """ - # Simple Markdown - - This is just simple text. - - - Item 1 - - Item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. 
- self.assert_equal(actual, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py deleted file mode 100644 index abb48a154..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging - -import helpers.hmodule as hmodule -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_hmodule1 -# ############################################################################# - - -class Test_hmodule1(hunitest.TestCase): - def test_has_module1(self) -> None: - """ - Check that the function returns true for the existing package. - """ - self.assertTrue(hmodule.has_module("numpy")) - - def test_has_not_module1(self) -> None: - """ - Check that the function returns false for the non-existing package. 
- """ - self.assertFalse(hmodule.has_module("no_such_module")) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py deleted file mode 100644 index 4d6b7bceb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py +++ /dev/null @@ -1,215 +0,0 @@ -import logging - -import numpy as np -import collections - -import helpers.hnumpy as hnumpy -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestRandomSeedContext -# ############################################################################# - - -class TestRandomSeedContext(hunitest.TestCase): - def test_example1(self) -> None: - """ - Getting more random numbers without context manager changes the - sequence of random numbers. - """ - n = 3 - # First batch. - np.random.seed(0) - vals1a = np.random.randn(n) - vals2a = np.random.randn(n) - # Second batch. - np.random.seed(0) - vals1b = np.random.randn(n) - vals = np.random.randn(n) - _ = vals - vals2b = np.random.randn(n) - # Check. - self.assertEqual(str(vals1a), str(vals1b)) - # Of course this might fail with a vanishingly small probability. - self.assertNotEqual(str(vals2a), str(vals2b)) - - def test_example2(self) -> None: - """ - Getting more random numbers with context manager doesn't change the - sequence of random numbers. - """ - n = 3 - # First batch. - np.random.seed(0) - vals1a = np.random.randn(n) - vals2a = np.random.randn(n) - # Second batch. - np.random.seed(0) - vals1b = np.random.randn(n) - with hnumpy.random_seed_context(42): - vals = np.random.randn(n) - _ = vals - vals2b = np.random.randn(n) - # Check. 
- self.assertEqual(str(vals1a), str(vals1b)) - self.assertEqual(str(vals2a), str(vals2b)) - - -# ############################################################################# -# TestFloorWithPrecision -# ############################################################################# - - -class TestFloorWithPrecision(hunitest.TestCase): - def _test_floor_with_precision( - self, - value: float, - precision: int, - expected: str, - ) -> None: - """ """ - actual = hnumpy.floor_with_precision(value, precision) - self.assert_equal(str(actual), expected) - - def test_floor_with_precision1(self) -> None: - """ - Test for negative float values as input. - """ - expected_as_str = "-4.63" - self._test_floor_with_precision(-4.6385, 2, expected_as_str) - - def test_floor_with_precision2(self) -> None: - """ - Test for Zero precision. - """ - expected_as_str = "-4.0" - self._test_floor_with_precision(-4.6385, 0, expected_as_str) - - def test_floor_with_precision3(self) -> None: - """ - Test for negative precision. - """ - value = 4.6385 - amount_precision = -2 - with self.assertRaises(AssertionError) as cm: - hnumpy.floor_with_precision(value, amount_precision) - # Check. - actual = str(cm.exception) - expected = """ - * Failed assertion * - 0 <= -2 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_floor_with_precision4(self) -> None: - """ - Test for positive float values as input. - """ - expected_as_str = "4.63" - self._test_floor_with_precision(4.6385, 2, expected_as_str) - - def test_floor_with_precision5(self) -> None: - """ - Test for integer values as input. - """ - expected_as_str = "4.0" - self._test_floor_with_precision(4, 0, expected_as_str) - - def test_floor_with_precision6(self) -> None: - """ - Test for very small value as input. - """ - expected = 0.0000532 - self._test_floor_with_precision(0.0000532999, 7, str(expected)) - - def test_floor_with_precision7(self) -> None: - """ - Test for very large value as input. 
- """ - expected_as_str = "4289734.12345" - self._test_floor_with_precision(4289734.1234599999, 5, expected_as_str) - - -# ############################################################################# -# Test_OrderedDict_repr_str -# ############################################################################# - - -class Test_OrderedDict_repr_str(hunitest.TestCase): - """ - The tests are used to gatekeep the expected behavior of - dunder method __str__ and __repr__ for the OrderedDict class. - - The tests stem from changes in Python 3.12. Observe below: - - Python 3.9.5: - >>> from collections import OrderedDict - >>> import numpy - >>> dct = OrderedDict({ "test": numpy.int64(42)}) - >>> dct["test"] - 42 - >>> print(dct) - OrderedDict([('test', 42)]) - >>> str(dct) - "OrderedDict([('test', 42)])" - >>> repr(dct) - "OrderedDict([('test', 42)])" - >>> str(dct["test"]) - '42' - >>> repr(dct["test"]) - '42' - - Python 3.12.3: - >>> from collections import OrderedDict - >>> import numpy - >>> dct = OrderedDict({"test": numpy.int64(42)}) - >>> dct = OrderedDict({"test": numpy.int64(42)}) - KeyboardInterrupt - >>> str(dct) - "OrderedDict({'test': np.int64(42)})" - >>> repr(dct) - "OrderedDict({'test': np.int64(42)})" - >>> str(dct["test"]) - '42' - >>> repr(dct["test"]) - 'np.int64(42)' - """ - - def test_str_single1(self) -> None: - """ - Test that the __str__ method on a single item in OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = str(d["test"]) - expected = "42" - self.assert_equal(actual, expected) - - def test_repr_single1(self) -> None: - """ - Test that the __repr__ method on a single item in OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = repr(d["test"]) - expected = "np.int64(42)" - self.assert_equal(actual, expected) - - def test_str_full1(self) -> None: - """ - Test that the __str__ method of OrderedDict returns the expected string. 
- """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = str(d) - expected = "OrderedDict({'test': np.int64(42)})" - self.assert_equal(actual, expected) - - def test_repr_full1(self) -> None: - """ - Test that the __repr__ method of OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = repr(d) - expected = "OrderedDict({'test': np.int64(42)})" - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py deleted file mode 100644 index 6106dd551..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py +++ /dev/null @@ -1,392 +0,0 @@ -import abc -import logging -from typing import Any, Callable, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hobject as hobject -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _Obj_to_str_TestCase -# ############################################################################# - - -# Note that we can't derive this class from `hunitest.TestCase` otherwise the -# unit test framework will try to run the tests in this class. -class _Obj_to_str_TestCase(abc.ABC): - """ - Test case for testing `obj_to_str()` and `obj_to_repr()`. - """ - - @abc.abstractmethod - def get_object(self) -> Any: - """ - Build object to test. - """ - ... - - def helper(self, *, expected: Optional[str] = None, **kwargs: Any) -> None: - obj = self.get_object() - hdbg.dassert_is_not(obj, None) - # - txt: List[str] = [] - # Get `str()`. 
- txt.append(hprint.frame("str:")) - txt.append(hobject.obj_to_str(obj, **kwargs)) - # Get `repr()`. - txt.append(hprint.frame("repr:")) - txt.append(hobject.obj_to_repr(obj, **kwargs)) - # Concat. - txt = "\n".join(txt) - # Check. - if expected is None: - self.check_string(txt, purify_text=True) - else: - hdbg.dassert_isinstance(expected, str) - self.assert_equal(txt, expected, purify_text=True, fuzzy_match=True) - - def test1(self, expected: str) -> None: - """ - Use `__dict__` to extract the attributes. - """ - self.helper(expected=expected, attr_mode="__dict__") - - def test2(self, expected: str) -> None: - """ - Use `dir` to extract the attributes. - """ - self.helper(expected=expected, attr_mode="dir") - - def test3(self, expected: str) -> None: - """ - Use `__dict__` and print the type of the attributes. - """ - self.helper(expected=expected, print_type=True) - - def test4(self) -> None: - """ - Print only callable attributes. - """ - self.helper(callable_mode="all") - - def test5(self) -> None: - """ - Print only private attributes. - """ - self.helper(private_mode="all") - - def test6(self) -> None: - """ - Print only dunder attributes. - """ - self.helper(dunder_mode="all") - - -# ############################################################################# -# _Object1 -# ############################################################################# - - -class _Object1: - """ - Object storing only scalar members and not other nested objects. 
- """ - - def __init__(self) -> None: - self.a = False - self.b = "hello" - self.c = 3.14 - self._hello = "under" - self.__hello = "double_dunder" - self.hello = lambda x: x + 1 - - -# ############################################################################# -# Test_obj_to_str1 -# ############################################################################# - - -class Test_obj_to_str1(hunitest.TestCase, _Obj_to_str_TestCase): - def get_object(self) -> Any: - obj = _Object1() - return obj - - def test1(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False, b=hello, c=3.14) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - """ - super().test1(expected) - - def test2(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False, b=hello, c=3.14) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - """ - super().test2(expected) - - def test3(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False , b=hello , c=3.14 ) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - 
""" - super().test3(expected) - - -# ############################################################################# -# _Object2 -# ############################################################################# - - -class _Object2: - """ - Object using a `obj_to_str()` as repr. - """ - - def __init__(self) -> None: - self.x = True - self.y = "world" - self.z = 6.28 - self._hello = "under" - self.__hello = "double_dunder" - self.hello = lambda x: x + 1 - - def __repr__(self) -> str: - return hobject.obj_to_str(self) - - -# ############################################################################# -# _Object3 -# ############################################################################# - - -class _Object3: - """ - Object storing another object. - """ - - def __init__(self) -> None: - self.p = "p" - self.q = "q" - self.object2 = _Object2() - - -# ############################################################################# -# Test_obj_to_str2 -# ############################################################################# - - -class Test_obj_to_str2(hunitest.TestCase, _Obj_to_str_TestCase): - def get_object(self) -> Any: - obj = _Object3() - return obj - - def test1(self) -> None: - # TODO(gp): object2 in repr should be printed recursively as repr, but - # it's not. 
- expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) - ################################################################################ - repr: - ################################################################################ - : - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - """ - super().test1(expected) - - def test2(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) - ################################################################################ - repr: - ################################################################################ - : - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - p='p' - q='q' - """ - super().test2(expected) - - def test3(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) - ################################################################################ - repr: - ################################################################################ - : - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - """ - super().test3(expected) - - -# ############################################################################# -# _Abstract_ClassA -# ############################################################################# - - -class _Abstract_ClassA(abc.ABC, hobject.PrintableMixin): - """ - Abstract class descending from 
`PrintableMixin`. - """ - - def __init__(self) -> None: - self._arg0 = 0 - self._arg1 = "one" - self._arg2 = 2 - - @staticmethod - def get_config_attributes() -> List[str]: - return ["_arg1", "_arg2"] - - -# ############################################################################# -# _ClassB -# ############################################################################# - - -class _ClassB(hobject.PrintableMixin): - """ - Class descending from `PrintableMixin`. - """ - - def __init__(self, get_wall_clock_time: Callable) -> None: - self._arg5 = {"key1": "five", "key2": 5} - self._arg6 = "abc" - self._get_wall_clock_time = get_wall_clock_time - - @staticmethod - def get_config_attributes() -> List[str]: - return ["_arg5", "_get_wall_clock_time"] - - def get_wall_clock_time(self) -> pd.Timestamp: - """ - Return wall clock time in the timezone specified in the ctor. - - Initially wall clock time can be in any timezone, but cannot be - timezone-naive. - """ - wall_clock_time = self._get_wall_clock_time() - return wall_clock_time - - -# ############################################################################# -# _ClassA -# ############################################################################# - - -class _ClassA(_Abstract_ClassA): - """ - Class descending from `_AbstractClassA` and embedding `_ClassB`. 
- """ - - def __init__(self) -> None: - super().__init__() - self._arg3 = [3, 3, 3] - get_wall_clock_time = lambda: pd.Timestamp( - "2022-04-23", tz="America/New_York" - ) - helper_class = _ClassB(get_wall_clock_time) - self._arg4 = helper_class - self._arg10 = { - "key": 1, - "get_wall_clock_time": helper_class.get_wall_clock_time, - } - - def get_config_attributes(self) -> List[str]: - config_attributes = super().get_config_attributes() - child_class_attributes = ["_arg3", "_arg4", "_arg10"] - config_attributes.extend(child_class_attributes) - return config_attributes - - -# ############################################################################# -# Test_PrintableMixin_to_config_str -# ############################################################################# - - -class Test_PrintableMixin_to_config_str(hunitest.TestCase): - def check_test_class_str(self, test_class: Any, expected: str) -> None: - actual = test_class.to_config_str() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Print `_Abstract_ClassA`. - """ - test_class = _Abstract_ClassA() - expected = r""" - : - _arg1='one' - _arg2='2' - """ - self.check_test_class_str(test_class, expected) - - def test2(self) -> None: - """ - Print `_ClassA`. - """ - test_class = _ClassA() - expected = r""" - : - _arg1='one' - _arg2='2' - _arg3='[3, 3, 3]' - _arg4=: - _arg5='{'key1': 'five', 'key2': 5}' - _get_wall_clock_time='. 
at 0x>' - _arg10= - {'get_wall_clock_time': : - _arg5='{'key1': 'five', 'key2': 5}' - _arg6='abc' >, - 'key': 1} - """ - self.check_test_class_str(test_class, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py deleted file mode 100644 index 9e9887915..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py +++ /dev/null @@ -1,92 +0,0 @@ -import logging - -import pytest - -import helpers.hopen as hopen -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Some of these tests should be executed outside of the container to -# test other systems. - - -# ############################################################################# -# Test_open_unknown -# ############################################################################# - - -class Test_open_unknown(hunitest.TestCase): - """ - Test unknown extension and unknown systems. - """ - - def test_unknown_extension1(self) -> None: - """ - Test unknown extension raises an error. - """ - with self.assertRaises(AssertionError) as cm: - hopen.open_file("a.unknown_ext") - # Check error text. - self.assertIn("unknown_ext", str(cm.exception)) - - def test_unknown_os1(self) -> None: - """ - Test unknown OS raises an error. - """ - with self.assertRaises(AssertionError) as cm: - hopen._cmd_open_html("b.html", "UnknownOS") - # Check error text. 
- self.assertIn("UnknownOS", str(cm.exception)) - - -# ############################################################################# -# Test_open_html -# ############################################################################# - - -@pytest.mark.skip(reason="See cryptomtc/cmamp#321") -class Test_open_html(hunitest.TestCase): - """ - Test different command correctness for opening html file. - """ - - def test_linux1(self) -> None: - """ - Test Linux. - """ - cmd = hopen._cmd_open_html("a.html", "Linux") - self.check_string(str(cmd)) - - def test_windows1(self) -> None: - """ - Test Windows. - """ - cmd = hopen._cmd_open_html("b.html", "Windows") - self.check_string(str(cmd)) - - def test_mac1(self) -> None: - """ - Test Darwin. - """ - cmd = hopen._cmd_open_html("c.html", "Darwin") - self.check_string(str(cmd)) - - -# ############################################################################# -# Test_open_pdf -# ############################################################################# - - -class Test_open_pdf(hunitest.TestCase): - """ - Test different command correctness for opening pdf file. - """ - - def test_mac1(self) -> None: - """ - Test Darwin. 
- """ - cmd = hopen._cmd_open_html("a.pdf", "Darwin") - self.check_string(str(cmd)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py deleted file mode 100644 index be5200d47..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas_analysis as hpananal -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_explore1 -# ############################################################################# - - -class Test_explore1(hunitest.TestCase): - def test_ols_regress_series(self) -> None: - x = 5 * np.random.randn(100) - y = x + np.random.randn(*x.shape) - df = pd.DataFrame() - df["x"] = x - df["y"] = y - hpananal.ols_regress_series( - df["x"], df["y"], intercept=True, print_model_stats=False - ) - - @pytest.mark.skip(reason="https://github.com/.../.../issues/3676") - def test_rolling_pca_over_time1(self) -> None: - np.random.seed(42) - df = pd.DataFrame(np.random.randn(10, 5)) - df.index = pd.date_range("2017-01-01", periods=10) - corr_df, eigval_df, eigvec_df = hpananal.rolling_pca_over_time( - df, 0.5, "fill_with_zero" - ) - txt = ( - "corr_df=\n%s\n" % corr_df.to_string() - + "eigval_df=\n%s\n" % eigval_df.to_string() - + "eigvec_df=\n%s\n" % eigvec_df.to_string() - ) - self.check_string(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py deleted file mode 100644 index 595877a97..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py +++ /dev/null @@ -1,67 +0,0 @@ -import logging - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_CheckSummary -# ############################################################################# - - -class Test_CheckSummary(hunitest.TestCase): - def test1(self) -> None: - """ - All the tests have passed. - """ - # Prepare inputs. - obj = hpandas.CheckSummary() - obj.add( - "hello", - "Number of not submitted OMS child orders=0 / 73 = 0.00%", - True, - ) - obj.add("hello2", "ok", True) - # Check. - is_ok = obj.is_ok() - self.assertTrue(is_ok) - # - actual = obj.report_outcome(notebook_output=False, assert_on_error=False) - self.check_string(actual) - # No assertion expected. - obj.report_outcome() - - def test2(self) -> None: - """ - Not all the tests have passed. - """ - # Prepare inputs. - obj = hpandas.CheckSummary() - obj.add( - "hello", - "Number of not submitted OMS child orders=0 / 73 = 0.00%", - True, - ) - obj.add("hello2", "not_ok", False) - # Check. - is_ok = obj.is_ok() - self.assertFalse(is_ok) - # - actual = obj.report_outcome(notebook_output=False, assert_on_error=False) - self.check_string(actual) - # - with self.assertRaises(ValueError) as e: - actual = obj.report_outcome() - actual_exception = str(e.exception) - expected_exception = r""" - The checks have failed: - description comment is_ok - 0 hello Number of not submitted OMS child orders=0 / 7... 
True - 1 hello2 not_ok False - is_ok=False - """ - self.assert_equal(actual_exception, expected_exception, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py deleted file mode 100644 index a65340957..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py +++ /dev/null @@ -1,364 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestDropNa -# ############################################################################# - - -class TestDropNa(hunitest.TestCase): - def test_dropna1(self) -> None: - """ - Test if all types of NaNs are dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "B", None, "D"], - "dummy_value_3": [0, 0, pd.NA, 0, 0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.dropna(test_df, drop_infs=False) - # Prepare expected result. - expected = { - "dummy_value_1": [1, 0], - "dummy_value_2": ["A", "D"], - "dummy_value_3": [0, 0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64", "dummy_value_3": "object"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 4])) - # Check. - hunitest.compare_df(actual, expected) - - def test_dropna2(self) -> None: - """ - Test if infs are dropped. - """ - # Prepare actual result. 
- test_data = { - "dummy_value_1": [-np.inf, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "B", "C", "D"], - "dummy_value_3": [0, 0, np.inf, 0, 0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.dropna(test_df, drop_infs=True) - # Prepare expected result. - expected = { - "dummy_value_1": [1, 2, 0], - "dummy_value_2": ["A", "C", "D"], - "dummy_value_3": [0, 0, 0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64", "dummy_value_3": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 3, 4])) - # Check. - hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestDropAxisWithAllNans -# ############################################################################# - - -class TestDropAxisWithAllNans(hunitest.TestCase): - def test_drop_rows1(self) -> None: - """ - Test if row full of nans is dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": [pd.NA, "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) - # Prepare expected result. - expected = { - "dummy_value_1": [2, 3], - "dummy_value_2": ["B", "C"], - "dummy_value_3": [1.0, 1.0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 2])) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_rows2(self) -> None: - """ - Test if non fully nan row is not dropped. - """ - # Prepare actual result. 
- test_data = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": ["A", "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) - # Prepare expected result. - expected = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": ["A", "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([0, 1, 2])) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_columns1(self) -> None: - """ - Test if column full of nans is dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, pd.NA, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) - # Prepare expected result. - expected = { - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - expected = pd.DataFrame(data=expected) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_columns2(self) -> None: - """ - Test if column that is not full of nans is not dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 2, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) - # Prepare expected result. - expected = { - "dummy_value_1": [np.nan, 2, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - expected = pd.DataFrame(data=expected) - # Check. 
- hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestDropDuplicates -# ############################################################################# - - -class TestDropDuplicates(hunitest.TestCase): - """ - Test that duplicates are dropped correctly. - """ - - @staticmethod - def get_test_data() -> pd.DataFrame: - test_data = [ - (1, "A", 3.2), - (1, "A", 3.2), - (10, "B", 3.2), - (8, "A", 3.2), - (4, "B", 8.2), - (10, "B", 3.2), - ] - index = [ - "dummy_value1", - "dummy_value3", - "dummy_value2", - "dummy_value1", - "dummy_value1", - "dummy_value2", - ] - columns = ["int", "letter", "float"] - df = pd.DataFrame(data=test_data, index=index, columns=columns) - return df - - def test_drop_duplicates1(self) -> None: - """ - - use_index = True - - column_subset is not None - """ - # Prepare test data. - df = self.get_test_data() - use_index = True - column_subset = ["float"] - no_duplicates_df = hpandas.drop_duplicates( - df, use_index, column_subset=column_subset - ) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value3 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates2(self) -> None: - """ - - use_index = True - - column_subset = None - """ - # Prepare test data. - df = self.get_test_data() - use_index = True - no_duplicates_df = hpandas.drop_duplicates(df, use_index) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value3 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 8 A 3.2 - dummy_value1 4 B 8.2 - """ - # Check. 
- self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates3(self) -> None: - """ - - use_index = False - - column_subset = None - """ - # Prepare test data. - df = self.get_test_data() - use_index = False - no_duplicates_df = hpandas.drop_duplicates(df, use_index) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 8 A 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates4(self) -> None: - """ - - use_index = False - - column_subset is not None - """ - # Prepare test data. - df = self.get_test_data() - use_index = False - column_subset = ["letter", "float"] - no_duplicates_df = hpandas.drop_duplicates( - df, use_index, column_subset=column_subset - ) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# Test_impute_nans -# ############################################################################# - - -class Test_impute_nans(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic imputation of "nan" strings with empty string. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "nan", "value3"], - "col2": ["a", "b", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col1", "") - # Check output. - self.assertEqual(result_df["col1"].tolist(), ["value1", "", "value3"]) - self.assertEqual(result_df["col2"].tolist(), ["a", "b", "c"]) - - def test2(self) -> None: - """ - Test imputation with a custom value. 
- """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "nan", "value3"], - "col2": ["a", "nan", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col2", "MISSING") - # Check output. - self.assertEqual(result_df["col1"].tolist(), ["value1", "nan", "value3"]) - self.assertEqual(result_df["col2"].tolist(), ["a", "MISSING", "c"]) - - def test3(self) -> None: - """ - Test with no "nan" values present. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "value2", "value3"], - "col2": ["a", "b", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col1", "") - # Check output - should be unchanged. - self.assertEqual( - result_df["col1"].tolist(), ["value1", "value2", "value3"] - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py deleted file mode 100644 index 9567c91e5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py +++ /dev/null @@ -1,650 +0,0 @@ -import logging -from typing import Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestCompareDataframeRows -# ############################################################################# - - -class TestCompareDataframeRows(hunitest.TestCase): - def get_test_data(self) -> pd.DataFrame: - test_data = { - "dummy_value_1": [0, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "C", "B", "D"], - "dummy_value_3": [0, 0, 0, 0, 0], - } - df = pd.DataFrame(data=test_data) - df.index.name = "test" - 
return df - - def test_compare_dataframe_rows1(self) -> None: - """ - Verify that differences are caught and displayed properly. - """ - # Prepare inputs. - test_data = self.get_test_data() - edited_test_data = test_data.copy()[1:-1] - edited_test_data.loc[1, "dummy_value_2"] = "W" - edited_test_data.loc[2, "dummy_value_2"] = "Q" - edited_test_data.loc[2, "dummy_value_3"] = "1" - # Run. - data_difference = hpandas.compare_dataframe_rows( - test_data, edited_test_data - ) - # Check output. - actual = hpandas.df_to_str(data_difference) - expected = r""" dummy_value_2 dummy_value_3 test - self other self other - 0 W A 1 - 1 Q C 1 0 2""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_compare_dataframe_rows2(self) -> None: - """ - Verify that differences are caught and displayed properly without - original index. - """ - # Prepare inputs. - test_data = self.get_test_data() - test_data.index.name = None - edited_test_data = test_data.copy()[1:-1] - edited_test_data.loc[1, "dummy_value_2"] = "W" - edited_test_data.loc[2, "dummy_value_2"] = "Q" - edited_test_data.loc[2, "dummy_value_3"] = "1" - # Run. - data_difference = hpandas.compare_dataframe_rows( - test_data, edited_test_data - ) - # Check output. 
- actual = hpandas.df_to_str(data_difference) - expected = r""" dummy_value_2 dummy_value_3 - self other self other - 0 W A NaN NaN - 1 Q C 1 0.0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_compare_dfs -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -class Test_compare_dfs(hunitest.TestCase): - """ - - Define two DataFrames that can be either equal or different in terms of columns or rows - - Compare its values by calculating the difference - """ - - @staticmethod - def get_test_dfs_equal() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Both DataFrames have only equal rows and columns names. - """ - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - values1 = { - "tsA": pd.Series([1, 2, 3]), - "tsB": pd.Series([4, 5, 6]), - "tsC": pd.Series([7, 8, 9]), - "timestamp": timestamp_index1, - } - df1 = pd.DataFrame(data=values1) - df1 = df1.set_index("timestamp") - # - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - values2 = { - "tsA": pd.Series([1.1, 1.9, 3.15]), - "tsB": pd.Series([0, 5, 5.8]), - "tsC": pd.Series([6.5, 8.6, 9.07]), - "timestamp": timestamp_index2, - } - df2 = pd.DataFrame(data=values2) - df2 = df2.set_index("timestamp") - return df1, df2 - - @staticmethod - def get_test_dfs_close_to_zero() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - DataFrames with values that are close to 0. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - ] - values1 = { - "tsA": [3e-9, -3e-9], - "tsB": [6e-3, 4e-9], - "timestamp": timestamp_index, - } - df1 = pd.DataFrame(data=values1) - df1 = df1.set_index("timestamp") - # - values2 = { - "tsA": [15e-3, -5e-9], - "tsB": [5e-9, 3e-9], - "timestamp": timestamp_index, - } - df2 = pd.DataFrame(data=values2) - df2 = df2.set_index("timestamp") - return df1, df2 - - def get_test_dfs_different(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - DataFrames have both unique and equal rows and columns. - """ - df1, df2 = self.get_test_dfs_equal() - df2 = df2.rename( - columns={"tsC": "extra_col"}, - index={ - pd.Timestamp("2022-01-01 21:03:00+00:00"): pd.Timestamp( - "2022-01-01 21:04:00+00:00" - ) - }, - ) - return df1, df2 - - def test1(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "diff" - """ - df1, df2 = self.get_test_dfs_equal() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff tsC.diff - timestamp - 2022-01-01 21:01:00+00:00 -0.10 4.0 0.50 - 2022-01-01 21:02:00+00:00 0.10 0.0 -0.60 - 2022-01-01 21:03:00+00:00 -0.15 0.2 -0.07 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - zero_vs_zero_is_zero = False - - remove_inf = False - """ - df1, df2 = self.get_test_dfs_equal() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=False, - remove_inf=False, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 
21:01:00+00:00 -9.090909 inf 7.692308 - 2022-01-01 21:02:00+00:00 5.263158 0.000000 -6.976744 - 2022-01-01 21:03:00+00:00 -4.761905 3.448276 -0.771775 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - - DataFrames are not equal - - Column and row modes are `inner` - - diff_mode = "diff" - """ - df1, df2 = self.get_test_dfs_different() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="inner", - column_mode="inner", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff - timestamp - 2022-01-01 21:01:00+00:00 -0.1 4.0 - 2022-01-01 21:02:00+00:00 0.1 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - - DataFrames are not equal - - Column and row modes are `inner` - - diff_mode = "pct_change" - """ - df1, df2 = self.get_test_dfs_different() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="inner", - column_mode="inner", - diff_mode="pct_change", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -9.090909 NaN - 2022-01-01 21:02:00+00:00 5.263158 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test5(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "diff" - - All values of the second DataFrame are zeros - - Check that if the second DataFrame consists of zeros, - the function will perform comparison to the initial DataFrame. - """ - df1, df2 = self.get_test_dfs_different() - # Create DataFrame with zeros. - df2 = df1 * 0 - # Compare. 
- df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff tsC.diff - timestamp - 2022-01-01 21:01:00+00:00 1 4 7 - 2022-01-01 21:02:00+00:00 2 5 8 - 2022-01-01 21:03:00+00:00 3 6 9 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test6(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - close_to_zero_threshold = 1e-6 - - zero_vs_zero_is_zero = True - - remove_inf = True - - The second DataFrame has numbers below the close_to_zero_threshold. - """ - df1, df2 = self.get_test_dfs_close_to_zero() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=True, - remove_inf=True, - ) - # - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -100.0 NaN - 2022-01-01 21:02:00+00:00 0.0 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test7(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - close_to_zero_threshold = 1e-6 - - zero_vs_zero_is_zero = False - - remove_inf = False - - The second DataFrame has numbers below the close_to_zero_threshold. 
- """ - df1, df2 = self.get_test_dfs_close_to_zero() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=False, - remove_inf=False, - ) - # - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -100.0 inf - 2022-01-01 21:02:00+00:00 NaN NaN - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test8(self) -> None: - """ - Test NaN comparison with NaNs present at different location in two - dataframes. - """ - # Build test dataframes. - df1 = pd.DataFrame( - data={ - "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df2 = pd.DataFrame( - data={ - "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - # Check. - with self.assertRaises(AssertionError) as cm: - compare_nans = True - hpandas.compare_dfs( - df1, df2, compare_nans=compare_nans, only_warning=False - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - DataFrame.iloc[:, 0] (column name="A") are different - - DataFrame.iloc[:, 0] (column name="A") values are different (66.66667 %) - [index]: [0, 1, 2, 3, 4, 5] - [left]: [1.1, nan, 3.1, nan, inf, inf] - [right]: [3.0, 2.2, nan, nan, nan, inf] - At positional index 0, first diff: 1.1 != 3.0 - df1= - A B - 0 1.1 0 - 1 NaN 0 - 2 3.1 0 - 3 NaN 0 - 4 inf 0 - 5 inf 0 - and df2= - A B - 0 3.0 0 - 1 2.2 0 - 2 NaN 0 - 3 NaN 0 - 4 NaN 0 - 5 inf 0 - are not equal. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test9(self) -> None: - """ - Test to verify the error when df1 and df2 have different index types. - """ - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Create df2 with a DatetimeIndex. 
- dates = pd.date_range("2021-01-01", periods=3) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "timestamp": dates}) - df2 = df2.set_index("timestamp") - with self.assertRaises(AssertionError) as cm: - hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - df1.index.difference(df2.index)= - RangeIndex(start=0, stop=3, step=1) - df2.index.difference(df1.index)= - DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None) - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test10(self) -> None: - """ - Check `assert_diff_threshold` functionality in presence of NaN values - in df_diff. - """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.000001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.nan - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.0001 0.0001 0.0001 - 2022-01-01 21:02:00+00:00 0.0001 0.0001 NaN - 2022-01-01 21:03:00+00:00 0.0001 0.0001 0.0001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test11(self) -> None: - """ - Check functionality for `remove_inf = False` in presence of `diff_mode - = 'pct_change'`. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.inf - with self.assertRaises(AssertionError) as cm: - hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - remove_inf=False, - only_warning=False, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - DataFrame.iloc[:, 0] (column name="tsA") are different - - DataFrame.iloc[:, 0] (column name="tsA") values are different (100.0 %) - [index]: [2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00, 2022-01-01 21:03:00+00:00] - [left]: [False, False, False] - [right]: [True, True, True] - df1= - tsA tsB tsC - timestamp - 2022-01-01 21:01:00+00:00 100.001 400.004 700.007 - 2022-01-01 21:02:00+00:00 200.002 500.005 inf - 2022-01-01 21:03:00+00:00 300.003 600.006 900.009 - and df2= - tsA tsB tsC - timestamp - 2022-01-01 21:01:00+00:00 100 400 700 - 2022-01-01 21:02:00+00:00 200 500 800 - 2022-01-01 21:03:00+00:00 300 600 900 - have pct_change more than `assert_diff_threshold`. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test12(self) -> None: - """ - Check functionality for `remove_inf = True` in presence of `diff_mode = - 'pct_change'`. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.inf - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 - 2022-01-01 21:02:00+00:00 0.001 0.001 NaN - 2022-01-01 21:03:00+00:00 0.001 0.001 0.001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test13(self) -> None: - """ - Check test case when negative values in df2. - """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, -300], - "tsB": [400, -500, 600], - "tsC": [700, -800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 - 2022-01-01 21:02:00+00:00 0.001 -0.001 -0.001 - 2022-01-01 21:03:00+00:00 -0.001 0.001 0.001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_invalid_input(self) -> None: - """ - Put two different DataFrames with `equal` mode. 
- """ - df1, df2 = self.get_test_dfs_different() - with self.assertRaises(AssertionError): - hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - ) - - -# ############################################################################# -# Test_compare_nans_in_dataframes -# ############################################################################# - - -class Test_compare_nans_in_dataframes(hunitest.TestCase): - def test1(self) -> None: - """ - Check that NaN differences are identified correctly. - """ - # Build test dataframes. - df1 = pd.DataFrame( - data={ - "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df2 = pd.DataFrame( - data={ - "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df = hpandas.compare_nans_in_dataframes(df1, df2) - actual = hpandas.df_to_str(df) - expected = r""" - A - df1 df2 - 1 NaN 2.2 - 2 3.1 NaN - 4 inf NaN - """ - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py deleted file mode 100644 index 0bd4eaeee..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py +++ /dev/null @@ -1,276 +0,0 @@ -import logging - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_to_series1 -# ############################################################################# - - -class Test_to_series1(hunitest.TestCase): - def helper(self, 
n: int, expected: str) -> None: - vals = list(range(n)) - df = pd.DataFrame([vals], columns=[f"a{i}" for i in vals]) - df = df.T - _LOG.debug("df=\n%s", df) - srs = hpandas.to_series(df) - _LOG.debug("srs=\n%s", srs) - actual = str(srs) - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test1(self) -> None: - n = 0 - expected = r""" - Series([], dtype: float64) - """ - self.helper(n, expected) - - def test2(self) -> None: - n = 1 - expected = r""" - a0 0 - dtype: int64""" - self.helper(n, expected) - - def test3(self) -> None: - n = 5 - expected = r""" - a0 0 - a1 1 - a2 2 - a3 3 - a4 4 - Name: 0, dtype: int64""" - self.helper(n, expected) - - -# ############################################################################# -# Test_cast_series_to_type -# ############################################################################# - - -class Test_cast_series_to_type(hunitest.TestCase): - """ - Test converting a series into a given type. - """ - - def test1(self) -> None: - series = pd.Series(["1", "2", "3"]) - series_type = int - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.int64) - - def test2(self) -> None: - series = pd.Series(["0.1", "0.2", "0.3"]) - series_type = float - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.float64) - - def test3(self) -> None: - series = pd.Series(["None", "None", "None"]) - series_type = None - actual = hpandas.cast_series_to_type(series, series_type) - for i in range(len(actual)): - self.assertIsNone(actual.iloc[i]) - - def test4(self) -> None: - series = pd.Series(["2020-01-01", "2020-02-02", "2020-03-03"]) - series_type = pd.Timestamp - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.datetime64) - - def test5(self) -> None: - series = pd.Series(["{}", "{1: 2, 3: 4}", "{'a': 'b'}"]) - series_type = dict - actual = hpandas.cast_series_to_type(series, 
series_type) - for i in range(len(actual)): - self.assertEqual(type(actual.iloc[i]), dict) - - -# ############################################################################# -# Test_convert_to_type -# ############################################################################# - - -class Test_convert_to_type(hunitest.TestCase): - def test_convert_to_type_bool(self) -> None: - """ - Check converting to bool column. - """ - # Mix of booleans, truthy/falsy strings, numerics, and invalid values - data = [True, False, "True", "false", 1, 0, "1", "0", "yes", None] - series = pd.Series(data) - result = hpandas.convert_to_type(series, "is_bool") - expected = pd.Series( - [True, False, True, False, True, False, True, False, None, None] - ) - pd.testing.assert_series_equal(result, expected) - - def test_convert_to_type_int_and_numeric(self) -> None: - """ - Check converting to numeric and int column. - """ - # Strings that parse to numbers, floats, invalid strings, and ints - series = pd.Series(["1", "2", "3.5", "abc", 4], dtype=object) - # is_int should coerce numeric strings to numbers, invalid -> NaN - result_int = hpandas.convert_to_type(series, "is_int") - expected_int = pd.to_numeric(series, errors="coerce") - pd.testing.assert_series_equal(result_int, expected_int) - # is_numeric is the same as to_numeric - result_numeric = hpandas.convert_to_type(series, "is_numeric") - pd.testing.assert_series_equal(result_numeric, expected_int) - - def test_convert_to_type_string(self) -> None: - """ - Check converting to string column. - """ - # Strings vs non-strings - data = ["a", 1, None, "hello", True, 3.14] - series = pd.Series(data, dtype=object) - result = hpandas.convert_to_type(series, "is_string") - expected = pd.Series(["a", "1", "None", "hello", "True", "3.14"]) - pd.testing.assert_series_equal(result, expected) - - def test_convert_to_type_unknown(self) -> None: - "Check converting to invalid datatype column." 
- series = pd.Series([1, 2, 3], dtype=object) - with pytest.raises(ValueError) as exc: - hpandas.convert_to_type(series, "invalid_type") - self.assertIn("Unknown column type: invalid_type", str(exc.value)) - - -# ############################################################################# -# Test_infer_column_types -# ############################################################################# - - -class Test_infer_column_types(hunitest.TestCase): - def test_numeric_dominance(self) -> None: - """ - Check with numeric dominant column. - """ - # 5 elements: '1','2',3 (numeric), 'a', None - col = pd.Series(["1", "2", 3, "a", None], dtype=object) - vals = hpandas.infer_column_types(col) - # is_numeric: True for "1","2",3 → 3/5 = 0.6 - assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.6 - # is_bool: none are bool → 0.0 - assert vals["is_bool"] == 0.0 - # is_string: "1","2","a" are str → 3/5 = 0.6 - assert pytest.approx(vals["is_string"], rel=1e-6) == 0.6 - # numeric ≥ string, and bool < numeric ⇒ type is numeric - self.assert_equal(vals["type"], "is_numeric") - - def test_bool_dominance(self) -> None: - """ - Check with bool dominant column. - """ - # 4 elements: True, False, True (bool), "x" - col = pd.Series([True, False, True, "x"], dtype=object) - vals = hpandas.infer_column_types(col) - # is_bool: 3/4 = 0.75 - assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.75 - # is_numeric: True→1, False→0, True→1, "x"→NaN → notna → 3/4 = 0.75 - assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.75 - # is_string: only "x" → 1/4 = 0.25 - assert pytest.approx(vals["is_string"], rel=1e-6) == 0.25 - # bool ≥ numeric ⇒ type is bool - self.assert_equal(vals["type"], "is_bool") - - def test_string_dominance(self) -> None: - """ - Check with string dominant column. 
- """ - # 3 elements: 1.5 (numeric), "a","b" (strings) - col = pd.Series([1.5, "a", "b"], dtype=object) - vals = hpandas.infer_column_types(col) - # is_bool: none are bool → 0/3 = 0.0 - assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.0 - # is_numeric: 1/3 ≈ 0.333... - assert pytest.approx(vals["is_numeric"], rel=1e-6) == pytest.approx( - 1 / 3, rel=1e-6 - ) - # is_string: 2/3 ≈ 0.666... - assert pytest.approx(vals["is_string"], rel=1e-6) == pytest.approx( - 2 / 3, rel=1e-6 - ) - # bool < numeric < string ⇒ type is string - self.assert_equal(vals["type"], "is_string") - - -# ############################################################################# -# Test_convert_df -# ############################################################################# - - -class Test_convert_df(hunitest.TestCase): - def test_convert_df_all_bool(self) -> None: - """ - A column of pure booleans should stay booleans. - """ - df = pd.DataFrame({"flag": [True, False, True, False]}) - df_out = hpandas.convert_df(df) - # Expect a DataFrame back - assert isinstance(df_out, pd.DataFrame) - # Column dtype must be bool - self.assert_equal(df_out["flag"].dtype.name, "bool") - # Values preserved - self.assert_equal( - str(df_out["flag"].tolist()), str([True, False, True, False]) - ) - - def test_convert_df_all_numeric(self) -> None: - """ - A column of numeric strings and ints should become floats. - """ - df = pd.DataFrame({"score": ["1", 2, "3.5", 4]}, dtype=object) - df_out = hpandas.convert_df(df) - assert isinstance(df_out, pd.DataFrame) - # dtype should be float64 - assert df_out["score"].dtype == float - # Values converted correctly - assert df_out["score"].tolist() == [1.0, 2.0, 3.5, 4.0] - - def test_convert_df_all_string(self) -> None: - """ - A column of strings (and mixed non-numeric non-bool) stays as-is. 
- """ - df = pd.DataFrame( - {"name": ["alice", "bob", "", "charlie"]}, dtype=object - ) - df_out = hpandas.convert_df(df) - print(df_out.head(5)) - assert isinstance(df_out, pd.DataFrame) - # dtype remains object (strings) - self.assert_equal(df_out["name"].dtype.name, "object") - self.assert_equal( - str(df_out["name"].tolist()), str(["alice", "bob", "", "charlie"]) - ) - - def test_convert_df_mixed_columns(self) -> None: - """ - Different datatype columns should convert accordingly. - """ - df = pd.DataFrame( - { - "flag": [True, False, False], - "value": [10, 20, "xyz"], - "text": ["one", "hello", 2], - }, - dtype=object, - ) - df_out = hpandas.convert_df(df) - # flag → bool - self.assert_equal(df_out["flag"].dtype.name, "bool") - self.assertIn("float", df_out["value"].dtype.name) - self.assert_equal(df_out["text"].dtype.name, "object") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py deleted file mode 100644 index 44b7c7b18..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py +++ /dev/null @@ -1,448 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_dassert_is_unique1 -# ############################################################################# - - -class Test_dassert_is_unique1(hunitest.TestCase): - def get_df1(self) -> pd.DataFrame: - """ - Return a df without duplicated index. 
- """ - num_rows = 5 - idx = [ - pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) - for i in range(num_rows) - ] - values = [[i] for i in range(len(idx))] - df = pd.DataFrame(values, index=idx) - _LOG.debug("df=\n%s", df) - # - actual = hpandas.df_to_str(df) - expected = r""" - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:01:00 1 - 2000-01-01 09:02:00 2 - 2000-01-01 09:03:00 3 - 2000-01-01 09:04:00 4""" - self.assert_equal(actual, expected, fuzzy_match=True) - return df - - def test_dassert_is_unique1(self) -> None: - df = self.get_df1() - hpandas.dassert_unique_index(df) - - def get_df2(self) -> pd.DataFrame: - """ - Return a df with duplicated index. - """ - num_rows = 4 - idx = [ - pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) - for i in range(num_rows) - ] - idx.append(idx[0]) - values = [[i] for i in range(len(idx))] - df = pd.DataFrame(values, index=idx) - _LOG.debug("df=\n%s", df) - # - actual = hpandas.df_to_str(df) - expected = r""" - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:01:00 1 - 2000-01-01 09:02:00 2 - 2000-01-01 09:03:00 3 - 2000-01-01 09:00:00 4""" - self.assert_equal(actual, expected, fuzzy_match=True) - return df - - def test_dassert_is_unique2(self) -> None: - df = self.get_df2() - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_unique_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Duplicated rows are: - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:00:00 4 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_valid_remap -# ############################################################################# - - -class Test_dassert_valid_remap(hunitest.TestCase): - def test1(self) -> None: - """ - Check that the function works with correct inputs. - """ - # Set inputs. 
- to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] - remap_dict = { - "dummy_value_1": "1, 2, 3", - "dummy_value_2": "A, B, C", - } - # Check. - hpandas.dassert_valid_remap(to_remap, remap_dict) - - def test2(self) -> None: - """ - Check that an assertion is raised if dictionary keys are not a subset. - """ - # Set inputs. - to_remap = ["dummy_value_1", "dummy_value_2"] - remap_dict = { - "dummy_value_1": "1, 2, 3", - "dummy_value_2": "A, B, C", - "dummy_value_3": "A1, A2, A3", - } - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] - issubset - val2=['dummy_value_1', 'dummy_value_2'] - val1 - val2=['dummy_value_3'] - Keys to remap should be a subset of existing columns""" - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that an assertion is raised if the duplicate values are present - in the dict. - """ - # Set inputs. - to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] - remap_dict = { - "dummy_value_1": 1, - "dummy_value_2": "A, B, C", - "dummy_value_3": "A, B, C", - } - # Run. - with self.assertRaises(AttributeError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - 'dict_values' object has no attribute 'count'""" - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Check that an assertion is raised if the input is not a list. - """ - # Set inputs. - to_remap = {"dummy_value_1"} - remap_dict = { - "dummy_value_1": "1, 2, 3", - } - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '{'dummy_value_1'}' is '' instead of '' - """ - # Check. 
- self.assert_equal(actual, expected, fuzzy_match=True) - - def test5(self) -> None: - """ - Check that an assertion is raised if the input is not a dictionary. - """ - # Set inputs. - to_remap = ["dummy_value_1"] - remap_dict = [ - "dummy_value_1 : 1, 2, 3", - ] - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '['dummy_value_1 : 1, 2, 3']' is '' instead of '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_increasing_index -# ############################################################################# - - -class Test_dassert_increasing_index(hunitest.TestCase): - def test1(self) -> None: - """ - Check that a monotonically increasing index passes the assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_increasing_index(df) - - def test2(self) -> None: - """ - Check that an assert is raised when index is not monotonically - increasing. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:04"), - pd.Timestamp("2000-01-01 9:03"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. 
- with self.assertRaises(AssertionError) as cm: - hpandas.dassert_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Not increasing indices are: - 0 - 2000-01-01 09:04:00 0 - 2000-01-01 09:03:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that a monotonically increasing index with duplicates passes the - assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:00"), - pd.Timestamp("2000-01-01 9:00"), - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:01"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_increasing_index(df) - - -# ############################################################################# -# Test_dassert_strictly_increasing_index -# ############################################################################# - - -class Test_dassert_strictly_increasing_index(hunitest.TestCase): - def test1(self) -> None: - """ - Check that unique and monotonically increasing index passes the assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_strictly_increasing_index(df) - - def test2(self) -> None: - """ - Check that an assert is raised for an increasing index with duplicates. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. 
- with self.assertRaises(AssertionError) as cm: - hpandas.dassert_strictly_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Duplicated rows are: - 0 - 2000-01-01 09:01:00 0 - 2000-01-01 09:01:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that an assert is raised for a not monotonically increasing - index. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_strictly_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Not increasing indices are: - 0 - 2000-01-01 09:03:00 0 - 2000-01-01 09:02:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_index_is_datetime -# ############################################################################# - - -class Test_dassert_index_is_datetime(hunitest.TestCase): - @staticmethod - def get_multiindex_df( - index_is_datetime: bool, - ) -> pd.DataFrame: - """ - Helper function to get test multi-index dataframe. 
Example of dataframe - returned when `index_is_datetime = True`: - - ``` - column1 column2 - index timestamp - index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 - 2022-01-01 21:10:00+00:00 1.303778 -0.288235 - index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 - 2022-01-01 21:10:00+00:00 1.333692 1.708455 - ``` - - Example of dataframe returned when `index_is_datetime = False`: - - ``` - column1 column2 - index timestamp - index1 string1 -0.122140 -1.949431 - string2 1.303778 -0.288235 - index2 string1 1.237079 1.168012 - string2 1.333692 1.708455 - ``` - """ - if index_is_datetime: - index_inner = [ - pd.Timestamp("2022-01-01 21:00:00", tz="UTC"), - pd.Timestamp("2022-01-01 21:10:00", tz="UTC"), - ] - else: - index_inner = ["string1", "string2"] - index_outer = ["index1", "index2"] - iterables = [index_outer, index_inner] - index = pd.MultiIndex.from_product( - iterables, names=["index", "timestamp"] - ) - columns = ["column1", "column2"] - nums = np.random.uniform(-2, 2, size=(4, 2)) - df = pd.DataFrame(nums, index=index, columns=columns) - return df - - def test1(self) -> None: - """ - Check that multi-index dataframe index is datetime type. - """ - index_is_datetime = True - df = self.get_multiindex_df(index_is_datetime) - hpandas.dassert_index_is_datetime(df) - - def test2(self) -> None: - """ - Check that multi-index dataframe index is not datetime type. - """ - index_is_datetime = False - df = self.get_multiindex_df(index_is_datetime) - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_index_is_datetime(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check for empty dataframe. 
- """ - df = pd.DataFrame() - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_index_is_datetime(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of 'RangeIndex(start=0, stop=0, step=1)' is '' instead of '' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Check that single-indexed dataframe index is datetime type. - """ - index_is_datetime = True - df = self.get_multiindex_df(index_is_datetime) - df = df.loc["index1"] - hpandas.dassert_index_is_datetime(df) - - -# ############################################################################# -# Test_dassert_approx_eq1 -# ############################################################################# - - -class Test_dassert_approx_eq1(hunitest.TestCase): - def test1(self) -> None: - hpandas.dassert_approx_eq(1, 1.0000001) - - def test2(self) -> None: - srs1 = pd.Series([1, 2.0000001]) - srs2 = pd.Series([0.999999, 2.0]) - hpandas.dassert_approx_eq(srs1, srs2, msg="hello world") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py deleted file mode 100644 index 2c69e4fe7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py +++ /dev/null @@ -1,685 +0,0 @@ -import datetime -import logging -import unittest.mock -import uuid -from typing import Optional, Union - -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hpandas_display as hpandisp -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestDataframeToJson -# ############################################################################# 
- - -class TestDataframeToJson(hunitest.TestCase): - """ - Test dataframe to JSON conversion. - """ - - def test1(self) -> None: - """ - Verify correctness of dataframe to JSON transformation. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], - "col_2": [1, 2, 3, 4, 5, 6, 7], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=3, n_tail=3 - ) - # Check output. - self.check_string(output_str) - - def test2(self) -> None: - """ - Verify correctness of UUID-containing dataframe transformation. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - uuid.UUID("421470c7-7797-4a94-b584-eb83ff2de88a"), - uuid.UUID("22cde381-1782-43dc-8c7a-8712cbdf5ee1"), - ], - "col_2": [1, 2], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. - self.check_string(output_str) - - def test3(self) -> None: - """ - Verify correctness of transformation of a dataframe with Timestamps. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - pd.Timestamp("2020-01-01"), - pd.Timestamp("2020-05-12"), - ], - "col_2": [1.0, 2.0], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. - self.check_string(output_str) - - def test4(self) -> None: - """ - Verify correctness of transformation of a dataframe with datetime. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - datetime.datetime(2020, 1, 1), - datetime.datetime(2020, 5, 12), - ], - "col_2": [1.0, 2.0], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. 
- self.check_string(output_str) - - -# ############################################################################# -# Test_list_to_str -# ############################################################################# - - -class Test_list_to_str(hunitest.TestCase): - """ - Test list to string conversion. - """ - - def test1(self) -> None: - """ - Check that a list is converted to string correctly. - """ - # Prepare inputs. - items = [1, "two", 3, 4, "five"] - # Run test. - actual = hprint.list_to_str2(items, enclose_str_char="|", sep_char=" ; ") - # Check output. - expected = "5 [|1| ; |two| ; |3| ; |4| ; |five|]" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Check that a list is converted to string and truncated correctly. - """ - # Prepare inputs. - items = list(range(15)) - # Run test. - actual = hprint.list_to_str2(items, enclose_str_char="", sep_char=" - ") - # Check output. - expected = "15 [0 - 1 - 2 - 3 - 4 - ... - 10 - 11 - 12 - 13 - 14]" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that a list is converted to string correctly, without additional - parameters. - """ - # Prepare inputs. - items = [1, 2, 3, 4, "five"] - # Run test. - actual = hprint.list_to_str2(items) - # Check output. - expected = "5 ['1', '2', '3', '4', 'five']" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_display_df -# ############################################################################# - - -class Test_display_df(hunitest.TestCase): - """ - Test the display_df function. - """ - - def helper_test_display_df( - self, - df: Union[pd.DataFrame, pd.Series], - expected: Optional[str], - **kwargs, - ) -> None: - """ - Test helper for display_df. 
- - :param df: Input dataframe or series - :param expected: Expected output to compare with actual output - :param kwargs: Keyword arguments to pass to display_df - """ - # Capture the output from print_or_display and logging. - outputs = [] - tag = kwargs.get("tag") - - def mock_print_or_display( - mock_df: pd.DataFrame, - *, - index: bool = True, - as_txt: bool = False, - log_level: int = logging.INFO, - ) -> None: - """ - Capture the dataframe string representation. - """ - if as_txt or not index: - output = mock_df.to_string(index=index) - else: - output = mock_df.to_html(index=index) - outputs.append(output) - - # Run test. - with unittest.mock.patch( - "helpers.hpandas_display.print_or_display", - side_effect=mock_print_or_display, - ): - with unittest.mock.patch( - "helpers.hpandas_display._LOG.log" - ) as mock_log: - hpandisp.display_df( - df, - log_level=logging.DEBUG, - **kwargs, - ) - # Capture tag logging if present. - if tag is not None and mock_log.called: - for call in mock_log.call_args_list: - if "tag=" in str(call): - outputs.append(f"tag={tag}") - # Check output if expected is provided. - if expected is not None: - expected = hprint.dedent(expected) - actual = "\n".join(outputs) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test display_df with small dataframe. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
01a
12b
23c
- """ - # Run test. - self.helper_test_display_df(df, expected=expected) - - def test2(self) -> None: - """ - Test display_df with large dataframe and max_lines. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(100)), - "col_2": [f"val_{i}" for i in range(100)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
9898val_98
9999val_99
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, max_lines=5) - - def test3(self) -> None: - """ - Test display_df with inline_index=True. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - . col_1 col_2 - 0 1 a - 1 2 b - 2 3 c - """ - # Run test. - self.helper_test_display_df( - df, expected=expected, inline_index=True, index=True - ) - - def test4(self) -> None: - """ - Test display_df with index=False. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - col_1 col_2 - 1 a - 2 b - 3 c - """ - # Run test. - self.helper_test_display_df(df, expected=expected, index=False) - - def test5(self) -> None: - """ - Test display_df with named index and inline_index=True. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - df.index.name = "my_index" - # Prepare outputs. - expected = """ - my_index col_1 col_2 - 0 1 a - 1 2 b - 2 3 c - """ - # Run test. - self.helper_test_display_df( - df, expected=expected, inline_index=True, index=False - ) - - def test6(self) -> None: - """ - Test display_df with Pandas Series (should convert to DataFrame). - """ - # Prepare inputs. - series = pd.Series([1, 2, 3, 4, 5], name="my_series") - # Prepare outputs. - expected = """ - . my_series - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - - """ - # Run test. - self.helper_test_display_df( - series, expected=expected, inline_index=True, index=False - ) - - def test7(self) -> None: - """ - Test display_df with tag parameter. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1, 2, 3]}) - # Prepare outputs. - expected = """ - . col_1 - 0 1 - 1 2 - 2 3 - tag=my_tag - """ - # Run test. 
- self.helper_test_display_df( - df, expected=expected, tag="my_tag", inline_index=True, index=False - ) - - def test8(self) -> None: - """ - Test display_df with mode='all_rows'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(50)), - "col_2": [f"val_{i}" for i in range(50)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all_rows") - - def test9(self) -> None: - """ - Test display_df with mode='all_cols'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - "col_3": [10.5, 20.5, 30.5], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2col_3
01a10.5
12b20.5
23c30.5
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all_cols") - - def test10(self) -> None: - """ - Test display_df with mode='all'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(50)), - "col_2": [f"val_{i}" for i in range(50)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all") - - def test11(self) -> None: - """ - Test display_df with invalid mode raises error. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1, 2, 3]}) - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hpandisp.display_df( - df, - mode="invalid_mode", - log_level=logging.DEBUG, - ) - self.assertIn("Invalid mode", str(cm.exception)) - - def test12(self) -> None: - """ - Test display_df with duplicate columns raises assertion. - """ - # Prepare inputs. - df = pd.DataFrame([[1, 2], [3, 4]]) - df.columns = ["col", "col"] - # Run test and check output. - with self.assertRaises(AssertionError): - hpandisp.display_df(df, log_level=logging.DEBUG) - - def test13(self) -> None: - """ - Test display_df with single row dataframe. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1], "col_2": ["a"]}) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - -
col_1col_2
01a
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, max_lines=5) - - def test14(self) -> None: - """ - Test display_df with max_lines=1 (edge case). - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(10)), - "col_2": [f"val_{i}" for i in range(10)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
88val_8
99val_9
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py deleted file mode 100644 index c1f66b0d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hpandas as hpandas -import helpers.hs3 as hs3 -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestReadDataFromS3 -# ############################################################################# - - -class TestReadDataFromS3(hunitest.TestCase): - def test_read_csv1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_name = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - # TODO(sonaal): Reorganize all s3 input data, CmampTask5650. 
- "alphamatic-data", - "data/kibot/all_stocks_1min/RIMG.csv.gz", - ) - hs3.dassert_path_exists(file_name, s3fs) - stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) - hpandas.read_csv_to_df(stream, **kwargs) - - @pytest.mark.slow("~15 sec.") - def test_read_parquet1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_name = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/kibot/pq/sp_500_1min/AAPL.pq", - ) - hs3.dassert_path_exists(file_name, s3fs) - stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) - hpandas.read_parquet_to_df(stream, **kwargs) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py deleted file mode 100644 index 0e1b813fa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py +++ /dev/null @@ -1,680 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_subset_multiindex_df -# ############################################################################# - - -class Test_subset_multiindex_df(hunitest.TestCase): - """ - Filter Multiindex DataFrame with 2 column levels. 
- """ - - @staticmethod - def get_multiindex_df() -> pd.DataFrame: - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables = [["asset1", "asset2"], ["open", "high", "low", "close"]] - index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) - nums = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df = pd.DataFrame(nums, index=timestamp_index, columns=index) - return df - - def test1(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 1 columns - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:03:00+00:00"), - columns_level0=["asset1"], - columns_level1=["high", "low"], - ) - expected_length = 3 - expected_column_names = [("asset1", "high"), ("asset1", "low")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:03:00+00:00] - columns=('asset1', 'high'),('asset1', 'low') - shape=(3, 2) - asset1 - timestamp high low - 2022-01-01 21:01:00+00:00 0.124922 -0.359292 - 2022-01-01 21:02:00+00:00 0.791050 0.766129 - 
2022-01-01 21:03:00+00:00 0.452377 1.610513 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 1 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), - columns_level1=["close"], - ) - expected_length = 2 - expected_column_names = [("asset1", "close"), ("asset2", "close")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] - columns=('asset1', 'close'),('asset2', 'close') - shape=(2, 2) - asset1 asset2 - timestamp close close - 2022-01-01 21:01:00+00:00 1.041378 0.100234 - 2022-01-01 21:02:00+00:00 -1.499353 2.045787 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test3(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), - columns_level0=["asset2"], - ) - expected_length = 2 - expected_column_names = [ - ("asset2", "close"), - ("asset2", "high"), - ("asset2", "low"), - ("asset2", "open"), - ] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] - columns=('asset2', 'close'),('asset2', 'high'),('asset2', 'low'),('asset2', 'open') - shape=(2, 4) - asset2 - timestamp close high low open - 2022-01-01 21:01:00+00:00 0.100234 1.407860 -0.131710 0.200999 - 2022-01-01 21:02:00+00:00 2.045787 0.060399 -0.776521 -1.059238 - """ - 
self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test4(self) -> None: - """ - Filter by: - - - Level 1 columns - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - columns_level0=["asset2"], - columns_level1=["low"], - ) - expected_length = 5 - expected_column_names = [("asset2", "low")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:05:00+00:00] - columns=('asset2', 'low') - shape=(5, 1) - asset2 - timestamp low - 2022-01-01 21:01:00+00:00 -0.131710 - 2022-01-01 21:02:00+00:00 -0.776521 - 2022-01-01 21:03:00+00:00 1.030158 - 2022-01-01 21:04:00+00:00 1.508267 - 2022-01-01 21:05:00+00:00 1.140696 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test_columns_level0_invalid_input(self) -> None: - df = self.get_multiindex_df() - with self.assertRaises(AssertionError): - hpandas.subset_multiindex_df( - df, - columns_level0=["invalid_input"], - ) - - def test_columns_level1_invalid_input(self) -> None: - df = self.get_multiindex_df() - with self.assertRaises(AssertionError): - hpandas.subset_multiindex_df( - df, - columns_level1=["invalid_input"], - ) - - -# ############################################################################# -# Test_compare_multiindex_dfs -# ############################################################################# - - -class Test_compare_multiindex_dfs(hunitest.TestCase): - """ - Subset Multiindex DataFrames with 2 column levels and compare its values. 
- """ - - @staticmethod - def get_multiindex_dfs() -> pd.DataFrame: - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables1 = [["asset1", "asset2"], ["open", "high", "low", "close"]] - index1 = pd.MultiIndex.from_product( - iterables1, names=[None, "timestamp"] - ) - nums1 = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df1 = pd.DataFrame(nums1, index=timestamp_index1, columns=index1) - # - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:00:00+00:00"), - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - iterables2 = [ - ["asset1", "asset2", "asset3"], - ["open", "high", "low", "close", "volume"], - ] - index2 = pd.MultiIndex.from_product( - iterables2, names=[None, "timestamp"] - ) - nums2 = [ - [ - 0.79095104, - -0.10304008, - -0.69848962, - 0.50078409, - 0.41756371, - -1.33487885, - 1.04546138, - 0.191062, - 0.08841533, - 0.61717725, - -2.15558483, - 1.21036169, - 2.60355386, - 0.07508052, - 1.00702849, - ], 
- [ - 0.56223723, - 0.97433151, - -1.40471182, - 0.53292355, - 0.24381913, - 0.64343069, - -0.46733655, - -1.20471491, - -0.08347491, - 0.33365524, - 0.04370572, - -0.53547653, - -1.07622168, - 0.7318155, - -0.47146482, - ], - [ - -0.48272741, - 1.17859032, - -0.40816664, - 0.46684297, - 0.42518077, - -1.52913855, - 1.09925095, - 0.48817537, - 1.2662552, - -0.59757824, - 0.23724902, - -0.00660826, - 0.09780482, - -0.17166633, - -0.54515917, - ], - [ - -0.37618442, - -0.3086281, - 1.09168123, - -1.1751162, - 0.38291194, - 1.80830268, - 1.28318855, - 0.75696503, - -1.04042572, - 0.06493231, - -0.10392893, - 1.89053412, - -0.21200498, - 1.61212857, - -2.00765278, - ], - [ - -0.19674075, - -1.02532132, - -0.22486018, - 0.37664998, - 0.35619408, - -0.77304675, - 0.59053699, - -1.53249898, - 0.57548424, - -0.32093537, - -0.52109972, - 1.70938034, - -0.55419632, - 0.45531674, - 0.66878119, - ], - [ - 0.05903553, - 1.2040308, - 0.62323671, - -0.23639535, - 0.87270792, - 2.60253287, - -0.77788842, - 0.80645833, - 1.85438743, - -1.77561587, - 0.41469478, - -0.29791883, - 0.75140743, - 0.50389702, - 0.55311024, - ], - [ - -0.97820763, - -1.32155197, - -0.6143911, - 0.01473404, - 0.87798665, - 0.1701048, - -0.75376376, - 0.72503616, - 0.5791076, - 0.43942739, - 0.62505817, - 0.44998739, - 0.37350664, - -0.73485633, - -0.70406184, - ], - [ - -1.35719477, - -1.82401288, - 0.77263763, - 2.36399552, - -0.45353019, - 0.33983713, - -0.62895329, - 1.34256611, - 0.2207564, - 0.24146184, - 0.90769186, - 0.57426869, - -0.04587782, - -1.6319128, - 0.38094798, - ], - ] - df2 = pd.DataFrame(nums2, index=timestamp_index2, columns=index2) - return df1, df2 - - def test1(self) -> None: - """ - - Subset by both columns and index - - Make inner intersection and compute pct_change - """ - df1, df2 = self.get_multiindex_dfs() - subset_multiindex_df_kwargs = { - "start_timestamp": pd.Timestamp("2022-01-01 21:02:00+00:00"), - "end_timestamp": pd.Timestamp("2022-01-01 21:04:00+00:00"), - 
"columns_level0": ["asset1", "asset2"], - "columns_level1": ["low", "high"], - } - compare_dfs_kwargs = { - "column_mode": "inner", - "row_mode": "inner", - "diff_mode": "pct_change", - "assert_diff_threshold": None, - } - df_diff = hpandas.compare_multiindex_dfs( - df1, - df2, - subset_multiindex_df_kwargs=subset_multiindex_df_kwargs, - compare_dfs_kwargs=compare_dfs_kwargs, - ) - expected_length = 3 - expected_column_names = [ - ("asset1.pct_change", "high.pct_change"), - ("asset1.pct_change", "low.pct_change"), - ("asset2.pct_change", "high.pct_change"), - ("asset2.pct_change", "low.pct_change"), - ] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:02:00+00:00, 2022-01-01 21:04:00+00:00] - columns=('asset1.pct_change', 'high.pct_change'),('asset1.pct_change', 'low.pct_change'),('asset2.pct_change', 'high.pct_change'),('asset2.pct_change', 'low.pct_change') - shape=(3, 4) - asset1.pct_change asset2.pct_change - timestamp high.pct_change low.pct_change high.pct_change low.pct_change - 2022-01-01 21:02:00+00:00 -32.881643 287.700041 -94.505475 -259.066028 - 2022-01-01 21:03:00+00:00 246.576815 47.525948 -137.632125 36.090517 - 2022-01-01 21:04:00+00:00 185.862978 -765.280229 -153.498432 198.418808 - """ - self.check_df_output( - df_diff, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - -# ############################################################################# -# Test_multiindex_df_info1 -# ############################################################################# - - -class Test_multiindex_df_info1(hunitest.TestCase): - @staticmethod - def get_multiindex_df_with_datetime_index() -> pd.DataFrame: - datetime_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables = 
[["asset1", "asset2"], ["open", "high", "low", "close"]] - index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) - nums = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df = pd.DataFrame(nums, index=datetime_index, columns=index) - return df - - @staticmethod - def get_multiindex_df_with_non_datetime_index() -> pd.DataFrame: - non_datetime_index = ["M", "N"] - index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"]]) - data = [[1, 2, 3, 4], [5, 6, 7, 8]] - df = pd.DataFrame(data, index=non_datetime_index, columns=index) - return df - - def test1(self) -> None: - """ - Test DataFrame with a datetime index. - """ - df = self.get_multiindex_df_with_datetime_index() - actual = hpandas.multiindex_df_info(df) - # This is required by `pandas` >= 2.2. - expected = """ - shape=2 x 4 x 5 - columns_level0=2 ['asset1', 'asset2'] - columns_level1=4 ['close', 'high', 'low', 'open'] - rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:03:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:05:00+00:00'] - start_timestamp=2022-01-01 21:01:00+00:00 - end_timestamp=2022-01-01 21:05:00+00:00 - frequency=min - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test DataFrame with a non-frequency datetime index. 
- """ - df = self.get_multiindex_df_with_datetime_index() - non_frequency_datetime_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:04:30+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - df.index = non_frequency_datetime_index - actual = hpandas.multiindex_df_info(df) - expected = """ - shape=2 x 4 x 5 - columns_level0=2 ['asset1', 'asset2'] - columns_level1=4 ['close', 'high', 'low', 'open'] - rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:04:30+00:00', '2022-01-01 21:06:00+00:00'] - start_timestamp=2022-01-01 21:01:00+00:00 - end_timestamp=2022-01-01 21:06:00+00:00 - frequency=None - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test DataFrame with a non-datetime index. - """ - df = self.get_multiindex_df_with_non_datetime_index() - actual = hpandas.multiindex_df_info(df) - expected = """ - shape=2 x 2 x 2 - columns_level0=2 ['A', 'B'] - columns_level1=2 ['X', 'Y'] - rows=2 ['M', 'N'] - """ - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py deleted file mode 100644 index f0295958f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py +++ /dev/null @@ -1,426 +0,0 @@ -import logging -from typing import Dict, List - -import pandas as pd - -import helpers.hprint as hprint -import helpers.hpandas as hpandas -import helpers.hpandas_stats as hpanstat -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# 
############################################################################# -# Test_compute_duration_df -# ############################################################################# - - -class Test_compute_duration_df(hunitest.TestCase): - """ - Compute timestamp stats from dfs and check the intersection. - """ - - @staticmethod - def get_dict_with_dfs() -> Dict[str, pd.DataFrame]: - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:00:00+00:00"), - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - timestamp_index3 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - ] - # - value1 = {"value1": [None, None, 1, 2, 3, 4, 5, None]} - value2 = {"value2": [1, 2, 3, None]} - value3 = {"value3": [None, None, 1, 2]} - # - df1 = pd.DataFrame(value1, index=timestamp_index1) - df2 = pd.DataFrame(value2, index=timestamp_index2) - df3 = pd.DataFrame(value3, index=timestamp_index3) - # - tag_to_df = { - "tag1": df1, - "tag2": df2, - "tag3": df3, - } - return tag_to_df - - def helper( - self, - valid_intersect: bool, - expected_start_timestamp: pd.Timestamp, - expected_end_timestamp: pd.Timestamp, - ) -> None: - """ - Checks if the intersection is valid and the same amongst all dfs. - """ - tag_to_df = self.get_dict_with_dfs() - _, tag_dfs = hpandas.compute_duration_df( - tag_to_df, valid_intersect=valid_intersect, intersect_dfs=True - ) - # Collect all start timestamps. 
- start_timestamps = [tag_dfs[tag].index.min() for tag in tag_dfs] - # Check that all start timestamps are equal. - start_equal = all( - element == start_timestamps[0] for element in start_timestamps - ) - self.assertTrue(start_equal) - # Check that start intersection is correct. - required_start_intersection = expected_start_timestamp - self.assertEqual(start_timestamps[0], required_start_intersection) - # Collect all end timestamps. - end_timestamps = [tag_dfs[tag].index.max() for tag in tag_dfs] - # Check that all end timestamps are equal. - end_equal = all( - element == end_timestamps[0] for element in end_timestamps - ) - self.assertTrue(end_equal) - # Check that end intersection is correct. - required_end_intersection = expected_end_timestamp - self.assertEqual(end_timestamps[0], required_end_intersection) - - def test1(self) -> None: - """ - Check only timestamp stats. - """ - tag_to_df = self.get_dict_with_dfs() - df_stats, _ = hpandas.compute_duration_df(tag_to_df) - expected_length = 3 - expected_column_names = [ - "max_index", - "max_valid_index", - "min_index", - "min_valid_index", - ] - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[tag1, tag3] - columns=min_index,max_index,min_valid_index,max_valid_index - shape=(3, 4) - min_index max_index min_valid_index max_valid_index - tag1 2022-01-01 21:00:00+00:00 2022-01-01 21:06:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:06:00+00:00 - tag2 2022-01-01 21:02:00+00:00 2022-01-01 21:05:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:04:00+00:00 - tag3 2022-01-01 21:01:00+00:00 2022-01-01 21:04:00+00:00 2022-01-01 21:03:00+00:00 2022-01-01 21:04:00+00:00 - """ - expected_signature = hprint.dedent(expected_signature) - self.check_df_output( - df_stats, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Modify initial DataFrames in dictionary with non-valid intersection - (incl NaNs). 
- """ - valid_intersect = False - expected_start_timestamp = pd.Timestamp("2022-01-01 21:02:00+00:00") - expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") - self.helper( - valid_intersect, expected_start_timestamp, expected_end_timestamp - ) - - def test3(self) -> None: - """ - Modify initial DataFrames in dictionary with valid intersection - (excluding NaNs). - """ - valid_intersect = True - expected_start_timestamp = pd.Timestamp("2022-01-01 21:03:00+00:00") - expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") - self.helper( - valid_intersect, expected_start_timestamp, expected_end_timestamp - ) - - -# ############################################################################# -# Test_compute_weighted_sum -# ############################################################################# - - -class Test_compute_weighted_sum(hunitest.TestCase): - def helper( - self, - index1: List[int], - index2: List[int], - weights_data: Dict[str, List[float]], - index_mode: str, - expected_signature: str, - ) -> None: - """ - Build inputs and check that function output is correct. - """ - # Create test data. - data1 = {"A": [1, 2], "B": [3, 4]} - df1 = pd.DataFrame(data1, index=index1) - data2 = {"A": [5, 6], "B": [7, 8]} - df2 = pd.DataFrame(data2, index=index2) - dfs = {"df1": df1, "df2": df2} - # Create weights DataFrame. - weights = pd.DataFrame(weights_data, index=dfs.keys()) - # Run the function. - weighted_sums = hpandas.compute_weighted_sum( - dfs=dfs, weights=weights, index_mode=index_mode - ) - actual_signature = str(weighted_sums) - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - def test1(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "assert_equal". 
- """ - index1 = [0, 1] - index2 = [0, 1] - weights_data = {"w1": [0.2, 0.8]} - index_mode = "assert_equal" - expected_signature = r""" - {'w1': A B - 0 4.2 6.2 - 1 5.2 7.2} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test2(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "intersect". - """ - index1 = [0, 1] - index2 = [0, 2] - weights_data = {"w1": [0.2, 0.8], "w2": [0.5, 0.5]} - index_mode = "intersect" - expected_signature = r""" - {'w1': A B - 0 4.2 6.2 - 1 NaN NaN - 2 NaN NaN, 'w2': A B - 0 3.0 5.0 - 1 NaN NaN - 2 NaN NaN} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test3(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "leave_unchanged". - """ - index1 = [0, 1] - index2 = [2, 3] - weights_data = {"w1": [0.2, 0.8]} - index_mode = "leave_unchanged" - expected_signature = r""" - {'w1': A B - 0 NaN NaN - 1 NaN NaN - 2 NaN NaN - 3 NaN NaN} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test4(self) -> None: - """ - Check that an assertion is raised if input is an empty dict. 
- """ - dfs: Dict[str, pd.DataFrame] = {} - weights_data = {"w1": [0.2, 0.8]} - index_mode = "assert_equal" - with self.assertRaises(AssertionError) as cm: - hpandas.compute_weighted_sum( - dfs=dfs, - weights=pd.DataFrame(weights_data), - index_mode=index_mode, - ) - actual_signature = str(cm.exception) - expected_signature = r""" - * Failed assertion * - cond={} - dictionary of dfs must be nonempty - """ - expected_signature = hprint.dedent(expected_signature) - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# Test_get_value_counts_stats_df -# ############################################################################# - - -class Test_get_value_counts_stats_df(hunitest.TestCase): - """ - Test value counts statistics computation. - """ - - def helper( - self, - category_data: List[str], - num_rows: int, - expected: str, - ) -> None: - """ - Test value counts with given parameters. - """ - # Prepare inputs. - df = pd.DataFrame({"category": category_data}) - # Run test. - result_df = hpandas.get_value_counts_stats_df( - df, "category", num_rows=num_rows - ) - # Check outputs. - actual = str(result_df) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic value counts with default parameters. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] - num_rows = 10 - # Prepare outputs. - expected = """ - count pct [%] - category - A 5 50.0 - B 2 20.0 - C 2 20.0 - D 1 10.0 - """ - # Run test. - self.helper(category_data, num_rows, expected) - - def test2(self) -> None: - """ - Test limiting the number of rows returned. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] - num_rows = 2 - # Prepare outputs. - expected = """ - count pct [%] - category - A 5 50.0 - B 2 20.0 - """ - # Run test. 
- self.helper(category_data, num_rows, expected) - - def test3(self) -> None: - """ - Test with num_rows=0 to return all rows. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B"] - num_rows = 0 - # Prepare outputs. - expected = """ - count pct [%] - category - A 3 50.000000 - B 2 33.333333 - C 1 16.666667 - """ - # Run test. - self.helper(category_data, num_rows, expected) - - -# ############################################################################# -# Test__get_unique_values_stats -# ############################################################################# - - -class Test__get_unique_values_stats(hunitest.TestCase): - """ - Test unique values count and percentage computation. - """ - - def helper(self, df_data: Dict, expected: str) -> None: - """ - Test unique values stats computation. - """ - # Prepare inputs. - df = pd.DataFrame(df_data) - # Run test. - result_df = hpanstat._get_unique_values_stats(df) - # Check outputs. - actual = str(result_df) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic unique values computation. - """ - df_data = { - "col1": [1, 2, 1, 3, 1], - "col2": ["a", "b", "a", "c", "d"], - "col3": [1.0, 1.0, 1.0, 1.0, 1.0], - } - expected = """ - num_unique unique [%] - col1 3 60.0 - col2 4 80.0 - col3 1 20.0 - """ - self.helper(df_data, expected) - - def test2(self) -> None: - """ - Test with NaN values. - """ - df_data = { - "col1": [1, 2, 1, None, 1], - "col2": ["a", "b", "a", None, "c"], - } - expected = """ - num_unique unique [%] - col1 2 40.0 - col2 3 60.0 - """ - self.helper(df_data, expected) - - def test3(self) -> None: - """ - Test with single unique value. 
- """ - df_data = { - "col1": [5, 5, 5, 5], - "col2": ["x", "x", "x", "x"], - } - expected = """ - num_unique unique [%] - col1 1 25.0 - col2 1 25.0 - """ - self.helper(df_data, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py deleted file mode 100644 index f11d6988a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py +++ /dev/null @@ -1,1888 +0,0 @@ -import csv -import io -import logging -import re -import time -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hdatetime as hdateti -import helpers.hpandas as hpandas -import helpers.hpandas_transform as hpantran -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_trim_df1 -# ############################################################################# - - -class Test_trim_df1(hunitest.TestCase): - def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """ - Return a df where the CSV txt is read verbatim without inferring dates. - - The `start_time` column is thus a str. 
- """ - txt = """ - ,start_time,egid,close - 4,2022-01-04 21:38:00.000000,13684,1146.48 - 8,2022-01-04 21:38:00.000000,17085,179.45 - 14,2022-01-04 21:37:00.000000,13684,1146.26 - 18,2022-01-04 21:37:00.000000,17085,179.42 - 24,2022-01-04 21:36:00.000000,13684,1146.0 - 27,2022-01-04 21:36:00.000000,17085,179.46 - 34,2022-01-04 21:35:00.000000,13684,1146.0 - 38,2022-01-04 21:35:00.000000,17085,179.42 - 40,2022-01-04 21:34:00.000000,17085,179.42 - 44,2022-01-04 21:34:00.000000,13684,1146.0 - """ - txt = hprint.dedent(txt) - df = pd.read_csv(io.StringIO(txt), *args, index_col=0, **kwargs) - df["start_time"] = pd.to_datetime(df["start_time"]) - return df - - def test_types1(self) -> None: - """ - Check the types of a df coming from `read_csv()`. - - The timestamps in `start_time` are left as strings. - """ - df = self.get_df() - # - actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def get_df_with_parse_dates(self) -> pd.DataFrame: - """ - Read the CSV parsing `start_time` as timestamps. - - The inferred type is a nasty `datetime64` which is not as well- - behaved as our beloved `pd.Timestamp`. 
- """ - df = self.get_df(parse_dates=["start_time"]) - return df - - def test_types2(self) -> None: - """ - Check the types of a df coming from `read_csv()` forcing parsing some - values as dates. - """ - df = self.get_df_with_parse_dates() - # Check. - actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def get_df_with_tz_timestamp(self) -> pd.DataFrame: - """ - Force the column parsed as `datetime64` into a tz-aware object. - - The resulting object is a `datetime64[ns, tz]`. - """ - df = self.get_df_with_parse_dates() - # Apply the tz. - col_name = "start_time" - df[col_name] = ( - df[col_name].dt.tz_localize("UTC").dt.tz_convert("America/New_York") - ) - df[col_name] = pd.to_datetime(df[col_name]) - return df - - def test_types3(self) -> None: - """ - Check the types of a df coming from `read_csv()` after conversion to - tz-aware objects. - """ - df = self.get_df_with_tz_timestamp() - # Check. 
- actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns, America/New_York] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 16:38:00-05:00 13684 1146.48 - 8 2022-01-04 16:38:00-05:00 17085 179.45 - 14 2022-01-04 16:37:00-05:00 13684 1146.26 - ... - 38 2022-01-04 16:35:00-05:00 17085 179.42 - 40 2022-01-04 16:34:00-05:00 17085 179.42 - 44 2022-01-04 16:34:00-05:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def helper( - self, - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, - expected: str, - ) -> None: - """ - Run trimming and check the outcome. - - See param description in `hpandas.trim_df`. - - :param expected: the expected oucome of the trimming - """ - df_trim = hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - actual = hpandas.df_to_str(df_trim, print_shape_info=True, tag="df_trim") - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_trim_df1(self) -> None: - """ - Test trimming: baseline case. - """ - df = self.get_df() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df2(self) -> None: - """ - Trim a df with a column that is `datetime64` without tz using a - `pd.Timestamp` without tz. - - This operation is valid. - """ - df = self.get_df_with_parse_dates() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df3(self) -> None: - """ - Trim a df with a column that is `datetime64` with tz vs a `pd.Timestamp - with tz. - - This operation is valid. - """ - df = self.get_df_with_tz_timestamp() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00", tz="UTC") - end_ts = pd.Timestamp("2022-01-04 21:38:00", tz="UTC") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 16:38:00-05:00 13684 1146.48 - 8 2022-01-04 16:38:00-05:00 17085 179.45 - 14 2022-01-04 16:37:00-05:00 13684 1146.26 - ... - 27 2022-01-04 16:36:00-05:00 17085 179.46 - 34 2022-01-04 16:35:00-05:00 13684 1146.00 - 38 2022-01-04 16:35:00-05:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - # pylint: disable=line-too-long - def test_trim_df4(self) -> None: - """ - Trim a df with a column that is `datetime64` with tz vs a - `pd.Timestamp` without tz. - - This operation is invalid and we expect an assertion. - """ - df = self.get_df_with_tz_timestamp() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - with self.assertRaises(TypeError) as cm: - hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - # Check. - actual = str(cm.exception) - expected = r""" - Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_trim_df5(self) -> None: - """ - Test filtering on the index. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45 - 2022-01-04 21:37:00 13684 1146.26 - ... 
- 2022-01-04 21:36:00 17085 179.46 - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df6(self) -> None: - """ - Test excluding the lower boundary. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = False - right_close = True - expected = r"""# df_trim= - index=[4, 27] - columns=start_time,egid,close - shape=(6, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - 27 2022-01-04 21:36:00 17085 179.46""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df7(self) -> None: - """ - Test excluding the upper boundary. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = False - expected = r"""# df_trim= - index=[14, 38] - columns=start_time,egid,close - shape=(6, 3) - start_time egid close - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df8(self) -> None: - """ - Test filtering on a sorted column. - """ - df = self.get_df() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - df = df.sort_values(ts_col_name) - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - ... - 18 2022-01-04 21:37:00 17085 179.42 - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df9(self) -> None: - """ - Test filtering on a sorted index. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42 - 2022-01-04 21:36:00 13684 1146.00 - ... - 2022-01-04 21:37:00 17085 179.42 - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df10(self) -> None: - """ - Test filtering on a sorted index, excluding lower and upper boundaries. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. 
- ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = False - right_close = False - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:36:00, 2022-01-04 21:37:00] - columns=egid,close - shape=(4, 2) - egid close - start_time - 2022-01-04 21:36:00 13684 1146.00 - 2022-01-04 21:36:00 17085 179.46 - 2022-01-04 21:37:00 13684 1146.26 - 2022-01-04 21:37:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df11(self) -> None: - """ - Test filtering on a non-sorted column, with `start_ts` being None. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = None - end_ts = pd.Timestamp("2022-01-04 21:37:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[14, 44] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df12(self) -> None: - """ - Test filtering on a sorted index, with `end_ts` being None. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = None - left_close = True - right_close = True - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42 - 2022-01-04 21:36:00 13684 1146.00 - ... 
- 2022-01-04 21:37:00 17085 179.42 - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - -# ############################################################################# -# Test_trim_df2 -# ############################################################################# - - -@pytest.mark.skip( - "Used for comparing speed of different trimming methods (CmTask1404)." -) -class Test_trim_df2(Test_trim_df1): - """ - Test the speed of different approaches to df trimming. - """ - - def get_data( - self, set_as_index: bool, sort: bool - ) -> Tuple[pd.DataFrame, str, pd.Timestamp, pd.Timestamp]: - """ - Get the data for experiments. - - :param set_as_index: whether to set the filtering values as - index - :param sort: whether to sort the filtering values - :return: the df to trim, the parameters for trimming - """ - # Get a large df. - df = self.get_df() - df = df.loc[df.index.repeat(100000)].reset_index(drop=True) - # Define the params. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - # Prepare the data. - if set_as_index: - df = df.set_index(ts_col_name, append=True, drop=False) - if sort: - df = df.sort_index(level=ts_col_name) - elif sort: - df = df.sort_values(ts_col_name) - return df, ts_col_name, start_ts, end_ts - - def check_trimmed_df( - self, - df: pd.DataFrame, - ts_col_name: str, - start_ts: pd.Timestamp, - end_ts: pd.Timestamp, - ) -> None: - """ - Confirm that the trimmed df matches what is expected. - - The trimmed df is compared to the one produced by - `hpandas.trim_df()` with lower and upper boundaries included. - Thus, it is ensured that all the trimming methods produce the - same output. - - See param descriptions in `hpandas.trim_df()`. - - :param df: the df trimmed in a test, to compare with the - `hpandas.trim_df()` one - """ - # Clean up the df from the test. 
- if df.index.nlevels > 1: - df = df.droplevel(ts_col_name) - df = df.reset_index(drop=True) - df = df.sort_values(by=[ts_col_name, "egid"], ascending=[False, True]) - # Get the reference trimmed df. - left_close = True - right_close = True - df_trim_for_comparison = hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - assert df.equals(df_trim_for_comparison) - - def test_simple_mask_col(self) -> None: - """ - Trim with a simple mask; filtering on a column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - mask = df[ts_col_name] >= start_ts - df = df[mask] - if not df.empty: - mask = df[ts_col_name] <= end_ts - df = df[mask] - end_time = time.time() - _LOG.info( - "Simple mask trim (column): %.2f seconds", (end_time - start_time) - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_simple_mask_idx(self) -> None: - """ - Trim with a simple mask; filtering on an index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - mask = df.index.get_level_values(ts_col_name) >= start_ts - df = df[mask] - if not df.empty: - mask = df.index.get_level_values(ts_col_name) <= end_ts - df = df[mask] - end_time = time.time() - _LOG.info( - "Simple mask trim (index): %.2f seconds", (end_time - start_time) - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_between_col(self) -> None: - """ - Trim using `pd.Series.between`; filtering on a column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. 
- start_time = time.time() - df = df[df[ts_col_name].between(start_ts, end_ts, inclusive="both")] - end_time = time.time() - _LOG.info( - "`pd.Series.between` trim (column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_between_idx(self) -> None: - """ - Trim using `pd.Series.between`; filtering on an index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - filter_values = pd.Series( - df.index.get_level_values(ts_col_name) - ).between(start_ts, end_ts, inclusive="both") - df = df.droplevel(ts_col_name) - df = df[filter_values] - end_time = time.time() - _LOG.info( - "`pd.Series.between` trim (index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_non_sorted_col(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a non-sorted column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.set_index(df[ts_col_name], append=True).sort_index( - level=ts_col_name - ) - df = df.swaplevel() - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (non-sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_non_sorted_idx(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a non-sorted index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - df = df.swaplevel() - # Run. 
- start_time = time.time() - df = df.sort_index(level=ts_col_name) - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (non-sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_sorted_col(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a sorted column. - """ - set_as_index = False - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.set_index(ts_col_name, drop=False) - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_sorted_idx(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a sorted index. - """ - set_as_index = True - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - df = df.swaplevel() - # Run. - start_time = time.time() - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_non_sorted_col(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a non-sorted column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. 
- start_time = time.time() - df = df.sort_values(ts_col_name, ascending=True) - left_idx = df[ts_col_name].searchsorted(start_ts, side="left") - right_idx = df[ts_col_name].searchsorted(end_ts, side="right") - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (non-sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_non_sorted_idx(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a non-sorted index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.sort_index(level=ts_col_name) - left_idx = df.index.get_level_values(ts_col_name).searchsorted( - start_ts, side="left" - ) - right_idx = df.index.get_level_values(ts_col_name).searchsorted( - end_ts, side="right" - ) - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (non-sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_sorted_col(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a sorted column. - """ - set_as_index = False - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - left_idx = df[ts_col_name].searchsorted(start_ts, side="left") - right_idx = df[ts_col_name].searchsorted(end_ts, side="right") - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. 
- self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_sorted_idx(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a sorted index. - """ - set_as_index = True - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - left_idx = df.index.get_level_values(ts_col_name).searchsorted( - start_ts, side="left" - ) - right_idx = df.index.get_level_values(ts_col_name).searchsorted( - end_ts, side="right" - ) - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - -# ############################################################################# -# Test_assemble_df_rows -# ############################################################################# - - -class Test_assemble_df_rows(hunitest.TestCase): - """ - Test assembing df values into a column-row structure. - """ - - @staticmethod - def get_rows_values_example(df_as_str: str) -> hpantran.RowsValues: - """ - Prepare the input. - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - return rows_values - - def test1(self) -> None: - """ - Test unnamed index, compact df. - """ - # Get the input. - df_as_str = """ - col1 col2 col3 col4 - 0 0.1 0.1 0.1 0.1 - 1 0.2 0.2 0.2 0.2""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. 
- expected = [ - ["", "col1", "col2", "col3", "col4"], - ["0", "0.1", "0.1", "0.1", "0.1"], - ["1", "0.2", "0.2", "0.2", "0.2"], - ] - self.assertListEqual(actual, expected) - - def test2(self) -> None: - """ - Test unnamed index, large df. - """ - # Get the input. - df_as_str = """ - column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 - 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 - 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. - expected = [ - [ - "", - "column_with_a_very_long_name_1", - "column_with_a_very_long_name_2", - "column_with_a_very_long_name_3", - "column_with_a_very_long_name_4", - "column_with_a_very_long_name_5", - ], - [ - "0", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - [ - "1", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - ] - self.assertListEqual(actual, expected) - - def test3(self) -> None: - """ - Test named index, compact df. - """ - # Get the input. - df_as_str = """ - col1 col2 col3 col4 - idx - 0 0.1 0.1 0.1 0.1 - 1 0.2 0.2 0.2 0.2""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. 
- expected = [ - ["idx", "col1", "col2", "col3", "col4"], - ["0", "0.1", "0.1", "0.1", "0.1"], - ["1", "0.2", "0.2", "0.2", "0.2"], - ] - self.assertListEqual(actual, expected) - - def test4(self) -> None: - """ - Test named index, large df. - """ - # Get the input. - df_as_str = """ - column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 - idx - 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 - 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. - expected = [ - [ - "idx", - "column_with_a_very_long_name_1", - "column_with_a_very_long_name_2", - "column_with_a_very_long_name_3", - "column_with_a_very_long_name_4", - "column_with_a_very_long_name_5", - ], - [ - "0", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - [ - "1", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - ] - self.assertListEqual(actual, expected) - - -# ############################################################################# -# Test_str_to_df -# ############################################################################# - - -class Test_str_to_df(hunitest.TestCase): - """ - Test converting a string representation of a dataframe into a Pandas df. - """ - - def test1(self) -> None: - # Prepare input. 
- df_as_str = """ - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05""" - col_to_type = { - "__index__": int, - "col1": float, - "col2": str, - "col3": None, - "col4": pd.Timestamp, - } - col_to_name_type: Dict[str, type] = {} - # Run. - actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) - # Check. - expected = pd.DataFrame( - { - "col1": [0.1, 0.2], - "col2": ["a", "b c"], - "col3": [None, None], - "col4": [ - pd.Timestamp("2020-01-01"), - pd.Timestamp("2021-05-05"), - ], - }, - index=[0, 1], - ) - hunitest.compare_df(actual, expected) - - def test2(self) -> None: - """ - Run a full circle check. - - The df used for testing: - - 1 2 - end_timestamp - 2023-08-15 0.21 1.7 - 2023-08-16 0.22 1.8 - 2023-08-17 0.23 1.9 - """ - # Create a df from the data. - data = { - 1: [0.21, 0.22, 0.23], - 2: [1.7, 1.8, 1.9], - } - timestamps = [ - pd.Timestamp("2023-08-15"), - pd.Timestamp("2023-08-16"), - pd.Timestamp("2023-08-17"), - ] - expected = pd.DataFrame(data, index=timestamps) - expected.index.name = "end_timestamp" - # Convert the df into a string. - df_as_str = hpandas.df_to_str(expected) - # Convert the resulting string back into a df. - col_to_type = { - "__index__": pd.Timestamp, - "1": float, - "2": float, - } - col_to_name_type = { - "1": int, - "2": int, - } - actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) - # Check that the initial df and the final df are the same. - hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestFindGapsInDataframes -# ############################################################################# - - -class TestFindGapsInDataframes(hunitest.TestCase): - def test_find_gaps_in_dataframes(self) -> None: - """ - Verify that gaps are caught. - """ - # Prepare inputs. 
- test_data = pd.DataFrame( - data={ - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - ) - # Run. - missing_data = hpandas.find_gaps_in_dataframes( - test_data.head(2), test_data.tail(2) - ) - # Check output. - actual = pd.concat(missing_data) - actual = hpandas.df_to_str(actual) - expected = r""" dummy_value_1 dummy_value_2 dummy_value_3 - 2 3 C 0 - 0 1 A 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestSubsetDf1 -# ############################################################################# - - -class TestSubsetDf1(hunitest.TestCase): - def test1(self) -> None: - # Generate some random data. - np.random.seed(42) - df = pd.DataFrame( - np.random.randint(0, 100, size=(20, 4)), columns=list("ABCD") - ) - # Subset. - df2 = hpandas.subset_df(df, nrows=5, seed=43) - # Check. - actual = hpandas.df_to_str(df2) - expected = r""" - A B C D - 0 51 92 14 71 - 1 60 20 82 86 - 3 23 2 21 52 - ... - 17 80 35 49 3 - 18 1 5 53 3 - 19 53 92 62 17 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestCheckAndFilterMatchingColumns -# ############################################################################# - - -class TestCheckAndFilterMatchingColumns(hunitest.TestCase): - """ - Test that matching columns are filtered correctly. 
- """ - - @staticmethod - def get_test_data() -> pd.DataFrame: - df = pd.DataFrame( - data=[[3, 4, 5]] * 3, - columns=["col1", "col2", "col3"], - ) - return df - - def test_check_and_filter_matching_columns1(self) -> None: - """ - - required columns = received columns - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col2", "col3"] - filter_data_mode = "assert" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - self.assert_equal(str(actual_columns), str(columns)) - - def test_check_and_filter_matching_columns2(self) -> None: - """ - - received columns contain some columns apart from required ones - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col3"] - filter_data_mode = "assert" - with self.assertRaises(AssertionError): - hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - - def test_check_and_filter_matching_columns3(self) -> None: - """ - - received columns do not contain some of required columns - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col4"] - filter_data_mode = "assert" - with self.assertRaises(AssertionError): - hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - - def test_check_and_filter_matching_columns4(self) -> None: - """ - - received columns contain some columns apart from required ones - - `filter_data_mode` = "warn_and_trim" - """ - df = self.get_test_data() - columns = ["col1", "col3"] - filter_data_mode = "warn_and_trim" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - self.assert_equal(str(actual_columns), str(columns)) - - def test_check_and_filter_matching_columns5(self) -> None: - """ - - received columns do not contain some of required columns - - `filter_data_mode` = "warn_and_trim" - """ - df = 
self.get_test_data() - columns = ["col1", "col2", "col4"] - filter_data_mode = "warn_and_trim" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - expected_columns = ["col1", "col2"] - self.assert_equal(str(actual_columns), str(expected_columns)) - - -# ############################################################################# - - -# ############################################################################# -# Test_merge_dfs1 -# ############################################################################# - - -class Test_merge_dfs1(hunitest.TestCase): - """ - Test that 2 dataframes are merged correctly. - """ - - @staticmethod - def get_dataframe(data: Dict, index: List[int]) -> pd.DataFrame: - df = pd.DataFrame.from_dict(data) - index = pd.Index(index) - df = df.set_index(index, drop=True) - return df - - def test1(self) -> None: - """ - Overlap of `threshold_col` values is 100%. - """ - # Create test data. - data1 = { - "col1": [1, 10, 100], - "col2": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 70, 700], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - merged_df = hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - # Set expected values. 
- expected_length = 3 - expected_column_names = [ - "col1", - "col2", - "col3", - "col4", - "col5", - "threshold_col", - ] - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[0, 2] - columns=col1,col2,col3,threshold_col,col4,col5 - shape=(3, 6) - col1 col2 col3 threshold_col col4 col5 - 0 1 2.0 3 7 4 5.0 - 1 10 NaN 30 70 40 NaN - 2 100 200.0 300 700 400 500.0 - """ - # Check. - self.check_df_output( - merged_df, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Overlap of `threshold_col` values is below the threshold. - """ - # Create test data. - data1 = { - "col1": [1, 10, 100], - "col2": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 60, 600], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - # Check. - with self.assertRaises(AssertionError): - hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - - def test3(self) -> None: - """ - Overlap of `threshold_col` values is above the threshold. - """ - # Create test data. 
- data1 = { - "col1": [1, 3, 5, 7, 10, 100, 100, 100, 100, 10, 10], - "col2": [2, 4, 6, 8, np.nan, 200, 200, np.nan, 10, 10, 100], - "col3": [1, 2, 3, 4, 30, 300, 300, np.nan, 300, 300, 30], - "threshold_col": [0, 1, 3, 5, 7, 9, 11, 13, 15, 70, 700], - } - index1 = range(0, 11) - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300, 1, 2, 3, 4, 30, 300, 300, np.nan], - "col4": [4, 40, 400, 2, 4, 6, 8, 11, 13, 15, 70], - "col5": [5, np.nan, 500, 5, 7, 10, 1, 2, 3, 4, 30], - "threshold_col": [1, 2, 3, 5, 7, 9, 11, 13, 15, 70, 700], - } - index2 = range(9, 20) - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - merged_df = hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - # Set expected values. - expected_length = 20 - expected_column_names = [ - "col1", - "col2", - "col3", - "col4", - "col5", - "threshold_col", - ] - expected_column_unique_values = None - # This is required by `pandas` >= 2.2. - expected_signature = r""" - # df= - index=[0, 19] - columns=col1,col2,col3,threshold_col,col4,col5 - shape=(20, 6) - col1 col2 col3 threshold_col col4 col5 - 0 1.0 2.0 1.0 0 NaN NaN - 1 NaN NaN 1.0 5 2.0 5.0 - 2 3.0 4.0 2.0 1 NaN NaN - ... - 17 10.0 10.0 300.0 70 15.0 4.0 - 18 100.0 NaN NaN 13 NaN NaN - 19 NaN NaN NaN 700 70.0 30.0 - """ - # Check. - self.check_df_output( - merged_df, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test4(self) -> None: - """ - There are common columns (besides columns to merge on) in dataframes. - """ - # Create test data. 
- data1 = { - "col1": [1, 10, 100], - "col5": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 70, 700], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - # Check. - with self.assertRaises(AssertionError): - hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - - -# ############################################################################# -# Test_apply_index_mode -# ############################################################################# - - -class Test_apply_index_mode(hunitest.TestCase): - @staticmethod - def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Generate toy dataframes for the test. - """ - # Define common columns. - columns = ["A", "B"] - # Build dataframes with intersecting indices. - idx1 = [0, 1, 2, 3, 4] - data1 = [ - [0.21, 0.44], - [0.11, 0.42], - [1.99, 0.8], - [3.1, 0.91], - [3.5, 1.4], - ] - df1 = pd.DataFrame(data1, columns=columns, index=idx1) - # - idx2 = [0, 6, 2, 3, 5] - data1 = [ - [0.1, 0.4], - [0.11, 0.2], - [1.29, 0.38], - [0.1, 0.9], - [3.3, 2.4], - ] - df2 = pd.DataFrame(data1, columns=columns, index=idx2) - return df1, df2 - - def test1(self) -> None: - """ - Check that returned dataframes have indices that are equal to the - common index. - - - `mode="intersect"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - # Use an index intersection to transform dataframes. - mode = "intersect" - df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) - # Check that indices are common. 
- common_index = df1_in.index.intersection(df2_in.index) - common_index = hpandas.df_to_str(common_index) - idx1 = hpandas.df_to_str(df1_out.index) - idx2 = hpandas.df_to_str(df2_out.index) - self.assert_equal(idx1, common_index) - self.assert_equal(idx2, common_index) - - def test2(self) -> None: - """ - Check that dataframe indices did not change after applying an index - mode. - - - `mode="leave_unchanged"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "leave_unchanged" - df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) - # Check that indices are as-is. - df1_in_idx = hpandas.df_to_str(df1_in.index) - df1_out_idx = hpandas.df_to_str(df1_out.index) - self.assert_equal(df1_in_idx, df1_out_idx) - # - df2_in_idx = hpandas.df_to_str(df2_in.index) - df2_out_idx = hpandas.df_to_str(df2_out.index) - self.assert_equal(df2_in_idx, df2_out_idx) - - def test3(self) -> None: - """ - Check that an assertion is raised when indices are not equal. - - - `mode="assert_equal"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "assert_equal" - # Check that both indices are equal, assert otherwise. - with self.assertRaises(AssertionError) as cm: - hpandas.apply_index_mode(df1_in, df2_in, mode) - actual = str(cm.exception) - # Check the error exception message. - self.check_string(actual) - - -# ############################################################################# -# Test_apply_column_mode -# ############################################################################# - - -class Test_apply_column_mode(hunitest.TestCase): - """ - Test that function applies column modes correctly. - """ - - @staticmethod - def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Generate toy dataframes for the test. - """ - # Build dataframes with intersecting columns. 
- columns_1 = ["A", "B"] - data1 = [ - [0.21, 0.44], - [0.11, 0.42], - [1.99, 0.8], - [3.1, 0.91], - [3.5, 1.4], - ] - df1 = pd.DataFrame(data1, columns=columns_1) - # - columns_2 = ["A", "C"] - data2 = [ - [0.1, 0.4], - [0.11, 0.2], - [1.29, 0.38], - [0.1, 0.9], - [3.3, 2.4], - ] - df2 = pd.DataFrame(data2, columns=columns_2) - return df1, df2 - - def test1(self) -> None: - """ - Check that returned dataframes have columns that are equal to the - common ones. - - - `mode="intersect"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - # Use a column intersection mode to transform dataframes. - mode = "intersect" - df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) - # Check that dfs have equal column names. - common_columns = df1_in.columns.intersection(df2_in.columns) - common_columns = hpandas.df_to_str(common_columns) - columns1 = hpandas.df_to_str(df1_out.columns) - self.assert_equal(columns1, common_columns) - # - columns2 = hpandas.df_to_str(df2_out.columns) - self.assert_equal(columns2, common_columns) - - def test2(self) -> None: - """ - Check that dataframes' columns did not change after applying a column - mode. - - - `mode="leave_unchanged"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "leave_unchanged" - df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) - # Check that columns are as-is. - df1_in_columns = hpandas.df_to_str(df1_in.columns) - df1_out_columns = hpandas.df_to_str(df1_out.columns) - self.assert_equal(df1_in_columns, df1_out_columns) - # - df2_in_columns = hpandas.df_to_str(df2_in.columns) - df2_out_columns = hpandas.df_to_str(df2_out.columns) - self.assert_equal(df2_in_columns, df2_out_columns) - - def test3(self) -> None: - """ - Check that an assertion is raised when columns are not equal. - - - `mode="assert_equal"` - """ - # Get test data. 
- df1_in, df2_in = self.get_test_data() - mode = "assert_equal" - # Check that both dataframes columns are equal, assert otherwise. - with self.assertRaises(AssertionError) as cm: - hpandas.apply_columns_mode(df1_in, df2_in, mode) - actual = str(cm.exception) - # Compare the actual outcome with an expected one. - self.check_string(actual) - - -# ############################################################################# - - -# ############################################################################# -# Test_get_df_from_iterator -# ############################################################################# - - -class Test_get_df_from_iterator(hunitest.TestCase): - def test1(self) -> None: - """ - Check that a dataframe is correctly built from an iterator of - dataframes. - """ - # Build iterator of dataframes for the test. - data1 = { - "num_col": [1, 2], - "str_col": ["A", "B"], - } - df1 = pd.DataFrame(data=data1) - data2 = { - "num_col": [3, 4], - "str_col": ["C", "D"], - } - df2 = pd.DataFrame(data=data2) - data3 = { - "num_col": [5, 6], - "str_col": ["E", "F"], - } - df3 = pd.DataFrame(data=data3) - # Run. - iter_ = iter([df1, df2, df3]) - df = hpandas.get_df_from_iterator(iter_) - actual_signature = hpandas.df_to_str(df) - expected_signature = """ num_col str_col - 0 1 A - 0 3 C - 0 5 E - 1 2 B - 1 4 D - 1 6 F - """ - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# TestFilterByTime -# ############################################################################# - - -class TestFilterByTime(hunitest.TestCase): - @staticmethod - def _get_test_data() -> pd.DataFrame: - """ - Get data for testing. 
- - :return: data for testing - """ - df = pd.DataFrame( - { - "col1": [1, 2, 3, 4], - "col2": [ - hdateti.to_datetime("2018-04-05"), - hdateti.to_datetime("2018-04-06"), - hdateti.to_datetime("2018-04-07"), - hdateti.to_datetime("2018-04-08"), - ], - } - ) - df.index = pd.date_range("2017-01-01", periods=4) - return df - - def test_filter_by_index1(self) -> None: - """ - Verify that `[lower_bound, upper_bound)` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="left", - ts_col_name=None, - ) - expected = df[1:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index2(self) -> None: - """ - Verify that `(lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="right", - ts_col_name=None, - ) - expected = df[2:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index3(self) -> None: - """ - Verify that `[lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name=None, - ) - expected = df[1:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index4(self) -> None: - """ - Verify that `(lower_bound, upper_bound)` works. 
- """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="neither", - ts_col_name=None, - ) - expected = df[2:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column1(self) -> None: - """ - Verify that `[lower_bound, upper_bound)` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="left", - ts_col_name="col2", - ) - expected = df[1:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column2(self) -> None: - """ - Verify that `(lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="right", - ts_col_name="col2", - ) - expected = df[2:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column3(self) -> None: - """ - Verify that `[lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name="col2", - ) - expected = df[1:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column4(self) -> None: - """ - Verify that `(lower_bound, upper_bound)` works. 
- """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="neither", - ts_col_name="col2", - ) - expected = df[2:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_no_intersection(self) -> None: - """ - Verify that if time interval is not covered by data then empty - DataFrame is returned. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2021-04-06") - upper_bound = hdateti.to_datetime("2021-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name=None, - ) - self.assertEqual(actual.shape[0], 0) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py deleted file mode 100644 index 67eddb250..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py +++ /dev/null @@ -1,251 +0,0 @@ -import logging - -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_df_to_str -# ############################################################################# - - -class Test_df_to_str(hunitest.TestCase): - @staticmethod - def get_test_data() -> pd.DataFrame: - test_data = { - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - df = pd.DataFrame(data=test_data) - return df - - def test_df_to_str1(self) -> None: - """ - Test common call 
to `df_to_str` with basic df. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str2(self) -> None: - """ - Test common call to `df_to_str` with tag. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, tag="df") - expected = r"""# df= - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str3(self) -> None: - """ - Test common call to `df_to_str` with print_shape_info. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_shape_info=True) - expected = r""" - index=[0, 2] - columns=dummy_value_1,dummy_value_2,dummy_value_3 - shape=(3, 3) - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str4(self) -> None: - """ - Test common call to `df_to_str` with print_dtypes. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_dtypes=True) - expected = r""" - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 - 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 - 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A - 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str5(self) -> None: - """ - Test common call to `df_to_str` with multiple args. 
- """ - df = self.get_test_data() - actual = hpandas.df_to_str( - df, print_shape_info=True, print_dtypes=True, tag="df" - ) - expected = r""" - # df= - index=[0, 2] - columns=dummy_value_1,dummy_value_2,dummy_value_3 - shape=(3, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 - 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 - 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A - 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str6(self) -> None: - """ - Test common call to `df_to_str` with `pd.Series`. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df["dummy_value_2"]) - expected = r""" - dummy_value_2 - 0 A - 1 B - 2 C - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str7(self) -> None: - """ - Test common call to `df_to_str` with `pd.Index`. - """ - df = self.get_test_data() - index = df.index - index.name = "index_name" - actual = hpandas.df_to_str(index) - expected = r""" - index_name - 0 0 - 1 1 - 2 2 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str8(self) -> None: - """ - Test that `-0.0` is replaced with `0.0`. - """ - test_data = { - "dummy_value_1": [1, 2, 3, 4], - "dummy_value_2": ["A", "B", "C", "D"], - "dummy_value_3": [0, 0, 0, 0], - "dummy_value_4": [+0.0, -0.0, +0.0, -0.0], - } - df = pd.DataFrame(data=test_data) - actual = hpandas.df_to_str(df, handle_signed_zeros=True) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 dummy_value_4 - 0 1 A 0 0.0 - 1 2 B 0 0.0 - 2 3 C 0 0.0 - 3 4 D 0 0.0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str9(self) -> None: - """ - Test that `-0.0` is replaced with `0.0` in a multi-index dataframe. 
- """ - test_data = { - ("A", "X"): [-0.0, 5.0, -0.0], - ("A", "Y"): [2, 6, 0], - ("B", "X"): [0, 7, 3], - ("B", "Y"): [4.4, -0.0, 5.1], - } - df = pd.DataFrame(data=test_data) - actual = hpandas.df_to_str(df, handle_signed_zeros=True) - expected = r""" - A B - X Y X Y - 0 0.0 2 0 4.4 - 1 5.0 6 7 0.0 - 2 0.0 0 3 5.1""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str10(self) -> None: - """ - Test common call to `df_to_str` with `print_memory_usage = True`. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_memory_usage=True) - # This is required by `numpy` >= 2.1.0 - expected = r""" - * memory= - shallow deep - Index 132.0 b 132.0 b - dummy_value_1 24.0 b 24.0 b - dummy_value_2 24.0 b 150.0 b - dummy_value_3 24.0 b 24.0 b - total 204.0 b 330.0 b - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_head -# ############################################################################# - - -class Test_head(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic head functionality without seed. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": [1, 2, 3, 4, 5], - "col2": ["a", "b", "c", "d", "e"], - } - ) - hpandas.head(df, num_rows=2) - - def test2(self) -> None: - """ - Test head with a seed for reproducible sampling. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": list(range(10)), - "col2": list("abcdefghij"), - } - ) - hpandas.head(df, seed=42, num_rows=3) - - def test3(self) -> None: - """ - Test head with different num_rows parameter. - """ - # Prepare input. 
- df = pd.DataFrame( - { - "col1": list(range(5)), - "col2": list("abcde"), - } - ) - hpandas.head(df, num_rows=4) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py deleted file mode 100644 index a1be56d40..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py +++ /dev/null @@ -1,1468 +0,0 @@ -import datetime -import logging -import os -import random -from typing import Any, List, Optional, Tuple - -import pandas as pd -import pyarrow -import pyarrow.parquet as parquet -import pytest - -import helpers.hdbg as hdbg -import helpers.hmoto as hmoto -import helpers.hpandas as hpandas -import helpers.hparquet as hparque -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# Most of these unit tests are taken from -# `amp/helpers/notebooks/gallery_parquet.ipynb` - - -def _get_df(date: datetime.date, seed: int = 42) -> pd.DataFrame: - """ - Create pandas random data, like: - - ``` - idx instr val1 val2 - 2000-01-01 0 A 99 30 - 2000-01-02 0 A 54 46 - 2000-01-03 0 A 85 86 - ``` - """ - instruments = "A B C D E".split() - date = pd.Timestamp(date, tz="America/New_York") - start_date = date.replace(hour=9, minute=30) - end_date = date.replace(hour=16, minute=0) - df_idx = pd.date_range(start_date, end_date, freq="5T") - _LOG.debug("df_idx=[%s, %s]", min(df_idx), max(df_idx)) - _LOG.debug("len(df_idx)=%s", len(df_idx)) - random.seed(seed) - # For each instruments generate random data. 
- df = [] - for idx, inst in enumerate(instruments): - df_tmp = pd.DataFrame( - { - "idx": idx, - "instr": inst, - "val1": [random.randint(0, 100) for _ in range(len(df_idx))], - "val2": [random.randint(0, 100) for _ in range(len(df_idx))], - }, - index=df_idx, - ) - df.append(df_tmp) - # Create a single df for all the instruments. - df = pd.concat(df) - return df - - -def _get_test_df_with_timestamps() -> pd.DataFrame: - """ - Create a DataFrame with timestamps. - """ - timestamp = pd.Timestamp("2022-01-01 00:00:00.123456", tz="America/New_York") - index = [timestamp for _ in range(6)] - df = pd.DataFrame( - { - "n_legs": [2, 2, 4, 4, 5, 100], - "animal": [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede", - ], - "year": [2001, 2002, 2001, 2003, 2003, 2001], - }, - index=index, - ) - knowledge_timestamp = pd.Timestamp.now(tz="UTC") - df["knowledge_timestamp"] = knowledge_timestamp - return df - - -def _get_df_example1() -> pd.DataFrame: - date = datetime.date(2020, 1, 1) - df = _get_df(date) - _LOG.debug("df=\n%s", df.head(3)) - return df - - -def _compare_dfs(self: Any, df1: pd.DataFrame, df2: pd.DataFrame) -> str: - df1_as_str: str = hpandas.df_to_str(df1, print_shape_info=True, tag="") - df2_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="") - self.assert_equal(df1_as_str, df2_as_str, fuzzy_match=True) - # When Parquet reads partitioned dataset can convert partitioning columns into - # categorical variables that can create false positives. 
- pd.testing.assert_frame_equal( - df1, df2, check_dtype=False, check_categorical=False - ) - return df1_as_str - - -# ############################################################################# - - -# ############################################################################# -# TestParquet1 -# ############################################################################# - - -class TestParquet1(hunitest.TestCase): - def test_get_df1(self) -> None: - """ - Check the output of `_get_df()`. - """ - # Prepare data. - df = _get_df_example1() - # Check. - actual = hpandas.df_to_str(df, print_shape_info=True, tag="df") - expected = r"""# df= - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(395, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... - 2020-01-01 15:50:00-05:00 4 E 57 3 - 2020-01-01 15:55:00-05:00 4 E 33 50 - 2020-01-01 16:00:00-05:00 4 E 96 75""" - self.assert_equal(actual, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def get_file_name(self) -> str: - dir_name = self.get_scratch_space() - file_name = os.path.join(dir_name, "df.parquet") - return file_name - - def write_data_as_parquet(self) -> Tuple[pd.DataFrame, str]: - # Prepare data. - df = _get_df_example1() - # Save data. - file_name = self.get_file_name() - hparque.to_parquet(df, file_name, log_level=logging.INFO) - return df, file_name - - def write_and_read_helper(self, columns: List[str]) -> None: - """ - - Save a dataframe as Parquet - - Read back certain columns of the data from the file - - Check that the df is what expected - """ - df, file_name = self.write_data_as_parquet() - # Read back one column of the data. - df2 = hparque.from_parquet( - file_name, columns=columns, log_level=logging.INFO - ) - _LOG.debug("df2=\n%s", df2.head(3)) - # Check. 
- df = df[columns] - _compare_dfs(self, df, df2) - - def test_write_and_read_everything1(self) -> None: - """ - Read all the columns from the file. - """ - df, file_name = self.write_data_as_parquet() - # Read data back. - df2 = hparque.from_parquet(file_name, log_level=logging.INFO) - _LOG.debug("df2=\n%s", df2.head(3)) - # Check. - _compare_dfs(self, df, df2) - - def test_write_and_read_one_column1(self) -> None: - """ - - Read back one column of the data from the file. - """ - # Read back one column of the data. - columns = ["val1"] - self.write_and_read_helper(columns) - - def test_write_and_read_two_columns1(self) -> None: - """ - Read back one column of the data from the file. - """ - # Read back two columns of the data. - columns = ["idx", "val1"] - self.write_and_read_helper(columns) - - # ////////////////////////////////////////////////////////////////////////////// - - def read_filtered_parquet( - self, file_name: str, filters: Any - ) -> pd.DataFrame: - filesystem = None - dataset = parquet.ParquetDataset( - file_name, - filesystem=filesystem, - filters=filters, - ) - columns = None - table = dataset.read(columns=columns) - df = table.to_pandas() - _LOG.debug("df=\n%s", df.head(3)) - return df - - def test_read_with_filter1(self) -> None: - """ - Read only a subset of the rows. - """ - _, file_name = self.write_data_as_parquet() - # Read. - filters = [] - filters.append([("idx", "=", 0)]) - df2 = self.read_filtered_parquet(file_name, filters) - # Check. - actual = hpandas.df_to_str(df2, print_shape_info=True, tag="df") - expected = r"""# df= - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(79, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... 
- 2020-01-01 15:50:00-05:00 0 A 29 76 - 2020-01-01 15:55:00-05:00 0 A 12 8 - 2020-01-01 16:00:00-05:00 0 A 48 49""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_write_and_read_partition_parquet_files_with_unit(self) -> None: - """ - Write the Pandas DataFrame to partitioned Parquet files and read it - back, verifying the retention of time unit information in the index. - """ - # Prepare test data. - dst_dir = os.path.join(self.get_scratch_space(), "tmp.partition_parquet") - initial_df = _get_test_df_with_timestamps() - initial_df.index = initial_df.index.as_unit("us") - partition_columns = initial_df.columns.tolist() - # The `to_partitioned_parquet` saves the given dataframe as Parquet - # files partitioned along the given columns. - hparque.to_partitioned_parquet(initial_df, partition_columns, dst_dir) - df_from_parquet_files = hparque.from_parquet(dst_dir) - # Check that the time unit is ns. - self.assert_equal("ns", df_from_parquet_files.index.unit) - # TODO(Vlad): Refactor after CmampTask7331 is resolved. - # self.assert_equal(initial_df.index.unit, df.index.unit) - - def test_write_and_read_parquet_file_with_unit(self) -> None: - """ - Write the provided DataFrame to Parquet file and read it back, - verifying the retention of time unit information in the index. - """ - test_parquet_file = os.path.join( - self.get_scratch_space(), "tmp_dummy.parquet" - ) - initial_df = _get_test_df_with_timestamps() - initial_df.index = initial_df.index.as_unit("us") - # The `to_parquet` function writes a DF to a single parquet file without - # any partition. - hparque.to_parquet(initial_df, test_parquet_file) - df = hparque.from_parquet(test_parquet_file) - self.assert_equal("ns", df.index.unit) - # TODO(Vlad): Refactor after CmampTask7331 is resolved. 
- # self.assert_equal(initial_df.index.unit, df.index.unit) - - @pytest.mark.skip(reason="TODO(Juraj): HelpersTask21.") - def test_save_read_concat_data(self) -> None: - """ - Verify that data produced by different version of Pandas preserves - types when reading/writing to/from Parquet. - """ - # Copy sample data that saved with the Pandas v.1.5.1 from S3 to the - # scratch dir. - s3_path = self.get_s3_input_dir() - local_path = self.get_scratch_space() - aws_profile = "ck" - hs3.copy_data_from_s3_to_local_dir(s3_path, local_path, aws_profile) - # Read sample data from the scratch dir. - sample_data = hparque.from_parquet(local_path) - # Generate artificial test data. - data = { - "timestamp": [1696896000000], - "open": [27578.4], - "high": [27584.3], - "low": [27571.2], - "close": [27571.3], - "volume": [154.933], - "exchange_id": ["binance"], - "knowledge_timestamp": [ - pd.Timestamp("2023-11-06 14:15:11.241716+0000", tz="UTC") - ], - } - index = pd.Series( - [pd.Timestamp("2023-10-10T00:00:00+00:00")], name="timestamp" - ) - test_data = pd.DataFrame(data, index=index) - # Concatenate sample and test data and save it to the scratch dir. - combined_test_data = pd.concat([sample_data, test_data]) - local_combined_file_path = os.path.join( - local_path, "combined_dummy.parquet" - ) - hparque.to_parquet(combined_test_data, local_combined_file_path) - # Read the data back from the scratch dir. - actual_df = hparque.from_parquet(local_combined_file_path) - # Check that the data types the same as in the sample data. 
- dtypes_sample = str(sample_data.dtypes) - dtypes_actual = str(actual_df.dtypes) - self.assert_equal(dtypes_sample, dtypes_actual, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# TestPartitionedParquet1 -# ############################################################################# - - -class TestPartitionedParquet1(hunitest.TestCase): - # From https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data - # A dataset can exploit a nested structure, where the sub-dir names hold - # information about which subset of the data is stored in that dir - # E.g., "Hive" partitioning scheme "key=vale" dir names - - def write_partitioned_dataset_and_check( - self, - df: pd.DataFrame, - partition_cols: List[str], - exp_dir_signature: Optional[str], - ) -> str: - """ - - Write df as a partitioned dataset - - (Optional) Check the signature of the directory - - :param partition_cols: columns used for - :param exp_dir_signature: expected signature of the written directory - :return path to the saved Parquet data - """ - _LOG.debug(hprint.to_str("partition_cols")) - # Prepare data. - dir_name = os.path.join(self.get_scratch_space(), "data.parquet") - table = pyarrow.Table.from_pandas(df) - # Write partitioned dataset. - parquet.write_to_dataset( - table, - dir_name, - partition_cols, - ) - # Check dir signature. 
- if exp_dir_signature is not None: - include_file_content = False - remove_dir_name = True - dir_signature = hunitest.get_dir_signature( - dir_name, include_file_content, remove_dir_name=remove_dir_name - ) - self.assert_equal( - dir_signature, - exp_dir_signature, - fuzzy_match=True, - purify_text=True, - ) - return dir_name - - def write_and_read_helper( - self, - df: pd.DataFrame, - partition_cols: List[str], - exp_dir_signature: Optional[str], - columns_to_read: Optional[List[str]], - ) -> str: - """ - - Write df as a partitioned dataset using `partitioned_cols` - - Read certain column back - - :param partition_cols: columns used for - :param exp_dir_signature: expected signature of the written directory - :return: read df as string - """ - _LOG.debug(hprint.to_str("partition_cols columns_to_read")) - # Write and check. - dir_name = self.write_partitioned_dataset_and_check( - df, partition_cols, exp_dir_signature - ) - # Read back certain columns. - df2 = hparque.from_parquet( - dir_name, columns=columns_to_read, log_level=logging.INFO - ) - # Compare. - if columns_to_read is not None: - df = df[columns_to_read] - # - hdbg.dassert_set_eq(df.columns, df2.columns) - df2 = df2[df.columns] - df_as_str = _compare_dfs(self, df, df2) - return df_as_str - - # ////////////////////////////////////////////////////////////////////////////// - - def test_write_and_read1(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read everything back - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = r""" - # Dir structure - . 
- idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet - idx=2 - idx=2/data.parquet - idx=3 - idx=3/data.parquet - idx=4 - idx=4/data.parquet""" - columns_to_read = None - self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - - def test_write_and_read2(self) -> None: - """ - - Write a partitioned dataset with two partitioning columns - - Read everything back - """ - df = _get_df_example1() - partition_cols = ["idx", "instr"] - exp_dir_signature = r"""# Dir structure - . - idx=0 - idx=0/instr=A - idx=0/instr=A/data.parquet - idx=1 - idx=1/instr=B - idx=1/instr=B/data.parquet - idx=2 - idx=2/instr=C - idx=2/instr=C/data.parquet - idx=3 - idx=3/instr=D - idx=3/instr=D/data.parquet - idx=4 - idx=4/instr=E - idx=4/instr=E/data.parquet""" - # Read back everything. - columns_to_read = None - self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - - def test_write_and_read3(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read two columns back - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = None - columns_to_read = ["idx", "instr"] - df_as_str = self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - expected = r"""# = - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr - shape=(395, 2) - idx instr - 2020-01-01 09:30:00-05:00 0 A - 2020-01-01 09:35:00-05:00 0 A - 2020-01-01 09:40:00-05:00 0 A - ... - 2020-01-01 15:50:00-05:00 4 E - 2020-01-01 15:55:00-05:00 4 E - 2020-01-01 16:00:00-05:00 4 E""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - def test_write_and_read4(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read two columns back filtering by the one of the partitioned column - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = None - # Write and check. 
- dir_name = self.write_partitioned_dataset_and_check( - df, partition_cols, exp_dir_signature - ) - # Read back everything. - columns_to_read = ["idx", "instr"] - filters = [] - filters.append(("idx", "=", 0)) - # Note that `from_parquet` doesn't work with filters. - # df2 = hparque.from_parquet( - # dir_name, - # columns=columns_to_read, - # filters=filters, - # log_level=logging.INFO, - # ) - filesystem = None - dataset = parquet.ParquetDataset( - dir_name, - filesystem=filesystem, - filters=filters, - ) - table = dataset.read(columns=columns_to_read) - df2 = table.to_pandas() - # Compare. - df_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="df") - expected = r"""# df= - index=[0, 78] - columns=idx,instr - shape=(79, 2) - idx instr - 0 0 A - 1 0 A - 2 0 A - ... - 76 0 A - 77 0 A - 78 0 A""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def test_merge1(self) -> None: - """ - - Write a partitioned dataset in multiple chunks using the same partitioning - column - - Make sure that reading it back we get the original data. - """ - df = _get_df_example1() - # - partition_cols = ["idx"] - # Write the first chunk. - df_chunk1 = df[df["idx"].isin([0, 1])] - exp_dir_signature = """ - # Dir structure - . - idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet""" - # Write and check. - _ = self.write_partitioned_dataset_and_check( - df_chunk1, partition_cols, exp_dir_signature - ) - # Write the second chunk. - df_chunk2 = df[df["idx"].isin([2, 3, 4])] - exp_dir_signature = """ - # Dir structure - . - idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet - idx=2 - idx=2/data.parquet - idx=3 - idx=3/data.parquet - idx=4 - idx=4/data.parquet""" - # Write and check. - dir_name = self.write_partitioned_dataset_and_check( - df_chunk2, partition_cols, exp_dir_signature - ) - # Read everything. 
- columns_to_read = None - df2 = hparque.from_parquet( - dir_name, columns=columns_to_read, log_level=logging.INFO - ) - # Compare. - hdbg.dassert_set_eq(df.columns, df2.columns) - df2 = df2[df.columns] - df_as_str = _compare_dfs(self, df, df2) - expected = r""" - # = - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(395, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... - 2020-01-01 15:50:00-05:00 4 E 57 3 - 2020-01-01 15:55:00-05:00 4 E 33 50 - 2020-01-01 16:00:00-05:00 4 E 96 75""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - def _run_write_and_read_mixed_units_partitioned_dataset( - self, first_unit: str, second_unit: str - ) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - :param first_unit: time unit of the first DataFrame - :param second_unit: time unit of the second DataFrame - """ - initial_df = _get_test_df_with_timestamps() - partition_columns = ["n_legs", "animal", "year"] - dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") - # Write first DF as partitioned parquet. - first_df = initial_df.copy() - first_df.index = first_df.index.as_unit(first_unit) - first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( - f"datetime64[{first_unit}, UTC]" - ) - hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) - # Write second DF as partitioned parquet. - second_df = initial_df.copy() - second_df.index = second_df.index.as_unit(second_unit) - second_df["knowledge_timestamp"] = second_df[ - "knowledge_timestamp" - ].astype(f"datetime64[{second_unit}, UTC]") - hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) - # Read it back. 
- _ = hparque.from_parquet(dst_dir) - - def test_write_and_read_mixed_units_partition_dataset_1(self) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - The combination `ns` and `us` should not raise an error. - See CmampTask7331 for details. - """ - self._run_write_and_read_mixed_units_partitioned_dataset("ns", "us") - - @pytest.mark.skip( - reason="Since names and order the files is not guaranteed, the test is " - "flaky, decided to skip it for now.", - ) - def test_write_and_read_mixed_units_partition_dataset_2(self) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - The combination `ms` and `us` should raise an error. - """ - with self.assertRaises(pyarrow.lib.ArrowInvalid): - self._run_write_and_read_mixed_units_partitioned_dataset("ms", "us") - - -# ############################################################################# - - -# ############################################################################# -# TestGetParquetFiltersFromTimestampInterval1 -# ############################################################################# - - -class TestGetParquetFiltersFromTimestampInterval1(hunitest.TestCase): - def test_no_interval(self) -> None: - """ - No timestamps provided. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = None - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - self.assertIsNone(filters) - - def test_by_month_half1(self) -> None: - """ - Test a left-bound interval [..., None]. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = None - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = r"[[('year', '==', 2020), ('month', '>=', 1)], [('year', '>', 2020)]]" - self.assert_equal(actual, expected) - - def test_by_month_half2(self) -> None: - """ - Test a right-bound interval [None, ...]. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = r"[[('year', '==', 2020), ('month', '<=', 1)], [('year', '<', 2020)]]" - self.assert_equal(actual, expected) - - def test_by_month_one_year1(self) -> None: - """ - Test an interval contained in a whole year. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_one_year2(self) -> None: - """ - Test an interval contained in a whole year. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 1)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_invalid1(self) -> None: - """ - Test an invalid interval. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:30:00+00:00") - with self.assertRaises(AssertionError) as fail: - hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(fail.exception) - expected = r""" - * Failed assertion * - 2020-01-02 09:31:00+00:00 <= 2020-01-02 09:30:00+00:00 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_by_month_invalid2(self) -> None: - """ - Test an invalid partition mode. - """ - partition_mode = "new_mode" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") - with self.assertRaises(ValueError) as fail: - hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(fail.exception) - expected = r"Unknown partition mode `new_mode`!" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_by_month_two_years1(self) -> None: - """ - Test an interval spanning two years. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2021-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 6)], " - r"[('year', '==', 2021), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_over_two_years1(self) -> None: - """ - Test an interval longer than two years. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 6)], " - r"[('year', '>', 2020), ('year', '<', 2022)], " - r"[('year', '==', 2022), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_additional_filters1(self) -> None: - """ - No timestamps provided while a single additional filter is provided. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = None - additional_filters = [ - ( - "currency_pair", - "in", - ("BTC_USDT",), - ) - ] - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, - start_ts, - end_ts, - additional_filters=additional_filters, - ) - actual = str(filters) - expected = r"[('currency_pair', 'in', ('BTC_USDT',))]" - self.assert_equal(actual, expected) - - def test_additional_filters2(self) -> None: - """ - Test an interval with multiple additional filters. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - additional_filters = [ - ("exchange_id", "in", ("binance")), - ("currency_pairs", "in", ("ADA_USDT", "BTC_USDT")), - ] - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, - start_ts, - end_ts, - additional_filters=additional_filters, - ) - actual = str(filters) - expected = ( - r"[[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '==', 2020), ('month', '>=', 6)], " - r"[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '>', 2020), ('year', '<', 2022)], " - r"[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '==', 2022), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestAddDatePartitionColumns -# ############################################################################# - - -class TestAddDatePartitionColumns(hunitest.TestCase): - def add_date_partition_columns_helper( - self, partition_mode: str, expected: str - ) -> None: - # Prepare inputs. - test_data = { - "dummy_value": [1, 2, 3], - "dummy_timestamp": [1638646800000, 1638646860000, 1638646960000], - } - start_timestamp = "2021-12-04 19:40:00+00:00" - end_timestamp = "2021-12-04 19:42:00+00:00" - index = pd.date_range(start_timestamp, end_timestamp, freq="1T") - df = pd.DataFrame(index=index, data=test_data) - # Run. - hparque.add_date_partition_columns(df, partition_mode) - # Check output. 
- actual = hpandas.df_to_str(df) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_add_date_partition_columns1(self) -> None: - partition_mode = "by_date" - expected = r""" dummy_value dummy_timestamp date - 2021-12-04 19:40:00+00:00 1 1638646800000 20211204 - 2021-12-04 19:41:00+00:00 2 1638646860000 20211204 - 2021-12-04 19:42:00+00:00 3 1638646960000 20211204""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns2(self) -> None: - partition_mode = "by_year" - expected = r""" dummy_value dummy_timestamp year - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns3(self) -> None: - partition_mode = "by_year_month_day" - # pylint: disable=line-too-long - expected = r""" dummy_value dummy_timestamp year month day - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 12 4 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 12 4 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021 12 4""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns4(self) -> None: - partition_mode = "by_year_week" - expected = r""" dummy_value dummy_timestamp year weekofyear - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 48 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 48 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021 48""" - self.add_date_partition_columns_helper(partition_mode, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestToPartitionedDataset -# ############################################################################# - - -class TestToPartitionedDataset(hunitest.TestCase): - @staticmethod - def get_test_data1() -> pd.DataFrame: - 
test_data = { - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - df = pd.DataFrame(data=test_data) - return df - - def test_get_test_data1(self) -> None: - test_data = self.get_test_data1() - actual = hpandas.df_to_str(test_data) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.skip( - reason="CmTask1305: after removing circular dependencies in " - "`hio.from_file`, this test fails reading a parquet file" - ) - def test_to_partitioned_dataset(self) -> None: - """ - Test partitioned Parquet datasets with existing columns. - """ - # Prepare inputs. - test_dir = self.get_scratch_space() - df = self.get_test_data1() - # Run. - partition_cols = ["dummy_value_1", "dummy_value_2"] - hparque.to_partitioned_parquet(df, partition_cols, test_dir) - # Check output. - include_file_content = False - remove_dir_name = True - dir_signature = hunitest.get_dir_signature( - test_dir, include_file_content, remove_dir_name=remove_dir_name - ) - expected = r""" - # Dir structure - . - dummy_value_1=1 - dummy_value_1=1/dummy_value_2=A - dummy_value_1=1/dummy_value_2=A/data.parquet - dummy_value_1=2 - dummy_value_1=2/dummy_value_2=B - dummy_value_1=2/dummy_value_2=B/data.parquet - dummy_value_1=3 - dummy_value_1=3/dummy_value_2=C - dummy_value_1=3/dummy_value_2=C/data.parquet""" - self.assert_equal( - dir_signature, expected, purify_text=True, fuzzy_match=True - ) - # - include_file_content = True - dir_signature = hunitest.get_dir_signature( - test_dir, include_file_content, remove_dir_name=remove_dir_name - ) - self.check_string(dir_signature, purify_text=True, fuzzy_match=True) - - def test_to_partitioned_dataset_wrong_column(self) -> None: - """ - Assert that wrong columns are detected before partitioning. - """ - # Prepare inputs. - test_dir = self.get_scratch_space() - df = self.get_test_data1() - # Run. 
- partition_cols = ["void_column", "dummy_value_2"] - # Check output. - with self.assertRaises(AssertionError) as cm: - hparque.to_partitioned_parquet(df, partition_cols, test_dir) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - val1=['dummy_value_2', 'void_column'] - issubset - val2=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] - val1 - val2=['void_column'] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# TestListAndMergePqFiles -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestListAndMergePqFiles(hmoto.S3Mock_TestCase): - def generate_test_data(self) -> hs3.AwsProfile: - """ - Upload test daily Parquet files for 3 days to the mocked S3 bucket. - """ - start_date = "2022-02-02" - end_date = "2022-02-04" - assets = ["A", "B", "C", "D", "E", "F"] - asset_col_name = "asset" - test_dir = self.get_scratch_space() - partition_mode = "by_year_month" - custom_partition_cols = "asset,year,month" - hparque.generate_parquet_files( - start_date, - end_date, - assets, - asset_col_name, - test_dir, - partition_mode=partition_mode, - custom_partition_cols=custom_partition_cols, - ) - s3fs_ = hs3.get_s3fs(self.mock_aws_profile) - s3_bucket = f"s3://{self.bucket_name}" - s3fs_.put(test_dir, s3_bucket, recursive=True) - return s3fs_ - - @pytest.mark.slow("~7 seconds.") - def test_list_and_merge_pq_files(self) -> None: - """ - Check if predefined generated Parquet files are properly merged. - """ - s3fs_ = self.generate_test_data() - # Prepare common `hs3.listdir` params. 
- s3_bucket = f"s3://{self.bucket_name}" - pattern = "*.parquet" - only_files = True - use_relative_paths = True - # Check bucket content before merge. - parquet_path_list_before = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - self.assertEqual(len(parquet_path_list_before), 6) - # Add extra parquet files and rename existing one. - # e.g., `dummy.parquet`, `dummy_new.parquet`. - # Every second file is left intact to replicate ready out-of-the-box folder. - # e.g., `asset=A/year=2022/month=2/77a2534aaf9649fab6511cea53a6bf7f-0.parquet`. - for path in parquet_path_list_before[::2]: - original_path = f"{s3_bucket}/{path}" - original_file_name = os.path.basename(original_path) - renamed_path = original_path.replace( - original_file_name, "dummy.parquet" - ) - additional_path = original_path.replace( - original_file_name, "dummy_new.parquet" - ) - s3fs_.rename(original_path, renamed_path) - s3fs_.copy(renamed_path, additional_path) - # Check if edits are in place. - updated_parquet_path_list = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - data_parquet_path_list = [ - path for path in updated_parquet_path_list if "dummy" not in path - ] - self.assertEqual(len(updated_parquet_path_list), 9) - self.assertEqual(len(data_parquet_path_list), 3) - # Check bucket content after merge. 
- hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) - parquet_path_list_after = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - parquet_path_list_after.sort() - expected_list = [ - "tmp.scratch/asset=A/year=2022/month=2/data.parquet", - "tmp.scratch/asset=B/year=2022/month=2/data.parquet", - "tmp.scratch/asset=C/year=2022/month=2/data.parquet", - "tmp.scratch/asset=D/year=2022/month=2/data.parquet", - "tmp.scratch/asset=E/year=2022/month=2/data.parquet", - "tmp.scratch/asset=F/year=2022/month=2/data.parquet", - ] - self.assertListEqual(parquet_path_list_after, expected_list) - - def test_list_and_merge_pq_files_duplicate_drop(self) -> None: - # Prepare test data. - test_data = { - "dummy_value_1": [1, 1, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [1, 2, 3], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - # Save test data to s3 bucket. - s3fs_ = hs3.get_s3fs(self.mock_aws_profile) - s3_bucket = f"s3://{self.bucket_name}" - original_sample_path = f"{s3_bucket}/dummy/data.parquet" - dummy_sample_path = original_sample_path.replace( - "data.parquet", "dummy.parquet" - ) - hparque.to_parquet(df, dummy_sample_path, aws_profile=s3fs_) - # Check if new columns are in place. - df = hparque.from_parquet(dummy_sample_path, aws_profile=s3fs_) - self.assertIn("knowledge_timestamp", df.columns) - self.assertIn("end_download_timestamp", df.columns) - self.assertEqual(len(df), 3) - # Check if duplicates are dropped after merge. 
- hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) - df = hparque.from_parquet(original_sample_path, aws_profile=s3fs_) - self.assertEqual(len(df), 1) - - -# ############################################################################# - - -# ############################################################################# -# TestListAndMergePqFilesMixedUnits -# ############################################################################# - - -class TestListAndMergePqFilesMixedUnits(hunitest.TestCase): - def _list_and_merge_mixed_units_pq_files( - self, first_unit: str, second_unit: str - ) -> None: - """ - Run `list_and_merge_pq_files` with different time units in the same - column and index. - - :param first_unit: first time unit. - :param second_unit: second time unit. - """ - # Prepare test data. - dst_dir = os.path.join(self.get_scratch_space(), "tmp.list_and_merge") - first_file_name = os.path.join(dst_dir, "tmp.1first.parquet") - second_file_name = os.path.join(dst_dir, "tmp.2second.parquet") - merged_file_name = os.path.join(dst_dir, "tmp.merged.parquet") - # Write first DF with the `first_unit`. - initial_df = _get_test_df_with_timestamps() - first_df = initial_df.copy() - first_df.index = first_df.index.as_unit(first_unit) - first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( - f"datetime64[{first_unit}, UTC]" - ) - hparque.to_parquet(first_df, first_file_name) - # Write second DF with the `second_unit`. - second_df = initial_df.copy() - second_df.index = second_df.index.as_unit(second_unit) - second_df["knowledge_timestamp"] = second_df[ - "knowledge_timestamp" - ].astype(f"datetime64[{second_unit}, UTC]") - hparque.to_parquet(second_df, second_file_name) - # List and merge. - hparque.list_and_merge_pq_files(dst_dir, file_name="tmp.merged.parquet") - # Read it back. 
- _ = hparque.from_parquet(merged_file_name) - - def test_parquet_files_with_mixed_time_units_1(self) -> None: - """ - Test merging Parquet files with the `ns` and `us`. - """ - first_unit = "ns" - second_unit = "us" - self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) - - # TODO(Nina): @Samarth fix the test. - @pytest.mark.skip(reason="Broken.") - def test_parquet_files_with_mixed_time_units_2(self) -> None: - """ - Test merging Parquet files with the `ms` and `ns`. - - It should raise an error. See CmampTask7331 for details. - - The test will not raise an asserion when the time units is `ms` and - `us`. The reason is that we do not lose data when converting from - the first time unit, which is `ms`, to the second time unit, which - is `us`, transitioning from low resolution to high resolution. - """ - first_unit = "us" - second_unit = "ms" - with self.assertRaises(pyarrow.lib.ArrowInvalid): - self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) - - -# ############################################################################# - - -# ############################################################################# -# TestYieldParquetTiles -# ############################################################################# - - -class TestYieldParquetTiles(hunitest.TestCase): - def generate_test_data(self) -> None: - """ - Generate test data and write it to a scratch dir. - - Data has the following structure: - - ``` - asset_id ... year month - end_ts - 2021-11-01 100 2021 11 - 2021-11-01 200 2021 11 - 2021-11-01 300 2021 11 - ... - 2022-02-01 200 2022 2 - 2022-02-01 300 2022 2 - 2022-02-01 400 2022 2 - ``` - """ - # Generate synthetic data. 
- asset_ids = [100, 200, 300, 400] - prices = list(range(1, 17)) - volatility = list(range(17, 33)) - dates = ["2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01"] - dates = map(pd.Timestamp, dates) - index_ = [dates, asset_ids] - multi_index = pd.MultiIndex.from_product( - index_, names=["end_ts", "asset_id"] - ) - df = pd.DataFrame( - {"price": prices, "volatility": volatility}, index=multi_index - ) - df["year"] = df.index.get_level_values(0).year - df["month"] = df.index.get_level_values(0).month - df = df.reset_index(level=1) - _LOG.debug("Test data: df=\n%s", hpandas.df_to_str(df)) - # Write the data to a scratch dir. - partition_columns = ["asset_id", "year", "month"] - dst_dir = self.get_scratch_space() - hparque.to_partitioned_parquet(df, partition_columns, dst_dir) - - def test_yield_tiles_by_asset(self) -> None: - """ - Test reading only certain asset ids. - """ - self.generate_test_data() - # Read data. - file_name = self.get_scratch_space() - asset_ids = [100, 200] - asset_id_col = "asset_id" - asset_batch_size = 1 - columns = [asset_id_col, "price"] - generator_ = hparque.yield_parquet_tiles_by_assets( - file_name, asset_ids, asset_id_col, asset_batch_size, columns - ) - df = pd.concat(generator_) - _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) - # Check asset ids filtering. - actual = str(asset_ids) - expected = str(df[asset_id_col].unique().tolist()) - self.assert_equal(actual, expected) - - def test_yield_tiles_by_year(self) -> None: - """ - Test reading only certain asset ids and dates. - """ - self.generate_test_data() - # Read data. 
- file_name = self.get_scratch_space() - start_year = 2021 - start_month = 12 - start_date = datetime.date(start_year, start_month, 1) - end_year = 2022 - end_month = 1 - end_date = datetime.date(end_year, end_month, 2) - asset_ids = [300, 400] - asset_id_col = "asset_id" - columns = [asset_id_col, "price"] - generator_ = hparque.yield_parquet_tiles_by_year( - file_name, - start_date, - end_date, - columns, - asset_ids=asset_ids, - asset_id_col=asset_id_col, - ) - df = pd.concat(generator_) - _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) - # Check asset ids filtering. - actual = str(asset_ids) - expected = str(df[asset_id_col].unique().tolist()) - self.assert_equal(actual, expected) - # Check start date filtering. - min_date = df.index.min() - self.assertEqual(min_date.month, start_month) - self.assertEqual(min_date.year, start_year) - # Check end date filtering. - max_date = df.index.max() - self.assertEqual(max_date.month, end_month) - self.assertEqual(max_date.year, end_year) - - -# ############################################################################# - - -# ############################################################################# -# TestBuildFilterWithOnlyEqualities -# ############################################################################# - - -class TestBuildFilterWithOnlyEqualities(hunitest.TestCase): - def test_year_month_day_equality(self) -> None: - """ - Test interval with same year, month and day. - """ - start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = ( - r"[('year', '==', 2022), ('month', '==', 12), ('day', '==', 2)]" - ) - self.assert_equal(actual, expected) - - def test_year_month_equality(self) -> None: - """ - Test interval with same year and month. 
- """ - start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-28 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[('year', '==', 2022), ('month', '==', 12)]" - self.assert_equal(actual, expected) - - def test_year_equality(self) -> None: - """ - Test interval with same year. - """ - start_ts = pd.Timestamp("2022-10-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[('year', '==', 2022)]" - self.assert_equal(actual, expected) - - def test_no_equality(self) -> None: - """ - Test interval with different start and end years. - """ - start_ts = pd.Timestamp("2021-10-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-10-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[]" - self.assert_equal(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestPartitionedParquet2 -# ############################################################################# - - -class TestPartitionedParquet2(hunitest.TestCase): - """ - Test case for writing and reading partitioned Parquet datasets with mixed - timestamp formats. - """ - - def _get_test_df(self) -> pd.DataFrame: - """ - Create a DataFrame with timestamps. - """ - # Mock the get_current_time method. 
- timestamp = pd.Timestamp("2024-05-20 00:00:00", tz="UTC") - index = [timestamp for _ in range(4)] - df = pd.DataFrame( - { - "bids": [200, 123, 263, 167], - "asks": [150, 120, 240, 150], - "symbol": ["BTC_USDT" for _ in range(4)], - }, - index=index, - ) - end_download_timestamp = "2024-06-04 20:38:43.467599+00:00" - df["end_download_timestamp"] = end_download_timestamp - return df - - def _run_write_and_read_mixed_timestamp_partitioned_dataset(self) -> None: - """ - Write two DataFrames with different timestamp formats to a partitioned - Parquet dataset and read it back. - """ - initial_df = self._get_test_df() - partition_columns = ["bids", "asks", "symbol"] - dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") - # Write first DF as partitioned parquet. - first_df = initial_df.copy() - hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) - # Write second DF as partitioned parquet. - second_df = initial_df.copy() - second_df["end_download_timestamp"] = pd.to_datetime( - second_df["end_download_timestamp"] - ) - hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) - # Read it back and verify the output. - combined_df = hparque.from_parquet(dst_dir) - combined_df["end_download_timestamp"] = pd.to_datetime( - combined_df["end_download_timestamp"] - ).dt.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") - actual = hpandas.df_to_str(combined_df) - expected = r""" - end_download_timestamp bids asks symbol - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 167 150 BTC_USDT - ... 
- 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 200 150 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test writing and reading a partitioned Parquet dataset with mixed - timestamp formats. - """ - self._run_write_and_read_mixed_timestamp_partitioned_dataset() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py deleted file mode 100644 index 8e65eeb2e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py +++ /dev/null @@ -1,398 +0,0 @@ -import argparse -import os - -import helpers.hio as hio -import helpers.hparser as hparser -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestParseLimitRange -# ############################################################################# - - -class TestParseLimitRange(hunitest.TestCase): - def test_parse_limit_range_valid1(self) -> None: - """ - Test parsing valid range format. - """ - limit_str = "1:5" - expected = (1, 5) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_valid2(self) -> None: - """ - Test parsing valid range format with same start and end. - """ - limit_str = "3:3" - expected = (3, 3) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_valid3(self) -> None: - """ - Test parsing valid range format with larger numbers. 
- """ - limit_str = "10:100" - expected = (10, 100) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_no_colon(self) -> None: - """ - Test that missing colon raises assertion error. - """ - limit_str = "15" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_multiple_colons(self) -> None: - """ - Test that multiple colons raise assertion error. - """ - limit_str = "1:2:3" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_invalid_start(self) -> None: - """ - Test that non-integer start raises fatal error. - """ - limit_str = "abc:5" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_invalid_end(self) -> None: - """ - Test that non-integer end raises fatal error. - """ - limit_str = "1:xyz" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_start_zero(self) -> None: - """ - Test that start index of 0 raises assertion error. - """ - limit_str = "0:5" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_end_zero(self) -> None: - """ - Test that end index of 0 raises assertion error. - """ - limit_str = "1:0" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_start_greater_than_end(self) -> None: - """ - Test that start greater than end raises assertion error. 
- """ - limit_str = "5:3" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - -# ############################################################################# -# TestApplyLimitRange -# ############################################################################# - - -class TestApplyLimitRange(hunitest.TestCase): - def test_apply_limit_range_no_limit(self) -> None: - """ - Test that None limit range returns original items. - """ - items = ["a", "b", "c", "d", "e"] - actual = hparser.apply_limit_range(items, None) - self.assertEqual(actual, items) - - def test_apply_limit_range_valid_range(self) -> None: - """ - Test applying valid range to items. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (1, 3) - expected = ["b", "c", "d"] # 0-indexed, inclusive - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_single_item(self) -> None: - """ - Test applying range that selects single item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (2, 2) - expected = ["c"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_first_item(self) -> None: - """ - Test applying range starting from first item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (0, 1) - expected = ["a", "b"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_last_item(self) -> None: - """ - Test applying range ending at last item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (3, 4) - expected = ["d", "e"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_start_exceeds_length(self) -> None: - """ - Test that start index exceeding items length raises assertion error. 
- """ - items = ["a", "b", "c"] - limit_range = (5, 6) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_end_exceeds_length(self) -> None: - """ - Test that end index exceeding items length raises assertion error. - """ - items = ["a", "b", "c"] - limit_range = (1, 5) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_custom_item_name(self) -> None: - """ - Test that custom item name doesn't affect functionality. - """ - items = [1, 2, 3, 4, 5] - limit_range = (0, 2) - expected = [1, 2, 3] - actual = hparser.apply_limit_range( - items, limit_range, item_name="numbers" - ) - self.assertEqual(actual, expected) - - def test_apply_limit_range_empty_list(self) -> None: - """ - Test applying limit range to empty list. - """ - items = [] - limit_range = (0, 1) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_complex_objects(self) -> None: - """ - Test applying limit range to complex objects. - """ - items = [{"id": i, "value": f"item{i}"} for i in range(10)] - limit_range = (2, 4) - expected = [ - {"id": 2, "value": "item2"}, - {"id": 3, "value": "item3"}, - {"id": 4, "value": "item4"}, - ] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - -# ############################################################################# -# Test_add_multi_file_args -# ############################################################################# - - -class Test_add_multi_file_args(hunitest.TestCase): - def test_adds_correct_arguments(self) -> None: - """ - Test that add_multi_file_args adds the correct arguments to parser. - """ - # Prepare inputs. - parser = argparse.ArgumentParser() - # Run function. - hparser.add_multi_file_args(parser) - # Check that the arguments were added. 
- namespace = parser.parse_args([]) - self.assertTrue(hasattr(namespace, "files")) - self.assertTrue(hasattr(namespace, "from_files")) - self.assertTrue(hasattr(namespace, "input")) - - -# ############################################################################# -# Test_parse_multi_file_args -# ############################################################################# - - -class Test_parse_multi_file_args(hunitest.TestCase): - # Helper method. - def _create_test_file(self, file_path: str, content: str = "test") -> None: - """ - Create a test file with given content. - """ - hio.create_dir(os.path.dirname(file_path), incremental=True) - hio.to_file(file_path, content) - - def test_files_comma_separated(self) -> None: - """ - Test parsing comma-separated file list. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - file3 = f"{scratch_dir}/file3.txt" - self._create_test_file(file1) - self._create_test_file(file2) - self._create_test_file(file3) - # Create namespace with files argument. - args = argparse.Namespace() - args.files = f"{file1},{file2},{file3}" - args.from_files = None - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2, file3] - self.assert_equal(str(actual), str(expected)) - - def test_from_files(self) -> None: - """ - Test parsing file containing list of files. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - file3 = f"{scratch_dir}/file3.txt" - self._create_test_file(file1) - self._create_test_file(file2) - self._create_test_file(file3) - # Create file list. - list_file = f"{scratch_dir}/list.txt" - content = f"{file1}\n{file2}\n{file3}\n" - self._create_test_file(list_file, content) - # Create namespace with from_files argument. 
- args = argparse.Namespace() - args.files = None - args.from_files = list_file - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2, file3] - self.assert_equal(str(actual), str(expected)) - - def test_from_files_with_empty_lines(self) -> None: - """ - Test parsing file with empty lines and comments. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - self._create_test_file(file1) - self._create_test_file(file2) - # Create file list with empty lines and comments. - list_file = f"{scratch_dir}/list.txt" - content = f""" - # This is a comment - {file1} - - # Another comment - {file2} - - """ - self._create_test_file(list_file, content) - # Create namespace with from_files argument. - args = argparse.Namespace() - args.files = None - args.from_files = list_file - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2] - self.assert_equal(str(actual), str(expected)) - - def test_input_multiple(self) -> None: - """ - Test parsing repeated --input arguments. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - self._create_test_file(file1) - self._create_test_file(file2) - # Create namespace with input argument. - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = [file1, file2] - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2] - self.assert_equal(str(actual), str(expected)) - - def test_backward_compatibility_single_file(self) -> None: - """ - Test that single -i/--input still works. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test file. 
- file1 = f"{scratch_dir}/file1.txt" - self._create_test_file(file1) - # Create namespace with input argument (single file, not list). - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = file1 # Single file as string, not list - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1] - self.assert_equal(str(actual), str(expected)) - - def test_file_validation(self) -> None: - """ - Test that non-existent files raise error. - """ - # Create namespace with non-existent file. - args = argparse.Namespace() - args.files = "/nonexistent/file1.txt,/nonexistent/file2.txt" - args.from_files = None - args.input = None - # Run function and check that it raises error. - with self.assertRaises(AssertionError): - hparser.parse_multi_file_args(args) - - def test_empty_file_list(self) -> None: - """ - Test empty file list handling. - """ - # Prepare inputs. - - # Create namespace with no files. - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = None - # Run function and check that it raises error. - with self.assertRaises(AssertionError) as cm: - hparser.parse_multi_file_args(args) - # Check the error message. 
- act = str(cm.exception) - self.assertIn("No input files specified", act) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py deleted file mode 100644 index 8064ddbe1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging - -import helpers.hpickle as hpickle -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestToPickleable -# ############################################################################# - - -class TestToPickleable(hunitest.TestCase): - def test_list1(self) -> None: - """ - Test that a list is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = [1, "2", [3, 0.4], (5, None)] - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = [1, "2", [3, 0.4], (5, None)] - self.assertEqual(actual, expected) - - def test_list2(self) -> None: - """ - Test that a list is converted to a pickleable correctly. - - force_values_to_string = True - """ - _obj = [1, "2", [3, 0.4], (5, None)] - force_values_to_string = True - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ["1", "2", ["3", "0.4"], ("5", "None")] - self.assertEqual(actual, expected) - - def test_tuple1(self) -> None: - """ - Test that a tuple is converted to a pickleable correctly. 
- - force_values_to_string = False - """ - _obj = (1, "2", [3, 0.4], (5, None)) - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = (1, "2", [3, 0.4], (5, None)) - self.assertEqual(actual, expected) - - def test_dict1(self) -> None: - """ - Test that a dict is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = {"a": 1, 2: ["b", 3], "c": {0.4: None}} - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = {"a": 1, 2: ["b", 3], "c": {0.4: None}} - self.assertEqual(actual, expected) - - def test_iterable1(self) -> None: - """ - Test that an iterable is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = {1, 2, 3} - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = [1, 2, 3] - self.assertEqual(actual, expected) - - def test_unpickleable1(self) -> None: - """ - Test that an unpickleable object is converted to a string. - - force_values_to_string = False - """ - _obj = lambda x: x - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ". at 0x>" - self.assert_equal(actual, expected, purify_text=True) - - def test_unpickleable2(self) -> None: - """ - Test that an unpickleable object is converted to a string. - - force_values_to_string = True - """ - _obj = lambda x: x - force_values_to_string = True - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ". 
at 0x>" - self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py deleted file mode 100644 index a829ea82f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py +++ /dev/null @@ -1,506 +0,0 @@ -import datetime -import logging -import os -from typing import Any, Optional - -import pandas as pd -import pytest - -import config_root.config as cconfig -import helpers.hio as hio -import helpers.hplayback as hplayba -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestJsonRoundtrip1 -# ############################################################################# - - -class TestJsonRoundtrip1(hunitest.TestCase): - """ - Test roundtrip conversion through jsonpickle for different types. 
- """ - - def test1(self) -> None: - obj = 3 - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test2(self) -> None: - obj = "hello" - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test3(self) -> None: - data = { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], - } - df = pd.DataFrame(data, columns=["Product", "Price"]) - df.index.name = "hello" - # - obj = df - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test4(self) -> None: - obj = datetime.date(2015, 1, 1) - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - -# ############################################################################# -# TestPlaybackInputOutput1 -# ############################################################################# - - -class TestPlaybackInputOutput1(hunitest.TestCase): - """ - Freeze the output of Playback. - """ - - def helper(self, mode: str, *args: Any, **kwargs: Any) -> None: - # TODO(gp): Factor out the common code. - # Define a function to generate a unit test for. 
- def get_result_assert_equal(a: Any, b: Any) -> Any: - p = hplayba.Playback("assert_equal") - if isinstance(a, datetime.date) and isinstance(b, datetime.date): - return p.run(abs(a - b)) - if isinstance(a, dict) and isinstance(b, dict): - c = {} - c.update(a) - c.update(b) - return p.run(c) - if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): - c = cconfig.Config(update_mode="overwrite") - c.update(a) - c.update(b) - return p.run(c) - return p.run(a + b) - - def get_result_check_string(a: Any, b: Any) -> Any: - p = hplayba.Playback("check_string") - if isinstance(a, datetime.date) and isinstance(b, datetime.date): - return p.run(abs(a - b)) - if isinstance(a, dict) and isinstance(b, dict): - c = {} - c.update(a) - c.update(b) - return p.run(c) - if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): - c = cconfig.Config(update_mode="overwrite") - c.update(a) - c.update(b) - return p.run(c) - return p.run(a + b) - - def get_result_assert_equal_none() -> Any: - p = hplayba.Playback("assert_equal") - return p.run("Some string.") - - def get_result_check_string_none() -> Any: - p = hplayba.Playback("check_string") - return p.run("Some string") - - if mode == "assert_equal": - if not args and not kwargs: - code = get_result_assert_equal_none() - else: - code = get_result_assert_equal(*args, **kwargs) - elif mode == "check_string": - if not args and not kwargs: - code = get_result_check_string_none() - else: - code = get_result_check_string(*args, **kwargs) - else: - raise ValueError("Invalid mode ") - self.check_string(code, purify_text=True) - _LOG.debug("Testing code:\n%s", code) - exec(code, locals()) # pylint: disable=exec-used - - def test1(self) -> None: - """ - Test for int inputs. - """ - # Create inputs. - a = 3 - b = 2 - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test2(self) -> None: - """ - Test for string inputs. - """ - # Create inputs. 
- a = "test" - b = "case" - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test3(self) -> None: - """ - Test for list inputs. - """ - # Create inputs. - a = [1, 2, 3] - b = [4, 5, 6] - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test4(self) -> None: - """ - Test for dict inputs. - """ - # Create inputs. - a = {"1": 2} - b = {"3": 4} - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test5(self) -> None: - """ - Test for pd.DataFrame inputs. - """ - # Create inputs. - a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test6(self) -> None: - """ - Test for datetime.date inputs (using `jsonpickle`). - """ - # Create inputs. - a = datetime.date(2015, 1, 1) - b = datetime.date(2012, 1, 1) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test7(self) -> None: - """ - Test for int inputs with check_string. - """ - # Create inputs. - a = 3 - b = 2 - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test8(self) -> None: - """ - Test for string inputs with check_string. - """ - # Create inputs. - a = "test" - b = "case" - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test9(self) -> None: - """ - Test for list inputs with check_string. - """ - # Create inputs. - a = [1, 2, 3] - b = [4, 5, 6] - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test10(self) -> None: - """ - Test for dict inputs with check_string. - """ - # Create inputs. - a = {"1": 2} - b = {"3": 4} - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test11(self) -> None: - """ - Test for pd.DataFrame inputs with check_string. 
- """ - # Create inputs. - a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test12(self) -> None: - """ - Test for dict inputs with data structures recursion. - """ - # Create inputs. - a = {"1": ["a", 2]} - b = {"3": pd.DataFrame({"Price": [700, 250, 800, 1200]}), "4": {"5": 6}} - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test13(self) -> None: - """ - Test for pd.Series inputs with check_string. - """ - # Create inputs. - a = pd.Series([10, 20, 15], name="N Numbers") - b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test14(self) -> None: - """ - Test for pd.Series inputs with assert_equal. - """ - # Create inputs. - a = pd.Series([10, 20, 15], name="N Numbers") - b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test15(self) -> None: - """ - Test for cconfig.Config inputs with check_string. - """ - # Create inputs. - a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) - b = cconfig.Config([("meta", "meta value 2")]) - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test16(self) -> None: - """ - Test for cconfig.Config inputs with assert_equal. - """ - # Create inputs. - a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) - b = cconfig.Config([("meta", "meta value 2")]) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test17(self) -> None: - """ - Test if testing function has no args with check_string. - """ - self.helper("check_string") - - def test18(self) -> None: - """ - Test if testing function has no args with assert_equal. 
- """ - self.helper("assert_equal") - - -# ############################################################################# -# TestToPythonCode1 -# ############################################################################# - - -class TestToPythonCode1(hunitest.TestCase): - """ - Test to_python_code() for different types. - """ - - def _check(self, input_obj: Any, expected: str) -> None: - res = hplayba.to_python_code(input_obj) - self.assert_equal(res, expected) - - def test_float1(self) -> None: - """ - Test float without first zero. - """ - self._check(0.1, "0.1") - - def test_float2(self) -> None: - """ - Test positive float. - """ - self._check(1.0, "1.0") - - def test_float3(self) -> None: - """ - Test negative float. - """ - self._check(-1.1, "-1.1") - - def test_int1(self) -> None: - """ - Test zero. - """ - self._check(0, "0") - - def test_int2(self) -> None: - """ - Test positive int. - """ - self._check(10, "10") - - def test_int3(self) -> None: - """ - Test negative int. - """ - self._check(-10, "-10") - - def test_str1(self) -> None: - """ - Test str simple. - """ - self._check("a", '"a"') - - def test_str2(self) -> None: - """ - Test str with double quotes. - """ - self._check('"b"', '"\\"b\\""') - - def test_str3(self) -> None: - """ - Test str with single quotes. - """ - self._check("'c'", "\"'c'\"") - - def test_list1(self) -> None: - """ - Test List. - """ - self._check([1, 0.2, "3"], '[1, 0.2, "3"]') - - def test_dict1(self) -> None: - """ - Test Dist. - """ - self._check({"a": 0.2, 3: "b"}, '{"a": 0.2, 3: "b"}') - - def test_df1(self) -> None: - """ - Test pd.DataFrame (single quotes expected in field names) - """ - self._check( - pd.DataFrame.from_dict({"a": [0.2, 0.1]}), - "pd.DataFrame.from_dict({'a': [0.2, 0.1]})", - ) - - def test_dataseries1(self) -> None: - """ - Test pd.Series. 
- """ - self._check( - pd.Series([0.2, 0.1], name="a"), - "pd.Series(data=[0.2, 0.1], index=RangeIndex(start=0, stop=2, step=1), " - 'name="a", dtype=float64)', - ) - - def test_config1(self) -> None: - """ - Test cconfig.Config. - """ - config = cconfig.Config() - config["var1"] = "val1" - config["var2"] = cconfig.Config([("var3", 10), ("var4", "val4")]) - self._check( - config, - "cconfig.Config.from_python(\"Config({'var1': 'val1', " - "'var2': Config({'var3': 10, 'var4': 'val4'})})\")", - ) - - -# ############################################################################# -# TestPlaybackFilePath1 -# ############################################################################# - - -class TestPlaybackFilePath1(hunitest.TestCase): - """ - Test file mode correctness. - """ - - def test1(self) -> None: - """ - Test writing to file when number of tests is more than generated (10). - """ - test_file = hplayba.Playback._get_test_file_name( - "./path/to/somewhere.py" - ) - self.assert_equal( - test_file, "./path/to/test/test_by_playback_somewhere.py" - ) - - -# ############################################################################# -# TestPlaybackFileMode1 -# ############################################################################# - - -class TestPlaybackFileMode1(hunitest.TestCase): - """ - Test file mode correctness. - """ - - def get_code(self, max_tests: Optional[int] = None) -> str: - """ - Return a code for executable file to run. - """ - max_tests_str = "" if max_tests is None else f", max_tests={max_tests}" - code = ( - "\n".join( - [ - "import helpers.hplayback as hplayba", - "def plbck_sum(a: int, b: int) -> int:", - ' hplayba.Playback("check_string", to_file=True%s).run(None)', - " return a + b", - "", - "[plbck_sum(i, i + 1) for i in range(4)]", - ] - ) - % max_tests_str - ) - return code - - def helper(self, max_tests: Optional[int] = None) -> Any: - """ - Return generated by playback code. - """ - # Get file paths. 
- tmp_dir = self.get_scratch_space() - # File with code. - code_basename = "code_.py" - tmp_py_file = os.path.join(tmp_dir, code_basename) - # File with test. - tmp_test_file = os.path.join( - tmp_dir, "test", "test_by_playback_" + code_basename - ) - # Save the code to the file. - hio.to_file(tmp_py_file, self.get_code(max_tests)) - # Executes the code. - hsystem.system(f"python {tmp_py_file}") - playback_code = hio.from_file(tmp_test_file) - return playback_code - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test1(self) -> None: - """ - Test writing to file when number of tests is more than generated. - """ - max_tests = 100 - self.check_string(self.helper(max_tests)) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test2(self) -> None: - """ - Test writing to file when number of tests is default. - """ - self.check_string(self.helper()) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test3(self) -> None: - """ - Test writing to file when number of tests is lower than generated. 
- """ - max_tests = 2 - self.check_string(self.helper(max_tests)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py deleted file mode 100644 index 395138e7a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py +++ /dev/null @@ -1,844 +0,0 @@ -import logging -import pprint -from typing import List - -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_printing1 -# ############################################################################# - - -class Test_printing1(hunitest.TestCase): - def test_color_highlight1(self) -> None: - for c in hprint._COLOR_MAP: - _LOG.debug(hprint.color_highlight(c, c)) - - -# ############################################################################# -# Test_to_str1 -# ############################################################################# - - -class Test_to_str1(hunitest.TestCase): - def test1(self) -> None: - x = 1 - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x=1" - self.assertEqual(actual, expected) - - def test2(self) -> None: - x = "hello world" - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x='hello world'" - self.assertEqual(actual, expected) - - def test3(self) -> None: - x = 2 - # To disable linter complaints. - _ = x - actual = hprint.to_str("x*2") - expected = "x*2=4" - self.assertEqual(actual, expected) - - def test4(self) -> None: - """ - Test printing multiple values separated by space. - """ - x = 1 - y = "hello" - # To disable linter complaints. 
- _ = x, y - actual = hprint.to_str("x y") - expected = "x=1, y='hello'" - self.assertEqual(actual, expected) - - def test5(self) -> None: - """ - Test printing multiple strings separated by space. - """ - x = "1" - y = "hello" - # To disable linter complaints. - _ = x, y - actual = hprint.to_str("x y") - expected = "x='1', y='hello'" - self.assertEqual(actual, expected) - - def test6(self) -> None: - """ - Test printing a list. - """ - x = [1, "hello", "world"] - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x=[1, 'hello', 'world']" - self.assertEqual(actual, expected) - - -# ############################################################################# - - -def example_func1(x: int, y: str) -> str: - _ = x, y - ret = hprint.func_signature_to_str() - return ret # type: ignore[no-any-return] - - -def example_func2() -> str: - ret = hprint.func_signature_to_str() - return ret # type: ignore[no-any-return] - - -def example_func3(x: int, y: str) -> str: - _ = x, y - ret = hprint.func_signature_to_str("y") - return ret # type: ignore[no-any-return] - - -def example_func4(x: int, y: str, z: float) -> str: - _ = x, y, z - ret = hprint.func_signature_to_str("x z") - return ret # type: ignore[no-any-return] - - -def example_func5(x: int, y: str, z: float) -> str: - _ = x, y, z - ret = hprint.func_signature_to_str(["y", "z"]) - return ret # type: ignore[no-any-return] - - -# ############################################################################# -# Test_func_signature_to_str1 -# ############################################################################# - - -class Test_func_signature_to_str1(hunitest.TestCase): - def test1(self) -> None: - actual = example_func1(1, "hello") - expected = "# example_func1: x=1, y='hello'" - self.assert_equal(actual, expected) - - def test2(self) -> None: - actual = example_func2() - expected = "# example_func2:" - self.assert_equal(actual, expected) - - def test3(self) -> None: - actual = 
example_func3(1, "hello") - expected = "# example_func3: x=1" - self.assert_equal(actual, expected) - - def test4(self) -> None: - actual = example_func4(1, "hello", 3.14) - expected = "# example_func4: y='hello'" - self.assert_equal(actual, expected) - - def test5(self) -> None: - actual = example_func5(1, "hello", 3.14) - expected = "# example_func5: x=1" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_log -# ############################################################################# - - -class Test_log(hunitest.TestCase): - def test2(self) -> None: - x = 1 - # To disable linter complaints. - _ = x - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x") - - def test3(self) -> None: - x = 1 - y = "hello" - # To disable linter complaints. - _ = x, y - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x y") - - def test4(self) -> None: - """ - The command: - - > pytest -k Test_log::test4 -o log_cli=true --dbg_verbosity DEBUG - - should print something like: - - DEBUG test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] - INFO test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] - """ - x = 1 - y = "hello" - z = ["cruel", "world"] - # To disable linter complaints. 
- _ = x, y, z - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x y z") - - -# ############################################################################# -# Test_sort_dictionary -# ############################################################################# - - -class Test_sort_dictionary(hunitest.TestCase): - def test1(self) -> None: - dict_ = { - "tool": { - "poetry": { - "name": "lm", - "version": "0.1.0", - "description": "", - "authors": [""], - "dependencies": { - "awscli": "*", - "boto3": "*", - "flaky": "*", - "fsspec": "*", - "gluonts": "*", - "invoke": "*", - "jupyter": "*", - "matplotlib": "*", - "mxnet": "*", - "networkx": "*", - "pandas": "^1.1.0", - "psycopg2": "*", - "pyarrow": "*", - "pytest": "^6.0.0", - "pytest-cov": "*", - "pytest-instafail": "*", - "pytest-xdist": "*", - "python": "^3.7", - "pywavelets": "*", - "s3fs": "*", - "seaborn": "*", - "sklearn": "*", - "statsmodels": "*", - "bs4": "*", - "jsonpickle": "*", - "lxml": "*", - "tqdm": "*", - "requests": "*", - }, - "dev-dependencies": {}, - } - }, - "build-system": { - "requires": ["poetry>=0.12"], - "build-backend": "poetry.masonry.api", - }, - } - actual = hprint.sort_dictionary(dict_) - self.check_string(pprint.pformat(actual)) - - -# ############################################################################# -# Test_indent1 -# ############################################################################# - - -class Test_indent1(hunitest.TestCase): - def test1(self) -> None: - txt = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar -""" - num_spaces = 2 - actual = hprint.indent(txt, num_spaces=num_spaces) - expected = """ foo - - klass TestHelloWorld(hunitest.TestCase): - bar -""" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_dedent1 -# ############################################################################# - - -class 
Test_dedent1(hunitest.TestCase): - def test1(self) -> None: - txt = """ - foo - - klass TestHelloWorld(hunitest.TestCase): - bar -""" - actual = hprint.dedent(txt) - expected = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar""" - self.assert_equal(actual, expected, fuzzy_match=False) - - def test2(self) -> None: - txt = r""" - read_data: - file_name: foo_bar.txt - nrows: 999 - single_val: hello - zscore: - style: gaz - com: 28""" - actual = hprint.dedent(txt) - expected = """read_data: - file_name: foo_bar.txt - nrows: 999 -single_val: hello -zscore: - style: gaz - com: 28""" - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_roundtrip1(self) -> None: - """ - Verify that `indent` and `dedent` are inverse of each other. - """ - txt1 = """foo - - -# ############################################################################# -# TestHelloWorld -# ############################################################################# - - -class TestHelloWorld(hunitest.TestCase): - bar""" - num_spaces = 3 - txt2 = hprint.indent(txt1, num_spaces=num_spaces) - txt3 = hprint.dedent(txt2) - self.assert_equal(txt1, txt3, fuzzy_match=False) - - -# ############################################################################# -# Test_align_on_left1 -# ############################################################################# - - -class Test_align_on_left1(hunitest.TestCase): - def test1(self) -> None: - txt = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar -""" - actual = hprint.align_on_left(txt) - expected = """foo - -klass TestHelloWorld(hunitest.TestCase): -bar -""" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_logging1 -# ############################################################################# - - -class Test_logging1(hunitest.TestCase): - def test_log_frame1(self) -> None: - hprint.log_frame(_LOG, "%s %s", "hello", "world") - - def 
test_log_frame2(self) -> None: - hprint.log_frame(_LOG, "%s", "hello", level=1) - - def test_log_frame3(self) -> None: - hprint.log_frame(_LOG, "%s", "hello", level=2, verbosity=logging.INFO) - - -# ############################################################################# -# Test_remove_lead_trail_empty_lines1 -# ############################################################################# - - -class Test_remove_lead_trail_empty_lines1(hunitest.TestCase): - def helper(self, input_str: str, expected_output: List[str]) -> None: - """ - Test the `remove_lead_trail_empty_lines` function. - - :param input_str: The input string to be processed. - :param expected_output: The expected output list of strings. - - Example: - input_str = "line1\n\n\nline2" - expected_output = ["line1", "", "", "line2"] - """ - # Test as string. - actual = hprint.remove_lead_trail_empty_lines(input_str) - expected = "\n".join(expected_output) - self.assertEqual(actual, expected) - # Test as list of strings. - input_str = input_str.splitlines() - actual = hprint.remove_lead_trail_empty_lines(input_str) - self.assertEqual(actual, expected_output) - - def test_empty_string_returns_empty_list(self) -> None: - input_str: str = "" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_single_line_string_returns_single_line_list(self) -> None: - input_str: str = "line" - expected_output = ["line"] - self.helper(input_str, expected_output) - - def test_multiple_lines_with_no_empty_lines_returns_same_lines( - self, - ) -> None: - input_str: str = "line1\nline2\nline3" - expected_output = ["line1", "line2", "line3"] - self.helper(input_str, expected_output) - - def test_leading_empty_lines_are_removed(self) -> None: - input_str: str = "\n\nline1\nline2" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_trailing_empty_lines_are_removed(self) -> None: - input_str: str = "line1\nline2\n\n" - expected_output = ["line1", 
"line2"] - self.helper(input_str, expected_output) - - def test_leading_and_trailing_empty_lines_are_removed(self) -> None: - input_str: str = "\n\nline1\nline2\n\n" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_consecutive_empty_lines_in_middle_are_not_removed(self) -> None: - input_str: str = "line1\n\n\nline2" - expected_output = ["line1", "", "", "line2"] - self.helper(input_str, expected_output) - - def test_only_empty_lines_returns_empty_list(self) -> None: - input_str: str = "\n\n\n" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_mixed_content_with_leading_trailing_and_middle_empty_lines( - self, - ) -> None: - input_str: str = "\n\nline1\n\nline2\n\n" - expected_output = ["line1", "", "line2"] - self.helper(input_str, expected_output) - - def test_single_empty_line_returns_empty_list(self) -> None: - input_str: str = "\n" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_multiple_consecutive_empty_lines_at_beginning_and_end( - self, - ) -> None: - input_str: str = "\n\n\nline1\nline2\n\n\n" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_input_with_only_spaces_and_tabs_as_empty_lines(self) -> None: - input_str: str = " \n\t\nline1\nline2\n \n\t" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_input_with_mixed_line_endings_unix_and_windows(self) -> None: - input_str: str = "line1\n\nline2\r\n\r\nline3" - expected_output = ["line1", "", "line2", "", "line3"] - self.helper(input_str, expected_output) - - def test_input_with_special_characters(self) -> None: - input_str: str = "line1\n\n!@#$%^&*()\n\nline2" - expected_output = ["line1", "", "!@#$%^&*()", "", "line2"] - self.helper(input_str, expected_output) - - -# ############################################################################# -# Test_remove_empty_lines -# 
############################################################################# - - -class Test_remove_empty_lines(hunitest.TestCase): - """ - Test remove_empty_lines function with different modes. - """ - - def helper(self, lines: str, mode: str, expected: str) -> None: - """ - Test helper for remove_empty_lines. - - :param lines: Input text as string (will be split into list) - :param mode: Mode parameter for remove_empty_lines - :param expected: Expected output as string (will be split into list) - """ - # Prepare inputs. - lines_str = hprint.dedent(lines) - if lines_str: - lines_list = lines_str.split("\n") - else: - lines_list = [] - # Prepare outputs. - expected_str = hprint.dedent(expected) - if expected_str: - expected_list = expected_str.split("\n") - else: - expected_list = [] - # Run test. - actual = hprint.remove_empty_lines(lines_list, mode=mode) - # Check outputs. - self.assert_equal(str(actual), str(expected_list)) - - def test1(self) -> None: - """ - Test no_empty_lines mode with an empty list. - """ - # Prepare inputs. - lines = "" - mode = "no_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test2(self) -> None: - """ - Test no_empty_lines mode with no empty lines in the input. - """ - # Prepare inputs. - lines = """ - line1 - line2 - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test3(self) -> None: - """ - Test no_empty_lines mode with all lines being empty. - """ - # Prepare inputs. - lines = """ - - - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test4(self) -> None: - """ - Test no_empty_lines mode removes leading empty lines. - """ - # Prepare inputs. - lines = """ - - line1 - line2 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - """ - # Run test. 
- self.helper(lines, mode, expected) - - def test5(self) -> None: - """ - Test no_empty_lines mode removes trailing empty lines. - """ - # Prepare inputs. - lines = """ - line1 - line2 - - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test6(self) -> None: - """ - Test no_empty_lines mode removes empty lines in the middle. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test7(self) -> None: - """ - Test no_empty_lines mode removes lines with only whitespace. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - \t - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test8(self) -> None: - """ - Test no_consecutive_empty_lines mode with empty list. - """ - # Prepare inputs. - lines = "" - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test9(self) -> None: - """ - Test no_consecutive_empty_lines mode with no empty lines. - """ - # Prepare inputs. - lines = """ - line1 - line2 - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test10(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps single empty line. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test11(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps one of two consecutive empty lines. 
- """ - # Prepare inputs. - lines = """ - line1 - - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test12(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps one of multiple consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - - - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test13(self) -> None: - """ - Test no_consecutive_empty_lines mode with multiple groups of consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - - line2 - - - - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test14(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps all non-consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test15(self) -> None: - """ - Test that invalid mode raises ValueError. - """ - # Prepare inputs. - lines = ["line1", "line2"] - mode = "invalid_mode" - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hprint.remove_empty_lines(lines, mode=mode) - actual = str(cm.exception) - expected = "Invalid mode='invalid_mode'" - self.assert_equal(actual, expected) - - def test16(self) -> None: - """ - Test remove_empty_lines with string input (decorator functionality). - """ - # Prepare inputs. - text = """ - line1 - - line2 - - line3 - """ - text = hprint.dedent(text) - mode = "no_empty_lines" - # Prepare outputs. 
- expected = """ - line1 - line2 - line3 - """ - expected = hprint.dedent(expected) - # Run test. - actual = hprint.remove_empty_lines(text, mode=mode) - # Check outputs. - self.assert_equal(actual, expected) - - def test17(self) -> None: - """ - Test no_consecutive_empty_lines with string input (decorator functionality). - """ - # Prepare inputs. - text = """ - line1 - - - line2 - """ - text = hprint.dedent(text) - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - expected = hprint.dedent(expected) - # Run test. - actual = hprint.remove_empty_lines(text, mode=mode) - # Check outputs. - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py deleted file mode 100644 index 652fdf47a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py +++ /dev/null @@ -1,228 +0,0 @@ -import io -import os -import pprint -import re -from contextlib import redirect_stdout - -import pytest - -# TODO(heanh): add `junitparser` in `//helpers` image. -pytest.importorskip("junitparser") - -import helpers.hio as hio # noqa: E402 -import helpers.hpytest as hpytest # noqa: E402 -import helpers.hunit_test as hunitest # noqa: E402 - - -def _strip_color_codes(text: str) -> str: - """ - Remove ANSI color escape codes from text. - - :param text: text to strip the color codes from - :return: text with the color codes removed - """ - # Remove ANSI escape codes. 
- txt = re.sub(r"\033\[[0-9;]*m", "", text) - return txt - - -# ############################################################################# -# Test_JUnitReporter -# ############################################################################# - - -class Test_JUnitReporter(hunitest.TestCase): - """ - Test scenario where there are passed, skipped tests with leads to `PASSED` - result. - """ - - def helper(self) -> hpytest.JUnitReporter: - """ - Helper function to create a `JUnitReporter` object. - - :return: `JUnitReporter` object - """ - xml_str = """ - - - - - /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. - - - - - """ - input_dir = self.get_scratch_space() - input_file_path = os.path.join(input_dir, "test.xml") - hio.to_file(input_file_path, xml_str) - reporter = hpytest.JUnitReporter(input_file_path) - return reporter - - def test_parse(self) -> None: - """ - Test parsing the JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - actual = pprint.pformat(reporter.overall_stats) - expected = r""" - {'error': 0, - 'failed': 0, - 'passed': 1, - 'skipped': 1, - 'total_tests': 2, - 'total_time': 3.0} - """ - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test_print_summary(self) -> None: - """ - Test printing the summary of the results from JUnit XML file. 
- """ - reporter = self.helper() - reporter.parse() - captured_output = io.StringIO() - with redirect_stdout(captured_output): - reporter.print_summary() - actual = captured_output.getvalue() - actual = _strip_color_codes(actual) - expected = r""" - ====================================================================== - collected 2 items - - ====================================================================== - Test: dummy-test-suite-1 - Timestamp: 2025-01-01T12:00:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) - Summary: 1 passed, 1 skipped in 2.000s - - ====================================================================== - Test: dummy-test-suite-2 - Timestamp: 2025-01-01T12:01:00.000000+00:00 - ---------------------------------------------------------------------- - Summary: no tests in 1.000s - - ====================================================================== - Summary: 1 passed, 1 skipped in 3.00s - Result: PASSED - """ - self.assert_equal( - actual, - expected, - dedent=True, - fuzzy_match=True, - ) - - -# ############################################################################# -# Test_JUnitReporter2 -# ############################################################################# - - -class Test_JUnitReporter2(hunitest.TestCase): - """ - Test scenario where there are passed, error, failed, and skipped tests with - leads to `FAILED` result. - """ - - def helper(self) -> hpytest.JUnitReporter: - """ - Helper function to create a `JUnitReporter` object. - - :return: `JUnitReporter` object - """ - xml_str = """ - - - - - /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. - - - - - - /app/dummy/test/test_module.py:30: Dummy failure message for testing purposes. 
- - - /app/dummy/test/test_module.py:35: Dummy error message for testing purposes. - - - - - """ - input_dir = self.get_scratch_space() - input_file_path = os.path.join(input_dir, "test.xml") - hio.to_file(input_file_path, xml_str) - reporter = hpytest.JUnitReporter(input_file_path) - return reporter - - def test_parse(self) -> None: - """ - Test parsing the JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - actual = pprint.pformat(reporter.overall_stats) - expected = r""" - {'error': 1, - 'failed': 1, - 'passed': 2, - 'skipped': 1, - 'total_tests': 5, - 'total_time': 6.0} - """ - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test_print_summary(self) -> None: - """ - Test printing the summary of the results from JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - captured_output = io.StringIO() - with redirect_stdout(captured_output): - reporter.print_summary() - actual = captured_output.getvalue() - actual = _strip_color_codes(actual) - expected = r""" - ====================================================================== - collected 5 items - - ====================================================================== - Test: dummy-test-suite-1 - Timestamp: 2025-01-01T12:00:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) - Summary: 1 passed, 1 skipped in 2.000s - - ====================================================================== - Test: dummy-test-suite-2 - Timestamp: 2025-01-01T12:01:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_passed_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_failed_function FAILED (1.000s) - dummy.test.test_module.DummyTestCase::test_error_function ERROR (1.000s) - 
Summary: 1 passed, 1 failed, 1 error in 3.000s - - ====================================================================== - Test: dummy-test-suite-3 - Timestamp: 2025-01-01T12:02:00.000000+00:00 - ---------------------------------------------------------------------- - Summary: no tests in 1.000s - - ====================================================================== - Summary: 2 passed, 1 failed, 1 error, 1 skipped in 6.00s - Result: FAILED - """ - self.assert_equal( - actual, - expected, - dedent=True, - fuzzy_match=True, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py deleted file mode 100644 index d64310202..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py +++ /dev/null @@ -1,154 +0,0 @@ -import asyncio -import logging - -import pytest - -import helpers.hretry as hretry -import helpers.htimer as htimer -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -EXCEPTIONS = (AttributeError, ValueError) - - -# ############################################################################# -# Test_retry -# ############################################################################# - - -class Test_retry(hunitest.TestCase): - def test_retry1(self) -> None: - """ - Test normal case. 
- """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> bool: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - self.assertTrue(func()) - self.assertEqual(self.exception_count, num_attempts - 1) - - def test_retry2(self) -> None: - """ - Test when the number of exceptions is greater than the number of - retries. - """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> bool: - if self.exception_count < num_attempts: - self.exception_count += 1 - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with self.assertRaises(ValueError): - func() - - def test_retry3(self) -> None: - """ - Test when the raised exception is not in the list of expected - exceptions. - """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> None: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - raise IndexError("Simulated non expected error") - _LOG.debug("All good") - - with self.assertRaises(IndexError): - func() - - -# ############################################################################# -# Test_retry2 -# ############################################################################# - - -class Test_retry2(hunitest.TestCase): - def test_async_retry1(self) -> None: - """ - Test normal case. 
- """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> bool: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: - result = asyncio.run(func()) - self.assertEqual(round(ts.elapsed_time, 1), 2.2) - self.assertTrue(result) - self.assertEqual(self.exception_count, num_attempts - 1) - - @pytest.mark.skip(reason="See CmTask11013") - def test_async_retry2(self) -> None: - """ - Test when the number of exceptions is greater than the number of - retries. - """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> bool: - if self.exception_count < num_attempts: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with self.assertRaises(ValueError) as fail: - with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: - asyncio.run(func()) - self.assertEqual(round(ts.elapsed_time, 1), 3.3) - actual = str(fail.exception) - expected = "Simulated expected error" - self.assert_equal(actual, expected) - - def test_async_retry3(self) -> None: - """ - Test when the raised exception is not in the list of expected - exceptions. 
- """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> None: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise IndexError("Simulated non expected error") - _LOG.debug("All good") - - with self.assertRaises(IndexError) as fail: - asyncio.run(func()) - actual = str(fail.exception) - expected = "Simulated non expected error" - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py deleted file mode 100644 index 8f9dd84df..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py +++ /dev/null @@ -1,597 +0,0 @@ -import logging -import os -from typing import Generator, Tuple - -import pytest - -import helpers.hio as hio -import helpers.hmoto as hmoto -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestReplaceStarWithDoubleStar -# ############################################################################# - - -class TestReplaceStarWithDoubleStar(hunitest.TestCase): - def test1(self) -> None: - """ - Test non replacement of a single asterisk at the end of the path. - """ - pattern_to_modify = "s3://bucket/path/*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/*") - - def test2(self) -> None: - """ - Test replacement of a single asterisk within the path. 
- """ - pattern_to_modify = "s3://bucket/path/*/file" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/**/*/file") - - def test3(self) -> None: - """ - Test no replacement when there are no asterisks in the path. - """ - pattern_to_modify = "s3://bucket/path/file" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/file") - - def test4(self) -> None: - """ - Test replacement when multiple asterisk are in the path. - """ - pattern_to_modify = "s3://bucket/*/path/*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/**/*/path/*") - - def test5(self) -> None: - """ - Test non-replacement of asterisk at the end of the path in a special - case. - """ - pattern_to_modify = "s3://bucket/*/path/csv*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/**/*/path/csv*") - - -# ############################################################################# -# TestToFileAndFromFile1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestToFileAndFromFile1(hmoto.S3Mock_TestCase): - def write_read_helper(self, file_name: str, force_flush: bool) -> None: - # Prepare inputs. - file_content = "line_mock1\nline_mock2\nline_mock3" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{file_name}" - # Save file. - # TODO(Nikola): Is it possible to verify `force_flush`? - hs3.to_file( - file_content, - s3_path, - aws_profile=moto_s3fs, - force_flush=force_flush, - ) - # Read file. - saved_content = hs3.from_file(s3_path, aws_profile=moto_s3fs) - # Check output. 
- expected = r"""line_mock1 - line_mock2 - line_mock3""" - self.assert_equal(saved_content, expected, fuzzy_match=True) - - # ######################################################################### - - def test_to_file_and_from_file1(self) -> None: - """ - Verify that regular `.txt` file is saved/read on S3. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - force_flush = False - self.write_read_helper(regular_file_name, force_flush) - - def test_to_file_and_from_file2(self) -> None: - """ - Verify that compressed (e.g,`.gz`,`gzip`) file is saved/read on S3. - """ - # Prepare inputs. - gzip_file_name = "mock.gzip" - force_flush = True - self.write_read_helper(gzip_file_name, force_flush) - - def test_to_file_invalid1(self) -> None: - """ - Verify that only binary mode is allowed. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - regular_file_content = "line_mock1\nline_mock2\nline_mock3" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{regular_file_name}" - # Save file with `t` mode. - with self.assertRaises(ValueError) as fail: - hs3.to_file( - regular_file_content, s3_path, mode="wt", aws_profile=moto_s3fs - ) - # Check output. - actual = str(fail.exception) - expected = r"S3 only allows binary mode!" - self.assert_equal(actual, expected) - - def test_from_file_invalid1(self) -> None: - """ - Verify that encoding is not allowed. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{regular_file_name}" - # Read with encoding. - with self.assertRaises(ValueError) as fail: - hs3.from_file(s3_path, encoding=True, aws_profile=moto_s3fs) - # Check output. - actual = str(fail.exception) - expected = r"Encoding is not supported when reading from S3!" 
- self.assert_equal(actual, expected) - - -# ############################################################################# -# TestListdir1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestListdir1(hmoto.S3Mock_TestCase): - def prepare_test_data(self) -> Tuple[str, hs3.AwsProfile]: - bucket_s3_path = f"s3://{self.bucket_name}" - depth_one_s3_path = f"{bucket_s3_path}/depth_one" - # Prepare test files. - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - first_s3_path = f"{depth_one_s3_path}/mock1.txt" - lines = [b"line_mock1"] - with moto_s3fs.open(first_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - second_s3_path = f"{depth_one_s3_path}/mock2.gzip" - with moto_s3fs.open(second_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - # Prepare test directories. - # `moto_s3fs.mkdir` is useless as empty directory is not visible. - # There must be at least one file in the directory to be visible. - regular_dir_s3_path = f"{depth_one_s3_path}/mock" - additional_file_s3_path = f"{regular_dir_s3_path}/regular_mock3.txt" - with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - git_dir_s3_path = f"s3://{bucket_s3_path}/.git" - additional_file_s3_path = f"{git_dir_s3_path}/git_mock3.txt" - with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - return bucket_s3_path, moto_s3fs - - # ######################################################################### - - def test_listdir1(self) -> None: - """ - Verify that all paths are found. 
- """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = False - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock_bucket/.git", - "mock_bucket/.git/git_mock3.txt", - "mock_bucket/depth_one", - "mock_bucket/depth_one/mock", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir2(self) -> None: - """ - Verify that all relative paths are found. - """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - # Exclude `.git` by going level below. - bucket_s3_path = os.path.join(bucket_s3_path, "depth_one") - pattern = "*" - only_files = False - use_relative_paths = True - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock", - "mock/regular_mock3.txt", - "mock1.txt", - "mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir3(self) -> None: - """ - Verify that all paths are found, except `.git` ones. - """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = False - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - ) - paths.sort() - expected_paths = [ - "mock_bucket/depth_one", - "mock_bucket/depth_one/mock", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir4(self) -> None: - """ - Verify that all file paths are found. 
- """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = True - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock_bucket/.git/git_mock3.txt", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - -# ############################################################################# -# TestDu1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestDu1(hmoto.S3Mock_TestCase): - def test_du1(self) -> None: - """ - Verify that total file size is returned. - """ - bucket_s3_path = f"s3://{self.bucket_name}" - depth_one_s3_path = f"{bucket_s3_path}/depth_one" - # Prepare test files. - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - first_s3_path = f"{bucket_s3_path}/mock1.txt" - lines = [b"line_mock\n"] * 150 - with moto_s3fs.open(first_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - second_s3_path = f"{depth_one_s3_path}/mock2.txt" - with moto_s3fs.open(second_s3_path, "wb") as s3_file: - # One level deeper to test recursive `du`. - s3_file.writelines(lines) - # Get multiple files. - size = hs3.du(bucket_s3_path, aws_profile=moto_s3fs) - expected_size = 3000 - self.assertEqual(size, expected_size) - size = hs3.du(depth_one_s3_path, aws_profile=moto_s3fs) - expected_size = 1500 - self.assertEqual(size, expected_size) - # Get exactly one file. - size = hs3.du(second_s3_path, aws_profile=moto_s3fs) - self.assertEqual(size, expected_size) - # Verify size in human-readable form. 
- size = hs3.du(bucket_s3_path, human_format=True, aws_profile=moto_s3fs) - expected_size = r"2.9 KB" - self.assert_equal(size, expected_size) - - -# ############################################################################# -# TestGenerateAwsFiles -# ############################################################################# - - -class TestGenerateAwsFiles(hunitest.TestCase): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - self.setUp() - os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "mock_access_key" - os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "mock_secret_access_key" - os.environ["MOCK_AWS_SESSION_TOKEN"] = "mock_session_token" - os.environ["MOCK_AWS_S3_BUCKET"] = "mock_s3_bucket" - os.environ["MOCK_AWS_DEFAULT_REGION"] = "mock_default_region" - # - os.environ["TEST_AWS_ACCESS_KEY_ID"] = "test_access_key" - os.environ["TEST_AWS_SECRET_ACCESS_KEY"] = "test_secret_access_key" - os.environ["TEST_AWS_SESSION_TOKEN"] = "test_session_token" - os.environ["TEST_AWS_S3_BUCKET"] = "test_s3_bucket" - os.environ["TEST_AWS_DEFAULT_REGION"] = "test_default_region" - # Generate AWS files with mock AWS profiles. 
- self._scratch_test_dir = self.get_scratch_space() - aws_profiles = ["mock", "test"] - hs3.generate_aws_files( - home_dir=self._scratch_test_dir, aws_profiles=aws_profiles - ) - - def tear_down_test(self) -> None: - del os.environ["MOCK_AWS_ACCESS_KEY_ID"] - del os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] - del os.environ["MOCK_AWS_SESSION_TOKEN"] - del os.environ["MOCK_AWS_S3_BUCKET"] - del os.environ["MOCK_AWS_DEFAULT_REGION"] - # - del os.environ["TEST_AWS_ACCESS_KEY_ID"] - del os.environ["TEST_AWS_SECRET_ACCESS_KEY"] - del os.environ["TEST_AWS_SESSION_TOKEN"] - del os.environ["TEST_AWS_S3_BUCKET"] - del os.environ["TEST_AWS_DEFAULT_REGION"] - - def helper(self, file_name: str, expected: str) -> None: - # Check. - target_dir = os.path.join(self._scratch_test_dir, ".aws") - actual = hio.from_file(os.path.join(target_dir, file_name)) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Check that AWS credentials file is generated correctly. - """ - file_name = "credentials" - expected = r""" - [mock] - aws_access_key_id=mock_access_key - aws_secret_access_key=mock_secret_access_key - aws_session_token=mock_session_token - aws_s3_bucket=mock_s3_bucket - - [test] - aws_access_key_id=test_access_key - aws_secret_access_key=test_secret_access_key - aws_session_token=test_session_token - aws_s3_bucket=test_s3_bucket - """ - self.helper(file_name, expected) - - def test2(self) -> None: - """ - Check that AWS config file is generated correctly. 
- """ - file_name = "config" - expected = """ - [profile mock] - region=mock_default_region - - [profile test] - region=test_default_region - """ - self.helper(file_name, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_get_s3_bucket_from_stage -# ############################################################################# - - -class Test_get_s3_bucket_from_stage(hunitest.TestCase): - def test1(self) -> None: - """ - Check for a valid stage. - """ - # Define arguments. - stage = "test" - # Run. - actual = hs3.get_s3_bucket_from_stage(stage) - expected = "cryptokaizen-data-test" - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Check for a valid stage and optional suffix. - """ - # Define arguments. - stage = "preprod" - suffix = "suffix_test" - # Run. - actual = hs3.get_s3_bucket_from_stage(stage, add_suffix=suffix) - expected = "cryptokaizen-data.preprod/suffix_test" - self.assert_equal(actual, expected) - - def test3(self) -> None: - """ - Check Invalid stage. - """ - # Define arguments. - stage = "Invalid" - # Run. 
- with self.assertRaises(AssertionError) as cm: - hs3.get_s3_bucket_from_stage(stage) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 'Invalid' in '{'test': 'cryptokaizen-data-test', 'preprod': 'cryptokaizen-data.preprod', 'prod': 'cryptokaizen-data'}' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_s3_get_credentials1 -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class Test_s3_get_credentials1(hunitest.TestCase): - def test1(self) -> None: - res = hs3.get_aws_credentials(_AWS_PROFILE) - _LOG.debug("res=%s", str(res)) - - -# ############################################################################# -# Test_s3_functions1 -# ############################################################################# - - -class Test_s3_functions1(hunitest.TestCase): - def test_extract_bucket_from_path1(self) -> None: - path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "tmp/TestCachingOnS3.test_with_caching1/joblib", - ) - bucket, path = hs3.split_path(path) - self.assert_equal(bucket, "cryptokaizen-unit-test") - self.assert_equal(path, "/tmp/TestCachingOnS3.test_with_caching1/joblib") - - -# ############################################################################# -# Test_s3_1 -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class Test_s3_1(hunitest.TestCase): - def test_ls1(self) -> None: - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README.md", - ) - _LOG.debug("file_path=%s", file_path) - # > aws s3 ls s3://***** - # PRE data/ - # 2021-04-06 1:17:44 48 README.md - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_names = s3fs.ls(file_path) - 
_LOG.debug("file_names=%s", file_names) - self.assertGreater(len(file_names), 0) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_glob1(self) -> None: - # > aws s3 ls s3://alphamatic-data/data/ib/metadata/ - # 2021-04-26 08:39:00 18791 exchanges-2021-04-01-134738089177.csv - # 2021-04-26 08:39:00 18815 exchanges-2021-04-01-143112738505.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-134738089177.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/ib/metadata", - ) - glob_pattern = file_path + "/exchanges-*" - _LOG.debug("glob_pattern=%s", glob_pattern) - file_names = s3fs.glob(glob_pattern) - _LOG.debug("file_names=%s", file_names) - self.assertGreater(len(file_names), 0) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README.md", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = True - self.assertEqual(actual, expected) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists2(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README_does_not_exist.md", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = False - self.assertEqual(actual, expected) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists3(self) -> None: - # > aws s3 ls alphamatic-data/data/ib/metadata/symbols-2021-04-01-143112738505.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - 
hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/ib/metadata/symbols-2021-04-01-143112738505.csv", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = True - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py deleted file mode 100644 index cc046ddac..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py +++ /dev/null @@ -1,209 +0,0 @@ -# TODO(gp): Use pytest.import_skip instead of all this machinery. -_HAS_MOTO = True -try: - import moto -except ImportError: - # `moto` may not be installed in a non-cmamp repo, so we skip it (see "DevTools376: - # Break 2022-02-22"). - import helpers.hgit as hgit - - assert not hgit.is_cmamp(), ( - "`cmamp` should have moto, while other repos are allowed to not have it)" - ) - _HAS_MOTO = False - -if _HAS_MOTO: - import json - import logging - import unittest.mock as umock - - import boto3 - import botocore - import pytest - - import helpers.hgit as hgit - import helpers.hs3 as hs3 - import helpers.hsecrets as hsecret - import helpers.hserver as hserver - import helpers.hunit_test as hunitest - - _LOG = logging.getLogger(__name__) - - # The `mock_aws` decorator ensures the calls to the AWS API are - # mocked. - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestCreateClient(hunitest.TestCase): - def test_create_client1(self) -> None: - """ - Simple smoke test to verify connection to AWS. 
- """ - client = hsecret.get_secrets_client(aws_profile="ck") - self.assertIsInstance(client, botocore.client.BaseClient) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestGetSecret(hunitest.TestCase): - @moto.mock_aws - def test_get_secret(self) -> None: - """ - Verify that the secret can be retrieved correctly. - """ - # Make sure the region name matches the one used in `hsecret` profile. - client = boto3.client( - "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 - ) - secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - client.create_secret( - Name=secret_name, SecretString=json.dumps(secret) - ) - self.assertDictEqual(hsecret.get_secret(secret_name), secret) - - @moto.mock_aws - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - def test_trading_key(self) -> None: - """ - Verify locking mechanism for trading key is processed correctly. - """ - # Define test params. - secret_value = {"test.trading.key": "test.trading.value"} - secret_name = "test.trading.sandbox.1" - usedBy = "pytest" - hsecret.store_secret(secret_name, secret_value) - # Define expected values. - usedBy = hsecret._get_flag_value(usedBy) - expected = f"Secret key is already in use by {usedBy}" - # Call get secret to lock the key. - _ = hsecret.get_secret(secret_name) - # Recall get secret for same key to verify the lock. - try: - hsecret.get_secret(secret_name) - except RuntimeError as rte: - actual = str(rte) - self.assert_equal(actual, expected, fuzzy_match=True) - - @moto.mock_aws - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - def test_lock_for_different_script(self) -> None: - """ - Verify locking mechanism for access to trading key is passed if - scripts are different. - """ - # Define test params. 
- secret_value = {"test.trading.key": "test.trading.value"} - secret_name = "test.trading.sandbox.1" - script1 = "pytest" - script2 = "run_system_observer.py" - hsecret.store_secret(secret_name, secret_value) - # Call get secret to lock the key with testing script. - _ = hsecret.get_secret(secret_name) - usedBy1 = hsecret._get_flag_value(script1) - # Define expected values. - usedBy2 = hsecret._get_flag_value(script2) - # Update secret value with expected usedBy script names. - secret_value["usedBy"] = [usedBy1, usedBy2] - # Call get secret for same key to verify the lock for mocked script. - with umock.patch("sys.argv", [script2]): - actual = hsecret.get_secret(secret_name) - self.assert_equal( - str(actual), expected=str(secret_value), fuzzy_match=True - ) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestStoreSecret(hunitest.TestCase): - @moto.mock_aws - def test_store_secret1(self) -> None: - """ - Verify that a secret can be stored correctly. - """ - secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - hsecret.store_secret(secret_name, secret) - # Make sure the region name matches the one used in `hsecret`. - client = boto3.client( - "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 - ) - test_secret_value = json.loads( - client.get_secret_value(SecretId=secret_name)["SecretString"] - ) - self.assertDictEqual(test_secret_value, secret) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - class TestLockSecret(hunitest.TestCase): - @moto.mock_aws - def test_lock_secret(self) -> None: - """ - Verify that the lock secret function locks the key. - """ - # Define test params. 
- secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - hsecret.store_secret(secret_name, secret) - usedBy = "pytest" - # Lock the stored secret. - hsecret.lock_secret(secret_name, secret) - # Retry locking the same secret. - try: - hsecret.lock_secret(secret_name, secret) - except RuntimeError as rte: - usedBy = hsecret._get_flag_value(usedBy) - expected = f"Secret key is already in use by {usedBy}" - actual = str(rte) - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - class TestUpdateUsedby(hunitest.TestCase): - @moto.mock_aws - def test1(self) -> None: - """ - Verify that update_usedby updates value in secrets manager. - """ - # Define test params. - secret_value = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - usedBy = "pytest" - hsecret.store_secret(secret_name, secret_value) - # Define expected value. - expected = r""" - {'testkey': 'testvalue', 'usedBy': ['pytest']} - """ - # Run. - hsecret.update_usedby(secret_name, secret_value, usedBy) - actual = hsecret.get_secret(secret_name) - # Verify. 
- self.assert_equal(str(actual), expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py deleted file mode 100644 index 3e6a1ba7d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py +++ /dev/null @@ -1,321 +0,0 @@ -import logging - -import pytest - -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _TestCase1 -# ############################################################################# - - -class _TestCase1: - # def test_config_func_to_str1(self) -> None: - # val = hserver.config_func_to_str() - # _LOG.info("val=\n%s", val) - # if self.exp_config_func_to_str is not None: - # self.assert_equal(val, self.exp_config_func_to_str) - - def test_consistency1(self) -> None: - hserver._dassert_setup_consistency() - - def test_is_host_csfy_server1(self) -> None: - val = hserver.is_host_csfy_server() - _LOG.info("val=\n%s", val) - if self.exp_is_host_csfy_server is not None: - self.assertEqual(val, self.exp_is_host_csfy_server) - - def test_is_host_mac1(self) -> None: - val = hserver.is_host_mac() - _LOG.info("val=\n%s", val) - if self.exp_is_host_mac is not None: - self.assertEqual(val, self.exp_is_host_mac) - - def test_get_docker_info1(self) -> None: - val = hserver.get_docker_info() - _LOG.info("val=\n%s", val) - # Remove the docker version since it is not stable. 
- val = hprint.filter_text("docker_version=", val) - if self.exp_get_docker_info is not None: - self.assert_equal(val, self.exp_get_docker_info) - - def test_get_setup_settings1(self) -> None: - setups = hserver._get_setup_settings() - val = hserver._setup_to_str(setups) - _LOG.info("val=\n%s", val) - if self.exp_get_setup_settings is not None: - self.assert_equal(val, self.exp_get_setup_settings) - - # def test_get_setup_signature1(self) -> None: - # val = hserver._get_setup_signature() - # _LOG.info("val=\n%s", val) - # if self.exp_get_setup_signature is not None: - # self.assert_equal(val, self.exp_get_setup_signature) - - def test_is_inside_ci1(self) -> None: - val = hserver.is_inside_ci() - _LOG.info("val=\n%s", val) - if self.exp_is_inside_ci is not None: - self.assertEqual(val, self.exp_is_inside_ci) - - -# ############################################################################# -# Test_hserver1 -# ############################################################################# - - -class Test_hserver1(_TestCase1, hunitest.TestCase): - """ - Smoke test without checking anything. - """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = None - self.exp_get_docker_info = None - self.exp_get_setup_settings = None - self.exp_get_setup_signature = None - self.exp_is_host_csfy_server = None - self.exp_is_host_mac = None - self.exp_is_inside_ci = None - - -# ############################################################################# -# Test_hserver_inside_ci1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="Config not matching", -) -class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): - """ - Run tests inside CI. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = None - self.exp_get_docker_info = hprint.dedent(r""" - Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci True - """) - self.exp_get_setup_signature = None - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = False - self.exp_is_inside_ci = True - - -# ############################################################################# -# Test_hserver_inside_docker_container_on_csfy_server1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_inside_docker_container_on_csfy_server(), - reason="Config not matching", -) -class Test_hserver_inside_docker_container_on_csfy_server1( - _TestCase1, hunitest.TestCase -): - """ - Run tests inside Docker container on a Causify dev server. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server True - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = True - self.exp_is_host_mac = False - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_outside_docker_container_on_csfy_server1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_outside_docker_container_on_csfy_server(), - reason="Config not matching", -) -class Test_hserver_outside_docker_container_on_csfy_server1( - _TestCase1, hunitest.TestCase -): - """ - Run tests outside Docker container on a Causify dev server. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=False - has_docker_sibling_containers_support=*undef* - has_docker_children_containers_support=*undef* - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server True - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = True - self.exp_is_host_mac = False - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_inside_docker_container_on_gp_mac1 -# ############################################################################# - - -@pytest.mark.skipif( - not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), - reason="Config not matching", -) -class Test_hserver_inside_docker_container_on_gp_mac1( - _TestCase1, hunitest.TestCase -): - """ - Run tests inside Docker container on GP's Mac. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac True - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = True - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_outside_docker_container_on_gp_mac1 -# ############################################################################# - - -@pytest.mark.skipif( - not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), - reason="Config not matching", -) -class Test_hserver_outside_docker_container_on_gp_mac1( - _TestCase1, hunitest.TestCase -): - """ - Run tests outside Docker container on GP's Mac. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=False - has_docker_sibling_containers_support=*undef* - has_docker_children_containers_support=*undef* - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac True - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = True - self.exp_is_inside_ci = False - - -# ############################################################################# - - -# TODO(gp): Add test mocking the environment variables in _get_setup_signature. -# We should have one class for each set up (e.g., outside Mac, outside Linux, -# inside Docker, inside CI, etc.) 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py deleted file mode 100644 index 998b65c86..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -import unittest.mock as umock - -import helpers.hslack as hslack -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestSlackNotifier -# ############################################################################# - - -class TestSlackNotifier(hunitest.TestCase): - def test1(self) -> None: - """ - Check that `SlackNotifier` initializes with provided bot token. - """ - # Create notifier with explicit token. - notifier = hslack.SlackNotifier(bot_token="xoxb-test1-token") - self.assertEqual(notifier.bot_token, "xoxb-test1-token") - - def test2(self) -> None: - """ - Check that `SlackNotifier` initializes with environment variable token. - """ - # Mock environment variable and create notifier. - with umock.patch.dict( - os.environ, {"SLACK_BOT_TOKEN": "xoxb-test2-token"} - ): - notifier = hslack.SlackNotifier() - self.assertEqual(notifier.bot_token, "xoxb-test2-token") - - def test3(self) -> None: - """ - Check that `SlackNotifier` raises `ValueError` when no token is - provided. - """ - # Clear environment and verify initialization fails. - with umock.patch.dict(os.environ, {}, clear=True): - with self.assertRaises(ValueError) as cm: - hslack.SlackNotifier() - self.assertIn("No bot token provided", str(cm.exception)) - - def test4(self) -> None: - """ - Check that `send_message()` successfully sends message to Slack - channel. - """ - # Mock successful Slack API response. 
- with umock.patch("helpers.hslack.requests.post") as mock_post: - mock_response = umock.MagicMock() - mock_response.json.return_value = {"ok": True} - mock_response.raise_for_status.return_value = None - mock_post.return_value = mock_response - # Send message and verify API call. - notifier = hslack.SlackNotifier(bot_token="xoxb-test4-token") - notifier.send_message("#test4", "test4 message content") - # Verify request parameters. - mock_post.assert_called_once() - _, kwargs = mock_post.call_args - self.assertEqual(kwargs["json"]["channel"], "#test4") - self.assertEqual(kwargs["json"]["text"], "test4 message content") - - def test5(self) -> None: - """ - Check that `send_message()` raises `ValueError` on Slack API error. - """ - # Mock Slack API error response. - with umock.patch("helpers.hslack.requests.post") as mock_post: - mock_response = umock.MagicMock() - mock_response.json.return_value = { - "ok": False, - "error": "channel_not_found", - } - mock_response.raise_for_status.return_value = None - mock_post.return_value = mock_response - # Verify error is raised with correct message. 
- notifier = hslack.SlackNotifier(bot_token="xoxb-test5-token") - with self.assertRaises(ValueError) as cm: - notifier.send_message("#test5", "test5 message content") - self.assertIn("channel_not_found", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py deleted file mode 100644 index f6adba2f6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py +++ /dev/null @@ -1,29 +0,0 @@ -import helpers.hsql as hsql -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestCreateInOperator -# ############################################################################# - - -class TestCreateInOperator(hunitest.TestCase): - def test_create_in_operator1(self) -> None: - """ - Test creating IN operator for more than one value. - """ - values = ["binance", "ftx"] - column = "exchange_id" - actual = hsql.create_in_operator(values, column) - expected = "exchange_id IN ('binance','ftx')" - self.assertEqual(actual, expected) - - def test_create_in_operator2(self) -> None: - """ - Test creating IN operator for one value. 
- """ - values = ["ftx"] - column = "exchange_id" - actual = hsql.create_in_operator(values, column) - expected = "exchange_id IN ('ftx')" - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py deleted file mode 100644 index 1e5b4ff01..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py +++ /dev/null @@ -1,270 +0,0 @@ -import os -from typing import List, Tuple - -import helpers.hio as hio -import helpers.hstring as hstring -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestExtractVersionFromFileName -# ############################################################################# - - -class TestExtractVersionFromFileName(hunitest.TestCase): - def _test_extract_version_from_file_name( - self, version: str, expected: Tuple[int, int] - ) -> None: - """ - Verify function provides expected output on valid inputs. - - :param version: version in string format to input, e.g. 1.0 - :param expected: expected output version in (major, minor) - format - """ - fn = f"/app/datapull/ccxt/universe/download/universe_v{version}.json" - self.assertEqual(hstring.extract_version_from_file_name(fn), expected) - - def test_extract_version_from_file_name1(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("1.1", (1, 1)) - - def test_extract_version_from_file_name2(self) -> None: - """ - Verify function provides expected output on valid input. 
- """ - self._test_extract_version_from_file_name("4", (4, 0)) - - def test_extract_version_from_file_name3(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("1.0", (1, 0)) - - def test_extract_version_from_file_name4(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("3.11", (3, 11)) - - def test_extract_version_from_file_name5(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("16.2", (16, 2)) - - def test_extract_version_from_file_name6(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("25.11", (25, 11)) - - def _test_extract_version_from_file_name_incorrect_format( - self, file_name: str - ) -> None: - """ - Helper function to verify function raises AssertionError on incorrect - input format. - - :param file_name: incorrect file_name to test - """ - expected_fail = "Can't parse file" - with self.assertRaises(AssertionError) as fail: - _ = hstring.extract_version_from_file_name(file_name) - self.assertIn(expected_fail, str(fail.exception)) - - def test_extract_version_from_file_name_incorrect_format1(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format("incorrect") - - def test_extract_version_from_file_name_incorrect_format2(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_vxx.json" - ) - - def test_extract_version_from_file_name_incorrect_format3(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. 
- """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_v.1.json" - ) - - def test_extract_version_from_file_name_incorrect_format4(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_11.json" - ) - - -# ############################################################################# -# TestGetDocstringLineIndices -# ############################################################################# - - -class TestGetDocstringLineIndices(hunitest.TestCase): - """ - Test determining which code lines are inside (doc)strings. - """ - - def helper(self, code: str, expected: List[str]) -> None: - lines = code.split("\n") - actual_idxs = hstring.get_docstring_line_indices(lines) - actual = [lines[i].strip() for i in actual_idxs] - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test one type of quotes. - """ - code = """ - def test_assert_equal1(self) -> None: - ''' - Test one. - ''' - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = ''' - Inside a string. - ''' - d = '''Does not count''' - self.check_string(actual) - - """ - expected = ["'''", "Test one.", "s = '''", "Inside a string."] - self.helper(code, expected) - - def test2(self) -> None: - """ - Test the second type of quotes. - """ - code = ''' - def test_assert_equal1(self) -> None: - """ - Test one. - """ - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = """ - Inside a string. - """ - d = """Does not count""" - self.check_string(actual) - - ''' - expected = ['"""', "Test one.", 's = """', "Inside a string."] - self.helper(code, expected) - - def test3(self) -> None: - """ - Test quotes within quotes. 
- """ - code = """ - def test_assert_equal1(self) -> None: - ''' - Test one. - """ - code += '''\ -""" - String within "Test one". - """ - ''' - code += """\ -''' - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = ''' - Inside a string. - ''' - d = '''Does not count''' - self.check_string(actual) - - """ - expected = [ - "'''", - "Test one.", - '"""', - 'String within "Test one".', - '"""', - "s = '''", - "Inside a string.", - ] - self.helper(code, expected) - - -# ############################################################################# -# TestGetCodeBlockLineIndices -# ############################################################################# - - -class TestGetCodeBlockLineIndices(hunitest.TestCase): - def helper(self, code: str, expected: List[str]) -> None: - lines = code.split("\n") - actual_idxs = hstring.get_code_block_line_indices(lines) - actual = [lines[i].strip() for i in actual_idxs] - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test getting code block line indices. - """ - code = """ - def test_assert_equal1(self) -> None: - ``` - Test one. - ``` - d = ```Does not count``` - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - """ - expected = ["```", "Test one."] - self.helper(code, expected) - - -# ############################################################################# -# TestGetDocstrings -# ############################################################################# - - -class TestGetDocstrings(hunitest.TestCase): - def test1(self) -> None: - """ - Test that grouped lines within docstrings are correctly returned. - """ - # Prepare inputs. - test_get_docstring_lines_input_dir = self.get_input_dir() - text_file_path = os.path.join( - test_get_docstring_lines_input_dir, "test.txt" - ) - text = hio.from_file(text_file_path) - lines = text.splitlines() - # Run. 
- actual = hstring.get_docstrings(lines) - # Check. - expected = [ - [1, 2, 3, 4, 5, 6], - [11, 12, 13, 14, 15, 16], - ] - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py deleted file mode 100644 index 4d2431bca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py +++ /dev/null @@ -1,494 +0,0 @@ -import logging -import os -import platform -import re -import tempfile -from typing import List - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -def _get_ls_error_message(filename: str = "this_file_doesnt_exist") -> str: - """ - Get the expected error message for ls command for the current OS. - - :param filename: The filename that doesn't exist - """ - if platform.system() == "Darwin": - return f"ls: {filename}: No such file or directory" - elif platform.system() == "Linux": - return f"ls: cannot access '{filename}': No such file or directory" - raise RuntimeError(f"Unsupported OS: {platform.system()}") - -# ############################################################################# - - -# ############################################################################# -# Test_system1 -# ############################################################################# - - -class Test_system1(hunitest.TestCase): - def test1(self) -> None: - hsystem.system("ls") - - def test2(self) -> None: - hsystem.system("ls /dev/null", suppress_output=False) - - def test3(self) -> None: - """ - Output to a file. 
- """ - with tempfile.NamedTemporaryFile() as fp: - temp_file_name = fp.name - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name) - hdbg.dassert_path_exists(temp_file_name) - - def test4(self) -> None: - """ - Tee to a file. - """ - with tempfile.NamedTemporaryFile() as fp: - temp_file_name = fp.name - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name, tee=True) - hdbg.dassert_path_exists(temp_file_name) - - def test5(self) -> None: - """ - Test dry_run. - """ - temp_file_name = tempfile._get_default_tempdir() # type: ignore - candidate_name = tempfile._get_candidate_names() # type: ignore - temp_file_name += "/" + next(candidate_name) - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name, dry_run=True) - hdbg.dassert_path_not_exists(temp_file_name) - - def test6(self) -> None: - """ - Test abort_on_error=True. - """ - hsystem.system("ls this_file_doesnt_exist", abort_on_error=False) - - def test7(self) -> None: - """ - Test abort_on_error=True (default). - """ - with self.assertRaises(RuntimeError) as cm: - hsystem.system("ls this_file_doesnt_exist") - actual = str(cm.exception) - # Different systems return different rc. - actual = re.sub(r"rc='\d+'", "rc=''", actual) - # Use OS-specific expected error message. 
- error_msg = _get_ls_error_message() - expected = f""" - ################################################################################ - ################################################################################ - _system() failed - ################################################################################ - ################################################################################ - # _system: cmd='(ls this_file_doesnt_exist) 2>&1', print_command=False, abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - cmd='(ls this_file_doesnt_exist) 2>&1' - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - rc='' - - output=' - {error_msg} - ' - - Output saved in 'tmp.system_output.txt' - - Command saved in 'tmp.system_cmd.sh' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test8(self) -> None: - """ - Check that an assert error is raised when `tee` is passed without a log - file. - """ - with self.assertRaises(AssertionError) as cm: - _ = hsystem.system("ls this_should_fail", tee=True) - actual = str(cm.exception) - expected = r""" - ################################################################################ - * Failed assertion * - 'True' implies 'False' - ################################################################################ - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test9(self) -> None: - """ - Check that the failing command fails and logs are stored in the log - file. 
- - - `allow_errors = False` - - `tee = True` - - Log file path is passed - """ - log_dir = self.get_scratch_space() - log_file_path = os.path.join(log_dir, "tee_log") - with self.assertRaises(RuntimeError) as cm: - _ = hsystem.system( - "ls this_should_fail", tee=True, output_file=log_file_path - ) - actual = str(cm.exception) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # Normalize rc value (differs across systems). - actual = re.sub(r"rc='\d+'", "rc=''", actual) - # Check log output contains the OS-specific error message. - actual = hio.from_file(log_file_path) - error_msg = _get_ls_error_message("this_should_fail") - expected = error_msg + "\n" - self.assert_equal(actual, expected) - - def test10(self) -> None: - """ - Check that the failing command passes and logs are stored in the log - file. - - - `allow_errors = True` - - `tee = True` - - Log file path is passed - """ - log_dir = self.get_scratch_space() - log_file_path = os.path.join(log_dir, "tee_log") - rc = hsystem.system( - "ls this_should_fail", - tee=True, - abort_on_error=False, - output_file=log_file_path, - ) - self.assertNotEqual(rc, 0) - # Check log output. - actual = hio.from_file(log_file_path) - # Use OS-specific expected error message. 
- error_msg = _get_ls_error_message("this_should_fail") - expected = error_msg + "\n" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_system2 -# ############################################################################# - - -class Test_system2(hunitest.TestCase): - def test_get_user_name(self) -> None: - actual = hsystem.get_user_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("whoami")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - # - expected = hsystem.system_to_one_line("whoami")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_get_server_name(self) -> None: - actual = hsystem.get_server_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("uname -n")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_get_os_name(self) -> None: - actual = hsystem.get_os_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("uname -s")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_compute_file_signature1 -# ############################################################################# - - -class Test_compute_file_signature1(hunitest.TestCase): - def test1(self) -> None: - """ - Compute the signature of a file using 1 enclosing dir. - """ - file_name = ( - "/app/amp/core/test/TestCheckSameConfigs." 
- + "test_check_same_configs_error/output/test.txt" - ) - dir_depth = 1 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = ["output", "test.txt"] - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Compute the signature of a file using 2 enclosing dirs. - """ - file_name = ( - "/app/amp/core/test/TestCheckSameConfigs." - + "test_check_same_configs_error/output/test.txt" - ) - dir_depth = 2 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = [ - "TestCheckSameConfigs.test_check_same_configs_error", - "output", - "test.txt", - ] - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Compute the signature of a file using 4 enclosing dirs. - """ - file_name = "/app/amp/core/test/TestApplyAdfTest.test1/output/test.txt" - dir_depth = 4 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = [ - "core", - "test", - "TestApplyAdfTest.test1", - "output", - "test.txt", - ] - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# - - -# ############################################################################# -# Test_find_file_with_dir1 -# ############################################################################# - - -class Test_find_file_with_dir1(hunitest.TestCase): - def test1(self) -> None: - """ - Check whether we can find this file using one enclosing dir. - """ - # Use this file. - file_name = "helpers/test/test_hsystem.py" - dir_depth = 1 - actual = hsystem.find_file_with_dir(file_name, dir_depth=dir_depth) - expected = r"""['helpers/test/test_hsystem.py']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - - def _helper(self, dir_depth: int, mode: str) -> List[str]: - """ - Test helper for find_file_with_dir. 
- - :param dir_depth: Number of directory levels to use for matching - :param mode: Search mode for matching - :return: List of matching files - """ - # Create a fake golden outcome to be used in this test. - golden_content = "hello world" - self.check_string(golden_content) - # E.g., helpers/test/test_hsystem.py::Test_find_file_with_dir1::test2/test.txt - file_name = os.path.join(self.get_output_dir(), "test.txt") - _LOG.debug("file_name=%s", file_name) - actual = hsystem.find_file_with_dir( - file_name, dir_depth=dir_depth, mode=mode - ) - _LOG.debug("Found %d matching files", len(actual)) - return actual - - def test2(self) -> None: - """ - Check whether we can find a test golden output using different number - of enclosing dirs. - - With only 1 enclosing dir, we can't find it. - """ - # Use only one dir which is not enough to identify the file. - # E.g., .../test/TestSqlWriterBackend1.test_insert_tick_data1/output/test.txt - dir_depth = 1 - mode = "return_all_results" - actual = self._helper(dir_depth, mode) - # For sure there are more than 100 tests. - self.assertGreater(len(actual), 100) - - def test3(self) -> None: - """ - Like `test2`, but using 2 levels for sure we are going to identify the - file. - """ - dir_depth = 2 - mode = "return_all_results" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - # There should be a single match. - expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - def test4(self) -> None: - """ - Like `test2`, but using 2 levels for sure we are going to identify the - file and asserting in case we don't find a single result. - """ - dir_depth = 2 - mode = "assert_unless_one_result" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - # There should be a single match. 
- expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - def test5(self) -> None: - """ - Like `test2`, using more level than 2, again, we should have a single - result. - """ - dir_depth = 3 - mode = "assert_unless_one_result" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - -# ############################################################################# - - -# ############################################################################# -# Test_Linux_commands1 -# ############################################################################# - - -class Test_Linux_commands1(hunitest.TestCase): - def test_du1(self) -> None: - hsystem.du(".") - - -# ############################################################################# - - -# ############################################################################# -# Test_has_timestamp1 -# ############################################################################# - - -class Test_has_timestamp1(hunitest.TestCase): - def test_has_not_timestamp1(self) -> None: - """ - No timestamp. - """ - file_name = "patch.amp.8c5a2da9.tgz" - actual = hsystem.has_timestamp(file_name) - expected = False - self.assertEqual(actual, expected) - - def test_has_timestamp1(self) -> None: - """ - Valid timestamp. - """ - file_name = "patch.amp.8c5a2da9.20210725_225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp2(self) -> None: - """ - Valid timestamp. 
- """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725-22_58_57.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp3(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp4(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_22_58_57.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp5(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - -# ############################################################################# -# Test_append_timestamp_tag1 -# ############################################################################# - - -class Test_append_timestamp_tag1(hunitest.TestCase): - def test_no_timestamp1(self) -> None: - """ - Invalid timestamp, with no tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" - tag = "" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.tgz - expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.tgz" - self.assertRegex(actual, expected) - - def test_no_timestamp2(self) -> None: - """ - Invalid timestamp, with no tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" - tag = "hello" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.hello.tgz - expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.hello.tgz" - self.assertRegex(actual, expected) - - def test1(self) -> None: - """ - Valid timestamp, with no tag. 
- """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - tag = "" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210725_225857.20210726-15_11_25.tgz - expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Valid timestamp, with a tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - tag = "hello" - actual = hsystem.append_timestamp_tag(file_name, tag) - expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.hello.tgz" - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py deleted file mode 100644 index 385de303a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py +++ /dev/null @@ -1,159 +0,0 @@ -import logging - -import helpers.hprint as hprint -import helpers.htable as htable -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestTable1 -# ############################################################################# - - -class TestTable1(hunitest.TestCase): - # ######################################################################### - - @staticmethod - def _get_table() -> htable.Table: - txt = """completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow"] - # table = [line for line in csv.reader(txt.split("\n"), delimiter=' ')] - # _LOG.debug(hprint.to_str("table")) - # _LOG.debug("size=%s", str(htable.size(table))) - table = htable.Table.from_text(cols, txt, delimiter=" ") - 
return table - - def test_from_text1(self) -> None: - table = self._get_table() - self.assertIsInstance(table, htable.Table) - _LOG.debug(hprint.to_str("table")) - - def test_from_text_invalid1(self) -> None: - txt = """completed failure Lint Run_linter -completed success Lint -completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow"] - with self.assertRaises(AssertionError) as cm: - htable.Table.from_text(cols, txt, delimiter=" ") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '3' - == - '4' - Invalid row='['completed', 'success', 'Lint']' for cols='['status', 'outcome', 'descr', 'workflow']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_from_text_invalid2(self) -> None: - txt = """completed failure Lint Run_linter - completed success Lint Fast_tess - completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow", "EXTRA"] - with self.assertRaises(AssertionError) as cm: - htable.Table.from_text(cols, txt, delimiter=" ") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '4' - == - '5' - Invalid row='['completed', 'failure', 'Lint', 'Run_linter']' for cols='['status', 'outcome', 'descr', 'workflow', 'EXTRA']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # ######################################################################### - - def test_repr1(self) -> None: - table = self._get_table() - actual = repr(table) - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= -['completed', 'failure', 'Lint', 'Run_linter'] -['completed', 'success', 'Lint', 'Fast_tests'] -['completed', 'success', 'Lint', 'Slow_tests'] -size=(3, 4) -""" - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_str1(self) -> None: - table = self._get_table() - actual = str(table) - expected = r""" -status | outcome | descr | workflow | ---------- | ------- | ----- | ---------- 
| -completed | failure | Lint | Run_linter | -completed | success | Lint | Fast_tests | -completed | success | Lint | Slow_tests | -""" - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - # ######################################################################### - - def test_filter_table1(self) -> None: - """ - Filter resulting in a single matching row. - """ - table = self._get_table() - # - table_filter = table.filter_rows("outcome", "failure") - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= -['completed', 'failure', 'Lint', 'Run_linter'] -size=(1, 4) -""" - actual = repr(table_filter) - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_filter_table2(self) -> None: - """ - Filter resulting in no matches. - """ - table = self._get_table() - # - table_filter = table.filter_rows("status", "in progress") - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= - -size=(0, 4) -""" - actual = repr(table_filter) - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_filter_table3(self) -> None: - """ - Filter with a column constant using the constant value. 
- """ - table = self._get_table() - # - table_filter = table.filter_rows("descr", "Lint") - actual = repr(table_filter) - expected = repr(table) - self.assert_equal(actual, expected, fuzzy_match=False) - - # ######################################################################### - - def test_unique1(self) -> None: - table = self._get_table() - # - actual = table.unique("descr") - expected = ["Lint"] - self.assert_equal(str(actual), str(expected), fuzzy_match=False) - - def test_unique2(self) -> None: - table = self._get_table() - # - actual = table.unique("workflow") - expected = ["Fast_tests", "Run_linter", "Slow_tests"] - self.assert_equal(str(actual), str(expected), fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py deleted file mode 100644 index fa2059b0b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py +++ /dev/null @@ -1,578 +0,0 @@ -import logging - -import helpers.hprint as hprint -import helpers.htext_protect as htexprot -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test__extract_protected_content -# ############################################################################# - - -class Test__extract_protected_content(hunitest.TestCase): - """ - Test the extract_protected_content function. - """ - - def helper( - self, - txt: str, - file_type: str, - expected_txt: str, - expected_map_size: int, - ) -> None: - """ - Test helper for extract_protected_content. 
- - :param txt: Input text to process - :param file_type: File type ('md', 'txt', or 'tex') - :param expected_txt: Expected output text with placeholders - :param expected_map_size: Expected number of protected items - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - actual_lines, protected_map = htexprot.extract_protected_content( - lines, file_type - ) - # Check outputs. - actual = "\n".join(actual_lines) - expected = hprint.dedent( - expected_txt, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual, expected) - self.assertEqual(len(protected_map), expected_map_size) - - def test1(self) -> None: - """ - Test extracting single fenced block with content. - """ - # Prepare inputs. - txt = """ - Some text here. - ```python - def foo(): - return 42 - ``` - More text. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Some text here. - ```python - <<>> - ``` - More text. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test2(self) -> None: - """ - Test extracting multiple fenced blocks. - """ - # Prepare inputs. - txt = """ - Text. - ```python - code1 - ``` - Middle. - ```javascript - code2 - ``` - End. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text. - ```python - <<>> - ``` - Middle. - ```javascript - <<>> - ``` - End. - """ - expected_map_size = 2 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test3(self) -> None: - """ - Test extracting empty fenced block. - """ - # Prepare inputs. - txt = """ - Text before. - ```python - ``` - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - ```python - <<>> - ``` - Text after. - """ - expected_map_size = 1 - # Run test. 
- self.helper(txt, file_type, expected, expected_map_size) - - def test4(self) -> None: - """ - Test extracting fenced blocks with different languages. - """ - # Prepare inputs. - txt = """ - ```python - python_code - ``` - ```javascript - js_code - ``` - ```bash - bash_code - ``` - """ - file_type = "md" - # Prepare outputs. - expected = """ - ```python - <<>> - ``` - ```javascript - <<>> - ``` - ```bash - <<>> - ``` - """ - expected_map_size = 3 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test5(self) -> None: - """ - Test extracting HTML single-line comment. - """ - # Prepare inputs. - txt = """ - Text before. - - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - <<>> - Text after. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test6(self) -> None: - """ - Test extracting HTML multi-line comment. - """ - # Prepare inputs. - txt = """ - Text before. - - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - <<>> - Text after. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test7(self) -> None: - """ - Test extracting LaTeX comment. - """ - # Prepare inputs. - txt = """ - Some LaTeX text. - % This is a LaTeX comment - More text. - """ - file_type = "tex" - # Prepare outputs. - expected = """ - Some LaTeX text. - <<>> - More text. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test8(self) -> None: - """ - Test extracting math block. - """ - # Prepare inputs. - txt = """ - Text before. - $$ - E = mc^2 - $$ - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - $$ - <<>> - $$ - Text after. - """ - expected_map_size = 1 - # Run test. 
- self.helper(txt, file_type, expected, expected_map_size) - - def test9(self) -> None: - """ - Test fenced block not extracted for tex files. - """ - # Prepare inputs. - txt = """ - LaTeX text. - ``` - This should not be extracted for tex files - ``` - More text. - """ - file_type = "tex" - # Prepare outputs. - expected = """ - LaTeX text. - ``` - This should not be extracted for tex files - ``` - More text. - """ - expected_map_size = 0 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test10(self) -> None: - """ - Test mixed content (fenced blocks + comments + normal text). - """ - # Prepare inputs. - txt = """ - # Title - Some text. - ```python - code here - ``` - - $$ - math here - $$ - End. - """ - file_type = "md" - # Prepare outputs. - expected = """ - # Title - Some text. - ```python - <<>> - ``` - <<>> - $$ - <<>> - $$ - End. - """ - expected_map_size = 3 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - -# ############################################################################# -# Test__restore_protected_content -# ############################################################################# - - -class Test__restore_protected_content(hunitest.TestCase): - """ - Test the restore_protected_content function. - """ - - def helper( - self, - txt: str, - protected_map: dict, - expected_txt: str, - ) -> None: - """ - Test helper for restore_protected_content. - - :param txt: Input text with placeholders - :param protected_map: Mapping of placeholders to original content - :param expected_txt: Expected output with restored content - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - actual_lines = htexprot.restore_protected_content(lines, protected_map) - # Check outputs. 
- actual = "\n".join(actual_lines) - expected = hprint.dedent( - expected_txt, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test restoring single placeholder. - """ - # Prepare inputs. - txt = """ - Text before. - ```python - <<>> - ``` - Text after. - """ - protected_map = { - "<<>>": "def foo():\n return 42" - } - # Prepare outputs. - expected = """ - Text before. - ```python - def foo(): - return 42 - ``` - Text after. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test2(self) -> None: - """ - Test restoring multiple placeholders. - """ - # Prepare inputs. - txt = """ - ```python - <<>> - ``` - <<>> - ``` - <<>> - ``` - """ - protected_map = { - "<<>>": "code1", - "<<>>": "", - "<<>>": "code2", - } - # Prepare outputs. - expected = """ - ```python - code1 - ``` - - ``` - code2 - ``` - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test3(self) -> None: - """ - Test restoring multi-line content from single placeholder. - """ - # Prepare inputs. - txt = """ - Text. - <<>> - More text. - """ - protected_map = { - "<<>>": "" - } - # Prepare outputs. - expected = """ - Text. - - More text. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test4(self) -> None: - """ - Test with empty map (no-op). - """ - # Prepare inputs. - txt = """ - Text line 1. - Text line 2. - Text line 3. - """ - protected_map = {} - # Prepare outputs. - expected = """ - Text line 1. - Text line 2. - Text line 3. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test5(self) -> None: - """ - Test restoring empty content. - """ - # Prepare inputs. - txt = """ - Before. - ``` - <<>> - ``` - After. - """ - protected_map = {"<<>>": ""} - # Prepare outputs. - expected = """ - Before. - ``` - - ``` - After. - """ - # Run test. 
- self.helper(txt, protected_map, expected) - - -# ############################################################################# -# Test_extract_restore_roundtrip -# ############################################################################# - - -class Test_extract_restore_roundtrip(hunitest.TestCase): - """ - Test that extract followed by restore is identity operation. - """ - - def helper(self, txt: str, file_type: str) -> None: - """ - Test helper for roundtrip (extract then restore). - - :param txt: Input text - :param file_type: File type ('md', 'txt', or 'tex') - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - original = "\n".join(lines) - # Run test. - extracted_lines, protected_map = htexprot.extract_protected_content( - lines, file_type - ) - restored_lines = htexprot.restore_protected_content( - extracted_lines, protected_map - ) - # Check outputs. - actual = "\n".join(restored_lines) - self.assert_equal(actual, original) - - def test1(self) -> None: - """ - Test roundtrip with fenced blocks. - """ - # Prepare inputs. - txt = """ - # Title - Some text. - ```python - def foo(): - return 42 - ``` - More text. - """ - file_type = "md" - # Run test. - self.helper(txt, file_type) - - def test2(self) -> None: - """ - Test roundtrip with mixed content. - """ - # Prepare inputs. - txt = """ - Text. - ```python - code - ``` - - $$ - E = mc^2 - $$ - End. - """ - file_type = "md" - # Run test. - self.helper(txt, file_type) - - def test3(self) -> None: - """ - Test roundtrip with LaTeX comments. - """ - # Prepare inputs. - txt = """ - LaTeX text. - % Comment 1 - More text. - % Comment 2 - End. - """ - file_type = "tex" - # Run test. - self.helper(txt, file_type) - - def test4(self) -> None: - """ - Test roundtrip with no protected content. - """ - # Prepare inputs. - txt = """ - Just regular text. - No special content here. - Just plain paragraphs. - """ - file_type = "md" - # Run test. 
- self.helper(txt, file_type) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py deleted file mode 100644 index ff57a87c0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py +++ /dev/null @@ -1,24 +0,0 @@ -import logging -import time - -import helpers.htimer as htimer -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestTimedScope -# ############################################################################# - - -class TestTimedScope(hunitest.TestCase): - def test_1(self) -> None: - """ - Test that elapsed time is correctly computed. - """ - # Run the function to test. - with htimer.TimedScope(logging.INFO, "Test") as ts: - time.sleep(1) - # Round actual time up to 1 decimal and compare it with expected. 
- actual_rounded_time = round(ts.elapsed_time, 1) - expected_rounded_time = 1.0 - self.assertEqual(actual_rounded_time, expected_rounded_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py deleted file mode 100644 index 808a2221e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py +++ /dev/null @@ -1,474 +0,0 @@ -import logging -from typing import List - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.htraceback as htraceb -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_Traceback1 -# ############################################################################# - - -class Test_Traceback1(hunitest.TestCase): - def test_parse0(self) -> None: - txt = """ - - TEST - Traceback - TEST - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - txt = hprint.dedent(txt) - _LOG.debug("txt=\n%s", txt) - purify_from_client = False - # Run the function under test. - act_cfile, act_traceback = htraceb.parse_traceback( - txt, purify_from_client=purify_from_client - ) - # Check. 
- exp_traceback = """Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": -NameError: name 'repo_short_name' is not defined - TEST TEST TEST""" - self.assertEqual(act_traceback, exp_traceback) - - # pylint: disable=line-too-long - # TODO(gp): Add test and fix for the following traceback: - - # Bug1: - # Traceback (most recent call last): - # File "/Users/saggese/src/venv/amp.client_venv/bin/invoke", line 8, in - # sys.exit(program.run()) - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 373, in run - # self.parse_collection() - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 465, in parse_collection - # self.load_collection() - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 696, in load_collection - # module, parent = loader.load(coll_name) - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/loader.py", line 76, in load - # module = imp.load_module(name, fd, path, desc) - # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 234, in load_module - # return load_source(name, filename, file) - # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 171, in load_source - # module = _load(spec) - # File "", line 711, in _load - # File "", line 680, in _load_unlocked - # File "", line 855, in exec_module - # File "", line 228, in _call_with_frames_removed - # File 
"/Users/saggese/src/lem1/amp/tasks.py", line 8, in - # from helpers.lib_tasks import set_default_params # This is not an invoke target. - # File "/Users/saggese/src/lem1/amp/helpers/lib_tasks.py", line 23, in - # import helpers.hgit as hgit - # File "/Users/saggese/src/lem1/amp/helpers/git.py", line 16, in - # import helpers.hsystem as hsystem - # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 - # signature2 = _compute_file_signature(file_name, dir_depth) - # ^ - # SyntaxError: invalid syntax - # Traceback (most recent call last): - # File "/Users/saggese/src/lem1/amp/dev_scripts/tg.py", line 21, in - # import helpers.hsystem as hsystem - # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 - # signature2 = _compute_file_signature(file_name, dir_depth) - # ^ - # SyntaxError: invalid syntax - - # Bug2: - # Traceback (most recent call last): - # File "/app/amp/dataflow/pipelines/real_time/test/test_dataflow_amp_real_time_pipeline.py", line 46, in test1 - # ) = mdmdinex.get_ReplayedTimeMarketData_example2( - # TypeError: get_ReplayedTimeMarketData_example2() got an unexpected keyword argument 'df' - # - # 13:34:45 INFO traceback_to_cfile : _main : 76 : in_file_name=log.txt - # 13:34:45 INFO parser : read_file : 304 : Reading from 'log.txt' - # 13:34:45 ERROR traceback_to_cfile : _main : 87 : Can't find traceback in the file - - # Bug3: - # =================================== FAILURES =================================== - # _________________________ TestGetDataForInterval.test1 _________________________ - # Traceback (most recent call last): - # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3361, in get_loc - # return self._engine.get_loc(casted_key) - # File "pandas/_libs/index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc - # File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc - # File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in 
pandas._libs.hashtable.PyObjectHashTable.get_item - # File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item - # KeyError: 'end_ts' - # - # The above exception was the direct cause of the following exception: - # - # Traceback (most recent call last): - # File "/app/amp/market_data/test/test_market_data_client.py", line 46, in test1 - # data = market_data_client.get_data_for_interval( - # File "/app/amp/market_data/market_data.py", line 212, in get_data_for_interval - # df = self._get_data( - # File "/app/amp/market_data/market_data_client.py", line 93, in _get_data - # market_data["start_ts"] = market_data["end_ts"] - pd.Timedelta( - # File "/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__ - # indexer = self.columns.get_loc(key) - # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc - # raise KeyError(key) from err - # KeyError: 'end_ts' - - # Bug4: - # dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.01 s) FAILED [100%] - # - # =================================== FAILURES =================================== - # __________________ Test_get_configs_from_command_line1.test1 ___________________ - # Traceback (most recent call last): - # File "/app/dataflow/model/test/test_experiment_utils.py", line 35, in test1 - # configs = dtfmoexuti.get_configs_from_command_line(args) - # File "/app/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - # configs = cconfig.get_configs_from_builder(config_builder) - # File "/app/config_root/config/builder.py", line 48, in get_configs_from_builder - # imp = importlib.import_module(import_) - # File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - # return _bootstrap._gcd_import(name[level:], package, level) - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 961, in 
_find_and_load_unlocked - # File "", line 219, in _call_with_frames_removed - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 961, in _find_and_load_unlocked - # File "", line 219, in _call_with_frames_removed - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 973, in _find_and_load_unlocked - # ModuleNotFoundError: No module named 'research' - # ============================= slowest 3 durations ============================== - - # pylint: enable=line-too-long - - def _parse_traceback_helper( - self, - txt: str, - purify_from_client: bool, - exp_cfile: str, - exp_traceback: str, - ) -> None: - hdbg.dassert_isinstance(txt, str) - hdbg.dassert_isinstance(exp_cfile, str) - hdbg.dassert_isinstance(exp_traceback, str) - txt = hprint.dedent(txt) - _LOG.debug("txt=\n%s", txt) - # Run the function under test. - act_cfile, act_traceback = htraceb.parse_traceback( - txt, purify_from_client=purify_from_client - ) - _LOG.debug("act_cfile=\n%s", act_cfile) - _LOG.debug("act_traceback=\n%s", act_traceback) - # Compare cfile. - act_cfile = htraceb.cfile_to_str(act_cfile) - exp_cfile = hprint.dedent(exp_cfile) - _LOG.debug(hprint.to_str("exp_cfile act_cfile")) - self.assert_equal( - act_cfile, exp_cfile, fuzzy_match=True, purify_text=True - ) - # Compare traceback. - # Handle `None`. - act_traceback = str(act_traceback) - exp_traceback = hprint.dedent(exp_traceback) - _LOG.debug(hprint.to_str("exp_traceback act_traceback")) - self.assert_equal( - act_traceback, exp_traceback, fuzzy_match=True, purify_text=True - ) - - def test_parse1(self) -> None: - """ - Parse traceback with all files from Docker that actually exist in the - current repo. 
- """ - txt = """ - - TEST - Traceback - TEST - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - purify_from_client = False - # pylint: disable=line-too-long - exp_cfile = [ - ( - "$GIT_ROOT/helpers/test/test_lib_tasks.py", - 27, - "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)", - ), - ( - "$GIT_ROOT/helpers/lib_tasks.py", - 1265, - "_get_gh_issue_title:task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name)", - ), - ( - "$GIT_ROOT/helpers/git.py", - 397, - 'get_task_prefix_from_repo_short_name:if repo_short_name == "amp":', - ), - ] - exp_cfile = htraceb.cfile_to_str(exp_cfile) - # pylint: enable=line-too-long - exp_traceback = """ - Traceback (most recent call last): - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "$GIT_ROOT/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "$GIT_ROOT/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse_empty_traceback1(self) -> None: - """ - Parse an empty traceback file. 
- """ - txt = """ - - TEST - Traceback - TEST TEST TEST - """ - purify_from_client = True - exp_cfile: List[htraceb.CfileRow] = [] - exp_cfile = htraceb.cfile_to_str(exp_cfile) - exp_traceback = "None" - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse2(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # Use references to this file so that we are independent of the file - # layout. - # pylint: disable=line-too-long - txt = """ - Traceback (most recent call last): - File "./helpers/test/test_htraceback.py", line 146, in - _main(_parse()) - File "./helpers/test/test_htraceback.py", line 105, in _main - configs = cdtfut.get_configs_from_command_line(args) - File "/app/amp/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line - "config_builder": args.config_builder, - """ - purify_from_client = True - exp_cfile = """ - helpers/test/test_htraceback.py:146::_main(_parse()) - helpers/test/test_htraceback.py:105:_main:configs = cdtfut.get_configs_from_command_line(args) - helpers/test/test_htraceback.py:228:get_configs_from_command_line:"config_builder": args.config_builder, - """ - exp_traceback = """ - Traceback (most recent call last): - File "./helpers/test/test_htraceback.py", line 146, in - _main(_parse()) - File "./helpers/test/test_htraceback.py", line 105, in _main - configs = cdtfut.get_configs_from_command_line(args) - File "$GIT_ROOT/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line - "config_builder": args.config_builder, - """ - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse3(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # Use references to this file so that we are independent from the file - # layout. 
- # pylint: disable=line-too-long - txt = """ - collected 6 items - - helpers/test/test_lib_tasks.py::Test_pytest_failed1::test_classes1 (0.02 s) FAILED [ 16%] - - =================================== FAILURES =================================== - ______________________ Test_pytest_failed1.test_classes1 _______________________ - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 - self._helper(file_name, target_type, expected) - File "/app/amp/helpers/test/test_lib_tasks.py", line 1440, in _helper - actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "/app/amp/helpers/lib_tasks.py", line 2140, in pytest_failed - hdbg.dassert(m, "Invalid test='%s'", test) - File "/app/amp/helpers/dbg.py", line 129, in dassert - _dfatal(txt, msg, *args) - File "/app/amp/helpers/dbg.py", line 117, in _dfatal - dfatal(dfatal_txt) - File "/app/amp/helpers/dbg.py", line 63, in dfatal - raise assertion_type(ret) - AssertionError: - * Failed assertion * - cond=None - Invalid test='dev_scripts/testing/test/test_run_tests.py' - """ - # pylint: enable=line-too-long - purify_from_client = False - exp_cfile = """ - $GIT_ROOT/helpers/test/test_lib_tasks.py:1460:test_classes1:self._helper(file_name, target_type, expected) - $GIT_ROOT/helpers/test/test_lib_tasks.py:1440:_helper:actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - /venv/lib/python3.8/site-packages/invoke/tasks.py:127:__call__:result = self.body(*args, **kwargs) - $GIT_ROOT/helpers/lib_tasks.py:2140:pytest_failed:hdbg.dassert(m, "Invalid test='%s'", test) - $GIT_ROOT/helpers/dbg.py:129:dassert:_dfatal(txt, msg, *args) - $GIT_ROOT/helpers/dbg.py:117:_dfatal:dfatal(dfatal_txt) - $GIT_ROOT/helpers/dbg.py:63:dfatal:raise assertion_type(ret)""" - exp_traceback = r""" - Traceback (most recent call last): - File 
"$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 - self._helper(file_name, target_type, expected) - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1440, in _helper - actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "$GIT_ROOT/helpers/lib_tasks.py", line 2140, in pytest_failed - hdbg.dassert(m, "Invalid test='%s'", test) - File "$GIT_ROOT/helpers/dbg.py", line 129, in dassert - _dfatal(txt, msg, *args) - File "$GIT_ROOT/helpers/dbg.py", line 117, in _dfatal - dfatal(dfatal_txt) - File "$GIT_ROOT/helpers/dbg.py", line 63, in dfatal - raise assertion_type(ret) - AssertionError: - * Failed assertion * - cond=None - Invalid test='dev_scripts/testing/test/test_run_tests.py' - """ - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse4(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. 
- """ - # pylint: disable=line-too-long - txt = """ - =================================== FAILURES =================================== - ____________ TestEgSingleInstrumentDataReader2.test_true_real_time1 ____________ - Traceback (most recent call last): - File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, in test_true_real_time1 - self._execute_node(node) - File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node - dict_ = node.fit() - File "/app/amp/core/dataflow/nodes/sources.py", line 385, in fit - self.df = self._get_data_until_current_time() - File "/app/amp/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time - df = self._get_data() - File "/app/amp/core/dataflow/nodes/sources.py", line 574, in _get_data - hdbg.dassert_lte(df.index.max(), current_time) - File "/app/amp/helpers/dbg.py", line 172, in dassert_lte - cond = val1 <= val2 - TypeError: '<=' not supported between instances of 'float' and 'Timestamp' - ============================= slowest 3 durations ============================== - """ - purify_from_client = False - exp_cfile = r""" - $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:182:test_true_real_time1:self._execute_node(node) - $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:238:_execute_node:dict_ = node.fit() - $GIT_ROOT/core/dataflow/nodes/sources.py:385:fit:self.df = self._get_data_until_current_time() - $GIT_ROOT/core/dataflow/nodes/sources.py:429:_get_data_until_current_time:df = self._get_data() - $GIT_ROOT/core/dataflow/nodes/sources.py:574:_get_data:hdbg.dassert_lte(df.index.max(), current_time) - $GIT_ROOT/helpers/dbg.py:172:dassert_lte:cond = val1 <= val2/TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" - exp_traceback = r""" - Traceback (most recent call last): - File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, 
in test_true_real_time1 - self._execute_node(node) - File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node - dict_ = node.fit() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 385, in fit - self.df = self._get_data_until_current_time() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time - df = self._get_data() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 574, in _get_data - hdbg.dassert_lte(df.index.max(), current_time) - File "$GIT_ROOT/helpers/dbg.py", line 172, in dassert_lte - cond = val1 <= val2 - TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse5(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # pylint: disable=line-too-long - txt = """ - Traceback (most recent call last): - File "/app/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked - ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' - """ - purify_from_client = False - exp_cfile = """ - $GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py:37:test1:configs = dtfmoexuti.get_configs_from_command_line(args) - 
$GIT_ROOT/dataflow/model/experiment_utils.py:195:get_configs_from_command_line:configs = cconfig.get_configs_from_builder(config_builder) - $GIT_ROOT/config_root/config/builder.py:46:get_configs_from_builder:imp = importlib.import_module(import_) - /usr/lib/python3.8/importlib/__init__.py:127:import_module:return _bootstrap._gcd_import(name[level:], package, level) - :1014:_gcd_import: - :991:_find_and_load: - :973:_find_and_load_unlocked: - """ - exp_traceback = """ - Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked - ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' - """ - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py deleted file mode 100644 index a6e1e2ef6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py +++ /dev/null @@ -1,954 +0,0 @@ -""" -Import as: - -import helpers.test.test_unit_test as ttutes -""" - -import logging -import tempfile -from 
typing import Optional, Tuple - -import pandas as pd -import pytest - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -def _git_add(file_name: str) -> None: - # pylint: disable=unreachable - cmd = f"git add -u {file_name}" - _LOG.debug("> %s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - if rc: - _LOG.warning( - "Can't run '%s': you need to add the file manually", - cmd, - ) - - -def _to_skip_on_update_outcomes() -> bool: - """ - Determine whether to skip on `--update_outcomes`. - - Some tests can't pass with `--update_outcomes`, since they exercise - the logic in `--update_outcomes` itself. - - We can't always use `@pytest.mark.skipif(hunitest.get_update_tests)` - since pytest decides which tests need to be run before the variable - is actually set. - """ - to_skip = False - if hunitest.get_update_tests(): - _LOG.warning( - "Skip this test since it exercises the logic for --update_outcomes" - ) - to_skip = True - return to_skip - - -# ############################################################################# -# TestTestCase1 -# ############################################################################# - - -class TestTestCase1(hunitest.TestCase): - """ - Test free-standing functions in unit_test.py. - """ - - def test_get_input_dir1(self) -> None: - """ - Test hunitest.get_input_dir(). 
- """ - actual = self.get_input_dir() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir1/input" - self.assertEqual(actual, expected) - - def test_get_input_dir2(self) -> None: - use_only_test_class = False - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/input" - self.assertEqual(actual, expected) - - def test_get_input_dir3(self) -> None: - use_only_test_class = False - test_class_name = None - test_method_name = None - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir3/input" - self.assertEqual(actual, expected) - - def test_get_input_dir4(self) -> None: - use_only_test_class = True - test_class_name = None - test_method_name = None - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1/input" - self.assertEqual(actual, expected) - - def test_get_output_dir1(self) -> None: - """ - Test hunitest.get_output_dir(). 
- """ - actual = self.get_output_dir() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_output_dir1/output" - self.assertEqual(actual, expected) - - def test_get_scratch_space1(self) -> None: - """ - Test hunitest.get_scratch_space(). - """ - actual = self.get_scratch_space() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ( - "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_scratch_space1" - "/tmp.scratch" - ) - self.assertEqual(actual, expected) - - def test_get_scratch_space2(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_scratch_space( - test_class_name=test_class_name, test_method_name=test_method_name - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ( - "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/tmp.scratch" - ) - self.assertEqual(actual, expected) - - def test_get_scratch_space3(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - use_absolute_path = False - actual = self.get_scratch_space( - test_class_name=test_class_name, - test_method_name=test_method_name, - use_absolute_path=use_absolute_path, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "outcomes/test_class.test_method/tmp.scratch" - self.assertEqual(actual, expected) - - def test_get_s3_scratch_dir1(self) -> None: - actual = self.get_s3_scratch_dir() - _LOG.debug("actual=%s", actual) - # It is difficult to test, so we just execute. 
- - def test_get_s3_scratch_dir2(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_s3_scratch_dir( - test_class_name=test_class_name, test_method_name=test_method_name - ) - _LOG.debug("actual=%s", actual) - # It is difficult to test, so we just execute. - - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_assert_not_equal1(self) -> None: - actual = "hello world" - expected = "hello world w" - tmp_dir = tempfile.mkdtemp() - with self.assertRaises(RuntimeError): - self.assert_equal(actual, expected, dst_dir=tmp_dir) - - def test_assert_not_equal2(self) -> None: - actual = "hello world" - expected = "hello world w" - # Create a dir like `/var/tmp/tmph_kun9xq`. - tmp_dir = tempfile.mkdtemp() - self.assert_equal( - actual, expected, abort_on_error=False, dst_dir=tmp_dir - ) - # Compute the signature from the dir. - actual = hunitest.get_dir_signature( - tmp_dir, include_file_content=True, num_lines=None - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - actual = actual.replace(tmp_dir, "$TMP_DIR") - # pylint: disable=line-too-long - expected = """ - # Dir structure - $TMP_DIR - $TMP_DIR/tmp_diff.sh - # File signatures - len(file_names)=1 - file_names=$TMP_DIR/tmp_diff.sh - # $TMP_DIR/tmp_diff.sh - num_lines=8 - ''' - #!/bin/bash - if [[ $1 == "wrap" ]]; then - cmd='vimdiff -c "windo set wrap"' - else - cmd='vimdiff' - fi; - cmd="$cmd helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.actual.txt helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.expected.txt" - eval $cmd - - ''' - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert_equal_fuzzy_match1(self) -> None: - actual = "hello world" - expected = "hello world " - is_equal = self.assert_equal(actual, expected, fuzzy_match=True) - 
self.assertTrue(is_equal) - - def test_assert_equal5(self) -> None: - actual = "hello world" - expected = "hello world2" - with self.assertRaises(RuntimeError): - self.assert_equal(actual, expected, fuzzy_match=True) - - def _remove_lines1(self) -> None: - txt = r""" - # ##################################################################### - * Failed assertion * - 'in1' not in '{'in1': 'out1'}' - ## - `in1` already receiving input from node n1 - # ##################################################################### - # ##################################################################### - """ - actual = hunitest._remove_spaces(txt) - expected = r""" - * Failed assertion * - 'in1' not in '{'in1': 'out1'}' - ## - `in1` already receiving input from node n1 - # ##################################################################### - """ - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_AssertEqual1 -# ############################################################################# - - -class Test_AssertEqual1(hunitest.TestCase): - def test_equal1(self) -> None: - """ - Matching actual and expected without fuzzy matching. - """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - is_equal = hunitest.assert_equal(actual, expected, test_name, test_dir) - _LOG.debug(hprint.to_str("is_equal")) - self.assertTrue(is_equal) - - def test_equal2(self) -> None: - """ - Matching actual and expected with fuzzy matching. 
- """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - fuzzy_match = True - is_equal = hunitest.assert_equal( - actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match - ) - _LOG.debug(hprint.to_str("is_equal")) - self.assertTrue(is_equal) - - def test_not_equal1(self) -> None: - """ - Mismatching actual and expected. - """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - fuzzy_match = False - with self.assertRaises(RuntimeError) as cm: - hunitest.assert_equal( - actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match - ) - # Check that the assertion is what expected. 
- actual = str(cm.exception) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ''' --------------------------------------------------------------------------------- -ACTUAL vs EXPECTED: Test_AssertEqual1.test_not_equal1 --------------------------------------------------------------------------------- - - ( -completed failure Lint Run_linter | completed failure Lint Run_linter -completed success Lint Fast_tests ( -completed success Lint Slow_tests ( -Diff with: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -ACTUAL VARIABLE: Test_AssertEqual1.test_not_equal1 --------------------------------------------------------------------------------- -expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -"""''' - if actual != expected: - hio.to_file("actual.txt", actual) - hio.to_file("expected.txt", expected) - self.assert_equal(actual, expected, fuzzy_match=False) - # We don't use self.assert_equal() since this is exactly we are testing, - # so we use a trusted function. - self.assertEqual(actual, expected) - - # For debugging: don't commit code with this test enabled. - @pytest.mark.skip( - reason="This is only used to debug the debugging the infrastructure" - ) - def test_not_equal_debug(self) -> None: - """ - Create a mismatch on purpose to see how the suggested updated to - expected variable looks like. 
- """ - actual = r"""empty -start - -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests - -end - -""" - expected = "hello" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# TestCheckString1 -# ############################################################################# - - -class TestCheckString1(hunitest.TestCase): - def test_check_string1(self) -> None: - """ - Compare the actual value to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Overwrite the golden file, so that --update_golden doesn't matter. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string(actual) - # Actual match the golden outcome and it wasn't updated. - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_string_not_equal1(self) -> None: - """ - Compare the actual value to a mismatching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - # Actual doesn't match the golden outcome. 
- self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - - def test_check_string_not_equal2(self) -> None: - """ - Compare the actual value to a mismatching golden outcome and udpate it. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # Force updating the golden outcomes. - self.mock_update_tests() - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - new_golden = hio.from_file(file_name) - _git_add(file_name) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - # The golden outcome was updated. - self.assertEqual(new_golden, "hello world") - - def test_check_string_not_equal3(self) -> None: - """ - Like test_check_string_not_equal1() but raising the exception. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - with self.assertRaises(RuntimeError): - self.check_string(actual) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - - def test_check_string_missing1(self) -> None: - """ - When running with --update_outcomes, the golden outcome was missing and - so it was added. - - This tests the code path when action_on_missing_golden="update". - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - # Force updating the golden outcomes. 
- self.mock_update_tests() - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - hdbg.dassert_file_exists(file_name) - new_golden = hio.from_file(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - def test_check_string_missing2(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="assert", and the unit test framework - asserted. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False, action_on_missing_golden="assert" - ) - hdbg.dassert_file_exists(file_name + ".tmp") - new_golden = hio.from_file(file_name + ".tmp") - finally: - # Clean up. - hio.delete_file(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertFalse(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - def test_check_string_missing3(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="update", and the unit test framework updates - the golden. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. 
- outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False, action_on_missing_golden="update" - ) - hdbg.dassert_file_exists(file_name) - new_golden = hio.from_file(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - -# ############################################################################# -# TestCheckDataFrame1 -# ############################################################################# - - -class TestCheckDataFrame1(hunitest.TestCase): - """ - Some of these tests can't pass with `--update_outcomes`, since they - exercise the logic in `--update_outcomes` itself. - """ - - def _check_df_helper( - self, actual: pd.DataFrame, abort_on_error: bool, err_threshold: float - ) -> Tuple[bool, bool, Optional[bool]]: - golden_outcomes = pd.DataFrame( - [[0, 1, 2], [3, 4, 5]], columns="a b c".split() - ) - # - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - # Overwrite the golden file, so that --update_golden doesn't matter. - hio.create_enclosing_dir(file_name, incremental=True) - golden_outcomes.to_csv(file_name) - try: - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, - abort_on_error=abort_on_error, - err_threshold=err_threshold, - ) - finally: - # Clean up. - golden_outcomes.to_csv(file_name) - _git_add(file_name) - return outcome_updated, file_exists, is_equal - - def test_check_df_equal1(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. 
- """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.0001 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_equal2(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.01, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_equal3(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.05, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_not_equal1(self) -> None: - """ - Compare the actual value of a df to a non-matching golden outcome. 
- """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = False - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome doesn't match the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - exp_error_msg = """ - actual= - a b c - 0 0 1.06 2 - 1 3 4.00 5 - expected= - a b c - 0 0 1 2 - 1 3 4 5 - actual_masked= - [[ nan 1.06 nan] - [ nan nan nan]] - expected_masked= - [[nan 1. nan] - [nan nan nan]] - err= - [[ nan 0.06 nan] - [ nan nan nan]] - max_err=0.060 - """ - self.assert_equal(self._error_msg, exp_error_msg, fuzzy_match=True) - - def test_check_df_not_equal2(self) -> None: - """ - Compare the actual value of a df to a not matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a d c".split()) - abort_on_error = False - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome doesn't match the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - - def test_check_df_not_equal3(self) -> None: - """ - Compare the actual value to a mismatching golden outcome and update it. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - golden_outcome = pd.DataFrame( - [[0, 2, 2], [3, 4, 5]], columns="a b c".split() - ) - # Force updating the golden outcomes. - self.mock_update_tests() - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.create_enclosing_dir(file_name, incremental=True) - golden_outcome.to_csv(file_name) - try: - # Check. 
- outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False - ) - # - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.to_file(file_name, str(golden_outcome)) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_not_equal4(self) -> None: - """ - Like `test_check_df_not_equal1()` but raising the exception. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - with self.assertRaises(RuntimeError): - self._check_df_helper(actual, abort_on_error, err_threshold) - - def test_check_df_missing1(self) -> None: - """ - When running with --update_outcomes, the golden outcome was missing and - so it was added. - - This tests the code path when action_on_missing_golden="update". - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - # Force updating the golden outcomes. - self.mock_update_tests() - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - _LOG.debug(hprint.to_str("file_name")) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False - ) - hdbg.dassert_file_exists(file_name) - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.delete_file(file_name) - _git_add(file_name) - # Expected outcome doesn't exists and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # Check golden. 
- self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_missing2(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="assert", and the unit test framework - asserted. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False, action_on_missing_golden="assert" - ) - hdbg.dassert_file_exists(file_name + ".tmp") - new_golden = pd.read_csv(file_name + ".tmp", index_col=0) - hdbg.dassert_path_not_exists(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - # Expected outcome doesn't exists and it was not updated. - self.assertFalse(outcome_updated) - self.assertFalse(file_exists) - self.assertIs(is_equal, None) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_missing3(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="update", and the unit test framework updates - the golden. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False, action_on_missing_golden="update" - ) - hdbg.dassert_file_exists(file_name) - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.delete_file(file_name) - # Expected outcome doesn't exists and it was not updated. 
- self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertIs(is_equal, None) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - -# ############################################################################# -# Test_check_string_debug1 -# ############################################################################# - - -class Test_check_string_debug1(hunitest.TestCase): - def test1(self) -> None: - actual = "hello" - # action_on_missing_golden = "assert" - action_on_missing_golden = "update" - self.check_string( - actual, action_on_missing_golden=action_on_missing_golden - ) - - def test2(self) -> None: - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - # action_on_missing_golden = "assert" - action_on_missing_golden = "update" - self.check_dataframe( - actual, action_on_missing_golden=action_on_missing_golden - ) - - -# ############################################################################# -# Test_get_dir_signature1 -# ############################################################################# - - -class Test_get_dir_signature1(hunitest.TestCase): - def helper(self, include_file_content: bool) -> str: - in_dir = self.get_input_dir() - actual = hunitest.get_dir_signature( - in_dir, include_file_content, num_lines=None - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - return actual # type: ignore[no-any-return] - - def test1(self) -> None: - """ - Test dir signature excluding the file content. 
- """ - include_file_content = False - actual = self.helper(include_file_content) - # pylint: disable=line-too-long - expected = r""" - # Dir structure - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0 - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1 - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test dir signature including the file content. - """ - include_file_content = True - actual = self.helper(include_file_content) - # The golden outcome is long and uninteresting so we use check_string. 
- self.check_string(actual, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py deleted file mode 100644 index 14910d1f5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py +++ /dev/null @@ -1,288 +0,0 @@ -import logging -import unittest.mock as umock -from typing import Any - -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _check(self: Any, str_to_eval: str, exp_val: str) -> None: - """ - Evaluate `str_to_eval` and compare it to expected value `exp_val`. - """ - # The variable lives 3 levels in the stack trace from here. - act_val = hprint.to_str(str_to_eval, frame_level=3) - _LOG.debug("%s", act_val) - self.assert_equal(act_val, exp_val, purify_text=True) - - -# ############################################################################# -# _Class -# ############################################################################# - - -class _Class: - def __init__(self) -> None: - self.a = 3 - self.b = 14 - - def get_a(self) -> int: - return self.a - - def get_b(self) -> int: - return self.b - - -# ############################################################################# -# _TestCase -# ############################################################################# - - -class _TestCase(hunitest.TestCase): - def check(self, *args, **kwargs) -> None: - _check(self, *args, **kwargs) - - -# ############################################################################# -# Test_Mock1 -# ############################################################################# - - -# References -# - https://docs.python.org/3/library/unittest.mock.html -# - 
https://realpython.com/python-mock-library/ -# -# - Mocks are used to imitate objects in the code base and need to have the same -# interface of objects they are replacing -# - `Mock` and `MagicMock` objects -# - avoid to create stubs by creating attributes and methods as they are -# accessed -# - accessing the same attribute returns the same mock -# - can be configured to specify return values -# - store details of how they have been used -# - After execution, one can make assertions about how mocks have been used - -# umock.Mockspec -# :param spec: specification for the mock object, e.g., using a class to create -# the proper interface - - -# ############################################################################# -# Test_Mock1 -# ############################################################################# - - -class Test_Mock1(_TestCase): - """ - - A `Mock` creates attributes / methods as you access them - - The return value of a mocked attribute / method is also a `Mock` - """ - - def test_lazy_attributes1(self) -> None: - """ - Assigning a class attribute on a Mock creates a Mock. - """ - obj = umock.Mock() - # obj is a Mock object. - self.check("obj", "obj=") - # Calling an attribute creates a Mock. - self.check("obj.a", "obj.a=") - # Assigning an attribute in the mock creates an attribute. - obj.a = 3 - self.check("obj.a", "obj.a=3") - - def test_lazy_methods1(self) -> None: - """ - Calling a method on a Mock creates a Mock. - """ - # Mock json module `import json`. - json = umock.Mock() - self.check("json", "json=") - # Create a function on the fly that returns a mock. - v = json.dumps() - self.assertTrue(isinstance(v, umock.Mock)) - self.check("json.dumps", "json.dumps=") - # The mocked function and the returned value from a mock function are - # different mocks. 
- self.check("v", "v=") - self.check("type(v)", "type(v)=") - self.check( - "json.dumps()", "json.dumps()=" - ) - self.assertTrue(isinstance(json.dumps, umock.Mock)) - self.assertNotEqual(id(v), id(json.dumps)) - - def test_assert1(self) -> None: - """ - Check what function was called. - """ - json = umock.Mock() - json.loads("hello") - # Check that the mocked function was called as expected. - json.loads.assert_called() - json.loads.assert_called_once() - json.loads.assert_called_with("hello") - self.assertEqual(json.loads.call_count, 1) - - def test_str1(self) -> None: - mock = umock.Mock() - # Calling `str()` on a mock creates a mock on the fly. - self.check("str(mock)", "str(mock)=\"\"") - # Assign a mocked function returning "hello" to mock.__str__. - mock.__str__ = umock.Mock(return_value="hello") - self.assertEqual(str(mock), "hello") - # One can't assign the return value, like one would do with a MagicMock. - # mock.__str__.return_value = "hello" - - def test_spec1(self) -> None: - # Create a Mock based on the class `_Class`. - mock = umock.Mock(spec=_Class) - # - self.assertTrue(isinstance(mock, _Class)) - mock.get_a = umock.Mock(return_value=3) - self.assertEqual(mock.get_a(), 3) - - -# ############################################################################# -# Test_MagicMock1 -# ############################################################################# - - -class Test_MagicMock1(_TestCase): - """ - A `MagicMock` is a subclass of `Mock` with some magic methods already - created. - """ - - def test_get1(self) -> None: - """ - Assign a MagicMock using array notation. - """ - mock = umock.MagicMock() - # MagicMock automatically infer `__get_item__()`. - mock[3] = "fish" - # Check. - mock.__setitem__.assert_called_with(3, "fish") - - def test_get2(self) -> None: - mock = umock.MagicMock() - mock.__getitem__.return_value = "result" - - def test_str1(self) -> None: - """ - Mock `str()` method. - """ - mock = umock.MagicMock() - # Mock `str()`. 
- mock.__str__.return_value = "foobar" - # Check. - self.assertEqual(str(mock), "foobar") - mock.__str__.assert_called_with() - - -# ############################################################################# -# Test_Mock_Class1 -# ############################################################################# - - -class Test_Mock_Class1(_TestCase): - def test_without_mock1(self) -> None: - obj = _Class() - self.assertEqual(obj.get_a(), 3) - self.assertEqual(obj.get_b(), 14) - - def test_with_mock1(self) -> None: - obj = _Class() - # Mock method `get_a()`. - obj.get_a = umock.MagicMock(return_value=4) - # Check. - self.assertEqual(obj.get_a(), 4) - obj.get_a.assert_called() - - def test_with_mock2(self) -> None: - obj = _Class() - # Mock method `get_a()`. - obj.get_a = umock.MagicMock(side_effect=KeyError("foo")) - # Check. - with self.assertRaises(KeyError) as cm: - obj.get_a() - # - actual = str(cm.exception) - expected = "'foo'" - self.assert_equal(actual, expected) - obj.get_a.assert_called() - - -# ############################################################################# -# Test_Mock_Class_with_decorator1 -# ############################################################################# - -# `umock.patch()` -# - replaces classes in a particular module with a Mock object -# - by default creates a MagicMock - -# `umock.patch.object(target, attribute)` patches the named member "attribute" -# on the object "target" with a mock object. - - -# ############################################################################# -# Test_Mock_Class_with_decorator1 -# ############################################################################# - - -class Test_Mock_Class_with_decorator1(_TestCase): - @umock.patch.object(_Class, "get_a", return_value=4) - def test1(self, mock_method: umock.MagicMock) -> None: - """ - Patch method of an object using a decorator. - """ - obj = _Class() - # Check. 
- # self.assertIs(mock_method, umock.MagicMock) - self.check( - "mock_method", "mock_method=" - ) - self.assertEqual(obj.get_a(), 4) - mock_method.assert_called() - obj.get_a.assert_called() - - -# ############################################################################# -# Test_Mock_Class_with_context_manager1 -# ############################################################################# - - -class Test_Mock_Class_with_context_manager1(_TestCase): - def test1(self) -> None: - """ - Patch an object method using a context manager. - """ - # Inside the context manager, the method is mocked. - with umock.patch.object(_Class, "get_a", return_value=4): - obj = _Class() - # Check. - self.check( - "obj.get_a", "obj.get_a=" - ) - self.assertEqual(obj.get_a(), 4) - obj.get_a.assert_called() - # Outside the context manager everything is normal. - obj = _Class() - # Check. - self.check( - "obj.get_a", - "obj.get_a=>", - ) - self.assertEqual(obj.get_a(), 3) - - def test_dict1(self) -> None: - """ - Patch a dictionary. - """ - foo = {"key": "value"} - with umock.patch.dict(foo, {"key": "new_value"}, clear=True): - self.assertEqual(foo["key"], "new_value") - # Outside the context manager everything is normal. 
- self.assertEqual(foo["key"], "value") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py deleted file mode 100644 index 6488621a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py +++ /dev/null @@ -1,1065 +0,0 @@ -""" -Import as: - -import helpers.test.test_hunit_test_purification as thuntepur -""" - -import datetime -import logging -import os -import unittest.mock as umock -from typing import Any, List - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_purify_text1 -# ############################################################################# - - -class Test_purify_text1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str, **kwargs: Any) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(txt) - self.assert_equal(actual, expected, **kwargs) - - def test1(self) -> None: - txt = "amp/helpers/test/test_system_interaction.py" - expected = "helpers/test/test_system_interaction.py" - self.check_helper(txt, expected) - - def test2(self) -> None: - txt = "amp/helpers/test/test_system_interaction.py" - expected = "helpers/test/test_system_interaction.py" - self.check_helper(txt, expected) - - def test3(self) -> None: - txt = "['amp/helpers/test/test_system_interaction.py']" - expected = "['helpers/test/test_system_interaction.py']" - 
self.check_helper(txt, expected) - - def test4(self) -> None: - txt = "app.helpers.test.test_system_interaction.py" - expected = "helpers.test.test_system_interaction.py" - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test that longer paths are processed before shorter ones. - """ - txt = "/home/user/project/src/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user" - expected = "$GIT_ROOT/src/file.py" - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test that paths with multiple occurrences of the same pattern are - processed correctly. - """ - txt = "/home/user/project/src/project/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user" - expected = "$GIT_ROOT/src/project/file.py" - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test that paths with multiple patterns are processed in the correct - order. - """ - txt = "/home/user/project/src/project/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user/project/src" - expected = "$GIT_ROOT/src/project/file.py" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test that paths with no matching patterns are left unchanged. 
- """ - txt = "/home/user/other/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user/project/src" - expected = "/home/user/other/file.py" - self.check_helper(txt, expected) - - def test9(self) -> None: - super_module_path = hgit.get_client_root(super_module=True) - # TODO(gp): We should remove the current path. - # pylint: disable=line-too-long - txt = r""" - ************* Module input [pylint] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] - cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] - """ - txt = hprint.dedent(txt) - txt = txt.replace("$SUPER_MODULE", super_module_path) - expected = r""" - ************* Module input [pylint] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] - cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] - """ - # pylint: enable=line-too-long - self.check_helper(txt, expected, dedent=True) - - def test10(self) -> None: - """ - Test case when client root path is equal to `/` - """ - # pylint: disable=redefined-outer-name - hgit = umock.Mock() - hgit.get_client_root.return_value = "/" - txt = "/tmp/subdir1" - expected = txt - self.check_helper(txt, expected) - - def test11(self) -> None: - """ - Test the correct order of `app` -> `amp` purification with multiple - import statements. 
- """ - txt = """ - import app.amp.helpers_root.helpers.test.test_file - from app.amp.helpers_root.helpers.hprint import dedent - import app.amp.helpers.config - from amp.app.helpers.config import get_config - import amp.app.helpers_root.config - """ - expected = """ - import helpers.test.test_file - from helpers.hprint import dedent - import helpers.config - from helpers.config import get_config - import helpers.config - """ - self.check_helper(txt, expected) - - def test12(self) -> None: - """ - Test amp and app purification in file path strings. - """ - txt = """ - app/amp/helpers_root/helpers/test/test_file.py - amp/app/helpers_root/helpers/test/test_file.py - """ - expected = """ - helpers/test/test_file.py - helpers/test/test_file.py - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_directory_paths1 -# ############################################################################# - - -class Test_purify_directory_paths1(hunitest.TestCase): - def check_helper(self, input_: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_directory_paths(input_) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test the replacement of `GIT_ROOT`. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/gitroot/src/subdir/file.py" - expected = "$GIT_ROOT/src/subdir/file.py" - self.check_helper(input_, expected) - - def test2(self) -> None: - """ - Test the replacement of `CSFY_HOST_GIT_ROOT_PATH`. 
- """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/csfy_host_git_root/other/file.py" - expected = "$CSFY_HOST_GIT_ROOT_PATH/other/file.py" - self.check_helper(input_, expected) - - def test3(self) -> None: - """ - Test the replacement of `PWD`. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/documents/file.py" - expected = "$PWD/documents/file.py" - self.check_helper(input_, expected) - - def test4(self) -> None: - """ - Test the replacement when `GIT_ROOT`, `CSFY_HOST_GIT_ROOT_PATH` and - current working directory are the same. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/file.py" - expected = "$GIT_ROOT/file.py" - self.check_helper(input_, expected) - - -# ############################################################################# -# Test_purify_from_environment1 -# ############################################################################# - - -class Test_purify_from_environment1(hunitest.TestCase): - def check_helper(self, input_: str, expected: str) -> None: - try: - # Manually set a user name to test the behaviour. - hsystem.set_user_name("root") - # Run. 
- text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_from_environment(input_) - self.assert_equal(actual, expected, fuzzy_match=True) - finally: - # Reset the global user name variable regardless of a test results. - hsystem.set_user_name(None) - - def test1(self) -> None: - input_ = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-root-1.0.0" - expected = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0" - self.check_helper(input_, expected) - - def test2(self) -> None: - input_ = "--name root.amp_test.app.app" - expected = "--name $USER_NAME.amp_test.app.app" - self.check_helper(input_, expected) - - def test3(self) -> None: - input_ = "run --rm -l user=root" - expected = "run --rm -l user=$USER_NAME" - self.check_helper(input_, expected) - - def test4(self) -> None: - input_ = "run_docker_as_root='True'" - expected = "run_docker_as_root='True'" - self.check_helper(input_, expected) - - def test5(self) -> None: - input_ = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" - expected = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" - self.check_helper(input_, expected) - - -# ############################################################################# -# Test_purify_amp_reference1 -# ############################################################################# - - -class Test_purify_amp_reference1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - txt = hprint.dedent(txt) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_amp_references(txt) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Remove the reference to `amp.`. 
- """ - txt = """ - * Failed assertion * - Instance '' - of class '_Man' is not a subclass of '' - """ - expected = r""" - * Failed assertion * - Instance '' - of class '_Man' is not a subclass of '' - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test removing multiple amp references in a single string. - """ - txt = """ - ImportError: No module named 'amp.helpers.test.test_file' - """ - expected = r""" - ImportError: No module named 'helpers.test.test_file' - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test removing amp references in file paths. - """ - txt = """ - File "/home/user/amp/helpers/test/test_dbg.py", line 10 - File "/home/user/amp/helpers/test/test_file.py", line 20 - """ - expected = r""" - File "/home/user/helpers/test/test_dbg.py", line 10 - File "/home/user/helpers/test/test_file.py", line 20 - """ - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test removing amp references in import statements. - """ - txt = """ - from amp.helpers.test import test_dbg - import amp.helpers.test.test_file - from amp.helpers.test.test_dbg import _Man - """ - expected = r""" - from helpers.test import test_dbg - import helpers.test.test_file - from helpers.test.test_dbg import _Man - """ - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test removing amp references in docstrings and comments. - """ - txt = """ - # This is a test for amp.helpers.test.test_dbg - """ - expected = r""" - # This is a test for helpers.test.test_dbg - """ - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test removing amp references in error messages with multiple - occurrences. 
- """ - txt = """ - Error in amp.helpers.test.test_dbg: Invalid input - Error in amp.helpers.test.test_file: File not found - Error in amp.helpers.test.test_dbg: Permission denied - """ - expected = r""" - Error in helpers.test.test_dbg: Invalid input - Error in helpers.test.test_file: File not found - Error in helpers.test.test_dbg: Permission denied - """ - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test that longer amp paths are processed before shorter ones. - """ - txt = "amp/helpers/amp/test/test_file.py" - expected = "helpers/test/test_file.py" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test that nested amp references are processed correctly. - """ - txt = "amp.helpers.test.amp.TestClass" - expected = "helpers.test.amp.TestClass" - self.check_helper(txt, expected) - - def test9(self) -> None: - """ - Test removing amp references from test creation comments with various - module paths. - """ - txt = """ - # Test created for amp.helpers.test.test_file - # Test created for amp.core.dataflow.model - # Test created for amp.helpers.test.test_dbg._Man - """ - expected = r""" - # Test created for helpers.test.test_file - # Test created for core.dataflow.model - # Test created for helpers.test.test_dbg._Man - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_app_references1 -# ############################################################################# - - -class Test_purify_app_references1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_app_references(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test app.helpers reference removal. 
- """ - txt = "app.helpers.test.test_file" - expected = "helpers.test.test_file" - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test app.amp.helpers reference removal. - """ - txt = "app.amp.helpers.test.test_file" - expected = "amp.helpers.test.test_file" - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test app.amp.helpers_root.helpers reference removal. - """ - txt = "app.amp.helpers_root.helpers.test.test_file" - expected = "amp.helpers.test.test_file" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test multiple app references in the same string. - """ - txt = """ - app.helpers.test.test_file - app.amp.helpers.test.test_file - app.amp.helpers_root.helpers.test.test_file - """ - expected = """ - helpers.test.test_file - amp.helpers.test.test_file - amp.helpers.test.test_file - """ - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test that longer app paths are processed before shorter ones. - """ - txt = "app/helpers/app/test/test_file.py" - expected = "helpers/test/test_file.py" - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test that app.amp.helpers_root references are processed before app.amp. - """ - txt = "app.amp.helpers_root.helpers.test.TestClass" - expected = "amp.helpers.test.TestClass" - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test string with no app references. - """ - txt = "path/to/file.txt" - expected = "path/to/file.txt" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test removing app references from test creation comments with various - module paths. 
- """ - txt = """ - # Test created for app.helpers.test.test_file - # Test created for app.core.dataflow.model - # Test created for app.helpers.test.test_dbg._Man - """ - expected = r""" - # Test created for helpers.test.test_file - # Test created for core.dataflow.model - # Test created for helpers.test.test_dbg._Man - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_from_env_vars -# ############################################################################# - - -# TODO(ShaopengZ): numerical issue. (arm vs x86) -@pytest.mark.requires_ck_infra -class Test_purify_from_env_vars(hunitest.TestCase): - """ - Test purification from env vars. - """ - - def check_helper(self, env_var: str) -> None: - env_var_value = os.environ[env_var] - input_ = f"s3://{env_var_value}/" - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_from_env_vars(input_) - expected = f"s3://${env_var}/" - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == "//cmamp", - reason="Run only in //cmamp", - ) - def test1(self) -> None: - """ - - $CSFY_AWS_S3_BUCKET - """ - env_var = "CSFY_AWS_S3_BUCKET" - self.check_helper(env_var) - - -# TODO(gp): HelpersTask1 -# @pytest.mark.skipif( -# not hrecouti.get_repo_config().get_name() == "//cmamp", -# reason="Run only in //cmamp", -# ) -# def test_end_to_end(self) -> None: -# """ -# - Multiple env vars. 
-# """ -# #am_aws_s3_bucket = os.environ["AM_AWS_S3_BUCKET"] -# csfy_aws_s3_bucket = os.environ["CSFY_AWS_S3_BUCKET"] -# # -# text = f""" -# $AM_AWS_S3_BUCKET = {am_aws_s3_bucket} -# $CSFY_AWS_S3_BUCKET = {csfy_aws_s3_bucket} -# """ -# # -# text_purifier = huntepur.TextPurifier() -# actual = text_purifier.purify_from_env_vars(text) -# self.check_string(actual, fuzzy_match=True) - - -# ############################################################################# -# Test_purify_object_representation1 -# ############################################################################# - - -class Test_purify_object_representation1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - txt = hprint.dedent(txt) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_object_representation(txt) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - txt = """ - load_prices: {'source_node_name': 'RealTimeDataSource object - at 0x7f571c329b50 - """ - expected = r""" - load_prices: {'source_node_name': 'RealTimeDataSource object - at 0x""" - self.check_helper(txt, expected) - - def test2(self) -> None: - txt = """ - load_prices: {'source_node_name at 0x7f571c329b51': - 'RealTimeDataSource object at 0x7f571c329b50 - """ - expected = r""" - load_prices: {'source_node_name at 0x': - 'RealTimeDataSource object at 0x""" - self.check_helper(txt, expected) - - def test3(self) -> None: - txt = """ - load_prices: {'source_node_name': 'RealTimeDataSource', - 'source_node_kwargs': {'market_data': - , 'period': 'last_5mins', 'asset_id_col': 'asset_id', - 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', - 'execution_mode': 'real_time', 'process_forecasts_config': - {'market_data': - ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': - datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), - 'ath_end_time': datetime.time(16, 40), 'trading_end_time': - 
datetime.time(16, 4 0)}} - """ - expected = r""" - load_prices: {'source_node_name': 'RealTimeDataSource', - 'source_node_kwargs': {'market_data': - , 'period': 'last_5mins', 'asset_id_col': 'asset_id', - 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', - 'execution_mode': 'real_time', 'process_forecasts_config': - {'market_data': - ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': - datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), - 'ath_end_time': datetime.time(16, 40), 'trading_end_time': - datetime.time(16, 4 0)}}""" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test replacing wall_clock_time=Timestamp('..., tz='America/New_York')) - """ - txt = """ - _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' - >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , - _dst_dir=None , _fit_at_beginning=False , - _wake_up_timestamp=None , _bar_duration_in_secs=300 , - _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 - 10:05:00-0500', tz='America/New_York'), - wall_clock_time=Timestamp('2022-08-04 09:29:13.441715-0400', - tz='America/New_York')), Event(num_it=2, - current_time=Timestamp('2000-01-01 10:10:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 - 09:29:13.892793-0400', tz='America/New_York')), Event(num_it=3, - current_time=Timestamp('2000-01-01 10:15:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 - 09:29:14.131619-0400', tz='America/New_York'))] ) - """ - expected = """ - _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' - >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , - _dst_dir=None , _fit_at_beginning=False , - _wake_up_timestamp=None , _bar_duration_in_secs=300 , - _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 - 10:05:00-0500', tz='America/New_York'), - wall_clock_time=Timestamp('xxx', tz='America/New_York')), - Event(num_it=2, 
current_time=Timestamp('2000-01-01 10:10:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('xxx', - tz='America/New_York')), Event(num_it=3, - current_time=Timestamp('2000-01-01 10:15:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('xxx', - tz='America/New_York'))] ) - """ - txt = " ".join(hprint.dedent(txt).split("\n")) - expected = " ".join(hprint.dedent(expected).split("\n")) - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_today_date1 -# ############################################################################# - - -class Test_purify_today_date1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_today_date(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test replacing today's date and time with placeholders. - """ - today = datetime.date.today() - today_str = today.strftime("%Y%m%d") - txt = f""" - Report generated on {today_str}_103045. - Next run scheduled at {today_str}_235959. - """ - expected = """ - Report generated on YYYYMMDD_HHMMSS. - Next run scheduled at YYYYMMDD_HHMMSS. - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test replacing today's date only with placeholder. - """ - today = datetime.date.today() - today_str = today.strftime("%Y%m%d") - txt = f""" - Backup completed: {today_str}. - Last modified: {today_str}. - """ - expected = """ - Backup completed: YYYYMMDD. - Last modified: YYYYMMDD. - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test to check that non-date-like numbers are not replaced. 
- """ - txt = """ - ID: 20000319_123456 - Code: 20000321 - Reference: 20000320_999999 - """ - expected = """ - ID: 20000319_123456 - Code: 20000321 - Reference: 20000320_999999 - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_white_spaces1 -# ############################################################################# - - -class Test_purify_white_spaces1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_white_spaces(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test removing trailing spaces and tabs. - """ - txt = "Line 1 \nLine 2\t\nLine 3 \t \n" - expected = "Line 1\nLine 2\nLine 3\n" - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test removing trailing spaces and preserving empty lines. - """ - txt = "Line 1\n\n\nLine 2\n\n\n\nLine 3 " - expected = "Line 1\n\n\nLine 2\n\n\n\nLine 3" - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test removing trailing whitespace and preserving leading whitespace. - """ - txt = " \n Line 1\nLine 2\n Line 3 \n " - expected = " \n Line 1\nLine 2\n Line 3\n" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test preserving intentional whitespace within lines. 
- """ - txt = "Line 1 with spaces\nLine 2\twith\ttabs" - expected = "Line 1 with spaces\nLine 2\twith\ttabs\n" - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_parquet_file_names1 -# ############################################################################# - - -class Test_purify_parquet_file_names1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_parquet_file_names(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test purification of Parquet file names with the path. - - The Parquet file names with the - GUID have to be replaced with the `data.parquet` string. - """ - txt = """ - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/ea5e3faed73941a2901a2128abeac4ca-0.parquet - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/f7a39fefb69b40e0987cec39569df8ed-0.parquet - """ - expected = """ - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/data.parquet - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/data.parquet - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test purification of Parquet file name without the path. 
- """ - txt = """ - ffa39fffb69b40e0987cec39569df8ed-0.parquet - """ - expected = """ - data.parquet - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_helpers1 -# ############################################################################# - - -class Test_purify_helpers1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_helpers(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test replacing helpers references in import statements. - """ - txt = """ - import helpers_root.helpers.hdbg as hdbg - from helpers_root.helpers.hprint import dedent - import helpers_root.config_root.config as config - """ - expected = """ - import helpers.hdbg as hdbg - from helpers.hprint import dedent - import config_root.config as config - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test replacing helpers references in file paths. - """ - txt = """ - /path/to/helpers/hdbg.py - /path/to/helpers/hprint.py - /path/to/config_root/config.py - """ - expected = """ - /path/to/helpers/hdbg.py - /path/to/helpers/hprint.py - /path/to/config_root/config.py - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test replacing helpers references in docstrings and comments. - """ - txt = """ - import helpers_root.helpers.hdbg - from /path/to/helpers_root/helpers/hprint import dedent - import helpers_root.config_root.config - from /path/to/helpers_root/config_root/config import settings - """ - expected = """ - import helpers.hdbg - from /path/to/helpers/hprint import dedent - import config_root.config - from /path/to/config_root/config import settings - """ - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test that non-matching patterns are not replaced. 
- """ - txt = """ - import other_module - from other_package import helpers - import helpers_utils - path/to/other/helpers/file.py - """ - expected = """ - import other_module - from other_package import helpers - import helpers_utils - path/to/other/helpers/file.py - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_docker_image_name1 -# ############################################################################# - - -class Test_purify_docker_image_name1(hunitest.TestCase): - def test1(self) -> None: - txt = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.edb567be pdflatex -output-directory - """ - expected = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.xxxxxxxx pdflatex -output-directory - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_docker_image_name(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. 
- """ - txt = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.2f590c86.2f590c86 pdflatex -output-directory - """ - expected = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_docker_image_name(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_purify_line_number1 -# ############################################################################# - - -class Test_purify_line_number1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that the text is purified from line numbers correctly. - """ - txt = """ - dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): - in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=list): [('close',), ('volume',)] - out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=tuple): () - """ - expected = r""" - dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): - in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=list): [('close',), ('volume',)] - out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=tuple): () - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_line_number(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - -# 
############################################################################# -# Test_purify_file_names1 -# ############################################################################# - - -class Test_purify_file_names1(hunitest.TestCase): - def check_helper(self, file_names: List[str], expected: List[str]) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - actual = "\n".join(str(path) for path in actual) - expected = "\n".join(str(path) for path in expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test basic file name purification with relative paths. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/helpers/test/test_file.py", - "/home/user/gitroot/amp/helpers/test/test_dbg.py", - ] - expected = [ - "helpers/test/test_file.py", - "helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test file name purification with nested amp references. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/amp/helpers/amp/test/test_file.py", - "/home/user/gitroot/amp/helpers/test/amp/test_dbg.py", - ] - expected = [ - "helpers/test/test_file.py", - "helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test file name purification with app references to ensure that they are - not replaced. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/app/helpers/test/test_file.py", - "/home/user/gitroot/app/amp/helpers/test/test_dbg.py", - ] - expected = [ - "app/helpers/test/test_file.py", - "app/helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test file name purification with empty list. 
- """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [] - expected = [] - self.check_helper(txt, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py deleted file mode 100644 index f3873fdfc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py +++ /dev/null @@ -1,347 +0,0 @@ -import os - -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_utils as hunteuti - - -# ############################################################################# -# TestUnitTestRenamer -# ############################################################################# - - -class TestUnitTestRenamer(hunitest.TestCase): - """ - Test class renaming functionality. - """ - - -# ############################################################################# -# TestCases -# ############################################################################# - - - @staticmethod - def helper() -> str: - """ - Create file content. - """ - content = """ -class TestCases(hunitest.TestCase): - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - return content - - -# ############################################################################# -# TestNewCase -# ############################################################################# - - - def test_rename_class1(self) -> None: - """ - Test renaming of existing class. 
- """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer("TestCases", "TestNewCase", root_dir) - actual, _ = renamer._rename_class(content) - expected = """ -class TestNewCase(hunitest.TestCase): - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - self.assert_equal(actual, expected) - - def test_rename_class2(self) -> None: - """ - Test renaming of non existing class. - """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer("TestCase", "TestNewCase", root_dir) - actual, _ = renamer._rename_class(content) - # Check if the content of the file was not changed. - self.assert_equal(actual, content) - - -# ############################################################################# -# TestPytestRenameMethod -# ############################################################################# - - -class TestPytestRenameMethod(hunitest.TestCase): - """ - Test method renaming functionality. - """ - - -# ############################################################################# -# TestCases -# ############################################################################# - - - @staticmethod - def helper() -> str: - """ - Create file content. 
- """ - content = """ -class TestCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - - -# ############################################################################# -# TestOtherCases -# ############################################################################# - - -class TestOtherCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - return content - - -# ############################################################################# -# TestCases -# ############################################################################# - - - def test_rename_method1(self) -> None: - """ - Test renaming of existing method. - """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCases.test1", "TestCases.test_new", root_dir - ) - actual, _ = renamer._rename_method(content) - expected = """ -class TestCases(hunitest.TestCase): - def test_new(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - - -# ############################################################################# -# TestOtherCases -# ############################################################################# - - -class TestOtherCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - self.assert_equal(actual, expected) - - def test_rename_method2(self) -> None: - """ - Test renaming of non existing method. 
- """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestOtherCases.test5", "TestOtherCases.test6", root_dir - ) - actual, _ = renamer._rename_method(content) - # Check if the content of the file was not changed. - self.assert_equal(actual, content) - - def test_rename_method3(self) -> None: - """ - Test renaming of invalid method names. - """ - self.helper() - root_dir = os.getcwd() - with self.assertRaises(AssertionError): - hunteuti.UnitTestRenamer( - "TestCases.test10", "TestOtherCases.test6", root_dir - ) - - -# ############################################################################# -# TestPytestRenameOutcomes -# ############################################################################# - - -class TestPytestRenameOutcomes(hunitest.TestCase): - """ - Test golden outcomes directory renaming. - """ - - @staticmethod - def helper(toy_test: str) -> None: - """ - Create the temporary outcome to rename. - - :param toy_test: the name of the toy directory - """ - outcomes_paths = [ - "TestCase.test_check_string1", - "TestCase.test_rename", - "TestCase.test_rename3", - "TestCases.test_rename2", - "TestRename.test_rename1", - ] - for path in outcomes_paths: - outcomes_dir = os.path.join(toy_test, "test/outcomes", path) - hio.create_dir(outcomes_dir, incremental=False) - hio.to_file(f"{outcomes_dir}/test.txt", "Test files.") - cmd = f"git add {toy_test}/" - hsystem.system(cmd, abort_on_error=False, suppress_output=False) - - def _clean_up(self, toy_test: str) -> None: - """ - Remove temporary test directory. - - :param toy_test: the name of the toy directory - """ - cmd = f"git reset {toy_test}/ && rm -rf {toy_test}/" - hsystem.system(cmd, abort_on_error=False, suppress_output=False) - - def test_rename_class_outcomes(self) -> None: - """ - Rename outcome directory. - """ - toy_test = "toyCmTask1279." + self._testMethodName - # Create outcomes directory. 
- test_path = os.path.join(toy_test, "test") - # Create the toy outcomes. - self.helper(toy_test) - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCase", "TestRenamedCase", root_dir - ) - renamer.rename_outcomes( - test_path, - ) - # Check if the dirs were renamed. - outcomes_path = os.path.join(test_path, "outcomes") - outcomes_dirs = os.listdir(outcomes_path) - actual = sorted( - [ - ent - for ent in outcomes_dirs - if os.path.isdir(os.path.join(outcomes_path, ent)) - ] - ) - expected = [ - "TestCases.test_rename2", - "TestRename.test_rename1", - "TestRenamedCase.test_check_string1", - "TestRenamedCase.test_rename", - "TestRenamedCase.test_rename3", - ] - self.assertEqual(actual, expected) - self._clean_up(toy_test) - - def test_rename_method_outcomes(self) -> None: - """ - Rename outcome directory. - """ - toy_test = "toyCmTask1279." + self._testMethodName - # Create outcomes directory. - test_path = os.path.join(toy_test, "test") - # Create the toy outcomes. - self.helper(toy_test) - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCase.test_rename", - "TestCase.test_method_renamed", - root_dir, - ) - renamer.rename_outcomes( - test_path, - ) - # Check if the dirs were renamed. - outcomes_path = os.path.join(test_path, "outcomes") - outcomes_dirs = os.listdir(outcomes_path) - actual = sorted( - [ - ent - for ent in outcomes_dirs - if os.path.isdir(os.path.join(outcomes_path, ent)) - ] - ) - expected = [ - "TestCase.test_check_string1", - "TestCase.test_method_renamed", - "TestCase.test_rename3", - "TestCases.test_rename2", - "TestRename.test_rename1", - ] - self.assertEqual(actual, expected) - self._clean_up(toy_test) - - -# ############################################################################# -# Test_get_test_file_for_source -# ############################################################################# - - -class Test_get_test_file_for_source(hunitest.TestCase): - """ - Test mapping source files to test files. 
- """ - - def test1(self) -> None: - """ - Source file with existing test file returns the test path. - """ - actual = hunteuti.get_test_file_for_source("helpers/hdbg.py") - expected = "helpers/test/test_hdbg.py" - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Source file without test file returns None. - """ - actual = hunteuti.get_test_file_for_source("tasks.py") - self.assertIsNone(actual) - - def test3(self) -> None: - """ - Test file as input returns None. - """ - actual = hunteuti.get_test_file_for_source("helpers/test/test_hdbg.py") - self.assertIsNone(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py deleted file mode 100644 index 79aa3ab80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py +++ /dev/null @@ -1,74 +0,0 @@ -import logging - -import helpers.hunit_test as hunitest -import helpers.hversion as hversio - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestVersioning1 -# ############################################################################# - - -class TestVersioning1(hunitest.TestCase): - def test_get_changelog_version1(self) -> None: - """ - Test `cmamp` version. - """ - container_dir_name = "." - code_version = hversio.get_changelog_version(container_dir_name) - _LOG.debug("code_version=%s", code_version) - - def test_get_container_version1(self) -> None: - container_version = hversio.get_container_version() - _LOG.debug("container_version=%s", container_version) - - def test_check_version1(self) -> None: - container_dir_name = "." 
- hversio.check_version(container_dir_name) - - def test__check_version1(self) -> None: - code_version = "1.0.0" - container_version = "1.0.2" - is_ok = hversio._check_version(code_version, container_version) - self.assertFalse(is_ok) - - def test__check_version2(self) -> None: - code_version = "1.0.0" - container_version = "1.0.0" - is_ok = hversio._check_version(code_version, container_version) - self.assertTrue(is_ok) - - def test__check_version3(self) -> None: - code_version = "1.0.0" - container_version = "amp-1.0.0" - is_ok = hversio._check_version(code_version, container_version) - self.assertTrue(is_ok) - - def test_bump_version1(self) -> None: - """ - Test major version bump. - """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="major") - expected = "3.0.0" - self.assertEqual(result, expected) - - def test_bump_version2(self) -> None: - """ - Test minor version bump. - """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="minor") - expected = "2.3.0" - self.assertEqual(result, expected) - - def test_bump_version3(self) -> None: - """ - Test patch version bump. 
- """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="patch") - expected = "2.2.1" - self.assertEqual(result, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py deleted file mode 100644 index 987b30476..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py +++ /dev/null @@ -1,569 +0,0 @@ -import logging -import os -import time -from typing import Any, List, Optional, Union - -import pytest - -import helpers.hjoblib as hjoblib -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - - -def workload_function( - val1: int, - val2: str, - # - **kwargs: Any, -) -> str: - """ - Execute the test workload. - """ - _LOG.info("Starting workload %s", val1) - incremental = kwargs.pop("incremental") - num_attempts = kwargs.pop("num_attempts") - _ = val1, val2, incremental, num_attempts - res: str = hprint.to_str("val1 val2 incremental num_attempts kwargs") - _LOG.debug("res=%s", res) - sleep = 0.01 - # sleep = 2 - time.sleep(sleep) - _LOG.info("Ending workload %s", val1) - if val1 == -1: - raise ValueError(f"Error: {res}") - return res - - -# ############################################################################# -# Test_parallel_execute1 -# ############################################################################# - - -def get_workload1( - randomize: bool, *, seed: Optional[int] = None -) -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 5 tasks that succeeds. 
- """ - tasks = [] - for i in range(5): - # val1, val2 - task = ((i, 2 * i), {f"hello{i}": f"world{2 * i}", "good": "bye"}) - tasks.append(task) - workload: hjoblib.Workload = (workload_function, "workload_function", tasks) - if randomize: - # Randomize workload. - workload = hjoblib.randomize_workload(workload, seed=seed) - return workload - - -# ############################################################################# - - -def _outcome_to_string(outcome: List[str]) -> str: - outcome = "\n".join(sorted(map(str, outcome))) - return outcome - - -def _helper_success( - self_: Any, - workload: hjoblib.Workload, - num_threads: Union[str, int], - abort_on_error: bool, - expected_return: str, - backend: str, -) -> None: - """ - Run a workload that is supposed to succeed and check its result. - """ - dry_run = False - incremental = True - num_attempts = 1 - log_file = os.path.join(self_.get_scratch_space(), "log.txt") - # - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - backend=backend, - ) - # Check. - _LOG.debug("res=%s", str(res)) - actual = _outcome_to_string(res) - self_.assert_equal(actual, expected_return) - - -# ############################################################################# -# Test_parallel_execute1 -# ############################################################################# - - -class Test_parallel_execute1(hunitest.TestCase): - """ - Execute a workload of 5 tasks that all succeed. 
- """ - - # pylint: disable=line-too-long - EXPECTED_RETURN = r"""val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} -val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} -val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} -val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} -val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" - - def test_dry_run1(self) -> None: - """ - Dry-run a workload. - """ - workload = get_workload1(randomize=True) - dry_run = True - num_threads = "serial" - incremental = True - num_attempts = 1 - abort_on_error = True - log_file = os.path.join(self.get_scratch_space(), "log.txt") - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - ) - _LOG.debug("res=%s", str(res)) - self.assertIs(res, None) - - def _run_test(self, num_threads: Union[str, int], backend: str) -> None: - workload = get_workload1(randomize=True) - abort_on_error = True - # - expected_return = self.EXPECTED_RETURN - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - - # pylint: enable=line-too-long - - def test_serial1(self) -> None: - num_threads = "serial" - backend = "" - self._run_test(num_threads, backend) - - def test_parallel_loky1(self) -> None: - num_threads = "1" - backend = "loky" - self._run_test(num_threads, backend) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~6 seconds, see CmTask4951.") - def test_parallel_loky2(self) -> None: - num_threads = "3" - backend = "loky" - self._run_test(num_threads, backend) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = "1" - backend = "asyncio_threading" - self._run_test(num_threads, backend) - - def test_parallel_asyncio_threading2(self) -> None: - 
num_threads = "3" - backend = "asyncio_threading" - self._run_test(num_threads, backend) - - -# ############################################################################# -# Test_parallel_execute2 -# ############################################################################# - - -def get_workload2() -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 1 task that fails. - """ - task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) - tasks = [task] - workload: hjoblib.Workload = (workload_function, "workload_function", tasks) - return workload - - -def _helper_fail( - self_: Any, - workload: hjoblib.Workload, - num_threads: Union[str, int], - abort_on_error: bool, - expected_assertion: str, - backend: str, -) -> None: - dry_run = False - incremental = True - num_attempts = 1 - log_file = os.path.join(self_.get_scratch_space(), "log.txt") - # - with self_.assertRaises(ValueError) as cm: - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - backend=backend, - ) - # Print result if it succeeds. - _LOG.debug("res=%s", str(res)) - # Check. - actual = str(cm.exception) - self_.assert_equal(actual, expected_assertion) - - -# # To observe the output in real-time. 
-# if __name__ == "__main__": -# hdbg.init_logger(verbosity=logging.INFO) -# workload = get_workload1(randomize=True) -# # num_threads = "serial" -# num_threads = "1" -# # num_threads = "5" -# # backend = "loky" -# backend = "asyncio_threading" -# # backend = "asyncio_multiprocessing" -# abort_on_error = True -# # -# dry_run = False -# incremental = True -# num_attempts = 1 -# log_file = "./log.txt" -# # -# _LOG.info("\n" + hprint.frame("Start workload")) -# with htimer.TimedScope(logging.INFO, "Execute workload"): -# res = hjoblib.parallel_execute( -# workload, -# dry_run, -# num_threads, -# incremental, -# abort_on_error, -# num_attempts, -# log_file, -# backend=backend, -# ) -# _LOG.info("\n" + hprint.frame("Results")) -# import pprint -# -# print(pprint.pformat(res)) - - -# ############################################################################# -# Test_parallel_execute2 -# ############################################################################# - - -class Test_parallel_execute2(hunitest.TestCase): - """ - Execute a workload of 1 task that fails. 
- """ - - # pylint: disable=line-too-long - EXPECTED_STRING = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" - - # pylint: enable=line-too-long - - def _run_test( - self, - abort_on_error: bool, - num_threads: Union[str, int], - backend: str, - should_succeed: bool, - ) -> None: - workload = get_workload2() - # - expected_return = self.EXPECTED_STRING - if should_succeed: - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - else: - _helper_fail( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - - def test_serial1(self) -> None: - num_threads = "serial" - abort_on_error = True - backend = "" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_serial2(self) -> None: - num_threads = "serial" - abort_on_error = False - backend = "" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky1(self) -> None: - num_threads = 2 - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky2(self) -> None: - num_threads = 2 - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = 2 - abort_on_error = True - backend = "asyncio_threading" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading2(self) -> None: - num_threads = 2 - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - 
self._run_test(abort_on_error, num_threads, backend, should_succeed) - - -# ############################################################################# -# Test_parallel_execute3 -# ############################################################################# - - -def get_workload3( - randomize: bool, seed: Optional[int] = None -) -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 5 tasks succeeding and one - task failing. - """ - workload: hjoblib.Workload = get_workload1(randomize=True) - # Modify the workflow in place. - (workload_func, func_name, tasks) = workload - _ = workload_func, func_name - task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) - tasks.append(task) - if randomize: - # Randomize workload. - workload = hjoblib.randomize_workload(workload, seed=seed) - return workload - - -# ############################################################################# -# Test_parallel_execute3 -# ############################################################################# - - -class Test_parallel_execute3(hunitest.TestCase): - """ - Execute a workload with 5 tasks that succeed and 1 task that fails. 
- """ - - # pylint: disable=line-too-long - EXPECTED_STRING1 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" - - EXPECTED_STRING2 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'} -val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} -val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} -val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} -val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} -val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" - - # pylint: enable=line-too-long - - def _run_test( - self, - abort_on_error: bool, - num_threads: Union[str, int], - backend: str, - should_succeed: bool, - ) -> None: - workload = get_workload3(randomize=False) - # Since there is an error and `abort_on_error=True` we only get information - # about the failed task. - if should_succeed: - expected_return = self.EXPECTED_STRING2 - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - else: - # Since there is an error and `abort_on_error=True` we only get information - # about the failed task. 
- expected_exception = self.EXPECTED_STRING1 - _helper_fail( - self, - workload, - num_threads, - abort_on_error, - expected_exception, - backend, - ) - - def test_serial1(self) -> None: - num_threads = "serial" - abort_on_error = True - backend = "" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_serial2(self) -> None: - """ - Execute: - - a workload with 5 tasks that succeed and 1 task that fails - - serially - - don't abort because abort_on_error=False - """ - num_threads = "serial" - abort_on_error = False - backend = "" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_loky1(self) -> None: - num_threads = "1" - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky2(self) -> None: - num_threads = "3" - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_loky3(self) -> None: - num_threads = "1" - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.slow("~5 seconds.") - def test_parallel_loky4(self) -> None: - num_threads = "3" - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = "1" - abort_on_error = True - backend = "asyncio_threading" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading2(self) -> None: - num_threads = "3" - abort_on_error = True - backend = "asyncio_threading" - # - 
should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading3(self) -> None: - num_threads = "1" - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading4(self) -> None: - num_threads = "3" - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - -# ############################################################################# - - -# ############################################################################# -# Test_joblib_example1 -# ############################################################################# - - -@pytest.mark.skip(reason="Just for experimenting with joblib") -class Test_joblib_example1(hunitest.TestCase): - @staticmethod - def func(val: int) -> int: - print(f"val={val}") - if val == -1: - raise ValueError(f"val={val}") - print(f" out={val}") - return val - - def test1(self) -> None: - """ - Show that when a job fails the entire `joblib.Parallel` fails without - returning anything, but just propagating the exception. 
- """ - # num_threads = 5 - num_threads = 1 - vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - # vals[1] = -1 - vals[5] = -1 - import joblib - - backend = "loky" - res = joblib.Parallel(n_jobs=num_threads, backend=backend, verbose=200)( - joblib.delayed(Test_joblib_example1.func)(val) for val in vals - ) - print(f"res={str(res)}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py deleted file mode 100644 index 12f04c506..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py +++ /dev/null @@ -1,540 +0,0 @@ -# This should only test helper functions from `lib_tasks.py`. -# `test_tasks.py` associated to `tasks.py` should test specific task targets. - -import logging -import os -import re -import unittest.mock as umock -from typing import Dict, Generator - -import invoke -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks as hlibtask -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -def _get_default_params() -> Dict[str, str]: - """ - Get fake params pointing to a different image so we can test the code - without affecting the official images. 
- """ - ecr_base_path = os.environ["CSFY_ECR_BASE_PATH"] - default_params = { - "CSFY_ECR_BASE_PATH": ecr_base_path, - "BASE_IMAGE": "amp_test", - "HELPERS_IMAGE_PROD": f"{ecr_base_path}/helpers:prod", - } - return default_params - - -# ############################################################################# -# _LibTasksTestCase -# ############################################################################# - - -class _LibTasksTestCase(hunitest.TestCase): - """ - Test class injecting default parameters in the `lib_tasks` singleton in - `set_up_test()` and cleaning up the singleton in `tear_down_test()`. - """ - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - params = _get_default_params() - hlitauti.set_default_params(params) - - def tear_down_test(self) -> None: - hlitauti.reset_default_params() - - -# ############################################################################# - - -# TODO(gp): Make it public. -def _build_mock_context_returning_ok() -> invoke.MockContext: - """ - Build a MockContext catching any command and returning rc=0. - """ - ctx = invoke.MockContext( - repeat=True, run={re.compile(".*"): invoke.Result(exited=0)} - ) - return ctx - - -# ############################################################################# -# _CheckDryRunTestCase -# ############################################################################# - - -class _CheckDryRunTestCase(hunitest.TestCase): - """ - Test class running an invoke target with/without dry-run and checking that - the issued commands are what is expected. - """ - - def _check_calls(self, ctx: invoke.MockContext) -> None: - """ - `check_string()` the sequence of commands issued in the context. 
- """ - actual = "\n".join(map(str, ctx.run.mock_calls)) - actual = hprint.remove_non_printable_chars(actual) - self.check_string(actual) - - def _check_output(self, target: str, check: bool = True) -> None: - """ - Dry run target checking that the sequence of commands issued is the - expected one. - """ - ctx = _build_mock_context_returning_ok() - # pylint: disable=exec-used - exec(f"hlibtask.{target}") - # pylint: enable=exec-used - # Check the outcome. - if check: - self._check_calls(ctx) - - -# TODO(gp): We should group the tests by what is tested and not how it's -# tested. E.g. TestDryRunTasks1::test_print_setup and -# TestDryRunTasks2::test_print_setup should go together in a class. - - -# ############################################################################# -# TestDryRunTasks1 -# ############################################################################# - - -class TestDryRunTasks1(hunitest.TestCase): - """ - - Run invoke in dry-run mode from command line - - Compare the output to the golden outcomes - """ - - # TODO(gp): -> TestGitCommands1 - - def dry_run( - self, target: str, dry_run: bool = True, check_string: bool = True - ) -> None: - """ - Invoke the given target with dry run. - - This is used to test the commands that we can't actually - execute. - """ - opts = "--dry" if dry_run else "" - # - # TODO(vitalii): While deploying the container versioning - # we disable the check in the unit tests. Remove `SKIP_VERSION_CHECK=1` - # after CmampTask570 is fixed. - cmd = f"SKIP_VERSION_CHECK=1 invoke {opts} {target} | grep -v INFO | grep -v '>>ENV<<:'" - _, actual = hsystem.system_to_string(cmd) - # - actual = hprint.remove_non_printable_chars(actual) - # docker_ps: sudo=False - regex = r"# \S+:" - actual = hunitest.filter_text(regex, actual) - # - regex = r"(WARN|INFO)\s+hcache.py" - actual = hunitest.filter_text(regex, actual) - # Filter out `no module` warnings. 
- # TODO(Grisha): add the "no module warning" filtering - # to `purify_text()` in `check_string()`. - regex = "WARN.*No module" - actual = hunitest.filter_text(regex, actual) - if check_string: - self.check_string(actual) - - # ######################################################################### - - # TODO(gp): We can't test this since amp and cmamp have now different base image. - # def test_print_setup(self) -> None: - # target = "print_setup" - # self.dry_run(target) - - # The problem is that we use system and not ctx to execute the command, so that - # --dry-run doesn't work. - @pytest.mark.skip(reason="This is actually run") - def test_git_pull(self) -> None: - target = "git_pull" - self.dry_run(target) - - @pytest.mark.skip(reason="This is actually run") - def test_git_fetch_master(self) -> None: - target = "git_fetch_master" - self.dry_run(target) - - @pytest.mark.skip(reason="This is actually run deleting files") - def test_git_clean(self) -> None: - target = "git_clean" - self.dry_run(target) - - # ######################################################################### - # TODO(gp): -> TestDockerCommands1 - - @pytest.mark.slow("~6 sec.") - @pytest.mark.skipif( - hserver.is_inside_ci(), reason="In CI the output is different" - ) - def test_docker_images_ls_repo(self) -> None: - target = "docker_images_ls_repo" - # TODO(gp): amp and cmamp have different version of aws cli and so the - # output is different. - check_string = False - self.dry_run(target, check_string=check_string) - - @pytest.mark.slow("~6 sec.") - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. 
Different golden outcomes in helpers and other repos.", - ) - def test_docker_ps(self) -> None: - target = "docker_ps" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_stats(self) -> None: - target = "docker_stats" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_last(self) -> None: - target = "docker_kill" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_all(self) -> None: - target = "docker_kill --all" - self.dry_run(target) - - -# ############################################################################# - - -# ############################################################################# -# TestDryRunTasks2 -# ############################################################################# - - -# Outside CK infra, the class hangs, so we skip it. -@pytest.mark.requires_ck_infra -@pytest.mark.slow(reason="Around 7s") -@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class TestDryRunTasks2(_LibTasksTestCase, _CheckDryRunTestCase): - """ - - Call the invoke task directly from Python - - `check_string()` that the sequence of commands issued by the target is the - expected one using mocks to return ok for every system call. - """ - - def test_print_setup(self) -> None: - target = "print_setup(ctx)" - self._check_output(target) - - def test_git_pull(self) -> None: - target = "git_pull(ctx)" - self._check_output(target) - - def test_git_fetch_master(self) -> None: - target = "git_fetch_master(ctx)" - self._check_output(target) - - def test_git_clean(self) -> None: - target = "git_clean(ctx)" - self._check_output(target) - - # TODO(Grisha): is not it the same as `test_git_clean()`? 
- def test_git_clean2(self) -> None: - target = "git_clean(ctx, dry_run=False)" - self._check_output(target) - - # ######################################################################### - - def test_docker_images_ls_repo(self) -> None: - target = "docker_images_ls_repo(ctx)" - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_all(self) -> None: - target = "docker_kill(ctx, all=True)" - self._check_output(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_last(self) -> None: - target = "docker_kill(ctx)" - self._check_output(target) - - def test_docker_ps(self) -> None: - target = "docker_ps(ctx)" - self._check_output(target) - - def test_docker_pull(self) -> None: - target = "docker_pull(ctx)" - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_stats(self) -> None: - target = "docker_stats(ctx)" - self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestGhCommands1 - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr1(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, title='test')" - self._check_output(target) - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. 
- @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr2(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, body='hello_world', title='test')" - self._check_output(target) - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr3(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, draft=False, title='test')" - self._check_output(target) - - # TODO(*): Remove skip after migration to `csfy`.` - @pytest.mark.skip( - reason="migration to new repo " - "ref: https://github.com/causify-ai/cmamp/issues/13063" - ) - def test_gh_issue_title(self) -> None: - target = "gh_issue_title(ctx, 1)" - self._check_output(target) - - # TODO(Shaopengz): Outside CK infra, the test hangs, so skip. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_gh_workflow_list(self) -> None: - target = "gh_workflow_list(ctx, filter_by_branch='master')" - self._check_output(target) - - # This is an action with side effects so we can't test it. 
- # def test_gh_workflow_run(self) -> None: - # target = "gh_workflow_run(ctx)" - # self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestGitCommands1 - def test_git_branch_files(self) -> None: - # This test needs a reference to Git master branch. - hgit.fetch_origin_master_if_needed() - # - target = "git_branch_files(ctx)" - self._check_output(target) - - @pytest.mark.skip( - reason="HelpersTask638: Skip Failing test to merge the PR in cmamp" - ) - def test_git_branch_create1(self) -> None: - target = ( - "git_branch_create(ctx, branch_name='AmpTask123_test', " - "only_branch_from_master=False)" - ) - self._check_output(target) - - # TODO(*): Remove skip after migration to `csfy`.` - @pytest.mark.skip( - reason="migration to new repo " - "ref: https://github.com/causify-ai/cmamp/issues/13063" - ) - def test_git_branch_create2(self) -> None: - # Difference between `cmamp` and `kaizenflow`. - target = ( - "git_branch_create(ctx, issue_id=1, only_branch_from_master=False)" - ) - self._check_output(target) - - def test_git_branch_create3(self) -> None: - with self.assertRaises(AssertionError): - target = ( - "git_branch_create(ctx, branch_name='test', issue_id=1, " - "only_branch_from_master=False)" - ) - self._check_output(target, check=False) - - # This is an action with side effects so we can't test it. 
- # def test_git_branch_delete_merged(self) -> None: - # target = "git_branch_delete_merged(ctx)" - # self._check_output(target) - - def test_git_merge_master(self) -> None: - target = "git_merge_master(ctx, abort_if_not_clean=False)" - self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestLintCommands1 - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint1(self) -> None: - target = "lint(ctx, modified=True)" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint2(self) -> None: - target = "lint(ctx, branch=True)" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint3(self) -> None: - file = __file__ - target = f"lint(ctx, files='{file}')" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - def test_find_test_class1(self) -> None: - class_name = self.__class__.__name__ - target = f"find_test_class(ctx, class_name='{class_name}')" - self._check_output(target) - - # ######################################################################### - - @pytest.mark.skipif( - hserver.is_inside_ci(), reason="In CI the output is different" - ) - def test_docker_login(self) -> None: - """ - Instead of using _build_mock_context_returning_ok(), set the return - values more explicitly. 
- """ - stdout = "aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49\n" - ctx = invoke.MockContext( - run={ - "aws --version": invoke.Result(stdout), - re.compile("^docker login"): invoke.Result(exited=0), - re.compile("^eval"): invoke.Result(exited=0), - } - ) - hlibtask.docker_login(ctx) - # Check the outcome. - # self._check_calls(ctx) - - -# ############################################################################# - -# TODO(gp): Run test coverage with -# > i run_fast_slow_tests \ -# --pytest-opts="helpers/test/test_lib_tasks.py test/test_tasks.py" \ -# --coverage - -# TODO(gp): Add tests for: -# - print_tasks -# - git_files -# - git_last_commit_files -# - check_python_files -# - docker_stats -# - traceback (with checked in file) -# - lint - - -# ############################################################################# - - -# ############################################################################# -# TestFailing -# ############################################################################# - - -class TestFailing(hunitest.TestCase): - """ - Run a test that fails based on CSFY_FORCE_TEST_FAIL environment variable. 
- """ - - def test_failing(self) -> None: - if os.environ.get("CSFY_FORCE_TEST_FAIL", "") == "1": - self.fail("test failed succesfully") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py deleted file mode 100644 index 80ea28ffb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py +++ /dev/null @@ -1,494 +0,0 @@ -import logging -import os -import re -import unittest.mock as umock -from typing import Dict, Optional - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.lib_tasks_docker as hlitadoc -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# pylint: disable=protected-access - - -# ############################################################################# -# Test_generate_compose_file1 -# ############################################################################# - - -class Test_generate_compose_file1(hunitest.TestCase): - def helper( - self, - stage: str, - *, - use_privileged_mode: bool = False, - use_sibling_container: bool = False, - shared_data_dirs: Optional[Dict[str, str]] = None, - mount_as_submodule: bool = False, - use_network_mode_host: bool = True, - use_main_network: bool = False, - ) -> None: - txt = [] - # - params = [ - "stage", - "use_privileged_mode", - "use_sibling_container", - "shared_data_dirs", - "mount_as_submodule", - "use_network_mode_host", - ] - txt_tmp = hprint.to_str(" ".join(params)) - txt.append(txt_tmp) - # - file_name = None - txt_tmp = hlitadoc._generate_docker_compose_file( - stage, - use_privileged_mode, - use_sibling_container, - 
shared_data_dirs, - mount_as_submodule, - use_network_mode_host, - use_main_network, - file_name, - ) - # Remove all the env variables that are function of the host. - txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) - txt_tmp = hunitest.filter_text("CSFY_GIT_ROOT_PATH", txt_tmp) - txt_tmp = hunitest.filter_text("CSFY_HELPERS_ROOT_PATH", txt_tmp) - txt_tmp = hunitest.filter_text( - "CSFY_USE_HELPERS_AS_NESTED_MODULE", txt_tmp - ) - txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) - txt.append(txt_tmp) - # - txt = "\n".join(txt) - txt = hunitest.filter_text(r"working_dir", txt) - self.check_string(txt) - - def test1(self) -> None: - self.helper(stage="prod", use_privileged_mode=True) - - def test2(self) -> None: - self.helper( - stage="prod", shared_data_dirs={"/data/shared": "/shared_data"} - ) - - def test3(self) -> None: - self.helper(stage="prod", use_main_network=True) - - # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - hgit.is_in_amp_as_submodule(), reason="Only run in amp directly" - ) - def test4(self) -> None: - self.helper(stage="dev") - - # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
- @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test5(self) -> None: - self.helper(stage="dev") - - -# ############################################################################# -# Test_generate_compose_file2 -# ############################################################################# - - -class Test_generate_compose_file2(hunitest.TestCase): - def helper( - self, - mock_getcwd: str, - mock_find_git_root: str, - mock_find_helpers_root: str, - mock_is_in_helpers_as_supermodule: bool, - *, - stage: str = "prod", - use_privileged_mode: bool = True, - use_sibling_container: bool = False, - shared_data_dirs: Optional[Dict[str, str]] = None, - mount_as_submodule: bool = False, - use_network_mode_host: bool = True, - use_main_network: bool = False, - ) -> None: - txt = [] - # - params = [ - "stage", - "use_privileged_mode", - "use_sibling_container", - "shared_data_dirs", - "mount_as_submodule", - "use_network_mode_host", - ] - txt_tmp = hprint.to_str(" ".join(params)) - txt.append(txt_tmp) - # - file_name = None - with ( - umock.patch.object(os, "getcwd", return_value=mock_getcwd), - umock.patch.object( - hgit, "find_git_root", return_value=mock_find_git_root - ), - umock.patch.object( - hgit, "find_helpers_root", return_value=mock_find_helpers_root - ), - umock.patch.object( - hgit, - "is_in_helpers_as_supermodule", - return_value=mock_is_in_helpers_as_supermodule, - ), - ): - txt_tmp = hlitadoc._generate_docker_compose_file( - stage, - use_privileged_mode, - use_sibling_container, - shared_data_dirs, - mount_as_submodule, - use_network_mode_host, - use_main_network, - file_name, - ) - # Remove all the env variables that are function of the host. 
- txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) - txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) - txt.append(txt_tmp) - # - txt = "\n".join(txt) - self.check_string(txt) - - def test1(self) -> None: - """ - Check that file is generated correctly when the repo is `//cmamp`. - """ - self.helper( - mock_getcwd="/data/dummy/src/cmamp1", - mock_find_git_root="/data/dummy/src/cmamp1", - mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - def test2(self) -> None: - """ - Check that file is generated correctly when the repo is `//helpers`. - """ - self.helper( - mock_getcwd="/data/dummy/src/helpers1", - mock_find_git_root="/data/dummy/src/helpers1", - mock_find_helpers_root="/data/dummy/src/helpers1", - mock_is_in_helpers_as_supermodule=True, - ) - - def test3(self) -> None: - """ - Check that file is generated correctly when the repo is `//cmamp` and - `//cmamp/ck.infra` is a runnable dir. - """ - self.helper( - mock_getcwd="/data/dummy/src/cmamp1/ck.infra", - mock_find_git_root="/data/dummy/src/cmamp1", - mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - def test4(self) -> None: - """ - Check that file is generated correctly when the repo is `//orange`. - """ - self.helper( - mock_getcwd="/data/dummy/src/orange1", - mock_find_git_root="/data/dummy/src/orange1", - mock_find_helpers_root="/data/dummy/src/orange1/amp/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - -# ############################################################################# - - -# ############################################################################# -# TestLibTasksGetDockerCmd1 -# ############################################################################# - - -# TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
-@pytest.mark.requires_ck_infra -class TestLibTasksGetDockerCmd1(httestlib._LibTasksTestCase): - """ - Test `_get_docker_compose_cmd()`. - """ - - def check(self, actual: str, expected: str) -> None: - # Remove current timestamp (e.g., `20220317_232120``) from the `--name` - # so that the tests pass. - timestamp_regex = r"\.\d{8}_\d{6}" - actual = re.sub(timestamp_regex, "", actual) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # This is required when different repos run Docker with user vs root / remap. - actual = hunitest.filter_text("--user", actual) - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.requires_ck_infra - # TODO(gp): After using a single docker file as part of AmpTask2308 - # "Update_amp_container" we can probably run these tests in any repo, so - # we should be able to remove this `skipif`. - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash1(self) -> None: - """ - Command for docker_bash target. - """ - base_image = "" - stage = "dev" - version = "1.0.0" - cmd = "bash" - service_name = "app" - use_entrypoint = False - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - service_name=service_name, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - --entrypoint bash \ - app - """ - self.check(actual, expected) - - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash2(self) -> None: - """ - Command for docker_bash with entrypoint. 
- """ - base_image = "" - stage = "local" - version = "1.0.0" - cmd = "bash" - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - print_docker_config=print_docker_config, - ) - expected = r"""IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - app \ - bash """ - self.check(actual, expected) - - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash3(self) -> None: - """ - Command for docker_bash with some env vars. - """ - base_image = "" - stage = "local" - version = "1.0.0" - cmd = "bash" - extra_env_vars = ["PORT=9999", "SKIP_RUN=1"] - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ - PORT=9999 \ - SKIP_RUN=1 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - app \ - bash - """ - self.check(actual, expected) - - if False: - - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_docker_bash4(self) -> None: - base_image = "" - stage = "dev" - version = "1.0.0" - cmd = "bash" - entrypoint = False - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - entrypoint=entrypoint, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - docker compose \ - --file 
$GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - --entrypoint bash \ - app - """ - self.check(actual, expected) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_jupyter1(self) -> None: - base_image = "" - stage = "dev" - version = "1.0.0" - port = 9999 - self_test = True - print_docker_config = False - actual = hlitadoc._get_docker_jupyter_cmd( - base_image, - stage, - version, - port, - self_test, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - PORT=9999 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.jupyter_server_test.app \ - --service-ports \ - jupyter_server_test - """ - self.check(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_image_name_valid1 -# ############################################################################# - - -class Test_dassert_is_image_name_valid1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that valid images pass the assertion. - """ - valid_images = [ - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev", - "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0", - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0", - "sorrentum/cmamp", - ] - for image in valid_images: - hlitadoc.dassert_is_image_name_valid(image) - - def test2(self) -> None: - """ - Check that invalid images do not pass the assertion. - """ - invalid_images = [ - # Missing required parts. - "invalid-image-name", - # Missing stage/version. - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:", - # Invalid version. 
- "12345.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-invalid", - ] - # TODO(gp): Add a check for the output. - for image in invalid_images: - with self.assertRaises(AssertionError): - hlitadoc.dassert_is_image_name_valid(image) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_base_image_name_valid1 -# ############################################################################# - - -class Test_dassert_is_base_image_name_valid1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that valid base images pass the assertion. - """ - valid_base_images = [ - "12345.dkr.ecr.us-east-1.amazonaws.com/amp", - "sorrentum/cmamp", - "ghcr.io/cryptokaizen/cmamp", - ] - for base_image in valid_base_images: - hlitadoc._dassert_is_base_image_name_valid(base_image) - - def test2(self) -> None: - """ - Check that invalid base images do not pass the assertion. - """ - invalid_base_images = [ - # Missing required parts. - "invalid-base-image", - # Extra character at the end. - "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:", - # Extra part in the name. 
- "ghcr.io/cryptokaizen/cmamp/invalid", - ] - for base_image in invalid_base_images: - with self.assertRaises(AssertionError): - hlitadoc._dassert_is_base_image_name_valid(base_image) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py deleted file mode 100644 index ff430ed24..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py +++ /dev/null @@ -1,1530 +0,0 @@ -import logging -import os -import unittest.mock as umock -from typing import Generator, List - -import boto3 -import moto -import pytest - -import helpers.hgit as hgit -import helpers.hunit_test as hunitest -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_docker_release as hltadore -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -def _extract_commands_from_call(calls: List[umock._Call]) -> List[str]: - """ - Extract command strings from a list of mock call arguments. - - Example: - calls = [ - ( - # args tuple: (context, command) - (mock_ctx, "docker build --no-cache image1"), - # kwargs dictionary - {"pty": True} - ) - ] - After extraction: - ["docker build --no-cache image1"] - - :param calls: list of mock call objects containing (args, kwargs) - :return: list of command strings - """ - # Each mock call is a (args, kwargs) tuple, extract the command string - # from args[1] in each call. 
- call_list = [args_[1] for args_, kwargs_ in calls] - return call_list - - -# ############################################################################# -# _DockerFlowTestHelper -# ############################################################################# - - -class _DockerFlowTestHelper(hunitest.TestCase): - """ - Helper test class to perform common setup, teardown logic and assertion - checks for Docker flow tests. - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - self.set_up_test() - yield - self.tear_down_test() - - def set_up_test(self) -> None: - # Mock system calls. - self.system_patcher = umock.patch("helpers.hsystem.system") - self.mock_system = self.system_patcher.start() - # Mock run. - self.run_patcher = umock.patch("helpers.lib_tasks_utils.run") - self.mock_run = self.run_patcher.start() - # Mock version validation. - self.version_patcher = umock.patch( - "helpers.lib_tasks_docker.dassert_is_subsequent_version" - ) - self.mock_version = self.version_patcher.start() - # Mock docker login. - self.docker_login_patcher = umock.patch( - "helpers.lib_tasks_docker.docker_login" - ) - self.mock_docker_login = self.docker_login_patcher.start() - # Mock environment variable. 
- self.env_patcher = umock.patch.dict( - "os.environ", {"CSFY_ECR_BASE_PATH": "test.ecr.path"} - ) - self.get_default_param_patcher = umock.patch( - "helpers.lib_tasks_utils.get_default_param", - side_effect=lambda param: { - "CSFY_ECR_BASE_PATH": "test.ecr.path", - "BASE_IMAGE": "test-image", - }.get(param, ""), - ) - self.mock_get_default_param = self.get_default_param_patcher.start() - self.env_patcher.start() - self.get_docker_base_image_name_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_docker_base_image_name" - ) - self.mock_get_docker_base_image_name = ( - self.get_docker_base_image_name_patcher.start() - ) - # - self.patchers = { - "system": self.system_patcher, - "run": self.run_patcher, - "version": self.version_patcher, - "docker_login": self.docker_login_patcher, - "env": self.env_patcher, - "docker_base_image_name": self.get_docker_base_image_name_patcher, - "default_param": self.get_default_param_patcher, - } - # Test inputs. - self.mock_ctx = httestlib._build_mock_context_returning_ok() - self.test_version = "1.0.0" - self.test_base_image = "test-registry.com/test-image" - self.test_multi_arch = "linux/amd64,linux/arm64" - self.mock_get_docker_base_image_name.return_value = "test-image" - - def tear_down_test(self) -> None: - """ - Clean up test environment by stopping all mocks after each test case. - """ - for patcher in self.patchers.values(): - patcher.stop() - - def _check_docker_command_output( - self, expected: str, call_args_list: List[umock._Call] - ) -> None: - """ - Verify that the sequence of Docker commands from mock calls matches the - expected string. 
- - :param expected: expected command string - :param call_args_list: list of mock call objects - """ - actual_cmds = _extract_commands_from_call(call_args_list) - actual_cmds = "\n".join(actual_cmds) - _LOG.debug("Actual Docker commands:\n%s", actual_cmds) - self.assert_equal( - actual_cmds, - expected, - purify_text=True, - purify_expected_text=True, - fuzzy_match=True, - remove_lead_trail_empty_lines=True, - dedent=True, - ) - - -# ############################################################################# -# Test_docker_build_local_image1 -# ############################################################################# - - -class Test_docker_build_local_image1(_DockerFlowTestHelper): - """ - Test building a local Docker image. - """ - - def test_single_arch1(self) -> None: - """ - Test building with single architecture. - - This test checks: - - Single architecture build - - No-cache build options - - Custom build arguments - - Local user-specific tagging - """ - # Call tested function. - hltadore.docker_build_local_image( - self.mock_ctx, - self.test_version, - cache=False, - base_image=self.test_base_image, - poetry_mode="update", - ) - # The output is a list of strings, each representing a command. - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multi_arch1(self) -> None: - """ - Test building with multiple architectures. - - This test checks: - - Multi-architecture build (amd64, arm64) - - Buildx driver setup - - Platform-specific build options - - Image pushing to registry - """ - # Call tested function. - hltadore.docker_build_local_image( - self.mock_ctx, - self.test_version, - cache=False, - base_image=self.test_base_image, - poetry_mode="update", - multi_arch=self.test_multi_arch, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test-registry.com/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_build_prod_image1 -# ############################################################################# - - -class Test_docker_build_prod_image1(_DockerFlowTestHelper): - """ - Test building a prod Docker image. - """ - - def test_single_arch_prod_image1(self) -> None: - """ - Test building with single architecture. - - This test checks: - - Production build workflow - - Single architecture build - - Build arguments for prod environment - - Prod image versioning - - Default and versioned tagging - """ - # Call tested function. - hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-1.0.0 \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker tag test-registry.com/test-image:prod-1.0.0 test-registry.com/test-image:prod - docker image ls test-registry.com/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multi_arch_prod_image1(self) -> None: - """ - Test building with multiple architectures. - - This test checks: - - Multi-architecture production build - - Buildx setup for multi-platform builds - - Push to registry during build - - Production build arguments - - Multi-arch specific options - """ - # Call tested function. 
- hltadore.docker_build_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - multi_arch=self.test_multi_arch, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ - --tag test-registry.com/test-image:prod-1.0.0 \ - --file devops/docker_build/prod.Dockerfile \ - - - docker pull test-registry.com/test-image:prod-1.0.0 - docker image ls test-registry.com/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - # TODO(gp): Is the assertion too strict? - reason="Needs to run inside a super module", - ) - def test_candidate_tag1(self) -> None: - """ - Test building with candidate mode using tag. - - This test checks: - - Production build using candidate mode - - Custom tag specification - - Build arguments - - Non-default image tagging - """ - test_tag = "test_tag" - # Call tested function. 
- hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - candidate=True, - tag=test_tag, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-test_tag \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker image ls test-registry.com/test-image:prod-test_tag - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_candidate_user_tag1(self) -> None: - """ - Test building with candidate mode using user tag. - - This test checks: - - Production build using candidate mode - - Combined user and custom tag parameters - - Custom tag format (prod-user-tag) - - Build arguments - """ - test_user_tag = "test_user" - test_tag = "test_tag" - # Call tested function. 
- hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - candidate=True, - user_tag=test_user_tag, - tag=test_tag, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-test_user-test_tag \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker image ls test-registry.com/test-image:prod-test_user-test_tag - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_tag_push_multi_arch_prod_image1 -# ############################################################################# - - -class Test_docker_tag_push_multi_arch_prod_image1(_DockerFlowTestHelper): - """ - Test tagging and pushing a multi-architecture Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Multi-arch image tagging - - AWS ECR target registry - - Production image versioning - """ - # Call tested function. - target_registry = "aws_ecr.ck" - hltadore.docker_tag_push_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dockerhub1(self) -> None: - """ - Test pushing to DockerHub from AWS ECR. - - This test checks: - - Multi-arch image tagging - - DockerHub registry (differs from AWS ECR test) - - Version and latest tagging - - Cross-registry image copying - """ - # Call tested function. 
- target_registry = "dockerhub.causify" - hltadore.docker_tag_push_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_tag_push_multi_build_local_image_as_dev1 -# ############################################################################# - - -class Test_docker_tag_push_multi_build_local_image_as_dev1( - _DockerFlowTestHelper -): - """ - Test tagging and pushing a multi-arch local Docker image as dev. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Multi-arch image tagging - - AWS ECR target registry - - Dev image versioning - - Default and versioned tagging - """ - # Call tested function. - target_registry = "aws_ecr.ck" - hltadore.docker_tag_push_multi_build_local_image_as_dev( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dockerhub1(self) -> None: - """ - Test pushing to DockerHub from AWS ECR. - - This test checks: - - Multi-arch image tagging - - DockerHub registry (differs from AWS ECR test) - - Version and latest tagging - - Cross-registry image copying - """ - # Call tested function. 
- target_registry = "dockerhub.causify" - hltadore.docker_tag_push_multi_build_local_image_as_dev( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_dev_image1 -# ############################################################################# - - -class Test_docker_release_dev_image1(_DockerFlowTestHelper): - """ - Test releasing a dev Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test releasing the dev image to AWS ECR. - - This test checks: - - Build workflow - - No-cache build options - - Dev image versioning - - Default and versioned tagging - - Registry target selection - - Architecture support - - Tagging and versioning - """ - # Call tested function. - hltadore.docker_release_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - push_to_repo=True, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_prod_image1 -# ############################################################################# - - -class Test_docker_release_prod_image1(_DockerFlowTestHelper): - """ - Test releasing a prod Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test releasing the prod image to AWS ECR. - - This test checks: - - Build workflow - - No-cache build options - - Prod image versioning - - Default and versioned tagging - - Registry target selection - - Architecture support - - Tagging and versioning - """ - # Call tested function. 
- hltadore.docker_release_prod_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - push_to_repo=True, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test.ecr.path/test-image:prod-1.0.0 \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod - docker image ls test.ecr.path/test-image:prod - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_multi_build_dev_image1 -# ############################################################################# - - -class Test_docker_release_multi_build_dev_image1(_DockerFlowTestHelper): - """ - Test releasing a multi-arch dev Docker image. - """ - - def test_single_registry1(self) -> None: - """ - Test releasing to a single registry. - - This test checks: - - Multi-arch build setup - - Build and push workflow - - Dev image tagging - - Test skipping options - - Single registry target - """ - # Call tested function. - hltadore.docker_release_multi_build_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - target_registries="aws_ecr.ck", - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multiple_registries1(self) -> None: - """ - Test releasing to multiple registries. - - This test checks: - - Multi-arch build workflow - - Multiple registry targets (AWS ECR and DockerHub) - - Parallel image tagging - - Image retagging for different registries - """ - # Call tested function. - hltadore.docker_release_multi_build_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - target_registries="aws_ecr.ck,dockerhub.causify", - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_rollback_dev_image1 -# ############################################################################# - - -class Test_docker_rollback_dev_image1(_DockerFlowTestHelper): - """ - Test rolling back a dev Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test rolling back and pushing to AWS ECR. - - This test checks: - - Dev image rollback workflow - - Version-specific image pull - - Retagging as latest - - Repository pushing - """ - # Call tested function. 
- hltadore.docker_rollback_dev_image( - self.mock_ctx, - self.test_version, - push_to_repo=True, - ) - expected = r""" - docker pull test.ecr.path/test-image:dev-1.0.0 - docker tag test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_rollback_prod_image1 -# ############################################################################# - - -class Test_docker_rollback_prod_image1(_DockerFlowTestHelper): - """ - Test rolling back a prod Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test rolling back and pushing to AWS ECR. - - This test checks: - - Production image rollback workflow - - Version-specific image pull - - Retagging as latest production - - Repository pushing - """ - # Call tested function. - hltadore.docker_rollback_prod_image( - self.mock_ctx, - self.test_version, - push_to_repo=True, - ) - expected = r""" - docker pull test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_push_prod_candidate_image1 -# ############################################################################# - - -class Test_docker_push_prod_candidate_image1(_DockerFlowTestHelper): - """ - Test pushing a prod candidate Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Candidate image pushing - - AWS ECR target registry - - Hash-based image tagging - """ - # Call tested function. 
- candidate = "4759b3685f903e6c669096e960b248ec31c63b69" - hltadore.docker_push_prod_candidate_image( - self.mock_ctx, - candidate=candidate, - ) - expected = r""" - docker push test.ecr.path/test-image:prod-4759b3685f903e6c669096e960b248ec31c63b69 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_multi_arch_prod_image1 -# ############################################################################# - - -class Test_docker_release_multi_arch_prod_image1(_DockerFlowTestHelper): - """ - Test releasing a multi-arch prod Docker image. - """ - - def test_multiple_registries1(self) -> None: - """ - Test releasing to AWS ECR and DockerHub. - - This test checks: - - Multi-arch build workflow - - AWS ECR and DockerHub target registries - - Test skipping options - - Image tagging and pushing - """ - # Call tested function. - hltadore.docker_release_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - docker_registry=["aws_ecr.ck", "dockerhub.causify"], - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ - --tag test.ecr.path/test-image:prod-1.0.0 \ - --file devops/docker_build/prod.Dockerfile \ - - - docker pull test.ecr.path/test-image:prod-1.0.0 - docker image ls test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_create_candidate_image1 -# ############################################################################# - - -class Test_docker_create_candidate_image1(_DockerFlowTestHelper): - """ - Test creating a candidate Docker image. - """ - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks specific to this test - class. - """ - self.set_up_test() - # Mock git hash. - self.git_hash_patcher = umock.patch( - "helpers.hgit.get_head_hash", - return_value="4759b3685f903e6c669096e960b248ec31c63b69", - ) - self.mock_git_hash = self.git_hash_patcher.start() - self.patchers["git_hash"] = self.git_hash_patcher - # Mock workspace size check. - self.workspace_check_patcher = umock.patch( - "helpers.lib_tasks_docker_release._check_workspace_dir_sizes" - ) - self.mock_workspace_check = self.workspace_check_patcher.start() - self.patchers["workspace_check"] = self.workspace_check_patcher - # Mock file existence check to handle both paths. 
- self.file_exists_patcher = umock.patch( - "helpers.hdbg.dassert_file_exists" - ) - self.mock_file_exists = self.file_exists_patcher.start() - self.patchers["file_exists"] = self.file_exists_patcher - # Mock `docker_build_prod_image()`. - self.build_prod_patcher = umock.patch( - "helpers.lib_tasks_docker_release.docker_build_prod_image" - ) - self.mock_build_prod = self.build_prod_patcher.start() - self.patchers["build_prod"] = self.build_prod_patcher - # Mock `docker_push_prod_candidate_image()`. - self.push_prod_patcher = umock.patch( - "helpers.lib_tasks_docker_release.docker_push_prod_candidate_image" - ) - self.mock_push_prod = self.push_prod_patcher.start() - self.patchers["push_prod"] = self.push_prod_patcher - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - def test_aws_ecr1(self) -> None: - """ - Test creating and pushing to AWS ECR. - - This test checks: - - Task definition update with correct parameters - - Proper command construction for aws_update_task_definition.py - """ - # Call tested function. - hltadore.docker_create_candidate_image( - self.mock_ctx, - user_tag="test_user", - ) - # Verify the mocks were called with correct parameters. 
- self.mock_build_prod.assert_called_once_with( - self.mock_ctx, - container_dir_name=".", - version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, - candidate=True, - tag="test_user-4759b3685f903e6c669096e960b248ec31c63b69", - ) - self.mock_push_prod.assert_called_once_with( - self.mock_ctx, - "test_user-4759b3685f903e6c669096e960b248ec31c63b69", - ) - - -# ############################################################################# -# Test_docker_update_prod_task_definition1 -# ############################################################################# - - -class Test_docker_update_prod_task_definition1(_DockerFlowTestHelper): - """ - Test updating a prod task definition to the desired version. - """ - - @pytest.fixture(autouse=True) - def aws_credentials(self) -> None: - """ - Mocked AWS credentials for moto. - """ - os.environ["DOCKER_MOCK_AWS_ACCESS_KEY_ID"] = "testing" - os.environ["DOCKER_MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["DOCKER_MOCK_AWS_SECURITY_TOKEN"] = "testing" - os.environ["DOCKER_MOCK_AWS_SESSION_TOKEN"] = "testing" - os.environ["DOCKER_MOCK_AWS_DEFAULT_REGION"] = "us-east-1" - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks specific to this test - class. - """ - self.set_up_test() - # Mock AWS and S3 functionality. - self.aws_patcher = umock.patch( - "helpers.haws.get_task_definition_image_url" - ) - self.mock_aws = self.aws_patcher.start() - self.mock_aws.return_value = ( - "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69" - ) - self.patchers["aws"] = self.aws_patcher - self.s3_patcher = umock.patch("helpers.hs3.get_s3fs") - self.mock_s3 = self.s3_patcher.start() - self.mock_s3.return_value.cat.return_value = b"test_content" - self.patchers["s3"] = self.s3_patcher - # Mock file operations. 
- self.file_patcher = umock.patch( - "helpers.hs3.from_file", return_value="test_content" - ) - self.mock_file = self.file_patcher.start() - self.patchers["file"] = self.file_patcher - # Mock listdir to return test DAG files. - self.listdir_patcher = umock.patch( - "helpers.hs3.listdir", - return_value=["/app/im_v2/airflow/dags/test_dag.py"], - ) - self.mock_listdir = self.listdir_patcher.start() - self.patchers["listdir"] = self.listdir_patcher - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - # Clean up environment variables. - for key in [ - "DOCKER_MOCK_AWS_ACCESS_KEY_ID", - "DOCKER_MOCK_AWS_SECRET_ACCESS_KEY", - "DOCKER_MOCK_AWS_SECURITY_TOKEN", - "DOCKER_MOCK_AWS_SESSION_TOKEN", - "DOCKER_MOCK_AWS_DEFAULT_REGION", - ]: - if key in os.environ: - del os.environ[key] - # Call parent teardown. - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - @moto.mock_aws - @umock.patch("helpers.haws.update_task_definition") - @umock.patch("helpers.haws.get_ecs_client") - def test_promotion_to_prod( - self, - mock_get_ecs_client: umock.Mock, - mock_update_task_definition: umock.Mock, - ) -> None: - """ - Test the promotion of a preprod Docker image and DAGs to production. - - This test checks: - - Task definition update workflow - - Preprod to prod image conversion. - - DAG file synchronization - - Image tagging and pushing - """ - # Mock AWS ECS client using moto and register a task definition. 
- region = "us-east-1" - mock_ecs_client = boto3.client("ecs", region_name=region) - mock_ecs_client.register_task_definition( - family="test_task", - containerDefinitions=[ - { - "name": "test-container", - "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", - } - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - mock_get_ecs_client.return_value = mock_ecs_client - # Add mock client to patchers for cleanup. - self.ecs_client_patcher = umock.patch( - "boto3.client", return_value=mock_ecs_client - ) - self.mock_ecs_client = self.ecs_client_patcher.start() - self.patchers["ecs_client_test1"] = self.ecs_client_patcher - # Call tested function. - hltadore.docker_update_prod_task_definition( - self.mock_ctx, - version=self.test_version, - preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", - airflow_dags_s3_path="s3://test-bucket/dags/", - task_definition="test_task", - ) - expected = r""" - docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod - docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - # Check whether `update_task_definition` was called with the expected arguments. 
- expected_image_url = "test.ecr.path/test-image:prod-1.0.0" - mock_update_task_definition.assert_called_once_with( - "test_task", expected_image_url, environment="prod" - ) - - @moto.mock_aws - @umock.patch("helpers.haws.get_ecs_client") - def test_promotion_to_prod_exception_handling( - self, mock_get_ecs_client: umock.Mock - ) -> None: - """ - Test exception handling and rollback behavior when updating prod task - definition. - - This test checks: - - Exception handling during task definition update - - Rollback of task definition to original image - - Rollback of S3 DAG files - - Proper error propagation - """ - # Mock AWS ECS client using moto and register a task definition. - region = "us-east-1" - mock_ecs_client = boto3.client("ecs", region_name=region) - mock_ecs_client.register_task_definition( - family="test_task", - containerDefinitions=[ - { - "name": "test-container", - "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", - } - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - mock_get_ecs_client.return_value = mock_ecs_client - # Add mock client to patchers for cleanup. - self.ecs_client_patcher = umock.patch( - "boto3.client", return_value=mock_ecs_client - ) - self.mock_ecs_client = self.ecs_client_patcher.start() - self.patchers["ecs_client_test2"] = self.ecs_client_patcher - # Mock S3 bucket operations to simulate a failure. - self.mock_s3.return_value.put.side_effect = Exception("S3 upload failed") - # Call tested function and verify exception is raised. - with self.assertRaises(Exception) as cm: - hltadore.docker_update_prod_task_definition( - self.mock_ctx, - version=self.test_version, - preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", - airflow_dags_s3_path="s3://test-bucket/dags/", - task_definition="test_task", - ) - # Check the error message. 
- self.assertIn("S3 upload failed", str(cm.exception)) - # Check whether rollback commands were executed. - expected = r""" - docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod - docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - # Check whether task definition was rolled back. - self.mock_aws.assert_called_with("test_task") - - -# ############################################################################# -# Test_docker_tag_push_dev_image1 -# ############################################################################# - - -class Test_docker_tag_push_dev_image1(_DockerFlowTestHelper): - """ - Test tagging and pushing dev image from a base registry to multiple registries. - """ - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks for GHCR workflow. - """ - super().set_up_test() - # Mock version retrieval from changelog. - self.changelog_version_patcher = umock.patch( - "helpers.hversion.get_changelog_version" - ) - self.mock_changelog_version = self.changelog_version_patcher.start() - self.mock_changelog_version.return_value = self.test_version - # Mock repo config for GHCR registry URL and image name. - self.get_container_registry_url_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_container_registry_url" - ) - self.mock_get_container_registry_url = ( - self.get_container_registry_url_patcher.start() - ) - # Use side_effect to return different values based on registry. 
- self.mock_get_container_registry_url.side_effect = lambda registry: { - "ghcr": "ghcr.io/causify-ai", - "ecr": "test.ecr.path", - }.get(registry, "ghcr.io/causify-ai") - # Add new patchers to cleanup list. - self.patchers.update( - { - "changelog_version": self.changelog_version_patcher, - "container_registry_url": self.get_container_registry_url_patcher, - } - ) - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - def test_normal_execution1(self) -> None: - """ - Test normal execution without dry_run. - - This test checks: - - GHCR image pulling - - Tagging for GHCR and AWS ECR - - Pushing to both registries - - Versioned and latest image handling - """ - # Call tested function. - hltadore.docker_tag_push_dev_image( - self.mock_ctx, - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=False, - ) - # Verify expected Docker commands were executed. - expected = r""" - docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev - docker push ghcr.io/causify-ai/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 - docker push ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dry_run1(self) -> None: - """ - Test dry_run mode execution. 
- - This test checks: - - No actual Docker commands are executed when dry_run=True - - All operations are simulated - - Function completes without errors - - Mock calls should include dry_run parameter - """ - # Call tested function with dry_run enabled. - hltadore.docker_tag_push_dev_image( - self.mock_ctx, - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=True, - ) - # Verify expected Docker commands were executed. - expected = r""" - docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev - docker push ghcr.io/causify-ai/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 - docker push ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_build_test_dev_image1 -# ############################################################################# - - -class Test_docker_build_test_dev_image1(_DockerFlowTestHelper): - """ - Test the complete periodic dev image release workflow. - """ - - def set_up_test(self) -> None: - """ - Set up test environment with additional mocks for the dev image - workflow. - """ - super().set_up_test() - # Mock version operations. 
- self.get_changelog_version_patcher = umock.patch( - "helpers.hversion.get_changelog_version" - ) - self.mock_get_changelog_version = ( - self.get_changelog_version_patcher.start() - ) - self.mock_get_changelog_version.return_value = "2.3.0" - self.bump_version_patcher = umock.patch("helpers.hversion.bump_version") - self.mock_bump_version = self.bump_version_patcher.start() - self.mock_bump_version.return_value = "2.4.0" - # Mock repo config methods. - self.get_release_team_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_release_team" - ) - self.mock_get_release_team = self.get_release_team_patcher.start() - self.mock_get_release_team.return_value = "dev_system" - self.get_issue_prefix_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_issue_prefix" - ) - self.mock_get_issue_prefix = self.get_issue_prefix_patcher.start() - self.mock_get_issue_prefix.return_value = "TestTask" - self.get_container_registry_url_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_container_registry_url" - ) - self.mock_get_container_registry_url = ( - self.get_container_registry_url_patcher.start() - ) - self.mock_get_container_registry_url.return_value = "ghcr.io/causify-ai" - # Mock GitHub operations. - self.gh_get_team_member_names_patcher = umock.patch( - "helpers.lib_tasks_gh.gh_get_team_member_names" - ) - self.mock_gh_get_team_member_names = ( - self.gh_get_team_member_names_patcher.start() - ) - self.mock_gh_get_team_member_names.return_value = ["user1", "user2"] - self.gh_create_pr_patcher = umock.patch( - "helpers.lib_tasks_gh.gh_create_pr" - ) - self.mock_gh_create_pr = self.gh_create_pr_patcher.start() - # Mock file operations. 
- self.get_client_root_patcher = umock.patch( - "helpers.hversion._get_client_root" - ) - self.mock_get_client_root = self.get_client_root_patcher.start() - self.mock_get_client_root.return_value = "/test/root" - self.from_file_patcher = umock.patch("helpers.hio.from_file") - self.mock_from_file = self.from_file_patcher.start() - self.mock_from_file.return_value = "# Existing changelog content\n" - self.to_file_patcher = umock.patch("helpers.hio.to_file") - self.mock_to_file = self.to_file_patcher.start() - # Mock file existence check for dassert_file_exists (changelog validation). - self.file_exists_patcher = umock.patch( - "helpers.hdbg.dassert_file_exists" - ) - self.mock_file_exists = self.file_exists_patcher.start() - # Mock os.path.exists selectively for file staging logic. - # Store the original function before patching - original_exists = os.path.exists - # Define which files should exist for staging - staged_files = { - "/test/root/./devops/docker_build/poetry.lock", - "/test/root/./devops/docker_build/pip_list.txt", - "/test/root/./changelog.txt", - } - - def selective_exists(path): - # Return True for staged files, use original function for everything else - if path in staged_files: - return True - return original_exists(path) - - self.path_exists_patcher = umock.patch( - "os.path.exists", side_effect=selective_exists - ) - self.mock_path_exists = self.path_exists_patcher.start() - # Mock date operations. - self.date_patcher = umock.patch("datetime.date") - self.mock_date = self.date_patcher.start() - # Set up strftime to return different formats based on the format string. - # Branch name uses %Y%m%d, changelog uses %Y-%m-%d - self.mock_date.today.return_value.strftime.side_effect = lambda fmt: { - "%Y%m%d": "20251023", - "%Y-%m-%d": "2025-10-23", - }.get(fmt, "2025-10-23") - # Mock Docker image operations. 
- self.get_image_patcher = umock.patch( - "helpers.lib_tasks_docker.get_image" - ) - self.mock_get_image = self.get_image_patcher.start() - self.mock_get_image.return_value = ( - "test.ecr.path/test-image:local-testuser-2.4.0" - ) - # Mock _run_tests to prevent actual test execution. - self.run_tests_patcher = umock.patch( - "helpers.lib_tasks_docker_release._run_tests" - ) - self.mock_run_tests = self.run_tests_patcher.start() - # Mock is_inside_ci to control CI-specific behavior. - self.is_inside_ci_patcher = umock.patch("helpers.hserver.is_inside_ci") - self.mock_is_inside_ci = self.is_inside_ci_patcher.start() - # Default to True to simulate CI environment. - self.mock_is_inside_ci.return_value = True - # Add all new patchers to cleanup list. - self.patchers.update( - { - "get_changelog_version": self.get_changelog_version_patcher, - "bump_version": self.bump_version_patcher, - "get_release_team": self.get_release_team_patcher, - "get_issue_prefix": self.get_issue_prefix_patcher, - "container_registry_url": self.get_container_registry_url_patcher, - "gh_get_team_member_names": self.gh_get_team_member_names_patcher, - "gh_create_pr": self.gh_create_pr_patcher, - "get_client_root": self.get_client_root_patcher, - "from_file": self.from_file_patcher, - "to_file": self.to_file_patcher, - "file_exists": self.file_exists_patcher, - "path_exists": self.path_exists_patcher, - "date": self.date_patcher, - "get_image": self.get_image_patcher, - "run_tests": self.run_tests_patcher, - "is_inside_ci": self.is_inside_ci_patcher, - } - ) - - def test_complete_workflow1(self) -> None: - """ - Test the complete periodic dev image release workflow. - """ - # Call the tested function. - hltadore.docker_build_test_dev_image( - self.mock_ctx, - reviewers="", # Empty to trigger team lookup - container_dir_name=".", - ) - # Verify version operations were called. 
- self.mock_bump_version.assert_called_once_with( - "2.3.0", bump_type="minor" - ) - # Verify GitHub team lookup was performed. - self.mock_get_release_team.assert_called_once() - self.mock_gh_get_team_member_names.assert_called_once_with("dev_system") - # Verify issue prefix was fetched for branch creation. - self.mock_get_issue_prefix.assert_called() - # Verify PR was created with team members as reviewers. - self.mock_gh_create_pr.assert_called_once() - pr_call_args = self.mock_gh_create_pr.call_args - self.assertIn("reviewer", pr_call_args.kwargs) - self.assertEqual(pr_call_args.kwargs["reviewer"], "user1,user2") - # Verify expected Docker and Git commands were executed. - expected = r""" - git checkout -b TestTask_Periodic_image_release_20251023 - cp -f devops/docker_build/dockerignore.dev /app/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - \ - --build-arg AM_CONTAINER_VERSION=2.4.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-testuser-2.4.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 2.4.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-testuser-2.4.0 - sudo chmod -R 777 .git/objects/ - git add /test/root/./devops/docker_build/poetry.lock - git add /test/root/./devops/docker_build/pip_list.txt - git add /test/root/./changelog.txt - git commit -m "Poetry output from the v2.4.0 build" --no-verify - git push origin TestTask_Periodic_image_release_20251023 - docker tag test.ecr.path/test-image:local-testuser-2.4.0 ghcr.io/causify-ai/test-image:dev-2.4.0 - docker push ghcr.io/causify-ai/test-image:dev-2.4.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_with_existing_reviewers1(self) -> None: - """ - Test the workflow when reviewers is already provided. - """ - # Call the tested function with a specific reviewer. - hltadore.docker_build_test_dev_image( - self.mock_ctx, - reviewers="specific_user", - container_dir_name=".", - ) - # Verify PR was created with the provided reviewer. - self.mock_gh_create_pr.assert_called_once() - pr_call_args = self.mock_gh_create_pr.call_args - self.assertIn("reviewer", pr_call_args.kwargs) - self.assertEqual(pr_call_args.kwargs["reviewer"], "specific_user") - # Verify team lookup was NOT performed since reviewers was provided. 
- self.mock_gh_get_team_member_names.assert_not_called() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py deleted file mode 100644 index 886e1dc36..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py +++ /dev/null @@ -1,267 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.lib_tasks_find as hlitafin -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_find_short_import1 -# ############################################################################# - - -class Test_find_short_import1(hunitest.TestCase): - def test1(self) -> None: - iterator = [ - ("file1.py", 10, "import dataflow.core.dag_runner as dtfcodarun"), - ("file1.py", 11, "import helpers.hpandas as hpandas"), - ] - results = hlitafin._find_short_import(iterator, "dtfcodarun") - actual = "\n".join(map(str, results)) - # pylint: disable=line-too-long - expected = r"""('file1.py', 10, 'import dataflow.core.dag_runner as dtfcodarun', 'dtfcodarun', 'import dataflow.core.dag_runner as dtfcodarun')""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_find_func_class_uses1 -# ############################################################################# - - -class Test_find_func_class_uses1(hunitest.TestCase): - def test1(self) -> None: - iterator = [ - ( - "file1.py", - 10, - "dag_runner = 
dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)", - ), - ( - "file1.py", - 11, - "This test is similar to `TestRealTimeDagRunner1`. It uses:", - ), - ("file1.py", 12, "dag_builder: dtfcodabui.DagRunner,"), - ("file1.py", 13, ":param dag_builder: `DagRunner` instance"), - ] - results = hlitafin._find_func_class_uses(iterator, "DagRunner") - actual = "\n".join(map(str, results)) - expected = r""" - ('file1.py', 10, 'dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)', 'dtfamsys', 'RealTimeDagRunner') - ('file1.py', 12, 'dag_builder: dtfcodabui.DagRunner,', 'dtfcodabui', 'DagRunner')""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestLibTasksRunTests1 -# ############################################################################# - - -class TestLibTasksRunTests1(hunitest.TestCase): - """ - Test `_find_test_files()`, `_find_test_decorator()`. - """ - - def test_find_test_files1(self) -> None: - """ - Find all the test files in the current dir. - """ - files = hlitafin._find_test_files() - # For sure there are more than 1 test files: at least this one. - self.assertGreater(len(files), 1) - - def test_find_test_files2(self) -> None: - """ - Find all the test files from the top of the super module root. - """ - git_root = hgit.get_client_root(super_module=True) - files = hlitafin._find_test_files(git_root) - # For sure there are more than 1 test files: at least this one. - self.assertGreater(len(files), 1) - - def test_find_test_class1(self) -> None: - """ - Find the current test class. 
- """ - git_root = hgit.get_client_root(super_module=True) - file_names = hlitafin._find_test_files(git_root) - # - file_names = hlitafin._find_test_class( - "TestLibTasksRunTests1", file_names - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test_find_test_class2(self) -> None: - """ - Find the current test class. - """ - file_names = [__file__] - # - file_names = hlitafin._find_test_class( - "TestLibTasksRunTests1", file_names - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test_find_test_class3(self) -> None: - """ - Create synthetic code and look for a class. - """ - scratch_space = self.get_scratch_space() - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - file_names = hlitafin._find_test_files(dir_name) - act_file_names = [os.path.relpath(d, scratch_space) for d in file_names] - exp_file_names = ["test/test_that.py", "test/test_this.py"] - self.assert_equal(str(act_file_names), str(exp_file_names)) - # - actual = hlitafin._find_test_class("TestHelloWorld", file_names) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(actual) - expected = [ - "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_class3/tmp.scratch/" - "test/test_this.py::TestHelloWorld" - ] - self.assert_equal(str(actual), str(expected), 
purify_text=True) - - def test_find_test_decorator1(self) -> None: - """ - Find test functions in the "no_container" in synthetic code. - """ - scratch_space = self.get_scratch_space() - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - @pytest.mark.no_container - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - file_names = hlitafin._find_test_files(dir_name) - actual = hlitafin._find_test_decorator("no_container", file_names) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(actual) - expected = [ - "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_decorator1/" - "tmp.scratch/test/test_that.py" - ] - self.assert_equal(str(actual), str(expected), purify_text=True) - - # TODO(gp): This test can run in amp. - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_find_test_decorator2(self) -> None: - """ - Find test functions in the "no_container" test list. - """ - file_name = hgit.find_file_in_git_tree("hunit_test.py") - file_names = [file_name] - actual = hlitafin._find_test_decorator("qa", file_names) - expected = ["$GIT_ROOT/helpers/hunit_test.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - -# ############################################################################# -# Test_find_check_string_output1 -# ############################################################################# - - -class Test_find_check_string_output1(hunitest.TestCase): - def helper(self, expected: str, fuzzy_match: bool) -> None: - # Look for the `check_string()` corresponding to this test. 
- ctx = httestlib._build_mock_context_returning_ok() - class_name = self.__class__.__name__ - method_name = self._testMethodName - as_python = True - # We don't want to copy but just print. - pbcopy = False - actual = hlitafin.find_check_string_output( - ctx, class_name, method_name, as_python, fuzzy_match, pbcopy - ) - # Check that it matches exactly. - self.assert_equal(actual, expected, fuzzy_match=False) - - def test1(self) -> None: - """ - Test `find_check_string_output()` by searching the `check_string` of - this test. - """ - # Force to generate a `check_string` file so we can search for it. - actual = "A fake check_string output to use for test1" - self.check_string(actual) - # Check. - expected = ''' - actual = - expected = r""" - A fake check_string output to use for test1 - """.lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - ''' - self.helper(expected, fuzzy_match=False) - - def test2(self) -> None: - """ - Like test1 but using `fuzzy_match=True`. - """ - # Force to generate a `check_string` file so we can search for it. - actual = "A fake check_string output to use for test2" - self.check_string(actual) - # Check. 
- expected = ''' - actual = - expected = r""" -A fake check_string output to use for test2 - - """.lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match=True) - ''' - self.helper(expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py deleted file mode 100644 index a5ee64c9e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py +++ /dev/null @@ -1,133 +0,0 @@ -import logging -import unittest.mock as umock - -import pytest - -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_gh as hlitagh - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# TestLibTasks1 -# ############################################################################# - - -class TestLibTasks1(hunitest.TestCase): - """ - Test some auxiliary functions, e.g., `_get_gh_issue_title()`. - """ - - @pytest.mark.skip("CmTask #2362.") - def test_get_gh_issue_title1(self) -> None: - issue_id = 1 - repo = "amp" - actual = hlitagh._get_gh_issue_title(issue_id, repo) - expected = ( - "AmpTask1_Bridge_Python_and_R", - "https://github.com/alphamatic/amp/issues/1", - ) - self.assert_equal(str(actual), str(expected)) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="""Skip unless helpers is the supermodule. Fails when updating submodules; - passes in fast tests super-repo run. 
See CmTask10845.""", - ) - def test_get_gh_issue_title4(self) -> None: - cmd = "invoke gh_login" - hsystem.system(cmd) - # - issue_id = 1 - repo = "current" - _ = hlitagh._get_gh_issue_title(issue_id, repo) - - def test_get_org_name1(self) -> None: - """ - Test _get_org_name when org_name is provided. - """ - org_name = "test-org" - result = hlitagh._get_org_name(org_name) - expected = "test-org" - self.assertEqual(result, expected) - - @umock.patch.object(hgit, "get_repo_full_name_from_dirname") - def test_get_org_name2(self, mock_get_repo: umock.Mock) -> None: - """ - Test _get_org_name when org_name is empty (infers from repo). - """ - mock_get_repo.return_value = "causify-ai/helpers" - result = hlitagh._get_org_name("") - expected = "causify-ai" - self.assertEqual(result, expected) - mock_get_repo.assert_called_once_with(".", include_host_name=False) - - -# ############################################################################# -# TestGhOrgTeamFunctions -# ############################################################################# - - -class TestGhOrgTeamFunctions(hunitest.TestCase): - """ - Test gh_get_org_team_names and gh_get_team_member_names with mocked data. - """ - - @umock.patch.object(hlitagh, "_gh_run_and_get_json") - @umock.patch.object(hlitagh, "_get_org_name") - def test_gh_get_org_team_names1( - self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock - ) -> None: - """ - Test gh_get_org_team_names with sorted team names. - """ - # Setup mocks. - mock_get_org_name.return_value = "test-org" - mock_gh_run.return_value = [ - {"slug": "dev_backend", "id": 1}, - {"slug": "dev_frontend", "id": 2}, - {"slug": "qa_team", "id": 3}, - ] - # Call function. - result = hlitagh.gh_get_org_team_names("test-org", sort=True) - # Verify result. - expected = ["dev_backend", "dev_frontend", "qa_team"] - self.assertEqual(result, expected) - # Verify mocks were called correctly. 
- mock_get_org_name.assert_called_once_with("test-org") - mock_gh_run.assert_called_once_with( - "gh api /orgs/test-org/teams --paginate" - ) - - @umock.patch.object(hlitagh, "_gh_run_and_get_json") - @umock.patch.object(hlitagh, "_get_org_name") - def test_gh_get_team_member_names1( - self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock - ) -> None: - """ - Test gh_get_team_member_names with member list. - """ - # Setup mocks. - mock_get_org_name.return_value = "test-org" - mock_gh_run.return_value = [ - {"login": "user1", "id": 101}, - {"login": "user2", "id": 102}, - {"login": "user3", "id": 103}, - ] - # Call function. - result = hlitagh.gh_get_team_member_names( - "dev_team", org_name="test-org" - ) - # Verify result. - expected = ["user1", "user2", "user3"] - self.assertEqual(result, expected) - # Verify mocks were called correctly. - mock_get_org_name.assert_called_once_with("test-org") - mock_gh_run.assert_called_once_with( - "gh api /orgs/test-org/teams/dev_team/members --paginate" - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py deleted file mode 100644 index 2695a505f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py +++ /dev/null @@ -1,249 +0,0 @@ -import logging -from typing import List - -import pytest - -import helpers.hgit as hgit -import helpers.hunit_test as hunitest -import helpers.lib_tasks_git as hlitagit -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# TestLibTasksGitCreatePatch1 -# ############################################################################# - - 
-@pytest.mark.slow(reason="Around 7s") -@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class TestLibTasksGitCreatePatch1(hunitest.TestCase): - """ - Test `git_patch_create()`. - """ - - @staticmethod - def helper( - modified: bool, branch: bool, last_commit: bool, files: str - ) -> None: - ctx = httestlib._build_mock_context_returning_ok() - # - mode = "tar" - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - # - mode = "diff" - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - - def test1(self) -> None: - """ - Test modified files mode. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - modified = True - branch = False - last_commit = False - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test2(self) -> None: - """ - Test branch mode. - """ - # Prepare inputs. - modified = False - branch = True - last_commit = False - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test3(self) -> None: - """ - Test last commit mode. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - modified = False - branch = False - last_commit = True - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test4(self) -> None: - """ - Test tar mode with specific files. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - ctx = httestlib._build_mock_context_returning_ok() - mode = "tar" - modified = True - branch = False - last_commit = False - files = __file__ - # Run test. - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - - def test5(self) -> None: - """ - Test diff mode with files but no mode flag raises AssertionError. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. 
- ctx = httestlib._build_mock_context_returning_ok() - mode = "diff" - modified = False - branch = False - last_commit = False - files = __file__ - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - actual = str(cm.exception) - expected = """ - * Failed assertion * - '0' - == - '1' - Specify only one among --modified, --branch, --last-commit - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestFilterGitFilesByType -# ############################################################################# - - -class TestFilterGitFilesByType(hunitest.TestCase): - """ - Test _filter_git_files_by_type() function. - """ - - def helper( - self, files: List[str], types: List[str], expected: List[str] - ) -> None: - """ - Test helper for _filter_git_files_by_type. - - :param files: List of files to filter - :param types: List of file types to filter by - :param expected: Expected filtered result - """ - # Run test. - result = hlitagit._filter_git_files_by_type(files, types) - # Check outputs. - self.assertEqual(result, expected) - - def test1(self) -> None: - """ - Test filtering to include only Python files. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - types = ["py"] - # Prepare outputs. - expected = ["foo.py"] - # Run test. - self.helper(files, types, expected) - - def test2(self) -> None: - """ - Test filtering to include only Jupyter notebooks. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - types = ["ipynb"] - # Prepare outputs. - expected = ["bar.ipynb"] - # Run test. - self.helper(files, types, expected) - - def test3(self) -> None: - """ - Test filtering to include only Markdown files. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - types = ["md"] - # Prepare outputs. 
- expected = ["baz.md"] - # Run test. - self.helper(files, types, expected) - - def test4(self) -> None: - """ - Test filtering with multiple file types. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md", "qux.txt"] - types = ["py", "md"] - # Prepare outputs. - expected = ["foo.py", "baz.md"] - # Run test. - self.helper(files, types, expected) - - def test5(self) -> None: - """ - Test filtering with all file types. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - types = ["py", "ipynb", "md"] - # Prepare outputs. - expected = files - # Run test. - self.helper(files, types, expected) - - def test6(self) -> None: - """ - Test filtering with empty file list. - """ - # Prepare inputs. - files: List[str] = [] - types = ["py", "ipynb"] - # Prepare outputs. - expected: List[str] = [] - # Run test. - self.helper(files, types, expected) - - def test7(self) -> None: - """ - Test filtering when no files match. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - types = ["txt"] - # Prepare outputs. - expected: List[str] = [] - # Run test. - self.helper(files, types, expected) - - def test8(self) -> None: - """ - Test that filtering preserves file order. - """ - # Prepare inputs. - files = ["c.py", "a.ipynb", "b.md", "d.py"] - types = ["py", "md"] - # Prepare outputs. - expected = ["c.py", "b.md", "d.py"] - # Run test. 
- self.helper(files, types, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py deleted file mode 100644 index 47a41e0d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py +++ /dev/null @@ -1,27 +0,0 @@ -import helpers.hunit_test as hunitest -import helpers.lib_tasks_integrate as hlitaint - - -# ############################################################################# -# Test_infer_dst_dir1 -# ############################################################################# - - -class Test_infer_dst_dir1(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - src_dir = "/src/cmamp1/oms/broker/broker.py" - # Call function to test. - actual = hlitaint._infer_dst_file_path( - src_dir, - default_src_dir_basename="cmamp1", - default_dst_dir_basename="amp1", - check_exists=False, - ) - # Define expected output. - expected = ( - "/src/amp1/oms/broker/broker.py", - "oms/broker/broker.py", - ) - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py deleted file mode 100644 index cb40f72a5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py +++ /dev/null @@ -1,32 +0,0 @@ -import logging - -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_lint as hlitalin -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_lint_check_if_it_was_run -# ############################################################################# - - -class Test_lint_check_if_it_was_run(hunitest.TestCase): - """ - Test `lint_check_if_it_was_run()`. - """ - - def test1(self) -> None: - # Build a mock context. - ctx = httestlib._build_mock_context_returning_ok() - # Stash the leftover changes from the previous tests. - cmd = "git stash --include-untracked" - hsystem.system(cmd) - # Simple check that the function does not fail. - _ = hlitalin.lint_check_if_it_was_run(ctx) - # Pop the stashed changes to restore the original state. - cmd = "git stash pop" - # Do not abort on error because the stash may be empty. 
- hsystem.system(cmd, abort_on_error=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py deleted file mode 100644 index 321f7f515..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py +++ /dev/null @@ -1,1163 +0,0 @@ -import logging -import os -import re -import unittest.mock as umock -from typing import List - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_pytest as hlitapyt -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -def _remove_junit_suite_name(text: str) -> str: - """ - Remove the junit suite name from the input text. - - E.g. '-o junit_suite_name="helpers"' -> '-o junit_suite_name=""' - - :param text: input text to process - :return: text with the junit suite name removed - """ - txt = re.sub(r'(-o\s*junit_suite_name=)"[^"]*"', r'\1""', text) - return txt - - -def _purify_pytest_command(text: str) -> str: - """ - Purify the pytest command by removing environment-specific values. 
- - :param text: input text to process - :return: text with environment-specific values removed - """ - txt = _remove_junit_suite_name(text) - return txt - - -# ############################################################################# -# Test_build_run_command_line1 -# ############################################################################# - - -class Test_build_run_command_line1(hunitest.TestCase): - def run_fast_tests1_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Basic run fast tests. - - :param is_dev_csfy_return_value: mocking the return_value of - `hserver.is_dev_csfy()` - :param is_inside_ci_return_value: mocking the return_value of - `hserver.is_inside_ci()` - :param expected: expected output string - """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests1_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . 
' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests1_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests1_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_inside_ci_return_value = False - is_dev_csfy_return_value = False - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests2_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Coverage and collect-only. - - See `run_fast_tests1_helper()` for params description. 
- """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = True - collect_only = True - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests2_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - r'pytest -m "not slow and not superslow" . ' - r"-o timeout_func_only=true --timeout 5 --reruns 2 " - r'--only-rerun "Failed: Timeout" --cov=.' - r" --cov-branch --cov-report term-missing --cov-report html " - r"--collect-only -n 1 " - r"--junit-xml=tmp.junit.xml " - r'-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests2_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests2_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests2_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - r'pytest -m "not slow and not superslow" . 
' - r"-o timeout_func_only=true --timeout 50 --reruns 2 " - r'--only-rerun "Failed: Timeout" --cov=.' - r" --cov-branch --cov-report term-missing --cov-report html " - r"--collect-only -n 1 " - r"--junit-xml=tmp.junit.xml " - r'-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests2_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - @pytest.mark.skip(reason="Fix support for pytest_mark") - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_run_fast_tests4(self) -> None: - """ - Select pytest_mark. - """ - scratch_space = self.get_scratch_space(use_absolute_path=False) - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - @pytest.mark.no_container - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - test_list_name = "fast_tests" - custom_marker = "" - pytest_opts = "" - skip_submodules = True - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - actual = hlitapyt._build_run_command_line( - test_list_name, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - expected = ( - "pytest Test_build_run_command_line1.test_run_fast_tests4/tmp.scratch/" - "test/test_that.py" - ) - self.assert_equal(actual, expected) - - def run_fast_tests5_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Basic run fast tests tee-ing to a file. Mock depending on - `is_dev_csfy_return_value`. - - See `run_fast_tests1_helper()` for params description. 
- """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = True - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests5_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - " 2>&1" - " | tee tmp.pytest.fast_tests.log" - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests5_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests5_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests5_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . 
' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - " 2>&1" - " | tee tmp.pytest.fast_tests.log" - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests5_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests6_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Run fast tests with a custom test marker. - - See `run_fast_tests1_helper()` for params description. - """ - custom_marker = "optimizer" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests6_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "optimizer and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests6_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests6_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. 
- """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests6_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "optimizer and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests6_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests7_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Run fast tests with parallelization. - - See `run_fast_tests1_helper()` for params description. - """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "auto" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests7_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. 
- """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n auto ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests7_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests7_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests7_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n auto ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests7_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def get_custom_marker_helper( - self, - run_only_test_list: str, - skip_test_list: str, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Check that a correct cmd line is generated with custom marker string. 
- - :param run_only_test_list: a string of comma-separated markers - to run - :param skip_test_list: a string of comma-separated markers to - skip - :param is_dev_csfy_return_value: see `run_fast_tests1_helper()` - :param is_inside_ci_return_value: see `run_fast_tests1_helper()` - :param expected: expected output string - """ - # Mock settings. - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # Mock test. - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - custom_marker = hlitapyt._get_custom_marker( - run_only_test_list=run_only_test_list, - skip_test_list=skip_test_list, - ) - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_get_custom_marker1_full(self) -> None: - # Input params. - run_only_test_list = "run_marker_1,run_marker_2" - skip_test_list = "skip_marker_1,skip_marker_2" - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - # Expected output. - expected = ( - 'pytest -m "' - "run_marker_1 and run_marker_2 " - "and not requires_ck_infra " - "and not skip_marker_1 and not skip_marker_2 " - 'and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - # Mock check. - self.get_custom_marker_helper( - run_only_test_list, - skip_test_list, - is_dev_csfy_return_value, - is_inside_ci_return_value, - expected, - ) - - def get_custom_marker2_empty(self) -> None: - # Input params. 
- run_only_test_list = "" - skip_test_list = "" - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - # Expected output. - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1' - ) - # Mock check. - self.get_custom_marker_helper( - run_only_test_list, - skip_test_list, - is_dev_csfy_return_value, - is_inside_ci_return_value, - expected, - ) - - -# ############################################################################# -# Test_pytest_repro1 -# ############################################################################# - - -class Test_pytest_repro1(hunitest.TestCase): - def helper(self, file_name: str, mode: str, expected: List[str]) -> None: - script_name = os.path.join( - self.get_scratch_space(), "tmp.pytest_repro.sh" - ) - ctx = httestlib._build_mock_context_returning_ok() - actual = hlitapyt.pytest_repro( - ctx, mode=mode, file_name=file_name, script_name=script_name - ) - hdbg.dassert_isinstance(actual, str) - expected = "\n".join(["pytest " + x for x in expected]) - self.assert_equal(actual, expected) - - # //////////////////////////////////////////////////////////////////////////// - - def _build_pytest_filehelper(self, txt: str) -> str: - txt = hprint.dedent(txt) - file_name = os.path.join(self.get_scratch_space(), "cache/lastfailed") - hio.to_file(file_name, txt) - return file_name - - def _build_pytest_file1(self) -> str: - txt = """ - { - "dev_scripts/testing/test/test_run_tests.py": true, - "dev_scripts/testing/test/test_run_tests2.py": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true, - "documentation/scripts/test/test_all.py": true, - "documentation/scripts/test/test_render_md.py": true, - "helpers/test/helpers/test/test_list.py::Test_list_1": true, - "helpers/test/test_cache.py::TestAmpTask1407": true - } - """ - return self._build_pytest_filehelper(txt) - - def test_tests1(self) -> None: - file_name = 
self._build_pytest_file1() - mode = "tests" - expected = [ - "dev_scripts/testing/test/test_run_tests.py", - "dev_scripts/testing/test/test_run_tests2.py", - "documentation/scripts/test/test_all.py", - "documentation/scripts/test/test_render_md.py", - "helpers/test/helpers/test/test_list.py::Test_list_1", - "helpers/test/test_cache.py::TestAmpTask1407", - "helpers/test/test_printing.py::Test_dedent1::test2", - ] - self.helper(file_name, mode, expected) - - def test_files1(self) -> None: - file_name = self._build_pytest_file1() - mode = "files" - expected = [ - "dev_scripts/testing/test/test_run_tests.py", - "dev_scripts/testing/test/test_run_tests2.py", - "documentation/scripts/test/test_all.py", - "documentation/scripts/test/test_render_md.py", - "helpers/test/helpers/test/test_list.py", - "helpers/test/test_cache.py", - "helpers/test/test_printing.py", - ] - self.helper(file_name, mode, expected) - - def test_classes1(self) -> None: - file_name = self._build_pytest_file1() - mode = "classes" - expected = [ - "helpers/test/helpers/test/test_list.py::Test_list_1", - "helpers/test/test_cache.py::TestAmpTask1407", - "helpers/test/test_printing.py::Test_dedent1", - ] - self.helper(file_name, mode, expected) - - def _build_pytest_file2(self) -> str: - # pylint: disable=line-too-long - txt = """ - { - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2": true, - 
"core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13": true, - 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1": true, - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1": true, - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1": true, - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1": true, - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3": true, - "core/test/test_config.py::Test_subtract_config1::test_test1": true, - "core/test/test_config.py::Test_subtract_config1::test_test2": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3": true, - "helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true - } - """ - # pylint: enable=line-too-long - return self._build_pytest_filehelper(txt) - - def test_tests2(self) -> None: - file_name = self._build_pytest_file2() - mode = "tests" - # pylint: disable=line-too-long 
- expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07", - 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1", - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1", - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3", - "core/test/test_config.py::Test_subtract_config1::test_test1", - "core/test/test_config.py::Test_subtract_config1::test_test2", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3", - 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2", - "helpers/test/test_printing.py::Test_dedent1::test2", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - def test_files2(self) -> None: - file_name = self._build_pytest_file2() - mode = "files" - # pylint: disable=line-too-long - expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py", - "core/dataflow/nodes/test/test_volatility_models.py", - "core/dataflow/test/test_builders.py", - "core/dataflow/test/test_runners.py", - "core/dataflow_model/test/test_model_evaluator.py", - "core/dataflow_model/test/test_run_experiment.py", - "core/test/test_config.py", - "core/test/test_dataframe_modeler.py", - "dev_scripts/test/test_run_notebook.py", - "helpers/test/test_lib_tasks.py", - "helpers/test/test_printing.py", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - def test_classes2(self) -> None: - file_name = self._build_pytest_file2() - mode = "classes" - # pylint: disable=line-too-long - expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator", - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder", - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1", - "core/test/test_config.py::Test_subtract_config1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1", - 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1", - "helpers/test/test_printing.py::Test_dedent1", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - -# ############################################################################# -# Test_pytest_repro_end_to_end -# ############################################################################# - - -@pytest.mark.slow("~6 sec.") -class Test_pytest_repro_end_to_end(hunitest.TestCase): - """ - - Run the `pytest_repro` invoke from command line - - A fixed file imitating the pytest output file is used - - Compare the output to the golden outcome - """ - - def helper(self, cmd: str) -> None: - # Save output in tmp dir. - script_name = os.path.join( - self.get_scratch_space(), "tmp.pytest_repro.sh" - ) - cmd += f" --script-name {script_name}" - # Run the command. - _, actual = hsystem.system_to_string(cmd) - # Filter out the "No module named ..." warnings. - # TODO(Grisha): add the "no module warning" filtering to - # `purify_text()` in `check_string()`. - regex = "WARN.*No module" - actual = hunitest.filter_text(regex, actual) - # Remove "Encountered unexpected exception importing solver GLPK" - # generated on Mac. - regex = "Encountered unexpected exception importing solver GLPK" - actual = hunitest.filter_text(regex, actual) - # ImportError("cannot import name 'glpk' from 'cvxopt' (/venv/lib/python3.9/site-packages/cvxopt/__init__.py)") - regex = r"""ImportError\("cannot import name""" - actual = hunitest.filter_text(regex, actual) - # Modify the outcome for reproducibility. - actual = hprint.remove_non_printable_chars(actual) - actual = re.sub(r"[0-9]{2}:[0-9]{2}:[0-9]{2} - ", r"HH:MM:SS - ", actual) - actual = actual.replace("/app/amp/", "/app/") - actual = re.sub( - r"lib_tasks_pytest.py pytest_repro:[0-9]+", - r"lib_tasks_pytest.py pytest_repro:{LINE_NUM}", - actual, - ) - # Remove unstable content. 
- lines = actual.split("\n") - line_cmd = lines[0] - _LOG.debug("%s", "\n".join(lines)) - for i, line in enumerate(lines): - m = re.search("# pytest_repro: ", line) - if m: - test_output_start = i + 1 - break - lines_test_output = lines[test_output_start:] - # - actual = "\n".join([line_cmd] + lines_test_output) - regex = "init_logger" - actual = hunitest.filter_text(regex, actual) - regex = r"(WARN|INFO)\s+hcache.py" - actual = hunitest.filter_text(regex, actual) - # Check the outcome. - self.check_string(actual, purify_text=True, fuzzy_match=True) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test1(self) -> None: - file_name = f"{self.get_input_dir()}/cache/lastfailed" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test2(self) -> None: - """ - The tests are different since the input depends on the test and it's - different for different tests. - """ - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test3(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test4(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. 
See CmTask10739", - ) - def test5(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test6(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test7(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - -# ############################################################################# -# Test_pytest_failed1 -# ############################################################################# - - -class Test_pytest_failed1(hunitest.TestCase): - def get_pytest_text1(self) -> str: - txt = """ - 20:48:15 - ^[[36mINFO ^[[0m hdbg.py init_logger:1018 > cmd='/venv/bin/pytest helpers_root/dev_scripts_helpers/documentation/' - collected 47 items - - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 (2.07 s) FAILED [ 2%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question1 (0.00 s) PASSED [ 4%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question2 (0.00 s) PASSED [ 6%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question3 (0.00 s) PASSED [ 8%] - - - =================================== FAILURES =================================== - _________________________ Test_preprocess_notes1.test1 _________________________ - - 
FAILED helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - AttributeError: 'list' object has no attribute 'split' - FAILED helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - RuntimeError: cmd='(/app/helpers_root/dev_scripts_helpers/documentation/notes_to_pdf.py --input /app/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes - - ======================== 4 failed, 43 passed in 40.48s ========================= - """ - txt = hprint.dedent(txt) - return txt - - def helper( - self, - txt: str, - only_file: bool, - only_class: bool, - exp_failed_tests: str, - exp_num_failed: int, - exp_num_passed: int, - ) -> None: - act_failed_tests, act_num_failed, act_num_passed = ( - hlitapyt._parse_failed_tests(txt, only_file, only_class) - ) - act_failed_tests = "\n".join(act_failed_tests) - self.assert_equal( - act_failed_tests, - exp_failed_tests, - dedent=True, - remove_lead_trail_empty_lines=True, - ) - self.assertEqual(act_num_failed, exp_num_failed) - self.assertEqual(act_num_passed, exp_num_passed) - - def test1(self) -> None: - # Prepare inputs and outputs. - txt = self.get_pytest_text1() - only_file = False - only_class = False - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) - - def test2(self) -> None: - # Prepare inputs and outputs. 
- txt = self.get_pytest_text1() - only_file = True - only_class = False - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) - - def test3(self) -> None: - # Prepare inputs and outputs. - txt = self.get_pytest_text1() - only_file = False - only_class = True - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3 - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py deleted file mode 100644 index ac2b17b42..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py +++ /dev/null @@ -1,301 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hunit_test as hunitest -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -# pylint: disable=protected-access - - -# ############################################################################# -# Test_get_files_to_process1 -# ############################################################################# 
- - -class Test_get_files_to_process1(hunitest.TestCase): - """ - We can't check the outcome so we just execute the code. - """ - - def test_modified1(self) -> None: - """ - Retrieve files modified in this client. - """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - @pytest.mark.skipif( - hgit.get_branch_name() != "master", - reason="This test makes sense for a branch", - ) - def test_branch1(self) -> None: - """ - Retrieved files modified in this client. - """ - # This test needs a reference to Git master branch. - hgit.fetch_origin_master_if_needed() - # - modified = False - branch = True - last_commit = False - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - def test_last_commit1(self) -> None: - """ - Retrieved files modified in the last commit. - """ - modified = False - branch = False - last_commit = True - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - def test_files1(self) -> None: - """ - Pass through files from user. - """ - modified = False - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = True - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, [__file__]) - - def test_files2(self) -> None: - """ - Pass through files from user. 
- - Use two types of paths we don't want to process: - - non-existent python file - - pattern "/*" that matches no files - """ - modified = False - branch = False - last_commit = False - all_ = False - files_from_user = "testfile1.py testfiles1/*" - mutually_exclusive = True - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, []) - - def test_files3(self) -> None: - """ - Pass through files from user. - - Use the sequence of paths separated by newlines. - """ - modified = False - branch = False - last_commit = False - all_ = False - # Specify the number of toy files. - n_toy_files = 4 - files_from_user = [] - # Get root directory. - root_dir = hgit.get_client_root(super_module=False) - # Generate toy files and store their paths. - for file_num in range(n_toy_files): - # Build the name of the test file. - file_name = f"test_toy{str(file_num)}.tmp.py" - # Build the path to the test file. - test_path = os.path.join(root_dir, file_name) - # Create the empty toy file. - hio.to_file(test_path, "") - files_from_user.append(test_path) - mutually_exclusive = True - remove_dirs = True - # Join the names with `\n` separator. - joined_files_from_user = "\n".join(files_from_user) - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - joined_files_from_user, - mutually_exclusive, - remove_dirs, - ) - # Remove the toy files. - for path in files_from_user: - hio.delete_file(path) - self.assertEqual(files, files_from_user) - - def test_assert1(self) -> None: - """ - Test that --modified and --branch together cause an assertion. 
- """ - modified = True - branch = True - last_commit = False - all_ = True - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - with self.assertRaises(AssertionError) as cm: - hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - '3' - == - '1' - Specify only one among --modified, --branch, --last-commit, --all_files, and --files - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Test that --modified and --files together cause an assertion if - `mutually_exclusive=True`. - """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = True - remove_dirs = True - with self.assertRaises(AssertionError) as cm: - hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - '2' - == - '1' - Specify only one among --modified, --branch, --last-commit, --all_files, and --files - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Test that --modified and --files together don't cause an assertion if - `mutually_exclusive=False`. 
- """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = False - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, [__file__]) - - -# ############################################################################# - - -# ############################################################################# -# TestLibTasksRemoveSpaces1 -# ############################################################################# - - -class TestLibTasksRemoveSpaces1(hunitest.TestCase): - def test1(self) -> None: - txt = r""" - IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev \ - docker-compose \ - --file $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml \ - run \ - --rm \ - -l user=$USER_NAME \ - --entrypoint bash \ - user_space - """ - actual = hlitauti._to_single_line_cmd(txt) - expected = ( - "IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev" - " docker-compose --file" - " $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml" - " run --rm -l user=$USER_NAME --entrypoint bash user_space" - ) - self.assert_equal(actual, expected, fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py deleted file mode 100644 index ac46b6c17..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py +++ /dev/null @@ -1,74 +0,0 @@ -import os - -import pytest - -import config_root.config as cconfig -import dev_scripts_helpers.notebooks.run_notebook_test_case as dshnrntca -import helpers.hgit as hgit -import 
helpers.hserver as hserver -import helpers.lib_tasks_gh as hlitagh - - -def build_config() -> cconfig.ConfigList: - """ - Get an empty config for the test. - """ - config = {} - config = cconfig.Config() - config_list = cconfig.ConfigList([config]) - return config_list - - -# ############################################################################# -# Test_Master_buildmeister_dashboard_notebook -# ############################################################################# - - -class Test_Master_buildmeister_dashboard_notebook( - dshnrntca.Test_Run_Notebook_TestCase -): - @pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="No access to data from `lemonade` repo locally", - ) - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - @pytest.mark.superslow("~42 sec.") - def test1(self) -> None: - amp_dir = hgit.get_amp_abs_path() - notebook_path = os.path.join( - amp_dir, - "devops", - "notebooks", - "Master_buildmeister_dashboard.ipynb", - ) - config_builder = ( - "helpers.test.test_master_buildmeister_dashboard.build_config()" - ) - self._test_run_notebook(notebook_path, config_builder) - - @pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="No access to data from `lemonade` repo locally", - ) - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - @pytest.mark.superslow("~30 sec.") - def test2(self) -> None: - """ - Check that we can get status for all the workflows. 
- """ - repo_list = [ - "causify-ai/cmamp", - "causify-ai/orange", - "causify-ai/lemonade", - "causify-ai/kaizenflow", - "causify-ai/helpers", - "causify-ai/quant_dashboard", - ] - for repo_name in repo_list: - hlitagh.gh_get_workflow_type_names(repo_name) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py deleted file mode 100644 index ced80844b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py +++ /dev/null @@ -1,284 +0,0 @@ -import logging - -import pytest - -import helpers.hgit as hgit -import helpers.hserver as hserver -import helpers.hunit_test as hunitest -import helpers.hunit_test_utils as hunteuti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestRepoConfig_Amp -# ############################################################################# - - -class TestRepoConfig_Amp(hunitest.TestCase): - # Difference between `cmamp` and `kaizenflow`. - expected_repo_name = "//cmamp" - - def test_repo_name1(self) -> None: - """ - Show that when importing repo_config, one doesn't get necessarily the - outermost repo_config (e.g., for lime one gets amp.repo_config). - """ - - actual = hrecouti.get_repo_config().get_name() - _LOG.info( - "actual=%s expected_repo_name=%s", actual, self.expected_repo_name - ) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_repo_name2(self) -> None: - """ - If //amp is a supermodule, then repo_config should report //amp. 
- """ - actual = hrecouti.get_repo_config().get_name() - self.assertEqual(actual, self.expected_repo_name) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_repo_name3(self) -> None: - """ - If //amp is a supermodule, then repo_config should report something - different than //amp. - """ - actual = hrecouti.get_repo_config().get_name() - self.assertNotEqual(actual, self.expected_repo_name) - - def test_config_func_to_str(self) -> None: - _LOG.info(hserver.config_func_to_str()) - - def test_is_dev4(self) -> None: - """ - Amp could run on dev4 or not. - """ - _ = hserver.is_dev4() - - def test_is_CK_S3_available(self) -> None: - """ - When running Amp on dev_csfy, the CSFY bucket should be available. - """ - if hserver.is_dev_csfy(): - actual = hserver.is_CK_S3_available() - expected = True - self.assertEqual(actual, expected) - - -# ############################################################################# -# TestRepoConfig_Amp_signature -# ############################################################################# - - -# > pytest ./amp/helpers/test/test_repo_config_amp.py - - -# ############################################################################# -# TestRepoConfig_Amp_signature1 -# ############################################################################# - - -class TestRepoConfig_Amp_signature1(hunitest.TestCase): - def test_dev_csfy_server(self) -> None: - target_name = "amp" - hunteuti.execute_only_in_target_repo(target_name) - # - hunteuti.execute_only_on_dev_csfy() - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='True' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='{'/data/shared': '/shared_data'}' - has_dind_support='True' - has_docker_sudo='True' - is_CK_S3_available='True' - run_docker_as_root='False' - 
skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - # Server config: - # hserver.config - is_AM_S3_available()='True' - is_dev4()='False' - is_dev_csfy()='True' - is_inside_ci()='False' - is_inside_docker()='True' - is_mac(version='Catalina')='False' - is_mac(version='Monterey')='False' - is_mac(version='Sequoia')='False' - is_mac(version='Ventura')='False' - # Env vars: - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - CSFY_CI='' - GH_ACTION_ACCESS_TOKEN=empty - """ - hunteuti.check_env_to_str(self, expected) - - def test_mac(self) -> None: - target_name = "amp" - hunteuti.execute_only_in_target_repo(target_name) - # - hunteuti.execute_only_on_mac(version="Catalina") - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='False' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='None' - has_dind_support='False' - has_docker_sudo='True' - is_CK_S3_available='False' - run_docker_as_root='False' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='True' - use_docker_network_mode_host='False' - use_docker_sibling_containers='True' - # Server config: - # hserver.config - is_AM_S3_available='True' - is_dev4='False' - is_dev_csfy='False' - is_inside_ci='False' - is_inside_docker='True' - is_mac='True' - # Env vars: - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='False' - CSFY_REPO_CONFIG_PATH='' - CSFY_CI='' - GH_ACTION_ACCESS_TOKEN=empty - """ - hunteuti.check_env_to_str(self, expected) - # - exp_enable_privileged_mode = True - exp_has_dind_support = True - hrecouti.assert_setup( - self, exp_enable_privileged_mode, exp_has_dind_support - ) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == 
"//amp", - reason="Run only in //amp", - ) - def test_amp_ci(self) -> None: - hunteuti.execute_only_on_ci() - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='True' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='None' - has_dind_support='True' - has_docker_sudo='False' - is_CK_S3_available='False' - run_docker_as_root='True' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - # Server config: - # hserver.config - is_AM_S3_available()='True' - is_dev4()='False' - is_dev_csfy()='False' - is_inside_ci()='True' - is_inside_docker()='True' - is_mac(version='Catalina')='False' - is_mac(version='Monterey')='False' - is_mac(version='Ventura')='False' - is_mac(version='Sequoia')='False' - # Env vars: - CSFY_CI='true' - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - """ - # We ignore the AWS vars, since GH Actions does some replacement to mask - # the env vars coming from secrets. 
- skip_secrets_vars = True - hunteuti.check_env_to_str( - self, expected, skip_secrets_vars=skip_secrets_vars - ) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == "//cmamp", - reason="Run only in //cmamp", - ) - def test_cmamp_ci(self) -> None: - hunteuti.execute_only_on_ci() - # - expected = r""" - # Repo config - get_host_name='github.com' - get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' - get_invalid_words='[]' - get_docker_base_image_name='cmamp' - # Server config - enable_privileged_mode='True' - get_docker_shared_group='' - get_docker_user='' - get_host_user_name='runner' - get_shared_data_dirs='None' - has_dind_support='True' - has_docker_sudo='False' - is_AM_S3_available='True' - is_CK_S3_available='True' - is_dev4='False' - is_dev_csfy='False' - is_external_linux='False' - is_host_mac='False' - is_ig_prod='False' - is_inside_ci='True' - is_inside_docker='True' - is_inside_ecs_container='False' - is_inside_unit_test='True' - is_prod_csfy='False' - run_docker_as_root='True' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - use_main_network='False' - # Env vars - CSFY_CI='true' - CSFY_ECR_BASE_PATH='$CSFY_ECR_BASE_PATH' - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - """ - # We ignore the AWS vars, since GH Actions does some replacement to mask - # the env vars coming from secrets. 
- skip_secrets_vars = True - hunteuti.check_env_to_str( - self, expected, skip_secrets_vars=skip_secrets_vars - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py deleted file mode 100644 index f5b284c58..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py +++ /dev/null @@ -1,65 +0,0 @@ -import logging -import os - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_repo_config1 -# ############################################################################# - - -class Test_repo_config1(hunitest.TestCase): - def create_test_file(self) -> str: - yaml_txt = """ - repo_info: - repo_name: helpers - github_repo_account: causify-ai - github_host_name: github.com - invalid_words: - issue_prefix: HelpersTask - - docker_info: - docker_image_name: helpers - - s3_bucket_info: - unit_test_bucket_name: s3://cryptokaizen-unit-test - html_bucket_name: s3://cryptokaizen-html - html_ip: http://172.30.2.44 - - container_registry_info: - ecr: 623860924167.dkr.ecr.eu-north-1.amazonaws.com - ghcr: ghcr.io/cryptokaizen - - runnable_dir_info: - use_helpers_as_nested_module: False - venv_tag: helpers - dir_suffix: helpers - """ - yaml_txt = hprint.dedent(yaml_txt) - file_name = os.path.join(self.get_scratch_space(), "yaml.txt") - hio.to_file(file_name, yaml_txt) - return file_name - - def test1(self) -> None: - file_name = self.create_test_file() - repo_config = hrecouti.RepoConfig.from_file(file_name) - actual = repo_config.get_name() - expected = 
"//helpers" - self.assert_equal(actual, expected) - - def test2(self) -> None: - file_name = self.create_test_file() - repo_config = hrecouti.RepoConfig.from_file(file_name) - actual = repo_config.get_repo_map() - expected = { - "helpers": "causify-ai/helpers", - } - self.assert_equal(str(actual), str(expected)) - - # TODO(gp): Test all the methods of the RepoConfig class. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 5a0064d78..f46201cbb 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -19,8 +19,8 @@ # ── Container settings ──────────────────────────────────── container_name: house-price image_name: house-price-project - host_port: 5001 # port on your Mac - container_port: 5000 # port inside the container + host_port: 5001 + container_port: 5000 # ── Paths ───────────────────────────────────────────────── project_root: "{{ playbook_dir }}" From ce3e8f3c02a82d04b14ed1e0c69645db423abca9 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 6 May 2026 17:26:17 -0400 Subject: [PATCH 54/58] adding the file --- .../ReadMe.md | 75 + .../helpers/README.md | 285 + .../helpers/__init__.py | 0 .../helpers/asana_utils.py | 1156 ++ .../helpers/github_utils.py | 2110 +++ .../helpers/hasyncio.py | 508 + .../helpers/haws.py | 266 + .../helpers/hcache.py | 1086 ++ .../helpers/hcache_simple.py | 1963 +++ .../helpers/hcfile.py | 135 + .../helpers/hchatgpt.py | 549 + .../helpers/hchatgpt_instructions.py | 32 + .../helpers/hcoverage.py | 183 + .../helpers/hcsv.py | 365 + .../helpers/hdataframe.py | 309 + .../helpers/hdatetime.py | 909 ++ .../helpers/hdbg.py | 1134 ++ .../helpers/hdict.py | 119 + 
.../helpers/hdocker.py | 871 ++ .../helpers/hdocker_tests.py | 197 + .../helpers/hemail.py | 47 + .../helpers/henv.py | 541 + .../helpers/hfile_tree.py | 232 + .../helpers/hgit.py | 1869 +++ .../helpers/hgoogle_drive_api.py | 1183 ++ .../helpers/hintrospection.py | 284 + .../helpers/hio.py | 1046 ++ .../helpers/hjoblib.py | 880 ++ .../helpers/hjupyter.py | 383 + .../helpers/hlatex.py | 334 + .../helpers/hlint.py | 29 + .../helpers/hlist.py | 78 + .../helpers/hllm.py | 680 + .../helpers/hllm_cli.py | 840 + .../helpers/hllm_cost.py | 233 + .../helpers/hlogging.py | 809 + .../helpers/hlogging.pyi | 14 + .../helpers/hmarkdown.py | 18 + .../helpers/hmarkdown_bullets.py | 248 + .../helpers/hmarkdown_coloring.py | 286 + .../helpers/hmarkdown_comments.py | 66 + .../helpers/hmarkdown_div_blocks.py | 132 + .../helpers/hmarkdown_fenced_blocks.py | 131 + .../helpers/hmarkdown_filtering.py | 109 + .../helpers/hmarkdown_formatting.py | 530 + .../helpers/hmarkdown_headers.py | 841 + .../helpers/hmarkdown_rules.py | 367 + .../helpers/hmarkdown_slides.py | 201 + .../helpers/hmarkdown_tables.py | 121 + .../helpers/hmarkdown_toc.py | 164 + .../helpers/hmatplotlib.py | 106 + .../helpers/hmkdocs.py | 170 + .../helpers/hmodule.py | 121 + .../helpers/hmoto.py | 111 + .../helpers/hnetwork.py | 97 + .../helpers/hnotebook.py | 105 + .../helpers/hnumba.py | 43 + .../helpers/hnumpy.py | 57 + .../helpers/hobject.py | 500 + .../helpers/hopen.py | 106 + .../helpers/hpandas.py | 18 + .../helpers/hpandas.py.old | 2684 ++++ .../helpers/hpandas_analysis.py | 628 + .../helpers/hpandas_check_summary.py | 111 + .../helpers/hpandas_clean.py | 282 + .../helpers/hpandas_compare.py | 289 + .../helpers/hpandas_conversion.py | 221 + .../helpers/hpandas_dassert.py | 371 + .../helpers/hpandas_display.py | 302 + .../helpers/hpandas_io.py | 128 + .../helpers/hpandas_multiindex.py | 183 + .../helpers/hpandas_stats.py | 527 + .../helpers/hpandas_transform.py | 1023 ++ .../helpers/hpandas_utils.py | 649 + 
.../helpers/hparquet.py | 1309 ++ .../helpers/hparser.py | 1176 ++ .../helpers/hpickle.py | 253 + .../helpers/hplayback.py | 495 + .../helpers/hprint.py | 1076 ++ .../helpers/hpytest.py | 266 + .../helpers/hretry.py | 94 + .../helpers/hs3.py | 1129 ++ .../helpers/hsecrets.py | 233 + .../helpers/hserver.py | 1167 ++ .../helpers/hsftp.py | 204 + .../helpers/hslack.py | 66 + .../helpers/hsql.py | 36 + .../helpers/hsql_implementation.py | 954 ++ .../helpers/hsql_test.py | 273 + .../helpers/hstring.py | 176 + .../helpers/hsystem.py | 1097 ++ .../helpers/htable.py | 180 + .../helpers/htest_logger.py | 48 + .../helpers/htext_protect.py | 262 + .../helpers/hthreading.py | 43 + .../helpers/htimer.py | 275 + .../helpers/htqdm.py | 48 + .../helpers/htraceback.py | 228 + .../helpers/htranslate.py | 109 + .../helpers/htypes.py | 11 + .../helpers/hunit_test.py | 1876 +++ .../helpers/hunit_test_purification.py | 450 + .../helpers/hunit_test_utils.py | 658 + .../helpers/hversion.py | 300 + .../helpers/hwall_clock_time.py | 125 + .../helpers/hwarnings.py | 156 + .../helpers/lib_tasks.py | 37 + .../helpers/lib_tasks_aws.py | 407 + .../helpers/lib_tasks_bash.py | 104 + .../helpers/lib_tasks_docker.py | 1590 ++ .../helpers/lib_tasks_docker_release.py | 1890 +++ .../helpers/lib_tasks_find.py | 606 + .../helpers/lib_tasks_gh.py | 1252 ++ .../helpers/lib_tasks_git.py | 1502 ++ .../helpers/lib_tasks_integrate.py | 837 + .../helpers/lib_tasks_lint.py | 443 + .../helpers/lib_tasks_perms.py | 380 + .../helpers/lib_tasks_print.py | 103 + .../helpers/lib_tasks_pytest.py | 1743 +++ .../helpers/lib_tasks_utils.py | 397 + .../helpers/logging_testing/__init__.py | 0 .../helpers/logging_testing/logging_main.py | 81 + .../helpers/logging_testing/logging_module.py | 10 + .../helpers/notebooks/conftest.py | 17 + .../helpers/notebooks/hcache.tutorial.ipynb | 638 + .../helpers/notebooks/hcache.tutorial.py | 274 + .../notebooks/hcache_simple.tutorial.ipynb | 858 + .../notebooks/hcache_simple.tutorial.py 
| 486 + .../hgoodle_drive_api.tutorial.ipynb | 424 + .../notebooks/hgoodle_drive_api.tutorial.py | 107 + .../helpers/notebooks/hllm.tutorial.ipynb | 13040 ++++++++++++++++ .../helpers/notebooks/hllm.tutorial.py | 118 + .../notebooks/hplayback.tutorial.ipynb | 993 ++ .../helpers/notebooks/hplayback.tutorial.py | 374 + .../helpers/notebooks/parquet.tutorial.ipynb | 1774 +++ .../helpers/notebooks/parquet.tutorial.py | 304 + .../helpers/notebooks/s3.tutorial.ipynb | 210 + .../helpers/notebooks/s3.tutorial.py | 44 + .../helpers/notebooks/sage.tutorial.ipynb | 448 + .../helpers/notebooks/sage.tutorial.py | 98 + .../helpers/old/__init__.py | 0 .../helpers/old/conda.py | 192 + .../helpers/old/conftest.py | 17 + .../helpers/old/env2.py | 75 + .../helpers/old/tunnels.py | 267 + .../helpers/old/user_credentials.py | 208 + .../pandoc_docker_files/install-texlive.sh | 113 + .../helpers/pandoc_docker_files/packages.txt | 115 + .../pandoc_docker_files/texlive.profile | 32 + .../helpers/repo_config_utils.py | 411 + .../helpers/stage_linked_file.py | 83 + .../helpers/telegram_notify/__init__.py | 0 .../helpers/telegram_notify/config.py | 30 + .../helpers/telegram_notify/get_chat_id.py | 76 + .../telegram_notify/telegram_notify.py | 155 + .../helpers/test/__init__.py | 0 .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test_df.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../TestDataframeToJson.test1/output/test.txt | 31 + .../TestDataframeToJson.test2/output/test.txt | 13 + .../TestDataframeToJson.test3/output/test.txt | 13 + .../TestDataframeToJson.test4/output/test.txt | 13 + .../output/test.txt | 31 + .../output/test.txt | 
13 + .../output/test.txt | 13 + .../output/test.txt | 13 + .../output/test.txt | 4 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 4 + .../output/test.txt | 3 + .../output/test.txt | 2 + .../output/test.txt | 2 + .../output/test.txt | 2 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test.txt | 0 .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 0 .../output/test.txt | 0 .../output/test.txt | 2 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 0 .../output/test.txt | 3 + .../output/test.txt | 3 + .../output/test.txt | 2 + .../output/test.txt | 3 + .../output/test.txt | 2 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 0 .../input/test.txt | 101 + .../output/test.txt | 8 + .../TestGetDocstrings.test1/input/test.txt | 18 + .../output/test.txt | 52 + .../output/test.txt | 52 + .../output/test.txt | 30 + .../output/test.txt | 20 + .../output/test.txt | 19 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 23 + .../output/test.txt | 19 + .../output/test.txt | 21 + .../output/test.txt | 15 + .../output/test.txt | 17 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 20 + .../output/test.txt | 22 + .../output/test.txt | 23 + .../output/test.txt | 19 + .../output/test.txt | 18 + .../output/test.txt | 19 + .../output/test.txt | 30 + .../output/test.txt | 1 + .../output/test.txt | 20 + .../output/test.txt | 30 + .../output/test.txt | 30 + .../output/test.txt | 65 + .../Test_CheckSummary.test1/output/test.txt | 4 + .../Test_CheckSummary.test2/output/test.txt | 4 + .../output/test.txt | 19 + .../output/test.txt | 5 + .../output/test.txt | 9 + 
.../output/test.txt | 9 + .../input/tmp.cache_simple._llm.json | 10 + .../Test_apply_nan_mode.test1/output/test.txt | 41 + .../Test_apply_nan_mode.test2/output/test.txt | 33 + .../Test_apply_nan_mode.test3/output/test.txt | 41 + .../Test_apply_nan_mode.test4/output/test.txt | 38 + .../Test_apply_nan_mode.test5/output/test.txt | 41 + .../output/test.txt | 3 + .../output/test.txt | 1 + .../output/test_df.txt | 3 + .../input/test.csv | 5 + .../Test_dassert1.test2/output/test.txt | 5 + .../Test_dassert1.test3/output/test.txt | 6 + .../Test_dassert1.test4/output/test.txt | 6 + .../Test_dassert1.test5/output/test.txt | 8 + .../Test_dassert1.test6/output/test.txt | 8 + .../Test_dassert1.test7/output/test.txt | 1 + .../Test_dassert_eq1.test3/output/test.txt | 8 + .../Test_dassert_eq1.test4/output/test.txt | 8 + .../Test_dassert_eq1.test5/output/test.txt | 10 + .../output/test.txt | 1 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 8 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 5 + .../output/test.txt | 9 + .../output/test.txt | 9 + .../output/test.txt | 8 + .../output/test.txt | 1 + .../output/test.txt | 28 + .../output/test.txt | 28 + .../output/test.txt | 26 + .../output/test.txt | 27 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../output/test.txt | 1 + .../Test_from_typed_csv.test1/input/test.csv | 2 + .../input/test.csv.types | 1 + .../output/test.txt | 58 + .../output/test.txt | 58 + .../output/test.txt | 60 + .../output/test.txt | 57 + .../output/test.txt | 56 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../output/test.txt | 63 + .../input/result_0/config.pkl | Bin 0 -> 405 bytes .../input/result_0/config.txt | 7 + .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.pkl | Bin 0 -> 405 bytes .../input/result_1/config.txt | 7 + 
.../input/result_1/run_notebook.1.log | 0 .../input/result_0/config.txt | 7 + .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.txt | 7 + .../input/result_1/run_notebook.1.log | 0 .../output/test.txt | 45 + .../input/test.json | 17 + .../Test_obj_to_str1.test1/output/test.txt | 11 + .../Test_obj_to_str1.test2/output/test.txt | 11 + .../Test_obj_to_str1.test3/output/test.txt | 11 + .../Test_obj_to_str1.test4/output/test.txt | 12 + .../Test_obj_to_str1.test5/output/test.txt | 12 + .../Test_obj_to_str1.test6/output/test.txt | 12 + .../Test_obj_to_str2.test1/output/test.txt | 11 + .../Test_obj_to_str2.test2/output/test.txt | 11 + .../Test_obj_to_str2.test3/output/test.txt | 11 + .../Test_obj_to_str2.test4/output/test.txt | 11 + .../Test_obj_to_str2.test5/output/test.txt | 11 + .../Test_obj_to_str2.test6/output/test.txt | 11 + .../output/test.txt | 1 + .../Test_open_html.test_mac1/output/test.txt | 1 + .../output/test.txt | 1 + .../Test_open_pdf.test_mac1/output/test.txt | 1 + .../input/test.txt | 16 + .../output/test.txt | 20 + .../Test_process_lines1.test1/input/test.txt | 16 + .../Test_process_lines1.test1/output/test.txt | 20 + .../output/test.txt | 2 + .../input/cache/lastfailed | 12 + .../output/test.txt | 15 + .../input/log.txt | 325 + .../output/test.txt | 10 + .../input/log.txt | 10 + .../output/test.txt | 8 + .../input/log.txt | 61 + .../output/test.txt | 61 + .../input/log.txt | 36 + .../output/test.txt | 36 + .../input/log.txt | 2533 +++ .../output/test.txt | 41 + .../input/log.txt | 396 + .../output/test.txt | 399 + .../input/test.txt | 7 + .../input/test.txt | 16 + .../output/test.txt | 16 + .../input/test.txt | 9 + .../output/test.txt | 7 + .../output/test.txt | 2 + .../output/test.txt | 3 + .../output/test.txt | 71 + .../output/test.txt | 40 + .../output/test.txt | 40 + .../output/test.txt | 4 + .../output/test.txt | 1 + .../Test_system1.test7/output/test.txt | 16 + .../Test_to_typed_csv.test1/input/test.csv | 2 + 
.../helpers/test/test_create_link.py | 136 + .../helpers/test/test_hasyncio.py | 96 + .../helpers/test/test_haws.py | 276 + .../helpers/test/test_hcache.py | 1002 ++ .../helpers/test/test_hcache_simple.py | 2606 +++ .../helpers/test/test_hcfile.py | 335 + .../helpers/test/test_hcsv.py | 81 + .../helpers/test/test_hdataframe.py | 299 + .../helpers/test/test_hdatetime.py | 932 ++ .../helpers/test/test_hdbg.py | 934 ++ .../helpers/test/test_hdict.py | 107 + .../helpers/test/test_hdocker.py | 624 + .../helpers/test/test_hdocker_tests.py | 158 + .../helpers/test/test_henv.py | 17 + .../helpers/test/test_hfile_tree.py | 347 + .../helpers/test/test_hgit.py | 822 + .../helpers/test/test_hintrospection.py | 406 + .../helpers/test/test_hio.py | 225 + .../helpers/test/test_hlatex.py | 665 + .../helpers/test/test_hlist.py | 176 + .../helpers/test/test_hllm.py | 361 + .../helpers/test/test_hllm_cli.py | 1403 ++ .../helpers/test/test_hlogging.py | 103 + .../helpers/test/test_hmarkdown_bullets.py | 716 + .../helpers/test/test_hmarkdown_coloring.py | 205 + .../helpers/test/test_hmarkdown_div_blocks.py | 355 + .../test/test_hmarkdown_fenced_blocks.py | 218 + .../helpers/test/test_hmarkdown_filtering.py | 449 + .../helpers/test/test_hmarkdown_formatting.py | 1403 ++ .../helpers/test/test_hmarkdown_headers.py | 2002 +++ .../helpers/test/test_hmarkdown_rules.py | 377 + .../helpers/test/test_hmarkdown_slides.py | 399 + .../helpers/test/test_hmarkdown_tables.py | 196 + .../helpers/test/test_hmarkdown_toc.py | 228 + .../helpers/test/test_hmkdocs.py | 394 + .../helpers/test/test_hmodule.py | 25 + .../helpers/test/test_hnumpy.py | 215 + .../helpers/test/test_hobject.py | 392 + .../helpers/test/test_hopen.py | 92 + .../helpers/test/test_hpandas_analysis.py | 42 + .../test/test_hpandas_check_summary.py | 67 + .../helpers/test/test_hpandas_clean.py | 364 + .../helpers/test/test_hpandas_compare.py | 650 + .../helpers/test/test_hpandas_conversion.py | 276 + 
.../helpers/test/test_hpandas_dassert.py | 448 + .../helpers/test/test_hpandas_display.py | 685 + .../helpers/test/test_hpandas_io.py | 43 + .../helpers/test/test_hpandas_multiindex.py | 680 + .../helpers/test/test_hpandas_stats.py | 426 + .../helpers/test/test_hpandas_transform.py | 1888 +++ .../helpers/test/test_hpandas_utils.py | 251 + .../helpers/test/test_hparquet.py | 1468 ++ .../helpers/test/test_hparser.py | 398 + .../helpers/test/test_hpickle.py | 97 + .../helpers/test/test_hplayback.py | 506 + .../helpers/test/test_hprint.py | 844 + .../helpers/test/test_hpytest.py | 228 + .../helpers/test/test_hretry.py | 154 + .../helpers/test/test_hs3.py | 597 + .../helpers/test/test_hsecrets.py | 209 + .../helpers/test/test_hserver.py | 321 + .../helpers/test/test_hslack.py | 81 + .../helpers/test/test_hsql.py | 29 + .../helpers/test/test_hstring.py | 270 + .../helpers/test/test_hsystem.py | 494 + .../helpers/test/test_htable.py | 159 + .../helpers/test/test_htext_protect.py | 578 + .../helpers/test/test_htimer.py | 24 + .../helpers/test/test_htraceback.py | 474 + .../helpers/test/test_hunit_test.py | 954 ++ .../helpers/test/test_hunit_test_mock.py | 288 + .../test/test_hunit_test_purification.py | 1065 ++ .../helpers/test/test_hunit_test_utils.py | 553 + .../helpers/test/test_hversion.py | 74 + .../helpers/test/test_joblib_helpers.py | 569 + .../helpers/test/test_lib_tasks.py | 540 + .../helpers/test/test_lib_tasks_docker.py | 494 + .../test/test_lib_tasks_docker_release.py | 1530 ++ .../helpers/test/test_lib_tasks_find.py | 267 + .../helpers/test/test_lib_tasks_gh.py | 133 + .../helpers/test/test_lib_tasks_git.py | 267 + .../helpers/test/test_lib_tasks_integrate.py | 27 + .../helpers/test/test_lib_tasks_lint.py | 32 + .../helpers/test/test_lib_tasks_pytest.py | 1163 ++ .../helpers/test/test_lib_tasks_utils.py | 301 + .../test_master_buildmeister_dashboard.py | 74 + .../helpers/test/test_repo_config_amp.py | 284 + .../helpers/test/test_repo_config_utils.py | 65 + 
.../playbook.yaml | 57 +- .../results/leaderboard.csv | 4 - .../results/metrics.csv | 4 - .../results/plots/accuracy_by_activation.png | Bin 16500 -> 0 bytes .../plots/accuracy_by_grad_clipping.png | Bin 17404 -> 0 bytes .../results/plots/accuracy_by_model.png | Bin 16115 -> 0 bytes .../results/plots/accuracy_by_optimizer.png | Bin 17334 -> 0 bytes .../results/plots/accuracy_vs_seq_length.png | Bin 19941 -> 0 bytes .../results/plots/best_loss.png | Bin 26093 -> 0 bytes .../results/plots/f1_by_activation.png | Bin 14468 -> 0 bytes .../results/plots/f1_by_grad_clipping.png | Bin 15098 -> 0 bytes .../results/plots/f1_by_model.png | Bin 13683 -> 0 bytes .../results/plots/f1_by_optimizer.png | Bin 14784 -> 0 bytes .../results/plots/f1_vs_seq_length.png | Bin 17654 -> 0 bytes .../plots/loss_rnn_relu_adam_L50_clip1.png | Bin 26093 -> 0 bytes .../results/plots/worst_loss.png | Bin 26093 -> 0 bytes .../results/price_by_neighborhood.png | Bin 27259 -> 27162 bytes .../results/price_vs_quality.png | Bin 0 -> 39114 bytes .../template.API.ipynb | 185 +- .../template.API.py | 6 +- .../template.example.ipynb | 540 +- .../template.example.py | 6 +- .../template_utils.py | 100 + 477 files changed, 130454 insertions(+), 133 deletions(-) create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_aws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py create 
mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py create mode 100755 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py create mode 
100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt 
create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/run_notebook.0.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/config.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/run_notebook.1.log create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_load_df_from_json.test1/input/test.json create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt 
create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py create mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_grad_clipping.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_optimizer.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_vs_seq_length.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_grad_clipping.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_vs_seq_length.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/loss_rnn_relu_adam_L50_clip1.png delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_vs_quality.png diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md index 81906ebef..db3d2835b 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ReadMe.md @@ -15,6 +15,81 @@ playbook multiple times will not change the system beyond the initial application, ensuring stability and predictability. +## How to run the project +## 🚀 Step-by-Step Execution + + +### Step 1 — Build the Docker image + +```bash +docker build -t house-price-project . +``` + +> This installs all dependencies from `requirements.txt` and sets up +> JupyterLab inside the container. Takes ~3–5 minutes on first build. 
+ +### Step 2 — Start the container + +```bash +docker run -it -p 5001:5000 -p 8888:8888 \ + --name house-price \ + -v $(pwd):/project \ + house-price-project +``` +| Flag | Meaning | +|------|---------| +| `-it` | Interactive shell | +| `-p 5001:5000` | Mac port 5001 → container port 5000 (Flask API) | +| `-p 8888:8888` | Mac port 8888 → container port 8888 (JupyterLab) | +| `--name house-price` | Give the container a fixed name | +| `-v $(pwd):/project` | Mount project folder so files persist | + +You will land inside the container at `root@container:/project#`. + +### Step 3 — Train the model + +Inside the container: + +If you want to run the JupyterLab interface, execute: +```bash +PORT=5000 jupyter lab --ip=0.0.0.0 --no-browser --allow-root +``` + +If you want to run the training script, execute: +```bash +python template.example.py +``` + +Expected output: +WARNING: File 'ml_model/train.csv' not found – generating synthetic dataset. +INFO: Dataset shape: (1460, 16) +INFO: Cross-validating GradientBoosting (5 folds)… +INFO: Cross-validating RandomForest (5 folds)… +INFO: Cross-validating Ridge (5 folds)… +INFO: Best model: GradientBoosting +INFO: Test R²: 0.9822 +INFO: Model saved to '/project/ml_model/house_price_model.pkl'. + +> If you have the Kaggle dataset, place `train.csv` in `ml_model/` before +> running this step to train on real data instead of synthetic data. + +### Step 4 — Start the Flask API + +Inside the container (keep this terminal open): + +```bash +PORT=5000 python app.py +``` + +Then, in Jupyter Notebook, open template.API.py and run the cells. + +Once everything is done, you can run the whole process using Ansible. Make sure you have Ansible installed and configured properly. Then, execute the following command in your terminal: + +```bash +ansible-playbook playbook.yml +``` + + ## Project Objective The goal of the project is to automate the deployment of a machine learning model using Ansible. 
Students will create a playbook that provisions a virtual diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md new file mode 100644 index 000000000..8578eccd3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md @@ -0,0 +1,285 @@ +# Summary + +The `helpers/` directory is the core Python library providing utilities, +development tools, and infrastructure components for the helpers ecosystem. +Modules follow the `h` naming convention and are organized by domain. + +# Directory Structure + +- `logging_testing/` + - Utilities for testing logging behavior across modules +- `notebooks/` + - Jupyter notebooks and tutorials (e.g., hcache_simple usage) +- `old/` + - Deprecated and archived modules (conda, tunnels, user_credentials) +- `pandoc_docker_files/` + - Docker setup files and package lists for pandoc and texlive +- `telegram_notify/` + - Telegram bot notification module with config and chat ID utilities +- `test/` + - Unit tests for all modules (90+ test files organized by module name) + +# Files + +## Core Infrastructure + +- `hdbg.py` + - Debugging utilities with specialized assertions, logging, and fatal error handling +- `hio.py` + - Filesystem operations, file read/write, and directory management utilities +- `hsystem.py` + - System interaction: shell commands, environment variables, process management +- `henv.py` + - Environment variable checks and module installation management +- `hserver.py` + - Identify which server the code is running on +- `hversion.py` + - Code version control and Docker container compatibility checking +- `hlogging.py` + - Logging configuration, custom formatters, and logging utilities +- `hwarnings.py` + - Suppress annoying Python warnings when imported +- `htraceback.py` + - 
Traceback parsing, formatting, and manipulation utilities +- `hprint.py` + - Debugging and pretty-printing utilities for Python objects +- `hparser.py` + - Argparse helpers: verbosity, action, limit-range, and other standard arguments +- `hobject.py` + - Introspect and print the state of a Python object +- `hintrospection.py` + - Python introspection and module analysis utilities +- `hmodule.py` + - Dynamic module installation and import management utilities +- `htimer.py` + - Timer class for measuring and reporting elapsed time +- `htqdm.py` + - tqdm progress bar stream redirected to Python logger +- `hthreading.py` + - Timeout decorator to enforce execution time limits on functions +- `hretry.py` + - Retry decorators for synchronous and asynchronous functions +- `hasyncio.py` + - Async/await utilities and coroutine management for asyncio +- `hnetwork.py` + - Network utilities including URL availability checking +- `hopen.py` + - Cross-platform file opening utility +- `htypes.py` + - General type aliases and type utilities based on standard Python libraries +- `hwall_clock_time.py` + - Wall clock time simulation and management for testing and replays + +## Data Processing + +- `hpandas.py` + - Pandas utilities aggregating all hpandas_* submodules +- `hpandas_analysis.py` + - Statistical analysis and ML-related functions for pandas DataFrames +- `hpandas_check_summary.py` + - DataFrame check and summary reporting utilities +- `hpandas_clean.py` + - DataFrame cleaning operations (deduplicate, fill NaN, sanitize) +- `hpandas_compare.py` + - DataFrame comparison utilities for diffing and equality checks +- `hpandas_conversion.py` + - DataFrame and Series conversion and casting utilities +- `hpandas_dassert.py` + - Pandas-specific assertions and validation functions +- `hpandas_display.py` + - DataFrame display formatting and signature generation +- `hpandas_io.py` + - Pandas I/O operations for local and S3 storage +- `hpandas_multiindex.py` + - MultiIndex creation, 
manipulation, and access operations +- `hpandas_stats.py` + - Pandas statistics, duration computation, and time-series helpers +- `hpandas_transform.py` + - DataFrame transformation operations (pivot, reshape, normalize) +- `hpandas_utils.py` + - General-purpose pandas utilities and helper functions +- `hdataframe.py` + - Lower-level helper functions for processing pandas DataFrames +- `hnumpy.py` + - NumPy utilities, array helpers, and random seed management +- `hnumba.py` + - Numba JIT compilation wrapper and acceleration utilities +- `hparquet.py` + - Parquet file read/write operations using pyarrow +- `hcsv.py` + - CSV file operations and DataFrame I/O utilities +- `hdatetime.py` + - Date/time manipulation, parsing, and timezone handling utilities +- `hdict.py` + - Dictionary manipulation and nested dictionary operation utilities +- `hlist.py` + - List manipulation, deduplication, and membership utilities +- `hstring.py` + - String manipulation, formatting, and transformation utilities +- `htable.py` + - Lightweight rectangular table class with no pandas dependency + +## Caching and Performance + +- `hcache.py` + - Advanced function caching using joblib with S3 and git integration +- `hcache_simple.py` + - Simple caching with JSON or pickle file-based storage backends +- `hjoblib.py` + - Joblib parallelization, memory caching, and job management +- `hpickle.py` + - Pickle and JSON serialization and deserialization routines + +## Testing Framework + +- `hunit_test.py` + - Enhanced unit testing framework built on unittest and pytest with golden files +- `hunit_test_purification.py` + - Text purification utilities to sanitize test output for comparison +- `hunit_test_utils.py` + - Unit test utilities including test renaming and helpers +- `hpytest.py` + - Pytest integration utilities and test artifact handling +- `hcoverage.py` + - Code coverage utilities and test coverage analysis helpers +- `hplayback.py` + - Automatically generate unit tests by recording and 
replaying function calls +- `htest_logger.py` + - Test logging script template +- `hmoto.py` + - AWS service mocking with moto for unit testing + +## Markdown Processing + +- `hmarkdown.py` + - Markdown processing entry point aggregating all hmarkdown_* submodules +- `hmarkdown_bullets.py` + - Markdown bullet point processing and formatting +- `hmarkdown_coloring.py` + - Markdown text coloring utilities for LaTeX and HTML output +- `hmarkdown_comments.py` + - Markdown comment detection, extraction, and removal utilities +- `hmarkdown_div_blocks.py` + - Utilities for handling HTML div blocks within markdown files +- `hmarkdown_fenced_blocks.py` + - Fenced code block parsing and manipulation in markdown +- `hmarkdown_filtering.py` + - Markdown section extraction and content filtering utilities +- `hmarkdown_formatting.py` + - Markdown text formatting and whitespace normalization utilities +- `hmarkdown_headers.py` + - Markdown header manipulation, extraction, and level adjustment +- `hmarkdown_rules.py` + - Markdown rule validation and processing utilities +- `hmarkdown_slides.py` + - Markdown slide extraction, splitting, and processing for presentations +- `hmarkdown_tables.py` + - Markdown table parsing, formatting, and manipulation utilities +- `hmarkdown_toc.py` + - Markdown table of contents generation and YAML frontmatter handling +- `hlint.py` + - Linting utilities for text and code files +- `htext_protect.py` + - Utilities for protecting content regions during text processing + +## External Services and Cloud + +- `haws.py` + - AWS services integration with boto3 client and resource management +- `hs3.py` + - S3 file operations, listing, and S3-backed filesystem utilities +- `hsecrets.py` + - AWS Secrets Manager integration for secret retrieval +- `htranslate.py` + - AWS Translate service wrapper for text translation +- `hgit.py` + - Git repository operations, branch management, and diff utilities +- `hdocker.py` + - Docker container operations, image 
management, and Docker utilities +- `hdocker_tests.py` + - Utilities for running tests inside Docker containers +- `hdockerized_executables.py` + - Wrappers for Dockerized executables: prettier, pandoc, latex, and others +- `hgoogle_drive_api.py` + - Google Drive and Google Sheets API integration utilities +- `hchatgpt.py` + - OpenAI API integration with file management and chat utilities +- `hchatgpt_instructions.py` + - ChatGPT system instructions and prompt templates +- `hllm.py` + - LLM API integration with caching, cost tracking, and response handling +- `hllm_cli.py` + - LLM CLI interaction wrapper and cost estimation utilities +- `hllm_cost.py` + - LLM cost calculation for OpenRouter and other APIs +- `hslack.py` + - Slack notification utilities for sending messages to channels +- `hemail.py` + - Email sending utilities via SMTP +- `hsftp.py` + - SFTP file transfer operations using pysftp +- `hsql.py` + - SQL database operations as a PostgreSQL wrapper +- `hsql_implementation.py` + - Low-level SQL implementation with psycopg2 driver +- `hsql_test.py` + - SQL testing utilities, fixtures, and database test helpers +- `asana_utils.py` + - Enhanced Asana analytics with time estimation and team grouping +- `github_utils.py` + - GitHub API utilities for caching and repository data retrieval + +## Notebooks and Visualization + +- `hnotebook.py` + - Jupyter notebook configuration and display setup utilities +- `hjupyter.py` + - Jupyter notebook execution and output capture utilities +- `hmatplotlib.py` + - Matplotlib utilities, figure management, and plotting helpers +- `hmkdocs.py` + - MkDocs-specific markdown generation and documentation utilities +- `hlatex.py` + - LaTeX conversion utilities using pandoc + +## Miscellaneous + +- `hfile_tree.py` + - Directory tree building and formatted output utilities +- `hcfile.py` + - C file parsing and transformation utilities +- `repo_config_utils.py` + - Repository configuration utilities loaded from YAML +- 
`stage_linked_file.py` + - Symbolic link staging utility for git operations + +## Task System (`lib_tasks_*.py`) + +- `lib_tasks.py` + - Entry point that aggregates all invoke task modules +- `lib_tasks_aws.py` + - Invoke tasks for AWS operations and deployments +- `lib_tasks_bash.py` + - Invoke tasks for bash script execution +- `lib_tasks_docker.py` + - Invoke tasks for Docker build, run, and management operations +- `lib_tasks_docker_release.py` + - Invoke tasks for Docker image release and publishing workflows +- `lib_tasks_find.py` + - Invoke tasks for searching and finding files in the repo +- `lib_tasks_gh.py` + - Invoke tasks for GitHub pull requests and issues +- `lib_tasks_git.py` + - Invoke tasks for git branch, merge, and commit operations +- `lib_tasks_integrate.py` + - Invoke tasks for integrating changes between repositories +- `lib_tasks_lint.py` + - Invoke tasks for linting and code quality checks +- `lib_tasks_perms.py` + - Invoke tasks for managing file permissions +- `lib_tasks_print.py` + - Invoke tasks for printing setup and environment info +- `lib_tasks_pytest.py` + - Invoke tasks for running pytest suites (fast, slow, superslow) +- `lib_tasks_utils.py` + - Shared utilities and helpers used across task modules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py new file mode 100644 index 000000000..0aa7f7f4b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py @@ -0,0 +1,1156 @@ +""" 
+Enhanced Asana Analytics with Time Estimation and Team Grouping. + +Import as: + +import helpers.asana_utils as hasautil +""" + +import datetime as datetime_lib +import json +import logging +import os +from typing import Any, Dict, List, Optional + +import asana +import asana.rest as arest +import dateutil.parser as dateutil_parser +import pandas as pd + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# EnhancedAsanaAnalytics +# ############################################################################# + + +class EnhancedAsanaAnalytics: + def __init__(self, access_token: Optional[str] = None) -> None: + # Get token from parameter or environment variable. + token = access_token or os.getenv("ASANA_ACCESS_TOKEN") + if not token: + raise ValueError( + "Asana access token must be provided or set in ASANA_ACCESS_TOKEN" + ) + # Initialize Asana API client with access token. + configuration = asana.Configuration() + configuration.access_token = token + self.api_client = asana.ApiClient(configuration) + # Initialize API endpoints. + self.workspaces_api = asana.WorkspacesApi(self.api_client) + self.users_api = asana.UsersApi(self.api_client) + self.tasks_api = asana.TasksApi(self.api_client) + self.stories_api = asana.StoriesApi(self.api_client) + self.projects_api = asana.ProjectsApi(self.api_client) + self.custom_fields_api = asana.CustomFieldsApi(self.api_client) + + def get_workspace_gid(self, workspace_name: Optional[str] = None) -> str: + """ + Get the workspace GID by name or return the first available workspace. + + Retrieve the GID (Global ID) for an Asana workspace. If no + workspace name is provided, return the GID of the first + workspace available to the user. + + :param workspace_name: name of the workspace to find. 
+ :return: workspace GID as a string + """ + _LOG.info( + "Fetching workspace GID for workspace: %s", + workspace_name or "first available", + ) + # Fetch all available workspaces. + opts: Dict[str, Any] = {} + workspaces = self.workspaces_api.get_workspaces(opts) + # Convert to list if needed. + workspace_list = list(workspaces) if workspaces else [] + _LOG.info("Found %s workspaces", len(workspace_list)) + # Check if any workspaces exist. + if not workspace_list: + raise ValueError("No workspaces found") + result = None + # Search for specific workspace by name if provided. + if workspace_name: + for ws in workspace_list: + if ws["name"].lower() == workspace_name.lower(): + _LOG.info( + "Found workspace '%s' with GID: %s", + workspace_name, + ws["gid"], + ) + result = str(ws["gid"]) + break + if result is None: + raise ValueError(f"Workspace '{workspace_name}' not found") + else: + # Return first workspace if no name specified. + _LOG.info( + "Using first workspace: %s (GID: %s)", + workspace_list[0]["name"], + workspace_list[0]["gid"], + ) + result = str(workspace_list[0]["gid"]) + return result + + def get_team_members(self, workspace_gid: str) -> List[Dict[str, Any]]: + """ + Get all team members in a workspace. + + :param workspace_gid: workspace GID to query for users + :return: user information with keys 'gid','name', and 'email' + """ + _LOG.info("Fetching team members for workspace: %s", workspace_gid) + # Fetch all users in the workspace. + opts: Dict[str, Any] = {} + users = self.users_api.get_users_for_workspace(workspace_gid, opts) + # Convert to list if needed. + users_list = list(users) if users else [] + _LOG.info("Found %s team members", len(users_list)) + # Extract relevant user information. + result = [ + {"gid": u["gid"], "name": u["name"], "email": u.get("email", "N/A")} + for u in users_list + ] + # Log member names. 
+        member_names = [r["name"] for r in result]
+        _LOG.debug("Team members: %s", ", ".join(member_names))
+        return result
+
+    def get_user_by_name(
+        self, workspace_gid: str, username: str
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific user by their name in a workspace.
+
+        Search for a user by display name, case-insensitively. An exact
+        match takes precedence; otherwise the first member whose name
+        contains `username` is returned.
+
+        :param workspace_gid: workspace GID to search in
+        :param username: username or partial name to search for
+        :return: user with 'gid', 'name', and 'email', or None if no
+            member matches
+        """
+        _LOG.info("Searching for user: %s", username)
+        team_members = self.get_team_members(workspace_gid)
+        needle = username.lower()
+        if not needle.strip():
+            # A blank query is a substring of every name, so it would select
+            # an arbitrary member; treat it as "not found" instead.
+            _LOG.warning("User '%s' not found in workspace", username)
+            return None
+        # An exact match always wins. Return immediately so that a later
+        # partial hit (e.g. "Annabel" for "Ann") cannot overwrite it.
+        for team_member in team_members:
+            if team_member["name"].lower() == needle:
+                _LOG.info("Found exact match: %s", team_member["name"])
+                return team_member
+        # Otherwise fall back to the first member whose name contains the
+        # query.
+        for team_member in team_members:
+            if needle in team_member["name"].lower():
+                _LOG.info("Found partial match: %s", team_member["name"])
+                return team_member
+        _LOG.warning("User '%s' not found in workspace", username)
+        return None
+
+    def get_user_tasks_detailed(
+        self,
+        workspace_gid: str,
+        user_identifier: str,
+        *,
+        start_date: Optional[datetime_lib.datetime] = None,
+        end_date: Optional[datetime_lib.datetime] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        Get detailed task information including estimated time.
+
+        Fetch all tasks for a user with extended fields including custom
+        fields for time estimates, projects, tags, sections, and dates.
+
+        :param workspace_gid: workspace GID to query
+        :param user_identifier: user GID or username to retrieve tasks
+            for
+        :param start_date: start date for filtering tasks by creation
+            date.
+        :param end_date: end date for filtering tasks by creation date.
+        :return: data with name, completion status, timestamps, custom
+            fields, and project associations
+        """
+        # Resolve username to GID if needed.
+ if not user_identifier.isdigit(): + _LOG.info("Resolving username '%s' to GID", user_identifier) + user = self.get_user_by_name(workspace_gid, user_identifier) + if not user: + _LOG.error("User '%s' not found", user_identifier) + return [] + user_gid = user["gid"] + _LOG.debug("Resolved '%s' to GID: %s", user_identifier, user_gid) + else: + user_gid = user_identifier + _LOG.info("Fetching detailed tasks for user GID: %s", user_gid) + try: + # Define query parameters for task retrieval with extended fields. + opts = { + "assignee": user_gid, + "workspace": workspace_gid, + "opt_fields": ( + "name,completed,completed_at,created_at,modified_at," + "projects.name,projects.gid,num_subtasks,memberships.section.name," + "custom_fields,custom_fields.name,custom_fields.display_value," + "custom_fields.number_value,due_on,due_at,start_on," + "assignee.name,tags.name" + ), + } + # Fetch all tasks for the user. + _LOG.debug("Querying Asana API for detailed tasks...") + tasks = self.tasks_api.get_tasks(opts) + # Convert to list if generator. + tasks_list = list(tasks) if tasks else [] + _LOG.info( + "Retrieved %d tasks from API for user GID: %s", + len(tasks_list), + user_gid, + ) + # Make start_date and end_date timezone-aware if they aren't already. + if start_date and start_date.tzinfo is None: + start_date = start_date.replace(tzinfo=datetime_lib.timezone.utc) + if end_date and end_date.tzinfo is None: + end_date = end_date.replace(tzinfo=datetime_lib.timezone.utc) + # Filter tasks by date range if specified. + filtered_tasks = [] + for task in tasks_list: + # Parse creation date. + created_at = ( + dateutil_parser.parse(task["created_at"]) + if task.get("created_at") + else None + ) + # Apply start date filter. + if start_date and created_at and created_at < start_date: + continue + # Apply end date filter. + if end_date and created_at and created_at > end_date: + continue + # Add task to filtered results. 
+ filtered_tasks.append(task) + _LOG.info( + "Filtered to %d tasks within date range for user GID: %s", + len(filtered_tasks), + user_gid, + ) + return filtered_tasks + except arest.ApiException as e: + _LOG.error("API error fetching detailed tasks: %s", e) + raise + except Exception as e: + _LOG.error("Unexpected error fetching detailed tasks: %s", e) + return [] + + def extract_time_estimate(self, task: Dict[str, Any]) -> Optional[float]: + """ + Extract time estimate from custom fields. + + Search through task custom fields for time estimation values. + Looks for common field names like 'estimated time', 'estimate', + 'hours', etc. + + :param task: tasks data containing custom_fields + :return: estimated hours as float, or None if not found + """ + result = None + if not task.get("custom_fields"): + _LOG.debug( + "No custom fields found for task: %s", task.get("gid", "unknown") + ) + return result + # Common field names for time estimates. + time_field_names = [ + "estimated time", + "estimate", + "time estimate", + "hours", + "estimated hours", + "effort", + ] + for field in task["custom_fields"]: + field_name = field.get("name", "").lower() + # Check if field name matches any time estimation pattern. + if any(time_name in field_name for time_name in time_field_names): + # Try number_value first, then display_value. 
+ if field.get("number_value") is not None: + result = float(field["number_value"]) / 60.0 + _LOG.debug( + "Found time estimate %s hours in field '%s' for task: %s", + result, + field.get("name"), + task.get("gid", "unknown"), + ) + break + elif field.get("display_value"): + try: + result = float(field["display_value"]) / 60.0 + _LOG.debug( + "Found time estimate %s hours in field '%s' for task: %s", + result, + field.get("name"), + task.get("gid", "unknown"), + ) + break + except (ValueError, TypeError): + _LOG.warning( + "Could not parse display_value '%s' as float for task: %s", + field.get("display_value"), + task.get("gid", "unknown"), + ) + return result + + def get_task_stories(self, task_gid: str) -> List[Dict[str, Any]]: + """ + Get all stories (comments and activity) for a task. + + Fetch all stories including comments, task updates, and system + activities for a specific task. + + :param task_gid: task GID to fetch stories for + :return: data of type, text, created_at, and creator information + """ + _LOG.info("Fetching stories for task: %s", task_gid) + try: + opts = { + "opt_fields": ( + "type,text,created_at,created_by.name,created_by.email," + "resource_subtype,is_edited" + ) + } + stories = self.stories_api.get_stories_for_task(task_gid, opts) + stories_list = list(stories) if stories else [] + _LOG.debug( + "Found %d stories for task %s", len(stories_list), task_gid + ) + return stories_list + except arest.ApiException as e: + _LOG.error("API error fetching stories for task %s: %s", task_gid, e) + return [] + except Exception as e: + _LOG.error( + "Unexpected error fetching stories for task %s: %s", task_gid, e + ) + return [] + + def extract_comment_metrics(self, task_gid: str) -> Dict[str, Any]: + """ + Extract comment and activity metrics for a task. 
+ + Analyze all stories for a task to extract metrics including: + - Total comment count + - Unique commenters + - Activity count (system updates) + - Last activity timestamp + - Comment frequency + + :param task_gid: task GID to analyze + :return: comment metrics + """ + stories = self.get_task_stories(task_gid) + # Initialize counters. + num_comments = 0 + num_activities = 0 + unique_commenters = set() + last_activity_at = None + for story in stories: + # Parse created timestamp. + created_at = ( + dateutil_parser.parse(story["created_at"]) + if story.get("created_at") + else None + ) + # Track last activity. + if created_at: + if last_activity_at is None or created_at > last_activity_at: + last_activity_at = created_at + # Categorize story type. + story_type = story.get("type", "") + if story_type == "comment": + num_comments += 1 + # Track unique commenters. + if story.get("created_by") and story["created_by"].get("name"): + unique_commenters.add(story["created_by"]["name"]) + else: + # System activities (status changes, assignments, etc). + num_activities += 1 + result = { + "num_comments": num_comments, + "num_activities": num_activities, + "total_stories": len(stories), + "unique_commenters": len(unique_commenters), + "unique_commenter_names": list(unique_commenters), + "last_activity_at": last_activity_at, + } + _LOG.debug( + "Task %s metrics: %d comments, %d activities, %d unique commenters", + task_gid, + num_comments, + num_activities, + len(unique_commenters), + ) + return result + + def calculate_activity_rate( + self, + created_at: datetime_lib.datetime, + last_activity_at: Optional[datetime_lib.datetime], + num_comments: int, + num_activities: int, + ) -> Dict[str, float]: + """ + Calculate activity rate metrics for a task. + + Compute various activity rate metrics based on task timeline and + activity counts. 
+ + :param created_at: task creation timestamp + :param last_activity_at: timestamp of last activity/comment + :param num_comments: total number of comments + :param num_activities: total number of system activities + :return: activity rate metric + """ + now = datetime_lib.datetime.now(datetime_lib.timezone.utc) + + # Calculate task age in days. + task_age_days = (now - created_at).total_seconds() / 86400 + + # Calculate days since last activity. + days_since_activity = None + if last_activity_at: + days_since_activity = ( + now - last_activity_at + ).total_seconds() / 86400 + + # Calculate activity rates (avoid division by zero). + if task_age_days > 0: + comments_per_day = num_comments / task_age_days + activities_per_day = num_activities / task_age_days + total_activity_per_day = ( + num_comments + num_activities + ) / task_age_days + else: + comments_per_day = 0.0 + activities_per_day = 0.0 + total_activity_per_day = 0.0 + + result = { + "task_age_days": task_age_days, + "comments_per_day": comments_per_day, + "activities_per_day": activities_per_day, + "total_activity_per_day": total_activity_per_day, + "days_since_activity": days_since_activity, + } + + return result + + def get_user_tasks_with_activity( + self, + workspace_gid: str, + user_identifier: str, + *, + start_date: Optional[datetime_lib.datetime] = None, + end_date: Optional[datetime_lib.datetime] = None, + include_comments: bool = True, + ) -> List[Dict[str, Any]]: + """ + Get detailed task information including comments and activity metrics. + + Extended version of get_user_tasks_detailed that also fetches + comment and activity data for each task. + + :param workspace_gid: workspace GID to query + :param user_identifier: user GID or username to retrieve tasks + for + :param start_date: start date for filtering tasks by creation + date + :param end_date: end date for filtering tasks by creation date + :param include_comments: if True, fetch comment/activity data + for each task (default: True). 
Set to False for faster + execution + :return: task data with comment and activity metrics included + """ + # Get detailed tasks first. + tasks = self.get_user_tasks_detailed( + workspace_gid, + user_identifier, + start_date=start_date, + end_date=end_date, + ) + + if not include_comments: + return tasks + + _LOG.info("Fetching comment/activity data for %d tasks", len(tasks)) + + # Enhance each task with comment metrics. + for i, task in enumerate(tasks): + if (i + 1) % 10 == 0: + _LOG.info( + "Processing task %d/%d for comments...", i + 1, len(tasks) + ) + + # Get comment metrics. + comment_metrics = self.extract_comment_metrics(task["gid"]) + + # Add metrics to task. + task["num_comments"] = comment_metrics["num_comments"] + task["num_activities"] = comment_metrics["num_activities"] + task["total_stories"] = comment_metrics["total_stories"] + task["unique_commenters"] = comment_metrics["unique_commenters"] + task["unique_commenter_names"] = comment_metrics[ + "unique_commenter_names" + ] + task["last_activity_at"] = comment_metrics["last_activity_at"] + + # Calculate activity rates if we have created_at. + if task.get("created_at"): + created_at = dateutil_parser.parse(task["created_at"]) + activity_rates = self.calculate_activity_rate( + created_at, + comment_metrics["last_activity_at"], + comment_metrics["num_comments"], + comment_metrics["num_activities"], + ) + task.update(activity_rates) + + _LOG.info("Comment/activity data added to all tasks") + return tasks + + def create_task_dataframe( + self, + workspace_gid: str, + user_identifiers: Optional[List[str]] = None, + *, + project_names: Optional[List[str]] = None, + start_date: Optional[datetime_lib.datetime] = None, + end_date: Optional[datetime_lib.datetime] = None, + team_mapping: Optional[Dict[str, str]] = None, + include_comments: bool = False, + ) -> pd.DataFrame: + """ + Create comprehensive task DataFrame for all users. 
+ + Build a detailed DataFrame containing all task information for + specified users, with optional filtering by project and date + range. Includes time estimates, sprint information, and team + assignments. + + :param workspace_gid: workspace GID to query + :param user_identifiers: usernames or GIDs to analyze. + :param project_names: project names to filter by and use + as team names (e.g., ["tech-now", "tech-next"]). If + provided, team will be determined from project name + :param start_date: start date for filtering tasks by creation + date + :param end_date: end date for filtering tasks by creation date + :param team_mapping: username to team name. Only + used if project_names is not provided + - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} + :param include_comments: if True, fetch comment/activity data + (default: False). Set to True to include activity metrics + :return: data with columns including user info, task + details, dates, completion status, time estimates, project, + sprint, section, tags, and subtasks + """ + _LOG.info("Creating comprehensive task DataFrame") + # Get users to analyze. + team_members = [] + if user_identifiers: + for user_id in user_identifiers: + if user_id.isdigit(): + # If GID, fetch user info. + opts = {"opt_fields": "name,email"} + user_info = self.users_api.get_user(user_id, opts) + team_members.append( + { + "gid": user_id, + "name": user_info["name"], + "email": user_info.get("email", "N/A"), + } + ) + else: + # If username, resolve to user. + user = self.get_user_by_name(workspace_gid, user_id) + if user: + team_members.append(user) + else: + # Get all team members if no specific users provided. + team_members = self.get_team_members(workspace_gid) + all_task_data = [] + # Process tasks for each team member. + for member in team_members: + _LOG.info("Processing tasks for: %s", member["name"]) + # Fetch detailed tasks for this user. 
+            if include_comments:
+                tasks = self.get_user_tasks_with_activity(
+                    workspace_gid,
+                    member["gid"],
+                    start_date=start_date,
+                    end_date=end_date,
+                    include_comments=True,
+                )
+            else:
+                tasks = self.get_user_tasks_detailed(
+                    workspace_gid,
+                    member["gid"],
+                    start_date=start_date,
+                    end_date=end_date,
+                )
+            # Process each task.
+            for task in tasks:
+                # Parse dates.
+                created_at = (
+                    dateutil_parser.parse(task["created_at"])
+                    if task.get("created_at")
+                    else None
+                )
+                completed_at = (
+                    dateutil_parser.parse(task["completed_at"])
+                    if task.get("completed_at")
+                    else None
+                )
+                due_at = (
+                    dateutil_parser.parse(task["due_at"])
+                    if task.get("due_at")
+                    else None
+                )
+                # Check if task is overdue. `due_at` is only populated when
+                # the deadline has a time of day; date-only deadlines arrive
+                # in `due_on`, so fall back to it. Otherwise those tasks
+                # would never be reported as overdue.
+                is_overdue = False
+                if not task.get("completed"):
+                    now_utc = datetime_lib.datetime.now(
+                        datetime_lib.timezone.utc
+                    )
+                    if due_at:
+                        is_overdue = due_at < now_utc
+                    elif task.get("due_on"):
+                        # A task due today is not overdue yet.
+                        due_date = dateutil_parser.parse(task["due_on"]).date()
+                        is_overdue = due_date < now_utc.date()
+                # Extract time estimate from custom fields.
+                estimated_hours = self.extract_time_estimate(task)
+                # Calculate actual hours if task is completed.
+                actual_hours = None
+                if completed_at and created_at:
+                    actual_hours = (
+                        completed_at - created_at
+                    ).total_seconds() / 3600
+                # Extract projects, tags, and sections.
+                projects = [p["name"] for p in task.get("projects", [])]
+                project_gids = [p["gid"] for p in task.get("projects", [])]
+                tags = [t["name"] for t in task.get("tags", [])]
+                # Extract sections (sprints in Asana).
+                sections = []
+                sprints = []
+                if task.get("memberships"):
+                    for membership in task["memberships"]:
+                        if membership.get("section"):
+                            section_name = membership["section"]["name"]
+                            sections.append(section_name)
+                            # Identify sprint sections using common patterns.
+                            if any(
+                                keyword in section_name.lower()
+                                for keyword in [
+                                    "sprint",
+                                    "iteration",
+                                    "cycle",
+                                    "week",
+                                ]
+                            ):
+                                sprints.append(section_name)
+                # Build task data dictionary.
+                task_data = {
+                    # User info.
+                    "user_name": member["name"],
+                    "user_email": member["email"],
+                    "user_gid": member["gid"],
+                    # Task info.
+ "task_name": task.get("name", "Untitled"), + "task_gid": task["gid"], + # Dates. + "created_at": created_at, + "completed_at": completed_at, + "due_on": task.get("due_on"), + "due_at": due_at, + "start_on": task.get("start_on"), + # Status. + "is_completed": task.get("completed", False), + "is_overdue": is_overdue, + # Time tracking. + "estimated_hours": estimated_hours, + "actual_hours": actual_hours, + # Organization. + "project": projects[0] if projects else None, + "all_projects": ", ".join(projects) if projects else None, + "project_gid": project_gids[0] if project_gids else None, + "tags": ", ".join(tags) if tags else None, + "section": sections[0] if sections else None, + "sprint": sprints[0] if sprints else None, + "all_sprints": ", ".join(sprints) if sprints else None, + "num_subtasks": task.get("num_subtasks", 0), + } + # Add comment/activity metrics if included. + if include_comments: + task_data.update( + { + "num_comments": task.get("num_comments", 0), + "num_activities": task.get("num_activities", 0), + "total_stories": task.get("total_stories", 0), + "unique_commenters": task.get( + "unique_commenters", 0 + ), + "last_activity_at": task.get("last_activity_at"), + "task_age_days": task.get("task_age_days", 0), + "comments_per_day": task.get( + "comments_per_day", 0.0 + ), + "activities_per_day": task.get( + "activities_per_day", 0.0 + ), + "total_activity_per_day": task.get( + "total_activity_per_day", 0.0 + ), + "days_since_activity": task.get( + "days_since_activity" + ), + } + ) + # Add team - either from project name or mapping. + if project_names: + # Determine team from project name. + task_data["team"] = task_data["project"] + elif team_mapping: + task_data["team"] = team_mapping.get( + member["name"], "Unassigned" + ) + else: + # No team mapping, use project as team (default). + task_data["team"] = task_data["project"] + all_task_data.append(task_data) + # Create DataFrame. 
+ df = pd.DataFrame(all_task_data) + # Filter by project if specified. + if project_names and len(df) > 0: + df = df[df["project"].isin(project_names)] + _LOG.info( + "Filtered to %d tasks from projects: %s", len(df), project_names + ) + _LOG.info("Created DataFrame with %d tasks", len(df)) + result = df + return result + + def create_team_comparison_df( + self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None + ) -> pd.DataFrame: + """ + Create team-level comparison DataFrame from task DataFrame. + + Aggregate task-level data to team-level metrics for comparison + across teams. Requires task DataFrame to have 'team' column. + + :param task_df: data with 'team' column + :param metrics: metrics to calculate. If None, calculate all + :return: data with team-level aggregated metrics + """ + if "team" not in task_df.columns: + _LOG.error( + "task_df missing 'team' column. Available columns: %s", + task_df.columns.tolist(), + ) + raise ValueError( + "task_df must have 'team' column. Pass team_mapping or " + "project_names to create_task_dataframe()" + ) + + _LOG.info("Creating team comparison DataFrame") + _LOG.info("Found %d unique teams in data", task_df["team"].nunique()) + + # Set default metrics if not provided. + if metrics is None: + metrics = [ + "total_tasks", + "completed_tasks", + "in_progress_tasks", + "completion_rate", + "total_estimated_hours", + "avg_estimated_hours", + "total_actual_hours", + "overdue_tasks", + "overdue_rate", + "unique_users", + ] + team_stats = [] + # Calculate metrics for each team. + for team_name in task_df["team"].unique(): + if team_name is None or ( + isinstance(team_name, float) and pd.isna(team_name) + ): + _LOG.warning("Skipping None/NaN team name") + continue + + team_data = task_df[task_df["team"] == team_name] + _LOG.debug( + "Processing team: %s (%d tasks)", team_name, len(team_data) + ) + + stats = {"team": team_name} + # Calculate each requested metric. 
+            if "total_tasks" in metrics:
+                stats["total_tasks"] = len(team_data)
+            if "completed_tasks" in metrics:
+                stats["completed_tasks"] = team_data["is_completed"].sum()
+            if "in_progress_tasks" in metrics:
+                stats["in_progress_tasks"] = (~team_data["is_completed"]).sum()
+            if "completion_rate" in metrics:
+                if len(team_data) > 0:
+                    # Count completions directly rather than reading
+                    # stats["completed_tasks"], which is absent when the
+                    # caller did not also request "completed_tasks".
+                    stats["completion_rate"] = (
+                        team_data["is_completed"].sum() / len(team_data)
+                    ) * 100
+                else:
+                    stats["completion_rate"] = 0.0
+            if "total_estimated_hours" in metrics:
+                stats["total_estimated_hours"] = team_data[
+                    "estimated_hours"
+                ].sum()
+            if "avg_estimated_hours" in metrics:
+                stats["avg_estimated_hours"] = team_data[
+                    "estimated_hours"
+                ].mean()
+            if "total_actual_hours" in metrics:
+                stats["total_actual_hours"] = team_data["actual_hours"].sum()
+            if "overdue_tasks" in metrics:
+                stats["overdue_tasks"] = team_data["is_overdue"].sum()
+            if "overdue_rate" in metrics:
+                active_tasks = (~team_data["is_completed"]).sum()
+                if active_tasks > 0:
+                    # Same reasoning as completion_rate: do not depend on
+                    # "overdue_tasks" having been requested as well.
+                    stats["overdue_rate"] = (
+                        team_data["is_overdue"].sum() / active_tasks
+                    ) * 100
+                else:
+                    stats["overdue_rate"] = 0.0
+            if "unique_users" in metrics:
+                stats["unique_users"] = team_data["user_name"].nunique()
+            team_stats.append(stats)
+
+        _LOG.info("Team comparison completed for %d teams", len(team_stats))
+        result = pd.DataFrame(team_stats)
+        return result
+
+    def create_user_comparison_df(
+        self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None
+    ) -> pd.DataFrame:
+        """
+        Create user-level comparison DataFrame with aggregated metrics.
+
+        Aggregate task-level data to user-level metrics for individual
+        performance comparison.
+
+        :param task_df: tasks data
+        :param metrics: metrics to calculate. If None, calculate all
+        :return: data with user-level aggregated metrics
+        """
+        # Set default metrics if not provided.
+        if metrics is None:
+            metrics = [
+                "total_tasks",
+                "completed_tasks",
+                "completion_rate",
+                "total_estimated_hours",
+                "avg_estimated_hours",
+                "overdue_tasks",
+                "unique_projects",
+            ]
+        user_stats = []
+        # A frame built from zero tasks has no columns at all, so indexing
+        # "user_name" below would raise KeyError. Return the same empty
+        # result an empty frame with columns would produce.
+        if task_df.empty:
+            return pd.DataFrame(user_stats)
+        # Calculate metrics for each user.
+        for user_name in task_df["user_name"].unique():
+            user_data = task_df[task_df["user_name"] == user_name]
+            stats = {
+                "user_name": user_name,
+                "user_email": user_data["user_email"].iloc[0],
+            }
+            # Add team if available.
+            if "team" in task_df.columns:
+                stats["team"] = user_data["team"].iloc[0]
+            # Calculate each requested metric.
+            if "total_tasks" in metrics:
+                stats["total_tasks"] = len(user_data)
+            if "completed_tasks" in metrics:
+                stats["completed_tasks"] = user_data["is_completed"].sum()
+            if "completion_rate" in metrics:
+                if len(user_data) > 0:
+                    # Count completions directly rather than reading
+                    # stats["completed_tasks"], which is absent when the
+                    # caller did not also request "completed_tasks".
+                    stats["completion_rate"] = (
+                        user_data["is_completed"].sum() / len(user_data)
+                    ) * 100
+                else:
+                    stats["completion_rate"] = 0.0
+            if "total_estimated_hours" in metrics:
+                stats["total_estimated_hours"] = user_data[
+                    "estimated_hours"
+                ].sum()
+            if "avg_estimated_hours" in metrics:
+                stats["avg_estimated_hours"] = user_data[
+                    "estimated_hours"
+                ].mean()
+            if "overdue_tasks" in metrics:
+                stats["overdue_tasks"] = user_data["is_overdue"].sum()
+            if "unique_projects" in metrics:
+                projects = user_data["all_projects"].dropna()
+                unique_projects = set()
+                for proj_str in projects:
+                    unique_projects.update(proj_str.split(", "))
+                stats["unique_projects"] = len(unique_projects)
+            user_stats.append(stats)
+        result = pd.DataFrame(user_stats)
+        return result
+
+
+# #############################################################################
+# Convenience functions
+# #############################################################################
+
+
+def list_workspace_users(
+    workspace_name: str, *, access_token: Optional[str] = None
+) -> List[str]:
+    """
+    Get all usernames in a workspace.
+
+    Convenience function to quickly see all available users in a
+    workspace.
+ + :param workspace_name: name of workspace to query + :param access_token: Asana access token + :return: usernames (display names) + """ + # Initialize analytics instance. + analytics_instance = EnhancedAsanaAnalytics(access_token) + # Get workspace GID. + workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) + # Get team members. + team_members = analytics_instance.get_team_members(workspace_gid_local) + # Extract usernames. + result = [member["name"] for member in team_members] + return result + + +def get_user_by_name( + workspace_name: str, + username: str, + *, + access_token: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """ + Get a specific user by their name in a workspace. + + Convenience function to find a user without instantiating the class. + + :param workspace_name: name of workspace to search in + :param username: username or partial name to search for + :param access_token: Asana access token + :return: user with 'gid', 'name', and 'email', or None if not found + """ + # Initialize analytics instance. + analytics_instance = EnhancedAsanaAnalytics(access_token) + # Get workspace GID. + workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) + # Find user. + result = analytics_instance.get_user_by_name(workspace_gid_local, username) + return result + + +def create_kibana_ready_dataset( + workspace_name: str, + start_date: datetime_lib.datetime, + end_date: datetime_lib.datetime, + *, + project_names: Optional[List[str]] = None, + team_mapping: Optional[Dict[str, str]] = None, + access_token: Optional[str] = None, + user_list: Optional[List[str]] = None, + include_comments: bool = False, +) -> Dict[str, pd.DataFrame]: + """ + Create Kibana-ready datasets with all metrics. + + Generate three DataFrames suitable for Kibana visualization: detailed + task-level data, user-level aggregates, and team-level aggregates. + By default, extracts ALL tasks from ALL users and ALL projects. 
+ The 'project' column can be used for filtering in Kibana. + + :param workspace_name: Asana workspace name to analyze + :param start_date: start date for analysis period + :param end_date: end date for analysis period + :param project_names: project names to filter by + (e.g., ["tech-now", "tech-next"]). If None, extract ALL projects. + When provided, also uses project names as team names + :param team_mapping: usernames to team names. + Alternative to project_names. If both are None, uses project as + team + - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} + :param access_token: Asana access token. If None, reads from + environment variable ASANA_ACCESS_TOKEN + :param user_list: specific usernames or GIDs to analyze. If + None, analyze ALL team members + :param include_comments: if True, fetch comment/activity data + (default: False). Set to True to include activity metrics + :return: data with three DataFrames: + - 'tasks': detailed task-level data with sprint/section info + - 'users': user-level aggregated metrics + - 'teams': team-level aggregated metrics + """ + _LOG.info("=" * 70) + _LOG.info("STARTING KIBANA DATASET CREATION") + _LOG.info("=" * 70) + _LOG.info("Workspace: %s", workspace_name) + _LOG.info("Date range: %s to %s", start_date.date(), end_date.date()) + _LOG.info("Project filter: %s", project_names if project_names else "ALL") + _LOG.info("User filter: %s", user_list if user_list else "ALL") + _LOG.info("Include comments: %s", include_comments) + + # Initialize analytics instance. + _LOG.info("Initializing Asana Analytics client...") + analytics = EnhancedAsanaAnalytics(access_token) + + # Get workspace GID. + _LOG.info("Resolving workspace GID for: %s", workspace_name) + workspace_gid = analytics.get_workspace_gid(workspace_name) + _LOG.info("Workspace GID resolved: %s", workspace_gid) + + # Create detailed task DataFrame. 
def save_to_ndjson(
    df: pd.DataFrame, filepath: str, index_name: Optional[str] = None
) -> None:
    """
    Save DataFrame to NDJSON format for Kibana/OpenSearch bulk upload.

    Each row is written as one JSON document per line. When `index_name` is
    given, every document line is preceded by a bulk API action line
    `{"index": {"_index": <index_name>}}`.

    Cell values are normalized before serialization:
    - missing scalar values (NaN, None, NaT) are written as JSON null
    - date-like values (anything exposing `isoformat()`, e.g. pandas
      Timestamps) are written as ISO strings
    - non-scalar values (e.g. lists) are passed to the JSON encoder as they
      are

    :param df: data to save
    :param filepath: output file path (e.g., 'asana_tasks.ndjson')
    :param index_name: optional index name to include in bulk action
        metadata. If None, only document data is written
    """
    _LOG.info("Saving DataFrame to NDJSON: %s", filepath)
    _LOG.info("DataFrame shape: %d rows, %d columns", len(df), len(df.columns))
    # Convert DataFrame to records (list of dicts).
    records = df.to_dict(orient="records")
    # The bulk action line is the same for every document: build it once.
    action_line = None
    if index_name:
        action_line = json.dumps({"index": {"_index": index_name}}) + "\n"
    # Use an explicit encoding so the output does not depend on the platform
    # default.
    with open(filepath, "w", encoding="utf-8") as f:
        for record in records:
            document = {}
            for key, value in record.items():
                # `pd.isna()` returns an array for list-like values, which
                # cannot be used in a boolean test: only check scalars.
                if pd.api.types.is_scalar(value) and pd.isna(value):
                    # Convert NaN/None/NaT to null.
                    document[key] = None
                elif hasattr(value, "isoformat"):
                    # Convert timestamps and dates to ISO format strings.
                    document[key] = value.isoformat()
                else:
                    document[key] = value
            if action_line is not None:
                # Write bulk API metadata line.
                f.write(action_line)
            # Write document data line.
            f.write(json.dumps(document) + "\n")
    _LOG.info("Successfully saved %d records to %s", len(records), filepath)
If False, save as + CSV (default: True) + :param index_prefix: prefix for index names when using NDJSON + (default: 'asana') + :return: dataset names to saved file paths + """ + _LOG.info("=" * 70) + _LOG.info("SAVING DATASETS FOR KIBANA") + _LOG.info("=" * 70) + _LOG.info("Output directory: %s", output_dir) + _LOG.info("Format: %s", "NDJSON" if use_ndjson else "CSV") + + saved_files = {} + extension = "ndjson" if use_ndjson else "csv" + + for dataset_name, df in datasets.items(): + # Construct file path. + filename = "{}_{}_{}.{}".format( + index_prefix, dataset_name, "kibana", extension + ) + filepath = "{}/{}".format(output_dir, filename) + + _LOG.info("Saving %s dataset (%d rows)...", dataset_name, len(df)) + + if use_ndjson: + # Save as NDJSON with index name. + index_name = "{}-{}".format(index_prefix, dataset_name) + save_to_ndjson(df, filepath, index_name=index_name) + else: + # Save as CSV. + df.to_csv(filepath, index=False) + _LOG.info("Saved to CSV: %s", filepath) + + saved_files[dataset_name] = filepath + + _LOG.info("=" * 70) + _LOG.info("ALL DATASETS SAVED!") + _LOG.info("=" * 70) + for dataset_name, filepath in saved_files.items(): + _LOG.info(" %s: %s", dataset_name, filepath) + _LOG.info("=" * 70) + + result = saved_files + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py new file mode 100644 index 000000000..318897d3e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py @@ -0,0 +1,2110 @@ +""" +Import as: + +import helpers.github_utils as hgitutil +""" + +import collections +import datetime +import functools +import itertools +import json +import logging +import os +import time +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple + +import 
def github_cached(cache_type: str = "json", write_through: bool = True):
    """
    Build a caching decorator for functions that call the GitHub API.

    The decorated function must take the GitHub client as its first
    positional argument. The client is left out of the cache key, so only
    the remaining positional and keyword arguments identify a cached result.

    :param cache_type: type of cache ('json' or 'pickle'), registered for
        the function only when no type is registered yet
    :param write_through: if True, flush the cache to disk after each new
        entry
    :return: decorator adding caching to a function
    """
    suffix = "_intrinsic"

    def decorator(func: Callable) -> Callable:
        # The cache is named after the function, without the `_intrinsic`
        # suffix.
        cache_name = func.__name__
        if cache_name.endswith(suffix):
            cache_name = cache_name[: -len(suffix)]
        # Register the cache type only if none is registered already.
        if not hcacsimp.get_cache_property(cache_name, "type"):
            hcacsimp.set_cache_property(cache_name, "type", cache_type)

        @functools.wraps(func)
        def wrapper(client, *args, **kwargs):
            # Serialize every argument but `client` into the lookup key.
            key = json.dumps(
                {"args": args, "kwargs": kwargs},
                sort_keys=True,
                default=str,
            )
            store = hcacsimp.get_cache(cache_name)
            if key not in store:
                # Cache miss: call the wrapped function and remember the
                # value.
                _LOG.debug("Cache miss for %s, fetching from API", cache_name)
                value = func(client, *args, **kwargs)
                store[key] = value
                if write_through:
                    hcacsimp.flush_cache_to_disk(cache_name)
                return value
            _LOG.debug("Cache hit for %s", cache_name)
            return store[key]

        return wrapper

    return decorator
def get_github_contributors(
    client: github.Github, repo_names: List[str]
) -> Dict[str, List[str]]:
    """
    Collect the contributor logins of each given repository.

    :param client: authenticated instance of the PyGithub client
    :param repo_names: repository names in the format 'owner/repo' to fetch
        contributor usernames
    :return: mapping from each repository name to the GitHub usernames of
        its contributors
    """
    contributors_by_repo: Dict[str, List[str]] = {}
    for full_name in repo_names:
        repository = client.get_repo(full_name)
        hdbg.dassert_is_not(
            repository, None, "Could not fetch repo: %s", full_name
        )
        # Keep only the login of each contributor.
        logins = []
        for member in repository.get_contributors():
            logins.append(member.login)
        contributors_by_repo[full_name] = logins
    return contributors_by_repo
def get_total_commits(
    client: github.Github,
    org_name: str,
    *,
    usernames: Optional[List[str]] = None,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
) -> Dict[str, Any]:
    """
    Fetch the number of commits made in the repositories of the specified
    organization, optionally filtered by GitHub usernames and a specified time
    period.

    :param client: authenticated instance of the PyGithub client
    :param org_name: name of the GitHub organization
    :param usernames: GitHub usernames to filter commits; if None, fetches for
        all users
    :param period: start and end datetime for filtering commits; if None,
        commits are not filtered by date
    :return: a dictionary containing:
        - total_commits (int): total number of commits across all repositories
        - period (str): the time range considered
        - commits_per_repository (Dict[str, int]): repository names as keys and
          commit counts as values
    """
    # Retrieve organization repositories.
    repos_info = get_repo_names(client, org_name)
    hdbg.dassert_in(
        "repositories",
        repos_info,
        "Missing 'repositories' key in get_repo_names() output",
    )
    repositories = repos_info["repositories"]
    since, until = period if period else (None, None)
    # PyGithub's `get_commits()` accepts a datetime (or its own "not set"
    # default) for `since` / `until` and rejects None, so forward only the
    # bounds that are actually set.
    date_filter: Dict[str, datetime.datetime] = {}
    if since is not None:
        date_filter["since"] = since
    if until is not None:
        date_filter["until"] = until
    total_commits = 0
    commits_per_repository = {}
    for repo_name in tqdm(
        repositories, desc="Processing repositories", unit="repo"
    ):
        repo = client.get_repo(f"{org_name}/{repo_name}")
        hdbg.dassert_is_not(repo, None, "Could not retrieve repo: %s", repo_name)
        repo_commit_count = 0
        if usernames:
            # Sum the commits authored by each requested user.
            for username in usernames:
                commits = repo.get_commits(author=username, **date_filter)
                hdbg.dassert_is_not(
                    commits,
                    None,
                    "Failed to get commits by '%s' in %s",
                    username,
                    repo_name,
                )
                repo_commit_count += commits.totalCount
        else:
            commits = repo.get_commits(**date_filter)
            hdbg.dassert_is_not(
                commits, None, "Failed to get commits in %s", repo_name
            )
            repo_commit_count = commits.totalCount
        commits_per_repository[repo_name] = repo_commit_count
        total_commits += repo_commit_count
    result = {
        "total_commits": total_commits,
        "period": f"{since} to {until}" if since and until else "All time",
        "commits_per_repository": commits_per_repository,
    }
    return result
Optional[Tuple[datetime.datetime, datetime.datetime]] = None, + state: str = "all", +) -> Dict[str, Any]: + """ + Fetch the number of pull requests made in the repositories of the specified + organization, optionally filtered by GitHub usernames, a specified time + period, and the state of the pull requests. + + :param client: authenticated instance of the PyGithub client + :param org_name: name of the GitHub organization + :param usernames: GitHub usernames to filter pull requests; if None, fetches + for all users + :param period: start and end datetime for filtering pull requests + :param state: the state of the pull requests to fetch; can be 'open', 'closed', or 'all' + :return: a dictionary containing: + - total_prs (int): total number of pull requests + - period (str): the time range considered + - prs_per_repository (Dict[str, int]): repository names as keys and pull + request counts as values + """ + # Retrieve repositories for the organization + repos_info = get_repo_names(client, org_name) + hdbg.dassert_in( + "repositories", repos_info, "Missing 'repositories' key in repo info" + ) + repositories = repos_info["repositories"] + total_prs = 0 + prs_per_repository = {} + since, until = normalize_period_to_utc(period) + for repo_name in tqdm( + repositories, desc="Processing repositories", unit="repo" + ): + repo = client.get_repo(f"{org_name}/{repo_name}") + hdbg.dassert_is_not( + repo, None, "Could not retrieve repository: %s", repo_name + ) + repo_pr_count = 0 + pulls = repo.get_pulls(state=state) + for pr in pulls: + hdbg.dassert_is_not( + pr, None, "PR could not be fetched in %s", repo_name + ) + if usernames and pr.user.login not in usernames: + continue + pr_created_at = ( + pr.created_at.replace(tzinfo=datetime.timezone.utc) + if pr.created_at.tzinfo is None + else pr.created_at.astimezone(datetime.timezone.utc) + ) + if since and until and not (since <= pr_created_at <= until): + continue + repo_pr_count += 1 + prs_per_repository[repo_name] = 
def get_prs_not_merged(
    client: github.Github,
    org_name: str,
    *,
    usernames: Optional[List[str]] = None,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
) -> Dict[str, Any]:
    """
    Fetch the count of closed but unmerged pull requests in the specified
    repositories and by the specified GitHub users within a given period.

    :param client: authenticated instance of the PyGithub client
    :param org_name: name of the GitHub organization
    :param usernames: GitHub usernames to filter pull requests; if None,
        fetches for all users
    :param period: start and end datetime for filtering pull requests by
        creation time; if None, pull requests are not filtered by date
    :return: a dictionary containing:
        - prs_not_merged (int): total number of closed but unmerged pull
          requests
        - period (str): the time range considered
        - prs_per_repository (Dict[str, int]): repository names as keys and
          unmerged pull request counts as values
    """
    # Fetch all repositories in the org.
    repos_info = get_repo_names(client, org_name)
    hdbg.dassert_in(
        "repositories",
        repos_info,
        "Missing 'repositories' in get_repo_names() output",
    )
    repositories = repos_info["repositories"]
    total_unmerged_prs = 0
    prs_per_repository = {}
    since, until = normalize_period_to_utc(period)
    # PyGithub's `get_issues()` rejects `since=None`, so pass the bound only
    # when a period is given.
    issue_filter: Dict[str, Any] = {"state": "closed"}
    if since is not None:
        issue_filter["since"] = since
    for repo_name in tqdm(
        repositories, desc="Processing repositories", unit="repo"
    ):
        # Fetch repo object.
        repo = client.get_repo(f"{org_name}/{repo_name}")
        hdbg.dassert_is_not(
            repo,
            None,
            "Could not fetch repo: %s/%s",
            org_name,
            repo_name,
        )
        repo_unmerged_pr_count = 0
        # Closed issues also include closed pull requests: process them one
        # at a time instead of buffering all of them first.
        for issue in repo.get_issues(**issue_filter):
            if not issue.pull_request:
                continue
            pr = repo.get_pull(issue.number)
            hdbg.dassert_is_not(
                pr,
                None,
                "Could not fetch pull request #%d in %s",
                issue.number,
                repo_name,
            )
            _LOG.debug("Processing PR #%d from %s", pr.number, repo_name)
            if pr.merged:
                continue
            if usernames and pr.user.login not in usernames:
                continue
            # Compare creation time and period bounds as UTC-aware datetimes.
            pr_created_at = pr.created_at or datetime.datetime.min
            pr_created_at = (
                pr_created_at.replace(tzinfo=datetime.timezone.utc)
                if pr_created_at.tzinfo is None
                else pr_created_at.astimezone(datetime.timezone.utc)
            )
            if since and until and not (since <= pr_created_at <= until):
                continue
            repo_unmerged_pr_count += 1
        prs_per_repository[repo_name] = repo_unmerged_pr_count
        total_unmerged_prs += repo_unmerged_pr_count
    result = {
        "prs_not_merged": total_unmerged_prs,
        "period": f"{since} to {until}" if since and until else "All time",
        "prs_per_repository": prs_per_repository,
    }
    return result
def get_prs_by_user(
    client: github.Github,
    username: str,
    org_name: str,
    *,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
    state: str = "all",
) -> Dict[str, Any]:
    """
    Count the pull requests created by one GitHub user in an organization.

    Delegate to `get_total_prs()` with the user as the only username filter
    and tag the result with the username.

    :param client: authenticated instance of the PyGithub client
    :param username: GitHub username to fetch pull request data for
    :param org_name: name of the GitHub organization
    :param period: start and end datetime for filtering pull requests
    :param state: state of the pull requests to fetch; can be 'open', 'closed',
        or 'all'
    :return: a dictionary containing:
        - user (str): GitHub username
        - total_prs (int): total number of pull requests created
        - period (str): the time range considered
        - prs_per_repository (Dict[str, int]): repository names as keys and pull
          request counts as values
    """
    totals = get_total_prs(
        client,
        org_name,
        usernames=[username],
        period=period,
        state=state,
    )
    # Copy the org-level fields after the user tag.
    user_summary: Dict[str, Any] = {"user": username}
    for field in ("total_prs", "period", "prs_per_repository"):
        user_summary[field] = totals[field]
    return user_summary
def days_between(
    period: Tuple[datetime.datetime, datetime.datetime],
) -> List[datetime.date]:
    """
    List every calendar date of a period, both ends included.

    Only the date part of the two datetimes is used. The result is empty when
    the end date precedes the start date.

    :param period: start and end datetime
    :return: consecutive dates from the start date to the end date
    """
    first_day = period[0].date()
    last_day = period[1].date()
    # Number of one-day steps from the first to the last day.
    num_steps = (last_day - first_day).days
    days: List[datetime.date] = [
        first_day + datetime.timedelta(days=offset)
        for offset in range(num_steps + 1)
    ]
    _LOG.debug("Generated %d days in period.", len(days))
    return days
@github_cached(cache_type="json", write_through=True)
def get_pr_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Collect the creation timestamps of a user's pull requests in a repo.

    The pull requests are found through the GitHub search API, filtering by
    author and by creation date.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime (only its date is used in the query)
    :param until: end datetime (only its date is used in the query)
    :return: PR created timestamps in ISO format
    """
    # The search query works at day granularity.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    query = f"repo:{org}/{repo} is:pr author:{username} created:{since_date}..{until_date}"
    timestamps: List[str] = []
    for found in client.search_issues(query):
        created = found.created_at
        # Mark timestamps without timezone as UTC.
        if not created.tzinfo:
            created = created.replace(tzinfo=datetime.timezone.utc)
        timestamps.append(created.isoformat())
    # Report the outcome.
    if timestamps:
        _LOG.info(
            "Found %d PRs for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    else:
        _LOG.debug(
            "No PRs found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since_date,
            until_date,
        )
    return timestamps
@github_cached(cache_type="json", write_through=True)
def get_loc_stats_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[Dict[str, Any]]:
    """
    Fetch commit LOC stats for user in repo over period.

    A commit is attributed to the user when the user is its author or its
    committer. At most 1000 commits of the user are processed.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime
    :param until: end datetime
    :return: one entry per commit with keys:
        - date (str): commit author date in ISO format
        - additions (int): number of added lines
        - deletions (int): number of deleted lines
    """
    # Safety limit on the number of commits processed.
    max_commits = 1000
    stats_list: List[Dict[str, Any]] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Retrieve all commits in the specified time period.
    commits = repo_obj.get_commits(since=since, until=until)
    for c in commits:
        # Extract author and committer logins.
        author_login = c.author.login if c.author else None
        committer_login = c.committer.login if c.committer else None
        # Skip commits not by the target user.
        if username not in (author_login, committer_login):
            continue
        # Skip commits with incomplete metadata, as done when fetching commit
        # timestamps: the author date is needed below.
        if not c.commit or not c.commit.author or not c.commit.author.date:
            _LOG.debug("No author date available for commit %s", c.sha)
            continue
        # Fetch commit statistics.
        s = c.stats
        # Skip if statistics are not available.
        if s is None:
            _LOG.debug("No stats available for commit %s", c.sha)
            continue
        # Mark dates without timezone as UTC.
        dt = c.commit.author.date
        dt_utc = dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)
        iso = dt_utc.date().isoformat()
        stats_list.append(
            {"date": iso, "additions": s.additions, "deletions": s.deletions}
        )
        # Stop exactly at the limit, so that the number of processed commits
        # matches the one reported in the warning.
        if len(stats_list) >= max_commits:
            _LOG.warning("Processed 1000 commits, stopping to avoid timeout")
            break
    # Log the results summary.
    if not stats_list:
        _LOG.info(
            "No LOC stats found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched LOC stats for %s/%s user=%s entries=%d.",
            org,
            repo,
            username,
            len(stats_list),
        )
    return stats_list
@github_cached(cache_type="json", write_through=True)
def get_pr_review_datetimes_by_repo_period_intrinsic(
    client,
    org: str,
    repo: str,
    username: str,
    since: datetime.datetime,
    until: datetime.datetime,
) -> List[str]:
    """
    Fetch PR review timestamps for user in repo over period.

    Candidate PRs are found through the search API (PRs reviewed by the user
    and updated in the period, at day granularity). The reviews of each
    candidate are then filtered by reviewer and by submission time.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param since: start datetime; a datetime without timezone is compared as
        UTC
    :param until: end datetime; a datetime without timezone is compared as
        UTC
    :return: review timestamps in ISO format
    """
    timestamps: List[str] = []
    # Fetch the repository object.
    repo_obj = client.get_repo(f"{org}/{repo}")
    # Format dates for GitHub search query.
    since_date = since.date().isoformat()
    until_date = until.date().isoformat()
    # Review timestamps are compared as timezone-aware values: make the
    # bounds aware too, since comparing naive and aware datetimes raises.
    since_utc, until_utc = normalize_period_to_utc((since, until))
    # Build search query for PRs reviewed by the user.
    query = f"repo:{org}/{repo} is:pr reviewed-by:{username} updated:{since_date}..{until_date}"
    # Execute the search query.
    results = client.search_issues(query)
    for issue in results:
        # Fetch the full PR object to access its reviews.
        pr = repo_obj.get_pull(issue.number)
        for review in pr.get_reviews():
            # Skip reviews without a reviewer (e.g. the account may no longer
            # exist) and reviews by other users.
            if review.user is None or review.user.login != username:
                continue
            # Skip reviews without a submission time (e.g. not submitted yet).
            review_dt = review.submitted_at
            if review_dt is None:
                continue
            # Mark timestamps without timezone as UTC.
            review_dt_utc = (
                review_dt
                if review_dt.tzinfo
                else review_dt.replace(tzinfo=datetime.timezone.utc)
            )
            # Add timestamp if within the specified period.
            if since_utc <= review_dt_utc <= until_utc:
                timestamps.append(review_dt_utc.isoformat())
    # Log the results summary.
    if not timestamps:
        _LOG.info(
            "No PR reviews found for %s/%s user=%s in %s to %s - possibly inactive or outdated.",
            org,
            repo,
            username,
            since.date(),
            until.date(),
        )
    else:
        _LOG.info(
            "Fetched %d PR reviews for %s/%s user=%s.",
            len(timestamps),
            org,
            repo,
            username,
        )
    return timestamps
def slice_by_date(df, start, end, date_col="date"):
    """
    Keep the rows whose date lies between two bounds.

    The input is left untouched; the date column of the returned copy is
    converted with `pd.to_datetime(..., errors="coerce")`, so unparseable
    values become NaT and are dropped by the comparison.

    :param df: input DataFrame
    :param start: start date (inclusive)
    :param end: end date (inclusive)
    :param date_col: name of the date column in df
    :return: filtered DataFrame
    """
    frame = df.copy()
    frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")
    dates = frame[date_col]
    not_before = dates >= start
    not_after = dates <= end
    return frame.loc[not_before & not_after].copy()


def build_daily_issue_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count issues assigned to and closed by a user in a repo, day by day.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with columns date, issues_assigned, issues_closed,
        repo, user
    """
    issue_data = get_issue_datetimes_by_repo_intrinsic(
        client, org, repo, username, period
    )
    # Build one per-day count table for each kind of issue event.
    per_day = []
    for key, column in (
        ("assigned", "issues_assigned"),
        ("closed", "issues_closed"),
    ):
        events = pd.DataFrame({"ts": pd.to_datetime(issue_data[key]), column: 1})
        events["date"] = events["ts"].dt.date
        per_day.append(events.groupby("date")[column].sum().reset_index())
    # Left-join both tables on the full calendar of the period.
    result = pd.DataFrame({"date": days_between(period)})
    for counts in per_day:
        result = result.merge(counts, on="date", how="left")
    count_cols = ["issues_assigned", "issues_closed"]
    result[count_cols] = result[count_cols].fillna(0).astype(int)
    result["repo"] = repo
    result["user"] = username
    _LOG.debug("Built daily issue DataFrame rows=%d.", len(result))
    return result
def build_daily_pr_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count the user's PRs in a repo for each day of the period.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, prs, repo, user
    """
    period_start, period_end = period
    pr_times = get_pr_datetimes_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    # One row per PR timestamp, tagged with its calendar day.
    events = pd.DataFrame({"ts": pd.to_datetime(pr_times)})
    events["date"] = events["ts"].dt.date
    counts = events.groupby("date").size().reset_index(name="prs")
    # Days without PRs are kept and filled with 0.
    calendar = pd.DataFrame({"date": days_between(period)})
    result = calendar.merge(counts, on="date", how="left")
    result["prs"] = result["prs"].fillna(0).astype(int)
    result = result.assign(repo=repo, user=username)
    _LOG.debug("Built daily PR DataFrame rows=%d.", len(result))
    return result
def build_daily_loc_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Build per-day added / deleted line counts for a user in a repo.

    The counts are returned as sign-prefixed strings ("+12", "-3"), not as
    integers.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, additions, deletions, repo, user
    """
    period_start, period_end = period
    # Fetch raw LOC stats list.
    stats_list = get_loc_stats_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    result = pd.DataFrame({"date": days_between(period)})
    loc_cols = ["additions", "deletions"]
    has_data = bool(stats_list)
    if has_data:
        raw = pd.DataFrame(stats_list)
        raw["date"] = pd.to_datetime(raw["date"]).dt.date
        # Sum per date, then spread over the full calendar.
        per_day = raw.groupby("date")[loc_cols].sum().reset_index()
        result = result.merge(per_day, on="date", how="left")
        result[loc_cols] = result[loc_cols].fillna(0).astype(int)
    else:
        # No stats at all: every day gets a zero in both columns.
        for col in loc_cols:
            result[col] = 0
    # Apply sign formatting.
    result["additions"] = "+" + result["additions"].astype(str)
    result["deletions"] = "-" + result["deletions"].astype(str)
    # Add context.
    result["repo"] = repo
    result["user"] = username
    if has_data:
        _LOG.debug("Built daily LOC DataFrame rows=%d.", len(result))
    else:
        _LOG.debug("Built daily LOC DataFrame rows=%d (no data).", len(result))
    return result
def get_total_loc_for_period(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> Dict[str, int]:
    """
    Add up the lines added and deleted by a user in a repo over a period.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: additions and deletions totals
    """
    period_start, period_end = period
    entries = get_loc_stats_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    # Accumulate both totals in a single pass over the entries.
    added = 0
    removed = 0
    for entry in entries:
        added += entry["additions"]
        removed += entry["deletions"]
    _LOG.info(
        "Total LOC for %s/%s user=%s => +%d -%d.",
        org,
        repo,
        username,
        added,
        removed,
    )
    totals = {"additions": added, "deletions": removed}
    return totals
def prefetch_periodic_user_repo_data(
    client,
    org: str,
    repos: List[str],
    users: List[str],
    period: Tuple[datetime.datetime, datetime.datetime],
    *,
    skip_issue_comments: bool = True,
    skip_pr_reviews: bool = True,
) -> None:
    """
    Prefetch commits, PRs, LOC and issues for each user and repo over period.

    The fetchers are called for every (repo, user) pair and their results are
    only counted and logged; nothing is returned. Issue comments and PR
    reviews are fetched only when the corresponding `skip_*` flag is False;
    otherwise they are reported as 0.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repos: repository names
    :param users: GitHub usernames
    :param period: start and end datetime objects
    :param skip_issue_comments: skip fetching issue comments (for speed)
    :param skip_pr_reviews: skip fetching PR reviews (for speed)
    """
    # Validate input types.
    if not isinstance(org, str):
        raise ValueError(f"org must be a string, got {type(org).__name__}")
    if not isinstance(repos, list) or not all(isinstance(r, str) for r in repos):
        raise ValueError("repos must be a list of strings")
    if not isinstance(users, list) or not all(isinstance(u, str) for u in users):
        raise ValueError("users must be a list of strings")
    # Initialize timer and pair up (repo, user) combinations.
    start = time.time()
    count = 0
    since, until = period
    user_repo_pairs = list(itertools.product(repos, users))
    # Prefetch GitHub data for each user-repo pair.
    for repo, user in tqdm(user_repo_pairs, desc="Prefetching user-repo data"):
        commits = get_commit_datetimes_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        prs = get_pr_datetimes_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        locs = get_loc_stats_by_repo_period_intrinsic(
            client, org, repo, user, since, until
        )
        issues = get_issue_datetimes_by_repo_intrinsic(
            client, org, repo, user, period
        )
        # The two optional fetches are opt-in, mirroring the flags of
        # `collect_all_metrics`.
        issue_comments: List[str] = []
        if not skip_issue_comments:
            issue_comments = get_issue_comment_datetimes_by_repo_period_intrinsic(
                client, org, repo, user, since, until
            )
        pr_reviews: List[str] = []
        if not skip_pr_reviews:
            pr_reviews = get_pr_review_datetimes_by_repo_period_intrinsic(
                client, org, repo, user, since, until
            )
        _LOG.info(
            "%s/%s: %d commits, %d PRs, %d LOC entries, %d issues assigned, "
            "%d closed, %d issue comments, %d PR reviews",
            repo,
            user,
            len(commits),
            len(prs),
            len(locs),
            len(issues["assigned"]),
            len(issues["closed"]),
            len(issue_comments),
            len(pr_reviews),
        )
        count += 1
    # Report overall prefetch duration.
    elapsed = time.time() - start
    _LOG.info(
        "Prefetched %d user-repo combos in %.2f seconds for period %s to %s.",
        count,
        elapsed,
        period[0],
        period[1],
    )
def build_daily_issue_comment_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count the user's issue comments in a repo for each day of the period.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, issue_comments, repo, user
    """
    period_start, period_end = period
    comment_times = get_issue_comment_datetimes_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    # One row per comment, tagged with its calendar day.
    events = pd.DataFrame({"ts": pd.to_datetime(comment_times)})
    events["date"] = events["ts"].dt.date
    counts = events.groupby("date").size().reset_index(name="issue_comments")
    # Days without comments are kept and filled with 0.
    calendar = pd.DataFrame({"date": days_between(period)})
    result = calendar.merge(counts, on="date", how="left")
    result["issue_comments"] = result["issue_comments"].fillna(0).astype(int)
    result = result.assign(repo=repo, user=username)
    _LOG.debug("Built daily issue comment DataFrame rows=%d.", len(result))
    return result
def build_daily_pr_review_df(
    client,
    org: str,
    repo: str,
    username: str,
    period: Tuple[datetime.datetime, datetime.datetime],
) -> pd.DataFrame:
    """
    Count the user's PR reviews in a repo for each day of the period.

    :param client: authenticated PyGithub client
    :param org: GitHub org name
    :param repo: repository name
    :param username: GitHub username
    :param period: start and end datetime objects
    :return: data with date, pr_reviews, repo, user
    """
    period_start, period_end = period
    review_times = get_pr_review_datetimes_by_repo_period_intrinsic(
        client, org, repo, username, period_start, period_end
    )
    # One row per review, tagged with its calendar day.
    events = pd.DataFrame({"ts": pd.to_datetime(review_times)})
    events["date"] = events["ts"].dt.date
    counts = events.groupby("date").size().reset_index(name="pr_reviews")
    # Days without reviews are kept and filled with 0.
    calendar = pd.DataFrame({"date": days_between(period)})
    result = calendar.merge(counts, on="date", how="left")
    result["pr_reviews"] = result["pr_reviews"].fillna(0).astype(int)
    result = result.assign(repo=repo, user=username)
    _LOG.debug("Built daily PR review DataFrame rows=%d.", len(result))
    return result
def collect_all_metrics(
    client,
    org: str,
    repos: List[str],
    users: List[str],
    period: Tuple[datetime.datetime, datetime.datetime],
    *,
    skip_issue_comments: bool = True,
    skip_pr_reviews: bool = True,
) -> pd.DataFrame:
    """
    Build one table of daily metrics covering every repo / user pair.

    :param client: authenticated PyGithub client
    :param org: Github org name
    :param repos: repository names
    :param users: github usernames
    :param period: start and end datetime
    :param skip_issue_comments: skip fetching issue comments (for speed);
        the column is then filled with 0
    :param skip_pr_reviews: skip fetching PR reviews (for speed); the
        column is then filled with 0
    :return: concatenated data with date, commits, prs, additions,
        deletions, issues_assigned, issues_closed, issue_comments,
        pr_reviews, repo, user; an empty DataFrame when there is no pair
    """
    join_keys = ["date", "repo", "user"]
    required_builders = (
        build_daily_commit_df,
        build_daily_pr_df,
        build_daily_loc_df,
        build_daily_issue_df,
    )
    frames: List[pd.DataFrame] = []
    # Track progress.
    n_pairs = len(repos) * len(users)
    done = 0
    for repo in repos:
        # Ensure repo is a string.
        if not isinstance(repo, str):
            raise ValueError(f"Expected repo to be a string but got {repo!r}")
        for user in users:
            # Ensure user is a string.
            if not isinstance(user, str):
                raise ValueError(
                    f"Expected user to be a string but got {user!r}"
                )
            done += 1
            _LOG.info(
                "Processing %d/%d: %s/%s",
                done,
                n_pairs,
                repo,
                user,
            )
            # Build the required metric tables first, then join them in order.
            parts = [
                build(client, org, repo, user, period)
                for build in required_builders
            ]
            merged = parts[0]
            for part in parts[1:]:
                merged = merged.merge(part, on=join_keys, how="inner")
            # Issue comments: fetched on request, zero placeholder otherwise.
            if skip_issue_comments:
                merged["issue_comments"] = 0
            else:
                comments = build_daily_issue_comment_df(
                    client, org, repo, user, period
                )
                merged = merged.merge(comments, on=join_keys, how="inner")
            # PR reviews: fetched on request, zero placeholder otherwise.
            if skip_pr_reviews:
                merged["pr_reviews"] = 0
            else:
                reviews = build_daily_pr_review_df(
                    client, org, repo, user, period
                )
                merged = merged.merge(reviews, on=join_keys, how="inner")
            frames.append(merged)
    # Concatenate all DataFrames or return empty.
    if frames:
        combined = pd.concat(frames, ignore_index=True)
    else:
        combined = pd.DataFrame()
    _LOG.info("Collected metrics for %d daily records", len(combined))
    return combined
def summarize_user_metrics_for_repo(
    combined: pd.DataFrame, repo: str
) -> pd.DataFrame:
    """
    Summarize total commits, PRs, LOC and issues per user in a specific
    repository.

    :param combined: data with all metrics; `additions` / `deletions` are
        expected as sign-prefixed strings such as "+12" / "-3"
    :param repo: repository name
    :return: data with columns user, commits, prs, additions, deletions,
        issues_assigned, issues_closed
    """
    df = combined[combined["repo"] == repo].copy()
    # Strip the sign as a literal character: "+" is a regex quantifier, and
    # the default of `regex` differs between pandas versions.
    df["additions"] = (
        df["additions"].str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].str.replace("-", "", regex=False).astype(int)
    )
    df["issues_assigned"] = df["issues_assigned"].astype(int)
    df["issues_closed"] = df["issues_closed"].astype(int)
    # NOTE(review): issue_comments and pr_reviews are not aggregated here,
    # unlike in `summarize_repo_metrics_for_user`; confirm this is intended.
    summary = (
        df.groupby("user")
        .agg(
            commits=pd.NamedAgg(column="commits", aggfunc="sum"),
            prs=pd.NamedAgg(column="prs", aggfunc="sum"),
            additions=pd.NamedAgg(column="additions", aggfunc="sum"),
            deletions=pd.NamedAgg(column="deletions", aggfunc="sum"),
            issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"),
            issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"),
        )
        .reset_index()
    )
    return summary
def summarize_repo_metrics_for_user(
    combined: pd.DataFrame, user: str
) -> pd.DataFrame:
    """
    Summarize total commits, PRs, LOC, issues, comments, and reviews per repo
    for a user.

    :param combined: data with all metrics; `additions` / `deletions` are
        expected as sign-prefixed strings such as "+12" / "-3"
    :param user: GitHub username
    :return: columns repo, commits, prs, additions, deletions,
        issues_assigned, issues_closed, issue_comments, pr_reviews
    """
    df = combined[combined["user"] == user].copy()
    # Strip the sign as a literal character: "+" is a regex quantifier, and
    # the default of `regex` differs between pandas versions.
    df["additions"] = (
        df["additions"].str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].str.replace("-", "", regex=False).astype(int)
    )
    df["issue_comments"] = df["issue_comments"].astype(int)
    df["pr_reviews"] = df["pr_reviews"].astype(int)
    summary = (
        df.groupby("repo")
        .agg(
            commits=pd.NamedAgg(column="commits", aggfunc="sum"),
            prs=pd.NamedAgg(column="prs", aggfunc="sum"),
            additions=pd.NamedAgg(column="additions", aggfunc="sum"),
            deletions=pd.NamedAgg(column="deletions", aggfunc="sum"),
            issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"),
            issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"),
            issue_comments=pd.NamedAgg(column="issue_comments", aggfunc="sum"),
            pr_reviews=pd.NamedAgg(column="pr_reviews", aggfunc="sum"),
        )
        .reset_index()
    )
    return summary
def summarize_users_across_repos(
    combined: pd.DataFrame,
    users: List[str],
    repos: List[str],
) -> pd.DataFrame:
    """
    Aggregate commit / PR / LOC / issue / comment / review totals per-user
    across a repo subset.

    :param combined: output of `collect_all_metrics`; `additions` /
        `deletions` are expected as sign-prefixed strings such as "+12"
    :param users: GitHub usernames
    :param repos: repository names
    :return: data with columns user, commits, prs, additions, deletions,
        issues_assigned, issues_closed, issue_comments, pr_reviews
    """
    # Filter to requested slice.
    df = combined[
        combined["user"].isin(users) & combined["repo"].isin(repos)
    ].copy()
    # Normalise numeric columns. The sign is stripped as a literal character:
    # "+" is a regex quantifier, and the default of `regex` differs between
    # pandas versions.
    df["additions"] = (
        df["additions"].str.replace("+", "", regex=False).astype(int)
    )
    df["deletions"] = (
        df["deletions"].str.replace("-", "", regex=False).astype(int)
    )
    df["issue_comments"] = df["issue_comments"].astype(int)
    df["pr_reviews"] = df["pr_reviews"].astype(int)
    # Aggregate across repos.
    summary = (
        df.groupby("user")
        .agg(
            commits=("commits", "sum"),
            prs=("prs", "sum"),
            additions=("additions", "sum"),
            deletions=("deletions", "sum"),
            issues_assigned=("issues_assigned", "sum"),
            issues_closed=("issues_closed", "sum"),
            issue_comments=("issue_comments", "sum"),
            pr_reviews=("pr_reviews", "sum"),
        )
        .reset_index()
    )
    return summary
+ df["additions"] = df["additions"].str.replace("+", "").astype(int) + df["deletions"] = df["deletions"].str.replace("-", "").astype(int) + df["issue_comments"] = df["issue_comments"].astype(int) + df["pr_reviews"] = df["pr_reviews"].astype(int) + df.rename( + columns={ + "issues_assigned": "issues_assigned", + "issues_closed": "issues_closed", + }, + inplace=True, + errors="ignore", + ) + # Aggregate across repos. + summary = ( + df.groupby("user") + .agg( + commits=("commits", "sum"), + prs=("prs", "sum"), + additions=("additions", "sum"), + deletions=("deletions", "sum"), + issues_assigned=("issues_assigned", "sum"), + issues_closed=("issues_closed", "sum"), + issue_comments=("issue_comments", "sum"), + pr_reviews=("pr_reviews", "sum"), + ) + .reset_index() + ) + return summary + + +def _filter_period( + df: pd.DataFrame, + *, + start: Optional[datetime.datetime] = None, + end: Optional[datetime.datetime] = None, +) -> pd.DataFrame: + """ + Slice a DataFrame by date using optional start and end boundaries. + + :param df: data with a 'date' column + :param start: start datetime (inclusive) + :param end: end datetime (inclusive) + :return: filtered data such that start ≤ date ≤ end + """ + if not pd.api.types.is_datetime64_any_dtype(df["date"]): + df = df.copy() + df["date"] = pd.to_datetime(df["date"]) + if start is not None: + df = df[df["date"] >= start] + if end is not None: + df = df[df["date"] <= end] + return df + + +def _plot_grouped_bars( + summary: pd.DataFrame, + index_col: str, + title: str, + *, + metrics: Optional[List[str]] = None, +) -> None: + """ + Internal helper to render grouped bar plots. + + :param summary: data with one row per category (user or repo), and + one column per metric + :param index_col: column name(e.g., "user" or "repo") + :param metrics: subset of metrics to plot (e.g., ["commits", "prs"]) + :param title: chart title + """ + # Validate and prepare the list of metrics to plot. 
def plot_metrics_by_user(
    combined: pd.DataFrame,
    repo: str,
    *,
    start: Optional[datetime.datetime] = None,
    end: Optional[datetime.datetime] = None,
    users: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
) -> None:
    """
    Compare users of one repo on the selected metrics as a grouped bar chart.

    The chart is drawn by `_plot_grouped_bars`; nothing is returned.

    :param combined: output from `collect_all_metrics`
    :param repo: repository name
    :param start: start datetime (inclusive)
    :param end: end datetime (inclusive)
    :param users: optional subset of GitHub usernames to show
    :param metrics: list of metrics to plot; passed through to
        `_plot_grouped_bars`
    """
    in_period = _filter_period(df=combined, start=start, end=end)
    per_user = summarize_user_metrics_for_repo(in_period, repo)
    if users is not None:
        per_user = per_user[per_user["user"].isin(users)]
    # Open-ended bounds are shown as ALL in the title.
    start_label = start.date() if start else "ALL"
    end_label = end.date() if end else "ALL"
    _plot_grouped_bars(
        per_user,
        index_col="user",
        metrics=metrics,
        title=f"Metric comparison for {repo} ({start_label} -> {end_label})",
    )
def plot_metrics_by_repo(
    combined: pd.DataFrame,
    user: str,
    *,
    start: Optional[datetime.datetime] = None,
    end: Optional[datetime.datetime] = None,
    repos: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
) -> None:
    """
    Compare the repos of one user on the selected metrics as a grouped bar
    chart.

    The chart is drawn by `_plot_grouped_bars`; nothing is returned.

    :param combined: data from `collect_all_metrics`
    :param user: GitHub username
    :param start: start datetime (inclusive)
    :param end: end datetime (inclusive)
    :param repos: repos to include
    :param metrics: metrics to plot; passed through to `_plot_grouped_bars`
    """
    in_period = _filter_period(df=combined, start=start, end=end)
    per_repo = summarize_repo_metrics_for_user(in_period, user)
    if repos is not None:
        per_repo = per_repo[per_repo["repo"].isin(repos)]
    # Open-ended bounds are shown as ALL in the title.
    start_label = start.date() if start else "ALL"
    end_label = end.date() if end else "ALL"
    _plot_grouped_bars(
        per_repo,
        index_col="repo",
        metrics=metrics,
        title=f"Metric comparison for {user} ({start_label} -> {end_label})",
    )
def plot_multi_metrics_totals_by_user(
    combined: pd.DataFrame,
    metrics: List[str],
    *,
    start: Optional[datetime.datetime] = None,
    end: Optional[datetime.datetime] = None,
    users: Optional[List[str]] = None,
    repos: Optional[List[str]] = None,
) -> None:
    """
    Draw per-user totals (summed across repos) of several metrics as grouped
    bars: one group per user, one bar per metric.

    :param combined: data from `collect_all_metrics`
    :param metrics: metrics to plot, e.g. ["commits", "prs", "additions"]
    :param start: start datetime (inclusive)
    :param end: end datetime (inclusive)
    :param users: users to include; everyone in the period when empty
    :param repos: repos to include; every repo in the period when empty
    """
    in_period = _filter_period(df=combined, start=start, end=end)
    # Aggregate totals for each user across the selected repos.
    totals = summarize_users_across_repos(
        in_period,
        users or in_period["user"].unique().tolist(),
        repos or in_period["repo"].unique().tolist(),
    )
    if users is not None:
        totals = totals[totals["user"].isin(users)]
    # Validate metrics exist.
    for name in metrics:
        if name not in totals.columns:
            raise ValueError(f"Metric '{name}' not found in summary columns")
    # Layout: the bars of one user share 0.8 of the slot width.
    user_order = totals["user"].tolist()
    slots = range(len(user_order))
    bar_width = 0.8 / len(metrics) if metrics else 0.8
    fig, ax = plt.subplots(figsize=(max(10, len(user_order) * 0.7), 5))
    by_user = totals.set_index("user")
    for rank, name in enumerate(metrics):
        lefts = [slot + rank * bar_width for slot in slots]
        heights = by_user.loc[user_order, name].astype(int).tolist()
        rects = ax.bar(
            lefts, heights, width=bar_width, label=name.replace("_", " ").title()
        )
        # Write the value on top of every bar.
        for rect in rects:
            ax.text(
                rect.get_x() + rect.get_width() / 2,
                rect.get_height(),
                str(int(rect.get_height())),
                ha="center",
                va="bottom",
                fontsize=8,
            )
    # Final plot styling.
    ax.set_xticks([slot + bar_width * (len(metrics) - 1) / 2 for slot in slots])
    ax.set_xticklabels(user_order, rotation=15, ha="right")
    ax.set_ylabel("Total count across repos")
    start_label = start.date() if start else "ALL"
    end_label = end.date() if end else "ALL"
    ax.set_title(
        f"Metric totals across repos by user ({start_label} -> {end_label})"
    )
    ax.legend()
    plt.tight_layout()
    plt.show()
def get_contributors_for_repo(
    client,
    org: str,
    repo: str,
    *,
    top_n: Optional[int] = None,
) -> List[str]:
    """
    Fetch GitHub usernames of contributors to a repository.

    :param client: authenticated PyGithub client
    :param org: GitHub organization name
    :param repo: repository name
    :param top_n: if specified, return only the first N entries of the
        listing returned by `get_contributors()` (presumably ordered by
        commit count; verify against the GitHub API); 0 returns an empty
        list
    :return: GitHub usernames
    """
    repo_obj = client.get_repo(f"{org}/{repo}")
    contributors = repo_obj.get_contributors()
    usernames: List[str] = []
    for idx, user in enumerate(contributors):
        # Compare against None so that `top_n=0` yields an empty list rather
        # than being mistaken for "no limit".
        if top_n is not None and idx >= top_n:
            break
        usernames.append(user.login)
    _LOG.info("Fetched %d contributors for %s/%s", len(usernames), org, repo)
    return usernames


def utc_period(
    start: str, end: str
) -> Tuple[datetime.datetime, datetime.datetime]:
    """
    Construct a UTC datetime period from string inputs.

    Strings without a UTC offset are interpreted as UTC. Strings carrying an
    offset are converted to UTC, so the instant they denote is preserved.

    :param start: start date e.g. '2025-01-01'
    :param end: end date e.g. '2025-05-24'
    :return: (start, end) as timezone-aware UTC datetimes
    """

    def _parse_utc(text: str) -> datetime.datetime:
        parsed = datetime.datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=datetime.timezone.utc)
        return parsed.astimezone(datetime.timezone.utc)

    return (_parse_utc(start), _parse_utc(end))
def slice_period(
    df: pd.DataFrame,
    start: datetime.date,
    end: datetime.date,
) -> pd.DataFrame:
    """
    Filter a DataFrame by date range.

    :param df: data with a `date` column of type `datetime.date`
    :param start: start date for the filtering window (inclusive)
    :param end: end date for the filtering window (inclusive)
    :return: filtered data within the specified date range
    """
    req_period = df[(df["date"] >= start) & (df["date"] <= end)]
    return req_period


def compute_z_scores(summary: pd.DataFrame, metrics: List[str]) -> pd.DataFrame:
    """
    Compute z-score (standardized score) for specified metrics across users.

    This helps assess how far a user's metric is from the group mean in units
    of standard deviation. When the standard deviation is zero or undefined
    (all values equal, or fewer than two values), nobody deviates from the
    mean and the score is 0 instead of NaN/inf.

    :param summary: data with users and raw metric values
    :param metrics: metric column names to compute z-scores for
    :return: copy of the data with added z-score columns suffixed with `_z`
    """
    z_df = summary.copy()
    for metric in metrics:
        values = z_df[metric]
        centered = values - values.mean()
        std = values.std()
        if pd.isna(std) or std == 0:
            # Multiplying keeps missing values as NaN while mapping every
            # present value to 0.0.
            z_df[metric + "_z"] = centered * 0.0
        else:
            z_df[metric + "_z"] = centered / std
    return z_df


def compute_percentile_ranks(
    summary: pd.DataFrame, metrics: List[str]
) -> pd.DataFrame:
    """
    Compute percentile rank for each user for the specified metrics.

    The rank is `Series.rank(pct=True)`: a value in (0, 1] where the largest
    value of the group gets 1.0.

    :param summary: data with users and raw metric values
    :param metrics: metric column names
    :return: copy of the data with added percentile columns suffixed with
        `_pctile`
    """
    perc_df = summary.copy()
    for metric in metrics:
        perc_df[metric + "_pctile"] = perc_df[metric].rank(pct=True)
    return perc_df
def visualize_user_metric_comparison(
    stats: pd.DataFrame,
    *,
    score_type: Literal["z", "percentile"] = "z",
    top_n: Optional[int] = None,
) -> None:
    """
    Visualize user performance across all available metrics using z-scores or
    percentiles.

    Two outputs are produced: a colour-graded table of the score columns
    (displayed through IPython) and a bar chart of the users ranked by their
    average score. `stats` is not modified.

    :param stats: data with a `user` column and score columns suffixed with
        `_z` or `_pctile`
    :param score_type: "z" for z-scores or "percentile" for relative
        percentiles
    :param top_n: number of top users to show in leaderboard bar chart; all
        users when None
    :raises ValueError: if no column carries the suffix for `score_type`
    """
    suffix = "_z" if score_type == "z" else "_pctile"
    score_cols = [col for col in stats.columns if col.endswith(suffix)]
    if not score_cols:
        raise ValueError(
            f"No columns ending with '{suffix}' found in input DataFrame."
        )
    # Stylized table.
    import IPython.display

    IPython.display.display(
        stats[["user"] + score_cols]
        .set_index("user")
        .style.format("{:.2f}")
        .background_gradient(
            axis=0, cmap="Greens" if score_type == "percentile" else "RdYlGn"
        )
    )
    # Leaderboard chart (by average score). The average lives on a derived
    # frame so the caller's DataFrame never gains a scratch column.
    ranked = stats.assign(
        __score_avg__=stats[score_cols].mean(axis=1)
    ).sort_values("__score_avg__", ascending=False)
    top_users = ranked if top_n is None else ranked.head(top_n)
    # Report the number of users actually drawn, which can be smaller than
    # `top_n`.
    top_n_display = len(top_users)
    fig, ax = plt.subplots(figsize=(max(8, 0.5 * len(top_users)), 4))
    ax.bar(top_users["user"], top_users["__score_avg__"], color="skyblue")
    ax.set_ylabel(
        "Average Score"
        + (" (Z-score)" if score_type == "z" else " (Percentile)")
    )
    ax.set_title(f"Top {top_n_display} Users by Average {score_type.title()}")
    # Reference line at the neutral score: 0 for z-scores, 0.5 for percentiles.
    ax.axhline(0 if score_type == "z" else 0.5, color="gray", linestyle="--")
    plt.xticks(rotation=15, ha="right")
    plt.tight_layout()
    plt.show()
def compute_engagement_score(
    summary: pd.DataFrame,
    weights: Optional[Dict[str, float]] = None,
) -> pd.DataFrame:
    """
    Rank users by a weighted sum of their metrics, scaled to 0-100.

    Metrics named in the weights but absent from `summary` are ignored. The
    scores are rescaled only when the largest one is positive.

    :param summary: data with user metrics
    :param weights: optional dictionary of metric weights; the built-in
        defaults are used when it is None or empty
    :return: copy of summary with an added 'engagement_score' column, sorted
        by that score in descending order
    """
    fallback_weights = {
        "commits": 1.0,
        "prs": 2.0,
        "additions": 0.001,
        "deletions": 0.0005,
        "issues_assigned": 0.5,
        "issues_closed": 1.5,
        "issue_comments": 0.3,
        "pr_reviews": 2.5,
    }
    active_weights = weights or fallback_weights
    scored = summary.copy()
    scored["engagement_score"] = 0
    # Add each available metric's weighted contribution.
    for metric, weight in active_weights.items():
        if metric not in scored.columns:
            continue
        scored["engagement_score"] += scored[metric] * weight
    # Normalize to 0-100 scale.
    top_score = scored["engagement_score"].max()
    if top_score > 0:
        scaled = scored["engagement_score"] / top_score * 100
        scored["engagement_score"] = scaled.round(2)
    return scored.sort_values("engagement_score", ascending=False)
def count_open_prs_by_author(
    repo_obj,
) -> Dict[str, Dict[str, int]]:
    """
    Tally the open PRs of a repo per author, split into draft and ready.

    :param repo_obj: PyGithub repository object
    :return: dict mapping author -> {"ready": int, "draft": int}
    """
    tally: Dict[str, Dict[str, int]] = {}
    for pr in repo_obj.get_pulls(state="open"):
        author = pr.user.login
        if pr.draft:
            status = "draft"
        else:
            status = "ready"
        # Authors get both counters on first sight.
        counters = tally.setdefault(author, {"ready": 0, "draft": 0})
        counters[status] += 1
        _LOG.debug("Open PR #%d by %s status=%s", pr.number, author, status)
    return tally


def count_closed_prs_by_author(
    repo_obj,
    *,
    period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None,
) -> Dict[str, int]:
    """
    Tally the closed PRs of a repo per author, optionally within a period.

    :param repo_obj: PyGithub repository object
    :param period: optional (start, end) datetimes for filtering; passed
        through `normalize_period_to_utc` before use
    :return: dict mapping author -> count of closed PRs
    """
    tally: Dict[str, int] = {}
    since, until = normalize_period_to_utc(period)
    # The period filter applies only when both bounds are available.
    bounded = since is not None and until is not None
    for pr in repo_obj.get_pulls(state="closed"):
        closed_at = pr.closed_at
        if closed_at is None:
            continue
        # Normalize the PR closed_at timestamp to UTC.
        if closed_at.tzinfo is None:
            closed_at = closed_at.replace(tzinfo=datetime.timezone.utc)
        else:
            closed_at = closed_at.astimezone(datetime.timezone.utc)
        if bounded and not (since <= closed_at <= until):
            continue
        author = pr.user.login
        tally[author] = tally.get(author, 0) + 1
        _LOG.debug("Closed PR #%d by %s at %s", pr.number, author, closed_at)
    return tally
def print_open_pr_stats(
    open_stats: Dict[str, Dict[str, int]],
) -> None:
    """
    Log a table of open PRs per author, split into ready and draft.

    Authors are listed from the largest to the smallest number of open PRs,
    followed by a `TOTAL` row.

    :param open_stats: dict mapping author -> {"ready": int, "draft": int}
    """
    if not open_stats:
        _LOG.info("No open PRs found.")
        return

    def _num_open_prs(item: Tuple[str, Dict[str, int]]) -> int:
        # Total number of open PRs (ready + draft) of one author.
        return item[1]["ready"] + item[1]["draft"]

    # Busiest authors first.
    rows = sorted(open_stats.items(), key=_num_open_prs, reverse=True)
    row_fmt = "%-25s %7d %7d %7d"
    header = f"{'Author':<25} {'Ready':>7} {'Draft':>7} {'Total':>7}"
    separator = "-" * len(header)
    # Print the table header.
    _LOG.info("Open PRs by author:")
    for line in (separator, header, separator):
        _LOG.info(line)
    # Print one row per author.
    for author, counts in rows:
        num_ready = counts["ready"]
        num_draft = counts["draft"]
        _LOG.info(row_fmt, author, num_ready, num_draft, num_ready + num_draft)
    # Print the totals.
    total_ready = sum(counts["ready"] for _, counts in rows)
    total_draft = sum(counts["draft"] for _, counts in rows)
    _LOG.info(separator)
    _LOG.info(
        row_fmt,
        "TOTAL",
        total_ready,
        total_draft,
        total_ready + total_draft,
    )
+ sorted_authors = sorted( + closed_stats.items(), key=lambda item: item[1], reverse=True + ) + period_str = "all time" + if period is not None: + since, until = period + period_str = f"{since.date()} to {until.date()}" + header = f"{'Author':<25} {'Closed':>7}" + separator = "-" * len(header) + _LOG.info("Closed PRs by author (%s):", period_str) + _LOG.info(separator) + _LOG.info(header) + _LOG.info(separator) + total = 0 + for author, count in sorted_authors: + total += count + _LOG.info("%-25s %7d", author, count) + _LOG.info(separator) + _LOG.info("%-25s %7d", "TOTAL", total) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py new file mode 100644 index 000000000..96c8af1da --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py @@ -0,0 +1,508 @@ +""" +Import as: + +import helpers.hasyncio as hasynci +""" + +import asyncio +import contextlib +import datetime +import logging +import math +import time +from typing import ( + Any, + Callable, + Coroutine, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) + +import async_solipsism # type: ignore[import-not-found] +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint + +# Avoid dependency from other `helpers` modules, such as `helpers.hsql`, to prevent +# import cycles. + + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _EventLoop +# ############################################################################# + + +# TODO(gp): We could make this a mixin and add this behavior to both asyncio and +# async_solipsism event loop. 
class _EventLoop(async_solipsism.EventLoop):
    """
    An `async_solipsism.EventLoop` returning also the wall-clock time.

    The wall-clock time is the naive UTC time at which the loop was created
    plus the number of seconds reported by the loop's own clock.
    """

    # TODO(gp): If we pass an `initial_replayed_timestamp` we could incorporate here also
    # the replayed time approach and can remove `ReplayedTime` object.
    def __init__(self) -> None:
        super().__init__()
        # `datetime.utcnow()` is deprecated since Python 3.12: build the same
        # naive UTC timestamp from a timezone-aware one.
        self._initial_dt = datetime.datetime.now(datetime.timezone.utc).replace(
            tzinfo=None
        )

    def get_current_time(self) -> datetime.datetime:
        """
        Return the loop creation time advanced by the loop's elapsed time.

        :return: naive datetime equal to the creation time plus `time()`
            seconds
        """
        # `loop.time()` returns the number of seconds as `float` from when the
        # event loop was created.
        try:
            num_secs = super().time()
        except AttributeError:
            # Sometimes the logger calls this method before `async_solipsism`
            # is fully initialized: the loop's selector is still `None` and
            # `time()` fails with
            #   AttributeError: 'NoneType' object has no attribute 'clock'
            # (e.g., when logging from `hcache.clear_global_cache()`).
            # To avoid the error we just set `num_secs` to 0.
            num_secs = 0
        return self._initial_dt + datetime.timedelta(seconds=num_secs)
def run(
    coroutine: Coroutine,
    event_loop: Optional[asyncio.AbstractEventLoop],
    *,
    close_event_loop: bool = True,
) -> Any:
    """
    Run `coroutine` to completion on the given event loop.

    This is a replacement for `asyncio.run()` that lets the caller choose the
    `EventLoop`.

    :param coroutine: the coroutine to run
    :param event_loop: the event loop to use. `None` means a new standard
        `asyncio` event loop
    :param close_event_loop: if False the event loop is left open, so that it
        can be used for further runs
    :return: same output of `run_until_complete()`
    """
    # Fall back to a fresh `asyncio` loop when the caller doesn't provide one.
    loop = asyncio.new_event_loop() if event_loop is None else event_loop
    hdbg.dassert_issubclass(loop, asyncio.AbstractEventLoop)
    hprint.log_frame(_LOG, "asyncio.run")
    try:
        return loop.run_until_complete(coroutine)
    finally:
        # The loop is closed (if requested) even when the coroutine raises.
        if close_event_loop:
            loop.close()
def _get_max_num_iterations(
    sleep_in_secs: float,
    timeout_in_secs: float,
) -> int:
    """
    Compute how many polling attempts fit in the timeout.

    :param sleep_in_secs: pause between two attempts, must be positive
    :param timeout_in_secs: total time budget, must be positive
    :return: `timeout_in_secs / sleep_in_secs` rounded up, at least 1
    """
    hdbg.dassert_lt(0, sleep_in_secs)
    hdbg.dassert_lt(0, timeout_in_secs)
    # Round up so that a partial last interval still gets an attempt.
    num_attempts = int(math.ceil(timeout_in_secs / sleep_in_secs))
    hdbg.dassert_lte(1, num_attempts)
    return num_attempts
def sync_poll(
    polling_func: PollingFunction,
    sleep_in_secs: float,
    timeout_in_secs: float,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    tag: Optional[str] = None,
) -> Tuple[int, Any]:
    """
    Same interface and behavior of `poll()` but using a synchronous
    implementation.

    :param polling_func: function returning a tuple (success, value)
    :param sleep_in_secs: seconds to block (with `time.sleep()`) between two
        calls to `polling_func`
    :param timeout_in_secs: together with `sleep_in_secs` it determines the
        maximum number of polling iterations
    :param get_wall_clock_time: function returning the current wall clock
        time, used for logging
    :param tag: label used in the log messages
    :return:
        - number of iterations before a successful call to `polling_func`
        - result from `polling_func`
    :raises: TimeoutError in case of timeout
    """
    _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag"))
    if tag is None:
        # Use the function calling this function.
        tag = hintros.get_function_name(count=0)
    max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs)
    num_iter = 1
    while True:
        num_iter, (success, value) = _poll_iterate(
            polling_func,
            sleep_in_secs,
            timeout_in_secs,
            get_wall_clock_time,
            num_iter,
            max_num_iter,
            tag,
        )
        if success:
            # Return the iteration count, not the `success` flag, to match
            # `poll()` and the declared return type.
            return num_iter, value
        _LOG.debug("sleep for %s secs", sleep_in_secs)
        time.sleep(sleep_in_secs)


def get_poll_kwargs(
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(ai_gp): Avoid using defaults.
    sleep_in_secs: float = 1.0,
    timeout_in_secs: float = 10.0,
) -> Dict[str, Any]:
    """
    Package the polling parameters in a dict to pass as `**kwargs` to `poll()`
    or `sync_poll()`.

    :param get_wall_clock_time: function returning the current wall clock time
    :param sleep_in_secs: seconds between two polling attempts, must be
        positive
    :param timeout_in_secs: polling timeout in seconds, must be positive
    :return: dict with keys `sleep_in_secs`, `timeout_in_secs`,
        `get_wall_clock_time`
    """
    hdbg.dassert_lt(0, sleep_in_secs)
    hdbg.dassert_lt(0, timeout_in_secs)
    hdbg.dassert_callable(get_wall_clock_time)
    poll_kwargs = {
        "sleep_in_secs": sleep_in_secs,
        "timeout_in_secs": timeout_in_secs,
        "get_wall_clock_time": get_wall_clock_time,
    }
    return poll_kwargs


# #############################################################################
# Wait.
# #############################################################################


# Represent a deterministic, if float, or random delay in [a, b] if a Tuple.
# All values are in seconds.
WaitInSecs = Union[float, Tuple[float, float]]


async def sleep(
    delay_in_secs: WaitInSecs,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(gp): -> msg
    tag: Optional[str] = None,
    # TODO(gp): How to handle random seed here?
    seed: int = 42,
) -> None:
    """
    Wait a deterministic or a randomized delay.

    :param delay_in_secs: a non-negative number of seconds, or a tuple
        `(min, max)` with `0 <= min <= max` from which the delay is drawn
        uniformly at random
    :param get_wall_clock_time: function returning the current wall clock
        time, used for logging
    :param tag: label used in the log messages
    :param seed: currently not used
    :raises: ValueError if `delay_in_secs` is neither a number nor a tuple
    """
    if tag is None:
        # Use the name of the function calling this function.
        tag = hintros.get_function_name(count=0)
    # Extract or compute the delay.
    if isinstance(delay_in_secs, (int, float)):
        # Deterministic delay.
        pass
    elif isinstance(delay_in_secs, tuple):
        # Randomized delay.
        hdbg.dassert_eq(len(delay_in_secs), 2)
        min_, max_ = delay_in_secs
        hdbg.dassert_lte(0, min_)
        hdbg.dassert_lte(min_, max_)
        # Draw a single delay between the two bounds. Note that
        # `np.random.rand(min_, max_)` would interpret the bounds as the shape
        # of an array of samples in [0, 1).
        delay_in_secs = float(np.random.uniform(min_, max_))
    else:
        raise ValueError(f"Invalid delay_in_secs='{delay_in_secs}'")
    # Wait.
    hprint.log_frame(
        _LOG,
        "%s: wall_clock_time=%s: started waiting for %s secs",
        tag,
        get_wall_clock_time(),
        delay_in_secs,
    )
    hdbg.dassert_lte(0, delay_in_secs)
    delay_in_secs = cast(float, delay_in_secs)
    await asyncio.sleep(delay_in_secs)
    hprint.log_frame(
        _LOG,
        "%s: wall_clock_time=%s: done waiting for %s secs",
        tag,
        get_wall_clock_time(),
        delay_in_secs,
    )
def _wait_until(
    wait_until_timestamp: pd.Timestamp,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    tag: Optional[str] = None,
) -> float:
    """
    Return the amount of seconds to wait to reach `wait_until_timestamp`.

    The value is computed with `total_seconds()`, so it can be fractional.

    :param wait_until_timestamp: wall clock time to wait for
    :param get_wall_clock_time: function returning the current wall clock time
    :param tag: label used in the log messages
    :return: seconds between the current wall clock time and
        `wait_until_timestamp`, or 0.0 if that time has already passed
    """
    if tag is None:
        # Use the name of the function calling this function.
        tag = hintros.get_function_name(count=2)
    curr_timestamp = get_wall_clock_time()
    _LOG.debug(
        "wait_until_timestamp=%s, curr_timestamp=%s",
        wait_until_timestamp,
        curr_timestamp,
    )
    # We can only wait for times in the future.
    if curr_timestamp > wait_until_timestamp:
        # The target time has already passed: there is nothing to wait for.
        _LOG.warning(
            "curr_timestamp=%s is past wait_until_timestamp=%s: "
            "continuing without waiting",
            curr_timestamp,
            wait_until_timestamp,
        )
        time_in_secs = 0.0
    else:
        time_in_secs = (wait_until_timestamp - curr_timestamp).total_seconds()
    _LOG.debug(
        "%s: wall_clock_time=%s: sleep for %s secs",
        tag,
        get_wall_clock_time(),
        time_in_secs,
    )
    return time_in_secs
async def async_wait_until(
    wait_until_timestamp: pd.Timestamp,
    get_wall_clock_time: hdateti.GetWallClockTime,
    *,
    # TODO(gp): -> msg
    tag: Optional[str] = None,
) -> None:
    """
    Suspend the calling coroutine until the wall clock time is
    `wait_until_timestamp`.

    :param wait_until_timestamp: wall clock time to wait for
    :param get_wall_clock_time: function returning the current wall clock time
    :param tag: label used in the log messages
    """
    _LOG.debug(hprint.to_str("wait_until_timestamp"))
    # Compute how long we need to wait.
    secs_left = _wait_until(wait_until_timestamp, get_wall_clock_time, tag=tag)
    hdbg.dassert_lte(0, secs_left)
    # Yield to the event loop for that amount of time.
    await asyncio.sleep(secs_left)
    #
    done_msg = "%s: wall_clock_time=%s: done waiting"
    hprint.log_frame(_LOG, done_msg, tag, get_wall_clock_time())
def get_session(
    aws_profile: str, *, region: Optional[str] = None
) -> boto3.session.Session:
    """
    Build a Boto3 session for the given AWS profile.

    :param aws_profile: AWS profile name to use for the session.
    :param region: AWS region, if None get region from AWS credentials.
    :return: Boto3 session object.
    """
    hdbg.dassert_isinstance(aws_profile, str)
    # When deploying jobs via ECS the container obtains credentials based on
    # passed task role specified in the ECS task-definition, refer to:
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html
    use_task_role = (
        aws_profile in ["ck", "csfy"] and hserver.is_inside_ecs_container()
    )
    if use_task_role:
        _LOG.info("Fetching credentials from task IAM role")
        return boto3.session.Session()
    # We do not need to extract the credential from the file because
    # the credential is already set and `boto3` know where to find them.
    session_kwargs = {"profile_name": aws_profile}
    if region:
        # Override the region only when one is explicitly requested.
        session_kwargs["region_name"] = region
    return boto3.Session(**session_kwargs)
def get_task_definition_image_url(
    task_definition_name: str, environment: str, *, region: Optional[str] = None
) -> str:
    """
    Get ECS task definition by name and return only image URL.

    :param task_definition_name: The name of the ECS task definition,
        e.g., `cmamp-test`.
    :param environment: key of `AWS_PROFILE` (e.g., `test`, `preprod`,
        `prod`) selecting the AWS profile to use; an unknown key raises
        `KeyError`.
    :param region: AWS region, if None get region from AWS credentials.
    :return: the `image` of the first container definition of the task
        definition.
    """
    # Map the environment to the AWS profile of the corresponding account.
    aws_profile = AWS_PROFILE[environment]
    service_name = "ecs"
    client = get_service_client(aws_profile, service_name, region=region)
    # Get the last revision of the task definition.
    task_description = client.describe_task_definition(
        taskDefinition=task_definition_name
    )
    task_definition_json = task_description["taskDefinition"]
    # Only the first container of the task definition is considered.
    image_url = task_definition_json["containerDefinitions"][0]["image"]
    return image_url
+ + :param task_definition_name: the name of the ECS task definition + :param region: region of the task definition + :return: whether the task definition exists + """ + client = get_ecs_client("ck", region=region) + try: + client.describe_task_definition(taskDefinition=task_definition_name) + return True + except client.exceptions.ClientError as e: + _LOG.warning( + "Failed to describe task definition '%s': %s", + task_definition_name, + e, + ) + return False + + +# TODO(Nikola): Pass a dict config instead, so any part can be updated. +def update_task_definition( + task_definition_name: str, + new_image_url: str, + *, + region: Optional[str] = None, + environment: str, +) -> None: + """ + Create the new revision of specified ECS task definition. + + If region is different then the default one, it is assumed that ECR + replication is enabled from the default region to the target region. + + :param task_definition_name: The name of the ECS task definition for + which an update to container image URL is made, e.g., `cmamp- + test`. + :param new_image_url: New image URL for task definition. e.g., + `***.dkr.ecr.***/cmamp:prod`. + :param region: AWS region, if None get region from AWS credentials. + """ + aws_profile = AWS_PROFILE[environment] + client = get_ecs_client(aws_profile, region=region) + # Get the last revision of the task definition. + task_description = client.describe_task_definition( + taskDefinition=task_definition_name + ) + task_definition_json = task_description["taskDefinition"] + # Set new image. + old_image_url = task_definition_json["containerDefinitions"][0]["image"] + if old_image_url == new_image_url: + _LOG.info( + "New image url `%s` is already set for task definition `%s`!", + new_image_url, + task_definition_name, + ) + return + task_definition_json["containerDefinitions"][0]["image"] = new_image_url + # Register the new revision with the new image. 
def list_all_objects(
    s3_client: BaseClient, bucket_name: str, prefix: str
) -> List[Dict]:
    """
    Collect the metadata of every object under a prefix of an S3 bucket,
    following the pagination of `list_objects_v2`.

    :param s3_client: Instance of boto3 S3 client.
    :param bucket_name: The name of the S3 bucket e.g., `cryptokaizen-data-test`.
    :param prefix: Prefix to filter the S3 objects e.g., `binance/historical_bid_ask/`.
    :return: A list of dictionaries containing metadata about each object. E.g.,
        ```
        [
            {
                'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-27/data.tar.gz',
                'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()),
                'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
                'Size': 0,
                'StorageClass': 'STANDARD'
            },
            {
                'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-28/data.tar.gz',
                'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()),
                'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
                'Size': 0,
                'StorageClass': 'STANDARD'
            }
        ]
        ```
    """
    collected: List[Dict] = []
    # The first request carries no continuation token.
    request_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
    while True:
        page = s3_client.list_objects_v2(**request_kwargs)
        # A page without objects has no `Contents` key.
        collected.extend(page.get("Contents", []))
        if not page.get("IsTruncated"):
            # This was the last page.
            break
        # Ask for the next page, passing the token only when there is one.
        next_token = page.get("NextContinuationToken")
        if next_token:
            request_kwargs["ContinuationToken"] = next_token
        else:
            request_kwargs.pop("ContinuationToken", None)
    return collected
def enable_caching(val: bool) -> None:
    """
    Enable or disable all caching, i.e., global, tagged global, function-
    specific.

    :param val: new value of the module-level `_IS_CACHE_ENABLED` switch
    """
    global _IS_CACHE_ENABLED
    if _TRACE:
        _LOG.trace("")
    # Report the transition (old value -> new value) before applying it.
    _LOG.warning("Setting caching to %s -> %s", _IS_CACHE_ENABLED, val)
    _IS_CACHE_ENABLED = val
def _get_global_cache_name(cache_type: str, tag: Optional[str] = None) -> str:
    """
    Build the canonical name of a cache from its type and optional tag.

    The name is `tmp.cache.{cache_type}`, followed by `.{tag}` when a tag is
    given, e.g., `tmp.cache.mem.unit_tests`.

    :param cache_type: type of a cache
    :param tag: optional unique tag of the cache
    :return: name of the folder for a cache
    """
    _dassert_is_valid_cache_type(cache_type)
    # The tag, if any, is appended as a further dot-separated component.
    suffix = "" if tag is None else f".{tag}"
    return f"tmp.cache.{cache_type}{suffix}"
def _get_cache_size(path: str, description: str) -> str:
    """
    Describe the cache (global or function) stored at a given path.

    :param path: directory storing the cache
    :param description: name of the cache used in the message
    :return: one-line message with the path and the size of the cache; the
        size is `nan` if the path doesn't exist on disk
    """
    if _TRACE:
        _LOG.trace("")
    if path is None:
        return f"'{description}' cache: path='{path}' doesn't exist yet"
    if not os.path.exists(path):
        size_as_str = "nan"
    else:
        disk_usage = hsystem.du(path)
        # `du` can return an already formatted string or a number of bytes.
        if isinstance(disk_usage, str):
            size_as_str = disk_usage
        else:
            size_as_str = hintros.format_size(disk_usage)
    # TODO(gp): Compute number of files.
    return f"'{description}' cache: path='{path}', size={size_as_str}"
+_DISK_CACHE: Optional[joblib.Memory] = None + + +def _create_global_cache_backend( + cache_type: str, tag: Optional[str] = None +) -> joblib.Memory: + """ + Create a Joblib memory object storing a cache. + + :return: cache backend object + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + dir_name = _get_global_cache_path(cache_type, tag) + _LOG.debug( + "Creating cache for cache_type='%s' and tag='%s' at '%s'", + cache_type, + tag, + dir_name, + ) + cache_backend = joblib.Memory(dir_name, verbose=0, compress=True) + return cache_backend + + +# TODO(gp): -> _get_global_cache +def get_global_cache( + cache_type: str, tag: Optional[str] = None +) -> joblib.Memory: + """ + Get global cache by cache type. + + :return: caching backend + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + global _MEMORY_CACHE + global _DISK_CACHE + if tag is None: + if cache_type == "mem": + # Create global memory cache if it doesn't exist. + if _MEMORY_CACHE is None: + _MEMORY_CACHE = _create_global_cache_backend(cache_type) + global_cache = _MEMORY_CACHE + elif cache_type == "disk": + # Create global disk cache if it doesn't exist. + if _DISK_CACHE is None: + _DISK_CACHE = _create_global_cache_backend(cache_type) + global_cache = _DISK_CACHE + else: + raise ValueError(f"Invalid cache type '{cache_type}'") + else: + # Build a one-off cache using tag. + global_cache = _create_global_cache_backend(cache_type, tag) + return global_cache + + +def set_global_cache(cache_type: str, cache_backend: joblib.Memory) -> None: + """ + Set global cache by cache type. 
+ + :param cache_type: type of a cache + :param cache_backend: caching backend + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + global _MEMORY_CACHE + global _DISK_CACHE + if cache_type == "mem": + _MEMORY_CACHE = cache_backend + elif cache_type == "disk": + _DISK_CACHE = cache_backend + + +def clear_global_cache( + cache_type: str, tag: Optional[str] = None, destroy: bool = False +) -> None: + """ + Reset the global cache by cache type. + + :param cache_type: type of a cache. `None` to clear all the caches. + :param tag: optional unique tag of the cache, empty by default + :param destroy: remove physical directory + """ + if _TRACE: + _LOG.trace("") + if cache_type == "all": + for cache_type_tmp in _get_cache_types(): + clear_global_cache(cache_type_tmp, tag=tag, destroy=destroy) + return + _dassert_is_valid_cache_type(cache_type) + # Clear and / or destroy the cache `cache_type` with the given `tag`. + cache_path = _get_global_cache_path(cache_type, tag) + if not _IS_CLEAR_CACHE_ENABLED: + hdbg.dfatal(f"Trying to delete cache '{cache_path}'") + description = f"global {cache_type}" + try: + # TODO(ShaopengZ): in some test run outside CK infra, the + # _get_cache_size() hangs. + info_before = _get_cache_size(cache_path, description) + except ValueError: + _LOG.warning("Cache has already been deleted by another process.") + return + _LOG.info("Before clear_global_cache: %s", info_before) + _LOG.warning("Resetting 'global %s' cache '%s'", cache_type, cache_path) + if hs3.is_s3_path(cache_path): + # For now we only allow to delete caches under the unit test path. 
+ _, abs_path = hs3.split_path(cache_path) + hdbg.dassert( + abs_path.startswith("/tmp/cache.unit_test/"), + "The path '%s' is not valid", + abs_path, + ) + if destroy: + _LOG.warning("Destroying '%s' ...", cache_path) + hio.delete_dir(cache_path) + else: + cache_backend = get_global_cache(cache_type, tag) + try: + cache_backend.clear(warn=True) + except FileNotFoundError as e: + # A race condition can cause: + # FileNotFoundError: [Errno 2] No such file or directory: '/app/tmp.cache.disk/joblib' + _LOG.error("Caught %s: continuing", str(e)) + # Report stats before and after. + try: + info_after = _get_cache_size(cache_path, description) + except ValueError: + _LOG.warning("Cache has already been deleted by another process.") + return + _LOG.info("After clear_global_cache: %s", info_after) + + +# ############################################################################# +# CachedValueException +# ############################################################################# + + +class CachedValueException(RuntimeError): + """ + A cached function is run for a value present in the cache. + + This exception is thrown when the `check_only_if_present` mode is + used. + """ + + +# ############################################################################# +# NotCachedValueException +# ############################################################################# + + +class NotCachedValueException(RuntimeError): + """ + A cached function is run for a value not present in the cache. + + This exception is thrown when the `enable_read_only` mode is used. + """ + + +# ############################################################################# +# _Cached +# ############################################################################# + + +class _Cached: + # pylint: disable=protected-access + """ + Implement a cache in memory and disk for a function. 
+ + If the function value was not cached either in memory or on disk, the function + `f()` is executed and the value is stored in both caches for future calls. + + This class uses 2 levels of caching: + - memory cache: useful for caching across multiple executions of a function in + a process or in notebooks without resetting the state + - disk cache: useful for retrieving the state among different executions of a + process or when a notebook is reset + """ + + def _create_function_memory_cache(self) -> joblib.Memory: + """ + Initialize Joblib object storing a memory cache for this function. + """ + if _TRACE: + _LOG.trace("") + _LOG.debug("Create memory cache") + # For memory always use the global cache. + cache_type = "mem" + memory_cache = get_global_cache(cache_type, self._tag) + # Get the Joblib object corresponding to the cached function. + return memory_cache.cache(self._func) + + def _create_function_disk_cache( + self, + ) -> Tuple[joblib.Memory, joblib.memory.MemorizedFunc]: + """ + Initialize Joblib object storing a disk cache for this function. + """ + if _TRACE: + _LOG.trace("") + if self.has_function_cache(): + hdbg.dassert( + not self._use_mem_cache, + "When using function cache the memory cache needs to be disabled", + ) + # Create a function-specific cache. + memory_kwargs: Dict[str, Any] = { + "verbose": 0, + "compress": True, + } + if hs3.is_s3_path(self._disk_cache_path): + import helpers.hjoblib as hjoblib + + # Register the S3 backend. + hjoblib.register_s3fs_store_backend() + s3fs = hs3.get_s3fs(self._aws_profile) + bucket, path = hs3.split_path(self._disk_cache_path) + # Remove the initial `/` from the path that makes the path + # absolute, since `Joblib.Memory` wants a path relative to the + # bucket. 
+ hdbg.dassert( + path.startswith("/"), + "The path should be absolute instead of %s", + path, + ) + path = path[1:] + memory_kwargs.update( + { + "backend": "s3", + "backend_options": {"s3fs": s3fs, "bucket": bucket}, + } + ) + else: + path = self._disk_cache_path + _LOG.debug("path='%s'\nmemory_kwargs=\n%s", path, str(memory_kwargs)) + disk_cache = joblib.Memory(path, **memory_kwargs) + else: + # Use the global cache. + cache_type = "disk" + disk_cache = get_global_cache(cache_type, self._tag) + # Get the Joblib object corresponding to the cached function. + disk_cached_func = disk_cache.cache(self._func) + return disk_cache, disk_cached_func + # + + # /////////////////////////////////////////////////////////////////////////// + + def _reset_cache_tracing(self) -> None: + """ + Reset the values used to track which cache we are hitting when + executing the cached function. + """ + if _TRACE: + _LOG.trace("") + # The reset values depend on which caches are enabled. + self._last_used_disk_cache = self._use_disk_cache + self._last_used_mem_cache = self._use_mem_cache + + # TODO(gp): Either allow users to initialize `mem_cache_path` here or with + # `set_function_cache_path()` but not both code paths. It's unclear which option + # is better. On the one side `set_function_cache_path()` is more explicit, but + # it can't be changed. On the other side the wrapper needs to be initialized in + # one shot. + def __init__( + self, + func: Callable, + *, + use_mem_cache: bool = True, + use_disk_cache: bool = True, + verbose: bool = False, + tag: Optional[str] = None, + disk_cache_path: Optional[str] = None, + aws_profile: Optional[str] = "am", + ): + """ + Construct the class. 
+ + :param func: function to cache + :param use_mem_cache, use_disk_cache: whether we allow memory and disk caching + :param verbose: print high-level information about the cache + behavior, e.g., + - whether a function was cached or not + - from which level the data was retrieved + - the execution time + - the amount of data retrieved + :param tag: a tag added to the global cache path to make it specific (e.g., + when running unit tests we want to use a different cache) + :param disk_cache_path: path of the function-specific cache + :param aws_profile: the AWS profile to use in case of S3 backend + """ + # Make the class have the same attributes (e.g., `__name__`, `__doc__`, + # `__dict__`) as the called function. + functools.update_wrapper(self, func) + if _TRACE: + _LOG.trace("") + # Save interface parameters. + hdbg.dassert_callable(func) + self._func = func + # TODO(gp): We should use memory cache only inside Jupyter notebooks. + self._use_mem_cache = use_mem_cache + self._use_disk_cache = use_disk_cache + self._is_verbose = verbose + self._tag = tag + self._disk_cache_path = disk_cache_path + self._aws_profile = aws_profile + # + self._reset_cache_tracing() + # Create the memory and disk cache objects for this function. + # TODO(gp): We might simplify the code by using a dict instead of 2 variables. + # Store the Joblib memory cache object for this function. + self._memory_cached_func = self._create_function_memory_cache() + # Store the Joblib memory object and the Joblib memory cache object for + # this function. + ( + self._disk_cache, + self._disk_cached_func, + ) = self._create_function_disk_cache() + # Enable a mode where an exception `NotCachedValueException` is thrown if + # the value is not in the cache. + self._enable_read_only = False + # Enable a mode where an exception `NotCachedValueException` is thrown if + # the value is in the cache, instead of accessing the value. 
+ self._check_only_if_present = False + + def get_function_cache_info(self, add_banner: bool = False) -> str: + """ + Return info about the caching properties for this function. + """ + if _TRACE: + _LOG.trace("") + txt = [] + if add_banner: + txt.append(hprint.frame("get_global_cache_info()", char1="<")) + has_func_cache = self.has_function_cache() + txt.append(f"has function-specific cache={has_func_cache}") + if has_func_cache: + # Function-specific cache: print the paths of the local cache. + cache_type = "disk" + txt.append(f"local {cache_type} cache path={self._disk_cache_path}") + txt = "\n".join(txt) + return txt + + def get_last_cache_accessed(self) -> str: + """ + Get the cache used in the latest call of the wrapped function. + + :return: type of cache used in the last call + """ + if _TRACE: + _LOG.trace("") + if self._last_used_mem_cache: + ret = "mem" + elif self._last_used_disk_cache: + # If the disk cache was used, then the memory cache should not been used. + hdbg.dassert(not self._last_used_mem_cache) + ret = "disk" + else: + ret = "no_cache" + return ret + + def enable_read_only(self, val: bool) -> None: + """ + If set to True, the cached function can only read from the cache but + not execute for new values. + + Otherwise a `NotCachedValueException` is thrown. + """ + if _TRACE: + _LOG.trace("") + _LOG.warning( + "Setting enable_read_only to %s -> %s", self._enable_read_only, val + ) + self._enable_read_only = val + + def enable_check_only_if_present(self, val: bool) -> None: + """ + If set to True, the cached function a `CachedValueException` is thrown + if a function invocation was cached, instead of executing it. + + This can be used to check if a value was already cached without + triggering retrieving the value from the cache, e.g., when + probing the content of the cache. 
+ """ + _LOG.warning( + "Setting check_only_if_present to %s -> %s", + self._check_only_if_present, + val, + ) + self._check_only_if_present = val + + def _get_memorized_result(self, cache_type: str) -> joblib.MemorizedResult: + """ + Get the instance of a cache by type. + + From https://github.com/joblib/joblib/blob/master/joblib/memory.py + A `MemorizedResult` is an object representing a cached value + + :param cache_type: type of a cache + :return: instance of the Joblib cache + """ + if _TRACE: + _LOG.trace("") + _dassert_is_valid_cache_type(cache_type) + if cache_type == "mem": + memorized_result = self._memory_cached_func + elif cache_type == "disk": + memorized_result = self._disk_cached_func + _LOG.debug("memorized_result=%s", memorized_result) + return memorized_result + + def _get_function_specific_code_path(self) -> str: + if _TRACE: + _LOG.trace("") + # Get the store backend. + cache_type = "disk" + memorized_result = self._get_memorized_result(cache_type) + store_backend = memorized_result.store_backend + # Get the function id (which is the full path). + func_id = jmemor._build_func_identifier(self._func) + # Assemble the path. + func_path = os.path.join(store_backend.location, func_id, "func_code.py") + _LOG.debug("func_path='%s'", func_path) + hdbg.dassert( + store_backend._item_exists(func_path), "Can't find '%s'", func_path + ) + return func_path + + def update_func_code_without_invalidating_cache(self) -> None: + """ + Update the Python code stored in the cache. + + This is used when we make changes to the cached function but we don't want + to invalidate the cache. + + NOTE: here the caller must guarantee that the new function yields exactly + the same results than the previous ones. Use carefully. 
+ """ + if _TRACE: + _LOG.trace("") + hdbg.dassert( + self.has_function_cache(), + "This is used only for function-specific caches", + ) + # From `store_cached_func_code` in + # https://github.com/joblib/joblib/tree/master/joblib/_store_backends.py + func_path = self._get_function_specific_code_path() + # Archive old code. + new_func_path = ( + func_path + "." + hdateti.get_current_timestamp_as_string(tz="ET") + ) + _LOG.debug("new_func_path='%s'", new_func_path) + # Get the store backend. + cache_type = "disk" + memorized_result = self._get_memorized_result(cache_type) + store_backend = memorized_result.store_backend + hdbg.dassert( + not store_backend._item_exists(new_func_path), + "'%s' already exists", + new_func_path, + ) + store_backend._move_item(func_path, new_func_path) + # Write out function code to the cache. + func_code, _, first_line = jfunci.get_func_code(memorized_result.func) + memorized_result._write_func_code(func_code, first_line) + _LOG.debug("Updated func_path='%s'", func_path) + + # /////////////////////////////////////////////////////////////////////////// + # Function-specific cache. + # /////////////////////////////////////////////////////////////////////////// + + def has_function_cache(self) -> bool: + """ + Return whether this function has a function-specific cache or uses the + global cache. + """ + if _TRACE: + _LOG.trace("") + has_func_cache = self._disk_cache_path is not None + return has_func_cache + + # TODO(gp): Can we reuse the same code for `clear_function_cache` as above? + def clear_function_cache(self, destroy: bool = False) -> None: + """ + Clear a function-specific cache. + """ + if _TRACE: + _LOG.trace("") + hdbg.dassert( + self.has_function_cache(), + "This function has no function-specific cache", + ) + # Get the path for the disk cache. 
+ cache_path = self._disk_cache_path + hdbg.dassert_is_not(cache_path, None) + cache_path = cast(str, cache_path) + if not _IS_CLEAR_CACHE_ENABLED: + hdbg.dfatal(f"Trying to delete function cache '{cache_path}'") + # Collect info before. + cache_type = "disk" + description = f"function {cache_type}" + info_before = _get_cache_size(cache_path, description) + _LOG.info("Before clear_function_cache: %s", info_before) + # Clear / destroy the cache. + _LOG.warning( + "Resetting '%s' cache for function '%s' in dir '%s'", + cache_type, + self._func.__name__, + cache_path, + ) + if hs3.is_s3_path(cache_path): + # For now we only allow to delete caches under the unit test path. + _, abs_path = hs3.split_path(cache_path) + hdbg.dassert( + abs_path.startswith("/tmp/"), + "The path '%s' is not valid", + abs_path, + ) + if destroy: + _LOG.warning("Destroying '%s' ...", cache_path) + hio.delete_dir(cache_path) + else: + self._disk_cache.clear() + # Print stats. + info_after = _get_cache_size(cache_path, description) + _LOG.info("After clear_function_cache: %s", info_after) + + def set_function_cache_path(self, cache_path: Optional[str]) -> None: + """ + Set the path for the function-specific cache for a cache type. + + :param cache_path: cache directory or `None` to use global cache + """ + if _TRACE: + _LOG.trace("") + if cache_path: + hdbg.dassert_dir_exists(cache_path) + # We need to disable the memory cache. + if cache_path: + self._use_mem_cache = False + else: + self._use_mem_cache = True + self._disk_cache_path = cache_path + ( + self._disk_cache, + self._disk_cached_func, + ) = self._create_function_disk_cache() + + # /////////////////////////////////////////////////////////////////////////// + + # TODO(gp): We should use the actual stored dir. + def _get_cache_dir(self, cache_type: str, tag: Optional[str]) -> str: + """ + Return the dir of the cache corresponding to `cache_type` and `tag`. 
+ """ + if _TRACE: + _LOG.trace("") + if cache_type == "no_cache": + return "no_cache" + if self.has_function_cache(): + hdbg.dassert_eq(cache_type, "disk") + ret = self._disk_cache_path + else: + ret = _get_global_cache_path(cache_type, tag=tag) + ret = cast(str, ret) + return ret + + def _get_identifiers( + self, cache_type: str, *args: Any, **kwargs: Any + ) -> Tuple[str, str]: + """ + Get digests for current function and arguments to be used in cache. + + :param cache_type: type of a cache + :param args: original arguments of the call + :param kwargs: original kw-arguments of the call + :return: digests of the function and current arguments + """ + memorized_result = self._get_memorized_result(cache_type) + _LOG.debug("memorized_result=%s", memorized_result) + hdbg.dassert_is_not( + memorized_result, + None, + "Cache backend not initialized for %s", + cache_type, + ) + # This is needed for joblib >= 1.4.2. + func_id = memorized_result.func_id + args_id = memorized_result._get_args_id(*args, **kwargs) + _LOG.debug("func_id=%s args_id=%s", func_id, args_id) + return func_id, args_id + + def _has_cached_version( + self, cache_type: str, func_id: str, args_id: str + ) -> bool: + """ + Check if a cache contains an entry for a corresponding function and + arguments digests, and that function source has not changed. + + :param cache_type: type of a cache + :param func_id: digest of the function obtained from _get_identifiers + :param args_id: digest of arguments obtained from _get_identifiers + :return: whether there is an entry in a cache + """ + if _TRACE: + _LOG.trace("") + memorized_result = self._get_memorized_result(cache_type) + has_cached_version = memorized_result.store_backend.contains_item( + [func_id, args_id] + ) + _LOG.debug("has_cached_version=%s", has_cached_version) + if has_cached_version: + # We must check that the source of the function is the same, otherwise, + # cache tracing will not be correct. + # First, try faster check via joblib hash. 
+ if self._func in jmemor._FUNCTION_HASHES: + func_hash = memorized_result._hash_func() + if func_hash == jmemor._FUNCTION_HASHES[self._func]: + return True + # Otherwise, check the the source of the function is still the same. + func_code, _, _ = jmemor.get_func_code(self._func) + old_func_code_cache = ( + memorized_result.store_backend.get_cached_func_code([func_id]) + ) + old_func_code, _ = jmemor.extract_first_line(old_func_code_cache) + if func_code == old_func_code: + return True + return False + + def _store_cached_version( + self, cache_type: str, func_id: str, args_id: str, obj: Any + ) -> None: + """ + Store returned value from the intrinsic function in the cache. + + :param cache_type: type of a cache + :param func_id: digest of the function obtained from `_get_identifiers()` + :param args_id: digest of arguments obtained from `_get_identifiers()` + :param obj: return value of the intrinsic function + """ + if _TRACE: + _LOG.trace("") + # This corresponds to + # /venv/lib/python3.8/site-packages/joblib/memory.py + # __call__ + if self._enable_read_only: + raise NotCachedValueException + memorized_result = self._get_memorized_result(cache_type) + # Write out function code to the cache. + func_code, _, first_line = jfunci.get_func_code(memorized_result.func) + memorized_result._write_func_code(func_code, first_line) + # Store the returned value into the cache. + memorized_result.store_backend.dump_item([func_id, args_id], obj) + + def _execute_func_from_disk_cache(self, *args: Any, **kwargs: Any) -> Any: + if _TRACE: + _LOG.trace("") + func_info = ( + f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" + ) + # Get the function signature. 
+ func_id, args_id = self._get_identifiers("disk", *args, **kwargs) + if self._has_cached_version("disk", func_id, args_id): + _LOG.debug("There is a disk cached version") + with htimer.TimedScope( + logging.INFO, "Loading cached version from disk" + ): + obj = self._disk_cached_func(*args, **kwargs) + if self._check_only_if_present: + raise CachedValueException(func_info) + else: + # INV: we didn't hit neither memory nor the disk cache. + self._last_used_disk_cache = False + # + _LOG.debug( + "%s: execute the intrinsic function", + func_info, + ) + # If the cache was read-only, then assert. + if self._enable_read_only: + msg = f"{func_info}: trying to execute" + raise NotCachedValueException(msg) + with htimer.TimedScope( + logging.INFO, "Updating cached version on disk" + ): + obj = self._disk_cached_func(*args, **kwargs) + # obj = self._execute_intrinsic_function(*args, **kwargs) + # The function was not cached in disk, so now we need to update the + # memory cache. + # self._store_cached_version("disk", func_id, args_id, obj) + return obj + + def _execute_intrinsic_function(self, *args: Any, **kwargs: Any) -> Any: + if _TRACE: + _LOG.trace("") + with htimer.TimedScope(logging.INFO, "Executing intrinsic function"): + func_info = ( + f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" + ) + _LOG.debug("%s: execute intrinsic function", func_info) + if self._enable_read_only: + msg = f"{func_info}: trying to execute" + raise NotCachedValueException(msg) + obj = self._func(*args, **kwargs) + return obj + + def _execute_func_from_mem_cache(self, *args: Any, **kwargs: Any) -> Any: + """ + Execute the function from memory cache and if not possible try the + lower cache levels. + """ + if _TRACE: + _LOG.trace("") + func_info = ( + f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" + ) + # Get the function signature. 
+ func_id, args_id = self._get_identifiers("mem", *args, **kwargs) + if self._has_cached_version("mem", func_id, args_id): + _LOG.debug("There is a mem cached version") + if self._check_only_if_present: + raise CachedValueException(func_info) + # The function execution was cached in the mem cache. + with htimer.TimedScope( + logging.INFO, "Loading cached version from memory" + ): + obj = self._memory_cached_func(*args, **kwargs) + else: + # INV: we know that we didn't hit the memory cache, but we don't know + # about the disk cache. + _LOG.debug("There is not a mem cached version") + self._last_used_mem_cache = False + # + if self._use_disk_cache: + # Try the disk cache. + _LOG.debug( + "Trying to retrieve from disk", + ) + obj = self._execute_func_from_disk_cache(*args, **kwargs) + else: + _LOG.warning("Skipping disk cache") + obj = self._execute_intrinsic_function(*args, **kwargs) + # The function was not cached in memory, so now we need to update the + # memory cache. + self._store_cached_version("mem", func_id, args_id, obj) + return obj + + def _execute_func(self, *args: Any, **kwargs: Any) -> Any: + if _TRACE: + _LOG.trace("") + func_info = ( + f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" + ) + _LOG.debug( + "%s: use_mem_cache=%s use_disk_cache=%s", + func_info, + self._use_mem_cache, + self._use_disk_cache, + ) + if self._use_mem_cache: + _LOG.debug("Trying to retrieve from memory") + obj = self._execute_func_from_mem_cache(*args, **kwargs) + else: + if self.has_function_cache(): + # For function-specific cache, skipping the memory cache is the + # normal behavior. 
+ _LOG.debug( + "Function has function-specific cache: skipping memory cache" + ) + else: + _LOG.warning("Skipping memory cache") + self._last_used_mem_cache = False + if self._use_disk_cache: + obj = self._execute_func_from_disk_cache(*args, **kwargs) + else: + _LOG.warning("Skipping disk cache") + self._last_used_disk_cache = False + obj = self._execute_intrinsic_function(*args, **kwargs) + return obj + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + """ + Execute the wrapped function using the caches, if needed. + + :return: object returned by the wrapped function + """ + if _TRACE: + _LOG.trace("") + perf_counter_start: float + if self._is_verbose: + perf_counter_start = time.perf_counter() + # Execute the cached function. + if not is_caching_enabled(): + # No caching is allowed: execute the function. + _LOG.warning("All caching is disabled") + self._last_used_disk_cache = self._last_used_mem_cache = False + obj = self._func(*args, **kwargs) + else: + # Caching is allowed. + self._reset_cache_tracing() + obj = self._execute_func(*args, **kwargs) + _LOG.debug( + "%s: executed from '%s'", + self._func.__name__, + self.get_last_cache_accessed(), + ) + # TODO(gp): Not sure making a deep copy is a good idea. In the end, + # the client should not modify a cached value. + obj = copy.deepcopy(obj) + # Print caching info. + if self._is_verbose: + # Get time. + elapsed_time = time.perf_counter() - perf_counter_start + # Get memory. + # TODO(gp): This is very slow. 
+ # obj_size = hintros.get_size_in_bytes(obj) + # obj_size_as_str = hintros.format_size(obj_size) + obj_size_as_str = "nan" + last_cache = self.get_last_cache_accessed() + cache_dir = self._get_cache_dir(last_cache, self._tag) + _LOG.info( + " --> Cache data for '%s' from '%s' cache " + "(size=%s, time=%.2f s, tag=%s, loc=%s)", + self._func.__name__, + last_cache, + obj_size_as_str, + elapsed_time, + self._tag, + cache_dir, + ) + return obj + + +# ############################################################################# +# Decorator +# ############################################################################# + + +def cache( + use_mem_cache: bool = True, + use_disk_cache: bool = True, + set_verbose_mode: bool = False, + tag: Optional[str] = None, + disk_cache_path: Optional[str] = None, + aws_profile: Optional[str] = None, +) -> Union[Callable, _Cached]: + """ + Decorate a function with a cache. + + The parameters are the same as `hcache._Cached`. + + Usage examples: + ``` + import helpers.hcache as hcache + + @hcache.cache() + def add(x: int, y: int) -> int: + return x + y + + @hcache.cache(use_mem_cache=False) + def add(x: int, y: int) -> int: + return x + y + ``` + """ + + def wrapper(func: Callable) -> _Cached: + return _Cached( + func, + use_mem_cache=use_mem_cache, + use_disk_cache=use_disk_cache, + verbose=set_verbose_mode, + tag=tag, + disk_cache_path=disk_cache_path, + aws_profile=aws_profile, + ) + + return wrapper + + +# ############################################################################# + +# Clean up the memory cache on-exit. +# TODO(gp): Add another function and make it silent. 
+atexit.register(clear_global_cache, cache_type="mem", destroy=True)
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py
new file mode 100644
index 000000000..0b3804436
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py
@@ -0,0 +1,1963 @@
+"""
+Detailed documentation at:
+
+- //helpers/docs/tools/helpers/all.hcache_simple.explanation.md
+- //helpers/notebooks/hcache_simple.tutorial.ipynb
+
+Import as:
+
+import helpers.hcache_simple as hcacsimp
+"""
+
+import functools
+import glob
+import json
+import logging
+import os
+import pickle
+import re
+from typing import Any, Callable, Dict, List, Optional, Union, cast
+
+import helpers.hdbg as hdbg
+import helpers.hgit as hgit
+import helpers.hio as hio
+import helpers.hprint as hprint
+import helpers.hs3 as hs3
+import helpers.hsystem as hsystem
+
+_LOG = logging.getLogger(__name__)
+
+# Disable tracing for production code.
+_LOG.trace = lambda *args, **kwargs: None
+# _LOG.trace = _LOG.debug
+
+# #############################################################################
+# Memory cache.
+# #############################################################################
+
+# Type for the cache of a single function: key -> value properties. E.g.,
+# ```
+# {
+#     "{\"args\": [4], \"kwargs\": {}}": 16
+# }
+# ```
+_FunctionCacheType = Dict[str, Any]
+
+# Basic type for caching data: func_name -> key -> value properties. E.g.,
+# ```
+# {
+#     "slow_square": {
+#         "{\"args\": [4], \"kwargs\": {}}": 16
+#     }
+# }
+# ```
+_CacheType = Dict[str, _FunctionCacheType]
+
+# Type for cache property storage: func_name -> property_name -> property_value.
E.g., +# ``` +# { +# "slow_square": { +# "type": "json", +# "cache_dir": "/tmp/cache", +# "write_through": True +# } +# } +# ``` +_CachePropertyType = Dict[str, Dict[str, Any]] + +# Create global variable for the memory cache. +if "_CACHE" not in globals(): + _LOG.trace("Creating _CACHE") + _CACHE: _CacheType = {} + +# Process-wide default `cache_mode` applied to every `@simple_cache` function +# when no explicit `cache_mode` is passed at the call site. Used by CLI scripts +# to flip all cached functions into refresh/disable/hit-or-abort mode from a +# single switch (see `hparser.add_cache_control_arg`). +_VALID_CACHE_MODES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") +_GLOBAL_CACHE_MODE: Optional[str] = None + + +def set_global_cache_mode(mode: Optional[str]) -> None: + """ + Set the process-wide default `cache_mode`. + + :param mode: one of `REFRESH_CACHE`, `DISABLE_CACHE`, + `HIT_CACHE_OR_ABORT`, or `None` to clear + """ + global _GLOBAL_CACHE_MODE + if mode is not None: + hdbg.dassert_in(mode, _VALID_CACHE_MODES) + _GLOBAL_CACHE_MODE = mode + + +def get_global_cache_mode() -> Optional[str]: + """ + Return the process-wide default `cache_mode`, or `None` if unset. + """ + return _GLOBAL_CACHE_MODE + + +# When enabled, every `@simple_cache` call emits a WARNING describing whether +# the result came from the cache, was computed on miss, or was recomputed +# because of an active `cache_mode`. +_CACHE_DEBUG: bool = False + + +def set_cache_debug(enabled: bool) -> None: + """ + Enable or disable process-wide cache-decision logging at WARNING level. + """ + global _CACHE_DEBUG + hdbg.dassert_isinstance(enabled, bool) + _CACHE_DEBUG = enabled + + +def get_cache_debug() -> bool: + """ + Return True if cache-decision logging is enabled. + """ + return _CACHE_DEBUG + + +def sanity_check_function_cache( + func_cache_data: _FunctionCacheType, *, assert_on_empty: bool = True +) -> None: + """ + Sanity check the function cache data. 
+ + :param func_cache_data: The function cache data to check. + :param assert_on_empty: If True, assert that the function cache data + is not empty. + """ + hdbg.dassert_isinstance(func_cache_data, dict) + if assert_on_empty: + hdbg.dassert_ne(len(func_cache_data), 0, "Function data is empty") + for cache_key, cached_value in func_cache_data.items(): + hdbg.dassert_isinstance(cache_key, str) + hdbg.dassert_ne(cache_key, "", "Cache key is empty") + # cached_value can be any type, so no type check needed. + _ = cached_value + + +def sanity_check_cache( + cache_data: _CacheType, *, assert_on_empty: bool = True +) -> None: + """ + Sanity check the cache data. + + :param cache_data: The cache data to check. + :param assert_on_empty: If True, assert that the cache data is not + empty. + """ + hdbg.dassert_isinstance(cache_data, dict) + if assert_on_empty: + hdbg.dassert_ne(len(cache_data), 0, "Cache data is empty") + for func_name, func_cache_data in cache_data.items(): + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_ne(func_name, "", "Function name is empty") + sanity_check_function_cache( + func_cache_data, assert_on_empty=assert_on_empty + ) + + +def cache_data_to_str(cache_data: _CacheType) -> str: + """ + Convert cache data to a human-readable string. + + :param cache_data: The cache data to convert. + :return: A string representation of the cache data. + """ + txt = [] + txt.append(hprint.frame("Cache data")) + hdbg.dassert_isinstance(cache_data, dict) + for func_name, func_data in cache_data.items(): + txt.append(f"# func_name={func_name}") + hdbg.dassert_isinstance(func_data, dict) + for cache_key, cached_value in func_data.items(): + txt.append(f" cache_key={cache_key} cached_value={cached_value}") + result = "\n".join(txt) + return result + + +# ############################################################################# +# Cache properties. 
def get_main_cache_dir() -> str:
    """
    Return the default cache directory, i.e., the Git root.

    :return: absolute path of the directory found by `hgit.find_git_root()`
    """
    return os.path.abspath(hgit.find_git_root())
def set_cache_file_prefix(prefix: str) -> None:
    """
    Set the global prefix used to name cache files.

    :param prefix: non-empty prefix to use for cache files
    """
    global _CACHE_FILE_PREFIX
    hdbg.dassert_isinstance(prefix, str)
    hdbg.dassert_ne(prefix, "", "Cache file prefix cannot be empty")
    # Cache files are named `{prefix}.{func_name}`, so a trailing dot in the
    # prefix ends up doubled in the file name.
    has_trailing_dot = prefix.endswith(".")
    if has_trailing_dot:
        _LOG.warning(
            "Prefix '%s' ends with '.' - cache files will have '..' in names",
            prefix,
        )
    _CACHE_FILE_PREFIX = prefix
    _LOG.trace("Setting _CACHE_FILE_PREFIX to %s", _CACHE_FILE_PREFIX)
def set_s3_bucket(bucket: str) -> None:
    """
    Set the S3 bucket for cache storage.

    The bucket is stored in canonical form: with the `s3://` scheme and
    without trailing slashes, so that paths built as
    `{bucket}/{prefix}/{file}` never contain an empty `//` segment.

    :param bucket: S3 bucket name, with or without scheme (e.g.,
        "my-bucket", "s3://my-bucket", or "s3://my-bucket/")
    """
    global _S3_BUCKET
    hdbg.dassert_isinstance(bucket, str)
    hdbg.dassert_ne(bucket, "", "S3 bucket cannot be empty")
    scheme = "s3://"
    # Split off the scheme so that the bucket name itself can be validated.
    bucket_name = bucket[len(scheme) :] if bucket.startswith(scheme) else bucket
    # Drop trailing slashes, consistently with `set_s3_prefix()`.
    bucket_name = bucket_name.rstrip("/")
    # Reject inputs like "s3://" or "/" that carry no bucket name.
    hdbg.dassert_ne(bucket_name, "", "S3 bucket cannot be empty")
    _S3_BUCKET = f"{scheme}{bucket_name}"
    _LOG.trace("Setting _S3_BUCKET to %s", _S3_BUCKET)
def _get_initial_cache_property() -> _CachePropertyType:
    """
    Build the initial cache properties.

    The properties are read from the pickle file returned by
    `get_cache_property_file()` when it exists; otherwise an empty mapping is
    used.

    :return: mapping func_name -> property_name -> value
    """
    property_file = get_cache_property_file()
    # func_name -> property_name -> value.
    properties = {}
    if os.path.exists(property_file):
        _LOG.trace("Loading from %s", property_file)
        # TODO(gp): Use _load_data_from_file, if possible.
        # NOTE(review): `pickle.load` executes code embedded in the file, so
        # the property file must come from a trusted location.
        with open(property_file, "rb") as fh:
            properties = pickle.load(fh)
    return cast(_CachePropertyType, properties)
+ "auto_sync_s3", + ] + hdbg.dassert_in(property_name, valid_properties) + + +def _infer_cache_type_from_path(file_path: str) -> str: + """ + Infer cache type from file path extension. + + :param file_path: path to cache file (local or S3) + :return: inferred type ("pickle" or "json") + """ + if file_path.endswith(".pkl"): + out = "pickle" + elif file_path.endswith(".json"): + out = "json" + else: + # Default to json. + out = "json" + return out + + +def _save_func_cache_data_to_file( + file_name: str, + cache_type: Optional[str], + func_cache_data: _FunctionCacheType, +) -> None: + """ + Save the function cache data to a file. + + :param file_name: The name of the file. + :param func_cache_data: The function cache data to save. + """ + # Infer cache type from file extension if not set. + if cache_type is None: + cache_type = _infer_cache_type_from_path(file_name) + hio.create_enclosing_dir(file_name, incremental=True) + _LOG.trace("Saving to '%s'", file_name) + # Save data. + if cache_type == "pickle": + with open(file_name, "wb") as file: + pickle.dump(func_cache_data, file) + elif cache_type == "json": + with open(file_name, "w", encoding="utf-8") as file: + json.dump( + func_cache_data, + file, + indent=4, + sort_keys=True, + ensure_ascii=False, + ) + else: + raise ValueError(f"Invalid cache type '{cache_type}'") + + +def set_cache_property(func_name: str, property_name: str, val: Any) -> None: + """ + Set a property for the cache of a given function name. + + :param func_name: The name of the function whose cache property is + to be set. + :param property_name: The name of the property to set. + :param val: The value to set for the property. + """ + _LOG.trace(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert_isinstance(property_name, str) + _check_valid_cache_property(property_name) + # Assign value. 
def get_cache_property(
    func_name: str, property_name: str
) -> Optional[Union[bool, Any]]:
    """
    Look up a cache property of a function in the in-memory property store.

    :param func_name: the name of the function owning the property
    :param property_name: the property to read; must be a valid property name
    :return: the stored value, of whatever type was set. When the property
        is not set, system properties (those in `_SYSTEM_PROPERTIES`) yield
        None and user properties yield False.
    """
    _LOG.trace(hprint.func_signature_to_str())
    _check_valid_cache_property(property_name)
    # The default for an unset property depends on the property family.
    func_is_known = func_name in _CACHE_PROPERTY
    if property_name in _SYSTEM_PROPERTIES:
        if not func_is_known:
            return None
        return _CACHE_PROPERTY[func_name].get(property_name)
    if not func_is_known:
        return False
    return _CACHE_PROPERTY[func_name].get(property_name, False)
def _get_valid_cache_prefixes() -> set:
    """
    Collect every cache file prefix in use.

    :return: the global prefix plus each non-empty per-function
        `cache_prefix` property
    """
    prefixes = {get_cache_file_prefix()}
    # Add the custom prefixes, skipping functions that do not define one.
    custom_prefixes = (
        get_cache_property(name, "cache_prefix") for name in _CACHE_PROPERTY
    )
    prefixes.update(prefix for prefix in custom_prefixes if prefix)
    return prefixes
def _get_disk_cached_func_names() -> set:
    """
    Collect the names of the functions with a cache file on local disk.

    :return: function names found in the global cache directory or in a
        per-function custom cache directory
    """
    cache_dir = get_cache_dir()
    valid_prefixes = _get_valid_cache_prefixes()
    # The property file lives in the cache dir but is not a function cache.
    property_file_name = os.path.basename(get_cache_property_file())
    disk_files = [
        path
        for pattern in ("*.json", "*.pkl")
        for path in glob.glob(os.path.join(cache_dir, pattern))
        if os.path.basename(path) != property_file_name
    ]
    func_names = set(
        _extract_func_names_from_cache_files(disk_files, valid_prefixes)
    )
    # Functions with a custom cache directory are not covered by the glob.
    for func_name_tmp in _CACHE_PROPERTY:
        if not get_cache_property(func_name_tmp, "cache_dir"):
            continue
        if os.path.exists(_get_cache_file_name(func_name_tmp)):
            func_names.add(func_name_tmp)
    return func_names


def _get_s3_cached_func_names() -> set:
    """
    Collect the names of the functions with a cache file on S3.

    Both the global bucket and the per-function buckets stored in the cache
    properties are listed; each distinct (bucket, prefix, profile)
    configuration is listed only once.

    :return: function names found on S3
    """
    s3_configs = set()
    # Global S3 bucket.
    if _check_s3_configured():
        s3_configs.add((get_s3_bucket(), get_s3_prefix(), get_aws_profile()))
    # Custom S3 buckets.
    for func_name_tmp in _CACHE_PROPERTY:
        func_s3_bucket = get_cache_property(func_name_tmp, "s3_bucket")
        if not func_s3_bucket:
            continue
        # Per-function buckets may be stored without scheme: normalize them
        # like `_get_s3_cache_path()` does, so that the listing targets a
        # valid S3 path and equivalent configs are de-duplicated.
        if not func_s3_bucket.startswith("s3://"):
            func_s3_bucket = f"s3://{func_s3_bucket}"
        func_s3_prefix = get_cache_property(func_name_tmp, "s3_prefix")
        if not func_s3_prefix:
            func_s3_prefix = get_s3_prefix()
        func_aws_profile = get_cache_property(func_name_tmp, "aws_profile")
        if not func_aws_profile:
            func_aws_profile = get_aws_profile()
        s3_configs.add((func_s3_bucket, func_s3_prefix, func_aws_profile))
    # List files from each unique S3 config.
    func_names = set()
    for bucket, prefix, aws_profile in s3_configs:
        func_names.update(
            _list_s3_cached_func_names(bucket, prefix, aws_profile)
        )
    return func_names


def get_cached_func_names(type_: str) -> List[str]:
    """
    Retrieve the function names cached with the specified type.

    :param type_: the type of cache to retrieve:
        - 'mem': memory cache only (functions with a non-empty cache)
        - 'disk': disk cache only (includes global and custom local cache
          directories)
        - 's3': S3 cache only (includes global and custom S3 buckets)
        - 'local': local caches (mem + disk)
        - 'all': all caches (mem + disk + s3)
    :return: sorted names of functions cached with the specified type
    :raises ValueError: if `type_` is not one of the supported values
    """
    if type_ == "mem":
        # Only include functions with non-empty cache dicts.
        func_names = {fn for fn, data in _CACHE.items() if len(data) > 0}
    elif type_ == "disk":
        func_names = _get_disk_cached_func_names()
    elif type_ == "s3":
        func_names = _get_s3_cached_func_names()
    elif type_ == "local":
        func_names = set(get_cached_func_names("mem"))
        func_names.update(get_cached_func_names("disk"))
    elif type_ == "all":
        func_names = set(get_cached_func_names("local"))
        func_names.update(get_cached_func_names("s3"))
    else:
        raise ValueError(
            f"Invalid type '{type_}'. Valid types: 'mem', 'disk', 's3', "
            "'local', 'all'"
        )
    return sorted(func_names)
def disable_cache_perf(func_name: str = "") -> None:
    """
    Disable cache performance statistics for a given function.

    Disabling is recorded by storing `None` as the perf object of the
    function.

    :param func_name: the name of the function. If empty, statistics are
        disabled for every function that is tracked in `_CACHE_PERF` or that
        has cached data
    """
    if func_name == "":
        # A function can have perf tracking enabled before it has any cached
        # data, so the tracked functions must be visited too, not only the
        # cached ones.
        func_names = set(_CACHE_PERF)
        func_names.update(get_cached_func_names("all"))
        for func_name_tmp in sorted(func_names):
            _CACHE_PERF[func_name_tmp] = None
        return
    _CACHE_PERF[func_name] = None
def _get_cache_file_name(func_name: str) -> str:
    """
    Build the full path of the local cache file of a function.

    The directory and the file prefix come from the per-function properties
    (`cache_dir`, `cache_prefix`) when set, and from the global settings
    otherwise. The extension follows the `type` property; when the type is
    not set, an existing `.pkl` file wins, and `.json` is used otherwise.

    :param func_name: the name of the function
    :return: the cache file path, with `.pkl` or `.json` extension
    :raises ValueError: if the `type` property holds an unknown cache type
    """
    _LOG.trace("func_name='%s'", func_name)
    hdbg.dassert_isinstance(func_name, str)
    # Per-function settings take precedence over the global ones.
    cache_dir = get_cache_property(func_name, "cache_dir") or get_cache_dir()
    prefix = (
        get_cache_property(func_name, "cache_prefix")
        or get_cache_file_prefix()
    )
    base_path = os.path.join(cache_dir, f"{prefix}.{func_name}")
    cache_type = get_cache_property(func_name, "type")
    _LOG.trace(hprint.to_str("cache_type"))
    if cache_type == "pickle":
        return base_path + ".pkl"
    if cache_type == "json":
        return base_path + ".json"
    if cache_type is not None:
        raise ValueError(f"Invalid cache type '{cache_type}'")
    # Unknown type: reuse an existing pickle file, otherwise fall back to json
    # (whether or not the json file already exists).
    pickle_path = base_path + ".pkl"
    if os.path.exists(pickle_path):
        return pickle_path
    return base_path + ".json"
def _load_func_cache_data_from_file(
    file_name: str, cache_type: Optional[str]
) -> _FunctionCacheType:
    """
    Read the cache data of a function from a file.

    :param file_name: the name of the file, which must exist
    :param cache_type: "pickle" or "json"; if None, the type is inferred
        from the extension of `file_name`
    :return: the function cache data
    :raises ValueError: if the cache type is neither "pickle" nor "json"
    """
    # Fall back to the file extension when the type is not given.
    effective_type = cache_type
    if effective_type is None:
        effective_type = _infer_cache_type_from_path(file_name)
    _LOG.trace("Loading from '%s'", file_name)
    hdbg.dassert_file_exists(file_name)
    if effective_type not in ("pickle", "json"):
        raise ValueError(f"Invalid cache type '{effective_type}'")
    if effective_type == "pickle":
        # NOTE(review): `pickle.load` executes code embedded in the file, so
        # pickle caches must come from a trusted source.
        with open(file_name, "rb") as fh:
            return pickle.load(fh)
    with open(file_name, "r", encoding="utf-8") as fh:
        return json.load(fh)
def _build_s3_cache_path_for_type(func_name: str, cache_type: str) -> str:
    """
    Build the S3 path of the cache file of a function for a given type.

    Bucket, S3 prefix, and file prefix come from the per-function properties
    when set, and from the global settings otherwise.

    :param func_name: the name of the function
    :param cache_type: the cache type ("json" or "pickle")
    :return: the S3 path with the extension matching `cache_type`
    :raises ValueError: if no bucket is configured or `cache_type` is invalid
    """
    bucket = get_cache_property(func_name, "s3_bucket")
    if not bucket:
        # No per-function bucket: the global one is mandatory.
        bucket = get_s3_bucket()
        if bucket is None:
            raise ValueError("S3 bucket not configured")
    elif not bucket.startswith("s3://"):
        # Ensure s3:// prefix.
        bucket = f"s3://{bucket}"
    s3_prefix = get_cache_property(func_name, "s3_prefix") or get_s3_prefix()
    file_prefix = (
        get_cache_property(func_name, "cache_prefix")
        or get_cache_file_prefix()
    )
    # Map the cache type to the file extension.
    if cache_type == "pickle":
        extension = "pkl"
    elif cache_type == "json":
        extension = "json"
    else:
        raise ValueError(f"Invalid cache type '{cache_type}'")
    base_name = f"{file_prefix}.{func_name}.{extension}"
    # An empty S3 prefix means the file sits directly under the bucket.
    path_parts = [bucket, s3_prefix, base_name] if s3_prefix else [bucket, base_name]
    return "/".join(path_parts)
+ s3_prefix = get_cache_property(func_name, "s3_prefix") + if not s3_prefix: + s3_prefix = get_s3_prefix() + base_name = os.path.basename(_get_cache_file_name(func_name)) + if s3_prefix: + s3_path = f"{bucket}/{s3_prefix}/{base_name}" + else: + s3_path = f"{bucket}/{base_name}" + return s3_path + + +def _extract_func_name_from_cache_file(cache_file_name: str) -> Optional[str]: + """ + Extract function name from cache file name. + + Cache file names follow the format: .. + + :param cache_file_name: the cache file name (e.g., + "cache.my_func.json") + :return: the function name, or None if pattern does not match + """ + pattern = r"^(.+)\.([^\.]+)\.(?:json|pkl)$" + match = re.match(pattern, cache_file_name) + if match: + return match.group(2) + return None + + +def _list_s3_cached_func_names( + bucket: str, + prefix: Optional[str], + aws_profile: str, +) -> List[str]: + """ + List names of functions cached in S3 bucket. + + :param bucket: S3 bucket path (e.g., "s3://my-bucket") + :param prefix: S3 prefix path (e.g., "cache/shared") + :param aws_profile: AWS profile name + :return: names of functions cached in S3 bucket + """ + # Build S3 directory path. + if prefix: + s3_dir = f"{bucket}/{prefix}" + else: + s3_dir = bucket + # List files in S3 directory. + try: + s3_files = hs3.listdir( + s3_dir, + pattern="*", + only_files=True, + use_relative_paths=False, + aws_profile=aws_profile, + ) + except Exception as e: + _LOG.warning("Failed to list S3 directory '%s': %s", s3_dir, e) + return [] + # Collect all valid cache file prefixes. + valid_prefixes = _get_valid_cache_prefixes() + # Extract function names from S3 file names. + func_names = _extract_func_names_from_cache_files(s3_files, valid_prefixes) + out = sorted(func_names) + return out + + +def _check_s3_configured(func_name: Optional[str] = None) -> bool: + """ + Check if S3 is properly configured. 
def _upload_cache_to_s3(func_name: str) -> None:
    """
    Copy the local cache file of a function to S3.

    Nothing is uploaded when S3 is not configured or when there is no local
    cache file.

    :param func_name: the name of the function
    """
    if not _check_s3_configured(func_name):
        return
    local_file = _get_cache_file_name(func_name)
    if not os.path.exists(local_file):
        _LOG.debug("No local cache file to upload for '%s'", func_name)
        return
    s3_path = _get_s3_cache_path(func_name)
    # The per-function AWS profile takes precedence over the global one.
    aws_profile = (
        get_cache_property(func_name, "aws_profile") or get_aws_profile()
    )
    _LOG.info("Uploading cache to %s", s3_path)
    cache_type = get_cache_property(func_name, "type")
    if cache_type is None:
        # Infer cache type from file extension if not set.
        cache_type = _infer_cache_type_from_path(local_file)
    if cache_type != "pickle":
        # JSON caches are transferred as text.
        text = hio.from_file(local_file)
        hs3.to_file(text, s3_path, aws_profile=aws_profile)
        return
    # Pickle caches are transferred as raw bytes.
    with open(local_file, "rb") as src:
        payload = src.read()
    s3fs_ = hs3.get_s3fs(aws_profile)
    with s3fs_.open(s3_path, "wb") as dst:
        dst.write(payload)
+ + :param func_name: the name of the function + :return: True if download is successful, False otherwise + """ + if not _check_s3_configured(func_name): + return False + # Check for per-function AWS profile, otherwise use global. + func_aws_profile = get_cache_property(func_name, "aws_profile") + if func_aws_profile: + aws_profile = func_aws_profile + else: + aws_profile = get_aws_profile() + s3fs_ = hs3.get_s3fs(aws_profile) + # Check cache type to determine file extension. + cache_type = get_cache_property(func_name, "type") + # If type is unknown, try both extensions in S3. + if cache_type is None: + # Try both .json and .pkl extensions. + for ext_type in ["json", "pickle"]: + # Build S3 path for this type. + s3_path_candidate = _build_s3_cache_path_for_type(func_name, ext_type) + if s3fs_.exists(s3_path_candidate): + # Set type property and use this path. + cache_type = ext_type + s3_path = s3_path_candidate + set_cache_property(func_name, "type", cache_type) + _LOG.debug("Found S3 cache with type=%s", ext_type) + break + else: + # Neither extension found in S3. + _LOG.debug("No S3 cache found for '%s'", func_name) + return False + else: + # Type is known, get paths normally. + s3_path = _get_s3_cache_path(func_name) + if not s3fs_.exists(s3_path): + _LOG.debug("No S3 cache found for '%s'", func_name) + return False + # Get local file path. + local_file = _get_cache_file_name(func_name) + _LOG.info("Downloading cache from %s", s3_path) + # Download from S3. + cache_type = get_cache_property(func_name, "type") + # Infer cache type from file extension if not set. + if cache_type is None: + cache_type = _infer_cache_type_from_path(s3_path) + hio.create_enclosing_dir(local_file, incremental=True) + if cache_type == "pickle": + # Read pickle files as bytes and write. + with s3fs_.open(s3_path, "rb") as f: + data = f.read() + with open(local_file, "wb") as f: + f.write(data) + else: + # Read JSON files as string and write. 
def pull_cache_from_s3(func_name: str = "") -> None:
    """
    Pull the cache of a function from S3 into local disk and memory.

    If no function name is provided, every function discoverable on S3 is
    pulled, i.e., those found in the global S3 bucket and/or in the custom
    buckets recorded in `_CACHE_PROPERTY`.

    Functions cached in a custom S3 bucket from another machine cannot be
    discovered without sharing the `_CACHE_PROPERTY` file:
    - Without it, the pull only retrieves cache files from the global bucket
    - For more info, see `docs/tools/helpers/all.hcache_simple.explanation.md`

    :param func_name: the name of the function. If empty, pull all
        discoverable caches
    """
    if func_name == "":
        # Discover all cached functions and pull each one.
        discovered = get_cached_func_names("s3")
        for discovered_name in discovered:
            pull_cache_from_s3(discovered_name)
        _LOG.info("Pulled %d functions from S3", len(discovered))
        return
    _LOG.info("Pulling cache from S3 for '%s'", func_name)
    downloaded = _download_cache_from_s3(func_name)
    if not downloaded:
        _LOG.warning("Failed to pull cache from S3 for '%s'", func_name)
        return
    # Refresh the memory cache from the freshly downloaded file.
    force_cache_from_disk(func_name)
def cache_stats_to_str(
    func_name: Optional[str] = "",
) -> Optional["pd.DataFrame"]:  # noqa: F821
    """
    Build a table with the number of entries in the memory and disk caches.

    If `func_name` is empty or None, the stats of all the functions with a
    local cache (mem + disk) are concatenated, one row per function.

    E.g.,
    ```
    find_email:
      memory: -
      disk: 1044

    verify_email:
      memory: -
      disk: 2322
    ```

    :param func_name: the name of the function. If empty or None, report
        all functions with a local cache
    :return: a dataframe indexed by function name with columns `memory` and
        `disk` ("-" when that cache does not exist), or None when there is
        no function to report
    """
    # Import locally to limit the dependency from pandas in the cache.
    import pandas as pd

    if func_name is None or func_name == "":
        # Collect one single-row dataframe per function.
        per_func_stats = [
            cache_stats_to_str(name) for name in get_cached_func_names("local")
        ]
        return pd.concat(per_func_stats) if per_func_stats else None
    # Memory cache.
    in_memory = func_name in _CACHE
    stats = {"memory": len(_CACHE[func_name]) if in_memory else "-"}
    # Disk cache.
    if os.path.exists(_get_cache_file_name(func_name)):
        stats["disk"] = len(get_disk_cache(func_name))
    else:
        stats["disk"] = "-"
    stats_df = pd.Series(stats).to_frame().T
    stats_df.index = [func_name]
    return stats_df


def force_cache_from_disk(func_name: Optional[str] = "") -> None:
    """
    Replace the memory cache with the content of the disk cache.

    :param func_name: the name of the function. If empty or None, apply
        to all discoverable functions with cache on local disk
    """
    global _CACHE
    if func_name is None or func_name == "":
        _LOG.info("Before:\n%s", cache_stats_to_str())
        for name in get_cached_func_names("disk"):
            force_cache_from_disk(name)
        _LOG.info("After:\n%s", cache_stats_to_str())
        return
    _LOG.trace("func_name='%s'", func_name)
    loaded_cache = get_disk_cache(func_name)
    _LOG.trace("disk_cache=%s", len(loaded_cache))
    # An empty dict is never stored, to avoid phantom cached functions.
    if loaded_cache:
        _CACHE[func_name] = loaded_cache
+ + :param func_name: the name of the function + :return: memory cache data + """ + mem_cache = _CACHE.get(func_name, {}) + return mem_cache + + +def flush_cache_to_disk(func_name: Optional[str] = "") -> None: + """ + Flush the memory cache to disk and update the memory cache. + + This merges memory cache with disk cache (memory takes precedence) + and saves to disk, then updates memory with the merged result. + + :param func_name: the name of the function. If empty or None, apply + to all functions with memory cache + """ + # Handle None as empty string. + if func_name is None: + func_name = "" + if func_name == "": + _LOG.info("Before:\n%s", cache_stats_to_str()) + for func_name_tmp in get_cached_func_names("mem"): + flush_cache_to_disk(func_name_tmp) + _LOG.info("After:\n%s", cache_stats_to_str()) + return + _LOG.trace("func_name='%s'", func_name) + # Get memory cache. + mem_cache = get_mem_cache(func_name) + _LOG.trace("mem_cache=%s", len(mem_cache)) + # Get disk cache. + disk_cache = get_disk_cache(func_name) + _LOG.trace("disk_cache=%s", len(disk_cache)) + # Merge disk cache with memory cache. + disk_cache.update(mem_cache) + # Save merged cache to disk only if non-empty. + # Do not create empty cache files. + if len(disk_cache) > 0: + _save_cache_dict_to_disk(func_name, disk_cache) + # Update the memory cache. + global _CACHE + _CACHE[func_name] = disk_cache + + +def get_cache(func_name: str) -> _FunctionCacheType: + """ + Retrieve the cache for a given function name. + + This function implements a three-tier cache lookup: + 1. Memory cache (fastest) + 2. Disk cache (persistent) + 3. S3 cache (shared, if configured) + + If S3 is configured and cache is not in memory/disk, attempts to pull + from S3 automatically (once per function per session). 
def reset_mem_cache(func_name: Optional[str] = "") -> None:
    """
    Drop the in-memory cache of a function.

    :param func_name: The name of the function. If empty or None, reset
        all memory caches (for functions currently in memory).
    """
    _LOG.trace(hprint.func_signature_to_str())
    # None is treated like the empty string.
    func_name = "" if func_name is None else func_name
    hdbg.dassert_isinstance(func_name, str)
    if func_name != "":
        # Single function: remove its entry, if any.
        _CACHE.pop(func_name, None)
        return
    # All functions: reset each function currently in memory.
    _LOG.trace("Before resetting memory cache:\n%s", cache_stats_to_str())
    for name in get_cached_func_names("mem"):
        reset_mem_cache(func_name=name)
    _LOG.trace("After:\n%s", cache_stats_to_str())
def reset_cache(func_name: Optional[str] = "", interactive: bool = True) -> None:
    """
    Reset the memory cache and then the disk cache of a function.

    If `func_name` is empty or None, reset all discoverable caches:
    - All memory caches (for functions currently in memory)
    - All disk cache files in global cache directory matching global prefix
    - All disk cache files for functions with custom cache_dir/cache_prefix
      tracked in _CACHE_PROPERTY

    Note: This cannot discover orphaned cache files in custom directories
    for functions not tracked in _CACHE_PROPERTY.

    :param func_name: The name of the function. If empty or None, reset all
        discoverable caches.
    :param interactive: If True, prompt the user for confirmation before
        resetting the disk cache.
    """
    _LOG.trace(hprint.func_signature_to_str())
    # None is treated like the empty string.
    func_name = "" if func_name is None else func_name
    # Validate the inputs before touching any cache.
    hdbg.dassert_isinstance(func_name, str)
    hdbg.dassert_isinstance(interactive, bool)
    # Memory first, then disk.
    reset_mem_cache(func_name=func_name)
    reset_disk_cache(func_name=func_name, interactive=interactive)
def _get_cache_key(args: Any, kwargs: Any) -> str:
    """
    Serialize the call arguments into the string used as cache key.

    The key is a JSON document with sorted keys; values that JSON cannot
    encode are converted with `str`.

    :param args: The positional arguments of the call.
    :param kwargs: The keyword arguments of the call.
    :return: The cache key.
    """
    call_signature = {"args": args, "kwargs": kwargs}
    cache_key = json.dumps(call_signature, sort_keys=True, default=str)
    _LOG.trace("cache_key=%s", cache_key)
    return cache_key


def mock_cache(func_name: str, cache_key: str, value: Any) -> None:
    """
    Store a value in the cache of a function under a given cache key.

    :param func_name: The name of the function.
    :param cache_key: The cache key.
    :param value: The value to store in the cache.
    """
    global _CACHE
    # Mocking must never touch the main cache directory.
    hdbg.dassert_ne(
        get_cache_dir(),
        get_main_cache_dir(),
        msg="Do not use the main cache directory for mocking",
    )
    hdbg.dassert_isinstance(func_name, str)
    hdbg.dassert_ne(func_name, "", "Function name is empty")
    hdbg.dassert_isinstance(cache_key, str)
    hdbg.dassert_ne(cache_key, "", "Cache key is empty")
    # Add the entry to the current cache of the function.
    mocked_cache = get_cache(func_name)
    mocked_cache[cache_key] = value
    # Register the dict in memory, since `get_cache()` may have returned a
    # dict that is not stored there.
    _CACHE[func_name] = mocked_cache
def mock_cache_from_disk(
    func_name: str, func_cache_data: _FunctionCacheType
) -> None:
    """
    Mock the cache of a function with a whole set of cache entries.

    :param func_name: The name of the function.
    :param func_cache_data: The cache entries to mock, as a mapping from
        cache key to cached value. It is sanity-checked with
        `assert_on_empty=True`.
    """
    hdbg.dassert_isinstance(func_name, str)
    sanity_check_function_cache(func_cache_data, assert_on_empty=True)
    # Mock one entry at a time.
    for cache_key in func_cache_data:
        mock_cache(func_name, cache_key, func_cache_data[cache_key])
+ This allows runtime modification via `set_cache_property(func_name, + property_name, value)`. + + Note: The cache type is only set during first decoration to prevent + accidental cache corruption (e.g., changing from json to pickle would + orphan existing cache files). To change cache type for an existing + function, first clear the property via reset_cache_property() or + manually set it via set_cache_property(). + + :param cache_type: type of cache to use ('json' or 'pickle') + :param write_through: if True, the cache is written to disk after + each access + :param exclude_keys: keys to exclude from the cache key + :param cache_dir: directory for this function's cache files. If + None, uses global cache directory + :param cache_prefix: prefix for this function's cache files. If + None, uses global cache prefix + :param s3_bucket: S3 bucket for this function's cache (e.g., + "s3://my-bucket"). If specified, enables S3 cache syncing for + this function + :param s3_prefix: S3 prefix path for this function's cache + :param aws_profile: AWS profile for S3 access + :param auto_sync_s3: if True, automatically sync to S3 after each + cache update + :return: a decorator that can be applied to a function + """ + + def decorator(func: Callable[..., Any]) -> Callable[..., Any]: + """ + Decorate a function to cache its results. + """ + hdbg.dassert_in(cache_type, ("json", "pickle")) + func_name = getattr(func, "__name__", "unknown_function") + if func_name.endswith("_intrinsic"): + func_name = func_name[: -len("_intrinsic")] + # Store function-specific properties. + # Note: cache type is only set if not already set to prevent accidental + # cache corruption (e.g., changing from json to pickle would orphan + # existing cache files). To change cache type, use reset_cache_property() + # first or manually set it via set_cache_property(). 
+ existing_type = get_cache_property(func_name, "type") + if not existing_type: + set_cache_property(func_name, "type", cache_type) + # Store caching behavior settings. + set_cache_property(func_name, "write_through", write_through) + # Store exclude_keys as empty list if None for consistency. + exclude_keys_list: List[str] = ( + exclude_keys if exclude_keys is not None else [] + ) + set_cache_property(func_name, "exclude_keys", exclude_keys_list) + # Store per-function cache settings. + if cache_dir is not None: + set_cache_property(func_name, "cache_dir", cache_dir) + if cache_prefix is not None: + set_cache_property(func_name, "cache_prefix", cache_prefix) + # Store per-function S3 settings. + if s3_bucket is not None: + set_cache_property(func_name, "s3_bucket", s3_bucket) + if s3_prefix is not None: + set_cache_property(func_name, "s3_prefix", s3_prefix) + if aws_profile is not None: + set_cache_property(func_name, "aws_profile", aws_profile) + set_cache_property(func_name, "auto_sync_s3", auto_sync_s3) + + @functools.wraps(func) + def wrapper( + *args: Any, + force_refresh: bool = False, + abort_on_cache_miss: bool = False, + report_on_cache_miss: bool = False, + **kwargs: Any, + ) -> Any: + """ + Cache the results of the decorated function. + + :param args: Positional arguments for the function. + :param force_refresh: If True, the cache is refreshed + regardless of whether the key exists in the cache. + :param abort_on_cache_miss: If True, an exception is raised + if the key is not found in the cache. + :param report_on_cache_miss: If True, a message is logged if + the key is not found in the cache, and the function + returns "_cache_miss_" instead of accessing the real + value. + :param kwargs: Keyword arguments for the function. + :return: The cached value or the result of the function. + """ + # Get the function name. 
+ func_name = getattr(func, "__name__", "unknown_function") + if func_name.endswith("_intrinsic"): + func_name = func_name[: -len("_intrinsic")] + # Get the cache. + cache = get_cache(func_name) + # Remove keys that should not be part of the cache key. + # Read from properties first, fall back to closure. + exclude_keys_prop = get_cache_property(func_name, "exclude_keys") + exclude_keys_to_use = ( + exclude_keys_prop + if exclude_keys_prop is not None + else exclude_keys_list + ) + # Also exclude cache_mode since it's a control parameter. + excluded_keys = set(exclude_keys_to_use) | {"cache_mode"} + kwargs_for_cache_key = { + k: v for k, v in kwargs.items() if k not in excluded_keys + } + # Prepare kwargs for the actual function call. + # Keep cache_mode since the wrapped function may need it in its signature. + kwargs_for_func = kwargs.copy() + # Resolve effective cache_mode: explicit kwarg wins, otherwise + # fall back to the process-wide global (set via + # `set_global_cache_mode`). Do NOT inject into kwargs_for_func, as + # the wrapped function may not accept a `cache_mode` parameter. + if "cache_mode" in kwargs: + cache_mode = kwargs.get("cache_mode") + else: + cache_mode = _GLOBAL_CACHE_MODE + # `cache_mode` is a special keyword argument to control caching + # behavior. + if cache_mode is not None: + _LOG.trace("cache_mode=%s", cache_mode) + if cache_mode == "REFRESH_CACHE": + # Force to refresh the cache. + _LOG.trace("Forcing cache refresh") + force_refresh = True + if cache_mode == "HIT_CACHE_OR_ABORT": + # Abort if the cache is not hit. + _LOG.trace("Abort on cache miss") + abort_on_cache_miss = True + if cache_mode == "DISABLE_CACHE": + # Disable the cache. + _LOG.trace("Disabling cache") + if _CACHE_DEBUG: + _LOG.warning( + "cache[%s]: COMPUTE (cache disabled by cache_mode=DISABLE_CACHE)", + func_name, + ) + value = func(*args, **kwargs_for_func) + return value + # Get the key. 
+ cache_key = _get_cache_key(args, kwargs_for_cache_key) + # Update the performance stats. + cache_perf = get_cache_perf(func_name) + _LOG.trace("cache_perf is None=%s", cache_perf is None) + if cache_perf: + hdbg.dassert_in("tot", cache_perf) + cache_perf["tot"] += 1 + # Handle a forced refresh. + force_refresh = force_refresh or get_cache_property( + func_name, "force_refresh" + ) + _LOG.trace("force_refresh=%s", force_refresh) + if cache_key in cache and not force_refresh: + _LOG.trace("Cache hit for key='%s'", cache_key) + if _CACHE_DEBUG: + _LOG.warning("cache[%s]: HIT", func_name) + # Update the performance stats. + if cache_perf: + cache_perf["hits"] += 1 + # Retrieve the value from the cache. + value = cache[cache_key] + else: + _LOG.trace("Cache miss for key='%s'", cache_key) + # Update the performance stats. + if cache_perf: + cache_perf["misses"] += 1 + # Abort on cache miss. + abort_on_cache_miss = abort_on_cache_miss or get_cache_property( + func_name, "abort_on_cache_miss" + ) + _LOG.trace("abort_on_cache_miss=%s", abort_on_cache_miss) + if abort_on_cache_miss: + raise ValueError(f"Cache miss for key='{cache_key}'") + # Report on cache miss. + report_on_cache_miss = report_on_cache_miss or get_cache_property( + func_name, "report_on_cache_miss" + ) + _LOG.trace("report_on_cache_miss=%s", report_on_cache_miss) + if report_on_cache_miss: + _LOG.trace("Cache miss for key='%s'", cache_key) + return "_cache_miss_" + if _CACHE_DEBUG: + if force_refresh: + _LOG.warning( + "cache[%s]: RECOMPUTE (cache_mode=REFRESH_CACHE)", + func_name, + ) + else: + _LOG.warning("cache[%s]: COMPUTE (miss)", func_name) + # Access the intrinsic function. + value = func(*args, **kwargs_for_func) + # Update cache. + cache[cache_key] = value + # Ensure the cache dict is stored in memory. + global _CACHE + _CACHE[func_name] = cache + _LOG.trace( + "Updating cache with key='%s' value='%s'", cache_key, value + ) + # Check if write-through is enabled. 
+ write_through_prop = get_cache_property( + func_name, "write_through" + ) + write_through_enabled = ( + write_through_prop + if write_through_prop is not None + else write_through + ) + if write_through_enabled: + _LOG.trace("Writing through to disk") + flush_cache_to_disk(func_name) + # Check if auto-sync to S3 is enabled. + auto_sync = get_cache_property(func_name, "auto_sync_s3") + if auto_sync: + _LOG.debug("Auto-syncing cache to S3 for '%s'", func_name) + _upload_cache_to_s3(func_name) + return value + + return wrapper + + return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py new file mode 100644 index 000000000..e2f54a02c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py @@ -0,0 +1,135 @@ +""" +Import as: + +import helpers.hcfile as hcfile +""" + +import logging +import re +from typing import List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hio as hio + +_LOG = logging.getLogger(__name__) + + +def parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: + """ + Read and parse a cfile. + + :param cfile: path to the cfile + :return: list of tuples, each containing a line number and a transform, e.g., + [(file_name, line_number, transform), ...] + """ + # Read the cfile. + cfile_lines = hio.from_file(cfile) + cfile_lines = cfile_lines.split("\n") + # + ret = [] + # Parse the cfile. + for line in cfile_lines: + _LOG.debug("line=%s", line) + hdbg.dassert_isinstance(line, str) + # Parse the lines of the cfile, like + # ``` + # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: ... + # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for ... 
def inject_todos_from_cfile(
    cfile_txt: str, todo_user: str, comment_prefix: str
) -> None:
    """
    Inject the TODOs from a cfile in the corresponding files.

    Given a cfile with the following content:
    ```
    path/to/file.py:101: Consider creating a helper function
    ```
    the function inserts, right before line 101 of `path/to/file.py`, the line
    ```
    # TODO(user): Consider creating a helper function
    ```
    using the same indentation as the original line 101.

    Lines of the cfile that are empty or that can't be parsed are skipped.
    The TODOs for the same file must have strictly increasing line numbers.

    :param cfile_txt: The content of the cfile.
    :param todo_user: The user to use in the TODO.
    :param comment_prefix: The prefix to use for the comment (e.g., "#")
    """
    # For each file, store
    # - the current file content
    # - the offset (i.e., how many lines we inserted in the file so far, so
    #   we can inject the TODO at the correct line number)
    # - the index of the last line modified to make sure the TODOs are for
    #   increasing line numbers.
    file_content = {}
    for todo_line in cfile_txt.split("\n"):
        _LOG.debug("\n%s", hprint.frame(f"todo line='{todo_line}'"))
        if todo_line.strip() == "":
            continue
        # Parse lines like:
        # ```
        # path/to/file.py:101: The logic is repeated. Consider creating ...
        # path/to/file.py:63:33: F821 undefined name ...
        # ```
        # The file name can't contain `:`, like in `parse_cfile()`, so that
        # for `file:line:col: msg` the line number is not confused with the
        # column number.
        m = re.match(r"^\s*([^\s:]+):(\d+):\s*(.*)$", todo_line)
        if not m:
            _LOG.warning("Can't parse line='%s': skipping", todo_line)
            continue
        file_name, todo_line_number, todo = m.groups()
        todo_line_number = int(todo_line_number)
        _LOG.debug(hprint.to_str("file_name todo_line_number todo"))
        # Update the state if needed.
        if file_name not in file_content:
            _LOG.debug("Reading %s", file_name)
            hdbg.dassert_path_exists(file_name)
            txt = hio.from_file(file_name).split("\n")
            offset = 0
            last_line_modified = 0
            file_content[file_name] = (txt, offset, last_line_modified)
        # Extract the info for the file to process.
        txt, offset, last_line_modified = file_content[file_name]
        _LOG.debug(hprint.to_str("offset last_line_modified"))
        hdbg.dassert_lt(
            last_line_modified,
            todo_line_number,
            "The TODOs don't look like they are increasing line numbers: "
            "TODO at line %d is before the last line modified %d",
            todo_line_number,
            last_line_modified,
        )
        # We subtract 1 from the line number since TODOs count from 1, while
        # Python arrays count from 0.
        act_line_number = todo_line_number - 1 + offset
        hdbg.dassert_lte(0, act_line_number)
        hdbg.dassert_lt(act_line_number, len(txt))
        insert_line = txt[act_line_number]
        _LOG.debug(hprint.to_str("act_line_number insert_line"))
        # Reuse the leading whitespace of the target line verbatim, so that
        # tab-indented files keep their tabs. A blank target line has no
        # indentation to copy and the TODO is inserted as is.
        num_indent_chars = len(insert_line) - len(insert_line.lstrip())
        spaces = insert_line[:num_indent_chars]
        # Build the new line to insert.
        new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}"
        _LOG.debug(hprint.to_str("new_line"))
        # Insert the new line in txt at the correct position.
        txt = txt[:act_line_number] + [new_line] + txt[act_line_number:]
        # Update the state.
        offset += 1
        file_content[file_name] = (txt, offset, todo_line_number)
    # Write updated files back.
    for file_name, (txt, offset, _) in file_content.items():
        # `offset` is the number of TODO lines injected in this file.
        _LOG.info("Writing %d lines in %s", offset, file_name)
        hio.to_file(file_name, "\n".join(txt))
+ + This method should only be used when a new Assistant is needed. + Otherwise, use the Assistant name to retrieve an existing Assistant. + + :param assistant_name: name of the Assistant to be created + :param instructions: instruction string that describes the expected + behavior of assistant + :param model: GPT model used by the assistant + :param use_retrieval: enable the retrieval tool from OpenAI + :param use_code_interpreter: enable the code interpreter tool from + OpenAI + :param use_function: enable the function tool from OpenAI (To be + implemented) + """ + # Create the assistant + tools = [] + if use_retrieval: + tools.append({"type": "retrieval"}) + if use_code_interpreter: + tools.append({"type": "code_interpreter"}) + if use_function: + tools.append(use_function) + if not model: + model = "gpt-3.5-turbo-1106" + assistant = client.beta.assistants.create( + instructions=instructions, + name=assistant_name, + model=model, + tools=tools, + ) + return assistant.id + + +def update_assistant_by_id( + assistant_id: str, + *, + instructions: str = "", + name: str = "", + tools: Optional[List[Dict[str, str]]] = None, + model: str = "", + file_ids: Optional[List[str]] = None, +) -> str: + """ + Update an existing OpenAI Assistant in our OpenAI Organization. 
def delete_assistant_by_id(assistant_id: str) -> None:
    """
    Delete an Assistant from our OpenAI Organization.

    :param assistant_id: id of the Assistant to delete
    """
    client.beta.assistants.delete(assistant_id)


def get_all_assistants() -> List[openai.types.beta.assistant.Assistant]:
    """
    Get all available Assistant objects in our OpenAI Organization.

    :return: the Assistants, in the descending order requested from the API
    """
    # `limit` is the page size and must be an integer. Iterating over the
    # returned page object fetches the following pages on demand, so the
    # result is not capped to the first 100 Assistants.
    assistants_page = client.beta.assistants.list(
        order="desc",
        limit=100,
    )
    assistants = list(assistants_page)
    return assistants


def get_all_assistant_names() -> List[str]:
    """
    Get all available Assistant names in our OpenAI Organization.

    :return: the `name` attribute of each Assistant, in the same order as
        `get_all_assistants()`
    """
    # NOTE(review): presumably an Assistant created without a name has
    # `name=None`, which would end up in this list; confirm against callers.
    assistants = get_all_assistants()
    return [assistant.name for assistant in assistants]
def _upload_to_gpt_no_set_id(path_from_root: str) -> str:
    """
    Upload a file to OpenAI.

    This method will NOT set File ID to cache.

    :param path_from_root: path of the file, relative to `prefix_to_root`
    :return: OpenAI File ID of the uploaded file
    """
    _LOG.info("Uploading file %s to chatgpt", path_from_root)
    file_path = os.path.join(prefix_to_root, path_from_root)
    # Must use 'rb' regardless of file type.
    # The context manager closes the handle when the upload is done, also when
    # the request raises.
    with open(file_path, "rb") as file_handle:
        upload_file_response = client.files.create(
            file=file_handle,
            purpose="assistants",
        )
    gpt_id = upload_file_response.id
    return gpt_id


def _find_cache_entry(dictionary: Dict, path_from_root: str) -> Dict[str, str]:
    """
    Return the cache entry of a file, without uploading anything to OpenAI.

    :param dictionary: nested cache dict, keyed by the path components
    :param path_from_root: "/"-separated path of the file
    :return: the entry stored for the file (the same object stored in
        `dictionary`, not a copy)
    :raises KeyError: if a component of the path is not in the cache
    """
    cur = dictionary
    for level in path_from_root.split("/"):
        cur = cur[level]
    return cur


def _get_gpt_id_file(dictionary: Dict, path_from_root: str) -> Dict[str, str]:
    """
    Get the OpenAI File ID for a given file using a specific cache.

    If this file has not been uploaded to OpenAI, this method will
    upload it and generate its OpenAI File ID.

    :param dictionary: nested cache dict, keyed by the path components
    :param path_from_root: "/"-separated path of the file
    :return: the cache entry of the file, containing the key "gpt_id"
    """
    cur = _find_cache_entry(dictionary, path_from_root)
    if "gpt_id" not in cur:
        cur["gpt_id"] = _upload_to_gpt_no_set_id(path_from_root)
        # Persist the new id right away so that it is not uploaded again.
        _dump_gpt_ids(dictionary)
    return cur


def _set_gpt_id(path_from_root: str, gpt_id: str) -> None:
    """
    Manually set the cached OpenAI File ID of a given file.

    This method should ONLY be called if a file manually uploaded to
    OpenAI. It will NOT upload the given file to OpenAI.

    :param path_from_root: "/"-separated path of the file
    :param gpt_id: OpenAI File ID to store for the file
    """
    gpt_id_dict = _load_gpt_ids()
    # Use the plain lookup: `_get_gpt_id_file()` would upload the file when no
    # id is cached yet, which is exactly the case when setting an id.
    item = _find_cache_entry(gpt_id_dict, path_from_root)
    item["gpt_id"] = gpt_id
    _dump_gpt_ids(gpt_id_dict)


def _remove_gpt_id(path_from_root: str) -> None:
    """
    Remove the cached ID of a given file.

    It does NOT fully remove a file from OpenAI. Use `remove_from_gpt`
    to fully remove a file.

    :param path_from_root: "/"-separated path of the file
    """
    gpt_id_dict = _load_gpt_ids()
    # Use the plain lookup so that removing the id of a file that was never
    # uploaded does not upload it first.
    item = _find_cache_entry(gpt_id_dict, path_from_root)
    if "gpt_id" in item:
        del item["gpt_id"]
    _dump_gpt_ids(gpt_id_dict)
def add_files_to_assistant_by_name(
    assistant_name: str, file_path_list: List[str]
) -> str:
    """
    Link all given files to an assistant.

    An Assistant can hold only 20 files, the oldest files will be
    unlinked automatically.

    :param assistant_name: name of the Assistant to link the files to
    :param file_path_list: paths of the files to link, converted to OpenAI
        File IDs through `get_gpt_id()`
    :return: the value returned by `update_assistant_by_id()`
    """
    max_num_files = 20
    assistant_id = get_assistant_id_by_name(assistant_name)
    # Request the linked files from the oldest to the newest, so that cutting
    # the list from the left unlinks the oldest files first.
    assistant_files = client.beta.assistants.files.list(
        assistant_id=assistant_id, order="asc"
    ).data
    existing_file_ids = [file.id for file in assistant_files]
    # Remove duplicates while keeping the order: going through a `set` would
    # shuffle the ids and make the truncation below drop arbitrary files.
    new_file_ids = list(
        dict.fromkeys(get_gpt_id(path) for path in file_path_list)
    )
    requested_ids = set(new_file_ids)
    # A file that is linked again counts as new, so it is moved to the end.
    kept_file_ids = [
        file_id for file_id in existing_file_ids if file_id not in requested_ids
    ]
    file_ids = (kept_file_ids + new_file_ids)[-max_num_files:]
    return update_assistant_by_id(assistant_id, file_ids=file_ids)
def wait_for_run_result(thread_id: str, run_id: str, timeout: int = 180) -> List:
    """
    Wait for the thread to be processed.

    This is similar to waiting for ChatGPT's typing.

    The run is polled every 5 seconds until it is completed, it ends without a
    result, or the timeout is reached.

    :param thread_id: id of the thread the run belongs to
    :param run_id: id of the run to wait for
    :param timeout: maximum time to wait, in seconds
    :return: the messages of the thread, as returned by the message list
        endpoint
    :raises TimeoutError: if the run does not complete within `timeout`, or if
        it ends as failed, cancelled or expired
    """
    poll_period_in_secs = 5
    # Statuses after which the run can not complete anymore.
    unsuccessful_statuses = ("failed", "cancelled", "expired")
    finished = False
    _LOG.info("Waiting for chatgpt response...")
    for i in range(math.ceil(timeout / poll_period_in_secs)):
        _LOG.info(
            "%s/%s seconds before timeout", i * poll_period_in_secs, timeout
        )
        time.sleep(poll_period_in_secs)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id, run_id=run_id
        )
        if run.status in unsuccessful_statuses:
            # Fail right away instead of polling a dead run until the
            # timeout. The exception type is the same one raised on timeout,
            # so that callers handling it keep working.
            raise TimeoutError(
                "Failed to retrieve response from OpenAI: "
                f"run ended with status '{run.status}'."
            )
        finished = run.status == "completed"
        if finished:
            break
    if not finished:
        raise TimeoutError("Failed to retrieve response from OpenAI.")
    messages = client.beta.threads.messages.list(thread_id=thread_id).data
    return messages
wait_for_run_result(thread_id, run_id) + output = messages[0].content[0].text.value + if vim_mode or not output_file_path: + sys.stdout.write(output) + if output_file_path: + with open(output_file_path, "w", encoding="utf-8") as fp: + fp.write(output) + return output diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py new file mode 100644 index 000000000..18ce63d7d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py @@ -0,0 +1,32 @@ +""" +Import as: + +import helpers.hchatgpt_instructions as hchainst +""" + +instructions = { + "MarkdownLinter": """ +You are a markdown linter. +If you are given a piece of text under markdown format, treat these text as the +content of the markdown content you need to lint. +If you are given a filename, you should find the file in your linked files, use +it as the markdown content you need to lint. +After get the markdown content, find and fix grammatical errors in that content +with the minimum amount of changes possible and preserve the formatting. +You don't need to add periods at the end of each sentence. +You should not add ```markdown ``` around the output content. +Your only output message should be the linted result of that file, no additional +explanations should be added in your output. + """, + "DocWriter": """ +You are a documentation writer. +If you are given several python code files, try to understand these files and +how they may work. +You should write a markdown document about these files for users that have not +read the codes to know the basic workflow of them, your can use examples to show +the user how they can easily use those codes. 
+For the format of markdown document, you can use files linked to you as +reference. You don't need to strictly follow the format, the goal is to make the +document easy to understand + """, +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py new file mode 100644 index 000000000..2fd175bf4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py @@ -0,0 +1,183 @@ +""" +Import as: + +import helpers.hcoverage as hcovera +""" + +import glob +import logging +import os +import pathlib +import site +import subprocess +import sysconfig + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def _detect_site_packages() -> pathlib.Path: + """ + Return the Path to the site-packages directory for the active interpreter. + + - Try sysconfig first + - Fall back to site.getsitepackages() or user-site. + """ + try: + purelib = sysconfig.get_path("purelib") + if purelib: + return pathlib.Path(purelib) + except (KeyError, IOError): + _LOG.debug( + "sysconfig.get_path('purelib') failed, falling back to site packages" + ) + try: + sp_dirs = site.getsitepackages() + except AttributeError: + sp_dirs = [] + for d in sp_dirs: + if "site-packages" in d: + return pathlib.Path(d) + return pathlib.Path(site.getusersitepackages()) + + +def inject(coveragerc: str = ".coveragerc") -> None: + """ + Install the coverage startup hook into this env site-packages. 
+ """ + rc = pathlib.Path(coveragerc).resolve() + os.environ["COVERAGE_PROCESS_START"] = str(rc) + _LOG.debug("Set COVERAGE_PROCESS_START to %s", rc) + sp = _detect_site_packages() + target = sp / "coverage.pth" + hook_line = "import coverage; coverage.process_startup()" + cmd = f'echo "{hook_line}" | sudo tee "{target}" > /dev/null' + try: + hsystem.system(cmd) + _LOG.debug("Installed coverage hook to %s via sudo tee", target) + except (OSError, subprocess.SubprocessError) as e: + hdbg.dassert(False, f"Failed to install coverage hook via sudo tee: {e}") + + +def remove() -> None: + """ + Remove the coverage startup hook from this env site-packages. + """ + sp = _detect_site_packages() + target = sp / "coverage.pth" + if target.is_file(): + cmd = f'sudo rm -f "{target}"' + try: + hsystem.system(cmd) + _LOG.info("Removed coverage hook from %s via sudo rm", target) + except Exception as e: + _LOG.error("Failed to remove coverage hook via sudo rm: %s", e) + raise + else: + # TODO(Maddy): Is this acceptable? + _LOG.warning("No coverage.pth found in %s", sp) + # Remove coverage environment variables. + try: + if "COVERAGE_PROCESS_START" in os.environ: + del os.environ["COVERAGE_PROCESS_START"] + _LOG.info("Removed COVERAGE_PROCESS_START from environment") + else: + _LOG.debug("COVERAGE_PROCESS_START not found in environment") + except Exception as e: + _LOG.error("Failed to remove COVERAGE_PROCESS_START: %s", e) + raise + + +def generate_coverage_dockerfile() -> str: + """ + Build a Dockerfile string that appends coverage support. + """ + # This requires to: + # - Install coverage, pytest, pytest-cov at build time + # - Create /coverage_data and writes .coveragerc + # - Set ENV COVERAGE_PROCESS_START to /coverage_data/.coveragerc + # - Write a coverage.pth into site-packages so coverage auto-starts + txt = """ + # Install coverage and testing dependencies. 
+ RUN pip install --no-cache-dir coverage pytest pytest-cov + + # Create coverage data directory with proper permissions. + RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data + + # Setup coverage configuration. + COPY .coveragerc /app/coverage_data/.coveragerc + ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc + + # Create coverage.pth file for automatic startup. + # This ensures coverage tracking starts automatically when Python runs. + RUN python - < None: + """ + Execute shell commands to run coverage steps in a Docker container. + + Assumes: + - A valid .coveragerc exists in the current working directory. + - coverage_data/ is the mounted folder inside the container. + """ + commands = [ + "mkdir -p coverage_data", + "chmod 777 coverage_data", + "cp .coveragerc coverage_data/.coveragerc", + "chmod 644 coverage_data/.coveragerc", + ] + for cmd in commands: + hsystem.system(cmd, suppress_output=False) + + +def coverage_combine() -> None: + """ + Execute shell commands to combine coverage data. + + Assumes: + - .coverage.* files are present in the current directory or coverage_data/. + """ + # Check if there are any coverage files in coverage_data/ and copy them. + if os.path.exists("coverage_data"): + coverage_files_cmd = ( + "find coverage_data -name '.coverage.*' 2>/dev/null | wc -l" + ) + rc = hsystem.system(coverage_files_cmd, abort_on_error=False) + if rc == 0: + # Use a simple existence check instead of parsing command output. + coverage_files = glob.glob("coverage_data/.coverage.*") + if coverage_files: + _LOG.info( + "Found coverage files in coverage_data/, copying to current directory" + ) + commands = [ + "cp coverage_data/.coverage.* . 2>/dev/null || true", + "rm -rf coverage_data/.coverage.* 2>/dev/null || true", + ] + for cmd in commands: + hsystem.system(cmd, suppress_output=False) + # Check if there are any .coverage.* files to combine. 
def _read_csv_range(
    csv_path: str, from_: int, to: int, **kwargs: Any
) -> pd.DataFrame:
    """
    Read a specified row range of a CSV file and convert to a DataFrame.

    This function:
    - assumes the CSV file to have header, considered to be row 0.
    - reads [from_, to), e.g., (to - from_) lines following list slicing semantics.

    :param csv_path: location of CSV file
    :param from_: first line to read (header is row 0 and is always read)
    :param to: last line to read, not inclusive
    :param kwargs: forwarded to `pd.read_csv()`
    :return: DataFrame with columns from CSV line 0 (header)
    """
    hdbg.dassert_lt(0, from_, msg="Row 0 assumed to be header row")
    hdbg.dassert_lt(from_, to, msg="Empty range requested!")
    # Skip the rows between the header and the first requested row.
    skiprows = list(range(1, from_))
    nrows = to - from_
    df = pd.read_csv(csv_path, skiprows=skiprows, nrows=nrows, **kwargs)
    # Compare with the number of requested rows: the df can never have `to`
    # rows, since at most `to - from_` rows are read.
    if df.shape[0] < nrows:
        _LOG.warning(
            "Number of df rows = %i vs requested = %i", df.shape[0], nrows
        )
    return df


# TODO(gp): There is no use of this function.
def build_chunk(
    csv_path: str,
    col_name: str,
    start: int,
    *,
    nrows_at_a_time: int = 1000,
    **kwargs: Any,
) -> pd.DataFrame:
    """
    Build a DataFrame from a CSV subset as follows:

    - Names the columns using the header line (row 0)
    - Reads the value in (row, col) coordinates (`start`, `col_name`) (if it
      exists) as `value`
    - Adds row `start` and all subsequent contiguous rows with `value` in
      column `col_name`

    For memory efficiency, the CSV is processed in chunks of size `nrows_at_a_time`.

    :param csv_path: location of CSV file
    :param col_name: name of column whose values define chunks
    :param start: first row to process
    :param nrows_at_a_time: size of chunks to process
    :param kwargs: forwarded to `pd.read_csv()` for every read
    :return: DataFrame with columns from CSV line 0
    """
    hdbg.dassert_lt(0, start)
    dfs: List[pd.DataFrame] = []
    init_df = _read_csv_range(csv_path, start, start + 1, **kwargs)
    if init_df.shape[0] < 1:
        return init_df
    val = init_df[col_name].iloc[0]
    _LOG.debug("Building chunk for %s", val)
    counter = 0
    while True:
        from_ = start + counter * nrows_at_a_time
        # Use the same reading options as for the initial read.
        df = _read_csv_range(
            csv_path, from_, from_ + nrows_at_a_time, **kwargs
        )
        # Count the matching rows at the beginning of the chunk, by position,
        # so that a later reappearance of `val` is not merged into the run.
        matches = (df[col_name] == val).to_numpy()
        if matches.all():
            num_matching = len(matches)
        else:
            # Position of the first row that does not match.
            num_matching = int(matches.argmin())
        # Break if there are no matches (or no rows at all).
        if num_matching == 0:
            break
        dfs.append(df.iloc[:num_matching])
        # Stop if we have reached a new value or run out of rows to read.
        if num_matching < nrows_at_a_time:
            break
        counter += 1
    if not dfs:
        return pd.DataFrame()
    return pd.concat(dfs, axis=0).reset_index(drop=True)
+ + The phases are: + - Read the CSV in chunks as DataFrame + - Key each row of the DataFrame using a `groupby` + - "Reduce" keyed groups by writing and appending to a CSV + + :param csv_path: input CSV path + :param out_dir: output dir for CSV with filenames corresponding to keys + :param key_func: function to apply to each chunk DataFrame to key rows + Should return an iterable with elements like (key, df) + :param chunk_preprocessor: function to apply to each chunk DataFrame before + applying key_func + :param chunk_size: chunk_size of input to process + """ + # Read CSV data in chunks. + chunks = pd.read_csv(csv_path, chunksize=chunk_size) + # Preprocess chunk, if needed. + if chunk_preprocessor is not None: + chunks = map(chunk_preprocessor, chunks) + # Apply key_func to each chunk. + keyed_group_blocks = map(key_func, chunks) + # Append results. + for block in keyed_group_blocks: + for idx, df in block: + file_name = os.path.join(out_dir, idx + ".csv") + _append_csv(df, file_name) + + +def convert_csv_to_pq( + csv_path: str, + pq_path: str, + *, + normalizer: Optional[Callable] = None, + header: Optional[int] = 0, + compression: Optional[str] = "gzip", +) -> None: + """ + Convert CSV file to Parquet file. + + Output of `csv_map_reduce()` is typically header-less to support append mode, + and so `normalizer` may be used to add appropriate headers. Note that Parquet + requires string column names, whereas Pandas by default uses integer column + names. + + :param csv_path: full path of CSV + :param pq_path: full path of parquet + :param header: header specification of CSV + :param normalizer: function to apply to df before writing to PQ + """ + df = pd.read_csv(csv_path, header=header) + # TODO(Paul): Ensure that one of header, normalizer is not None. 
def convert_csv_dir_to_pq_dir(
    csv_dir: str,
    pq_dir: str,
    *,
    normalizer: Optional[Callable] = None,
    header: Optional[int] = None,
) -> None:
    """
    Apply `convert_csv_to_pq()` to all files in `csv_dir`.

    Files whose name does not end in `.csv` or `.csv.gz` are skipped with a
    warning.

    :param csv_dir: directory storing CSV files on S3 or local
    :param pq_dir: target directory to save PQ files (only local
        filesystem)
    :param header: header specification of CSV
    :param normalizer: function to apply to df before writing to PQ
    """
    # Get the filenames in `csv_dir`.
    if hs3.is_s3_path(csv_dir):
        # TODO(gp): Pass aws_profile.
        s3fs = hs3.get_s3fs("am")
        # NOTE(review): if `ls()` returns full paths instead of bare file
        # names, the `os.path.join(csv_dir, filename)` below repeats the
        # prefix - confirm.
        filenames = s3fs.ls(csv_dir)
    else:
        # Local filesystem.
        hdbg.dassert_dir_exists(csv_dir)
        # TODO(Paul): check .endswith(".csv") or do glob(csv_dir + "/*.csv")
        filenames = os.listdir(csv_dir)
    hdbg.dassert(filenames, "No files in the directory '%s'", csv_dir)
    # Process all the filenames.
    # TODO(gp): Add tqdm.
    # TODO(gp): Consider parallelizing.
    for filename in filenames:
        # Remove .csv/.csv.gz.
        csv_stem = hio.remove_extension(
            filename, ".csv", check_file_exists=True, check_has_extension=False
        )
        if csv_stem is None:
            csv_stem = hio.remove_extension(
                filename,
                ".csv.gz",
                check_file_exists=True,
                check_has_extension=False,
            )
        if csv_stem is None:
            # Report the name of the file: `csv_stem` is always `None` here.
            _LOG.warning(
                "Skipping filename=%s since it has invalid extension", filename
            )
            continue
        # Convert file to PQ.
        pq_filename = csv_stem + ".pq"
        convert_csv_to_pq(
            os.path.join(csv_dir, filename),
            os.path.join(pq_dir, pq_filename),
            normalizer=normalizer,
            header=header,
        )
+# TODO(gp): path_to_csv -> file_name +def convert_csv_to_dict(path_to_csv: str, remove_nans: bool) -> Dict[Any, Any]: + """ + Convert a CSV file storing a dataframe into a JSON-compatible dict. + + :param path_to_csv: path to the CSV file + :param remove_nans: whether to remove NaNs from the dictionary + :return: a JSON-compatible dict with the dataframe data + """ + hdbg.dassert_file_exists(path_to_csv) + # Load the dataframe from a CSV file. + df = pd.read_csv(path_to_csv) + # Transform the dataframe into a dict. + dict_df = df.to_dict(orient="list") + if remove_nans: + # Remove NaNs from the dict. + for key in dict_df: + dict_df[key] = [x for x in dict_df[key] if not pd.isnull(x)] + return dict_df # type: ignore + + +# TODO(gp): path_to_csv -> file_name +def save_csv_as_json( + path_to_csv: str, remove_nans: bool, path_to_json: Optional[str] = None +) -> None: + """ + Convert the df from a CSV into a dict and save it into a JSON file. + + If the `path_to_json` is not provided, the JSON is saved in the folder where + the CSV file is located. + + :param path_to_csv: path to the CSV file + :param remove_nans: whether to remove NaNs from the dictionary + :param path_to_json: path to save the JSON file + """ + # Convert the df from the CSV into a JSON-compatible dict. + dict_df = convert_csv_to_dict(path_to_csv, remove_nans) + # Determine the JSON destination path. + if path_to_json is None: + path_to_json = hio.change_filename_extension( + path_to_csv, ".csv", ".json" + ) + # Save the dict into a JSON file. + hio.to_json(path_to_json, dict_df) + + +# ############################################################################# +# CSV files with types +# ############################################################################# + + +def to_typed_csv(df: pd.DataFrame, file_name: str) -> str: + """ + Convert df into CSV and creates a file with the dtypes of columns. 
def from_typed_csv(file_name: str) -> pd.DataFrame:
    """
    Load CSV file as df applying the original types of columns.

    This function uses a file with name `file_name.types` to load
    information about the column types.

    Columns recorded with a timezone-naive `datetime64` dtype are parsed as
    dates, since `pd.read_csv()` does not accept that dtype through `dtype`.

    :param file_name: path of the CSV file
    :return: df with the dtypes stored in the `.types` file
    """
    # Load the types.
    dtypes_filename = file_name + ".types"
    hdbg.dassert_path_exists(dtypes_filename)
    with open(dtypes_filename) as dtypes_file:
        dtypes_dict = ast.literal_eval(list(dtypes_file)[0])
    # Separate the timezone-naive datetime columns (e.g., `datetime64[ns]`):
    # the timezone-aware ones have a dtype name with a comma, e.g.,
    # `datetime64[ns, UTC]`, and are left untouched.
    datetime_cols = [
        col
        for col, dtype in dtypes_dict.items()
        if str(dtype).startswith("datetime64") and "," not in str(dtype)
    ]
    read_csv_kwargs = {
        "dtype": {
            col: dtype
            for col, dtype in dtypes_dict.items()
            if col not in datetime_cols
        }
    }
    if datetime_cols:
        read_csv_kwargs["parse_dates"] = datetime_cols
    # Load the data, applying the types.
    df = pd.read_csv(file_name, **read_csv_kwargs)
    for col in datetime_cols:
        # Restore the exact dtype (e.g., the time unit) that was recorded.
        df[col] = df[col].astype(dtypes_dict[col])
    return df
+ +import collections +import functools +import logging +import operator +from typing import Any, Dict, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +_METHOD_TO_APPLY = Dict[str, Dict[str, Any]] + + +def _combine_masks( + masks: pd.DataFrame, mode: str, info: collections.OrderedDict +) -> pd.Series: + if mode == "and": + combined_mask = masks.all(axis=1) + elif mode == "or": + combined_mask = masks.any(axis=1) + else: + raise ValueError(f"Invalid `mode`='{mode}'") + if combined_mask.sum() == 0: + _LOG.warning("No data remaining after filtering.") + info["nrows_remaining"] = combined_mask.sum() + return combined_mask + + +def filter_data_by_values( + df: pd.DataFrame, + filters: Dict[Union[int, str], Tuple[Any, ...]], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe rows based on column values. + + :param df: dataframe + :param filters: `{col_name: (possible_values)}` + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, vals in filters.items(): + hdbg.dassert_isinstance(vals, tuple) + mask = df[col_name].isin(vals) + info[f"n_{col_name}"] = mask.sum() + info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +def filter_data_by_comparison( + df: pd.DataFrame, + filters: Dict[ + Union[int, str], Union[Tuple[str, Any], Tuple[Tuple[str, Any], ...]] + ], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe by comparing columns to values. + + :param df: dataframe + :param filters: `{col_name: (comparison_method, value)}` or + `{col_name: ((comparison_method_i, value_i))}`. + `comparison_method` is one of the ("eq", "ne", "le", "lt", "ge", "gt") + pandas method names. + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, tuple_ in filters.items(): + if not isinstance(tuple_[0], tuple): + tuple_ = (tuple_,) # type: ignore + for comparison_method, val in tuple_: + hdbg.dassert_in( + comparison_method, ("eq", "ne", "le", "lt", "ge", "gt") + ) + mask = getattr(df[col_name], comparison_method)(val) + info[f"n_{col_name}_{comparison_method}_{val}"] = mask.sum() + info[f"perc_{col_name}_{comparison_method}_{val}"] = hprint.perc( + mask.sum(), df.shape[0] + ) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +def filter_data_by_method( + df: pd.DataFrame, + filters: Dict[Union[int, str], _METHOD_TO_APPLY], + mode: str, + info: Optional[collections.OrderedDict] = None, +) -> pd.DataFrame: + """ + Filter dataframe by calling a method specified for each column. + + :param df: dataframe + :param filters: `{col_name: {method: kwargs}}`, where `method` is the + method called on the dataframe column, e.g. "isin" or "str.contains", + and `kwargs` are the kwargs for this method + :param mode: `and` for conjunction and `or` for disjunction of filters + :param info: information storage + :return: filtered dataframe + """ + if info is None: + info = collections.OrderedDict() + info["nrows"] = df.shape[0] + if not filters: + info["nrows_remaining"] = df.shape[0] + return df.copy() + # Create filter masks for each column. 
+ masks = [] + for col_name, method_dict in filters.items(): + for method, kwargs in method_dict.items(): + mask = operator.attrgetter(method)(df[col_name])(**kwargs) + info[f"n_{col_name}"] = mask.sum() + info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) + masks.append(mask) + masks = pd.concat(masks, axis=1) + combined_mask = _combine_masks(masks, mode, info) + filtered_df = df.loc[combined_mask].copy() + return filtered_df + + +# ############################################################################# + + +def apply_nan_mode( + srs: pd.Series, + mode: str = "leave_unchanged", + info: Optional[dict] = None, +) -> pd.Series: + """ + Process NaN values in a series according to the parameters. + + :param srs: pd.Series to process + :param mode: method of processing NaNs + - "leave_unchanged" - no transformation + - "drop" - drop all NaNs + - "ffill" - forward fill not leading NaNs + - "ffill_and_drop_leading" - do ffill and drop leading NaNs + - "fill_with_zero" - fill NaNs with 0 + - "strict" - raise ValueError that NaNs are detected + :param info: information storage + :return: transformed copy of input series + """ + hdbg.dassert_isinstance(srs, pd.Series) + if srs.empty: + _LOG.warning("Empty input series `%s`", srs.name) + if mode == "leave_unchanged": + res = srs.copy() + elif mode == "drop": + res = srs.dropna().copy() + elif mode == "ffill": + res = srs.ffill().copy() + elif mode == "ffill_and_drop_leading": + res = srs.ffill().dropna().copy() + elif mode == "fill_with_zero": + res = srs.fillna(0).copy() + elif mode == "strict": + res = srs.copy() + if srs.isna().any(): + raise ValueError(f"NaNs detected in mode `{mode}`") + else: + raise ValueError(f"Unrecognized mode `{mode}`") + # + if info is not None: + hdbg.dassert_isinstance(info, dict) + # Dictionary should be empty. 
+ hdbg.dassert(not info) + info["series_name"] = srs.name + info["num_elems_before"] = len(srs) + info["num_nans_before"] = np.isnan(srs).sum() + info["num_elems_removed"] = len(srs) - len(res) + info["num_nans_imputed"] = ( + info["num_nans_before"] - info["num_elems_removed"] + ) + info["percentage_elems_removed"] = ( + 100.0 * info["num_elems_removed"] / info["num_elems_before"] + ) + info["percentage_elems_imputed"] = ( + 100.0 * info["num_nans_imputed"] / info["num_elems_before"] + ) + return res + + +@functools.lru_cache() +def compute_points_per_year_for_given_freq(freq: str) -> float: + """ + Return the number of index time points per year. + + :param freq: string identifier of date frequency + :return: number of time points per year (approximate) + """ + # `pd.date_range` breaks for zero-period frequencies, so we need to work + # around that. + try: + # Leap years: 2012, 2016. + points_in_span = pd.date_range( + freq=freq, start="2012-01-01", end="2019-12-31" + ).size + span_in_years = 8 + points_per_year: float = points_in_span / span_in_years + return points_per_year + except ZeroDivisionError: + return 0.0 + + +# ############################################################################# + + +def infer_sampling_points_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: + """ + Return the number of index time points per year. + + TODO(*): Consider extending to all frequencies and count points by + explicitly building indices of the given frequency. + + :param df: series or dataframe with non-null `df.index.freq` + :return: number of time points per year (approximate) + """ + hdbg.dassert(hasattr(df.index, "freq") and df.index.freq is not None) + freq = df.index.freq + # TODO(*): Make start, end dates parameters that can be passed in. + return compute_points_per_year_for_given_freq(freq) + + +def compute_count_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: + """ + Return df.count() divided by the length of `df` in years. 
+ """ + hdbg.dassert( + hasattr(df.index, "freq") and df.index.freq is not None, + msg="`df` must have a `DatetimeIndex` with a `freq`", + ) + assert hasattr(df.index, "freq") and df.index.freq is not None + freq = df.index.freq + # Calculate the time span of `df` in years. + points_per_year = compute_points_per_year_for_given_freq(freq) + span_in_years = df.size / points_per_year + # Determine the number of non-NaN/inf/etc. data points per year. + count_per_year = df.count() / span_in_years + count_per_year = cast(float, count_per_year) + return count_per_year + + +# ############################################################################# + + +def remove_duplicates( + df: pd.DataFrame, + duplicate_columns: Optional[List[str]], + control_column: Optional[str], +) -> pd.DataFrame: + """ + Remove duplicates from DataFrame. + + :param df: DataFrame to process + :param duplicate_columns: subset of column names, None for all + :param control_column: column max value of which determines the kept + row + :return: DataFrame with removed duplicates + """ + # Fix maximum value of control column at the bottom. + if control_column: + df = df.sort_values(by=control_column) + duplicate_columns = duplicate_columns or df.columns + df = df.drop_duplicates(subset=duplicate_columns) + # Sort by index to return to original view. 
+ df = df.sort_index() + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py new file mode 100644 index 000000000..e63152593 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py @@ -0,0 +1,909 @@ +""" +Import as: + +import helpers.hdatetime as hdateti +""" + +import asyncio +import calendar +import datetime +import logging +import re +from typing import Callable, Iterable, Optional, Tuple, Union + +# TODO(gp): Use hdbg.WARNING +_WARNING = "\033[33mWARNING\033[0m" + +# Avoid dependency from other `helpers` modules to prevent import cycles. + +import pandas as pd # noqa: E402 # pylint: disable=wrong-import-position + +# TODO(gp): Check if dateutils is equivalent to `pytz` or better so we can simplify +# the dependencies. +try: + import pytz +except ModuleNotFoundError: + _module = "pytz" + print(_WARNING + f": Can't find {_module}: continuing") + + +import helpers.hdbg as hdbg # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hprint as hprint # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hwall_clock_time as hwacltim # noqa: E402 # pylint: disable=wrong-import-position + +_LOG = logging.getLogger(__name__) + +# We use the type `Datetime` to allow flexibility in the interface exposed to client. 
# The typical pattern is:
# - we call `to_datetime()`, as soon as we enter functions exposed to users,
#   to convert the user-provided datetime into a `datetime.datetime`
# - we use only `datetime.datetime` in the private interfaces
# TODO(gp): In practice we are using `pd.Timestamp`
#
# It's often worth importing this file even for just the type `Datetime`,
# since typically as soon as the caller uses this type, they also want to use
# `to_datetime()` and `dassert_*()` functions.
# TODO(gp): It would be better to call this `GeneralDateTime`, `FlexibleDateTime`,
# and rename `StrictDatetime` -> `DateTime`.
Datetime = Union[str, pd.Timestamp, datetime.datetime]

# The type `StrictDatetime` is for stricter interfaces, although it is a bit of a
# compromise.
# Either one wants to allow everything that can be interpreted as a datetime (and
# then use `Datetime`), or strict (and then use only `datetime.datetime`).
StrictDatetime = Union[pd.Timestamp, datetime.datetime]


def dassert_is_datetime(datetime_: Datetime) -> None:
    """
    Assert that `datetime_` is of type `Datetime`.

    :param datetime_: object to check; accepted types are `str`,
        `pd.Timestamp` and `datetime.datetime`
    """
    hdbg.dassert_isinstance(
        datetime_,
        (str, pd.Timestamp, datetime.datetime),
        "datetime_='%s' of type '%s' is not a DateTimeType",
        datetime_,
        str(type(datetime_)),
    )


def dassert_is_strict_datetime(datetime_: StrictDatetime) -> None:
    """
    Assert that `datetime_` is of type `StrictDatetime`.

    :param datetime_: object to check; accepted types are `pd.Timestamp` and
        `datetime.datetime` (strings are rejected)
    """
    hdbg.dassert_isinstance(
        datetime_,
        (pd.Timestamp, datetime.datetime),
        "datetime_='%s' of type '%s' is not a StrictDateTimeType",
        datetime_,
        str(type(datetime_)),
    )


def dassert_str_is_date(date: str) -> None:
    """
    Check if an input string is a date.

    :param date: date as string, e.g., "20221101"
    :raises ValueError: if `date` cannot be parsed with the `%Y%m%d` format
    """
    hdbg.dassert_isinstance(date, str)
    try:
        _ = datetime.datetime.strptime(date, "%Y%m%d")
    except ValueError as e:
        # Chain the original exception so that the parsing traceback is kept.
        raise ValueError(
            f"date='{date}' doesn't have the right format: {e}"
        ) from e


# TODO(Grisha): also pass timezone.
def to_datetime(datetime_: Datetime) -> datetime.datetime:
    """
    Convert a `Datetime` into a `datetime.datetime`.

    :param datetime_: string, `pd.Timestamp` or `datetime.datetime`
    :return: tz-aware or naive datetime.datetime
    """
    dassert_is_datetime(datetime_)
    if isinstance(datetime_, str):
        # Strings are parsed through pandas first.
        datetime_ = pd.Timestamp(datetime_)
    if isinstance(datetime_, pd.Timestamp):
        datetime_ = datetime_.to_pydatetime()
    return datetime_  # type: ignore


def to_timestamp(datetime_: Datetime) -> pd.Timestamp:
    """
    Convert a `Datetime` into a `pd.Timestamp`.

    :param datetime_: string, `pd.Timestamp` or `datetime.datetime`
    :return: tz-aware or naive `pd.Timestamp`
    """
    dassert_is_datetime(datetime_)
    timestamp = pd.Timestamp(datetime_)
    return timestamp


# //////////////////////////////////////////////////////////////////////////////////


def dassert_is_tz_naive(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is tz-naive, i.e., doesn't have timezone
    info.
    """
    hdbg.dassert_is(
        datetime_.tzinfo, None, "datetime_='%s' is not tz naive", datetime_
    )


def dassert_has_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp has timezone info.
    """
    hdbg.dassert_is_not(
        datetime_.tzinfo,
        None,
        "datetime_='%s' doesn't have timezone info",
        datetime_,
    )


def dassert_has_specified_tz(
    datetime_: StrictDatetime, tz_zones: Iterable[str]
) -> None:
    """
    Assert that the passed timestamp has the timezone passed in `tz_zones`.

    :param datetime_: tz-aware timestamp to check
    :param tz_zones: accepted timezone names, e.g., `("UTC",)`
    """
    # Make sure that the passed timestamp has timezone information.
    dassert_has_tz(datetime_)
    # Get the timezone.
    tz_info = datetime_.tzinfo
    # Unlike other timezones UTC is a `datetime.timezone` object not a
    # `pytz.tzfile`. See CmTask5895 for details.
    if (
        isinstance(tz_info, datetime.timezone)
        and tz_info == datetime.timezone.utc
    ):
        tz_zone = "UTC"
    else:
        # `pytz` timezones store their name in `.zone`, while
        # `zoneinfo.ZoneInfo` objects store it in `.key`. Any other `tzinfo`
        # falls back to its string representation, so that an unexpected
        # timezone type triggers the assertion below instead of an
        # `AttributeError`.
        tz_zone = getattr(tz_info, "zone", None)
        if tz_zone is None:
            tz_zone = getattr(tz_info, "key", None)
        if tz_zone is None:
            tz_zone = str(tz_info)
    has_expected_tz = tz_zone in tz_zones
    hdbg.dassert(
        has_expected_tz,
        "datetime_=%s (type=%s) tz_info=%s tz_info.zone=%s instead of tz_zones=%s",
        datetime_,
        type(datetime_),
        tz_info,
        tz_zone,
        tz_zones,
    )


def dassert_has_UTC_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is UTC.
    """
    tz_zones = (pytz.timezone("UTC").zone,)
    dassert_has_specified_tz(datetime_, tz_zones)


def dassert_has_ET_tz(datetime_: StrictDatetime) -> None:
    """
    Assert that the passed timestamp is Eastern Time (ET).
    """
    # Both names are accepted as Eastern Time.
    tz_zones = (
        pytz.timezone("US/Eastern").zone,
        pytz.timezone("America/New_York").zone,
    )
    dassert_has_specified_tz(datetime_, tz_zones)


def dassert_tz_compatible(
    datetime1: StrictDatetime, datetime2: StrictDatetime
) -> None:
    """
    Assert that two timestamps are both naive or both have timezone info.
    """
    dassert_is_strict_datetime(datetime1)
    dassert_is_strict_datetime(datetime2)
    has_tz1 = datetime1.tzinfo is not None
    has_tz2 = datetime2.tzinfo is not None
    hdbg.dassert_eq(
        has_tz1,
        has_tz2,
        "datetime1='%s' and datetime2='%s' are not compatible",
        str(datetime1),
        str(datetime2),
    )


def dassert_have_same_tz(
    datetime1: StrictDatetime, datetime2: StrictDatetime
) -> None:
    """
    Assert that both timestamps have the same tz.

    The timezones are compared regardless of a DST mode.
    """
    dassert_tz_compatible(datetime1, datetime2)
    # Convert to string to remove DST mode info.
    tz1_as_str = str(datetime1.tzinfo)
    tz2_as_str = str(datetime2.tzinfo)
    hdbg.dassert_eq(
        tz1_as_str,
        tz2_as_str,
        "datetime1=%s (datetime1.tzinfo=%s) datetime2=%s (datetime2.tzinfo=%s) ",
        datetime1,
        tz1_as_str,
        datetime2,
        tz2_as_str,
    )


# TODO(gp): Replace this check with compatibility between series vs scalar.
# def dassert_srs_tz_compatible(
# def dassert_srs_has_tz
# def dassert_srs_is_tz_naive
def dassert_tz_compatible_timestamp_with_df(
    datetime_: StrictDatetime,
    df: pd.DataFrame,
    col_name: Optional[str],
) -> None:
    """
    Assert that timestamp and a df column are both naive or both have timezone
    info.

    An empty `df` is always considered compatible.

    :param datetime_: timestamp to compare against the dataframe
    :param df: dataframe whose index / column is checked
    :param col_name: col_name. `None` represents the index.
    """
    dassert_is_strict_datetime(datetime_)
    hdbg.dassert_isinstance(df, pd.DataFrame)
    if df.empty:
        return
    if col_name is None:
        # We assume that the first element in the index is representative.
        df_datetime = df.index[0]
    else:
        hdbg.dassert_in(col_name, df.columns)
        # As for the index, only the first element of the column is checked.
        df_datetime = df[col_name].iloc[0]
    dassert_tz_compatible(df_datetime, datetime_)


# //////////////////////////////////////////////////////////////////////////////////


def dassert_is_valid_timestamp(timestamp: Optional[pd.Timestamp]) -> None:
    """
    Assert that a timestamp is `None` or a `pd.Timestamp` with timezone.
    """
    if timestamp is not None:
        hdbg.dassert_isinstance(timestamp, pd.Timestamp)
        dassert_has_tz(timestamp)


def dassert_timestamp_lte(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
) -> None:
    """
    Assert that `start_timestamp <= end_timestamp`.

    Both timestamps must be valid (i.e., `None` or tz-aware). The ordering is
    checked only when both are not `None`.
    """
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    if start_timestamp is not None and end_timestamp is not None:
        hdbg.dassert_lte(start_timestamp, end_timestamp)


def dassert_timestamp_lt(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
) -> None:
    """
    Assert that `start_timestamp < end_timestamp`.

    Both timestamps must be valid (i.e., `None` or tz-aware). The ordering is
    checked only when both are not `None`.
    """
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    if start_timestamp is not None and end_timestamp is not None:
        hdbg.dassert_lt(start_timestamp, end_timestamp)


def dassert_is_valid_interval(
    start_timestamp: Optional[pd.Timestamp],
    end_timestamp: Optional[pd.Timestamp],
    left_close: bool,
    right_close: bool,
) -> None:
    """
    Assert that an interval has valid start and end timestamps.

    :param start_timestamp: beginning of the interval, `None` for unbounded
    :param end_timestamp: end of the interval, `None` for unbounded
    :param left_close: whether the interval includes `start_timestamp`
    :param right_close: whether the interval includes `end_timestamp`
    """
    _LOG.debug(
        hprint.to_str("start_timestamp end_timestamp left_close right_close")
    )
    dassert_is_valid_timestamp(start_timestamp)
    dassert_is_valid_timestamp(end_timestamp)
    # Check the requested interval.
    if start_timestamp is not None and end_timestamp is not None:
        if left_close and right_close:
            # If they are both closed, an interval like [a, a] makes sense,
            # otherwise it doesn't.
            hdbg.dassert_lte(start_timestamp, end_timestamp)
        else:
            hdbg.dassert_lt(start_timestamp, end_timestamp)


# #############################################################################


def get_UTC_tz() -> datetime.tzinfo:
    """
    Return the UTC timezone.
    """
    return pytz.timezone("UTC")


def get_ET_tz() -> datetime.tzinfo:
    """
    Return the US Eastern Time timezone.
    """
    # TODO(Grisha): -> `US/Eastern`?
    # It appears that "America/New_York" is to be preferred over "US/Eastern".
    # https://www.iana.org/time-zones
    # https://en.wikipedia.org/wiki/Tz_database
    return pytz.timezone("America/New_York")


# Function returning the current (true, replayed, simulated) wall-clock time as a
# timestamp.
# TODO(gp): maybe GetWallClockTimeFunc is better to clarify that this is a function
# and not time. We often pass
GetWallClockTime = Callable[[], pd.Timestamp]


# TODO(gp): -> get_wall_clock_time
# TODO(gp): tz -> tz_mode since we are passing neither a timezone nor a
# timezone_as_str.
def get_current_time(
    tz: str,
    # TODO(gp): Add *
    # *,
    event_loop: Optional[asyncio.AbstractEventLoop] = None,
) -> pd.Timestamp:
    """
    Return current time in UTC / ET timezone or as a naive time.

    This should be the only way to get the current wall-clock time,
    since it handles both wall-clock time and "simulated" wall-clock
    time through asyncio.

    :param tz: how to represent the returned time:
        - "UTC": tz-aware timestamp in UTC
        - "ET": tz-aware timestamp in Eastern Time
        - "naive_UTC": UTC time with the timezone info removed
        - "naive_ET": Eastern Time with the timezone info removed
    :param event_loop: event loop exposing a `get_current_time()` method used
        as the source of time; `None` means using the real wall-clock time
    :return: current time in the requested representation
    :raises ValueError: if `tz` is not one of the supported modes
    """
    if event_loop is not None:
        # We accept only `hasyncio.EventLoop` here. If we are using standard asyncio
        # EventLoop we rely on wall-clock time instead of `loop.time()`.
        hdbg.dassert_isinstance(event_loop, asyncio.AbstractEventLoop)
        hdbg.dassert(hasattr(event_loop, "get_current_time"))
        timestamp = event_loop.get_current_time()
    else:
        # Use true real-time. `datetime.datetime.utcnow()` is deprecated, so
        # we get the tz-aware UTC time and drop the timezone to obtain the same
        # naive UTC value.
        timestamp = datetime.datetime.now(datetime.timezone.utc).replace(
            tzinfo=None
        )
    # Convert it into a UTC timestamp, then into the requested representation.
    timestamp = pd.Timestamp(timestamp, tz=get_UTC_tz())
    if tz == "UTC":
        pass
    elif tz == "ET":
        timestamp = timestamp.tz_convert(get_ET_tz())
    elif tz == "naive_UTC":
        timestamp = timestamp.replace(tzinfo=None)
    elif tz == "naive_ET":
        # Convert first, so that the naive time is expressed in ET.
        timestamp = timestamp.tz_convert(get_ET_tz())
        timestamp = timestamp.replace(tzinfo=None)
    else:
        raise ValueError(f"Invalid tz='{tz}'")
    return timestamp


def get_current_timestamp_as_string(tz: str) -> str:
    """
    Return the current time in the format `YYYYMMDD-HHMMSS` (e.g.,
    20210728-221749).

    Note that no information about the timezone is returned. Thus the
    same time corresponds to `20210728-171749` for tz="ET" and
    `20210728-221749` for tz="UTC".

    :param tz: same meaning as in `get_current_time()`
    """
    timestamp = get_current_time(tz)
    ret = timestamp.strftime("%Y%m%d-%H%M%S")
    return ret


def get_current_date_as_string(tz: str) -> str:
    """
    Return the current date in the format `YYYYMMDD` (e.g., 20210728).

    :param tz: same meaning as in `get_current_time()`
    """
    timestamp = get_current_time(tz)
    ret = timestamp.strftime("%Y%m%d")
    return ret


# #############################################################################
# Bar-related utilities
# #############################################################################


def convert_seconds_to_minutes(num_secs: int) -> int:
    """
    Convert a number of seconds into a number of minutes.

    E.g., 300 (seconds) -> 5 (minutes).

    :param num_secs: positive number of seconds, multiple of 60
    :return: the number of minutes
    """
    hdbg.dassert_lt(0, num_secs)
    hdbg.dassert_eq(
        num_secs % 60,
        0,
        "num_secs=%s is not an integer number of minutes",
        num_secs,
    )
    # The division is exact because of the check above: use integer division
    # to avoid going through a float.
    num_mins = int(num_secs // 60)
    hdbg.dassert_lt(0, num_mins)
    _LOG.debug(hprint.to_str("num_secs num_mins"))
    return num_mins


# TODO(Dan): Unit test.
+def convert_seconds_to_pandas_minutes(val: int) -> str: + """ + Convert a number of seconds to its Pandas delay representation in minutes. + + E.g. 300 -> '5T' + + :param val: number of seconds to convert + :return: Pandas delay representation + """ + res = convert_seconds_to_minutes(val) + res = f"{res}T" + return res + + +def convert_minutes_to_seconds(num_minutes: int) -> int: + """ + Convert minutes to seconds. + + E.g., 5 (minutes) -> 300 (seconds). + + :param num_minutes: the number of minutes to convert + :return: the number of seconds + """ + hdbg.dassert_isinstance(num_minutes, int) + hdbg.dassert_lt(0, num_minutes) + num_seconds = num_minutes * 60 + _LOG.debug(hprint.to_str("num_minutes num_seconds")) + return num_seconds + + +# TODO(gp): bar_duration_in_secs -> bar_{length,period}_in_secs +def find_bar_timestamp( + current_timestamp: pd.Timestamp, + bar_duration_in_secs: int, + *, + mode: str = "round", + max_distance_in_secs: int = 10, +) -> pd.Timestamp: + """ + Compute the bar (a, b] with period `bar_duration_in_secs` including + `current_timestamp`. + + :param current_timestamp: current timestamp + :param bar_duration_in_secs: bar duration in seconds + :param mode: how to compute the bar + - `round`: snap to the closest bar extreme + - `floor`: pick timestamp to the bar that includes it, returning the lower + bound. E.g., For `9:13am` and 5 mins bars returns `9:10am` + :param max_distance_in_secs: number of seconds representing the maximal distance + that it's allowed from the start of the bar + """ + _LOG.debug( + hprint.to_str( + "current_timestamp bar_duration_in_secs mode max_distance_in_secs" + ) + ) + hdbg.dassert_isinstance(current_timestamp, pd.Timestamp) + # Align. 
+ reference_timestamp = f"{bar_duration_in_secs}S" + if mode == "round": + bar_timestamp = current_timestamp.round(reference_timestamp) + elif mode == "floor": + bar_timestamp = current_timestamp.floor(reference_timestamp) + hdbg.dassert_lte(bar_timestamp, current_timestamp) + else: + raise ValueError(f"Invalid mode='{mode}'") + _LOG.debug( + hprint.to_str("current_timestamp bar_duration_in_secs bar_timestamp") + ) + # Sanity check. + if mode == "round": + hdbg.dassert_lte(1, max_distance_in_secs) + if bar_timestamp >= current_timestamp: + distance_in_secs = (bar_timestamp - current_timestamp).seconds + else: + distance_in_secs = (current_timestamp - bar_timestamp).seconds + hdbg.dassert_lte(0, distance_in_secs) + hdbg.dassert_lte( + distance_in_secs, + max_distance_in_secs, + "current_timestamp=%s is too distant from bar_timestamp=%s", + current_timestamp, + bar_timestamp, + ) + _LOG.debug(hprint.to_str("bar_timestamp")) + return bar_timestamp + + +# This can't go in `helpers.hwall_clock_time` since it has a dependency from +# `find_bar_timestamp()` and might introduce an import loop. +def set_current_bar_timestamp( + current_timestamp: pd.Timestamp, + bar_duration_in_secs: int, +) -> None: + """ + Compute the current bar by snapping the current timestamp to the grid. + """ + mode = "round" + # E.g., `current_timestamp` is 09:26 and the next bar is at 09:30, so + # the distance is 4 minutes, i.e. max distance should be within a bar's + # length. 
+ max_distance_in_secs = bar_duration_in_secs + bar_timestamp = find_bar_timestamp( + current_timestamp, + bar_duration_in_secs, + mode=mode, + max_distance_in_secs=max_distance_in_secs, + ) + _LOG.debug(hprint.to_str("current_timestamp bar_timestamp")) + hwacltim.set_current_bar_timestamp(bar_timestamp) + + +# ############################################################################# + + +def str_to_timestamp( + timestamp_as_str: str, tz: str, *, datetime_format: Optional[str] = None +) -> pd.Timestamp: + """ + Convert timestamp as string to `pd.Timestamp`. + + Localize input time to the specified timezone. + + E.g., `timestamp_as_str = "20230523_150513"`: + - `tz = "UTC"` -> "2023-05-23 15:05:13+0000" + - `tz = "US/Eastern"` -> "2023-05-23 15:05:13-0400" + + :param timestamp_as_str: string datetime (e.g., 20230523_150513) + :param tz: timezone info (e.g., "US/Eastern") + :param datetime_format: datetime format (e.g., %Y%m%d_%H%M%S) + If None, infer automatically + :return: pd.Timestamp with a specified timezone + """ + hdbg.dassert_isinstance(timestamp_as_str, str) + hdbg.dassert_isinstance(tz, str) + msg = "timestamp_as_str must be nonempty." + hdbg.dassert_is_not(timestamp_as_str, "", msg=msg) + _LOG.debug(hprint.to_str("timestamp_as_str tz datetime_format")) + if datetime_format is None: + # Try to infer the format automatically. + timestamp = pd.to_datetime(timestamp_as_str, infer_datetime_format=True) + else: + # Convert using the provided format. + timestamp = pd.to_datetime(timestamp_as_str, format=datetime_format) + # Convert to the specified timezone + timestamp = timestamp.tz_localize(tz) + return timestamp + + +def _handle_incorrect_conversions( + date: str, +) -> Optional[Tuple[Optional[str], Callable[[str], str]]]: + """ + Change data pre-processing for cases when `pd.to_datetime` is mistaken. 
+ + :param date: string date + :return: date format and a function to apply to string dates before + passing them into `pd.to_datetime()` + """ + if len(date) in [7, 8]: + # "2021-M2" is transformed to '2020-01-01 00:00:01' by + # `pd.to_datetime`. + if date[:4].isdigit() and date[4] in ["-", ".", "/"] and date[5] == "M": + + def modify_monthly_date(x: str) -> str: + year_number = int(x[:4]) + month_number = x[6:] + num_days_in_month = calendar.monthrange( + year_number, int(month_number) + )[1] + modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" + return modified_x + + return "%Y-%m-%d", modify_monthly_date + return None + + +def _shift_to_period_end( # pylint: disable=too-many-return-statements + date: str, +) -> Optional[Callable[[StrictDatetime], StrictDatetime]]: + """ + Get function to shift the dates to the end of period. + + :param date: string date + :return: a function to shift the dates to the end of period. If `None`, no + shift is needed + """ + + def shift_to_month_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.MonthEnd(0) + + def shift_to_quarter_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.QuarterEnd(0) + + def shift_to_year_end(x: StrictDatetime) -> StrictDatetime: + return x + pd.offsets.YearEnd(0) + + if date[:4].isdigit(): + if len(date) == 7: + if date[5:].isdigit(): + # "2020-12" format. + return shift_to_month_end + if date[5] == "Q": + # "2021-Q1" format. + return shift_to_quarter_end + elif len(date) == 6: + # "2021Q1" format. + if date[4] == "Q": + return shift_to_quarter_end + elif len(date) == 4: + # "2021" format. + return shift_to_year_end + # "September 2020" or "Sep 2020" format. + # Get a flat list of month aliases. The full month name comes first. + # Since the `calendar` is using the natural month order, we need to + # shift the month aliases by one to get the correct order. 
+ # E.g., `calendar.month_name[1:]` is `['January', 'February', ...]` and + # `calendar.month_abbr[1:]` is `['Jan', 'Feb', ...]`. + month_aliases = list(calendar.month_name[1:]) + list(calendar.month_abbr[1:]) + pattern = re.compile("|".join(month_aliases), re.IGNORECASE) + match = pattern.search(date) + if match is None: + return None + span = match.span() + date_without_month = f"{date[: span[0]]}{date[span[1] :]}".strip() + if len(date_without_month) == 4 and date_without_month.isdigit(): + return shift_to_month_end + return None + + +def _determine_date_format( + date: str, date_standard: Optional[str] = None +) -> Optional[Tuple[str, Callable[[str], str]]]: + """ + Determine date format for cases when `pd.to_datetime` fails. + + :param date: date string + :param date_standard: "standard" or "ISO_8601", `None` defaults to + "standard" + :return: date format and a function to transform date strings before + converting them to datetime using `pd.to_datetime` + """ + date_standard = date_standard or "standard" + if date_standard == "standard": + year_format = "%Y" + week_format = "%W" + day_of_week_format = "%w" + elif date_standard == "ISO_8601": + year_format = "%G" + week_format = "%V" + day_of_week_format = "%u" + else: + raise ValueError(f"Invalid `date_standard`='{date_standard}'") + # Determine format and original `date` modification function. + format_ = "" + if date[:4].isdigit(): + format_ += year_format + elif date[0] == "Q" and len(date) == 7 and date[-4:].isdigit(): + # "Q1 2020" format. 
+ + def modify_quarterly_data(x: str) -> str: + year_number = x[-4:] + quarter = int(x[1:2]) + last_month_of_quarter = 3 * quarter + last_day_of_quarter = calendar.monthrange( + int(year_number), last_month_of_quarter + )[1] + modified_x = ( + f"{year_number}-{last_month_of_quarter}-{last_day_of_quarter}" + ) + return modified_x + + format_ = f"{year_format}-%m-%d" + return format_, modify_quarterly_data + else: + _LOG.error("This format is not supported: '%s'", date) + return None + next_char = date[4] + if next_char in ["-", ".", "/", " "]: + if len(date) not in [7, 8]: + _LOG.error("This format is not supported: '%s'", date) + return None + format_ += "-" + next_char = date[5] + if next_char == "W": + # "2020-W14" format. + + def modify_weekly_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + return x + "-6" + + date_modification_func = modify_weekly_date + format_ += f"W{week_format}-{day_of_week_format}" + elif next_char == "S": + # "2020-S1" - semi-annual format. + def modify_semiannual_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + return x.replace("S1", "06-30").replace("S2", "12-31") + + date_modification_func = modify_semiannual_date + format_ += "%m-%d" + elif next_char == "B": + # "2020-B1" - bi-monthly format (every other month). + # We'll index by the start of the month starting with January + # based on PiT. + + def modify_bimonthly_date(x: str) -> str: + x = re.sub(r"[//.\s]", "-", x) + bimonth_number = x[6] + month_number = int(bimonth_number) * 2 - 1 + modified_x = f"{x[:5]}{month_number}-01" + return modified_x + + date_modification_func = modify_bimonthly_date + format_ += "%m-%d" + else: + _LOG.error("This format is not supported: '%s'", date) + return None + elif next_char == "M" and len(date) == 7: + # "1959M01" format. 
+ + def modify_monthly_date(x: str) -> str: + year_number = int(x[:4]) + month_number = x[5:] + num_days_in_month = calendar.monthrange( + year_number, int(month_number) + )[1] + modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" + return modified_x + + date_modification_func = modify_monthly_date + format_ += "-%m-%d" + else: + _LOG.error("This format is not supported: '%s'", date) + return None + return format_, date_modification_func + + +def to_generalized_datetime( + dates: Union[pd.Series, pd.Index], date_standard: Optional[str] = None +) -> Union[pd.Series, pd.Index]: + """ + Convert string dates to datetime. + + This works like `pd.to_datetime`, but supports more date formats and shifts + the dates to the end of period instead of the start. + + :param dates: series or index of dates to convert + :param date_standard: "standard" or "ISO_8601", `None` defaults to + "standard" + :return: datetime dates + """ + # This function doesn't deal with mixed formats. + hdbg.dassert_isinstance(dates, Iterable) + hdbg.dassert(not isinstance(dates, str)) + # Try converting to datetime using `pd.to_datetime`. + format_example_index = -1 + date_example = dates.tolist()[format_example_index] + format_fix = _handle_incorrect_conversions(date_example) + if format_fix is not None: + format_, date_modification_func = format_fix + dates = dates.map(date_modification_func) + date_example = dates.tolist()[format_example_index] + else: + format_ = None + datetime_dates = pd.to_datetime(dates, format=format_, errors="coerce") + # Shift to end of period if conversion has been successful. + # Handle both scalar and array cases for `pd.isna()`. 
+ if hasattr(datetime_dates, "all"): + # datetime_dates is a Series or array-like + all_na = pd.isna(datetime_dates).all() + datetime_example = ( + datetime_dates.tolist()[format_example_index] + if hasattr(datetime_dates, "tolist") + else datetime_dates + ) + else: + # datetime_dates is a scalar + all_na = pd.isna(datetime_dates) + datetime_example = datetime_dates + if not all_na: + if ( + not pd.isna(datetime_example) + and hasattr(datetime_example, "strftime") + and datetime_example.strftime("%Y-%m-%d") == date_example + ): + return datetime_dates + shift_func = _shift_to_period_end(date_example) + if shift_func is not None: + if hasattr(datetime_dates, "map"): + datetime_dates = datetime_dates.map(shift_func) + else: + # For scalar case, apply the shift function directly + datetime_dates = shift_func(datetime_dates) + return datetime_dates + # If standard conversion fails, attempt our own conversion. + date_standard = date_standard or "standard" + format_determination_output = _determine_date_format( + date_example, date_standard + ) + if format_determination_output is None: + return datetime_dates + format_, date_modification_func = format_determination_output + dates = dates.map(date_modification_func) + return pd.to_datetime(dates, format=format_) + + +# ############################################################################# +# Unix to epoch conversion +# ############################################################################# + + +def convert_unix_epoch_to_timestamp( + epoch: int, unit: str = "ms", tz: str = "UTC" +) -> pd.Timestamp: + """ + Convert Unix epoch to timestamp. + + :param epoch: Unix time epoch + :param unit: epoch's time unit + :param tz: resulting timestamp timezone + :return: timestamp + """ + timestamp = pd.Timestamp(epoch, unit=unit, tz=tz) + return timestamp + + +def convert_timestamp_to_unix_epoch( + timestamp: pd.Timestamp, unit: str = "ms" +) -> int: + """ + Convert timestamp to Unix epoch. 
+ + :param timestamp: timestamp + :param unit: epoch's time unit + :return: Unix time epoch + """ + # Make timestamp tz-naive if it is not. Converted to UTC tz before becoming + # naive automatically. + if timestamp.tz: + timestamp = timestamp.tz_convert(None) + # Convert to epoch. + epoch: int = (timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta( + "1" + unit + ) + return epoch + + +# TODO(Sameep): Reuse this function across the code base (`jackpy strftime`) when +# it doesn't make the import graph too complicated. +# TODO(gp): This seems redundant with get_timestamp() in `hwall_clock_time`. +def timestamp_to_str( + timestamp: pd.Timestamp, *, include_msec: bool = False +) -> str: + """ + Convert timestamp to string. + + :param timestamp: timestamp to convert + :param include_msec: whether to include milliseconds e.g. + `20230727_111057_123` + :return: timestamp in string format e.g. `20230727_111057`. + """ + hdbg.dassert_isinstance(timestamp, pd.Timestamp) + # Convert timestamp to string. + if include_msec: + # %f is the format code for microseconds. We truncate the last 3 digits + # to get milliseconds. + # This results in a string like "20230426_153042_123". 
+ timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3] + else: + timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") + return timestamp_str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py new file mode 100644 index 000000000..a11dfb243 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py @@ -0,0 +1,1134 @@ +""" +Import as: + +import helpers.hdbg as hdbg +""" + +import functools +import logging +import os +import pprint +import sys +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union + +# This module can depend only on: +# - Python standard modules +# - `helpers/hserver.py` +# See `helpers/dependencies.txt` for more details + +_LOG = logging.getLogger(__name__) + + +# Enforce that certain warnings are disabled. +import helpers.hwarnings as hwarnin # # isort:skip # noqa: E402,F401,F403 # pylint: disable=unused-import + + +# TODO(gp): Make these generate from MAPPING below. +INFO = "\033[36mINFO\033[0m" +WARNING = "\033[33mWARNING\033[0m" +ERROR = "\033[31mERROR\033[0m" + + +# ############################################################################# +# dfatal. +# ############################################################################# + +# Copied from printing.py to avoid cyclical dependencies. + + +def _line(chars: str = "#", num_cols: int = 80) -> str: + line_ = chars * num_cols + "\n" + return line_ + + +def _frame(x: str, chars: str = "#", num_cols: int = 80) -> str: + """ + Return a string with a frame of num_cols chars around the object x. + + :param x: object to print through str() + :param num_cols: number + """ + line_ = _line(chars=chars, num_cols=num_cols) + ret = "" + ret += line_ + ret += str(x) + "\n" + ret += line_ + return ret + + +# End of copy. 
def dfatal(message: str, assertion_type: Optional[Any] = None) -> None:
    """
    Raise an exception whose text is `message` wrapped in a `#` frame.

    :param message: error text, converted with `str()` before framing
    :param assertion_type: exception class to raise; `AssertionError` is used
        when `None`
    :raises: `assertion_type`, always
    """
    ret = "\n" + _frame(str(message), "#", 80)
    if assertion_type is None:
        assertion_type = AssertionError
    raise assertion_type(ret)


# #############################################################################
# dassert.
# #############################################################################

# TODO(gp): Would be nice to have a way to disable the assertions in certain
# builds, or at least know how much time is spent in the assertions.
# To disable we could have a fake_dbg.py that has all `dassert_*`, `logging`
# defined as `lambda x: 0`.


# INVARIANTS:
# - `dassert_COND()` checks that COND is true, and raises if COND is False
# - For this reason the condition inside the `dassert` is typically in the form
#   `if not (...):`, even this might annoy the linter or look weird
# - The parameter `only_warning` is to report a problem but keep going.
#   This can be used (sparingly) for production when we want to be aware of
#   certain conditions without aborting.


def _to_msg(msg: Optional[str], *args: Any) -> str:
    """
    Format error message `msg` using the params in `args`, like `msg % args`.

    :param msg: printf-style format string, or `None` for no message
    :param args: values interpolated into `msg`
    :return: the formatted message; when formatting fails, a diagnostic
        followed by the raw `msg` and `args`
    """
    if msg is None:
        # If there is no message, we should have no arguments to format.
        assert not args, f"args={str(args)}"
        return ""
    try:
        res = msg % args
    except (TypeError, ValueError) as e:
        # The arguments didn't match the format string (`TypeError`) or the
        # format string itself is malformed, e.g., it contains a stray `%`
        # (`ValueError`): report the error and print the result somehow, so
        # that the original assertion is not masked by a formatting failure.
        res = f"Caught assertion while formatting message:\n'{str(e)}'"
        _LOG.warning(res)
        res += "\n" + msg + " " + " ".join(map(str, args))
    # res = "(" + res + ") "
    return res


def _dfatal(
    txt: Union[str, Iterable[str]],
    msg: Optional[str],
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Report a failed assertion, either aborting or only warning.

    :param txt: description of the failed condition; a list or tuple of
        strings is joined with newlines, anything else goes through `str()`
    :param msg: optional user message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    dfatal_txt = "* Failed assertion *\n"
    # TODO(gp): This should be an iterable.
    if isinstance(txt, (list, tuple)):
        dfatal_txt += "\n".join(txt)
    else:
        dfatal_txt += str(txt)
    msg = _to_msg(msg, *args)
    if msg:
        if not dfatal_txt.endswith("\n"):
            dfatal_txt += "\n"
        dfatal_txt += msg
    if only_warning:
        # Only warn.
        dfatal_txt += "\nContinuing as per user request with only_warning=True"
        _LOG.warning(dfatal_txt)
    else:
        # Abort.
        dfatal(dfatal_txt)


def dassert(
    cond: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `cond` is truthy.

    :param cond: condition to check
    :param msg: optional printf-style message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    # Handle the somehow frequent case of using `dassert` instead of another
    # one, e.g., `dassert(y, list)`
    if msg is not None:
        assert isinstance(msg, str), (
            f"You passed '{msg}' of type '{type(msg)}' instead of str"
        )
    if not cond:
        txt = f"cond={cond}"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_eq(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 == val2`.
    """
    cond = val1 == val2
    if not cond:
        txt = f"'{val1}'\n==\n'{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_ne(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 != val2`.
    """
    cond = val1 != val2
    if not cond:
        txt = f"'{val1}'\n!=\n'{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_imply(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1` implies `val2`, i.e., `not val1 or val2`.
    """
    cond = not val1 or val2
    if not cond:
        txt = f"'{val1}' implies '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# Comparison related.
# #############################################################################


def dassert_lt(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 < val2`.

    :param msg: optional printf-style message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    if val1 < val2:
        return
    _dfatal(f"{val1} < {val2}", msg, *args, only_warning=only_warning)


def dassert_lte(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 <= val2`.

    :param msg: optional printf-style message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    if val1 <= val2:
        return
    _dfatal(f"{val1} <= {val2}", msg, *args, only_warning=only_warning)


def dassert_lgt(
    lower_bound: float,
    x: float,
    upper_bound: float,
    lower_bound_closed: bool,
    upper_bound_closed: bool,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `x` lies between `lower_bound` and `upper_bound`.

    :param lower_bound_closed: if True check `lower_bound <= x`, otherwise
        `lower_bound < x`
    :param upper_bound_closed: if True check `x <= upper_bound`, otherwise
        `x < upper_bound`
    """
    # Pick the comparison for each extreme, then check lower before upper.
    check_lower = dassert_lte if lower_bound_closed else dassert_lt
    check_upper = dassert_lte if upper_bound_closed else dassert_lt
    check_lower(lower_bound, x, msg, *args, only_warning=only_warning)
    check_upper(x, upper_bound, msg, *args, only_warning=only_warning)


def dassert_is_proportion(
    x: float, msg: Optional[str] = None, *args: Any, only_warning: bool = False
) -> None:
    """
    Assert that `0 <= x <= 1`, with both extremes included.
    """
    dassert_lgt(0, x, 1, True, True, msg, *args, only_warning=only_warning)


# #############################################################################
# Membership.
# #############################################################################


def dassert_in(
    value: Any,
    valid_values: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `value in valid_values`.

    :param msg: optional printf-style message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    cond = value in valid_values
    if not cond:
        txt = f"'{value}' in '{valid_values}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_not_in(
    value: Any,
    valid_values: Iterable[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `value not in valid_values`.

    :param msg: optional printf-style message, formatted with `args`
    :param only_warning: issue a warning instead of aborting
    """
    cond = value not in valid_values
    if not cond:
        txt = f"'{value}' not in '{valid_values}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# Type related.
# #############################################################################


def dassert_is(
    val1: Any,
    val2: Optional[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 is val2` (identity, not equality).

    `val1` is annotated as `Any`, like in `dassert_is_not()`, since the check
    is an identity comparison that applies to any object.
    """
    cond = val1 is val2
    if not cond:
        txt = f"'{val1}' is '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_is_not(
    val1: Any,
    val2: Optional[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `val1 is not val2` (identity, not equality).
    """
    cond = val1 is not val2
    if not cond:
        txt = f"'{val1}' is not '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_type_is(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that the type of `val1` is exactly `val2`, subclasses excluded.
    """
    # pylint: disable=unidiomatic-typecheck
    cond = type(val1) is val2
    if not cond:
        txt = f"Type of '{val1}' is '{type(val1)}' instead of '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# TODO(gp): This is redundant with dassert_isinstance(..., (str, float)).
def dassert_type_in(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that the exact type of `val1` is one of the types in `val2`.
    """
    # pylint: disable=unidiomatic-typecheck
    cond = type(val1) in val2
    if not cond:
        txt = f"Type of '{val1}' is '{type(val1)}' not in '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_isinstance(
    val1: Any,
    val2: Union[type, Iterable[type]],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `isinstance(val1, val2)`.
    """
    cond = isinstance(val1, val2)  # type: ignore[arg-type]
    if not cond:
        txt = f"Instance of '{val1}' is '{type(val1)}' instead of '{val2}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_issubclass(
    val1: Any,
    val2: Union[type, Iterable[type]],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that the class of the object `val1` is a subclass of `val2`.
    """
    cond = issubclass(val1.__class__, val2)  # type: ignore[arg-type]
    if not cond:
        txt = (
            f"Instance '{str(val1)}' of class '{val1.__class__.__name__}' is "
            f"not a subclass of '{val2}'"
        )
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_is_integer(
    val: Union[int, float],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that val represents an integer number, independently of the type.

    An `int` always passes; a `float` passes only when it has no fractional
    part. Any other type fails.
    """
    if isinstance(val, int):
        return
    # `float.is_integer()` is False for inf / nan, while `int(val)` would raise
    # `OverflowError` / `ValueError` instead of reporting a failed assertion.
    if isinstance(val, float) and val.is_integer():
        return
    txt = f"Invalid val='{val}' of type '{type(val)}'"
    _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_callable(
    func: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that an object `func` is callable.
    """
    cond = callable(func)
    if not cond:
        txt = f"Obj '{str(func)}' of type '{str(type(func))}' is not callable"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# Set related.
# #############################################################################


# TODO(gp): A more general solution is to have a function that traverses an obj
# and creates a corresponding obj only with deterministic data structures (e.g.,
# converting sets and dicts to sorted lists). Then we can print with `pprint`.
def _set_to_str(set_: Set[Any], thr: Optional[int] = 20) -> str:
    """
    Return a string with the ordered content of a set.

    This is useful when printing assertions that we want to be deterministic (e.g.,
    if we use it inside unit tests like:
    ```
    with self.assertRaises(AssertionError) as cm:
        ...
    actual = str(cm.exception)
    expected = r
    self.assert_equal(actual, expected, fuzzy_match=True)
    ```

    :param set_: set to print
    :param thr: above this number of elements only the count and the min / max
        are printed; `None` always prints every element
    """
    try:
        list_ = sorted(list(set_))
        # If sets have less than `thr` elements print them as well, otherwise
        # print the beginning / end.
        if thr is not None and len(list_) > thr:
            txt = f"{len(list_)} [{min(list_)}, ... {max(list_)}]"
        else:
            txt = str(list_)
    except TypeError:
        # Sometimes the set has elements of different types and we can't easily
        # sort them. In these cases we just skip the sorting.
        txt = str(list(set_))
    return txt


def dassert_set_eq(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` has the same elements as `val2`, raise otherwise.

    Both inputs are converted to sets, so order and repetitions are ignored.

    :param only_warning: issue a warning instead of aborting
    """
    val1 = set(val1)
    val2 = set(val2)
    # pylint: disable=superfluous-parens
    if not (val1 == val2):
        txt = []
        txt.append("val1 - val2=" + _set_to_str(val1.difference(val2)))
        txt.append("val2 - val1=" + _set_to_str(val2.difference(val1)))
        txt.append("val1=" + _set_to_str(val1))
        txt.append("set eq")
        txt.append("val2=" + _set_to_str(val2))
        _dfatal(txt, msg, *args, only_warning=only_warning)


# TODO(gp): -> dassert_issubset to match Python set function.
def dassert_is_subset(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` is a subset of `val2`, raise otherwise.
    """
    val1 = set(val1)
    val2 = set(val2)
    if not val1.issubset(val2):
        txt = []
        txt.append("val1=" + _set_to_str(val1))
        txt.append("issubset")
        txt.append("val2=" + _set_to_str(val2))
        txt.append("val1 - val2=" + _set_to_str(val1.difference(val2)))
        _dfatal(txt, msg, *args, only_warning=only_warning)


# TODO(gp): -> dassert_no_intersection to match other functions.
def dassert_not_intersection(
    val1: Any,
    val2: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` has no intersection `val2`, raise otherwise.
    """
    val1 = set(val1)
    val2 = set(val2)
    if val1.intersection(val2):
        txt = []
        txt.append("val1=" + _set_to_str(val1))
        txt.append("has no intersection")
        txt.append("val2=" + _set_to_str(val2))
        txt.append(
            "val1.intersection(val2)=" + _set_to_str(val1.intersection(val2))
        )
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_is_iterable(
    val: Any,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val` is an iterable (excluding strings, bytes), raise otherwise.
    """
    cond = isinstance(val, Iterable) and not isinstance(
        val, (str, bytes, bytearray)
    )
    if not cond:
        txt = f"Val '{val}' of type '{type(val)}' is not an iterable"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# Array related.
# #############################################################################


def dassert_no_duplicates(
    val1: Iterable[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` has no duplicates, raise otherwise.

    :param val1: any iterable (excluding strings / bytes) of hashable elements
    """
    dassert_is_iterable(val1)
    # Materialize generic iterables (e.g., sets, iterators) since they support
    # neither `len()` nor being traversed twice. Lists and tuples are kept
    # as-is so that the error message shows the object that was passed.
    seq = val1 if isinstance(val1, (list, tuple)) else list(val1)
    cond = len(set(seq)) == len(seq)
    if not cond:
        # Collect the duplicated elements in a single pass, in order of first
        # repetition so that the message is deterministic.
        seen: Set[Any] = set()
        dups_seen: Set[Any] = set()
        dups: List[Any] = []
        for v in seq:
            if v in seen and v not in dups_seen:
                dups_seen.add(v)
                dups.append(v)
            seen.add(v)
        txt = []
        txt.append("val1=\n" + pprint.pformat(seq))
        txt.append("has duplicates")
        txt.append(",".join(map(str, dups)))
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_is_sorted(
    val1: Union[List, Tuple],
    sort_kwargs: Optional[Dict[Any, Any]] = None,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` is sorted, raise otherwise.

    :param val1: list or tuple to check
    :param sort_kwargs: keyword args passed to `sorted()`, e.g.,
        `{"reverse": True}`
    """
    # TODO(gp): Extend for pd.Series using the proper method.
    dassert_isinstance(val1, (list, tuple))
    sort_kwargs = {} if sort_kwargs is None else sort_kwargs
    sorted_val1 = sorted(val1, **sort_kwargs)
    # `sorted()` always returns a list and a list never compares equal to a
    # tuple: compare against a list so that sorted tuples are accepted.
    cond = sorted_val1 == list(val1)
    if not cond:
        txt = []
        txt.append("val1=\n" + pprint.pformat(val1))
        txt.append("is not sorted")
        txt.append("sorted(val1)=\n" + pprint.pformat(sorted_val1))
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_eq_all(
    val1: Iterable[Any],
    val2: Iterable[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that two iterables `val1` and `val2` are equal, raise otherwise.

    Both iterables are converted to lists before being compared.
    """
    dassert_is_iterable(val1)
    val1 = list(val1)
    dassert_is_iterable(val2)
    val2 = list(val2)
    cond = val1 == val2
    if not cond:
        # mask = val1 != val2
        txt = []
        txt.append(f"val1={len(val1)}\n{val1}")
        txt.append(f"val2={len(val2)}\n{val2}")
        # txt += "\ndiff=%s" % mask.sum()
        # txt += "\n%s" % val1[mask]
        # txt += "\n%s" % val2[mask]
        _dfatal(txt, msg, *args, only_warning=only_warning)


def _get_first_type(obj: Iterable, tag: str) -> Type:
    """
    Return the type shared by all the elements of `obj`.

    :param obj: iterable whose elements must all have the same type
    :param tag: name of `obj` used in the error message
    :return: the unique element type
    """
    obj_types = {type(v) for v in obj}
    dassert_eq(
        len(obj_types),
        1,
        "More than one type for elem of %s=%s",
        tag,
        # Materialize (and sort) the names: a lazy `map` object would be
        # printed as `<map object at 0x...>`.
        sorted(map(str, obj_types)),
    )
    return list(obj_types)[0]


# TODO(gp): IMO a bit overfit to the use case. Move this to the files that are
# using is.
def dassert_all_attributes_are_same(
    list_: List[Any],
    attribute_name: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check if all the elements in the list have the same attribute value.

    :param list_: list of objects
    :param attribute_name: name of the attribute to check
    """
    dassert_isinstance(list_, list)
    dassert_isinstance(attribute_name, str)
    attribute_values = [getattr(element, attribute_name) for element in list_]
    if len(set(attribute_values)) != 1:
        txt = []
        txt.append("Elements in the list have different values for ")
        txt.append(f"attribute {attribute_name}:\n\t{set(attribute_values)}")
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_array_has_same_type_element(
    obj1: Any,
    obj2: Any,
    only_first_elem: bool,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that two objects iterables like arrays (e.g., pd.Index) have elements
    of the same type.

    :param only_first_elem: whether to check only the first element or
        all the elements of the iterable.
    """
    # Get the types to compare.
    if only_first_elem:
        obj1_first_type = type(obj1[0])
        obj2_first_type = type(obj2[0])
    else:
        obj1_first_type = _get_first_type(obj1, "obj1")
        obj2_first_type = _get_first_type(obj2, "obj2")
    #
    if obj1_first_type != obj2_first_type:
        txt = []
        num_elems = 5
        txt.append(f"obj1=\n{obj1[:num_elems]}")
        txt.append(f"obj2=\n{obj2[:num_elems]}")
        txt.append(
            f"type(obj1)='{obj1_first_type}' is different from type(obj2)='{obj2_first_type}'"
        )
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_container_type(
    obj: Any,
    container_type: Optional[Any],
    elem_type: Optional[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert `obj` is a certain type of container containing certain type of
    objects.

    E.g., `obj` is a list of strings.

    :param container_type: expected type of `obj`; `None` skips the check
    :param elem_type: expected type of each element; `None` skips the check
    """
    # Add information about the obj.
    if not msg:
        msg = ""
    # The message is later interpolated with `msg % args`, so any `%` coming
    # from the representation of `obj` must be escaped.
    obj_as_str = str(obj).replace("%", "%%")
    msg = msg.rstrip("\n") + f"\nobj='{obj_as_str}'"
    # Check container.
    if container_type is not None:
        dassert_isinstance(
            obj, container_type, msg, *args, only_warning=only_warning
        )
    # Check the elements of the container.
    if elem_type is not None:
        for elem in obj:
            dassert_isinstance(
                elem, elem_type, msg, *args, only_warning=only_warning
            )


# TODO(gp): @all Replace calls to this with calls to `dassert_container_type()`.
def dassert_list_of_strings(
    list_: List[str],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `list_` is a list and every element is a string.
    """
    # TODO(gp): Allow iterable?
    dassert_isinstance(list_, list, msg, *args, only_warning=only_warning)
    for elem in list_:
        dassert_isinstance(elem, str, msg, *args, only_warning=only_warning)


def dassert_all_defined_or_all_None(
    vals: List[Any],
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that all the values in a list are either all defined or all None.
    """
    all_defined_cond = all(val is not None for val in vals)
    all_none_cond = all(val is None for val in vals)
    cond = all_defined_cond or all_none_cond
    if not cond:
        txt = f"Some values in list are defined and some are None: '{vals}'"
        _dfatal(txt, msg, *args, only_warning=only_warning)


# #############################################################################
# File related.
# #############################################################################


def dassert_path_exists(
    path: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `path` (file or directory) exists.
    """
    dassert_isinstance(path, str)
    path = os.path.abspath(path)
    if not os.path.exists(path):
        txt = f"Path '{path}' doesn't exist!"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_path_not_exists(
    path: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert that `path` is a non-empty string and doesn't exist.
    """
    dassert_isinstance(path, str)
    dassert_ne(path, "")
    path = os.path.abspath(path)
    if os.path.exists(path):
        txt = f"Path '{path}' already exist!"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_file_exists(
    file_name: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert unless `file_name` exists and it's a file and not a directory.
    """
    dassert_isinstance(file_name, str)
    dassert_ne(file_name, "")
    file_name = os.path.abspath(file_name)
    # `file_name` exists.
    exists = os.path.exists(file_name)
    if not exists:
        txt = f"File '{file_name}' doesn't exist"
        _dfatal(txt, msg, *args, only_warning=only_warning)
        # With `only_warning=True` we get here: a missing path is trivially
        # "not a file", so don't emit a second redundant warning.
        return
    # `file_name` is a file.
    is_file = os.path.isfile(file_name)
    if not is_file:
        txt = f"'{file_name}' is not a file"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_dir_exists(
    dir_name: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Assert unless `dir_name` exists and it's a directory.
    """
    dassert_isinstance(dir_name, str)
    dassert_ne(dir_name, "")
    dir_name = os.path.abspath(dir_name)
    # `dir_name` exists.
    exists = os.path.exists(dir_name)
    if not exists:
        txt = f"Dir '{dir_name}' doesn't exist"
        _dfatal(txt, msg, *args, only_warning=only_warning)
        # With `only_warning=True` we get here: a missing path is trivially
        # "not a dir", so don't emit a second redundant warning.
        return
    # `dir_name` is a directory.
    is_dir = os.path.isdir(dir_name)
    if not is_dir:
        txt = f"'{dir_name}' is not a dir"
        _dfatal(txt, msg, *args, only_warning=only_warning)


def dassert_file_extension(
    file_name: str,
    extensions: Union[str, List[str]],
    only_warning: bool = False,
) -> None:
    """
    Ensure that file has one of the given extensions.

    :param extensions: don't need to start with `.`, e.g., use `csv` instead of
        `.csv`
    """
    # Handle single extension case.
    if isinstance(extensions, str):
        extensions = [extensions]
    # Make sure extension starts with .
    extensions = [f".{e}" if not e.startswith(".") else e for e in extensions]
    # Check.
    name, act_ext = os.path.splitext(file_name)
    if act_ext == ".gz":
        # Concatenate with the preceding extension, e.g., `.csv.gz`.
        ext = os.path.splitext(name)[-1]
        act_ext = (ext + act_ext).lower()
    dassert_in(
        act_ext,
        extensions,
        "Invalid extension '%s' for file '%s'",
        act_ext,
        file_name,
        only_warning=only_warning,
    )


def dassert_is_path_abs(path: str, only_warning: bool = False) -> None:
    """
    Assert that `path` is an absolute path.
    """
    dassert_isinstance(path, str)
    dassert_ne(path, "")
    dassert(
        os.path.isabs(path),
        "Path '%s' is not absolute",
        path,
        only_warning=only_warning,
    )


def dassert_related_params(
    params: Dict[str, Any],
    mode: str,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check whether `params` have a certain relationship.

    An empty `params` trivially satisfies both modes.

    :params params: dictionary of parameter name, value
    :params mode:
        - `all_or_none_non_null`: either all params are null (i.e., `bool` evaluate
          to false) or are non-null
        - `all_or_none_non_None`: either all params are None or all params are not
          None. This is useful when passing set of params that are optional
    :raises ValueError: if `mode` is not one of the supported values
    """
    # TODO(gp): Allow iterable?
    dassert_isinstance(params, dict, msg, *args, only_warning=only_warning)
    if mode == "all_or_none_non_null":
        # Find out if at least one value is set. `any()` also handles an empty
        # dict, where `functools.reduce()` without an initial value raises.
        one_is_non_null = any(bool(v) for v in params.values())
        for k, v in params.items():
            if bool(v) != one_is_non_null:
                txt = f"All or none parameter should be non-null:\n{k}={v}\nparams={pprint.pformat(params)}\n"
                _dfatal(txt, msg, *args, only_warning=only_warning)
    elif mode == "all_or_none_non_None":
        # Find out if at least one value is not None.
        one_is_non_None = any(v is not None for v in params.values())
        for k, v in params.items():
            if (v is not None) != one_is_non_None:
                txt = f"All or none parameter should be non-None:\n{k}={v}\nparams={pprint.pformat(params)}\n"
                _dfatal(txt, msg, *args, only_warning=only_warning)
    else:
        raise ValueError(f"Invalid mode='{mode}'")


# #############################################################################
# Command line.
# #############################################################################


# Sample at the beginning of time before we start fiddling with command line
# args.
_CMD_LINE = " ".join(arg for arg in sys.argv)


def get_command_line() -> str:
    """
    Return the command line as sampled when this module was imported.
    """
    return _CMD_LINE


# #############################################################################
# Logger.
# #############################################################################


# TODO(gp): Move this to helpers/hlogging.py and change all the callers.
# TODO(gp): maybe replace "force_verbose_format" and "force_print_format" with
# a "mode" in ("auto", "verbose", "print")
def init_logger(
    verbosity: int = logging.INFO,
    use_exec_path: bool = False,
    log_filename: Optional[str] = None,
    force_verbose_format: bool = False,
    force_print_format: bool = False,
    force_white: bool = True,
    force_no_warning: bool = False,
    in_pytest: bool = False,
    report_memory_usage: bool = False,
    report_cpu_usage: bool = False,
    report_command_line: bool = True,
) -> None:
    """
    Send stderr and stdout to logging (optionally teeing the logs to file).

    - Note that:
        - logging.DEBUG = 10
        - logging.INFO = 20

    :param verbosity: verbosity to use; a level name passed as string is
        converted to the corresponding level
    :param use_exec_path: use the name of the executable
    :param log_filename: log to that file
    :param force_verbose_format: use the verbose format for the logging
    :param force_print_format: use the print format for the logging
    :param force_white: use white color for printing. This can pollute the
        output of a script when redirected to file with echo characters
    :param force_no_warning: forwarded as-is to `hlogging.set_v2_formatter()`
    :param in_pytest: True when we are running through pytest, so that we
        can overwrite the default logger from pytest
    :param report_memory_usage: turn on reporting memory usage
    :param report_cpu_usage: turn on reporting CPU usage
    :param report_command_line: turn on reporting command line
    """
    # Try to minimize dependencies.
    import helpers.hlogging as hloggin

    # TODO(gp): Print the stacktrace every time is called.
    if force_white:
        sys.stdout.write("\033[0m")
    if isinstance(verbosity, str):
        # pylint: disable=protected-access
        dassert(hasattr(logging, "_checkLevel"))
        assert hasattr(logging, "_checkLevel")
        verbosity = logging._checkLevel(verbosity)
    # From https://stackoverflow.com/questions/14058453
    root_logger = logging.getLogger()
    # Set verbosity for all loggers.
    root_logger.setLevel(verbosity)
    # if False:
    #     eff_level = root_logger.getEffectiveLevel()
    #     print(
    #         "effective level= %s (%s)"
    #         % (eff_level, logging.getLevelName(eff_level))
    #     )
    # if False:
    #     # dassert_eq(root_logger.getEffectiveLevel(), verbosity)
    #     for handler in root_logger.handlers:
    #         handler.setLevel(verbosity)
    # Exit to avoid to replicate the same output multiple times.
    if not in_pytest and root_logger.handlers:
        print(WARNING + ": Logger already initialized: skipping")
        if False:
            # Print info about the caller.
            import traceback

            traceback.print_stack()
        return
    #
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(verbosity)
    # Set the formatter.
    # formatter = hloggin.set_v1_formatter(
    dassert(hasattr(hloggin, "set_v2_formatter"))
    assert hasattr(hloggin, "set_v2_formatter")
    formatter = hloggin.set_v2_formatter(
        ch,
        root_logger,
        force_no_warning,
        force_print_format,
        force_verbose_format,
        report_memory_usage,
        report_cpu_usage,
    )
    # Find name of the log file.
    if use_exec_path and log_filename is None:
        dassert_is(log_filename, None, msg="Can't specify conflicting filenames")
        # Use the name of the executable.
        import inspect

        frame = inspect.stack()[1]
        module = inspect.getmodule(frame[0])
        if module is None:
            # The caller's frame doesn't map to any module.
            filename = "none"
        else:
            # The attribute name must be the string "__file__" (not the value
            # of this module's `__file__`); fall back to a placeholder for
            # modules that have no file, instead of raising `AttributeError`.
            filename = str(getattr(module, "__file__", "unknown_module"))
        log_filename = os.path.realpath(filename) + ".log"
    # Handle teeing to a file.
    if log_filename:
        # Create a dir (and all its missing parent dirs) if it doesn't exist.
        log_dirname = os.path.dirname(log_filename)
        if log_dirname != "":
            # `exist_ok` avoids failing if the dir is created concurrently.
            os.makedirs(log_dirname, exist_ok=True)
        # Delete the file since we don't want to append.
        if os.path.exists(log_filename):
            try:
                os.unlink(log_filename)
            except FileNotFoundError as e:
                print(e)
        # Tee to file.
        file_handler = logging.FileHandler(log_filename)
        root_logger.addHandler(file_handler)
        file_handler.setFormatter(formatter)
        #
        _LOG.info("Saving log to file '%s'", log_filename)
    #
    _LOG.debug("Effective logging level=%s", _LOG.getEffectiveLevel())
    # Shut up chatty modules.
    dassert(hasattr(hloggin, "shutup_chatty_modules"))
    assert hasattr(hloggin, "shutup_chatty_modules")
    hloggin.shutup_chatty_modules(verbose=False)
    if report_command_line:
        _LOG.info("> cmd='%s'", get_command_line())
    #
    # test_logger()


def set_logger_verbosity(
    verbosity: int, module_name: Optional[str] = None
) -> None:
    """
    Change the verbosity of the logging after the initialization.

    Passing a module_name (e.g., matplotlib) one can change the logging
    of that specific module.

    E.g., set_logger_verbosity(logging.WARNING, "matplotlib")

    :raises AssertionError: if the root logger is targeted but has no handlers
    """
    logger = logging.getLogger(module_name)
    if module_name is None and not logger.handlers:
        # Raise explicitly: an `assert` statement is stripped under
        # `python -O`, silently disabling the check.
        raise AssertionError("ERROR: Logger not initialized")
    logger.setLevel(verbosity)
    eff_level = logger.getEffectiveLevel()
    print(f"effective level= {eff_level} ({logging.getLevelName(eff_level)})")
    dassert_eq(logger.getEffectiveLevel(), verbosity)


def get_logger_verbosity() -> int:
    """
    Return the effective level of the root logger.

    :raises AssertionError: if the root logger has no handlers
    """
    root_logger = logging.getLogger()
    if not root_logger.handlers:
        # Raise explicitly: an `assert` statement is stripped under
        # `python -O`, silently disabling the check.
        raise AssertionError("ERROR: Logger not initialized")
    return root_logger.getEffectiveLevel()


# #############################################################################
# Command line.
# #############################################################################


# Sample at the beginning of time before we start fiddling with command line
# args.
+_CMD_LINE = " ".join(arg for arg in sys.argv) +_EXEC_NAME = os.path.abspath(sys.argv[0]) + + +def get_command_line() -> str: + return _CMD_LINE + + +def get_exec_name() -> str: + return _EXEC_NAME diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py new file mode 100644 index 000000000..13d388249 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py @@ -0,0 +1,119 @@ +""" +Import as: + +import helpers.hdict as hdict +""" + +import logging +from typing import ( + Any, + Dict, + Generator, + Iterable, + Mapping, + Optional, + Tuple, + Union, +) + +try: + from collections.abc import Mapping as AbcMapping +except ImportError: + from collections import Mapping as AbcMapping + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +def get_nested_dict_iterator( + nested: Mapping[Any, Any], + path: Optional[Iterable[Any]] = None, +) -> Generator[Tuple[Tuple, Any], None, None]: + """ + Return nested mapping iterator that iterates in a depth-first fashion. + + :param nested: nested dictionary + :param path: path to node to start the visit from or `None` to start from + the root + :return: path to leaf node, value + """ + if path is None: + path = [] + if not isinstance(path, tuple): + path = tuple(path) + if not nested.items(): + yield path, nested + for key, value in nested.items(): + local_path = path + (key,) + if isinstance(value, AbcMapping): + yield from get_nested_dict_iterator(value, local_path) + else: + yield local_path, value + + +def extract_leaf_values(nested: Dict[Any, Any], key: Any) -> Dict[Any, Any]: + """ + Extract leaf values with key matching `key`. 
+ + :param nested: nested dictionary + :param key: leaf key value to match + :return: dict with key = path as tuple, value = leaf value + """ + d = {} + for k, v in get_nested_dict_iterator(nested): + if k[-1] == key: + d[k] = v + return d + + +_NO_VALUE_SPECIFIED = "__NO_VALUE_SPECIFIED__" + + +def typed_get( + dict_: Union[Dict, "Config"], # noqa: F821 + key: Any, + default_value: Optional[Any] = _NO_VALUE_SPECIFIED, + *, + expected_type: Optional[Any] = None, +) -> Any: + """ + Equivalent to `dict.get(key, default_val)` and check the type of the + output. + + :param default_value: default value to return if key is not in `config` + :param expected_type: expected type of `value` + :return: config[key] if available, else `default_value` + """ + hdbg.dassert_isinstance(dict_, dict) + if default_value == _NO_VALUE_SPECIFIED: + # No value is specified so check that the key is present with dassert_in + # to report a decent error. + hdbg.dassert_in(key, dict_) + try: + ret = dict_.__getitem__(key) + except KeyError as e: + # No key: use the default val if it was passed or asserts. + _LOG.debug("e=%s", e) + # We can't use None since None can be a valid default value, so we use + # another value. + if default_value != _NO_VALUE_SPECIFIED: + ret = default_value + else: + # No default value found, then raise. + raise e + if expected_type is not None: + hdbg.dassert_isinstance(ret, expected_type) + return ret + + +def checked_get( + dict_: Dict, + key: Any, +) -> Any: + """ + Ensure that the key exists and print a decent error message in case of + error, instead of a generic `TypeError`. 
+ """ + hdbg.dassert_in(key, dict_) + return dict_[key] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py new file mode 100644 index 000000000..44f973a89 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py @@ -0,0 +1,871 @@ +""" +Import as: + +import helpers.hdocker as hdocker +""" + +import argparse +import copy +import hashlib +import logging +import os +import platform +import subprocess +import time +from typing import List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.henv as henv +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Docker utilities +# ############################################################################# + + +# TODO(gp): This is a function of the architecture. Move to the repo_config.py +# or the config file. +def get_use_sudo() -> bool: + """ + Check if Docker commands should be run with sudo. + + :return: Whether to use sudo for Docker commands. + """ + use_sudo = False + # if hserver.is_inside_docker(): + # use_sudo = True + return use_sudo + + +# TODO(gp): use_sudo should be set to None and the correct value inferred from +# the repo config. +def get_docker_executable(use_sudo: bool) -> str: + """ + Get the Docker executable with / without sudo, if needed. + """ + executable = "sudo " if use_sudo else "" + executable += "docker" + return executable + + +def process_docker_cmd( + docker_cmd: str, container_image: str, dockerfile: str, mode: str +) -> str: + """ + Process a Docker command according to the mode. 
+ + :param docker_cmd: The Docker command to process. + :param container_image: The name of the Docker container. + :param dockerfile: The content of the Dockerfile. + :param mode: The mode to process the Docker command. + - "return_cmd": return the command as is. + - "system": execute the command. + - "save_to_file": save the command to a file. + :return: The output of the Docker command. + """ + _LOG.debug(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(docker_cmd, str) + hdbg.dassert_isinstance(container_image, str) + hdbg.dassert_isinstance(dockerfile, str) + if mode == "return_cmd": + ret = docker_cmd + elif mode == "system": + # TODO(gp): Note that `suppress_output=False` seems to hang the call. + hsystem.system(docker_cmd, suppress_output=False) + ret = "" + elif mode == "system_without_output": + hsystem.system(docker_cmd, suppress_output=True) + ret = "" + elif mode == "save_to_file": + file_name = f"tmp.process_docker_cmd.{container_image}.txt" + txt = [] + txt.append(f"docker_cmd={docker_cmd}") + txt.append(f"container_image={container_image}") + txt.append(f"dockerfile={dockerfile}") + txt = "\n".join(txt) + hio.to_file(file_name, txt) + ret = "" + else: + raise ValueError(f"Invalid mode='{mode}'") + return ret + + +def container_exists(container_name: str, use_sudo: bool) -> Tuple[bool, str]: + """ + Check if a Docker container is running by executing a command like: + + ``` + > docker container ls --filter=tmp.prettier -aq + aed8a5ce33a9 + ``` + """ + _LOG.debug(hprint.func_signature_to_str()) + # + executable = get_docker_executable(use_sudo) + cmd = f"{executable} container ls --filter name=/{container_name} -aq" + _, container_id = hsystem.system_to_one_line(cmd) + container_id = container_id.rstrip("\n") + exists = container_id != "" + _LOG.debug(hprint.to_str("exists container_id")) + return exists, container_id + + +def image_exists(image_name: str, use_sudo: bool) -> Tuple[bool, str]: + """ + Check if a Docker image already exists by 
executing a command like: + + ``` + > docker images tmp.prettier -aq + aed8a5ce33a9 + ``` + """ + _LOG.debug(hprint.func_signature_to_str()) + # + executable = get_docker_executable(use_sudo) + cmd = f"{executable} image ls --filter reference={image_name} -q" + _, image_id = hsystem.system_to_one_line(cmd) + image_id = image_id.rstrip("\n") + exists = image_id != "" + _LOG.debug(hprint.to_str("exists image_id")) + return exists, image_id + + +def container_rm(container_name: str, use_sudo: bool) -> None: + """ + Remove a Docker container by its name. + + :param container_name: Name of the Docker container to remove. + :param use_sudo: Whether to use sudo for Docker commands. + :raises AssertionError: If the container ID is not found. + """ + _LOG.debug(hprint.func_signature_to_str()) + # + executable = get_docker_executable(use_sudo) + # Find the container ID from the name. + # Docker filter refers to container names using a leading `/`. + cmd = f"{executable} container ls --filter name=/{container_name} -aq" + _, container_id = hsystem.system_to_one_line(cmd) + container_id = container_id.rstrip("\n") + hdbg.dassert_ne(container_id, "") + # Delete the container. + _LOG.debug(hprint.to_str("container_id")) + cmd = f"{executable} container rm --force {container_id}" + hsystem.system(cmd) + _LOG.debug("docker container '%s' deleted", container_name) + + +def volume_rm(volume_name: str, use_sudo: bool) -> None: + """ + Remove a Docker volume by its name. + + :param volume_name: Name of the Docker volume to remove. + :param use_sudo: Whether to use sudo for Docker commands. 
+ """ + _LOG.debug(hprint.func_signature_to_str()) + # + executable = get_docker_executable(use_sudo) + cmd = f"{executable} volume rm {volume_name}" + hsystem.system(cmd) + _LOG.debug("docker volume '%s' deleted", volume_name) + + +# ############################################################################# + + +def get_current_arch() -> str: + """ + Return the architecture that we are running on (e.g., arm64, aarch64, + x86_64). + """ + cmd = "uname -m" + _, current_arch = hsystem.system_to_one_line(cmd) + _LOG.debug(hprint.to_str("current_arch")) + return current_arch + + +def _is_compatible_arch(val1: str, val2: str) -> bool: + valid_arch = ["x86_64", "amd64", "aarch64", "arm64"] + hdbg.dassert_in(val1, valid_arch) + hdbg.dassert_in(val2, valid_arch) + if val1 == val2: + return True + compatible_sets = [{"x86_64", "amd64"}, {"aarch64", "arm64"}] + for comp_set in compatible_sets: + if {val1, val2}.issubset(comp_set): + return True + return False + + +def check_image_compatibility_with_current_arch( + image_name: str, + *, + use_sudo: Optional[bool] = None, + pull_image_if_needed: bool = True, + assert_on_error: bool = True, +) -> None: + """ + Check if the Docker image is compatible with the current architecture. + + :param image_name: Name of the Docker image to check. + :param use_sudo: Whether to use sudo for Docker commands. + :param pull_image_if_needed: Whether to pull the image if it doesn't + exist. + :param assert_on_error: Whether to raise an error if the image is + not compatible with the current architecture. + """ + _LOG.debug(hprint.func_signature_to_str()) + hdbg.dassert_ne(image_name, "") + if use_sudo is None: + use_sudo = get_use_sudo() + # Get the architecture that we are running on. + current_arch = get_current_arch() + # > docker image inspect \ + # 623860924167.dkr.ecr.eu-north-1.amazonaws.com/helpers:local-saggese-1.1.0 \ + # --format '{{.Architecture}}' + # arm64 + # Check and pull the image if needed. 
+ has_image, _ = image_exists(image_name, use_sudo) + if not has_image: + _LOG.warning("Image '%s' not found: trying to pull it", image_name) + if pull_image_if_needed: + cmd = f"docker pull {image_name}" + hsystem.system(cmd) + else: + hdbg.dfatal("Image '%s' not found", image_name) + # Check the image architecture. + executable = get_docker_executable(use_sudo) + cmd = f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'" + _, image_arch = hsystem.system_to_one_line(cmd) + _LOG.debug(hprint.to_str("image_arch")) + # Check architecture compatibility. + if not _is_compatible_arch(current_arch, image_arch): + msg = f"Running architecture '{current_arch}' != image architecture '{image_arch}'" + if assert_on_error: + hdbg.dfatal(msg) + else: + _LOG.warning(msg) + _LOG.debug( + "Running architecture '%s' and image architecture '%s' are compatible", + current_arch, + image_arch, + ) + + +# ############################################################################# + + +def wait_for_file_in_docker( + container_id: str, + docker_file_path: str, + out_file_path: str, + *, + check_interval_in_secs: float = 0.5, + timeout_in_secs: int = 10, +) -> None: + """ + Wait for a file to be generated inside a Docker container and copy it to + the host. + + This function periodically checks for the existence of a file inside + a Docker container. Once the file is found, it copies the file to + the specified output path on the host. + + :param container_id: ID of the Docker container. + :param docker_file_path: Path to the file inside the Docker + container. + :param out_file_path: Path to copy the file to on the host. + :param check_interval_in_secs: Time in seconds between checks. + :param timeout_in_secs: Maximum time to wait for the file in + seconds. + :raises ValueError: If the file is not found within the timeout + period. 
+ """ + _LOG.debug("Waiting for file: %s:%s", container_id, docker_file_path) + start_time = time.time() + while not os.path.exists(out_file_path): + cmd = f"docker cp {container_id}:{docker_file_path} {out_file_path}" + hsystem.system(cmd) + if time.time() - start_time > timeout_in_secs: + raise ValueError( + "Timeout reached. File not found: " + f"{container_id}:{docker_file_path}" + ) + time.sleep(check_interval_in_secs) + _LOG.debug("File generated: %s", out_file_path) + + +def replace_shared_root_path( + path: str, *, replace_ecs_tokyo: Optional[bool] = False +) -> str: + """ + Replace root path of the shared directory based on the mapping. + + :param path: path to replace, e.g., `/data/shared` + :param replace_ecs_tokyo: if True replace `ecs_tokyo` to `ecs` in the path + :return: replaced shared data dir root path, e.g., + - `/data/shared/ecs_tokyo/.../20240522_173000.20240522_182500/` -> + `/shared_data/ecs/.../20240522_173000.20240522_182500/` + - `/data/shared/ecs/.../20240522_173000.20240522_182500` -> + `/shared_data/ecs/.../20240522_173000.20240522_182500` + """ + # Inside ECS, we keep the original shared data path and replace it only when + # running inside Docker on the dev server. + if hserver.is_inside_docker() and not hserver.is_inside_ecs_container(): + shared_data_dirs = hserver.get_shared_data_dirs() + if shared_data_dirs is not None: + if replace_ecs_tokyo: + # Make a copy to avoid modifying the original one. 
+ shared_data_dirs = copy.deepcopy(shared_data_dirs) + shared_data_dirs["ecs_tokyo"] = "ecs" + for shared_dir, docker_shared_dir in shared_data_dirs.items(): + path = path.replace(shared_dir, docker_shared_dir) + _LOG.debug( + "Running inside Docker on the dev server, thus replacing %s " + "with %s", + shared_dir, + docker_shared_dir, + ) + else: + _LOG.debug("No replacement found, returning path as-is: %s", path) + return path + + +# ############################################################################# +# Dockerized executable utils. +# ############################################################################# + +# See `docs/tools/docker/all.dockerized_flow.explanation.md` for details +# about the Dockerized flow. + + +def get_docker_base_cmd(use_sudo: bool) -> List[str]: + """ + Get the base command for running a Docker container. + + E.g., + ``` + docker run --rm --user $(id -u):$(id -g) \ + -e CSFY_AWS_PROFILE -e CSFY_ECR_BASE_PATH \ + ... + -e OPENAI_API_KEY + ``` + + :param use_sudo: Whether to use sudo for Docker commands. + :return: The base command for running a Docker container. + """ + docker_executable = get_docker_executable(use_sudo) + # Get the env vars to pass to the Docker container. + vars_to_pass = henv.get_csfy_env_vars() + henv.get_api_key_env_vars() + vars_to_pass = sorted(vars_to_pass) + vars_to_pass_as_str = " ".join(f"-e {v}" for v in vars_to_pass) + # Build the command as a list. + docker_cmd = [ + docker_executable, + "run --rm", + "--user $(id -u):$(id -g)", + vars_to_pass_as_str, + ] + # Handle coverage. + # TODO(gp): Is this env var standard, or should it be + # CSFY_COVERAGE_PROCESS_START? + # if os.environ.get("COVERAGE_PROCESS_START"): + # _LOG.debug("Enabling coverage") + # host_cov_dir = os.path.abspath("coverage_data") + # # TODO(gp): Use `hio.create_dir()` instead. 
+ # os.makedirs(host_cov_dir, exist_ok=True) + # os.chmod(host_cov_dir, 0o777) + # coverage_dir_container = "/app/coverage_data" + # docker_cmd.extend( + # [ + # f"-e COVERAGE_FILE={coverage_dir_container}/.coverage", + # f"-e COVERAGE_PROCESS_START={coverage_dir_container}/.coveragerc", + # f"-v {host_cov_dir}:{coverage_dir_container}", + # ] + # ) + return docker_cmd + + +def get_container_image_name( + image_name: str, dockerfile: str +) -> Tuple[str, str]: + """ + Get the name of the container image. + + :param image_name: Name of the Docker container to build. + :param dockerfile: Content of the Dockerfile for building the + container. + :return: Name of the container image. + """ + _LOG.debug(hprint.func_signature_to_str("image_name dockerfile")) + hdbg.dassert_ne(image_name, "") + hdbg.dassert_ne(dockerfile, "") + dockerfile = hprint.dedent(dockerfile) + # if os.environ.get("COVERAGE_PROCESS_START"): + # _LOG.debug("Enabling coverage") + # # Check if this is a Python-based Dockerfile. + # if any( + # keyword in dockerfile.lower() + # for keyword in ["python", "pip", "python3"] + # ): + # coverage_dockerfile = hcovera.generate_coverage_dockerfile() + # _LOG.debug("Coverage Dockerfile content:\n%s", coverage_dockerfile) + # dockerfile = dockerfile.strip() + "\n" + coverage_dockerfile + # _LOG.debug("Coverage support added to Dockerfile") + # else: + # _LOG.warning( + # "Skipping coverage addition - not a Python-based Dockerfile" + # ) + _LOG.debug("Final Dockerfile:\n%s", dockerfile) + # Get the current architecture. + current_arch = get_current_arch() + sha256_hash = hashlib.sha256(dockerfile.encode()).hexdigest() + short_hash = sha256_hash[:8] + # Build the name of the container image. 
+ image_name_out = f"{image_name}.{current_arch}.{short_hash}" + return image_name_out, dockerfile + + +def build_container_image( + image_name: str, + dockerfile: str, + force_rebuild: bool, + use_sudo: bool, + *, + use_cache: bool = True, + incremental: bool = True, +) -> str: + """ + Build a Docker image from a Dockerfile. + + :param image_name: Name of the Docker container to build. + :param dockerfile: Content of the Dockerfile for building the + container. + :param force_rebuild: Whether to force rebuild the Docker container. + There are two level of caching. The first level of caching is + our approach of skipping `docker build` if the image already + exists and the Dockerfile hasn't changed. The second level is + the Docker cache itself, which is invalidated by `--no-cache`. + :param use_sudo: Whether to use sudo for Docker commands. + :return: Name of the built Docker container. + :raises AssertionError: If the container ID is not found. + """ + _LOG.debug(hprint.func_signature_to_str("dockerfile")) + # + image_name_out, dockerfile = get_container_image_name(image_name, dockerfile) + # Check if the container already exists. If not, build it. + has_container, _ = image_exists(image_name_out, use_sudo) + coverage_enabled = os.environ.get("COVERAGE_PROCESS_START") + # if coverage_enabled: + # # Add coverage suffix to image name for tracking. + # image_name_out += ".coverage" + # # Force rebuild when coverage is enabled. 
+ # has_container = False + # _LOG.debug( + # "Coverage enabled - forcing rebuild of image: {image_name_out}" + # ) + if bool(os.environ.get("CSFY_DOCKER_FORCE_REBUILD", False)): + _LOG.warning( + "CSFY_DOCKER_FORCE_REBUILD forcing to rebuild container without cache" + ) + force_rebuild = True + if force_rebuild: + _LOG.warning( + "Forcing to rebuild of container '%s' without cache", + image_name, + ) + has_container = False + use_cache = False + _LOG.debug(hprint.to_str("has_container use_cache")) + # # Always prepare coverage files when coverage is enabled, regardless of container existence. + # if coverage_enabled: + # # Create build context directory for coverage files. + # build_context_dir = "tmp.docker_build" + # hio.create_dir(build_context_dir, incremental=incremental) + # # Always copy .coveragerc when coverage is enabled. + # coveragerc_src = ".coveragerc" + # coveragerc_dst = os.path.join(build_context_dir, ".coveragerc") + # if os.path.exists(coveragerc_src): + # shutil.copy2(coveragerc_src, coveragerc_dst) + # _LOG.debug( + # "Coverage enabled - copied {coveragerc_src} to {coveragerc_dst}" + # ) + # else: + # _LOG.warning( + # "Coverage enabled but .coveragerc not found at {coveragerc_src}" + # ) + if not has_container: + # Create a temporary Dockerfile. + _LOG.warning("Building Docker container...") + build_context_dir = "tmp.docker_build" + if not coverage_enabled: + # Only create build context if not already created for coverage + hio.create_dir(build_context_dir, incremental=incremental) + temp_dockerfile = os.path.join(build_context_dir, "Dockerfile") + hio.to_file(temp_dockerfile, dockerfile) + # Build the container. 
+ docker_executable = get_docker_executable(use_sudo) + cmd = [ + f"{docker_executable} build", + f"-f {temp_dockerfile}", + f"-t {image_name_out}", + # "--platform linux/aarch64", + ] + if not use_cache: + cmd.append("--no-cache") + cmd.append(build_context_dir) + cmd = " ".join(cmd) + hsystem.system(cmd, suppress_output=False) + _LOG.info("Building Docker container... done") + return image_name_out + + +# ############################################################################# + + +def get_host_git_root() -> str: + """ + Get the Git root path on the host machine, when inside a Docker container. + """ + hdbg.dassert_in("CSFY_HOST_GIT_ROOT_PATH", os.environ) + host_git_root_path = os.environ["CSFY_HOST_GIT_ROOT_PATH"] + return host_git_root_path + + +def get_docker_mount_info( + is_caller_host: bool, use_sibling_container_for_callee: bool +) -> Tuple[str, str, str]: + """ + Get the Docker mount information for the current environment. + + This function determines the appropriate source and target paths for + mounting a directory in a Docker container. + + Same inputs as `convert_caller_to_callee_docker_path()`. + + :return: A tuple containing + - caller_mount_path: the mount path on the caller filesystem, e.g., + `/app` or `/Users/.../src/cmamp1` + - callee_mount_path: the mount path inside the called Docker container, + e.g., `/app` + - the mount string, e.g., + `source={caller_mount_path},target={callee_mount_path}` + type=bind,source=/app,target=/app + """ + _LOG.debug(hprint.func_signature_to_str()) + # Compute the mount path on the caller filesystem. + if is_caller_host: + # On the host machine, the mount path is the Git root. + caller_mount_path = hgit.find_git_root() + else: + # Inside a Docker container, the mount path depends on the container + # style. + use_host_git_root = ( + use_sibling_container_for_callee + and not hserver.is_csfy_dind_enabled() + ) + if use_host_git_root: + # For sibling containers, we need to get the Git root on the host. 
+ caller_mount_path = get_host_git_root() + else: + # For children containers, we need to get the local Git root on the + # host. + caller_mount_path = hgit.find_git_root() + # The target mount path is always `/app` inside the Docker container. + callee_mount_path = "/app" + # Build the Docker mount string. + mount = f"type=bind,source={caller_mount_path},target={callee_mount_path}" + _LOG.debug(hprint.to_str("caller_mount_path callee_mount_path mount")) + return caller_mount_path, callee_mount_path, mount + + +def get_docker_mount_context() -> Tuple[bool, bool, str, str, str]: + """ + Return Docker mount context for container operations. + + :return: (is_caller_host, use_sibling_container_for_callee, + caller_mount_path, callee_mount_path, mount) + """ + is_caller_host = not hserver.is_inside_docker() + use_sibling_container_for_callee = hserver.use_docker_sibling_containers() + caller_mount_path, callee_mount_path, mount = get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + return ( + is_caller_host, + use_sibling_container_for_callee, + caller_mount_path, + callee_mount_path, + mount, + ) + + +def build_and_run_docker_cmd( + use_sudo: bool, + callee_mount_path: str, + mount: str, + container_image: str, + dockerfile: str, + tool_cmd: str, + mode: str, + *, + override_entrypoint: bool = False, + wrap_in_bash: bool = False, +) -> str: + """ + Build and execute a Docker command. + """ + docker_cmd = get_docker_base_cmd(use_sudo) + if override_entrypoint: + docker_cmd.append("--entrypoint ''") + # Check that the container image exists. 
+ hdbg.dassert( + image_exists(container_image, use_sudo)[0], + "Container image '%s' does not exist", + container_image, + ) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount {mount}", + container_image, + ] + ) + if wrap_in_bash: + docker_cmd.append(f'bash -c "{tool_cmd}"') + else: + docker_cmd.append(tool_cmd) + docker_cmd_str = " ".join(docker_cmd) + return process_docker_cmd(docker_cmd_str, container_image, dockerfile, mode) + + +# TODO(gp): Move to helpers.hdbg. +def _dassert_valid_path(file_path: str, is_input: bool) -> None: + """ + Assert that a file path is valid, based on it being input or output. + + For input files, it ensures that the file or directory exists. For + output files, it ensures that the enclosing directory exists. + + :param file_path: The file path to check. + :param is_input: Whether the file path is an input file. + """ + if is_input: + # If it's an input file, then `file_path` must exist as a file or a dir. + hdbg.dassert_path_exists(file_path) + else: + # If it's an output, we might be writing a file that doesn't exist yet, + # but we assume that the including directory is already present. + dir_name = os.path.normpath(os.path.dirname(file_path)) + hio.create_dir(dir_name, incremental=True) + hdbg.dassert( + os.path.exists(file_path) or os.path.exists(dir_name), + "Invalid path: '%s' and '%s' don't exist", + file_path, + dir_name, + ) + + +# TODO(gp): Move to helpers.hdbg. +def _dassert_is_path_included(file_path: str, including_path: str) -> None: + """ + Assert that a file path is included within another path. + + This function checks if the given file path starts with the + specified including path. If not, it raises an assertion error. + + :param file_path: The file path to check. + :param including_path: The path that should include the file path. + """ + # TODO(gp): Maybe we need to normalize the paths. 
+ hdbg.dassert( + file_path.startswith(including_path), + "'%s' needs to be underneath '%s'", + file_path, + including_path, + ) + + +def convert_caller_to_callee_docker_path( + caller_file_path: str, + caller_mount_path: str, + callee_mount_path: str, + check_if_exists: bool, + is_input: bool, + is_caller_host: bool, + use_sibling_container_for_callee: bool, +) -> str: + """ + Convert a file path from the (current) caller filesystem to the called + Docker container path. + + :param caller_file_path: The file path on the caller filesystem. + :param caller_mount_path: The source mount path on the host machine. + :param callee_mount_path: The target mount path inside the Docker + container. + :param check_if_exists: Whether to check if the file path exists. + :param is_input: Whether the file path is an input file (used only if + `check_if_exists` is True). + :param is_caller_host: Whether the caller is running on the host + machine or inside a Docker container. + :param use_sibling_container_for_callee: Whether to use a sibling + container or a children container + :return: The converted file path inside the Docker container. + """ + _LOG.debug(hprint.func_signature_to_str()) + hdbg.dassert_ne(caller_file_path, "") + hdbg.dassert_ne(caller_mount_path, "") + hdbg.dassert_ne(callee_mount_path, "") + if check_if_exists: + _dassert_valid_path(caller_file_path, is_input) + # Make the path absolute with respect to the (current) caller filesystem. + abs_caller_file_path = os.path.abspath(caller_file_path) + if is_caller_host: + # On the host, the path needs to be underneath the caller mount point. + caller_mount_point = caller_mount_path + else: + # We are inside a Docker container, so the path needs to be under + # the local Git root, since this is the mount point. 
+ caller_mount_point = hgit.find_git_root() + _ = use_sibling_container_for_callee + # This is not always possible, e.g., '/var/log/app.log' needs to be + # underneath '/app' + _dassert_is_path_included(abs_caller_file_path, caller_mount_point) + # Make the path relative to the caller mount point. + _LOG.debug(hprint.to_str("caller_file_path caller_mount_point")) + rel_path = os.path.relpath(caller_file_path, caller_mount_point) + docker_path = os.path.join(callee_mount_path, rel_path) + docker_path = os.path.normpath(docker_path) + # + _LOG.debug( + " Converted %s -> %s -> %s", caller_file_path, rel_path, docker_path + ) + return docker_path + + +def is_path(path: str) -> bool: + """ + Check if `path` can be considered a file or a directory using heuristics. + + - return: True if the string looks like a path, False otherwise. + """ + # E.g., + # ``` + # is_path("file.txt") # True, since it has an extension + # is_path("/path/to/file.py") # True, since it has an absolute path + # is_path("/path/to") # True, since it has an absolute path + # is_path("../data.csv") # True, since it has an relative path + # is_path("folder/") # True, since it has a trailing slash + # is_path(".hidden") # True, since it has a leading dot + # is_path("readme") # False, since it has no extension and no path + # ``` + # Check if it has a file extension (e.g., .txt, .csv). + if os.path.splitext(path)[1]: + return True + # Check if it is an absolute or relative path (e.g., starts with "/" or "./" + # or "../") + if path.startswith("/") or path.startswith("./") or path.startswith("../"): + return True + # Check if it ends with a slash. + if path.endswith("/"): + return True + # Check if it has a hidden file. + basename = os.path.basename(path) + if basename.startswith(".") and basename.count(".") == 1: + return True + # Check if it contains a slash. 
+ if "/" in path: + return True + return False + + +def convert_all_paths_from_caller_to_callee_docker_path( + cmd_opts: List[str], + caller_mount_path: str, + callee_mount_path: str, + is_caller_host: bool, + use_sibling_container_for_callee: bool, +) -> List[str]: + """ + Convert all the paths from the caller to the callee Docker container path. + + The paths are recognized by checking whether they point to an existing file + or directory. + + The limitation of this approach is that output files are not recognized. To + work around this problem: + - Create output dirs + - Explicitly parse options that are outputs (e.g., `-o `) + + :param cmd_opts: List of command options. + :param caller_mount_path: See `get_docker_mount_info()`. + :param callee_mount_path: See `get_docker_mount_info()`. + :param is_caller_host: See `get_docker_mount_info()`. + :param use_sibling_container_for_callee: See `get_docker_mount_info()`. + :return: List of converted command options. + """ + _LOG.debug(hprint.func_signature_to_str()) + # Converted command options. + cmd_opts_out = [] + # Scan the list of command option. 
+ for cmd_opt_in in cmd_opts: + exists = os.path.exists(cmd_opt_in) + is_path_ = is_path(cmd_opt_in) + _LOG.debug(hprint.to_str("cmd_opt_in exists is_path_")) + if exists or is_path_: + check_if_exists = False + is_input = False + cmd_opt_out = convert_caller_to_callee_docker_path( + cmd_opt_in, + caller_mount_path, + callee_mount_path, + check_if_exists, + is_input, + is_caller_host, + use_sibling_container_for_callee, + ) + _LOG.debug(hprint.to_str("cmd_opt_in -> cmd_opt_out")) + cmd_opts_out.append(cmd_opt_out) + else: + _LOG.debug("File does not exist: %s", cmd_opt_in) + cmd_opts_out.append(cmd_opt_in) + _LOG.debug(hprint.to_str("cmd_opts_out")) + return cmd_opts_out + + +# ############################################################################# +# CLI utilities +# ############################################################################# + + +def add_open_arg(parser: argparse.ArgumentParser) -> None: + """ + Add --open option to parser for opening output files on macOS. + + :param parser: ArgumentParser instance to add the option to + """ + parser.add_argument( + "--open", + action="store_true", + default=False, + help="Open the output file on macOS", + ) + + +def open_file_on_macos(file_path: str) -> None: + """ + Open a file on macOS using the 'open' command. 
+ + :param file_path: Path to the file to open + :raises subprocess.CalledProcessError: If open command fails + """ + if platform.system() != "Darwin": + _LOG.warning("--open flag only works on macOS") + return + subprocess.run(["open", file_path], check=True) + _LOG.info("Opened file with macOS 'open' command: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py new file mode 100644 index 000000000..0ab2f2f2f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py @@ -0,0 +1,197 @@ +""" +Utilities for running docker tests. + +Import as: + +import helpers.hdocker_tests as hdoctest +""" + +import glob +import logging +import os +from typing import List + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# ############################################################################# +# Constants +# ############################################################################# + + +# Pattern for docker test files. +DOCKER_TEST_PATTERN = "docker_test_*.py" + + +# ############################################################################# +# Helper functions +# ############################################################################# + + +def get_docker_test_files(test_dir: str) -> List[str]: + """ + Find all docker test files in the specified directory. 
+ + :param test_dir: directory to search for test files + :return: sorted list of test file paths + """ + pattern = os.path.join(test_dir, DOCKER_TEST_PATTERN) + files = sorted(glob.glob(pattern)) + _LOG.info("Found %d docker test files", len(files)) + for file in files: + _LOG.debug(" - %s", file) + return files + + +def _run_docker_pytest_cmd( + test_file: str, *, docker_cmd_script: str = "./docker_cmd.sh" +) -> int: + """ + Run a test file through docker_cmd.sh with pytest. + + :param test_file: path to the test file + :param docker_cmd_script: path to docker_cmd.sh script + :return: return code from the command + """ + hdbg.dassert_file_exists(test_file) + hdbg.dassert_file_exists(docker_cmd_script) + cmd = f'{docker_cmd_script} "pytest {test_file}"' + _LOG.info("Running: %s", cmd) + rc = hsystem.system(cmd, abort_on_error=False) + return rc + + +def run_docker_cmd(script_dir: str, *, shell_cmd: str = "ls /git_root") -> None: + """ + Run an arbitrary shell command inside Docker via docker_cmd.sh. + + :param script_dir: directory containing docker_cmd.sh + :param shell_cmd: shell command to run inside the container + """ + hdbg.dassert_path_exists(script_dir) + docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") + hdbg.dassert_file_exists(docker_cmd_script) + cmd = f"cd {script_dir} && bash {docker_cmd_script} '{shell_cmd}'" + hsystem.system(cmd) + + +def run_all_tests( + test_dir: str, *, docker_cmd_script: str = "./docker_cmd.sh" +) -> int: + """ + Find and run all docker test files in the directory. 
+ + :param test_dir: directory containing test files + :param docker_cmd_script: path to docker_cmd.sh script + :return: 0 if all tests passed, non-zero otherwise + """ + test_files = get_docker_test_files(test_dir) + if not test_files: + _LOG.warning("No docker test files found in %s", test_dir) + return 0 + failed_tests = [] + for test_file in test_files: + return_code = _run_docker_pytest_cmd( + test_file, docker_cmd_script=docker_cmd_script + ) + if return_code != 0: + failed_tests.append(test_file) + if failed_tests: + _LOG.error("Failed tests: %s", failed_tests) + return 1 + _LOG.info("All tests passed") + return 0 + + +# ############################################################################# +# DockerTestCase +# ############################################################################# + + +# TODO(gp): Can this be used for run_dockerized_* tests? +class DockerTestCase(hunitest.TestCase): + """ + Base test class for Docker tests. + + Subclasses must set `_test_file = __file__` and may add notebook test + methods that call `self._helper(notebook_name)`. + """ + + _test_file: str = "" + + @pytest.mark.slow + def test_docker_build(self) -> None: + """ + Test that docker_build.sh runs without error. + """ + # Prepare inputs. + script_dir = os.path.dirname( + os.path.dirname(os.path.abspath(self._test_file)) + ) + docker_build_script = os.path.join(script_dir, "docker_build.sh") + hdbg.dassert_file_exists(docker_build_script) + # Run test. + cmd = f"cd {script_dir} && bash {docker_build_script}" + hsystem.system(cmd) + + @pytest.mark.slow + def test_docker_cmd(self) -> None: + """ + Test that docker_cmd.sh 'ls /git_root' runs without error. + """ + # Prepare inputs. + script_dir = os.path.dirname( + os.path.dirname(os.path.abspath(self._test_file)) + ) + docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") + hdbg.dassert_file_exists(docker_cmd_script) + # Run test. 
+ cmd = f"cd {script_dir} && bash {docker_cmd_script} 'ls /git_root'" + hsystem.system(cmd) + + def test_docker_bash(self) -> None: + """ + Test that docker_bash.sh runs 'ls /git_root' and exits without error. + """ + # Prepare inputs. + script_dir = os.path.dirname( + os.path.dirname(os.path.abspath(self._test_file)) + ) + docker_bash_script = os.path.join(script_dir, "docker_bash.sh") + if not os.path.exists(docker_bash_script): + pytest.skip("docker_bash.sh not found in " + script_dir) + # Run test. + shell_cmd = "ls /git_root" + cmd = f"echo '{shell_cmd}' | bash {docker_bash_script}" + hsystem.system(cmd) + + def _run_notebook(self, notebook_name: str) -> None: + """ + Run a single notebook inside Docker. + + :param notebook_name: notebook filename relative to the project dir + """ + # Prepare inputs. + script_dir = os.path.dirname( + os.path.dirname(os.path.abspath(self._test_file)) + ) + docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") + notebook_path = os.path.join(script_dir, notebook_name) + hdbg.dassert_file_exists(notebook_path) + # Compute the notebook path inside the container via /git_root. 
+ git_root = hgit.find_git_root(script_dir) + rel_path = os.path.relpath(script_dir, git_root) + container_notebook_path = f"/git_root/{rel_path}/{notebook_name}" + cmd = ( + f"cd {script_dir} && " + f"bash {docker_cmd_script} " + f"'jupyter nbconvert --execute --to html " + f"--ExecutePreprocessor.timeout=-1 {container_notebook_path}'" + ) + hsystem.system(cmd) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py new file mode 100644 index 000000000..f52fc9230 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py @@ -0,0 +1,47 @@ +""" +Import as: + +import helpers.hemail as hemail +""" + +import email.mime.multipart as emmult +import email.mime.text as emtext +import os +import smtplib +from typing import Optional + + +def send_email( + subject: str, + message: str, + to_adr: str, + email_address: Optional[str] = None, + email_password: Optional[str] = None, + html: bool = False, +) -> None: + """ + Send mail to specified e-mail addresses. 
+ + :param message: Message to be sent + :param to_adr: Mail to which to send messages + :type list + :return: None + """ + server = smtplib.SMTP("smtp.gmail.com", 587) + server.starttls() + if email_address is None: + email_address = os.environ["AM_EMAIL_ADDRESS"] + if email_password is None: + email_password = os.environ["AM_EMAIL_PASSWORD"] + server.login(email_address, email_password) + msg = emmult.MIMEMultipart() + msg["From"] = email_address + msg["To"] = ", ".join(to_adr) + msg["Subject"] = subject + if html: + msg.attach(emtext.MIMEText(message, "html")) + else: + msg.attach(emtext.MIMEText(message, "plain")) + + text = msg.as_string() + server.sendmail(email_address, to_adr, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py new file mode 100644 index 000000000..f2e0719bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py @@ -0,0 +1,541 @@ +""" +Import as: + +import helpers.henv as henv +""" + +import logging +import os +from typing import Any, List, Tuple, Union + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio +import helpers.repo_config_utils as hrecouti + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + + +_WARNING = "\033[33mWARNING\033[0m" + + +# All printing functions should: +# - Return a string and not a list of strings +# - Add a newline at the end of the string (i.e., the string should end with +# `\n`) + + +# ############################################################################# +# Get env vars info. 
+# ############################################################################# + + +def get_env_var( + env_name: str, + *, + as_bool: bool = False, + default_value: Any = None, + abort_on_missing: bool = True, +) -> Union[str, bool, Any]: + """ + Get an environment variable by name. + + :param env_name: name of the env var + :param as_bool: convert the value into a Boolean + :param default_value: the default value to use in case it's not + defined + :param abort_on_missing: if the env var is not defined aborts, + otherwise use the default value + :return: value of env var + """ + if env_name not in os.environ: + if abort_on_missing: + hdbg.dassert_in( + env_name, + os.environ, + "Can't find env var '%s' in '%s'", + env_name, + str(os.environ), + ) + else: + return default_value + value = os.environ[env_name] + if as_bool: + # Convert the value into a boolean. + if value in ("0", "", "None", "False"): + value = False + else: + value = True + return value + + +def get_csfy_env_vars() -> List[str]: + """ + Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. + """ + # TODO(gp): We should only pass the `CSFY_` vars. + env_var_names = [ + v + for v in os.environ.keys() + if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") + ] + return env_var_names + + +# TODO(gp): Extract all the env vars that start with AM_, CK_, CSFY_ and make +# sure they have a description here. +def get_env_vars() -> List[str]: + """ + Return all the env vars that are expected to be set in Docker. + """ + # Keep in sync with `lib_tasks.py:_generate_compose_file()`. + env_var_names = [ + # Force enabling Docker-in-Docker. + "CSFY_ENABLE_DIND", + # Enable forcing certain unit tests to fail to check that unit test + # failures are caught. + "CSFY_FORCE_TEST_FAIL", + # The name of the host running Docker. + "CSFY_HOST_NAME", + # The OS of the host running Docker. + "CSFY_HOST_OS_NAME", + # The version of the host running Docker. 
+ "CSFY_HOST_OS_VERSION", + # The name of the user running the host. + "CSFY_HOST_USER_NAME", + # Whether to check if certain property of the repo are as expected or not. + "CSFY_REPO_CONFIG_CHECK", + # Path to use for `repo_config.py`. E.g., used when running `helpers` + # container to avoid using the `repo_config.py` corresponding to the + # container launching the linter. + "CSFY_REPO_CONFIG_PATH", + "GH_ACTION_ACCESS_TOKEN", + # Whether we are running inside GH Actions. + "CSFY_CI", + # TODO(gp): Difference between amp and cmamp. + # CK AWS credentials. + "CSFY_AWS_ACCESS_KEY_ID", + "CSFY_AWS_DEFAULT_REGION", + "CSFY_AWS_SECRET_ACCESS_KEY", + "CSFY_AWS_SESSION_TOKEN", + # S3 bucket to use for CK. + "CSFY_AWS_S3_BUCKET", + # Path to the ECR for the Docker images for CK. + "CSFY_ECR_BASE_PATH", + ] + # No duplicates. + # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. + hdbg.dassert_eq( + len(set(env_var_names)), + len(env_var_names), + "There are duplicates", + str(env_var_names), + ) + # Sort. + env_var_names = sorted(env_var_names) + return env_var_names + + +def get_secret_env_vars() -> List[str]: + """ + Return the list of env vars that are secrets. + """ + secret_env_var_names = [ + # TODO(gp): Difference between amp and cmamp. + "CSFY_AWS_ACCESS_KEY_ID", + "CSFY_AWS_SECRET_ACCESS_KEY", + "GH_ACTION_ACCESS_TOKEN", + ] + # No duplicates. + # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. + hdbg.dassert_eq( + len(set(secret_env_var_names)), + len(secret_env_var_names), + "There are duplicates", + str(secret_env_var_names), + ) + # Secret env vars are a subset of the env vars. + env_vars = get_env_vars() + # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead. + if not set(secret_env_var_names).issubset(set(env_vars)): + diff = set(secret_env_var_names).difference(set(env_vars)) + cmd = f"Secret vars in `{str(diff)} are not in '{str(env_vars)}'" + assert 0, cmd + # Sort. 
+ secret_env_var_names = sorted(secret_env_var_names) + return secret_env_var_names + + +def get_api_key_env_vars() -> List[str]: + """ + Return the list of env vars that are API keys. + """ + # Find all the env vars that end with "_API_KEY". + env_var_names = [ + env_var for env_var in os.environ.keys() if env_var.endswith("_API_KEY") + ] + return env_var_names + + +def check_env_vars() -> None: + """ + Make sure all the expected env vars are defined. + """ + env_vars = get_env_vars() + for env_var in env_vars: + hdbg.dassert_in( + env_var, + os.environ, + "env_var='%s' is not in env_vars='%s'", + env_var, + str(os.environ.keys()), + ) + + +def env_vars_to_string() -> str: + """ + Return a string with the signature of all the expected env vars (including + the secret ones). + """ + txt: List[str] = [] + # Get the expected env vars and the secret ones. + env_vars = get_env_vars() + secret_env_vars = get_secret_env_vars() + # Print a signature. + for env_name in env_vars: + is_defined = env_name in os.environ + is_empty = is_defined and os.environ[env_name] == "" + if not is_defined: + txt.append(f"{env_name}=undef") + else: + if env_name in secret_env_vars: + # Secret env var: print if it's empty or not. + if is_empty: + txt.append(f"{env_name}=empty") + else: + txt.append(f"{env_name}=***") + else: + # Not a secret var: print the value. + txt.append(f"{env_name}='{os.environ[env_name]}'") + result = "\n".join(txt) + return result + + +# ############################################################################# +# Get Git info. +# ############################################################################# + + +# Copied from helpers.hgit to avoid circular dependencies. + + +def _git_log(num_commits: int = 5, my_commits: bool = False) -> str: + """ + Return the output of a pimped version of git log. 
+ + :param num_commits: number of commits to report + :param my_commits: True to report only the current user commits + :return: string + """ + cmd = [] + cmd.append("git log --date=local --oneline --graph --date-order --decorate") + cmd.append( + "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" + ) + cmd.append(f"-{num_commits}") + if my_commits: + # This doesn't work in a container if the user relies on `~/.gitconfig` to + # set the user name. + # TODO(gp): We should use `get_git_name()`. + cmd.append("--author $(git config user.name)") + cmd = " ".join(cmd) + data: Tuple[int, str] = hsystem.system_to_string(cmd) + _, txt = data + return txt + + +# End copy. + + +def _get_git_signature(git_commit_type: str = "all") -> str: + """ + Get information about current branch and latest commits. + """ + txt: List[str] = [] + # Get the branch name. + cmd = "git branch --show-current" + _, branch_name = hsystem.system_to_one_line(cmd) + txt.append(f"branch_name='{branch_name}'") + # Get the short Git hash of the current branch. + cmd = "git rev-parse --short HEAD" + _, hash_ = hsystem.system_to_one_line(cmd) + txt.append(f"hash='{hash_}'") + # Add info about the latest commits. + num_commits = 3 + if git_commit_type == "all": + txt.append("# Last commits:") + log_txt = _git_log(num_commits=num_commits, my_commits=False) + txt.append(hprint.indent(log_txt)) + elif git_commit_type == "mine": + txt.append("# Your last commits:") + log_txt = _git_log(num_commits=num_commits, my_commits=True) + txt.append(hprint.indent(log_txt)) + elif git_commit_type == "none": + pass + else: + raise ValueError(f"Invalid value='{git_commit_type}'") + # + result = "\n".join(txt) + "\n" + hdbg.dassert(result.endswith("\n"), "result='%s'", result) + return result + + +# def _get_submodule_signature( +# partial_signature: List[str], *, git_commit_type: str = "all" +# ) -> str: +# """ +# Add git signature for all submodules. 
+# :param partial_signature: the signature to append to +# `git_commit_type` the type of git commit to include in the +# signature +# :return: system signature enhanced by git submodule info +# """ +# # TODO(Juraj): Think of a better generalisation rather listing all the options. +# submodule_options = ["amp", "amp/helpers_root", "helpers_root"] +# signature = partial_signature +# prev_cwd = os.getcwd() +# for submodule in submodule_options: +# if os.path.exists(submodule): +# try: +# # Temporarily descend into submodule. +# os.chdir(submodule) +# signature.append(f"# Git {submodule}") +# git_amp_sig = _get_git_signature(git_commit_type) +# signature = _append(signature, git_amp_sig) +# # In case there is a runtime error we want to end up in a consistent +# # state (the original path). +# finally: +# os.chdir(prev_cwd) +# hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) +# return signature + + +# ############################################################################# +# Get system info. +# ############################################################################# + + +def _get_platform_info() -> str: + """ + Get platform information as a list of strings. + """ + import platform + + txt_tmp: List[str] = [] + uname = platform.uname() + txt_tmp.append(f"system={uname.system}") + txt_tmp.append(f"node name={uname.node}") + txt_tmp.append(f"release={uname.release}") + txt_tmp.append(f"version={uname.version}") + txt_tmp.append(f"machine={uname.machine}") + txt_tmp.append(f"processor={uname.processor}") + # + txt = hprint.to_info("Platform info", txt_tmp) + return txt + + +def _get_psutil_info() -> str: + """ + Get system resource information using psutil. 
+ """ + try: + import psutil + + has_psutil = True + except ModuleNotFoundError as e: + _LOG.warning("psutil is not installed: %s", str(e)) + has_psutil = False + txt_tmp = [] + if has_psutil: + txt_tmp.append(f"cpu count={psutil.cpu_count()}") + if hasattr(psutil, "cpu_freq") and psutil.cpu_freq is not None: + txt_tmp.append(f"cpu freq={str(psutil.cpu_freq())}") + else: + txt_tmp.append("cpu freq=unavailable") + # TODO(gp): Report in MB or GB. + txt_tmp.append(f"memory={str(psutil.virtual_memory())}") + txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") + else: + txt_tmp.append("psutil is not installed") + # + txt = hprint.to_info("psutils info", txt_tmp) + return txt + + +# ############################################################################# +# Get package info. +# ############################################################################# + + +def _get_library_version(lib_name: str) -> str: + try: + cmd = f"import {lib_name}" + # pylint: disable=exec-used + exec(cmd) + except ImportError: + version = "?" + else: + cmd = f"{lib_name}.__version__" + version = eval(cmd) + return version + + +def _get_package_info() -> Tuple[str, int]: + """ + Get package version information. 
+ + Returns: + Tuple containing: + - List of strings with package info + - Number of failed imports + """ + import platform + + txt_tmp = [] + packages = [] + packages.append(("python", platform.python_version())) + # import sys + # print(sys.version) + libs = [ + "cvxopt", + "cvxpy", + "gluonnlp", + "gluonts", + "joblib", + "mxnet", + "numpy", + "pandas", + "pyarrow", + "scipy", + "seaborn", + "sklearn", + "statsmodels", + ] + libs = sorted(libs) + failed_imports = 0 + for lib in libs: + # This is due to Cmamp4924: + # WARNING: libarmpl_lp64_mp.so: cannot open shared object file: No such + # file or directory + try: + version = _get_library_version(lib) + except OSError as e: + print(_WARNING + ": " + str(e)) + if version.startswith("ERROR"): + failed_imports += 1 + packages.append((lib, version)) + txt_tmp.extend([f"{lib}: {version}" for (lib, version) in packages]) + # + txt = hprint.to_info("Packages", txt_tmp) + return txt, failed_imports + + +# ############################################################################# + + +def _get_git_info(git_commit_type: str) -> str: + txt_tmp: List[str] = [] + try: + txt_tmp.append(_get_git_signature(git_commit_type)) + # If there are any submodules, fetch their git signature. + # txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) + except RuntimeError as e: + _LOG.warning(str(e)) + txt_tmp.append("No git info") + # + txt = hprint.to_info("Git info", txt_tmp) + return txt + + +# ############################################################################# +# Get system signature. +# ############################################################################# + + +def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: + """ + Return a string with the system signature. + + :param git_commit_type: the type of git commit to include in the + signature + :return: the system signature and the number of failed imports + """ + txt: List[str] = [] + # Add container version. 
+ txt_tmp = hversio.get_container_version_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add Git signature. + txt_tmp = _get_git_info(git_commit_type) + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add platform info. + txt_tmp = _get_platform_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add psutil info. + txt_tmp = _get_psutil_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add Docker info. + txt_tmp = hserver.get_docker_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add package info. + txt_tmp, failed_imports = _get_package_info() + hprint.dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # + txt_str: str = hprint.to_info("System signature", txt) + return txt_str, failed_imports + + +# ############################################################################# +# Package all the information into a string. +# ############################################################################# + + +def env_to_str( + repo_config: bool = True, + server_config: bool = True, + system_signature: bool = True, + env_vars: bool = True, +) -> str: + """ + Package all the information into a string. 
+ """ + # + msg = "" + # + if repo_config: + repo_config_str = hrecouti.get_repo_config().config_func_to_str() + msg += hprint.to_info("Repo config", repo_config_str) + "\n" + # + if server_config: + server_config_str = hserver.config_func_to_str() + msg += hprint.to_info("Server config", server_config_str) + "\n" + # + if system_signature: + msg += get_system_signature()[0] + "\n" + # + if env_vars: + env_vars_str = env_vars_to_string() + msg += hprint.to_info("Env vars", env_vars_str) + "\n" + return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py new file mode 100644 index 000000000..d758ff16b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py @@ -0,0 +1,232 @@ +""" +Import as: + +import helpers.hfile_tree as hfiltree +""" + +import logging +import os +import pathlib +import re +from typing import Dict, List + +_LOG = logging.getLogger(__name__) + + +def _build_tree_lines( + dir_name: str, + nodes: List[pathlib.Path], + comments: Dict[str, str], +) -> str: + """ + Build the text lines for the directory tree while preserving inline + comments. 
+ + :param dir_name: the directory name + :param nodes: relative paths under the given directory + :param comments: inline comments from existing file + :return: a formatted tree + + Example output: + ``` + devops + - __init__.py + - compose + - __init__.py + - tmp.docker-compose.yml + - docker_build + - create_users.sh + - dev.Dockerfile + - dockerignore.dev + - dockerignore.prod + - etc_sudoers + - fstab + - install_cprofile.sh + - install_dind.sh + - install_os_packages.sh + - install_publishing_tools.sh + - install_python_packages.sh + - pip_list.txt + - poetry.lock + - poetry.toml + - prod.Dockerfile + - pyproject.python_data_stack.toml + - pyproject.toml + - update_os.sh + - utils.sh + - docker_run + - bashrc + - docker_setenv.sh + - entrypoint.sh + - run_jupyter_server.sh + - env + - default.env + ``` + """ + lines = [dir_name] + for rel in nodes: + indent = " " * (len(rel.parts) - 1) + key = "/".join(rel.parts) + suffix = comments.get(key, "") + lines.append(f"{indent}- {rel.name}{suffix}".rstrip()) + return "\n".join(lines) + + +def _parse_comments(old_tree: List[str]) -> Dict[str, str]: + """ + Parse existing tree lines to extract inline comments. + + :param old_tree: the existing tree block + :return: inline comments and indentations + """ + comments: Dict[str, str] = {} + stack: List[str] = [] + for line in old_tree: + # Find indents, bullet points, name, and inline comments. + match = re.match(r"^(\s*)-\s+([^\s#]+)(\s*#.*)?$", line) + if not match: + continue + indent, name, suffix = match.groups() + level = len(indent) // 2 + stack = stack[:level] + stack.append(name) + key = "/".join(stack) + comments[key] = suffix or "" + return comments + + +def _get_tree_nodes( + dir_path: pathlib.Path, + depth: int, + include_tests: bool, + include_python: bool, + only_dirs: bool, +) -> List[pathlib.Path]: + """ + Get relative paths under the given directory based on filters. 
+ + Filters include: + - Test files and directories + - Python files + + :param dir_path: the directory path + :param depth: maximum depth to traverse + :param include_tests: include test files or directories + :param include_python: only show python files + :param only_dirs: only show directories + :return: all relative paths that match the specified flags + """ + nodes: List[pathlib.Path] = [] + for dirpath, dirnames, filenames in os.walk(dir_path): + rel_dir = pathlib.Path(dirpath).relative_to(dir_path) + level = len(rel_dir.parts) + if 0 < depth <= level: + # Stop pruning on given depth. + dirnames[:] = [] + continue + if not include_tests: + # Prune out test directories. + filtered = [] + for d in dirnames: + dir_lower = d.lower() + if not ( + dir_lower.startswith("test_") + or dir_lower in {"test", "tests"} + ): + filtered.append(d) + dirnames[:] = filtered + candidates = dirnames + filenames + for name in candidates: + full_path = pathlib.Path(dirpath) / name + rel_path = full_path.relative_to(dir_path) + name_lower = name.lower() + is_dir = full_path.is_dir() + is_test_name = name_lower.startswith("test_") or name_lower in { + "test", + "tests", + } + is_test = is_test_name or name_lower.endswith("_test.py") + is_python = full_path.suffix in {".py", ".ipynb"} + if is_dir: + # Always include directories. + nodes.append(rel_path) + continue + # Flag filter to include test or python files. + allowed_by_flag = (include_tests and is_test) or ( + include_python and is_python + ) + if only_dirs: + include_file = allowed_by_flag + else: + include_file = allowed_by_flag or ( + not is_test + and not is_python + and not include_tests + and not include_python + ) + if include_file: + nodes.append(rel_path) + nodes.sort() + return nodes + + +def generate_tree( + path: str, + depth: int, + include_tests: bool, + include_python: bool, + only_dirs: bool, + output: str, +) -> str: + """ + Generate a directory tree, and optionally update or create a markdown file. 
+ + :param path: directory path to traverse + :param depth: maximum depth to traverse + :param include_tests: include test files or directories + :param include_python: include show python files + :param only_dirs: only show directories + :param output: path of the markdown file to create or update + """ + dir_path = pathlib.Path(path).resolve() + nodes = _get_tree_nodes( + dir_path, depth, include_tests, include_python, only_dirs + ) + _LOG.debug("Collected %d nodes under '%s'", len(nodes), dir_path) + if output: + output_path = pathlib.Path(output) + start_marker = f"" + end_marker = "" + prefix = [] + suffix = [] + comments = {} + if output_path.exists(): + # Parse inline comments. + file = output_path.read_text(encoding="utf-8") + lines = file.splitlines() + _LOG.debug("Reading existing file '%s' for markers", output_path) + try: + idx_start = lines.index(start_marker) + idx_end = lines.index(end_marker) + _LOG.debug("Markers found at lines %d–%d", idx_start, idx_end) + except ValueError as exc: + raise RuntimeError( + "Couldn't find tree markers in output file." + ) from exc + # Parse existing file. + prefix = lines[:idx_start] + old_tree = lines[idx_start + 1 : idx_end] + suffix = lines[idx_end + 1 :] + comments = _parse_comments(old_tree) + # Build the directory tree. + tree_block = _build_tree_lines(dir_path.name, nodes, comments) + # Build the content of the file. + content = ( + "\n".join(prefix + [start_marker, tree_block, end_marker] + suffix) + + "\n" + ) + output_path.write_text(content, encoding="utf-8") + _LOG.debug("Writing updated tree to '%s'", output_path) + # Return tree without markers. 
+ tree_block = _build_tree_lines(dir_path.name, nodes, {}) + return tree_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py new file mode 100644 index 000000000..d63d59cea --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py @@ -0,0 +1,1869 @@ +""" +Import as: + +import helpers.hgit as hgit +""" + +import collections +import functools +import logging +import os +import random +import re +import string +from typing import cast, List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# We refer to "Git" when we talk about the control system (e.g., "in a Git +# repository") and `git` when we refer to implementation of Git as a program +# installed in a computer. + +# TODO(gp): Check +# https://git-scm.com/book/en/v2/Appendix-B%3A-Embedding-Git-in-your-Applications-Dulwich + +# TODO(gp): Avoid "stuttering": the module is already called "git", so no need +# to make reference to git again. + +# TODO(gp): Add mem caching to some functions below. We assume that one doesn't +# change dir (which is a horrible idea) and thus we can memoize. + +# TODO(gp): Spell super_module and sub_module always in the same way in both +# comments and code. For simplicity (e.g., instead of `super_module` in code and +# `super-module` in comment) we might want to spell `supermodule` everywhere. 
# #############################################################################
# Git branch functions
# #############################################################################


def extract_gh_issue_number_from_branch(branch_name: str) -> Optional[int]:
    """
    Extract the GitHub issue number from a branch name.

    Example:
    CmampTask10725_Add_more_tabs_to_orange_tmux -> 10725
    HelpersTask23_Add_more_tabs_to_orange_tmux -> 23.

    Works only if `invoke gh_branch_create` was used to create the branch,
    or the name was retrieved using `invoke gh_issue_title`.

    :param branch_name: the name of the branch
    :return: the issue number or None if it can't be extracted
    """
    match = re.match(r".*Task_?(\d+)(?:_\w+)?", branch_name)
    if match:
        # Return the captured number.
        return int(match.group(1))
    return None


def get_branch_name(dir_name: str = ".") -> str:
    """
    Return the name of the Git branch in a directory.

    E.g., `master` or `AmpTask672_Add_script_to_check_and_merge_PR`

    :param dir_name: directory containing the git repository
    :return: the name of the current branch
    """
    hdbg.dassert_path_exists(dir_name)
    # > git rev-parse --abbrev-ref HEAD
    # master
    cmd = f"cd {dir_name} && git rev-parse --abbrev-ref HEAD"
    data: Tuple[int, str] = hsystem.system_to_one_line(cmd)
    _, output = data
    return output


def _get_branch_next_name_via_github_api(
    curr_branch_name: str,
    *,
    max_num_ids: int = 100,
) -> Optional[str]:
    """
    Find the next available branch name using GitHub API (fast method).

    Uses `gh pr list` to query the branches of all the PRs and extract the
    highest numeric suffix.

    :param curr_branch_name: current branch name (e.g., "gp_scratch")
    :param max_num_ids: maximum number of IDs to check (currently unused by
        this method, kept for symmetry with the linear scan)
    :return: next available branch name or None if GitHub API is not available
    """
    try:
        # Query all PRs (merged, closed, open) and extract branch names
        # matching pattern.
        # `gh pr list` returns only 30 items by default, so we raise the limit
        # (same value used in `_get_gh_pr_list()`) to avoid missing the
        # highest-numbered branch.
        cmd = (
            "gh pr list --state all --limit 1000 --json headRefName "
            "| jq -r '.[].headRefName | select(test(\"^{branch}_[0-9]+$\"))' "
            "| sed 's/.*_//' | sort -rn | head -1"
        ).format(branch=re.escape(curr_branch_name))
        _LOG.debug("Running GitHub API query: %s", cmd)
        ret, output = hsystem.system_to_one_line(cmd, suppress_output=True)
        if ret != 0:
            _LOG.debug("GitHub API query failed, falling back to linear scan")
            return None
        # Extract the highest number from all branches.
        output = output.strip()
        if output:
            highest_num = int(output)
            next_num = highest_num + 1
            new_branch_name = f"{curr_branch_name}_{next_num}"
            _LOG.info(
                "Found highest number '%s' in all branches, next is '%s'",
                highest_num,
                next_num,
            )
            return new_branch_name
        # No existing numbered branches found.
        _LOG.debug("No existing numbered branches found, starting at 1")
        return f"{curr_branch_name}_1"
    except Exception as e:
        # Best-effort: any failure means the caller should use the slow path.
        _LOG.debug(
            "Error querying GitHub API: %s, falling back to linear scan",
            e,
        )
        return None


@functools.lru_cache()
def _get_gh_pr_list() -> str:
    """
    Get a cached list of all pull requests from GitHub (merged and open).

    Results are cached via functools.lru_cache to avoid repeated GitHub API calls.

    :return: raw output from `gh pr list` command
    """
    cmd = "gh pr list -s all --limit 1000"
    rc, txt = hsystem.system_to_string(cmd)
    _ = rc
    return txt


def does_branch_exist(
    branch_name: str,
    mode: str,
    *,
    dir_name: str = ".",
) -> bool:
    """
    Check if a branch with the given name exists in local git or on GitHub.

    Supports checking in local git repository or on GitHub via the `gh` CLI.

    :param branch_name: the name of the branch to check
    :param mode: where to check ("all" checks all, "git_local", "git_remote", "github")
    :param dir_name: directory containing the git repository
    :return: True if the branch exists in the specified location
    """
    _LOG.debug(hprint.to_str("branch_name mode dir_name"))
    # Handle the "all" case by recursion on all the possible modes.
    if mode == "all":
        # Stop at the first location where the branch is found, to avoid
        # needless `git fetch` / GitHub queries.
        for mode_tmp in ("git_local", "git_remote", "github"):
            if does_branch_exist(branch_name, mode_tmp, dir_name=dir_name):
                return True
        return False
    #
    hdbg.dassert_in(mode, ("git_local", "git_remote", "github"))
    exists = False
    if mode in ("git_local", "git_remote"):
        # From https://stackoverflow.com/questions/35941566
        cmd = f"cd {dir_name} && git fetch --prune"
        hsystem.system(cmd, abort_on_error=False)
        # From https://stackoverflow.com/questions/5167957
        # > git rev-parse --verify LimeTask197_Get_familiar_with_CF2
        # f03bfa0b4577c2524afd6a1f24d06013f8aa9f1a
        # > git rev-parse --verify I_dont_exist
        # fatal: Needed a single revision
        git_branch_name = branch_name
        if mode == "git_remote":
            git_branch_name = f"origin/{git_branch_name}"
        cmd = f"cd {dir_name} && git rev-parse --verify {git_branch_name}"
        rc = hsystem.system(cmd, abort_on_error=False)
        exists = rc == 0
        _LOG.debug("branch_name='%s' on git: exists=%s", branch_name, exists)
    # Check on GitHub.
    if mode == "github":
        txt = _get_gh_pr_list()
        # ```
        # > gh pr list -s all --limit 10000 | grep AmpTask2163
        # 347 AmpTask2163_Implement_tiled_backtesting_1 AmpTask2163 ... MERGED
        # ```
        # The text is separated by tabs.
        #
        # If there are no issues on the GitHub repo, just return.
        # ```
        # > gh pr list -s all --limit 1000
        # no pull requests match your search in causify-ai/sports_analytics
        # ```
        if txt == "":
            return False
        for line in txt.split("\n"):
            # Skip blank lines (e.g., from a trailing newline) that would
            # otherwise trip the assertion on the number of fields.
            if not line.strip():
                continue
            # number, GH branch name, Git branch name, status.
            fields = line.split("\t")
            # fields=['179',
            # 'CmTask2914: Add end-to-end unit test for prod reconcile',
            # 'CmTask2914_Add_end_to_end_unit_test_around_the_prod_reconciliation',
            # 'DRAFT', '2022-09-27 19:56:50 +0000 UTC']
            hdbg.dassert_lte(4, len(fields), "fields=%s", fields)
            number, gh_branch_name, git_branch_name = fields[:3]
            _ = number, gh_branch_name
            if branch_name == git_branch_name:
                exists = True
                # One match is enough.
                break
        _LOG.debug(
            "branch_name='%s' on github: exists=%s", branch_name, exists
        )
    return exists


def _get_branch_next_name_linear_scan(
    dir_name: str,
    curr_branch_name: str,
    *,
    max_num_ids: int = 100,
    log_verb: int = logging.DEBUG,
) -> str:
    """
    Find the next available branch name using linear scanning (fallback method).

    Tries branch names sequentially until finding one that doesn't exist.

    :param dir_name: directory containing the git repository
    :param curr_branch_name: current branch name (e.g., "gp_scratch")
    :param max_num_ids: maximum number of IDs to check
    :param log_verb: logging verbosity level
    :return: next available branch name
    :raises ValueError: if all the `max_num_ids` candidate names are taken
    """
    # Try the ids 1, ..., max_num_ids (inclusive), as promised by the error
    # message below.
    for i in range(1, max_num_ids + 1):
        new_branch_name = f"{curr_branch_name}_{i}"
        _LOG.info("Trying branch name '%s' ...", new_branch_name)
        mode = "all"
        exists = does_branch_exist(new_branch_name, mode, dir_name=dir_name)
        _LOG.log(log_verb, "-> exists=%s", exists)
        if not exists:
            _LOG.log(log_verb, "new_branch_name='%s'", new_branch_name)
            return new_branch_name
    raise ValueError(
        f"Can't find the next branch name for '{curr_branch_name}' "
        f"within {max_num_ids} ids"
    )


def get_branch_next_name(
    dir_name: str = ".",
    *,
    curr_branch_name: Optional[str] = None,
    log_verb: int = logging.DEBUG,
    method: str = "auto",
) -> str:
    """
    Return a name derived from the branch so that the branch doesn't exist.

    E.g., `AmpTask1903_Implemented_system_Portfolio` ->
    `AmpTask1903_Implemented_system_Portfolio_3`

    :param dir_name: directory containing the git repository
    :param curr_branch_name: branch name to use (if None, gets current branch)
    :param log_verb: logging verbosity level
    :param method: method to use ('auto' tries fast first, 'github_api', 'linear_scan')
    :return: next available branch name
    :raises ValueError: if `method` is 'github_api' and the GitHub query fails
    """
    if curr_branch_name is None:
        curr_branch_name = get_branch_name(dir_name=dir_name)
    hdbg.dassert_ne(
        curr_branch_name, "master", "Cannot get next name for 'master' branch"
    )
    _LOG.log(log_verb, "curr_branch_name='%s'", curr_branch_name)
    max_num_ids = 100
    hdbg.dassert_in(
        method, ["auto", "github_api", "linear_scan"], "Invalid method specified"
    )
    # Try GitHub API method first (faster) if requested or on auto mode.
    next_name: Optional[str] = None
    if method in ("auto", "github_api"):
        next_name = _get_branch_next_name_via_github_api(
            curr_branch_name,
            max_num_ids=max_num_ids,
        )
        if next_name is None:
            if method == "github_api":
                raise ValueError("GitHub API method requested but failed")
            _LOG.warning("GitHub API method failed, falling back to linear scan")
    # Use the linear scan only when there is no result yet, i.e., when it was
    # explicitly requested or when the GitHub API failed in auto mode. A
    # successful GitHub API result must not be overwritten.
    if next_name is None:
        next_name = _get_branch_next_name_linear_scan(
            dir_name,
            curr_branch_name,
            max_num_ids=max_num_ids,
            log_verb=log_verb,
        )
    hdbg.dassert_ne(next_name, None)
    return cast(str, next_name)


def get_branch_hash(dir_name: str = ".") -> str:
    """
    Return the hash of the commit right before the branch was created.

    This finds the merge-base between the current branch and master, which is
    the commit where the branch was created.

    :param dir_name: directory containing the git repository
    :return: the hash of the commit where the branch diverged from master
    """
    curr_branch_name = get_branch_name(dir_name=dir_name)
    hdbg.dassert_ne(
        curr_branch_name, "master", "Cannot get branch hash for 'master' branch"
    )
    _LOG.debug("curr_branch_name=%s", curr_branch_name)
    cmd = f"cd {dir_name} && git merge-base master {curr_branch_name}"
    _, hash_ = hsystem.system_to_string(cmd)
    hash_ = hash_.rstrip("\n").lstrip("\n")
    hdbg.dassert_eq(
        len(hash_.split("\n")), 1, "Expected single hash line from merge-base"
    )
    return hash_


# #############################################################################


@functools.lru_cache()
def is_inside_submodule(git_dir: str = ".") -> bool:
    """
    Return whether a dir is inside a Git submodule or a Git supermodule.

    We determine this by checking if the current Git repo is included inside
    another Git repo.

    :param git_dir: directory to check
    :return: True if the directory is inside a submodule
    """
    cmd = []
    # Go to the directory.
    cmd.append(f"cd {git_dir}")
    # > cd im/
    # > git rev-parse --show-toplevel
    # /Users/saggese/src/.../amp
    cmd.append('cd "$(git rev-parse --show-toplevel)/.."')
    # > git rev-parse --is-inside-work-tree
    # true
    cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)")
    # Execute the command chain and check the return code.
    cmd_as_str = " && ".join(cmd)
    rc = hsystem.system(cmd_as_str, abort_on_error=False)
    ret: bool = rc == 0
    return ret


# #############################################################################
# Git submodule functions
# #############################################################################


@functools.lru_cache()
def get_client_root(super_module: bool) -> str:
    """
    Return the full path of the root of the Git client.

    E.g., `/Users/saggese/src/.../amp`.

    :param super_module: if True use the root of the Git super_module,
        if we are in a submodule. Otherwise use the Git sub_module root
    :return: the real (symlink-resolved) path of the Git client root
    """
    if super_module and is_inside_submodule():
        # https://stackoverflow.com/questions/957928
        # > cd /Users/saggese/src/.../amp
        # > git rev-parse --show-superproject-working-tree
        # /Users/saggese/src/...
        cmd = "git rev-parse --show-superproject-working-tree"
    else:
        # > git rev-parse --show-toplevel
        # /Users/saggese/src/.../amp
        cmd = "git rev-parse --show-toplevel"
    # TODO(gp): Use system_to_one_line().
    _, out = hsystem.system_to_string(cmd)
    out = out.rstrip("\n")
    hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'")
    client_root: str = os.path.realpath(out)
    return client_root


# TODO(gp): Replace `get_client_root` with this.
# TODO(gp): -> get_client_root2() or get_outermost_supermodule_root()
def find_git_root(path: str = ".") -> str:
    """
    Find recursively the dir of the outermost super module.

    This function traverses the directory hierarchy upward from a specified
    starting path to find the root directory of a Git repository.
    It supports:
    - standard git repository: where a `.git` directory exists at the root
    - submodule: where repository is nested inside another, and the `.git` file
      contains a `gitdir:` reference to the submodule's actual Git directory
    - linked repositories: where the `.git` file points to a custom Git directory
      location, such as in Git worktrees or relocated `.git` directories

    :param path: starting file system path. Defaults to the current directory (".")
    :return: absolute path to the top-level Git repository directory
    """
    import helpers.hio as hio

    path = os.path.abspath(path)
    git_root_dir = None
    while True:
        git_dir = os.path.join(path, ".git")
        _LOG.debug("git_dir=%s", git_dir)
        # Check if `.git` is a directory which indicates a standard Git repository.
        if os.path.isdir(git_dir):
            # Found the Git root directory.
            git_root_dir = path
            break
        # Check if `.git` is a file which indicates submodules or linked setups.
        if os.path.isfile(git_dir):
            txt = hio.from_file(git_dir)
            lines = txt.split("\n")
            for line in lines:
                # Look for a `gitdir:` line that specifies the linked directory.
                # Example: `gitdir: ../.git/modules/helpers_root` (submodule)
                # or `gitdir: /path/to/.git/worktrees/name` (worktree).
                if line.startswith("gitdir:"):
                    git_dir_path = line.split(":", 1)[1].strip()
                    _LOG.debug("git_dir_path=%s", git_dir_path)
                    # For worktrees, the current path is the root of the worktree.
                    # The worktree's `.git` file points to the shared git directory
                    # (e.g., main_repo/.git/worktrees/worktree_name).
                    if ".git/worktrees/" in git_dir_path:
                        git_root_dir = path
                    else:
                        # For other linked setups (submodules, custom .git directory),
                        # traverse up to find the root of the target repository.
                        abs_git_dir = os.path.abspath(
                            os.path.join(path, git_dir_path)
                        )
                        # Traverse up to find the top-level `.git` directory.
                        while True:
                            # Check if the current directory is a `.git` directory.
                            if os.path.basename(abs_git_dir) == ".git":
                                git_root_dir = os.path.dirname(abs_git_dir)
                                # Found the root.
                                break
                            # Move one level up in the directory structure.
                            parent = os.path.dirname(abs_git_dir)
                            # Reached the filesystem root without finding the `.git` directory.
                            hdbg.dassert_ne(
                                parent,
                                abs_git_dir,
                                "Top-level .git directory not found.",
                            )
                            # Continue traversing up.
                            abs_git_dir = parent
                    # Only the first `gitdir:` line is relevant.
                    break
        # Exit the loop if the Git root directory is found.
        if git_root_dir is not None:
            break
        # Move up one level in the directory hierarchy.
        parent = os.path.dirname(path)
        # Reached the filesystem root without finding `.git`.
        hdbg.dassert_ne(
            parent,
            path,
            "No .git directory or file found in any parent directory.",
        )
        # Update the path to the parent directory for the next iteration.
        path = parent
    hdbg.dassert_is_not(
        git_root_dir, None, "Git root directory should have been found"
    )
    return str(git_root_dir)


# #############################################################################


# TODO(gp): There are several functions doing the same work.
# helpers_root/helpers/hgit.py:827:def find_file_in_git_tree(
# helpers_root/helpers/hsystem.py:757:def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str:
def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str:
    """
    Find a file within a directory hierarchy, excluding version control and cache dirs.

    Searches for the file starting from a directory, skipping .git and .mypy_cache
    to avoid expensive traversals.

    :param file_name: the name of the file to find
    :param dir_path: the directory to start the search from (defaults to git root)
    :return: the path of the file found, as printed by `find`
    """
    if dir_path is None:
        dir_path = find_git_root()
    _LOG.debug(hprint.to_str("dir_path"))
    cmd = (
        rf"find {dir_path} "
        + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune "
        + rf'-o -name "{file_name}" -print'
    )
    _LOG.debug(hprint.to_str("cmd"))
    _, res = hsystem.system_to_one_line(cmd)
    # An empty result means that `find` didn't print any path. Compare against
    # the empty string (not against the message) so that this is detected.
    hdbg.dassert_ne(res, "", "Can't find file '%s' in '%s'", file_name, dir_path)
    return res


def _is_repo(repo_short_name: str) -> bool:
    """
    Check if the current directory is in a repository with the given short name.

    Uses repo config to determine the repository type without relying on
    directory names.

    :param repo_short_name: the short name of the repository to check (e.g.,
        "helpers", "amp")
    :return: True if the current directory is in the specified repository
    """
    import helpers.repo_config_utils as hrecouti

    curr_repo_short_name = hrecouti.get_repo_config().get_repo_short_name()
    is_repo = bool(curr_repo_short_name == repo_short_name)
    return is_repo


def is_helpers() -> bool:
    """
    Return whether we are inside `helpers` repo.

    Either as super module, or a sub module depending on a current
    working directory.
    """
    return _is_repo("helpers")


def find_helpers_root(dir_path: str = ".") -> str:
    """
    Find the root directory of the `helpers` repository.

    If the current directory is within the `helpers` repository, the root of the
    repository is returned. Otherwise, the function searches for the `helpers_root`
    directory starting from the root of the repository.

    :param dir_path: starting directory for the search
    :return: absolute path to the `helpers_root` directory
    """
    with hsystem.cd(dir_path):
        git_root = find_git_root()
        if is_helpers():
            # If we are in `helpers` repo as supermodule, its root is the helpers_root.
            cmd = "git rev-parse --show-toplevel"
            _, helpers_root = hsystem.system_to_one_line(cmd)
        else:
            # Search for the `helpers_root` directory from the root of the supermodule.
            helpers_root = find_file("helpers_root", dir_path=git_root)
        helpers_root = os.path.abspath(helpers_root)
        # Verify that the directory and `helpers` subdirectory exist.
        hdbg.dassert_dir_exists(
            helpers_root, "helpers_root directory must exist"
        )
        hdbg.dassert_dir_exists(
            os.path.join(helpers_root, "helpers"),
            "helpers subdirectory must exist within helpers_root",
        )
    return helpers_root


# #############################################################################


def resolve_git_client_dir(git_client_name: str) -> str:
    """
    Resolve the absolute path of the Git client directory.

    Supports both relative names (assumed to be in ~/src/) and absolute paths.

    :param git_client_name: the name of the Git client (e.g., "helpers1"
        or "/Users/saggese/src/helpers1")
    :return: the absolute path of the Git client directory
    """
    if not os.path.isabs(git_client_name):
        # Relative names are resolved relative to ~/src/ directory for convenience.
        git_client_dir = os.path.join(os.environ["HOME"], "src", git_client_name)
    else:
        # Absolute paths are used as-is.
        git_client_dir = git_client_name
    _LOG.debug(hprint.to_str("git_client_dir"))
    hdbg.dassert_dir_exists(git_client_dir, "Git client directory must exist")
    return git_client_dir


def project_file_name_in_git_client(
    file_name: str,
    git_src_dir: str,
    git_dst_dir: str,
    *,
    check_src_file_exists: bool = False,
    check_dst_file_exists: bool = False,
) -> str:
    """
    Find the file corresponding to `file_name` in `git_src_dir` for the client
    `git_dst_dir`.

    This is useful when we want to find the file in a destination Git client
    directory corresponding to a file in a source Git client directory.

    E.g., for:
    ```
    file_name = '/Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py'
    git_src_dir = '/Users/saggese/src/helpers1'
    git_dst_dir = '/Users/saggese/src/helpers2'
    ```
    the output is
    `/Users/saggese/src/helpers2/dev_scripts_helpers/system_tools/path.py`

    :param file_name: the name of the file to find (which is under `git_src_dir`)
    :param git_src_dir: the directory of the Git client from which `file_name` is
    :param git_dst_dir: the directory of the Git client to which find the
        corresponding file
    :param check_src_file_exists: if True, check that `file_name` exists in
        `git_src_dir`
    :param check_dst_file_exists: if True, check that the file in `git_dst_dir`
        exists
    :return: the absolute path of the file in `git_dst_dir`
    """
    if not os.path.isabs(file_name):
        file_name = os.path.abspath(file_name)
    if check_src_file_exists:
        hdbg.dassert_file_exists(file_name)
    if not os.path.isabs(git_src_dir):
        git_src_dir = os.path.abspath(git_src_dir)
    if not os.path.isabs(git_dst_dir):
        git_dst_dir = os.path.abspath(git_dst_dir)
    # Compute the relative path of the file in the source git client.
    hdbg.dassert_is_path_abs(file_name)
    hdbg.dassert_is_path_abs(git_src_dir)
    rel_path = os.path.relpath(file_name, git_src_dir)
    # Compute the absolute path of the file in the destination git client.
    hdbg.dassert_is_path_abs(git_dst_dir)
    dst_file_path = os.path.join(git_dst_dir, rel_path)
    dst_file_path = os.path.abspath(dst_file_path)
    if check_dst_file_exists:
        hdbg.dassert_file_exists(dst_file_path)
    return dst_file_path


def get_project_dirname(only_index: bool = False) -> str:
    """
    Return the name of the project directory (e.g., `/Users/saggese/src/amp1` -> `amp1`).

    NOTE: This works properly only outside Docker. Inside Docker the Git client is
    mapped to `/app`, so the result might be incorrect.

    :param only_index: if True, return only the numeric suffix (e.g., "1" from
        "amp1", "12" from "amp12")
    :return: the directory name or numeric index suffix
    """
    # git_dir = get_client_root(super_module=True)
    git_dir = find_git_root()
    _LOG.debug("git_dir=%s", git_dir)
    ret = os.path.basename(git_dir)
    if only_index:
        last_char = ret[-1]
        hdbg.dassert(
            last_char.isdigit(),
            "The last char `%s` of the git dir `%s` is not a digit",
            last_char,
            git_dir,
        )
        # Keep all the trailing digits, so that multi-digit indices (e.g.,
        # `amp12`) are not truncated to their last digit.
        idx = len(ret)
        while idx > 0 and ret[idx - 1].isdigit():
            idx -= 1
        ret = ret[idx:]
    _LOG.debug("ret=%s", ret)
    return ret


def is_amp() -> bool:
    """
    Return whether we are inside `amp` repo.

    Either as super module or a sub module depending on a current
    working directory.
    """
    return _is_repo("amp") or _is_repo("cmamp") or _is_repo("sorr")


def is_in_helpers_as_supermodule() -> bool:
    """
    Return whether we are in the `helpers` repo and it's a super-module, i.e.,
    `helpers` by itself.
    """
    return is_helpers() and not is_inside_submodule(".")


# TODO(gp): Be consistent with submodule and sub-module in the code. Same for
# supermodule.
def is_in_amp_as_submodule() -> bool:
    """
    Return whether we are in the `amp` repo and it's a sub-module, e.g., of
    `lm`.
    """
    return is_amp() and is_inside_submodule(".")


def is_in_amp_as_supermodule() -> bool:
    """
    Return whether we are in the `amp` repo and it's a super-module, i.e.,
    `amp` by itself.
    """
    return is_amp() and not is_inside_submodule(".")


def is_amp_present(*, dir_name: str = ".") -> bool:
    """
    Return whether the `amp` dir exists.

    This is a bit of a hacky way of knowing if there is the amp
    submodule.

    :param dir_name: path to the directory where we want to
        check the existence of `amp`.
    :return: True if `<dir_name>/amp` exists
    """
    amp_path = os.path.join(dir_name, "amp")
    return os.path.exists(amp_path)


# Using these functions is the last resort to skip / change the tests depending
# on the repo.
# We should control the tests through what functionalities they
# have, rather than the name of the repo.


def is_cmamp() -> bool:
    """
    Return whether we are inside `cmamp` repo.
    """
    return _is_repo("cmamp")


def is_lem() -> bool:
    """
    Return whether we are inside `lem` repo.
    """
    return _is_repo("lem")


def is_lime() -> bool:
    """
    Return whether we are inside `lime` repo.
    """
    return _is_repo("lime")


# #############################################################################


def _get_submodule_hash(dir_name: str) -> str:
    """
    Report the Git hash that a submodule is at from the supermodule perspective.

    Uses git ls-tree to get the submodule commit hash from the parent repository.
    > git ls-tree master | grep <dir_name>
    160000 commit 0011776388b4c0582161eb2749b665fc45b87e7e amp

    :param dir_name: the name of the submodule directory
    :return: the git commit hash of the submodule
    """
    hdbg.dassert_path_exists(dir_name)
    # Use git ls-tree to get the submodule entry which includes its hash.
    cmd = f"git ls-tree master | grep {dir_name}"
    data: Tuple[int, str] = hsystem.system_to_one_line(cmd)
    _, output = data
    _LOG.debug("output=%s", output)
    # Parse the output; format is: "160000 commit <hash> <path>".
    fields: List[str] = output.split()
    _LOG.debug("fields=%s", fields)
    # Fail with a clear message (instead of an `IndexError`) if the entry for
    # the submodule is missing or malformed.
    hdbg.dassert_lte(
        3, len(fields), "Unexpected `git ls-tree` output='%s'", output
    )
    # Extract the hash from the third field (index 2).
    git_hash = fields[2]
    return git_hash


@functools.lru_cache()
def get_path_from_supermodule() -> Tuple[str, str]:
    """
    Return the path to the Git repo including the Git submodule for a submodule.

    Returns the superproject path and submodule path, or empty for a supermodule.
    E.g.,
    - for amp included in another repo returns 'amp'
    - for amp without supermodule returns ''

    :return: tuple of (superproject_path, submodule_path)
    """
    # Get the superproject working tree path.
    cmd = "git rev-parse --show-superproject-working-tree"
    # > cd /Users/saggese/src/.../lm/amp
    # > git rev-parse --show-superproject-working-tree
    # /Users/saggese/src/.../lm
    #
    # > cd /Users/saggese/src/.../lm
    # > git rev-parse --show-superproject-working-tree
    # (No result)
    superproject_path: str = hsystem.system_to_one_line(cmd)[1]
    _LOG.debug("superproject_path='%s'", superproject_path)
    # Query the .gitmodules file to get the path for the current submodule.
    cmd = (
        f"git config --file {superproject_path}/.gitmodules --get-regexp path"
        '| grep $(basename "$(pwd)")'
        "| awk '{ print $2 }'"
    )
    # > git config --file /Users/saggese/src/.../.gitmodules --get-regexp path
    # submodule.amp.path amp
    submodule_path: str = hsystem.system_to_one_line(cmd)[1]
    _LOG.debug("submodule_path='%s'", submodule_path)
    return superproject_path, submodule_path


@functools.lru_cache()
def get_submodule_paths() -> List[str]:
    """
    Return the path of the submodules in this repo.

    NOTE: the result is cached and the same list object is returned on every
    call, so callers must not modify it in place.

    :return: list of submodule paths, e.g., ["amp"] or []
    """
    # Query .gitmodules to get submodule paths.
    # > git config --file .gitmodules --get-regexp path
    # submodule.amp.path amp
    cmd = "git config --file .gitmodules --get-regexp path | awk '{ print $2 }'"
    _, txt = hsystem.system_to_string(cmd)
    _LOG.debug("txt=%s", txt)
    # Convert the output string to a list of paths.
    files: List[str] = hsystem.text_to_list(txt)
    _LOG.debug("files=%s", files)
    return files


def has_submodules() -> bool:
    """
    Return whether the repository has any submodules configured.

    :return: True if the repository contains submodules
    """
    return len(get_submodule_paths()) > 0


# #############################################################################


def _get_hash(git_hash: str, short_hash: bool, num_digits: int = 8) -> str:
    """
    Return the git hash, optionally shortened.

    :param git_hash: the full git hash
    :param short_hash: if True, return only the first num_digits characters
    :param num_digits: number of digits for short hash
    :return: the git hash or shortened version
    """
    hdbg.dassert_lte(1, num_digits)
    # Return shortened hash if requested, otherwise return full hash.
    if short_hash:
        ret = git_hash[:num_digits]
    else:
        ret = git_hash
    return ret


def _group_hashes(head_hash: str, remh_hash: str, subm_hash: str) -> str:
    """
    Group multiple hashes and display which ones are equal.

    Transform three hashes into a string that shows which ones are identical.
    For example, if head_hash == remh_hash, display
    "head_hash = remh_hash = <hash>".

    :param head_hash: the head hash
    :param remh_hash: the remote head hash
    :param subm_hash: the submodule hash (skipped if empty)
    :return: formatted string showing hash equality, one line per distinct hash
    """
    # Build a mapping from hash names to their values.
    map_ = collections.OrderedDict()
    map_["head_hash"] = head_hash
    map_["remh_hash"] = remh_hash
    if subm_hash:
        map_["subm_hash"] = subm_hash
    # Invert the mapping to group identical hashes together, preserving the
    # order in which each hash value is first seen.
    inv_map = collections.OrderedDict()
    for name, hash_ in map_.items():
        inv_map.setdefault(hash_, []).append(name)
    # Format the output so equal hashes are grouped together.
    # Transform:
    #   ('a2bfc704', ['head_hash', 'remh_hash'])
    # into
    #   'head_hash = remh_hash = a2bfc704'
    lines = [f"{' = '.join(names)} = {hash_}" for hash_, names in inv_map.items()]
    return "\n".join(lines)


# #############################################################################
# GitHub repository name
# #############################################################################


# All functions should take as input `repo_short_name` and have a switch `mode`
# to distinguish full vs short repo name.

# TODO(gp): Maybe rename full -> long to keep it more symmetric "short vs long".
+ + +def _parse_github_repo_name(repo_name: str) -> Tuple[str, str]: + """ + Parse a repo name from `git remote`. + + The supported formats are both SSH and HTTPS, e.g., + - `git@github.com:alphamatic/amp` + - `https://github.com/alphamatic/amp` + + For both of these strings the function returns ("github.com", "alphamatic/amp"). + """ + # Try to parse the SSH format, e.g., `git@github.com:alphamatic/amp` + m = re.match(r"^git@(\S+.com):(\S+)$", repo_name) + if not m: + # Try tp parse the HTTPS format, e.g., `https://github.com/alphamatic/amp` + m = re.match(r"^https://(\S+.com)/(\S+)$", repo_name) + hdbg.dassert(m, "Can't parse '%s'", repo_name) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. + assert m is not None + host_name = m.group(1) + repo_name = m.group(2) + _LOG.debug("host_name=%s repo_name=%s", host_name, repo_name) + # We expect something like "alphamatic/amp". + m = re.match(r"^\S+/\S+$", repo_name) + hdbg.dassert(m, "repo_name='%s'", repo_name) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. + assert m is not None + # origin git@github.com:.../ORG_....git (fetch) + suffix_to_remove = ".git" + if repo_name.endswith(suffix_to_remove): + repo_name = repo_name[: -len(suffix_to_remove)] + return host_name, repo_name + + +def get_repo_full_name_from_dirname( + dir_name: str, include_host_name: bool +) -> str: + """ + Return the full name of the repo in a directory. + + E.g., "alphamatic/amp" or "github.com/alphamatic/amp" (if hostname included). + + This function relies on `git remote` to extract the origin URL. + + :param dir_name: directory containing the git repository + :param include_host_name: if True, prepend the GitHub hostname (e.g., + "github.com/alphamatic/amp") + :return: the full name of the repo + - E.g., "alphamatic/amp", "github.com/alphamatic/amp". 
+ """ + hdbg.dassert_path_exists(dir_name) + cmd = f"cd {dir_name}; (git remote -v | grep origin | grep fetch)" + _, output = hsystem.system_to_string(cmd) + # > git remote -v + # origin git@github.com:alphamatic/amp (fetch) + # origin git@github.com:alphamatic/amp (push) + data: List[str] = output.split() + _LOG.debug("data=%s", data) + hdbg.dassert_eq(len(data), 3, "Expected 3 fields from git remote output") + # Extract the origin URL (second field). + repo_name = data[1] + # Parse SSH/HTTPS URL into host and org/repo parts. + host_name, repo_name = _parse_github_repo_name(repo_name) + if include_host_name: + res = f"{host_name}/{repo_name}" + else: + res = repo_name + return res + + +# ############################################################################# +# Git hash +# ############################################################################# + + +def get_head_hash(dir_name: str = ".", short_hash: bool = False) -> str: + """ + Return the git commit hash of a repository with submodule/random suffix. + + Gets the HEAD commit hash and appends either the amp submodule hash (if present) + or a random suffix to make the hash unique across different module configurations. + + ``` + > git rev-parse HEAD + 4759b3685f903e6c669096e960b248ec31c63b69 + ``` + + :param dir_name: directory containing the git repository + :param short_hash: if True, return abbreviated hash (useful when combined with suffix) + :return: the commit hash with submodule/random suffix (e.g., "4759b36-abc123") + """ + hdbg.dassert_path_exists(dir_name) + # Get the commit hash, optionally abbreviated to 7 characters. + opts = "--short " if short_hash else " " + cmd = f"cd {dir_name} && git rev-parse {opts}HEAD" + data: Tuple[int, str] = hsystem.system_to_one_line(cmd) + _, output = data + # Check whether we are building an orange image. If the condition + # is True, add './amp' hash to the tag as well. 
def get_remote_head_hash(dir_name: str) -> str:
    """
    Return the commit hash that the remote repository's HEAD points to.

    Queries the remote with `git ls-remote` to get the current HEAD hash
    without fetching.

    :param dir_name: directory containing the git repository
    :return: the remote HEAD commit hash; if the command printed nothing, the
        (empty) output is returned unchanged
    """
    hdbg.dassert_path_exists(dir_name)
    sym_name = get_repo_full_name_from_dirname(dir_name, include_host_name=False)
    cmd = f"git ls-remote git@github.com:{sym_name} HEAD 2>/dev/null"
    data: Tuple[int, str] = hsystem.system_to_one_line(cmd)
    _, output = data
    # > git ls-remote git@github.com:alphamatic/amp HEAD 2>/dev/null
    # 921676624f6a5f3f36ab507baed1b886227ac2e6        HEAD
    # The line has two fields (hash and ref name): keep only the hash, so that
    # the returned value is what the function name promises.
    fields = output.split()
    remote_hash = fields[0] if fields else output
    return remote_hash
def is_cwd_git_repo() -> bool:
    """
    Tell whether the current working directory has a `.git` entry.

    The check only looks for a path named `.git` (either a file or a
    directory) directly inside the current directory.

    :return: True if `.git` exists in the current directory
    """
    git_marker = ".git"
    has_git_marker = os.path.exists(git_marker)
    return has_git_marker
def get_path_from_git_root(
    file_name: str,
    super_module: bool,
    *,
    git_root: Optional[str] = None,
) -> str:
    """
    Get the path of `file_name` from the root of the Git client.

    E.g., in Docker:
    - `super_module=True` -> git_root=/app
    - `super_module=False` -> git_root=/app/amp

    :param file_name: path of the file to express relative to the Git root
    :param super_module: like get_client_root()
    :param git_root: root to use; if None, it is computed with
        `get_client_root(super_module)`
    :return: `file_name` relative to `git_root`
    :raises ValueError: if `file_name` is not under `git_root`
    """
    # Get the root of the Git client.
    if git_root is None:
        git_root = get_client_root(super_module)
    #
    git_root = os.path.normpath(git_root)
    _LOG.debug("git_root=%s", git_root)
    file_name = os.path.normpath(file_name)
    _LOG.debug("file_name=%s", file_name)
    # Compare on a path boundary: a plain `startswith(git_root)` would accept
    # a sibling dir sharing the same prefix (e.g., `/app2/x` for root `/app`)
    # and return a path escaping the root (`../app2/x`).
    git_root_with_sep = os.path.join(git_root, "")
    is_under_root = file_name == git_root or file_name.startswith(
        git_root_with_sep
    )
    if is_under_root:
        # Remove the `git_root` from file_name.
        ret = os.path.relpath(file_name, git_root)
    else:
        # If the file is not under the root, we can't normalize it.
        raise ValueError(
            f"Can't normalize file_name='{file_name}' for git_root='{git_root}'"
        )
    _LOG.debug(
        "file_name=%s, git_root=%s (super_module=%s) -> ret=%s",
        file_name,
        git_root,
        super_module,
        ret,
    )
    return str(ret)
# TODO(gp): Is this needed?
def get_repo_dirs() -> List[str]:
    """
    Return the repo dirs found from the current dir, e.g., `[".", "amp"]`.

    The current dir `.` is always reported; `amp` is added only if a path with
    that name exists in the current dir.
    """
    candidate_dirs = ["amp"]
    existing_dirs = [
        candidate_dir
        for candidate_dir in candidate_dirs
        if os.path.exists(candidate_dir)
    ]
    return ["."] + existing_dirs
# TODO(gp): Use get_head_hash() and remove this.
def get_current_commit_hash(dir_name: str = ".") -> str:
    """
    Return the hash printed by `git rev-parse HEAD` for a Git client.

    :param dir_name: directory containing the git repository
    :return: the full commit hash (e.g., "0011776388b4c0582161eb2749b665fc45b87e7e")
    """
    hdbg.dassert_path_exists(dir_name)
    rev_parse_cmd = f"cd {dir_name} && git rev-parse HEAD"
    # The first element of the returned pair is not used.
    _, commit_sha = hsystem.system_to_one_line(rev_parse_cmd)
    # E.g., 0011776388b4c0582161eb2749b665fc45b87e7e
    _LOG.debug("sha=%s", commit_sha)
    return commit_sha
def _parse_git_status_porcelain_line(line: str) -> Tuple[str, str]:
    """
    Split one line of `git status --porcelain` into status code and file name.

    The format is `XY PATH` (or `XY ORIG_PATH -> PATH` for renames / copies),
    where `X` and `Y` are one character each and either of them can be a
    blank (e.g., ` M helpers/git.py`).

    :param line: a non-empty line of the porcelain output
    :return: tuple `(status_code, file_name)`
    """
    line = line.rstrip()
    # If the leading blank of the status code was lost (e.g., because the
    # output was stripped upstream), a line like ` M file` shows up as
    # `M file`: the separator is then at position 1 instead of 2. Restore the
    # blank so that the file name doesn't lose its first character.
    if len(line) >= 3 and line[1] == " " and line[2] != " ":
        line = " " + line
    status_code = line[:2]
    file_name = line[3:].strip()
    # For renames and copies report the destination path, which is the one
    # present in the client.
    is_rename_or_copy = "R" in status_code or "C" in status_code
    if is_rename_or_copy and " -> " in file_name:
        file_name = file_name.split(" -> ", 1)[1]
    return status_code, file_name


def get_modified_and_untracked_files(
    repo_path: str = ".", *, mode: str = "all"
) -> List[str]:
    """
    Get list of modified and untracked files in a git repository.

    Excludes files from submodules and deleted files.

    Mode options:
    - "all": Both modified and untracked files (default, current behavior)
    - "modified": Only files with changes (staged, modified, added, renamed, copied)
    - "untracked": Only untracked files

    This includes (when mode="all"):
    - Modified files (both staged and unstaged)
    - Untracked files
    - Cached/staged files

    The function uses `git status --porcelain -u` which shows all changes
    including cached (staged) files.

    :param repo_path: Path to the git repository
    :param mode: Filter mode: "all", "modified", or "untracked"
    :return: List of file paths relative to repo_path
    """
    hdbg.dassert_dir_exists(repo_path)
    # Validate mode.
    valid_modes = ["all", "modified", "untracked"]
    hdbg.dassert_in(
        mode,
        valid_modes,
        "Invalid mode '%s'; must be one of: %s",
        mode,
        ", ".join(valid_modes),
    )
    # Get modified and untracked files, excluding submodules.
    # The command uses:
    # - git status --porcelain -u: Get status in machine-readable format with untracked files
    #   This includes both cached (staged) and modified files
    # Status codes: ?? = untracked, M/A/R/C/D = modified/added/renamed/copied/deleted
    cmd = f"cd {repo_path} && git status --porcelain -u"
    _, output = hsystem.system_to_string(cmd, abort_on_error=False)
    # Get submodule paths to exclude.
    submodule_cmd = (
        f"cd {repo_path} && "
        "git config -f .gitmodules --get-regexp path 2>/dev/null || true"
    )
    _, submodule_output = hsystem.system_to_string(
        submodule_cmd, abort_on_error=False
    )
    submodule_paths = set()
    for line in submodule_output.strip().split("\n"):
        if line:
            # Each line has the config key followed by the submodule path: the
            # path is the last field.
            parts = line.split()
            if len(parts) >= 2:
                submodule_paths.add(parts[-1])
    # Parse output.
    files = []
    # Don't strip the lines on the left: the leading blank is part of the
    # 2-character status code.
    for line in output.splitlines():
        if not line.strip():
            continue
        status_code, file_name = _parse_git_status_porcelain_line(line)
        # Filter by mode.
        if mode == "untracked":
            # Untracked files have status "??"
            if status_code != "??":
                continue
        elif mode == "modified":
            # Modified files have any status other than "??"
            if status_code == "??":
                continue
        # Skip submodule paths.
        is_in_submodule = any(
            file_name.startswith(subpath + "/") or file_name == subpath
            for subpath in submodule_paths
        )
        if is_in_submodule:
            _LOG.debug("Skipping submodule file: %s", file_name)
            continue
        # Check if file exists (exclude deleted files).
        file_path = os.path.join(repo_path, file_name)
        if os.path.exists(file_path) and os.path.isfile(file_path):
            files.append(file_name)
        else:
            _LOG.debug("Skipping non-existent or non-file: %s", file_path)
    return files
def git_log(num_commits: int = 5, my_commits: bool = False) -> str:
    """
    Return a formatted git log with graph, timestamps, and author information.

    Uses a custom pretty format to display commits with graph visualization,
    relative time, and author name.

    :param num_commits: number of commits to report
    :param my_commits: if True, filter to only commits by the current git user
    :return: formatted git log output
    """
    cmd = []
    cmd.append("git log --date=local --oneline --graph --date-order --decorate")
    cmd.append(
        "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'"
    )
    cmd.append(f"-{num_commits}")
    if my_commits:
        # This doesn't work in a container if the user relies on `~/.gitconfig` to
        # set the user name.
        # TODO(gp): We should use `get_git_name()`.
        # The command substitution is quoted so that a user name with spaces
        # (e.g., "John Doe") is passed to `--author` as a single argument.
        cmd.append('--author "$(git config user.name)"')
    cmd_as_str = " ".join(cmd)
    data: Tuple[int, str] = hsystem.system_to_string(cmd_as_str)
    _, txt = data
    return txt
def git_stash_apply(mode: str, log_level: int = logging.DEBUG) -> None:
    """
    Restore the changes saved in the git stash.

    The top of the stash list is printed first, so that the user can see which
    stash is going to be restored.

    :param mode: "apply" to restore and keep the stash, "pop" to restore and
        drop it
    :param log_level: logging level for system output
    :raises ValueError: if `mode` is neither "pop" nor "apply"
    """
    _LOG.debug("# Checking stash head ...")
    hsystem.system(
        "git stash list | head -3", suppress_output=False, log_level=log_level
    )
    # Pick the command restoring the stash: the mode is validated only after
    # the stash list has been shown.
    _LOG.debug("# Restoring local changes...")
    mode_to_cmd = {
        "pop": "git stash pop --quiet",
        "apply": "git stash apply --quiet",
    }
    if mode not in mode_to_cmd:
        raise ValueError(f"mode='{mode}'")
    restore_cmd = mode_to_cmd[mode]
    hsystem.system(restore_cmd, suppress_output=False, log_level=log_level)
def git_describe(
    match: Optional[str] = None, log_level: int = logging.DEBUG
) -> str:
    """
    Return the output of `git describe --tags --always --abbrev=0`.

    With these options git reports the closest tag, falling back to an
    abbreviated commit hash when no tag is found.

    :param match: optional glob pattern passed to `--match` to filter the tags
        (e.g., "cmamp-*"); it must be a non-empty string
    :param log_level: logging level for system output
    :return: the closest tag (e.g., "1.0.0") or a short commit hash
    """
    _LOG.debug("# Looking for version ...")
    describe_cmd = "git describe --tags --always --abbrev=0"
    if match is not None:
        hdbg.dassert_isinstance(match, str, "match pattern must be a string")
        hdbg.dassert_ne(match, "", "match pattern cannot be empty")
        describe_cmd += f" --match '{match}'"
    # The first element of the returned pair is not used.
    _, tag = hsystem.system_to_one_line(describe_cmd, log_level=log_level)
    return tag
def is_client_clean(
    dir_name: str = ".",
    abort_if_not_clean: bool = False,
) -> bool:
    """
    Return whether there are files modified, added, or removed in a directory.

    Ignores submodule changes (amp, helpers_root) to focus on actual code changes.

    :param dir_name: directory containing the git repository
    :param abort_if_not_clean: if True and the client is not clean,
        abort with a detailed message showing the modified files
    :return: True if no files are modified (excluding submodules)
    """
    _LOG.debug(hprint.to_str("abort_if_not_clean"))
    files = get_modified_files(dir_name)
    # Exclude submodule directories from consideration since their changes
    # are tracked separately and don't affect code cleanliness.
    # Each submodule is checked independently, so that all of them are
    # skipped even when more than one is reported as modified.
    submodule_dirs = ("amp", "helpers_root")
    for submodule_dir in submodule_dirs:
        if submodule_dir in files:
            _LOG.warning("Skipping '%s' in modified files", submodule_dir)
            files = [f for f in files if f != submodule_dir]
    # The client is considered clean iff no modified file is left.
    is_clean = len(files) == 0
    if abort_if_not_clean:
        hdbg.dassert(
            is_clean, "The Git client is not clean:\n%s", "\n".join(files)
        )
    return is_clean
+ if confirm_delete: + branches_as_str = " ".join(branches) + msg = ( + hdbg.WARNING + + f": Delete {len(branches)} {mode} branch(es) '{branches_as_str}'?" + ) + hsystem.query_yes_no(msg, abort_on_no=True) + for branch in branches: + if mode == "remote": + prefix = "origin/" + hdbg.dassert( + branch.startswith(prefix), + "Remote branch '%s' needs to start with '%s'", + branch, + prefix, + ) + branch = branch[len(prefix) :] + cmd = f"{delete_cmd} {branch}" + hsystem.system( + cmd, + suppress_output=False, + log_level="echo", + abort_on_error=abort_on_error, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py new file mode 100644 index 000000000..e796b865f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py @@ -0,0 +1,1183 @@ +""" +Use cases for this module are at: +helpers/notebooks/Master_how_to_use_hgoogle_drive_api.ipynb + +Import as: + +import helpers.hgoogle_drive_api as hgodrapi +""" + +import datetime +import importlib +import logging +import os +import re +import sys +from typing import List, Optional, Union + +# Keep try-except to avoid `ModuleNotFoundError` in CI/CD (HelpersTask #1183). +try: + # Authentication for Google API to produce credentials. + import google.oauth2.service_account as goasea + + # Google API client for service objects (e.g., Drive, Sheets, etc.) + import googleapiclient.discovery as godisc + + # Built on top of Google API to simplify interactions with Google Sheets. + import gspread + + _GOOGLE_API_AVAILABLE = True +except ImportError: + # If Google API packages are not installed, set placeholders. 
def install_needed_modules(
    *, use_sudo: bool = True, venv_path: Optional[str] = None
) -> None:
    """
    Install the modules needed to use the Google Drive API, if missing.

    After the installation this module is reloaded, if it is already imported.

    :param use_sudo: whether to use sudo to install the module
    :param venv_path: path to the virtual environment E.g.,
        /Users/saggese/src/venv/client_venv.helpers
    """
    # Pairs of (module to import, package providing it), in install order.
    module_to_package = [
        ("google", "google-auth"),
        ("googleapiclient", "google-api-python-client"),
        ("gspread", "gspread"),
    ]
    for module_name, package_name in module_to_package:
        hmodule.install_module_if_not_present(
            module_name,
            package_name=package_name,
            use_sudo=use_sudo,
            use_activate=True,
            venv_path=venv_path,
        )
    # Reload this module (hgoogle_drive_api) if already imported.
    loaded_module = sys.modules.get(__name__)
    if __name__ in sys.modules:
        importlib.reload(loaded_module)
# TODO(gp): Extend this to work with v3, v4, etc.
# TODO(ai_gp): Make it private if it's not called by anybody else.
def get_sheets_service(credentials: "goasea.Credentials") -> "godisc.Resource":
    """
    Build a Google Sheets (API v4) service object from the credentials.

    :param credentials: Google credentials object; it must be provided.
    :return: Google Sheets service instance.
    """
    # Refuse to build the service without credentials.
    hdbg.dassert(credentials, "The 'credentials' parameter must be provided")
    api_name = "sheets"
    api_version = "v4"
    return godisc.build(
        api_name, api_version, credentials=credentials, cache_discovery=False
    )
def get_gsheet_name(
    url: str,
    *,
    credentials: Optional["goasea.Credentials"] = None,
) -> str:
    """
    Return the title of the spreadsheet that a URL points to.

    E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI
    -> pitchbook.Outreach_AI_companies

    :param url: URL of the Google Sheets file.
    :param credentials: Google credentials object. If None, the ones returned
        by `get_credentials()` are used.
    :return: Name of the Google Sheet (spreadsheet title).
    """
    creds = get_credentials() if credentials is None else credentials
    # TODO(ai): Should we use the Sheets API instead?
    gspread_client = gspread.authorize(creds)
    # This is the title of the whole spreadsheet, not of one of its tabs.
    spreadsheet_title = gspread_client.open_by_url(url).title
    _LOG.debug("Retrieved sheet name: '%s'", spreadsheet_title)
    return spreadsheet_title
def _extract_file_id_from_url(url: str) -> str:
    """
    Return the file ID embedded in a Google Docs/Sheets/Drive URL.

    The ID is the run of letters, digits, `-` and `_` that follows `/d/`,
    e.g.,
    https://docs.google.com/spreadsheets/d/FILE_ID/...
    https://docs.google.com/document/d/FILE_ID/...
    https://drive.google.com/file/d/FILE_ID/...

    :param url: URL of the Google Docs/Sheets/Drive file.
    :return: File ID extracted from the URL.
    """
    id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
    # A URL without a `/d/<id>` component is rejected.
    hdbg.dassert(id_match, "Invalid URL format: %s", url)
    extracted_id = id_match.group(1)
    _LOG.debug("Extracted file ID: '%s' from URL: '%s'", extracted_id, url)
    return extracted_id
+ sheet_id = _extract_file_id_from_url(url) + _LOG.debug("Extracted sheet_id: '%s' from URL: '%s'", sheet_id, url) + # Get the gid for the specified tab. + gid = _get_gsheet_id(credentials, sheet_id, tab_name=tab_name) + _LOG.debug("Retrieved gid: '%s' for tab: '%s'", gid, tab_name) + # Construct the full URL with the gid parameter. + full_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/edit?gid={gid}#gid={gid}" + _LOG.debug("Generated full URL: '%s'", full_url) + return full_url + + +def _freeze_rows_in_gsheet( + credentials: "goasea.Credentials", + sheet_id: str, + num_rows_to_freeze: int, + *, + tab_name: Optional[str] = None, + bold: bool = True, +) -> None: + """ + Freeze specified rows in the given sheet. + + :param credentials: Google credentials object. + :param sheet_id: ID of the Google Sheet (spreadsheet ID). + :param num_rows_to_freeze: Number of rows to freeze (starting from + row 0). + :param tab_name: Name of the sheet (tab) to freeze rows in. Defaults + to the first tab if not provided. + :param bold: If True, make the frozen rows bold. + """ + hdbg.dassert_lt(0, num_rows_to_freeze) + tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) + sheets_service = get_sheets_service(credentials) + # Build the batch update request. + requests = [] + # Add freeze rows request. + requests.append( + { + "updateSheetProperties": { + "properties": { + "sheetId": tab_id, + "gridProperties": {"frozenRowCount": num_rows_to_freeze}, + }, + "fields": "gridProperties.frozenRowCount", + } + } + ) + # Add bold formatting request if requested. 
def _set_row_height_in_gsheet(
    credentials: "goasea.Credentials",
    sheet_id: str,
    height: int,
    *,
    start_index: Optional[int] = None,
    end_index: Optional[int] = None,
    tab_name: Optional[str] = None,
) -> None:
    """
    Set the height for a range of rows in a tab of a Google Sheet.

    :param credentials: Google credentials object.
    :param sheet_id: ID of the Google Sheet (spreadsheet ID).
    :param height: Height of the rows in pixels.
    :param start_index: Starting index of the rows (zero-based). If None,
        the range starts at row 0.
    :param end_index: Ending index of the rows (zero-based). If None, the row
        count reported for the tab is used (1000 if the metadata has no row
        count).
    :param tab_name: Name of the sheet (tab) to set row height in. Defaults
        to the first tab if not provided.
    :raises ValueError: if the tab is missing from the spreadsheet metadata,
        or if the resolved `start_index` is not less than `end_index`.
    """
    tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name)
    sheets_service = get_sheets_service(credentials)
    if start_index is None:
        start_index = 0
    if end_index is None:
        # The metadata is needed only to discover how many rows the tab has.
        sheet_metadata = (
            sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute()
        )
        tab_properties = None
        for sheet in sheet_metadata.get("sheets", []):
            properties = sheet.get("properties", {})
            if properties.get("sheetId") == tab_id:
                tab_properties = properties
                break
        if tab_properties is None:
            raise ValueError(
                f"Tab with id '{tab_id}' not found in sheet '{sheet_id}'."
            )
        grid_properties = tab_properties.get("gridProperties", {})
        end_index = grid_properties.get("rowCount", 1000)
    # Validate the resolved range in every case, not only when both indices
    # are passed explicitly.
    if start_index >= end_index:
        raise ValueError(
            f"Invalid params: start_index ({start_index}) must be less than end_index ({end_index})."
        )
    # Create request.
    set_row_height_request = {
        "requests": [
            {
                "updateDimensionProperties": {
                    "range": {
                        "sheetId": tab_id,
                        "dimension": "ROWS",
                        "startIndex": start_index,
                        "endIndex": end_index,
                    },
                    "properties": {"pixelSize": height},
                    "fields": "pixelSize",
                }
            }
        ]
    }
    # Get response.
    response = (
        sheets_service.spreadsheets()
        .batchUpdate(spreadsheetId=sheet_id, body=set_row_height_request)
        .execute()
    )
    _LOG.debug("response: %s", response)
def from_gsheet(
    url: str,
    *,
    tab_name: Optional[str] = None,
    credentials: Optional["goasea.Credentials"] = None,
) -> pd.DataFrame:
    """
    Load one tab of a Google Sheet into a DataFrame.

    :param url: URL of the Google Sheets file.
    :param tab_name: Name of the tab to read. If None, the first worksheet
        is read.
    :param credentials: Google credentials object. If None, the ones returned
        by `get_credentials()` are used.
    :return: pandas DataFrame built from the records of the worksheet.
    """
    creds = get_credentials() if credentials is None else credentials
    spreadsheet = gspread.authorize(creds).open_by_url(url)
    # Pick the worksheet: by position when no name is given, by name
    # otherwise.
    worksheet = (
        spreadsheet.get_worksheet(0)
        if tab_name is None
        else spreadsheet.worksheet(tab_name)
    )
    records = worksheet.get_all_records()
    # A worksheet without records is treated as an error.
    hdbg.dassert(records, "The sheet '%s' is empty", tab_name)
    result_df = pd.DataFrame(records)
    _LOG.debug("Data fetched")
    return result_df
def _get_gdrive_service(credentials: "goasea.Credentials") -> "godisc.Resource":
    """
    Build a Google Drive (v3) service object from the given credentials.

    :param credentials: Google credentials object; asserted to be truthy.
    :return: Google Drive service instance.
    """
    # Fail early when no credentials are passed.
    hdbg.dassert(credentials, "The 'credentials' parameter must be provided")
    # Build and return the service; discovery caching is turned off.
    return godisc.build(
        "drive", "v3", credentials=credentials, cache_discovery=False
    )
def move_gfile_to_dir(
    gfile_id: str,
    folder_id: str,
    *,
    credentials: Optional["goasea.Credentials"] = None,
) -> dict:
    """
    Move a Google file into a folder of Google Drive.

    The file gets `folder_id` added as parent and `root` removed as parent.

    :param gfile_id: The ID of the Google file.
    :param folder_id: The ID of the destination folder.
    :param credentials: Google credentials object. If None, the ones returned
        by `get_credentials()` are used.
    :return: The response from the API after moving the file.
    """
    creds = get_credentials() if credentials is None else credentials
    drive_service = godisc.build(
        "drive", "v3", credentials=creds, cache_discovery=False
    )
    # Re-parent the file with a single update request.
    update_request = drive_service.files().update(
        fileId=gfile_id,
        body={},
        addParents=folder_id,
        removeParents="root",
        supportsAllDrives=True,
    )
    return update_request.execute()
def create_empty_google_file(
    gfile_type: str,
    gfile_name: str,
    gdrive_folder_id: str,
    *,
    user: Optional[str] = None,
    credentials: Optional["goasea.Credentials"] = None,
) -> str:
    """
    Create a Google file (sheet or doc), then optionally move and share it.

    :param gfile_type: the type of the Google file ('sheet' or 'doc').
    :param gfile_name: the name of the new Google file.
    :param gdrive_folder_id: the ID of the Google Drive folder to move the
        file to; an empty value skips the move.
    :param user: the email address of the user to share the Google file
        with; an empty value skips the sharing.
    :param credentials: Google credentials object for API access. If None,
        the ones returned by `get_credentials()` are used.
    :return: the ID of the created Google file.
    :raises ValueError: if `gfile_type` is neither 'sheet' nor 'doc'.
    """
    creds = get_credentials() if credentials is None else credentials
    # Map the public file type to the document type of the creation helper.
    if gfile_type == "sheet":
        doc_type = "sheets"
    elif gfile_type == "doc":
        doc_type = "docs"
    else:
        raise ValueError(f"Invalid gfile_type={gfile_type}")
    new_file_id = _create_new_google_document(
        creds,
        doc_name=gfile_name,
        doc_type=doc_type,
    )
    _LOG.debug("Created a new Google %s '%s'", gfile_type, gfile_name)
    # Move the file only when a destination folder is given.
    if gdrive_folder_id:
        move_gfile_to_dir(new_file_id, gdrive_folder_id, credentials=creds)
    # Share the file only when a user is given.
    if user:
        share_google_file(new_file_id, user, credentials=creds)
        _LOG.debug(
            "The new Google '%s': '%s' is shared with '%s'",
            gfile_type,
            gfile_name,
            user,
        )
    return new_file_id
def _get_folders_in_gdrive(*, credentials: "goasea.Credentials") -> list:
    """
    Get the list of all the non-trashed folders in Google Drive.

    The listing is paginated by the API: every page is fetched by following
    `nextPageToken`, so that folders beyond the first page are not dropped.

    :param credentials: Google credentials object.
    :return: A list of folders (each containing an ID and name).
    """
    # Build the Google Drive service using the provided credentials.
    # TODO(gp): -> get_gdrive_service
    service = godisc.build(
        "drive", "v3", credentials=credentials, cache_discovery=False
    )
    folders: list = []
    # `None` requests the first page.
    page_token = None
    while True:
        response = (
            service.files()
            .list(
                q="mimeType='application/vnd.google-apps.folder' and trashed=false",
                spaces="drive",
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            )
            .execute()
        )
        # Accumulate the folders (id and name) of this page.
        folders.extend(response.get("files", []))
        page_token = response.get("nextPageToken")
        # A missing token means that this was the last page.
        if not page_token:
            break
    return folders
+ Returns empty list if file is at root level. + """ + # Get file metadata with parents. + file_metadata = ( + service.files() + .get( + fileId=file_id, + fields="parents", + supportsAllDrives=True, + ) + .execute() + ) + parents = file_metadata.get("parents", []) + # If no parents, file is at root level. + if not parents: + _LOG.debug("File is at root level") + return [] + # Build the path by traversing up the folder hierarchy. + path_list = [] + current_id = parents[0] # Files typically have one parent in Google Drive. + while current_id: + folder_metadata = ( + service.files() + .get( + fileId=current_id, + fields="name,parents", + supportsAllDrives=True, + ) + .execute() + ) + folder_name = folder_metadata.get("name") + path_list.insert(0, folder_name) + parents = folder_metadata.get("parents", []) + current_id = parents[0] if parents else None + _LOG.debug("Folder path: %s", path_list) + return path_list + + +def get_google_path_from_url( + url: str, + *, + credentials: Optional["goasea.Credentials"] = None, +) -> List[str]: + """ + Get the full folder path from a Google Docs/Sheets/Drive URL. + + E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI + -> ['My Drive', 'Folder1', 'Folder2'] + + :param url: URL of the Google Docs/Sheets/Drive file. + :param credentials: Google credentials object. + :return: List of folder names from root to immediate parent folder. + Returns empty list if file is at root level. + """ + if credentials is None: + credentials = get_credentials() + # Extract file ID from URL. + file_id = _extract_file_id_from_url(url) + # Get Google Drive service. + service = _get_gdrive_service(credentials) + # Get folder path as list. 
# TODO(gp): Add clean up
# TODO(gp): Make url mandatory and when url = "tmp" use the hardcoded value.
# TODO(gp): -> save_df_to_gsheet
def save_df_to_tmp_gsheet(
    df: pd.DataFrame,
    *,
    url: str = "",
    tab_name: str = "",
    remove_empty_columns: bool = False,
    remove_stable_columns: bool = False,
    verbose: bool = True,
    credentials: Optional["goasea.Credentials"] = None,
) -> None:
    """
    Save a DataFrame to a Google Sheet.

    The data is written with the header row frozen and text wrapping set to
    CLIP.

    :param df: The DataFrame to save.
    :param url: URL of the Google Sheet (empty means default temp
        sheet).
    :param tab_name: The name of the tab to save the DataFrame to. If empty,
        the first name among `Sheet0`, ..., `Sheet99` that is not already a
        tab of the sheet is used.
    :param remove_empty_columns: Whether to remove empty columns.
    :param remove_stable_columns: Whether to remove stable columns.
    :param verbose: Whether to print verbose output.
    :param credentials: Google credentials object. If None, the ones returned
        by `get_credentials()` are used.
    """
    if credentials is None:
        credentials = get_credentials()
    if remove_stable_columns:
        df = hpandas.remove_stable_columns(df, verbose=verbose)
    if remove_empty_columns:
        df = hpandas.remove_empty_columns(df, verbose=verbose)
    if url == "":
        url = "https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=0#gid=0"
    if tab_name == "":
        # Find the first candidate tab name that is not used yet.
        existing_tab_names = set(
            get_tabs_from_gsheet(url, credentials=credentials)
        )
        for i in range(0, 100):
            candidate = "Sheet" + str(i)
            if candidate not in existing_tab_names:
                tab_name = candidate
                break
        # `tab_name` is still empty only if all the candidates are taken:
        # fail instead of silently reusing an existing tab.
        hdbg.dassert(tab_name, "No empty tab name found")
    to_gsheet(
        df,
        url,
        tab_name=tab_name,
        freeze_rows=True,
        set_text_wrapping_clip=True,
        credentials=credentials,
    )
def read_all_gsheets(
    url: str, *, tab_names: Union[str, List[str]], concat: bool = False
) -> Union[pd.DataFrame, List[pd.DataFrame]]:
    """
    Read several tabs from a Google Sheet, using the cached reader.

    :param url: The URL of the Google Sheet.
    :param tab_names: The names of the tabs to read:
        - `"all"` means all the tabs of the sheet
        - any other string is the name of a single tab
        - a list contains the names of the tabs to read
    :param concat: Whether to concatenate the DataFrames.
    :return: A single DataFrame with a fresh index if `concat` is True and
        more than one tab is read; otherwise a list of DataFrames, one for
        each tab.
    """
    # TODO(ai_gp): -> _all_
    if tab_names == "all":
        tab_names = get_tabs_from_gsheet(url)
    elif isinstance(tab_names, str):
        # A single tab name must not be iterated character by character.
        tab_names = [tab_names]
    dfs = [get_cached_gsheet_to_df(url, tab_name) for tab_name in tab_names]
    if len(dfs) > 1 and concat:
        # Assert if the columns are the same. Compare plain lists, since
        # `==` between pandas indices is element-wise and has no single truth
        # value.
        expected_columns = dfs[0].columns.tolist()
        for df in dfs[1:]:
            hdbg.dassert_eq(df.columns.tolist(), expected_columns)
        # Concatenate the DataFrames.
        return pd.concat(dfs, ignore_index=True)
    return dfs
def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str:
    """
    Remove `prefix` from the beginning of `string`.

    :param string: string to process
    :param prefix: prefix to remove
    :param assert_on_error: whether to raise when `string` doesn't start with
        `prefix`; if False, `string` is returned unchanged in that case
    :return: `string` without the leading `prefix`
    :raises RuntimeError: if `string` doesn't start with `prefix` and
        `assert_on_error` is True
    """
    if string.startswith(prefix):
        return string[len(prefix) :]
    if assert_on_error:
        raise RuntimeError(
            f"string='{string}' doesn't start with prefix ='{prefix}'"
        )
    # Nothing to remove: return the input instead of an unbound variable.
    return string
def get_methods(obj: Any, access: str = "all") -> List[str]:
    """
    List the names of the callable attributes of `obj`.

    :param obj: class or class object
    :param access: which names to keep
        - "all": every callable attribute
        - "private": only names starting with `_`
        - "public": only names not starting with `_`
    :return: the selected names, in the order reported by `dir()`
    """
    # Collect every attribute that can be called.
    callable_names = []
    for name in dir(obj):
        if callable(getattr(obj, name)):
            callable_names.append(name)
    # Select by visibility.
    if access == "all":
        return callable_names
    if access == "private":
        return [name for name in callable_names if name.startswith("_")]
    if access == "public":
        return [name for name in callable_names if not name.startswith("_")]
    raise ValueError(f"Invalid access='{access}'")
# From https://stackoverflow.com/questions/23852423
def is_lambda_function(method: object) -> bool:
    """
    Return whether `method` is a lambda function.

    :param method: object to check
    :return: True only for functions created with a `lambda` expression
    """
    _LOG.debug("type(method)=%s", str(type(method)))
    # `types.LambdaType` is the same type as `types.FunctionType`, so the type
    # check alone can't tell a lambda from a `def`: a lambda is recognized by
    # the name that Python assigns to it.
    return isinstance(method, types.LambdaType) and method.__name__ == "<lambda>"
# TODO(gp): -> move to helpers/hprint.py
def format_size(num: float) -> str:
    """
    Return a human-readable string for a filesize (e.g., "3.5 MB").

    :param num: size in bytes
    :return: the size with one decimal digit, expressed in the largest unit
        among b, KB, MB, GB, TB for which the value is below 1024
    :raises AssertionError: if the size is 1024 TB or larger
    """
    # From http://stackoverflow.com/questions/1094841
    for unit in ("b", "KB", "MB", "GB", "TB"):
        if num < 1024.0:
            return f"{num:3.1f} {unit}"
        num /= 1024.0
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with `-O` and would let the function return `None`.
    raise AssertionError(f"Invalid num='{num}'")
def listdir(
    dir_name: str,
    pattern: str,
    only_files: bool,
    use_relative_paths: bool,
    *,
    exclude_git_dirs: bool = True,
    maxdepth: Optional[int] = None,
) -> List[str]:
    """
    Find all files and subdirectories under `dir_name` that match `pattern`.

    The search is delegated to the `find` command line tool.

    :param dir_name: path to the directory where to look for files
    :param pattern: pattern to match a filename against (e.g., `*.py`)
    :param only_files: look for only files instead of both files and directories
    :param use_relative_paths: remove `dir_name` from path
    :param exclude_git_dirs: skip `.git` dirs
    :param maxdepth: limit the depth of directory traversal
    :return: the paths found, one per non-empty line of the `find` output
    """
    hdbg.dassert_dir_exists(dir_name)
    # Escape the directory path only for the shell command: `dir_name` itself
    # is needed unquoted below to compute the relative paths.
    quoted_dir_name = shlex.quote(dir_name)
    cmd = [f"find {quoted_dir_name}"]
    if maxdepth is not None:
        # `-maxdepth` is a global option of `find` and must precede the tests
        # (e.g., `-name`), otherwise GNU `find` emits a warning.
        cmd.append(f"-maxdepth {int(maxdepth)}")
    # Quote the pattern so that the shell doesn't expand it.
    cmd.append(f"-name {shlex.quote(pattern)}")
    if only_files:
        cmd.append("-type f")
    if exclude_git_dirs:
        cmd.append(r'-not -path "*/\.git/*"')
    cmd = " ".join(cmd)
    _, output = hsystem.system_to_string(cmd)
    # TODO(gp): -> system_to_files
    paths = [path for path in output.split("\n") if path != ""]
    _LOG.debug("Found %s paths in %s", len(paths), dir_name)
    _LOG.debug("\n".join(paths))
    if use_relative_paths:
        paths = [os.path.relpath(path, start=dir_name) for path in paths]
    return paths
"data.csv" to "data.json"). + + :param filename: the old filename (including extension) + :param old_ext: the extension of the old filename (e.g., "csv") + - If empty, it is extracted from the filename + :param new_ext: the extension to replace the old extension (e.g., "json") + :return: a filename with the new extension + """ + # If the old extension is empty, extract it from the filename. + if old_ext == "": + _, old_ext = os.path.splitext(filename) + # Remove the leading dot. + old_ext = old_ext.lstrip(".") + hdbg.dassert( + is_valid_filename_extension(old_ext), "Invalid extension '%s'", old_ext + ) + hdbg.dassert( + is_valid_filename_extension(new_ext), "Invalid extension '%s'", new_ext + ) + hdbg.dassert( + filename.endswith(old_ext), + "Extension '%s' doesn't match file '%s'", + old_ext, + filename, + ) + # Remove the old extension. + len_ext = len(old_ext) + new_filename = filename[:-len_ext] + hdbg.dassert(new_filename.endswith("."), "new_filename='%s'", new_filename) + # Add the new extension. + new_filename += new_ext + return new_filename + + +def is_paired_jupytext_python_file(py_filename: str) -> bool: + """ + Return if a Python file has a paired Jupyter notebook. + """ + hdbg.dassert( + py_filename.endswith("py"), "Invalid python filename='%s'", py_filename + ) + hdbg.dassert_file_exists(py_filename) + # Check if a corresponding ipynb file exists. + ipynb_filename = change_filename_extension(py_filename, "py", "ipynb") + is_paired = os.path.exists(ipynb_filename) + _LOG.debug( + "Checking ipynb file='%s' for py file='%s': is_paired=%s", + py_filename, + ipynb_filename, + is_paired, + ) + return is_paired + + +def keep_python_files( + file_names: List[str], exclude_paired_jupytext: bool +) -> List[str]: + """ + Return a list with all Python file names (i.e., with the `py` extension). 
def delete_file(file_name: str) -> None:
    """
    Delete `file_name`, if it exists.

    A missing file and `/dev/null` are silently skipped.

    :param file_name: path of the file to delete
    """
    _LOG.debug("Deleting file '%s'", file_name)
    # hs3.dassert_is_not_s3_path(file_name)
    is_dev_null = file_name == "/dev/null"
    if is_dev_null or not os.path.exists(file_name):
        # Nothing to delete.
        return
    try:
        os.unlink(file_name)
    except OSError as err:
        # Somebody else might have deleted the file between the check above
        # and the deletion: in this case the error is
        # `OSError: [Errno 2] No such file or directory` and it is ignored.
        if err.errno != 2:
            raise err
+ """ + _LOG.debug( + hprint.to_str("dir_name incremental abort_if_exists ask_to_delete") + ) + hdbg.dassert_is_not(dir_name, None) + dir_name = os.path.normpath(dir_name) + if os.path.normpath(dir_name) == ".": + _LOG.debug("Can't create dir '%s'", dir_name) + exists = os.path.exists(dir_name) + is_dir = os.path.isdir(dir_name) + _LOG.debug(hprint.to_str("dir_name exists is_dir")) + if abort_if_exists: + hdbg.dassert_path_not_exists(dir_name) + # dir exists / dir does not exist + # incremental no-op mkdir + # not incremental rm+mkdir mkdir + if exists: + if incremental and is_dir: + # The dir exists and we want to keep it (i.e., incremental), so we + # are done. + # os.chmod(dir_name, 0755) + _LOG.debug( + "The dir '%s' exists and incremental=True: exiting", dir_name + ) + return + if ask_to_delete: + hsystem.query_yes_no( + f"Do you really want to delete dir '{dir_name}'?", + abort_on_no=True, + ) + # The dir exists and we want to create it from scratch (i.e., not + # incremental), so we need to delete the dir. + _LOG.debug("Deleting dir '%s'", dir_name) + if os.path.islink(dir_name): + delete_file(dir_name) + else: + hdbg.dassert_ne(os.path.normpath(dir_name), ".") + shutil.rmtree(dir_name) + _LOG.debug("Creating directory '%s'", dir_name) + # NOTE: `os.makedirs` raises `OSError` if the target directory already exists. + # A race condition can happen when another process creates our target + # directory, while we have just found that it doesn't exist, so we need to + # handle this situation gracefully. + try: + os.makedirs(dir_name) + except OSError as e: + _LOG.error(str(e)) + # It can happen that we try to create the directory while somebody else + # created it, so we neutralize the corresponding exception. + if e.errno == 17: + # OSError: [Errno 17] File exists. 
def create_dir(
    dir_name: str,
    incremental: bool,
    *,
    abort_if_exists: bool = False,
    ask_to_delete: bool = False,
    backup_dir_if_exists: bool = False,
) -> None:
    """
    Create a directory, optionally backing up an existing one.

    :param dir_name: path of the directory to create
    :param incremental: if False then the directory is deleted and re-
        created, otherwise the same directory is reused as it is
    :param abort_if_exists: abort if the target directory already exists
    :param ask_to_delete: if it is not incremental and the dir exists,
        asks before deleting. This option is used when we want to start
        with a clean dir (i.e., incremental=False) but, at the same
        time, we want to make sure that the user doesn't want to delete
        the content of the dir. Another approach is to automatically
        rename the old dir with backup_dir_if_exists.
    :param backup_dir_if_exists: if the target dir already exists, then
        rename it using a timestamp (e.g., dir_20231003_080000) and
        create a new target dir. When set, `incremental`,
        `abort_if_exists`, and `ask_to_delete` are not used.
    """
    if not backup_dir_if_exists:
        # No backup requested: forward everything to the basic function.
        _create_dir(
            dir_name,
            incremental,
            abort_if_exists=abort_if_exists,
            ask_to_delete=ask_to_delete,
        )
        return
    if os.path.exists(dir_name):
        _LOG.debug("Dir '%s' already exists", dir_name)
        # The backup name is based on the last modification time of the dir.
        mtime = os.path.getmtime(dir_name)
        timestamp_as_str = datetime.datetime.fromtimestamp(mtime).strftime(
            "%Y%m%d_%H%M%S"
        )
        dir_name_new = dir_name + "." + timestamp_as_str
        if os.path.exists(dir_name_new):
            # A backup with the same timestamp is already there: keep it.
            _LOG.warning("Dir '%s' already exists", dir_name_new)
        else:
            _LOG.warning("Renaming dir '%s' -> '%s'", dir_name, dir_name_new)
            os.rename(dir_name, dir_name_new)
    # Create new dir.
    _LOG.debug("Creating dir '%s'", dir_name)
    _create_dir(dir_name, incremental=True)
def create_soft_link(src: str, dst: str) -> None:
    """
    Create a soft-link to `src` called `dst`.

    `src` and `dst` are files or directories, as in the Linux `ln` command.
    This is equivalent to a command like `cp src dst` but creating a soft
    link.

    :param src: path the link points to; it is converted to an absolute path
    :param dst: path of the link to create; its enclosing dir is created if
        needed
    """
    _LOG.debug("# CreateSoftLink")
    # hs3.dassert_is_not_s3_path(src)
    # hs3.dassert_is_not_s3_path(dst)
    # Create the enclosing directory, if needed.
    enclosing_dir = os.path.dirname(dst)
    _LOG.debug("enclosing_dir=%s", enclosing_dir)
    create_dir(enclosing_dir, incremental=True)
    # Create the link. Note that the link source needs to be an absolute path.
    src = os.path.abspath(src)
    # Quote the paths so that spaces and shell metacharacters in them are not
    # interpreted by the shell.
    cmd = f"ln -s {shlex.quote(src)} {shlex.quote(dst)}"
    hsystem.system(cmd)
def backup_file_or_dir_if_exists(path: str) -> None:
    """
    Move an existing file or directory aside as a timestamped backup.

    An existing `path` is renamed to `{path}.{timestamp}.backup`, where the
    timestamp is the current time (e.g., path.20231003_080000.backup). A
    missing `path` is left alone.

    :param path: path to the file or directory to back up
    """
    if os.path.exists(path):
        _LOG.warning("Path '%s' already exists: making a backup", path)
        # Build the backup path from the current time.
        now_as_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = f"{path}.{now_as_str}.backup"
        # Move the file or directory to the backup location.
        shutil.move(path, backup_path)
        _LOG.info("Backed up '%s' -> '%s'", path, backup_path)
# TODO(saggese): We should have `lines` first since it is an input param.
# TODO(Nikola): Remove `use_gzip` and use `file_name` extension instead.
def to_file(
    file_name: str,
    txt: str,
    use_gzip: bool = False,
    mode: Optional[str] = None,
    force_flush: bool = False,
) -> None:
    """
    Write the content of txt into file_name, creating the enclosing directory
    if needed.

    :param file_name: name of written file
    :param txt: content of the file
    :param use_gzip: whether the file should be compressed as gzip
    :param mode: file writing mode; by default "wt" for gzip files and "w"
        otherwise
    :param force_flush: whether to flush the file buffer and sync the file to
        disk before closing it
    """
    _LOG.debug(hprint.to_str("file_name use_gzip mode force_flush"))
    dassert_is_valid_file_name(file_name)
    hdbg.dassert_isinstance(txt, str)
    # Choose default writing mode based on compression.
    if mode is None:
        if use_gzip:
            # Override default binary mode for `gzip`.
            mode = "wt"
        else:
            mode = "w"
    # Create the enclosing dir, if needed.
    create_enclosing_dir(file_name, incremental=True)
    if use_gzip:
        # Check if user provided correct file name.
        if not file_name.endswith(("gz", "gzip")):
            _LOG.warning("The provided file extension is not for a gzip file.")
        # Open gzipped file.
        f = gzip.open(file_name, mode)
    else:
        # Open regular text file. Unbuffered I/O (`buffering=0`) is allowed
        # only in binary mode, so the default buffering is kept and the flush
        # is done explicitly below.
        f = open(  # pylint: disable=consider-using-with
            file_name, mode
        )
    # The context manager closes the file even if the write fails.
    with f:
        # Write file contents.
        f.write(txt)  # type: ignore
        if force_flush:
            # Flush and sync while the file is still open: both operations
            # fail on a closed file.
            f.flush()
            os.fsync(f.fileno())
# TODO(gp): Use hintro.format_size
def get_size_as_str(file_name: str) -> str:
    """
    Return the size of a file as a human-readable string.

    :param file_name: path of the file to measure
    :return: the size with one decimal digit in KB (below 1 MB), MB (below
        1 GB), or GB, or "nan" if the path doesn't exist
    """
    if not os.path.exists(file_name):
        return "nan"
    size_in_bytes = os.path.getsize(file_name)
    # Pick the unit based on the size.
    if size_in_bytes < (1024**2):
        value, unit = size_in_bytes / 1024.0, "KB"
    elif size_in_bytes < (1024**3):
        value, unit = size_in_bytes / (1024.0**2), "MB"
    else:
        value, unit = size_in_bytes / (1024.0**3), "GB"
    return "%.1f %s" % (value, unit)
def add_suffix_to_filename(
    file_name: str,
    suffix: Union[int, str],
    *,
    before_extension: bool = True,
    with_underscore: bool = True,
) -> str:
    """
    Add a suffix to a file name, before or after the extension.

    E.g., `dir/file.txt` -> `dir/file_1.txt`

    :param file_name: file name to modify
    :param suffix: index to add to the file name
    :param before_extension: whether to insert the index before the file
        extension
    :param with_underscore: whether to separate the index with an
        underscore
    :return: modified file name with an index
    """
    suffix = str(suffix)
    if with_underscore:
        suffix = "_" + suffix
    _LOG.debug(hprint.to_str("suffix"))
    #
    if before_extension:
        # Look for the extension only in the last path component, so that a
        # dot in a directory name (e.g., `run.v1/log`) is not mistaken for
        # the extension separator.
        base_name = os.path.basename(file_name)
        dir_prefix = file_name[: len(file_name) - len(base_name)]
        file_name_no_ext, dot, ext = base_name.rpartition(".")
        if not dot:
            # E.g., `system_log_dir` -> `system_log_dir_1`
            ret = file_name + suffix
        else:
            # E.g., `dir/file.txt` -> `dir/file_1.txt`.
            ret = dir_prefix + file_name_no_ext + suffix + "." + ext
    else:
        # Add the suffix after the name of the file.
        # E.g., `dir/file.txt` -> `dir/file.txt_1`.
        ret = file_name + suffix
    _LOG.debug(hprint.to_str("ret"))
    return ret
def change_file_extension(file_path: str, new_extension: str) -> str:
    """
    Replace the extension of a file path.

    :param file_path: the path whose extension is replaced
    :param new_extension: the extension to use, starting with `.` (e.g.,
        ".json")
    :return: `file_path` with its last extension replaced by `new_extension`
    """
    # The new extension must carry its leading dot.
    has_leading_dot = new_extension.startswith(".")
    hdbg.dassert(has_leading_dot, "Invalid extension='%s'", new_extension)
    # Drop the current extension, if any, and append the new one.
    path_no_ext = os.path.splitext(file_path)[0]
    return path_no_ext + new_extension
def serialize_custom_types_for_json_encoder(obj: Any) -> Any:
    """
    Serialize DataFrame and other objects for JSON.

    E.g. dataframe {"A": [0, 1], "B": [0, 1]} will go to a list of dictionaries:
    [{"A": 0, "B": 0}, {"A": 1, "B": 1}] - each dictionary is for one row.

    This function is meant to be passed as `default` to `json.dump()`.

    :param obj: object that the JSON encoder can't serialize natively
    :return: a JSON-serializable representation of `obj`
        - DataFrame: list of dicts, one per row
        - Series: dict
        - `pd.NaT`, `pd.NA`: None
        - NumPy integer / floating scalar: Python `int` / `float`
        - UUID: string
        - date / datetime: ISO 8601 string
    :raises TypeError: if the type of `obj` is not supported
    """
    import numpy as np
    import pandas as pd

    if isinstance(obj, pd.DataFrame):  # type: ignore
        return obj.to_dict("records")
    if isinstance(obj, pd.Series):  # type: ignore
        return obj.to_dict()
    # Missing values must be checked before `datetime.date`: `pd.NaT` is
    # itself a `datetime` instance and would otherwise be rendered as the
    # string "NaT" instead of `null`.
    if isinstance(obj, (type(pd.NaT), type(pd.NA))):
        return None
    # Use the abstract NumPy types to cover all the widths (e.g., `np.int32`,
    # `np.float32`) and not only the 64-bit ones.
    if isinstance(obj, np.integer):  # type: ignore
        return int(obj)
    if isinstance(obj, np.floating):  # type: ignore
        return float(obj)
    if isinstance(obj, uuid.UUID):
        return str(obj)
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    raise TypeError(f"Can not serialize {obj} of type {type(obj)}")
def from_json(file_name: str, *, use_types: bool = False) -> Dict:
    """
    Load the content of a JSON file.

    Lines whose first non-blank character is `#` are treated as comments and
    dropped before parsing, since JSON has no native comments.

    :param file_name: name of file
    :param use_types: whether to use jsonpickle to load the file
    :return: dict with data
    """
    hdbg.dassert(file_name)
    if not file_name.endswith(".json"):
        _LOG.warning("The file '%s' doesn't end in .json", file_name)
    # Read file as text.
    hdbg.dassert_file_exists(file_name)
    raw_txt = from_file(file_name)
    # Remove comments (which are not supported natively by JSON).
    kept_lines = [
        line for line in raw_txt.split("\n") if not re.match(r"^\s*#", line)
    ]
    json_txt = "\n".join(kept_lines)
    _LOG.debug("txt_tmp=\n%s", json_txt)
    # Convert text into Python data structures.
    if not use_types:
        return json.loads(json_txt)
    import jsonpickle  # type: ignore

    return jsonpickle.decode(json_txt)
+ df = pd.DataFrame(data) + return df + + +# ############################################################################# +# Directory operations +# ############################################################################# + +# Copied from `hgit.py` to avoid import cycles. + + +def _find_git_root(path: str = ".") -> str: + """ + Find recursively the dir of the outermost super module. + + This function traverses the directory hierarchy upward from a specified + starting path to find the root directory of a Git repository. + It supports: + - standard git repository: where a `.git` directory exists at the root + - submodule: where repository is nested inside another, and the `.git` file contains + a `gitdir:` reference to the submodule's actual Git directory + - linked repositories: where the `.git` file points to a custom Git directory + location, such as in Git worktrees or relocated `.git` directories + + :param path: starting file system path. Defaults to the current directory (".") + :return: absolute path to the top-level Git repository directory + """ + path = os.path.abspath(path) + git_root_dir = None + while True: + git_dir = os.path.join(path, ".git") + _LOG.debug("git_dir=%s", git_dir) + # Check if `.git` is a directory which indicates a standard Git repository. + if os.path.isdir(git_dir): + # Found the Git root directory. + git_root_dir = path + break + # Check if `.git` is a file which indicates submodules or linked setups. + if os.path.isfile(git_dir): + # Using the `open()` to avoid import cycles with the `hio` module. + with open(git_dir, "r") as f: + txt = f.read() + lines = txt.split("\n") + for line in lines: + # Look for a `gitdir:` line that specifies the linked directory. + # Example: `gitdir: ../.git/modules/helpers_root`. + if line.startswith("gitdir:"): + git_dir_path = line.split(":", 1)[1].strip() + _LOG.debug("git_dir_path=%s", git_dir_path) + # Resolve the relative path to the absolute path of the Git directory. 
def safe_rm_file(dir_path: str) -> None:
    """
    Safely remove a directory tree after ensuring it's within our Git client.

    Despite the name (kept so that existing callers keep working), this
    function removes a directory recursively, not a single file. The safety
    checks prevent accidental deletion of anything outside the Git repository
    and of the repository root itself.

    :param dir_path: path to the directory to delete
    :raises AssertionError: if `dir_path` is not strictly inside the Git
        client, doesn't exist, or is not a directory
    :raises OSError: if the directory can't be deleted
    """
    # Convert to absolute path for comparison.
    dir_path = os.path.abspath(dir_path)
    # Get the Git client root.
    git_root = os.path.abspath(_find_git_root())
    # Ensure the directory is within our Git client. Compare path components
    # and not string prefixes, otherwise a sibling dir like `/repo_other`
    # would be accepted for a root `/repo`.
    try:
        is_inside = os.path.commonpath([dir_path, git_root]) == git_root
    except ValueError:
        # Paths without a common ancestor (e.g., different drives).
        is_inside = False
    hdbg.dassert(
        is_inside,
        "Directory '%s' is not within Git client root '%s'",
        dir_path,
        git_root,
    )
    # Additional safety check: prevent deletion of Git root itself.
    hdbg.dassert_ne(
        dir_path,
        git_root,
        "Cannot delete Git client root directory '%s'",
        git_root,
    )
    # Verify directory exists before attempting deletion.
    hdbg.dassert(
        os.path.exists(dir_path),
        "Directory '%s' does not exist",
        dir_path,
    )
    hdbg.dassert(
        os.path.isdir(dir_path),
        "Path '%s' is not a directory",
        dir_path,
    )
    # Perform the deletion.
    _LOG.debug("Safely removing directory: %s", dir_path)
    shutil.rmtree(dir_path)
    _LOG.debug("Successfully removed directory: %s", dir_path)
+ if txt_as_str != txt_new_as_str: + to_file(file_name, txt_new_as_str) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py new file mode 100644 index 000000000..d11ecbafc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py @@ -0,0 +1,880 @@ +""" +Import as: + +import helpers.hjoblib as hjoblib +""" + +import concurrent.futures +import logging +import math +import os +import pprint +import random +import sys +import traceback +from functools import wraps +from multiprocessing import Process, Queue +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import joblib +from joblib._store_backends import StoreBackendBase, StoreBackendMixin +from tqdm.autonotebook import tqdm + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.htimer as htimer +import helpers.htqdm as htqdm + +# Avoid dependency from other `helpers` modules, such as `helpers.hcache`, to +# prevent import cycles. 
+ + +_LOG = logging.getLogger(__name__) + +# - Assume one wants to execute `n` invocations of a given `func` +# - E.g., `func(param_1), func(param_2), ..., func(param_n)` +# - Each `param` is a tuple of `*args` and `**kwargs` to apply to `func` +# - A `Workload` is composed of: +# - `workload_func`: the function to execute +# - `func_name`: the name / description of the function `func` +# - `tasks`: a list of `n` set of parameters `*args`, `**kwargs` to apply +# to the function (e.g., `param_1`, ..., `param_n`) +# - Each `Task` executes a subset of the functions +# - `Tasks` are a partition of the function invocations, i.e., each function +# invocation is executed by one and only one task +# - The `n` `Tasks` are then executed by `k` threads in parallel or serially +# - Note that a single task can correspond to processing of multiple logical +# chunks of work, because they need to be processed together or because we +# want to enforce that it is executed on a single processor +# - E.g., if we want to concatenate files we can map multiple filenames in a +# single `Task`. In this case the `Task` contains a list of filenames to +# concatenate together + +# ############################################################################# +# Task +# ############################################################################# + +# A `Task` contains the parameters to pass to the function that needs to be +# executed. 
# TODO(gp): @Nikola add unit tests
def split_list_in_tasks(
    list_in: List[Any],
    n: int,
    *,
    keep_order: bool = False,
    num_elems_per_task: Optional[int] = None,
) -> List[List[Any]]:
    """
    Partition a list into groups of elements, one group per task.

    :param list_in: elements to distribute; it must have at least `n` elements
    :param n: number of threads, at least 1
    :param keep_order: if True, assign the elements round-robin, so that
        consecutive elements of the list are in different tasks. This favors
        executing the workload in order on `n` threads
    :param num_elems_per_task: force each task to have the given number of
        elements (the last one can be shorter). It can't be used together
        with `keep_order=True`
    :return: list of lists of elements, where each list can be assigned to an
        execution thread

    - E.g., [a, b, c, d, e] with `n=3` and `keep_order=True` gives:
      ```
      1 -> [a, d]
      2 -> [b, e]
      3 -> [c]
      ```
    - For `keep_order=False` the elements are cut in contiguous chunks of
      `ceil(len(list_in) / n)` elements:
      ```
      1 -> [a, b]
      2 -> [c, d]
      3 -> [e]
      ```
    - For `num_elems_per_task=3` only the non-empty chunks are returned:
      ```
      1 -> [a, b, c]
      2 -> [d, e]
      ```
    """
    num_elems = len(list_in)
    hdbg.dassert_lte(1, n)
    hdbg.dassert_lte(n, num_elems, "There are fewer tasks than threads")
    if not keep_order:
        # Cut the list in contiguous chunks.
        chunk_size = num_elems_per_task
        if chunk_size is None:
            chunk_size = int(math.ceil(num_elems / n))
        hdbg.dassert_lte(1, chunk_size)
        list_out = []
        for start in range(0, num_elems, chunk_size):
            list_out.append(list_in[start : start + chunk_size])
    else:
        # Assign the elements round-robin.
        hdbg.dassert_is(
            num_elems_per_task,
            None,
            "Can't specify num_elems_per_task with keep_order",
        )
        list_out = []
        for _ in range(n):
            list_out.append([])
        for idx, elem in enumerate(list_in):
            bucket_idx = idx % n
            _LOG.debug("%s: %s -> %s", idx, elem, bucket_idx)
            list_out[bucket_idx].append(elem)
    # Ensure that the elements are all distributed.
    num_assigned = sum(len(group) for group in list_out)
    hdbg.dassert_eq(num_assigned, num_elems)
    return list_out
def task_to_string(task: Task, *, use_pprint: bool = True) -> str:
    """
    Convert a `Task` into a two-line string with its `args` and `kwargs`.

    :param task: the task to convert; it is validated first
    :param use_pprint: if True format the values with `pprint.pformat()`,
        otherwise with `str()`
    :return: string like `args=...\\nkwargs=...`
    """
    hdbg.dassert(validate_task(task))
    task_args, task_kwargs = task
    # Pick the formatter once and apply it to both the components.
    formatter = pprint.pformat if use_pprint else str
    lines = [
        f"args={formatter(task_args)}",
        f"kwargs={formatter(task_kwargs)}",
    ]
    return "\n".join(lines)
def randomize_workload(
    workload: Workload, *, seed: Optional[int] = None
) -> Workload:
    """
    Return a workload with the same tasks in a reproducible random order.

    The tasks of the input workload are not modified and the state of the
    global `random` generator is left untouched.

    :param workload: the workload to randomize
    :param seed: seed for the shuffle; `None` means the default seed 42
    :return: new workload with the shuffled tasks
    """
    validate_workload(workload)
    # Parse the workload.
    workload_func, func_name, tasks = workload
    # Compare against `None` explicitly, so that `seed=0` is honored instead
    # of being silently replaced by the default.
    if seed is None:
        seed = 42
    # Randomize a copy of `tasks` with a private generator, which yields the
    # same permutation as `random.seed(seed); random.shuffle(...)` without
    # mutating the caller's list or re-seeding the global generator.
    shuffled_tasks = list(tasks)
    random.Random(seed).shuffle(shuffled_tasks)
    # Build a new workload.
    workload = (workload_func, func_name, shuffled_tasks)
    validate_workload(workload)
    return workload
def workload_to_string(workload: Workload, *, use_pprint: bool = True) -> str:
    """
    Render a workload as text: function, name, and one section per task.

    E.g.,

    ```
    workload_func=_LimeTask317_process_chunk
    func_name=_LimeTask317_process_chunk
    # task 1 / 3
    args=([('./tmp.s3/20220110/data.parquet',
        './tmp.s3_out/./tmp.s3/20220110/data.parquet')],)
    kwargs={}
    # task 2 / 3
    args=([('./tmp.s3/20220111/data.parquet',
        './tmp.s3_out/./tmp.s3/20220111/data.parquet')],)
    kwargs={}
    # task 3 / 3
    args=([('./tmp.s3/20220112/data.parquet',
        './tmp.s3_out/./tmp.s3/20220112/data.parquet')],)
    kwargs={}
    ```

    :param workload: the workload to convert; it is validated first
    :param use_pprint: passed to `task_to_string()` for each task
    :return: the multi-line description of the workload
    """
    validate_workload(workload)
    workload_func, func_name, tasks = workload
    # Not every callable has a `__name__` (e.g., `functools.partial`).
    workload_func_name = getattr(workload_func, "__name__", "unknown_function")
    lines = [
        f"workload_func={workload_func_name}",
        f"func_name={func_name}",
    ]
    num_tasks = len(tasks)
    for task_num, task in enumerate(tasks, start=1):
        lines.extend(
            [
                f"# task {task_num} / {num_tasks}",
                task_to_string(task, use_pprint=use_pprint),
            ]
        )
    return "\n".join(lines)
+ + :raises: in case of error + :return: string representing information about the cached function + execution + """ + _ = args + incremental = kwargs.pop("incremental") + num_attempts = kwargs.pop("num_attempts") + _ = incremental, num_attempts + func_output: List[str] = [] + result = "\n".join(func_output) + return result + + +def _get_workload( + # args: argparse.Namespace +) -> None: + """ + Prepare the workload using the parameters from command line. + """ + # _ = args + + +# ############################################################################# +# Layer passing information from `parallel_execute` to the function to execute +# in parallel. +# ############################################################################# + + +def get_num_executing_threads(args_num_threads: Union[str, int]) -> int: + """ + Return the number of executing threads based on the value of + `args.num_threads`. + + E.g., + - `serial` corresponds to 1 + - `-1` corresponds to all available CPUs + """ + if args_num_threads == "serial": + num_executing_threads = 1 + elif args_num_threads == -1: + # All CPUs available. + num_executing_threads = joblib.cpu_count() + else: + # Assume it's an int. + num_executing_threads = int(args_num_threads) + hdbg.dassert_lte(1, num_executing_threads) + return num_executing_threads + + +def _run_in_process(func: Callable, q: Queue, *args: Any, **kwargs: Any) -> None: + """ + Run function as a process and store output in the input Queue. + """ + _LOG.debug("pid after processify=", os.getpid()) + try: + ret = func(*args, **kwargs) + except Exception: + # Store error logs in the queue. + ex_type, ex_value, tb = sys.exc_info() + error = ex_type, ex_value, "".join(traceback.format_tb(tb)) + ret = None + else: + error = None + q.put((ret, error)) + + +# TODO(grisha): Add type hints, add unit test to understand the behavior. 
# From https://gist.github.com/schlamar/2311116
# Note that this is not going to work with joblib.parallel with
# backend="multiprocessing" returning an error
# AssertionError: daemonic processes are not allowed to have children
def processify(func: Callable) -> Callable:
    """
    Decorator to run a function as a process.

    Be sure that every argument and the return value is *picklable*. The
    created process is joined, so the code does not run in parallel.

    :param func: function to run in a separate process
    :return: wrapper with the same signature as `func`, which returns the
        result of `func` or raises an exception of the same type as the one
        raised in the subprocess, with the subprocess traceback appended to
        the message
    """

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        q = Queue()
        p = Process(
            target=_run_in_process, args=[func] + [q] + list(args), kwargs=kwargs
        )
        p.start()
        # Read the result before joining, since a process that still has
        # data in the queue doesn't terminate.
        ret, error = q.get()
        p.join()
        if error:
            ex_type, ex_value, tb_str = error
            # Python 3 exceptions have no `.message` attribute: use `str()`,
            # otherwise the real error is masked by an `AttributeError`.
            message = f"{str(ex_value)} (in subprocess)\n{tb_str}"
            raise ex_type(message)
        return ret

    return wrapper
+ hdbg.dassert_lte(0, task_idx) + hdbg.dassert_lt(task_idx, task_len) + hdbg.dassert_isinstance(incremental, bool) + hdbg.dassert_isinstance(abort_on_error, bool) + hdbg.dassert_lte(1, num_attempts) + hdbg.dassert_isinstance(log_file, str) + hdbg.dassert_callable(workload_func) + hdbg.dassert_isinstance(func_name, str) + hdbg.dassert(validate_task(task)) + # Redirect the logging output of each task to a different file. + # TODO(gp): This file should go in the `task_dst_dir`. + # log_to_file = True + log_to_file = False + if log_to_file: + dst_dir = os.path.dirname(os.path.abspath(log_file)) + print(dst_dir) + hio.create_dir(dst_dir, incremental=True) + file_name = os.path.join( + dst_dir, f"{func_name}.{task_idx + 1}_{task_len}.log" + ) + _LOG.warning("Logging to %s", file_name) + file_handler = logging.FileHandler(file_name) + root_logger = logging.getLogger() + root_logger.addHandler(file_handler) + # Save information about the function to be executed. + txt = [] + # `start_ts` needs to be before running the function. + start_ts = hdateti.get_current_timestamp_as_string("naive_ET") + tag = f"{task_idx + 1}/{task_len} ({start_ts})" + txt.append("\n" + hprint.frame(tag) + "\n") + txt.append(f"tag={tag}") + workload_func_str = getattr(workload_func, "__name__", "unknown_function") + txt.append(f"workload_func={workload_func_str}") + txt.append(f"func_name={func_name}") + txt.append(task_to_string(task)) + # Run the workload. + args, kwargs = task + kwargs.update({"incremental": incremental, "num_attempts": num_attempts}) + with htimer.TimedScope( + logging.DEBUG, f"Execute '{workload_func_str}'" + ) as ts: + try: + if processify_func: + _LOG.debug("Using processify") + # Wrap the function into a process to enforce de-allocating + # memory at the end of the execution (see + # CmampTask5854: Resolve backtest memory leakage). 
+ _LOG.debug("pid before processify=%s", os.getpid()) + workload_func = processify(workload_func) + res = workload_func(*args, **kwargs) + error = False + except Exception as e: # pylint: disable=broad-except + exception = e + txt.append(f"exception='{str(e)}'") + res = None + error = True + _LOG.error("Execution failed") + # Save information about the execution of the function. + elapsed_time = ts.elapsed_time + end_ts = hdateti.get_current_timestamp_as_string("naive_ET") + # TODO(gp): -> func_result + if verbose_log: + txt.append(f"func_res=\n{hprint.indent(str(res))}") + else: + txt.append("func_res=") + txt.append(f"elapsed_time_in_secs={elapsed_time}") + txt.append(f"start_ts={start_ts}") + txt.append(f"end_ts={end_ts}") + txt.append(f"error={error}") + # Update log file. + txt = "\n".join(txt) + _LOG.debug("txt=\n%s", hprint.indent(txt)) + if enable_file_logging: + hio.to_file(log_file, txt, mode="a") + if error: + # The execution wasn't successful. + _LOG.error(txt) + if abort_on_error: + _LOG.error("Aborting since abort_on_error=%s", abort_on_error) + raise exception # noqa: F821 + _LOG.error( + "Continuing execution since abort_on_error=%s", abort_on_error + ) + res = str(exception) + else: + # The execution was successful. + pass + return res + + +# TODO(gp): Pass a `task_dst_dir` to each task so it can write there. +# This is a generalization of `experiment_result_dir` for `run_config_list` and +# `run_notebook`. +def parallel_execute( + workload: Workload, + # Options for the `parallel_execute` framework. + dry_run: bool, + num_threads: Union[str, int], + incremental: bool, + abort_on_error: bool, + num_attempts: int, + log_file: str, + *, + backend: str = "loky", + enable_file_logging: bool = True, + verbose_log: bool = False, +) -> Optional[List[Any]]: + """ + Run a workload in parallel using joblib or asyncio. 
+ + Note: + - if `abort_on_error=True` and a task fails early, `joblib` does not return partial results + - use `enable_logging=False` to disable logging entirely (useful for large results) + - use `verbose_log=False` to keep logging enabled but skip verbose output per task + + :param workload: the workload to execute + :param dry_run: if True, print the workload and exit without executing it + :param num_threads: joblib parameter to control how many threads to use + :param incremental: parameter passed to the function to execute to control if + we want to re-execute tasks already executed or not + :param abort_on_error: when True, if one task asserts then stop executing the + workload and return the exception of the failing task + - If False, the execution continues + :param num_attempts: number of times to attempt running a function before + declaring an error + :param log_file: file used to log information about the execution + :param backend: specify the backend type (e.g., joblib `loky` or `asyncio_process_executor`) + :param enable_file_logging: if False, skip writing any log file + :param verbose_log: if True, write detailed task results to the log file + - If False, large outputs will be omitted from the log to reduce file size + :return: results from executing `func` or the exception of the failing function + """ + # Print the parameters. + _LOG.info(hprint.frame("Workload")) + # It's too verbose to print all the workload. + # print(workload_to_string(workload, use_pprint=False)) + _LOG.info( + hprint.to_str( + "dry_run num_threads incremental num_attempts abort_on_error" + ) + ) + # Parse the workload. 
+ validate_workload(workload) + workload_func, func_name, tasks = workload + _LOG.info("Saving log info in '%s'", log_file) + _LOG.info( + "Number of executing threads=%s (%s)", + get_num_executing_threads(num_threads), + num_threads, + ) + _LOG.info("Number of tasks=%s", len(tasks)) + # + if dry_run: + file_name = "./tmp.parallel_execute.workload.txt" + workload_as_str = workload_to_string(workload, use_pprint=False) + hio.to_file(file_name, workload_as_str) + _LOG.warning("Workload saved at '%s'", file_name) + _LOG.warning("Exiting without executing workload, as per user request") + return None + # Run. + task_len = len(tasks) + tqdm_out = htqdm.TqdmToLogger(_LOG, level=logging.INFO) + tqdm_iter = tqdm( + enumerate(tasks), + total=task_len, + file=tqdm_out, + desc=f"num_threads={num_threads} backend={backend}", + ) + if backend == "threading": + # Enable wrapping a function into a process for threading backend + # to force memory de-allocation. + # TODO(Grisha): unclear if there are cases when we want to use + # `False` with `threading` backends, consider exposing to the + # interface. + # TODO(Grisha): should we enable the switch for `num_threads="serial"`? will it work? + processify_func = True + else: + processify_func = False + if num_threads == "serial": + # Execute the tasks serially. + res = [] + for task_idx, task in tqdm_iter: + _LOG.debug("\n%s", hprint.frame(f"Task {task_idx + 1} / {task_len}")) + # Execute. + res_tmp = _parallel_execute_decorator( + task_idx, + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + task, + enable_file_logging, + verbose_log, + ) + res.append(res_tmp) + else: + # Execute the tasks in parallel. + num_threads = int(num_threads) + # -1 is interpreted by joblib like for all cores. 
+ _LOG.info("Using %d threads, backend='%s'", num_threads, backend) + if backend in ("loky", "threading", "multiprocessing"): + # from joblib.externals.loky import set_loky_pickler + # set_loky_pickler('cloudpickle') + # Removed `verbose` param which causes issues in HelpersTask715. + res = joblib.Parallel(n_jobs=num_threads, backend=backend)( + joblib.delayed(_parallel_execute_decorator)( + task_idx, + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + task, + enable_file_logging, + verbose_log, + ) + # We can't use `tqdm_iter` since this only shows the submission of + # the jobs but not their completion. + for task_idx, task in enumerate(tasks) + ) + elif backend in ("asyncio_threading", "asyncio_multiprocessing"): + if backend == "asyncio_threading": + executor = concurrent.futures.ThreadPoolExecutor + elif backend == "asyncio_multiprocessing": + executor = concurrent.futures.ProcessPoolExecutor + else: + raise ValueError(f"Invalid backend='{backend}'") + func = lambda args_: _parallel_execute_decorator( + args_[0], + task_len, + incremental, + abort_on_error, + num_attempts, + log_file, + # + workload_func, + func_name, + processify_func, + args_[1], + enable_file_logging, + verbose_log, + ) + args = list(enumerate(tasks)) + use_progress_bar = True + if not use_progress_bar: + # Implementation without progress bar. + with executor(max_workers=num_threads) as executor_: + res = list(executor_.map(func, args)) + else: + # Implementation with progress bar. 
class _S3FSStoreBackend(StoreBackendBase, StoreBackendMixin):
    """
    A StoreBackend for S3 cloud storage file system.

    The file system object is not created by this class: it is passed through
    the `s3fs` entry of the `backend_options` in `configure()`.
    """

    def __init__(self) -> None:
        super().__init__()
        # Items opened through `_open_item()`.
        self._objs: List[Any] = []

    def _flush(self) -> None:
        """
        Hook called before reading the state of the store; currently a no-op.
        """
        _ = self

    def clear_location(self, location: str) -> None:
        """
        Remove recursively `location` from the store, if it exists.
        """
        if self.storage.exists(location):
            self._flush()
            self.storage.rm(location, recursive=True)

    def _mkdirp(self, directory: str) -> None:
        """
        Create recursively a directory on the S3 store.

        :param directory: directory to create, either relative to the root
            location of the cache or prefixed by it
        """
        # Remove root cachedir from input directory to create as it should
        # have already been created in the configure function. Strip only the
        # leading prefix: a plain `str.replace()` would also remove later
        # occurrences and would not handle `directory == self.location`.
        root = self.location
        if directory == root:
            directory = ""
        elif directory.startswith(root + "/"):
            directory = directory[len(root) + 1 :]
        current_path = root
        for sub_dir in directory.split("/"):
            if not sub_dir:
                # Skip empty components coming from repeated or leading `/`.
                continue
            current_path = os.path.join(current_path, sub_dir)
            self.storage.mkdir(current_path)

    def create_location(self, location: str) -> None:
        """
        Create object location on store.
        """
        self._mkdirp(location)

    def get_items(self) -> List[Any]:
        """
        Return the whole list of items available in cache.

        Listing the items is not implemented: the list is always empty.
        """
        _ = self
        return []

    def configure(
        self,
        location: str,
        backend_options: Dict[str, Any],
        verbose: int = 0,
    ) -> None:
        """
        Configure the store backend.

        :param location: path of the cache inside the bucket
        :param backend_options: options of the backend; `s3fs` (the file
            system object) and `bucket` (the bucket name) are required,
            `compress` is optional
        :param verbose: unused, kept for compatibility with the joblib
            interface
        """
        options = backend_options
        hdbg.dassert_in("s3fs", options)
        self.storage = options["s3fs"]
        hdbg.dassert_in("bucket", options)
        bucket = options["bucket"]
        # Ensure the given bucket exists.
        root_bucket = os.path.join("s3://", bucket)
        if not self.storage.exists(root_bucket):
            self.storage.mkdir(root_bucket)
        # Make the location relative to the bucket. Strings are immutable, so
        # the result must be assigned; with a leading `/` `os.path.join()`
        # would discard `root_bucket` altogether.
        location = location.lstrip("/")
        self.location = os.path.join(root_bucket, location)
        if not self.storage.exists(self.location):
            self.storage.mkdir(self.location)
        # Computation results can be stored compressed for faster I/O.
        self.compress = options.get("compress", False)
        # Memory map mode is not supported.
        self.mmap_mode = None
        # TODO(gp): No need to flush for now.
        # for fd in self._objs:
        #     fd.flush(force=True)

    def _open_item(self, fd: Any, mode: str) -> Any:
        """
        Open the item `fd` on the store with the given `mode`.
        """
        self._objs.append(fd)
        return self.storage.open(fd, mode)

    def _item_exists(self, path: str) -> bool:
        """
        Return whether `path` exists on the store.
        """
        self._flush()
        ret: bool = self.storage.exists(path)
        return ret

    def _move_item(self, src: str, dst: str) -> None:
        """
        Move the item `src` to `dst` on the store.
        """
        self.storage.mv(src, dst)
+ cmd.append(f"cd {scratch_dir} &&") + cmd.append(f"jupyter nbconvert {notebook_name}") + cmd.append("--execute") + cmd.append("--to html") + cmd.append("--ExecutePreprocessor.kernel_name=python") + # No time-out. + cmd.append("--ExecutePreprocessor.timeout=-1") + # Execute. + cmd_as_str = " ".join(cmd) + hsystem.system(cmd_as_str, abort_on_error=True, suppress_output=False) + + +def run_notebook_cells( + notebook_path: str, + dst_notebook_path: str, + *, + num_cells: Optional[int] = None, + kernel_name: str = "python3", + timeout: int = 30, +) -> None: + """ + Execute the first N cells of a notebook and save the result. + + :param notebook_path: path to the source notebook to execute + :param dst_notebook_path: path where the executed notebook will be saved + :param num_cells: number of cells to execute from the beginning; if None, + execute all cells + :param kernel_name: name of the Jupyter kernel to use + :param timeout: execution timeout in seconds per cell + """ + import nbformat + from nbconvert.preprocessors import ExecutePreprocessor + + hdbg.dassert_path_exists(notebook_path) + # Read the notebook. + _LOG.info("Reading notebook '%s'", notebook_path) + with open(notebook_path) as f: + nb = nbformat.read(f, as_version=4) + # Truncate to first N cells if requested. + total_cells = len(nb.cells) + if num_cells is not None: + hdbg.dassert_lte(1, num_cells, "num_cells must be >= 1") + hdbg.dassert_lte( + num_cells, + total_cells, + "num_cells=%d exceeds total cells=%d in notebook", + num_cells, + total_cells, + ) + _LOG.info("Executing first %d of %d cells", num_cells, total_cells) + nb.cells = nb.cells[:num_cells] + else: + _LOG.info("Executing all %d cells", total_cells) + # Execute the cells. + ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) + ep.preprocess(nb) + # Save the executed notebook. 
+ _LOG.info("Saving executed notebook to '%s'", dst_notebook_path) + with open(dst_notebook_path, "w") as f: + nbformat.write(nb, f) + + +def build_run_notebook_cmd( + config_builder: str, + dst_dir: str, + notebook_path: str, + *, + extra_opts: str = "", +) -> str: + """ + Construct a command string to run dev_scripts/notebooks/run_notebook.py + with specified configurations. + + :param config_builder: the configuration builder to use for the + notebook execution + :param dst_dir: the destination directory where the notebook results + will be saved + :param notebook_path: the path to the notebook that should be + executed + :param extra_opts: options for "run_notebook.py", e.g., "-- + publish_notebook" + """ + # Importing inside func to avoid error while creating dockerized executable. + # TODO(Shaunak): debug why. + import helpers.hgit as hgit + + # TODO(Vlad): Factor out common code with the + # `helpers.lib_tasks_gh.publish_buildmeister_dashboard_to_s3()`. + run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") + cmd_run_txt = [ + run_notebook_script_path, + f"--notebook {notebook_path}", + f"--config_builder '{config_builder}'", + f"--dst_dir '{dst_dir}'", + f"{extra_opts}", + ] + cmd_run_txt = " ".join(cmd_run_txt) + return cmd_run_txt + + +# ############################################################################# + + +def find_paired_files( + directory: str, + *, + pattern: str = "*.py", + exclude_pattern: str = None, +) -> tuple: + """ + Find Python files and paired Jupyter notebooks in a directory. 
+ + :param directory: path to the directory to search + :param pattern: glob pattern for Python files (default: "*.py") + :param exclude_pattern: suffix pattern to exclude (e.g., "_utils.py") + :return: tuple of (python_files, paired_notebooks, unpaired_notebooks) + - python_files: list of .py files matching pattern + - paired_notebooks: list of .ipynb files with corresponding .py + - unpaired_notebooks: list of .ipynb files without corresponding .py + """ + hdbg.dassert_path_exists(directory) + # Find Python files matching pattern. + py_files = hio.listdir( + directory, + pattern, + only_files=True, + use_relative_paths=False, + maxdepth=1, + ) + # Exclude files matching exclude_pattern. + if exclude_pattern: + py_files = [f for f in py_files if not f.endswith(exclude_pattern)] + py_files = sorted(py_files) + # Find notebook files. + nb_pattern = pattern.replace(".py", ".ipynb") + nb_files = hio.listdir( + directory, + nb_pattern, + only_files=True, + use_relative_paths=False, + maxdepth=1, + ) + nb_files = sorted(nb_files) + # Build set of base names from Python files. + py_basenames = set() + for py_file in py_files: + basename = os.path.basename(py_file) + basename = os.path.splitext(basename)[0] + py_basenames.add(basename) + # Check which notebooks have corresponding .py files. + paired_notebooks = [] + unpaired_notebooks = [] + for nb_file in nb_files: + basename = os.path.basename(nb_file) + basename = os.path.splitext(basename)[0] + if basename in py_basenames: + paired_notebooks.append(nb_file) + else: + unpaired_notebooks.append(nb_file) + return py_files, paired_notebooks, unpaired_notebooks + + +def execute_file_with_docker( + file_path: str, + *, + working_dir: str, + is_notebook: bool, +) -> Tuple[bool, str, float]: + """ + Execute a Python file or notebook using docker_cmd. 
+ + :param file_path: path to the file to execute + :param working_dir: directory to cd into before execution + :param is_notebook: True if file is a notebook, False if Python script + :return: tuple of (success, error_message, elapsed_time) + """ + timer = htimer.Timer() + success = False + error_msg = "" + try: + if is_notebook: + # For notebooks, use hjupyter.run_notebook via docker_cmd. + scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") + # Build Python command to run notebook. + cmd = ( + f'python -c "' + f"import helpers.hjupyter as hjupyte; " + f"import helpers.hio as hio; " + f"hio.create_dir('{scratch_dir}', incremental=True); " + f"hjupyte.run_notebook('{file_path}', '{scratch_dir}')\"" + ) + else: + # For Python scripts, execute directly. + cmd = f"python {file_path}" + # Build invoke docker_cmd command. + docker_cmd = f'invoke docker_cmd --cmd "{cmd}"' + # Execute in the working directory. + hsystem.system( + docker_cmd, + abort_on_error=False, + suppress_output=False, + ) + success = True + except Exception as e: + error_msg = str(e) + elapsed = timer.get_elapsed() + return success, error_msg, elapsed + + +def execute_file_directly( + file_path: str, + *, + working_dir: str, + is_notebook: bool, +) -> Tuple[bool, str, float]: + """ + Execute a Python file or notebook directly (inside container). + + :param file_path: path to the file to execute + :param working_dir: directory to cd into before execution + :param is_notebook: True if file is a notebook, False if Python script + :return: tuple of (success, error_message, elapsed_time) + """ + timer = htimer.Timer() + success = False + error_msg = "" + try: + if is_notebook: + # For notebooks, use hjupyter.run_notebook. + scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") + hio.create_dir(scratch_dir, incremental=True) + run_notebook( + file_path, + scratch_dir, + pre_cmd=f"cd {working_dir}", + ) + else: + # For Python scripts, execute directly. 
+ cmd = f"cd {working_dir} && python {file_path}" + hsystem.system( + cmd, + abort_on_error=True, + suppress_output=False, + ) + success = True + except Exception as e: + error_msg = str(e) + elapsed = timer.get_elapsed() + return success, error_msg, elapsed + + +def report_execution_results( + py_results: Dict[str, Tuple[bool, str, float]], + nb_results: Dict[str, Tuple[bool, str, float]], +) -> Tuple[int, str]: + """ + Report execution results and return failure information. + + :param py_results: results from Python file execution + :param nb_results: results from notebook execution + :return: tuple of (total_failures, error_message) + """ + # Collect failures. + py_failures = [f for f, (success, _, _) in py_results.items() if not success] + nb_failures = [f for f, (success, _, _) in nb_results.items() if not success] + # Calculate statistics. + py_total = len(py_results) + py_success = py_total - len(py_failures) + nb_total = len(nb_results) + nb_success = nb_total - len(nb_failures) + total_files = py_total + nb_total + total_success = py_success + nb_success + total_failures = len(py_failures) + len(nb_failures) + # Calculate timing statistics. + py_times = [elapsed for _, _, elapsed in py_results.values()] + nb_times = [elapsed for _, _, elapsed in nb_results.values()] + py_total_time = sum(py_times) if py_times else 0.0 + nb_total_time = sum(nb_times) if nb_times else 0.0 + total_time = py_total_time + nb_total_time + # Report summary. 
+ _LOG.info("=" * 80) + _LOG.info("EXECUTION SUMMARY") + _LOG.info("=" * 80) + _LOG.info( + "Python scripts: %d total, %d success, %d failed", + py_total, + py_success, + len(py_failures), + ) + if py_total > 0: + _LOG.info(" Total time: %.2f seconds", py_total_time) + _LOG.info(" Average time: %.2f seconds", py_total_time / py_total) + _LOG.info( + "Notebooks: %d total, %d success, %d failed", + nb_total, + nb_success, + len(nb_failures), + ) + if nb_total > 0: + _LOG.info(" Total time: %.2f seconds", nb_total_time) + _LOG.info(" Average time: %.2f seconds", nb_total_time / nb_total) + _LOG.info("-" * 80) + _LOG.info( + "TOTAL: %d files, %d success, %d failed", + total_files, + total_success, + total_failures, + ) + _LOG.info("Total execution time: %.2f seconds", total_time) + # Build error message if failures exist. + error_message = "" + if total_failures > 0: + _LOG.error("=" * 80) + _LOG.error("FAILURES DETECTED") + _LOG.error("=" * 80) + if py_failures: + _LOG.error("Failed Python scripts:") + for file_path in py_failures: + basename = os.path.basename(file_path) + _, error, _ = py_results[file_path] + _LOG.error(" - %s: %s", basename, error) + if nb_failures: + _LOG.error("Failed notebooks:") + for file_path in nb_failures: + basename = os.path.basename(file_path) + _, error, _ = nb_results[file_path] + _LOG.error(" - %s: %s", basename, error) + _LOG.error("=" * 80) + error_message = ( + f"{total_failures} file(s) failed to execute. See log for details." 
+ ) + return total_failures, error_message diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py new file mode 100644 index 000000000..5e0ec6214 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py @@ -0,0 +1,334 @@ +""" +Import as: + +import helpers.hlatex as hlatex +""" + +import logging +import re +from typing import List, Optional + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Consider using `pypandoc` instead of calling `pandoc` directly. +# https://boisgera.github.io/pandoc + + +# TODO(gp): Add a switch to keep the tmp files or delete them. +def convert_pandoc_md_to_latex(txt: str) -> str: + """ + Run pandoc to convert a markdown file to a latex file. + """ + hdbg.dassert_isinstance(txt, str) + # Save to tmp file. + in_file_name = "./tmp.run_pandoc_in.md" + hio.to_file(in_file_name, txt) + # Run Pandoc. + out_file_name = "./tmp.run_pandoc_out.tex" + cmd = ( + f"pandoc {in_file_name} -o {out_file_name} --read=markdown --write=latex" + ) + container_type = "pandoc_only" + + # To minimze the dependency. + import dev_scripts_helpers.dockerize.lib_pandoc as dshdlipa + + dshdlipa.run_dockerized_pandoc(cmd, container_type) + # Read tmp file. + res = hio.from_file(out_file_name) + # Remove lines that contain \tightlist. + res = "\n".join( + [line for line in res.splitlines() if "\\tightlist" not in line] + ) + return res + + +def markdown_list_to_latex(markdown: str) -> str: + """ + Convert a Markdown list to LaTeX format. 
+ + :param markdown: The Markdown text to convert + :return: The converted LaTeX text + """ + hdbg.dassert_isinstance(markdown, str) + markdown = hprint.dedent(markdown) + # Remove the first line if it's a title. + markdown_lines = markdown.split("\n") + m = re.match(r"^(\*+ )(.*)", markdown_lines[0]) + if m: + title = m.group(2) + markdown_lines = markdown_lines[1:] + else: + title = "" + markdown = "\n".join(markdown_lines) + # Convert. + txt = convert_pandoc_md_to_latex(markdown) + # Remove `\tightlist` and empty lines. + lines = txt.splitlines() + lines = [line for line in lines if "\\tightlist" not in line] + lines = [line for line in lines if line.strip() != ""] + txt = "\n".join(lines) + # Add the title frame. + if title: + txt = f"\\begin{{frame}}{{{title}}}" + "\n" + txt + "\n" + "\\end{frame}" + return txt + + +def remove_latex_formatting(latex_string: str) -> str: + r""" + Remove LaTeX formatting such as \textcolor{color}{content} and retains only + the content. + """ + cleaned_string = re.sub( + r"\\textcolor\{[^}]*\}\{([^}]*)\}", r"\1", latex_string + ) + return cleaned_string + + +def format_latex(txt: str) -> str: + """ + Format LaTeX text using `prettier`. + + :param txt: input LaTeX text to format + :return: formatted LaTeX text + """ + file_type = "tex" + # To minimize the dependency. + import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr + + txt = dshdlipr.prettier_on_str(txt, file_type) + return txt + + +# ############################################################################# +# Frame Latex sections +# ############################################################################# + + +def _is_latex_line_separator(line: str, *, min_repeats: int = 5) -> bool: + """ + Check if the given line is a LaTeX comment separator. + + This function determines if a line consists of a comment character + `%` followed by repeated characters (`#`, `=`, `-`) that would + indicate a section separator. 
+ + :param line: current line of text being processed + :param min_repeats: minimum number of times the characters have to + be repeated to be considered a separator + :return: whether the line is a separator + """ + separator_pattern = rf""" + ^\s*%\s* # % + ([#=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it + # (`min_repeats` - 1) times. + \s*$ # Match only whitespace characters + # until the end of the line. + """ + res = bool(re.match(separator_pattern, line, re.VERBOSE)) + return res + + +def frame_sections(lines: List[str]) -> List[str]: + r""" + Add line separators before LaTeX section commands. + + This function adds comment separators before \section, \subsection, and + \subsubsection commands in LaTeX files. The separators are: + ``` + % #####... + \section + + % =====... + \subsection: + + % -----... + \subsubsection + ``` + + If a separator comment already exists immediately before the section command, + no separator is added. + + :param lines: list of strings representing the LaTeX file content + :return: list of strings with separators added before section commands + """ + hdbg.dassert_isinstance(lines, list) + # Loop 1: Remove existing latex separators. + txt_tmp: List[str] = [] + for line in lines: + if not _is_latex_line_separator(line): + txt_tmp.append(line) + # Loop 2: Remove consecutive empty lines, leaving only one. + txt_tmp2: List[str] = [] + prev_was_empty = False + for line in txt_tmp: + is_empty = line.strip() == "" + if is_empty: + if not prev_was_empty: + txt_tmp2.append(line) + prev_was_empty = True + else: + txt_tmp2.append(line) + prev_was_empty = False + # Loop 3: Add correct LaTeX separator based on section commands. + txt_new: List[str] = [] + # Define the section patterns and their corresponding separators. + # Total line length is 80 characters, "% " is 2 characters, so 78 separator chars. 
+ prefix = "% " + section_patterns = [ + (r"^\\section\{", prefix + "#" * 78), + (r"^\\subsection\{", prefix + "=" * 78), + (r"^\\subsubsection\{", prefix + "-" * 78), + ] + for i, line in enumerate(txt_tmp2): + _LOG.debug("line=%d:%s", i, line) + txt_processed = False + # Check if the line matches any section command. + for pattern, separator in section_patterns: + m = re.match(pattern, line.strip()) + if m: + _LOG.debug(" -> Found section command") + txt_new.append(separator) + _LOG.debug(" -> Added separator: %s", separator) + txt_new.append(line) + txt_processed = True + break + if not txt_processed: + txt_new.append(line) + hdbg.dassert_isinstance(txt_new, list) + return txt_new + + +# ############################################################################# +# LaTeX Header Extraction +# ############################################################################# + + +def _is_latex_comment(line: str) -> bool: + r""" + Check if a line is a LaTeX comment. + + A LaTeX comment line starts with the `%` character. This function + handles the edge case where `%` is escaped (e.g., `\%`), which + should not be treated as a comment. + + :param line: line of text to check + :return: True if the line is a comment, False otherwise + """ + hdbg.dassert_isinstance(line, str) + # Strip leading whitespace to check the first non-whitespace character. + stripped_line = line.lstrip() + # Check if line starts with %. + if not stripped_line.startswith("%"): + return False + # Check if the % is escaped by looking at the character before it in the + # original line. + # Find the position of % in the original line. + percent_pos = line.find("%") + # If there's a character before %, check if it's a backslash. + if percent_pos > 0 and line[percent_pos - 1] == "\\": + # Check if the backslash itself is escaped. + if percent_pos > 1 and line[percent_pos - 2] == "\\": + # Double backslash before %, so % is not escaped. + return True + # Single backslash before %, so % is escaped. 
+ return False + # % is at the beginning or has no backslash before it. + return True + + +def _extract_latex_section( + line: str, line_number: int +) -> Optional[hmarhead.HeaderInfo]: + r""" + Parse a LaTeX section command and extract section information. + + This function identifies LaTeX section commands (\section{}, \subsection{}, + \subsubsection{}) and extracts the section title. It handles several edge + cases including: + - Regex parsing of `\section[Short]{Long Title}` (extracts "Long Title") + - Handles nested braces within titles (e.g., `\section{Intro to \textbf{ML}}`) + - Does not handle multi-line section titles + + :param line: line of text to parse + :param line_number: line number in the original file + :return: HeaderInfo object if section found, None otherwise + """ + hdbg.dassert_isinstance(line, str) + hdbg.dassert_isinstance(line_number, int) + # Define section patterns with their corresponding levels. + # Pattern supports optional [short title] before {long title}. + regex = r"(?:\[.*?\])?\{(.*)\}" + section_patterns = [ + (r"\\section" + regex, 1), + (r"\\subsection" + regex, 2), + (r"\\subsubsection" + regex, 3), + ] + line_stripped = line.strip() + # Try to match each section pattern. + for pattern, level in section_patterns: + # Check if line starts with the section command. + match = re.match(pattern, line_stripped) + if match: + # Extract the title from the first capture group. + title = match.group(1) + # Skip sections with empty titles. + if not title: + return None + # Return HeaderInfo with level, title, and line number. + return hmarhead.HeaderInfo(level, title, line_number) + # No section command found. + return None + + +def extract_headers_from_latex( + lines: List[str], max_level: int, *, sanity_check: bool = True +) -> hmarhead.HeaderList: + r""" + Extract headers from a LaTeX file and return a HeaderList. 
+ + This function processes a LaTeX file line by line, identifies section + commands (\section, \subsection, \subsubsection), and creates a list + of HeaderInfo objects. It skips commented-out lines (lines starting + with %) and only includes headers up to the specified maximum level. + + :param lines: content of the input LaTeX file as list of strings + :param max_level: maximum header levels to parse (e.g., '3' parses + \section, \subsection, and \subsubsection, but not deeper levels) + :param sanity_check: whether to check that the header list is valid + using the same validation as Markdown headers + :return: list of HeaderInfo objects, each containing (level, title, + line_number), e.g.: + ``` + [ + HeaderInfo(1, "Introduction", 5), + HeaderInfo(2, "Background", 10), + ... + ] + ``` + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_lte(1, max_level) + header_list: hmarhead.HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(lines, start=1): + # Skip LaTeX comment lines. + if _is_latex_comment(line): + continue + # Check if this line contains a section command. + header_info = _extract_latex_section(line, line_number) + if header_info and header_info.level <= max_level: + # Add HeaderInfo to list. + header_list.append(header_info) + # Check the header list. + if sanity_check: + hmarhead.sanity_check_header_list(header_list) + else: + _LOG.debug("Skipping sanity check") + hdbg.dassert_isinstance(header_list, list) + return header_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py new file mode 100644 index 000000000..8f857d385 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py @@ -0,0 +1,29 @@ +""" +Linting utilities for text and code files. 
+ +Import as: + +import helpers.hlint as hlint +""" + +import logging + +import helpers.hgit as hgit +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def lint_file(file_path: str) -> None: + """ + Run lint_txt.py on the file to ensure proper formatting. + + :param file_path: path to the file to lint + """ + _LOG.info("Linting file: %s", file_path) + lint_script = hgit.find_file_in_git_tree("lint_txt.py", super_module=True) + # Run lint_txt.py. + cmd = f"{lint_script} -i {file_path} -v CRITICAL" + _LOG.debug("Running command: %s", cmd) + hsystem.system(cmd, suppress_output=True) + _LOG.info("File linted successfully: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py new file mode 100644 index 000000000..c13ed1255 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py @@ -0,0 +1,78 @@ +""" +Import as: + +import helpers.hlist as hlist +""" + +from typing import Any, List, Optional, Set + +import helpers.hdbg as hdbg + + +# TODO(gp): -> return_single_element, return_single_element_or_assert? +def assert_single_element_and_return(list_: List[Any]) -> Any: + """ + Assert that the passed list has a single element and return that single + element. + + :return: return the unique element in the list + """ + hdbg.dassert_isinstance(list_, list) + hdbg.dassert_eq(len(list_), 1, "List has %d elements!", len(list_)) + return list_[0] + + +def find_duplicates(list_: List[Any]) -> List[Any]: + """ + Find the elements duplicated in a list. + """ + hdbg.dassert_isinstance(list_, list) + # Count the occurrences of each element of the seq. + set_l = set(list_) + v_to_num = [(v, list_.count(v)) for v in set_l] + # Build list of elems with duplicates. 
+ res = [v for v, n in v_to_num if n > 1] + return res + + +def remove_duplicates(list_: List[Any]) -> List[Any]: + """ + Remove the elements duplicated in a list, without changing the order. + """ + hdbg.dassert_isinstance(list_, list) + list_out = [] + set_l: Set[Any] = set() + for v in list_: + if v not in set_l: + set_l.add(v) + list_out.append(v) + return list_out + + +def extract( + list_: List[Any], start_idx: Optional[int], end_idx: Optional[int] +) -> List[Any]: + """ + Filter the list using [start_idx, end_idx). + """ + if start_idx is not None: + hdbg.dassert_lte(0, start_idx) + else: + start_idx = 0 + if end_idx is not None: + hdbg.dassert_lte(end_idx, len(list_)) + else: + end_idx = len(list_) + if list_: + hdbg.dassert_lt(start_idx, end_idx) + list_ = list_[start_idx:end_idx] + return list_ + + +def chunk(list_: List[Any], n: int) -> List[Any]: + hdbg.dassert_lte(1, n) + hdbg.dassert_lte(n, len(list_)) + k, m = divmod(len(list_), n) + return [ + list_[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n) + ] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py new file mode 100644 index 000000000..f821d4f76 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py @@ -0,0 +1,680 @@ +""" +Import as: + +import helpers.hllm as hllm +""" + +import functools +import logging +import os +import re +from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union + +import openai +import tqdm +from pydantic import BaseModel + +import helpers.hcache_simple as hcacsimp +import helpers.hdbg as hdbg +import helpers.hllm_cost as hllmcost +import helpers.hprint as hprint +import helpers.htimer as htimer + +_LOG = logging.getLogger(__name__) + + +# Create a generic type variable. 
+T = TypeVar("T", bound=BaseModel) + +# ############################################################################# +# Update LLM cache +# ############################################################################# + + +_UPDATE_LLM_CACHE = False + + +def set_update_llm_cache(update: bool) -> None: + """ + Set whether to update the LLM cache. + + :param update: True to update the cache, False otherwise + """ + global _UPDATE_LLM_CACHE + _UPDATE_LLM_CACHE = update + + +def get_update_llm_cache() -> bool: + """ + Get whether to update the LLM cache. + + :return: True if the cache should be updated, False otherwise + """ + return _UPDATE_LLM_CACHE + + +# ############################################################################# +# Utility Functions +# ############################################################################# + + +def _get_llm_provider_and_model(model: str) -> Tuple[str, str]: + """ + Get the provider and model names from a model string. + + The model can be specified as: + - "gpt-4o-mini" + - "openai/gpt-4o-mini" + - "deepseek/deepseek-r1-0528-qwen3-8b:free/" + + :param model: model to use for the completion + :return: tuple of provider name and model name + """ + if "/" in model: + if model.startswith("openai/"): + provider_name = "openai" + model = model.split("/")[1] + else: + provider_name = "openrouter" + else: + provider_name = "openai" + hdbg.dassert_in( + provider_name, + ("openai", "openrouter"), + "Unknown provider: %s", + provider_name, + ) + return provider_name, model + + +def response_to_txt(response: Any) -> str: + """ + Convert an OpenAI API response to a text string. 
+ + :param response: API response object + :return: extracted text contents as a string + """ + if isinstance(response, openai.types.chat.chat_completion.ChatCompletion): + ret = response.choices[0].message.content + elif isinstance(response, openai.types.responses.Response): + ret = response.output_text + # elif isinstance(response, openai.pagination.SyncCursorPage): + # ret = response.data[0].content[0].text.value + elif isinstance(response, openai.types.beta.threads.message.Message): + ret = response.content[0].text.value + elif isinstance(response, str): + ret = response + elif isinstance(response, dict): + # Handle Chat Completions dict form. + if "choices" in response and "message" in response["choices"][0]: + ret = response["choices"][0]["message"]["content"] + # Handle Responses API dict form. + elif "output_text" in response: + ret = response["output_text"] + else: + raise ValueError( + f"Unknown dict structure in response: {response.keys()}" + ) + else: + raise ValueError(f"Unknown response type: {type(response)}") + hdbg.dassert_isinstance(ret, str) + return ret + + +def build_chat_completion_messages( + system_prompt: str, + user_prompt: str, + *, + images_as_base64: Optional[Tuple[str, ...]] = None, +) -> List[Dict[str, Any]]: + """ + Construct the standard messages payload for the Chat Completions API. + + :param system_prompt: system prompt + :param user_prompt: user prompt + :param images_as_base64: base64-encoded images + :return: messages in the format expected by the Chat Completions API + """ + hdbg.dassert_isinstance(system_prompt, str) + hdbg.dassert_isinstance(user_prompt, str) + ret = [{"role": "system", "content": system_prompt}] + # Build user message content. 
+ if images_as_base64: + # Multi-modal message with text and images + user_content = [{"type": "text", "text": user_prompt}] + for image_b64 in images_as_base64: + user_content.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}, + } + ) + ret.append({"role": "user", "content": user_content}) + else: + # Text-only message. + ret.append({"role": "user", "content": user_prompt}) + return ret + + +def build_responses_input( + user_prompt: str, + *, + images_as_base64: Optional[Tuple[str, ...]] = None, +) -> List[Dict[str, Any]]: + """ + Construct the user input payload for the Responses API. + + :param user_prompt: user prompt + :param images_as_base64: base64-encoded images + :return: input in the format expected by the Responses API + """ + hdbg.dassert_isinstance(user_prompt, str) + # Build user message content. + content_blocks = [{"type": "input_text", "text": user_prompt}] + if images_as_base64: + # Add image input. + for image_b64 in images_as_base64: + content_blocks.append( + { + "type": "input_image", + "image_url": f"data:image/jpeg;base64,{image_b64}", + } + ) + responses_input = [ + { + "role": "user", + "content": content_blocks, + } + ] + return responses_input + + +# ############################################################################# + + +@hcacsimp.simple_cache( + write_through=True, exclude_keys=["client", "cache_mode", "cost_tracker"] +) +def _call_api_sync( + # pylint: disable=unused-argument + # This is needed to support caching. + cache_mode: str, + client: openai.OpenAI, + user_prompt: str, + system_prompt: str, + temperature: float, + model: str, + *, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + use_responses_api: bool = False, + **create_kwargs, +) -> Dict[Any, Any]: + """ + Make a non-streaming API call. + + See `get_completion()` for other parameter descriptions. 
+ + :param client: LLM client + :param cost_tracker: LLMCostTracker instance to track costs + :param use_responses_api: whether to use the Responses API instead + of Chat Completions + :return: OpenAI API result as a dictionary + """ + if not use_responses_api: + messages = build_chat_completion_messages( + system_prompt, user_prompt, images_as_base64=images_as_base64 + ) + completion = client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + **create_kwargs, + ) + else: + user_input = build_responses_input( + user_prompt, images_as_base64=images_as_base64 + ) + completion = client.responses.create( + model=model, + instructions=system_prompt, + input=user_input, + temperature=temperature, + **create_kwargs, + ) + completion_obj = completion.to_dict() + if isinstance(completion, openai.types.responses.Response): + # Store the output of the Responses API. + completion_obj["output_text"] = completion.output_text + if cost_tracker is not None: + # Calculate the cost of the completion. + hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) + cost = cost_tracker.calculate_cost(completion, model) + cost_tracker.accumulate_cost(cost) + # Store the cost in the completion object. + completion_obj["cost"] = cost + return completion_obj + + +@hcacsimp.simple_cache( + cache_type="pickle", + write_through=True, + exclude_keys=["client", "cache_mode", "cost_tracker"], +) +def _call_structured_api_sync( + # pylint: disable=unused-argument + # This is needed to support caching. + cache_mode: str, + client: openai.OpenAI, + model: str, + user_prompt: str, + system_prompt: str, + temperature: float, + response_format: type[T], + *, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + print_cost: bool = False, + **create_kwargs, +) -> T: + """ + Make a non-streaming structured API call. + + See `get_structured_completion()` for parameter descriptions. 
+ + :param client: LLM client + :param response_format: expected structured output format + :return: parsed output as the specified Pydantic model + """ + user_input = build_responses_input( + user_prompt, images_as_base64=images_as_base64 + ) + response = client.responses.parse( + model=model, + instructions=system_prompt, + input=user_input, + temperature=temperature, + text_format=response_format, + **create_kwargs, + ) + # Extract the parsed output. + parsed_output: T = response.output_parsed + # Track costs. + if cost_tracker is not None: + hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) + cost = cost_tracker.calculate_cost(response) + cost_tracker.accumulate_cost(cost) + if print_cost: + _LOG.info("cost=%.6f", cost) + return parsed_output + + +# ############################################################################# +# LLMClient +# ############################################################################# + + +class LLMClient: + """ + Class to handle LLM API client creation and requests. + """ + + def __init__( + self, + model: str, + ) -> None: + """ + Initialize the LLMClient. + + The model can be specified as: + - "gpt-4o-mini" + - "openai/gpt-4o-mini" + - "deepseek/deepseek-r1-0528-qwen3-8b:free/" + + :param model: model to use for the completion. + """ + hdbg.dassert_isinstance(model, str) + if model == "": + provider_name, model = self.get_default_model() + else: + provider_name, model = _get_llm_provider_and_model(model) + + self.provider_name = provider_name + self.model = model + self.client = None + + def get_default_model(self) -> Tuple[str, str]: + """ + Get the default provider and model for the client. + + :return: default provider and model used in the client + """ + provider_name = "openai" + model = self._get_default_model(provider_name) + return provider_name, model + + def create_client(self) -> None: + """ + Create an LLM client. 
+ """ + if self.provider_name == "openai": + base_url = "https://api.openai.com/v1" + api_key = os.environ.get("OPENAI_API_KEY") + elif self.provider_name == "openrouter": + base_url = "https://openrouter.ai/api/v1" + api_key = os.environ.get("OPENROUTER_API_KEY") + else: + raise ValueError(f"Unknown provider: {self.provider_name}") + _LOG.debug(hprint.to_str("self.provider_name base_url")) + client = openai.OpenAI(base_url=base_url, api_key=api_key) + self.client = client + + def call_llm( + self, + cache_mode: str, + user_prompt: str, + system_prompt: str, + temperature: float, + *, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + use_responses_api: bool = False, + **create_kwargs, + ) -> Dict[Any, Any]: + """ + Call the LLM API. + + Check `_call_api_sync()` params for more details. + """ + return _call_api_sync( + cache_mode=cache_mode, + client=self.client, + user_prompt=user_prompt, + system_prompt=system_prompt, + temperature=temperature, + model=self.model, + images_as_base64=images_as_base64, + cost_tracker=cost_tracker, + use_responses_api=use_responses_api, + **create_kwargs, + ) + + def _get_default_model(self, provider_name: str) -> str: + """ + Get the default model for a provider. 
+ + :return: default model for the provider + """ + if provider_name == "openai": + model = "gpt-4o" + elif provider_name == "openrouter": + model = "openai/gpt-4o" + else: + raise ValueError(f"Unknown provider: {self.provider_name}") + return model + + +# ############################################################################# + + +@functools.lru_cache(maxsize=1024) +def get_completion( + user_prompt: str, + *, + system_prompt: str = "", + model: str = "", + report_progress: bool = False, + print_cost: bool = False, + cache_mode: str = "DISABLE_CACHE", + temperature: float = 0.1, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional["hllmcost.LLMCostTracker"] = None, + use_responses_api: bool = False, + return_raw: bool = False, + **create_kwargs, +) -> Union[str, Dict[Any, Any]]: + """ + Generate a completion using OpenAI's API. + + :param user_prompt: user input message + :param system_prompt: system instruction + :param model: model to use or empty string to use the default model + :param report_progress: whether to report progress running the API + call + :param cache_mode: + - "DISABLE_CACHE": No caching + - "REFRESH_CACHE": Make API calls and save responses to cache + - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache + - "NORMAL": Use cached responses if available, otherwise make API call + :param cache_file: file to save/load completion cache + :param temperature: adjust an LLM's sampling diversity: lower values make it + more deterministic, while higher values foster creative variation. + 0 < temperature <= 2, 0.1 is default value in OpenAI models. 
+ :param images_as_base64: base64-encoded images to include in the user message + :param cost_tracker: LLMCostTracker instance to track costs + :param use_responses_api: whether to use the Responses API instead of Chat + Completions + :param return_raw: whether to return the raw API response instead of + extracting the text content + :param create_kwargs: additional params for the API call + :return: API response or its text content + """ + hdbg.dassert_in( + cache_mode, + ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), + ) + update_llm_cache = get_update_llm_cache() + if update_llm_cache: + cache_mode = "REFRESH_CACHE" + # Initialize LLM client. + # Skip client creation for HIT_CACHE_OR_ABORT mode since: + # - If cache hits, we never use the client + # - If cache misses, we abort before calling the function + llm_client = LLMClient(model=model) + if cache_mode != "HIT_CACHE_OR_ABORT": + llm_client.create_client() + if use_responses_api and llm_client.provider_name != "openai": + raise ValueError( + "Responses API is only supported for the 'openai' provider." + ) + if report_progress and return_raw: + raise ValueError( + "Streaming mode is only supported while returning text content." + ) + if report_progress and cache_mode == "HIT_CACHE_OR_ABORT": + raise ValueError( + "Streaming mode (report_progress=True) is not supported with " + "cache_mode='HIT_CACHE_OR_ABORT'." + ) + # Construct messages in OpenAI API request format. + _LOG.info("LLM API call ... 
") + memento = htimer.dtimer_start(logging.DEBUG, "LLM API call") + if not report_progress: + completion = llm_client.call_llm( + cache_mode=cache_mode, + user_prompt=user_prompt, + system_prompt=system_prompt, + temperature=temperature, + images_as_base64=images_as_base64, + cost_tracker=cost_tracker, + use_responses_api=use_responses_api, + **create_kwargs, + ) + if not use_responses_api: + txt_response = completion["choices"][0]["message"]["content"] + else: + txt_response = completion["output_text"] + else: + # TODO(gp): This is not working. It doesn't show the progress and it + # doesn't show the cost. + # Stream the output to show progress. + collected_messages = [] + if not use_responses_api: + # Stream Chat Completions API. + messages = build_chat_completion_messages( + system_prompt, user_prompt, images_as_base64=images_as_base64 + ) + completion = llm_client.client.chat.completions.create( + model=model, + messages=messages, + stream=True, + **create_kwargs, + ) + for chunk in tqdm.tqdm( + completion, desc="Generating completion", unit=" chunks" + ): + if chunk.choices[0].delta.content is not None: + collected_messages.append(chunk.choices[0].delta.content) + else: + # Stream Responses API. + user_input = build_responses_input( + user_prompt, images_as_base64=images_as_base64 + ) + completion = llm_client.client.responses.create( + model=model, + instructions=system_prompt, + input=user_input, + stream=True, + **create_kwargs, + ) + for event in tqdm.tqdm( + completion, desc="Generating response", unit=" events" + ): + if event.type == "response.output_text.delta": + collected_messages.append(event.delta.value) + txt_response = "".join(collected_messages) + # Report the time taken. + msg, _ = htimer.dtimer_stop(memento) + _LOG.info(msg) + if print_cost and "cost" in completion: + _LOG.info("cost=%.6f", completion["cost"]) + if return_raw: + # Return the full completion/response object. 
+ return completion + return txt_response + + +@functools.lru_cache(maxsize=1024) +def get_structured_completion( + user_prompt: str, + response_format: type[T], + *, + system_prompt: str = "", + model: str = "", + cache_mode: str = "DISABLE_CACHE", + temperature: float = 0.1, + images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + print_cost: bool = False, + **create_kwargs, +) -> T: + """ + Generate a Structured Output using OpenAI's API. + + See `get_completion()` for other parameter descriptions. + + :param response_format: expected structured output format + :param cache_mode: + - "DISABLE_CACHE": No caching + - "REFRESH_CACHE": Make API calls and save responses to cache + - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache + - "NORMAL": Use cached responses if available, otherwise make API call + :return: output parsed into the specified format + """ + hdbg.dassert_in( + cache_mode, + ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), + ) + update_llm_cache = get_update_llm_cache() + if update_llm_cache: + cache_mode = "REFRESH_CACHE" + # Initialize LLM client. + # Skip client creation for HIT_CACHE_OR_ABORT mode since: + # - If cache hits, we never use the client + # - If cache misses, we abort before calling the function + if cache_mode == "HIT_CACHE_OR_ABORT": + # Don't create the client; pass None since it won't be used. + llm_client = LLMClient(model=model) + client = None + model_to_use = llm_client.model + else: + llm_client = LLMClient(model=model) + llm_client.create_client() + if llm_client.provider_name != "openai": + raise ValueError( + "`get_structured_completion()` currently only supports the " + "'openai' provider (Responses API + Structured Outputs). " + f"Got provider_name='{llm_client.provider_name}'." + ) + client = llm_client.client + model_to_use = llm_client.model + # Retrieve a structured response. 
+ parsed_output: T = _call_structured_api_sync( + cache_mode=cache_mode, + client=client, + model=model_to_use, + user_prompt=user_prompt, + system_prompt=system_prompt, + temperature=temperature, + response_format=response_format, + images_as_base64=images_as_base64, + cost_tracker=cost_tracker, + print_cost=print_cost, + **create_kwargs, + ) + return parsed_output + + +# ############################################################################# + + +def apply_prompt_to_dataframe( + df, + prompt, + model: str, + input_col, + response_col, + *, + chunk_size=50, + allow_overwrite: bool = False, +): + _LOG.debug(hprint.to_str("prompt model input_col response_col chunk_size")) + hdbg.dassert_in(input_col, df.columns) + if not allow_overwrite: + hdbg.dassert_not_in(response_col, df.columns) + response_data = [] + for start in tqdm.tqdm( + range(0, len(df), chunk_size), desc="Processing chunks" + ): + end = start + chunk_size + chunk = df.iloc[start:end] + _LOG.debug("chunk.size=%s", chunk.shape[0]) + data = chunk[input_col].astype(str).tolist() + data = [f"{i + 1}: {val}" for i, val in enumerate(data)] + user = "\n".join(data) + _LOG.debug("user=\n%s", user) + try: + response = get_completion(user, system_prompt=prompt, model=model) + except Exception as e: + _LOG.error( + f"Error processing column {input} in chunk {start}-{end}: {e}" + ) + raise e + # processed_response = response.split("\n") + processed_response = [ + ln.rstrip() for ln in response.splitlines() if ln.strip() + ] + _LOG.debug(hprint.to_str("processed_response")) + _LOG.debug("len(processed_response)=%s", len(processed_response)) + hdbg.dassert_eq(len(processed_response), chunk.shape[0]) + for i in range(len(processed_response)): + m = re.match(r"\d+: (.*)\s*", processed_response[i]) + hdbg.dassert(m, f"Invalid response: {processed_response[i]}") + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. 
+ assert m is not None + processed_response[i] = m.group(1).rstrip().lstrip() + _LOG.debug(hprint.to_str("processed_response")) + response_data.extend(processed_response) + df[response_col] = response_data + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py new file mode 100644 index 000000000..bc42d6816 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py @@ -0,0 +1,840 @@ +""" +Import as: + +import helpers.hllm_cli as hllmcli +""" + +import json +import logging +import shlex +import subprocess +import sys +import importlib +import pprint +import time +from typing import Callable, Dict, List, Optional, Tuple, Union + +try: + import llm + import tokencost + + _LLM_AVAILABLE = True +except ImportError: + _LLM_AVAILABLE = False + +import pandas as pd +from tqdm import tqdm + +import helpers.hcache_simple as hcacsimp +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hmodule as hmodule +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +# _LOG.trace = lambda *args, **kwargs: None +_LOG.trace = _LOG.debug + + +def install_needed_modules( + *, use_sudo: bool = True, venv_path: Optional[str] = None +) -> None: + """ + Install needed modules for LLM CLI. 
+ + :param use_sudo: whether to use sudo to install the module + :param venv_path: path to the virtual environment + E.g., /Users/saggese/src/venv/client_venv.helpers + """ + hmodule.install_module_if_not_present( + "llm", + package_name="llm", + use_sudo=use_sudo, + use_activate=True, + venv_path=venv_path, + ) + hmodule.install_module_if_not_present( + "tokencost", + package_name="tokencost", + use_sudo=use_sudo, + use_activate=True, + venv_path=venv_path, + ) + # Reload this module if already imported. + this_module_name = __name__ + if this_module_name in sys.modules: + importlib.reload(sys.modules[this_module_name]) + + +def shutup_llm_logging() -> None: + """ + Shut up OpenAI logging. + """ + # OpenAI client logging. + logging.getLogger("openai").setLevel(logging.WARNING) + # Common HTTP logging sources + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + logging.getLogger("urllib3").setLevel(logging.WARNING) + + +# ############################################################################# +# Helper functions +# ############################################################################# + + +def _check_llm_executable() -> bool: + """ + Check if the llm command-line executable is available. + + :return: True if llm executable exists, False otherwise + """ + try: + hsystem.system("which llm", suppress_output=True) + _LOG.debug("llm command found") + return True + except Exception: + _LOG.debug("llm command not found") + return False + + +def _apply_llm_via_executable( + input_str: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + expected_num_chars: Optional[int] = None, +) -> Tuple[str, float]: + """ + Apply LLM using the llm CLI executable. 
+ + :param input_str: the input text to process + :param system_prompt: optional system prompt to use + :param model: optional model name to use + :param expected_num_chars: optional expected number of characters in + output (used for progress bar) + :return: tuple of (LLM response as string, cost in dollars) + """ + # Build command. + cmd = ["llm"] + if system_prompt: + cmd.extend(["--system", system_prompt]) + if model: + cmd.extend(["--model", model]) + # Add the user prompt. + cmd.append(input_str) + _LOG.debug("Running command: %s", " ".join(cmd)) + # Execute command. + if expected_num_chars: + # Use streaming with progress bar. + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + response_parts = [] + with tqdm(total=expected_num_chars, unit="char") as pbar: + for line in proc.stdout: + response_parts.append(line) + pbar.update(len(line)) + # Wait for process to complete. + proc.wait() + if proc.returncode != 0: + error_msg = proc.stderr.read() if proc.stderr else "" + hdbg.dfatal( + f"llm command failed with return code: {proc.returncode} error: {error_msg}" + ) + response = "".join(response_parts) + else: + # Run without progress bar. + cmd_str = " ".join(shlex.quote(arg) for arg in cmd) + _, response = hsystem.system_to_string(cmd_str) + # Cost calculation not available when using executable. + cost = 0.0 + _LOG.debug("Cost calculation not available when using llm executable") + return response, cost + + +def _calculate_cost_from_usage( + usage: object, + model: str, +) -> float: + """ + Calculate LLM cost from usage object. 
+ + :param usage: usage object from LLM result containing input/output token counts + :param model: model name for cost calculation + :return: total cost in dollars + """ + input_tokens = usage.input + output_tokens = usage.output + prompt_cost = tokencost.calculate_cost_by_tokens( + num_tokens=input_tokens, model=model, token_type="input" + ) + completion_cost = tokencost.calculate_cost_by_tokens( + num_tokens=output_tokens, model=model, token_type="output" + ) + cost = float(prompt_cost + completion_cost) + return cost + + +def _apply_llm_via_library( + input_str: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + expected_num_chars: Optional[int] = None, +) -> Tuple[str, float]: + """ + Apply LLM using the llm Python library. + + :param input_str: the input text to process + :param system_prompt: optional system prompt to use + :param model: optional model name to use + :param expected_num_chars: optional expected number of characters in + output (used for progress bar) + :return: tuple of (LLM response as string, cost in dollars) + """ + # Get the model. + if model: + llm_model = llm.get_model(model) + else: + llm_model = llm.get_model() + _LOG.debug("Using model: %s", llm_model.model_id) + # Execute with or without progress bar. + if expected_num_chars: + # Use streaming with progress bar. + response_parts = [] + with tqdm(total=expected_num_chars, unit="char") as pbar: + for chunk in llm_model.prompt( + input_str, system=system_prompt, stream=True + ): + chunk_str = str(chunk) + response_parts.append(chunk_str) + pbar.update(len(chunk_str)) + response = "".join(response_parts) + # Streaming doesn't provide usage info, so we can't calculate cost. + cost = 0.0 + _LOG.debug("Cost calculation not available for streaming mode") + else: + # Run without progress bar. 
+ _LOG.trace("system_prompt=\n%s", system_prompt) + _LOG.trace("input_str=\n%s", input_str) + result = llm_model.prompt(input_str, system=system_prompt) + response = result.text() + _LOG.trace("response=\n%s", response) + # Calculate cost. + usage = result.usage() + cost = _calculate_cost_from_usage( + usage=usage, + model=llm_model.model_id, + ) + _LOG.debug( + "Cost: $%.6f (input: %d tokens, output: %d tokens)", + cost, + usage.input, + usage.output, + ) + return response, cost + + +# ############################################################################# +# Main functions +# ############################################################################# + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def apply_llm( + input_str: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + use_llm_executable: bool = False, + expected_num_chars: Optional[int] = None, +) -> Tuple[str, float]: + """ + Apply an LLM to process input text using either CLI executable or library. + + This function provides a unified interface to call LLMs either through the + llm command-line executable or through the llm Python library. It supports + optional system prompts, model selection, and progress bars for long outputs. 
+ + :param input_str: the input text to process with the LLM + :param system_prompt: optional system prompt to guide the LLM's behavior + :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") + :param use_llm_executable: if True, use the llm CLI executable; if False, + use the llm Python library + :param expected_num_chars: optional expected number of characters in + output; if provided, displays a progress bar during generation + :return: tuple of (LLM response as string, cost in dollars) + """ + hdbg.dassert_isinstance(input_str, str) + hdbg.dassert_ne(input_str, "", "Input string cannot be empty") + if system_prompt is not None: + hdbg.dassert_isinstance(system_prompt, str) + if model is not None: + hdbg.dassert_isinstance(model, str) + hdbg.dassert_ne(model, "", "Model cannot be empty string") + if expected_num_chars is not None: + hdbg.dassert_isinstance(expected_num_chars, int) + hdbg.dassert_lt(0, expected_num_chars) + _LOG.debug("Applying LLM to input text") + _LOG.debug("use_llm_executable=%s", use_llm_executable) + # Route to appropriate implementation. + if use_llm_executable: + # Check that llm executable exists. + hdbg.dassert( + _check_llm_executable(), + "llm executable not found. Install it using: pip install llm", + ) + response, cost = _apply_llm_via_executable( + input_str, + system_prompt=system_prompt, + model=model, + expected_num_chars=expected_num_chars, + ) + else: + response, cost = _apply_llm_via_library( + input_str, + system_prompt=system_prompt, + model=model, + expected_num_chars=expected_num_chars, + ) + _LOG.debug("LLM processing completed") + return response, cost + + +def apply_llm_with_files( + input_file: str, + output_file: str, + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + use_llm_executable: bool = False, + expected_num_chars: Optional[int] = None, +) -> float: + """ + Apply an LLM to process text from an input file and save to output file. 
+ + This is a convenience wrapper around apply_llm() that handles reading from + and writing to files. It reads the input file, processes the content using + the LLM, and writes the result to the output file. + + :param input_file: path to the input file containing text to process + :param output_file: path to the output file where result will be saved + :param system_prompt: optional system prompt to guide the LLM's behavior + :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") + :param use_llm_executable: if True, use the llm CLI executable; if False, + use the llm Python library + :param expected_num_chars: optional expected number of characters in + output; if provided, displays a progress bar during generation + :return: cost in dollars + """ + hdbg.dassert_isinstance(input_file, str) + hdbg.dassert_ne(input_file, "", "Input file cannot be empty") + hdbg.dassert_isinstance(output_file, str) + hdbg.dassert_ne(output_file, "", "Output file cannot be empty") + _LOG.debug("Reading input from file: %s", input_file) + # Read input file. + input_str = hio.from_file(input_file) + _LOG.debug("Read %d characters from input file", len(input_str)) + # Process with LLM. + response, cost = apply_llm( + input_str, + system_prompt=system_prompt, + model=model, + use_llm_executable=use_llm_executable, + expected_num_chars=expected_num_chars, + ) + # Write output file. + _LOG.debug("Writing output to file: %s", output_file) + hio.to_file(output_file, response) + _LOG.debug("Wrote %d characters to output file", len(response)) + return cost + + +# ############################################################################# +# Batch processing +# ############################################################################# + + +def _validate_batch_inputs( + prompt: str, + input_list: List[str], +) -> None: + """ + Validate prompt and input list for batch processing. 
+ + :param prompt: System prompt to validate + :param input_list: List of inputs to validate + :raises: Assertion errors if validation fails + """ + hdbg.dassert_isinstance(prompt, str) + hdbg.dassert_isinstance(input_list, list) + hdbg.dassert_lt(0, len(input_list), "Input list cannot be empty") + for idx, input_str in enumerate(input_list): + hdbg.dassert_isinstance( + input_str, + str, + "Input at index %d must be a string", + idx, + ) + hdbg.dassert_ne( + input_str, + "", + "Input at index %d cannot be empty", + idx, + ) + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def _llm( + system_prompt: str, + input_str: str, + model: str, +) -> Tuple[str, float]: + """ + Apply LLM using the llm Python library. + + :param input_str: the input text to process + :param system_prompt: optional system prompt to use + :param model: optional model name to use + :param expected_num_chars: optional expected number of characters in + output (used for progress bar) + :return: LLM response as string + """ + hdbg.dassert_isinstance(system_prompt, str) + _LOG.trace("system_prompt=\n%s", system_prompt) + # + hdbg.dassert_isinstance(input_str, str) + _LOG.trace("input_str=\n%s", input_str) + # + hdbg.dassert_isinstance(model, str) + hdbg.dassert_ne(model, "", "Model cannot be empty") + llm_model = llm.get_model(model) + _LOG.debug("model=%s", llm_model.model_id) + # Call the LLM. + result = llm_model.prompt(input_str, system=system_prompt) + response = result.text() + _LOG.trace("response=\n%s", response) + usage = result.usage() + cost = _calculate_cost_from_usage( + usage=usage, + model=model, + ) + return response, cost + + +def _call_llm_or_test_functor( + input_str: str, + system_prompt: Optional[str], + model: str, + testing_functor: Optional[Callable[[str], str]], +) -> Tuple[str, float]: + """ + Call LLM or testing functor if provided. 
+ + :param input_str: Input text to process + :param system_prompt: System prompt (can be None) + :param model: Model name (required for cost calculation) + :param testing_functor: Optional testing functor + :return: Tuple of (response, cost) where cost is 0.0 if not calculated + """ + if testing_functor is None: + response, cost = _llm(system_prompt, input_str, model) + # # Calculate cost for this call. + # # Build full prompt for cost calculation. + # if system_prompt: + # full_prompt = system_prompt + "\n" + input_str + # else: + # full_prompt = input_str + # cost = _calculate_llm_cost(full_prompt, response, model) + else: + response = testing_functor(input_str) + cost = 0.0 + return response, cost + + +def _calculate_llm_cost( + prompt: str, + completion: str, + model: str, +) -> float: + """ + Calculate the cost of an LLM call using tokencost library. + + :param prompt: the prompt sent to the LLM + :param completion: the completion returned by the LLM + :param model: the model name used + :return: total cost in dollars + """ + prompt_cost = tokencost.calculate_prompt_cost(prompt, model) + completion_cost = tokencost.calculate_completion_cost(completion, model) + total_cost = prompt_cost + completion_cost + # Convert to float to ensure consistent type. + return float(total_cost) + + +def apply_llm_batch_individual( + prompt: str, + input_list: List[str], + *, + model: str, + testing_functor: Optional[Callable[[str], str]] = None, + progress_bar_object: Optional[tqdm] = None, +) -> Tuple[List[str], float]: + """ + Apply an LLM to process a batch of inputs one at the time. + """ + _validate_batch_inputs(prompt, input_list) + _LOG.debug("Processing batch of %d inputs individually", len(input_list)) + # Process each input sequentially with progress bar and error handling. + responses = [] + # Initialize total cost accumulator. 
+ total_cost = 0.0 + for input_str in input_list: + response, cost = _call_llm_or_test_functor( + input_str=input_str, + system_prompt=prompt, + model=model, + testing_functor=testing_functor, + ) + total_cost += cost + responses.append(response) + if progress_bar_object is not None: + progress_bar_object.update(1) + _LOG.debug("Batch processing completed") + _LOG.debug("Total cost for batch with individual prompt: $%.6f", total_cost) + return responses, total_cost + + +def apply_llm_batch_with_shared_prompt( + prompt: str, + input_list: List[str], + *, + model: str, + testing_functor: Optional[Callable[[str], str]] = None, + progress_bar_object: Optional[tqdm] = None, +) -> Tuple[List[str], float]: + """ + Apply an LLM to process a batch of input texts using the same system prompt. + """ + _validate_batch_inputs(prompt, input_list) + _LOG.debug("Processing batch of %d inputs", len(input_list)) + # Process each input sequentially with progress bar. + responses = [] + total_cost = 0.0 + if testing_functor is None: + # TODO(gp): Factor this out and use a cache. 
def _parse_combined_json_response(response: str, num_items: int) -> List[str]:
    """
    Extract the per-item results from the JSON object inside an LLM response.

    :param response: raw LLM response, possibly with text around the JSON
    :param num_items: number of items that were sent in the combined prompt
    :return: results ordered by item index; "" for indices missing from the
        JSON object
    :raises ValueError: if no JSON object can be located or decoded
        (`json.JSONDecodeError` is a subclass of `ValueError`)
    """
    response_stripped = response.strip()
    # Find JSON object boundaries (handle cases where LLM adds extra text).
    json_start = response_stripped.find("{")
    json_end = response_stripped.rfind("}") + 1
    if json_start < 0 or json_end <= json_start:
        # Raise an error that the caller's retry logic handles, instead of
        # aborting on an assertion.
        raise ValueError("No JSON object found in LLM response")
    result_dict = json.loads(response_stripped[json_start:json_end])
    # Convert dict to list in order.
    responses = []
    for idx in range(num_items):
        key = str(idx)
        if key in result_dict:
            responses.append(result_dict[key])
        else:
            _LOG.warning("Missing result for index %d", idx)
            responses.append("")
    return responses


def apply_llm_batch_combined(
    prompt: str,
    input_list: List[str],
    *,
    model: str,
    max_retries: int = 3,
    testing_functor: Optional[Callable[[str], str]] = None,
    progress_bar_object: Optional[tqdm] = None,
) -> Tuple[List[str], float]:
    """
    Apply an LLM to process a batch using a single combined prompt.

    This function combines all queries into a single prompt and expects
    structured JSON output. A response that does not contain a parsable JSON
    object is retried, up to `max_retries` attempts in total.

    :param prompt: instructions placed at the top of the combined prompt
    :param input_list: texts to process; each one is listed in the combined
        prompt under its zero-based index
    :param model: model name passed to the LLM call
    :param max_retries: maximum number of attempts, must be positive
    :param testing_functor: if given, it is applied to each input instead of
        calling the LLM and the returned cost is 0.0
    :param progress_bar_object: optional progress bar to advance
    :return: tuple of (responses in input order, total cost of all attempts)
    """
    _validate_batch_inputs(prompt, input_list)
    hdbg.dassert_isinstance(max_retries, int)
    hdbg.dassert_lt(0, max_retries)
    _LOG.debug(
        "Processing batch of %d inputs with combined prompt", len(input_list)
    )
    # Build combined prompt.
    combined_prompt = f"{prompt}\n\n"
    instruction = """
    Return the results only as a valid JSON object with string values, using
    zero-based numeric keys that match the item numbers.

    Output format:
    {"0": "result1", "1": "result2", ...}

    """
    combined_prompt += hprint.dedent(instruction)
    for idx, input_str in enumerate(input_list):
        combined_prompt += f"{idx}: {input_str}\n"
    combined_prompt += "\nReturn ONLY the JSON object, no other text."
    _LOG.debug("Combined prompt:\n%s", combined_prompt)
    # E.g.,
    # ```
    # You are a calculator. Return only the numeric result.
    # ...
    # 0: 2 + 2
    # 1: 3 * 3
    # 2: 10 - 5
    # 3: 20 / 4
    # Return ONLY the JSON object, no other text.
    # ```
    total_cost = 0.0
    if testing_functor is not None:
        # Testing path: no LLM call and no cost.
        responses = []
        for input_str in input_list:
            responses.append(testing_functor(input_str))
            if progress_bar_object is not None:
                progress_bar_object.update(1)
        return responses, total_cost
    # Process with retries for JSON parsing.
    for retry_num in range(max_retries):
        _LOG.debug(
            "Processing batch of %d inputs with combined prompt (attempt %d/%d)",
            len(input_list),
            retry_num + 1,
            max_retries,
        )
        system_prompt = combined_prompt
        user_prompt = "Process the items listed above."
        response, cost = _llm(system_prompt, user_prompt, model)
        # Failed attempts are billed too.
        total_cost += cost
        try:
            # Parse JSON response.
            # E.g.,
            # ```
            # {"0": "4", "1": "9", "2": "5", "3": "5"}
            # ```
            _LOG.debug("Parsing JSON response:\n%s", response)
            responses = _parse_combined_json_response(
                response, len(input_list)
            )
        except (json.JSONDecodeError, ValueError) as e:
            _LOG.debug(
                "JSON parsing failed (attempt %d/%d): %s",
                retry_num + 1,
                max_retries,
                e,
            )
            if retry_num == max_retries - 1:
                hdbg.dfatal(
                    "Failed to parse JSON after %d retries", max_retries
                )
            # Add instruction to retry.
            combined_prompt += (
                "\n\nPrevious response had invalid JSON format. "
                "Please return ONLY a valid JSON object."
            )
            continue
        _LOG.debug("Successfully parsed JSON response")
        if progress_bar_object is not None:
            progress_bar_object.update(len(input_list))
        _LOG.debug(
            "Total cost for batch with combined prompt: $%.6f",
            total_cost,
        )
        return responses, total_cost
    # Should not reach here.
    raise RuntimeError("Unexpected error in apply_llm_batch_combined")
# TODO(gp): Parallelize
def apply_llm_prompt_to_df(
    prompt: str,
    df: pd.DataFrame,
    extractor: Callable[[Union[str, pd.Series]], str],
    target_col: str,
    batch_mode: str,
    *,
    model: str,
    batch_size: int = 50,
    dump_every_batch: Optional[str] = None,
    tag: str = "Processing",
    testing_functor: Optional[Callable[[str], str]] = None,
    use_sys_stderr: bool = False,
) -> Tuple[pd.DataFrame, Dict[str, Union[int, float]]]:
    """
    Apply an LLM to process a dataframe column using the same system prompt.

    This function processes text from dataframe rows using an extractor function,
    applies the LLM to each item in batches, and stores the results in a target
    column. It can optionally save progress to a file after each batch.

    The dataframe is modified in place: the target column is created if
    missing and filled row by row.

    :param prompt: system prompt to guide the LLM's behavior
    :param df: dataframe to process
    :param extractor: callable that extracts text from a row; rows for which
        it returns "" are skipped and get "" in the target column
    :param target_col: name of column to store results
    :param batch_mode: batch mode to use (individual, shared_prompt, combined)
    :param model: model name to use (e.g., "gpt-4", "claude-3-opus")
    :param batch_size: number of items to process in each batch
    :param dump_every_batch: optional file path to dump the dataframe after each batch
    :param tag: description tag for progress bar
    :param testing_functor: optional functor to use for testing
    :param use_sys_stderr: write the progress bar to `sys.__stderr__`
        (workaround for unit tests)
    :return: tuple of (dataframe with results, statistics dict)
    """
    start_time = time.time()
    hdbg.dassert_isinstance(prompt, str)
    hdbg.dassert_ne(prompt, "", "Prompt cannot be empty")
    hdbg.dassert_isinstance(df, pd.DataFrame)
    hdbg.dassert_lt(0, len(df), "Dataframe cannot be empty")
    hdbg.dassert_isinstance(target_col, str)
    hdbg.dassert_ne(target_col, "", "Target column cannot be empty")
    hdbg.dassert_isinstance(model, str)
    hdbg.dassert_ne(model, "", "Model cannot be empty")
    hdbg.dassert_isinstance(batch_size, int)
    hdbg.dassert_lt(0, batch_size)
    if dump_every_batch is not None:
        hdbg.dassert_isinstance(dump_every_batch, str)
        hdbg.dassert_ne(dump_every_batch, "", "Dump file path cannot be empty")
    # Resolve the batch function once, failing before any work is done.
    batch_funcs = {
        "individual": apply_llm_batch_individual,
        "shared_prompt": apply_llm_batch_with_shared_prompt,
        "combined": apply_llm_batch_combined,
    }
    if batch_mode not in batch_funcs:
        hdbg.dfatal("Invalid batch mode: %s", batch_mode)
    func = batch_funcs[batch_mode]
    # Create target column if it doesn't exist.
    if target_col not in df.columns:
        df[target_col] = None
    # Process items in batches with progress bar for entire workload.
    num_items = len(df)
    num_batches = (num_items + batch_size - 1) // batch_size
    _LOG.info(
        "Processing %d items in %d batches of %d items each",
        num_items,
        num_batches,
        batch_size,
    )
    _LOG.info(hprint.to_str("model batch_mode"))
    num_skipped = 0
    progress_bar_ctor = get_tqdm_progress_bar()
    progress_bar_object = progress_bar_ctor(  # type: ignore
        total=num_items,
        desc=tag,
        dynamic_ncols=True,
        # Workaround for unit tests.
        file=sys.__stderr__ if use_sys_stderr else None,
    )
    total_cost = 0.0
    try:
        # TODO(gp): Precompute the batch indices that needs to be processed.
        for batch_num in range(num_batches):
            # Get batch rows.
            start_idx = batch_num * batch_size
            end_idx = min(start_idx + batch_size, len(df))
            rows = df.iloc[start_idx:end_idx]
            # Extract items from rows, filtering out the empty ones.
            batch_items = []
            batch_indices = []
            for idx, row in rows.iterrows():
                extracted_text = extractor(row)
                if extracted_text != "":
                    batch_items.append(extracted_text)
                    batch_indices.append(idx)
                else:
                    # Store an empty result for rows with nothing to process.
                    df.at[idx, target_col] = ""
                    num_skipped += 1
                    progress_bar_object.update(1)
            # Call LLM only if there are valid items in this batch.
            if batch_items:
                _LOG.debug(
                    "Processing batch %d/%d (%d items, %d skipped)",
                    batch_num + 1,
                    num_batches,
                    len(batch_items),
                    len(rows) - len(batch_items),
                )
                batch_responses, batch_cost = func(
                    prompt=prompt,
                    input_list=batch_items,
                    model=model,
                    testing_functor=testing_functor,
                    progress_bar_object=progress_bar_object,
                )
                # Update total_cost.
                total_cost += batch_cost
                # Store results back into dataframe.
                for idx, response in zip(batch_indices, batch_responses):
                    df.at[idx, target_col] = response
            else:
                _LOG.debug(
                    "Skipping batch %d/%d (all %d items have missing data)",
                    batch_num + 1,
                    num_batches,
                    len(rows),
                )
            # Dump dataframe to file after batch if requested.
            if dump_every_batch is not None:
                _LOG.debug("Dumping dataframe to file: %s", dump_every_batch)
                df.to_csv(dump_every_batch, index=False)
    finally:
        # Release the progress bar even if a batch fails.
        progress_bar_object.close()
    # Calculate elapsed time.
    elapsed_time = time.time() - start_time
    stats = {
        "num_items": num_items,
        "num_skipped": num_skipped,
        "num_batches": num_batches,
        "total_cost_in_dollars": total_cost,
        "elapsed_time_in_seconds": elapsed_time,
    }
    _LOG.info("Processing completed:\n%s", pprint.pformat(stats))
    return df, stats
+ """ + import pandas as pd + + response = requests.get("https://openrouter.ai/api/v1/models") + # {'architecture': {'input_modalities': ['text', 'image'], + # 'instruct_type': None, + # 'modality': 'text+image->text', + # 'output_modalities': ['text'], + # 'tokenizer': 'Mistral'}, + # 'context_length': 131072, + # 'created': 1746627341, + # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' + # 'language model designed to deliver frontier-level ' + # ... + # 'broad compatibility across cloud environments.', + # 'id': 'mistralai/mistral-medium-3', + # 'name': 'Mistral: Mistral Medium 3', + # 'per_request_limits': None, + # 'pricing': {'completion': '0.000002', + # 'image': '0', + # 'internal_reasoning': '0', + # 'prompt': '0.0000004', + # 'request': '0', + # 'web_search': '0'}, + # 'supported_parameters': ['tools', + # 'tool_choice', + # 'max_tokens', + # 'temperature', + # 'top_p', + # 'stop', + # 'frequency_penalty', + # 'presence_penalty', + # 'response_format', + # 'structured_outputs', + # 'seed'], + # 'top_provider': {'context_length': 131072, + # 'is_moderated': False, + # 'max_completion_tokens': None}} + response_json = response.json() + # There is only one key in the response. + hdbg.dassert_eq(list(response_json.keys()), ["data"]) + response_json = response_json["data"] + model_info_df = pd.DataFrame(response_json) + return model_info_df + + +def _save_models_info_to_csv( + model_info_df: "pd.DataFrame", + file_name: str, +) -> "pd.DataFrame": + """ + Save models info to a CSV file. + """ + hdbg.dassert_isinstance(file_name, str) + hdbg.dassert_ne(file_name, "") + # TODO(*): Save all the data. + # Extract prompt, completion pricing from pricing column. 
class LLMCostTracker:
    """
    Track the costs of LLM API calls through one of the providers.

    The tracker keeps a running total (`current_cost`) and can compute the
    cost of a single completion for the "openai" and "openrouter" providers.
    """

    def __init__(self, provider_name: str, model: str) -> None:
        """
        Initialize the class.

        :param provider_name: provider used for pricing ("openai" or
            "openrouter")
        :param model: model name used to look up the pricing
        """
        self.current_cost: float = 0.0
        self.provider_name = provider_name
        self.model = model

    @staticmethod
    def _get_token_counts(usage: Any) -> tuple:
        """
        Return (prompt tokens, completion tokens) from a usage object.

        Both the `prompt_tokens`/`completion_tokens` and the
        `input_tokens`/`output_tokens` attribute pairs are supported.

        :raises ValueError: if neither attribute pair is present
        """
        if hasattr(usage, "prompt_tokens") and hasattr(
            usage, "completion_tokens"
        ):
            return usage.prompt_tokens, usage.completion_tokens
        if hasattr(usage, "input_tokens") and hasattr(usage, "output_tokens"):
            return usage.input_tokens, usage.output_tokens
        raise ValueError(
            f"Unknown usage structure on completion object: {usage}"
        )

    @staticmethod
    def _compute_openrouter_cost(
        prompt_tokens: int,
        completion_tokens: int,
        prompt_price: Any,
        completion_price: Any,
    ) -> float:
        """
        Compute the cost from per-token prices.

        The prices are strings when they come straight from the API response
        and numbers when they are read back from the CSV file, so they are
        converted to float before multiplying.
        """
        return prompt_tokens * float(prompt_price) + completion_tokens * float(
            completion_price
        )

    def end_logging_costs(self) -> None:
        """
        End logging costs by resetting the current cost to 0.
        """
        self.current_cost = 0.0

    def accumulate_cost(self, cost: float) -> None:
        """
        Accumulate the cost.

        :param cost: The cost to accumulate
        """
        self.current_cost += cost

    def get_current_cost(self) -> float:
        """
        Get the current accumulated cost.

        :return: The current cost
        """
        return self.current_cost

    def calculate_cost(
        self,
        completion: Any,
        *,
        models_info_file: str = "",
    ) -> float:
        """
        Calculate the cost of an API call, based on the provider.

        The cost is only computed, it is not added to the running total.

        :param completion: the completion response from API; it must have a
            `usage` attribute with the token counts
        :param models_info_file: CSV file with the OpenRouter models info;
            if empty, the default location is used, and if the file doesn't
            exist the info is downloaded and saved there
        :return: the calculated cost in dollars
        :raises ValueError: if the usage structure or the provider is unknown
        """
        import pandas as pd

        # Get the number of input and output tokens.
        usage = getattr(completion, "usage", None)
        hdbg.dassert(
            usage is not None,
            "Completion/response object has no 'usage' attribute",
        )
        prompt_tokens, completion_tokens = self._get_token_counts(usage)
        # Get the provider and model details.
        if self.provider_name == "openai":
            # Get the pricing for the selected model.
            # TODO(gp): Use pricing from OpenAI or Openrouter API.
            # https://openai.com/api/pricing/
            # https://gptforwork.com/tools/openai-chatgpt-api-pricing-calculator
            # Cost per 1M tokens.
            pricing = {
                "gpt-3.5-turbo": {"prompt": 0.5, "completion": 1.5},
                "gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
                "gpt-4o": {"prompt": 2.5, "completion": 10},
                "gpt-5.2": {"prompt": 1.75, "completion": 14.0},
                "gpt-5.1": {"prompt": 1.25, "completion": 10.0},
                "gpt-5-mini": {"prompt": 0.25, "completion": 2.00},
            }
            hdbg.dassert_in(self.model, pricing)
            model_pricing = pricing[self.model]
            # Calculate the cost.
            cost = (prompt_tokens / 1e6) * model_pricing["prompt"] + (
                completion_tokens / 1e6
            ) * model_pricing["completion"]
        elif self.provider_name == "openrouter":
            # If the model info file doesn't exist, download one.
            if models_info_file == "":
                models_info_file = _get_models_info_file()
            _LOG.debug(hprint.to_str("models_info_file"))
            if not os.path.isfile(models_info_file):
                model_info_df = _retrieve_openrouter_model_info()
                # The pricing columns are added to `model_info_df` in place.
                _save_models_info_to_csv(model_info_df, models_info_file)
            else:
                model_info_df = pd.read_csv(models_info_file)
            # Extract pricing for this model.
            hdbg.dassert_in(self.model, model_info_df["id"].values)
            row = model_info_df.loc[model_info_df["id"] == self.model].iloc[0]
            # Compute cost (prices are per token).
            cost = self._compute_openrouter_cost(
                prompt_tokens,
                completion_tokens,
                row["prompt_pricing"],
                row["completion_pricing"],
            )
        else:
            raise ValueError(f"Unknown provider: {self.provider_name}")
        _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost"))
        return cost
# (resident memory in GB, virtual memory in GB, percent of total memory used).
MemoryUsage = Tuple[float, float, float]


def get_memory_usage(process: Optional[Any] = None) -> MemoryUsage:
    """
    Return the memory usage in terms of resident, virtual, and percent of total
    used memory.

    :param process: object exposing `memory_info()` and `memory_percent()`
        (e.g., a `psutil.Process`); if None, the current process is used
    :return: tuple of (rss in GB, vms in GB, percent of memory used)
    """
    if process is None:
        import psutil

        process = psutil.Process()
    # Read the memory info once so that rss and vms come from the same sample.
    mem_info = process.memory_info()
    rss_in_GB = mem_info.rss / (1024**3)
    vms_in_GB = mem_info.vms / (1024**3)
    mem_pct = process.memory_percent()
    return (rss_in_GB, vms_in_GB, mem_pct)


def memory_to_str(resource_use: MemoryUsage, *, verbose: bool = True) -> str:
    """
    Format a memory usage tuple as a string.

    :param resource_use: tuple as returned by `get_memory_usage()`
    :param verbose: if True, prefix each value with its name
    :return: e.g., "rss=0.240GB vms=1.407GB mem_pct=2%" or, when not verbose,
        "0.240GB 1.407GB 2%"
    """
    (rss_in_GB, vms_in_GB, mem_pct) = resource_use
    if verbose:
        txt = "rss=%.3fGB vms=%.3fGB mem_pct=%.0f%%" % (
            rss_in_GB,
            vms_in_GB,
            mem_pct,
        )
    else:
        txt = "%.3fGB %.3fGB %.0f%%" % (rss_in_GB, vms_in_GB, mem_pct)
    return txt


def get_memory_usage_as_str(process: Optional[Any] = None) -> str:
    """
    Like `get_memory_usage()` but returning a formatted string.
    """
    resource_use = get_memory_usage(process)
    txt = memory_to_str(resource_use)
    return txt
def get_all_loggers() -> List:
    """
    Collect every logger currently registered with the `logging` manager.

    :return: one logger object per name in the manager's logger dict
    """
    registry = logging.root.manager.loggerDict  # type: ignore  # pylint: disable=no-member
    all_loggers: List = []
    for logger_name in registry:
        # `getLogger()` returns the logger already registered under that name.
        all_loggers.append(logging.getLogger(logger_name))
    return all_loggers
+ "fsspec", + "hooks", + "httpcore", + "httpx", + "invoke", + "matplotlib", + "nose", + "openai", + "s3fs", + "s3transfer", + "urllib3", + # "ib_insync", + ] + # verbose = True + loggers = get_matching_loggers(module_names, verbose) + loggers = sorted(loggers, key=lambda logger: logger.name) + for logger in loggers: + logger.setLevel(verbosity) + if len(loggers) > 0: + logger_names = list({logger.name for logger in loggers}) + _LOG.debug( + "Shut up %d modules: %s", len(loggers), ", ".join(logger_names) + ) + # if _LOG.getEffectiveLevel() < logging.DEBUG: + # print(WARNING + + # " Shutting up %d modules: %s" + # % (len(loggers), ", ".join([logger.name for logger in loggers])) + # ) + + +# ############################################################################# +# _LocalTimeZoneFormatter +# ############################################################################# + + +# From https://stackoverflow.com/questions/32402502 +class _LocalTimeZoneFormatter: + """ + Override logging.Formatter to use an aware datetime object. + """ + + def __init__(self, *args: Any, **kwargs: Any): + super().__init__(*args, **kwargs) # type: ignore[call-arg] + try: + # TODO(gp): Automatically detect the time zone. It might be complicated in + # Docker. + import pytz + + self._tzinfo = pytz.timezone("America/New_York") + except ModuleNotFoundError: + # print(f"Can't import pytz: using UTC\n{str(e)}") + self._tzinfo = None + + def converter(self, timestamp: float) -> datetime.datetime: + # To make the linter happy and respecting the signature of the + # superclass method. + _ = self + # timestamp=1622423570.0147252 + dt = datetime.datetime.utcfromtimestamp(timestamp) + # Convert it to an aware datetime object in UTC time. + dt = dt.replace(tzinfo=datetime.timezone.utc) + if self._tzinfo is not None: + # Convert it to desired timezone. 
+ dt = dt.astimezone(self._tzinfo) + return dt + + def formatTime( + self, record: logging.LogRecord, datefmt: Optional[str] = None + ) -> str: + dt = self.converter(record.created) + if datefmt: + s = dt.strftime(datefmt) + else: + try: + s = dt.isoformat(timespec="milliseconds") + except TypeError: + s = dt.isoformat() + return s + + +# ############################################################################# +# _ColoredFormatter +# ############################################################################# + + +# [mypy] error: Definition of "converter" in base class +# "_LocalTimeZoneFormatter" is incompatible with definition in base class +# "Formatter" +class _ColoredFormatter( # type: ignore[misc] + _LocalTimeZoneFormatter, logging.Formatter +): + """ + Logging formatter using colors for different levels. + """ + + _SKIP_DEBUG = True + + def format(self, record: logging.LogRecord) -> str: + colored_record = copy.copy(record) + # `levelname` is the internal name and can't be changed to `level_name` + # as per our conventions. + levelname = colored_record.levelname + if _ColoredFormatter._SKIP_DEBUG and levelname == "DEBUG": + colored_levelname = "" + else: + # Use white as default. + prefix = "\033[" + suffix = "\033[0m" + assert levelname in _COLOR_MAPPING, "Can't find info '%s'" + color_code, tag = _COLOR_MAPPING[levelname] + # Align the level name. + colored_levelname = f"{prefix}{color_code}m{tag}{suffix}" + colored_record.levelname = colored_levelname + return logging.Formatter.format(self, colored_record) + + +# From https://stackoverflow.com/questions/2183233 +def addLoggingLevel(levelName, levelNum, methodName=None): + """ + Comprehensively adds a new logging level to the `logging` module and the + currently configured logging class. + + `levelName` becomes an attribute of the `logging` module with the value + `levelNum`. 
`methodName` becomes a convenience method for both `logging` + itself and the class returned by `logging.getLoggerClass()` (usually just + `logging.Logger`). If `methodName` is not specified, `levelName.lower()` is + used. + + To avoid accidental clobberings of existing attributes, this method will + raise an `AttributeError` if the level name is already an attribute of the + `logging` module or if the method name is already present + + Example + ------- + >>> addLoggingLevel('TRACE', logging.DEBUG - 5) + >>> logging.getLogger(__name__).setLevel("TRACE") + >>> logging.getLogger(__name__).trace('that worked') + >>> logging.trace('so did this') + >>> logging.TRACE + 5 + """ + if not methodName: + methodName = levelName.lower() + + if hasattr(logging, levelName): + raise AttributeError( + "{} already defined in logging module".format(levelName) + ) + if hasattr(logging, methodName): + raise AttributeError( + "{} already defined in logging module".format(methodName) + ) + if hasattr(logging.getLoggerClass(), methodName): + raise AttributeError( + "{} already defined in logger class".format(methodName) + ) + + # This method was inspired by the answers to Stack Overflow post + # http://stackoverflow.com/q/2183233/2988730, especially + # http://stackoverflow.com/a/13638084/2988730 + def logForLevel(self, message, *args, **kwargs): + if self.isEnabledFor(levelNum): + self._log(levelNum, message, args, **kwargs) + + def logToRoot(message, *args, **kwargs): + logging.log(levelNum, message, *args, **kwargs) + + logging.addLevelName(levelNum, levelName) + setattr(logging, levelName, levelNum) + setattr(logging.getLoggerClass(), methodName, logForLevel) + setattr(logging, methodName, logToRoot) + + +addLoggingLevel("TRACE", 5) + + +# Note that this doesn't avoid evaluating the call. +# The only way to be completely sure that there is no evaluation is: +# ``` +# if False: _LOG.debug(...) 
# TODO(gp): Replace `force_print_format` and `force_verbose_format` with `mode`.
def _get_logging_format(
    force_print_format: bool,
    force_verbose_format: bool,
    force_no_warning: bool,
    report_memory_usage: bool,
    date_format_mode: str = "time",
) -> Tuple[str, str]:
    """
    Pick the date format and the record format for the logger.

    By default the verbose format is used outside a notebook and the
    print-like format inside a notebook; the `force_*` flags override this.

    :param force_print_format: force to use the non-verbose format, which
        looks like a `print` statement
    :param force_verbose_format: force to use the verbose format
    :param force_no_warning: don't print the warning about running in Jupyter
    :param report_memory_usage: add the `resource_use` field to the verbose
        format
    :param date_format_mode: one of "time", "date_time", "date_timestamp";
        used only by the verbose format
    :return: tuple of (date format, log format)
    """
    in_notebook = _is_running_in_ipynb()
    if in_notebook and not force_no_warning:
        print("WARNING: Running in Jupyter")
    assert not (force_verbose_format and force_print_format), (
        f"Can't use both force_verbose_format={force_verbose_format} "
        + f"and force_print_format={force_print_format}"
    )
    # The print format has the precedence over the verbose one.
    if force_print_format:
        use_verbose = False
    elif force_verbose_format:
        use_verbose = True
    else:
        use_verbose = not in_notebook
    if not use_verbose:
        # Make logging look like a normal print().
        # TODO(gp): We want to still prefix with WARNING and ERROR.
        return "", "%(message)s"
    # TODO(gp): We would like to have filename:name:funcName:lineno all
    # justified on 15 chars.
    # See https://docs.python.org/3/howto/logging-cookbook.html#use-of
    # -alternative-formatting-styles
    pieces = [
        # 04-28_08:08 INFO :
        "%(asctime)-5s %(levelname)-5s"
    ]
    if report_memory_usage:
        # rss=0.3GB vms=2.0GB mem_pct=2% cpu=91%
        pieces.append(" [%(resource_use)-40s]")
    # lib_tasks _delete_branches 142: ...
    pieces.append(
        " %(module)-20s: %(funcName)-30s: %(lineno)-4d: %(message)s"
    )
    log_format = "".join(pieces)
    known_date_formats = (
        ("time", "%H:%M:%S"),
        ("date_time", "%m-%d_%H:%M"),
        ("date_timestamp", "%Y-%m-%d %I:%M:%S %p"),
    )
    for mode, date_fmt in known_date_formats:
        if date_format_mode == mode:
            return date_fmt, log_format
    raise ValueError(f"Invalid date_format_mode='{date_format_mode}'")
+ + The logging output looks like: + ``` + 07:37:17 /app/amp/helpers/hunit_test.py setUp 932 - Resetting random.seed to 20000101 + ``` + or for simulated time: + ``` + 07:43:17 @ 2022-01-18 02:43:17 workload /app/amp/helpers/test/test_hlogging.py workload:33 - -> wait + ``` + """ + + def __init__( + self, + *args: Any, + date_format_mode: str = "time", + report_memory_usage: bool = False, + report_cpu_usage: bool = False, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self._date_fmt = self._get_date_format(date_format_mode) + # + try: + # TODO(gp): Automatically detect the time zone. It might be complicated + # in Docker. + import pytz + + self._tzinfo = pytz.timezone("America/New_York") + except ModuleNotFoundError: + # print(f"Can't import pytz: using UTC\n{str(e)}") + self._tzinfo = None + # + self._report_memory_usage = report_memory_usage + self._report_cpu_usage = report_cpu_usage + if self._report_memory_usage or self._report_cpu_usage: + import psutil + + self._process = psutil.Process() + if self._report_cpu_usage: + # Start sampling the CPU usage. + self._process.cpu_percent(interval=1.0) + + def format(self, record: logging.LogRecord) -> str: + # record = copy.copy(record) + # print(pprint.pformat(record.__dict__)) + # `record` looks like: + # {'args': (30,), + # 'created': 1642456725.5569131, + # 'exc_info': None, + # 'exc_text': None, + # 'filename': 'logging_main.py', + # 'funcName': 'test_logger', + # 'levelname': 'WARNING', + # 'levelno': 30, + # 'lineno': 105, + # 'module': 'logging_main', + # 'msecs': 556.9131374359131, + # 'msg': 'WARNING=%s', + # 'name': '__main__', + # 'pathname': 'helpers/logging_testing/logging_main.py', + # 'process': 16484, + # 'processName': 'MainProcess', + # 'relativeCreated': 29.956817626953125, + # 'stack_info': None, + # 'thread': 140250120021824, + # 'threadName': 'MainThread'} + msg = "" + # Add the wall clock time. + msg += self._get_wall_clock_time() + # Report memory usage, if needed. 
+ # rss=0.240GB vms=1.407GB mem_pct=2% cpu=92% + if self._report_memory_usage: + msg_tmp = get_memory_usage_as_str(self._process) + # Escape the % to avoid confusing for a string to expand. + msg_tmp = msg_tmp.replace("%", "%%") + msg += " " + msg_tmp + # Report CPU usage, if needed. + if self._report_cpu_usage: + # CPU usage since the previous call. + msg_tmp = " cpu=%.0f" % self._process.cpu_percent(interval=None) + # Escape the % to avoid confusing for a string to expand. + msg_tmp += "%%" + msg += msg_tmp + # Get the (typically) simulated wall clock time. + import helpers.hwall_clock_time as hwacltim + + simulated_wall_clock_time = hwacltim.get_wall_clock_time() + if simulated_wall_clock_time is not None: + date_fmt = "%Y-%m-%d %I:%M:%S" + msg += " @ " + self._convert_time_to_string( + simulated_wall_clock_time, date_fmt + ) + # Colorize / shorten the logging level if it's not DEBUG. + if record.levelno != logging.DEBUG: + msg += f" - {self._colorize_level(record.levelname)}" + # Add information about which coroutine we are running in. + try: + asyncio.get_running_loop() + task = asyncio.current_task() + if task is not None: + msg += f" {task.get_name()}" + except (RuntimeError, AttributeError): + pass + # Add information about the caller. + # ``` + # /helpers/hunit_test.py setUp:932 + # ``` + # pathname = record.pathname.replace("/amp", "") + # msg += f" {pathname} {record.funcName}:{record.lineno}" + # ``` + # test_hlogging.py _print_time:28 + # ``` + msg += f" {record.filename} {record.funcName}:{record.lineno}" + # Indent. + if len(msg) < 50: + msg = "%-60s" % msg + else: + msg = "%-80s" % msg + # Add the caller string. 
+ msg += f" {record.msg}" + record.msg = msg + return super().format(record) + + @staticmethod + def _get_date_format(date_format_mode: str) -> str: + if date_format_mode == "time": + date_fmt = "%H:%M:%S" + elif date_format_mode == "date_time": + date_fmt = "%m-%d_%H:%M" + elif date_format_mode == "date_timestamp": + date_fmt = "%Y-%m-%d %I:%M:%S %p" + else: + raise ValueError("Invalid date_format") + return date_fmt + + def _convert_time_to_string( + self, now: datetime.datetime, date_fmt: str + ) -> str: + # Convert it to an tz-aware datetime object in UTC time. + dt = now.replace(tzinfo=datetime.timezone.utc) + if self._tzinfo is not None: + # Convert it to desired timezone. + dt = dt.astimezone(self._tzinfo) + time_as_str = dt.strftime(date_fmt) + return time_as_str + + def _get_wall_clock_time(self) -> str: + dt = datetime.datetime.utcnow() + return self._convert_time_to_string(dt, self._date_fmt) + + def _colorize_level(self, level_name: str) -> str: + # Use white as default. + prefix = "\033[" + suffix = "\033[0m" + # Print stacktrace to debug. + if False: + import traceback + + txt = traceback.format_stack() + txt = "".join(txt) + print(txt) + + assert level_name in _COLOR_MAPPING, "Can't find info '%s'" + color_code, tag = _COLOR_MAPPING[level_name] + colored_level_name = f"{prefix}{color_code}m{tag}{suffix}" + return colored_level_name + + +def set_v2_formatter( + ch: Any, + root_logger: Any, + force_no_warning: bool, + force_print_format: bool, + force_verbose_format: bool, + report_memory_usage: bool, + report_cpu_usage: bool, +) -> Union[logging.Formatter, CustomFormatter]: + """ + See params in `init_logger()`. + """ + assert not (force_verbose_format and force_print_format), ( + f"Can't use both force_verbose_format={force_verbose_format} " + + f"and force_print_format={force_print_format}" + ) + # When running in a notebook make logging behave like a `print`. 
+ verbose_format = True + if _is_running_in_ipynb(): + verbose_format = False + if not force_no_warning: + print("WARNING: Running in Jupyter") + # + if force_verbose_format: + verbose_format = True + if force_print_format: + verbose_format = False + # + if verbose_format: + # Force to report memory / CPU usage. + # report_memory_usage = report_cpu_usage = True + # print( + # "report_memory_usage=%s report_cpu_usage=%s" + # % (report_memory_usage, report_cpu_usage) + # ) + formatter: Union[logging.Formatter, CustomFormatter] = CustomFormatter( + report_memory_usage=report_memory_usage, + report_cpu_usage=report_cpu_usage, + ) + else: + # Make logging look like a normal `print()`. + log_format = "%(levelname)-5s %(message)s" + date_fmt = "" + formatter = logging.Formatter(log_format, datefmt=date_fmt) + ch.setFormatter(formatter) + root_logger.addHandler(ch) + return formatter + + +# TODO(gp): Not sure it works properly. +@contextlib.contextmanager +def set_level(logger: Any, level: int) -> None: + """ + Context manager changing the verbosity level. + """ + previous_level = logger.getEffectiveLevel() + try: + logger.setLevel(level) + yield + finally: + logger.setLevel(previous_level) + assert logger.getEffectiveLevel() == previous_level + + +# ############################################################################# + + +def getLogger(name: str) -> logging.Logger: + """ + Get logger with custom trace method support. + + This function provides the same functionality as `logging.getLogger()` + but with proper type hints that include the custom trace method. + + Usage: + ``` + # Instead of `import logging`. 
+ import helpers.hlogging as hlogging + + _LOG = hlogging.getLogger(__name__) + _LOG.trace("This works without type checker errors") + _LOG.debug("Standard logging methods also work") + ``` + """ + return logging.getLogger(name) + + +def test_logger() -> None: + print("# Testing logger ...") + print("effective level=", _LOG.getEffectiveLevel()) + # + if hasattr(_LOG, "trace"): + if hasattr(logging, "TRACE"): + _LOG.trace("TRACE=%s", logging.TRACE) + else: + _LOG.trace("TRACE level not available") + # + _LOG.debug("DEBUG=%s", logging.DEBUG) + # + _LOG.info("INFO=%s", logging.INFO) + # + _LOG.warning("WARNING=%s", logging.WARNING) + # + _LOG.error("ERROR=%s", logging.ERROR) + # + _LOG.critical("CRITICAL=%s", logging.CRITICAL) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi new file mode 100644 index 000000000..993f9cc14 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi @@ -0,0 +1,14 @@ +""" +Type stub for hlogging module with custom Logger that includes trace method. +""" + +import logging +from typing import Any + +class Logger(logging.Logger): + """ + Custom Logger class that includes trace method. + """ + def trace(self, msg: str, *args: Any, **kwargs: Any) -> None: ... + +def getLogger(name: str) -> Logger: ... 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py new file mode 100644 index 000000000..07fe8d14f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py @@ -0,0 +1,18 @@ +""" +Import as: + +import helpers.hmarkdown as hmarkdo +""" + +from helpers.hmarkdown_bullets import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_coloring import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_comments import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_div_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_fenced_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_filtering import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_formatting import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_tables import * # isort:skip # 
noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py new file mode 100644 index 000000000..0edb705a4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py @@ -0,0 +1,248 @@ +""" +Import as: + +import helpers.hmarkdown_bullets as hmarbull +""" + +import logging +import re +from typing import Generator, List, Tuple + +from helpers.hmarkdown_comments import ( + process_comment_block, + process_single_line_comment, +) + +_LOG = logging.getLogger(__name__) + +_TRACE = False + +# ############################################################################# +# Formatting markdown +# ############################################################################# + + +# These are the colors that are supported by Latex / markdown, are readable on +# white, and form an equidistant color palette. +_ALL_COLORS = [ + "red", + "orange", + "brown", + "olive", + "green", + "teal", + "cyan", + "blue", + "violet", + "darkgray", + "gray", +] + + +# TODO(gp): -> hmarkdown_color.py? +# TODO(gp): This seems the same as `_colorize_bullet_points()`. +def colorize_bold_text( + markdown_text: str, color_sequence: bool, *, use_abbreviations: bool = True +) -> str: + r""" + Add colors to bold text in markdown using equidistant colors from an array. + + The function finds all bold text (enclosed in ** or __) and adds + LaTeX color commands while preserving the rest of the markdown + unchanged. 
+ + :param markdown_text: Input markdown text + :param color_sequence: Sequence of colors to use + :param use_abbreviations: Use LaTeX abbreviations for colors, + `\red{text}` instead of `\textcolor{red}{text}` + :return: Markdown text with colored bold sections + """ + # Remove any existing color formatting. + # Remove \color{text} format. + markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) + # Remove \textcolor{color}{text} format. + markdown_text = re.sub( + r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text + ) + # Find all bold text (both ** and __ formats). + bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" + # matches will look like: + # For **text**: group(1)='text', group(2)=None. + # For __text__: group(1)=None, group(2)='text'. + matches = list(re.finditer(bold_pattern, markdown_text)) + if not matches: + return markdown_text + result = markdown_text + # Calculate color spacing to use equidistant colors. + if color_sequence == "equidistant": + color_step = len(_ALL_COLORS) / len(matches) + elif color_sequence == "fixed": + color_step = 1 + else: + raise ValueError(f"Invalid color sequence: {color_sequence}") + # Process matches in reverse to not mess up string indices. + for i, match in enumerate(reversed(matches)): + # Get the matched bold text (either ** or __ format). + bold_text = match.group(1) or match.group(2) + # Calculate `color_idx` using equidistant spacing. + color_idx = int((len(matches) - 1 - i) * color_step) % len(_ALL_COLORS) + color = _ALL_COLORS[color_idx] + # Create the colored version. + if use_abbreviations: + # E.g., \red{text} + colored_text = f"\\{color}{{{bold_text}}}" + else: + # E.g., \textcolor{red}{text} + colored_text = f"\\textcolor{{{color}}}{{{bold_text}}}" + # Apply bold. + colored_text = f"**{colored_text}**" + # Replace in the original text. 
+ result = result[: match.start()] + colored_text + result[match.end() :] + return result + + +def remove_bullets(markdown_text: str) -> str: + """ + Remove bullet points (dashes) and leading spaces from markdown text. + + This function removes all leading dashes (`-`) from lines and removes + leading whitespace. Empty lines are preserved. + + :param markdown_text: Input markdown text + :return: Markdown text with bullets removed + """ + lines = markdown_text.split("\n") + result = [] + for line in lines: + # Check if line is not empty. + if line.strip(): + # Remove leading whitespace. + stripped_line = line.lstrip() + # Check if line starts with a bullet point. + if stripped_line.startswith("- "): + # Remove the bullet and the space after it. + result.append(stripped_line[2:]) + else: + # Keep the line as is (no leading whitespace). + result.append(stripped_line) + else: + # Preserve empty lines. + result.append("") + return "\n".join(result) + + +def format_first_level_bullets(markdown_text: str) -> str: + """ + Add empty lines only before first level bullets and remove all empty lines + from markdown text. + + :param markdown_text: Input markdown text + :return: Formatted markdown text + """ + # Split into lines and remove empty ones. + lines = [line for line in markdown_text.split("\n") if line.strip()] + # Add empty lines only before first level bullets. + result = [] + for i, line in enumerate(lines): + # Check if current line is a first level bullet (no indentation). + if re.match(r"^- ", line): + # Add empty line before first level bullet if not at start. + if i > 0: + result.append("") + result.append(line) + return "\n".join(result) + + +def process_code_block( + line: str, in_code_block: bool, i: int, lines: List[str] +) -> Tuple[bool, bool, List[str]]: + """ + Process lines of text to handle code blocks that start and end with '```'. 
+ + The transformation is to: + - add an empty line before the start/end of the code + - indent the code block with four spaces + - replace '//' with '# ' to comment out lines in Python code + + :param line: The current line of text being processed. + :param in_code_block: A flag indicating if the function is currently + inside a code block. + :param i: The index of the current line in the list of lines. + :param lines: the lines of text to process + :return: tuple containing: + - `do_continue`: whether to continue processing the current line or skip + it + - `in_code_block`: boolean indicating whether the function is currently + inside a code block + - list of processed lines of text + """ + out: List[str] = [] + do_continue = False + # Look for a code block. + if re.match(r"^(\s*)```", line): + _LOG.debug(" -> code block") + in_code_block = not in_code_block + # Add empty line before the start of the code block. + if ( + in_code_block + and (i + 1 < len(lines)) + and re.match(r"\s*", lines[i + 1]) + ): + out.append("\n") + out.append(" " + line) + if ( + not in_code_block + and (i + 1 < len(lines)) + and re.match(r"\s*", lines[i + 1]) + ): + out.append("\n") + do_continue = True + return do_continue, in_code_block, out + if in_code_block: + line = line.replace("// ", "# ") + out.append(" " + line) + # We don't do any of the other post-processing. + do_continue = True + return do_continue, in_code_block, out + return do_continue, in_code_block, out + + +# TODO(gp): -> iterator +# TODO(gp): where is this used? +def process_lines(lines: List[str]) -> Generator[Tuple[int, str], None, None]: + """ + Process lines of text to handle comment blocks, code blocks, and single + line comments. 
+ + :param lines: list of all the lines of text being processed + :return: generator of processed lines of text + """ + out: List[str] = [] + in_skip_block = False + in_code_block = False + for i, line in enumerate(lines): + _LOG.debug("%s:line=%s", i, line) + # 1) Remove comment block. + if _TRACE: + _LOG.debug("# 1) Process comment block.") + do_continue, in_skip_block = process_comment_block(line, in_skip_block) + if do_continue: + continue + # 2) Remove code block. + if _TRACE: + _LOG.debug("# 2) Process code block.") + do_continue, in_code_block, out_tmp = process_code_block( + line, in_code_block, i, lines + ) + out.extend(out_tmp) + if do_continue: + continue + # 3) Remove single line comment. + if _TRACE: + _LOG.debug("# 3) Process single line comment.") + do_continue = process_single_line_comment(line) + if do_continue: + continue + out.append(line) + # + yield from enumerate(out) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py new file mode 100644 index 000000000..ba7278726 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py @@ -0,0 +1,286 @@ +""" +Utilities for colorizing markdown and LaTeX text with color commands. 
+ +Import as: + +import helpers.hmarkdown_coloring as hmarcolo +""" + +import logging +import re +from typing import Dict, List, Optional + +import helpers.hdbg as hdbg +from helpers.hmarkdown_fenced_blocks import ( + replace_fenced_blocks_with_tags, + replace_tags_with_fenced_blocks, +) +from helpers.hmarkdown_tables import ( + replace_tables_with_tags, + replace_tags_with_tables, +) + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Colorize +# ############################################################################# + +# Mapping of markdown color names to their LaTeX color equivalents for use in +# \textcolor{} commands. +_MD_COLORS_LATEX_MAPPING = { + "red": "red", + "orange": "orange", + "yellow": "yellow", + "lime": "lime", + "green": "darkgreen", + "teal": "teal", + "cyan": "cyan", + "blue": "blue", + "purple": "purple", + "violet": "violet", + "magenta": "magenta", + "pink": "pink", + "brown": "brown", + "olive": "olive", + "gray": "gray", + "darkgray": "darkgray", + "lightgray": "lightgray", + "black": "black", + "white": "white", +} + + +def get_md_colors_latex_mapping() -> Dict[str, str]: + """ + Get a copy of the markdown-to-LaTeX color mapping. + + :return: Dict mapping color names (e.g., 'red', 'blue') to LaTeX color names + """ + return dict(_MD_COLORS_LATEX_MAPPING) + + +# Curated list of colors that are visually distinguishable and work well in +# both markdown and LaTeX contexts (excludes ones which are too light or have +# poor contrast). +_MD_COLORS = [ + "red", + "orange", + # "yellow", + # "lime", + "green", + "teal", + "cyan", + "blue", + # "purple", + "violet", + "magenta", + # "pink", + "brown", + "olive", + "gray", + "darkgray", + # "lightgray", + "black", + # "white", +] + + +def get_md_colors() -> List[str]: + """ + Get a copy of the curated list of markdown colors. 
+ + :return: List of color names suitable for colorizing markdown/LaTeX + """ + return list(_MD_COLORS) + + +def process_color_commands(in_line: str) -> str: + r""" + Transform color commands like `\red{xyz}` into valid LaTeX syntax. + + If the content is text (not math), wraps it in `\text{}`. + + E.g.: + - `\red{abc}` -> `\textcolor{red}{\text{abc}}` + - `\blue{x + y}` -> `\textcolor{blue}{x + y}` + + :param in_line: input line to process + :return: line with color commands transformed + """ + for md_color, latex_color in get_md_colors_latex_mapping().items(): + # This regex matches color commands like \red{content}, \blue{content}, + # etc. + pattern = re.compile( + rf""" + \\{md_color} # Match the color command (e.g., \red, \blue, etc.). + \{{ # Match the opening curly brace. + ([^}}]*) # Capture everything inside the curly braces. + \}} # Match the closing curly brace. + """, + re.VERBOSE, + ) + + def _replacement(match: re.Match, latex_color: str) -> str: + """ + Replace a color command with LaTeX \textcolor directive. + """ + content = match.group(1) + # Math expressions (containing operators, brackets, etc.) render + # directly; plain text needs \text{} wrapper for proper LaTeX rendering. + is_math_expr = any(c in content for c in "+-*/=<>{}[]()^_") + if is_math_expr: + ret = rf"\textcolor{{{latex_color}}}{{{content}}}" + else: + ret = rf"\textcolor{{{latex_color}}}{{\text{{{content}}}}}" + return ret + + # Replace the color command with the LaTeX color command. + in_line = re.sub( + pattern, lambda m: _replacement(m, latex_color), in_line + ) + return in_line + + +def has_color_command(text: str) -> bool: + """ + Check if text contains any color commands like `\\red{...}` or `\\blue{...}`. 
+ + :param text: text to check + :return: True if text contains at least one color command + """ + hdbg.dassert_isinstance(text, str) + # hdbg.dassert_not_in("\n", line) + for color in _MD_COLORS_LATEX_MAPPING.keys(): + # This regex matches LaTeX color commands like \red{content}, + # \blue{content}, etc. + pattern = re.compile( + rf""" + \\{color} # Match the color command (e.g., \red, \blue, etc.). + \{{ # Match the opening curly brace. + ([^}}]*) # Capture everything inside the curly braces. + \}} # Match the closing curly brace. + """, + re.VERBOSE, + ) + if re.search(pattern, text): + return True + return False + + +# TODO(gp): -> List[str] +# TODO(gp): Use hmarkdown.process_lines() and test it. +def colorize_bullet_points_in_slide( + txt: str, + *, + use_abbreviations: bool = True, + interpolate_colors: bool = False, + all_md_colors: Optional[List[str]] = None, +) -> str: + r""" + Colorize bold markdown items `**text**` with color commands. + + Scans the text line-by-line for bold markdown items and wraps each in a + color command (e.g., `**\red{text}**`). Skips code blocks and tables to + preserve their formatting. Bold items are colored sequentially using the + provided color list. 
+ + :param txt: Markdown text containing bold items to colorize + :param use_abbreviations: + - If True, use abbreviated color syntax (e.g., `\red{foo}`) + - If False, use full LaTeX syntax (e.g., `\textcolor{red}{foo}`) + :param interpolate_colors: + - If True, evenly space selected colors across all bold items + - If False, use a predefined sequence for common counts (1-4 items get + fixed color sets, more items cycle through all_md_colors) + :param all_md_colors: List of available colors to cycle through + - Default: curated list from `get_md_colors()` + :return: Markdown text with bold items wrapped in color commands + """ + hdbg.dassert_isinstance(txt, str) + if all_md_colors is None: + all_md_colors = list(get_md_colors()) + # Strip code blocks and tables to avoid colorizing content inside them. + lines = txt.split("\n") + lines, fence_map = replace_fenced_blocks_with_tags(lines) + _LOG.debug("Found %s fenced blocks", len(fence_map)) + lines, table_map = replace_tables_with_tags(lines) + _LOG.debug("Found %s tables", len(table_map)) + # Count bold markers (**) to determine how many bold items exist. + tot_bold = 0 + # Scan the text line by line and count how many bold items there are. + for line in lines: + # Count the number of bold items. + num_bold = len(re.findall(r"\*\*", line)) + tot_bold += num_bold + _LOG.debug("tot_bold=%s", tot_bold) + if tot_bold == 0: + return txt + # Divide by 2 since each bold item is wrapped with ** on both sides. + # hdbg.dassert_eq(tot_bold % 2, 0, "tot_bold=%s needs to be even", tot_bold) + num_bolds = tot_bold // 2 + + def _interpolate_colors(num_bolds: int) -> List[str]: + """ + Sample colors evenly spaced to cover all bold items distinctly. 
+ """ + step = len(all_md_colors) // num_bolds + colors = list(all_md_colors)[::step][:num_bolds] + return colors + + if interpolate_colors: + colors = _interpolate_colors(num_bolds) + else: + # Use fixed color sequences for small numbers of bold items; for larger + # counts, cycle through the available colors. + if num_bolds == 1: + colors = ["red"] + elif num_bolds == 2: + colors = ["red", "blue"] + elif num_bolds == 3: + colors = ["red", "green", "blue"] + elif num_bolds == 4: + colors = ["red", "green", "blue", "violet"] + else: + colors = all_md_colors[:num_bolds] + _LOG.debug("colors=%s", colors) + hdbg.dassert_lte( + num_bolds, len(colors), "Number of bold items exceeds available colors" + ) + color_idx = 0 + txt_out = [] + for line in lines: + + def color_replacer(match: re.Match[str]) -> str: + """ + Replace strings like "**foo**" with strings like "**\red{foo}**". + """ + nonlocal color_idx + text = match.group(1) + hdbg.dassert_lte( + color_idx, + len(colors), + "Color index out of bounds; not enough colors assigned", + ) + color_to_use = colors[color_idx] + hdbg.dassert_in( + color_to_use, + get_md_colors_latex_mapping(), + "Selected color is not in the color mapping", + ) + latex_color = get_md_colors_latex_mapping()[color_to_use] + color_idx += 1 + if use_abbreviations: + ret = f"**\\{color_to_use}{{{text}}}**" + else: + ret = f"**\\textcolor{{{latex_color}}}{{{text}}}**" + return ret + + line = re.sub(r"\*\*([^*]+)\*\*", color_replacer, line) + txt_out.append(line) + # Restore code blocks and tables that were temporarily replaced with tags. 
+ txt_out = replace_tags_with_fenced_blocks(txt_out, fence_map) + txt_out = replace_tags_with_tables(txt_out, table_map) + txt_out = "\n".join(txt_out) + return txt_out diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py new file mode 100644 index 000000000..5b626a15a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py @@ -0,0 +1,66 @@ +""" +Import as: + +import helpers.hmarkdown_comments as hmarcomm +""" + +import logging +import re +from typing import Tuple + +import helpers.hdbg as hdbg +from helpers.hmarkdown_headers import is_markdown_line_separator + +_LOG = logging.getLogger(__name__) + + +def process_single_line_comment(line: str) -> bool: + """ + Handle single line comment. + + We need to do it after the '//' in code blocks have been handled. + + :param line: line of text to process + :return: whether to continue processing the line or skip it + """ + do_continue = False + if line.startswith(r"%%") or line.startswith(r"//"): + do_continue = True + _LOG.debug(" -> do_continue=True") + return do_continue + # Skip frame. + if is_markdown_line_separator(line): + do_continue = True + _LOG.debug(" -> do_continue=True") + return do_continue + # Nothing to do. + return do_continue + + +def process_comment_block(line: str, in_skip_block: bool) -> Tuple[bool, bool]: + """ + Process lines of text to identify blocks that start with '' or '*/'. 
+ + :param line: current line of text being processed + :param in_skip_block: flag indicating if the function is currently + inside a comment block + :return: tuple containing: + - `do_continue`: whether to continue processing the current line or skip + it + - `in_skip_block`: boolean indicating whether the function is currently + inside a comment block + """ + do_continue = False + if line.startswith(r"") or re.search(r"^\s*\*\/", line): + # End skipping comments. + in_skip_block = False + # Skip comment. + _LOG.debug(" -> skip") + do_continue = True + return do_continue, in_skip_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py new file mode 100644 index 000000000..169e06624 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py @@ -0,0 +1,132 @@ +""" +Utilities for handling div blocks in markdown files. + +This module provides functions to add and remove prettier-ignore comments +around div blocks in markdown files. + +Import as: + +import helpers.hmarkdown_div_blocks as hmadiblo +""" + +from typing import List, Tuple + + +def _split_lines_into_chunks( + lines: List[str], +) -> List[Tuple[bool, List[str]]]: + """ + Split lines into chunks of div blocks and non-div blocks. + + A div block starts with a line containing ::: and ends with another + line containing :::. + + :param lines: List of strings representing lines in a markdown file. + :return: List of tuples (is_div_block, chunk_lines) where is_div_block + indicates if the chunk is a div block. + """ + chunks = [] + i = 0 + while i < len(lines): + line = lines[i] + # Check if this line starts a div block. + if line.strip().startswith(":::"): + # Look ahead to find the closing div block. 
+ j = i + 1 + while j < len(lines): + if lines[j].strip().startswith(":::"): + # Found the end of the div block. + chunk_lines = lines[i : j + 1] + chunks.append((True, chunk_lines)) + i = j + 1 + break + j += 1 + else: + # No closing div block found, treat as regular line. + chunks.append((False, [line])) + i += 1 + else: + # Start a non-div block chunk. + chunk_lines = [line] + i += 1 + # Continue collecting non-div lines. + while i < len(lines) and not lines[i].strip().startswith(":::"): + chunk_lines.append(lines[i]) + i += 1 + chunks.append((False, chunk_lines)) + return chunks + + +def add_prettier_ignore_to_div_blocks(lines: List[str]) -> List[str]: + """ + Add prettier-ignore comments around div blocks. + + A div block starts with a line containing ::: and has another line + with ::: following it. + + Examples of div blocks: + - :::: + ::::{.column width=40%} + - :::columns + ::::{.column width=60%} + - :::: + ::: + + :param lines: List of strings representing lines in a markdown file. + :return: List of strings with prettier-ignore comments added. + """ + # Step 1: Split into chunks. + chunks = _split_lines_into_chunks(lines) + # Step 2: Process chunks and add prettier-ignore comments. + result = [] + for is_div_block, chunk_lines in chunks: + if is_div_block: + # Add prettier-ignore comments around div blocks. + result.append("") + result.append("") + result.extend(chunk_lines) + result.append("") + result.append("") + else: + # Add non-div block lines as-is. + result.extend(chunk_lines) + return result + + +def remove_prettier_ignore_from_div_blocks(lines: List[str]) -> List[str]: + """ + Remove all prettier-ignore comments from lines. + + This function removes: + - lines + - lines + - Empty lines before prettier-ignore-start + - Empty lines after prettier-ignore-end + + :param lines: List of strings representing lines in a markdown file. + :return: List of strings with prettier-ignore comments removed. 
+ """ + result = [] + i = 0 + while i < len(lines): + line = lines[i] + # Check if this is a prettier-ignore-start comment. + if line.strip() == "": + # Remove empty line before prettier-ignore-start if present. + if result and result[-1] == "": + result.pop() + # Skip the prettier-ignore-start line. + i += 1 + continue + # Check if this is a prettier-ignore-end comment. + if line.strip() == "": + # Skip the prettier-ignore-end line. + i += 1 + # Skip empty line after prettier-ignore-end if present. + if i < len(lines) and lines[i] == "": + i += 1 + continue + # Add all other lines. + result.append(line) + i += 1 + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py new file mode 100644 index 000000000..8d3614b9b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py @@ -0,0 +1,131 @@ +""" +Import as: + +import helpers.hmarkdown_fenced_blocks as hmafeblo +""" + +import logging +import pprint +import re +from typing import Dict, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Add a decorator like in hprint to process both strings and lists +# of strings. + + +def replace_fenced_blocks_with_tags( + lines: List[str], +) -> Tuple[List[str], Dict[str, str]]: + """ + Replace fenced blocks with a tag and return the mapping from tags to the + fenced block text. 
+ + E.g., + ```` + hello + world + ```python + foo + ``` + bye + ```` + is replaced with: + ``` + hello + world + + bye + ``` + + :param lines: list of lines to process + :return: tuple containing: + - list of lines with the fenced blocks replaced by tags + - mapping from tags to the fenced block text + """ + hdbg.dassert_isinstance(lines, list) + result = [] + # True if we are inside a fenced block. + in_fenced_block = False + # Count the number of fenced blocks found. + fenced_block_count = 0 + # Store the mapping between the block number and the fence type. + fence_map = {} + # Store the text of the fenced block. + fence_depth = 0 + fence_text = [] + for i, line in enumerate(lines): + _LOG.debug("%d:line='%s'", i, line) + _LOG.debug( + " " + + hprint.to_str("fenced_block_count in_fenced_block fence_depth") + ) + # Look for the start of a fenced block. + fence_match = re.match(r"^\s*(`{3,})", line) + if fence_match: + _LOG.debug(" -> fence_match") + curr_fence_depth = len(fence_match.group(0)) + if not in_fenced_block: + # Start of a fenced block. + _LOG.debug(" -> start of fenced block") + in_fenced_block = True + fence_depth = curr_fence_depth + fenced_block_count += 1 + fence_text.append(line) + else: + # We are already in a fenced block. + fence_text.append(line) + if curr_fence_depth == fence_depth: + # End of block found. + _LOG.debug(" -> end of fenced block") + in_fenced_block = False + # Replace nested code block markers with tag. + result.append(f"") + fence_map[str(fenced_block_count)] = "\n".join(fence_text) + _LOG.debug(" -> added to fence_map") + # Reset state. + fence_depth = 0 + fence_text = [] + else: + if in_fenced_block: + _LOG.debug(" -> in_fenced_block") + fence_text.append(line) + else: + result.append(line) + return result, fence_map + + +def replace_tags_with_fenced_blocks( + lines: List[str], fence_map: Dict[str, str] +) -> List[str]: + """ + Replace tags with fenced blocks. 
+ + :param lines: list of lines to process + :param fence_map: mapping from tags to fenced block text + :return: list of lines with tags replaced by fenced blocks + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(fence_map, dict) + result = [] + for line in lines: + if line.startswith("")[0] + hdbg.dassert_in(tag, fence_map, "Found unmatched tag %s", tag) + result.append(fence_map[tag]) + del fence_map[tag] + else: + result.append(line) + hdbg.dassert_eq( + len(fence_map), + 0, + "Found %s unmatched tags:\n%s", + len(fence_map), + pprint.pformat(fence_map), + ) + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py new file mode 100644 index 000000000..666c3d03b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py @@ -0,0 +1,109 @@ +""" +Import as: + +import helpers.hmarkdown_filtering as hmarfilt +""" + +import logging +import re +from typing import List, Tuple + +import helpers.hdbg as hdbg +from helpers.hmarkdown_headers import ( + extract_section_from_markdown, +) +from helpers.hmarkdown_slides import extract_slides_from_markdown + +_LOG = logging.getLogger(__name__) + + +def filter_by_header(lines: List[str], header: str) -> List[str]: + """ + Extract a specific header from markdown text. + + :param lines: list of markdown lines to be processed + :param header: header to filter by (e.g., `# Introduction`) + :return: filtered lines + """ + hdbg.dassert_isinstance(lines, list) + # Filter by header. 
+ txt_lines = extract_section_from_markdown(lines, header) + hdbg.dassert_isinstance(txt_lines, list) + return txt_lines + + +def _parse_range(range_as_str: str, max_value: int) -> Tuple[int, int]: + """ + Parse a 0-indexed range string like '0:10' into start and end indices. + + :param range_as_str: string in format 'start:end' where start/end + can be numbers or 'None' (None means 0 for start, max_value for end) + :param max_value: maximum value to use when 'None' is specified for end + :return: tuple of '(start_index, end_index)' as 0-indexed integers + """ + m = re.match(r"^(\S+):(\S+)$", range_as_str) + hdbg.dassert(m, "Invalid range_as_str='%s'", range_as_str) + assert m is not None + start_value, end_value = m.groups() + if start_value.lower() == "none": + start_value = 0 + else: + start_value = int(start_value) + if end_value.lower() == "none": + end_value = max_value + else: + end_value = int(end_value) + return start_value, end_value + + +def filter_by_lines(lines: List[str], filter_by_lines: str) -> List[str]: + """ + Filter the lines of text in `[start_line, end_line[` (0-indexed). + + :param lines: list of lines to be processed + :param filter_by_lines: 0-indexed range string like `0:10`, `0:None`, or `None:10` + :return: filtered lines + """ + hdbg.dassert_isinstance(lines, list) + start_line, end_line = _parse_range(filter_by_lines, len(lines)) + hdbg.dassert_lte(start_line, end_line) + txt = lines[start_line:end_line] + _LOG.warning( + "filter_by_lines='%s' -> lines=[%s:%s]", + filter_by_lines, + start_line, + end_line, + ) + hdbg.dassert_isinstance(txt, list) + return txt + + +def filter_by_slides(lines: List[str], filter_by_slides: str) -> List[str]: + """ + Filter the lines of text in `[start_slide, end_slide[` (0-indexed). 
+ + :param lines: list of lines to be processed + :param filter_by_slides: 0-indexed range string like `0:10`, `0:None`, or `None:10` + :return: filtered lines + """ + hdbg.dassert_isinstance(lines, list) + slides_info, last_line_number = extract_slides_from_markdown(lines) + _LOG.debug("slides_info=%s\n%s", len(slides_info), slides_info) + start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) + _LOG.debug("start_slide=%s, end_slide=%s", start_slide, end_slide) + hdbg.dassert_lte(start_slide, end_slide) + hdbg.dassert_lte(end_slide, len(slides_info)) + start_line = slides_info[start_slide].line_number + if end_slide == len(slides_info): + end_line = last_line_number + else: + end_line = slides_info[end_slide].line_number + _LOG.warning( + "filter_by_slides='%s' -> lines=[%s:%s]", + filter_by_slides, + start_line, + end_line, + ) + txt = lines[start_line - 1 : end_line - 1] + hdbg.dassert_isinstance(txt, list) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py new file mode 100644 index 000000000..f3fd1b4a9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py @@ -0,0 +1,530 @@ +""" +Import as: + +import helpers.hmarkdown_formatting as hmarform +""" + +import logging +import re +from typing import List + +import helpers.hdbg as hdbg +import helpers.hmarkdown_headers as hmarhead +import helpers.hmarkdown_slides as hmarslid +import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr + +_LOG = logging.getLogger(__name__) + + +def remove_end_of_line_periods(lines: List[str]) -> List[str]: + """ + Remove periods at the end of each line in the given text. 
+ + :param lines: list of input lines to process + :return: lines with end-of-line periods removed + """ + hdbg.dassert_isinstance(lines, list) + txt_out = [line.rstrip(".") for line in lines] + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +def remove_empty_lines(lines: List[str]) -> List[str]: + """ + Remove empty lines from the given text. + + :param lines: list of input lines to process + :return: lines with empty lines removed + """ + hdbg.dassert_isinstance(lines, list) + txt_out = [line for line in lines if line != ""] + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +# def remove_gdoc_artifacts(lines: List[str]) -> List[str]: +# """ +# Remove empty lines from the given text. + +# :param lines: list of input lines to process +# :return: lines with empty lines removed +# """ +# hdbg.dassert_isinstance(lines, list) +# # Remove “” and …. +# lines = re.sub(r"“", '"', lines) +# lines = re.sub(r"”", '"', lines) +# lines = re.sub(r"’", "'", lines) +# lines = re.sub(r"…", "", lines) +# hdbg.dassert_isinstance(lines, list) +# return lines + + +# TODO(gp): Add tests. +def remove_code_delimiters(lines: List[str]) -> List[str]: + """ + Remove ```python and ``` delimiters from a given text. + + :param lines: list of input lines containing code delimiters + :return: lines with the code delimiters removed + """ + hdbg.dassert_isinstance(lines, list) + # Join lines back to text, apply regex logic, then split again. + txt = "\n".join(lines) + # Replace the ```python and ``` delimiters with empty strings. + txt_out = txt.replace("```python", "").replace("```", "") + txt_out = txt_out.strip() + # Remove the numbers at the beginning of the line, if needed + # E.g., `3: """` -> `"""`. + txt_out = re.sub(r"(^\d+: )", "", txt_out, flags=re.MULTILINE) + # Split back into lines. 
+ result = txt_out.split("\n") if txt_out else [] + hdbg.dassert_isinstance(result, list) + return result + + +def add_line_numbers(lines: List[str]) -> List[str]: + """ + Add line numbers to each line of text. + + :param lines: list of input lines to process + :return: lines with line numbers added + """ + hdbg.dassert_isinstance(lines, list) + numbered_lines = [] + for i, line in enumerate(lines, 1): + numbered_lines.append(f"{i}: {line}") + hdbg.dassert_isinstance(numbered_lines, list) + return numbered_lines + + +def remove_formatting(txt: str) -> str: + """ + Remove markdown and LaTeX formatting from text. + + :param txt: input text to process + :return: text with formatting removed + """ + # Replace bold markdown syntax with plain text. + txt = re.sub(r"\*\*(.*?)\*\*", r"\1", txt) + # Replace italic markdown syntax with plain text. + txt = re.sub(r"\*(.*?)\*", r"\1", txt) + # Remove \textcolor{red}{ ... }. + txt = re.sub(r"\\textcolor\{(.*?)\}\{(.*?)\}", r"\2", txt) + # Remove \red{ ... }. + txt = re.sub(r"\\\S+\{(.*?)\}", r"\1", txt) + return txt + + +def md_clean_up(txt: str) -> str: + """ + Clean up a Markdown file copy-pasted from Google Docs, ChatGPT. + + :param txt: input text to process + :return: text with the cleaning up applied + """ + # 0) General formatting. + # Remove dot at the end of each line. + txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) + # 1) ChatGPT formatting. + # E.g.,`` • Description Logics (DLs) are a family`` + # Replace `•` with `-` + txt = re.sub(r"•\s+", r"- ", txt) + # Replace `\t` with 2 spaces + txt = re.sub(r"\t", r" ", txt) + # Remove `⋅`. + txt = re.sub(r"⸻", r"", txt) + # “ + txt = re.sub(r"“", r'"', txt) + # ” + txt = re.sub(r"”", r'"', txt) + # ’ + txt = re.sub(r"’", r"'", txt) + # … + txt = re.sub(r"…", r"...", txt) + # 2) Latex formatting. + # Replace \( ... \) math syntax with $ ... $. + txt = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", txt) + # Replace \[ ... \] math syntax with $$ ... 
$$, handling multiline equations. + txt = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", txt, flags=re.DOTALL) + # Replace `P(.)`` with `\Pr(.)`. + txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) + # + txt = re.sub(r"\\left\[", r"[", txt) + txt = re.sub(r"\\right\]", r"]", txt) + # + txt = re.sub(r"\\mid", r"|", txt) + # + txt = re.sub(r"→", r"$\\rightarrow$", txt) + # Remove empty spaces at beginning / end of Latex equations $...$. + # E.g., $ \text{Student} $ becomes $\text{Student}$ + # txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) + # Transform `Example: Training a deep` into `E.g., training a deep`, + # converting the word after `Example:` to lower case. + txt = re.sub(r"\bExample:", "E.g.,", txt) + txt = re.sub(r"\bE.g.,\s+(\w)", lambda m: "E.g., " + m.group(1).lower(), txt) + return txt + + +def remove_empty_lines_from_markdown(lines: List[str]) -> List[str]: + """ + Remove all empty lines from markdown text. + + :param lines: list of input markdown lines + :return: formatted markdown lines + """ + hdbg.dassert_isinstance(lines, list) + # Remove empty lines. + result = [line for line in lines if line.strip()] + hdbg.dassert_isinstance(result, list) + return result + + +def prettier_markdown(txt: str) -> str: + """ + Format markdown text using `prettier`. + + :param txt: input text to format + :return: formatted text + """ + file_type = "md" + txt = dshdlipr.prettier_on_str(txt, file_type) + return txt + + +def format_markdown(txt: str) -> str: + """ + Format markdown text. + + :param txt: input text to format + :return: formatted text + """ + file_type = "md" + txt = dshdlipr.prettier_on_str(txt, file_type) + lines = txt.split("\n") + clean_lines = remove_empty_lines_from_markdown(lines) + txt = "\n".join(clean_lines) + return txt + + +def bold_first_level_bullets( + lines: List[str], *, max_length: int = 30 +) -> List[str]: + """ + Make first-level bullets bold in markdown text. 
+ + :param lines: list of input markdown lines + :param max_length: max length of the bullet text to be bolded. The + value '-1' means no limit + :return: formatted markdown lines with first-level bullets in bold + """ + hdbg.dassert_isinstance(lines, list) + result = [] + for line in lines: + # Check if this is a first-level bullet point. + if re.match(r"^\s*- ", line): + # Check if the line has already bold text it in it. + if not re.search(r"\*\*", line): + # Bold first-level bullets. + indentation = len(line) - len(line.lstrip()) + if indentation == 0: + # First-level bullet, add bold markers. + m = re.match(r"^(\s*-\s+)(.*)", line) + hdbg.dassert(m, "Can't parse line='%s'", line) + bullet_text = m.group(2) # type: ignore[union-attr] + if max_length > -1 and len(bullet_text) <= max_length: + spaces = m.group(1) # type: ignore[union-attr] + line = spaces + "**" + bullet_text + "**" + result.append(line) + hdbg.dassert_isinstance(result, list) + return result + + +def format_figures(lines: List[str]) -> List[str]: + """ + Convert markdown slides with figures to use fenced div syntax with column + layout. + + If the input already uses column format or contains no figures, + returns unchanged. + + :param lines: list of input markdown lines + :return: formatted markdown lines with figures in column layout + """ + hdbg.dassert_isinstance(lines, list) + # Check if already in column format. + text = "\n".join(lines) + if "::: columns" in text and ":::: {.column" in text: + return lines + # Find first figure line to split content. + first_figure_idx = -1 + for i, line in enumerate(lines): + if re.match(r"^\s*!\[.*\]\(.*\)\s*$", line.strip()): + first_figure_idx = i + break + # If no figures found, return original lines unchanged. + if first_figure_idx == -1: + return lines + # Split content: slide titles (lines starting with *) stay outside columns, + # other content before first figure goes to left column, + # everything from first figure onwards goes to right column. 
+ pre_figure_lines = lines[:first_figure_idx] + figure_content = lines[first_figure_idx:] + # Separate slide titles from other content + slide_titles = [] + text_lines = [] + for line in pre_figure_lines: + if line.strip().startswith("*"): + slide_titles.append(line) + else: + text_lines.append(line) + # Remove empty lines at the beginning and end of text_lines. + while text_lines and not text_lines[0].strip(): + text_lines.pop(0) + while text_lines and not text_lines[-1].strip(): + text_lines.pop() + # Build the column format. + result = [] + # Add slide titles first (outside columns) + result.extend(slide_titles) + result.append("::: columns") + result.append(":::: {.column width=65%}") + result.extend(text_lines) + result.append("::::") + result.append(":::: {.column width=40%}") + result.append("") + result.extend(figure_content) + result.append("::::") + result.append(":::") + hdbg.dassert_isinstance(result, list) + return result + + +def format_md_links_to_latex_format(lines: List[str]) -> List[str]: + r""" + Convert markdown links to formatted links with LaTeX styling. + + Convert markdown links: + - Plain URLs: + http://... or https://... + to the format: + [\textcolor{blue}{\underline{URL}}](URL) + + - Existing formatted links: + [Text](URL) + to the format: + [\textcolor{blue}{\underline{Text}}](URL) + + - Email links: + [](email@domain.com) or [](http://...) or [](https://...) + to the format: + [\textcolor{blue}{\underline{URL}}](URL) + + - Picture links + ![](lectures_source/.../lec_4_1_slide_5_image_1.png) + are left untouched + + :param lines: list of input markdown lines + :return: formatted markdown lines with styled links + """ + hdbg.dassert_isinstance(lines, list) + result = [] + # URL regex pattern. + url_pattern = r"https?://[^\s)}\]`]+" + # Pattern for URLs in backticks. + backtick_url_pattern = r"`(https?://[^\s`]+)`" + # Pattern for existing formatted links that need normalization. 
+ # This matches [\textcolor{blue}{\underline{Text}}](URL) where Text != URL. + formatted_link_pattern = ( + r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\((https?://[^)]+)\)" + ) + # Pattern for markdown links: [Text](URL). + # Matches text that can include escaped underscores (\_ ). + markdown_link_pattern = r"\[((?:[^\]\\]|\\[_])+)\]\((https?://[^\)]+)\)" + # Pattern for email links: [email@domain.com](email@domain.com). + email_link_pattern = r"\[([^\]\\]+@[^\]\\]+)\]\(([^)]+@[^)]+)\)" + # Pattern for empty bracket links: [](URL) or [](email). + empty_bracket_pattern = r"\[\]\(([^\)]+)\)" + # Pattern for image links: ![...](...). + image_link_pattern = r"!\[.*?\]\([^\)]+\)" + for line in lines: + # Process the line for all URL patterns. + processed_line = line + # Store image links temporarily to avoid processing them. + image_placeholders = [] + + def store_image_link(match): + placeholder = f"__IMAGE_LINK_{len(image_placeholders)}__" + image_placeholders.append(match.group(0)) + return placeholder + + processed_line = re.sub( + image_link_pattern, store_image_link, processed_line + ) + + # Convert empty bracket links [](URL) or [](email). + def convert_empty_bracket_link(match): + target = match.group(1) + return rf"[\textcolor{{blue}}{{\underline{{{target}}}}}]({target})" + + processed_line = re.sub( + empty_bracket_pattern, convert_empty_bracket_link, processed_line + ) + + # Convert URLs in backticks. + def convert_backtick_url(match): + url = match.group(1) + return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" + + processed_line = re.sub( + backtick_url_pattern, convert_backtick_url, processed_line + ) + + # Normalize existing formatted links to keep existing display text. 
+ def normalize_formatted_link(match): + text = match.group(1) + url = match.group(2) + return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" + + processed_line = re.sub( + formatted_link_pattern, normalize_formatted_link, processed_line + ) + + # Convert markdown links [Text](URL) to formatted links. + def convert_markdown_link(match): + text = match.group(1) + url = match.group(2) + return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" + + processed_line = re.sub( + markdown_link_pattern, convert_markdown_link, processed_line + ) + + # Convert email links [email@domain.com](email@domain.com) to formatted links. + def convert_email_link(match): + email = match.group(2) + return rf"[\textcolor{{blue}}{{\underline{{{email}}}}}]({email})" + + processed_line = re.sub( + email_link_pattern, convert_email_link, processed_line + ) + # Convert plain URLs (but avoid converting URLs that are already part + # of formatted links). + # First, temporarily replace formatted links to avoid interfering with + # them. + temp_placeholders = [] + # Store existing correctly formatted links temporarily. + correct_formatted_link_pattern = ( + r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\(([^)]+)\)" + ) + + def store_formatted_link(match): + placeholder = f"__FORMATTED_LINK_{len(temp_placeholders)}__" + temp_placeholders.append(match.group(0)) + return placeholder + + temp_line = re.sub( + correct_formatted_link_pattern, store_formatted_link, processed_line + ) + + # Convert remaining plain URLs. + def convert_plain_url(match): + url = match.group(0) + return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" + + temp_line = re.sub(url_pattern, convert_plain_url, temp_line) + # Restore formatted links. + for i, placeholder in enumerate(temp_placeholders): + temp_line = temp_line.replace(f"__FORMATTED_LINK_{i}__", placeholder) + # Restore image links. 
+ for i, image_link in enumerate(image_placeholders): + temp_line = temp_line.replace(f"__IMAGE_LINK_{i}__", image_link) + result.append(temp_line) + hdbg.dassert_isinstance(result, list) + return result + + +# TODO(gp): -> format_first_level_bullets_in_slide +def format_first_level_bullets(lines: List[str]) -> List[str]: + """ + Add empty lines to separate first level bullets and remove all remaining + empty lines. + + This is the formatting we use in the slides. + + :param lines: list of input markdown lines + :return: formatted markdown lines + """ + hdbg.dassert_isinstance(lines, list) + # Remove empty lines. + lines_clean = [line for line in lines if line.strip()] + # Handle special case: if input was only empty lines, preserve structure. + if not lines_clean and lines: + return lines + # Add empty lines only before first level bullets. + result = [] + for i, line in enumerate(lines_clean): + # Check if current line is a first level bullet (no indentation). + if re.match(r"^- ", line): + # Add empty line before first level bullet if not at start. + if i > 0: + result.append("") + result.append(line) + hdbg.dassert_isinstance(result, list) + return result + + +# TODO(gp): Implement and add tests. +def format_column_blocks(lines: List[str]) -> List[str]: + """ + # Make sure that there is a single empty line before and after the following + # block: + # + # 1) + # ``` + # ::: columns + # :::: {.column width=55%} + # ``` + # 2) + # ``` + # :::: + # :::: {.column width=40%} + # ``` + # 3) + # ``` + # :::: + # ::: + # ``` + + # + """ + return lines + + +def format_markdown_slide(lines: List[str]) -> List[str]: + """ + Format markdown text for a slide. + + :param lines: input lines to format + :return: formatted slide text + """ + hdbg.dassert_isinstance(lines, list) + if False: + lines = bold_first_level_bullets(lines) + txt = "\n".join(lines) + # Format the markdown slides. 
+ # TODO(gp): Maybe the conversion should be done inside `prettier_on_str` + # passing a marker to indicate that the text is a slide. + lines = hmarslid.convert_slide_to_markdown(lines) + # lines = format_column_blocks() + # + file_type = "md" + txt = "\n".join(lines) + txt = dshdlipr.prettier_on_str(txt, file_type) + # + lines = txt.split("\n") + lines = hmarslid.convert_markdown_to_slide(lines) + # Format the first level bullets. + lines = format_first_level_bullets(lines) + # + lines = hmarhead.capitalize_header(lines) + return lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py new file mode 100644 index 000000000..532de2aee --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py @@ -0,0 +1,841 @@ +""" +Import as: + +import helpers.hmarkdown_headers as hmarhead +""" + +import dataclasses +import logging +import re +from typing import List, Optional, Tuple, cast + +import helpers.hdbg as hdbg +import helpers.hparser as hparser +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + +_TRACE = False + + +def is_markdown_line_separator(line: str, *, min_repeats: int = 5) -> bool: + """ + Check if the given line is a Markdown separator. + + This function determines if a line consists of repeated characters + (`#`, `/`, `-`, `=`) that would indicate a markdown separator. 
+ + :param line: current line of text being processed + :param min_repeats: minimum number of times the characters have to + be repeated to be considered a separator, e.g., if `min_repeats + = 2`, then `##`, `###`, `//` are considered to be line + separators, but `#`, `/` are not + :return: whether the line is a separator + """ + separator_pattern = rf""" + \#*\s* # Optional leading `#` and whitespace. + ([#/=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it + # (`min_repeats` - 1) times. + \s*$ # Match only whitespace characters + # until the end of the line. + """ + res = bool(re.match(separator_pattern, line, re.VERBOSE)) + return res + + +def is_header(line: str) -> Tuple[bool, int, str]: + """ + Check if the given line is a Markdown header. + + :param line: line to check + :return: tuple containing: + - boolean indicating if the line is a header + - level of the header (`0` if not a header) + - title of the header (empty string if not a header) + """ + # hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) + m = re.match(r"(#+)\s+(.*)", line) + is_header_ = bool(m) + if m: + level = len(m.group(1)) + title = m.group(2) + else: + level = 0 + title = "" + return is_header_, level, title + + +# ############################################################################# +# Frame chapters +# ############################################################################# + + +def _has_internal_capitals(word: str) -> bool: + """ + Check if a word has capital letters within it (not just at the start). + + This function detects words like `SimpleFeedForward`, `DeepNPTS` that + should be preserved without title case transformation. + + Note: uppercase letters immediately after an apostrophe are excluded + from this check, since they are not "internal capitals" but rather + normal English capitalization (e.g., "Won'T" has a capital T that is + not internal but rather a grammatical artifact of title case tools). 
+ + :param word: word to check + :return: `True` if the word has internal capitals, `False` otherwise + """ + hdbg.dassert_isinstance(word, str) + # A word has internal capitals if it contains at least one uppercase letter + # after the first character, excluding uppercase letters immediately after + # an apostrophe. + if len(word) <= 1: + return False + for i in range(1, len(word)): + if word[i].isupper() and word[i - 1] != "'": + return True + return False + + +def frame_chapters(lines: List[str], *, max_lev: int = 4) -> List[str]: + """ + Add the frame around each chapter. + """ + hdbg.dassert_isinstance(lines, list) + txt_new: List[str] = [] + # _LOG.debug("lines=%s", lines) + for i, line in enumerate(lines): + _LOG.debug("line=%d:%s", i, line) + m = re.match(r"^(\#+) ", line) + txt_processed = False + if m: + comment = m.group(1) + lev = len(comment) + _LOG.debug(" -> lev=%s", lev) + if lev < max_lev: + sep = comment + " " + "#" * (80 - 1 - len(comment)) + txt_new.append(sep) + txt_new.append(line) + txt_new.append(sep) + txt_processed = True + else: + _LOG.debug( + " -> Skip formatting the chapter frame: lev=%d, max_lev=%d", + lev, + max_lev, + ) + if not txt_processed: + txt_new.append(line) + hdbg.dassert_isinstance(txt_new, list) + return txt_new + + +def has_mixed_case(word: str) -> bool: + """ + Check if a word has capital letters in positions other than the first. + + This detects words like "SimpleFeedForward", "DeepNPTS", etc. that should + be preserved as-is. + + :param word: word to check + :return: True if the word has capital letters after the first position + """ + if len(word) <= 1: + return False + # Check if any character after the first position is uppercase. + return any(c.isupper() for c in word[1:]) + + +def _capitalize_title_word(word: str) -> str: + """ + Capitalize the first letter of a word without capitalizing after apostrophes. 
+ + Python's `str.title()` capitalizes the first letter after ANY non-alphanumeric + character, including apostrophes. For example, `"won't".title()` returns + `"Won'T"` instead of the expected `"Won't"`. + + This function instead capitalizes only the first letter of the word and + lowercases any uppercase letters that follow an apostrophe. + + :param word: word to capitalize + :return: word with proper title case (first letter capitalized, no capitals + after apostrophes) + """ + if not word: + return word + chars = list(word) + chars[0] = chars[0].upper() + for i in range(1, len(chars)): + if chars[i - 1] == "'": + chars[i] = chars[i].lower() + return "".join(chars) + + +def capitalize_header(lines: List[str]) -> List[str]: + """ + Improve the header and slide titles. + + - Headers start with one or more `#`s + - Slide titles start with one `*` + + - The title is transformed to title case as below: + - ML theory -> ML Theory + - A map of machine learning -> A Map of Machine Learning + - Business strategists -> + Business Strategists + - Establish a phased, collaborative approach -> + Establish a Phased, Collaborative Approach + + - Strings inside backticks, single quotes, and double quotes are preserved, + with careful handling to avoid matching apostrophes in contractions. + - Words with internal capital letters are preserved (e.g., SimpleFeedForward, + DeepNPTS). + - Contractions and words with apostrophes are properly capitalized + (e.g., "won't" becomes "Won't", not "Won'T"). + - Headers inside fenced code blocks are not processed. + """ + import helpers.hmarkdown_fenced_blocks as hmafeblo + + hdbg.dassert_isinstance(lines, list) + # Replace fenced blocks with tags to prevent processing headers inside them. + lines_without_fenced, fence_map = hmafeblo.replace_fenced_blocks_with_tags( + lines + ) + txt_new: List[str] = [] + for i, line in enumerate(lines_without_fenced): + # Parse header (starting with `#`) and slide title (starting with `*`). 
+ m = re.match(r"^(\#+|\*) (.*)$", line) + if m: + # Parse the title. + title = m.group(2) + # Transform to title case, leaving words that are all capitalized + # and conjunctions as is, while preserving quoted strings. + non_cap_words = { + "a", + "an", + "and", + "as", + "at", + "but", + "by", + "for", + "in", + "of", + "on", + "or", + "the", + "to", + "vs", + "with", + } + # Find and temporarily replace quoted strings to preserve them. + quoted_strings = [] + placeholders = [] + # Pattern to match strings inside backticks, single quotes, or double quotes. + # Single quotes are matched only when not preceded or followed by word + # characters, to avoid matching apostrophes in contractions like "don't". + # Backtick and double-quote patterns are simpler since they're less likely + # to be used in natural text. + quote_pattern = r""" + ( # Start of alternation + `[^`]*` # Backtick-quoted string + | # OR + (? str: + quoted_strings.append(match.group(0)) + placeholder = f"__QUOTED_{len(quoted_strings) - 1}__" + placeholders.append(placeholder) + return placeholder + + # Replace quoted strings with placeholders. + title_with_placeholders = re.sub( + quote_pattern, replace_quoted, title, flags=re.VERBOSE + ) + # Split into words. + words = title_with_placeholders.split() + # Find the first non-numeric word index to always capitalize it, + # even if it's in non_cap_words (e.g., "4.4 the Victim" -> "4.4 The Victim"). + first_text_word_idx = None + for j, word in enumerate(words): + if word.startswith("__QUOTED_") and word.endswith("__"): + continue + # Skip numeric/punctuation-only prefixes like "4.4", "1.", "1.2.3". + if not re.match(r"^[\d\.\-]+$", word): + first_text_word_idx = j + break + # If all words are numeric, fall back to index 0. + if first_text_word_idx is None and words: + first_text_word_idx = 0 + # Process each word. 
+ for i, word in enumerate(words): + if word.startswith("__QUOTED_") and word.endswith("__"): + # Skip placeholder words, they will be restored later. + continue + elif i == first_text_word_idx and not word.isupper(): + # Capitalize the first text word (may follow numeric prefix + # like "4.4") even if it's in non_cap_words. + if _has_internal_capitals(word): + # Preserve words with internal capitals. + pass + else: + words[i] = _capitalize_title_word(word) + elif word.isupper(): + # Skip words that are all caps (e.g. ML, API). + continue + elif _has_internal_capitals(word): + # Preserve words with internal capitals (e.g., SimpleFeedForward). + pass + elif word.lower() in non_cap_words: + # Don't capitalize conjunctions and other minor words. + words[i] = word.lower() + else: + # Capitalize other words. + words[i] = _capitalize_title_word(word) + title = " ".join(words) + # Restore quoted strings. + for i, placeholder in enumerate(placeholders): + title = title.replace(placeholder, quoted_strings[i]) + # Reconstruct the line. + line = m.group(1) + " " + title + txt_new.append(line) + else: + txt_new.append(line) + # Restore fenced blocks. + txt_new = hmafeblo.replace_tags_with_fenced_blocks(txt_new, fence_map) + hdbg.dassert_isinstance(txt_new, list) + return txt_new + + +# ############################################################################# +# Header processing +# ############################################################################# + + +# TODO(gp): This could be done by processing `HeaderList`. +def extract_section_from_markdown( + lines: List[str], header_name: str +) -> List[str]: + """ + Extract a section of text from a Markdown document based on the header + name. + + The function identifies a section by locating the specified header + and captures all lines until encountering another header of the same + or higher level. Headers are identified by the '#' prefix, and their + level is determined by the number of '#' characters. 
+ + :param lines: markdown content as a list of strings + :param header_name: exact header name to extract (excluding `#` + symbols) + :return: extracted section as a list of strings, including the header line + itself and all lines until the next header of the same or higher + level + """ + hdbg.dassert_isinstance(lines, list) + _LOG.debug(hprint.to_str("lines")) + extracted_lines = [] + # Level of the current header being processed. + current_level: Optional[int] = None + # Flag to indicate if we're inside the desired section. + inside_section: bool = False + found = False + # Process each line in the markdown content. + for line in lines: + _LOG.debug(hprint.to_str("line")) + # Check if the line is a markdown header. + if line.strip().startswith("#"): + # Determine the level of the header by counting leading '#' + # characters. + header_level = len(line) - len(line.lstrip("#")) + # Extract the actual header text by stripping '#' and surrounding + # whitespace. + header_text = line.strip("#").strip() + _LOG.debug(hprint.to_str("header_level, header_text")) + # Handle the end of the desired section when encountering another + # header. + if inside_section: + hdbg.dassert_is_not(current_level, None) + current_level = cast(int, current_level) + if header_level <= current_level: + break + # Check if the current line is the desired header. + if header_text == header_name: + found = True + # Set the level of the matched header. + current_level = header_level + # Mark that we are now inside the desired section. + inside_section = True + # Add the line to the output if inside the desired section. 
+ if inside_section: + extracted_lines.append(line) + _LOG.debug(hprint.to_str("extracted_lines")) + if not found: + raise ValueError(f"Header '{header_name}' not found") + hdbg.dassert_isinstance(extracted_lines, list) + return extracted_lines + + +# ############################################################################# +# HeaderInfo +# ############################################################################# + + +@dataclasses.dataclass +class HeaderInfo: + """ + Store the header level, the description, and the line number in the + original file. + + E.g., `(1, "Chapter 1", 5)` and `(2, "Section 1.1", 10)` + """ + + level: int + description: str + line_number: int + + def __init__(self, level: int, description: str, line_number: int): + hdbg.dassert_isinstance(level, int) + hdbg.dassert_lte(1, level) + self.level = level + # + hdbg.dassert_isinstance(description, str) + hdbg.dassert_ne( + description, + "", + "Invalid HeaderInfo: %s, %s, %s", + level, + description, + line_number, + ) + self.description = description + # + hdbg.dassert_isinstance(line_number, int) + hdbg.dassert_lte(1, line_number) + self.line_number = line_number + # + self.children: List[HeaderInfo] = [] + + def as_tuple(self) -> Tuple[int, str, int]: + return (self.level, self.description, self.line_number) + + def __repr__(self) -> str: + return ( + f"HeaderInfo({self.level}, '{self.description}', {self.line_number})" + ) + + +HeaderList = List[HeaderInfo] + + +def header_list_to_str(header_list: HeaderList) -> str: + """ + Convert a list of headers into a string. + + :param header_list: list of headers + :return: string representation of the header list + """ + return "\n".join([str(header) for header in header_list]) + + +def sanity_check_header_list(header_list: HeaderList) -> None: + """ + Check that the header list is valid. + + 1) The first header should be level 1. + 2) All level 1 headers are unique. 
+ 3) Check that consecutive elements in the header list only increase by at + most one level at a time (even if it can decrease by multiple levels). + - E.g., the following is valid: + ``` + # Header 1 + # Header 2 + ## Header 2.1 + ## Header 2.2 + # Header 3 + ``` + - E.g., the following is valid: + ``` + # Header1 + ## Header 1.1 + ### Header 1.1.1 + # Header 2 + ``` + - E.g., the following is not valid: + ``` + # Header 1 + ### Header 1.0.1 + # Header 2 + ``` + + :param header_list: list of headers to validate + """ + # 1) The first header should be level 1. + if header_list and header_list[0].level > 1: + _LOG.warning( + "First header '%s' at line %s is not level 1, but %s", + header_list[0].description, + header_list[0].line_number, + header_list[0].level, + ) + # 2) All level 1 headers are unique. + level_1_headers = [ + header.description for header in header_list if header.level == 1 + ] + hdbg.dassert_no_duplicates(level_1_headers) + # 3) Check that consecutive elements in the header list only increase by at + # most one level at a time (even if it can decrease by multiple levels). + if len(header_list) > 1: + for i in range(1, len(header_list)): + hdbg.dassert_isinstance(header_list[i - 1], HeaderInfo) + hdbg.dassert_isinstance(header_list[i], HeaderInfo) + if header_list[i].level - header_list[i - 1].level > 1: + msg = [] + msg.append( + "Consecutive headers increase by more than one level:" + ) + msg.append(f" {header_list[i - 1]}") + msg.append(f" {header_list[i]}") + msg = "\n".join(msg) + raise ValueError(msg) + + +# TODO(gp): Move sanity check outside? +def extract_headers_from_markdown( + lines: List[str], max_level: int, *, sanity_check: bool = True +) -> HeaderList: + """ + Extract headers from Markdown file and return an `HeaderList`. 
+ + :param lines: content of the input Markdown file as list of strings + :param max_level: maximum header levels to parse (e.g., '3' parses all levels + up to and including `###`, but not `####`) + :param sanity_check: whether to check that the header list is valid + :return: generated `HeaderList`, e.g., + ``` + [ + (1, "Chapter 1", 5), + (2, "Section 1.1", 10), ...] + ``` + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_lte(1, max_level) + header_list: HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(lines, start=1): + # TODO(gp): Use the iterator. + # Skip the visual separators. + if is_markdown_line_separator(line): + continue + # Get the header level and title. + is_header_, level, title = is_header(line) + if is_header_ and level <= max_level: + header_info = HeaderInfo(level, title, line_number) + header_list.append(header_info) + # Check the header list. + if sanity_check: + sanity_check_header_list(header_list) + else: + _LOG.debug("Skipping sanity check") + hdbg.dassert_isinstance(header_list, list) + return header_list + + +def header_list_to_vim_cfile( + markdown_file: str, header_list: HeaderList +) -> List[str]: + """ + Convert a list of headers into a Vim cfile format. + + Use the generated file in Vim as: + `:cfile OUTPUT_FILE` + Use `:cnext` and `:cprev` to navigate between headers. + + :param markdown_file: path to the input Markdown file + :param header_list: list of headers, where each header is a tuple containing + the line number, level, and title + :return: generated cfile content as a list of strings in the format: + ``` + ... + MARKDOWN_FILE:LINE_NUMBER:DESCRIPTION
+ ... + ``` + """ + hdbg.dassert_isinstance(markdown_file, str) + hdbg.dassert_isinstance(header_list, list) + _LOG.debug(hprint.to_str("header_list")) + output_lines = [ + f"{markdown_file}:{header_info.line_number}:{header_info.description}" + for header_info in header_list + ] + hdbg.dassert_isinstance(output_lines, list) + return output_lines + + +def header_list_to_markdown(header_list: HeaderList, mode: str) -> List[str]: + """ + Convert a list of headers into a Markdown format. + + :param header_list: list of headers, where each header is a tuple + containing the level, title, and line number + :param mode: format of the output: + - `list`: indents headers to create a nested list + - `headers`: uses Markdown header syntax (e.g., '#', '##', '###') + :return: generated Markdown content as a list of strings + """ + hdbg.dassert_isinstance(header_list, list) + _LOG.debug(hprint.to_str("header_list mode")) + output_lines = [] + for header_info in header_list: + level, title, line_number = header_info.as_tuple() + _ = line_number + if mode == "list": + header_prefix = " " * (level - 1) + "-" + elif mode == "headers": + header_prefix = "#" * level + else: + raise ValueError(f"Invalid mode '{mode}'") + output_lines.append(f"{header_prefix} {title}") + hdbg.dassert_isinstance(output_lines, list) + return output_lines + + +# ############################################################################# +# Process headers. +# ############################################################################# + + +def format_headers(lines: List[str], out_file_name: str, max_lev: int) -> None: + """ + Format the headers in the input lines and write the formatted text to the + output file. 
+ + :param lines: list of input lines to process + :param out_file_name: name of the output file to write the formatted + text to + :param max_lev: maximum level of headings to include in the + formatted text + """ + hdbg.dassert_isinstance(lines, list) + txt = lines[:] + # + for line in txt: + m = re.search(r"max_level=(\d+)", line) + if m: + max_lev = int(m.group(1)) + _LOG.warning("Inferred max_level=%s", max_lev) + break + hdbg.dassert_lte(1, max_lev) + # Remove all headings. + txt_tmp = [] + for line in txt: + # Keep the comments. + if not is_markdown_line_separator(line): + txt_tmp.append(line) + txt = txt_tmp[:] + # Add proper heading of the correct length. + txt_tmp = [] + for line in txt: + # Keep comments. + found = False + for i in range(1, max_lev + 1): + if line.startswith("#" * i + " "): + row = "#" * i + " " + "#" * (79 - 1 - i) + txt_tmp.append(row) + txt_tmp.append(line) + txt_tmp.append(row) + found = True + if not found: + txt_tmp.append(line) + # TODO(gp): Remove all empty lines after a heading. + # TODO(gp): Format title (first line capital and then small). + hparser.to_file(txt_tmp, out_file_name) + + +def modify_header_level(lines: List[str], level: int) -> List[str]: + """ + Increase or decrease the level of headings by the specified amount. + + :param lines: input lines to modify + :param level: amount to adjust header levels (positive increases, + negative decreases) + :return: modified lines with header levels adjusted + """ + hdbg.dassert_isinstance(lines, list) + txt_tmp = [] + for line in lines: + # TODO(gp): Use the iterator. NOTE(review): `rstrip(r"\n")` below removes trailing backslash and `n` characters (raw string), not newlines, so a line ending in `n` is truncated; `"\n"` was probably intended. + line = line.rstrip(r"\n") + is_header_, current_level, title = is_header(line) + if is_header_: + modified_level = current_level + level + # Ensure modified level is within valid range (1-6 for markdown headers).
+ hdbg.dassert_lte(1, modified_level) + hdbg.dassert_lte(modified_level, 6) + line = "#" * modified_level + " " + title + txt_tmp.append(line) + hdbg.dassert_isinstance(txt_tmp, list) + return txt_tmp + + +# ############################################################################# +# _HeaderTreeNode +# ############################################################################# + + +# This is a different representation of the data than the one in `HeaderList` +# because it is a tree structure. So we use a different type hint. +_HeaderTree = List[HeaderInfo] + + +def build_header_tree(header_list: HeaderList) -> _HeaderTree: + """ + Build a tree (list of Node objects) from the flat list. + + We assume that the level changes never jump by more than 1. + + :param header_list: flat list of headers + :return: tree structure of headers + """ + tree: _HeaderTree = [] + stack: _HeaderTree = [] + for node in header_list: + if node.level == 1: + tree.append(node) + stack = [node] + else: + # Pop until we find the proper parent: one with level < current + # level. + while stack and stack[-1].level >= node.level: + stack.pop() + if stack: + stack[-1].children.append(node) + else: + tree.append(node) + stack.append(node) + # hdbg.dassert_eq(len(header_list), len(tree)) + # hdbg.dassert_eq(len(stack), 0) + return tree + + +def _find_header_tree_ancestry( + tree: _HeaderTree, level: int, description: str +) -> Optional[_HeaderTree]: + """ + Recursively search for the node matching (level, description). + + If found, return the ancestry as a list from the root down to that + node. Otherwise return None. 
+ + :param tree: header tree to search + :param level: header level to match + :param description: header description to match + :return: ancestry list from root to matching node, or None if not + found + """ + for node in tree: + if node.level == level and node.description == description: + return [node] + result = _find_header_tree_ancestry(node.children, level, description) + if result: + return [node] + result + return None + + +def header_tree_to_str( + tree: _HeaderTree, + ancestry: Optional[_HeaderTree], + *, + open_modifier: str = "**", + close_modifier: str = "**", + indent: int = 0, +) -> str: + """ + Return the tree as a string. + + Only expand (i.e. recursively include children) for a node if it is part of + the ancestry of the selected node. + + :param tree: tree to convert to a string + :param ancestry: ancestry of the selected node + :param open_modifier: modifier to use for the open of the selected node + :param close_modifier: modifier to use for the close of the selected node + :param indent: indent of the tree + :return: string representation of the tree + + - Nodes not in the ancestry are included on one line (even if they have + children). + - The selected node (last in the ancestry) is included highlighted. + """ + prefix = " " * indent + "- " + result = [] + for node in tree: + _LOG.debug(hprint.to_str("node")) + # Check if this node is the next expected one in the ancestry branch. + if ancestry and node is ancestry[0]: + # If this is the last in the ancestry, it is the selected node. + val = prefix + if len(ancestry) == 1: + val += open_modifier + node.description + close_modifier + else: + val += node.description + _LOG.debug("-> %s", hprint.to_str("val")) + if val: + result.append(val) + # Expand this node’s children using the rest of the ancestry. 
+ val = header_tree_to_str( + node.children, + ancestry[1:], + indent=indent + 1, + open_modifier=open_modifier, + close_modifier=close_modifier, + ) + else: + # For nodes not on the selected branch, include them without + # expanding. + val = prefix + node.description + _LOG.debug("-> %s", hprint.to_str("val")) + if val: + result.append(val) + return "\n".join(result) + + +def selected_navigation_to_str( + tree: _HeaderTree, + level: int, + description: str, + *, + open_modifier: str = "**", + close_modifier: str = "**", +) -> str: + """ + Given a level and description for the selected node, print the navigation. + + :param tree: header tree + :param level: level of the selected node + :param description: description of the selected node + :param open_modifier: modifier for opening the selected node + :param close_modifier: modifier for closing the selected node + :return: navigation string with selected node highlighted + """ + ancestry = _find_header_tree_ancestry(tree, level, description) + hdbg.dassert_ne( + ancestry, + None, + "Node (%s, '%s') not found", + level, + description, + ) + _LOG.debug(hprint.to_str("ancestry")) + txt = header_tree_to_str( + tree, + ancestry, + open_modifier=open_modifier, + close_modifier=close_modifier, + ) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py new file mode 100644 index 000000000..a471a44cc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py @@ -0,0 +1,367 @@ +""" +Import as: + +import helpers.hmarkdown_rules as hmarrule +""" + +import logging +import re +from typing import Dict, List + +import helpers.hdbg as hdbg +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint +from 
helpers.hmarkdown_headers import ( + extract_headers_from_markdown, + sanity_check_header_list, +) + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Add a decorator like in hprint to process both strings and lists +# of strings. + +# ############################################################################# +# Rules processing. +# ############################################################################# + +# Rules are organized in 4 levels of a markdown file: +# +# 1) Rule sets (level 1) +# - E.g., `General`, `Python`, `Notebooks`, `Markdown` +# - Level 1 is a set of rules determined mainly by the type of the file we +# are processing +# - Several sets of rules can be applied to a given file type +# - E.g., rules in `Python` and `Notebooks` apply to all Python files +# 2) Sections (level 2) +# - E.g., `Naming`, `Comments`, `Code_design`, `Imports`, `Type_annotations` +# 3) Targets (level 3) +# - E.g., LLM vs Linter +# 4) Atomic rules (level 4) +# - This is the set of rules that are applied to the file +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + +# Extract the rules from the markdown file: +# ``` +# > extract_toc_from_txt.py \ +# -i docs/code_guidelines/all.coding_style_guidelines.reference.md \ +# --max_level 2 +# - General +# - Spelling +# - LLM +# - Linter +# - Python +# - Naming +# - LLM +# - Linter +# - Docstrings +# - ... 
+# - Comments +# - Code_implementation +# - Code_design +# - Imports +# - Type_annotations +# - Functions +# - Scripts +# - Logging +# - Misc +# - Unit_tests +# - All +# - Notebooks +# - General +# - Plotting +# - Jupytext +# - Markdown +# - Naming +# - General +# ``` + +# - The rules to apply to a Python file are automatically extracted as: +# `([`General:*`, `Python:*`], `LLM`)` +# - The rules to apply to a Notebook file are automatically extracted as: +# `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` +# - A user can specify to apply a subset of rules like +# `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` +# - Atomic rules are the first-level bullets of the markdown file, e.g., +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + + +def sanity_check_rules(lines: List[str]) -> None: + """ + Sanity check the header structure of the rules markdown text. + + :param lines: lines of the rules markdown text to check + """ + header_list = extract_headers_from_markdown(lines, max_level=5) + # 1) Start with level 1 headers. + # 2) All level 1 headers are unique. + # 3) Header levels increase by at most 1 at a time (they can decrease by more). + sanity_check_header_list(header_list) + # 4) Level 3 headers are always `LLM` or `Linter`. + # for header in header_list: + # if header.level == 3: + # hdbg.dassert_in(header.description, ["LLM", "Linter"]) + # TODO(gp): Implement this. + # 5) All headers have no spaces. + # TODO(gp): Implement this.
+ + +# A `Rule` is a string separated by `:` characters, where each part can be: +# - `*` (which means "match any string") +# - a `string` (e.g., `Spelling`) +# - a list of strings separated by `|` (e.g., `LLM|Linter`) +# +# E.g., valid rules are: +# - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` +# - For a Python file -> `General|Python:*:LLM` +# - For a Notebook file -> `General|Python|Notebooks:*:LLM` +# - `Python:Naming|Docstrings|Comments:LLM` +SelectionRule = str + + +# A `Guidelines` is a header list with only level 1 headers storing the full +# hierarchy of the rules as a description, e.g., +# `(1, "Spelling:All:LLM", xyz)` +# TODO(gp): Make Guidelines descend from HeaderList. + +HeaderInfo = hmarhead.HeaderInfo +HeaderList = hmarhead.HeaderList +Guidelines = HeaderList + + +def convert_header_list_into_guidelines( + header_list: HeaderList, +) -> Guidelines: + """ + Convert the header list into a `Guidelines` object with only level 1 + headers and full hierarchy of the rules as description. + + Expand a header list like: + ``` + - General + - Spelling + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + ``` + represented internally as: + ``` + (1, "General", xyz), + (2, "Spelling", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + (1, "Python", xyz), + (2, "Naming", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + ``` + into: + ``` + [ + (1, "General:Spelling:LLM", xyz), + (1, "General:Spelling:Linter", xyz), + (1, "Python:Naming:LLM", xyz), + (1, "Python:Naming:Linter", xyz), + ] + ``` + + :param header_list: input header list to convert + :return: guidelines with flattened hierarchy + """ + hdbg.dassert_isinstance(header_list, list) + # Store the last level headers. + level_1 = "" + level_2 = "" + # Accumulate the last level headers. + level_3_headers = [] + # Scan the header list. + for header_info in header_list: + level, description, line_number = header_info.as_tuple() + # Store the headers found at each level.
+ if level == 1: + level_1 = description + elif level == 2: + level_2 = description + elif level == 3: + # Store the level 3 header. + hdbg.dassert_ne(level_1, "") + hdbg.dassert_ne(level_2, "") + full_level_3 = f"{level_1}:{level_2}:{description}" + header_info_tmp = HeaderInfo(1, full_level_3, line_number) + level_3_headers.append(header_info_tmp) + else: + raise ValueError(f"Invalid header info={header_info}") + return level_3_headers + + +def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: + r""" + Convert a rule into an actual regular expression. + + E.g., + - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` + - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` + - `Spelling|Python:*:LLM` -> `(Spelling|Python):(\S*):LLM` + - `Python:*:Linter` -> `Python:(\S*):Linter` + + :param selection_rule: rule to convert to regex + :return: regex pattern string + """ + hdbg.dassert_isinstance(selection_rule, SelectionRule) + # Parse the rule into tokens. + selection_rule_parts = selection_rule.split(":") + hdbg.dassert_eq(len(selection_rule_parts), 3) + # Process each part of the rule regex. + rule_parts_out = [] + for rule_part_in in selection_rule_parts: + hdbg.dassert_not_in(" ", rule_part_in) + if rule_part_in == "*": + # Convert `*` into `\S*`. + rule_part_out = r"(\S*)" + elif "|" in rule_part_in: + # Convert `LLM|Linter` into `(LLM|Linter)`. + rule_part_out = "(" + rule_part_in + ")" + else: + # Keep the string as is. + rule_part_out = rule_part_in + rule_parts_out.append(rule_part_out) + # Join the parts of the rule back together. + rule_out = ":".join(rule_parts_out) + return rule_out + + +def extract_rules( + guidelines: Guidelines, selection_rules: List[SelectionRule] +) -> Guidelines: + """ + Extract the set of rules from the `guidelines` that match the rule regex.
+ + :param guidelines: guidelines to extract the rules from + :param selection_rules: selection rules to use to extract the rules + :return: extracted rules + """ + hdbg.dassert_isinstance(guidelines, list) + hdbg.dassert_isinstance(selection_rules, list) + # A rule regex is a string separated by `:` characters, where each part is + # - `*` (meaning "any string") + # - a `string` (e.g., `Spelling`) + # - a list of strings separated by `|` (e.g., `LLM|Linter`) + # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. + # Convert each rule regex into a regular expression. + rule_regex_map: Dict[str, str] = {} + for rule_regex_str in selection_rules: + hdbg.dassert_isinstance(rule_regex_str, SelectionRule) + regex = _convert_rule_into_regex(rule_regex_str) + _LOG.debug(hprint.to_str("rule_regex_str regex")) + hdbg.dassert_not_in(rule_regex_str, rule_regex_map) + rule_regex_map[rule_regex_str] = regex + # Extract the set of rules from the `guidelines` that match the rule regex. + rule_sections = [] + for guideline in guidelines: + # A guideline description is a string separated by `:` characters, where each part is + # (1, "Python:Naming:Linter", xyz), + for k, v in rule_regex_map.items(): + if re.match(v, guideline.description): + _LOG.debug("%s matches %s", k, guideline.description) + if guideline not in rule_sections: + rule_sections.append(guideline) + # Select the rules. + _LOG.debug( + "Selected %s sections:\n%s", + len(rule_sections), + "\n".join([r.description for r in rule_sections]), + ) + return rule_sections + + +# TODO(gp): This seems private? +def parse_rules_from_txt(lines: List[str]) -> List[str]: + """ + Parse rules from a chunk of markdown text. + + - Extract first-level bullet point list items from text until the next one. + - Sub-lists nested under first-level items are extracted together with the + first-level items. 
+ + :param lines: list of text lines to process + ``` + - Item 1 + - Item 2 + - Item 3 + - Item 4 + ``` + :return: extracted bullet points + """ + hdbg.dassert_isinstance(lines, list) + # Store the first-level bullet points. + bullet_points = [] + # Store the current item including the first level bullet point and all + # its sub-items. + current_item = "" + for line in lines: + line = line.rstrip() + if not line: + continue + if re.match(r"^- ", line): + # Match first-level bullet point item. + if current_item: + # Store the previous item, if any. + bullet_points.append(current_item) + # Start a new first-level bullet point item. + current_item = line + elif re.match(r"^\s+- ", line): + # Match a sub-item (non first-level bullet point item). + # Append a sub-item to the current item. + current_item += "\n" + line + elif len(line.strip()) != 0 and current_item: + # Append a line to the current item. + current_item += "\n" + line + # Add the last item if there is one. + if current_item: + bullet_points.append(current_item) + hdbg.dassert_isinstance(bullet_points, list) + return bullet_points + + +def extract_rules_from_section( + lines: List[str], start_line_number: int +) -> List[str]: + """ + Extract rules from a section of a markdown file. + + :param lines: list of markdown text lines to extract the rules from + :param start_line_number: line number of the section to start extracting + the rules from + :return: extracted rules + """ + hdbg.dassert_isinstance(lines, list) + # Find the line number of the next header. NOTE(review): the loop asserts if no line starting with `#` is found from index `start_line_number` onwards (e.g., last section of a file) -- confirm this is intended. + end_line_number = start_line_number + while True: + hdbg.dassert_lt(end_line_number, len(lines)) + line = lines[end_line_number] + if line.startswith("#"): + break + end_line_number += 1 + _LOG.debug("end_line_number=%s", end_line_number) + # Parse the markdown text into a list of bullet points. + bullet_points = parse_rules_from_txt( + lines[start_line_number:end_line_number] + ) + # Extract the rules from the bullet points.
+ rules = [] + for bullet_point in bullet_points: + rules.append(bullet_point) + hdbg.dassert_isinstance(rules, list) + return rules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py new file mode 100644 index 000000000..2cefec7a8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py @@ -0,0 +1,201 @@ +""" +Import as: + +import helpers.hmarkdown_slides as hmarslid +""" + +import logging +import re +from typing import Any, Callable, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +from helpers.hmarkdown_comments import process_comment_block +from helpers.hmarkdown_headers import ( + HeaderInfo, + HeaderList, + is_markdown_line_separator, +) + +_LOG = logging.getLogger(__name__) + + +_TRACE = True + + +def extract_slides_from_markdown( + lines: List[str], +) -> Tuple[HeaderList, int]: + """ + Extract slides (i.e., sections prepended by `*`) from Markdown file and + return an `HeaderList`. + + :param lines: content of the input Markdown file as list of strings + :return: tuple containing: + - generated `HeaderList` + ``` + [ + (1, "Slide 1", 5), + (1, "Slide 2", 10), ...] + ``` + - last line number of the file, e.g., '100' + """ + hdbg.dassert_isinstance(lines, list) + header_list: HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(lines, start=1): + _LOG.debug("%d: %s", line_number, line) + # TODO(gp): Use the iterator. + # Skip the visual separators. + if is_markdown_line_separator(line): + continue + # Get the header level and title. 
+ m = re.match(r"^\* (.*)$", line) + if m: + title = m.group(1) + header_info = HeaderInfo(1, title, line_number) + header_list.append(header_info) + last_line_number = len(lines) + # Return results. + hdbg.dassert_isinstance(header_list, list) + return header_list, last_line_number + + +# TODO(gp): Consider passing and returning List[str] +def process_slides(txt: str, transform: Callable[..., Any]) -> str: + """ + Process markdown text by applying a transform function to each slide. + + - Slides are sections prepended by `*` + - The text is processed by: + - Extracting the slides one by one + - Calling a `transform()` function on each slide (defined by the user) + - Joining the transformed slides back together + - Comments are left untouched. + + :param txt: markdown text to process + :param transform: function to transform each slide + :return: transformed text + """ + hdbg.dassert_isinstance(txt, str) + # Text of the current slide. + slide_txt: List[str] = [] + # Store all the transformed slides. + transformed_txt: List[str] = [] + # True inside a block to skip. + in_skip_block = False + # True inside a slide. + in_slide = False + # Track line number where slide started. + slide_start_line = 0 + lines = txt.splitlines() + for i, line in enumerate(lines): + _LOG.debug("%s:line='%s'", i, line) + # 1) Remove comment block. + do_continue, in_skip_block = process_comment_block(line, in_skip_block) + if _TRACE: + _LOG.debug(" -> %s", hprint.to_str("do_continue in_skip_block")) + if do_continue: + transformed_txt.append(line) + continue + # 2) Process slide. + if _TRACE: + _LOG.debug(" -> %s", hprint.to_str("in_slide")) + if line.startswith("* ") or line.startswith("#### "): + _LOG.debug("### Found slide") + # Found a slide or the end of the file. + if slide_txt: + _LOG.debug("# Transform slide") + # Transform the slide. 
+ slide_title = slide_txt[0] + transformed_slide = transform( + slide_txt, + slide_title=slide_title, + slide_line_number=slide_start_line, + ) + hdbg.dassert_isinstance(transformed_slide, list) + transformed_txt.extend(transformed_slide) + else: + _LOG.debug("# First slide") + # Start a new slide. + slide_txt = [] + slide_txt.append(line) + slide_start_line = i + in_slide = True + elif in_slide: + _LOG.debug("# Accumulate slide") + slide_txt.append(line) + else: + _LOG.debug("# Accumulate txt outside slide") + transformed_txt.append(line) + # Process the last slide, if needed. + if slide_txt: + hdbg.dassert(in_slide) + in_slide = False + # Transform the slide. + slide_title = slide_txt[0] + transformed_slide = transform( + slide_txt, + slide_title=slide_title, + slide_line_number=slide_start_line, + ) + hdbg.dassert_isinstance(transformed_slide, list) + transformed_txt.extend(transformed_slide) + # + hdbg.dassert( + not in_skip_block, + "Found end of file while still parsing a comment block", + ) + hdbg.dassert(not in_slide, "Found end of file while still parsing a slide") + # Join the transformed slides back together. + result = "\n".join(transformed_txt) + return result + + +# ############################################################################# +# Slides conversion to markdown and back +# ############################################################################# + + +def convert_slide_to_markdown(lines: List[str], *, level: int = 5) -> List[str]: + """ + Convert slide to standard markdown. + + - Handle * bullets to markdown headers level 5 + + :param lines: list of lines to convert + :param level: level of the markdown headers to convert to + :return: list of converted lines + """ + hdbg.dassert_isinstance(lines, list) + converted_lines = [] + for line in lines: + if line.startswith("* "): + # Convert slide bullet to markdown header level 5. 
def convert_markdown_to_slide(lines: List[str], *, level: int = 5) -> List[str]:
    """
    Convert standard markdown back to slide.

    - A line starting with exactly `level` `#` characters followed by a space
      is turned into a `* ` bullet
    - Any other line is passed through unchanged

    :param lines: list of lines to convert
    :param level: level of the markdown headers to convert back to slide
        bullets
    :return: list of converted lines
    """
    hdbg.dassert_isinstance(lines, list)
    header_prefix = "#" * level + " "
    converted_lines = []
    for line in lines:
        if line.startswith(header_prefix):
            # Strip exactly the prefix that was matched, so that the
            # conversion is correct for any `level` and not only for the
            # default one.
            converted_line = "* " + line[len(header_prefix) :]
            converted_lines.append(converted_line)
        else:
            converted_lines.append(line)
    return converted_lines
+ + E.g., + ``` + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + More text after + ``` + is replaced with: + ``` + Some text before + + More text after + ``` + + :param lines: list of lines to process + :return: tuple containing: + - list of lines with the tables replaced by tags + - mapping from tags to the table text + """ + hdbg.dassert_isinstance(lines, list) + result = [] + table_map = {} + table_count = 0 + i = 0 + while i < len(lines): + line = lines[i].strip() + # Check if this line starts a table (contains |). + if "|" in line and line.strip(): + # Look ahead to see if next line is a separator. + if i + 1 < len(lines): + next_line = lines[i + 1].strip() + # Check if next line is a table separator (contains --- and |). + if "|" in next_line and "-" in next_line: + # Found a table, collect all table lines. + table_lines = [] + # Add header line. + table_lines.append(lines[i]) + i += 1 + # Add separator line. + table_lines.append(lines[i]) + i += 1 + # Add data rows (continue while lines contain |). + while ( + i < len(lines) + and "|" in lines[i].strip() + and lines[i].strip() + ): + table_lines.append(lines[i]) + i += 1 + # Store the table. + table_count += 1 + table_text = "\n".join(table_lines) + table_map[str(table_count)] = table_text + result.append(f"") + continue + # Not a table line, add as-is. + result.append(lines[i]) + i += 1 + return result, table_map + + +def replace_tags_with_tables( + lines: List[str], table_map: Dict[str, str] +) -> List[str]: + """ + Replace tags with markdown tables. + + :param lines: list of lines to process + :param table_map: mapping from tags to table text + :return: list of lines with tags replaced by tables + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(table_map, dict) + # Initialize output. + result = [] + table_map_copy = table_map.copy() + # Parse data. 
+ for line in lines: + if line.startswith(""): + # Extract table number from tag like . + tag_match = line[6:-1] # Remove '' + hdbg.dassert_in( + tag_match, table_map_copy, f"Found unmatched tag {tag_match}" + ) + # Split table text into lines and add them. + table_text = table_map_copy[tag_match] + table_lines = table_text.split("\n") + result.extend(table_lines) + # Remove used tag from map. + del table_map_copy[tag_match] + else: + result.append(line) + # Ensure all tags were used. + hdbg.dassert_eq( + len(table_map_copy), + 0, + f"Found {len(table_map_copy)} unmatched tags: {list(table_map_copy.keys())}", + ) + return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py new file mode 100644 index 000000000..7d8cb8d75 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py @@ -0,0 +1,164 @@ +""" +Import as: + +import helpers.hmarkdown_toc as hmartoc +""" + +import logging +import os +import re +import tempfile +from typing import Any, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import dev_scripts_helpers.dockerize.lib_markdown_toc as dshdlmato + +_LOG = logging.getLogger(__name__) + +# ############################################################################# +# YAML preamble +# ############################################################################# + + +def extract_yaml_frontmatter(lines: List[str]) -> Tuple[List[str], List[str]]: + """ + Extract YAML front matter from the beginning of the file. + + YAML front matter is delimited by `---` at the beginning and end. 
def reattach_yaml_frontmatter(
    yaml_frontmatter: List[str], lines: List[str]
) -> List[str]:
    """
    Put the YAML front matter back in front of the content lines.

    When there is front matter and the content does not already start with
    an empty line, one empty line is inserted between the two.

    :param yaml_frontmatter: The YAML front matter lines to reattach.
    :param lines: The content lines to prepend the front matter to.
    :return: `lines` itself when there is no front matter, otherwise a new
        list with the front matter followed by the content.
    """
    if not yaml_frontmatter:
        # Nothing to reattach: hand back the content untouched.
        return lines
    # Separate front matter and content with one empty line, unless the
    # content is empty or already begins with one.
    needs_blank_line = bool(lines) and lines[0] != ""
    separator = [""] if needs_blank_line else []
    return yaml_frontmatter + separator + lines
+ :param use_dockerized_markdown_toc: if True, run markdown-toc in a + Docker container + :return: The lines with the updated TOC. + """ + _LOG.debug("lines=%s", lines) + # Check whether there is a TOC otherwise add it. + # Add `` comment in the doc to generate the TOC after that + # line. By default, it will generate at the top of the file. + # This workaround is useful to generate the TOC after the heading of the doc + # at the top and not include it in the TOC. + if "" not in lines: + _LOG.warning("No tags for table of content in md file: adding it") + lines = [""] + lines + txt = "\n".join(lines) + # Write file. + curr_dir = os.getcwd() + tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name + hio.to_file(tmp_file_name, txt) + # Process TOC. + cmd_opts: List[str] = [] + if use_dockerized_markdown_toc: + # Run `markdown-toc` in a Docker container. + use_sudo = hdocker.get_use_sudo() + force_rebuild = False + dshdlmato.run_dockerized_markdown_toc( + tmp_file_name, + cmd_opts, + use_sudo=use_sudo, + force_rebuild=force_rebuild, + ) + else: + # Run `markdown-toc` installed on the host directly. + executable = "markdown-toc" + cmd = [executable] + cmd_opts + cmd.append("-i " + tmp_file_name) + # + cmd_as_str = " ".join(cmd) + _, output_tmp = hsystem.system_to_string(cmd_as_str, abort_on_error=True) + _LOG.debug("output_tmp=%s", output_tmp) + # Read file. + txt = hio.from_file(tmp_file_name) + # Clean up. + os.remove(tmp_file_name) + # Remove empty lines introduced by `markdown-toc`. + txt = hprint.remove_lead_trail_empty_lines(txt) + ret = txt.split("\n") + hdbg.dassert_isinstance(ret, list) + return ret + + +def remove_table_of_contents(txt: str) -> str: + """ + Remove the table of contents from the text of a markdown file. + + The table of contents is stored between + ``` + + ... 
def get_multiple_plots(
    num_plots: int,
    num_cols: int,
    y_scale: Optional[float] = None,
    *args: Any,
    **kwargs: Any,
) -> Tuple[mpl.figure.Figure, np.array]:
    """
    Build a figure with a grid of axes large enough for `num_plots` plots.

    The grid has `num_cols` columns and as many rows as needed. Axes in
    excess of `num_plots` are removed from the figure.

    :param num_plots: number of plots
    :param num_cols: number of columns to use in the subplot
    :param y_scale: the height of each row of plots. If `None`, the size of
        the whole figure is left to the default `figsize`
    :param args: extra positional arguments forwarded to `plt.subplots()`
    :param kwargs: extra keyword arguments forwarded to `plt.subplots()`;
        `tight_layout=True` is added unless `tight_layout` is already passed
        or `constrained_layout` is truthy
    :return: figure and flat array with the first `num_plots` axes
    """
    hdbg.dassert_lte(1, num_plots)
    hdbg.dassert_lte(1, num_cols)
    num_rows = math.ceil(num_plots / num_cols)
    # Heuristic to find the dimension of the fig.
    figsize: Optional[Tuple[float, float]] = None
    if y_scale is not None:
        hdbg.dassert_lt(0, y_scale)
        figsize = (20, num_rows * y_scale)
    # Use a tight layout unless the caller chose a layout explicitly.
    caller_set_layout = "tight_layout" in kwargs or kwargs.get(
        "constrained_layout", False
    )
    if not caller_set_layout:
        kwargs["tight_layout"] = True
    fig, axes = plt.subplots(
        num_rows, num_cols, *args, figsize=figsize, **kwargs
    )
    # `plt.subplots()` returns a bare object for a 1x1 grid: always work on
    # a flat array.
    axes = axes.flatten() if isinstance(axes, np.ndarray) else np.array([axes])
    # Remove extra axes that can appear when `num_cols` > 1.
    for unused_ax in axes[num_plots:]:
        unused_ax.remove()
    return fig, axes[:num_plots]
def dedent_python_code_blocks(txt: str) -> str:
    """
    Dedent Python code blocks so they are aligned to column 0.

    This is needed by mkdocs to render a Python code block correctly.

    - Only blocks whose opening "```python" fence starts at column 0 are
      dedented
    - Blocks whose opening fence is indented (e.g., inside a list item) are
      passed through unchanged
    - A block that is still open at the end of the text (i.e., without a
      closing fence) is emitted unchanged instead of being dropped

    :param txt: Input markdown text
    :return: Text with Python code blocks dedented
    """
    import textwrap

    lines = txt.split("\n")
    result = []
    # Store whether the parser is inside a code block.
    in_python_block = False
    # Store the current Python code block.
    code_block_lines = []
    # Track whether current block is indented (inside a list item).
    block_is_indented = False
    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "```python":
            in_python_block = True
            # Only dedent top-level blocks (fence at column 0).
            block_is_indented = line != line.lstrip()
            result.append(line)
        elif stripped_line == "```" and in_python_block:
            if code_block_lines:
                if block_is_indented:
                    # Indented block: pass through unchanged.
                    result.extend(code_block_lines)
                else:
                    # Dedent only top-level code blocks.
                    code_text = "\n".join(code_block_lines)
                    dedented_code = textwrap.dedent(code_text)
                    result.extend(dedented_code.split("\n"))
                code_block_lines = []
            result.append(line)
            in_python_block = False
            block_is_indented = False
        elif in_python_block:
            code_block_lines.append(line)
        else:
            result.append(line)
    # The text ended inside a block without a closing fence: keep the
    # buffered lines as they are, so that no content is lost.
    result.extend(code_block_lines)
    return "\n".join(result)
def rewrite_absolute_doc_links(txt: str) -> str:
    """
    Turn absolute `/docs/...md` link targets into root-relative HTML ones.

    Only parenthesized targets that start with `/docs/` and end with `.md`,
    optionally followed by a `#anchor`, are rewritten. E.g.,
    `(/docs/path/file.md#sec)` becomes `(/path/file.html#sec)`. Relative
    links and links to other file types are left as they are.

    MkDocs only converts relative `.md` links to `.html`, so absolute links
    would otherwise be left unchanged and 404 at serve time.

    :param txt: Input markdown text
    :return: Text with absolute /docs/ links rewritten
    """
    # Match the target of a markdown link, including an optional anchor.
    link_regex = r"\((/docs/[^)]+\.md(?:#[^)]*)?)\)"
    docs_prefix_regex = r"^/docs/"
    md_suffix_regex = r"\.md(#[^)]*)?$"

    def _html_suffix(suffix_match: re.Match) -> str:
        # Swap the extension and keep the anchor, if there is one.
        anchor = suffix_match.group(1)
        return ".html" + anchor if anchor else ".html"

    def _rewrite_target(link_match: re.Match) -> str:
        # Drop the /docs/ prefix, then convert .md to .html.
        target = re.sub(docs_prefix_regex, "/", link_match.group(1))
        target = re.sub(md_suffix_regex, _html_suffix, target)
        return "(" + target + ")"

    return re.sub(link_regex, _rewrite_target, txt)
Rewrite absolute /docs/ links to root-relative HTML links + + :param txt: Input markdown text + :return: Preprocessed markdown text + """ + txt = hmarkdo.remove_table_of_contents(txt) + txt = dedent_python_code_blocks(txt) + txt = replace_indentation_with_four_spaces(txt) + txt = convert_slides_to_markdown(txt, level=4) + txt = rewrite_absolute_doc_links(txt) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py new file mode 100644 index 000000000..66ed59b39 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py @@ -0,0 +1,121 @@ +""" +Import as: + +import helpers.hmodule as hmodule +""" + +import logging +import os +import subprocess +import textwrap +from typing import Any, Dict, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hserver as hserver + +_LOG = logging.getLogger(__name__) + +_WARNING = "\033[33mWARNING\033[0m" + + +# Use this to avoid extra dependencies from `hsystem`. +def _system_to_string(cmd: str) -> Tuple[int, str]: + """ + Run a command and return the output and the return code. + + :param cmd: command to run + :return: tuple of (return code, output) + """ + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + # Redirect stderr to stdout. + stderr=subprocess.STDOUT, + shell=True, + text=True, + ) + rc = result.returncode + output = result.stdout + output = output.strip() + return rc, output + + +def has_module(module: str) -> bool: + """ + Return whether a Python module can be imported or not. + """ + if module == "gluonts" and hserver.is_host_mac(): + # Gluonts and mxnet modules are not properly supported on the ARM + # architecture yet, see CmTask4886 for details. 
def install_module_if_not_present(
    import_name: str,
    *,
    package_name: Optional[str] = None,
    use_sudo: bool = True,
    use_activate: bool = False,
    venv_path: Optional[str] = None,
    quiet: bool = True,
) -> None:
    """
    Install a Python module if it is not already installed.

    The output of the install command is printed. The return code of the
    command is not checked.

    :param import_name: name used to import the module (e.g., "openai")
    :param package_name: name of the package on PyPI (if different from
        `import_name`)
    :param use_sudo: whether to use sudo to install the module
    :param use_activate: whether to use the activate script to install the
        module (e.g., "source /venv/bin/activate; pip install --quiet
        --upgrade openai")
    :param venv_path: path to the virtual environment (default: "/venv")
        E.g., /Users/saggese/src/venv/client_venv.helpers
    :param quiet: whether to install the module quietly
    """
    _has_module = has_module(import_name)
    if _has_module:
        print(f"Module '{import_name}' is already installed.")
        return
    print(f"Installing module '{import_name}'...")
    # Sometime the package name is different from the import name.
    # E.g., we import using `import dash_bootstrap_components` but the package
    # name is `dash-bootstrap-components`.
    if package_name is None:
        package_name = import_name
    if quiet:
        quiet_flag = "--quiet"
    else:
        quiet_flag = ""
    if venv_path is None:
        venv_path = "/venv"
    venv_path = os.path.join(venv_path, "bin/activate")
    # NOTE(review): the activate script is required to exist even when
    # `use_activate=False` -- confirm that this is intended.
    hdbg.dassert_file_exists(venv_path, f"Can't find venv_path='{venv_path}'")
    if use_activate:
        cmd = f'/bin/bash -c "(source {venv_path}; pip install {quiet_flag} --upgrade {package_name})"'
    else:
        cmd = f"pip install {quiet_flag} {package_name}"
    if use_sudo:
        cmd = f"sudo {cmd}"
    _, output = _system_to_string(cmd)
    print(output)
+import boto3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position + +import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position +import helpers.hs3 as hs3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position +import helpers.hunit_test as hunitest # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position + + +# ############################################################################# +# S3Mock_TestCase +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class S3Mock_TestCase(hunitest.TestCase): + # Mocked AWS credentials. + mock_aws_credentials_patch = umock.patch.dict( + hs3.os.environ, + { + "MOCK_AWS_ACCESS_KEY_ID": "mock_key_id", + "MOCK_AWS_SECRET_ACCESS_KEY": "mock_secret_access_key", + "MOCK_AWS_DEFAULT_REGION": "us-east-1", + }, + ) + mock_aws_credentials = None + mock_aws_profile = "__mock__" + # Mocked bucket. + mock_s3 = moto.mock_aws() + bucket_name = "mock_bucket" + # TODO(Nikola): Temporary here to ensure it is called only once. + # Used in some tests that are obtaining data from 3rd party providers. + binance_secret = None + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def _mock_get_s3fs( + self, aws_profile: Union[str, hs3.S3FileSystem] + ) -> hs3.S3FileSystem: + """ + Mock implementation of `get_s3fs` to use the mocked environment + variables from `moto`. 
+ """ + from s3fs import S3FileSystem + + hdbg.dassert_isinstance(aws_profile, (str, S3FileSystem)) + aws_profile = S3FileSystem(anon=False) + return aws_profile + + def set_up_test(self) -> None: + # Getting necessary secret before boto3 is mocked. + if self.binance_secret is None: + import helpers.hsecrets as hsecret + + self.binance_secret = hsecret.get_secret("binance.preprod.trading.1") + # Start boto3 mock. + self.mock_s3.start() + # Start AWS credentials mock. Must be started after moto mock, + # or it will be overridden by moto with `foobar` values. + self.mock_aws_credentials = self.mock_aws_credentials_patch.start() + # Initialize boto client and create bucket for testing. + s3_client = boto3.client("s3") + s3_client.create_bucket(Bucket=self.bucket_name) + # Precaution to ensure that we are using mocked botocore. + s3_test_client = boto3.client("s3") + buckets = s3_test_client.list_buckets()["Buckets"] + self.assertEqual(len(buckets), 1) + self.assertEqual(buckets[0]["Name"], self.bucket_name) + # Patch `get_s3fs` that uses the mocked environment variables. + self.mock_get_s3fs = umock.patch.object( + hs3, "get_s3fs", side_effect=self._mock_get_s3fs + ) + self.mock_get_s3fs.start() + + def tear_down_test(self) -> None: + # Empty the bucket otherwise deletion will fail. + s3_client = boto3.resource("s3") + hdbg.dassert_eq(self.bucket_name, "mock_bucket") + bucket = s3_client.Bucket(self.bucket_name) + bucket.objects.all().delete() + # Delete bucket. + bucket.delete() + # Stop mocked `get_s3fs`. + if hasattr(self, "mock_get_s3fs"): + self.mock_get_s3fs.stop() + # Stop moto. 
# TODO(gp): -> get_canonical_file_name_from_url
def get_file_name(url: str) -> str:
    """
    Given an URL from GitHub or from Jupyter server extract the path
    corresponding to the file.

    E.g.,
    - http://localhost:10001/notebooks/research/...
        oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb
      ->
      research/.../oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb

    - https://github.com/.../.../blob/master/...
        oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb
      ->
      .../oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb

    A string that is not one of the URLs above is returned as-is if it is
    the path of an existing file.

    :param url: URL (or path of an existing file) to convert
    :return: path of the file; if the input can't be resolved an assertion
        is triggered through `hdbg.dassert_is_not()`
    """
    ret = None
    # "http://localhost:10001/notebooks/...
    #   oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb"
    m = re.search(r"http.*://localhost:\d+/(.*)", url)
    if m:
        ret = m.group(1)
        to_remove = "notebooks/"
        # Use `find()` and not `index()`: a Jupyter URL without
        # "notebooks/" (e.g., ".../tree/...") must be kept as it is instead
        # of raising `ValueError`.
        idx = ret.find(to_remove)
        if idx >= 0:
            end_idx = idx + len(to_remove)
            ret = ret[end_idx:]
    if ret is None:
        # https://github.com/.../.../blob/master/...
        #   oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb
        m = re.search(r"http.*://.*github.com/(.*)", url)
        if m:
            ret = m.group(1)
            # Remove ".../.../blob/master"
            ret = "/".join(ret.split("/")[4:])
    if ret is None:
        if os.path.exists(url):
            ret = url
    if ret is None:
        hdbg.dassert_is_not(ret, None, "url=%s", url)
    return ret  # type: ignore
+ import matplotlib.pyplot as plt + + # plt.rcParams + plt.rcParams["figure.figsize"] = (20, 5) + plt.rcParams["legend.fontsize"] = 14 + plt.rcParams["font.size"] = 14 + plt.rcParams["image.cmap"] = "rainbow" + if False: + # Tweak the size of the plots to make it more readable when embedded in + # documents or presentations. + # font = {'family' : 'normal', + # #'weight' : 'bold', + # 'size' : 32} + # matplotlib.rc('font', **font) + scale = 3 + small_size = 8 * scale + medium_size = 10 * scale + bigger_size = 12 * scale + # Default text sizes. + plt.rc("font", size=small_size) + # Fontsize of the axes title. + plt.rc("axes", titlesize=small_size) + # Fontsize of the x and y labels. + plt.rc("axes", labelsize=medium_size) + # Fontsize of the tick labels. + plt.rc("xtick", labelsize=small_size) + # Fontsize of the tick labels. + plt.rc("ytick", labelsize=small_size) + # Legend fontsize. + plt.rc("legend", fontsize=small_size) + # Fontsize of the figure title. + plt.rc("figure", titlesize=bigger_size) + else: + print("No module '{module}'") + # Seaborn. + module = "seaborn" + if hmodule.has_module(module): + import seaborn as sns + + if sns_set: + sns.set() + else: + print("No module '{module}'") + # Pandas. + module = "pandas" + if hmodule.has_module(module): + import pandas as pd + + pd.set_option("display.max_rows", 500) + pd.set_option("display.max_columns", 500) + pd.set_option("display.width", 1000) + else: + print("No module '{module}'") + # Warnings. + import helpers.hwarnings as hwarnin + + # Force the linter to keep this import. + _ = hwarnin + + +def _info_print(msg: str, *args, **kwargs) -> None: + """ + Print a message with optional formatting arguments. + """ + if args: + msg = msg % args + print(msg) + + +def set_logger_to_print(log) -> None: + """ + Replace logger.info method with a print function. 
def set_all_loggers_to_print() -> None:
    """
    Make `info()` of every logger known to the root manager print to stdout.
    """
    registered_names = logging.root.manager.loggerDict
    for logger_name in registered_names:
        # print("Setting logger %s to print" % logger_name)
        set_logger_to_print(logging.getLogger(logger_name))
# From https://stackoverflow.com/questions/49555991
@contextlib.contextmanager
def random_seed_context(seed: int) -> Iterator:
    """
    Run the enclosed block with the numpy global RNG seeded with `seed`.

    The state of the global RNG is saved on entry and restored on exit, also
    when the block raises.

    :param seed: seed applied for the duration of the block
    """
    saved_state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        # Put the global RNG back where the caller left it.
        np.random.set_state(saved_state)
def _to_skip(is_: bool, mode: str) -> bool:
    """
    Decide whether an attribute is filtered out, given its category flag.

    :param is_: `True` when the attribute belongs to the category being
        checked
    :param mode: filtering policy
        - `skip`: drop the attributes that belong to the category
        - `only`: drop the attributes that do not belong to the category
        - `all`: drop nothing
    :return: `True` if the attribute must be skipped
    """
    hdbg.dassert_in(mode, ("skip", "only", "all"))
    if mode == "skip":
        # Members of the category are dropped.
        return bool(is_)
    if mode == "only":
        # Everything outside the category is dropped.
        return not is_
    if mode == "all":
        # Nothing is dropped.
        return False
    raise ValueError(f"Invalid mode='{mode}'")
def _attr_to_str(attr_value: Any, print_type: bool) -> str:
    """
    Render an attribute value as a compact string.

    DataFrames and Series are summarized by their shape (e.g.,
    `pd.df((3, 2))`), any other value is converted with `str()`.

    :param attr_value: value to render
    :param print_type: if `True`, append the type of the value
    :return: string representation of the value
    """
    _LOG.debug("type(attr_value)=%s", type(attr_value))
    if isinstance(attr_value, pd.DataFrame):
        # The closing parenthesis was missing in the original format string.
        res = f"pd.df({attr_value.shape})"
    elif isinstance(attr_value, pd.Series):
        res = f"pd.srs({attr_value.shape})"
    else:
        res = str(attr_value)
    # Add the type, if needed.
    if print_type:
        res += " " + _type_to_str(attr_value)
    return res
+ values = obj.get_config_attributes() + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + if sort: + values = sorted(values) + for attr_name in values: + if attr_mode == "__dict__": + attr_value = obj.__dict__[attr_name] + elif attr_mode in ["dir", "config"]: + attr_value = getattr(obj, attr_name) + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + skip = _to_skip_attribute( + attr_name, + attr_value, + callable_mode, + private_mode, + dunder_mode, + attr_names_to_skip, + ) + # `attr_value` can be callable object and needs to be properly handled + # for string conversion and formatting. + _LOG.debug(hprint.to_str("attr_name attr_value skip")) + if skip: + continue + # + out = f"{attr_name}=" + _attr_to_str(attr_value, print_type) + ret.append(out) + # + txt = hprint.to_object_str(obj) + "=" + txt += "(" + ", ".join(ret) + ")" + return txt + + +# ############################################################################# +# obj_to_repr +# ############################################################################# + + +def _attr_to_repr(attr_name: Any, attr_value: Any, print_type: bool) -> str: + """ + Print an object as name of its class and its attributes. 
+ + E.g., + ``` + : + a='False' + b='hello' + c='3.14' + ``` + """ + _LOG.debug("type(attr_value)=%s", type(attr_value)) + if isinstance(attr_value, (pd.DataFrame, pd.Series)): + attr_value_as_str = hpandas.df_to_str(attr_value) + elif isinstance(attr_value, dict): + attr_value_as_str = pprint.pformat(attr_value) + else: + attr_value_as_str = repr(attr_value) + # + if len(attr_value_as_str.split("\n")) > 1: + # The string representing the attribute value spans multiple lines, so + # print like: + # ``` + # attr_name= (type) + # attr_value + # ``` + out = f"{attr_name}=" + if print_type: + out += " " + _type_to_str(attr_value) + out += "\n" + hprint.indent(attr_value_as_str) + else: + # The string representing the attribute value is a single line, so print + # like: + # ``` + # attr_name='attr_value' (type) + # ``` + out = f"{attr_name}='{str(attr_value)}'" + if print_type: + out += " " + _type_to_str(attr_value) + return out + + +# TODO(gp): Merge the code with obj_to_repr() using a switch for the different +# code. +def obj_to_repr( + obj: Any, + *, + attr_mode: str = "__dict__", + sort: bool = False, + print_type: bool = False, + callable_mode: str = "skip", + private_mode: str = "skip", + dunder_mode: str = "skip", + attr_names_to_skip: Optional[List[str]] = None, +) -> str: + """ + Same interface and behavior as `obj_to_str()`. + + Use `_attr_to_repr()` instead of a simple `attr_name = attr_value` + like in `obj_to_str()`. + """ + ret = [] + # TODO(Grisha): factor out the logic in a function `get_class_attributes(attr_mode)`. 
+ if attr_mode == "__dict__": + values = obj.__dict__ + elif attr_mode == "dir": + values = dir(obj) + elif attr_mode == "config": + values = obj.get_config_attributes() + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + if sort: + values = sorted(values) + for attr_name in values: + if attr_mode == "__dict__": + attr_value = obj.__dict__[attr_name] + elif attr_mode in ["dir", "config"]: + attr_value = getattr(obj, attr_name) + else: + raise ValueError(f"Invalid attr_mode='{attr_mode}'") + skip = _to_skip_attribute( + attr_name, + attr_value, + callable_mode, + private_mode, + dunder_mode, + attr_names_to_skip, + ) + # `attr_value` can be callable object and needs to be properly handled + # for string conversion and formatting. + _LOG.debug(hprint.to_str("attr_name attr_value skip")) + if skip: + continue + # + out = _attr_to_repr(attr_name, attr_value, print_type) + ret.append(out) + # + txt = [] + txt.append(hprint.to_object_repr(obj) + ":") + txt.append(hprint.indent("\n".join(ret))) + return "\n".join(txt) + + +# ############################################################################# +# PrintableMixin +# ############################################################################# + + +class PrintableMixin: + """ + Implement `__str__()` and `__repr__()` to print the state of an object. + + These methods can be overridden with more specific methods by + derived classes. + """ + + @staticmethod + @abc.abstractmethod + def get_config_attributes() -> List[str]: + """ + Get list of attributes that are relevant to the configuration of each + block. + """ + ... + + # TODO(Grisha): decide if we need this method: what are the use-cases? + # Ideally we should just save `SystemConfig` and load it when needed. + def to_config_dict(self) -> Dict[str, Any]: + """ + Get class configuration as dict. + """ + res_dict = {} + # Get class attribute names to print. 
+ attributes = self.get_config_attributes() + hdbg.dassert_is_subset(attributes, self.__dict__.keys()) + # Iterate over attributes and add their state to the dict. + for attr in attributes: + value = getattr(self, attr) + # Get a list of types the value class is derived from. + value_parent_classes = value.__class__.__mro__ + if any( + "helpers.hobject.PrintableMixin" in str(parent_class) + for parent_class in value_parent_classes + ): + # Call the function recursively if value is also + # a `PrintableMixin` descendant. + dict_val = value.to_config_dict() + else: + # Get attribute value representation. + dict_val = _attr_to_repr(attr, value, print_type=True) + # Put value in the result dict. + res_dict[attr] = dict_val + return res_dict + + def to_config_str(self) -> str: + """ + Get class configuration as string. + """ + ret = [] + attributes = self.get_config_attributes() + hdbg.dassert_is_subset(attributes, self.__dict__.keys()) + # Iterate over attributes and add their state to the dict. + for attr in attributes: + value = getattr(self, attr) + if isinstance(value, PrintableMixin): + # Call the function recursively if value is also + # a `PrintableMixin` descendant. + dict_val = value.to_config_str() + # Add attribute name for string representation. + dict_val = f"{attr}={dict_val}" + else: + dict_val = _attr_to_repr(attr, value, print_type=True) + # Put value in the result dict. + ret.append(dict_val) + txt = [] + txt.append(hprint.to_object_repr(self) + ":") + txt.append(hprint.indent("\n".join(ret))) + txt = "\n".join(txt) + return txt + + def __repr__( + self, + *, + attr_names_to_skip: Optional[List[str]] = None, + ) -> str: + """ + Used for debugging and development and need to be unambiguous. 
+ """ + txt = obj_to_repr( + self, + print_type=True, + private_mode="all", + attr_names_to_skip=attr_names_to_skip, + ) + return txt + + def __str__( + self, + *, + attr_names_to_skip: Optional[List[str]] = None, + ) -> str: + """ + Used for creating output for end user and need to be readable. + """ + txt = obj_to_str( + self, + print_type=True, + private_mode="all", + attr_names_to_skip=attr_names_to_skip, + ) + return txt + + +# ############################################################################# + + +# TODO(gp): CleanUp. This is for testing and should be in hobject_test.py. +# TODO(gp): -> check_object_signature +def test_object_signature( + self_: Any, obj: Any, *, remove_lines_regex: Optional[str] = None +) -> None: + """ + Print a string representation of an object using both `str()` and `repr()`. + + :param obj: the object to print + :param remove_lines_regex: a regex to remove certain lines from the + output + """ + txt = [] + # + txt.append(hprint.frame("str:")) + txt.append(str(obj)) + # + txt.append(hprint.frame("repr:")) + txt.append(repr(obj)) + # + txt = "\n".join(txt) + # Remove certain lines, if needed. + if remove_lines_regex: + txt = hprint.filter_text(remove_lines_regex, txt) + # + self_.check_string(txt, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py new file mode 100644 index 000000000..2c6d9c729 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py @@ -0,0 +1,106 @@ +""" +Support opening a file. 
+ +Import as: + +import helpers.hopen as hopen +""" + +# TODO(gp): -> open_file or move it to system_interaction.py + +import logging +import os +from typing import Optional + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# ############################################################################# + + +def _cmd_open_html(file_name: str, os_name: str) -> Optional[str]: + """ + Get OS-specific command to open an HTML file. + """ + # Retrieve the executable. + os_cmds = { + "Darwin": "open", + "Windows": "start", + "Linux": "xdg-open", + } + hdbg.dassert_in(os_name, os_cmds) + exec_name = os_cmds[os_name] + if not hsystem.check_exec(exec_name): + _LOG.warning( + "Can't execute the command '%s' on this platform", exec_name + ) + return None + # Build the command. + full_cmd = f"{exec_name} {file_name}" + if os_name == "Linux": + _LOG.warning( + "To open files faster launch in background '%s &'", exec_name + ) + return full_cmd + + +def _cmd_open_pdf(file_name: str, os_name: str) -> Optional[str]: + """ + Get OS-specific command to open a PDF file. + """ + os_cmds = { + "Darwin": ( + "/usr/bin/osascript << EOF\n" + f'set theFile to POSIX file "{file_name}" as alias\n' + 'tell application "Skim"\n' + "activate\n" + "set theDocs to get documents whose path is " + "(get POSIX path of theFile)\n" + "if (count of theDocs) > 0 then revert theDocs\n" + "open theFile\n" + "end tell\n" + "EOF\n" + ) + } + if os_name not in os_cmds: + _LOG.warning("Opening PDF files on '%s' is not supported yet", os_name) + full_cmd = None + else: + full_cmd = os_cmds[os_name] + return full_cmd + + +def open_file(file_name: str) -> None: + """ + Open file locally if its extension is supported. + """ + # Detect file format by the (last) extension. + # E.g., 'hello.html.txt' is considered a txt file. 
+ extension = os.path.split(file_name)[-1].split(".")[-1] + extension = extension.lower() + # Make sure file exists. + _LOG.info( + "\n%s", + hprint.frame( + f"Opening {extension} file '{file_name}'", char1="<", char2=">" + ), + ) + hdbg.dassert_path_exists(file_name) + # Get opening command. + os_name = hsystem.get_os_name() + cmd: Optional[str] + if extension == "pdf": + cmd = _cmd_open_pdf(file_name, os_name) + elif extension == "html": + cmd = _cmd_open_html(file_name, os_name) + else: + hdbg.dfatal(f"Opening '{extension}' files is not supported yet") + # Run command. + if cmd is not None: + _LOG.info("%s", cmd) + hio.to_file("open_file_cmd.sh", cmd) + hsystem.system("source open_file_cmd.sh", suppress_output=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py new file mode 100644 index 000000000..535e7f081 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py @@ -0,0 +1,18 @@ +""" +Import as: + +import helpers.hpandas as hpandas +""" + +from helpers.hpandas_analysis import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_check_summary import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_clean import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_compare import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_conversion import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_dassert import * # isort:skip # noqa: 
F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_display import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_io import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_multiindex import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_stats import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_transform import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.hpandas_utils import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old new file mode 100644 index 000000000..5be1b281a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old @@ -0,0 +1,2684 @@ +""" +Import as: + +import helpers.hpandas as hpandas +""" + +import csv +import dataclasses +import logging +import helpers.hlogging as hlogging +import random +import re +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +# Handle different versions of s3fs where core module may be at different +# locations. +try: + import s3fs + + # Try to access s3fs.core to check if it exists + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module. 
+ try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ImportError: + # If s3fs is not available, define dummy classes for type hints. + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +# Avoid the following dependency from other `helpers` modules to prevent import +# cycles: +# import helpers.hs3 as hs3 +# import helpers.hsql as hsql +# import helpers.hunit_test as hunitest + + +_LOG = hlogging.getLogger(__name__) + +# Enable extra verbose debugging. Do not commit. +_TRACE = False + +RowsValues = List[List[str]] + + +# ############################################################################# + + +def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: + """ + Convert a pd.DataFrame with a single column into a pd.Series. The problem + is that empty df or df with a single row are not converted correctly to a + pd.Series. + + :param df: dataframe with a single column to convert to a series + :param series_dtype: dtype of the desired series in case a DataFrame + is empty, otherwise inherit dtype from a DataFrame + """ + # See https://stackoverflow.com/questions/33246771 + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) + if df.empty: + srs = pd.Series(dtype=series_dtype) + elif df.shape[0] > 1: + srs = df.squeeze() + else: + srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) + srs.name = df.index.name + hdbg.dassert_isinstance(srs, pd.Series) + return srs + + +def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: + """ + Convert a single-column dataframe to a series or no-op if already a series. 
def dassert_is_days(
    timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None
) -> None:
    """
    Ensure that `timedelta` is a whole number of days.

    :param timedelta: time interval to check
    :param min_num_days: if not `None`, also require `timedelta` to span at
        least this many days
    """
    hdbg.dassert(
        (timedelta / pd.Timedelta(days=1)).is_integer(),
        "timedelta='%s' is not an integer number of days",
        timedelta,
    )
    if min_num_days is not None:
        # Compare against the requested bound: the original compared against
        # a hard-coded 1, so `min_num_days` had no effect.
        hdbg.dassert_lte(min_num_days, timedelta.days)
def dassert_increasing_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has an increasing index.

    On failure the assertion message reports the elements that are not
    greater than their predecessor, together with those predecessors.

    :param obj: index, or object whose index is checked
    :param msg: optional message appended to the report
    :param args: arguments forwarded to `hdbg.dassert()`
    """
    index = _get_index(obj)
    if not index.is_monotonic_increasing:
        # Print information about the problematic indices like:
        # ```
        # Not increasing indices are:
        #                                  full_symbol         open         high
        # timestamp
        # 2018-08-17 01:39:00+00:00  binance::BTC_USDT  6339.250000  6348.910000
        # 2018-08-17 00:01:00+00:00   kucoin::ETH_USDT   286.712987   286.712987
        # ```
        # Find the problematic indices by comparing each element with its
        # predecessor. The original tested `np.diff(index)` against a
        # `pd.Timedelta`, which raises `TypeError` for an index that is not
        # datetime-like and hides the real assertion.
        values = index.values
        not_after_prev = np.asarray(values[1:] <= values[:-1], dtype=bool)
        mask = np.insert(not_after_prev, 0, False)
        # TODO(gp): We might want to specify an integer with how many rows before
        # after we want to show.
        # Shift back to get the previous index that was creating the issue.
        mask_shift = np.append(mask[1:], False)
        #
        mask = mask | mask_shift
        dup_msg = f"Not increasing indices are:\n{df_to_str(obj[mask])}\n"
        if msg is None:
            msg = dup_msg
        else:
            msg = dup_msg + msg
        # Dump the data to file for further inspection.
        # obj.to_csv("index.csv")
        hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args)
# TODO(Paul): @gp -> dassert_datetime_indexed_df
def dassert_time_indexed_df(
    df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool
) -> None:
    """
    Validate that input dataframe is time indexed and well-formed.

    It works for both single and multi-indexed dataframes.

    :param df: dataframe to validate
    :param allow_empty: allow empty data frames
    :param strictly_increasing: if True the index needs to be strictly
        increasing, instead of just increasing
    """
    # Verify that Pandas dataframe is passed as input.
    hdbg.dassert_isinstance(df, pd.DataFrame)
    if not allow_empty:
        # Verify that a non-empty dataframe is passed as input.
        hdbg.dassert_lt(0, df.shape[0])
        # Verify that the dataframe has at least 1 column.
        hdbg.dassert_lte(1, len(df.columns))
    # Verify that the index is increasing.
    if strictly_increasing:
        dassert_strictly_increasing_index(df)
    else:
        dassert_increasing_index(df)
    # Check that the index is in datetime format.
    dassert_index_is_datetime(df)
    # Check that the passed timestamp has timezone info. An empty dataframe
    # (allowed when `allow_empty=True`) has no first element to inspect, so
    # the check is skipped instead of failing with an `IndexError`.
    if len(df.index) > 0:
        index_item = df.index[0]
        if isinstance(index_item, tuple):
            # In case of multi index assume that the first level is a datetime.
            index_item = index_item[0]
        hdateti.dassert_has_tz(index_item)


def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None:
    """
    Ensure that remapping rows / columns is valid.

    :param to_remap: existing row / column names
    :param remap_dict: mapping from existing names to new names
    """
    hdbg.dassert_isinstance(to_remap, list)
    hdbg.dassert_isinstance(remap_dict, dict)
    # All the rows / columns to remap, should exist.
    hdbg.dassert_is_subset(
        remap_dict.keys(),
        to_remap,
        "Keys to remap should be a subset of existing columns",
    )
    # The mapping is invertible.
    hdbg.dassert_no_duplicates(remap_dict.keys())
    hdbg.dassert_no_duplicates(remap_dict.values())
    # Rows / columns should not be remapped on existing rows / columns.
    hdbg.dassert_not_intersection(remap_dict.values(), to_remap)
def dassert_series_type_is(
    srs: pd.Series,
    type_: type,
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Check that the dtype of `srs` is exactly `type_`.

    Valid types are, e.g., `np.float64`, `np.int64`, `pd.Timestamp`.

    :param srs: series to check
    :param type_: expected scalar type of the series dtype
    :param msg: optional message (with `*args` as its format arguments)
        forwarded to the underlying assertion
    """
    # Validate the inputs before looking at the dtype.
    hdbg.dassert_isinstance(srs, pd.Series)
    hdbg.dassert_isinstance(type_, type)
    actual_type = srs.dtype.type
    hdbg.dassert_eq(actual_type, type_, msg, *args)


def dassert_series_type_in(
    srs: pd.Series,
    types: List[type],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Check that the dtype of `srs` is one of `types`.

    :param srs: series to check
    :param types: list of admissible scalar types
    :param msg: optional message (with `*args` as its format arguments)
        forwarded to the underlying assertion
    """
    hdbg.dassert_isinstance(srs, pd.Series)
    hdbg.dassert_container_type(types, list, type)
    actual_type = srs.dtype.type
    hdbg.dassert_in(actual_type, types, msg, *args)


def dassert_indices_equal(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    *,
    allow_series: bool = False,
    only_warning: bool = False,
) -> None:
    """
    Check that `df1` and `df2` have the same index.

    When the indices differ, the elements present in one index but not in the
    other are reported (in both directions).

    :param df1: first dataframe
    :param df2: second dataframe
    :param allow_series: accept series too, converting them to dataframes
    :param only_warning: forwarded to the underlying assertion
    """
    if allow_series:
        # Promote series to single-column dataframes.
        df1, df2 = (
            obj.to_frame() if isinstance(obj, pd.Series) else obj
            for obj in (df1, df2)
        )
    for obj in (df1, df2):
        hdbg.dassert_isinstance(obj, pd.DataFrame)
    index1 = df1.index
    index2 = df2.index
    hdbg.dassert(
        index1.equals(index2),
        "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s",
        index1.difference(index2),
        index2.difference(index1),
        only_warning=only_warning,
    )


def dassert_columns_equal(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    *,
    sort_cols: bool = False,
    only_warning: bool = False,
) -> None:
    """
    Check that `df1` and `df2` have the same columns.

    When the columns differ, the columns present in one dataframe but not in
    the other are reported (in both directions).

    :param df1: first dataframe
    :param df2: second dataframe
    :param sort_cols: sort the columns of both dataframes before comparing
    :param only_warning: forwarded to the underlying assertion
    """
    for obj in (df1, df2):
        hdbg.dassert_isinstance(obj, pd.DataFrame)
    if sort_cols:
        _LOG.debug("Sorting dataframe columns.")
        df1, df2 = df1.sort_index(axis=1), df2.sort_index(axis=1)
    cols1 = df1.columns
    cols2 = df2.columns
    hdbg.dassert(
        cols1.equals(cols2),
        "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s",
        cols1.difference(cols2),
        cols2.difference(cols1),
        only_warning=only_warning,
    )
def dassert_axes_equal(
    df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False
) -> None:
    """
    Check that `df1` and `df2` agree on both index and columns.

    :param df1: first dataframe
    :param df2: second dataframe
    :param sort_cols: sort the columns of both dataframes before comparing
    """
    dassert_indices_equal(df1, df2)
    dassert_columns_equal(df1, df2, sort_cols=sort_cols)


# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`.
def dassert_approx_eq(
    val1: Any,
    val2: Any,
    rtol: float = 1e-05,
    atol: float = 1e-08,
    msg: Optional[str] = None,
    *args: Any,
    only_warning: bool = False,
) -> None:
    """
    Check that `val1` and `val2` are numerically close.

    The comparison is done with `np.allclose()`, treating NaNs as equal.

    :param val1: first value (scalar or iterable)
    :param val2: second value (scalar or iterable)
    :param rtol: relative tolerance passed to `np.allclose()`
    :param atol: absolute tolerance passed to `np.allclose()`
    :param msg: optional message (with `*args` as its format arguments)
        reported on failure
    :param only_warning: forwarded to the failure handler
    """
    # Approximate comparison is not applicable for strings.
    for val in (val1, val2):
        hdbg.dassert_is_not(type(val), str)
    # Convert iterable inputs to list in order to comply with numpy.
    val1, val2 = (
        list(val) if isinstance(val, Iterable) else val for val in (val1, val2)
    )
    is_close = np.allclose(
        np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True
    )
    if is_close:
        return
    txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}"
    hdbg._dfatal(txt, msg, *args, only_warning=only_warning)  # type: ignore


# #############################################################################


def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex:
    """
    Build a regular `DatetimeIndex` spanning the range of `index`.

    :param index: `DatetimeIndex` to resample; its values must be unique
    :param frequency: frequency from `pd.date_range()` to resample to
    :return: index going from the min to the max of `index` at `frequency`
    """
    _LOG.debug(hprint.to_str("index frequency"))
    hdbg.dassert_isinstance(index, pd.DatetimeIndex)
    dassert_unique_index(index, msg="Index must have only unique values")
    min_date, max_date = index.min(), index.max()
    _LOG.debug("min_date=%s max_date=%s", min_date, max_date)
    # TODO(gp): Preserve the index name.
    # index_name = index.name
    resampled_index = pd.date_range(
        start=min_date, end=max_date, freq=frequency
    )
    # Enable detailed debugging.
    if False:
        num_before = len(index)
        num_after = len(resampled_index)
        if num_after > num_before:
            # The regular grid has more points than the input.
            _LOG.debug(
                "Index length increased by %s = %s - %s",
                num_after - num_before,
                num_after,
                num_before,
            )
        elif num_after < num_before:
            # The regular grid has fewer points than the input.
            _LOG.debug(
                "Index length decreased by %s = %s - %s",
                num_before - num_after,
                num_before,
                num_after,
            )
        else:
            _LOG.debug("Index length=%s has not changed", num_before)
    # resampled_index.name = index_name
    return resampled_index
def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame:
    """
    Reindex `df` on a regular time grid, filling the new rows with NaN.

    :param df: `DataFrame` to resample
    :param frequency: frequency from `pd.date_range()` to resample to
    :return: resampled `DataFrame`, keeping the original index name
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    # Remember the index name since the regular grid does not carry it.
    original_name = df.index.name
    regular_index = resample_index(df.index, frequency)
    resampled = df.reindex(regular_index)
    resampled.index.name = original_name
    return resampled


def find_gaps_in_dataframes(
    df1: pd.DataFrame, df2: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Report the rows that each dataframe is missing with respect to the other.

    :param df1: first dataframe for comparison
    :param df2: second dataframe for comparison
    :return: pair of dataframes:
        - rows of `df2` whose index is not in `df1`
        - rows of `df1` whose index is not in `df2`
    """
    # Rows missing from the first dataframe, i.e., only in the second one.
    only_in_second = df2.index.difference(df1.index)
    # Rows missing from the second dataframe, i.e., only in the first one.
    only_in_first = df1.index.difference(df2.index)
    return df2.loc[only_in_second], df1.loc[only_in_first]
# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`.
def apply_index_mode(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    mode: str,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Reconcile the indices of two dataframes according to `mode`.

    :param df1: first input df
    :param df2: second input df
    :param mode: method of processing indices
        - "assert_equal": check that both indices are equal, assert otherwise
        - "intersect": restrict both dfs to a common index
        - "leave_unchanged": ignore any indices mismatch and return dfs as-is
    :return: transformed copy of the inputs
    """
    _LOG.debug("mode=%s", mode)
    for obj in (df1, df2):
        hdbg.dassert_isinstance(obj, pd.DataFrame)
    hdbg.dassert_isinstance(mode, str)
    # Work on copies so that the inputs are never modified.
    out1, out2 = df1.copy(), df2.copy()
    if mode == "assert_equal":
        dassert_indices_equal(out1, out2)
    elif mode == "intersect":
        # TODO(Grisha): Add sorting on demand.
        shared_index = out1.index.intersection(out2.index)
        out1, out2 = (
            out[out.index.isin(shared_index)] for out in (out1, out2)
        )
    elif mode == "leave_unchanged":
        _LOG.debug(
            "Ignoring any index missmatch as per user's request.\n"
            "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s",
            out1.index.difference(out2.index),
            out2.index.difference(out1.index),
        )
    else:
        raise ValueError(f"Unsupported index_mode={mode}")
    return out1, out2
def apply_columns_mode(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    mode: str,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Process DataFrames according to the column mode.

    :param df1: first input df
    :param df2: second input df
    :param mode: method of processing columns
        - "assert_equal": check that both dfs have equal columns, assert otherwise
        - "intersect": restrict both dfs to only include common columns
        - "leave_unchanged": ignore any column mismatches and return dfs as-is
    :return: transformed copy of the inputs
    """
    _LOG.debug("mode=%s", mode)
    # Input validation.
    hdbg.dassert_isinstance(df1, pd.DataFrame)
    hdbg.dassert_isinstance(df2, pd.DataFrame)
    hdbg.dassert_isinstance(mode, str)
    # Copy in order not to modify the inputs.
    df1_copy = df1.copy()
    df2_copy = df2.copy()
    if mode == "assert_equal":
        # Check if columns are equal or not.
        dassert_columns_equal(df1_copy, df2_copy)
    elif mode == "intersect":
        # Filter dataframes based on its common columns.
        common_columns = df1_copy.columns.intersection(df2_copy.columns)
        df1_copy = df1_copy[common_columns]
        df2_copy = df2_copy[common_columns]
        # Log the filtered dataframes (the unfiltered inputs used to be logged
        # here, which made the "after filtering" message misleading).
        _LOG.debug("df1 after filtering=\n%s", df_to_str(df1_copy))
        _LOG.debug("df2 after filtering=\n%s", df_to_str(df2_copy))
    elif mode == "leave_unchanged":
        # Ignore mismatch.
        _LOG.debug(
            "Ignoring any column missmatch as per user's request.\n"
            "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s",
            df1.columns.difference(df2.columns),
            df2.columns.difference(df1.columns),
        )
    else:
        raise ValueError(f"Unsupported column mode: {mode}")
    return df1_copy, df2_copy
def find_gaps_in_time_series(
    time_series: pd.Series,
    start_timestamp: pd.Timestamp,
    end_timestamp: pd.Timestamp,
    freq: str,
) -> pd.DatetimeIndex:
    """
    Find missing points on a time interval specified by [start_timestamp,
    end_timestamp], where point distribution is determined by `freq`.

    If the passed time series is in unix epoch format (i.e., its dtype is
    `int32` or `int64`), it is automatically transformed to `pd.Timestamp`.

    :param time_series: time series to find gaps in
    :param start_timestamp: start of the time interval to check
    :param end_timestamp: end of the time interval to check
    :param freq: distance between two data points on the interval.
        Aliases correspond to pandas.date_range's freq parameter, i.e.
        "S" -> second, "T" -> minute.
    :return: the points of the regular grid that are missing in the source
        time series, as a `pd.DatetimeIndex` (this is what
        `pd.DatetimeIndex.difference()` returns)
    """
    _time_series = time_series
    if str(time_series.dtype) in ["int32", "int64"]:
        _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp)
    # Build the grid of all the expected points.
    correct_time_series = pd.date_range(
        start=start_timestamp, end=end_timestamp, freq=freq
    )
    return correct_time_series.difference(_time_series)


def check_and_filter_matching_columns(
    df: pd.DataFrame, required_columns: List[str], filter_data_mode: str
) -> pd.DataFrame:
    """
    Check that columns are the required ones and if not filter data depending
    on `filter_data_mode`.

    :param df: data to check columns for
    :param required_columns: columns to return, skipping columns that are not required
    :param filter_data_mode: control behaviour with respect to extra or missing columns
        - "assert": raise an error if required columns do not match received columns
        - "warn_and_trim": return the intersection of required and received columns and
          issue a warning
    :return: input data as it is if required columns match received columns otherwise
        processed data, see `filter_data_mode`
    """
    received_columns = df.columns.to_list()
    hdbg.dassert_lte(1, len(received_columns))
    #
    if filter_data_mode == "assert":
        # Raise an assertion.
        only_warning = False
    elif filter_data_mode == "warn_and_trim":
        # Just issue a warning.
        only_warning = True
        # Get columns intersection while preserving the order of the columns.
        columns_intersection = [
            col_name
            for col_name in required_columns
            if col_name in received_columns
        ]
        hdbg.dassert_lte(1, len(columns_intersection))
        df = df[columns_intersection]
    else:
        raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'")
    # Compare against the columns received originally, i.e., before trimming.
    hdbg.dassert_set_eq(
        required_columns,
        received_columns,
        only_warning=only_warning,
        msg="Received columns do not match required columns.",
    )
    return df
def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """
    Compare contents of rows with same indices.

    Index is set to default sequential integer values because compare is
    sensitive to multi index (probably because new multi indexes are created
    for each difference in `compare`). Multi index columns are regular columns now.
    Excess columns are removed so both dataframes are always same shape because
    `compare` expects identical dataframes (same number of rows, columns, etc.).

    :param df1: first dataframe for comparison
    :param df2: second dataframe for comparison
    :return: dataframe with data with same indices and different contents;
        "self" refers to `df2` and "other" to `df1`
    """
    # Get rows on which the two dataframe indices match.
    idx_intersection = df1.index.intersection(df2.index)
    # Remove excess columns and reset indexes.
    trimmed_second = df2.loc[idx_intersection].reset_index()
    trimmed_first = df1.loc[idx_intersection].reset_index()
    # Get difference between second and first dataframe.
    data_difference = trimmed_second.compare(trimmed_first)
    # Update data difference with original dataframe index names
    # for easier identification.
    # Only named index levels become columns carrying that name after
    # `reset_index()`. Unnamed levels (e.g., an unnamed multi index with names
    # `(None, None)`) have no such column, so they are skipped instead of
    # failing with a `KeyError` on the column `None`.
    index_names = [name for name in df2.index.names if name is not None]
    # If index or multi index is named, it will be visible in data difference.
    if index_names:
        for index in data_difference.index:
            for column in index_names:
                data_difference.loc[index, column] = trimmed_second.loc[index][
                    column
                ]
        data_difference = data_difference.convert_dtypes()
    return data_difference
def drop_duplicates(
    data: Union[pd.Series, pd.DataFrame],
    use_index: bool,
    column_subset: Optional[List[str]] = None,
    *args: Any,
    **kwargs: Any,
) -> Union[pd.Series, pd.DataFrame]:
    """
    Wrap `pandas.drop_duplicates()`.

    See the official docs:
    - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html
    - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html

    Neither `data` nor `column_subset` is modified.

    :param data: data to remove the duplicates from
        NOTE(review): the body reads `data.columns`, so only dataframes appear
        to be supported despite the type hint -- confirm
    :param use_index:
        - if `True`, use index values together with a column subset for
          identifying duplicates
        - if `False`, duplicated rows are with the exact same values in a subset
          and different indices
    :param column_subset: a list of columns to consider for identifying duplicates
    :param args: positional arguments forwarded to `pandas.drop_duplicates()`
    :param kwargs: keyword arguments forwarded to `pandas.drop_duplicates()`
    :return: data without duplicates
    """
    _LOG.debug(hprint.to_str("use_index column_subset args kwargs"))
    num_rows_before = data.shape[0]
    # Get all columns list for subset if no subset is passed.
    if column_subset is None:
        subset = data.columns.tolist()
    else:
        hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty")
        # Copy the list so that the caller's object is not extended below.
        subset = list(column_subset)
    if use_index:
        # Add dummy index column to use it for duplicates detection.
        index_col_name = "use_index_col"
        hdbg.dassert_not_in(index_col_name, data.columns.tolist())
        subset.insert(0, index_col_name)
        # `assign()` returns a new dataframe, so the helper column does not
        # leak into the caller's data.
        data = data.assign(**{index_col_name: data.index})
    #
    data_no_dups = data.drop_duplicates(subset=subset, *args, **kwargs)
    #
    if use_index:
        # Remove dummy index column.
        data_no_dups = data_no_dups.drop([index_col_name], axis=1)
    # Report the change.
    num_rows_after = data_no_dups.shape[0]
    if num_rows_before != num_rows_after:
        _LOG.debug(
            "Removed %s rows",
            hprint.perc(num_rows_before - num_rows_after, num_rows_before),
        )
    return data_no_dups
def dropna(
    df: pd.DataFrame,
    *args: Any,
    drop_infs: bool = False,
    report_stats: bool = False,
    **kwargs: Any,
) -> pd.DataFrame:
    """
    Create a wrapper around pd.dropna() reporting information about the removed
    rows.

    :param df: dataframe to process
    :param args: positional arguments forwarded to `pd.DataFrame.dropna()`
    :param drop_infs: if +/- np.inf should be considered as nans
    :param report_stats: if processing stats should be reported
    :param kwargs: keyword arguments forwarded to `pd.DataFrame.dropna()`
    :return: dataframe with nans dropped
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    num_rows_before = df.shape[0]
    if drop_infs:
        df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(*args, **kwargs)
    if report_stats:
        num_rows_after = df.shape[0]
        pct_removed = hprint.perc(
            num_rows_before - num_rows_after, num_rows_before
        )
        _LOG.info("removed rows with nans: %s", pct_removed)
    return df


def drop_axis_with_all_nans(
    df: pd.DataFrame,
    drop_rows: bool = True,
    drop_columns: bool = False,
    drop_infs: bool = False,
    report_stats: bool = False,
) -> pd.DataFrame:
    """
    Remove columns and rows not containing information (e.g., with only nans).

    The operation is not performed in place and the resulting df is
    returned. Assume that the index is timestamps.

    :param df: dataframe to process
    :param drop_rows: remove rows with only nans
    :param drop_columns: remove columns with only nans
    :param drop_infs: remove also +/- np.inf
    :param report_stats: report the stats of the operations
    :return: dataframe with specific nan axis dropped
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    if drop_infs:
        df = df.replace([np.inf, -np.inf], np.nan)
    if drop_columns:
        # Remove columns with all nans, if any.
        cols_before = df.columns[:]
        df = df.dropna(axis=1, how="all")
        if report_stats:
            # Report results.
            cols_after = df.columns[:]
            removed_cols = set(cols_before).difference(set(cols_after))
            # The fraction removed is relative to the number of columns
            # before dropping (it used to be relative to what was left).
            pct_removed = hprint.perc(
                len(cols_before) - len(cols_after), len(cols_before)
            )
            _LOG.info(
                "removed cols with all nans: %s %s",
                pct_removed,
                hprint.list_to_str(removed_cols),
            )
    if drop_rows:
        # Remove rows with all nans, if any.
        rows_before = df.index[:]
        df = df.dropna(axis=0, how="all")
        if report_stats:
            # Report results.
            rows_after = df.index[:]
            removed_rows = set(rows_before).difference(set(rows_after))
            if len(rows_before) == len(rows_after):
                # Nothing was removed.
                min_ts = max_ts = None
            else:
                # TODO(gp): Report as intervals of dates.
                min_ts = min(removed_rows)
                max_ts = max(removed_rows)
            # The fraction removed is relative to the number of rows before
            # dropping (it used to be relative to what was left).
            pct_removed = hprint.perc(
                len(rows_before) - len(rows_after), len(rows_before)
            )
            _LOG.info(
                "removed rows with all nans: %s [%s, %s]",
                pct_removed,
                min_ts,
                max_ts,
            )
    return df
def reindex_on_unix_epoch(
    df: pd.DataFrame, in_col_name: str, unit: str = "s"
) -> pd.DataFrame:
    """
    Index `df` by the UTC datetimes obtained from the column `in_col_name`.

    `in_col_name` contains Unix epoch values (e.g., 1638194400). The dataframe
    is modified in place and also returned; the column `in_col_name` is kept
    and the resulting index has no name.

    :param df: dataframe with a unix epoch
    :param in_col_name: column containing unix epoch
    :param unit: the unit of unix epoch
    :return: `df` indexed by the converted timestamps
    """
    # The conversion goes through a temporary column whose name must be free.
    temp_col_name = in_col_name + "_tmp"
    hdbg.dassert_in(in_col_name, df.columns)
    hdbg.dassert_not_in(temp_col_name, df.columns)
    # Convert.
    timestamps = pd.to_datetime(df[in_col_name], unit=unit, utc=True)
    # Save.
    df[temp_col_name] = timestamps
    df.set_index(temp_col_name, drop=True, inplace=True)
    df.index.name = None
    return df
def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str:
    """
    Build a short textual signature of a dataframe.

    The signature reports the shape of the dataframe and either the whole
    dataframe or its first and last rows. It is used for testing purposes.

    :param df: dataframe to summarize
    :param num_rows: max number of rows shown in full; longer dataframes are
        shown as `num_rows // 2` rows from the head and from the tail
    :return: the signature as a string
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    parts: List[str] = [f"df.shape={str(df.shape)}"]
    with pd.option_context(
        "display.max_colwidth", int(1e6), "display.max_columns", None
    ):
        if len(df) <= num_rows:
            # The dataframe is small enough to be shown entirely.
            parts.append(f"df.full=\n{df}")
        else:
            # Show only the first and the last rows.
            half = num_rows // 2
            parts.append(f"df.head=\n{df.head(half)}")
            parts.append(f"df.tail=\n{df.tail(half)}")
    return "\n".join(parts)


# #############################################################################


def trim_df(
    df: pd.DataFrame,
    ts_col_name: Optional[str],
    start_ts: Optional[pd.Timestamp],
    end_ts: Optional[pd.Timestamp],
    left_close: bool,
    right_close: bool,
) -> pd.DataFrame:
    """
    Restrict a dataframe to the rows whose timestamp is in a given interval.

    The timestamps are read from `ts_col_name` and the interval is bounded by
    `start_ts` and `end_ts`.

    :param df: the dataframe to trim
    :param ts_col_name: the name of the column; `None` means index
    :param start_ts: the start boundary for trimming; `None` means unbounded
    :param end_ts: the end boundary for trimming; `None` means unbounded
    :param left_close: whether to include the start boundary of the interval
        - True: [start_ts, ...
        - False: (start_ts, ...
    :param right_close: whether to include the end boundary of the interval
        - True: ..., end_ts]
        - False: ..., end_ts)
    :return: the trimmed dataframe
    """
    if _TRACE:
        _LOG.trace(
            df_to_str(df, print_dtypes=True, print_shape_info=True, tag="df")
        )
    _LOG.debug(
        hprint.to_str("ts_col_name start_ts end_ts left_close right_close")
    )
    if _TRACE:
        _LOG.trace("df=\n%s", df_to_str(df))
    # An empty df has nothing to trim, and without any boundary there is no
    # point of reference to trim to.
    if df.empty or (start_ts is None and end_ts is None):
        return df
    num_rows_before = df.shape[0]
    if not (start_ts is None or end_ts is None):
        # Confirm that the interval boundaries are valid.
        hdateti.dassert_tz_compatible(start_ts, end_ts)
        hdbg.dassert_lte(start_ts, end_ts)
    # Get the values to filter by.
    if ts_col_name is not None:
        hdbg.dassert_in(ts_col_name, df.columns)
        values_to_filter_by = df[ts_col_name]
    else:
        values_to_filter_by = pd.Series(df.index, index=df.index)
    if values_to_filter_by.is_monotonic_increasing:
        _LOG.trace("df is monotonic")
        # The values are sorted, so the boundaries are located with a binary
        # search through `pd.Series.searchsorted()`.
        # Position of the left boundary; without a start everything is kept
        # from the first row.
        if start_ts is None:
            left_idx = 0
        else:
            left_idx = values_to_filter_by.searchsorted(
                start_ts, "left" if left_close else "right"
            )
        _LOG.debug(hprint.to_str("start_ts left_idx"))
        # Position of the right boundary; without an end everything is kept
        # up to the last row.
        if end_ts is None:
            right_idx = df.shape[0]
        else:
            right_idx = values_to_filter_by.searchsorted(
                end_ts, "right" if right_close else "left"
            )
        _LOG.debug(hprint.to_str("end_ts right_idx"))
        #
        hdbg.dassert_lte(0, left_idx)
        hdbg.dassert_lte(left_idx, right_idx)
        hdbg.dassert_lte(right_idx, df.shape[0])
        _LOG.debug(hprint.to_str("start_ts left_idx"))
        if right_idx < df.shape[0]:
            _LOG.debug(hprint.to_str("end_ts right_idx"))
        df = df.iloc[left_idx:right_idx]
    else:
        _LOG.trace("df is not monotonic")
        # The values are not sorted; using the `pd.Series.between` method.
        inclusive_by_closure = {
            (True, True): "both",
            (True, False): "left",
            (False, True): "right",
            (False, False): "neither",
        }
        inclusive = inclusive_by_closure[(bool(left_close), bool(right_close))]
        # A missing boundary is replaced with a value just outside the data.
        epsilon = pd.DateOffset(minutes=1)
        if start_ts is None:
            start_ts = values_to_filter_by.min() - epsilon
        if end_ts is None:
            end_ts = values_to_filter_by.max() + epsilon
        is_in_interval = values_to_filter_by.between(
            start_ts, end_ts, inclusive=inclusive
        )
        df = df[is_in_interval]
    # Report the changes.
    num_rows_after = df.shape[0]
    if num_rows_before != num_rows_after:
        _LOG.debug(
            "Removed %s rows",
            hprint.perc(num_rows_before - num_rows_after, num_rows_before),
        )
    return df
# TODO(Nina): Add `filter_data_mode`.
def merge_dfs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    threshold_col_name: str,
    *,
    threshold: float = 0.9,
    intersecting_columns: Optional[List[str]] = None,
    **pd_merge_kwargs: Any,
) -> pd.DataFrame:
    """
    Wrap `pd.merge`.

    :param df1: left dataframe
    :param df2: right dataframe
    :param threshold_col_name: a column's name to check the minimum
        overlap on
    :param threshold: minimum overlap of unique values in a specified
        column to perform the merge
    :param intersecting_columns: allow certain columns to appear in both
        dataframes; store both in the resulting df with corresponding
        suffixes
    :param pd_merge_kwargs: keyword arguments forwarded to
        `pd.DataFrame.merge()`; the columns in `on` (a single name or a list
        of names) are allowed to appear in both dataframes
    :return: merged dataframe
    """
    _LOG.debug(
        hprint.to_str(
            "threshold_col_name threshold intersecting_columns pd_merge_kwargs"
        )
    )
    # Sanity check column types.
    threshold_col1 = df1[threshold_col_name]
    threshold_col2 = df2[threshold_col_name]
    only_first_elem = False
    hdbg.dassert_array_has_same_type_element(
        threshold_col1, threshold_col2, only_first_elem
    )
    # TODO(Grisha): @Dan Implement asserts for each asset id.
    # Check that an overlap of unique values is above the specified threshold.
    threshold_unique_values1 = set(threshold_col1)
    threshold_unique_values2 = set(threshold_col2)
    threshold_common_values = threshold_unique_values1 & threshold_unique_values2
    threshold_common_values_share1 = len(threshold_common_values) / len(
        threshold_unique_values1
    )
    threshold_common_values_share2 = len(threshold_common_values) / len(
        threshold_unique_values2
    )
    hdbg.dassert_lte(threshold, threshold_common_values_share1)
    hdbg.dassert_lte(threshold, threshold_common_values_share2)
    # Use an empty set instead of None to perform set difference further.
    intersecting_columns_set = (
        set() if intersecting_columns is None else set(intersecting_columns)
    )
    # Collect the merge keys. `on` can be a single column name: it must be
    # kept whole, since `set("name")` would split it into characters. `on`
    # can also be absent (e.g., when merging on the index).
    on = pd_merge_kwargs.get("on")
    if on is None:
        on_columns_set = set()
    elif isinstance(on, str):
        on_columns_set = {on}
    else:
        on_columns_set = set(on)
    # Check that there are no common columns except for the ones in `intersecting_columns`.
    df1_cols = (
        set(df1.columns.to_list()) - on_columns_set - intersecting_columns_set
    )
    df2_cols = (
        set(df2.columns.to_list()) - on_columns_set - intersecting_columns_set
    )
    hdbg.dassert_not_intersection(df1_cols, df2_cols)
    #
    res_df = df1.merge(df2, **pd_merge_kwargs)
    return res_df
# TODO(gp): Is this (ironically) a duplicate of drop_duplicates?
def drop_duplicated(
    df: pd.DataFrame, *, subset: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Implement `df.duplicated` but considering also the index and ignoring nans.

    Rows are duplicated when they share both the index value and the values in
    `subset`, with nans treated as equal; the first occurrence is kept. The
    input dataframe is not modified.

    :param df: dataframe to process
    :param subset: columns to consider, besides the index, for identifying
        duplicates; `None` means all the columns
    :return: dataframe without the duplicated rows
    """
    _LOG.debug("before df=\n%s", df_to_str(df))
    # Move the index to the df.
    old_index_name = df.index.name
    new_index_name = "_index.tmp"
    hdbg.dassert_not_in(new_index_name, df.columns)
    # `rename_axis()` and `reset_index()` return new objects, so the caller's
    # dataframe keeps its index and its index name.
    df = df.rename_axis(index=new_index_name).reset_index(drop=False)
    # Remove duplicates by ignoring nans.
    if subset is not None:
        hdbg.dassert_isinstance(subset, list)
        subset = [new_index_name] + subset
    # Nans never compare equal, so they are replaced with a common value before
    # looking for duplicates.
    duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first")
    # Report the result of the operation.
    if duplicated.sum() > 0:
        num_rows_before = df.shape[0]
        _LOG.debug("Removing duplicates df=\n%s", df_to_str(df.loc[duplicated]))
        df = df.loc[~duplicated]
        num_rows_after = df.shape[0]
        _LOG.warning(
            "Removed repeated rows num_rows=%s",
            hprint.perc(num_rows_before - num_rows_after, num_rows_before),
        )
        _LOG.debug("after removing duplicates df=\n%s", df_to_str(df))
    # Set the index back.
    df = df.set_index(new_index_name)
    df.index.name = old_index_name
    _LOG.debug("after df=\n%s", df_to_str(df))
    return df
# #############################################################################


def infer_column_types(col: pd.Series) -> Dict[str, Any]:
    """
    Determine which data type is most prevalent in a column.

    Examine the values in the given pandas Series and decide whether the
    majority of entries are strings, numeric values, or booleans.

    :param col: The column to inspect.
    :return: a dict with the keys:
        - `"is_numeric"`: fraction of values convertible to a number
        - `"is_bool"`: fraction of values that are `bool` instances
        - `"is_string"`: fraction of values that are `str` instances
        - `"type"`: one of `"is_string"`, `"is_numeric"`, or `"is_bool"`,
          representing the predominant type
    """
    vals = {
        "is_numeric": pd.to_numeric(col, errors="coerce").notna(),
        #'is_datetime': pd.to_datetime(col, errors='coerce').notna(),
        "is_bool": col.map(lambda x: isinstance(x, bool)),
        "is_string": col.map(lambda x: isinstance(x, str)),
    }
    # Turn each boolean mask into the fraction of matching values.
    vals = {k: float(v.mean()) for k, v in vals.items()}
    # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool",
    #                 (vals["is_numeric"] >= vals["is_string"], "is_numeric",
    #                 "is_string"))
    # Ties are resolved in favor of bool, then numeric; a type with no matching
    # values is never selected, so "is_string" is the fallback.
    if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0):
        type_ = "is_bool"
    elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0):
        type_ = "is_numeric"
    else:
        type_ = "is_string"
    vals["type"] = type_
    return vals
def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Run `infer_column_types()` on every column of a dataframe.

    :param df: The DataFrame whose columns will be analyzed.
    :return: A DataFrame with one row per column of `df` and one column per
        entry of the dict returned by `infer_column_types()`.
    """

    def _infer(col: pd.Series) -> pd.Series:
        return pd.Series(infer_column_types(col))

    # `apply()` lays the results out column-wise, so transpose them.
    per_column = df.apply(_infer)
    return per_column.T


def convert_to_type(col: pd.Series, type_: str) -> pd.Series:
    """
    Convert a pandas Series to the type described by `type_`.

    :param col: The input column to be converted.
    :param type_: The target data type. Expected values include:
        - `"is_bool"`: map the recognized true / false spellings to booleans
          and anything else to `None`.
        - `"is_int"`: convert values to numbers, downcasting to the smallest
          integer type when possible; invalid values become NaN.
        - `"is_numeric"`: convert values to numbers; invalid values become NaN.
        - `"is_string"`: convert values to strings.
    :return: A new Series with the same index as `col`, converted as requested.
    """
    if type_ == "is_bool":
        true_values = ("True", 1, "1", "true", True)
        false_values = (0, "0", "False", False, "false")

        def _to_bool(value: Any) -> Optional[bool]:
            if value in true_values:
                return True
            if value in false_values:
                return False
            # The value is not a recognized boolean spelling.
            return None

        return col.map(_to_bool)
    if type_ == "is_int":
        return pd.to_numeric(col, errors="coerce", downcast="integer")
    if type_ == "is_numeric":
        return pd.to_numeric(col, errors="coerce")
    if type_ == "is_string":
        return col.astype(str)
    raise ValueError(f"Unknown column type: {type_}")
def convert_col_to_int(
    df: pd.DataFrame,
    col: str,
) -> pd.DataFrame:
    """
    Convert a column to an integer column.

    Example use case: Parquet uses categoricals. If supplied with a
    categorical-type column, this function will convert it to an integer
    column.

    The column is replaced in `df` itself, which is also returned.

    :param df: dataframe containing the column
    :param col: name of the column to convert
    :return: `df` with `col` converted to `int64`
    """
    hdbg.dassert_isinstance(df, pd.DataFrame)
    hdbg.dassert_isinstance(col, str)
    hdbg.dassert_in(col, df.columns)
    # Attempt the conversion.
    df[col] = df[col].astype("int64")
    # Trust, but verify.
    dassert_series_type_is(df[col], np.int64)
    return df


def cast_series_to_type(
    series: pd.Series, series_type: Optional[type]
) -> pd.Series:
    """
    Convert a Pandas series to a given type.

    The input series is not modified.

    :param series: the input series
    :param series_type: the type to convert the series into
        - if None, then the series values are turned into Nones
        - if `pd.Timestamp`, the values are parsed with `pd.to_datetime()`
        - if `dict`, each value is evaluated as a Python expression
    :return: the series in the required type
    """
    if series_type is None:
        # Turn the series values into None. The values are blanked on a copy
        # so that the caller's series is left untouched.
        series = series.copy()
        series[:] = None
    elif series_type is pd.Timestamp:
        # Convert to timestamp.
        series = pd.to_datetime(series)
    elif series_type is dict:
        # Convert to dict.
        # NOTE(review): `eval` executes arbitrary code, so this branch must not
        # be fed untrusted data. `ast.literal_eval` would be safer if the
        # values are always literals -- confirm with the callers.
        series = series.apply(eval)
    else:
        # Convert to the specified type.
        series = series.astype(series_type)
    return series


def _display(log_level: int, df: pd.DataFrame) -> None:
    """
    Display a df in a notebook at the given log level.

    The behavior is similar to a command like `_LOG.log(log_level, ...)` but
    for a notebook `display` command.

    :param log_level: log level at which to display a df. E.g., if `log_level =
        logging.DEBUG`, then we display the df only if we are running with
        `-v DEBUG`. If `log_level = logging.INFO` then we don't display it
    :param df: dataframe to display
    """
    from IPython.display import display

    if (
        hsystem.is_running_in_ipynb()
        and log_level >= hdbg.get_logger_verbosity()
    ):
        display(df)
+ + :param log_level: log level at which to display a df. E.g., if `log_level = + logging.DEBUG`, then we display the df only if we are running with + `-v DEBUG`. If `log_level = logging.INFO` then we don't display it + """ + from IPython.display import display + + if ( + hsystem.is_running_in_ipynb() + and log_level >= hdbg.get_logger_verbosity() + ): + display(df) + + +def _df_to_str( + df: pd.DataFrame, + num_rows: Optional[int], + max_columns: int, + max_colwidth: int, + max_rows: int, + precision: int, + display_width: int, + use_tabulate: bool, + log_level: int, +) -> str: + is_in_ipynb = hsystem.is_running_in_ipynb() + out = [] + # Set dataframe print options. + with pd.option_context( + "display.max_colwidth", + max_colwidth, + # "display.height", 1000, + "display.max_rows", + max_rows, + "display.precision", + precision, + "display.max_columns", + max_columns, + "display.width", + display_width, + ): + if use_tabulate: + import tabulate + + out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) + # TODO(Grisha): Add an option to display all rows since if `num_rows` + # is `None`, only first and last 5 rows are displayed. Consider using + # `df.to_string()` instead of `str(df)`. + if num_rows is None or df.shape[0] <= num_rows: + # Print the entire data frame. + if not is_in_ipynb: + out.append(str(df)) + else: + # Display dataframe. + _display(log_level, df) + else: + nr = num_rows // 2 + if not is_in_ipynb: + # Print top and bottom of df. + out.append(str(df.head(nr))) + out.append("...") + tail_str = str(df.tail(nr)) + # Remove index and columns from tail_df. + skipped_rows = 1 + if df.index.name: + skipped_rows += 1 + tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) + out.append(tail_str) + else: + # TODO(gp): @all use this approach also above and update all the + # unit tests. 
+ df = [ + df.head(nr), + pd.DataFrame( + [["..."] * df.shape[1]], index=[" "], columns=df.columns + ), + df.tail(nr), + ] + df = pd.concat(df) + # Display dataframe. + _display(log_level, df) + if not is_in_ipynb: + txt = "\n".join(out) + else: + txt = "" + return txt + + +# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that +# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. +# TODO(gp): We should make sure this works properly in a notebook, although +# it's not easy to unit test. +def df_to_str( + df: Union[pd.DataFrame, pd.Series, pd.Index], + *, + # TODO(gp): Remove this hack in the integration. + # handle_signed_zeros: bool = False, + handle_signed_zeros: bool = True, + num_rows: Optional[int] = 6, + print_dtypes: bool = False, + print_shape_info: bool = False, + print_nan_info: bool = False, + print_memory_usage: bool = False, + memory_usage_mode: str = "human_readable", + tag: Optional[str] = None, + max_columns: int = 10000, + max_colwidth: int = 2000, + max_rows: int = 500, + precision: int = 6, + display_width: int = 10000, + use_tabulate: bool = False, + log_level: int = logging.DEBUG, +) -> str: + """ + Print a dataframe to string reporting all the columns without trimming. + + Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works + properly when called from outside a notebook, i.e., the dataframe is printed + But it won't display the dataframe in a notebook, since the default level at + which the dataframe is displayed is `logging.DEBUG`. + + In this case to get the correct behavior one should do: + + ``` + log_level = ... 
+ _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) + ``` + + :param: handle_signed_zeros: convert `-0.0` to `0.0` + :param: num_rows: max number of rows to print (half from the top and half from + the bottom of the dataframe) + - `None` to print the entire dataframe + :param print_dtypes: report dataframe types and information about the type of + each column by looking at the first value + :param print_shape_info: report dataframe shape, index and columns + :param print_memory_usage: report memory use for each + """ + if df is None: + return "" + if isinstance(df, pd.Series): + df = pd.DataFrame(df) + elif isinstance(df, pd.Index): + df = df.to_frame(index=False) + hdbg.dassert_isinstance(df, pd.DataFrame) + # For some reason there are so-called "negative zeros", but we consider + # them equal to `0.0`. + df = df.copy() + if handle_signed_zeros: + for col_name in df.select_dtypes(include=[np.float64, float]).columns: + df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) + out = [] + # Print the tag. + if tag is not None: + out.append(f"# {tag}=") + if not df.empty: + # Print information about the shape and index. + # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info + if print_shape_info: + # TODO(gp): Unfortunately we can't improve this part of the output + # since there are many golden inside the code that would need to be + # updated. Consider automating updating the expected values in the code. + txt = f"index=[{df.index.min()}, {df.index.max()}]" + out.append(txt) + txt = f"columns={','.join(map(str, df.columns))}" + out.append(txt) + txt = f"shape={str(df.shape)}" + out.append(txt) + # Print information about the types. + if print_dtypes: + out.append("* type=") + + table = [] + + def _report_srs_stats(srs: pd.Series) -> List[Any]: + """ + Report dtype, the first element, and its type of series. 
+ """ + row: List[Any] = [] + first_elem = srs.values[0] + num_unique = srs.nunique() + num_nans = srs.isna().sum() + row.extend( + [ + srs.dtype, + hprint.perc(num_unique, len(srs)), + hprint.perc(num_nans, len(srs)), + first_elem, + type(first_elem), + ] + ) + return row + + row = [] + col_name = "index" + row.append(col_name) + row.extend(_report_srs_stats(df.index)) + row = map(str, row) + table.append(row) + for col_name in df.columns: + row_: List[Any] = [] + row_.append(col_name) + row_.extend(_report_srs_stats(df[col_name])) + row_ = map(str, row_) + table.append(row_) + # + columns = [ + "col_name", + "dtype", + "num_unique", + "num_nans", + "first_elem", + "type(first_elem)", + ] + df_stats = pd.DataFrame(table, columns=columns) + stats_num_rows = None + df_stats_as_str = _df_to_str( + df_stats, + stats_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(df_stats_as_str) + # Print info about memory usage. + if print_memory_usage: + out.append("* memory=") + mem_use_df = pd.concat( + [df.memory_usage(deep=False), df.memory_usage(deep=True)], + axis=1, + keys=["shallow", "deep"], + ) + # Add total row. + mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) + mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) + # Convert into the desired format. + if memory_usage_mode == "bytes": + pass + elif memory_usage_mode == "human_readable": + import helpers.hintrospection as hintros + + mem_use_df = mem_use_df.applymap(hintros.format_size) + else: + raise ValueError( + f"Invalid memory_usage_mode='{memory_usage_mode}'" + ) + memory_num_rows = None + memory_usage_as_txt = _df_to_str( + mem_use_df, + memory_num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + out.append(memory_usage_as_txt) + # Print info about nans. 
+ if print_nan_info: + num_elems = df.shape[0] * df.shape[1] + num_nans = df.isna().sum().sum() + txt = f"num_nans={hprint.perc(num_nans, num_elems)}" + out.append(txt) + # + num_zeros = df.isnull().sum().sum() + txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" + out.append(txt) + # TODO(gp): np can't do isinf on objects like strings. + # num_infinite = np.isinf(df).sum().sum() + # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) + # out.append(txt) + # + num_nan_rows = df.dropna().shape[0] + txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" + out.append(txt) + # + num_nan_cols = df.dropna(axis=1).shape[1] + txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" + out.append(txt) + if hsystem.is_running_in_ipynb(): + if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): + print("\n".join(out)) + txt = None + # Print the df. + df_as_str = _df_to_str( + df, + num_rows, + max_columns, + max_colwidth, + max_rows, + precision, + display_width, + use_tabulate, + log_level, + ) + if not hsystem.is_running_in_ipynb(): + out.append(df_as_str) + txt = "\n".join(out) + return txt + + +def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: + """ + Organize dataframe values into a column-row structure. + + - Indentation artifacts are removed + - The index placement is handled, i.e. + - if the index is named, the name is located and moved to the same + row as the column names + - if the index is not named, the row with the column names receives + a placeholder empty value in its place + - Empty columns are dropped + + :param rows_values: row values extracted from a string df representation + :return: row values assembled into a valid column-row structure + """ + # Clean up indentation artifacts. + if all(row[0] == "" for row in rows_values): + # Remove the first empty cell in each row. 
+ for row in rows_values: + del row[0] + # If the index is named, its name is located in the second row, + # with an optional extra empty value cell value next to it. + if len(rows_values[1]) == 1 or ( + len(rows_values[1]) == 2 and rows_values[1][1] == "" + ): + # Move the index name to the row with all the column names. + if rows_values[0][0] == "": + rows_values[0][0] = rows_values[1][0] + else: + rows_values[0].insert(0, rows_values[1][0]) + # Drop the former index name row. + del rows_values[1] + else: + # Add an empty cell for the absent index name. + rows_values[0].insert(0, "") + # Identify and remove empty columns. + min_len_row = min(len(row) for row in rows_values) + idxs_to_delete = [] + for i in range(min_len_row): + if all(row[i] == "" for row in rows_values): + idxs_to_delete.append(i) + for idx in idxs_to_delete: + for row in rows_values: + del row[idx] + # Confirm that all the rows have the same number of values. + hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) + return rows_values + + +def str_to_df( + df_as_str: str, + col_to_type: Dict[str, Optional[type]], + col_to_name_type: Dict[str, type], +) -> pd.DataFrame: + """ + Convert a string representation of a dataframe into a Pandas df. + + :param df_as_str: a df as a string + - the format of the string is the same as the output of + `hpandas.df_to_str()` on a pd.DataFrame, e.g. + ``` + col1 col2 col3 col4 + 0 0.1 a None 2020-01-01 + 1 0.2 "b c" None 2021-05-05 + ``` + - values (including column names) that contain spaces need + to be enclosed in double quotation marks, e.g. + "2023-03-15 16:35:41.205000+00:00" + :param col_to_type: a mapping between the column names and the + types of the values in these columns + - if a column is not present in the mapping, its values will + remain strings + - to indicate the type of index values, use {"__index__": ...} + mapping, e.g. 
{"__index__": pd.Timestamp} + :param col_to_name_type: a mapping between the column names and + the required types of these column names + - same conventions apply as for `col_to_type` (see above) + :return: a converted Pandas dataframe + """ + # Separate the rows. + rows = df_as_str.split("\n") + # Clean up extra spaces. + rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] + # Identify individual values in the rows. + rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) + # Remove the placeholder ["..."] row. + rows_values = [row for row in rows_values if row != ["..."]] + # Organize values into a proper column-row structure. + rows_values = _assemble_df_rows(rows_values) + # Get the column names. + column_names = rows_values[0][1:] + # Get the index. + index_values = [row[0] for row in rows_values[1:]] + index_name = rows_values[0][0] + # Construct the df. + df = pd.DataFrame( + [row[1:] for row in rows_values[1:]], + columns=column_names, + index=index_values, + ) + if index_name != "": + df.index.name = index_name + # Cast the columns into appropriate types. + for col, col_type in col_to_type.items(): + if col == "__index__": + df.index = cast_series_to_type(df.index, col_type) + else: + df[col] = cast_series_to_type(df[col], col_type) + # Cast the column names into appropriate types. + for col, col_name_type in col_to_name_type.items(): + if col == "__index__": + df.index = df.index.rename(col_name_type(df.index.name)) + else: + df = df.rename(columns={col: col_name_type(col)}) + return df + + +def convert_df_to_json_string( + df: pd.DataFrame, + n_head: Optional[int] = 10, + n_tail: Optional[int] = 10, + columns_order: Optional[List[str]] = None, +) -> str: + """ + Convert dataframe to pretty-printed JSON string. + + To select all rows of the dataframe, pass `n_head` as None. 
+ + :param df: dataframe to convert + :param n_head: number of printed top rows + :param n_tail: number of printed bottom rows + :param columns_order: order for the KG columns sort + :return: dataframe converted to JSON string + """ + # Append shape of the initial dataframe. + shape = f"original shape={df.shape}" + # Reorder columns. + if columns_order is not None: + hdbg.dassert_set_eq(columns_order, df.cols) + df = df[columns_order] + # Select head. + if n_head is not None: + head_df = df.head(n_head) + else: + # If no n_head provided, append entire dataframe. + head_df = df + # Transform head to json. + head_json = head_df.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + if n_tail is not None: + # Transform tail to json. + tail = df.tail(n_tail) + tail_json = tail.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + else: + # If no tail specified, append an empty string. + tail_json = "" + # Join shape and dataframe to single string. + output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) + return output_str + + +def convert_df( + df: pd.DataFrame, *, print_invalid_values: bool = False +) -> pd.DataFrame: + """ + Convert each DataFrame column to its predominant type. + + This function inspects every column in `df`, determines whether the + majority of its values are boolean, numeric, or string, and then + casts the column to that type using `convert_to_type`. + + :param df: The input DataFrame whose columns will be converted. + :param print_invalid_values: If True, print any original values that could + not be converted (they become NaN after conversion) + :return: a new DataFrame with each column cast to its detected predominant + type. + """ + df_out = pd.DataFrame(index=df.index) + for col in df.columns: + series = df[col] + # Determine the dominant datatype. 
+ col_type = infer_column_types(series)["type"] + hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) + # Convert the column to dominant datatype. + converted = convert_to_type(series, col_type) + if print_invalid_values: + invalid_mask = series.notna() & converted.isna() + if invalid_mask.any(): + invalid = series[invalid_mask].tolist() + print(f"Column {col} dropped invalid values: {invalid}") + df_out[col] = converted + return df_out + + +# ############################################################################# + + +def read_csv_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a CSV file into a `pd.DataFrame`. + """ + # Gets filename from stream if it is not already a string, + # so it can be inspected for extension type. + file_name = stream if isinstance(stream, str) else vars(stream)["path"] + # Handle zipped files. + if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "gzip" + elif file_name.endswith(".zip"): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "zip" + # Read. + _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_csv(stream, *args, **kwargs) + return df + + +def read_parquet_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a Parquet file into a `pd.DataFrame`. + """ + # Read. + _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_parquet(stream, *args, **kwargs) + return df + + +# ############################################################################# + + +# TODO(Paul): Add unit tests. +def compute_weighted_sum( + dfs: Dict[str, pd.DataFrame], + weights: pd.DataFrame, + *, + index_mode: str = "assert_equal", +) -> Dict[str, pd.DataFrame]: + """ + Compute weighted sums of `dfs` using `weights`. 
+ + :param dfs: dataframes keyed by id; all dfs should have the same cols, + indices are handled based on the `index_mode` + :param weights: float weights indexed by id with unique col names + :param index_mode: same as `mode` in `apply_index_mode()` + :return: weighted sums keyed by weight col names + """ + hdbg.dassert_isinstance(dfs, dict) + hdbg.dassert(dfs, "dictionary of dfs must be nonempty") + # Get a dataframe from the dictionary and record its index and columns. + id_ = list(dfs)[0] + hdbg.dassert_isinstance(id_, str) + df = dfs[id_] + hdbg.dassert_isinstance(df, pd.DataFrame) + cols = df.columns + # Sanity-check dataframes in dictionary. + for key, value in dfs.items(): + hdbg.dassert_isinstance(key, str) + hdbg.dassert_isinstance(value, pd.DataFrame) + # The reference df is not modified. + _, value = apply_index_mode(df, value, index_mode) + hdbg.dassert( + value.columns.equals(cols), + "Column equality fails for keys=%s, %s", + id_, + key, + ) + # Sanity-check weights. + hdbg.dassert_isinstance(weights, pd.DataFrame) + hdbg.dassert_eq(weights.columns.nlevels, 1) + hdbg.dassert(not weights.columns.has_duplicates) + hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) + # Create a multiindexed dataframe to facilitate computing the weighted sums. + weighted_dfs = {} + combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) + # TODO(Paul): Consider relaxing the NaN-handling. + for col in weights.columns: + weighted_combined_df = combined_df.multiply(weights[col], level=0) + weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( + min_count=len(dfs) + ) + weighted_dfs[col] = weighted_sums + return weighted_dfs + + +def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: + """ + Remove N rows from the input data and shuffle the remaining ones. 
+ + :param df: input data + :param nrows: the number of rows to remove from the original data + :param seed: see `random.seed()` + :return: shuffled data with removed rows + """ + hdbg.dassert_lte(1, nrows) + hdbg.dassert_lte(nrows, df.shape[0]) + idx = list(range(df.shape[0])) + random.seed(seed) + random.shuffle(idx) + idx = sorted(idx[nrows:]) + return df.iloc[idx] + + +def remap_obj( + obj: Union[pd.Series, pd.Index], + map_: Dict[Any, Any], + **kwargs: Any, +) -> pd.Series: + """ + Substitute each value of an object with another value from a dictionary. + + :param obj: an object to substitute value in + :param map_: values to substitute with + :return: remapped pandas series + """ + hdbg.dassert_lte(1, obj.shape[0]) + # TODO(Grisha): consider extending for other mapping types supported by + # `pd.Series.map`. + hdbg.dassert_isinstance(map_, dict) + # Check that every element of the object is in the mapping. + hdbg.dassert_is_subset(obj, map_.keys()) + new_srs = obj.map(map_, **kwargs) + return new_srs + + +def get_random_df( + num_cols: int, + seed: Optional[int] = None, + date_range_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + Compute df with random data with `num_cols` columns and index obtained by + calling `pd.date_range(**kwargs)`. 
+ + :param num_cols: the number of columns in a DataFrame to generate + :param seed: see `random.seed()` + :param date_range_kwargs: kwargs for `pd.date_range()` + """ + if seed: + np.random.seed(seed) + dt = pd.date_range(**date_range_kwargs) + df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) + return df + + +# ############################################################################# + +# TODO(gp): -> AxisNameSet +ColumnSet = Optional[Union[str, List[str]]] + + +# TODO(gp): -> _resolve_axis_names +def _resolve_column_names( + column_set: ColumnSet, + columns: Union[List[str], pd.Index], + *, + keep_order: bool = False, +) -> List[str]: + """ + Change format of the columns and perform some sanity checks. + + :param column_set: columns to proceed + :param columns: all columns available + :param keep_order: preserve the original order or allow sorting + """ + # Ensure that `columns` is well-formed. + if isinstance(columns, pd.Index): + columns = columns.to_list() + hdbg.dassert_isinstance(columns, list) + hdbg.dassert_lte(1, len(columns)) + # + if column_set is None: + # Columns were not specified, thus use the list of all the columns. + column_set = columns + else: + if isinstance(column_set, str): + column_set = [column_set] + hdbg.dassert_isinstance(column_set, list) + hdbg.dassert_lte(1, len(column_set)) + hdbg.dassert_is_subset(column_set, columns) + if keep_order: + # Keep the selected columns in the same order as in the original + # `columns`. + column_set = [c for c in columns if c in column_set] + return column_set + + +# TODO(Grisha): finish the function. +# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? +def remove_outliers( + df: pd.DataFrame, + lower_quantile: float, + *, + column_set: ColumnSet, + # TODO(Grisha): the params are not used. 
+ fill_value: float = np.nan, + mode: str = "remove_outliers", + axis: Any = 0, + upper_quantile: Optional[float] = None, +) -> pd.DataFrame: + hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") + # + hdbg.dassert_lte(0.0, lower_quantile) + if upper_quantile is None: + upper_quantile = 1.0 - lower_quantile + hdbg.dassert_lte(lower_quantile, upper_quantile) + hdbg.dassert_lte(upper_quantile, 1.0) + # + df = df.copy() + if axis == 0: + all_columns = df.columns + columns = _resolve_column_names(column_set, all_columns) + hdbg.dassert_is_subset(columns, df.columns) + for column in all_columns: + if column in columns: + df[column] = df[column].quantile( + [lower_quantile, upper_quantile] + ) + elif axis == 1: + all_rows = df.rows + rows = _resolve_column_names(column_set, all_rows) + hdbg.dassert_is_subset(rows, df.rows) + for row in all_rows: + if row in rows: + df[row] = df[row].quantile([lower_quantile, upper_quantile]) + else: + raise ValueError(f"Invalid axis='{axis}'") + return df + + +# ############################################################################# + + +# TODO(Grisha): add assertions/logging. +def get_df_from_iterator( + iter_: Iterator[pd.DataFrame], + *, + sort_index: bool = True, +) -> pd.DataFrame: + """ + Concat all the dataframes in the iterator in one dataframe. + + :param iter_: dataframe iterator + :param sort_index: whether to sort output index or not + :return: combined iterator data + """ + # TODO(gp): @all make a copy of `iter_` so we don't consume it. + dfs = list(iter_) + df_res = pd.concat(dfs) + if sort_index: + df_res = df_res.sort_index() + return df_res + + +def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> pd.DataFrame: + """ + Colorize a df with a heatmap depending on the numeric values. + + :param axis: along which axis to compute the heatmap + - 0 colorize along rows + - 1 colorize along columns + - None: colorize everything + """ + # Keep it here to avoid long start up times. 
+ import seaborn as sns + + cm = sns.diverging_palette(5, 250, as_cmap=True) + df = df.style.background_gradient(axis=axis, cmap=cm) + return df + + +def compare_nans_in_dataframes( + df1: pd.DataFrame, df2: pd.DataFrame +) -> pd.DataFrame: + """ + Compare equality of DataFrames in terms of NaNs. + + For example: + - `5 vs np.nan` is a mismatch + - `np.nan vs 5` is a mismatch + - `np.nan vs np.nan` is a match + - `np.nan vs np.inf` is a mismatch + + :param df1: dataframe to compare + :param df2: dataframe to compare with + :return: dataframe that shows the differences stacked side by side, see + `pandas.DataFrame.compare()` for an example + """ + dassert_axes_equal(df1, df2) + # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. + mask1 = df1.isna() & ~df2.isna() + mask2 = ~df1.isna() & df2.isna() + mask3 = mask1 | mask2 + # Compute a dataframe with the differences. + nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) + return nan_diff_df + + +# TODO(Grisha): -> `compare_dataframes()`? +def compare_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + row_mode: str = "equal", + column_mode: str = "equal", + # TODO(Grisha): should be True by default? + compare_nans: bool = False, + diff_mode: str = "diff", + assert_diff_threshold: float = 1e-3, + close_to_zero_threshold: float = 1e-6, + zero_vs_zero_is_zero: bool = True, + remove_inf: bool = True, + log_level: int = logging.DEBUG, + only_warning: bool = True, +) -> pd.DataFrame: + """ + Compare two dataframes. + + This works for dataframes with and without multi-index. 
+ + :param row_mode: control how the rows are handled + - "equal": rows need to be the same for the two dataframes + - "inner": compute the common rows for the two dataframes + :param column_mode: same as `row_mode` + :param compare_nans: include NaN comparison if True otherwise just + compare non-NaN values + :param diff_mode: control how the dataframes are compared in terms of + corresponding elements + - "diff": use the difference + - "pct_change": use the percentage difference + :param assert_diff_threshold: maximum allowed total difference + - do not assert if `None` + - works when `diff_mode` is "pct_change" + :param close_to_zero_threshold: round numbers below the threshold to 0 + :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 + if True, otherwise keep the actual result + :param remove_inf: replace +-inf with `np.nan` + :param log_level: logging level + :param only_warning: when `True` the function issues a warning instead of aborting + :return: a singe dataframe with differences as values + """ + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + # Check value of `assert_diff_threshold`, if it was passed. + if assert_diff_threshold: + hdbg.dassert_lte(assert_diff_threshold, 1.0) + hdbg.dassert_lte(0.0, assert_diff_threshold) + # TODO(gp): Factor out this logic and use it for both compare_visually_dfs + # and + if row_mode == "equal": + dassert_indices_equal(df1, df2) + elif row_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. + same_rows = list((set(df1.index)).intersection(set(df2.index))) + df1 = df1[df1.index.isin(same_rows)] + df2 = df2[df2.index.isin(same_rows)] + else: + raise ValueError(f"Invalid row_mode='{row_mode}'") + # + if column_mode == "equal": + hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) + elif column_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. 
+ col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) + df1 = df1[col_names] + df2 = df2[col_names] + else: + raise ValueError(f"Invalid column_mode='{column_mode}'") + # Round small numbers to 0 to exclude them from the diff computation. + close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold + df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( + 0 + ) + df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( + 0 + ) + # Compute the difference df. + if diff_mode == "diff": + # Test and convert the assertion into a boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + df1, df2, check_like=True, check_dtype=False + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise an assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n are not equal.", + df_to_str(df1, log_level=log_level), + df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Calculate the difference. + df_diff = df1 - df2 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + elif diff_mode == "pct_change": + # Compare NaN values in dataframes. + nan_diff_df = compare_nans_in_dataframes(df1, df2) + _LOG.debug("Dataframe with NaN differences=\n%s", df_to_str(nan_diff_df)) + msg = "There are NaN values in one of the dataframes that are not in the other one." + hdbg.dassert_eq( + 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning + ) + # Compute pct_change. + df_diff = 100 * (df1 - df2) / df2.abs() + if zero_vs_zero_is_zero: + # When comparing 0 to 0 set the diff (which is NaN by default) to 0. + df1_mask = df1 == 0 + df2_mask = df2 == 0 + zero_vs_zero_mask = df1_mask & df2_mask + df_diff[zero_vs_zero_mask] = 0 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + # Check if `df_diff` values are less than `assert_diff_threshold`. 
+ if assert_diff_threshold is not None: + nan_mask = df_diff.isna() + within_threshold = ( + df_diff.abs() <= assert_diff_threshold + ) | nan_mask + expected = pd.DataFrame( + True, + index=within_threshold.index, + columns=within_threshold.columns, + ) + # Test and convert the assertion into boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + within_threshold, expected, check_exact=True + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", + df_to_str(df1, log_level=log_level), + df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Report max diff. + max_diff = df_diff.abs().max().max() + _LOG.log( + log_level, + "Maximum percentage difference between the two dataframes = %s", + max_diff, + ) + else: + raise ValueError(f"diff_mode={diff_mode}") + df_diff = df_diff.add_suffix(f".{diff_mode}") + return df_diff + + +# ############################################################################# +# Multi-index dfs +# ############################################################################# + + +# TODO(Grisha): should be a more elegant way to add a column. +def add_multiindex_col( + df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str +) -> pd.DataFrame: + """ + Add column to a multiindex DataFrame. + + Note: each column in a multiindex DataFrame is a DataFrame itself. + + :param df: multiindex df + :param multiindex_col: column (i.e. 
singleindex df) of a multiindex df + :param col_name: name of a new column + :return: a multiindex DataFrame with a new column + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) + hdbg.dassert_isinstance(col_name, str) + hdbg.dassert_not_in(col_name, df.columns) + for col in multiindex_col.columns: + df[col_name, col] = multiindex_col[col] + return df + + +def list_to_str( + vals: List[Any], + *, + sep_char: str = ", ", + enclose_str_char: str = "'", + max_num: Optional[int] = 10, +) -> str: + """ + Convert a list of values into a formatted string representation. + + E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" + + :param vals: values to be converted + :param sep_char: separator to use between elements + :param enclose_str_char: character to enclose each element's string + representation; if empty, elements are not enclosed + :param max_num: maximum number of elements to display in the output + :return: the formatted string representing the list + """ + vals_as_str = list(map(str, vals)) + # Add a str around. + if enclose_str_char: + vals_as_str = [ + enclose_str_char + v + enclose_str_char for v in vals_as_str + ] + # + ret = f"{len(vals)} [" + if max_num is not None and len(vals) > max_num: + hdbg.dassert_lt(1, max_num) + ret += sep_char.join(vals_as_str[: int(max_num / 2)]) + ret += sep_char + "..." + sep_char + ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) + else: + ret += sep_char.join(vals_as_str) + ret += "]" + return ret + + +def multiindex_df_info( + df: pd.DataFrame, + *, + log_level: int = logging.INFO, + **list_to_str_kwargs: Dict[str, Any], +) -> str: + """ + Report information about a multi-index df. 
+ """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + columns_level0 = df.columns.levels[0] + columns_level1 = df.columns.levels[1] + rows = df.index + ret = [] + ret.append( + f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" + ) + ret.append( + "columns_level0=" + list_to_str(columns_level0, **list_to_str_kwargs) + ) + ret.append( + "columns_level1=" + list_to_str(columns_level1, **list_to_str_kwargs) + ) + ret.append("rows=" + list_to_str(rows, **list_to_str_kwargs)) + if isinstance(df.index, pd.DatetimeIndex): + # Display timestamp info. + start_timestamp = df.index.min() + end_timestamp = df.index.max() + frequency = df.index.freq + if frequency is None: + # Try to infer frequency. + frequency = pd.infer_freq(df.index) + ret.append(f"start_timestamp={start_timestamp}") + ret.append(f"end_timestamp={end_timestamp}") + ret.append(f"frequency={frequency}") + ret = "\n".join(ret) + _LOG.log(log_level, ret) + return ret + + +def subset_multiindex_df( + df: pd.DataFrame, + *, + # TODO(gp): Consider passing trim_df_kwargs as kwargs. + start_timestamp: Optional[pd.Timestamp] = None, + end_timestamp: Optional[pd.Timestamp] = None, + columns_level0: ColumnSet = None, + columns_level1: ColumnSet = None, + keep_order: bool = False, +) -> pd.DataFrame: + """ + Filter multi-index DataFrame by timestamp index and column levels. + + :param start_timestamp: see `trim_df()` + :param end_timestamp: see `trim_df()` + :param columns_level0: column names that corresponds to `df.columns.levels[0]` + - `None` means no filtering + :param columns_level1: column names that corresponds to `df.columns.levels[1]` + - `None` means no filtering + :param keep_order: see `_resolve_column_names()` + :return: filtered DataFrame + """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + # Filter by timestamp. 
def compare_multiindex_dfs(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    *,
    subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None,
    compare_dfs_kwargs: Optional[Dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Subset two multi-index dfs in the same way and then compare them.

    :param df1: first multi-index df
    :param df2: second multi-index df
    :param subset_multiindex_df_kwargs: params forwarded to
        `subset_multiindex_df()` for both dfs; `None` means no params
    :param compare_dfs_kwargs: params forwarded to `compare_dfs()`; `None`
        means no params
    :return: df with differences as values
    """
    # Resolve the optional kwargs.
    subset_kwargs = (
        {}
        if subset_multiindex_df_kwargs is None
        else subset_multiindex_df_kwargs
    )
    compare_kwargs = {} if compare_dfs_kwargs is None else compare_dfs_kwargs
    # Apply the same subsetting to both dfs.
    trimmed_dfs = [
        subset_multiindex_df(df, **subset_kwargs) for df in (df1, df2)
    ]
    # Compare the subsetted dfs.
    return compare_dfs(trimmed_dfs[0], trimmed_dfs[1], **compare_kwargs)
# TODO(gp): Remove this since it's in Google API.
def to_gsheet(
    df: pd.DataFrame,
    gsheet_name: str,
    gsheet_sheet_name: str,
    overwrite: bool,
) -> None:
    """
    Write a dataframe into a sheet of a Google sheet.

    :param df: the dataframe to save
    :param gsheet_name: name of the target Google sheet; it must already
        exist on the Google Drive
    :param gsheet_sheet_name: name of the sheet inside the Google sheet
    :param overwrite: if True, the sheet is cleared before writing `df`; if
        False, `df` is concatenated to the current content of the sheet,
        duplicated rows are dropped, and the result is written
    """
    import gspread_pandas

    spread = gspread_pandas.Spread(
        gsheet_name, sheet=gsheet_sheet_name, create_sheet=True
    )
    if not overwrite:
        # Merge the existing content with the new data.
        existing_df = spread.sheet_to_df(index=None)
        df = pd.concat([existing_df, df]).drop_duplicates()
    else:
        spread.clear_sheet()
    spread.df_to_sheet(df, index=False)
+ """ + + # Description of the check. + description: str + # Description of the output. + comment: str + # Whether the check was successful or not. + is_ok: bool + + +# ############################################################################# +# CheckSummary +# ############################################################################# + + +class CheckSummary: + """ + Collect and report the results of several checks performed in a notebook. + """ + + def __init__(self, *, title: Optional[str] = ""): + self.title = title + # + self._array: List[_SummaryRow] = [] + + def add(self, description: str, comment: str, is_ok: bool) -> None: + """ + Add the result of a single check. + """ + summary_row = _SummaryRow(description, comment, is_ok) + self._array.append(summary_row) + + def is_ok(self) -> bool: + """ + Compute whether all the checks were succesfull or not. + """ + is_ok = all(sr.is_ok for sr in self._array) + return is_ok + + def report_outcome( + self, *, notebook_output: bool = True, assert_on_error: bool = True + ) -> Optional[str]: + """ + Report the result of the entire check. + + :param notebook_output: report the result of the checks for a + notebook or as a string + :param assert_on_error: assert if one check failed + """ + df = pd.DataFrame(self._array) + + # Compute result as a string. + result = [] + if self.title: + result.append("# " + self.title) + result.append(str(df)) + is_ok = self.is_ok() + result.append(f"is_ok={is_ok}") + result = "\n".join(result) + # Display on a notebook, if needed. + if notebook_output: + if self.title: + print(self.title) + + # Convert DataFrame to HTML with colored rows based on 'is_ok' column. + def _color_rows(row: bool) -> str: + """ + Apply red/green color based on boolean value in `row["is_ok"]`. 
def filter_df(
    df: pd.DataFrame,
    col_name: str,
    value: Any,
    *,
    invert: bool = False,
    check_value: bool = True,
    print_info: bool = True,
) -> pd.DataFrame:
    """
    Select the rows of `df` whose column `col_name` matches `value`.

    :param df: dataframe to filter
    :param col_name: column to test; must be a column of `df`
    :param value: a list selects the rows whose value is in the list; any
        other object selects the rows equal to it
    :param invert: select the rows that do not match instead
    :param check_value: for a non-list `value`, assert that it appears in
        the column
    :param print_info: log how many rows are selected
    :return: the selected rows
    """
    hdbg.dassert_in(col_name, df.columns)
    col = df[col_name]
    if isinstance(value, list):
        selected = col.isin(value)
    else:
        if check_value:
            hdbg.dassert_in(value, col.unique())
        selected = col == value
    # Flip the selection, if requested.
    selected = ~selected if invert else selected
    if print_info:
        num_selected = selected.sum()
        _LOG.info("selected=%s", hprint.perc(num_selected, df.shape[0]))
    return df[selected]
def rolling_corr_over_time(
    df: pd.DataFrame, com: float, nan_mode: str
) -> pd.DataFrame:
    """
    Compute exponentially-weighted correlation matrices over time.

    :param df: data with a strictly increasing index
    :param com: center of mass passed to `ewm()`; `3 * com` is used as
        `min_periods`
    :param nan_mode: how to treat NaNs before computing the correlation
        - "drop": drop the rows with any NaN
        - "fill_with_zero": replace NaNs with 0.0
        - "abort": raise if there is any NaN
    :return: corr_df is a multi-index df storing correlation matrices
        with labels
    """
    import helpers.hpandas_dassert as hpandass

    hpandass.dassert_strictly_increasing_index(df)
    # Reject unknown modes upfront.
    valid_modes = ("drop", "fill_with_zero", "abort")
    if nan_mode not in valid_modes:
        raise ValueError("Invalid nan_mode='%s'" % nan_mode)
    # Handle NaNs based on mode.
    if nan_mode == "drop":
        df = df.dropna(how="any")
    elif nan_mode == "fill_with_zero":
        df = df.fillna(0.0)
    else:
        # Mode is "abort".
        nan_count = np.isnan(df).sum().sum()
        if nan_count > 0:
            raise ValueError("df has %d nans\n%s" % (nan_count, df))
    return df.ewm(com=com, min_periods=3 * com).corr()
+ if not (sorted(eigval) == eigval).all(): + _LOG.debug("eigvals not sorted: %s", eigval) + if sort_eigvals: + _LOG.debug( + "Before sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec + ) + _LOG.debug("eigvals: %s", eigval) + idx = eigval.argsort()[::-1] + eigval = eigval[idx] + eigvec = eigvec[:, idx] + _LOG.debug( + "After sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec + ) + # + if (eigval == 0).all(): + eigvec = np.nan * eigvec + return eigval, eigvec + + +def rolling_pca_over_time( + df: pd.DataFrame, com: float, nan_mode: str, sort_eigvals: bool = True +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Compute rolling PCAs over time. + + :param sort_eigvals: sort the eigenvalues in descending orders + :return: + - eigval_df stores eigenvalues for the different components indexed by + timestamps + - eigvec_df stores eigenvectors as multiindex df + """ + import tqdm.autonotebook as tauton + + import helpers.hpandas_dassert as hpandass + + # Compute rolling correlation. + corr_df = rolling_corr_over_time(df, com, nan_mode) + # Compute eigvalues and eigenvectors. + timestamps = corr_df.index.get_level_values(0).unique() + eigval = np.zeros((timestamps.shape[0], df.shape[1])) + eigvec = np.zeros((timestamps.shape[0], df.shape[1], df.shape[1])) + for i, dt in tauton.tqdm( + enumerate(timestamps), + total=timestamps.shape[0], + desc="Computing rolling PCA", + ): + eigval[i], eigvec[i] = _get_eigvals_eigvecs(corr_df, dt, sort_eigvals) + # Package results. + eigval_df = pd.DataFrame(eigval, index=timestamps) + hdbg.dassert_eq(eigval_df.shape[0], len(timestamps)) + hpandass.dassert_strictly_increasing_index(eigval_df) + # Normalize by sum. + # TODO(gp): Move this up. 
def plot_time_distributions(
    dts: List[Union[datetime.datetime, pd.Timestamp]],
    mode: str,
    density: bool = True,
) -> "mpl.axes.Axes":
    """
    Plot the distribution of the timestamps `dts` bucketed according to `mode`.

    Every bucket of the chosen `mode` gets a bar (of height 0 when no
    timestamp falls into it) and the bars are drawn in natural order, so
    that the tick labels always line up with the bars.

    :param dts: timestamps to bucket; must be non-empty
    :param mode: how to bucket the timestamps
        - "time_of_the_day": 1-hour buckets labeled "HH:00"
        - "weekday": Mon, ..., Sun
        - "minute_of_the_hour": 0, ..., 59
        - "day_of_the_month": 1, ..., 31
        - "month_of_the_year": Jan, ..., Dec
        - "year": the years observed in `dts`, in increasing order
    :param density: plot the fraction of points per bucket if True, the raw
        count otherwise
    :return: the axes of the bar plot
    """
    hdbg.dassert_lte(1, len(dts))
    hdbg.dassert_type_in(dts[0], (datetime.datetime, pd.Timestamp))
    hdbg.dassert_in(
        mode,
        (
            "time_of_the_day",
            "weekday",
            "minute_of_the_hour",
            "day_of_the_month",
            "month_of_the_year",
            "year",
        ),
    )
    # For each mode compute:
    # - `vals`: the bucket id of each timestamp
    # - `buckets`: all the bucket ids, in plotting order
    # - `xticks`: one label per bucket, in the same order as `buckets`
    if mode == "time_of_the_day":
        # 1 hour bucket.
        vals = [dt.time().hour for dt in dts]
        buckets = list(range(24))
        xticks = ["%02d:00" % hour for hour in buckets]
    elif mode == "weekday":
        vals = [dt.date().weekday() for dt in dts]
        buckets = list(range(7))
        xticks = "Mon Tue Wed Thu Fri Sat Sun".split()
    elif mode == "minute_of_the_hour":
        # Minutes go from 0 to 59.
        vals = [dt.time().minute for dt in dts]
        buckets = list(range(60))
        xticks = list(map(str, buckets))
    elif mode == "day_of_the_month":
        vals = [dt.date().day for dt in dts]
        buckets = list(range(1, 31 + 1))
        xticks = list(map(str, buckets))
    elif mode == "month_of_the_year":
        vals = [dt.date().month for dt in dts]
        buckets = list(range(1, 12 + 1))
        xticks = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
    elif mode == "year":
        vals = [dt.date().year for dt in dts]
        # There is no fixed range of years: use the observed ones, sorted.
        buckets = sorted(set(vals))
        xticks = list(map(str, buckets))
    else:
        raise ValueError("Invalid mode='%s'" % mode)
    # Count the points per bucket. Reindexing on `buckets` fixes the order of
    # the bars and adds the empty buckets, so that bars and labels match.
    count = pd.Series(vals).value_counts().reindex(buckets, fill_value=0)
    hdbg.dassert_eq(count.sum(), len(dts))
    #
    if density:
        count = count / count.sum()
    label = "num points=%s" % len(dts)
    ax = count.plot(kind="bar", label=label, figsize=(20, 7))
    ax.set_xticklabels(xticks)
    if density:
        ax.set_ylabel("Probability")
    else:
        ax.set_ylabel("Count")
    ax.legend(loc="best")
    return ax
+ _LOG.warning("No predictor vars: skipping") + return None + # + col_names = [predicted_var] + predictor_vars + hdbg.dassert_is_subset(col_names, df.columns) + df = df[col_names].copy() + num_rows = df.shape[0] + # Shift. + if predicted_var_delay != 0: + df[predicted_var] = df[predicted_var].shift(predicted_var_delay) + _LOG.warning("Shifting predicted_var=%s", predicted_var_delay) + if predictor_vars_delay != 0: + df[predictor_vars] = df[predictor_vars].shift(predictor_vars_delay) + _LOG.warning("Shifting predictor_vars=%s", predictor_vars_delay) + # Remove non-finite values. + # TODO(gp): Use the function. + df.dropna(how="all", inplace=True) + num_rows_after_drop_nan_all = df.shape[0] + if num_rows_after_drop_nan_all != num_rows: + _LOG.info( + "Removed %s rows with all nans", + hprint.perc(num_rows - num_rows_after_drop_nan_all, num_rows), + ) + # + df.dropna(how="any", inplace=True) + num_rows_after_drop_nan_any = df.shape[0] + if num_rows_after_drop_nan_any != num_rows_after_drop_nan_all: + _LOG.warning( + "Removed %s rows with any nans", + hprint.perc(num_rows - num_rows_after_drop_nan_any, num_rows), + ) + # Prepare data. + if intercept: + if "const" not in df.columns: + df.insert(0, "const", 1.0) + predictor_vars = ["const"] + predictor_vars[:] + param_names = predictor_vars[:] + hdbg.dassert(np.all(np.isfinite(df[predicted_var].values))) + hdbg.dassert( + np.all(np.isfinite(df[predictor_vars].values)), + msg="predictor_vars=%s" % predictor_vars, + ) + # Perform regression. 
def ols_regress(
    df: pd.DataFrame,
    predicted_var: str,
    predictor_vars: str,
    intercept: bool,
    print_model_stats: bool = True,
    tsplot: bool = False,
    tsplot_figsize: Optional[Any] = None,
    jointplot_: bool = True,
    jointplot_height: Optional[Any] = None,
    predicted_var_delay: int = 0,
    predictor_vars_delay: int = 0,
    max_nrows: float = 1e4,
) -> Optional[Dict[str, Any]]:
    """
    Run an OLS regression between columns of a dataframe.

    The data is first prepared by `_preprocess_regression()`.

    :param df: dataframe with the data
    :param predicted_var: y variable
    :param predictor_vars: x variable(s), forwarded as-is to
        `_preprocess_regression()`
    :param intercept: forwarded to `_preprocess_regression()` and passed to
        the OLS model as `hasconst`
    :param print_model_stats: if True, log the model summary and return
        `None`; if False, return the regression results
    :param tsplot: plot predicted and predictor variables over time (only
        with a single predictor)
    :param tsplot_figsize: size of the time-series plot; `None` means
        `hmatplo.FIG_SIZE`
    :param jointplot_: plot a scatter plot (only with a single predictor)
    :param jointplot_height: height of the scatter plot; `None` means
        `hmatplo.FIG_SIZE[1]`
    :param predicted_var_delay: forwarded to `_preprocess_regression()`
    :param predictor_vars_delay: forwarded to `_preprocess_regression()`
    :param max_nrows: do not plot if there are too many rows, since
        notebook can be slow or hang
    :return: `None` if the preprocessing yields nothing to regress or if
        `print_model_stats` is True, otherwise a dict with param names,
        coefficients, p-values, (adjusted) R^2, and the fitted model
    """
    import statsmodels.api

    import helpers.hmatplotlib as hmatplo

    preprocessed = _preprocess_regression(
        df,
        intercept,
        predicted_var,
        predicted_var_delay,
        predictor_vars,
        predictor_vars_delay,
    )
    if preprocessed is None:
        return None
    df, param_names, predictor_vars = preprocessed
    hdbg.dassert_lte(1, df.shape[0])
    # Fit the model.
    ols = statsmodels.api.OLS(
        df[predicted_var], df[predictor_vars], hasconst=intercept
    )
    model = ols.fit()
    regr_res = {
        "param_names": param_names,
        "coeffs": model.params,
        "pvals": model.pvalues,
        # pylint: disable=no-member
        "rsquared": model.rsquared,
        "adj_rsquared": model.rsquared_adj,
        "model": model,
    }
    if print_model_stats:
        # pylint: disable=no-member
        _LOG.info(model.summary().as_text())
    # Plot, if requested and feasible.
    plot_requested = tsplot or jointplot_
    too_many_rows = max_nrows is not None and df.shape[0] > max_nrows
    if plot_requested and too_many_rows:
        _LOG.warning(
            "Skipping plots since df has %d > %d rows",
            df.shape[0],
            max_nrows,
        )
    elif plot_requested:
        # The constant column is not a variable to plot.
        plot_vars = [var for var in predictor_vars if var != "const"]
        if len(plot_vars) != 1:
            _LOG.warning("Skipping plots since there are too many predictors")
        else:
            predictor_var = plot_vars[0]
            if tsplot:
                # Plot the data over time.
                figsize = (
                    hmatplo.FIG_SIZE
                    if tsplot_figsize is None
                    else tsplot_figsize
                )
                df[[predicted_var, predictor_var]].plot(figsize=figsize)
            if jointplot_:
                # Perform scatter plot.
                height = (
                    hmatplo.FIG_SIZE[1]
                    if jointplot_height is None
                    else jointplot_height
                )
                jointplot(
                    df,
                    predicted_var,
                    predictor_var,
                    height=height,
                )
    # The stats are either printed or returned.
    return None if print_model_stats else regr_res
def robust_regression(
    df: pd.DataFrame,
    predicted_var: str,
    predictor_vars: str,
    intercept: bool,
    jointplot_: bool = True,
    jointplot_figsize: Optional[Any] = None,
    predicted_var_delay: int = 0,
    predictor_vars_delay: int = 0,
) -> None:
    """
    Compare a plain linear regression with a RANSAC robust regression.

    The data is prepared by `_preprocess_regression()` and both models are
    fit on the preprocessed data. Exactly one predictor is supported.

    :param df: dataframe with data
    :param predicted_var: dependent variable column name
    :param predictor_vars: independent variable column name(s); after
        preprocessing there must be exactly one
    :param intercept: forwarded to `_preprocess_regression()`; the sklearn
        models are built with their default settings regardless of it
    :param jointplot_: whether to create a scatter plot of inliers and
        outliers together with the two fitted lines
    :param jointplot_figsize: size of the plot; `None` means
        `hmatplo.FIG_SIZE`
    :param predicted_var_delay: shift predicted variable by this many
        periods
    :param predictor_vars_delay: shift predictor variables by this many
        periods
    """
    import matplotlib.pyplot as plt
    import sklearn.linear_model

    import helpers.hmatplotlib as hmatplo

    obj = _preprocess_regression(
        df,
        intercept,
        predicted_var,
        predicted_var_delay,
        predictor_vars,
        predictor_vars_delay,
    )
    if obj is None:
        return
    # Use the preprocessed data (shifted, without NaNs) and the predictors
    # normalized to a list, instead of the raw inputs.
    df, _, predictor_vars = obj
    # `_preprocess_regression()` adds a "const" predictor when
    # `intercept=True`: it is not a variable to regress against here.
    predictor_vars = [var for var in predictor_vars if var != "const"]
    # From http://scikit-learn.org/stable/auto_examples/linear_model/
    # plot_robust_fit.html#sphx-glr-auto-examples-linear-model-plot-robust-fit-py
    # TODO(gp): Add also TheilSenRegressor and HuberRegressor.
    hdbg.dassert_eq(len(predictor_vars), 1)
    y = df[predicted_var]
    # `X` is a single-column dataframe, i.e., 2-D as sklearn requires.
    X = df[predictor_vars]
    # Fit line using all data.
    lr = sklearn.linear_model.LinearRegression()
    lr.fit(X, y)
    # Robustly fit linear model with RANSAC algorithm.
    ransac = sklearn.linear_model.RANSACRegressor()
    ransac.fit(X, y)
    inlier_mask = ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    # Predict data of estimated models on a grid spanning the predictor.
    line_X = np.linspace(X.min().values[0], X.max().values[0], num=100)[
        :, np.newaxis
    ]
    line_y = lr.predict(line_X)
    line_y_ransac = ransac.predict(line_X)
    # Compare estimated coefficients.
    _LOG.info("Estimated coef for linear regression=%s", lr.coef_)
    _LOG.info("Estimated coef for RANSAC=%s", ransac.estimator_.coef_)
    if jointplot_:
        if jointplot_figsize is None:
            jointplot_figsize = hmatplo.FIG_SIZE
        plt.figure(figsize=jointplot_figsize)
        plt.scatter(
            X[inlier_mask],
            y[inlier_mask],
            color="red",
            marker="o",
            label="Inliers",
        )
        plt.scatter(
            X[outlier_mask],
            y[outlier_mask],
            color="blue",
            marker="o",
            label="Outliers",
        )
        plt.plot(line_X, line_y, color="green", linewidth=2, label="OLS")
        plt.plot(
            line_X, line_y_ransac, color="black", linewidth=3, label="RANSAC"
        )
        plt.legend(loc="best")
        plt.xlabel(", ".join(predictor_vars))
        plt.ylabel(predicted_var)
class CheckSummary:
    """
    Accumulate the outcome of several checks and report them together.
    """

    def __init__(self, *, title: Optional[str] = ""):
        self.title = title
        # Rows of the summary, one per check, in insertion order.
        self._array: List[_SummaryRow] = []

    def add(self, description: str, comment: str, is_ok: bool) -> None:
        """
        Record the outcome of one check.

        :param description: description of the check
        :param comment: description of the output
        :param is_ok: whether the check was successful or not
        """
        self._array.append(_SummaryRow(description, comment, is_ok))

    def is_ok(self) -> bool:
        """
        Return whether every recorded check was successful.
        """
        for summary_row in self._array:
            if not summary_row.is_ok:
                return False
        return True

    def report_outcome(
        self, *, notebook_output: bool = True, assert_on_error: bool = True
    ) -> Optional[str]:
        """
        Report the outcome of all the recorded checks.

        :param notebook_output: if True, display the summary (as a styled
            df) and return `None`; if False, return the summary as a string
        :param assert_on_error: raise `ValueError` if at least one check
            failed
        :return: the summary as a string, or `None` for notebook output
        """
        summary_df = pd.DataFrame(self._array)
        # Build the textual report.
        header = ["# " + self.title] if self.title else []
        df_as_str = str(summary_df)
        all_ok = self.is_ok()
        report = "\n".join(header + [df_as_str, f"is_ok={all_ok}"])
        # Display on a notebook, if needed.
        if notebook_output:
            if self.title:
                print(self.title)

            def _color_rows(row: pd.Series) -> List[str]:
                """
                Color a row green if `row["is_ok"]` is true, red otherwise.
                """
                color = "#ACF3AE" if row["is_ok"] else "#FA6B84"
                return [f"background-color: {color}"] * len(row)

            styled_df = summary_df.style.apply(_color_rows, axis=1)
            from IPython.display import display

            display(styled_df)
            print(f"is_ok={all_ok}")
        # Raise if at least one of the checks failed.
        if assert_on_error and not all_ok:
            raise ValueError("The checks have failed:\n" + report)
        # For notebooks the outcome was already displayed, so there is
        # nothing to return.
        return None if notebook_output else report
+ + See the official docs: + - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html + - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html + + :param data: input series or dataframe + :param use_index: whether to consider index values when identifying duplicates + - if `True`, use index values together with a column subset for + identifying duplicates + - if `False`, duplicated rows are with the exact same values in a subset + and different indices + :param column_subset: a list of columns to consider for identifying duplicates + :param args: additional arguments passed to pandas.drop_duplicates() + :param kwargs: additional keyword arguments passed to pandas.drop_duplicates() + :return: data without duplicates + """ + _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) + num_rows_before = data.shape[0] + # Get all columns list for subset if no subset is passed. + if column_subset is None: + column_subset = data.columns.tolist() + else: + hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") + if use_index: + # Add dummy index column to use it for duplicates detection. + index_col_name = "use_index_col" + hdbg.dassert_not_in(index_col_name, data.columns.tolist()) + column_subset.insert(0, index_col_name) + data[index_col_name] = data.index + # Drop duplicates based on the column subset. + data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) + # Clean up the temporary index column if it was added. + if use_index: + # Remove dummy index column. + data_no_dups = data_no_dups.drop([index_col_name], axis=1) + # Report the change. 
+ num_rows_after = data_no_dups.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return data_no_dups + + +def dropna( + df: pd.DataFrame, + *args: Any, + drop_infs: bool = False, + report_stats: bool = False, + **kwargs: Any, +) -> pd.DataFrame: + """ + Create a wrapper around pd.dropna() reporting information about the removed + rows. + + :param df: dataframe to process + :param drop_infs: if +/- np.inf should be considered as nans + :param report_stats: if processing stats should be reported + :return: dataframe with nans dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + num_rows_before = df.shape[0] + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + df = df.dropna(*args, **kwargs) + if report_stats: + num_rows_after = df.shape[0] + pct_removed = hprint.perc( + num_rows_before - num_rows_after, num_rows_before + ) + _LOG.info("removed rows with nans: %s", pct_removed) + return df + + +def drop_axis_with_all_nans( + df: pd.DataFrame, + drop_rows: bool = True, + drop_columns: bool = False, + drop_infs: bool = False, + report_stats: bool = False, +) -> pd.DataFrame: + """ + Remove columns and rows not containing information (e.g., with only nans). + + The operation is not performed in place and the resulting df is + returned. Assume that the index is timestamps. + + :param df: dataframe to process + :param drop_rows: remove rows with only nans + :param drop_columns: remove columns with only nans + :param drop_infs: remove also +/- np.inf + :param report_stats: report the stats of the operations + :return: dataframe with specific nan axis dropped + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + if drop_infs: + df = df.replace([np.inf, -np.inf], np.nan) + if drop_columns: + # Remove columns with all nans, if any. + cols_before = df.columns[:] + df = df.dropna(axis=1, how="all") + if report_stats: + # Report results. 
+ cols_after = df.columns[:] + removed_cols = set(cols_before).difference(set(cols_after)) + pct_removed = hprint.perc( + len(cols_before) - len(cols_after), len(cols_after) + ) + _LOG.info( + "removed cols with all nans: %s %s", + pct_removed, + hprint.list_to_str(removed_cols), + ) + if drop_rows: + # Remove rows with all nans, if any. + rows_before = df.index[:] + df = df.dropna(axis=0, how="all") + if report_stats: + # Report results. + rows_after = df.index[:] + removed_rows = set(rows_before).difference(set(rows_after)) + if len(rows_before) == len(rows_after): + # Nothing was removed. + min_ts = max_ts = None + else: + # TODO(gp): Report as intervals of dates. + min_ts = min(removed_rows) + max_ts = max(removed_rows) + pct_removed = hprint.perc( + len(rows_before) - len(rows_after), len(rows_after) + ) + _LOG.info( + "removed rows with all nans: %s [%s, %s]", + pct_removed, + min_ts, + max_ts, + ) + return df + + +def drop_duplicated( + df: pd.DataFrame, *, subset: Optional[List[str]] = None +) -> pd.DataFrame: + """ + Implement `df.duplicated` but considering also the index and ignoring nans. + """ + _LOG.debug("before df=\n%s", hpanutil.df_to_str(df)) + # Move the index to the df. + old_index_name = df.index.name + new_index_name = "_index.tmp" + hdbg.dassert_not_in(new_index_name, df.columns) + df.index.name = new_index_name + df.reset_index(drop=False, inplace=True) + # Remove duplicates by ignoring nans. + if subset is not None: + hdbg.dassert_isinstance(subset, list) + subset = [new_index_name] + subset + duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") + # Report the result of the operation. 
+ if duplicated.sum() > 0: + num_rows_before = df.shape[0] + _LOG.debug( + "Removing duplicates df=\n%s", + hpanutil.df_to_str(df.loc[duplicated]), + ) + df = df.loc[~duplicated] + num_rows_after = df.shape[0] + _LOG.warning( + "Removed repeated rows num_rows=%s", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + _LOG.debug("after removing duplicates df=\n%s", hpanutil.df_to_str(df)) + # Set the index back. + df.set_index(new_index_name, inplace=True) + df.index.name = old_index_name + _LOG.debug("after df=\n%s", hpanutil.df_to_str(df)) + return df + + +def impute_nans(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame: + """ + Assign `value` to the `column` of `df` where the value is "nan". + + :param df: The DataFrame to modify. + :param column: The column in which to replace "nan" values. + :param value: The value to assign to "nan" entries. + :return: The DataFrame with the "nan" values assigned. + """ + df[column] = df[column].astype(str) + mask = df[column] == "nan" + # Assign the new value or keep the original value. + df[column] = np.where(mask, value, df[column]) + # There should be no more nans. + mask = df[column] == "nan" + hdbg.dassert_eq(mask.sum(), 0) + # + return df + + +# ############################################################################# + + +def remove_outliers( + df: pd.DataFrame, + lower_quantile: float, + *, + column_set: hpanutil.ColumnSet, + # TODO(Grisha): the params are not used. + fill_value: float = np.nan, + mode: str = "remove_outliers", + axis: Any = 0, + upper_quantile: Optional[float] = None, +) -> pd.DataFrame: + """ + Remove outliers from a dataframe based on quantile thresholds. 
+ + :param df: input dataframe + :param lower_quantile: lower quantile threshold (0.0 to 1.0) + :param column_set: columns to apply outlier removal to + :param fill_value: value to use for filling outliers (currently unused) + :param mode: outlier removal mode (currently unused) + :param axis: axis along which to compute quantiles (0 for columns, 1 for rows) + :param upper_quantile: upper quantile threshold, defaults to 1 - lower_quantile + :return: dataframe with outliers removed based on quantile thresholds + """ + hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") + # Validate quantile parameters. + hdbg.dassert_lte(0.0, lower_quantile) + if upper_quantile is None: + upper_quantile = 1.0 - lower_quantile + hdbg.dassert_lte(lower_quantile, upper_quantile) + hdbg.dassert_lte(upper_quantile, 1.0) + # Create a copy of the dataframe to avoid modifying the original. + df = df.copy() + if axis == 0: + all_columns = df.columns + columns = hpanutil.resolve_column_names(column_set, all_columns) + hdbg.dassert_is_subset(columns, df.columns) + for column in all_columns: + if column in columns: + df[column] = df[column].quantile( + [lower_quantile, upper_quantile] + ) + elif axis == 1: + all_rows = df.rows + rows = hpanutil.resolve_column_names(column_set, all_rows) + hdbg.dassert_is_subset(rows, df.rows) + for row in all_rows: + if row in rows: + df[row] = df[row].quantile([lower_quantile, upper_quantile]) + else: + raise ValueError(f"Invalid axis='{axis}'") + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py new file mode 100644 index 000000000..b40308daa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py @@ -0,0 +1,289 @@ +""" +Import as: + +import 
helpers.hpandas_compare as hpancomp +""" + +import logging +from typing import List + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_dassert as hpandass +import helpers.hpandas_utils as hpanutil + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + + +def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: + """ + Compare contents of rows with same indices. + + Index is set to default sequential integer values because compare is + sensitive to multi index (probably because new multi indexes are created + for each difference in `compare`). Multi index columns are regular columns now. + Excess columns are removed so both dataframes are always same shape because + `compare` expects identical dataframes (same number of rows, columns, etc.). + + :param df1: first dataframe for comparison + :param df2: second dataframe for comparison + :return: dataframe with data with same indices and different contents + """ + # Get rows on which the two dataframe indices match. + idx_intersection = df1.index.intersection(df2.index) + # Remove excess columns and reset indexes. + trimmed_second = df2.loc[idx_intersection].reset_index() + trimmed_first = df1.loc[idx_intersection].reset_index() + # Get difference between second and first dataframe. + data_difference = trimmed_second.compare(trimmed_first) + # Update data difference with original dataframe index names + # for easier identification. + index_names = tuple(df2.index.names) + # If index or multi index is named, it will be visible in data difference. 
+ if index_names != (None,): + for index in data_difference.index: + for column in index_names: + data_difference.loc[index, column] = trimmed_second.loc[index][ + column + ] + data_difference = data_difference.convert_dtypes() + return data_difference + + +def compare_nans_in_dataframes( + df1: pd.DataFrame, df2: pd.DataFrame +) -> pd.DataFrame: + """ + Compare equality of DataFrames in terms of NaNs. + + For example: + - `5 vs np.nan` is a mismatch + - `np.nan vs 5` is a mismatch + - `np.nan vs np.nan` is a match + - `np.nan vs np.inf` is a mismatch + + :param df1: dataframe to compare + :param df2: dataframe to compare with + :return: dataframe that shows the differences stacked side by side, see + `pandas.DataFrame.compare()` for an example + """ + hpandass.dassert_axes_equal(df1, df2) + # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. + mask1 = df1.isna() & ~df2.isna() + mask2 = ~df1.isna() & df2.isna() + mask3 = mask1 | mask2 + # Compute a dataframe with the differences. + nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) + return nan_diff_df + + +# TODO(Grisha): -> `compare_dataframes()`? + + +def compare_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + row_mode: str = "equal", + column_mode: str = "equal", + # TODO(Grisha): should be True by default? + compare_nans: bool = False, + diff_mode: str = "diff", + assert_diff_threshold: float = 1e-3, + close_to_zero_threshold: float = 1e-6, + zero_vs_zero_is_zero: bool = True, + remove_inf: bool = True, + log_level: int = logging.DEBUG, + only_warning: bool = True, +) -> pd.DataFrame: + """ + Compare two dataframes. + + This works for dataframes with and without multi-index. 
+ + :param row_mode: control how the rows are handled + - "equal": rows need to be the same for the two dataframes + - "inner": compute the common rows for the two dataframes + :param column_mode: same as `row_mode` + :param compare_nans: include NaN comparison if True otherwise just + compare non-NaN values + :param diff_mode: control how the dataframes are compared in terms of + corresponding elements + - "diff": use the difference + - "pct_change": use the percentage difference + :param assert_diff_threshold: maximum allowed total difference + - do not assert if `None` + - works when `diff_mode` is "pct_change" + :param close_to_zero_threshold: round numbers below the threshold to 0 + :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 + if True, otherwise keep the actual result + :param remove_inf: replace +-inf with `np.nan` + :param log_level: logging level + :param only_warning: when `True` the function issues a warning instead of aborting + :return: a singe dataframe with differences as values + """ + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + # Check value of `assert_diff_threshold`, if it was passed. + if assert_diff_threshold: + hdbg.dassert_lte(assert_diff_threshold, 1.0) + hdbg.dassert_lte(0.0, assert_diff_threshold) + # TODO(gp): Factor out this logic and use it for both compare_visually_dfs + # and + if row_mode == "equal": + hpandass.dassert_indices_equal(df1, df2) + elif row_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. + same_rows = list((set(df1.index)).intersection(set(df2.index))) + df1 = df1[df1.index.isin(same_rows)] + df2 = df2[df2.index.isin(same_rows)] + else: + raise ValueError(f"Invalid row_mode='{row_mode}'") + # Handle column comparison mode. 
+ if column_mode == "equal": + hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) + elif column_mode == "inner": + # TODO(gp): Add sorting on demand, otherwise keep the columns in order. + col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) + df1 = df1[col_names] + df2 = df2[col_names] + else: + raise ValueError(f"Invalid column_mode='{column_mode}'") + # Round small numbers to 0 to exclude them from the diff computation. + close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold + df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( + 0 + ) + df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( + 0 + ) + # Compute the difference df. + if diff_mode == "diff": + # Test and convert the assertion into a boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + df1, df2, check_like=True, check_dtype=False + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise an assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n are not equal.", + hpanutil.df_to_str(df1, log_level=log_level), + hpanutil.df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Calculate the difference. + df_diff = df1 - df2 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + elif diff_mode == "pct_change": + # Compare NaN values in dataframes. + nan_diff_df = compare_nans_in_dataframes(df1, df2) + _LOG.debug( + "Dataframe with NaN differences=\n%s", + hpanutil.df_to_str(nan_diff_df), + ) + msg = "There are NaN values in one of the dataframes that are not in the other one." + hdbg.dassert_eq( + 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning + ) + # Compute pct_change. + df_diff = 100 * (df1 - df2) / df2.abs() + if zero_vs_zero_is_zero: + # When comparing 0 to 0 set the diff (which is NaN by default) to 0. 
+ df1_mask = df1 == 0 + df2_mask = df2 == 0 + zero_vs_zero_mask = df1_mask & df2_mask + df_diff[zero_vs_zero_mask] = 0 + if remove_inf: + df_diff = df_diff.replace([np.inf, -np.inf], np.nan) + # Check if `df_diff` values are less than `assert_diff_threshold`. + if assert_diff_threshold is not None: + nan_mask = df_diff.isna() + within_threshold = ( + df_diff.abs() <= assert_diff_threshold + ) | nan_mask + expected = pd.DataFrame( + True, + index=within_threshold.index, + columns=within_threshold.columns, + ) + # Test and convert the assertion into boolean. + is_ok = True + try: + pd.testing.assert_frame_equal( + within_threshold, expected, check_exact=True + ) + except AssertionError as e: + is_ok = False + _ = e + # Check `is_ok` and raise assertion depending on `only_warning`. + if not is_ok: + hdbg._dfatal( + _, + "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", + hpanutil.df_to_str(df1, log_level=log_level), + hpanutil.df_to_str(df2, log_level=log_level), + only_warning=only_warning, + ) + # Report max diff. + max_diff = df_diff.abs().max().max() + _LOG.log( + log_level, + "Maximum percentage difference between the two dataframes = %s", + max_diff, + ) + else: + raise ValueError(f"diff_mode={diff_mode}") + df_diff = df_diff.add_suffix(f".{diff_mode}") + return df_diff + + +def find_common_columns( + names: List[str], dfs: List[pd.DataFrame] +) -> pd.DataFrame: + """ + Find common columns across multiple dataframes. 
+ + :param names: list of names for each dataframe + :param dfs: list of dataframes to compare + :return: dataframe showing common columns between each pair of dataframes + """ + df = [] + for i, df1 in enumerate(dfs): + df1 = dfs[i].columns + for j in range(i + 1, len(dfs)): + df2 = dfs[j].columns + common_cols = [c for c in df1 if c in df2] + df.append( + ( + names[i], + len(df1), + names[j], + len(df2), + len(common_cols), + ", ".join(common_cols), + ) + ) + df = pd.DataFrame( + df, + columns=[ + "table1", + "num_cols1", + "num_cols2", + "table2", + "num_comm_cols", + "common_cols", + ], + ) + return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py new file mode 100644 index 000000000..c9443c888 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py @@ -0,0 +1,221 @@ +""" +Import as: + +import helpers.hpandas_conversion as hpanconv +""" + +from typing import List, Optional, Union + +import numpy as np +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + +# ############################################################################# +# DataFrame/Series Conversion +# ############################################################################# + + +def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: + """ + Convert a pd.DataFrame with a single column into a pd.Series. The problem + is that empty df or df with a single row are not converted correctly to a + pd.Series. 
+ + :param df: dataframe with a single column to convert to a series + :param series_dtype: dtype of the desired series in case a DataFrame + is empty, otherwise inherit dtype from a DataFrame + """ + # See https://stackoverflow.com/questions/33246771 + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) + if df.empty: + srs = pd.Series(dtype=series_dtype) + elif df.shape[0] > 1: + srs = df.squeeze() + else: + srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) + srs.name = df.index.name + hdbg.dassert_isinstance(srs, pd.Series) + return srs + + +def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: + """ + Convert a single-column dataframe to a series or no-op if already a series. + """ + if isinstance(data, pd.Series): + return data + return to_series(data) + + +# ############################################################################# +# Infer type +# ############################################################################# + + +def infer_column_types(col: pd.Series): + """ + Determine which data type is most prevalent in a column. + + Examine the values in the given pandas Series and decides whether + the majority of entries are strings, numeric values, or booleans. + + :param col: The column to inspect. + :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, + representing the predominant type. 
+ """ + vals = { + "is_numeric": pd.to_numeric(col, errors="coerce").notna(), + #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), + "is_bool": col.map(lambda x: isinstance(x, bool)), + "is_string": col.map(lambda x: isinstance(x, str)), + } + vals = {k: float(v.mean()) for k, v in vals.items()} + # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", + # (vals["is_numeric"] >= vals["is_string"], "is_numeric", + # "is_string")) + if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): + type_ = "is_bool" + elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): + type_ = "is_numeric" + else: + type_ = "is_string" + vals["type"] = type_ + return vals + + +def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Identify the predominant data type for each column in a DataFrame. + + :param df: The DataFrame whose columns will be analyzed. + :return: A DataFrame with two columns: + - `column`: the name of each original column. + - `predominant_type`: the most frequent type in that column, + one of `"string"`, `"numeric"`, or `"bool"`. + """ + return df.apply(lambda x: pd.Series(infer_column_types(x))).T + + +def convert_to_type(col: pd.Series, type_: str) -> pd.Series: + """ + Convert a pandas Series to a specified data type. + + :param col: The input column to be converted. + :param type_: The target data type. Expected values include: + - `"is_bool"`: convert values to booleans. + - `"is_int"`: convert values to integers. + - `"is_numeric"`: convert values to float. + - `"is_string"`: convert values to strings. + :return: A new Series with the same index as `col`, cast to the requested + type. 
+ """ + if type_ == "is_bool": + return col.map( + lambda x: ( + True + if x in ["True", 1, "1", "true", True] + else False + if x in [0, "0", "False", False, "false"] + else None + ) + ) + elif type_ == "is_int": + return pd.to_numeric(col, errors="coerce", downcast="integer") + elif type_ == "is_numeric": + return pd.to_numeric(col, errors="coerce") + elif type_ == "is_string": + return col.astype(str) + else: + raise ValueError(f"Unknown column type: {type_}") + + +def convert_col_to_int( + df: pd.DataFrame, + col: str, +) -> pd.DataFrame: + """ + Convert a column to an integer column. + + Example use case: Parquet uses categoricals. If supplied with a + categorical-type column, this function will convert it to an integer + column. + """ + import helpers.hpandas_dassert as hpandass + + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(col, str) + hdbg.dassert_in(col, df.columns) + # Attempt the conversion. + df[col] = df[col].astype("int64") + # Trust, but verify. + hpandass.dassert_series_type_is(df[col], np.int64) + return df + + +def cast_series_to_type( + series: pd.Series, series_type: Optional[type] +) -> pd.Series: + """ + Convert a Pandas series to a given type. + + :param series: the input series + :param series_type: the type to convert the series into + - if None, then the series values are turned into Nones + :return: the series in the required type + """ + if series_type is None: + # Turn the series values into None. + series[:] = None + elif series_type is pd.Timestamp: + # Convert to timestamp. + series = pd.to_datetime(series) + elif series_type is dict: + # Convert to dict. + series = series.apply(eval) + else: + # Convert to the specified type. + series = series.astype(series_type) + return series + + +def convert_df( + df: pd.DataFrame, *, print_invalid_values: bool = False +) -> pd.DataFrame: + """ + Convert each DataFrame column to its predominant type. 
+ + This function inspects every column in `df`, determines whether the + majority of its values are boolean, numeric, or string, and then + casts the column to that type using `convert_to_type`. + + :param df: The input DataFrame whose columns will be converted. + :param print_invalid_values: If True, print any original values that could + not be converted (they become NaN after conversion) + :return: a new DataFrame with each column cast to its detected predominant + type. + """ + df_out = pd.DataFrame(index=df.index) + for col in df.columns: + series = df[col] + # Determine the dominant datatype. + col_type = infer_column_types(series)["type"] + hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) + # Convert the column to dominant datatype. + converted = convert_to_type(series, col_type) + if print_invalid_values: + invalid_mask = series.notna() & converted.isna() + if invalid_mask.any(): + invalid = series[invalid_mask].tolist() + _LOG.info("Column %s dropped invalid values: %s", col, invalid) + df_out[col] = converted + return df_out + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py new file mode 100644 index 000000000..7d62b84b3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py @@ -0,0 +1,371 @@ +""" +Import as: + +import helpers.hpandas_dassert as hpandass +""" + +from typing import Any, Dict, Iterable, List, Optional, Union + +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +_LOG = hloggin.getLogger(__name__) + + +RowsValues = List[List[str]] + +# 
#############################################################################
# Index/Axis Validation & Assertions
# #############################################################################


def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index:
    """
    Return the index of a Pandas object.

    :param obj: pandas Index, DataFrame, or Series; an Index is returned
        as-is, otherwise `obj.index` is returned
    :return: the index of the object
    """
    if isinstance(obj, pd.Index):
        index = obj
    else:
        # Anything that is not an Index must be a Series or a DataFrame.
        hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame))
        index = obj.index
    return index


# TODO(gp): Maybe for symmetry with the other functions, rename to
# dassert_datetime_index


def dassert_index_is_datetime(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that the dataframe has an index containing datetimes.

    It works for both single and multi-indexed dataframes.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: message forwarded to the underlying `hdbg` assertion
    :param args: extra positional arguments forwarded together with `msg`
    """
    index = _get_index(obj)
    if isinstance(index, pd.MultiIndex):
        # In case of multi index check that at least one level is a datetime.
        is_any_datetime = any(
            isinstance(level, pd.DatetimeIndex) for level in index.levels
        )
        hdbg.dassert(is_any_datetime, msg, *args)
    else:
        hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args)


def dassert_unique_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has a unique index.

    When the index has duplicates, the rows with a duplicated index value are
    rendered and prepended to `msg` before the assertion is triggered.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: message forwarded to the underlying `hdbg` assertion
    :param args: extra positional arguments forwarded together with `msg`
    """
    import helpers.hpandas_utils as hpanutil

    index = _get_index(obj)
    if not index.is_unique:
        # Select every occurrence of a duplicated index value, not only the
        # repetitions after the first one.
        dup_indices = index.duplicated(keep=False)
        df_dup = obj[dup_indices]
        df_dup_as_str = hpanutil.df_to_str(df_dup)
        dup_msg = f"Duplicated rows are:\n{df_dup_as_str}\n"
        if msg is None:
            msg = dup_msg
        else:
            msg = dup_msg + msg
        # Pass `msg` positionally, like the other assertions in this module:
        # with `msg=msg, *args` the first element of `args` would also be
        # bound to `msg`, raising a `TypeError` instead of the assertion.
        hdbg.dassert(index.is_unique, msg, *args)


# TODO(gp): @all Add unit tests.
def dassert_increasing_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has an increasing index.

    When the index is not monotonically increasing, the offending rows (each
    element that is not greater than its predecessor, together with that
    predecessor) are rendered and prepended to `msg` before the assertion is
    triggered.

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: message forwarded to the underlying `hdbg` assertion
    :param args: extra positional arguments forwarded together with `msg`
    """
    import helpers.hpandas_utils as hpanutil

    index = _get_index(obj)
    if not index.is_monotonic_increasing:
        # Print information about the problematic indices like:
        # ```
        # Not increasing indices are:
        #                                  full_symbol         open         high
        # timestamp
        # 2018-08-17 01:39:00+00:00  binance::BTC_USDT  6339.250000  6348.910000
        # 2018-08-17 00:01:00+00:00   kucoin::ETH_USDT   286.712987   286.712987
        # ```
        # Find the problematic indices. Compare each label with the previous
        # one directly, instead of subtracting them and comparing against a
        # `pd.Timedelta`, so that the diagnostic does not require
        # datetime-like labels.
        mask = np.asarray(index[1:] <= index[:-1], dtype=bool)
        # The first element has no predecessor, so it is never flagged.
        mask = np.insert(mask, 0, False)
        # TODO(gp): We might want to specify an integer with how many rows before
        # after we want to show.
        # Shift back to get the previous index that was creating the issue.
        mask_shift = np.append(mask[1:], False)
        #
        mask = mask | mask_shift
        df_dup_as_str = hpanutil.df_to_str(obj[mask])
        dup_msg = f"Not increasing indices are:\n{df_dup_as_str}\n"
        if msg is None:
            msg = dup_msg
        else:
            msg = dup_msg + msg
        # Dump the data to file for further inspection.
        # obj.to_csv("index.csv")
        # Pass `msg` positionally, like the other assertions in this module:
        # with `msg=msg, *args` the first element of `args` would also be
        # bound to `msg`, raising a `TypeError` instead of the assertion.
        hdbg.dassert(index.is_monotonic_increasing, msg, *args)


# TODO(gp): @all Add more info in case of failures and unit tests.


def dassert_strictly_increasing_index(
    obj: Union[pd.Index, pd.DataFrame, pd.Series],
    msg: Optional[str] = None,
    *args: Any,
) -> None:
    """
    Ensure that a Pandas object has a strictly increasing index.

    The check is the combination of "unique" and "increasing".

    :param obj: pandas Index, DataFrame, or Series to check
    :param msg: message forwarded to both underlying checks
    :param args: extra positional arguments forwarded together with `msg`
    """
    dassert_unique_index(obj, msg, *args)
    dassert_increasing_index(obj, msg, *args)


# TODO(gp): Not sure it's used or useful?
+ + +def dassert_monotonic_index( + obj: Union[pd.Index, pd.DataFrame, pd.Series], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that a Pandas object has a monotonic (i.e., strictly increasing or + decreasing index). + """ + dassert_unique_index(obj, msg, *args) + index = _get_index(obj) + cond = index.is_monotonic_increasing or index.is_monotonic_decreasing + hdbg.dassert(cond, msg=msg, *args) + + +# TODO(Paul): @gp -> dassert_datetime_indexed_df + + +def dassert_time_indexed_df( + df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool +) -> None: + """ + Validate that input dataframe is time indexed and well-formed. + + It works for both single and multi-indexed dataframes. + + :param df: dataframe to validate + :param allow_empty: allow empty data frames + :param strictly_increasing: if True the index needs to be strictly + increasing, instead of just increasing + """ + # Verify that Pandas dataframe is passed as input. + hdbg.dassert_isinstance(df, pd.DataFrame) + if not allow_empty: + # Verify that a non-empty dataframe is passed as input. + hdbg.dassert_lt(0, df.shape[0]) + # Verify that the dataframe has at least 1 column. + hdbg.dassert_lte(1, len(df.columns)) + # Verify that the index is increasing. + if strictly_increasing: + dassert_strictly_increasing_index(df) + else: + dassert_increasing_index(df) + # Check that the index is in datetime format. + dassert_index_is_datetime(df) + # Check that the passed timestamp has timezone info. + index_item = df.index[0] + if isinstance(index_item, tuple): + # In case of multi index assume that the first level is a datetime. + index_item = index_item[0] + hdateti.dassert_has_tz(index_item) + + +def dassert_indices_equal( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + allow_series: bool = False, + only_warning: bool = False, +) -> None: + """ + Ensure that `df1` and `df2` share a common index. + + Print the symmetric difference of indices if equality does not hold. 
+ """ + if allow_series: + if isinstance(df1, pd.Series): + df1 = df1.to_frame() + if isinstance(df2, pd.Series): + df2 = df2.to_frame() + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert( + df1.index.equals(df2.index), + "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", + df1.index.difference(df2.index), + df2.index.difference(df1.index), + only_warning=only_warning, + ) + + +def dassert_columns_equal( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + sort_cols: bool = False, + only_warning: bool = False, +) -> None: + """ + Ensure that `df1` and `df2` have the same columns. + + Print the symmetric difference of columns if equality does not hold. + """ + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + if sort_cols: + _LOG.debug("Sorting dataframe columns.") + df1 = df1.sort_index(axis=1) + df2 = df2.sort_index(axis=1) + hdbg.dassert( + df1.columns.equals(df2.columns), + "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", + df1.columns.difference(df2.columns), + df2.columns.difference(df1.columns), + only_warning=only_warning, + ) + + +def dassert_axes_equal( + df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False +) -> None: + """ + Ensure that `df1` and `df2` have the same index and same columns. + """ + dassert_indices_equal(df1, df2) + dassert_columns_equal(df1, df2, sort_cols=sort_cols) + + +# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. + + +def dassert_series_type_is( + srs: pd.Series, + type_: type, + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that the data type of `srs` is `type_`. 
+ + Examples of valid series types are + - np.float64 + - np.int64 + - pd.Timestamp + """ + hdbg.dassert_isinstance(srs, pd.Series) + hdbg.dassert_isinstance(type_, type) + hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) + + +def dassert_series_type_in( + srs: pd.Series, + types: List[type], + msg: Optional[str] = None, + *args: Any, +) -> None: + """ + Ensure that the data type of `srs` is one of the types in `types`. + """ + hdbg.dassert_isinstance(srs, pd.Series) + hdbg.dassert_container_type(types, list, type) + hdbg.dassert_in(srs.dtype.type, types, msg, *args) + + +def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: + """ + Ensure that remapping rows / columns is valid. + """ + hdbg.dassert_isinstance(to_remap, list) + hdbg.dassert_isinstance(remap_dict, dict) + # All the rows / columns to remap, should exist. + hdbg.dassert_is_subset( + remap_dict.keys(), + to_remap, + "Keys to remap should be a subset of existing columns", + ) + # The mapping is invertible. + hdbg.dassert_no_duplicates(remap_dict.keys()) + hdbg.dassert_no_duplicates(remap_dict.values()) + # Rows / columns should not be remapped on existing rows / columns. + hdbg.dassert_not_intersection(remap_dict.values(), to_remap) + + +def dassert_approx_eq( + val1: Any, + val2: Any, + rtol: float = 1e-05, + atol: float = 1e-08, + msg: Optional[str] = None, + *args: Any, + only_warning: bool = False, +) -> None: + # Approximate comparison is not applicable for strings. + hdbg.dassert_is_not(type(val1), str) + hdbg.dassert_is_not(type(val2), str) + # Convert iterable inputs to list in order to comply with numpy. 
+ if isinstance(val1, Iterable): + val1 = list(val1) + if isinstance(val2, Iterable): + val2 = list(val2) + cond = np.allclose( + np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True + ) + if not cond: + txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" + hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore + + +# ############################################################################# + + +def dassert_is_days( + timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None +) -> None: + """ + Assert that a timedelta represents an integer number of days. + + :param timedelta: the timedelta to check + :param min_num_days: optional minimum number of days to enforce + """ + hdbg.dassert( + (timedelta / pd.Timedelta(days=1)).is_integer(), + "timedelta='%s' is not an integer number of days", + timedelta, + ) + if min_num_days is not None: + hdbg.dassert_lte(1, timedelta.days) + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py new file mode 100644 index 000000000..6c73c8988 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py @@ -0,0 +1,302 @@ +""" +Import as: + +import helpers.hpandas_display as hpandisp +""" + +import logging +import os +from typing import List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hlogging as hloggin +import helpers.hsystem as hsystem + +_LOG = hloggin.getLogger(__name__) + + +# Invariant: +# - When we are in a notebook we want to: +# - Convert `_LOG.info()` in `print()` using `hnotebo.set_logger_to_print()` +# - Display any dataframe using 
the `hpandas.display` function +# - Do not return any value +# +# - When we are not in a notebook we want to: +# - Use `_LOG.info()` and `_LOG.debug()` to log messages +# - Print the dataframe with `_LOG.debug()` +# - Return the result through a `return` statement +# +# - Each function should have a `log_level` parameter to control the logging level. +# - If `log_level` is not provided, it should be set to `logging.DEBUG` if we are not in a notebook, +# and `logging.INFO` if we are in a notebook. + + +def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: + """ + Compute a simple signature of a dataframe in string format. + + The signature contains metadata about dataframe size and certain + amount of rows from start and end of a dataframe. It is used for + testing purposes. + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + text: List[str] = [f"df.shape={str(df.shape)}"] + with pd.option_context( + "display.max_colwidth", int(1e6), "display.max_columns", None + ): + # If dataframe size exceeds number of rows, show only subset in form of + # first and last rows. Otherwise, whole dataframe is shown. + if len(df) > num_rows: + text.append(f"df.head=\n{df.head(num_rows // 2)}") + text.append(f"df.tail=\n{df.tail(num_rows // 2)}") + else: + text.append(f"df.full=\n{df}") + text: str = "\n".join(text) + return text + + +# ############################################################################# + + +def convert_df_to_json_string( + df: pd.DataFrame, + n_head: Optional[int] = 10, + n_tail: Optional[int] = 10, + columns_order: Optional[List[str]] = None, +) -> str: + """ + Convert dataframe to pretty-printed JSON string. + + To select all rows of the dataframe, pass `n_head` as None. + + :param df: dataframe to convert + :param n_head: number of printed top rows + :param n_tail: number of printed bottom rows + :param columns_order: order for the KG columns sort + :return: dataframe converted to JSON string + """ + # Append shape of the initial dataframe. 
+ shape = f"original shape={df.shape}" + # Reorder columns. + if columns_order is not None: + hdbg.dassert_set_eq(columns_order, df.columns) + df = df[columns_order] + # Select head. + if n_head is not None: + head_df = df.head(n_head) + else: + # If no n_head provided, append entire dataframe. + head_df = df + # Transform head to json. + head_json = head_df.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + if n_tail is not None: + # Transform tail to json. + tail = df.tail(n_tail) + tail_json = tail.to_json( + orient="index", + force_ascii=False, + indent=4, + default_handler=str, + date_format="iso", + date_unit="s", + ) + else: + # If no tail specified, append an empty string. + tail_json = "" + # Join shape and dataframe to single string. + output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) + return output_str + + +# ############################################################################# + + +def convert_df_to_png( + df: pd.DataFrame, + file_path: str, + index: bool = True, + table_conversion: str = "kaleido", + dpi: int = 300, + print_markdown: bool = False, + markdown_path_prefix: Optional[str] = None, +) -> None: + """ + Convert a dataframe to a PNG image file. + + Uses the dataframe_image library to render the DataFrame as an image + with HTML styling. 
+ + :param df: dataframe to convert + :param file_path: path where the PNG image will be saved + :param index: whether to include the index in the image + :param table_conversion: conversion method ('kaleido', 'chrome', or 'playwright') + :param dpi: resolution in dots per inch (default: 300 for print quality, + higher values = higher resolution and larger file size) + :param print_markdown: if True, print markdown image reference like + ![](path/to/image.png) + :param markdown_path_prefix: optional path to prepend to the image path in + the markdown reference (e.g., '../figures/' or 'assets/') + """ + # Keep this import here since it's an optional one. + import dataframe_image as dfi + + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(file_path, str) + # Ensure the output directory exists. + hio.create_enclosing_dir(file_path, incremental=True) + # Prepare dataframe for export, handling index parameter. + export_df = df + if not index: + # Reset index to exclude it from the image. + export_df = df.reset_index(drop=True) + dfi.export(export_df, file_path, table_conversion=table_conversion, dpi=dpi) + # Use print instead of _LOG.info. + print(f"PNG image saved to: '{file_path}'") + if print_markdown: + # Construct the markdown path. + markdown_path = file_path + if markdown_path_prefix: + markdown_path = os.path.join(markdown_path_prefix, file_path) + markdown_ref = f"![]({markdown_path})" + # Use print instead of _LOG.info. + print(markdown_ref) + + +# ############################################################################# + + +def print_or_display( + df: pd.DataFrame, + *, + index: bool = True, + as_txt: bool = False, + log_level: int = logging.INFO, +) -> None: + """ + Print or display a dataframe in a notebook at the given log level. 
+ + :param df: dataframe to print + :param index: whether to show the index or not + :param as_txt: print if True, otherwise render as usual HTML table + :param log_level: log level at which to print the dataframe + """ + # print(_LOG.getEffectiveLevel()) + # print(log_level) + # print(_LOG.isEnabledFor(log_level)) + if hsystem.is_running_in_ipynb() and not as_txt: + from IPython.display import display, HTML + + if _LOG.isEnabledFor(log_level): + display(HTML(df.to_html(index=index))) + else: + _LOG.log(log_level, "%s", df.to_string(index=index)) + + +def display_df( + df: pd.DataFrame, + *, + index: bool = True, + inline_index: bool = False, + max_lines: Optional[int] = 5, + tag: Optional[str] = None, + mode: Optional[str] = None, + as_txt: bool = False, + log_level: int = logging.INFO, +) -> None: + """ + Display a Pandas object (series, df, panel) in a better way than the + ipython display, e.g., by printing head and tail of the dataframe, and + other formatting options. + + :param index: whether to show the index or not + :param inline_index: make the index part of the dataframe. This is used + when cutting and pasting to other applications, which are not happy + with the output pandas HTML form + :param max_lines: number of lines to print + :param mode: use different formats temporarily overriding the default, e.g., + - "all_rows": print all the rows + - "all_cols": print all the columns + - "all": print the entire df (it could be huge) + :param as_txt: print if True, otherwise render as usual html table + :param log_level: log level at which to print the dataframe + """ + # Convert Series to DataFrame if needed. 
+ if isinstance(df, pd.Series): + df = pd.DataFrame(df) + # + hdbg.dassert_type_is(df, pd.DataFrame) + hdbg.dassert_eq( + hlist.find_duplicates(df.columns.tolist()), + [], + msg="Find duplicated columns", + ) + if tag is not None: + _LOG.log(log_level, "tag=%s", tag) + # Shrink the dataframe to the number of lines specified by `max_lines`, + # if needed. + if max_lines is not None: + hdbg.dassert_lte(1, max_lines) + if df.shape[0] > max_lines: + # log.error("Printing only top / bottom %s out of %s rows", + # max_lines, df.shape[0]) + ellipses = pd.DataFrame( + [["..."] * len(df.columns)], columns=df.columns, index=["..."] + ) + df = pd.concat( + [ + df.head(int(max_lines / 2)), + ellipses, + df.tail(int(max_lines / 2)), + ], + axis=0, + ) + # Inline the index, if needed. + if inline_index: + df = df.copy() + # Copy the index to a column and don't print the index. + if df.index.name is None: + col_name = "." + else: + col_name = df.index.name + df.insert(0, col_name, df.index) + df.index.name = None + index = False + # Print or display the dataframe. 
+ if mode is None: + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all_rows": + with pd.option_context( + "display.max_rows", None, "display.max_columns", 3 + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all_cols": + with pd.option_context( + "display.max_colwidth", int(1e6), "display.max_columns", None + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + elif mode == "all": + with pd.option_context( + "display.max_rows", + int(1e6), + "display.max_columns", + 3, + "display.max_colwidth", + int(1e6), + "display.max_columns", + None, + ): + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + else: + print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) + raise ValueError("Invalid mode=%s" % mode) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py new file mode 100644 index 000000000..a1049d77f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py @@ -0,0 +1,128 @@ +""" +Import as: + +import helpers.hpandas_io as hpanio +""" + +from typing import Any, Union + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hprint as hprint + +# Handle different versions of s3fs where core module may be at different +# locations. +try: + import s3fs + + # Try to access s3fs.core to check if it exists + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module. 
+ try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ImportError: + # If s3fs is not available, define dummy classes for type hints. + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +_LOG = hloggin.getLogger(__name__) + + +def read_csv_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a CSV file into a `pd.DataFrame`. + + :param stream: file path, S3File, or S3FileSystem object + :param args: additional arguments passed to pd.read_csv() + :param kwargs: additional keyword arguments passed to pd.read_csv() + :return: dataframe with CSV contents + """ + # Gets filename from stream if it is not already a string, + # so it can be inspected for extension type. + file_name = stream if isinstance(stream, str) else vars(stream)["path"] + # Handle zipped files. + if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "gzip" + elif file_name.endswith(".zip"): + hdbg.dassert_not_in("compression", kwargs) + kwargs["compression"] = "zip" + # Read. + _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_csv(stream, *args, **kwargs) + return df + + +def read_parquet_to_df( + stream: Union[str, S3File, S3FileSystem], + *args: Any, + **kwargs: Any, +) -> pd.DataFrame: + """ + Read a Parquet file into a `pd.DataFrame`. + + :param stream: file path, S3File, or S3FileSystem object + :param args: additional arguments passed to pd.read_parquet() + :param kwargs: additional keyword arguments passed to pd.read_parquet() + :return: dataframe with Parquet contents + """ + # Read. 
+ _LOG.debug(hprint.to_str("args kwargs")) + df = pd.read_parquet(stream, *args, **kwargs) + return df + + +# ############################################################################# + + +# TODO(Paul): Remove this since it's a dup of hgoogle_drive_api.py. + + +def to_gsheet( + df: pd.DataFrame, + tab_name: str, + gsheet_tab_name: str, + overwrite: bool, +) -> None: + """ + Save a dataframe to a Google sheet. + + :param df: the dataframe to save to a Google sheet + :param tab_name: the name of the Google sheet to save the df + into; the Google sheet with this name must already exist on the + Google Drive + :param gsheet_tab_name: the name of the sheet in the Google sheet + :param overwrite: if True, the contents of the sheet are erased + before saving the dataframe into it; if False, the dataframe is + appended to the contents of the sheet + """ + import gspread_pandas + + spread = gspread_pandas.Spread( + tab_name, sheet=gsheet_tab_name, create_sheet=True + ) + if overwrite: + spread.clear_sheet() + else: + sheet_contents = spread.sheet_to_df(index=None) + combined_df = pd.concat([sheet_contents, df]) + df = combined_df.drop_duplicates() + spread.df_to_sheet(df, index=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py new file mode 100644 index 000000000..f139a3ba9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py @@ -0,0 +1,183 @@ +""" +Import as: + +import helpers.hpandas_multiindex as hpanmult +""" + +import logging +from typing import Any, Dict, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_compare as hpancomp +import helpers.hpandas_dassert as hpandass +import 
helpers.hpandas_transform as hpantran +import helpers.hpandas_utils as hpanutil +import helpers.hprint as hprint + +_LOG = hloggin.getLogger(__name__) + +RowsValues = List[List[str]] + +# ############################################################################# +# Functions +# ############################################################################# + + +def add_multiindex_col( + df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str +) -> pd.DataFrame: + """ + Add column to a multiindex DataFrame. + + Note: each column in a multiindex DataFrame is a DataFrame itself. + + :param df: multiindex df + :param multiindex_col: column (i.e. singleindex df) of a multiindex df + :param col_name: name of a new column + :return: a multiindex DataFrame with a new column + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) + hdbg.dassert_isinstance(col_name, str) + hdbg.dassert_not_in(col_name, df.columns) + for col in multiindex_col.columns: + df[col_name, col] = multiindex_col[col] + return df + + +def multiindex_df_info( + df: pd.DataFrame, + *, + log_level: int = logging.INFO, + **list_to_str_kwargs: Dict[str, Any], +) -> str: + """ + Report information about a multi-index df. + """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + columns_level0 = df.columns.levels[0] + columns_level1 = df.columns.levels[1] + rows = df.index + ret = [] + ret.append( + f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" + ) + ret.append( + "columns_level0=" + + hprint.list_to_str2(columns_level0, **list_to_str_kwargs) + ) + ret.append( + "columns_level1=" + + hprint.list_to_str2(columns_level1, **list_to_str_kwargs) + ) + ret.append("rows=" + hprint.list_to_str2(rows, **list_to_str_kwargs)) + if isinstance(df.index, pd.DatetimeIndex): + # Display timestamp info. 
+ start_timestamp = df.index.min() + end_timestamp = df.index.max() + frequency = df.index.freq + if frequency is None: + # Try to infer frequency. + frequency = pd.infer_freq(df.index) + ret.append(f"start_timestamp={start_timestamp}") + ret.append(f"end_timestamp={end_timestamp}") + ret.append(f"frequency={frequency}") + ret = "\n".join(ret) + _LOG.log(log_level, ret) + return ret + + +def subset_multiindex_df( + df: pd.DataFrame, + *, + # TODO(gp): Consider passing trim_df_kwargs as kwargs. + start_timestamp: Optional[pd.Timestamp] = None, + end_timestamp: Optional[pd.Timestamp] = None, + columns_level0: hpanutil.ColumnSet = None, + columns_level1: hpanutil.ColumnSet = None, + keep_order: bool = False, +) -> pd.DataFrame: + """ + Filter multi-index DataFrame by timestamp index and column levels. + + :param start_timestamp: see `trim_df()` + :param end_timestamp: see `trim_df()` + :param columns_level0: column names that corresponds to `df.columns.levels[0]` + - `None` means no filtering + :param columns_level1: column names that corresponds to `df.columns.levels[1]` + - `None` means no filtering + :param keep_order: see `hpandas_utils.resolve_column_names()` + :return: filtered DataFrame + """ + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_eq(2, len(df.columns.levels)) + # Filter by timestamp. + allow_empty = False + strictly_increasing = False + hpandass.dassert_time_indexed_df(df, allow_empty, strictly_increasing) + df = hpantran.trim_df( + df, + ts_col_name=None, + start_ts=start_timestamp, + end_ts=end_timestamp, + left_close=True, + right_close=True, + ) + # Filter level 0. 
+ hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + all_columns_level0 = df.columns.levels[0] + columns_level0 = hpanutil.resolve_column_names( + columns_level0, all_columns_level0, keep_order=keep_order + ) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) + df = df[columns_level0] + # Filter level 1. + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + all_columns_level1 = df.columns.levels[1] + columns_level1 = hpanutil.resolve_column_names( + columns_level1, all_columns_level1, keep_order=keep_order + ) + hdbg.dassert_isinstance(df.columns, pd.MultiIndex) + hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) + df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) + return df + + +# ############################################################################# + + +def compare_multiindex_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + *, + subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, + compare_dfs_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + - Subset both multi-index dfs, if needed + - Compare dfs + + :param subset_multiindex_df: params for `subset_multiindex_df()` + :param compare_dfs_kwargs: params for `compare_dfs()` + :return: df with differences as values + """ + # Subset dfs. + if subset_multiindex_df_kwargs is None: + subset_multiindex_df_kwargs = {} + subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) + subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) + # Compare dfs. 
+ if compare_dfs_kwargs is None: + compare_dfs_kwargs = {} + diff_df = hpancomp.compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) + return diff_df + + +# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py new file mode 100644 index 000000000..b0a6bf9d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py @@ -0,0 +1,527 @@ +""" +Import as: + +import helpers.hpandas_stats as hpanstat +""" + +import logging +from typing import Any, Dict, List, Optional, Tuple, Union, cast + +import numpy as np +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin +import helpers.hpandas_dassert as hpandass +import helpers.hpandas_transform as hpantran +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = hloggin.getLogger(__name__) + + +def compute_duration_df( + tag_to_df: Dict[str, pd.DataFrame], + *, + intersect_dfs: bool = False, + valid_intersect: bool = False, +) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: + """ + Compute a df with some statistics about the time index. + + E.g., + ``` + min_index max_index min_valid_index max_valid_index + tag1 2022-01-01 21:00:00+00:00 ... + tag2 2022-01-01 21:02:00+00:00 ... + tag3 2022-01-01 21:01:00+00:00 ... 
+ ``` + + :param intersect_dfs: return a transformed dict with the intersection of + indices of all the dfs if True, otherwise return the input data as is + :param valid_intersect: intersect indices without NaNs if True, otherwise + intersect indices as is + :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param + """ + hdbg.dassert_isinstance(tag_to_df, Dict) + # Create df and assign columns. + data_stats = pd.DataFrame() + min_col = "min_index" + max_col = "max_index" + min_valid_index_col = "min_valid_index" + max_valid_index_col = "max_valid_index" + # Collect timestamp info from all dfs. + for tag in tag_to_df.keys(): + # Check that the passed timestamp has timezone info. + first_idx = tag_to_df[tag].index[0] + hdateti.dassert_has_tz(cast(pd.Timestamp, first_idx)) + hpandass.dassert_index_is_datetime(tag_to_df[tag]) + # Compute timestamp stats. + data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() + data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() + data_stats.loc[tag, min_valid_index_col] = ( + tag_to_df[tag].dropna().index.min() + ) + data_stats.loc[tag, max_valid_index_col] = ( + tag_to_df[tag].dropna().index.max() + ) + # Make a copy so we do not modify the original data. + tag_to_df_updated = tag_to_df.copy() + # Change the initial dfs with intersection. + if intersect_dfs: + if valid_intersect: + # Assign start, end date column according to specs. + min_col = min_valid_index_col + max_col = max_valid_index_col + # The start of the intersection will be the max value amongt all start dates. + intersection_start_date = cast(pd.Timestamp, data_stats[min_col].max()) + # The end of the intersection will be the min value amongt all end dates. 
+ intersection_end_date = cast(pd.Timestamp, data_stats[max_col].min()) + for tag in tag_to_df_updated.keys(): + df = hpantran.trim_df( + tag_to_df_updated[tag], + ts_col_name=None, + start_ts=intersection_start_date, + end_ts=intersection_end_date, + left_close=True, + right_close=True, + ) + tag_to_df_updated[tag] = df + return data_stats, tag_to_df_updated + + +# ############################################################################# + + +# TODO(gp): Remove this since it's in Google API. + + +def compute_weighted_sum( + dfs: Dict[str, pd.DataFrame], + weights: pd.DataFrame, + *, + index_mode: str = "assert_equal", +) -> Dict[str, pd.DataFrame]: + """ + Compute weighted sums of `dfs` using `weights`. + + :param dfs: dataframes keyed by id; all dfs should have the same cols, + indices are handled based on the `index_mode` + :param weights: float weights indexed by id with unique col names + :param index_mode: same as `mode` in `apply_index_mode()` + :return: weighted sums keyed by weight col names + """ + hdbg.dassert_isinstance(dfs, dict) + hdbg.dassert(dfs, "dictionary of dfs must be nonempty") + # Get a dataframe from the dictionary and record its index and columns. + id_ = list(dfs)[0] + hdbg.dassert_isinstance(id_, str) + df = dfs[id_] + hdbg.dassert_isinstance(df, pd.DataFrame) + cols = df.columns + # Sanity-check dataframes in dictionary. + for key, value in dfs.items(): + hdbg.dassert_isinstance(key, str) + hdbg.dassert_isinstance(value, pd.DataFrame) + # The reference df is not modified. + _, value = hpantran.apply_index_mode(df, value, index_mode) + hdbg.dassert( + value.columns.equals(cols), + "Column equality fails for keys=%s, %s", + id_, + key, + ) + # Sanity-check weights. 
+ hdbg.dassert_isinstance(weights, pd.DataFrame) + hdbg.dassert_eq(weights.columns.nlevels, 1) + hdbg.dassert(not weights.columns.has_duplicates) + hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) + # Create a multiindexed dataframe to facilitate computing the weighted sums. + weighted_dfs = {} + combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) + # TODO(Paul): Consider relaxing the NaN-handling. + for col in weights.columns: + weighted_combined_df = combined_df.multiply(weights[col], level=0) + weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( + min_count=len(dfs) + ) + weighted_dfs[col] = weighted_sums + return weighted_dfs + + +def remap_obj( + obj: Union[pd.Series, pd.Index], + map_: Dict[Any, Any], + **kwargs: Any, +) -> pd.Series: + """ + Substitute each value of an object with another value from a dictionary. + + :param obj: a Series or Index to remap values in + :param map_: dictionary mapping old values to new values + :param kwargs: additional keyword arguments passed to pd.Series.map() + :return: remapped pandas series + """ + hdbg.dassert_lte(1, obj.shape[0]) + # TODO(Grisha): consider extending for other mapping types supported by + # `pd.Series.map`. + hdbg.dassert_isinstance(map_, dict) + # Check that every element of the object is in the mapping. + hdbg.dassert_is_subset(obj, map_.keys()) + new_srs = obj.map(map_, **kwargs) + return cast(pd.Series, new_srs) + + +def get_random_df( + num_cols: int, + seed: Optional[int] = None, + date_range_kwargs: Optional[Dict[str, Any]] = None, +) -> pd.DataFrame: + """ + Compute df with random data with `num_cols` columns and index obtained by + calling `pd.date_range(**kwargs)`. 
+ + :param num_cols: the number of columns in a DataFrame to generate + :param seed: see `random.seed()` + :param date_range_kwargs: kwargs for `pd.date_range()` + """ + if seed: + np.random.seed(seed) + if date_range_kwargs is None: + date_range_kwargs = {} + dt = pd.date_range(**date_range_kwargs) + df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) + return df + + +# ############################################################################# + + +def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> Any: + """ + Colorize a df with a heatmap depending on the numeric values. + + :param axis: along which axis to compute the heatmap + - 0 colorize along rows + - 1 colorize along columns + - None: colorize everything + """ + # Keep it here to avoid long start up times. + import seaborn as sns + + cm = sns.diverging_palette(5, 250, as_cmap=True) + return df.style.background_gradient(axis=axis, cmap=cm) + + +def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Any) -> str: + """ + Report percentage of True values in a list or series. + + :param vals: list or series of boolean values + :param perc_kwargs: additional keyword arguments passed to hprint.perc() + :return: formatted percentage string + """ + if isinstance(vals, list): + vals = pd.Series(vals) + ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) + return cast(str, ret) + + +def add_end_download_timestamp( + obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" +) -> Union[pd.DataFrame, Dict]: + """ + Add a column 'end_download_timestamp' to the DataFrame with the current + time. + + :param obj: The DataFrame to which the column will be added. + :param timezone: The timezone for the current time. Defaults to + 'UTC'. + """ + # Get current timestamp. + current_ts = hdateti.get_current_time(timezone) + # Set value of end_download_timestamp. 
+ obj["end_download_timestamp"] = current_ts + return obj + + +def get_value_counts_stats_df( + df: pd.DataFrame, col_name: str, *, num_rows: int = 10 +) -> pd.DataFrame: + """ + Get the value counts of `col_name` in `df`. + + :param df: The DataFrame to get the value counts of `col_name` from. + :param col_name: The column name to get the value counts of. + :param num_rows: The number of rows to return. + :return: A DataFrame with the value counts of `col_name` in `df`. E.g., + ``` + count pct [%] + Venture Fund 1004 25.100 + Financial Services 274 6.850 + Venture Capital & Private Equity 176 4.400 + Computer Software 163 4.075 + Higher Education 133 3.325 + Information Technology & Services 73 1.825 + ``` + """ + hdbg.dassert_in(col_name, df.columns) + stats_df = df[col_name].value_counts().to_frame() + stats_df["pct [%]"] = stats_df["count"] / len(df) * 100 + if num_rows > 0: + stats_df = stats_df.head(num_rows) + return stats_df + + +def display_value_counts_stats_df( + df: pd.DataFrame, col_names: Union[str, List[str]], *, num_rows: int = 10 +) -> None: + if isinstance(col_names, list): + for col_name in col_names: + display_value_counts_stats_df(df, col_name, num_rows=num_rows) + return + import IPython.display + + hdbg.dassert_isinstance(col_names, str) + _LOG.info("# %s", col_names) + stats_df = get_value_counts_stats_df(df, col_names, num_rows=num_rows) + IPython.display.display(stats_df) + + +# ############################################################################# +# Functions moved from core/explore.py +# ############################################################################# + + +def report_zero_nan_inf_stats( + df: pd.DataFrame, + *, + zero_threshold: float = 1e-9, + verbose: bool = False, + as_txt: bool = False, + dbg_log_level: int = logging.DEBUG, +) -> pd.DataFrame: + """ + Report count and percentage about zeros, nans, infs for a df. 
+ + :param df: dataframe to report the stats of + :param zero_threshold: threshold for classifying values as "zero" + :param verbose: if True, print the stats + :param as_txt: if True, print the stats as text + :param dbg_log_level: log level at which to print the debug info + :return: a DataFrame with the stats + """ + # Convert Series to DataFrame if needed. + if isinstance(df, pd.Series): + df = pd.DataFrame(df) + # Print stats about the input dataframe. + _LOG.log(dbg_log_level, "index in [%s, %s]", df.index.min(), df.index.max()) + num_rows = df.shape[0] + _LOG.log(dbg_log_level, "num_rows=%s", hprint.thousand_separator(num_rows)) + _LOG.log(dbg_log_level, "data=") + import helpers.hpandas_display as hpandisp + + hpandisp.display_df(df, as_txt=as_txt, log_level=dbg_log_level) + # Compute date-based stats only if index is datetime. + if isinstance(df.index, pd.DatetimeIndex): + # TODO(gp): Can we do this faster? + dates = [d.date() for d in df.index] + num_days = len(set(dates)) + _LOG.log(dbg_log_level, "num_days=%s", num_days) + num_weekdays = len(set(d for d in dates if d.weekday() < 5)) + _LOG.log(dbg_log_level, "num_weekdays=%s", num_weekdays) + # + stats_df = pd.DataFrame(None, index=df.columns) + if False: + # Find the index of the first non-nan value. + df = df.applymap(lambda x: not np.isnan(x)) + min_idx = df.idxmax(axis=0) + min_idx.name = "min_idx" + # Find the index of the last non-nan value. 
+ max_idx = df.reindex(index=df.index[::-1]).idxmax(axis=0) + max_idx.name = "max_idx" + stats_df["num_rows"] = num_rows + # + num_zeros = (np.abs(df) < zero_threshold).sum(axis=0) + if verbose: + stats_df["num_zeros"] = num_zeros + stats_df["zeros [%]"] = (100.0 * num_zeros / num_rows).apply( + hprint.round_digits + ) + # + num_nans = np.isnan(df).sum(axis=0) + if verbose: + stats_df["num_nans"] = num_nans + stats_df["nans [%]"] = (100.0 * num_nans / num_rows).apply( + hprint.round_digits + ) + # + num_infs = np.isinf(df).sum(axis=0) + if verbose: + stats_df["num_infs"] = num_infs + stats_df["infs [%]"] = (100.0 * num_infs / num_rows).apply( + hprint.round_digits + ) + # + num_valid = df.shape[0] - num_zeros - num_nans - num_infs + if verbose: + stats_df["num_valid"] = num_valid + stats_df["valid [%]"] = (100.0 * num_valid / num_rows).apply( + hprint.round_digits + ) + # + _LOG.log(dbg_log_level, "stats_df=\n%s", stats_df) + return stats_df + + +def pvalue_to_stars(pval: Optional[float]) -> str: + """ + Convert p-value to star notation for statistical significance. + + :param pval: p-value to convert + :return: star notation (* to ****) or ? for non-significant, NA for NaN + """ + if pval is None or np.isnan(pval): + stars = "NA" + else: + hdbg.dassert_lte(0.0, pval) + hdbg.dassert_lte(pval, 1.0) + if pval < 0.005: + # More than 99.5% confidence. + stars = "****" + elif pval < 0.01: + # More than 99% confidence. + stars = "***" + elif pval < 0.05: + # More than 95% confidence. + stars = "**" + elif pval < 0.1: + # More than 90% confidence. + stars = "*" + else: + stars = "?" + return stars + + +def format_ols_regress_results(regr_res: Optional[pd.DataFrame]) -> pd.DataFrame: + """ + Format OLS regression results into a readable DataFrame. + + :param regr_res: regression results dictionary with coeffs, pvals, rsquared, etc. 
+ :return: formatted DataFrame with coefficients and statistics + """ + if regr_res is None: + _LOG.warning("regr_res=None: skipping") + df = pd.DataFrame(None) + return df + row: List[Union[float, str]] = [ + "%.3f (%s)" % (coeff, pvalue_to_stars(pval)) + for (coeff, pval) in zip(regr_res["coeffs"], regr_res["pvals"]) + ] + row.append(float("%.2f" % (regr_res["rsquared"] * 100.0))) + row.append(float("%.2f" % (regr_res["adj_rsquared"] * 100.0))) + col_names = regr_res["param_names"] + ["R^2 [%]", "Adj R^2 [%]"] + df = pd.DataFrame([row], columns=col_names) + return df + + +# ############################################################################# +# Exploratory analysis functions +# ############################################################################# + + +def _get_unique_values_stats(df: pd.DataFrame) -> pd.DataFrame: + """ + Get unique values count and percentage for each column. + + :param df: dataframe to analyze + :return: DataFrame with num_unique and unique [%] columns + """ + stats_df = pd.DataFrame(None, index=df.columns) + num_unique = df.nunique() + stats_df["num_unique"] = num_unique + stats_df["unique [%]"] = (100.0 * num_unique / df.shape[0]).apply( + hprint.round_digits + ) + return stats_df + + +def explore_dataframe( + df: pd.DataFrame, + *, + show_distributions: bool = False, + show_correlations: bool = False, + zero_threshold: float = 1e-9, + dbg_log_level: int = logging.DEBUG, +) -> Optional[pd.DataFrame]: + """ + Perform comprehensive exploratory analysis of a DataFrame. + + Computes data quality metrics (zeros, NaNs, infinities, valid data), + optionally plots distributions of high-variability columns, and + optionally displays a correlation matrix. 
+ + :param df: Input dataframe to analyze + :param show_distributions: If True, plots distributions of top-variability + columns in a 3-column grid + :param show_correlations: If True, displays correlation matrix as a heatmap + :param zero_threshold: Threshold for classifying values as "zero" in + quality report + :return: Statistics DataFrame from report_zero_nan_inf_stats with columns: + num_rows, zeros [%], nans [%], infs [%], valid [%] + """ + import matplotlib.pyplot as plt + from IPython.display import display + + hdbg.dassert_lt(0, len(df), "Dataframe is empty") + # Compute and display data quality statistics. + stats_df = report_zero_nan_inf_stats( + df, zero_threshold=zero_threshold, dbg_log_level=dbg_log_level + ) + # Add information about the number of unique values and percentage of unique values for each column. + unique_stats_df = _get_unique_values_stats(df) + stats_df = pd.concat([stats_df, unique_stats_df], axis=1) + if hsystem.is_running_in_ipynb(): + _LOG.info("stats_df=") + display(stats_df) + _LOG.debug("stats_df=\n%s", stats_df) + # Plot distributions if requested. + if hsystem.is_running_in_ipynb(): + if show_distributions: + _LOG.info("Univariate distributions:") + numeric_cols = df.select_dtypes(include="number").columns.tolist() + if len(numeric_cols) > 0: + # Compute standard deviation and select top columns. + std_vals = df[numeric_cols].std().sort_values(ascending=False) + num_to_plot = len(numeric_cols) + top_cols = std_vals.head(num_to_plot).index.tolist() + # Create grid of subplots. 
+ import helpers.hmatplotlib as hmatplo + + fig, axes = hmatplo.get_multiple_plots( + num_to_plot, 3, y_scale=3.5 + ) + _ = fig + for i, col in enumerate(top_cols): + ax = axes[i] + col_data = df[col].dropna() + weights = np.ones_like(col_data) / len(col_data) * 100 + ax.hist(col_data, bins=30, weights=weights, edgecolor="k") + ax.set_title(col) + ax.set_xlabel("Value") + ax.set_ylabel("Percentage [%]") + plt.tight_layout() + plt.show() + # Display correlation matrix if requested. + if show_correlations: + numeric_df = df.select_dtypes(include="number") + if len(numeric_df.columns) >= 2: + corr_matrix = numeric_df.corr() + _LOG.info("Correlation matrix:") + # TODO(gp): Improve the plot changing the number of digits. + corr_heatmap = heatmap_df(corr_matrix) + display(corr_heatmap) + if hsystem.is_running_in_ipynb(): + return None + return stats_df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py new file mode 100644 index 000000000..6eae1fa57 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py @@ -0,0 +1,1023 @@ +""" +Import as: + +import helpers.hpandas_transform as hpantran +""" + +import csv +import logging +import math +import random +import re +from typing import ( + Any, + Callable, + Collection, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) + +import pandas as pd + +import helpers.hdatetime as hdateti +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +# TODO(ai_gp): Import the file and not the package to avoid cyclic imports. +import helpers.hpandas_conversion as hpanconv +import helpers.hprint as hprint + +_LOG = hloggin.getLogger(__name__) + +# Enable extra verbose debugging. Do not commit. 
+_TRACE = False + +RowsValues = List[List[str]] + +# ############################################################################# +# Resampling & Time Series Operations +# ############################################################################# + + +def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex: + """ + Resample `DatetimeIndex`. + + :param index: `DatetimeIndex` to resample + :param frequency: frequency from `pd.date_range()` to resample to + :return: resampled `DatetimeIndex` + """ + # Import locally to avoid cyclic import. + import helpers.hpandas_dassert as hpandass + + _LOG.debug(hprint.to_str("index frequency")) + hdbg.dassert_isinstance(index, pd.DatetimeIndex) + hpandass.dassert_unique_index( + index, msg="Index must have only unique values" + ) + min_date = index.min() + max_date = index.max() + _LOG.debug("min_date=%s max_date=%s", min_date, max_date) + # TODO(gp): Preserve the index name. + # index_name = index.name + resampled_index = pd.date_range( + start=min_date, + end=max_date, + freq=frequency, + ) + # Enable detailed debugging. + if False: + if len(resampled_index) > len(index): + # Upsample. + _LOG.debug( + "Index length increased by %s = %s - %s", + len(resampled_index) - len(index), + len(resampled_index), + len(index), + ) + elif len(resampled_index) < len(index): + # Downsample. + _LOG.debug( + "Index length decreased by %s = %s - %s", + len(index) - len(resampled_index), + len(index), + len(resampled_index), + ) + else: + _LOG.debug("Index length=%s has not changed", len(index)) + # resampled_index.name = index_name + return resampled_index + + +def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame: + """ + Resample `DataFrame` by placing NaN in missing locations in the index. 
+ + :param df: `DataFrame` to resample + :param frequency: frequency from `pd.date_range()` to resample to + :return: resampled `DataFrame` + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + # Preserve the index name. + index_name = df.index.name + resampled_index = resample_index(df.index, frequency) + df_reindex = df.reindex(resampled_index) + df_reindex.index.name = index_name + return df_reindex + + +def reindex_on_unix_epoch( + df: pd.DataFrame, in_col_name: str, unit: str = "s" +) -> pd.DataFrame: + """ + Transform the column `in_col_name` into a datetime index. `in_col_name` + contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. + + :param df: dataframe with a unix epoch + :param in_col_name: column containing unix epoch + :param unit: the unit of unix epoch + """ + # Convert. + temp_col_name = in_col_name + "_tmp" + hdbg.dassert_in(in_col_name, df.columns) + hdbg.dassert_not_in(temp_col_name, df.columns) + # Save. + df[temp_col_name] = pd.to_datetime(df[in_col_name], unit=unit, utc=True) + df.set_index(temp_col_name, inplace=True, drop=True) + df.index.name = None + return df + + +def find_gaps_in_dataframes( + df1: pd.DataFrame, df2: pd.DataFrame +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Find data present in one dataframe and missing in the other one. + + :param df1: first dataframe for comparison + :param df2: second dataframe for comparison + :return: two dataframes with missing data + """ + # Get data present in first, but not present in second dataframe. + first_missing_indices = df2.index.difference(df1.index) + first_missing_data = df2.loc[first_missing_indices] + # Get data present in second, but not present in first dataframe. + second_missing_indices = df1.index.difference(df2.index) + second_missing_data = df1.loc[second_missing_indices] + return first_missing_data, second_missing_data + + +# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. 
+ + +def find_gaps_in_time_series( + time_series: pd.Series, + start_timestamp: pd.Timestamp, + end_timestamp: pd.Timestamp, + freq: str, +) -> pd.Series: + """ + Find missing points on a time interval specified by [start_timestamp, + end_timestamp], where point distribution is determined by `freq`. + + If the passed time series is in unix epoch format, it is + automatically transformed to pd.Timestamp. + + :param time_series: time series to find gaps in + :param start_timestamp: start of the time interval to check + :param end_timestamp: end of the time interval to check + :param freq: distance between two data points on the interval. + Aliases correspond to pandas.date_range's freq parameter, i.e. + "S" -> second, "T" -> minute. + :return: pd.Series representing missing points in the source time + series. + """ + _time_series = time_series + if str(time_series.dtype) in ["int32", "int64"]: + _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) + correct_time_series = pd.date_range( + start=start_timestamp, end=end_timestamp, freq=freq + ) + return correct_time_series.difference(_time_series) + + +# ############################################################################# +# DataFrame Transformation +# ############################################################################# + + +def apply_index_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the index mode. 
+ + :param df1: first input df + :param df2: second input df + :param mode: method of processing indices + - "assert_equal": check that both indices are equal, assert otherwise + - "intersect": restrict both dfs to a common index + - "leave_unchanged": ignore any indices mismatch and return dfs as-is + :return: transformed copy of the inputs + """ + # Import locally to avoid cyclic import + import helpers.hpandas_dassert as hpandass + + _LOG.debug("mode=%s", mode) + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + hpandass.dassert_indices_equal(df1_copy, df2_copy) + elif mode == "intersect": + # TODO(Grisha): Add sorting on demand. + common_index = df1_copy.index.intersection(df2_copy.index) + df1_copy = df1_copy[df1_copy.index.isin(common_index)] + df2_copy = df2_copy[df2_copy.index.isin(common_index)] + elif mode == "leave_unchanged": + _LOG.debug( + "Ignoring any index missmatch as per user's request.\n" + "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", + df1_copy.index.difference(df2_copy.index), + df2_copy.index.difference(df1_copy.index), + ) + else: + raise ValueError(f"Unsupported index_mode={mode}") + return df1_copy, df2_copy + + +def apply_columns_mode( + df1: pd.DataFrame, + df2: pd.DataFrame, + mode: str, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Process DataFrames according to the column mode. 
+ + :param df1: first input df + :param df2: second input df + :param mode: method of processing columns + - "assert_equal": check that both dfs have equal columns, assert otherwise + - "intersect": restrict both dfs to only include common columns + - "leave_unchanged": ignore any column mismatches and return dfs as-is + :return: transformed copy of the inputs + """ + # Import locally to avoid cyclic import + import helpers.hpandas_dassert as hpandass + import helpers.hpandas_utils as hpanutil + + _LOG.debug("mode=%s", mode) + # Input validation. + hdbg.dassert_isinstance(df1, pd.DataFrame) + hdbg.dassert_isinstance(df2, pd.DataFrame) + hdbg.dassert_isinstance(mode, str) + # Copy in order not to modify the inputs. + df1_copy = df1.copy() + df2_copy = df2.copy() + if mode == "assert_equal": + # Check if columns are equal or not. + hpandass.dassert_columns_equal(df1_copy, df2_copy) + elif mode == "intersect": + # Filter dataframes based on its common columns. + common_columns = df1_copy.columns.intersection(df2_copy.columns) + df1_copy = df1_copy[common_columns] + df2_copy = df2_copy[common_columns] + # Log the string representation of 2 dfs. + _LOG.debug("df1 after filtering=\n%s", hpanutil.df_to_str(df1)) + _LOG.debug("df2 after filtering=\n%s", hpanutil.df_to_str(df2)) + elif mode == "leave_unchanged": + # Ignore mismatch. + _LOG.debug( + "Ignoring any column missmatch as per user's request.\n" + "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", + df1.columns.difference(df2.columns), + df2.columns.difference(df1.columns), + ) + else: + raise ValueError(f"Unsupported column mode: {mode}") + return df1_copy, df2_copy + + +def trim_df( + df: pd.DataFrame, + ts_col_name: Optional[str], + start_ts: Optional[pd.Timestamp], + end_ts: Optional[pd.Timestamp], + left_close: bool, + right_close: bool, +) -> pd.DataFrame: + """ + Trim the dataframe using values in `ts_col_name`. 
+ + The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. + + :param df: the dataframe to trim + :param ts_col_name: the name of the column; `None` means index + :param start_ts: the start boundary for trimming + :param end_ts: the end boundary for trimming + :param left_close: whether to include the start boundary of the interval + - True: [start_ts, ... + - False: (start_ts, ... + :param right_close: whether to include the end boundary of the interval + - True: ..., end_ts] + - False: ..., end_ts) + :return: the trimmed dataframe + """ + if _TRACE: + # Import locally to avoid cyclic import + import helpers.hpandas_utils as hpanutil + + _LOG.trace( + hpanutil.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + ) + _LOG.debug( + hprint.to_str("ts_col_name start_ts end_ts left_close right_close") + ) + if _TRACE: + # Import locally to avoid cyclic import + import helpers.hpandas_utils as hpanutil + + _LOG.trace("df=\n%s", hpanutil.df_to_str(df)) + if df.empty: + # If the df is empty, there is nothing to trim. + return df + if start_ts is None and end_ts is None: + # If no boundaries are specified, there are no points of reference to trim + # to. + return df + num_rows_before = df.shape[0] + if start_ts is not None and end_ts is not None: + # Confirm that the interval boundaries are valid. + hdateti.dassert_tz_compatible(start_ts, end_ts) + hdbg.dassert_lte(start_ts, end_ts) + # Get the values to filter by. + if ts_col_name is None: + values_to_filter_by = pd.Series(df.index, index=df.index) + else: + hdbg.dassert_in(ts_col_name, df.columns) + values_to_filter_by = df[ts_col_name] + if values_to_filter_by.is_monotonic_increasing: + _LOG.trace("df is monotonic") + # The values are sorted; using the `pd.Series.searchsorted()` method. + # Find the index corresponding to the left boundary of the interval. 
+ if start_ts is not None: + side = "left" if left_close else "right" + left_idx = values_to_filter_by.searchsorted(start_ts, side) + else: + # There is nothing to filter, so the left index is the first one. + left_idx = 0 + _LOG.debug(hprint.to_str("start_ts left_idx")) + # Find the index corresponding to the right boundary of the interval. + if end_ts is not None: + side = "right" if right_close else "left" + right_idx = values_to_filter_by.searchsorted(end_ts, side) + else: + # There is nothing to filter, so the right index is None. + right_idx = df.shape[0] + _LOG.debug(hprint.to_str("end_ts right_idx")) + # + hdbg.dassert_lte(0, left_idx) + hdbg.dassert_lte(left_idx, right_idx) + hdbg.dassert_lte(right_idx, df.shape[0]) + _LOG.debug(hprint.to_str("start_ts left_idx")) + if right_idx < df.shape[0]: + _LOG.debug(hprint.to_str("end_ts right_idx")) + df = df.iloc[left_idx:right_idx] + else: + _LOG.trace("df is not monotonic") + # The values are not sorted; using the `pd.Series.between` method. + if left_close and right_close: + inclusive = "both" + elif left_close: + inclusive = "left" + elif right_close: + inclusive = "right" + else: + inclusive = "neither" + epsilon = pd.DateOffset(minutes=1) + if start_ts is None: + start_ts = values_to_filter_by.min() - epsilon + if end_ts is None: + end_ts = values_to_filter_by.max() + epsilon + df = df[ + values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) + ] + # Report the changes. + num_rows_after = df.shape[0] + if num_rows_before != num_rows_after: + _LOG.debug( + "Removed %s rows", + hprint.perc(num_rows_before - num_rows_after, num_rows_before), + ) + return df + + +def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: + """ + Organize dataframe values into a column-row structure. + + - Indentation artifacts are removed + - The index placement is handled, i.e. 
+ - if the index is named, the name is located and moved to the same + row as the column names + - if the index is not named, the row with the column names receives + a placeholder empty value in its place + - Empty columns are dropped + + :param rows_values: row values extracted from a string df representation + :return: row values assembled into a valid column-row structure + """ + # Clean up indentation artifacts. + if all(row[0] == "" for row in rows_values): + # Remove the first empty cell in each row. + for row in rows_values: + del row[0] + # If the index is named, its name is located in the second row, + # with an optional extra empty value cell value next to it. + if len(rows_values[1]) == 1 or ( + len(rows_values[1]) == 2 and rows_values[1][1] == "" + ): + # Move the index name to the row with all the column names. + if rows_values[0][0] == "": + rows_values[0][0] = rows_values[1][0] + else: + rows_values[0].insert(0, rows_values[1][0]) + # Drop the former index name row. + del rows_values[1] + else: + # Add an empty cell for the absent index name. + rows_values[0].insert(0, "") + # Identify and remove empty columns. + min_len_row = min(len(row) for row in rows_values) + idxs_to_delete = [] + for i in range(min_len_row): + if all(row[i] == "" for row in rows_values): + idxs_to_delete.append(i) + for idx in idxs_to_delete: + for row in rows_values: + del row[idx] + # Confirm that all the rows have the same number of values. + hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) + return rows_values + + +# TODO(Nina): Add `filter_data_mode`. + + +def str_to_df( + df_as_str: str, + col_to_type: Dict[str, Optional[type]], + col_to_name_type: Dict[str, type], +) -> pd.DataFrame: + """ + Convert a string representation of a dataframe into a Pandas df. + + :param df_as_str: a df as a string + - the format of the string is the same as the output of + `hpandas_utils.df_to_str()` on a pd.DataFrame, e.g. 
+ ``` + col1 col2 col3 col4 + 0 0.1 a None 2020-01-01 + 1 0.2 "b c" None 2021-05-05 + ``` + - values (including column names) that contain spaces need + to be enclosed in double quotation marks, e.g. + "2023-03-15 16:35:41.205000+00:00" + :param col_to_type: a mapping between the column names and the + types of the values in these columns + - if a column is not present in the mapping, its values will + remain strings + - to indicate the type of index values, use {"__index__": ...} + mapping, e.g. {"__index__": pd.Timestamp} + :param col_to_name_type: a mapping between the column names and + the required types of these column names + - same conventions apply as for `col_to_type` (see above) + :return: a converted Pandas dataframe + """ + # Separate the rows. + rows = df_as_str.split("\n") + # Clean up extra spaces. + rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] + # Identify individual values in the rows. + rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) + # Remove the placeholder ["..."] row. + rows_values = [row for row in rows_values if row != ["..."]] + # Organize values into a proper column-row structure. + rows_values = _assemble_df_rows(rows_values) + # Get the column names. + column_names = rows_values[0][1:] + # Get the index. + index_values = [row[0] for row in rows_values[1:]] + index_name = rows_values[0][0] + # Construct the df. + df = pd.DataFrame( + [row[1:] for row in rows_values[1:]], + columns=column_names, + index=index_values, + ) + if index_name != "": + df.index.name = index_name + # Cast the columns into appropriate types. + # Import locally to avoid cyclic import + import helpers.hpandas_conversion as hpanconv + + for col, col_type in col_to_type.items(): + if col == "__index__": + df.index = hpanconv.cast_series_to_type(df.index, col_type) + else: + df[col] = hpanconv.cast_series_to_type(df[col], col_type) + # Cast the column names into appropriate types. 
+ for col, col_name_type in col_to_name_type.items(): + if col == "__index__": + df.index = df.index.rename(col_name_type(df.index.name)) + else: + df = df.rename(columns={col: col_name_type(col)}) + return df + + +# ############################################################################# +# Column Operations +# ############################################################################# + + +def check_and_filter_matching_columns( + df: pd.DataFrame, required_columns: List[str], filter_data_mode: str +) -> pd.DataFrame: + """ + Check that columns are the required ones and if not filter data depending + on `filter_data_mode`. + + :param df: data to check columns for + :param required_columns: columns to return, skipping columns that are not required + :param filter_data_mode: control behaviour with respect to extra or missing columns + - "assert": raise an error if required columns do not match received columns + - "warn_and_trim": return the intersection of required and received columns and + issue a warning + :return: input data as it is if required columns match received columns otherwise + processed data, see `filter_data_mode` + """ + received_columns = df.columns.to_list() + hdbg.dassert_lte(1, len(received_columns)) + # + if filter_data_mode == "assert": + # Raise an assertion. + only_warning = False + elif filter_data_mode == "warn_and_trim": + # Just issue a warning. + only_warning = True + # Get columns intersection while preserving the order of the columns. + columns_intersection = [ + col_name + for col_name in required_columns + if col_name in received_columns + ] + hdbg.dassert_lte(1, len(columns_intersection)) + df = df[columns_intersection] + else: + raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") + hdbg.dassert_set_eq( + required_columns, + received_columns, + only_warning=only_warning, + msg="Received columns do not match required columns.", + ) + return df + + +# TODO(Grisha): finish the function. 
+# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? + + +# ############################################################################# +# Merge +# ############################################################################# + + +def merge_dfs( + df1: pd.DataFrame, + df2: pd.DataFrame, + threshold_col_name: str, + *, + threshold: float = 0.9, + intersecting_columns: Optional[List[str]] = None, + **pd_merge_kwargs: Any, +) -> pd.DataFrame: + """ + Wrap `pd.merge`. + + :param threshold_col_name: a column's name to check the minimum + overlap on + :param threshold: minimum overlap of unique values in a specified + column to perform the merge + :param intersecting_columns: allow certain columns to appear in both + dataframes; store both in the resulting df with corresponding + suffixes + """ + _LOG.debug( + hprint.to_str( + "threshold_col_name threshold intersecting_columns pd_merge_kwargs" + ) + ) + # Sanity check column types. + threshold_col1 = df1[threshold_col_name] + threshold_col2 = df2[threshold_col_name] + only_first_elem = False + hdbg.dassert_array_has_same_type_element( + threshold_col1, threshold_col2, only_first_elem + ) + # TODO(Grisha): @Dan Implement asserts for each asset id. + # Check that an overlap of unique values is above the specified threshold. + threshold_unique_values1 = set(threshold_col1) + threshold_unique_values2 = set(threshold_col2) + threshold_common_values = set(threshold_unique_values1) & set( + threshold_unique_values2 + ) + threshold_common_values_share1 = len(threshold_common_values) / len( + threshold_unique_values1 + ) + threshold_common_values_share2 = len(threshold_common_values) / len( + threshold_unique_values2 + ) + hdbg.dassert_lte(threshold, threshold_common_values_share1) + hdbg.dassert_lte(threshold, threshold_common_values_share2) + # Use an empty set instead of None to perform set difference further. 
+ intersecting_columns_set = ( + set() if intersecting_columns is None else set(intersecting_columns) + ) + # Check that there are no common columns except for the ones in `intersecting_columns`. + df1_cols = ( + set(df1.columns.to_list()) + - set(pd_merge_kwargs["on"]) + - intersecting_columns_set + ) + df2_cols = ( + set(df2.columns.to_list()) + - set(pd_merge_kwargs["on"]) + - intersecting_columns_set + ) + hdbg.dassert_not_intersection(df1_cols, df2_cols) + # + res_df = df1.merge(df2, **pd_merge_kwargs) + return res_df + + +# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? + + +def get_df_from_iterator( + iter_: Iterator[pd.DataFrame], + *, + sort_index: bool = True, +) -> pd.DataFrame: + """ + Concat all the dataframes in the iterator in one dataframe. + + :param iter_: dataframe iterator + :param sort_index: whether to sort output index or not + :return: combined iterator data + """ + # TODO(gp): @all make a copy of `iter_` so we don't consume it. + dfs = list(iter_) + df_res = pd.concat(dfs) + if sort_index: + df_res = df_res.sort_index() + return df_res + + +# ############################################################################# +# Filter +# ############################################################################# + + +def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: + """ + Remove N rows from the input data and shuffle the remaining ones. 
def filter_df(
    df: pd.DataFrame,
    col_name: str,
    value: Any,
    *,
    invert: bool = False,
    check_value: bool = True,
    # TODO(gp): -> verbose
    print_info: bool = True,
) -> pd.DataFrame:
    """
    Select the rows of a dataframe whose column matches a value.

    :param df: dataframe to filter
    :param col_name: column name to filter on
    :param value: value to filter on; a list selects the rows matching any
        of its elements
    :param invert: whether to invert the filter
    :param check_value: whether to check that the value is in the column
        (applied only when `value` is not a list)
    :param print_info: whether to print information about the filter
    :return: filtered dataframe
    """
    hdbg.dassert_in(col_name, df.columns)
    if not isinstance(value, list):
        # A scalar is matched by equality, optionally after verifying that it
        # occurs in the column at all.
        if check_value:
            hdbg.dassert_in(value, df[col_name].unique())
        selected = df[col_name] == value
    else:
        # A list is matched by membership.
        selected = df[col_name].isin(value)
    selected = ~selected if invert else selected
    if print_info:
        num_selected = selected.sum()
        _LOG.info("selected=%s", hprint.perc(num_selected, df.shape[0]))
    return df[selected]
def remove_stable_columns(
    df: pd.DataFrame, *, threshold: float = 0.9, verbose: bool = True
) -> pd.DataFrame:
    """
    Remove the columns whose values vary too little across rows.

    A column is kept when the ratio between the number of its unique values
    (NaN counts as a value) and the number of rows is at least `threshold`.

    :param df: dataframe to remove stable columns from
    :param threshold: minimum ratio `num_unique_values / num_rows` that a
        column needs to be kept
    :param verbose: whether to log the kept and the removed columns
    :return: dataframe with stable columns removed; a dataframe without
        rows is returned with all its columns since the ratio is undefined
    """
    num_rows = len(df)
    if num_rows == 0:
        # Avoid a division by zero: without rows no column can be judged.
        high_variability_columns = list(df.columns)
    else:
        high_variability_columns = [
            col
            for col in df.columns
            if df[col].nunique(dropna=False) / num_rows >= threshold
        ]
    if verbose:
        # Compute the columns to remove.
        columns_to_remove = df.columns[
            ~df.columns.isin(high_variability_columns)
        ]
        _LOG.info(
            "kept %s columns: %s",
            hprint.perc(len(high_variability_columns), len(df.columns)),
            hprint.list_to_str(high_variability_columns),
        )
        _LOG.info(
            "removed %s columns: %s",
            hprint.perc(len(columns_to_remove), len(df.columns)),
            hprint.list_to_str(columns_to_remove),
        )
    df = df[high_variability_columns]
    return df
def add_pct(
    df: pd.DataFrame,
    col_name: str,
    total: int,
    dst_col_name: str,
    num_digits: int = 2,
    use_thousands_separator: bool = True,
) -> pd.DataFrame:
    """
    Insert a percentage column next to a count column and format both.

    The column "dst_col_name" stores the values of column "col_name" as a
    percentage of "total". The input dataframe is modified in place. The
    rest of the parameters are the same as hprint.round_digits().

    :return: updated df
    """
    # Place the percentage column immediately to the right of `col_name`.
    src_position = df.columns.tolist().index(col_name)
    pct_values = (100.0 * df[col_name]) / total
    df.insert(src_position + 1, dst_col_name, pct_values)
    # Format the counts.
    formatted_counts = []
    for count in df[col_name]:
        formatted_counts.append(
            hprint.round_digits(
                count,
                num_digits=None,
                use_thousands_separator=use_thousands_separator,
            )
        )
    df[col_name] = formatted_counts
    # Format the percentages.
    formatted_pcts = []
    for pct in df[dst_col_name]:
        formatted_pcts.append(
            hprint.round_digits(
                pct, num_digits=num_digits, use_thousands_separator=False
            )
        )
    df[dst_col_name] = formatted_pcts
    return df
def filter_with_df(
    df: pd.DataFrame, filter_df: pd.DataFrame, log_level: int = logging.DEBUG
) -> pd.Series:
    """
    Compute a mask for DataFrame df using common columns and values in
    "filter_df".

    A row of `df` is selected when, for every column of `filter_df`, its
    value appears among the values of that column in `filter_df`.

    :param df: dataframe to compute the mask for; it must contain all the
        columns of `filter_df`
    :param filter_df: dataframe whose columns and values define the filter
    :param log_level: logging level for reporting the selected rows
    :return: boolean mask aligned with `df`; all rows are selected when
        `filter_df` has no columns
    """
    mask: Optional[pd.Series] = None
    for c in filter_df:
        hdbg.dassert_in(c, df.columns)
        vals = filter_df[c].unique()
        col_mask = df[c].isin(vals)
        if mask is None:
            mask = col_mask
        else:
            mask &= col_mask
    if mask is None:
        # No filtering column: the empty conjunction selects every row.
        mask = pd.Series(True, index=df.index, dtype=bool)
    _LOG.log(log_level, "after filter=%s", hprint.perc(mask.sum(), len(mask)))
    return mask
def filter_by_val(
    df: pd.DataFrame,
    col_name: str,
    min_val: Optional[float],
    max_val: Optional[float],
    use_thousands_separator: bool = True,
    log_level: int = logging.DEBUG,
) -> pd.DataFrame:
    """
    Filter out rows of df where df[col_name] is not in [min_val, max_val].

    :param df: data to filter
    :param col_name: name of the column to compare with the bounds
    :param min_val: inclusive lower bound, or `None` for no lower bound
    :param max_val: inclusive upper bound, or `None` for no upper bound
    :param use_thousands_separator: passed to `hprint.perc()` when
        reporting the number of rows
    :param log_level: logging level for reporting kept / removed rows
    :return: filtered data; at least one row must survive the filter
    """
    # TODO(gp): If column is ordered, this can be done more efficiently with
    # binary search.
    hdbg.dassert_in(col_name, df.columns)
    num_rows = df.shape[0]
    if min_val is not None and max_val is not None:
        hdbg.dassert_lte(min_val, max_val)
    mask = None
    if min_val is not None:
        mask = min_val <= df[col_name]
    if max_val is not None:
        mask2 = df[col_name] <= max_val
        if mask is None:
            mask = mask2
        else:
            mask &= mask2
    if mask is None:
        # No bound was specified: keep every row. Return a copy, like boolean
        # indexing does, instead of evaluating `df[None]`.
        res = df.copy()
    else:
        res = df[mask]
    hdbg.dassert_lt(0, res.shape[0])
    _LOG.log(
        log_level,
        "Rows kept %s, removed %s rows",
        hprint.perc(
            res.shape[0],
            num_rows,
            use_thousands_separator=use_thousands_separator,
        ),
        hprint.perc(
            num_rows - res.shape[0],
            num_rows,
            use_thousands_separator=use_thousands_separator,
        ),
    )
    return res
def _display(log_level: int, df: pd.DataFrame) -> None:
    """
    Display a dataframe in a notebook at the given log level.

    The behavior is similar to a command like `_LOG.log(log_level, ...)` but
    for a notebook `display` command. Nothing happens outside a notebook.

    :param log_level: log level at which to display a df. The df is shown
        only when `log_level` is greater than or equal to the current logger
        verbosity. E.g., if `log_level = logging.DEBUG`, then we display the
        df only if we are running with `-v DEBUG`
    :param df: dataframe to display
    """
    if not hsystem.is_running_in_ipynb():
        return
    if log_level < hdbg.get_logger_verbosity():
        return
    # Import IPython only when something is actually displayed, so that the
    # function is a no-op (rather than an `ImportError`) where IPython is not
    # installed.
    from IPython.display import display

    display(df)
def df_to_str(
    df: Union[pd.DataFrame, pd.Series, pd.Index],
    *,
    # TODO(gp): Remove this hack in the integration.
    # handle_signed_zeros: bool = False,
    handle_signed_zeros: bool = True,
    num_rows: Optional[int] = 6,
    print_dtypes: bool = False,
    print_shape_info: bool = False,
    print_nan_info: bool = False,
    print_memory_usage: bool = False,
    memory_usage_mode: str = "human_readable",
    tag: Optional[str] = None,
    max_columns: int = 10000,
    max_colwidth: int = 2000,
    max_rows: int = 500,
    precision: int = 6,
    display_width: int = 10000,
    use_tabulate: bool = False,
    log_level: int = logging.DEBUG,
) -> str:
    """
    Print a dataframe to string reporting all the columns without trimming.

    Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works
    properly when called from outside a notebook, i.e., the dataframe is
    printed. But it won't display the dataframe in a notebook, since the
    default level at which the dataframe is displayed is `logging.DEBUG`.

    In this case to get the correct behavior one should do:
    ```
    log_level = ...
    _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level))
    ```

    :param df: dataframe, series or index to print; `None` yields an empty
        string
    :param handle_signed_zeros: convert `-0.0` to `0.0`
    :param num_rows: max number of rows to print (half from the top and half
        from the bottom of the dataframe)
        - `None` to print the entire dataframe
    :param print_dtypes: report dataframe types and information about the
        type of each column by looking at the first value
    :param print_shape_info: report dataframe shape, index and columns
    :param print_nan_info: report the number of NaN and zero elements and the
        number of rows / columns containing at least one NaN
    :param print_memory_usage: report shallow and deep memory use for the
        index and each column
    :param memory_usage_mode: `"bytes"` or `"human_readable"`
    :param tag: optional name printed before the dataframe
    :param max_columns: see `_df_to_str()`, like the other display options
    :param log_level: level at which the output is displayed in a notebook
    :return: the string representation; inside a notebook the output is
        printed / displayed directly and `None` is returned
    """
    if df is None:
        return ""
    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)
    elif isinstance(df, pd.Index):
        df = df.to_frame(index=False)
    hdbg.dassert_isinstance(df, pd.DataFrame)
    # Convert "negative zeros" to `0.0`.
    df = df.copy()
    if handle_signed_zeros:
        for col_name in df.select_dtypes(include=[np.float64, float]).columns:
            df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0)
    out = []
    # Print the tag.
    if tag is not None:
        out.append(f"# {tag}=")
    if not df.empty:
        # Print information about the shape and index.
        # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info
        if print_shape_info:
            # TODO(gp): Unfortunately we can't improve this part of the output
            # since there are many golden inside the code that would need to be
            # updated. Consider automating updating the expected values in the
            # code.
            txt = f"index=[{df.index.min()}, {df.index.max()}]"
            out.append(txt)
            txt = f"columns={','.join(map(str, df.columns))}"
            out.append(txt)
            txt = f"shape={str(df.shape)}"
            out.append(txt)
        # Print information about the types.
        if print_dtypes:
            out.append("* type=")
            table = []
            row = []
            col_name = "index"
            row.append(col_name)
            row.extend(_report_srs_stats(df.index))
            row = map(str, row)
            table.append(row)
            for col_name in df.columns:
                row_: List[Any] = []
                row_.append(col_name)
                row_.extend(_report_srs_stats(df[col_name]))
                row_ = map(str, row_)
                table.append(row_)
            #
            columns = [
                "col_name",
                "dtype",
                "num_unique",
                "num_nans",
                "first_elem",
                "type(first_elem)",
            ]
            df_stats = pd.DataFrame(table, columns=columns)
            stats_num_rows = None
            df_stats_as_str = _df_to_str(
                df_stats,
                stats_num_rows,
                max_columns,
                max_colwidth,
                max_rows,
                precision,
                display_width,
                use_tabulate,
                log_level,
            )
            out.append(df_stats_as_str)
        # Print info about memory usage.
        if print_memory_usage:
            out.append("* memory=")
            mem_use_df = pd.concat(
                [df.memory_usage(deep=False), df.memory_usage(deep=True)],
                axis=1,
                keys=["shallow", "deep"],
            )
            # Add total row.
            mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)})
            mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T])
            # Convert into the desired format.
            if memory_usage_mode == "bytes":
                pass
            elif memory_usage_mode == "human_readable":
                import helpers.hintrospection as hintros

                # Format element by element, one column at a time, which works
                # on every pandas version (`DataFrame.applymap` is deprecated).
                mem_use_df = mem_use_df.apply(
                    lambda col: col.map(hintros.format_size)
                )
            else:
                raise ValueError(
                    f"Invalid memory_usage_mode='{memory_usage_mode}'"
                )
            memory_num_rows = None
            memory_usage_as_txt = _df_to_str(
                mem_use_df,
                memory_num_rows,
                max_columns,
                max_colwidth,
                max_rows,
                precision,
                display_width,
                use_tabulate,
                log_level,
            )
            out.append(memory_usage_as_txt)
        # Print info about nans.
        if print_nan_info:
            total_rows, total_cols = df.shape
            num_elems = total_rows * total_cols
            is_nan = df.isna()
            num_nans = is_nan.sum().sum()
            txt = f"num_nans={hprint.perc(num_nans, num_elems)}"
            out.append(txt)
            # Count the elements equal to zero (not the null ones).
            num_zeros = (df == 0).sum().sum()
            txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}"
            out.append(txt)
            # TODO(gp): np can't do isinf on objects like strings.
            # num_infinite = np.isinf(df).sum().sum()
            # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems)
            # out.append(txt)
            # Count the rows with at least one NaN, as a share of the rows.
            num_nan_rows = is_nan.any(axis=1).sum()
            txt = f"num_nan_rows={hprint.perc(num_nan_rows, total_rows)}"
            out.append(txt)
            # Count the columns with at least one NaN, as a share of the
            # columns.
            num_nan_cols = is_nan.any(axis=0).sum()
            txt = f"num_nan_cols={hprint.perc(num_nan_cols, total_cols)}"
            out.append(txt)
    is_in_ipynb = hsystem.is_running_in_ipynb()
    if is_in_ipynb:
        # In a notebook the header is printed right away, so that it precedes
        # the dataframe displayed by `_df_to_str()`.
        if len(out) > 0 and log_level >= hdbg.get_logger_verbosity():
            print("\n".join(out))
        txt = None
    # Print the df.
    df_as_str = _df_to_str(
        df,
        num_rows,
        max_columns,
        max_colwidth,
        max_rows,
        precision,
        display_width,
        use_tabulate,
        log_level,
    )
    if not is_in_ipynb:
        out.append(df_as_str)
        txt = "\n".join(out)
    return txt
+ """ + txt = "" + if print_columns: + txt += "columns=%s\n" % ",".join(df.columns.tolist()) + txt += "shape=%s\n" % str(df.shape) + # + if seed is not None: + np.random.seed(seed) + index = np.random.choice(df.index, num_rows, replace=False) + index = sorted(index) + df = df.loc[index] + else: + df = df.head(num_rows) + with pd.option_context( + "display.width", + 200, + "display.max_columns", + None, + "display.max_colwidth", + None, + ): + txt += "\n" + str(df) + return txt + + +# ############################################################################# + + +def resolve_column_names( + column_set: ColumnSet, + columns: Union[List[str], pd.Index], + *, + keep_order: bool = False, +) -> List[str]: + """ + Change format of the columns and perform some sanity checks. + + :param column_set: columns to proceed + :param columns: all columns available + :param keep_order: preserve the original order or allow sorting + """ + # Ensure that `columns` is well-formed. + if isinstance(columns, pd.Index): + columns = columns.to_list() + hdbg.dassert_isinstance(columns, list) + hdbg.dassert_lte(1, len(columns)) + # + if column_set is None: + # Columns were not specified, thus use the list of all the columns. + column_set = columns + else: + if isinstance(column_set, str): + column_set = [column_set] + hdbg.dassert_isinstance(column_set, list) + hdbg.dassert_lte(1, len(column_set)) + hdbg.dassert_is_subset(column_set, columns) + if keep_order: + # Keep the selected columns in the same order as in the original + # `columns`. + column_set = [c for c in columns if c in column_set] + return column_set + + +def _get_unique_elements_in_column(df: pd.DataFrame, col_name: str) -> List[Any]: + """ + Get unique elements in a column, handling unhashable types. 
+ + :param df: dataframe containing the column + :param col_name: name of the column to get unique elements from + :return: list of unique elements + """ + try: + vals = df[col_name].unique() + except TypeError: + # TypeError: unhashable type: 'list' + _LOG.error("Column '%s' has unhashable types", col_name) + vals = list(set(map(str, df[col_name]))) + cast(List[Any], vals) + return vals + + +def _get_variable_cols( + df: pd.DataFrame, threshold: int = 1 +) -> Tuple[List[str], List[str]]: + """ + Return columns of a df that contain less than unique values. + + :return: (variable columns, constant columns) + """ + var_cols = [] + const_cols = [] + for col_name in df.columns: + unique_elems = _get_unique_elements_in_column(df, col_name) + num_unique_elems = len(unique_elems) + if num_unique_elems <= threshold: + const_cols.append(col_name) + else: + var_cols.append(col_name) + return var_cols, const_cols + + +def remove_columns_with_low_variability( + df: pd.DataFrame, threshold: int = 1, log_level: int = logging.DEBUG +) -> pd.DataFrame: + """ + Remove columns of a df that contain less than unique values. + + :return: df with only columns with sufficient variability + """ + var_cols, const_cols = _get_variable_cols(df, threshold=threshold) + _LOG.log(log_level, "# Constant cols") + for col_name in const_cols: + unique_elems = _get_unique_elements_in_column(df, col_name) + _LOG.log( + log_level, + " %s: %s", + col_name, + hprint.list_to_str(list(map(str, unique_elems))), + ) + _LOG.log(log_level, "# Var cols") + _LOG.log(log_level, hprint.list_to_str(var_cols)) + return df[var_cols] + + +# Start copy-paste From helpers/hpandas_transform.py + + +def add_pct( + df: pd.DataFrame, + col_name: str, + total: int, + dst_col_name: str, + num_digits: int = 2, + use_thousands_separator: bool = True, +) -> pd.DataFrame: + """ + Add to df a column "dst_col_name" storing the percentage of values in + column "col_name" with respect to "total". 
The rest of the parameters are + the same as hprint.round_digits(). + + :return: updated df + """ + # Add column with percentage right after col_name. + pos_col_name = df.columns.tolist().index(col_name) + df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) + # Format. + df[col_name] = [ + hprint.round_digits( + v, num_digits=None, use_thousands_separator=use_thousands_separator + ) + for v in df[col_name] + ] + df[dst_col_name] = [ + hprint.round_digits( + v, num_digits=num_digits, use_thousands_separator=False + ) + for v in df[dst_col_name] + ] + return df + + +# End copy-paste. + + +def print_column_variability( + df: pd.DataFrame, + max_num_vals: int = 3, + num_digits: int = 2, + use_thousands_separator: bool = True, +) -> pd.DataFrame: + """ + Print statistics about the values in each column of a data frame. + + This is useful to get a sense of which columns are interesting. + """ + print(("# df.columns=%s" % hprint.list_to_str(df.columns))) + res = [] + for c in tauton.tqdm(df.columns, desc="Computing column variability"): + vals = _get_unique_elements_in_column(df, c) + try: + min_val = min(vals) + except TypeError as e: + _LOG.debug("Column='%s' reported %s", c, e) + min_val = "nan" + try: + max_val = max(vals) + except TypeError as e: + _LOG.debug("Column='%s' reported %s", c, e) + max_val = "nan" + if len(vals) <= max_num_vals: + txt = ", ".join(map(str, vals)) + else: + txt = ", ".join(map(str, [min_val, "...", max_val])) + row = ["%20s" % c, len(vals), txt] + res.append(row) + res = pd.DataFrame(res, columns=["col_name", "num", "elems"]) + res.sort_values("num", inplace=True) + # TODO(gp): Fix this. 
def breakdown_table(
    df: pd.DataFrame,
    col_name: str,
    num_digits: int = 2,
    use_thousands_separator: bool = True,
    verbosity: bool = False,
) -> pd.DataFrame:
    """
    Create a breakdown table showing value counts and percentages for a column.

    :param df: dataframe to analyze
    :param col_name: column name to create breakdown for; when a list of
        column names is passed, the breakdown of each column is printed and
        `None` is returned
    :param num_digits: number of decimal digits for percentages
    :param use_thousands_separator: whether to use thousands separator
        in counts
    :param verbosity: whether to print additional details
    :return: breakdown table with counts and percentages, sorted by
        decreasing count and closed by a "Total" row
    """
    if isinstance(col_name, list):
        for c in col_name:
            print(("\n" + hprint.frame(c).rstrip("\n")))
            # Forward the formatting options, which would otherwise silently
            # fall back to their defaults.
            res = breakdown_table(
                df,
                c,
                num_digits=num_digits,
                use_thousands_separator=use_thousands_separator,
                verbosity=verbosity,
            )
            print(res)
        return None
    #
    if verbosity:
        print(("# col_name=%s" % col_name))
    # Count the rows of each group. `size()` is used rather than counting the
    # non-null values of an arbitrary column, which undercounts the groups
    # when that column contains NaNs.
    res = df.groupby(col_name).size()
    res = pd.DataFrame(res)
    res.columns = ["count"]
    res.sort_values(["count"], ascending=False, inplace=True)
    res = pd.concat(
        [res, pd.DataFrame([df.shape[0]], index=["Total"], columns=["count"])]
    )
    res["pct"] = (100.0 * res["count"]) / df.shape[0]
    # Format.
    res["count"] = [
        hprint.round_digits(
            v, num_digits=None, use_thousands_separator=use_thousands_separator
        )
        for v in res["count"]
    ]
    res["pct"] = [
        hprint.round_digits(
            v, num_digits=num_digits, use_thousands_separator=False
        )
        for v in res["pct"]
    ]
    if verbosity:
        for k, df_tmp in df.groupby(col_name):
            print((hprint.frame("%s=%s" % (col_name, k))))
            # Show the "description" column only when the dataframe has one.
            cols = [c for c in (col_name, "description") if c in df_tmp.columns]
            with pd.option_context(
                "display.max_colwidth", 100000, "display.width", 130
            ):
                print((df_tmp[cols]))
    return res
class ParquetDataFrameGenerator:
    """
    Generate synthetic, time-indexed dataframes for a set of assets.

    The index is a UTC `pd.date_range` over `[start_date, end_date)` with the
    given frequency. The layout of the columns depends on `output_type`.
    """

    # Allowed types.
    OUTPUT_TYPES = ("basic", "verbose_open", "cm_task_1103")

    @staticmethod
    def _wrap_all_assets_df(df: List[pd.DataFrame]) -> pd.DataFrame:
        """
        Concatenate the per-asset dataframes into a single dataframe.

        :param df: list of dataframes, one per asset
        :return: dataframe with the rows of all the assets
        """
        # Create a single dataframe for all the assets.
        df = pd.concat(df)
        _LOG.debug(hpandas.df_to_str(df, print_shape_info=True, tag="df"))
        return df

    def _get_core_dataframes(self) -> List[pd.DataFrame]:
        """
        Create core dataframes that are updated according to the output type.

        :return: list of core dataframes for specified assets with string values
        Example:

        ```
                    asset
        2000-01-01      A
        2000-01-02      A
        2000-01-03      A
        ```
        """
        # Generate core dataframe for each asset.
        df = []
        for asset in self._assets:
            asset_df = pd.DataFrame(
                {self._asset_col_name: asset},
                index=self._dataframe_index,
            )
            _LOG.debug(
                hpandas.df_to_str(
                    asset_df, print_shape_info=True, tag="asset_df"
                )
            )
            df.append(asset_df)
        return df

    def _get_daily_basic_dataframe(self) -> pd.DataFrame:
        """
        Update core dataframes with additional columns.

        :return: updated core dataframe as presented below
        Example:

        ```
                    idx asset val1 val2
        2000-01-01    0     A   00   00
        2000-01-02    0     A   01   01
        2000-01-03    0     A   02   02
        ```
        """
        asset_dataframes = self._get_core_dataframes()
        for idx, asset_dataframe in enumerate(asset_dataframes):
            # Positioned left from `asset` column.
            asset_dataframe.insert(loc=0, column="idx", value=idx)
            # Positioned right from `asset` column.
            asset_dataframe.insert(
                loc=2,
                column="val1",
                value=list(range(len(self._dataframe_index))),
            )
            asset_dataframe.insert(
                loc=3,
                column="val2",
                value=list(range(len(self._dataframe_index))),
            )
        return self._wrap_all_assets_df(asset_dataframes)

    def _get_verbose_open_dataframe(self) -> pd.DataFrame:
        """
        Update core dataframes with additional columns.

        The asset column (e.g., `ticker`) is named after `asset_col_name`.

        :return: update core dataframe as presented below
        Example:

        ```
        vendor_date interval start_time   end_time ticker currency open id
         2021-11-24       60 1637762400 1637762460      A      USD  100  1
         2021-11-24       60 1637762400 1637762460      A      USD  200  2
        ```
        """
        # The bar length is the distance between the first two timestamps.
        hdbg.dassert_lte(2, len(self._dataframe_index))
        interval = self._dataframe_index[1] - self._dataframe_index[0]
        # Use the total number of seconds: `Timedelta.seconds` stores only the
        # sub-day component and is 0 for a frequency of whole days.
        interval = int(interval.total_seconds())
        # The index is built in UTC, so the epoch must be tz-aware too.
        epoch = pd.Timestamp("1970-01-01", tz="UTC")
        asset_dataframes = self._get_core_dataframes()
        for id_, asset_dataframe in enumerate(asset_dataframes):
            start_time = (asset_dataframe.index - epoch) // pd.Timedelta("1s")
            end_time = start_time + interval
            # Positioned left from `ticker` column.
            asset_dataframe.insert(
                loc=0,
                column="vendor_date",
                value=asset_dataframe.index.date.astype(str),
            )
            asset_dataframe.insert(loc=1, column="interval", value=interval)
            asset_dataframe.insert(loc=2, column="start_time", value=start_time)
            asset_dataframe.insert(loc=3, column="end_time", value=end_time)
            # Positioned right from `ticker` column.
            asset_dataframe.insert(loc=5, column="currency", value="USD")
            asset_dataframe.insert(
                loc=6,
                column="open",
                value=list(range(len(self._dataframe_index))),
            )
            asset_dataframe.insert(loc=7, column="id", value=id_)
        return self._wrap_all_assets_df(asset_dataframes)

    # TODO(Dan): CmTask1490.
    def _get_cm_task_1103_dataframe(self) -> pd.DataFrame:
        """
        Update core dataframes with additional columns.

        :return: updated core dataframe as presented below
        Example:

        ```
                    full_symbol close
        2000-01-01        10689   100
        2000-01-02        10689   200
        2000-01-03        10689   300
        ```
        """
        asset_dataframes = self._get_core_dataframes()
        for asset_dataframe in asset_dataframes:
            # Positioned right from asset column.
            asset_dataframe.insert(
                loc=1,
                column="close",
                value=list(range(len(self._dataframe_index))),
            )
        return self._wrap_all_assets_df(asset_dataframes)

    def __init__(
        self,
        start_date: str,
        end_date: str,
        output_type: str,
        assets: List[Union[str, int]],
        asset_col_name: str,
        freq: str,
    ) -> None:
        """
        Constructor.

        :param start_date: start of date range including start_date
        :param end_date: end of date range excluding end_date
        :param output_type: type of data that is generated, one of
            `OUTPUT_TYPES`; it is validated by `generate()`
        :param assets: list of desired assets that can be names or ids
        :param asset_col_name: name of the column that stores assets
        :param freq: frequency of steps between start and end date
        """
        self._start_date = start_date
        self._end_date = end_date
        self._output_type = output_type
        self._assets = assets
        self._asset_col_name = asset_col_name
        self._freq = freq
        self._dataframe_index = pd.date_range(
            self._start_date,
            self._end_date,
            freq=self._freq,
            inclusive="left",
            tz="UTC",
        )
        # Map each output type to the method building the dataframe.
        self._OUTPUT_TYPE_FUNCTION_MAP = {
            "basic": self._get_daily_basic_dataframe,
            "verbose_open": self._get_verbose_open_dataframe,
            "cm_task_1103": self._get_cm_task_1103_dataframe,
        }

    @property
    def output_type_function(self) -> Callable:
        """
        Return proper function for data generation depending on output type.
        """
        return self._OUTPUT_TYPE_FUNCTION_MAP[self._output_type]

    def generate(self) -> pd.DataFrame:
        """
        Generate specific dataframe based on inputs provided in instance
        creation.

        :return: generated dataframe for all the assets
        :raises ValueError: if the output type is not in `OUTPUT_TYPES`
        """
        if self._output_type not in self.OUTPUT_TYPES:
            raise ValueError(f"Unsupported data type `{self._output_type}`!")
        return self.output_type_function()
+ + :param df: dataframe indexed by timestamp + :param partition_mode: + - "by_date": extract the date from the index + - E.g., an index like `2022-01-10 14:00:00+00:00` is transform to a + column `20220110` + - "by_year_month_day": split the index in year, month, day columns + - "by_year_month": split by year and month + - "by_year_week": split by year and week of the year + - "by_year": split by year + :return: + - df with additional partitioning columns + - list of partitioning columns + """ + with htimer.TimedScope(logging.DEBUG, "# add_date_partition_cols"): + if partition_mode == "by_date": + df["date"] = df.index.strftime("%Y%m%d") + partition_columns = ["date"] + else: + if partition_mode == "by_year_month_day": + partition_columns = ["year", "month", "day"] + elif partition_mode == "by_year_month": + partition_columns = ["year", "month"] + elif partition_mode == "by_year_week": + partition_columns = ["year", "weekofyear"] + elif partition_mode == "by_year": + partition_columns = ["year"] + elif partition_mode == "by_month": + partition_columns = ["month"] + else: + raise ValueError(f"Invalid partition_mode='{partition_mode}'") + # Add date columns chosen by partition mode. + for column_name in partition_columns: + # Extract data corresponding to `column_name` (e.g., + # `df.index.year`). + if column_name == "weekofyear": + # The `weekofyear` attribute has been deprecated in Pandas + # 2.1.0, so weeks are extracted using a function instead of + # the attribute name. + df["weekofyear"] = df.index.isocalendar().week + else: + df[column_name] = getattr(df.index, column_name) + return df, partition_columns + + +def to_partitioned_parquet( + df: pd.DataFrame, + partition_columns: List[str], + dst_dir: str, + *, + aws_profile: hs3.AwsProfile = None, + basename_template: str = None, +) -> None: + """ + Save the given dataframe as Parquet file partitioned along the given + columns. 
+ + :param df: dataframe + :param partition_columns: partitioning columns + :param dst_dir: location of partitioned dataset + :param aws_profile: the name of an AWS profile or a s3fs filesystem + + E.g., in case of partition using `date`, the file layout looks like: + ``` + dst_dir/ + date=20211230/ + data.parquet + date=20211231/ + data.parquet + date=20220101/ + data.parquet + ``` + + In case of multiple columns like `asset`, `year`, `month`, the file layout + looks like: + ``` + dst_dir/ + asset=A/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... + asset=B/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + """ + # Use either S3 or local filesystem. + filesystem = None + if aws_profile is not None: + filesystem = hs3.get_s3fs(aws_profile) + # ParquetDataset appends an extra "/", creating an empty-named folder + # when saving on S3. + dst_dir = dst_dir.rstrip("/") + with htimer.TimedScope(logging.DEBUG, "# partition_dataset"): + # Read. + table = pa.Table.from_pandas(df) + # Write using partition. + # TODO(gp): add this logic to hparquet.to_parquet as a possible option. + _LOG.debug(hprint.to_str("partition_columns dst_dir")) + hdbg.dassert_is_subset(partition_columns, df.columns) + # TODO(gp): We would like to avoid overriding existing tiles. It's not clear + # how to do it. Either setting permissions to read-only before writing. + # Or having a list of files that will be written and ensure that none of + # those files already existing. 
+ pq.write_to_dataset( + table, + dst_dir, + partition_cols=partition_columns, + filesystem=filesystem, + basename_template=basename_template, + ) + + +def generate_parquet_files( + start_date: str, + end_date: str, + assets: List[Union[str, int]], + asset_col_name: str, + dst_dir: str, + *, + freq: str = "1H", + output_type: str = "basic", + partition_mode: str = "by_date", + custom_partition_cols: Optional[str] = None, + reset_index: bool = False, +) -> None: + """ + Generate parquet files for testing. + + :param start_date: date from which the data is generated, value + included + :param end_date: date until which the data is generated, value + excluded + :param assets: list of assets that can be either names or ids + :param asset_col_name: name of the column that stores assets + :param dst_dir: destination dir for generated data + :param freq: frequency of data generation + :param output_type: type of data that is generated + :param partition_mode: Partition mode for parquet DataFrame, default + by date + :param custom_partition_cols: overrides default partition by time + :param reset_index: reset dataframe index to default sequential + integer values + """ + # Generate timespan. + hdbg.dassert_lt(start_date, end_date) + timespan = pd.date_range(start_date, end_date) + hdbg.dassert_lt(2, len(timespan)) + # Run dataframe generation. + pdg = ParquetDataFrameGenerator( + start_date, end_date, output_type, assets, asset_col_name, freq + ) + parquet_df = pdg.generate() + # Add partition columns to the dataframe. + df, partition_cols = add_date_partition_columns(parquet_df, partition_mode) + if custom_partition_cols: + # If custom partition is provided, it will override date partition. + # Sample: `["asset", "year", "month"]` + custom_partition_cols = custom_partition_cols.split(",") + # Ensure that date partition columns are present. 
+ hdbg.dassert_is_subset(partition_cols, custom_partition_cols) + partition_cols = custom_partition_cols + # Partition and write dataset. + if reset_index: + df = df.reset_index(drop=True) + # TODO(Nikola): When direct run is possible, expose usage of `aws_profile` + # so generator can be used in conjunction with `helpers.hmoto.S3Mock_TestCase`. + # Will probably be part of CMTask #1490. + to_partitioned_parquet(df, partition_cols, dst_dir) + + +def get_pyarrow_s3fs(*args: Any, **kwargs: Any) -> PyArrowS3FileSystem: + """ + Return an Pyarrow S3Fs object from a given AWS profile. + + Same as `hs3.get_s3fs`, used specifically for accessing Parquet + datasets. + """ + # Check if S3FileSystem is available + hdbg.dassert( + S3FileSystemAvailable, + "S3FileSystem is not available in this version of pyarrow.fs", + ) + # When deploying jobs via ECS the container obtains credentials based on passed + # task role specified in the ECS task-definition, refer to: + # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html + if hserver.is_inside_ecs_container(): + _LOG.info("Fetching credentials from task IAM role") + s3fs_ = PyArrowS3FileSystem() + else: + aws_credentials = hs3.get_aws_credentials(*args, **kwargs) + s3fs_ = PyArrowS3FileSystem( + access_key=aws_credentials["aws_access_key_id"], + secret_key=aws_credentials["aws_secret_access_key"], + session_token=aws_credentials["aws_session_token"], + region=aws_credentials["aws_region"], + ) + return s3fs_ + + +def _get_parquet_tiles_from_file_path(file_path: str) -> List[Tuple[str, Any]]: + """ + Hacky function to help get tile values from parquet file path. + + Used by from_parquet when loading first n rows of a dataset only. 
+ + Example + input: ...ccxt/binance/v1_0_0/currency_pair=CTK_USDT/ + year=2023/month=3/26dc59f62b87403d9a3e9f04c7c21382-0.parquet + output: [("currency_pair", "CTK_USDT"), ("year", 2023), ("month", 3)] + """ + path_parts = file_path.split("/") + tiles = [] + for part in path_parts: + if "=" in part: + col, value = part.split("=") + value = int(value) if value.isdigit() else value + tiles.append((col, value)) + return tiles + + +# TODO(Dan): Add mode to allow querying even when some non-existing columns are passed. +def from_parquet( + file_name: str, + *, + columns: Optional[List[str]] = None, + filters: Optional[List[Any]] = None, + n_rows: Optional[int] = None, + schema: Optional[List[Tuple[str, pa.DataType]]] = None, + log_level: int = logging.DEBUG, + report_stats: bool = False, + aws_profile: hs3.AwsProfile = None, +) -> pd.DataFrame: + """ + Load a dataframe from a Parquet file. + + The difference with `pd.read_pq` is that here we use Parquet + Dataset. + + :param file_name: path to a Parquet dataset + :param columns: columns to return, skipping reading columns that are not requested + - `None` means return all available columns + :param filters: Parquet query + :param n_rows: the number of rows to load, load all data if `None` + :param schema: see `pyarrow.Schema`, e.g., `schema = + [("int_col", pa.int32()), ("str_col", pa.string())]` + :param log_level: logging level to execute at + :param report_stats: whether to report Parquet file size or not + :param aws_profile: AWS profile to use if and only if using an S3 path, + otherwise `None` for local path + :return: data from Parquet dataset + """ + _LOG.debug(hprint.to_str("file_name columns filters schema")) + hdbg.dassert_isinstance(file_name, str) + hs3.dassert_is_valid_aws_profile(file_name, aws_profile) + if hs3.is_s3_path(file_name): + if isinstance(aws_profile, str): + filesystem = get_pyarrow_s3fs(aws_profile) + else: + # Note: `s3fs` filesystem is only to be used on exact file path + # as 
`pq.ParquetDataset` is not properly handling directory path. + filesystem = aws_profile + # Pyarrow S3FileSystem does not have `exists` method. + s3_filesystem = hs3.get_s3fs(aws_profile) + hs3.dassert_path_exists(file_name, s3_filesystem) + file_name = file_name.lstrip("s3://") + else: + filesystem = None + hdbg.dassert_path_exists(file_name) + # Load data. + with htimer.TimedScope( + logging.DEBUG, f"# Reading Parquet file '{file_name}'" + ) as ts: + if n_rows: + # Get the latest parquet file in the directory. + hdbg.dassert_isinstance( + aws_profile, + str, + "aws_profile must be a string for S3 operations", + ) + last_pq_file = hs3.get_latest_pq_in_s3_dir(file_name, aws_profile) + file = s3_filesystem.open(last_pq_file, "rb") + # Load the data. + parquet_file = pq.ParquetFile(file) + # Get the head of the data. + df = ( + parquet_file.read_row_group(0, columns=parquet_file.schema.names) + .to_pandas() + .head(n_rows) + ) + if columns: + # Note: `schema.names` also includes and index. + hdbg.dassert_is_subset(columns, parquet_file.schema.names) + df = df[columns] + # Hacky way to append tile values lost when obtaining particular .pq file. + tiles = _get_parquet_tiles_from_file_path(last_pq_file) + for col, value in tiles: + df[col] = value + else: + if schema is not None: + # Pass partition columns types explicitly. + schema = pa.schema(schema) + partitioning = ds.partitioning(schema, flavor="hive") + dataset = pq.ParquetDataset( + # Replace URI with path. + file_name, + filesystem=filesystem, + filters=filters, + partitioning=partitioning, + ) + if columns: + # Note: `schema.names` also includes and index. + hdbg.dassert_is_subset(columns, dataset.schema.names) + # To read also the index we need to use `read_pandas()`, instead of + # `read_table()`. + # See https://arrow.apache.org/docs/python/parquet.html#reading-and-writing-single-files. 
+ table = dataset.read_pandas(columns=columns) + # Convert the Pandas Dataframe timestamp columns and index to `ns` + # resolution. The general approach is to preserve the time unit + # information after reading data back from Parquet files. + # Currently, it's challenging to resolve this issue since Parquet + # data is mixed with data from CSV files, which convert the time + # unit to `ns` by default. Refer to CmampTask7331 for details. + # https://github.com/cryptokaizen/cmamp/issues/7331 + df = table.to_pandas(coerce_temporal_nanoseconds=True) + if isinstance(df.index, pd.DatetimeIndex): + df.index = df.index.as_unit("ns") + # Report stats about the df. + _LOG.debug("df.shape=%s", str(df.shape)) + mem = df.memory_usage().sum() + _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) + # Report stats about the Parquet file size. + if report_stats: + file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) + _LOG.log( + log_level, + "Loaded '%s' (size=%s, time=%.1fs)", + file_name, + file_size, + ts.elapsed_time, + ) + return df + + +# Copied from `hio.create_enclosing_dir()` to avoid circular dependencies. +def _create_enclosing_dir(file_name: str) -> Optional[str]: + dir_name = os.path.dirname(file_name) + if dir_name != "": + _LOG.debug( + "Creating dir_name='%s' for file_name='%s'", dir_name, file_name + ) + hdbg.dassert_is_not(dir_name, None) + dir_name = os.path.normpath(dir_name) + if os.path.normpath(dir_name) == ".": + _LOG.debug("Can't create dir '%s'", dir_name) + if os.path.exists(dir_name): + # The dir exists and we want to keep it, so we are done. + _LOG.debug("The dir '%s' exists: exiting", dir_name) + return None + _LOG.debug("Creating directory '%s'", dir_name) + try: + os.makedirs(dir_name) + except OSError as e: + _LOG.error(str(e)) + # It can happen that we try to create the directory while somebody else + # created it, so we neutralize the corresponding exception. + if e.errno == 17: + # OSError: [Errno 17] File exists. 
+ pass + else: + raise e + hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) + return dir_name + + +def to_parquet( + df: pd.DataFrame, + file_name: str, + *, + log_level: int = logging.DEBUG, + report_stats: bool = False, + aws_profile: hs3.AwsProfile = None, +) -> None: + """ + Save a dataframe as Parquet. + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_isinstance(file_name, str) + hs3.dassert_is_valid_aws_profile(file_name, aws_profile) + if hs3.is_s3_path(file_name): + filesystem = hs3.get_s3fs(aws_profile) + hs3.dassert_path_not_exists(file_name, filesystem) + file_name = file_name.lstrip("s3://") + else: + filesystem = None + hdbg.dassert_path_not_exists(file_name) + hdbg.dassert_file_extension(file_name, ["parquet", "pq"]) + # There is no concept of directory on S3. + # Only applicable to local filesystem. + if aws_profile is None: + _create_enclosing_dir(file_name) + # Report stats about the df. + _LOG.debug("df.shape=%s", str(df.shape)) + mem = df.memory_usage().sum() + _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) + # Save data. + with htimer.TimedScope( + logging.DEBUG, f"# Writing Parquet file '{file_name}'" + ) as ts: + table = pa.Table.from_pandas(df) + # This is needed to handle: + # ``` + # pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=America/New_York] + # to timestamp[us] would lose data: 1663595160000000030 + # ``` + # No need to cast to `us` since pyarrow >= 15.0.0. + # See + # https://github.com/cryptokaizen/cmamp/blob/master/docs/infra/all.parquet.explanation.md#time-unit-conversion-when-writing-to-parquet + # for details. + # parquet_args = { + # "coerce_timestamps": "us", + # "allow_truncated_timestamps": True, + # } + # pq.write_table(table, file_name, filesystem=filesystem, **parquet_args) + pq.write_table(table, file_name, filesystem=filesystem) + # Report stats about the Parquet file size. 
+ if report_stats: + file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) + _LOG.log( + log_level, + "Saved '%s' (size=%s, time=%.1fs)", + file_name, + file_size, + ts.elapsed_time, + ) + + +# ############################################################################# + + +def _yield_parquet_tile( + file_name: str, + columns: Optional[List[str]], + filters: List[Any], + asset_id_col: str, +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in a single tile given the filters. + + It is assumed that data is partitioned by asset_id, year and month, i.e. + the file layout is: + + ``` + file_name/ + asset_id=1032127330/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... + asset_id=2133227690/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + + :param file_name: see `from_parquet()` + :param columns: see `from_parquet()` + :param filters: see `from_parquet()` + :param asset_id_col: name of the column with asset ids + :return: a generator of `from_parquet()` dataframe + """ + # Without the schema being provided `pyarrow` incorrectly infers + # type of the asset id column, i.e. `pyarrow` reads assets as + # strings instead of integers. See the related discussion at + # `https://issues.apache.org/jira/browse/ARROW-6114`. + int_type = np.int64 + pyarrow_int_type = pa.from_numpy_dtype(int_type) + schema = [ + (asset_id_col, pyarrow_int_type), + # TODO(Grisha): consider passing year and month column names as params. 
+ ("year", pyarrow_int_type), + ("month", pyarrow_int_type), + ] + tile = from_parquet( + file_name, + columns=columns, + filters=filters, + schema=schema, + ) + hpandas.dassert_series_type_is(tile[asset_id_col], int_type) + yield tile + + +def build_asset_id_filter( + asset_ids: List[int], + asset_id_col: str, +) -> List[List[Tuple[str, str, int]]]: + filters = [] + for asset_id in asset_ids: + filters.append([(asset_id_col, "==", asset_id)]) + return filters + + +def build_year_month_filter( + start_date: datetime.date, + end_date: datetime.date, +) -> list: + """ + Use the year/months to build a Parquet filter. + + If `start_date.year == end_date.year`, then return a list of + three tuples (to be "ANDed" together) based on the year and months. + Else, return a list of list of tuples: + - the inner lists consist of AND filters; the inner lists are ORed + together if used as a single filter + - each inner list filter represents a calendar year or part thereof + + One use case of this function is to generate a filter whose OR + components can be processed one-by-one. For example, if memory constraints + prevent loading an entire tile at once, then one could instead attempt to + load one-year tiles one at a time. + + NOTE: `start_date.day` and `end_date.day` are ignored. + + TODO(Paul): Consider adding a switch to support smaller AND filter chunks + (e.g., at monthly instead of yearly granularity). 
+ """ + hdbg.dassert_isinstance(start_date, datetime.date) + hdbg.dassert_isinstance(end_date, datetime.date) + hdbg.dassert_lte(start_date, end_date) + start_year = start_date.year + end_year = end_date.year + filter_ = [] + # + if start_year == end_year: + filter_.append(("year", "==", start_year)) + filter_.append(("month", ">=", start_date.month)) + filter_.append(("month", "<=", end_date.month)) + else: + start_year_filter = [] + start_year_filter.append(("year", "==", start_year)) + start_year_filter.append(("month", ">=", start_date.month)) + end_year_filter = [] + end_year_filter.append(("year", "==", end_year)) + end_year_filter.append(("month", "<=", end_date.month)) + filter_.append(start_year_filter) + filter_.append(end_year_filter) + for year in range(start_year + 1, end_year): + year_filter = [] + year_filter.append(("year", "==", year)) + filter_.append(year_filter) + return filter_ + + +def yield_parquet_tiles_by_year( + file_name: str, + start_date: datetime.date, + end_date: datetime.date, + cols: List[Union[int, str]], + *, + asset_ids: Optional[List[int]] = None, + asset_id_col: str = "asset_id", +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in tiles up to one year in length. + + :param file_name: as in `from_parquet()` + :param start_date: first date to load; day is ignored + :param end_date: last date to load; day is ignored + :param cols: if an `int` is supplied, it is cast to a string before reading + :param asset_ids: asset ids to load + :param asset_id_col: see `_yield_parquet_tile()` + :return: a generator of `from_parquet()` dataframes + """ + time_filters = build_year_month_filter(start_date, end_date) + hdbg.dassert_isinstance(time_filters, list) + # The list should not be empty. 
+ hdbg.dassert(time_filters) + if not isinstance(time_filters[0], list): + time_filters = [time_filters] + columns = [str(col) for col in cols] + if asset_ids is None: + asset_ids = [] + asset_id_filter = build_asset_id_filter(asset_ids, asset_id_col) + for time_filter in time_filters: + if asset_id_filter: + combined_filter = [ + id_filter + time_filter for id_filter in asset_id_filter + ] + else: + combined_filter = time_filter + yield from _yield_parquet_tile( + file_name, columns, combined_filter, asset_id_col + ) + + +# TODO(Paul): Add additional time-restriction filter. +def yield_parquet_tiles_by_assets( + file_name: str, + asset_ids: List[int], + asset_id_col: str, + asset_batch_size: int, + cols: Optional[List[Union[int, str]]], +) -> Iterator[pd.DataFrame]: + """ + Yield Parquet data in tiles batched by asset ids. + + :param file_name: as in `from_parquet()` + :param asset_ids: asset ids to load + :param asset_id_col: see `_yield_parquet_tile()` + :param asset_batch_size: the number of asset to load in a single batch + :param cols: if an `int` is supplied, it is cast to a string before reading + :return: a generator of `from_parquet()` dataframes + """ + hdbg.dassert_isinstance(asset_id_col, str) + hdbg.dassert(asset_id_col, "`asset_id_col` must be nonempty") + batches = [ + asset_ids[i : i + asset_batch_size] + for i in range(0, len(asset_ids), asset_batch_size) + ] + columns: Optional[List[str]] = None + if cols: + columns = [str(col) for col in cols] + for batch in tqdm(batches): + _LOG.debug("assets=%s", batch) + filter_ = build_asset_id_filter(batch, asset_id_col) + yield from _yield_parquet_tile(file_name, columns, filter_, asset_id_col) + + +def build_filter_with_only_equalities( + start_timestamp: pd.Timestamp, end_timestamp: pd.Timestamp +) -> list: + """ + Build a list of Parquet filters based on equality conditions for partition + columns. 
+ + This function creates a filter for each partition column (year, month, day) based on the + equality conditions between components of the timestamp arguments when possible. + + Example: + Input args: + start_timestamp: 2022-08-31T00:01:00+00:00 + end-timestamp: 2022-08-31T23:59:59+00:00 + Output: + [("year", "=", 2022), ("month", "=", 8), ("day", "=", 31)] + + These filters enhance performance by allowing to load data quicker when used in tandem with timestamp filters. + Less memory will be used because less `.pq` need to be loaded. + + :param start_timestamp: start of the interval. + :param end_timestamp: end of the interval: + """ + hdbg.dassert_isinstance(start_timestamp, pd.Timestamp) + hdbg.dassert_isinstance(end_timestamp, pd.Timestamp) + filters = [] + if start_timestamp.year == end_timestamp.year: + filters.append(("year", "==", start_timestamp.year)) + if start_timestamp.month == end_timestamp.month: + filters.append(("month", "==", start_timestamp.month)) + if start_timestamp.day == end_timestamp.day: + filters.append(("day", "==", start_timestamp.day)) + return filters + + +# TODO(Paul): The `int` assumption is baked in. We can generalize to strings +# if needed, but if we do, then we should continue to handle string ints as +# ints as we do here (e.g., there are sorting advantages, among others). +def _process_walk_triple( + triple: tuple, start_depth: int +) -> Tuple[Tuple[str, ...], Tuple[int, ...]]: + """ + Process a triple returned by `os.walk()` + + :param triple: (dirpath: str, dirnames: List[str], filenames: List[str]) + :param start_depth: the "depth" of `path` used in the call + `os.walk(path)` + :return: tuple(lhs_vals), tuple(rhs_vals) + """ + lhs_vals: List[str] = [] + rhs_vals: List[int] = [] + # If there are subdirectories, do not process. 
+ if triple[1]: + return tuple(lhs_vals), tuple(rhs_vals) + depth = len(triple[0].split("/")) + rel_depth = depth - start_depth + key = tuple(triple[0].split("/")[start_depth:]) + if len(key) == 0: + return tuple(lhs_vals), tuple(rhs_vals) + hdbg.dassert_eq(len(key), rel_depth) + lhs_vals = [] + rhs_vals = [] + for string in key: + lhs, rhs = string.split("=") + lhs_vals.append(lhs) + rhs_vals.append(int(rhs)) + hdbg.dassert_eq(len(lhs_vals), len(rhs_vals)) + return tuple(lhs_vals), tuple(rhs_vals) + + +def collate_parquet_tile_metadata( + path: str, +) -> pd.DataFrame: + """ + Report stats in a dataframe on Parquet file partitions. + + The directories should be of the form `lhs=rhs` where "rhs" is a string + representation of an `int`. + + :param path: path to top-level Parquet directory + :return: dataframe with two file size columns and a multiindex reflecting + the Parquet path structure. + """ + hdbg.dassert_dir_exists(path) + # Remove the trailing slash to simplify downstream accounting. + if path.endswith("/"): + path = path[:-1] + hdbg.dassert(not path.endswith("/")) + # Walk the path. + # os.walk() yields a 3-tuple of the form + # (dirpath: str, dirnames: List[str], filenames: List[str]) + start_depth = len(path.split("/")) + headers_set = set() + dict_ = collections.OrderedDict() + for triple in os.walk(path): + # If the walk has taken us to, e.g., + # asset_id=100/year=2010/month=1/data.parquet + # then we expect + # lhs = ("asset_id", "year", "month") + # rhs = (100, 2010, 1) + lhs, rhs = _process_walk_triple(triple, start_depth) + # If the walkabout has not yet taken us to a file, continue. + if not lhs: + continue + # The tuple `lhs` is to become the index headers. We check later + # for uniqueness. + headers_set.add(lhs) + # Get the file name and full path. + file_name = triple[2][0] + file_path = os.path.join(triple[0], file_name) + # Record the size of the file. We keep this in bytes for easy + # join aggregations. 
+ size_in_bytes = os.path.getsize(file_path) + dict_[rhs] = size_in_bytes + # Ensure that headers are unambiguous. + hdbg.dassert_eq(len(headers_set), 1) + # Convert to a multiindexed dataframe. + df = pd.DataFrame(dict_.values(), index=dict_.keys()) + df.rename(columns={0: "file_size_in_bytes"}, inplace=True) + headers = headers_set.pop() + df.index.names = headers + df.sort_index(inplace=True) + # Add a more human-readable file size column. Keep the original numerical + # one for downstream aggregations. + file_size = df["file_size_in_bytes"].apply(hintros.format_size) + df["file_size"] = file_size + return df + + +# ############################################################################# + +# A Parquet filtering condition. e.g., `("year", "=", year)` +ParquetFilter = Tuple[str, str, Any] +# The AND of Parquet filtering conditions, e.g., +# `[("year", "=", year), ("month", "=", month)]` +ParquetAndFilter = List[ParquetFilter] +# A OR-AND Parquet filtering condition, e.g., +# ``` +# [[('year', '=', 2020), ('month', '=', 1)], +# [('year', '=', 2020), ('month', '=', 2)], +# [('year', '=', 2020), ('month', '=', 3)]] +# ``` +ParquetOrAndFilter = List[ParquetAndFilter] + + +# TODO(gp): @Nikola add light unit tests for `by_year_week` and for additional_filter. +# TODO(gp): Can we return a single type? +def get_parquet_filters_from_timestamp_interval( + partition_mode: str, + start_timestamp: Optional[pd.Timestamp], + end_timestamp: Optional[pd.Timestamp], + *, + additional_filters: Optional[List[ParquetFilter]] = None, +) -> Union[ParquetOrAndFilter, ParquetAndFilter]: + """ + Convert a constraint on a timestamp [start_timestamp, end_timestamp] into a + Parquet filters expression, based on the passed partitioning / tiling + criteria. + + :param partition_mode: control filtering of Parquet datasets. It needs to be + in sync with the way the data was saved + :param start_timestamp: start of the interval. 
`None` means no bound + :param end_timestamp: end of the interval. `None` means no bound + :param additional_filters: AND conditions to add to the final filter. + E.g., if we want to constraint also on `exchange_id` and 'currency_pair`, + we can specify + `[("exchange_id", "in", (...)),("currency_pair", "in", (...))]` + :return: list of OR-AND predicates + """ + # Check timestamp interval. + left_close = True + right_close = True + hdateti.dassert_is_valid_interval( + start_timestamp, + end_timestamp, + left_close=left_close, + right_close=right_close, + ) + or_and_filter = [] + if partition_mode == "by_year_month": + # Handle the first and last year of the interval. + if start_timestamp: + # `[('year', '==', 2020), ('month', '>=', 6)]` + and_filter = [ + ("year", "==", start_timestamp.year), + ("month", ">=", start_timestamp.month), + ] + or_and_filter.append(and_filter) + if end_timestamp: + # `[('year', '==', 2021), ('month', '<=', 3)]` + and_filter = [ + ("year", "==", end_timestamp.year), + ("month", "<=", end_timestamp.month), + ] + or_and_filter.append(and_filter) + if start_timestamp and end_timestamp: + number_of_years = len( + range(start_timestamp.year, end_timestamp.year + 1) + ) + if number_of_years == 1: + # For a one-year range, we overwrite the result with a single AND + # statement, e.g., `[Jan 2020, Mar 2020]` corresponds to + # `[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 3)]]`. + # Note that this interval is different from and OR-AND form as + # `[[('year', '==', 2020), ('month', '>=', 1)], + # [('year', '==', 2020), ('month', '<=', 3)]]` + # since the first AND clause include months <= 3 and the second one + # include months >= 1, and the OR corresponds to the entire year, + # instead of the interval `[Jan 2020, Mar 2020]`. 
+ and_filter = [ + ("year", "==", start_timestamp.year), + ("month", ">=", start_timestamp.month), + ("month", "<=", end_timestamp.month), + ] + or_and_filter = [and_filter] + elif number_of_years > 2: + # For ranges over two years, one OR statement is necessary to bridge + # the gap between first and last AND statement. + # `[('year', '>', 2020), ('year', '<', 2023)]` + # Inserted in middle as bridge between AND statements. + and_filter = [ + ("year", ">", start_timestamp.year), + ("year", "<", end_timestamp.year), + ] + or_and_filter.insert(1, and_filter) + else: + # For intervals of exactly two years the two AND conditions are + # enough to select the desired period of time. + pass + elif len(or_and_filter) == 1: + # Handle the case when exactly one of the interval bounds is passed, + # e.g., [June 2020, None]. + # In this case the first year was covered by the code above (i.e., + # `year >= 2020 and month == 6`) and we need to specify the rest of + # the years (i.e., `year > 2020`). + operator = ">" if start_timestamp else "<" + timestamp = start_timestamp if start_timestamp else end_timestamp + hdbg.dassert_is_not(timestamp, None, "timestamp should not be None") + extra_filter = [("year", operator, timestamp.year)] + or_and_filter.append(extra_filter) + else: + # If there is no interval provided, leave empty `or_and_filter` as is. + pass + elif partition_mode == "by_year_week": + # TODO(gp): Consider using the same approach above for months also here. + # Partition by year and week. + hdbg.dassert_is_not( + end_timestamp, + None, + "Parquet backend can't determine the boundaries of the data", + ) + # Include last week in the interval. + end_timestamp += pd.DateOffset(weeks=1) + # Get all weeks in the interval. 
+ hdbg.dassert_is_not( + start_timestamp, + None, + "start_timestamp should not be None for by_year_week partition mode", + ) + dates = pd.date_range( + start_timestamp.date(), end_timestamp.date(), freq="W" + ) + for date in dates: + year = date.year + # https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar + weekofyear = date.isocalendar().week + and_filter = [("year", "=", year), ("weekofyear", "=", weekofyear)] + or_and_filter.append(and_filter) + else: + raise ValueError(f"Unknown partition mode `{partition_mode}`!") + if additional_filters: + hdbg.dassert_isinstance(additional_filters, list) + if or_and_filter: + # Append additional filters for every present timestamp filter. + or_and_filter = [ + additional_filters + and_filter for and_filter in or_and_filter + ] + else: + # If no timestamp filters are provided, use additional filters. + or_and_filter = additional_filters + _LOG.debug("or_and_filter=%s", str(or_and_filter)) + if len(or_and_filter) == 0: + # Empty list is not acceptable value for pyarrow dataset. + # Only logical expression or `None`. + or_and_filter = None + return or_and_filter + + +def list_and_merge_pq_files( + root_dir: str, + *, + file_name: str = "data.parquet", + aws_profile: hs3.AwsProfile = None, + drop_duplicates_mode: Optional[str] = None, +) -> None: + """ + Merge all files of the Parquet dataset. + + Can be generalized to any used partition. + + The standard partition (also known as "by-tile") assumed is: + + ``` + root_dir/ + currency_pair=ADA_USDT/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ... 
+ currency_pair=EOS_USDT/ + year=2021/ + month=12/ + data.parquet + year=2022/ + month=01/ + data.parquet + ``` + + :param root_dir: root directory of Parquet dataset + :param file_name: name of the single resulting file + :param aws_profile: the name of an AWS profile or a s3fs filesystem + """ + if aws_profile is not None: + filesystem = hs3.get_s3fs(aws_profile) + else: + filesystem = None + # Get full paths to each Parquet file inside root dir. + if filesystem: + # Use specialized S3 filesystem function to list Parquet files efficiently. + # since glob.glob() is very slow as it does a lot of accesses to S3. + # The extra `**/*` is needed by `pyarrow` >= 17. + parquet_files = filesystem.glob(f"{root_dir}/**/*.parquet") + else: + # For local filesystem, use glob.glob + parquet_files = glob.glob(f"{root_dir}/**/*.parquet", recursive=True) + _LOG.debug("Parquet files: '%s'", parquet_files) + # Get paths only to the lowest level of dataset folders. + dataset_folders = {f.rsplit("/", 1)[0] for f in parquet_files} + for folder in dataset_folders: + # Get files per folder and merge if there are multiple ones. + if filesystem: + # Use specialized S3 filesystem function to list Parquet files efficiently. + folder_files = filesystem.ls(folder) + else: + # For local filesystem, use os.listdir + folder_files = [os.path.join(folder, f) for f in os.listdir(folder)] + hdbg.dassert_ne( + len(folder_files), 0, msg=f"Empty folder `{folder}` detected!" + ) + if len(folder_files) == 1 and folder_files[0].endswith("/data.parquet"): + # If there is already single `data.parquet` file, no action is required. + continue + # Read all files in target folder. + # `partitioning=None` is required to read the dataset without + # partitioning columns. See CmTask7324 for details. 
+ # https://github.com/cryptokaizen/cmamp/issues/7324 + data = pq.ParquetDataset( + folder_files, filesystem=filesystem, partitioning=None + ).read() + data = data.to_pandas() + # Drop duplicates on all non-metadata columns. + # TODO(gp): hparquet is general and we should pass the columns to remove + # or perform the transform after. + if drop_duplicates_mode is None: + duplicate_columns = data.columns.to_list() + for col_name in ["knowledge_timestamp", "end_download_timestamp"]: + if col_name in duplicate_columns: + duplicate_columns.remove(col_name) + control_column = None + elif drop_duplicates_mode == "bid_ask": + # Drop duplicates on timestamp index. + duplicate_columns = ["timestamp", "exchange_id"] + control_column = None + elif drop_duplicates_mode == "ohlcv": + # Drop duplicates on timestamp and keep one with largest volume. + duplicate_columns = ["timestamp", "exchange_id"] + control_column = "volume" + else: + hdbg.dfatal("Supported drop duplicates modes: ohlcv, bid_ask") + data = hdatafr.remove_duplicates(data, duplicate_columns, control_column) + # Remove all old files and write the new, merged one. + if filesystem: + filesystem.rm(folder, recursive=True) + pq.write_table( + pa.Table.from_pandas(data), + folder + "/" + file_name, + filesystem=filesystem, + ) + else: + # Use os.remove for local filesystem to remove files. + for file_path in folder_files: + os.remove(file_path) + data.to_parquet(os.path.join(folder, file_name)) + + +def maybe_cast_to_int(string: str) -> Union[str, int]: + """ + Return `string` as an `int` if convertible, otherwise a no-op. + + This is useful for parsing mixed-type dataframe columns that may + contain strings and ints. For example, a dataframe with columns + `feature1, feature2, 1, 2, 3` will be written and read back with + columns `1`, `2`, `3` as the strings "1", "2", "3" rather than the + ints. This function can be used to rectify that in a post-processing + column rename. 
+ """ + hdbg.dassert_isinstance(string, str) + try: + val = int(string) + except ValueError: + val = string + return val diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py new file mode 100644 index 000000000..bb04164ea --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py @@ -0,0 +1,1176 @@ +""" +Import as: + +import helpers.hparser as hparser +""" + +import argparse +import logging +import os +import sys +from typing import Any, Dict, List, Optional, Tuple, Union + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# TODO(gp): arg -> args + + +# ############################################################################# + + +def add_bool_arg( + parser: argparse.ArgumentParser, + name: str, + *, + default_value: bool = False, + help_: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add options to a parser like `--xyz` and `--no_xyz`, controlled by + `args.xyz`. + + E.g., `add_bool_arg(parser, "run_diff_script", default_value=True)` adds + two options: + ``` + --run_diff_script Run the diffing script or not + --no_run_diff_script + ``` + corresponding to `args.run_diff_script`, where the default behavior is to have + that value equal to True unless one specifies `--no_run_diff_script`. 
+ """ + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument("--" + name, dest=name, action="store_true", help=help_) + group.add_argument("--no_" + name, dest=name, action="store_false") + parser.set_defaults(**{name: default_value}) + return parser + + +# ############################################################################# + + +def add_verbosity_arg( + parser: argparse.ArgumentParser, *, log_level: str = "INFO" +) -> argparse.ArgumentParser: + parser.add_argument( + "-v", + dest="log_level", + default=log_level, + # TRACE=5 + # DEBUG=10 + # INFO=20 + # WARNING=30 + # CRITICAL=50 + choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the logging level", + ) + parser.add_argument( + "--no_report_command_line", + action="store_true", + help="Disable printing of executed commands", + ) + return parser + + +# TODO(gp): Use this everywhere. +def parse_verbosity_args( + args: argparse.Namespace, *args_: Any, **kwargs: Any +) -> None: + if hasattr(args, "no_report_command_line") and args.no_report_command_line: + report_command_line = False + else: + report_command_line = True + kwargs["report_command_line"] = report_command_line + # if args.log_level == "VERB_DEBUG": + # args.log_level = 5 + hdbg.init_logger(verbosity=args.log_level, *args_, **kwargs) + + +# ############################################################################# +# Command line for `@hcache_simple.simple_cache` functions. +# ############################################################################# + + +# TODO(gp): Use the ones from hcache_simple.py for DRY. +_CACHE_MODE_CHOICES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") + + +def add_cache_control_arg( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add `--cache_mode` switch controlling every + `@hcache_simple.simple_cache`-decorated function in the process. 
+ + The resolved mode is applied globally via + `hcache_simple.set_global_cache_mode` in `parse_cache_control_args()`. + """ + parser.add_argument( + "--cache_mode", + action="store", + default=None, + choices=list(_CACHE_MODE_CHOICES), + help=( + "Override cache behavior for all @simple_cache functions. " + "REFRESH_CACHE repopulates, DISABLE_CACHE bypasses, " + "HIT_CACHE_OR_ABORT raises on miss." + ), + ) + parser.add_argument( + "--cache_debug", + action="store_true", + help=( + "Log at WARNING level for every @simple_cache call whether the " + "result was served from cache, computed on miss, or recomputed " + "because of `cache_mode`" + ), + ) + return parser + + +def parse_cache_control_args(args: argparse.Namespace) -> None: + """ + Apply `--cache_mode`, `--cache_debug` by setting the `hcache_simple` + process-wide globals. + """ + # Import lazily to avoid a circular dependency at module load time. + import helpers.hcache_simple as hcacsimp + + mode = getattr(args, "cache_mode", None) + if mode is not None: + _LOG.info("Setting global cache_mode=%s", mode) + hcacsimp.set_global_cache_mode(mode) + cache_debug = bool(getattr(args, "cache_debug", False)) + if cache_debug: + _LOG.info("Enabling cache_debug logging") + hcacsimp.set_cache_debug(cache_debug) + + +# ############################################################################# +# Command line options for handling the destination dir. +# ############################################################################# + + +def add_dst_dir_arg( + parser: argparse.ArgumentParser, + dst_dir_required: bool, + dst_dir_default: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add command line options related to destination dir. + + E.g., `--dst_dir`, `--clean_dst_dir` + """ + # TODO(gp): Add unit test to check this. + # A required dst_dir implies no default dst_dir. 
+ hdbg.dassert_imply( + dst_dir_required, + not dst_dir_default, + "Since dst_dir_required='%s', you need to specify a default " + "destination dir, instead of dst_dir_default='%s'", + dst_dir_required, + dst_dir_default, + ) + # If dst_dir is not required, then a default dst_dir must be specified. + hdbg.dassert_imply( + not dst_dir_required, + dst_dir_default, + "Since dst_dir_required='%s', you can't specify a default " + "destination dir, dst_dir_default='%s'", + dst_dir_required, + dst_dir_default, + ) + parser.add_argument( + "--dst_dir", + action="store", + default=dst_dir_default, + required=dst_dir_required, + help="Directory storing the results", + ) + parser.add_argument( + "--clean_dst_dir", + action="store_true", + help="Delete the destination dir before running", + ) + parser.add_argument( + "--no_confirm", + action="store_true", + help="Do not confirm before deleting dst dir", + ) + return parser + + +def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: + """ + Process the command line options related to destination dir. + + :return: a tuple (dst_dir, clean_dst_dir) + - dst_dir: the destination dir + - clean_dst_dir: whether to clean the destination dir or not + """ + dst_dir = args.dst_dir + _LOG.debug("dst_dir=%s", dst_dir) + # TODO(Dan): Fix `clean_dst_dir` usage since it is always `False` now. + clean_dst_dir = False + if args.clean_dst_dir: + _LOG.info("Cleaning dst_dir='%s'", dst_dir) + if os.path.exists(dst_dir): + _LOG.warning("Dir '%s' already exists", dst_dir) + if not args.no_confirm: + hsystem.query_yes_no( + f"Do you want to delete the dir '{dst_dir}'", + abort_on_no=True, + ) + hio.create_dir(dst_dir, incremental=False) + hio.create_dir(dst_dir, incremental=True) + _LOG.debug("clean_dst_dir=%s", clean_dst_dir) + return dst_dir, clean_dst_dir + + +# ############################################################################# +# Command line options related to selection actions. 
# #############################################################################

# Typical usage of the action-selection helpers below:
#
# # Define valid and default actions.
# valid_actions = ["download", "process", "upload", "cleanup"]
# default_actions = ["download", "process"]
# # Create parser and add action arguments.
# parser = argparse.ArgumentParser(...
# hparser.add_action_arg(parser, valid_actions, default_actions)
# args = parser.parse_args()
# # Select which actions to execute based on CLI arguments.
# actions = hparser.select_actions(args, valid_actions, default_actions)
# # Display the selected actions in a formatted table.
# print(hparser.actions_to_string(actions, valid_actions, add_frame=True))
# # mark_action() handles tracking which actions remain and logs skipped ones.
# while actions:
#     # Current action to check
#     action = actions[0]
#     # Determine if this action should execute and get remaining actions
#     # to_execute: True if action is in the list, False otherwise
#     # actions: updated list with current action removed if to_execute=True
#     to_execute, actions = hparser.mark_action(action, actions)
#     if to_execute:
#         # Execute the action
#         if action == "download":
#             print("Downloading data...")
#         elif action == "process":
#             print("Processing data...")


def add_action_arg(
    parser: argparse.ArgumentParser,
    valid_actions: List[str],
    default_actions: Optional[List[str]],
) -> argparse.ArgumentParser:
    """
    Add command line options to select actions to execute, skip, or enable.

    The function creates a mutually exclusive group with three options:
    - `-a/--action`: specify exact actions to execute
    - `-sa/--skip_action`: skip specific actions from default set
    - `-e/--enable`: enable additional actions on top of defaults

    Available actions are listed once in the help epilog to avoid repetition.

    Note that `--all` is registered only when `default_actions` is not `None`,
    so the parsed namespace has an `all` attribute only in that case.

    :param parser: parser to add the option to
    :param valid_actions: list of valid actions
    :param default_actions: list of default actions to execute, or `None` if
        there is no default set
    :return: parser with the option added
    """
    # Add epilog with list of available actions to avoid repeating them.
    actions_list = ", ".join(valid_actions)
    if parser.epilog:
        parser.epilog += f"\n\nAvailable actions: {actions_list}"
    else:
        parser.epilog = f"Available actions: {actions_list}"
    # Create mutually exclusive group for action selection.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-a",
        "--action",
        action="append",
        dest="action",
        help="Actions to execute (see available actions below)",
    )
    group.add_argument(
        "-sa",
        "--skip_action",
        action="append",
        dest="skip_action",
        help="Actions to skip from default set (see available actions below)",
    )
    group.add_argument(
        "-e",
        "--enable",
        action="append",
        dest="enable_action",
        help=(
            "Enable additional actions on top of defaults "
            "(see available actions below)"
        ),
    )
    if default_actions is not None:
        hdbg.dassert_is_subset(default_actions, valid_actions)
        parser.add_argument(
            "--all",
            action="store_true",
            help=f"Run all the actions ({' '.join(default_actions)})",
        )
    return parser


def actions_to_string(
    actions: List[str], valid_actions: List[str], add_frame: bool
) -> str:
    """
    Convert a list of actions to a string.

    Every valid action is reported on its own line, right-aligned, followed by
    `Yes` if it is selected or `-` if it is not.

    :param actions: list of actions to convert
    :param valid_actions: list of valid actions
    :param add_frame: if `True`, add a frame around the actions
    :return: string of the actions
    """
    # `default=0` keeps an empty `valid_actions` from raising inside `max()`.
    space = max((len(a) for a in valid_actions), default=0) + 2
    format_ = "%" + str(space) + "s: %s"
    lines = [
        format_ % (a, "Yes" if a in actions else "-") for a in valid_actions
    ]
    actions_as_str = "\n".join(lines)
    if add_frame:
        ret = hprint.frame("# Action selected:") + "\n"
        ret += hprint.indent(actions_as_str)
    else:
        ret = actions_as_str
    return ret  # type: ignore


def select_actions(
    args: argparse.Namespace,
    valid_actions: List[str],
    default_actions: Optional[List[str]],
) -> List[str]:
    """
    Select actions based on the command line arguments.

    Supports three mutually exclusive modes:
    - `--action`: run only specified actions
    - `--skip_action`: run default actions minus specified ones
    - `--enable`: run default actions plus specified additional ones

    :param args: command line arguments
    :param valid_actions: list of valid actions
    :param default_actions: list of default actions to execute. `None` means
        that all the valid actions are the default
    :return: list of selected actions, ordered as in `valid_actions`
    """
    # `--all` is registered by `add_action_arg()` only when default actions
    # exist and `enable_action` may be missing from namespaces built by other
    # code, so both are read defensively instead of assuming the attribute.
    run_all = bool(getattr(args, "all", False))
    enable_actions = getattr(args, "enable_action", None)
    hdbg.dassert(
        not (args.action and run_all),
        "You can't specify together --action and --all",
    )
    hdbg.dassert(
        not (args.action and args.skip_action),
        "You can't specify together --action and --skip_action",
    )
    hdbg.dassert(
        not (args.action and enable_actions),
        "You can't specify together --action and --enable",
    )
    hdbg.dassert(
        not (args.skip_action and enable_actions),
        "You can't specify together --skip_action and --enable",
    )
    # Select actions.
    if not args.action or run_all:
        if default_actions is None:
            default_actions = valid_actions[:]
        hdbg.dassert_is_subset(default_actions, valid_actions)
        # Convert it into list since through some code paths it can be a tuple.
        actions = list(default_actions)
    else:
        # Validate actions specified by user.
        for action in args.action:
            hdbg.dassert_in(
                action,
                valid_actions,
                "Invalid action '%s'",
                action,
            )
        actions = args.action[:]
    hdbg.dassert_isinstance(actions, list)
    hdbg.dassert_no_duplicates(actions)
    # Remove actions, if needed.
    if args.skip_action:
        hdbg.dassert_isinstance(args.skip_action, list)
        for skip_action in args.skip_action:
            # Validate that skip_action is a valid action.
            hdbg.dassert_in(
                skip_action,
                valid_actions,
                "Invalid action '%s'",
                skip_action,
            )
            # Skipping an action that is not selected is only worth a warning.
            if skip_action not in actions:
                _LOG.warning(
                    "Skipping action '%s' since it's already not in actions='%s'",
                    skip_action,
                    actions,
                )
            actions = [a for a in actions if a != skip_action]
    # Add enabled actions on top of defaults.
    if enable_actions:
        hdbg.dassert_isinstance(enable_actions, list)
        for enable_action in enable_actions:
            hdbg.dassert_in(
                enable_action,
                valid_actions,
                "Invalid action '%s'",
                enable_action,
            )
            if enable_action not in actions:
                actions.append(enable_action)
    # Reorder actions according to 'valid_actions'.
    actions = [action for action in valid_actions if action in actions]
    return actions


def mark_action(
    action: str, actions: Optional[List[str]]
) -> Tuple[bool, Optional[List[str]]]:
    """
    Mark an action as to be executed or skipped.

    :param action: action to mark
    :param actions: list of actions, or None to execute all actions
    :return: tuple of (to_execute, actions)
        - to_execute: whether `action` should run
        - actions: `actions` without `action` when it is executed, otherwise
          unchanged (`None` stays `None`)
    """
    if actions is None:
        # If actions is None, execute all actions.
        to_execute = True
    else:
        to_execute = action in actions
    _LOG.debug("\n%s", hprint.frame(f"action={action}"))
    if to_execute:
        if actions is not None:
            # Build a new list so that the caller's list is not mutated.
            actions = [a for a in actions if a != action]
    else:
        _LOG.warning("Skip action='%s'", action)
    return to_execute, actions


# #############################################################################
# Command line options for input/output processing.
# #############################################################################

# For non-dockerized scripts the following idiom is used:
#
# ```python
# # Add input/output arguments to parser.
# hparser.add_input_output_args(parser)
# # Handle input/output arguments, including stdin/stdout.
# in_file_name, out_file_name = hparser.parse_input_output_args(args)
# ...
# # Read input file, handling stdin.
# in_lines = hparser.from_file(in_file_name)
# ...
# # Write output, handling stdout.
# hparser.to_file(txt, out_file_name)
# ```
# See helpers_root/dev_scripts_helpers/coding_tools/transform_template.py as an
# example.

# For dockerized scripts the following idiom is used inside the wrapper, which
# calls the dockerized script:
#
# ```python
# # Add input/output arguments to parser.
# hparser.add_input_output_args(parser)
# # Handle input/output arguments, including stdin/stdout.
# in_file_name, out_file_name = hparser.parse_input_output_args(args)
# tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts(
#     in_file_name, "llm_transform")
# ...
# # For stdin/stdout, suppress the output of the container.
# suppress_output = in_file_name == "-" or out_file_name == "-"
# _run_dockerized_llm_transform(
#     tmp_in_file_name,
#     cmd_line_opts,
#     tmp_out_file_name,
#     mode="system",
#     force_rebuild=args.dockerized_force_rebuild,
#     use_sudo=args.dockerized_use_sudo,
#     suppress_output=suppress_output,
# )
# ...
# # Write output, handling stdout.
+# hparser.to_file(txt, out_file_name) +# ``` +# +# See helpers_root/dev_scripts_helpers/llms/llm_transform.py as an example. + + +def add_input_output_args( + parser: argparse.ArgumentParser, + *, + in_default: Optional[str] = None, + in_required: bool = True, + out_default: Optional[str] = None, + out_required: bool = False, +) -> argparse.ArgumentParser: + """ + Add options to parse input and output file name, and handle stdin / stdout. + + :param in_default: default file to be used for input + - If `None`, it must be specified by the user + :param in_required: whether the input file is required + :param out_default: default file to be used for output + - If `None`, it must be specified by the user + :param out_required: whether the output file is required + """ + parser.add_argument( + "-i", + "--input", + dest="input", + required=in_required, + type=str, + default=in_default, + help="Input file or `-` for stdin", + ) + parser.add_argument( + "-o", + "--output", + dest="output", + required=out_required, + type=str, + default=out_default, + help="Output file or `-` for stdout", + ) + return parser + + +def parse_input_output_args( + args: argparse.Namespace, *, clear_screen: bool = False +) -> Tuple[str, str]: + """ + Parse input and output file name, handling stdin / stdout. + + :return input and output file name. + """ + in_file_name = args.input + out_file_name = args.output + if out_file_name is None: + # If the output file is not specified, use the input file name, i.e., + # in place. + out_file_name = in_file_name + # Print summary. If we are using stdin / stdout, don't print anything since + # we don't want to pollute the output. 
+ if in_file_name != "-": + if clear_screen: + os.system("clear") + _LOG.info(hprint.to_str("in_file_name")) + _LOG.info(hprint.to_str("out_file_name")) + + return in_file_name, out_file_name + + +def init_logger_for_input_output_transform( + args: argparse.Namespace, *, verbose: bool = True +) -> None: + """ + Initialize the logger when input/output transformation is used. + + :param verbose: if `False`, set the log level to `CRITICAL` so that no + output is printed and avoid to print: + ``` + 09:34:24 - INFO hdbg.py init_logger:1013 Saving log to file '/User... + 09:34:24 - INFO hdbg.py init_logger:1018 > cmd='/Users/saggese/src... + 09:34:24 - INFO hparser.py parse_input_output_args:368 in_file_name='lectures_source/Les... + 09:34:24 - INFO hparser.py parse_input_output_args:369 out_file_name='-' + ``` + """ + verbosity = args.log_level + if not verbose: + # Unless user has specified DEBUG level, set the log level to `CRITICAL` + # so that no output is printed. + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + # If the input is stdin, we don't want to print the command line or any + # other log messages, unless the user specified a more verbose log level. + if args.input == "-": + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + print("cmd line: " + hdbg.get_command_line()) + hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) + + +def from_file(file_name: str) -> List[str]: + """ + Read file or stdin (represented by `-`), returning an array of lines. + + If file_name is "pb" and the platform is macOS, read from clipboard. + """ + if file_name == "-": + _LOG.info("Reading from stdin") + # Read. + txt = [] + for line in sys.stdin: + txt.append(line.rstrip("\n")) + elif file_name == "pb": + # Read from clipboard (macOS only). 
+ if hserver.is_host_mac(): + _LOG.info("Reading from clipboard") + cmd = "pbpaste" + rc, txt_str = hsystem.system_to_string(cmd) + txt = txt_str.split("\n") + else: + hdbg.dfatal("Reading from clipboard (pb) only works on macOS") + else: + txt = hio.from_file(file_name) + txt = txt.split("\n") + return txt + + +def to_file(txt: Union[str, List[str]], file_name: str) -> None: + """ + Write txt in a file or stdout (represented by `-`). + + If file_name is "pb" and the platform is macOS, write to clipboard. + """ + if isinstance(txt, str): + txt = [txt] + if file_name == "-": + _LOG.debug("Saving to stdout") + print("\n".join(txt)) + elif file_name == "pb": + # Write to clipboard (macOS only). + if hserver.is_host_mac(): + _LOG.info("Writing to clipboard") + txt_str = "\n".join(txt) + # Use echo with pbcopy, escaping single quotes. + txt_str_escaped = txt_str.replace("'", "'\\''") + cmd = f"echo -n '{txt_str_escaped}' | pbcopy" + hsystem.system(cmd) + _LOG.info("Written to clipboard") + else: + hdbg.dfatal("Writing to clipboard (pb) only works on macOS") + else: + _LOG.debug("Saving to file") + with open(file_name, "w") as f: + f.write("\n".join(txt)) + _LOG.info("Written file '%s'", file_name) + + +def adapt_input_output_args_for_dockerized_scripts( + in_file_name: str, tag: str +) -> Tuple[str, str]: + """ + Adapt input and output file name for dockerized scripts. + + Since we need to call a container and passing stdin/stdout is tricky, + we read the input and save it in a temporary file. + + :param tag: tag to be used for the temporary file name (e.g., `llm_transform`) + """ + # Since we need to call a container and passing stdin/stdout is tricky, + # we read the input and save it in a temporary file. 
+ in_lines = from_file(in_file_name) + if in_file_name == "-": + tmp_in_file_name = f"tmp.{tag}.in.txt" + in_txt = "\n".join(in_lines) + hio.to_file(tmp_in_file_name, in_txt) + else: + tmp_in_file_name = in_file_name + # + tmp_out_file_name = f"tmp.{tag}.out.txt" + return tmp_in_file_name, tmp_out_file_name + + +# ############################################################################# +# Command line options for parallel processing. +# ############################################################################# + + +# pylint: disable=line-too-long +# TODO(gp): These should go in hjoblib.py +def add_parallel_processing_arg( + parser: argparse.ArgumentParser, + *, + num_threads_default: Optional[str] = None, +) -> argparse.ArgumentParser: + """ + Add parallel processing args. + + The "incremental idiom" means skipping processing computation that has + already been performed. E.g., if we need to transform files from one dir to + another we skip the files already processed (assuming that a file present + in the destination dir is an indication that it has already been + processed). + + The default behavior should always be incremental since "incremental mode" + is not destructive like the non-incremental, i.e., delete and restart + + The incremental behavior is disabled with `--no_incremental`. This implies + performing the computation in any case + - It is often implemented by deleting the destination dir and then running + again, even in incremental mode + - If the destination dir already exists, then we require the user to + explicitly use `--force` to confirm that the user knows what is doing + """ + parser.add_argument( + "--dry_run", + action="store_true", + help="Print the workload and exit without running it", + ) + parser.add_argument( + "--no_incremental", + action="store_true", + help="Skip workload already performed", + ) + parser.add_argument( + "--force", + action="store_true", + help="Confirm that one wants to remove the previous results. 
It works only together with --no_incremental", + ) + # + help = """ + Number of threads to use: + - '-1' to use all CPUs; + - '1' to use one-thread at the time but using the parallel execution (mainly used + for debugging) + - 'serial' to serialize the execution without using parallel execution""" + if num_threads_default is None: + parser.add_argument( + "--num_threads", + action="store", + help=help, + required=True, + ) + else: + parser.add_argument( + "--num_threads", + action="store", + help=help, + default=num_threads_default, + ) + parser.add_argument("--no_keep_order", action="store_true", help="") + parser.add_argument( + "--num_func_per_task", + action="store", + type=int, + default=None, + help="Number of function execute in a (parallel) task of the workload. `None` means automatically decided by the function", + ) + parser.add_argument( + "--skip_on_error", + action="store_true", + help="Continue execution after encountering an error", + ) + parser.add_argument( + "--num_attempts", + default=1, + type=int, + help="Repeat running an experiment up to `num_attempts` times", + required=False, + ) + return parser + + +def create_incremental_dir(dst_dir: str, args: argparse.Namespace) -> None: + """ + Create a dir using the "incremental idiom". + + If the dir already exists and the user requested the not + incremental, we require `--force` to confirm deleting the dir. + """ + if args.force: + hdbg.dassert( + args.no_incremental, "--force only works with --no_incremental" + ) + _LOG.debug(hprint.to_str("dst_dir args")) + if args.no_incremental: + # Create the dir from scratch. + _LOG.debug("No incremental mode") + if os.path.exists(dst_dir): + _LOG.debug("Dir '%s' already exists", dst_dir) + hdbg.dassert_dir_exists(dst_dir, "'%s' must be a directory") + if not args.force: + _LOG.warning( + "The directory '%s' already exists. 
To confirm deleting it use --force", + dst_dir, + ) + sys.exit(-1) + _LOG.warning("Deleting %s", dst_dir) + hio.create_dir(dst_dir, incremental=False) + else: + _LOG.debug("Incremental mode") + hio.create_dir(dst_dir, incremental=True) + + +# ############################################################################# +# Command line options for metadata output. +# ############################################################################# + + +def add_json_output_metadata_args( + parser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """ + Add arguments related to storing the output metadata from a script. + + This data can be read / used by other scripts to post-process a + script results. + """ + parser.add_argument( + "--json_output_metadata", + type=str, + action="store", + help="File storing the output metadata of this script in JSON format", + ) + return parser + + +# Store the metadata about the output of a script. +OutputMetadata = Dict[str, str] + + +def process_json_output_metadata_args( + args: argparse.Namespace, + output_metadata: OutputMetadata, +) -> Optional[str]: + """ + Save the output metadata according to the command line options. + + :return: file name with the output metadata + """ + hdbg.dassert_isinstance(output_metadata, dict) + if args.json_output_metadata is None: + return None + file_name: str = args.json_output_metadata + _LOG.info("Saving output metadata into file '%s'", file_name) + if not file_name.endswith(".json"): + _LOG.warning( + "The output metadata file '%s' doesn't end in .json: adding it", + file_name, + ) + file_name += ".json" + hio.to_json(file_name, output_metadata) + _LOG.info("Saved output metadata into file '%s'", file_name) + return file_name + + +def read_output_metadata(output_metadata_file: str) -> OutputMetadata: + """ + Read the output metadata. 
def str_to_bool(value: str) -> bool:
    """
    Convert a string representing true or false to the corresponding bool.

    The comparison is case-insensitive and ignores leading / trailing
    whitespace, so that values like `True`, `FALSE`, or ` true ` are accepted.

    :param value: string to convert
    :return: `True` for "true", `False` for "false"
    :raises argparse.ArgumentTypeError: if the value is neither "true" nor
        "false"
    """
    # Normalize once instead of lower-casing the value for each comparison.
    normalized = value.strip().lower()
    if normalized == "true":
        return True
    if normalized == "false":
        return False
    raise argparse.ArgumentTypeError(
        f"Invalid boolean value {value}. Use 'true' or 'false'."
    )
def add_limit_range_arg(
    parser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """
    Add the `--limit` option to restrict processing to a range of items.

    The range format is `X:Y` where `X` and `Y` are 1-indexed integers and
    both ends are inclusive.

    :param parser: parser to add the option to
    :return: parser with the option added
    """
    parser.add_argument(
        "--limit",
        action="store",
        help="Limit processing to item range X:Y (integers >= 1, inclusive)",
    )
    return parser


def parse_limit_range(limit_str: str) -> Tuple[int, int]:
    """
    Parse and validate a limit string in the format "X:Y".

    :param limit_str: string in format "X:Y" where X and Y are integers >= 1
        and X <= Y
    :return: tuple `(start, end)` with the values exactly as typed by the
        user, i.e., 1-indexed and inclusive
    """
    hdbg.dassert(
        ":" in limit_str, "Limit format must be X:Y, got: %s", limit_str
    )
    parts = limit_str.split(":")
    hdbg.dassert_eq(
        len(parts), 2, "Limit format must be X:Y, got: %s", limit_str
    )
    try:
        start = int(parts[0])
        end = int(parts[1])
    except ValueError as e:
        hdbg.dfatal("Invalid limit format, must be integers: %s" % str(e))
    hdbg.dassert_lte(1, start, "Start index must be >= 1, got: %s", start)
    hdbg.dassert_lte(1, end, "End index must be >= 1, got: %s", end)
    hdbg.dassert_lte(
        start, end, "Start index must be <= end index, got: %s:%s", start, end
    )
    return start, end


def parse_limit_range_args(
    args: argparse.Namespace,
) -> Optional[Tuple[int, int]]:
    """
    Parse limit range from command line arguments and log the result.

    :param args: parsed command line arguments containing 'limit'
        attribute
    :return: tuple of (start_index, end_index) as 0-indexed integers (both
        ends inclusive), or None if no limit
    """
    limit_range = None
    if args.limit:
        start, end = parse_limit_range(args.limit)
        # Report the range as the user typed it (1-indexed).
        _LOG.warning("Using limit range: [%s:%s]", start, end)
        # Convert from the 1-indexed command line convention to the 0-indexed
        # convention that `apply_limit_range()` slices with. Without this
        # shift `--limit 1:N` skipped the first item and failed the bounds
        # check on a list of N items.
        limit_range = (start - 1, end - 1)
    return limit_range


def apply_limit_range(
    items: List[Any],
    limit_range: Optional[Tuple[int, int]] = None,
    *,
    item_name: str = "items",
) -> List[Any]:
    """
    Apply limit range filtering to a list of items.

    :param items: list of items to filter
    :param limit_range: optional tuple (start, end) of 0-indexed positions,
        both inclusive; `None` keeps all the items
    :param item_name: name of items for logging purposes
    :return: filtered list of items
    """
    if limit_range is not None:
        start_idx, end_idx = limit_range
        total_items = len(items)
        # Both bounds must address an existing element.
        hdbg.dassert_lt(
            start_idx,
            total_items,
            "Start index %s exceeds available %s %s",
            start_idx,
            item_name,
            total_items,
        )
        hdbg.dassert_lt(
            end_idx,
            total_items,
            "End index %s exceeds available %s %s",
            end_idx,
            item_name,
            total_items,
        )
        # The end of the range is inclusive.
        items = items[start_idx : end_idx + 1]
        _LOG.warning(
            "Found %s %s, limited to range %s:%s (%s %s)",
            total_items,
            item_name,
            start_idx,
            end_idx,
            len(items),
            item_name,
        )
    else:
        _LOG.info("Found %s %s to process", len(items), item_name)
    # Print the items that will be processed.
    _LOG.debug("Items to process:")
    for i, item in enumerate(items):
        _LOG.debug(" [%s]: %s", i, item)
    return items
def parse_multi_file_args(
    args: argparse.Namespace,
) -> List[str]:
    """
    Build the list of input files from the multi-file command line options.

    The options are checked in this order and the first one that is set wins:
    - `--files="file1,file2,..."`: comma-separated list
    - `--from_files="file.txt"`: file with one path per line (empty lines and
      lines starting with `#` are ignored)
    - `-i/--input`: either a list (repeated argument) or a single string
      (backward compatibility with the single-file option)

    Every resulting path is required to exist.

    :param args: parsed command line arguments
    :return: list of file paths to process
    """
    files_opt = getattr(args, "files", None)
    from_files_opt = getattr(args, "from_files", None)
    input_opt = getattr(args, "input", None)
    file_list: List[str] = []
    if files_opt:
        # Comma-separated list: drop surrounding spaces and empty entries.
        _LOG.debug("Using --files option")
        tokens = (token.strip() for token in files_opt.split(","))
        file_list = [token for token in tokens if token]
    elif from_files_opt:
        # One path per line, skipping blank lines and comments.
        _LOG.debug("Using --from_files option")
        hdbg.dassert_path_exists(from_files_opt)
        content = hio.from_file(from_files_opt)
        entries = (raw_line.strip() for raw_line in content.split("\n"))
        file_list = [
            entry for entry in entries if entry and not entry.startswith("#")
        ]
    elif input_opt:
        if isinstance(input_opt, list):
            # Repeated `--input` argument.
            _LOG.debug("Using --input option (repeated argument)")
            file_list = input_opt
        else:
            # Single file passed through the legacy `-i/--input` option.
            _LOG.debug(
                "Using -i/--input option (single file, backward compatibility)"
            )
            file_list = [input_opt]
    else:
        # No file specified.
        hdbg.dfatal("No input files specified")
    # There must be at least one file.
    hdbg.dassert_isinstance(file_list, list)
    hdbg.dassert_lt(
        0, len(file_list), "No input files specified after parsing arguments"
    )
    # All the files must exist.
    for file_path in file_list:
        hdbg.dassert_path_exists(file_path)
    _LOG.info("Found %s file(s) to process", len(file_list))
    return file_list
def to_pickleable(obj: Any, force_values_to_string: bool) -> Any:
    """
    Return a copy of `obj` with the same nesting where every leaf value is
    pickleable.

    Lists, tuples, and dicts keep their type; any other iterable is turned
    into a list. A leaf that cannot be pickled is replaced by `str(leaf)`.

    :param obj: object to convert
    :param force_values_to_string: if True, store all the leaf values as
        strings, even when they are pickleable
    :return: pickleable object
    """
    if isinstance(obj, list):
        return [to_pickleable(elem, force_values_to_string) for elem in obj]
    if isinstance(obj, tuple):
        return tuple(to_pickleable(elem, force_values_to_string) for elem in obj)
    if isinstance(obj, dict):
        # Convert both keys and values.
        converted = {}
        for key, val in obj.items():
            new_key = to_pickleable(key, force_values_to_string)
            new_val = to_pickleable(val, force_values_to_string)
            converted[new_key] = new_val
        return converted
    if hintros.is_iterable(obj):
        # TODO(Grisha): is it ok that we convert any Iterable (e.g., set) to list?
        # This means that input and output data types do not match.
        return [to_pickleable(elem, force_values_to_string) for elem in obj]
    # Leaf value.
    # We need to use try_and_catch mode because of CmTask7713.
    is_pickleable = hintros.is_pickleable(obj, mode="try_and_catch")
    if is_pickleable and not force_values_to_string:
        # Store the pickleable object as it is.
        return obj
    # Store the string representation, either because it was requested or
    # because the object is not pickleable.
    return str(obj)
+ if hs3.is_s3_path(file_name): + file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True) + else: + file_size = hintros.format_size(os.path.getsize(file_name)) + _LOG.log( + log_level, + "Saved '%s' (size=%s, time=%.1fs)", + file_name, + file_size, + ts.elapsed_time, + ) + + +def from_pickle( + file_name: str, + backend: str = "pickle", + *, + log_level: int = logging.DEBUG, + aws_profile: Optional[hs3.AwsProfile] = None, +) -> Any: + """ + Unpickle and return object stored in `file_name`. + """ + hdbg.dassert_isinstance(file_name, str) + with htimer.TimedScope( + logging.DEBUG, f"Unpickling from '{file_name}'" + ) as ts: + # We assume that the user always specifies a .pkl extension and then we + # change the extension based on the backend. + if backend in ("pickle", "dill"): + hdbg.dassert_file_extension(file_name, "pkl") + if backend == "pickle": + # Use S3 file system. + if hs3.is_s3_path(file_name): + s3fs_ = hs3.get_s3fs(aws_profile) + with s3fs_.open(file_name) as s3_file: + unpickler = pickle.Unpickler(s3_file) + obj = unpickler.load() + else: + with open(file_name, "rb") as fd: + unpickler = pickle.Unpickler(fd) + obj = unpickler.load() + elif backend == "dill": + import dill + + with open(file_name, "rb") as fd: + obj = dill.load(fd) + else: + raise ValueError(f"Invalid backend='{backend}'") + elif backend == "pickle_gzip": + hdbg.dassert_file_extension(file_name, "pkl.gz") + with gzip.open(file_name, "rb") as zfd: + unpickler = pickle.Unpickler(zfd) + obj = unpickler.load() + else: + raise ValueError(f"Invalid backend='{backend}'") + # Report time and size. 
# Marshalled code objects are arbitrary binary data. `latin-1` maps each byte
# 0..255 to exactly one code point, so the bytes <-> str round trip is
# lossless. The default UTF-8 codec fails on such data.
_MARSHAL_STR_ENCODING = "latin-1"


# TODO(gp): -> to_pickle_function
def pickle_function(func: Callable) -> str:
    """
    Serialize the bytecode of a function into a string.

    Only the code object is stored (through `marshal`): globals, defaults, and
    closures are not part of the result.

    :param func: function to serialize; it must have a `__code__` attribute
    :return: string storing the marshalled code object, to be decoded with
        `unpickle_function()`
    """
    hdbg.dassert_callable(func)
    hdbg.dassert(hasattr(func, "__code__"))
    assert hasattr(func, "__code__")
    code_as_bytes = marshal.dumps(func.__code__)
    return code_as_bytes.decode(_MARSHAL_STR_ENCODING)


# TODO(gp): -> from_pickle_function
def unpickle_function(code_as_str: str, func_name: str) -> Callable:
    """
    Rebuild a function from the string created by `pickle_function()`.

    The function is built on top of the global namespace of this module and
    it is named `func_name`.

    :param code_as_str: string returned by `pickle_function()`
    :param func_name: name to assign to the rebuilt function
    :return: function
    """
    hdbg.dassert_isinstance(code_as_str, str)
    # NOTE: `marshal.loads()` must not be used on untrusted data: the string
    # is expected to come from `pickle_function()`.
    code = marshal.loads(code_as_str.encode(_MARSHAL_STR_ENCODING))
    func = types.FunctionType(code, globals(), name=func_name)
    return func
# TODO(gp): Use repr to serialize:
# >>> a = {"hello": [1, 2, (3, 4)]}
# >>> repr(a)
# "{'hello': [1, 2, (3, 4)]}"
# TODO(gp): Add more types.
# TODO(gp): -> _to_python_code
def to_python_code(obj: Any) -> str:
    """
    Serialize an object into a string of Python code.

    Containers (list, tuple, dict) are serialized recursively. Types without
    a dedicated branch are encoded with `jsonpickle` as a raw string literal,
    which the caller needs to decode with `jsonpickle.decode()`.

    :param obj: an object to serialize
    :return: a string of Python code building the object
    """
    if isinstance(obj, (int, float)):
        # Float 2.5 -> "2.5".
        output = str(obj)
    elif isinstance(obj, str):
        # String test -> '"test"'.
        # Use jsonpickle to handle double quotes.
        output = jsonpickle.encode(obj)
    elif isinstance(obj, list):
        # List ["a", 1] -> '["a", 1]'.
        output = "[" + ", ".join(to_python_code(el) for el in obj) + "]"
    elif isinstance(obj, tuple):
        # Tuple ("a", 1) -> '("a", 1)'.
        elems = ", ".join(to_python_code(el) for el in obj)
        if len(obj) == 1:
            # A one-element tuple needs the trailing comma, otherwise
            # `(1)` is evaluated as the element itself and not as a tuple.
            elems += ","
        output = "(" + elems + ")"
    elif isinstance(obj, dict):
        # Dict {"a": 1} -> '{"a": 1}'.
        pairs = ", ".join(
            to_python_code(key) + ": " + to_python_code(obj[key]) for key in obj
        )
        output = "{" + pairs + "}"
    elif isinstance(obj, pd.DataFrame):
        # Dataframe with a column "a" and row values 1, 2 ->
        # "pd.DataFrame.from_dict({'a': [1, 2]})".
        vals = obj.to_dict(orient="list")
        output = f"pd.DataFrame.from_dict({vals})"
    elif isinstance(obj, pd.Series):
        # Series init as pd.Series([1, 2])
        output = (
            f'pd.Series(data={obj.tolist()}, index={obj.index}, name="{obj.name}", '
            f"dtype={obj.dtype})"
        )
    elif isinstance(obj, cconfig.Config):
        # Config -> python_code -> "cconfig.Config.from_python(python_code)"
        val = obj.to_python()
        output = f'cconfig.Config.from_python("{val}")'
    else:
        # Use `jsonpickle` for serialization.
        _LOG.warning(
            "Type %s not found in serialization function: using jsonpickle.",
            type(obj),
        )
        output = f"r'{jsonpickle.encode(obj)}'"
    return output
+ :return: path to the file with generated test. + """ + # Get directory and filename of the testing code. + dirname_with_code, filename_with_code = os.path.split(file_with_code) + dirname_with_test = os.path.join(dirname_with_code, "test") + # Construct test file. + test_file = os.path.join( + dirname_with_test, f"test_by_playback_{filename_with_code}" + ) + return test_file + + def _update_code_to_existing(self) -> None: + """ + Get existing content from the file with test. + + If the file doesn't exist - creates it. + """ + # Create test file if it doesn't exist. + if not os.path.exists(self._test_file): + hio.create_enclosing_dir(self._test_file, True) + hio.to_file(self._test_file, "", mode="w") + else: + # Get already existing content in the test file. + self._code = hio.from_file(self._test_file).split("\n") + self._file_exists = True + + def _append(self, string: str, num_tabs: int = 0) -> None: + """ + Add indented line to the code. + """ + num_spaces = num_tabs * 4 + self._code.append(hprint.indent(string, num_spaces=num_spaces)) + + def __init__( + self, + mode: str, + to_file: Optional[bool] = None, + max_tests: Optional[int] = None, + ) -> None: + """ + Initialize the class variables. + + :param mode: the type of unit test to be generated (e.g. "assert_equal") + :param to_file: save playback output to the file + test/test_by_playback_.py + :param max_tests: limit a number of generated tests for the testing + function. Can be useful if the function is called a lot of times + during the execution. + """ + _LOG.debug(hprint.to_str("mode to_file max_tests")) + hdbg.dassert_in(mode, ("check_string", "assert_equal")) + self.mode = mode + # TODO(gp): Factor out in a function but need to discard one more level + # in the stack trace. + cur_frame = inspect.currentframe() + self._func_name = cur_frame.f_back.f_code.co_name # type: ignore + # We can use kw arguments for all args. Python supports this. 
+ self._kwargs = cur_frame.f_back.f_locals.copy() # type: ignore + # It treats all arguments defined before itself as arguments. If this + # is done, it will mess up the function call that will be created in + # `Playback.run`. + expected_arg_count = cur_frame.f_back.f_code.co_argcount # type: ignore + if "kwargs" in self._kwargs: + expected_arg_count += 1 + _LOG.debug(hprint.to_str("expected_arg_count")) + # TODO(gp): Is this necessary? + # hdbg.dassert_eq( + # expected_arg_count, + # len(cur_frame.f_back.f_locals), # type: ignore + # msg="the Playback class should be the first thing instantiated in" + # " a function.", + # ) + # If the function is a method, store the parent class so we can also + # create that in the test. + if "self" in self._kwargs: + x = self._kwargs.pop("self") + self._parent_class = x + self._code = [ + f"# Test created for {cur_frame.f_back.f_globals['__name__']}" # type: ignore + f".{x.__class__.__name__}.{self._func_name}." + ] + else: + self._parent_class = None + self._code = [ + # pylint: disable=line-too-long + f"# Test created for {cur_frame.f_back.f_globals['__name__']}.{self._func_name}." # type: ignore + ] + self._append("") + # Check if need to write the code directly to file. + self._to_file = to_file if to_file is not None else False + # Find filename to write the code. + file_with_code = cur_frame.f_back.f_code.co_filename # type: ignore + self._test_file = self._get_test_file_name(file_with_code) + # Check if file exists, need to keep code already here. + self._file_exists = False + if self._to_file: + self._update_code_to_existing() + # Limit number of tests per tested function. + self._max_tests = max_tests or float("+inf") + + @staticmethod + def test_code(output: str) -> None: + # Try to execute in a fake environment. 
+ # ``` + # local_env = {} + # _ = exec(output, local_env) + # ``` + _ = exec(output) # pylint: disable=exec-used + + def _check_code(self, func_output: Any) -> None: + """ + Generate test code that makes an assertion. + """ + if self.mode == "check_string": + if isinstance(func_output, (pd.DataFrame, pd.Series, str)): + if not isinstance(func_output, str): + self._append( + "actual = hpandas.df_to_str(actual, num_rows=None)", 2 + ) + if not isinstance(func_output, (str, bytes)): + self._append("actual = str(actual)", 2) + self._append("# Check output.", 2) + self._append("self.check_string(actual)", 2) + elif self.mode == "assert_equal": + self._append("# Define expected output.", 2) + func_output_as_code = to_python_code(func_output) + self._append(f"expected = {func_output_as_code}", 2) + if not isinstance( + func_output, (int, float, str, list, dict, pd.DataFrame) + ): + self._append("expected = jsonpickle.decode(expected)", 2) + + if isinstance(func_output, (pd.DataFrame, pd.Series)): + self._append( + "actual = hpandas.df_to_str(actual, num_rows=None)", 2 + ) + self._append( + "expected = hpandas.df_to_str(expected, num_rows=None)", 2 + ) + self._append("# Compare actual and expected output.", 2) + self._append("self.assertEqual(actual, expected)", 2) + else: + raise ValueError(f"Invalid mode='{self.mode}'") + + def _add_imports(self, additional: Optional[List[str]] = None) -> None: + """ + Add the code with imports. + """ + # Add imports. + self._append("import helpers.hpandas as hpandas") + self._append("import helpers.hunit_test as hunitest") + self._append("import jsonpickle") + self._append("import pandas as pd") + self._append("import config_root.config as cconfi") + for a in additional or []: + self._append(a) + self._code.extend(["", ""]) + + def _get_class_name_string(self) -> str: + """ + Get a string for the test code with the name of the test class. + + I.e. "class TestMyMethod(hunitest.TestCase):". 
+ """ + test_name = ( + self._parent_class.__class__.__name__ + if self._parent_class is not None + else "" + ) + test_name += "".join( + [x.capitalize() for x in self._func_name.split("_")] + ) + class_string = f"class Test{test_name}(hunitest.TestCase):" + return class_string + + def _get_class_count(self) -> int: + """ + Find a number of already generated tests for the method. + """ + class_string = self._get_class_name_string() + count = 0 + for line in self._code: + count += line == class_string + return count + + def _add_test_class(self) -> None: + """ + Add the code with the test class definition and the test method + definition. + """ + # Add test class and test method. + class_string = self._get_class_name_string() + # Find how many times method was tested. + count = self._get_class_count() + if count >= self._max_tests: + # If it was already tested enough times, raise. + raise IndexError(f"{self._max_tests} tests already generated") + # Otherwise, continue to create a test code. + self._append(class_string) + self._append(f"def test{count + 1}(self) -> None:", 1) + + def _add_function_call(self) -> None: + """ + Add a call of the function to test to the test code. + """ + self._append("# Call function to test.", 2) + if self._parent_class is None: + fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] + self._append(f"actual = {self._func_name}({', '.join(fnc_call)})", 2) + else: + var_code = to_python_code(self._parent_class) + # Re-create the parent class. + self._append(f"cls = {var_code}", 2) + self._append("cls = jsonpickle.decode(cls)", 2) + fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] + # Call the method as a child of the parent class. + self._append( + f"actual = cls.{self._func_name}({', '.join(fnc_call)})", 2 + ) + + def _add_var_definitions(self) -> None: + """ + Add variables definitions for the function to test. 
def json_pretty_print(parsed: Any) -> str:
    """
    Format a JSON object as an indented string with sorted keys.

    :param parsed: a JSON object, either already parsed or as a JSON string
        (in which case it is parsed first)
    :return: the JSON text, indented with 4 spaces and with sorted keys
    """
    data = json.loads(parsed) if isinstance(parsed, str) else parsed
    return json.dumps(data, indent=4, sort_keys=True)
+# +# from typing import Any +# +# def target_function(*args: Any, **kwargs: Any) -> Any: +# import helpers.hplayback as hplayb +# playback = hplayb.Playback("assert_equal") +# res = target_func_tmp(*args, **kwargs) +# code = playback.run(res) +# print(code) +# return res +# ``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py new file mode 100644 index 000000000..29a504226 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py @@ -0,0 +1,1076 @@ +""" +Import as: + +import helpers.hprint as hprint +""" + +import functools +import inspect +import logging +import pprint +import re +import sys +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union + +import helpers.hdbg as hdbg + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. +_LOG.setLevel(logging.INFO) + + +# ############################################################################# +# Debug output +# ############################################################################# + +_COLOR_MAP = { + "bold": 1, + # Colors. + "blue": 94, + "green": 92, + "white": 0, + "purple": 95, + "red": 91, + "yellow": 33, + # Blue. + "DEBUG": 34, + # Cyan. + "INFO": 36, + # Yellow. + "WARNING": 33, + # Red. + "ERROR": 31, + # White on red background. + "CRITICAL": 41, +} + + +def color_highlight(text: str, color: str) -> str: + """ + Return a colored string. 
+ """ + prefix = "\033[" + suffix = "\033[0m" + hdbg.dassert_in(color, _COLOR_MAP) + color_code = _COLOR_MAP[color] + txt = f"{prefix}{color_code}m{text}{suffix}" + return txt + + +def clear_screen() -> None: + print((chr(27) + "[2J")) + + +def line(char: Optional[str] = None, num_chars: Optional[int] = None) -> str: + """ + Return a line with the desired character. + """ + char = "#" if char is None else char + num_chars = 80 if num_chars is None else num_chars + return char * num_chars + + +def pprint_pformat(obj: Any, *, sort_dicts: bool = False) -> str: + """ + Pretty-print in color. + """ + from pygments import highlight + from pygments.formatters import Terminal256Formatter + from pygments.lexers import PythonLexer + + txt = pprint.pformat(obj, sort_dicts=sort_dicts) + txt = highlight(txt, PythonLexer(), Terminal256Formatter()) + txt = txt.rstrip() + return txt + + +def pprint_color(obj: Any, *, tag: Optional[str] = None, sep: str = "") -> None: + """ + Pretty-print in color. + """ + txt = "" + if tag is not None: + txt += tag + "= " + sep + txt += pprint_pformat(obj) + print(txt) + + +# TODO(gp): -> Use *args instead of forcing to build a string to simplify the caller. +def frame( + message: str, + *, + char1: Optional[str] = None, + num_chars: Optional[int] = None, + char2: Optional[str] = None, + thickness: int = 1, + level: int = 0, +) -> str: + """ + Print a frame around a message. + + :param message: message to print + :param char1: char for top line of the frame + :param num_chars: how many chars in each line (by default 80 chars) + :param char2: char for bottom line of the frame + :param thickness: how many overlapping lines + - E.g., thickness = 2 + ``` + # #######... + # #######... + # hello + # #######... + # #######... + ``` + :param level: level of framing indent based on `#` char: + - E.g., level = 0 + ``` + #######... + hello + #######... + ``` + - E.g., level = 1 + ``` + # #######... + # hello + # #######... 
+ ``` + """ + hdbg.dassert_isinstance(message, str) + # Fill in the default values. + if char1 is None: + # User didn't specify any char. + char1 = char2 = "#" + elif char1 is not None and char2 is None: + # User specified only one char. + char2 = char1 + elif char1 is None and char2 is not None: + # User specified the second char, but not the first one. + hdbg.dfatal(f"Invalid char1='{char1}' char2='{char2}'") + else: + # User specified both chars. Nothing to do. + pass + num_chars = 80 if num_chars is None else num_chars + # Sanity check. + hdbg.dassert_eq(len(char1), 1) + hdbg.dassert_lte(1, num_chars) + hdbg.dassert_eq(len(char2), 1) + hdbg.dassert_lte(1, thickness) + hdbg.dassert_lte(0, level) + # Build the return value. + prefix = "" + if level: + prefix = "#" * level + " " + ret = ( + (prefix + (line(char1, num_chars) + "\n") * thickness) + + (prefix + message + "\n") + + (prefix + (line(char2, num_chars) + "\n") * thickness) + ).rstrip("\n") + return ret + + +# ############################################################################# + + +StrOrList = Union[str, List[str]] + + +# TODO(gp): Use this everywhere in the codebase to avoid back-and-forth +# transforms between strings and lists of strings. +def split_lines(func: Callable) -> Callable: + """ + A decorator that splits a string input into lines before passing it to the + decorated function which expects a list of lines. + """ + + @functools.wraps(func) + def wrapper(txt: StrOrList, *args: Any, **kwargs: Any) -> StrOrList: + if isinstance(txt, str): + # Split the txt into lines. + lines = txt.splitlines() + is_str = True + else: + # The txt is already a list of lines: pass it as is. + hdbg.dassert_isinstance(txt, list) + lines = txt + is_str = False + # Call the function. + lines = func(lines, *args, **kwargs) + if is_str: + # Join the lines back together. + out = "\n".join(lines) + else: + # The output is already a list of lines. 
+ hdbg.dassert_isinstance(lines, list) + out = lines + return out + + return wrapper + + +@split_lines +def prepend(lines: List[str], prefix: str) -> List[str]: + """ + Add `prefix` before each line of the string `txt`. + """ + hdbg.dassert_isinstance(lines, list) + lines_out = [prefix + curr_line for curr_line in lines] + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +@split_lines +def indent(lines: List[str], *, num_spaces: int = 2) -> List[str]: + """ + Add `num_spaces` spaces before each line of the passed string. + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(num_spaces, int) + hdbg.dassert_lte(0, num_spaces) + spaces = " " * num_spaces + txt_out = [] + for curr_line in lines: + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def strict_split(lines: List[str], max_length: int) -> List[str]: + """ + Split a string into chunks of `max_length` characters. + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_lte(1, max_length) + lines_out = [] + for line in lines: + for i in range(0, len(line), max_length): + lines_out.append(line[i : i + max_length]) + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +@split_lines +def remove_lead_trail_empty_lines(lines: List[str]) -> List[str]: + """ + Remove consecutive empty lines only at the beginning / end of a string. + """ + hdbg.dassert_isinstance(lines, list) + # Remove leading empty lines. + while lines and not lines[0].strip(): + lines.pop(0) + # Remove trailing empty lines. 
+ while lines and not lines[-1].strip(): + lines.pop() + hdbg.dassert_isinstance(lines, list) + return lines + + +@split_lines +def dedent( + lines: List[str], *, remove_lead_trail_empty_lines_: bool = True +) -> List[str]: + """ + Remove from each line the minimum number of spaces to align the text on the + left. + + It is the opposite of `indent()`. + + :param txt: multi-line string + :param txt: multi-line string + :param remove_lead_trail_empty_lines_: if True, remove all the empty + lines at the beginning and at the end + """ + if remove_lead_trail_empty_lines_: + lines = remove_lead_trail_empty_lines(lines) + # Find the minimum number of leading spaces. + min_num_spaces = None + for curr_line in lines: + _LOG.debug( + "min_num_spaces=%s: curr_line='%s'", min_num_spaces, curr_line + ) + # Skip empty lines. + if curr_line.lstrip().rstrip() == "": + _LOG.debug(" -> Skipping empty line") + continue + m = re.search(r"^(\s*)", curr_line) + hdbg.dassert(m) + # The linter doesn't understand that `dassert` is equivalent to an + # `assert`. + assert m is not None + curr_num_spaces = len(m.group(1)) + _LOG.debug(" -> curr_num_spaces=%s", curr_num_spaces) + if min_num_spaces is None or curr_num_spaces < min_num_spaces: + min_num_spaces = curr_num_spaces + _LOG.debug("min_num_spaces=%s", min_num_spaces) + # Process each line and remove the minimum indentation. + txt_out = [] + for curr_line in lines: + _LOG.debug("curr_line='%s'", curr_line) + # Skip empty lines. + if curr_line.lstrip().rstrip() == "": + txt_out.append("") + continue + hdbg.dassert_lte(min_num_spaces, len(curr_line)) + txt_out.append(curr_line[min_num_spaces:]) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def align_on_left(lines: List[str]) -> List[str]: + """ + Remove all leading/trailing spaces for each line. 
+ """ + hdbg.dassert_isinstance(lines, list) + txt_out = [] + for curr_line in lines: + curr_line = curr_line.rstrip(" ").lstrip(" ") + txt_out.append(curr_line) + hdbg.dassert_isinstance(txt_out, list) + return txt_out + + +@split_lines +def remove_empty_lines( + lines: List[str], *, mode: str = "no_empty_lines" +) -> List[str]: + """ + Remove empty lines from a multi-line string. + + :param lines: list of input lines to process + :param mode: + - no_empty_lines: remove all empty lines + - no_consecutive_empty_lines: remove consecutive empty lines + :return: lines with empty lines removed + """ + hdbg.dassert_isinstance(lines, list) + if mode == "no_empty_lines": + lines_out = [line for line in lines if line.rstrip().lstrip() != ""] + elif mode == "no_consecutive_empty_lines": + # If there are two or more consecutive empty lines, remove all but the last one. + lines_out = [] + prev_empty = False + for line in lines: + if re.search(r"^\s*$", line): + if prev_empty: + continue + prev_empty = True + else: + prev_empty = False + lines_out.append(line) + else: + raise ValueError(f"Invalid mode='{mode}'") + hdbg.dassert_isinstance(lines_out, list) + return lines_out + + +def vars_to_debug_string(vars_as_str: List[str], locals_: Dict[str, Any]) -> str: + """ + Create a string with var name -> var value. + + E.g., ["var1", "var2"] is converted into: ``` var1=... var2=... ``` + """ + txt = [] + for var in vars_as_str: + txt.append(var + "=") + txt.append(indent(str(locals_[var]))) + return "\n".join(txt) + + +# ############################################################################# +# Pretty print data structures. 
+# ############################################################################# + + +def to_object_str(obj: Any) -> str: + class_name = str(obj.__class__.__name__) + hex_str = str(hex(id(obj))) + return f"{class_name} at {hex_str}" + + +def to_object_repr(obj: Any) -> str: + class_module = str(obj.__class__.__module__) + class_name = str(obj.__class__.__name__) + hex_str = str(hex(id(obj))) + return f"<{class_module}.{class_name} at {hex_str}>" + + +def thousand_separator(v: float) -> str: + v = "{0:,}".format(v) + return v + + +# TODO(gp): -> to_percentage +def perc( + a: float, + b: float, + *, + invert: bool = False, + num_digits: int = 2, + only_perc: bool = False, + use_float: bool = False, + only_fraction: bool = False, + use_thousands_separator: bool = False, +) -> Union[str, float]: + """ + Calculate percentage a / b as a string. + + Asserts 0 <= a <= b. If true, returns a/b to `num_digits` decimal places. + + :param a: numerator + :param b: denominator + :param invert: assume the fraction is (b - a) / b + This is useful when we want to compute the complement of a count. + :param num_digits: number of digits to represent the percentage + :param only_perc: return only the percentage, without the fraction + - E.g., "50.00%" vs "10 / 20 = 50.00%" + :param use_float: return the percentage as a float. It requires + `only_perc = True` + :param only_fraction: return only the fraction, without the percentage + - E.g., "10 / 20" vs "10 / 20 = 50.00%" + :param use_thousands_separator: report the numbers using thousands separator + :return: string with a/b + """ + hdbg.dassert_lte(0, a) + hdbg.dassert_lte(a, b) + if invert: + a = b - a + if use_thousands_separator: + a_str = str("{0:,}".format(a)) + b_str = str("{0:,}".format(b)) + else: + a_str = str(a) + b_str = str(b) + # Validate and format the percentage. + hdbg.dassert_lte(0, num_digits) + if only_perc: + fmt = "%." 
+ str(num_digits) + "f" + ret = fmt % (float(a) / b * 100.0) + if use_float: + # 57.27 + ret = float(ret) + else: + # 57.27% + hdbg.dassert_isinstance(ret, str) + ret += "%" + elif only_fraction: + # 4225 / 7377 + ret = f"{a_str} / {b_str}" + else: + # 4225 / 7377 = 57.27% + fmt = "%s / %s = %." + str(num_digits) + "f%%" + ret = fmt % (a_str, b_str, float(a) / b * 100.0) + return ret + + +def round_digits( + v: float, *, num_digits: int = 2, use_thousands_separator: bool = False +) -> str: + """ + Round digit returning a string representing the formatted number. + + :param v: value to convert + :param num_digits: number of digits to represent v on None is + (Default value = 2) + :param use_thousands_separator: use "," to separate thousands + (Default value = False) + :return: str with formatted value + """ + if (num_digits is not None) and isinstance(v, float): + fmt = "%0." + str(num_digits) + "f" + res = float(fmt % v) + else: + res = v + if use_thousands_separator: + res = "{0:,}".format(res) # type: ignore + res_as_str = str(res) + return res_as_str + + +# ############################################################################# +# Logging helpers +# ############################################################################# + + +# TODO(gp): Move this to hdbg.hlogging, but there are dependencies from this file. + +# https://stackoverflow.com/questions/2749796 has some solutions to find the +# name of variables from the caller. + + +_VarNamesType = Optional[Union[str, List[str]]] + + +def _to_var_list(expression: _VarNamesType) -> List[str]: + if isinstance(expression, List): + return expression + hdbg.dassert_isinstance(expression, str) + # If expression is a list of space-separated expressions, convert each in a + # string. + exprs = [v.lstrip().rstrip() for v in expression.split(" ")] + # Remove empty var names. 
+ exprs = [v for v in exprs if v.strip().rstrip() != ""] + hdbg.dassert_isinstance(exprs, list) + hdbg.dassert_lte(1, len(exprs)) + return exprs + + +def to_str( + expression: str, + *, + frame_level: int = 1, + print_lhs: bool = True, + char_separator: str = ",", + mode: str = "repr", +) -> str: + """ + Return a string with the value of a variable / expression / multiple + variables. + + If expression is a space-separated compound expression, convert it into + `exp1=val1, exp2=val2, ...`. + + This is similar to Python 3.8 f-string syntax `f"{foo=} {bar=}"`. + We don't want to force to use Python 3.8 just for this feature. + ``` + > x = 1 + > to_str("x+1") + x+1=2 + ``` + + :param expression: the variable / expression to evaluate and print. + E.g., `to_str("exp1")` is converted into `exp1=val1`. + If expression is a space-separated compound expression, e.g., + `to_str("exp1 exp2 ...")`, it is converted into `exp1=val1, exp2=val2, ...` + :param frame_level: level of the frame to inspect + :param print_lhs: whether we want to print the left hand side (i.e., `exp1`) + :param char_separator: separator between the values of the expressions + when printed (e.g., `,`) + :param mode: select how to print the value of the expressions (e.g., `str`, + `repr`, `pprint`, `pprint_color`) + """ + # TODO(gp): If we pass an object it would be nice to find the name of it. + # E.g., https://github.com/pwwang/python-varname + hdbg.dassert_isinstance(expression, str) + if " " in expression: + exprs = _to_var_list(expression) + # Convert each expression into a value. + _to_str = lambda x: to_str(x, frame_level=frame_level + 2) + values = list(map(_to_str, exprs)) + # Assemble in a return value. + hdbg.dassert_lte(len(char_separator), 1) + sep = char_separator + " " + txt = sep.join(values) + return txt + # Certain expressions are evaluated as literals. + if expression in ("", "->", ":", "=", "\n"): + return expression + # Evaluate the expression. 
+ frame_ = sys._getframe(frame_level) # pylint: disable=protected-access + ret = "" + if print_lhs: + ret += expression + "=" + try: + eval_ = eval(expression, frame_.f_globals, frame_.f_locals) + except Exception as e: + print("expression=''", expression) + raise e + if mode == "str": + ret += str(eval_) + elif mode == "repr": + ret += repr(eval_) + elif mode == "pprint": + ret += "\n" + indent(pprint.pformat(eval_)) + elif mode == "pprint_color": + ret += "\n" + indent(pprint_pformat(eval_)) + else: + raise ValueError(f"Invalid mode='{mode}'") + return ret + + +# TODO(gp): Extend this to work on class methods, static and not. +def _func_signature_to_str( + skip_vars: _VarNamesType, + assert_on_skip_vars_error: bool, + frame_level: int, +) -> Tuple[str, str]: + """ + Return the variables of the caller function as a string. + + Same params as `func_signature_to_str()`. + :return: function name and string with the variables of the caller function + as `var1 var2 ...` + """ + if skip_vars is not None: + skip_vars = _to_var_list(skip_vars) + # Get the caller's frame (i.e., the function that called this function). + caller_frame = inspect.currentframe() + for _ in range(frame_level): + hdbg.dassert_is_not( + caller_frame, None, "caller_frame should not be None" + ) + caller_frame = caller_frame.f_back + hdbg.dassert_is_not( + caller_frame, + None, + "caller_frame should not be None after traversing frames", + ) + caller_function_name = caller_frame.f_code.co_name + # _LOG.debug("caller_function_name=%s", caller_function_name) + # Retrieve the function object from the caller's frame. 
+ caller_function = caller_frame.f_globals.get(caller_function_name, None) + if caller_function: + # Get the function's signature + sig = inspect.signature(caller_function) + var_names = list(sig.parameters.keys()) + if skip_vars: + if assert_on_skip_vars_error: + hdbg.dassert_is_subset(skip_vars, var_names) + var_names = [ + var_name for var_name in var_names if var_name not in skip_vars + ] + vars_str = " ".join(var_names) + else: + raise ValueError("Unable to determine caller function") + return caller_function_name, vars_str + + +def func_signature_to_str( + # We don't use * since we want to keep it simple to call this function. + skip_vars: _VarNamesType = None, + *, + assert_on_skip_vars_error: bool = True, + frame_level: int = 2, +) -> str: + r""" + Return the variables of the caller function as a string. + + Use like: + ``` + _LOG.debug("\n%s", hprint.func_signature_to_str()) + ``` + + :param skip_vars: list of variables to skip + :param assert_on_skip_vars_error: whether to assert if the variables to skip + are not found in the function signature + :param frame_level: level of the frame to inspect. By default we need to + access the frame of the caller of the caller, so frame_level = 2 + """ + # Get the variables. + func_name, func_signature = _func_signature_to_str( + skip_vars, + assert_on_skip_vars_error, + frame_level, + ) + # Get the value of the variables. + val = to_str(func_signature, frame_level=frame_level) + val = f"# {func_name}: {val}" + return val + + +# ############################################################################# + + +def log(logger: logging.Logger, verbosity: int, *vals: Any) -> None: + """ + Log at a certain verbosity. 
+ + `log(_LOG, logging.DEBUG, "ticker", "exchange")` + + is equivalent to statements like: + + ``` + _LOG.debug("%s, %s", to_str("ticker"), to_str("exchange")) + _LOG.debug("ticker=%s, exchange=%s", ticker, exchange) + ``` + """ + logger_verbosity = hdbg.get_logger_verbosity() + # print("verbosity=%s logger_verbosity=%s" % (verbosity, logger_verbosity)) + # We want to avoid the overhead of converting strings, so we evaluate the + # expressions only if we are going to print. + if verbosity >= logger_verbosity: + # We need to increment frame_lev since we are 2 levels deeper in the stack. + _to_str = lambda x: to_str(x, frame_level=3) + num_vals = len(vals) + if num_vals == 1: + fstring = "%s" + vals = _to_str(vals[0]) # type: ignore + else: + fstring = ", ".join(["%s"] * num_vals) + vals = list(map(_to_str, vals)) # type: ignore + logger.log(verbosity, fstring, vals) + + +# TODO(gp): Replace calls to `_LOG.debug("\n%s", hprint.frame(...)` with this. +# TODO(gp): Consider changing the signature from +# _log_frame(_LOG, "hello", verbosity=logger.INFO)) +# to +# _log_frame(_LOG.info, "hello", ...) +# by using the first element as a Callable +def log_frame( + logger: logging.Logger, + fstring: str, + *args: Any, + level: int = 1, + char: str = "#", + verbosity: int = logging.DEBUG, +) -> None: + """ + Log using a frame around the text with different number of leading `#` (or + `char`) to organize the log visually. 
+ + The logging output looks like: + _log_frame(_LOG, "hello", verbosity=logger.INFO)) + ``` + 07:44:51 printing : log_frame : 390 : + # ######################################################################### + # hello + # ######################################################################### + ``` + + :param txt: text to print in a frame + :param level: number of `#` (or `char`) to prepend the logged text + :param char: char to prepend the logged text with + :param verbosity: logging verbosity + """ + hdbg.dassert_isinstance(logger, logging.Logger) + hdbg.dassert_isinstance(fstring, str) + msg = fstring % args + msg = msg.rstrip().lstrip() + msg = frame(msg) + # Prepend a `# `, if needed. + if level > 0: + prefix = level * char + " " + msg = prepend(msg, prefix=prefix) + # Add an empty space. + msg = "\n" + msg + logger.log(verbosity, "%s", msg) + + +# ############################################################################# + + +def type_to_string(type_as_str: str) -> str: + """ + Return a short string representing the type of an object, e.g., + "dataflow.Node" (instead of "class <'dataflow.Node'>") + """ + if isinstance(type_as_str, type): + type_as_str = str(type_as_str) + hdbg.dassert_isinstance(type_as_str, str) + # Remove the extra string from: + # + prefix = " str: + ret = f"({type(obj)}) {obj}" + return ret + + +# ############################################################################# + + +def format_list( + list_: List[Any], + *, + sep: str = " ", + max_n: Optional[int] = None, + tag: Optional[str] = None, +) -> str: + # sep = ", " + if max_n is None: + max_n = 10 + hdbg.dassert_lte(1, max_n) + n = len(list_) + txt = "" + if tag is not None: + txt += f"{tag}: " + txt += f"({n}) " + if n < max_n: + txt += sep.join(map(str, list_)) + else: + num_elems = int(max_n / 2) + hdbg.dassert_lte(1, num_elems) + txt += sep.join(map(str, list_[:num_elems])) + txt += " ... 
" + # pylint: disable=invalid-unary-operand-type + txt += sep.join(map(str, list_[-num_elems:])) + return txt + + +# TODO(gp): Use format_list(). +def list_to_str( + list_: List, + *, + tag: str = "", + sort: bool = False, + axis: int = 0, + to_string: bool = False, +) -> str: + """ + Print list / index horizontally or vertically. + """ + # TODO(gp): Fix this. + _ = to_string + txt = "" + if axis == 0: + if list_ is None: + txt += f"{tag}: (0) None\n" + else: + # hdbg.dassert_in(type(l), (list, pd.Index, pd.Int64Index)) + vals = list(map(str, list_)) + if sort: + vals = sorted(vals) + txt += f"{tag}: ({len(list_)}) {' '.join(vals)}\n" + elif axis == 1: + txt += f"{tag} ({len(list_)}):\n" + vals = list(map(str, list_)) + if sort: + vals = sorted(vals) + txt += "\n".join(vals) + "\n" + else: + raise ValueError(f"Invalid axis='{axis}'") + return txt + + +def list_to_str2( + vals: List[Any], + *, + sep_char: str = ", ", + enclose_str_char: str = "'", + max_num: Optional[int] = 10, +) -> str: + """ + Convert a list of values into a formatted string representation. + + E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" + + :param vals: values to be converted + :param sep_char: separator to use between elements + :param enclose_str_char: character to enclose each element's string + representation; if empty, elements are not enclosed + :param max_num: maximum number of elements to display in the output + :return: the formatted string representing the list + """ + vals_as_str = list(map(str, vals)) + # Add a str around. + if enclose_str_char: + vals_as_str = [ + enclose_str_char + v + enclose_str_char for v in vals_as_str + ] + # Build the output string with optional truncation. + ret = f"{len(vals)} [" + if max_num is not None and len(vals) > max_num: + hdbg.dassert_lt(1, max_num) + ret += sep_char.join(vals_as_str[: int(max_num / 2)]) + ret += sep_char + "..." 
+ sep_char + ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) + else: + ret += sep_char.join(vals_as_str) + ret += "]" + return ret + + +def set_diff_to_str( + obj1: Iterable, + obj2: Iterable, + *, + obj1_name: str = "obj1", + obj2_name: str = "obj2", + sep_char: str = " ", + add_space: bool = False, +) -> str: + """ + Compute the difference between two sequences of data and return a formatted + string. + + :param obj1: The first iterable object. + :param obj2: The second iterable object. + :param obj1_name: The name to use for the first object in the output string. + :param obj2_name: The name to use for the second object in the output string. + :param sep_char: The character to use for separating elements in the output + string. + :param add_space: Whether to add empty lines to make the output more readable. + :return: A formatted string showing the differences between the two objects. + + Example: + ``` + >>> obj1 = [1, 2, 3, 4] + >>> obj2 = [3, 4, 5, 6] + >>> set_diff_to_str(obj1, obj2, obj1_name="list1", obj2_name="list2") + * list1: (4) 1 2 3 4 + * list2: (4) 3 4 5 6 + * intersect=(2) 3 4 + * list1-list2=(2) 1 2 + * list2-list1=(2) 5 6 + ``` + """ + + def _to_string(obj: Iterable) -> str: + obj = sorted(list(obj)) + if sep_char == "\n": + txt = indent("\n" + sep_char.join(map(str, obj))) + else: + txt = sep_char.join(map(str, obj)) + return txt + + res: List[str] = [] + # obj1. + obj1 = set(obj1) + hdbg.dassert_lte(1, len(obj1)) + res.append(f"* {obj1_name}: ({len(obj1)}) {_to_string(obj1)}") + if add_space: + res.append("") + # obj2. + obj2 = set(obj2) + hdbg.dassert_lte(1, len(obj2)) + res.append(f"* {obj2_name}: ({len(obj2)}) {_to_string(obj2)}") + if add_space: + res.append("") + # obj1 intersect obj2. + intersection = obj1.intersection(obj2) + res.append(f"* intersect=({len(intersection)}) {_to_string(intersection)}") + if add_space: + res.append("") + # obj1 - obj2. 
+ diff = obj1 - obj2 + res.append(f"* {obj1_name}-{obj2_name}=({len(diff)}) {_to_string(diff)}") + if add_space: + res.append("") + # obj2 - obj1. + diff = obj2 - obj1 + res.append(f"* {obj2_name}-{obj1_name}=({len(diff)}) {_to_string(diff)}") + if add_space: + res.append("") + # Join all result lines. + result = "\n".join(res) + return result + + +# ############################################################################# + + +def remove_non_printable_chars(txt: str) -> str: + # From https://stackoverflow.com/questions/14693701 + # 7-bit and 8-bit C1 ANSI sequences + ansi_escape = re.compile( + r""" + \x1B # ESC + (?: # 7-bit C1 Fe (except CSI) + [@-Z\\-_] + | # or [ for CSI, followed by a control sequence + \[ + [0-?]* # Parameter bytes + [ -/]* # Intermediate bytes + [@-~] # Final byte + ) + """, + re.VERBOSE, + ) + txt = ansi_escape.sub("", txt) + return txt + + +# TODO(gp): Maybe move to helpers/hpython.py since it's not about printing. +def sort_dictionary(dict_: Dict) -> Dict: + """ + Sort a dictionary recursively using nested OrderedDict. + """ + import collections + + res = collections.OrderedDict() + for k, v in sorted(dict_.items()): + if isinstance(v, dict): + res[k] = sort_dictionary(v) + else: + res[k] = v + return res + + +def to_pretty_str(obj: Any) -> str: + if isinstance(obj, dict): + res = pprint.pformat(obj) + # import json + # res = json.dumps(obj, indent=4, sort_keys=True) + else: + res = str(obj) + return res + + +# TODO(gp): GSI -> rename remove_lines()? +def filter_text(regex: str, txt: str) -> str: + """ + Remove lines in `txt` that match the regex `regex`. + """ + _LOG.debug("Filtering with '%s'", regex) + if regex is None: + return txt + txt_out = [] + txt_as_arr = txt.split("\n") + for line_ in txt_as_arr: + if re.search(regex, line_): + _LOG.debug("Skipping line='%s'", line_) + continue + txt_out.append(line_) + # We can only remove lines. 
+ hdbg.dassert_lte( + len(txt_out), + len(txt_as_arr), + "txt_out=\n'''%s'''\ntxt=\n'''%s'''", + "\n".join(txt_out), + "\n".join(txt_as_arr), + ) + txt = "\n".join(txt_out) + return txt + + +def dassert_one_trailing_newline(txt: str) -> None: + match = re.search(r"\n*$", txt) + hdbg.dassert(match) + assert match is not None + num_newlines = len(match.group()) + hdbg.dassert_eq( + num_newlines, 0, "num_newlines='%s' txt='%s'", num_newlines, txt + ) + + +def to_info(tag: str, txt: Union[str, List[str]]) -> str: + """ + Return a string with a tag and the text indented. + + :param tag: the tag to add to the text + :param txt: the text to indent + :return: the string with the tag and the text indented + """ + hdbg.dassert_isinstance(tag, str) + hdbg.dassert_isinstance(txt, (str, list)) + txt_tmp = "" + txt_tmp += "# " + tag + "\n" + # Indent the text. + if not isinstance(txt, str): + for t in txt: + hdbg.dassert_isinstance(t, str) + txt = "\n".join(txt) + txt_tmp += indent(txt) + # Ensure that there is a single trailing newline. 
+ txt_tmp = txt_tmp.rstrip("\n") + # txt_tmp += "\n" + # _dassert_one_trailing_newline(txt_tmp) + _LOG.debug("'%s'", txt_tmp) + return txt_tmp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py new file mode 100644 index 000000000..c9cdd7be4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py @@ -0,0 +1,266 @@ +""" +Import as: + +import helpers.hpytest as hpytest +""" + +import logging +import os +import shutil +import sys +from typing import List, Optional + +import junitparser + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def _pytest_show_artifacts( + dir_name: str, *, tag: Optional[str] = None +) -> List[str]: + hdbg.dassert_ne(dir_name, "") + hdbg.dassert_dir_exists(dir_name) + cd_cmd = f"cd {dir_name} && " + # There might be no pytest artifacts. + abort_on_error = False + file_names: List[str] = [] + # Find pytest artifacts. + cmd = 'find . -name ".pytest_cache" -type d' + _, output_tmp = hsystem.system_to_string( + cd_cmd + cmd, abort_on_error=abort_on_error + ) + file_names.extend(output_tmp.split()) + # + cmd = 'find . -name "__pycache__" -type d' + _, output_tmp = hsystem.system_to_string( + cd_cmd + cmd, abort_on_error=abort_on_error + ) + file_names.extend(output_tmp.split()) + # Find .pyc artifacts. + cmd = 'find . -name "*.pyc" -type f' + _, output_tmp = hsystem.system_to_string( + cd_cmd + cmd, abort_on_error=abort_on_error + ) + file_names.extend(output_tmp.split()) + # Remove empty lines. 
+ file_names = hprint.remove_empty_lines(file_names) + # + if tag is not None: + num_files = len(file_names) + _LOG.info("%s: %d", tag, num_files) + _LOG.debug("\n%s", hprint.indent("\n".join(file_names))) + return file_names # type: ignore + + +def pytest_clean(dir_name: str, preview: bool = False) -> None: + """ + Clean pytest artifacts. + """ + _LOG.warning("Cleaning pytest artifacts") + hdbg.dassert_ne(dir_name, "") + hdbg.dassert_dir_exists(dir_name) + if preview: + _LOG.warning("Preview only: nothing will be deleted") + # Show before cleaning. + file_names = _pytest_show_artifacts(dir_name, tag="Before cleaning") + # Clean. + for f in file_names: + exists = os.path.exists(f) + _LOG.debug("%s -> exists=%s", f, exists) + if exists: + if not preview: + if os.path.isdir(f): + shutil.rmtree(f) + elif os.path.isfile(f): + os.remove(f) + else: + raise ValueError(f"Can't delete {f}") + else: + _LOG.debug("rm %s", f) + # Show after cleaning. + file_names = _pytest_show_artifacts(dir_name, tag="After cleaning") + hdbg.dassert_eq(len(file_names), 0) + + +# ############################################################################# +# JUnitReporter +# ############################################################################# + + +class JUnitReporter: + def __init__(self, xml_file: str): + self.xml_file = xml_file + self.xml_data = None + self.overall_stats = { + "passed": 0, + "failed": 0, + "error": 0, + "skipped": 0, + "total_time": 0.0, + "total_tests": 0, + } + + def _load(self) -> None: + """ + Load the JUnit XML file. + """ + self.xml_data = junitparser.JUnitXml.fromfile(self.xml_file) + + def parse(self): + """ + Parse the JUnit XML file. + """ + try: + self._load() + # Calculate overall statistics. 
+ for suite in self.xml_data: + if isinstance(suite, junitparser.TestSuite): + self.overall_stats["total_time"] += suite.time or 0 + self.overall_stats["total_tests"] += suite.tests or 0 + self.overall_stats["passed"] += ( + (suite.tests or 0) + - (suite.failures or 0) + - (suite.errors or 0) + - (suite.skipped or 0) + ) + self.overall_stats["failed"] += suite.failures or 0 + self.overall_stats["error"] += suite.errors or 0 + self.overall_stats["skipped"] += suite.skipped or 0 + except Exception as e: + print(hprint.color_highlight(f"Error parsing XML file: {e}", "red")) + sys.exit(1) + + def _get_colored_status(self, case: junitparser.TestCase) -> str: + """ + Get the colored status representation of test case. + """ + if not case.result or len(case.result) == 0: + return hprint.color_highlight("PASSED", "green") + result_type = case.result[0].__class__.__name__ + if result_type == "Failure": + return hprint.color_highlight("FAILED", "red") + elif result_type == "Error": + return hprint.color_highlight("ERROR", "red") + elif result_type == "Skipped": + return hprint.color_highlight("SKIPPED", "yellow") + else: + return hprint.color_highlight("PASSED", "green") + + def _print_detailed_results(self): + print(hprint.color_highlight("=" * 70, "bold")) + print( + hprint.color_highlight( + f"collected {self.overall_stats['total_tests']} items", "bold" + ) + ) + for _, suite in enumerate(self.xml_data): + if not isinstance(suite, junitparser.TestSuite): + continue + # Print suite header. + print(f"\n{hprint.color_highlight('=' * 70, 'blue')}") + print(hprint.color_highlight(f"Test: {suite.name}", "bold")) + print( + hprint.color_highlight( + f"Timestamp: {getattr(suite, 'timestamp', 'Unknown')}", + "bold", + ) + ) + print(hprint.color_highlight("-" * 70, "blue")) + # Print each test case. 
+ for case in suite: + if isinstance(case, junitparser.TestCase): + status_display = self._get_colored_status(case) + test_time = getattr(case, "time", 0) or 0 + print( + f" {case.classname}::{case.name} {status_display} ({test_time:.3f}s)" + ) + # Print suite summary. + suite_passed = ( + (suite.tests or 0) + - (suite.failures or 0) + - (suite.errors or 0) + - (suite.skipped or 0) + ) + summary_parts = [] + if suite_passed > 0: + summary_parts.append( + hprint.color_highlight(f"{suite_passed} passed", "green") + ) + if suite.failures and suite.failures > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.failures} failed", "red") + ) + if suite.errors and suite.errors > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.errors} error", "red") + ) + if suite.skipped and suite.skipped > 0: + summary_parts.append( + hprint.color_highlight(f"{suite.skipped} skipped", "WARNING") + ) + suite_summary = ( + ", ".join(summary_parts) if summary_parts else "no tests" + ) + suite_time = getattr(suite, "time", 0) or 0 + print( + hprint.color_highlight( + f"Summary: {suite_summary} in {suite_time:.3f}s", "INFO" + ) + ) + + def _print_final_summary(self): + summary_parts = [] + if self.overall_stats["passed"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['passed']} passed", "green" + ) + ) + if self.overall_stats["failed"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['failed']} failed", "red" + ) + ) + if self.overall_stats["error"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['error']} error", "red" + ) + ) + if self.overall_stats["skipped"] > 0: + summary_parts.append( + hprint.color_highlight( + f"{self.overall_stats['skipped']} skipped", "yellow" + ) + ) + summary_text = ", ".join(summary_parts) if summary_parts else "no tests" + time_text = "in " + hprint.color_highlight( + f"{self.overall_stats['total_time']:.2f}s", "bold" + ) + # Determine overall 
status + if self.overall_stats["failed"] > 0 or self.overall_stats["error"] > 0: + status_indicator = hprint.color_highlight("FAILED", "red") + elif ( + self.overall_stats["skipped"] > 0 + and self.overall_stats["passed"] == 0 + ): + status_indicator = hprint.color_highlight("SKIPPED", "yellow") + else: + status_indicator = hprint.color_highlight("PASSED", "green") + # Print summary. + print(f"\n{hprint.color_highlight('=' * 70, 'bold')}") + print( + hprint.color_highlight( + f"Summary: {summary_text} {time_text}", "INFO" + ) + ) + print(hprint.color_highlight(f"Result: {status_indicator}", "INFO")) + + def print_summary(self): + self._print_detailed_results() + self._print_final_summary() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py new file mode 100644 index 000000000..2ee2166f9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py @@ -0,0 +1,94 @@ +""" +Import as: + +import helpers.hretry as hretry +""" + +import asyncio +import functools +import logging +import time +from typing import Any, Tuple + +_LOG = logging.getLogger(__name__) + + +def sync_retry( + num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 +) -> object: + """ + Decorator retrying the wrapped function/method num_attempts times if the + `exceptions` listed in exceptions are thrown. + + :param num_attempts: the number of times to repeat the wrapped function/method + - The function will be called `num_attempts` times. 
+ :param exceptions: list of exceptions that trigger a retry attempt + :param retry_delay_in_sec: the number of seconds to wait between retry attempts + :return: the result of the wrapped function/method + """ + + def decorator(func) -> object: + @functools.wraps(func) + def retry_wrapper(*args, **kwargs): + attempts_count = 1 + last_exception = None + while attempts_count < num_attempts + 1: + try: + return func(*args, **kwargs) + except exceptions as e: + last_exception = e + _LOG.warning( + "Exception %s thrown when attempting to run %s, attempt " + "%d of %d", + e, + func, + attempts_count, + num_attempts, + ) + attempts_count += 1 + time.sleep(retry_delay_in_sec) + _LOG.error( + "Function %s failed after %d attempts", func, num_attempts + ) + raise last_exception + + return retry_wrapper + + return decorator + + +def async_retry( + num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 +) -> object: + """ + Same as `sync_retry` decorator but for `async` functions. + """ + + def decorator(func) -> object: + @functools.wraps(func) + async def retry_wrapper(*args, **kwargs): + attempts_count = 1 + last_exception = None + while attempts_count < num_attempts + 1: + try: + return await func(*args, **kwargs) + except exceptions as e: + last_exception = e + _LOG.warning( + "Exception %s thrown when attempting to run %s, attempt " + "%d of %d", + e, + func, + attempts_count, + num_attempts, + ) + attempts_count += 1 + await asyncio.sleep(retry_delay_in_sec) + _LOG.error( + "Function %s failed after %d attempts", func, num_attempts + ) + raise last_exception + + return retry_wrapper + + return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py new file mode 100644 index 000000000..a28914cb7 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py @@ -0,0 +1,1129 @@ +""" +Import as: + +import helpers.hs3 as hs3 +""" + +import argparse +import configparser +import copy +import functools +import gzip +import logging +import os +import pathlib +import re +from typing import Any, Dict, List, Optional, Tuple, Union + +_WARNING = "\033[33mWARNING\033[0m" + +try: + import s3fs + + # Handle different versions of s3fs where core module may be at different locations + if hasattr(s3fs, "core"): + from s3fs.core import S3File, S3FileSystem + else: + # In newer versions, classes might be directly in s3fs module + try: + from s3fs import S3File, S3FileSystem + except ImportError: + # Fallback to dynamic import + S3File = getattr(s3fs, "S3File", None) + S3FileSystem = getattr(s3fs, "S3FileSystem", None) +except ModuleNotFoundError: + _module = "s3fs" + print(_WARNING + f": Can't find {_module}: continuing") + # Define dummy classes for type hints when s3fs is not available + s3fs = None + + class S3File: + pass + + class S3FileSystem: + pass + + +# Avoid the following dependency from other `helpers` modules to prevent import cycles. +# import helpers.hpandas as hpandas +# import helpers.hsql as hsql +# import helpers.hunit_test as hunitest + +# To enforce this order of the imports we use the directive for the linter below. 
import helpers.hdbg as hdbg  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.hintrospection as hintros  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.hio as hio  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.hprint as hprint  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.hserver as hserver  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.hsystem as hsystem  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position
import helpers.htimer as htimer  # noqa: E402 module level import not at top of file  # pylint: disable=wrong-import-position

_LOG = logging.getLogger(__name__)

# AWS Region global constants
# Moved to hs3.py from haws.py due to cyclic imports detected in
# build https://github.com/cryptokaizen/cmamp/actions/runs/10729983412/job/29757600889
AWS_EUROPE_REGION_1 = "eu-north-1"
AWS_TOKYO_REGION_1 = "ap-northeast-1"
AWS_US_REGION_1 = "us-east-1"
AWS_REGIONS = [AWS_EUROPE_REGION_1, AWS_TOKYO_REGION_1, AWS_US_REGION_1]

# TODO(gp): @all separate S3 code in `helpers/hs3.py` from authentication and
# AWS profile code in `helpers/aws_authentication.py`.

# #############################################################################
# Basic utils.
# #############################################################################

# Either the name of an AWS profile, an already-built s3fs filesystem, or
# `None` (used for local paths).
AwsProfile = Optional[Union[str, S3FileSystem]]


def is_s3_path(s3_path: str) -> bool:
    """
    Return whether a path is on an S3 bucket, i.e., if it starts with `s3://`.

    A path with a doubled prefix (`s3://s3://...`) is considered invalid.

    :param s3_path: path to check
    :return: `True` if the path is a well-formed S3 path
    """
    hdbg.dassert_isinstance(s3_path, str)
    prefix = "s3://"
    return s3_path.startswith(prefix) and not s3_path.startswith(prefix * 2)


def dassert_is_s3_path(s3_path: str) -> None:
    """
    Assert that a path is an S3 path.

    :param s3_path: path to check
    """
    hdbg.dassert(
        is_s3_path(s3_path),
        "Invalid S3 file='%s'",
        s3_path,
    )


def dassert_is_not_s3_path(s3_path: str) -> None:
    """
    Assert that a path is not an S3 path.

    :param s3_path: path to check
    """
    hdbg.dassert(
        not is_s3_path(s3_path),
        "Passed an S3 file='%s' when it was not expected",
        s3_path,
    )


def dassert_is_valid_aws_profile(path: str, aws_profile: AwsProfile) -> None:
    """
    Check that the value of `aws_profile` is compatible with the S3 or local
    file `path`.

    :param path: S3 or local path
    :param aws_profile: AWS profile to use if and only if using an S3 path,
        otherwise `None` for local path
    """
    if is_s3_path(path):
        hdbg.dassert_is_not(
            aws_profile, None, "path=%s aws_profile=%s", path, aws_profile
        )
    else:
        hdbg.dassert_is(
            aws_profile, None, "path=%s aws_profile=%s", path, aws_profile
        )


# ///////////////////////////////////////////////////////////////////////////////


def get_s3fs(aws_profile: AwsProfile) -> S3FileSystem:
    """
    Return a `s3fs` object from a given AWS profile.

    :param aws_profile: the name of an AWS profile or a s3fs filesystem
    :return: the filesystem object to use to access S3
    :raises ValueError: if `aws_profile` is neither a string nor a
        `S3FileSystem`
    """
    if hserver.is_ig_prod():
        # On IG prod machines we let the Docker container infer the right AWS
        # account.
        _LOG.warning("Not using AWS profile='%s'", aws_profile)
        return S3FileSystem()
    if isinstance(aws_profile, S3FileSystem):
        # The caller already has a filesystem: reuse it.
        return aws_profile
    if not isinstance(aws_profile, str):
        raise ValueError(f"Invalid aws_profile='{aws_profile}'")
    # When deploying jobs via ECS the container obtains credentials
    # based on passed task role specified in the ECS task-definition,
    # refer to:
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html
    if (
        # TODO(heanh): Centralize the list of supported profiles.
        aws_profile in ["ck", "csfy"]
        and hserver.is_inside_ecs_container()
    ):
        _LOG.info("Fetching credentials from task IAM role")
        return S3FileSystem()
    # TODO(heanh): Make this manual extraction of credentials
    # code obsoleted.
    # From https://stackoverflow.com/questions/62562945
    # aws_credentials = get_aws_credentials(aws_profile)
    # _LOG.debug("%s", pprint.pformat(aws_credentials))
    # s3fs_ = S3FileSystem(
    #     anon=False,
    #     key=aws_credentials["aws_access_key_id"],
    #     secret=aws_credentials["aws_secret_access_key"],
    #     token=aws_credentials["aws_session_token"],
    #     client_kwargs={"region_name": aws_credentials["aws_region"]},
    # )
    #
    # We do not need to extract the credential from the file because
    # the config (`~/.aws/config`) and credential
    # (`~/.aws/credentials`) are already set.
    return S3FileSystem(anon=False, profile=aws_profile)


def dassert_path_exists(
    path: str, aws_profile: Optional[AwsProfile] = None
) -> None:
    """
    Assert that an S3 or local path exists. `aws_profile` is specified if and
    only if path is an S3 path.

    :param path: S3 or local path
    :param aws_profile: the name of an AWS profile or a s3fs filesystem
    """
    dassert_is_valid_aws_profile(path, aws_profile)
    if is_s3_path(path):
        s3fs_ = get_s3fs(aws_profile)
        hdbg.dassert(s3fs_.exists(path), f"S3 path '{path}' doesn't exist!")
    else:
        hdbg.dassert_path_exists(path)


def dassert_path_not_exists(
    path: str, aws_profile: Optional[AwsProfile] = None
) -> None:
    """
    Assert that an S3 or local path does not exist. `aws_profile` is specified
    if and only if path is an S3 path.

    :param path: S3 or local path
    :param aws_profile: the name of an AWS profile or a s3fs filesystem
    """
    dassert_is_valid_aws_profile(path, aws_profile)
    if is_s3_path(path):
        s3fs_ = get_s3fs(aws_profile)
        hdbg.dassert(not s3fs_.exists(path), f"S3 path '{path}' already exist!")
    else:
        hdbg.dassert_path_not_exists(path)


# TODO(gp): Consider using `s3fs.split_path`.
def split_path(s3_path: str) -> Tuple[str, str]:
    """
    Separate an S3 path in the bucket and the rest of the path as absolute from
    the root.

    E.g., for `s3://alphamatic-data/tmp/hello` returns
    (`alphamatic-data`, `/tmp/hello`)

    :param s3_path: full S3 path starting with `s3://`
    :return: the bucket name and the absolute path inside the bucket
    """
    dassert_is_s3_path(s3_path)
    # Remove the s3 prefix.
    prefix = "s3://"
    hdbg.dassert(s3_path.startswith(prefix))
    s3_path = s3_path[len(prefix) :]
    # Break the path into dirs.
    bucket, *sub_dirs = s3_path.split("/")
    abs_path = os.path.join("/", *sub_dirs)
    hdbg.dassert(
        abs_path.startswith("/"),
        "The path should be absolute instead of %s",
        abs_path,
    )
    return bucket, abs_path


def _replace_star_with_double_star(pattern_to_modify: str) -> str:
    """
    Replace a single star with a double star in a pattern.

    Originally we simply used to do `pattern.replace("*", "**")`.
    but in the newer versions of `s3fs` this is not allowed:
    `ValueError: Invalid pattern: '**' can
    only be an entire path component`

    We also need to take care of special cases such as:
    *.csv* -> **/*.csv*

    Examples:
    s3://bucket/*/path/* -> s3://bucket/**/*/path/**/*
    s3://bucket/*/path/csv* -> s3://bucket/**/*/path/csv*

    :param pattern_to_modify: pattern to replace wildcards in
    :return: pattern with wildcards replaced
    """
    append_wildcard = False
    # Handle the special case of ending with wildcard
    # (e.g.: *.csv*): the trailing star is set aside so that it is not expanded
    # and is re-appended verbatim at the end.
    if re.match(r"(?=.*[a-zA-Z0-9]).*\*$", pattern_to_modify):
        pattern_to_modify = pattern_to_modify[:-1]
        append_wildcard = True
    new_pattern = pattern_to_modify.replace("*", "**/*")
    if append_wildcard:
        new_pattern += "*"
    return new_pattern


def listdir(
    dir_name: str,
    pattern: str,
    only_files: bool,
    use_relative_paths: bool,
    *,
    exclude_git_dirs: bool = True,
    aws_profile: Optional[AwsProfile] = None,
    maxdepth: Optional[int] = None,
) -> List[str]:
    """
    Counterpart to `hio.listdir` with S3 support.

    :param dir_name: S3 or local path
    :param pattern: glob pattern of the entries to find; for S3 it must not
        contain `**`
    :param only_files: return only files, discarding directories
    :param use_relative_paths: return paths relative to `dir_name`
    :param exclude_git_dirs: skip anything under a `.git` directory
    :param aws_profile: AWS profile to use if and only if using an S3 path,
        otherwise `None` for local path
    :param maxdepth: limit the depth of directory traversal
    :return: the paths found; for S3 they are sorted and without the `s3://`
        prefix
    """
    dassert_is_valid_aws_profile(dir_name, aws_profile)
    _LOG.debug("pattern=%s", pattern)
    if not is_s3_path(dir_name):
        return hio.listdir(
            dir_name,
            pattern,
            only_files,
            use_relative_paths,
            exclude_git_dirs=exclude_git_dirs,
            maxdepth=maxdepth,
        )
    s3fs_ = get_s3fs(aws_profile)
    dassert_path_exists(dir_name, s3fs_)
    # Ensure that there are no multiple stars in pattern.
    hdbg.dassert_not_in("**", pattern)
    # `hio.listdir` is using `find` which looks for files and directories
    # descending recursively in the directory.
    # One star in glob will use `maxdepth=1`.
    pattern = _replace_star_with_double_star(pattern)
    _LOG.debug("pattern=%s", pattern)
    # Detailed S3 objects in dict form (path -> metadata).
    path_objects = s3fs_.glob(
        f"{dir_name}/{pattern}", detail=True, maxdepth=maxdepth
    )
    if only_files:
        # Use metadata to distinguish files from directories without
        # calling `s3fs_.isdir/isfile`. Filtering on the dict keys avoids
        # copying the metadata and relying on the "Key" field matching the
        # dict key.
        paths = [
            path for path, info in path_objects.items() if info["type"] == "file"
        ]
    else:
        paths = list(path_objects.keys())
    if exclude_git_dirs:
        paths = [
            path for path in paths if ".git" not in pathlib.Path(path).parts
        ]
    bucket, absolute_path = split_path(dir_name)
    # Basically the goal is to remove `s3://` from the full S3 path.
    root_path = f"{bucket}{absolute_path}"
    # Remove special entries such as `.` (`root_path` in this case) and
    # bucket name to keep the same return format as in `hio.listdir()`.
    # The root is also compared in normalized form since the candidate paths
    # are normalized below (e.g., a trailing `/` in `dir_name`).
    paths_to_exclude = {bucket, root_path, os.path.normpath(root_path)}
    # Remove redundant separators and duplicates; sort to return the paths in
    # a deterministic order.
    paths = sorted(
        {os.path.normpath(path) for path in paths} - paths_to_exclude
    )
    if use_relative_paths:
        paths = [os.path.relpath(path, start=root_path) for path in paths]
    return paths


def du(
    path: str,
    *,
    human_format: bool = False,
    aws_profile: Optional[AwsProfile] = None,
) -> Union[int, str]:
    """
    Counterpart to `hsystem.du` with S3 support.

    If and only if `aws_profile` is specified, S3 is used instead of
    local filesystem.

    :param path: S3 or local path
    :param human_format: return a formatted string instead of a raw size
    :param aws_profile: AWS profile to use if and only if using an S3 path
    :return: the size of `path`
    """
    dassert_is_valid_aws_profile(path, aws_profile)
    if is_s3_path(path):
        s3fs_ = get_s3fs(aws_profile)
        dassert_path_exists(path, s3fs_)
        size: Union[int, str] = s3fs_.du(path)
        if human_format:
            size = hintros.format_size(size)
    else:
        size = hsystem.du(path, human_format=human_format)
    return size


def to_file(
    lines: str,
    file_name: str,
    *,
    mode: Optional[str] = None,
    force_flush: bool = False,
    aws_profile: Optional[AwsProfile] = None,
) -> None:
    """
    Counterpart to `hio.to_file` with S3 support.

    If and only if `aws_profile` is specified, S3 is used instead of
    local filesystem.

    :param lines: text to write
    :param file_name: S3 or local path; a `.gz` / `.gzip` extension enables
        gzip compression
    :param mode: file opening mode; for S3 it must be binary and defaults to
        `wb`
    :param force_flush: flush the file after writing
    :param aws_profile: AWS profile to use if and only if using an S3 path
    :raises ValueError: if a non-binary mode is requested for an S3 path
    """
    dassert_is_valid_aws_profile(file_name, aws_profile)
    use_gzip = file_name.endswith((".gz", ".gzip"))
    if not is_s3_path(file_name):
        hio.to_file(
            file_name,
            lines,
            mode=mode,
            use_gzip=use_gzip,
            force_flush=force_flush,
        )
        return
    # Ensure that `bytes` is used.
    if mode is not None and "b" not in mode:
        raise ValueError("S3 only allows binary mode!")
    hdbg.dassert_isinstance(lines, str)
    # Convert lines to bytes, only supported mode for S3.
    # Also create a list of new lines as raw bytes is not supported.
    os_sep = os.linesep
    lines_lst = [f"{line}{os_sep}".encode() for line in lines.split(os_sep)]
    # Inspect file name and path.
    hio.dassert_is_valid_file_name(file_name)
    s3fs_ = get_s3fs(aws_profile)
    # Default to binary write when the caller does not specify a mode.
    mode = "wb" if mode is None else mode
    with s3fs_.open(file_name, mode) as s3_file:
        if use_gzip:
            # Compress the content while writing it.
            with gzip.GzipFile(fileobj=s3_file) as gzip_file:
                gzip_file.writelines(lines_lst)
        else:
            # Any other file.
            s3_file.writelines(lines_lst)
        if force_flush:
            # TODO(Nikola): Investigate S3 alternative for `os.fsync(f.fileno())`.
            s3_file.flush()


def from_file(
    file_name: str,
    encoding: Optional[Any] = None,
    aws_profile: Optional[AwsProfile] = None,
) -> str:
    """
    Counterpart to `hio.from_file` with S3 support.

    If and only if `aws_profile` is specified, S3 is used instead of
    local filesystem.

    :param file_name: S3 or local path; a `.gz` / `.gzip` extension enables
        gzip decompression
    :param encoding: encoding for local files; not supported for S3
    :param aws_profile: AWS profile to use if and only if using an S3 path
    :return: the content of the file
    :raises ValueError: if `encoding` is passed for an S3 path
    """
    dassert_is_valid_aws_profile(file_name, aws_profile)
    if not is_s3_path(file_name):
        return hio.from_file(file_name, encoding=encoding)
    if encoding:
        raise ValueError("Encoding is not supported when reading from S3!")
    # Inspect file name and path.
    hio.dassert_is_valid_file_name(file_name)
    s3fs_ = get_s3fs(aws_profile)
    dassert_path_exists(file_name, s3fs_)
    # Open s3 file.
    with s3fs_.open(file_name) as s3_file:
        if file_name.endswith((".gz", ".gzip")):
            # Open and decompress gzipped file.
            with gzip.GzipFile(fileobj=s3_file) as gzip_file:
                data = gzip_file.read().decode()
        else:
            # Any other file.
            data = s3_file.read().decode()
    return data


# TODO(Nina): consider adding support for handling dirs.
# TODO(Grisha): consider extending for the regular file system.
def copy_file_to_s3(
    file_path: str,
    s3_dst_file_path: str,
    aws_profile: str,
) -> None:
    """
    Copy a local file to S3.

    :param file_path: path to a file to copy
    :param s3_dst_file_path: S3 path to copy to
    :param aws_profile: aws profile
    """
    # Imported locally since it is needed only to build this command line.
    import shlex

    hdbg.dassert_file_exists(file_path)
    dassert_is_s3_path(s3_dst_file_path)
    dassert_is_valid_aws_profile(s3_dst_file_path, aws_profile)
    # Quote the arguments so that paths with spaces or shell metacharacters are
    # passed as a single argument. Plain paths are left untouched by
    # `shlex.quote()`.
    # NOTE(review): assumes `hsystem.system()` runs the command through a
    # shell - confirm.
    aws_s3_cp_cmd = (
        f"aws s3 cp {shlex.quote(file_path)} {shlex.quote(s3_dst_file_path)}"
    )
    if not hserver.is_inside_ecs_container():
        # There is no `~/.aws/credentials` file inside an ECS container
        # but the AWS credentials are received via a task role. So
        # no need to pass the profile option.
        aws_s3_cp_cmd += f" --profile {shlex.quote(aws_profile)}"
    _LOG.info("Copying from %s to %s", file_path, s3_dst_file_path)
    hsystem.system(aws_s3_cp_cmd, suppress_output=False)


def get_local_or_s3_stream(
    file_name: str, **kwargs: Any
) -> Tuple[Union[S3File, str], Dict[str, Any]]:
    """
    Get S3 stream for desired file or simply returns file name.

    :param file_name: file name or full path to file
    :param kwargs: options for the caller; `s3fs` is required for an S3 path
        and is removed from the returned kwargs
    :return: the open S3 file (for an S3 path) or `file_name` itself (for a
        local path), together with the remaining kwargs
    """
    _LOG.debug(hprint.to_str("file_name kwargs"))
    # Handle the s3fs param, if needed.
    if is_s3_path(file_name):
        # For S3 files we need to have an `s3fs` parameter.
        hdbg.dassert_in(
            "s3fs",
            kwargs,
            "Credentials through s3fs are needed to access an S3 path",
        )
        s3fs_ = kwargs.pop("s3fs")
        hdbg.dassert_isinstance(s3fs_, S3FileSystem)
        dassert_path_exists(file_name, s3fs_)
        stream = s3fs_.open(file_name)
    else:
        if "s3fs" in kwargs:
            _LOG.warning("Passed `s3fs` without an S3 file: ignoring it")
            _ = kwargs.pop("s3fs")
        hdbg.dassert_file_exists(file_name)
        stream = file_name
    return stream, kwargs


# #############################################################################
# AWS.
# #############################################################################


def _get_aws_config(file_name: str) -> configparser.RawConfigParser:
    """
    Return a parser to the config in `~/.aws/{file_name}`.

    :param file_name: name of the file under `~/.aws` (e.g., `credentials`)
    :return: the parser loaded with the content of the file
    """
    file_name = os.path.join(os.path.expanduser("~"), ".aws", file_name)
    hdbg.dassert_file_exists(file_name)
    # Read the config.
    config = configparser.RawConfigParser()
    config.read(file_name)
    _LOG.debug("config.sections=%s", config.sections())
    return config


# #############################################################################
# Authentication.
# #############################################################################

# Architecture of the AWS authentication
#
# - There can be two or more AWS S3 systems with different credentials, paths to
#   bucket, and other properties
# - Some code needs to refer always and only to a specific S3 bucket
#   - E.g., AM S3 bucket for Kibot data
# - Other code needs to work with different AWS S3 systems
#   - E.g., `publish_notebooks`, saving / retrieving experiments, caching
#
# - The desired AWS S3 systems are selected through an `aws_profile` parameter
#   (e.g., `ck`)
# - The value of AWS profile is obtained from
#   - the `--aws_profile` command line option; or
#   - a client specifying the needed `aws_profile`
#
# - The AWS profile is then used to access the `~/.aws` files and extract:
#   - the credentials (e.g., `aws_access_key_id`, `aws_secret_access_key`,
#     `aws_region`)
#   - other variables (e.g., `aws_s3_bucket`)
# - The variables that are extracted from the files are passed through env vars
#   directly for GitHub Actions CI
# - One can specify env vars conditioned to different profiles using the AWS
#   profile
#   - E.g., `ck` profile for `AWS_ACCESS_KEY_ID` corresponds to
#     `CSFY_AWS_ACCESS_KEY_ID`


def _get_env_var_prefix(aws_profile: str) -> str:
    """
    Return the prefix of the env vars storing the settings of an AWS profile.

    :param aws_profile: the name of an AWS profile (e.g., `ck`)
    :return: the upper-case prefix (e.g., `CSFY`)
    """
    # TODO(Juraj): needed because ENV_VARS are now prefixed with
    # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in
    # CmTask11095.
    prefix = aws_profile.upper()
    return "CSFY" if prefix in ["AM", "CK"] else prefix


@functools.lru_cache()
def get_aws_credentials(
    aws_profile: str,
) -> Dict[str, Optional[str]]:
    """
    Read the AWS credentials for a given profile from `~/.aws` or from env
    vars.

    The env vars are used only when all of them are set to a non-empty value,
    otherwise the `~/.aws/credentials` and `~/.aws/config` files are read.
    The result is cached per profile.

    :param aws_profile: the name of an AWS profile
    :return: a dictionary with `aws_access_key_id`, `aws_secret_access_key`,
        `aws_region`, `aws_session_token` (`None` if not available) and, when
        read from files, `aws_s3_bucket` (`None` if not available)
    """
    _LOG.debug("Getting credentials for aws_profile='%s'", aws_profile)
    if aws_profile == "__mock__":
        # `mock` profile is artificial construct used only in tests.
        aws_profile = aws_profile.strip("__")
    profile_prefix = _get_env_var_prefix(aws_profile)
    session_token_env_var = f"{profile_prefix}_AWS_SESSION_TOKEN"
    key_to_env_var: Dict[str, str] = {
        "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID",  # gitleaks:allow
        "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY",  # gitleaks:allow
    }
    # The session token is required only when its env var is defined.
    if session_token_env_var in os.environ:
        key_to_env_var["aws_session_token"] = session_token_env_var
    # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant
    # that the var is simply the capitalized version of the key.
    key_to_env_var["aws_region"] = f"{profile_prefix}_AWS_DEFAULT_REGION"
    # If all the AWS credentials are passed through env vars, they override the
    # config file.
    missing_env_vars = [
        env_var
        for env_var in sorted(key_to_env_var.values())
        if not os.environ.get(env_var)
    ]
    env_var_override = not missing_env_vars
    if missing_env_vars and len(missing_env_vars) < len(key_to_env_var):
        _LOG.warning(
            "Some but not all AWS env vars are set (missing or empty: %s): "
            "ignoring",
            str(missing_env_vars),
        )
    result: Dict[str, Optional[str]] = {}
    if env_var_override:
        _LOG.debug("Using AWS credentials from env vars")
        # If one variable is defined all should be defined.
        for key, env_var in key_to_env_var.items():
            hdbg.dassert_in(env_var, os.environ)
            result[key] = os.environ[env_var]
        # The token is already stored when its env var is defined.
        result.setdefault("aws_session_token", None)
    else:
        _LOG.debug("Using AWS credentials from files")
        # > more ~/.aws/credentials
        # [am]
        # aws_access_key_id=AKI...
        # aws_secret_access_key=mhg..
        # aws_session_token = Fwo...
        config = _get_aws_config("credentials")
        # Mandatory settings.
        for key in ("aws_access_key_id", "aws_secret_access_key"):
            result[key] = config.get(aws_profile, key)
        # Optional settings.
        for key in ("aws_session_token", "aws_s3_bucket"):
            if config.has_option(aws_profile, key):
                result[key] = config.get(aws_profile, key)
            else:
                result[key] = None
        # > more ~/.aws/config
        # [am]
        # region = us-east-1
        config = _get_aws_config("config")
        # For ~/.aws/config the tag is `profile am` instead of `am`.
        result["aws_region"] = config.get(f"profile {aws_profile}", "region")
    #
    hdbg.dassert_is_subset(key_to_env_var.keys(), result.keys())
    return result


# #############################################################################
# Bucket
# #############################################################################


# TODO(Nikola): CmTask #1810 "Increase test coverage in helpers/hs3.py"
def get_s3_bucket_path(aws_profile: str, add_s3_prefix: bool = True) -> str:
    """
    Return the S3 bucket from environment variable corresponding to a given
    `aws_profile`.

    E.g., `aws_profile="am"` uses the value in `CSFY_AWS_S3_BUCKET`, which must
    store the bare bucket name (e.g., `alphamatic-data`) without the `s3://`
    prefix. If the env var is not set, the bucket is read from the AWS
    credentials of the profile.

    :param aws_profile: the name of an AWS profile
    :param add_s3_prefix: prepend `s3://` to the bucket name
    :return: the bucket, e.g., `s3://alphamatic-data`
    """
    hdbg.dassert_type_is(aws_profile, str)
    prefix = _get_env_var_prefix(aws_profile)
    env_var = f"{prefix}_AWS_S3_BUCKET"
    if env_var in os.environ:
        _LOG.debug("Using env var '%s'", env_var)
        s3_bucket = os.environ[env_var]
    else:
        # Fall-back to local credentials.
        _LOG.debug("No env var '%s': checking credentials", env_var)
        aws_credentials = get_aws_credentials(aws_profile)
        # Log only the available keys, since the values are secrets.
        _LOG.debug("aws_credentials keys=%s", sorted(aws_credentials.keys()))
        # The bucket can be missing or `None`: normalize it to an empty string
        # so that the assertion below reports the problem.
        s3_bucket = aws_credentials.get("aws_s3_bucket") or ""
    hdbg.dassert_ne(s3_bucket, "")
    hdbg.dassert(
        not s3_bucket.startswith("s3://"),
        "Invalid %s value '%s'",
        env_var,
        s3_bucket,
    )
    if add_s3_prefix:
        s3_bucket = "s3://" + s3_bucket
    return s3_bucket


# TODO(sonaal): Do we really need aws profile as argument or
# we can use default? Ref. https://github.com/cryptokaizen/cmamp/pull/6045#discussion_r1380392748
def get_s3_bucket_path_unit_test(
    aws_profile: str, *, add_s3_prefix: bool = True
) -> str:
    """
    Return the S3 bucket used by the unit tests for a given `aws_profile`.

    :param aws_profile: the name of an AWS profile; only `ck` is supported
    :param add_s3_prefix: prepend `s3://` to the bucket name
    :return: the bucket, e.g., `s3://cryptokaizen-unit-test`
    """
    if aws_profile == "ck":
        s3_bucket = "cryptokaizen-unit-test"
    else:
        hdbg.dfatal(f"Invalid aws_profile={aws_profile}")
    if add_s3_prefix:
        s3_bucket = "s3://" + s3_bucket
    return s3_bucket


def get_latest_pq_in_s3_dir(s3_path: str, aws_profile: str) -> str:
    """
    Get the latest Parquet file in the specified directory.

    :param s3_path: the path to s3 directory, e.g.
        `cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance`
    :param aws_profile: AWS profile to use
    :return: the path to the latest Parquet file in the directory,
        E.g. `cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance/
        currency_pair=ETH_USDT/year=2022/month=12/data.parquet`
    """
    hdbg.dassert_type_is(aws_profile, str)
    s3fs_ = get_s3fs(aws_profile)
    dir_name = f"{s3_path}/**/*.parquet"
    pq_files = s3fs_.glob(dir_name, detail=True)
    hdbg.dassert_lte(1, len(pq_files), "dir_name=%s", dir_name)
    _LOG.debug("pq_files=%s", pq_files)
    # Pick the file that was modified last; there is no need to sort all the
    # files to find it.
    latest_file_path = max(
        pq_files, key=lambda path: pq_files[path]["LastModified"]
    )
    return latest_file_path


# #############################################################################
# Parser.
# #############################################################################


def add_s3_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
    """
    Add the command line options for the AWS credentials.

    :param parser: parser to add the `--aws_profile` and `--s3_path` options to
    :return: the same parser
    """
    parser.add_argument(
        "--aws_profile",
        action="store",
        type=str,
        help="The AWS profile to use for `.aws/credentials` or for env vars",
    )
    parser.add_argument(
        "--s3_path",
        action="store",
        type=str,
        default=None,
        help="Full S3 dir path to use (e.g., `s3://alphamatic-data/foobar/`), "
        "overriding any other setting",
    )
    return parser


def _dassert_all_env_vars_set(key_to_env_var: Dict[str, str]) -> None:
    """
    Check that the required AWS env vars are set and are not empty strings.

    :param key_to_env_var: aws settings names to the corresponding env
        var names mapping
    """
    for env_var in key_to_env_var.values():
        hdbg.dassert_in(env_var, os.environ)
        # Check the value of the env var, not its name.
        hdbg.dassert_ne(os.environ[env_var], "")


def _get_aws_file_text(key_to_env_var: Dict[str, str]) -> List[str]:
    """
    Generate text from env vars for AWS files.

    E.g.:
    ```
    aws_access_key_id=***  # gitleaks:allow
    aws_secret_access_key=***  # gitleaks:allow
    aws_s3_bucket=***
    ```
    :param key_to_env_var: aws settings names to the corresponding env
        var names mapping
    :return: AWS file text
    """
    return [
        f"{key}={os.environ[env_var]}" for key, env_var in key_to_env_var.items()
    ]


def _get_aws_config_text(aws_profile: str) -> str:
    """
    Generate text for the AWS config file, i.e. ".aws/config".

    :param aws_profile: the name of an AWS profile
    :return: the section of the config file for the profile
    """
    # Set which env vars we need to get.
    profile_prefix = _get_env_var_prefix(aws_profile)
    key_to_env_var = {"region": f"{profile_prefix}_AWS_DEFAULT_REGION"}
    # Check that env vars are set.
    _dassert_all_env_vars_set(key_to_env_var)
    lines = [f"[profile {aws_profile}]"]
    lines.extend(_get_aws_file_text(key_to_env_var))
    return "\n".join(lines)


def _get_aws_credentials_text(aws_profile: str) -> str:
    """
    Generate text for the AWS credentials file, i.e. ".aws/credentials".

    :param aws_profile: the name of an AWS profile
    :return: the section of the credentials file for the profile
    """
    # Set which env vars we need to get.
    profile_prefix = _get_env_var_prefix(aws_profile)
    session_token_env_var = f"{profile_prefix}_AWS_SESSION_TOKEN"
    key_to_env_var = {
        "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID",  # gitleaks:allow
        "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY",  # gitleaks:allow
    }
    # Add the AWS session token only if it is set in environment variable.
    if session_token_env_var in os.environ:
        key_to_env_var["aws_session_token"] = session_token_env_var
    # TODO(heanh): Is this needed?
    key_to_env_var["aws_s3_bucket"] = f"{profile_prefix}_AWS_S3_BUCKET"
    # Check that env vars are set.
    _dassert_all_env_vars_set(key_to_env_var)
    lines = [f"[{aws_profile}]"]
    lines.extend(_get_aws_file_text(key_to_env_var))
    return "\n".join(lines)


def generate_aws_files(
    home_dir: str = "~",
    aws_profiles: Optional[List[str]] = None,
) -> None:
    """
    Generate AWS configuration files.

    This is needed to use the AWS CLI and the `boto3` library when we are in CI.
    Nothing is written if both files already exist.

    :param home_dir: dir where the `.aws` dir is stored
    :param aws_profiles: profiles to store in the files; `None` means `ck`
    """
    if home_dir == "~":
        home_dir = os.path.expanduser(home_dir)
    config_file_name = os.path.join(home_dir, ".aws", "config")
    credentials_file_name = os.path.join(home_dir, ".aws", "credentials")
    # Check if the files already exist.
    if os.path.exists(credentials_file_name) and os.path.exists(
        config_file_name
    ):
        _LOG.info(
            "Both files exist: %s and %s; exiting",
            credentials_file_name,
            config_file_name,
        )
        return
    if aws_profiles is None:
        aws_profiles = ["ck"]
    # Get text with settings for both files.
    config_sections = []
    credentials_sections = []
    for profile in aws_profiles:
        config_sections.append(_get_aws_config_text(profile))
        credentials_sections.append(_get_aws_credentials_text(profile))
    # Create both files.
    hio.to_file(config_file_name, "\n\n".join(config_sections))
    _LOG.debug("Saved AWS config to %s", config_file_name)
    # NOTE(review): the credentials file stores secrets; consider restricting
    # its permissions after writing it - confirm what `hio.to_file()` does.
    hio.to_file(credentials_file_name, "\n\n".join(credentials_sections))
    _LOG.debug("Saved AWS credentials to %s", credentials_file_name)


# #############################################################################
# Archive and retrieve data from S3.
# #############################################################################


# TODO(gp): -> helpers/aws_utils.py


def archive_data_on_s3(
    src_dir: str, s3_path: str, aws_profile: Optional[str], tag: str = ""
) -> str:
    """
    Compress dir `src_dir` and save it on AWS S3 under `s3_path`.

    A timestamp and a tag is added to make the name more informative.
    The tgz is created so that when expanded a dir with the name `src_dir` is
    created.

    :param src_dir: directory that will be compressed
    :param s3_path: full S3 path starting with `s3://`
    :param aws_profile: the profile to use. We use a string and not an
        `AwsProfile` since this is typically the outermost caller in the stack,
        and it doesn't reuse an S3 fs object
    :param tag: a tag to add to the name of the file
    """
    _LOG.info(
        "# Archiving '%s' to '%s' with aws_profile='%s'",
        src_dir,
        s3_path,
        aws_profile,
    )
    hdbg.dassert_dir_exists(src_dir)
    dassert_is_s3_path(s3_path)
    _LOG.info(
        "The size of '%s' is %s",
        src_dir,
        hsystem.du(src_dir, human_format=True),
    )
    # Add a timestamp if needed.
    dst_path = hsystem.append_timestamp_tag(src_dir, tag) + ".tgz"
    # Compress the dir.
    # > (cd .../TestRunExperimentArchiveOnS3.test_serial1; \
    #   tar cvzf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz experiment.RH1E)
    # experiment.RH1E/
    # experiment.RH1E/log.20210802-123758.txt
    # experiment.RH1E/output_metadata.json
    # ...
+ _LOG.debug("Destination path is '%s'", dst_path) + with htimer.TimedScope(logging.INFO, "Compressing"): + dir_name = os.path.dirname(src_dir) + base_name = os.path.basename(src_dir) + hdbg.dassert_ne(base_name, "", "src_dir=%s", src_dir) + cmd = "" + if dir_name != "": + cmd += f"cd {dir_name} && " + cmd += f"tar czf {dst_path} {base_name}" + hsystem.system(cmd) + _LOG.info( + "The size of '%s' is %s", + dst_path, + hsystem.du(dst_path, human_format=True), + ) + # Test expanding the tgz. The package should expand to the original dir. + # > tar tf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz + # experiment.RH1E/ + # experiment.RH1E/log.20210802-123758.txt + # experiment.RH1E/output_metadata.json + _LOG.info("Testing archive") + cmd = f"tar tvf {dst_path}" + hsystem.system(cmd, log_level=logging.INFO, suppress_output=False) + # Copy to S3. + s3_file_path = os.path.join(s3_path, os.path.basename(dst_path)) + _LOG.info("Copying '%s' to '%s'", dst_path, s3_file_path) + hdbg.dassert_file_exists(dst_path) + s3fs_ = get_s3fs(aws_profile) + # TODO(gp): Make sure the S3 dir exists. + s3fs_.put(dst_path, s3_file_path) + _LOG.info("Data archived on S3 to '%s'", s3_file_path) + return s3_file_path + + +def copy_data_from_s3_to_local_dir( + src_s3_dir: str, dst_local_dir: str, aws_profile: str +) -> None: + """ + Copy data from S3 to a local dir. + + :param src_s3_dir: path on S3 storing the data to copy + :param scratch_space_path: local path on scratch space + :param aws_profile: AWS profile to use + """ + _LOG.debug( + "Copying input data from %s to %s", + src_s3_dir, + dst_local_dir, + ) + cmd = f"aws s3 sync {src_s3_dir} {dst_local_dir} --profile {aws_profile}" + hsystem.system(cmd, suppress_output=False, log_level="echo") + + +def retrieve_archived_data_from_s3( + s3_file_path: str, + dst_dir: str, + aws_profile: Optional[str] = None, + incremental: bool = True, +) -> str: + """ + Retrieve tgz file from S3, unless it's already present (incremental mode). 
+ + :param s3_file_path: path to the S3 file with the archived data. E.g., + `s3://.../experiment.20210802-121908.tgz` + :param dst_dir: destination directory where to save the data + :param aws_profile: the profile to use. We use a string and not an + `AwsProfile` since this is typically the outermost caller in the stack, + and it doesn't reuse an S3 fs object + :param incremental: skip if the tgz file is already present locally + :return: path with the local tgz file + """ + _LOG.info( + "# Retrieving archive from '%s' to '%s' with aws_profile='%s'", + s3_file_path, + dst_dir, + aws_profile, + ) + dassert_is_s3_path(s3_file_path) + # Download the tgz file. + hio.create_dir(dst_dir, incremental=True) + dst_file = os.path.join(dst_dir, os.path.basename(s3_file_path)) + _LOG.debug(hprint.to_str("s3_file_path dst_dir dst_file")) + if incremental and os.path.exists(dst_file): + _LOG.warning("Found '%s': skipping downloading", dst_file) + else: + # Download. + s3fs_ = get_s3fs(aws_profile) + dassert_path_exists(s3_file_path, s3fs_) + _LOG.debug("Getting from s3: '%s' -> '%s", s3_file_path, dst_file) + s3fs_.get(s3_file_path, dst_file) + _LOG.info("Saved to '%s'", dst_file) + return dst_file + + +def expand_archived_data(src_tgz_file: str, dst_dir: str) -> str: + """ + Expand an S3 tarball storing results of an experiment. + + E.g., + - given a tgz file like `s3://.../experiment.20210802-121908.tgz` (which is the + result of compressing a dir like `/app/.../experiment.RH1E`) + - expand it into a dir `{dst_dir}/experiment.RH1E` + + :param src_tgz_file: path to the local file with the archived data. E.g., + `/.../experiment.20210802-121908.tgz` + :param dst_dir: directory where expand the archive tarball + :return: dir with the expanded data (e.g., `{dst_dir/experiment.RH1E`) + """ + _LOG.debug("Expanding '%s'", src_tgz_file) + # Get the name of the including dir, e.g., `experiment.RH1E`. 
+ cmd = f"cd {dst_dir} && tar tzf {src_tgz_file} | head -1" + rc, enclosing_tgz_dir_name = hsystem.system_to_one_line(cmd) + _ = rc + _LOG.debug(hprint.to_str("enclosing_tgz_dir_name")) + tgz_dst_dir = os.path.join(dst_dir, enclosing_tgz_dir_name) + if os.path.exists(tgz_dst_dir): + hdbg.dassert_dir_exists(dst_dir) + _LOG.info( + "While expanding '%s' dst dir '%s' already exists: skipping", + src_tgz_file, + tgz_dst_dir, + ) + else: + # Expand the tgz file. + # The output should be the original compressed dir under `{dst_dir}`. + # E.g., + # > tar tzf /app/.../experiment.20210802-133901.tgz + # experiment.RH1E/ + # experiment.RH1E/log.20210802-133859.txt + # experiment.RH1E/result_0/ + with htimer.TimedScope(logging.INFO, "Decompressing"): + hdbg.dassert_file_exists(src_tgz_file) + cmd = f"cd {dst_dir} && tar xzf {src_tgz_file}" + hsystem.system(cmd) + hdbg.dassert_dir_exists(tgz_dst_dir) + # Return `{dst_dir}/experiment.RH1E`. + return tgz_dst_dir + + +def get_s3_bucket_from_stage( + stage: str, *, add_suffix: Optional[str] = None +) -> str: + """ + Retrieve the S3 bucket name based on the provided deployment stage. + + :param stage: the deployment stage, which can be 'test', 'preprod', + or 'prod'. + :param add_suffix: optional suffix to append to the bucket name. + :return: return corresponding S3 bucket name. + """ + # Mapping of stages to their respective S3 bucket names. + _S3_BUCKET_BY_STAGE = { + "test": "cryptokaizen-data-test", + "preprod": "cryptokaizen-data.preprod", + "prod": "cryptokaizen-data", + } + # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. + # Retrieve the region from the environment variable or use the default region 'eu-north-1'. + region = os.environ.get("CSFY_AWS_DEFAULT_REGION", "eu-north-1") + # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. + if region == "ap-northeast-1": + _S3_BUCKET_BY_STAGE["preprod"] = "cryptokaizen-data-tokyo.preprod" + # Ensure the provided stage is valid. 
+ hdbg.dassert_in(stage, _S3_BUCKET_BY_STAGE) + s3_bucket = _S3_BUCKET_BY_STAGE[stage] + # Append the suffix to the bucket name if provided. + if add_suffix: + s3_bucket = os.path.join(s3_bucket, add_suffix) + return s3_bucket diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py new file mode 100644 index 000000000..f86f50342 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py @@ -0,0 +1,233 @@ +""" +Import as: + +import helpers.hsecrets as hsecret +""" + +import atexit +import json +import sys +import warnings +from typing import Any, Dict, Optional + +from botocore.client import BaseClient +from botocore.exceptions import ClientError + +import helpers.hdbg as hdbg + + +def get_secrets_client(aws_profile: str) -> BaseClient: + """ + Return client to work with AWS Secrets Manager in the specified region. + """ + import helpers.haws as haws + + session = haws.get_session(aws_profile) + client = session.client(service_name="secretsmanager") + return client + + +def _get_flag_value(flag: str) -> str: + """ + Return flag value with concatenated date string. + + E.g., for flag = 'pytest' return 'pytest_20240619'. + """ + # Import here to avoid import extra dependencies in the thin environment. + import helpers.hdatetime as hdateti + + timestamp = hdateti.get_current_date_as_string("naive_ET") + updated_flag = "_".join([flag, timestamp]) + return updated_flag + + +def update_usedby( + secret_name: str, + secret_value: Dict[str, Any], + usedBy: str, + *, + remove: bool = False, +) -> Dict[str, Any]: + """ + Update the value of `usedBy` attribute from `secret_value` in AWS secrets + manager to lock the key. Unlock the key at the end of process using default + value of `usedBy`. 
+ + :param secret_name: SecretId of record to be updated. + :param secret_value: Current value of SecretString. + :param usedBy: value of `usedBy` to be updated. Used to remove from + list on deallocation of resource, i.e., when remove is True. + :param remove: Boolean to decide addition or removal of `usedBy` value + in the secret value list of scripts. Default is False. + :return secret_value: SecretString with updated `usedBy` script. + """ + hdbg.dassert_isinstance(secret_name, str) + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # Modify value of used by in secret value. + if not remove: + try: + secret_value["usedBy"].append(usedBy) + except KeyError: + secret_value["usedBy"] = [usedBy] + else: + secret_value["usedBy"].remove(usedBy) + # Update the modified secret value in AWS secret manager. + client.update_secret( + SecretId=secret_name, SecretString=json.dumps(secret_value) + ) + return secret_value + + +def lock_secret( + secret_name: str, secret_value: Dict[str, Any] +) -> Optional[Dict[str, Any]]: + """ + Lock access to a secret to the current script. + + Lock access to secret key with trading keyword in `secret_name`, for a + runtime instance of a script, to avoid parallel run. + Add the script name to `usedBy` list in the AWS secret manager. + Raise error if the same script tries to access a locked key. + + :param secret_name: SecretId of record to be updated. + :param secret_value: Current value of SecretString. + :return secret_value: SecretString with updated `usedBy` script if not + already locked. + """ + current_script = sys.argv[0].split("/")[-1] + # Check if the current script is already using this secret. + current_usedBy = list( + filter(lambda x: current_script in x, secret_value.get("usedBy", [])) + ) + # Check current value of usedBy to determine further action. + if not current_usedBy: + # Fetch and update value of usedBy if not locked. 
+ usedBy = _get_flag_value(current_script) + secret_value = update_usedby(secret_name, secret_value, usedBy) + # Release secret key lock on termination. + atexit.register( + update_usedby, secret_name, secret_value, usedBy, remove=True + ) + else: + # Raise warning of locked resource with current use info. + # raise RuntimeError() + warnings.warn( + f"Secret key is already in use by {current_usedBy[0]}", + RuntimeWarning, + ) + return secret_value + + +# TODO(Juraj): add support to access secrets for different profiles, not important rn +def get_secret(secret_name: str) -> Optional[Dict[str, Any]]: + """ + Fetch secret values(s) from AWS secrets manager. + + :return a dictionary of key-value pairs. E.g., `get_secret('binance')` returns + ``` + { + 'apiKey': '', + 'secret': '' + } + ``` + """ + # TODO(Juraj): This assertion can't be applied universally. + # Check if the secret name format is valid. + # dassert_valid_secret(secret_name) + hdbg.dassert_isinstance(secret_name, str) + # Create a AWS Secrets Manager client. + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html + # for the full list of exceptions. + # Define access key to check the entity requesting for secret key. + access_key = "trading" + try: + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + secret_string = get_secret_value_response["SecretString"] + hdbg.dassert_isinstance(secret_string, str) + secret_val = json.loads(secret_string) + # Check access entity value to lock secret key to avoid parallel run. + if access_key in secret_name: + # TODO(Juraj): Temporarily disabled in #Cmtask10068. + # secret_val = lock_secret(secret_name, secret_val) + pass + except ClientError as e: + if e.response["Error"]["Code"] == "ResourceNotFoundException": + # Let user know the secret does not exist. 
+ raise ValueError(f"No such secret: {secret_name}") from e + # If not yet implemented handler then just re-raise. + raise e + return secret_val + + +# TODO(Juraj): add support to store secrets in different regions, not important rn. +def store_secret( + secret_name: str, secret_value: Dict[str, str], *, description: str = "" +) -> Optional[bool]: + """ + Store secret values(s) into AWS secrets manager, specify secret as a dict + of key-value pairs. + + :return: bool representing whether writing was successful or not + """ + hdbg.dassert_isinstance(secret_name, str) + # Create a AWS Secrets Manager client. + aws_profile = "ck" + client = get_secrets_client(aws_profile) + # See + # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_CreateSecret.html + # for the full list of exceptions. + try: + create_secret_value_response = client.create_secret( + Name=secret_name, + Description=description, + SecretString=json.dumps(secret_value), + ) + # If no exception was thrown and we get back the name we passed in the + # response then the secret was stored successfully. + return_name = create_secret_value_response["Name"] + hdbg.dassert_isinstance(return_name, str) + res: bool = create_secret_value_response["Name"] == secret_name + return res + except ClientError as e: + if e.response["Error"]["Code"] == "ResourceExistsException": + # Let user know the secret with this name already exists. + raise ValueError( + "Secret with this name already exists:", secret_name + ) from e + # If not yet implemented handler then just re-raise. + raise e + # If we did not return inside try block then something went wrong. + return False + + +# TODO(Juraj): this might be deprecated since this is only fit for exchange API keys +def dassert_valid_secret(secret_id: str) -> None: + """ + Enforce that the valid format is `exchange_id.stage.account_type.num`. 
+ """ + values = secret_id.split(".") + hdbg.dassert_eq(len(values), 4) + hdbg.dassert_in( + values[0], + [ + "binance", + "bitfinex", + "coinbase", + "coinbaseprime", + "coinbasepro", + "ftx", + "gateio", + "huobi", + "kraken", + "kucoin", + "test", + ], + ) + hdbg.dassert_in(values[1], ["local", "preprod"]) + hdbg.dassert_in(values[2], ["trading", "sandbox"]) + hdbg.dassert( + values[3].isnumeric(), "values[3] should be numeric, got: %s", values[3] + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py new file mode 100644 index 000000000..8e3a0cffb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py @@ -0,0 +1,1167 @@ +""" +Identify on which server we are running. + +Import as: + +import helpers.hserver as hserver +""" + +import functools +import logging +import os +import shutil +import subprocess +from typing import Dict, List, Optional, Tuple + +# This module should depend only on: +# - Python standard modules +# See `helpers/dependencies.txt` for more details + +_LOG = logging.getLogger(__name__) + +_WARNING = "\033[33mWARNING\033[0m" + + +def _print(msg: str) -> None: + _ = msg + # _LOG.info(msg) + if False: + print(msg) + + +# Copied from hprint to avoid import cycles. +def _indent(txt: str, *, num_spaces: int = 2) -> str: + """ + Add `num_spaces` spaces before each line of the passed string. + """ + spaces = " " * num_spaces + txt_out = [] + for curr_line in txt.split("\n"): + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + res = "\n".join(txt_out) + return res + + +# We can't use `hsystem` to avoid import cycles. 
def _system_to_string(cmd: str) -> Tuple[int, str]:
    """
    Run a shell command and return its return code and its output.

    The command is run through the shell and stderr is merged into stdout, so
    the returned output contains both streams.

    :param cmd: command to run
    :return: tuple of (return code, output stripped of leading / trailing
        whitespace)
    """
    result = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        # Redirect stderr to stdout.
        stderr=subprocess.STDOUT,
        shell=True,
        text=True,
    )
    rc = result.returncode
    output = result.stdout
    output = output.strip()
    return rc, output


# #############################################################################
# Host
# #############################################################################


# We can't rely only on the name / version of the host to infer where we are
# running, since inside Docker the name of the host is like `01a7e34a82a5`. Of
# course, there is no way to know anything about the host for security reason,
# so we pass this value from the external environment to the container, through
# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_OS_VERSION`).


# Sometimes we want to know if:
# - The processor is x86_64 or arm64
# - The host is Mac or Linux
# - We are running on a Causify machine or on an external machine
# - We are inside CI or not
# TODO(gp): Grep all the use cases in the codebase and use the right function.


def get_host_user_name() -> Optional[str]:
    """
    Return the name of the user running the host.

    :return: value of the `CSFY_HOST_USER_NAME` env var, or `None` if the env
        var is not set
    """
    return os.environ.get("CSFY_HOST_USER_NAME", None)


def get_dev_csfy_host_names() -> List[str]:
    """
    Return the names of the Causify dev servers.

    :return: a new list with the dev server names
    """
    host_names = ("dev1", "dev2", "dev3")
    return list(host_names)


# TODO(gp): -> is_inside_docker_container()
def is_inside_docker() -> bool:
    """
    Return whether we are inside a container or not.

    The check is based on the existence of the `/.dockerenv` file.
    """
    # From https://stackoverflow.com/questions/23513045
    ret = os.path.exists("/.dockerenv")
    return ret


def _get_host_name() -> str:
    """
    Return the name of the host (not the machine) on which we are running.

    If we are inside a Docker container, we use the name of the host passed
    through the `CSFY_HOST_NAME` env var.

    :raises KeyError: if we are inside a container and `CSFY_HOST_NAME` is not
        set
    """
    if is_inside_docker():
        host_name = os.environ["CSFY_HOST_NAME"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_name = os.uname()[1]
    _LOG.debug("host_name=%s", host_name)
    return host_name


def _get_host_os_name() -> str:
    """
    Return the name of the OS on which we are running (e.g., "Linux",
    "Darwin").

    If we are inside a Docker container, we use the name of the OS passed
    through the `CSFY_HOST_OS_NAME` env var.

    :raises KeyError: if we are inside a container and `CSFY_HOST_OS_NAME` is
        not set
    """
    if is_inside_docker():
        host_os_name = os.environ["CSFY_HOST_OS_NAME"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_os_name = os.uname()[0]
    _LOG.debug("host_os_name=%s", host_os_name)
    return host_os_name


def _get_host_os_version() -> str:
    """
    Return the version of the OS on which we are running.

    If we are inside a Docker container, we use the version of the OS passed
    through the `CSFY_HOST_OS_VERSION` env var. Otherwise we use the `release`
    field of `os.uname()`.

    :raises KeyError: if we are inside a container and `CSFY_HOST_OS_VERSION`
        is not set
    """
    if is_inside_docker():
        host_os_version = os.environ["CSFY_HOST_OS_VERSION"]
    else:
        # sysname='Linux'
        # nodename='dev1'
        # release='5.15.0-1081-aws'
        # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025'
        # machine='x86_64'
        host_os_version = os.uname()[2]
    _LOG.debug("host_os_version=%s", host_os_version)
    return host_os_version


def is_host_csfy_server() -> bool:
    """
    Return whether we are running on a Causify dev server.
    """
    host_name = _get_host_name()
    ret = host_name in get_dev_csfy_host_names()
    return ret


# Map each supported macOS release name to the prefix of its Darwin version.
_MAC_OS_VERSION_MAPPING = {
    "Catalina": "19.",
    "Monterey": "21.",
    "Ventura": "22.",
    "Sequoia": "24.",
    # macOS 26 Tahoe uses Darwin 25.x (see `uname -r`).
    "Tahoe": "25.",
}


def get_host_mac_version() -> str:
    """
    Get the macOS version (e.g., "Catalina", "Monterey", "Ventura", "Tahoe").

    The Darwin tag must appear at the beginning of a version number, i.e., it
    can't be preceded by a digit or by a dot. A plain substring check is not
    enough, since a string like
    `Darwin Kernel Version 21.1.0: root:xnu-8019.41.5~1/...` (Monterey)
    contains `19.` inside the xnu build number and would be reported as
    Catalina.

    :return: the first key of `_MAC_OS_VERSION_MAPPING` whose Darwin tag is
        found in the host OS version
    :raises ValueError: if no known Darwin tag is found
    """
    # Import locally since this is the only user of `re` in this code.
    import re

    host_os_version = _get_host_os_version()
    for version, tag in _MAC_OS_VERSION_MAPPING.items():
        # Accept the tag only when it starts a version number.
        pattern = r"(?<![\d.])" + re.escape(tag)
        if re.search(pattern, host_os_version):
            return version
    raise ValueError(f"Invalid host_os_version='{host_os_version}'")


def is_host_mac_version(version: str) -> bool:
    """
    Return whether we are running on a Mac with a specific version (e.g.,
    "Catalina", "Monterey", "Ventura").

    :param version: macOS release name, which must be one of the keys of
        `_MAC_OS_VERSION_MAPPING`
    :return: whether the host macOS release matches `version`
    :raises ValueError: if the host OS version is not a known macOS version
    """
    assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'"
    host_mac_version = get_host_mac_version()
    ret = version.lower() == host_mac_version.lower()
    return ret


def is_host_gp_mac() -> bool:
    """
    Return whether we are running on a Mac owned by GP.

    This is used to check if we can use a specific feature before
    releasing it to all the users.
    """
    host_name = _get_host_name()
    ret = host_name.startswith("gpmac.")
    return ret


# #############################################################################
# Detect server.
# #############################################################################


def is_inside_ci() -> bool:
    """
    Return whether we are running inside the Continuous Integration flow.

    This is true when the `CSFY_CI` env var is set to a non-empty string.
    """
    if "CSFY_CI" not in os.environ:
        ret = False
    else:
        ret = os.environ["CSFY_CI"] != ""
    return ret


def is_inside_unit_test() -> bool:
    """
    Return whether we are running code inside the regressions.

    This is true when the `PYTEST_CURRENT_TEST` env var is set.
    """
    ret = "PYTEST_CURRENT_TEST" in os.environ
    return ret


# TODO(gp): Remove!
+def is_dev_csfy() -> bool: + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws', + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', + # machine='x86_64' + host_name = os.uname()[1] + host_names = ("dev1", "dev2", "dev3") + csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") + _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) + is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names + return is_dev_csfy_ + + +# TODO(gp): This is obsolete and should be removed. +def is_dev4() -> bool: + """ + Return whether it's running on dev4. + """ + host_name = os.uname()[1] + csfy_host_name = os.environ.get("CSFY_HOST_NAME", None) + dev4 = "cf-spm-dev4" + _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) + is_dev4_ = dev4 in (host_name, csfy_host_name) + # + if not is_dev4_: + dev4 = "cf-spm-dev8" + _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) + is_dev4_ = dev4 in (host_name, csfy_host_name) + return is_dev4_ + + +def is_host_mac(*, version: Optional[str] = None) -> bool: + """ + Return whether we are running on macOS and, optionally, on a specific + version. + + :param version: check whether we are running on a certain macOS version (e.g., + `Catalina`, `Monterey`) + """ + _LOG.debug("version=%s", version) + host_os_name = os.uname()[0] + _LOG.debug("os.uname()=%s", str(os.uname())) + csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) + _LOG.debug( + "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name + ) + is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" + if version is None: + # The user didn't request a specific version, so we return whether we + # are running on a Mac or not. + _LOG.debug("is_mac_=%s", is_mac_) + return is_mac_ + else: + # The user specified a version: if we are not running on a Mac then we + # return False, since we don't even have to check the macOS version. 
+ if not is_mac_: + _LOG.debug("is_mac_=%s", is_mac_) + return False + # Check the macOS version we are running. + if version == "Catalina": + # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: + # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 + macos_tag = "19.6" + elif version == "Monterey": + # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: + # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 + macos_tag = "21." + elif version == "Ventura": + macos_tag = "22." + elif version == "Sequoia": + # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: + # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 + macos_tag = "24." + elif version == "Tahoe": + # Darwin … 25.1.0 Darwin Kernel Version 25.1.0: … /RELEASE_ARM64_… arm64 + macos_tag = "25." + else: + raise ValueError(f"Invalid version='{version}'") + _LOG.debug("macos_tag=%s", macos_tag) + host_os_version = os.uname()[2] + # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; + # root:xnu-6153.141.2~1/RELEASE_X86_64' + csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") + _LOG.debug( + "host_os_version=%s csfy_host_os_version=%s", + host_os_version, + csfy_host_os_version, + ) + is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version + _LOG.debug("is_mac_=%s", is_mac_) + return is_mac_ + + +def is_prod_csfy() -> bool: + """ + Detect whether we are running in a Causify production container. + + This env var is set inside `devops/docker_build/prod.Dockerfile`. + """ + # TODO(gp): CK -> CSFY + return bool(os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", False)) + + +# TODO(gp): Obsolete. +def is_ig_prod() -> bool: + """ + Detect whether we are running in an IG production container. + + This env var is set inside `//lime/devops_cf/setenv.sh` + """ + # CF sets up `DOCKER_BUILD` so we can use it to determine if we are inside + # a CF container or not. 
+ # print("os.environ\n", str(os.environ)) + return bool(os.environ.get("DOCKER_BUILD", False)) + + +# TODO(Grisha): consider adding to `setup_to_str()`. +def is_inside_ecs_container() -> bool: + """ + Detect whether we are running in an ECS container. + """ + # When deploying jobs via ECS the container obtains credentials based + # on passed task role specified in the ECS task-definition, refer to: + # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html + ret = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ + return ret + + +# ############################################################################# + + +def is_external_linux() -> bool: + """ + Detect whether we are running on a non-server/non-CI Linux machine. + + This returns true when we run on the machine of an intern, or a non- + CSFY contributor. + """ + if is_host_csfy_server() or is_inside_ci(): + # Dev servers and CI are not external Linux systems. + ret = False + else: + # We need to check if the host is Linux. + host_os_name = _get_host_os_name() + ret = host_os_name == "Linux" + return ret + + +def is_external_dev() -> bool: + """ + Detect whether we are running on an system outside of Causify. + + E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non- + CSFY machine. + """ + ret = is_host_mac() or is_external_linux() + return ret + + +# ############################################################################# +# Set up consistency. +# ############################################################################# + + +# TODO(gp): Update this. +def _get_setup_signature() -> str: + """ + Dump all the variables that are used to make a decision about the values of + the functions in `_get_setup_settings()`. + + This function is used to mock the state of the system for testing + purposes. 
+ """ + cmds = [] + # is_prod_csfy() + cmds.append('os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", "*undef*")') + # is_dev4() + # is_dev_csfy() + # is_ig_prod() + cmds.append('os.environ.get("CSFY_HOST_NAME", "*undef*")') + # is_inside_ci() + cmds.append('os.environ.get("CSFY_CI", "*undef*")') + # is_mac() + cmds.append("os.uname()[0]") + cmds.append("os.uname()[2]") + # is_external_linux() + cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "*undef*")') + # Build an array of strings with the results of executing the commands. + results = [] + for cmd in cmds: + result_tmp = cmd + "=" + str(eval(cmd)) + results.append(result_tmp) + # Join the results into a single string. + result = "\n".join(results) + return result + + +# The valid set ups are: +# - Running on a Causify server (e.g., `dev1`, `dev2`, `dev3`) +# - Container +# - Host +# - External Mac (GP, Paul, interns, contributors) +# - Container +# - Host +# - External Linux (interns, contributors) +# - Container +# - Host +# - Prod container on Linux +# - Container +# - CI +# - Container + + +def is_inside_docker_container_on_csfy_server() -> bool: + """ + Return whether we are running on a Docker container on a Causify server. + """ + ret = is_inside_docker() and is_host_csfy_server() + return ret + + +def is_outside_docker_container_on_csfy_server() -> bool: + """ + Return whether we are running outside a Docker container on a Causify + server. + """ + ret = not is_inside_docker() and is_host_csfy_server() + return ret + + +def is_inside_docker_container_on_host_mac() -> bool: + """ + Return whether we are running on a Docker container on a Mac host. + """ + ret = is_inside_docker() and is_host_mac() + return ret + + +def is_outside_docker_container_on_host_mac() -> bool: + """ + Return whether we are running outside of a Docker container on a Mac host. 
def _setup_to_str(setups: List[Tuple[str, bool]]) -> str:
    """
    Return a string representation of the current server setup configuration.

    Each entry is rendered as the setting name, left-aligned in a column one
    character wider than the longest name, immediately followed by its value.

    :param setups: `(name, value)` pairs, e.g., as returned by
        `_get_setup_settings()`
    :return: string with each setting on a new line, aligned with
        padding; the empty string when `setups` is empty
    """
    if not setups:
        # `max()` raises `ValueError` on an empty sequence. With no settings
        # there is nothing to align, so there is nothing to print.
        return ""
    # Find maximum length of setting names.
    max_len = max(len(name) for name, _ in setups) + 1
    # Format each line with computed padding.
    txt = [f"{name:<{max_len}}{value}" for name, value in setups]
    return "\n".join(txt)
@functools.lru_cache()
def docker_needs_sudo() -> bool:
    """
    Return whether Docker commands need to be run with sudo.

    The probe runs the `hello-world` image, first without and then with
    `sudo`. The result is cached, so the probe runs at most once per process.

    :return: `False` if Docker is not installed, if neither Docker-in-Docker
        nor sibling containers are in use, or if plain `docker` works; `True`
        if Docker works only through `sudo`
    :raises AssertionError: if Docker can't be run even with `sudo`
    """
    if not has_docker():
        return False
    # This check is required to ensure it does not cause issues when running on ECS
    # Fargate through Airflow, since ECS Fargate does not support either DinD
    # or sibling containers.
    # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/fargate-security-considerations.html
    # TODO(heanh): Check if we can use `is_inside_ecs_container()` to check if
    # we are inside Airflow.
    if not has_dind_support() and not use_docker_sibling_containers():
        return False
    # Another way to check is to see if your user is in the docker group:
    # > groups | grep docker
    # Redirect stdout first and then stderr onto it, so that both streams are
    # discarded. With the opposite order (`2>&1 >/dev/null`) stderr is
    # duplicated onto the original stdout and still reaches the terminal.
    rc = os.system("docker run hello-world >/dev/null 2>&1")
    if rc == 0:
        return False
    #
    rc = os.system("sudo docker run hello-world >/dev/null 2>&1")
    if rc == 0:
        return True
    # Raise explicitly instead of `assert False`: assert statements are removed
    # when Python runs with `-O`, which would make this function return `None`.
    raise AssertionError("Failed to run docker")
def is_csfy_dind_enabled() -> bool:
    """
    Return whether the user opted in to Docker-in-Docker through the
    `CSFY_ENABLE_DIND` env var.

    :return: `True` when the variable is exactly `1`, or equals `true` / `yes`
        in any letter case; `False` otherwise, including when it is unset
    """
    flag = os.environ.get("CSFY_ENABLE_DIND", "0")
    if flag == "1":
        return True
    return flag.lower() in ("true", "yes")
def get_docker_info() -> str:
    """
    Summarize the Docker capabilities detected on this system.

    :return: the title `Docker info` followed by one `name=value` entry per
        line, formatted through `_indent()`
    """
    # The probes are called in the same order in which they are reported.
    docker_found = has_docker()
    version_cmd = r"docker version --format '{{.Server.Version}}'"
    _, docker_version = _system_to_string(version_cmd)
    needs_sudo = docker_needs_sudo()
    privileged = has_docker_privileged_mode()
    inside_docker = is_inside_docker()
    if inside_docker:
        sibling_support = has_docker_sibling_containers_support()
        children_support = has_docker_children_containers_support()
    else:
        # Outside a container these two capabilities are not probed.
        sibling_support = "*undef*"
        children_support = "*undef*"
    entries = [
        f"has_docker={docker_found}",
        f"docker_version='{docker_version}'",
        f"docker_needs_sudo={needs_sudo}",
        f"has_privileged_mode={privileged}",
        f"is_inside_docker={inside_docker}",
        f"has_docker_sibling_containers_support={sibling_support}",
        f"has_docker_children_containers_support={children_support}",
    ]
    # Format as title with indented items.
    return "Docker info" + "\n" + _indent("\n".join(entries))
We should base our decisions on the actual capabilities of +# the system. + + +# TODO(gp): -> has_docker_privileged_mode +@functools.lru_cache() +def has_dind_support() -> bool: + """ + Return whether the current container supports privileged mode. + + This is needed to use Docker-in-Docker. + """ + _print(f"is_inside_docker()={is_inside_docker()}") + if not is_inside_docker(): + # Outside Docker there is no privileged mode. + _print("-> ret = False") + return False + # TODO(gp): Not sure this is really needed since we do this check + # after enable_privileged_mode controls if we have dind or not. + if _is_mac_version_with_sibling_containers(): + return False + # TODO(gp): This part is not multi-process friendly. When multiple + # processes try to run this code they interfere. A solution is to run `ip + # link` in the entrypoint and create a `has_docker_privileged_mode` file + # which contains the value. + # We rely on the approach from https://stackoverflow.com/questions/32144575 + # to check if there is support for privileged mode. + # Sometimes there is some state left, so we need to clean it up. + # TODO(Juraj): this is slow and inefficient, but works for now. + cmd = "sudo docker run hello-world" + rc = os.system(cmd) + _print(f"cmd={cmd} -> rc={rc}") + has_dind = rc == 0 + # dind is supported on both Mac and GH Actions. + # TODO(Juraj): HelpersTask16. + # if check_repo: + # if hserver.is_inside_ci(): + # # Docker-in-docker is needed for GH actions. For all other builds is optional. 
def _raise_invalid_host(only_warning: bool) -> None:
    """
    Report that the current host doesn't match any of the known set-ups.

    :param only_warning: if True log the problem as a warning, otherwise raise
    :raises ValueError: if `only_warning` is False
    """
    host_os_name = os.uname()[0]
    am_host_os_name = os.environ.get("AM_HOST_OS_NAME", None)
    # The set-up detection in this module reads `CSFY_HOST_OS_NAME` (see
    # `_get_setup_signature()`), so report it too: printing only the `AM_`
    # variable hides the value that actually drove the decision.
    csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None)
    msg = (
        f"Don't recognize host: host_os_name={host_os_name}, "
        f"am_host_os_name={am_host_os_name}, "
        f"csfy_host_os_name={csfy_host_os_name}"
    )
    if only_warning:
        _LOG.warning(msg)
    else:
        raise ValueError(msg)
# TODO(gp): -> use_docker_sudo_in_commands
def has_docker_sudo() -> bool:
    """
    Return whether Docker commands should be run with `sudo` or not.

    The set-ups are checked in a fixed order and the first match decides. An
    unrecognized set-up is reported as a warning and treated as "no sudo".
    """
    # Keep this in alphabetical order.
    if is_dev_csfy() or is_external_linux():
        return True
    if is_inside_ci():
        return False
    if is_host_mac():
        # macOS runs Docker with sudo by default.
        # TODO(gp): This is not true.
        return True
    if is_prod_csfy():
        return False
    # Unknown set-up: only warn.
    only_warning = True
    _raise_invalid_host(only_warning)
    return False
def use_docker_network_mode_host() -> bool:
    """
    Return whether Docker should be run with the host network mode.

    :return: always `False`
    """
    # TODO(gp): Not sure this is needed any more, since we typically run in
    # bridge mode.
    # The value used to be computed as `is_host_mac() or is_dev_csfy()` and
    # then unconditionally overwritten with `False`. That left a dead store and
    # an unreachable `assert use_docker_sibling_containers()`. Restore both if
    # the host network mode is enabled again.
    return False
# TODO(gp): -> repo_config.yaml
def skip_submodules_test() -> bool:
    """
    Return whether the tests in the submodules should be skipped.

    E.g. while running `i run_fast_tests`.

    :return: `True` only when the current repo is `//dev_tools`
    """
    import helpers.repo_config_utils as hrecouti

    # TODO(gp): Why do we want to skip running tests?
    # TODO(gp): Remove this dependency from a repo.
    # Skip running `amp` tests from `dev_tools`.
    repos_skipping_submodules = ("//dev_tools",)
    current_repo = hrecouti.get_repo_config().get_name()
    return current_repo in repos_skipping_submodules
def config_func_to_str() -> str:
    """
    Render the value of all the config functions.

    Each function is called with no arguments and reported as
    `<name>='<value>'`, one per line, sorted by name. A function whose lookup
    or call raises `NameError` is reported as `*undef*`.

    :return: newline-separated `<name>='<value>'` entries
    """
    # Get the functions with:
    # grep "def " helpers/hserver.py | sort | awk '{ print $2 }' | perl -i -ne 'print "$1\n" if /^([^\(]+)/'
    config_funcs = (
        "enable_privileged_mode",
        "get_docker_shared_group",
        "get_docker_user",
        "get_host_user_name",
        "get_shared_data_dirs",
        "has_dind_support",
        "has_docker_sudo",
        "is_AM_S3_available",
        "is_CK_S3_available",
        "is_csfy_dind_enabled",
        "is_dev4",
        "is_dev_csfy",
        "is_external_linux",
        "is_host_mac",
        "is_ig_prod",
        "is_inside_ci",
        "is_inside_docker",
        "is_inside_ecs_container",
        "is_inside_unit_test",
        "is_prod_csfy",
        "run_docker_as_root",
        "skip_submodules_test",
        "use_docker_db_container_name_to_connect",
        "use_docker_network_mode_host",
        "use_docker_sibling_containers",
        "use_main_network",
    )
    lines = []
    for name in sorted(config_funcs):
        try:
            _LOG.debug("func_name=%s", name)
            # The names come from the literal tuple above, not from external
            # input.
            value = eval(f"{name}()")
        except NameError:
            value = "*undef*"
        lines.append(f"{name}='{value}'")
    # Package.
    return "\n".join(lines)
def _write_private_key_file(private_key: str) -> str:
    """
    Store a private key in a new temporary file accessible only by its owner.

    :param private_key: content of the private key
    :return: path of the file; the caller is responsible for removing it
    """
    # Imported locally to leave the module-level imports untouched.
    import tempfile

    # `mkstemp()` creates the file with a unique name and owner-only
    # permissions. Unlike a fixed `/tmp/temp_key.pem` followed by `chmod`, the
    # key is never world-readable and concurrent calls can't overwrite each
    # other's key.
    fd, private_key_path = tempfile.mkstemp(prefix="sftp_key_", suffix=".pem")
    try:
        with os.fdopen(fd, "w") as temp_key_file:
            temp_key_file.write(private_key)
    except Exception:
        # Don't leave a partially written key behind.
        if os.path.exists(private_key_path):
            os.remove(private_key_path)
        raise
    return private_key_path


def download_file_using_lftp(
    remote_data_path: str, save_path: str, hostname: str, secret_name: str
) -> None:
    """
    Download files from a remote SFTP server using `lftp` and a private SSH
    key.

    A failure of the `lftp` command is logged and not propagated to the caller.

    :param remote_data_path: path to the remote directory on the SFTP
        server from which files should be downloaded.
    :param save_path: local directory where the downloaded files will be
        saved.
    :param hostname: hostname of the SFTP server.
    :param secret_name: Name of the secret in AWS Secrets Manager that
        stores the SFTP credentials, including the username and private
        key.
    :return: None.
    """
    # Fetch the private key from AWS Secrets Manager.
    secret_dict = hsecret.get_secret(secret_name)
    username = secret_dict["username"]
    private_key = secret_dict["private_key"]
    # Write the private key to a temporary file with owner-only permissions.
    private_key_path = _write_private_key_file(private_key)
    try:
        # Construct the lftp command.
        # The 'set sftp:connect-program' allows specifying custom SSH options for the SFTP connection.
        # -o GSSAPIAuthentication=no: Disables GSSAPI to avoid unnecessary authentication mechanisms.
        # -o StrictHostKeyChecking=no: Bypasses the host key verification prompt for new hosts.
        # NOTE(review): this also disables protection against a spoofed server.
        # -a: Disables SSH agent forwarding (`-A` is the option that enables it).
        # -x: Disables X11 forwarding (not needed for file transfer).
        # -i {private_key_path}: Specifies the private key for SSH authentication.
        # 'mirror --parallel=10': Downloads files from the remote server, with 10 parallel downloads to speed up the process.
        lftp_cmd = (
            f"lftp -u {username}, -e \"set sftp:connect-program 'ssh -o GSSAPIAuthentication=no "
            f"-o StrictHostKeyChecking=no -a -x -i {private_key_path}'; "
            f'mirror --parallel=10 {remote_data_path} {save_path}; quit" '
            f"sftp://{hostname}"
        )
        _LOG.info("Executing lftp command: %s", lftp_cmd)
        subprocess.run(
            lftp_cmd,
            shell=True,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        _LOG.error(
            "lftp command failed with error: %s",
            e.stderr,
        )
    finally:
        # Never leave the private key on disk once the transfer is over.
        if os.path.exists(private_key_path):
            os.remove(private_key_path)


def get_sftp_connection(hostname: str, secret_name: str) -> "pysftp.Connection":
    """
    Return SFTP connection object using a private key stored in AWS Secrets
    Manager.

    :param hostname: hostname of the SFTP server.
    :param secret_name: name of the secret in AWS Secrets Manager
        containing the username and the private key.
    :return: active SFTP connection object.
    """
    # Fetch the private key from AWS Secrets Manager.
    secret_dict = hsecret.get_secret(secret_name)
    username = secret_dict["username"]
    private_key = secret_dict["private_key"]
    # Write the private key to a temporary file with owner-only permissions.
    private_key_path = _write_private_key_file(private_key)
    try:
        cnopts = pysftp.CnOpts()
        # Disable host key checking.
        cnopts.hostkeys = None
        sftp = pysftp.Connection(
            hostname,
            username=username,
            private_key=private_key_path,
            cnopts=cnopts,
        )
    finally:
        # Remove the temporary key file once the connection attempt is over,
        # also when the connection fails.
        if os.path.exists(private_key_path):
            os.remove(private_key_path)
    return sftp
def get_file_names(sftp: pysftp.Connection, sftp_remote_dir: str) -> List[str]:
    """
    List the names of the entries in a directory on a remote SFTP server.

    :param sftp: An active SFTP Connection object.
    :param sftp_remote_dir: The directory on the SFTP server whose
        entries are listed.
    :return: The `filename` of every entry reported by `listdir_attr()`
        for the directory, in the order returned by the server.
    """
    return [entry.filename for entry in sftp.listdir_attr(sftp_remote_dir)]
class SlackNotifier:
    """
    Post messages to Slack channels on behalf of a bot.
    """

    def __init__(self, bot_token: Optional[str] = None) -> None:
        """
        Store the bot token used to authenticate against the Slack API.

        :param bot_token: Slack bot token (starts with 'xoxb-'); when missing
            or empty, the `SLACK_BOT_TOKEN` env var is used instead
        :raises ValueError: if neither source provides a token
        """
        self.bot_token = bot_token or os.environ.get("SLACK_BOT_TOKEN")
        if self.bot_token:
            return
        raise ValueError(
            "No bot token provided via parameter or SLACK_BOT_TOKEN env var"
        )

    def send_message(
        self,
        channel: str,
        message: str,
    ) -> None:
        """
        Post a text message to a Slack channel.

        :param channel: Slack channel ID (e.g., 'C1234567890') or
            channel name (e.g., '#notifications')
        :param message: Message text to send
        :raises ValueError: if the Slack API answers without `ok` set
        """
        endpoint = "https://slack.com/api/chat.postMessage"
        auth_headers = {
            "Authorization": f"Bearer {self.bot_token}",
            "Content-Type": "application/json",
        }
        body = {
            "channel": channel,
            "text": message,
        }
        reply = requests.post(
            endpoint, headers=auth_headers, json=body, timeout=30
        )
        # HTTP-level failures are raised by `requests`.
        reply.raise_for_status()
        # Slack reports application-level failures in the `ok` / `error`
        # fields of the response body.
        outcome = reply.json()
        if outcome.get("ok"):
            _LOG.info("Message sent successfully to %s", channel)
            return
        raise ValueError(f"Slack API error: {outcome.get('error')}")
def create_in_operator(values: List[str], column_name: str) -> str:
    """
    Transform a list of possible values into an IN operator clause.

    Each value is rendered as a single-quoted SQL string literal, with any
    embedded single quote doubled.

    :param values: a list of possible values for the given column, e.g. `["binance", "ftx"]`
    :param column_name: the name of the column, e.g. 'exchange_id'; it is
        inserted verbatim, so it must not come from untrusted input
    :return: IN operator clause with specified values,
        e.g. `"exchange_id IN ('binance','ftx')"`
    """
    # Double the embedded single quotes (the standard SQL escape), so that a
    # value like `O'Brien` stays inside its literal instead of terminating it
    # early and corrupting, or injecting into, the statement.
    quoted_values = [
        "'" + str(value).replace("'", "''") + "'" for value in values
    ]
    # NOTE(review): an empty `values` yields `<column> IN ()` — confirm that
    # callers never pass an empty list, or that the target DB accepts it.
    in_operator = f"{column_name} IN (" + ",".join(quoted_values) + ")"
    return in_operator
def get_connection(
    host: str,
    dbname: str,
    port: int,
    user: str,
    password: str,
    autocommit: bool = True,
) -> DbConnection:
    """
    Open a connection to a SQL database.

    :param host: host of the DB server
    :param dbname: name of the database
    :param port: port of the DB server
    :param user: user to connect as
    :param password: password of `user`
    :param autocommit: if True, enable autocommit on the new connection
    :return: the open connection
    """
    # The password is deliberately left out of the logged values.
    _LOG.debug(hprint.to_str("host dbname port user"))
    conn_params = {
        "host": host,
        "dbname": dbname,
        "port": port,
        "user": user,
        "password": password,
    }
    connection = psycop.connect(**conn_params)
    if autocommit:
        connection.autocommit = True
    return connection
def get_connection_from_string(
    conn_as_str: str,
    autocommit: bool = True,
) -> DbConnection:
    """
    Create a connection from a string.

    E.g., `host=localhost dbname=im_db_local port=5432 user=...
    password=...`

    The string must start with the fields `host`, `dbname`, `port`, `user`,
    `password`, in this order, separated by a single space. `port` must be made
    of digits; every other value can be any run of non-whitespace characters.

    :param conn_as_str: the connection string, passed as-is to
        `psycopg2.connect()` once validated
    :param autocommit: if True, set `connection.autocommit = True` before
        returning the connection
    :return: the open connection
    """
    # `\S+` (instead of `\w+`) accepts host names such as `cf-spm-dev4`,
    # `db.example.com` or `127.0.0.1`, and passwords containing punctuation.
    # Every string matched by the previous `\w+` pattern is still matched.
    regex = r"host=\S+ dbname=\S+ port=\d+ user=\S+ password=\S+"
    m = re.match(regex, conn_as_str)
    hdbg.dassert(m, "Invalid connection string: '%s'", conn_as_str)
    connection = psycop.connect(conn_as_str)
    if autocommit:
        connection.autocommit = True
    return connection
def check_db_connection(
    host: str,
    dbname: str,
    port: int,
    user: str,
    password: str,
) -> Tuple[bool, Optional[psycop.OperationalError]]:
    """
    Check whether a connection to a DB can be established, without raising.

    A probe connection is opened with `get_connection()` and closed right away.

    :param host: DB host
    :param dbname: DB name
    :param port: DB port
    :param user: DB user
    :param password: DB password
    :return: a tuple `(connection_exist, error)` where
        - `connection_exist` is True if the connection succeeded
        - `error` is the `psycopg2.OperationalError` raised by the failed
          attempt, or None on success
    """
    try:
        connection = get_connection(
            host=host, dbname=dbname, port=port, user=user, password=password
        )
    except psycop.OperationalError as e:
        return False, e
    # The connection is only a probe: release it instead of leaking one server
    # session per successful check.
    connection.close()
    return True, None
def get_engine_version(connection: DbConnection) -> str:
    """
    Report information on the SQL engine.

    E.g.,
    ```
    PostgreSQL 11.5 on x86_64-pc-linux-gnu compiled by gcc
    (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit
    ```

    :param connection: a database connection
    :return: the value returned by `SELECT version();`
    """
    query = "SELECT version();"
    df = pd.read_sql_query(query, connection)
    # The result has a single cell: first row, first column.
    # pylint: disable=no-member
    info: str = df.iloc[0, 0]
    return info
def disconnect_all_clients(connection: DbConnection) -> None:
    """
    Terminate the other sessions attached to the DB used by `connection`.

    The target database is the one `connection` is connected to
    (`connection.info.dbname`). The backend serving `connection` itself is
    excluded, so that the statement does not kill the session executing it.

    :param connection: a database connection
    """
    # From https://stackoverflow.com/questions/36502401
    # `pg_stat_activity.datname` stores a database name, so it must be compared
    # with the name of the DB, not with the host of the connection.
    dbname = connection.info.dbname
    # The DB name is passed as a query parameter rather than formatted into the
    # SQL text.
    query = """
    SELECT pg_terminate_backend(pid)
    FROM pg_stat_activity
    WHERE datname = %s
        AND pid <> pg_backend_pid();"""
    connection.cursor().execute(query, (dbname,))
def create_database(
    connection: DbConnection,
    dbname: str,
    *,
    overwrite: Optional[bool] = None,
) -> None:
    """
    Create empty database.

    :param connection: database connection
    :param dbname: database to create
    :param overwrite: if True, drop the database (when it exists) before
        creating it; otherwise raise if a database with this name exists
    :raises ValueError: if `overwrite` is not set and `dbname` already exists
    """
    _LOG.debug("connection=%s", connection)
    with connection.cursor() as cursor:
        if overwrite:
            cursor.execute(
                psql.SQL("DROP DATABASE IF EXISTS {} WITH (FORCE);").format(
                    psql.Identifier(dbname)
                )
            )
        else:
            # Look for the name among the existing databases: the list of
            # tables says nothing about whether the database exists.
            if dbname in get_db_names(connection):
                raise ValueError(f"Database {dbname} already exists")
        cursor.execute(
            psql.SQL("CREATE DATABASE {};").format(psql.Identifier(dbname))
        )
# TODO(gp): -> as_df
def execute_query_to_df(
    connection: DbConnection,
    query: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    use_timer: bool = False,
    profile: bool = False,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Execute a query and return its result as a dataframe.

    :param connection: a database connection
    :param query: SQL query to execute
    :param limit: if not None, append ` LIMIT <limit>` to the query
    :param offset: if not None, append ` OFFSET <offset>` to the query
    :param use_timer: time the execution through `htimer`
    :param profile: prefix the query with `EXPLAIN ANALYZE` and log the result
    :param verbose: log the final query before executing it
    :return: the result of `pd.read_sql_query()`
    :raises psycopg2.Error: the error reported when the query is re-run
        directly after `pd.read_sql_query()` failed
    :raises psycopg2.OperationalError: the original error, when the direct
        re-run does not fail
    """
    if False:
        # Ask the user before executing a query.
        print(f"query=\n{query}")
        import helpers.hsystem as hsystem

        hsystem.query_yes_no("Ok to execute?")
    if limit is not None:
        query += f" LIMIT {limit}"
    if offset is not None:
        query += f" OFFSET {offset}"
    if profile:
        query = "EXPLAIN ANALYZE " + query
    if verbose:
        _LOG.info("> %s", query)
    # Compute.
    if use_timer:
        idx = htimer.dtimer_start(0, "Sql time")
    try:
        df = pd.read_sql_query(query, connection)
    except psycop.OperationalError:
        # Catch error and execute query directly to print error.
        # The cursor is needed only on this path, so it is created here.
        try:
            connection.cursor().execute(query)
        except psycop.Error as e:
            print(e.pgerror)
            raise e
        # The direct execution did not fail, but there is still no dataframe
        # to return: propagate the original error instead of failing below
        # with an `UnboundLocalError` on `df`.
        raise
    if use_timer:
        htimer.dtimer_stop(idx)
    if profile:
        _LOG.info("df=%s", df)
    return df
def find_tables_common_columns(
    connection: DbConnection,
    tables: List[str],
    as_df: bool = False,
) -> Optional[pd.DataFrame]:
    """
    Report the columns shared by each pair of tables.

    The columns of a table are read from the first rows returned by
    `SELECT * FROM <table> LIMIT 5`.

    :param connection: a database connection
    :param tables: names of the tables to compare pairwise
    :param as_df: if True return the result as a dataframe, otherwise print it
    :return: None when `as_df` is False, otherwise a dataframe with one row per
        pair of tables and columns `table1`, `table2`, `num_comm_cols`,
        `common_cols` (the common column names joined by a space)
    """
    limit = 5
    # Fetch the head of each table exactly once, instead of re-running the
    # same query for every pair the table belongs to.
    heads = []
    for table in tables:
        query = f"SELECT * FROM {table} LIMIT {limit} "
        heads.append(execute_query_to_df(connection, query, verbose=False))
    rows = []
    for i, df1 in enumerate(heads):
        if df1 is None:
            continue
        for j in range(i + 1, len(heads)):
            df2 = heads[j]
            if df2 is None:
                continue
            # Iterating a dataframe yields its column labels.
            common_cols = [c for c in df1 if c in df2]
            if as_df:
                rows.append(
                    (
                        tables[i],
                        tables[j],
                        len(common_cols),
                        " ".join(common_cols),
                    )
                )
            else:
                print(f"'{tables[i]}' vs '{tables[j]}'")
                print(f"    ({len(common_cols)}): {' '.join(common_cols)}")
    obj = None
    if as_df:
        obj = pd.DataFrame(
            rows, columns=["table1", "table2", "num_comm_cols", "common_cols"]
        )
    return obj
def csv_to_series(csv_as_txt: str, sep: str = ",") -> pd.Series:
    """
    Convert a text with (key, value) separated by `sep` into a `pd.Series`.

    Only the first `sep` of each line separates the key from the value, so a
    value can itself contain `sep`. Lines with an empty key are skipped.

    :param csv_as_txt: a string containing csv data
        E.g.,
        ```
        tradedate,2021-11-12
        targetlistid,1
        ```
    :param sep: csv separator, e.g. `,`
    :return: series of the values (as strings) indexed by the keys; an empty
        series if the text has no (key, value) line
    :raises ValueError: if a line with a non-empty key has no separator
    """
    lines = hprint.dedent(csv_as_txt).split("\n")
    # Split on the first separator only, to keep the rest of the line as value.
    tuples = [tuple(line.split(sep, 1)) for line in lines]
    # Remove empty tuples.
    tuples = [t for t in tuples if t[0] != ""]
    if not tuples:
        # `zip(*[])` cannot be unpacked into (index, data).
        return pd.Series([], index=[], dtype=object)
    for t in tuples:
        if len(t) != 2:
            raise ValueError(f"Missing separator '{sep}' in line '{t[0]}'")
    # Build series.
    index, data = zip(*tuples)
    # _LOG.debug("index=%s", index)
    # _LOG.debug("data=%s", data)
    srs = pd.Series(data, index=index)
    return srs
+ connection.commit() + + +# TODO(gp): -> table_name, df +def create_insert_query(df: pd.DataFrame, table_name: str) -> str: + """ + Create an INSERT query to insert data into a DB. + + :param df: data to insert into DB + :param table_name: name of the table for insertion + :return: sql query, e.g., + ``` + INSERT INTO ccxt_ohlcv_spot(timestamp,open,high,low,close) VALUES %s + ``` + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + columns = ",".join(list(df.columns)) + query = f"INSERT INTO {table_name}({columns}) VALUES %s" + _LOG.debug("query=%s", query) + return query + + +# TODO(gp): -> table_name, df +def create_insert_on_conflict_do_nothing_query( + df: pd.DataFrame, table_name: str, unique_columns: List[str] +) -> str: + """ + Create an INSERT query to insert data into a DB. If a unique constraint is + violated for a provided set of columns, duplicates are not inserted. + + :param df: data to insert into DB + :param table_name: name of the table for insertion + :param unique_columns: set of columns which should be unique record-wise. + :return: sql query, e.g., + ``` + INSERT INTO ccxt_bid_ask(timestamp,bid_size,bid_price,ask_size, + ask_price,exchange_id,currency_pair) VALUES %s + ON CONFLICT (timestamp, exchange_id, currency_pair) DO NOTHING; + ``` + """ + hdbg.dassert_isinstance(df, pd.DataFrame) + # Check that the constraint is actually applied to columns + # of the DataFrame. + hdbg.dassert_is_subset(unique_columns, list(df.columns)) + columns = ",".join(list(df.columns)) + unique_columns_str = ",".join(unique_columns) + query = f"INSERT INTO {table_name}({columns}) VALUES %s ON CONFLICT ({unique_columns_str}) \ + DO NOTHING" + _LOG.debug("query=%s", query) + return query + + +# TODO(gp): -> connection, table_name, obj +def execute_insert_query( + connection: DbConnection, + obj: Union[pd.DataFrame, pd.Series], + table_name: str, +) -> None: + """ + Insert a DB as multiple rows into the database. 
+ + :param connection: connection to the DB + :param obj: data to insert + :param table_name: name of the table for insertion + """ + if isinstance(obj, pd.Series): + df = obj.to_frame().T + else: + df = obj + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_in(table_name, get_table_names(connection)) + _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) + # Ensure the DataFrame has compatible types with + # downstream consumers (e.g., database). + df = df.applymap(lambda x: float(x) if isinstance(x, np.float64) else x) + # Transform dataframe into list of tuples. + values = [tuple(v) for v in df.to_numpy()] + # Generate a query for multiple rows. + query = create_insert_query(df, table_name) + # Execute query for each provided row. + cur = connection.cursor() + extras.execute_values(cur, query, values) + connection.commit() + + +# TODO(gp): -> connection, table_name, obj +def execute_insert_on_conflict_do_nothing_query( + connection: DbConnection, + obj: Union[pd.DataFrame, pd.Series], + table_name: str, + unique_columns: List[str], +) -> None: + """ + Insert a DB as multiple rows into the database. If a a UNIQUE constraint is + violated for a provided set of columns, duplicates are not inserted. + + :param connection: connection to the DB + :param obj: data to insert + :param table_name: name of the table for insertion + :param unique_columns: set of columns which should be unique record-wise. + If unique_columns is an empty list, a regular DB insert is executed + without the UNIQUE constraint. + """ + if isinstance(obj, pd.Series): + df = obj.to_frame().T + else: + df = obj + hdbg.dassert_isinstance(df, pd.DataFrame) + hdbg.dassert_in(table_name, get_table_names(connection)) + _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) + # Transform dataframe into list of tuples. + values = [tuple(v) for v in df.to_numpy()] + # Generate a query for multiple rows. 
def execute_query(connection: DbConnection, query: str) -> List[tuple]:
    """
    Use for generic simple operations.

    The query is executed on a new cursor and committed when the connection is
    not in autocommit mode.

    :param connection: connection to the DB
    :param query: generic query that can be: insert, update, delete, etc.
    :return: list of tuples with the results of the query; `[()]` (a list with
        one empty tuple) when `fetchall()` raises `psycopg2.ProgrammingError`
    """
    # NOTE(review): `hprint.to_str()` presumably reads `query` from the local
    # variables of this function - confirm before renaming the parameter.
    _LOG.debug(hprint.to_str("query"))
    with connection.cursor() as cursor:
        cursor.execute(query)
        # Commit explicitly only when the connection does not do it by itself.
        if not connection.autocommit:
            connection.commit()
        try:
            result = cursor.fetchall()
        except psycop.ProgrammingError:
            # Presumably raised for statements that produce no result set
            # (e.g., INSERT / UPDATE / DELETE) - TODO confirm.
            result = [()]
    return result
def is_row_with_value_present(
    connection: DbConnection,
    table_name: str,
    field_name: str,
    target_value: str,
    *,
    show_db_state: bool = True,
) -> hasynci.PollOutput:
    """
    Check with a polling function if a row with `field_name` == `target_value`
    is present in the table `table_name` of the DB.

    E.g., this can be used with polling to wait for the target value
    "hello_world.txt" in the "filename" field of the table "table_name" to appear

    :param connection: a database connection
    :param table_name: table to look into
    :param field_name: column compared with `target_value`
    :param target_value: value to look for
    :param show_db_state: log (at debug level) the whole table, sorted by
        `field_name`, before running the check
    :return:
        - success if the value is present
        - result: None
    """
    _LOG.debug(hprint.to_str("connection table_name field_name target_value"))
    # Print the state of the DB, if needed.
    if show_db_state:
        # Sort by the column under test: a hard-coded `filename` column makes
        # the query fail on any table that does not have such a column.
        query = f"SELECT * FROM {table_name} ORDER BY {field_name}"
        df = execute_query_to_df(connection, query)
        _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False))
    # Check if the required row is available.
    query = f"SELECT {field_name} FROM {table_name} WHERE {field_name}='{target_value}'"
    df = execute_query_to_df(connection, query)
    _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False))
    # Package results.
    success = df.shape[0] > 0
    result = None
    return success, result
+ tag = hintros.get_function_name(count=0) + if poll_kwargs is None: + poll_kwargs = hasynci.get_poll_kwargs(get_wall_clock_time) + num_iters, diff_num_rows = await hasynci.poll( + _is_number_of_rows_changed, + tag=tag, + **poll_kwargs, + ) + _ = num_iters + diff_num_rows = cast(int, diff_num_rows) + return diff_num_rows diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py new file mode 100644 index 000000000..2aeff7c6c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py @@ -0,0 +1,273 @@ +""" +Import as: + +import helpers.hsql_test as hsqltest +""" + +import abc +import logging +import os + +import pytest + +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsql as hsql +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestDbHelper +# ############################################################################# + + +@pytest.mark.requires_docker_in_docker +@pytest.mark.skipif( + not hserver.can_run_docker_from_docker(), + reason="Need docker children / sibling support", +) +class TestDbHelper(hunitest.TestCase, abc.ABC): + """ + Allow testing code that interacts with a DB. + + It creates / destroys a test DB during setup / teardown of the class. This means + that the same DB is reused for multiple test methods of the same class. + + The invariant is that each test method should: + - (ideally) find a clean DB to work with + - not assume that the DB is clean. 
If the DB is not clean, tests should clean it + before starting, or work around it + - E.g., if a test needs to write a table, but the table is already present and + partially filled as a leftover from a previous test, the new test should + delete the table and create it again + - clean the DB after themselves, i.e., undo the work that has been done + - E.g., if a test creates a table, then the test should delete the table at + the end of the test + + - An existing DB can be reused + - A user can create a persistent local DB in the Docker container, e.g. for OMS: + ``` + docker> (cd oms; sudo docker-compose \ + --file /app/oms/devops/compose/tmp.docker-compose.yml up \ + -d \ + oms_postgres) + ``` + or + ``` + docker> invoke oms_docker_up + ``` + - Then this class skips creating / destructing the DB, making the tests faster + and allowing easier debugging. + """ + + @classmethod + def setUpClass(cls) -> None: + """ + Initialize the test database inside test container. + """ + _LOG.info("\n%s", hprint.frame("setUpClass")) + cls._create_docker_files() + # Read the connection parameters from the env file. + cls.db_env_file = cls._get_db_env_path() + connection_info = hsql.get_connection_info_from_env_file(cls.db_env_file) + _LOG.debug("connection_info=%s", connection_info) + conn_exists = hsql.check_db_connection(*connection_info)[0] + if conn_exists: + _LOG.warning("DB is already up: skipping docker compose") + # Since we have found the DB already up, we assume that we need to + # leave it running after the tests + cls.bring_down_db = False + else: + # Start the service. + cls.docker_compose_file_path = os.path.join( + hgit.get_amp_abs_path(), cls._get_compose_file() + ) + # TODO(Grisha): use invoke task CMTask #547. 
+ cmd = ( + "sudo docker-compose " + f"--file {cls.docker_compose_file_path} " + f"--env-file {cls.db_env_file} " + f"up -d {cls._get_service_name()}" + ) + _LOG.debug("cmd=%s", cmd) + hsystem.system(cmd, suppress_output=False) + # Wait for the DB to be available. + hsql.wait_db_connection(*connection_info) + cls.bring_down_db = True + # Save connection info. + # TODO(gp): -> db_connection + cls.connection = hsql.get_connection(*connection_info, autocommit=True) + + # TODO(Grisha): difference between cmamp and kaizenflow. + @classmethod + def tearDownClass(cls) -> None: + """ + Bring down the test container. + """ + _LOG.info("\n%s", hprint.frame("tearDown")) + docker_compose_cleanup = cls.bring_down_db + if docker_compose_cleanup: + if hserver.use_main_network(): + # When using sibling containers `docker-compose down` tries to shut + # down also the `main_network`, while it is attached to the Docker + # container running the tests + # So we clean up the containers and volumes directly. + # TODO(gp): This could become an invoke target. + # Remove the container, e.g., `compose-oms_postgres7482-1`. + service_name = cls._get_service_name() + container_name = f"compose-{service_name}-1" + use_sudo = hdocker.get_use_sudo() + hdocker.container_rm(container_name, use_sudo) + # Remove the volume, e.g., `compose_oms_postgres7482_data`. + volume_name = f"compose_{service_name}_data" + hdocker.volume_rm(volume_name, use_sudo) + else: + # TODO(Grisha): use invoke task CMTask #547. + cmd = ( + "sudo docker-compose " + f"--file {cls.docker_compose_file_path} " + f"--env-file {cls.db_env_file} " + "down -v" + ) + hsystem.system(cmd, suppress_output=False) + else: + _LOG.warning("Leaving DB up") + if not hunitest.get_incremental_tests(): + os.unlink(cls._get_compose_file()) + os.unlink(cls._get_db_env_path()) + + @classmethod + @abc.abstractmethod + def get_id(cls) -> int: + """ + Return a unique ID to create an OMS instance. 
+ + This ID is used to generate Docker compose / env files and + services, so that we can avoid collisions in case of parallel + execution. + + This function is specified by the unit test in a way that is + unique to each test. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_compose_file(cls) -> str: + """ + Get path to Docker compose file. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_service_name(cls) -> str: + """ + Get service name. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_db_env_path(cls) -> str: + """ + Get path to env file that contains DB connection parameters. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _create_docker_files(cls) -> str: + """ + Create the compose and env file for the DB run. + """ + raise NotImplementedError + + @classmethod + @abc.abstractmethod + def _get_postgres_db(cls) -> str: + """ + Return the name of the postgres DB to use (e.g., im_postgres_db_local). + """ + raise NotImplementedError + + +# ############################################################################# +# TestImOmsDbHelper +# ############################################################################# + + +class TestImOmsDbHelper(TestDbHelper, abc.ABC): + # TODO(gp): Rewrite building a YAML with a package. + @classmethod + def _create_docker_files(cls) -> None: + # Create compose file. + service_name = cls._get_service_name() + idx = cls.get_id() + host_port = 5432 + idx + txt = f"""version: '3.5' +services: + # Docker container running Postgres DB. + {service_name}: + image: postgres:13 + restart: "no" + environment:""" + if not hserver.use_docker_db_container_name_to_connect(): + # Use the port to connect. 
+ txt += f""" + - POSTGRES_HOST=${{POSTGRES_HOST}} + - POSTGRES_DB=${{POSTGRES_DB}} + - POSTGRES_PORT=${{POSTGRES_PORT}} + - POSTGRES_USER=${{POSTGRES_USER}} + - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} + volumes: + - {service_name}_data:/var/lib/postgresql/data + ports: + - {host_port}:5432""" + else: + # Do not use the port to connect. + txt += f""" + - POSTGRES_HOST=${{POSTGRES_HOST}} + - POSTGRES_DB=${{POSTGRES_DB}} + - POSTGRES_USER=${{POSTGRES_USER}} + - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} + volumes: + - {service_name}_data:/var/lib/postgresql/data""" + # + txt += f""" +volumes: + {service_name}_data: {{}} + +networks: + default: + #name: {service_name}_network + name: main_network""" + compose_file_name = cls._get_compose_file() + hio.to_file(compose_file_name, txt) + # Create env file. + txt = [] + if not hserver.use_docker_db_container_name_to_connect(): + if hserver.is_dev4(): + host = "cf-spm-dev4" + else: + # host = os.environ["CSFY_HOST_NAME"] + host = "localhost" + else: + # Use the service name, e.g., `im_postgres...`. 
def remove_suffix(string: str, suffix: str, assert_on_error: bool = True) -> str:
    """
    Remove `suffix` from the end of `string`.

    :param string: text to process
    :param suffix: trailing text to remove; an empty suffix leaves `string`
        unchanged
    :param assert_on_error: if True, raise when `string` doesn't end with
        `suffix`; if False, return `string` untouched in that case
    :return: `string` without the trailing `suffix`
    :raises RuntimeError: if `string` doesn't end with `suffix` and
        `assert_on_error` is True
    """
    if string.endswith(suffix):
        # Use an explicit end index: `string[: -len(suffix)]` degenerates to
        # `string[:0]` (i.e., an empty string) when `suffix` is empty.
        res = string[: len(string) - len(suffix)]
    else:
        res = string
        if assert_on_error:
            raise RuntimeError(
                f"string='{string}' doesn't end with suffix='{suffix}'"
            )
    return res
# TODO(gp): GFI. Move to hpython_code.py
def get_docstring_line_indices(lines: List[str]) -> List[int]:
    """
    Return the indices of the lines that fall inside a (doc)string.

    A line is reported when, after processing all the delimiters it contains,
    at least one of the tracked delimiters (triple double-quotes, triple
    single-quotes, triple backticks) is still open. As a consequence the line
    opening a block is reported, while the line closing it is not.

    :param lines: the code lines to check
    :return: the indices of the lines inside (doc)strings
    """
    # Open / closed state of each delimiter; the insertion order is also the
    # order in which the delimiters are processed on every line.
    is_open = {'"""': False, "'''": False, "```": False}
    indices: List[int] = []
    for line_idx, text in enumerate(lines):
        for delimiter in tuple(is_open):
            # Each occurrence of the delimiter flips its state.
            num_occurrences = len(re.findall(delimiter, text))
            for _ in range(num_occurrences):
                is_open[delimiter] = not is_open[delimiter]
                just_closed_triple_quote = (
                    delimiter in ('"""', "'''") and not is_open[delimiter]
                )
                if just_closed_triple_quote:
                    # Closing a triple-quote also closes any backtick block
                    # left open inside it.
                    is_open["```"] = False
        if any(is_open.values()):
            # Something is still open at the end of this line.
            indices.append(line_idx)
    return indices
# TODO(gp): GFI. Move to hpython_code.py
def get_code_block_line_indices(lines: List[str]) -> List[int]:
    """
    Return the indices of the lines that fall inside a fenced code block.

    A code block is delimited by lines starting (after optional whitespace)
    with triple backticks, e.g.,
    ```
    This line.
    ```
    The opening fence line is reported, the closing fence line is not.

    :param lines: the lines to check
    :return: the indices of the lines inside code blocks
    """
    fence_regex = re.compile(r"^\s*(```)")
    inside_block = False
    indices: List[int] = []
    for line_idx, text in enumerate(lines):
        # The pattern is anchored at the start of the line, so a line can flip
        # the state at most once.
        if fence_regex.findall(text):
            inside_block = not inside_block
        if inside_block:
            indices.append(line_idx)
    return indices
+ version = m.groups(1)[0].split(".") # type: ignore[arg-type, union-attr] + major, minor = int(version[0]), 0 if len(version) == 1 else int(version[1]) + return major, minor diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py new file mode 100644 index 000000000..b63bd34f4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py @@ -0,0 +1,1097 @@ +""" +Contain all the code needed to interact with the outside world, e.g., through +system commands, env vars, ... + +Import as: + +import helpers.hsystem as hsystem +""" + +import contextlib +import datetime +import getpass +import glob +import logging +import os +import re +import signal +import subprocess +import sys +import time +from typing import ( + Any, + Callable, + Generator, + List, + Match, + Optional, + Tuple, + Union, + cast, +) + +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint +import helpers.hserver as hserver + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + + +_LOG = logging.getLogger(__name__) + +# Set logging level of this file higher to avoid too much chatter. +_LOG.setLevel(logging.INFO) + +# ############################################################################# + + +# TODO(gp): Move to hdatetime.py and maybe merge with `timestamp_to_str()`. 
def get_server_name() -> str:
    """
    Return the node name of the machine, as reported by `os.uname()`.
    """
    # `os.uname()` yields (sysname, nodename, release, version, machine),
    # e.g., nodename='gpmac.lan': keep only the second field.
    _, node_name, *_ = os.uname()
    return node_name
+ _LOG.error(msg) + raise RuntimeError(msg) + return os.environ[env_var_name] + + +# ############################################################################# +# system(), system_to_string() +# ############################################################################# + + +# pylint: disable=too-many-branches,too-many-statements,too-many-arguments,too-many-locals +def _system( + cmd: str, + print_command: bool, + abort_on_error: bool, + suppress_error: Optional[Any], + suppress_output: Union[bool, str], + blocking: bool, + wrapper: Optional[Any], + output_file: Optional[Any], + num_error_lines: Optional[int], + tee: bool, + dry_run: bool, + log_level: Union[int, str], +) -> Tuple[int, str]: + """ + Execute a shell command. + + To print the command and see the output call this as: + ``` + _system(cmd, suppress_output=False, log_level="echo") + ``` + + See `system()` for options. + """ + _LOG.debug(hprint.func_signature_to_str()) + _LOG.debug("##> %s", cmd) + orig_cmd = cmd[:] + _LOG.debug("orig_cmd=%s", orig_cmd) + # Handle `suppress_output`. + hdbg.dassert_in(suppress_output, ("ON_DEBUG_LEVEL", True, False)) + if suppress_output == "ON_DEBUG_LEVEL": + # Show the output if we are at (or lower than) DEBUG level, since + # logging.DEBUG=10 and logging.INFO=20. + show_output = _LOG.getEffectiveLevel() <= logging.DEBUG + suppress_output = not show_output + _LOG.debug(hprint.to_str("suppress_output")) + # Prepare the command line. + cmd = f"({cmd})" + hdbg.dassert_imply(tee, output_file is not None) + if output_file is not None: + # Redirect to a file. + dir_name = os.path.dirname(output_file) + if not dir_name: + dir_name = "." + if not os.path.exists(dir_name): + _LOG.debug("Dir '%s' doesn't exist: creating", dir_name) + hdbg.dassert(bool(dir_name), "dir_name='%s'", dir_name) + os.makedirs(dir_name) + if tee: + cmd += f" 2>&1 | tee -a {output_file};" + cmd += " exit ${PIPESTATUS[0]}" + else: + cmd += f" 2>&1 >{output_file}" + else: + # Do not redirect to a file. 
+ cmd += " 2>&1" + # Handle `wrapper`. + if wrapper: + cmd = wrapper + " && " + cmd + # Handle `log_level`. + # TODO(gp): Make it "ECHO" or "PRINT". + if isinstance(log_level, str): + hdbg.dassert_in(log_level, ("echo", "echo_frame")) + if log_level == "echo_frame": + print(hprint.frame(f"> {cmd}")) + elif log_level == "echo": + print(f"> {cmd}") + else: + raise ValueError(f"Invalid log_level='{log_level}'") + _LOG.debug("> %s", cmd) + else: + _LOG.log(log_level, "> %s", cmd) + output = "" + # Handle `dry_run`. + if dry_run: + _LOG.warning("As per user request, not executing command:\n%s", cmd) + rc = 0 + return rc, output + # Execute the command. + try: + stdout = subprocess.PIPE + stderr = subprocess.STDOUT + if print_command: + _LOG.info("> %s", cmd) + with subprocess.Popen( + cmd, + shell=True, + executable="/bin/bash", + stdout=stdout, + stderr=stderr, + ) as p: + output = "" + if blocking: + # Blocking call: get the output. + while True: + line = p.stdout.readline().decode("utf-8", errors="replace") # type: ignore + if not line: + break + if not suppress_output: + # print(" ==> " + line.rstrip("\n")) + print(" ... " + line.rstrip("\n")) + output += line + p.stdout.close() # type: ignore + rc = p.wait() + else: + # Not blocking. + # Wait until process terminates (without using p.wait()). + max_cnt = 20 + cnt = 0 + while p.poll() is None: + # Process hasn't exited yet, let's wait some time. + time.sleep(0.1) + cnt += 1 + _LOG.debug("cnt=%s, rc=%s", cnt, p.returncode) + if cnt > max_cnt: + break + if cnt > max_cnt: + # Timeout: we assume it worked. + rc = 0 + else: + rc = p.returncode + if suppress_error is not None: + hdbg.dassert_isinstance(suppress_error, set) + if rc in suppress_error: + rc = 0 + except OSError as e: + rc = -1 + _LOG.error("error=%s", str(e)) + _LOG.debug(" ==> rc=%s", rc) + if abort_on_error and rc != 0: + # Report the last `num_error_lines` of the output. 
# pylint: disable=too-many-arguments
def system(
    cmd: str,
    *,
    print_command: bool = False,
    abort_on_error: bool = True,
    suppress_error: Optional[Any] = None,
    suppress_output: Union[str, bool] = "ON_DEBUG_LEVEL",
    blocking: bool = True,
    wrapper: Optional[Any] = None,
    output_file: Optional[Any] = None,
    num_error_lines: Optional[int] = None,
    tee: bool = False,
    dry_run: bool = False,
    log_level: Union[int, str] = logging.DEBUG,
) -> int:
    """
    Execute a shell command, without capturing its output.

    The command is dedented with `hprint.dedent()` and then forwarded to
    `_system()`; only the return code is handed back to the caller.

    :param cmd: string with command to execute
    :param print_command: whether to print the command using `_LOG.info()`
    :param abort_on_error: whether to raise `RuntimeError` when the return
        code is not 0
    :param suppress_error: set of return codes to treat as success (i.e., they
        are mapped to 0)
    :param suppress_output: whether to print the output or not
        - If "ON_DEBUG_LEVEL" then print the output if the log level is DEBUG
    :param blocking: blocking system call or not
    :param wrapper: another command to prepend to `cmd` (joined with `&&`)
    :param output_file: file to redirect the output of the command to
    :param num_error_lines: number of lines of the output to display when
        raising `RuntimeError` (30 when not specified)
    :param tee: if True, tee append (i.e., `tee -a`) stdout and stderr to
        `output_file`; requires `output_file`
    :param dry_run: print the final command but not execute it
    :param log_level: print the command to execute at level "log_level".
        - If `echo` (or `echo_frame`) then print the command line to screen
          with `print()` instead of logging
    :return: return code of the command; use `system_to_string()` to also get
        the output
    """
    # print("cmd=", cmd)
    # print("suppress_output=", suppress_output)
    cmd = hprint.dedent(cmd)
    # The captured output is intentionally discarded.
    rc, _ = _system(
        cmd,
        print_command=print_command,
        abort_on_error=abort_on_error,
        suppress_error=suppress_error,
        suppress_output=suppress_output,
        blocking=blocking,
        wrapper=wrapper,
        output_file=output_file,
        num_error_lines=num_error_lines,
        tee=tee,
        dry_run=dry_run,
        log_level=log_level,
    )
    return rc
# TODO(gp): Move it to a more general file, e.g., `helpers/printing.py`?
def text_to_list(txt: str) -> List[str]:
    """
    Split a string (e.g., the output of `system_to_string()`) into lines.

    Each line is stripped of leading and trailing whitespace and the lines
    that end up empty are dropped.

    :param txt: text to split on newlines
    :return: non-empty, stripped lines in their original order
    """
    stripped_lines = (raw_line.strip() for raw_line in txt.split("\n"))
    return [line for line in stripped_lines if line]
+ """ + rc, output = system_to_string(cmd, *args, **kwargs) + output = get_first_line(output) + return rc, output + + +# ############################################################################# +# system_to_files() +# ############################################################################# + + +def to_normal_paths(files: List[str]) -> List[str]: + files = list(map(os.path.normpath, files)) + return files + + +def to_absolute_paths(files: List[str]) -> List[str]: + files = list(map(os.path.abspath, files)) + return files + + +def _remove_files_non_present(files: List[str]) -> List[str]: + """ + Return list of files from `files` excluding the files that don't exist. + """ + files_tmp = [] + for f in files: + if os.path.exists(f): + files_tmp.append(f) + else: + _LOG.warning("File '%s' doesn't exist: skipping", f) + return files_tmp + + +def remove_dirs(files: List[str]) -> List[str]: + """ + Return list of files from `files` excluding the files that are directories. + """ + files_tmp: List[str] = [] + dirs_tmp: List[str] = [] + for file in files: + if os.path.isdir(file): + _LOG.debug("file='%s' is a dir: skipping", file) + dirs_tmp.append(file) + else: + files_tmp.append(file) + if dirs_tmp: + _LOG.warning("Removed dirs: %s", ", ".join(dirs_tmp)) + return files_tmp + + +def select_result_file_from_list( + files: List[str], mode: str, file_name: str +) -> List[str]: + """ + Select a file from a list according to various approaches encoded in + `mode`. + + :param files: list of files to select from + :param file_name: name of the file we are looking for + :param mode: + - "return_all_results": return the list of files, whatever it is + - "assert_unless_one_result": assert unless there is a single file and return + the only file. Note that we still return a list to keep the interface + simple. + """ + res: List[str] = [] + if mode == "assert_unless_one_result": + # Expect to have a single result and return that. 
def system_to_files(
    cmd: str,
    dir_name: Optional[str] = None,
    remove_files_non_present: bool = False,
    mode: str = "return_all_results",
) -> List[str]:
    """
    Execute command `cmd` in `dir_name` and return the output as a list of
    file paths.

    Each non-empty line of the output is interpreted as a path relative to
    `dir_name`, joined to it and normalized.

    :param cmd: command to execute, printing one file per line
    :param dir_name: dir to run the command from (current dir if None); it
        must exist
    :param remove_files_non_present: remove files that don't exist on
        the filesystem
    :param mode: like in `select_result_file_from_list()`
    :return: list of normalized paths selected according to `mode`
    """
    if dir_name is None:
        dir_name = "."
    hdbg.dassert_dir_exists(dir_name)
    cmd = f"cd {dir_name} && {cmd}"
    _, output = system_to_string(cmd)
    _LOG.debug("output=\n%s", output)
    # Strip both ends of every line and drop the empty ones. The previous
    # `rstrip().rstrip()` left leading whitespace in place, which then ended up
    # inside the joined path.
    files: List[str] = text_to_list(output)
    _LOG.debug("files=%s", " ".join(files))
    # Convert to normalized paths.
    files = [os.path.normpath(os.path.join(dir_name, f)) for f in files]
    # The variable is passed by name, so the local must stay called `files`.
    _LOG.debug(hprint.to_str("files"))
    # Remove non-existent files, if needed.
    if remove_files_non_present:
        files = _remove_files_non_present(files)
    # Process output.
    files = select_result_file_from_list(files, mode, cmd)
    return files
def kill_process(
    get_pids: Callable[[], Tuple[List[int], List[str]]],
    timeout_in_secs: int = 5,
    polltime_in_secs: float = 0.1,
) -> None:
    """
    Kill all the processes returned by the function `get_pids()`.

    Every pid is sent `SIGKILL`, then `get_pids()` is polled until it reports
    no process or the timeout expires. If processes are still reported after
    that, the final `hdbg.dassert_eq()` check fails.

    :param get_pids: function returning the pids to kill and the lines
        describing them (e.g., built on top of `get_process_pids()`)
    :param timeout_in_secs: how many seconds to wait at most before
        giving up
    :param polltime_in_secs: how often to check for dead processes
    """
    # Imported inside the function, so `tqdm` is needed only when this
    # function is called.
    import tqdm

    pids, txt = get_pids()
    _LOG.info("Killing %d pids (%s)\n%s", len(pids), pids, "\n".join(txt))
    if not pids:
        # Nothing to kill.
        return
    for pid in pids:
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError as e:
            # The process is already gone: report it and keep going.
            _LOG.warning(str(e))
    # Poll until all the processes are dead or the timeout is reached.
    _LOG.info("Waiting %d processes (%s) to die", len(pids), pids)
    for _ in tqdm.tqdm(
        range(int(timeout_in_secs / polltime_in_secs)), desc="Polling process"
    ):
        time.sleep(polltime_in_secs)
        pids, _ = get_pids()
        if not pids:
            break
    # Check one last time that nothing survived.
    pids, txt = get_pids()
    hdbg.dassert_eq(len(pids), 0, "Processes are still alive:%s", "\n".join(txt))
    _LOG.info("Processes dead")
def to_pbcopy(txt: str, pbcopy: bool) -> None:
    """
    Save the content of `txt` in the system clipboard.

    Trailing newlines are removed from `txt` first. The text is printed
    instead of being copied when `pbcopy` is False or the host is not a Mac.

    :param txt: text to copy
    :param pbcopy: if False, just print `txt` without touching the clipboard
    """
    # Local import so that the block doesn't depend on the module-level
    # imports.
    import shlex

    txt = txt.rstrip("\n")
    if not pbcopy:
        print(txt)
        return
    if not txt:
        print("Nothing to copy")
        return
    if hserver.is_host_mac():
        # -n = no new line
        # Quote the text for the shell: wrapping it in single quotes by hand
        # breaks (or injects commands) as soon as `txt` contains a `'`.
        cmd = f"echo -n {shlex.quote(txt)} | pbcopy"
        system(cmd)
        _LOG.warning("\n# Copied to system clipboard:\n%s", txt)
    else:
        _LOG.warning("pbcopy works only on macOS")
        print(txt)
Defaults to the current directory (".") + :return: absolute path to the top-level Git repository directory + """ + path = os.path.abspath(path) + git_root_dir = None + while True: + git_dir = os.path.join(path, ".git") + _LOG.debug("git_dir=%s", git_dir) + # Check if `.git` is a directory which indicates a standard Git repository. + if os.path.isdir(git_dir): + # Found the Git root directory. + git_root_dir = path + break + # Check if `.git` is a file which indicates submodules or linked setups. + if os.path.isfile(git_dir): + # Using the `open()` to avoid import cycles with the `hio` module. + with open(git_dir, "r") as f: + txt = f.read() + lines = txt.split("\n") + for line in lines: + # Look for a `gitdir:` line that specifies the linked directory. + # Example: `gitdir: ../.git/modules/helpers_root`. + if line.startswith("gitdir:"): + git_dir_path = line.split(":", 1)[1].strip() + _LOG.debug("git_dir_path=%s", git_dir_path) + # Resolve the relative path to the absolute path of the Git directory. + abs_git_dir = os.path.abspath( + os.path.join(path, git_dir_path) + ) + # Traverse up to find the top-level `.git` directory. + while True: + # Check if the current directory is a `.git` directory. + if os.path.basename(abs_git_dir) == ".git": + git_root_dir = os.path.dirname(abs_git_dir) + # Found the root. + break + # Move one level up in the directory structure. + parent = os.path.dirname(abs_git_dir) + # Reached the filesystem root without finding the `.git` directory. + hdbg.dassert_ne( + parent, + abs_git_dir, + "Top-level .git directory not found.", + ) + # Continue traversing up. + abs_git_dir = parent + break + # Exit the loop if the Git root directory is found. + if git_root_dir is not None: + break + # Move up one level in the directory hierarchy. + parent = os.path.dirname(path) + # Reached the filesystem root without finding `.git`. 
+ hdbg.dassert_ne( + parent, + path, + "No .git directory or file found in any parent directory.", + ) + # Update the path to the parent directory for the next iteration. + path = parent + return git_root_dir + + +# End copy. + + +def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: + """ + Find file in the repo. + """ + if root_dir is None: + root_dir = _find_git_root() + _, file_name_out = system_to_one_line( + rf"find {root_dir} -name {file_name} -not -path '*/\.git/*'" + ) + hdbg.dassert_ne(file_name_out, "", "File not found in repo: '%s'", file_name) + return file_name_out + + +# TODO(gp): Use find_file +def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: + """ + Find a file in a directory and report its absolute path. + + :param filename: the name of the file to find (e.g., "helpers_root") + :param search_path: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :return: the absolute path of the file + """ + # Recursive glob. + search_path = os.path.join(search_path, "**", filename) + files = glob.glob(search_path, recursive=True) + if len(files) == 1: + return files[0] + elif len(files) > 1: + msg = f"Found multiple files with basename '{filename}' in directory '{search_path}':\n" + msg += "\n".join(files) + raise RuntimeError(msg) + else: + return None + + +# TODO(gp): -> find_path_greedily +def find_path( + path: str, *, dir_name: str = ".", abort_on_error: bool = False +) -> str: + """ + Find a path in a directory and report its absolute path. + + :param path: the path to find (e.g., "system_tools/path.py") + :param dir_name: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :param abort_on_error: if True, raise an error if the path doesn't exist + :return: the absolute path of the path + """ + # Make the path absolute. + path_out = os.path.abspath(path) + # If the path exists, return it. + if os.path.exists(path_out): + return path_out + # If the path doesn't exist, abort. 
# TODO(Nikola): Use filesystem's `du` and move to `hio` instead?
def du(path: str, human_format: bool = False) -> Union[int, str]:
    """
    Return the disk usage in bytes of a file or a directory (recursively).

    The value comes from the `du` command, so it has a 1 KB granularity.

    :param path: file or dir to measure; it must exist
    :param human_format: represent the size in KB, MB, ... instead of bytes
        using `hintrospection.format_size()`
    :return: size in bytes as int, or its formatted representation as str when
        `human_format` is True
    """
    # Local import so that the block doesn't depend on the module-level
    # imports.
    import shlex

    hdbg.dassert_path_exists(path)
    # Force 1024-byte units with `-k`: without it the unit depends on the
    # platform / environment (POSIX and BSD / macOS `du` default to 512-byte
    # units), while the conversion below assumes KB.
    # Quote the path so that names with spaces or shell metacharacters work.
    cmd = f"du -d 0 -k {shlex.quote(path)}" + " | awk '{print $1}'"
    # > du -d 0 -k core
    # 20 core
    _, txt = system_to_one_line(cmd)
    _LOG.debug("txt=%s", txt)
    # `du -k` returns size in KB.
    size_in_bytes = int(txt) * 1024
    size: Union[int, str]
    if human_format:
        size = hintros.format_size(size_in_bytes)
    else:
        size = size_in_bytes
    return size
+ + :return: tuple of extracted enclosing dirs + - E.g., `("core", "dataflow_model", "utils.py")` + """ + # Split a file like: + # /app/amp/core/test/TestCheckSameConfigs.test_check_same_configs_error/output/test.txt + # into + # ['', 'app', 'amp', 'core', 'test', + # 'TestCheckSameConfigs.test_check_same_configs_error', 'output', 'test.txt'] + path = os.path.normpath(file_name) + paths = path.split(os.sep) + hdbg.dassert_lte(1, dir_depth) + if dir_depth > len(paths): + _LOG.warning( + "Can't compute signature of file_name='%s' with" + " dir_depth=%s, len(paths)=%s", + file_name, + dir_depth, + len(paths), + ) + signature = None + else: + signature = paths[-(dir_depth + 1) :] + return signature + + +# TODO(gp): -> hio.py +def find_file_with_dir( + file_name: str, + *, + root_dir: str = ".", + dir_depth: int = -1, + mode: str = "return_all_results", + candidate_files: Optional[List[str]] = None, +) -> List[str]: + """ + Find a file matching basename and several enclosing dir name starting from + `root_dir`. + + E.g., find a file matching `amp/core/dataflow_model/utils.py` with `dir_depth=1` + means looking for a file with basename 'utils.py' under a dir 'dataflow_model'. + + :param dir_depth: how many enclosing dirs in order to declare a match. + - `-1` to use as many enclosing dirs as possible. E.g., + `/app/amp/core/dataflow/utils.py` will use 3 levels, since `/app` is + removed + :param mode: control the returned list of files, like in + `select_result_file_from_list()` + :param candidate_files: list of results from the `find` command for unit test + mocking + :return: list of files found + """ + _LOG.debug(hprint.func_signature_to_str()) + # Find all the files in the dir with the same basename. + if candidate_files is None: + base_name = os.path.basename(file_name) + cmd = rf"find . -name '{base_name}' -not -path '*/\.git/*'" + # > find . 
-name "utils.py" + # ./amp/core/dataflow/utils.py + # ./amp/core/dataflow_model/utils.py + # ./amp/im/common/test/utils.py + mode_ = "return_all_results" + candidate_files = system_to_files(cmd, dir_name=root_dir, mode=mode_) + _LOG.debug("candidate files=\n%s", "\n".join(candidate_files)) + # + if dir_depth == -1: + # Remove "/app" if present. + prefix = "/app/" + if file_name.startswith(prefix): + file_name = file_name[len(prefix) :] + # Remove "amp" if present. + prefix = "amp/" + if file_name.startswith(prefix): + file_name = file_name[len(prefix) :] + # Count how many dirs levels there are. + dir_depth = len(os.path.normpath(file_name).split("/")) - 1 + _LOG.debug( + "inferred dir_depth=%s for file_name=%s", dir_depth, file_name + ) + # Check the matching files. + matching_files = [] + for candidate_file_name in sorted(candidate_files): + signature1 = _compute_file_signature(candidate_file_name, dir_depth) + signature2 = _compute_file_signature(file_name, dir_depth) + is_equal = signature1 == signature2 + _LOG.debug("found_file=%s -> is_equal=%s", candidate_file_name, is_equal) + if is_equal: + matching_files.append(candidate_file_name) + _LOG.debug( + "Found %d files:\n%s", len(matching_files), "\n".join(matching_files) + ) + # Select the result based on mode. + res = select_result_file_from_list(matching_files, mode, file_name) + _LOG.debug("-> res=%s", str(res)) + return res + + +# https://stackoverflow.com/questions/169070 +@contextlib.contextmanager +def cd(dir_name: str) -> Generator[None, None, None]: + """ + Context manager managing changing directory. 
+ """ + hdbg.dassert_dir_exists(dir_name) + current_dir = os.getcwd() + _LOG.debug("Entering ctx manager: " + hprint.to_str("current_dir")) + try: + os.chdir(dir_name) + _LOG.debug("Switched to dir '%s'", os.getcwd()) + yield + finally: + _LOG.debug("Switching back to dir '%s'", current_dir) + os.chdir(current_dir) + _LOG.debug("Exiting ctx manager") + + +# ############################################################################# +# File timestamping. +# ############################################################################# + + +def has_timestamp(file_name: str) -> bool: + """ + Check whether `file_name` contains a timestamp. + + The timestamp is in the format `%Y%m%d-%H_%M_%S` (e.g., + 20210724-12_45_51). E.g., this function for + `experiment.RH1E.5T.20210724-12_45_51` returns True. + """ + file_name = os.path.basename(file_name) + # E.g., %Y%m%d-%H_%M_%S + # The separator is _, -, or nothing. + sep = "[-_]?" + regex = sep.join( + [r"\d{4}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}"] + ) + _LOG.debug("regex=%s", regex) + occurrences = re.findall(regex, file_name) + hdbg.dassert_lte( + len(occurrences), 1, "Found more than one timestamp", str(occurrences) + ) + m = re.search("(" + regex + ")", file_name) + has_timestamp_ = m is not None + if has_timestamp_: + m = cast(Match[str], m) + _LOG.debug("Found a timestamp '%s' in '%s'", m.group(1), file_name) + return has_timestamp_ + + +def append_timestamp_tag(file_name: str, tag: str) -> str: + """ + Add a tag and the current timestamp to a filename, before the extension. + + :return: new filename + """ + dir_name = os.path.dirname(file_name) + base_name = os.path.basename(file_name) + name, extension = os.path.splitext(base_name) + tag_ = "" + # E.g., 20210723-20_52_00 + if not has_timestamp(file_name): + import helpers.hdatetime as hdateti + + tag_ += "." + hdateti.get_current_timestamp_as_string(tz="ET") + # Add tag, if specified. + if tag: + # If the tag is specified prepend a `.` in the filename. 
+ tag_ += "." + tag + new_file_name = os.path.join(dir_name, "".join([name, tag_, extension])) + _LOG.debug(hprint.to_str("file_name new_file_name")) + return new_file_name + + +def tee( + cmd: str, executable: str, abort_on_error: bool +) -> Tuple[int, List[str]]: + """ + Execute command and return its exit code and output lines. + + Captures output, removes empty lines, and optionally aborts on error. + + :param cmd: Command string to execute + :param executable: Executable to use for running the command + :param abort_on_error: Whether to abort execution if command fails + :return: Tuple of (exit code, list of non-empty output lines) + """ + _LOG.debug("cmd=%s executable=%s", cmd, executable) + rc, output = system_to_string(cmd, abort_on_error=abort_on_error) + hdbg.dassert_isinstance(output, str) + output1 = output.split("\n") + _LOG.debug("output1= (%d)\n'%s'", len(output1), "\n".join(output1)) + output2 = hprint.remove_empty_lines(output1) + _LOG.debug("output2= (%d)\n'%s'", len(output2), "\n".join(output2)) + hdbg.dassert_list_of_strings(output2) + return rc, output2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py new file mode 100644 index 000000000..5278e3984 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py @@ -0,0 +1,180 @@ +""" +Import as: + +import helpers.htable as htable +""" + +import copy +import csv +import logging +from typing import Any, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +TableType = List[List[str]] + + +# ############################################################################# +# Table +# ############################################################################# + + +class Table: + """ + A 
simple (rectangular) table without introducing a dependency from Pandas. + + The element in the table can be anything. + """ + + @staticmethod + def _check_table(table: TableType, column_names: List[str]) -> None: + """ + Check that the table is well-formed (e.g., the list of lists is + rectangular). + """ + hdbg.dassert_isinstance(table, list) + hdbg.dassert_isinstance(column_names, list) + hdbg.dassert_no_duplicates(column_names) + # Columns have no leading or trailing spaces. + for column_name in column_names: + hdbg.dassert_eq(column_name, column_name.rstrip().lstrip()) + # Check that the list of lists is rectangular. + for row in table: + hdbg.dassert_isinstance(table, list) + hdbg.dassert_eq( + len(row), + len(column_names), + "Invalid row='%s' for cols='%s'", + row, + column_names, + ) + + def __repr__(self) -> str: + res = "" + res += f"cols={str(self._column_names)}" + res += "\ntable=\n" + "\n".join(map(str, self._table)) + res += "\n" + f"size={str(self.size())}" + return res + + def __init__(self, table: TableType, column_names: List[str]) -> None: + # Check that the inputs are well-formed. + self._check_table(table, column_names) + # Save state. + self._table = table + self._column_names = column_names + _LOG.debug("%s", self.__repr__()) + # Map a column name to the index of the corresponding column, to allow + # indexing by column. + self._col_to_idx = { + col: idx for idx, col in enumerate(self._column_names) + } + _LOG.debug("col_to_idx=%s", str(self._col_to_idx)) + + @classmethod + def from_text(cls, cols: List[str], txt: str, delimiter: str) -> "Table": + """ + Build a table from a list of columns and the body of a CSV file. + """ + hdbg.dassert_isinstance(txt, str) + table = list(csv.reader(txt.split("\n"), delimiter=delimiter)) + return cls(table, cols) + + def size(self) -> Tuple[int, int]: + """ + Return the size of the table. 
+ + :return: number of rows x columns (i.e., numpy / Pandas convention) + """ + return len(self._table), len(self._column_names) + + def filter_rows(self, column_name: str, value: str) -> "Table": + """ + Return a Table filtered with rows filtered by the criteria "field == + value". + """ + _LOG.debug("self=\n%s", repr(self)) + # Filter the rows. + hdbg.dassert_in(column_name, self._col_to_idx.keys()) + rows_filter = [ + row + for row in self._table + if row[self._col_to_idx[column_name]] == value + ] + _LOG.debug(hprint.to_str("rows_filter")) + # Build the resulting table. + table_filter = Table(rows_filter, self._column_names) + _LOG.debug("table_filter=\n%s", repr(table_filter)) + return table_filter + + def get_column(self, column_name: str) -> List[Any]: + """ + Return the list of unique values for a row / field. + """ + hdbg.dassert_in(column_name, self._column_names) + column_idx = self._col_to_idx[column_name] + # Scan the rows to extract the column. + vals = [] + for row in self._table: + vals.append(row[column_idx]) + return vals + + def unique(self, column_name: str) -> List[Any]: + """ + Return a list of unique values for a field. + """ + vals = self.get_column(column_name) + vals = sorted(list(set(vals))) + return vals + + def remove_column(self, column_name: str) -> "Table": + """ + Return a new Table with the specified column removed. + + :param column_name: name of the column to remove + :return: new Table without the specified column + """ + hdbg.dassert_in(column_name, self._column_names) + # Find the index of the column to remove. + column_idx = self._col_to_idx[column_name] + # Create new column names list without the removed column. + new_column_names = [ + col for col in self._column_names if col != column_name + ] + # Create new table rows without the removed column. + new_table = [ + [val for idx, val in enumerate(row) if idx != column_idx] + for row in self._table + ] + # Build and return the new table. 
+ return Table(new_table, new_column_names) + + def __str__(self) -> str: + """ + Return a string representing the table with columns aligned. + """ + table = copy.deepcopy(self._table) + table.insert(0, self._column_names) + # Convert the cells to strings. + table_as_str = [[str(cell) for cell in row] for row in table] + # Find the length of each columns. + lengths = [max(map(len, col)) for col in zip(*table_as_str)] + _LOG.debug(hprint.to_str("lengths")) + # Compute format for the columns. + fmt = " ".join(f"{{:{x}}} |" for x in lengths) + _LOG.debug(hprint.to_str("fmt")) + # Add the row separating the column names. + row_sep = ["-" * length for length in lengths] + table.insert(1, row_sep) + table_as_str = [[str(cell) for cell in row] for row in table] + # Format rows. + rows_as_str = [fmt.format(*row) for row in table_as_str] + # Remove trailing spaces. + rows_as_str = [row.rstrip() for row in rows_as_str] + # Create string. + res = "\n".join(rows_as_str) + # res += "\nsize=" + str(self.size()) + return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py new file mode 100644 index 000000000..8ef0e3a4f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +""" +Add a description of what the script does and examples of command lines. + +Check dev_scripts/linter.py to see an example of a script using this +template. 
+ +Import as: + +import dev_scripts_helpers.script_template as dscscske +""" + +import argparse +import logging + +import helpers.hlogging as hloggin +import helpers.hparser as hparser + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("positional", nargs="*", help="...") + parser.add_argument("--dst_dir", action="store", help="Destination dir") + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.parse_verbosity_args(args, use_exec_path=True) + hloggin.test_logger() + # + # logging.disable(logging.WARNING) + hloggin.shut_up_log_debug(_LOG) + hloggin.test_logger() + + +if __name__ == "__main__": + _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py new file mode 100644 index 000000000..7b6506ce6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py @@ -0,0 +1,262 @@ +""" +Utilities for protecting content during text processing. + +Extract and restore content that should not be modified by formatters and text +transformations (code blocks, comments, etc.). 
+ +Import as: + +import helpers.htext_protect as htexprot +""" + +import logging +import re +from typing import Dict, List, Optional, Tuple + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Helper functions +# ############################################################################# + + +def _is_fenced_block_delimiter(line: str) -> bool: + """ + Check if line is a fenced block delimiter (```). + + :param line: Line to check + :return: True if line matches fenced block delimiter pattern + """ + return bool(re.match(r"^\s*```", line)) + + +def _is_math_block_delimiter(line: str) -> bool: + """ + Check if line is a math block delimiter ($$). + + :param line: Line to check + :return: True if line matches math block delimiter pattern + """ + return bool(re.match(r"^\s*\$\$\s*$", line)) + + +def _extract_single_line_html_comment(line: str) -> Optional[str]: + """ + Extract single-line HTML comment from line if present. + + Skips TOC markers ( and ) as they need to be + processed by the TOC generation logic. + + :param line: Line to check + :return: Full comment string if found, None otherwise + """ + # Skip TOC markers: they are processed by `refresh_toc`. + if "" in line or "" in line: + return None + # Match on single line. + m = re.match(r"^(\s*\s*)$", line) + if m: + return m.group(1) + return None + + +def _is_html_comment_start(line: str) -> bool: + """ + Check if line starts an HTML comment. + + Skips TOC markers as they need to be processed by TOC generation logic. + + :param line: Line to check + :return: True if line contains + """ + # Skip TOC markers. + if "" in line or "" in line: + return False + return "" not in line + + +def _is_html_comment_end(line: str) -> bool: + """ + Check if line ends an HTML comment. 
+ + :param line: Line to check + :return: True if line contains --> without opening " in line and ") for .md and .txt files + - LaTeX comments (% ...) for .tex files + + :param lines: The lines to be processed + :param file_type: File extension ('md', 'txt', or 'tex') + :return: Tuple of (lines with placeholders, mapping of placeholders to + original content) + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_in(file_type, ["md", "txt", "tex"]) + _LOG.debug("Extracting protected content for file_type=%s", file_type) + # + protected_map: Dict[str, str] = {} + counter = 1 + lines_new: List[str] = [] + # State tracking. + in_fenced_block = False + in_math_block = False + in_html_comment = False + fenced_block_lines: List[str] = [] + math_block_lines: List[str] = [] + html_comment_lines: List[str] = [] + # Process each line. + for line in lines: + # Handle fenced blocks (for .md and .txt files). + if file_type in ["md", "txt"] and _is_fenced_block_delimiter(line): + if not in_fenced_block: + # Opening delimiter. + in_fenced_block = True + lines_new.append(line) + fenced_block_lines = [] + else: + # Closing delimiter: protect only content, keep delimiters visible. + placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(fenced_block_lines) + counter += 1 + lines_new.append(placeholder) + lines_new.append(line) + in_fenced_block = False + fenced_block_lines = [] + continue + # Inside fenced block: accumulate. + if in_fenced_block: + fenced_block_lines.append(line) + continue + # Handle math blocks (for all file types). + if _is_math_block_delimiter(line): + if not in_math_block: + # Opening delimiter. + in_math_block = True + lines_new.append(line) + math_block_lines = [] + else: + # Closing delimiter: protect only content, keep delimiters visible. 
+ placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(math_block_lines) + counter += 1 + lines_new.append(placeholder) + lines_new.append(line) + in_math_block = False + math_block_lines = [] + continue + # Inside math block: accumulate. + if in_math_block: + math_block_lines.append(line) + continue + # Handle HTML comments (for .md and .txt files). + if file_type in ["md", "txt"]: + # Single-line HTML comment. + single_line_comment = _extract_single_line_html_comment(line) + if single_line_comment: + placeholder = f"<<>>" + protected_map[placeholder] = single_line_comment + counter += 1 + lines_new.append(placeholder) + continue + # Multi-line HTML comment start. + if _is_html_comment_start(line): + in_html_comment = True + html_comment_lines = [line] + continue + # Multi-line HTML comment end. + if in_html_comment and _is_html_comment_end(line): + html_comment_lines.append(line) + placeholder = f"<<>>" + protected_map[placeholder] = "\n".join(html_comment_lines) + counter += 1 + lines_new.append(placeholder) + in_html_comment = False + html_comment_lines = [] + continue + # Inside multi-line HTML comment: accumulate. + if in_html_comment: + html_comment_lines.append(line) + continue + # Handle LaTeX comments (for .tex files). + if file_type == "tex" and _is_latex_comment(line): + placeholder = f"<<>>" + protected_map[placeholder] = line + counter += 1 + lines_new.append(placeholder) + continue + # Regular line: keep as-is. + lines_new.append(line) + # Check for unclosed blocks. 
+ if in_fenced_block: + _LOG.warning("Unclosed fenced block detected") + if in_math_block: + _LOG.warning("Unclosed math block detected") + if in_html_comment: + _LOG.warning("Unclosed HTML comment detected") + _LOG.debug("Extracted %d protected content blocks", len(protected_map)) + return lines_new, protected_map + + +def restore_protected_content( + lines: List[str], + protected_map: Dict[str, str], +) -> List[str]: + """ + Restore protected content by replacing placeholders with original text. + + :param lines: Lines containing placeholders + :param protected_map: Mapping of placeholders to original content + :return: Lines with restored content + """ + hdbg.dassert_isinstance(lines, list) + hdbg.dassert_isinstance(protected_map, dict) + _LOG.debug("Restoring %d protected content blocks", len(protected_map)) + # + lines_new: List[str] = [] + for line in lines: + # Check if line contains any placeholder. + restored = False + for placeholder, original in protected_map.items(): + if placeholder in line: + if line.strip() == placeholder: + # Placeholder is entire line: replace with multi-line content. + lines_new.extend(original.split("\n")) + restored = True + break + else: + # Placeholder embedded in line: replace inline. + line = line.replace(placeholder, original) + if not restored: + lines_new.append(line) + return lines_new diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py new file mode 100644 index 000000000..31cd642cf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +""" +`timeout` decorator which is used to limit function execution time. 
+ +Import as: + +import helpers.hthreading as hthread +""" + +import _thread +import sys +import threading +from typing import Any + + +def _timeout_handler() -> None: + sys.stderr.flush() + # Raise KeyboardInterrupt. + _thread.interrupt_main() + + +def timeout(timeout_sec: int) -> Any: + """ + Exit process if its execution takes longer than timeout_sec seconds. This + is a decorator that issue a KeyboardInterrupt, that will be raised if time + limit is exceed. + + :param timeout_sec: time limit + """ + + def outer(fn: Any) -> Any: + def inner(*args: Any, **kwargs: Any) -> Any: + timer = threading.Timer(timeout_sec, _timeout_handler) + timer.start() + try: + result = fn(*args, **kwargs) + finally: + timer.cancel() + return result + + return inner + + return outer diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py new file mode 100644 index 000000000..c3aed5e80 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py @@ -0,0 +1,275 @@ +""" +Import as: + +import helpers.htimer as htimer +""" + +import logging +import time +from typing import Any, Callable, Optional, Tuple, cast + +import helpers.hdbg as hdbg +import helpers.hlogging as hloggin + +# Avoid dependency from other `helpers` modules to prevent import cycles. + + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Timer +# ############################################################################# + + +class Timer: + """ + Measure time elapsed in one or more intervals. + """ + + def __init__(self, *, start_on_creation: bool = True): + """ + Create a timer. + + If "start_on_creation" is True start automatically the timer. 
+ """ + self._stop: Optional[float] = None + # Store the time for the last elapsed interval. + self._last_elapsed: Optional[float] = None + # Store the total time for all the measured intervals. + self._total_elapsed = 0.0 + if start_on_creation: + # For better accuracy start the timer as last action, after all the + # bookkeeping. + self._start: Optional[float] = time.time() + else: + self._start = None + + def stop(self) -> None: + """ + Stop the timer and accumulate the interval. + """ + # Timer must have not been stopped before. + hdbg.dassert(self.is_started() and not self.is_stopped()) + # For better accuracy stop the timer as first action. + self._stop = time.time() + # Update the total elapsed time. + # Sometimes we get numerical error tripping this assertion + # (e.g., '1619552498.813126' <= '1619552498.805193') so we give + # a little slack to the assertion. + # hdbg.dassert_lte(self._start, self._stop + 1e-2) + self._last_elapsed = cast(float, self._stop) - cast(float, self._start) + self._total_elapsed += self._last_elapsed + # Stop. + self._start = None + self._stop = None + + def get_elapsed(self) -> float: + """ + Stop if not stopped already, and return the elapsed time. + """ + if not self.is_stopped(): + self.stop() + hdbg.dassert_is_not(self._last_elapsed, None) + return cast(float, self._last_elapsed) + + # ///////////////////////////////////////////////////////////////////////// + + def resume(self) -> None: + """ + Resume the timer after a stop. + """ + # Timer must have been stopped before. + hdbg.dassert(self.is_started() or self.is_stopped()) + self._stop = None + # Start last for better accuracy. + self._start = time.time() + + def is_started(self) -> bool: + return ( + self._start is not None and self._start >= 0 and self._stop is None + ) + + def is_stopped(self) -> bool: + return self._start is None and self._stop is None + + def get_total_elapsed(self) -> float: + """ + Stop if not stopped already, and return the total elapsed time. 
+ """ + if not self.is_stopped(): + self.stop() + return self._total_elapsed + + def accumulate(self, *, timer: "Timer") -> None: + """ + Accumulate the value of a timer to the current object. + """ + # Both timers must be stopped. + hdbg.dassert(timer.is_stopped()) + hdbg.dassert(self.is_stopped()) + hdbg.dassert_lte(0.0, timer.get_total_elapsed()) + self._total_elapsed += timer.get_total_elapsed() + + def __repr__(self) -> str: + """ + Return string with the intervals measured so far. + """ + measured_time = self._total_elapsed + if self.is_started() and not self.is_stopped(): + # Timer still running. + measured_time += time.time() - cast(float, self._start) + ret = "%.3f secs" % measured_time + return ret + + +# ############################################################################# + + +_TimerMemento = Tuple[int, str, Timer] + + +def dtimer_start(log_level: int, message: str) -> _TimerMemento: + """ + Start measuring time. + + :return: memento of the timer. + """ + _LOG.log(log_level, "%s ...", message) + memento = log_level, message, Timer() + return memento + + +def dtimer_stop(memento: _TimerMemento) -> Tuple[str, float]: + """ + End measuring time. + + :return: + - message as as string + - time in seconds (int) + """ + log_level, message, timer = memento + timer.stop() + elapsed_time = round(timer.get_elapsed(), 3) + msg = f"{message} done (%.3f s)" % elapsed_time + _LOG.log(log_level, msg) + return msg, elapsed_time + + +# TODO(gp): Is this useful / used? +def stop_timer(timer: Timer) -> str: + timer.stop() + elapsed_time = round(timer.get_elapsed(), 3) + msg = "%.3f s" % elapsed_time + return msg + + +# ############################################################################# +# TimedScope +# ############################################################################# + + +class TimedScope: + """ + Measure the execution time of a block of code. + + ``` + with htimer.TimedScope(logging.INFO, "Work") as ts: + ... work work work ... 
+ ``` + """ + + def __init__( + self, log_level: int, message: str, *, profile_memory: bool = False + ): + self._log_level = log_level + self._message = message + # TODO(gp): Implement profiling also memory using dmemory_start/end. + # State. + self._memento: Optional[_TimerMemento] = None + self.elapsed_time = None + + def get_result(self) -> str: + msg: str = f"{self._message} done (%.3f s)" % self.elapsed_time + return msg + + def __enter__(self) -> "TimedScope": + self._memento = dtimer_start(self._log_level, self._message) + return self + + def __exit__(self, *args: Any) -> None: + if self._memento is not None: + msg, self.elapsed_time = dtimer_stop(self._memento) + _ = msg + + +# ############################################################################# +# Decorator. +# ############################################################################# + + +def timed(f: Callable) -> Callable: + """ + Add a timer around the invocation of a function. + """ + + def wrapper(*args: Any, **kwargs: Any) -> Any: + func_name = getattr(f, "__name__", "unknown_function") + # + timer = dtimer_start(0, func_name) + v = f(*args, **kwargs) + dtimer_stop(timer) + return v + + return wrapper + + +# TODO(gp): Add an object that accumulates the times from multiple timers. +# E.g., use a dict for message -> time + + +# ############################################################################# + + +_MemoryMemento = Tuple[int, str, hloggin.MemoryUsage] + + +def dmemory_start(log_level: int, message: str) -> _MemoryMemento: + """ + Start measuring memory. + + :return: memento of the memory profile + """ + _LOG.log(log_level, "%s ...", message) + memory_usage = hloggin.get_memory_usage() + memento = (log_level, message, memory_usage) + return memento + + +def dmemory_stop(memento: _MemoryMemento, *, mode: str = "all") -> str: + """ + Stop measuring memory. 
+ + :return: message as as string + """ + log_level, message, start_memory_usage = memento + end_memory_usage = hloggin.get_memory_usage() + verbose = False + start_mem = hloggin.memory_to_str(start_memory_usage, verbose=verbose) + end_mem = hloggin.memory_to_str(end_memory_usage, verbose=verbose) + diff_mem = tuple(x - y for x, y in zip(end_memory_usage, start_memory_usage)) + diff_mem = hloggin.memory_to_str(diff_mem, verbose=verbose) + # Package the output. + msg = [] + msg.append(f"{message} done:") + if mode == "all": + msg.append(f"start=({start_mem})") + msg.append(f"end=({end_mem})") + msg.append(f"diff=({diff_mem})") + elif mode == "only_diff": + msg.append(f"diff=({diff_mem})") + else: + raise ValueError(f"Invalid mode='{mode}'") + msg = " ".join(msg) + _LOG.log(log_level, msg) + return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py new file mode 100644 index 000000000..bb16ad381 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py @@ -0,0 +1,48 @@ +""" +Import as: + +import helpers.htqdm as htqdm +""" + +import io +import logging +from typing import Any, Optional + +# Avoid dependency from other `helpers` modules, such as `helpers.hjoblib`, to +# prevent import cycles. + + +# ############################################################################# +# TqdmToLogger +# ############################################################################# + + +# From https://github.com/tqdm/tqdm/issues/313 +class TqdmToLogger(io.StringIO): + """ + Output stream for `tqdm` which will output to logger module instead of the + `stdout`. + + Use as: + ``` + from tqdm.autonotebook import tqdm + + tqdm_out = TqdmToLogger(_LOG, level=logging.INFO) + for ... 
tqdm(..., file=tqdm_out): + ``` + """ + + logger = None + level = None + buf = "" + + def __init__(self, logger: Any, level: Optional[int] = None): + super().__init__() + self.logger = logger + self.level = level or logging.INFO + + def write(self, buf: str) -> None: + self.buf = buf.strip("\r\n\t ") + + def flush(self) -> None: + self.logger.log(self.level, self.buf) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py new file mode 100644 index 000000000..03de65ce1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py @@ -0,0 +1,228 @@ +""" +Import as: + +import helpers.htraceback as htraceb +""" + +import logging +import os +import re +from typing import Any, List, Match, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hgit as hgit + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): Move some code to `hcfile.py`. + +# Store elements parsed from a line of a traceback: +# (file_name, line_num, text) +# E.g., +# ("test/test_lib_tasks.py", +# 27, +# "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)" +# ) +CfileRow = Tuple[str, int, str] + + +def cfile_row_to_str(cfile_row: CfileRow) -> str: + # helpers/git.py:295:def get_repo_long_name_from_client(super_module + hdbg.dassert_isinstance(cfile_row, tuple) + return ":".join(list(map(str, cfile_row))) + + +def cfile_to_str(cfile: List[CfileRow]) -> str: + hdbg.dassert_isinstance(cfile, list) + return "\n".join(map(cfile_row_to_str, cfile)) + + +def parse_traceback( + txt: str, *, purify_from_client: bool = True +) -> Tuple[List[CfileRow], Optional[str]]: + """ + Parse a string containing text including a Python traceback. 
+ + :param txt: the text to parse + :param purify_from_client: express the files with respect to the Git root + :return: + - a list of `CFILE_ROW`, e.g., + ``` + ("test/test_lib_tasks.py", + 27, + "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)") + - a string storing the traceback, like: + ``` + Traceback (most recent call last): + File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + ``` + - A `None` value means that no traceback was found. + """ + # TODO(gp): Horrible hack to get the tests to pass. IMO this whole function + # needs to be rewritten using a proper parser or library. Now it's full + # of weird handling of edge cases. + txt += "\n" + # + lines = txt.split("\n") + # pylint: disable=line-too-long + # Remove the artifacts of a GH run. E.g., + # "Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined" -> + # -> "NameError: name 'cofinanc' is not defined". + lines = [ + re.split( + r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+Z ", + line, + )[-1] + for line in lines + ] + state = "look_for" + cfile: List[CfileRow] = [] + i = 0 + start_idx = end_idx = 0 + while i < len(lines): + line = lines[i] + _LOG.debug("state=%-10s i=%d: line='%s'", state, i, line) + if state == "look_for": + if line.startswith("Traceback (most recent call last):"): + start_idx = i + # Update the state. 
+ state = "parse" + i += 1 + continue + elif state == "parse": + # The file looks like: + # File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh + # actual = ltasks._get_gh_issue_title(issue_id, repo) + regex = r"^\s*File \"(.+)\", line (\d+), in (\S+)$" + m = re.match(regex, line) + hdbg.dassert(m, "Can't parse '%s'", line) + m: Match[Any] + file_name = m.group(1) + line_num = int(m.group(2)) + func_name = m.group(3) + _LOG.debug(" -> %s %d %s", file_name, line_num, func_name) + # + # Parse the next line until the next `File...`. + _LOG.debug("Search end of snippet") + j = i + 1 + hdbg.dassert_lte(j, len(lines)) + while j < len(lines): + _LOG.debug(" j=%d: line='%s'", j, lines[j]) + if lines[j].startswith(' File "') or not lines[j].startswith( + " " + ): + _LOG.debug(" Found end of snippet") + break + j += 1 + # Concatenate the lines into a single line. + code = lines[i + 1 : j] + _LOG.debug(" -> code: [%d, %d]\n%s", i, j, "\n".join(code)) + code = map(lambda x: x.rstrip().lstrip(), code) + code_as_single_line = "/".join(code) + _LOG.debug(" -> code_as_single_line=\n%s", code_as_single_line) + # Assemble the result. + file_name = os.path.normpath(file_name) + cfile_row = ( + file_name, + line_num, + func_name + ":" + code_as_single_line, + ) + _LOG.debug(" => cfile_row='%s'", cfile_row_to_str(cfile_row)) + cfile.append(cfile_row) + # Update the state. + if not lines[j].startswith(" "): + _LOG.debug(" Found end of traceback") + end_idx = j + state = "end" + break + state = "parse" + i = j + continue + # + i += 1 + # + if state == "look_for": + # We didn't find a traceback. + cfile = [] + traceback = None + elif state == "end": + if ( + end_idx < len(lines) - 1 + and "Error:" not in lines[end_idx - 1] + and "Error:" in lines[end_idx] + ): + # Extend the traceback to the lines with the error description. 
+ # E.g., for the snippet below: + # ``` + # if repo_short_name == "amp": + # NameError: name 'repo_short_name' is not defined + # ``` + # If the parsed traceback stops at 'if repo_short_name == "amp":', + # and thus, its last line does not include the error description + # ("NameError:..."), and the following line does include the error + # description, then the traceback will be extended to include the + # following line, making the parsed traceback end with the following + # two lines: + # ``` + # if repo_short_name == "amp": + # NameError: name 'repo_short_name' is not defined + # ``` + to_break = False + while end_idx < len(lines) - 1 and not to_break: + end_idx += 1 + line = lines[end_idx] + _LOG.debug( + "Extend traceback: to_break=%s, end_idx=%s, line='%s'", + to_break, + end_idx, + line, + ) + if ( + "________ Test" in line + or "====== slowest 3 durations" in line + ): + # Stop if we have reached the next traceback or the end of the + # pytest report. + to_break = True + hdbg.dassert_lte(0, start_idx) + hdbg.dassert_lte(start_idx, end_idx) + hdbg.dassert_lt(end_idx, len(lines)) + _LOG.debug("start_idx=%d end_idx=%d", start_idx, end_idx) + traceback = "\n".join(lines[start_idx:end_idx]) + else: + raise ValueError(f"Invalid state='{state}'") + _LOG.debug("traceback=\n%s", traceback) + _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) + # Purify filenames from client so that refer to files in this client. + if cfile and purify_from_client: + _LOG.debug("# Purifying from client") + cfile_tmp = [] + for cfile_row in cfile: + file_name, line_num, text = cfile_row + # Leave the files relative to the current dir. 
+ root_dir = hgit.get_client_root(super_module=False) + mode = "return_all_results" + file_names = hgit.find_docker_file( + file_name, root_dir=root_dir, mode=mode + ) + if len(file_names) == 0: + _LOG.warning("Can't find file corresponding to '%s'", file_name) + elif len(file_names) > 1: + _LOG.warning( + "Found multiple potential files corresponding to '%s'", + file_name, + ) + else: + file_name = file_names[0] + cfile_tmp.append((file_name, line_num, text)) + cfile = cfile_tmp + _LOG.debug("# After purifying from client") + _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) + return cfile, traceback diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py new file mode 100644 index 000000000..d706292ed --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +""" +Allow translating text using AWS Translate. It can be used as a module or CLI +tool. + +Supported languages and languages codes: +https://docs.aws.amazon.com/translate/latest/dg/what-is.html + +Import as: + +import helpers.htranslate as htransl +""" + +import argparse +import configparser +import logging +import pathlib +import sys +from typing import Optional, Tuple + +import boto3 + +_LOG = logging.getLogger(__name__) + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "lang", + help=( + "source language code. 
" + "https://docs.aws.amazon.com/translate/latest/dg/what-is.html" + ), + ) + parser.add_argument("text", help="string to translate") + parser.add_argument( + "--aws", + type=pathlib.Path, + dest="credentials", + default=pathlib.Path().home() / ".aws/credentials", + help="Path to the aws credentials file.", + ) + return parser.parse_args() + + +def _load_credentials(conf_path: pathlib.Path) -> Tuple[str, str]: + """ + Load aws credentilas from config file. + + :param conf_path:credentials file path. + :return: A tuple consist of aws_access and aws_secret keys. + """ + config = configparser.ConfigParser() + config.read(conf_path) + try: + access = config.get("default", "aws_access_key_id") + secret = config.get("default", "aws_secret_access_key") + except configparser.NoOptionError as err: + _LOG.error("Unable to read option for: %s", err.args) + sys.exit(1) + else: + return access, secret + + +# ############################################################################# +# TranslateAPI +# ############################################################################# + + +class TranslateAPI: + def __init__( + self, + aws_access_key: str, + aws_secret_key: str, + region: Optional[str] = "us-east-2", + ) -> None: + self._translate = boto3.client( + service_name="translate", + region_name=region, + use_ssl=True, + aws_access_key_id=aws_access_key, + aws_secret_access_key=aws_secret_key, + ) + + def translate_text(self, text: str, lang_code: str) -> str: + """ + Translate given text into English. Amazon has a limit on text size: + 5,000 bytes. + + :param text: Foreing language text. + :param lang_code: Language code in accordance with supported + languages and code of Amazon. + :return: English text. 
+ """ + tr = self._translate.translate_text( + Text=text, SourceLanguageCode=lang_code, TargetLanguageCode="en" + ) + return str(tr.get("TranslatedText")) + + +if __name__ == "__main__": + args = _parse_args() + aws_access, aws_secret = _load_credentials(args.credentials) + api = TranslateAPI(aws_access, aws_secret) + result = api.translate_text(args.text, args.lang) + print(result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py new file mode 100644 index 000000000..1bb3472d7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py @@ -0,0 +1,11 @@ +""" +Contain general types based on standard Python libraries. + +Import as: + +import helpers.htypes as htypes +""" + +from typing import Any, Dict + +Kwargs = Dict[str, Any] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py new file mode 100644 index 000000000..d585faeef --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py @@ -0,0 +1,1876 @@ +""" +Enhanced unit testing framework built on top of unittest and pytest. 
+ +This module provides: +- TestCase base class with golden file testing capabilities +- Utilities for comparing strings, dataframes, and other outputs +- Test outcome management with update and incremental modes +- Directory management for input, output, and scratch space +- Integration with Git for managing test outcomes + +Import as: + +import helpers.hunit_test as hunitest +""" + +import abc +import collections +import inspect +import logging +import os +import pprint +import random +import re +import sys +import traceback +import unittest +from typing import Any, Dict, List, Mapping, Optional, Tuple + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.htimer as htimer +import helpers.hunit_test_purification as huntepur +import helpers.hwall_clock_time as hwacltim +import helpers.repo_config_utils as hrecouti + +# We use strings as type hints (e.g., 'pd.DataFrame') since we are not sure +# we have the corresponding libraries installed. + + +# Minimize dependencies from installed packages. + +# TODO(gp): Use `hprint.color_highlight`. +_WARNING = "\033[33mWARNING\033[0m" + +try: + import numpy as np + + _HAS_NUMPY = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_NUMPY = False +try: + import pandas as pd + + _HAS_PANDAS = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_PANDAS = False + +try: + import matplotlib.pyplot as plt + + _HAS_MATPLOTLIB = True +except ImportError as e: + print(_WARNING + ": " + str(e)) + _HAS_MATPLOTLIB = False + + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. +_LOG.setLevel(logging.INFO) + +# ############################################################################# + +# Global setter / getter for updating test. + +# This controls whether the output of a test is updated or not. 
# Set by `conftest.py`.
_UPDATE_TESTS = False


# TODO(gp): -> ..._update_outcomes.
def set_update_tests(val: bool) -> None:
    """
    Set the global flag for updating test outcomes.

    :param val: True to enable updating test outcomes, False otherwise
    """
    global _UPDATE_TESTS
    _UPDATE_TESTS = val


def get_update_tests() -> bool:
    """
    Get the current state of the update tests flag.

    :return: True if test outcomes should be updated, False otherwise
    """
    return _UPDATE_TESTS


# #############################################################################

# Global setter / getter for incremental mode.

# This is useful when a long test wants to reuse some data already generated.
# Set by conftest.py.
_INCREMENTAL_TESTS = False


def set_incremental_tests(val: bool) -> None:
    """
    Set the global flag for incremental test mode.

    :param val: True to enable incremental mode, False otherwise
    """
    global _INCREMENTAL_TESTS
    _INCREMENTAL_TESTS = val


def get_incremental_tests() -> bool:
    """
    Get the current state of the incremental tests flag.

    :return: True if incremental mode is enabled, False otherwise
    """
    return _INCREMENTAL_TESTS


# #############################################################################

_CONFTEST_IN_PYTEST = False


# TODO(gp): Use https://stackoverflow.com/questions/25188119
# TODO(gp): -> is_in_unit_test()
def in_unit_test_mode() -> bool:
    """
    Return True if we are inside a pytest run.

    This is set by `conftest.py`.
    """
    return _CONFTEST_IN_PYTEST


# #############################################################################


# Set by `conftest.py`.
_GLOBAL_CAPSYS = None


def pytest_print(txt: str) -> None:
    """
    Print bypassing `pytest` output capture.

    If `_GLOBAL_CAPSYS` has not been set (i.e., it is still `None`) there is
    no capture object to disable, and the text is written directly to
    `sys.stdout`.

    :param txt: text to write, emitted as is (no newline is appended)
    """
    capsys = _GLOBAL_CAPSYS
    if capsys is None:
        # Avoid `AttributeError: 'NoneType' object has no attribute 'disabled'`.
        sys.stdout.write(txt)
        return
    with capsys.disabled():  # type: ignore
        sys.stdout.write(txt)
# #############################################################################
# Generation and conversion functions.
# #############################################################################


# TODO(gp): Is this dataflow Info? If so it should go somewhere else.
def convert_info_to_string(info: Mapping) -> str:
    """
    Render `info` as a string suitable for checking test results.

    The pretty-printing runs inside a pandas option context that lifts the
    limits on column width, number of columns and number of rows, so that
    pandas objects stored in `info` are printed in full.

    :param info: info to convert to string
    :return: a frame with the text "info" followed by the pretty-printed
        content of `info`
    """
    # Options that remove the truncation of pandas objects.
    pd_display_opts = (
        "display.max_colwidth",
        int(1e6),
        "display.max_columns",
        None,
        "display.max_rows",
        None,
    )
    with pd.option_context(*pd_display_opts):
        banner = hprint.frame("info")
        body = pprint.pformat(info)
    return "\n".join([banner, body])


# TODO(gp): This seems the python3.9 version of `to_str`. Remove if possible.
def to_string(var: str) -> str:
    """
    Build the text of an f-string that shows the name and value of `var`.

    E.g., `to_string("x")` returns the 7 characters `f"x={x}`. Note that the
    returned text contains the opening double quote but no closing one.
    NOTE(review): confirm whether callers add the closing quote themselves.

    :param var: name of the variable (or expression) to display
    :return: the text `f"<var>={<var>}`
    """
    prefix = 'f"'
    return prefix + var + "={" + var + "}"
+ + :param tag: add a banner the tag + :param abort_on_exit: whether to assert or not + :param dst_dir: dir where to save the comparing script + """ + _LOG.debug(hprint.func_signature_to_str()) + file_name1 = os.path.relpath(file_name1, os.getcwd()) + file_name2 = os.path.relpath(file_name2, os.getcwd()) + msg = [] + # Add tag. + if tag is not None: + msg.append("\n" + hprint.frame(tag, char1="-")) + # Diff to screen. + _, res = hsystem.system_to_string( + f"echo; sdiff --expand-tabs -l -w 150 {file_name1} {file_name2}", + abort_on_error=False, + log_level=logging.DEBUG, + ) + msg.append(res) + # Save a script to diff. + diff_script = os.path.join(dst_dir, "tmp_diff.sh") + vimdiff_cmd = f""" + #!/bin/bash + if [[ $1 == "wrap" ]]; then + cmd='vimdiff -c "windo set wrap"' + else + cmd='vimdiff' + fi; + cmd="$cmd {file_name1} {file_name2}" + eval $cmd + """ + vimdiff_cmd = hprint.dedent(vimdiff_cmd) + # TODO(gp): Use hio.create_executable_script(). + hio.to_file(diff_script, vimdiff_cmd) + cmd = "chmod +x " + diff_script + hsystem.system(cmd) + # Report how to diff. + msg.append("Diff with:") + msg.append("> " + diff_script) + msg_as_str = "\n".join(msg) + # Append also error_msg to the current message. + if error_msg: + msg_as_str += "\n" + error_msg + # Add also the stack trace to the logging error. + if False: + log_msg_as_str = ( + msg_as_str + + "\n" + + hprint.frame("Traceback", char1="-") + + "\n" + + "".join(traceback.format_stack()) + ) + _LOG.error(log_msg_as_str) + # Assert. + if abort_on_exit: + raise RuntimeError(msg_as_str) + + +# ############################################################################# + + +def _remove_spaces(txt: str) -> str: + """ + Remove leading / trailing spaces and empty lines. + + This is used to implement fuzzy matching. + """ + txt = txt.replace("\\n", "\n").replace("\\t", "\t") + # Convert multiple empty spaces (but not newlines) into a single one. + txt = re.sub(r"[^\S\n]+", " ", txt) + # Remove insignificant crap. 
+ lines = [] + for line in txt.split("\n"): + # Remove leading and trailing spaces. + line = re.sub(r"^\s+", "", line) + line = re.sub(r"\s+$", "", line) + # Skip empty lines. + if line != "": + lines.append(line) + txt = "\n".join(lines) + return txt + + +def _remove_banner_lines(txt: str) -> str: + """ + Remove lines of separating characters long at least 20 characters. + """ + txt_tmp: List[str] = [] + for line in txt.split("\n"): + if re.match(r"^\s*[\#\-><=]{20,}\s*$", line): + continue + txt_tmp.append(line) + txt = "\n".join(txt_tmp) + return txt + + +def _fuzzy_clean(txt: str) -> str: + """ + Remove irrelevant artifacts to make string comparison less strict. + """ + hdbg.dassert_isinstance(txt, str) + # Ignore spaces. + txt = _remove_spaces(txt) + # Ignore separation lines. + txt = _remove_banner_lines(txt) + return txt + + +def _ignore_line_breaks(txt: str) -> str: + """ + Replace all line breaks with spaces for loose comparison. + + :param txt: the input text + :return: text with line breaks replaced by spaces + """ + # Ignore line breaks. + txt = txt.replace("\n", " ") + return txt + + +def _sort_lines(txt: str) -> str: + """ + Sort the lines in alphabetical order. + + This is used when we want to perform a comparison of equality but + without order. Of course there are false negatives, since the + relative order of lines might matter. + """ + lines = txt.split("\n") + lines.sort() + lines = "\n".join(lines) + return lines + + +def _save_diff( + actual: str, + expected: str, + tag: str, + test_dir: str, +) -> None: + """ + Save actual and expected strings to temporary files for comparison. + + :param actual: the actual test output + :param expected: the expected test output + :param tag: identifier tag for the files + :param test_dir: directory to save files in + """ + if tag != "": + tag += "." + # Save expected strings to dir. 
+ for dst_dir in (".", test_dir): + act_file_name = f"{dst_dir}/tmp.{tag}actual.txt" + hio.to_file(act_file_name, actual) + exp_file_name = f"{dst_dir}/tmp.{tag}expected.txt" + hio.to_file(exp_file_name, expected) + + +def assert_equal( + actual: str, + expected: str, + full_test_name: str, + test_dir: str, + *, + check_string: bool = False, + remove_lead_trail_empty_lines: bool = False, + dedent: bool = False, + purify_text: bool = False, + purify_expected_text: bool = False, + fuzzy_match: bool = False, + ignore_line_breaks: bool = False, + split_max_len: Optional[int] = None, + sort: bool = False, + abort_on_error: bool = True, + dst_dir: str = ".", + error_msg: str = "", +) -> bool: + """ + See interface in `TestCase.assert_equal()`. + + :param full_test_name: e.g., `TestRunNotebook1.test2` + :param check_string: if it was invoked by `check_string()` or directly + """ + _LOG.debug(hprint.func_signature_to_str("actual expected")) + # Store a mapping tag after each transformation (e.g., original, sort, ...) to + # (actual, expected). + values: Dict[str, str] = collections.OrderedDict() + + def _append(tag: str, actual: str, expected: str) -> None: + _LOG.debug( + "tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected + ) + hdbg.dassert_not_in(tag, values) + values[tag] = (actual, expected) + + # + _LOG.debug("Before any transformation:") + tag = "original" + _append(tag, actual, expected) + # 1) Remove white spaces. + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_white_spaces(actual) + expected = text_purifier.purify_white_spaces(expected) + tag = "purify_white_spaces" + _append(tag, actual, expected) + # Remove empty leading / trailing lines. 
+ if remove_lead_trail_empty_lines: + tag = "remove_lead_trail_empty_lines" + actual = hprint.remove_lead_trail_empty_lines(actual) + expected = hprint.remove_lead_trail_empty_lines(expected) + _append(tag, actual, expected) + # Dedent only expected since we often align it to make it look more readable + # in the Python code, if needed. + if dedent: + tag = "dedent" + expected = hprint.dedent(expected) + _append(tag, actual, expected) + # Purify text, if needed. + if purify_text: + tag = "purify_text" + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + if purify_expected_text: + expected = text_purifier.purify_txt_from_client(expected) + _append(tag, actual, expected) + # Ensure that there is a single `\n` at the end of the strings. + actual = actual.rstrip("\n") + "\n" + expected = expected.rstrip("\n") + "\n" + # Sort the lines. + if sort: + tag = "sort" + actual = _sort_lines(actual) + expected = _sort_lines(expected) + _append(tag, actual, expected) + # Fuzzy match, if needed. + if fuzzy_match: + tag = "fuzzy_match" + actual = _fuzzy_clean(actual) + expected = _fuzzy_clean(expected) + _append(tag, actual, expected) + # Ignore line breaks, if needed. + if ignore_line_breaks: + tag = "ignore_line_breaks" + actual = _ignore_line_breaks(actual) + expected = _ignore_line_breaks(expected) + _append(tag, actual, expected) + # Split the strings into lines of at most `split_max_len` characters. + if split_max_len: + tag = "split_max_len" + actual = hprint.strict_split(actual, split_max_len) + expected = hprint.strict_split(expected, split_max_len) + _append(tag, actual, expected) + # Check. 
+ tag = "final" + _append(tag, actual, expected) + # + is_equal = expected == actual + _LOG.debug(hprint.to_str("is_equal")) + if is_equal: + return is_equal + _LOG.error( + "%s", + "\n" + + hprint.frame( + f"Test '{full_test_name}' failed", char1="=", num_chars=80 + ), + ) + if not check_string: + # If this is a `self.assert_equal()` and not a `self.check_string()`, + # then print the correct output, like: + # expected = r'""" + # 2021-02-17 09:30:00-05:00 + # 2021-02-17 10:00:00-05:00 + # 2021-02-17 11:00:00-05:00 + # """ + txt = [] + txt.append(hprint.frame(f"ACTUAL VARIABLE: {full_test_name}", char1="-")) + # TODO(gp): Switch to expected or expected_result. + exp_var = "expected = r" + # We always return the variable exactly as this should be, even if we + # could make it look better through indentation in case of fuzzy match. + actual_orig = values["original"][0] + if actual_orig.startswith('"'): + sep = "'''" + else: + sep = '"""' + exp_var += sep + if fuzzy_match: + # We can print in a more readable way since spaces don't matter. + exp_var += "\n" + exp_var += actual_orig + if fuzzy_match: + # We can print in a more readable way since spaces don't matter. + exp_var += "\n" + exp_var += sep + # Save the expected variable to files. + exp_var_file_name = f"{test_dir}/tmp.exp_var.txt" + hio.to_file(exp_var_file_name, exp_var) + # + exp_var_file_name = "tmp.exp_var.txt" + hio.to_file(exp_var_file_name, exp_var) + _LOG.info("Saved exp_var in %s", exp_var_file_name) + # + txt.append(exp_var) + txt = "\n".join(txt) + error_msg += txt + # Save all the values after the transformations. + debug = False + if debug: + for idx, key in enumerate(values.keys()): + actual_tmp, expected_tmp = values[key] + tag = f"{idx}.{key}" + _save_diff(actual_tmp, expected_tmp, tag, test_dir) + else: + key = "final" + actual_tmp, expected_tmp = values[key] + _save_diff(actual_tmp, expected_tmp, key, test_dir) + # Compare the last values. 
# TODO(gp): @all move to hpandas
def compare_df(df1: "pd.DataFrame", df2: "pd.DataFrame") -> None:
    """
    Compare two dfs including their metadata.

    The values are compared with `df1.equals(df2)`. If they match, a textual
    signature of each df (string representation, dtypes and, when the index
    has a `freq` attribute, the index frequency) is compared through
    `assert_equal()`.

    :param df1: first dataframe
    :param df2: second dataframe
    :raises ValueError: with message "Dfs are different" if the values differ
    """
    if not df1.equals(df2):
        # `DataFrame.compare()` only accepts identically-labeled frames and
        # raises its own `ValueError` otherwise: don't let that mask the
        # error reported by this function.
        try:
            diff: Any = df1.compare(df2)
        except ValueError as e:
            diff = f"Can't compute the cell-wise difference: {e}"
        print(diff)
        raise ValueError("Dfs are different")

    def _compute_df_signature(df: "pd.DataFrame") -> str:
        # The labels always say `df1` since the same function is used for
        # both dfs and the resulting strings are compared to each other.
        txt = []
        txt.append(f"df1=\n{str(df)}")
        txt.append(f"df1.dtypes=\n{str(df.dtypes)}")
        if hasattr(df.index, "freq"):
            txt.append(f"df1.index.freq=\n{str(df.index.freq)}")
        return "\n".join(txt)

    full_test_name = "dummy"
    test_dir = "."
    assert_equal(
        _compute_df_signature(df1),
        _compute_df_signature(df2),
        full_test_name,
        test_dir,
    )
+def get_dir_signature( + dir_name: str, + include_file_content: bool, + *, + remove_dir_name: bool = False, + num_lines: Optional[int] = None, +) -> str: + """ + Compute a string with the content of the files in `dir_name`. + + :param include_file_content: include the content of the files, besides the + name of files and directories + :param remove_dir_name: use paths relative to `dir_name` + :param num_lines: number of lines to include for each file + + The output looks like: + ``` + # Dir structure + $GIT_ROOT/.../tmp.scratch + $GIT_ROOT/.../tmp.scratch/dummy_value_1=1 + $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A + $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet + ... + + # File signatures + len(file_names)=3 + file_names=$GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet, + $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet, ... + # $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet + num_lines=13 + ''' + original shape=(1, 1) + Head: + { + "0":{ + "dummy_value_3":0 + } + } + Tail: + { + "0":{ + "dummy_value_3":0 + } + } + ''' + # $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet + ``` + """ + + def _remove_dir_name(file_name: str) -> str: + if remove_dir_name: + res = os.path.relpath(file_name, dir_name) + else: + res = file_name + return res + + txt: List[str] = [] + # Find all the files under `dir_name`. + _LOG.debug("dir_name=%s", dir_name) + hdbg.dassert_path_exists(dir_name) + cmd = f'find {dir_name} -name "*"' + remove_files_non_present = False + dir_name_tmp = None + file_names = hsystem.system_to_files( + cmd, dir_name_tmp, remove_files_non_present + ) + file_names = sorted(file_names) + # Save the directory / file structure. + txt.append("# Dir structure") + txt.append("\n".join(map(_remove_dir_name, file_names))) + # + if include_file_content: + txt.append("# File signatures") + # Remove the directories. 
# TODO(gp): GSI. Use the copy in helpers/hprint.py
def filter_text(regex: Optional[str], txt: str) -> str:
    """
    Remove lines in `txt` that match the regex `regex`.

    :param regex: pattern searched (with `re.search()`) in each line; `None`
        means that no filtering is applied
    :param txt: text to filter, split on newlines
    :return: `txt` without the matching lines
    """
    if regex is None:
        # Nothing to filter.
        return txt
    _LOG.debug("Filtering with '%s'", regex)
    # Compile once, instead of looking up the pattern for every line.
    pattern = re.compile(regex)
    txt_out = []
    txt_as_arr = txt.split("\n")
    for line in txt_as_arr:
        if pattern.search(line):
            _LOG.debug("Skipping line='%s'", line)
            continue
        txt_out.append(line)
    # We can only remove lines.
    hdbg.dassert_lte(
        len(txt_out),
        len(txt_as_arr),
        "txt_out=\n'''%s'''\ntxt=\n'''%s'''",
        "\n".join(txt_out),
        "\n".join(txt_as_arr),
    )
    txt = "\n".join(txt_out)
    return txt
+ file_name1 = f"{dst_dir}/tmp.string1.txt" + hio.to_file(file_name1, string1) + # + file_name2 = f"{dst_dir}/tmp.string2.txt" + hio.to_file(file_name2, string2) + # Compare with diff_files. + if tag is None: + tag = "string1 vs string2" + diff_files( + file_name1, + file_name2, + tag=tag, + abort_on_exit=abort_on_exit, + dst_dir=dst_dir, + ) + + +def diff_df_monotonic( + df: "pd.DataFrame", + *, + tag: Optional[str] = None, + abort_on_exit: bool = True, + dst_dir: str = ".", +) -> None: + """ + Check for a dataframe to be monotonic using the vimdiff flow from + diff_files(). + """ + _LOG.debug(hprint.to_str("abort_on_exit dst_dir")) + if not df.index.is_monotonic_increasing: + df2 = df.copy() + df2.sort_index(inplace=True) + diff_strings( + df.to_csv(), + df2.to_csv(), + tag=tag, + abort_on_exit=abort_on_exit, + dst_dir=dst_dir, + ) + + +# ############################################################################# + + +# pylint: disable=protected-access +def get_pd_default_values() -> "pd._config.config.DictWrapper": + """ + Get a deep copy of the current pandas default options. + + :return: a copy of pandas configuration options + """ + import copy + + vals = copy.deepcopy(pd.options) + return vals + + +def set_pd_default_values() -> None: + """ + Set pandas display options to standard default values for testing. + + This ensures consistent output across different test environments. 
+ """ + # 'display': + default_pd_values = { + "chop_threshold": None, + "colheader_justify": "right", + "date_dayfirst": False, + "date_yearfirst": False, + "encoding": "UTF-8", + "expand_frame_repr": True, + "float_format": None, + "html": {"border": 1, "table_schema": False, "use_mathjax": True}, + "large_repr": "truncate", + "latex": { + "escape": True, + "longtable": False, + "multicolumn": True, + "multicolumn_format": "l", + "multirow": False, + "repr": False, + }, + "max_categories": 8, + "max_columns": 20, + "max_colwidth": 50, + "max_info_columns": 100, + "max_info_rows": 1690785, + "max_rows": 60, + "max_seq_items": 100, + "memory_usage": True, + "min_rows": 10, + "multi_sparse": True, + "notebook_repr_html": True, + "pprint_nest_depth": 3, + "precision": 6, + "show_dimensions": "truncate", + "unicode": {"ambiguous_as_wide": False, "east_asian_width": False}, + "width": 80, + } + section = "display" + for key, new_val in default_pd_values.items(): + if isinstance(new_val, dict): + continue + full_key = f"{section}.{key}" + old_val = pd.get_option(full_key) + if old_val != new_val: + _LOG.debug( + "-> Assigning a different value: full_key=%s, " + "old_val=%s, new_val=%s", + full_key, + old_val, + new_val, + ) + pd.set_option(full_key, new_val) + + +# If a golden outcome is missing asserts (instead of updating golden and adding +# it to Git repo, corresponding to "update"). +_ACTION_ON_MISSING_GOLDEN = "assert" + + +# ############################################################################# +# TestCase +# ############################################################################# + + +# TODO(gp): Remove all the calls to `dedent()` and use the `dedent` switch. +class TestCase(unittest.TestCase): + """ + Add some functions to compare actual results to a golden outcome. + """ + + def setUp(self) -> None: + """ + Execute before any test method. 
+ """ + # Set up the base class in case it does something, current + # implementation does nothing, see + # https://docs.python.org/3/library/unittest.html#unittest.TestCase.setUp. + super().setUp() + # Print banner to signal the start of a new test. + func_name = f"{self.__class__.__name__}.{self._testMethodName}" + _LOG.info("\n%s", hprint.frame(func_name)) + # Set the random seed. + random_seed = 20000101 + _LOG.debug("Resetting random.seed to %s", random_seed) + random.seed(random_seed) + if _HAS_NUMPY: + _LOG.debug("Resetting np.random.seed to %s", random_seed) + np.random.seed(random_seed) + # Disable matplotlib plotting by overwriting the `show` function. + if _HAS_MATPLOTLIB: + plt.show = lambda: 0 + # Name of the dir with artifacts for this test. + self._scratch_dir: Optional[str] = None + # The base directory is the one including the class under test. + self._base_dir_name = os.path.dirname(inspect.getfile(self.__class__)) + _LOG.debug("base_dir_name=%s", self._base_dir_name) + # Store whether a test needs to be updated or not. + self._update_tests = get_update_tests() + self._overriden_update_tests = False + # Store whether the golden outcome of this test was updated. + self._test_was_updated = False + # Store whether the output files need to be added to hgit. + self._git_add = True + # Error message printed when comparing actual and expected outcome. + self._error_msg = "" + # Set the default pandas options (see AmpTask1140). + if _HAS_PANDAS: + self._old_pd_options = get_pd_default_values() + set_pd_default_values() + # Reset the timestamp of the current bar. + hwacltim.reset_current_bar_timestamp() + # Start the timer to measure the execution time of the test. + self._timer = htimer.Timer() + + def tearDown(self) -> None: + """ + Execute after each test method completes. + + Handles cleanup, timing, and restoration of default settings. + """ + # Stop the timer to measure the execution time of the test. 
+ self._timer.stop() + pytest_print("(%.2f s) " % self._timer.get_total_elapsed()) + # Report if the test was updated + if self._test_was_updated: + if not self._overriden_update_tests: + pytest_warning("Test was updated) ", prefix="(") + else: + # We forced an update from the unit test itself, so no need + # to report an update. + pass + # Recover the original default pandas options. + if _HAS_PANDAS: + pd.options = self._old_pd_options + # Force matplotlib to close plots to decouple tests. + if _HAS_MATPLOTLIB: + plt.close() + plt.clf() + # Delete the scratch dir, if needed. + if self._scratch_dir and os.path.exists(self._scratch_dir): + if False: + # We want to keep this if the test failed, as an alternative + # to just re-running with --incremental. + result = self._outcome.result + # From https://stackoverflow.com/questions/4414234/getting-pythons-unittest-results-in-a-teardown-method + # https://github.com/pytest-dev/pytest/issues/10631 + # This doesn't work any longer. + # has_error = test_result.failures or test_result.errors + has_error = result._excinfo is not None + else: + # TODO(gp): The problem is that when there is a failure during + # the regressions, having artifacts in the scratch dir causes + # more tests to fail (especially the ones in the cycle detector). + # We need to make tests more robust to this and then we can enable + # the logic to keep files for the failed tests in the scratch dir. + has_error = False + if has_error or get_incremental_tests(): + _LOG.warning("Skipping deleting %s", self._scratch_dir) + else: + _LOG.debug("Deleting %s", self._scratch_dir) + hio.delete_dir(self._scratch_dir) + # Tear down the base class in case it does something, current + # implementation does nothing, see + # https://docs.python.org/3/library/unittest.html#unittest.TestCase.tearDown. + super().tearDown() + + def set_base_dir_name(self, base_dir_name: str) -> None: + """ + Set the base directory for the input, output, and scratch directories. 
+ + This is used to override the standard location of the base + directory which is close to the class under test. + """ + self._base_dir_name = base_dir_name + _LOG.debug("Setting base_dir_name to '%s'", self._base_dir_name) + hio.create_dir(self._base_dir_name, incremental=True) + + def mock_update_tests(self) -> None: + """ + When unit testing the unit test framework we want to test updating the + golden outcome. + """ + self._update_tests = True + self._overriden_update_tests = True + self._git_add = False + + def _get_current_path( + self, + use_only_class_name: bool, + test_class_name: Optional[str], + test_method_name: Optional[str], + use_absolute_path: bool, + ) -> str: + """ + Return the name of the directory containing the input / output data. + + E.g., + ``` + ./core/dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare + ``` + + The parameters have the same meaning as in `get_input_dir()`. + """ + if test_class_name is None: + test_class_name = self.__class__.__name__ + if use_only_class_name: + # Use only class name. + dir_name = test_class_name + else: + # Use both class and test method. + if test_method_name is None: + test_method_name = self._testMethodName + dir_name = f"{test_class_name}.{test_method_name}" + if use_absolute_path: + # E.g., `.../dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare`. + dir_name = os.path.join(self._base_dir_name, "outcomes", dir_name) + else: + # E.g., `outcomes/TestContinuousSarimaxModel.test_compare`. + dir_name = os.path.join("outcomes", dir_name) + return dir_name + + def get_input_dir( + self, + *, + use_only_test_class: bool = False, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + use_absolute_path: bool = True, + ) -> str: + """ + Return the path of the directory storing input data for this test + class. + + E.g., `TestLinearRegression1.test1`. + + :param use_only_test_class: use only the name on the test class and not of + the method. 
E.g., when one wants all the test methods to use a single + file for testing + :param test_class_name: `None` uses the current test class name + :param test_method_name: `None` uses the current test method name + :param use_absolute_path: use the path from the file containing the test + :return: dir name + """ + # Get the dir of the test. + dir_name = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + # Add `input` to the dir. + dir_name = os.path.join(dir_name, "input") + return dir_name + + def get_output_dir( + self, + *, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + ) -> str: + """ + Return the path of the directory storing output data for this test + class. + + :param test_class_name: override the current test class name + :param test_method_name: override the current test method name + :return: dir name + """ + # The output dir is specific of this dir. + use_only_test_class = False + use_absolute_path = True + dir_name = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + # Add `output` to the dir. + dir_name = os.path.join(dir_name, "output") + return dir_name + + # TODO(gp): -> get_scratch_dir(). + def get_scratch_space( + self, + *, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + use_absolute_path: bool = True, + ) -> str: + """ + Return the path of the directory storing scratch data for this test. + + The directory is also created and cleaned up based on whether + the incremental behavior is enabled or not. + """ + if self._scratch_dir is None: + # Create the dir on the first invocation on a given test. + use_only_test_class = False + dir_name = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + # Add `tmp.scratch` to the dir. 
+ dir_name = os.path.join(dir_name, "tmp.scratch") + # On the first invocation create the dir. + incremental = get_incremental_tests() + hio.create_dir(dir_name, incremental=incremental) + # Store the value. + self._scratch_dir = dir_name + return self._scratch_dir + + def get_s3_scratch_dir( + self, + *, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + ) -> str: + """ + Return the path of a directory storing scratch data on S3 for this + test. + + E.g., + s3://alphamatic-data/tmp/cache.unit_test/ + root.98e1cf5b88c3.amp.TestTestCase1.test_get_s3_scratch_dir1 + """ + # Make the path unique for the test. + use_only_test_class = False + use_absolute_path = False + test_path = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + # Make the path unique for the current user. + user_name = hsystem.get_user_name() + server_name = hsystem.get_server_name() + project_dirname = hgit.get_project_dirname() + dir_name = f"{user_name}.{server_name}.{project_dirname}" + # Assemble everything in a single path. + import helpers.hs3 as hs3 + + aws_profile = "ck" + s3_bucket = hs3.get_s3_bucket_path_unit_test(aws_profile) + scratch_dir = f"{s3_bucket}/tmp/cache.unit_test/{dir_name}.{test_path}" + return scratch_dir + + def get_s3_input_dir( + self, + *, + use_only_test_class: bool = False, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + use_absolute_path: bool = False, + ) -> str: + """ + Return the S3 path for storing input data for this test. 
+ + :param use_only_test_class: use only the test class name, not method + :param test_class_name: override the current test class name + :param test_method_name: override the current test method name + :param use_absolute_path: use the path from the file containing the test + :return: S3 path for test input data + """ + s3_bucket = hrecouti.get_repo_config().get_unit_test_bucket_path() + hdbg.dassert_isinstance(s3_bucket, str) + # Make the path unique for the test. + test_path = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + use_absolute_path=use_absolute_path, + ) + hdbg.dassert_isinstance(test_path, str) + # Assemble everything in a single path. + input_dir = os.path.join(s3_bucket, test_path) + return input_dir + + def _get_test_name(self) -> str: + """ + Return the full test name as `class.method`. + """ + return f"{self.__class__.__name__}.{self._testMethodName}" + + # /////////////////////////////////////////////////////////////////////// + + def assert_equal( + self, + actual: str, + expected: str, + *, + remove_lead_trail_empty_lines: bool = False, + dedent: bool = False, + purify_text: bool = False, + purify_expected_text: bool = False, + fuzzy_match: bool = False, + ignore_line_breaks: bool = False, + split_max_len: Optional[int] = None, + sort: bool = False, + abort_on_error: bool = True, + dst_dir: str = ".", + ) -> bool: + """ + Return if `actual` and `expected` are different and report the + difference. + + Implement a better version of `self.assertEqual()` that reports + mismatching strings with sdiff and save them to files for + further analysis with vimdiff. + + The interface is similar to `check_string()`. 
+ """ + _LOG.debug(hprint.to_str("fuzzy_match abort_on_error dst_dir")) + hdbg.dassert_in(type(actual), (bytes, str), "actual=%s", str(actual)) + hdbg.dassert_in( + type(expected), (bytes, str), "expected=%s", str(expected) + ) + # Get the current dir name. + use_only_test_class = False + test_class_name = None + test_method_name = None + use_absolute_path = True + dir_name = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + _LOG.debug("dir_name=%s", dir_name) + hio.create_dir(dir_name, incremental=True) + hdbg.dassert_path_exists(dir_name) + # + test_name = self._get_test_name() + is_equal = assert_equal( + actual, + expected, + test_name, + dir_name, + check_string=False, + remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, + dedent=dedent, + purify_text=purify_text, + purify_expected_text=purify_expected_text, + fuzzy_match=fuzzy_match, + ignore_line_breaks=ignore_line_breaks, + split_max_len=split_max_len, + sort=sort, + abort_on_error=abort_on_error, + dst_dir=dst_dir, + ) + return is_equal + + def assert_dfs_close( + self, + actual: "pd.DataFrame", + expected: "pd.DataFrame", + **kwargs: Any, + ) -> None: + """ + Assert dfs have same indexes and columns and that all values are close. + + This is a more robust alternative to `compare_df()`. In + particular, it is less sensitive to floating point round-off + errors. + """ + self.assertEqual(actual.index.to_list(), expected.index.to_list()) + self.assertEqual(actual.columns.to_list(), expected.columns.to_list()) + # Often the output of a failing assertion is difficult to parse + # so we resort to our special `assert_equal()`. 
+ if not np.allclose(actual, expected, **kwargs): + import helpers.hpandas as hpandas + + self.assert_equal( + hpandas.df_to_str(actual), hpandas.df_to_str(expected) + ) + np.testing.assert_allclose(actual, expected, **kwargs) + + # /////////////////////////////////////////////////////////////////////// + + # TODO(gp): This needs to be moved to `helper.git` and generalized. + def _git_add_file(self, file_name: str) -> None: + """ + Add to git repo `file_name`, if needed. + """ + _LOG.debug(hprint.to_str("file_name")) + if self._git_add: + # Find the file relative to here. + mode = "assert_unless_one_result" + # The problem is that when we run from an included repo, we look + # for files like: + # ``` + # helpers_root/helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt + # ``` + # but in our directory we find files like: + # ``` + # helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt + # ``` + # so we need to make the file relative to the innermost repo. + git_root = hgit.get_client_root(super_module=False) + rel_file_name = os.path.relpath(file_name, git_root) + _LOG.debug(hprint.to_str("rel_file_name")) + file_names_tmp = hgit.find_docker_file(rel_file_name, mode=mode) + hdbg.dassert_eq(len(file_names_tmp), 1) + file_name_tmp = file_names_tmp[0] + _LOG.debug(hprint.to_str("file_name_tmp")) + cmd = f"cd amp; git add -u {file_name_tmp}" + rc = hsystem.system(cmd, abort_on_error=False) + if rc: + pytest_warning( + f"Can't git add file\n'{file_name}' -> '{file_name_tmp}'\n" + "You need to git add the file manually\n", + prefix="\n", + ) + pytest_print(f"> {cmd}\n") + + def _check_string_update_outcome( + self, file_name: str, actual: str, use_gzip: bool + ) -> None: + """ + Update the golden outcome file with actual test output. 
+ + :param file_name: path to the golden outcome file + :param actual: the actual test output to save + :param use_gzip: whether to compress the file with gzip + """ + _LOG.debug(hprint.to_str("file_name")) + hio.to_file(file_name, actual, use_gzip=use_gzip) + # Add to git repo. + self._git_add_file(file_name) + + # /////////////////////////////////////////////////////////////////////// + + def _get_golden_outcome_file_name( + self, + tag: str, + *, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + ) -> Tuple[str, str]: + """ + Get the directory and file name for the golden outcome file. + + :param tag: identifier tag for the golden outcome file + :param test_class_name: override the current test class name + :param test_method_name: override the current test method name + :return: tuple of (directory_path, file_path) + """ + # Get the current dir name. + use_only_test_class = False + use_absolute_path = True + dir_name = self._get_current_path( + use_only_test_class, + test_class_name, + test_method_name, + use_absolute_path, + ) + _LOG.debug("dir_name=%s", dir_name) + hio.create_dir(dir_name, incremental=True) + hdbg.dassert_path_exists(dir_name) + # Get the expected outcome. + file_name = ( + self.get_output_dir( + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + + f"/{tag}.txt" + ) + return dir_name, file_name + + # TODO(gp): There is a lot of similarity between `check_string()` and + # `check_df_string()` that can be factored out if we extract the code that + # reads and saves the golden file. 
+ def check_string( + self, + actual: str, + *, + remove_lead_trail_empty_lines: bool = False, + dedent: bool = False, + purify_text: bool = False, + fuzzy_match: bool = False, + ignore_line_breaks: bool = False, + split_max_len: Optional[int] = None, + sort: bool = False, + use_gzip: bool = False, + tag: str = "test", + abort_on_error: bool = True, + action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, + test_class_name: Optional[str] = None, + test_method_name: Optional[str] = None, + ) -> Tuple[bool, bool, Optional[bool]]: + """ + Check the actual outcome of a test against the expected outcome + contained in the file. If `--update_outcomes` is used, updates the + golden reference file with the actual outcome. + + :param actual: actual outcome of the test + :param remove_lead_trail_empty_lines: remove leading and trailing empty + :param dedent: call `dedent` on the expected string to align it to the + beginning of the row + :param purify_text: remove some artifacts (e.g., usernames, + directories, reference to Git client) + :param fuzzy_match: ignore differences in spaces + :param ignore_line_breaks: ignore difference due to line breaks + :param split_max_len: split the string into lines of at most this length + :param sort: sort the text and then compare it. In other terms we check + whether the lines are the same although in different order + :param use_gzip: use gzip to compress/decompress the golden outcome + :param tag: tag to identify the golden outcome file + :param abort_on_error: whether to raise an exception if the outcome is + different from the golden outcome + :param action_on_missing_golden: what to do (e.g., "assert" or "update" + when the golden outcome is missing) + :param test_class_name: override the current test class name + :param test_method_name: override the current test method name + :return: outcome_updated, file_exists, is_equal + :raises: `RuntimeError` if there is a mismatch. 
If `abort_on_error` is False + (which should be used only for unit testing) return the result but do not + assert + """ + _LOG.debug( + hprint.to_str( + "remove_lead_trail_empty_lines dedent purify_text fuzzy_match " + "ignore_line_breaks split_max_len sort use_gzip tag " + "abort_on_error action_on_missing_golden test_class_name " + "test_method_name" + ) + ) + hdbg.dassert_in(type(actual), (bytes, str), "actual='%s'", actual) + # + dir_name, file_name = self._get_golden_outcome_file_name( + tag, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + if use_gzip: + file_name += ".gz" + _LOG.debug("file_name=%s", file_name) + # Remove reference from the current environment. + # TODO(gp): Not sure why we purify here and not delegate to `assert_equal`. + if purify_text: + _LOG.debug("Purifying actual outcome") + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + _LOG.debug("actual=\n%s", actual) + outcome_updated = False + file_exists = os.path.exists(file_name) + _LOG.debug("file_exists=%s", file_exists) + is_equal: Optional[bool] = None + if self._update_tests: + _LOG.debug("# Update golden outcomes") + # Determine whether outcome needs to be updated. + if file_exists: + expected = hio.from_file(file_name) + is_equal = expected == actual + if not is_equal: + outcome_updated = True + else: + # The golden outcome doesn't exist. + outcome_updated = True + _LOG.debug("outcome_updated=%s", outcome_updated) + if outcome_updated: + # Update the golden outcome. + self._check_string_update_outcome(file_name, actual, use_gzip) + else: + # Check the test result. + _LOG.debug("# Check golden outcomes") + if file_exists: + # Golden outcome is available: check the actual outcome against + # the golden outcome. 
+ expected = hio.from_file(file_name) + test_name = self._get_test_name() + is_equal = assert_equal( + actual, + expected, + test_name, + dir_name, + check_string=True, + remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, + dedent=dedent, + # We have handled the purification of the output earlier. + purify_text=False, + fuzzy_match=fuzzy_match, + ignore_line_breaks=ignore_line_breaks, + split_max_len=split_max_len, + sort=sort, + abort_on_error=abort_on_error, + ) + else: + # No golden outcome available. + _LOG.warning("Can't find golden outcome file '%s'", file_name) + if action_on_missing_golden == "assert": + # Save the result to a temporary file and assert. + file_name += ".tmp" + hio.to_file(file_name, actual, use_gzip=use_gzip) + msg = ( + "The golden outcome doesn't exist: saved the actual " + f"output in '{file_name}'" + ) + _LOG.error(msg) + if abort_on_error: + hdbg.dfatal(msg) + elif action_on_missing_golden == "update": + # Create golden file and add it to the repo. + _LOG.warning("Creating the golden outcome") + outcome_updated = True + self._check_string_update_outcome( + file_name, actual, use_gzip + ) + is_equal = None + else: + hdbg.dfatal( + "Invalid action_on_missing_golden=" + + f"'{action_on_missing_golden}'" + ) + self._test_was_updated = outcome_updated + _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) + return outcome_updated, file_exists, is_equal + + # /////////////////////////////////////////////////////////////////////// + + def _check_df_update_outcome( + self, + file_name: str, + actual: "pd.DataFrame", + ) -> None: + """ + Update the golden outcome file with actual dataframe output. + + :param file_name: path to the golden outcome file + :param actual: the actual dataframe to save + """ + _LOG.debug(hprint.to_str("file_name")) + hio.create_enclosing_dir(file_name) + actual.to_csv(file_name) + pytest_warning(f"Update golden outcome file '{file_name}'", prefix="\n") + # Add to git repo. 
+ self._git_add_file(file_name) + + def _to_error(self, msg: str) -> None: + """ + Append error message to the accumulated error log. + + :param msg: error message to log and accumulate + """ + self._error_msg += msg + "\n" + _LOG.error(msg) + + def _check_df_compare_outcome( + self, file_name: str, actual: "pd.DataFrame", err_threshold: float + ) -> Tuple[bool, "pd.DataFrame"]: + """ + Compare actual dataframe with golden outcome from file. + + :param file_name: path to the golden outcome file + :param actual: the actual dataframe to compare + :param err_threshold: relative error threshold for numerical comparison + :return: tuple of (is_equal, expected_dataframe) + """ + _LOG.debug(hprint.to_str("file_name")) + _LOG.debug("actual_=\n%s", actual) + hdbg.dassert_lte(0, err_threshold) + hdbg.dassert_lte(err_threshold, 1.0) + # Load the expected df from file. + expected = pd.read_csv(file_name, index_col=0) + _LOG.debug("expected=\n%s", expected) + hdbg.dassert_isinstance(expected, pd.DataFrame) + ret = True + # Compare columns. + if actual.columns.tolist() != expected.columns.tolist(): + msg = f"Columns are different:\n{str(actual.columns)}\n{str(expected.columns)}" + self._to_error(msg) + ret = False + # Compare the values. 
+ _LOG.debug("actual.shape=%s", str(actual.shape)) + _LOG.debug("expected.shape=%s", str(expected.shape)) + # From https://numpy.org/doc/stable/reference/generated/numpy.allclose.html + # absolute(a - b) <= (atol + rtol * absolute(b)) + # absolute(a - b) / absolute(b)) <= rtol + is_close = np.allclose( + actual, expected, rtol=err_threshold, equal_nan=True + ) + if not is_close: + _LOG.error("Dataframe values are not close") + if actual.shape == expected.shape: + close_mask = np.isclose(actual, expected, equal_nan=True) + # + msg = f"actual=\n{actual}" + self._to_error(msg) + # + msg = f"expected=\n{expected}" + self._to_error(msg) + # + actual_masked = np.where(close_mask, np.nan, actual) + msg = f"actual_masked=\n{actual_masked}" + self._to_error(msg) + # + expected_masked = np.where(close_mask, np.nan, expected) + msg = f"expected_masked=\n{expected_masked}" + self._to_error(msg) + # + err = np.abs((actual_masked - expected_masked) / expected_masked) + msg = f"err=\n{err}" + self._to_error(msg) + max_err = np.nanmax(np.nanmax(err)) + msg = "max_err=%.3f" % max_err + self._to_error(msg) + else: + msg = ( + "Shapes are different:\n" + f"actual.shape={str(actual.shape)}\nexpected.shape={str(expected.shape)}" + ) + self._to_error(msg) + ret = False + _LOG.debug("ret=%s", ret) + return ret, expected + + def check_dataframe( + self, + actual: "pd.DataFrame", + *, + err_threshold: float = 0.05, + dedent: bool = False, + tag: str = "test_df", + abort_on_error: bool = True, + action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, + ) -> Tuple[bool, bool, Optional[bool]]: + """ + Like `check_string()` but for pandas dataframes, instead of strings. 
+ """ + _LOG.debug(hprint.to_str("err_threshold tag abort_on_error")) + hdbg.dassert_isinstance(actual, pd.DataFrame) + # + dir_name, file_name = self._get_golden_outcome_file_name(tag) + _LOG.debug("file_name=%s", file_name) + outcome_updated = False + file_exists = os.path.exists(file_name) + _LOG.debug(hprint.to_str("file_exists")) + is_equal: Optional[bool] = None + if self._update_tests: + _LOG.debug("# Update golden outcomes") + # Determine whether outcome needs to be updated. + if file_exists: + is_equal, _ = self._check_df_compare_outcome( + file_name, actual, err_threshold + ) + _LOG.debug(hprint.to_str("is_equal")) + if not is_equal: + outcome_updated = True + else: + # The golden outcome doesn't exist. + outcome_updated = True + _LOG.debug("outcome_updated=%s", outcome_updated) + if outcome_updated: + # Update the golden outcome. + self._check_df_update_outcome(file_name, actual) + else: + # Check the test result. + _LOG.debug("# Check golden outcomes") + if file_exists: + # Golden outcome is available: check the actual outcome against + # the golden outcome. + is_equal, expected = self._check_df_compare_outcome( + file_name, actual, err_threshold + ) + # If not equal, report debug information. + if not is_equal: + test_name = self._get_test_name() + assert_equal( + str(actual), + str(expected), + test_name, + dir_name, + check_string=True, + remove_lead_trail_empty_lines=False, + dedent=dedent, + purify_text=False, + fuzzy_match=False, + ignore_line_breaks=False, + split_max_len=None, + sort=False, + abort_on_error=abort_on_error, + error_msg=self._error_msg, + ) + else: + # No golden outcome available. + _LOG.warning("Can't find golden outcome file '%s'", file_name) + if action_on_missing_golden == "assert": + # Save the result to a temporary file and assert. 
+ file_name += ".tmp" + hio.create_enclosing_dir(file_name) + actual.to_csv(file_name) + msg = ( + "The golden outcome doesn't exist: saved the actual " + f"output in '{file_name}'" + ) + _LOG.error(msg) + if abort_on_error: + hdbg.dfatal(msg) + elif action_on_missing_golden == "update": + # Create golden file and add it to the repo. + _LOG.warning("Creating the golden outcome") + outcome_updated = True + self._check_df_update_outcome(file_name, actual) + is_equal = None + else: + hdbg.dfatal( + "Invalid action_on_missing_golden=" + + f"'{action_on_missing_golden}'" + ) + self._test_was_updated = outcome_updated + # TODO(gp): Print the file with the updated test. + _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) + return outcome_updated, file_exists, is_equal + + def check_df_output( + self, + actual_df: "pd.DataFrame", + expected_length: Optional[int], + expected_column_names: Optional[List[str]], + expected_column_unique_values: Optional[Dict[str, List[Any]]], + expected_signature: str, + ) -> None: + """ + Verify that actual outcome dataframe matches the expected one. + + :param actual_df: actual outcome dataframe + :param expected_length: expected outcome dataframe length + - If `None`, skip the check + :param expected_column_names: expected outcome dataframe column names + - If `None`, skip the check + :param expected_column_unique_values: dict of column names and unique values + that they should contain + - If `None`, skip the check + :param expected_signature: expected outcome dataframe as string + - If `__CHECK_STRING__` use the value in `self.check_string()` + """ + # TODO(Grisha): get rid of `hpandas` dependency. + import helpers.hpandas as hpandas + + hdbg.dassert_isinstance(actual_df, pd.DataFrame) + if expected_length: + # Verify that the output length is correct. + actual_length = actual_df.shape[0] + self.assert_equal(str(actual_length), str(expected_length)) + if expected_column_names: + # Verify that the column names are correct. 
+ self.assert_equal( + str(sorted(actual_df.columns)), + str(sorted(expected_column_names)), + ) + if expected_column_unique_values: + hdbg.dassert_is_subset( + list(expected_column_unique_values.keys()), actual_df.columns + ) + # Verify that the unique values in specified columns are correct. + for column in expected_column_unique_values: + actual_one_column_unique_values = sorted( + list(actual_df[column].unique()) + ) + self.assert_equal( + str(actual_one_column_unique_values), + str(sorted(expected_column_unique_values[column])), + ) + # Build signature. + actual_signature = hpandas.df_to_str( + actual_df, + print_shape_info=True, + tag="df", + ) + _LOG.debug("\n%s", actual_signature) + # Check signature. + if expected_signature == "__CHECK_STRING__": + self.check_string(actual_signature, dedent=True, fuzzy_match=True) + else: + hdbg.dassert_isinstance(expected_signature, str) + self.assert_equal( + actual_signature, + expected_signature, + dedent=True, + fuzzy_match=True, + ) + + def check_srs_output( + self, + actual_srs: "pd.Series", + expected_length: Optional[int], + expected_unique_values: Optional[List[Any]], + expected_signature: str, + ) -> None: + """ + Verify that actual outcome series matches the expected one. + + :param actual_srs: actual outcome series + :param expected_length: expected outcome series length + - If `None`, skip the check + :param expected_unique_values: list of expected unique values in series + - If `None`, skip the check + :param expected_signature: expected outcome series as string + """ + # Import `hpandas` dynamically to exclude `pandas` from the thin client + # requirements. See CmTask6613 for details. + import helpers.hpandas as hpandas + + hdbg.dassert_isinstance(actual_srs, pd.Series) + if expected_length: + # Verify that output length is correct. + self.assert_equal(str(actual_srs.shape[0]), str(expected_length)) + if expected_unique_values: + # Verify that unique values in series are correct. 
+ self.assert_equal( + str(sorted(list(actual_srs.unique()))), + str(sorted(expected_unique_values)), + ) + # Build signature. + actual_signature = hpandas.df_to_str(actual_srs, num_rows=None) + _LOG.debug("\n%s", actual_signature) + # Check signature. + if expected_signature == "__CHECK_STRING__": + self.check_string(actual_signature, dedent=True, fuzzy_match=True) + else: + hdbg.dassert_isinstance(expected_signature, str) + self.assert_equal( + actual_signature, + expected_signature, + dedent=True, + fuzzy_match=True, + ) + + +# ############################################################################# +# QaTestCase +# ############################################################################# + + +@pytest.mark.qa +@pytest.mark.skipif( + hserver.is_inside_docker(), reason="Test needs to be run outside Docker" +) +class QaTestCase(TestCase, abc.ABC): + """ + Use for QA to test functionalities (e.g., invoke tasks) that run the dev / + prod container. + """ + + # TODO(Grisha): Linter should not remove `pass` statement from an empty class + # DevToolsTask #476. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py new file mode 100644 index 000000000..cf429b5ac --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py @@ -0,0 +1,450 @@ +""" +Import as: + +import helpers.hunit_test_purification as huntepur +""" + +import datetime +import logging +import os +import re +from typing import List, Tuple + +import helpers.hgit as hgit +import helpers.hintrospection as hintros +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# Mute this module unless we want to debug it. 
+_LOG.setLevel(logging.INFO) + + +# ############################################################################# +# TextPurifier +# ############################################################################# + + +# TODO(gp): Not sure the class is really needed since now it's in a separate +# file. +class TextPurifier: + """ + A class to purify text by removing environment-specific information and + standardizing output for test comparisons. + """ + + def purify_txt_from_client(self, txt: str) -> str: + """ + Apply all purification steps to the input text. + + :param txt: input text to purify + :return: purified text + """ + # The order of substitutions is important. We want to start from the "most + # specific" (e.g., `amp/helpers/test/...`) to the "least specific" (e.g., + # `amp`). + txt = self.purify_directory_paths(txt) + txt = self.purify_from_environment(txt) + # Correct order: -> `app` -> `amp` -> + # Start with `app.amp.helpers_root.helpers...` + # After purifying app references -> `amp.helpers_root.helpers...` + # After purifying amp references -> `helpers_root.helpers...` + # + # Incorrect order: -> `amp` -> `app` -> + # Start with `amp.helpers_root.helpers...` + # After purifying `amp` references -> `app.amp.helpers_root.helpers...` + # After purifying `app` references -> `amp.helpers_root.helpers...` + # + txt = self.purify_app_references(txt) + txt = self.purify_amp_references(txt) + txt = self.purify_from_env_vars(txt) + txt = self.purify_object_representation(txt) + txt = self.purify_today_date(txt) + txt = self.purify_white_spaces(txt) + txt = self.purify_parquet_file_names(txt) + txt = self.purify_helpers(txt) + txt = self.purify_docker_image_name(txt) + return txt + + def purify_directory_paths(self, txt: str) -> str: + """ + Replace known directory paths with standardized placeholders. + + Apply replacements in this order: + 1. Replace Git root paths with `$GIT_ROOT`. + 2. Replace `CSFY_HOST_GIT_ROOT_PATH` with `$CSFY_HOST_GIT_ROOT_PATH`. + 3. 
Replace current working directory with `$PWD`. + + :param txt: input text that needs to be purified + :return: purified text + """ + _LOG.debug("Before: txt='\n%s'", txt) + # Collect all paths to replace with their priorities. + replacements = [] + # 1. Git root paths. + # Remove references to Git modules starting from the innermost one. + for super_module in [False, True]: + # Replace the git root path with `$GIT_ROOT`. + git_root = hgit.get_client_root(super_module=super_module) + if git_root and git_root != "/": + replacements.append((git_root, "$GIT_ROOT")) + _LOG.debug("Added git root '%s' for replacement", git_root) + else: + # Skip git root path if it is `/`. + pass + # 2. CSFY_HOST_GIT_ROOT_PATH environment variable. + # Replace the CSFY_HOST_GIT_ROOT_PATH with `$CSFY_HOST_GIT_ROOT_PATH`. + csfy_git_root = os.environ.get("CSFY_HOST_GIT_ROOT_PATH") + if csfy_git_root: + replacements.append((csfy_git_root, "$CSFY_HOST_GIT_ROOT_PATH")) + _LOG.debug( + "Added CSFY_HOST_GIT_ROOT_PATH '%s' for replacement", + csfy_git_root, + ) + # 3. Current working directory. + # Replace the path of current working directory with `$PWD`. + pwd = os.getcwd() + if pwd and pwd != "/": + replacements.append((pwd, "$PWD")) + _LOG.debug("Added PWD '%s' for replacement", pwd) + # Apply replacements in order of priority. + for path, replacement in replacements: + # Use word boundaries to avoid replacing path fragments. + # E.g., To avoid replacing `app` in `application.py`. + pattern = rf"(? str: + """ + Replace environment-specific values with placeholders. + + Perform these transformations: + 1. Replace directory paths with standardized placeholders. + 2. Replace the current user name with $USER_NAME. + 3. Handle special cases like usernames in paths and commands. + + :param txt: input text that needs to be purified + :return: purified text + """ + # Replace current username with `$USER_NAME`. 
+ user_name = hsystem.get_user_name() + # Set a regex pattern that finds a user name surrounded by dot, dash or space. + # E.g., `IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0`, + # `--name $USER_NAME.amp_test.app.app`, `run --rm -l user=$USER_NAME`. + regex = rf"([\s\n\-\.\=]|^)+{user_name}+([.\s/-]|$)" + # Use `\1` and `\2` to preserve specific characters around `$USER_NAME`. + target = r"\1$USER_NAME\2" + txt = re.sub(regex, target, txt) + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def _apply_regex_replacements( + self, txt: str, regex_patterns: List[Tuple[str, str]] + ) -> str: + """ + Apply a series of regex replacements to text. + + :param txt: input text to process + :param regex_patterns: list of (pattern, replacement) tuples to + apply in order + :return: text with all regex replacements applied + """ + # Apply regex replacements in order. + txt_out = txt + for regex_pattern, replacement in regex_patterns: + txt_out = re.sub(regex_pattern, replacement, txt_out) + _LOG.debug( + "Applying %s -> %s: before=%s, after=%s", + regex_pattern, + replacement, + txt, + txt_out, + ) + return txt_out + + def purify_amp_references(self, txt: str) -> str: + """ + Remove references to amp from text by applying a series of regex + substitutions. + + Handle these patterns: + 1. Replace path references + - E.g., "amp/helpers/test/..." -> "helpers/test/..." + 2. Replace class references + - E.g., "" -> "" + 3. Replace comment references + - E.g., "# Test created for amp.helpers.test" -> "# Test created for helpers.test" + 4. Replace module references + - E.g., "amp.helpers.test.TestClass" -> "helpers.test.TestClass" + + :param txt: input text containing amp references + :return: text with amp references removed + """ + amp_patterns = [ + # Remove 'amp/' prefix from quoted paths. + (r"'amp/", "'"), + # Remove 'amp/' prefix from path segments. 
+ (r"(?m)(^\s*|\s+)amp/", r"\1"), + # Replace '/amp/' with '/' and '/amp:' with ':' in paths. + (r"(?m)/amp/", "/"), + (r"(?m)/amp:", ":"), + # Remove 'amp.' prefix from class representations and tracebacks. + (r" str: + """ + Remove references to `/app` from text by applying a series of regex + substitutions. + + :param txt: input text containing app references + :return: text with app references removed + """ + app_patterns = [ + # Remove trailing '/app/' references. + (r"(? str: + """ + Replace environment variable values with their variable names. + + :param txt: input text containing environment variable values + :return: text with environment variable values replaced + """ + for env_var in [ + "CSFY_AWS_S3_BUCKET", + "CSFY_ECR_BASE_PATH", + ]: + if env_var in os.environ: + val = os.environ[env_var] + if val == "": + _LOG.debug("Env var '%s' is empty", env_var) + else: + txt = txt.replace(val, f"${env_var}") + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def purify_object_representation(self, txt: str) -> str: + """ + Remove references like `at 0x7f43493442e0`. + + :param txt: input text containing object representations + :return: text with object representations standardized + """ + object_patterns = [ + (r"at 0x[0-9A-Fa-f]+", "at 0x"), + (r" id='\d+'>", " id='xxx'>"), + (r"port=\d+", "port=xxx"), + (r"host=\S+ ", "host=xxx "), + ( + r"wall_clock_time=Timestamp\('.*?',", + r"wall_clock_time=Timestamp('xxx',", + ), + ] + txt = self._apply_regex_replacements(txt, object_patterns) + _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) + return txt + + def purify_today_date(self, txt: str) -> str: + """ + Remove today's date like `20220810`. + + :param txt: input text containing dates + :return: text with dates standardized + """ + today_date = datetime.date.today() + today_date_as_str = today_date.strftime("%Y%m%d") + # Replace predict.3.compress_tails.df_out.20220627_094500.YYYYMMDD_171106.csv.gz. 
+ txt = re.sub( + today_date_as_str + r"_\d{6}", + "YYYYMMDD_HHMMSS", + txt, + flags=re.MULTILINE, + ) + txt = re.sub(today_date_as_str, "YYYYMMDD", txt, flags=re.MULTILINE) + return txt + + def purify_white_spaces(self, txt: str) -> str: + """ + Remove trailing white spaces. + + :param txt: input text with whitespace + :return: text with standardized whitespace + """ + txt_new = [] + for line in txt.split("\n"): + line = line.rstrip() + txt_new.append(line) + txt = "\n".join(txt_new) + return txt + + def purify_line_number(self, txt: str) -> str: + """ + Replace line number with `$LINE_NUMBER`. + + :param txt: input text containing line numbers + :return: text with line numbers standardized + """ + txt = re.sub(r"\.py::\d+", ".py::$LINE_NUMBER", txt, flags=re.MULTILINE) + return txt + + def purify_parquet_file_names(self, txt: str) -> str: + """ + Replace UUIDs file names to `data.parquet` in the golden outcomes. + + :param txt: input text containing parquet file names + :return: text with standardized parquet file names + """ + pattern = r""" + [0-9a-f]{32}-[0-9].* # GUID pattern. + (?=\.parquet) # positive lookahead assertion that matches a + # position followed by ".parquet" without + # consuming it. + """ + # TODO(Vlad): Need to change the replacement to `$FILE_NAME` as in the + # `purify_from_environment()` function. For now, some tests are expecting + # `data.parquet` files. + replacement = "data" + # flags=re.VERBOSE allows us to use whitespace and comments in the pattern. + txt = re.sub(pattern, replacement, txt, flags=re.VERBOSE) + return txt + + def purify_helpers(self, txt: str) -> str: + """ + Replace the path `helpers_root.helpers` with `helpers`. 
+ + :param txt: input text containing helper references + :return: text with standardized helper references + """ + txt = re.sub( + r"helpers_root\.helpers\.", "helpers.", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/helpers/", "helpers/", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root\.config_root", "config_root", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/config_root/", "config_root/", txt, flags=re.MULTILINE + ) + txt = re.sub( + r"helpers_root/dev_scripts_helpers/", + "dev_scripts_helpers/", + txt, + flags=re.MULTILINE, + ) + return txt + + def purify_docker_image_name(self, txt: str) -> str: + """ + Remove temporary docker image name. + + :param txt: input text containing docker image names + :return: text with standardized docker image names + """ + # Purify command like: + # > docker run --rm ... tmp.latex.edb567be .. + # > ... tmp.latex.aarch64.2f590c86.2f590c86 + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\. # tmp.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ + txt = re.sub( + pattern, + r"\1xxxxxxxx\2", + txt, + flags=re.MULTILINE | re.VERBOSE, + ) + # Handle patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. + pattern = r""" + ^ # Start of line + ( # Start capture group 1 + .*docker.* # Any text containing "docker" + \s+ # One or more whitespace + tmp\.\S+\.\S+\. # tmp.something.something. + ) # End capture group 1 + [a-z0-9]{8} # 8 character hex hash + \. 
# Literal dot + [a-z0-9]{8} # Another 8 character hex hash + ( # Start capture group 2 + \s+ # One or more whitespace + .* # Rest of the line + ) # End capture group 2 + $ # End of line + """ + txt = re.sub( + pattern, + r"\1xxxxxxxx\2", + txt, + flags=re.MULTILINE | re.VERBOSE, + ) + return txt + + def purify_file_names(self, file_names: List[str]) -> List[str]: + """ + Express file names in terms of the root of git repo, removing reference + to `amp`. + """ + git_root = hgit.get_client_root(super_module=True) + file_names = [os.path.relpath(f, git_root) for f in file_names] + # Apply amp reference purification to file paths. + file_names = list(map(self.purify_amp_references, file_names)) + return file_names + + +def purify_text(txt: str) -> str: + """ + Purify text by removing environment-specific information and standardizing + output for test comparisons. + """ + purifier = TextPurifier() + return purifier.purify_txt_from_client(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py new file mode 100644 index 000000000..5d00c50ad --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py @@ -0,0 +1,658 @@ +""" +Import as: + +import helpers.hunit_test_utils as hunteuti +""" + +import abc +import contextlib +import glob +import logging +import os +import re +from typing import Any, Dict, Generator, List, Optional, Tuple +import unittest.mock as mock + +import pytest + +import helpers.hdbg as hdbg +import helpers.henv as henv +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hserver as hserver +import helpers.hstring as hstring +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def 
get_test_directories(root_dir: str) -> List[str]: + """ + Get paths of all the directories that contain unit tests. + + :param root_dir: the dir to start the search from, e.g. + `/src/cmamp1/helpers` + :return: paths of test directories + """ + paths = [] + for path, _, _ in os.walk(root_dir): + # Iterate over the paths to find the test directories. + if path.endswith("/test"): + paths.append(path) + hdbg.dassert_lte(1, len(paths)) + return paths + + +# ############################################################################# +# UnitTestRenamer +# ############################################################################# + + +class UnitTestRenamer: + """ + Rename a unit test in Python code and the corresponding directories + containing the inputs and the expected outputs. + """ + + @staticmethod + def _check_names(old_test_name: str, new_test_name: str) -> None: + """ + Check if the test names are valid. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + """ + # Assert if the classname does not start with `Test`. + for name in [old_test_name, new_test_name]: + hdbg.dassert( + name.startswith("Test"), + "Invalid test_class_name='%s'. A test class should start with `Test`", + name, + ) + # Assert if the names are the same. + hdbg.dassert_ne(old_test_name, new_test_name) + + @staticmethod + def _process_parameters( + old_test_name: str, + new_test_name: str, + ) -> Dict[str, str]: + """ + Build the processing config with the renaming parameters. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + :return: config for renaming process, i.e. a dictionary which includes the fields: + - `old_class`: old name of the class + - `new_class`: new name of the class + - `old_method`: new name of the method. If empty, only class should be renamed + - `new_method`: new name of the method + """ + # Build the processing config. + config: Dict[str, str] = {} + # Split by "." 
to separate class name and method name. + split_old_name = old_test_name.split(".") + split_new_name = new_test_name.split(".") + # Check the consistency of the names - they should have the same length. + hdbg.dassert_eq( + len(split_old_name), + len(split_new_name), + "The test names are not consistent; one has a method and the other does not.", + ) + # Check the format of the test name. + hdbg.dassert_in( + len(split_old_name), + [1, 2], + msg="Wrong test name format: it must contain no more than 1 dot", + ) + if len(split_old_name) == 1: + # Class name split by `.` is one element array, e.g. `["TestClassName"]`. + old_class_name, old_method_name = split_old_name[0], "" + new_class_name, new_method_name = split_new_name[0], "" + _LOG.debug( + "Trying to change the name of `{old_test_name}` unit test class to `%s`.", + new_test_name, + ) + else: + # Method name split by `.` is 2 element array, e.g. + # TestClassName.test2` - >`["TestClassName", "test2"]`. + old_class_name, old_method_name = split_old_name + new_class_name, new_method_name = split_new_name + hdbg.dassert_eq( + old_class_name, + new_class_name, + "To change the name of the method, specify the methods of the \ + same class. E.g. `--old TestCache.test1 --new TestCache.new_test1`", + ) + _LOG.debug( + "Trying to change the name of `%s` method of `%s` class to `%s`.", + old_method_name, + old_class_name, + new_method_name, + ) + # Fill the processing parameters. + config["old_class"] = old_class_name + config["old_method"] = old_method_name + config["new_class"] = new_class_name + config["new_method"] = new_method_name + return config + + def __init__( + self, old_test_name: str, new_test_name: str, root_dir: str + ) -> None: + """ + Construct the UnitTestRenamer. + + :param old_test_name: the old name of the test + :param new_test_name: the new name of the test + :param root_dir: the directory to start the search from + """ + # Check if the names of the test are valid. 
+ self._check_names(old_test_name, new_test_name) + # Get the directories containing tests. + self.test_dirs = get_test_directories(root_dir) + # Construct the renaming config. + self.cfg = self._process_parameters(old_test_name, new_test_name) + + def _rename_class( + self, + content: str, + ) -> Tuple[str, int]: + """ + Rename a class in a Python file. + + :param content: the content of the file + :return: the content of the file with the class name replaced, + the number of substitutions replaced + """ + lines = content.split("\n") + docstring_line_indices = hstring.get_docstring_line_indices(lines) + num_replaced = 0 + for ind, line in enumerate(lines): + # Skip if the line is inside a docstring. + if ind not in docstring_line_indices: + # Rename the class. + new_line, num_replaced = re.subn( + rf"class {self.cfg['old_class']}\(", + rf"class {self.cfg['new_class']}(", + line, + ) + if num_replaced != 0: + lines[ind] = new_line + break + content = "\n".join(lines) + return content, num_replaced + + def _rename_method( + self, + content: str, + ) -> Tuple[str, int]: + """ + Rename the method of the class. + + :param content: the content of the file + :return: content of the file with the method renamed, the number + of substitutions made + """ + lines = content.split("\n") + # Flag that informs if the class border was found. + class_found = False + # The number of substitutions made in the content of the file. + num_replaced = 0 + class_pattern = rf"class {self.cfg['old_class']}\(" + method_pattern = rf"def {self.cfg['old_method']}\(" + docstring_line_indices = hstring.get_docstring_line_indices(lines) + for ind, line in enumerate(lines): + # Iterate over the lines of the file to find the specific method of the + # class that should be renamed. + # Skip if the line is inside a docstring. + if class_found and ind not in docstring_line_indices: + if line.startswith("class"): + # Break if the next class started and the method was not found. 
+ break + # Rename the method. + new_line, num_replaced = re.subn( + method_pattern, f"def {self.cfg['new_method']}(", line + ) + if num_replaced != 0: + # Replace the line with method definition. + lines[ind] = new_line + break + else: + if re.search(class_pattern, line): + class_found = True + new_content = "\n".join(lines) + return new_content, num_replaced + + def _rename_in_file( + self, + test_dir: str, + file_path: str, + ) -> None: + """ + Process the file: + + - check if the content of the file contains target class + - change the class name, e.g. `TestClassName` -> `TestClassNameNew` + / change the method name `TestClassName.test2` -> `TestClassName.test_new` + - rename the outcomes if they exist + + :param test_dir: the path to the test directory containing the file, e.g. + `/src/cmamp1/helpers/test` + :param file_path: the path to the file, `/src/cmamp1/helpers/test/test_lib_tasks.py` + """ + content = hio.from_file(file_path) + if not re.search(rf"class {self.cfg['old_class']}\(", content): + # Return if target test class does not appear in file content. + return + if self.cfg["old_method"] == "": + # Rename the class. + content, n_replaced = self._rename_class(content) + if n_replaced != 0: + _LOG.info( + "%s: class `%s` was renamed to `%s`.", + file_path, + self.cfg["old_class"], + self.cfg["new_class"], + ) + else: + # Rename the method of the class. + content, n_replaced = self._rename_method(content) + if n_replaced != 0: + _LOG.info( + "%s: method `%s` of `%s` class was renamed to `%s`.", + file_path, + self.cfg["old_method"], + self.cfg["old_class"], + self.cfg["new_method"], + ) + # Rename the directories that contain target test outcomes. + self.rename_outcomes( + test_dir, + ) + # Write processed content back to file. + hio.to_file(file_path, content) + + def run(self) -> None: + """ + Run the renamer tool on the files under `root_dir`. + """ + # Iterate over test directories. 
+ for path in self.test_dirs: + # Get all Python test files from this directory. + _LOG.debug("Scanning `%s` directory.", path) + search_pattern = os.path.join(path, "test_*.py") + files = glob.glob(search_pattern) + for test_file in files: + self._rename_in_file( + path, + test_file, + ) + + @staticmethod + def _rename_directory(outcome_path_old: str, outcome_path_new: str) -> None: + """ + Rename the outcomes directory and add it to git. + + :param outcome_path_old: the old name of outcome directory, e.g. + `/src/cmamp1/helpers/test/outcomes/TestRename.test_old` + :param outcome_path_new: the new name of outcome directory, e.g. + `/src/cmamp1/helpers/test/outcomes/TestRename.test_new` + """ + cmd = f"mv {outcome_path_old} {outcome_path_new}" + # Rename the directory. + rc = hsystem.system(cmd, abort_on_error=True, suppress_output=False) + _LOG.info( + "Renaming `%s` directory to `%s`. Output log: %s", + outcome_path_old, + outcome_path_new, + rc, + ) + # Add to git new outcome directory and remove the old one. + # The sequence of commands is used because `git mv` does not work + # properly while unit testing. + cmd = f"git add {outcome_path_new} && git rm -r {outcome_path_old}" + hsystem.system(cmd, abort_on_error=True, suppress_output=False) + + def _process_outcomes_dir( + self, outcome_dir: str, outcomes_path: str + ) -> bool: + """ + Process the directory containing target test outcomes. + + The stages of processing are: + - generate the new name of the directory + - rename and add it to git + + :param outcome_dir: the name of the directory containing the outcomes + :param outcomes_path: the path to the outcomes directory + :return: if the outcomes were renamed + """ + # Contruct the path to outcomes directory. + outcome_path_old = os.path.join(outcomes_path, outcome_dir) + # Construct old and new target dir names, e.g. 
+ # `TestOldName.` and `TestNewName.` if class should be renamed or + # `TestOldName.test_old` and `TestOldName.test_new` if method should be renamed. + old_target = ".".join([self.cfg["old_class"], self.cfg["old_method"]]) + new_target = ".".join([self.cfg["new_class"], self.cfg["new_method"]]) + if self.cfg["old_method"] == "" and outcome_dir.startswith(old_target): + # Check if the class should be renamed, e.g. + # if `outcome_dir` is `TestOld.test1` and `old_target` is `TestOld.`. + # Split old directory name - the part before "." is the class name. + class_method = outcome_dir.split(".") + # Replace old class name with the new one, `["TestOld", "test1"]` + # -> `["TestNew", "test1"]`. + class_method[0] = self.cfg["new_class"] + # Construct the new outcome directory name -> `TestNew.test1`. + outcome_name_new = ".".join(class_method) + outcome_path_new = os.path.join(outcomes_path, outcome_name_new) + elif self.cfg["old_method"] != "" and outcome_dir == old_target: + # Check if the dir should be renamed. E.g. given that `old_target` + # is `TestOld.test1_new`, then if `outcome_dir` is `TestOld.test1`, + # it should not be renamed, and if `outcome_dir` is `TestOld.test1_new`, + # it should be renamed. + outcome_path_new = os.path.join(outcomes_path, new_target) + else: + return False + # Rename the directory and add it to git. + self._rename_directory(outcome_path_old, outcome_path_new) + return True + + def rename_outcomes( + self, + path: str, + ) -> None: + """ + Rename the directory that contains test outcomes. + + :param path: the path to the test directory, e.g. + `cmamp1/helpers/test/` + """ + outcomes_path = os.path.join(path, "outcomes") + dir_items = os.listdir(outcomes_path) + # Get the list of outcomes directories. 
+ outcomes = [ + dir_name + for dir_name in dir_items + if os.path.isdir(os.path.join(outcomes_path, dir_name)) + ] + renamed = False + for outcome_dir in outcomes: + renamed = self._process_outcomes_dir(outcome_dir, outcomes_path) + if not renamed: + _LOG.info( + "No outcomes for `%s` were found in `%s`.", + self.cfg["old_class"], + outcomes_path, + ) + + +# ############################################################################# +# Obj_to_str_TestCase +# ############################################################################# + + +class Obj_to_str_TestCase(abc.ABC): + """ + Test case for testing `obj_to_str()` and `obj_to_repr()`. + """ + + def helper(self, obj: Any, method_name: str, expected_str: str) -> None: + """ + Common method for testing `__repr__` and `__str__`. + """ + hdbg.dassert_is_not(obj, None) + actual_str = getattr(obj, method_name)() + self.assert_equal( # type: ignore + actual_str, expected_str, purify_text=True, fuzzy_match=True + ) + + def run_test_repr(self, obj: Any, expected_str: str) -> None: + """ + Check that `__repr__` is printed correctly. + """ + method_name = "__repr__" + self.helper(obj, method_name, expected_str) + + def run_test_str(self, obj: Any, expected_str: str) -> None: + """ + Check that `__str__` is printed correctly. + """ + method_name = "__str__" + self.helper(obj, method_name, expected_str) + + def run_test_to_config_str(self, obj: Any, expected_str: str) -> None: + """ + Check that `to_config_str()` is printed correctly. + """ + method_name = "to_config_str" + self.helper(obj, method_name, expected_str) + + +# ############################################################################# + + +def _get_repo_short_name() -> str: + dir_name = "." 
+ include_host_name = False + repo_name = hgit.get_repo_full_name_from_dirname(dir_name, include_host_name) + _LOG.debug("repo_name=%s", repo_name) + # ck/cmamp + short_repo_name = repo_name.split("/")[1] + _LOG.debug("short_repo_name=%s", short_repo_name) + return short_repo_name + + +def execute_only_in_target_repo(target_name: str) -> None: + repo_short_name = _get_repo_short_name() + if repo_short_name != target_name: + pytest.skip(f"Only run on {target_name} and not {repo_short_name}") + + +# TODO(gp): Remove and use pytest.skipif(). +def execute_only_on_ci() -> None: + is_inside_ci_ = hserver.is_inside_ci() + if not is_inside_ci_: + pytest.skip("Only run in CI") + + +def execute_only_on_dev4() -> None: + is_dev4_ = hserver.is_dev4() + if not is_dev4_: + pytest.skip("Only run on dev4") + + +def execute_only_on_dev_csfy() -> None: + is_dev_csfy_ = hserver.is_dev_csfy() + if not is_dev_csfy_: + pytest.skip("Only run on dev CSFY") + + +def execute_only_on_mac(*, version: Optional[str] = None) -> None: + is_host_mac_ = hserver.is_host_mac() + if version: + is_host_mac_ = hserver.is_host_mac_version(version) + if not is_host_mac_: + pytest.skip(f"Only run on Mac with version={version}") + + +def check_env_to_str( + self_: Any, expected: str, *, skip_secrets_vars: bool = False +) -> None: + actual = henv.env_to_str(system_signature=False) + actual = hunitest.filter_text("get_name", actual) + actual = hunitest.filter_text("get_repo_map", actual) + actual = hunitest.filter_text("CSFY_HOST_", actual) + if skip_secrets_vars: + # TODO(gp): Difference between amp and cmamp. + actual = hunitest.filter_text( + "AM_AWS_|CSFY_AWS_|GH_ACTION_ACCESS_TOKEN", actual + ) + self_.assert_equal(actual, expected, fuzzy_match=True, purify_text=True) + + +def is_test_file(file_path: str) -> bool: + """ + Check if a file is a test file. 
+ + A file is considered a test file if: + - It contains "/test/" in its path, OR + - Its basename starts with "test_", OR + - Its basename ends with "_test.py" + + :param file_path: path to check + :return: True if file_path is a test file, False otherwise + """ + return ( + "/test/" in file_path + or file_path.split("/")[-1].startswith("test_") + or file_path.endswith("_test.py") + ) + + +def get_test_file_for_source(source_file: str) -> Optional[str]: + """ + Map a source Python file to its corresponding test file. + + E.g., helpers/hdbg.py -> helpers/test/test_hdbg.py + + :param source_file: path to a source Python file + :return: path to corresponding test file if it exists and source is not + already a test file; None otherwise + """ + if is_test_file(source_file): + return None + base_name = os.path.basename(source_file) + dir_name = os.path.dirname(source_file) + test_file = os.path.join(dir_name, "test", f"test_{base_name}") + if os.path.exists(test_file): + return test_file + return None + + +def get_test_files_for_sources(files: List[str]) -> List[str]: + """ + Map a list of source files to their corresponding test files. + + Filters out test files from the input list, then maps each source file + to its corresponding test file using `get_test_file_for_source`. + + :param files: list of file paths (may include both source and test files) + :return: list of test files that exist for the source files + """ + source_files = [f for f in files if not is_test_file(f)] + test_files = [] + for file_path in source_files: + test_file = get_test_file_for_source(file_path) + if test_file: + test_files.append(test_file) + return test_files + + +def get_parent_dirs(files: List[str]) -> List[str]: + """ + Get the minimal set of parent directories that contain all given files. + + Extracts the parent directory of each file, removes duplicates, and then + removes any directory that is a subdirectory of another directory in the + set. 
Files at the root level (with empty parent dir) are assigned to ".". + + Example: + Input: ["dev_scripts_helpers/scraping/process_hn_article.py", + "dev_scripts_helpers/scraping/test/__init__.py", + "helpers/hgit.py", + "helpers/lib_tasks_utils.py"] + Output: ["dev_scripts_helpers/scraping", "helpers"] + + :param files: list of file paths + :return: list of minimal parent directories + """ + if not files: + return [] + dirs = set() + for file_path in files: + dir_path = os.path.dirname(file_path) + if not dir_path: + dir_path = "." + dirs.add(dir_path) + dirs = sorted(dirs) + minimal_dirs = [] + for d in dirs: + is_subdir = False + for other_d in dirs: + if d != other_d and d.startswith(other_d + "/"): + is_subdir = True + break + if not is_subdir: + minimal_dirs.append(d) + return minimal_dirs + + +# ############################################################################# +# System call capture utilities +# ############################################################################# + + +@contextlib.contextmanager +def capture_system_calls( + side_effect: Optional[Any] = None, +) -> Generator[List[Dict[str, Any]], None, None]: + """ + Context manager that captures all system calls to `subprocess.run()` and + `hsystem._system()`, returning them as a list of invocations. + + Each invocation is a dict with 'function', 'args', and 'kwargs' keys. + + :param side_effect: Exception or return value to use for mocked calls + :return: List of invocations, each as {'function': str, 'args': tuple, + 'kwargs': dict} + + Example: + ``` + with capture_system_calls() as invocations: + my_function() + # Check captured calls. 
+ assert len(invocations) == 1 + assert invocations[0]['function'] == 'subprocess.run' + ``` + """ + invocations: List[Dict[str, Any]] = [] + + def mock_subprocess_run(*args: Any, **kwargs: Any) -> Any: + invocations.append( + { + "function": "subprocess.run", + "args": args, + "kwargs": kwargs, + } + ) + if side_effect is not None: + if isinstance(side_effect, type) and issubclass( + side_effect, BaseException + ): + raise side_effect() + elif isinstance(side_effect, BaseException): + raise side_effect + return None + + def mock_hsystem(*args: Any, **kwargs: Any) -> Any: + invocations.append( + { + "function": "hsystem._system", + "args": args, + "kwargs": kwargs, + } + ) + if side_effect is not None: + if isinstance(side_effect, type) and issubclass( + side_effect, BaseException + ): + raise side_effect() + elif isinstance(side_effect, BaseException): + raise side_effect + return (0, "") # Return code and output + + with mock.patch("subprocess.run", side_effect=mock_subprocess_run): + with mock.patch("helpers.hsystem._system", side_effect=mock_hsystem): + yield invocations diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py new file mode 100644 index 000000000..18aea68c5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py @@ -0,0 +1,300 @@ +""" +Import as: + +import helpers.hversion as hversio +""" + +# This code implements version control for code +# The code version is used in two circumstances: +# 1) when any code using `hdbg.py` (which is included everywhere) starts in +# order to verify that the running code and the container in which the code +# is running are compatible +# 2) when a container is built to know what version of the code was used to build +# it + +import functools +import logging 
+import os +import re +from typing import List, Optional, cast + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +# This module can depend only on: +# - Python standard modules +# - a few helpers as described in `helpers/dependencies.txt` + +_LOG = logging.getLogger(__name__) + + +_INFO = "\033[36mINFO\033[0m" +_WARNING = "\033[33mWARNING\033[0m" +_ERROR = "\033[31mERROR\033[0m" +# +_VERSION_RE = r"\d+\.\d+\.\d+" + + +# Copied from helpers.hgit to avoid circular dependencies. + + +@functools.lru_cache() +def _is_inside_submodule(git_dir: str = ".") -> bool: + """ + Return whether a dir is inside a Git submodule or a Git supermodule. + + We determine this checking if the current Git repo is included + inside another Git repo. + """ + cmd = [] + # - Find the git root of the current directory + # - Check if the dir one level up is a valid Git repo + # Go to the dir. + cmd.append(f"cd {git_dir}") + # > cd im/ + # > git rev-parse --show-toplevel + # /Users/saggese/src/.../amp + cmd.append('cd "$(git rev-parse --show-toplevel)/.."') + # > git rev-parse --is-inside-work-tree + # true + cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") + cmd_as_str = " && ".join(cmd) + rc = hsystem.system(cmd_as_str, abort_on_error=False) + ret: bool = rc == 0 + return ret + + +@functools.lru_cache() +def _get_client_root(super_module: bool) -> str: + """ + Return the full path of the root of the Git client. + + E.g., `/Users/saggese/src/.../amp`. + + :param super_module: if True use the root of the Git super_module, + if we are in a submodule. Otherwise use the Git sub_module root + """ + if super_module and _is_inside_submodule(): + # https://stackoverflow.com/questions/957928 + # > cd /Users/saggese/src/.../amp + # > git rev-parse --show-superproject-working-tree + # /Users/saggese/src/... 
+ cmd = "git rev-parse --show-superproject-working-tree" + else: + # > git rev-parse --show-toplevel + # /Users/saggese/src/.../amp + cmd = "git rev-parse --show-toplevel" + # TODO(gp): Use system_to_one_line(). + _, out = hsystem.system_to_string(cmd) + out = out.rstrip("\n") + hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") + client_root: str = os.path.realpath(out) + return client_root + + +# End copy. + + +def get_changelog_version( + container_dir_name: str, *, file_name: str = None +) -> Optional[str]: + """ + Return latest version from changelog.txt file. + + :param container_dir_name: container directory relative to the root + directory + :param file_name: changelog file name + """ + version: Optional[str] = None + supermodule = True + root_dir = _get_client_root(supermodule) + # Note: for `amp` as submodule one should pass `container_dir_name` relative + # to the root, e.g., `amp/optimizer` and not just `optimizer`. + hdbg.dassert_ne(container_dir_name, "") + if file_name is None: + file_name = "changelog.txt" + changelog_file = os.path.join(root_dir, container_dir_name, file_name) + hdbg.dassert_file_exists(changelog_file) + changelog = hio.from_file(changelog_file) + match = re.search(_VERSION_RE, changelog) + if match: + version = match.group() + return version + + +def get_container_version() -> Optional[str]: + """ + Return the container version. + + :return: container code version from the env var + """ + container_version: Optional[str] = None + if hserver.is_inside_docker(): + env_var = "AM_CONTAINER_VERSION" + if env_var not in os.environ: + # This can happen when GH Actions pull the image using invoke + # inside their container (but not inside ours), thus there is no + # AM_CONTAINER_VERSION. + print( + _WARNING + + f": The env var {env_var} should be defined when running inside a" + " container" + ) + else: + # We are running inside a container. 
+ # Keep the code and the container in sync by versioning both and + # requiring to be the same. + container_version = os.environ["AM_CONTAINER_VERSION"] + return container_version + + +def _check_version(code_version: str, container_version: str) -> bool: + """ + Check whether the code version and the container version are the same. + + :param code_version: code version from the changelog + :param container_version: container code version from the env var + :return: whether the versions are the same or not + """ + # Since the code version from the changelog is extracted with the + # `_VERSION_RE` regex, we apply the same regex to the container version + # to keep the representations comparable. + match = re.search(_VERSION_RE, container_version) + hdbg.dassert( + match, + ( + "Invalid format of the container code version '%s'; " + "it should contain a number like '1.0.0'" + ), + container_version, + ) + container_version = match.group() # type: ignore + # Check if the versions are the same. + is_ok = container_version == code_version + if not is_ok: + msg = f""" + ----------------------------------------------------------------------------- + This code is not in sync with the container: + code_version='{code_version}' != container_version='{container_version}' + ----------------------------------------------------------------------------- + You need to: + - merge origin/master into your branch with `invoke git_merge_master` + - pull the latest container with `invoke docker_pull` + """ + msg = hprint.dedent(msg) + # Highlight in red. + # TODO(gp): Use the proper function, if dependencies allow it. + msg = f"\033[31m{msg}\033[0m" + print(msg) + if False: + raise RuntimeError(msg) + return is_ok + + +def check_version(container_dir_name: str) -> None: + """ + Check that the code and container code have compatible version, otherwise + raises `RuntimeError`. 
+ + :param container_dir_name: container directory relative to the root + directory + """ + # TODO(gp): -> CK_SKIP_VERSION_CHECK. + if "SKIP_VERSION_CHECK" in os.environ: + # Skip the check altogether. + return + # Get code version. + code_version = get_changelog_version(container_dir_name) + container_version = get_container_version() + # Check version, if possible. + if container_version is None: + # No need to check. + return + code_version = cast(str, code_version) + _check_version(code_version, container_version) + + +def get_latest_changelog_entry( + changelog_path: str, +) -> dict: + """ + Parse the latest changelog entry from a changelog file. + + :param changelog_path: path to the changelog.txt file + :return: dict with keys: 'version', 'date', 'changes' (list of + change lines) + """ + hdbg.dassert_file_exists(changelog_path) + changelog = hio.from_file(changelog_path) + lines = changelog.split("\n") + version = None + date = None + changes = [] + in_entry = False + for line in lines: + line = line.rstrip() + # Check for version header (e.g., "# csfy-2.2.0"). + version_match = re.match(r"^#\s+(.+)$", line) + if version_match: + if version is None: + # This is the first (latest) entry. + version = version_match.group(1) + in_entry = True + else: + # We've reached the next entry, stop. + break + elif in_entry: + # Check for date (e.g., "- 2025-10-06"). + date_match = re.match(r"^-\s+(\d{4}-\d{2}-\d{2})$", line) + if date_match and date is None: + date = date_match.group(1) + # Collect change lines. + elif line.startswith("- ") and not date_match: + changes.append(line) + return {"version": version, "date": date, "changes": changes} + + +def bump_version(version: str, *, bump_type: str = "minor") -> str: + """ + Bump a semantic version number. 
+ + :param version: version string in format X.Y.Z (e.g., "2.2.0") + :param bump_type: type of version bump - "major", "minor", or "patch" + :return: bumped version string + """ + hdbg.dassert_in(bump_type, ("major", "minor", "patch")) + # Parse version using regex. + match = re.match(r"^(\d+)\.(\d+)\.(\d+)$", version) + hdbg.dassert( + match, + f"Invalid version format: '{version}'. Expected X.Y.Z format.", + ) + major, minor, patch = map(int, match.groups()) + # Bump according to type. + if bump_type == "major": + major += 1 + minor = 0 + patch = 0 + elif bump_type == "minor": + minor += 1 + patch = 0 + else: # patch + patch += 1 + return f"{major}.{minor}.{patch}" + + +def get_container_version_info() -> str: + txt_tmp: List[str] = [] + # + container_version = str(get_container_version()) + txt_tmp.append(f"container_version='{container_version}'") + # + container_dir_name = "." + changelog_version = str(get_changelog_version(container_dir_name)) + txt_tmp.append(f"changelog_version='{changelog_version}'") + # + txt = hprint.to_info("Container version", txt_tmp) + return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py new file mode 100644 index 000000000..ea8392f6e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py @@ -0,0 +1,125 @@ +""" +Import as: + +import helpers.hwall_clock_time as hwacltim +""" + +# This should have no dependencies besides Python standard libraries since it's used +# in `helpers/hlogging.py`. 
+ +import datetime +import logging +from typing import Callable, Optional, Union + +_LOG = logging.getLogger(__name__) + +# ############################################################################# +# Simulated real time +# ############################################################################# + +# Copied from `helpers/hdatetime.py` +# +# Function returning the current (true, replayed, simulated) wall-clock time as a +# timestamp. +_GetWallClockTime = Callable[[], "pd.Timestamp"] # noqa: F821 + +_get_wall_clock_time_func: Optional[_GetWallClockTime] = None + + +def set_wall_clock_time(get_wall_clock_time_func_: _GetWallClockTime) -> None: + """ + Set the global function to retrieve the wall clock time. + """ + assert callable(get_wall_clock_time_func_) + global _get_wall_clock_time_func + _get_wall_clock_time_func = get_wall_clock_time_func_ + + +def get_wall_clock_time_func() -> Optional[_GetWallClockTime]: + """ + Retrieve the global function retrieve the wall clock time. + """ + return _get_wall_clock_time_func + + +# We don't want to import `Pandas` just for a type. +def get_wall_clock_time() -> Optional["pd.Timestamp"]: # noqa: F821 + """ + Return the wall clock time (according to the set function) or `None` if no + function was set. + """ + func = _get_wall_clock_time_func + if func is None: + timestamp = None + else: + timestamp = func() + return timestamp + + +# ############################################################################# +# Real-world / machine real time. +# ############################################################################# + + +# TODO(Sameep): Redundant fuction replace by `hdatetime.timestamp_to_str()`. +def to_timestamp_str( + timestamp: "pd.Timestamp", # noqa: F821 + include_msec: bool = False, +) -> str: + if include_msec: + # Chop the last 4 miliseconds digits. This is needed for CcxtBroker_v2. 
+ return timestamp.strftime("%Y%m%d_%H%M%S%f")[:-4] + else: + return timestamp.strftime("%Y%m%d_%H%M%S") + + +# This is redundant with `hdatetime.get_current_time()` and +# `hdateti.get_current_timestamp_as_string()` but we keep them to simplify +# dependencies. +def get_machine_wall_clock_time( + *, + as_str: bool = False, + include_msec: bool = False, +) -> Union[str, datetime.datetime]: + ret = datetime.datetime.utcnow() + if as_str: + ret = to_timestamp_str(ret, include_msec) + return ret + + +# ############################################################################# +# Current bar being processed. +# ############################################################################# + + +_CURR_BAR_TIMESTAMP: Optional["pd.Timestamp"] = None # noqa: F821 + + +def reset_current_bar_timestamp() -> None: + global _CURR_BAR_TIMESTAMP + _LOG.debug("Reset") + _CURR_BAR_TIMESTAMP = None + + +def set_current_bar_timestamp(timestamp: "pd.Timestamp") -> None: # noqa: F821 + _LOG.debug("timestamp=%s", timestamp) + global _CURR_BAR_TIMESTAMP + if _CURR_BAR_TIMESTAMP is not None: + # TODO(Grisha): should we relax the check by using + # `<=` instead of `<`? 
+ assert _CURR_BAR_TIMESTAMP < timestamp, ( + "Bar timestamp can only move forward: " + + f"{_CURR_BAR_TIMESTAMP} <= {timestamp}" + ) + _CURR_BAR_TIMESTAMP = timestamp + + +def get_current_bar_timestamp( + *, + as_str: bool = False, + include_msec: bool = False, +) -> Optional[Union[str, "pd.Timestamp"]]: # noqa: F821 + ret = _CURR_BAR_TIMESTAMP + if _CURR_BAR_TIMESTAMP and as_str: + ret = to_timestamp_str(ret, include_msec=include_msec) + return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py new file mode 100644 index 000000000..4f740f572 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py @@ -0,0 +1,156 @@ +""" +When this module is imported certain annoying warnings are disabled. + +Import as: + +import helpers.hwarnings as hwarnin +""" + +if False: + _WARNING = "\033[33mWARNING\033[0m" + print(f"{_WARNING}: Disabling annoying warnings") + +# Avoid dependency from other `helpers` modules, such as `helpers.hprint`, to +# prevent import cycles. + +import warnings + +# From https://docs.python.org/3/library/warnings.html + +# TODO(gp): For some reason "once" doesn't work, so we ignore all of the warnings. +action = "ignore" + +try: + import statsmodels # noqa: F401 + + _HAS_STATSMODELS = True +except ImportError: + _HAS_STATSMODELS = False + + +if _HAS_STATSMODELS: + # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1910: + # InterpolationWarning: The test statistic is outside of the range of p-values + # available in the look-up table. The actual p-value is greater than the + # p-value returned. 
+ from statsmodels.tools.sm_exceptions import InterpolationWarning + + # warnings.simplefilter("ignore", category=InterpolationWarning) + + # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1906: + # InterpolationWarning: The test statistic is outside of the range of p-values + # available in the look-up table. The actual p-value is smaller than the + # p-value returned. + warnings.filterwarnings( + action, + category=InterpolationWarning, + module=".*statsmodels.*", + lineno=1906, + append=False, + ) + + warnings.filterwarnings( + action, + category=InterpolationWarning, + module=".*statsmodels.*", + lineno=1910, + append=False, + ) + + +# /venv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: +# DeprecationWarning: `should_run_async` will not call `transform_cell` +# automatically in the future. Please pass the result to `transformed_cell` +# argument and any exception that happen during thetransform in +# `preprocessing_exc_tuple` in IPython 7.17 and above. +# and should_run_async(code) +warnings.filterwarnings( + action, + category=DeprecationWarning, + module=".*ipykernel.*", + lineno=283, + append=False, +) + + +# TODO(gp): Add this TqdmExperimentalWarning + +try: + import pandas as pd + + _HAS_PANDAS = True +except ImportError: + _HAS_PANDAS = False + + +if _HAS_PANDAS: + pd.set_option("mode.chained_assignment", None) + # TODO(gp): We should fix the issues and re-enable. 
+ # See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + # row["net_cost"] -= cost + # /app/amp/oms/order_processing/order_processor.py:376: SettingWithCopyWarning: + # A value is trying to be set on a copy of a slice from a DataFrame + + # /venv/lib/python3.8/site-packages/pandas/io/sql.py:761: UserWarning: pandas + # only support SQLAlchemy connectable(engine/connection) ordatabase string URI or + # sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider + # using SQLAlchemy + # + # This seems a false alarm: + # https://github.com/pandas-dev/pandas/issues/45660#issuecomment-1077355514 + warnings.filterwarnings( + action, + category=UserWarning, + module=".*pandas.*", + lineno=761, + append=False, + ) + + # run_leq_node: 38%|███▊ | 3/8 [00:05<00:09, 1.98s/it]/app/amp/helpers/hdbg.py:309: PerformanceWarning: indexing past lexsort depth may impact performance. + # cond = value in valid_values + warnings.filterwarnings( + action, + category=pd.errors.PerformanceWarning, + module=".*hdbg.py.*", + lineno=309, + append=False, + ) + + # run_leq_node: 0%| | 0/8 [00:00 str: + """ + Get the shared configs S3 bucket. + + :param environment: environment to get the shared configs for + :return: shared configs S3 bucket + """ + hdbg.dassert_in(environment, ["prod", "preprod", "test"]) + bucket_name = hrecouti.get_repo_config().get_shared_configs_bucket_name( + environment + ) + hdbg.dassert_is_not( + bucket_name, + None, + f"Shared configs bucket is not defined in `repo_config.yaml` for environment: {environment}", + ) + return bucket_name + + +def _get_ecs_task_definition_template(environment: str) -> Dict[str, Any]: + """ + Get the ECS task definition template. 
+ + :return: ECS task definition template + """ + s3_bucket = _get_shared_configs_s3_bucket(environment) + s3_path = f"{s3_bucket}/{environment}/templates/ecs/ecs_task_definition_template.json" + hs3.dassert_is_s3_path(s3_path) + task_definition_config = hs3.from_file( + s3_path, aws_profile=haws.AWS_PROFILE[environment] + ) + task_definition_config = json.loads(task_definition_config) + return task_definition_config + + +def _get_efs_mount_config_template(environment: str) -> Dict[str, Any]: + """ + Get the EFS mount config template. + + :return: EFS mount config template + """ + s3_bucket = _get_shared_configs_s3_bucket(environment) + s3_path = ( + f"{s3_bucket}/{environment}/templates/efs/efs_mount_config_template.json" + ) + hs3.dassert_is_s3_path(s3_path) + efs_config = hs3.from_file( + s3_path, aws_profile=haws.AWS_PROFILE[environment] + ) + efs_config = json.loads(efs_config) + return efs_config + + +def _set_task_definition_config( + task_definition_config: Dict, + task_definition_name: str, + region: str, + environment: str, +) -> Dict[str, Any]: + """ + Update template of ECS task definition with concrete values. + + :param task_definition_config: task definition config template + :param task_definition_name: name of the task definition + :param region: region to create the task definition in + :return: full formed task definition config dictionary + """ + # Replace placeholder values inside container definition + # from the template with concrete values. + # We use single container inside our task definition and + # the convention is to set the same name as the task + # definition itself. + task_definition_config["containerDefinitions"][0]["name"] = ( + task_definition_name + ) + # Set placeholder image URL. + # Get the base registry URL in the base region. + base_registry_url = hrecouti.get_repo_config().get_container_registry_url() + # Build the region-specific ECR registry URL for the target region. 
+ # ECR registry URL format: `{account_id}.dkr.ecr.{region}.amazonaws.com`. + account_id = base_registry_url.split(".")[0] + registry_url = f"{account_id}.dkr.ecr.{region}.amazonaws.com" + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + # Make sure that the ECR replication is configured for the target region, + # so images are available in any new regions. + task_definition_config["containerDefinitions"][0]["image"] = ( + _IMAGE_URL_TEMPLATE.format(registry_url, image_name) + ) + # Set log configuration options. + log_config_opts = copy.deepcopy(_TASK_DEFINITION_LOG_OPTIONS_TEMPLATE) + log_config_opts["awslogs-group"] = log_config_opts["awslogs-group"].format( + task_definition_name + ) + log_config_opts["awslogs-region"] = region + task_definition_config["containerDefinitions"][0]["logConfiguration"][ + "options" + ] = log_config_opts + # Index is based on the order of the environment variables in the template. + # Set environment variable `CSFY_ECR_BASE_PATH`. + task_definition_config["containerDefinitions"][0]["environment"][0][ + "value" + ] = registry_url + # Set environment variable `CSFY_AWS_DEFAULT_REGION`. + task_definition_config["containerDefinitions"][0]["environment"][1][ + "value" + ] = region + # Configure access to EFS. + efs_config = _get_efs_mount_config_template(environment) + task_definition_config["volumes"] = efs_config[region]["volumes"] + task_definition_config["containerDefinitions"][0]["mountPoints"] = ( + efs_config[region]["mountPoints"] + ) + return task_definition_config + + +def _register_task_definition( + task_definition_name: str, region: str, environment: str +) -> None: + """ + Register a new ECS task definition. + + :param task_definition_name: name of the new task definition. + :param config_file: path to the JSON file containing the task + definition configuration. 
+ :param region: region to create the task definition in + :param environment: environment to create the task definition in + """ + task_definition_config = _get_ecs_task_definition_template(environment) + client = haws.get_ecs_client(haws.AWS_PROFILE[environment], region=region) + # Prevent overwriting existing task definition if it exists. + if haws.is_task_definition_exists(task_definition_name, region=region): + _LOG.info( + "Task definition %s already exists in region %s", + task_definition_name, + region, + ) + return + # + task_definition_config = _set_task_definition_config( + task_definition_config, task_definition_name, region, environment + ) + client.register_task_definition( + family=task_definition_name, + taskRoleArn=task_definition_config.get("taskRoleArn", ""), + executionRoleArn=task_definition_config["executionRoleArn"], + networkMode=task_definition_config["networkMode"], + containerDefinitions=task_definition_config["containerDefinitions"], + volumes=task_definition_config.get("volumes", []), + placementConstraints=task_definition_config.get( + "placementConstraints", [] + ), + requiresCompatibilities=task_definition_config[ + "requiresCompatibilities" + ], + cpu=task_definition_config["cpu"], + memory=task_definition_config["memory"], + ) + _LOG.info( + "Registered new task definition: %s in region %s", + task_definition_name, + region, + ) + + +def aws_update_ecs_task_definition( + *, + task_definition: str, + image_tag: str, + region: str, + environment: str, +) -> None: + """ + Update an existing ECS task definition. + + :param task_definition: the name of the ECS task definition for + which an update to container image URL is made, e.g. cmamp-test + :param image_tag: the hash of the new candidate image, e.g. 
+ 13538588e + :param region: region to update the task definition in + """ + hdbg.dassert_in(region, hs3.AWS_REGIONS) + old_image_url = haws.get_task_definition_image_url( + task_definition, environment=environment, region=region + ) + # Edit container version, e.g. cmamp:prod-12a45 - > cmamp:prod-12b46`. + new_image_url = re.sub("prod-(.+)$", f"prod-{image_tag}", old_image_url) + haws.update_task_definition( + task_definition, new_image_url, region=region, environment=environment + ) + + +@task +def aws_create_test_task_definition( + ctx, + issue_id: Optional[int] = None, + region: str = hs3.AWS_EUROPE_REGION_1, +) -> None: + """ + Create a new ECS task definition. + + :param issue_id: issue ID to create the task definition for + :param region: region to create the task definition in + """ + _ = ctx + hlitauti.report_task() + # Check if the `issue_id` provided is valid. + hdbg.dassert_is_not(issue_id, None, "issue_id is required") + is_valid_issue_id = str(issue_id).isdigit() + hdbg.dassert(is_valid_issue_id, f"issue_id '{issue_id}' must be an integer") + # Check if the `region` provided is valid. + hdbg.dassert_in(region, hs3.AWS_REGIONS) + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + task_definition_name = f"{image_name}-test-{issue_id}" + # Register task definition. + _register_task_definition( + task_definition_name, region=region, environment="test" + ) + + +@task +def aws_create_preprod_task_definition( + ctx, + region: str = hs3.AWS_EUROPE_REGION_1, +) -> None: + """ + Create a new ECS task definition for preprod environment. + + :param region: region to create the task definition in + """ + _ = ctx + hlitauti.report_task() + hdbg.dassert_in(region, hs3.AWS_REGIONS) + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + task_definition_name = f"{image_name}-preprod" + # Register task definition. 
+ _register_task_definition( + task_definition_name, region=region, environment="preprod" + ) + + +@task +def aws_create_prod_task_definition( + ctx, + region: str = hs3.AWS_US_REGION_1, +) -> None: + """ + Create a new ECS task definition. + + :param region: region to create the task definition in + """ + _ = ctx + hlitauti.report_task() + hdbg.dassert_in(region, hs3.AWS_REGIONS) + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + task_definition_name = f"{image_name}-prod" + # Register task definition. + _register_task_definition( + task_definition_name, region=region, environment="prod" + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py new file mode 100644 index 000000000..111fa2815 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py @@ -0,0 +1,104 @@ +""" +Import as: + +import helpers.lib_tasks_bash as hlitabas +""" + +import logging +import os + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hfile_tree as hfiltree +import helpers.hsystem as hsystem +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): GFI: Unit test. +@task +def bash_print_path(ctx): # type: ignore + """ + Print the bash path. + """ + _ = ctx + cmd = r"echo $PATH | sed 's/:/\n/g'" + _, ret = hsystem.system_to_string(cmd) + paths = ret.split("\n") + paths.sort() + # + all_paths = [] + # Remove empty lines. 
+ for path in paths: + if path.strip() == "": + _LOG.error("Empty path: '%s'", path) + continue + if not os.path.exists(path): + _LOG.error("Dir doesn't exist: '%s'", path) + continue + if not os.path.isdir(path): + _LOG.error("Not a dir: '%s'", path) + continue + # TODO(gp): Make it efficient. + if paths.count(path) > 1: + _LOG.error("Duplicate path: '%s'", path) + continue + all_paths.append(path) + # Print the paths. + _LOG.info("Valid paths:") + for path in all_paths: + print(path) + + +@task +def bash_print_tree( # type: ignore + ctx, + path=".", + depth=0, + clean=False, + include_tests=False, + include_python=False, + only_dirs=False, + output="", +): + """ + Print a directory tree, and optionally update or create a markdown file. + + ``` + # To print tree for current directory: + > i bash_print_tree + + # Limit depth to 2 and include test files: + > i bash_print_tree --path="devops" --depth=2 --include-tests + + # Include python files: + > i bash_print_tree --path="devops" --include-python + + # Only show directories: + > i bash_print_tree --path="devops" --only-dirs + + # Write the tree to file, preserving comments: + > i bash_print_tree --path="devops" --output="README.md" + ``` + + :param path: directory path to traverse + :param depth: maximum depth to traverse + :param clean: clean untracked files in directory + :param include_tests: include test files or directories + :param include_python: include python files + :param only_dirs: only show directories + :param output: path of the markdown file to create or update + """ + _ = ctx + hdbg.dassert_lte(0, depth, "Depth must be non-negative: %s", depth) + if clean: + cmd = "git clean -fd" + hlitauti.run(ctx, cmd) + tree = hfiltree.generate_tree( + path, depth, include_tests, include_python, only_dirs, output + ) + print(tree) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py new file mode 100644 index 000000000..f7dcadc54 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py @@ -0,0 +1,1590 @@ +""" +Import as: + +import helpers.lib_tasks_docker as hlitadoc +""" + +import functools +import getpass +import logging +import os +import re +from typing import Any, Dict, List, Optional, Union, cast + +# TODO(gp): We should use `pip install types-PyYAML` to get the mypy stubs. +import yaml +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hdict as hdict +import helpers.hdocker as hdocker +import helpers.henv as henv +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hsecrets as hsecret +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# Basic Docker commands. 
def _check_docker_login(repo_name: str) -> bool:
    """
    Check if we are already logged in to the Docker registry `repo_name`.

    The check looks for `repo_name` as a substring of the registry names
    stored under the `auths` key of `~/.docker/config.json`.

    :param repo_name: name (or fragment of the name) of the registry, e.g.,
        `623860924167.dkr.ecr`
    :return: True if a matching registry is found; False otherwise, including
        when the config file or its `auths` section does not exist
    """
    file_name = os.path.join(os.environ["HOME"], ".docker/config.json")
    if not os.path.exists(file_name):
        # No Docker client config means that no login was ever stored.
        _LOG.debug("Docker config '%s' doesn't exist", file_name)
        return False
    json_data = hio.from_json(file_name)
    # > more ~/.docker/config.json
    # ```
    # {
    #     "auths": {
    #         "623860924167.dkr.ecr.eu-north-1.amazonaws.com": {},
    #         "665840871993.dkr.ecr.us-east-1.amazonaws.com": {},
    #         "https://index.docker.io/v1/": {}
    #     },
    # ```
    _LOG.debug("json_data=%s", json_data)
    # The `auths` section can be absent (e.g., when only credential helpers
    # are configured), so we don't index it directly.
    auths = json_data.get("auths") or {}
    is_logged = any(repo_name in val for val in auths)
    return is_logged
def _docker_login_ecr() -> None:
    """
    Log in to the private AWS ECR Docker registry.

    The login is skipped when running inside the CI. The login command depends
    on the major version of the installed `awscli`:
    - v1 uses `aws ecr get-login`
    - v2 uses `aws ecr get-login-password` piped into `docker login`

    :raises NotImplementedError: if the major version of `awscli` is neither 1
        nor 2
    """
    hlitauti.report_task()
    if hserver.is_inside_ci():
        _LOG.warning("Running inside GitHub Action: skipping `docker_login`")
        return
    # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20
    use_cache = False
    if use_cache:
        # Check if we are already logged in to the target registry.
        is_logged = _check_docker_login("623860924167.dkr.ecr")
        if is_logged:
            _LOG.warning("Already logged in to the target registry: skipping")
            return
    _LOG.info("Logging in to the target registry")
    # Log in the target registry.
    major_version = _get_aws_cli_version()
    # docker login \
    #   -u AWS \
    #   -p eyJ... \
    #   -e none \
    #   https://*****.dkr.ecr.us-east-1.amazonaws.com
    # TODO(gp): Move this to var in repo_config.py.
    # TODO(gp): Hack
    profile = "ck"
    region = hs3.AWS_EUROPE_REGION_1
    cmd = ""
    if major_version == 1:
        cmd = f"eval $(aws ecr get-login --profile {profile} --no-include-email --region {region})"
    elif major_version == 2:
        if profile == "ck":
            env_var = "CSFY_ECR_BASE_PATH"
        else:
            env_var = f"{profile.upper()}_ECR_BASE_PATH"
        ecr_base_path = hlitauti.get_default_param(env_var)
        # TODO(Nikola): Remove `_get_aws_cli_version()` and use only `aws ecr get-login-password`
        #  as it is present in both versions of `awscli`.
        cmd = (
            "docker login -u AWS -p "
            f"$(aws ecr get-login-password --profile {profile}) "
            f"https://{ecr_base_path}"
        )
    else:
        # The exception must be raised: merely constructing it would let the
        # code fall through and execute an empty command.
        raise NotImplementedError(
            f"Docker login for awscli v{major_version} is not implemented!"
        )
    # TODO(Grisha): fix properly. We pass `ctx` despite the fact that we do not
    # need it with `use_system=True`, but w/o `ctx` invoke tasks (i.e. ones
    # with `@task` decorator) do not work.
    hsystem.system(cmd, suppress_output=False)
def _dassert_is_version_valid(version: str) -> None:
    """
    Check that the version is valid, i.e. looks like `1.0.0`.

    :param version: version string to validate; it must be a non-empty string
        made of exactly three dot-separated numbers
    """
    hdbg.dassert_isinstance(version, str)
    hdbg.dassert_ne(version, "")
    regex = rf"^({_IMAGE_VERSION_RE})$"
    _LOG.debug("Testing with regex='%s'", regex)
    # With `re.match()` the trailing `$` also matches right before a final
    # newline, so a value like "1.0.0\n" would be accepted. `re.fullmatch()`
    # requires the entire string to match.
    m = re.fullmatch(regex, version)
    hdbg.dassert(m, "Invalid version: '%s'", version)
def _dassert_is_base_image_name_valid(base_image: str) -> None:
    """
    Check that the base image is valid, i.e. looks like one of the below.

        *****.dkr.ecr.us-east-1.amazonaws.com/amp
        ghcr.io/cryptokaizen/cmamp

    :param base_image: registry address and image name, without any tag
    """
    regex = rf"^{_FULL_IMAGE_NAME_RE}$"
    _LOG.debug("regex=%s", regex)
    # Use `re.fullmatch()` since with `re.match()` the trailing `$` would also
    # accept a name followed by a newline.
    m = re.fullmatch(regex, base_image)
    hdbg.dassert(m, "Invalid base_image: '%s'", base_image)
# TODO(gp): Consider using a token "latest" in version, so that it's always a
# string and we avoid a special behavior encoded in None.
def get_image(
    base_image: str,
    stage: str,
    version: Optional[str],
) -> str:
    """
    Build the fully qualified name of a Docker image.

    The name has the form `<base_image>:<stage>[-<user>][-<version>]`, where
    the user name is added only for the `local` stage and the version only
    when one is passed.

    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp. An
        empty string is resolved by `_get_base_image()`
    :param stage: one of `local`, `dev`, `prod`
    :param version: e.g., `1.0.0`. If None or empty, no version is appended
    :return: e.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` or
        `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0`
    """
    # Docker refers the default image as "latest", although in our stage
    # nomenclature we call it "dev".
    hdbg.dassert_in(stage, "local dev prod".split())
    # Resolve and validate the part before the tag.
    resolved_base = _get_base_image(base_image)
    _dassert_is_base_image_name_valid(resolved_base)
    # Assemble the tag: stage, then user (local only), then version.
    tag = stage
    if stage == "local":
        tag += f"-{hsystem.get_user_name()}"
    if version not in (None, ""):
        _dassert_is_version_valid(version)
        tag += f"-{version}"
    full_name = f"{resolved_base}:{tag}"
    dassert_is_image_name_valid(full_name)
    return full_name
def _get_last_container_id(sudo: bool) -> str:
    """
    Return the id of the last started Docker container.

    :param sudo: use sudo for the Docker commands
    :return: the first column of the `docker ps -l` row, i.e., the container id
    """
    docker_exec = _get_docker_exec(sudo)
    # Get the last started container.
    cmd = f"{docker_exec} ps -l | grep -v 'CONTAINER ID'"
    # CONTAINER ID        IMAGE               COMMAND                  CREATED
    # 90897241b31a        eeb33fe1880a        "/bin/sh -c '/bin/bash ...
    _, txt = hsystem.system_to_one_line(cmd)
    # Parse the output: there should be at least one field. Splitting on any
    # whitespace (instead of on a single space) yields an empty list for an
    # empty output, so that the check below can actually fail.
    fields = txt.split()
    hdbg.dassert_lte(1, len(fields), "Invalid output='%s'", txt)
    container_id: str = fields[0]
    return container_id
@task
def docker_kill(  # type: ignore
    ctx,
    all=False,  # pylint: disable=redefined-builtin
    sudo=False,
):
    """
    Remove forcibly the last Docker container that was started.

    The selected containers are listed before being removed.

    :param all: kill all the containers (be careful!)
    :param sudo: use sudo for the Docker commands
    """
    hlitauti.report_task(txt=hprint.to_str("all"))
    # Select which containers to target: all of them or only the last one.
    if all:
        _LOG.warning("Killing all the containers")
        # TODO(gp): Ask if we are sure and add a --just-do-it option.
        selector = "-a"
    else:
        selector = "-l"
    docker = _get_docker_exec(sudo)
    # Print the containers that will be terminated.
    list_cmd = f"{docker} ps {selector}"
    hlitauti.run(ctx, list_cmd)
    # Kill: the same listing with `-q` provides the ids to remove.
    hlitauti.run(ctx, f"{docker} rm -f $({list_cmd} -q)")
@task
def docker_pull(ctx, stage="dev", version=None, skip_pull=False):  # type: ignore
    """
    Pull the image of the current repo for the given stage from the registry.

    Nothing is pulled for the `local` stage.

    :param ctx: invoke context
    :param stage: stage of the Docker image (`dev` by default)
    :param version: version of the Docker image, None for the latest one
    :param skip_pull: if True skip pulling the docker image
    """
    hlitauti.report_task()
    is_local = stage == "local"
    if is_local:
        _LOG.warning("Setting skip_pull to True for local stage")
    if is_local or skip_pull:
        _LOG.warning("Skipping pulling docker image as per user request")
        return
    # An empty base image lets the callee resolve the default one.
    _docker_pull(ctx, "", stage, version)
# TODO(gp): Remove mount_as_submodule
def _generate_docker_compose_file(
    stage: str,
    use_privileged_mode: bool,
    use_sibling_container: bool,
    shared_data_dirs: Optional[Dict[str, str]],
    mount_as_submodule: bool,
    use_network_mode_host: bool,
    use_main_network: bool,
    file_name: Optional[str],
) -> str:
    """
    Generate `tmp.docker-compose.yml` file and save it.

    The file defines the services `base_app`, `app`, `linter`,
    `jupyter_server`, and `jupyter_server_test`.

    :param stage: stage of the image, forwarded to `_get_linter_service()` to
        configure the `linter` service
    :param use_privileged_mode: if True, run `base_app` as privileged and set
        `CSFY_ENABLE_DIND=1` in its environment
    :param use_sibling_container: if True, mount the Docker socket of the host
        in the container
    :param shared_data_dirs: data directory in the host filesystem to mount
        inside the container, as a mapping from host dir to container dir.
        `None` means no dir sharing
    :param mount_as_submodule: only logged, not used to build the file
    :param use_network_mode_host: if True, set the network mode of `base_app`
        to `${NETWORK_MODE:-host}`
    :param use_main_network: use `main_network` as default network
    :param file_name: path of the file to write. If None or empty, nothing is
        written
    :return: the content of the Docker compose file as YAML
    """
    _LOG.debug(
        hprint.to_str(
            "use_privileged_mode "
            "use_sibling_container "
            "shared_data_dirs "
            "mount_as_submodule "
            "use_network_mode_host "
            "use_main_network "
            "file_name "
        )
    )
    # We could pass the env var directly, like:
    # ```
    # - CSFY_ENABLE_DIND=$CSFY_ENABLE_DIND
    # ```
    # but we prefer to inline it.
    if use_privileged_mode:
        CSFY_ENABLE_DIND = 1
    else:
        CSFY_ENABLE_DIND = 0
    # ```
    # sysname='Linux'
    # nodename='cf-spm-dev4'
    # release='3.10.0-1160.53.1.el7.x86_64'
    # version='#1 SMP Fri Jan 14 13:59:45 UTC 2022'
    # machine='x86_64'
    # ```
    csfy_host_os_name = os.uname()[0]
    csfy_host_name = os.uname()[1]
    csfy_host_os_version = os.uname()[2]
    csfy_host_user_name = getpass.getuser()
    # We assume that we don't use this code inside a container, since otherwise
    # we would need to distinguish the container style (see
    # docs/work_tools/docker/all.dockerized_flow.explanation.md) to find the
    # outermost Git root.
    if not hserver.is_inside_unit_test():
        hdbg.dassert(not hserver.is_inside_docker())
    else:
        # We call this function as part of the unit tests, which we run inside
        # the container.
        pass
    git_host_root_path = hgit.find_git_root()
    # Find git root path in the container.
    # The Git root is always mounted in the container at `/app`. So we need to
    # use that as starting point.
    # E.g. For CSFY_GIT_ROOT_PATH, we need to use `/app`, rather than
    # `/data/dummy/src/cmamp1`.
    # E.g. For CSFY_HELPERS_ROOT_PATH, we need to use `/app/helpers_root`.
    # rather than `/data/dummy/src/cmamp1/helpers_root`.
    git_root_path = "/app"
    # Find helpers root path in the container.
    # The path of helpers relative to the Git root on the host is re-rooted
    # under the Git root in the container.
    helper_dir = hgit.find_helpers_root()
    helper_relative_path = os.path.relpath(helper_dir, git_host_root_path)
    helper_root_path = os.path.normpath(
        os.path.join(git_root_path, helper_relative_path)
    )
    # A super repo is a repo that contains helpers as a submodule and
    # is not a helper itself.
    use_helpers_as_nested_module = (
        0 if hgit.is_in_helpers_as_supermodule() else 1
    )
    # We could do the same also with IMAGE for symmetry.
    # Keep the env vars in sync with what we print in `henv.get_env_vars()`.
    # Configure `base_app` service.
    # TODO(gp): Use henv.get_env_vars() to get the env vars.
    # Entries written as `NAME=$NAME` are left unresolved in the generated
    # file, while the f-string entries are inlined with the values computed
    # above.
    environment = [
        f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}",
        "CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL",
        f"CSFY_HOST_NAME={csfy_host_name}",
        f"CSFY_HOST_OS_NAME={csfy_host_os_name}",
        f"CSFY_HOST_OS_VERSION={csfy_host_os_version}",
        f"CSFY_HOST_USER_NAME={csfy_host_user_name}",
        "CSFY_REPO_CONFIG_CHECK=True",
        # Use inferred path for `repo_config.py`.
        "CSFY_REPO_CONFIG_PATH=",
        "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID",
        "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION",
        "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE",
        "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET",
        "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY",
        "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN",
        "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH",
        # The path of the outermost Git root on the host.
        f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}",
        # The path of the outermost Git root in the Docker container.
        f"CSFY_GIT_ROOT_PATH={git_root_path}",
        # The path of the helpers dir in the Docker container (e.g.,
        # `/app`, `/app/helpers_root`)
        f"CSFY_HELPERS_ROOT_PATH={helper_root_path}",
        f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}",
        "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN",
        # This env var is used by GH Action to signal that we are inside the
        # CI. It's set up by default by the GH Action runner. See:
        # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
        "CSFY_CI=$CSFY_CI",
        # TODO(Vlad): consider removing, locally we use our personal tokens
        # from files and inside GitHub actions we use the `GH_TOKEN`
        # environment variable.
    ]
    environment.extend(
        [
            "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN",
            # Inside GitHub Actions we use `GH_TOKEN` environment variable,
            # see https://cli.github.com/manual/gh_auth_login.
            "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN",
        ]
    )
    # Forward the API key env vars reported by `henv`.
    api_key_env_vars = henv.get_api_key_env_vars()
    environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars])
    #
    base_app_spec = {
        "cap_add": ["SYS_ADMIN"],
        "environment": environment,
        "image": "${IMAGE}",
        "restart": "no",
        "volumes": [
            # TODO(gp): We should pass the value of $HOME from dev.Dockerfile to here.
            # E.g., we might define $HOME in the env file.
            "~/.aws:/home/.aws",
            "~/.config/gspread_pandas/:/home/.config/gspread_pandas/",
            "~/.config/gh:/home/.config/gh",
            "~/.ssh:/home/.ssh",
        ],
    }
    if use_privileged_mode:
        # This is needed:
        # - for Docker-in-docker (dind)
        # - to mount fstabs
        base_app_spec["privileged"] = use_privileged_mode
    if shared_data_dirs:
        # Mount shared dirs.
        shared_volumes = [
            f"{host}:{container}" for host, container in shared_data_dirs.items()
        ]
        # Mount all dirs that are specified.
        base_app_spec["volumes"].extend(shared_volumes)
    # Disabled code path, kept for reference.
    if False:
        # No need to mount file systems.
        base_app_spec["volumes"].append("../docker_build/fstab:/etc/fstab")
    if use_sibling_container:
        # Use sibling-container approach.
        base_app_spec["volumes"].append(
            "/var/run/docker.sock:/var/run/docker.sock"
        )
    # Disabled code path, kept for reference.
    if False:
        base_app_spec["deploy"] = {
            "resources": {
                "limits": {
                    # This should be passed from command line depending on how much
                    # memory is available.
                    "memory": "60G",
                },
            },
        }
    if use_network_mode_host:
        # Default network mode set to host so we can reach e.g.
        # a database container pointing to localhost:5432.
        # In tests we use dind so we need set back to the default "bridge".
        # See CmTask988 and https://stackoverflow.com/questions/24319662
        base_app_spec["network_mode"] = "${NETWORK_MODE:-host}"
    # Configure `app` service.
    # Mount `amp` when it is used as submodule. In this case we need to
    # mount the super project in the container (to make git work with the
    # supermodule) and then change dir to `amp`.
    app_spec = {
        "extends": "base_app",
    }
    # Use absolute path of the dir to mount the volume and set working dir.
    # The `app_dir` dir points to the root of the repo.
    # The `working_dir` points to the path of the runnable dir.
    # - If the runnable dir is the root of the repo, then `working_dir` is `/app`.
    # - If the runnable dir is a subdirectory of the repo, then `working_dir` is `/app/subdir`.
    curr_dir = os.getcwd()
    # Path from the Git root to the current dir.
    rel_dir1 = os.path.relpath(curr_dir, git_host_root_path)
    # Path from the current dir back to the Git root.
    rel_dir2 = os.path.relpath(git_host_root_path, curr_dir)
    app_dir = os.path.abspath(os.path.join(curr_dir, rel_dir2))
    working_dir = os.path.normpath(os.path.join("/app", rel_dir1))
    app_spec["volumes"] = [f"{app_dir}:/app"]
    app_spec["working_dir"] = working_dir
    # Configure `linter` service.
    linter_spec = _get_linter_service(stage)
    # Configure `jupyter_server` service.
    # For Jupyter server we cannot use "host" network_mode because
    # it is incompatible with the port bindings.
    jupyter_server = {
        "command": "devops/docker_run/run_jupyter_server.sh",
        "environment": [
            "PORT=${PORT}",
        ],
        "extends": "app",
        "network_mode": "${NETWORK_MODE:-bridge}",
        # TODO(gp): Rename `AM_PORT`.
        "ports": [
            "${PORT}:${PORT}",
        ],
    }
    # Configure `jupyter_server_test` service.
    # TODO(gp): For some reason the following doesn't work.
    #  jupyter_server_test:
    #    command: jupyter notebook -h 2>&1 >/dev/null
    #    extends:
    #      jupyter_server
    jupyter_server_test = {
        "command": "jupyter notebook -h 2>&1 >/dev/null",
        "environment": [
            "PORT=${PORT}",
        ],
        "extends": "app",
        "network_mode": "${NETWORK_MODE:-bridge}",
        "ports": [
            "${PORT}:${PORT}",
        ],
    }
    # Specify structure of the docker-compose file.
    docker_compose = {
        "version": "3",
        "services": {
            "base_app": base_app_spec,
            "app": app_spec,
            "linter": linter_spec,
            "jupyter_server": jupyter_server,
            "jupyter_server_test": jupyter_server_test,
        },
    }
    # Configure networks.
    if use_main_network:
        docker_compose["networks"] = {"default": {"name": "main_network"}}

    class _Dumper(yaml.Dumper):
        """
        A custom YAML Dumper class that adjusts indentation.
        """

        def increase_indent(self_: Any, flow=False, indentless=False) -> Any:
            """
            Override the method to modify YAML indentation behavior.

            The values passed by the caller are ignored: `flow=False` and
            `indentless=False` are always forwarded to the parent class.
            """
            # NOTE(review): presumably forcing `indentless=False` indents the
            # list items under their parent key -- confirm against PyYAML.
            return super().increase_indent(flow=False, indentless=False)

    # Convert the dictionary to YAML format.
    # `sort_keys=False` keeps the keys in the order used above.
    yaml_str = yaml.dump(
        docker_compose,
        Dumper=_Dumper,
        default_flow_style=False,
        indent=2,
        sort_keys=False,
    )
    yaml_str = cast(str, yaml_str)
    # Save YAML to file if file_name is specified.
    if file_name:
        if os.path.exists(file_name) and hserver.is_inside_ci():
            # Permission error is raised if we try to overwrite existing file.
            # See CmTask #2321 for detailed info.
            # Note that the whole dir containing the file is removed.
            compose_directory = os.path.dirname(file_name)
            hsystem.system(f"sudo rm -rf {compose_directory}")
        hio.to_file(file_name, yaml_str)
    return yaml_str
+ docker_compose_path = "tmp.docker-compose.yml" + docker_compose_path = os.path.join(dir_name, docker_compose_path) + docker_compose_path = os.path.abspath(docker_compose_path) + return docker_compose_path + + +def _get_docker_compose_files( + stage: str, + generate_docker_compose_file: bool, + service_name: str, + extra_docker_compose_files: Optional[List[str]], +) -> List[str]: + """ + Generate the Docker compose file and return the list of Docker compose + paths. + + :return: list of the Docker compose paths + """ + docker_compose_files = [] + # Get the repo short name (e.g., `amp`). + repo_short_name = hrecouti.get_repo_config().get_repo_short_name() + _LOG.debug("repo_short_name=%s", repo_short_name) + # Check submodule status, if needed. + mount_as_submodule = False + if repo_short_name in ("amp", "cmamp"): + # Check if `amp` is a submodule. + path, _ = hgit.get_path_from_supermodule() + if path != "": + _LOG.warning("amp is a submodule") + mount_as_submodule = True + # Write Docker compose file. + file_name = get_base_docker_compose_path() + if service_name == "linter": + # Since we are running the prod `helpers` container we need to use the + # settings from the `repo_config` from that container, and not the settings + # launch the container corresponding to this repo. + enable_privileged_mode = False + use_docker_sibling_containers = False + get_shared_data_dirs = None + use_docker_network_mode_host = False + use_main_network = False + else: + # Use the settings from the `repo_config` corresponding to this container. 
+ enable_privileged_mode = hserver.enable_privileged_mode() + use_docker_sibling_containers = hserver.use_docker_sibling_containers() + get_shared_data_dirs = hserver.get_shared_data_dirs() + use_docker_network_mode_host = hserver.use_docker_network_mode_host() + use_main_network = hserver.use_main_network() + # + if generate_docker_compose_file: + _generate_docker_compose_file( + stage, + enable_privileged_mode, + use_docker_sibling_containers, + get_shared_data_dirs, + mount_as_submodule, + use_docker_network_mode_host, + use_main_network, + file_name, + ) + else: + _LOG.warning("Skipping generating Docker compose file '%s'", file_name) + docker_compose_files.append(file_name) + # Add the compose files from command line. + if extra_docker_compose_files: + hdbg.dassert_isinstance(extra_docker_compose_files, list) + docker_compose_files.extend(extra_docker_compose_files) + # Add the compose files from the global params. + key = "DOCKER_COMPOSE_FILES" + if hlitauti.has_default_param(key): + docker_compose_files.append(hlitauti.get_default_param(key)) + # + _LOG.debug(hprint.to_str("docker_compose_files")) + for docker_compose in docker_compose_files: + hdbg.dassert_path_exists(docker_compose) + return docker_compose_files + + +_IMAGE_VERSION_FROM_CHANGELOG = "FROM_CHANGELOG" + + +def resolve_version_value( + version: str, + *, + container_dir_name: str = ".", +) -> str: + """ + Pass a version (e.g., 1.0.0) or a symbolic value (e.g., FROM_CHANGELOG) and + return the resolved value of the version. + + :return: full version with patch for prod (e.g., 1.3.2) + """ + hdbg.dassert_isinstance(version, str) + if version == _IMAGE_VERSION_FROM_CHANGELOG: + version = hversio.get_changelog_version(container_dir_name) + _dassert_is_version_valid(version) + prod_version = version + return prod_version + + +def to_dev_version(prod_version: str) -> str: + """ + Pass a prod version (e.g., 1.1.1) and strip the patch value. 
+ + :return: stripped version without patch for dev (e.g., 1.1.0) + """ + hdbg.dassert_isinstance(prod_version, str) + _dassert_is_version_valid(prod_version) + # Strip patch value from the version. + dev_version = prod_version.split(".")[:-1] + dev_version = ".".join(dev_version) + ".0" + return dev_version + + +def dassert_is_subsequent_version( + version: str, + *, + container_dir_name: str = ".", +) -> None: + """ + Check that `version` is bigger than the current one as specified in the + changelog. + """ + if version != _IMAGE_VERSION_FROM_CHANGELOG: + current_version = hversio.get_changelog_version(container_dir_name) + hdbg.dassert_lte(current_version, version) + + +# //////////////////////////////////////////////////////////////////////////////// +# Misc. +# //////////////////////////////////////////////////////////////////////////////// + + +def _run_docker_as_user(as_user_from_cmd_line: bool) -> bool: + as_root = hserver.run_docker_as_root() + as_user = as_user_from_cmd_line + if as_root: + as_user = False + _LOG.debug( + "as_user_from_cmd_line=%s as_root=%s -> as_user=%s", + as_user_from_cmd_line, + as_root, + as_user, + ) + return as_user + + +def _get_container_name(service_name: str) -> str: + """ + Create a container name based on various information. + + E.g., `grisha.cmamp.app.cmamp1.20220317_232120` + + The information used to build a container is: + - Linux username + - Base Docker image name + - Service name + - Project directory that was used to start a container + - Container start timestamp + + :param service_name: `docker-compose` service name, e.g., `app` + :return: container name + """ + hdbg.dassert_ne(service_name, "", "You need to specify a service name") + # Get linux username. + linux_user = hsystem.get_user_name() + # Get dir name. + project_dir = hgit.get_project_dirname() + # Get Docker image base name. + image_name = hlitauti.get_default_param("BASE_IMAGE") + # Get current timestamp. 
+ current_timestamp = hlitauti.get_ET_timestamp() + # Build container name. + container_name = f"{linux_user}.{image_name}.{service_name}.{project_dir}.{current_timestamp}" + _LOG.debug( + "get_container_name: container_name=%s", + container_name, + ) + return container_name + + +def _get_docker_base_cmd( + base_image: str, + stage: str, + version: str, + service_name: str, + # Params from `_get_docker_compose_cmd()`. + generate_docker_compose_file: bool, + extra_env_vars: Optional[List[str]], + extra_docker_compose_files: Optional[List[str]], + skip_docker_image_compatibility_check: bool, +) -> List[str]: + r""" + Get base `docker-compose` command encoded as a list of strings. + + It can be used as a base to build more complex commands, e.g., `run`, `up`, + `down`. + + E.g., + ``` + ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', + '\n docker-compose', + '\n --file amp/devops/compose/tmp.docker-compose.yml', + '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', + '\n --env-file devops/env/default.env'] + ``` + :param generate_docker_compose_file: whether to generate or reuse the existing + Docker compose file + :param extra_env_vars: represent vars to add, e.g., `["PORT=9999", "DRY_RUN=1"]` + :param extra_docker_compose_files: `docker-compose` override files + :param skip_docker_image_compatibility_check: if True, skip checking image + architecture compatibility + """ + _LOG.debug(hprint.func_signature_to_str()) + docker_cmd_: List[str] = [] + # - Handle the image. + image = get_image(base_image, stage, version) + _LOG.debug("base_image=%s stage=%s -> image=%s", base_image, stage, image) + dassert_is_image_name_valid(image) + # The check is mainly for developers to avoid using the wrong image (e.g., + # an x86 vs ARM architecture). + # We can skip the image compatibility check during the CI or when + # explicitly skipped. 
+ if not (hserver.is_inside_ci() or skip_docker_image_compatibility_check): + hdocker.check_image_compatibility_with_current_arch(image) + else: + _LOG.warning("Skipping docker image compatibility check") + docker_cmd_.append(f"IMAGE={image}") + # - Handle extra env vars. + if extra_env_vars: + hdbg.dassert_isinstance(extra_env_vars, list) + for env_var in extra_env_vars: + docker_cmd_.append(f"{env_var}") + # + docker_cmd_.append(r""" + docker compose""") + docker_compose_files = _get_docker_compose_files( + stage, + generate_docker_compose_file, + service_name, + extra_docker_compose_files, + ) + file_opts = " ".join([f"--file {dcf}" for dcf in docker_compose_files]) + _LOG.debug(hprint.to_str("file_opts")) + # TODO(gp): Use something like `.append(rf"{space}{...}")` + docker_cmd_.append(rf""" + {file_opts}""") + # - Handle the env file. + env_file = "devops/env/default.env" + docker_cmd_.append(rf""" + --env-file {env_file}""") + return docker_cmd_ + + +def _get_docker_compose_cmd( + base_image: str, + stage: str, + version: str, + cmd: str, + *, + # TODO(gp): make these params mandatory. + extra_env_vars: Optional[List[str]] = None, + extra_docker_compose_files: Optional[List[str]] = None, + extra_docker_run_opts: Optional[List[str]] = None, + service_name: str = "app", + use_entrypoint: bool = True, + generate_docker_compose_file: bool = True, + as_user: bool = True, + print_docker_config: bool = False, + use_bash: bool = False, + skip_docker_image_compatibility_check: bool = False, +) -> str: + """ + Get `docker-compose` run command. 
+ + E.g., + ``` + IMAGE=*****..dkr.ecr.us-east-1.amazonaws.com/amp:dev \ + docker-compose \ + --file /amp/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name grisha.cmamp.app.cmamp1.20220317_232120 \ + --user $(id -u):$(id -g) \ + app \ + bash + ``` + :param cmd: command to run inside Docker container + :param extra_docker_run_opts: additional `docker-compose` run options + :param service_name: service to use to run a command + :param use_entrypoint: whether to use the `entrypoint.sh` or not + :param generate_docker_compose_file: generate the Docker compose file or not + :param as_user: pass the user / group id or not + :param print_docker_config: print the docker config for debugging purposes + :param use_bash: run command through a shell + :param skip_docker_image_compatibility_check: if True, skip checking image architecture compatibility + """ + _LOG.debug(hprint.func_signature_to_str()) + # - Get the base Docker command. + docker_cmd_ = _get_docker_base_cmd( + base_image, + stage, + version, + service_name, + generate_docker_compose_file, + extra_env_vars, + extra_docker_compose_files, + skip_docker_image_compatibility_check, + ) + # - Add the `config` command for debugging purposes. + docker_config_cmd: List[str] = docker_cmd_[:] + # TODO(gp): Use yaml approach like done for other parts of the code. + docker_config_cmd.append(r""" + config""") + # - Add the `run` command. + docker_cmd_.append(r""" + run \ + --rm""") + # - Add a name to the container. + container_name = _get_container_name(service_name) + docker_cmd_.append(rf""" + --name {container_name}""") + # - Handle the user. + as_user = _run_docker_as_user(as_user) + if as_user: + docker_cmd_.append(r""" + --user $(id -u):$(id -g)""") + # - Handle the extra docker options. 
+ if extra_docker_run_opts: + hdbg.dassert_isinstance(extra_docker_run_opts, list) + extra_opts = " ".join(extra_docker_run_opts) + docker_cmd_.append(rf""" + {extra_opts}""") + # - Handle entrypoint. + if use_entrypoint: + docker_cmd_.append(rf""" + {service_name}""") + if cmd: + if use_bash: + cmd = f"bash -c '{cmd}'" + docker_cmd_.append(rf""" + {cmd}""") + else: + # No entrypoint. + docker_cmd_.append(rf""" + --entrypoint bash \ + {service_name}""") + # Print the config for debugging purpose. + if print_docker_config: + docker_config_cmd_as_str = hlitauti.to_multi_line_cmd(docker_config_cmd) + _LOG.debug("docker_config_cmd=\n%s", docker_config_cmd_as_str) + _LOG.debug( + "docker_config=\n%s", + hsystem.system_to_string(docker_config_cmd_as_str)[1], + ) + # Print the config for debugging purpose. + docker_cmd_: str = hlitauti.to_multi_line_cmd(docker_cmd_) + return docker_cmd_ + + +# //////////////////////////////////////////////////////////////////////////////// +# bash and cmd. +# //////////////////////////////////////////////////////////////////////////////// + + +def _docker_cmd( + ctx: Any, + docker_cmd_: str, + *, + skip_pull: bool = False, + **ctx_run_kwargs: Any, +) -> Optional[int]: + """ + Print and execute a Docker command. + + :param kwargs: kwargs for `ctx.run()` + """ + if hserver.is_inside_ci(): + import helpers.hs3 as hs3 + + # Generate files with the AWS settings that are missing when running + # inside CI. + hs3.generate_aws_files() + docker_pull(ctx, skip_pull=skip_pull) + _LOG.debug("cmd=%s", docker_cmd_) + rc: Optional[int] = hlitauti.run( + ctx, docker_cmd_, pty=True, **ctx_run_kwargs + ) + return rc + + +@task +def docker_bash( # type: ignore + ctx, + base_image="", + stage="dev", + version="", + use_entrypoint=True, + as_user=True, + generate_docker_compose_file=True, + container_dir_name=".", + skip_pull=False, + skip_docker_image_compatibility_check=False, +): + """ + Start a bash shell inside the container corresponding to a stage. 
+ + :param use_entrypoint: whether to use the `entrypoint.sh` or not + :param as_user: pass the user / group id or not + :param generate_docker_compose_file: generate the Docker compose file or not + :param skip_pull: if True skip pulling the docker image + """ + _LOG.debug(hprint.func_signature_to_str("ctx")) + hlitauti.report_task(container_dir_name=container_dir_name) + # + cmd = "bash" + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + generate_docker_compose_file=generate_docker_compose_file, + use_entrypoint=use_entrypoint, + as_user=as_user, + skip_docker_image_compatibility_check=skip_docker_image_compatibility_check, + ) + _LOG.debug("docker_cmd_=%s", docker_cmd_) + _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) + + +@task +def docker_cmd( # type: ignore + ctx, + base_image="", + stage="dev", + version="", + cmd="", + as_user=True, + generate_docker_compose_file=True, + use_bash=False, + container_dir_name=".", + skip_pull=False, +): + """ + Execute the command `cmd` inside a container corresponding to a stage. + + :param as_user: pass the user / group id or not + :param generate_docker_compose_file: generate or reuse the Docker + compose file + :param use_bash: run command through a shell + """ + hlitauti.report_task(container_dir_name=container_dir_name) + hdbg.dassert_ne(cmd, "") + # TODO(gp): Do we need to overwrite the entrypoint? + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + generate_docker_compose_file=generate_docker_compose_file, + as_user=as_user, + use_bash=use_bash, + ) + _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) + + +# //////////////////////////////////////////////////////////////////////////////// +# Jupyter. 
+# //////////////////////////////////////////////////////////////////////////////// + + +def _get_docker_jupyter_cmd( + base_image: str, + stage: str, + version: str, + port: int, + self_test: bool, + *, + use_entrypoint: bool = True, + print_docker_config: bool = False, +) -> str: + cmd = "" + extra_env_vars = [f"PORT={port}"] + extra_docker_run_opts = ["--service-ports"] + service_name = "jupyter_server_test" if self_test else "jupyter_server" + # + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + extra_docker_run_opts=extra_docker_run_opts, + service_name=service_name, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + return docker_cmd_ + + +@task +def docker_jupyter( # type: ignore + ctx, + stage="dev", + version="", + base_image="", + auto_assign_port=True, + use_entrypoint=True, + port=None, + self_test=False, + container_dir_name=".", + skip_pull=False, +): + """ + Run Jupyter notebook server. + + :param auto_assign_port: use the UID of the user and the inferred + number of the repo (e.g., 4 for `~/src/amp4`) to get a unique + port + :param skip_pull: if True skip pulling the docker image + """ + hlitauti.report_task(container_dir_name=container_dir_name) + if port is None: + if auto_assign_port: + uid = os.getuid() + _LOG.debug("uid=%s", uid) + git_repo_idx = hgit.get_project_dirname(only_index=True) + git_repo_idx = int(git_repo_idx) + _LOG.debug("git_repo_idx=%s", git_repo_idx) + # We assume that there are no more than `max_idx_per_users` clients. 
+ max_idx_per_user = 10 + hdbg.dassert_lte(git_repo_idx, max_idx_per_user) + port = (uid * max_idx_per_user) + git_repo_idx + else: + port = 9999 + _LOG.info("Assigned port is %s", port) + # + print_docker_config = False + docker_cmd_ = _get_docker_jupyter_cmd( + base_image, + stage, + version, + port, + self_test, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) + + +def _get_docker_dash_app_cmd( + base_image: str, + stage: str, + version: str, + port: int, + *, + print_docker_config: bool = False, +) -> str: + cmd = "" + extra_env_vars = [f"PORT={port}"] + extra_docker_run_opts = ["--service-ports"] + service_name = "dash_app" + # + docker_cmd_ = _get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + extra_docker_run_opts=extra_docker_run_opts, + service_name=service_name, + print_docker_config=print_docker_config, + ) + return docker_cmd_ + + +@task +def docker_dash_app( # type: ignore + ctx, + stage="dev", + version="", + base_image="", + auto_assign_port=True, + port=None, + container_dir_name=".", +): + """ + Run dash app. + + :param auto_assign_port: use the UID of the user and the inferred + number of the repo (e.g., 4 for `~/src/amp4`) to get a unique + port + """ + hlitauti.report_task(container_dir_name=container_dir_name) + if port is None: + if auto_assign_port: + uid = os.getuid() + _LOG.debug("uid=%s", uid) + git_repo_idx = hgit.get_project_dirname(only_index=True) + git_repo_idx = int(git_repo_idx) + _LOG.debug("git_repo_idx=%s", git_repo_idx) + # We assume that there are no more than `max_idx_per_users` clients. 
+ max_idx_per_user = 10 + hdbg.dassert_lte(git_repo_idx, max_idx_per_user) + port = (uid * max_idx_per_user) + git_repo_idx + else: + port = 9999 + # + _LOG.info("Assigned port is %s", port) + print_docker_config = False + docker_cmd_ = _get_docker_dash_app_cmd( + base_image, + stage, + version, + port, + print_docker_config=print_docker_config, + ) + _docker_cmd(ctx, docker_cmd_) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py new file mode 100644 index 000000000..4c2149f52 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py @@ -0,0 +1,1890 @@ +""" +Import as: + +import helpers.lib_tasks_docker_release as hltadore +""" + +import datetime +import logging +import os +from operator import attrgetter +from typing import Any, Optional + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio +import helpers.lib_tasks_aws as hlitaaws +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_pytest as hlitapyt +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_DEFAULT_TARGET_REGISTRY = "aws_ecr.ck" +_LOG = logging.getLogger(__name__) +_AUTO_RELEASE_LABEL = "Automated release" + +# pylint: disable=protected-access + + +# ############################################################################# +# Docker image workflows. 
# #############################################################################


def _to_abs_path(filename: str) -> str:
    """
    Convert a path to its absolute form and check that it exists.

    :param filename: path to convert
    :return: absolute version of `filename`
    """
    abs_filename = os.path.abspath(filename)
    hdbg.dassert_path_exists(abs_filename)
    return abs_filename


def _prepare_docker_ignore(
    ctx: Any,
    docker_ignore: str,
    *,
    copy_to_git_root: bool = True,
) -> None:
    """
    Install `docker_ignore` as the `.dockerignore` file seen by `docker build`.

    :param ctx: invoke context
    :param docker_ignore: path to the `.dockerignore` file to install
    :param copy_to_git_root: if True, the file is copied to the Git root dir;
        otherwise, it is copied to the current dir
    """
    # There is no built-in way to tell Docker which `.dockerignore` to use
    # (https://stackoverflow.com/questions/40904409), so the file is copied in
    # place.
    hdbg.dassert_path_exists(docker_ignore)
    # All the runnable dirs copy the entire repo content, so the Docker context
    # is the Git root dir and the `.dockerignore` file needs to be placed
    # there.
    dst_file = ".dockerignore"
    if copy_to_git_root:
        dst_file = os.path.join(hgit.find_git_root(), dst_file)
    hlitauti.run(ctx, f"cp -f {docker_ignore} {dst_file}")


def _get_dev_version(version: str, container_dir_name: str) -> str:
    """
    Resolve `version` and convert it to the corresponding dev version.

    :param version: version or symbolic value, passed to
        `hlitadoc.resolve_version_value()`
    :param container_dir_name: passed to `hlitadoc.resolve_version_value()`
    :return: the value returned by `hlitadoc.to_dev_version()` for the
        resolved version
    """
    resolved_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    dev_version = hlitadoc.to_dev_version(resolved_version)
    _LOG.debug(
        "prod_version=%s -> dev_version=%s", resolved_version, dev_version
    )
    return dev_version


def _create_multiarch_builder(
    ctx: Any,
) -> None:
    """
    Re-create the Docker buildx builder used for multi-arch builds.

    :param ctx: invoke context
    """
    builder_name = "multiarch_builder"
    # Remove the builder left by previous executions. The builder might not
    # exist, so a failure of this command is tolerated.
    rm_cmd = rf"""
    docker buildx rm {builder_name}
    """
    hsystem.system(rm_cmd, abort_on_error=False)
    # Create the builder and select it as the current one.
    create_cmd = rf"""
    docker buildx create \
        --name {builder_name} \
        --driver docker-container \
        --bootstrap \
        && \
        docker buildx use {builder_name}
    """
    hlitauti.run(ctx, create_cmd)


# #############################################################################
# Local/Dev image flow
# #############################################################################
# - A "local" image (which is a release candidate for the DEV image) is built
#   with:
#   ```
#   > i docker_build_local_image
#   ```
# - This creates a local image like `helpers:local.saggese-1.0.0`
# - A qualification process (e.g., running all unit tests and the QA tests) is
#   performed on the local image (e.g., locally or through GitHub actions)
# - If the qualification process is passed, the image is released as `dev` on
#   the registries


# Use Docker buildkit or not.
# DOCKER_BUILDKIT = 1
DOCKER_BUILDKIT = 0


def _build_multi_arch_image(
    ctx: Any,
    opts: str,
    multi_arch: str,
    build_args: str,
    build_image: str,
    dockerfile: str,
) -> None:
    """
    Build a Docker image for multiple architectures and push it.

    :param ctx: invoke context
    :param opts: options for the build (e.g., --no-cache)
    :param multi_arch: architectures to build for (e.g.,
        `linux/amd64,linux/arm64`)
    :param build_args: build arguments passed to the build command
    :param build_image: name used to tag the built image
    :param dockerfile: path of the Dockerfile to build
    """
    # The current dir is packed in a tar stream, dereferencing the symbolic
    # links, and the stream is piped to the build command as context.
    # See HelpersTask197.
    build_cmd = rf"""
    tar -czh . | DOCKER_BUILDKIT={DOCKER_BUILDKIT} \
        time \
        docker buildx build \
        {opts} \
        --push \
        --platform {multi_arch} \
        {build_args} \
        --tag {build_image} \
        --file {dockerfile} \
        -
    """
    hlitauti.run(ctx, build_cmd)


def _list_image(ctx: Any, image: str) -> None:
    """
    Run `docker image ls` on an image.

    :param ctx: invoke context
    :param image: docker image reference in REPOSITORY[:TAG] format
        Examples:
        - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0`
        - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev`
        - `sorrentum/cmamp:dev-1.0.0`
        - `ghcr.io/cryptokaizen/cmamp:prod`
    """
    hlitauti.run(ctx, f"docker image ls {image}")


def _run_tests(
    ctx: Any,
    stage: str,
    version: str,
    *,
    skip_tests: Optional[bool] = False,
    fast_tests: Optional[bool] = True,
    slow_tests: Optional[bool] = True,
    superslow_tests: Optional[bool] = True,
    qa_tests: Optional[bool] = True,
) -> None:
    """
    Run the selected test lists on the image of a stage and version.

    :param ctx: invoke context
    :param stage: image stage (must be one of `local`, `dev`, or `prod`)
    :param version: version to test
    :param skip_tests: if True, no test is run, whatever the other flags are
    :param fast_tests: run fast tests
    :param slow_tests: run slow tests
    :param superslow_tests: run superslow tests
    :param qa_tests: run QA tests
    """
    hdbg.dassert_in(stage, ("local", "dev", "prod"))
    if skip_tests:
        _LOG.warning("Skipping all tests")
        return
    # Pair each flag with its test runner, in the order of execution.
    test_runners = [
        (fast_tests, hlitapyt.run_fast_tests),
        (slow_tests, hlitapyt.run_slow_tests),
        (superslow_tests, hlitapyt.run_superslow_tests),
        (qa_tests, hlitapyt.run_qa_tests),
    ]
    for is_enabled, run_tests in test_runners:
        if is_enabled:
            run_tests(ctx, stage=stage, version=version)


# TODO(sandeep): Consider promoting this to an invoke target and removing the callers.
# Reason: the caller invoke targets only contain this helper call.
+def _docker_tag_and_push_multi_arch_image( + ctx: Any, + version: str, + base_image: str, + target_registry: str, + container_dir_name: str, + source_stage: str, + target_stage: str, +) -> None: + """ + Tag and push a multi-arch image to the target registry using `docker buildx + imagetools`. + + :param ctx: invoke context + :param version: version to tag the image with + :param base_image: base name of the image (e.g., + `*****.dkr.ecr.us-east-1.amazonaws.com/amp`) + :param target_registry: target Docker registry to push to (e.g., + `aws_ecr.ck` or `dockerhub.causify`) + :param container_dir_name: directory where Dockerfile is located + :param source_stage: source stage of the image (must be one of `local` or + `prod`) + :param target_stage: target stage to push the image as (must be one + of `dev` or `prod`) + """ + hdbg.dassert_in(source_stage, ("local", "prod")) + hdbg.dassert_in(target_stage, ("dev", "prod")) + # + hlitadoc.docker_login(ctx, target_registry) + # Get source version string. + if source_stage == "local": + source_stage_version = _get_dev_version(version, container_dir_name) + elif source_stage == "prod": + source_stage_version = hlitadoc.resolve_version_value( + version, container_dir_name=container_dir_name + ) + else: + raise ValueError( + f"Invalid source stage='{source_stage}' for tagging and pushing" + ) + source_image_versioned = hlitadoc.get_image( + base_image, source_stage, source_stage_version + ) + _LOG.info( + "Pushing the %s image %s to the target_registry %s ", + source_stage, + source_image_versioned, + target_registry, + ) + if target_registry == "aws_ecr.ck": + # Use AWS Docker registry. + target_base_image = "" + elif target_registry == "dockerhub.causify": + # Use public GitHub Docker registry. 
+ target_base_image_name = ( + hrecouti.get_repo_config().get_docker_base_image_name() + ) + target_base_image = f"causify/{target_base_image_name}" + else: + raise ValueError( + f"Invalid target Docker image registry='{target_registry}'" + ) + # Only create a versioned image for the 'dev' stage or for the + # `dockerhub.causify` registry. + if target_stage == "dev" or target_registry == "dockerhub.causify": + # Tag and push the source image as versioned target image. + target_versioned_image = hlitadoc.get_image( + target_base_image, target_stage, source_stage_version + ) + cmd = f"docker buildx imagetools create -t {target_versioned_image} {source_image_versioned}" + hlitauti.run(ctx, cmd) + # Tag and push the source image as target image. + target_latest_version = None + target_latest_image = hlitadoc.get_image( + target_base_image, target_stage, version=target_latest_version + ) + cmd = f"docker buildx imagetools create -t {target_latest_image} {source_image_versioned}" + hlitauti.run(ctx, cmd) + + +@task +def docker_push_dev_image( # type: ignore + ctx, + version, + base_image="", + container_dir_name=".", +): + """ + Push the "dev" image to ECR. + + :param ctx: invoke context + :param version: version to tag the image and code with + :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + # + dev_version = _get_dev_version(version, container_dir_name) + # + hlitadoc.docker_login(ctx) + # Push Docker versioned tag. + image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version) + cmd = f"docker push {image_versioned_dev}" + hlitauti.run(ctx, cmd, pty=True) + # Push Docker tag. 
+ latest_version = None + image_dev = hlitadoc.get_image(base_image, "dev", latest_version) + cmd = f"docker push {image_dev}" + hlitauti.run(ctx, cmd, pty=True) + + +@task +def docker_push_prod_image( # type: ignore + ctx, + version, + base_image="", + container_dir_name=".", +): + """ + Push the "prod" image to ECR. + + :param ctx: invoke context + :param version: version to tag the image and code with + :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + prod_version = hlitadoc.resolve_version_value( + version, container_dir_name=container_dir_name + ) + # + hlitadoc.docker_login(ctx) + # Push versioned tag. + image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version) + cmd = f"docker push {image_versioned_prod}" + hlitauti.run(ctx, cmd, pty=True) + # + latest_version = None + image_prod = hlitadoc.get_image(base_image, "prod", latest_version) + cmd = f"docker push {image_prod}" + hlitauti.run(ctx, cmd, pty=True) + + +# TODO(gp): We moved away from versioning of the prod image because we release +# continuously and so it's easier to track the hash. +def _docker_rollback_image( + ctx: Any, + base_image: str, + stage: str, + version: str, + push_to_repo: bool, +) -> None: + """ + Rollback the versioned image for a particular stage and optionally push it + to ECR. + + :param ctx: invoke context + :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp + :param stage: select a specific stage for the Docker image (must be + one of `dev` or `prod`) + :param version: version to tag the image and code with + :param push_to_repo: whether to push the rolled back image to ECR + """ + hdbg.dassert_in(stage, ("dev", "prod")) + # TODO(sandeep): Consider removing the redundant pull-push step. Instead of + # pulling the versioned image and pushing it back to ECR, directly push + # the local image. 
However, note that this may not work for multi-arch images + # since local images are arch-specific, while remote tags include all architectures. + # 1) Ensure that version of the image exists locally. + hlitadoc._docker_pull( + ctx, base_image=base_image, stage=stage, version=version + ) + # 2) Promote requested image to target stage. + image_versioned = hlitadoc.get_image(base_image, stage, version) + latest_version = None + image_latest = hlitadoc.get_image(base_image, stage, latest_version) + cmd = f"docker tag {image_versioned} {image_latest}" + hlitauti.run(ctx, cmd) + # 3) Push the image to ECR. + if push_to_repo: + if stage == "dev": + docker_push_dev_image(ctx, version=version) + elif stage == "prod": + docker_push_prod_image(ctx, version=version) + else: + raise ValueError(f"Invalid stage='{stage}' for rollback") + else: + _LOG.warning("Skipping pushing %s image to ECR, as requested", stage) + + +@task +def docker_build_local_image( # type: ignore + ctx, + version, + cache=True, + base_image="", + poetry_mode="update", + container_dir_name=".", + just_do_it=False, + multi_arch="", + cleanup_installation=True, +): + """ + Build a local image, i.e., a release candidate "dev" image. + + :param ctx: invoke context + :param version: version to tag the image with + :param cache: use the cache + :param base_image: the name for the base image + E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp`. + For base_image, we use "" as default instead None since `invoke` can + only infer a single type. + :param poetry_mode: + - `update`: run `poetry lock` to update the packages + - `no_update`: it uses the current `poetry.lock` file, if it is valid + according to the constraints. 
This is useful when the goal is to + remove / add / update only a single package without updating + everything + :param container_dir_name: directory where the Dockerfile is located + :param just_do_it: execute the action ignoring the checks + :param multi_arch: + - if not specified, build for the current architecture + - if specified, build for the specified multiple architectures. E.g., + `linux/amd64,linux/arm64` + :param cleanup_installation: force clean up Docker installation. This can + be disabled to speed up the build process + """ + hlitauti.report_task(container_dir_name=container_dir_name) + # For poetry_mode="update", the `poetry.lock` file is updated and saved as + # `/install/poetry.lock.out` to the container. + # For poetry_mode="no_update", the `poetry.lock` file from the repo is used, + # and it's passed as `/install/poetry.lock.in` to the container. + hdbg.dassert_in(poetry_mode, ("update", "no_update")) + if just_do_it: + _LOG.warning("Skipping subsequent version check") + else: + hlitadoc.dassert_is_subsequent_version( + version, container_dir_name=container_dir_name + ) + dev_version = _get_dev_version(version, container_dir_name) + # Prepare `.dockerignore`. + docker_ignore = "devops/docker_build/dockerignore.dev" + _prepare_docker_ignore(ctx, docker_ignore) + # Build the local image. + stage = "local" + image_local = hlitadoc.get_image(base_image, stage, dev_version) + # + dockerfile = "devops/docker_build/dev.Dockerfile" + # Keep the relative path instead of an absolute path to ensure it matches + # files inside the tar stream and avoids file not found errors. + # dockerfile = _to_abs_path(dockerfile) + opts = "--no-cache" if not cache else "" + build_args = [ + ("AM_CONTAINER_VERSION", dev_version), + ("INSTALL_DIND", True), + ("POETRY_MODE", poetry_mode), + ("CLEAN_UP_INSTALLATION", cleanup_installation), + ] + build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_args) + # Build for both a single arch or multi-arch. 
@task
def docker_tag_local_image_as_dev(  # type: ignore
    ctx,
    version,
    base_image="",
    container_dir_name=".",
):
    """
    Promote the "local" image to "dev" by re-tagging it.

    The local image is tagged twice: first as the versioned dev image and
    then as the unversioned dev image.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    dev_version = _get_dev_version(version, container_dir_name)
    source_image = hlitadoc.get_image(base_image, "local", dev_version)
    # Versioned tag first (e.g., `dev-1.0.0`), then the unversioned dev tag,
    # which is requested by passing `None` as version.
    for target_version in (dev_version, None):
        target_image = hlitadoc.get_image(base_image, "dev", target_version)
        hlitauti.run(ctx, f"docker tag {source_image} {target_image}")
+ + Phases: + 1) Build local image + 2) Run the unit tests (e.g., fast, slow, superslow) on the local image + 3) Mark local as dev image + 4) Run the QA tests on the dev image + 5) Push dev image to the repo + + :param ctx: invoke context + :param version: version to tag the image and code with + :param cache: use the cache + :param skip_tests: skip all the tests and release the dev image + :param fast_tests: run fast tests, unless all tests skipped + :param slow_tests: run slow tests, unless all tests skipped + :param superslow_tests: run superslow tests, unless all tests skipped + :param qa_tests: run QA tests (e.g., end-to-end linter tests) + :param push_to_repo: push the image to the repo_short_name + :param poetry_mode: same as + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + # 1) Build "local" image. + docker_build_local_image( + ctx, + version, + cache=cache, + poetry_mode=poetry_mode, + container_dir_name=container_dir_name, + ) + # Run resolve after `docker_build_local_image` so that a proper check + # for subsequent version can be made in case `FROM_CHANGELOG` token + # is used. + dev_version = _get_dev_version(version, container_dir_name) + # 2) Run tests for the "local" image. + stage = "local" + _run_tests( + ctx, + stage, + dev_version, + skip_tests=skip_tests, + fast_tests=fast_tests, + slow_tests=slow_tests, + superslow_tests=superslow_tests, + qa_tests=False, + ) + # 3) Promote the "local" image to "dev". + docker_tag_local_image_as_dev( + ctx, dev_version, container_dir_name=container_dir_name + ) + # 4) Run QA tests for the (local version) of the dev image. + stage = "dev" + _run_tests( + ctx, + stage, + dev_version, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=qa_tests, + ) + # 5) Push the "dev" image to ECR. 
@task
def docker_tag_push_multi_build_local_image_as_dev(  # type: ignore
    ctx,
    version,
    local_base_image="",
    target_registry=_DEFAULT_TARGET_REGISTRY,
    container_dir_name=".",
):
    """
    Tag the multi-arch "local" image as "dev" and push it.

    Both `local_base_image` and `target_registry` carry information about a
    Docker registry. The registry address embedded in `local_base_image` is
    ignored when pushing: the address to push to comes from
    `target_registry`.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param local_base_image: base name of a local image,
        e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param target_registry: target Docker image registry to push the image to
        - "dockerhub.causify": public Causify Docker image registry
        - "aws_ecr.ck": private AWS CK ECR
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # The image is promoted from the "local" stage to the "dev" stage.
    stages = ("local", "dev")
    _docker_tag_and_push_multi_arch_image(
        ctx,
        version,
        local_base_image,
        target_registry,
        container_dir_name,
        *stages,
    )
@task
def docker_release_multi_build_dev_image(  # type: ignore
    ctx,
    version,
    cache=True,
    poetry_mode="update",
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    # TODO(Grisha): use iterable values, see
    # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values
    # target_registries=...
    target_registries=_DEFAULT_TARGET_REGISTRY,
    container_dir_name=".",
):
    """
    Build, test, and release the latest multi-arch "dev" image.

    Phases:
    1) Build the multi-arch "local" image for `linux/amd64,linux/arm64`
    2) Run the unit and QA tests on the "local" image
    3) Tag the image as "dev" and push it to every target registry

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param poetry_mode: update package dependencies using poetry
    :param skip_tests: skip all the tests and release the dev image
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests
        skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param target_registries: comma separated list of target Docker
        image registries to push the image to. E.g.,
        "aws_ecr.ck,dockerhub.causify". See `docker_login()` for
        details.
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    registries = target_registries.split(",")
    # 1) Build "local" image remotely in the CK AWS ECR registry and pull once
    # it is built.
    docker_build_local_image(
        ctx,
        version,
        cache=cache,
        poetry_mode=poetry_mode,
        container_dir_name=container_dir_name,
        multi_arch="linux/amd64,linux/arm64",
    )
    # The version is resolved only after the build so that a proper check for
    # subsequent version can be made in case the `FROM_CHANGELOG` token is
    # used.
    dev_version = _get_dev_version(version, container_dir_name)
    # 2) Run unit and QA tests on the "local" image.
    # It is not possible to tag a multi-arch image as dev without releasing
    # (pushing) it, so the local image is tested instead. The difference
    # between a local and a dev image is just a tag.
    test_flags = {
        "skip_tests": skip_tests,
        "fast_tests": fast_tests,
        "slow_tests": slow_tests,
        "superslow_tests": superslow_tests,
        "qa_tests": qa_tests,
    }
    _run_tests(ctx, "local", dev_version, **test_flags)
    # 3) Tag the image as dev image and push it to the target registries.
    for registry in registries:
        docker_tag_push_multi_build_local_image_as_dev(
            ctx,
            version=dev_version,
            target_registry=registry,
            container_dir_name=container_dir_name,
        )
    _LOG.info("==> SUCCESS <==")
+ + :param version: version to tag the image and code with + :param cache: note that often the prod image is just a copy of the + dev image so caching makes no difference + :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp + :param candidate: build a prod image with a tag format: prod-{hash} + where hash is the output of `hgit.get_head_hash()` + :param user_tag: the name of the user building the candidate image + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + prod_version = hlitadoc.resolve_version_value( + version, container_dir_name=container_dir_name + ) + # Prepare `.dockerignore`. + docker_ignore = "devops/docker_build/dockerignore.prod" + _prepare_docker_ignore(ctx, docker_ignore) + # TODO(gp): We should do a `i git_clean` to remove artifacts and check that + # the client is clean so that we don't release from a dirty client. + # Build prod image. + if candidate: + # For candidate prod images which need to be tested on the AWS infra add + # a hash identifier. + latest_version = None + image_versioned_prod = hlitadoc.get_image( + base_image, "prod", latest_version + ) + if not tag: + head_hash = hgit.get_head_hash(short_hash=True) + else: + head_hash = tag + # Add username to the prod image name. + if user_tag: + image_versioned_prod += f"-{user_tag}" + # Add head hash to the prod image name. + image_versioned_prod += f"-{head_hash}" + + else: + image_versioned_prod = hlitadoc.get_image( + base_image, "prod", prod_version + ) + # + dockerfile = "devops/docker_build/prod.Dockerfile" + dockerfile = _to_abs_path(dockerfile) + # + # TODO(gp): Use to_multi_line_cmd() + opts = "--no-cache" if not cache else "" + # Use dev version for building prod image. 
@task
def docker_build_multi_arch_prod_image(  # type: ignore
    ctx,
    version,
    cache=True,
    base_image="",
    user_tag="",
    container_dir_name=".",
    tag=None,
    multi_arch="linux/amd64,linux/arm64",
):
    """
    Build a multi-arch versioned prod image from a dev image.

    E.g., given the dev image `helpers:dev-1.0.0`, build the prod image
    `helpers:prod-1.0.0`.

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: note that often the prod image is just a copy of the
        dev image so caching makes no difference
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param user_tag: the name of the user building the candidate image
        (not used by this function's body)
    :param container_dir_name: directory where the Dockerfile is located
    :param tag: not used by this function's body
    :param multi_arch: comma separated list of target architectures to
        build the image for. E.g., `linux/amd64,linux/arm64`
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    prod_version = hlitadoc.resolve_version_value(
        version, container_dir_name=container_dir_name
    )
    # Prepare `.dockerignore`.
    _prepare_docker_ignore(ctx, "devops/docker_build/dockerignore.prod")
    # TODO(gp): We should do a `i git_clean` to remove artifacts and check that
    # the client is clean so that we don't release from a dirty client.
    image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version)
    # Keep the relative path instead of an absolute path to ensure it matches
    # files inside the tar stream and avoids file not found errors.
    dockerfile = "devops/docker_build/prod.Dockerfile"
    opts = "" if cache else "--no-cache"
    # The prod image is built on top of the dev image of the matching version.
    dev_version = hlitadoc.to_dev_version(prod_version)
    ecr_base_path = os.environ["CSFY_ECR_BASE_PATH"]
    build_args = " ".join(
        [
            f"--build-arg VERSION={dev_version}",
            f"--build-arg ECR_BASE_PATH={ecr_base_path}",
        ]
    )
    # Login to AWS ECR because for multi-arch we need to build the local
    # image remotely.
    hlitadoc.docker_login(ctx)
    _create_multiarch_builder(ctx)
    _build_multi_arch_image(
        ctx, opts, multi_arch, build_args, image_versioned_prod, dockerfile
    )
    # TODO(sandeep): If possible, switch to hlitadoc._docker_pull().
    # Pull the image from registry after building.
    hlitauti.run(ctx, f"docker pull {image_versioned_prod}")
    _list_image(ctx, image_versioned_prod)
# TODO(gp): Can we merge this with docker_push_prod_image?
@task
def docker_push_prod_candidate_image(  # type: ignore
    ctx,
    candidate,
    base_image="",
    container_dir_name=".",
):
    """
    (ONLY CI/CD) Push the "prod" candidate image to ECR.

    The pushed image name is the unversioned prod image name followed by
    `-{candidate}`.

    :param ctx: invoke context
    :param candidate: hash of the candidate prod image to push
    :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    hlitadoc.docker_login(ctx)
    # Push the image tagged with the candidate hash ID.
    prod_image = hlitadoc.get_image(base_image, "prod", None)
    candidate_image = f"{prod_image}-{candidate}"
    hlitauti.run(ctx, f"docker push {candidate_image}", pty=True)
+ + - Build prod image + - Run the tests + - Push the prod image repo + + :param ctx: invoke context + :param version: version to tag the image and code with + :param cache: use the cache + :param skip_tests: skip all the tests and release the dev image + :param fast_tests: run fast tests, unless all tests skipped + :param slow_tests: run slow tests, unless all tests skipped + :param superslow_tests: run superslow tests, unless all tests skipped + :param qa_tests: run QA tests (e.g., end-to-end linter tests) + :param push_to_repo: push the image to the repo_short_name + :param container_dir_name: directory where the Dockerfile is located + """ + hlitauti.report_task(container_dir_name=container_dir_name) + prod_version = hlitadoc.resolve_version_value( + version, container_dir_name=container_dir_name + ) + # 1) Build prod image. + docker_build_prod_image( + ctx, + cache=cache, + version=prod_version, + container_dir_name=container_dir_name, + ) + # 2) Run tests. + if skip_tests: + _LOG.warning("Skipping all tests and releasing") + fast_tests = slow_tests = superslow_tests = False + stage = "prod" + if fast_tests: + hlitapyt.run_fast_tests(ctx, stage=stage, version=prod_version) + if slow_tests: + hlitapyt.run_slow_tests(ctx, stage=stage, version=prod_version) + if superslow_tests: + hlitapyt.run_superslow_tests(ctx, stage=stage, version=prod_version) + # 3) Run QA tests using the local version of the prod image before pushing + # it to ECR. + if qa_tests: + hlitapyt.run_qa_tests(ctx, stage=stage, version=prod_version) + # 4) Push prod image. 
@task(iterable=["docker_registry"])
def docker_release_multi_arch_prod_image(  # type: ignore
    ctx,
    version,
    cache=True,
    skip_tests=False,
    fast_tests=True,
    slow_tests=True,
    superslow_tests=False,
    qa_tests=True,
    docker_registry=None,
    container_dir_name=".",
):
    """
    Build, test, and release to Docker registries the multi-arch prod image.

    Phases:
    1) Build the multi-arch prod image for `linux/amd64,linux/arm64`
    2) Run the tests on the prod image
    3) Tag and push the prod image to every requested registry

    :param ctx: invoke context
    :param version: version to tag the image and code with
    :param cache: use the cache
    :param skip_tests: skip all the tests
    :param fast_tests: run fast tests, unless all tests skipped
    :param slow_tests: run slow tests, unless all tests skipped
    :param superslow_tests: run superslow tests, unless all tests skipped
    :param qa_tests: run QA tests (e.g., end-to-end linter tests)
    :param docker_registry: list of Docker image registries to push the
        image to. When missing or empty, the default target registry is used
    :param container_dir_name: directory where the Dockerfile is located

    Example usage:
    > invoke docker_release_multi_arch_prod_image \
        --version 1.2.0 \
        --docker-registry dockerhub.causify \
        --docker-registry aws_ecr.ck
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # When run through `invoke`, an iterable task parameter that is not passed
    # is an empty list:
    # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values
    # When the function is called directly from Python, the declared default
    # `None` is received instead, so both cases are handled.
    if not docker_registry:
        docker_registry = [_DEFAULT_TARGET_REGISTRY]
        _LOG.warning(
            "No Docker registries provided, using default: %s", docker_registry
        )
    # 1) Build prod image.
    docker_build_multi_arch_prod_image(
        ctx,
        version,
        cache=cache,
        container_dir_name=container_dir_name,
        multi_arch="linux/amd64,linux/arm64",
    )
    # 2) Run tests.
    stage = "prod"
    _run_tests(
        ctx,
        stage,
        version,
        skip_tests=skip_tests,
        fast_tests=fast_tests,
        slow_tests=slow_tests,
        superslow_tests=superslow_tests,
        qa_tests=qa_tests,
    )
    # 3) Push prod image.
    for registry in docker_registry:
        docker_tag_push_multi_arch_prod_image(
            ctx,
            version=version,
            target_registry=registry,
            container_dir_name=container_dir_name,
        )
    _LOG.info("==> SUCCESS <==")
def _check_workspace_dir_sizes() -> None:
    """
    Assert that the workspace has no large top-level files/directories.

    The use case is running this check before building a candidate image:
    large files significantly slow down image creation and subsequent
    pulling, and over time they also increase the cost of ECR usage.

    The check runs `du` on every top-level item of the Git root and fails
    (through `hdbg.dassert`) if any item above the size threshold is not in
    the list of ignored directories.
    """
    # Threshold is chosen heuristically according to current repo dir sizes.
    fs_item_max_threshold = "200M"
    # Directories ignored by `dockerignore.prod` + "amp/" as submodule.
    ignored_dirs = [
        "amp",
        "ck.infra",
        "amp/ck.infra",
        "docs",
        ".git",
        "amp/.git",
    ]
    git_root = hgit.find_git_root()
    with hsystem.cd(git_root):
        # Each output line has the form `<size>\t<path>`.
        du_output = hsystem.system_to_string(
            f"du --threshold {fs_item_max_threshold} -hs $(ls -A) | sort -hr"
        )[1]
    offending_items = []
    for line in du_output.split("\n"):
        # When nothing exceeds the threshold the output is empty and splitting
        # it yields a single empty string, which has no path field to inspect.
        if not line.strip():
            continue
        path = line.split("\t", 1)[-1]
        if path not in ignored_dirs:
            offending_items.append(line.replace("\t", " "))
    hdbg.dassert(
        len(offending_items) == 0,
        (
            "Your workspace contains one or more files/directories "
            f"larger than {fs_item_max_threshold} move "
            f"or delete the items:\n\t {offending_items}"
        ),
    )
@task
def docker_release_test_task_definition(
    ctx,
    task_definition: Optional[str] = None,
    user_tag: Optional[str] = None,
    region: str = hs3.AWS_EUROPE_REGION_1,
):  # type: ignore
    """
    Release a candidate image to the test ECS task definition.

    A new candidate image is created from the current working directory and
    the given task definition is updated to use the resulting image tag.

    :param ctx: invoke context
    :param task_definition: name of the ECS task definition to update; it is
        required, i.e., it must not be `None`
    :param user_tag: user tag forwarded to the candidate image creation
    :param region: region to create the task definition in
    """
    hdbg.dassert_in(region, hs3.AWS_REGIONS)
    # The task definition has a `None` default only to fit the task
    # signature: a value must always be provided.
    hdbg.dassert_is_not(task_definition, None, "task definition is required")
    # Build and push the candidate image.
    candidate_tag = docker_create_candidate_image(ctx, os.getcwd(), user_tag)
    # Point the ECS task definition to the new image.
    hlitaaws.aws_update_ecs_task_definition(
        task_definition=task_definition,
        image_tag=candidate_tag,
        region=region,
        environment="test",
    )
@task
def copy_ecs_task_definition_image_url(ctx, src_task_def, dst_task_def):  # type: ignore
    """
    Copy the image URL from one ECS task definition to another.

    The source task definition is read in `hs3.AWS_EUROPE_REGION_1` and the
    destination one is updated in `hs3.AWS_TOKYO_REGION_1`: the destination
    URL is the source URL with the region replaced.

    TODO(Juraj): The regions are hard-coded because this is the configuration
    we need at the moment.

    :param ctx: invoke context (unused)
    :param src_task_def: source ECS task definition (located in
        eu-north-1)
    :param dst_task_def: destination ECS task definition (located in
        ap-northeast-1)
    """
    # TODO(Vlad): Import locally to avoid redundant dependencies.
    # See for details: https://github.com/cryptokaizen/cmamp/issues/8086.
    import helpers.haws as haws

    _ = ctx
    source_region = hs3.AWS_EUROPE_REGION_1
    target_region = hs3.AWS_TOKYO_REGION_1
    source_url = haws.get_task_definition_image_url(
        src_task_def, region=source_region
    )
    # We have cross-region replication enabled in ECR, all images live in both
    # regions, so only the region part of the URL needs to change.
    target_url = source_url.replace(source_region, target_region)
    haws.update_task_definition(dst_task_def, target_url, region=target_region)
+ root_dir = hgit.get_client_root(super_module) + dags_path = [root_dir, "datapull", "airflow", "dags"] + if super_module and hgit.is_amp_present(): + # Main DAGs location is always in `cmamp`. + dags_path.insert(1, "amp") + dir_name = os.path.join(*dags_path) + pattern = "preprod.*.py" + only_files = True + use_relative_paths = False + # List preprod DAGs. + dag_paths = hs3.listdir(dir_name, pattern, only_files, use_relative_paths) + for dag_path in dag_paths: + # Abort in case one of the preprod DAGs is out of sync. + _, dag_name = os.path.split(dag_path) + hdbg.dassert_eq( + hs3.from_file(dag_path), + s3fs_.cat(airflow_dags_s3_path + dag_name).decode(), + msg=f"Preprod file `{dag_name}` is out of sync with `{airflow_dags_s3_path}`!", + ) + # Prepare params to compose new prod image url. + prod_version = hlitadoc.resolve_version_value(version) + base_image = "" + stage = "prod" + # Compose new prod image url. + new_prod_image_url = hlitadoc.get_image(base_image, stage, prod_version) + version = None + new_prod_image_url_no_version = hlitadoc.get_image( + base_image, stage, version + ) + # Check if preprod tag exist in preprod task definition as precaution. + preprod_task_definition_name = f"{task_definition}-preprod" + preprod_image_url = haws.get_task_definition_image_url( + preprod_task_definition_name + ) + preprod_tag_from_image = preprod_image_url.split(":")[-1] + msg = ( + f"Preprod tag is different in the image url `{preprod_tag_from_image}`!" + ) + hdbg.dassert_eq(preprod_tag_from_image, preprod_tag, msg=msg) + # Pull preprod image for re-tag. + hlitadoc.docker_login(ctx) + cmd = f"docker pull {preprod_image_url}" + hlitauti.run(ctx, cmd) + # Re-tag preprod image to prod. 
+ cmd = f"docker tag {preprod_image_url} {new_prod_image_url}" + hlitauti.run(ctx, cmd) + cmd = f"docker tag {preprod_image_url} {new_prod_image_url_no_version}" + hlitauti.run(ctx, cmd) + cmd = f"docker rmi {preprod_image_url}" + hlitauti.run(ctx, cmd) + # Get original prod image for potential rollback. + original_prod_image_url = haws.get_task_definition_image_url(task_definition) + # Track successful uploads for potential rollback. + successful_uploads = [] + try: + # Update prod task definition to the latest prod tag. + haws.update_task_definition( + task_definition, new_prod_image_url, environment="prod" + ) + # Add prod DAGs to airflow s3 bucket after all checks are passed. + for dag_path in dag_paths: + # Update prod DAGs. + _, dag_name = os.path.split(dag_path) + prod_dag_name = dag_name.replace("preprod.", "prod.") + dag_s3_path = airflow_dags_s3_path + prod_dag_name + s3fs_.put(dag_path, dag_s3_path) + _LOG.info("Successfully uploaded `%s`!", dag_s3_path) + successful_uploads.append(dag_s3_path) + # Upload new tag to ECS. + docker_push_prod_image(ctx, prod_version) + except Exception as ex: + _LOG.info("Rollback started!") + # Rollback prod task definition image URL. + haws.update_task_definition( + task_definition, original_prod_image_url, environment="prod" + ) + _LOG.info( + "Reverted prod task definition image url to `%s`!", + original_prod_image_url, + ) + # Notify for potential rollback for airflow S3 bucket, if any. + if successful_uploads: + _LOG.warning("Starting S3 rollback!") + # Prepare bucket resource. + s3 = haws.get_service_resource(aws_profile="ck", service_name="s3") + bucket_name, _ = hs3.split_path(airflow_dags_s3_path) + if hasattr(s3, "Bucket"): + bucket = s3.Bucket(bucket_name) + else: + # We'll need to handle this differently since client doesn't + # have object_versions. 
+ raise NotImplementedError( + "S3 resource Bucket attribute not available, fallback implementation needed" + ) + for successful_upload in successful_uploads: + # TODO(Nikola): Maybe even Telegram notification? + # Rollback successful upload. + _, prefix = hs3.split_path(successful_upload) + prefix = prefix.lstrip(os.sep) + versions = sorted( + bucket.object_versions.filter(Prefix=prefix), + key=attrgetter("last_modified"), + reverse=True, + ) + latest_version = versions[0] + latest_version.delete() + _LOG.info("Deleted version `%s`.", latest_version.version_id) + if len(versions) > 1: + rollback_version = versions[1] + _LOG.info( + "Active version is now `%s`!", + rollback_version.version_id, + ) + elif len(versions) == 1: + _LOG.info( + "Deleted version was also the only version. Nothing to rollback." + ) + else: + # TODO(Nikola): Do we need custom exception? + raise NotImplementedError + s3_rollback_message = ( + f"S3 uploads reverted: {successful_uploads}" + if successful_uploads + else "No S3 uploads." + ) + _LOG.info("Rollback completed! %s", s3_rollback_message) + raise ex + + +@task +def docker_build_frontend_feature_image( + ctx, + stage, + dev_image_version=None, + app_version=None, +): + """ + Build frontend image for releasing the features. + + :param stage: stage to release the image + :param dev_image_version: base dev image version to use + :param app_version: app version for feature releases + """ + hdbg.dassert_in(stage, ["test", "preprod", "prod"]) + # Get changelog paths. + current_dir = os.getcwd() + # Get image and app version. + if not dev_image_version: + dev_image_version = hversio.get_changelog_version(current_dir) + if not app_version: + errors = [] + # Here we assume FE has its own runnable dir or the app changelog file + # is inside `app` dir of a parent runnable dir. 
@task
def docker_build_test_dev_image(  # type: ignore
    ctx,
    assignee="",
    reviewers="",
    container_dir_name=".",
):
    """
    Automate the complete periodic release workflow for the dev image.

    This task performs:
    1) Bump version (e.g., 2.2.0 -> 2.3.0)
    2) Get release team members
    3) Create branch with date-based name
    4) Build image locally with the bumped version number
    5) Run tests (fast, slow, superslow)
    6) Add changelog entry for the release
    7) Stage poetry.lock and pip_list.txt files
    8) Commit changes with versioned message
    9) Push changes
    10) Create PR
    11) Tag and push image to GHCR

    The steps run strictly in this order and each one has side effects (Git
    branch / commit / push, Docker build / tag / push, GitHub PR), so a
    failure in the middle leaves the earlier steps applied.

    :param ctx: invoke context
    :param assignee: GitHub username to assign the PR to
    :param reviewers: GitHub username(s) to request PR review. If not
        specified, uses the release team members from GitHub team
        configured in repo_config.yaml
    :param container_dir_name: directory where the Dockerfile is located
    """
    hlitauti.report_task(container_dir_name=container_dir_name)
    # 1) Bump version.
    _LOG.info("Step 1: Bumping version")
    current_version = hversio.get_changelog_version(container_dir_name)
    hdbg.dassert(current_version, "Could not find current version in changelog")
    _LOG.info("Current version: %s", current_version)
    # A periodic release always bumps the minor component.
    version = hversio.bump_version(current_version, bump_type="minor")
    _LOG.info("Bumped version: %s -> %s", current_version, version)
    # 2) Get release team members.
    _LOG.info("Step 2: Getting release team members")
    # Explicitly passed reviewers take precedence over the release team.
    if not reviewers:
        release_team_name = hrecouti.get_repo_config().get_release_team()
        # Get team members from GitHub team.
        team_members = hlitagh.gh_get_team_member_names(release_team_name)
        # The reviewers are passed around as a comma-separated string.
        reviewers = ",".join(team_members)
        _LOG.info("Release team '%s' members: %s", release_team_name, reviewers)
    # 3) Create branch with date-based name.
    _LOG.info("Step 3: Creating branch with date-based name")
    issue_prefix = hrecouti.get_repo_config().get_issue_prefix()
    # Get current date in YYYYMMDD format.
    today = datetime.date.today().strftime("%Y%m%d")
    branch_name = f"{issue_prefix}_Periodic_image_release_{today}"
    _LOG.info("Branch name: %s", branch_name)
    cmd = f"git checkout -b {branch_name}"
    hlitauti.run(ctx, cmd)
    # 4) Build image locally.
    _LOG.info("Step 4: Building local image with version %s", version)
    docker_build_local_image(
        ctx,
        version=version,
        cache=True,
        poetry_mode="update",
        container_dir_name=container_dir_name,
    )
    # 5) Run tests.
    _LOG.info("Step 5: Running tests")
    dev_version = _get_dev_version(version, container_dir_name)
    stage = "dev"
    # Fast, slow and superslow tests are enabled, QA tests are not.
    _run_tests(
        ctx,
        stage,
        dev_version,
        skip_tests=False,
        fast_tests=True,
        slow_tests=True,
        superslow_tests=True,
        qa_tests=False,
    )
    # 6) Add changelog entry.
    _LOG.info("Step 6: Adding changelog entry")
    supermodule = True
    root_dir = hversio._get_client_root(supermodule)
    image_name = hrecouti.get_repo_config().get_docker_base_image_name()
    changelog_file = os.path.join(root_dir, container_dir_name, "changelog.txt")
    hdbg.dassert_file_exists(changelog_file)
    # Read the current changelog.
    changelog_content = hio.from_file(changelog_file)
    # Prepare new entry.
    # Note that `today` is re-assigned with a different format (YYYY-MM-DD)
    # than the one used for the branch name in step 3.
    today = datetime.date.today().strftime("%Y-%m-%d")
    new_entry = f"""# {image_name}-{version}
- {today}
- Periodic release: {today}

"""
    # Prepend new entry to changelog.
    updated_changelog = new_entry + changelog_content
    # Write back to file.
    hio.to_file(changelog_file, updated_changelog)
    _LOG.info("Added changelog entry for version %s", version)
    # 7) Stage files.
    _LOG.info("Step 7: Staging files")
    # Fix git permissions in CI to avoid "insufficient permission" errors.
    if hserver.is_inside_ci():
        _LOG.info("Running in CI, fixing git permissions")
        cmd = "sudo chmod -R 777 .git/objects/"
        hlitauti.run(ctx, cmd)
    # Paths are relative to the container dir.
    files_to_stage = [
        "devops/docker_build/poetry.lock",
        "devops/docker_build/pip_list.txt",
        "changelog.txt",
    ]
    for file_path in files_to_stage:
        full_path = os.path.join(root_dir, container_dir_name, file_path)
        # A missing file is only reported, it doesn't abort the release.
        if os.path.exists(full_path):
            cmd = f"git add {full_path}"
            hlitauti.run(ctx, cmd)
            _LOG.info("Staged %s", full_path)
        else:
            _LOG.warning("File not found, skipping: %s", full_path)
    # 8) Commit changes.
    _LOG.info("Step 8: Committing changes")
    commit_message = f"Poetry output from the v{version} build"
    # --no-verify to skip pre-commit checks since the `poetry.lock` file is
    # too big and the `check_file_size` is failed.
    cmd = f'git commit -m "{commit_message}" --no-verify'
    hlitauti.run(ctx, cmd)
    # 9) Push changes.
    _LOG.info("Step 9: Pushing changes")
    cmd = f"git push origin {branch_name}"
    hlitauti.run(ctx, cmd)
    # 10) Create PR.
    _LOG.info("Step 10: Creating pull request")
    pr_body = f"- Periodic release of {image_name} dev image version {version}"
    label = _AUTO_RELEASE_LABEL
    # The PR is opened as ready for review (not as a draft).
    hlitagh.gh_create_pr(
        ctx,
        body=pr_body,
        draft=False,
        reviewer=reviewers,
        labels=label,
        assignee=assignee,
    )
    _LOG.info("PR submitted for branch %s", branch_name)
    # 11) Tag and push to GHCR.
    _LOG.info("Step 11: Tagging and pushing image to GHCR")
    # Get GHCR base image path from repo config.
    ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr")
    ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name()
    ghcr_base_image = f"{ghcr_base}/{ghcr_image_name}"
    _LOG.info("GHCR base image: %s", ghcr_base_image)
    # Get local image name.
    local_stage = "local"
    image_local = hlitadoc.get_image("", local_stage, dev_version)
    # Tag local image as versioned GHCR dev image (e.g., ghcr.io/causify-ai/csfy:dev-2.3.0).
    ghcr_image_versioned = f"{ghcr_base_image}:dev-{version}"
    cmd = f"docker tag {image_local} {ghcr_image_versioned}"
    hlitauti.run(ctx, cmd)
    _LOG.info("Tagged as versioned GHCR dev image: %s", ghcr_image_versioned)
    # Push versioned GHCR dev image.
    # Only the versioned tag is pushed by this task.
    cmd = f"docker push {ghcr_image_versioned}"
    hlitauti.run(ctx, cmd, pty=True)
    _LOG.info("Pushed versioned GHCR dev image: %s", ghcr_image_versioned)
    _LOG.info("==> SUCCESS <==")
+ stage = "dev" + source_dev_image_versioned = hlitadoc.get_image(base_image, stage, version) + cmd = f"docker pull {source_dev_image_versioned}" + hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) + # Tag and push to target registries. + for registry in target_registries.split(","): + # Strip whitespace from registry name. + registry = registry.strip() + # Tag and push the image to the target registry as latest dev image. + target_base = hrecouti.get_repo_config().get_container_registry_url( + registry + ) + target_image_name = ( + hrecouti.get_repo_config().get_docker_base_image_name() + ) + target_base_image = f"{target_base}/{target_image_name}" + latest_version = None + target_dev_image_latest = hlitadoc.get_image( + target_base_image, stage, latest_version + ) + cmd = ( + f"docker tag {source_dev_image_versioned} {target_dev_image_latest}" + ) + hlitauti.run(ctx, cmd, dry_run=dry_run) + cmd = f"docker push {target_dev_image_latest}" + hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) + # Tag and push versioned dev image to target registry. 
def _find_test_files(
    dir_name: Optional[str] = None, use_absolute_path: bool = False
) -> List[str]:
    """
    Collect all the `test_*.py` files found recursively under `dir_name`.

    Every file found must live in a dir called `test` and must not have
    `notebook/` in its path, unless its path contains `/old/` or `/compute/`,
    in which case it is not checked.

    :param dir_name: dir to search from; `None` or empty means the current
        dir
    :param use_absolute_path: convert the file names to absolute paths
    :return: sorted file names
    """
    if not dir_name:
        dir_name = "."
    hdbg.dassert_dir_exists(dir_name)
    _LOG.debug("abs_dir=%s", dir_name)
    # Glob recursively for the test files.
    _LOG.info("Searching from '%s'", dir_name)
    glob_pattern = os.path.join(dir_name, "**", "test_*.py")
    _LOG.debug("path=%s", glob_pattern)
    test_files = glob.glob(glob_pattern, recursive=True)
    _LOG.debug("Found %d files: %s", len(test_files), str(test_files))
    hdbg.dassert_no_duplicates(test_files)
    # Enforce the conventions about where test files are stored.
    for test_file in test_files:
        if "/old/" in test_file or "/compute/" in test_file:
            # These dirs are exempt from the checks.
            continue
        parent_dir_name = os.path.basename(os.path.dirname(test_file))
        hdbg.dassert_eq(
            parent_dir_name,
            "test",
            "Test file '%s' needs to be under a `test` dir ",
            test_file,
        )
        hdbg.dassert_not_in(
            "notebook/",
            test_file,
            "Test file '%s' should not be under a `notebook` dir",
            test_file,
        )
    # Make the paths absolute, if needed.
    if use_absolute_path:
        test_files = list(map(os.path.abspath, test_files))
    #
    test_files.sort()
    _LOG.debug("file_names=%s", test_files)
    hdbg.dassert_no_duplicates(test_files)
    return test_files
@functools.lru_cache()
def _get_python_files(subdir: str) -> List[str]:
    """
    List the `*.py` entries under `subdir`, excluding the tmp ones.

    The result is cached per `subdir`: the returned list is shared among
    callers and should not be modified in place.

    :param subdir: dir to list
    :return: entries returned by `hio.listdir()` whose base name doesn't
        start with `tmp`
    """
    pattern = "*.py"
    only_files = False
    use_relative_paths = False
    python_files = hio.listdir(subdir, pattern, only_files, use_relative_paths)
    # Remove tmp files.
    # The paths are requested as non-relative (presumably they include the
    # leading `subdir`), so the check needs to be done on the base name and not
    # on the whole path, otherwise it never matches.
    python_files = [
        f for f in python_files if not os.path.basename(f).startswith("tmp")
    ]
    return python_files
@task
def find(ctx, regex, mode="all", how="remove_dups", subdir="."):  # type: ignore
    """
    Search the Python files under `subdir` for symbol uses and short imports.

    Example:
    ```
    > i find DagBuilder
    ('dtfcodabui', 'DagBuilder')
    ('dtfcore', 'DagBuilder')
    ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui')
    ('dtfcore', 'import dataflow.core as dtfcore')
    ```

    :param regex: function or class (or short import, depending on `mode`)
        to search for
    :param mode: what to look for
        - `all`: run `symbol_import` and then `short_import`
        - `symbol_import`: look for uses of functions or classes and print
          them, then look for the imports of the modules they are used from
          E.g., `DagRunner` prints
          ```
          ('cdataf', 'PredictionDagRunner')
          ('cdataf', 'RollingFitPredictDagRunner')
          ```
        - `short_import`: look for the short import
          E.g., `dtfcodabui` prints
          ```
          ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui')
          ```
    :param how: how to report the results
        - `remove_dups`: report only the distinct (short import, info) pairs
        - `all`: report every match with its file, line number, and line
    :param subdir: dir containing the Python files to scan
    """
    hlitauti.report_task(txt=hprint.to_str("regex mode how subdir"))
    _ = ctx
    # Select the files to scan.
    py_files = _get_python_files(subdir)
    line_iter = _scan_files(py_files)
    # `all` is just the sequence of the two concrete modes.
    if mode == "all":
        find(ctx, regex, mode="symbol_import", how=how, subdir=subdir)
        find(ctx, regex, mode="short_import", how=how, subdir=subdir)
        return
    if mode == "short_import":
        matches = _find_short_import(line_iter, regex)
    elif mode == "symbol_import":
        uses = _find_func_class_uses(line_iter, regex)
        unique_uses = _process_find_results(uses, "remove_dups")
        # E.g.,
        # ('cdataf', 'PredictionDagRunner')
        # ('cdataf', 'RollingFitPredictDagRunner')
        print("\n".join(str(use) for use in unique_uses))
        # Resolve each short import found, re-scanning the files from the
        # beginning every time since the iterator is consumed by a scan.
        matches = []
        for module_alias, _ in unique_uses:
            matches += _find_short_import(_scan_files(py_files), module_alias)
    else:
        raise ValueError(f"Invalid mode='{mode}'")
    # Report the results according to `how`.
    report = _process_find_results(matches, how)
    print("\n".join(str(entry) for entry in report))
# TODO(gp): decorator_name -> pytest_mark
def _find_test_decorator(
    decorator_name: str, file_names: List[str]
) -> List[str]:
    """
    Find test files containing tests with a certain decorator
    `@pytest.mark.XYZ`.

    Only lines starting (modulo leading spaces) with the decorator are
    considered. The decorator name needs to match completely, e.g., `slow`
    matches `@pytest.mark.slow` and `@pytest.mark.slow(...)` but not
    `@pytest.mark.slow_foo`.

    :param decorator_name: name of the pytest mark, without the
        `@pytest.mark.` prefix (e.g., `slow`)
    :param file_names: files to scan
    :return: sorted list of the files containing the decorator, without
        duplicates
    """
    hdbg.dassert_isinstance(file_names, list)
    # E.g.,
    #   @pytest.mark.slow(...)
    #   @pytest.mark.qa
    string = f"@pytest.mark.{decorator_name}"
    # The negative lookahead prevents a mark from matching another mark that
    # merely starts with the same name (e.g., `slow` vs `slow_foo`).
    regex = rf"^\s*{re.escape(string)}(?!\w)"
    _LOG.debug("regex='%s'", regex)
    # Compile once, since the regex is applied to every line of every file.
    compiled_regex = re.compile(regex)
    res: List[str] = []
    # Scan all the files.
    for file_name in file_names:
        _LOG.debug("file_name=%s", file_name)
        txt = hio.from_file(file_name)
        # Search for the decorator in each file.
        for i, line in enumerate(txt.split("\n")):
            # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line)
            # TODO(gp): We should skip ```, """, '''. We can add a function to
            # remove all the comments, although we need to keep track of the
            # line original numbers.
            m = compiled_regex.match(line)
            if m:
                _LOG.debug("  -> found: %d:%s", i, line)
                res.append(file_name)
    #
    res = sorted(list(set(res)))
    return res


@task
def find_test_decorator(ctx, decorator_name="", dir_name="."):  # type: ignore
    """
    Print the test files containing tests marked with
    `@pytest.mark.<decorator_name>`, separated by spaces.

    :param ctx: invoke context (unused)
    :param decorator_name: the decorator to search
    :param dir_name: the dir from which to search
    """
    hlitauti.report_task()
    _ = ctx
    hdbg.dassert_ne(decorator_name, "", "You need to specify a decorator name")
    file_names = _find_test_files(dir_name)
    res = _find_test_decorator(decorator_name, file_names)
    res = " ".join(res)
    print(res)
+# ############################################################################# + + +@task +def find_check_string_output( # type: ignore + ctx, class_name, method_name, as_python=True, fuzzy_match=False, pbcopy=True +): + """ + Find output of `check_string()` in the test running + class_name::method_name. + + E.g., for `TestResultBundle::test_from_config1` return the content of the file + `./core/dataflow/test/TestResultBundle.test_from_config1/output/test.txt` + + :param as_python: if True return the snippet of Python code that replaces the + `check_string()` with a `assert_equal` + :param fuzzy_match: if True return Python code with `fuzzy_match=True` + :param pbcopy: save the result into the system clipboard (only on macOS) + """ + hlitauti.report_task() + _ = ctx + hdbg.dassert_ne(class_name, "", "You need to specify a class name") + hdbg.dassert_ne(method_name, "", "You need to specify a method name") + # Look for the directory named `class_name.method_name`. + cmd = f"find . -name '{class_name}.{method_name}' -type d" + # > find . -name "TestResultBundle.test_from_config1" -type d + # ./core/dataflow/test/TestResultBundle.test_from_config1 + _, txt = hsystem.system_to_string(cmd, abort_on_error=False) + file_names = txt.split("\n") + if not txt: + hdbg.dfatal(f"Can't find the requested dir with '{cmd}'") + if len(file_names) > 1: + hdbg.dfatal(f"Found more than one dir with '{cmd}':\n{txt}") + dir_name = file_names[0] + # Find the only file underneath that dir. + hdbg.dassert_dir_exists(dir_name) + cmd = f"find {dir_name} -name 'test.txt' -type f" + _, file_name = hsystem.system_to_one_line(cmd) + hdbg.dassert_file_exists(file_name) + # Read the content of the file. + _LOG.info("Found file '%s' for %s::%s", file_name, class_name, method_name) + txt = hio.from_file(file_name) + if as_python: + # Package the code snippet. + if not fuzzy_match: + # Align the output at the same level as 'expected = r...'. 
@task
def find_dependency(  # type: ignore
    ctx,
    module_name,
    mode="print_deps",
    only_module="",
    ignore_standard_libs=True,
    ignore_helpers=True,
    remove_dups=True,
):
    """
    Find the modules imported (through `import ...` statements at the
    beginning of a line) by the Python files of a module.

    E.g.,
    ```
    # Find all the dependency of a module from itself
    > i find_dependency --module-name "amp.dataflow.model" --mode "find_lev2_deps" --ignore-helpers --only-module dataflow
    amp/dataflow/model/stats_computer.py:16 dataflow.core
    amp/dataflow/model/model_plotter.py:4   dataflow.model
    ```

    Besides being printed, the results are saved in the file `cfile` in the
    current dir as `file:line_num:dependency` lines.

    :param ctx: invoke context (unused)
    :param module_name: the module path to analyze (e.g., `amp.dataflow.model`)
    :param mode:
        - `print_deps`: print the result of grepping for imports
        - `find_deps`: find all the dependencies
        - `find_lev1_deps`, `find_lev2_deps`: find all the dependencies,
          truncated to the first one / two components of the import
    :param only_module: keep only imports containing a certain module (e.g., `dataflow`)
    :param ignore_standard_libs: ignore the libs listed in `standard_libs`
        (e.g., `os`, `...`)
    :param ignore_helpers: ignore the `helper` lib
    :param remove_dups: remove the duplicated imports
    """
    _ = ctx
    # (cd amp/dataflow/model/; jackpy "import ") | grep -v notebooks | grep -v test | grep -v __init__ | grep "import dataflow"
    src_dir = module_name.replace(".", "/")
    hdbg.dassert_dir_exists(src_dir)
    # Find all the imports.
    # - `-H` forces grep to print the file name even when xargs passes a
    #   single file, otherwise the `file:line_num:code` parsing below breaks
    # - `-r` is not used since the files are passed explicitly and, with no
    #   files, it would make grep scan the current dir
    cmd = f'find {src_dir} -name "*.py" | xargs grep -n -H "^import "'
    _, txt = hsystem.system_to_string(cmd)
    #
    if mode == "print_deps":
        print(txt)
        return
    # Parse the output.
    _LOG.debug("\n" + hprint.frame("Parse"))
    lines = txt.split("\n")
    lines_out = []
    for line in lines:
        if not line.strip():
            # Skip empty lines (e.g., a trailing newline in the output).
            continue
        # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas
        # import helpers.hunit_test as hunitest  # pylint: disable=no-name-in-module'
        data = line.split(":")
        hdbg.dassert_lte(3, len(data), "Invalid line='%s'", line)
        file, line_num, import_code = data[:3]
        _LOG.debug(hprint.to_str("file line_num import_code"))
        lines_out.append((file, line_num, import_code))
    lines = lines_out
    _LOG.debug("Found %d imports", len(lines))
    # Remove irrelevant files and imports.
    _LOG.debug("\n" + hprint.frame("Remove irrelevant entries"))
    lines_out = []
    for line in lines:
        file, line_num, import_code = line
        _LOG.debug("# " + hprint.to_str("file line_num import_code"))
        if "__init__.py" in file:
            _LOG.debug("Remove because init")
            continue
        if "/test/" in file:
            _LOG.debug("Remove because test")
            continue
        if "notebooks/" in file:
            _LOG.debug("Remove because notebook")
            continue
        if "from typing import" in import_code:
            _LOG.debug("Remove because typing")
            continue
        lines_out.append(line)
    lines = lines_out
    _LOG.debug("After removal %d imports", len(lines))
    # Process.
    _LOG.debug("\n" + hprint.frame("Process entries"))
    lines_out = []
    for line in lines:
        # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas
        file, line_num, import_code = line
        _LOG.debug("# " + hprint.to_str("file line_num import_code"))
        # Parse import code.
        m = re.match(r"^import\s+(\S+)(\s+as)?", import_code)
        hdbg.dassert(m, "Can't parse line='%s'", import_code)
        assert m is not None
        import_name = m.group(1)
        _LOG.debug("import_name='%s'", import_name)
        import_parts = import_name.split(".")
        lev1_import = import_parts[0]
        if ignore_standard_libs and lev1_import in standard_libs:
            _LOG.debug("Ignoring standard lib '%s'", lev1_import)
            continue
        if ignore_helpers and lev1_import.startswith("helpers"):
            _LOG.debug("Ignoring helpers '%s'", lev1_import)
            continue
        if only_module and only_module not in import_name:
            _LOG.debug(
                "Ignoring '%s' since it doesn't contain %s",
                import_name,
                only_module,
            )
            continue
        # Truncate the import to the requested level.
        if mode == "find_deps":
            dep = import_name
        elif mode == "find_lev1_deps":
            dep = lev1_import
        elif mode == "find_lev2_deps":
            dep = ".".join(import_parts[:2])
        else:
            raise ValueError(f"Invalid mode='{mode}'")
        lines_out.append((file, line_num, dep))
    lines = lines_out
    # Remove repeated tuples.
    if remove_dups:
        _LOG.debug("\n" + hprint.frame("Remove repeated tuples"))
        # Keep only the first occurrence of each dependency.
        import_names = set()
        lines_out = []
        for line in lines:
            if line[2] in import_names:
                continue
            lines_out.append(line)
            import_names.add(line[2])
        lines = lines_out
    else:
        _LOG.warning("Remove dups skipped")
    # Sort.
    _LOG.debug("\n" + hprint.frame("Sort tuples"))
    lines = sorted(lines, key=lambda x: x[2])
    # Print and save.
    print(hprint.frame("Results"))
    _LOG.debug("\n" + hprint.frame("Print"))
    txt = "\n".join([":".join(line) for line in lines])
    file_name = "cfile"
    hio.to_file(file_name, txt)
    _LOG.info("%s saved", file_name)
    #
    txt = "\n".join(["%s:%s\t\t\t%s" % line for line in lines])
    print(txt)
+ full_repo_name = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=False + ) + _LOG.debug(hprint.to_str("full_repo_name")) + account = full_repo_name.split("/")[0] + _LOG.info(hprint.to_str("account")) + # + ssh_filename = os.path.expanduser(f"~/.ssh/id_rsa.{account}.github") + _LOG.debug(hprint.to_str("ssh_filename")) + if os.path.exists(ssh_filename): + cmd = f"export GIT_SSH_COMMAND='ssh -i {ssh_filename}'" + print(cmd) + else: + _LOG.warning("Can't find file '%s'", ssh_filename) + # + if print_status: + cmd = "gh auth status" + hlitauti.run(ctx, cmd) + # + github_pat_filename = os.path.expanduser(f"~/.ssh/github_pat.{account}.txt") + if os.path.exists(github_pat_filename): + cmd = f"gh auth login --with-token <{github_pat_filename}" + hlitauti.run(ctx, cmd) + else: + _LOG.warning("Can't find file '%s'", github_pat_filename) + # + if print_status: + cmd = "gh auth status" + hlitauti.run(ctx, cmd) + + +# ############################################################################# + + +def _get_branch_name(branch_mode: str) -> Optional[str]: + if branch_mode == "current_branch": + branch_name: Optional[str] = hgit.get_branch_name() + elif branch_mode == "master": + branch_name = "master" + elif branch_mode == "all": + branch_name = None + else: + raise ValueError(f"Invalid branch='{branch_mode}'") + return branch_name + + +def _get_org_name(org_name: str) -> str: + """ + Get organization name, inferring from current repo if not provided. + + :param org_name: organization name or empty string + :return: organization name + """ + if not org_name: + # Infer organization from current repo. + full_repo_name = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=False + ) + org_name = full_repo_name.split("/")[0] + return org_name + + +def _get_workflow_table() -> htable.TableType: + """ + Get a table with the status of the GH workflow for the current repo. + """ + # Get the workflow status from GH. 
+ cmd = "export NO_COLOR=1; gh run list" + _, txt = hsystem.system_to_string(cmd) + _LOG.debug(hprint.to_str("txt")) + # pylint: disable=line-too-long + # > gh run list + # STATUS TITLE WORKFLOW BRANCH EVENT ID ELAPSED AGE + # * AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 4m49s 4m + # > gh run list | more + # completed success AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 7m17s 10m + # in_progress AmpTask1786_Integrate_20230518_2 Slow tests AmpTask1786_Integrate_20230518_2 pull_request 5027911518 10m9s 10m + # pylint: enable=line-too-long + # The output is tab separated, so convert it into CSV. + first_line = txt.split("\n")[0] + _LOG.debug("first_line=%s", first_line.replace("\t", ",")) + num_cols = len(first_line.split("\t")) + _LOG.debug(hprint.to_str("first_line num_cols")) + cols = [ + # E.g., completed, in_progress. + "completed", + # E.g., success, failure. + "status", + # Aka title: parse but don't use. + "name", + "workflow", + "branch", + "event", + "id", + "elapsed", + "age", + ] + hdbg.dassert_eq(num_cols, len(cols)) + # Build the table. + table = htable.Table.from_text(cols, txt, delimiter="\t") + _LOG.debug(hprint.to_str("table")) + # Remove the "name" column as it's redundant with "workflow". + table = table.remove_column("name") + return table + + +def _print_table(table: htable.TableType) -> None: + table_str = str(table) + # Colorize the table. + color_map = {"success": "green", "failure": "red", "in progress": "yellow"} + for status, color in color_map.items(): + table_str = table_str.replace( + status, hprint.color_highlight(status, color) + ) + # Report the full status. + print(table_str) + + +# TODO(Grisha): seems like GH changed the output format, we should update accordingly, +# see CmTask #4672 "Slow tests fail (9835540316)" for details. 
+@task +def gh_workflow_list( # type: ignore + ctx, + filter_by_branch="current_branch", + filter_by_completed="all", + report_only_status=True, + show_stack_trace=False, + print_table=True, +): + """ + Report the status of the GH workflows. + + :param filter_by_branch: name of the branch to check + - `current_branch` for the current Git branch + - `master` for master branch + - `all` for all branches + :param filter_by_completed: filter table by the status of the workflow + - E.g., "failure", "success" + :param report_only_status: if True, report only the status of the workflows + :param show_stack_trace: in case of error run `pytest_repro` reporting also + the stack trace + :param print_table: if True, print the table with the status of the workflows + """ + hlitauti.report_task( + txt=hprint.to_str("filter_by_branch filter_by_completed") + ) + # Login. + gh_login(ctx) + # Get the table. + table = _get_workflow_table() + # Filter table based on the branch. + if filter_by_branch != "all": + field = "branch" + value = _get_branch_name(filter_by_branch) + print(f"Filtering table by {field}={value}") + table = table.filter_rows(field, value) + # Filter table by the workflow status. + if filter_by_completed != "all": + field = "completed" + value = filter_by_completed + print(f"Filtering table by {field}={value}") + table = table.filter_rows(field, value) + if ( + filter_by_branch not in ("current_branch", "master") + or not report_only_status + ): + _print_table(table) + return + # For each workflow find the last success. + branch_name = hgit.get_branch_name() + workflows = table.unique("workflow") + print(f"workflows={workflows}") + for workflow in workflows: + table_tmp = table.filter_rows("workflow", workflow) + if print_table: + print(hprint.frame(workflow)) + _print_table(table_tmp) + # Find the first success. 
+ num_rows = table.size()[0] + _LOG.debug("num_rows=%s", num_rows) + for i in range(num_rows): + status_column = table_tmp.get_column("status") + _LOG.debug("status_column=%s", str(status_column)) + hdbg.dassert_lt( + i, len(status_column), "status_column=", status_column + ) + status = status_column[i] + if status == "success": + print(f"Workflow '{workflow}' for '{branch_name}' is ok") + break + if status == "failure": + _LOG.error( + "Workflow '%s' for '%s' is broken", workflow, branch_name + ) + # Get the output of the broken run. + # > gh run view 1477484584 --log-failed + workload_id = table_tmp.get_column("id")[i] + log_file_name = f"tmp.failure.{workflow}.{branch_name}.txt" + log_file_name = log_file_name.replace(" ", "_").lower() + cmd = f"gh run view {workload_id} --log-failed >{log_file_name}" + hsystem.system(cmd) + # Remove non-printable chars. + # TODO(heanh): Consider adding all the helpers util scripts + # to the `PATH` (when inside the container) so we can just use + # them without specifying the full path. 
+ helpers_root_dir = hgit.find_helpers_root() + file_path = ( + f"{helpers_root_dir}/dev_scripts_helpers/system_tools" + ) + cmd = f"{file_path}/remove_escape_chars.py -i {log_file_name}" + hsystem.system(cmd) + print(f"# Log is in '{log_file_name}'") + # Run_fast_tests Run fast tests 2021-12-19T00:19:38.3394316Z FAILED data + # cmd = rf"grep 'Z FAILED ' {log_file_name}" + workflow_as_str = workflow.lower().replace(" ", "_") + script_name = f"./tmp.pytest_repro.{workflow_as_str}.sh" + cmd = f"invoke pytest_repro --file-name {log_file_name} --script-name {script_name}" + if show_stack_trace: + cmd += " -s" + hsystem.system(cmd, suppress_output=False, abort_on_error=False) + break + if status in ("startup_failure", "cancelled", "skipped"): + _LOG.debug( + "Workflow '%s' for '%s' has status '%s', skipping", + workflow, + branch_name, + status, + ) + break + if status == "": + if i == (len(status_column) - 1): + # If all the runs in the table are in progress, i.e. there is no + # failed or succesful run, issue a warning and exit. E.g., + # ######################################################### + # Superslow tests + # ######################################################### + # completed | status | workflow | branch | event | id | elapsed | age | + # ----------- | ------ | --------------- | ------ | ----------------- | ---------- | ------- | --- | + # in_progress | | Superslow tests | master | workflow_dispatch | 5421740561 | 13m25s | 13m | + _LOG.warning( + "No failed/successful run found for workflow=%s for branch=%s, all runs are in progress, exiting.", + workflow, + branch_name, + ) + else: + _LOG.debug( + "Workflow=%s for branch %s is in progress, skipping further checks", + workflow, + branch_name, + ) + break + else: + raise ValueError(f"Invalid status='{status}'") + + +@task +def gh_workflow_run(ctx, branch="current_branch", workflows="all"): # type: ignore + """ + Run GH workflows in a branch. 
+ """ + hlitauti.report_task(txt=hprint.to_str("branch workflows")) + # Login. + gh_login(ctx) + # Get the branch name. + if branch == "current_branch": + branch_name = hgit.get_branch_name() + elif branch == "master": + branch_name = "master" + else: + raise ValueError(f"Invalid branch='{branch}'") + _LOG.debug(hprint.to_str("branch_name")) + # Get the workflows. + if workflows == "all": + gh_tests = ["fast_tests", "slow_tests"] + else: + gh_tests = [workflows] + _LOG.debug(hprint.to_str("workflows")) + # Run. + for gh_test in gh_tests: + gh_test += ".yml" + # gh workflow run fast_tests.yml --ref AmpTask1251_Update_GH_actions_for_amp + cmd = f"gh workflow run {gh_test} --ref {branch_name}" + hlitauti.run(ctx, cmd) + + +# ############################################################################# + + +# TODO(gp): Remove repo_short_name. +def _get_repo_full_name_from_cmd(repo_short_name: str) -> Tuple[str, str]: + """ + Convert the `repo_short_name` from command line (e.g., "current", "amp", + "lm") to the repo_short_name full name without host name. + """ + repo_full_name_with_host: str + if repo_short_name == "current": + # Get the repo name from the current repo. + repo_full_name_with_host = hgit.get_repo_full_name_from_dirname( + ".", include_host_name=True + ) + hdbg.dassert_eq( + repo_full_name_with_host, + hrecouti.get_repo_config().get_repo_full_name_with_hostname(), + ) + ret_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() + else: + hdbg.dfatal("This code path is obsolete") + _LOG.debug( + "repo_short_name=%s -> repo_full_name_with_host=%s ret_repo_short_name=%s", + repo_short_name, + repo_full_name_with_host, + ret_repo_short_name, + ) + return repo_full_name_with_host, ret_repo_short_name + + +def _get_gh_issue_title(issue_id: int, repo_short_name: str) -> Tuple[str, str]: + """ + Get the title of a GitHub issue. 
+ + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + """ + # TODO(gp): I don't see applications where we need to pass the repo_short_name. + # One should always operate in the dir corresponding to a repo. + hdbg.dassert_eq(repo_short_name, "current") + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + # > (export NO_COLOR=1; gh issue view 1251 --json title) + # {"title":"Update GH actions for amp"} + hdbg.dassert_lte(1, issue_id) + cmd = f"gh issue view {issue_id} --repo {repo_full_name_with_host} --json title,url" + _, txt = hsystem.system_to_string(cmd) + _LOG.debug("txt=\n%s", txt) + # Parse json. + dict_ = json.loads(txt) + _LOG.debug("dict_=\n%s", dict_) + title = dict_["title"] + _LOG.debug("title=%s", title) + url = dict_["url"] + _LOG.debug("url=%s", url) + # Remove some annoying chars. + for char in ": + ( ) / ` *".split(): + title = title.replace(char, "") + # Replace multiple spaces with one. + title = re.sub(r"\s+", " ", title) + title = title.replace(" ", "_") + # Remove some annoying chars. + for char in "- ' ` \"".split(): + title = title.replace(char, "_") + # Add the prefix `AmpTaskXYZ_...` + task_prefix = hrecouti.get_repo_config().get_issue_prefix() + # task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + _LOG.debug("task_prefix=%s", task_prefix) + title = f"{task_prefix}{issue_id}_{title}" + return title, url + + +@task +def gh_issue_title(ctx, issue_id, repo_short_name="current", pbcopy=True): # type: ignore + """ + Print the title that corresponds to the given issue and repo_short_name. + E.g., AmpTask1251_Update_GH_actions_for_amp. + + Before running the invoke, one must check their login status on GH + by running `gh auth status`. + + :param issue_id: id number of the issue to create the branch for + :param repo_short_name: short name of the repo to use for the branch + name building. 
"current" refers to the repo where the call is + implemented + :param pbcopy: save the result into the system clipboard (only on + macOS) + """ + hlitauti.report_task(txt=hprint.to_str("issue_id repo_short_name")) + # Login. + gh_login(ctx) + # + issue_id = int(issue_id) + hdbg.dassert_lte(1, issue_id) + title, url = _get_gh_issue_title(issue_id, repo_short_name) + # Print or copy to clipboard. + msg = f"{title}: {url}" + hsystem.to_pbcopy(msg, pbcopy=pbcopy) + + +@task +def gh_issue_create( # type: ignore + ctx, + title="", + body="", + labels="", + assignees="", + project="", + repo_short_name="current", +): + """ + Create a new GitHub issue in the specified repository. + + ``` + # Create a simple issue + > invoke gh_issue_create --title "Fix bug in parser" + + # Create an issue with body and labels + > invoke gh_issue_create --title "Add new feature" --body "Description here" --labels "enhancement,priority-high" + + # Create an issue with assignees + > invoke gh_issue_create --title "Review PR" --assignees "user1,user2" + + # Create an issue and add to a project + > invoke gh_issue_create --title "Implement feature" --project "Development Board" + ``` + + :param title: title of the issue (required) + :param body: body/description of the issue + :param labels: comma-separated list of labels to apply + :param assignees: comma-separated list of GitHub usernames to assign + :param project: GitHub project name or number to add the issue to + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + :return: issue ID (integer) of the created issue + """ + hlitauti.report_task(txt=hprint.to_str("title repo_short_name")) + # Login. 
+ gh_login(ctx) + # + hdbg.dassert(title, "Title is required") + hdbg.dassert_eq(repo_short_name, "current") + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + _LOG.info( + "Creating issue with title '%s' in %s", + title, + repo_full_name_with_host, + ) + # Build the command. + cmd = ( + "gh issue create" + + f" --repo {repo_full_name_with_host}" + + f' --title "{title}"' + ) + if body: + cmd += f' --body "{body}"' + if labels: + cmd += f' --label "{labels}"' + if assignees: + cmd += f' --assignee "{assignees}"' + if project: + cmd += f' --project "{project}"' + # Execute the command and capture output. + # gh issue create outputs the URL of the created issue, e.g., + # https://github.com/cryptokaizen/csfy/issues/7572 + _, output = hsystem.system_to_string(cmd) + _LOG.debug("gh issue create output: %s", output) + # Extract the issue ID from the URL. + # The URL format is: https://github.com/org/repo/issues/123 + match = re.search(r"/issues/(\d+)", output) + hdbg.dassert(match, f"Could not extract issue ID from output: {output}") + issue_id = int(match.group(1)) + _LOG.info("Created issue #%s", issue_id) + return issue_id + + +# ############################################################################# + + +def _check_if_pr_exists(title: str) -> bool: + """ + Return whether a PR exists or not. + """ + # > gh pr diff AmpTask1955_Lint_20211219 + # no pull requests found for branch "AmpTask1955_Lint_20211219" + cmd = f"gh pr diff {title}" + rc = hsystem.system(cmd, abort_on_error=False) + pr_exists: bool = rc == 0 + return pr_exists + + +@task +def gh_create_pr( # type: ignore + ctx, + body="", + draft=True, + auto_merge=False, + repo_short_name="current", + title="", + reviewer="", + labels="", + assignee="", +): + """ + Create a draft PR for the current branch in the corresponding + repo_short_name. 
+ + ``` + # To open a PR in the web browser + > gh pr view --web + + # To see the status of the checks + > gh pr checks + ``` + + :param body: the body of the PR + :param draft: draft or ready-to-review PR + :param auto_merge: enable auto merging PR + :param repo_short_name: `current` refer to the repo where we are in, + otherwise a `repo_short_name` (e.g., "amp") + :param title: title of the PR or the branch name, if title is empty + :param reviewer: GitHub username to request review from + :param labels: comma-separated list of labels to apply + :param assignee: GitHub username to assign the PR to + """ + hlitauti.report_task() + # Login. + gh_login(ctx) + # + branch_name = hgit.get_branch_name() + if not title: + # Use the branch name as title. + title = branch_name + repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( + repo_short_name + ) + _LOG.info( + "Creating PR with title '%s' for '%s' in %s", + title, + branch_name, + repo_full_name_with_host, + ) + if auto_merge: + hdbg.dassert( + not draft, "The PR can't be a draft in order to auto merge it" + ) + pr_exists = _check_if_pr_exists(title) + _LOG.debug(hprint.to_str("pr_exists")) + if pr_exists: + _LOG.warning("PR '%s' already exists: skipping creation", title) + else: + # Link the PR automatically to the branch, if possible. 
+ issue_id = hgit.extract_gh_issue_number_from_branch(branch_name) + _LOG.debug(hprint.to_str("issue_id")) + if issue_id and str(issue_id) not in body: + body += f"\n\n#{issue_id}" + _LOG.info("Added issue id %s to the PR body", issue_id) + cmd = ( + "gh pr create" + + f" --repo {repo_full_name_with_host}" + + (" --draft" if draft else "") + + f' --title "{title}"' + + f' --body "{body}"' + ) + if reviewer: + cmd += f" --reviewer {reviewer}" + _LOG.info("Added reviewer %s to the PR", reviewer) + if labels: + cmd += f' --label "{labels}"' + _LOG.info("Added labels %s to the PR", labels) + if assignee: + cmd += f" --assignee {assignee}" + # TODO(gp): Use _to_single_line_cmd + hlitauti.run(ctx, cmd) + if auto_merge: + cmd = f"gh pr ready {title}" + hlitauti.run(ctx, cmd) + cmd = f"gh pr merge {title} --auto --delete-branch --squash" + hlitauti.run(ctx, cmd) + + +# TODO(gp): Add gh_open_pr to jump to the PR from this branch. + +# TODO(Grisha): probably the section deserves a separate lib. +# ############################################################################# +# Buildmeister dashboard +# ############################################################################# + + +# TODO(Grisha): consider moving to cmamp as we run the workflow from cmamp. +@task +def gh_publish_buildmeister_dashboard_to_s3(ctx, mark_as_latest=True): # type: ignore + """ + Run the buildmeister dashboard notebook and publish it to S3. + + :param mark_as_latest: if True, mark the dashboard as `latest`, otherwise + just publish a timestamped copy + """ + hlitauti.report_task() + # Login to GH CLI. + if hserver.is_inside_ci(): + _LOG.info("Skipping login since running inside CI") + else: + gh_login(ctx) + # Run and publish the Buildmeister dashboard Jupyter notebook locally. 
+ run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") + amp_abs_path = hgit.get_amp_abs_path() + notebook_path = os.path.join( + amp_abs_path, "devops/notebooks/Master_buildmeister_dashboard.ipynb" + ) + dst_local_dir = os.path.join(amp_abs_path, "tmp.notebooks") + cmd_run_txt = [ + run_notebook_script_path, + f"--notebook {notebook_path}", + # The notebook does not require a config, so using a random dummy config. + # TODO(Grisha): consider creating a separate config builder for the notebook. + "--config_builder 'datapull.optima.common.qa.qa_check.build_dummy_data_reconciliation_config()'", + f"--dst_dir '{dst_local_dir}'", + "--publish", + "--num_threads serial", + ] + cmd_run_txt = " ".join(cmd_run_txt) + hsystem.system(cmd_run_txt) + # To avoid the dependency on `helpers.hs3`. + import helpers.hs3 as hs3 + + # Get HTML file name. + tmp_local_dir_name = os.path.join(amp_abs_path, "tmp.notebooks") + pattern = "Master_buildmeister_dashboard.0*.html" + only_files = True + use_relative_paths = False + local_html_files = hio.listdir( + tmp_local_dir_name, + pattern, + only_files=only_files, + use_relative_paths=use_relative_paths, + ) + # Assert if more than 1 file is returned. + hdbg.dassert_eq( + len(local_html_files), + 1, + f"Found more than one file in {tmp_local_dir_name} - {local_html_files}", + ) + local_html_file = local_html_files[0] + s3_build_path = os.path.join( + hrecouti.get_repo_config().get_html_bucket_path(), + "build/buildmeister_dashboard", + ) + aws_profile = "ck" + if mark_as_latest: + # Copy the dashboard notebook to S3 as latest build. + s3_latest_build_path = os.path.join( + s3_build_path, "Master_buildmeister_dashboard.latest.html" + ) + hs3.copy_file_to_s3(local_html_file, s3_latest_build_path, aws_profile) + # Copy the timestamped version of the dashboard notebook to S3. + # Need to add a trailing slash to the path to copy the file into the folder. 
+ # https://docs.python.org/3/library/os.path.html#os.path.join + s3_build_path_folder = os.path.join(s3_build_path, "") + hs3.copy_file_to_s3(local_html_file, s3_build_path_folder, aws_profile) + + +def _gh_run_and_get_json(cmd: str) -> List[Dict[str, Any]]: + """ + Run a `gh` command and remove colors when running inside a notebook. + + :param cmd: `gh` command to run + :return: parsed JSON output of a command + """ + _, _txt = hsystem.system_to_string(cmd) + if hsystem.is_running_in_ipynb(): + # Remove the colors from the text. + _txt = re.sub(r"\x1b\[((1;)*[0-9]{2})*m", "", _txt) + _LOG.debug(hprint.to_str("_txt")) + ret: List[Dict[str, Any]] = json.loads(_txt) + return ret + + +def gh_get_open_prs(repo: str) -> List[Dict[str, Any]]: + """ + Return a list of open PRs. + + :param repo: repo name in the format "organization/repo", e.g., + "cryptokaizen/cmamp" + """ + cmd = f"gh pr list --state 'open' --json id --repo {repo}" + pull_requests = _gh_run_and_get_json(cmd) + return pull_requests + + +def _get_best_workflow_run( + workflow_name: str, + workflow_runs: List[Dict[str, Any]], + *, + preferred_event: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """ + Pick the best available workflow run: + - If `preferred_event` is specified (e.g., "schedule"), try that first. + - Otherwise, return the most recent success/failure run. 
+ + :param workflow_name: GitHub Actions workflow name + :param workflow_runs: run metadata, sorted most-recent-first + :param preferred_event: trigger type to prioritize (e.g., "schedule") + :return: best-matching run + e.g., + ``` + { + 'conclusion': 'success', + 'status': 'completed', + 'url': 'https://github.com/cryptokaizen/cmamp/actions/runs/8714881296', + 'workflowName': 'Allure fast tests' + } + """ + run_status = None + if preferred_event: + for run in workflow_runs: + if run.get("event") == preferred_event and run["conclusion"] in [ + "success", + "failure", + ]: + run_status = run + break + if run_status is None: + _LOG.warning( + "No '%s' run found for workflow '%s'", + preferred_event, + workflow_name, + ) + if run_status is None: + for run in workflow_runs: + if run["conclusion"] in ["success", "failure"]: + run_status = run + break + return run_status + + +def gh_get_workflows( + repo_name: str, *, sort: bool = True +) -> List[Dict[str, str]]: + """ + Get a list of workflows for a given repo. + + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param sort: if True, sort the list of workflow names + :return: list of workflows, e.g., [{"id": "12520125", "name": "Fast + tests"}, {"id": "12520124", "name": "Slow tests"}] + """ + hdbg.dassert_isinstance(repo_name, str) + _LOG.debug(hprint.to_str("repo_name")) + # Get the workflow list. + cmd = f"gh workflow list --json id,name --repo {repo_name}" + workflows = _gh_run_and_get_json(cmd) + workflows = [ + {"id": str(workflow["id"]), "name": workflow["name"]} + for workflow in workflows + ] + # sort workflow by name + if sort: + workflows = sorted(workflows, key=lambda workflow: workflow["name"]) + return workflows + + +def gh_get_workflow_details( + repo_name: str, workflow_id: str, fields: List[str], limit: int +) -> List[Dict[str, Any]]: + """ + Return the stats for a given workflow. 
+ + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param workflow_id: workflow id, e.g., "12520125" + :param fields: list of fields to return, e.g., ["workflowName", "status"] + :param limit: number of runs to return + :return: workflow stats + Example output: + ``` + [ + { + "conclusion": "success", + "status": "completed", + "url": "https://github.com/cryptokaizen/cmamp/actions/runs/7757345960", + "workflowName": "Slow tests" + } + ] + ``` + """ + hdbg.dassert_isinstance(repo_name, str) + hdbg.dassert_isinstance(workflow_id, str) + hdbg.dassert_container_type(fields, List, str) + _LOG.debug(hprint.to_str("repo_name workflow_id fields")) + # Fetch the latest `limit` runs for status calculation. + cmd = f""" + gh run list \ + --json {",".join(fields)} \ + --repo {repo_name} \ + --branch master \ + --limit {limit} \ + --workflow "{workflow_id}" + """ + workflow_statuses = _gh_run_and_get_json(cmd) + # We still want to return the statuses even there are less runs than requested. E.g., there is a new workflow with a few runs or there is a workflow that was never run. + hdbg.dassert_eq(len(workflow_statuses), limit, only_warning=True) + _LOG.debug("workflow_statuses=\n%s", workflow_statuses) + return workflow_statuses + + +def gh_get_details_for_all_workflows( + repo_list: List[str], +) -> "pd.DataFrame": # noqa: F821 + """ + Get status for all the workflows. + + :param repo_list: list of repos to get the status for e.g., + ["cryptokaizen/cmamp", "cryptokaizen/orange"] + :return: a table with the status of all the workflows, e.g., + ``` + Repo workflowName url status + cryptokaizen/cmamp Allure fast tests https://github.com/cryptokaizen/cmamp/actions/... completed + cryptokaizen/cmamp Allure slow tests https://github.com/cryptokaizen/cmamp/actions/... completed + ``` + """ + import pandas as pd + + # TODO(Grisha): expose cols to the interface, i.e. a caller decides what to do. 
+ gh_cols = ["workflowName", "url", "status", "conclusion", "event"] + # Import locally in order not to introduce external dependencies to the lib. + repo_dfs = [] + for repo_name in repo_list: + # Get all workflows for the given repo. + workflows = gh_get_workflows(repo_name) + # For each workflow find the last run. + for workflow in workflows: + # Get at least a few runs to compute the status; this is useful when + # the latest run is not completed, in this case the run before the + # latest one tells the status for a workflow. + limit = 10 + workflow_id = workflow["id"] + workflow_name = workflow["name"] + workflow_statuses = gh_get_workflow_details( + repo_name, workflow_id, gh_cols, limit + ) + if len(workflow_statuses) < limit: + # TODO(Grisha): should we just insert empty rows as placeholders so that + # we know that such workflows exist? + _LOG.warning( + "Not enough runs to compute status for '%s', repo '%s', skipping the workflow", + workflow_name, + repo_name, + ) + continue + # Get the latest successful or failed workflow run (prioritize scheduled run if available). + SCHEDULED_WORKFLOWS = { + "Gitleaks Scan", + } + preferred_event = ( + "schedule" if workflow_name in SCHEDULED_WORKFLOWS else None + ) + workflow_status = _get_best_workflow_run( + workflow_name, workflow_statuses, preferred_event=preferred_event + ) + if workflow_status is None: + _LOG.warning( + "No successful or failed runs found for '%s', repo '%s', skipping the workflow", + workflow_name, + repo_name, + ) + continue + # Access the info of latest workflow run. + workflow_status = pd.DataFrame([workflow_status]) + workflow_status["repo_name"] = repo_name + repo_dfs.append(workflow_status) + # Collect per-repo tables into a single DataFrame. + df = pd.concat(repo_dfs, ignore_index=True) + # Rename the columns. 
+ df = df.drop(columns=["status"]) + df = df.rename(columns={"workflowName": "workflow_name"}) + return df + + +def gh_get_overall_build_status_for_repo( + repo_df: "pd.Dataframe", # noqa: F821 + *, + use_colors: bool = True, +) -> str: + """ + Return the overall status of the workflows for a repo. + + :param repo_df: table with the status of the workflows for a repo + :param use_colors: if True, return the status with colors + :return: overall status of the build for a repo + """ + if use_colors: + hdbg.dassert( + hsystem.is_running_in_ipynb(), + msg="The use_colors option is applicable only when running inside a Jupyter notebook", + ) + # See: https://stackoverflow.com/questions/19746350/how-to-change-color-in-markdown-cells-ipython-jupyter-notebook + failed_status = 'Failed' + success_status = 'Success' + else: + failed_status = "Failed" + success_status = "Success" + if "failure" in repo_df["conclusion"].values: + # The build is failed if at least one workflow is failed. + overall_status = failed_status + else: + overall_status = success_status + return overall_status + + +def gh_get_workflow_type_names( + repo_name: str, *, sort: bool = True +) -> List[str]: + """ + Get a list of workflow names for a given repo. + + :param repo_name: git repo name in the format "organization/repo", + e.g., "cryptokaizen/cmamp" + :param sort: if True, sort the list of workflow names + :return: list of workflow names, e.g., ["Fast tests", "Slow tests"] + """ + hdbg.dassert_isinstance(repo_name, str) + _LOG.debug(hprint.to_str("repo_name")) + # Get the workflow list. + cmd = f"gh workflow list --json name --repo {repo_name}" + workflow_types = _gh_run_and_get_json(cmd) + workflow_names = [workflow["name"] for workflow in workflow_types] + if sort: + workflow_names = sorted(workflow_names) + # Check for duplicate workflow names. 
def gh_get_org_team_names(org_name: str = "", *, sort: bool = True) -> List[str]:
    """
    List the team slugs of a GitHub organization.

    :param org_name: organization name, e.g., "causify-ai". The value is
        passed through `_get_org_name()` before being used
    :param sort: if True, return the slugs in sorted order
    :return: list of team names (slugs)
        Example output:
        ```
        ["dev_system", "dev_frontend", "qa_team"]
        ```
    """
    org_name = _get_org_name(org_name)
    _LOG.debug(hprint.to_str("org_name"))
    # Query the teams endpoint through the GitHub CLI.
    api_cmd = "gh api /orgs/{}/teams --paginate".format(org_name)
    # Keep only the slug of each returned team record.
    slugs = []
    for team_info in _gh_run_and_get_json(api_cmd):
        slugs.append(team_info["slug"])
    if sort:
        slugs.sort()
    _LOG.debug("Found %s teams for org '%s'", len(slugs), org_name)
    return slugs
def make_clickable(url: str) -> str:
    """
    Wrap a URL as an HTML anchor tag.

    The URL is used both as the link target and as the visible link text.

    :param url: URL to wrap (e.g., "https://github.com/causify-ai/cmamp/actions/...")
    :return: HTML anchor string that makes the URL clickable in rendered Markdown
    """
    # Returning the bare URL would leave the cell as plain text; the anchor
    # element is what makes it clickable when the table is rendered as HTML.
    anchor = f'<a href="{url}" target="_blank">{url}</a>'
    return anchor


def color_format(val: str, status_color_mapping: Dict[str, str]) -> str:
    """
    Return a background-color style for DataFrame.style.map based on status.

    :param val: value to evaluate for status-based styling (e.g.,
        "success" or "failure")
    :param status_color_mapping: map status strings to color values,
        e.g.: { "success": "green", "failure": "red" }
    :return: CSS string to apply as a style, e.g., "background-color:
        green"
    """
    # Statuses missing from the mapping fall back to grey.
    color = status_color_mapping.get(val, "grey")
    style = f"background-color: {color}"
    return style
def get_workflow_run_ids(
    repo_path: str, workflow_id: str, *, older_than_days: Optional[int] = None
) -> List[str]:
    """
    Return the IDs of the runs of a workflow, optionally only the old ones.

    The IDs are retrieved with `gh api ... --paginate`, using the `-q/--jq`
    option to extract (and, if requested, filter) the run IDs.
    See GitHub CLI API documentation: https://cli.github.com/manual/gh_api

    :param repo_path: repository path in format "org/repo"
    :param workflow_id: GitHub workflow ID
    :param older_than_days: if specified, only return runs older than
        this many days
    :return: list of run IDs, as strings
    """
    runs_endpoint = f"/repos/{repo_path}/actions/workflows/{workflow_id}/runs"
    if older_than_days is None:
        # No age filter: emit every run ID, one per line, e.g.
        #   11758293857
        #   11758293856
        jq_expr = ".workflow_runs[].id"
    else:
        # The age filter runs inside jq:
        # - `fromdateiso8601` converts `created_at` to a Unix timestamp
        # - `now` is the current Unix timestamp
        # - the threshold is expressed in seconds (days * 86400), e.g., for
        #   older_than_days=30 the cutoff is `now - 30 * 86400`
        max_age_secs = older_than_days * 86400
        # The cutoff date is computed here only to log it.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        cutoff_date = now_utc - datetime.timedelta(days=older_than_days)
        _LOG.debug("Filtering runs created before: %s", cutoff_date.isoformat())
        jq_stages = [
            ".workflow_runs[]",
            f"select((.created_at | fromdateiso8601) < (now - {max_age_secs}))",
            ".id",
        ]
        jq_expr = " | ".join(jq_stages)
    # WARNING: `--paginate` fetches all the workflow runs and can be slow for
    # workflows with a large number of runs; jq filters each page.
    cmd = f"gh api {runs_endpoint} --paginate -q '{jq_expr}'"
    _, raw_output = hsystem.system_to_string(cmd)
    # One ID per line: drop surrounding whitespace and empty lines.
    stripped_lines = (line.strip() for line in raw_output.strip().split("\n"))
    return [line for line in stripped_lines if line]
Example: + older_than_days=30 deletes runs created more than 30 days ago + :param dry_run: if True, show what would be deleted without actually + deleting + :param confirmation: if True, prompt user for confirmation before + deletion (default: True) + """ + hlitauti.report_task( + txt=hprint.to_str("workflow_name older_than_days dry_run confirmation") + ) + # Convert older_than_days to int if provided (invoke passes strings). + if older_than_days is not None: + older_than_days = int(older_than_days) + hdbg.dassert_lte(1, older_than_days) + # Login. + gh_login(ctx) + # + repo_full_name_with_host, _ = _get_repo_full_name_from_cmd("current") + # Get workflow ID by name. + repo_path = repo_full_name_with_host.replace("github.com/", "") + workflows = gh_get_workflows(repo_path) + workflow_id = None + for workflow in workflows: + if workflow["name"] == workflow_name: + workflow_id = workflow["id"] + break + if not workflow_id: + available_workflows = [w["name"] for w in workflows] + raise ValueError( + f"Workflow '{workflow_name}' not found. " + f"Available workflows: {available_workflows}" + ) + _LOG.info("Found workflow '%s' with ID: %s", workflow_name, workflow_id) + # Get all run IDs for this workflow, optionally filtering by date. + run_ids = get_workflow_run_ids( + repo_path, workflow_id, older_than_days=older_than_days + ) + # Check if any runs were found. + age_filter_msg = ( + f" older than {older_than_days} days" + if older_than_days is not None + else "" + ) + if not run_ids: + _LOG.info( + "No workflow runs%s found for '%s'", age_filter_msg, workflow_name + ) + return + _LOG.info("Found %d workflow runs%s to delete", len(run_ids), age_filter_msg) + # Prompt for confirmation if required. 
+ if confirmation and not dry_run: + confirmation_msg = ( + f"\nAre you sure you want to delete {len(run_ids)} workflow run(s)" + f"{age_filter_msg} for '{workflow_name}'?\n" + f"Repository: {repo_full_name_with_host}\n" + f"Type 'yes' or 'y' to confirm: " + ) + user_input = input(confirmation_msg).strip().lower() + if user_input not in ("yes", "y"): + _LOG.info("Deletion cancelled by user") + return + _LOG.info("User confirmed deletion, proceeding...") + # Delete each run. + deleted_count = 0 + failed_count = 0 + for run_id in run_ids: + try: + cmd = f"gh api -X DELETE /repos/{repo_path}/actions/runs/{run_id}" + _LOG.info("Deleting run %s", run_id) + hlitauti.run(ctx, cmd, dry_run=dry_run) + deleted_count += 1 + except (invexc.UnexpectedExit, RuntimeError) as e: + _LOG.error("Failed to delete run %s: %s", run_id, str(e)) + failed_count += 1 + _LOG.info( + "Deletion complete: %d successful, %d failed out of %d total runs", + deleted_count, + failed_count, + len(run_ids), + ) + + +# ############################################################################# + +# def gh_get_pr_title(pr_url: str) -> str: +# > gh pr view https://github.com/causify-ai/helpers/pull/754 --json title -q .title +# HelpersTask705_Extend_coverage_in_pytest_to_cover_when_we_run_through_system diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py new file mode 100644 index 000000000..b7a92d78c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py @@ -0,0 +1,1502 @@ +""" +Import as: + +import helpers.lib_tasks_git as hlitagit +""" + +import logging +import os +import re +import stat +import subprocess +import time +from typing import Any, List + +from invoke import task + +import helpers.hdbg as hdbg +import 
helpers.hsystem as hsystem + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# Bits matching `chmod a+w` / `chmod a-w` on the symlink inode (not the target). +_SYMLINK_WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + + +def _collect_symlinks(dir: str) -> List[str]: + """ + Collect symlink paths under a given directory. + + :param dir: directory to walk + :return: symlink paths under `dir` + """ + out: List[str] = [] + for dirpath, dirnames, filenames in os.walk(dir, topdown=True): + # Skips `.git` directories. Does not follow symlinked directories. + if ".git" in dirnames: + dirnames.remove(".git") + for name in filenames: + path = os.path.join(dirpath, name) + if os.path.islink(path): + out.append(path) + for name in dirnames: + path = os.path.join(dirpath, name) + if os.path.islink(path): + out.append(path) + return out + + +def _add_write_perm_to_symlink(dir: str) -> None: + """ + Add write permission for all on each symlink under the given directory. + + :param dir: directory to walk + """ + _LOG.info("Adding write permission for all on each symlink under %s", dir) + for path in _collect_symlinks(dir): + try: + mode = os.lstat(path).st_mode + os.chmod( + path, + mode | _SYMLINK_WRITE_BITS, + ) + except OSError: + hdbg.dassert( + False, + "Failed to add write permissions to symlink; manual intervention may be needed", + ) + + +def _remove_write_perm_from_symlink(dir: str) -> None: + """ + Remove write permission for all on each symlink under a given directory. 
def run_git_recursively(ctx: Any, cmd_: str) -> None:
    """
    Execute a git command in the main repository and all submodules.

    The command is first run as-is, and then once more through
    `git submodule foreach`.

    :param ctx: Invoke context
    :param cmd_: Git command to execute
    """
    # Imported locally to keep the change confined to this function.
    import shlex

    hlitauti.run(ctx, cmd_)
    # Run the same command on all submodules. The command is passed to
    # `git submodule foreach` as a single shell word: `shlex.quote()` keeps
    # that true even when the command itself contains single quotes.
    cmd = f"git submodule foreach {shlex.quote(cmd_)}"
    hlitauti.run(ctx, cmd)
@task
def git_merge_master(
    ctx,
    abort_if_not_ff=False,
    abort_if_not_clean=True,
    skip_fetch=False,
    auto_merge=False,  # type: ignore
):
    """
    Merge master into the current branch.

    Unless `skip_fetch` is set, `git_fetch_master` is run first; the merge
    itself is `git merge master`.

    :param abort_if_not_ff: abort if fast-forward is not possible
    :param abort_if_not_clean: abort if the client is not clean
    :param skip_fetch: skip fetching master
    :param auto_merge: automatically commit and push if merge is
        successful
    """
    hlitauti.report_task()
    # Check the state of the client before touching the branch.
    hgit.is_client_clean(dir_name=".", abort_if_not_clean=abort_if_not_clean)
    if not skip_fetch:
        # Bring the local master up to date.
        git_fetch_master(ctx)
    # Build the merge command, restricting it to fast-forward when requested.
    ff_opt = " --ff-only" if abort_if_not_ff else ""
    merge_cmd = "git merge master" + ff_opt
    hlitauti.run(ctx, merge_cmd)
    if not auto_merge:
        return
    # The user asked to commit and push the result of the merge.
    _LOG.info("Auto-merge enabled: committing and pushing changes")
    push_cmd = 'git commit -am "Merge master" && git push'
    hlitauti.run(ctx, push_cmd)
@task
def git_patch_create(  # type: ignore
    ctx, mode="diff", modified=False, branch=False, last_commit=False, files=""
):
    """
    Create a patch file for the entire repo_short_name client from the base
    revision. This script accepts a list of files to package, if specified.

    The parameters `modified`, `branch`, `last_commit` have the same meaning as
    in `_get_files_to_process()`.

    The patch is written in the current directory as
    `patch.{repo_dir}.{short_hash}.{timestamp}` with extension `.tgz` or
    `.patch`, and instructions to apply it are printed.

    :param mode: what kind of patch to create
        - "diff": (default) creates a patch with the diff of the files
        - "tar": creates a tar ball with all the files
    :param modified: select the files modified in the client; in "diff" mode
        the diff is computed against `HEAD`
    :param branch: select the files modified in the branch; in "diff" mode the
        diff is computed against `master...`
    :param last_commit: select the files of the last commit; in "diff" mode the
        diff is computed against `HEAD^`
    :param files: optional explicit list of files to package
    :raises ValueError: in "diff" mode, if none of `modified`, `branch`,
        `last_commit` is set
    """
    hlitauti.report_task(
        txt=hprint.to_str("mode modified branch last_commit files")
    )
    _ = ctx
    # TODO(gp): Check that the current branch is up to date with master to avoid
    # failures when we try to merge the patch.
    hdbg.dassert_in(
        mode,
        ("tar", "diff"),
        "Patch mode must be either 'tar' for archives or 'diff' for patches",
    )
    # Currently only handles the current submodule (not parent repos).
    # TODO(gp): Extend this to handle also nested repos.
    super_module = False
    git_client_root = hgit.get_client_root(super_module)
    hash_ = hgit.get_head_hash(git_client_root, short_hash=True)
    # Use timestamp and hash to ensure unique patch filenames across time.
    timestamp = hlitauti.get_ET_timestamp()
    tag = os.path.basename(git_client_root)
    dst_file = f"patch.{tag}.{hash_}.{timestamp}"
    if mode == "tar":
        dst_file += ".tgz"
    elif mode == "diff":
        dst_file += ".patch"
    else:
        hdbg.dfatal("Invalid code path")
    _LOG.debug("dst_file=%s", dst_file)
    # Show what changes will be included in the patch.
    _LOG.info(
        "Difference between HEAD and master:\n%s",
        hgit.get_summary_files_in_branch("master", dir_name="."),
    )
    # Determine which files to include in the patch.
    all_ = False
    # Allow optional user-specified file subset (can be combined with other selectors).
    mutually_exclusive = False
    # Filter out directories; patches only work with files.
    remove_dirs = True
    files_as_list = hlitauti._get_files_to_process(
        modified,
        branch,
        last_commit,
        all_,
        files,
        mutually_exclusive,
        remove_dirs,
    )
    _LOG.info("Files to save:\n%s", hprint.indent("\n".join(files_as_list)))
    if not files_as_list:
        _LOG.warning("Nothing to patch: exiting")
        return
    files_as_str = " ".join(files_as_list)
    # Choose command based on patch format: archive vs diff.
    cmd = ""
    if mode == "tar":
        # Create compressed tar archive of the selected files.
        cmd = f"tar czvf {dst_file} {files_as_str}"
        cmd_inv = "tar xvzf"
    elif mode == "diff":
        # Pick the revision to diff against, depending on the selector.
        opts: str
        if modified:
            # Only uncommitted changes in working tree.
            opts = "HEAD"
        elif branch:
            # All changes since branch point (includes commits on current branch).
            opts = "master..."
        elif last_commit:
            # Only changes in the most recent commit.
            opts = "HEAD^"
        else:
            raise ValueError(
                "You need to specify one among --modified, --branch, "
                "--last-commit"
            )
        cmd = f"git diff {opts} --binary {files_as_str} >{dst_file}"
        cmd_inv = "git apply"
    else:
        # Unreachable because `mode` is validated above; report the offending
        # value rather than the (still empty) command.
        raise ValueError(f"Invalid mode='{mode}'")
    # Execute the patch creation command.
    _LOG.info("Creating the patch into %s", dst_file)
    hdbg.dassert_ne(
        cmd,
        "",
        "Patch creation command must not be empty",
    )
    _LOG.debug("cmd=%s", cmd)
    rc = hsystem.system(cmd, abort_on_error=False)
    # A non-zero return code means that the command failed.
    if rc != 0:
        _LOG.warning("Command failed with rc=%d", rc)
    # Provide instructions for applying the patch on different environments.
    remote_file = os.path.basename(dst_file)
    abs_path_dst_file = os.path.abspath(dst_file)
    # The remote command is double-quoted so that `$CLIENT_PATH`, exported in
    # the local shell by the lines above, is expanded before being sent.
    msg = f"""
    # To apply the patch and execute:
    > git checkout {hash_}
    > {cmd_inv} {abs_path_dst_file}

    # To apply the patch to a remote client:
    > export SERVER="server"
    > export CLIENT_PATH="~/src"
    > scp {dst_file} $SERVER:
    > ssh $SERVER "cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}"
    """
    msg = hprint.dedent(msg)
    print(msg)
@task
def git_files(  # type: ignore
    ctx,
    modified=False,
    branch=False,
    last_commit=False,
    keep_python=True,
    keep_jupyter=True,
    keep_markdown=True,
    pbcopy=False,
    only_print_files=False,
):
    """
    Print the files selected by `modified`, `branch`, or `last_commit`.

    The selection parameters have the same meaning as in
    `_get_files_to_process()`. The selected files are filtered by type and
    printed sorted, one per line.

    :param keep_python: include Python files (default: True)
    :param keep_jupyter: include Jupyter notebooks (default: True)
    :param keep_markdown: include Markdown files (default: True)
    :param pbcopy: forwarded to `hsystem.to_pbcopy()` together with the
        space-separated list of files
    :param only_print_files: only print files without logging headers/footers (default: False)
    """
    verbose = not only_print_files
    if verbose:
        hlitauti.report_task()
    _ = ctx
    selected = hlitauti._get_files_to_process(
        modified,
        branch,
        last_commit,
        # all_
        False,
        # files
        "",
        # mutually_exclusive: enforce exactly one filter mode.
        True,
        # remove_dirs
        True,
    )
    # Keep only the requested file types.
    selected = _filter_git_files_by_type(
        selected, keep_python, keep_jupyter, keep_markdown
    )
    print("\n".join(sorted(selected)))
    if verbose:
        # Hand the (unsorted) list to the clipboard helper.
        hsystem.to_pbcopy(" ".join(selected), pbcopy)
@task
def git_roll_amp_forward(ctx):  # type: ignore
    """
    Move the `amp` submodule pointer to the latest master.

    The steps are: check out master inside `amp`, pull, stage the `amp`
    directory in the current repo, commit, and push. If the `amp` directory
    does not exist, only a warning is logged.
    """
    hlitauti.report_task()
    AMP_DIR = "amp"
    if not os.path.exists(AMP_DIR):
        _LOG.warning("%s does not exist, aborting", AMP_DIR)
        return
    in_amp = f"cd {AMP_DIR} && "
    steps = (
        # Update the submodule to the latest master.
        in_amp + "git checkout master",
        in_amp + "git pull",
        # Record the new submodule pointer in the containing repo.
        f"git add {AMP_DIR}",
        f"git commit -m 'Roll {AMP_DIR} pointer forward'",
        "git push",
    )
    for step in steps:
        hlitauti.run(ctx, step)
@task
def git_branch_create(  # type: ignore
    ctx,
    branch_name="",
    issue_id=0,
    repo_short_name="current",
    suffix="",
    only_branch_from_master=True,
    check_branch_name=True,
):
    """
    Create and push upstream branch `branch_name` or the one corresponding to
    `issue_id` in repo_short_name `repo_short_name`.

    E.g.,
    ```
    > git checkout -b LemTask169_Get_GH_actions
    > git push --set-upstream origin LemTask169_Get_GH_actions
    ```

    :param branch_name: name of the branch to create (e.g.,
        `LemTask169_Get_GH_actions`)
    :param issue_id: use the canonical name for the branch corresponding to that
        issue
    :param repo_short_name: name of the GitHub repo_short_name that the `issue_id`
        belongs to
        - "current" (default): the current repo_short_name
        - short name (e.g., "amp", "lm") of the repo
    :param suffix: suffix (e.g., "02") to add to the branch name when using issue_id
    :param only_branch_from_master: only allow to branch from master (currently
        this only triggers a warning, see the TODO below)
    :param check_branch_name: make sure the name of the branch is valid like
        `{Amp,...}TaskXYZ_...`
    """
    hlitauti.report_task()
    if issue_id > 0:
        # Convert GitHub issue ID to branch name.
        hdbg.dassert_eq(
            branch_name,
            "",
            "Cannot specify both --issue and --branch-name; choose one",
        )
        title, _ = hlitagh._get_gh_issue_title(issue_id, repo_short_name)
        branch_name = title
        _LOG.info(
            "Issue %d in %s repo_short_name corresponds to '%s'",
            issue_id,
            repo_short_name,
            branch_name,
        )
        if suffix != "":
            # Add the suffix.
            _LOG.debug("Adding suffix '%s' to '%s'", suffix, branch_name)
            if suffix[0] in ("-", "_"):
                _LOG.warning(
                    "Suffix '%s' should not start with '%s': removing",
                    suffix,
                    suffix[0],
                )
                # The separator is at the beginning of the suffix, so it must
                # be stripped from the left.
                suffix = suffix.lstrip("-_")
            if suffix != "":
                # Skip a suffix made only of separators to avoid a trailing
                # `_` in the branch name.
                branch_name += "_" + suffix
    _LOG.info("branch_name='%s'", branch_name)
    hdbg.dassert_ne(
        branch_name,
        "",
        "Branch name cannot be empty",
    )
    if check_branch_name:
        # Reject numeric-only branch names to avoid confusion with commit SHAs.
        m = re.match(r"^\d+$", branch_name)
        hdbg.dassert(
            not m,
            "Branch names with only numbers are invalid",
        )
        # Enforce naming convention `{RepoPrefix}TaskXYZ_Description` for consistency.
        # The valid format of a branch name is `AmpTask1903_Implemented_system_...`.
        m = re.match(r"^\S+Task\d+_\S+$", branch_name)
        hdbg.dassert(
            m,
            "Branch name must follow convention: '{RepoPrefix,Amp,...}TaskXYZ_...'",
        )
    # Prevent accidental duplicate branches.
    hdbg.dassert(
        not hgit.does_branch_exist(branch_name, mode="all"),
        "Branch '%s' already exists",
        branch_name,
    )
    # Make sure we are branching from `master`, unless that's what the user wants.
    # TODO(Vlad): Remove before merging - temporarily allowing branching from non-master.
    curr_branch = hgit.get_branch_name()
    if curr_branch != "master":
        if only_branch_from_master:
            _LOG.warning(
                "Branching from '%s' instead of 'master'. "
                "This is temporarily allowed but should be reviewed before merging.",
                curr_branch,
            )
            # hdbg.dfatal(
            #     f"You should branch from master and not from '{curr_branch}'"
            # )
    # Update the current branch before branching off it.
    cmd = "git pull --autostash --rebase"
    hlitauti.run(ctx, cmd)
    # git checkout -b LmTask169_Get_GH_actions_working_on_lm
    cmd = f"git checkout -b {branch_name}"
    hlitauti.run(ctx, cmd)
    cmd = f"git push --set-upstream origin {branch_name}"
    hlitauti.run(ctx, cmd)
def _delete_branches(ctx: Any, tag: str, confirm_delete: bool) -> None:
    """
    Remove the branches already merged into master.

    The branches are found with a `git branch --merged` pipeline and deleted
    one at a time.

    :param ctx: Invoke context
    :param tag: Either "local" for local branches or "remote" for remote branches
    :param confirm_delete: If True, ask user for confirmation before deleting
    :raises ValueError: if `tag` is neither "local" nor "remote"
    """
    # For each tag: (command listing the branches, command deleting a branch).
    commands_by_tag = {
        # E.g.,
        # > git branch --merged
        # * AmpTask1251_Update_GH_actions_for_amp_02
        "local": (
            r"git branch --merged master | grep -v master | grep -v \*",
            "git branch -d",
        ),
        "remote": (
            "git branch -r --merged origin/master"
            + r" | grep -v master | sed 's/origin\///'",
            "git push origin --delete",
        ),
    }
    if tag not in commands_by_tag:
        raise ValueError(f"Invalid tag='{tag}'")
    find_cmd, delete_cmd = commands_by_tag[tag]
    # TODO(gp): Use system_to_lines
    _, listing = hsystem.system_to_string(find_cmd, abort_on_error=False)
    merged_branches = hsystem.text_to_list(listing)
    _LOG.info(
        "There are %d %s branches to delete:\n%s",
        len(merged_branches),
        tag,
        "\n".join(merged_branches),
    )
    if not merged_branches:
        # Nothing to delete.
        return
    if confirm_delete:
        # Let the user back out before anything is removed.
        hsystem.query_yes_no(
            hdbg.WARNING + f": Delete these {tag} branches?", abort_on_no=True
        )
    for merged_branch in merged_branches:
        hlitauti.run(ctx, f"{delete_cmd} {merged_branch}")
@task
def git_branch_rename(ctx, new_branch_name):  # type: ignore
    """
    Rename current branch both locally and remotely.

    The user is asked for confirmation before anything is changed. The old
    branch is deleted from `origin` and the renamed branch is pushed with its
    upstream set.

    :param new_branch_name: new name for the current branch; it must differ
        from the current name
    """
    hlitauti.report_task()
    old_branch_name = hgit.get_branch_name(".")
    # Ensure new branch name is actually different to avoid no-op rename.
    hdbg.dassert_ne(
        old_branch_name,
        new_branch_name,
        "New branch name must be different from current branch name",
    )
    msg = (
        f"Do you want to rename the current branch '{old_branch_name}' to "
        f"'{new_branch_name}'"
    )
    hsystem.query_yes_no(msg, abort_on_no=True)
    # https://stackoverflow.com/questions/30590083
    # Rename the local branch to the new name.
    # > git branch -m <new_name>
    cmd = f"git branch -m {new_branch_name}"
    hlitauti.run(ctx, cmd)
    # Delete the old branch on remote.
    # > git push <remote> --delete <old_name>
    cmd = f"git push origin --delete {old_branch_name}"
    hlitauti.run(ctx, cmd)
    # Prevent Git from using the old name when pushing in the next step.
    # Otherwise, Git will use the old upstream name instead of <new_name>.
    # > git branch --unset-upstream <new_name>
    cmd = f"git branch --unset-upstream {new_branch_name}"
    hlitauti.run(ctx, cmd)
    # Push the new branch to remote.
    # > git push <remote> <new_name>
    cmd = f"git push origin {new_branch_name}"
    hlitauti.run(ctx, cmd)
    # Reset the upstream branch for the new_name local branch.
    # > git push <remote> -u <new_name>
    # Without the dash, `u` is parsed as a refspec and the push fails.
    cmd = f"git push origin -u {new_branch_name}"
    hlitauti.run(ctx, cmd)
    print("Done")
+ + :param branch_name: if `None` use the current branch name, otherwise specify it + :param method: method to use ('auto', 'github_api', 'linear_scan') + - 'auto' (default): tries GitHub API first, falls back to linear scan + - 'github_api': use only GitHub API method (fast) + - 'linear_scan': use only linear scan method (always works) + + E.g., `AmpTask1903_Implemented_system_Portfolio` -> + `AmpTask1903_Implemented_system_Portfolio_3` + """ + hlitauti.report_task() + _ = ctx + branch_next_name = hgit.get_branch_next_name( + curr_branch_name=branch_name, method=method, log_verb=logging.INFO + ) + print(f"branch_next_name='{branch_next_name}'") + + +@task +def git_branch_copy( # type: ignore + ctx, + new_branch_name="", + skip_git_merge_master=False, + use_patch=False, + check_branch_name=True, +): + """ + Create a new branch with the same content of the current branch. + + :param new_branch_name: name for the new branch + :param skip_git_merge_master: skip merging master into current branch + :param use_patch: apply patching instead of merging + :param check_branch_name: enforce branch naming convention like + `{Amp,...}TaskXYZ_...` + """ + # Patch-based copying is not yet implemented. + hdbg.dassert( + not use_patch, + "Patch-based branch copying is not yet implemented", + ) + # Remove untracked files to ensure clean state when copying branch. + cmd = "git clean -fd" + hlitauti.run(ctx, cmd) + curr_branch_name = hgit.get_branch_name() + # Cannot copy master branch since it would be copying the source to itself. + hdbg.dassert_ne( + curr_branch_name, + "master", + "Cannot copy master branch", + ) + # Sync with master first to ensure new branch includes latest changes (if requested). + if not skip_git_merge_master: + cmd = "invoke git_merge_master --abort-if-not-ff" + hlitauti.run(ctx, cmd) + else: + _LOG.warning("Skipping git_merge_master as requested") + if use_patch: + # TODO(gp): Create a patch or do a `git merge`. 
+ pass + # Generate unique branch name if not provided. + if new_branch_name is None or new_branch_name == "": + new_branch_name = hgit.get_branch_next_name() + _LOG.info("new_branch_name='%s'", new_branch_name) + hdbg.dassert_ne( + new_branch_name, + None, + "Branch name must not be None after generation", + ) + # Allow scratch branches to bypass naming convention. + if new_branch_name.startswith("gp_scratch"): + check_branch_name = False + # Create or checkout the target branch. + mode = "all" + new_branch_exists = hgit.does_branch_exist(new_branch_name, mode) + if new_branch_exists: + # Switch to existing branch to copy changes into it. + cmd = f"git checkout {new_branch_name}" + else: + # Create new branch from master as base. + cmd = f"git checkout master && invoke git_branch_create --branch-name '{new_branch_name}'" + if not check_branch_name: + cmd += " --no-check-branch-name" + hlitauti.run(ctx, cmd) + if use_patch: + # TODO(gp): Apply the patch. + pass + # Squash merge copies all commits as a single change without creating a merge commit. + cmd = f"git merge --squash --ff {curr_branch_name} && git reset HEAD" + hlitauti.run(ctx, cmd) + + +# /////////////////////////////////////////////////////////////////////////////// + + +def _git_diff_with_branch( + ctx: Any, + hash_: str, + tag: str, + # + dir_name: str, + subdir: str, + # + diff_type: str, + keep_extensions: str, + skip_extensions: str, + file_name: str, + # + only_print_files: bool, + dry_run: bool, +) -> None: + """ + Diff files from this client against files in a branch using vimdiff. + + Same parameters as `git_branch_diff_with`. + """ + _LOG.debug( + hprint.to_str( + "hash_ tag dir_name diff_type subdir keep_extensions skip_extensions" + " file_name only_print_files dry_run" + ) + ) + # Diff only works on non-master branches to avoid comparing with itself. 
+ curr_branch_name = hgit.get_branch_name() + hdbg.dassert_ne( + curr_branch_name, + "master", + "Cannot diff master branch against itself", + ) + # Retrieve the list of changed files between current state and the given hash. + cmd = [] + cmd.append("git diff") + if diff_type: + cmd.append(f"--diff-filter={diff_type}") + cmd.append(f"--name-only HEAD {hash_}") + cmd = " ".join(cmd) + files = hsystem.system_to_files( + cmd, dir_name, remove_files_non_present=False + ) + files = sorted(files) + _LOG.debug("%s", "\n".join(files)) + # Filter to a single specific file if requested. + if file_name: + _LOG.debug("Filter by file_name") + _LOG.info("Before filtering files=%s", len(files)) + files_tmp = [] + for f in files: + if f == file_name: + files_tmp.append(f) + hdbg.dassert_eq( + 1, + len(files_tmp), + "Can't find file_name='%s' in\n%s", + file_name, + "\n".join(files), + ) + files = files_tmp + _LOG.info("After filtering by file_name: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Keep only files with specified extensions (useful for focusing on code vs docs). + if keep_extensions: + _LOG.debug("# Filter by keep_extensions") + _LOG.debug("Before filtering files=%s", len(files)) + extensions_lst = keep_extensions.split(",") + _LOG.warning( + "Keeping files with %d extensions: %s", + len(extensions_lst), + extensions_lst, + ) + files_tmp = [] + for f in files: + if any(f.endswith(ext) for ext in extensions_lst): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by keep_extensions: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Exclude files with specified extensions (useful for skipping config or build files). 
+ if skip_extensions: + _LOG.debug("# Filter by skip_extensions") + _LOG.debug("Before filtering files=%s", len(files)) + extensions_lst = skip_extensions.split(",") + _LOG.warning( + "Skipping files with %d extensions: %s", + len(extensions_lst), + extensions_lst, + ) + files_tmp = [] + for f in files: + if not any(f.endswith(ext) for ext in extensions_lst): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by skip_extensions: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Limit diff to files within a specific subdirectory. + if subdir != "": + _LOG.debug("# Filter by subdir") + _LOG.debug("Before filtering files=%s", len(files)) + files_tmp = [] + for f in files: + if f.startswith(subdir): + files_tmp.append(f) + files = files_tmp + _LOG.info("After filtering by subdir: files=%s", len(files)) + _LOG.debug("%s", "\n".join(files)) + # Summary of what will be diffed. + _LOG.info("\n" + hprint.frame(f"# files={len(files)}")) + _LOG.info("\n" + "\n".join(files)) + if len(files) == 0: + _LOG.warning("No files match the filter criteria: exiting") + return + if only_print_files: + _LOG.warning("Exiting as per user request with --only-print-files") + return + # Create temporary directory to store base versions for comparison. + root_dir = hgit.get_repo_full_name_from_client(super_module=True) + # TODO(gp): We should get a temp dir. + dst_dir = f"/tmp/{root_dir}/tmp.{tag}" + hio.create_dir(dst_dir, incremental=False) + # Build vimdiff commands for each file, retrieving base version from source hash. + script_txt = [] + for branch_file in files: + _LOG.debug("\n%s", hprint.frame(f"branch_file={branch_file}")) + # Use current file as right side (what the branch currently has). + if os.path.exists(branch_file): + right_file = branch_file + else: + # For deleted files, use /dev/null as the right side. + right_file = "/dev/null" + # Flatten directory structure to avoid naming conflicts in temp directory. 
+ tmp_file = branch_file + tmp_file = tmp_file.replace("/", "_") + tmp_file = os.path.join(dst_dir, tmp_file) + _LOG.debug( + "Extracting base version of %s to %s", + branch_file, + tmp_file, + ) + # Extract the base version from the specified hash/branch. + cmd = f"git show {hash_}:{branch_file} >{tmp_file}" + rc = hsystem.system(cmd, abort_on_error=False) + if rc != 0: + # File is new in the branch (didn't exist in base hash). + _LOG.debug("File '%s' is new (doesn't exist in base)", branch_file) + left_file = "/dev/null" + else: + left_file = tmp_file + # Generate vimdiff command to compare base and current versions. + cmd = f"vimdiff {left_file} {right_file}" + _LOG.debug("-> %s", cmd) + script_txt.append(cmd) + script_txt = "\n".join(script_txt) + # Display the diff commands that will be executed. + _LOG.info("\n%s" % hprint.frame("Diffing script")) + _LOG.info(script_txt) + # Create executable script for easy manual re-running. + script_file_name = f"./tmp.vimdiff_branch_with_{tag}.sh" + msg = f"To diff against {tag} run" + hio.create_executable_script(script_file_name, script_txt, msg=msg) + hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) + # Clean up temporary files. + cmd = f"rm -rf {dst_dir}" + hlitauti.run(ctx, cmd, dry_run=dry_run) + + +def _git_diff_with_branch_wrapper( + ctx: Any, + hash_: str, + tag: str, + # + dir_name: str, + subdir: str, + include_submodules: bool, + # + diff_type: str, + keep_extensions: str, + skip_extensions: str, + python: bool, + file_name: str, + # + only_print_files: bool, + dry_run: bool, +) -> None: + """ + Wrapper for _git_diff_with_branch that handles Python-specific filtering and submodules. + + Applies Python-specific extension filter if requested, then delegates to _git_diff_with_branch. + If include_submodules is True, also runs the diff for the amp submodule if present. 
+ + Parameters are the same as _git_diff_with_branch with the addition of: + :param include_submodules: if True, also diff the amp submodule + :param python: if True, only diff Python files (overrides extension filters) + """ + hdbg.dassert_eq(dir_name, ".") + # If Python mode is enabled, override all extension filters to only diff Python files. + if python: + hdbg.dassert_eq( + diff_type, + "", + "Cannot specify diff_type with python mode", + ) + hdbg.dassert_eq( + keep_extensions, + "", + "Cannot specify keep_extensions with python mode", + ) + hdbg.dassert_eq( + skip_extensions, + "", + "Cannot specify skip_extensions with python mode", + ) + hdbg.dassert_eq( + file_name, + "", + "Cannot specify file_name with python mode", + ) + keep_extensions = "py" + # Diff files in the main repository. + _git_diff_with_branch( + ctx, + hash_, + tag, + dir_name, + subdir, + diff_type, + keep_extensions, + skip_extensions, + file_name, + only_print_files, + dry_run, + ) + # Also diff the amp submodule if it exists and was requested. + if include_submodules: + if hgit.is_amp_present(): + with hsystem.cd("amp"): + _git_diff_with_branch( + ctx, + hash_, + tag, + dir_name, + subdir, + diff_type, + keep_extensions, + skip_extensions, + file_name, + only_print_files, + dry_run, + ) + + +@task +def git_branch_diff_with( # type: ignore + ctx, + target="base", + hash_value="", + # Where to diff. + subdir="", + include_submodules=False, + # What files to diff. + diff_type="", + keep_extensions="", + skip_extensions="", + python=False, + file_name="", + # What actions. + only_print_files=False, + dry_run=False, +): + """ + Diff files of the current branch with master at the branching point. 
+ + :param subdir: subdir to consider for diffing, instead of `.` + :param target: + - `base`: diff with respect to the branching point + - `master`: diff with respect to `origin/master` + - `head`: diff modified files + - `hash`: diff with respect to hash specified in `hash` + :param hash_value: the hash to use with target="hash" + :param include_submodules: run recursively on all submodules + :param diff_type: files to diff using git `--diff-filter` options + :param keep_extensions: a comma-separated list of extensions to check, e.g., + 'csv,py'. An empty string means keep all the extensions + :param skip_extensions: a comma-separated list of extensions to skip, e.g., + 'txt'. An empty string means do not skip any extension + :param only_print_files: print files to diff and exit + :param dry_run: execute diffing script or not + """ + # Determine the comparison target based on user preference. + dir_name = "." + hdbg.dassert_in(target, ("base", "master", "head", "hash"), "Invalid target") + # Resolve target to a specific git hash for consistent diffing. + if target == "base": + # Compare against the point where this branch diverged from master. + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'base'", + ) + hash_value = hgit.get_branch_hash(dir_name=dir_name) + tag = "base" + elif target == "master": + # Compare against the current state of the remote master branch. + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'master'", + ) + hash_value = "origin/master" + tag = "origin_master" + elif target == "head": + # Compare working directory against HEAD (uncommitted changes). + hdbg.dassert_eq( + hash_value, + "", + "Cannot specify hash_value when target is 'head'", + ) + hash_value = "" + tag = "head" + elif target == "hash": + # Compare against a user-specified commit hash. 
+ hdbg.dassert_ne( + hash_value, + "", + "Must provide hash_value when target is 'hash'", + ) + tag = f"hash@{hash_value}" + else: + raise ValueError(f"Invalid target='{target}'") + _git_diff_with_branch_wrapper( + ctx, + hash_value, + tag, + # + dir_name, + subdir, + include_submodules, + # + diff_type, + keep_extensions, + skip_extensions, + python, + file_name, + # + only_print_files, + dry_run, + ) + + +@task +def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore + """ + Copy the code from the src Git client to the dst Git client. + + :param file_name: the name of the file to copy (which is under + `src_git_dir`) + :param src_git_dir: the directory of the source Git client (e.g., + "/Users/saggese/src/helpers1") + :param dst_git_dir: the directory of the destination Git client (e.g., + "/Users/saggese/src/helpers2") + """ + _ = ctx + src_git_dir = hgit.resolve_git_client_dir(src_git_dir) + dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) + # Map source file path to equivalent path in destination repository. + dst_file_path = hgit.project_file_name_in_git_client( + file_name, + src_git_dir, + dst_git_dir, + check_src_file_exists=True, + check_dst_file_exists=False, + ) + _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) + # Perform the file copy operation. + hsystem.system_to_string(f"cp {file_name} {dst_file_path}") + + +# ############################################################################# + + +def _get_submodule_paths() -> List[str]: + """ + Get list of submodule paths from .gitmodules file. + + :return: List of submodule directory paths, empty if no submodules + found + """ + gitmodules_path = ".gitmodules" + if not os.path.exists(gitmodules_path): + _LOG.info("No .gitmodules file found") + return [] + # Extract submodule paths from git config using the .gitmodules file. 
+ cmd = "git config --file .gitmodules --get-regexp path" + _, output = hsystem.system_to_string(cmd) + submodule_paths = [] + for line in output.strip().split("\n"): + if line: + # Parse format: "submodule.NAME.path PATH" to extract PATH. + path = line.split(" ", 1)[1] + submodule_paths.append(path) + return submodule_paths + + +def _get_branch_name(submodule_path: str) -> str: + """ + Get the current branch name for a git repository. + + :param submodule_path: Path to the git repository directory + :return: Stripped output of `git rev-parse --abbrev-ref HEAD` + """ + hdbg.dassert_dir_exists(submodule_path) + hdbg.dassert_path_exists(os.path.join(submodule_path, ".git")) + # Query git to get the symbolic name of the current HEAD. + cmd = f"cd {submodule_path} && git rev-parse --abbrev-ref HEAD" + _, branch_name = hsystem.system_to_string(cmd) + return branch_name.strip() + + +@task +def git_branches(ctx): # type: ignore + """ + Print the branch name for the main repository and each git submodule + directory. + + Example usage:: + > invoke git_branches + . -> master + submodule1 -> feature/new-feature + submodule2 -> develop + submodule3 -> main + """ + _ = ctx + # Display main repository branch first for clarity. + main_branch = _get_branch_name(".") + print(f". -> {main_branch}") + # List submodule branches to detect if any are out of sync. + submodule_paths = _get_submodule_paths() + if not submodule_paths: + _LOG.debug("No git submodules found in this repository") + return + # Report branch for each submodule. + for path in submodule_paths: + branch_name = _get_branch_name(path) + print(f"{path} -> {branch_name}") + + +@task +def git_branch_is_merged(ctx): # type: ignore + """ + Check if the current branch was merged into master using GitHub API and git. + + Runs `gh pr list` for PRs from this branch into master and `git ls-remote` to check if the branch is still on the remote. 
+ """ + _ = ctx + hlitauti.report_task() + branch_name = hgit.get_branch_name() + print(f"branch_name='{branch_name}'") + # Check for PRs targeting master from the current branch on GitHub. + cmd = f"gh pr list --base master --head {branch_name}" + ctx.run(cmd, pty=True) + # Verify if the branch still exists on the remote repository. + cmd = f"git ls-remote --heads origin {branch_name}" + ctx.run(cmd, pty=True) + + +@task +def git_backup( + ctx, + file_mode="all", + backup_dir=None, + include_subrepos=True, + dry_run=False, +): # type: ignore + """ + Create a zip file with modified and/or untracked files from the current + repository and optionally its submodules. + + The zip file is created with a timestamp-based name in the specified + backup directory (default: $HOME/src/backups). + Example: `modified_files.helpers_root.20251119_130034.zip` + + :param file_mode: which files to include: "all" (default), "modified", or + "untracked" + :param backup_dir: directory where to save the zip file (default: + $HOME/src/backups) + :param include_subrepos: whether to include submodule files (default: True) + :param dry_run: if True, only print the files that would be included + without creating the zip + """ + hlitauti.report_task( + txt=hprint.to_str("file_mode, backup_dir, include_subrepos, dry_run") + ) + _ = ctx + # Validate backup scope to ensure user intent is clear. + valid_modes = ["all", "modified", "untracked"] + hdbg.dassert_in( + file_mode, + valid_modes, + "Invalid file_mode '%s'; must be one of: %s", + file_mode, + ", ".join(valid_modes), + ) + # Use default backup location if not specified. + if backup_dir is None: + backup_dir = os.path.join(os.path.expanduser("~"), "src", "backups") + hio.create_dir(backup_dir, incremental=True) + # Determine repository name for readable backup file naming. + super_module = False + git_client_root = hgit.get_client_root(super_module) + # Include timestamp to avoid overwriting previous backups. 
+ timestamp = hlitauti.get_ET_timestamp() + repo_name = os.path.basename(git_client_root) + zip_file_name = f"modified_files.{repo_name}.{timestamp}.zip" + # Collect files from the main repository. + _LOG.info("Collecting %s files from main repository...", file_mode) + main_repo_files = hgit.get_modified_and_untracked_files(".", mode=file_mode) + _LOG.info("Found %d files in main repository", len(main_repo_files)) + all_files = [] + for file_path in main_repo_files: + all_files.append((".", file_path)) + # Also include submodule files if requested to ensure complete backup. + if include_subrepos: + submodule_paths = _get_submodule_paths() + if submodule_paths: + _LOG.info( + "Found %d submodule(s), collecting files...", + len(submodule_paths), + ) + for submodule_path in submodule_paths: + hdbg.dassert_dir_exists( + submodule_path, + msg=f"Submodule path does not exist: {submodule_path}", + ) + _LOG.info("Checking submodule: %s", submodule_path) + submodule_files = hgit.get_modified_and_untracked_files( + submodule_path, mode=file_mode + ) + _LOG.info( + "Found %d files in submodule %s", + len(submodule_files), + submodule_path, + ) + for file_path in submodule_files: + all_files.append((submodule_path, file_path)) + else: + _LOG.info("No submodules found") + else: + _LOG.info("Skipping submodules (include_subrepos=False)") + # Verify there's content to backup before proceeding. + if not all_files: + _LOG.warning("No %s files found. Nothing to zip.", file_mode) + return + # Display summary of what will be backed up. + _LOG.info( + "\n%s\nFound %d total files to include:\n%s", + hprint.frame("Files to include in zip"), + len(all_files), + hprint.indent( + "\n".join( + [ + ( + os.path.join(repo_path, file_path) + if repo_path != "." + else file_path + ) + for repo_path, file_path in all_files + ] + ) + ), + ) + if dry_run: + _LOG.warning("Dry-run mode: not creating zip file") + return + # Create zip file with all collected files. 
+ zip_file_path = os.path.join(backup_dir, zip_file_name) + _LOG.info("Creating zip file: %s", zip_file_path) + import zipfile + + with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf: + for repo_path, file_path in all_files: + full_path = os.path.join(repo_path, file_path) + # Maintain directory hierarchy in archive for easy restoration. + arcname = ( + os.path.join(repo_path, file_path) + if repo_path != "." + else file_path + ) + try: + zipf.write(full_path, arcname=arcname) + _LOG.debug("Added to zip: %s", arcname) + except Exception as e: + _LOG.warning("Failed to add %s to zip: %s", full_path, e) + _LOG.info("Successfully created zip file: %s", zip_file_path) + # Display location for easy access. + abs_zip_path = os.path.abspath(zip_file_path) + print(f"\nZip file created at: {abs_zip_path}") + + +@task +def gh_watch(ctx, *, interval=60): # type: ignore + """ + Watch GitHub workflow status with periodic updates. + + Runs `invoke gh_workflow_list` every N seconds using the `watch` command. + If running in tmux, temporarily renames the window to "*GH_WATCH*" for + visibility and restores it on exit. + + :param interval: Update interval in seconds + """ + hlitauti.report_task() + # Check if running inside tmux and save original window name. + old_pane_title = None + if os.environ.get("TMUX"): + _LOG.info("Running in tmux, saving window name") + _, old_pane_title = hsystem.system_to_one_line( + "tmux display-message -p '#W'" + ) + _LOG.info("Original window name: %s", old_pane_title) + # Rename window to indicate we're watching workflows. + hsystem.system("tmux rename-window '*GH_WATCH*'") + try: + # Watch workflows by repeatedly running gh_workflow_list. + while True: + # Clear screen before displaying updated workflow status. + subprocess.run("clear; invoke gh_workflow_list", shell=True) + _LOG.info("Sleeping for %d seconds before next update", interval) + time.sleep(interval) + finally: + # Restore original tmux window name if it was changed. 
+ if old_pane_title is not None: + _LOG.info("Restoring window name: %s", old_pane_title) + hsystem.system(f"tmux rename-window '{old_pane_title}'") + + +# TODO(gp): Add the following scripts: +# dev_scripts/git/gcl +# dev_scripts/git/git_branch.sh +# dev_scripts/git/git_branch_point.sh +# dev_scripts/create_class_diagram.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py new file mode 100644 index 000000000..ff4043f1d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py @@ -0,0 +1,837 @@ +""" +Import as: + +import helpers.lib_tasks_integrate as hlitaint +""" + +import datetime +import logging +import os +from typing import List, Optional, Set, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +DEFAULT_SRC_DIR_BASENAME = "cmamp1" +DEFAULT_DST_DIR_BASENAME = "kaizenflow1" + +# DEFAULT_SRC_DIR_BASENAME="amp1" +# DEFAULT_DST_DIR_BASENAME="cmamp1" + + +def _dassert_current_dir_matches(expected_dir_basename: str) -> None: + """ + Ensure that the name of the current dir is the one expected. + + E.g., `/Users/saggese/src/cmamp1` is a valid dir for an integration + branch for `cmamp1`. + """ + _LOG.debug(hprint.to_str("expected_dir_basename")) + # Get the basename of the current dir. 
+ curr_dir_basename = os.path.basename(os.getcwd()) + # Check that it's what is expected. + hdbg.dassert_eq( + curr_dir_basename, + expected_dir_basename, + "The current dir '%s' doesn't match the expected dir '%s'", + curr_dir_basename, + expected_dir_basename, + ) + + +# TODO(gp): -> _dassert_is_integration_dir +def _dassert_is_integration_branch(abs_dir: str) -> None: + """ + Ensure that the branch in `abs_dir` is a valid integration or lint branch. + + E.g., `AmpTask1786_Integrate_20220402` is a valid integration + branch. + """ + _LOG.debug(hprint.to_str("abs_dir")) + branch_name = hgit.get_branch_name(dir_name=abs_dir) + hdbg.dassert_ne(branch_name, "master") + hdbg.dassert( + ("_Integrate_" in branch_name) or ("_Lint_" in branch_name), + "Invalid branch_name='%s' in abs_dir='%s'", + branch_name, + abs_dir, + ) + + +def _clean_both_integration_dirs(abs_dir1: str, abs_dir2: str) -> None: + """ + Run `i git_clean` on the passed dirs. + + :param abs_dir1, abs_dir2: full paths of the dirs to clean + """ + _LOG.debug(hprint.to_str("abs_dir1 abs_dir2")) + # + cmd = f"cd {abs_dir1} && invoke git_clean" + hsystem.system(cmd) + # + cmd = f"cd {abs_dir2} && invoke git_clean" + hsystem.system(cmd) + + +@task +def integrate_create_branch(ctx, dir_basename, dry_run=False): # type: ignore + """ + Create the branch for integration of `dir_basename` (e.g., amp1) in the + current dir. + + :param dir_basename: specify the dir name (e.g., `amp1`) to ensure the set-up is + correct. + """ + hlitauti.report_task() + # Check that the current dir has the name `dir_basename`. + _dassert_current_dir_matches(dir_basename) + # Login in GitHub. + hlitagh.gh_login(ctx) + # Create the integration branch with the current date, e.g., + # `AmpTask1786_Integrate_20211231`. 
+ date = datetime.datetime.now().date() + date_as_str = date.strftime("%Y%m%d") + branch_name = f"AmpTask1786_Integrate_{date_as_str}" + # query_yes_no("Are you sure you want to create the branch ") + _LOG.info("Creating branch '%s'", branch_name) + cmd = f"invoke git_branch_create --branch-name '{branch_name}'" + hlitauti.run(ctx, cmd, dry_run=dry_run) + + +# ############################################################################# + + +def _resolve_src_dst_names( + src_dir_basename: str, + dst_dir_basename: str, + subdir: str, + *, + check_exists: bool = True, +) -> Tuple[str, str]: + """ + Return the full path of `src_dir_basename` and `dst_dir_basename`. + + :param src_dir_basename: the current dir (e.g., `amp1`) + :param dst_dir_basename: a dir parallel to the current one (`cmamp1`) + :param check_exists: check that the dst dir exists + + :return: absolute paths of both directories + """ + curr_parent_dir = os.path.dirname(os.getcwd()) + # + abs_src_dir = os.path.join(curr_parent_dir, src_dir_basename, subdir) + abs_src_dir = os.path.normpath(abs_src_dir) + hdbg.dassert_dir_exists(abs_src_dir) + # + abs_dst_dir = os.path.join(curr_parent_dir, dst_dir_basename, subdir) + abs_dst_dir = os.path.normpath(abs_dst_dir) + if check_exists: + hdbg.dassert_dir_exists(abs_dst_dir) + return abs_src_dir, abs_dst_dir + + +@task +def integrate_diff_dirs( # type: ignore + ctx, + src_dir_basename=DEFAULT_SRC_DIR_BASENAME, + dst_dir_basename=DEFAULT_DST_DIR_BASENAME, + reverse=False, + subdir="", + copy=False, + use_linux_diff=False, + check_branches=True, + clean_branches=True, + remove_usual=False, + run_diff_script=True, + dry_run=False, +): + """ + Integrate repos from dirs `src_dir_basename` to `dst_dir_basename` by diffing + or copying all the files with differences. + + ``` + # Use the default values for src / dst dirs to represent the usual set-up. + > i integrate_diff_dirs \ + --src-dir-basename amp1 \ + --dst-dir-basename cmamp1 \ + --subdir . 
+ ``` + + :param src_dir_basename: dir with the source branch (e.g., amp1) + :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) + :param reverse: switch the roles of the default source and destination branches + :param subdir: filter to the given subdir for both dirs (e.g., + `src_dir_basename/subdir` and `dst_dir_basename/subdir`) + :param copy: copy the files instead of diffing + :param use_linux_diff: use Linux `diff` instead of `diff_to_vimdiff.py` + :param remove_usual: remove the usual mismatching files (e.g., `.github`) + :param run_diff_script: run the diff script + :param dry_run: do not execute the commands + """ + _ = ctx + hlitauti.report_task() + if reverse: + src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename + _LOG.warning( + "Reversing dirs: %s", + hprint.to_str2(src_dir_basename, dst_dir_basename), + ) + # Check that the integration branches are in the expected state. + # _dassert_current_dir_matches(src_dir_basename) + # When we integrate a dir that doesn't exist in the dst branch, we need to + # skip the check for existence. + check_exists = False + abs_src_dir, abs_dst_dir = _resolve_src_dst_names( + src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists + ) + hio.create_dir(abs_dst_dir, incremental=True) + if check_branches: + _dassert_is_integration_branch(abs_src_dir) + _dassert_is_integration_branch(abs_dst_dir) + else: + _LOG.warning("Skipping integration branch check") + # Clean branches if needed. + if clean_branches: + # We can clean up only the root dir. + if subdir == "": + _clean_both_integration_dirs(abs_src_dir, abs_dst_dir) + else: + _LOG.warning("Skipping integration branch cleaning") + # Copy or diff dirs. + _LOG.info("abs_src_dir=%s", abs_src_dir) + _LOG.info("abs_dst_dir=%s", abs_dst_dir) + hdbg.dassert_ne(abs_src_dir, abs_dst_dir) + if copy: + # Copy the files. 
+ if dry_run: + cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" + else: + rsync_opts = "--delete -a" + cmd = f"rsync {rsync_opts} {abs_src_dir}/ {abs_dst_dir}" + else: + # Diff the files. + if use_linux_diff: + cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" + else: + cmd = "diff_to_vimdiff.py" + if run_diff_script: + cmd += " --run_diff_script" + else: + cmd += " --no_run_diff_script" + _LOG.warning("Skipping running diff script") + cmd += f" --dir1 {abs_src_dir} --dir2 {abs_dst_dir}" + if remove_usual: + vals = [ + r"\/\.github\/", + ] + regex = "|".join(vals) + cmd += f" --ignore_files='{regex}'" + # We need to use `system` to get vimdiff to connect to stdin and stdout. + if not dry_run: + # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) + os.system(cmd) + + +# ############################################################################# + + +# TODO(gp): Allow to pass the hash of the last integration to consider. +# Factor out the logic to find the hash + +# Sometimes we want to see the changes in one dir since an integration point + +# E.g., find all the changes in `datapull` since the last integration +# +# > git log --oneline datapull +# 77f612f75 SorrIssue244 CCXT timestamp representation unit test (#317) +# 6b981b1f6 Sorrtask298 rename get docker cmd to get docker run cmd (#331) +# bd33a5fb9 SorrTask267_Parquet_to_CSV (#267) +# 9819fd117 AmpTask1786_Integrate_20230518_im (#273) <==== +# d530ed561 Update (#272) +# b75eab7ad AmpTask1786_Integrate_20230518_3 (#271) +# +# > git difftool 9819fd117.. datapull +# ... +# +# > git diff --name-only 9819fd117.. 
def _find_files_touched_since_last_integration(
    abs_dir: str, subdir: str
) -> List[str]:
    """
    Return the list of files modified since the last integration for `abs_dir`.

    The last integration is the most recent commit reported by `git log` whose
    line contains `AmpTask1786_Integrate`, excluding the merges of `master`
    into an integration branch.

    The result is also saved in
    `tmp.integrate_find_files_touched_since_last_integration.{basename}.txt`
    in the current dir for debugging.

    :param abs_dir: directory to cd before executing this script
    :param subdir: consider only the files whose path starts with `subdir`
        (an empty string keeps all the files)
    :return: sorted list of the files reported by `git diff --name-only`
    """
    _LOG.debug(hprint.to_str2(abs_dir))
    dir_basename = os.path.basename(abs_dir)
    # TODO(gp): dir_basename can be computed from abs_dir_name to simplify the
    # interface.
    # Change the dir to the desired one, restoring the original dir even in
    # case of errors.
    old_dir = os.getcwd()
    try:
        os.chdir(abs_dir)
        # Find the hash of all integration commits.
        cmd = "git log --date=local --oneline --date-order | grep AmpTask1786_Integrate"
        # Remove integrations like "'... Merge branch 'master' into
        # AmpTask1786_Integrate_20220113'"
        cmd += " | grep -v \"Merge branch 'master' into \""
        _, txt = hsystem.system_to_string(cmd)
        _LOG.debug("integration commits=\n%s", txt)
        txt = txt.split("\n")
        # > git log --date=local --oneline --date-order | grep AmpTask1786_Integrate
        # 72a1a101 AmpTask1786_Integrate_20211218 (#1975)
        # 2acfd6d7 AmpTask1786_Integrate_20211214 (#1950)
        # 318ab0ff AmpTask1786_Integrate_20211210 (#1933)
        hdbg.dassert_lte(2, len(txt))
        print(f"# last_integration: '{txt[0]}'")
        last_integration_hash = txt[0].split()[0]
        print("* " + hprint.to_str("last_integration_hash"))
        # Find the first commit after the commit with the last integration.
        cmd = f"git log --oneline --reverse --ancestry-path {last_integration_hash}^..master"
        _, txt = hsystem.system_to_string(cmd)
        print(f"* commits after last integration=\n{txt}")
        txt = txt.split("\n")
        # > git log --oneline --reverse --ancestry-path 72a1a101^..master
        # 72a1a101 AmpTask1786_Integrate_20211218 (#1975)
        # 90e90353 AmpTask1955_Lint_20211218 (#1976)
        # 4a2b45c6 AmpTask1858_Implement_buildmeister_workflows_in_invoke (#1860)
        hdbg.dassert_lte(2, len(txt))
        first_commit_hash = txt[1].split()[0]
        _LOG.debug("first_commit: '%s'", txt[1])
        _LOG.debug(hprint.to_str("first_commit_hash"))
        # Find all the files touched in each branch.
        cmd = f"git diff --name-only {first_commit_hash}..HEAD"
        _, txt = hsystem.system_to_string(cmd)
        # An empty output splits into `[""]`: drop the empty entries so that
        # "no file changed" is reported as an empty list and not as a file
        # with an empty name.
        files: List[str] = [line for line in txt.split("\n") if line != ""]
    finally:
        os.chdir(old_dir)
    _LOG.debug("Files modified since the integration=\n%s", "\n".join(files))
    # Filter files by subdir, if needed.
    if subdir:
        files = [file for file in files if file.startswith(subdir)]
    # Reorganize the files.
    hdbg.dassert_no_duplicates(files)
    files = sorted(files)
    # Save to file for debugging.
    file_name = (
        "tmp.integrate_find_files_touched_since_last_integration."
        f"{dir_basename}.txt"
    )
    hio.to_file(file_name, "\n".join(files))
    _LOG.debug("Saved file to '%s'", file_name)
    return files
def _integrate_files(
    files: Set[str],
    abs_left_dir: str,
    abs_right_dir: str,
    only_different_files: bool,
) -> List[Tuple[str, str, str]]:
    """
    Build the list of files to compare between two dirs.

    A file is skipped when it is missing on at least one side or, if
    `only_different_files` is set, when the two copies have the same content.

    :param files: relative path of the files to compare
    :param abs_left_dir: path of the left dir
    :param abs_right_dir: path of the right dir
    :param only_different_files: include only the files that are different
    :return: list of tuples `(file, left_file, right_file)` to compare, sorted
        by `file`
    """
    # Imported locally since it is needed only by the fallback comparison.
    import filecmp

    _LOG.debug(hprint.to_str("abs_left_dir abs_right_dir only_different_files"))
    files_to_diff: List[Tuple[str, str, str]] = []
    for file in sorted(files):
        _LOG.debug(hprint.to_str("file"))
        left_file = os.path.join(abs_left_dir, file)
        right_file = os.path.join(abs_right_dir, file)
        # Check if both the files exist and are the same.
        both_exist = os.path.exists(left_file) and os.path.exists(right_file)
        equal: Optional[bool]
        skip: bool
        if not both_exist:
            # At least one of the two files doesn't exist: nothing to compare.
            equal = False
            skip = True
        elif only_different_files:
            # They both exist and we want to check if they are the same.
            try:
                equal = hio.from_file(left_file) == hio.from_file(right_file)
            except RuntimeError as e:
                # RuntimeError: error='utf-8' codec can't decode byte 0xd0 in
                # position 10: invalid continuation byte
                _LOG.warning("Caught error:\n%s", e)
                # The files can't be read as text: compare them byte-by-byte
                # instead of assuming that they are equal.
                equal = filecmp.cmp(left_file, right_file, shallow=False)
            skip = equal
        else:
            # They both exist, and we want to process even if they are the
            # same.
            equal = None
            skip = False
        _LOG.debug(hprint.to_str("left_file right_file both_exist equal skip"))
        # Execute the action on the 2 files.
        if skip:
            _LOG.debug("  Skip %s", file)
        else:
            _LOG.debug("  -> (%s, %s)", left_file, right_file)
            files_to_diff.append((file, left_file, right_file))
    return files_to_diff
+ abs_src_dir, abs_dst_dir = _resolve_src_dst_names( + src_dir_basename, dst_dir_basename, subdir="" + ) + if check_branches: + _dassert_is_integration_branch(abs_src_dir) + _dassert_is_integration_branch(abs_dst_dir) + else: + _LOG.warning("Skipping integration branch check") + # Find the files touched in each branch since the last integration. + src_files = set( + _find_files_touched_since_last_integration(abs_src_dir, subdir) + ) + dst_files = set( + _find_files_touched_since_last_integration(abs_dst_dir, subdir) + ) + # + if file_direction == "common_files": + files = src_files.intersection(dst_files) + elif file_direction == "only_files_in_src": + files = src_files - dst_files + elif file_direction == "only_files_in_dst": + files = dst_files - src_files + elif file_direction == "union_files": + files = src_files.union(dst_files) + else: + raise ValueError(f"Invalid file_direction='{file_direction}'") + # + files_to_diff = _integrate_files( + files, + abs_src_dir, + abs_dst_dir, + only_different_files, + ) + # Print the files. + print(hprint.frame(file_direction)) + _LOG.debug(hprint.to_str("files_to_diff")) + files_set = list(zip(*files_to_diff)) + if not files_set: + _LOG.warning("No file found: skipping") + return + files_set = sorted(list(files_set[0])) + txt = "\n".join(files_set) + print(hprint.indent(txt)) + # Process the files touched. + if mode == "print_dirs": + files_lst = [] + for file, left_file, right_file in files_to_diff: + dir_name = os.path.dirname(file) + # Skip empty dir, e.g., for `pytest.ini`. + if dir_name != "": + files_lst.append(dir_name) + files_lst = sorted(list(set(files_lst))) + print(hprint.frame("Dirs changed")) + print("\n".join(files_lst)) + else: + # Build the script with the operations to perform. 
@task
def integrate_diff_overlapping_files(  # type: ignore
    ctx, src_dir_basename, dst_dir_basename, subdir=""
):
    """
    Diff the files modified in both branches against their base version.

    Find the files modified in both branches `src_dir_basename` and
    `dst_dir_basename`, then compare each of these files at HEAD with the
    version at the branch hash returned by `hgit.get_branch_hash()`.

    This is used to check what changes were made to files modified by
    both branches. The commands are saved in the script
    `./tmp.vimdiff_overlapping_files.sh`, which is not executed.

    :param ctx: invoke ctx
    :param src_dir_basename: dir with the source branch
    :param dst_dir_basename: dir with the destination branch
    :param subdir: subdir passed to `_resolve_src_dst_names()`
    """
    hlitauti.report_task()
    _ = ctx
    # Check that the integration branches are in the expected state.
    _dassert_current_dir_matches(src_dir_basename)
    # When we integrate a dir that doesn't exist in the dst branch, we need to
    # skip the check for existence.
    check_exists = False
    src_dir_basename, dst_dir_basename = _resolve_src_dst_names(
        src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists
    )
    _dassert_is_integration_branch(src_dir_basename)
    _dassert_is_integration_branch(dst_dir_basename)
    _clean_both_integration_dirs(src_dir_basename, dst_dir_basename)
    # Find the files modified in both branches.
    src_hash = hgit.get_branch_hash(src_dir_basename)
    _LOG.info("src_hash=%s", src_hash)
    dst_hash = hgit.get_branch_hash(dst_dir_basename)
    _LOG.info("dst_hash=%s", dst_hash)
    diff_files1 = os.path.abspath("./tmp.files_modified1.txt")
    diff_files2 = os.path.abspath("./tmp.files_modified2.txt")
    cmd = f"cd {src_dir_basename} && git diff --name-only {src_hash} HEAD >{diff_files1}"
    hsystem.system(cmd)
    cmd = f"cd {dst_dir_basename} && git diff --name-only {dst_hash} HEAD >{diff_files2}"
    hsystem.system(cmd)
    # Intersect the two lists in Python: `comm` requires its inputs to be
    # sorted according to the current locale, which the output of `git diff`
    # doesn't guarantee.
    src_modified = {f for f in hio.from_file(diff_files1).split("\n") if f != ""}
    dst_modified = {f for f in hio.from_file(diff_files2).split("\n") if f != ""}
    files = sorted(src_modified & dst_modified)
    # Keep saving the common files for debugging.
    common_files = "./tmp.common_files.txt"
    hio.to_file(common_files, "\n".join(files))
    _LOG.info("Found %d files to diff:\n%s", len(files), "\n".join(files))
    # Retrieve the original file and create the diff command.
    script_txt = []
    for src_file in files:
        hdbg.dassert_file_exists(src_file)
        # Insert `.base` before the extension (e.g., `foo.py` ->
        # `foo.base.py`, `foo.md` -> `foo.base.md`) so that the base file never
        # has the same name as the file in the working tree.
        root, ext = os.path.splitext(src_file)
        base_file = f"{root}.base{ext}"
        # Save the base file.
        cmd = f"git show {src_hash}:{src_file} >{base_file}"
        rc = hsystem.system(cmd, abort_on_error=False)
        if rc == 0:
            # The file was created: nothing to do.
            pass
        elif rc == 128:
            # Note that the file potentially could not exist, i.e., it was added
            # in the branch. In this case Git returns:
            # ```
            # rc=128 fatal: path 'dataflow/pipelines/real_time/test/
            # test_dataflow_pipelines_real_time_pipeline.py' exists on disk, but
            # not in 'ce54877016204315766e90df7c45192bec1fbf20'
            # ```
            # The shell redirection has created an empty base file: remove it
            # and diff the current file against an empty base.
            if os.path.exists(base_file):
                os.remove(base_file)
            base_file = "/dev/null"
        else:
            raise ValueError(f"cmd='{cmd}' returned {rc}")
        # Update the script to diff.
        script_txt.append(f"vimdiff {base_file} {src_file}")
    # Save the script to compare.
    script_file_name = "./tmp.vimdiff_overlapping_files.sh"
    script_txt = "\n".join(script_txt)
    hio.create_executable_script(script_file_name, script_txt)
    print(f"# To diff against the base run:\n> {script_file_name}")
+ target_dir = f"/{default_dst_dir_basename}/" + idx = src_file_path.find(target_dir) + if idx >= 0: + src_dir_basename = default_dst_dir_basename + dst_dir_basename = default_src_dir_basename + subdir = src_file_path[idx + len(target_dir) :] + else: + target_dir = f"/{default_src_dir_basename}/" + idx = src_file_path.find(target_dir) + if idx >= 0: + src_dir_basename = default_src_dir_basename + dst_dir_basename = default_dst_dir_basename + subdir = src_file_path[idx + len(target_dir) :] + else: + raise ValueError( + f"Can't find either '{default_src_dir_basename}' or " + f"'{default_dst_dir_basename}' in file_path=" + f"'{src_file_path}'" + ) + # Replace src dir (e.g., `cmamp1`) with dst dir (e.g., `amp1`). + dst_file_path = src_file_path.replace( + f"/{src_dir_basename}/", f"/{dst_dir_basename}/" + ) + _LOG.debug(hprint.to_str("dst_file_path subdir")) + if check_exists: + hdbg.dassert_path_exists(dst_file_path) + return dst_file_path, subdir + + +@task +def integrate_rsync( # type: ignore + ctx, + src_dir, + src_dir_basename=DEFAULT_SRC_DIR_BASENAME, + dst_dir_basename=DEFAULT_DST_DIR_BASENAME, + dst_dir="", + check_dir=True, + dry_run=False, +): + """ + Use `rsync` to bring two dirs to sync. + + E.g., + ``` + > invoke integrate_diff_dirs + ... + ... Only in .../cmamp1/.../alpha_numeric_data_snapshots: alpha + ... Only in .../amp1/.../alpha_numeric_data_snapshots: latest + + # Accept the `cmamp1` side vs the `amp1` side with: + > invoke integrate_rsync .../cmamp1/.../alpha_numeric_data_snapshots/ + ``` + + :param src_dir: dir to be used. If empty, it is inferred from file_name + :param dst_dir: dir to be used. 
@task
def integrate_file(  # type: ignore
    ctx,
    file_name,
    src_dir_basename=DEFAULT_SRC_DIR_BASENAME,
    dst_dir_basename=DEFAULT_DST_DIR_BASENAME,
    dry_run=False,
):
    """
    Diff corresponding files in two different repos.

    ```
    # The path is assumed referred to current dir.
    > i integrate_file --file-name helpers/lib_tasks_integrate.py

    > i integrate_file --file-name /Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py

    > i integrate_file --file-name helpers/lib_tasks_integrate.py --src-dir-basename cmamp1 --dst-dir-basename kaizenflow1
    ```

    :param ctx: invoke ctx
    :param file_name: it can be a full path (e.g.,
        `/Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py`)
        or a path relative to the current dir (e.g.,
        `helpers/lib_tasks_integrate.py`)
    :param src_dir_basename: basename of the dir with the source repo
    :param dst_dir_basename: basename of the dir with the destination repo
    :param dry_run: report the system command instead of executing it
    """
    hlitauti.report_task()
    _ = ctx
    file_name = os.path.normpath(file_name)
    hdbg.dassert_file_exists(file_name)
    # If the file is in the current dir, we need to prepend the dir name.
    if not os.path.isabs(file_name):
        file_name = os.path.join(os.getcwd(), file_name)
    _LOG.info(hprint.to_str("file_name"))
    # Find the corresponding file in the other repo.
    dst_file_name, _ = _infer_dst_file_path(
        file_name,
        default_src_dir_basename=src_dir_basename,
        default_dst_dir_basename=dst_dir_basename,
    )
    _LOG.info(hprint.to_str("file_name dst_file_name"))
    #
    _LOG.info("Diffing:\n'%s'\nvs\n'%s'", file_name, dst_file_name)
    cmd = f"vimdiff {file_name} {dst_file_name}"
    if dry_run:
        _LOG.warning("Dry-run: skipping '%s'", cmd)
    else:
        # We need to use `system` to get vimdiff to connect to stdin and stdout.
        # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True)
        os.system(cmd)
+# FILE=helpers/lib_tasks_git.py; (cd ~/src/cmamp1; git log -1 $FILE); (cd ~/src/kaizenflow1; git log -1 $FILE) + +# > git log --pretty=format:"%h - %an, %ad : %s" --date=short | grep _Integrate_ | head -5 +# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) +# 5a05a0c94 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_6 (#365) +# 6c3ad7d87 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_5 (#364) +# 36abfd8b3 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_3 (#361) +# 65fe42d38 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_2 (#360) + +# In Sorr +# GIT_INTEGR_HASH=fffa1c8b2 +# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) + +# In cmamp +# 20526ed09 - GP Saggese, 2023-08-10 : AmpTask1786_Integrate_20230810_2 (#5011) + +# Show files changed since an integration point +# > git diff --name-only $GIT_INTEGR_HASH dataflow_amp +# dataflow_amp/system/mock1/test/test_mock1_forecast_system.py + +# Show the difference since an integration point +# git difftool $GIT_INTEGR_HASH.. dataflow_amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py new file mode 100644 index 000000000..a3599f2da --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py @@ -0,0 +1,443 @@ +""" +Import as: + +import helpers.lib_tasks_lint as hlitalin +""" + +import datetime +import filecmp +import logging +import os + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. 
@task
def lint_check_python_files_in_docker(  # type: ignore
    ctx,
    python_compile=True,
    python_execute=True,
    modified=False,
    branch=False,
    last_commit=False,
    all_=False,
    files="",
):
    """
    Compile and execute Python files checking for errors.

    This is supposed to be run inside Docker.

    The params have the same meaning as in `_get_files_to_process()`.

    :param python_compile: compile each file with `compileall`
    :param python_execute: execute each file with `python`
    :return: `True` if at least one file failed a check
    """
    # Imported here since it is needed only by this task.
    import compileall

    hlitauti.report_task()
    _ = ctx
    # We allow to filter through the user specified `files`.
    mutually_exclusive = False
    remove_dirs = True
    file_list = hlitauti._get_files_to_process(
        modified,
        branch,
        last_commit,
        all_,
        files,
        mutually_exclusive,
        remove_dirs,
    )
    _LOG.debug("Found %d files:\n%s", len(file_list), "\n".join(file_list))
    # Filter keeping only Python files.
    _LOG.debug("Filtering for Python files")
    exclude_paired_jupytext = True
    file_list = hio.keep_python_files(file_list, exclude_paired_jupytext)
    _LOG.debug("file_list=%s", "\n".join(file_list))
    _LOG.info("Need to process %d files", len(file_list))
    if not file_list:
        _LOG.warning("No files were selected")
    # Scan all the files.
    failed_filenames = []
    for file_name in file_list:
        _LOG.info("Processing '%s'", file_name)
        # Track the failure per file so that a file failing both checks is
        # reported (and counted) only once.
        has_failed = False
        if python_compile:
            success = compileall.compile_file(file_name, force=True, quiet=1)
            _LOG.debug("file_name='%s' -> python_compile=%s", file_name, success)
            if not success:
                msg = f"'{file_name}' doesn't compile correctly"
                _LOG.error(msg)
                has_failed = True
        # TODO(gp): Add also `python -c "import ..."`, if not equivalent to `compileall`.
        if python_execute:
            cmd = f"python {file_name}"
            rc = hsystem.system(cmd, abort_on_error=False, suppress_output=False)
            _LOG.debug("file_name='%s' -> python_execute=%s", file_name, rc)
            if rc != 0:
                msg = f"'{file_name}' doesn't execute correctly"
                _LOG.error(msg)
                has_failed = True
        if has_failed:
            failed_filenames.append(file_name)
    hprint.log_frame(
        _LOG,
        f"failed_filenames={len(failed_filenames)}",
        verbosity=logging.INFO,
    )
    _LOG.info("\n".join(failed_filenames))
    error = len(failed_filenames) > 0
    return error
@task
def lint_detect_cycles(  # type: ignore
    ctx,
    dir_name=".",
    stage="prod",
    version="",
    out_file_name="lint_detect_cycles.output.txt",
    debug_tool=False,
):
    """
    Detect cyclic imports in the directory files.

    For param descriptions, see `lint()`.

    :param dir_name: the name of the dir to detect cyclic imports in
        - By default, the check will be carried out in the dir from where
          the task is run
    :param out_file_name: file where the output of the tool is saved; an
        existing file is removed before the run
    :param debug_tool: print the output of the cycle detector
    """
    hlitauti.report_task()
    # Remove the log file.
    if os.path.exists(out_file_name):
        cmd = f"rm {out_file_name}"
        hlitauti.run(ctx, cmd)
    # Prepare the command line.
    docker_cmd_opts = [dir_name]
    if debug_tool:
        docker_cmd_opts.append("-v DEBUG")
    # Locate the script in the same way as `lint()` does: `find` on Mac needs
    # an explicit starting dir.
    if hserver.is_host_mac():
        find_cmd = "$(find . -path '*import_check/detect_import_cycles.py')"
    else:
        find_cmd = "$(find -wholename '*import_check/detect_import_cycles.py')"
    docker_cmd_ = find_cmd + " " + hlitauti._to_single_line_cmd(docker_cmd_opts)
    # Execute command line.
    base_image = ""
    cmd = _get_lint_docker_cmd(base_image, docker_cmd_, stage, version)
    # Use `PIPESTATUS` otherwise the exit status of the pipe is always 0
    # because writing to a file succeeds.
    cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}; exit $PIPESTATUS"
    # Run.
    hlitauti.run(ctx, cmd)
+ :param only_format: run only the modifying actions of Linter (e.g., black) + :param only_check: run only the non-modifying actions of Linter (e.g., pylint) + """ + # Check if the user is in a repo root. + hdbg.dassert( + hgit.is_cwd_git_repo(), + msg="Linter should run from repo root", + ) + hlitauti.report_task() + # Prepare the command line. + lint_cmd_opts = [] + # Add the file selection argument. + hdbg.dassert_eq( + int(len(files) > 0) + + int(len(from_file) > 0) + + int(len(dir_name) > 0) + + int(modified) + + int(last_commit) + + int(branch), + 1, + msg="Specify exactly one among --files, --from_file, --dir-name, --modified, --last-commit, --branch", + ) + if len(files) > 0: + lint_cmd_opts.append(f"--files {files}") + elif len(from_file) > 0: + lint_cmd_opts.append(f"--from_file {from_file}") + elif len(dir_name) > 0: + lint_cmd_opts.append(f"--dir_name {dir_name}") + elif modified: + lint_cmd_opts.append("--modified") + elif last_commit: + lint_cmd_opts.append("--last_commit") + elif branch: + lint_cmd_opts.append("--branch") + else: + raise ValueError("No file selection arguments are specified") + if len(skip_files) > 0: + lint_cmd_opts.append(f"--skip_files {skip_files}") + # + lint_cmd_opts.append(f"--num_threads {num_threads}") + # Add the action selection argument, if needed. + hdbg.dassert_lte( + int(only_format) + int(only_check), + 1, + msg="Specify only one among --only-format, --only-check", + ) + if only_format: + lint_cmd_opts.append("--only_format") + elif only_check: + lint_cmd_opts.append("--only_check") + else: + _LOG.info("All Linter actions selected") + # Compose the command line. + if hserver.is_host_mac(): + find_cmd = "$(find . -path '*linters/base.py')" + else: + find_cmd = "$(find -wholename '*linters/base.py')" + lint_cmd_ = find_cmd + " " + hlitauti._to_single_line_cmd(lint_cmd_opts) + docker_cmd_ = _get_lint_docker_cmd( + base_image, lint_cmd_, stage=stage, version=version + ) + # Run. 
@task
def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False):  # type: ignore
    """
    Sync code needed to run linter / ai_review from a Git client to the current one.

    :param ctx: invoke ctx
    :param git_client_name: the name of the Git client to sync from. It can be
        something like "helpers1" and it will be used from "$HOME/src" or can
        be a full path.
    :param revert_to_original: if `True`, revert the changes to the original
        with `git checkout` instead of copying the files
    """
    _ = ctx
    hlitauti.report_task()
    # Copy the code from the src Git client to the current one.
    src_git_dir = hgit.resolve_git_client_dir(git_client_name)
    #
    files_to_copy = [
        # "hgit.py",
        # "hmarkdown.py",
        "llm_prompts.py",
        "llm_transform.py",
        "inject_todos.py",
        "all.coding_style_guidelines.reference.md",
    ]
    # Revert the files in the current git client to the original code.
    if revert_to_original:
        _LOG.debug("Reverting to original code ...")
        # The Git root doesn't depend on the file: compute it once.
        git_root_dir = hgit.find_git_root(src_git_dir)
        for file_name in files_to_copy:
            _LOG.debug("Reverting %s to original code", file_name)
            src_file_path = hgit.find_file(file_name, dir_path=src_git_dir)
            src_file_path = os.path.relpath(src_file_path, git_root_dir)
            cmd = "git checkout -- " + src_file_path
            hsystem.system(cmd)
        _LOG.info("Done")
        return
    # Get the path to the helpers repo.
    src_helpers_dir = hgit.find_helpers_root(src_git_dir)
    hdbg.dassert_ne(src_helpers_dir, "")
    hdbg.dassert_dir_exists(src_helpers_dir)
    #
    dst_helpers_dir = hgit.find_helpers_root()
    hdbg.dassert_dir_exists(dst_helpers_dir)
    _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir"))
    #
    _LOG.info(
        "Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir
    )
    # Find the files to copy.
    for file_name in files_to_copy:
        _LOG.debug(hprint.to_str("file_name"))
        # Get the path to the file in the src Git client.
        src_file_path = hgit.find_file(file_name, dir_path=src_git_dir)
        src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path))
        _LOG.debug(hprint.to_str("src_file_path"))
        hdbg.dassert_file_exists(src_file_path)
        # Get the path to the file in the dst Git client.
        dst_file_path = hgit.project_file_name_in_git_client(
            src_file_path, src_helpers_dir, dst_helpers_dir
        )
        _LOG.debug(hprint.to_str("dst_file_path"))
        _LOG.debug(hprint.to_str("src_file_path dst_file_path"))
        dir_name = os.path.dirname(dst_file_path)
        # Skip the copy when the destination file already exists and has the
        # same content as the source (which is known to exist at this point).
        if os.path.isfile(dst_file_path) and filecmp.cmp(
            src_file_path, dst_file_path, shallow=False
        ):
            _LOG.info(
                "File '%s' is identical to '%s', skipping",
                src_file_path,
                dst_file_path,
            )
            continue
        # Copy the file.
        hio.create_dir(dir_name, incremental=True)
        cmd = f"cp -f {src_file_path} {dst_file_path}"
        _LOG.debug(hprint.to_str("cmd"))
        _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path)
        hsystem.system(cmd)
    _LOG.info("Done")
+import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + +# ############################################################################# +# Fix permission +# ############################################################################# + + +# The desired invariants are that all files +# 1) are owned by our user or by Docker user +# 2) have the shared group as group +# 3) have the same user and group permissions + +# E.g., +# -rw-rw-r-- 1 sasm sasm-fileshare 21877 Nov 3 18:11 pytest_logger.log + +# The possible problems are: +# -r--r--r-- 1 sasm sasm-fileshare ./.git/objects/02/4df16f66c87bdfb +# -rw-r--r-- 1 265533 sasm-fileshare ./core_lime/dataflow/nodes/test/te +# -rw-rw-r-- 1 265533 sasm-fileshare ./research/real_time/notebooks/Lim + +# drwxr-sr-x 2 gsaggese sasm-fileshare 35 Oct 12 21:51 test +# chmod g=u amp/dev_scripts/git/git_hooks/test + + +def _save_dir_status(dir_name: str, filename: str) -> None: + cmd = f'find {dir_name} -name "*" | sort | xargs ls -ld >{filename}' + hsystem.system(cmd) + _LOG.info("Saved dir status in %s", filename) + + +# From https://stackoverflow.com/questions/1830618 +def _get_user_group(filename: str) -> Tuple[str, str]: + """ + Return the symbolic name of user and group of a file. + """ + uid = os.stat(filename).st_uid + try: + user = pwd.getpwuid(uid).pw_name + except KeyError as e: + # _LOG.warning("Error: ", str(e)) + _ = e + user = str(uid) + # + gid = os.stat(filename).st_gid + try: + group = grp.getgrgid(gid).gr_name + except KeyError as e: + _ = e + group = str(gid) + return user, group + + +def _find_files_for_user(dir_name: str, user: str, is_equal: bool) -> List[str]: + """ + Find all the files under `abs_dir` that are owned or not by `user`. + """ + _LOG.debug("") + mode = "\\!" 
if not is_equal else "" + cmd = f'find {dir_name} -name "*" {mode} -user "{user}"' + _, txt = hsystem.system_to_string(cmd) + files: List[str] = txt.split("\n") + return files + + +def _find_files_for_group( + dir_name: str, group: str, is_equal: bool +) -> List[str]: + """ + Find all the files under `abs_dir` that are owned by a group `group`. + """ + _LOG.debug("") + mode = "\\!" if not is_equal else "" + cmd = f'find {dir_name} -name "*" {mode} -group "{group}"' + _, txt = hsystem.system_to_string(cmd) + files: List[str] = txt.split("\n") + return files + + +def _compute_stats_by_user_and_group(dir_name: str) -> Tuple[Dict, Dict, Dict]: + """ + Scan all the files reporting statistics in terms of users and groups. + + It also compute a mapping from file to user and group. + """ + _LOG.debug("") + # Find all files. + cmd = f'find {dir_name} -name "*"' + _, txt = hsystem.system_to_string(cmd) + files = txt.split("\n") + # Get the user of each file. + user_to_files: Dict[str, List[str]] = {} + group_to_files: Dict[str, List[str]] = {} + file_to_user_group: Dict[str, Tuple[str, str]] = {} + for file in files: + user, group = _get_user_group(file) + # Update mapping from user to files. + if user not in user_to_files: + user_to_files[user] = [] + user_to_files[user].append(file) + # Update mapping from group to files. + if group not in group_to_files: + group_to_files[group] = [] + group_to_files[group].append(file) + # Update the mapping from file to (user, group). + hdbg.dassert_not_in(file, file_to_user_group) + file_to_user_group[file] = (user, group) + # Print stats. 
+ txt1 = "" + for user, files in user_to_files.items(): + txt1 += f"{user}({len(files)}), " + _LOG.info("user=%s", txt1) + # + txt2 = "" + for group, files in group_to_files.items(): + txt2 += f"{group}({len(files)}), " + _LOG.info("group=%s", txt2) + return user_to_files, group_to_files, file_to_user_group + + +def _ls_l(files: List[str], size: int = 100) -> str: + """ + Run `ls -l` on the files using chunks of size `size`. + """ + txt = [] + for pos in range(0, len(files), size): + files_tmp = files[pos : pos + size] + files_tmp = [f"'{f}'" for f in files_tmp] + cmd = f"ls -ld {' '.join(files_tmp)}" + _, txt_tmp = hsystem.system_to_string(cmd) + txt.append(txt_tmp) + return "\n".join(txt) + + +def _exec_cmd_by_chunks( + cmd: str, files: List[str], abort_on_error: bool, size: int = 100 +) -> None: + """ + Execute `cmd` on files using chunks of size `size`. + """ + for pos in range(0, len(files), size): + files_tmp = files[pos : pos + size] + files_tmp = [f"'{f}'" for f in files_tmp] + cmd = f"{cmd} {' '.join(files_tmp)}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +def _print_problems(dir_name: str = ".") -> None: + """ + Do `ls -l` on files that are not owned by the current user and its group. + + This function is used for debugging. 
+ """ + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + user = hsystem.get_user_name() + docker_user = hserver.get_docker_user() + # user_group = f"{user}_g" + # shared_group = hserver.get_docker_shared_group() + files_with_problems = [] + for file, (curr_user, curr_group) in file_to_user_group.items(): + _ = curr_user, curr_group + # Files owned by our user and + # if curr_user == user and curr_group == user_group: + # continue + if curr_user in (user, docker_user): + continue + # if curr_group == shared_group: + # continue + files_with_problems.append(file) + # + txt = _ls_l(files_with_problems) + print(txt) + + +def _change_file_ownership(file: str, abort_on_error: bool) -> None: + """ + Change ownership of files with an invalid user (e.g., 265533) by copying + and deleting. + """ + # pylint: disable=line-too-long + # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # -rw-r--r-- 1 265533 sasm-fileshare 14327 Nov 3 14:01 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # + # > mv ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{,.OLD} + # + # > cp ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{.OLD,} + # + # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # -rw-r--r-- 1 gsaggese sasm-fileshare 14327 Nov 5 17:58 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py + # + # > rm -rf ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py.OLD + # pylint: enable=line-too-long + hdbg.dassert_file_exists(file) + tmp_file = file + ".OLD" + # + cmd = f"mv {file} {tmp_file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + # + cmd = f"cp {tmp_file} {file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + # + cmd = f"rm -rf {tmp_file}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +def _fix_invalid_owner(dir_name: str, fix: bool, abort_on_error: bool) -> None: + """ + Fix files that are owned by a user 
that is not the current user or the + Docker one. + """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + # + _LOG.info("Before fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + # + user = hsystem.get_user_name() + docker_user = hserver.get_docker_user() + for file, (curr_user, _) in tqdm.tqdm(file_to_user_group.items()): + if curr_user not in (user, docker_user): + _LOG.info("Fixing file '%s'", file) + hdbg.dassert_file_exists(file) + cmd = f"ls -l {file}" + hsystem.system( + cmd, abort_on_error=abort_on_error, suppress_output=False + ) + if fix: + _change_file_ownership(file, abort_on_error) + # + _LOG.info("After fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + + +def _fix_group(dir_name: str, fix: bool, abort_on_error: bool) -> None: + """ + Ensure that all files are owned by the shared group. + """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + _LOG.info("Before fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + if fix: + # Get the user and the group. + user = hsystem.get_user_name() + user_group = f"{user}_g" + shared_group = hserver.get_docker_shared_group() + # + for file, (curr_user, curr_group) in file_to_user_group.items(): + # If the group is the shared group there is nothing to do. + if curr_group == shared_group: + continue + cmd = f"chgrp {shared_group} {file}" + if curr_user == user: + # This is a paranoia check. + hdbg.dassert_eq(curr_group, user_group) + else: + # For files not owned by the current user, we need to `sudo`. + cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + _LOG.info("After fix") + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + else: + _LOG.warning("Skipping fix") + + +def _fix_group_permissions(dir_name: str, abort_on_error: bool) -> None: + """ + Ensure that all files are owned by the shared group. 
+ """ + _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) + _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) + user = hsystem.get_user_name() + # docker_user = get_default_param("DOCKER_USER") + for file, (curr_user, curr_group) in tqdm.tqdm(file_to_user_group.items()): + _ = curr_group + st_mode = os.stat(file).st_mode + perms = oct(st_mode & 0o777) + # perms=0o775 + if perms[2] != perms[3]: + _LOG.debug("%s -> %s, %s", file, oct(st_mode), perms) + cmd = f"chmod g=u {file}" + if curr_user != user: + # For files not owned by the current user, we need to `sudo`. + cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + is_dir = os.path.isdir(file) + if is_dir: + # pylint: disable=line-too-long + # From https://www.gnu.org/software/coreutils/manual/html_node/Directory-Setuid-and-Setgid.html + # If a directory + # inherit the same group as the directory, + # pylint: enable=line-too-long + has_set_group_id = st_mode & stat.S_ISGID + if not has_set_group_id: + cmd = f"chmod g+s {file}" + if curr_user != user: + # For files not owned by the current user, we need to `sudo`. 
+ cmd = f"sudo -u {curr_user} {cmd}" + hsystem.system(cmd, abort_on_error=abort_on_error) + + +@task +def fix_perms( # type: ignore + ctx, dir_name=".", action="all", fix=True, abort_on_error=True +): + """ + :param action: + - `all`: run all the fixes + - `print_stats`: print stats about file users and groups + - `print_problems`: + - `fix_invalid_owner`: fix the files with an invalid owner (e.g., mysterious + 265533) + - `fix_group`: ensure that shared group owns all the files + - `fix_group_permissions`: ensure that the group permissions are the same + as the owner ones + """ + _ = ctx + hlitauti.report_task() + # + if hserver.is_dev4(): + if action == "all": + action = ["fix_invalid_owner", "fix_group", "fix_group_permissions"] + else: + action = [action] + # + file_name1 = "./tmp.fix_perms.before.txt" + _save_dir_status(dir_name, file_name1) + # + if "print_stats" in action: + _compute_stats_by_user_and_group(dir_name) + if "print_problems" in action: + _print_problems(dir_name) + if "fix_invalid_owner" in action: + _fix_invalid_owner(dir_name, fix, abort_on_error) + if "fix_group" in action: + _fix_group(dir_name, fix, abort_on_error) + if "fix_group_permissions" in action: + _fix_group_permissions(dir_name, abort_on_error) + # + file_name2 = "./tmp.fix_perms.after.txt" + _save_dir_status(dir_name, file_name2) + # + cmd = f"To compare run:\n> vimdiff {file_name1} {file_name2}" + print(cmd) + elif hserver.is_dev_csfy(): + user = hsystem.get_user_name() + group = user + cmd = f"sudo chown -R {user}:{group} *" + hsystem.system(cmd) + cmd = f"sudo chown -R {user}:{group} .pytest_cache" + hsystem.system(cmd, abort_on_error=False) + elif hserver.is_external_dev(): + # Nothing to do. 
@task
def print_tasks(ctx, as_code=False):  # type: ignore
    """
    Print all the available tasks in `lib_tasks.py`.

    These tasks might or might not be exposed by the different repos.

    :param as_code: print as python code so that it can be embedded in a
        `from helpers.lib_tasks import ...`
    """
    hlitauti.report_task()
    _ = ctx
    lib_tasks_file_name = os.path.join(
        hgit.get_amp_abs_path(), "helpers/lib_tasks.py"
    )
    hdbg.dassert_file_exists(lib_tasks_file_name)
    # TODO(gp): Use __file__ instead of hardwiring the file.
    # Extract the `def` line following each `@task` decorator, e.g.,
    # def print_setup(ctx): # type: ignore
    # def git_pull(ctx): # type: ignore
    # def git_fetch_master(ctx): # type: ignore
    cmd = rf'\grep "^@task" -A 1 {lib_tasks_file_name} | grep def'
    _, txt = hsystem.system_to_string(cmd)
    def_regex = re.compile(r"^def\s+(\S+)\(")
    task_names = []
    for line in txt.split("\n"):
        _LOG.debug("line=%s", line)
        match = def_regex.match(line)
        if match is None:
            continue
        task_name = match.group(1)
        _LOG.debug(" -> %s", task_name)
        task_names.append(task_name)
    task_names.sort()
    # When printing as code each name is followed by a comma.
    suffix = "," if as_code else ""
    print("\n".join(f"{task_name}{suffix}" for task_name in task_names))
+ """ + _ = ctx + print( + henv.env_to_str( + repo_config=repo_config, + server_config=server_config, + system_signature=system_signature, + env_vars=env_vars, + ) + ) + + +# TODO(gp): +# Print a CSV +# cat /share/data/cf_production/20221005/system_log_dir/process_forecasts/target_positions/20221005_153006.csv | column -t -s, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py new file mode 100644 index 000000000..98a9b203e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py @@ -0,0 +1,1743 @@ +""" +Import as: + +import helpers.lib_tasks_pytest as hlitapyt +""" + +import json +import logging +import os +import re +import sys +from typing import Any, List, Optional, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hcoverage as hcovera +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hprint as hprint +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.htraceback as htraceb +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_lint as hlitalin +import helpers.lib_tasks_utils as hlitauti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# Run tests. 
+# ############################################################################# + + +_COV_PYTEST_OPTS = [ + # Only compute coverage for current project and not venv libraries. + "--cov=.", + "--cov-branch", + # Report the missing lines. + # Name Stmts Miss Cover Missing + # ------------------------------------------------------------------------- + # myproj/__init__ 2 0 100% + # myproj/myproj 257 13 94% 24-26, 99, 149, 233-236, 297-298 + "--cov-report term-missing", + # Report data in the directory `htmlcov`. + "--cov-report html", + # "--cov-report annotate", +] + + +_TEST_TIMEOUTS_IN_SECS = { + "fast_tests": 5, + "slow_tests": 30, + "superslow_tests": 60 * 60, +} + + +_NUM_TIMEOUT_TEST_RERUNS = { + "fast_tests": 2, + "slow_tests": 1, + "superslow_tests": 1, +} + + +@task +def run_blank_tests(ctx, stage="dev", version=""): # type: ignore + """ + (ONLY CI/CD) Test that pytest in the container works. + """ + hlitauti.report_task() + _ = ctx + base_image = "" + cmd = '"pytest -h >/dev/null"' + docker_cmd_ = hlitadoc._get_docker_compose_cmd( + base_image, stage, version, cmd + ) + hsystem.system(docker_cmd_, abort_on_error=False, suppress_output=False) + + +def _select_tests_to_skip(test_list_name: str) -> str: + """ + Generate text for pytest specifying which tests to deselect. + """ + if test_list_name == "fast_tests": + skipped_tests = "not slow and not superslow" + elif test_list_name == "slow_tests": + skipped_tests = "slow and not superslow" + elif test_list_name == "superslow_tests": + skipped_tests = "not slow and superslow" + else: + raise ValueError(f"Invalid `test_list_name`={test_list_name}") + return skipped_tests + + +def _build_run_command_line( + test_list_name: str, + custom_marker: str, + pytest_opts: str, + skip_submodules: bool, + coverage: bool, + collect_only: bool, + tee_to_file: bool, + n_threads: str, + *, + allure_dir: Optional[str] = None, +) -> str: + """ + Build the pytest run command. 
+ + E.g., + + ``` + pytest -m "optimizer and not slow and not superslow" \ + . \ + -o timeout_func_only=true \ + --timeout 5 \ + --reruns 2 \ + --only-rerun "Failed: Timeout" + ``` + + The rest of params are the same as in `run_fast_tests()`. + + The invariant is that we don't want to duplicate pytest options that can be + passed by the user through `-p` (unless really necessary). + + :param test_list_name: "fast_tests", "slow_tests" or + "superslow_tests" + :param custom_marker: specify a space separated list of + `pytest` markers to skip (e.g., `optimizer` for the optimizer + tests, see `pytest.ini`). Empty means no marker to skip + :param allure_dir: directory to save allure results to. If specified, allure + plugin will be installed on-the-fly and results will be generated + and saved to the specified directory + """ + hdbg.dassert_in( + test_list_name, _TEST_TIMEOUTS_IN_SECS, "Invalid test_list_name" + ) + pytest_opts = pytest_opts or "." + pytest_opts_tmp = [] + # Select tests to skip based on the `test_list_name` (e.g., fast tests) + # and on the custom marker, if present. + skipped_tests = _select_tests_to_skip(test_list_name) + timeout_in_sec = _TEST_TIMEOUTS_IN_SECS[test_list_name] + # Detect if we are running on a CK dev server / inside CI + # or a laptop outside the CK infra. + is_outside_ck_infra = ( + not hserver.is_dev_csfy() and not hserver.is_inside_ci() + ) + if is_outside_ck_infra: + timeout_multiplier = 10 + _LOG.warning( + f"Tests are running outside the CK server and CI, timeout increased {timeout_multiplier} times." + ) + # Since we are running outside the CK server we increase the duration + # of the timeout, since the thresholds are set for the CK server. 
+ timeout_in_sec *= timeout_multiplier + if custom_marker != "": + pytest_opts_tmp.append(f'-m "{custom_marker} and {skipped_tests}"') + else: + pytest_opts_tmp.append(f'-m "{skipped_tests}"') + if pytest_opts: + pytest_opts_tmp.append(pytest_opts) + # Adding `timeout_func_only` is a workaround for + # https://github.com/pytest-dev/pytest-rerunfailures/issues/99. Because of + # it, we limit only run time, without setup and teardown time. + pytest_opts_tmp.append("-o timeout_func_only=true") + pytest_opts_tmp.append(f"--timeout {timeout_in_sec}") + num_reruns = _NUM_TIMEOUT_TEST_RERUNS[test_list_name] + pytest_opts_tmp.append( + f'--reruns {num_reruns} --only-rerun "Failed: Timeout"' + ) + if hserver.skip_submodules_test(): + # For some repos submodules should be skipped + # regardless of the passed value. + skip_submodules = True + if skip_submodules: + submodule_paths = hgit.get_submodule_paths() + _LOG.warning( + "Skipping %d submodules: %s", len(submodule_paths), submodule_paths + ) + pytest_opts_tmp.append( + " ".join([f"--ignore {path}" for path in submodule_paths]) + ) + if coverage: + pytest_opts_tmp.append(" ".join(_COV_PYTEST_OPTS)) + if collect_only: + _LOG.warning("Only collecting tests as per user request") + pytest_opts_tmp.append("--collect-only") + # Indicate the number of threads for parallelization. + if n_threads != "serial": + pytest_opts_tmp.append(f"-n {str(n_threads)}") + if allure_dir is not None: + pytest_opts_tmp.append(f"--alluredir={allure_dir}") + # Generate test report. + pytest_opts_tmp.append("--junit-xml=tmp.junit.xml") + # Add runnable dir image name to the test report. + image_name = hrecouti.get_repo_config().get_docker_base_image_name() + pytest_opts_tmp.append(f'-o junit_suite_name="{image_name}"') + # Concatenate the options. 
+ _LOG.debug("pytest_opts_tmp=\n%s", str(pytest_opts_tmp)) + pytest_opts_tmp = [po for po in pytest_opts_tmp if po != ""] + # TODO(gp): Use to_multi_line_cmd() + pytest_opts = " ".join([po.rstrip().lstrip() for po in pytest_opts_tmp]) + cmd = f"pytest {pytest_opts}" + if allure_dir is not None: + # Install the `allure-pytest` before running the tests. This is needed + # to generate Allure results which serve as an input for generating + # Allure HTML reports. + # Excluding the command `"source /venv/bin/activate"` because post-activation, + # the `PATH` variable lacks necessary values, causing a failure in a test + # associated with `publish_notebook.py`. + cmd = f"sudo /venv/bin/pip install allure-pytest && {cmd}" + if tee_to_file: + cmd += f" 2>&1 | tee tmp.pytest.{test_list_name}.log" + return cmd + + +def _run_test_cmd( + ctx: Any, + stage: str, + version: str, + cmd: str, + coverage: bool, + collect_only: bool, + skip_pull: bool, + start_coverage_script: bool, + **ctx_run_kwargs: Any, +) -> Optional[int]: + """ + See params in `run_fast_tests()`. + """ + if collect_only: + # Clean files. + hlitauti.run(ctx, "rm -rf ./.coverage*") + # Run. + base_image = "" + # We need to add some " to pass the string as it is to the container. + cmd = f"'{cmd}'" + # We use "host" for the app container to allow access to the database + # exposing port 5432 on localhost (of the server), when running dind we + # need to switch back to bridge. See CmTask988. + extra_env_vars = ["NETWORK_MODE=bridge"] + docker_cmd_ = hlitadoc._get_docker_compose_cmd( + base_image, stage, version, cmd, extra_env_vars=extra_env_vars + ) + _LOG.info("cmd=%s", docker_cmd_) + # We can't use `hsystem.system()` because of buffering of the output, + # losing formatting and so on, so we stick to executing through `ctx`. + rc: Optional[int] = hlitadoc._docker_cmd( + ctx, docker_cmd_, skip_pull=skip_pull, **ctx_run_kwargs + ) + # Print message about coverage. 
def _run_tests(
    ctx: Any,
    test_list_name: str,
    stage: str,
    version: str,
    custom_marker: str,
    pytest_opts: str,
    skip_pull: bool,
    skip_submodules: bool,
    coverage: bool,
    collect_only: bool,
    tee_to_file: bool,
    n_threads: str,
    git_clean_: bool,
    *,
    start_coverage_script: bool = False,
    allure_dir: Optional[str] = None,
    # TODO(Grisha): do we need to expose ctx kwargs to the invoke targets?
    # E.g., to `run_fast_tests`. See CmTask3602 "All tests fail".
    **ctx_run_kwargs: Any,
) -> Optional[int]:
    """
    Build the pytest command line for a test list and run it.

    See params in `run_fast_tests()`.

    :param ctx_run_kwargs: extra keyword args forwarded to `_run_test_cmd()`
    :return: the return code reported by `_run_test_cmd()`
    """
    # Optionally clean the Git client before running.
    if git_clean_:
        hlitauti.run(ctx, "invoke git_clean --fix-perms")
    # Build the command line.
    pytest_cmd = _build_run_command_line(
        test_list_name=test_list_name,
        custom_marker=custom_marker,
        pytest_opts=pytest_opts,
        skip_submodules=skip_submodules,
        coverage=coverage,
        collect_only=collect_only,
        tee_to_file=tee_to_file,
        n_threads=n_threads,
        allure_dir=allure_dir,
    )
    # Execute the command line.
    return _run_test_cmd(
        ctx,
        stage,
        version,
        pytest_cmd,
        coverage,
        collect_only,
        skip_pull,
        start_coverage_script,
        **ctx_run_kwargs,
    )
@task
def run_tests(  # type: ignore
    ctx,
    test_lists,
    abort_on_first_error=False,
    stage="dev",
    version="",
    custom_marker="",
    pytest_opts="",
    skip_pull=False,
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
    **kwargs,
):
    """
    Run several test lists back-to-back and summarize the outcome.

    The params not listed below are the same as in `run_fast_tests()`.

    :param test_lists: comma separated list with test lists to run (e.g.,
        `fast_tests,slow_tests`)
    :param abort_on_first_error: stop after the first test list failing
    :param kwargs: extra keyword args forwarded to `_run_tests()`
    :return: True if at least one test list returned a truthy return code
    """
    results = []
    for test_list_name in test_lists.split(","):
        rc = _run_tests(
            ctx,
            test_list_name,
            stage,
            version,
            custom_marker,
            pytest_opts,
            skip_pull,
            skip_submodules,
            coverage,
            collect_only,
            tee_to_file,
            n_threads,
            git_clean_,
            # Do not raise on failure so that the next test list can run.
            warn=True,
            allure_dir=allure_dir,
            **kwargs,
        )
        if rc != 0:
            _LOG.error("'%s' tests failed", test_list_name)
            if abort_on_first_error:
                sys.exit(-1)
        results.append((test_list_name, rc))
    #
    any_failed = any(result[1] for result in results)
    # Summarize the results.
    _LOG.info("# Tests run summary:")
    # Use dedicated loop variables: reusing `rc` here would overwrite the
    # aggregated result with the return code of the last test list.
    for test_list_name, test_list_rc in results:
        if test_list_rc != 0:
            _LOG.error("'%s' tests failed", test_list_name)
        else:
            _LOG.info("'%s' tests succeeded", test_list_name)
    return any_failed
# TODO(gp): Pass a test_list in fast, slow, ... instead of duplicating all the code CmTask #1571.
@task
def run_fast_tests(  # type: ignore
    ctx,
    stage="dev",
    version="",
    pytest_opts="",
    run_only_test_list="",
    skip_test_list="",
    skip_pull=False,
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run fast tests.

    Check `gh auth status` before invoking to avoid auth errors.

    :param stage: select a specific stage for the Docker image
    :param pytest_opts: additional options for `pytest` invocation. It can be
        empty
    :param run_only_test_list: select markers to run. Takes comma-separated
        tokens, e.g. `--run_only_test_list = requires_ck_infra,requires_aws`.
        It can't be used together with `skip_test_list`
    :param skip_test_list: select markers to skip. Takes comma-separated
        tokens
    :param skip_submodules: ignore all the dir inside a submodule
    :param coverage: enable coverage computation
    :param collect_only: do not run tests but show what will be executed
    :param tee_to_file: save output of pytest in
        `tmp.pytest.<test_list_name>.log`
    :param n_threads: the number of threads to run the tests with
        - "serial": do not parallelize
        - "auto": distribute the tests across all the available CPUs
    :param git_clean_: run `invoke git_clean --fix-perms` before running the
        tests
    :param allure_dir: directory to save allure results to. If specified,
        allure plugin will be installed on-the-fly and results will be
        generated and saved to the specified directory
    :return: the return code of the test run
    """
    hlitauti.report_task()
    # The two marker selections are mutually exclusive.
    hdbg.dassert(
        not run_only_test_list or not skip_test_list,
        "You can't specify both --run_only_test_list and --skip_test_list",
    )
    # Convert cmd line marker lists to a pytest marker list.
    custom_marker = _get_custom_marker(
        run_only_test_list=run_only_test_list, skip_test_list=skip_test_list
    )
    return _run_tests(
        ctx,
        "fast_tests",
        stage,
        version,
        custom_marker,
        pytest_opts,
        skip_pull,
        skip_submodules,
        coverage,
        collect_only,
        tee_to_file,
        n_threads,
        git_clean_,
        allure_dir=allure_dir,
    )
@task
def run_fast_slow_tests(  # type: ignore
    ctx,
    abort_on_first_error=False,
    stage="dev",
    version="",
    pytest_opts="",
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run fast and slow tests back-to-back.

    Same params as `invoke run_fast_tests`.

    :param abort_on_first_error: stop after the first test list failing
    :return: the value returned by `run_tests()`
    """
    hlitauti.report_task()
    # Run fast tests but do not fail on error.
    test_lists = "fast_tests,slow_tests"
    custom_marker = ""
    # Pass the options by keyword: `run_tests()` has a `skip_pull` param
    # between `pytest_opts` and `skip_submodules` that this task doesn't
    # expose, so a positional call shifts all the following options by one.
    rc = run_tests(
        ctx,
        test_lists,
        abort_on_first_error=abort_on_first_error,
        stage=stage,
        version=version,
        custom_marker=custom_marker,
        pytest_opts=pytest_opts,
        skip_submodules=skip_submodules,
        coverage=coverage,
        collect_only=collect_only,
        tee_to_file=tee_to_file,
        n_threads=n_threads,
        git_clean_=git_clean_,
        allure_dir=allure_dir,
    )
    return rc


@task
def run_fast_slow_superslow_tests(  # type: ignore
    ctx,
    abort_on_first_error=False,
    stage="dev",
    version="",
    pytest_opts="",
    skip_submodules=False,
    coverage=False,
    collect_only=False,
    tee_to_file=False,
    n_threads="serial",
    git_clean_=False,
    allure_dir=None,
):
    """
    Run fast, slow, superslow tests back-to-back.

    Same params as `invoke run_fast_tests`.

    :param abort_on_first_error: stop after the first test list failing
    :return: the value returned by `run_tests()`
    """
    hlitauti.report_task()
    # Run fast tests but do not fail on error.
    test_lists = "fast_tests,slow_tests,superslow_tests"
    custom_marker = ""
    # Pass the options by keyword: `run_tests()` has a `skip_pull` param
    # between `pytest_opts` and `skip_submodules` that this task doesn't
    # expose, so a positional call shifts all the following options by one.
    rc = run_tests(
        ctx,
        test_lists,
        abort_on_first_error=abort_on_first_error,
        stage=stage,
        version=version,
        custom_marker=custom_marker,
        pytest_opts=pytest_opts,
        skip_submodules=skip_submodules,
        coverage=coverage,
        collect_only=collect_only,
        tee_to_file=tee_to_file,
        n_threads=n_threads,
        git_clean_=git_clean_,
        allure_dir=allure_dir,
    )
    return rc
+ `s3://...-html/html_coverage/grisha_CmTask1047_fix_tests`. + """ + # Build the dir name from user and branch name. + user = hsystem.get_user_name() + branch_name = hgit.get_branch_name() + _LOG.debug("User='%s', branch_name='%s'", user, branch_name) + s3_html_coverage_dir = f"{user}_{branch_name}" + # Get the full path to the dir. + s3_html_base_dir = "html_coverage" + s3_html_bucket_path = hrecouti.get_repo_config().get_html_bucket_path() + s3_html_coverage_path = os.path.join( + s3_html_bucket_path, s3_html_base_dir, s3_html_coverage_dir + ) + # Copy HTML coverage data from the local dir to S3. + local_coverage_path = "./htmlcov" + # TODO(Nikola): Revert to `s3fs_.put` after `s3fs` is updated to latest + # version. See CmTask #2400. + use_aws_copy = True + if use_aws_copy: + sudo_prefix = "" + if hserver.is_inside_ci(): + # There is no AWS config in GH action, thus create default one from + # chosen profile. To bypass permission errors, `sudo` is used. + sudo_prefix = "sudo " + aws_set_param_cmd = "sudo aws configure set" + aws_set_profile_cmd = f"--profile {aws_profile}" + # TODO(Juraj): needed because ENV_VARS are now prefixed with + # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in + # CmTask11095. + # profile_prefix = aws_profile.upper() + profile_prefix = ( + "CSFY" + if aws_profile.upper() in ["AM", "CK"] + else aws_profile.upper() + ) + # Check if AWS session token is set in environment variable. 
+ if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: + aws_set_value_pairs = [ + f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + f"aws_session_token ${profile_prefix}_AWS_SESSION_TOKEN", + f"region ${profile_prefix}_AWS_DEFAULT_REGION", + ] + else: + aws_set_value_pairs = [ + f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow + f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow + f"region ${profile_prefix}_AWS_DEFAULT_REGION", + ] + aws_config_cmds = [ + f"{aws_set_param_cmd} {aws_set_value_pair} {aws_set_profile_cmd}" + for aws_set_value_pair in aws_set_value_pairs + ] + aws_config_pipe_cmd = " && ".join(aws_config_cmds) + hsystem.system(aws_config_pipe_cmd) + cp_cmd = ( + f"{sudo_prefix}aws s3 cp {local_coverage_path} {s3_html_coverage_path} " + f"--recursive --profile {aws_profile}" + ) + hsystem.system(cp_cmd) + else: + # Use `s3fs` to copy data to AWS S3. + s3fs_ = hs3.get_s3fs(aws_profile) + s3fs_.put(local_coverage_path, s3_html_coverage_path, recursive=True) + _LOG.info( + "HTML coverage report is published on S3: path=`%s`", + s3_html_coverage_path, + ) + + +@task +def run_coverage_report( # type: ignore + ctx, + target_dir, + generate_html_report=False, + publish_html_on_s3=True, + aws_profile="ck", +): + """ + Compute test coverage stats. + + The flow is: + - Run tests and compute coverage stats for each test type + - Combine coverage stats in a single file + - Generate a text report + - Generate a HTML report (optional) + - Post it on S3 (optional) + + :param target_dir: directory to compute coverage stats for. The value '.' 
+ uses all the dirs in the current working directory + :param generate_html_report: whether to generate HTML coverage report or not + :param publish_html_on_s3: whether to publish HTML coverage report or not + :param aws_profile: the AWS profile to use for publishing HTML report + """ + # TODO(Grisha): allow user to specify which tests to run. + # Run fast tests for the target dir and collect coverage results. + fast_tests_cmd = f"invoke run_fast_tests --coverage -p {target_dir}" + hlitauti.run(ctx, fast_tests_cmd, use_system=False) + fast_tests_coverage_file = ".coverage_fast_tests" + create_fast_tests_file_cmd = f"mv .coverage {fast_tests_coverage_file}" + hsystem.system(create_fast_tests_file_cmd) + # Run slow tests for the target dir and collect coverage results. + slow_tests_cmd = f"invoke run_slow_tests --coverage -p {target_dir}" + hlitauti.run(ctx, slow_tests_cmd, use_system=False) + slow_tests_coverage_file = ".coverage_slow_tests" + create_slow_tests_file_cmd = f"mv .coverage {slow_tests_coverage_file}" + hsystem.system(create_slow_tests_file_cmd) + # Check that coverage files are present for both fast and slow tests. + hdbg.dassert_file_exists(fast_tests_coverage_file) + hdbg.dassert_file_exists(slow_tests_coverage_file) + # + report_cmd: List[str] = [] + # Clean the previous coverage results. For some docker-specific reasons + # command which combines stats does not work when being run first in + # the chain `bash -c "cmd1 && cmd2 && cmd3"`. So `erase` command which + # does not affect the coverage results was added as a workaround. + report_cmd.append("coverage erase") + # Merge stats for fast and slow tests into single dir. + report_cmd.append( + f"coverage combine --keep {fast_tests_coverage_file} {slow_tests_coverage_file}" + ) + # Specify the dirs to include and exclude in the report. + exclude_from_report = None + if target_dir == ".": + # Include all dirs. + include_in_report = "*" + if hserver.skip_submodules_test(): + # Exclude submodules. 
+ submodule_paths = hgit.get_submodule_paths() + exclude_from_report = ",".join( + path + "/*" for path in submodule_paths + ) + else: + # Include only the target dir. + include_in_report = f"*/{target_dir}/*" + # Generate text report with the coverage stats. + report_stats_cmd = ( + f"coverage report --include={include_in_report} --sort=Cover" + ) + if exclude_from_report is not None: + report_stats_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_stats_cmd) + if generate_html_report: + # Generate HTML report with the coverage stats. + report_html_cmd = f"coverage html --include={include_in_report}" + if exclude_from_report is not None: + report_html_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_html_cmd) + # Execute commands above one-by-one inside docker. Coverage tool is not + # installed outside docker. + full_report_cmd = " && ".join(report_cmd) + docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" + hlitauti.run(ctx, docker_cmd_) + if publish_html_on_s3: + # Publish HTML report on S3. + _publish_html_coverage_report_on_s3(aws_profile) + + +def _get_inclusion_settings(target_dir: str) -> Tuple[str, Optional[str]]: + """ + Determine include/omit glob patterns for the coverage report for both text + and HTML coverage reports. + + :param target_dir: directory for coverage stats; use "." to indicate all directories + :return: glob pattern to include and a comma-separated glob pattern to omit + + Examples: + 1. Cover everything (no submodules to omit): + `_get_inclusion_settings(".")` -> `("*", "")` + + 2. Only cover code under a specific directory: + `_get_inclusion_settings("helpers")` -> `("*/helpers/*", None)` + + In `_run_coverage`: + - To cover the entire repo coverage (e.g. 
`helpers` project root): + `_get_inclusion_settings(".")` corresponds to + ``` + > coverage report --include=* --sort=Cover + > coverage html --include=* [--omit=submodule1/*,submodule2/*] + ``` + + - To cover a single-directory: + ` _get_inclusion_settings("helpers")` corresponds to: + ``` + > coverage report --include=*/helpers/* --sort=Cover + > coverage html --include=*/helpers/* [--omit=...] + ``` + """ + if target_dir == ".": + include_in_report = "*" + exclude_from_report = "" + if hserver.skip_submodules_test(): + submodule_paths: List[str] = hgit.get_submodule_paths() + exclude_from_report = ",".join( + f"{path}/*" for path in submodule_paths + ) + else: + include_in_report = f"*/{target_dir}/*" + exclude_from_report = None + return include_in_report, exclude_from_report + + +@task +def run_coverage(ctx, suite, target_dir=".", generate_html_report=False): # type: ignore + """ + Task to run coverage for any test suite. + + :param ctx: invoke context + :param suite: suite to run ("fast", "slow", "superslow") + :param target_dir: directory to measure coverage + """ + hdbg.dassert_in(suite, ("fast", "slow", "superslow")) + # Build the command line. + test_cmd_parts = [ + # Invoke the "_tests" task. + "invoke", + f"run_{suite}_tests", + # Enable coverage computation. + "--coverage", + # Specify which directory to test. + "-p", + target_dir, + ] + test_cmd = hlitauti.to_multi_line_cmd(test_cmd_parts) + # Run the tests under coverage. + hlitauti.run(ctx, test_cmd, use_system=False) + hdbg.dassert_file_exists(".coverage") + # Compute which files/dirs to include and omit in the report. + include_in_report, exclude_from_report = _get_inclusion_settings(target_dir) + report_cmd: List[str] = [ + # Reset any previous coverage data to avoid contamination. + "coverage erase" + ] + # Generate a text report, including only our target paths. 
+ report_stats_cmd = ( + f"coverage report --include={include_in_report} --sort=Cover" + ) + if exclude_from_report: + report_stats_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_stats_cmd) + # Produce HTML output for interactive browsing. + if generate_html_report: + report_html_cmd = f"coverage html --include={include_in_report}" + if exclude_from_report: + report_html_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_html_cmd) + # Export XML coverage report to integrate with Codecov. + report_cmd.append("coverage xml -o coverage.xml") + full_report_cmd = " && ".join(report_cmd) + docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" + hlitauti.run(ctx, docker_cmd_) + + +@task +def run_coverage_subprocess(ctx, target_dir=".", generate_html_report=False): # type: ignore + """ + Run comprehensive coverage using subprocess mode with hcoverage injection + and direct coverage run. This function runs all tests (fast, slow, + superslow) to generate complete coverage. + + :param ctx: invoke context + :param target_dir: directory to measure coverage + :param generate_html_report: whether to generate HTML coverage + report or not + """ + _LOG.info("Running comprehensive test coverage with subprocess injection...") + # Inject coverage hooks. + hcovera.inject() + try: + # Setup coverage environment for subprocess. + hcovera.coverage_commands_subprocess() + # Clean any existing coverage data. + erase_cmd = "coverage erase" + hsystem.system(erase_cmd, abort_on_error=True) + # Build the coverage command with parallel mode - run all tests. + coverage_cmd = ["coverage", "run", "--parallel-mode", "-m", "pytest"] + # Add target directory. + coverage_cmd.append(target_dir) + test_cmd = hlitauti.to_multi_line_cmd(coverage_cmd) + _LOG.debug("About to run command: {test_cmd}") + # Run tests with coverage tracking directly. + hsystem.system(test_cmd, abort_on_error=True) + # Combine coverage data from subprocesses directly. 
+ hcovera.coverage_combine() + hdbg.dassert_file_exists(".coverage") + include_in_report, exclude_from_report = _get_inclusion_settings( + target_dir + ) + include_in_report = include_in_report.replace("/./", "/").replace( + "//", "/" + ) + report_cmd: List[str] = [] + # Generate a text report, including only our target paths. + report_stats_cmd = ( + f"coverage report --include={include_in_report} --sort=Cover" + ) + if exclude_from_report: + exclude_from_report = exclude_from_report.replace( + "/./", "/" + ).replace("//", "/") + report_stats_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_stats_cmd) + if generate_html_report: + # Generate HTML report with the coverage stats. + report_html_cmd = f"coverage html --include={include_in_report}" + if exclude_from_report: + report_html_cmd += f" --omit={exclude_from_report}" + report_cmd.append(report_html_cmd) + # Export XML coverage report to integrate with Codecov. + report_cmd.append("coverage xml -o coverage.xml") + full_report_cmd = " && ".join(report_cmd) + # Run coverage report commands directly (avoid Docker-in-Docker issues). + hsystem.system(full_report_cmd, abort_on_error=True) + except Exception as e: + _LOG.error("Coverage with subprocess failed: %s", e) + raise + finally: + # Always cleanup coverage hooks. + hcovera.remove() + + +# ############################################################################# +# Traceback. +# ############################################################################# + + +# TODO(gp): Consolidate the code from dev_scripts_helpers/testing here. + + +@task +def traceback(ctx, log_name="tmp.pytest_script.txt", purify=True): # type: ignore + """ + Parse the traceback from Pytest and navigate it with vim. + + ``` + # Run a unit test. 
+ > pytest helpers/test/test_traceback.py 2>&1 | tee tmp.pytest.log + > pytest.sh helpers/test/test_traceback.py + # Parse the traceback + > invoke traceback -i tmp.pytest.log + ``` + + :param log_name: the file with the traceback + :param purify: purify the filenames from client (e.g., from running inside Docker) + """ + hlitauti.report_task() + # + dst_cfile = "cfile" + hio.delete_file(dst_cfile) + # Convert the traceback into a cfile. + cmd = [] + cmd.append("traceback_to_cfile.py") + if log_name: + cmd.append(f"-i {log_name}") + cmd.append(f"-o {dst_cfile}") + # Purify the file names. + if purify: + cmd.append("--purify_from_client") + else: + cmd.append("--no_purify_from_client") + cmd = " ".join(cmd) + hlitauti.run(ctx, cmd) + # Read and navigate the cfile with vim. + if os.path.exists(dst_cfile): + cmd = 'vim -c "cfile cfile"' + hlitauti.run(ctx, cmd, pty=True) + else: + _LOG.warning("Can't find %s", dst_cfile) + + +# ############################################################################# +# pytest_clean +# ############################################################################# + + +@task +def pytest_clean(ctx): # type: ignore + """ + Clean pytest artifacts. + """ + hlitauti.report_task() + _ = ctx + import helpers.hpytest as hpytest + + hpytest.pytest_clean(".") + + +# ############################################################################# +# pytest_repro +# ############################################################################# + + +def _get_failed_tests_from_file(file_name: str) -> List[str]: + hdbg.dassert_file_exists(file_name) + txt = hio.from_file(file_name) + if file_name.endswith("/cache/lastfailed"): + # Decode the json-style string. 
+ # { + # "vendors/test/test_vendors.py::Test_gp::test1": true, + # "vendors/test/test_vendors.py::Test_kibot_utils1::...": true, + # } + vals = json.loads(txt) + hdbg.dassert_isinstance(vals, dict) + tests = [k for k, v in vals.items() if v] + else: + # Extract failed tests from the regular text output. + tests = re.findall(r"FAILED (\S+\.py::\S+::\S+)\b", txt) + return tests + + +@task +def pytest_repro( # type: ignore + ctx, + mode="tests", + file_name="./.pytest_cache/v/cache/lastfailed", + show_stacktrace=False, + create_script=True, + script_name="./tmp.pytest_repro.sh", +): + """ + Generate commands to reproduce the failed tests after a `pytest` run. + + The workflow is: + ``` + # Run a lot of tests, e.g., the entire regression suite. + server> i run_fast_slow_tests 2>&1 | log pytest.txt + docker> pytest ... 2>&1 | log pytest.txt + + # Run the `pytest_repro` to summarize test failures and to generate + # commands to reproduce them. + server> i pytest_repro + ``` + + :param mode: the granularity level for generating the commands + - "tests" (default): failed test methods, e.g., + ``` + pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 + pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching2 + ``` + - "classes": classes of the failed tests, e.g., + ``` + pytest helpers/test/test_cache.py::TestCachingOnS3 + pytest helpers/test/test_cache.py::TestCachingOnS3_2 + ``` + - "files": files with the failed tests, e.g., + :param file_name: the name of the file containing the pytest output file to parse + :param show_stacktrace: whether to show the stacktrace of the failed tests + - only if it is available in the pytest output file + :param create_script: create a script to run the tests + :return: commands to reproduce pytest failures at the requested granularity level + """ + hlitauti.report_task() + _ = ctx + # Read file. 
+ _LOG.info("Reading file_name='%s'", file_name) + hdbg.dassert_file_exists(file_name) + _LOG.info("Reading failed tests from file '%s'", file_name) + # E.g., vendors/test/test_vendors.py::Test_gp::test1 + tests = _get_failed_tests_from_file(file_name) + if len(tests) == 0: + _LOG.info("Found 0 failed tests") + return "" + _LOG.debug("tests=%s", str(tests)) + # Process the tests. + targets = [] + for test in tests: + data = test.split("::") + hdbg.dassert_lte(len(data), 3, "Can't parse '%s'", test) + # E.g., dev_scripts/testing/test/test_run_tests.py + # E.g., helpers/test/helpers/test/test_list.py::Test_list_1 + # E.g., core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5 + test_file_name = test_class = test_method = "" + if len(data) >= 1: + test_file_name = data[0] + if len(data) >= 2: + test_class = data[1] + if len(data) >= 3: + test_method = data[2] + _LOG.debug( + "test=%s -> (%s, %s, %s)", + test, + test_file_name, + test_class, + test_method, + ) + if mode == "tests": + targets.append(test) + elif mode == "files": + if test_file_name != "": + targets.append(test_file_name) + else: + _LOG.warning( + "Skipping test='%s' since test_file_name='%s'", + test, + test_file_name, + ) + elif mode == "classes": + if test_file_name != "" and test_class != "": + targets.append(f"{test_file_name}::{test_class}") + else: + _LOG.warning( + "Skipping test='%s' since test_file_name='%s', test_class='%s'", + test, + test_file_name, + test_class, + ) + else: + hdbg.dfatal(f"Invalid mode='{mode}'") + # Package the output. + # targets is a list of tests in the format + # `helpers/test/test_env.py::Test_env1::test_get_system_signature1`. 
+ hdbg.dassert_isinstance(targets, list) + targets = hlist.remove_duplicates(targets) + targets = sorted(targets) + failed_test_output_str = ( + f"Found {len(targets)} failed pytest '{mode}' target(s); " + "to reproduce run:\n" + ) + res = [f"pytest {t}" for t in targets] + res = "\n".join(res) + failed_test_output_str += res + # + if show_stacktrace: + # Get the stacktrace block from the pytest output. + txt = hio.from_file(file_name) + if ( + "====== FAILURES ======" in txt + and "====== slowest 3 durations ======" in txt + ): + failures_blocks = txt.split("====== FAILURES ======")[1:] + failures_blocks = [ + x.split("====== slowest 3 durations ======")[0] + for x in failures_blocks + ] + txt = "\n".join([x.rstrip("=").lstrip("=") for x in failures_blocks]) + # Get the classes and names of the failed tests, e.g. + # "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5" -> + # -> "TestSmaModel.test5". + failed_test_names = [ + test.split("::")[1] + "." + test.split("::")[2] for test in tests + ] + tracebacks = [] + for name in failed_test_names: + # Get the stacktrace for the individual test failure. + # Its start is marked with the name of the test, e.g. + # "___________________ TestSmaModel.test5 ___________________". + start_block = "__ " + name + " __" + traceback_block = txt.rsplit(start_block, maxsplit=1)[-1] + end_block_options = [ + "__ " + n + " __" for n in failed_test_names if n != name + ] + for end_block in end_block_options: + # The end of the traceback for the current failed test is the + # start of the traceback for the next failed test. + if end_block in traceback_block: + traceback_block = traceback_block.split(end_block)[0] + _, traceback_ = htraceb.parse_traceback( + traceback_block, purify_from_client=False + ) + traceback_text = ( + traceback_.strip() if traceback_ is not None else "" + ) + tracebacks.append("\n".join(["# " + name, traceback_text, ""])) + # Combine the stacktraces for all the failures. 
+ full_traceback = "\n\n" + "\n".join(tracebacks) + failed_test_output_str += full_traceback + res += full_traceback + _LOG.info("%s", failed_test_output_str) + if create_script: + # pytest \ + # amp/oms/test/test_portfolio.py::TestDatabasePortfolio2::test1 \ + # ... + # $* + script_txt = [] + # pytest or pytest_log + script_txt.append("pytest_log \\") + script_txt.extend([f" {t} \\" for t in targets]) + script_txt.append(" $*") + script_txt = "\n".join(script_txt) + msg = "To run the tests" + hio.create_executable_script(script_name, script_txt, msg=msg) + return res + + +# ############################################################################# +# pytest_rename_test +# ############################################################################# + + +@task +def pytest_rename_test(ctx, old_test_class_name, new_test_class_name): # type: ignore + """ + Rename the test and move its golden outcome. + + E.g., to rename a test class and all the test methods: + + :param old_test_class_name: old class name + :param new_test_class_name: new class name + """ + hlitauti.report_task() + _ = ctx + root_dir = os.getcwd() + # `lib_tasks` is used from outside the Docker container in the thin dev + # environment and we want to avoid pulling in too many dependencies, unless + # necessary, so we import dynamically. + import helpers.hunit_test_utils as hunteuti + + renamer = hunteuti.UnitTestRenamer( + old_test_class_name, new_test_class_name, root_dir + ) + renamer.run() + + +# ############################################################################# +# pytest_find_ununsed_goldens +# ############################################################################# + + +@task +def pytest_find_unused_goldens( # type: ignore + ctx, + dir_name=".", + stage="prod", + version="", + out_file_name="pytest_find_unused_goldens.output.txt", +): + """ + Detect mismatches between tests and their golden outcome files. 
+ + - When goldens are required by the tests but the corresponding files + do not exist + - When the existing golden files are not actually required by the + corresponding tests + + :param dir_name: the head dir to start the check from + """ + hlitauti.report_task() + # Remove the log file. + if os.path.exists(out_file_name): + cmd = f"rm {out_file_name}" + hlitauti.run(ctx, cmd) + # Prepare the command line. + amp_abs_path = hgit.get_amp_abs_path() + amp_path = amp_abs_path.replace( + os.path.commonpath([os.getcwd(), amp_abs_path]), "" + ) + script_path = os.path.join( + amp_path, "dev_scripts/find_unused_golden_files.py" + ).lstrip("/") + docker_cmd_opts = [f"--dir_name {dir_name}"] + docker_cmd_ = f"{script_path} " + hlitauti._to_single_line_cmd( + docker_cmd_opts + ) + # Execute command line. + base_image = "" + cmd = hlitalin._get_lint_docker_cmd(base_image, docker_cmd_, stage, version) + cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}" + # Run. + hlitauti.run(ctx, cmd) + + +# ############################################################################# +# pytest_compare_logs +# ############################################################################# + + +def _purify_log_file( + file_name: str, remove_line_numbers: bool, grep_regex: str +) -> str: + txt = hio.from_file(file_name) + # Remove leading `16:34:27`. + txt = re.sub(r"^\d\d:\d\d:\d\d ", "", txt, flags=re.MULTILINE) + # Remove references like `at 0x7f43493442e0`. + txt = re.sub(r"at 0x\S{12}", "at 0x", txt, flags=re.MULTILINE) + # Remove `done (0.014 s)`. + txt = re.sub(r"(done) \(\d+\.\d+ s\)", "\\1", txt, flags=re.MULTILINE) + # Remove wall_clock_time='2022-06-17 04:36:56.062645-04:00'. + txt = re.sub(r"(wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) + # Remove `real_wall_clock_time = '2022-06-17 04:33:19.946025-04:00'`. + txt = re.sub(r"(real_wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) + # Remove `tqdm [00:00<00:00, 4.05it/s]`. 
+ txt = re.sub(r"(htqdm.py.*)\[.*\]", "\\1", txt, flags=re.MULTILINE) + # Remove `Task-3`. + txt = re.sub(r"(Task-)\d+", "\\1", txt, flags=re.MULTILINE) + # Remove line number, e.g., + # `htqdm.py abstract_market_data.py get_data_for_interval:259` + if remove_line_numbers: + txt = re.sub( + r"(\.py [a-zA-Z_][a-zA-Z0-9_]*):\d+ ", + "\\1:0 ", + txt, + flags=re.MULTILINE, + ) + # + if grep_regex: + lines = [] + for line in txt.split("\n"): + if re.search(grep_regex, line): + lines.append(line) + txt = "\n".join(lines) + return txt + + +@task +def pytest_compare_logs( # type: ignore + ctx, file1, file2, remove_line_numbers=False, grep_regex="", dry_run=False +): + """ + Diff two log files removing the irrelevant parts (e.g., timestamps, object + pointers). + + :param remove_line_numbers: remove line numbers from function calls + (e.g., `abstract_market_data.py get_data_for_interval:259` + :param grep_regex: select lines based on a regex + """ + suffix = "tmp" + # + txt = _purify_log_file(file1, remove_line_numbers, grep_regex) + file1_tmp = hio.add_suffix_to_filename(file1, suffix) + hio.to_file(file1_tmp, txt) + # + txt = _purify_log_file(file2, remove_line_numbers, grep_regex) + file2_tmp = hio.add_suffix_to_filename(file2, suffix) + hio.to_file(file2_tmp, txt) + # Save the script to compare. 
+ script_file_name = "./tmp.vimdiff_log.sh" + script_txt = f"vimdiff {file1_tmp} {file2_tmp}" + msg = "To diff run:" + hio.create_executable_script(script_file_name, script_txt, msg=msg) + hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) + + +# ############################################################################# +# pytest_buildmeister +# ############################################################################# + + +def _run( + cmd: str, + *, + abort_on_error: bool = False, + output_file: Optional[str] = None, + tee: bool = False, +) -> int: + rc = hsystem.system( + cmd, + abort_on_error=abort_on_error, + suppress_output=False, + log_level="echo_frame", + output_file=output_file, + tee=tee, + ) + return rc + + +def _get_invoke_cmd_line(target: str, opts: str, pytest_opts: str) -> str: + """ + + :param opts: options to pass to invoke + """ + cmd = ["invoke"] + cmd.append(target) + if opts: + cmd.append(opts) + if pytest_opts: + cmd.append("--pytest-opts " + pytest_opts) + cmd.append("2>&1") + return " ".join(cmd) + + +def _run_cmd_and_tg(cmd: str, *args: Any, **kwargs: Any) -> None: + rc = _run(cmd, *args, **kwargs) + if rc != 0: + # pytest returns 5, if there are no tests to run. + # On error, send Telegram message. + cmd = "tg.py" + _run(cmd, abort_on_error=False) + + +@task +def pytest_buildmeister_check(ctx, print_output=False): # type: ignore + """ + + :param print_output: print content of the file with the output of the + buildmeister run + """ + _ = ctx + # Concat the files generated by `invoke pytest_...` + log_file = "bm.log.txt" + if os.path.exists(log_file): + cmd = f"rm -rf {log_file}" + _run(cmd) + log_file = "bm.log.txt" + cmd = 'cat $(find . -name "bm.log*.txt" | sort) >' + log_file + _run(cmd) + # + if print_output: + print(hprint.frame("Print output")) + cmd = f"cat {log_file}" + _run(cmd) + # Report failures using `invoke pytest_repro`. 
+ print(hprint.frame("Failures")) + # "> sudo -u sasm rm ./tmp.pytest_repro.sh; i pytest_repro -f {log_file}" + if os.path.exists("./tmp.pytest_repro.sh"): + cmd = "sudo -u sasm rm ./tmp.pytest_repro.sh" + _run(cmd) + # + cmd = f"invoke pytest_repro -f {log_file}" + _run(cmd) + # Report failures using `grep`. + print(hprint.frame("grep Failures")) + cmd = f"grep '^FAILED' {log_file} | sort" + _run(cmd) + + +@task +def pytest_buildmeister( # type: ignore + ctx, opts="", pytest_opts="", docker_clean=False, test=False +): + """ + Run the regression tests. + + - Run updating all the tests + + :param docker_clean: remove all dead Docker instances + :param opts: options to pass to the invoke (e.g., `--version 1.2.0` to test + a specific version of the Docker container) + :param pytest_opts: options to pass to pytest + :param test: just run a single quick test to verify functionality of this + script + """ + _ = ctx + if test: + # For testing. + pytest_opts = "amp/dataflow/backtest/test/test_dataflow_backtest_utils.py::Test_get_configs_from_command_line_Amp1::test1" + if docker_clean: + cmd = "dev_scripts_lime/docker_clean.sh" + _run(cmd) + # Clean and sync. 
+ cmd = "invoke git_clean -f" + _run(cmd) + # + cmd = "invoke git_pull" + _run(cmd) + # + log_file = "bm.log*txt" + if os.path.exists(log_file): + cmd = f"rm -rf {log_file}" + _run(cmd) + # + files_to_merge = [] + # + target = "run_fast_tests" + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + cmd = "invoke fix_perms" + hsystem.system(cmd) + # + target = "run_slow_tests" + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + cmd = "invoke fix_perms" + _run(cmd) + # + target = "run_superslow_tests" + log_file = f"bm.log.{target}.txt" + files_to_merge.append(log_file) + cmd = _get_invoke_cmd_line(target, opts, pytest_opts) + cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" + cmd = f"bash -c '{cmd}'" + _run_cmd_and_tg(cmd) + # + pytest_buildmeister_check(ctx) + + +# ############################################################################# +# pytest_collect_only +# ############################################################################# + + +@task +def pytest_collect_only(ctx): # type: ignore + _ = ctx + cmd = 'invoke docker_cmd --cmd "pytest --collect-only 2>&1"' + hsystem.system(cmd, suppress_output=False) + + +# ############################################################################# +# pytest_add_untracked_golden_outcomes +# ############################################################################# + + +@task +def pytest_add_untracked_golden_outcomes(ctx): # type: ignore + """ + Add the golden outcomes files that are not tracked under git. + """ + _ = ctx + cmd = 'git add $(git ls-files . 
--exclude-standard --others | grep "output" | grep -v tmp)' + hsystem.system(cmd, suppress_output=False) + + +# ############################################################################# +# pytest_failed +# ############################################################################# + + +def _parse_failed_tests( + txt: str, only_file: bool, only_class: bool +) -> Tuple[List[str], int, int]: + """ + Parse the failed tests from the pytest output. + + :param only_file: return only the file name + :param only_class: return only the class name + :return: + - failed_tests: list of failed tests + - num_failed: number of failed tests + - num_passed: number of passed tests + """ + hdbg.dassert_lte(only_file + only_class, 1) + failed_tests = [] + num_failed = num_passed = 0 + for line in txt.split("\n"): + # Remove non printable characters. + line = re.sub(r"[^\x20-\x7E]", "", line) + # FAILED oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - RuntimeError: + m = re.search(r"^(FAILED|ERROR) (\S+) -", line) + if m: + test_name = m.group(2) + _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) + failed_tests.append(test_name) + # helpers_root/helpers/test/test_hserver.py::Test_hserver1::test_gp1 (0.00 s) PASSED [ 36%] + m = re.search(r"(\S+) \(\S+ s\) (FAILED|ERROR)", line) + if m: + test_name = m.group(1) + _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) + failed_tests.append(test_name) + # ============ 11 failed, 917 passed, 113 skipped in 64.57s (0:01:04) ============ + # ======================== 4 failed, 43 passed in 40.48s ========================= + m = re.search(r"=+\s+(\d+)\s+failed,\s+(\d+)\s+passed.*", line) + if m: + num_failed = int(m.group(1)) + num_passed = int(m.group(2)) + failed_tests = sorted(list(set(failed_tests))) + # + if num_failed and num_passed and num_failed != len(failed_tests): + _LOG.warning( + "n_failed=%s len(failed_tests)=%s", num_failed, len(failed_tests) + ) + print(f"Failed 
tests: {num_failed}/{num_passed}") + # Filter, if needed. + if only_file or only_class: + failed_tests_tmp = [] + for test in failed_tests: + # oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 + m = re.match(r"(\S+)::(\S+)::\S+$", test) + hdbg.dassert(m, f"Can't parse '{test}'") + if only_file: + failed_tests_tmp.append(m.group(1)) + elif only_class: + failed_tests_tmp.append(m.group(1) + "::" + m.group(2)) + else: + raise RuntimeError("Unexpected") + failed_tests = sorted(list(set(failed_tests_tmp))) + return failed_tests, num_failed, num_passed + + +@task +def pytest_failed( + ctx, only_file=False, only_class=False, file_name="tmp.pytest_script.txt" +): # type: ignore + _ = ctx + hlitauti.report_task() + # Read file. + txt = hio.from_file(file_name) + # Extract info. + failed_tests, _, _ = _parse_failed_tests(txt, only_file, only_class) + print("\n".join(failed_tests)) + # Write the repro in a file. + repro_file_name = "tmp.pytest_failed.sh" + repro_txt = "pytest_log " + " ".join(failed_tests) + " $*" + hio.to_file(repro_file_name, repro_txt) + # + hio.create_executable_script(repro_file_name, repro_txt) + _LOG.warning("To run the failed tests run: %s", repro_file_name) + # Save to clipboard. 
+ txt = " ".join(failed_tests) + hsystem.to_pbcopy(txt, pbcopy=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py new file mode 100644 index 000000000..64d60a88b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py @@ -0,0 +1,397 @@ +""" +Import as: + +import helpers.lib_tasks_utils as hlitauti +""" + +import datetime +import glob +import logging +import os +import pprint +import re +import sys +from typing import Any, Dict, List, Optional, Union + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hversion as hversio + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Default params. +# ############################################################################# + +# This is used to inject the default params. +# TODO(gp): Using a singleton here is not elegant but simple. +_DEFAULT_PARAMS = {} + + +def set_default_params(params: Dict[str, Any]) -> None: + global _DEFAULT_PARAMS + _DEFAULT_PARAMS = params + _LOG.debug("Assigning:\n%s", pprint.pformat(params)) + + +def has_default_param(key: str) -> bool: + hdbg.dassert_isinstance(key, str) + return key in _DEFAULT_PARAMS + + +def get_default_param(key: str, *, override_value: Any = None) -> Any: + """ + Return the value from the default parameters dictionary, optionally + overriding it. 
+ """ + hdbg.dassert_isinstance(key, str) + value = None + if has_default_param(key): + value = _DEFAULT_PARAMS[key] + if override_value: + _LOG.info("Overriding value %s with %s", value, override_value) + value = override_value + hdbg.dassert_is_not( + value, None, "key='%s' not defined from %s", key, _DEFAULT_PARAMS + ) + return value + + +def reset_default_params() -> None: + params: Dict[str, Any] = {} + set_default_params(params) + + +# ############################################################################# +# Utils. +# ############################################################################# + + +def parse_command_line() -> None: + # Since it's not easy to add global command line options to invoke, we + # piggy back the option that already exists. + # If one uses the debug option for `invoke` we turn off the code + # debugging. + # TODO(gp): Check http://docs.pyinvoke.org/en/1.0/concepts/library.html# + # modifying-core-parser-arguments + if ("-d" in sys.argv) or ("--debug" in sys.argv): + verbosity = logging.DEBUG + else: + verbosity = logging.INFO + # Suppress command line logging if only_print_files is requested. + report_command_line = "--only-print-files" not in sys.argv + hdbg.init_logger( + verbosity=verbosity, report_command_line=report_command_line + ) + + +# NOTE: We need to use a `# type: ignore` for all the @task functions because +# pyinvoke infers the argument type from the code and mypy annotations confuse +# it (see https://github.com/pyinvoke/invoke/issues/357). + +# In the following, when using `lru_cache`, we use functions from `hsyste` +# instead of `ctx.run()` since otherwise `lru_cache` would cache `ctx`. + +# We prefer not to cache functions running `git` to avoid stale values if we +# call git (e.g., if we cache Git hash and then we do a `git pull`). 
+ +# pyinvoke `ctx.run()` is useful for unit testing, since it allows to: +# - mock the result of a system call +# - register the issued command line (to create the expected outcome of a test) +# On the other side `system_interaction.py` contains many utilities that make +# it easy to interact with the system. +# Once AmpPart1347 is implemented we can replace all the `ctx.run()` with calls +# to `system_interaction.py`. + + +_WAS_FIRST_CALL_DONE = False + + +# TODO(gp): This can be part of the @task +def report_task(txt: str = "", container_dir_name: str = ".") -> None: + """ + Print the task description. + + Each task should call this function at the beginning to print the + task name. + """ + # On the first invocation check the version of the container. + global _WAS_FIRST_CALL_DONE + if not _WAS_FIRST_CALL_DONE: + _WAS_FIRST_CALL_DONE = True + hversio.check_version(container_dir_name) + # Print the name of the function. + msg = hprint.func_signature_to_str( + skip_vars="ctx", assert_on_skip_vars_error=False, frame_level=3 + ) + print(hprint.color_highlight(msg, color="purple")) + + +# TODO(gp): Move this to helpers.system_interaction and allow to add the switch +# globally. +def _to_single_line_cmd(cmd: Union[str, List[str]]) -> str: + """ + Convert a multiline command (as a string or list of strings) into a single + line. + + E.g., convert + ``` + IMAGE=.../amp:dev \ + docker-compose \ + --file devops/compose/tmp.docker-compose.yml \ + --file devops/compose/tmp.docker-compose_as_submodule.yml \ + --env-file devops/env/default.env + ``` + into + ``` + IMAGE=.../amp:dev docker-compose --file ... + ``` + """ + if isinstance(cmd, list): + cmd = " ".join(cmd) + hdbg.dassert_isinstance(cmd, str) + cmd = cmd.rstrip().lstrip() + # Remove `\` at the end of the line. + cmd = re.sub(r" \\\s*$", " ", cmd, flags=re.MULTILINE) + # Use a single space between words in the command. 
+ # TODO(gp): This is a bit dangerous if there are multiple spaces in a string + # that for some reason are meaningful. + cmd = " ".join(cmd.split()) + return cmd + + +def to_multi_line_cmd(docker_cmd_: List[str]) -> str: + r""" + Convert a command encoded as a list of strings into a single command + separated by `\`. + + E.g., convert + ``` + ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', + '\n docker-compose', + '\n --file amp/devops/compose/tmp.docker-compose.yml', + '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', + '\n --env-file devops/env/default.env'] + ``` + into + ``` + IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ + docker-compose \ + --file devops/compose/tmp.docker-compose.yml \ + --file devops/compose/tmp.docker-compose_as_submodule.yml \ + --env-file devops/env/default.env + ``` + """ + # Expand all strings into single lines. + _LOG.debug("docker_cmd=%s", docker_cmd_) + docker_cmd_tmp = [] + for dc in docker_cmd_: + # Add a `\` at the end of each string. + hdbg.dassert(not dc.endswith("\\"), "dc='%s'", dc) + dc += " \\" + docker_cmd_tmp.extend(dc.split("\n")) + docker_cmd_ = docker_cmd_tmp + # Remove empty lines. + docker_cmd_ = [cmd for cmd in docker_cmd_ if cmd.rstrip().lstrip() != ""] + # Package the command. + result = "\n".join(docker_cmd_) + # Remove a `\` at the end, since it is not needed. + result = result.rstrip("\\") + _LOG.debug("docker_cmd=%s", result) + return result + + +# TODO(gp): Pass through command line using a global switch or an env var. 
+use_one_line_cmd = False + + +def run( + ctx: Any, + cmd: str, + *args: Any, + dry_run: bool = False, + use_system: bool = False, + print_cmd: bool = False, + **ctx_run_kwargs: Any, +) -> Optional[int]: + cmd = hprint.dedent(cmd) + _LOG.debug(hprint.to_str("cmd dry_run")) + if use_one_line_cmd: + cmd = _to_single_line_cmd(cmd) + _LOG.debug("cmd=%s", cmd) + if dry_run: + print(f"Dry-run: > {cmd}") + _LOG.warning("Skipping execution of '%s'", cmd) + res = None + else: + if print_cmd: + print(f"> {cmd}") + if use_system: + # TODO(gp): Consider using only `hsystem.system()` since it's more + # reliable. + res = hsystem.system(cmd, suppress_output=False) + else: + result = ctx.run(cmd, *args, **ctx_run_kwargs) + res = result.return_code + return res + + +# TODO(ai_gp): Use the one in ./helpers/hsystem.py +def _to_pbcopy(txt: str, pbcopy: bool) -> None: + """ + Save the content of txt in the system clipboard. + """ + txt = txt.rstrip("\n") + if not pbcopy: + print(txt) + return + if not txt: + print("Nothing to copy") + return + if hserver.is_host_mac(): + # -n = no new line + cmd = f"echo -n '{txt}' | pbcopy" + hsystem.system(cmd) + print(f"\n# Copied to system clipboard:\n{txt}") + else: + _LOG.warning("pbcopy works only on macOS") + print(txt) + + +def _filter_existing_paths(paths_from_user: List[str]) -> List[str]: + """ + Filter out the paths to non-existent files. + + :param paths_from_user: paths passed by user + :return: existing paths + """ + paths = [] + for user_path in paths_from_user: + if user_path.endswith("/*"): + # Get the files according to the "*" pattern. + dir_files = glob.glob(user_path) + if dir_files: + # Check whether the pattern matches files. 
+ paths.extend(dir_files) + else: + _LOG.error( + ( + "'%s' pattern doesn't match any files: " + "the directory is empty or path does not exist" + ), + user_path, + ) + elif os.path.exists(user_path): + paths.append(user_path) + else: + _LOG.error("'%s' does not exist", user_path) + return paths + + +# TODO(gp): We should factor out the meaning of the params in a string and add it +# to all the tasks' help. +def _get_files_to_process( + modified: bool, + branch: bool, + last_commit: bool, + # TODO(gp): Pass abs_dir, instead of `all_` and remove the calls from the + # outer clients. + all_: bool, + files_from_user: str, + mutually_exclusive: bool, + remove_dirs: bool, +) -> List[str]: + """ + Get a list of files to process. + + The files are selected based on the switches: + - `branch`: changed in the branch + - `modified`: changed in the client (both staged and modified) + - `last_commit`: part of the previous commit + - `all`: all the files in the repo + - `files_from_user`: passed by the user + + :param modified: return files modified in the client (i.e., changed with + respect to HEAD) + :param branch: return files modified with respect to the branch point + :param last_commit: return files part of the previous commit + :param all: return all repo files + :param files_from_user: return files passed to this function + :param mutually_exclusive: ensure that all options are mutually exclusive + :param remove_dirs: whether directories should be processed + :return: paths to process + """ + _LOG.debug( + hprint.to_str( + "modified branch last_commit all_ files_from_user " + "mutually_exclusive remove_dirs" + ) + ) + if mutually_exclusive: + # All the options are mutually exclusive. 
+ hdbg.dassert_eq( + int(modified) + + int(branch) + + int(last_commit) + + int(all_) + + int(len(files_from_user) > 0), + 1, + msg="Specify only one among --modified, --branch, --last-commit, " + "--all_files, and --files", + ) + else: + # We filter the files passed from the user through other the options, + # so only the filtering options need to be mutually exclusive. + hdbg.dassert_eq( + int(modified) + int(branch) + int(last_commit) + int(all_), + 1, + msg="Specify only one among --modified, --branch, --last-commit", + ) + dir_name = "." + if modified: + files = hgit.get_modified_files(dir_name) + elif branch: + files = hgit.get_modified_files_in_branch("master", dir_name) + elif last_commit: + files = hgit.get_previous_committed_files(dir_name) + elif all_: + pattern = "*" + only_files = True + use_relative_paths = True + files = hio.listdir(dir_name, pattern, only_files, use_relative_paths) + if files_from_user: + # If files were passed, filter out non-existent paths. + files = _filter_existing_paths(files_from_user.split()) + # Convert into a list. + hdbg.dassert_isinstance(files, list) + files_to_process = [f for f in files if f != ""] + # We need to remove `amp` to avoid copying the entire tree. + files_to_process = [f for f in files_to_process if f != "amp"] + _LOG.debug("files_to_process='%s'", str(files_to_process)) + # Remove dirs, if needed. + if remove_dirs: + files_to_process = hsystem.remove_dirs(files_to_process) + _LOG.debug("files_to_process='%s'", str(files_to_process)) + # Ensure that there are files to process. + if not files_to_process: + _LOG.warning("No files were selected") + return files_to_process + + +# Copied from helpers.datetime_ to avoid dependency from pandas. + + +def get_ET_timestamp() -> str: + # The timezone depends on how the shell is configured. + timestamp = datetime.datetime.now() + return timestamp.strftime("%Y%m%d_%H%M%S") + + +# End copy. 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py new file mode 100644 index 000000000..631a68e5f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +""" +Import as: + +import helpers.logging_testing.logging_main as hlteloma +""" + +import logging +import sys +from typing import Union + +import helpers.hlogging as hloggin + +_LOG = logging.getLogger(__name__) +print(f"_LOG={_LOG}") + + +def install_basic_formatter() -> None: + # The output looks like + # ``` + # DEBUG:__main__: message + # ``` + logging.basicConfig() + + +def _install_formatter( + formatter: Union[hloggin.CustomFormatter, logging.Formatter], +) -> None: + root_logger_ = logging.getLogger() + ch = logging.StreamHandler(sys.stdout) + ch.setFormatter(formatter) + root_logger_.addHandler(ch) + + +def install_current_formatter() -> None: + date_fmt = "%m-%d_%H:%M" + log_format = ( + # 04-28_08:08 INFO : + "%(asctime)-5s %(levelname)-5s" + ) + log_format += ( + # lib_tasks _delete_branches + " %(module)-20s: %(funcName)-30s:" + # 142: ... 
+ " %(lineno)-4d:" + " %(message)s" + ) + formatter = logging.Formatter(log_format, datefmt=date_fmt) + # + _install_formatter(formatter) + + +def install_custom_formatter() -> None: + formatter = hloggin.CustomFormatter() + _install_formatter(formatter) + + +if __name__ == "__main__": + # + print("\n# Installing formatter") + # install_basic_formatter() + # install_current_formatter() + install_custom_formatter() + # + print("\n# Loggers before setLevel") + root_logger = logging.getLogger() + print(f"root_logger={root_logger}") + # Show the loggers that have registered. + print(f"loggers={hloggin.get_all_loggers()}") + # + verbosity = logging.DEBUG + # verbosity = logging.ERROR + print(f"\n# Loggers after setLevel {verbosity}") + root_logger.setLevel(verbosity) + # Setting the verbosity for the root logger sets the verbosity for all the + # children ones. + print(f"root_logger={root_logger}") + print(f"loggers={hloggin.get_all_loggers()}") + # + hloggin.test_logger() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py new file mode 100644 index 000000000..ad88346fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py @@ -0,0 +1,10 @@ +""" +Import as: + +import helpers.logging_testing.logging_module as hltelomo +""" + +import logging + +_LOG = logging.getLogger(__name__) +print(f"_LOG={_LOG}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py new file mode 100644 index 000000000..5b0445a31 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py @@ -0,0 +1,17 @@ +import pathlib +from typing import Any, Optional + + +def pytest_ignore_collect( # type: ignore + collection_path: pathlib.Path, path: Any, config: Any +) -> Optional[bool]: + """ + Skip all tests in this directory. + + :param collection_path: path to analyze + :param path: path to analyze (deprecated) + :param config: pytest config object + :return: True if the path should be ignored + """ + # Ignore this directory and all its subdirectories. + return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb new file mode 100644 index 000000000..7df18640d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Test Cache in Jupyter Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:23:59.696680Z", + "start_time": "2021-08-16T16:23:58.792511Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING\u001b[0m: Disabling annoying warnings\n", + "\u001b[0m\u001b[36mINFO\u001b[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-89e1d81f-7cff-47ee-9790-af936835f517.json'\n", + "\u001b[33mWARNING\u001b[0m: Running in Jupyter\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import logging\n", + "\n", + "import 
joblib\n", + "\n", + "import helpers.hcache as hcache\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hs3 as hs3\n", + "\n", + "hnotebook.config_notebook()\n", + "\n", + "# hdbg.init_logger(verbosity=logging.DEBUG)\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "# hdbg.test_logger()\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Define computation function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-15T12:07:59.739169Z", + "start_time": "2021-08-15T12:07:59.714831Z" + } + }, + "outputs": [], + "source": [ + "def func(a, b):\n", + " # hello\n", + " # assert 0\n", + " out = a * b\n", + " print(f\"Multiplication: {a} * {b} = {out}\")\n", + " return out\n", + "\n", + "\n", + "inputs = (1, 2)\n", + "exp_output = 2\n", + "\n", + "func(*inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:32:30.476809Z", + "start_time": "2021-08-14T23:32:30.202040Z" + } + }, + "outputs": [], + "source": [ + "!ls hello/joblib/__main__*/f/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:42:14.671491Z", + "start_time": "2021-08-14T23:42:13.356163Z" + } + }, + "outputs": [], + "source": [ + "!pip install https://github.com/aabadie/joblib-s3.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:46:59.749548Z", + "start_time": "2021-08-14T23:46:54.455947Z" + } + }, + "outputs": [], + "source": [ + "#!git clone git://github.com/aabadie/joblib-s3.git\n", + "# !(cd joblib-s3 && pip install -r requirements.txt .)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": 
"2021-08-14T23:56:02.954013Z", + "start_time": "2021-08-14T23:56:02.793451Z" + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "# import joblibs3\n", + "\n", + "# joblibs3.register_s3fs_store_backend()\n", + "\n", + "# # dict(compress=False, bucket=None, anon=False,\n", + "# #key=None, secret=None, token=None, use_ssl=True)\n", + "# dict2 = {\n", + "# \"bucket\": \"alphamatic-data\",\n", + "# \"key\": dict_[\"aws_access_key_id\"],\n", + "# \"secret\": dict_[\"aws_secret_access_key\"],\n", + "# }\n", + "# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True,\n", + "# backend_options=dict2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T13:24:11.630748Z", + "start_time": "2021-08-16T13:24:10.983061Z" + } + }, + "outputs": [], + "source": [ + "# hjoblib.register_s3fs_store_backend()\n", + "\n", + "s3fs = hs3.get_s3fs(\"am\")\n", + "\n", + "dict2 = {\n", + " \"bucket\": \"alphamatic-data\",\n", + " # \"key\": dict_[\"aws_access_key_id\"],\n", + " # \"secret\": dict_[\"aws_secret_access_key\"],\n", + " \"s3fs\": s3fs,\n", + "}\n", + "\n", + "mem = joblib.Memory(\n", + " \"joblib_cache\",\n", + " backend=\"s3\",\n", + " verbose=100,\n", + " compress=True,\n", + " backend_options=dict2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-17T15:51:32.654896Z", + "start_time": "2021-08-17T15:51:32.258447Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "ename": "PermissionError", + "evalue": "Access Denied", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, 
delimiter)\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0mdircache\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0;32masync\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0mdircache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CommonPrefixes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/paginate.py\u001b[0m in \u001b[0;36m__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0mparsed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extract_parsed_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 154\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0ms3fs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m#mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mcoro\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoro\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mcoro\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsbuckets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 721\u001b[0;31m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter)\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Key\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClientError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mtranslate_boto_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdelimiter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPermissionError\u001b[0m: Access Denied" + ] + } + ], + "source": [ + "# hjoblib.register_s3fs_store_backend()\n", + "\n", + "s3fs = hs3.get_s3fs(\"am\")\n", + "dict_ = {}\n", + "\n", + "dict2 = {\n", + " \"bucket\": \"alphamatic-data\",\n", + " # \"key\": dict_[\"aws_access_key_id\"],\n", + " # \"secret\": dict_[\"aws_secret_access_key\"],\n", + " \"s3fs\": s3fs,\n", + "}\n", + "path = \"/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1\"\n", + "\n", + "\n", + "s3fs.ls(path)\n", + "\n", + "# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:54:50.021139Z", + "start_time": "2021-08-14T23:54:50.017180Z" + } + }, + "outputs": [], + "source": [ + "print(dict_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:51:25.440705Z", + "start_time": "2021-08-14T23:51:25.419214Z" + } + }, + "outputs": [], + "source": [ + "# dict_[\"bucket\"] = \"alphamatic-data/tmp\"\n", + "\n", + "print(dict_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def dec(func=None, val=5):\n", + " if func is not None:\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68549a47", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:53:08.985727Z", + "start_time": "2021-08-14T23:53:08.795065Z" + } + }, + "outputs": [], + "source": [ + "dict_ = hs3.get_aws_credentials(\"am\")\n", + "print(dict_)\n", + "# s3fs = hs3.get_s3fs(\"am\")\n", + "# s3fs.ls(\"s3://alphamatic-data/tmp\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T13:25:34.841885Z", + "start_time": "2021-08-16T13:25:34.820510Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "s3fs.clear_instance_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:23:17.621301Z", + "start_time": "2021-08-16T16:23:16.722753Z" + } + }, + "outputs": [], + "source": [ + "# import joblib\n", + "\n", + "# cachedir = \"./hello\"\n", + "# memory = joblib.Memory(cachedir, verbose=0)\n", + "\n", + "\n", + "@mem.cache()\n", + "def f(x):\n", + " # hello\n", + " print(f\"Running f({x})\")\n", + " return x\n", + "\n", + "\n", + "f(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-16T16:26:31.661915Z", + "start_time": "2021-08-16T16:26:31.640938Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'hello'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hcache.cache(set_verbose_mode=True)\n", + "\n", + "\n", + "def hello():\n", + " return \"hello\"\n", + "\n", + "\n", + "hello()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Memory cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T22:19:51.526004Z", + "start_time": "2021-08-14T22:19:51.259763Z" + } + }, + "outputs": [], + "source": [ + "!ls /app/tmp.cache.disk/joblib/" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T22:19:25.253342Z", + "start_time": "2021-08-14T22:19:24.986513Z" + } + }, + "outputs": [], + "source": [ + "!ls /mnt/tmpfs/tmp.cache.mem/joblib/lib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2021-08-14T23:21:32.636049Z", + "start_time": "2021-08-14T23:21:32.479710Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "memory_cached_func = hcache._Cached(\n", + " func, use_mem_cache=True, use_disk_cache=False\n", + ")\n", + "\n", + "print(memory_cached_func.get_function_cache_info())\n", + "\n", + "# cache_type = None\n", + "# memory_cached_func.clear_function_cache(cache_type)\n", + "\n", + "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", + "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", + "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"memory caching checks passed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:56:58.620662Z", + "start_time": "2020-09-03T19:56:58.610337Z" + } + }, + "outputs": [], + "source": [ + "def computation_function(a, b):\n", + " # hello\n", + " # assert 0\n", + " out = a * b\n", + " print(f\"Multiplication: {a} * {b} = {out}\")\n", + " return out\n", + "\n", + "\n", + "inputs = (1, 2)\n", + "exp_output = 2\n", + "\n", + "# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output)\n", + "# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Disk cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-09-03T19:45:20.999548Z", + "start_time": "2020-09-03T19:45:20.987298Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "disk_cached_computation = hcache._Cached(\n", + " computation_function, use_mem_cache=False, use_disk_cache=True\n", + ")\n", + "\n", + "disk_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"disk\")\n", + "\n", + "print(\"disk caching checks passed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Full cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.851944Z", + "start_time": "2020-09-03T19:34:54.839379Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "fully_cached_computation = hcache._Cached(\n", + " computation_function, use_mem_cache=True, use_disk_cache=True\n", + ")\n", + "\n", + "fully_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"Clear mem cache\")\n", + "fully_cached_computation.clear_function_cache()\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + 
"hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"disk\")\n", + "\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", + "\n", + "print(\"full caching checks passed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.856369Z", + "start_time": "2020-09-03T19:34:54.853563Z" + } + }, + "outputs": [], + "source": [ + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-09-03T19:34:54.995926Z", + "start_time": "2020-09-03T19:34:54.859279Z" + } + }, + "outputs": [], + "source": [ + "# This should fail all the times, because we clear the memory cache.\n", + "fully_cached_computation.clear_function_cache()\n", + "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", + "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py new file mode 100644 index 000000000..3469f42b7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py @@ -0,0 +1,274 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] pycharm={"name": "#%% md\n"} +# # Test Cache in Jupyter Notebook + +# %% +# %load_ext autoreload +# %autoreload 2 + +import logging + +import joblib + +import helpers.hcache as hcache +import helpers.hdbg as hdbg +import helpers.hs3 as hs3 + +hnotebook.config_notebook() + +# hdbg.init_logger(verbosity=logging.DEBUG) +hdbg.init_logger(verbosity=logging.INFO) +# hdbg.test_logger() +_LOG = logging.getLogger(__name__) + + +# %% [markdown] pycharm={"name": "#%% md\n"} +# # Define computation function + + +# %% +def func(a, b): + # hello + # assert 0 + out = a * b + print(f"Multiplication: {a} * {b} = {out}") + return out + + +inputs = (1, 2) +exp_output = 2 + +func(*inputs) + +# %% +# !ls hello/joblib/__main__*/f/ + +# %% +# !pip install https://github.com/aabadie/joblib-s3.git + +# %% +# #!git clone git://github.com/aabadie/joblib-s3.git +# !(cd joblib-s3 && pip install -r requirements.txt .) 
+ +# %% +# import joblibs3 + +# joblibs3.register_s3fs_store_backend() + +# # dict(compress=False, bucket=None, anon=False, +# #key=None, secret=None, token=None, use_ssl=True) +# dict2 = { +# "bucket": "alphamatic-data", +# "key": dict_["aws_access_key_id"], +# "secret": dict_["aws_secret_access_key"], +# } +# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True, +# backend_options=dict2) + +# %% +# hjoblib.register_s3fs_store_backend() + +s3fs = hs3.get_s3fs("am") + +dict2 = { + "bucket": "alphamatic-data", + # "key": dict_["aws_access_key_id"], + # "secret": dict_["aws_secret_access_key"], + "s3fs": s3fs, +} + +mem = joblib.Memory( + "joblib_cache", + backend="s3", + verbose=100, + compress=True, + backend_options=dict2, +) + +# %% +# hjoblib.register_s3fs_store_backend() + +s3fs = hs3.get_s3fs("am") +dict_ = {} + +dict2 = { + "bucket": "alphamatic-data", + # "key": dict_["aws_access_key_id"], + # "secret": dict_["aws_secret_access_key"], + "s3fs": s3fs, +} +path = "/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1" + + +s3fs.ls(path) + +# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2) + + +# %% +print(dict_) + +# %% +# dict_["bucket"] = "alphamatic-data/tmp" + +print(dict_) + + +# %% +def dec(func=None, val=5): + if func is not None: + return + + +# %% + +# %% +dict_ = hs3.get_aws_credentials("am") +print(dict_) +# s3fs = hs3.get_s3fs("am") +# s3fs.ls("s3://alphamatic-data/tmp") + +# %% +s3fs.clear_instance_cache() + + +# %% +# import joblib + +# cachedir = "./hello" +# memory = joblib.Memory(cachedir, verbose=0) + + +@mem.cache() +def f(x): + # hello + print(f"Running f({x})") + return x + + +f(1) + +# %% +hcache.cache(set_verbose_mode=True) + + +def hello(): + return "hello" + + +hello() + +# %% [markdown] pycharm={"name": "#%% md\n"} +# ## Memory cache + +# %% +# !ls /app/tmp.cache.disk/joblib/ + +# %% +# !ls /mnt/tmpfs/tmp.cache.mem/joblib/lib + +# %% 
pycharm={"name": "#%%\n"} +memory_cached_func = hcache._Cached( + func, use_mem_cache=True, use_disk_cache=False +) + +print(memory_cached_func.get_function_cache_info()) + +# cache_type = None +# memory_cached_func.clear_function_cache(cache_type) + +hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) +hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) +hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "mem") + +print("memory caching checks passed") + + +# %% +def computation_function(a, b): + # hello + # assert 0 + out = a * b + print(f"Multiplication: {a} * {b} = {out}") + return out + + +inputs = (1, 2) +exp_output = 2 + +# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output) +# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), "mem") + +# %% [markdown] +# ## Disk cache + +# %% pycharm={"name": "#%%\n"} +disk_cached_computation = hcache._Cached( + computation_function, use_mem_cache=False, use_disk_cache=True +) + +disk_cached_computation.clear_function_cache() + +hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "disk") + +print("disk caching checks passed") + +# %% [markdown] +# ## Full cache + +# %% pycharm={"name": "#%%\n"} +fully_cached_computation = hcache._Cached( + computation_function, use_mem_cache=True, use_disk_cache=True +) + +fully_cached_computation.clear_function_cache() + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "no_cache") + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + 
+hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +print("Clear mem cache") +fully_cached_computation.clear_function_cache() + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "disk") + +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +print("full caching checks passed") + +# %% +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") + +# %% +# This should fail all the times, because we clear the memory cache. +fully_cached_computation.clear_function_cache() +hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) +hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb new file mode 100644 index 000000000..3050efc31 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb @@ -0,0 +1,858 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [hcache_simple Tutorial](#hcache_simple-tutorial)\n", + " - [Imports](#imports)\n", + " - [1. Basic Caching](#1.-basic-caching)\n", + " - [2. Cache Performance Monitoring](#2.-cache-performance-monitoring)\n", + " - [3. Cache Management](#3.-cache-management)\n", + " - [4. Dynamic Runtime Parameters](#4.-dynamic-runtime-parameters)\n", + " - [5. 
Configurable Cache Locations](#5.-configurable-cache-locations)\n", + " - [6. Per-Function Configuration](#6.-per-function-configuration)\n", + " - [7. Excluding Keys from Cache](#7.-excluding-keys-from-cache)\n", + " - [8. Runtime Property Modification](#8.-runtime-property-modification)\n", + " - [9. S3 Integration](#9.-s3-integration)\n", + " - [10. Binary Data with Pickle](#10.-binary-data-with-pickle)\n", + " - [Summary](#summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# hcache_simple Tutorial\n", + "\n", + "This tutorial demonstrates the `hcache_simple` module - a lightweight caching system with memory, disk, and S3 storage.\n", + "\n", + "**Key Features:**\n", + "- Simple decorator-based caching\n", + "- Memory and disk persistence (JSON or pickle)\n", + "- S3 sync for team cache sharing\n", + "- Per-function configuration\n", + "- Performance monitoring\n", + "- Auto-pull from S3 on first cache miss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-7db4a0c4-f8f9-4dd2-9ed5-4a4fdb7cefef.json'\n" + ] + } + ], + "source": [ + "import logging\n", + "import os\n", + "import tempfile\n", + "import time\n", + "\n", + "import pandas as pd\n", + "\n", + "import helpers.hcache_simple as hcacsimp\n", + "import helpers.hdbg as hdbg\n", + "\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## 1. 
Basic Caching\n", + "\n", + "The `@simple_cache` decorator caches function results automatically.\n", + "\n", + "- First call: Computes result and stores in cache\n", + "- Subsequent calls: Returns cached result instantly\n", + "- Cache is stored in memory and on disk (JSON format)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(cache_type=\"json\")\n", + "def expensive_computation(x: int) -> int:\n", + " \"\"\"\n", + " Simulate expensive computation.\n", + " \"\"\"\n", + " _LOG.info(\"Computing result for x=%s (this takes 2 seconds)...\", x)\n", + " time.sleep(2)\n", + " return x**2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First call with x=5:\n", + "WARNING S3 bucket not configured - use set_s3_bucket()\n", + "INFO Computing result for x=5 (this takes 2 seconds)...\n", + "Result: 25\n", + "Time taken: 2.006 seconds\n", + "\n", + "Second call with x=5 (from cache):\n", + "WARNING Cache hit for expensive_computation\n", + "Result: 25\n", + "Time taken: 0.001207 seconds (much faster!)\n" + ] + } + ], + "source": [ + "# First call - computes and caches.\n", + "print(\"First call with x=5:\")\n", + "start_time = time.time()\n", + "result = expensive_computation(5)\n", + "elapsed_time = time.time() - start_time\n", + "print(f\"Result: {result}\")\n", + "print(f\"Time taken: {elapsed_time:.3f} seconds\\n\")\n", + "# Second call - returns from cache instantly.\n", + "print(\"Second call with x=5 (from cache):\")\n", + "start_time = time.time()\n", + "result = expensive_computation(5)\n", + "elapsed_time = time.time() - start_time\n", + "print(f\"Result: {result}\")\n", + "print(f\"Time taken: {elapsed_time:.6f} seconds (much faster!)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## 2. 
Cache Performance Monitoring\n", + "\n", + "Track cache efficiency with performance metrics:\n", + "- **hits**: Number of times result was retrieved from cache\n", + "- **misses**: Number of times function had to compute result\n", + "- **tot**: Total number of function calls\n", + "- **hit_rate**: Percentage of cache hits" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO Computing result for x=10 (this takes 2 seconds)...\n", + "WARNING Cache hit for expensive_computation\n", + "WARNING Cache hit for expensive_computation\n", + "INFO Computing result for x=20 (this takes 2 seconds)...\n", + "\n", + "Performance Statistics:\n", + "expensive_computation: hits=2 misses=2 tot=4 hit_rate=0.50\n" + ] + } + ], + "source": [ + "# Enable performance monitoring.\n", + "hcacsimp.enable_cache_perf(\"expensive_computation\")\n", + "# Make some calls.\n", + "expensive_computation(10) # Miss - first call with x=10.\n", + "expensive_computation(10) # Hit - cached result.\n", + "expensive_computation(10) # Hit - cached result.\n", + "expensive_computation(20) # Miss - first call with x=20.\n", + "# Check performance stats.\n", + "print(\"\\nPerformance Statistics:\")\n", + "print(hcacsimp.get_cache_perf_stats(\"expensive_computation\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## 3. 
Cache Management\n", + "\n", + "Control cache lifecycle with these operations:\n", + "- `flush_cache_to_disk()`: Write memory cache to disk\n", + "- `reset_mem_cache()`: Clear memory cache (keeps disk cache)\n", + "- `force_cache_from_disk()`: Reload cache from disk\n", + "- `cache_stats_to_str()`: View cache statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cache statistics:\n", + " memory disk\n", + "expensive_computation 3 3\n", + "\n", + "Flushed to disk\n", + "Memory cache cleared\n", + " memory disk\n", + "expensive_computation - 3\n", + "\n", + "Reloaded from disk\n", + " memory disk\n", + "expensive_computation 3 3\n" + ] + } + ], + "source": [ + "# View current cache state.\n", + "print(\"Cache statistics:\")\n", + "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))\n", + "# Flush to disk (ensure persistence).\n", + "hcacsimp.flush_cache_to_disk(\"expensive_computation\")\n", + "print(\"\\nFlushed to disk\")\n", + "# Clear memory cache.\n", + "hcacsimp.reset_mem_cache(\"expensive_computation\")\n", + "print(\"Memory cache cleared\")\n", + "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))\n", + "# Reload from disk.\n", + "hcacsimp.force_cache_from_disk(\"expensive_computation\")\n", + "print(\"\\nReloaded from disk\")\n", + "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## 4. 
Dynamic Runtime Parameters\n", + "\n", + "Control caching behavior per function call:\n", + "- `force_refresh=True`: Bypass cache and recompute\n", + "- `abort_on_cache_miss=True`: Raise error if not in cache\n", + "- `report_on_cache_miss=True`: Log warning on cache miss" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(cache_type=\"json\")\n", + "def data_processor(data: str) -> str:\n", + " \"\"\"\n", + " Process data string.\n", + " \"\"\"\n", + " _LOG.info(\"Processing: %s\", data)\n", + " time.sleep(1)\n", + " return data.upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING S3 bucket not configured - use set_s3_bucket()\n", + "INFO Processing: hello\n", + "First call: HELLO (time: 1.007s)\n", + "WARNING Cache hit for data_processor\n", + "Cached call: HELLO (time: 0.001437s - from cache!)\n", + "INFO Processing: hello\n", + "Force refresh: HELLO\n", + "With report: _cache_miss_\n" + ] + } + ], + "source": [ + "# Normal call - caches result.\n", + "start_time = time.time()\n", + "result = data_processor(\"hello\")\n", + "elapsed_time = time.time() - start_time\n", + "print(f\"First call: {result} (time: {elapsed_time:.3f}s)\")\n", + "# Cached call - returns instantly.\n", + "start_time = time.time()\n", + "result = data_processor(\"hello\")\n", + "elapsed_time = time.time() - start_time\n", + "print(f\"Cached call: {result} (time: {elapsed_time:.6f}s - from cache!)\")\n", + "# Force refresh - recomputes even though cached.\n", + "result = data_processor(\"hello\", force_refresh=True)\n", + "print(f\"Force refresh: {result}\")\n", + "# Report on cache miss.\n", + "result = data_processor(\"world\", report_on_cache_miss=True)\n", + "print(f\"With report: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": 
[ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cache miss error: Cache miss for key='{\"args\": [\"new_value\"], \"kwargs\": {}}'\n" + ] + } + ], + "source": [ + "# Abort on cache miss - raises ValueError if not cached.\n", + "try:\n", + " result = data_processor(\"new_value\", abort_on_cache_miss=True)\n", + "except ValueError as e:\n", + " print(f\"Cache miss error: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## 5. Configurable Cache Locations\n", + "\n", + "Customize where cache files are stored globally:\n", + "- `set_cache_dir()`: Change cache directory\n", + "- `set_cache_file_prefix()`: Change cache file prefix" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cache directory set to: /tmp/tmpxyhjnt9y\n", + "Cache file prefix set to: my_project\n", + "WARNING S3 bucket not configured - use set_s3_bucket()\n", + "\n", + "Cache files created: ['my_project_property.pkl', 'my_project.custom_location_func.json']\n" + ] + } + ], + "source": [ + "# Set custom cache directory.\n", + "cache_dir = tempfile.mkdtemp()\n", + "hcacsimp.set_cache_dir(cache_dir)\n", + "print(f\"Cache directory set to: {cache_dir}\")\n", + "# Set custom prefix.\n", + "hcacsimp.set_cache_file_prefix(\"my_project\")\n", + "print(\"Cache file prefix set to: my_project\")\n", + "\n", + "\n", + "# New cached function will use these settings.\n", + "@hcacsimp.simple_cache(cache_type=\"json\")\n", + "def custom_location_func(x: int) -> int:\n", + " return x * 3\n", + "\n", + "\n", + "# Call function.\n", + "result = custom_location_func(7)\n", + "# Verify cache file location.\n", + "cache_files = [f for f in os.listdir(cache_dir) if \"my_project\" in f]\n", + "print(f\"\\nCache files created: {cache_files}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## 
6. Per-Function Configuration\n", + "\n", + "Override global settings for specific functions:\n", + "- Each function can have its own cache directory\n", + "- Each function can have its own cache prefix\n", + "- Useful for organizing different types of caches" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(\n", + " cache_type=\"json\",\n", + " cache_dir=\"/tmp/function_a_cache\",\n", + " cache_prefix=\"func_a\",\n", + ")\n", + "def function_a(x: int) -> int:\n", + " return x + 100\n", + "\n", + "\n", + "@hcacsimp.simple_cache(\n", + " cache_type=\"json\",\n", + " cache_dir=\"/tmp/function_b_cache\",\n", + " cache_prefix=\"func_b\",\n", + ")\n", + "def function_b(x: int) -> int:\n", + " return x + 200" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING Cache hit for function_a\n", + "WARNING Cache hit for function_b\n", + "function_a(5) = 105\n", + "function_b(5) = 205\n", + "\n", + "function_a cache location:\n", + " Cache file: /tmp/function_a_cache/func_a.function_a.json\n", + "\n", + "function_b cache location:\n", + " Cache file: /tmp/function_b_cache/func_b.function_b.json\n" + ] + } + ], + "source": [ + "# Call both functions - each uses its own cache location.\n", + "result_a = function_a(5)\n", + "result_b = function_b(5)\n", + "print(f\"function_a(5) = {result_a}\")\n", + "print(f\"function_b(5) = {result_b}\")\n", + "# Verify separate cache files.\n", + "print(\"\\nfunction_a cache location:\")\n", + "cache_file_a = hcacsimp._get_cache_file_name(\"function_a\")\n", + "print(f\" Cache file: {cache_file_a}\")\n", + "print(\"\\nfunction_b cache location:\")\n", + "cache_file_b = hcacsimp._get_cache_file_name(\"function_b\")\n", + "print(f\" Cache file: {cache_file_b}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, 
+ "source": [ + "\n", + "## 7. Excluding Keys from Cache\n", + "\n", + "Some parameters should not affect cache lookup:\n", + "- Session IDs\n", + "- Logger objects\n", + "- Timestamps\n", + "- Random seeds (when you want same result)\n", + "\n", + "Use `exclude_keys` to ignore these parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(\n", + " cache_type=\"json\",\n", + " exclude_keys=[\"session_id\", \"timestamp\"],\n", + ")\n", + "def api_call(query: str, session_id: str, timestamp: float) -> str:\n", + " \"\"\"\n", + " Simulate API call where session_id and timestamp don't affect result.\n", + " \"\"\"\n", + " _LOG.info(\"Making API call for query: %s\", query)\n", + " time.sleep(1)\n", + " return f\"Response for: {query}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING S3 bucket not configured - use set_s3_bucket()\n", + "INFO Making API call for query: search python\n", + "First call: Response for: search python (time: 1.006s)\n", + "WARNING Cache hit for api_call\n", + "Second call (from cache despite different session/timestamp): Response for: search python (time: 0.001377s)\n", + "INFO Making API call for query: search java\n", + "Third call (different query, cache miss): Response for: search java\n" + ] + } + ], + "source": [ + "# These calls have different session_id and timestamp but return cached result.\n", + "start_time = time.time()\n", + "result1 = api_call(\"search python\", session_id=\"abc123\", timestamp=1.0)\n", + "elapsed_time = time.time() - start_time\n", + "print(f\"First call: {result1} (time: {elapsed_time:.3f}s)\")\n", + "start_time = time.time()\n", + "result2 = api_call(\"search python\", session_id=\"xyz789\", timestamp=2.0)\n", + "elapsed_time = time.time() - start_time\n", + "print(\n", + 
" f\"Second call (from cache despite different session/timestamp): {result2} (time: {elapsed_time:.6f}s)\"\n", + ")\n", + "# Different query triggers cache miss.\n", + "result3 = api_call(\"search java\", session_id=\"abc123\", timestamp=1.0)\n", + "print(f\"Third call (different query, cache miss): {result3}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## 8. Runtime Property Modification\n", + "\n", + "All decorator parameters are stored as properties and can be modified at runtime.\n", + "This allows you to change cache behavior without redecorating functions.\n", + "\n", + "**Common use cases:**\n", + "- Disable write-through temporarily for performance\n", + "- Add/remove keys from exclusion list\n", + "- Enable/disable S3 sync dynamically" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(cache_type=\"json\", exclude_keys=[\"session_id\"])\n", + "def api_call(query: str, session_id: str) -> str:\n", + " \"\"\"\n", + " Simulate API call where session_id doesn't affect result.\n", + " \"\"\"\n", + " _LOG.info(\"Making API call for query=%s\", query)\n", + " time.sleep(1)\n", + " return f\"Result for: {query}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial exclude_keys: ['session_id']\n", + "Calling with query='python', session_id='abc'...\n", + "INFO Making API call for query=python\n", + "Result: Result for: python (time: 1.004s)\n", + "\n", + "Calling with query='python', session_id='xyz' (different session_id)...\n", + "WARNING Cache hit for api_call\n", + "Result: Result for: python (time: 0.001291s - cache hit!)\n" + ] + } + ], + "source": [ + "# Demonstrate initial exclude_keys behavior.\n", + "print(\"Initial exclude_keys: ['session_id']\")\n", + "print(\"Calling with 
query='python', session_id='abc'...\")\n", + "start_time = time.time()\n", + "result1 = api_call(\"python\", session_id=\"abc\")\n", + "elapsed1 = time.time() - start_time\n", + "print(f\"Result: {result1} (time: {elapsed1:.3f}s)\")\n", + "# Same query, different session_id - should hit cache.\n", + "print(\"\\nCalling with query='python', session_id='xyz' (different session_id)...\")\n", + "start_time = time.time()\n", + "result2 = api_call(\"python\", session_id=\"xyz\")\n", + "elapsed2 = time.time() - start_time\n", + "print(f\"Result: {result2} (time: {elapsed2:.6f}s - cache hit!)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Modifying exclude_keys to [] (empty - don't exclude session_id)\n", + "exclude_keys now: []\n", + "\n", + "Calling with query='python', session_id='new123' (after modification)...\n", + "INFO Making API call for query=python\n", + "Result: Result for: python (time: 1.004s - cache miss, computed new!)\n" + ] + } + ], + "source": [ + "# Now modify exclude_keys to REMOVE session_id from exclusion.\n", + "print(\"\\nModifying exclude_keys to [] (empty - don't exclude session_id)\")\n", + "hcacsimp.set_cache_property(\"api_call\", \"exclude_keys\", [])\n", + "# Verify change.\n", + "exclude_keys_after = hcacsimp.get_cache_property(\"api_call\", \"exclude_keys\")\n", + "print(f\"exclude_keys now: {exclude_keys_after}\")\n", + "# Now same query with different session_id creates NEW cache entry.\n", + "print(\n", + " \"\\nCalling with query='python', session_id='new123' (after modification)...\"\n", + ")\n", + "start_time = time.time()\n", + "result3 = api_call(\"python\", session_id=\"new123\")\n", + "elapsed3 = time.time() - start_time\n", + "print(f\"Result: {result3} (time: {elapsed3:.3f}s - cache miss, computed new!)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## 9. 
S3 Integration\n", + "\n", + "**Note:** These examples are commented out because they require AWS credentials.\n", + "Uncomment and configure to use S3 caching.\n", + "\n", + "**S3 as Third Storage Layer:**\n", + "- S3 is integrated into the cache lookup as the third tier: Memory → Disk → S3\n", + "- When `get_cache()` is called, it automatically checks all three layers\n", + "- A cache \"miss\" only occurs if the key is not found in ANY layer\n", + "\n", + "**S3 Features:**\n", + "- `auto_sync_s3=True`: Automatically upload cache updates to S3\n", + "- Auto-pull: Automatically checks S3 as part of cache lookup (one-time per function)\n", + "- Manual cache operations: Use `push_cache_to_s3()` to manually upload, `pull_cache_from_s3()` to manually download and `sync_cache_with_s3()` to manually sync cache files between S3 and disk\n", + "\n", + "**Usage:**\n", + "1. Configure S3 globally or per-function\n", + "2. First call on any machine computes and uploads to S3\n", + "3. Other machines automatically check S3 during cache lookup\n", + "4. 
Updates are automatically synced to S3 (if `auto_sync_s3=True`)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# # Global S3 configuration (applies to all cached functions).\n", + "# hcacsimp.set_s3_bucket(\"s3://my-team-bucket\")\n", + "# hcacsimp.set_s3_prefix(\"cache/shared\")\n", + "# hcacsimp.set_aws_profile(\"my-aws-profile\")\n", + "#\n", + "# @hcacsimp.simple_cache(\n", + "# cache_type=\"json\",\n", + "# auto_sync_s3=True, # Auto-upload to S3 after cache updates on disk.\n", + "# )\n", + "# def expensive_llm_call(prompt: str) -> str:\n", + "# \"\"\"\n", + "# Simulate expensive LLM API call.\n", + "# \"\"\"\n", + "# time.sleep(3)\n", + "# return f\"LLM response to: {prompt}\"\n", + "#\n", + "# # First call on any machine - computes and uploads to S3.\n", + "# result = expensive_llm_call(\"Summarize this document\")\n", + "# print(f\"Result: {result}\")\n", + "#\n", + "# # On another machine - S3 is automatically checked during cache lookup.\n", + "# # get_cache() checks: memory → disk → S3.\n", + "# result = expensive_llm_call(\"Summarize this document\")\n", + "# print(f\"Result from cache: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# # Per-function S3 configuration (overrides global settings).\n", + "# @hcacsimp.simple_cache(\n", + "# cache_type=\"json\",\n", + "# s3_bucket=\"s3://project-specific-bucket\",\n", + "# s3_prefix=\"cache/llm\",\n", + "# aws_profile=\"project-profile\",\n", + "# auto_sync_s3=True,\n", + "# )\n", + "# def project_specific_cache(data: str) -> str:\n", + "# return f\"Processed: {data}\"\n", + "#\n", + "# result = project_specific_cache(\"test data\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## 10. 
Binary Data with Pickle\n", + "\n", + "For complex Python objects (DataFrames, models, etc.), use pickle format:\n", + "- `cache_type=\"pickle\"`: Stores any Python object\n", + "- Supports DataFrames, numpy arrays, custom classes, etc.\n", + "- Trade-off: Not human-readable like JSON" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "@hcacsimp.simple_cache(cache_type=\"pickle\")\n", + "def create_dataframe(rows: int) -> pd.DataFrame:\n", + " \"\"\"\n", + " Create a DataFrame (can't be cached as JSON easily).\n", + " \"\"\"\n", + " _LOG.info(\"Creating DataFrame with %s rows...\", rows)\n", + " time.sleep(1)\n", + " return pd.DataFrame(\n", + " {\n", + " \"id\": range(rows),\n", + " \"value\": [x**2 for x in range(rows)],\n", + " }\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING S3 bucket not configured - use set_s3_bucket()\n", + "INFO Creating DataFrame with 5 rows...\n", + "First call:\n", + " id value\n", + "0 0 0\n", + "1 1 1\n", + "2 2 4\n", + "3 3 9\n", + "4 4 16\n", + "Time taken: 1.007 seconds\n", + "WARNING Cache hit for create_dataframe\n", + "\n", + "Second call (from cache):\n", + " id value\n", + "0 0 0\n", + "1 1 1\n", + "2 2 4\n", + "3 3 9\n", + "4 4 16\n", + "Time taken: 0.001184 seconds (from cache!)\n" + ] + } + ], + "source": [ + "# First call - computes and caches DataFrame.\n", + "start_time = time.time()\n", + "df = create_dataframe(5)\n", + "elapsed_time = time.time() - start_time\n", + "print(\"First call:\")\n", + "print(df)\n", + "print(f\"Time taken: {elapsed_time:.3f} seconds\")\n", + "# Second call - returns cached DataFrame instantly.\n", + "start_time = time.time()\n", + "df = create_dataframe(5)\n", + "elapsed_time = time.time() - start_time\n", + "print(\"\\nSecond call (from cache):\")\n", + "print(df)\n", + "print(f\"Time taken: 
{elapsed_time:.6f} seconds (from cache!)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Summary\n", + "\n", + "The `hcache_simple` module provides:\n", + "- **Easy caching**: Just add `@simple_cache` decorator\n", + "- **Multiple storage layers**: Memory (fast) → Disk (persistent) → S3 (shared)\n", + "- **Flexible configuration**: Global and per-function settings\n", + "- **Runtime modification**: Change cache behavior without redecorating functions\n", + "- **Performance monitoring**: Track cache efficiency\n", + "- **Team collaboration**: Share caches via S3 with auto-pull\n", + "- **Format support**: JSON (human-readable) or pickle (binary)\n", + "\n", + "For full documentation, see: `docs/tools/helpers/all.hcache_simple.explanation.md`" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py new file mode 100644 index 000000000..48c2d2d3c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py @@ -0,0 +1,486 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# kernelspec: +# 
display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [hcache_simple Tutorial](#hcache_simple-tutorial) +# - [Imports](#imports) +# - [1. Basic Caching](#1.-basic-caching) +# - [2. Cache Performance Monitoring](#2.-cache-performance-monitoring) +# - [3. Cache Management](#3.-cache-management) +# - [4. Dynamic Runtime Parameters](#4.-dynamic-runtime-parameters) +# - [5. Configurable Cache Locations](#5.-configurable-cache-locations) +# - [6. Per-Function Configuration](#6.-per-function-configuration) +# - [7. Excluding Keys from Cache](#7.-excluding-keys-from-cache) +# - [8. Runtime Property Modification](#8.-runtime-property-modification) +# - [9. S3 Integration](#9.-s3-integration) +# - [10. Binary Data with Pickle](#10.-binary-data-with-pickle) +# - [Summary](#summary) + +# %% [markdown] +# +# # hcache_simple Tutorial +# +# This tutorial demonstrates the `hcache_simple` module - a lightweight caching system with memory, disk, and S3 storage. +# +# **Key Features:** +# - Simple decorator-based caching +# - Memory and disk persistence (JSON or pickle) +# - S3 sync for team cache sharing +# - Per-function configuration +# - Performance monitoring +# - Auto-pull from S3 on first cache miss + +# %% [markdown] +# +# ## Imports + +# %% +import logging +import os +import tempfile +import time + +import pandas as pd + +import helpers.hcache_simple as hcacsimp +import helpers.hdbg as hdbg + +hdbg.init_logger(verbosity=logging.INFO) +_LOG = logging.getLogger(__name__) + +# %% [markdown] +# +# ## 1. Basic Caching +# +# The `@simple_cache` decorator caches function results automatically. +# +# - First call: Computes result and stores in cache +# - Subsequent calls: Returns cached result instantly +# - Cache is stored in memory and on disk (JSON format) + + +# %% +@hcacsimp.simple_cache(cache_type="json") +def expensive_computation(x: int) -> int: + """ + Simulate expensive computation. 
+ """ + _LOG.info("Computing result for x=%s (this takes 2 seconds)...", x) + time.sleep(2) + return x**2 + + +# %% +# First call - computes and caches. +print("First call with x=5:") +start_time = time.time() +result = expensive_computation(5) +elapsed_time = time.time() - start_time +print(f"Result: {result}") +print(f"Time taken: {elapsed_time:.3f} seconds\n") +# Second call - returns from cache instantly. +print("Second call with x=5 (from cache):") +start_time = time.time() +result = expensive_computation(5) +elapsed_time = time.time() - start_time +print(f"Result: {result}") +print(f"Time taken: {elapsed_time:.6f} seconds (much faster!)") + +# %% [markdown] +# +# ## 2. Cache Performance Monitoring +# +# Track cache efficiency with performance metrics: +# - **hits**: Number of times result was retrieved from cache +# - **misses**: Number of times function had to compute result +# - **tot**: Total number of function calls +# - **hit_rate**: Percentage of cache hits + +# %% +# Enable performance monitoring. +hcacsimp.enable_cache_perf("expensive_computation") +# Make some calls. +expensive_computation(10) # Miss - first call with x=10. +expensive_computation(10) # Hit - cached result. +expensive_computation(10) # Hit - cached result. +expensive_computation(20) # Miss - first call with x=20. +# Check performance stats. +print("\nPerformance Statistics:") +print(hcacsimp.get_cache_perf_stats("expensive_computation")) + +# %% [markdown] +# +# ## 3. Cache Management +# +# Control cache lifecycle with these operations: +# - `flush_cache_to_disk()`: Write memory cache to disk +# - `reset_mem_cache()`: Clear memory cache (keeps disk cache) +# - `force_cache_from_disk()`: Reload cache from disk +# - `cache_stats_to_str()`: View cache statistics + +# %% +# View current cache state. +print("Cache statistics:") +print(hcacsimp.cache_stats_to_str("expensive_computation")) +# Flush to disk (ensure persistence). 
+hcacsimp.flush_cache_to_disk("expensive_computation") +print("\nFlushed to disk") +# Clear memory cache. +hcacsimp.reset_mem_cache("expensive_computation") +print("Memory cache cleared") +print(hcacsimp.cache_stats_to_str("expensive_computation")) +# Reload from disk. +hcacsimp.force_cache_from_disk("expensive_computation") +print("\nReloaded from disk") +print(hcacsimp.cache_stats_to_str("expensive_computation")) + +# %% [markdown] +# +# ## 4. Dynamic Runtime Parameters +# +# Control caching behavior per function call: +# - `force_refresh=True`: Bypass cache and recompute +# - `abort_on_cache_miss=True`: Raise error if not in cache +# - `report_on_cache_miss=True`: Log warning on cache miss + + +# %% +@hcacsimp.simple_cache(cache_type="json") +def data_processor(data: str) -> str: + """ + Process data string. + """ + _LOG.info("Processing: %s", data) + time.sleep(1) + return data.upper() + + +# %% +# Normal call - caches result. +start_time = time.time() +result = data_processor("hello") +elapsed_time = time.time() - start_time +print(f"First call: {result} (time: {elapsed_time:.3f}s)") +# Cached call - returns instantly. +start_time = time.time() +result = data_processor("hello") +elapsed_time = time.time() - start_time +print(f"Cached call: {result} (time: {elapsed_time:.6f}s - from cache!)") +# Force refresh - recomputes even though cached. +result = data_processor("hello", force_refresh=True) +print(f"Force refresh: {result}") +# Report on cache miss. +result = data_processor("world", report_on_cache_miss=True) +print(f"With report: {result}") + +# %% +# Abort on cache miss - raises ValueError if not cached. +try: + result = data_processor("new_value", abort_on_cache_miss=True) +except ValueError as e: + print(f"Cache miss error: {e}") + +# %% [markdown] +# +# ## 5. 
Configurable Cache Locations +# +# Customize where cache files are stored globally: +# - `set_cache_dir()`: Change cache directory +# - `set_cache_file_prefix()`: Change cache file prefix + +# %% +# Set custom cache directory. +cache_dir = tempfile.mkdtemp() +hcacsimp.set_cache_dir(cache_dir) +print(f"Cache directory set to: {cache_dir}") +# Set custom prefix. +hcacsimp.set_cache_file_prefix("my_project") +print("Cache file prefix set to: my_project") + + +# New cached function will use these settings. +@hcacsimp.simple_cache(cache_type="json") +def custom_location_func(x: int) -> int: + return x * 3 + + +# Call function. +result = custom_location_func(7) +# Verify cache file location. +cache_files = [f for f in os.listdir(cache_dir) if "my_project" in f] +print(f"\nCache files created: {cache_files}") + +# %% [markdown] +# +# ## 6. Per-Function Configuration +# +# Override global settings for specific functions: +# - Each function can have its own cache directory +# - Each function can have its own cache prefix +# - Useful for organizing different types of caches + + +# %% +@hcacsimp.simple_cache( + cache_type="json", + cache_dir="/tmp/function_a_cache", + cache_prefix="func_a", +) +def function_a(x: int) -> int: + return x + 100 + + +@hcacsimp.simple_cache( + cache_type="json", + cache_dir="/tmp/function_b_cache", + cache_prefix="func_b", +) +def function_b(x: int) -> int: + return x + 200 + + +# %% +# Call both functions - each uses its own cache location. +result_a = function_a(5) +result_b = function_b(5) +print(f"function_a(5) = {result_a}") +print(f"function_b(5) = {result_b}") +# Verify separate cache files. +print("\nfunction_a cache location:") +cache_file_a = hcacsimp._get_cache_file_name("function_a") +print(f" Cache file: {cache_file_a}") +print("\nfunction_b cache location:") +cache_file_b = hcacsimp._get_cache_file_name("function_b") +print(f" Cache file: {cache_file_b}") + +# %% [markdown] +# +# ## 7. 
Excluding Keys from Cache +# +# Some parameters should not affect cache lookup: +# - Session IDs +# - Logger objects +# - Timestamps +# - Random seeds (when you want same result) +# +# Use `exclude_keys` to ignore these parameters. + + +# %% +@hcacsimp.simple_cache( + cache_type="json", + exclude_keys=["session_id", "timestamp"], +) +def api_call(query: str, session_id: str, timestamp: float) -> str: + """ + Simulate API call where session_id and timestamp don't affect result. + """ + _LOG.info("Making API call for query: %s", query) + time.sleep(1) + return f"Response for: {query}" + + +# %% +# These calls have different session_id and timestamp but return cached result. +start_time = time.time() +result1 = api_call("search python", session_id="abc123", timestamp=1.0) +elapsed_time = time.time() - start_time +print(f"First call: {result1} (time: {elapsed_time:.3f}s)") +start_time = time.time() +result2 = api_call("search python", session_id="xyz789", timestamp=2.0) +elapsed_time = time.time() - start_time +print( + f"Second call (from cache despite different session/timestamp): {result2} (time: {elapsed_time:.6f}s)" +) +# Different query triggers cache miss. +result3 = api_call("search java", session_id="abc123", timestamp=1.0) +print(f"Third call (different query, cache miss): {result3}") + + +# %% [markdown] +# +# ## 8. Runtime Property Modification +# +# All decorator parameters are stored as properties and can be modified at runtime. +# This allows you to change cache behavior without redecorating functions. +# +# **Common use cases:** +# - Disable write-through temporarily for performance +# - Add/remove keys from exclusion list +# - Enable/disable S3 sync dynamically + + +# %% +@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) +def api_call(query: str, session_id: str) -> str: + """ + Simulate API call where session_id doesn't affect result. 
+ """ + _LOG.info("Making API call for query=%s", query) + time.sleep(1) + return f"Result for: {query}" + + +# %% +# Demonstrate initial exclude_keys behavior. +print("Initial exclude_keys: ['session_id']") +print("Calling with query='python', session_id='abc'...") +start_time = time.time() +result1 = api_call("python", session_id="abc") +elapsed1 = time.time() - start_time +print(f"Result: {result1} (time: {elapsed1:.3f}s)") +# Same query, different session_id - should hit cache. +print("\nCalling with query='python', session_id='xyz' (different session_id)...") +start_time = time.time() +result2 = api_call("python", session_id="xyz") +elapsed2 = time.time() - start_time +print(f"Result: {result2} (time: {elapsed2:.6f}s - cache hit!)") + +# %% +# Now modify exclude_keys to REMOVE session_id from exclusion. +print("\nModifying exclude_keys to [] (empty - don't exclude session_id)") +hcacsimp.set_cache_property("api_call", "exclude_keys", []) +# Verify change. +exclude_keys_after = hcacsimp.get_cache_property("api_call", "exclude_keys") +print(f"exclude_keys now: {exclude_keys_after}") +# Now same query with different session_id creates NEW cache entry. +print( + "\nCalling with query='python', session_id='new123' (after modification)..." +) +start_time = time.time() +result3 = api_call("python", session_id="new123") +elapsed3 = time.time() - start_time +print(f"Result: {result3} (time: {elapsed3:.3f}s - cache miss, computed new!)") + +# %% [markdown] +# +# ## 9. S3 Integration +# +# **Note:** These examples are commented out because they require AWS credentials. +# Uncomment and configure to use S3 caching. 
+# +# **S3 as Third Storage Layer:** +# - S3 is integrated into the cache lookup as the third tier: Memory → Disk → S3 +# - When `get_cache()` is called, it automatically checks all three layers +# - A cache "miss" only occurs if the key is not found in ANY layer +# +# **S3 Features:** +# - `auto_sync_s3=True`: Automatically upload cache updates to S3 +# - Auto-pull: Automatically checks S3 as part of cache lookup (one-time per function) +# - Manual cache operations: Use `push_cache_to_s3()` to manually upload, `pull_cache_from_s3()` to manually download and `sync_cache_with_s3()` to manually sync cache files between S3 and disk +# +# **Usage:** +# 1. Configure S3 globally or per-function +# 2. First call on any machine computes and uploads to S3 +# 3. Other machines automatically check S3 during cache lookup +# 4. Updates are automatically synced to S3 (if `auto_sync_s3=True`) + +# %% +# # Global S3 configuration (applies to all cached functions). +# hcacsimp.set_s3_bucket("s3://my-team-bucket") +# hcacsimp.set_s3_prefix("cache/shared") +# hcacsimp.set_aws_profile("my-aws-profile") +# +# @hcacsimp.simple_cache( +# cache_type="json", +# auto_sync_s3=True, # Auto-upload to S3 after cache updates on disk. +# ) +# def expensive_llm_call(prompt: str) -> str: +# """ +# Simulate expensive LLM API call. +# """ +# time.sleep(3) +# return f"LLM response to: {prompt}" +# +# # First call on any machine - computes and uploads to S3. +# result = expensive_llm_call("Summarize this document") +# print(f"Result: {result}") +# +# # On another machine - S3 is automatically checked during cache lookup. +# # get_cache() checks: memory → disk → S3. +# result = expensive_llm_call("Summarize this document") +# print(f"Result from cache: {result}") + +# %% +# # Per-function S3 configuration (overrides global settings). 
+# @hcacsimp.simple_cache( +# cache_type="json", +# s3_bucket="s3://project-specific-bucket", +# s3_prefix="cache/llm", +# aws_profile="project-profile", +# auto_sync_s3=True, +# ) +# def project_specific_cache(data: str) -> str: +# return f"Processed: {data}" +# +# result = project_specific_cache("test data") + +# %% [markdown] +# +# ## 10. Binary Data with Pickle +# +# For complex Python objects (DataFrames, models, etc.), use pickle format: +# - `cache_type="pickle"`: Stores any Python object +# - Supports DataFrames, numpy arrays, custom classes, etc. +# - Trade-off: Not human-readable, unlike JSON + + +# %% +@hcacsimp.simple_cache(cache_type="pickle") +def create_dataframe(rows: int) -> pd.DataFrame: + """ + Create a DataFrame (can't be cached as JSON easily). + """ + _LOG.info("Creating DataFrame with %s rows...", rows) + time.sleep(1) + return pd.DataFrame( + { + "id": range(rows), + "value": [x**2 for x in range(rows)], + } + ) + + +# %% +# First call - computes and caches DataFrame. +start_time = time.time() +df = create_dataframe(5) +elapsed_time = time.time() - start_time +print("First call:") +print(df) +print(f"Time taken: {elapsed_time:.3f} seconds") +# Second call - returns cached DataFrame instantly. 
+start_time = time.time() +df = create_dataframe(5) +elapsed_time = time.time() - start_time +print("\nSecond call (from cache):") +print(df) +print(f"Time taken: {elapsed_time:.6f} seconds (from cache!)") + +# %% [markdown] +# +# ## Summary +# +# The `hcache_simple` module provides: +# - **Easy caching**: Just add `@simple_cache` decorator +# - **Multiple storage layers**: Memory (fast) → Disk (persistent) → S3 (shared) +# - **Flexible configuration**: Global and per-function settings +# - **Runtime modification**: Change cache behavior without redecorating functions +# - **Performance monitoring**: Track cache efficiency +# - **Team collaboration**: Share caches via S3 with auto-pull +# - **Format support**: JSON (human-readable) or pickle (binary) +# +# For full documentation, see: `docs/tools/helpers/all.hcache_simple.explanation.md` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb new file mode 100644 index 000000000..7b505f87e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb @@ -0,0 +1,424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [hgoogle_file_api.py](#hgoogle_file_api.py)\n", + " - [Get Credentials for your drive](#get-credentials-for-your-drive)\n", + " - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet)\n", + " - [Freeze Rows](#freeze-rows)\n", + " - [Change the height of certin rows](#change-the-height-of-certin-rows)\n", + " - [Read some nice data](#read-some-nice-data)\n", + " - [Write this nice data](#write-this-nice-data)" + ] + }, + { + 
"cell_type": "markdown", + "id": "982ab891-de0a-47d5-946a-0f4fd3f16307", + "metadata": {}, + "source": [ + "\n", + "# hgoogle_file_api.py" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6b997caf-4bfc-47bc-b7e1-584f02da328f", + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade google-api-python-client)\"\n", + "# !sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade pip install oauth2client)\"\n", + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade gspread)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0fdf8a01-00ed-4e40-8b8b-3e4ecfe37d45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "import helpers.hgoogle_drive_api as hgodrapi\n", + "\n", + "importlib.reload(hgodrapi)" + ] + }, + { + "cell_type": "markdown", + "id": "f9733115-f65b-43fb-8b56-32be7588c617", + "metadata": {}, + "source": [ + "\n", + "## Get Credentials for your drive" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0f3eb12a-bd7e-4846-a8f0-331ece997137", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "google_creds = hgodrapi.get_credentials()\n", + "print(google_creds)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "feb74dae-ff52-44ce-b698-4c04cc2bc8f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "service = hgodrapi.get_sheets_service(google_creds)\n", + "print(service)" + ] + }, + { + "cell_type": "markdown", + "id": "9e1c8840-c759-4bd6-a2c5-f30d94daf72b", + "metadata": {}, + "source": [ + "\n", + "## Get Tab/Sheet id of a particular google sheet" + ] + }, + { + 
"cell_type": "code", + "execution_count": 8, + "id": "67fe7cc1-0f90-4b45-b93d-c6eaecd25028", + "metadata": {}, + "outputs": [], + "source": [ + "tab_name = \"cleaned_profiles_1\"\n", + "url = \"https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260\"\n", + "sheet_id = \"1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA\"\n", + "credentials = google_creds" + ] + }, + { + "cell_type": "markdown", + "id": "f18db947-8170-4cba-8799-dfe792e1c732", + "metadata": {}, + "source": [ + "\n", + "## Freeze Rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "232a1ee0-83d2-4449-a8c0-a8e8eca02fc5", + "metadata": {}, + "outputs": [], + "source": [ + "row_indices = [0, 1, 2]\n", + "hgodrapi.freeze_rows(\n", + " credentials,\n", + " sheet_id=sheet_id,\n", + " row_indices=row_indices,\n", + " tab_name=tab_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dd9b9b7d-2dc6-416d-bd9c-a8039fadaba2", + "metadata": {}, + "source": [ + "\n", + "## Change the height of certin rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50db6e3d-8d05-47ea-9ace-dc79ce131f37", + "metadata": {}, + "outputs": [], + "source": [ + "hgodrapi.set_row_height(\n", + " google_creds,\n", + " sheet_id=sheet_id,\n", + " height=20,\n", + " start_index=0,\n", + " end_index=2,\n", + " tab_name=tab_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3569d048-d69e-4e4b-ab53-a93b6f4a41d1", + "metadata": {}, + "source": [ + "\n", + "## Read some nice data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e63bede3-2948-4a37-b444-36b4dba81c6d", + "metadata": {}, + "outputs": [], + "source": [ + "nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "59233081-ac03-4ac7-96b1-4de1b07fae75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameTitleFirmLocationPersonal Investment FocusGeographic FocusTypical Deal SizePreferred StagesNotable Personal AI InvestmentsPotential Fit with KaizenfirstNamelastName
0Michael MoritzManaging PartnerSequoia CapitalMenlo ParkAI/MLGlobal$10MEarly to GrowthGoogleHigh'''MichaelMoritz
1Navid AlipourManaging PartnerAnalytics VenturesSan DiegoAI/MLSan Diego$5MSeedCureMetrixHigh''NavidAlipour
2Aaref HilalyPartnerBain Capital VenturesPalo AltoReal-time AnalyticsBurlingameSeedSeed/Early StageRubrikMediumAarefHilaly
3Aaron FleishmanPrincipalTola CapitalSeattle WAEnterprise AIPNW$5M-$20MSeries ADatabricksHighAaronFleishman
4Aaron JacobsonPartnerNew Enterprise AssociatesMenlo ParkMLOpsNorth AmericaSeries A$10M-$30MDatabricksHighAaronJacobson
\n", + "
" + ], + "text/plain": [ + " Name Title Firm Location \\\n", + "0 Michael Moritz Managing Partner Sequoia Capital Menlo Park \n", + "1 Navid Alipour Managing Partner Analytics Ventures San Diego \n", + "2 Aaref Hilaly Partner Bain Capital Ventures Palo Alto \n", + "3 Aaron Fleishman Principal Tola Capital Seattle WA \n", + "4 Aaron Jacobson Partner New Enterprise Associates Menlo Park \n", + "\n", + " Personal Investment Focus Geographic Focus Typical Deal Size \\\n", + "0 AI/ML Global $10M \n", + "1 AI/ML San Diego $5M \n", + "2 Real-time Analytics Burlingame Seed \n", + "3 Enterprise AI PNW $5M-$20M \n", + "4 MLOps North America Series A \n", + "\n", + " Preferred Stages Notable Personal AI Investments Potential Fit with Kaizen \\\n", + "0 Early to Growth Google High''' \n", + "1 Seed CureMetrix High'' \n", + "2 Seed/Early Stage Rubrik Medium \n", + "3 Series A Databricks High \n", + "4 $10M-$30M Databricks High \n", + "\n", + " firstName lastName \n", + "0 Michael Moritz \n", + "1 Navid Alipour \n", + "2 Aaref Hilaly \n", + "3 Aaron Fleishman \n", + "4 Aaron Jacobson " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nice_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "30dcc791-cbdb-45f1-9298-a74e0a7babab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 12)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nice_data.shape" + ] + }, + { + "cell_type": "markdown", + "id": "5c4cafb4-fe5f-4f6e-b594-759b199acb7e", + "metadata": {}, + "source": [ + "\n", + "## Write this nice data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b1f4a89-cb96-417a-86f4-ebc513c18510", + "metadata": {}, + "outputs": [], + "source": [ + "hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name=\"testing_tab\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py new file mode 100644 index 000000000..a76ac9e94 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py @@ -0,0 +1,107 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [hgoogle_file_api.py](#hgoogle_file_api.py) +# - [Get Credentials for your drive](#get-credentials-for-your-drive) +# - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet) +# - [Freeze Rows](#freeze-rows) +# - [Change the height of certin rows](#change-the-height-of-certin-rows) +# - [Read some nice data](#read-some-nice-data) +# - [Write this nice data](#write-this-nice-data) + +# %% [markdown] +# +# # hgoogle_file_api.py + +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade google-api-python-client)" +# # !sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade pip install oauth2client)" +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade gspread)" + +# %% +import importlib +import 
helpers.hgoogle_drive_api as hgodrapi + +importlib.reload(hgodrapi) + +# %% [markdown] +# +# ## Get Credentials for your drive + +# %% +google_creds = hgodrapi.get_credentials() +print(google_creds) + +# %% +service = hgodrapi.get_sheets_service(google_creds) +print(service) + +# %% [markdown] +# +# ## Get Tab/Sheet id of a particular google sheet + +# %% +tab_name = "cleaned_profiles_1" +url = "https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260" +sheet_id = "1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA" +credentials = google_creds + +# %% [markdown] +# +# ## Freeze Rows + +# %% +row_indices = [0, 1, 2] +hgodrapi.freeze_rows( + credentials, + sheet_id=sheet_id, + row_indices=row_indices, + tab_name=tab_name, +) + +# %% [markdown] +# +# ## Change the height of certin rows + +# %% +hgodrapi.set_row_height( + google_creds, + sheet_id=sheet_id, + height=20, + start_index=0, + end_index=2, + tab_name=tab_name, +) + +# %% [markdown] +# +# ## Read some nice data + +# %% +nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name) + +# %% +nice_data.head() + +# %% +nice_data.shape + +# %% [markdown] +# +# ## Write this nice data + +# %% +hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name="testing_tab") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb new file mode 100644 index 000000000..3bb70bdef --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb @@ -0,0 +1,13040 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [Description](#description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"\n", + "# Description\n", + "\n", + "This notebook examines ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", + "#!jupyter labextension enable" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:14.828251Z", + "start_time": "2021-04-02T18:11:14.514771Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import logging\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.henv as henv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.635995Z", + "start_time": "2021-04-02T18:11:18.239237Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# System signature\n", + " # Container version\n", + " container_version='1.2.0'\n", + " changelog_version='2.0.0'\n", + " # Git info\n", + " branch_name='CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI'\n", + " hash='0ca93d8c'\n", + " # Last commits:\n", + " * 0ca93d8c GP Saggese Merge ( 5 minutes ago) Fri May 9 22:09:03 2025 (HEAD -> CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI, origin/CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI)\n", + " |\\ \n", + " * | 99cbbf22 GP Saggese Lint ( 6 minutes ago) Fri May 9 22:08:07 2025 \n", + " | * 27b38c48 GP Saggese CmampTask12067_Read_docs_about_DataPull_4 (#698) ( 8 minutes ago) Fri May 9 22:06:25 2025 (origin/master, origin/HEAD, master)\n", + " # Platform info\n", + " system=Linux\n", + " node name=0f79e8b845ee\n", + " release=6.10.14-linuxkit\n", + " version=#1 SMP Thu Mar 20 16:32:56 UTC 2025\n", + " machine=aarch64\n", + " processor=aarch64\n", + " # psutils info\n", + " cpu count=8\n", + " cpu freq=None\n", + " 
memory=svmem(total=16749285376, available=14575529984, percent=13.0, used=1910644736, free=9673363456, active=2843516928, inactive=3252117504, buffers=490647552, cached=4674629632, shared=1093632, slab=694362112)\n", + " disk usage=sdiskusage(total=270233210880, used=102272610304, free=154199986176, percent=39.9)\n", + " # Docker info\n", + " has_docker=True\n", + " docker_version='28.0.4'\n", + " docker_needs_sudo=False\n", + " has_privileged_mode=True\n", + " is_inside_docker=True\n", + " has_docker_sibling_containers_support=True\n", + " has_docker_children_containers_support=True\n", + " # Packages\n", + " python: 3.12.3\n", + " cvxopt: ?\n", + " cvxpy: ?\n", + " gluonnlp: ?\n", + " gluonts: ?\n", + " joblib: 1.4.2\n", + " mxnet: ?\n", + " numpy: 2.2.3\n", + " pandas: 2.2.3\n", + " pyarrow: 19.0.1\n", + " scipy: 1.15.2\n", + " seaborn: 0.13.2\n", + " sklearn: 1.6.1\n", + " statsmodels: 0.14.4\n" + ] + } + ], + "source": [ + "print(henv.get_system_signature()[0])\n", + "\n", + "hnotebook.config_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.668793Z", + "start_time": "2021-04-02T18:11:24.638503Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-0f2f4a10-7f18-4858-af02-b60808101345.json'\n" + ] + } + ], + "source": [ + "# hdbg.init_logger(verbosity=logging.DEBUG)\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "# hdbg.test_logger()\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet openai requests)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + 
"source": [ + "import helpers.hllm as hllm\n", + "import helpers.hpandas as hpandas" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "val = hllm.get_model_stats()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'architecture': {'input_modalities': ['text', 'image'],\n", + " 'instruct_type': None,\n", + " 'modality': 'text+image->text',\n", + " 'output_modalities': ['text'],\n", + " 'tokenizer': 'Mistral'},\n", + " 'context_length': 131072,\n", + " 'created': 1746627341,\n", + " 'description': 'Mistral Medium 3 is a high-performance enterprise-grade '\n", + " 'language model designed to deliver frontier-level '\n", + " 'capabilities at significantly reduced operational cost. It '\n", + " 'balances state-of-the-art reasoning and multimodal '\n", + " 'performance with 8× lower cost compared to traditional large '\n", + " 'models, making it suitable for scalable deployments across '\n", + " 'professional and industrial use cases.\\n'\n", + " '\\n'\n", + " 'The model excels in domains such as coding, STEM reasoning, '\n", + " 'and enterprise adaptation. It supports hybrid, on-prem, and '\n", + " 'in-VPC deployments and is optimized for integration into '\n", + " 'custom workflows. 
Mistral Medium 3 offers competitive '\n", + " 'accuracy relative to larger models like Claude Sonnet '\n", + " '3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining '\n", + " 'broad compatibility across cloud environments.',\n", + " 'id': 'mistralai/mistral-medium-3',\n", + " 'name': 'Mistral: Mistral Medium 3',\n", + " 'per_request_limits': None,\n", + " 'pricing': {'completion': '0.000002',\n", + " 'image': '0',\n", + " 'internal_reasoning': '0',\n", + " 'prompt': '0.0000004',\n", + " 'request': '0',\n", + " 'web_search': '0'},\n", + " 'supported_parameters': ['tools',\n", + " 'tool_choice',\n", + " 'max_tokens',\n", + " 'temperature',\n", + " 'top_p',\n", + " 'stop',\n", + " 'frequency_penalty',\n", + " 'presence_penalty',\n", + " 'response_format',\n", + " 'structured_outputs',\n", + " 'seed'],\n", + " 'top_provider': {'context_length': 131072,\n", + " 'is_moderated': False,\n", + " 'max_completion_tokens': None}}\n" + ] + } + ], + "source": [ + "import pprint\n", + "\n", + "pprint.pprint(val[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreateddescriptioncontext_lengthper_request_limitssupported_parametersarchitecture_modalityarchitecture_input_modalitiesarchitecture_output_modalitiesarchitecture_tokenizerarchitecture_instruct_typepricing_promptpricing_completionpricing_requestpricing_imagepricing_web_searchpricing_internal_reasoningtop_provider_context_lengthtop_provider_max_completion_tokenstop_provider_is_moderatedpricing_input_cache_readpricing_input_cache_write
0mistralai/mistral-medium-3Mistral: Mistral Medium 31746627341Mistral Medium 3 is a high-performance enterpr...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.00000040.0000020000131072.0NaNFalseNaNNaN
1google/gemini-2.5-pro-previewGoogle: Gemini 2.5 Pro Preview1746578513Gemini 2.5 Pro is Google’s state-of-the-art AI...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0.000001250.0000100.00516001048576.065535.0False0.000000310.000001625
2arcee-ai/caller-largeArcee AI: Caller Large1746487869Caller Large is Arcee's specialist \"function‑c...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.000000550.00000085000032768.0NaNFalseNaNNaN
3arcee-ai/spotlightArcee AI: Spotlight1746481552Spotlight is a 7‑billion‑parameter vision‑lang...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000180.000000180000131072.065537.0FalseNaNNaN
4arcee-ai/maestro-reasoningArcee AI: Maestro Reasoning1746481269Maestro Reasoning is Arcee's flagship analysis...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000090.00000330000131072.032000.0FalseNaNNaN
5arcee-ai/virtuoso-largeArcee AI: Virtuoso Large1746478885Virtuoso‑Large is Arcee's top‑tier general‑pur...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000750.00000120000131072.064000.0FalseNaNNaN
6arcee-ai/coder-largeArcee AI: Coder Large1746478663Coder‑Large is a 32 B‑parameter offspring of Q...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.0000008000032768.0NaNFalseNaNNaN
7arcee-ai/virtuoso-medium-v2Arcee AI: Virtuoso Medium V21746478434Virtuoso‑Medium‑v2 is a 32 B model distilled f...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.00000080000131072.032768.0FalseNaNNaN
8arcee-ai/arcee-blitzArcee AI: Arcee Blitz1746470100Arcee Blitz is a 24 B‑parameter dense model di...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000450.00000075000032768.0NaNFalseNaNNaN
9microsoft/phi-4-reasoning-plus:freeMicrosoft: Phi 4 Reasoning Plus (free)1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
10microsoft/phi-4-reasoning-plusMicrosoft: Phi 4 Reasoning Plus1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.000000070.00000035000032768.0NaNFalseNaNNaN
11microsoft/phi-4-reasoning:freeMicrosoft: Phi 4 Reasoning (free)1746121275Phi-4-reasoning is a 14B parameter dense decod...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
12qwen/qwen3-0.6b-04-28:freeQwen: Qwen3 0.6B (free)1746043526Qwen3-0.6B is a lightweight, 0.6 billion param...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
13inception/mercury-coder-small-betaInception: Mercury Coder Small Beta1746033880Mercury Coder Small is the first diffusion lar...32000None[max_tokens, frequency_penalty, presence_penal...text->text[text][text]OtherNone0.000000250.000001000032000.0NaNFalseNaNNaN
14qwen/qwen3-1.7b:freeQwen: Qwen3 1.7B (free)1746031388Qwen3-1.7B is a compact, 1.7 billion parameter...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
15qwen/qwen3-4b:freeQwen: Qwen3 4B (free)1746031104Qwen3-4B is a 4 billion parameter dense langua...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None000000128000.0NaNFalseNaNNaN
16opengvlab/internvl3-14b:freeOpenGVLab: InternVL3 14B (free)1746021355The 14b version of the InternVL3 series. An ad...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
17opengvlab/internvl3-2b:freeOpenGVLab: InternVL3 2B (free)1746019807The 2b version of the InternVL3 series, for an...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
18deepseek/deepseek-prover-v2:freeDeepSeek: DeepSeek Prover V2 (free)1746013094DeepSeek Prover V2 is a 671B parameter model, ...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
19deepseek/deepseek-prover-v2DeepSeek: DeepSeek Prover V21746013094DeepSeek Prover V2 is a 671B parameter model, ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.00000050.000002180000131072.0NaNFalseNaNNaN
20meta-llama/llama-guard-4-12bMeta: Llama Guard 4 12B1745975193Llama Guard 4 is a Llama 4 Scout-derived multi...163840None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000050.000000050000163840.0NaNFalseNaNNaN
21qwen/qwen3-30b-a3b:freeQwen: Qwen3 30B A3B (free)1745878604Qwen3, the latest generation in the Qwen large...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
22qwen/qwen3-30b-a3bQwen: Qwen3 30B A3B1745878604Qwen3, the latest generation in the Qwen large...40960None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwen3None0.00000010.0000003000040960.040960.0FalseNaNNaN
23qwen/qwen3-8b:freeQwen: Qwen3 8B (free)1745876632Qwen3-8B is a dense 8.2B parameter causal lang...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.040960.0FalseNaNNaN
24qwen/qwen3-8bQwen: Qwen3 8B1745876632Qwen3-8B is a dense 8.2B parameter causal lang...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.0000000350.0000001380000128000.0NaNFalseNaNNaN
25qwen/qwen3-14b:freeQwen: Qwen3 14B (free)1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
26qwen/qwen3-14bQwen: Qwen3 14B1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000070.00000024000040960.040960.0FalseNaNNaN
27qwen/qwen3-32b:freeQwen: Qwen3 32B (free)1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
28qwen/qwen3-32bQwen: Qwen3 32B1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.00000010.0000003000040960.0NaNFalseNaNNaN
29qwen/qwen3-235b-a22b:freeQwen: Qwen3 235B A22B (free)1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
30qwen/qwen3-235b-a22bQwen: Qwen3 235B A22B1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000140.000002000040960.040960.0FalseNaNNaN
31tngtech/deepseek-r1t-chimera:freeTNG: DeepSeek R1T Chimera (free)1745760875DeepSeek-R1T-Chimera is created by merging Dee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
32thudm/glm-z1-rumination-32bTHUDM: GLM Z1 Rumination 32B1745601495THUDM: GLM Z1 Rumination 32B is a 32B-paramete...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
33thudm/glm-z1-9b:freeTHUDM: GLM Z1 9B (free)1745601140GLM-Z1-9B-0414 is a 9B-parameter language mode...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032000.0NaNFalseNaNNaN
34thudm/glm-4-9b:freeTHUDM: GLM 4 9B (free)1745601023GLM-4-9B-0414 is a 9 billion parameter languag...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032000.0NaNFalseNaNNaN
35microsoft/mai-ds-r1:freeMicrosoft: MAI DS R1 (free)1745194100MAI-DS-R1 is a post-trained variant of DeepSee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
36thudm/glm-z1-32b:freeTHUDM: GLM Z1 32B (free)1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
37thudm/glm-z1-32bTHUDM: GLM Z1 32B1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
38thudm/glm-4-32b:freeTHUDM: GLM 4 32B (free)1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
39thudm/glm-4-32bTHUDM: GLM 4 32B1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000240.00000024000032000.0NaNFalseNaNNaN
40google/gemini-2.5-flash-previewGoogle: Gemini 2.5 Flash Preview1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000000600.0006192001048576.065535.0False0.00000003750.0000002333
41google/gemini-2.5-flash-preview:thinkingGoogle: Gemini 2.5 Flash Preview (thinking)1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000003500.0006192001048576.065535.0False0.00000003750.0000002333
42openai/o4-mini-highOpenAI: o4 Mini High1744824212OpenAI o4-mini-high is the same model as [o4-m...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
43openai/o3OpenAI: o31744823457o3 is a well-rounded and powerful model across...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.000010.0000400.0076500200000.0100000.0True0.0000025NaN
44openai/o4-miniOpenAI: o4 Mini1744820942OpenAI o4-mini is a compact reasoning model in...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
45shisa-ai/shisa-v2-llama3.3-70b:freeShisa AI: Shisa V2 Llama 3.3 70B (free)1744754858Shisa V2 Llama 3.3 70B is a bilingual Japanese...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None00000032768.0NaNFalseNaNNaN
46qwen/qwen2.5-coder-7b-instructQwen: Qwen2.5 Coder 7B Instruct1744734887Qwen2.5-Coder-7B-Instruct is a 7B parameter in...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]QwenNone0.000000010.00000003000032768.0NaNFalseNaNNaN
47openai/gpt-4.1OpenAI: GPT-4.11744651385GPT-4.1 is a flagship large language model opt...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.0000020.00000800001047576.032768.0True0.0000005NaN
48openai/gpt-4.1-miniOpenAI: GPT-4.1 Mini1744651381GPT-4.1 Mini is a mid-sized model delivering p...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000040.000001600001047576.032768.0True0.0000001NaN
49openai/gpt-4.1-nanoOpenAI: GPT-4.1 Nano1744651369For tasks that demand low latency, GPT‑4.1 nan...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000010.000000400001047576.032768.0True0.000000025NaN
50eleutherai/llemma_7bEleutherAI: Llemma 7b1744643225Llemma 7B is a language model for mathematics....4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Othercode-llama0.00000080.000001200004096.04096.0FalseNaNNaN
51alfredpros/codellama-7b-instruct-solidityAlfredPros: CodeLLaMa 7B Instruct Solidity1744641874A finetuned 7 billion parameters Code LLaMA - ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otheralpaca0.00000080.000001200004096.04096.0FalseNaNNaN
52arliai/qwq-32b-arliai-rpr-v1:freeArliAI: QwQ 32B RpR v1 (free)1744555982QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
53agentica-org/deepcoder-14b-preview:freeAgentica: Deepcoder 14B Preview (free)1744555395DeepCoder-14B-Preview is a 14B parameter code ...96000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000096000.0NaNFalseNaNNaN
54moonshotai/kimi-vl-a3b-thinking:freeMoonshot AI: Kimi VL A3B Thinking (free)1744304841Kimi-VL is a lightweight Mixture-of-Experts vi...131072None[max_tokens, temperature, top_p, reasoning, in...text+image->text[image, text][text]OtherNone000000131072.0NaNFalseNaNNaN
55x-ai/grok-3-mini-betaxAI: Grok 3 Mini Beta1744240195Grok 3 Mini is a lightweight, smaller thinking...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.00000030.00000050000131072.0NaNFalseNaNNaN
56x-ai/grok-3-betaxAI: Grok 3 Beta1744240068Grok 3 is the latest model from xAI. It's thei...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000030.0000150000131072.0NaNFalseNaNNaN
57nvidia/llama-3.3-nemotron-super-49b-v1:freeNVIDIA: Llama 3.3 Nemotron Super 49B v1 (free)1744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
58nvidia/llama-3.3-nemotron-super-49b-v1NVIDIA: Llama 3.3 Nemotron Super 49B v11744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000130.00000040000131072.0NaNFalseNaNNaN
59nvidia/llama-3.1-nemotron-ultra-253b-v1:freeNVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)1744115059Llama-3.1-Nemotron-Ultra-253B-v1 is a large la...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None000000131072.0NaNFalseNaNNaN
60meta-llama/llama-4-maverick:freeMeta: Llama 4 Maverick (free)1743881822Llama 4 Maverick 17B Instruct (128E) is a high...256000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000256000.0NaNFalseNaNNaN
61meta-llama/llama-4-maverickMeta: Llama 4 Maverick1743881822Llama 4 Maverick 17B Instruct (128E) is a high...1048576None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000170.000000600.0006684001048576.016384.0FalseNaNNaN
62meta-llama/llama-4-scout:freeMeta: Llama 4 Scout (free)1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...512000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000512000.0NaNFalseNaNNaN
63meta-llama/llama-4-scoutMeta: Llama 4 Scout1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...1048576None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]OtherNone0.000000080.000000300001048576.01048576.0FalseNaNNaN
64all-hands/openhands-lm-32b-v0.1OpenHands LM 32B V0.11743613013OpenHands LM v0.1 is a 32B open-source coding ...16384None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000260.0000034000016384.04096.0FalseNaNNaN
65mistral/ministral-8bMistral: Ministral 8B1743430021Ministral 8B is a state-of-the-art language mo...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000131072.0NaNFalseNaNNaN
66deepseek/deepseek-v3-base:freeDeepSeek: DeepSeek V3 Base (free)1743272023Note that this is a base model mostly meant fo...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
67scb10x/llama3.1-typhoon2-8b-instructTyphoon2 8B Instruct1743196511Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000180.0000001800008192.0NaNFalseNaNNaN
68scb10x/llama3.1-typhoon2-70b-instructTyphoon2 70B Instruct1743196170Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000880.0000008800008192.0NaNFalseNaNNaN
69allenai/molmo-7b-d:freeAllenAI: Molmo 7B D (free)1743023247Molmo is a family of open vision-language mode...4096None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0000004096.0NaNFalseNaNNaN
70bytedance-research/ui-tars-72b:freeBytedance: UI-TARS 72B (free)1743020065UI-TARS 72B is an open-source multimodal AI mo...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone00000032768.0NaNFalseNaNNaN
71qwen/qwen2.5-vl-3b-instruct:freeQwen: Qwen2.5 VL 3B Instruct (free)1743014573Qwen2.5 VL 3B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.0NaNFalseNaNNaN
72google/gemini-2.5-pro-exp-03-25Google: Gemini 2.5 Pro Experimental1742922099Gemini 2.5 Pro is Google’s state-of-the-art AI...1000000None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0000001000000.065535.0FalseNaNNaN
73qwen/qwen2.5-vl-32b-instruct:freeQwen: Qwen2.5 VL 32B Instruct (free)1742839838Qwen2.5-VL-32B is a multimodal vision-language...8192None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0000008192.0NaNFalseNaNNaN
74qwen/qwen2.5-vl-32b-instructQwen: Qwen2.5 VL 32B Instruct1742839838Qwen2.5-VL-32B is a multimodal vision-language...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000090.00000090000128000.0NaNFalseNaNNaN
75deepseek/deepseek-chat-v3-0324:freeDeepSeek: DeepSeek V3 0324 (free)1742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
76deepseek/deepseek-chat-v3-0324DeepSeek: DeepSeek V3 03241742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]DeepSeekNone0.00000030.000000880000163840.0NaNFalseNaNNaN
77featherless/qwerky-72b:freeQwerky 72B (free)1742481597Qwerky-72B is a linear-attention RWKV variant ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.04096.0FalseNaNNaN
78openai/o1-proOpenAI: o1-pro1742423211The o1 series of models are trained with reinf...200000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]GPTNone0.000150.000600.2167500200000.0100000.0TrueNaNNaN
79mistralai/mistral-small-3.1-24b-instruct:freeMistral: Mistral Small 3.1 24B (free)1742238937Mistral Small 3.1 24B Instruct is an upgraded ...96000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone00000096000.096000.0FalseNaNNaN
80mistralai/mistral-small-3.1-24b-instructMistral: Mistral Small 3.1 24B1742238937Mistral Small 3.1 24B Instruct is an upgraded ...131072None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]MistralNone0.000000050.000000150000131072.0NaNFalseNaNNaN
81open-r1/olympiccoder-32b:freeOlympicCoder 32B (free)1742077228OlympicCoder-32B is a high-performing open-sou...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
82google/gemma-3-1b-it:freeGoogle: Gemma 3 1B (free)1741963556Gemma 3 1B is the smallest of the new Gemma 3 ...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000032768.08192.0FalseNaNNaN
83google/gemma-3-4b-it:freeGoogle: Gemma 3 4B (free)1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
84google/gemma-3-4b-itGoogle: Gemma 3 4B1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000020.000000040000131072.0NaNFalseNaNNaN
85ai21/jamba-1.6-largeAI21: Jamba 1.6 Large1741905173AI21 Jamba Large 1.6 is a high-performance hyb...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.0000020.0000080000256000.04096.0FalseNaNNaN
86ai21/jamba-1.6-miniAI21: Jamba Mini 1.61741905171AI21 Jamba Mini 1.6 is a hybrid foundation mod...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000020.00000040000256000.04096.0FalseNaNNaN
87google/gemma-3-12b-it:freeGoogle: Gemma 3 12B (free)1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
88google/gemma-3-12b-itGoogle: Gemma 3 12B1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000050.00000010000131072.0NaNFalseNaNNaN
89cohere/command-aCohere: Command A1741894342Command A is an open-weights 111B parameter mo...256000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000250.000010000256000.08192.0FalseNaNNaN
90openai/gpt-4o-mini-search-previewOpenAI: GPT-4o-mini Search Preview1741818122GPT-4o mini Search Preview is a specialized mo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.000000150.00000060.02750.00021700128000.016384.0TrueNaNNaN
91openai/gpt-4o-search-previewOpenAI: GPT-4o Search Preview1741817949GPT-4o Search Previewis a specialized model fo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.00000250.000010.0350.00361300128000.016384.0TrueNaNNaN
92rekaai/reka-flash-3:freeReka: Flash 3 (free)1741812813Reka Flash 3 is a general-purpose, instruction...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
93google/gemma-3-27b-it:freeGoogle: Gemma 3 27B (free)1741756359Gemma 3 introduces multimodality, supporting v...96000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000096000.08192.0FalseNaNNaN
94google/gemma-3-27b-itGoogle: Gemma 3 27B1741756359Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.00000010.000000200.000025600131072.016384.0FalseNaNNaN
95thedrummer/anubis-pro-105b-v1TheDrummer: Anubis Pro 105B V11741642290Anubis Pro 105B v1 is an expanded and refined ...131072None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000080.0000010000131072.0131072.0FalseNaNNaN
96thedrummer/skyfall-36b-v2TheDrummer: Skyfall 36B V21741636566Skyfall 36B v2 is an enhanced iteration of Mis...32768None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000050.0000008000032768.032768.0FalseNaNNaN
97microsoft/phi-4-multimodal-instructMicrosoft: Phi 4 Multimodal Instruct1741396284Phi-4 Multimodal Instruct is a versatile 5.6B ...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000050.000000100.0001768500131072.0NaNFalseNaNNaN
98perplexity/sonar-reasoning-proPerplexity: Sonar Reasoning Pro1741313308Note: Sonar Pro pricing includes Perplexity se...128000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]Otherdeepseek-r10.0000020.000008000.0050128000.0NaNFalseNaNNaN
99perplexity/sonar-proPerplexity: Sonar Pro1741312423Note: Sonar Pro pricing includes Perplexity se...200000None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000030.000015000.0050200000.08000.0FalseNaNNaN
100perplexity/sonar-deep-researchPerplexity: Sonar Deep Research1741311246Sonar Deep Research is a research-focused mode...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000020.000008000.0050.000003128000.0NaNFalseNaNNaN
101deepseek/deepseek-r1-zero:freeDeepSeek: DeepSeek R1 Zero (free)1741297434DeepSeek-R1-Zero is a model trained via large-...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r1000000163840.0NaNFalseNaNNaN
102qwen/qwq-32b:freeQwen: QwQ 32B (free)1741208814QwQ is the reasoning model of the Qwen series....40000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq00000040000.040000.0FalseNaNNaN
103qwen/qwq-32bQwen: QwQ 32B1741208814QwQ is the reasoning model of the Qwen series....131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq0.000000150.00000020000131072.0NaNFalseNaNNaN
104moonshotai/moonlight-16b-a3b-instruct:freeMoonshot AI: Moonlight 16B A3B Instruct (free)1740719801Moonlight-16B-A3B-Instruct is a 16B-parameter ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0000008192.0NaNFalseNaNNaN
105nousresearch/deephermes-3-llama-3-8b-preview:freeNous: DeepHermes 3 Llama 3 8B Preview (free)1740719372DeepHermes 3 Preview is the latest version of ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
106openai/gpt-4.5-previewOpenAI: GPT-4.5 (Preview)1740687810GPT-4.5 (Preview) is a research preview of Ope...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.0000750.0001500.10837500128000.016384.0True0.0000375NaN
107google/gemini-2.0-flash-lite-001Google: Gemini 2.0 Flash Lite1740506212Gemini 2.0 Flash Lite offers a significantly f...1048576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.0000000750.000000300001048576.08192.0FalseNaNNaN
108anthropic/claude-3.7-sonnetAnthropic: Claude 3.7 Sonnet1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
109anthropic/claude-3.7-sonnet:thinkingAnthropic: Claude 3.7 Sonnet (thinking)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
110anthropic/claude-3.7-sonnet:betaAnthropic: Claude 3.7 Sonnet (self-moderated)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[max_tokens, temperature, stop, reasoning, inc...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.0128000.0False0.00000030.00000375
111perplexity/r1-1776Perplexity: R1 17761740004929R1 1776 is a version of DeepSeek-R1 that has b...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.0000020.0000080000128000.0NaNFalseNaNNaN
112mistralai/mistral-sabaMistral: Saba1739803239Mistral Saba is a 24B-parameter language model...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
113cognitivecomputations/dolphin3.0-r1-mistral-24...Dolphin3.0 R1 Mistral 24B (free)1739462498Dolphin 3.0 R1 is the next generation of the D...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
114cognitivecomputations/dolphin3.0-mistral-24b:freeDolphin3.0 Mistral 24B (free)1739462019Dolphin 3.0 is the next generation of the Dolp...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
115meta-llama/llama-guard-3-8bLlama Guard 3 8B1739401318Llama Guard 3 is a Llama-3.1-8B pretrained mod...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.000000020.000000060000131072.0NaNFalseNaNNaN
116openai/o3-mini-highOpenAI: o3 Mini High1739372611OpenAI o3-mini-high is the same model as [o3-m...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
117deepseek/deepseek-r1-distill-llama-8bDeepSeek: R1 Distill Llama 8B1738937718DeepSeek R1 Distill Llama 8B is a distilled la...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.000000040.00000004000032000.032000.0FalseNaNNaN
118google/gemini-2.0-flash-001Google: Gemini 2.0 Flash1738769413Gemini Flash 2.0 offers a significantly faster...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.00000010.000000400.0000258001000000.08192.0False0.0000000250.0000001833
119qwen/qwen-vl-plusQwen: Qwen VL Plus1738731255Qwen's Enhanced Large Visual Language Model. S...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.000000210.0000006300.0002688007500.01500.0FalseNaNNaN
120aion-labs/aion-1.0AionLabs: Aion-1.01738697557Aion-1.0 is a multi-model system designed for ...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.0000040.0000080000131072.032768.0FalseNaNNaN
121aion-labs/aion-1.0-miniAionLabs: Aion-1.0-Mini1738697107Aion-1.0-Mini 32B parameter model is a distill...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.00000070.00000140000131072.032768.0FalseNaNNaN
122aion-labs/aion-rp-llama-3.1-8bAionLabs: Aion-RP 1.0 (8B)1738696718Aion-RP-Llama-3.1-8B ranks the highest in the ...32768None[max_tokens, temperature, top_p]text->text[text][text]OtherNone0.00000020.0000002000032768.032768.0FalseNaNNaN
123qwen/qwen-vl-maxQwen: Qwen VL Max1738434304Qwen VL Max is a visual understanding model wi...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.00000080.000003200.001024007500.01500.0FalseNaNNaN
124qwen/qwen-turboQwen: Qwen-Turbo1738410974Qwen-Turbo, based on Qwen2.5, is a 1M context ...1000000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.000000050.000000200001000000.08192.0FalseNaNNaN
125qwen/qwen2.5-vl-72b-instruct:freeQwen: Qwen2.5 VL 72B Instruct (free)1738410311Qwen2.5-VL is proficient in recognizing common...131072None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone000000131072.02048.0FalseNaNNaN
126qwen/qwen2.5-vl-72b-instructQwen: Qwen2.5 VL 72B Instruct1738410311Qwen2.5-VL is proficient in recognizing common...32000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.000000250.00000075000032000.0NaNFalseNaNNaN
127qwen/qwen-plusQwen: Qwen-Plus1738409840Qwen-Plus, based on the Qwen2.5 foundation mod...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000040.00000120000131072.08192.0FalseNaNNaN
128qwen/qwen-maxQwen: Qwen-Max1738402289Qwen-Max, based on Qwen2.5, provides the best ...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000160.0000064000032768.08192.0FalseNaNNaN
129openai/o3-miniOpenAI: o3 Mini1738351721OpenAI o3-mini is a cost-efficient language mo...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
130deepseek/deepseek-r1-distill-qwen-1.5bDeepSeek: R1 Distill Qwen 1.5B1738328067DeepSeek R1 Distill Qwen 1.5B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000180.000000180000131072.032768.0FalseNaNNaN
131mistralai/mistral-small-24b-instruct-2501:freeMistral: Mistral Small 3 (free)1738255409Mistral Small 3 is a 24B-parameter language mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone00000032768.0NaNFalseNaNNaN
132mistralai/mistral-small-24b-instruct-2501Mistral: Mistral Small 31738255409Mistral Small 3 is a 24B-parameter language mo...28000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone0.000000060.00000012000028000.014000.0FalseNaNNaN
133deepseek/deepseek-r1-distill-qwen-32b:freeDeepSeek: R1 Distill Qwen 32B (free)1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...16000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000016000.016000.0FalseNaNNaN
134deepseek/deepseek-r1-distill-qwen-32bDeepSeek: R1 Distill Qwen 32B1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000120.000000180000131072.016384.0FalseNaNNaN
135deepseek/deepseek-r1-distill-qwen-14b:freeDeepSeek: R1 Distill Qwen 14B (free)1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000064000.0NaNFalseNaNNaN
136deepseek/deepseek-r1-distill-qwen-14bDeepSeek: R1 Distill Qwen 14B1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000150.00000015000064000.064000.0FalseNaNNaN
137perplexity/sonar-reasoningPerplexity: Sonar Reasoning1738131107Sonar Reasoning is a reasoning model provided ...127000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000010.0000050.005000127000.0NaNFalseNaNNaN
138perplexity/sonarPerplexity: Sonar1738013808Sonar is lightweight, affordable, fast, and si...127072None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000010.0000010.005000127072.0NaNFalseNaNNaN
139liquid/lfm-7bLiquid: LFM 7B1737806883LFM-7B, a new best-in-class language model. LF...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000010.00000001000032768.0NaNFalseNaNNaN
140liquid/lfm-3bLiquid: LFM 3B1737806501Liquid's LFM 3B delivers incredible performanc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000020.00000002000032768.0NaNFalseNaNNaN
141deepseek/deepseek-r1-distill-llama-70b:freeDeepSeek: R1 Distill Llama 70B (free)1737663169DeepSeek R1 Distill Llama 70B is a distilled l...8192None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10000008192.04096.0FalseNaNNaN
142deepseek/deepseek-r1-distill-llama-70bDeepSeek: R1 Distill Llama 70B1737663169DeepSeek R1 Distill Llama 70B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.00000010.00000040000131072.016384.0FalseNaNNaN
143deepseek/deepseek-r1:freeDeepSeek: R1 (free)1737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, reasoning, include_reasoning, tem...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
144deepseek/deepseek-r1DeepSeek: R11737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.00000050.000002180000163840.0163840.0FalseNaNNaN
145minimax/minimax-01MiniMax: MiniMax-011736915462MiniMax-01 is a combines MiniMax-Text-01 for t...1000192None[max_tokens, temperature, top_p]text+image->text[text, image][text]OtherNone0.00000020.000001100001000192.01000192.0FalseNaNNaN
146mistralai/codestral-2501Mistral: Codestral 25011736895522[Mistral](/mistralai)'s cutting-edge language ...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000030.00000090000262144.0NaNFalseNaNNaN
147microsoft/phi-4Microsoft: Phi 41736489872[Microsoft Research](/microsoft) Phi-4 is desi...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000070.00000014000016384.016384.0FalseNaNNaN
148deepseek/deepseek-chat:freeDeepSeek: DeepSeek V3 (free)1735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
149deepseek/deepseek-chatDeepSeek: DeepSeek V31735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.000000380.000000890000163840.0163840.0FalseNaNNaN
150sao10k/l3.3-euryale-70bSao10K: Llama 3.3 Euryale 70B1734535928Euryale L3.3 70B is a model focused on creativ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
151openai/o1OpenAI: o11734459999The latest and strongest model family from Ope...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[text, image][text]GPTNone0.0000150.0000600.02167500200000.0100000.0True0.0000075NaN
152eva-unit-01/eva-llama-3.33-70bEVA Llama 3.33 70B1734377303EVA Llama 3.33 70b is a roleplay and storywrit...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.000006000016384.04096.0FalseNaNNaN
153x-ai/grok-2-vision-1212xAI: Grok 2 Vision 12121734237338Grok 2 Vision 1212 advances image-based AI wit...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000020.0000100.00360032768.0NaNFalseNaNNaN
154x-ai/grok-2-1212xAI: Grok 2 12121734232814Grok 2 1212 introduces significant enhancement...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000020.000010000131072.0NaNFalseNaNNaN
155cohere/command-r7b-12-2024Cohere: Command R7B (12-2024)1734158152Command R7B (12-2024) is a small, fast update ...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.00000003750.000000150000128000.04000.0FalseNaNNaN
156google/gemini-2.0-flash-exp:freeGoogle: Gemini 2.0 Flash Experimental (free)1733937523Gemini Flash 2.0 offers a significantly faster...1048576None[max_tokens, temperature, top_p, stop]text+image->text[text, image][text]GeminiNone0000001048576.08192.0FalseNaNNaN
157meta-llama/llama-3.3-70b-instruct:freeMeta: Llama 3.3 70B Instruct (free)1733506137The Meta Llama 3.3 multilingual large language...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30000008000.08000.0FalseNaNNaN
158meta-llama/llama-3.3-70b-instructMeta: Llama 3.3 70B Instruct1733506137The Meta Llama 3.3 multilingual large language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000090.000000350000131000.0131000.0FalseNaNNaN
159amazon/nova-lite-v1Amazon: Nova Lite 1.01733437363Amazon Nova Lite 1.0 is a very low-cost multim...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.000000060.0000002400.0000900300000.05120.0TrueNaNNaN
160amazon/nova-micro-v1Amazon: Nova Micro 1.01733437237Amazon Nova Micro 1.0 is a text-only model tha...128000None[tools, max_tokens, temperature, top_p, top_k,...text->text[text][text]NovaNone0.0000000350.000000140000128000.05120.0TrueNaNNaN
161amazon/nova-pro-v1Amazon: Nova Pro 1.01733436303Amazon Nova Pro 1.0 is a capable multimodal mo...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.00000080.000003200.001200300000.05120.0TrueNaNNaN
162qwen/qwq-32b-preview:freeQwen: QwQ 32B Preview (free)1732754541QwQ-32B-Preview is an experimental research mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r100000016384.0NaNFalseNaNNaN
163qwen/qwq-32b-previewQwen: QwQ 32B Preview1732754541QwQ-32B-Preview is an experimental research mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r10.000000090.00000027000032768.0NaNFalseNaNNaN
164google/learnlm-1.5-pro-experimental:freeGoogle: LearnLM 1.5 Pro Experimental (free)1732216551An experimental version of [Gemini 1.5 Pro](/g...40960None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone00000040960.08192.0FalseNaNNaN
165eva-unit-01/eva-qwen-2.5-72bEVA Qwen2.5 72B1732210606EVA Qwen2.5 72B is a roleplay and storywriting...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
166openai/gpt-4o-2024-11-20OpenAI: GPT-4o (2024-11-20)1732127594The 2024-11-20 version of GPT-4o offers a leve...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
167mistralai/mistral-large-2411Mistral Large 24111731978685Mistral Large 2 2411 is an update of [Mistral ...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
168mistralai/mistral-large-2407Mistral Large 24071731978415This is Mistral AI's flagship model, Mistral L...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
169mistralai/pixtral-large-2411Mistral: Pixtral Large 24111731977388Pixtral Large is a 124B parameter, open-weight...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.0000020.00000600.00288800131072.0NaNFalseNaNNaN
170x-ai/grok-vision-betaxAI: Grok Vision Beta1731976624Grok Vision Beta is xAI's experimental languag...8192None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000050.00001500.009008192.0NaNFalseNaNNaN
171infermatic/mn-inferor-12bInfermatic: Mistral Nemo Inferor 12B1731464428Inferor 12B is a merge of top roleplay models,...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000080.0000012000016384.04096.0FalseNaNNaN
172qwen/qwen-2.5-coder-32b-instruct:freeQwen2.5 Coder 32B Instruct (free)1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
173qwen/qwen-2.5-coder-32b-instructQwen2.5 Coder 32B Instruct1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000060.00000015000032768.016384.0FalseNaNNaN
174raifle/sorcererlm-8x22bSorcererLM 8x22B1731105083SorcererLM is an advanced RP and storytelling ...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralvicuna0.00000450.0000045000016000.0NaNFalseNaNNaN
175eva-unit-01/eva-qwen-2.5-32bEVA Qwen2.5 32B1731104847EVA Qwen2.5 32B is a roleplaying/storywriting ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000260.0000034000016384.04096.0FalseNaNNaN
176thedrummer/unslopnemo-12bUnslopnemo 12B1731103448UnslopNemo v4.1 is the latest addition from th...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000450.00000045000032000.016000.0FalseNaNNaN
177anthropic/claude-3.5-haiku:betaAnthropic: Claude 3.5 Haiku (self-moderated)1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
178anthropic/claude-3.5-haikuAnthropic: Claude 3.5 Haiku1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
179anthropic/claude-3.5-haiku-20241022:betaAnthropic: Claude 3.5 Haiku (2024-10-22) (self...1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
180anthropic/claude-3.5-haiku-20241022Anthropic: Claude 3.5 Haiku (2024-10-22)1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
181neversleep/llama-3.1-lumimaid-70bNeverSleep: Lumimaid v0.2 70B1729555200Lumimaid v0.2 70B is a finetune of [Llama 3.1 ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000150.00000225000016384.02048.0FalseNaNNaN
182anthracite-org/magnum-v4-72bMagnum v4 72B1729555200This is a series of models designed to replica...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000150.00000225000016384.01024.0FalseNaNNaN
183anthropic/claude-3.5-sonnet:betaAnthropic: Claude 3.5 Sonnet (self-moderated)1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
184anthropic/claude-3.5-sonnetAnthropic: Claude 3.5 Sonnet1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
185x-ai/grok-betaxAI: Grok Beta1729382400Grok Beta is xAI's experimental language model...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000050.0000150000131072.0NaNFalseNaNNaN
186mistralai/ministral-8bMistral: Ministral 8B1729123200Ministral 8B is an 8B parameter model featurin...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000128000.0NaNFalseNaNNaN
187mistralai/ministral-3bMistral: Ministral 3B1729123200Ministral 3B is a 3B parameter model optimized...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000040.000000040000131072.0NaNFalseNaNNaN
188qwen/qwen-2.5-7b-instruct:freeQwen2.5 7B Instruct (free)1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.032768.0FalseNaNNaN
189qwen/qwen-2.5-7b-instructQwen2.5 7B Instruct1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000050.0000001000032768.016384.0FalseNaNNaN
190nvidia/llama-3.1-nemotron-70b-instructNVIDIA: Llama 3.1 Nemotron 70B Instruct1728950400NVIDIA's Llama 3.1 Nemotron 70B is a language ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000120.00000030000131072.0131072.0FalseNaNNaN
191inflection/inflection-3-productivityInflection: Inflection 3 Productivity1728604800Inflection 3 Productivity is optimized for fol...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
192inflection/inflection-3-piInflection: Inflection 3 Pi1728604800Inflection 3 Pi powers Inflection's [Pi](https...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
193google/gemini-flash-1.5-8bGoogle: Gemini 1.5 Flash 8B1727913600Gemini Flash 1.5 8B is optimized for speed and...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.00000003750.0000001500001000000.08192.0False0.000000010.0000000583
194thedrummer/rocinante-12bRocinante 12B1727654400Rocinante 12B is designed for engaging storyte...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000250.0000005000032768.0NaNFalseNaNNaN
195anthracite-org/magnum-v2-72bMagnum v2 72B1727654400From the maker of [Goliath](https://openrouter...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000030.000003000032768.0NaNFalseNaNNaN
196liquid/lfm-40bLiquid: LFM 40B MoE1727654400Liquid's 40.3B Mixture of Experts (MoE) model....32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000150.00000015000032768.0NaNFalseNaNNaN
197meta-llama/llama-3.2-3b-instruct:freeMeta: Llama 3.2 3B Instruct (free)1727222400Llama 3.2 3B is a 3-billion-parameter multilin...20000None[max_tokens, temperature, top_p]text->text[text][text]Llama3llama300000020000.020000.0FalseNaNNaN
198meta-llama/llama-3.2-3b-instructMeta: Llama 3.2 3B Instruct1727222400Llama 3.2 3B is a 3-billion-parameter multilin...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000010.000000020000131072.016384.0FalseNaNNaN
199meta-llama/llama-3.2-1b-instruct:freeMeta: Llama 3.2 1B Instruct (free)1727222400Llama 3.2 1B is a 1-billion-parameter language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131000.0NaNFalseNaNNaN
200meta-llama/llama-3.2-1b-instructMeta: Llama 3.2 1B Instruct1727222400Llama 3.2 1B is a 1-billion-parameter language...131072None[max_tokens, temperature, top_p, top_k, stop, ...text->text[text][text]Llama3llama30.0000000050.000000010000131072.0NaNFalseNaNNaN
201meta-llama/llama-3.2-90b-vision-instructMeta: Llama 3.2 90B Vision Instruct1727222400The Llama 90B Vision model is a top-tier, 90-b...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.00000120.000001200.00173400131072.02048.0FalseNaNNaN
202meta-llama/llama-3.2-11b-vision-instruct:freeMeta: Llama 3.2 11B Vision Instruct (free)1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama3000000131072.02048.0FalseNaNNaN
203meta-llama/llama-3.2-11b-vision-instructMeta: Llama 3.2 11B Vision Instruct1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.0000000490.00000004900.0000794800131072.016384.0FalseNaNNaN
204qwen/qwen-2.5-72b-instruct:freeQwen2.5 72B Instruct (free)1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
205qwen/qwen-2.5-72b-instructQwen2.5 72B Instruct1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwenchatml0.000000120.00000039000032768.016384.0FalseNaNNaN
206qwen/qwen-2.5-vl-72b-instructQwen: Qwen2.5-VL 72B Instruct1726617600Qwen2.5 VL 72B is a multimodal LLM from the Qw...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000060.000000600.0005780032768.0NaNFalseNaNNaN
207neversleep/llama-3.1-lumimaid-8bNeverSleep: Lumimaid v0.2 8B1726358400Lumimaid v0.2 8B is a finetune of [Llama 3.1 8...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000032768.02048.0FalseNaNNaN
208openai/o1-previewOpenAI: o1-preview1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
209openai/o1-preview-2024-09-12OpenAI: o1-preview (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
210openai/o1-miniOpenAI: o1-mini1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
211openai/o1-mini-2024-09-12OpenAI: o1-mini (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
212mistralai/pixtral-12bMistral: Pixtral 12B1725926400The first multi-modal, text+image-to-text mode...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]MistralNone0.00000010.000000100.00014450032768.0NaNFalseNaNNaN
213cohere/command-r-plus-08-2024Cohere: Command R+ (08-2024)1724976000command-r-plus-08-2024 is an update of the [Co...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000250.000010000128000.04000.0FalseNaNNaN
214cohere/command-r-08-2024Cohere: Command R (08-2024)1724976000command-r-08-2024 is an update of the [Command...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.000000150.00000060000128000.04000.0FalseNaNNaN
215qwen/qwen-2.5-vl-7b-instruct:freeQwen: Qwen2.5-VL 7B Instruct (free)1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.064000.0FalseNaNNaN
216qwen/qwen-2.5-vl-7b-instructQwen: Qwen2.5-VL 7B Instruct1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000020.000000200.00014450032768.0NaNFalseNaNNaN
217sao10k/l3.1-euryale-70bSao10K: Llama 3.1 Euryale 70B v2.21724803200Euryale L3.1 70B v2.2 is a model focused on cr...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
218google/gemini-flash-1.5-8b-expGoogle: Gemini 1.5 Flash 8B Experimental1724803200Gemini Flash 1.5 8B Experimental is an experim...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GeminiNone0000001000000.08192.0FalseNaNNaN
219microsoft/phi-3.5-mini-128k-instructMicrosoft: Phi-3.5 Mini 128K Instruct1724198400Phi-3.5 models are lightweight, state-of-the-a...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.000000030.000000090000131072.0NaNFalseNaNNaN
220nousresearch/hermes-3-llama-3.1-70bNous: Hermes 3 70B Instruct1723939200Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.000000120.00000030000131072.0131072.0FalseNaNNaN
221nousresearch/hermes-3-llama-3.1-405bNous: Hermes 3 405B Instruct1723766400Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.00000080.00000080000131072.0131072.0FalseNaNNaN
222openai/chatgpt-4o-latestOpenAI: ChatGPT-4o1723593600OpenAI ChatGPT 4o is continually updated by Op...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GPTNone0.0000050.00001500.00722500128000.016384.0TrueNaNNaN
223sao10k/l3-lunaris-8bSao10K: Llama 3 8B Lunaris1723507200Lunaris 8B is a versatile generalist and rolep...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.0000000500008192.0NaNFalseNaNNaN
224aetherwiing/mn-starcannon-12bAetherwiing: Starcannon 12B1723507200Starcannon 12B v2 is a creative roleplay and s...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
225openai/gpt-4o-2024-08-06OpenAI: GPT-4o (2024-08-06)1722902400The 2024-08-06 version of GPT-4o offers improv...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
226meta-llama/llama-3.1-405b:freeMeta: Llama 3.1 405B (base) (free)1722556800Meta's latest class of model (Llama 3.1) launc...64000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none00000064000.0NaNFalseNaNNaN
227meta-llama/llama-3.1-405bMeta: Llama 3.1 405B (base)1722556800Meta's latest class of model (Llama 3.1) launc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.0000020.000002000032768.0NaNFalseNaNNaN
228nothingiisreal/mn-celeste-12bMistral Nemo 12B Celeste1722556800A specialized story writing and roleplaying mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
229perplexity/llama-3.1-sonar-small-128k-onlinePerplexity: Llama 3.1 Sonar 8B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.00000020.00000020.005000127072.0NaNFalseNaNNaN
230perplexity/llama-3.1-sonar-large-128k-onlinePerplexity: Llama 3.1 Sonar 70B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.0000010.0000010.005000127072.0NaNFalseNaNNaN
231meta-llama/llama-3.1-8b-instruct:freeMeta: Llama 3.1 8B Instruct (free)1721692800Meta's latest class of model (Llama 3.1) launc...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131072.04096.0FalseNaNNaN
232meta-llama/llama-3.1-8b-instructMeta: Llama 3.1 8B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.00000003000016384.016384.0FalseNaNNaN
233meta-llama/llama-3.1-405b-instructMeta: Llama 3.1 405B Instruct1721692800The highly anticipated 400B class of Llama3 is...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000080.0000008000032768.016384.0FalseNaNNaN
234meta-llama/llama-3.1-70b-instructMeta: Llama 3.1 70B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000010.000000280000131072.016384.0FalseNaNNaN
235mistralai/codestral-mambaMistral: Codestral Mamba1721347200A 7.3B parameter Mamba-based model designed fo...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.000000250000262144.0NaNFalseNaNNaN
236mistralai/mistral-nemo:freeMistral: Mistral Nemo (free)1721347200A 12B parameter model with a 128k token contex...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral000000128000.0128000.0FalseNaNNaN
237mistralai/mistral-nemoMistral: Mistral Nemo1721347200A 12B parameter model with a 128k token contex...98304None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000030.00000007000098304.049152.0FalseNaNNaN
238openai/gpt-4o-miniOpenAI: GPT-4o-mini1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00021700128000.016384.0True0.000000075NaN
239openai/gpt-4o-mini-2024-07-18OpenAI: GPT-4o-mini (2024-07-18)1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00722500128000.016384.0True0.000000075NaN
240google/gemma-2-27b-itGoogle: Gemma 2 27B1720828800Gemma 2 27B by Google is an open model built f...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.00000010.000000300008192.0NaNFalseNaNNaN
241alpindale/magnum-72bMagnum 72B1720656000From the maker of [Goliath](https://openrouter...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
242google/gemma-2-9b-it:freeGoogle: Gemma 2 9B (free)1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0000008192.08192.0FalseNaNNaN
243google/gemma-2-9b-itGoogle: Gemma 2 9B1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.000000020.0000000600008192.0NaNFalseNaNNaN
24401-ai/yi-large01.AI: Yi Large1719273600The Yi Large model was designed by 01.AI with ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]YiNone0.0000030.000003000032768.04096.0FalseNaNNaN
245ai21/jamba-instructAI21: Jamba Instruct1719273600The Jamba-Instruct model, introduced by AI21 L...256000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000050.00000070000256000.04096.0FalseNaNNaN
246anthropic/claude-3.5-sonnet-20240620:betaAnthropic: Claude 3.5 Sonnet (2024-06-20) (sel...1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
247anthropic/claude-3.5-sonnet-20240620Anthropic: Claude 3.5 Sonnet (2024-06-20)1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
248sao10k/l3-euryale-70bSao10k: Llama 3 Euryale 70B v2.11718668800Euryale 70B v2.1 is a model focused on creativ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000001480.0000014800008192.08192.0FalseNaNNaN
249cognitivecomputations/dolphin-mixtral-8x22bDolphin 2.9.2 Mixtral 8x22B 🐬1717804800Dolphin 2.9 is designed for instruction follow...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000090.0000009000016000.0NaNFalseNaNNaN
250qwen/qwen-2-72b-instructQwen 2 72B Instruct1717718400Qwen2 72B is a transformer-based model that ex...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000090.0000009000032768.04096.0FalseNaNNaN
251mistralai/mistral-7b-instruct:freeMistral: Mistral 7B Instruct (free)1716768000A high-performing, industry-standard 7.3B para...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral00000032768.016384.0FalseNaNNaN
252mistralai/mistral-7b-instructMistral: Mistral 7B Instruct1716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
253nousresearch/hermes-2-pro-llama-3-8bNousResearch: Hermes 2 Pro - Llama-3 8B1716768000Hermes 2 Pro is an upgraded, retrained version...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.0000000250.000000040000131072.0131072.0FalseNaNNaN
254mistralai/mistral-7b-instruct-v0.3Mistral: Mistral 7B Instruct v0.31716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
255microsoft/phi-3-mini-128k-instructMicrosoft: Phi-3 Mini 128K Instruct1716681600Phi-3 Mini is a powerful 3.8B parameter model ...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000010000128000.0NaNFalseNaNNaN
256microsoft/phi-3-medium-128k-instructMicrosoft: Phi-3 Medium 128K Instruct1716508800Phi-3 128K Medium is a powerful 14-billion par...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000030000131072.0NaNFalseNaNNaN
257neversleep/llama-3-lumimaid-70bNeverSleep: Llama 3 Lumimaid 70B1715817600The NeverSleep team is back, with a Llama 3 70...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.00000600008192.04096.0FalseNaNNaN
258deepseek/deepseek-coderDeepSeek-Coder-V21715644800DeepSeek-Coder-V2, an open-source Mixture-of-E...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000040.000000120000128000.0NaNFalseNaNNaN
259google/gemini-flash-1.5Google: Gemini 1.5 Flash1715644800Gemini 1.5 Flash is a foundation model that pe...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.0000000750.000000300.00004001000000.08192.0False0.000000018750.0000001583
260openai/gpt-4oOpenAI: GPT-4o1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
261openai/gpt-4o:extendedOpenAI: GPT-4o (extended)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000060.00001800.00722500128000.064000.0TrueNaNNaN
262meta-llama/llama-guard-2-8bMeta: LlamaGuard 2 8B1715558400This safeguard model has 8B parameters and is ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.00000020.000000200008192.0NaNFalseNaNNaN
263openai/gpt-4o-2024-05-13OpenAI: GPT-4o (2024-05-13)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000050.00001500.00722500128000.04096.0TrueNaNNaN
264allenai/olmo-7b-instructOLMo 7B Instruct1715299200OLMo 7B Instruct by the Allen Institute for AI...2048None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherzephyr0.000000080.0000002400002048.0NaNFalseNaNNaN
265neversleep/llama-3-lumimaid-8b:extendedNeverSleep: Llama 3 Lumimaid 8B (extended)1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
266neversleep/llama-3-lumimaid-8bNeverSleep: Llama 3 Lumimaid 8B1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
267sao10k/fimbulvetr-11b-v2Fimbulvetr 11B v21713657600Creative writing model, routed with permission...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000080.000001200004096.04096.0FalseNaNNaN
268meta-llama/llama-3-8b-instructMeta: Llama 3 8B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, top_k, seed, ...text->text[text][text]Llama3llama30.000000030.0000000600008192.016384.0FalseNaNNaN
269meta-llama/llama-3-70b-instructMeta: Llama 3 70B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000030.000000400008192.016384.0FalseNaNNaN
270mistralai/mixtral-8x22b-instructMistral: Mixtral 8x22B Instruct1713312000Mistral's official instruct fine-tuned version...65536None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.00000040.0000012000065536.0NaNFalseNaNNaN
271microsoft/wizardlm-2-8x22bWizardLM-2 8x22B1713225600WizardLM-2 8x22B is Microsoft AI's most advanc...65536None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]Mistralvicuna0.00000050.0000005000065536.016384.0FalseNaNNaN
272google/gemini-pro-1.5Google: Gemini 1.5 Pro1712620800Google's latest multimodal model, supports ima...2000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.000001250.00000500.0006575002000000.08192.0FalseNaNNaN
273openai/gpt-4-turboOpenAI: GPT-4 Turbo1712620800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.000010.0000300.0144500128000.04096.0TrueNaNNaN
274cohere/command-r-plusCohere: Command R+1712188800Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
275cohere/command-r-plus-04-2024Cohere: Command R+ (04-2024)1712016000Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
276sophosympatheia/midnight-rose-70bMidnight Rose 70B1711065600A merge with a complex family tree, this model...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000080.000000800004096.0NaNFalseNaNNaN
277cohere/commandCohere: Command1710374400Command is an instruction-following conversati...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.0000010.00000200004096.04000.0FalseNaNNaN
278cohere/command-rCohere: Command R1710374400Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
279anthropic/claude-3-haiku:betaAnthropic: Claude 3 Haiku (self-moderated)1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0False0.000000030.0000003
280anthropic/claude-3-haikuAnthropic: Claude 3 Haiku1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0True0.000000030.0000003
281anthropic/claude-3-opus:betaAnthropic: Claude 3 Opus (self-moderated)1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0False0.00000150.00001875
282anthropic/claude-3-opusAnthropic: Claude 3 Opus1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0True0.00000150.00001875
283anthropic/claude-3-sonnet:betaAnthropic: Claude 3 Sonnet (self-moderated)1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0False0.00000030.00000375
284anthropic/claude-3-sonnetAnthropic: Claude 3 Sonnet1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0True0.00000030.00000375
285cohere/command-r-03-2024Cohere: Command R (03-2024)1709341200Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
286mistralai/mistral-largeMistral Large1708905600This is Mistral AI's flagship model, Mistral L...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000128000.0NaNFalseNaNNaN
287openai/gpt-3.5-turbo-0613OpenAI: GPT-3.5 Turbo (older v0613)1706140800GPT-3.5 Turbo is OpenAI's fastest model. It ca...4095None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.00000200004095.04096.0FalseNaNNaN
288openai/gpt-4-turbo-previewOpenAI: GPT-4 Turbo Preview1706140800The preview GPT-4 model with improved instruct...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
289nousresearch/nous-hermes-2-mixtral-8x7b-dpoNous: Hermes 2 Mixtral 8x7B DPO1705363200Nous Hermes 2 Mixtral 8x7B DPO is the new flag...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000060.0000006000032768.02048.0FalseNaNNaN
290mistralai/mistral-mediumMistral Medium1704844800This is Mistral AI's closed-source, medium-sid...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000002750.0000081000032768.0NaNFalseNaNNaN
291mistralai/mistral-smallMistral Small1704844800With 22 billion parameters, Mistral Small v24....32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
292mistralai/mistral-tinyMistral Tiny1704844800Note: This model is being deprecated. Recommen...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.00000025000032768.0NaNFalseNaNNaN
293mistralai/mistral-7b-instruct-v0.2Mistral: Mistral 7B Instruct v0.21703721600A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000020.0000002000032768.0NaNFalseNaNNaN
294mistralai/mixtral-8x7b-instructMistral: Mixtral 8x7B Instruct1702166400Mixtral 8x7B Instruct is a pretrained generati...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000080.00000024000032768.0NaNFalseNaNNaN
295neversleep/noromaid-20bNoromaid 20B1700956800A collab between IkariDev and Undi. This merge...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.000000750.000001500008192.02048.0FalseNaNNaN
296anthropic/claude-2.1:betaAnthropic: Claude v2.1 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
297anthropic/claude-2.1Anthropic: Claude v2.11700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
298anthropic/claude-2:betaAnthropic: Claude v2 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
299anthropic/claude-2Anthropic: Claude v21700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
300undi95/toppy-m-7bToppy M 7B1699574400A wild 7B parameter model that merges several ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralalpaca0.00000080.000001200004096.04096.0FalseNaNNaN
301alpindale/goliath-120bGoliath 120B1699574400A large LLM created by combining two fine-tune...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000656250.00000937500006144.0512.0FalseNaNNaN
302openrouter/autoAuto Router1699401600Your prompt will be processed by a meta-model ...2000000None[]text->text[text][text]RouterNone-1-1NaNNaNNaNNaNNaNNaNFalseNaNNaN
303openai/gpt-3.5-turbo-1106OpenAI: GPT-3.5 Turbo 16k (older v1106)1699228800An older GPT-3.5 Turbo model with improved ins...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.000002000016385.04096.0TrueNaNNaN
304openai/gpt-4-1106-previewOpenAI: GPT-4 Turbo (older v1106)1699228800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
305jondurbin/airoboros-l2-70bAiroboros 70B1698537600A Llama 2 70B fine-tune using synthetic data (...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000050.000000500004096.0NaNFalseNaNNaN
306openai/gpt-3.5-turbo-instructOpenAI: GPT-3.5 Turbo Instruct1695859200This model is a variant of GPT-3.5 Turbo tuned...4095None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]GPTchatml0.00000150.00000200004095.04096.0TrueNaNNaN
307mistralai/mistral-7b-instruct-v0.1Mistral: Mistral 7B Instruct v0.11695859200A 7.3B parameter model that outperforms Llama ...2824None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000110.0000001900002824.0NaNFalseNaNNaN
308pygmalionai/mythalion-13bPygmalion: Mythalion 13B1693612800A blend of the new Pygmalion-13b and MythoMax....8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500008192.01024.0FalseNaNNaN
309openai/gpt-3.5-turbo-16kOpenAI: GPT-3.5 Turbo 16k1693180800This model offers four times the context lengt...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000030.000004000016385.04096.0TrueNaNNaN
310openai/gpt-4-32kOpenAI: GPT-4 32k1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
311openai/gpt-4-32k-0314OpenAI: GPT-4 32k (older v0314)1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
312mancer/weaverMancer: Weaver (alpha)1690934400An attempt to recreate Claude-style verbosity,...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000011250.00000112500008000.01000.0FalseNaNNaN
313anthropic/claude-2.0:betaAnthropic: Claude v2.0 (self-moderated)1690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0FalseNaNNaN
314anthropic/claude-2.0Anthropic: Claude v2.01690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0TrueNaNNaN
315undi95/remm-slerp-l2-13bReMM SLERP 13B1689984000A recreation trial of the original MythoMax-L2...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500006144.01024.0FalseNaNNaN
316gryphe/mythomax-l2-13bMythoMax 13B1688256000One of the highest performing and most popular...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000000650.00000006500004096.04096.0FalseNaNNaN
317meta-llama/llama-2-70b-chatMeta: Llama 2 70B Chat1687219200The flagship, 70 billion parameter language mo...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2llama20.00000090.000000900004096.0NaNFalseNaNNaN
318openai/gpt-3.5-turboOpenAI: GPT-3.5 Turbo1685232000GPT-3.5 Turbo is OpenAI's fastest model. It ca...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
319openai/gpt-3.5-turbo-0125OpenAI: GPT-3.5 Turbo 16k1685232000The latest GPT-3.5 Turbo model with improved i...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
320openai/gpt-4OpenAI: GPT-41685232000OpenAI's flagship model, GPT-4 is a large-scal...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
321openai/gpt-4-0314OpenAI: GPT-4 (older v0314)1685232000GPT-4-0314 is the first version of GPT-4 relea...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
\n", + "
" + ], + "text/plain": [ + " id name created description context_length per_request_limits supported_parameters architecture_modality architecture_input_modalities architecture_output_modalities architecture_tokenizer architecture_instruct_type pricing_prompt pricing_completion pricing_request pricing_image pricing_web_search pricing_internal_reasoning top_provider_context_length top_provider_max_completion_tokens top_provider_is_moderated pricing_input_cache_read pricing_input_cache_write\n", + "0 mistralai/mistral-medium-3 Mistral: Mistral Medium 3 1746627341 Mistral Medium 3 is a high-performance enterpr... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.0000004 0.000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "1 google/gemini-2.5-pro-preview Google: Gemini 2.5 Pro Preview 1746578513 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0.00000125 0.00001 0 0.00516 0 0 1048576.0 65535.0 False 0.00000031 0.000001625\n", + "2 arcee-ai/caller-large Arcee AI: Caller Large 1746487869 Caller Large is Arcee's specialist \"function‑c... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.00000055 0.00000085 0 0 0 0 32768.0 NaN False NaN NaN\n", + "3 arcee-ai/spotlight Arcee AI: Spotlight 1746481552 Spotlight is a 7‑billion‑parameter vision‑lang... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000018 0.00000018 0 0 0 0 131072.0 65537.0 False NaN NaN\n", + "4 arcee-ai/maestro-reasoning Arcee AI: Maestro Reasoning 1746481269 Maestro Reasoning is Arcee's flagship analysis... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.0000009 0.0000033 0 0 0 0 131072.0 32000.0 False NaN NaN\n", + "5 arcee-ai/virtuoso-large Arcee AI: Virtuoso Large 1746478885 Virtuoso‑Large is Arcee's top‑tier general‑pur... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000075 0.0000012 0 0 0 0 131072.0 64000.0 False NaN NaN\n", + "6 arcee-ai/coder-large Arcee AI: Coder Large 1746478663 Coder‑Large is a 32 B‑parameter offspring of Q... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 NaN False NaN NaN\n", + "7 arcee-ai/virtuoso-medium-v2 Arcee AI: Virtuoso Medium V2 1746478434 Virtuoso‑Medium‑v2 is a 32 B model distilled f... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "8 arcee-ai/arcee-blitz Arcee AI: Arcee Blitz 1746470100 Arcee Blitz is a 24 B‑parameter dense model di... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000045 0.00000075 0 0 0 0 32768.0 NaN False NaN NaN\n", + "9 microsoft/phi-4-reasoning-plus:free Microsoft: Phi 4 Reasoning Plus (free) 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "10 microsoft/phi-4-reasoning-plus Microsoft: Phi 4 Reasoning Plus 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.00000007 0.00000035 0 0 0 0 32768.0 NaN False NaN NaN\n", + "11 microsoft/phi-4-reasoning:free Microsoft: Phi 4 Reasoning (free) 1746121275 Phi-4-reasoning is a 14B parameter dense decod... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "12 qwen/qwen3-0.6b-04-28:free Qwen: Qwen3 0.6B (free) 1746043526 Qwen3-0.6B is a lightweight, 0.6 billion param... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "13 inception/mercury-coder-small-beta Inception: Mercury Coder Small Beta 1746033880 Mercury Coder Small is the first diffusion lar... 32000 None [max_tokens, frequency_penalty, presence_penal... text->text [text] [text] Other None 0.00000025 0.000001 0 0 0 0 32000.0 NaN False NaN NaN\n", + "14 qwen/qwen3-1.7b:free Qwen: Qwen3 1.7B (free) 1746031388 Qwen3-1.7B is a compact, 1.7 billion parameter... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "15 qwen/qwen3-4b:free Qwen: Qwen3 4B (free) 1746031104 Qwen3-4B is a 4 billion parameter dense langua... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 128000.0 NaN False NaN NaN\n", + "16 opengvlab/internvl3-14b:free OpenGVLab: InternVL3 14B (free) 1746021355 The 14b version of the InternVL3 series. An ad... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "17 opengvlab/internvl3-2b:free OpenGVLab: InternVL3 2B (free) 1746019807 The 2b version of the InternVL3 series, for an... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "18 deepseek/deepseek-prover-v2:free DeepSeek: DeepSeek Prover V2 (free) 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "19 deepseek/deepseek-prover-v2 DeepSeek: DeepSeek Prover V2 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.0000005 0.00000218 0 0 0 0 131072.0 NaN False NaN NaN\n", + "20 meta-llama/llama-guard-4-12b Meta: Llama Guard 4 12B 1745975193 Llama Guard 4 is a Llama 4 Scout-derived multi... 163840 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000005 0.00000005 0 0 0 0 163840.0 NaN False NaN NaN\n", + "21 qwen/qwen3-30b-a3b:free Qwen: Qwen3 30B A3B (free) 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "22 qwen/qwen3-30b-a3b Qwen: Qwen3 30B A3B 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "23 qwen/qwen3-8b:free Qwen: Qwen3 8B (free) 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "24 qwen/qwen3-8b Qwen: Qwen3 8B 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.000000035 0.000000138 0 0 0 0 128000.0 NaN False NaN NaN\n", + "25 qwen/qwen3-14b:free Qwen: Qwen3 14B (free) 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "26 qwen/qwen3-14b Qwen: Qwen3 14B 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000007 0.00000024 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "27 qwen/qwen3-32b:free Qwen: Qwen3 32B (free) 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "28 qwen/qwen3-32b Qwen: Qwen3 32B 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 NaN False NaN NaN\n", + "29 qwen/qwen3-235b-a22b:free Qwen: Qwen3 235B A22B (free) 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "30 qwen/qwen3-235b-a22b Qwen: Qwen3 235B A22B 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000014 0.000002 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "31 tngtech/deepseek-r1t-chimera:free TNG: DeepSeek R1T Chimera (free) 1745760875 DeepSeek-R1T-Chimera is created by merging Dee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "32 thudm/glm-z1-rumination-32b THUDM: GLM Z1 Rumination 32B 1745601495 THUDM: GLM Z1 Rumination 32B is a 32B-paramete... 32000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "33 thudm/glm-z1-9b:free THUDM: GLM Z1 9B (free) 1745601140 GLM-Z1-9B-0414 is a 9B-parameter language mode... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "34 thudm/glm-4-9b:free THUDM: GLM 4 9B (free) 1745601023 GLM-4-9B-0414 is a 9 billion parameter languag... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "35 microsoft/mai-ds-r1:free Microsoft: MAI DS R1 (free) 1745194100 MAI-DS-R1 is a post-trained variant of DeepSee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "36 thudm/glm-z1-32b:free THUDM: GLM Z1 32B (free) 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "37 thudm/glm-z1-32b THUDM: GLM Z1 32B 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "38 thudm/glm-4-32b:free THUDM: GLM 4 32B (free) 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "39 thudm/glm-4-32b THUDM: GLM 4 32B 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "40 google/gemini-2.5-flash-preview Google: Gemini 2.5 Flash Preview 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000006 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "41 google/gemini-2.5-flash-preview:thinking Google: Gemini 2.5 Flash Preview (thinking) 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000035 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "42 openai/o4-mini-high OpenAI: o4 Mini High 1744824212 OpenAI o4-mini-high is the same model as [o4-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "43 openai/o3 OpenAI: o3 1744823457 o3 is a well-rounded and powerful model across... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.00001 0.00004 0 0.00765 0 0 200000.0 100000.0 True 0.0000025 NaN\n", + "44 openai/o4-mini OpenAI: o4 Mini 1744820942 OpenAI o4-mini is a compact reasoning model in... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free Shisa AI: Shisa V2 Llama 3.3 70B (free) 1744754858 Shisa V2 Llama 3.3 70B is a bilingual Japanese... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "46 qwen/qwen2.5-coder-7b-instruct Qwen: Qwen2.5 Coder 7B Instruct 1744734887 Qwen2.5-Coder-7B-Instruct is a 7B parameter in... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen None 0.00000001 0.00000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "47 openai/gpt-4.1 OpenAI: GPT-4.1 1744651385 GPT-4.1 is a flagship large language model opt... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.000002 0.000008 0 0 0 0 1047576.0 32768.0 True 0.0000005 NaN\n", + "48 openai/gpt-4.1-mini OpenAI: GPT-4.1 Mini 1744651381 GPT-4.1 Mini is a mid-sized model delivering p... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000004 0.0000016 0 0 0 0 1047576.0 32768.0 True 0.0000001 NaN\n", + "49 openai/gpt-4.1-nano OpenAI: GPT-4.1 Nano 1744651369 For tasks that demand low latency, GPT‑4.1 nan... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000001 0.0000004 0 0 0 0 1047576.0 32768.0 True 0.000000025 NaN\n", + "50 eleutherai/llemma_7b EleutherAI: Llemma 7b 1744643225 Llemma 7B is a language model for mathematics.... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other code-llama 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "51 alfredpros/codellama-7b-instruct-solidity AlfredPros: CodeLLaMa 7B Instruct Solidity 1744641874 A finetuned 7 billion parameters Code LLaMA - ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free ArliAI: QwQ 32B RpR v1 (free) 1744555982 QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "53 agentica-org/deepcoder-14b-preview:free Agentica: Deepcoder 14B Preview (free) 1744555395 DeepCoder-14B-Preview is a 14B parameter code ... 96000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 96000.0 NaN False NaN NaN\n", + "54 moonshotai/kimi-vl-a3b-thinking:free Moonshot AI: Kimi VL A3B Thinking (free) 1744304841 Kimi-VL is a lightweight Mixture-of-Experts vi... 131072 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [image, text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "55 x-ai/grok-3-mini-beta xAI: Grok 3 Mini Beta 1744240195 Grok 3 Mini is a lightweight, smaller thinking... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.0000003 0.0000005 0 0 0 0 131072.0 NaN False NaN NaN\n", + "56 x-ai/grok-3-beta xAI: Grok 3 Beta 1744240068 Grok 3 is the latest model from xAI. It's thei... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000003 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free) 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 NVIDIA: Llama 3.3 Nemotron Super 49B v1 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000013 0.0000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free) 1744115059 Llama-3.1-Nemotron-Ultra-253B-v1 is a large la... 
131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "60 meta-llama/llama-4-maverick:free Meta: Llama 4 Maverick (free) 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 256000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 256000.0 NaN False NaN NaN\n", + "61 meta-llama/llama-4-maverick Meta: Llama 4 Maverick 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 1048576 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000017 0.0000006 0 0.0006684 0 0 1048576.0 16384.0 False NaN NaN\n", + "62 meta-llama/llama-4-scout:free Meta: Llama 4 Scout (free) 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 512000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 512000.0 NaN False NaN NaN\n", + "63 meta-llama/llama-4-scout Meta: Llama 4 Scout 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 1048576 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Other None 0.00000008 0.0000003 0 0 0 0 1048576.0 1048576.0 False NaN NaN\n", + "64 all-hands/openhands-lm-32b-v0.1 OpenHands LM 32B V0.1 1743613013 OpenHands LM v0.1 is a 32B open-source coding ... 16384 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "65 mistral/ministral-8b Mistral: Ministral 8B 1743430021 Ministral 8B is a state-of-the-art language mo... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "66 deepseek/deepseek-v3-base:free DeepSeek: DeepSeek V3 Base (free) 1743272023 Note that this is a base model mostly meant fo... 
163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct Typhoon2 8B Instruct 1743196511 Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000018 0.00000018 0 0 0 0 8192.0 NaN False NaN NaN\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct Typhoon2 70B Instruct 1743196170 Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000088 0.00000088 0 0 0 0 8192.0 NaN False NaN NaN\n", + "69 allenai/molmo-7b-d:free AllenAI: Molmo 7B D (free) 1743023247 Molmo is a family of open vision-language mode... 4096 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 4096.0 NaN False NaN NaN\n", + "70 bytedance-research/ui-tars-72b:free Bytedance: UI-TARS 72B (free) 1743020065 UI-TARS 72B is an open-source multimodal AI mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "71 qwen/qwen2.5-vl-3b-instruct:free Qwen: Qwen2.5 VL 3B Instruct (free) 1743014573 Qwen2.5 VL 3B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "72 google/gemini-2.5-pro-exp-03-25 Google: Gemini 2.5 Pro Experimental 1742922099 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1000000 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0 0 0 0 0 0 1000000.0 65535.0 False NaN NaN\n", + "73 qwen/qwen2.5-vl-32b-instruct:free Qwen: Qwen2.5 VL 32B Instruct (free) 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 
8192 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "74 qwen/qwen2.5-vl-32b-instruct Qwen: Qwen2.5 VL 32B Instruct 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000009 0.0000009 0 0 0 0 128000.0 NaN False NaN NaN\n", + "75 deepseek/deepseek-chat-v3-0324:free DeepSeek: DeepSeek V3 0324 (free) 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "76 deepseek/deepseek-chat-v3-0324 DeepSeek: DeepSeek V3 0324 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] DeepSeek None 0.0000003 0.00000088 0 0 0 0 163840.0 NaN False NaN NaN\n", + "77 featherless/qwerky-72b:free Qwerky 72B (free) 1742481597 Qwerky-72B is a linear-attention RWKV variant ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "78 openai/o1-pro OpenAI: o1-pro 1742423211 The o1 series of models are trained with reinf... 200000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] GPT None 0.00015 0.0006 0 0.21675 0 0 200000.0 100000.0 True NaN NaN\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free Mistral: Mistral Small 3.1 24B (free) 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 96000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0 0 0 0 0 0 96000.0 96000.0 False NaN NaN\n", + "80 mistralai/mistral-small-3.1-24b-instruct Mistral: Mistral Small 3.1 24B 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 
131072 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Mistral None 0.00000005 0.00000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "81 open-r1/olympiccoder-32b:free OlympicCoder 32B (free) 1742077228 OlympicCoder-32B is a high-performing open-sou... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "82 google/gemma-3-1b-it:free Google: Gemma 3 1B (free) 1741963556 Gemma 3 1B is the smallest of the new Gemma 3 ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "83 google/gemma-3-4b-it:free Google: Gemma 3 4B (free) 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "84 google/gemma-3-4b-it Google: Gemma 3 4B 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000002 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "85 ai21/jamba-1.6-large AI21: Jamba 1.6 Large 1741905173 AI21 Jamba Large 1.6 is a high-performance hyb... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.000002 0.000008 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "86 ai21/jamba-1.6-mini AI21: Jamba Mini 1.6 1741905171 AI21 Jamba Mini 1.6 is a hybrid foundation mod... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000002 0.0000004 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "87 google/gemma-3-12b-it:free Google: Gemma 3 12B (free) 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "88 google/gemma-3-12b-it Google: Gemma 3 12B 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000005 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "89 cohere/command-a Cohere: Command A 1741894342 Command A is an open-weights 111B parameter mo... 256000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 256000.0 8192.0 False NaN NaN\n", + "90 openai/gpt-4o-mini-search-preview OpenAI: GPT-4o-mini Search Preview 1741818122 GPT-4o mini Search Preview is a specialized mo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.00000015 0.0000006 0.0275 0.000217 0 0 128000.0 16384.0 True NaN NaN\n", + "91 openai/gpt-4o-search-preview OpenAI: GPT-4o Search Preview 1741817949 GPT-4o Search Previewis a specialized model fo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.0000025 0.00001 0.035 0.003613 0 0 128000.0 16384.0 True NaN NaN\n", + "92 rekaai/reka-flash-3:free Reka: Flash 3 (free) 1741812813 Reka Flash 3 is a general-purpose, instruction... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "93 google/gemma-3-27b-it:free Google: Gemma 3 27B (free) 1741756359 Gemma 3 introduces multimodality, supporting v... 96000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 96000.0 8192.0 False NaN NaN\n", + "94 google/gemma-3-27b-it Google: Gemma 3 27B 1741756359 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0.0000001 0.0000002 0 0.0000256 0 0 131072.0 16384.0 False NaN NaN\n", + "95 thedrummer/anubis-pro-105b-v1 TheDrummer: Anubis Pro 105B V1 1741642290 Anubis Pro 105B v1 is an expanded and refined ... 131072 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000008 0.000001 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "96 thedrummer/skyfall-36b-v2 TheDrummer: Skyfall 36B V2 1741636566 Skyfall 36B v2 is an enhanced iteration of Mis... 32768 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "97 microsoft/phi-4-multimodal-instruct Microsoft: Phi 4 Multimodal Instruct 1741396284 Phi-4 Multimodal Instruct is a versatile 5.6B ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000005 0.0000001 0 0.00017685 0 0 131072.0 NaN False NaN NaN\n", + "98 perplexity/sonar-reasoning-pro Perplexity: Sonar Reasoning Pro 1741313308 Note: Sonar Pro pricing includes Perplexity se... 128000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0 128000.0 NaN False NaN NaN\n", + "99 perplexity/sonar-pro Perplexity: Sonar Pro 1741312423 Note: Sonar Pro pricing includes Perplexity se... 200000 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000003 0.000015 0 0 0.005 0 200000.0 8000.0 False NaN NaN\n", + "100 perplexity/sonar-deep-research Perplexity: Sonar Deep Research 1741311246 Sonar Deep Research is a research-focused mode... 128000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0.000003 128000.0 NaN False NaN NaN\n", + "101 deepseek/deepseek-r1-zero:free DeepSeek: DeepSeek R1 Zero (free) 1741297434 DeepSeek-R1-Zero is a model trained via large-... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "102 qwen/qwq-32b:free Qwen: QwQ 32B (free) 1741208814 QwQ is the reasoning model of the Qwen series.... 40000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0 0 0 0 0 0 40000.0 40000.0 False NaN NaN\n", + "103 qwen/qwq-32b Qwen: QwQ 32B 1741208814 QwQ is the reasoning model of the Qwen series.... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0.00000015 0.0000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free Moonshot AI: Moonlight 16B A3B Instruct (free) 1740719801 Moonlight-16B-A3B-Instruct is a 16B-parameter ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free Nous: DeepHermes 3 Llama 3 8B Preview (free) 1740719372 DeepHermes 3 Preview is the latest version of ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "106 openai/gpt-4.5-preview OpenAI: GPT-4.5 (Preview) 1740687810 GPT-4.5 (Preview) is a research preview of Ope... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.000075 0.00015 0 0.108375 0 0 128000.0 16384.0 True 0.0000375 NaN\n", + "107 google/gemini-2.0-flash-lite-001 Google: Gemini 2.0 Flash Lite 1740506212 Gemini 2.0 Flash Lite offers a significantly f... 1048576 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image, file] [text] Gemini None 0.000000075 0.0000003 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "108 anthropic/claude-3.7-sonnet Anthropic: Claude 3.7 Sonnet 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "109 anthropic/claude-3.7-sonnet:thinking Anthropic: Claude 3.7 Sonnet (thinking) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "110 anthropic/claude-3.7-sonnet:beta Anthropic: Claude 3.7 Sonnet (self-moderated) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [max_tokens, temperature, stop, reasoning, inc... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 128000.0 False 0.0000003 0.00000375\n", + "111 perplexity/r1-1776 Perplexity: R1 1776 1740004929 R1 1776 is a version of DeepSeek-R1 that has b... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.000002 0.000008 0 0 0 0 128000.0 NaN False NaN NaN\n", + "112 mistralai/mistral-saba Mistral: Saba 1739803239 Mistral Saba is a 24B-parameter language model... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... Dolphin3.0 R1 Mistral 24B (free) 1739462498 Dolphin 3.0 R1 is the next generation of the D... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free Dolphin3.0 Mistral 24B (free) 1739462019 Dolphin 3.0 is the next generation of the Dolp... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "115 meta-llama/llama-guard-3-8b Llama Guard 3 8B 1739401318 Llama Guard 3 is a Llama-3.1-8B pretrained mod... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.00000002 0.00000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "116 openai/o3-mini-high OpenAI: o3 Mini High 1739372611 OpenAI o3-mini-high is the same model as [o3-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "117 deepseek/deepseek-r1-distill-llama-8b DeepSeek: R1 Distill Llama 8B 1738937718 DeepSeek R1 Distill Llama 8B is a distilled la... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.00000004 0.00000004 0 0 0 0 32000.0 32000.0 False NaN NaN\n", + "118 google/gemini-2.0-flash-001 Google: Gemini 2.0 Flash 1738769413 Gemini Flash 2.0 offers a significantly faster... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.0000001 0.0000004 0 0.0000258 0 0 1000000.0 8192.0 False 0.000000025 0.0000001833\n", + "119 qwen/qwen-vl-plus Qwen: Qwen VL Plus 1738731255 Qwen's Enhanced Large Visual Language Model. S... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.00000021 0.00000063 0 0.0002688 0 0 7500.0 1500.0 False NaN NaN\n", + "120 aion-labs/aion-1.0 AionLabs: Aion-1.0 1738697557 Aion-1.0 is a multi-model system designed for ... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.000004 0.000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "121 aion-labs/aion-1.0-mini AionLabs: Aion-1.0-Mini 1738697107 Aion-1.0-Mini 32B parameter model is a distill... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.0000007 0.0000014 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "122 aion-labs/aion-rp-llama-3.1-8b AionLabs: Aion-RP 1.0 (8B) 1738696718 Aion-RP-Llama-3.1-8B ranks the highest in the ... 32768 None [max_tokens, temperature, top_p] text->text [text] [text] Other None 0.0000002 0.0000002 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "123 qwen/qwen-vl-max Qwen: Qwen VL Max 1738434304 Qwen VL Max is a visual understanding model wi... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.0000008 0.0000032 0 0.001024 0 0 7500.0 1500.0 False NaN NaN\n", + "124 qwen/qwen-turbo Qwen: Qwen-Turbo 1738410974 Qwen-Turbo, based on Qwen2.5, is a 1M context ... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.00000005 0.0000002 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "125 qwen/qwen2.5-vl-72b-instruct:free Qwen: Qwen2.5 VL 72B Instruct (free) 1738410311 Qwen2.5-VL is proficient in recognizing common... 131072 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "126 qwen/qwen2.5-vl-72b-instruct Qwen: Qwen2.5 VL 72B Instruct 1738410311 Qwen2.5-VL is proficient in recognizing common... 32000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.00000025 0.00000075 0 0 0 0 32000.0 NaN False NaN NaN\n", + "127 qwen/qwen-plus Qwen: Qwen-Plus 1738409840 Qwen-Plus, based on the Qwen2.5 foundation mod... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen None 0.0000004 0.0000012 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "128 qwen/qwen-max Qwen: Qwen-Max 1738402289 Qwen-Max, based on Qwen2.5, provides the best ... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000016 0.0000064 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "129 openai/o3-mini OpenAI: o3 Mini 1738351721 OpenAI o3-mini is a cost-efficient language mo... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b DeepSeek: R1 Distill Qwen 1.5B 1738328067 DeepSeek R1 Distill Qwen 1.5B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000018 0.00000018 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "131 mistralai/mistral-small-24b-instruct-2501:free Mistral: Mistral Small 3 (free) 1738255409 Mistral Small 3 is a 24B-parameter language mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "132 mistralai/mistral-small-24b-instruct-2501 Mistral: Mistral Small 3 1738255409 Mistral Small 3 is a 24B-parameter language mo... 28000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0.00000006 0.00000012 0 0 0 0 28000.0 14000.0 False NaN NaN\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free DeepSeek: R1 Distill Qwen 32B (free) 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 16000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16000.0 16000.0 False NaN NaN\n", + "134 deepseek/deepseek-r1-distill-qwen-32b DeepSeek: R1 Distill Qwen 32B 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000012 0.00000018 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free DeepSeek: R1 Distill Qwen 14B (free) 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "136 deepseek/deepseek-r1-distill-qwen-14b DeepSeek: R1 Distill Qwen 14B 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000015 0.00000015 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "137 perplexity/sonar-reasoning Perplexity: Sonar Reasoning 1738131107 Sonar Reasoning is a reasoning model provided ... 127000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000001 0.000005 0.005 0 0 0 127000.0 NaN False NaN NaN\n", + "138 perplexity/sonar Perplexity: Sonar 1738013808 Sonar is lightweight, affordable, fast, and si... 127072 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "139 liquid/lfm-7b Liquid: LFM 7B 1737806883 LFM-7B, a new best-in-class language model. LF... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000001 0.00000001 0 0 0 0 32768.0 NaN False NaN NaN\n", + "140 liquid/lfm-3b Liquid: LFM 3B 1737806501 Liquid's LFM 3B delivers incredible performanc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000002 0.00000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free DeepSeek: R1 Distill Llama 70B (free) 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 
8192 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0 0 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "142 deepseek/deepseek-r1-distill-llama-70b DeepSeek: R1 Distill Llama 70B 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.0000001 0.0000004 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "143 deepseek/deepseek-r1:free DeepSeek: R1 (free) 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, reasoning, include_reasoning, tem... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "144 deepseek/deepseek-r1 DeepSeek: R1 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.0000005 0.00000218 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "145 minimax/minimax-01 MiniMax: MiniMax-01 1736915462 MiniMax-01 is a combines MiniMax-Text-01 for t... 1000192 None [max_tokens, temperature, top_p] text+image->text [text, image] [text] Other None 0.0000002 0.0000011 0 0 0 0 1000192.0 1000192.0 False NaN NaN\n", + "146 mistralai/codestral-2501 Mistral: Codestral 2501 1736895522 [Mistral](/mistralai)'s cutting-edge language ... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000003 0.0000009 0 0 0 0 262144.0 NaN False NaN NaN\n", + "147 microsoft/phi-4 Microsoft: Phi 4 1736489872 [Microsoft Research](/microsoft) Phi-4 is desi... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000007 0.00000014 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "148 deepseek/deepseek-chat:free DeepSeek: DeepSeek V3 (free) 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "149 deepseek/deepseek-chat DeepSeek: DeepSeek V3 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.00000038 0.00000089 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "150 sao10k/l3.3-euryale-70b Sao10K: Llama 3.3 Euryale 70B 1734535928 Euryale L3.3 70B is a model focused on creativ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "151 openai/o1 OpenAI: o1 1734459999 The latest and strongest model family from Ope... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [text, image] [text] GPT None 0.000015 0.00006 0 0.021675 0 0 200000.0 100000.0 True 0.0000075 NaN\n", + "152 eva-unit-01/eva-llama-3.33-70b EVA Llama 3.33 70B 1734377303 EVA Llama 3.33 70b is a roleplay and storywrit... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "153 x-ai/grok-2-vision-1212 xAI: Grok 2 Vision 1212 1734237338 Grok 2 Vision 1212 advances image-based AI wit... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000002 0.00001 0 0.0036 0 0 32768.0 NaN False NaN NaN\n", + "154 x-ai/grok-2-1212 xAI: Grok 2 1212 1734232814 Grok 2 1212 introduces significant enhancement... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000002 0.00001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "155 cohere/command-r7b-12-2024 Cohere: Command R7B (12-2024) 1734158152 Command R7B (12-2024) is a small, fast update ... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Cohere None 0.0000000375 0.00000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "156 google/gemini-2.0-flash-exp:free Google: Gemini 2.0 Flash Experimental (free) 1733937523 Gemini Flash 2.0 offers a significantly faster... 1048576 None [max_tokens, temperature, top_p, stop] text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "157 meta-llama/llama-3.3-70b-instruct:free Meta: Llama 3.3 70B Instruct (free) 1733506137 The Meta Llama 3.3 multilingual large language... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 8000.0 8000.0 False NaN NaN\n", + "158 meta-llama/llama-3.3-70b-instruct Meta: Llama 3.3 70B Instruct 1733506137 The Meta Llama 3.3 multilingual large language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009 0.00000035 0 0 0 0 131000.0 131000.0 False NaN NaN\n", + "159 amazon/nova-lite-v1 Amazon: Nova Lite 1.0 1733437363 Amazon Nova Lite 1.0 is a very low-cost multim... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.00000006 0.00000024 0 0.00009 0 0 300000.0 5120.0 True NaN NaN\n", + "160 amazon/nova-micro-v1 Amazon: Nova Micro 1.0 1733437237 Amazon Nova Micro 1.0 is a text-only model tha... 128000 None [tools, max_tokens, temperature, top_p, top_k,... text->text [text] [text] Nova None 0.000000035 0.00000014 0 0 0 0 128000.0 5120.0 True NaN NaN\n", + "161 amazon/nova-pro-v1 Amazon: Nova Pro 1.0 1733436303 Amazon Nova Pro 1.0 is a capable multimodal mo... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.0000008 0.0000032 0 0.0012 0 0 300000.0 5120.0 True NaN NaN\n", + "162 qwen/qwq-32b-preview:free Qwen: QwQ 32B Preview (free) 1732754541 QwQ-32B-Preview is an experimental research mo... 
16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16384.0 NaN False NaN NaN\n", + "163 qwen/qwq-32b-preview Qwen: QwQ 32B Preview 1732754541 QwQ-32B-Preview is an experimental research mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0.00000009 0.00000027 0 0 0 0 32768.0 NaN False NaN NaN\n", + "164 google/learnlm-1.5-pro-experimental:free Google: LearnLM 1.5 Pro Experimental (free) 1732216551 An experimental version of [Gemini 1.5 Pro](/g... 40960 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 40960.0 8192.0 False NaN NaN\n", + "165 eva-unit-01/eva-qwen-2.5-72b EVA Qwen2.5 72B 1732210606 EVA Qwen2.5 72B is a roleplay and storywriting... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "166 openai/gpt-4o-2024-11-20 OpenAI: GPT-4o (2024-11-20) 1732127594 The 2024-11-20 version of GPT-4o offers a leve... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "167 mistralai/mistral-large-2411 Mistral Large 2411 1731978685 Mistral Large 2 2411 is an update of [Mistral ... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "168 mistralai/mistral-large-2407 Mistral Large 2407 1731978415 This is Mistral AI's flagship model, Mistral L... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "169 mistralai/pixtral-large-2411 Mistral: Pixtral Large 2411 1731977388 Pixtral Large is a 124B parameter, open-weight... 
131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.000002 0.000006 0 0.002888 0 0 131072.0 NaN False NaN NaN\n", + "170 x-ai/grok-vision-beta xAI: Grok Vision Beta 1731976624 Grok Vision Beta is xAI's experimental languag... 8192 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000005 0.000015 0 0.009 0 0 8192.0 NaN False NaN NaN\n", + "171 infermatic/mn-inferor-12b Infermatic: Mistral Nemo Inferor 12B 1731464428 Inferor 12B is a merge of top roleplay models,... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free Qwen2.5 Coder 32B Instruct (free) 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "173 qwen/qwen-2.5-coder-32b-instruct Qwen2.5 Coder 32B Instruct 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000006 0.00000015 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "174 raifle/sorcererlm-8x22b SorcererLM 8x22B 1731105083 SorcererLM is an advanced RP and storytelling ... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral vicuna 0.0000045 0.0000045 0 0 0 0 16000.0 NaN False NaN NaN\n", + "175 eva-unit-01/eva-qwen-2.5-32b EVA Qwen2.5 32B 1731104847 EVA Qwen2.5 32B is a roleplaying/storywriting ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "176 thedrummer/unslopnemo-12b Unslopnemo 12B 1731103448 UnslopNemo v4.1 is the latest addition from th... 
32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000045 0.00000045 0 0 0 0 32000.0 16000.0 False NaN NaN\n", + "177 anthropic/claude-3.5-haiku:beta Anthropic: Claude 3.5 Haiku (self-moderated) 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "178 anthropic/claude-3.5-haiku Anthropic: Claude 3.5 Haiku 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "179 anthropic/claude-3.5-haiku-20241022:beta Anthropic: Claude 3.5 Haiku (2024-10-22) (self... 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 Anthropic: Claude 3.5 Haiku (2024-10-22) 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "181 neversleep/llama-3.1-lumimaid-70b NeverSleep: Lumimaid v0.2 70B 1729555200 Lumimaid v0.2 70B is a finetune of [Llama 3.1 ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000015 0.00000225 0 0 0 0 16384.0 2048.0 False NaN NaN\n", + "182 anthracite-org/magnum-v4-72b Magnum v4 72B 1729555200 This is a series of models designed to replica... 16384 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.0000015 0.00000225 0 0 0 0 16384.0 1024.0 False NaN NaN\n", + "183 anthropic/claude-3.5-sonnet:beta Anthropic: Claude 3.5 Sonnet (self-moderated) 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "184 anthropic/claude-3.5-sonnet Anthropic: Claude 3.5 Sonnet 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "185 x-ai/grok-beta xAI: Grok Beta 1729382400 Grok Beta is xAI's experimental language model... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000005 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "186 mistralai/ministral-8b Mistral: Ministral 8B 1729123200 Ministral 8B is an 8B parameter model featurin... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "187 mistralai/ministral-3b Mistral: Ministral 3B 1729123200 Ministral 3B is a 3B parameter model optimized... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000004 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "188 qwen/qwen-2.5-7b-instruct:free Qwen2.5 7B Instruct (free) 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "189 qwen/qwen-2.5-7b-instruct Qwen2.5 7B Instruct 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.00000005 0.0000001 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct NVIDIA: Llama 3.1 Nemotron 70B Instruct 1728950400 NVIDIA's Llama 3.1 Nemotron 70B is a language ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "191 inflection/inflection-3-productivity Inflection: Inflection 3 Productivity 1728604800 Inflection 3 Productivity is optimized for fol... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "192 inflection/inflection-3-pi Inflection: Inflection 3 Pi 1728604800 Inflection 3 Pi powers Inflection's [Pi](https... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "193 google/gemini-flash-1.5-8b Google: Gemini 1.5 Flash 8B 1727913600 Gemini Flash 1.5 8B is optimized for speed and... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.0000000375 0.00000015 0 0 0 0 1000000.0 8192.0 False 0.00000001 0.0000000583\n", + "194 thedrummer/rocinante-12b Rocinante 12B 1727654400 Rocinante 12B is designed for engaging storyte... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000025 0.0000005 0 0 0 0 32768.0 NaN False NaN NaN\n", + "195 anthracite-org/magnum-v2-72b Magnum v2 72B 1727654400 From the maker of [Goliath](https://openrouter... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000003 0.000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "196 liquid/lfm-40b Liquid: LFM 40B MoE 1727654400 Liquid's 40.3B Mixture of Experts (MoE) model.... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other chatml 0.00000015 0.00000015 0 0 0 0 32768.0 NaN False NaN NaN\n", + "197 meta-llama/llama-3.2-3b-instruct:free Meta: Llama 3.2 3B Instruct (free) 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 20000 None [max_tokens, temperature, top_p] text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 20000.0 20000.0 False NaN NaN\n", + "198 meta-llama/llama-3.2-3b-instruct Meta: Llama 3.2 3B Instruct 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000001 0.00000002 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "199 meta-llama/llama-3.2-1b-instruct:free Meta: Llama 3.2 1B Instruct (free) 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131000.0 NaN False NaN NaN\n", + "200 meta-llama/llama-3.2-1b-instruct Meta: Llama 3.2 1B Instruct 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131072 None [max_tokens, temperature, top_p, top_k, stop, ... text->text [text] [text] Llama3 llama3 0.000000005 0.00000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "201 meta-llama/llama-3.2-90b-vision-instruct Meta: Llama 3.2 90B Vision Instruct 1727222400 The Llama 90B Vision model is a top-tier, 90-b... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.0000012 0.0000012 0 0.001734 0 0 131072.0 2048.0 False NaN NaN\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free Meta: Llama 3.2 11B Vision Instruct (free) 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "203 meta-llama/llama-3.2-11b-vision-instruct Meta: Llama 3.2 11B Vision Instruct 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.000000049 0.000000049 0 0.00007948 0 0 131072.0 16384.0 False NaN NaN\n", + "204 qwen/qwen-2.5-72b-instruct:free Qwen2.5 72B Instruct (free) 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "205 qwen/qwen-2.5-72b-instruct Qwen2.5 72B Instruct 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen chatml 0.00000012 0.00000039 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "206 qwen/qwen-2.5-vl-72b-instruct Qwen: Qwen2.5-VL 72B Instruct 1726617600 Qwen2.5 VL 72B is a multimodal LLM from the Qw... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000006 0.0000006 0 0.000578 0 0 32768.0 NaN False NaN NaN\n", + "207 neversleep/llama-3.1-lumimaid-8b NeverSleep: Lumimaid v0.2 8B 1726358400 Lumimaid v0.2 8B is a finetune of [Llama 3.1 8... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "208 openai/o1-preview OpenAI: o1-preview 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "209 openai/o1-preview-2024-09-12 OpenAI: o1-preview (2024-09-12) 1726099200 The latest and strongest model family from Ope... 
128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "210 openai/o1-mini OpenAI: o1-mini 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "211 openai/o1-mini-2024-09-12 OpenAI: o1-mini (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "212 mistralai/pixtral-12b Mistral: Pixtral 12B 1725926400 The first multi-modal, text+image-to-text mode... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Mistral None 0.0000001 0.0000001 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "213 cohere/command-r-plus-08-2024 Cohere: Command R+ (08-2024) 1724976000 command-r-plus-08-2024 is an update of the [Co... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000025 0.00001 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "214 cohere/command-r-08-2024 Cohere: Command R (08-2024) 1724976000 command-r-08-2024 is an update of the [Command... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.00000015 0.0000006 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free Qwen: Qwen2.5-VL 7B Instruct (free) 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "216 qwen/qwen-2.5-vl-7b-instruct Qwen: Qwen2.5-VL 7B Instruct 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Qwen None 0.0000002 0.0000002 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "217 sao10k/l3.1-euryale-70b Sao10K: Llama 3.1 Euryale 70B v2.2 1724803200 Euryale L3.1 70B v2.2 is a model focused on cr... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "218 google/gemini-flash-1.5-8b-exp Google: Gemini 1.5 Flash 8B Experimental 1724803200 Gemini Flash 1.5 8B Experimental is an experim... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "219 microsoft/phi-3.5-mini-128k-instruct Microsoft: Phi-3.5 Mini 128K Instruct 1724198400 Phi-3.5 models are lightweight, state-of-the-a... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.00000003 0.00000009 0 0 0 0 131072.0 NaN False NaN NaN\n", + "220 nousresearch/hermes-3-llama-3.1-70b Nous: Hermes 3 70B Instruct 1723939200 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "221 nousresearch/hermes-3-llama-3.1-405b Nous: Hermes 3 405B Instruct 1723766400 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.0000008 0.0000008 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "222 openai/chatgpt-4o-latest OpenAI: ChatGPT-4o 1723593600 OpenAI ChatGPT 4o is continually updated by Op... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 16384.0 True NaN NaN\n", + "223 sao10k/l3-lunaris-8b Sao10K: Llama 3 8B Lunaris 1723507200 Lunaris 8B is a versatile generalist and rolep... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000005 0 0 0 0 8192.0 NaN False NaN NaN\n", + "224 aetherwiing/mn-starcannon-12b Aetherwiing: Starcannon 12B 1723507200 Starcannon 12B v2 is a creative roleplay and s... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "225 openai/gpt-4o-2024-08-06 OpenAI: GPT-4o (2024-08-06) 1722902400 The 2024-08-06 version of GPT-4o offers improv... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "226 meta-llama/llama-3.1-405b:free Meta: Llama 3.1 405B (base) (free) 1722556800 Meta's latest class of model (Llama 3.1) launc... 64000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "227 meta-llama/llama-3.1-405b Meta: Llama 3.1 405B (base) 1722556800 Meta's latest class of model (Llama 3.1) launc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.000002 0.000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "228 nothingiisreal/mn-celeste-12b Mistral Nemo 12B Celeste 1722556800 A specialized story writing and roleplaying mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "229 perplexity/llama-3.1-sonar-small-128k-online Perplexity: Llama 3.1 Sonar 8B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 
127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.0000002 0.0000002 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "230 perplexity/llama-3.1-sonar-large-128k-online Perplexity: Llama 3.1 Sonar 70B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "231 meta-llama/llama-3.1-8b-instruct:free Meta: Llama 3.1 8B Instruct (free) 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 4096.0 False NaN NaN\n", + "232 meta-llama/llama-3.1-8b-instruct Meta: Llama 3.1 8B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000003 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "233 meta-llama/llama-3.1-405b-instruct Meta: Llama 3.1 405B Instruct 1721692800 The highly anticipated 400B class of Llama3 is... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000008 0.0000008 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "234 meta-llama/llama-3.1-70b-instruct Meta: Llama 3.1 70B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000001 0.00000028 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "235 mistralai/codestral-mamba Mistral: Codestral Mamba 1721347200 A 7.3B parameter Mamba-based model designed fo... 262144 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 262144.0 NaN False NaN NaN\n", + "236 mistralai/mistral-nemo:free Mistral: Mistral Nemo (free) 1721347200 A 12B parameter model with a 128k token contex... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 128000.0 128000.0 False NaN NaN\n", + "237 mistralai/mistral-nemo Mistral: Mistral Nemo 1721347200 A 12B parameter model with a 128k token contex... 98304 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000003 0.00000007 0 0 0 0 98304.0 49152.0 False NaN NaN\n", + "238 openai/gpt-4o-mini OpenAI: GPT-4o-mini 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.000217 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "239 openai/gpt-4o-mini-2024-07-18 OpenAI: GPT-4o-mini (2024-07-18) 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.007225 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "240 google/gemma-2-27b-it Google: Gemma 2 27B 1720828800 Gemma 2 27B by Google is an open model built f... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.0000001 0.0000003 0 0 0 0 8192.0 NaN False NaN NaN\n", + "241 alpindale/magnum-72b Magnum 72B 1720656000 From the maker of [Goliath](https://openrouter... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "242 google/gemma-2-9b-it:free Google: Gemma 2 9B (free) 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Gemini gemma 0 0 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "243 google/gemma-2-9b-it Google: Gemma 2 9B 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.00000002 0.00000006 0 0 0 0 8192.0 NaN False NaN NaN\n", + "244 01-ai/yi-large 01.AI: Yi Large 1719273600 The Yi Large model was designed by 01.AI with ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Yi None 0.000003 0.000003 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "245 ai21/jamba-instruct AI21: Jamba Instruct 1719273600 The Jamba-Instruct model, introduced by AI21 L... 256000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000005 0.0000007 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel... 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "247 anthropic/claude-3.5-sonnet-20240620 Anthropic: Claude 3.5 Sonnet (2024-06-20) 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "248 sao10k/l3-euryale-70b Sao10k: Llama 3 Euryale 70B v2.1 1718668800 Euryale 70B v2.1 is a model focused on creativ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000148 0.00000148 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "249 cognitivecomputations/dolphin-mixtral-8x22b Dolphin 2.9.2 Mixtral 8x22B 🐬 1717804800 Dolphin 2.9 is designed for instruction follow... 
16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000009 0.0000009 0 0 0 0 16000.0 NaN False NaN NaN\n", + "250 qwen/qwen-2-72b-instruct Qwen 2 72B Instruct 1717718400 Qwen2 72B is a transformer-based model that ex... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000009 0.0000009 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "251 mistralai/mistral-7b-instruct:free Mistral: Mistral 7B Instruct (free) 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "252 mistralai/mistral-7b-instruct Mistral: Mistral 7B Instruct 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "253 nousresearch/hermes-2-pro-llama-3-8b NousResearch: Hermes 2 Pro - Llama-3 8B 1716768000 Hermes 2 Pro is an upgraded, retrained version... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.000000025 0.00000004 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "254 mistralai/mistral-7b-instruct-v0.3 Mistral: Mistral 7B Instruct v0.3 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "255 microsoft/phi-3-mini-128k-instruct Microsoft: Phi-3 Mini 128K Instruct 1716681600 Phi-3 Mini is a powerful 3.8B parameter model ... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Other phi3 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "256 microsoft/phi-3-medium-128k-instruct Microsoft: Phi-3 Medium 128K Instruct 1716508800 Phi-3 128K Medium is a powerful 14-billion par... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000003 0 0 0 0 131072.0 NaN False NaN NaN\n", + "257 neversleep/llama-3-lumimaid-70b NeverSleep: Llama 3 Lumimaid 70B 1715817600 The NeverSleep team is back, with a Llama 3 70... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "258 deepseek/deepseek-coder DeepSeek-Coder-V2 1715644800 DeepSeek-Coder-V2, an open-source Mixture-of-E... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000004 0.00000012 0 0 0 0 128000.0 NaN False NaN NaN\n", + "259 google/gemini-flash-1.5 Google: Gemini 1.5 Flash 1715644800 Gemini 1.5 Flash is a foundation model that pe... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.000000075 0.0000003 0 0.00004 0 0 1000000.0 8192.0 False 0.00000001875 0.0000001583\n", + "260 openai/gpt-4o OpenAI: GPT-4o 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "261 openai/gpt-4o:extended OpenAI: GPT-4o (extended) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000006 0.000018 0 0.007225 0 0 128000.0 64000.0 True NaN NaN\n", + "262 meta-llama/llama-guard-2-8b Meta: LlamaGuard 2 8B 1715558400 This safeguard model has 8B parameters and is ... 
8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.0000002 0.0000002 0 0 0 0 8192.0 NaN False NaN NaN\n", + "263 openai/gpt-4o-2024-05-13 OpenAI: GPT-4o (2024-05-13) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 4096.0 True NaN NaN\n", + "264 allenai/olmo-7b-instruct OLMo 7B Instruct 1715299200 OLMo 7B Instruct by the Allen Institute for AI... 2048 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other zephyr 0.00000008 0.00000024 0 0 0 0 2048.0 NaN False NaN NaN\n", + "265 neversleep/llama-3-lumimaid-8b:extended NeverSleep: Llama 3 Lumimaid 8B (extended) 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "266 neversleep/llama-3-lumimaid-8b NeverSleep: Llama 3 Lumimaid 8B 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "267 sao10k/fimbulvetr-11b-v2 Fimbulvetr 11B v2 1713657600 Creative writing model, routed with permission... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "268 meta-llama/llama-3-8b-instruct Meta: Llama 3 8B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, top_k, seed, ... 
text->text [text] [text] Llama3 llama3 0.00000003 0.00000006 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "269 meta-llama/llama-3-70b-instruct Meta: Llama 3 70B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000003 0.0000004 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "270 mistralai/mixtral-8x22b-instruct Mistral: Mixtral 8x22B Instruct 1713312000 Mistral's official instruct fine-tuned version... 65536 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.0000004 0.0000012 0 0 0 0 65536.0 NaN False NaN NaN\n", + "271 microsoft/wizardlm-2-8x22b WizardLM-2 8x22B 1713225600 WizardLM-2 8x22B is Microsoft AI's most advanc... 65536 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Mistral vicuna 0.0000005 0.0000005 0 0 0 0 65536.0 16384.0 False NaN NaN\n", + "272 google/gemini-pro-1.5 Google: Gemini 1.5 Pro 1712620800 Google's latest multimodal model, supports ima... 2000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.00000125 0.000005 0 0.0006575 0 0 2000000.0 8192.0 False NaN NaN\n", + "273 openai/gpt-4-turbo OpenAI: GPT-4 Turbo 1712620800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.00001 0.00003 0 0.01445 0 0 128000.0 4096.0 True NaN NaN\n", + "274 cohere/command-r-plus Cohere: Command R+ 1712188800 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "275 cohere/command-r-plus-04-2024 Cohere: Command R+ (04-2024) 1712016000 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... 
text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "276 sophosympatheia/midnight-rose-70b Midnight Rose 70B 1711065600 A merge with a complex family tree, this model... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000008 0.0000008 0 0 0 0 4096.0 NaN False NaN NaN\n", + "277 cohere/command Cohere: Command 1710374400 Command is an instruction-following conversati... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.000001 0.000002 0 0 0 0 4096.0 4000.0 False NaN NaN\n", + "278 cohere/command-r Cohere: Command R 1710374400 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "279 anthropic/claude-3-haiku:beta Anthropic: Claude 3 Haiku (self-moderated) 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 False 0.00000003 0.0000003\n", + "280 anthropic/claude-3-haiku Anthropic: Claude 3 Haiku 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 True 0.00000003 0.0000003\n", + "281 anthropic/claude-3-opus:beta Anthropic: Claude 3 Opus (self-moderated) 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 False 0.0000015 0.00001875\n", + "282 anthropic/claude-3-opus Anthropic: Claude 3 Opus 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 
200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 True 0.0000015 0.00001875\n", + "283 anthropic/claude-3-sonnet:beta Anthropic: Claude 3 Sonnet (self-moderated) 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 False 0.0000003 0.00000375\n", + "284 anthropic/claude-3-sonnet Anthropic: Claude 3 Sonnet 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 True 0.0000003 0.00000375\n", + "285 cohere/command-r-03-2024 Cohere: Command R (03-2024) 1709341200 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "286 mistralai/mistral-large Mistral Large 1708905600 This is Mistral AI's flagship model, Mistral L... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 128000.0 NaN False NaN NaN\n", + "287 openai/gpt-3.5-turbo-0613 OpenAI: GPT-3.5 Turbo (older v0613) 1706140800 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 4095 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 4095.0 4096.0 False NaN NaN\n", + "288 openai/gpt-4-turbo-preview OpenAI: GPT-4 Turbo Preview 1706140800 The preview GPT-4 model with improved instruct... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo Nous: Hermes 2 Mixtral 8x7B DPO 1705363200 Nous Hermes 2 Mixtral 8x7B DPO is the new flag... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000006 0.0000006 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "290 mistralai/mistral-medium Mistral Medium 1704844800 This is Mistral AI's closed-source, medium-sid... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000275 0.0000081 0 0 0 0 32768.0 NaN False NaN NaN\n", + "291 mistralai/mistral-small Mistral Small 1704844800 With 22 billion parameters, Mistral Small v24.... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "292 mistralai/mistral-tiny Mistral Tiny 1704844800 Note: This model is being deprecated. Recommen... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 32768.0 NaN False NaN NaN\n", + "293 mistralai/mistral-7b-instruct-v0.2 Mistral: Mistral 7B Instruct v0.2 1703721600 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000002 0.0000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "294 mistralai/mixtral-8x7b-instruct Mistral: Mixtral 8x7B Instruct 1702166400 Mixtral 8x7B Instruct is a pretrained generati... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000008 0.00000024 0 0 0 0 32768.0 NaN False NaN NaN\n", + "295 neversleep/noromaid-20b Noromaid 20B 1700956800 A collab between IkariDev and Undi. This merge... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.00000075 0.0000015 0 0 0 0 8192.0 2048.0 False NaN NaN\n", + "296 anthropic/claude-2.1:beta Anthropic: Claude v2.1 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "297 anthropic/claude-2.1 Anthropic: Claude v2.1 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "298 anthropic/claude-2:beta Anthropic: Claude v2 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "299 anthropic/claude-2 Anthropic: Claude v2 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "300 undi95/toppy-m-7b Toppy M 7B 1699574400 A wild 7B parameter model that merges several ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "301 alpindale/goliath-120b Goliath 120B 1699574400 A large LLM created by combining two fine-tune... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000065625 0.000009375 0 0 0 0 6144.0 512.0 False NaN NaN\n", + "302 openrouter/auto Auto Router 1699401600 Your prompt will be processed by a meta-model ... 
2000000 None [] text->text [text] [text] Router None -1 -1 NaN NaN NaN NaN NaN NaN False NaN NaN\n", + "303 openai/gpt-3.5-turbo-1106 OpenAI: GPT-3.5 Turbo 16k (older v1106) 1699228800 An older GPT-3.5 Turbo model with improved ins... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "304 openai/gpt-4-1106-preview OpenAI: GPT-4 Turbo (older v1106) 1699228800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "305 jondurbin/airoboros-l2-70b Airoboros 70B 1698537600 A Llama 2 70B fine-tune using synthetic data (... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000005 0.0000005 0 0 0 0 4096.0 NaN False NaN NaN\n", + "306 openai/gpt-3.5-turbo-instruct OpenAI: GPT-3.5 Turbo Instruct 1695859200 This model is a variant of GPT-3.5 Turbo tuned... 4095 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] GPT chatml 0.0000015 0.000002 0 0 0 0 4095.0 4096.0 True NaN NaN\n", + "307 mistralai/mistral-7b-instruct-v0.1 Mistral: Mistral 7B Instruct v0.1 1695859200 A 7.3B parameter model that outperforms Llama ... 2824 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000011 0.00000019 0 0 0 0 2824.0 NaN False NaN NaN\n", + "308 pygmalionai/mythalion-13b Pygmalion: Mythalion 13B 1693612800 A blend of the new Pygmalion-13b and MythoMax.... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 8192.0 1024.0 False NaN NaN\n", + "309 openai/gpt-3.5-turbo-16k OpenAI: GPT-3.5 Turbo 16k 1693180800 This model offers four times the context lengt... 16385 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.000003 0.000004 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "310 openai/gpt-4-32k OpenAI: GPT-4 32k 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "311 openai/gpt-4-32k-0314 OpenAI: GPT-4 32k (older v0314) 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "312 mancer/weaver Mancer: Weaver (alpha) 1690934400 An attempt to recreate Claude-style verbosity,... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.000001125 0.000001125 0 0 0 0 8000.0 1000.0 False NaN NaN\n", + "313 anthropic/claude-2.0:beta Anthropic: Claude v2.0 (self-moderated) 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 False NaN NaN\n", + "314 anthropic/claude-2.0 Anthropic: Claude v2.0 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 True NaN NaN\n", + "315 undi95/remm-slerp-l2-13b ReMM SLERP 13B 1689984000 A recreation trial of the original MythoMax-L2... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 6144.0 1024.0 False NaN NaN\n", + "316 gryphe/mythomax-l2-13b MythoMax 13B 1688256000 One of the highest performing and most popular... 4096 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.000000065 0.000000065 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "317 meta-llama/llama-2-70b-chat Meta: Llama 2 70B Chat 1687219200 The flagship, 70 billion parameter language mo... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 llama2 0.0000009 0.0000009 0 0 0 0 4096.0 NaN False NaN NaN\n", + "318 openai/gpt-3.5-turbo OpenAI: GPT-3.5 Turbo 1685232000 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "319 openai/gpt-3.5-turbo-0125 OpenAI: GPT-3.5 Turbo 16k 1685232000 The latest GPT-3.5 Turbo model with improved i... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "320 openai/gpt-4 OpenAI: GPT-4 1685232000 OpenAI's flagship model, GPT-4 is a large-scal... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN\n", + "321 openai/gpt-4-0314 OpenAI: GPT-4 (older v0314) 1685232000 GPT-4-0314 is the first version of GPT-4 relea... 8191 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalize the nested JSON\n", + "df = pd.json_normalize(val, sep=\"_\")\n", + "df\n", + "# View the resulting DataFrame\n", + "# print(df.T) # Transpose just for readable vertical inspection" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id mistralai/mistral-medium-3\n", + "name Mistral: Mistral Medium 3\n", + "created 1746627341\n", + "description Mistral Medium 3 is a high-performance enterpr...\n", + "context_length 131072\n", + "per_request_limits None\n", + "supported_parameters [tools, tool_choice, max_tokens, temperature, ...\n", + "architecture_modality text+image->text\n", + "architecture_input_modalities [text, image]\n", + "architecture_output_modalities [text]\n", + "architecture_tokenizer Mistral\n", + "architecture_instruct_type None\n", + "pricing_prompt 0.0000004\n", + "pricing_completion 0.000002\n", + "pricing_request 0\n", + "pricing_image 0\n", + "pricing_web_search 0\n", + "pricing_internal_reasoning 0\n", + "top_provider_context_length 131072.0\n", + "top_provider_max_completion_tokens NaN\n", + "top_provider_is_moderated False\n", + "pricing_input_cache_read NaN\n", + "pricing_input_cache_write NaN\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0].T" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "col_names = [\"id\", \"context_length\", \"pricing_prompt\", \"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id object\n", + "name object\n", + "created int64\n", + "description object\n", + 
"context_length int64\n", + "per_request_limits object\n", + "supported_parameters object\n", + "architecture_modality object\n", + "architecture_input_modalities object\n", + "architecture_output_modalities object\n", + "architecture_tokenizer object\n", + "architecture_instruct_type object\n", + "pricing_prompt object\n", + "pricing_completion object\n", + "pricing_request object\n", + "pricing_image object\n", + "pricing_web_search object\n", + "pricing_internal_reasoning object\n", + "top_provider_context_length float64\n", + "top_provider_max_completion_tokens float64\n", + "top_provider_is_moderated bool\n", + "pricing_input_cache_read object\n", + "pricing_input_cache_write object\n", + "dtype: object" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 
0.0, 'is_string': 0.38819875776397517, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.5962732919254659, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 1.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.13043478260869565, 'is_bool': 0.0, 'is_string': 0.13043478260869565, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.07142857142857142, 'is_bool': 0.0, 'is_string': 0.07142857142857142, 'type': 'is_numeric'}\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " print(hpandas.infer_column_types(df[col]))" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_numericis_boolis_string
id0.0000000.01.000000
name0.0000000.01.000000
created1.0000000.00.000000
description0.0000000.01.000000
context_length1.0000000.00.000000
per_request_limits0.0000000.00.000000
supported_parameters0.0000000.00.000000
architecture_modality0.0000000.01.000000
architecture_input_modalities0.0000000.00.000000
architecture_output_modalities0.0000000.00.000000
architecture_tokenizer0.0000000.01.000000
architecture_instruct_type0.0000000.00.388199
pricing_prompt1.0000000.01.000000
pricing_completion1.0000000.01.000000
pricing_request0.9968940.00.996894
pricing_image0.9968940.00.996894
pricing_web_search0.9968940.00.996894
pricing_internal_reasoning0.9968940.00.996894
top_provider_context_length0.9968940.00.000000
top_provider_max_completion_tokens0.5962730.00.000000
top_provider_is_moderated1.0000001.00.000000
pricing_input_cache_read0.1304350.00.130435
pricing_input_cache_write0.0714290.00.071429
\n", + "
" + ], + "text/plain": [ + " is_numeric is_bool is_string\n", + "id 0.000000 0.0 1.000000\n", + "name 0.000000 0.0 1.000000\n", + "created 1.000000 0.0 0.000000\n", + "description 0.000000 0.0 1.000000\n", + "context_length 1.000000 0.0 0.000000\n", + "per_request_limits 0.000000 0.0 0.000000\n", + "supported_parameters 0.000000 0.0 0.000000\n", + "architecture_modality 0.000000 0.0 1.000000\n", + "architecture_input_modalities 0.000000 0.0 0.000000\n", + "architecture_output_modalities 0.000000 0.0 0.000000\n", + "architecture_tokenizer 0.000000 0.0 1.000000\n", + "architecture_instruct_type 0.000000 0.0 0.388199\n", + "pricing_prompt 1.000000 0.0 1.000000\n", + "pricing_completion 1.000000 0.0 1.000000\n", + "pricing_request 0.996894 0.0 0.996894\n", + "pricing_image 0.996894 0.0 0.996894\n", + "pricing_web_search 0.996894 0.0 0.996894\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894\n", + "top_provider_context_length 0.996894 0.0 0.000000\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.000000\n", + "top_provider_is_moderated 1.000000 1.0 0.000000\n", + "pricing_input_cache_read 0.130435 0.0 0.130435\n", + "pricing_input_cache_write 0.071429 0.0 0.071429" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(lambda x: pd.Series(hpandas.infer_column_types(x))).T" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_numericis_boolis_stringtype
id0.00.01.0is_bool
name0.00.01.0is_bool
created1.00.00.0is_numeric
description0.00.01.0is_bool
context_length1.00.00.0is_numeric
per_request_limits0.00.00.0is_bool
supported_parameters0.00.00.0is_bool
architecture_modality0.00.01.0is_bool
architecture_input_modalities0.00.00.0is_bool
architecture_output_modalities0.00.00.0is_bool
architecture_tokenizer0.00.01.0is_bool
architecture_instruct_type0.00.00.388199is_bool
pricing_prompt1.00.01.0is_numeric
pricing_completion1.00.01.0is_numeric
pricing_request0.9968940.00.996894is_numeric
pricing_image0.9968940.00.996894is_numeric
pricing_web_search0.9968940.00.996894is_numeric
pricing_internal_reasoning0.9968940.00.996894is_numeric
top_provider_context_length0.9968940.00.0is_numeric
top_provider_max_completion_tokens0.5962730.00.0is_numeric
top_provider_is_moderated1.01.00.0is_bool
pricing_input_cache_read0.1304350.00.130435is_numeric
pricing_input_cache_write0.0714290.00.071429is_numeric
\n", + "
" + ], + "text/plain": [ + " is_numeric is_bool is_string type\n", + "id 0.0 0.0 1.0 is_bool\n", + "name 0.0 0.0 1.0 is_bool\n", + "created 1.0 0.0 0.0 is_numeric\n", + "description 0.0 0.0 1.0 is_bool\n", + "context_length 1.0 0.0 0.0 is_numeric\n", + "per_request_limits 0.0 0.0 0.0 is_bool\n", + "supported_parameters 0.0 0.0 0.0 is_bool\n", + "architecture_modality 0.0 0.0 1.0 is_bool\n", + "architecture_input_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_output_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_tokenizer 0.0 0.0 1.0 is_bool\n", + "architecture_instruct_type 0.0 0.0 0.388199 is_bool\n", + "pricing_prompt 1.0 0.0 1.0 is_numeric\n", + "pricing_completion 1.0 0.0 1.0 is_numeric\n", + "pricing_request 0.996894 0.0 0.996894 is_numeric\n", + "pricing_image 0.996894 0.0 0.996894 is_numeric\n", + "pricing_web_search 0.996894 0.0 0.996894 is_numeric\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894 is_numeric\n", + "top_provider_context_length 0.996894 0.0 0.0 is_numeric\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.0 is_numeric\n", + "top_provider_is_moderated 1.0 1.0 0.0 is_bool\n", + "pricing_input_cache_read 0.130435 0.0 0.130435 is_numeric\n", + "pricing_input_cache_write 0.071429 0.0 0.071429 is_numeric" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hpandas.infer_column_types_df(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 True\n", + "2 True\n", + "3 True\n", + "4 True\n", + "5 True\n", + "6 True\n", + "7 True\n", + "8 True\n", + "9 True\n", + "10 True\n", + "11 True\n", + "12 True\n", + "13 True\n", + "14 True\n", + "15 True\n", + "16 True\n", + "17 True\n", + "18 True\n", + "19 True\n", + "20 True\n", + "21 True\n", + "22 True\n", + "23 True\n", + "24 True\n", + "25 True\n", + "26 True\n", + "27 True\n", + "28 True\n", + "29 True\n", 
+ "30 True\n", + "31 True\n", + "32 True\n", + "33 True\n", + "34 True\n", + "35 True\n", + "36 True\n", + "37 True\n", + "38 True\n", + "39 True\n", + "40 True\n", + "41 True\n", + "42 True\n", + "43 True\n", + "44 True\n", + "45 True\n", + "46 True\n", + "47 True\n", + "48 True\n", + "49 True\n", + "50 True\n", + "51 True\n", + "52 True\n", + "53 True\n", + "54 True\n", + "55 True\n", + "56 True\n", + "57 True\n", + "58 True\n", + "59 True\n", + "60 True\n", + "61 True\n", + "62 True\n", + "63 True\n", + "64 True\n", + "65 True\n", + "66 True\n", + "67 True\n", + "68 True\n", + "69 True\n", + "70 True\n", + "71 True\n", + "72 True\n", + "73 True\n", + "74 True\n", + "75 True\n", + "76 True\n", + "77 True\n", + "78 True\n", + "79 True\n", + "80 True\n", + "81 True\n", + "82 True\n", + "83 True\n", + "84 True\n", + "85 True\n", + "86 True\n", + "87 True\n", + "88 True\n", + "89 True\n", + "90 True\n", + "91 True\n", + "92 True\n", + "93 True\n", + "94 True\n", + "95 True\n", + "96 True\n", + "97 True\n", + "98 True\n", + "99 True\n", + "100 True\n", + "101 True\n", + "102 True\n", + "103 True\n", + "104 True\n", + "105 True\n", + "106 True\n", + "107 True\n", + "108 True\n", + "109 True\n", + "110 True\n", + "111 True\n", + "112 True\n", + "113 True\n", + "114 True\n", + "115 True\n", + "116 True\n", + "117 True\n", + "118 True\n", + "119 True\n", + "120 True\n", + "121 True\n", + "122 True\n", + "123 True\n", + "124 True\n", + "125 True\n", + "126 True\n", + "127 True\n", + "128 True\n", + "129 True\n", + "130 True\n", + "131 True\n", + "132 True\n", + "133 True\n", + "134 True\n", + "135 True\n", + "136 True\n", + "137 True\n", + "138 True\n", + "139 True\n", + "140 True\n", + "141 True\n", + "142 True\n", + "143 True\n", + "144 True\n", + "145 True\n", + "146 True\n", + "147 True\n", + "148 True\n", + "149 True\n", + "150 True\n", + "151 True\n", + "152 True\n", + "153 True\n", + "154 True\n", + "155 True\n", + "156 True\n", + "157 True\n", + "158 True\n", + 
"159 True\n", + "160 True\n", + "161 True\n", + "162 True\n", + "163 True\n", + "164 True\n", + "165 True\n", + "166 True\n", + "167 True\n", + "168 True\n", + "169 True\n", + "170 True\n", + "171 True\n", + "172 True\n", + "173 True\n", + "174 True\n", + "175 True\n", + "176 True\n", + "177 True\n", + "178 True\n", + "179 True\n", + "180 True\n", + "181 True\n", + "182 True\n", + "183 True\n", + "184 True\n", + "185 True\n", + "186 True\n", + "187 True\n", + "188 True\n", + "189 True\n", + "190 True\n", + "191 True\n", + "192 True\n", + "193 True\n", + "194 True\n", + "195 True\n", + "196 True\n", + "197 True\n", + "198 True\n", + "199 True\n", + "200 True\n", + "201 True\n", + "202 True\n", + "203 True\n", + "204 True\n", + "205 True\n", + "206 True\n", + "207 True\n", + "208 True\n", + "209 True\n", + "210 True\n", + "211 True\n", + "212 True\n", + "213 True\n", + "214 True\n", + "215 True\n", + "216 True\n", + "217 True\n", + "218 True\n", + "219 True\n", + "220 True\n", + "221 True\n", + "222 True\n", + "223 True\n", + "224 True\n", + "225 True\n", + "226 True\n", + "227 True\n", + "228 True\n", + "229 True\n", + "230 True\n", + "231 True\n", + "232 True\n", + "233 True\n", + "234 True\n", + "235 True\n", + "236 True\n", + "237 True\n", + "238 True\n", + "239 True\n", + "240 True\n", + "241 True\n", + "242 True\n", + "243 True\n", + "244 True\n", + "245 True\n", + "246 True\n", + "247 True\n", + "248 True\n", + "249 True\n", + "250 True\n", + "251 True\n", + "252 True\n", + "253 True\n", + "254 True\n", + "255 True\n", + "256 True\n", + "257 True\n", + "258 True\n", + "259 True\n", + "260 True\n", + "261 True\n", + "262 True\n", + "263 True\n", + "264 True\n", + "265 True\n", + "266 True\n", + "267 True\n", + "268 True\n", + "269 True\n", + "270 True\n", + "271 True\n", + "272 True\n", + "273 True\n", + "274 True\n", + "275 True\n", + "276 True\n", + "277 True\n", + "278 True\n", + "279 True\n", + "280 True\n", + "281 True\n", + "282 True\n", + "283 True\n", + 
"284 True\n", + "285 True\n", + "286 True\n", + "287 True\n", + "288 True\n", + "289 True\n", + "290 True\n", + "291 True\n", + "292 True\n", + "293 True\n", + "294 True\n", + "295 True\n", + "296 True\n", + "297 True\n", + "298 True\n", + "299 True\n", + "300 True\n", + "301 True\n", + "302 False\n", + "303 True\n", + "304 True\n", + "305 True\n", + "306 True\n", + "307 True\n", + "308 True\n", + "309 True\n", + "310 True\n", + "311 True\n", + "312 True\n", + "313 True\n", + "314 True\n", + "315 True\n", + "316 True\n", + "317 True\n", + "318 True\n", + "319 True\n", + "320 True\n", + "321 True\n", + "Name: pricing_request, dtype: bool" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"pricing_request\"], errors=\"coerce\").notna()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.000002\n", + "1 0.00001\n", + "2 0.00000085\n", + "3 0.00000018\n", + "4 0.0000033\n", + "5 0.0000012\n", + "6 0.0000008\n", + "7 0.0000008\n", + "8 0.00000075\n", + "9 0\n", + "10 0.00000035\n", + "11 0\n", + "12 0\n", + "13 0.000001\n", + "14 0\n", + "15 0\n", + "16 0\n", + "17 0\n", + "18 0\n", + "19 0.00000218\n", + "20 0.00000005\n", + "21 0\n", + "22 0.0000003\n", + "23 0\n", + "24 0.000000138\n", + "25 0\n", + "26 0.00000024\n", + "27 0\n", + "28 0.0000003\n", + "29 0\n", + "30 0.000002\n", + "31 0\n", + "32 0.00000024\n", + "33 0\n", + "34 0\n", + "35 0\n", + "36 0\n", + "37 0.00000024\n", + "38 0\n", + "39 0.00000024\n", + "40 0.0000006\n", + "41 0.0000035\n", + "42 0.0000044\n", + "43 0.00004\n", + "44 0.0000044\n", + "45 0\n", + "46 0.00000003\n", + "47 0.000008\n", + "48 0.0000016\n", + "49 0.0000004\n", + "50 0.0000012\n", + "51 0.0000012\n", + "52 0\n", + "53 0\n", + "54 0\n", + "55 0.0000005\n", + "56 0.000015\n", + "57 0\n", + "58 0.0000004\n", + "59 0\n", + "60 0\n", + "61 
0.0000006\n", + "62 0\n", + "63 0.0000003\n", + "64 0.0000034\n", + "65 0.0000001\n", + "66 0\n", + "67 0.00000018\n", + "68 0.00000088\n", + "69 0\n", + "70 0\n", + "71 0\n", + "72 0\n", + "73 0\n", + "74 0.0000009\n", + "75 0\n", + "76 0.00000088\n", + "77 0\n", + "78 0.0006\n", + "79 0\n", + "80 0.00000015\n", + "81 0\n", + "82 0\n", + "83 0\n", + "84 0.00000004\n", + "85 0.000008\n", + "86 0.0000004\n", + "87 0\n", + "88 0.0000001\n", + "89 0.00001\n", + "90 0.0000006\n", + "91 0.00001\n", + "92 0\n", + "93 0\n", + "94 0.0000002\n", + "95 0.000001\n", + "96 0.0000008\n", + "97 0.0000001\n", + "98 0.000008\n", + "99 0.000015\n", + "100 0.000008\n", + "101 0\n", + "102 0\n", + "103 0.0000002\n", + "104 0\n", + "105 0\n", + "106 0.00015\n", + "107 0.0000003\n", + "108 0.000015\n", + "109 0.000015\n", + "110 0.000015\n", + "111 0.000008\n", + "112 0.0000006\n", + "113 0\n", + "114 0\n", + "115 0.00000006\n", + "116 0.0000044\n", + "117 0.00000004\n", + "118 0.0000004\n", + "119 0.00000063\n", + "120 0.000008\n", + "121 0.0000014\n", + "122 0.0000002\n", + "123 0.0000032\n", + "124 0.0000002\n", + "125 0\n", + "126 0.00000075\n", + "127 0.0000012\n", + "128 0.0000064\n", + "129 0.0000044\n", + "130 0.00000018\n", + "131 0\n", + "132 0.00000012\n", + "133 0\n", + "134 0.00000018\n", + "135 0\n", + "136 0.00000015\n", + "137 0.000005\n", + "138 0.000001\n", + "139 0.00000001\n", + "140 0.00000002\n", + "141 0\n", + "142 0.0000004\n", + "143 0\n", + "144 0.00000218\n", + "145 0.0000011\n", + "146 0.0000009\n", + "147 0.00000014\n", + "148 0\n", + "149 0.00000089\n", + "150 0.0000008\n", + "151 0.00006\n", + "152 0.000006\n", + "153 0.00001\n", + "154 0.00001\n", + "155 0.00000015\n", + "156 0\n", + "157 0\n", + "158 0.00000035\n", + "159 0.00000024\n", + "160 0.00000014\n", + "161 0.0000032\n", + "162 0\n", + "163 0.00000027\n", + "164 0\n", + "165 0.000006\n", + "166 0.00001\n", + "167 0.000006\n", + "168 0.000006\n", + "169 0.000006\n", + "170 0.000015\n", + "171 
0.0000012\n", + "172 0\n", + "173 0.00000015\n", + "174 0.0000045\n", + "175 0.0000034\n", + "176 0.00000045\n", + "177 0.000004\n", + "178 0.000004\n", + "179 0.000004\n", + "180 0.000004\n", + "181 0.00000225\n", + "182 0.00000225\n", + "183 0.000015\n", + "184 0.000015\n", + "185 0.000015\n", + "186 0.0000001\n", + "187 0.00000004\n", + "188 0\n", + "189 0.0000001\n", + "190 0.0000003\n", + "191 0.00001\n", + "192 0.00001\n", + "193 0.00000015\n", + "194 0.0000005\n", + "195 0.000003\n", + "196 0.00000015\n", + "197 0\n", + "198 0.00000002\n", + "199 0\n", + "200 0.00000001\n", + "201 0.0000012\n", + "202 0\n", + "203 0.000000049\n", + "204 0\n", + "205 0.00000039\n", + "206 0.0000006\n", + "207 0.00000075\n", + "208 0.00006\n", + "209 0.00006\n", + "210 0.0000044\n", + "211 0.0000044\n", + "212 0.0000001\n", + "213 0.00001\n", + "214 0.0000006\n", + "215 0\n", + "216 0.0000002\n", + "217 0.0000008\n", + "218 0\n", + "219 0.00000009\n", + "220 0.0000003\n", + "221 0.0000008\n", + "222 0.000015\n", + "223 0.00000005\n", + "224 0.0000012\n", + "225 0.00001\n", + "226 0\n", + "227 0.000002\n", + "228 0.0000012\n", + "229 0.0000002\n", + "230 0.000001\n", + "231 0\n", + "232 0.00000003\n", + "233 0.0000008\n", + "234 0.00000028\n", + "235 0.00000025\n", + "236 0\n", + "237 0.00000007\n", + "238 0.0000006\n", + "239 0.0000006\n", + "240 0.0000003\n", + "241 0.000006\n", + "242 0\n", + "243 0.00000006\n", + "244 0.000003\n", + "245 0.0000007\n", + "246 0.000015\n", + "247 0.000015\n", + "248 0.00000148\n", + "249 0.0000009\n", + "250 0.0000009\n", + "251 0\n", + "252 0.000000054\n", + "253 0.00000004\n", + "254 0.000000054\n", + "255 0.0000001\n", + "256 0.0000003\n", + "257 0.000006\n", + "258 0.00000012\n", + "259 0.0000003\n", + "260 0.00001\n", + "261 0.000018\n", + "262 0.0000002\n", + "263 0.000015\n", + "264 0.00000024\n", + "265 0.00000075\n", + "266 0.00000075\n", + "267 0.0000012\n", + "268 0.00000006\n", + "269 0.0000004\n", + "270 0.0000012\n", + "271 
0.0000005\n", + "272 0.000005\n", + "273 0.00003\n", + "274 0.000015\n", + "275 0.000015\n", + "276 0.0000008\n", + "277 0.000002\n", + "278 0.0000015\n", + "279 0.00000125\n", + "280 0.00000125\n", + "281 0.000075\n", + "282 0.000075\n", + "283 0.000015\n", + "284 0.000015\n", + "285 0.0000015\n", + "286 0.000006\n", + "287 0.000002\n", + "288 0.00003\n", + "289 0.0000006\n", + "290 0.0000081\n", + "291 0.0000006\n", + "292 0.00000025\n", + "293 0.0000002\n", + "294 0.00000024\n", + "295 0.0000015\n", + "296 0.000024\n", + "297 0.000024\n", + "298 0.000024\n", + "299 0.000024\n", + "300 0.0000012\n", + "301 0.000009375\n", + "302 -1\n", + "303 0.000002\n", + "304 0.00003\n", + "305 0.0000005\n", + "306 0.000002\n", + "307 0.00000019\n", + "308 0.000001125\n", + "309 0.000004\n", + "310 0.00012\n", + "311 0.00012\n", + "312 0.000001125\n", + "313 0.000024\n", + "314 0.000024\n", + "315 0.000001125\n", + "316 0.000000065\n", + "317 0.0000009\n", + "318 0.0000015\n", + "319 0.0000015\n", + "320 0.00006\n", + "321 0.00006\n", + "Name: pricing_completion, dtype: object" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcontext_lengthpricing_promptpricing_completion
302openrouter/auto2000000-1-1
133deepseek/deepseek-r1-distill-qwen-32b:free1600000
59nvidia/llama-3.1-nemotron-ultra-253b-v1:free13107200
113cognitivecomputations/dolphin3.0-r1-mistral-24...3276800
57nvidia/llama-3.3-nemotron-super-49b-v1:free13107200
114cognitivecomputations/dolphin3.0-mistral-24b:free3276800
54moonshotai/kimi-vl-a3b-thinking:free13107200
53agentica-org/deepcoder-14b-preview:free9600000
52arliai/qwq-32b-arliai-rpr-v1:free3276800
231meta-llama/llama-3.1-8b-instruct:free13107200
226meta-llama/llama-3.1-405b:free6400000
125qwen/qwen2.5-vl-72b-instruct:free13107200
45shisa-ai/shisa-v2-llama3.3-70b:free3276800
87google/gemma-3-12b-it:free13107200
92rekaai/reka-flash-3:free3276800
131mistralai/mistral-small-24b-instruct-2501:free3276800
81open-r1/olympiccoder-32b:free3276800
60meta-llama/llama-4-maverick:free25600000
236mistralai/mistral-nemo:free12800000
62meta-llama/llama-4-scout:free51200000
83google/gemma-3-4b-it:free13107200
93google/gemma-3-27b-it:free9600000
79mistralai/mistral-small-3.1-24b-instruct:free9600000
251mistralai/mistral-7b-instruct:free3276800
77featherless/qwerky-72b:free3276800
75deepseek/deepseek-chat-v3-0324:free16384000
242google/gemma-2-9b-it:free819200
73qwen/qwen2.5-vl-32b-instruct:free819200
135deepseek/deepseek-r1-distill-qwen-14b:free6400000
72google/gemini-2.5-pro-exp-03-25100000000
70bytedance-research/ui-tars-72b:free3276800
69allenai/molmo-7b-d:free409600
101deepseek/deepseek-r1-zero:free16384000
102qwen/qwq-32b:free4000000
66deepseek/deepseek-v3-base:free16384000
104moonshotai/moonlight-16b-a3b-instruct:free819200
105nousresearch/deephermes-3-llama-3-8b-preview:free13107200
71qwen/qwen2.5-vl-3b-instruct:free6400000
218google/gemini-flash-1.5-8b-exp100000000
82google/gemma-3-1b-it:free3276800
156google/gemini-2.0-flash-exp:free104857600
204qwen/qwen-2.5-72b-instruct:free3276800
21qwen/qwen3-30b-a3b:free4096000
38thudm/glm-4-32b:free3276800
157meta-llama/llama-3.3-70b-instruct:free800000
18deepseek/deepseek-prover-v2:free16384000
17opengvlab/internvl3-2b:free3200000
23qwen/qwen3-8b:free4096000
16opengvlab/internvl3-14b:free3200000
14qwen/qwen3-1.7b:free3200000
202meta-llama/llama-3.2-11b-vision-instruct:free13107200
12qwen/qwen3-0.6b-04-28:free3200000
11microsoft/phi-4-reasoning:free3276800
162qwen/qwq-32b-preview:free1638400
9microsoft/phi-4-reasoning-plus:free3276800
15qwen/qwen3-4b:free12800000
164google/learnlm-1.5-pro-experimental:free4096000
148deepseek/deepseek-chat:free16384000
199meta-llama/llama-3.2-1b-instruct:free13100000
36thudm/glm-z1-32b:free3276800
35microsoft/mai-ds-r1:free16384000
34thudm/glm-4-9b:free3200000
33thudm/glm-z1-9b:free3200000
188qwen/qwen-2.5-7b-instruct:free3276800
172qwen/qwen-2.5-coder-32b-instruct:free3276800
25qwen/qwen3-14b:free4096000
197meta-llama/llama-3.2-3b-instruct:free2000000
141deepseek/deepseek-r1-distill-llama-70b:free819200
29qwen/qwen3-235b-a22b:free4096000
143deepseek/deepseek-r1:free16384000
27qwen/qwen3-32b:free4096000
215qwen/qwen-2.5-vl-7b-instruct:free6400000
31tngtech/deepseek-r1t-chimera:free16384000
200meta-llama/llama-3.2-1b-instruct1310720.0000000050.00000001
198meta-llama/llama-3.2-3b-instruct1310720.000000010.00000002
139liquid/lfm-7b327680.000000010.00000001
46qwen/qwen2.5-coder-7b-instruct327680.000000010.00000003
243google/gemma-2-9b-it81920.000000020.00000006
232meta-llama/llama-3.1-8b-instruct163840.000000020.00000003
84google/gemma-3-4b-it1310720.000000020.00000004
140liquid/lfm-3b327680.000000020.00000002
115meta-llama/llama-guard-3-8b1310720.000000020.00000006
223sao10k/l3-lunaris-8b81920.000000020.00000005
253nousresearch/hermes-2-pro-llama-3-8b1310720.0000000250.00000004
254mistralai/mistral-7b-instruct-v0.3327680.0000000280.000000054
252mistralai/mistral-7b-instruct327680.0000000280.000000054
268meta-llama/llama-3-8b-instruct81920.000000030.00000006
219microsoft/phi-3.5-mini-128k-instruct1310720.000000030.00000009
237mistralai/mistral-nemo983040.000000030.00000007
160amazon/nova-micro-v11280000.0000000350.00000014
24qwen/qwen3-8b1280000.0000000350.000000138
193google/gemini-flash-1.5-8b10000000.00000003750.00000015
155cohere/command-r7b-12-20241280000.00000003750.00000015
187mistralai/ministral-3b1310720.000000040.00000004
117deepseek/deepseek-r1-distill-llama-8b320000.000000040.00000004
258deepseek/deepseek-coder1280000.000000040.00000012
203meta-llama/llama-3.2-11b-vision-instruct1310720.0000000490.000000049
80mistralai/mistral-small-3.1-24b-instruct1310720.000000050.00000015
20meta-llama/llama-guard-4-12b1638400.000000050.00000005
189qwen/qwen-2.5-7b-instruct327680.000000050.0000001
97microsoft/phi-4-multimodal-instruct1310720.000000050.0000001
88google/gemma-3-12b-it1310720.000000050.0000001
124qwen/qwen-turbo10000000.000000050.0000002
132mistralai/mistral-small-24b-instruct-2501280000.000000060.00000012
173qwen/qwen-2.5-coder-32b-instruct327680.000000060.00000015
159amazon/nova-lite-v13000000.000000060.00000024
316gryphe/mythomax-l2-13b40960.0000000650.000000065
10microsoft/phi-4-reasoning-plus327680.000000070.00000035
147microsoft/phi-4163840.000000070.00000014
26qwen/qwen3-14b409600.000000070.00000024
259google/gemini-flash-1.510000000.0000000750.0000003
107google/gemini-2.0-flash-lite-00110485760.0000000750.0000003
63meta-llama/llama-4-scout10485760.000000080.0000003
294mistralai/mixtral-8x7b-instruct327680.000000080.00000024
264allenai/olmo-7b-instruct20480.000000080.00000024
163qwen/qwq-32b-preview327680.000000090.00000027
158meta-llama/llama-3.3-70b-instruct1310000.000000090.00000035
266neversleep/llama-3-lumimaid-8b245760.000000093750.00000075
207neversleep/llama-3.1-lumimaid-8b327680.000000093750.00000075
265neversleep/llama-3-lumimaid-8b:extended245760.000000093750.00000075
212mistralai/pixtral-12b327680.00000010.0000001
28qwen/qwen3-32b409600.00000010.0000003
49openai/gpt-4.1-nano10475760.00000010.0000004
186mistralai/ministral-8b1280000.00000010.0000001
118google/gemini-2.0-flash-00110000000.00000010.0000004
234meta-llama/llama-3.1-70b-instruct1310720.00000010.00000028
65mistral/ministral-8b1310720.00000010.0000001
240google/gemma-2-27b-it81920.00000010.0000003
256microsoft/phi-3-medium-128k-instruct1310720.00000010.0000003
255microsoft/phi-3-mini-128k-instruct1280000.00000010.0000001
94google/gemma-3-27b-it1310720.00000010.0000002
22qwen/qwen3-30b-a3b409600.00000010.0000003
142deepseek/deepseek-r1-distill-llama-70b1310720.00000010.0000004
307mistralai/mistral-7b-instruct-v0.128240.000000110.00000019
205qwen/qwen-2.5-72b-instruct327680.000000120.00000039
190nvidia/llama-3.1-nemotron-70b-instruct1310720.000000120.0000003
134deepseek/deepseek-r1-distill-qwen-32b1310720.000000120.00000018
220nousresearch/hermes-3-llama-3.1-70b1310720.000000120.0000003
58nvidia/llama-3.3-nemotron-super-49b-v11310720.000000130.0000004
30qwen/qwen3-235b-a22b409600.000000140.000002
90openai/gpt-4o-mini-search-preview1280000.000000150.0000006
136deepseek/deepseek-r1-distill-qwen-14b640000.000000150.00000015
214cohere/command-r-08-20241280000.000000150.0000006
238openai/gpt-4o-mini1280000.000000150.0000006
103qwen/qwq-32b1310720.000000150.0000002
196liquid/lfm-40b327680.000000150.00000015
41google/gemini-2.5-flash-preview:thinking10485760.000000150.0000035
40google/gemini-2.5-flash-preview10485760.000000150.0000006
239openai/gpt-4o-mini-2024-07-181280000.000000150.0000006
61meta-llama/llama-4-maverick10485760.000000170.0000006
67scb10x/llama3.1-typhoon2-8b-instruct81920.000000180.00000018
130deepseek/deepseek-r1-distill-qwen-1.5b1310720.000000180.00000018
3arcee-ai/spotlight1310720.000000180.00000018
216qwen/qwen-2.5-vl-7b-instruct327680.00000020.0000002
262meta-llama/llama-guard-2-8b81920.00000020.0000002
293mistralai/mistral-7b-instruct-v0.2327680.00000020.0000002
86ai21/jamba-1.6-mini2560000.00000020.0000004
122aion-labs/aion-rp-llama-3.1-8b327680.00000020.0000002
229perplexity/llama-3.1-sonar-small-128k-online1270720.00000020.0000002
145minimax/minimax-0110001920.00000020.0000011
291mistralai/mistral-small327680.00000020.0000006
112mistralai/mistral-saba327680.00000020.0000006
119qwen/qwen-vl-plus75000.000000210.00000063
39thudm/glm-4-32b320000.000000240.00000024
32thudm/glm-z1-rumination-32b320000.000000240.00000024
37thudm/glm-z1-32b320000.000000240.00000024
13inception/mercury-coder-small-beta320000.000000250.000001
292mistralai/mistral-tiny327680.000000250.00000025
126qwen/qwen2.5-vl-72b-instruct320000.000000250.00000075
194thedrummer/rocinante-12b327680.000000250.0000005
279anthropic/claude-3-haiku:beta2000000.000000250.00000125
235mistralai/codestral-mamba2621440.000000250.00000025
280anthropic/claude-3-haiku2000000.000000250.00000125
55x-ai/grok-3-mini-beta1310720.00000030.0000005
269meta-llama/llama-3-70b-instruct81920.00000030.0000004
146mistralai/codestral-25012621440.00000030.0000009
76deepseek/deepseek-chat-v3-03241638400.00000030.00000088
149deepseek/deepseek-chat1638400.000000380.00000089
270mistralai/mixtral-8x22b-instruct655360.00000040.0000012
0mistralai/mistral-medium-31310720.00000040.000002
48openai/gpt-4.1-mini10475760.00000040.0000016
127qwen/qwen-plus1310720.00000040.0000012
8arcee-ai/arcee-blitz327680.000000450.00000075
176thedrummer/unslopnemo-12b320000.000000450.00000045
278cohere/command-r1280000.00000050.0000015
19deepseek/deepseek-prover-v21310720.00000050.00000218
285cohere/command-r-03-20241280000.00000050.0000015
7arcee-ai/virtuoso-medium-v21310720.00000050.0000008
6arcee-ai/coder-large327680.00000050.0000008
271microsoft/wizardlm-2-8x22b655360.00000050.0000005
96thedrummer/skyfall-36b-v2327680.00000050.0000008
144deepseek/deepseek-r11638400.00000050.00000218
305jondurbin/airoboros-l2-70b40960.00000050.0000005
318openai/gpt-3.5-turbo163850.00000050.0000015
319openai/gpt-3.5-turbo-0125163850.00000050.0000015
245ai21/jamba-instruct2560000.00000050.0000007
2arcee-ai/caller-large327680.000000550.00000085
308pygmalionai/mythalion-13b81920.00000056250.000001125
315undi95/remm-slerp-l2-13b61440.00000056250.000001125
206qwen/qwen-2.5-vl-72b-instruct327680.00000060.0000006
289nousresearch/nous-hermes-2-mixtral-8x7b-dpo327680.00000060.0000006
150sao10k/l3.3-euryale-70b1310720.00000070.0000008
121aion-labs/aion-1.0-mini1310720.00000070.0000014
217sao10k/l3.1-euryale-70b1310720.00000070.0000008
5arcee-ai/virtuoso-large1310720.000000750.0000012
295neversleep/noromaid-20b81920.000000750.0000015
221nousresearch/hermes-3-llama-3.1-405b1310720.00000080.0000008
233meta-llama/llama-3.1-405b-instruct327680.00000080.0000008
224aetherwiing/mn-starcannon-12b163840.00000080.0000012
179anthropic/claude-3.5-haiku-20241022:beta2000000.00000080.000004
95thedrummer/anubis-pro-105b-v11310720.00000080.000001
180anthropic/claude-3.5-haiku-202410222000000.00000080.000004
51alfredpros/codellama-7b-instruct-solidity40960.00000080.0000012
50eleutherai/llemma_7b40960.00000080.0000012
267sao10k/fimbulvetr-11b-v240960.00000080.0000012
276sophosympatheia/midnight-rose-70b40960.00000080.0000008
123qwen/qwen-vl-max75000.00000080.0000032
161amazon/nova-pro-v13000000.00000080.0000032
171infermatic/mn-inferor-12b163840.00000080.0000012
300undi95/toppy-m-7b40960.00000080.0000012
177anthropic/claude-3.5-haiku:beta2000000.00000080.000004
178anthropic/claude-3.5-haiku2000000.00000080.000004
228nothingiisreal/mn-celeste-12b163840.00000080.0000012
68scb10x/llama3.1-typhoon2-70b-instruct81920.000000880.00000088
74qwen/qwen2.5-vl-32b-instruct1280000.00000090.0000009
249cognitivecomputations/dolphin-mixtral-8x22b160000.00000090.0000009
4arcee-ai/maestro-reasoning1310720.00000090.0000033
317meta-llama/llama-2-70b-chat40960.00000090.0000009
250qwen/qwen-2-72b-instruct327680.00000090.0000009
230perplexity/llama-3.1-sonar-large-128k-online1270720.0000010.000001
287openai/gpt-3.5-turbo-061340950.0000010.000002
277cohere/command40960.0000010.000002
138perplexity/sonar1270720.0000010.000001
137perplexity/sonar-reasoning1270000.0000010.000005
303openai/gpt-3.5-turbo-1106163850.0000010.000002
42openai/o4-mini-high2000000.00000110.0000044
210openai/o1-mini1280000.00000110.0000044
211openai/o1-mini-2024-09-121280000.00000110.0000044
44openai/o4-mini2000000.00000110.0000044
129openai/o3-mini2000000.00000110.0000044
116openai/o3-mini-high2000000.00000110.0000044
312mancer/weaver80000.0000011250.000001125
201meta-llama/llama-3.2-90b-vision-instruct1310720.00000120.0000012
272google/gemini-pro-1.520000000.000001250.000005
1google/gemini-2.5-pro-preview10485760.000001250.00001
248sao10k/l3-euryale-70b81920.000001480.00000148
181neversleep/llama-3.1-lumimaid-70b163840.00000150.00000225
306openai/gpt-3.5-turbo-instruct40950.00000150.000002
182anthracite-org/magnum-v4-72b163840.00000150.00000225
128qwen/qwen-max327680.00000160.0000064
169mistralai/pixtral-large-24111310720.0000020.000006
286mistralai/mistral-large1280000.0000020.000006
85ai21/jamba-1.6-large2560000.0000020.000008
154x-ai/grok-2-12121310720.0000020.00001
47openai/gpt-4.110475760.0000020.000008
100perplexity/sonar-deep-research1280000.0000020.000008
227meta-llama/llama-3.1-405b327680.0000020.000002
153x-ai/grok-2-vision-1212327680.0000020.00001
168mistralai/mistral-large-24071310720.0000020.000006
98perplexity/sonar-reasoning-pro1280000.0000020.000008
111perplexity/r1-17761280000.0000020.000008
167mistralai/mistral-large-24111310720.0000020.000006
166openai/gpt-4o-2024-11-201280000.00000250.00001
225openai/gpt-4o-2024-08-061280000.00000250.00001
260openai/gpt-4o1280000.00000250.00001
192inflection/inflection-3-pi80000.00000250.00001
91openai/gpt-4o-search-preview1280000.00000250.00001
213cohere/command-r-plus-08-20241280000.00000250.00001
191inflection/inflection-3-productivity80000.00000250.00001
89cohere/command-a2560000.00000250.00001
64all-hands/openhands-lm-32b-v0.1163840.00000260.0000034
175eva-unit-01/eva-qwen-2.5-32b163840.00000260.0000034
290mistralai/mistral-medium327680.000002750.0000081
195anthracite-org/magnum-v2-72b327680.0000030.000003
284anthropic/claude-3-sonnet2000000.0000030.000015
283anthropic/claude-3-sonnet:beta2000000.0000030.000015
309openai/gpt-3.5-turbo-16k163850.0000030.000004
184anthropic/claude-3.5-sonnet2000000.0000030.000015
183anthropic/claude-3.5-sonnet:beta2000000.0000030.000015
275cohere/command-r-plus-04-20241280000.0000030.000015
274cohere/command-r-plus1280000.0000030.000015
109anthropic/claude-3.7-sonnet:thinking2000000.0000030.000015
110anthropic/claude-3.7-sonnet:beta2000000.0000030.000015
99perplexity/sonar-pro2000000.0000030.000015
24401-ai/yi-large327680.0000030.000003
246anthropic/claude-3.5-sonnet-20240620:beta2000000.0000030.000015
247anthropic/claude-3.5-sonnet-202406202000000.0000030.000015
56x-ai/grok-3-beta1310720.0000030.000015
108anthropic/claude-3.7-sonnet2000000.0000030.000015
152eva-unit-01/eva-llama-3.33-70b163840.0000040.000006
257neversleep/llama-3-lumimaid-70b81920.0000040.000006
241alpindale/magnum-72b163840.0000040.000006
165eva-unit-01/eva-qwen-2.5-72b163840.0000040.000006
120aion-labs/aion-1.01310720.0000040.000008
174raifle/sorcererlm-8x22b160000.00000450.0000045
263openai/gpt-4o-2024-05-131280000.0000050.000015
222openai/chatgpt-4o-latest1280000.0000050.000015
170x-ai/grok-vision-beta81920.0000050.000015
185x-ai/grok-beta1310720.0000050.000015
261openai/gpt-4o:extended1280000.0000060.000018
301alpindale/goliath-120b61440.00000656250.000009375
313anthropic/claude-2.0:beta1000000.0000080.000024
297anthropic/claude-2.12000000.0000080.000024
299anthropic/claude-22000000.0000080.000024
298anthropic/claude-2:beta2000000.0000080.000024
314anthropic/claude-2.01000000.0000080.000024
296anthropic/claude-2.1:beta2000000.0000080.000024
304openai/gpt-4-1106-preview1280000.000010.00003
43openai/o32000000.000010.00004
273openai/gpt-4-turbo1280000.000010.00003
288openai/gpt-4-turbo-preview1280000.000010.00003
151openai/o12000000.0000150.00006
282anthropic/claude-3-opus2000000.0000150.000075
281anthropic/claude-3-opus:beta2000000.0000150.000075
208openai/o1-preview1280000.0000150.00006
209openai/o1-preview-2024-09-121280000.0000150.00006
321openai/gpt-4-031481910.000030.00006
320openai/gpt-481910.000030.00006
311openai/gpt-4-32k-0314327670.000060.00012
310openai/gpt-4-32k327670.000060.00012
106openai/gpt-4.5-preview1280000.0000750.00015
78openai/o1-pro2000000.000150.0006
\n", + "
" + ], + "text/plain": [ + " id context_length pricing_prompt pricing_completion\n", + "302 openrouter/auto 2000000 -1 -1\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free 16000 0 0\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free 131072 0 0\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... 32768 0 0\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free 131072 0 0\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free 32768 0 0\n", + "54 moonshotai/kimi-vl-a3b-thinking:free 131072 0 0\n", + "53 agentica-org/deepcoder-14b-preview:free 96000 0 0\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free 32768 0 0\n", + "231 meta-llama/llama-3.1-8b-instruct:free 131072 0 0\n", + "226 meta-llama/llama-3.1-405b:free 64000 0 0\n", + "125 qwen/qwen2.5-vl-72b-instruct:free 131072 0 0\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free 32768 0 0\n", + "87 google/gemma-3-12b-it:free 131072 0 0\n", + "92 rekaai/reka-flash-3:free 32768 0 0\n", + "131 mistralai/mistral-small-24b-instruct-2501:free 32768 0 0\n", + "81 open-r1/olympiccoder-32b:free 32768 0 0\n", + "60 meta-llama/llama-4-maverick:free 256000 0 0\n", + "236 mistralai/mistral-nemo:free 128000 0 0\n", + "62 meta-llama/llama-4-scout:free 512000 0 0\n", + "83 google/gemma-3-4b-it:free 131072 0 0\n", + "93 google/gemma-3-27b-it:free 96000 0 0\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free 96000 0 0\n", + "251 mistralai/mistral-7b-instruct:free 32768 0 0\n", + "77 featherless/qwerky-72b:free 32768 0 0\n", + "75 deepseek/deepseek-chat-v3-0324:free 163840 0 0\n", + "242 google/gemma-2-9b-it:free 8192 0 0\n", + "73 qwen/qwen2.5-vl-32b-instruct:free 8192 0 0\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free 64000 0 0\n", + "72 google/gemini-2.5-pro-exp-03-25 1000000 0 0\n", + "70 bytedance-research/ui-tars-72b:free 32768 0 0\n", + "69 allenai/molmo-7b-d:free 4096 0 0\n", + "101 deepseek/deepseek-r1-zero:free 163840 0 0\n", + "102 qwen/qwq-32b:free 40000 0 0\n", + "66 deepseek/deepseek-v3-base:free 163840 0 
0\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free 8192 0 0\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free 131072 0 0\n", + "71 qwen/qwen2.5-vl-3b-instruct:free 64000 0 0\n", + "218 google/gemini-flash-1.5-8b-exp 1000000 0 0\n", + "82 google/gemma-3-1b-it:free 32768 0 0\n", + "156 google/gemini-2.0-flash-exp:free 1048576 0 0\n", + "204 qwen/qwen-2.5-72b-instruct:free 32768 0 0\n", + "21 qwen/qwen3-30b-a3b:free 40960 0 0\n", + "38 thudm/glm-4-32b:free 32768 0 0\n", + "157 meta-llama/llama-3.3-70b-instruct:free 8000 0 0\n", + "18 deepseek/deepseek-prover-v2:free 163840 0 0\n", + "17 opengvlab/internvl3-2b:free 32000 0 0\n", + "23 qwen/qwen3-8b:free 40960 0 0\n", + "16 opengvlab/internvl3-14b:free 32000 0 0\n", + "14 qwen/qwen3-1.7b:free 32000 0 0\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free 131072 0 0\n", + "12 qwen/qwen3-0.6b-04-28:free 32000 0 0\n", + "11 microsoft/phi-4-reasoning:free 32768 0 0\n", + "162 qwen/qwq-32b-preview:free 16384 0 0\n", + "9 microsoft/phi-4-reasoning-plus:free 32768 0 0\n", + "15 qwen/qwen3-4b:free 128000 0 0\n", + "164 google/learnlm-1.5-pro-experimental:free 40960 0 0\n", + "148 deepseek/deepseek-chat:free 163840 0 0\n", + "199 meta-llama/llama-3.2-1b-instruct:free 131000 0 0\n", + "36 thudm/glm-z1-32b:free 32768 0 0\n", + "35 microsoft/mai-ds-r1:free 163840 0 0\n", + "34 thudm/glm-4-9b:free 32000 0 0\n", + "33 thudm/glm-z1-9b:free 32000 0 0\n", + "188 qwen/qwen-2.5-7b-instruct:free 32768 0 0\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free 32768 0 0\n", + "25 qwen/qwen3-14b:free 40960 0 0\n", + "197 meta-llama/llama-3.2-3b-instruct:free 20000 0 0\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free 8192 0 0\n", + "29 qwen/qwen3-235b-a22b:free 40960 0 0\n", + "143 deepseek/deepseek-r1:free 163840 0 0\n", + "27 qwen/qwen3-32b:free 40960 0 0\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free 64000 0 0\n", + "31 tngtech/deepseek-r1t-chimera:free 163840 0 0\n", + "200 meta-llama/llama-3.2-1b-instruct 131072 
0.000000005 0.00000001\n", + "198 meta-llama/llama-3.2-3b-instruct 131072 0.00000001 0.00000002\n", + "139 liquid/lfm-7b 32768 0.00000001 0.00000001\n", + "46 qwen/qwen2.5-coder-7b-instruct 32768 0.00000001 0.00000003\n", + "243 google/gemma-2-9b-it 8192 0.00000002 0.00000006\n", + "232 meta-llama/llama-3.1-8b-instruct 16384 0.00000002 0.00000003\n", + "84 google/gemma-3-4b-it 131072 0.00000002 0.00000004\n", + "140 liquid/lfm-3b 32768 0.00000002 0.00000002\n", + "115 meta-llama/llama-guard-3-8b 131072 0.00000002 0.00000006\n", + "223 sao10k/l3-lunaris-8b 8192 0.00000002 0.00000005\n", + "253 nousresearch/hermes-2-pro-llama-3-8b 131072 0.000000025 0.00000004\n", + "254 mistralai/mistral-7b-instruct-v0.3 32768 0.000000028 0.000000054\n", + "252 mistralai/mistral-7b-instruct 32768 0.000000028 0.000000054\n", + "268 meta-llama/llama-3-8b-instruct 8192 0.00000003 0.00000006\n", + "219 microsoft/phi-3.5-mini-128k-instruct 131072 0.00000003 0.00000009\n", + "237 mistralai/mistral-nemo 98304 0.00000003 0.00000007\n", + "160 amazon/nova-micro-v1 128000 0.000000035 0.00000014\n", + "24 qwen/qwen3-8b 128000 0.000000035 0.000000138\n", + "193 google/gemini-flash-1.5-8b 1000000 0.0000000375 0.00000015\n", + "155 cohere/command-r7b-12-2024 128000 0.0000000375 0.00000015\n", + "187 mistralai/ministral-3b 131072 0.00000004 0.00000004\n", + "117 deepseek/deepseek-r1-distill-llama-8b 32000 0.00000004 0.00000004\n", + "258 deepseek/deepseek-coder 128000 0.00000004 0.00000012\n", + "203 meta-llama/llama-3.2-11b-vision-instruct 131072 0.000000049 0.000000049\n", + "80 mistralai/mistral-small-3.1-24b-instruct 131072 0.00000005 0.00000015\n", + "20 meta-llama/llama-guard-4-12b 163840 0.00000005 0.00000005\n", + "189 qwen/qwen-2.5-7b-instruct 32768 0.00000005 0.0000001\n", + "97 microsoft/phi-4-multimodal-instruct 131072 0.00000005 0.0000001\n", + "88 google/gemma-3-12b-it 131072 0.00000005 0.0000001\n", + "124 qwen/qwen-turbo 1000000 0.00000005 0.0000002\n", + "132 
mistralai/mistral-small-24b-instruct-2501 28000 0.00000006 0.00000012\n", + "173 qwen/qwen-2.5-coder-32b-instruct 32768 0.00000006 0.00000015\n", + "159 amazon/nova-lite-v1 300000 0.00000006 0.00000024\n", + "316 gryphe/mythomax-l2-13b 4096 0.000000065 0.000000065\n", + "10 microsoft/phi-4-reasoning-plus 32768 0.00000007 0.00000035\n", + "147 microsoft/phi-4 16384 0.00000007 0.00000014\n", + "26 qwen/qwen3-14b 40960 0.00000007 0.00000024\n", + "259 google/gemini-flash-1.5 1000000 0.000000075 0.0000003\n", + "107 google/gemini-2.0-flash-lite-001 1048576 0.000000075 0.0000003\n", + "63 meta-llama/llama-4-scout 1048576 0.00000008 0.0000003\n", + "294 mistralai/mixtral-8x7b-instruct 32768 0.00000008 0.00000024\n", + "264 allenai/olmo-7b-instruct 2048 0.00000008 0.00000024\n", + "163 qwen/qwq-32b-preview 32768 0.00000009 0.00000027\n", + "158 meta-llama/llama-3.3-70b-instruct 131000 0.00000009 0.00000035\n", + "266 neversleep/llama-3-lumimaid-8b 24576 0.00000009375 0.00000075\n", + "207 neversleep/llama-3.1-lumimaid-8b 32768 0.00000009375 0.00000075\n", + "265 neversleep/llama-3-lumimaid-8b:extended 24576 0.00000009375 0.00000075\n", + "212 mistralai/pixtral-12b 32768 0.0000001 0.0000001\n", + "28 qwen/qwen3-32b 40960 0.0000001 0.0000003\n", + "49 openai/gpt-4.1-nano 1047576 0.0000001 0.0000004\n", + "186 mistralai/ministral-8b 128000 0.0000001 0.0000001\n", + "118 google/gemini-2.0-flash-001 1000000 0.0000001 0.0000004\n", + "234 meta-llama/llama-3.1-70b-instruct 131072 0.0000001 0.00000028\n", + "65 mistral/ministral-8b 131072 0.0000001 0.0000001\n", + "240 google/gemma-2-27b-it 8192 0.0000001 0.0000003\n", + "256 microsoft/phi-3-medium-128k-instruct 131072 0.0000001 0.0000003\n", + "255 microsoft/phi-3-mini-128k-instruct 128000 0.0000001 0.0000001\n", + "94 google/gemma-3-27b-it 131072 0.0000001 0.0000002\n", + "22 qwen/qwen3-30b-a3b 40960 0.0000001 0.0000003\n", + "142 deepseek/deepseek-r1-distill-llama-70b 131072 0.0000001 0.0000004\n", + "307 
mistralai/mistral-7b-instruct-v0.1 2824 0.00000011 0.00000019\n", + "205 qwen/qwen-2.5-72b-instruct 32768 0.00000012 0.00000039\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct 131072 0.00000012 0.0000003\n", + "134 deepseek/deepseek-r1-distill-qwen-32b 131072 0.00000012 0.00000018\n", + "220 nousresearch/hermes-3-llama-3.1-70b 131072 0.00000012 0.0000003\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 131072 0.00000013 0.0000004\n", + "30 qwen/qwen3-235b-a22b 40960 0.00000014 0.000002\n", + "90 openai/gpt-4o-mini-search-preview 128000 0.00000015 0.0000006\n", + "136 deepseek/deepseek-r1-distill-qwen-14b 64000 0.00000015 0.00000015\n", + "214 cohere/command-r-08-2024 128000 0.00000015 0.0000006\n", + "238 openai/gpt-4o-mini 128000 0.00000015 0.0000006\n", + "103 qwen/qwq-32b 131072 0.00000015 0.0000002\n", + "196 liquid/lfm-40b 32768 0.00000015 0.00000015\n", + "41 google/gemini-2.5-flash-preview:thinking 1048576 0.00000015 0.0000035\n", + "40 google/gemini-2.5-flash-preview 1048576 0.00000015 0.0000006\n", + "239 openai/gpt-4o-mini-2024-07-18 128000 0.00000015 0.0000006\n", + "61 meta-llama/llama-4-maverick 1048576 0.00000017 0.0000006\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct 8192 0.00000018 0.00000018\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b 131072 0.00000018 0.00000018\n", + "3 arcee-ai/spotlight 131072 0.00000018 0.00000018\n", + "216 qwen/qwen-2.5-vl-7b-instruct 32768 0.0000002 0.0000002\n", + "262 meta-llama/llama-guard-2-8b 8192 0.0000002 0.0000002\n", + "293 mistralai/mistral-7b-instruct-v0.2 32768 0.0000002 0.0000002\n", + "86 ai21/jamba-1.6-mini 256000 0.0000002 0.0000004\n", + "122 aion-labs/aion-rp-llama-3.1-8b 32768 0.0000002 0.0000002\n", + "229 perplexity/llama-3.1-sonar-small-128k-online 127072 0.0000002 0.0000002\n", + "145 minimax/minimax-01 1000192 0.0000002 0.0000011\n", + "291 mistralai/mistral-small 32768 0.0000002 0.0000006\n", + "112 mistralai/mistral-saba 32768 0.0000002 0.0000006\n", + "119 qwen/qwen-vl-plus 7500 
0.00000021 0.00000063\n", + "39 thudm/glm-4-32b 32000 0.00000024 0.00000024\n", + "32 thudm/glm-z1-rumination-32b 32000 0.00000024 0.00000024\n", + "37 thudm/glm-z1-32b 32000 0.00000024 0.00000024\n", + "13 inception/mercury-coder-small-beta 32000 0.00000025 0.000001\n", + "292 mistralai/mistral-tiny 32768 0.00000025 0.00000025\n", + "126 qwen/qwen2.5-vl-72b-instruct 32000 0.00000025 0.00000075\n", + "194 thedrummer/rocinante-12b 32768 0.00000025 0.0000005\n", + "279 anthropic/claude-3-haiku:beta 200000 0.00000025 0.00000125\n", + "235 mistralai/codestral-mamba 262144 0.00000025 0.00000025\n", + "280 anthropic/claude-3-haiku 200000 0.00000025 0.00000125\n", + "55 x-ai/grok-3-mini-beta 131072 0.0000003 0.0000005\n", + "269 meta-llama/llama-3-70b-instruct 8192 0.0000003 0.0000004\n", + "146 mistralai/codestral-2501 262144 0.0000003 0.0000009\n", + "76 deepseek/deepseek-chat-v3-0324 163840 0.0000003 0.00000088\n", + "149 deepseek/deepseek-chat 163840 0.00000038 0.00000089\n", + "270 mistralai/mixtral-8x22b-instruct 65536 0.0000004 0.0000012\n", + "0 mistralai/mistral-medium-3 131072 0.0000004 0.000002\n", + "48 openai/gpt-4.1-mini 1047576 0.0000004 0.0000016\n", + "127 qwen/qwen-plus 131072 0.0000004 0.0000012\n", + "8 arcee-ai/arcee-blitz 32768 0.00000045 0.00000075\n", + "176 thedrummer/unslopnemo-12b 32000 0.00000045 0.00000045\n", + "278 cohere/command-r 128000 0.0000005 0.0000015\n", + "19 deepseek/deepseek-prover-v2 131072 0.0000005 0.00000218\n", + "285 cohere/command-r-03-2024 128000 0.0000005 0.0000015\n", + "7 arcee-ai/virtuoso-medium-v2 131072 0.0000005 0.0000008\n", + "6 arcee-ai/coder-large 32768 0.0000005 0.0000008\n", + "271 microsoft/wizardlm-2-8x22b 65536 0.0000005 0.0000005\n", + "96 thedrummer/skyfall-36b-v2 32768 0.0000005 0.0000008\n", + "144 deepseek/deepseek-r1 163840 0.0000005 0.00000218\n", + "305 jondurbin/airoboros-l2-70b 4096 0.0000005 0.0000005\n", + "318 openai/gpt-3.5-turbo 16385 0.0000005 0.0000015\n", + "319 openai/gpt-3.5-turbo-0125 
16385 0.0000005 0.0000015\n", + "245 ai21/jamba-instruct 256000 0.0000005 0.0000007\n", + "2 arcee-ai/caller-large 32768 0.00000055 0.00000085\n", + "308 pygmalionai/mythalion-13b 8192 0.0000005625 0.000001125\n", + "315 undi95/remm-slerp-l2-13b 6144 0.0000005625 0.000001125\n", + "206 qwen/qwen-2.5-vl-72b-instruct 32768 0.0000006 0.0000006\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo 32768 0.0000006 0.0000006\n", + "150 sao10k/l3.3-euryale-70b 131072 0.0000007 0.0000008\n", + "121 aion-labs/aion-1.0-mini 131072 0.0000007 0.0000014\n", + "217 sao10k/l3.1-euryale-70b 131072 0.0000007 0.0000008\n", + "5 arcee-ai/virtuoso-large 131072 0.00000075 0.0000012\n", + "295 neversleep/noromaid-20b 8192 0.00000075 0.0000015\n", + "221 nousresearch/hermes-3-llama-3.1-405b 131072 0.0000008 0.0000008\n", + "233 meta-llama/llama-3.1-405b-instruct 32768 0.0000008 0.0000008\n", + "224 aetherwiing/mn-starcannon-12b 16384 0.0000008 0.0000012\n", + "179 anthropic/claude-3.5-haiku-20241022:beta 200000 0.0000008 0.000004\n", + "95 thedrummer/anubis-pro-105b-v1 131072 0.0000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 200000 0.0000008 0.000004\n", + "51 alfredpros/codellama-7b-instruct-solidity 4096 0.0000008 0.0000012\n", + "50 eleutherai/llemma_7b 4096 0.0000008 0.0000012\n", + "267 sao10k/fimbulvetr-11b-v2 4096 0.0000008 0.0000012\n", + "276 sophosympatheia/midnight-rose-70b 4096 0.0000008 0.0000008\n", + "123 qwen/qwen-vl-max 7500 0.0000008 0.0000032\n", + "161 amazon/nova-pro-v1 300000 0.0000008 0.0000032\n", + "171 infermatic/mn-inferor-12b 16384 0.0000008 0.0000012\n", + "300 undi95/toppy-m-7b 4096 0.0000008 0.0000012\n", + "177 anthropic/claude-3.5-haiku:beta 200000 0.0000008 0.000004\n", + "178 anthropic/claude-3.5-haiku 200000 0.0000008 0.000004\n", + "228 nothingiisreal/mn-celeste-12b 16384 0.0000008 0.0000012\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct 8192 0.00000088 0.00000088\n", + "74 qwen/qwen2.5-vl-32b-instruct 128000 0.0000009 0.0000009\n", 
+ "249 cognitivecomputations/dolphin-mixtral-8x22b 16000 0.0000009 0.0000009\n", + "4 arcee-ai/maestro-reasoning 131072 0.0000009 0.0000033\n", + "317 meta-llama/llama-2-70b-chat 4096 0.0000009 0.0000009\n", + "250 qwen/qwen-2-72b-instruct 32768 0.0000009 0.0000009\n", + "230 perplexity/llama-3.1-sonar-large-128k-online 127072 0.000001 0.000001\n", + "287 openai/gpt-3.5-turbo-0613 4095 0.000001 0.000002\n", + "277 cohere/command 4096 0.000001 0.000002\n", + "138 perplexity/sonar 127072 0.000001 0.000001\n", + "137 perplexity/sonar-reasoning 127000 0.000001 0.000005\n", + "303 openai/gpt-3.5-turbo-1106 16385 0.000001 0.000002\n", + "42 openai/o4-mini-high 200000 0.0000011 0.0000044\n", + "210 openai/o1-mini 128000 0.0000011 0.0000044\n", + "211 openai/o1-mini-2024-09-12 128000 0.0000011 0.0000044\n", + "44 openai/o4-mini 200000 0.0000011 0.0000044\n", + "129 openai/o3-mini 200000 0.0000011 0.0000044\n", + "116 openai/o3-mini-high 200000 0.0000011 0.0000044\n", + "312 mancer/weaver 8000 0.000001125 0.000001125\n", + "201 meta-llama/llama-3.2-90b-vision-instruct 131072 0.0000012 0.0000012\n", + "272 google/gemini-pro-1.5 2000000 0.00000125 0.000005\n", + "1 google/gemini-2.5-pro-preview 1048576 0.00000125 0.00001\n", + "248 sao10k/l3-euryale-70b 8192 0.00000148 0.00000148\n", + "181 neversleep/llama-3.1-lumimaid-70b 16384 0.0000015 0.00000225\n", + "306 openai/gpt-3.5-turbo-instruct 4095 0.0000015 0.000002\n", + "182 anthracite-org/magnum-v4-72b 16384 0.0000015 0.00000225\n", + "128 qwen/qwen-max 32768 0.0000016 0.0000064\n", + "169 mistralai/pixtral-large-2411 131072 0.000002 0.000006\n", + "286 mistralai/mistral-large 128000 0.000002 0.000006\n", + "85 ai21/jamba-1.6-large 256000 0.000002 0.000008\n", + "154 x-ai/grok-2-1212 131072 0.000002 0.00001\n", + "47 openai/gpt-4.1 1047576 0.000002 0.000008\n", + "100 perplexity/sonar-deep-research 128000 0.000002 0.000008\n", + "227 meta-llama/llama-3.1-405b 32768 0.000002 0.000002\n", + "153 x-ai/grok-2-vision-1212 32768 
0.000002 0.00001\n", + "168 mistralai/mistral-large-2407 131072 0.000002 0.000006\n", + "98 perplexity/sonar-reasoning-pro 128000 0.000002 0.000008\n", + "111 perplexity/r1-1776 128000 0.000002 0.000008\n", + "167 mistralai/mistral-large-2411 131072 0.000002 0.000006\n", + "166 openai/gpt-4o-2024-11-20 128000 0.0000025 0.00001\n", + "225 openai/gpt-4o-2024-08-06 128000 0.0000025 0.00001\n", + "260 openai/gpt-4o 128000 0.0000025 0.00001\n", + "192 inflection/inflection-3-pi 8000 0.0000025 0.00001\n", + "91 openai/gpt-4o-search-preview 128000 0.0000025 0.00001\n", + "213 cohere/command-r-plus-08-2024 128000 0.0000025 0.00001\n", + "191 inflection/inflection-3-productivity 8000 0.0000025 0.00001\n", + "89 cohere/command-a 256000 0.0000025 0.00001\n", + "64 all-hands/openhands-lm-32b-v0.1 16384 0.0000026 0.0000034\n", + "175 eva-unit-01/eva-qwen-2.5-32b 16384 0.0000026 0.0000034\n", + "290 mistralai/mistral-medium 32768 0.00000275 0.0000081\n", + "195 anthracite-org/magnum-v2-72b 32768 0.000003 0.000003\n", + "284 anthropic/claude-3-sonnet 200000 0.000003 0.000015\n", + "283 anthropic/claude-3-sonnet:beta 200000 0.000003 0.000015\n", + "309 openai/gpt-3.5-turbo-16k 16385 0.000003 0.000004\n", + "184 anthropic/claude-3.5-sonnet 200000 0.000003 0.000015\n", + "183 anthropic/claude-3.5-sonnet:beta 200000 0.000003 0.000015\n", + "275 cohere/command-r-plus-04-2024 128000 0.000003 0.000015\n", + "274 cohere/command-r-plus 128000 0.000003 0.000015\n", + "109 anthropic/claude-3.7-sonnet:thinking 200000 0.000003 0.000015\n", + "110 anthropic/claude-3.7-sonnet:beta 200000 0.000003 0.000015\n", + "99 perplexity/sonar-pro 200000 0.000003 0.000015\n", + "244 01-ai/yi-large 32768 0.000003 0.000003\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta 200000 0.000003 0.000015\n", + "247 anthropic/claude-3.5-sonnet-20240620 200000 0.000003 0.000015\n", + "56 x-ai/grok-3-beta 131072 0.000003 0.000015\n", + "108 anthropic/claude-3.7-sonnet 200000 0.000003 0.000015\n", + "152 
eva-unit-01/eva-llama-3.33-70b 16384 0.000004 0.000006\n", + "257 neversleep/llama-3-lumimaid-70b 8192 0.000004 0.000006\n", + "241 alpindale/magnum-72b 16384 0.000004 0.000006\n", + "165 eva-unit-01/eva-qwen-2.5-72b 16384 0.000004 0.000006\n", + "120 aion-labs/aion-1.0 131072 0.000004 0.000008\n", + "174 raifle/sorcererlm-8x22b 16000 0.0000045 0.0000045\n", + "263 openai/gpt-4o-2024-05-13 128000 0.000005 0.000015\n", + "222 openai/chatgpt-4o-latest 128000 0.000005 0.000015\n", + "170 x-ai/grok-vision-beta 8192 0.000005 0.000015\n", + "185 x-ai/grok-beta 131072 0.000005 0.000015\n", + "261 openai/gpt-4o:extended 128000 0.000006 0.000018\n", + "301 alpindale/goliath-120b 6144 0.0000065625 0.000009375\n", + "313 anthropic/claude-2.0:beta 100000 0.000008 0.000024\n", + "297 anthropic/claude-2.1 200000 0.000008 0.000024\n", + "299 anthropic/claude-2 200000 0.000008 0.000024\n", + "298 anthropic/claude-2:beta 200000 0.000008 0.000024\n", + "314 anthropic/claude-2.0 100000 0.000008 0.000024\n", + "296 anthropic/claude-2.1:beta 200000 0.000008 0.000024\n", + "304 openai/gpt-4-1106-preview 128000 0.00001 0.00003\n", + "43 openai/o3 200000 0.00001 0.00004\n", + "273 openai/gpt-4-turbo 128000 0.00001 0.00003\n", + "288 openai/gpt-4-turbo-preview 128000 0.00001 0.00003\n", + "151 openai/o1 200000 0.000015 0.00006\n", + "282 anthropic/claude-3-opus 200000 0.000015 0.000075\n", + "281 anthropic/claude-3-opus:beta 200000 0.000015 0.000075\n", + "208 openai/o1-preview 128000 0.000015 0.00006\n", + "209 openai/o1-preview-2024-09-12 128000 0.000015 0.00006\n", + "321 openai/gpt-4-0314 8191 0.00003 0.00006\n", + "320 openai/gpt-4 8191 0.00003 0.00006\n", + "311 openai/gpt-4-32k-0314 32767 0.00006 0.00012\n", + "310 openai/gpt-4-32k 32767 0.00006 0.00012\n", + "106 openai/gpt-4.5-preview 128000 0.000075 0.00015\n", + "78 openai/o1-pro 200000 0.00015 0.0006" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"df.sort_values(\"pricing_prompt\")[col_names]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABqwAAAHJCAYAAADwyhjGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl0VFXWt58aMlZSGSAEQoAMShImIQgKwYi2imCLLWqD2gqKiC1qK+DYyiAo6CvtAMqkyCCCoiKDjAoSgwwKUQhhzCBDIASSVCWVqZKq7498uVBGEAJHbjjnWetddm6d+9Rv34vd683mnG1wu91uFAqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLhPFSB1AoFAqFQqFQKBQKhUKhUCgUCoVCoVDIjWpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLimpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLivlSB1BcXrjdblwu96WOoUuMRsNFfTYX2yfCKWNGGWsW4dS7T4RTZZTDJ8KpMsrhE+GUMaOMNYtwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjPn2XA0ajAYPBcE5rVcNKcVFxudwUFDgudQzdYTYbCQmxYLeXUlXl0p1PhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4dS7T4RTZdRnRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzXg5EBpqwWQ6t4aVOhJQoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUV3DavMzEweeughOnbsSFJSEm+++SaVlZV/ep/b7WbGjBn07NmTDh060L9/f3755Zc66/Ly8njyySfp1KkTXbt25b///S8lJSV11q1bt46+ffvSvn17evXqxZdffllnTWVlJW+88QZJSUl07NiRhx56iKysrHrXtGjRInr16kX79u3p27cv69evr7OmuLiYl156ia5du9KpUyeeeuopjh8/Xmedy+Vi9uzZ3HrrrbRr146kpCRGjBjhseaBBx4gLi6uzv9lZmbW8SkUCoVCoVAoFAqFQqFQKBQKhUKhUIjCfKkDnI7NZmPgwIFERUUxefJk8vLymDhxIuXl5YwaNeqs986cOZP33nuPkSNHEhcXx/z583n44YdZsmQJLVq0AMDp
dPLII48AMGnSJMrLy3njjTcYMWIE06dP11w///wzTzzxBHfffTcvvfQSmzdv5r///S8Wi4Vbb71VWzd+/HhWrFjBCy+8QHh4ONOmTWPQoEF88803BAYGnldN33zzDa+88gqPPfYY1157LStWrOCJJ55g/vz5dOzYUVv39NNPc+DAAcaMGYOPjw/vvPMOQ4YM4csvv8RsPvU6R40axfr163n88ce58soryc/PZ9u2bXWeW2JiIs8//7zHtcjIyD97VQqFQqFQKBQKhUKhUCgUCoVCoVA0WI6edJB5rASLt5HGVt9LHUeBzhpWCxcuxOFwMGXKFIKDgwGorq5m7NixDB06lPDw8D+8r6KigunTp/Pwww8zaNAgADp37sytt97KRx99xJgxYwBYvXo1+/fvZ8WKFcTExABgtVoZPHgwO3bsoEOHDgBMnTqVDh068OqrrwJw7bXXcujQId577z2tYXXs2DG++OILRo8ezd133w1A+/btueGGG1i4cCFDhgw5r5ree+89brvtNp5++mntO/ft28f777/PzJkzAUhLSyM1NZWPPvqIHj16ABAdHU2fPn1Ys2YNffr0AWDTpk0sXryYr776iri4OO053XbbbXWendVq9WiIKRQKhUKhUCgUCoVCoVAoFAqFQnG5UlLmZMbSXaRnF2jX2kWHMvSOtlh8vS5hMoWujgRMSUmhW7duWmMHoHfv3rhcLjZu3HjG+7Zv305JSQm9e/fWrnl7e3PzzTeTkpLi4Y+Li9OaVQBJSUkEBwezYcMGoOaYvy1btnjspALo06cPmZmZHD58GIDU1FRcLpfHuuDgYJKSkup855/VdOjQIXJycjzy137npk2btOMDU1JSsFqtJCUlaWtiYmJISEjw+M7PP/+crl27ejSrFAqFQqFQKBQKhUKhUCgUCoVCoZCdGUt3kZFT4HEtI6eA6Ut2XaJEilp0tcMqKyuLu+66y+Oa1WolLCzsD2dDnX4f4NGIAoiNjWXOnDmUl5fj6+tLVlZWnTUGg4Ho6GjNcfDgQZxO5x+6AJ544glycnIA8PX1xc/Pr866L774ok5NM2bM4NNPP6WgoICEhASCg4O176z9p9Vq5cknnyQ1NRUvLy86dOiA0+nk0KFDxMbGkpWVRXR0NOvXr+edd94hOzubiIgIgoKCPJ7Pr7/+SqNGjejZsydHjx4FoGPHjkycOJHo6Ght3cmTJ9m2bZvW2PLz8+P+++9n5MiRGAyGMz7vP8Ns1lUfVBeYTEaPf+rNJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zChjzSKcF8t39KTDY2dVLS43pGcXcMJeTtNQ/0uaUWZ01bCy2+1YrdY614OCgrDZbGe9z9vbGx8fH4/rVqsVt9uNzWbD19cXu92uzZY6k7/2n7/PUdvAqaioYPLkyUybNo3t27czceJEj1lUVqvVI6vdbmf37t1s2bLFY77Wr7/+ypEjRzy+c/To0ZjNZm2+1vjx4z0+t9vtuN3uOvO1pk6dSlhYmPad+fn55Obm4u/vT2JiItu3b+fEiRMMHjyYlStXas+ppKSEtm3bkpycTHV1NV988QUffvghRUVFvPbaa2d83mfDaDQQEmKp170yYLX6/fmiS+gT4ZQxo4w1i3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoxw+EU4ZMmYeKznr545K1wX/flvEc5QFXTWs9MzixYsBGDRoENdddx2rV68mJyeHhQsXnnW+FsDWrVvrzNfq1KkTe/fu9ViXlZXFypUrtd1dBQUFjBs3jszMTBITE4GaHWC/n6+1YsUKcnNzNY/b7cbb25s1a9aQmZnJgw8+yLPPPsszzzzDsmXLtJlbX3/9NaGhodp9jz76KElJSSxevJhx48ZhNJ5/J9jlcmO3l573fZc7JpMR
q9UPu72M6mqX7nwinDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYMsCu7gEMnHLQMs9AmKvTPb/gTLlZGf6+znyxm8TZSWOiol1vEc7wcsFr9znnXma4aVlarleLi4jrXbTYbQUFBZ72vsrKSiooKj11Wdrsdg8Gg3Wu1WikpqdtBtdlsNGvWDEBb+/scP/zwA4C2rnb3Vu0sqn79+mnfeXpWPz8/iouL68zX8vb2Ji8vz+M7o6OjPY4ijIqKAmD37t0ABAQEYLPZ6szXatKkCb/99huHDx8mMjISq9VK06ZNady4MZmZmQA0b96cpk2bcuDAAe2+05tVAP7+/rRp04aff/6Z0tJSAgIC6jyrc6GqSv3LeCaqq10X9flcbJ8Ip4wZZaxZhFPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcF4MX15hKa/N/ZmSsirtWoCfmVcGXk1YcP2O2judC80YFuRHu+hQMnIKcLlPXTcaoE1UKI2tvhf8DES8a1nQ1WGKMTExdWZVFRcXk5+fX2em1O/vA8jOzva4npWVRUREBL6+vmf0u91usrOzNUfLli3x8vKqs652blXtupiYGAoKCmjUqJHH2t/PyQoJCfG4r7amsrIySkpKKC8v1z77fQMpOzsbg8HAyZMntc/dbrfHHCqAwsJC7bsBrrjiijM8qZojDc9Gfn4+RqOx3s0qhUKhUCgUCoVCoVAoFAqFQqFQyMnvm1UAJWVVjJvz8yVKVJehd7Sts+urTVQoQ+9oe4kSKWrR1Q6r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSko6432JiYkEBATw+uuvc/DgQQoKCoiPjyc3N5ebbrrJw7906VIefvhh0tLS8PLyokOHDhQVFXH99dcDNbufrrnmGj777DO+/PJLsrOziYiIoKSkhNDQUCIjIwHo0aMHRqMRh8PB3Llz+eSTT2jfvj07d+5k2LBh2ndGRkZy8OBBHnnkEY+aDAaDNl+rRYsWmEwmdu/eTXJyMnl5eTz33HN8++23hISEaLu9rrzySgDGjRvHf/7zH8rKyvjss8+0XVS1s66Cg4P56aef6NatG6WlNcfzrVy5kmPHjtG27al/6W688UZtjtbpnK05eC6Yzbrqg+oCvQ4ZFOmUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSp5h2ZJ+o0q2opKati98FC2sc0qpf7YtYcFODDc/cnkl9Uhr28GquvibDgC587JeJdy4bB7Xa7/3zZX8OhQ4e45ZZbsFgsPPLII/z222989dVXtG7dmmXLlmnrBg4cSG5uLmvXrtWuDR48mNTUVG666SYSExOZPXs2x48fZ+7cuVxzzTUAlJaWav/5kUceoaSkhE8++YTg4GA2bdqkuebPn8+rr75KbGws99xzD2vXrmXbtm0kJiayYMECbV2fPn3IzMwkKiqKO++8k5kzZ1JaWsq6deu0owMnTpzIxx9/TGBgoEdNzZo14+jRo6SkpBAeHk779u2prKykbdu27Nq1i44dO5Kenk779u3x9/dn1qxZ/PDDDzzyyCN4eXkRGxvLnj17iI2NxWg0sn//ft566y1uv/12kpOTKS4uplGjRlx99dXa/K3GjRvz7bff4ufnx88//8zgwYO54ooruPPOOzl06BDz5s3D5XKxYMECOnXqVK936Ha7MRjOfg6oQqFQKBQKhUKhUCgUCoVCoVAoLi8WrNnDp6v3nvHz+3rFce8t8X9hIkVDQ1c7rFasWIG3tzfx8fFMnToVi8VCUlISmzZtIi8vj/DwcABcLhfV1dXafRUVFaSlpdG1a1fS09NJSUkhPj4el8vFypUrtSbVunXrqKysJCkpidmzZ2M2
m+nevTupqans2LGDDh06aOtiYmIwmUxMmjSJiIgITCaTx9F/x44dIzs7Gy8vL/Ly8pg6dSodOnRgx44dLF++nCFDhgDw22+/AdC6dWuPmjZu3OgxXysuLg6omYEFcOTIEaZMmcL06dO1NbVNsGuuuYaff67ZQtmiRQseffRR7rvvPm3d119/jcPh4LXXXmPFihVAzZyswMBA/PxqOsVhYWG43W4OHDjAhAkTcLlc+Pn58f7779e7WVXzbtzY7aX1vv9yxWTS/yBElVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM2NDqHlXdgGHTjhoGWapc1Te+RARevZdSs0b+VNY6KiX+3hRGcUXcTcUNIx3fTlgtfqd864zXTWsUlJSSEpK4oMPPtCu2e12unbtysaNG+nXrx8A8+bN87hv+/btOBwOXnrpJRISErTrEyZM8NiFVdvImjVrlnbN7XZz7bXXsmHDBjp06EBlZSVbtmxh5MiRDBo0SFt32223ceDAAQ4fPkxkZCSpqam4XC7cbjevvvqqlu2JJ54gJSVFa1gdPnwYgFGjRhEfH6/V1KVLF4KCgjzma+3bt481a9YQFxfHQw89RM+ePXnhhRe04xBr52slJydz22238eKLLzJhwgR++eUXzQE1s65CQ0OZNm0aW7Zs4cEHH6Rnz55s2LBBq6dVq1Y0btyYHj16sH//fo4ePcpnn32mNQUvBDVQ7szocRCiaKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU8aMMtYswiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwXQxnXmFpnZlTAX5mXhl4NWHB/ufta9MqlAA/8x8eCxjgZyahZch55y0pczJj6S7Sswu0a+2ia+ZNWXy9zjvjH9EQ3rUs6KphlZWVxV133eVxzWq1EhYW5rG7CSAzM5Px48eTlpaG0VjTnaudL1VLbGwsc+bMoby8HF9fX7KysoiJiWHGjBl8+umnFBQUkJCQQJMmTTT/wYMHcTqdhIaG8uSTT5KamoqXlxdNmjQBID09ncjISLKysggICKC0tJQPP/yQ0aNHExERQVRUlEfW/Px8vLy8GDVqFEeOHMHhcHDVVVdhNBq1HVNwar7WgAEDAJgyZQo7duz4w/la8+fPp6SkBIB//etfNGrUiNjYWK3+yspK3nnnHX799Vd27Nih5f79bCq3282iRYtwuWr+5bnlllt48MEHGTFixHm9N4VCoVAoFAqFQqFQKBQKhUKhUDQsft+sgppZU+Pm/Mx7/0mul/OVgVczbs4fN8Hqw4ylu8jIKfC4lpFTwPQluxjev2O9nAr9oquGld1ux2q11rkeFBSEzWbTfrbZbAwcOJCoqCgmT57MwoUL+fbbb3n77bcZNWqUts5qteJ2u7HZbPj6+mK32zl8+DBr165l5MiRxMXFMX/+fNatW0dgYKDmBnjvvffw8/Nj0qRJlJeX89prrwHw7rvvEhAQwK+//orDUbN98eqrr2bUqFFs3ryZqVOnemQvLi6mcePG/Prrr9x2223aDi+Xy0WLFi20dd26dcNoNLJnzx4ArrzySlavXk1ERIR2VCFAx44dSU1NJSIiAgCz2czWrVt5+umntTXl5eUsWLCAFi1a0LRpUw4ePEhmZiaDBw9m586dtG/fHqg5WtHlchEXF0fr1q1Zt24dM2bMoKCggNGjR+Pt7X2eb5D/n0kNlfs9eh2EKNIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zKjXmndknvjDnVBQ07TafbCQ9jGNztvbrHEAH4zoSUZOAQfzL+yYwaMnHR47q2pxuSE9u4AT9nKahp7/TrBaGsK7lg1dNazOlYULF+JwOJgyZQrBwcGkp6fz/fffs3DhQoYOHXrGY+3cbjcZGRkM
HjxYO+6vc+fOXH311Rw5csRj7aFDh1i5cqW2K8lgMPD000/jdDoZNmwYbrcbk8lE27ZtefXVVwG49tprmTNnDqWlpR7feezYMW666Sa2bdvG2rVrufLKKyksLOTYsWPaui+++AIvLy+uueYavv/+e/bs2UOnTp345ZdfPOZ3LV++nKuvvlo7atDhcNCyZUu2b9+uuaxWK23btuWnn37yqOmjjz6ioKCAiRMnAnDixAkA9u7dy969p4bhffHFFwwZMoSoqKhzeyGnYTQaCAmxnPd9smC1XpzzVUX5RDhlzChjzSKceveJcKqMcvhEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinynj5+47kl7Bvdx4RjS1EhAXUy5FbcOjs33GylOTOLevlBkgKsZBU77tryDxWctbPHZWui/K7aD2/a9nQVcPKarVSXFxc57rNZiMoKEj7OSUlhW7duhEcHKzdV1VV0w0+fdaV3W7HYDBo95pMJqqqqujdu7fm8vb29tjBVbs2MjLS4wi92vlTHTt25K233mLixIl8/PHH9OnTxyPrddddx+rVq7VZV76+vjgcDl5//XWPGjp06EB+fr5HTbXzu+Li4vjPf/7DPffc4zG/69ChQ+Tk5PDss89it9t58cUXWbRoEcuXL+fNN9+ksrJS2xX1ySefcOTIEf7xj39gt9v54YcftGMNAcrKyqiurmbs2LHaMYQAd911F+np6eTn59erYeVyubHbS/98oWSYTPof4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mvJi+kjInUxfvZGfWqV1H7WNCefzO9lj8zm+eU0To2ZsqzRv5U1joqFdOuDh1+3sZzvq5xdt4yTOK9F0uWK1+57zrTFcNq5iYmDqzqoqLi8nPz/doHv1+1lXtZyEhIR73Z2VlERERga+vL4B27N/pLrfbTUlJCWVlZZSXl9OyZUsMBoO2tpbs7GwtD9Q0yYA6u7lqZ0tlZWURGRlJQEAATqfTo1lVXFxMRUWFR3PuXOZ31f4zOjqaX3/9VVsXGxuL0+nk0KFDxMbGAlBQUMDgwYOxWCzY7XbMZs9XXVlZidvtrnPsX+08sMzMTLp06UJ9UAPlzkxDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzXgzfB1/trDPPaVd2Ae9/tfO85zm1aRVKgJ/5D48FDPAzk9Ay5KLUfyF1hwX50S46lIycAlzuU9eNBmgTFUpjq+8lz/hX+GRCVw2r5ORkpk2b5jHLatWqVRiNRpKSTm0g/P2sq8TERAICarY+1u6UcjqdrFmzhuTkZG1dREQEO3bs4OjRo9ruoU2bNmlH+NlsNsLDw/Hx8fHY/QSwYsUK/P39cTqdAFxxxRUAZGRkaDu2bDYbaWlpHjnCwsI4fvx4nZoMBgPl5eVnrKmW03d/1f7z9+tqf6793OFwMGTIEJxOJ4MGDWLChAl/6PXz82Py5Mm8+eabFBYW8uqrr2oztE6fGXa+qBlWdWkI56GqjPrziXDKmFHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVGfGS+WT8Q8pzEPd2XMrK0eTasAPzNjHu56wb/jvVh1D+vXng9+t6usbXTNrjK9ZBTlkxFdNawGDBjAvHnzGDZsGEOHDiUvL48333yTAQMGeOxkqq6u5qOPPuLRRx8FwMfHh6FDh/K///2Pffv2sWnTJhYsWEBRURGDBw/W7rvyyitZvXo1Tz75JMOHD6esrIw333yTNm3akJGRoa0LCgri+PHjjBkzht69e7NlyxaWL19OQkKCtqZRo5qBc/PnzycqKorw8HCmT5+OxWLxmGEVGxvL7t2769TUsWNH0tPT
tXVut5t9+/axatUqAO0/Oxx1tzSuX79eayytX7+ekydPenz+5JNPsmfPHl577TVycnIASE9Px2q10rFjR6BmFpbb7SY3N5fWrVtTWFjIW2+9pTXkDIazb7c8E2qG1dlpCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjBfqEzHPKSTEwoLxt5G29zh7fisgvlUoneKa/PmN58GF1h0SAq8Pu47c/BJyTzguaG7XmdDbu5YZXTWsgoKCmDNnDuPGjWPYsGFYLBbuvvtunnnmGY91tbOoTmfIkCFMmzaN/fv38+ijj5KQkMBHH31EixYttDUhISG43W5atGjB8OHDMZvN3HzzzcTHx7N7927t2L4mTZrQokULtm3bxhdffEFERATjx4/niy++0NbU/jMpKYlJkybhcDhITEzk9ddfZ8iQIdrnjRs3xmq1YjKZPGoymUwcOnRqsJ2XlxfLli1j2bJlAHz99dd8/fXXGI3GOt/5yiuvaPe99NJLHs8PauZ4ATz//PMezwdg7969QM2Mrnbt2rFjxw727dsH1Bxn+O9//5sPPviAsLCwc3pnv0fNsPpjTCb9n4eqMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZjxeVEZxeTVWXxNhwfVvZIic5xTbLJBOcU2w28suaCbU6Vzs5xjgY+LqhHBdZxTx5/tywGptoDOsoGZH0uzZs8+6pmPHjgQHB3tcKykpobS0lNdff51+/fr94X21s6ueeuopPvjgA+36xIkTPWZdxcTEsG/fPq15BDU7oP7v//5PO5qwZcuWeHl5cfXVVzN58mRt3bp16zy+KyYmhqKiIt59912POVZPPvmkxyytdu3aERwczPvvv69dKy4upkuXLh4ugPfff5+bbrpJWzdv3jzeeOMNrTlX25QC+Oqrr3jxxRfZtGkToaGhHs9w/vz5QM3uq7vuuouJEycSHR3NBx98wFVXXfWHz/BcUOdznpmGcB6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2WUwyfCqTLK4RPhVBnl8IlwyphRxppFOGXIWFLmZMbSXR7H+LWLDmXoHW2x+Hqdt++vmOckw3v5K5wiMspCgzxMMTk5mR9//BG73a5d+6NZV7+ndtbVypUrtWt/NOsqOTmZPXv2aMfpQc2sq6KiIq6//noAvL29ueaaa1i9erXHd6xYsYLY2FgiIyMB6NGjB0ajkTVr1mhrbDYbqampdb7zz2pq0aIFUVFR2rGBp39nt27d8Pb2PvNDOwu1zT8fHx/mz5/P1Vdf7dFMUygUCoVCoVAoFAqFQqFQKBQKxbkzY+kuMnIKPK5l5BQwfcmuejuH3tGWNlGhHtfaRNU0wRSKywHd7bDKzMxk/PjxpKWlYbFYuOOOO3j66ac9mjFnmnXVvn17+vfvT0FBAQkJCVRVVWG321m7di1Q05C57777mDFjBh999BHe3t4EBwfXmXXVq1cvJk2axO23347L5SIkJITKykp69uxJhw4dtHVDhgxh0KBBXHXVVbhcLpo0acKRI0d4++23tTVNmzbllltuYfTo0YwdOxY/Pz/8/f0JCAhgwIAB2rp+/foxbdo0unfvDtQcS1hYWOgxv6u4uJjAwECWLVvGihUr8PPzIyAggPz8fG23FMALL7zA4sWLPZ5rt27dABg0aBAvvvii9p83bdqkrfnPf/4DwJgxY+rx5k5xocPuLkcawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqJ+PRkw6PnVW1uNyQnl3ACXs5TUP9z9sbFODDc/cnkl9Uhv0iHDNYiyzvRbRTREbZ0FXDymazMXDgQKKiopg8eTJ5eXlMnDiR8vJy
Ro0apa37o1lXrVu35pdffuHZZ58lLi6O+fPn891333nMYnI6naxfv57Q0FCqq6spLi7m+PHjtGvXzmPW1a+//sqxY8eIiIjgxIkT2Gw2Kisr6d27t0feFStW4Ovri9Vq5eTJk+Tn5xMYGEiPHj08avrpp59o3LgxpaWllJWV4XA46N27N4GBgdq6999/H5fLRfPmzcnNzSU/P5+qqir69u2rramsrKRVq1bExsayadMmTp48SXV1NWaz2eOIxMcff5zU1FTy8/PrPOPS0lKqq6sxmUwYjUa8vLwwGAxUVlYSGxvL448/ru0iqw9Go+G8h/vJREMY4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwiXCqjHL4RDhVRjl8IpwyZpSxZhHOyz1j5rGSs37uqHRd0O9QRf3+9XJ/L3+VU0RGWdBVw2rhwoU4HA6mTJmiNWCqq6sZO3YsQ4cO1XYageesq4qKCrp3787gwYMZNGgQAJ07d+bWW2/1OHZv9erVHDhwgBUrVmhH3qWmpjJ48GB27Nih7Z6aOnUqV111FQsXLtTuHTFiBDNmzOAf//gHAMeOHeOLL75g9OjR9O/fH4CioiJuuOEGFi5cyJAhQ7SaSktLWb9+vVbTZ599xtixY3nuuecIDw8nLy+Pzz//nBdffJEHHngAqJmZ1bdvX6ZPn87UqVMBaNSoEZMmTfJ4Zg6HQzua8LHHHgNq5mulpqZqa7Zs2cKDDz4IwOeff05OTg7z5s2jSZMmREVFMW3aNP72t7/x1FNPceutt57nW/PE5XJjt5dekONyxGTS/wA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTJeHOfxojKKL3D3kr+X4ayfW7yNFBY66uUGOd+LrBkvB6xWv3PedaarhlVKSgrdunXz2C3Uu3dvRo8ezcaNG+nXr98f3rd9+3ZKSko8dkB5e3tz8803a8cB1vrj4uI85jMlJSURHBzMhg0b6NChA5WVlWzZsoWRI0d6fEefPn1Yvnw5hw8fJjIyktTUVFwul0eDJzg4mKSkJFJSUrSG1bnUtGfPHqqrqz3mbxkMBnr06MEnn3xCZWXlGedT+fv74+Pjg9PpPONzbdKkCQaDgQEDBnDXXXdhsYjdAaUGyp2ZhjDAT2XUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqkyyuET4VQZ5fCJcKqMcvhEOGXMKGPNIpx6zFhS5mTG0l0eR/m1i66ZD2Xx9TovV1iQH+2iQ8nIKcDlPnXdaKiZOdXY6ntR6pfhvYj2iXCKyCgLujpMMSsry6OZBGC1WgkLCyMrK+us9wF17o2NjSU3N5fy8vIz+g0GA82aNWPBggV07NiR6667DqfTScuWLeu4Tv+urKwsGjVqxGeffabNturfvz8BAQEeWbOysggPD+fJJ5+kU6dOdO3alTfeeIPGjRtr6yorKwHYtm0bffv2pX379vTq1YusrCwqKys5fPiw5qusrGTixIl0796dq666ihtuuAFA2/kFsHLlSv7973+TnJxMx44deeCBB3C73QwePJj27dt7PIPffvuN22+/HaiZYdWzZ88zPmeFQqFQKBQKhUKhUCgUCoVCobgcmbF0Fxk5BR7XMnIKmL5kV718Q+9oS5uoUI9rbaJqGmAKheKP0dUOK7vdjtVqrXM9KCgIm8121vu8vb3x8fHxuG61WnG73dhsNnx9fbHb7R5zo6BmxlRmZia+vr5MnjyZrVu3MmPGDL7++mtuvPFGD1ft+trvrK6u5r333mPkyJHa3KxvvvkGl+tU99Rut7NmzRpCQ0OZNGkS5eXlvPHGGzgcDs3VqlUrAF555RX++c9/8tJLL7F582btKMDTax8/fjyLFy/Wmly1c7NO38E1e/ZsmjdvzgsvvEBISAgjRowAYMmSJTzxxBPauuDgYPr27YuPjw/z
58/HaDRy9OhRpkyZ4rHufDGbddUH1QUNYYCfyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnr7zx60uGxs6oWlxvSsws4YS+naaj/eTmDAnx47v5E8ovKsF/gEYO/R5b3ItInwikio2zoqmF1KVi4cCFVVVXEx8dz3XXX4e/vz4wZM1i7di15eXkec7NOp7q6mqKiIh599FGPuVlJSUmUlJwaqudyuThx4gTz5s3TdjZZrVYGDx7MyZMnAWjdujVWq5Xy8nLuvPNOoqKi2LVrFwaDAbfbjcFQc+Zp7dysZ555hmuvvZb8/Hw+/fRTfvjhB959911efvlloGYGV2hoKACZmZmcPHmSTp068fHHH/P4449jNNb8C1NUVMTixYs9sgLMnDmz3g0ro9EgbOjf5UBDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRzovpe3vBNtIzT9Lhisb8Z0BivRyZx0rO+rmj0lXv33mK/F2pnt+LKKesGWVBVw0rq9VKcXFxnes2m42goKCz3ldZWUlFRYXHLiu73Y7BYNDutVqtHs0kqJkxFRgYSOPGjQG0tS6Xy2Nult1u9/i8rKwMt9tdZ25WVFQU6enp2jWz2UxAQECduVkGg4HCwkKg5pi/0tJSGjduzIABAwBo3rw5t912G8uXL6e6uhpAm5v1z3/+U8vRs2dPunbtytKlS7WGVW2zCmDZsmWYzWb+9re/kZaWRmlpKQEBAQBMnDiRxx9/nL59+7Jw4UJmz57NDz/8wIkTJygvL8fX1/eMz/xMuFxu7PbS877vcsdk0v8AP5VRfz4RThkzylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYswiljRhlrFuGUMaOMNYtw6t0nwilbxtQdR5ixdLf287c/HeLbnw7x2B1t6N4+4rxc/l6Gs35u8TZSWOioV07Z3osop6wZLwesVr9z3nWmq4ZVTExMnVlVxcXF5Ofn15k99fv7ALKzs4mPj9euZ2VlERERoTVeYmJi2Ldvn8e9mZmZlJeXa46WLVvi5eWFj49PnVlUp3+X2Vzz6GobXbVUVVXhcrm0ho/ZbMbLy3MoX0lJCW63m6qqKgAOHjxIVVUV48aNIyYmhvLycqKjo/nf//4HgMPh0DI0atTIo3lnNBpp3rw5+/fv/8Nn880339CtWzf27NlDeHi41qyq5bXXXuOOO+7weG4Xihood2YawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPhlDGjjDWLcF4M3+nNqtOZtiSDrglNz8sVFuRHu+hQMnIKcLlPXTcaauZONbb6XnBeWd6LaKesGWVBVw2r5ORkpk2b5jHLatWqVRiNRpKSks54X2JiIgEBAaxcuVJrvDidTlasWAFAx44dsVgstGvXjj179pCTk0NUVBRQs3vL5XJx/fXXAzW7pLp27cqWLVv45JNPmDt3LgkJCQQEBBAbG0tkZCQATZo0AeDRRx8lJycHLy8vrr/+eg4cOKB5fX198fb25sSJE9x2220cPHiQiIgIOnXqpH1X7VqomTG1efNmHA4HHTp04NChQx6f2+12fH19eeihh0hLS8NisXD77bdz4sQJj7lZZWVlfPDBByxevJj8/HxsNht2u53nnntOW1NZWcnw4cP54Ycf8PLyYuHChfTu3Zvi4mKuvPLKeu2uqkXNsKpLQzgPVWXUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRThlzChjzSKceveJcMqUcfqSnWf9/OMVGQzp2+68nMP6teeDxTvZmVWgXWsbHcrjd7a/oN93yvReRDplzSgbumpYDRgwgHnz5jFs2DCGDh1KXl4eb775JgMGDPCYJTVw
4EByc3NZu3YtAD4+PgwdOpTJkycTGhpK69atmTt3LsePH6ddu3a89tpr5OXlMWHCBKxWK08++STDhw+nrKwMl8tFTEwMHTp00PyRkZFs3LiRoKAgHnnkERYtWsQvv/zCK6+8oq3x968Zsrdnzx7uv/9+/P39mTVrFm73aS14wMvLi+rqamw2G0899RTbtm1j8eLFhISEeBxfCLBmzRoeeOABXC4XCxcupLKy0uPzffv2ceTIEby9vXniiSc4ePAgc+fOxeVyaXOpAF599VXWrFlDmzZtKCoqwul04na7KSoq0tbs3buXtWvX0qhRI5o1a0Z6ejpbt26loqKC//znP/V8g2qG1Z/REM5DVRn15xPhlDGjjDWLcMqYUcaaRThlzChjzSKceveJcKqMcvhEOFVGOXwinCqjHD4RThkzylizCOeF+vYftp/1872HbOf9O8qQEHh92HXk5peQe8JBRGMLEWEBf37jOSLDe/krnLJmlAVdNayCgoKYM2cO48aNY9iwYVgsFu6++26eeeYZj3Uul4vauU61DBkyBLfbzaxZsygoKCA0NBRvb28+/PBDgoODAaiurmbMmDFcddVVDB8+HLPZjLe3N8nJyZqnoqKCb775Bm9vb9xuN//73/9o1qwZwcHB2u4pgCNHjgDQt29fVqxYgcPh4MorryQjI8NjblZZWRmBgYFcccUVTJkyBYvFQkxMDEeOHNHWOJ1OoGbH1dy5cwkODua2225j2bJlVFdXa+vKy8txu90UFhbyzjvvEBYWRlxcHBkZGR6zt1auXMlDDz3EZ599ho+PDxEREVx55ZWsWLGC4cOHA7Bu3Tr8/PwwmUzs3r1bu7dZs2Zcc801VFZWajvAzgc1w+qPMZn0fx6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhbAgZjxeVUVxejdXXRFhw/X+pf2WkleOFZWf8PK5FUL1nTgX4mLg6IRy7vazejtNpCO9FZdRvxssBq7WBzrACiI2NZfbs2WddM2/evDrXDAYDQ4cOZejQoQDcf//9tG3bVmtWAfTu3ZvRo0fTu3dvZs6cqa07fPiwtmb79u2UlJRgMBh49tln6devHwATJkzQdnQBHDt2DICHHnqIN954AwC3281VV12Ft7c3vr6+VFZW4nA4aNKkiUdN3377LcOGDdPmX+Xk5AA1xwvW5gc4cOAAv/zyizY3q6KiAoDVq1drDSq73U6XLl20n2tnY/n7+9OqVSuOHj3Khx9+yMyZMz12fx09epSysjLKyk79F3thYSEAXbp0YcyYMdx7771neQtnRp3PeWYawnmoKqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWNGGWu+GM6SMiczlu4iPbtAu9YuOpShd7TF4ut13r7Bt7Vl4868M37+UJ82ups5pcf3ItonwilrRlnQXcPqYpGVlcVdd93lcc1qtRIWFkZWVpZ2LTk5malTp/LAAw+wc+dO7Wi938/Nio2NZc6cOZSXl+Pr64vNZsNsNvP6669z8OBBCgoKiI+Pp7q6mpCQEAAOHjyI2+3m+PHjPPzww6SlpeHl5cWVV14JoM3DOnjwIF5eXixZsoRvvvmG7OxsIiIiKC0txWQyaeuKioowGAw888wz7N27F4fDQfv27QEICwsDwGQyERQUxFtvveVRYy3ffPMNt912G0OGDGH37t3s2bOnzrObOHEi3bt3r+eTVygUCoVCoVAoFAqFQqFQKBSKU8xYuouMnAKPaxk5BUxfsovh/TvWy/lo3wRmLN39h9cVCkXD5LJtWNntdqxWa53rQUFB2Gw27ec+ffrwzjvvsHv3bh577DFWr15NRkYGsbGxHnOz5s6di9vtxmaz4evrS0lJCY0bN2bLli3cdNNNJCYmMnv2bKqqqrTvrf0es9nMTz/9xCOPPEJJSQmf
fPIJAKGhoR5ZMzMziY2NZfjw4axdu5acnByP2VQlJSWEhISwceNG+vXrR6tWrbSdYk2aNNHWJSYmsmbNGo+6mzdvzrFjx+jcuTNQ04CzWq0kJiZyzTXXMHXqVLp168ahQ4fo06dPnfla58OFDCG8XGkIA/xURv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinjBllrFmEU8aMMtYswql3nwinXjMePenw2FlVi8sN6dkFnLCX0zTU/7y9PTo0p0eH5ny0LJ3dB20ktAxi8O3t6p2zFlnei0ifCKesGWXjsm1YnSsrVqzA29ub+Ph4pk6ditFoxGAwcODAAfLy8rSm1enH6dX+nJ+fT9euXUlPTyclJYX4+HgKCws5ceKEx1qn00lSUhKzZ8/GbDbTpUsXtmzZwsGDB7U1paWlxMTEYDKZmDRpEhERETRr1oyjR496fGdBQQHdu3dnw4YNOBwOOnTowNatWz1cP/74Y506a2duVVVVeVy3Wq20bNkSgJCQEE6cOHFBzSqj0XDeAw1loiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRn35tu89zt7NvxHfKpROcU3+/IbzQK81i3TKmFHGmkU49e4T4dRbxsxjJWf93FHpuqDfJ4588Jp633s2Lvf38lf4RDhlzSgLl23Dymq1UlxcXOe6zWbT5j0BpKSkkJSUxAcffADA/PnzefXVV3G73dpOJoCBAwcyatQo7V6TyUR1dTUvvfQSCQmntpled9112s6q2rWRkZHMmjVLW5Odnc2tt95KdnY2AAEBAZSVldG/f38GDRqkrXvqqac4evQohw8fJjIyEl9fXxwOB++8845HDR06dCA/Px+Affv2UVJSwtSpU7nxxhuBmrlc9957L2az2WOm1+955ZVXtF1f9cXlcmO3l16Q43LEZNL/AD+VUX8+EU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8a8glLGfryVkrJTf2E0wM/MmIe70iTk/HcQiMgoyifCKWNGGWsW4dS7T4RTrxn9vQxn/dzibaSw0FEvN+j/Oer1vYj0iXDKmvFywGr1O+ddZ5dtwyomJsZjVhVAcXEx+fn5xMTEaNd+P+uq9rOQkBCP+7OysoiIiMDX1xeAwMBAj/VQswOqpKSEsrIyysvLadmyJQaDQVtbS22jqrahVnuE4OlHEELNEYC13x0ZGUlAQABOp9OjWVVcXExFRYXmOnDgAIBHE2358uX4+PhQUVFBXl4eAQEB2mdbt25l48aNAPz73/9m5MiRdOnSpe4DPQ/UQLkz0xAG+KmM+vOJcMqYUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRjl8Ipwqoz58v29WAZSUVTFm1lbe+0/yGe46P/RW81/hlDGjjDWLcOrdJ8Kpt4xhQX60iw4lI6cA12mHWBkN0CYqlMZW34uSV+/PUW/v5a/wiXDKmlEWGmzDKjMzk/Hjx5OWlobFYuGOO+7g6aefxtvbG4Dk5GSmTZvmMctq1apVGI1GcnJy6NmzJwUFBVRUVGiNIaiZ/xQQEIDL5WLFihXMnz8fs9lMVVUVffr00dZFRESwY8cOFi9ezKeffkp2djYhISGUltbsLrLZbISHh+Pj40N+fj5vvPEGS5cuxeFwYLFY8PX1xel0AnDFFVcANUf5ff7556SlpeHn54fdbtdcAGFhYRw/fpzHHnuMjIwM8vLy6NWrFwaDgfLycqBmVhXACy+8QG5uLnl5eVqTq7KykoiICK0Gt9ut5QX45Zdf+Ne//sXgwYN57rnn6v1u1AyrujSE81BVRv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinLBl3ZJ6o06yqpaSsit0HC2kf06jefj3WLNopY0YZaxbh1LtPhFPPGYf1a88Hi3eyM6tAu9Y2OpTH72x/wb9L1Ptz1PN7
EeUT4ZQ1o2wY3L8fztQAsNls3HbbbURFRTF06FDy8vKYOHEiffv2ZdSoUR5roqOjPdbExMSwa9cuRo4cSVxcnHYE37fffkuLFi0AmDp1Ku+88w6+vr4MHTqU9evXs3PnTrp27crcuXMBmDJlCpMnTwZqjgFs3749c+fO1ZpfKSkphIeHk5ycTF5eHl5eXjz44IMcP36cZcuWYTab6dy5M3PnzmXbtm3cd999GAwGWrVqxZ133snXX39NTk4Obrebt956i9tvv53nnnuOpUuX4ufnR5cuXdiwYQM+Pj60adOG9PR00tPTqa6uJjk5maKiIvr06YOPjw+LFi0Cahpeqamp2nMcOXIkq1at4sYbb2T16tUMHjyYRYsWER0dzeeff16vd+N2uzEYzr7NV6FQKBQKhUKhUCgUisuRBWv28OnqvWf8/L5ecdx7S/xfmEihUCj0RW5+CbknHEQ0thARFvDnNygUCqlokDusFi5ciMPhYMqUKdpMpurqasaOHcvQoUMJDw8nKCiIOXPmMG7cOIYNG4bFYuHOO+/kiy++4OGHH9YaVWazmerqaj766CPGjBkD1MycAjAYDEydOpWEhAReeOEFJkyYwI4dO+jQoQMhISFAzZyqbdu28euvv9KrVy92795NRkaGdmxfSEgIeXl5hIaGMnfuXCIiIvjvf//LhAkTKCoq0hwARqMRm83G1KlTSUxMpEePHsybN4/anmLjxo0JCgoiISGBLVu2ADXzqzp27MihQ4cAMJlMzJ49mzlz5vDjjz9y7NgxjEYjN954I99++y3p6em0a9cOgO+//x6n08nq1asB+Oijj4CaOVj1Rc2w+mNMJv2fh6oy6s8nwtkQMu7KLuDQCQctwyy0iQrVXT4RTpVRnxllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpS8aI0LMPWW/eyP+yntMiwiljRhlrFuHUu0+EsyFkDPAxcXVCOHZ72QX99+Hp6P05NoT3ojLqN+PlwGU/wyolJYVu3bppzSqA3r17M3r0aDZu3Ei/fv0AiI2NZfbs2dqaTZs2MWfOHHr37q1d69ixI8ePHyclJUW7tm7dOgBGjRqludxuN1OnTmXDhg106NCBli1bAnDXXXfx/PPPa/c+/vjjZGRkcOLECSIjI7WZV8uWLdMaU263mzfffFObO1U76yomJobly5drruXLlzNv3jzy8/OBmnlZNpuNd999l6CgIOLi4rjhhhv45ZdfPGZpXXnllYwfP57y8nK6detG3759efDBB/n22285fvy4tm7gwIHMmjWLtLQ07drYsWO1BlZ9UedznpmGcB6qyqg/nwinHjPmFZby2tyf6wynfmXg1YQFX9hw6ouR769wqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCOflnrFNq1AC/Mx/eCxggJ+ZhJYhUsxpEeGUMaOMNYtw6t0nwqkyyuET4VQZ9emTiQZ5mGJWVpZHgwbAarUSFhZGVlbWWe8DPO5NTk7m2LFjHDlyRJsD9euvv2IwGEhKStLWGQwGoqOjNUejRjVnTh87dkxb43Q6SU9P9/iu2qZaYWGhtm7Tpk04nU4cjpq/ReDt7Y3JZNJmVtXy/fffYzKZtHt79OiB0WhkzZo12pry8nJSU1NJTk6uU++6desoLS3l9ttvZ9u2bXVqr73/2muvpU2bNtx666188803tG/f/ozPUKFQKETx+2YV1JzzP27Oz5cokUKhUCgUCoVCcf68MvBqAvw8/35w7V/EUigUCoVCoVCcmQa5w8put2O1WutcDwoKwmazaT9nZmYyfvx40tLSsFgstGrVCm9vb3x8fLQ1AwYM4MMPP6SyspK1a9dSWVnJ0aNHCQ0NpX///hQUFJCQkEBVVRUHDhzA37/mb/mXlZUBsHLlStauXYu3tzfBwcHarqnaHI0aNcJsNnP77bfjcrkICQmhsrKSmJgY7Rg/qNl1lZeXx1VXXYXL5aJJkyYcOXKE8PBwzdW0aVNu
ueUWRo8ezdixYwH4+OOPCQgIYMCAAZprxYoVrFy5kg0bNgDw448/smjRIv72t78RFRWlrZkyZQpwqpmWnZ0NoM3yqi8XOijxcqQhDPBTGfXnE+HUa0aRw6n1WrNInwinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8ZmjQP4YERPMnIKOJh/8Y66Pj2b3moW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyppRNhpkw+pcsNlsDBw4kKioKCZPnkxeXh5jx46lqsrzF6JBQUEMGzaMCRMm8N///peAgAC8vLwoKCjghRdeIC4ujvnz5/Pdd99hNp96XLUeq9WK0WikuLiY48ePEx8fz86dO7V1J0+epKqqisjISE6cOIHNZqOyspKkpCSPhpXL5cJsNhMUFMTJkyfJz88nMDBQa5DV1vTTTz/RuHFjSktLcTqdlJSUcP311xMYGKitW7VqFTk5OTidTgDtmMHaGV1waoeYxWKhvLwcX19fWrduTXV1NV9//TXPP/88Xl5e5/3cjUYDISGW875PFqzWs59nfql9IpwyZpSx5gt15hYcOuvnR06Wkty5Zb39oL+a/wqfCKeMGWWsWYRTxowy1izCqXefCKfKKIdPhFNl1JcvKcRC0p8vqxd6rVmkU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPKarVqO5lOx2azaXOiFi5ciMPhYMqUKdqxfJs2bWL58uUcPHhQm0EF4O/vj8FgYOvWrRgMBjp27EhMTAyDBg0CoHPnztx6661UVFRo/tqm1LPPPss999wDQGpqKoMHDwbQ1u3fvx+z2cx3332nfd+IESNITU3V1hw7dgy32023bt348MMPASgqKuKGG27g+PHjHjWVlpayfv16goODiYuL4+abb2blypU899xzhIeHA/DOO+9gNBpxu93Ex8dz9OhRFi1aRJMmTbQMzZs3B+D111/n1ltv1a6vXLmSp59+moMHDxIbG3t+LwZwudzY7aXnfd/ljsmk/wF+MmY8XlRGcXk1Vl8TYcEX/j8kyzZms+dgEW1aBXNb9+gL9oE870XkcGq91izSJ8IpY0YZaxbhlDGjjDWLcOrdJ8KpMuozo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpyyZrwcsFr9znnXWYNsWMXExNSZVVVcXEx+fr42oyklJYVu3bppzSqAPn36sHz5cpYtW8awYcO061lZWURERODr68umTZtwuVy4XKf+QHl7e3PTTTcxb948zb97924ASktPNWeSkpKwWCw4HA5iYmKorKzk2LFjVFdXezTTanNcddVVQE2jC8BoPPXSgoOD6dq1K99///1Za4qPj2ft2rVs3LiRfv36eXjeeOMNAO68807i4+PP6xlfCGqg3JlpCAP8ZMhYUuZkxtJdpGcXaNfaRYcy9I62WHzPf2dhRk4Bby38Rft5Z+ZJPluXyXP3dSS+5cU5+uNyfy9/xXBqvdX8V/hEOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpa0ZZaJCHKSYnJ/Pjjz9it9u1a6tWrcJoNJKUVLPhPisrS2v01NKjRw8MBgMpKSnaNafTyZo1a0hOTtbuA8jJySEnJ0dbZ7PZcLlczJgxg6SkJDZv3kyjRo1YvXq1tsZgMODt7U1AQACRkZEcPHiQ6upqDAYDN998Mx06dKB///4UFRUBcMUVV2jf6efnR2pqKh07dqRr167897//1Y70O70mk8lE3759ad++PVCzgyssLMyjgVdZWcl9993Hxx9/DMDmzZvrNPi2bNkCwNNPP01cXBwJCQn8/e9/58svv8RqtXrsQFMoLjdmLN1FRk6Bx7WMnAKmL9lVL9/pzarTefPTP76u+GPUcGqFQqFQKBQKhUKhUCgUCoVCXhrkDqsBAwYwb948hg0bxtChQ8nLy+PNN99kwIAB2rF4drud1atXs3r1atau
XQuAj48PoaGh7Nixgzlz5tC6dWsWLFhAUVGRdpSf3W7Hy8uL6OhonnzySYYPH87JkydZsmQJUHOEXkVFBS+//DIRERH88ssvjBkzht69e7NlyxYKCwuJi4sDappctVRUVPCvf/2LtLQ0Xn75ZQA6duwI1Bz/V9ucioyM5MYbb2TBggWUlJRgNBq1mmw2G2vWrKF79+7cfPPNTJkyhZUrVxIcHMyePXu07xoyZAjbtm2jY8eO/PLLL5SWlnLffffxv//9j/j4eEJDQyktLcXHx4dOnToRFRVFRkYGO3bsYP/+/fWeX1WL2dwg+6BCaQgD/GTJePSkw2NnVS0uN6RnF3DCXk7TUP8/uPOPWfJD1lk/X7n5N27vUf/jAWV5LyBuOLWeaxblE+GUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mlLFmEU5ZM8qGwe12uy91iPqQmZnJuHHjSEtLw2KxcMcdd/DMM8/g7e0NQNu2bWnatClut5t169Zp99122234+fmRn59PQUEBCQkJvPjii3Tq1AmAqVOn8sEHH/Dtt98yfvx4UlNTqa6uxul04nK5SElJITw8nG7dulFYWMhrr73G7Nmzyc7O1o4VbNy4MbNmzWLz5s0MHDiQ22+/nbCwMJYsWYLD4cDlclFZWclbb73F7bffzoMPPsiWLVuYPn06s2fPJi0tDW9vb+x2O0ajUTt+MD4+HovFQklJSZ3n4e/vT1paGseOHaNnz56c6bVOmDBBOzrw6aefZseOHZw4cQKDwYDFYuHkyZOMGTOGe++9t17vxe12YzAY6nWvQvFX8PPuPMZ+uPmMn49+5FquTgg/Z99LH6SyM/PkGT9vH9uI1x/vcV4ZFQqFPliz5Td2HsjnqivDuKlrq0sdR6FQKBQKhUKhUCgUCoXisqZB7rACiI2NZfbs2Wf83Gq10qdPH0aMGOFx3W63c8MNNzBy5Mgz3ldZWUlwcDCTJ08G4P7776ekpIS9e/dqc6iaNm1KQUEBBoOBZcuWafcPGDBAW3P48GEAEhMTue+++3j++ecBePHFF/nqq6+0dSdPnsRkMtGzZ0969uwJ1DR+rrrqKkwmE1BzzJ/b7aZ9+/YedX/33Xc8/vjj9O3bFzg1D2vr1q0EBQURFxfHc889R1paGjabTWtWAbzzzjseta9evZqnnnpKa5DVB5fLjd1e+ucLJcNk0v8AP1ky+nudvaFq8TZSWOg4Z1/ryKCzNqziWwSfl+/3yPJeGpJPhFNl1FfG7Fwbr87+iVrF99uPMGXRL4x+qCtRzay6yCjKJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxppFOGXNeDlgtfqd866zBtuw+jNiYmLqzG0qLi4mPz+/zmyr398HkJ2dTXx8PFAzO6p58+baDiqAK6+8kn379nl8h9vtJjs7W5s55XDU/JK6rKzM4zv8/PwAaN68ufZ5dXU1NptNa2IZDAZ8fX21htXBgwf/0NW0aVMALVdWVhaNGjXSPLXExsbyxRdf1Km3urqaqqoqDhw4wPvvv4/JZCIwMPCMz+dcUAPlzkxDGOB3uWcMC/KjXXQoGTkFuE7biGg0QJuoUBpbfc/LfVu3KL7ccOZjAXtf2+qi1H+5v5eG6BPhVBn14Tu9WaU5XTD2463MfO7GC0z3/306q/mvcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCKesGWXhsm1YJScnM23aNOx2O1Zrzd+GXrVqFUajUWso1ZKZmcn48eNJS0vD398fLy8vli9frjWsbDYbVVVV3HbbbR7+JUuWsGHDBpYvX05BQQGRkZEUFRVx/fXXA1BaWorBYGDVqlX88ssvpKam4uXlpc2HCggI0HwGg4F3332Xn3/+mezsbMLDw7Hb7bRs2VLLALBnzx5effVVVq9ejcPh
0OZbtWjRAqjZQebr68uoUaP49ddfAZg1axYPP/ywx0wtqGl+JSUlaY01b29vqquradOmzQU9ezXDqi4N4TxUmTIO69eeDxbvZGdWgXatbXQoj9/Zvl5/fl/8VyITPtn+h9cv9N8Hmd5LQ/GJcKqM+sn4fdrhOs2qWqpd8GP6UZI7Nq+3X481i3bKmFHGmkU49e4T4VQZ9ZlRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRTlkzysZl27AaMGAA8+bNY9iwYQwdOpS8vDzefPNNBgwYoDV5oOa4v+3bt9O5c2cmT55MXl4eY8eO5aOPPiIsLIzWrVvjcrkoLy9n8ODB2n29evXiueeeY9++fdx99900adKEWbNmYTKZCAkJ0daZTCZ27NjBb7/9xtChQ9m1axdr1qzxyGo2m2nWrBnz58+nS5cuPPXUU3z66ae43W6tuVWLy+Vi4cKFPPDAAwDMmzcPAB8fH21NWVkZq1evplWrmnkbFRUV7N27F5fr1G/gJk6cSGpqKk6nk169enHixAm2bdsGQEZGhkdz7nwwGg2EhFjqda8MWK1+uvaJcOoxY0gIvD7sOnLzS8g94SCisYWIsIA/v/EMdA+xsKxTCxZ9t4+0vcfpFNeEe/7W+oIy/h4Z3ktD84lwqoyX3pd1tPisnx/ItXPHDRf+77eeav6rnDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOlVEOnwinjBllrFmEU8aMMtYswilrRlm4bBtWQUFBzJkzh3HjxjFs2DAsFgt33303zzzzjMe6Y8eO4Xa7mTJlCsHBwQBUVVUxZswYZs6cic1mw2g00rt3b20XE9Q0jqqrqwkKCmLFihWYzWZ69+7N5s2b+eijjxgzZgxWq5WqqioAQkJCmDJlChEREXTr1o1NmzZx8OBBwsPDsVqt5Obm0qRJE7Kysti5cyeJiYmUlJRw7NgxrR4Ap9NJdHQ0CxcuxGKx0KdPH5YtW0ZaWhr33HMPVquV0tJSysrKKCoqAmqOQlyyZAlG46nObkxMDLNnz8bLy4vvvvuO8PBwbr/9dpYtW8bixYt59tln6/Xc1QyrP8Zk0v95qDJmDPAxcXVCOHZ72QXNmaqlV5cW3PO31hfNB3K+F737RDhVRv1kjGkWyPdn+fyKCKuaS3eJfSKceveJcMqYUcaaRThlzChjzSKcMmaUsWYRThkzylizCKfefSKcIjIeLyqjuLwaq6+JsOAL/yW8jM9RxppFOGXNeDlgtaoZVkDN3KbZs2efdU3Tpk2Ji4vTmlUAffr0YcyYMQwfPpx+/fpx//33a8fm1ZKamgrAgw8+yBNPPKFdnzBhAmvXrgVOzcOKiopi9erVHms2b97M5s2b6dKlC1FRUezcuZMXX3yRQYMGATXzsDp37ozD4eDw4cO0bNkSk8lEdXU1Cxcu1BpY69atY9myZRw4cED7zvLycrZu3UpQUBAvvPAC6enpREdHaw0sgLvuuotXX32VESNGeHznN998g9PpPLcHfAbU+ZxnpiGch6oy6s8nwiljRhlrFuG83DP2aB/BnJV7/vBYQJMRurdrpubS6cQnwql3nwinjBllrFmEU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwnkxfCVlTmYs3UV6doF2rV10KEPvaIvF1+ssd54bsjxHkT4RTpVRnz6Z0F3D6vR5UhaLhTvuuIOnn34ab2/vs97ndruZOXMmn376KQUFBSQkJPDiiy/SsWNHj3V5eXmMHz9emydVXl7OgAEDPNZYrVasViv/93//x+jRo/H396e0tNRjHtaKFSsAOHr0qDYHqlOnTlx99dXk5uZSXl5OYmIiRqOR6upqHnroIW1GVkVFBaGhoWRlZQEQHx/PsmXL2L17N7169dJ2W9U2ybKysoiMjKRp06bk5eXxxhtv8O233+J0OgkKCiIoKIgjR44A0KNHDwC6du3qUdP+/fu56aabtJ9NJhP9+vXjk08+ITExkc2b
NzNp0iQAbrjhhnN+XwqFQqFQXG68PPBqxs/52aNpZTLWXFcoFAqFQqFQKBQKGZixdBcZOQUe1zJyCpi+ZBfD+3e8NKEUCsVlj64aVjabjYEDBxIVFaXNk5o4cSLl5eWMGjXqrPfOnDmT9957j5EjRxIXF8f8+fN5+OGHWbJkiXaUn9Pp5JFHHgFg0qRJlJeX88wzz5CSksJLL72kuX7++WdsNhsxMTG8/fbbbNiwgVmzZnHvvffy4osvkpeXp+2iWr16NS+88ALh4eE888wzbN68Gbfbjc1mIzw8nMDAQA4dOgTA448/zqpVq9i1axf+/v7YbDYA2rdvD8DXX39N79696dOnj8fOsNp1rVu35siRI6xYsYKHH36Y7OxsVqxYgdVq1XZPNW3alDZt2rBr1y4ee+wxNm7cyN69ewkICOC5557TnE888QRt2rShZcuW3HPPPdp1Ly8vxo8ff97v7nTMZjVU7vc0hAF+KqP+fCKcMmaUsWYRTpkyxjYP5uOXbiJ1Ry77DttoHRlEjw4RFyOibmsW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwXizf0ZMOj51VtbjckJ5dwAl7OU1D/S9pRpFOvftEOFVG/WaUDYPb7XZf6hC1TJ8+nWnTprF+/XrtiL7PPvuMsWPHsn79esLDw//wvoqKCrp3787999/P8OHDAaisrOTWW28lOTmZMWPGALB8+XJGjhzJihUrtOP62rRpQ3V1NYsWLaJDhw4ADB48mJ9//pm+ffsybtw4AB599FG2bt2K2+3GYrEQERHBzp07efXVV+nfvz8A9957L9u3bwcgJSWF8PBwunbtSnFxMY0bN6aoqIiEhAS6dOnChx9+yNVXX838+fPZtm0b9913H40aNaKsrAyz2czNN9/M7t27ycjI4K233uL222/n3//+N+vWrSMyMpK8vDwiIiK48847eeeddzCZTGRkZADwzjvvMHPmTIKCgigqKsLb25svv/yS2NhY7ZnNmDGDjz/+mMLCQgwGA76+vpjNZoqLixk5cqTW2Dtf3G43BoOhXvcqFAqFQqFQKBQKhUKhUCgUsrB973H2/lZAfKtQOsU1udRxNH7encfYDzef8fPRj1zL1Ql//HtahUKhuBB0tcMqJSWFbt26ecyT6t27N6NHj2bjxo3069fvD+/bvn07JSUl9O7dW7vm7e3NzTffrO2EqvXHxcVpzSqAoKAgHA4HGzZsoEOHDlRWVrJlyxZ8fHy0OVEA/fv3Z8OGDXz33XdERkYycuRIdu7c6XF83oIFC/jHP/7B7t27tXudTidhYWGkpKRo6+x2Ox9++CGVlZUAlJWVATBgwACeeuopbd0777xDRkYGFosFgIKCAgwGA99++61HU+iTTz6huLhY+9lkMuHt7c2PP/6ozbA6vVkF0LNnTyZNmsSIESOYNm0aq1atYsSIEeTn5/Puu+8yYMAAAgIC/vB5nw2Xy43dXnre913umEz6H+CnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU49+/IKShn78VZKyqq0awF+ZsY83JUmIfXbuXQxM/p7nf0vo1u8jRQWOurlVn925KhZhFPWjJcDVqvfOe8601XDKisri7vuusvjmtVqJSwsTJv3dKb7AI9GFEBsbCxz5syhvLwcX19fsrKy6qyJiYlh//79muPgwYM4nU6qqqo81tY2fGrnSVVV1fwPSkFBAU2anPobEF5eXhiNRnx9fQGoqqrS1tYSGBiIwWDAbK55/OXl5UBNc+t0vLxqBhie7nK73djtdo9mmtls1ly1lJeXc+2111JUVITZbObzzz/nn//8p/b5gQMHgJpjCB977DGthoCAACorK8nLy6tXw6omp/qX8Uw0hAF+KqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfC
KWPGi+H7fbMKoKSsijGztvLef5IvyA0XnjEsyI920aFk5BTgOu1sLqMB2kSF0tjqe8HPQP3Z0adTZdSnTyZ01bCy2+1YrdY614OCgrQ5TrVkZmYyfvx40tLSMBgMmEymOkfRWa1WbZ6Ur68vdrudwMBAZsyYwaeffkpBQQGhoaEUFxdz8uRJ4NS8KIPBwMqVKxk3bhxeXl4kJyd7fO7n54fBYGDq1KlkZ2eTnZ1Ns2bNOH78uEeGqqoqTp48yUsvvcSGDRtwOBxER0fjdrsJCQkBoLS0ZkfS999/T3p6OmlpaVgsFnx8fIBTDavanwcNGsTJkyfJy8vjqaee4sSJEwQGBmrf2bJlS0aOHEmbNm2YPn06W7du5ZVXXuG7775j+vTpADRv3hyA48ePs3TpUj744AOqqqrw8fHBYDAQEVH/WR1qhlVdGsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhFPGjDLWLMIpY0YZaxbh1KtvR+aJOs2qWkrKqth9sJD2MY3q5b6YNQ/r154PFu9kZ1aBdq1tdCiP39n+gn73p/7sXBxUxotDQ8goG7pqWJ0rNpuNgQMHEhUVxeTJk1m4cCHffvstEydOZNSoUWe9d/fu3Xz11VeMHDmSuLg4Zs+ezdGjR9m1axepqals2bIFAH9/f44dO8akSZMoLy/nueee8/CYTCYCAgJYtWoVnTp14plnnmHRokWUlZVhNJ76A1k7H2rx4sXcf//9+Pv78/HHHwPUac7t27ePwsJChg0bxrZt21i/fr3H5z4+Pvj4+LBnzx46d+5MXl4eixcvxsvLy6Nhdcstt7BhwwZsNhsnT56kdkxZamoqeXl5hIeH06xZMwwGA8XFxXTo0IE777yTmTNnUlhYSKNGjfDz8zvPt1KD0WggJMRSr3tlwGqt33P9q3winDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOGTPKWLMIp4wZZaxZhFPGjDLWLMIpY0YZaxbh1Jsvt+DQWT8/crKU5M4tL+g7LkbNISHw+rDryM0vIfeEg4jGFiLC6nca0x+h/uzo06ky6tMnE7pqWFmtVo9ZTLXYbDaPI/AWLlyIw+FgypQpBAcHc/DgQb799lsWLFjA0KFDCQ+vGfpnt9sxGAzavYGBgWRkZDB48GAGDRoEQOfOnUlMTMTpdDJs2DBtN1NJSQnvvvuudizge++9R3Z2NgUFBVrWsrIymjVrxtGjR3n77bdJSEigefPmHDt2TMsaGBhIUVERbdq04csvv8RsNtOrVy+WLVvGkSNHALR8Xl5eBAUF8e677xIREUG3bt3YtGmTdqSg1WqldevWdOnShSVLlgBgNBq55pprKCws1L7z5MmT/Oc//6nzHKuqqvjxxx+58847mTJliraLKysri59//hmA0NBQTp48yaFDh2jRosV5vT9QM6zOhMmk//NQVUb9+UQ4ZcwoY80inDJmlLFmEU4ZM8pYswin3n0inCqjPjPKWLMIp4wZZaxZhFPGjDLWLMJ5sX3Hi8ooLq/G6msiLLj+v4yOCD37vc0b+etqPlSAj4mrE8Kx28vqnet0ZPyzI2PNIpyyZrwcsFob6AyrmJiYOrOqiouLyc/P95gnlZKSQrdu3QgODtbuA3C5XGzcuJF+/foBNfOmIiIitCaU1WqlqqqK3r17a67amVPe3t78/PPPVFZW0qFDB8LCwjy+89lnn+Xxxx/n8OHDALRq1Yqqqir69+/Pv//9b21d//79OXLkCIcPHyYyMpLg4GCKioqYPXu21pgqLi5m2bJl5Ofne+RPSEhg0aJFmmvmzJls2rSJnJwcunbtSkxMDJs2bWLRokU8//zzxMXF0b9/f7755htat26t3RcZGcnevXuZOnUq3333HYsWLSI+Ph6ADh06AJCdnU1JSQklJSUez7uiogKAbdu21athBWqG1dloCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFO
vftEOFVGOXwinCqjHD4RTpVRDp8IpwwZS8qczFi6i/TsAu1au+hQht7RFouv13n72rQKJcDP/IfHAgb4mUloGaLmQ+nEqXefCKfKqE+fTOjqMMXk5GR+/PFH7Ha7dm3VqlUYjUaSkpK0a1lZWR7NpMTERAICAvD399caXk6nkzVr1mizpwCaNm0K1BznV8umTZuoqKiguLiY8vJyvL29sVgsOJ1Oj2wrV67E19eXEydOADUNK0DbcQU1O8H27t2rZQRo3LgxBoPBY77WqlWrMBgMFBUVAdCiRQuMRqM2y6qWdevW4e3tzcGDB7XnY7PZ2LRpk7amoKCAjIwMjzoBcnNzmTFjBi+//LL23T4+PrRs2RKAl156icceewyTycQTTzzB9OnTiY6Oxs/Pj44dO9KzZ08UCoVCoVAoFAqFQqFQKBQKmZmxdBcZOQUe1zJyCpi+ZFe9na8MvJoAP899BAF+Zl4ZeHW9nQqFQnE5oKsdVgMGDGDevHkMGzaMoUOHkpeXx5tvvsmAAQO0Y/4ACgsLWbRoESNHjgRqGjFDhw7lf//7Hz/99BObNm1iwYIFFBQUsHfvXjp27IjFYqFly5YYDAZGjBjB8OHDKSsr480336RNmzZkZGRgs9nw9fXFYrGQl5dHp06dcDqdhIaGcvz4cRISErDZbEDNziyA+fPns3DhQsxmM/7+/lgsFsrKyrR1zZs3Z/v27SQnJ+N0OgkKCsLhcNCxY0fS09M96j9w4AAdO3akurqakJAQTpw4QbNmzTRXp06d6Ny5M0OHDtXumT9/Pq1bt+aWW27RrvXr148TJ05QXV3N/fffr82jSkpK0nJ/++23TJs2DYApU6Z45IiNjdV2r9WHCxm8eLnSEAb4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRThkzylizCKeMGWWsWYTzYviOnnR47KyqxeWG9OwCTtjLaRrqf97eZo0D+GBETzJyCjiY76BlmIU2UaH1zlmLLO9FtFPvPhFOlVG/GWVDVw2roKAg5syZw7hx4xg2bBgWi4W7776bZ555xmOd2+3G5fLcUjdkyBBmz57N3r17efTRR2ndujU+Pj6YTCYmT55MXl4eY8eOBSAqKorhw4djNpu5+eab6dq1K88//7zmqqiowGAwYLFYKCwsxG634+XlRWBgoLamdq6Uv3/N/yiVl5djt9tp06aNtgsLanZduVwurFYrhYWFlJaWUl5eTmRkpEfDyu12YzKZ8Pf3x2azYbfbCQwMxNvb28OVk5OD1WqltLSUyspKKioqaNOmDWbzqVdZVlZGXl4eJpMJk8mkNakiIiK0Nffccw/e3t5MnTqV22+/nYSEBCZPnqw1+dxut8eusHPFaDQQEmI57/tkoSEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRjl8Ipwqoxw+Ec7LPWPmsZKzfu6odF3Q78GSQiwk/fmy8+Zyfy9/lVPvPhFOlVGfPpnQVcMKanb3zJ49+6xrQkNDufvuuz2uGQwGTCYT//rXvxg5ciTTp09n2rRpTJkyRdsttGnTJpYvX86zzz7L5MmTtXs///xzDAYDQUFBVFRUYLfbiYmJYcWKFQBUVlZy6623kpmZSdeuXQHYuXMnAM8//zz33HMPAKmpqQwePBhAm1e1f/9+zGYzKSkp2veNGDGCH374QVtz7Ngx3G433bt358MPPwSgqKiIG264gePHj2vrFi5cSFlZGevXryc4OJi4uDhuuukmli5dyjPPPKPtQjt48CBXXHEFCxYs0L6zS5cu/PDDD9jtdqxWK+Hh4cyePZsBAwbwwgsvALBjxw7WrFlDeno6GzdupEePHn/6vn6Py+XGbi/984WSYTLpf4CfyqifAayno/eaRTj17hPhVBn1mVHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMI58Xw+Xud/S9zW7yN
FBY66uUGfdYs2iljRhlrFuGUNePlgNXqd867znTXsDoXYmJitBlRtRQXF5Ofn6/NtkpJSaFbt24eR9v16dOH5cuXs2zZMoYNG6Zdz8rKIiIiAl9fXzZt2oTL5fLYweXt7c1NN93EvHnzNP/u3bsBPOZOJSUlYbFYcDgcxMTEUFlZybFjx6iursZms2mNp9ocV111FVDT6AIwGk+9tODgYLp27cr3339/1pri4+NZu3YtGzdupF+/fhw6dIiqqioOHDhAly5dPJ7Rb7/9RpcuXdixYwclJSUUFBQQHx8P1OwqW7t2Lb169eKrr77S5mbVBzVQ7sw0hAF+KuP5c7EHsP4Reqv5r3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMI54X4woL8aBcdSkZOAS73qetGA7SJCqWx1feiZNVTzX+VU8aMMtYswilrRllokA2r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSqrZSJuVlcVdd93lcV+PHj0wGAwsWrSIRYsWaQ2b3NxcbrrpJu0+gJycHB5++GHS0tLw8vIiLCwMl8vFtddeC9Q0fxo3bsxnn33Gl19+SXZ2NhEREbjdbgICAoiMjOTAgQNUV1djNBp55pln2Lt3Lw6HgyuuuAJA+2dWVhb+/v5s3brVoyZfX1+AOjUtWrRI24m1YMECrFarlrv2n2PGjGHdunVs27aNqqoqKioqAJg4cSJeXl6EhoZiMBh4/vnnPY5D/OqrrwC09QqF4s852wDW4f07XppQCoVCoVAoFAqFQqFQKC6YoXe0ZfoSz7+k2iaq5i+pKhQKheLi0iAbVn369OGdd95h9+7dPPbYY/z222989dVXtG7dWjsWz263s3r1alavXs3atWsB8PHxwc/Pj6NHj3LTTTeRmJjI7Nmzyc/Pp3fv3tp9Xl5eGAwGfvrpJx555BFKSkqYN28eAC1bttTWtWzZku3btxMbG8vw4cNZu3YtOTk52qwom80GQLNmzbQdUK1atWLGjBkAJCQkaK7Q0FByc3Pr1AR41LR3715mzpzJLbfcQk5ODhaLhfz8fLZu3erxnStWrODgwYOMHz8eHx8fHn/8cQAiIyO1nVx///vfWbFiBf/4xz9IT0/n8OHDOJ1Oqqur6d+/f73fj9mshsr9noYwwE9lrB+iBrBezIwifSKceveJcKqM+swoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhPNi+YICfHju/kTyi8qwCxgDcPo/9eYT4ZQxo4w1i3DKmlE2GmTDasWKFXh7exMfH8/UqVOxWCwkJSWxadMm8vLytAaPy+XC7T61X7eiooKysjLCwsJIT08nJSWF+Ph4XC4XK1eu5JprrgHA7XbjdDpJSkpi9uzZmM1m4uLi2LNnD7t379b8ubm5xMTEYDKZmDRpEhEREQQGBlJYWOiRNzc3l+7du7NhwwYcDgcJCQmkpaWRlpbGAw88AIDD4ahTU6tWrfjtt988atqyZQsAa9asAWp2gsGpmVq1bN26lY8++qjOHKqtW7dqRwW+/vrrXHHFFSxevJicnBz8/f2prq7m9ttvx9+/fr9gNxoNFzRs8nKnIQzwUxnPD9EDWGvRU81/lVPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmi+m80h+Cft25xHR2EJEWMAF+0T+vkum9yLKJ8Kpd58Ip8qoT59MNMiGVUpKCklJSXzwwQfaNbvdTteuXbWdTFarlT59+jBixAhtzfbt23G73Vx33XVMmDBBuz5hwgRtF5bVaqWqqorWrVsza9Ysbc1nn33GqFGjSEtLo2fPngQGBnLw4EFeeOEFBg0apK3r3bs3WVlZHD58WJtZ5Xa7eeedd7Sfc3Jy6NWrF/v379e+s6SkhOTkZI+aJk6cyMcff6zVFBAQQFFREe+//752hCFA586dKS0tpbKyUvuO
2iZeLbNmzeLhhx8mIyNDu+bt7c1jjz1Go0aNePnllxk6dChvv/32Be2ucrnc2O2lf75QMkwm/Q/wUxnVAFa9OPXuE+FUGfWZUcaaRThlzChjzSKceveJcKqM+swoY80inDJmlLFmEU4ZM8pY88V0lpQ5mbp4JzuzTp2G0j4mlMfvbI/Fr/5zpvVcsyifCKeMGWWsWYRT1oyXA1ar3znvOmuQDas/mk9ltVoJCwvTZjjFxMRo/7mW2mZNp06dPK7HxsYyZ84cysvLiYmJAaBJkyYea7Kzs/H29ubgwYPa5+np6dp6qGlMnThxQst47bXXYjQa8fPz0xpJtZ8B5Ofna1mdTifNmzf3+M4jR47g5eWlrQ8LC6OoqIjo6GhtTXFxMQ6HA7fbzaFDh7Q8YWFhGAynfpGelZWF0WgkLy+vzvNcvnw5MTExpKWl0bx5cxITE+usOR/UQLkz0xAG+KmM54cawCrOqXefCKfKKIdPhFNllMMnwiljRhlrFuGUMaOMNYtwyphRxppFOGXMKGPNF8P5wVc768yZ3pVdwPtf7bwoc6b1WLNonwinjBllrFmEU9aMstAgG1Z2ux2n08lDDz1EWloaFouFO+64A6vVqs1wSk5OZtq0adjtdqxWK1Czwwrg8OHD9OzZk4KCAhISErjhhhtwu93YbDYSExMxGAzk5eXx5JNPkpqaitlspqqqitDQUM3fpk0b1q1bx5YtW3jrrbfIzs4mJCQEu90O1MyS8vb21ppMb7zxBkuXLsXhcGCxWAgODqakpOYosdpj+w4cOKDV5OfnR3FxsUdNV1xxBfv372f16tUsWbKE3NxcQkJCtOdSm9/b25ujR49y8803c/z4ccLDwykvLyc8PJzi4mJt/eTJk5kyZYr2c21jbMGCBdx77731fj9qhlVdGsJ5qCpj/RnWrz0f/O5vb7WNrvnbWxf674Neaxbp1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNF8spcs60XmsW6RPhlDGjjDWLcMqaUTYaZMPK7XazaNEi2rRpw+TJk8nLy2PixIkYjaf+IAwYMIB58+YxbNgwhg4dSl5eHj/88AMGg4FZs2YxcuRI4uLiGD58OG+//bZ2n4+PD1arlf3791NUVMSQIUNYv349O3fuxMvr1Lbhrl27AvDhhx9y3XXX8be//Y25c+fWydqmTRvWr1/PvHnzePDBBzl+/DjLli3D19dXm6/VtGlTDAYDP/74I1FRUTz22GN8/fXXFBYW4nQ6Nde1117LypUreffdd/n73/9+xu/09/enqKiImJgY7r//ftasWcO2bdsICQnBx8fHY21tM+6ee+5h0aJF/N///Z/HUYLni5phdXYawnmoKuP5ExICrw+7jtz8EnJPOC7a+dino7ea/wqn3n0inCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpY0YZaxbhlDGjjDVfqPOvmDOtt5r/Cp8Ip4wZZaxZhFPWjLLQIBtW3t7eVFRUMGXKFIKDgwGorq5m1KhRmM01JQUFBTFnzhzGjRvHsGHDsFgsdOzYkS1btvDggw9qc6diYmIoLCzE7XZrx/YFBgZis9lwu91MnTqVhIQEXnjhBSZMmKA1mRo1agTUzIratm0bv/76K7169SIvL4/U1FTNFR4eDkBoaChz584lIiKC//73v7zxxht4e3trNfn4+FBRUYHNZmPq1KkkJiby97//ncmTJ2s1NWvWDIDg4GC+/fZbLBYLDzzwAN9//z1ZWVnadyYmJrJ//36ys7PZuHEj0dHRPPzww8yaNYvGjRt7PEuXy0WHDh3IysoiLi6Ovn37XtC7UTOs/hiTSf/noaqMF+4L8DFxdUI4dnvZBc2tOh291yzCuSu7gEMnHLQMs9AmKlR3+UQ4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1nyxnCLnTOu1ZpE+EU4ZM8pY
swinrBkvB6zWy3yGlclkIiAgQGtWAVx33XUAVFZWatdiY2OZPXu29vNHH33Eli1baNeunXZt/vz59OvXjz179uDr66td9/HxYePGjdrPLpeLiRMnajuemjZtCkC3bt14//33tXVvv/02qamp+PvXbC+uqKjQvqdFixbaupkzZ1JWVqb9bDabCQwMJDU1VbuWm5vL5MmTtZpq8/3zn/9kxIgR2rrDhw+TlZWlZYqPj2fbtm1s2bJFm2OVmZnJrFmztEZbLb6+vrz77rvceOONDB8+nIuBOp/zzDSE81BVRv35RDj1mDGvsJTX5v5MSVmVdi3Az8wrA68mLLh+RzZczHx/hVNllMMnwqkyyuET4ZQxo4w1i3DKmFHGmkU4ZcwoY80inDJmlLHmC3X+FXOm9VbzX+ET4ZQxo4w1i3DKmlEWGmTDqrq6msLCQh544AF27tyJxWIhPj4ewGPX0u+p3an06aef8uabb1JQUEB8fDwHDx6kurqa8vJyfH19MRgMVFRU8PDDD5OWloaXlxcdOnTA7XZrxwIeO3YMgF27dtG3b1+ys7OJiIjQmkqlpTW7jGqP4Bs9ejR79+7F4XDQvn17CgoKPLKeS03l5eUAbNiwgTVr1pCbm0t0dLQ2N+vYsWPExsYSERGBzWYjOTkZu91OeHi4trPqxhtv9HgmZWVl3Hjjjbjdbt5++22+//57Jk+eXKexpVAoFKL5fbMKoKSsinFzfua9/yRfolQKhUKhUCgUCoVCoWhIDL2jLdOX7PKYZdUmKpShd7S9hKkUCoVCcS40yIZVRUUFbreb3bt389hjj/Hbb7/x1Vdf4e3tTVXVqV92Dhw4kNzcXNauXQvUNJEMBgM//fQTN910E4mJicyePZvi4mIAbDab1nAyGo389NNPPPLII5SUlPDJJ5/g5eWl7Viy2WwAHD16FH9/f4YPH87atWvZtm2bx+fV1dV4eXmxceNG+vXrR6tWrZg5cybV1dXa7qtzranWuXfvXjp37syAAQNYtGgRR48e9fg8KysLq9VKRUUFjzzyCEeOHGHx4sWYTCbuv/9+7Tvz8/Nxu90EBgYSGBhIWFgY27Zt4+6772b9+vX1fj9msxoq93sawgA/lVF/PhFOvWbckXmiTrOqlpKyKnYfLKR9TP0a6XqtWaRPhFPGjDLWLMIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmi+mMyjAh+fuTyS/qAx7eTVWXxNhwRc+T0bPNYvyiXDKmFHGmkU4Zc0oGw2yYQVgMpmIj49n6tSpWCwWkpKS2Lhxo8cxey6Xi+rqau3nqqoq3G43Xbt2JT09nZSUFOLj4ykrK9OaVlCz68jlctGlSxdmz56N2Wyme/fupKameqwDiIiIwGQyMWnSJCIiImjbti27du3SPi8tLcXpdNK9e3c2bNiAw+GgQ4cObN++HZfLc1vgudQEkJCQwIkTJ5g0aRLR0dE0b96cI0eOaJ8PGTKExx9/nAkTJjB37lxKSkowm81UVVWxZ88e7UjEI0eO0L59e3bu3Mnw4cO57777uOuuu0hPT+fQoUMeRxieK0aj4YKHV17ONIQBfiqj/nwinHrLmFtw6KyfHzlZSnLnlvX2w8Wt+Uh+Cft25xHR2EJEWMBF8+rtvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtZ8MZ2ifj+l55pF+UQ4ZcwoY80inLJmlIUG2bAyGo00b96cTz75RLtmt9vp0qWLdjwewLx58zzuq92BNHLkSK666irt+kMPPcSPP/5IUFAQUNPYCgwMZNasWdoat9tN27ZtteZR7Yyq6667jldffVVbt2DBAnbt2qXNuqrN884772h+gFtvvZXDhw+fV021s6zuuecej51SY8aMYcGCBVqm0NBQAF577TWcTiffffcdb7zxBk888QTHjx/X7quqqiIsLIy9e/dq12qbeYcPH65Xw8rlcmO3l573fZc7
JpP+B/ipjPrziXDqNWNE6Nn/h7x5I39dDMYtKXMydfFOdmadOlqifUwoj9/ZHoufV729en0vIn0inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinrBkvB6xWv3PeddYgG1Z/RO1RfRdr7ZnWuN3uP7xenxz1df3+vjOte+ONN1i5ciUzZ87k0KGa3QsxMTHa53fffTcvvvgiq1atokePHhw9epQ1a9YA0KRJk3Ou4/eogXJnpiEM8FMZ9ecT4dRbxjatQgnwM//hsYABfmYSWoboYjDuB1/tJCOnwOParuwC3v9qJ8P7d7wgN+jvvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPK5XJx5MgRHnjgAXbu3InFYiE+Ph4Aq9V6xvtqdzi99dZbHDx4kIKCAuLj48nOzgZOzbAym82cOHGChx9+mLS0NLy8vOjQoQPV1dXaLqbS0ppdRKmpqfTt25fs7GwiIiIICKg5GsrLy8sjzzPPPMPevXtxOBy0b9+ew4cPexwJeC41eXt7A/Dll18yb948cnNziY6OxuFweGQCmDx5Mh9//DF+fn78+9//xuVy0aNHD6KiorQ1t99+Oxs3buQ///mPds3f35/AwEBatmx57i/kd6gZVnVpCOehqoz684lw6jnjmIe7MmbWVo+mVYCfmTEPd72g/165WPmOnnR4DO2txeWG9OwCTtjLaRrqf0kzinTKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKesGWWjQTasAKqrq9m9ezePPfYYv/32G1999RXe3t74+Z06VmrgwIHk5uaydu1aAMxmMwaDgS1btnDTTTeRmJjI7NmzPY4RBPDz88NoNPLTTz/xyCOPUFJSwieffIKXlxeBgYEea48cOUJsbCzDhw9n7dq1bNu2zeNzf39/vLy82LhxI/369aNVq1bMnDmTqqoqjEbPP7jnUhNARkYGnTt3ZsCAASxatMjjaEGAZcuWMWXKFHx9fXnkkUdYsmQJhw8fJicnh+PHj2u7p7p3787JkyeJiYmhY8eOpKamcvz4cRo3bozZXL8/GmqG1dlpCOehqoz684lw6jFjSIiFBeNvI23vcfb8VkB8q1A6xdV/t+fvudB8mcdKzvq5o9J1wf/9p8f3Itonwql3nwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcKqM+vTJRINsWPn4+FBZWUl8fDxTp07FYrGQlJTExo0bPRotLpeL6upq7Wd/f3/cbjdXX3016enppKSkEB8fT2lpKSUlJR4zplwuF126dGH27NmYzWa6d+9Oamqqdhxf7dqmTZtiMpmYNGkSERERxMXFsXfvXu1zk8mE0+mke/fubNiwAYfDQYcOHdi2bRs+Pj7nVVOtMy4ujhMnTjBp0iSio6Np2rQpx44d0z5funQpAOXl5UyePFn7jsOHD/PBBx8wZswY3G43J0+exGQyceTIEXJzc2ndujXh4eHs3LmTDRs20LNnz/N+N2qG1R9jMun/PFSVUX8+Ec6GkDG2WSCd4ppgt5fVe26ViHz+Xmc/8tXibdTFnC1RThkzylizCKeMGWWsWYRT7z4RTpVRnxllrFmEU8aMMtYswiljRhlrFuE8XlRGcXk1Vl8TYcEX/svjhlCzyqjPjDLWLMIpa8bLAav1Mp9hZTKZCA0N5ZNPPtGu5ebmcsMNN1BZWaldmzdvnsd9VVU1x0zdf//99OnTR7ver18/9uzZg6+vL1AzI8rHx4dZs2Zpa1wuF23atMHpdAI1jSqAdu3a8f7772vr3n77bfbu3asdHVhRUQHAq6++SosWLbR11113HWVlZedVU22+66+/nhEjRmjrnnzySY4dO+aR
6ZdffqFfv37Mnz+fmTNn0q1bN+68807Ky8s1N8Add9zBhAkTNNfq1at56qmn2LRpU70aVqBmWJ2NhnAeqsqoP58Ip4wZL9QXFuRHu+hQMnIKcJ02StBogDZRoTS2+upizpZop4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPh1GPGkjInM5bu8jjSvV10KEPvaIvF1+uS5/srnCqjHD4RTpVRnz6ZaJCHKVZXV1NUVORxlN/GjRuBU3Oe/ojanUoZGRnaNafTyZEjR6iurtaaOQaDgYqKCnJycrR1mzdvxu12a7Opjh07BuCxBmDfvn3AqXlStbuoNm/erK2x2WwUFhZy+u6vc6mpNt/+/fs9vjMrK8sjU1ZWFv7+/syZM4eJEyfSrVs3AGJiYrS1FkvNsVV5eXkertpGVm1zT6FQKBSnGHpHW9pEhXpcaxNV8//4KBQKhUKhUCgUCoXi0jNj6S4ycgo8rmXkFDB9ya5LlEihUCgU50qD3GFVUVGBwWDgmmuuISAggKuuuopffvmFoKAgj0bL72dYlZaWYjAYmDlzJrNmzSIqKorGjRtrO51sNhu+vr5UV1djNpu59dZb8fPzo3379uTk5BAaGorBYNDWAhw4cICEhATCwsJo27Yt69ev9/jc6XTi7e3NK6+8wtixY7nyyisxmUz4+PhoDahzranWuX79ehISEmjevDnR0dFkZ2drnx86dIgffvgBh8OB0Whk3LhxfP755wwYMACn00lBQc3/YAcHB2M2m9m4cSMJCQkEBQXRqVMnfvrpJwBCQz1/IXs+mM0Nsg8qlIYwwE9l1J9PhFPGjBfTFxTgw3P3J5JfVIb9Ih8tcfo/LwZ6fo6inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzyljzxXIePenw2FlVi8sN6dkFnLCX0zTU/5LlE+1UGfWZUcaaRThlzSgbDa5hZbPZcLlchIeHExoayoEDB0hNTSU+Pl47rq+W38+w+umnn3C73Vx//fXs3LmT7OxssrOzGThwIB9//DFQ02DKz8/Hz8+P2NhYdu/ezdatWwkPD6dNmzbaDKs9e/YA0KFDBwoKCsjNzWXdunXceeedLF68WPvOtLQ0nE4nSUlJ/Prrr+zZswez2Uz//v1ZuHDhedX0yy+/ANC9e3cyMzM5cuQIhw4d4sEHH2Tu3LkAOBwOrWaXy0VRURFbtmxhy5YtADRq1EhzVVVVYTAYsFqtFBcXs27dOozGmn+ZaneSnS9Go4GQEEu97pWBhjDAT2XUn0+EU8aMF9Mn6r/nZHwvIpx694lwqoxy+EQ4ZcwoY80inDJmlLFmEU4ZM8pYswinjBllrPlCnZnHSs76uaPSdcH//5zeav4rfCKcMmaUsWYRTlkzykKDa1jVNnl69erFf//7XwA+++wzxo4dS0hICEFBQdra02dYVVRU8PPPPwMwefJkfHx8qKys5NZbbyU9PR2DwUBQUBCrV6/G6XTSvXt3ZsyYAUBqaiqDBw8mKCiI2NhYAJYvXw7AU089xXXXXQfAiBEjtO8ICgri2LFjZGdnY7FY+OijjwAoKirihhtuYN++fVrWc62pdqfYK6+8QkxMDAADBgwgLS1N+87Y2Fh69uzJsWPH+Oyzz4CaYwt79epFp06dcLlc2jNISEggNjaWb775RjvusLbZFRYWVq/343K5sdtL63Xv5YzJpP8Bfiqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhPN4URnFF3GHOuj/OTaE96Iy6jOjjDVfLKe/l+Gsn1u8jRQWOurl1mvNIn0inDJmlLFmEU5ZM14OWK1+57zrrME1rFJSUggODtZmLQH07t2bUaNGcfLkSa2R83u2b99ORUUFANnZ2cTHx+Pt7c3NN9/MF198QUREBL6+vqSkpGC1Wjl+/Lh2b1JSEkFBQfz222/cfPPNVFZWsmPHDoxGI1lZWVrD
qk+fPlojKyYmhtTUVNxuNw6HA5vNRlBQEMHBwSQlJfHzzz9z5ZVXnnNNhw4d0jJlZWVpdfbp04cJEybg5eVFixYttO/etGkTbrcbg8FAcHAwUDOv6pprrgFg9+7d3HXXXYwYMYL//ve/5OfnU1FRwT333APAVVddVe93pAbKnZmGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLK4RPh1GPGkjInM5bu8jiiq110zQxQi2/9Tvb4PXp/jnp8L6J9IpwyZpSx5gt1hgX50S46lIycAlzuU9eNhpr5w42tvhecV281/xU+EU4ZM8pYswinrBllQXeHKWZmZvLQQw/RsWNHkpKSePPNN6msrNQ+z8rK4sorr+THH3/EbrcDYLVaCQwMxO128/bbb9OhQwf69++vHaFXex+Av78/Tz/9NJ06daJr1678+uuvlJSUkJSUpK2LiYlhz5493HrrrbRv355bb71Vmzl1/fXXc/DgQaqqqoiLi+Ojjz4iKSmJjh07MnPmTACaNWtGZGQkWVlZhISEYDAYuPfee7WacnNzKSwsJDk52aOmlJQUbrrpJtq3b8+//vUv/Pxq/tZZUlKSlj8iIoKJEyfStWtXOnXqxKpVq3C5XFx11VV4e3sDkJycjM1m44cffiAtLY3u3bsDcPToUe07KyoqcLvdvPLKK/Tu3Zt//vOfjBs3DoCmTZuesfGnUCgUCoVCoVAoFArFuTJj6S4ycgo8rmXkFDB9ya5LlEihUFzuDL2jLW2iQj2utYmqaZQrFAqFQt/oaoeVzWZj4MCBREVFMXnyZPLy8pg4cSLl5eWMGjUKALvdTteuXcnJyWHYsGEMHTqUvLw8SkpqzqgdPHgwcXFxzJ8/n3vvvZemTZuyfv167HY73t7e+Pv7k5OTw913301YWJjWZOrXr5/mj46Oxu12U1BQwLBhw0hPT2ft2rVYLBY6dOjAtm3bgJrm1+7du+natSvJycnaEYTXX3+95rJarZSVlZGTk8MDDzwAwJw5c4Ca4/xq14WEhFBZWUl1dTVPPfUUGzZsoKysjMjISMLDw7UZVH5+fmRmZnLLLbfQpk0bPvzwQwBuu+027Tl26tSJsLAwhgwZAoDBULMd+oorruCWW24BoFWrVixYsACXy8WAAQMwm83Mnz8fgPj4+At6j2az7vqgl5yGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNV8s59GTDo+dVbW43JCeXcAJezlNQ/0vacaG5BPhVBn1mVHGmi+mMyjAh+fuTyS/qAz7RTyKVM81i/KJcMqYUcaaRThlzSgbumpYLVy4EIfDwZQpU7Rj7Kqrqxk7dixDhw4lPDwcAF9fX+bMmcO4ceMYNmwY/v7+uN1u4uLiGDRoEACdO3emc+fO2Gw2ze9yuThx4gSDBg1i1apVFBQUEBISwvHjxz3W7dmzh7Zt29K8eXOmT5+O2WzG19eX2vlOtaSlpXHffffx888/8+6772pzn4qLi7U1drsdg8FA//79WbZsGQ6Hg5CQEAoKCigtLSUwMBCArVu3cv3111NZWcmUKVOwWCyYTCacTqfHd2ZmZjJo0CDWr1/P+vXrady4MSUlJRQWFnqsmzlzJmPGjPHYZfbCCy9gNte88uuuu45du3YRFBTEp59+itFo1Oo7cuTIeb230zEaDRc8vPJypiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDj1ljHzWMlZP3dUui7K//+o9+eot/fyV/hEOGXMeDF9R/JL2Lc7j4jGFiLCAi6aV8/vRdTvp/RcsyifCKeMGWWsWYRT1oyyoKuGVUpKCt26ddOaVVAzy2n06NFs3LiRfv36YbVaKS4uJjY2ltmzZwOwadMmBg0aRNu2p7b2ent7c99997F27Vqg5tjAqqoqWrduzYsvvsiLL74IwGeffcaoUaNIS0ujZ8+eBAYGcvDg
QR566CGt+VWbIysri8OHDxMUFATUNMCefvpp7eecnBx69erF/v37te8sKSkhOTmZ0aNHM3r0aAAmTpzIxx9/rNUUEBBAUVER//znP7npppu07+zcuTP5+flUVlZq32GxWHjhhRe0/Bs3buThhx8mIyPD41leccUVFBcX88orr2A2mxk9ejQrVqygR48eADRp0gQAh8NBVVUVXl5ePPHEE7z//vvk5eXV8w2Cy+XGbi+t9/2XKyaT/gf4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUseaL5fT3Mpz1c4u3kcJCR73coP/nqNf3ItInwiljxovpKylzMnXxTnZmndrt2D4mlMfvbI/Fr/5z5NR7kaNmEU4ZM8pYswinrBkvB6xWv3PedaarhlVWVhZ33XWXxzWr1UpYWJg2wykmJkb7z7XUNms6derkcT02NpY5c+ZQXl6uzWSqbdTUkp2djbe3NwcPHtQ+T09P95jh5Ha7OXHihJbx2muvxWg04ufnpzWSaj8DyM/P17I6nU6aN2/u8Z1HjhzBy8tLWx8WFkZRURHR0dHamuLiYhwOB263m0OHDml5wsLCtCP+ar/TaDTWaTLNnTsXk8nEvffey5IlSwA4fPiw9rnT6cRkMpGamkpeXh4RERFUVVXx9ttvU1lZSXl5Ob6+vtQHNVDuzDSEAX4qo/58IpwyZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVEOnwin3jKGBfnRLjqUjJwCXO5T142Gmnkyja2+FyWv3p+j3t7LX+ET4ZQx48XwffDVzjpz5HZlF/D+VzsZ3r/jBblBvRe9OlVGOXwinCqjPn0yoauGVe3Mp98TFBSkHdmXnJzMtGnTPNZu374dODU7qhar1Yrb7cZms5GYmIjBYKCg4NT/SDudTtasWUNQUBAbN26kY8eO2menHxG4adMm7Ha7dt3b21trMs2YMYNPP/2UgoIC/P39CQoK0uZp1e5mysnJ4cknnyQ1NRWz2UxpaSlWq1X7jiuuuIL9+/ezZcsWnnnmGbKzs7WjAmu/MzExEV9fXxwOB2+88QZLly7F4XBgNptp3LixxzGEeXl5TJ48mZiYGDp37ozRWNO9bNasmbamVatWVFdXc/fdd1NQUIDBYMDf3x+j0YjL5cJut9e7YaVmWNWlIZyHqjLqzyfCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxpovpnNYv/Z88LvdHW2ja3Z3XOj/76j356jn9yLKJ8IpY8aL5RM5R069l4uDynhx0HtGGWsW4ZQ1o2zoqmF1LgwYMIB58+YxbNgwhg4dSl5eHj/88ANGo1GbcQUwcOBADhw4oP3s4+NDUFAQe/bsYc6cObRu3ZoFCxZQVFREZWUlfn5+TJ48mc2bN/Phhx8yZswYAgICKCsr480336R79+78+OOPmq9NmzasX7+et99+mwEDBuBwOFiyZIk2IwqgadOmGAwGfvjhB5o0acLgwYNZsWIF2dnZlJWVaeuuvfZaVq5cydixY7n++uvp0aMH8+bNw+0+7a+gAS1atGD//v188sknPPDAAxw4cICUlBQqKiq05tLkyZNZunQpTqeTqqoqBgwYwOLFiwEoLT11VJ/VasXb25uSkhL69u1LdnY2W7du1b7z9F1c54OaYXV2GsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUQ6fCKceM4aEwOvDriM3v4TcE46LPj8H9P8c9fheRPtEOGXMeKG+v2KOnHov+nSqjHL4RDhVRn36ZEJXDava+VS/x2azaUfvBQUFMWfOHMaNG8ewYcOwWCwkJiayadMmKioq8PHxAWrmS1VVVWEwGLR7IyMjcTqdzJo1i4KCAhISErj99tv5/PPP6dq1K9dddx3NmjXjww8/pKSkhGeeeQYvLy9uvvlm7r//fvr166e5WrVqBUBwcDCLFi0iIiKCV199lTfffJOqqiot
u7+/Pw6Hg4qKCmbOnEliYiIPPfQQL7/8MhUVFcCpnU/+/v5s2bKF9PR0/vWvf7Fz505++ukn7TubN2/O/v37CQgIYO7cuURHR/PWW2/x3HPP4XQ6AfD19dWON8zKyqKkpIRWrVpRVFTEt99+y2+//UarVq3w9/enVatWHDhwgIULFwI1DbGKigry8/M95oidD2qG1R9jMun/PFSVUX8+EU4ZM8pYswinjBllrFmEU0TGXdkFHDrhoGWYhTZRoRfsawg1q4z684lwqoz6zChjzSKcAT4mrk4Ix24vu6C5Vaej9+fYEN6LyqjPjBfLJ3KOnHovctQswiljRhlrFuGUNePlgNXaQGdY/dF8quLiYvLz8z1mSsXGxjJ79mzt502bNrFp0yays7OJj48HYN68eUycOJE1a9Zou49iY2PZt28fGzZs0O697777MJlM2n0tW7bEbDZTVVXFqFGj6NevHwDr1q3TMgLaMXvvvfceXbp00XwfffQRR48e1X729fXFYDCwZcsW7Zrdbufll1/Wjg6MjIwEoHfv3rz++uvaupdffpmffvpJ+67aXU8rV670aCiNGTNGa1g1atRIu+50Ojly5AhHjhwBapp4I0eOZNGiRSQkJLB8+XLcbje//fYbbrebqKgo/va3v+Hv74+XV/0Hb6rzOc9MQzgPVWXUn0+EU8aMMtYswiljRhlrFuG8GL68wlJem/szJWWn/nJQgJ+ZVwZeTVhw/Y6zOR091izaKWNGGWsW4ZQxo4w1i3DKmFHGmkU4Zcx4ob6/Yo6cei/6dKqMcvhEOFVGffpk4qI0rA4cOMChQ4c85j6dzj/+8Y9z8iQn151PtWrVKoxGI0lJSR5rMzMzGT9+PGlpaVqDZfny5VrjqXY+VXJysod/6dKlvPHGG6xcuZKCggKcTicul0ubf+Xt7c211/4/9s48Lqp6///PmYFhGRwWRQx3MAEXRE3NjRat1G56M7tZplbmpSK7V/O2+C3TLDNvZmVqWpq7Vje7LmlmlpFmVmqK+wK4gCDIMjAsAzPz+2N+HJ1QbzLzsYOf83o8eiTnfM7zvF7ngzjDez6f98389NNPzJs3jylTpuDr60twcDAtW7ZUikvV/tavX8+UKVNIT08nIiKCs2fPUlVVRXl5Of7+/vj4+FBYWMirr77Kpk2bsFqtREZGAijb71X//9ixYzz66KPs2bMHk8mkHD99+jQtW7ZUCm9Dhw4lLy8Po9FIq1at3Fal9e7dm8DAQMLCwrDZbBQUFBAQEKA809atWytjZ82axfvvv19jHm666aY/NF+aNGnSpEmTJk3XUr8vVgGUlFUxZfGvvPePxMtcpUmTJk2aNGnSJK+SBrVl3poDbr2s2rQII2lQ2z/RlSZNmjRp0nRpeVSwOnXqFP/617/Yt29fjX5L1dLpdH+4YHWp/lTTp09n6NChbv2phg0bxu7du+ncuTOzZs0iJyeHyZMns2DBAsLDw936U40aNUq57q677uKNN95g4cKFDBkyhIYNGzJnzhx0Oh2hoaHKuL///e9s27aNrKwskpOTOXDgAF9//TVt2rRx86vX61m1ahVdunThmWeeYcWKFcp2gEVFRfj7+ysrlVatWsXw4cMB1+ovvV6P1WpVxgLs27ePyMhIkpOTSUlJ4eeff3Y7b7fbAcjNzeXhhx8GYPHixcCF/lTh4eGUlZXhdDp5+umnadGiBf/5z39Yu3YtNptNWa0F8Ouvv2IwGBQuuLYdfOedd/7QfF1OnjbOvR5VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUxv8fadyKtRrKpWSVkVh04V0D6q/iXP/y+pNbNIpoweZcwsgimjRxkzi2DK6FHGzCKYMnr0Ji84yI/nhnUit7AMS7kds7+B8BDPe6to8+IdaR69I7V7lDGzCKasHmWTRwWriRMncvToUSZMmMBNN92krDqqrS7Vn2rIkCGMHTvWbVx2djZOp5P3339f2RqvqqqKSZMm8eGHH1JU
VERcXBwLFiygadOmynUOh4PS0lJatmzJhg0b8PHxQafTERQUxIIFC5g0aRLgKgiBa7XV+++/T2RkJI888giLFi1i3759xMfHK7yGDRuSlpZGamoqnTp1IiQkhIMHDyr3dDqd2O12oqKiWLVqFSaTiaFDh7JixQqys7Pdcvn4+KDX63n33Xdp2bIl9913H59//rlSsDKZTDRr1owuXbooxbHu3buzfft2ZUtAcBXShg4dygMPPABAZmYma9eupby8nPz8C5+oKS8vx+Fw4OPjQ3BwME2aNOHAgQNs3bqV+++/v1ZzqNfrPG7YeT2rLjTw0zyqjyeCKaNHGTOLYMroUcbMIpie8rLyT1/xfOb5UhI7N/PoHmrLfC2YMnqUMbMIpoweZcwsgimjRxkzi2B6k7f7yDmO/HSS2OZhdIxp6DWumjOL+l2NbN87IngimJpHOXgimJpHdfJkkkcFq927d5OUlKSsHPKGft+f6lJq1KgRMTExbn2cBgwYwKRJkxg3bpzSd+pSfktLS5k5cyZxcXEAdO/encaNG5OSkqKMS0lJwcfHhwcffJDx48cDrsLTf//7X77//nvi4+MxmVz/0I8cOZLHH39cuXby5MkcPHiQkpISt1Vhq1atIjg4WPl6/fr1ylZ+1cfbtWvHJ598oozZv38/n3/+OadPu35BExYWRllZGVOnTnXrddW7d28KCwuVr4ODg922CRw8eDAdOnRgwIABbiusevXqxdGjR9mzZ49ybOrUqUybNo3BgwdjMBgu+RyvJIfDicVSetXXXe8yGNTfwE/zqD6eCKaMHmXMLIIpo0cZM4tgeosXGXblF/yN6weqpmm4TPMikql2ngim5lGdHmXMLIIpo0cZM4tgepOXk1/K5I9/rtEPc9JjXWkYWvt+mGrOLIopo0cZM4tgyuhRxswimLJ6vB5kNgf84VVnHhWsQkNDqVevnieIWiktLY377rvP7ZjZbCY8PJy0tLQrXgcQFRWlHIuKisJqtZKVlaX0nTp27BhVVVVu43Q6HS1btlQY1QWrgAD3X56Ul5cDrlVN0dHRBAQEYDAY3IpVTqeTiooKdDodAM2aNbsk69y5c27MqKgo8vLyKCoqUngWi4W8vDy34lhUVFSN57B9+3YA2rdvX8PvzTffjMVioUWLFiQkJFBSUkJ+fj7h4eGXfZZXktZQ7vKqCw38NI/q44lgyuhRxswimDJ6lDGzCKanvDbNwwgK8LnktoBBAT7ENQtVXdNwGeblWjDVzhPB1DzKwRPB1DzKwRPBlMXj74tV4NpaeNLCn73SD1ONmUUzZfQoY2YRTBk9yphZBFNWj7LIo4LV0KFDWbt2LcOGDavVapzaymKxcKntB4ODg5Xt8y53ndFoxM/PTzmWmJjI+++/j9PppFu3bgQFBVFSUoJOp6Nnz56X5UdGRgLw+eef8+GHH5Kfn09sbCxnzpwBLvSdql+/PsePH+exxx5jz549+Pr6Eh8fT3l5ufLMjEYjOp2Oo0ePMnDgQNLT04mMjCQkJASj0YjD4frm7tWrF3q9nrFjx3LkyBGsVitBQUE4HA4GDhzolmnu3LkMHz6c1NRUAgIClEyDBg1SxjVr1ozx48fTpk0bdu7cydy5czlx4gRGo9Gtp9fVSuthVVN1YT9UzaP6eCKYMnqUMbMIpoweZcwsgulN3qTHujJp4aU/Fe3J6w81ZxbFlNGjjJlFMGX0KGNmEUwZPcqYWQTTWzytH6bmUW08EUzNozo9yphZBFNWj7LJo4JVixYtcDgcDBo0iPvuu49GjRpxqcLVnXfe6clthGrAgAHMnDkTgOHDh3P+/HlWr15NcHCw26qlkSNH8ttvv9G5c2fAVWQCOHDgAH379qVTp04sWrSI8+fPu/GrC1u//PILjz/+OCUlJSxbtozAwEC3vlM6nY7z588TEhLCuHHj2Lx5M7t27XLb9rBRo0a0aNGC7du3M3jwYIqLi9m8eTM6nY4HH3zQLdM777zDoUOH
GD16NCtXrsRmsxEVFaVkOnz4MOvWraNfv37YbDY++eQT/Pz8qKioAFyrwGojrYfVlVUX9kPVPKqPJ4Ipo0cZM4tgyuhRxswimN7ghYaaWPna3ew5co7DJ/Ol6jshiimjRxkzi2DK6FHGzCKYMnqUMbMIpqc8rR+mGKaMHmXMLIIpo0cZM4tgyupRFnlUsBo7dqzy5zfffPOSY3Q6HYcOHfLkNjVkNpvdejRV6+Kt8i53nc1mo6KiQllltWHDBnx8fKisrGTp0qWYTCaCgoIoKioiJydHKfA4HA4cDofCr96+r3Xr1uzfv5+UlBRiY2OprKykoKBAGVdQUABAly5dWLRoET4+PvTo0YNt27a5eTUYDNSrVw+DwcCMGTOIjIwkPj6eAwcOKOOys7NJT0+nR48ebN68meLiYiIjI8nPz2f9+vWMHj1ayWQ0GomJiWH27NnY7XYSEhLYt2+fkqlBgwaYzWbmzJlDdnY2Op2O4OBgAgICKCws5NSpU0RHR1/13Gg9rC4tg0H9+6FqHtXHE8GU0aOMmUUwZfQoY2aAddvTOXyqkDbNQ7i7R0uPeSI8Bhr13Ng0FJNRX+u+VRerLsyL5lF9PBFMzaM6PcqYWQRTRo8yZhbB9BZP64epeVQbTwRT86hOjzJmFsGU1eP1ILP5GvWwWrJkiSeX11qX6tFUXFxMbm6uW9+pS10HkJ6eTmxsLAApKSnccMMN2O12vv32W8BViNuwYYOykglcWW+++WaFkZeXB0Dv3r157rnnlHs89dRTbNmyRRlXWFgIwMyZM5XCk9PppH379kofLJvNRlVVFREREaxbt05hrV+/nmeffVZZZbVt2zacTiejRo3i6aefZsiQIbz++us8/fTTpKSkKAWrlJQUevbsSbNmzdi7dy8LFiygbdu2dO3aVcnUoEED3n77bU6dOsXAgQNZtWoVixYtYseOHVczFZeUtj/n5VUX9kPVPKqPJ4Ipo0cZM4tgyuhRlswHM/J5a9VvytepJ87zybcneO6hBGKbhXno0DseS8oqmb/2APvT85Vj7VqGkTSoLSZ/X08tqnJeRDNl9ChjZhFMGT3KmFkEU0aPMmYWwfSUp/XDFMOU0aOMmUUwZfQoY2YRTFk9yiKPNlPs2rXrH/rP20pMTOTHH3/EYrEox7766iv0en2NvlMXq1OnTgQFBbFx40bl2IkTJ8jPzycxMVE51qdPHwD27NmjHNuxYweFhYXccsstAJw+7VpKvnv3brd75ObmAtCgQQMAysvL0el0fP3118oYi8WC3W6nXr16AJw6dQqn08nJkyfdMlXfo2FD11Y3aWlpBAcH8+yzz3LzzTczefJkAKKjo90KeGlpaVgsFhYtWsS0adPo3r07ZrOZ8PDwGoW+119/nUGDBikFvOLiYsxmM82aNbvsc9SkSZMmTZo0XX+6uFh1saavuPTxP0Pz1x7gYEa+27GDGfnMW3PgT3KkSZMmTZo0aapLennkTQQFuH92OyjAh5dH3vQnOdKkSZMmTZo0XSyPVlhdrOPHj5OZmQlA48aNadWqlbfQNTR06FAWLVpEnz59KC8vx2g0YrPZuP/++2v0ncrKymLz5s0A+Pn58fe//513332XFStWUFZWRmVlJT4+PowaNUq57q677uKFF15g9erVrFmzBr3eVdfr1asX8fHxgKvo5OPjw969e+nWrRslJSWYTCaKiooA1/aE/v7+lJWV0bJlS1599VWmTZuGzWbD398fg8GgrJyqvsbPz69GJnBtqwhw7tw5iouLMZlMHDx4kA4dOhAZGUlUVJSykgtcq7p++eUXBg4cSJMmTRg2bBi//vorwcHB5OTkKOO6dOmiFMhWrVqlHO/Xrx++vrX/lLInTc+vV9WFBn6aR/XxRDBl9ChjZhFMGT3KlHnND2lXPL/xp5Pc06t22wN6y+PZ81a3lVXVcjhhf3o+eZZyGoUF/qkeRfFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9CbvhgZB
zHn2Vg5m5HMq10qzcBNtWni+klzNmUUxZfQoY2YRTBk9yphZBFNWj7LJ44LVN998w7Rp05RiVbWaNGnCCy+8oKxW8racTqfbny/+uloOhwO73X7Jay++5lLX/hFVF5L+yPV/5H5Op9ON+ftx1SuzLBaLUmg6deoUp06dqsEBWLt2LWvXrlWOFxUVcfbsWQAqKiooLXX1mvL19UWn0+Hn54evry8TJ078n3kuJ71eR2ioqdbXX++qCw38NI/q44lgyuhRxswimDJ6lCHz0TNFVzx/+HQhIzz8991TjyeyS6543mpzePwaRG3zci2YMnqUMbMIpoweZcwsgimjRxkzi2B6k9cz1MTl9+epvdScWRRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9//33PPPMM0RGRjJ27Fiio6MB1zZ7n376KWPGjOGDDz5w227PG1q1ahUVFRV89913yiqlTz75hMmTJ/PUU08pq6yWLl3qdl1FRQXz589n9OjRjBs3DoDu3btjs9lYsGABkyZNAmDTpk1UVlZy//3389prrwGu/lGjRo1i3759xMfHYzabqayspEOHDnz66afKPYYMGUJqaqrSryogIIAjR44wefJkHnjgAcC1Aqp79+7KqqjqsTabjZSUFCXTnDlzePfdd5UCVKtWrdi2bRv9+/dnxowZyj0TExPJz7/wiePQ0FCGDBnCmDFj+Mtf/kJSUhITJkwgKCiIhIQEABYvXozJZFLuCTB58mQOHTqEr68vNpsNo9F41XPjcDixWEqv+rrrXQaD+hv4aR7VxwM4V1hGcbkds7+B8BDP/7HT5kWOzCKYMnqUKXPrJsGknjh/2fOxTUP+9Cbkgb66K543GfV/ukdRPBFMGT3KmFkEU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9Xg8ymwP+8KozjwpWc+bMISYmhuXLlxMYeGELlj59+vDwww/z0EMPMXv2bK8XrFJSUujevbtS2AHo378/r7zyCtu3b2fw4MGXvG737t2UlJTQv39/5VhUVBTnzp1TijYAW7ZsAVw9r6rVs2dPQkJC+P7774mPj1d6PHXu3NntHtX9pvLy8mjSpAn+/v44nU769eunjAkODsZgMFBcXAxAs2bN0Ol0NGvWzC1T9T2q+2KFhIRgt9uVPlrVCgkJ4dy5c0qRKSoqirS0NBYsWIDZbGbw4MFMmDCBkpISoqKiAFefq+qtCLt06eLG69KlC5MmTeLBBx+85HP8X9Iayl1edaGBn+ZRHbySskrmrz3gtv1Vu5ZhJA1qi8m/9lt2VkubF3UyNY9y8EQwPeXd3b0Fn39/+W0B+9/c/E9vQh4eHEC7lmEczMjHcdEidL0O2rQIo4HZ/0/3KJongimjRxkzi2DK6FHGzCKYMnqUMbMIpoweZcwsgql2ngim5lEOngim5lGdPJnk0WaKR44c4a9//atbsapagYGB3HvvvRw5csSTW1xSaWlpSuGlWmazmfDwcNLSLvyy5cSJEzz66KMkJCTQs2dP5s2bB+B2bWJiItnZ2WRmZlJeXg7A3r17AZg5cybx8fE88MAD7N27l5YtWyr8+vXrA7BhwwY6duxI165defHFF0lNTVU8AkoB6oEHHqB9+/bcddddTJ8+ncrKSqxW16eAjUYjBoOBzMxMevbsSUJCAo8++ijr16/HYDBQUFAAXCiGvffee0qmKVOmkJGRgdPp5PTp00qm7du3M2/ePO666y5iY2MB1xaGPXu6Fr0PHDjwks/WYDCwZMkSbr/99quYEU2aNHlb89ce4GBGvtuxgxn5zFtz4E9ypEmTputdzz2UcFXH/wwlDWpbo89EmxauYr4mTZo0adKkSZMmTZo0adKkqW7LoxVWfn5+yiqdS6moqAg/Pz9PbnFJWSwWzGZzjePBwcGKn6KiIkaOHEmLFi2YNWsWOTk5TJ48Gb1e7+Zp6NChfPTRR9hsNjZv3ozNZiMrKwuAUaNGKSvIHnzw
QXx9fZXiXPXqqOzsbO6//37Cw8NZsGABVVVVyv3B1UcLID8/n+TkZPbv38/ChQuJiIhw28avqqqKqqoq2rVrR2JiIsuXL+fs2bM0bNhQYVVUVACQlZXF8OHDAde2hz4+Pm73HDp0KLNnz8ZkMvHxxx8TFBRESUkJCQkJynaJ1au3EhMTSUxMpLKyko8//phz586xf/9+unXrVuv58fHRmsr9XnWhgZ/mUT28s+etbiurquVwwv70fPIs5TQKq/lBgWvpUSRT7TwRTM2jOj3KlrldVAOWvNSXL39M5+DJQto0D+HuHi095nrTY3CQH88N60RuYRkWL2+XevH/1cYTwZTRo4yZRTBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT86hOjzJmFsGU1aNs8qhg1a1bN5YsWULv3r3p2LGj27m9e/eydOlSZUXPtdaqVauwWq28//77yiqnb7/9li1btpCTk6MUboKDg0lOTuaNN97g//7v/zCZTDidTmJjY3nkkUcA17Z/nTt3VopRAD///DMAAwYM4IcffiA/P58mTZq4rfAC1yo0o9FIt27dmDdvHj4+PjRt2pSSkguNw7OzswHo0KEDhYWFvPvuu0RERGA0GrHb7cq4HTt2ADBo0CDWrVuH1WqlefPmpKenu90zNTUVnU6Hn58fOTk5+Pq6tg+7/fbbazyn++67T9mu8OjRo2zevJm5c+cyYsQI5bqrkV6v87jh+fWsutDAT/P45/NOZJdc8bzV5vD475k2L+pkah7l4IlgepP38N3tvMa6WN70KOq1hprnRRRTRo8yZhbBlNGjjJlFMGX0KGNmbzIzc0s4eiiHyAYmIsODvMKsltqfo5rnRRRPBFPtPBFMzaMcPBFMzaM6eTLJo4LVv/71L4YOHcpDDz1EfHw8LVu6PoWbnp7Ovn37qF+/PuPHj/eK0YtlNpuVFU4Xq6ioiODgYODSfa46d+7Mli1b2Lp1Kw888IByPDAwEJ1Ox88//8yePXt45JFHCA8PV84bjUYeeughVqxYofAPHHBtyzV48GBmzpwJgNPppEuXLhQXFxMcHIzNZiM3N5eAgABmzZql8LZs2cJTTz1FaGgoANu2bQMgISGBCRMmKOOefvpptm7dqtzz2LFjAIwePZo33ngDcK02q+5BVT3utdde49577+WLL75g6dKlrFixgi+//JKqqqrLrk4DmDZtGrfccgv//Oc/OXXqFNHR0VeYhUvL4XBisZRe9XXXuwwG9Tfw0zyqhxfoq7vieZNRT0GBtVZsbV7kyCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYMnqUMbM3mSVllcz9IpXUtAu7T7SPCuOpe9tjCvCsr6/an6Oa50UUTwRT7TwRTM2jOj3KmFkEU1aP14PM5oA/vOrMo4JV06ZNWbt2LfPmzSMlJYUNGzYAEBkZyYgRI/j73/+u9HrypqKiomqsZCouLiY3N1fpT5WWlsZ9993nNqZNmzYA/Pbbb24Fq7S0NCIjI/H391e4ubm5Ne5ps9mUrfTOnTuHTqcjLS2N3r17A64eUeHh4RQXFxMVFcWpU6dwOByUlpa6FdOqC0HVRbG0tDR8fHzIzMx0u2eTJk2orKxUMlV7uriHl9lsJigoiNLSUpo2bQq4CobVq66GDRum8N59913effdd9u3bpxybNGkSY8eOJSQkhD59+tRYKVcbaQ3lLq+60MBP8/jn88KDA2jXMoyDGfk4nBeO63WuXi0NzP4e+9XmRZ1MzaMcPBFMzaMcPBFMGT3KmFkEU0aPMmYWwZTRo4yZvcGcszq1Rl/fA+n5zF6dyrgHEjx055Lan6Ma50U0TwRT7TwRTM2jHDwRTM2jOnkyyaOCFUD9+vWZMGGC28og0UpMTGTu3LkMHz6c1NRUTCYTsbGx6PV6ZQvCS60k6tSpE3q9nh9++IFbb72V/Px8YmNjycrKom/fvsp1BoOBI0eO8Nhjj7Fnzx58fX2JjIwEXKug
AEpKSoiIiOCTTz7h888/Jz09ncjISM6fP09AQABNmjRh165dgKuQNXbsWI4cOYLVaiU2NhaAG2+8UblnvXr12L59u1umoCDXUvfqTFarldDQUObPn8+///1vsrKyaNmyJTabjYYNG2I0GgF48MEH+fTTTwkNDaWoqAiDwUB5eTlDhgxh4MCB+Pr6YjQaMZlMFBQUAK4eW5999hmfffYZgHK8NtJ6WNVUXdgPVfOoLl7y4PbM+d2nCdu2dH2a0JO/Y9q8eEeaR+9I7R5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzN5iiuzr6y2PdYkngimjRxkzi2DK6FHGzCKYsnqUTR4XrP4MDRgwgHfeeYdDhw7xxBNPcPLkSVavXk3r1q2V3lSAUoDZvHkzAH5+fvj7+5Obm0vfvn3p1KkTixYtIjc3l/79+yvX6fV6DAYDv/zyC48//jglJSUsXboUuLBKC1wryXbv3k10dDTjxo1j8+bNZGRkKMWtat1www1s376dwYMH07x5c+bPnw/gtprJZDJRVFRUIxPglql58+b89ttvdO7cmaFDh/LZZ59hs9lo0qQJABUVFaxfv57IyEhGjx5NixYtmDFjBnv37uXnn3/m9ddfB6Bhw4ZERESQmJhIZGQkNpuNdevWKT23qgtzVyuth9WVVRf2Q9U8qoMXGgpTk3uTlVtCVp7V6/u1a/OiTqbmUQ6eCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyuhRxsyeMq9FX19Q/3NU27xcC54Iptp5IpiaRzl4IpiaR3XyZNJVFaxefPFFdDodU6ZMwWAw8OKLL/7Pa3Q6HVOnTq21wUtpw4YNGI1GYmNjmTt3LiaTiZ49e7Jjxw5ycnKIiIjAbDZTWVmJXn+hmllRUUFpaSk33HAD+/fvJyUlhdjYWBwOBxs3bqRbt27KdeBa2bRo0SJ8fHyIjo7m+PHjnDp1SuGfOnWKqKgoDAYDM2bMIDIyksDAQCwWC3Chp1RWVhY9evTg+++/x2q1cuONN5KamsqhQ4cA17Z+BQUFNTI1adKEM2fOuGVKT08nPj6evLw8ZsyYQcuWLTEYDOTk5ACwePFigoOD+fTTT/HxcU1v06ZN2bt3L6dOnWLPnj1KoSwmJobNmzeTl5eHTqejVatWgGvLwuprr1ZaD6tLy2BQ/36omkf18QCC/AzcFBeBxVJW675VF0ubFzkyi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKaMHutC5nOFZRSX2zH7GwgP8c4v17zhUWRfX1D/XNeF7x0ZPcqYWQRTRo8yZhbBlNXj9SCzWVAPq507d6LT6XA4HBgMBnbu3Pk/r9HprvwiozZKSUmhZ8+ezJkzRzlmsVjo2rWrspIpKiqKkJAQZs+erYzZtm0bAEOGDOHpp59Wjr/xxhvKKqzq3lAtWrRg4cKFbmNOnDjBTz/9RJcuXWjRogWpqamMHj2aRx55BACn00nnzp0pKSnhzJkzNGvWDIPBgN1u55133lEKWN9++y1PPvkkx48fV+5ptVpJTEzkww8/VO75xBNPcObMGSVT48aNSU1NJSkpSdnCsLi4mJtuuomsrCxsNhtpaWmcOXOG7t27X/LZrVmzRilYvfPOO27n1q5dy7/+9S+lcFVbaftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXmUg+cNZklZJfPXHnDbdq9dyzCSBrXF5O/rDYseebwWfX099VgXeSKYMnqUMbMIpoweZcwsgimrR1l0VQWrb7/99opfXyulpaVx3333uR0zm82Eh4eTlpYGuPpcffDBB269rDZs2ADAoEGDalybmZlJhw4dMJlM6HQ6t5VZlZWVbN68mfr16yv82NhY1q1bx65du1i0aBH5+fk0adIEq9WqeGzSpAmNGjUiJyeHl156iW3btuHr60twcDAhISFkZmYC0KtXLwDy8vIYOHAg6enpRERE
kJOTg8lkUu7ZqlUrUlNT2bhxI6+88gpWq5XIyEj0ej12u53Tp08zevRo7r33XrKysli6dCnHjh3D6XSi1+upqKjgnnvucctusVh47733+Oqrr8jNzQVwy65JkyZNmjRp0qRJkyZNmjRpur40f+0BDmbkux07mJHPvDUHGPdAwp9j6ndKGtSWeWvci2ptWriKapo0adKkSZOm61Me9bDKysoiLCwMf3//S54vLy8nPz+/Rk8nT3VxEepiBQcHU1RUBMDQoUNZunQpycnJJCUlkZOTw+bNm9Hr9TRt2lS5ZtiwYezatQuA1157jYqKCv7v//6P9PR0Fi9eTOvWrVm5ciWFhYW0a9dO4bdv3x6Ar7/+miFDhtCwYUMWLlyIXq/H4XAo41q1akVmZiY7d+4kKSmJAwcO8PXXX9OwYUMKCgoAaNSoETqdjoMHD9KlSxeeeeYZVqxYgc1mIyQkRGElJCTwxRdfsGHDBkaMGAHA0qVL8fX1paKigqKiIjp16kSDBg149tlnadGiBXPmzOHdd98lNTWVJk2a0LlzZwCmTZuG3W7nm2++wdfXl4SEBDZv3kxQUBA33nijR/Pj46MVvH6vutDAT/OoPp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimDJ6VGvms+etbkWgajmcsD89nzxLOY3CAv9UjwDBQX48N6wTuYVlWARsW3jx/693ngimjB5lzCyCKaNHGTOLYMrqUTZ5VLDq06cP06dPr7Fqp1rffvstzz77rNKr6VoqODiYxYsXM2XKFJKTkzGZTLRr1459+/a5jcvOzsbpdK0v79q1KxEREbz11lsUFBTw4YcfUlRURFxcHAsWLOCDDz5Q+ltVX3PDDTewYcMGfHx86N+/P9u3b+fcuXMKv7y8HIDQ0FDef/99IiMjeeSRR1i0aBEGg0EZ53Q6ldVUqampdOrUidatW7N9+3ZlTPW9mzRpwqpVqzCZTAwdOpTPPvvMLdOqVauwWq28//77mEwmXnzxRfR6PVlZWUo/rOjoaN555x3Onz+PwWCgsLAQgAULFpCQkFDr567X67zS+PR6VV1o4Kd5VB9PBFNGjzJmFsGU0aOaM3/yzRH2Hs2lY0xD7u/T2ivMasn0HOsKTwRTRo8yZhbBlMmj9rNW86g2nqfME9klVzxvtTm88r7eW7lF/o5B7XOttu+da8ETwVQ7TwRT8ygHTwRT86hOnkzyqGBVXbS5nCorK4VsL2c2mykuLq5xvKioSOkTBRAdHc2iRYuUr5cvX86uXbuoqKjAz88PcK1uCgoK4siRI8q1N9xwAwUFBYwbN47Bgwe78W+44QYAzpw5A8Df//53HnroIWXMiy++yOrVqxVWdUFo06ZNyhin08nKlSuVgpXNZgMgPj7eze+WLVvYunWr8gxPnToFwIwZM4iPj1fGHTp0iN27dyv3TElJoXv37gQHB/Pcc89RWlrK0qVLefjhh5V+WPfffz/vvvsu//jHP3jyySd56KGHKCkp8ahYBeBwOLFYSj1iXI8yGNTfwE/zqD6eCKaMHmXMLIIpo0c1Zz6Qfp43l+9Rvk49cZ4lGw7x4sOdiGsRpgqPongimGrniWDK6FHGzCKYMnnUftZqHtXG8xYz0PfKvcZNRj0FBdZasUGe51iXeCKYMnqUMbMIpoweZcwsgimrx+tBZnPAH151dtUFq5KSEiwWi/J1YWEhWVlZNcZZLBY2bNhAeHj41d7ifyoqKkrp61St4uJicnNziYqKuuJ1AOnp6cTGxgKuXlONGzcmMjJS2drwxhtv5OjRo273cDqdpKen07NnTwClV1VZWZnbPQICXNXTxo0bK+ftdrtbMU2n0+Hv768UrKoLUb9nNWrUCEDxVX3+4hVcF9+zeqvD6h5fb775Jhs3buTDDz/kpptucuvxdebMGXJzcwkNDeWRRx5h165d+Pv789JLL/Hiiy9iMtX+E0xaQ7nLqy408NM8qo8ngimjRxkz
i2DK6FGNmS/+BerFemPZbha+cHutuRdLhudY13gimDJ6lDGzCKYMHrWftWKYMnpUW+bw4ADatQzjYEY+jos+h6zXuXpENTD7e8Xv9f4c6yJPBFNGjzJmFsGU0aOMmUUwZfUoi666YLVo0SJmz54NuAovU6dOZerUqZcc63Q6+ec//+mRwUspMTGRDz74wK2X1VdffYVer1cKSpdSp06dCAoKYuPGjUrBqqioiKqqKu6++243/po1a5RVVACff/45hYWFfPjhh6xatYrmzZuj0+nYsmULo0aNUsYdPnwYgKCgILd733HHHZSXlxMXF8eYMWMoKSmhSZMmigeAffv2kZCQgNFo5I477qBZs2YAbj23DAYDL7zwAhUVFURGRvL3v/+djIwMdDodRqMRuFAszMzMxNfXl/nz5xMREeHW4ysvLw+A119/XVnB5XQ6Wb16NcXFxbz77rt/bDIuIa2HVU3Vhf1QNY/q44lgyuhRxswimDJ6VGvmNT+kXfH8xp9Ock+vlrXmy/Ic6xJPBFNGjzJmFsGUxaP2s1bzqEaeN5nJg9sz54tUUtPylWNtW4bx1L3tPX5PL9NzrCs8EUwZPcqYWQRTRo8yZhbBlNWjbLrqglXPnj0JDAzE6XTy73//m7vvvpu2bdu6jdHpdAQEBNC2bVvat2/vNbPVGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOJSIiQhk3cuRIsrKy2Lx5MwB+fn4kJSUxa9YswsLCaN26NQ6Hg/Lycrei01133cWLL77Ijz/+yHfffcf58+d5+eWXCQ4OZsaMGeTk5DB58mQAfvvtNyZNmkT//v3ZuXMnu3fvdvNavRKroqKChx9+mD179jB69GgMBoOygqqqqkoZ36RJE26//XZWrlypXFu9Mis3Nxe73U5xcTGDBg3C6XQyYcIEdDqd29aLVVVVZGZmkpCQQP/+/fniiy946KGHCAgIUHpqORwOZWz9+vVp1qwZTz/9NMuWLeOrr77i9OnTboWyPyqth9WVVRf2Q9U8qo8ngimjRxkzi2DK6FFtmY+eKbri+cOnCxmhor4TongimGrniWDK6FHGzCKY17tH7WetOKaMHtWYOTQUpib3Jiu3hKw8K5ENTESGB/3vC69CMjzHusYTwZTRo4yZRTBl9ChjZhFMWT3KoqsuWHXs2JGOHTsCri3q7rzzTlq39m7j2f+l4OBgFi9ezJQpU0hOTsZkMjFkyBDGjh3rNs7hcGC3292OjR49GqfTycKFC8nPz0ev19O/f3+34oyvry/16tWjXr16jBs3Drvdjk6nY/Xq1cqqqB07drB+/XomTpzI8uXL+c9//kNkZCSDBg1izZo1BAcHU1FRgcViISoqiltvvZX//ve/WK1WfH19CQgIoH79+gCkpqYC8PTTT/PLL7+wePFijEaj4r26YHXs2DF8fHyYNGkSH374IVlZWdSrVw+Hw6FsC5idna3k+O233/jtt9+UrwsKCpQVVtXbOnbr1o0dO3bw1FNPcdddd9GmTRv69u3LsWPHalWw0npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGtmVs3CSb1xPnLno9tGqL1nbjOeCKYMnqUMbMIpiwetZ+1mkdv8H7Ym8WxzCJaNwmmV3ykxzzwvscgPwM3xUVgsZR59D19sdQ+LyKYaueJYMroUcbMIpgyepQxswhmXfB4ID2f03lWmoWbaONhz9PrSWazwB5WF+vpp592+7q4uJjAwECqezOJVHR0NIsWLbrimKVLl9Y4ptPpSEpKIikpCYBhw4YpK5mqVVxcTH5+PuPHj2fw4MEMGzaM4OBgpVgFMGDAANavX8/58+dZt26dcnzatGlKP6wdO3bgcDhwOp08//zzPP/88wBMnTqVpUuXKj21Dh06BLi2EazO5HQ66dy5M1arlaioKGw2G9nZ2djtdu68807uv/9+ALZs2cJTTz1Fq1atANi2bRsAt9xyC/Pnz1d8JSUlsXXrVvr16wfA
zp07AYiLi7vkc6yoqLjis72StP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPast8d/cWfP795beq6n9zc63vxHXKE8GU0aOMmUUwr3eP2s9acUwZPKaftTB16a9U/95r6+5MFqw/yEsjb6J5hFkVHkXzRDBl9ChjZhFMtfNEMDWPcvBEMDWPtVNOQSmvL/mVkrILO6kFBfjw8sibCA8J9IZFaeTxZoqpqamMGjWKDh060K1bN37++WcA8vPzefLJJ5XCiFqVmJjIjz/+qKw4gpr9sNLS0pTiUrV69eqFTqcjJSVFOVZZWcnXX39NYmKich1ARkYGGRkZyjidTofD4eDmm28G4OTJkzRo0IBNmza5jTEajQQFBdGkSRNOnTqF3W5Hr9fz9ddfK+MaNmwIoBSs0tLSCAwM5JdffnHL5HS6OqlWZ9q/fz/h4eGsW7eO7t27065dO4YOHcqnn34KUGObR02aNGnSpEmTevTcQwlXdVyTJk2aNF29tJ+1mmqri4tV1bI74LXFv/45hjRp0qRJkyZNQvX7YhVASVkVU7R/+69aHq2w2r17NyNHjiQiIoKBAwfy2WefKefCwsIoKSnhk08+oVu3bh4b/b1OnDjBa6+9xp49ezCZTAwaNIh//vOfGI3GK17ndDr58MMPWbFiBfn5+dx44434+fnV6Ic1cOBAXnvtNbZt20ZpaSkrVqxg48aNbNmyBXD1wwoLC2Pv3r307t2b/Px8ZRu/6n5YFosFX19fmjdvztChQ7Hb7ZSVlSnFo2bNminj4uPj2bp1K4mJiRQUFGAwGCgrK1O2W6zeyu/222/ntddeY+bMmRQVFeHj45rChIQEhdWgQQNKSkro27cvdrudqqoqKioq0Ol0So+v3NxciouLKS8vx8/PD7vdTmpqKnv27OHOO+9UvNVGnjZovR5VFxr4aR7VxxPBlNGjjJlFMGX0qObM7aIasOSlvnz5YzoHTxbSpnkId/do6Q2LUj3HusITwZTRo4yZRTBl8qj9rNU81kZb95ypUayqlt0BP+4/S2JC41rz1ZhZNFNGjzJmFsFUO08EU/OoTo8yZhbBVKvHfSfyahSrqlVSVsWhUwW0j6pfa75s8qhgNXPmTKKjo/n0008pKSlxK1iBqz/SF1984ZHBS6moqIiRI0fSokULZs2aRU5ODtOmTaO8vJyJEyde8doPP/yQ9957j/HjxxMTE8Py5cs5ceIElZWVSj+swYMHs337dnQ6HTNmzCA5OZmysjKqqty/8fz9/XE6nRQXF6PT6QgKCuLcuXMcOHBA6f+k0+mIi4tj48aN6PV6fH19MRgMFBcXu21FGBwcTFBQEBaLBYfDQb169SgvL3dbJQXQunVrvvnmG3Q6HXq9Xhl37tw5t3F2ux273U5FRQX+/v74+flRVFTEkSNHiImJwel0Ul5eDri2IiwqKsLX15eqqqoaWyRejfR6HaFeaD58vaouNPDTPKqPJ4Ipo0cZM4tgyuhRzZkfvrudVziXkkzPsa7wRDBl9ChjZhFMmTxqP2s1j1ejtLPFVzx/PMvCoNs87wOupszXiimjRxkzi2CqnSeCqXmUgyeCqXm8emXln77i+czzpSR2rv3iENnkUcEqNTWVcePGYTQa0el0Nc5HRESQl5fnyS0uqVWrVmG1Wnn//fcJCQkBXAWayZMnk5SUpKwi+r0qKiqYN28ejz32GI888ggAnTt3pl+/fsTGxrJq1SoA1q9fz+LFi9mwYQNRUVGEhIRw8803s2HDBvbt20d8fDzgWqXUsGFDfvjhB+Uezz77LO+99x79+vXDbDZjs9n48ssvmTRpEg888AAAixYt4o033uCrr77iqaeewmw2c/DgQaqqqti6dauSqW/fvpw5c4acnByCg4MB+Pzzz/nLX/7CjBkzANd2g3fddRffffcdTz/9NGazmYKCAoqLi9myZYvSd+vf//43H330Ed988w0xMTGYzWb0ej19+/Zl1qxZiv/evXvz888/U1lZ
ia+v71XPjcPhxGIpverrrncZDPI1GZTRo4yZRTDVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEcy64PFcYRnF5XbM/gbCQ2r3i6uoG+qx9QrnW0WaKSio/YdEZZwXGT3KmFkEU+08EUzNozo9yphZBFOtHiPDrvyaoXH9QI/+7b8eZDYH/OFVbB4VrHx8fHA4Lj+ROTk5BAZ6v6lYSkoK3bt3Vwo7AP379+eVV15h+/btDB48+JLX7d69m5KSEvr3768cMxqN3HHHHWzevNmNHxMTo/StioqKwmazERISwvfff098fDznz5/HZrPRpUsXt3sMGDCA9evXc+bMGeV6h8NBv379lDHZ2dn4+/uzY8cOnnrqKaKiovj666/p0aOHksnpdGKxWHA6nWzfvp2//OUv+Pj4kJOT4+a/uk/W4cOHsdlsREVFUVJSAkC9evWUcSdPnkSv1yvbEbZs2ZJ9+/YpPa2qFRwczLlz5/jtt99qZPuj8nbTu+tJMjQZFM0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY9y8EQwNY+1U0lZJfPXHmB/er5yrF3LMJIGtcXkf3Uf5OzVPpLFGw9fcltAgx56tLvBK/llmBfRPBFMtfNEMGX0KGNmEUwZPcqYWQRTbR7bNA8jKMDnktsCBgX4ENcsVPt9+VXIow0fO3TowKZNmy55rrS0lNWrV9e66HElpaWlERYWxqOPPkpCQgI9e/bkgw8+IDw8XCngXO46gO+++45bb72V+Ph4HnjgAYxGI1lZWcoWeWlpaURGRjJmzBg6duxIamoqW7dupUmTJgqjejVWeHg4AwcOpH379tx1110cP35cYXTq1AlfX18CAgL44IMP6NmzJx06dGDFihU0btxYYSUmJlJWVoavr6+SqWvXrhQVFREaGkpaWhpGo1HpZ3X06FHuuusu2rdvz3PPPUdoaChVVVWcPn2aXr16KVsP9u/fnw4dOhATE8N3332Hj48PgwYNAqBHjx4Aygq47OxsEhISOHbsGODqEaZJkyZNmjRp0qRJkyZNmjRd75q/9gAHM/Ldjh3MyGfemgO14r008iZ+/yFig951XJMmTZo0adJ0/enlkTcRFOC+NigowIeXtX/7r1oerbB65plnePjhh/n73//O3XffDcCRI0c4c+YMCxYsID8/n6eeesorRi9WUVERGzZsoE2bNm49rPR6PUVFRZe9zmKxYDAYmDNnjlsPq8WLF+N0OikqKsLf35+ioiJOnTpFREQEM2bMID8/n5dffpnDhw/jcDj4/PPP+eijjwBYsmQJ999/PxMmTGDChAm8/fbbikc/Pz9iY2NJTU1l+fLlDBs2jL1797Jnzx7Onj2LzWYD4K677uLZZ59l06ZNtGrVihEjRrBq1SoMBgMOh0PJdOutt3Lw4EHeffddBg4cSIcOHVizZg16vV65Z3R0NIMGDWL16tUUFRUpfbfsdjvz589XemsNHDiQCRMm8PHHHxMVFcXSpUuprKx0e8a1lY+P9xrfXS+SpcmgSJ4Iptp5IpgyepQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2Cq1ePZ81a3lVXVcjhhf3o+eZZyGoVd3c4x0Y1D+HhCX7bty+LomSJaNwmmV3xkrT1eLFnmRSRPBFPtPBFMGT3KmFkEU0aPMmYWwVSzxxsaBDHn2Vs5mJHPqVwrzcJNtGkR5g2L0smjglWHDh2YP38+kyZN4vnnnwdg2rRpADRr1oz58+cTGxvrucvfyel0YrPZavSwmjhxImVlZZe9rqqqCrvdzuOPP+7Ww+qWW26hoqJCGVdaWkpRURGrVq1y29bv5Zdf5tChQ8yYMYPbbruNL7/8klatWvHqq68C0LhxY86fP6+s1AJo2rQpqamp+Pn5sWzZMuLi4pg/fz5PPvmksp2ir6+v0gMsMzOTlStXcscddxATE8PUqVOVTC1btgQgKCiIjRs3EhkZyeuv
v86SJUs4cuSIcs9nnnmGb7/9FqvVisFgwG6306VLF5599lmWL19OdHQ0er2eESNGsHjxYp5//nlsNhsNGzbk3LlzAJfsSfZHpNfrCA011epaGXS9Nxm8FjwRTLXzRDC9ycvMLeHooRwiG5iIDA/yGlfNmb3NFPUMQa7nWFd4IpiaRzl4IpgyepQxswimjB5lzCyCqTaPJ7JLrnjeanPU+j3uPbfcWKvr/oiu93m5FjwRTLXzRDBl9ChjZhFMGT3KmFkEU80ee4aa6Pm/h2m6gjwqWAF0796dTZs2cejQITIyMnA6nTRt2pR27drVuujxv6TX64mMjKzRw2rixIlYLJbLXle9aqhPnz7KMaPRSGxsLD/++CPBwcGAq7BVr149pVgFcP/99zNp0iSioqJYv349hw4d4ssvv6Rjx47KmKVLl7Jy5UomTZqkrFYqLi4G4JtvvlH4AE2aNOHMmTPK1waDgcaNG/P1118rxywWC1OnTlUyVa/IGjduHMOGDVPG7d+/nyNHjij9wpYsWYLRaOSHH35g/fr1vPjii7z99tsMHz6cOXPmMGPGDAD++c9/cu7cOb788ksACgsLFWZ4ePhln+OV5HA4sVhKa3Xt9SyDQY4mgyJ5Iphq54lgepNXUlbJ3C9SSU278InU9lFhPHVve0wBV7fXvyiPInjeZIp6ht70KIongql2ngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCqVaPgb5X/r2FyaivdaN0tWYWyRPBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOr2DwuWFUrLi6OuLg4b+GuWldTHPsjYy83xul0es1HbVm/v+73444fP05UVBRGo1E5ZjAYiImJ4dSpU8oxf39/2rZty6FDh3j77bfZu3cvr7zyCuBaPVdbaU3kLq/rvcngteCJYKqdJ4LpDd6c1ak19vo/kJ7P7NWpjHsgwSM2qDOzt5minyHI8RzrGk8EU/MoB08EU0aPMmYWwZTRo4yZRTDV5jE8OIB2LcM4mJGP46K32nodtGkRRgOzv8d+1Zb5WvBEMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkqypY/fLLL7W6SZcuXWp13eXkcDjIysrCYrFgNpsB+OqrrwCUrwFOnDjBa6+9xp49ezCZTEr/pi1bthAfHw9AZWUlhw8fBlB6WPn4+JCXl0evXr2wWCzExcVx9913Y7fblVVMpaWuVURffvkl69atw9fXlzvuuIOCggLAtc3fxX7uueceCgoKiIyMZPjw4Zw5c0bZErA60+nTp+nevTtlZWV07NhReW7VjOoC1OzZs3nrrbcwmUwMGjSIXbt2uXmqqqpiz5499O7dW9niz263c/jwYaWoaLPZeP311/n000/x8fHhr3/9Ky+99BIACQkJbqvLrlZaD6uakmnPVlE8EUy180QwvcUTsde/tz2K4nmLKfIZesujSJ4Iptp5IpiaR3V6lDGzCKbaeSKYmkd1epQxswimmj0mD27PnN+tem/b0rXq3ZP3t2rOLIongimjRxkzi2CqnSeCqXlUp0cZM4tgyupRNl1VwWr48OFXvYJIp9Nx6NChqzZ2Jel0OoxGI8nJySQlJZGTk8P06dMJDg4mIMC132RRURGDBg1Cr9cze/ZscnJymDx5MjqdjoULFxIWFkbr1q1ZuXIlVqv78v7q7fwAnnzySbZs2cLUqVMJCQmhXr16gKsoBFBSUkJiYiLt27dn0aJFNVjVX+fn5zNixAhycnKYMmUK/v7+biulqotXwcHBjBw5ki+++IL33nsPs9msZKruZZWfn88999xDw4YNWbJkieLlYlZFRQV6/YW/GBMmTODkyZO89tprAJSXl/Ppp5/i7+9PVFQU+/fv57PPPgPgX//611XPSbW0HlZXlkx7toriiWCqnSeC6SlP5F7/1VJbZm8zr8UzhOv/OdZFngim5lEOngimjB5lzCyCKaNHGTOLYKrRY2goTE3uTVZuCVl5VtX3
ZhXB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiqypYLVmyRJSPq1JwcDB9+vThzJkzJCcnYzKZGDJkCOvWrVP6RK1atQq73U6DBg3o3bs3ADt27GD9+vU8/PDDLFy4kPz8fOLi4hgxYgQfffQRwcHBVFRUUFxcTPPmzYmJiWH+/PkYDAYCAgLQ6/UKPzU1FYBhw4bxyy+/sGPHDkJDQ5UCVfW4Y8eOYTAYGD58OP/973+xWq00aNCAiooKgoJcL4Czs7MB6Ny5M0ajkblz5xIQEIDBYMButyus3bt3A/DEE0+wceNGsrKyCAsLU1ZRVY/7+OOP2blzJ3PmzFHYhYWFzJ8/X1m1deLECXQ6HQ0bNuTIkSOAaxUWQEREBGVlZUqh7Gqk9bC6tAwG9e+HqnlUH08E01u8urTXv1rnReQzBHmeY13iiWBqHtXpUcbMIphq5wGcKyyjuNyO2d9AeIjnb0plnBcRTLXzRDA1j95hBvkZuCkuAoulzKPXYaL8iWBqHtXpUcbMIphq54lgah7V6VHGzCKYsnq8HmQ2C+ph1bVr11oZ8raioqIoKChg0aJFyrHi4mI+/vhjZSu7lJQUbrvtNubMmaOMGTBgAOvXrycoKIjvv/9eOT5t2jQiIyPx9/dnx44dOBwO9Ho9s2bNUsZMnTqVpUuXKvzqVWPNmzdn4sSJgGtFWefOnbFarURFRWGz2cjOzsZut/PEE0/w/PPPA64tCZ966imFtW3bNgCCgoKYP3++cs+kpCS2bt2qjDtx4gQA7du3Z+zYsQBYLBa6dOmCwWBQtjzU6/V0796d7t27ExMTA8C8efMICwtT2Onp6djtdjIyMpRjx48fB6Bv374MGDCAmTNn/tEpcZO2P+flVRf2Q9U8qo8ngukpry7u9a+2ebkWz9BTj9eCJ4Kpdp4IpuZRDp4IpowevcErKatk/toDblu7tmsZRtKgtpj8fT21KOW8iGCqnSeCqXmUgyeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFV1WwupLOnz9PZmYmAI0bN6Z+/freQtdQYmIic+fOZfjw4aSmpmIymYiNjUWv19OzZ08A0tLSuO+++9yu69WrFzqdjs8++4zPPvuM/Px8YmNjycrKom/fvsp1ABkZGTz22GPs2bMHX19fwsPDcTgc3HzzzQCcPHmSBg0a8Mknn/D555+Tnp5OZGQkTqeToKAgmjRpwvHjx7Hb7ej1esaOHcuRI0ewWq20atUKQPl/WloagYGB/Pzzz26Z/P39AZRMmZmZBAcHM3/+fP7973+TlZVFy5YtMRgMREZGKj2uwFXAe+ONN5SvJ0yYwKuvvkrDhg0B6N27N4888gi//vorx48fp7y8nAYNGpCXl8fs2bNp0aKFV+dMkyZN15+SBrVl3hr3X9a1aeH6ZZ2mPybtGWrSpElT3dT8tQc4mJHvduxgRj7z1hxg3AMJf44pTZo0adKkSZMmTZo0aarj8rhgtWPHDv7973/X6FMVFxfH+PHj6dGjh6e3qKEBAwbwzjvvcOjQIZ544glOnjzJ6tWrad26NREREYBr5dGmTZvYtGkTmzdvBsDPz4+AgADOnj1L37596dSpE4sWLSI3N5f+/fsr1/n6+qLT6fjll194/PHHKSkpYenSpQA0a9ZMGdesWTN2795NdHQ048aNY/PmzWRkZBAZGQm4+mgB3HDDDWzfvp3BgwfTvHlzZRVVXFycwgoLCyMrK6tGJsAtU7t27fjtt9/o3LkzQ4cO5bPPPsNutytjqjVixAiysrKUrw8ePMgDDzzA3LlziY2NJTw8nE2bNtGrVy8SEhJYtmwZTZs2JS8vj+zsbKWAVxt50pT2elVdaOCneVQfTwTTm7zgID+eG9aJ3MIyLF7eDuni/6uN502mqGd4sTcZnmNd4Ylgah7V6VHGzCKYauWdPW91+6BBtRxO2J+eT56lnEZhgX+qR5FMGT3KmFkEU0aPMmYWwZTRo4yZRTDVzhPB1Dyq
06OMmUUwZfUomzwqWG3evJl//OMf1K9fn8cff1xZlZOens6aNWsYPXo077zzDnfccYc3vCrasGEDRqOR2NhY5s6di8lkomfPnuzYsYOcnByleONwOHA6L+yzVFFRQVlZGeHh4ezfv5+UlBRiY2NxOBxs3LiRbt26Aa6t/SorK+nZsyeLFi3Cx8eHmJgYDh8+zKFDhxR+VlYWUVFRGAwGZsyYQWRkJPXq1aOgoMDNb1ZWFj169OD777/HarUSFxfHnj172LNnD8OHDwfAarXWyNS8eXNOnjzplikjI4P4+Hjy8vKYMWMGLVu2xNfXlzNnzij327NnDwcPHnTzkJOTA8CcOXN47733AFi9ejVhYWFKYezuu+9mz549rFixgocffrhWc6PX6wgNNdXqWhlUFxr4aR7VxxPB9CZP1N95NWf2NlPkz02ZnmNd4GXmlnD0UI7WzF2lTLXzRDBl8uitv38nskuueN5qc3j8c12meRHJVDtPBFPzKAdPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWL3zzjvceOONLF++nKAg9zd9TzzxBA8++KCQglVKSgo9e/Z0609lsVjo2rWrspLJbDYzYMAAnn32WWXM7t27cTqd9O7d2227vDfeeENZhWU2m6mqqqJ169YsXLhQGfPJJ58wceJE9uzZw6233kq9evU4deoUL7zwAo888ogyrn///qSlpXHmzBmCg4MBVwHsnXfeUb7OyMjgrrvu4tixY8o9S0pKSExMdMs0bdo0Pv74YyVTUFAQhYWFvP76624roDp37sy5c+ew2WwYjUZSUlIwm838/PPPxMbG8txzzzFq1CjuvfdeAgMvfNrz4p5W4CpYZWdn85///OfqJ+X/y+FwYrGU1vr661UGg/ob+Gke1ccTwZTRo4yZRTBl9OhNXklZJXO/SCU17cKqjPZRYTx1b3tMAbXvdyPjvIhgqp0ngimTR2///Qv01V3xvMmop6DAetVckGteRDLVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEUxZPV4PMpsD/vCqM48KVqdPn+bZZ5+tUawCCAoKYsiQIbz99tue3OKSulR/KrPZTHh4uNKDKioqSvlztapXHXXs2NHteHR0NIsXL6a8vJyoqCgApddTtdLT0zEajZw6dUo5v3//fmU8uApTeXl5isebb74ZvV5PQECAUqyqPgeQm5ureK2srKRx48Zu98zMzMTX11cZHx4eTmFhIS1btlTGFBcXY7VacTqdnD59mujoaNLS0mjZsiU6nfub6Us9k99r165dbplqI62h3OVVFxr4aR7VxxPBlNGjjJlFMGX06A3enNWpNfrdHEjPZ/bqVK/0u5FxXkQw1c4TwZTBo7f//oUHB9CuZRgHM/JxXNjMAb3O1Yewgdnf4/wyzMu1YKqdJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUeFayioqLIz8+/7Pnz588r2wR6UxaLhcrKSh599FH27NmDyWRi0KBBmM1mpW9UYmIiH3zwARaLBbPZDLhWWAGcOXOGW2+9lfz8fOLi4rjttttwOp0UFRXRqVMndDodOTk5jBkzhm3btuHj40NVVRVhYWEKv02bNnz77bfs3LmTt956i/T0dEJDQ7FYLICrf5XRaFSKTG+++SZr167FarViMpkICQmhpMS1nUivXr0AOH78uJIpICCA4uJit0ytWrXi2LFjbNq0iTVr1pCVlUVoaKjyXKrHWSwWAgMDmTBhAgAzZ85k7969mEwmZQxAamoqK1as4IcffgBcfa+OHTvG7NmzPZofrYdVTdWF/VA1j+rjiWDK6FHGzCKYMnr0Fk/rdyOfRxkzi2B6gyfq71/y4PbM+d2qrbYtXau2PHktLMu8iGaqnSeCqXlUp0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNHhWs/vWvfzFu3Djat2/vtkUduPpbffLJJ8ycOdMjg5eS
0+nks88+o02bNsyaNYucnBymTZuGXn/hG2Ho0KEsXbqU5ORkkpKSyMnJ4YcffkCn07Fw4ULGjx9PTEwM48aNc/Po5+eH2Wzm2LFjFBYWMnr0aL777jtSU1Px9b2wZUjXrl0B+Oijj+jduzd9+vRhyZIlNby2adOG7777jqVLlzJixAjOnTvHunXr8Pf3V/prNWrUCJ1Ox48//kiLFi144okn+O9//0tBQQGVlZUK6+abb2bjxo28++67/OUvf7nsPUtLSzl69CiHDh0CoFOnTuzbtw+LxUKDBg2UcZ999hkpKSmYTK499o8fP063bt08KjJqPayurLqwH6rmUX08EUwZPcqYWQRTRo+e8rR+N2J4Iphq54lgXu8eRf39Cw2Fqcm9ycotISvPqvWlUylT7TwRTM2jHDwRTM2jHDwRTBk9yphZBFNGjzJmFsGU1aMs8qhgtXTpUkJDQxkzZgwNGzakWbNmAJw6dYpz587RokULlixZ4lZU0el0zJ071yPTRqORiooK3n//fUJCQgCw2+1MnDgRHx9XpODgYBYvXsyUKVNITk7GZDKRkJDAzp07GTFihNJ3KioqioKCApxOp7JtX7169SgqKsLpdDJ37lzi4uJ44YUXeOONN5QiU/369QEwmUzs2rWLvXv3ctddd5GTk8O2bdsUVkREBODqF7VkyRIiIyP5v//7P958802MRqOSyc/Pj4qKCoqKipg7dy6dOnXiL3/5C7NmzVIy3XDDDQCEhITwzTffYDKZGD58OFu3biUtLU25Z25uLqWlpZSWunpJ7dy5U7nPxQWrH374gby8PGUbQ6fTyc6dO9m4cSNjxoyp1dxoPawuLYNB/fuhah7VxxPBlNGjjJlFMGX06C2e1u9GPo8yZhbB9AZP5N8/gCA/AzfFRWCxlHnEqZYs8yKaqXaeCKbmUZ0eZcwsglkXPJ4rLKO43I7Z30B4iOe/pKwLmWX0KGNmEUwZPcqYWQRTVo/Xg8zma9TD6ujRo8CFQkpmZiYABoOBG264gYqKCmVMtX7fV6k2MhgMBAUFKcUqgN69ewNgs9mUY9HR0SxatEj5esGCBezcuZN27dopx5YvX87gwYM5fPgw/v7+ynE/Pz+2b9+ufO1wOJg2bZqy4qlRo0YAdO/e3W0LvZkzZ7Jt2zYCA11bi1RUVCj3adq0qTLuww8/pKysTPnax8eHevXqsW3bNuVYVlYWs2bNUjJV+/vb3/7Gs88+q4w7c+YMaWlpiqcbbriBzMxMDh065LbqrEOHDkrBDeC7774jPz+fhx56iJycHDp16sSCBQvwVNr+nJdXXdgPVfOoPp4IpoweZcwsgimjR0954cEBxDYL4fCpwhrn4pqHaP1uVMRUO08E83r3eC36TXnq8VrwRDBl9ChjZhFMGT3KmFkEU40eS8oqmb/2gNv2s+1ahpE0qC0mf98rXHlt/F0LpoweZcwsgimjRxkzi2DK6lEWeVSw+vbbb73l46pkt9spLCx0609VXVy6eNXS71W9UungwYMMGDAAgMrKSjIzM7Hb7ZSXl+Pv749Op6OiooKMjAxle7yffvoJp9OpbAuYnZ0NQEZGhts9qgt01aub/Pz8lOurC1ZFRUUUFBS4bTH4RzKVl5cDcOzYMbd7pqWlKZ6io6MxGAzKPXv06AFAeno65eXlbgVDq9XK6NGjqayspHXr1srz0aRJkyZNmjR5V5f9uI7zcic0adLkLSUNasu8Ne6/TGzTwvXLRE2aNGnSpMkTzV97gIMZ+W7HDmbkM2/NAcY9kPDnmNKkSZMmTZrqsOpkhcJms+Hv7+/Wn2r69OkEBwdTVVWljBs5ciRZWVls3rwZcBWRDAYDixcvJjw8nNatW7Ny5UqluNStWzeCgoKoqKjAbDYzZswYxo0bR1lZGdOnTycsLEwp+BQVFQFw4sQJOnbsSGVlJWFhYZw7d87tvN1uJygoiEmTJvHqq6/i4+NDYGAgfn5+SgGqOpOvry+JiYlUVlYS
HByM1Wp1y1TN/O6770hISMButxMaGkpubq7beYPBgNlsJikpCXAVzXx8fNxWpAHceuutWCwWwLVKCyAmJoa3336bu+++u9bz40mj6etVdaGBn+ZRfTwRTBk9yphZBFNGj97inT1v5dAlVlcBHDpVSJ6lnEZhgbViyzgvIphq54lgyuQxOMiP54Z1IrewDIsXt2u62JvaMotkyuhRxswimDJ6lDGzCKZaPZ49b3X7MES1HE7Yn56vvcZTAVPtPBFMzaM6PcqYWQRTVo+yySsFq8rKSnJycrBYLG5bzlWrbVvvfnpRp9Nx//33c+TIEaU/1ZAhQ0hJSXEb53A4sNvtbsf0ej1PP/00CxcuJD8/X1lZZLPZeO2116ioqOCll16iefPmtGjRgnHjxuHj48Mdd9xBTk5OjXw6nQ6TyURBQQEWiwVfX1+3bQkdDgfl5eXUq1ePqqoqysvLsVgsNGzYkJycHDdWeXk5ERERFBQUUFpaSnl5udKX6mIZjUYCAwMpKirCYrEQGBhIScmFhtJVVVWUlpYSEhJCaWkpFRUVlJWV0axZMxyOC0sRq4tVv9e4ceNqXbDS63UeN46/nlUXGvhpHtXHE8GU0aOMmUUwZfToKe9EdskVz1ttDo//7ZRxXkQw1c4TwZTJo8jXqGrNLJIpo0cZM4tgyuhRxswimGrzqL3GE8MTwVQ7TwRT8ygHTwRT86hOnkzyqGBlsVh48803WbdundLb6WI5nU50Oh2HDh3y5DY1ZDabMRqNbv2pANavX+9W4Fm6dGmN6yorK3nkkUeU1Ufz5s3j/fffR6fTcccdd+Dv78+cOXM4efIkS5YsISIiQrl+6NChSr+ugADXN12/fv2YOXMm4Fol1bdvX3JychQf58+fp6qqihUrVhAVFQXAtm3bGDVqlJtXvV5PgwYN3Ipuzz77LBs3blTGVRebnnjiCZKTkwEoLCwkMTERQBlXWFiIw+Hgyy+/VFZVffLJJ7zyyivExcUp/K5duxIYGMi8efMYPny48mdP5HA4sVhKPWJcjzIY1N3QFbzv0ds8EUy180QwZfQoY2YRTBk9eosX6Hvl/p0mo56CAmut2DLOiwim2nkimDJ6lDGzCKaMHmXMLIIpwqO33xfVhcyaR/V41F7jyedRxswimDJ6lDGzCKasHq8Hmc0Bf3jVmUcFqxdeeIHvvvuOAQMG0KFDB+rVq+cJ7g8rKipK6dtUreLiYnJzc5Wi0OWuA1c/p9jYWABSUlJo1KgRdrsdf39/ADp06EBmZibbt29n8ODBgKv4lp6eTs+ePQHIy8sDUApY4Fr51K5dO3JycpR7FRYWAlC/fn1lXM+ePfH19cVkcn3SxmazUVVVVWM11W233cb69euVolP1doPVvbAAQkJCaNGiBUePHlWOFxcXo9fr3Xj9+/dn4sSJSkaR0hrKXV5qb+gKdaPJoNo9yphZBFPtPBFMzeP1yQsPDqBdyzAOZuTjuGihtl7n6qPTwOzvsV8Z50UEU+08EUwZPcqYWQRTRo8yZhbB9AZP9PsiNWYWzdQ8Xr2013hieCKYaueJYGoe5eCJYGoe1cmTSR4VrLZv387w4cOZMGGCt/z8ISUmJjJ37lyGDx9OamoqJpOJ2NhY9Hq9UlC6lDp16kRQUBBTp07l1KlT5OfnU1VVhZ+fH4MGDVLG9enThw0bNvD+++8zZcoUfH19iY+Pp7CwkFtuuQWA06dPA65+Utu2bSM9PZ3IyEiqtyBs0KAB4NrmT6fTMXbsWI4cOYLVaqV9+/bY7XalwHfq1CmcTicZGRlumZo3bw5Aw4YNAVfxy2AwsGzZMmbPnk1WVhYtW7YkJycHHx8fjEYjACUlJVRVVTFw4EBOnz5NWVkZn3zyCYBbH6tz585x6tQpYmJiADAYDMydO5cnn3zSswnSJExaQ1dNmjRpqptKGtSWeWvcf7HWpoXrF2uaNGnSpEmTpquT9r5Ik1qkvcbTpEmTJk2avCuPClYhISFKUeVa
asCAAbzzzjscOnSIJ554gpMnT7J69Wpat27ttoXfyJEjycrKYvPmzQD4+fmRkJDAtm3b6Nu3L506dWL69OmUlpbSv39/5brbb78dnU7H2bNneeKJJygpKWHZsmWEhYURHx8PuLZDNBgMpKWlER0dzbhx49i8eTO7du0CoKioCH9/f8rKyqhXr56yWqt58+Z8+OGHOBwOgoKClLHg6gX2+0zg6pNVfU+TycTevXvp3LkzQ4cO5bPPPqOwsBC9/sKSutLSUnQ6HRkZGURFRXH48GEmTJiA0WgkLCxMGZebm0uLFi2Ii4vjxx9/xGKx8M4773Dw4EGmT5+ubHt4tfLx0ZrK/V5qb+jqLY8ieSKYaueJYMroUcbMIpgyevQmLzjIj+eGdSK3sAyLl7cuuvj/3pCan6Moptp5IpgyepQxswimjB5lzCyC6S2eyPdFas0skql59IypvcaTy6OMmUUwZfQoY2YRTFk9yiaPClZ/+9vf+PLLL3nwwQfdCiaitWHDBoxGI7GxscydOxeTyUTPnj3ZsWMHOTk5StHK4XAoK54AKioq2LNnD127dmX//v1Kv6jAwEA2btxIt27dAPj2229xOp00bNiQRYsW4ePjQ48ePdi2bRv79u1TilYOh4OoqCgMBgMzZswgMjKSZs2acerUKeWeVVVVWCwWevTowffff4/VaiU+Pp5ffvlF2VawWj4+Pm6Zunbtys8//6wUtMBVjIqPjycvL48ZM2bQsmVLwsLCKCgoUMbodDqeeuopsrOz+fLLLwHXNoIOh8NtnqZPn05ycnKN7RW//vprnn/+eZo0aXLVc6PX64Q2tK7rUntDV6gbTQbV7lHGzCKYaueJYGoer3+eqH8jZZwXEUy180QwZfQoY2YRTG/yMnNLOHooh8gGJiLDg7zGVXNmUUwZ5uVavC9S87yIYmoePZP2Gs+7UrtHGTOLYMroUcbMIpiyepRFHhWskpOTsdls3HfffQwaNIiIiAgMBkONcXfeeacnt6mhlJQUevbsyZw5c5RjFouFrl27uvWdWrp0qdt1u3fvxmq1MmHCBOLi4gDo3r07jRs3VopX1XwfHx8GDhzI+PHjAVcPq5tvvpnvv/+e+Ph4TCYTTqeT++67j8cff1y5dvLkyaxYsYKSkhK31V7vvPOOW0+prl27UlxcDKAcb9u2LcuWLVPG7N+/n/vuu0/ZflCn01FVVUVSUhJ9+/ZVxj366KP8+OOP2Gw2jEYjZrOZyspKpk6dyk033cSLL77IG2+8waBBg9w89O3blyNHjrg9o7/+9a8cPny4VsUqAIfDicVSWqtrr2cZDOpu6Are8SiSJ4Kpdp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYMrmsaSskrlfpJKadmG1TPuoMJ66tz2mgNr3IVJzZlFMmeZF5PsiNc+LKKbmUZ0eZcwsgql2ngim5lGdHmXMLIIpq8frQWZzwB9edeZRwSonJ4edO3dy6NAhDh06dMkxOp3usudqq7S0NO677z63Y2azmfDw8BqrhX5/HUBUVJRyLCoqCqvVSlZWFuXl5fj7+3Ps2DGqqqrcxul0Olq2bKkwTCbXp2d+v21eeXk5AJmZmURHRxMQEIDBYHArFDmdTioqKpSt/po1a3ZJ1rlz59yY/v7+wIX+WNWqqKgAXH21oqOjiYqKqvEcSkpKyM3Ndct0KeXm5l6y6Hg10hrKXV5qb+jqqcdrwRPBVDtPBFNGjzJmFsGU0aOMmUUwZfQoY2YRTLXzRDBl8ThndWqNPkQH0vOZvTrVK32I1JhZNFOGebkW74vUOC+imZpHOXgimDJ6lDGzCKaMHmXMLIIpq0dZ5FHBasKECRw4cICkpCTi4+OpV6+et3xdURaLBbPZXON4cHCw2/Z5J06c4LXXXmPPnj2YTCaaN2+O0WjEz89PGZOYmMjs2bNxOp1K36ns7GwAZs6cyaRJk4iLi+PFF19040dGRgLw0Ucf8dZbb+Hr60ufPn34
8ccfgQt9qerXr8/x48fp168fmZmZREZGcvvtt1NeXq4UhoxGIzqdjtTUVHr27InVaqVjx474+flhNBpxOFzf3E2bNgXgueee49y5c5hMJvr168eBAwfc7pmYmMgHH3zAxIkT2bBhAwBDhgwBoGfPnpd8pq+//jpLliwBoF27dlcxGzWl9bCqKW/tX5o8uD1zfvdpx7YtXZ929PS514U9W9XuUcbMIphq54lgah7V6VHGzCKYMnqUMbMIptp5IpgyedT6EKnTY12ZF1Hvi9Q6LyKZmkd1epQxswim2nkimJpHdXqUMbMIpqweZZNHBatdu3YxevRonnnmGW/58ZqKiooYOXIkLVq0YNasWeTk5DB58mSqqqrcxg0dOpSPPvqIiooKfv75Z2w2m9IPatSoUcTExLB8+XIefPBBfH19uemmmwCUXlBZWVncf//9hIeHs2DBghr86uJYfn4+ycnJ7N+/n4ULFxIREUF+/oUX106nk5KSEtq0aUNiYiLLly/n7NmzNGzYsAbrzJkzDB8+HHBte+jj4z6NgwYN4r333mPdunV07dqVrVu3Ul5eTkJCgrJN4a+//spHH33EHXfcgcPhYOXKlcr1r7zySi2futbD6n/J0/1LQ0NhanJvsnJLyMqzen0/eagbe7aq3aOMmUUw1c4TwdQ8ysETwdQ8ysETwZTRo4yZRTC1PkTqZMoyL6LfF6ltXq4FU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYNWjQwG2ru2sls9ms9H+6WEVFRYqfVatWYbVaef/99wkJCQFgx44drF+/nlOnTinb8AUHBzNy5EhmzZrFSy+9RGBgIE6nk6ioKB555BEAOnfuTOfOnamsrFT4qampANxxxx388MMP5Ofn06RJE2UrvupxJ06cwGAw0K1bN+bNm4ePjw9NmzbFYrEoY6pXdLVp04bCwkLeffddIiIiMBqNlJaWKuN2794NQP/+/Vm3bh1Wq5XmzZuTnp7uds9Vq1bRoEEDmjVrxvbt2wG47777ePHFF5VnFR4eTmVlJTNnziQvLw+n04ler6dv377Ex8fXem60HlaXlsHg3f1Lg/wM3BQXgcVS5lHfqovlbY/e5olgqp0ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpgyedT6EHmXea6wjOJyO2Z/A+Ehtf8FSV2aF/D++yIZv3c0j+r0KGNmEUy180QwNY/q9ChjZhFMWT1eDzKbr1EPq0cffZRVq1YxZMgQpafTtdClejQVFxe79WhKSUmhe/fuSrEKYMCAAaxfv55169aRnJysHC8pKaFx48Z8++237Nixg0ceeQSn88Jm2EajkQcffJClS5cq/Oq+XF26dOH9998HXKukOnfujNVqJSoqCpvNRnZ2Nna7nddee00pKG3ZsoWnnnqKDh06ALBt2zbAVUSaP3++ct+kpCS2bt2q3PPEiROAq2A1Y8YMwLU9YpcuXTAYDMqWgf/5z38YNmwYTz75JKtXr+bFF19kzJgxGI1Ghd28eXMWLFjAypUrmTJlCq1atcJisRAeHn71E/I7aftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKYNHrQ+Rd5glZZXMX3vAbRu/di3DSBrUFpO/71Xz6uK8iGCqnSeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFHhWsbDYbPj4+3HnnnfTv359GjRpR3ZepWjqdTlmp5C0lJrp6NF3cy+qrr75Cr9crPZrS0tLo06cPjz76qNLD6u6770an05GSkqIUrCorK/n6669JTExUrgNIT0+nV69eWCwW4uLiiIyMxOFwcPPNNwNw8uRJwsLCmDNnDu+88w6+vr7ccccd+Pr6EhQURJMmTTh+/Dh2ux2dTsc999xDQUEBkZGRDBgwAIBWrVop9wwMDGT79u10796dsrIyOnbsiM1mAy70ncrMzCQoKIhXXnmF8ePHYzKZGDRoEL6+vjRq1Aij
0ciZM2fIzc1l+fLlzJo1C7vdDkBycjIfffSRUlhMS0tj/vz5fPHFF4BrdZrFYqG8vNyrc6VJkyZNmjRp0qRJkyZNl1LSoLbMW+NebGnTwlVs0fTHNH/tAQ5m5LsdO5iRz7w1Bxj3QEKtmNq8aNKkSZMmTZo0afqz5FHB6s0331T+vGzZskuOEVGwGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOVXo0FRUV8fnnn+Pr68vs2bPJyclh2rRpGI1G9u3bx+LFi2ndujUrV66ksLCQUaNGAa4VS3q9HofDVQF98skn2bJlCxs2bABQthIsKiqioqICq9VKYmIi7du3Z9GiRVitVmJiYpQx4Fp5lZ+fz4gRI8jJyWHOnDkAJCQkuN3TbrcrWxR+8cUXZGRkoNPp3DL5+PhQUlLCPffcQ8OGDVmyZAmVlZXceOONAOTl5QFQUFBAdHQ0gYGB/Pbbb+zevZvBgwcze/ZsWrVqxY8//si6desA10oug8HABx98wMaNG7n33nvp0qVLrefHkya316vqQgM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKZvH4CA/nhvWidzCMixe2M6uWmrO7E3m2fNWt6JStRxO2J+eT56lnEZhgVfNrSvzIoKpdp4IpuZRnR5lzCyCqXaeCKbmUZ0eZcwsgimrR9nkUcFqy5Yt3vJxVQoODmbx4sVMmTKF5ORkTCYTQ4YMYezYscoYp9OJ0+kkLCyM3r17A2C325k4cSKxsbEsXLiQ/Px84uLiWLBggbKdXlVVFQ6Hg4cffphz584xf/58DAYDRqNRWfEEUFpaitVq5ZVXXmHlypXs2LGD0NBQrFYrfn5+bn5btWpFYmIi//3vf7FarYSFhZGfn09AQIDCKikpYcyYMfz666/MnTuXgIAA9Hq929aETqcTh8PB//3f/7Fs2TKysrIICwsjJydHWTlVXWjT6XQcPXrUzUdGRgarVq3ipZdeIi4ujqqqKgDmzZvnluvhhx/myJEjtZobvV7ncRPe61l1oYGf5lF9PBFMGT3KmFkEU0aPsmXefeQcR346SWzzMDrGNPQaV7bnKIIngimjRxkzi2B6kyfq/YOaM3uDeSK75IrnrTaHR8+2rsyLCKbaeSKYmkc5eCKYMnqUMbMIpoweZcwsgimrR1nkUcGqcePG3vJx1YqOjmbRokWXPa/X62ncuDFff/21cqx///5MnDiRiIgI1qxZc8nrqldFDRw4UOkxBa5+XT/++KPSh6qqqop69erx0EMP8dBDDwGuglLbtm0pKysDIDDQ9Wm2zp078/zzz/P8888DsHLlSiZNmkRlZSXgWmEFMHz4cJ5++mnlnv369ePMmTNumSIjIxkxYgQjRoxQru3SpYvCqPY3fPhw5X4AI0eO5KeffqJHjx4AzJkzh4EDB/Lyyy8rY+655x6ys7MZP348DocDvf7qK8EOhxOLpfSqr7veZTCov4Gf5lF9PBFMGT3KmFkEU0aPsmXOyS9l8sc/U1JWpRwLCvBh0mNdaRh69Z/QF+FRFFPtPBFMGT3KmFkEU0aPas0c6Ku74nmTUU9BgbVWbJDnOdYlngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCKavH60Fmc8AfXnXmUcGqWqWlpfzyyy9kZmYCrkJWly5dlIKNWqTTXfkF/dWOvdyYi1dFeerjallNmzbFaDTidDqpqqqitLSUb7/9ll27dgFQUVEBuHp0bdu2jbVr19ZgvvXWW9x+++1ER0f/YZ8XS2sod3nVhQZ+mkf18UQwZfQoY2YRTBk9ypL598UqgJKyKiYt/Jn3/pHoERvkeY4ieSKYMnqUMbMIpowe1ZY5PDiAdi3DOJiRj+Oit416navnVAOzv1f8Xu/PsS7yRDA1j3LwRDBl9ChjZhFMGT3KmFkEU1aPssjjgtXSpUt55513KC0tdSuumEwmxo4dy8MPP+zpLWolh8NBZmYmw4cPJzU1FZPJRGxsLABm
s/my11WvUHrrrbc4deoU+fn5xMbGkp6eDrhWYPn7++Pj40NeXh6PPfYYe/bswdfXl/j4eOx2u1KoKy11rTTatm0bAwcOJD09ncjISIKCggDw9fV18zN27FiOHDmC1Wqlffv2nDlzRtni749mMhqN3HjjjaxatYqPP/4YcK3Muvvuu1m3bh1t27oa5b799ttK8QpgxowZ7N27F6PRyIcffkhkZGStn73Ww6qm6sJ+qJpH9fFEMGX0KGNmEUwZPcqUed+JvBrFqmqVlFVx6FQB7aPq14ot03MUxRPBlNGjjJlFMGX0qObMyYPbM+eLVFLT8pVjbVuG8dS97T1+XybTc6wrPBFMzaM6PcqYWQRT7TwRTM2jOj3KmFkEU1aPssmjgtV///tfXn/9dRISEhgxYgRRUVEApKWlsXTpUl5//XWCgoL461//6g2vVy273c6hQ4d44oknOHnyJKtXr8ZoNCq9o8C1VV5WVhabN28GwMfHB51Ox86dO+nbty+dOnVi0aJFypZ71aruMfXLL7/w+OOPU1JSwrJly/D19aVevXpuYzMzM4mOjmbcuHFs3rxZWe1UrcDAQHx9fdm+fTuDBw+mefPmfPjhh1RVVdXYlu+PZKpXrx7l5eU0atSI7OxsmjRpwrp16+jQoQPNmjUDICEhQRn//fffc/jwYQCioqK4+eaba/nEtR5W/0t1YT9UzaP6eCKYMnqUMbMIpoweZciclX/6iuczz5eS2LmZR/eQ4TmK5olgyuhRxswimDJ6VGPm0FCYmtybrNwSsvKsRDYwERke5CV3LsnwHOsaTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9/PHHdOnShUWLFmEwGJTjsbGx3HXXXTzyyCN8/PHHf0rBys/PD5vNRmxsLHPnzsVkMtGzZ0+2b9+Oj8+F2A6HA7vdrnwdGBiI0+nkpptuYv/+/aSkpBAbG0tpaSklJSXKCqzqa6vz+/j40KNHD7Zt26asNKse26hRIwwGAzNmzCAyMpKYmBiOHDminDcYDFRWVtKjRw++//57rFYr8fHx7Nq1Cz8/v6vO9PHHH7Nz507eeustsrOzOX/+PG3atOH48ePY7Xa3ubLZbIwfP17pp3Vx4as20npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGmzJFhV/73v3H9wFr3QZHpOYriiWDK6FHGzCKYMnoUkflcYRnF5XbM/gbCQzz/hUaQn4Gb4iKwWMo86lt1serCc1S7Rxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vB5kNl+jHlbp6ek8//zzbgWQahkMBvr168ebb77pyS1qLYPBQFhYGMuWLVOOZWVlcdttt2Gz2ZRjS5cudbuuqsq1Hc6wYcMYMGCAcnzw4MEcPnwYf39/wNVbys/Pj4ULFypjHA4Hbdq0UYo/jRo1AqBdu3bMnj1bGTdz5kyOHDmibB1YvTXfq6++StOmTZVxvXv3pqys7Koz6fV6unfvzueff05MTAzJyck0aNCA5557jvz8fMLDw5WxY8aMwWKxMG3aNF544QXuuOOO//Fk/7e0/Tkvr7qwH6rmUX08EUwZPcqYWQRTRo8yZG7TPIygAJ9LbgsYFOBDXLNQj/3K8BxF80QwZfQoY2YRTBk9eoNXUlbJ/LUH2J9+YQu/di3DSBrUFpO/r6cWpZwXEUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LIo80U69Wrx5kzZy57/syZM0q/pmstu91OYWGh21Z+27dvB1x9ni6n6pVKBw8eVI5VVlaSmZmJ3W6nvLwcAJ1OR0VFBRkZGcq4n376CafTqfSmys7OBnAbA3D06FHgQo+r6lVUP/30kzKmqKiIgoICLl79VdtMALt27SIoKIjQ0FDl2NKlS9m6dSsPPfQQ99577xWv16RJkyZNmjRd33p55E0EBbh/likowIeXR970JznSpEmTputP89ce4GBGvtuxgxn5zFtz4E9y
pEmTJk2aNGnSpEmTeuTRCqtbbrmFZcuW0a5dO+6++263cxs2bGD58uXcc889Hhm8lE6cOMFrr73Gnj17MJlMDBo0iH/+859uRRubzYa/vz/JyckkJSWRk5PD9OnT8fPz44svvuDzzz8nLi6OqqoqLBaL0sOqtLQUg8HAggULWLRoEUajkZCQEGWlU1FREf7+/uh0OgIDA7nnnntwOByEhoZis9kICwtDp9MpY6v9dujQAYfDQcOGDcnMzHQ7b7fbMZlMvPLKK0yePJmAgAACAwPx8/NTCmTVmQwGA7169cJut9OgQQMsFgvBwcHKyrDDhw/z1ltv0bZtW1JSUgB46623cDgcjBgxQinIzZ8/nxkzZqDT6fjkk0+U/BkZGWRnZyurw2ojT5v7Xo+qCw38NI/q44lgyuhRxswimDJ6lC3zDQ2CmPPsrRzMyOdUrpVm4SbatAjzmCvbcxTBE8GU0aOMmUUwZfToLd7Z81a3lVXVcjhhf3o+eZZyGoUF/qkeRTJl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWj7LJo4LV+PHj+e233xg/fjzTpk2jRYsWgKvokZeXR1RUFM8++6w3fCoqKipi5MiRtGjRglmzZpGTk8O0adMoLy9n4sSJyjidTsf999/PkSNHSE5OxmQy0bp1a3799VcSEhL45z//yfLly9myZYvbFnl2u10pBtntdoqLizl37hyNGzd2WylVUVFBaWkpTZo0IS8vj6KiImw2GzExMTU8+/r6EhwczPnz58nNzcVkMlFSUqKct9lslJWVER4eTmlpKWVlZVitVlq3bs3x48eVcU6nE5vNRuPGjcnNzaWgoACbzea2aqpBgwbYbDY++OAD9HrXX4ywsDC6du1K//79lXHffvutwrTb7eTm5gLw6aef0rBhQ8aMGVOr+dHrdYSGmmp1rQyqCw38NI/q44lgyuhRxswimDJ6lC1zz1ATPb1GuyDZnqMIngimjB5lzCyCKaNHT3knskuueN5qc3j8XkrGeRHBVDtPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWIWFhfHFF1+watUqUlJSyMrKAqB169aMHj2aBx54QNnuzltatWoVVquV999/n5CQEMBVZJo8eTJJSUlEREQAYDabMRqNLFq0CHAVmHr06EFgYCBdunShe/fudO7cmX79+pGYmKjwq1c/LViwgNjYWAC2bdvGqFGj0Ol0BAcHA2C1WgkJCWHLli3Ktc8++yzffPMN0dHRAEovqwceeICXXnoJgMLCQm655RYAhZWZmYnD4WDt2rVKpk8++YRXXnnFrRgFEB0dzYYNG5Svhw4dyoEDBxRWSEgIZ86c4fHHH+df//oXMTExPPbYY4waNcqNU1hYyBNPPOF2vEuXLgBuha2rlcPhxGIprfX116sMBvU38NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR+8wzxWWUVxux+xvIDzE818WeMtfoK/uiudNRj0FBdZasevCvMjoUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwerweZzQF/eNWZRwUrcPVfGjlyJCNHjvQU9YeUkpJC9+7dlcIOuAosr7zyCtu3b2fw4MEAREVFkZaWpozZvXs3JSUl6HQ6oqKiAFffpzvuuEPZDg8u9J26WD179sTPzw+j0Yi/vz82mw2r1UrDhg3dxvXv35/169fToEED4ELvqotXcIWEhBAbG8tvv/2m+MjLywNQthKsZk2cOFEpRJ0+fRqHw1GjJ9jtt9/Onj17aNasGQA//vgjmZmZjBgx4orPMT09nQ8++IAPPvigxrm7776bffv21brYqDWUu7zqQr2YNyQAAQAASURBVAM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmsnUrKKpm/9oDbtnvtWoaRNKgtJn/fP91feHAA7VqGcTAjH4fzwnG9Dtq0CKOB2d/jZ6rGeRHNE8FU
O08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYFRYWkp2draxE+r2OHDlCo0aNlKKLN5SWlsZ9991Xo49VQEAAx44dU8YlJibywQcfYLFYMJvNSvFKr9eTkZHBrbfeSn5+Pg0bNiQrK4vy8nL8/f0pKirCx8eH//znP+Tk5LBt2zZ8fHyorKxUCk+nTp3C6XRy7tw5Vq1axYoVK0hPT6devXoANGnSRBnn6+vL1q1bKSwsZO3atVitVsVH9bjCwkJ0Oh3Lli3j119/Zc+ePQQEuD4JWL9+fSU3wNGjR1myZAnLly8nKysLk8m1ZUT1dox79+4lJCSEn3/+mUmTJgEwffp0NmzYwNy5c5Ui25IlSwCoqqpi06ZNfPPNN5w/fx69Xk/Xrl3x9fX8DZ0mTZo0adKkSZMmTZo0XSvNX3uAgxn5bscOZuQzb80Bxj2Q8OeY+p2SBrVl3hr3olqbFq6imiZNmjRp0qRJkyZNssujgtUbb7xBeno6n3766SXPv/LKK0RFRTF16lRPbuMmi8WC0Wis0cfq5ZdfJiUlheeffx5wbZW3dOlSkpOTSUpKYseOHQC0b9+ehQsXMn78eGJiYnj66adxOp0cOnSIjh07UlJSQlxcHMuWLaNBgwaMHj2a7777jn379mGxWABXHy1wrZx65ZVX6N27N3369GHx4sWAazVUtdcGDRqwZ88eUlNTGTFiBOfOnWPdunUAFBcXU69ePUpKSmjTpg2zZs2iefPmPPHEE/z3v/8lPz+f8+fPu93TYDDw+uuv85e//MXtnunp6XTt2pXc3FysVivPPfccRqNR8bl//37++te/kpKSgo+PD926dQMgISGBsrIy5fneddddhIWFKf2vaiMfH62p3O9VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM1j7Zlnz1vdikDVcjhhf3o+eZZyGoUF/mn+qhUc5MdzwzqRW1iGxcvbFl78f29Ilu+dusQTwdQ8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9SibPCpY/fTTTzz44IOXPX/bbbexatUqT25xSe3du7dGH6t3332XEydOkJOTQ0REBMHBwSxevJgpU6aQnJyMXq9Hr9dz7NgxHnvsMR555BEAIiIiKCkpYeXKlXTs2BEAk8mE0+nE6XQyd+5c4uLiiI6O5sSJE+zbt0/xUb9+fSoqKti1axd79+7ltttuY/369WzatEnpWeXr64tOpyMsLIwlS5YQGRlJnz592LJlC6tWrWL06NGAq5+VwWCgqKiIuXPn0qlTJ4qKisjIyCAnJ0e5p8lkwt/fn2+++QaTycSAAQNYu3Ytn3/+OQ888ABOp1PpnVVeXg5Abm4uAOfPn+frr79mwIABAOzYsQObzcb06dOx2Wy89NJLHDhwgK+++qrWc6PX6zxuFHw9qy408NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIprd4u4+c48hPJ4ltHkbHmIb/+4KrkFozi2SqzeOJ7JIrnrfaHB6/T/FmZlHvmdQ2L9eCJ4Kpdp4IpuZRDp4Ippo9ZuaWcPRQDpENTESGB/3vC/6g1JxZFE8EU0aPMmYWwZTVoyzyqGCVn59PaGjoZc+HhIQoK4S8JbPZzPHjx2v0saouMF3cxyo6OppFixYBsHz5cl599VWsViv9+/dXrnvkkUd4+eWX+eWXXxT+6dOniY2NZc2aNcq4oUOH4uvry/fff69cf/ToUZ577jml+JWRkcH69es5d+4cZ86cwWw2k5+fj9PpZN26dcrWiDNnzuSHH34gJSWF0aNHK1sW3nLLLcyZM0e5Z69evZRM1VsD5uTkMHv2bPr27QvA9u3bWbt2LQcOHMBms2E2mwFXYWvXrl1KX6z33nuPDz74gJSUFKVg9emnn9KtWzcGDRoEuFafDRo0iM2bN9OvX79azY/D4cRiKa3VtdezDAb1N/DTPKqPJ4Ipo0cZM4tgyuhRxswimDJ6lDGzCKa3
eDn5pUz++GdKyqqUY0EBPkx6rCsNQ2u34sbbHkXxRDDV6jHQV3fF8yajnoICa63Yas0skieCKaNHGTOLYMroUcbM3mSWlFUy94tUUtMurLxtHxXGU/e2xxRQ+xYYas4siieCKaNHGTOLYMrq8XqQ2Rzwh1edeVSwCg8P5+DBg5c9f+DAAcLCwjy5RQ1FRUWxd+9eoqKilGPFxcXk5eVRr149pdfTpa671J/T0tIICQnh7NmzlJeXExUVxbFjx+jQoYMyxul0kp6eTv369UlLS6NZs2b4+PhQVVVVg3Xxn6OioigpKSEsLMytj1daWprC+qOZunTpopxr2bKlG6vay+nTp7nxxhsBaNy4sVKsulgXe6xeFfb666/zxRdfUFFRgU6n47fffqt1wQrQGspdQXWhgZ/mUX08EUwZPcqYWQRTRo8yZhbBlNGjjJlFMD3l/b5YBVBSVsWkhT/z3j8SPbUHqC/ztWCqzWN4cADtWoZxMCMfh/PCcb3O1SOqgdnfY79qy3wteCKYMnqUMbMIpoweZczsDeac1ak1ehoeSM9n9upUr/Q0VGNm0TwRTBk9yphZBFNWj7LIo4JV3759WbFiBYmJifTp08ft3DfffMPq1asZOnSoRwZ/r8TERH799VelPxPAV199hV6vp0GDBkqvp9+rU6dOGI1Gqqqq8PPzA6CyspKvv/6atm3bsn37doqKikhMTHRbWQWurfMKCwuJi4tj+/btdO3aVTkXEHBhed+GDRto0aIFGRkZFBUV0atXL3Q6HXq9nvnz57NixQrOnz9PVVUVHTp0YP/+/W6ZbDYbY8aMYdu2bTidrndZYWFhFBUV0bRpU8LDw8nNzWXfvn2MHTuW9PR0dDodkZGRnDp1yu2e586d49FHHyU1NZXi4mJat26tsKqVk5PDsmXLajyrFStWMHbsWOU5Xa20HlY1VRf2Q9U8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIpjd4+07k1ShWVaukrIpDpwpoH1W/1nw1ZhbNVLPH5MHtmfO7T9S3ben6RL0n71HUnFkUTwRTRo8yZhbBlNGjjJm9xawrPQ1FMTWP6vQoY2YRTFk9yiads7oyUgsVFxfz0EMPcfz4cWJjY5XVPceOHePw4cNER0ezYsUKZZs6b6ioqIiuXbvSpEkTJk+eTE5ODtOmTeOee+7h559/pmPHjkyZMoWRI0eSlZXF5s2blWsfe+wxtm/fzoQJE2jdujUrV65k27ZtjBs3jilTppCSkkJYWBgdO3YkMDCQN998k7KyMqZPn05UVBS//PIL/v7+vP3226xdu5a1a9fStGlTXn/9dXbu3MmcOXOYMmUKL730Em+99Rb33HMP/fv3Jz09Hb1ez5AhQ/jtt9/IyMjA4XDgdDo5cOCAksnHx4eIiAhuv/12/vOf/ygrpP7yl78wZcoUJk+ezIoVKwBXfzCA77//HofDVa1duXIlnTp14rbbbiMrK4umTZvSsGFDZWvA2267jbS0NDZt2sTTTz/NN998g9PpJCkpidLSUtatW0dgYCBnz57ltddeY8iQIVc9P06n85IruzRp0qRJkyZNmjRputZa+fVhVmw6ctnzD90Vw4N3xl5DR5quhbJyS8jKs3q9Z4kmTZo0aVK/fj2Uw+SPfrrs+Vcev5mb4iKuoSNNmjRp0nQ18miFVb169fjkk0/46KOP2Lx5M5s2bQKgWbNmPPXUU4waNYrAQM/2hf+9goODCQ4Oxul0kpycjMlkYsiQIYwdO5Y+ffooW+85HA7sdrvbtbfffjvbt29nwYIFFBQUEBcXx4IFCzh27Bg6nY7g4GB8fX2Jjo6msLCQcePG4ePjwx133EGjRo3YsWMHXbt2pXfv3txwww2sXbuW06dPM2rUKCIjI3nttde46aabFJ8APXv2JC0tDaPRyJo1a+jUqRPTp09n2LBhVFVVKWMDAwMpLS0lLy+PDRs28OCDD9KtWzeSkpKoqKgA4NZbb2XFihX4+vqy
bds2WrZsyZw5c/j444/ZuXOncs927dpht9ux2Wzs3bsXgOeee44DBw4oY+Lj49myZQtOp5MlS5bQpEkTHnroIUaNGsU999zD8ePHazU/Wg+rS8tgUP9+qJpH9fFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9AYvMuzKzY4b1w+sdV8jUGfm3+tcYRnF5XbM/gbCQzxv/lwXvneC/AzcFBeBxVLm0fyK8ieCqXlUp0cZM4tgyuhRxszeYmo9DTWPavQoY2YRTFk9Xg+6Zj2sAAIDA3nmmWd45pln/ufYyspKfvvtN2JjY6lXr16t73njjTcSEhLC7NmzlWPFxcXk5uYqfaCWLl1a47ro6GgA5s+fT2zshU9Sbtq0icjISPz9/QGIiYnh6NGjfP/998qYhx56CIPBoFx3cR+rV199lcGDBwPw7bffAhf6ZPn6upo5fvjhh259qOrXr8/Zs2eVrwMCAtDr9ezatUs5ZrFYACgpKQGgSZMmAAwcOJCpU6cq47Zs2cLOnTvR612T3qpVK3bu3MnOnTv5+eefGTFiBF26dGH9+vW0bt0agL///e988cUXpKWl8e2339boNVZdJKuNtP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPMmYWwZTRo4yZRTA94bVpHkZQgM8ltwUMCvAhrlmoV7yqKXO1Ssoqmb/2gNu2SO1ahpE0qC0m/9o3nK/W9f69cy14IpiaRzl4IpiaRzl4Iphq86j1NBTDE8GU0aOMmUUwZfUoi67pZopFRUWMGDFC6d1UWyUmJvLjjz8qBR240MeqZ8+el72uU6dOBAUFsXHjRuVYdR+rxMREN/7hw4fJyMhQjh09epTKykpuueUWAIxGIzfffDO+vr6kpaUp4zZs2EB0dLRSXKreDvHYsWPKmKKiIs6ePYvNZqO8vBwAHx8fysrK3DJVr1ir3rWx+v9nzpxxy3Xw4EEATp8+rfgvKipix44dypisrCwOHjzolrO6+DZgwADi4uLo06cPb7zxBtnZ2bRt2/ayz1GTJk2aNGnSpEmTprqil0feRFCA++f0ggJ8eHnkTX+So2uj+WsP1Gg4fzAjn3lrDvxJjjRp0qRJk6Zro6RBbWnTwv2D2W1auD60oUmTJk2a1C2PV1hdrTxomaVo6NChLF26lOTkZJKSksjJyWH69OkMHTqUiIgL+9D+7W9/48iRI+h0OkwmE4MGDWLUqFHMnTuXsLAwpY9VYWEho0aNUq678847qV+/PnfffTcAjRs3pri4mKioKOLj4918bNu2jY8++ogVK1bQrFkzDh8+zMyZM9386nQ6pkyZwmuvvUaDBg0IDAwkICAAm81GUVER/v7+ykqs7t27o9fradq0KTk5OURGRiqrnYqKigDYuXMnbdu2JTAwkMjISKUYVn2+Y8eO3HjjjTz++OPK854yZQoxMTHceeedAOzbt4/z58+j0+koLCxUCmuLFi2iXr16SvbayJOGxter6kIDP82j+ngimDJ6lDGzCKaMHmXMLIIpo0cZM4tgeot3Q4Mg5jx7Kwcz8jmVa6VZuKnGL7FqK7Vm1hrOq5sngql5VKdHGTOLYMroUcbM3mQGB/nx3LBO5BaWYfHytrje8CeSqXlUp0cZM4tgyupRNl3zgpU3FBwczOLFi5kyZUqNPlbVKioq4uDBg+h0OubMmUNOTg7Tpk3jnnvu4emnn2bhwoXk5+crfayaNm2qXLto0SIKCwuJjo7m5MmTZGZmArit3qqsrOS9997Dx8cHo9FIeXk5R48epXXr1vTv318Zl5mZidPppHXr1pw9e5bz58+Tk5PDvffeyxdffKGMKywsRKfT0bx5c06fPs3JkycxGo20aNFCGWO1uvbYveGGG7Db7Zw/f56jR49y88038+OPPyrjvvzyS44dO0ZsbCzp6elUVFSQm5vLq6++io+Pa8o3btxIVVUV//znP9m6dSv79+9XimdWq5Xi4mICAq7+H3O9XkdoqOmqr5NFZrPnL5BE
8kQwZfQoY2YRTLXzRDA1j3LwRDA1j3LwRDBl8tgz1MTl92LwTGrLfCK75IrnrTaHx6/ZZfreEcUTwdQ8ysETwdQ8ysETwVSzR1G/n1JzZlE8EUwZPcqYWQRTVo+yqE4WrMDVj2rRokWXPb9q1Sp8fX357rvvCAkJAcButzN58mS+++47kpKSLnldRUUF8+bNY9SoUYwbNw4Am81Ghw4d+PXXX5VxmzZt4tixY4SFhTF48GDGjx/Ptm3bGDVqFPv27VNWYu3evRuAzz77DD8/PwCeffZZtm3bhk6nIzg4mOzsbEpKSmjfvj3/+c9/AFcB67bbbiMjI4OEhAQAtm3bBsALL7xAv379APjkk0+YNGkS4CrkAbz33nv85S9/YcaMGezcuZMRI0YQExPDypUruf322wEYPXq00rfqiSeeAGDdunWMHz8egEOHDtGwYcP/NQ015HA4sVhKr/q6610Gg/ob+Gke1ccTwZTRo7ebzYtgyjgvoP7nKOu8aB7VxxPBlNGjiMwrNh/h8MlC2rQIYWjfGI953vKoNZxXN08EU/OoTo8yZhbBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOrzupswep/KSUlhe7duyvFKoD+/fvzyiuvsH37dgYPHnzJ63bv3k1JSYnbKimj0Uh4eDjp6elu/FatWnH8+HGioqIA1wqskJAQvv/+e+Lj47HZbEofrPT0dLeeUevXryciIgJ/f3+lEFW99R9ASEgIPXr0YOvWrQo/NTUVnU5HTk6OW6aJEycCEBUVxenTp8nIyOBf//qXW64ePXqwbNkybDYbRqNRKVZdrDZt2lz5of5BaQ3lLq+60MBP86g+ngimDB5FNJvXGth7h1fXnqMs8yKaKaNHGTOLYKqRt+vIOWZ/caEvb0Z2MRt+Os0zQ9qR0OrqP/T1e3nqUWs4Xzd4IpiaRzl4IpiaRzl4IpgyepQxswimjB5lzCyCKatHWVRnN1M8ceIEjz76KAkJCfTs2ZPp06djs9mU82lpaUqhp1pms5nw8HBWr17NrbfeSnx8PA888AC//fab23UAJpOJMWPG0LFjR7p27YrBYKC8vJxz584p4/z9/dHpdHz00Ue0b9+efv36YTabFcapU6ew2+34+/szceJEevbsSUJCAvPnzwcgLi5OYdWrV49jx44xdOhQJVNaWhpVVVXccsstAGRkZBAZGcny5cu56667aN++PQ8//DC+vr6EhITQpEkT5d4RERFMmDCBJ598EoAdO3ZQWVnJ6dOnL/k8s7Oz+etf/wqAXq/3WvFKkyZNmv4siWg2rzWw946056hJk6a6rouLVRfrvf9c+vifIa3hvCZNmjRp0qRJkyZNmuqa6uQKq6KiIkaOHEmLFi2YNWuW0p+qvLxcWW1ksVgwm801rrXb7fz666+88MILxMTEsHz5ch577DHWrFlD06ZNsVgsGI1GkpOTAZgxYwbl5eW88sorAIwZM4YxY8aQlZVFUVERTqeTm266iYkTJ/LTTz8xd+5czp49q/gEaNy4MXv37uXuu+8mNjaWjz76CIBOnTopXkNDQyktLeXw4cM8/vjj5ObmsmrVKgBle0GLxUJMTAw7duygbdu2jBs3jnXr1lFZWUmTJk3c7vnSSy+RnZ1Nr1692LRpE9nZ2QD8+uuvREdHAzBq1Ci6detGTEwMs2fPVgp+999/P+Hh4bWeHx+fOlsHFaa60MBP86g+ngimLB5FNJvXGth7h1eXnqNM8yKSKaNHGTOLYKqVt2zT4Sue//TbYzx0Z+22B/RmZq3hvHp5IpiaR3V6lDGzCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2VQnC1arVq3CarXy/vvv1+hPlZSURERExCWvq6iooKCggJiYGB555BEAOnfuTL9+/ViwYIHSC8rhcHDs2DE2bNigrNI6evQoc+fOpby8nOTkZGw2G/7+/tx44428+uqrANx888189NFHVFVVud03LS2Nvn37
smvXLjZv3kxUVBRFRUUcOXJEGVNcXIyvry/dunVjwYIF+Pj40LhxYzIzM8nJyVEyHTp0iJtuugmLxcKMGTOIjIzEx8eHvLw8t3sePux6I71p0ybA1RMLYOXKlTzwwAMAtGzZks8//5ysrCxsNht6vR6Hw8E//vGPq50SRXq9TlhTy+tBdaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9BYvM7eEo4dyiGxgIjI8qNaco6eLrnj+8KlCVf0cqwsN53cfOceRn04S2zyMjjGeb6lYLbV+L4pkah7l4Ilgah7l4IlgyuhRzZm99XrnUpLpOdYVngim5lGdPJl0TQtW9erV44033uDGG2/0iPNH+lOZzWaKi4vdrtu9ezcOh8Ntuzuj0cgdd9zB5s2bAde2gVVVVbRu3dptS8EbbrgBgFtvvZWxY8dy3333ceDAAQYMGOB2j6ZNm5KWlsaZM2cIDg4GwOl0MnXqVOXrjIwM7rrrLo4dO6bcs6SkhMTERObMmaOwpk2bxscff6xkCgoKorCwkEcffZS+ffsq4zp37sy5c+ew2WzKPUwmE7t27UKnczVc3r59O4899hiNGzdWrnvppZew2Wx0794dgCeeeIL33ntPuaY2cjicWCyltb7+epXBoP4GfppH9fFEMGXxKKLZvNbA3ju8uvQcZZoXkUwZPcqYWQTTW7ySskrmfpFKatqF1Z3to8J46t72mAKuvm9e66bBZGQXX/Z8bLMQ1fwcE8H0Ji8nv5TJH/9MSdmFD/wFBfgw6bGuNAyt3Wpbb3sUwRPB1Dyq06OMmUUwZfQoY2YRTLXzvMn09usdER5F8UQw1c4TwdQ8qtfj9SCzOeAPrzrzqGD1yy+/XPG8TqfDaDTSqFEjGjZsiJ+fH/fee68ntwRcK5buu+8+t2PV/amqezhFRUUpf67WwYMHAejYsaPb8ejoaBYvXkx5eblSpGrY0P2Tfenp6RiNRk6dOqWc379/v1tRy+l0Kiud0tLSuPnmm9Hr9QQEBCiFpOpzALm5uYrXyspKt2ISQGZmJr6+vsr48PBwCgsLadmypTKmuLgYq9WK0+nk9OnTip/w8HC3wlNaWhp6vZ6cnBy3ezz66KNYrVY++ugjpT+Xp9Iayl1edaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9JQ3Z3Vqjb55B9Lzmb06lXEPJFw1b2if1nz9y5nLnv/b7Teq7ueYCKY3eL8vVgGUlFUxaeHPvPePRI/YoM7MopmaRzl4IpiaRzl4IpgyelRjZm+/3rmUZHiOdY0ngql5VCdPJnlUsBo+fPgfXo3TvHlznnnmmRorkmoji8VCZWUljz76KHv27MFkMjFo0CDMZrPSwykxMZEPPvjArZfV7t27AThz5gy33nor+fn5xMXFcdttt+F0OikqKqJTp07odDpycnIYM2YM27Ztw8fHh6qqKsLCwhR+mzZt+Pbbb9m5cydvvfUW6enphIaGYrFYAFcvKaPRqBSZ3nzzTdauXYvVasVkMhESEkJJiWtbpF69egFw/PhxJVNAQADFxcVumVq1asWxY8fYtGkTa9asISsri9DQUOW5VPv39/cnJyeHYcOGceDAAcrKyoiPj6dRo0Zuq84efPBB5ZmMGjVKOb569Woef/zxWs+P1sOqpurCfqiaR/XxRDBl8pg8uD1zfvcJs7YtXZ8wq+3PKRFMkGteoO48R9nmRRRTRo8yZhbB9AZPVN+8f94fzzuf7bvkcTX9HBPB9BZv34m8GsWqapWUVXHoVAHto+rXiq3WzCKZmkd1epQxswimjB5lzCyCqXaet5gi+wR7y6NIngim2nkimJpH9XqUTR4VrD766CPeeustbDYbf/vb32jWrBkAJ0+e5LPPPsPf358nn3ySzMxMPvnkE5599ln0ej39+vXzyLTT6eSzzz6jTZs2zJo1i5ycHKZNm4Zef+EbYejQoSxdupTk5GSSkpLI
ycnhhx9+QKfTsXDhQsaPH09MTAzjxo1j5syZynV+fn6YzWaOHTtGYWEho0eP5rvvviM1NRVf3wtLaLt27ao8g969e9OnTx+WLFlSw2ubNm34f+yde1xUdf7/n3NhuAxyk4siKkIqqJhSWmpSpl3UzI10czVvWV8qat1ca7VatXLT3G2r1aQ0De92WU0tzeymoWZ5SRHvgoKgqAwwMHKdmd8f/Dg6oW7CfOzg57wej31snPM5z/N6nc/AjLz5fN7fffcdS5YsYdSoUZw9e5Z169bh5eWF01nzp+XNmjVDp9Oxbds2IiMjefLJJ/nss88oLCykqqpKYd1+++1s2LCBd955hwceeOCK92zRogXHjx8nPz+f6Oho9u/fT0ZGBn379uXIkSMArFu3TilWvfbaa0BNQW/16tXEx8fXe260HlZXV2PYD1XzqD6eCKYMHgMD4fXk3uSdKyXvvM0te3iLYF4qGeYFGt9zlGVeRDNl9ChjZhHMhvBE9c3re3sb+t7ehgVr9/PL4bN0aR/KuAc71ddmHd3o8wKQZ8m56vncggsk3NKqQfdQW+brwdQ8ysETwdQ8ysETwZTRo9oyX48+wXDjP8fGyBPB1DyqkyeTGlSw+uGHH/D09OTjjz/GZDK5nBs+fDgjR47kl19+4fnnn+dPf/oTDz/8MPPnz29wwcpkMlFRUcGcOXOUPlZ2u50pU6ZgNNZE8vf3Z9GiRbz22mskJydjNpvp0qULO3bsYNSoUYwZMwao2Y6vsLAQp9OpbNvXpEkTiouLcTqdpKSkEBsby6RJk5gxY4ZSZGratOav/Wp7Re3du5f77ruP/Px80tLSFFZYWBgAQUFBLF68mPDwcF566SXeeOMNl2fm6elJRUUFxcXFpKSkEB8fzwMPPMDs2bOVTLV9tAICAvj6668xm82MHDmS77//nszMTOWebdu2paKiAr1ez8GDB4GaflibN29Wxqxdu1a599///neX57t7925iY2Px9r72byyth9Xl5e79S93NE8GU0aOMmUUw3c0rKS0HoLS0nEJj/Xv0XSpfTwO3xoZhtZbVu0/JpZJxXgAycwrJOW+jorwSbzfMTWPIrHmUw6OMmUUw3cET2TcPYOhd0Yx7sJP2flAPhQdd/d8aLZr6qKYXmEzzcqnOFpVRUm7Hz8tASEDDf+ki43MUkTkjy0LOeRutQsx0iAxqME/GeRHBVDtPBFNGj2rNLPrzjizPsTHxRDA1j+r1eCPouvWwWrduHU899VSdYhXUFGAGDRrEe++9x/PPP4+npycPPvggc+fObcgtATAYDPj6+irFKoDevXsDUFlZqRyLjo4mNTVV+XrBggXs2LGDTp0u/vXjsmXLSExM5NChQ3h5ebn437p1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWloaPj41S20rKiqU+7Rs2VIZN3/+fMrKypSvjUYjTZo0IS0tTTmWl5fH7NmzlUy1/v74xz/y17/+VRl36tQpMjMzFU9RUVFs376dHTt2sHr1aiZPnswdd9xBamoq7dq1A6B///5s2bLlss/3n//8JxkZGS4rz65F2v6cV1Zj2A9V86g+ngimDB5Ly6qYtzbDZWuETm2CSBrcEbNXw5rO1kptma8H0x28/MIL/GPxTpetoHy9jfx99K2EBNR/q4paqTGzaKbmUQ6eCOaN7vF69M1rqMfrwRPBbCivQ+sgfL2Nl90W0NfbSGyrQNX1ApNhXkD8ZyhZnqO7eY3t85MIpoweZcwsgql2XkOZ2ucdcUy180QwNY/q5MmkBm2mWFZWxvnz5694/ty5c1y4cHG1TZMmTVy27auv7HY7RUVFSr8oQCkuXa54VqvalUoHDhxQjlVVVZGbm4vdbqe8vOav8HU6HRUVFZw4cUIZ9+OPP+J0OpVtAc+cOQPgMgZgz549AIwdO5ZevXopK5x+/PFHZUxxcTGFhYXY7XaXTAUFBSQkJNC5c2ceeeQRVqxY4ZKp1t/q
1avp2rUr3bt356WXXuLYsWMunhISEiguLqZfv368/PLLACxatIgDBw6QkJAA1BT4/vCHP6DT6TAYDC4Znn32WZKTk6/4HDVp0qSpMWje2ow6TWcPnLDw/pqM38mRplr9+pctUNOv5LVFO38nR5o0abqRlTS4Y51VCB0ia375run31d9H34qvt+vfUNb+Al7T7yftM5Q6pX1+0qRJ09Wkfd7RpEnTjaIGrbC67bbbWLx4MV26dKFPnz4u57799lsWL17M7bffrhw7ePAgLVq0aMgtgZpVVF5eXi79qWbNmoW/vz/V1Rc/wI0ePZq8vDw2bdoEwIULFzAYDCxatIiQkBDatWvHihUrlJVOxcXFeHl5odPp8Pf359lnn2XChAmUlZUxa9YsgoKC0Ol0yliAzMxMpk2bpqxY2rFjh3LvyMhIpk2bhoeHB7NmzUKv1xMWFsb777+Pp6enUoCCmmKU0+nE09OTZ599lvXr1zNv3jyaNGmiZLJYav7RcO7cOQYMGECbNm1YuHChi39AKYRZLBbuvPNOvv32W9577z1atGjBvffeC0BISAjR0dGYTCY6d+7Mbbfdxvr168nMzOT48ePcdNNN9Z6fhjSavlHVGBr4aR7VxxPBlMWj1nRWvR73HT9/2b+mh5pfuhzMLiQuqmm92GrNLJKpeVSnRxkzi2C6i+fv68kLI+I5V1SG1Y3bm13qTW2ZRTLdyWse7Mvcv97FgRMWss+5d4uzS/9fbTwRTHfxRH6Gkuk5upvXmD4/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lE2NahgNWXKFEaNGsXTTz9NWFiYsuVdTk4O+fn5hIeHK/2RKioqOH36NEOHDm2waZ1Ox9ChQzl8+LDSn2rIkCF1trhzOBwuq5gA9Ho9zzzzDAsXLsRisRAbG8tTTz3F22+/7cJPSEigoqKCCRMmYDQaueeee8jPz1d6WNXqr3/9K2vWrOHTTz/FbDbj4eFBVVUV7du3Z9CgQaxatYpdu3aRmJjIm2++ic1mIz4+nkceeYTFixcrz8bpdBIXF4evry9z5szBx8cHX19fpUAGsG/fPgCeeOIJvvrqKzZt2kRYWBinTp1y8ZSSkkLnzp1p27YtX3zxBQChoaEYjUZllRnA999/z913361kb9GiBZMnT2bz5s1UVVUpq8muRXq9zi2NHG9UNYYGfppH9fFEMG90j1rTWXHMhvLyLDlXPZ9bcIGEW1o16B5qy3w9mJpHOXgimGr2mHuulCMH8wkPNhMe4ttgnsjPqDLNiwher0AzvdxGuyg1ZxbFbCjvenyGkuE5upvXGD8/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lEWNahgFR4ezrp161i5ciVpaWnk5uYCNb2jRo8ezSOPPKL0cvL09GT+/PkNdwz4+flhMplc+lMBfP755/j7+ytfL1mypM51VVVVjBkzhqSkJOX4xx9/rKyqqh3ncDiYPXu2y/XDhg2jefPmAMrY9u3bs27dOgBGjBiBh4cH27dvV8536NCBXbt20aVLF1599VWF9dZbbyljdu/eDUC7du14/fXXlTEzZsxg8eLFyrjMzEwAEhMTmThxIgBOp5NbbrkFm82Gv78/lZWV7Nixg4kTJzJmzBhuvfVWJk+ezMSJE3nhhRc4deoUERERHD9+nF27drF8+XLlfomJiXh7e/OXv/yF7OxsoqOj/8dM1JXD4cRqvfC/B0omg0H9Dfw0j+rjiWDK4lFrOqtej+FBV//Q1qKpT73nRq2ZRTI1j+r0KGNmdzJLy6pIWZ1OeubFVR5xUUE8/VAcZu/6989Rc2ZRPBFMGT3KlFnkZyiZnqO7eY3p85MIpoweZcwsgql2ngim5lGdHmXMLIIpq8cbQX5+3r951VmDClYA3t7ejB07lrFjxzYU9ZsVFRWlFG9qVVJSwrlz54iKirrqdQBZWVnExMQoxzMzMwkPD8fLy0sZd+TIEZdrnU4nWVlZ9OpV87d/
rVq1wsPDg8zMTHr37q1wbr31Vpd71d6ntpfVpfesHVObpbCw0GVMixYtcDgcysq12vOXXqvT6QgMDKSsrIyWLVuSnZ1NVVVVnecQGRmpXBsREcHevXuBmm0SH3roIQ4fPkxoaCjdunW74vP7rdIayl1ZjaGBn+ZRfTwRzBvdo9Z0VhyzobwOrYPw9TZedlsbX28jsa0CG+xXbZmvB1PzKAdPBFONHueuSq/TPycjy8K7q9KZ8EiXBrpTZ2bRPBFMGT3KkPl6fIaS4Tm6m9cYPz+JYMroUcbMIphq54lgah7l4Ilgah7VyZNJDS5Y/R5KSEjgvffew2q14ufnB8CXX36JXq9XCkoAx48fZ/r06ezZswez2czAgQPx9fVlw4YNSiGpqqqKr776ioSEBOW63r17s2bNGu644w6sViuxsbEMHDiQoqIi7rzzTgBMJhNdunRh7ty5vP3223h4eGC1Wjlx4gTR0dFEREQAcMcddwDwySefsHLlSsLDwxk5ciRpaWk8/fTTAFitVgwGA1u2bKFHjx6UlZXRtWtXAgICAOjUqRNQ0+fKbDYzdepUJk6ciNlsZvDgwVitVvz9/TGZTEofq507d/KPf/yDnJyarQNqC2a158+fPw/Ak08+CdSsgDObzaxduxYvLy9atWpV7/nReljVVWPYD1XzqD6eCKZMHpMT45j7q7/Q79im5i/0G/pzSq2ZRTLdyZv2WHemLfzJ5Zcuvt5Gpj3WvUFzo+bMopiaR3V6lDGzu5ha/xzNoww8EUx38kR9hpLtObqb11g+P4lgyuhRxswimGrniWBqHtXpUcbMIpiyepRNDS5Y/fDDD3z66afk5ORgtVrr9HjS6XR8/fXXDb2Ni4YNG8aSJUtITk4mKSmJ/Px8Zs2axbBhwwgLCwNqCjODBw9Gr9fz7rvvkp+fz8yZM7nppptYuHAhQUFBtGvXjhUrVlBUVMS4ceMU/qU9oZ566im++eYbXn/9dW677TY6d+4M1BS6zpw5Q1FREQkJCcTFxfHuu+9y5MgRl35YtayKigrGjh1Lfn4+r732Gv7+/gwbNkwZ53DUVFz9/f0ZPXo0q1evZtu2bQAEBwcrYyorK7HZbAwaNIjQ0FAWL15MVVUVcXFxLs/o/fff59577yU6OppvvvmGadOmAXDmzBmgpvgFNYW3xx57DJvNxsqVK5Vj9elfBVoPq/+lxrAfquZRfTwRTBk8BgbC68m9yTtXSt55m9t6oFwqtWW+Hkx38AIDzayYPpA9h89y6KSFmNZBdG0f6gZ3NVJjZtFMzaMcPBFMtXnU+ueI4YlgyuhRlsyiP0PJ8hzdzWtsn59EMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkBhWsPvjgA958802aNm1K586dad++vbt8XVX+/v4sWrSI1157jeTkZMxmM0OGDOG5555TxqxcuRK73U5wcLCyZZ/dbmfatGk8/vjjLFy4EIvFQmxsLAsWLFC23auoqOCDDz7g0Ucf5ezZs8ybNw+DwYC3t7cyBmDjxo2cOnWKqVOnsmLFCrZv345Op8PpdNKiRQtlXEpKCkajkdjYWD777DNsNhvBwcF4e3vTpEkTZZzT6eTJJ59k7969pKSk4O3tjV6vx+FwKD2sKisrcTgcvPTSSyxdupS8vDyCgoLIz88nJCREeTZQU3T66quvXPgAhw8fBuDQoUPAxd5iOp2OiIgIjh07htVqpbS0FF/fa/9HidbD6vIyGNS/H2pj8LhuaxaHsovo0DqAgT3bNJjXGDJrHhvO8/U0cGtsGFZrWYP6Vl0qtWcWwRThMbp5E7q2D3Xb3DSGzDJ6PFtURkm5HT8vAyEB7vnQrvbn2BjmRa0etf45mkc1epQxM7j/M5SMz1HGz08imDJ6lDGzCKbaeSKYmkd1epQxswimrB5vBPn5XaceVosXL+b2229n3rx59V6RU19FR0eTmpp6xfNbtmyhT58+zJ07VznWv39/pk6dSps2bdi8efNl
r9u9ezelpaUMGTKE2NhY5fiMGTPYtGmTC799+/YMHz6c4cOHAzBixAj27t3L5s2b6dy5M5WVlfz4449UV1czfPhwEhMTAfjmm294+umnOXXqFBEREVitVqBm+8BLi2733HMPZ86cUXprVVdX4+3tzahRoxg1ahRQs5Kse/fu6HQ1/7iv/f8HH3yQf/zjHwrr5Zdf5pNPPiE5ORmA0tJSJVefPn2UcV26dKGsrIzKysorPtv/JW1/ziurMeyHqkaPB05Y+NfKX5Sv048X8NG3x3lheBdiWgU10KE6M4tmyuhRxswimDJ6lDGzO5ilZVXMW5vhsr1bpzZBJA3uiNnLPZ8b1f4c1TgvonkNZWr9c8TwRDBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT8ygHTwRT86hOnkxq0GaKVquV++6777oXq36LMjMziYqKcjnm5+dHSEgImZmZV70OqHNtdHQ0eXl5ylZ6l+MnJCRgt9uVVUzZ2dlUV1fX6a0VHR3tci+73Y5OpyMtLU0ZU1VVRWFhIXr9xSkqLy+ntLSUEydOKMcyMjIAlNVQtVsQ1vasqlVtL6varQe9vb0xGo3KtoO1cjgceHh4EBTU8CKAJk3u0qXFqks1a/nlj2vSpEmTpt9f89ZmcOCExeXYgRMW3l+T8Ts50tRYlDS4Ix0iXT+LdoisKXZq0qRJkyZNmjRp0qRJk6YbVw1aYRUXF0dWVpa7vFyTjh8/zvTp09mzZw9ms5nBgwfzl7/8BZPJBNQU0/z8/Opc5+/vz48//shdd92lbAk4efJkunTpolxnMpkoKipi+vTppKWl4eHhQWxsLE6nk+LiYry8vLBarTRp0oRvv/2Wt99+m6ysLMLCwtDpdOzYsYO0tDR27NgBQN++fUlNTWXt2rXYbDaMxprHXlxcDMCFCxcICAhgwYIFfPnll+Tl5QE1K6pqV0xBTcEqODiYUaNG4XQ6KSgoQKfT4enpiaenpwvz4MGD9OnTB6vVSmVlpbJiqvZ8VFQUO3bsYOnSpXzxxReUlJRgMBioqKhocLGqIQ1fb1Q1hgZ+avW45ocrF5gBNvx4kkF31G97QLVmFsmU0aOMmUUwZfQoY2Z3MU8X2FxWVtXK4YT9WRbOW8tpFuTzu3psTDwRTDV79Pf15IUR8ZwrKsPqxu0k1ZxZFE8EU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9yqYGFaymTZvGE088QadOnRg0aJC7PP1PFRcXM3r0aCIjI5k9ezb5+fnMnDmT8vJypkyZctVrLRYLFouFSZMm0b59e5YtW8Zjjz3GmjVrXHpUPf744wC8+eablJeX88orr9RhnTt3jmeeeYYhQ4bw4osv8uOPP5KSkkJ1dTXJyclK8ay6uppPPvmESZMmERYWxvjx4wGU1VoATZo0obKyktzcXJxOJ82bN+fMmTPY7XZljE6n4+abb+abb77Bw8MDT09PwsLCyMrKoqCgwMWbt7c3FouF6upqgoKC8PLyIjs7m+zsbOLj4xk6dCipqak4HA7Ky8ux2+1Knyubrf77YOv1ugY3wr6R1Rga+KnN45FTxVc9fyiniFEqa74uw7xcD6baeSKYmkc5eCKYavN4/EzpVc/bKh1u+byg9ueotnm5Hjx3MkV9plRzZlE8EUwZPcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLD6y1/+QnV1NS+88ALTpk2jWbNmLlvYQU2RZe3atQ0y+WutXLkSm83GnDlzCAgIAGq21XvllVdISkoiLCwMPz8/SkpKXK6rqKjAYrFw8803M2bMGABuueUW7r//fhYsWMC0adPw8/OjsrKSI0eOsGHDBmXbvz179rB48WKys7MV/r59++jcuTOvvvoqALfffjsrV66kqqqKPXv2cOzYMQYOHMjmzZuZNm0aQ4YMASA1NZWhQ4eyZ88ehg4dip+fHwUFBTidTtLS0pRMTzzx
BFu2bCE/P1+5586dO3nggQd48803lVydOnXiyJEjQM0KMqgpOn3zzTdEREQA8N133/Hkk0+Snp7OH/7wB6KiooiOjiYzM1MpUPXt25fs7GwOHTrE6dOnad68+TXPjcPhxGq9cM3X3egyGNTfwE+tHttF+JN+vOCK52NaBqim+bpM8yKSqXaeCKbmUZ0eZczsLqaPh+6q580mfYMaxav9Oap1XkTyAM4WlVHi5hVRas+seZTDo4yZRTBl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWjzeC/Py8f/OqswYVrAICAggICKB169YNwVyztmzZQo8ePZTCDkD//v2ZOnUqW7duJTExkaioqDq9qtLS0nA6nfTu3Vs5ZjKZuOeee9i0aRNwsXdVZGSkS48qvV6PTqfjxx9/pFu3bkRGRpKens4TTzyhjHE6nVRWVmKz2Th16hStWrXCYDBgt9u5//77lXHnz58H4NixY8o9bTYbCQkJLpkMBgOAkqlFixakp6fTv39/ZUxJSQlVVVXk5eVRWVnp4rlJkybKf9f2sLp0m8SmTZvSokUL/vrXv+Lv709YWBh33nmnkqW+0hrKXVmNoYGf2jwO7BHJfzdfeVvA/re3Vl3zdRnm5Xow1c4TwdQ8ysETwVSbxxB/b2JaBXAou6jOudjWAQT7ebnFr9qfo9rmRRSvtKyKeWszXLaB7NSmpueU2avhvW7VmFk0U/MoB08EU/MoB08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLGlSwWrJkibt8XJMyMzN5+OGHXY75+fkREhKiFKkSEhJISUlh5MiRpKenYzablWLN4MGDXa6Njo5m0aJFlJeXEx8fj16vx2q1Kn2uYmJiyMvLo2nTpgo/JiaGdevWsWbNGt555x08PDzo3LmzslopMzOTiIgImjVrxpkzZxg5ciRZWVmEh4cTEBBAYGAgubm5ANxxxx1ATV+uXr16YbPZiIuLU3zX3vOmm24iPT2dlJQUJk6ciNlsJiYmBr1ej91uJycnh+joaFq1akVeXh533303FRUVtGrVioKCAgwGA4mJiQBUVlbi5eXF5s2b2bZtG5WVlbz44oucOXOG2267jfDwcBFTp0lTvfTC8C7MWv7LZY9r0qRJkyZ16oprrOr/NzGaVKp5azM4cMLicuzACQvvr8lgwiNdfh9TmjRp0qRJkyZNmjRp0qSp0alBBavfS1ar1WWlUK38/f0pLq7pdzNgwADefvttDh48yJNPPsnJkydZtWoVgEuvqtGjR3Ps2DGcTifFxcWEhYVhMpmwWCz069eP+Ph4UlNTOXfuHDfffLPCj42NBeDo0aM88cQTlJaWsnTpUgIDAyksLFTGhYSEkJubS3V1NRMmTGDTpk3s2rWLW2+9lb179wLQrFkzAHJzc0lMTKR169bMnz9fKTbVsmJiYgDIyspyydSqVSuys7OVcQkJCSxdupSqqiqqqqo4fvw4ULPFYG32wsJCtm3bRlhYGGfOnAHg9ddfp1WrVrz//vsNmh+jUWsq92s1hgZ+avbYKSqYxS/344ttWRw4WUSH1gEM7NmmwVw1ZxbFlNGjjJlFMGX0KGNmdzFPF9g4eJnVVQAHs4s4by2nWZBPvflqf45qnRcRvNMFNpeVVbVyOGF/lqVBc63WzCKZmkd1epQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6lE2XVPB6ueffwagW7duLl//L9WOv55av349JpOJmJgYUlJSMJvNSmGnticUgMPhwOG4uDyvoqKC8vJyQkJC2L9/P1u2bCEmJgaHw8G5c+fw9fUFYPfu3UBNESk1NRWj0UjPnj1JS0tz8XH69GmMRiMGg4E333yT8PBwOnfuTFZWljKmtmDUqlUrNm/ejM1mo3Pnzuzbt8+lD9cvv/wCQNu2bZVMvXr1Ytu2bS733Lx5M56enuh0OoxGI82aNaOwsJDU1FQeeughoqOj8ff3JyoqikOH
DqHX63E4HDz44IPs2rWLv/zlL7z33nvodFfvP3E56fU6YQ2ybwQ1hgZ+avb46MBObmNdKjVnFsWUyeNXO06SfuwcN7cNoV9392xh+9HXh9l75Bxd24cytG87tzBBrnkRyVQ7TwRTbR6Pnym96nlbpcMtnxfU/hzVNi8ieNdjrtWW+XowNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LomgpWI0eORKfTsXfvXkwmk/L1leR0OtHpdBw8eLDBRi+Vn5+fSyGnVsXFxfj7+wM1fa569erF3LlzlfMLFixg1qxZfP/99zzyyCNAzbaGH3/8MVOmTMHf39+lEPXBBx8o186YMYPly5fTpUsXADIyMgAYP3680hPL6XTSrVs3SkpK8Pf3p7KyknPnzuHt7c26desU1jfffMPTTz9NYGAggFLk6tOnDy+++KIy7plnnuH7779XMh09elTxUturymq1KgVBf39/cnJyyMnJwd/fn7S0NEwmEwCpqanMmDGD2bNn8/bbb+Pp6cnZs2cZO3Ys7dq1Y/LkyUyePJmsrCyGDx/O1q1bla0Kr0UOhxOr9cI1X3ejy2BQfwM/zaP6eCKYMnnMyivm1dSfqUV8vzuXOZ/8wtSx3YlsXneV7m9RRlYBbyzbo3ydfryAxesPMvnReGIjg+rtVaZ5EclUO08EU60efTyu/kcvZpOewkJbvdig/ueo1nkRwRM512rNLJKpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ9dUsFq8eDGAUgSp/fp6KyoqSunrVKuSkhLOnTunFHIu1+eqQ4cOQM1KpdqCVe3Y8PBwvLy8FO65c+fq3LOyspJWrVoBcPbsWXQ6HZmZmUrBSqfTERISQklJCVFRUWRnZ+NwOLhw4YJLMS06Ohqo2S6w9v5Go1HpaVWriIgIqqqqlEy1njIzM5Vjfn5++Pr6cuHCBVq2bMn27dsBiIyMVOYJalZlAcr2gBaLRenPdekKs9pnlJ2dfblH/5ukNZS7shpDAz/No/p4IpgyeLy0WKUwHfDKhz8x/4W768W8tFh1qWYs3c3CSfVjXioZ5uV6MNXOE8FUm8cQf286tQniwAkLjkt6Vul10CEyiGA/L7f4VftzVNu8iOBdj7lWW+brwdQ8ysETwdQ8ysETwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KomsqWHXv3v2qX18vJSQk8N5777n0svryyy/R6/X06tULqFl5VFVVxdixY9mzZw9ms5mBAwei1+tdVnxVVVXx1VdfkZCQoFxnMBg4dOgQd9xxB1arldjYWKVAVLvCqrS0lODgYObOncvbb7+Nh4cH99xzDyUlJXh7exMREcGuXbuU+wwaNIjCwkLCw8NJTEwELhaRrFYrvr6+fP/99/To0YOysjK6du2KXl9TdazNZLPZ8PPzY+rUqUycOBGz2czgwYOprq4mNDQUk8mk9LHav38/Xbp0Qa/X06ZNG/r27QtAUFCQ8v/e3t4cOHBA6Y3Vo0cP7rnnHgBatGhR7/nReljVVWPYD1XzqD6eCKYsHr/fc6pOsapWdgds23+ahC7X9nNuzQ+ZVz2/4ceTDLqjfr3VZJkX0Uy180Qw1ewxOTGOuavTSc+0KMc6tgni6YfiGvxZQe3PUc3zIoInaq7VnFkUU/OoTo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY/q9ChjZhFMWT3KpmsqWP1a1dXVlJeXK32dfq3S0lK8vLwwGht0mzoaNmwYS5YsITk5maSkJPLz85k1axbDhg1TelM5nU4WL16Mh4cH7777Lvn5+cycORMPDw8OHz7MokWLaNeuHStWrKCoqIhx48YpfKfz4p+HPvXUU3zzzTesWrUKuLgCyel0cuHCBWw2GwkJCcTFxZGamorNZqN9+/Yufp1OJxaLhVGjRpGfn8+///1vAG6//XZlTHV1NXa7HX9/f0aP
Hs3q1as5ceIEOp3OJVNFRQVWq5VBgwYRGhrK4sWLqaqqUnxdes82bdpw991388svv/DOO+8AcO+99wI1q8H++Mc/snz5crp27QrUrJzbunUrbdu2pUePHvWaG62H1dXVGPZD1TyqjyeCeaN7zDxdd9vYS3Usz8rgPtfWe+rIqeKrnj+UU8QolfVpEcGU0aOMmd3BDAyE15N7k3eulLzzNsKDzYSHXP4zY32l9ueoxnkRwRM912rMLJqpeZSDJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUNqiRNnz6dnTt38vnnn1/2/J/+9Cduu+02Xn755Ybcpo78/f1ZtGgRr732GsnJyZjNZoYMGcJzzz2njDGZTJSVlREUFKRs2We325kyZQo333wzCxcuxGKxEBsby4IFC2jZsiUAPj4+OBwOhg0bhsViYd68eRgMBkwmE5WVlcq2fk6nE5vNxtSpU1mxYgXbt28nMDAQm81G06ZNFZ9Qs53gXXfdxWeffYbNZiMwMJDCwkKaNWsGgMFgoLS0lGeffZadO3eSkpKCt7c3Op0OLy8vl0xVVVW89NJLLF26lLy8PIKCgsjPz1dYtff8xz/+werVq1m6dCkOhwMvLy/Ky8tdimQTJ04kKCiIOXPmADVFsxYtWvDBBx+4bCd4LdJ6WF1eBoP690PVPKqPJ4Ipi8eo5k34/irnbwr3u+aeKu0i/Ek/XnDF8zEtA1TTp0UEU0aPMmYWwfT1NHBrbBhWa1mD+lZdKrU/x8YwLxlZFnLO22gVYqZDA3rwXSp3z7WM86J5VKdHGTOLYMroUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwebwT5+QnqYfVr/fDDD/zhD3+44vn77ruPtWvXNuQWV1R0dDSpqalXPG8wGAgJCeH7779XjtUWrtq2bcvHH3982euqq6sBuO222xgwYIByPDExkUOHDikFJKfTiaenJ8OHD2f48OEAOBwOOnToQFVVFYBSRIqKiuJvf/sbf/vb3wB46623eO+99/Dx8QGgoqICgMGDB/PMM8+4+C0rK3PJ5Ovry6hRoxg1ahQAeXl59OnTh8rKSuVeUNPbasmSJcq1DzzwAMeOHVMKc1BTAAsPD8fHx4fi4mJCQ0Pp2bOn4ru+0vbnvLIaw36omkf18UQwb3SPd8SFs2jDoctuC2jQQ89Oza+ZPbBHJP/dfOVtAfvf3lp1fVpEMGX0KGNmEUwZPaoxc37hBf6xeCelZdXKMV9vI38ffSshAT7usKj656jGeRHNE8GU0aOMmUUwZfQoY2YRTBk9yphZBFPtPBFMzaMcPBFMzaM6eTKpQQWrs2fPKtvVXU6hoaHk5+c35Bb1lt1up6ioyKXP1datWwHqrB46fvw406dPZ8+ePeh0OgDS09OVglVVVRW5ubnY7XbKy8vx8vJCp9NRUVHBG2+8wYYNG7BYLEREROB0OvHw8ADgzJkzCv/ZZ58lLS0NDw8PpVB14ULNSiRPT08APvzwQ3bu3ElWVhZhYWEUFBQo52ozFRYW8uqrr7Jx40ZsNhvh4eEumVq2bElERARvvPEG//nPfzh69CghISGcPXuWmJgYl+zDhw936bN15swZli1bxogRI4iOjm7wHGjSpEnT76mXR9/K9EU7XYpWBn3N8frqheFdmLX8l8se16RJk6bGoF8XqwBKy6p5bdFO/jM+4XdypUmTJk2aNGnSpEmTJk2aNDWwYBUQEEBWVtYVzx8/fvyK/a1Eq7KyEi8vrzp9rvz9/ZVVVAAjRoxg9+7d3HLLLcyePZuVK1fy9ddfk5qaSrNmzZQ+V7UrnYqLi5WClZeXFwsXLmTIkCGEhoaycOFC4OKKqeLimn4nWVlZWCwWkpKSyMjI4KuvvnI5b7fb8fHxYdmyZXTr1o0///nPLF++XCmQXZpJp9OxcuVKRo4cCcCSJUvQ6XQuK7HuuecePvzwQ+Xr/Px89Ho9U6dOdXlGmZmZeHl5ceed
d7Jx40aaNGlCVFSU4r++amgj9RtRjaGBn+ZRfTwRTJk8RrcI4MMX+5G2L48jp4ppF+HPHZ3DG8TsFBXM4pf78cW2LA6cLKJD6wAG9mzTICbINS8imWrniWBqHtXpUa2Z9x0/X6dYVavSsmoOZhcSF9W03ny1P0e1zotIngimjB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnqUTQ0qWPXu3ZuVK1cyaNAgOnTo4HIuIyODjz/+mPvvv79BBusrnU7H0KFDOXz4sEufqy1btriMO3PmDE6nkzlz5hAQEMD+/fv57rvvcDgczJ8/n+LiYmJjY3nqqad4++23Xa6tqqqiTZs2rF+/HqPRSP/+/fniiy/Izc2t4ycwMJA5c+YQHh7OsGHDWLlyJdnZ2cp5h8NBaGgomZmZpKenEx8fj5eXF5mZrttPVVdXExUVxcqVKzGbzQwbNoxly5Zx/PhxZcwLL7xAREQE8+fPJz8/H51Oh06n49ixY3Tt2hWAo0ePUlhYCMDGjRsBKCkpYe/evXzzzTd15vO3Sq/XERhorte1MqgxNPDTPKqPJ4Ipk8dBd7Z1C+dSPTqwk9uZINe8iGSqnSeCKZvHMa9+SUFxBcEBnnz4d/d91lRzZncw8yw5Vz2fW3CBhFta1ZtfK7U/R7XNy/XgiWDK6FHGzCKYMnqULfNHXx9m75FzdG0fytC+7dzGle05iuCJYMroUcbMIpgyepQxswimrB5lUYMKVuPHj+eHH35g6NCh3H333dx0001ATTHku+++IygoiPHjx7vF6LXKz88Pk8lUp8/V559/jr+/v/J1s2bNaN++PQEBAcp1drsdgAkTJpCYmAjAxx9/jE6nU641GAzY7XbeeustYmNjFd7WrVuVlVO1YyMiIpSiENSsuFq5cqWyOs3X15fy8nKee+45xowZo4z785//TGZmJqdOnSIiIgIvLy9sNhsrV650yfDpp59y7tw55Wu9Xs+jjz7Ko48+yqRJk9i/fz89e/Zk5syZJCYmYjAYmDlzJk2bNiUmJkYpxD344IP07t2b0aNH43A40OuvvRLscDixWi9c83U3ugwG9Tfw0zyqjyeCKaNHGTOLYMroUcbMIpju5L332T627T+rfH2+qIJBf11DQudmPP5g/QvJas7sTmZ40NX/0dSiqQ+FhbZ6sUH9z1Gt8yKSJ4Ipo0cZM4tgyuhRtswZWQW8sWyP8nX68QIWrz/I5EfjiY0MUoVHUUy180QwZfQoY2YRTBk9yphZBFNWjzeC/Py8f/OqswYVrMLCwvjvf//Lm2++yTfffMOmTZuAmgLMoEGDeO65567a40qkoqKi6qxOKikp4dy5c0RFRSnHMjMzefjhh12ug5oVUZden5mZSXh4OF5eXgA0adLEZTyA0+mktLSUsrIyysvLadWqFTqdThlbq9pCVUlJCVBTJAPqPKvS0lLl3hEREfj6+lJVVeVSrCopKaGiokJhXUkdO3Zk0aJFWCwWQkJCyMrKoqCggK1bt9KtWzdl3Mcff8zHH3/M+vXr693HSmsod2U1hgZ+mkf18UQwZfQoY2YRTBk9yphZBNMdvEuLVZdqy74zjBlQv9Xhl0qNmd3J7NA6CF9v42W3BfT1NhLbKtAtftX+HNU2L9eDJ4Ipo0cZM4tgyuhRlsyXFqsu1Yylu1k46e4GsUGe5yiSJ4Ipo0cZM4tgyuhRxswimLJ6lEUNKlgBhIaG8sYbb+B0OrFYLAAEBQWh0+kabK4hSkhIICUlhZEjR5Keno7ZbCYmJga9Xk+vXr2UcVarVSkYAcTHxysrnpYuXcrixYuJiYkhLy+Pfv36KePCw8PZt28fTz31FHv27MHDw4POnTtz4ULN6qLi4mLCwsLw9PQkNzeXBx98kKysLMLDwwkICMDHx4eqqioAZWXawoULmT59Ojabjbi4OPbt26ewAEJCQjh79mydTDqdzqXXFcAnn3zCBx98QHZ2NkajkXXr1uHr60tg
YCAAkyZN4tlnn73ss/v73/9OeHj9+7xoPazqqjHsh6p5VB9PBFNGjzJmFsGU0aOMmUUw3cUb//bmq56f+O4PvD3+znqx1ZpZBHPaY92ZtvAnl6KVr7eRaY91b/BnOLU/RzXPiyieCKaMHmXMLIIpo0eZMq/5IfOq5zf8eJJBd9Sv96tMz1EUTwRTRo8yZhbBlNGjjJlFMGX1KJsaXLCqlU6no2nT+jdpdrcGDBjA22+/zcGDB3nyySc5efIkq1atol27di4rmex2OwsWLOD//u//APD09KRLly6kpaXRokULRowYQWpqKufOnaN///7KdbUrq37++Wcef/xxSktLWbp0Kb6+vsrKKABvb28KCwsJCQlhwoQJbNq0iV27dhEREaGMqX1u+/btIzExkdatWzN//nwqKytdMrVs2ZL9+/fXyRQcHKwUtQ4dOsTf/vY3Dh06xKBBg/D29ubYsWP88MMPDB8+HKOxZspre1RNmDCB2267DYDk5GRsNhubN2/m0Ucfrddz13pYXV2NYT9UzaP6eCKYMnqUMbMIpoweZcwsgtlQXmFp1VXPW0qqGvwZRG2ZRTADA82smD6QPYfPcuikhZjWQXRtH+omdzVS+3NU47yI5olgyuhRxswimDJ6lCHzkVPFVz1/KKeIUSp7nxbBVDtPBFNGjzJmFsGU0aOMmUUwZfUoi66pYDVnzhx0Oh1PPfUUer2eOXPm/M9rdDodycnJ9TZYX61fvx6TyURMTAwpKSmYzWZ69erF9u3byc/PV4pWBoOB6uqLf2FaUVGhrJgqKiri7bffJiYmBofDwYYNG5TizpkzZwC45ZZbSE1NxWg00rNnT9LS0lx6XVVVVWE2mzEYDLz55puEh4fTuXNnjhw5QufOnZUxUFNE2rx5Mzabjc6dO/PLL79QWVmpsGr7VP0609atWwkKCgIgODiYnJwcvL292bhxIzqdDqPRSEREBKdOnarznFq3bk2XLl2AmmJdYGAgGRkZ9X7uWg+ry8tgUP9+qJrHhvMysizknLfRKsRMhwbs0X6p1J5ZBPNsURkl5Xb8vAyEBDT8Db4xZNY8qtOjjJlFMN31PR3o63HVolVQE49691+ScV6imzeha/tQrNayBvWtulRqf46NYV40j+r0KGNmEUwZPcqUuV2EP+nHC654PqZlgGrep0Uw1c4TwZTRo4yZRTBl9ChjZhFMWT3eCPLzE9TDqrZg9cQTT2AymVRdsNqyZQu9evVi7ty5yjGr1Ur37t3ZunUriYmJAHTp0oWAgABlzO7du7HZbOh0Ol599VVl3IwZM5QeXXCxYDVp0iRiYmKAmh5WN998MyaTCS8vLyorK7HZbISGhrJu3Trl2q+//prk5GSCg4MBOHHiBAD3338/SUlJyrhHHnmEX375RVnNdf78eQDeffddpYhltVrp1q2b8nVZWRk2m413332Xfv36MWnSJPbv388f//hHZs2aRWVlJSaT6bLP7Ntvv+WVV15h48aN1/Ko60jbn/PKagz7oWoer135hRf4x+KddbZX+vvoWwkJ8HGHRdVlFsEsLati3toM9mdZlGOd2gSRNLgjZi+P393f9WBqHuXgiWCq0aO7v6fffKY3j8389orn/5Xcu8HPQIZ5Ec0TwVQ7TwRT8ygHTwRT8ygHTwSzobyBPSL57+YrbwvY//bWqnufFsFUO08EU0aPMmYWwZTRo4yZRTBl9SiLrmkzxUOHDnHw4EGl4HHo0KH/+b+DBw8KMf6/lJmZqRR6auXn50dISAiZmRc/RCUkJLBt2zasVqtyHVCn11V0dDR5eXlKr6ji4mKMRiMbNmxQxlRXV+N0OpU+UdnZ2TidTs6ePasUpQDlXrXbAmZnZ+Ph4cHmza69GWw2GwaDQRlXVFSETqfjq6++UsY4nU50Oh0hISEu/tu0cd0bOjo6mqqqKnJyclyOT5s2jdjYWHr06MGkSZP49ttviYuLu8JT1aRJ0+X062IVQGlZNa8t2vk7OWqc
mrc2gwMnLC7HDpyw8P6a+q/61KRJ0+8nEd/Td8SFXdNxTZo0adKkSdP10wvDu1zTcU2aNGnSpEmTpl+r3j2sKisr+eGHH2jRooWywkhNslqtVFVVMXbsWPbs2YPZbGbw4MH4+fkp/Z4Ahg0bxpIlS0hOTiYpKYnt27cDEBcXxyOPPILFYiE2NpaCggKcTifFxcV4eXlRWlpK27ZtmTdvHgsWLMBkMhEQEIDdbleKR7X3adq0KYMGDcLhcBAYGEhFRQWAso2f1WqladOm7N69m5tvvhmHw0FoaCinTp3CYDAoXmvvOXXqVF555RW8vb3x8fFBr9cTHh7ucs/333+fbdu2cf78eQwGg7LCKycnh+joaA4ePEiLFi3o06cPLVu2ZOHChaxevRqAt956q0HPvqENu29ENYYGfprH+mnf8fN1ilW1Ki2r5mB2IXFR9e/vp8bMIpinC2wuqzBq5XDC/iwL563lNAuq32o1tWYWyRPBlNGjjJndxRT1Pf1/g+P4v8FxPPefzRRYq2jq58Fbf76z3j5rJcu8iOSJYKqdJ4KpeVSnRxkzi2DK6FG2zJ2igln8cj++2JbFgZNFdGgdwMCebf73hf9Dsj1HETwRTBk9yphZBFNGjzJmFsGU1aNsqnfBysPDg/Hjx/PSSy+psmDldDr55JNP6NChA7NnzyY/P5+ZM2ei17u+WPz9/Vm0aBGvvfYaycnJ6PV6dDod+/fv5/nnn6d9+/YsW7aMvXv31uHn5uYSFBSE3W6npKSEs2fP4uPjg5eXl8vYgoICWrRowfnz5ykuLqaysrKOX5vNhpeXF35+fhQUFHDu3Dk8PT2x2+0u9zx16hTBwcFcuHBB2f7P29u7zjZ/a9asUf67urpaKUbV9rFq164dbdq0YePGjVgsFhyOi0sUa7c7rI/0el2DG57fyGoMDfw0j9emPEvOVc/nFlwg4ZZW9ebXSk2ZRTCPnym96nlbpaPBP1vUlvl68EQwZfQoY+aGMkV/T6dOHVDva6+mG31ergdPBFPtPBFMzaMcPBFMzaMcPBFMd/IeHdjJbaxLJdtzFMETwZTRo4yZRTBl9ChjZhFMWT3KonoXrHQ6HZGRkRQWFrrTj9tkMpmoqKhgzpw5So8qu93OlClTMBpdY0dHR5OamgrAokWLeP311xk9ejRjxowB4JZbbqF3794UFRUpvaKcTidWq5UNGzYoWw+mpaUxbtw4nE4ngDI2KiqK9evXK/dLSkri+++/V84bDAZKSkp49dVXeeSRR4Ca7f969erlUogymUxUVlaydu1aJdNHH33kkqmWedddd/H+++8r1w4YMIDjx4/To0cPAFq3bs2CBQuorKzkgQceICkpiRdffBEPDw8yMjIYMKB+vwhyOJxYrRfqde2NLINB/Q38NI/144UHXf0NqEVTnwY1s1djZhFMHw/dVc+bTXpVNWn+YW8eR3OLaRfhzx2dwxvMU+u8iOSJYKqdJ4KpVo8iv6fB/bnPFpVRUm7Hz8tASEDD/2Gh1nkRyRPBVDsPtNeOLB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53Vu2AFNYWXmTNncv/999fpF/V7y2Aw4OvrqxR2AHr37g1w2RVOtaqurtnaq1Oni38RZDKZaNGiBSUlJcrqKafTiaenp0vunj17otPpqKqqAqBZs2ZA3X5SMTExfP/99/j41GyFU7tFYM+ePZUxAQEBBAUFUVZWdk2Zav21a9fO5Z7R0dEcP35c8VSrBQsW4OfnR2JiIi+++OIVn8u1SGsod2U1hgZ+msdrU4fWQfh6Gy+7LaCvt5HYVoFu8aqmzCKYIf7edGoTxIETFhzOi8f1OugQGUSwn5cqmjRnnbby+pKd1H7m+H53Lgs+P8DLo2+ldZhfg9ju8iiaKaNHGTM3lBni701MqwAOZRfVORfbKsAt39PQ8NylZVXMW5vhsn1hpzZBJA3uiNnL43f3dz2YMnp0
B0977cjpUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdSggtXevXsJCAhg0KBBdO/enRYtWtTZDg/g5Zdfbsht6iW73U5hYSEjR44kPT0ds9msbF346+3zLlXtSqXly5cza9YsLBYLMTExZGdnY7fbKS8vx8vLC51OR0VFBY899hh79uzBw8ODzp0743Q68fCo+Udq7dZ6GRkZPPjgg2RlZREeHq48owsXalYieXp6AjB16lQOHz6MzWYjLi4Oi8Xi4vW3ZCovLwdg9erVrFmzhvz8fF544QUyMzMVT9HR0QDk5eWRkpJC27ZtiY+PB6CqqkrpraVJk6bfpr+PvpXXFu10KVr5ehv5++hbf0dXjU9Jgzvy/hrXX/51iKz55Z9adGmxqlZ2B0xftJP5L9z9+5jSpEml0l1pkdXVF19dV81bm8GBExaXYwdOWHh/TQYTHuny+5jS1CikvXY0adKkSZMmTZo0adKkyf1qUMFq6dKlyn9v3779smN0Ot3vUrCqqKjA6XRy8OBBnnzySU6ePMmqVaswmUzKKiqA0aNHk5eXx6ZNm4CaIpJOp+Pnn3+mX79+xMfHk5qaSklJCQDFxcVKwUmv1/Pzzz/z+OOPU1paytKlS/Hw8ED3/39DU1xcDMDp06fx8fFhwoQJbNq0iV27drmct9vteHh4sHXrVhITE2ndujXz58/Hbrcrq69+a6Za5rlz5+jYsSP5+fl89dVXZGVluZyfOXMm69evp7KykpCQENq3b89///tfzGYznTt3btCzNxq1pnK/VmNo4Kd5rL+aB/sy9693ceCEhexzNlqFmOkQ6Z7Cr1ozi2D6+3rywoh4zhWVYXXz9kru8Pf9nlN1ilW1sjtg2/7TJHRpUS+2mudFFE8EU+08EUy1ejxdYOPgyaLLnjt4sojz1nKaBfnUm+8uj5cWyGvlcML+LEuDPKp1XkTyRDDVytNeO/J5lDGzCKaMHmXMLIIpo0cZM4tgqp0ngql5VKdHGTOLYMrqUTY1qGB16NAhd/kQIoPBQExMDCkpKZjNZnr16sXWrVtdttlzOBzY7Xbl6+rqapxOJ927d2f//v1s2bKFmJgYysrKlKIVQFlZGQ6Hg27dupGamorRaKRnz56kpaW5jAMIDw/HYDDw5ptvEh4eTseOHcnIyFDOX7hwgaqqKnr27MnmzZux2Wx07tyZ3bt343C4/nb0t2QCmDRpEitWrAAgNzeX559/npkzZyrn7XY7+fn5eHh48MMPPxAWFgbA448/Trdu3er7yNHrdQ1qon6jKvdcKUcO5hMebCY8xNdtXFmbDKrVY69AM73cQqortWYWwRT1M6Sh/jJPl1z1/LE8K4P7tLvqmP8lNc+LKJ4Iptp5Iphq83j8TOlVz9sqHW75Xle7R7XNy/XgiWCqjae9dsTwRDDVzhPB1DzKwRPB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiBhWsanXkyBE2b95Mbm4uABERESQkJNTpo3Q9pdfradGihcsqMKvVSrdu3bBarcqxJUuWuFxXuwJp4sSJ3HzzzcrxsWPHsm3bNvz9/YGawlaTJk1YuHChMsbpdNKxY0eleFTbo6p37968+uqryrgVK1aQkZGh9Lqq9fP2228rfID777+fU6dOXVOm2uvvvPNOxo4dS/v27Rk7dqwyF7Xnv/rqKwA+++wzQkNDAZRCldVqxc+vfr1YHA4nVuuFel17I6q0rIqU1emkZ178K9y4qCCefigOs3f9+xsYDHI2GVS7Rxkzi2CqlRfVvAnfX+X8TeF+FBba6sWWcV5EMNXOE8FUq0cfj6vv+2c26ev9/QLq96jWeRHJE8FUK0977cjnUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUZ0eZcwsgimrxxtBfn7ev3nVWYMKVpWVlUyZMoU1a9bgdDrR62tu6nA4ePPNNxk0aBDTp0+/as+o6yndFZsp1G/slcY4nU63+bhWVlRUFACZ
mZnKf9d+7eHhQcuWLYGL/bUGDhzocv0777zDO++8w759+5TeWtcqraHcRc1dlV6nv0FGloV3V6W7pb+BrE0G1e5RxswimGrj3REXzqINhy67LaBBDz07NW+wXxnnRQRT7TwRTLV5DPH3plObIA6csOC45KOMXlfTmy7Yz8stftXuUW3zcj14Iphq42mvHTE8EUy180QwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoQQWrf/7zn3z22WcMHz6cRx99lFatWqHT6Th58iRLlixhxYoV+Pv789JLL7nL72+Ww+EgLy/PZbXQl19+CVBn9dDx48eZPn06e/bsUY599dVXSi+nqqoqZfvD2h5WRqOR8+fP88Ybb7BhwwYsFgsRERHY7XZlZdWFCzUrjXbu3Mmzzz5LWloaHh4e+PrWbAnn4eHh4uedd95h586dZGVlERYWRl5enkvBqjbTq6++ysaNG7HZbISHh7swWrZsSWRkJJ9++inLli0DYM6cOTRp0oTbbrtNKR726dOHtLQ0AEwmE+Xl5dRujThhwgTFW32k9bCqkdbfQD6PMmYWwVQzb+rY7rzy4U8uRSuDvuZ4Q372yTgvIphq54lgqtljcmIcc3+1yrhjm5pVxg39rKB2j2qeF1E8EUw187TXjlweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6lE0NKlitXbuWwYMHM2XKFJfjUVFRTJ06ldLSUtauXfu7FKx0Oh0mk4nk5GSSkpLIz89n1qxZ+Pv74+19cQ/JESNGsHv3bm655RZmz57NypUr+frrr1mwYAEhISG0a9eOFStWYLO5buvh4+ODl5cXCxcuZMiQIYSGhirbA/662HP8+HHOnz9PUlISGRkZynZ8l7J8fHxYtmwZ3bp1489//jPLly/Hbrcrq9ZqMzmdTlauXMnIkSOBmi0NdTodRuPFqRw3bhx///vflWJW06ZNycnJcdniMCQkhKqqKkwmE3/961+Jjo5m9OjRQM2WhUlJSfV67loPq4vS+huI4Ylgqp0ngimjR3fwAgPNfPbPwXz900n2Hj3HzW1D6Ne9tRvc1UjGeRHBVDtPBFONHgMD4fXk3uSdKyXvvM3tfRxB/R7VOC+ieSKYauRprx05PcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLCqrq52KYL8Wl27duW7775ryC3qLX9/f/r27cupU6dITk7GbDYzZMgQ1q1b59In6syZMzidTubMmUNAQADZ2dl8/fXXOJ1O5s+fT3FxMbGxsYwaNYoPPvhAudbPz4/c3FzatGnD+vXrMRqN9O/fn88//5xz584pHmoVGBjInDlzCA8PJzExkVWrVmGxWBRWZWUloaGhZGZmkp6eTnx8PEajUekLBtCkSROKioqIiopi5cqVmM1mhg0bxrJly1zGFRYWYjKZlGJXeXk5I0aMYOXKleTn5xMWFkaTJk0AePjhhxkxYoRybUhICPn5+fV+7loPq4vS+hvI51HGzCKYaucBdI8JpV/31litZQ3qw1MrGedFBFPtPBHM5ZsOc+hkER0iAxjWr70bHLrfo6+ngVtjw9z2/QLq99gYXjsyehSRWXvtyOHxbFEZJeV2/LwMhAS455cPas8sgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vBHk53edeljdcccdpKWlMXz48Mue/+GHH+jVq1dDblFvRUVFUVhYSGpqqnKspKSEDz/80KW3U7NmzWjfvj0BAQHKdVDTO2rChAkkJiYCMHPmTMLDw/Hy8gJqikx2u5233nqL2NhY5Zr169dTWFgIoGyRGBISwsaNG5V7fvPNN6xatYpTp04B0Lp1a6qrqxk+fDhPPfWUMu6RRx4hOzubU6dOERERQUBAAEVFRaxcuVIphpWUlLBs2TKlSAawZcsWevfuzdy5c2nfvj1jx45l6NChLF++nK1bt5KYmMhNN90E
gNl8cYXP4cOHGTp0KOfPn2/Ak9d6WNVK628ghieCqXaeCKaMHmXMLIIpo0c1Zt51+Czvrt6vfH3iTAnrf8zhz0M60eWmUHdYlOI5NjaeCKaMHmXMLIIpg8fSsirmrc1w2ea7U5sgkgZ3xOxV/y3UL5XaMl8PpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNWgzxfHjx3Pq1CmeeeYZtm/fTm5uLrm5uWzbto3k5GTy8vIYP348RUVFLv9zh44fP87YsWPp0qULvXr1YtasWVRWVirnExIS2LZtG1arVTn25ZdfotfrOXHiBHfddRedO3fml19+UXpKAcTHx+Pr64u3tzfvv/8+Xbt2pVu3bnz00Uf06NFDGdesWTOgpj/Vgw8+SFxcHHfeeScVFRWUlJRQXl6OyWTCbDZTVVXFG2+8Qa9evejSpQtTpkzBZDIphaHWrWu2k8rKylIy9ejRg/37a34RlZmZCUBwcDA6nY41a9Zw3333ERcXx8CBA9HpdC7PNTMzk4iICF588UUA3nrrLV5++WWaNm2qsBISEtDpdHz44YfEx8fTqVMnEhISSE9Pp0WLFm6ZI02QNLgjHSKDXI51iKz5R64mTZo0adJUX11arLpU//n08sc1adKkSdO1a97aDA6csLgcO3DCwvtrMn4nR5o0adKkSZMmTZo03dhq0AqrAQMGAHDkyBG++eYbl3NOZ82SkoEDB9a57uDBgw25LcXFxYwePZrIyEhmz55Nfn4+M2fOpLy8XOmnNWzYMJYsWVKnh1VcXBwLFy5k4sSJtG/fnjFjxrBmzRqeffZZWrZsiaenJ48//jhvv/02Z86cISkpie+++4709HRycnIUD7VFnenTp9O7d2/69u3L4sWLXTx6eXlhNpvJz89nyZIljBo1irNnz7Ju3TqMRiMFBQXAxZ5Xa9eupXXr1jz55JN89tlnykqt4uJi5Z67d+/mH//4Bw888IByT6fT6VKYKy4u5osvvqC6uhqoKcLt27ePoqIihRkcHMytt97Kzp07lf5ctVsBXroCrT5qaCP1G0n+vp68MCKec0VlWN24jYisTQbV7lHGzCKYaueJYGoe1elRrZmXbjx01fMff3uU4ffWf3tAWZ5jY+KJYMroUcbMIpiyeDxdYHNZWVUrhxP2Z1k4by2nWZDP7+pRJE8EU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomxpUsEpOTkanu3qfHhFauXIlNptN6TsFYLfbeeWVV0hKSiIsLAx/f38WLVrEa6+9pvSweuihh/j000957LHHGDNmjMLT6XQsWLCAadOmARAREaGcS0lJITY2lkmTJjFjxgz27dtH586dMRgMQE2fql27drF3717uu+8+jh07xt69e5XrjcaaRxwUFMTixYsJDw/npZdeYsaMGZw5c8Yll16vp7i4mJSUFOLj4+nbty8ffPCBUrAymUzodDqCg4P5+uuvMZvNjBw5kg0bNriwHA6Hy7Z+O3bsUP67dhtCgI4dO5KRkYHRaOTChQsEBgZSUFDAtm3bsNvtSsZrkV6vIzDQ/L8HSiZRz0TWJoNq9yhjZhFMtfNEMDWPNz4v91wpRw7mEx5sJjzE939fcBkdySm+6vlD2UVued9R83MUxVQ7TwRTJo/u+P67ktSaWSTzRvd4/EzpVc/bKh3az1qV8EQw1c4TwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KogYVrJ599ll3+bgmbdmyhR49eijFKoD+/fszdepUpUcTQHR0tEsPq+3bt7No0SL69++vHAsKCqJFixZs2bJFOfbDDz9gNBoZOXIkEydOBGpWjKWkpLB582Y6d+6s9H76v//7Px5//HHl2ldeeYW9e/dSWlpKWFiYcnzdunVK3ymAOXPmUFJSAqAcj4uL46OPPlLG7N+/nw8++EBZ2aXT6ZTCXL9+/ZRxhw4d4vTp01RWVmIymfDy8sJut7Nv3z6XgmLHjh2VVV1H
jhwhNTWVlJQU7r77bmXME088wZYtWygpKXF5vr9VDocTq/XCNV93o8tgUH8DP82j+ngimDJ6lDGzCKaMHt3JKy2rImV1OumZF/9aPy4qiKcfisPsfW19UNq19OfEmZIrno9pFUBhoa3eXtX8HEUx1c4TwZTJozu//0R5FMUTwZTFo4/H1f8w02zSaz9rf2eeCKbaeSKYmkd1epQxswim2nkimJpHdXqUMbMIpqwebwT5+Xn/5lVnDSpY/V7KzMzk4Ycfdjnm5+dHSEiI0qPpSteB65Z3UVFR2Gw28vLyKC8vx8vLi6NHj1JdXe0yTqfT0aZNG4VRW7Dy9natlpaXlwOQm5tLdHQ03t7eGAwGl2KV0+mkoqJCKSa1atXqsqyzZ8+6ML28vICa7fwuVUVFBQA5OTlER0crBatLi1UlJSVUV1crrGPHjgEQGxtLdXU1VVVVZGRkkJ6eDkBBQUG9ClaA1lDuKmoMDfw0j+rjiWDK6FHGzCKYMnp0B2/uqvQ6fVAysiy8uyqdCY90uSbWsL7t+OrnU1c8/8e727olvxqfo2im2nkimDJ4dOf335WktszXg3mjewzx96ZTmyAOnLDgcF48rtfV9KQN9vPSftaqhCeCqXaeCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpXVasXPz6/OcX9/f2X7vCtdZzKZ8PT0VI4lJCTw7rvv4nQ6lb5TZ86cQafT0atXL5frjUYjW7dupUuXLkrfqb179zJixAgAqqqq2L59O3Cx71TTpk05duwYb7zxBhs2bMBisRAREUF5ebmy5V7tVn9ZWVk8++yzpKWl4eHhgb+/Px4eHjgcNS/uli1bAvDRRx8xZcoUsrKyCAsLU7YDrL1nkyZNOHXqFK+++iobN27EZrMRHh6OTqdTeovV9uBKT0+vs1JOp9MRHh5+9Um4irQeVnXVGPZD1TyqjyeCKaNHGTOLYMro0V08EX1Q/jK0M29/su+yxxv6PqzW5yiSqXaeCKYsHrU+RJrHhig5MY65v1qd17FNzeo87Wft788TwVQ7TwRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBSt3atiwYXzwwQdUVFTw008/UVlZSVFREdHR0S5b+o0YMYKdO3fi5+fH7Nmz+emnn5g3bx7r1q2jY8eOtGvXjhUrVtQpmEVERPDzzz+zcOFChgwZQmhoKAsXLkSn07msgNLr9Zw5c4aysjKSkpLIyMjgq6++wmQyKWNqVzytWrWKbt268ec//5nly5dTVVXlcs9mzZpx6tQpVq5cyciRIwFYsmQJer0evb7mm6VTp0506tRJ6fu1d+9edu3aRVVVFWFhYXVWe/1WaT2srq7GsB+q5lF9PBFMGT3KmFkEU0aPDeWJ6IPS9/Y29L29DQvW7ueXw2fp0j6UcQ92aojNOlLbc7weTLXzRDBvdI9aHyJxTBk8BgbC68m9yTtXSt55m9b/TKU8EUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoURas/Pz8lP5Pl6q4uNhl673LXVdZWUlFRYWyysrf35/Ro0cze/ZsXn75ZcxmM0FBQbRt29bl2tpVTN27d6d37940b96cefPm4XQ6mT9/PsXFxcTGxvKPf/yD5557TvHRpEkTHA4Hbdq0Yf369RiNRvr3789XX32F3W5X+N7e3pSWlhIYGMicOXMIDw9nzJgxpKamKlv+1TKDgoLIzMwkPT2d+Ph4goOD2bdvn3Lex8cHh8NBVFQUK1euxGw2M2zYMFasWKEUtwwGA++99x7vvPMOq1ev5uzZszRt2pSCggLOnDlDeno6cXFx1zw3Wg+ry8tgUP9+qJpH9fFEMGX0KGNmEUwZPZ4tKqOk3I6fl4GQgPp/2BTZB2XoXdGMe7ATVmtZg3qpXCq1z4sIptp5IpiyeNT6EGke3cHz9TRwa2yY9rNWZTwRTLXzRDA1j+r0KGNmEUy180QwNY/q9ChjZhFMWT3e
CPLzu8F7WEVFRdXpVVVSUsK5c+dc+k5d7jqArKwsYmJilOOlpaW0aNGCb7/9FoAXXniBI0eOuFwbFhZGfn6+cl2rVq0wGo1UV1czYcIEEhMTARRG7b1qVzS99tprdOvWTeHt3r2b06dPK1/XFtA2btyoHLNaraSmplJaWvOXoREREQD06dOH119/XRn38ssvs2/fPuVetSu3VqxY4dKHau3atS6rsUJCQpg+fTpjx47lgQce4K677mLUqFEAZGdn16tgBVoPq6upMeyHqnlUH08EU0aPMmYWwZTBY2lZFfPWZrhsI9apTRBJgzti9vK4Zt716IMiw7xcD6baeSKYN7pHrQ+ROKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHuXgiWBqHtXJk0mNcjPFhIQEtm3bhtVqVY59+eWX6PX6On2nLlV8fDy+vr5s2LBBOVZVVcVXX31FQkKCC//QoUOcOHFCOXbkyBGqqqq48847gZq+U7fffjseHh4uxbP169cTHR2tFJdqe20dPXpUGVNcXMzp06eprKykvLwcqOmPVVZW5pKptnhV23eq9v9PnXJttH7gwAEAcnJyADCbzS7HoaZIV1JSoqzWuvS5HTlyhOTkZJfjtf2yNGnSpEmTJtk0b20GB05YXI4dOGHh/TUZ9WYmDe5Ih8ggl2MdImuKYJo0aRIr7ftPkyZNmjRp0qRJkyZNmhqHGuUKq2HDhrFkyRKSk5NJSkoiPz+fWbNmMWzYMJe+U3/84x85fPgwOp0Os9nM4MGDGTduHCkpKQQFBSl9p4qKihg3bpxy3b333kvTpk0ZOHAgAC1atKCkpISoqCg6d+7s4iMtLY0PPviA5cuX06pVKw4dOsRbb73l4len0/Haa68xffp0goOD8fHxwdvbm8rKSoqLi/Hy8sLDo+Yvtnv06IFer6dly5bk5+cTHh6uFJlq+2Pt2LGDjh074uPjQ3h4uFIMqz3v7e2Nt7c3L774IgMGDGDBggWYTCZCQ0OxWC7+Au5Pf/oTR48eJTExkXHjxvHLL78A0KVLF5ec16qGNiC+EdUYGvhpHtXHE8GU0aOMmUUwZfF4usDmsrKqVg4n7M+ycN5aTrMgn2vm+vt68sKIeM4VlWF1wzaDtZJlXkQz1c4TwZTJo6jvv0u9qS2zSKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNjbJg5e/vz6JFi3jttddITk7GbDYzZMgQnnvuOWVMcXExBw4cQKfTMXfuXPLz85k5cyaDBg3imWeeYeHChVgsFmJjY1mwYIHLiqLU1FSKioqIjo7m5MmT5ObmAris3qqqquI///kPRqMRk8lEeXk5R44coV27dvTv318Zl5ubi9PppF27dpw+fZqCggLy8/N56KGHWL16tTKuqKgInU5H69atycnJ4eTJk5hMJiIjI5UxNlvNfunNmzfHbrdTUFDAkSNHuP3229m2bZvLMwoNDaVr164sXLgQnU6Hh4cHDz/8MB988IFLhsrKSpfeVgB/+9vf6js16PU6tzSuvlHVGBr4aR7VxxPBlNGjjJlFMG90j8fPlF71vK3S0aD3OVHvkTf6vFwvptp5IpgyeRT5GVWtmUUyZfQoY2YRTBk9yphZBFNGjzJmFsFUO08EU/MoB08EU/OoTp5MapQFK4Do6GhSU1OveH7lypV4eHjw3XffKX2c7HY7r7zyCt999x1JSUmXva6iooL333+fcePGMWHCBAAqKyu5+eab2blzpzJu48aNHD16lKCgIBITE5k4cSJpaWmMGzeOffv2KSuUdu/eDcAnn3yi9Kn661//SlpaGjqdDn9/f86cOUNpaSlxcXF8+umnQE0Bq0+fPpw4cYIuXboAkJaWBsCkSZO4//77Afjoo4+YNm0aUFPIg5ptCC9cuEB4eDi33norERER7N+/H6fTqYzJzc3l0KFDvP3228ycOZMxY8bw2muvATWFLJvNpmwteC1yOJxY
rReu+bobXQaD+hv4aR7VxxPBlNGjjJlFMBuDx/lr93M4p5jYVv6MG9SpXgwfD91Vz5tNegoLbfVig5zzIqNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ422YPW/tGXLFnr06KEUqwD69+/P1KlT2bp1K4mJiZe9bvfu3ZSWlrqskjKZTISEhJCVleXCv+mmmzh27BhRUVFAzQqsgIAANm/eTOfOnamsrFT6YGVlZRETEwPAgAED+PzzzwkLC8PLy0spRF3aXyogIICePXvy/fffK/z09HR0Oh35+fkumaZMmQKgjIuKiuL8+fMsXLiQjz76SCnsZWZmKmNOnTpFVVWV0ruqtlgFMGrUKG6++WY+/vjj3/Ko60hrKHdlNYYGfppH9fFEMGX0KGNmEUw1etyanseCLw4pX58tLGPz3jP834Ox3N6h+TWxQvy96dQmiAMnLDicF4/rdTU9b4L9vNySX4Z5Ec0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jHDwRTM2jOnky6YYtWGVmZtK3b1/Gjh3Lnj17lB5WISEhZGZmXvU6gO+++46nnnpK2TawVatW/Pzzz5w9e5bQ0FAyMzMxGAwAvPLKK8ycOZN77rmH1q1bK4zs7Gzsdjsmk4mxY8dSWlpKeHg4gwcPBiA2Nla5p6+vr7K9X3l5OV27dsXDw4Pq6mruvPNOAE6cOEFoaCizZ8/mrbfeUjJ5enri7e1NREQEAPv27cPpdFJZWcnw4cPx8PDAZDKRlpbG008/7XLvy+mVV14hLi6uIY9fkyZNmjRpum66tFh1qeatPXjNBSuApMEdeX9Nhksvqw6RQSQN7lhvj5o0adKkSZMmTZo0adKkSZMmTZqurhu2YFVcXMz69evp0KEDs2fPVnpY6fV6iouLr3id1WrFYDAwd+5cJk6cSPv27Vm2bBmbN28G4Nlnn+XZZ58lNzcXi8WCv78/M2fOpLy8nDfeeIPz589z+PBhxQPUbLFXVFTEn/70J6xWK++88w4APXr0UO4JoNfrMZlMPProo3zxxRdkZWWh0+mU7QWLi4vx8PCgvLycu+66i+joaJYsWUJlZSXx8fFKhtzcXAwGAyaTiaFDh7J+/XrOnj2Lv78/w4YNA2q2DYSa1VgzZswA4JFHHgGgY8eOdOxY/1/KGY1aU7lfqzE08NM8qo8ngimjRxkzi2Cq1eP7a9Kvev7D9Qd44sFr2x7Q39eTF0bEc66oDGu5HT8vAyEB7tl/WpZ5EckTwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsumGLVjVrjCaM2eOSw+rKVOmUFZWdsXrqqursdvtPP7444wZMwaAW265hTvvvBOLxYLBYCA5OVnZvm/JkiW0b98eqCkCjRs3Dp3Otf/FTTfdxKBBg1i+fLlS5CouLqZp06YAXLhwgdLSUiZOnMi+fftYsGABtau3fp3Jbrfzz3/+k/nz57N161aaNGmCxWIhODgYqNlW8OTJkzz//POcPXuWNWvWUFRUhF6vJyoqiiZNmgAX+2Hdd999So8sd0iv1wltaN3Y1Rga+Gke1ccTwZTRo4yZRTDdycs9V8qRg/mEB5sJD/GtF+PoKetVzx/OKa73+5LI9zM1z4sopoweZcwsgql2ngim5lEOngim5lEOngim5lEOngimjB5lzCyCKaNHGTOLYMrqURbdsAUrvV5PeHh4nR5WU6ZMUVY0XU61q6L69u2rHDOZTMTExLBt2zYWLlyIl5cXt912G3a7XSlWQU0PK4PBQGRkJAA+Pj4AxMfHk5SURFJSEgArVqxg2rRpVFVVARdXWP3xj3/kiSeeUHj3338/p06dqpPpwQcf5MEHH1Su7datm8JYtGgRer2ewYMHYzQaeeqpp3jllVf49ttv0el0VFZWYjKZ
mD59OgAeHh4uz2P8+PG0bt36tzziy8rhcGK1Xqj39TeqDAb1N/DTPKqPJ4Ipo0cZM4tgupNXWlZFyup00jMvbrkXFxXE0w/FYfb2uCZW2wg/zhZe+Q9R2rf0p7DQVi+fss2LKKaMHmXMLIKpdp4IpuZRnR5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53dsAWry+nXK58aOvZKY5xO52WP18fHtbIyMzM5efKkst3gpdq9ezf//e9/+dOf/kRWVhYA//nPf/jPf/6jjHnnnXd455132LdvH56enr/Z56XSGspdWY2hgZ/mUX08EUwZPcqYWQTTHby5q9I5cMLiciwjy8K7q9KZ8EiXa2KNG9iRren5Vzw/dkCHBvuVZV5EM2X0KGNmEUy180QwNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7Lohi1YORwOcnNzGTlyJOnp6ZjNZmJiYoCL/ZsuJ39/fwD+9a9/kZ2djcViISYmRinwFBcX4+XlhdFo5Pz58zz22GPs2bMHDw8POnfujN1uV1ZWXbhQs9IoLS2NBx98kKysLMLDw/H1rdn2yMPDw8XPc889x+HDh7HZbMTFxXHq1Ckcjosv7N+S6YknnuChhx5i2bJlbNy4UbnWbDaTkpKirP5avHgx77//Ps2aNWPXrl3k5ORgt9vR6/VMmjRJ8VYfaT2s6qox7IeqeVQfTwRTRo8yZhbBdBfvdIGN/VmWOscdTtifZeG8tZxmQT7XxHxycAfeW3Pgsscb8p4k07yIZMroUcbMIphq54lgah7V6VHGzCKYMnqUMbMIpoweZcwsgql2ngim5lGdHmXMLIIpq0fZdMMWrKCmZ9XBgwd58sknOXnyJKtWrcJkMuHtfXEPydGjR5OXl8emTZsAMBqN6HQ6duzYQb9+/YiPjyc1NbXONoLe3t7o9Xp+/vlnHn/8cUpLS1m6dCkeHh5Kn6ha5ebmEh0dzYQJE9i0aRO7du1yOe/j44OHhwdbt24lMTGR1q1bM3/+fKqrq9HrXV/c/ytTdHQ0hw4dYuPGjTz88MNER0fz3nvvYbVa2bp1K7fddhsAt912Gw6Hg//7v//j4YcfZsqUKTz22GM4HA6ysrLq3Pe3SuthdXU1hv1QNY/q44lgyuhRxswimA3lHT9TetXztkrHNb+PDExoy8CEtryzcjf7jp2n803BjB8W3xCbLpJhXq4HU0aPMmYWwVQ7TwRT8ygHTwRT8ygHTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KItu2IKVp6cnlZWVxMTEkJKSgtlsplevXmzduhWj8WJsh8OB3W5Xvvbx8cHpdHLrrbeyf/9+tmzZQkxMDBcuXKC0tFRZgVV7bbdu3UhNTcVoNNKzZ0/S0tKUbfxqxzZr1gyDwcCbb75JeHg47du35/Dhw8p5g8FAVVUVPXv2ZPPmzdhsNjp37syuXbtctuX7rZn+85//8MADD/D6668DcPToUdauXcsHH3zA+PHjMRgMVFdX89JLLzFq1Cief/555dqmTZtSVFRU7+eu9bC6vAwG9e+HqnlUH08EU0aPMmYWwXQXz8fj6tvimk36evecGjsgVvFYX8alkmleRDJl9Hi2qIyScjt+XgZCAtzzDxW1ZxbBVDsParYzzTlvo1WImQ6RQQ3myTgvIphq54lgah7V6VHGzCKYMnpsDJnd/R4I6n+OjWFeNI/q9ChjZhFMWT3eCPLz03pYYTAYCAoKYunSpcqxvLw8+vTpQ2VlpXJsyZIlLtdVV1cDMGLECAYMGKAcT0xM5NChQ3h5eQE1vaU8PT1ZuHChMsbhcNChQweqqqqAmkIVQKdOnXj33XeVcW+99RaHDx9Wtg6sqKgA4NVXX6Vly5bKuN69e1NWdrGR/G/JlJOTw4kTJ1yKUDNnzsRut7N27Vry8/MJDw9n27Zt5ObmMmrUKGXc4cOHGThw4NUe62+Stj/n
ldUY9kPVPKqPJ4Ipo0cZM4tgNpQX4u9NRIiZU+fqFpRahpoJ9vNSXc8pGeblejBl8FhaVsW8tRku2152ahNE0uCOmL3qv93ypVJb5uvBVCMvv/AC/1i8k9KyauWYr7eRv4++lZCAa9vW9HKScV5EMNXOE8HUPMrBE8HUPMrBcwdT9HsgqP85qnFeRPNEMGX0KGNmEUxZPcqiRluwOn78ONOnT2fPnj2YzWYGDx7MX/7yF0wmE1CzdV5RURFWq5Xa/k5bt24FalYc3XXXXVgsFmJjY5k8eTJdunQBUFYq/fzzz2zYsIG0tDSMRiPl5eXY7XbKy8vx8vJCp9NRUVHBypUrWb58OVlZWQQGBuJ0OpX+T2fOnAEgKyuLN954g7Vr12Kz2ZSiV22Pq9pVVGvXrmXnzp3s2bMHb29vioqKlLGXZlq8eDHLli0jLy+PwMBAACV3ZmYmAJ999hlvvPEGZ8+eJSwsTOmFVVpasxXT3r17CQgIID09nVGjRik9rADuv/9+902UJk2aNGnS9CtdrlgFkHO24auiNGn6PTVvbQYHTlhcjh04YeH9NRlMeKTL72NKkxD9+hd1AKVl1by2aCf/GZ/wO7nSpEmTJk2axEt7D9SkSZMmTSLVKAtWxcXFjB49msjISGbPnk1+fj4zZ86kvLycKVOmAFBZWYmXlxfJyckkJSWRn5/PrFmz8PT0ZN++fUyaNIn27dszYcIEhg0bxqZNm2jZsiUXLlzAYDCwYsUKgoODeeKJJ/juu+/Yt2+fcu/agpXZbGbq1Kn07t2bvn37snjxYgAKCgqUsVBTRMrOzmbUqFGcPXuWdevWAZCfnw/UFKJ8fX2ZPXs2rVu35sknn+Szzz7DYrG4rLCqrKzEaDTyj3/8gwceeMDlnrWsgwcPKv/fr18/AgIC2LJlC7t37wYuFqx++OEHrFYrEydOZMiQIWzevJns7GwAwsPDGzQ/DWlwf6OqMTTw0zyqjyeCKaNHGTOLYLqLt3Tjoaue//jbowy/t3292GrNLJKpeVSPx9MFNpeVVbVyOGF/loXz1nKaBdX/r47VmFk0U628fcfP1/lFXa1Ky6o5mF1IXFTTerFlnBcRTLXzRDA1j+r0KGNmEUwZPao1s8j3QFD/c1TrvIjkiWDK6FHGzCKYsnqUTY2yYLVy5UpsNhtz5swhICAAqCn6vPLKKyQlJREWFoZOp2Po0KEcPnyY5ORkzGYzDz30EIsXL6Z9+/aMGTMGgKioKPbs2cOCBQuYNm2acg+n04nT6SQlJYXY2FgeeughVq9ezcGDBxW+j48PRqORXbt2sXfvXu677z6Xws+lCgoKYvHixYSHhzNhwgT+/e9/s337doYOHQrUrOwyGAwUFxeTkpJCfHw84eHhbN26lfz8fOWeJpOJJk2a8PXXX2M2mxk5ciTLli0jIyMDgCZNmgA1WxsuX76cJk2aEBUVxQMPPMDnn39OZmYm8fHxNGnSBIfDgdPpZPny5TgcDuLi4khPT+fjjz/m4Ycfrtfc6PU6AgPN9bpWBjWGBn6aR/XxRDDdycs9V8qRg/mEB5sJD/F1G1fNmUUx1ezxqx0nST92jpvbhtCve+t6c47kFF/1/KHsoga/j8g0L6J4Ipg3usfjZ0qvet5W6WjQa3v34bMc/vEkMa2D6No+tN6cX+tGnxcRvDxLzlXP5xZcIOGWVg26h4zzIoKpdp4Ipoyf8UQw1c4TwdQ8ysFrKPN6vAeC+p+j2ublevBEMGX0KGNmEUxZPcqiRlmw2rJlCz169FCKVQD9+/dn6tSpbN26lcTERPz8/DCZTKSmpipjtm/fzqJFi+jQoYNybNmyZcyYMYNNmzYB4Ofnh91up127dspKKICPPvqI1atXs2fPHu666y6aNGlCdnY2kyZNUopftT7Onz/PqVOn8Pf3B2qKX+vWrVO+PnHiBP/+9785evSock+bzcadd97J3LlzFdbMmTPZunWrksnX15eioiJmzpxJv379lHEf
f/wxZ8+epbKykoiICAA+/PBDoqKilDGffPIJn3/+udLrKiYmhrS0NFasWMHEiRMZPnw4Pj4+TJ48WdlWsD5yOJxYrRfqff2NKoNB/Q38NI/q44lgupNXWlZFyup00jMvriiIiwri6YfiMHvXv1eLmjOLYqrZY1ZeMa+m/kwt4vvducz55Bemju1OZHO/a+a1a+nPiTMlVzwf0yqAwsL6bQ0o07yI4olgyuLRx0N31fNmk75er+18ywVe+fCnOn0ipj3WndDAhq3YkmFeRPDCg67+j88WTX1U83NMBFNGj7Jlbiyf8UQw1c4TwdQ8qtOjWjOLfA8E9T9Htc6LSJ4IpoweZcwsgimrxxtBfn7ev3nVWaMsWGVmZtZZAeTn50dISIhSbImKiqpTeDlw4AAAXbt2dTkeHR3NokWLKC8vV4o8oaGuf7malZWFyWRSVk+Fhoayf/9+l6KQ0+nk/Pnzisfbb78dvV6Pt7e3UqyqPQdw7tw5xWtVVRUtWrRwuWdubi4eHh7K+JCQEIqKimjTpo0ypqSkBJvNhtPpJCcnR/GTmZnp4m379u0AdOvWDYC2bdsCNX2zDAYDf/rTn1izZg2AUtSqr7SGcldWY2jgp3lUH08E0x28uavS6/Rqyciy8O6qdLf0alFjZtFMNXq8tFilMB3wyoc/Mf+Fu6+ZN6xvO776+dQVz//x7rYNfgYyzItongjmje4xxN+bTm2COHDCgsN58bheBx0igwj286oX+9fFKqjZcmfawp/c0ifiRp8XEbwOrYPw9TZedkskX28jsa0CVfdzTARTRo+yZG5sn/FEMNXOE8HUPMrBayjzerwHgvqfo9rm5XrwRDBl9ChjZhFMWT3KItUVrI4fP8706dPZs2cPZrOZwYMH85e//AWTyaSMsVqt+Pm5/lW30+nEbrezdOlSFi9eTFBQEPv373cZW9vHadOmTcyYMQMPDw/uueceunXrhtPppLi4mPj4eHQ6HSdPnuTBBx8kKyuL5s2bY7VaCQ4OVvpSdejQgW+//ZZly5YxefJkbDYbbdq0wWq1AjX9q0wmEyEhIRQWFjJ27Fglk7+/P0FBQZSU1PyF+R133AHAzz//zH333UdeXh6tWrUiJycHPz8/5Z5RUVEcPXqUF154gePHj1NWVsbkyZPR6XQu/oODg3nppZd48cUXKSsrIywsjNzcXIKCgpRC1R133IHRaGTRokUEBQURHx+P2VyzTU379vXrHVIrrYdVXTWG/VA1j+rjiWC6iyeyV4taM4tkqtXj93tO1SlW1crugG37T5PQpcXlB1xFfxnambc/2XfZ4w15D5FlXkTyRDBl8picGMfcX61K6NimZlVCfV7bWq8k9fKmPdadaQsvv/JNTT/HRDBl9ChT5sb0GU8EU+08EUzNozo9qjmzqPdAd3psLDwRTM2jOj3KmFkEU1aPsklVBavi4mJGjx5NZGQks2fPJj8/n5kzZ1JeXs6UKVOueu38+fMpKCigS5cu/OUvfyE1NZXNmzczbtw4xo8fT35+Pj/88AMAZ86c4c0336S8vJy//e1vysoiAE9PT8xmMzk5OXTt2pXnnnuOTz75hMLCQpo2vfgLge7duwOwefNmHn30UXx8fPjwww8xmUwuK5Tatm1LWloaR48eJTk5mV27dvHdd98RGRmpFKyaNWsGwMGDB+nTpw9Dhgxh0aJFVFRUuNwzPj6ejRs3cvLkSaKiosjIyGDOnDnce++9fPnll8q4nj17snbtWvr370+nTp1ISUnB4XAQGxurjAkODsbLy4vS0lJuueUW4uLiWL58OVCzkqu+0npYXV2NYT9UzaP6eCKYDeWJ7tUC6st8PZhq85h5+spb9wEcy7MyuE+7a+b2vb0NfW9vw4K1+/nl8Fm6tA9l3IOd6muzjm70ebkePBFMGTwGBsLryb3JO1dK3nlbg/u+aL2S1MsLDDSzYvpA9hw+y6GTFq23mEqZaueJYMr4GU8EU+08EUzNoxw8dzBF
vweC+p+jGudFNE8EU0aPMmYWwZTVoyxSVcFq5cqV2Gw25syZo/SnstvtvPLKKyQlJREWFgbUbP9XW+wBqKio4P3338fHx4du3brRo0cPbrnlFvr27Ut+fj7JycmYzWZatWrF0aNH+ec//0lMTAwA7733HocPH0an07n0nPLy8uL06dO89dZbxMbG0qtXL3bt2kW7du2UMQAtW7bkv//9L0ajkfvuu4+vvvoKQGGVl5crX7/zzjuEh4fzhz/8gTVr1hAYGKhk0Ol0+Pn5sW/fPrZv366slDp+/LjCqt0KMDo6moyMDAAGDRrEXXfdxZdffqmM++c//0n37t2ZP38+GzduxOl00rNnT3bt2oXdbsdgMLBr1y5KS0vp2LEje/bs4dtvvyUoKAiAjIwMysrK8Pa+9m8srYfV5dUY9kPVPKqPJ4LpLp6oXi2g3swimWeLyigpt+PnZSAkwD0fatzhMap5E76/yvmbwv0atEf90LuiGfdgJ6zWsgZxaiXja0fzqE6Pvp4Gbo0Na/BrW+uVpG4eQHTzJnRtH6ran2MimDJ6lClzY/qMJ4Kpdp4IpuZRnR4bQ2Z3vweC+p9jY5gXzaM6PcqYWQRTVo83ghptD6stW7bQo0cPpVgF0L9/f6ZOncrWrVtJTEwE6van2r17N6Wlpeh0OqVvk8lkYsCAAWzatIm9e/cCMHbsWI4ePepyzzVr1nDzzTdjMpnw8vKisrKSCxcuEBoayubNm5VxX3/9NVu3biU4OBiAEydOADBkyBCSkpKUcY888gi//PKL4qO2p9Xy5cuVgpLVauWzzz5Tvs7JycHpdBIZGcnHH3+ssObNm8ebb75Jq1atlNwATzzxBFarlcmTJ/Pss8/yxRdf4OHhQcuWLZVrhw4dyvHjx1m6dCnz58/n0KFD/PTTTzgcDgwGA1lZWQBK4QsgPz8fgNOnT/Piiy/y1ltvXXW+riRtf84rqzHsh6p5VB9PBLOhPFG9WtzpUTTPHczSsirmrc1w2XqnU5sgkgZ3xOxV/6bml6ohHu+IC2fRhkOX3RbQoIeenZpre9SrhKl5vDF5Wq+kxsETwdQ8ysETwZTxM54Iptp5IpiaRzl4IpgyepQxswimjB5lzCyCKatHWaSqzRQzMzOVokyt/Pz8CAkJcSlQJSQksG3bNqVfVO05vV5Pr169lHHR0dHk5eUpq5yKi4sxGo1s2LBBGVNdXY3T6VRWO2VnZ+N0Ojl79qxSlAKUe0VERCjjPDw8XIpaADabDYPBoIwrKipCp9MpK6+gZnWWTqdTtt6r9X/kyBHlPgAFBQUAREZGAjWruSIjI122/wNYv349PXr0cOnzNW/ePFJTU3n11Vfx8PBg0aJF/OlPf8LDo+YXoHfeeSd9+vQhJCSEadOmMW/ePAYMGABAYmIiycnJaNKkSdPVlDS4Ix0ig1yOdYisKbZo+m2atzajTlPzAycsvL8m4wpXXH+9PPpWfv1HMAZ9zXFNmjSJ199H34qvt+vfmPl6G/m79j2oSZMmQdI+42nSpEmTJk2aNGn6vaSqFVZWqxU/P786x/39/SkuLla+HjZsGEuWLCE5OZmkpCS2b9+uHK/dNhDggw8+wOl0ctttt+Hr60tlZSWxsbEsXLiQoKAg2rVrx4oVK7Db7UrxqPY+fn5+DBw4EIAWLVooWxDWbptntVoJDAxk9+7ddOzYEU9PT2XLQYPBoHgoLS2ldevWTJkyhalTpxIcHIyPjw8Gg4Hw8HCXezqdTnr06IFer6dly5bk5eUBYDRenKY//vGPzJo1iy+++AKAUaNGkZWVxdKlS5Ux69at48033wRg8uTJAHTs2JEBAwZgsVgICgqivLwch8NBaWkp06ZNc3nezz//vJKzPmpok80bUY2hgZ/mUX08EUx38vx9PXlhRDznisqwunE7OzVndidTZFNzd3kEiG4RwIcv9iNtXx5HThXTLsKfOzqHN4hZK7XPtVpfOyJ5IpgyenQnr3mwL3P/ehcH
TljIPmejVYi5zi+S6yNtXtwjzaN7pHaPsmVuLJ/xRDDVzhPB1Dyq06OMmUUw1c4TwdQ8qtOjjJlFMGX1KJtUVbD6rfL392fRokW89tprJCcno9fr0ev1TJo0SRlTXFzMqVOnAJg+fToVFRW8/PLLXLhwgWeeeYaFCxdisViIjY2lc+fOeHl5udyjpKSEtm3bcvLkSXJzc5WeVbVyOBwUFhbSvHlzdDodZ86c4ciRIwQHB7sU15xOJydOnKB9+/acPn2agoIC8vPzCQ4OdlkRBTV9rFq3bk1OTg4nT550KVTVZvrwww+JjIykoKCAkpISjh07RkJCAl27dlXGffTRR3WeWUZGBn/605+YMWMGiYmJ2Gw2Tp8+jdFoZNy4cXh6erJ48WJKS0uZP38+f/vb365xVmqk1+sa3IT3RlZjaOCneVQfTwTTnTxR3/NqzuwO5vVoag7uyz3ozrZu4VxOap9rtb12rgdPBFNGj+7k9Qo00+t/D7tmafOiTqbmUQ6eCKaMn/FEMNXOE8HUPMrBE8GU0aOMmUUwZfQoY2YRTFk9yiJVFaz8/PyUlUyXqri4WOn3VKvo6GhSU1MBWLZsGa+++qpLUWnlypUYDAYcDgf33HMPXl5ezJ07l8zMTP7whz+49J0aNmyYwvf2rnkx3X///UoPp8rKSvr160d+fr4yrqCggKqqKhYsWKBsY5iWlsa4ceNcvOr1eoKDg1m7dq1y7K9//SsbNmxQxjkcNftZPvHEE8pWfEVFRSQkJAAo41auXInNZuPzzz/n22+/ZfLkybzwwgv861//Ij8/X1lddu7cOR544AFllVVtnqysLNq2bav4OnLkCCkpKdx9990ADB48mPvuu4/FixeTnJyMr6/vlSfrCnI4nFitF675uhtdBoP6G/hpHtXHE8GU0aOIzOu2ZnEou4gOrQMY2LNNvRgim5pD43iOavcoY2YRTBk9ish8tqiMEjevdNDmpeGS0WNGloWc8+5b7Qfqf46NYV4ag0fttaPOedE8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9XgjyM/P+zevOlNVwSoqKsqlVxXUrHQ6d+5cnd5Wv74OICsri5iYGAC2bNlCs2bNsNvtyuqpm2++mdzcXLZu3UpiYiJQswIqKytL6X11/vx5AJo3b67wTSYTnTp1Ij8/X7lXUVERAE2bNlXG9erVCw8PD8zmmr9Eq6yspLq6uk6xrU+fPnz++ecEBAQAcPbsWaCmR1WtAgICiIyM5MiRI8rxLVu20KNHD+U6gH79+jFr1iwlU05ODidOnOD55593uecDDzzA7NmzyczMJC4ujmPHjgEQGxvrck+o6euVn59fr4JVzfXaN+OV1Bga+Gke1ccTwZTRozt4B05Y+NfKX5Sv048X8NG3x3lheBdiWl3bL16uR1NzUOdzFM1UO08EU/N44/JKy6qYtzbDZQvRTm1qesmYvTwaalGbF5Uy1egxv/AC/1i8k9KyauVYbT+1kID6b2F7qdT+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08maSqglVCQgLvvfeeSy+rL7/8Er1erxSUanX8+HGmT5/Onj178PHxwcPDg88//1wpWB0/fpyqqioGDRqkXNO3b1/Wr1/P8uXL+c9//oPFYiEiIoKioiLuvPNOAHJycgDYsWMHzz77LGlpaXh4eKDX11QAg4ODASgvL0en0/HOO++wc+dOsrKyCAsLo7q6miZNmgCQnZ2N0+nk5MmTvPrqq2zcuBGbzaYwQkNDgZril8FgYP369axevZo9e/ZgNpu5cOECRqNR2TowMzOThx9+mE8++URZ/fXkk0/i5+enFPpq/z8sLIwXX3yRr7/+mqqqKmX1lYdHzS82WrRoAcBdd9112bmo7a+lSZMmTZou6tJi1aWatfwXFk66+5p5SYM78v4a119Ea03NNWnSdKnmrc3gwAmLy7EDJyy8vyaDCY90+X1MaZJSvy44
AJSWVfPaop38Z3zC7+RKU2OQ9trRpEmTJk2aNGnS9FulqoLVsGHDWLJkCcnJySQlJZGfn8+sWbMYNmyYUnABGDFiBLt37+aWW25h9uzZ5Ofn88orr7BgwQJCQkJo164dRUVFeHh4MG7cOOW6++67jxdeeIH09HSGDBlCaGgoCxcuxGAwEBgYCIDVasVoNLJ//35ycnJISkoiIyODr776CqjZntDLy4uysjKaN2/OsmXL6NatG3/+859Zvnw5TqcTu92ujIWaVVwrV65k5MiRACxZsgSAiooK5Z6BgYF89913hIeHk5yczJYtW/jpp5/Q6S5uGWW1Wvnll1+YP38+nTp1oqCggKCgII4fP86+fftc7jlhwgRKSkoYNmwYubm5fPHFFwCEhIQA0KlTJ0JDQ5XVXQC+vr5UVFRw//33K1sj1kdGo9ZU7tdqDA38NI/q44lgyujRXbw1P2Re9fyGH08y6I5r2x5QVFNzUO9zFMlUO08EU/OoTo/u4p0usLkUtGvlcML+LAvnreU0C6rf6gRtXtwjWTzuO36+TsGhVqVl1RzMLiQuqullz/8Wqf05qnVeRPLcxdReO+qcF5E8EUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsklVBSt/f38WLVrEa6+9RnJyMmazmSFDhvDcc8+5jDtz5gxOp5M5c+a4bGM3bdo05s+fT3FxMU6nk4cffthlmz2Hw4HD4cDX15f169djNBrp378/P/74IwsWLGDatGku9wkMDGTOnDmEh4dz11138f3333Pw4EGleGa32wkNDSUzM5P09HTi4+MpLy8nNzfXhVNVVUWbNm1YuXIlZrOZhx56iE8//ZTt27czdOhQhWUymdDr9bzzzju0adOGrl27smfPHpf+VDt37gRg//79APz8888uX9cqOzsbk8nE8uXLiYiIYPDgwXz22Wf89NNPdOvWDYPBwMCBA1m8eDFBQUEUFRVRVVWFp6en0kerPtLrdcKa894IagwN/DSP6uOJYMrkMfdcKUcO5hMebCY8pH5bnQIcOVV81fOHcooYVc+ffyJ/bqp1XkQy1c4TwdQ83pi842dKr3reVulo8M8PmebFXe8Hl5NaM7uLmWfJuer53IILJNzSqt78Wqn9OaptXq4Hr6FM7bUjhieCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFqipYAURHR5OamnrVMc2aNaN9+/YuvZwGDBjAtGnTmDBhAomJifTo0UPZmq9Wu3fvxul0cu+99zJjxgzl+IwZM9i0aRMAfn5+VFdX065dO9atW6eM+eijj/j+++/Zs2cPd911F02aNCE7O5tJkyYxZswYZVz//v2xWCycOnVK6V1Vu8Kq9usTJ07w6aefcvToUeWepaWlJCQkMHfuXIU1c+ZM9uzZo/Sn8vX1paioiHfffZd+/fop42655RYuXLhAZWWlcg+z2cyuXbuUFVpbt27ls88+48CBA8p1ZrMZT09P0tLSACgrK+Pee+9lyZIlTJky5apzcCU5HE6s1gv1uvZGlsGg/gZ+mkf18UQwZfJYWlZFyup00jMvrk6Iiwri6YfiMHtfe9+XdhH+pB8vuOL5mJYBFBba6uVVpnkRyVQ7TwRT86hOj+7i+XjornrebNJrP3d+g9z9fiDCoyieu5jhQVf/B3eLpj71fi2C+p+jWudFJM9dTO21o855EckTwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfV4I8jPz/s3rzpTXcHqt6i2l9Ol8vPzIyQkROnhFBUVpfx3rWqLNV27dnU5Hh0dzaJFiygvLycqKgq42F+qVllZWZhMJrKzs5Xz+/fvV8ZDTWHq/Pnzisfbb78dvV6Pt7e3UkiqPQdw7tw5xWtVVZXSV6pWubm5eHh4KONDQkIoKiqiTZuLW06VlJRgs9lwOp3k5OQofkJCQly2E8zMzESv15Ofn+9yj/Lycm6//XasViuRkZH4+vpy8uRJGiKtodyV1Rga+Gke1ccT
wZTB49xV6XX6vmRkWXh3VXq9+r4M7BHJfzdfeVvA/re3bnB+GeblejDVzhPB1DzemLwQf286tQniwAkLDufF43pdTb+7YD8v7efOb5C73w8uJ7VldjezQ+sgfL2Nl93azdfbSGyrQLf4VftzVNu8XA9eQ5naa0cMTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIsaZcHKarVSVVXF2LFj2bNnD2azmcGDB+Pn56f0cEpISOC9997DarXi5+cH1KywAjh16hR33XUXFouF2NhY+vTpg9PppLi4mPj4eHQ6Hfn5+Tz77LOkpaVhNBqprq4mKChI4Xfo0IFvv/2WHTt28K9//YusrCwCAwOxWq1ATS8pk8mkFJneeOMN1q5di81mw2w2ExAQQGlpzTYvd9xxBwDHjh1TMnl7e1NSUuKSqXXr1hw9epTx48dz5swZTCYTwcHB6HQ6F/9eXl7k5+czYsQIMjIyKCsro3PnzjRr1oySkhLlObZq1Yp7772X/fv3Y7PZOHXqFBUVFfj41K8XQq20HlZ11Rj2Q9U8qo8ngimLR1F9XyY/Gs+Mpbsve7whP/tkmRfRTLXzRDA1j+r06E5ecmIcc3+1Oqhjm5rVQdrPnf8tkX3A3OVRJM+dzGmPdWfawp9cCg++3kamPda9wZ//1f4c1TwvonjuZGqvHXXOiyieCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2dQoC1ZOp5NPPvmEDh06MHv2bPLz85k5cyZ6/cUXwrBhw1iyZAnJyckkJSWRn5/PDz/8gE6nY+HChUycOJH27dszYcIE3nrrLeU6T09P/Pz8OHr0KEVFRTzxxBN89913pKen4+FxccuQ7t27A/DBBx/Qu3dv+vbty+LFi+t47dChA9999x1Llixh1KhRnD17lnXr1uHl5YXTWfOnss2aNUOn07Ft2zYiIyN58skn+eyzzygsLKSqqkphtW/fnq+//hqr1crjjz/O+fPnWb58eZ17tmjRguPHj5Ofn090dDT79+8nIyODvn37cuTIEQBmz57NL7/8QlpaGgMGDCA8PJz169eTl5fHgQMHqKqqcsn7W6X1sLq6GsN+qJpH9fFEMG90j6L6vvQMNLOua0s++eYIew6fpWv7UIb2bVdfm3V0o8/L9WKqnSeCqXm8cXmBgfB6cm/yzpWSd97m9v5LN/q8XI8+YKCuzKKYgYFmVkwfyJ7DZzl00kJM6yC6tg/93xdeg9T+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdQoC1Ymk4mKigrmzJmj9LGy2+1MmTIFo7Emkr+/P4sWLeK1114jOTkZs9lMly5d2LFjB6NGjVL6TkVFRVFYWIjT6VS27WvSpAnFxcU4nU5SUlKIjY1l0qRJzJgxQykyNW3aFLjYK2rv3r3cd9995Ofnk5aWprDCwsIACAoKYvHixYSHh/PSSy/xxhtvYDKZlEyenp5UVFRQXFxMSkoK8fHxPPDAA8yePVvJ1L59ewCaN29OSkoKZrOZ4cOH89///pcLFy4o92zbti0VFRXo9XoOHjwI1PTD2rx5szKmQ4cOLF68GKPRyNdff01YWBi33347VVVVrFu3jl27dnH77bdf89xoPawuL4NB/fuhah4bzsvIspBz3karEDMdIoMazAP1ZxbBdAdPZN8XgPu6tWRo33ZYrWUN4tRKlnkRzVQ7TwRT86hOjyIy+3oauDU2TPu5c40S/X6gxsyimdHNm9C1fajbXoug/ufYGOalMXjUXjvqnBfNozo9yphZBFPtPBFMzaM6PcqYWQRTVo83gm74HlYGgwFfX1+lWAXQu3dvACorK5Vj0dHRpKamKl8vWLCAHTt20KlTJ+XYsmXLSExM5NChQ3h5eSnHPT092bp1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWlqasq1eRUWFcp+WLVsq4+bPn09ZWZnytdFopEmTJqSlpSnH
8vLymD17tpKpQ4cOADzxxBP069dPGffNN99QVlam8KOioti+fTs7duxg9erVTJ48mTvuuIPU1FTatatZCdC3b186dOiAj48PKSkpCuuNN94AUApz9ZG2P+eV1Rj2Q9U8XrvyCy/wj8U762xz8vfRtxIS0LAtNmultszXg9kQ3vXo+9JQj9eDJ4Ipo0cZM4tgyuhRxswimNr7gTqZMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNcqCld1up6ioyKU/VW1x6dJVS79W7UqlAwcOMGDAAACqqqrIycnBbrdz88034+vrS0VFBRUVFZw4cYLIyEgAfvzxR5xOp7JN3pkzZwDYs2ePSz8sg8EAwIULNauMPD09Afjzn//MiRMn8PDw4M4778Risbh4tdvtWCwWBg4cSHZ2NuHh4XTt2tUlU8uWLYmMjOTf//43U6dOxWazERcXx+nTp4mIiFDGJSQkMHfuXBITEzl69CgAr7/+OgcOHODxxx9X7nnTTTexbNkyunfvTnFxMY899hiffPIJRqORW2+99donRpMmSfXrYhVAaVk1ry3ayX/GJ/xOrjQlDe7I+2syXHqXdIgMImlwx9/RlSZNmjRput7S3g80adKkSZMmTZo0adKkqXGoURasKisr8fLyculPNWvWLPz9/amuvvhL49GjR5OXl8emTZuAmiKSwWBg0aJFhISE0K5dOxYvXozVagVg+vTpVFRU8NJLL2EymXj22WeZMGECZWVlzJo1i6CgIHS6mm1FiouLASgoKKBr166MGjWKTz75hMzMTJfzVVVV6PV6Dh06xIgRI/Dx8WHhwoU4nU6X1WAVFRU4HA6Ki4v585//zK5du1i9ejU+Pj4umZo1a8aPP/5Inz59uOWWW5gzZw5Op5Px48crY6KiovDw8ODYsWP06tWL77//ns8//5yAgADuvfdeZdzatWvx9fVV8i9cuBCA559/vl79q2rV0Ma5N6IaQwM/zWP9tO/4+TrFqlqVllVzMLuQuKim9earMbNoprt4/r6evDAinnNFZVjL7fh5GQgJcM8ewmrNLJIpo0cZM4tgyuhRxswimNr7gXukeVQfTwRT86hOjzJmFsGU0aOMmUUw1c4TwdQ8qtOjjJlFMGX1KJsaZcFKp9MxdOhQDh8+rPSnGjJkCFu2bHEZ53A4sNvtLsf0ej3PPPMMCxcuxGKxEBQUhNFopLq6mu7duxMWFsabb76JxWKhWbNmTJgwAaPRyD333EN+fr6yVV7t1oC33XYbhYWF/Pvf/6Z58+b4+fkpBSCAU6dO4XA4SExMZP369dhsNtq2bcuBAweoXY0FNVvwBQcHc9NNNzFnzhzMZjNRUVFkZ2crY86cOcPPP//M4MGD+eWXX9iyZQt2ux2j0ais+AJYuXIlRqORfv36KcW6tm3bcuzYMQoKCpS+WtHR0fzyyy/o9XrsdjsBAQHodDp+/vlnxo0bpxTnrkV6vc4tjatvVDWGBn6ax2tTniXnqudzCy6QcEurevNrpabMopm550o5cjCf8GAz4SG+DeaJ/Jkk07yI4olgqp0ngulOnru/B2sl23MUwRPBlMmj9n6geVQbTwRTNo+7D5/l8I8niWkdRNf2oW7jqjmzKKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpWfnx8mk8mlPxXA559/jr+/v/L1kiVL6lxXVVXFmDFjSEpKAmDEiBEEBARw+PBh5dpmzZphsVjo378/8+fPV64fNmwYzZs3B2oKUQD3338/w4cPV8ZMnjyZVatWKayCggIMBgMzZsxQxjidTm6++WalYFVZWYnT6aRt27Yumb755huefvpp9PqaimxaWhoOh4OXXnqJX375haeffpqnn36ao0ePsmXLFp544gkAtmzZQs+ePfnXv/7FqlWrmDx5Mu+++y733nsvW7duJTExEafTycmTJxkzZgyTJk2iffv2/N///R9dunRh+PDhbN26lTvuuOMa
ZwYcDidW64Vrvu5Gl8Gg/gZ+msf68cKDrv4G1KKpj9bM/TeqtKyKlNXppGde3LIpLiqIpx+Kw+xd/1Wfas4siieCKaNH2TJr34Pq5YlgyuhRxswimDJ6lDGzCKY7efmWC7zy4U91eshOe6w7oYH17yGr5syimJpHdXqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ42yYBUVFaVsvVerkpISzp07R1RU1FWvA8jKyiImJgaAzMxMWrRoQXh4OF5eXkDNaqQjR4643MPpdJKVlUWvXr0AsNlqfgFdVlbmcg9v75pfXrdo0UI5b7fbKS4uVopYOp0OLy8vpWBVu4rq16xmzZoBKL4yMzNp2rQpWVlZjB8/nj/84Q+MHz+et956i08//VS5LjMzk4cfftiF5evrS0hIiJLJYrFgsViU51CrDh06uHiqj7SGcldWY2jgp3m8NnVoHYSvt/Gy2wL6ehuJbRWoNXP/jZq7Kp0DJywuxzKyLLy7Kp0Jj3RpoDt1ZhbNE8GU0aMsmbXvQfXzRDBl9ChjZhFMGT3KmFkE0x28XxeroGY77mkLf3JLD1k1ZhbN1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiRlmwSkhIICUlhZEjR5Keno7ZbCYmJga9Xq8UlC6n+Ph4fH19ef3118nOzsZisVBRUUFFRQUPPvigC3/NmjWsXbuWZcuW4eHhQefOnSkqKuLOO+8Eavph6XQ6/vvf/7JmzRqysrIIDw9Xtgz09b24hY5Op+O5557j8OHD2Gw24uLiKCkpoWXLlsDFflcHDx50yRQdHQ2gjLNarXh6ejJmzBgA1qxZQ3p6Oh07dlQYtbyvv/6atWvXcv78eQBefPFFvL29lXFBQUF4eHjwt7/9jb/97W8AzJo1i1mzZgEXC271kdbDqq4aw36omsf6a9pj3Zm28PJ/4dnQ7we1ZnY383SBjf1ZljrHHU7Yn2XhvLWcZkH1+2tZtWYWyRPBlNGjTJm170F180QwZfQoY2YRTBk9yphZBNNdPJE9ZNWaWSRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBasBAwbw9ttvc/DgQZ588klOnjzJqlWraNeundKfCWD06NHk5eUpfZw8PT3p0qULaWlp9OvXj/j4eGbNmoXNZqN///7KdXfffTc6nY5z587x5JNPUlpaytKlSwkKCqJz587KOL1ez/Hjx4mOjmbChAls2rSJXbt2uXg1Go00adJE2YqvdevWzJ8/H4fDQXBwsMvYysrKOpkAZWVWeXk5p0+fxuFwMGjQILp06cK2bdtYvXp1nX5TDoeDfv36kZ+fz9dff82BAwdcVqDpdDq6d+/O1q1bGTRoEOvWraNHjx4cPXoUX19fevToUa+50XpYXV2NYT9UzeO1KzDQzIrpA9lz+CyHTlrcvoc+qC+zu5nHz5Re9byt0tHgny1qy3w9eCKYMnqUIbP2Pdg4eCKYMnqUMbMIpoweZcwsgtlQ3vXoIau2zNeDqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFjbJgtX79ekwmEzExMaSkpGA2m+nVqxfbt28nPz9fKVo5HA7sdrtyXUVFBXv27KF79+7s37+fLVu2YDAYMJlMbNiwgdtuuw2Ab7/9FqfTSXh4OKmpqRiNRnr27ElaWhr79u2jc+fO+Pn5YbfbiYyMxGAw8Oabb/L/2Hvv8KiK9v//tTVlk00nEFpIgCR0kB5AaUoTfBAU9YvYUdGPCioWQFRQHhRFsSGCFBUriiAWBCEU6UgnlBAgJCSBlE02ZTe7+/sjvzPu0h4pRxfPvK7LC7N79n3ue2bOzJwp98TFxVG7dm1OnDghJpmCgoI4evQonTt3ZvXq1djtdlq0aMHmzZspLa0eHFKuNRqNPj61adOGbdu2iV1bLpcLt7t6K+GSJUtYsmTJOdMnLCyM06dP8+mnn4rPcnNzgepwiAp9+vRh3bp17Ny5E4Ddu3fTs2dPnnji
Ccxm8yXljTzD6twYDP4fD1XaePl6ibVCaZ1UA5ut/LLOrfLG330GyCsqp6TChTXQQEz4pTXIwSbdBb+3mPWXnKZaLItqaGrRRi35LJ9B/9ZTQ1OLNmrRZzU0tWijFn1WQ/NK6al5hqy/+qym5pXoy5+JFtPR3/XU0NSijVr0WQ1NLdqoRZ/V0NSqjf8GrNZ/+RlWaWlppKam8t5774nPbDab2DE0ePBgABYsWODzu23btmG323nuuedISUkB4I477iAvL4+0tDRx3YoVKwB45JFHhJbH46Fjx46sXr2aFi1aUK9e9YqtHj16iJB6AA8//DAnTpzg1KlT1KlTR5w/NX36dDEx5fF4RFhAgHr16qHT6YiPj+eTTz4RWkuXLmXbtm3k5+cDkJKSwrJly3jttdd8QhgOHDiQAwcO4HA4MJvNJCQkEB4ezrvvviuuycrKomfPnrRt21Z8ZjRWZ//nn39Op06deOihh7j33nv/ajacFxmf8/xcDfFQpY3+p6eG5pXQKy138uH3e3zCiDVrEMnIQU2xBJouSismLIhmDSLZm1mA2/Pn53odNImPJNoaeNn2aiVf1NbUoo1a8Fk+g1eHnhqaWrRRiz6roalFG7Xosxqal6v3d5wh628+q6F5Jfvy50ML6Xi16amhqUUbteizGppatFGLPquhqVUbtcJVGUwxIyNDhLZTsFqtxMTEkJGRccHfAT6/7datGydPnuTEiRNUVFQAsGPHDnQ6nc95WDqdjgYNGgiNqKjqmNgnT54U1zidTnbv3u1zr/DwcAAKCwvFdb///jtOpxO7vXrVl9lsxmAwYLPZfOxdtWoVBoNB/LZGjeoQY8eP/xkCobi4mMzMTDwej/i8W7durF+/nqKiIpxOJ1lZWTzxxBMA55yQGjBgAAAzZ85k5syZeO9Kk0gkkvPx4fd72JtZ4PPZ3swCZi7ec0l6Iwc1pUl8pM9nTeKrX5olEon6yGdQIpFIJFcL40e0JSTId/1tSJCR8SPanucXkjO50n15iUQikUgkkivBVbnDymazYbVaz/o8LCyM4uLiC/7ObDYTEBAgPhs2bBgfffQRDoeD5cuX43A4yMnJITEx8azzsPbt20dwcPWB4+Xl5QAsX76cefPm0bhxYxYuXCh2TSl2REVFYTKZePTRRxk9ejTl5eVMnTqVhIQEn4knj8dDXl4eEydOpG/fvmzcuJGlS5cSGxsrtPT66vnFOXPmULNmTWJjY5k5cyYWi4XKykpx3bBhw1iwYAH9+vXj9OnTQPWE24033ugzWed0OunatauYXCspKeGNN95gy5YtzJo16y/lxbkwGq/KeVBVuRoO8JM2+p+eGppXSi/ntN1nNaaC2wO7jxRwylZBzcjgi9IMCwng6TvakF9Uju0KhiXRUr6oqalFG7Xms3wG/VdPDU0t2qhFn9XQ1KKNWvRZDc0rqVcrOoT3xlzH3swCjuXbqRdjOWvRxaXgzz5fSU01+vJX2kY19dTQ9Hc9NTS1aKMWfVZDU4s2atFnNTS1aqPWuConrK4kYWFhjBo1ildffZXnn3+ekJAQQkJCaNWqlc91brdbnCXlzc0338ycOXMoKCggJSWF6dOn88ADD4jv9Xo9NWvWJD4+ntGjR2M0Gunduzd16tTxCWmo0+kYOHAgW7du5euvvyYuLo5JkyYxd+7cs+45YMAApk2bht1up02bNrzwwgs89thjPj7NmzeP559/npKSEgICAggODmbz5s1kZ2cTFxcHwLFjx1izZo2Pj1AdcjEvL0/s6LoY9HrdZR/K/m/majjAT9rof3pqaF6u3uGTpRf83u5wX3JdoFYdooV8+Ts0tWij1nyWz6D/6qmhqUUbteizGppatFGLPquheSX1UiMspP7vyy4af/b5Smiq2Zf35t+ejlejnhqaWrRRiz6roalFG7XosxqaWrVRK1yVE1ZWq1XsZPKmuLhYnBN1vt85HA4qKyt9dlkFBwej0+nYtGkTgYGBDBkyROyg
UliwYAHDhg0T+sq/vXr14sUXXxTXZWZm+nxvtVqpqKhgxowZPnpvvvmmj61Wq5XY2FimTp3qc91bb7111j1HjBjhc89169b5fA+QmJjI559/Lv4uLy/n+uuv56OPPmLChAninsHBwfz2228idOEbb7zBzJkz+f333xk0aNB50/J8uN0ebLayi/7dvx2Dwf8P8JM2+p+eGpp3TvpV/P/8cb0uWSfYpLvg9xazXh52/Q/qqaGpRRu16LMamlq0UYs+q6Hp73pqaEob/dNGLfqshqYWbfRXn9Xsy4N20vFq0lNDU4s2atFnNTS1aKMWfVZDU6s2/huwWoP+8q6zq3LCKiEh4ayzqkpKSsjPzz/rbKszfwdw5MgRkpOTxecZGRnExcURGBgorjtw4IDPbz0eD0eOHBHnWtWrVw+TyURGRgZdu3b10fK+V0JCAqdOnTprMu3Mc7j+ik/Kv2f+NiMjA5PJRN26dc/re1BQEImJiRw9elR8lpaWRqdOncRkFUCnTp2YOXMme/fuvaQJK0AeKHcBroYD/KSN/qd3JTTvmbLyrM+Uyas5z/S4aL2YsCCaNYhkb2YBbq/Np3pd9Zk30dZAedi1H+ipoalFG7XosxqaWrRRiz6roenvempoShu1oaeGprRRG3qXq/l39OUv18a/Q08NTX/XU0NTizZq0Wc1NLVooxZ9VkNTqzZqhatywqpbt268//77DB8+nF27dmGxWEhOTkav14sJpXPRpk0bQkJCeOWVVzh27BgFBQUkJyeTnZ1Nr169fPS///577rnnHrZv347JZKJFixYUFRVx7bXXAmA2m+nQoQNffPEF33zzDUeOHCEuLo7w8HASExOpU6cOAF26dEGv1/PEE0+Qnp6O3W6nefPm7Nq1i1GjRl2UT3Xr1iU+Pp4PP/yQ1157jezsbBo0aEBVVRWdOnXCbDYDMGPGDN55551zpkFiYqL4/z179lBeXk5SUtJZ1zmdzr+aHRKJRKOMHNSUmYv3+MS/bxIfychBTf9BqyQSiUQikUgkEsn/QvblJRKJRCKR+CNX5YRVv379mD59Ovv27ePBBx/k6NGjLFq0iMaNGxMbGyuuGzFiBNnZ2SxfvhyAgIAAWrVqxdq1a+nVqxdt2rRh7ty55Ofn07dvX/G7Hj16YDKZ2Lx5M/fddx+lpaV88sknREZG0qJFC5/rXnrpJRITExk9ejTLly9n69at3HXXXeIa5fyqdevWMXjwYOrXr8+sWbOorKxkwIABF+1Tly5d+OSTT7jmmmsYNmwYX331FYcPH/a5Z2lpKf369SMpKQmr1Up+fj7ffPMNubm53HHHHeK68vJy9Ho9Tz/9NABbtmxhxYoVhIaGXtaEldEoD5U7k6vhAD9po//pXSlN7zCA5+KeKSsvKTxgWEgAT9/RhvyicmwVLqyBBmLCLz9Gr1byRU09NTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0NSijf7ss1p9eW/btJCOV4ueGppatFGLPquhqUUbteizGppatVFr6Dwej+d/X+ZfzJw5k/fee0/sVFJ2I/3++++sWrVKTPAMHz6cEydOsHJldSisyspKOnXqRNOmTc/aYdW7d28mTpwIwNKlSxkzZgypqals374do9FIixYtWLt2LV999ZWYtLr33nvJzs7GaDSKHVZhYWHY7XaWLVsGwMmTJ+nevTsdO3YUO6xatGjBzp07eeSRR7j//vsvyqcbbrgBq9VKcXGxzw6rOnXqMGvWLABWrFjB3LlzOXDgAGVlZcTGxqLT6SgsLGT9+vViJ5aysyowMBC32018fDxDhw7liy++oE2bNrz88ssXnTcejwed7sLxsCUSyV/nRH4pOafsxEVbiIsJuSSNG8cs/p/XLJl2aSFA1eBK+CyRSCQSiUQikUgkkquLXzYeZdehfFo2iqFX+/r/tDkSiUQi+Qe4KndYpaWlkZqaynvvvSc+s9lstG/fXuxkAliwYIHP77Zt24bd
bue5554jJSVFfP7qq6+KXViKfnJyMnPmzBGfeTweOnbsyOrVq2nRogUOh4ONGzfy5JNP+uxuWrFiBQ8//DBZWVnUqVOHtWvX4vF4mD59us8ZVo888ghpaWliwuqv+HT8+HEyMzN59913fUIYzp8/n6lTp+JwODCbzfTs2ZOePXuK7ysrK+ncuTP9+vUTk1VQfa6V0+lkx44dPuk0a9YsH1svBrfbg81Wdkm//TdjMPj/AX7SRv/SKy138v63u9iV8WeIjuYJkTz8n+ZYgkyXa+pZ+MOhymr5rMWyqIamFm3Uos9qaGrRRi36rIamv+upoSlt9E8bteizGppatFGLPquhqUUbtebzkexiXpq7GUVm1bYTvPPVH7xwd3via1n9wkY19NTQlDb6p41a9FkNTa3a+G/Aag36y7vOrsoJq4yMDG6++Wafz6xWKzExMWRkZFzwdwAJCQk+nycmJjJv3jwqKioIDAwkIyPjrGt0Oh0NGjQQGseOHcPpdJ5TS7lXnTp1yMjIICoq6qwJoMTERL7++uuL8kn5t0GDBmdpOZ1Ojh8/7nNGlcJvv/1GaWmpTwhCgIiICLKzs+nYsSM2m434+HiGDRtGfn7+WX5dDPJAufNzNRzgJ230D733Fu1ib2aBz2d7jhTw7qJdjL611UVpzXmmB/dMWXnB7/3hUOUr6fO50GJZVENTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1pozb01NCUNmpDTw3NK6HnPVkldN3w4sebmPV0j8vSBv/0WW1NaaM29NTQlDb6p56WuConrGw2G1br2SsswsLCKC4uFn8fPnyYSZMmsX37diwWC/Xr18dsNhMQEODzO6vVisfjobi4mMDAQGw2G1lZWVx33XUUFBSQkpLCs88+66Ov/Dtr1iwee+wxTCYTvXv35sEHH/T53mazYTAYGDhwoAgb+MADD4iwft4+bdu2jdTUVOx2O61bt2b8+PHnvOeECRPYs2cPFouFQYMG0bt3b5/vAb766is++ugjsrOzMZvNhIeH065dOx+/ExISyMnJETuzCgsLmTx5MjqdjtTU1EvImWrkGVZnczXEQ5U2+o9ezmm7z+HHCm4P7D5SwClbBTUjgy/rHt5c7jN7JfxW02ctlkU1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KKNWvRZDU0t2qgln1dtzzprskrB5Yb1u3Po1qr2JWn7q89qakob/dNGLfqshqZWbdQaV+WE1V+huLiYESNGEB8fz4wZM8jNzeXFF1+kqqrqL/32+PHjjB07lqSkJD799FPuuecemjVrhtFYnWSKTk5ODtOmTaOiooL//ve/ZGdn+2jl5+eTm5vLddddx3PPPceGDRt4/vnnGThwoM91LpeLXbt28cILLxAbG8sHH3zAXXfdhcViEdeUl5eLeys+TZkyhRMnTvho/fDDD4wfP54HH3yQ5s2b88gjj+DxeNi5cyetWrUCoKCggD179mAwGIiLi6N///4sW7aMgoICDAYDkZGRF5fg/z96vY6ICMv/vlCjWK1X5hBbtfTU0NSijZerd/hk6QW/tzvcF/2cKWdUeZ9ndaXPrbocv9Xw+Uy0WBbV0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NKWN2tBTQ/Ny9TJySi74/aFsG4O6N76se/ibz3+HprRRG3pqaEob/VNPS1yVE1ZWq5WSkrMbtOLiYhF67/PPP8dut/POO+8QHh4OwO+//87SpUs5duwY9erVE7+z2WzodDrCwsKorKzEZrPRoEEDcTbVNddcQ58+fTh8+DDt27cHYNeuXQCMHDmSHj16CLvuvfdeAGHHwYMHMRqNvPTSSwB07NiR48ePs3r1anHNyZMn8Xg8dOjQgSFDhgDQvHlzunfvjt1uF9dt27YNgOeee46WLVsC1RNdEydO9Lnn22+/Tf/+/Xn88cf56quvcLvdJCUl8e677zJr1iyRFoWF
hcyZM4dZs2bxwQcfEBwcjE6no6qqimPHjp0zvOD/Qp5hdW4MBv+PhyptvHy9NTuyOXiimMZ1wujSIu6SdYJNugt+bzHrL/nMqU9fuF74fDnnVnlzJdJRTZ+1WBbV0NSijVr0Ga5cXaagxXTUos9qaPq7nhqa0kb/tFENn/ccKeD4KTv1Yiw0ib+0xYLeaDFf1ND0dz01NKWN/mmjlnxOqBXKqgt83zDO6jfvglrKFzU1tWijFn1WQ1OrNv4bsFr/5WdYJSQknHVWVUlJic/ZS2lpaXTq1ElMVgH069ePpUuXsmTJEkaNGiU+z8jIIC4ujsDAQH7//Xfcbjdu958Fymw206tXLxYsWCD09+3bB0BZ2Z+TM6mpqVgsFux2OwkJCTgcDk6ePInL5fKZTFPsUCad1q5dC4Be/2emhYeH0759e1atWiXuefjwYaB615ZC3759mTBhAgaDgbp163L8+HEyMzN56qmnAFi6dCkJCQkMGTKEqVOnivB/TqcTgGbNmjF37lwAPB4PrVq1oqKi4q9nxjmQ8TnPz9UQD1XaePEcybHxyoItPgfEzl66l3Ej2lI/9uzwpf+LmLAgmjWIZG9mAW7Pn5/rddAkPpJoa+Bl++9v+aJFn/8OPTU0tWijVny+0nWZGjaqrenvempoatFGLfqshqYWbbwSermFZUyev4XS8j8jf4QEGRk/oi0x4Zcf8lmL+aKGpr/rqaEpbdSGnhqal6vXpXkc837cf86wgAY9dG5Wy+/eBbWQL3+HphZt1KLPamhq1UatcFUGU+zWrRvr16/HZrOJz3766Sf0er04eykjI4PIyEjuvvtuWrVqRWpqKhs3bkSn05GWliZ+53Q6+eWXX+jWrZv4HcCRI0fo0qULLVq04NZbbyU/Px+3203Hjh0BOHr0KJGRkbz33nu0bt2a9u3bM27cOEwmEyEhIdSpU4djx47hcrnQ6XTceOONNG/enBtuuIHdu3cD0LBhQ3HP4OBg1q1bR6dOnWjVqhV33303paXVIbIUn06cOEFISAgvvPCC8OmDDz7AZDIRFxeH2WwW9n/22WekpqayYcMG4uLiSExMxOl0cvz4cQBq1KhBQEAA3bt3p0WLFvTs2ZNbb70Vp9NJSEiIzw40iURyYbwHeBVcbpg0b8sla44c1PSs1bZN4iMZOajpJWv6O1r0WSLxJ9SoyyQSiUTiy5mTVQCl5VW8LOtaiUSiYcaNaMuZC+8N+urPJRKJRKItrsodVsOGDWPBggWMGjWKkSNHkpuby9SpUxk2bBixsbFAdXjAb775BpPJxLvvvivOezKbzezcuZN58+bRuHFjFi5cSFFRkQjlZ7PZ0Ov1YofVQw89xIoVK1i2bBmAmMgpLi6msrISu91Ot27daN68OXPnzsVut5OUlCSugeqdSwUFBdx5553k5uby3nvvAYjzpJR7ulwuwsLCGDFiBN9++y2ZmZnodDofn4xGI6Wlpdx4443UqFGD+fPn43Q6adSokc898/LyqFOnDqdOnSIlJQWr1erz/Zo1a9DpdHg8HiorK8nKyiIrKwuA//u//8NkMl1y/hiNV+U8qKpcDQf4SRsvDbUOiA0LCeDpO9qQX1SOrcKFNdBATPjlx7/153zRos9q6amhqUUbteTz1XTYtRqa/q6nhqYWbdSiz2poatHGK6W38/CpsyarFErLq9h3rJDmCVGXpK3FfFFD09/11NCUNvqnjVrzObF2OB8/14u1O7M5kHVlQ1N7/+tvempoShv900Yt+qyGplZt1BpX5YRVWFgY8+bN4+WXX2bUqFFYLBaGDBnCE088Ia7xeDx4PB4iIyPp2rUrUH3e04QJE0hOTmbOnDkUFBSQkpLC7NmzqVu3LgBVVVW43W7+3//7f+Tl5fHhhx9iMBgwm804HA6hX1ZWht1u54UXXmDhwoX8/vvvREREYLfbCQgI8LG3YcOGdOvWje+++w673U5kZCQFBQUEBQUJrdLSUh599FG2bNnC+++/
T1BQEHq9Ho/nz9hYHo8Ht9vN888/zyeffEJ2djaRkZHk5uZisVh87vnxxx/z4IMPAhAREXFWGt588818+eWXOJ1OTCYTOp0Og8FAeXm5mCC7FPR6HRERlv99ocbYlp5H+oajJNePpHVSjSumq9VDBq+U5on8Ug7syyUu2kJcTMglaah9QKxaz5M/54sWfVZLTw1NLdqoBZ+vxsOu1dD0dz01NLVooxZ9VkNTizZerl52wfELfn/idBndrrm8SBdazBc1NP1dTw1NaaN/6V2Jd9Xz4a8+A9x4baMrpuWNP/uslqa0URt6amhKG/1TT0tclRNWAImJieLspXOh1+upXbs2v/zyi/hMOe8pNjaWxYsXn/N3yg6kgQMHijOmAO6++27Wr18vzqGqqqoiNDSU22+/ndtvvx2onlBq2rQp5eXlAAQHV8cgv+aaaxg7dixjx44FYOHChUycOFGcI6WENhw+fDiPPPKIuGefPn3ErifFp7i4OO68807uvPNO8dt27doJDcW+kpISvvnmG7Hb68zvv/nmG4KDg1mxYgVmsxmAAwcOcOONN7JgwQL69Olz3rS9EG63B5ut7H9fqBFyC8p48eNNZ8Won3hPe2pEXHqMeoNBm4cMXinN0nIn73+7i10ZBeKz5gmRPPyf5liCLm53oZoHxIK28uVq0VNDU9ronzZqyeer6bBrNTT9XU8NTS3aqEWf1dDUoo1XSi8u8sIDF7WjgmVd+w9r+rueGprSRv+y8Uq+q6plo1p6amj6u54amtJG/7RRiz6roalVG/8NWK1Bf3nX2VU7YXUp6HS6K3rt+a7x3hV1uXZcrFZCQgJQfS6W8v/K3yaTSewkO3ToEAkJCWKyCuCPP/4Aqie7Lgd5oNyfnDlZBdXhPibO2cTbj3W7bH2tHjJ4uZrvLdrF3swCn8/2HCng3UW7GH1rq4vS+jsOiAVt5MvVpqeGprRRG3pqaF6Jw67n/LD/vN/742HXamj6u54amlq0UYs+q6GpRRsvV69J/UhCgoznDAsYEmQkpV6ErGv9RNPf9dTQlDb6h96VfFc9H/7m89+h6e96amhKG7Whp4amtNE/9bTEv3bCyu12c+LECYYPH86uXbuwWCwkJycDiPOczoWyA+n111/n2LFjFBQUkJyczJEjR4DqHViBgYEYjUZOnTrFPffcw/bt2zGZTLRo0QKXyyV2VpWVVe80Wrt2LQMHDuTIkSPExcURElK9nVs5J0qx54knniA9PR273U7z5s3JysoSZ2n9VZ/q1q2L1Wpl7Nix4rfz588nKCiITp06iQmqmJgYli5dSvfu3Tl9+jSBgYGUlZURGBjoM9F1KcgzrKqRMer908ac03Z2Hyk463O3B3YfKeCUrYKakRe3++2Fu9vz4sebfCatDPrqzy/3edBKvlxNempoShv900Yt+Tz9y+0X/P69b3fyf0NbXZK2ltJRLT01NLVooxZ9VkNTizZeSb2J97Rn4pxzR2C4nH6jFvNFDU1/11NDU9roPzaq8a56pW1UU08NTX/XU0NT2uifNmrRZzU0tWqj1vjXTlhB9ZlV+/bt48EHH+To0aMsWrQIs9kszo4CGDFiBNnZ2SxfvhwAo9GITqdj48aN9OrVizZt2jB37lwRUk9BOWNq8+bN3HfffZSWlvLJJ59gMpkIDQ31ufbEiRMkJiYyevRoli9fztatW32+Dw4OxmQysW7dOgYPHkz9+vWZNWsWVVVV6PW+hfuv+NSkSRM2bNjAgAEDWLp0KQ6Hg5MnT9K/f39xTf/+/Vm0aBHBwcEMHjyYb7/9FpvNhtPppEePHpec5vIMqz+RMerV0btczcMnSy/4vd3hvugyHBFh4bvXBvHrpqPsOJhPy0Yx9Gpf/5JtPBf/9ny5GvXU0JQ2akNPDc3L1TucfeHd1QdP2C67fddCOqqtp4amFm3Uos9qaGrRxiuhFxFhYeGk/mxPz2P/0QJ5xq2favq7nhqa0sZ/Xk+Nd9Vz4U8+/12a
/q6nhqa0URt6amhKG/1TT0v8ayesAgICcDgcJCcn8/7772OxWEhNTWXdunUYjX+67Xa7cblc4u/g4GA8Hg9t27Zl9+7dpKWlkZycTFlZGaWlpWIHlvLbdu3aMXfuXIxGI507d2bt2rUijJ9ybc2aNTEYDEybNo24uDiSkpJIT08X3xsMBpxOJ507d2b16tXY7XZatGjB1q1bCQgIuGif5s2bx1dffcWsWbOEfkJCAtu3/7l6OjU1lY8//ph3332XmTNn4nQ6SUpK4tChQ2RnZ19yusszrP5Exqi/8jbmFZVTUuHCGmggJvzSKv5g04VDclrM+kvOl/bJNejVvj42W/llnVvlzdWQL/5uoxZ9VkNTizZqyefEuFC2HTh93u8b1ZZnWP2TempoatFGLfqshqYWbVTD58RaobROqnHF+o1azBc1NP1dTw1NaaP/2Kjmuyr4p89qa/q7nhqa0kb/tFGLPquhqVUb/w3IM6yonqSJjIzkk08+EZ9lZ2fTvXt3HA6H+GzBggU+v6uqqg7NcMcdd9CvXz/x+eDBg9m/fz+BgYFA9dlSAQEBzJkzR1zjdrtp0qQJTqcTqJ6oAmjWrBnvvvuuuO7NN98kPT1dhA6srKwE4KWXXhJnTAF07dqV8vLyi/YJYOjQoQwdOpSkpCTuvvtuNm/ejN3u27Hp1KkTq1ev5o8//mDu3Ll07NiRtm3bCvsvFRmfsxoZo/7K6ZWWO/nw+z0+4RGaNYhk5KCmWAIv7uDZmLAgmjWIZG9mAW6vI+L0OmgSH0m0NVDmi59o+rueGprSRm3oqaF5uXqPDG7JPVNWnvf7h//TQtaNfqCnhqYWbdSiz2poatFGLfqshqYWbdSiz2po/ttt/DveVS/Xxr9DTw1Nf9dTQ1PaqA09NTSljf6ppyX+tcEUXS4XRUVFPqH81q1bByDOcToXyk6lvXv3is+cTicnTpzA5XJRUVEBgE6no7KykszMTHHdhg0b8Hg84myqkydPAvhcA3DgwAHgzzOulF1UGzZsENcUFxdTWFiI9+6vi/HJ4/GIybfdu3ezbt067rjjDp9rPvzwQ+bOncvkyZNJTExkypQp6PV6brrppvOmj+TiGD+iLSFBvvPCIUFGxo9o+w9ZdHXy4fd7zjp4dm9mATMX77kkvZGDmtIkPtLnsybx1RNgEolEolVG9Gl8UZ9LJBKJRCKRSK4s8l1VIpFIJFrnqt1hdfjwYSZNmsT27duxWCwMGjSIxx9/XEzcOBwOAgMDGTVqFCNHjiQ3N5epU6cSFhbGrl27uO666ygoKMBkMmGxWEhLSwOqJ5EMBgPz5s0jLS2NzMxMXC6XCPNXXFxMYGAgOp2OsLAw7rnnHuDPySmLxYJOpxPXAmRkZDBo0CCys7MpKysTE0nK9y6Xi9DQUKZMmcLHH3/M8ePHcbvdYlJMQfFp6NChlJeXc+rUKTweD8HBwUITYN++fUycOJE//vgDgGXLljFo0CCfc66WLFnCtGnTqFGjBk8//TRQPQnXpk0bn3teCpdzWPC/jVrRIbw35jr2ZhZwLN9OvRjLWZ3PS0FLhwyqcfBsWEgAT9/RhvyicmyXGWLQGy3li5qa/q6nhqa00T9t1JrPPdvWo2fbesz4+g8OZNloXMfKo0NaXbau1tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaq9a7qbZu/+aympr/rqaEpbfRPG7XosxqaWrVRa1yVE1bFxcWMGDGC+Ph4ZsyYQW5uLlOmTKGiooIJEyYA1ZMvQ4cOJT09nVGjRmGxWBgyZAjfffcdu3fv5plnniEpKYknnniCvLw8jh8/LsLx6fV6rFYrhw8fBqB27dqcPn2a0tI/D8DU6XQ0a9ZMnB8VEBBAvXr12L9/P6dOnfKxt1mzZuzevRudTkdUVBRms5msrCyxWwsgIiKC/Px8jh49il6vp379+hw7dkxMlCn3vOaaa0hLS8NoNGKxWKhVqxbp6emcPv3nuRM/
/PCDmKxSWLx4MatXr2bjxo3Anzuz8vLyxDUej4etW7dy7733snr16kvKG71ed0UOAf23kRphIVUFXS0cMqjmwbNqlVUt5MvfoenvempoShu1oaeG5pXUm3C/Gi2W9tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaqOa7irz6rqenvempoShu1oaeGprTRP/W0xFU5YfX5559jt9t55513CA8PB6p3Kb344ouMHDmS2NhYrFYrZrOZuXPnit9VVlby8ccf07JlS+666y4A0tLS6NOnD7Nnz2bixIlYrVacTienTp3ixx9/JCEhAYDJkyczf/58jh07JvTT09Np3bo1n3/+ubhHx44dOXHiBABhYWFAdUi+iRMncuuttwKwc+dOhg4dyvbt2xk6dChWq5XTp0+j0+lYt26d8On+++8nLS2N3Nxccc8dO3YwYMAApk2bJu7ZrFkzEWYQ4Mknn2TMmDHodDoWLVrEs88+y6233soPP/yAy+XCYDAwZcoU8vPzcTgc4hwvt9tNz549yc7OJicnh1q1al103rjdHmy2sov+3b8dg8H/D/C70pprdmRz8EQxjeuE0aVF3CXrqHnwrBbzRYs2atFnNTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0MwrKqdEhR0j/pyOV0O+SBv900Yt+qyGpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dlRNWaWlpdOrUSUzsAPTt25cXXniBdevWMXjwYBISEsjIyPD53dq1a/F4PHTt2lV8Zjab6d27N8uXLwcQE1Tx8fHi/6F615VOp2PDhg20a9eO+Ph4du3axf333y+u8Xg8OBwO7HY7WVlZ1KtXD4PBgMvlok+fPuI6ZQfWoUOHxD3tdjvdunXz8clgMAAIn2rXrs2uXbvo27evuKakpASn00l2djYOh0OERFTCEiokJyfzxRdfUFBQQExMDABVVVWEhIT4+Fi/fn2ys7N9dnZdLPJAufNzNRzgd7maR3JsvLJgC0qdvGrbCWYv3cu4EW2pH2u9aL2/4+BZLeSL2npqaPq7nhqa0kZt6KmhKW3Uhp4amlq0UYs+q6GpRRu16POV0Cwtd/Lh93t8wnw3a1B9Jo8l0HQlTPT7dPTHfFFbTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9ron3pa4qqcsMrIyKBnz57cfffdPmdYxcTEiEmqbt268cEHH2Cz2bBaqwfJly1bBlSfU6WcYZWSkkL79u3Jzs6moqKCNm3aoNfr8Xg8PProo6xduxaj0UhVVRWRkZFCPzk5mSVLlnD8+HEGDhzIkSNHiIiIwG63Cxvr1KlDzZo1yc3N5YMPPuD777/HbrdjsViwWq1iJ1aXLl0AsNvtwqegoCBKSkqwWCzing0bNmTXrl3s2rWL1157jezsbCIiItDpdLhcLo4fP05iYiIABw4cYNq0aWzatAmAd955h6CgICIiIkQ6hoaGsnz5cpKSknzSNzg4mLi4S98RI9E23pNVCi43TJq3hVlP97gkzZGDmjJzse9Lrjx4ViKRSCQSiUQi8X8+/H4PezMLfD7bm1nAzMV7GH1rq3/GKIlEIpFIJBKJX3JVTlgVFxezbNkymjRp4nOGlV6vp7i4GIBhw4axYMECRo0axciRI8nNzWX58uXodDrmz5/Pk08+SVJSEqNHjxbnPRUXFxMbG0toaChHjx6lrKyM+++/n99++41du3ZhMpmEfvPmzQH45JNP6Nq1Kz179mT+/Pk+NgI0btyYEydOsGDBAu68807y8vJYsmQJgYGBlJVVh86rWbMmOp2OrVu3Eh8fz4MPPsh3331HYWEhQUFBQqtVq1Z8++23fPDBBwwYMOCc99y/fz+vvvoqu3btombNmrRv355Vq1Zx+vRpDAYDhw8fJikpiS+++IITJ06g11dvxXO7q2cYdDodr7zyymXlj9EoD5U7k6vhAL8roblqe9ZZk1UKLjes351Dt1a1L1pX
rYNntZIvauqpoenvempoShv900Yt+qyGphZt1KLPamj6u54amtJG/7RRiz5fKc2c03afRWcKbg/sPlLAKVsFNSOD/1EbryY9NTSljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJKyX03plnWE2YMIHy8nKg+vyoefPm8fLLLzNq1CgsFgtNmjRh+/bt3HPPPeIMq4SEBIqKisSEDYDRaBT3ef/990lJSeGZZ57h1VdfpaSkxOeakJAQtm7dyo4dO7jhhhsoLCxk5cqVQis4uLrzHRkZyfz584mLi+P5559n6tSpPvcExITb+++/T5s2bRgwYAAzZswQPnlr/frrr1gsFoYPH87atWvFGVbR0dFUVlZit9s5duwYubm5AEyaNImXX36ZX3/9laSkJBo2bEheXh5utxu9Xk9UVBS1atUiLy+P7777jj59+pwVVvCvoNfrVD0c9GrHnw/wO5FfyoF9ucRFW4iLCfnfPzgHGTklF/z+ULaNQd0bX5I2qHfwrD/ni1qaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShu1oaeGpr/ZePhk6QW/tzvcV6SP7+/p6G/58nfoqaGpRRu16LMamv6up4amtFEbempoShv9U09LXJUTVnq9nri4uLPOsJowYQI2m018lpiYyNy5c8XfkyZNYvv27fTs2VN89umnn3L33Xezfv16wsLCgOrJr9DQUNatWyeu83g8TJ069azJo/79+/PSSy+J6xYuXMjKlStxOp0Awp4lS5YIfYDPPvuMrKws8bfBYKB27dr88ssv4jObzcaMGTOEhsPhAOCRRx7hjjvuENeVl5dz4MABgoODiY6OZtiwYWzfvp1169axYsUKnn32WXr27MmUKVPE2VRt2rTBbrdjNBrZs2eP0Nq6dSu3334769atE6EKLwa324PNVnbRv/u3YzD47wF+peVO3v92F7sy/lz52Dwhkof/0xxL0MXFlE+oFcqqC3zfMM5KYaH90gzFv9NRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2+qeNWvT5SmkGmy68CNJi1sv3g39YU9ronzZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KqN/was1qC/vOvsqpywOhcXsxvor1x7vmuUCZ8rYcelap35uzOv6969O9HR0UyZMkWcT/Xee++h0+kYNGgQAAUFBZSXl6PT6ejYsSM2m434+Hhuv/12AI4dO/aX/TgTeaDc+fHHA/zeW7TrrJjye44U8O6iXRcdU75L8zjm/bj/nGEBDXro3KzWFfHfH9NRTT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1/szEmLIhmDSLZm1mA2+s1Vq+rPpM22hoo3w/8RFPaqA09NTS1aKMWfVZDU4s2atFnNTS1aqNWuConrNxuN9nZ2dhsNqxWKwA//fQTgPj7XCg7nFasWEGLFi0AcDqd7N+/H6g+AyowMBCj0cipU6fIzMwkPj4egN9//x2Xy0VOTg6tWrXCbDYDsH37dp97rF+/HgCTyeRjz/PPP8/u3bspKCigUaNGHD9+3CckoOLTgw8+yMaNGzGZTDRs2NBHQ7nnkiVL+PLLLzly5AhxcXFUVFQAiDOxwsLCmDt3LrfffjuLFi0Cqnd0vfbaa9StWxeoDitoMpkICwsT52Tl5eXx8ssvA1C79sWfM6Qgz7A6G3+Nh6pGTPkX7m7Pix9v8pm0MuirP7/csuGv6aiWnhqaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShv900Yt+nwlNUcNbs57Z0R0aNqgOqKDfD/45zWljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJK51Oh9lsZtSoUYwcOZLc3FymTp0qJl8URowYQXZ2NsuXLweqz50yGAzMmTOHyMhIGjduzMKFC7HbfUMQBAcHExYWxqOPPsro0aMpLy9nypQp6HQ6DAYDb731
Fps2beLDDz/k4MGDTJw4kb59+7Jx40afkH6KltlsZvny5fTv35+UlBRmz55NVVUVBoPBxyeXy8WGDRu47777yM/P54svvsBoNPr4BPDHH3/QtGlTRo8ezffff09mZqbP96dPn2bo0KFUVlaSlJREeno6AQEBjB49GqgOY6jT6ejSpQu//fYbiYmJxMXF4XA4cDqdOBwO2rZte0l5I8+wujD+Fg9VjZjyEREWvnttEL9uOsqOg/m0bBRDr/b1L8fMs/C3dFRbTw1NLdqoRZ/V0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FND0x9tjIiAV0Z1JTu/lOxT9ss6M/d8+Hs6+mO+qK2nhqYWbdSiz2po+rueGprSRm3oqaEpbfRPPS1xVU5YhYWF0bNnT7Kyshg1ahQWi4UhQ4acdU6U2+3G5XKJv61WKy6Xi1GjRjFnzhwKCgpISUnhzjvv5KOPPhK/DQsLo1GjRhgMBkaPHo3RaKRu3brk5eXRvn17unbtSq1atfjwww8B2LhxI19//TVxcXE88cQTvPHGG0LLYrHgcDho0aIFGzZsYMWKFbRq1YqdO3f67LAKCgqitLSUpKQkZs2ahcVi4frrr+fnn3+msrJS2AVQq1YtSktLmTZtGg0aNKBx48YcOHBAfP/WW2+Js7bS09OBP3dfTZ48mf79+wMwffp05s6dy+LFi8nJyUGn09G6dWs2b97MyZMnSUxMvOi8kWdYnRuD4crGL80rKqekwoU10EBM+KVXgGrGlG+fXINe7etjs5VfVlx6b650Ovq7nhqaWrRRiz6roalFG68Gn/ccKeD4KTv1Yiw0iY+8AhZqMx39XU8NTS3aeDX4/N9Pt3Ikp4TEuFCeuv2aK2ChNtPR3/XU0LwabAwJMNA2JVa+H/iZprTRP23Uos9qaPq7nhqa0kb/tFGLPquhqVUb/w1Yrf/yM6wSEhIoLCxk7ty54rOSkhI+/vhjEhISxGcLFiw463cAPXv2ZNSoUeLzKVOmEBcXR2BgoLjuwIEDfPfdd+Ka22+/HaPRSHJyMgD16tXDaDRSVVXF/fffz+DBgwFYuXKlz730+uqMePrpp2nXrp3Qu/7668nJyRF/BwQEAPDFF1+Iz2w2Gz///DOlpdW7YOrUqQNA586deeWVV8R148aN48CBA+JeSpjCTZs2+UzgdevWTWgBBAYG8uCDD/Lggw/yzDPPsHv3bu644w42b97M5SDjc56fy41fWlru5MPv9/iE8WvWIJKRg5piCTRdtN7fEVNei3FlteizGpr+rqeGprRRG3pXQjO3sIzJ87dQWl4lPgsJMjJ+RFtiwi8ulOv50EI6Xm16amhq0UZ/9PnHDZl8tSpD/L0ro5A7J/3KsJ6JXN/uyuxU10I6Xm16amhKG7Whp4amtFEbempoatFGLfqshqYWbdSiz2poatVGrXBVBlPs1q0b69evx2azic9++ukn9Ho9qamp5/1dmzZtCAkJ4ccffxSfOZ1OfvnlF7p16+ajv3//fp9QewcOHMDpdHLttdcC1edJdezYEZPJREbGny+Xy5YtIzExUUwuKedPHTx4UFxTXFxMTk4ODodDnD9lNBopLy/38ennn38GwOPx+PyblZXl49fevXsBOH78OFA9SabX68UEHIDL5aKiogKn03ne9FHst1qt1KtX74LXSf4ZPvx+D3szC3w+25tZwMzFey5Zc+Sgpmetym8SXz0JJpFIJBL/5MzJKoDS8ipenrflH7JIIpFcDt6TVd58vuLw32yJRCKRSCQSiUQikfxzXJU7rIYNG8aCBQvOOsNq2LBhxMbGiuvOPMMqICCAkSNHMmPGDJ8zrAoKCkhPT6dVq1ZYLBZuvPFGEhMTfc6wKikpISEhgRYtWgj9hx56iLVr1zJnzhzmzp1LZGQkeXl5vPnmmz726vV6Jk+ezKuvvorRaCQ4OJjAwEAcDgfFxcUEBgZiMpkwGAx069YNp9NJWFgYdruduLg4ERKwuLgY
qA5B2KpVK1wuFxEREZw6dcrn+zp16rB//366detGWVkZgYGBhIeHU1xcLM7NKi0tpU+fPhiNRoqKinA6nXg8Hg4ePMhzzz2HyXTxu3UULvfg3H8jV+LAvZzTdp+dVQpuD+w+UsApWwU1Iy9+VX1YSABP39GG/KJybFcgzKCCFg9C1KLPamj6u54amtJG/7TRX33eefjUWZNVCqXlVew7VkjzhKhL1tdKOl5NempoatFGf/V5yicXnmh+44vtPH3HpYcH1Eo6Xk16amhKG/3TRi36rIamFm3Uos9qaPq7nhqa0kb/tFGLPquhqVUbtcZVOWEVFhbGvHnzePnll33OsHriiSd8rjvzDCuA+++/H4/HI86waty4MQEBARgMBmbMmEFubi5TpkyhV69e2O12cYaVTqejX79+Plrbtm0DwGQyUVVVhc1mw2Qy0axZM3GNy+XC7XYTFhZGVVUVFRUV2Gw26tat67Obyul04nA4iI2NpbCwkLKyMioqKnxC+imYzWaCg4MpLi7GZrNhsVh8tCwWC3q9HpfLhclkwuFwcOLECRITEzl27BgA2dnZ2Gw2zGYzVVVVuN1uPB4PJpOJLl26XGLOgF6vIyLCcsm//7dyIr+UA/tyL+uA4cMnSy/4vd3hvqy0VyvftHgQohZ9VkPT3/XU0LySelei3jkXWktHNfQuVzO74PgFvz9xuoxu11z+Tul/ezpejXpqaGrRRn/zOfN/9PEyckquSD/t356OV6OeGppas/GXjUfZdSiflo1i6NX+yoTPBP/2WS1Nrdl4tfSVtZYvamn6u54amtJGbeipoSlt9E89LXFVTlgBJCYm+pxhdS7OPMMKQKfTMXLkSEaOHAnAzJkz+eCDD3jnnXcIDw8HqieZXnzxRX777TexY6tTp044HA6hU1lZycyZMwkODuaOO+7gySefxOFw0KdPH2bPns3EiRMBOHHiBABz584V51+tXbuWe++9F51OJyak7HY74eHhpKWliXuMGTOGX3/9lQYNGgCIcH633nor48aNA6CoqEiEKVS0Tpw4gdvt5tdffxU+ffHFF7zwwgtEREQA1buwNm7cSFBQ9cPzzDPPsHPnTk6fPs1nn33G+PHjL5i258Pt9mCzlV3Sb/+NlJY7ef/bXezK+HNnVPOESB7+T3MsQRe3iy3YpLvg9xaz/rIOLzYY/P+QQX+3UYs+q6Hp73pqaF5JvStZ76hlo1qa/q53pTTjIi/c8a0dFSzbg3+ZnhqaWrTRX32OrxnC3syi836fUCtUPtP/Mj01NLVm45HsYl6auxlFZtW2E7zz1R+8cHd74mtZ/cJGNfTU0NSajVdLX1lr+aKWpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dtRNWV4q0tDQ6deokJnYA+vbtywsvvMC6desYPHgwAAkJCT5nVW3bto3S0lJ0Oh0JCQlA9c6n3r17ixCEACdPnjzrnqmpqQQEBGA2m0VoQLvdTo0aNXyu69u3L0uXLiU6OhpAnKkVExMjrgkPDyc5OZk//vhD2KGECNTpdD5aEyZMEJNawcFnh47T6/XUq1ePvLy8/5FqF0YeKPcn7y3addaZU3uOFPDuol2MvrXVRWnFhAXRrEEkezMLcHv+/Fyvqz5zKtoaeEXS/mo4ZNDfbdSiz2po+rueGppXQu9K1jvnQivpqKbe5Wo2qR+JQQ/n6vsa9JBSL0K2B/9SPTU0tWijv/n85LA23DNl5Xm/H31ra/lM/0v11NDUio3ek1VC1w0vfryJWU/3uCxt8E+f1dbUio1XW19ZK/mitqa/66mhKW3Uhp4amtJG/9TTEpqfsMrIyODmm2/2+cxqtRITE+MzQdWtWzc++OADbDYbVqtVfKfX60lNTRXXJSYmMm/ePCoqKggMDKS4uBij0ciPP/4odlhVVVXh8XjEbqdjx47h8XjIy8sjMzOT+Ph4ABHmr06dOuI6k8nE6tWrxQ4xqN6dZTAYxHVFRUXodDp++eUX
hg4dCoDH40Gn0/lMdp2Jy+Xi4MGDdO7c+eIT0gt5hlU1apw5NWpwc947YzVY0wbVq8EuN92vhpit/m6jFn1WQ9Pf9dTQvFJ6ap11dyVtVFPT3/WulGbOafs5J6ugerDucvIZtJOOV5OeGppatNGffb69V0M++/XQOT+Xfbx/n54amlqycdX2rAu2g+t359CtVe1L0vZXn9XU1JKNV1NfWUv5oqamv+upoSlt9E8bteizGppatVFraH7CSpmAOpOwsDCKi4vF38OGDWPBggWMGjWKkSNH8vvvv4vPlbCBAPPnz8fj8VBcXExgYCClpaU0adKEOXPmEBkZSePGjVm4cCEul0tMHin3qVOnDo8++iijR4+mvLyc6dOnAxAZGSlsjY6O5o8//mDixIn07duXjRs3cvDgQQwGg7ChtLSU5s2bM3XqVPR6PbGxscycORODwUBcXJy4rry8nNWrVwPVYQRPnjyJy+WiVq1aFBQUiPteDPIMqz9R48ypiAh4ZVRXsvNLyT5lv+LxtuHqiNnq7zZq0Wc1NP1dTw3Ny9VT+6w70EY6qq13uZp/Rz7Dvz8dr0Y9NTS1aKM/+nxb36bc1rcpE2auI/1YEUn1wnlpZOr//uFFoIV0vNr01NDUgo0ZOSUX/P5Qto1B3Rtf1j38zee/Q1MLNl6NfWUt5MvfoenvempoShu1oaeGprTRP/W0hOYnrP4qYWFhzJs3j5dffplRo0ah1+vR6/U888wzPtd5PJ6zfpucnEyvXr2YM2cOBQUFpKSk0KJFCwIDA32ue+655/j2228ZPXo0RqORbt26sXTpUp9rAgMDmTFjBtOnT+frr78mLi6OPn36sGLFCp/runfvTklJCdOmTcNut9OmTRtq166N2WwW15w+fZrHHnvsLHtfeOEFGjRoQIcOHS46neQZVn+i5plTIQEG2qbEYrOVX9aZBt5c6RirV1pPDU1/11NDU4s2aslnNesdLaWjWnpXSlOeaej/NmrRZzU0/V1PDc0nb2sj9GQf79+rp4amlmxMqBXKqgt83zDO6jf9HS3li5qaWuwraylf1NT0dz01NKWN/mmjFn1WQ1OrNv4bkGdYXQRWq5WSkrNXaBUXF4vznhQSExOZO3cuAJ9++ikvvfTSWRNUI0aM8Dkrymq1YrfbGTlypE8Yv2HDholrlH9NJhMzZswQ12RmZrJ06VIfrdLSUnr27EnPnj3FdW+++aaPrVarlfLycsaOHcvYsWPF5127dvW5rk6dOqSnp7N69WoefvhhHnjggXNOYF0sMj5nNX/HmVNajdnq7zZq0Wc1NP1dTw3Ny9WT9c7VoXe5mvJMQ/U0/V1PDU0t2qhFn9XQ1KKNWvRZDc3L1evSPI55P+4/71mOnZvV8rv+jhby5e/Q1GJfWQv58ndo+rueGprSRm3oqaEpbfRPPS2h+WCKCQkJPmdVAZSUlJCfn09CQsIFfwdw5MgRn88zMjKIi4sTu6fOpe/xeDhy5IjQqFevHiaT6azrlL+V6xISEjh16pRPqELlOm9bL8anP/74g8cee4ybbrrpikxWSXwZOagpTeJ9Qys2iY9k5KCm/5BFEonk346sd7SBzGeJRCKRaJlxI9py5iJdg776c4nkQsg+lEQikUgk/o3md1h169aNDz74wOcsq59++gm9Xk9q6vnjxrdp04aQkBB+/PFHkpOTAXA6nfzyyy9069bNR//7778nMzOT+Ph4AH7//XeKioq49tprATCbzXTo0IGff/6ZESNGiN8uW7aMxMRE6tSpA0CXLl3Q6/X88ssvDB06FKjeCbZ27Voefvjhi/bp0KFDjBw5ko4dO/Liiy9echpKzo8l0MToW1txylaB3eHGYtYTbQ383z+USCSSS0TWO9pA5rNEIpFItEz9WCuznu7B+t05HMq20TDOSudmtf5psyRXAbIPJZFIJBKJf6P5Cathw4axYMECRo0axciRI8nNzWXq1KkMGzaM2NhYcd2IESPIzs5m+fLl
AAQEBDBy5EhmzJhBZGQkjRs3ZuHChRQVFXHvvfeK391www3MnDmTRx99lNGjR1NeXs7UqVO57rrraNGihbjuoYce4s4772TixIn07duXjRs3snTpUt58801xTc2aNRkyZAhTp05Fr9cTGxvLzJkzCQ0NZdiwYRfl0+nTp7n33nsJCAhgxIgR7N69W/w+JCSEhg0bXvnE1jA1I4OJiLBQWGiX20ElEsnfgqx3tIHMZ4lEIpFomW6tajOoe2PZDkouGtmHkkgkEonEP9H8hFVYWBjz5s3j5ZdfZtSoUVgsFoYMGcITTzzhc53b7cblcvl8dv/99+PxeJgzZw4FBQWkpKQwe/Zs6tatK64xmUx89NFHTJo0idGjR2M0GunduzfPPfecj1bbtm2ZMWMG06dP5+uvvyYuLo5JkybRt29fn+vGjRuHxWJh2rRp2O122rRpw8cff0xoaOhF+XTo0CFOnjwJwF133eVzj/bt27NgwYKLT0yJRCKRSCQSiUQikUgkEolEIpFIJJJLQPMTVgCJiYnMnTv3gtecawJHp9MxcuRIRo4cecHfxsbGMmPGjP9pR8+ePenZs+cFrzGbzYwdO5axY8de8Lr/5VOHDh1IT0//nzZJJBKJRCKRSCQSiUQikUgkEolEIpGojf5/XyKRSCQSiUQikUgkEolEIpFIJBKJRCKRqIecsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/ipywkkgkEolEIpFIJBKJRCKRSCQSiUQikfyjyAkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KnLCSSCQSiUQikUgkEolEIpFIJBKJRCKR/KPICSuJRCKRSCQSiUQikUgkEolEIpFIJBLJP4qcsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/is7j8Xj+aSMk/x48Hg9utyxS58Jg0ONyuf1WTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaM29NTQlDb6p96/Ab1eh06n+0vXygkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KDAkokUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8UOWElkUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8U4z9tgERyJTl8+DCTJk1i+/btWCwWBg0axOOPP47ZbL7g7zweD7NmzWL+/PmcOnUKnU5HaGgoQ4YM8fl9bm4ukyZNYu3atZhMJnr37s2zzz6LxWJh1qxZfPbZZxQUFBAXF4fL5SInJweDwYDL5SIsLIzevXuTn5/P+vXrMRqN1KxZk1OnTlFeXk7r1q0ZP348CQkJrFixggkTJnDq1CkAAgICAAgKCiI4OJjCwkKCg4OJiYnh8OHDOJ1O9Ho9gYGBOJ1OEhISeOKJJ+jevTuHDx/miSeeID09Xfhbp04datasyZYtW3j66aepVasWP/zwA2vXrqWiogIAnU6HXq8nPDycm266iZ49e/L111/z+++/c/LkSTweDwBWq5Unn3ySW2+9FY/Hw7Bhw/jjjz/OSuPY2Fjmzp1LQkIC
ubm5PP/886xZs+as6/R6PSaTiZSUFHr37s2cOXM4ffr0WdfpdDo8Hg8BAQEkJiZy8uRJbDYbQUFBOBwOKisradiwIceOHSMuLo6bbrqJjRs3snnzZqqqqny0DAYDRqORa665hnbt2vHNN99w4sQJkeZdunShqKiInTt3otfr8Xg8VFZWijTQ6XTUrl2btm3bsn37dk6cOEFAQABOp5OAgACRzzab7ax7KwQHB1OrVi2ys7PR6XSUlZWddY3RaKR+/fpUVFSQn59PgwYNfPJ50qRJbN26lcrKyrN+Gx8fT2FhIcXFxeIzi8VCVVUVgYGBoiyHhIT4/G7IkCHs2rVL+FmnTh1ef/111qxZwzvvvHNOX4xGIz179uSVV17x0Tt8+DCjR49m//79Qs9gMNCsWTOeffZZWrVqBcDChQt54403sNlsProBAQHceuutPPXUU1RWVvLss8/y22+/UVVVhU6nE9coz5vNZsPhcJzTxubNmzN16lQsFgtPPvkkW7Zswe12C7u8y/7jjz/O2rVree655ygsLDxLq0aNGrz00kukpqby5ptv8tVXX1FaWgpUlx+lnNSpU4fi4mKKioowm82Ul5fjcrnOaZ/ZbMZgMIiyoNRJQ4cO5fHHH2fNmjU+dYT37+Li4ggMDOTo0aN4PB4cDgdutxuDwUDNmjUpKSmhqqqKrl27Mm7cOEpKSnzyRUGn0xEbG8stt9zC/fffzzfffHPOfPHm5ptvZv/+/ezbt0/cU/F95MiRDB48WNSV+fn56HQ6nE4nACaTiXbt2jF+/HgsFgtPPfUUW7duFfliMpmoqqoSz2BYWBht2rTh6NGjHDlyBIPBgNPpFGmamJjIU089RWpqKm+88QZffPEF5eXleDwegoKCcLvdVFZW8tBDD/Hdd99x+vRpTCYTZWVlIs/OxGis7jbp9XocDgd6vR6z2SzKXOvWrcnMzCQjIwOj0YjT6cTtdhMYGEibNm0YP348derUEfac+ZxHRESQlpaG2Wzm8OHDTJgwgW3btuHxeDCZTAwYMIDnn3/ep83Jz88HEHWLyWQCOKvNUdJGyZfWrVvz8ssvizbnueeeo6ioyCf/o6KiRDsKMHnyZL755huRZ1D9zHXs2JExY8aQlJQknvP09HQ8Hg8GgwG9Xk9YWBjx8fFkZWVRWFhITEwMBQUFonwnJSXx5ptvYrFYePnll1m1ahVVVVU+eWE0GuncuTMOh4Nt27bhcrlEfptMJgICAqisrBRtoPczWVJSclZeXnPNNdx33328++677Ny5U5Q1JY+Dg4Pp06cPzz77LO+++y5paWlkZWWJZ0pph7zrRqUe++qrr5g1axbHjh3z8cFsNjNs2DAef/xx3n33XXbs2MGuXbuorKwUekq7esstt/D444/z66+/nrPtSE5OFmmm9E2qqqoIDg6muLhYlHW9Xk9VVZXomxw7duyc5Ruq24XatWuLvknz5s0JDw9n48aNlJeXYzQaKS8vF2mk0+kICwujfv367Nu3T3ynPPtKHRgQEEB4eDg2m42SkhIaNGiAyWTi4MGD6PV68TwqZQ+q28WkpCTy8/PJycnBZDKJZ9hkMmGxWLDb7cTExBAaGkpeXp7ow1RVVWGxWIiKiiI3N5fS0lJCQkKw2+0iH9xu91llzLtcX3vttRQVFbFr1y6fPqV3/avT6QgPDycgIICCggLRLqempjJ58mQWLVok2iHFL4PBQPfu3RkxYgSvv/46O3bsEDbo9XqCgoKA6jqsQYMGbNq0ifz8fDwejyjvMTExANhsNho2bIjH4+HAgQO4XC6hFRgYSEREBLm5ubjdbkwmE8HBwZSUlGA0GtHr9VgsFuLi4igpKSE7O5tatWpRt25ddu/e7VMfKCh92vPRokULAgMD2bVrF1VVVT51hTcRERH/M8282yyHwyHyOygoCJ1OR0FBwXntALjvvvv44osvznr2vVm3bh0TJ05k5cqVwi+z2XxJde2hQ4fYvn07brdb5IHB
YOCaa65h4sSJ561rofo5ufXWWxkzZgyATzoofarevXvz5JNPEhUVhdls5vTp0+j1elHmveta77rf7XbjcrlwOp0+da1iz5NPPnnOPidU9wFfeOEFH3t0Oh1ms5nKykruvfde9u3bx/bt2wkMDPTJF4vFQmxsLCdPnhR1ZGBgIHv37vV57hSbXnzxRVF/lpSU8Oyzz/rkC1Q/H02bNmXcuHG0atUKt9vNmDFj+Omnn3zq76CgIPr378+zzz5LVlYW9913n8g/b2rXrs2oUaO4+eab+fzzz/nll1/Ys2ePqD+90el0NGzYkEmTJpGdnc2PP/7I1q1bz3pHMZlM9OvXjwkTJoj3xs2bN4v2VyEqKooxY8Zw8803c/z4cSZNmsTevXvFs+59X+/0WbhwIW+//fZZ5d+73IaEhPj0372JjIzkySef5OabbxaflZSUMHnyZH744Qef+srj8WA0Gunbty8dOnTgq6++4vDhw9jtdtG+63Q6goODuf766xk+fDiDBg06R0mqJjAwEJfLJdqoM9HpdJhMJoKCgqioqCAgIACDwUBRUZFPedHr9URERDBo0CBGjRrFO++8w6JFi86ZbwozZ85k3rx5bN68WdRLer2eNm3aMHPmTJFX27dvJzg4GL1eL8pMYGAgDz30EA8++CBZWVn07NnzvD4mJSXx0ksvibK8bNkyvv76azZt2uRTH3qX5ejo6PNqepfl3Nxcxo4de8589S7LANu2bePFF188q2+vpGHDhg0pKio653OhoJTlwYMHs3jxYrZs2UJ2djZut9unLAOEhIRw66238vjjj1NZWcnkyZNZunSpT1q3bNmSV155hQYNGoh69fTp0xgMBtF3MJvNREdHU1hYSFBQEBaLhezs7LPandq1azN+/Hi6d+/OypUrefPNNzl06NBZdlmtVqZPn067du148803+fbbb88qJ3FxcTRu3Jj9+/dz+vRpzGYzFRUVPu0pVNexERERoh9hMBjE+975UJ6j82E0Gn36d8r1Y8aM4YEHHhDv91u2bKGqqkr0PS0WC5GRkRw7doynn34aq9XKrFmzOH78OIBPOgQHB9OgQQPy8vJEfeXxeER78Z///IfHH3+cgoIChgwZ4lMmlGc8MTFRjDksWrSISZMmiXRQ7DYajVgsFmw2G263W/TtoDr/AwIC0Ov1NGzYkAceeIBevXqxf/9+HnzwQXJycs6bRjt37iQ9PZ2HH374LNsGDBjAhAkTAPj4449ZvXo1hw4dorKbDUD5AACB/0lEQVSyUtzbu66dMWPGeccvoqKi+OSTT0hISDjvdUajkeHDhzN69GgxPlheXs4rr7zCd9995zPuoNfr0ev1PuMcSUlJ57y3TqdDp9P5tG/ffvst8+bN4+DBg6Is6nQ6AgMD6devH3Fxcfz++++i3AcHB2Oz2cSzFBcXx6hRo3juuefOm7bKvc8c91i8eDHvvfeeyBdlDOKxxx5j3rx5fPLJJ+Tl5QGI8m00GkU7n5mZKfoe5+sztmzZkgcffJDp06dz6NAhPB6PyDOj0YjH4yEiIoL27dtTs2ZNFi9eTGFhoU/ZNpvNdOjQgaeeeoqkpCS+/fZbPvzwQzIyMs55z6SkJKKiotiyZQuAeA/wHoNISEgAwOFwMH36dDZs2MDevXvFM6Pk0/PPP0+rVq1E32HHjh2iPtDpdAQFBdGvXz8xvtajRw8xvujNmXW3YtfHH3/MzJkzRVsZHR3NggULhH3Dhw9n06ZNZ+nddNNNjBs3jtDQ0HOmwdWGznOhGlQiuYooLi6mf//+xMfHM3LkSHJzc5kyZQoDBw4UDdn5+PDDD3nrrbcIDAykdu3aBAcHs2fPHkwmEzfddBMTJkzA6XQyePBgAJ544gkqKir473//S3JyMtdccw1vv/02Tz75JACvvvoqBoMBi8WCxWIhJyeHW2+9la+++orQ0FCmTJnC/Pnz2bBhA40bN+app57igw8+4Pjx40yePJn7778fnU7Hf/7z
H5YuXUplZSVGo5GAgAAqKiro1q0bJSUlbNmyhaCgILp168bPP/8sKtCmTZvyzTffMHPmTMaMGUNRURE6nY727duzdetWn47R008/zY4dO9iwYYOoEE0mEwaDgcrKShITE8nNzaVOnTqYzWYOHDhAeXm5mAxyOp14PB7eeustDhw4wLvvvntW+kZERFBYWEhkZCTLli3jzjvvxOl0cuTIETHBYLVaKSoqwmg00rVrV4qLi9myZQuNGjUiIyNDNHahoaGUlpbi8Xi46aab+OGHH3A6nbRt25a9e/diNBopKyujqqqKm266if/85z+sXr2aOXPmEBkZSWFhoWhYTSaT6EhbrVaio6PJyMggIiICi8VCfHw869atE9/37NmTxYsX43K5CAoKory8HIPBgNVqxeVyYbPZROerpKSEkpISmjRpwp49e2jcuDH33Xcfs2fPJi8vj5EjR/Lf//6X5s2bs2PHDjHQ+uCDD7Jnzx7WrFlDREQE8fHx7NmzB4AGDRqQnp6OxWLh9ddfZ9WqVSKfn3nmGeLj44mKiuKnn34SA9I6nY6dO3ficDhE+WncuDEHDhxAr6/eZPvoo4/yxRdfkJyczMyZM0W+ffTRR7z22msAtGrVirp167JkyRICAgIYNGgQX375JSkpKZjNZnbs2IFOpyM5OZnKykoyMzPp3Lkzs2fPFs/n9ddfT1FREQ0aNODo0aPodDpcLhctWrTg8OHDLF68mJ07dzJ69GgxsGaz2cRgZ0BAAHa7nWHDhnH48GE2bdpEzZo1ycnJEb4FBwdTVlZGixYt2LlzJ7Vq1SInJ0d8rvybnJxMQUEBoaGhZGdnU15eTrNmzdi9e7foYCsTTB07dmTFihUYjUYcDodP3ns8HurXr8/x48e57rrrWLduHZWVlXTt2pXNmzeLwfi2bduyZcsWoqKicDqdlJeXiwE5l8tFSEgIbreb8vJyLBYLAwcO5LPPPhMDAddccw1r164lMDCQ1NRUfv31VzHhFxgYKAbkxo4dy1tvvUVFRQUtW7Zk586dADRr1oxDhw5RXl5OUlISjz32GNOnT8fj8ZCfny/yJTMzE6jufCYkJJCVlQVAmzZt2LBhg8gXpa4wm80YjUYqKyu59tpr2bRpE5WVlWLip3Xr1uzfv5/ExET27NlD//79+fnnn+nXrx+LFy/2eVHT6/U0btxY5EtBQQGRkZE0a9aM77//HrPZLMqxTqejQYMG7N27lwYNGoiJxKKiIpGmDRo0EPmSlpaGy+Vi4MCBLF68WHQ6XS4XJpOJUaNGMWfOHFG3NG7cmPT0dAIDA0lJSRETg+Hh4eTk5Ii/lQHhnJwcevbsyYoVK0hISBADdMrL3Jtvvsmnn37K8ePHSU1NZenSpVRVVYlJNu+Jj9tvv53HHnuMfv36YbfbiYyMpHv37nz99dfo9Xrat28v2hwlHZX0U/QAnzYnLi6OQ4cO4XQ6qV27NgAFBQVYrVbR5kD1S215ebl4IR49ejQffvghAwcOxOFw8PXXXwN/dsTtdjt6vZ6oqChKS0uZO3cuI0eOpKioiM6dO/PHH3+ICQblmWzTpg3t27fngw8+AGDo0KHk5uaSlpZGREQEkZGRFBcXU1hYiMFgEM+JwWAgNjaW48ePi3YgMDCQiooKoqKiOHXqFAEBASQnJ5OcnMw333zj80zWqlWLEydOiPJRr1498WKvtNd2u12UR6vVKl4MU1JSiI2NpVatWnz88cdYrVbxW71eT7169cjMzCQgIIAffviBnTt3MmbMGJKTk9m3b5+oU5U2SK/XM3jwYH7++WdSUlJEu9ypUyeaNGnCggULcDgcDBgwgGXLlolBAO8Jc6UuU9LMYDAwcuRInn/+eSoqKsSzpSzIaNq0Kdu2bcNoNPLII4+wbt06Nm/eTLt27cjMzKSgoACXy0WzZs1Em/XUU08xevRoSktLue+++5g1a5ZoO3U6HZWVlWKSPDMzk+DgYBo2bCgGpAFq1apFRUUFnTp1
4scff8RoNDJ27FimTp1KVVUVvXv3Zvny5bjdbuLi4igoKKCyspLrrrsOs9nMzz//TL169aiqqiInJwePx0PXrl1Zs2YNOp1O1LXl5eUivWJjY8nPzxcTXDVr1uTkyZMA9O3bl82bN4vJJmViWRkgtVgsPPzwwwC8/vrrhIaGMm3aNNGn7Nixo6h/b731VtLT09m2bRsGg4EpU6awZcsWvvnmG7p168aqVauIiorCbrdTUVGB2+2mZs2aFBcXYzKZRP3gcrlEu6zw8MMPs2bNGnbt2kXr1q3Zvn27sDc2NpasrCyCg4P573//y7PPPktpaSm1a9fm9OnTVFVVERMTIwYawsLC6NatG0uWLBEDws2aNWP79u3079+fpUuXYjAYmDRpEgsWLGDv3r0kJyezf/9+wsLCcLvdBAQEcOrUKYKCgqhRowYul4uCggLRZiUlJbFt2zaxuGLatGlMnjyZY8eOcc0117B582YAevXqRX5+Pn/88cd50ywmJoahQ4cye/Zs0d/Mz8/H4XCI/mR5eTlxcXG0a9eOH374gaqqKp544gk6duzI66+/zt69e7Hb7RgMBp8JJGUAyWQy0a1bN4qLi9m4cSMmk0m0w8XFxQQFBZ23rj0Tpa41mUzUqVMHj8fDkSNHiImJEXVkcHCwqGuVut77FTwgIAC3280tt9yCw+Hgm2++EemwbNkyMjMzRd+8vLycxx57jFmzZgl9ZQLJ4/H41P1DhgwR/bC6devSoEED0tLSiIyMZOrUqeJ949prr2Xt2rU4nU6MRiOdOnVizZo1dOvWjdjYWGFPSEgIx44dExNOFouFJk2a8P/+3//jmWeeEfnSsmVLfvzxR1GWGzVqxPPPP094eLhYmKVMzttsNkwmE2azmcWLF1O3bl1GjBgh8sV7MDsgIIDQ0FDKy8tZvHgxzz//PBs3bhTtsbKgTGmvmzZtyr59+ygoKPCZbNXpdFitVqqqqigrK2P69OlMmTKF9u3b89tvv6HT6UT/U9FWJpwCAgK45ppryM3N5ejRo1RWVqLX67FardjtdjHw2LZtWw4cOEBoaKjP4JlSdyt2TJ8+nfj4eBYsWMDx48fFQh2lHdbr9RiNRoxGIy+++CJPPfUUAQEBVFVViUHMkJAQsaCnffv2TJ06ldTUVPR6vUgPg8FAZGSk6L9Mnz6dPn36AHDvvfeydetWPB4PDRs2ZPfu3aL/WbduXfbv30/Tpk3p1KkTX331lZiItNlsGAwGOnbsyOHDhwkMDCQrK0uUn6ioKJ8JvejoaE6dOkVoaCglJSUEBgaKPqSy8Ke4uJjg4GAAUUd6PB5iY2PJzc0V7249evRg8+bNwhaA0tJSTCYTLpdL5Pf06dOxWCw888wzREZGkp6eTlBQEC6XS/QpkpKSKCgoEGMIr732Gunp6eh0OiIjIzGbzeTk5DBlyhSSk5O56aabiIuLIy8vT7R1kZGRYiI6KChIlOWHHnqI1atXi7pe6bsbDAZRlj/44AOGDx8u0ikwMBBATEYrZXn//v1iolKn0/lMBgQFBYmynJKSwqBBg8T7aklJibjWuyy73W4iIyMJDg7m5MmToj8aFhbmU5Zr1KhBjRo1SEhI4KuvvjpnPaj4PXjwYI4ePcrWrVvFwi6LxSIWg0VERHDLLbcwc+ZM0fc+cyGcUift27eP3Nxcn4kPk8kkFm/o9XomTJjASy+9RFJSEnv37hUaShkzm83UqlWLDh06sGzZMiwWC06nk4KCAsxmsyhrRUVFXHfddWzfvh2Xy0VpaSnh4eEUFRWJd3Sl7MXFxZGdnY3RaMTtdpOUlER6ejp169aloKBAvEso9ir1alVVlah3IiMjxaIDZRGc0t+tqqqiRYsWfPTRR/Tv35+oqCjS09MxGo24XC7q1q3L0aNHha833ngjS5cuJSUlhSNHjuBwOHzeLR0OB1VVVQQEBBAUFERpaSkul4saNWqId5UBAwawfPlyTp06RXx8PFarVbxDAnTo0IGt
W7fy3HPP8dJLL2EwGMT7mN1uF31oqH43rFGjBllZWaLfoix2uvfeeykpKeHLL79kwoQJTJ06VdRR3pPSSl/ltddeo0WLFvTs2VM818p7OCDaqzFjxnDPPffQuXNnvv/+e7GwUSn7Sl3bqlUrTp48yaxZs1i1ahV6vZ7mzZuzdetWDAYDUVFRLFu2jA8++ICPPvpIvOt7axkMBm655RYxvjhmzBh++OEHjEYjkZGR6HQ6Tp48idVqpW/fvhQUFLB+/XoWL15Mr169GD58OOvXr+f48eN4PB4xjnX48GHR3t1yyy18+eWXDBgwgC+//FIs1OncuTPLly8Xz+gdd9xB586d+fTTT1m3bp0YV6moqODEiRPiuRs+fDjXXXcdTz75pBizUvpTyvPVtm1b0tPTadGiBevWrRP1f2xsrBiDaNy4MXv27KF9+/asW7dOvJvDnwvsKysriY6ORq/Xi0ktpZ4yGAw0b96csWPHkpmZyXPPPce1117Lb7/9JsqA8qyYTCaefvppvvjiC3Jzc2ncuLFYxGk0GomKiiIvL4/Q0FCcTidDhw7l66+/xu12i7ozOjqaoqIiqqqqeOihh/jiiy9ITEzE7Xazd+9eUY6GDh3KkSNHOH78OD/88AOhoaHYbDZ69OhBRUWFaEt79erFunXriI6OpqCggMWLFzN8+HDq1avHxo0bCQ0Nxe12i/ozMjKS5s2bM3PmTKHldDopKSmhRYsWYgFAQUGBT5s8btw4lixZIvJOr9fz7bffotfrhX3Dhw8nNzeXFi1a0KhRI0JCQjh+/DjffvstTZs2Zc6cOeetq68mZEhAyb+Gzz//HLvdzjvvvEPXrl0ZMmQITz31FJ9//jm5ubnn/V1lZSUzZ86kTZs2uN1u5s+fz/z584mJiaFJkybi9z///DMHDx7krbfeokePHvTr14/JkyezatUq3nvvPe655x7uuusu1qxZQ8uWLUXn7LvvvmPAgAH89ttvuN1uSkpKqFGjBps2bWL48OGkp6cTFhbGu+++S0lJCS+99BI6nY7777+f+vXrYzAY6NWrl1h9/NRTT7F69Wq2bt0q7N+zZw/XX389ZrOZ3bt3M2TIEJo3b87LL79McXExer2eBx54gPnz54tVFspkBcAzzzyDzWYTq38BMUB86NAhbrvtNg4cOECHDh2orKykRYsW/PDDD7zyyiuiczFt2jRmzZoldHU6HRMmTBCdveTkZIqKinj55Zc5ePAgL7zwAlDdME2ZMoWCggIGDRrEuHHjWLVqFR6PB7PZLF7iBwwYQOvWrSkpKSEgIIDmzZuTk5MjXj63bdvGjz/+yPLly8VnY8eOpWPHjoSHh6PX6yksLCQ0NBS9Xs/DDz+M0+nEarXSoEEDbDabaFwKCwuZNWsWs2fPpmbNmkD1hMuPP/7INddcIzpdTZo0oWbNmrRv3140/Dt37uTEiRN8/vnnpKSksHv3bpHP8fHxzJs3j4qKChYuXEjLli1JSEgQK46uv/56li9fLgaclV1d48aNY9y4caSnp9OtWzc8Hg+HDx/mpZdeEvlst9uZNm2a2LU2ZMgQtm/fzpQpU0Q6R0ZGMmDAAEaOHAlUr95o1KgR27dvF2VZ6Zw6HA7efvttUSauv/56Xn/9dfr164fL5RIvX1OnTmXnzp3odDoeeeQRDhw4wNtvv43JZGLt2rVC7/PPP8dms9GsWTPy8/O5//77eeGFF9DpdBQVFREeHs7s2bN544030Ol0DBo0iOLiYgICAkhJScHpdHL99dfjcrlYuHChmDypW7curVq1YtiwYUD1Kqfu3buze/duunbtSk5ODl26dKGiooKwsDAcDgfR0dFERERQVFTE4cOHARg5ciTXX3+9KG+hoaFkZWVx++23s3z5cuLi4kSHLDo6mri4OGrUqIHb7ebo0aMkJCTw66+/ioGahx56iLKyMgICAmjUqBFbt27llltu4dSpU2JCVRlcUSZ3lMnfsrIy0tLSSE5Oxmw2Y7fb
efTRR2nZsiW1a9dm+fLloo7YtWsXUVFRhIWFAbB06VL0ej1JSUns2LEDgEceeYQ9e/ZQXl6OyWQiPT2dmJgY3nrrLQ4ePEhxcbHIlwceeIAXX3wRnU4nBj0bNmzIhg0bfPJFr9eLARKlTKxatYrbbrsNk8kknpv33nuPZ599ln379nHdddexbNky7rnnHnbs2EHr1q1FOgA0adLEJ1/ef/99li1bRmJiohiMMxqNVFRUcOedd7J3714xUK4MnCu7HQCOHTsm8kWn03HfffcxZcoU1q9fLwZJAG677TaxSt3tdhMTEyMGzxwOB9u3bycrK4sPP/yQL7/8kuTkZBITE4HqlfZKHb9+/XrxEjl//nwqKiqoU6cOAB07duTdd9+luLiYRYsWicEsk8nEL7/8QmBgICaTicDAQD7//HM++ugjbDYbFRUVfPTRR4wfP57nnnuOiooKnzZnx44dxMTE0LJlS0JCQvB4PDRv3pyYmBifNmffvn24XC7uvPNOTpw4wVNPPUV5eTmFhYU+bc7IkSMJDAwUaZidnc1TTz3FwoUL+frrr8Xk8W+//cbKlSvF86LsTnv11Vex2Wy0aNGCjh07inoYYNeuXXTt2pUdO3aIgd/w8HCMRiOzZs2iefPmIu/Lysq47rrrcDgcYvApLCxM2FVSUkLt2rWpqKjgkUce4dSpU8TGxhIZGcmOHTsYMmQISUlJ4pls3rw5xcXFjBgxQkxqKZNAShtWWlrKTTfdJNovZVdH165dWbVqFUOGDEGv1+N0OqlZsyZ6vZ7Y2FgxMdqjRw9cLhezZ8/m7bffpkePHmJlc2BgIM2bNycpKUnsyPvmm29YsmQJqampos164403ePrppxk3bhwAP/zwA23btsVoNIpdN+Hh4dSqVYuysjKSkpJEmr311lsMGDCAhx56SAxmAtxxxx08//zzbNu2jSZNmohBv/z8fOLi4jh69CinTp3CZDIRERHh02YpA5JKOa1ZsyYBAQE4HA48Hg/XX389YWFhZGZmotPpKC8vZ8+ePUyYMIEBAwYA8Morr1BRUcGvv/7KXXfdhcViERMkNWrUYOXKlQQHB9OlSxdycnKoqKggJiaGmjVrkp6eTocOHTh27BjZ2dmi7d+9ezeJiYl4PB7WrFlDeXk5PXr0YMWKFbRs2VIs2rjvvvvo27ev6AM2adKEZ555hoKCArEbRGlvlQH4srIyBgwYIF6Wi4uLCQsLE31K7/p34sSJFBQUUKtWLXQ6Hdu2bRMDZytXrsTj8dC/f38xOGgymTh58iTDhw+npKRErDxt0aIFS5YsYezYsaIPtXTpUo4cOUKdOnXYtWsXOp2O8ePHExMTQ25uLjExMVRUVPDTTz9RWlpKdHS0GCCYNm2az2plm83GyZMnRZpVVVUxffp0+vfvL9IsNDSUTZs2kZ6eTtOmTcWu/LFjx+J0Ojl16pTYxf/ll1/yzTff+KRZdnY28fHxPmkWExNDYmIimzZtQqfT8cADD/DOO+9QVFR0wTSbO3cuXbp0EW3WoUOHqKioYN68eWRnZ1NRUUHt2rXJzc1lzJgxTJgwAZ1Ox6JFi8RgioLRaKRdu3aibfB4PAQHB9OsWTOWL1/Ohg0bxET/p59+ysKFC/F4PJSXl59V14aFhYlV4lC9w9G7rnU6nUydOpWjR48yfPhw8vLyxOBccXGxqGvbtWsnnunw8HB0Oh0tWrTA5XLx2Wefibp27ty5PProoyxcuBCo7u9XVlaKPq2y0EZpi8eOHYtOp2PFihWi7l+2bBk6nY4777yT48eP8+ijj4q61rvub926NSaTiV69egGQlZVFUFAQa9eu9bFn6dKlrF27Vqwsr6ys5J133hETN0q+bN++nS5duqDT6fjhhx/EBL+C2Wyma9eu2O12kpOTxeT67Nmz2b59u8gXZUfXrbfeClT3TU+dOkVwcDCvvPIKGzduJCIigvr161NZWcl3331H69atiYmJISgoiHXr1lFcXCz6WzVq
1BA2KM9gUlISb7/9NosWLSIxMRGHw0FJSYmYmA8ODhYDycHBwSKqwoABA0T74Xa7WbhwIePHj6eqqorKykrWrVuHzWbD6XSKXaB169alsrKSZs2aERMTg8Vi4e233yY5OZlHH31U7MQymUwicoWyuMZgMPD+++8TFxfHQw89JHab16hRg9LSUu644w7RR5g+fbqYHImJiaF169a88MILFBQU0LRpU6Kjo3n77bcB2L59O2vXrhXvmAcOHBCr251OJ3379iU8PJy9e/eKNvD//b//R0lJCcOGDUOn07FhwwYee+wxMjMzufHGGzl+/DgDBgwgKSlJ9LV0Oh2NGjUS73JQPSHz+OOPizqyc+fOuN1u/u///o+ysjJq1qwp0njQoEEYDAbi4uKwWCysXbuW++67j0OHDtGlSxdKS0u59dZbad68OVA9iO92u1m6dCl79uzxWZCyaNEixo0bR3Z2Nq1btyY9PR2bzcY777xDo0aNxOQAVC9e+e677zAYDEydOpUlS5YA0LBhQzEJoCy0fOihh8TguDJR3LJlS5/J0g4dOoiFsEpZViaBQkNDRVn+9ttvzyrL3jvNGjRoQN26dcXElcPhEGV59uzZmEwm9Hq9z45a70VAykRNZGQkQ4cOFe97Ho/nrLJ8/PhxnnnmGTZv3izKstIfa9iwoZjMDgkJYeHChSJii8fjYfz48SxcuFC8QxUUFPDhhx9yzz33oNfrxSSHshtAmZBavXq1GOx2u90EBQXRoUMHnE4nwcHBREdHY7FYmD59uliUp9ChQwdiYmIIDw/H6XRy9OhRvv76a/r06UNeXh5BQUF0794do9FI9+7dKSoqIiEhgQ0bNuBwOMRizHr16mE2m3E6nTRr1kz0/7Kzs+nSpYuY9HvppZdo2bIlcXFxYvJG4cYbb8RgMIj3NJvNRvfu3cnKyqJHjx6UlZXx5ZdfsmrVKoKCgkQ/dM+ePXz22WfY7XbCwsKoWbOmGMBX6mcliklaWho9evQgPT2d1NRUsZigefPmeDweUWYqKyvp1KmTeE9r27Ytubm59OnTh4ULF3Lq1CkGDRrEzz//jM1mo127dkD1zqPCwkKaN2/OtGnTAMQ7hd1uJykpSeSjMvmWlZVFRESEuParr77CYDDw7bff8vLLL5OamsqMGTOoqKjAZDKJRSNt2rQR0QGMRiO///47Y8eOFc+P2+3mq6++4oUXXhALFFatWkVRURHLly/n9OnThISE0Lp1a3777TesVivx8fGirq1ZsyY1a9Zk5cqVuN1uxo0bR7169QgNDcVkMlFYWMjnn3/O7t27Rb0VEBDATz/9JCaonE6nGB90u9389NNPoq5dvHgxpaWl1KhRA5vNxqhRo3jjjTfEOAdUL+R66KGHxA5qqN4t8/rrr3Pq1CnMZjNz587lzTff5MSJE9SrV4/y8nK+/vpr3nnnHQYMGEBQUBBVVVUMGDCA5ORk1q1bR3x8PAEBAURFRbFgwQI8Ho/Y0VWrVi0xFqBM5A4bNgyz2SzGIrZu3crIkSNZt26deM+dP38+s2bNomXLltSqVYs//viDIUOGsGPHDh544AFq1aoldpsr72t16tTh1KlT3H333aJMKHlXv359tm3bhl6vZ8mSJWJyTafT0b9/f3Q6HbVq1aJfv354PB52797NkSNHeOCBB8Rk1YIFCzCbzQwZMgSz2SzGMBcsWMANN9yAw+EgOTkZgPHjx4s+4sKFC6moqOCdd97h9ddfx+FwMHDgQKB6bEcZi/38889F3X/fffeJPAJ4+eWXefbZZzlx4gShoaHMnj2bRYsWkZubS+vWrSktLeXrr78mNTWVgIAAatWqJcbXKisrOX36tBi7+vLLL5k/fz4FBQW0bt1atMm///672NH+/PPP89RTTzFmzBiWLl3qY5/SDrz++uuMHDmSO+64g2eeeYYxY8awbt26C45/X03ICSvJv4a0tDQ6deokOlBQvZLW7XaLXTLnYtu2bZSWllJaWip+
bzab6d27N1lZWeL3aWlpJCUliW2YAKmpqVgsFsrLy+nbty8Oh4ONGzfSr18/0dCGh4fTr18/8vLyiI+Px+Px8Nlnn+F2uxk1ahTh4eGsXr2a8PBwOnXqxLFjx3C73fTt21f4NHjwYLGi4+abbxarRRMSEvB4PGRlZTFo0CC6du2KwWBg9erV9OvXj8zMTLG9tm/fvgA+nT+F9evXi1V1APXr16d+/fpCT9H4+eefxQAMVA/+KBw7dgyHwyHC1AAMGDCAvn37Cm2Px8OGDRtISkqibt26QPXW3OjoaKD6BbZv3764XC7++OMPWrZsyZEjR/B4PPTt21c0iG3atGHgwIEijJuymmH9+vWiA35m2VDS3ul0EhkZSVRUFAApKSli1Xpubi7h4eEEBweLfFZe2pR8NplMNG7cGKhuJHr37i1CIMbExJCXl0dCQgIJCQnEx8cDcP/995+Vz0ePHqVXr14sX76c0NBQOnfuzE033cThw4fJyMgQL6Jut5s+ffrQokULAOrVq0dqaippaWkA9OvXj6NHj9KhQwcyMjJEh1EJlbJp0yaxLT87O1uUK8W/wMBAfv/9d9q1aydshOrQGd5hD73ztKqqivz8fAwGAzt37hT5e+edd+J2u8XAtFIWATEJ2bx5c0pLS+nbt68oG8eOHaNjx46sXLlS7OhRwv6kpqYyePBgsWLZarWKl8HOnTuzbds2+vTpw+7du8WAvbKi+tprrwX+HIhs0qQJVVVV9O/fny1bthAZGUlAQIB4ftPS0khNTSU8PJy4uDgAkU7Kah69Xs/x48e58cYbycnJEavXlEmXo0eP0qdPH9LS0rBarXTr1g273Y7b7aZHjx4EBwcTHByMx+MRk6dKGQsODhbhs7KysnA6ncIe5ZlWVukqz3RWVhaZmZki9MiBAwfo1KmTeJYTExNFvgQGBnLttdeKfElISBD3u1C+eIcPU/LFarWKgQTvfFm+fDkdOnTwWfmn1MNKvii7UZTJwpYtWwLQu3dvkS/BwcG0bt1aPL+pqalih413vjRr1gyPxyNWunbu3Fms5vXOl8rKSlEHRkRE0KVLFxGyrGfPnmLFeXJysnimFHuVFXcJCQlUVVWxceNGhg4dClR3ZsPDw+nduzfl5eVcf/31YuDK7XaTmpoKQE5ODuHh4cLuyspK0ebUr19f2KOEmvvll1+IjIz0aXOUdFTCSSrhBwsKCnzanIEDB1JQUCDaHLfbLcqY0uYcOnSIJk2aiHAibrebfv36iTYnJiYGj8fDypUrxX09Ho/Y4VezZk3Cw8Pp2rWrWJlntVpFfd2/f3+hpYQ48Hg8oi3LyclBp9OJ5w5g4MCBYqFCWVkZzZo1AxCLDuLi4khPTxf5pryIKWF0UlJSyMnJITQ0lNWrV4v69+jRozRp0oTS0lL+85//iAmijIwMMegaExMjVsQ1atRI6CckJHD06FHxDKalpdGhQwe2bduG2+2mbt26YgdK7dq1qaqqYvny5WRmZornQ0kzpc1SVr0rbZZSN3qjlFWPx0NBQQGtWrUSuxL69u0rQk4oExBBQUGinKxZs4bU1FThW8+ePUUaKW3WypUrxcBiXl6emPxISUkB/myzvvvuO6xWK126dOHo0aMYjUZSU1MJDg4mKiqKm266SYRnUeo1pc3q2rUrAPv376dJkyY4HA4GDRoknv/OnTvTsmVLnE4npaWlPPvss3g8HvGiqtg4fPhwTCaTuHe/fv0oLCxk8ODBWK1WkXbXX3895eXldOrUiYCAANFfaNy4sbgmODiYtWvXivpXKUtQPWin1IdKn8+7/lXSUqlb+vbty/Hjx8nMzGTgwIFUVVWxcuVKAFH2EhMT2b17N3FxcVitVlH/KgNIygCh0qcaMmSIsPXYsWOUlpaSkpIiBlkHDBhA8+bNcTqd4nnZsGEDycnJDBgwgH379tGpUyduuOEGsTNFKUfbtm1j6NChoq5et26dqLdSU1OpV68eBw4cwO1206VLF2GH
xWIRZSs0NFT0a8PDw4mKihJppkyQnZlmSl/rYtIsISFBtKFKO64sBlHud8sttwg/lDbr6NGjfPXVV5SVlYm+kFLXKuGjoXoXgNK/1+l0REdHi7o2ISGBpKQkURd517XFxcX06tVLTFgpK6cv1L9XdrM0btxY1LXKgGbnzp1FXav015X/GjZsKNJd2emjTFyEhoaKeuPGG28Udf/NN9+Mx+MRK+S988X7fWPgwIG43e7zvm9UVVWRmZnJDTfcIOp+xR4lbJDyfCttoGKPki8nT54Uz/TRo0dZvny5qMeUfBk5ciRut1v0V4KCgkhLSyMtLQ2DwUB0dDR169alrKyM2267TeRLUFAQcXFx4t3OZrMRGhoq8rBfv34UFBSIsOxGo5EGDRrgdrtFfijpnJiYiMvlEgsl0tLSxLXKxLyyGywxMVG8Y6xZs0b0GwCx+0RpL5UJ8ZiYGI4fP87p06ex2+0MGTIEt9tNfHy82Ilx+PBhsrKyWLt2rUjvyspKMenfuHFjQkNDCQoKEv3ONWvWEBkZSVhYmCiP7du3FwP7q1atIiQkhIqKCgoKCujTp4+wTbm3ct+0tDQxoVezZk0cDgdt2rShSZMmREVF+Txjv/zyC506dWLr1q0kJSWJcuZ2u8WEgcPhIDMzk169erF582YxaN2wYUO2bNniM/gHiBDSVquV7du3Exoayu7du8U7m8Ivv/xC06ZNOXr0KC1btsThcIj+nrI79bbbbhO7HI4fP05AQAC///67aLszMjLOyqumTZuKPAwPD2fRokUA4t2jrKyM8PBwmjVrRkFBAWvXrhV2n1mWlfpOr9eLvo2SV97vvcq9lbK8fv16oLqPfaGyrGgrz5V3mxEcHCzK8sqVKwkMDPQpy0p4rjPLcnZ29v8sywEBAXz//fdkZmaKsqz0r1u3bi3Kcn5+vk+f0+Px0KdPHxISEmjSpAnR0dEEBgbicDjo27cvq1evxuOpjmqgvOe2bdtWPKPeIa3Ky8sZPnw44eHhxMTEiInlwsJCkpOTxYRYSEgII0aMICMjg65du+LxeEQ/TenjZWdnc91115Gamip2ZcXFxVFRUUFycrIYg9i/fz+dOnUS5QAQYxZKeD6r1Sre0TZv3uxzrcVi4dixYzRt2lSUVaPRKCbwld19CQkJREZGkpqaKt6fXC4Xa9asEX1OZSfRzTffLMJqKn3B4uJiatasKeq4sLAw4uLi2LJlC8nJyT4h2bKysujcuTM33HADf/zxBykpKT4hYe+++27RTiuLvEpKSsjLy+P666/HbreLcp6ZmYnVauX//u//AMTuPGXR4rBhw0Q52LVrl3iGsrKySElJETsSld1lQUFBop1WyntaWhrbtm0jJCQEq9Uq0uvM8rlx40YMBgMbNmzAbrfTp08fLBYL9erVIzw8/Lx1bY8ePVi+fDl9+/ala9euhISEkJaWJt59Q0JC6Ny5M+Hh4aIeBET/w+PxiMUBt9xyC/v376e0tFS8lyu7kHr37i3qBKVeqFu3rtjhA3/2n5TJ5k6dOoldO97vg/369ePkyZPifWft2rW4XC6OHj0qQj+Gh4f7LIgrLi4W7ZvH46Fjx45s3ryZTp06AX++Tyi73Ww2m8+4ar9+/Thy5Ih4FktLS0VYfu++ldFoFHX+3r17adSokUgzZTIzPDyclStXsnHjRnr16sXGjRtxu90UFRWRlJTE8ePH6dSpE1VVVWJBkNL3a9SoEe3btyc1NZXNmzeL8Sbl/spxCD169BDpquRbUVERrVq1Ijw8XIRLfuyxx0RZVtLszHxSogedqZeYmEhaWhp2u13sglfyKSUlBZ1OJyIVrF69WuzSPHDggNhJpVyrHAWQlZXFl19+KdpW5TrgnPadCyXPzheS+2pDTlhJ/jVkZGT4TCZB9YtUTEzMeeOYKr+D6gFF798r5yIpYeLOpa/T6USlnJCQIELkJSQkUFpaSkVFBRUVFaIij4mJISYmhoMHDxIVFUV4eDgNGjQQ
NigdAEVPuafye2UXgbLCLzk5WazYadCgAQ0bNkSn05GRkSF+462XnZ3N/PnzsVqt4nOn0ynOW1EG6hQ7FL0TJ04QExMjzq5S0kHZ5eU9Sei9qqhTp078+uuv4j5KY+GdjgcOHBArMNatWyfCE7hcLpKTk8Wgw7PPPivitBYXF4uXzIiICBo2bEhAQAAZGRlnnRei5LFiozIAuG7dOrG6WglFqOSpd8hEpTOlrGIyGo2iYXS5XCQmJopJQO+wEMr3gDivwzufPR4PJSUllJaWijNXlDwrKioSE4oej4fbbruNb775RpSBxMREoaV0MJRyqjB16lQ8Hg8vvviiOEcHEHYkJyeLlXdOp5OsrCzxXXZ2Nh999BHnQrFRefF56aWXgOrn56effhLPm3dZhOrzq7wH6BISErBarSKtgoODRTqGh4eTlZWFy+US6eLxeES5VlbqKOH1lGclNjYWo9EowmV4/+sdlqRTp044nU4RIkGxR9Fv0KCBWP2plGclvIOSjh9++CFQ3TlSOvNKGVC0GjRoQGJiorDD5XJRVVWFw+EgKipK7LRStq0XFxdTUVEhXgJPnTol7FFs837pUO6j5CtUP2f16tUT1ykDpUoYnTPzRfH/fPmiTOafmS9BQUEilIESvkbZPRAVFeVjp1IPK+ngHb85KipKTA7GxcWdlS+A8F2n04m0UupdJZa0on3nnXf6aHufXXJm/a683NasWVOEHVXKW0lJiVjx5XK5KCoq4ssvvxR1/Jn1q7L7JSQkxKeOV3ZYKaEWlUF08G1zEhMTRXif6OhoTp48KezxTscaNWqIwSkljZXrlDanbt26PufcKIP3Z7Y5CQkJPrHWvdscxZ+8vDyfNsftdvvY1LBhQ7Hj4OTJkyL8n7eW1WoVAyMBAQGizQoPD6dJkyZix4T3rjWoHrzX6XQiJK3yQquU1dLSUhISEkS9oTw3NWrU8Akj633GnTIgrUyyKPW7MvielZVFo0aNiImJITg4GJPJxJEjR0SaKSFjFe0dO3YwYsQIYmJixMuMMiCqhOVT0iwxMVGstFfSLCMjQ0zceuOdZkpYEagO7aacQeR9Pt65nhelzapfv77w88w2SxkYUnZGKPdR2iylXVDaLKX9rqqqoqqqyuc5sNvtYld0Xl4e8+fPx2w2U1BQIMpTdHQ0Bw8epKKigoSEBPE8WK1W8Yzk5OSwZMkSsTJQuU5JR8Wv0NBQEhMTRYib77//HqiuA6dNmyZ2hHrXRdu3b2fFihVERkZSWlqK0+kUC1Dy8vIoLy8XO3W82wOlvvQ+d8i7/lXSUTlDy7v+9a6jvftU3iHhlGdKqSuVsgxw8OBBoLqMhoWFiTRr1aqVT54odVq9evVEKB2lvQoLCxN5r6RZRkaGyBe3283BgwdFiBXvemrixIliQK+wsFD0a5Vy53K5xI7VoqKis9Js+/btQuvll1/mp59++p9pBog2VJnMUJ5hpaw3adJE9De826wlS5aIPqziX05ODkajkX379uHxeMjNzRVnogFn1bWNGjUSYbiV7xVuvvlm4b9yJhycv3+v7DJXQrEq9ijPgGLjqVOniI6Oxmg0ip0oCpMnTxaTyVDdjih9Ku+632w2i3SIiYnxqWu96/5zvR+c+b7h8XgYMGCAqPu97QFEvigo9jRp0oTg4GACAgLIzMwU/bXdu3eL9ljxuUWLFsTExIhBdOUctYMHD4q2V6mTEhISRL4oYbiVOtflcrFnzx4OHDjAvffei8ViEfmiTHqYTCbCw8PFMxEdHU1QUBAmk0nU2Uodr5yP510elJ3Xdrsdu91OdnY2hw8fFvdR0l1pp5W0V8qrshNDKbdnnuuh3Fs5swfgs88+o1atWrRv356Kigrsdjvl5eUcOnSIvXv3ilCmp0+fpkmTJnTt2pUaNWpgtVo5deqUz7vKa6+9xsMPP0x4eLjPvZX7hoaGivP+oHoxUEJCgljcoSxQzMnJIT4+nkOHDhEeHs68efO4/fbbiYmJ
ETuKlUlSZUemEkI6ISEBp9N51plJ33zzjaiz8vPzqVGjhjgDUykzoaGhnDx5UqyaV9pHJby50qYpz3FRURGbN2+msrKSkpIS0tPTRTtWv359kVfKxAf8Obj3xRdfANWLqJTJFUA8g8rCOm8/Fi1aRHR0tAhBXl5eLvo2Sn9EKctRUVGinChl2Xuh1e7du8nIyGDcuHHExsb6lGWXyyXOg6yqqqJJkybUqFFD7BJVynJ+fr4Ij+hdlqG6n6qUZY/Hg91uZ8uWLSKsonK9d1kODg4WE3RKWVbaw9atW/uUZWW3TFBQkE8ECKU8Kc9tQkKCqJO7desmdj/Gx8dTo0YNEbrVOyJMQkICDRo0oLy83OccKmWsQFmAMHnyZKB6saSSdkajkaysLOrXry8WJCQmJor89D7D2+l0ikmwhIQEn3cYpT0sKCggOjpavJMr/Xjvd0WlblXKLVTXCUq7cPjwYfH+VlVVJRaOQPV7zpEjR8S7rtLWl5aWotPpxISIghIe+9ixY7Rq1YqMjAyf93+drvrswRMnToi6Pjs7m/r165OVlSXS2btvk5CQQI0aNXA4HMTHx4v+V61atejSpQvr16+nVq1aot32eDwiEg1ULzCuUaMGQUFBZGRkiGcoIyNDjCF59zdMJhM///wzgIh8k52dLZ7NsrIy0tPTRdhTpZ0LDg4WY0DKRGJCQgI2m42DBw/6pL93XRsVFcXWrVspLS1lwIABJCYmimfWO9T8ypUr6devnxjnUBaYZmRkYDAYxM6isrIyJk2aBFRPsBuNRrFbXUlvqB5DWLx48VnnGSnh7pWQjZMnT8bpdLJ7925OnjwpIqco7bTyvqOMc1VVVYmF08p1Sp3wySefsGXLFmFP7dq1ycjIoHHjxqLfqdfrRZ/TZrOd9d6slCWlrlCee+VdV+mLKCE+09PTadSokWgflYUypaWlfPvttzidTkJCQnzaI6VPrqSjzWYjLCxM9OOUvojy3Cn96rKyMmJjY9mzZw8ej0eEnR83bhwdOnQQfij9j4yMDJ/3dOX59h5jU647czG80nYoi8GVts97jHPr1q1iIaPyLqD0mZ1OJ2+88Qb3338/6enpJCQkiPo0IyNDHBUSEBBAz549adGiBffeey9Hjhw5y75NmzbRqlUrmjVrxu23386XX37Ju+++S48ePYRvVztywkryr8Fms/lMxCgo8bAv9Duz2UxJSYnP75WVuyEhIRQXF4sVfGeihIwICAgQ97FarWLFRHFxsdBVtoSXlJQILW/7lAZeOTxe8cnbruLiYnFdaGioeKFWrnO5XD739NZ79dVX6d27t1gdBPg0LIpN3pqKnhJOTfm8oKCAGTNm0LNnT9EQwZ8TOzExMbz33ntiRZnSQCqrQ5XwXX369OHmm29Gp9ORk5PDXXfd5TO4pfDSSy+Jv/fs2SNi3QYFBREWFobBYKC4uPiceW2z2USaORwOevbsyW+//SZWHcOfk27KpIKCsupIGRhPTEwUK0yUkIJKh/nMw+aVDpaSfmfm89atW4mNjaW8vNwnn51OJ926dRMdopSUFBYsWCDyy2q1+pQ15Z5KWb7tttuYNGkStWvXFg2/gnJegLJSWnlx8Lbx1VdfFYNJ3h1J7/tBdciFDh06iLj448ePx+12i/KnlB1ATHq4XC4xqOmd7vBn7GolbrCSvso9lU6Lcq13uA7FJ6XTAn8OLirnkigDw0qnXOnIK/Yoz1tQUJB4aVcGyZTVicrWeiUsgLKa0Hvljbc9VqtVfKeE/VOeAaW8PPjgg8TFxYkVicrnpaWlWK1WkS/eaa8MeCvp652O3mezKfmnxEg/M1+U67zzpaqqSnTclBeHM/Olfv363HPPPUJbecFTwvadSVhYmMgXpcPepEkTkd5n5ov3M6jki3JmSs+ePUU94z0RDr6Dgt5pr9SB3t8pHeTKykrhl5Jn3vmi7IIbP348n376qfg9/LlT1ftf7zpe8U2pY5Uyfmabo8S0h+o6tLKy
UthzZjoqE7/eKyK92xzvl3uDwSD+PrPNCQsL8ykn3s+A984M7zZHeSbPTEflJd37/CfvNllJeyVMpmKPUn96l2/lmVQGLPLy8ti9e7cYLFFCDTocDkJDQ0VZ9k7b4uJin5Vl3uV7zZo1orx4LwxQyndoaChhYWFissm7blRCOiqMGzeOPn36EBYW5lMHKPZ5p5nin3IGgdJmne/Abu80UygqKhIhRWrWrOkTzk5BSXcl373riTPbrFq1aonfDBgw4Kw2q7y8XLQVii1WqxWn03lWWahduzYOh4OKigoGDBhAWFgYdevWpbi4WNSjSpjZM+2oqKjgueeew2AwUKtWLZ/BceU6ZSWzd+hipe2/7rrrxOr+WbNm8dVXXzFr1izq1q3rUw7uv/9+fvvtN06fPi2el7vuuotHH30Ui8Uizl/avHkzhYWFPvWvki5wdv2rpKNSlr2fK5vNJup87/rXe2GNdzp6T0rp9Xoxkac8v8rvvCdTvOstZWedsno4MTFR/FbxRyl7SvlPS0tDp9NRp04dcRYAVC/umDRpkngBV8qZMriqlAOHw0FcXJyo05Q0a9eunVhEoNNVn3n25ptv/s80A3zaUMVP5Z7evpzZBu7du1fs2jWbzaKubdSoEffdd5+wRdFSJjS869qwsDAxoaj0TRWUEFNK/iicq38fFBREeno6PXv2FIssTCaTCDd6Zl0bEhIibFM0Vq5cyfbt2+nRo4fIL71eL+qNM983lHRQzhfx1jpXX+JC7xvKBJG3hsKZbaC3Pcrg+XPPPSf8huqwsN75oixgUNJTCSFWWFgoBnWU87mUa5Uznc5cOaxEL8jJyRFlTLFPeRaU9lixU6njlUkR5V1PGcxW8vf+++8XOzSUtPZ4PKLfAPj0L5TQkWfuEFLu6+2zgnJvp9MpBt/37NnDxx9/LBZEKAtCJk+eTFlZGadPnxaLW2bNmiXKoJI+3osgR40aJcIAe0+0KPfV6/WiDYTqwXLFTu8y43A4mD17Nnv27GHDhg107NiR5557TpQtJZyud9lQ2kJl0E9p05TFVLVq1RLhoJUBTOW5Vn6rnB+i+KT44H3WpVIvt2vXjl69emEwGMSAXUlJiWj3lfcI5f+VtlvZ8aloW61WMRANf76TlpeX+9QxBoOBX375BZvNRmFhIcHBwaJP5J2vSnlSninvvPJuDyIjI0lJSeHHH3/kjTfeEJ8r6amc4eNdnpRzj7wXaSnPk2KjsvjIbDb7lGUlfZX8OXr0qNgBpNiolBVvlIVGZ75nKnWYckaXd1p79++LiopEWQgLCxM+mUwmkUfeCy8Uf5UxCG+8n8usrCwRJl6JyqCUI5vNRmRkJOHh4ezcuVPUn8riKPiznVPyOy8vj7y8PFF+rrnmGuG/0l88s15V0iEsLAybzSbKrbKLTJkcsNvtHDx4kKqqKpo2bcrs2bN93u+9xy+U/terr74qwi96o/SJbTYbbdq0EWfTKpMcyiJr77re46mOAqDc81zjWUr6e4eNDAwMZMaMGRgMBtLT0/nPf/4j7GjZsqX4jVI+jUYjxcXFIk1/++03tm/fjsfjEee5mc1mevbsyfjx432eUeXZUNqYa6+9lieeeIKffvqJ8vJyn/LpXUatViuvvfYaOp2O2267TXyuPJNK+Vy6dCmxsbG0a9dOvDcWFxdTWVkpztK66aabSElJEeMcyplcyv2UOuLDDz8UfdGuXbvidrtZsWKFsMfj8XDDDTcwceJEYmNjxU40JX8Bn/ZB+W1ERARGo5F77rmH06dP+/T3FH+U56eoqIi77rpL3FOpEwYPHuwTqWTp0qWi3Crtm9vtFhN1Sh/FOz0VSktLRTQM+LN/4D3eAb79OGVXZ2hoKPXq1ROLDJS0UxaPLF++HKhetKig7KT0vpcyFub9/nzy5Elhw0MPPUR0dDROp5O3335b9EU2bdqE0+k8a1xXqUO8x9gUH4CzxjbCwsJEPa9M4JWVlREaGsqSJUvYvn07gwcP
BhDPWFhYmBinuPfeezl27Bi33347BoNBtPvFxcXk5+eLRRuvv/66CPN97733+pwj3q5dO55//nk++ugjgoOD2bp1K+PHjycgIECE7vw3ICesJBKNsHbtWtauXcuYMWMuW6uqqorRo0cD1atvvVEq4sDAQK677jpGjBgBcFb8emVFWMOGDcULRePGjdm3b59PJ1qhbdu2YtWxcgD0peDxeFi1ahVNmjTBaDSKVaJnHvh6PoYOHSrsq6ys9Hnx8x5I+ysoB56fifcZB0ajkWnTpomY7GfuHjsXEydOpFevXgQHB/usKvorFBYWsnbtWp/O3flo2bIlsbGx6HQ6EYaooKDA5+XCX1BWK/0vXC6Xz6G9Z6IM3Clxvb0H9y6V2267jeDgYDGp5D3w8U9gNBrFajll4vJMUlNTRXgsZaWf946BC6GsuPcOH/q/UFZgw9l1DuCzI+FKoeSL8tJ5ww038O23317x+1ztKKt+r7nmmouuAy/E6dOncblchIeH07x5c9FObNy48bJ0u3Tp4jPQDBcXNqF3795A9QDPpEmTLngA+ZVCSdfY2FgxMHDmKvXLQdlNcTnk5OSI3U7//e9/OXr0KDk5OT51gnLI9pnlxOFw8Mgjj4iD4/9K++Nt+7p160R9MnDgQDp06MBDDz101u61uXPn0qRJE3F+ElTvyBk1apTY2VKjRg0RRvB8k4l/F263+7x18IVQJpEAcSbRhdi7dy8TJkw4axXpzTffTK9evcTAmJJmZ4Yjcblc1KtX7yzd//u//+O6664Tv12wYME5F5ZdSVwulwgn602PHj3o1q2bsOViWLp06UXb4XQ6RV/iXG3WX6GyspJXXnmFRx999JzhrtXAu8xfTn3udrt55JFHxMCj9/mSahAQEECdOnV45513xIryy0E5U0mv1/+lZ+hKoeyGNxqNxMTEcPfdd5/1fjJp0iR0Op3Y0e3xeHjggQfEDhEF74n/zp07s2DBAoxGo1h0d6nccccdxMbGijNinn322Qtef76yGxsbi8FgIDAwkLCwsP+vvfOOr7q+/v/r7tzkJiEJGWSQkAAJK8jeCSsqIkNAoiB0WmmlCmKp8KXQAoKigKyCVFtBFBSihBE2DkgAQYYIBLJDhtn7Zt3c/P7I4xw+nyTgaJWf7Xk+Hn1Ybu79jPc47/N+n8VRXMpIVuCOQ54yXRU5sLQ2Rp977jmEh4ezkxZBNWnuRmNjI1asWMERL/di9OjRbDzV6XRYvXo1amtrOVrn38FoNCIsLAyrV69mh4UfG4q+Icgx5tv4d+bGqlWr2CBwNygKhigpKUFKSkqLsa6kffv2fNhN6c6U2QQAYNq0afjoo49U+z2lY4ISLy8vBAQE8D7jwoUL3/Jmd4cMnTRuKFq2uLgYe/bswbRp01g/bU2Opaen4/Tp0yqH3ta4desW3nnnHQBQRc3Sof93JTk5mduF0sASCxYsQHV1NUJDQ7Fp0yb+nKJN7kVMTAwmTZrUou8XLVqE8ePHszO4EpIV3t7eePLJJ/HnP/8ZlZWVKoctJZ988gnrnMrahUrsdjs+++wzjB07tsW+gJwxNRoNgoODsXr1aj7naO06QFPU9eOPPw6dTocLFy7A0dGR0yvSGcSCBQswZswYODg4wMPDgw0hlBZUCRkdqU5eY2MjduzY0eq7kGycPHkyp1RX0rNnT45ABO5kjvkp0Ov16NOnD6f3bt++PTu0kywH7uwnAagM4Q0NDaryBErIEcZgMKgMmBERETh48CB0Oh2uX7/OBquKigo2iv0YlJaWYsmSJZg0aRL69++v+ltwcDA/R2RkJDujNz93ojSa3t7eGD58OKKiorBx40bk5uaqvvvcc89hypQp6Nu3L7Zv34533nkHbdq0QWJiImbNmvWdzr1+DojBSvivQelFoIQs2vf6HXmlKH9PBfwqKyvh6uoKFxeXVg8vyDOqtraW70PpegCwdw0A9qx1dnZW5R1unjqCIhXonZSbFbLoA+B0cnRP
8hhR3pOut3TpUsycORNms1mlsGi1WlbC6JmU+WvpemVlZazMbtiwAV999RX+8Y9/wMvLS7W5UKbBo2soP9fpdCpPTPLIbWxsRPv27eHo6MieGUVFRaywVFRU8KaHCs4D4LQADQ0NcHV1bbWvld4XjY2NCAsLw8CBA2G32/ngipRZyndP0D3JQ9PPz4/z3Z4+fZpDloE70WVKbxHqs9b62WazYdy4cS362cnJCRUVFZx2CbijdNtsNlWkkbJ9aSw3j5pSQu1NSh4ZR+gZMzIyMHPmTFZo6CCXolCU48pms/FmkOofUU0I5dhRto1er1c9o3LsKPuaPBuV7aL0eqF0McrvV1RUoLGxkfuMDkfMZjPKysrYuEaKJaWaoOdxcXHBgQMHUF5ejg4dOnDhT6BpA0mFgIE74fB0L6Wiq2zj8vJybmNKMabX61FZWamSERTFokw5Sam/qB+VbU9RXdS+Sm8gm83GY0+ZjoY8gpX9Qt9r3i+0GVSmISotLb1rv1RWVnIbtKYglZWVcVuR0knjhK5DhmBqI4L6BWjaDCjT7ZBHEkV5KsPklTKLZCpB7QA0bQTovajPyEhFHsNarRZjxozhZyT5Ru+s/K9Sxis9tuidAbRYc5RelFRvhZ6neTuSYVB5CKRcc5QGYzp0o7WMrkHvRv3ffM2hPqR2oHFEfU/QRkGn06Fnz548z5VzErizpjg6OrZYs5ReqNSGdXV12LJlCzQaDUaNGsV1c6i/gKY5Setz87ZVRqkA6vGt1Wr5N3StkpISjmisrKxEWVkZbDYbf5fazNXVlZ/DbrcjOjoar7zyCsrKyloYmunfzeeLzWbjAuyurq4tvGQJZZuR3KHaItSftA42ny8VFRU8DpRygiJ+qK1IjpnNZnTu3LnFmmU2m1UHAiSTDAYDFzomPDw8eE5NnDgRr732GqxWKyorKzki53e/+x0ee+wxfkblgcTAgQM5okV5XfoepbClttBoNCgrK+MUos8++yyAptR7a9euRZs2bfD3v/9dNQ6ioqIwcOBA2Gw2nj9nzpxBfHw8y52ysjKWexkZGap1lP7bXP5SO9JYVspfijgh2ULyV/nuyjlF70TMmDFD9R36HY0P5bVpjCxcuJDnvbKeBq0nNPauXbsGoKn2DfULpZGi/qZ7ajQa/rcyxSn9LTQ0lPv1bm2m0+nYw/xebQZAtYbSe9I9le9C1yddwt/fnyOhKY0oyVq6FtUXIrndXNZSBBy1H6VvAppqndIzKJ2rmuv3CxcuRHl5OQYMGAAvLy+Vfk9ZBprLWuUBUmVlJbZt2watVouxY8eioKCAn9dms3Harub7DWX6T6WspfdqbX+glP0HDx7kvzXfbyhRymsAKtlPvxk4cCA/j5OTEx88U7+QJzu1J/WLm5sbH+ZS/R36LkVbNo/+d3Bw4DSxyqgucgRSphSm5ycZr4xAowhSZZQQ3ZsM6tRfpDcAUOkXFL1Iae6U0Lht/jndm3Q1m82Gv//971zHhpwBgKbsBm5ubpyS2s/PD9evX0dsbCzfW6mv0n1JB1XKG7ovGehJVyktLeXnVI4Zo9EIJycneHl5wdfXFy+//DL27t2LoqIijgCn9qT9AxmzSQ6RHKG2pjlD65hyXptMJo6QBoB//vOfAMDp0CgFNUUNKfevFPFIz00Hzcrxr1y7KUqLHP3y8vL4s/Lyco4IoD0aPVtdXR0GDhzIke9Wq5UdIKh9lZFCND+UfaVcD2gsR0ZGqvQKmmt2u53HslKm6/V6VcSA2WzmiGZ6TqvVyu90t7FsNpt5bVBG4jbfT5Ju0tygSrKzoaGhhVxV6veHDx9mWVhYWKjaC5Dc0Gq1eOaZZ/ga48aNQ2FhITp06KCqQamMniQ9R4ky4qeyshLPPPMMoqKicODAAZ5zZOClNie9haKDadyS0Yn6qTW5SvOPopmaGzmo3piTkxMaGxtx48YN9OjRAy4ud9LbVlVV8ffoN8ePH8fMmTM5LaASWkNcXFxw7Ngx
dtCgfiM92Gw2q/TvmpoaVfYSpW7zwgsvqPRCkikFBQU4fPgw+vTpA7PZrEoR+8knn/BYpfFps9ng6urK9bk6dOiApUuX8hyi8amM0Gke0UKRgjSmKNsCyZnmZ0Bbt27FH/7wBzz22GMtztEo2r2oqAh1dXUYN24cAPCenb6jjNIG7tQSJr3T1dUVt27d4nZ5+OGH0aVLFzQ0NOA3v/kNP+tDDz3Ejt4vvvgigDvRayQPyLFbmdWCHJYoNWnXrl2RnJys0vcoIovG2EMPPcTvqowioncinY0i36iPaK2k9VP53nQtwmKxoK6ujucEfU953kHvqJwjJKfKysp4XSCDKK0F9LlyfivXb2pTiio7fPgwR4i2adOG37eiooJTzycnJ7NM0el0uHbtWotzXWVGFOU4ojWxuWGU+kmj0XCknMFgwKefforw8HAsXbqU24zmGJ3T0fN5eXmhT58+KCgoYJ2Ovufm5qbSMYODg+Hj44OcnJxWzzrDwsIwaNAgPPLII3BwcMC5c+d+VMPcT4kYrIT/GpQ5d4mKigoUFBS0qD3V/HdAkxGkec5SHx8fFBYWct2L5tdXequkpaWhffv2MBgMSE1NhcVigYODAxwcHPh3hYWFKCgoQKdOnVBYWIjS0lKkpaXxMygVGvqcctMCTQtBfX09rFYrGhsbkZiYyAfVyloYrT1rRkYGtmzZgn79+qkMVps2bcK7774Lm83GeWvpOeh6fn5+KCgoYIH8xRdfYNOmTQgLC0NFRYXquUmhp82O0tOGPCyVuZGVtcHatGmDxsZG1NTUQKfTITExkb1iqHYIAFXqK/K2ojpQVL+heR8rDSN2ux2+vr6w2+2or69Xed4oU83RMwF3Fsi0tDT23IqIiMDUqVM5vJoWaGpfZSqV1vrZw8MDXbt2bdHPQUFBuHXrFqxWK2pqalSpBoxGo6rNUlNTodFoUFRUxJ+lpaWpxr5yY0y/TUxM5BRLBoMB/v7+SEtLQ1VVFbZs2YLo6GhVG65btw79+vXj1I4GgwGZmZlczyQpKYn7xd/fXzUWAXANIlLq6BmpLaqrq7kfSktL4e/vD51Ox+1CHk6pqam88SgqKuL5FhwcjLy8PNhsNj4Uof9Seg4fHx/4+voiKyuLD1ypj9LS0mAwGJCcnAxHR0dYLBb4+vrymLdYLNDpdKyUkYcdzT9SNPR6PT9PWloaUlJSePNBBikKe3d0dOQc/wUFBTAYDNDpdLypd3NzQ0pKikoWKBU45Zgij1nqF3qvnJwcVFRUcB5/SlVAv6Pv3atfyHhZXl5+z35paGiAVqtFUVGR6jlpLFJ/0L327duHoqIiNkYtWrSI0/8o5yD1C/2N2ga4My8pDzgV/qXxoZR1Sq/i1NRUvlZeXh6Cg4NV70X1tPLz81uk9dHr9S0iN5SKtFLGk7dtUFAQALXCq1xzqI6gTqdDYWEhfHx8+HmU7Zifn8/KLrURfY/WnKysLG7/wsJCTi/YfM1JS0tTHYYoxxnNZS8vL9Wao3ymy5cvcz0aZf0vjUbDcyA1NRUVFRU8puvq6njOlJaW4saNG/D19VWtk/ReX3/9NSwWC9LT05GWlga9Xg+j0cgOFhaLhQuoK/uA5B5dh1Jt0jumpqbymFamsSgsLIS/vz+SkpJQUFDAKTw7dOjAbRMcHKwa323btuU+p+eia1N6TeW4orlPhwLKmjFKlG3Wrl07ljvUZhRNRO+snC/BwcFISUlhY25mZibPF5JP1GaUWpfWruZrFq0L5DxC6zeNVeX4JNlFaxZ5T1ZWVrIHJUUU09hQzquqqirYbDZYrVakpqayTE1JSeF0SqmpqfxeFRUVLHd8fHxUBgGdTofQ0FBkZmaqZFFQUBCCg4NZ/tL4J5mp0+lQUFDAkeK5ubkqOas8CFd+TuuBl5cX13MCmuSvUkaT/PXz81PVdKR2JFmpPPymcUU6FbXZV199peoTkmknTpxAXFwcv8+tW7dQXFwMnU6HlJQUpKamcupW
yvFPhyLUNtSW9B6U9qT5ONHpdHzY17FjRx5792oz2mjfq83oWZRtRtD3rl+/znNduWY9/PDDfG+SYyRrac1ydnbmAvX0HsqxnJyczPUmAbUTwHPPPcefL1q0iPtQqd8XFBTg4MGDcHR05IN1pZ7crl07lnn0jG3btuW2b2xsRFJSElJTU5GRkYFBgwbhX//6Fz9HWloaH4QoZb/y0KqwsFAla5Wyv/n+QPn5oUOHADRFvyhlP+0PiOZrIMn+69ev8ztVVVXx8wQFBXG70d+//vprFBQU8KGPs7MzfH19uUi7UueisUBpz0iXov6j9QBQ1y2kyJH6+nqUlJSooheqq6tRX1/PMpv2EKmpqSpjDz0npasj/TAkJITlC70nrdPUP83TMtO4VTon0b2Dg4O5mD3QZNSlesoODg5sFOzWrRvrLNQWbm5uSE5ORn5+PioqKtC2bVtuOxrfFRUVLVKX030rKipQWFjI+5mvv/6a113aM9DYVa7v5MlfUlLChh5y9KK6w5Tui4rS0/pFdR1pzlD/VVVVITQ0lA0pQJPuEBAQwMZoaovw8HDY7XZe00jPoxp4+fn5nLaNjIh0aE7yVpleNSMjg3XS5557DrW1tSgtLUW/fv04AiIoKIjPCEgef/3115zW02q1wmw2s26j7CsaKzROqP/o+ZuPZXpmGss6nQ41NTUoKSmBXq/H9evXkZ+fz0Y7Gsuenp5wcnJCfX19C2Mz1W+821imd1KO5erqapUDm1arhYeHB8tV5VimVLHV1dUqOUTjidqbHDBramqwbds2bvfdu3dzunWr1aqKUli8eDEcHBzg5OQEHx8fbhul4092djbLp+ZOlv7+/khNTYWDgwNWr16N4cOHc00ziuKn2lmJiYm85y4oKODzAIrgdXZ25nFL84Geh76r3HMDTbLPYDDwGYQyeoy+r4zwa9u2Le9127Rpg7KyMmzZsgUFBQXYu3ev6rf79u1DYWEhfH19uR6ycv9P/erm5qba62RmZiIgIEC1vtC+lWqlOjs7w8HBgfUv0oN79uypWqf1ej3XZwaaMmpQfVBvb29OM//aa6/BYDCo5hDdm/Rfs9kMi8XC46W5vkTjmmr7kfwmGd+jRw92MFauec1lbWBgIEd30tig7zQ/xyP5WVNTw/ekd6fnI/1DaUR94YUXMG7cOHh6emLZsmX8veaO9o2NjUhLS2PnjU6dOsFgMKhkQm1tLf//goICuLu7s2GCdHxCud+hdyJH+7q6Ojg5OeHWrVsoKChATU0N7HY7Rza5uLi0uBa1Lcl40sdoD066CMnt0NBQ3lN16NABNTU1MJvNSEtLQ7du3QA0yXaSH76+vqwn0BpMDhW0pyJd5OLFi6isrES7du2g1Wr5mZT7UEKpi9B1Sd+lZ1c6virPjWmsKKG1o7GxEb6+vrz2kfFt48aNfD6l1+u5ZnVwcDBycnL4b0RtbS3LjODgYHTs2BEmk0klP4mSkpJ7nmsDYIefu0Wl/dwQg5XwX0NERAQSEhJUyglZ3YcMGXLX3/Xu3ZuVNvp9fX09jh49ioCAAP59REQEEhMTVeH5Z86cQVVVFcxmMw4dOgSj0YgBAwZwXl1SxuLi4uDl5YW0tDRoNBoO+d68eTNKS0sRGRmJsrIyJCQk8GJ76NAhfqePP/4YWq0WDQ0N/P81Gg1SUlK47sC+ffvw+eefw2azITIyEnFxcQgKCmIv0vHjx2P79u2qujMA8MQTT+CNN96ARqPhRSA9PR1ZWVl8PaqDQgp1QEAAR/wcPnwYGo0G7du3h9FoxLVr1zid2KFDh9hbk55j4MCB3I70fnSonJmZyeG8DzzwAC5fvoyRI0ciKCgIhw8fZqUmKSkJWq0Wffv2Zc836qfW0glGREQgPT2dn+HmzZtswLpx4wYvUu7u7pxTm/qZDuGV/Xzy5EkATYtGfHw8L7q5ubnw8vJCSkoK0tPT+Rpvv/22qp/j4+MB3PFEoXaIjY1FSEgIoqKikJCQwM97
9OhRxMXFAWg6eDh9+jR7TsXFxSEwMBBnz55Fx44dYbFYcOjQIR77ykKT7u7uOHz4MCIiIliZr66uxqBBg/Dll19yGPP27duxfft2dO3aVTVOtm/fjiNHjkCv16NLly5ISEjgEPOkpCS89dZbAJrCnJVjEQCGDx8OjUbDh9D0jDR2zpw5oyoQabFYUF1djfj4eB7zwcHB7AXU2NiIhIQE9OnTB0eOHEH37t3Z44z6jNIWKY3KERERiIuLQ9++fVFcXMxecytWrGDP6crKSmRnZyMiIoL7WqvVorq6GhcvXkRAQAD279+Pdu3a8YExKUBBQUE4cuQIIiIiUFZWhlOnTsFisUCr1eLkyZOwWq2cL508kT/++GP2zKTxq9FoYDabcfr0aR47cXFxrKSQjAgICEBQUBAuX74MoCmEPiEhgfs3OTkZO3bs4Of/7LPPuF/S0tL4We7VL8qw/Hv1C9B0EHv27FneAJKMIPml0Wgwfvx4+Pj4cAo5mgfBwcEIDw9HcXExz8GtW7dyv9jtduTk5KC8vJz7pbCwkK/h5eWFtm3bstKn1Wp5bptMJj6Io36hzdnJkycRERGB4uJi3LhxA3FxcYiIiFA9d1FREXbv3g0XFxf0798fe/bsAQCOEDt27BjMZjOOHz8OrVbLMp4OONq1a4eysjLuF5PJxGtOVlYWTp06xQcOWq0WDz74IIqLi1VrDj1PbW0tzGYzLl26hKCgIJ7XtObs27cPHh4evOZQjQSNRsNrTqdOnXDt2jUUFxe3uuYUFBRAo9Fg5MiRfF+NRsN9f/nyZTz99NO8QSP5S/Oc2pDWLxrTNGbatWuHxsZG/h7QtNmmaFdHR0fePNEcyM7ORlhYGKdJ0Wq1SExM5KLaiYmJaNeuHcrLyxEZGcntFhQUhBs3bsBisWDv3r04ffo0H8KRYYDWHYvFwgWvacMTFBTEczAiIgJnz55Fnz59WDabTCZotVpkZ2dDr9cjKioKQUFByMvLU7XZvn370LdvX5w+fZpllLLNlJAhkKINLl26BF9fX24zMrQVFxfzPeh9IyIicPr0aX63kydP8nzJyMjA0aNHeV3dv38/X6e1NWvixIm8ZgUGBqKhoQHx8fGwWq0oLi5GbGwsHz7m5+er1qxt27YBAM6fP8+HxjQHIyIiEB8fj6+++gp6vR5arRbbtm2DRtNULyguLo6jB3bs2MF1L+Lj4xEXFwc3Nzd8/PHHHHFYWlqKI0eOwGw249y5c2hoaEBiYiL8/PyQlJSkOugYOnSoSv4CTYdLVNeKHEA0Gg1u3LjBfU9tSWNPKX/37dsHvV6PkSNHArgTgZScnIwePXogNzcXZWVlLH91Op3KgYPW99jYWP7Mz88PFosFly5d4nXx0KFDuHr1KgwGA2JjY1mnIrnl7u6OzMxMPPXUUxyhHR8fD61Wi969e2PPnj28hr733ntwd3fnNgPupMuk31y8eFElt0hvCgkJQVlZmWoukp6sXLOat9n+/ftx4cIFmM3me7YZ6YfUZkCTIevq1avcR7t37+b70nwBmtKVBQQEcJspZS2ttXR4RhQVFbGsTUtLQ2JiInvVOzg48JrVpk0bdOrUiZ0dAgMD4ebmptLvycA4ZMgQjpJort9bLBZUVVUhPj6eZS1FddP/kpKS8Mgjj2D79u3YvHmzyvDh7OyMxx9/HFqtFgcPHmTZT+/Xtm1bfh46xFHuN/bt2wedTtdC9sfHx+PmzZvQarUtZH9SUhL3UVlZGetXtAaSHKN+0ev1PKcDAwMRFRWF7Oxsfg+TyYQ333wTWq0WycnJ3C8RERGIiIjgdTczMxOOjo7YuXMn94vVakV2dja6d+/ORtuqqiokJibi888/R3l5OSwWC9q1a8c1FtPT06HVavnwjto5JSUFer0eISEhcHNzQ25uLurq6lhmGY1GvPnmm/zdqqoqNDY28nPSIVh+fj7S09O5zajeVEFBAQICAuDh4QGLxYKYmBhotVqkpaXxZyEhIfD3
9+f5RzrNvn37WAbRIa6LiwuuX7/O9y4rK4PVakVJSQkb5KxWK4YPH45Lly4hICAA7u7uOHLkCGJjYwE0Ob55eHjwfSMiInhe5efnw2g04ssvv8S1a9dQVFSEIUOG4ODBg9BoNHjwwQeRkJCAfv36ITExUbVWka5oMpkQFBTE62NSUhLCwsKQlJSEfv36qaIKgSYDHEWnA00H6+Hh4dBoNCoDx5gxY5CcnIygoCCcPXsWnp6ebKggQ9auXbtQVlaG06dPo3379qitrUX//v25nlNISAjy8vJUfUXRRA0NDdi8eTPeeOMNAHccWnx8fLB582bk5ubC3d2d92iku5hMJqxYsQIAVJEq9HfqKzo8vHnzJt+bxjKtye7u7jyWP/jgA446oLEM3DFiBQUFISYmhtuRolJCQkIwcuRI1NTUsA5IuiWtPfcay3V1dQgMDFSN5ZqaGr6/s7MzLBYLGhoakJCQwDonpahU6pwajQZHjx5FWloarl27hsLCQtTU1MBoNGL8+PF4/PHH2RlJmYmDxsc333zDOp7ZbMbRo0dRWlqK9PR0jihzc3PDzZs3WbZUVlZi+/btCA4O5vWDZKfFYmFZX1ZWhi+++IJrUt28eRN+fn64evUqevXqhcuXLyMsLIz34+SwQc4WZrOZx21kZCQOHjzI89dgMHCEemBgIL7++msEBQXh1KlTXAeY+pLalMYtObgBTVFEpHM2NDTAwcEB06dPh1arRZ8+fXi8OTk5sV5KjpYJCQno27cvbty4oTIo+vv7Iz4+HkeOHEGvXr1w/fp1dhACgB07duCvf/0rG1Lr6ur4zOfo0aNwcnLiNPuBgYEoKyvDxo0beQ4BdyKod+7cyeNg165dqKurg5ubG8sKSrFNsvbQoUO8Z6CaSpRBhjLqUP/961//AtAkL2pqahAZGYnMzExef5TvHBcX16qsBe6kTKS9YWVlJc+JqqoqVFZW8vkgpXCj8TRkyBB2cNJoNDh9+jSfKyrThUdFReHixYsYPXo07+MjIiKQmZnJ8i0wMBBnzpxBaWkpp8FPSUnBgAEDWCZcvXoV3bp1Q1xcHO93jh49ys6kdAZB70P7HaWsJ93AarXy+AbAZ4qkw7i4uKjOVePi4tiYSsbES5cuITAwkHXEmpoa2Gw21hW7deuGW7du8XVtNhuMRiNKS0t57BYWFmLAgAHQaptSuGZlZbGM1+v17FDVtm1b1kXi4uLwxRdfICAgAEVFRbDZbBg7dixKS0vRtWtX3oeWlJTg2rVrHMXq5OQEu92OkJAQHgPr168H0LRXpDYj2U39dOPGDZWDJ52TJCcnIyIiAu7u7hzlW1dXx7pxXFwcunTpotpDlpeXo3Pnzjhy5Ajy8vJw/vx5dp6hfhoxYgQKCwt5P0X9k5ubi7y8PNXzKbFarfj0008RGBiI+vp61oN/7mgaf6rklYLwI1NWVoaxY8eiQ4cOeOaZZ5CXl4dXXnkF48aNw+LFi/l7v/jFL5CTk6MKk9y6dSvWr1/PqRUsFguuXr0Ko9GICRMmYPHixaivr8ekSZNw+/ZtWCwWLFy4EKtWrUJoaCj69OmDDRs2cJjvihUroNPp4OTkBIvFgpycHERHR2P37t1wdnbGq6++im3btuHs2bMs9IOCgpCRkYGXX36ZDwInT56Mffv2sTesyWTiRbm8vJzz4w4bNgxHjhxhhYpCZ7ds2YIXX3wRpaWl0Gg0GDBgAC5cuMCp4GprazF//nxERkZi2bJlOHfuHEcYURqBTp06IS8vD927d0dCQgIMBgMr0FQUub6+Hm+88QaSkpKwadMmODk5sReOEnd3d8TFxWHEiBGora3FsGHDuPghhdZrNBoMHz4clZWVOH/+PFxcXNCjRw828tD9gaa0Q3FxcZyOYcCAATh48CAyMzNRV1cHk8mEp59+Grm5uYiJiYGHhweKi4vZs1mv1/O1HB0d4ePjw5FcVquVDyyApoV79OjR7NEUFBSElJQU9pqkBTsqKgo3
btxARUUFKioq0K1bN3z99dfo3Lkz5s+fjzfffJP/rtVqER0djYiICMybNw9WqxWzZs3C3r178c0336BNmzbw8/PDjRs3YLfb4e3tjby8PDg5OWH16tVYvHgx8vPz8dZbb2HBggXo0KEDSkpKkJycDL1ez0YcMkQ6ODigpqYGoaGhfCgBAH/84x/x4Ycforq6mlMJAE1K+VNPPYXGxkb06tUL7du3R2xsLM+T/Px8+Pn5cX5goOlg3tHREWlpaRg8eDBsNhtycnKwZ88ePPjggygtLUWHDh1449bQ0ICePXsiOTkZsbGxuHLlCubNmwej0Qiz2cx5600mE0wmE6xWK6Kjo5GSkoIvvviCw6MJSinZs2dPXLlyhaMjKAWXxWJBZWUlunTpgqKiIjg7O7OXo6+vL3JycrgA7JQpU7B//34MHDgQJ06cUBW7r6mp4QPHtm3borS0FMOHD0dCQgKsVisX1KVx1LdvX1y4cAEeHh7seUiHIvTcyiLIAwYMwLlz51jRNxqNnMph8ODBOH78ODQaDSIjI5GRkcGeOhMnTsTBgwdRX1+Pnj17ciqjHj16cBQcbezpML6goID7hRTZxsZGBAcH4/bt2zAYDAgICMDNmzd5U0GebZTegw78n3/+ebz99tvQarUoLS3lw+jAwEAkJydj7NixOHr0KMaMGYPY2FguKgw0Kf+hoaHcL1arFbm5ufweJNucnZ35ea9fv47g4GA+uC8pKeG2Cg4ORmZmJoYPH45Tp06xN1mbNm14M2Wz2aDX6/Hwww/j008/ZW9yFxcXWK1WaLVadOzYERkZGaisrMSDDz4IrVbLhzTk+VZSUoLRo0fj+PHj6NSpE8aPH48PPviAI6ycnZ3h7e2NkpIS3thS6kZKCUHja+jQoXjkkUfw+uuvw2q1wm63w2g0std4//79ec2hdqT2o2L3AFRrjp+fH5KTk1FfX8/XMhqNcHFxUa05AwYMwPnz5/lejz76KI4fP47x48fDbrdjz549KuMKpfkaPXo0fv3rX6OxsRGzZs1CaWkpTCYTdDodH9zRnOzTpw/69evH0R1eXl7sZevm5gZ3d3eUlZWhpKRElUpWp9PB19cX6enpfOBLMs3DwwOFhYVwcHBAaGgocnNzkZ+fj9GjRyMhIQE1NTVwcXFBaWkpy4KAgABkZWXxmufk5MRRVcCdgycHBwf+/MUXX8TGjRthsVi4b2l8U8TawYMHceXKFbz44ovo0qWLKt84rUFarRZPPPEEIiMjUVRUhCVLlqCurg4GgwHDhg3jQ41HH30UBw4c4MMcZZQGreHUZjk5ObBYLBgzZgzeffddGAwGTv2j0+nQuXNnXLt2DXq9Hq+88gqOHDmCY8eOwdXVlVOmtbZmzZ07F5WVlXj66aexdetWHve0PhqNRrRp0wb5+flwcnJCSEgIrl69yuOZ5sioUaMQExMDi8WC559/HqtWrYLNZsODDz6II0eOcEqe27dvsyPAqFGjcOLECbRv3x42m41rYg0dOhSnT5+GRtNU7/H8+fNch+vAgQPcx0OHDkV8fDw6derEm+Y+ffpwehAay+7u7hwdRu02ePBgnD17FhqNBh4eHpgzZw5eeeUVDBgwgOXvk08+icTERHz55ZfQ6XR49dVXceHCBezZswcRERH49NNP0bZtW/YAttvt8PHx4dRIpCc1NDTAbDarHG7mzJmDvLw87Nq1i9cc6ktvb29kZWXB0dERq1atwksvvcSHj3TYFxgYiKKiIk7lOmLECMTGxvJ6Qp7QU6ZMwe7du2E2m7F48WLs2LED165dg6enJ4qKiuDu7s73Juegjh078mad0gu++uqrKCoqwl/+8heW0z4+PkhPT0e3bt1w8eJF1r1CQ0ORmJh41zbz8vLC1KlT8Y9//APV1dXo2LEjCgoK+LCrpKQE1dXV8PX1ZaNOXV0dHBwc4OXlhWPHjuHAgQOYN28eR4EBTYfIdKBIqbi6du2KK1eusCymiDGz2XxXWas0
NiplLckK8oo3Go1wdHREu3btUFBQgJUrV3JNCqX+SfPEbrdjypQpsNvtiImJgV6vh9lsZgPU4MGD2bFi7ty5ePPNN1FVVcUH0rW1tWhsbFTJ/ilTpnA9GpPJxPsDd3d3rFq1SrXf+Oijjzhlz5w5c7B161aMGzcOdXV1iImJgZeXF0aPHo1jx45x9LGDgwP8/f0xatQobN++nfvF3d2dawb5+fmhpKQEVqsV7dq1Q25uLrch6QAU9d65c2cUFRXB19cX586dYx2DIk6pxlN1dTXef/99/PKXv2SnJJPJhLq6Ou6bNm3aoFu3bkhMTOQ1idYjcgqirA4dO3ZESkoK7HY715BSGrXNZjNsNhvsdjtMJhPWr1+PgoICrFixgtOWUspbGmN9+/bFrVu34OLioopIU6Y+12g06NixI/Lz8zFx4kQkJSXhiy++4LFIawy1w8yZM7F582YMGjQIly9fZkOMMr3zgAEDMG3aNK4h1rzeIcm9tWvXYteuXcjJyUH79u15jnbs2BFXr17lAz5/f38kJibC3d0dM2fOxL/+9S94e3sjJSWFnS8GDx6MlJQUODg4sAHm8OHDrHP7+/sjKyuLIwkp6o2MAEqDCNCkMzU2NqKqqorna5cuXfjwvaGhAc7OztBomuoO5uXlcVpDMihTPZTg4GCkp6ezHpaYmAiz2cyRWbdv30ZoaCiKi4v5DOG1115DYmIiR/OYTCbk5OSwwYsie2/fvq2KSKU+ozS7RUVFPM9qa2tZ79doNJwetrq6GmPGjEFBQQEbWEiPJZRjmfQAahcaI2azGVarlXVW0vccHBxUaYJpLJMT2a9+9SsAwLvvvst7XrPZzJHNGk1TJoUbN26gd+/euHjxIj+XVqtVRaCazWZMmjQJGRkZuHjxImpra9nARtHubm5umDp1KrZu3YrZs2fjrbfe4kgTZcrfyMhI3LhxA/n5+fD19eVICNp3k/PHX/7yFyxbtgxhYWFsfKQxRNclR9tDhw6htraWs3GQkY+iSWfOnInY2FjY7XZUVFTw+CVdk84gPD09OTMG7S1pL0P7l+7du+Orr75inUIp8+rr6zl1ITkhmEwmNtrW1tbCy8sLc+fOxSuvvAJfX18kJiZCr9dzzcj09HTWGx599FEcPHgQXbt2xfXr11XRKRQtTHsOMjhR+3l6esJqtWLs2LE4duwYOwJ27doV165dg0ajwezZs5GYmMg165YtWwaNRsPp6KqqqritSBbT3NJomurtUV9Q1h43NzesWLECzz77LK9dzWUU0OQouWTJErz44os8Ho1GIxvZjUYjBg0ahBUrVmDSpEkcFQkAffr0gaOjIz7//HNoNBqsXbsWY8aMAdDkiHvp0iWYTCZMmzYNCQkJfAYUFxeHp556ivVRkrG1tbVskHz88cexePFizJw5ExcvXuR9XFhYGMrLy/lsYtCgQXBxccHp06c5deSyZctQXFyMdevW8bihiFC9Xs/RgNXV1ZgwYQI2btzIc23UqFE4fvw42rRpwzrE66+/jpiYGHZW69OnDyorK3H79m1UVVVh8ODBePzxx2EymTB//nzuJ9KnSKbSmkXnbqRzOjk5obS0FE5OTujUqROuX7+OwYMH49SpU7wnJxlI54eenp7QaDTslENnTqSH1dfXIygoCLdv38aIESNw4sQJ/juNa4PBAG9vb9bXQ0ND8eWXX/JY8fHxQX5+Pnr06IGXXnoJS5YsYYc6q9XKRiQAvO+12+3o1asXxo8fj3feeYfX5sGDByM/Px+FhYV89nns2DHExcVhyZIlaGhoQFVVFcLDw3k9rKioQGxsLJYsWYIzZ85w5iIHBwf4+vriypUrcHd3R48ePTBu3Dh88sknuHXrFnJzczn9P0VXlZaW8visrKzE+PHjeV8yceJEnDx5kg19cXFxuHnzJt566y3k5eWha9eucHZ2xokTJ5CVlQUXFxd4e3tjz549qtqEP1fEYCX8V5GSkoJly5bh0qVLcHJywoQJ
EzB37lzVZJ0xYways7PZ6xNoEo5bt27F9u3bOWexi4sLJk+erPp9Xl4eJkyYgNLSUjg7OyMqKgoLFy6Ek5MTtm7divfffx/FxcXw9fVFQ0MDcnNzWblwcXFBVFQUCgsLkZCQAJ1OBx8fH6SlpcFms2Hw4MFYtGgRQkJCcPz4cSxZsoQt9CaTiQW4o6MjSkpK4OjoCE9PT06ro9VqebOs1+uxceNGjBgxAikpKZgzZw57jQNN3jWvv/46nnjiCcyfPx9Wq5W9Y5rj7u6OiRMnoqioiDfrzZk4cSJeffVVNDY24qGHHuJ0C0rCwsKwZs0ahISEIDo6GteuXeP6KgR5WdlsNnTp0gVRUVF48803W4RMGwwGVgxMJhOngyspKWm1gKSrqyt+9atf4ezZs2ywU0KHoX379kW/fv2wZ88eVowdHR0xdOhQlJSU4KuvvoJW21RfRVmnym63w9/fnxX57OxsTs9iNBo5tWR1dTV69+6NgoICmM1mzJo1C2+88QbS0tLg6ekJs9mM7Oxs2O12NgIojWtarZYVGEopYjabcebMGR7758+fZ88cer7Ro0fjoYcewpIlS1ShxZQb22QyISoqCunp6fjmm29Uc+P48eNcGwRoUvpfe+01bNu2DV9++SWHRJNySWlcRo0ahZUrV+L3v/89z7eUlBTMnTuX0wrShq1bt25YsGABp1nYuXMn1qxZ0yIHuclkwtSpUzF//nzU1tZiwYIFOHnypMpbk5QRFxcXBAUFIT09nVMX0EEUAJ5vFosF0dHRdy22O2zYMPz973/HqVOn8Kc//alVQ2ynTp0wb948DBkyBGvXrsW2bdt4I+jg4MDt4+fnh7KyMpSWlrKyrRz/QNN8o0M+KvhNBxvkVT137lx8/vnnKhnRHA8PDz7ArK+vZ8OCt7c3KioqUFVVBaPRiKNHj6KyslLVL4RWq0VAQACmTZuGadOmISYmptV+AZrC+F9//XX86U9/4rmjxNXVFfPnz8fkyZNZVpL3EG3KDQYD+vXrx/3yxBNPqIyRzZ+tTZs26N27NzIyMjjVCOXqB5oitubPn48hQ4ZgzZo12LZtW4v2bn5NBwcHnnf0mcFg4JQuN2/eVB0yNm/zXr16ISMjo0X6JGL69Ol46aWXsGbNGnzwwQfct62xcuVK7N69mw8HjEYjxo4di0WLFqnWHBoDyjpTAFqsOdQ21AZ9+/bF0qVLec1ZuHBhi9QDxOHDh+Hn54fly5djz549dy3k2r9/f/z1r3/F5MmTVamHyHMuKCgIWVlZKCkpgaenJx9iAnfWCIvFgmXLluHTTz9VGfKApjWif//+aGhowMWLF1XvQweetbW17N362WefYe3atdi9e3eLdYQ2uTNnzuS6jMrxodU25UR/6KGHkJSUhKSkJLi7u3PaW/Igpucj2UhybPfu3di6dStu376tegej0Yjo6GjMnz8fDz/8cKvzxWQy4f/+7/8wdepUTJs2DZcvX2517CrbjHQTAK2ugzqdDgEBAWhoaMA333wDT09P5OTkwN3dHeXl5Xdds8LDw+Hq6opz586hpqaG1yZqI6BprFGqqHsVQyfIG99gMCApKYkNrTS36DDRZDKxsSI3N5flJh0eODo6oqqqCh4eHqwblZeX8xqq0+lU6fWUODg4sNMNQYd+5EQzcOBAnDlzBhUVFfDw8GCdsrn8dXNzg8lkQlFRETp06IAXXngBQ4YMwfLly/Hxxx+zjKN1UqfTYfjw4Zg5cybWrFmDK1eutNpnzfsPuOPB7OnpyTUy6MD7busBjVODwQCTydRqPVb6XkBAAB9eK+WTwWBgw4Oyvk9YWBj8/f2RkJAAvV6vajM6ZK+vr2ddWJmW6NvaTLlm0eE2RbuTgwIAdpIKDAyEzWZjHWbnzp1YvXp1i7lP0Qnr1q2Dr68vFixYgBMnTqhkyaOPPvq9Ze1nn312V11i7ty5mDVr1veStR9++GELnWrZsmU4deoUDAYD6zY055SyVin7yUud
2l6pjzffb9BhnLOzM493AC3Gcmtjh2qzKQ1I5OCi0+mwYMECHD9+vIWeSvLgb3/7G9asWYPs7GzExsa26BegaY527doVixYtQq9evXD79m08++yzLfQXBwcHjB07FgsXLuS9W2trt7OzM1566SXExsZyCru7odFoEBISguXLl+P06dN33TcZDAY88sgjWLx4MfLy8rBs2TLeeyjnuYeHB1544QXExsYiOTkZHTt2xM2bN9lRS3lfap8HHngAmzdvxrp161q999ChQ7Fu3ToUFRXhpZdewtWrV1u8t6enJ+bMmYMpU6bwnjg2NhbLly9nJ0C6LxnBH374Ybi5uXFUOBk8APBe5MEHH8SCBQvw/vvv46233mJDJBlBGxoa+N908N/82eierq6uHIlDhiiC9HmNRoNf/vKXePbZZ7Fp0ybExMS0aDtHR0f069cPs2fPhpOTE++TaA5rNBr07t2bo/npDMHR0RHOzs68n3VwcMCsWbOQkJCAW7dusRG5NV2oc+fOWLp0KY/lxx577K5jpXv37li0aBGSk5Oxc+dOJCcnq9YsirZTjuU///nPXD9Uia+vL5599lnExsbi8uXLcHFxgYeHR4u5Qdf18/Pj9L93W7dpLDc0NHDKvrvh5OSE6OhozJ07F7W1tVi+fDk70AFNczc8PBwrVqxAcHAwy1VKeUfrDUUAlpSUwGw2o6GhodV9h06nw6ZNm/iwe+3atUhOTm6xloaHh2Pjxo1wc3NT7dGU7zho0CB06NABR44cQVFRERu3lftGAJx+k+oNUipk5Z577969yMjIwNq1a5GSkoLGxsYWz6TVatGhQwe0bdsWly9fvqueAjQ5dYWEhODChQuqurSOjo6Ijo7GP//5T8yfPx8uLi7YunVrq2nAHB0dERgYiIKCAjZ4klx1c3PDY489hrlz56K4uJijo1tjy5YtGDFiBGJiYrB8+XKVjkBnFY6OjjwPmxs0WyM2NhbPPPMMOxIq26h///5YsmQJgoODERMTg2XLlqkce5Sy9tq1a5g5c2ar99Dr9fjb3/7GJR0A4LHHHkNeXh6qqqrYKYfGZ0hICObMmaM65yBMJhOefPJJdq6dMWMGMjMz0bNnT5w8eVI13qlGV5cuXbBgwQL85S9/QXp6uqqNmuPk5IS3334bgYGBWLlyJT755BMulUHjxGQyYfjw4aro8uZQhPWoUaNw9epVLvvg7OwMnU6H4uJi7hsyKLm6umLixImYO3cuYmNjsXTpUtV68Ktf/Qpz5szBO++8g/feew95eXmt3tvDw4PnCNUoa419+/YhKysLb7zxBqdWVD6TyWRCZGQkXnzxRezcuRN79+6961kf0JSuMyAgAMePH2+xv27Tpg0mTJiAcePGYfXq1ap1QImXlxeCgoL47GrkyJGt7tO8vLywfv169OrVix2BW2Py5MlYuHAhkpOTsXr1aty6dYvrcdJ7+vr64g9/+INqfN6+fRvLli1DfHw819vs1asXli1bhpCQEGRkZGDp0qW4dOkSn01ptVr4+flh3Lhx+M1vfnPXGsk/N8RgJQiCIAiCIAiCIAiCIAiCIAiCINxXpIaVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIws+A0NBQbNiw4Xv95ty5cwgNDcW5c+d+pKcSBEEQBEEQBEH4zyAGK0EQBEEQBEEQBEH4AVy8eBEbNmxAeXn5/X4UQRAEQRAEQfjZo2lsbGy83w8hCIIgCIIgCIIg3Jva2lrodDro9frv/Bu73Y76+noYDAZoteKv
+J/m7bffxqpVq3DixAn4+/vf78cRBEEQBEEQhJ81smMRBEEQBEEQBEH4/xS73Y7a2loAgMlk+l7GKgDQarUwmUw/a2NVY2Mjampq7vdjCIIgCIIgCILwI/Pz3bUIgiAIgiAIgiD8TNiwYQNCQ0ORkpKC559/Hr1798aAAQOwfPlyNkgBTXWqli5din379mHs2LHo0aMHTp06xX9rXsMqLy8PCxcuxNChQ9G9e3eMHDkSS5YsQV1dHYDWa1jNmDEDjz76KJKTkzFjxgz07NkTw4YNwz/+8Y8Wz52dnY1Zs2bhgQcewKBBg7BixQqcOnXqe9fF+uijjxAaGorz589j8eLFGDBgAHr37o358+ejrKxM9d2RI0fimWeewalTpzBp0iSEh4dj165dAIDbt2/jueeeQ//+/dGzZ09MnToVn376qer39M5xcXHYuHEjhg0bhl69euG5555DRUUF6urq8PLLL2PQoEHo1asXFixYwO3VWj889NBD6NGjByZNmoTz58/zdzZs2IBVq1YBAEaNGoXQ0FCEhoYiKyvrO7eLIAiCIAiCIAh3+H7ueYIgCIIgCIIgCMIPZs6cOfDz88O8efNw+fJlvPvuuygvL2fDBwCcPXsWhw4dwvTp0+Hm5gY/P79Wr5WXl4cpU6agoqICU6dORXBwMPLy8nDkyBHU1NTAaDTe9TnKysrw29/+FlFRURgzZgyOHDmC119/HZ07d0ZkZCQAwGq14he/+AUKCgowc+ZMtG3bFgcOHPhehqrmLF26FC4uLpg9ezbS0tKwc+dO5OTk4N1334VGo+HvpaWlYd68eYiOjsbUqVPRoUMHFBYW4oknnkB1dTVmzJgBNzc3fPzxx/j973+P9evXIyoqSnWvrVu3wsHBAb/73e+QkZGBHTt2QK/XQ6PRoLy8HLNnz8aVK1fw0Ucfwc/PD7Nnz1b9/vz584iLi8OMGTNgNBqxc+dO/Pa3v8Xu3bvRuXNnREVFIT09HQcOHMCCBQvg5uYGAHB3d//B7SMIgiAIgiAI/8uIwUoQBEEQBEEQBOEnwt/fH5s3bwYATJ8+HRaLBe+//z5+/etfIywsDECTsWb//v3o2LHjPa+1Zs0aFBYW4sMPP0SPHj348+effx7fVqo4Pz8fr776KiZOnAgAmDJlCkaOHImYmBg2WH3wwQe4ffs2Nm3ahNGjRwMAnnjiCf7ND8FgMOCdd96BwWAAAPj6+uK1117DyZMnMWrUKP5eRkYG3nrrLQwbNow/W7FiBQoLC/Hee++hb9++AIDHH38c48ePx8qVKzFq1ChV6sOGhga8++67fK+SkhIcPHhQFU02ffp0ZGZm4qOPPmphsLp16xZiYmLQvXt3AMDYsWPx8MMPY/369di4cSPCwsLQtWtXHDhwAKNHj5YaVoIgCIIgCILwbyIpAQVBEARBEARBEH4ipk+frvr3U089BQD4/PPP+bN+/fp9q7HKbrfj+PHjGDFihMpYRSijlVrD0dEREyZM4H8bjUb06NEDt2/f5s9OnToFb29vlSHJZDJh6tSp97z2vYiOjmYDEgA8+eST0Ov1+Oyzz1Tf8/f3VxmrAOCzzz5DeHg4G6sAwMnJCdHR0cjOzkZycrLq+xMmTFDdKzw8HI2NjZg8ebLqe+Hh4cjNzYXNZlN93qtXLzZWAU3GtVGjRuH06dNoaGj4nm8uCIIgCIIgCMK3IQYrQRAEQRAEQRCEn4jAwEDVv9u3bw+tVquqe/RdInWKi4tRWVmJTp06/aDn8PHxaWHUcnV1VdWTys7ORvv27Vt8r3379j/onkDL93dycoKnpyeys7NVn7fWBjk5OejQoUOLz4ODg/nvSnx9fVX/dnZ2BgC0a9euxed2ux0VFRX3fFYACAoKQnV1NYqLi1v8TRAEQRAEQRCEfw8xWAmCIAiCIAiCINwnWouEcnBw+NHvq9PpfvR7/Dv8J9pAmR7wu3z+bWkUBUEQBEEQBEH4cRGDlSAIgiAIgiAIwk9ERkZG
i3/b7fbvXf/I3d0dFosFSUlJ/8nHU+Hn54fMzMwWhpzMzMwffM3m719VVYWCggL4+fl96299fX2RlpbW4vPU1FT++3+S5s8KAOnp6TCbzXB3dwfw7akXBUEQBEEQBEH47ojBShAEQRAEQRAE4SfivffeU/17x44dAICIiIjvdR2tVovRo0fjk08+wdWrV1v8/T8RLTR06FDk5eXhxIkT/FltbS0+/PDDH3zNDz74APX19fzvnTt3wmazfaf3j4yMxFdffYVLly7xZ1arFR9++CH8/Py+te7X9+XSpUu4du0a/zs3NxcnTpzAkCFDOELNbDYDQIt0goIgCIIgCIIgfH/09/sBBEEQBEEQBEEQ/lfIysrCrFmzMGzYMFy+fBn79u3Do48+irCwsO99rRdeeAHx8fGYMWMGpk6dipCQEBQUFODw4cN4//334eLi8m89a3R0NHbs2IF58+Zh5syZ8PT0xP79+2EymQD8sOii+vp6/PKXv8SYMWOQlpaG999/H3369MGoUaO+9be/+93vcPDgQTz99NOYMWMGXF1dsXfvXmRlZWHDhg13TfX3Q+ncuTN+85vfYMaMGTAajdi5cycA4I9//CN/p1u3bgCAtWvX4pFHHoHBYMCIESPg6Oj4H30WQRAEQRAEQfhfQAxWgiAIgiAIgiAIPxFvvPEG1q1bh9WrV0Ov1+Opp57C/Pnzf9C1vL298eGHH2LdunXYv38/Kisr4e3tjYiIiP9IDSgnJyds27YNy5cvx/bt2+Ho6IiJEyeiV69e+OMf/8iGq+/D4sWLsX//fqxfvx719fUYO3YsFi1a9J2MX23btsWuXbvw2muvYceOHaitrUVoaCi2bNmC4cOH/4A3vDf9+vXDAw88gE2bNiEnJwcdO3bEypUrVcbF8PBwPP/889i1axdOnToFu92OEydOiMFKEARBEARBEH4AmkapLCsIgiAIgiAIgvCjsmHDBmzcuBFnzpzh+kc/V9555x2sXLkSn3/+Oby9vb/Tbz766CMsWLAAe/bsQY8ePX7kJ/z3CQ0NxfTp07F48eL7/SiCIAiCIAiC8D+D1LASBEEQBEEQBEEQWqWmpkb179raWnzwwQcICgr6zsYqQRAEQRAEQRCE74KkBBQEQRAEQRAEQRBaZfbs2fD19UVYWBgqKyuxb98+pKam4vXXXwfQZNCqqKi45zVcXV1/ikcVBEEQBEEQBOFnjhisBEEQBEEQBEEQhFYZOnQo9uzZg/3796OhoQEdO3bE2rVr8cgjjwAA4uLisGDBgnteY/v27T/FowqCIAiCIAiC8DNHalgJgiAIgiAIgiAIP4j8/HwkJyff8zvdunWTKCtBEARBEARBEL4VMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xXt/X4AQRAEQRAEQRAEQRAEQRAEQRAE4X8bMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI95X/B/V7Nk7OqfW/AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[[\"pricing_prompt\", \"pricing_completion\"]].plot.scatter(\n", + " x=\"pricing_prompt\", y=\"pricing_completion\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for /: 'str' and 'str'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_ratio\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_prompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/arraylike.py:210\u001b[0m, in \u001b[0;36mOpsMixin.__truediv__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__truediv__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__truediv__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruediv\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/base.py:1382\u001b[0m, in 
\u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: 
ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[0;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[0;34m(x, y, op)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'" + ] + } + ], + "source": [ + "df[\"price_ratio\"] = df[\"pricing_completion\"] / df[\"pricing_prompt\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df[\"total_price\"] =" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py new file mode 100644 index 000000000..c94786208 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py @@ -0,0 +1,118 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [Description](#description) + +# %% [markdown] +# +# # Description +# +# This notebook examines ... + +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" +# #!jupyter labextension enable + +# %% +# %load_ext autoreload +# %autoreload 2 + +import logging + +import helpers.hdbg as hdbg +import helpers.henv as henv + +# %% +print(henv.get_system_signature()[0]) + +hnotebook.config_notebook() + +# %% +# hdbg.init_logger(verbosity=logging.DEBUG) +hdbg.init_logger(verbosity=logging.INFO) +# hdbg.test_logger() +_LOG = logging.getLogger(__name__) + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet openai requests)" + +# %% +import helpers.hllm as hllm +import helpers.hpandas as hpandas + +# %% +val = hllm.get_model_stats() + +# %% +import pprint + +pprint.pprint(val[0]) + +# %% +import pandas as pd + +# %% +# Normalize the nested JSON +df = pd.json_normalize(val, sep="_") +df +# View the resulting DataFrame +# print(df.T) # Transpose just for readable vertical inspection + +# %% +df.iloc[0].T + +# %% +col_names = ["id", "context_length", "pricing_prompt", "pricing_completion"] + +# %% +df.dtypes + +# %% [markdown] +# # + +# %% +for col in df.columns: + print(hpandas.infer_column_types(df[col])) + +# %% +df.apply(lambda x: 
pd.Series(hpandas.infer_column_types(x))).T + +# %% +hpandas.infer_column_types_df(df) + + +# %% +pd.to_numeric(df["pricing_request"], errors="coerce").notna() + +# %% +df["pricing_completion"] + +# %% +df.sort_values("pricing_prompt")[col_names] + +# %% +df[["pricing_prompt", "pricing_completion"]].plot.scatter( + x="pricing_prompt", y="pricing_completion" +) + +# %% +df["price_ratio"] = df["pricing_completion"] / df["pricing_prompt"] + +# %% + +# %% +# df["total_price"] = diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb new file mode 100644 index 000000000..60491a1c6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb @@ -0,0 +1,993 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-07T22:25:23.663978Z", + "start_time": "2020-06-07T22:25:23.661756Z" + } + }, + "source": [ + "# Description" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:40.920362Z", + "start_time": "2020-06-09T19:30:40.864535Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline\n", + "\n", + "import json\n", + "import logging\n", + "\n", + "import jsonpickle\n", + "import jsonpickle.ext.pandas as jsonpickle_pandas\n", + "\n", + "jsonpickle_pandas.register_handlers()\n", + "\n", + "import pandas as pd # noqa: E402\n", + "\n", + "import helpers.hdbg as hdbg # noqa: E402\n", + "import helpers.henv as henv # noqa: E402\n", + "import helpers.hplayback as hplayba # noqa: E402" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:43.871255Z", + "start_time": "2020-06-09T19:30:43.739350Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Logger already initialized: skipping\n", + "# Packages\n", + " python: 3.7.6\n", + " gluonnlp: 0.9.1\n", + " gluonts: 0.5.0\n", + " joblib: 0.15.1\n", + " mxnet: 1.6.0\n", + " numpy: 1.18.4\n", + " pandas: 1.0.3\n", + " pyarrow: 0.17.1\n", + " scipy: 1.4.1\n", + " seaborn: 0.10.1\n", + " sklearn: 0.23.1\n", + " statsmodels: 0.11.1\n", + "# Last commits:\n", + " * 268f2f1 saggese PTask2231: Checkpoint ( 2 days ago) Sun Jun 7 20:58:52 2020 (HEAD -> PTask2231_Playback_approach_for_unit_testing, origin/PTask2231_Playback_approach_for_unit_testing)\n", + " * 7025106 pavel-... PTask2291: Add args, kwargs. 
New tests ( 6 days ago) Wed Jun 3 11:38:56 2020 \n", + " * 60e0b11 saggese PTask2291: Add leftover files ( 10 days ago) Sat May 30 10:06:29 2020 \n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "_LOG.info(\"%s\", henv.get_system_signature()[0])\n", + "\n", + "hnotebook.config_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:53:12.564104Z", + "start_time": "2020-06-09T19:53:12.513350Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n" + ] + } + ], + "source": [ + "data = {\n", + " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", + " \"Price\": [700, 250, 800, 1200],\n", + "}\n", + "\n", + "df = pd.DataFrame(data, columns=[\"Product\", \"Price\"])\n", + "df.index.name = \"hello\"\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:56:24.324137Z", + "start_time": "2020-06-09T19:56:24.279767Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Product': hello\n", + " 0 Desktop Computer\n", + " 1 Tablet\n", + " 2 iPhone\n", + " 3 Laptop\n", + " Name: Product, dtype: object,\n", + " 'Price': hello\n", + " 0 700\n", + " 1 250\n", + " 2 800\n", + " 3 1200\n", + " Name: Price, dtype: int64}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# df.to_json(orient=\"\")\n", + "df.to_dict(orient=\"series\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:58:05.848188Z", + "start_time": "2020-06-09T19:58:05.747808Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"\"pd.DataFrame({'Product': ['Desktop Computer', 'Tablet', 'iPhone', 'Laptop'], 'Price': [700, 250, 800, 1200]})\"" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.to_python_code(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:58:31.870465Z", + "start_time": "2020-06-09T19:58:31.822189Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductPrice
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", + "
" + ], + "text/plain": [ + " Product Price\n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_dict(\n", + " {\n", + " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", + " \"Price\": [700, 250, 800, 1200],\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T20:14:52.983985Z", + "start_time": "2020-06-09T20:14:52.861966Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Initialize values for unit test.\n", + "dummy_0 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "dummy_0 = jsonpickle.decode(dummy_0)\n", + "dummy_1 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "dummy_1 = jsonpickle.decode(dummy_1)\n", + "# Call function.\n", + "act = F(dummy_0, dummy_1)\n", + "# Create expected value of function output.\n", + "exp = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop ComputerDesktop 
Computer,1400\\nTabletTablet,500\\niPhoneiPhone,1600\\nLaptopLaptop,2400\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", + "exp = jsonpickle.decode(exp)\n", + "# Check.\n", + "assert act.equals(exp)\n" + ] + } + ], + "source": [ + "use_playback = True\n", + "\n", + "\n", + "def F(a, b):\n", + " if use_playback:\n", + " playback = Playback(\"assert_equal\", \"F\", a, b)\n", + " playback.start()\n", + " c = a + b\n", + " if use_playback:\n", + " output = playback.end(c)\n", + " res = output\n", + " else:\n", + " res = c\n", + " return res\n", + "\n", + "\n", + "a = df\n", + "b = df\n", + "print(F(a, b))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T20:20:24.981307Z", + "start_time": "2020-06-09T20:20:24.839197Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[3, 3, ]'" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.to_python_code([\"3\", 3])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:30:54.111194Z", + "start_time": "2020-06-09T19:30:54.046499Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# obj1=\n", + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n", + "class=\n", + "# frozen=\n", + "{\n", + " \"meta\": {\n", + " \"dtypes\": {\n", + " \"Price\": \"int64\",\n", + " \"Product\": \"object\"\n", + " },\n", + " \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, 
\\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"\n", + " },\n", + " \"py/object\": \"pandas.core.frame.DataFrame\",\n", + " \"txt\": true,\n", + " \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\"\n", + "}\n", + "# obj2=\n", + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200\n", + "class=\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductPrice
hello
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", + "
" + ], + "text/plain": [ + " Product Price\n", + "hello \n", + "0 Desktop Computer 700\n", + "1 Tablet 250\n", + "2 iPhone 800\n", + "3 Laptop 1200" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hplayba.round_trip_convert(df, logging.INFO)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-07T22:32:12.623139Z", + "start_time": "2020-06-07T22:32:12.577435Z" + } + }, + "outputs": [], + "source": [ + "hplayba.round_trip_convert(\"hello\", logging.INFO)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:45:27.442281Z", + "start_time": "2020-06-09T19:45:27.380299Z" + } + }, + "outputs": [], + "source": [ + "def F(a, b):\n", + " return a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-06-09T19:45:36.907940Z", + "start_time": "2020-06-09T19:45:36.861549Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "dummy_0 = r\"3\"\n", + "dummy_0 = jsonpickle.decode(dummy_0)\n", + "dummy_1 = r\"2\"\n", + "dummy_1 = jsonpickle.decode(dummy_1)\n", + "# Call function.\n", + "act = F(dummy_0, dummy_1)\n", + "# Create expected value of function output.\n", + "exp = r\"5\"\n", + "exp = jsonpickle.decode(exp)\n", + "# Check.\n", + "assert act == exp\n", + "\n", + "\n", + "# #############################################################################\n", + "# Playback\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:31:00.704146Z", + "start_time": "2020-05-29T18:31:00.695276Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Playback:\n", + " # def __init__(self, 
file_name, mode, *args, **kwargs):\n", + " # self.args = args\n", + " # self.kwargs = kwargs\n", + " def __init__(self, file_name, mode, func_name, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + " def start(self):\n", + " self.a_json = jsonpickle.encode(self.a)\n", + " self.b_json = jsonpickle.encode(self.b)\n", + "\n", + " def end(self, ret):\n", + " self.ret_json = jsonpickle.encode(ret)\n", + " output = []\n", + " output.append(\"# Initialize values for unit test.\")\n", + " output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", + " output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", + " output.append(\"# Apply values.\")\n", + " output.append(\"act = F(a, b)\")\n", + " output.append(\"exp = %s\" % jsonpickle.decode(self.ret_json))\n", + " # output.append(\"self.assertEqual(act, exp)\")\n", + " # output.append(\"assert act == exp\")\n", + " output = \"\\n\".join(output)\n", + " print(\"output=\", output)\n", + "\n", + "\n", + "# def F(a: int, b: int):\n", + "# c = {}\n", + "# c[\"pavel\"] = a + b\n", + "# return c\n", + "\n", + "\n", + "def F(a: int, b: int):\n", + " playback = Playback(\"\", \"\", \"F\", a, b)\n", + " playback.start()\n", + " c = {}\n", + " c[\"pavel\"] = a + b\n", + " playback.end(c)\n", + " return c\n", + "\n", + "\n", + "res = F(3, 4)\n", + "print(res)\n", + "\n", + "\n", + "# #############################################################################\n", + "# Playback\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:55:14.350318Z", + "start_time": "2020-05-29T18:55:14.319820Z" + } + }, + "outputs": [], + "source": [ + "class Playback: # noqa: F811\n", + " # def __init__(self, file_name, mode, *args, **kwargs):\n", + " # self.args = args\n", + " # self.kwargs = kwargs\n", + " def __init__(self, file_name, mode, func_name, a, b):\n", + " self.a = 
a\n", + " self.b = b\n", + "\n", + " def start(self):\n", + " self.a_json = jsonpickle.encode(self.a)\n", + " self.b_json = jsonpickle.encode(self.b)\n", + "\n", + " def end(self, ret):\n", + " self.ret_json = jsonpickle.encode(ret)\n", + " output = []\n", + " output.append(\"# Initialize values for unit test.\")\n", + " # output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", + " # output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", + " output.append(f\"a = r'{self.a_json}'\")\n", + " output.append(\"a = jsonpickle.decode(a)\")\n", + " output.append(f\"b = r'{self.b_json}'\")\n", + " output.append(\"b = jsonpickle.decode(b)\")\n", + " output.append(\"# Apply values.\")\n", + " # output.append(\"act = F(a, b)[1]\")\n", + " output.append(\"act = F(a, b)\")\n", + " output.append(f\"exp = r'{self.ret_json}'\")\n", + " output.append(\"exp = jsonpickle.decode(exp)\")\n", + " # output.append(\"self.assertEqual(act, exp)\")\n", + " output.append(\"assert act.equals(exp)\")\n", + " # output.append(\"assert act == exp\")\n", + " output = \"\\n\".join(output)\n", + " return output\n", + "\n", + "\n", + "# def F(a: int, b: int):\n", + "# c = {}\n", + "# c[\"pavel\"] = a + b\n", + "# return c\n", + "\n", + "use_playback = True\n", + "\n", + "\n", + "def F(a: pd.DataFrame, b: pd.DataFrame):\n", + " if use_playback:\n", + " playback = Playback(\"\", \"\", \"F\", a, b)\n", + " playback.start()\n", + " # c = {}\n", + " # c[\"pavel\"] = a + b\n", + " c = a + b\n", + " if use_playback:\n", + " output = playback.end(c)\n", + " res = output, c\n", + " else:\n", + " res = c\n", + " return res\n", + "\n", + "\n", + "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", + "b = pd.DataFrame({\"Price\": [1, 1, 1, 1]})\n", + "\n", + "res = F(a, b)\n", + "output = res[0]\n", + "print(output)\n", + "exec(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:51:02.968918Z", + "start_time": 
"2020-05-29T18:51:02.964513Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", + "a = jsonpickle.decode(a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:44:10.790106Z", + "start_time": "2020-05-29T18:44:10.779459Z" + } + }, + "outputs": [], + "source": [ + "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", + "\n", + "# round_trip(a)\n", + "frozen = jsonpickle.encode(a)\n", + "print(frozen)\n", + "print(f\"frozen2 = '{frozen}'\")\n", + "# print(\"frozen = '%s'\" % frozen)\n", + "assert 0\n", + "#\n", + "print(\"frozen=\")\n", + "print(json_pretty_print(frozen)) # noqa: F821\n", + "#\n", + "obj2 = jsonpickle.decode(frozen)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:49:44.390404Z", + "start_time": "2020-05-29T18:49:44.384524Z" + } + }, + "outputs": [], + "source": [ + "frozen2 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", + "print(frozen2)\n", + "# print(\"\\n\")\n", + "# print(frozen)\n", + "if False and isinstance(frozen2, str):\n", + " # print(frozen2[61])\n", + " # assert 0\n", + " frozen2 = json.loads(frozen2)\n", + " print(frozen2)\n", + "frozen2 = 
jsonpickle.decode(frozen2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:40:34.682031Z", + "start_time": "2020-05-29T18:40:34.668987Z" + } + }, + "outputs": [], + "source": [ + "a = \"\"\"{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}\"\"\"\n", + "a = jsonpickle.decode(a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:39:18.728676Z", + "start_time": "2020-05-29T18:39:18.711958Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "a = jsonpickle.decode(a)\n", + "b = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n1\\n1\\n1\\n1\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "b = jsonpickle.decode(b)\n", + "# Apply values.\n", + "act = F(a, b)\n", + "exp = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n701\\n251\\n801\\n1201\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 
1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", + "exp = jsonpickle.decode(exp)\n", + "assert act == exp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:27:30.781670Z", + "start_time": "2020-05-29T18:27:30.777539Z" + } + }, + "outputs": [], + "source": [ + "# Initialize values for unit test.\n", + "a = 3\n", + "b = 4\n", + "# Apply values.\n", + "act = F(a, b)\n", + "exp = {\"pavel\": 7}\n", + "assert act == exp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:10:03.802405Z", + "start_time": "2020-05-29T18:10:03.790642Z" + }, + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "df2 = round_trip(df) # noqa: F821\n", + "\n", + "\n", + "# #############################################################################\n", + "# Thing\n", + "# #############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T21:48:14.394447Z", + "start_time": "2020-05-11T21:48:14.384307Z" + } + }, + "outputs": [], + "source": [ + "class Thing:\n", + " def __init__(self, name):\n", + " self.name = name\n", + "\n", + "\n", + "obj = Thing(\"Awesome\")\n", + "\n", + "round_trip(obj) # noqa: F821" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T19:58:38.314059Z", + "start_time": "2020-05-11T19:58:38.309331Z" + } + }, + "outputs": [], + "source": [ + "def test(a: int, b: int):\n", + " print(round_trip(a)) # noqa: F821\n", + "\n", + "\n", + "test(\"strunz\", 6)\n", + "test(4, 6)\n", + "test([\"hello\"], 6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T14:03:41.315868Z", + "start_time": 
"2020-05-11T14:03:41.311264Z" + } + }, + "outputs": [], + "source": [ + "df.index.dtype #" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T14:03:00.632566Z", + "start_time": "2020-05-11T14:03:00.623714Z" + } + }, + "outputs": [], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-29T18:16:34.748252Z", + "start_time": "2020-05-29T18:16:34.736249Z" + } + }, + "outputs": [], + "source": [ + "# import io\n", + "# import io.StringIO\n", + "# from io import StringIO\n", + "\n", + "# output = StringIO.StringIO()\n", + "\n", + "orient = \"columns\"\n", + "# orient = \"split\"\n", + "# orient = \"records\"\n", + "# orient = \"table\"\n", + "df_as_str = df.to_json(orient=orient)\n", + "\n", + "# split\n", + "# records\n", + "# index\n", + "# values\n", + "# table\n", + "# columns (the default format)\n", + "\n", + "python_code = []\n", + "target_var = \"df_as_str\"\n", + "python_code.append(f\"{target_var} = {df_as_str}\")\n", + "python_code.append(f\"{target_var}.index.name = '{df.index.name}'\")\n", + "python_code = \"\\n\".join(python_code)\n", + "print(python_code)\n", + "\n", + "exec(python_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-05-11T13:42:52.575973Z", + "start_time": "2020-05-11T13:42:52.568178Z" + } + }, + "outputs": [], + "source": [ + "arr = eval(df_as_str)\n", + "df2 = pd.DataFrame.from_dict(arr, orient=\"columns\")\n", + "df2.index.name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:.conda-develop] *", + "language": "python", + "name": "conda-env-.conda-develop-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "165px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py new file mode 100644 index 000000000..22176ce52 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py @@ -0,0 +1,374 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python [conda env:.conda-develop] * +# language: python +# name: conda-env-.conda-develop-py +# --- + +# %% [markdown] +# # Description + +# %% [markdown] +# # Imports + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +import json +import logging + +import jsonpickle +import jsonpickle.ext.pandas as jsonpickle_pandas + +jsonpickle_pandas.register_handlers() + +import pandas as pd # noqa: E402 + +import helpers.hdbg as hdbg # noqa: E402 +import helpers.henv as henv # noqa: E402 +import helpers.hplayback as hplayba # noqa: E402 + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +_LOG.info("%s", henv.get_system_signature()[0]) + 
+hnotebook.config_notebook() + +# %% +data = { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], +} + +df = pd.DataFrame(data, columns=["Product", "Price"]) +df.index.name = "hello" +print(df) + +# %% +# df.to_json(orient="") +df.to_dict(orient="series") + +# %% +hplayba.to_python_code(df) + +# %% +pd.DataFrame.from_dict( + { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], + } +) + +# %% +use_playback = True + + +def F(a, b): + if use_playback: + playback = Playback("assert_equal", "F", a, b) + playback.start() + c = a + b + if use_playback: + output = playback.end(c) + res = output + else: + res = c + return res + + +a = df +b = df +print(F(a, b)) + +# %% +hplayba.to_python_code(["3", 3]) + +# %% +hplayba.round_trip_convert(df, logging.INFO) + +# %% +hplayba.round_trip_convert("hello", logging.INFO) + + +# %% +def F(a, b): + return a + b + + +# %% +# Initialize values for unit test. +dummy_0 = r"3" +dummy_0 = jsonpickle.decode(dummy_0) +dummy_1 = r"2" +dummy_1 = jsonpickle.decode(dummy_1) +# Call function. +act = F(dummy_0, dummy_1) +# Create expected value of function output. +exp = r"5" +exp = jsonpickle.decode(exp) +# Check. 
+assert act == exp + + +# ############################################################################# +# Playback +# ############################################################################# + + +# %% +class Playback: + # def __init__(self, file_name, mode, *args, **kwargs): + # self.args = args + # self.kwargs = kwargs + def __init__(self, file_name, mode, func_name, a, b): + self.a = a + self.b = b + + def start(self): + self.a_json = jsonpickle.encode(self.a) + self.b_json = jsonpickle.encode(self.b) + + def end(self, ret): + self.ret_json = jsonpickle.encode(ret) + output = [] + output.append("# Initialize values for unit test.") + output.append("a = %s" % jsonpickle.decode(self.a_json)) + output.append("b = %s" % jsonpickle.decode(self.b_json)) + output.append("# Apply values.") + output.append("act = F(a, b)") + output.append("exp = %s" % jsonpickle.decode(self.ret_json)) + # output.append("self.assertEqual(act, exp)") + # output.append("assert act == exp") + output = "\n".join(output) + print("output=", output) + + +# def F(a: int, b: int): +# c = {} +# c["pavel"] = a + b +# return c + + +def F(a: int, b: int): + playback = Playback("", "", "F", a, b) + playback.start() + c = {} + c["pavel"] = a + b + playback.end(c) + return c + + +res = F(3, 4) +print(res) + + +# ############################################################################# +# Playback +# ############################################################################# + + +# %% +class Playback: # noqa: F811 + # def __init__(self, file_name, mode, *args, **kwargs): + # self.args = args + # self.kwargs = kwargs + def __init__(self, file_name, mode, func_name, a, b): + self.a = a + self.b = b + + def start(self): + self.a_json = jsonpickle.encode(self.a) + self.b_json = jsonpickle.encode(self.b) + + def end(self, ret): + self.ret_json = jsonpickle.encode(ret) + output = [] + output.append("# Initialize values for unit test.") + # output.append("a = %s" % jsonpickle.decode(self.a_json)) + # 
output.append("b = %s" % jsonpickle.decode(self.b_json)) + output.append(f"a = r'{self.a_json}'") + output.append("a = jsonpickle.decode(a)") + output.append(f"b = r'{self.b_json}'") + output.append("b = jsonpickle.decode(b)") + output.append("# Apply values.") + # output.append("act = F(a, b)[1]") + output.append("act = F(a, b)") + output.append(f"exp = r'{self.ret_json}'") + output.append("exp = jsonpickle.decode(exp)") + # output.append("self.assertEqual(act, exp)") + output.append("assert act.equals(exp)") + # output.append("assert act == exp") + output = "\n".join(output) + return output + + +# def F(a: int, b: int): +# c = {} +# c["pavel"] = a + b +# return c + +use_playback = True + + +def F(a: pd.DataFrame, b: pd.DataFrame): + if use_playback: + playback = Playback("", "", "F", a, b) + playback.start() + # c = {} + # c["pavel"] = a + b + c = a + b + if use_playback: + output = playback.end(c) + res = output, c + else: + res = c + return res + + +a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) +b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + +res = F(a, b) +output = res[0] +print(output) +exec(output) + +# %% +# Initialize values for unit test. 
+a = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' +a = jsonpickle.decode(a) + +# %% +a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + +# round_trip(a) +frozen = jsonpickle.encode(a) +print(frozen) +print(f"frozen2 = '{frozen}'") +# print("frozen = '%s'" % frozen) +assert 0 +# +print("frozen=") +print(json_pretty_print(frozen)) # noqa: F821 +# +obj2 = jsonpickle.decode(frozen) + +# %% +frozen2 = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' +print(frozen2) +# print("\n") +# print(frozen) +if False and isinstance(frozen2, str): + # print(frozen2[61]) + # assert 0 + frozen2 = json.loads(frozen2) + print(frozen2) +frozen2 = jsonpickle.decode(frozen2) + +# %% +a = """{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}""" +a = jsonpickle.decode(a) + +# %% +# Initialize values for unit test. 
+a = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +a = jsonpickle.decode(a) +b = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n1\n1\n1\n1\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +b = jsonpickle.decode(b) +# Apply values. +act = F(a, b) +exp = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n701\n251\n801\n1201\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' +exp = jsonpickle.decode(exp) +assert act == exp + +# %% +# Initialize values for unit test. +a = 3 +b = 4 +# Apply values. 
+act = F(a, b) +exp = {"pavel": 7} +assert act == exp + +# %% +df2 = round_trip(df) # noqa: F821 + + +# ############################################################################# +# Thing +# ############################################################################# + + +# %% +class Thing: + def __init__(self, name): + self.name = name + + +obj = Thing("Awesome") + +round_trip(obj) # noqa: F821 + + +# %% +def test(a: int, b: int): + print(round_trip(a)) # noqa: F821 + + +test("strunz", 6) +test(4, 6) +test(["hello"], 6) + +# %% +df.index.dtype # + +# %% +df.dtypes + +# %% +# import io +# import io.StringIO +# from io import StringIO + +# output = StringIO.StringIO() + +orient = "columns" +# orient = "split" +# orient = "records" +# orient = "table" +df_as_str = df.to_json(orient=orient) + +# split +# records +# index +# values +# table +# columns (the default format) + +python_code = [] +target_var = "df_as_str" +python_code.append(f"{target_var} = {df_as_str}") +python_code.append(f"{target_var}.index.name = '{df.index.name}'") +python_code = "\n".join(python_code) +print(python_code) + +exec(python_code) + +# %% +arr = eval(df_as_str) +df2 = pd.DataFrame.from_dict(arr, orient="columns") +df2.index.name + +# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb new file mode 100644 index 000000000..4516033f2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb @@ -0,0 +1,1774 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "895cb286", + "metadata": {}, + "source": [ + "Show Parquet / Pyarrow API." 
+ ] + }, + { + "cell_type": "markdown", + "id": "b068d525", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "8f46ec68", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:53:22.684558Z", + "start_time": "2021-06-16T20:53:22.645267Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0m\u001b[33mWARNING\u001b[0m: Logger already initialized: skipping\n" + ] + } + ], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "import pandas as pd\n", + "import pyarrow as pa\n", + "import pyarrow.dataset as ds\n", + "import pyarrow.parquet as pq\n", + "from pyarrow.dataset import DirectoryPartitioning\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hio as hio\n", + "\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "215ff89e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:19:03.323062Z", + "start_time": "2021-06-15T11:19:03.303632Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "2000-01-04 0 A 97 62\n", + "2000-01-05 0 A 12 25\n" + ] + } + ], + "source": [ + "def get_df() -> pd.DataFrame:\n", + " \"\"\"\n", + " Create pandas random data, like:\n", + "\n", + " ```\n", + " idx instr val1 val2\n", + " 2000-01-01 0 A 99 30\n", + " 2000-01-02 0 A 54 46\n", + " 2000-01-03 0 A 85 86\n", + " ```\n", + " \"\"\"\n", + " instruments = \"A B C D E\".split()\n", + " \"id stock val1 val2\".split()\n", + " df_idx = pd.date_range(\n", + " pd.Timestamp(\"2000-01-01\"), pd.Timestamp(\"2000-01-15\"), freq=\"1D\"\n", + " )\n", + " # print(df_idx)\n", + " random.seed(1000)\n", + "\n", + " df = []\n", + " for idx, inst in 
enumerate(instruments):\n", + " df_tmp = pd.DataFrame(\n", + " {\n", + " \"idx\": idx,\n", + " \"instr\": inst,\n", + " \"val1\": [random.randint(0, 100) for k in range(len(df_idx))],\n", + " \"val2\": [random.randint(0, 100) for k in range(len(df_idx))],\n", + " },\n", + " index=df_idx,\n", + " )\n", + " # print(df_tmp)\n", + " df.append(df_tmp)\n", + " df = pd.concat(df)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "8e8235d0", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:35:16.903580Z", + "start_time": "2021-06-15T11:35:16.895316Z" + } + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "EOL while scanning string literal (, line 4)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m4\u001b[0m\n\u001b[0;31m txt += \"# df=\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m EOL while scanning string literal\n" + ] + } + ], + "source": [ + "def df_to_str(df: pd.DataFrame) -> str:\n", + " txt = \"\"\n", + " txt += \"# df=\\n%s\" % df.head(3)\n", + " txt += \"\\n# df.shape=\\n%s\" % str(df.shape)\n", + " txt += \"\\n# df.dtypes=\\n%s\" % str(df.dtypes)\n", + " return txt" + ] + }, + { + "cell_type": "markdown", + "id": "17cc474b", + "metadata": {}, + "source": [ + "# Save and load all data in one file" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "cb399156", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:27.514505Z", + "start_time": "2021-06-15T11:25:27.496811Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = 
get_df()\n", + "# print(df.head())\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "940dc7d2", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:34.893472Z", + "start_time": "2021-06-15T11:25:34.886977Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table=\n", + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "__index_level_0__: timestamp[ns]\n" + ] + } + ], + "source": [ + "table = pa.Table.from_pandas(df)\n", + "\n", + "print(\"table=\\n%s\" % table)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "93df67fc", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:38.560269Z", + "start_time": "2021-06-15T11:25:38.533905Z" + } + }, + "outputs": [], + "source": [ + "# Save.\n", + "file_name = \"df_in_one_file.pq\"\n", + "pq.write_table(table, file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "155e36c0", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:25:51.016044Z", + "start_time": "2021-06-15T11:25:51.001034Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "__index_level_0__: timestamp[us]\n", + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Load.\n", + "df2 = pq.read_table(file_name)\n", + "print(df2)\n", + "\n", + "df2 = df2.to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "1098757c", + "metadata": {}, + "source": [ + "## Read a subset of columns" + ] + }, + { + "cell_type": "code", + 
"execution_count": 27, + "id": "6f4a652f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:27:11.924350Z", + "start_time": "2021-06-15T11:27:11.910680Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pyarrow.Table\n", + "idx: int64\n", + "val1: int64\n", + "# df=\n", + " idx val1\n", + "0 0 99\n", + "1 0 54\n", + "2 0 85\n", + "# df.shape=\n", + "(75, 2)\n", + "# df.dtypes=\n", + "idx int64\n", + "val1 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df2 = pq.read_table(file_name, columns=[\"idx\", \"val1\"])\n", + "print(df2)\n", + "\n", + "df2 = df2.to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "012cebdb", + "metadata": {}, + "source": [ + "## Partitioned dataset\n", + "\n", + "from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data\n", + "\n", + "- A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir\n", + "- E.g., \"Hive\" patitioning scheme \"key=vale\" dir names" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ca26642e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:11.964993Z", + "start_time": "2021-06-15T11:30:11.947282Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2\n", + "2000-01-01 0 A 99 30\n", + "2000-01-02 0 A 54 46\n", + "2000-01-03 0 A 85 86\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = get_df()\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "7cae349f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:27.617064Z", + "start_time": "2021-06-15T11:30:27.541418Z" + } + }, + "outputs": [], + "source": [ + "base = 
\".\"\n", + "dir_name = os.path.join(base, \"parquet_dataset_partitioned\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "fd57116d", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:30:30.672054Z", + "start_time": "2021-06-15T11:30:30.389512Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls parquet_dataset_partitioned" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ac82b5ad", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:31:29.322947Z", + "start_time": "2021-06-15T11:31:29.298883Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./parquet_dataset_partitioned/idx=0/cab9de6eff0c47bcb688a1ce437c7f89.parquet\n", + "./parquet_dataset_partitioned/idx=1/56813e569097420cae892720d3bb0789.parquet\n", + "./parquet_dataset_partitioned/idx=2/5c9a17d2e1294dd58c7d8695868c2cb5.parquet\n", + "./parquet_dataset_partitioned/idx=3/b28576eb22d54999980a313a24511497.parquet\n", + "./parquet_dataset_partitioned/idx=4/8ee3f0d7585b48959a560c954562add8.parquet\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "64394b7f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:32:01.839074Z", + "start_time": "2021-06-15T11:32:01.822727Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 A 99 30 0\n", + "2000-01-02 A 54 46 0\n", + "2000-01-03 A 85 86 0\n", + "# df.shape=\n", + "(75, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 
int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read everything.\n", + "df2 = dataset.to_table().to_pandas()\n", + "\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "df96e1db", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:33:05.171630Z", + "start_time": "2021-06-15T11:33:05.147040Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 B 18 22 1\n", + "2000-01-02 B 59 89 1\n", + "2000-01-03 B 91 90 1\n", + "# df.shape=\n", + "(15, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n", + "# df=\n", + " instr val1 val2 idx\n", + "2000-01-01 A 99 30 0\n", + "2000-01-02 A 54 46 0\n", + "2000-01-03 A 85 86 0\n", + "# df.shape=\n", + "(45, 4)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Load part of the data.\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") == 1).to_pandas()\n", + "print(df_to_str(df2))\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") < 3).to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "markdown", + "id": "b3c27848", + "metadata": {}, + "source": [ + "## Add year-month partitions" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "69d2ea15", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:36:11.106142Z", + "start_time": "2021-06-15T11:36:11.087701Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " idx instr val1 val2 year month\n", + "2000-01-01 0 A 99 30 2000 1\n", + "2000-01-02 0 A 54 46 2000 1\n", + "2000-01-03 0 A 85 86 2000 1\n", + "# df.shape=\n", + "(75, 6)\n", + "# df.dtypes=\n", + "idx int64\n", + "instr object\n", + "val1 int64\n", + 
"val2 int64\n", + "year int64\n", + "month int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "df = get_df()\n", + "df[\"year\"] = df.index.year\n", + "df[\"month\"] = df.index.month\n", + "\n", + "print(df_to_str(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1a2f8c3a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:01.738085Z", + "start_time": "2021-06-15T11:37:01.730748Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table=\n", + "pyarrow.Table\n", + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "year: int64\n", + "month: int64\n", + "__index_level_0__: timestamp[ns]\n" + ] + } + ], + "source": [ + "table = pa.Table.from_pandas(df)\n", + "\n", + "print(\"table=\\n%s\" % table)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "9112ed65", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:04.832037Z", + "start_time": "2021-06-15T11:37:04.702121Z" + } + }, + "outputs": [], + "source": [ + "base = \".\"\n", + "dir_name = os.path.join(base, \"pq_partitioned2\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\", \"year\", \"month\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "844913cc", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:17.553902Z", + "start_time": "2021-06-15T11:37:17.276875Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls $dir_name" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "e5ba8be3", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:07.695235Z", + "start_time": "2021-06-15T11:37:07.433612Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"bc6b2314c7f640a38c62029280f6f65e.parquet\r\n" + ] + } + ], + "source": [ + "!ls $dir_name/idx=0/year=2000/month=1" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "2d93f116", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:37:26.153218Z", + "start_time": "2021-06-15T11:37:26.109040Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./pq_partitioned2/idx=0/year=2000/month=1/bc6b2314c7f640a38c62029280f6f65e.parquet\n", + "./pq_partitioned2/idx=1/year=2000/month=1/bb178ff0bdd344ca8328f9d67398b322.parquet\n", + "./pq_partitioned2/idx=2/year=2000/month=1/16081eea25fd4da6bd802037b541766c.parquet\n", + "./pq_partitioned2/idx=3/year=2000/month=1/1557b3c461054eadba16e3072fbd3a8a.parquet\n", + "./pq_partitioned2/idx=4/year=2000/month=1/07a0c7fcf054450296b35452b57236ef.parquet\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "21148afd", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T11:39:19.396955Z", + "start_time": "2021-06-15T11:39:19.374534Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx year month\n", + "2000-01-01 C 99 37 2 2000 1\n", + "2000-01-02 C 98 48 2 2000 1\n", + "2000-01-03 C 70 58 2 2000 1\n", + "# df.shape=\n", + "(15, 6)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "year int32\n", + "month int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "df2 = dataset.to_table(filter=ds.field(\"idx\") == 2).to_pandas()\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + 
"id": "d9e4e596", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:54:34.478646Z", + "start_time": "2021-06-16T20:54:34.250254Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "idx: int64\n", + "instr: string\n", + "val1: int64\n", + "val2: int64\n", + "year: int64\n", + "month: int64\n", + "__index_level_0__: timestamp[ns]\n", + "-- schema metadata --\n", + "pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975\n" + ] + } + ], + "source": [ + "# We could scan manually and create the dirs manually if we don't want to add\n", + "# add a new dir.\n", + "base = \".\"\n", + "dir_name = os.path.join(base, \"parquet_dataset_partitioned2\")\n", + "os.system(\"rm -rf %s\" % dir_name)\n", + "\n", + "schemas = []\n", + "\n", + "schema = pa.Table.from_pandas(df).schema\n", + "print(schema)\n", + "# assert 0\n", + "# idx: int64\n", + "# instr: string\n", + "# val1: int64\n", + "# val2: int64\n", + "# year: int64\n", + "# month: int64\n", + "\n", + "# grouped = df.groupby(lambda x: x.day)\n", + "group_by_idx = df.groupby(\"idx\")\n", + "for idx, df_tmp in group_by_idx:\n", + " _LOG.debug(\"idx=%s -> df.shape=%s\", idx, str(df_tmp.shape))\n", + " #\n", + " group_by_year = df_tmp.groupby(lambda x: x.year)\n", + " for year, df_tmp2 in group_by_year:\n", + " _LOG.debug(\"year=%s -> df.shape=%s\", year, str(df_tmp2.shape))\n", + " #\n", + " group_by_month = df_tmp2.groupby(lambda x: x.month)\n", + " for month, df_tmp3 in group_by_month:\n", + " _LOG.debug(\"month=%s -> df.shape=%s\", month, str(df_tmp3.shape))\n", + " # file_name = \"df_in_one_file.pq\"\n", + " # pq.write_table(table, file_name)\n", + " # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", + " subdir_name = os.path.join(\n", + " dir_name, f\"idx={idx}\", f\"year={year}\", f\"month={month}\"\n", + " )\n", + " table = pa.Table.from_pandas(df_tmp3, schema=schema)\n", + " 
schemas.append(table.schema)\n", + " # print(df_tmp3)\n", + " # print(table.schema)\n", + " # pq.write_to_dataset(table,\n", + " # subdir_name, schema=schema)\n", + " file_name = os.path.join(subdir_name, \"df_out.pq\")\n", + " hio.create_enclosing_dir(file_name)\n", + " pq.write_table(table, file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "8309de4a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:41:14.320037Z", + "start_time": "2021-06-16T20:41:14.314354Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schemas[0] == schemas[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "f0e49f46", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:42:26.864001Z", + "start_time": "2021-06-16T20:42:26.856395Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " 
__index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", + " idx: int64\n", + " instr: string\n", + " val1: int64\n", + " val2: int64\n", + " year: int64\n", + " month: int64\n", + " __index_level_0__: timestamp[ns]\n", + " -- schema metadata --\n", + " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975]" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schemas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1130cbc2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "e5bdcdd8", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:53:50.373825Z", + "start_time": "2021-06-16T20:53:50.099251Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_out.pq\r\n" + ] + } + ], + "source": [ + "!ls $dir_name/idx=0/year=2000/month=1" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "aaf67ae6", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T20:55:02.764098Z", + "start_time": "2021-06-16T20:55:02.717192Z" + } + }, + "outputs": [ + { + "ename": "ArrowInvalid", + "evalue": "Unable to merge: Field month has incompatible types: int64 vs int32", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mArrowInvalid\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msrc_dir\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34mf\"{dir_name}/idx=0/year=2000\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m dataset = ds.dataset(src_dir,\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"parquet\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m partitioning=\"hive\")\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36mdataset\u001b[0;34m(source, schema, format, filesystem, partitioning, partition_base_dir, exclude_invalid_files, ignore_prefixes)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_filesystem_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36m_filesystem_dataset\u001b[0;34m(source, schema, filesystem, partitioning, format, partition_base_dir, exclude_invalid_files, selector_ignore_prefixes)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mfactory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFileSystemDatasetFactory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpaths_or_selector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfactory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DatasetFactory.finish\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.pyarrow_internal_check_status\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mArrowInvalid\u001b[0m: Unable to merge: Field month has incompatible types: int64 vs int32" + ] + } + ], + "source": [ + "# Read data back.\n", + "# https://github.com/dask/dask/issues/4194\n", + "# src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\n", + "src_dir = f\"{dir_name}/idx=0/year=2000\"\n", + "dataset = ds.dataset(src_dir, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "df2 = dataset.to_table().to_pandas()\n", + "# 
print(df_to_str(df2))\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "markdown", + "id": "98f4111d", + "metadata": {}, + "source": [ + "## Partition manually" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "f0b33d85", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-15T00:57:11.260871Z", + "start_time": "2021-06-15T00:57:11.235982Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(((year == 2009) and (month == 11)) and (day == 3))\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Neither field_names nor schema was passed; cannot infer field_names", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/2009/11/3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiscover\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DirectoryPartitioning.discover\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Neither field_names nor schema was passed; cannot infer field_names" + ] + } + ], + "source": [ + "partitioning = DirectoryPartitioning(\n", + " pa.schema([(\"year\", pa.int16()), (\"month\", pa.int8()), (\"day\", pa.int8())])\n", + ")\n", + 
"print(partitioning.parse(\"/2009/11/3\"))\n", + "\n", + "# partitioning.discover()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "ad70cbee", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:03:31.809969Z", + "start_time": "2021-06-16T11:03:31.526597Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" + ] + } + ], + "source": [ + "!ls /app/data" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "b19d1189", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:18:31.838549Z", + "start_time": "2021-06-16T11:18:31.821223Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", + "/app/data/idx=0/year=2000/month=1/0435eeb615b14155bdc26266b91a4b1b.parquet\n", + "/app/data/idx=0/year=2000/month=1/05cc8a039ec746acb5668fde0a372028.parquet\n", + "/app/data/idx=0/year=2000/month=1/06039c8c6e9e4d54be7dcbef2bcdfa78.parquet\n", + "/app/data/idx=0/year=2000/month=1/0bb8e349594445a08fca4e337a7922d4.parquet\n", + "/app/data/idx=0/year=2000/month=1/115df7cedef540469cec56ee40ac19bd.parquet\n", + "/app/data/idx=0/year=2000/month=1/1174a70ffe614f4a9875b680e255902b.parquet\n", + "/app/data/idx=0/year=2000/month=1/122bdd75109c4fc7918d6db00f7bed41.parquet\n", + "/app/data/idx=0/year=2000/month=1/1543e41a14234c279fdfaa8656e8a71d.parquet\n", + "/app/data/idx=0/year=2000/month=1/17bd91618d5240fe83309608e91cb1ef.parquet\n", + "/app/data/idx=0/year=2000/month=1/18461c0ee57845768a503cfc865e323b.parquet\n", + "/app/data/idx=0/year=2000/month=1/1994694468184272a388fb8b40f03d5b.parquet\n", + "/app/data/idx=0/year=2000/month=1/1eb5f1adbe57418fa5d866d35902c39a.parquet\n", + "/app/data/idx=0/year=2000/month=1/2bd3c3ae435b489bb194ef7b2a715d9a.parquet\n", + "/app/data/idx=0/year=2000/month=1/2ded0d292def4e1186653d90852295f6.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/2ff4a2fa54664e67bab85a76324738ed.parquet\n", + "/app/data/idx=0/year=2000/month=1/378e55b8faf24033abf1c275741a88e8.parquet\n", + "/app/data/idx=0/year=2000/month=1/37a96e8834af4a87bc64ec3d1199ad54.parquet\n", + "/app/data/idx=0/year=2000/month=1/3a24331d6e51402d9a86c974f8a3bd05.parquet\n", + "/app/data/idx=0/year=2000/month=1/3ae93c6a21cc4a88bbaf90219f275563.parquet\n", + "/app/data/idx=0/year=2000/month=1/3b5f35d2add64a738cec5061659e35a2.parquet\n", + "/app/data/idx=0/year=2000/month=1/3d16749690f840c49facce0e37461a7e.parquet\n", + "/app/data/idx=0/year=2000/month=1/44bf70580b9a43829addb2a9e8f89dc6.parquet\n", + "/app/data/idx=0/year=2000/month=1/46237f338cee47c69f33b15fcb83817a.parquet\n", + "/app/data/idx=0/year=2000/month=1/46f19733b2d642c29adb58bf9499b6ca.parquet\n", + "/app/data/idx=0/year=2000/month=1/485f7d3d06b3486ca4bc8b35420f997a.parquet\n", + "/app/data/idx=0/year=2000/month=1/4cd8551c6c8f4daab7313732b9c1cea8.parquet\n", + "/app/data/idx=0/year=2000/month=1/4eca6951932d47d8a5678422da4a3d70.parquet\n", + "/app/data/idx=0/year=2000/month=1/512a55d6f91c412f951ba66728bfe118.parquet\n", + "/app/data/idx=0/year=2000/month=1/521193be72e1465ca33034cfd8e93ac9.parquet\n", + "/app/data/idx=0/year=2000/month=1/59aa37cc4792493899e812215b3bb822.parquet\n", + "/app/data/idx=0/year=2000/month=1/5aaa32a61b614d65a91747336c8028f0.parquet\n", + "/app/data/idx=0/year=2000/month=1/5cf9306e97ae42fdae53369710a5d0b6.parquet\n", + "/app/data/idx=0/year=2000/month=1/5fb97e575a9c4ff282293e9810040594.parquet\n", + "/app/data/idx=0/year=2000/month=1/613e6048f8434fccafed8c9d457fddc1.parquet\n", + "/app/data/idx=0/year=2000/month=1/61d608f23a69494eaef248d79a776ede.parquet\n", + "/app/data/idx=0/year=2000/month=1/62bf226aebb641229b33f7e3bf9f5cb1.parquet\n", + "/app/data/idx=0/year=2000/month=1/62c09d56d67d4c738568fed318152ca9.parquet\n", + "/app/data/idx=0/year=2000/month=1/652129318d7a4d5b83e256a94803ecdc.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/67607b3708e44233927974a861972a99.parquet\n", + "/app/data/idx=0/year=2000/month=1/69915fb955c24302a905e4520a76f547.parquet\n", + "/app/data/idx=0/year=2000/month=1/6cc812431ef44bd195e5baf9715095a6.parquet\n", + "/app/data/idx=0/year=2000/month=1/6ce2493e58b34b53ae42da84ee0ef165.parquet\n", + "/app/data/idx=0/year=2000/month=1/6e88cf1910bc4a71bcea865ed1605363.parquet\n", + "/app/data/idx=0/year=2000/month=1/70c1ba22a85f4b489096f80eacd5855c.parquet\n", + "/app/data/idx=0/year=2000/month=1/7705f37eac7e40ceb2fba4c9fd2cb81d.parquet\n", + "/app/data/idx=0/year=2000/month=1/7bdb4feb6a874697b8c2f9a6cb03a6e6.parquet\n", + "/app/data/idx=0/year=2000/month=1/7fd82496e8274e999d217df302fd46b0.parquet\n", + "/app/data/idx=0/year=2000/month=1/8130570ceae44ca69ce7b2cd9865c3ec.parquet\n", + "/app/data/idx=0/year=2000/month=1/83f8e04fd5ac49ec80ac7b98e8221278.parquet\n", + "/app/data/idx=0/year=2000/month=1/8469e01698bf47f28fda41a3935eeb64.parquet\n", + "/app/data/idx=0/year=2000/month=1/874aa31290804dd0abca1a8f40dc4875.parquet\n", + "/app/data/idx=0/year=2000/month=1/888ef49654f241df8cae8454a5cd3f07.parquet\n", + "/app/data/idx=0/year=2000/month=1/8aa4e41d00fc438c9de0906ecc66bbb9.parquet\n", + "/app/data/idx=0/year=2000/month=1/90e662712235472ebae79fd64eaae094.parquet\n", + "/app/data/idx=0/year=2000/month=1/91e7dcbfc57a495a943bad2400690bc1.parquet\n", + "/app/data/idx=0/year=2000/month=1/9394c04aef64432fb94219d0e8b50286.parquet\n", + "/app/data/idx=0/year=2000/month=1/9740961302bc40b192d20715c52d6ef6.parquet\n", + "/app/data/idx=0/year=2000/month=1/99e92f5585514ed4bd43b5bf50bdaaa8.parquet\n", + "/app/data/idx=0/year=2000/month=1/9bf5c3598f69411fb1acdc30779b25bd.parquet\n", + "/app/data/idx=0/year=2000/month=1/9d81c342203c4396ac2d9efcbb0cae7a.parquet\n", + "/app/data/idx=0/year=2000/month=1/9d8b2486e80f40468cf4ae50a41fda41.parquet\n", + "/app/data/idx=0/year=2000/month=1/a170565f336f4b3b99994c8d83012a4d.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/a50138dae90f478781bf032908703ef4.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5ab58aa310e47669e9d3604bf94f155.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5bd118e999e4df6ab3306e52671228e.parquet\n", + "/app/data/idx=0/year=2000/month=1/a5c0a7da693147b98f68811b4af7c79e.parquet\n", + "/app/data/idx=0/year=2000/month=1/a84afce396eb4afa91de3b08129e2ab7.parquet\n", + "/app/data/idx=0/year=2000/month=1/a8c1f364a7c944bb89d59d354059e596.parquet\n", + "/app/data/idx=0/year=2000/month=1/aa3bb180eda948c4aab93428ece443a8.parquet\n", + "/app/data/idx=0/year=2000/month=1/aa868fa8e11a4a838c19a1a260dcf6f6.parquet\n", + "/app/data/idx=0/year=2000/month=1/b01aa53c572d492f9667f157455742fc.parquet\n", + "/app/data/idx=0/year=2000/month=1/b6a7fc9dd14a4af6a3635cd138abdfe2.parquet\n", + "/app/data/idx=0/year=2000/month=1/b740e474de9f4b5497877c14f688faed.parquet\n", + "/app/data/idx=0/year=2000/month=1/b81d3d9c4045498c9deb3968b935e422.parquet\n", + "/app/data/idx=0/year=2000/month=1/b8c5a9f58500424785e4c83520931127.parquet\n", + "/app/data/idx=0/year=2000/month=1/b9176233e3934efebb0b12e1a780a3b1.parquet\n", + "/app/data/idx=0/year=2000/month=1/ba3d62351b7745f5a4e18f27159d5820.parquet\n", + "/app/data/idx=0/year=2000/month=1/bb9f583ed63840b39ada7bb0f45b9d57.parquet\n", + "/app/data/idx=0/year=2000/month=1/c55358bb09194e7aad9828678b5eaa61.parquet\n", + "/app/data/idx=0/year=2000/month=1/c5e31c9f04a6491dbf068fa889095e27.parquet\n", + "/app/data/idx=0/year=2000/month=1/c70308ef1a954ccea429f0de60c41fb3.parquet\n", + "/app/data/idx=0/year=2000/month=1/cf1e928b55ba4dd09bfa2765dadffb76.parquet\n", + "/app/data/idx=0/year=2000/month=1/d08715970c714455b7b9fbf18a86e8c0.parquet\n", + "/app/data/idx=0/year=2000/month=1/d27b68dc839f47e2a25814d805b9d759.parquet\n", + "/app/data/idx=0/year=2000/month=1/d46043c1511647a5b3b96450580ce6e1.parquet\n", + "/app/data/idx=0/year=2000/month=1/d592794fbc7f4ed0877d5a350fabf8d4.parquet\n", + 
"/app/data/idx=0/year=2000/month=1/d8b05ee145d046a1ac321708b68e91de.parquet\n", + "/app/data/idx=0/year=2000/month=1/d9163626e55f40bb88142c43eb4b9fab.parquet\n", + "/app/data/idx=0/year=2000/month=1/dcb0cd8bc9084246955a6090f643a43d.parquet\n", + "/app/data/idx=0/year=2000/month=1/dd0db6d0e040442bb0b950efa6ac6e6a.parquet\n", + "/app/data/idx=0/year=2000/month=1/dd306d9fd65a459fbbf1e32fc9260ae3.parquet\n", + "/app/data/idx=0/year=2000/month=1/e05e535b8969470680658f6c2924bb68.parquet\n", + "/app/data/idx=0/year=2000/month=1/e3aff8e0f7094609b4de8bacac5faa4c.parquet\n", + "/app/data/idx=0/year=2000/month=1/e439d12c5539461da2b12a54d7dbb1c3.parquet\n", + "/app/data/idx=0/year=2000/month=1/e51258868c044644a708c74ff4c2ca46.parquet\n", + "/app/data/idx=0/year=2000/month=1/ea632843bd34467496837fea693443ff.parquet\n", + "/app/data/idx=0/year=2000/month=1/ecf1306aadb04ecdabb50803116eb0fa.parquet\n", + "/app/data/idx=0/year=2000/month=1/ef2355b80a7346afbabd33743d7e69a2.parquet\n", + "/app/data/idx=0/year=2000/month=1/ef7d760f2a2245e08f8c038bdf554edd.parquet\n", + "/app/data/idx=0/year=2000/month=1/f4ca5d31138248eca2beb467548461ed.parquet\n", + "/app/data/idx=0/year=2000/month=1/fba715c8fda84ad88d370f71b2408c12.parquet\n", + "/app/data/idx=0/year=2000/month=1/fe435999dba9476baec1b3009d529d32.parquet\n", + "/app/data/idx=0/year=2000/month=1/fe53414bfef84cb39ca04b48c8e8332c.parquet\n", + "/app/data/idx=0/year=2000/month=1/ff75b3e1006f42c9ba9deb689324ee3e.parquet\n", + "/app/data/idx=1/year=2000/month=1/056b4d30021044298d7fde4cdd296561.parquet\n", + "/app/data/idx=1/year=2000/month=1/0c138f0939f347928f5c2d1c92207d57.parquet\n", + "/app/data/idx=1/year=2000/month=1/0cb27647424c4302b7a1cd47369b4e6d.parquet\n", + "/app/data/idx=1/year=2000/month=1/1064ed9fc62a450890a19bd906d7953a.parquet\n", + "/app/data/idx=1/year=2000/month=1/14f3b6e2235c4a2eabf23840c82059ec.parquet\n", + "/app/data/idx=1/year=2000/month=1/1541e4cf70a048b88c7f8296456b8437.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/19e5b00a91f64342be20a2faee8ef69c.parquet\n", + "/app/data/idx=1/year=2000/month=1/2512f9bc30c04375bd71f270e1901050.parquet\n", + "/app/data/idx=1/year=2000/month=1/2641066820c74d5fadd5d1a42b40d23f.parquet\n", + "/app/data/idx=1/year=2000/month=1/2b1c634e1ded48a2887abbb539f1ea41.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bc577092b964473943428b8c04f6414.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bc84c76804345c581c00b8e0ad59752.parquet\n", + "/app/data/idx=1/year=2000/month=1/2bd2238465b1416a8870494b579fae42.parquet\n", + "/app/data/idx=1/year=2000/month=1/2d5c13231ffc48aeb76bdb071663ceff.parquet\n", + "/app/data/idx=1/year=2000/month=1/2e48508ad08c4154813996117b6a833a.parquet\n", + "/app/data/idx=1/year=2000/month=1/3ca7d082ede544aab9f1f564acbffc14.parquet\n", + "/app/data/idx=1/year=2000/month=1/3d1f61cf39764307bbf39762d9c38af7.parquet\n", + "/app/data/idx=1/year=2000/month=1/40a2f2b0bd8c49be95aafc319ffd4a69.parquet\n", + "/app/data/idx=1/year=2000/month=1/4201c94937bc44f3809d9bf883b49cd7.parquet\n", + "/app/data/idx=1/year=2000/month=1/422474d1c6934fd298944ef7c9f21bfe.parquet\n", + "/app/data/idx=1/year=2000/month=1/444a6621429443c8b6550c6c04b27a24.parquet\n", + "/app/data/idx=1/year=2000/month=1/4940c21244274606bd6b543df4738ccf.parquet\n", + "/app/data/idx=1/year=2000/month=1/4b87781720884af7ae79d3f59fd69cd3.parquet\n", + "/app/data/idx=1/year=2000/month=1/4dd866c257864005a62854991f666b25.parquet\n", + "/app/data/idx=1/year=2000/month=1/4f06000c93bb45f18edfa84eeb89a1b9.parquet\n", + "/app/data/idx=1/year=2000/month=1/50716e5b2e004ba38d414a101ae09427.parquet\n", + "/app/data/idx=1/year=2000/month=1/50fc4338cf41483091d11a2616eb6221.parquet\n", + "/app/data/idx=1/year=2000/month=1/563109ba1ed647ef9518393a9d1ddb2e.parquet\n", + "/app/data/idx=1/year=2000/month=1/586e3969f1084af2bf28cee6f721cdc6.parquet\n", + "/app/data/idx=1/year=2000/month=1/5a1ba9682db3414ea33666e64d055535.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/5e2241ecbf364a0784626be86e38d6eb.parquet\n", + "/app/data/idx=1/year=2000/month=1/6300ef1b3beb44f0937dc8f890e845ce.parquet\n", + "/app/data/idx=1/year=2000/month=1/64aeabc396ba42ada56c695a32ed12a7.parquet\n", + "/app/data/idx=1/year=2000/month=1/664ca39e99134dabbe6d4c7402f626aa.parquet\n", + "/app/data/idx=1/year=2000/month=1/68efc5543f394005bb82c0dc63a3b01f.parquet\n", + "/app/data/idx=1/year=2000/month=1/6c51260b47964705a3dcfa1cf25ca106.parquet\n", + "/app/data/idx=1/year=2000/month=1/6f9ad552153244679f73a058dfc5b42e.parquet\n", + "/app/data/idx=1/year=2000/month=1/718ffd8c75a14cde953e8e3275341d31.parquet\n", + "/app/data/idx=1/year=2000/month=1/728984a554734a25a69f0eb1f32f842f.parquet\n", + "/app/data/idx=1/year=2000/month=1/75296fd97a724c74bc09e9d64b528f50.parquet\n", + "/app/data/idx=1/year=2000/month=1/76ca85d0dfd849829f105ee6fddb6439.parquet\n", + "/app/data/idx=1/year=2000/month=1/77ac6bd92e7f4a46bbc7634de174bbf3.parquet\n", + "/app/data/idx=1/year=2000/month=1/79a48d3eb0c144ccb13fa4baf944c92b.parquet\n", + "/app/data/idx=1/year=2000/month=1/7a1ae42ab80b4cbf9c00a5b7f213a12c.parquet\n", + "/app/data/idx=1/year=2000/month=1/7af9fe9698494063a751f9a8f5a317dc.parquet\n", + "/app/data/idx=1/year=2000/month=1/7cd226f5679b4cae9af7b881fa1787b7.parquet\n", + "/app/data/idx=1/year=2000/month=1/7fed9a3f251c44209ce0933cfe60ec98.parquet\n", + "/app/data/idx=1/year=2000/month=1/842f90063cbb44b4ae1e7d6b9b4aa59e.parquet\n", + "/app/data/idx=1/year=2000/month=1/84dceabacd264c82981347142463feb9.parquet\n", + "/app/data/idx=1/year=2000/month=1/85d7b8fa841e42b097e34dcd8f13beca.parquet\n", + "/app/data/idx=1/year=2000/month=1/878a1b363a0a48c3b0af294e9f885d72.parquet\n", + "/app/data/idx=1/year=2000/month=1/887e26b6f1004e4fb2a5e373b4d9c5f3.parquet\n", + "/app/data/idx=1/year=2000/month=1/88bc144aa2ed4334b077b19f702a9a99.parquet\n", + "/app/data/idx=1/year=2000/month=1/88fe979886ee453789ca1b1083300618.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/8b75d58338e64ae1bc694bb0d7044597.parquet\n", + "/app/data/idx=1/year=2000/month=1/8cf24285a4a5450ca5c56c731f5c87a0.parquet\n", + "/app/data/idx=1/year=2000/month=1/8d873dde8103478ba44283b5c90e5060.parquet\n", + "/app/data/idx=1/year=2000/month=1/8e25293517d8490b9f12892f63f35b3a.parquet\n", + "/app/data/idx=1/year=2000/month=1/92bbf16c4b7f4888ae4f93efcec6d40a.parquet\n", + "/app/data/idx=1/year=2000/month=1/9443d531d13f41b491771f22caa9d5a4.parquet\n", + "/app/data/idx=1/year=2000/month=1/94b871d36d384a24a6f42f34d56f822c.parquet\n", + "/app/data/idx=1/year=2000/month=1/9543cef54d3340ba9c8a2dca154947b8.parquet\n", + "/app/data/idx=1/year=2000/month=1/985415e78a0c4abcb42a96c44bdef44b.parquet\n", + "/app/data/idx=1/year=2000/month=1/9b501f9c98c3455ab37f13dc32d4836e.parquet\n", + "/app/data/idx=1/year=2000/month=1/9f0ac6f2e23242b1afb424389a8a1f08.parquet\n", + "/app/data/idx=1/year=2000/month=1/a20bfc0770454e1185f3d1b91efed93c.parquet\n", + "/app/data/idx=1/year=2000/month=1/a31f4026dbab4ef9807081ad9be5e5cc.parquet\n", + "/app/data/idx=1/year=2000/month=1/a472f43a45da4357b63cb0b5535e3237.parquet\n", + "/app/data/idx=1/year=2000/month=1/a74453d72e364b0f819ecf238d9b53fd.parquet\n", + "/app/data/idx=1/year=2000/month=1/a94d3fce611243d29a21b612f01e5a18.parquet\n", + "/app/data/idx=1/year=2000/month=1/a990f67b865f4e599ffa926341915ae2.parquet\n", + "/app/data/idx=1/year=2000/month=1/aa28c2d20ed140b18ddead5b11b96a0b.parquet\n", + "/app/data/idx=1/year=2000/month=1/aa724649481e4f7aa95b78cfe333c72d.parquet\n", + "/app/data/idx=1/year=2000/month=1/ac4487b08071423481580622be8d9914.parquet\n", + "/app/data/idx=1/year=2000/month=1/ad2a3795a1ad46f0b7b509a6ebdc85f4.parquet\n", + "/app/data/idx=1/year=2000/month=1/afa56f8175ed41a8b34bac4ac6786cf3.parquet\n", + "/app/data/idx=1/year=2000/month=1/b6c7cee2c50642bbaacf29e16dbbece5.parquet\n", + "/app/data/idx=1/year=2000/month=1/b9c0158311a04c3fa9c594d6db280053.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/bbbd7a1b72b645ed8afdada3a0fd9fac.parquet\n", + "/app/data/idx=1/year=2000/month=1/bbce481ce9fc404684db9578007edd4b.parquet\n", + "/app/data/idx=1/year=2000/month=1/bbf2ea53874d4bb49b7ebf959c24b060.parquet\n", + "/app/data/idx=1/year=2000/month=1/bd054b89ad8a46f29968468a4fd6d34d.parquet\n", + "/app/data/idx=1/year=2000/month=1/c1a395d1127240c1b9d7ebcb0d63842f.parquet\n", + "/app/data/idx=1/year=2000/month=1/c27376832ccd439685bdc3b11cdcec0f.parquet\n", + "/app/data/idx=1/year=2000/month=1/c5c55b01bbe1494e9297385e99e9f0d3.parquet\n", + "/app/data/idx=1/year=2000/month=1/c872faa9a863454cadc603827abd3f6c.parquet\n", + "/app/data/idx=1/year=2000/month=1/c9528d72e8574a279c0995c3de171de3.parquet\n", + "/app/data/idx=1/year=2000/month=1/cb7475b11c924a689515ade22ec7b134.parquet\n", + "/app/data/idx=1/year=2000/month=1/cb9a2e526b7845daaaf8f3ced61d8597.parquet\n", + "/app/data/idx=1/year=2000/month=1/cd356e54f63c483ea4792e842667c1ac.parquet\n", + "/app/data/idx=1/year=2000/month=1/cdd3925db9ae44a0ba2760031b229219.parquet\n", + "/app/data/idx=1/year=2000/month=1/d118c630c6194befaae2217985c9073c.parquet\n", + "/app/data/idx=1/year=2000/month=1/d68ddf28bd144430a5dc2c4437f37472.parquet\n", + "/app/data/idx=1/year=2000/month=1/d7adfebd0e9249f989f41e10ca61bf59.parquet\n", + "/app/data/idx=1/year=2000/month=1/d9b7947e9c6b400080d2226093fcc571.parquet\n", + "/app/data/idx=1/year=2000/month=1/d9f610ef03c748619ee5ef2ddcde2634.parquet\n", + "/app/data/idx=1/year=2000/month=1/dcbf892a4231404c90139ee3adfc6815.parquet\n", + "/app/data/idx=1/year=2000/month=1/e083fc488a7446bbbdad82c37f8fca29.parquet\n", + "/app/data/idx=1/year=2000/month=1/e5f84abccb0d407898e892f78dcb9ce1.parquet\n", + "/app/data/idx=1/year=2000/month=1/e74ca84dac2e4d53977a54d9daeb7adc.parquet\n", + "/app/data/idx=1/year=2000/month=1/e85272be7a1c411a886bc856c6012396.parquet\n", + "/app/data/idx=1/year=2000/month=1/ec83d2e5ff534be1b28b4cf511b67e0d.parquet\n", + 
"/app/data/idx=1/year=2000/month=1/ef6709a1008c43cc994cf01278474c94.parquet\n", + "/app/data/idx=1/year=2000/month=1/f7249440aa6f403f934e5018d34a583c.parquet\n", + "/app/data/idx=1/year=2000/month=1/fc96559adfd2419a9a1cf883b4d521fb.parquet\n", + "/app/data/idx=2/year=2000/month=1/0210672cfa44441bbcf4c07a2bd3c467.parquet\n", + "/app/data/idx=2/year=2000/month=1/0259160641d446518dffe477c5265240.parquet\n", + "/app/data/idx=2/year=2000/month=1/04cce58d49ba4c3982dd0823f43f29a9.parquet\n", + "/app/data/idx=2/year=2000/month=1/058031e2ce2d4bd99cbe7297756dd547.parquet\n", + "/app/data/idx=2/year=2000/month=1/06918413b55f43a19fb7f4e13712c396.parquet\n", + "/app/data/idx=2/year=2000/month=1/07a8bd5cb80140a48f709d86fe3e00aa.parquet\n", + "/app/data/idx=2/year=2000/month=1/0bca80d1ee444038871e5fbb1ccc4d21.parquet\n", + "/app/data/idx=2/year=2000/month=1/0bd86024c6234346b739be5af1a49ed2.parquet\n", + "/app/data/idx=2/year=2000/month=1/0c2d3de1afda4b8f82f43cf658a09fb8.parquet\n", + "/app/data/idx=2/year=2000/month=1/0d4d954eab7043a0a8d7bd751897deb5.parquet\n", + "/app/data/idx=2/year=2000/month=1/0d976fec817b4dd88d3082fe39e6f2b6.parquet\n", + "/app/data/idx=2/year=2000/month=1/12255adedd3948d4b8ced88001a61e04.parquet\n", + "/app/data/idx=2/year=2000/month=1/1406843e1322465e8384ba8685a9eb9d.parquet\n", + "/app/data/idx=2/year=2000/month=1/15ab3cbd13ce4fc7ab69d5c2b1672ca2.parquet\n", + "/app/data/idx=2/year=2000/month=1/18b68b427e2947bbaee4122bc2b0fbf8.parquet\n", + "/app/data/idx=2/year=2000/month=1/1a883ab5889441578fbf5f0a2c822c07.parquet\n", + "/app/data/idx=2/year=2000/month=1/1cc7612ae5e34455a716fc38b84427bb.parquet\n", + "/app/data/idx=2/year=2000/month=1/1e188269ac30443fa796a8bdbea70e46.parquet\n", + "/app/data/idx=2/year=2000/month=1/1e803b9281ef4d4289f8a207de2fd2a2.parquet\n", + "/app/data/idx=2/year=2000/month=1/2099fd988d544989b1117a45cd92e2c5.parquet\n", + "/app/data/idx=2/year=2000/month=1/245b2e48c3d442f990dfd9f3f18f5544.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/2cc8cd4af02e48728683551df1d9b517.parquet\n", + "/app/data/idx=2/year=2000/month=1/2e389e9f7c4f43ea8ff96d1fa13f0347.parquet\n", + "/app/data/idx=2/year=2000/month=1/2edcd33c70704b64b80987aba03d724e.parquet\n", + "/app/data/idx=2/year=2000/month=1/3170732421924aeaa451ca82a4b77131.parquet\n", + "/app/data/idx=2/year=2000/month=1/3227923c1dab4e7fbe07511111e76f67.parquet\n", + "/app/data/idx=2/year=2000/month=1/3607d6e90ab64fff84b4f2c9477540ce.parquet\n", + "/app/data/idx=2/year=2000/month=1/3b733f19c98f44ebb6ab31e93d18f09b.parquet\n", + "/app/data/idx=2/year=2000/month=1/3d79e3dd93d44a208aadd899a9632005.parquet\n", + "/app/data/idx=2/year=2000/month=1/3e2d5106997b4d2a8a4aaaada70b5c34.parquet\n", + "/app/data/idx=2/year=2000/month=1/3ea888ba5f0c4c46aaa55795799c8614.parquet\n", + "/app/data/idx=2/year=2000/month=1/4065fbfffe364f5b8f661dd0caff5c00.parquet\n", + "/app/data/idx=2/year=2000/month=1/4438f729a59e4bee856e9766a7866777.parquet\n", + "/app/data/idx=2/year=2000/month=1/489418f859104268b59905195289b433.parquet\n", + "/app/data/idx=2/year=2000/month=1/4a07e13d3bab4ee4bed09868f4d0ae6a.parquet\n", + "/app/data/idx=2/year=2000/month=1/4ade79216a6f42ffbfa7ee5c2949d904.parquet\n", + "/app/data/idx=2/year=2000/month=1/508e221eeacc4624977761af65fdf95f.parquet\n", + "/app/data/idx=2/year=2000/month=1/520f6ff1dee6468099730664d5bea3de.parquet\n", + "/app/data/idx=2/year=2000/month=1/537a5c5b6d2949eca8c35db48dcc123f.parquet\n", + "/app/data/idx=2/year=2000/month=1/552fbc5a37494e7bb792e3c225cd4021.parquet\n", + "/app/data/idx=2/year=2000/month=1/589b0598f3eb4f178125912219919413.parquet\n", + "/app/data/idx=2/year=2000/month=1/62c0c2448a5d49889e2d2b8421264798.parquet\n", + "/app/data/idx=2/year=2000/month=1/6312935db784424a957645de2de4a4c2.parquet\n", + "/app/data/idx=2/year=2000/month=1/64f32e163bed483b860f21c6666b0a7d.parquet\n", + "/app/data/idx=2/year=2000/month=1/66414c74b1ab4c3cb155b440359b1705.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/6f0e1508809f47efba9fe398311b711c.parquet\n", + "/app/data/idx=2/year=2000/month=1/724d5c288c834e34846ad8871a94ee10.parquet\n", + "/app/data/idx=2/year=2000/month=1/740e15b45d2745a997e81672fc58481e.parquet\n", + "/app/data/idx=2/year=2000/month=1/75d5db2fb8404493bd6f6ebbeee50e91.parquet\n", + "/app/data/idx=2/year=2000/month=1/765f6b9e1260430680f79e9c4b8de8a1.parquet\n", + "/app/data/idx=2/year=2000/month=1/7893a366f6fd4770ac34af71a74af552.parquet\n", + "/app/data/idx=2/year=2000/month=1/7d6b206a0cdc4c7baefb675350602e10.parquet\n", + "/app/data/idx=2/year=2000/month=1/7d7c7ec0eaf04cf386ce6d93c5107246.parquet\n", + "/app/data/idx=2/year=2000/month=1/7e5eb92603774185bce487436db2af8f.parquet\n", + "/app/data/idx=2/year=2000/month=1/7f393857790e43da9549ed4c69797d18.parquet\n", + "/app/data/idx=2/year=2000/month=1/7f72ff606a804972a50960d0efcebcae.parquet\n", + "/app/data/idx=2/year=2000/month=1/8415983fe0a549c89ea28b25db102138.parquet\n", + "/app/data/idx=2/year=2000/month=1/86cf478f40914946b5b86106be97f7d8.parquet\n", + "/app/data/idx=2/year=2000/month=1/86f1de6e862141be8bd612465486fd16.parquet\n", + "/app/data/idx=2/year=2000/month=1/895fb45b8f554034a79ebd9c8eff9cad.parquet\n", + "/app/data/idx=2/year=2000/month=1/896bad5a081440b582d71fbb5baa4998.parquet\n", + "/app/data/idx=2/year=2000/month=1/8c2163530eef4b7b9e22fc1d4d99d6d5.parquet\n", + "/app/data/idx=2/year=2000/month=1/8c3b5f112ddf48e1a165bcad69f7e548.parquet\n", + "/app/data/idx=2/year=2000/month=1/8f54037c274c424fa2e13e83afe6a983.parquet\n", + "/app/data/idx=2/year=2000/month=1/9267bc6aecba4d66952bc7778a97bbb0.parquet\n", + "/app/data/idx=2/year=2000/month=1/978623e40a264ecbb8e3e7afee4a9221.parquet\n", + "/app/data/idx=2/year=2000/month=1/9b501c10edd94539b8147571202e7dfe.parquet\n", + "/app/data/idx=2/year=2000/month=1/a2cbd94909a7409cb233cc388fcd53be.parquet\n", + "/app/data/idx=2/year=2000/month=1/a570b6d3b72d4c8090c4efcb2eeb2d70.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/a88b8e956c104202a8f2d279c7e58741.parquet\n", + "/app/data/idx=2/year=2000/month=1/a97573410ce04706ac3d5c88f9cd285e.parquet\n", + "/app/data/idx=2/year=2000/month=1/a9c31f330c2d454a8911627eaafe7e31.parquet\n", + "/app/data/idx=2/year=2000/month=1/aa941bd2b9574ce294967019aa4cd515.parquet\n", + "/app/data/idx=2/year=2000/month=1/af86ac06c6f7484c8bbb8215a408ce73.parquet\n", + "/app/data/idx=2/year=2000/month=1/b35d48ff673541559bf27f4c3e1feab6.parquet\n", + "/app/data/idx=2/year=2000/month=1/b5b85036b2c540f9add4b86012873462.parquet\n", + "/app/data/idx=2/year=2000/month=1/b8b4abc89c824a17a263d898f4bca476.parquet\n", + "/app/data/idx=2/year=2000/month=1/bb6a1df466d84085bc0900641233cbc3.parquet\n", + "/app/data/idx=2/year=2000/month=1/bb95334225ce41768c1175ccabad174b.parquet\n", + "/app/data/idx=2/year=2000/month=1/bca9c21e480249eebb26aeed167b1293.parquet\n", + "/app/data/idx=2/year=2000/month=1/bf49382a8e024ffe9c17e4849ce4127f.parquet\n", + "/app/data/idx=2/year=2000/month=1/c06c38062a2b4e13b4e1ee1eaf03bfa2.parquet\n", + "/app/data/idx=2/year=2000/month=1/c1f40b6256444001af06dc2fb98f5e5c.parquet\n", + "/app/data/idx=2/year=2000/month=1/c4968d0cbcd54c83a0dd3e57039f0578.parquet\n", + "/app/data/idx=2/year=2000/month=1/c6afa57132184a71becf083d1b553473.parquet\n", + "/app/data/idx=2/year=2000/month=1/c87a24c747984bf58745b666dac98323.parquet\n", + "/app/data/idx=2/year=2000/month=1/cc34429087f54f7aaf1e84bc12517c26.parquet\n", + "/app/data/idx=2/year=2000/month=1/cc839cdd3fbe465abc78861a4cc11acf.parquet\n", + "/app/data/idx=2/year=2000/month=1/db6c45d7e8234bc1949ddd8973010d7f.parquet\n", + "/app/data/idx=2/year=2000/month=1/dbb0a2e2bdbc4319a07d04af0d9356fc.parquet\n", + "/app/data/idx=2/year=2000/month=1/dbde0aee2a4647939d6f027a99e37cc4.parquet\n", + "/app/data/idx=2/year=2000/month=1/ddd0738116b5496391991ad6d3e781b9.parquet\n", + "/app/data/idx=2/year=2000/month=1/e52fd781bd78475789d4160624a6e34a.parquet\n", + 
"/app/data/idx=2/year=2000/month=1/e9c5c04f931f4fd4b6afb51db34cda54.parquet\n", + "/app/data/idx=2/year=2000/month=1/eee841a6139a4fe19620045f04c2f908.parquet\n", + "/app/data/idx=2/year=2000/month=1/ef42e36ceb794730ac25dad68f73294d.parquet\n", + "/app/data/idx=2/year=2000/month=1/efe26f73b0494f828fcf2686b6874c71.parquet\n", + "/app/data/idx=2/year=2000/month=1/f15094f2f10748e59573fecb5435ecc4.parquet\n", + "/app/data/idx=2/year=2000/month=1/f1e37026291c41c5ae698956baa6bf39.parquet\n", + "/app/data/idx=2/year=2000/month=1/f1f56b07a73646e4a5219a2623b04489.parquet\n", + "/app/data/idx=2/year=2000/month=1/f25704c4b00a418c9fa2385f9018adc7.parquet\n", + "/app/data/idx=2/year=2000/month=1/f60540924a1641de9d64f66c1af980dd.parquet\n", + "/app/data/idx=2/year=2000/month=1/f62eada23e1d430dacb69eeff0d5ba59.parquet\n", + "/app/data/idx=2/year=2000/month=1/f9b43fe646ec4607baa500b1360a6e1c.parquet\n", + "/app/data/idx=2/year=2000/month=1/fc3a31bc82ba4f17a93a18138887d9d5.parquet\n", + "/app/data/idx=3/year=2000/month=1/00b291e6d0d2494a8652e6ffcf1746c5.parquet\n", + "/app/data/idx=3/year=2000/month=1/01b6882837054cc4801c6929a630abd7.parquet\n", + "/app/data/idx=3/year=2000/month=1/09ebeae420f348c28a365f607978aeda.parquet\n", + "/app/data/idx=3/year=2000/month=1/0c41010bec604c93b974e72fa35cc2c7.parquet\n", + "/app/data/idx=3/year=2000/month=1/0cb995ed168f4829a38db4f75d4ed14b.parquet\n", + "/app/data/idx=3/year=2000/month=1/0cf1a660ee984efcaabe1d1bb9263a9a.parquet\n", + "/app/data/idx=3/year=2000/month=1/0d0bbc2ee628424f8204240680f44389.parquet\n", + "/app/data/idx=3/year=2000/month=1/0f72553d38cb47f095fdf35e03507dd3.parquet\n", + "/app/data/idx=3/year=2000/month=1/0ff3e55ae9464e369302d1fb2abaec40.parquet\n", + "/app/data/idx=3/year=2000/month=1/1165cf18728c41edb7bb8a765ae7854d.parquet\n", + "/app/data/idx=3/year=2000/month=1/12a3b4dadd4f43389c269f4b736278c2.parquet\n", + "/app/data/idx=3/year=2000/month=1/1a204362f488461da026ee347c817e2e.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/1c306421662241b48b85f24d033898fc.parquet\n", + "/app/data/idx=3/year=2000/month=1/22155eaaf5ce4e36bbb36b162dadae9e.parquet\n", + "/app/data/idx=3/year=2000/month=1/229cb1d3321f4660866b414f3a647fff.parquet\n", + "/app/data/idx=3/year=2000/month=1/280b6ca59e1f4312b872fd23d96ed6df.parquet\n", + "/app/data/idx=3/year=2000/month=1/2859c7dccfe54951a955941fa23a33b1.parquet\n", + "/app/data/idx=3/year=2000/month=1/2a17999c98294f38ac3e60af45779214.parquet\n", + "/app/data/idx=3/year=2000/month=1/2e3b411a5a3a48aba5e52053e54dbe9f.parquet\n", + "/app/data/idx=3/year=2000/month=1/2eb295d22ddd4ca9801d7b0a6a950261.parquet\n", + "/app/data/idx=3/year=2000/month=1/313a5fc7ea2c49009cd68f31ce030eb3.parquet\n", + "/app/data/idx=3/year=2000/month=1/319b8c873aba46d9a39aaed1d7ade697.parquet\n", + "/app/data/idx=3/year=2000/month=1/36c17affd08e450ba034d29818f6c94f.parquet\n", + "/app/data/idx=3/year=2000/month=1/37170fb9855d47f0871cbf1b3c4a5763.parquet\n", + "/app/data/idx=3/year=2000/month=1/3772fba9cef64744a8aa5ad999a1d48d.parquet\n", + "/app/data/idx=3/year=2000/month=1/3d68d10aee3b46e9ab4c2341f395e9f8.parquet\n", + "/app/data/idx=3/year=2000/month=1/3da7295cc0ee4953aad41cddb746c0ec.parquet\n", + "/app/data/idx=3/year=2000/month=1/401a2d5e38ee4581ac5950131e7739ed.parquet\n", + "/app/data/idx=3/year=2000/month=1/40bb809ba5824fa48218e2543e1317d8.parquet\n", + "/app/data/idx=3/year=2000/month=1/42c11bbbec28471d818c4eda7ffa0316.parquet\n", + "/app/data/idx=3/year=2000/month=1/430d92d720ef40aca2043cdd9a4216a7.parquet\n", + "/app/data/idx=3/year=2000/month=1/4344d9475f474d4289c16c14e3d76205.parquet\n", + "/app/data/idx=3/year=2000/month=1/4965043c1c58485fb9a81ca502c9704c.parquet\n", + "/app/data/idx=3/year=2000/month=1/4c954d56c1f040f8adcb92a116fc3e4a.parquet\n", + "/app/data/idx=3/year=2000/month=1/4cb7c012e50c4e45988d6c73f931babf.parquet\n", + "/app/data/idx=3/year=2000/month=1/4d11aa2de91047638fd1fbb49180b828.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/59de1ba8fd7b41d7819849137f7b9817.parquet\n", + "/app/data/idx=3/year=2000/month=1/5a31ef5acc2340b7a575b1d77e9e9917.parquet\n", + "/app/data/idx=3/year=2000/month=1/5b14185275384ee5ae5839b6d69c714e.parquet\n", + "/app/data/idx=3/year=2000/month=1/5b35b2943a7c476aa5dc3a2af08f13fe.parquet\n", + "/app/data/idx=3/year=2000/month=1/5e6bb9eceb2d4a4ebddd39e06db86d67.parquet\n", + "/app/data/idx=3/year=2000/month=1/5f8372dbc36a4681bdebfaa9f3328eec.parquet\n", + "/app/data/idx=3/year=2000/month=1/6317cb7958d2459595a28bdca41f42d5.parquet\n", + "/app/data/idx=3/year=2000/month=1/67ba93ec02b44b0593c0ff37aa3db5b7.parquet\n", + "/app/data/idx=3/year=2000/month=1/69be17b95a9046c2a4553f5c077f5fff.parquet\n", + "/app/data/idx=3/year=2000/month=1/6ac05cada45b48b89ec15b0f76df21ac.parquet\n", + "/app/data/idx=3/year=2000/month=1/6ce38fe0d6a54853a757745eb148960a.parquet\n", + "/app/data/idx=3/year=2000/month=1/7000686e11b34200ae44dfe294dc8c8e.parquet\n", + "/app/data/idx=3/year=2000/month=1/70f44eb7513c4100aa2cd5779e3c5d67.parquet\n", + "/app/data/idx=3/year=2000/month=1/7421bdc2222640b38ada8d94e10e5865.parquet\n", + "/app/data/idx=3/year=2000/month=1/78f4a6251bb7423e800ada3444bb54c1.parquet\n", + "/app/data/idx=3/year=2000/month=1/874eb82772844f269bc5360ef1971245.parquet\n", + "/app/data/idx=3/year=2000/month=1/87baf01b30ce467ca976e26ad5bec1e2.parquet\n", + "/app/data/idx=3/year=2000/month=1/8a31ab99c92a4a8b829f37561cc99956.parquet\n", + "/app/data/idx=3/year=2000/month=1/8aa9003415c649288a13560a1352805b.parquet\n", + "/app/data/idx=3/year=2000/month=1/8ae3a6e6214f4816b469f09b01c2e955.parquet\n", + "/app/data/idx=3/year=2000/month=1/8ff02b303fca4f86a129197874e8e6fe.parquet\n", + "/app/data/idx=3/year=2000/month=1/94c27fe8b6084f7b8606cef710bab753.parquet\n", + "/app/data/idx=3/year=2000/month=1/94c4de33006f424e8cb424accfad8a2c.parquet\n", + "/app/data/idx=3/year=2000/month=1/9c9b600151fb47e5a073e51a735e1537.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/9e59161660e140209e94cab5f7ea5098.parquet\n", + "/app/data/idx=3/year=2000/month=1/9fadcdc1ab7a4b9783128af7b744d705.parquet\n", + "/app/data/idx=3/year=2000/month=1/9fd3848ab9c54869b34c3a5d8e79be9a.parquet\n", + "/app/data/idx=3/year=2000/month=1/a2c45c983d5b469997c55c4e2ad72427.parquet\n", + "/app/data/idx=3/year=2000/month=1/a3f1f0a5cca84c4eaa7f2a1bef1f88b0.parquet\n", + "/app/data/idx=3/year=2000/month=1/a43049d78c9341668d77a63fc3b4d57f.parquet\n", + "/app/data/idx=3/year=2000/month=1/aa89184d32ca40c28f44109c97cee774.parquet\n", + "/app/data/idx=3/year=2000/month=1/ab3cf71e9caa44ec90adc43a56867162.parquet\n", + "/app/data/idx=3/year=2000/month=1/acab0d093d9a4bca854719e790512a25.parquet\n", + "/app/data/idx=3/year=2000/month=1/acf77747edbf4df5b457cfc8a77e0dc0.parquet\n", + "/app/data/idx=3/year=2000/month=1/b5672b45b393472986217241b378742f.parquet\n", + "/app/data/idx=3/year=2000/month=1/b7fd4df9bc9440ff94d713a7e43959d2.parquet\n", + "/app/data/idx=3/year=2000/month=1/b81af51b094e457faa6c786d1fffc470.parquet\n", + "/app/data/idx=3/year=2000/month=1/bbedc33b622c46b7af6af9c62e139163.parquet\n", + "/app/data/idx=3/year=2000/month=1/befaac43d5fa49f0a118ffaac6b5c4d3.parquet\n", + "/app/data/idx=3/year=2000/month=1/c0a4a83a65d94f2281b2039cac0e2c9e.parquet\n", + "/app/data/idx=3/year=2000/month=1/c4f44bc2181f45a3866cc232d80f2e46.parquet\n", + "/app/data/idx=3/year=2000/month=1/c63bff60ba67488d8ce536aa47774b53.parquet\n", + "/app/data/idx=3/year=2000/month=1/c74c114cc7e34985aeb20e14c2b26f3c.parquet\n", + "/app/data/idx=3/year=2000/month=1/c7eb09b4b0cf44eab86d88f11d00c222.parquet\n", + "/app/data/idx=3/year=2000/month=1/ce3160350479478da1a327405dc4cbe8.parquet\n", + "/app/data/idx=3/year=2000/month=1/cfb6a5a4bdbb4bb0a6afa699aa2e100a.parquet\n", + "/app/data/idx=3/year=2000/month=1/d137ffa9eeeb418491e792c7871334c6.parquet\n", + "/app/data/idx=3/year=2000/month=1/d43ffbf42b694713ae6e4b1e408529f9.parquet\n", + 
"/app/data/idx=3/year=2000/month=1/d7f91f13f3444032995bc7c6c0bcd1cd.parquet\n", + "/app/data/idx=3/year=2000/month=1/d9aa67eaa7f144fc8613ce81bd072167.parquet\n", + "/app/data/idx=3/year=2000/month=1/ddeb24d5cdb043f380654ff98d83adc9.parquet\n", + "/app/data/idx=3/year=2000/month=1/e1e0b2ae05154f459914dad148a7779f.parquet\n", + "/app/data/idx=3/year=2000/month=1/e74ecdc304164cd8b953c808a1353bfd.parquet\n", + "/app/data/idx=3/year=2000/month=1/e7eb8d26146c423eaa1a77343d16920b.parquet\n", + "/app/data/idx=3/year=2000/month=1/e937a5e6dd0241c1a50b24a1c9b4ea7a.parquet\n", + "/app/data/idx=3/year=2000/month=1/e94bfddc06704799a2699d3a90d9843b.parquet\n", + "/app/data/idx=3/year=2000/month=1/f08328e844ab486ca07eda98bf1ca9ba.parquet\n", + "/app/data/idx=3/year=2000/month=1/f154e97e55b0428185553c4acb9ce227.parquet\n", + "/app/data/idx=3/year=2000/month=1/f38d8f30947f4bd08fb1c10bc81d8ee7.parquet\n", + "/app/data/idx=3/year=2000/month=1/f3fb7ade438a4929aba0109858f4abe4.parquet\n", + "/app/data/idx=3/year=2000/month=1/f458cf905d5845f1ac64183bba7a4826.parquet\n", + "/app/data/idx=3/year=2000/month=1/f6ce7accff3e4eb8b601078583655865.parquet\n", + "/app/data/idx=3/year=2000/month=1/f7d729c528904fd182207989fef04050.parquet\n", + "/app/data/idx=3/year=2000/month=1/f9d5734d70c542a3bf5ba9e004cb2e95.parquet\n", + "/app/data/idx=3/year=2000/month=1/fbeb2f31e5784074a90d737fb8c4e047.parquet\n", + "/app/data/idx=3/year=2000/month=1/fc1fb4ad31c448eeb8724a3069e760f0.parquet\n", + "/app/data/idx=3/year=2000/month=1/fde846fa6d8649c9b1770638786fb18c.parquet\n", + "/app/data/idx=3/year=2000/month=1/fe9c940d68fd4759a90408a1245022a6.parquet\n", + "/app/data/idx=3/year=2000/month=1/ffebea86d7fe4a64a973415ab3b6eccf.parquet\n", + "/app/data/idx=4/year=2000/month=1/01a585864dc644b6a4a7b13ae97c1f85.parquet\n", + "/app/data/idx=4/year=2000/month=1/0251c252cf544dc49285c7e4fcbf9784.parquet\n", + "/app/data/idx=4/year=2000/month=1/026b7ed2f32a4a4d9b1fe4bf2e2c45ce.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/03343bb5f29d42f19ce58caddb755df7.parquet\n", + "/app/data/idx=4/year=2000/month=1/04f9e581b08c424595f85fa85f87cb2c.parquet\n", + "/app/data/idx=4/year=2000/month=1/05751ecfd2734eedb17546ca81f8344a.parquet\n", + "/app/data/idx=4/year=2000/month=1/05f08cd7531f42a792e243c617b344f1.parquet\n", + "/app/data/idx=4/year=2000/month=1/061bd006ae35412eb8e5b758c50102c4.parquet\n", + "/app/data/idx=4/year=2000/month=1/06ba2d68586e4088921c99eddd5a5d86.parquet\n", + "/app/data/idx=4/year=2000/month=1/06df2daa4186437791d71a6b8e23519d.parquet\n", + "/app/data/idx=4/year=2000/month=1/07369c0250b5496bbac305aa1909eaa1.parquet\n", + "/app/data/idx=4/year=2000/month=1/0beb1321d8304074994a90b3a7eb94c5.parquet\n", + "/app/data/idx=4/year=2000/month=1/0f0e0602ffe5408a82d5265b2dc5ec18.parquet\n", + "/app/data/idx=4/year=2000/month=1/0fc5d753f2184cb0868ae28fc84c227e.parquet\n", + "/app/data/idx=4/year=2000/month=1/135fcc4c1e5a4823ae050c1e89fa413c.parquet\n", + "/app/data/idx=4/year=2000/month=1/156b561654924ad1b111bd5c965a46c2.parquet\n", + "/app/data/idx=4/year=2000/month=1/168d6922b1824cedb14d5654d75ba284.parquet\n", + "/app/data/idx=4/year=2000/month=1/1827f11f108341ccb48a0bb6ab694a64.parquet\n", + "/app/data/idx=4/year=2000/month=1/18e1c91f8c724d30a77bdd47e665c571.parquet\n", + "/app/data/idx=4/year=2000/month=1/19016c157bce43e394b117e8e0ed2557.parquet\n", + "/app/data/idx=4/year=2000/month=1/1a2c4e9d435f4c5faf83efbbb559118b.parquet\n", + "/app/data/idx=4/year=2000/month=1/1d3c8ecb9804470c87bfd7c25a3dab28.parquet\n", + "/app/data/idx=4/year=2000/month=1/1ee5f78eb54548278ae0a857c616e84c.parquet\n", + "/app/data/idx=4/year=2000/month=1/1ff311b87ba74e998ff7a5267ba52832.parquet\n", + "/app/data/idx=4/year=2000/month=1/285e2e6ef8c34d45b73916b4bfe1a2bf.parquet\n", + "/app/data/idx=4/year=2000/month=1/288d2d389b1e4a7695454e12fc442592.parquet\n", + "/app/data/idx=4/year=2000/month=1/2bfb7829ce324e1bb182159d8a6e7966.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/2cb8084772654371bc4aab66bab3d5fc.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d0d1ce706fe41feadf69279c0290101.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d4a13244f154d278d237535e957d174.parquet\n", + "/app/data/idx=4/year=2000/month=1/2d73ffb2b7314b48b25c924dad691fa1.parquet\n", + "/app/data/idx=4/year=2000/month=1/30c6048fdac04824831e0a984445c238.parquet\n", + "/app/data/idx=4/year=2000/month=1/3b3610138fd84568b3f6b20ccce2b296.parquet\n", + "/app/data/idx=4/year=2000/month=1/3d33b2adeb0c406aafda7296398833d2.parquet\n", + "/app/data/idx=4/year=2000/month=1/3fb3450af6ed4ddc996b10c7316018af.parquet\n", + "/app/data/idx=4/year=2000/month=1/4384e6f19b984984a0e583891fab8200.parquet\n", + "/app/data/idx=4/year=2000/month=1/4499b3a4074d42ad87a6a74f031bad48.parquet\n", + "/app/data/idx=4/year=2000/month=1/578cd70733f54818812b7fee342f7922.parquet\n", + "/app/data/idx=4/year=2000/month=1/5a42dc9b52a845b394f570bc7e233637.parquet\n", + "/app/data/idx=4/year=2000/month=1/5e3f996936cd466c8f182e4925b457b9.parquet\n", + "/app/data/idx=4/year=2000/month=1/6171f6c076d442ce9ee9b2223a1c9e29.parquet\n", + "/app/data/idx=4/year=2000/month=1/637fabc040bd4139901780de2f98df24.parquet\n", + "/app/data/idx=4/year=2000/month=1/649b57f24c1c49e7aa025d1a111f31a6.parquet\n", + "/app/data/idx=4/year=2000/month=1/65dac4a30aba4d3e9a18e731bef42800.parquet\n", + "/app/data/idx=4/year=2000/month=1/6759ad29fa9a416498d408a97082da2d.parquet\n", + "/app/data/idx=4/year=2000/month=1/682c068895b54404aa02c22ec59d98d7.parquet\n", + "/app/data/idx=4/year=2000/month=1/6a2c44eebd7c447ab0eac8b5596612ce.parquet\n", + "/app/data/idx=4/year=2000/month=1/6c36185edd4a41bc8869406a3bc9b533.parquet\n", + "/app/data/idx=4/year=2000/month=1/6ce5ab2e0fce43c9be58cd6ca0ab1b0c.parquet\n", + "/app/data/idx=4/year=2000/month=1/6e7dfa62c7ab4743bd5b47c2d65fcd3f.parquet\n", + "/app/data/idx=4/year=2000/month=1/7137092484b641e3a41226810acbe2b7.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/727c5b50be444555bb0c8cb3493f136c.parquet\n", + "/app/data/idx=4/year=2000/month=1/761ed2925727400586b3f95bebe32b12.parquet\n", + "/app/data/idx=4/year=2000/month=1/779bc731cfdc4eb582b7d45275f45f7d.parquet\n", + "/app/data/idx=4/year=2000/month=1/79ee8145c2814549a38530b2c506544e.parquet\n", + "/app/data/idx=4/year=2000/month=1/7b59b765fa454ce0a9fbd88628d6f604.parquet\n", + "/app/data/idx=4/year=2000/month=1/8292f989424444f6aa18bbcfc68f1734.parquet\n", + "/app/data/idx=4/year=2000/month=1/83b3730a855b494487dd6728a517ee3b.parquet\n", + "/app/data/idx=4/year=2000/month=1/84ebbce76a7a4107b939b685da66b5f4.parquet\n", + "/app/data/idx=4/year=2000/month=1/855921d6f64644a38bd2be5d9669fe0a.parquet\n", + "/app/data/idx=4/year=2000/month=1/85ab57ea6d0e48efac390b6047a6f435.parquet\n", + "/app/data/idx=4/year=2000/month=1/8b974e69e33e41cdb5bde25a6a422fd6.parquet\n", + "/app/data/idx=4/year=2000/month=1/8dd5278b54e9413ebd42286dea00c4a3.parquet\n", + "/app/data/idx=4/year=2000/month=1/93128598152643a297db72dec38a07b5.parquet\n", + "/app/data/idx=4/year=2000/month=1/95448aeaacdc40fe97d207b2c80ca784.parquet\n", + "/app/data/idx=4/year=2000/month=1/9571568631184e1386c3528b8ce9ed26.parquet\n", + "/app/data/idx=4/year=2000/month=1/95f8e19b3af344db98dcc5c5f9546c3a.parquet\n", + "/app/data/idx=4/year=2000/month=1/9921911b40d041f6ac72c4d44578c5cf.parquet\n", + "/app/data/idx=4/year=2000/month=1/9b4d80c840c14d3b9c67da4c9877b628.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e259ea36fbb4c0ba9b6535a3f34544e.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e3706d0ded44106bf8e0dee8900cd28.parquet\n", + "/app/data/idx=4/year=2000/month=1/9e67c71850a54fe1aa354c43d2cd9c38.parquet\n", + "/app/data/idx=4/year=2000/month=1/9ef278bcdc3b41e89059c309bcbb005e.parquet\n", + "/app/data/idx=4/year=2000/month=1/a33e6304bb1b47daa86853f19b009366.parquet\n", + "/app/data/idx=4/year=2000/month=1/a9bc812dc596492eafcc73f01d0e53a3.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/abf0e405806744df9ea3e9908eb0451f.parquet\n", + "/app/data/idx=4/year=2000/month=1/acd468d1addc4d75944766e48c3eb324.parquet\n", + "/app/data/idx=4/year=2000/month=1/adbdc6fec62c463aa94e0ce707ae1768.parquet\n", + "/app/data/idx=4/year=2000/month=1/b24807369dfc461e92eb8a56a7931070.parquet\n", + "/app/data/idx=4/year=2000/month=1/b2f3d43c99f44131969e0fcf27cfbf3c.parquet\n", + "/app/data/idx=4/year=2000/month=1/b461c7cfd0f4483f8309f670f4f4265d.parquet\n", + "/app/data/idx=4/year=2000/month=1/b584791f45f74432a067632281285b9a.parquet\n", + "/app/data/idx=4/year=2000/month=1/b7ca6973a34c4f92831f16216beb33f2.parquet\n", + "/app/data/idx=4/year=2000/month=1/b7f16808e8e4491e8f86d3ae9766f2b9.parquet\n", + "/app/data/idx=4/year=2000/month=1/b9a2d05a74a84d71a1b65a0f05895011.parquet\n", + "/app/data/idx=4/year=2000/month=1/bc3036cc653e4584893f8b36e33c8f85.parquet\n", + "/app/data/idx=4/year=2000/month=1/be1318c7564d48be8435c11344627932.parquet\n", + "/app/data/idx=4/year=2000/month=1/bf90009dc7b14cfaab939f435d975a0b.parquet\n", + "/app/data/idx=4/year=2000/month=1/c0105d7e54fc42dc93d5140782960815.parquet\n", + "/app/data/idx=4/year=2000/month=1/c450cbe2674e488d8e30953252bc7a4b.parquet\n", + "/app/data/idx=4/year=2000/month=1/c82d37b18d65434ca1fe1b9cf4d29ccb.parquet\n", + "/app/data/idx=4/year=2000/month=1/cb53085f9145493b9a171d31b682e75f.parquet\n", + "/app/data/idx=4/year=2000/month=1/cc14bf7a74c9498889bc52e29f83edff.parquet\n", + "/app/data/idx=4/year=2000/month=1/ce3c90dd7e7a4f5a862580c14aa22c28.parquet\n", + "/app/data/idx=4/year=2000/month=1/d47149e3e1e34123a48f623ca121e8a8.parquet\n", + "/app/data/idx=4/year=2000/month=1/d9e0e3e786a942f5892c6ce17b37eb4a.parquet\n", + "/app/data/idx=4/year=2000/month=1/dbb82450694e4e76ab34f3e650d36594.parquet\n", + "/app/data/idx=4/year=2000/month=1/dc67b56f0c814648b9ebf8e1c483b923.parquet\n", + "/app/data/idx=4/year=2000/month=1/de2e16496bcd405b8d48aec4da4d5ae4.parquet\n", + 
"/app/data/idx=4/year=2000/month=1/de37409ac14b49c38c9c0da26d6c721f.parquet\n", + "/app/data/idx=4/year=2000/month=1/e8072594944141a5b078b74e739307d3.parquet\n", + "/app/data/idx=4/year=2000/month=1/e9b36b985eb44b44a5436af438f7ceb0.parquet\n", + "/app/data/idx=4/year=2000/month=1/eefdd2cdde1d4085964d1469a11f462c.parquet\n", + "/app/data/idx=4/year=2000/month=1/f89525bf20e540f29b021ce5f4d9eb3c.parquet\n", + "/app/data/idx=4/year=2000/month=1/fab7e098a4c8489785225a74b71ec2ef.parquet\n" + ] + } + ], + "source": [ + "dir_name = \"/app/data\"\n", + "\n", + "# Read data back.\n", + "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "print(\"\\n\".join(dataset.files))" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "ba4d7dc4", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:17:50.865185Z", + "start_time": "2021-06-16T11:17:50.378460Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# df=\n", + " instr val1 val2 idx year month\n", + "2000-01-09 00:00:00-05:00 A 99 54 0 2000 1\n", + "2000-01-13 09:30:00-05:00 A 99 62 0 2000 1\n", + "2000-01-13 09:35:00-05:00 A 54 76 0 2000 1\n", + "# df.shape=\n", + "(18075, 6)\n", + "# df.dtypes=\n", + "instr object\n", + "val1 int64\n", + "val2 int64\n", + "idx int32\n", + "year int32\n", + "month int32\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Read everything.\n", + "df2 = dataset.to_table().to_pandas()\n", + "\n", + "print(df_to_str(df2))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "68e84388", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:05:43.018220Z", + "start_time": "2021-06-16T11:05:43.007510Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['A' 'B' 'C' 'D' 'E']\n", + "DatetimeIndex(['2000-01-06 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", + " '2000-01-01 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " 
'2000-01-08 00:00:00-05:00', '2000-01-12 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-15 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-14 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-13 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-08 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-07 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-08 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", + " '2000-01-03 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", + " '2000-01-04 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-10 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", + " '2000-01-05 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", + " '2000-01-09 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", + " '2000-01-11 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", + " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", + " '2000-01-10 
00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", + " '2000-01-12 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", + " '2000-01-06 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", + " '2000-01-01 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", + " '2000-01-02 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", + " '2000-01-15 00:00:00-05:00'],\n", + " dtype='datetime64[ns, America/New_York]', freq=None)\n" + ] + } + ], + "source": [ + "print(df2[\"instr\"].unique())\n", + "print(df2.index)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "205.6px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py new file mode 100644 index 000000000..d7d5f9e56 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py @@ -0,0 +1,304 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# 
display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# Show Parquet / Pyarrow API. + +# %% [markdown] +# ## Imports + +# %% +import logging +import os +import random + +import pandas as pd +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +from pyarrow.dataset import DirectoryPartitioning + +import helpers.hdbg as hdbg +import helpers.hio as hio + +hdbg.init_logger(verbosity=logging.INFO) +_LOG = logging.getLogger(__name__) + + +# %% +def get_df() -> pd.DataFrame: + """ + Create pandas random data, like: + + ``` + idx instr val1 val2 + 2000-01-01 0 A 99 30 + 2000-01-02 0 A 54 46 + 2000-01-03 0 A 85 86 + ``` + """ + instruments = "A B C D E".split() + "id stock val1 val2".split() + df_idx = pd.date_range( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-15"), freq="1D" + ) + # print(df_idx) + random.seed(1000) + + df = [] + for idx, inst in enumerate(instruments): + df_tmp = pd.DataFrame( + { + "idx": idx, + "instr": inst, + "val1": [random.randint(0, 100) for k in range(len(df_idx))], + "val2": [random.randint(0, 100) for k in range(len(df_idx))], + }, + index=df_idx, + ) + # print(df_tmp) + df.append(df_tmp) + df = pd.concat(df) + return df + + +# %% +def df_to_str(df: pd.DataFrame) -> str: + txt = "" + txt += "# df=\n%s" % df.head(3) + txt += "\n# df.shape=\n%s" % str(df.shape) + txt += "\n# df.dtypes=\n%s" % str(df.dtypes) + return txt + + +# %% [markdown] +# # Save and load all data in one file + +# %% +df = get_df() +# print(df.head()) +print(df_to_str(df)) + +# %% +table = pa.Table.from_pandas(df) + +print("table=\n%s" % table) + +# %% +# Save. +file_name = "df_in_one_file.pq" +pq.write_table(table, file_name) + +# %% +# Load. 
+df2 = pq.read_table(file_name) +print(df2) + +df2 = df2.to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Read a subset of columns + +# %% +df2 = pq.read_table(file_name, columns=["idx", "val1"]) +print(df2) + +df2 = df2.to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Partitioned dataset +# +# from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data +# +# - A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir +# - E.g., "Hive" partitioning scheme "key=value" dir names + +# %% +df = get_df() +print(df_to_str(df)) + +# %% +base = "." +dir_name = os.path.join(base, "parquet_dataset_partitioned") +os.system("rm -rf %s" % dir_name) + +pq.write_to_dataset(table, dir_name, partition_cols=["idx"]) + +# %% +# !ls parquet_dataset_partitioned + +# %% +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read everything. +df2 = dataset.to_table().to_pandas() + +print(df_to_str(df2)) + +# %% +# Load part of the data. + +df2 = dataset.to_table(filter=ds.field("idx") == 1).to_pandas() +print(df_to_str(df2)) + +df2 = dataset.to_table(filter=ds.field("idx") < 3).to_pandas() +print(df_to_str(df2)) + +# %% [markdown] +# ## Add year-month partitions + +# %% +df = get_df() +df["year"] = df.index.year +df["month"] = df.index.month + +print(df_to_str(df)) + +# %% +table = pa.Table.from_pandas(df) + +print("table=\n%s" % table) + +# %% +base = "." +dir_name = os.path.join(base, "pq_partitioned2") +os.system("rm -rf %s" % dir_name) + +pq.write_to_dataset(table, dir_name, partition_cols=["idx", "year", "month"]) + +# %% +# !ls $dir_name + +# %% +# !ls $dir_name/idx=0/year=2000/month=1 + +# %% +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read data back.
+dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +df2 = dataset.to_table(filter=ds.field("idx") == 2).to_pandas() +print(df_to_str(df2)) + +# %% +# We could scan manually and create the dirs manually if we don't want to add +# a new dir. +base = "." +dir_name = os.path.join(base, "parquet_dataset_partitioned2") +os.system("rm -rf %s" % dir_name) + +schemas = [] + +schema = pa.Table.from_pandas(df).schema +print(schema) +# assert 0 +# idx: int64 +# instr: string +# val1: int64 +# val2: int64 +# year: int64 +# month: int64 + +# grouped = df.groupby(lambda x: x.day) +group_by_idx = df.groupby("idx") +for idx, df_tmp in group_by_idx: + _LOG.debug("idx=%s -> df.shape=%s", idx, str(df_tmp.shape)) + # + group_by_year = df_tmp.groupby(lambda x: x.year) + for year, df_tmp2 in group_by_year: + _LOG.debug("year=%s -> df.shape=%s", year, str(df_tmp2.shape)) + # + group_by_month = df_tmp2.groupby(lambda x: x.month) + for month, df_tmp3 in group_by_month: + _LOG.debug("month=%s -> df.shape=%s", month, str(df_tmp3.shape)) + # file_name = "df_in_one_file.pq" + # pq.write_table(table, file_name) + # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet + subdir_name = os.path.join( + dir_name, f"idx={idx}", f"year={year}", f"month={month}" + ) + table = pa.Table.from_pandas(df_tmp3, schema=schema) + schemas.append(table.schema) + # print(df_tmp3) + # print(table.schema) + # pq.write_to_dataset(table, + # subdir_name, schema=schema) + file_name = os.path.join(subdir_name, "df_out.pq") + hio.create_enclosing_dir(file_name) + pq.write_table(table, file_name) + +# %% +schemas[0] == schemas[4] + +# %% +schemas + +# %% + +# %% +# !ls $dir_name/idx=0/year=2000/month=1 + +# %% +# Read data back.
+# https://github.com/dask/dask/issues/4194 +# src_dir = f"{dir_name}/idx=0/year=2000/month=1" +src_dir = f"{dir_name}/idx=0/year=2000" +dataset = ds.dataset(src_dir, format="parquet", partitioning="hive") + +df2 = dataset.to_table().to_pandas() +# print(df_to_str(df2)) +print("\n".join(dataset.files)) + +# %% [markdown] +# ## Partition manually + +# %% +partitioning = DirectoryPartitioning( + pa.schema([("year", pa.int16()), ("month", pa.int8()), ("day", pa.int8())]) +) +print(partitioning.parse("/2009/11/3")) + +# partitioning.discover() + +# %% +# !ls /app/data + +# %% +dir_name = "/app/data" + +# Read data back. +dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") + +print("\n".join(dataset.files)) + +# %% +# Read everything. +df2 = dataset.to_table().to_pandas() + +print(df_to_str(df2)) + +# %% +print(df2["instr"].unique()) +print(df2.index) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb new file mode 100644 index 000000000..6dcf8078c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "81a273af", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:42.864614Z", + "start_time": "2021-06-16T11:41:42.860710Z" + } + }, + "outputs": [], + "source": [ + "# https://s3fs.readthedocs.io/en/latest/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8fef0639", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:42.888158Z", + "start_time": "2021-06-16T11:41:42.869135Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 3" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "37fe11a3", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.140014Z", + "start_time": "2021-06-16T11:41:42.890655Z" + } + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import helpers.hs3 as hs3" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a4130a2c", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.145271Z", + "start_time": "2021-06-16T11:41:43.141535Z" + } + }, + "outputs": [], + "source": [ + "aws_profile = \"am\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a49a28ff", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.158474Z", + "start_time": "2021-06-16T11:41:43.148428Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aws_region=%s us-east-1\n" + ] + } + ], + "source": [ + "# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key)\n", + "\n", + "s3 = hs3.get_s3fs(aws_profile)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1795133f", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.819759Z", + "start_time": "2021-06-16T11:41:43.160432Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bucket=alphamatic-data\n" + ] + }, + { + "data": { + "text/plain": [ + "['alphamatic-data/README.md', 'alphamatic-data/data']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False)\n", + "print(\"bucket=\" + bucket)\n", + "s3.ls(bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9bc9623e", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.828493Z", + "start_time": "2021-06-16T11:41:43.822315Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['alphamatic-data/README.md', 
'alphamatic-data/data']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s3.ls(bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "65f95a8a", + "metadata": { + "ExecuteTime": { + "end_time": "2021-06-16T11:41:43.839153Z", + "start_time": "2021-06-16T11:41:43.832520Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(os.environ[\"AWS_DEFAULT_REGION\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py new file mode 100644 index 000000000..65aa9d9f8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py @@ -0,0 +1,44 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# %% +# 
https://s3fs.readthedocs.io/en/latest/ + +# %% +# %load_ext autoreload +# %autoreload 3 + +# %% +import os + +import helpers.hs3 as hs3 + +# %% +aws_profile = "am" + +# %% +# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key) + +s3 = hs3.get_s3fs(aws_profile) + +# %% +bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False) +print("bucket=" + bucket) +s3.ls(bucket) + +# %% +s3.ls(bucket) + +# %% +print(os.environ["AWS_DEFAULT_REGION"]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb new file mode 100644 index 000000000..9f3df144d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb @@ -0,0 +1,448 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ae351077", + "metadata": {}, + "source": [ + "# Maple\n", + "\n", + "https://www.sagemath.org/" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "67b105e6", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:38.281663Z", + "start_time": "2022-11-24T08:59:32.166395Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sagemath\n", + " Downloading sagemath-1.3.0.tar.gz (9.4 kB)\n", + "Collecting cython>=0.26\n", + " Downloading Cython-0.29.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)\n", + "\u001b[K |████████████████████████████████| 1.9 MB 3.2 MB/s eta 0:00:01\n", + "\u001b[?25hBuilding wheels for collected packages: sagemath\n", + " Building wheel for sagemath (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for sagemath: filename=sagemath-1.3.0-py3-none-any.whl size=9330 sha256=eb8efd936116026e66a021d4bdd88dc4d9ce207fd633706229625d26878de267\n", + " Stored in directory: /root/.cache/pip/wheels/da/63/1f/6dc0b464e0fec31a0d318d11748e11be903fe893fd6fb713fe\n", + "Successfully built sagemath\n", + "Installing collected packages: cython, sagemath\n", + "Successfully installed cython-0.29.32 sagemath-1.3.0\n" + ] + } + ], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sagemath)\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70f1c613", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "8dd49c0c", + "metadata": {}, + "source": [ + "# Sympy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bab397f4", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:31.082906Z", + "start_time": "2022-11-24T08:59:08.303577Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sympy\n", + " Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\n", + "\u001b[K |████████████████████████████████| 6.5 MB 4.4 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting mpmath>=0.19\n", + " Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)\n", + "\u001b[K |████████████████████████████████| 532 kB 6.2 MB/s eta 0:00:01\n", + "\u001b[?25hInstalling collected packages: mpmath, sympy\n", + "Successfully installed mpmath-1.2.1 sympy-1.11.1\n" + ] + } + ], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sympy)\"" + ] + }, + { + "cell_type": "markdown", + "id": "c32a78b2", + "metadata": {}, + "source": [ + "## Features\n", + "\n", + "https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html#" + ] + }, + { + "cell_type": "markdown", + "id": "547104ae", + "metadata": {}, + "source": [ + "## Logic\n", + "\n", + 
"https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "016ffec6", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T08:59:48.097485Z", + "start_time": "2022-11-24T08:59:47.660109Z" + } + }, + "outputs": [], + "source": [ + "import sympy\n", + "from sympy import * # noqa: F403" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "15a65c7c", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:01:58.628860Z", + "start_time": "2022-11-24T09:01:58.614742Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle y \\vee \\left(x \\wedge y\\right)$" + ], + "text/plain": [ + "y | (x & y)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x, y = sympy.symbols(\"x,y\")\n", + "y | (x & y)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c016e526", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:02:16.425181Z", + "start_time": "2022-11-24T09:02:16.418742Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle x \\Rightarrow y$" + ], + "text/plain": [ + "Implies(x, y)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x >> y" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "961ab5b7", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:02:36.687945Z", + "start_time": "2022-11-24T09:02:36.681518Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\text{True}$" + ], + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Evaluate an expression.\n", + "(y & x).subs({x: True, y: True})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d36a6df4", + "metadata": { + 
"ExecuteTime": { + "end_time": "2022-11-24T09:03:53.122377Z", + "start_time": "2022-11-24T09:03:53.108926Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\left(x \\wedge \\neg w\\right) \\vee \\left(y \\wedge z \\wedge \\neg x\\right)$" + ], + "text/plain": [ + "(x & ~w) | (y & z & ~x)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "w, x, y, z = sympy.symbols(\"w x y z\")\n", + "minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}]\n", + "sympy.SOPform([w, x, y, z], minterms)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "351f8a29", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:04:52.260031Z", + "start_time": "2022-11-24T09:04:52.244286Z" + } + }, + "outputs": [ + { + "data": { + "text/latex": [ + "$\\displaystyle \\neg x \\wedge \\neg y$" + ], + "text/plain": [ + "~x & ~y" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = (~x & ~y & ~z) | (~x & ~y & z)\n", + "sympy.simplify_logic(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6997a50b", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:07:01.070407Z", + "start_time": "2022-11-24T09:07:01.063092Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 0] -> True\n", + "[0, 1] -> True\n", + "[1, 0] -> False\n", + "[1, 1] -> True\n" + ] + } + ], + "source": [ + "# Compute truth table.\n", + "from sympy.logic.boolalg import truth_table # noqa: E402\n", + "\n", + "table = truth_table(x >> y, [x, y])\n", + "for t in table:\n", + " print(f\"{t[0]} -> {t[1]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c70e51cf", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:08:01.433951Z", + "start_time": "2022-11-24T09:08:01.298800Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + 
"execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sympy.satisfiable(x & ~x)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f9d0eda7", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:08:14.379803Z", + "start_time": "2022-11-24T09:08:14.364702Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{y: True, x: True}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sympy.satisfiable((x | y) & (x | ~y) & (~x | y))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "782bd93c", + "metadata": { + "ExecuteTime": { + "end_time": "2022-11-24T09:28:42.188931Z", + "start_time": "2022-11-24T09:28:42.124276Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{L: True, Q: True, B: False, N: False}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# - (not L => Q and B and N)\n", + "# - (N => not L)\n", + "# - not Q => B\n", + "# - not B\n", + "\n", + "L, N, Q, B = sympy.symbols(\"L N Q B\")\n", + "\n", + "C = (\n", + " sympy.Implies(~L, Q & B & N)\n", + " & sympy.Implies(N, ~L)\n", + " & sympy.Implies(~Q, B)\n", + " & ~B\n", + ")\n", + "sympy.satisfiable(C)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1298f34b", + "metadata": {}, + "outputs": [], + "source": [ + "## Stats\n", + "\n", + "# https://docs.sympy.org/latest/modules/stats.html#" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": 
true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py new file mode 100644 index 000000000..bd5b8a5aa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py @@ -0,0 +1,98 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Maple +# +# https://www.sagemath.org/ + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sagemath)" + +# %% + +# %% [markdown] +# # Sympy + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sympy)" + +# %% [markdown] +# ## Features +# +# https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html# + +# %% [markdown] +# ## Logic +# +# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation + +# %% +import sympy +from sympy import * # noqa: F403 + +# %% +x, y = sympy.symbols("x,y") +y | (x & y) + +# %% +x >> y + +# %% +# Evaluate an expression. +(y & x).subs({x: True, y: True}) + +# %% +w, x, y, z = sympy.symbols("w x y z") +minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}] +sympy.SOPform([w, x, y, z], minterms) + +# %% +b = (~x & ~y & ~z) | (~x & ~y & z) +sympy.simplify_logic(b) + +# %% +# Compute truth table. 
+from sympy.logic.boolalg import truth_table # noqa: E402 + +table = truth_table(x >> y, [x, y]) +for t in table: + print(f"{t[0]} -> {t[1]}") + +# %% +sympy.satisfiable(x & ~x) + +# %% +sympy.satisfiable((x | y) & (x | ~y) & (~x | y)) + +# %% +# - (not L => Q and B and N) +# - (N => not L) +# - not Q => B +# - not B + +L, N, Q, B = sympy.symbols("L N Q B") + +C = ( + sympy.Implies(~L, Q & B & N) + & sympy.Implies(N, ~L) + & sympy.Implies(~Q, B) + & ~B +) +sympy.satisfiable(C) + +# %% +## Stats + +# https://docs.sympy.org/latest/modules/stats.html# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py new file mode 100644 index 000000000..7550952ca --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py @@ -0,0 +1,192 @@ +""" +Import as: + +import helpers.old.conda as holdcond +""" + +import json +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hsystem as hsystem +import helpers.old.user_credentials as holuscre + +_LOG = logging.getLogger(__name__) + + +def conda_system(cmd: str, *args: Any, **kwargs: Any) -> int: + """ + When running a conda command we need to execute a script to configure + conda. This script is typically executed in .bashrc but here we create a + new bash shell every time to execute a command, so we need to re-initialize + the shell before any conda command. 
+ + :param cmd: + :param args: + :param kwargs: + :return: + """ + # TODO(gp): Pass conda_env_name as done in get_conda_list() + path = holuscre.get_credentials()["conda_sh_path"] + hdbg.dassert_path_exists(path) + hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) + cmd = f"source {path} && {cmd}" + output: int = hsystem.system(cmd, *args, **kwargs) + return output + + +def conda_system_to_string( + cmd: str, *args: Any, **kwargs: Any +) -> Tuple[int, str]: + path = holuscre.get_credentials()["conda_sh_path"] + hdbg.dassert_path_exists(path) + hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) + cmd = f"source {path} && {cmd}" + output: Tuple[int, str] = hsystem.system_to_string(cmd, *args, **kwargs) + return output + + +def get_conda_envs_dirs() -> List[str]: + """ + :return: list of the env dirs from conda + """ + _, ret = conda_system_to_string(r"conda config --show envs_dirs --json") + _LOG.debug("ret=%s", ret) + envs = json.loads(ret) + hdbg.dassert_in("envs_dirs", envs) + envs = envs["envs_dirs"] + hdbg.dassert_isinstance(envs, list) + return list(envs) + + +def set_conda_env_root(conda_env_path: str) -> None: + """ + Set conda env dirs so that it matches what specified in. + + > conda config --show envs_dirs --json + { + "envs_dirs": [ + "/Users/gp/.conda/envs", + ] + } + + > conda config --prepend envs_dirs /data/gp_wd/anaconda2/envs2 + """ + envs = get_conda_envs_dirs() + # + if not envs or envs[0] != conda_env_path: + _LOG.warning( + "%s is not the first env dir in %s", conda_env_path, str(envs) + ) + # Reset the list of conda envs. + _LOG.debug("Resetting envs_dir %s", str(envs)) + for env in envs: + _LOG.debug("Deleting %s", env) + cmd = f"conda config --remove envs_dirs {env}" + # We don't abort because of a bug in conda not deleting the key + # when asked for. 
+ # CondaKeyError: 'envs_dirs': u'/data/shared/anaconda2/envs' is not + # in the u'envs_dirs' key of the config file + conda_system(cmd, abort_on_error=False) + envs = get_conda_envs_dirs() + _LOG.debug("Current envs: %s", str(envs)) + # Add the conda env. + cmd = f"conda config --prepend envs_dirs {conda_env_path}" + conda_system(cmd) + # Check that the requested dir is now the first env dir. + envs = get_conda_envs_dirs() + hdbg.dassert( + bool(envs) and envs[0] == conda_env_path, + msg=f"{conda_env_path} is not first env dir in {envs}", + ) + else: + _LOG.debug( + "Nothing to do, since %s is already in %s", conda_env_path, envs + ) + + +def get_conda_info_envs() -> Tuple[dict, Optional[str]]: + """ + :return: (env_dict, active_env) + - env_dict: map 'conda env name -> conda env path' + - active_env: name of the env marked with `*`, or None if no env is marked + """ + # > conda info --envs + # # conda environments: + # # + # aws /Users/gp/.conda/envs/aws + # bbg /Users/gp/.conda/envs/bbg + # deeplearning /Users/gp/.conda/envs/deeplearning + # jupyter /Users/gp/.conda/envs/jupyter + # test_conda /Users/gp/.conda/envs/test_conda + # TODO(gp): Use --json but we need to parse the json without any module.
+ ret = conda_system_to_string(r"conda info --envs")[1] + _LOG.debug("Parsing conda info\n%s", ret) + ret = ret.split("\n") + env_dict = {} + active_env = None + for line in ret: + line = line.rstrip().lstrip() + if line == "": + continue + if line.startswith("#"): + continue + vals = line.split() + if len(vals) == 2: + env_name, env_path = vals + env_dict[env_name] = env_path + elif len(vals) == 3: + active_env, star, env_path = vals + hdbg.dassert_eq(star, "*") + env_dict[active_env] = env_path + else: + _LOG.debug("Can't parse line='%s'", line) + return env_dict, active_env + + +def get_conda_list(conda_env_name: str) -> Dict[str, Dict[str, str]]: + """ + :return: env_dict mapping package name to their info + - key: package name (first column of `conda list`) + - value: map with the `version`, `build`, `channel` columns that are present + """ + # > conda list + # # packages in environment at /Users/gp/.conda/envs/: + # # + # # Name Version Build Channel + # absl-py 0.5.0 py_0 conda-forge + # agate 1.6.0 py_3 conda-forge + # agate-dbf 0.2.0 py27_0 conda-forge + # agate-excel 0.2.2 py_0 conda-forge + # TODO(gp): Use --json but we need to parse the json without any module.
+ cmd = rf"(conda activate {conda_env_name} 2>&1) >/dev/null && conda list" + ret = conda_system_to_string(cmd)[1] + ret = ret.split("\n") + env_dict = {} + labels = {1: "version", 2: "build", 3: "channel"} + for line in ret: + line = line.rstrip().lstrip() + _LOG.debug("line='%s'", line) + if line == "": + continue + if line.startswith("#"): + continue + vals = line.split() + env_dict[vals[0]] = {labels[k]: vals[k] for k in range(1, len(vals[:4]))} + return env_dict + + +_CONDA_PATH = None + + +def get_conda_path() -> Optional[str]: + global _CONDA_PATH + if not _CONDA_PATH: + rc, txt = conda_system_to_string("which conda", abort_on_error=False) + if rc == 0: + _CONDA_PATH = str(txt) + else: + _CONDA_PATH = "n/a" + return _CONDA_PATH diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py new file mode 100644 index 000000000..5b0445a31 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py @@ -0,0 +1,17 @@ +import pathlib +from typing import Any, Optional + + +def pytest_ignore_collect( # type: ignore + collection_path: pathlib.Path, path: Any, config: Any +) -> Optional[bool]: + """ + Skip all tests in this directory. + + :param collection_path: path to analyze + :param path: path to analyze (deprecated) + :param config: pytest config object + :return: True if the path should be ignored + """ + # Ignore this directory and all its subdirectories. 
+ return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py new file mode 100644 index 000000000..f51cb5d8d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py @@ -0,0 +1,75 @@ +""" +Import as: + +import helpers.old.env2 as holdenv2 +""" + +import logging +import os +from typing import Tuple + +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.old.conda as holdcond + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +def get_system_info(add_frame: bool) -> str: + msg = "" + if add_frame: + msg += hprint.frame("System info") + "\n" + msg += f"user name={hsystem.get_user_name()}\n" + msg += f"server name={hsystem.get_server_name()}\n" + msg += f"os name={hsystem.get_os_name()}\n" + msg += f"conda path={holdcond.get_conda_path()}\n" + msg += f"conda env root={str(holdcond.get_conda_envs_dirs())}\n" + return msg + + +def get_package_summary(conda_env_name: str, add_frame: bool) -> str: + msg = "" + if add_frame: + msg += hprint.frame("Package summary") + "\n" + conda_list = holdcond.get_conda_list(conda_env_name) + # Append one `package: version` line per package, keeping the frame (if any). + for package in ["pandas", "numpy", "scipy", "arrow-cpp"]: + ver = conda_list[package]["version"] if package in conda_list else "None" + line = f"{package}: {ver}" + msg += line + "\n" + return msg + + +def get_conda_export_list(conda_env_name: str, add_frame: bool) -> str: + msg = "" + if add_frame: + msg += hprint.frame("Package summary") + "\n" + cmd = rf"(conda activate {conda_env_name} 2>&1 >/dev/null) && conda list --export" + _, msg_tmp = holdcond.conda_system_to_string(cmd) + msg += msg_tmp + return msg + + +def
save_env_file(conda_env_name: str, dir_name: str) -> Tuple[str, str]: + msg = "" + msg += get_system_info(add_frame=True) + msg += get_package_summary(conda_env_name, add_frame=True) + msg += get_conda_export_list(conda_env_name, add_frame=True) + # Save results. + if dir_name is not None: + file_name = ( + f"{conda_env_name}.{hsystem.get_user_name()}.{hsystem.get_os_name()}." + f"{hsystem.get_server_name()}.txt" + ) + dst_file = os.path.join(dir_name, file_name) + dst_file = os.path.abspath(dst_file) + hio.create_enclosing_dir(dst_file, incremental=True) + _LOG.info("Saving conda env signature to '%s'", dst_file) + hio.to_file(dst_file, msg) + else: + dst_file = None + return msg, dst_file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py new file mode 100644 index 000000000..a9d6b4f46 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py @@ -0,0 +1,267 @@ +""" +Import as: + +import helpers.old.tunnels as holdtunn +""" + +import logging +import os +from typing import Any, Dict, List, Tuple, Union, cast + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.old.user_credentials as holuscre + +_LOG = logging.getLogger(__name__) + + +def _get_services_info() -> list: + # Server ports. + services = [ + # service name, server public IP, local port, remote port. + ("MongoDb", hsystem.get_env_var("OLD_DEV_SERVER"), 27017, 27017), + ("Jenkins", hsystem.get_env_var("JENKINS_SERVER"), 8080, 8080), + # ("Reviewboard", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8000, 8000), + # ("Doc server", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8001, 80), + # Netdata to Jenkins and Dev server. 
+ # ("Dev system performance", DEV_SERVER, 19999), + # ("Jenkins system performance", DEV_SERVER, 19999), + ] + return services + + +# ############################################################################# + + +def get_tunnel_info() -> Tuple[list, str]: + credentials = holuscre.get_credentials() + # + tunnel_info = credentials["tunnel_info"] + hdbg.dassert_is_not(tunnel_info, None) + # Add tunnels for standard services. + services = _get_services_info() + tunnel_info.extend(services) + # + ssh_key_path = credentials["ssh_key_path"] + hdbg.dassert_is_not(ssh_key_path, None) + # TODO(gp): Add check to make sure that the source ports are all different. + return tunnel_info, ssh_key_path + + +def tunnel_info_to_string(tunnel_info: list) -> str: + ret = "\n".join(map(str, tunnel_info)) + ret = hprint.indent(ret) + return ret + + +def parse_service( + service: Tuple[str, str, int, int], +) -> Dict[str, Union[str, int]]: + hdbg.dassert_eq(len(service), 4, "service=%s", service) + service_name, server, local_port, remote_port = service + return { + "service_name": service_name, + "server": server, + "local_port": local_port, + "remote_port": remote_port, + } + + +def find_service( + service_name: str, tunnel_info: list +) -> Tuple[str, str, int, int]: + found_service = False + for service in tunnel_info: + if service_name == parse_service(service)["service_name"]: + hdbg.dassert(not found_service) + found_service = True + ret: Tuple[str, str, int, int] = service + hdbg.dassert(found_service) + return ret + + +def get_server_ip(service_name: str) -> str: # pylint: disable=unused-argument + tunnel_info, _ = get_tunnel_info() + _LOG.debug("tunnels=\n%s", tunnel_info_to_string(tunnel_info)) + service = find_service("Doc server", tunnel_info) + server = parse_service(service)["server"] + server = cast(str, server) + return server + + +def _get_tunnel_info() -> Tuple[Any, str]: + credentials = holuscre.get_credentials() + # + tunnel_info = credentials["tunnel_info"] + 
hdbg.dassert_is_not(tunnel_info, None) + # Add tunnels for standard services. + services = _get_services_info() + tunnel_info.extend(services) + # + ssh_key_path = credentials["ssh_key_path"] + hdbg.dassert_is_not(ssh_key_path, None) + # TODO(gp): Add check to make sure that the source ports are all different. + return tunnel_info, ssh_key_path + + +def _tunnel_info_to_string(tunnel_info: list) -> str: + ret = "\n".join(map(str, tunnel_info)) + ret = hprint.indent(ret) + return ret + + +def _service_to_string(service: Tuple[str, str, str, str]) -> str: + service_name, server, local_port, remote_port = service + ret = ( + f"tunnel for service '{service_name}'" + + f" server='{server}'" + + f" port='{local_port}->{remote_port}'" + ) + return ret + + +# ############################################################################# + + +def _get_ssh_tunnel_process( + local_port: int, remote_port: int, fuzzy_match: bool +) -> Tuple[List[int], str]: + """ + Return the pids of the processes attached to a given port. + """ + + def _keep_line(line: str) -> bool: + keep = "ssh -i" in line + if keep: + if fuzzy_match: + keep = (f" {local_port}:localhost " in line) or ( + f" localhost:{remote_port} " in line + ) + else: + keep = f" {local_port}:localhost:{remote_port} " in line + return keep + + _LOG.debug("local_port=%d -> remote_port=%d", local_port, remote_port) + pids, txt = hsystem.get_process_pids(_keep_line) + _LOG.debug("pids=%s", pids) + _LOG.debug("txt=\n%s", txt) + return pids, txt + + +def _create_tunnel( + server_name: str, + local_port: int, + remote_port: int, + user_name: str, + ssh_key_path: str, +) -> None: + """ + Create tunnel from localhost to 'server' for the ports `local_port -> + remote_port` and `user_name`. 
+ """ + ssh_key_path = os.path.expanduser(ssh_key_path) + _LOG.debug("ssh_key_path=%s", ssh_key_path) + hdbg.dassert_path_exists(ssh_key_path) + # + cmd = ( + "ssh -i {ssh_key_path} -f -nNT -L {local_port}:localhost:{remote_port}" + + " {user_name}@{server}" + ) + cmd = cmd.format( + user_name=user_name, + ssh_key_path=ssh_key_path, + local_port=local_port, + remote_port=remote_port, + server=server_name, + ) + hsystem.system(cmd, blocking=False) + # Check that at least one ssh process is attached to the tunnel ports. + pids, _ = _get_ssh_tunnel_process(local_port, remote_port, fuzzy_match=True) + hdbg.dassert_lte(1, len(pids)) + + +def _kill_ssh_tunnel_process(local_port: int, remote_port: int) -> None: + """ + Kill all the processes attached to either local or remote port. + """ + get_pids = lambda: _get_ssh_tunnel_process( + local_port, remote_port, fuzzy_match=True + ) + hsystem.kill_process(get_pids) + + +# ############################################################################# + + +def start_tunnels(user_name: str) -> None: + """ + Start all the tunnels for the given user. + """ + _LOG.debug("user_name=%s", user_name) + # Get tunnel info. + tunnel_info, ssh_key_path = _get_tunnel_info() + _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) + # + for service in tunnel_info: + _, server, local_port, remote_port = service + pids, _ = _get_ssh_tunnel_process( + local_port, remote_port, fuzzy_match=False + ) + if not pids: + _LOG.info("Starting %s", _service_to_string(service)) + _create_tunnel( + server, local_port, remote_port, user_name, ssh_key_path + ) + else: + _LOG.warning( + "%s already exists: skipping", _service_to_string(service) + ) + + +def stop_tunnels() -> None: + """ + Stop all the tunnels for the given user. + """ + # Get the tunnel info.
+ tunnel_info, _ = _get_tunnel_info() + _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) + # + for service in tunnel_info: + _, _, local_port, remote_port = service + _LOG.info("Stopping %s", _service_to_string(service)) + _kill_ssh_tunnel_process(local_port, remote_port) + + +def check_tunnels() -> None: + """ + Check the status of the tunnels for the given user. + """ + # Get the tunnel info. + tunnel_info, _ = _get_tunnel_info() + _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) + # + for service in tunnel_info: + _, _, local_port, remote_port = service + pids, _ = _get_ssh_tunnel_process( + local_port, remote_port, fuzzy_match=False + ) + if pids: + msg = f"exists with pid={pids}" + else: + msg = "doesn't exist" + _LOG.info("%s -> %s", _service_to_string(service), msg) + + +def kill_all_tunnel_processes() -> None: + """ + Kill all the processes that have `ssh -i ...:localhost:...". + """ + + # cmd = "ps ax | grep 'ssh -i' | grep localhost: | grep -v grep" + def _keep_line(line: str) -> bool: + keep = ("ssh -i" in line) and (":localhost:" in line) + return keep + + get_pids = lambda: hsystem.get_process_pids(_keep_line) + hsystem.kill_process(get_pids) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py new file mode 100755 index 000000000..5faded15d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python +""" +Import as: + +import helpers.old.user_credentials as holuscre +""" + +import argparse +import logging +import os +import pprint +from typing import Any, Dict, List, Tuple + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hparser as hparser +import 
helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def get_dev_server_ip() -> str: + """ + Get the dev server name from the user environment. + """ + env_var_name = "" + if env_var_name not in os.environ: + _LOG.error( + "Can't find '%s': re-run dev_scripts/setenv.sh?", env_var_name + ) + raise RuntimeError + dev_server = os.environ[env_var_name] + return dev_server + + +# pylint: disable=too-many-statements +def get_credentials() -> Dict[str, Any]: + """ + Report information about a user set-up as a function of: 1) user name 2) + server name 3) git repository name. + + The mandatory information are: + 1) git_user_name + 2) git_user_email + 3) conda_sh_path: the path of the script bootstrapping conda + - To find "conda_sh_path": + > which conda + /data/root/anaconda3/bin/conda + > find /data/root/anaconda3 -name "conda.sh" + - In one instruction: + > CONDA_DIR=$(dirname $(which conda))"/.."; find $CONDA_DIR -name "conda.sh" + - If there are multiple ones you want to pick the one under + `profile.d`, e.g., `/anaconda3/etc/profile.d/conda.sh` + 4) conda_env_path: the path of the dir storing the conda environments + - To find "conda_env_path" + > conda info + ... 
+ envs directories : /data/saggese/.conda/envs + + The optional information are: + 5) ssh_key_path: the path of the ssh key to use + 6) tunnel_info: list of "personal" ports to forward + - This is an advanced behavior that allows to specify in your user + config a set of ports to forward from one computer (typically your + laptop) to a set of services that are specific of your set-up (e.g., + started through `run_jupyter_server.py`) + - E.g., + ```python + if server_name in ("gpmac.local", "gpmac.lan"): + if git_repo_name == "": + service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) + ``` + when GP runs `ssh_tunnels.py` from his laptop in a + `` client, a tunnel is open to the dev + server where `run_jupyter_server.py` will have started a notebook server + 7) jupyter_port: on which port to start a jupyter server on a specific server + - It's a good idea for everybody to have a different port to avoid port + collisions + 8) notebook_html_path: the path where to save html of notebooks + 9) notebook_backup_path: the path where to backup the source .ipynb code of + notebooks + """ + # + user_name = hsystem.get_user_name() + server_name = hsystem.get_server_name() + _LOG.debug("user_name='%s'", user_name) + _LOG.debug("server_name='%s'", server_name) + git_repo_name = hgit.get_repo_full_name_from_client(super_module=True) + # Values to assign. + git_user_name = "" + git_user_email = "" + conda_sh_path = "" + ssh_key_path = "~/.ssh/id_rsa" + tunnel_info: List[Tuple[str, str, str, str]] = [] + jupyter_port = -1 + notebook_html_path = "" + notebook_backup_path = "" + # + conda_env_path = "~/.conda/envs" + conda_env_path = os.path.expanduser(conda_env_path) + if server_name in (): + conda_sh_path = "/anaconda3/etc/profile.d/conda.sh" + if user_name == "saggese": + # GP. + git_user_name = "saggese" + git_user_email = "abc@xyz.com" + if server_name.startswith("gpmac") or server_name.startswith( + "giacintos-mbp" + ): + # Laptop. 
+ conda_sh_path = "/Users/saggese/opt/anaconda3/etc/profile.d/conda.sh" + conda_env_path = "/Users/saggese/.conda/envs" + if git_repo_name == "": + # Forward port 10003 to the notebook server that is started by + # `run_jupyter_server.py` when executed on the dev server. + # service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) + # tunnel_info.append(service) + # jupyter_port = 10001 + pass + elif server_name == "": + if git_repo_name == "": + jupyter_port = 10003 + else: + hdbg.dassert_ne(conda_sh_path, "") + elif user_name == "paul": + # Paul. + git_user_name = "paul" + git_user_email = "abc@xyz.com" + if server_name in ("Pauls-MacBook-Pro.local", "Pauls-MBP"): + conda_sh_path = "/Users/paul/anaconda3/etc/profile.d/conda.sh" + conda_env_path = "/Users/paul/.conda/envs" + # Check. + for var_name, val_name in [ + ("git_user_name", git_user_name), + ("git_user_email", git_user_email), + ("conda_sh_path", conda_sh_path), + ("conda_env_path", conda_env_path), + # We allow the rest of the variables (e.g., ssh_key_path, tunnel_info) to + # be empty since in some configurations they can be undefined. + ]: + hdbg.dassert_is_not( + val_name, + None, + "Undefined '%s': add your credentials for user_name='%s' and " + "server_name='%s' to '%s'", + var_name, + user_name, + server_name, + __file__, + ) + conda_sh_path = os.path.expanduser(conda_sh_path) + conda_sh_path = os.path.abspath(conda_sh_path) + hdbg.dassert_path_exists(conda_sh_path) + # + conda_env_path = os.path.abspath(os.path.expanduser(conda_env_path)) + # Not necessarily the conda_env_path exists. + if not os.path.exists(conda_env_path): + _LOG.warning("The dir '%s' doesn't exist: creating it", conda_env_path) + hio.create_dir(conda_env_path, incremental=True) + hdbg.dassert_path_exists(os.path.dirname(conda_env_path)) + # + for service in tunnel_info: + # TODO(gp): We should call in ssh_tunnels.py to keep this encapsulated. 
+ hdbg.dassert_eq(len(service), 4) + service_name, server, local_port, remote_port = service + _ = service_name, server, local_port, remote_port + ret = { + "git_user_name": git_user_name, + "git_user_email": git_user_email, + "conda_sh_path": conda_sh_path, + "conda_env_path": conda_env_path, + "ssh_key_path": ssh_key_path, + "tunnel_info": tunnel_info, + "jupyter_port": jupyter_port, + "notebook_html_path": notebook_html_path, + "notebook_backup_path": notebook_backup_path, + } + _LOG.debug("Credentials: %s", ret) + return ret + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--user", action="store", default=None, help="Impersonate a user" + ) + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + if args.user: + hsystem.set_user_name(args.user) + usc = get_credentials() + pprint.pprint(usc) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh new file mode 100644 index 000000000..45acd8194 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh @@ -0,0 +1,113 @@ +#!/bin/sh + +# NOTE TO MAINTAINERS: this must be updated each time a new texlive is +# released! 
+default_version=2024 +tlversion=${1:-"$default_version"} +installer_archive=install-tl-unx.tar.gz + +usage () +{ + printf 'Install TeXLive\n' + printf 'Usage: %s [OPTIONS]\n\n' "$0" + printf 'Options:\n' + printf ' -t: TeXLive version (default %s)\n' "$default_version" + printf ' -m: mirror URL\n' +} + +if ! args=$(getopt 't:m:' "$@"); then + usage && exit 1 +fi +# The variable is intentionally left unquoted. +# shellcheck disable=SC2086 +set -- $args + +tlversion= +mirror_url= + +while true; do + case "$1" in + (-t) + tlversion="${2}" + shift 2 + ;; + (-m) + mirror_url="${2}" + shift 2 + ;; + (--) + shift + break + ;; + (*) + printf 'Unknown option: %s\n' "$1" + usage + exit 1 + ;; + esac +done + +[ -n "$tlversion" ] || tlversion="$default_version" + +if [ -z "$mirror_url" ] && [ "$tlversion" != "$default_version" ]; then + # Default mirror for historic releases + mirror_url="ftp://tug.org/historic/" +fi + +if [ -z "$mirror_url" ]; then + # Get the mirror URL from the redirect. Otherwise, if we were to + # always use the mirror URL, we'd run into problems whenever we get + # installer and signatures from different mirrors that are not 100% + # in sync. + mirror_url=$(wget -4 --quiet --output-document=/dev/null \ + --server-response \ + http://mirror.ctan.org/ \ + 2>&1 | \ + sed -ne 's/.*Location: \(.*\)$/\1/p' | head -n 1) +fi + +# Trim trailing slash(es) +mirror_url=$(echo "$mirror_url" | sed -e 's/\/*$//') + +if [ "$tlversion" = "$default_version" ]; then + installer_url="$mirror_url/systems/texlive/tlnet/" + repository= +else + installer_url="$mirror_url/systems/texlive/$tlversion/tlnet-final/" + repository=$installer_url +fi + +# Log the installer and repository url +printf 'installer URL: %s\n' "${installer_url}" +printf 'repository: %s\n' "${repository}" + +# Download the install-tl perl script. The archive integrity and signature is +# verified later, so it's ok if we use an insecure connection. 
+wget -4 --no-verbose --no-check-certificate \ + "$installer_url/$installer_archive" \ + "$installer_url/$installer_archive".sha512 \ + "$installer_url/$installer_archive".sha512.asc \ + || exit 1 + +## Verify installer integrity +# get current signing key +gpg --keyserver keyserver.ubuntu.com \ + --receive-key 0xC78B82D8C79512F79CC0D7C80D5E5D9106BAB6BC || exit 5 +gpg --verify "$installer_archive".sha512.asc || exit 5 +sha512sum -c "$installer_archive".sha512 || exit 5 + +## Proceed with installation +# Extract installer +mkdir -p ./install-tl +tar --strip-components 1 -zvxf "$installer_archive" -C "$PWD/install-tl" \ + || exit 1 + +# Run the default installation with the specified profile. +./install-tl/install-tl ${repository:+-repository "$repository"} \ + --profile=/root/texlive.profile + +# Cleanup installation artifacts. +rm -rf ./install-tl \ + "$installer_archive" \ + "$installer_archive.sha512" \ + "$installer_archive.sha512.asc" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt new file mode 100644 index 000000000..9e4ccf64f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt @@ -0,0 +1,115 @@ +# Packages listed in https://pandoc.org/MANUAL.html#creating-a-pdf + +######################################################################### +### Packages required by pandoc +amsfonts # math fonts +amsmath # math commands +babel # required when pandoc is used with lang +beamer # for presentations +bidi # used by xelatex if the `dir` variable is set +bookmark # bookmarks with hyperref +booktabs # nicer-looking tables +caption # customize captions in floating envs; required for beamer +csquotes # used for typography if the `csquotes`
variable is set to true +euler # use AMS Euler fonts for math +eurosym # Metafont and macros for Euro sign +fancyvrb # Verbatim environments for code blocks +framed # Needed with certain `--highlight-style` options +geometry # required if the `geometry` variable set +graphics # required if the document contains images +hyperref # hyperlinks +listings # if the `--listing` option is used +lm # Latin modern fonts +lm-math # Latin modern fonts for math +memoir # frequently used document class +multirow # Tabular cells spanning multiple rows +pgf # for TikZ and beamer +setspace # required if the `linestretch` variable is used +soul # required for underlined text +subfig # Figures broken into subfigures +tools # the LaTeX standard tools bundle; e.g., calc, longtable +xcolor # colors + +# Deprecated! Only used by older pandoc versions before 3.0. +ulem + +######################################################################### +### Semi-optional packages +# +# The following packages will be used to improve output quality if +# present, but pandoc does not require them to be present: +footnotehyper # to allow footnotes in tables +microtype # for better spacing adjustments +parskip # for better inter-paragraph spaces +upquote # for straight quotes in verbatim environments +xurl # for better line breaks in URLs + +######################################################################### +### Intentionally **NOT** installed due to size constraints. +# +#xeCJR # If CJKmainfont is set, xeCJK is needed. + +######################################################################### +### Required when using pandoc-crossref +cleveref # Intelligent cross-referencing +float # Improved interface for floating objects + +######################################################################### +### Extra engines and packages for XeLaTeX and LuaLaTeX. 
+fontspec # required with xelatex or lualatex +ifmtarg # if-then-else commands used in the default template +iftex # Checks for the specific LaTeX engine being used +latexmk +lua-ul # LuaLaTeX replacement of soul +luacode +luacolor +lualatex-math # LuaTeX specific math patches +luatexbase +mathspec # used by xelatex if the `mathspec` variable is set +selnolig # Used with LuaLaTeX to disable illegal typographic ligatures +unicode-math # Unicode math support for XeTeX and LuaTeX +xetex + +######################################################################### +### Reference management tools +biber +biblatex +bibtex +natbib + +######################################################################### +### I18n and languages +# +# The choice of selected languages is historic, those were the ones +# installed by TeXLive by default for a long time. +bidi +babel-basque +babel-czech +babel-danish +babel-dutch +babel-english +babel-finnish +babel-french +babel-german +babel-hungarian +babel-italian +babel-norsk +babel-polish +babel-portuges +babel-spanish +babel-swedish +hyphen-basque +hyphen-czech +hyphen-danish +hyphen-dutch +hyphen-english +hyphen-finnish +hyphen-french +hyphen-german +hyphen-hungarian +hyphen-italian +hyphen-norwegian +hyphen-polish +hyphen-portuguese +hyphen-spanish +hyphen-swedish diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile new file mode 100644 index 000000000..dd5364e87 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile @@ -0,0 +1,32 @@ +# texlive.profile written on Tue Feb 5 09:43:07 2019 UTC +# It will NOT be updated and reflects only the +# installation profile at installation time. 
+# +# NOTE: see also alpine/latex.Dockerfile which appends +# `binary_x86_64-linuxmusl 1` to this file, use for non-glibc distributions. +selected_scheme scheme-basic +TEXDIR /opt/texlive/texdir +TEXMFLOCAL /opt/texlive/texmf-local +TEXMFSYSVAR /opt/texlive/texdir/texmf-var +TEXMFSYSCONFIG /opt/texlive/texdir/texmf-config +TEXMFVAR ~/.texlive/texmf-var +TEXMFCONFIG ~/.texlive/texmf-config +TEXMFHOME ~/texmf +instopt_adjustpath 0 +instopt_adjustrepo 1 +instopt_letter 0 +instopt_portable 0 +instopt_write18_restricted 1 +tlpdbopt_autobackup 1 +tlpdbopt_backupdir tlpkg/backups +tlpdbopt_create_formats 1 +tlpdbopt_desktop_integration 1 +tlpdbopt_file_assocs 1 +tlpdbopt_generate_updmap 0 +tlpdbopt_install_docfiles 0 +tlpdbopt_install_srcfiles 0 +tlpdbopt_post_code 1 +tlpdbopt_sys_bin /usr/local/bin +tlpdbopt_sys_info /usr/local/share/info +tlpdbopt_sys_man /usr/local/share/man +tlpdbopt_w32_multi_user 1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py new file mode 100644 index 000000000..d8807f46b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py @@ -0,0 +1,411 @@ +""" +Import as: + +import helpers.repo_config_utils as hrecouti +""" + +import logging +import os +from typing import Any, Dict, List, Optional, Union + +import yaml + +_LOG = logging.getLogger(__name__) + +# ############################################################################# + +# Copied from hprint to avoid import cycles. + + +# TODO(gp): It should use *. +def indent(txt: str, num_spaces: int = 2) -> str: + """ + Add `num_spaces` spaces before each line of the passed string. 
+ """ + spaces = " " * num_spaces + txt_out = [] + for curr_line in txt.split("\n"): + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + res = "\n".join(txt_out) + return res + + +# End copy. + + +# ############################################################################# + + +def _find_config_file(file_name: str) -> str: + """ + Find recursively the dir of config file. + + This function traverses the directory hierarchy upward from a + specified starting path to find the directory that contains the + config file. + + :param file_name: name of the file to find + :return: path to the file + """ + curr_dir = os.getcwd() + while True: + path = os.path.join(curr_dir, file_name) + if os.path.exists(path): + break + parent = os.path.dirname(curr_dir) + if parent == curr_dir: + # We cannot use helpers since it creates circular import. + raise FileNotFoundError( + f"Could not find '{file_name}' in current directory or any parent directories" + ) + curr_dir = parent + return path + + +def _get_env_var( + env_name: str, + as_bool: bool = False, + default_value: Any = None, + abort_on_missing: bool = True, +) -> Union[str, bool]: + """ + Get an environment variable by name. + + :param env_name: name of the env var + :param as_bool: convert the value into a Boolean + :param default_value: the default value to use in case it's not + defined + :param abort_on_missing: if the env var is not defined aborts, + otherwise use the default value + :return: value of env var + """ + if env_name not in os.environ: + if abort_on_missing: + assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" + else: + return default_value + value = os.environ[env_name] + if as_bool: + # Convert the value into a boolean. 
+ if value in ("0", "", "None", "False"): + value = False + else: + value = True + return value + + +# ############################################################################# +# RepoConfig +# ############################################################################# + + +class RepoConfig: + def __init__(self, data: Dict) -> None: + """ + Set the data to be used by the module. + """ + self._data = data + + def set_repo_config_data(self, data: Dict) -> None: + self._data = data + + @classmethod + def from_file(cls, file_name: Optional[str] = None) -> "RepoConfig": + """ + Return the text of the code stored in `repo_config.yaml`. + """ + if file_name is None: + file_name = RepoConfig._get_repo_config_file() + assert os.path.exists(file_name), f"File '{file_name}' doesn't exist" + _LOG.debug("Reading file_name='%s'", file_name) + try: + with open(file_name, "r") as file: + # Use `safe_load()` to avoid executing arbitrary code. + data = yaml.safe_load(file) + assert isinstance(data, dict), ( + "data=\n%s\nis not a dict but %s", + str(data), + type(data), + ) + except Exception as e: + raise ValueError(f"Error reading YAML file {file_name}: {e}") + return cls(data) + + # TODO(gp): -> __str__? + def config_func_to_str(self) -> str: + """ + Return the string representation of the config function. + """ + ret: List[str] = [] + ret.append(f"get_host_name='{self.get_host_name()}'") + ret.append( + f"get_html_dir_to_url_mapping='{self.get_html_dir_to_url_mapping()}'" + ) + ret.append(f"get_invalid_words='{self.get_invalid_words()}'") + ret.append( + f"get_docker_base_image_name='{self.get_docker_base_image_name()}'" + ) + ret.append(f"get_release_team='{self.get_release_team()}'") + txt = "\n".join(ret) + return txt + + # repo_info + + # TODO(gp): -> get_repo_name + def get_name(self) -> str: + """ + Return the name of the repo, e.g., in `//amp`. 
+ """ + value = self._data["repo_info"]["repo_name"] + return f"//{value}" + + def get_github_repo_account(self) -> str: + """ + Return the account name of the repo on GitHub, e.g., `causify-ai`, + `gpsaggese`. + """ + value = self._data["repo_info"]["github_repo_account"] + return value + + def get_repo_short_name(self) -> str: + """ + Return the short name of the repo, e.g., `amp`. + """ + value = self._data["repo_info"]["repo_name"] + return value + + def get_repo_full_name(self) -> str: + """ + Return the full name of the repo, e.g., `causify-ai/amp`, + `gpsaggese/notes`. + """ + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_name = self._data["repo_info"]["repo_name"] + value = f"{github_repo_account}/{repo_name}" + return value + + def get_repo_full_name_with_hostname(self) -> str: + """ + Return the full name of the repo, e.g., `github.com/causify-ai/amp`. + """ + repo_full_name = self.get_repo_full_name() + host_name = self.get_host_name() + value = f"{host_name}/{repo_full_name}" + return value + + # TODO(gp): We should replace this with `get_full_repo_name()`, since + # the mapping is not needed. + def get_repo_map(self) -> Dict[str, str]: + """ + Return a mapping of short repo name -> long repo name. + + E.g., + ``` + {"amp": "causify-ai/amp"} + {"helpers": "causify-ai/helpers"} + ``` + """ + repo_name = self._data["repo_info"]["repo_name"] + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_map = {repo_name: f"{github_repo_account}/{repo_name}"} + return repo_map + + # TODO(gp): Is this needed? 
+ def get_extra_amp_repo_sym_name(self) -> str: + github_repo_account = self._data["repo_info"]["github_repo_account"] + repo_name = self._data["repo_info"]["repo_name"] + if repo_name in ["orange", "lemonade"]: + # TODO(Grisha): it should return cmamp name, not the current + return f"{github_repo_account}/cmamp" + else: + return f"{github_repo_account}/{repo_name}" + + # TODO(gp): -> get_github_host_name + def get_host_name(self) -> str: + """ + Return the host name of the repo, e.g., `github.com`. + """ + value = self._data["repo_info"]["github_host_name"] + return value + + def get_invalid_words(self) -> List[str]: + """ + Return a list of words that are considered invalid in the repo. + """ + values = self._data["repo_info"]["invalid_words"] + if values is None: + invalid_words = [] + else: + invalid_words = values.split(",") + return invalid_words + + def get_issue_prefix(self) -> str: + """ + Return the prefix for the issue, e.g., `CmampTask`, `HelpersTask`. + """ + value = self._data["repo_info"]["issue_prefix"] + return value + + # docker_info + + def get_docker_base_image_name(self) -> str: + """ + Return a base name for docker image. + + E.g., `helpers`. + """ + value = self._data["docker_info"]["docker_image_name"] + return value + + def get_release_team(self) -> str: + """ + Return the release team name for docker image. + + E.g., `dev_system`. + """ + value = self._data["docker_info"].get("release_team") + return value + + # s3_bucket_info + + def get_unit_test_bucket_path(self) -> str: + """ + Return the path to the unit test bucket. + """ + value = self._data["s3_bucket_info"]["unit_test_bucket_name"] + return value + + def get_html_bucket_path(self) -> str: + """ + Return the path to the bucket where published HTMLs are stored. + """ + value = self._data["s3_bucket_info"]["html_bucket_name"] + return value + + def get_html_bucket_path_v2(self) -> str: + """ + Return the path to the bucket with published HTMLs. 
+ + "v2" version allows for the published HTMLs to be browsed. + """ + html_bucket = self.get_html_bucket_path() + html_bucket_path = os.path.join(html_bucket, "v2") + return html_bucket_path + + def get_html_ip(self) -> str: + """ + Return the IP of the bucket where published HTMLs are stored. + """ + value = self._data["s3_bucket_info"]["html_ip"] + return value + + def get_html_ip_v2(self) -> str: + """ + Return the IP of the bucket with published HTMLs. + + "v2" version allows for the published HTMLs to be browsed. + """ + ip = self.get_html_ip() + ip_v2 = f"{ip}/v2" + return ip_v2 + + def get_html_dir_to_url_mapping(self) -> Dict[str, str]: + """ + Return a mapping between directories mapped on URLs. + + This is used when we have web servers serving files from + specific directories. + """ + dir_to_url = { + self.get_html_bucket_path(): self.get_html_ip(), + self.get_html_bucket_path_v2(): self.get_html_ip_v2(), + } + return dir_to_url + + def get_shared_configs_bucket_name(self, environment: str) -> str: + """ + Return the name of the shared configs bucket. + """ + if "shared_configs_bucket_name" not in self._data["s3_bucket_info"]: + return None + value: Dict[str, str] = self._data["s3_bucket_info"][ + "shared_configs_bucket_name" + ] + bucket_name = value.get(environment, None) + return bucket_name + + def get_dir_suffix(self) -> str: + """ + Return the suffix of the dev_scripts_{dir_suffix} dir for the repo. + + E.g., `helpers` for `dev_scripts_helpers` in //helpers repo. + """ + value = self._data["runnable_dir_info"]["dir_suffix"] + return value + + def use_helpers_as_nested_module(self) -> bool: + """ + Return whether the helpers repo is used as a nested module. + """ + value = bool( + self._data["runnable_dir_info"]["use_helpers_as_nested_module"] + ) + return value + + # TODO(gp): Add functions for container_registry_info. + + def get_container_registry_url(self, registry: str = "ecr") -> str: + """ + Return the URL of the container registry. 
+ + :param registry: the name of the container registry (e.g., `ecr`, `ghcr`) + :return: the URL of the container registry + """ + return self._data["container_registry_info"][registry] + + # Utils. + + @staticmethod + def _get_repo_config_file() -> str: + """ + Return the absolute path to `repo_config.yml` that should be used. + + The `repo_config.yml` is determined based on an overriding env var or + based on the root of the Git path. + """ + env_var = "CSFY_REPO_CONFIG_PATH" + file_path = _get_env_var(env_var, abort_on_missing=False) + if file_path: + _LOG.warning( + "Using value '%s' for %s from env var", file_path, env_var + ) + else: + # client_root = _find_git_root() + # We cannot use git root here because the config file doesn't always + # reside in the root of the repo (e.g., it can be in subdir such as + # //cmamp/ck.infra for runnable dir). + file_path = _find_config_file("repo_config.yaml") + file_path = os.path.abspath(file_path) + _LOG.debug("Reading file_name='%s'", file_path) + # Check if path exists. + # We can't use helpers since it creates circular import. + if not os.path.exists(file_path): + raise FileNotFoundError(f"File '{file_path}' doesn't exist") + return file_path + + +_REPO_CONFIG = None + + +def get_repo_config() -> RepoConfig: + """ + Return the repo config object. 
+ """ + global _REPO_CONFIG + if _REPO_CONFIG is None: + _REPO_CONFIG = RepoConfig.from_file() + return _REPO_CONFIG diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py new file mode 100644 index 000000000..cd24fecf1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py @@ -0,0 +1,83 @@ +""" +Import as: + +import helpers.stage_linked_file as hstlifil +""" + +import argparse +import logging +import os +import shutil +from typing import List + +_LOG = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def find_symlinks(dst_dir: str) -> List[str]: + """ + Find all symbolic links in the destination directory. + + :param dst_dir: Directory to search for symbolic links. + :return: List of paths to symbolic links. + """ + symlinks = [] + for root, _, files in os.walk(dst_dir): + for file in files: + file_path = os.path.join(root, file) + if os.path.islink(file_path): + symlinks.append(file_path) + return symlinks + + +def stage_links(symlinks: List[str]) -> None: + """ + Replace symbolic links with writable copies of the linked files. + + :param symlinks: List of symbolic links to replace. + """ + for link in symlinks: + # Resolve the original file the symlink points to. + target_file = os.readlink(link) + if not os.path.exists(target_file): + _LOG.warning( + f"Warning: Target file does not exist for link {link} -> {target_file}" + ) + continue + # Replace the symlink with a writable copy of the target file. + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable. 
+ os.chmod(link, 0o644) + _LOG.info("Staged: %s -> %s", link, target_file) + except Exception as e: + _LOG.error("Error staging link %s: %s", link, e) + + +def main(): + parser = argparse.ArgumentParser( + description="Stage symbolic links for modification." + ) + parser.add_argument( + "--dst_dir", required=True, help="Destination directory." + ) + args = parser.parse_args() + symlinks = find_symlinks(args.dst_dir) + if not symlinks: + _LOG.info("No symbolic links found to stage.") + return + stage_links(symlinks) + _LOG.info("Staged %s files for modification.", len(symlinks)) + + +if __name__ == "__main__": + main() + +""" +Usage + + - python3 stage_linked_file.py --dst_dir /path/to/dst + +""" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py new file mode 100644 index 000000000..27344070d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py @@ -0,0 +1,30 @@ +""" +Import as: + +import helpers.telegram_notify.config as htenocon +""" + +import getpass +import os +from typing import Tuple + +import helpers.hdbg as hdbg + +NOTIFY_JUPYTER_TOKEN = os.environ["CSFY_TELEGRAM_TOKEN"] + + +def get_info() -> Tuple[str, str]: + user = getpass.getuser() + # telegram_token is the token of your bot + # - You can use @NotifyJupyterBot, its token is + # '***REMOVED***' + # chat_id: To get it, start messaging with the bot. 
Then go to + # https://api.telegram.org/bot/getUpdates and get your chat id. + # (If you are using @NotifyJupyterBot, go to + # https://api.telegram.org/bot***REMOVED***/getUpdates ) + if user in ("saggese", "gsaggese", "root"): + telegram_token = NOTIFY_JUPYTER_TOKEN + chat_id = "967103049" + else: + hdbg.dfatal(f"User `{user}` is not in the config.py") + return telegram_token, chat_id diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py new file mode 100644 index 000000000..e90c3968d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +""" +Import as: + +import helpers.telegram_notify.get_chat_id as htngchid +""" + +import argparse +import json +import logging +from typing import Dict, cast + +import requests + +import helpers.telegram_notify.config as htenocon +import helpers.telegram_notify.telegram_notify as htnoteno + +_LOG = logging.getLogger(__name__) +_LOG.setLevel(logging.INFO) + + +def _get_updates_dict(token: str) -> dict: + updates_cont = requests.post( + f"https://api.telegram.org/bot{token}/getUpdates" + ).content + updates_dict = json.loads(updates_cont) + assert updates_dict["ok"], updates_dict + return cast(dict, updates_dict) + + +def _get_username_id(updates_dict: dict) -> Dict[str, str]: + return { + result["message"]["from"]["username"]: result["message"]["from"]["id"] + for result in updates_dict["result"] + } + + +def _get_chat_id_updates_dict(username: str, updates_dict: dict) -> str: + username_id = _get_username_id(updates_dict) + assert username in username_id.keys(), ( + "Either the username is wrong or you" + " have not sent a message to the bot yet" + ) + return 
username_id[username] + + +def send_chat_id(token: str, username: str) -> str: + updates_dict = _get_updates_dict(token) + chat_id = _get_chat_id_updates_dict(username, updates_dict) + htnoteno.TelegramNotify.send( + text=f"Your chat id is: {chat_id}", token=token, chat_id=chat_id + ) + return chat_id + + +def _main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--username", required=True, action="store", type=str) + parser.add_argument("--token", required=False, action="store", type=str) + args = parser.parse_args() + username = args.username + if args.token: + token_ = args.token + else: + _LOG.info("Using default token for NotifyJupyterBot.") + token_ = htenocon.NOTIFY_JUPYTER_TOKEN + chat_id_ = send_chat_id(token_, username) + print(f"Your chat id is: {chat_id_}") + + +if __name__ == "__main__": + _main() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py new file mode 100644 index 000000000..6e0e3eb16 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py @@ -0,0 +1,155 @@ +""" +Import as: + +import helpers.telegram_notify.telegram_notify as htnoteno +""" + +import json +import logging +import os +import os.path +import re +import sys +from typing import Optional + +import requests + +# Alternative that works for both Python 2 and 3: +import requests.compat as rcompa + +import helpers.telegram_notify.config as htenocon + +_LOG = logging.getLogger(__name__) + + +def _get_launcher_name() -> str: + """ + Return the name of jupyter notebook or path to python file you are running. 
+ """ + import ipykernel + + try: # Python 3 (see Edit2 below for why this may not work in Python 2) + import notebook.notebookapp as ihnb + except ImportError: # Python 2 + import warnings + + import IPython.utils.shimmodule as iush + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=iush.ShimWarning) + import IPython.html.notebookapp as ihnb + launcher = sys.argv[0] + if os.path.basename(launcher) == "ipykernel_launcher.py": + match = re.search( + "kernel-(.*).json", ipykernel.connect.get_connection_file() + ) + if match is None: + return launcher + kernel_id = match.group(1) + servers = ihnb.list_running_servers() + for ss in servers: + response = requests.get( + rcompa.urljoin(ss["url"], "api/sessions"), # type: ignore + params={"token": ss.get("token", "")}, + ) + for nn in json.loads(response.text): + if nn["kernel"]["id"] == kernel_id: + relative_path = nn["notebook"]["path"] + return str(os.path.basename(relative_path)) + return launcher + + +# ############################################################################# +# TelegramNotebookNotify +# ############################################################################# + + +class TelegramNotebookNotify: + """ + Sends notifications. + """ + + def __init__(self) -> None: + self.launcher_name = _get_launcher_name() + self.token, self.chat_id = htenocon.get_info() + + @staticmethod + def send( + text: str, token: Optional[str], chat_id: Optional[str] + ) -> Optional[bytes]: + if chat_id is None or token is None: + _LOG.warning( + "Not sending notifications. To send notifications, both " + "`chat_id` and `token` need to be specified. Go to README.md" + "for more information." + ) + return None + payload = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"} + return requests.post( + f"https://api.telegram.org/bot{token}/sendMessage", + data=payload, + ).content + + def notify(self, message: str) -> None: + msg = f"
{self.launcher_name}
: {message}" + self.send(msg, self.token, self.chat_id) + + +# ############################################################################# +# _RequestsHandler +# ############################################################################# + + +class _RequestsHandler(logging.Handler): + def emit(self, record: logging.LogRecord) -> bytes: # type: ignore + token, chat_id = htenocon.get_info() + log_entry = self.format(record) + payload = {"chat_id": chat_id, "text": log_entry, "parse_mode": "HTML"} + return requests.post( + f"https://api.telegram.org/bot{token}/sendMessage", + data=payload, + ).content + + +# ############################################################################# +# _LogFormatter +# ############################################################################# + + +class _LogFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + launcher_name = _get_launcher_name() + return f"
{launcher_name}
: {record.msg}" + + +def init_tglogger(log_level: int = logging.DEBUG) -> None: + """ + Send notifications using logging. + """ + _tg_log = logging.getLogger("telegram_notify") + _tg_log.setLevel(log_level) + handler = _RequestsHandler() + formatter = _LogFormatter() + handler.setFormatter(formatter) + _tg_log.handlers = [handler] + + +# ############################################################################# +# TelegramNotify +# ############################################################################# + + +class TelegramNotify: + """ + Send notifications. + """ + + def __init__(self) -> None: + self.token, self.chat_id = htenocon.get_info() + + def send(self, text: str) -> Optional[bytes]: + payload = {"chat_id": self.chat_id, "text": text, "parse_mode": "HTML"} + return requests.post( + f"https://api.telegram.org/bot{self.token}/sendMessage", + data=payload, + ).content diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt new file mode 100644 index 000000000..3135b8c8e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(1, 2) kwargs={}) diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt new file mode 100644 index 000000000..3135b8c8e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt new file mode 100644 index 000000000..3135b8c8e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to 
execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt new file mode 100644 index 000000000..2f396a270 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt @@ -0,0 +1 @@ +func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt new file mode 100644 index 000000000..00529190c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt @@ -0,0 +1,3 @@ + a b c +0 0 2 2 +1 3 4 5 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt new file mode 100644 index 000000000..95d09f2b1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt new file mode 100644 index 000000000..b68450ebb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt @@ -0,0 +1 @@ +hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt new file mode 100644 index 000000000..efbdde823 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt @@ -0,0 +1,31 @@ +original shape=(7, 2) +Head: +{ + "0":{ + "col_1":1.0, + "col_2":1 + }, + "1":{ + "col_1":2.0, + "col_2":2 + }, + "2":{ + "col_1":3.0, + "col_2":3 + } +} +Tail: +{ + "4":{ + "col_1":5.0, + "col_2":5 + }, + "5":{ + "col_1":6.0, + "col_2":6 + }, + "6":{ + "col_1":7.0, + "col_2":7 + } +} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt new file mode 100644 index 000000000..cab20a014 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", + "col_2":1 + }, + "1":{ + "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", + "col_2":2 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt new file mode 100644 index 000000000..3c50fde31 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt @@ -0,0 +1,31 @@ +original shape=(7, 2) +Head: +{ + "0":{ + "col_1":1.0, + "col_2":1 + }, + "1":{ + "col_1":2.0, + "col_2":2 + }, + "2":{ + "col_1":3.0, + "col_2":3 + } +} +Tail: +{ + "4":{ + "col_1":5.0, + "col_2":5 + }, + "5":{ + "col_1":6.0, + "col_2":6 + }, + "6":{ + "col_1":7.0, + "col_2":7 + } +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt new file mode 100644 index 000000000..4a6c9e821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"2020-01-01T00:00:00", + "col_2":1.0 + }, + "1":{ + "col_1":"2020-05-12T00:00:00", + "col_2":2.0 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt new file mode 100644 index 000000000..cab20a014 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt @@ -0,0 +1,13 @@ +original shape=(2, 2) +Head: +{ + "0":{ + "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", + "col_2":1 + }, + "1":{ + "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", + "col_2":2 + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt new file mode 100644 index 000000000..9c8c2a07e --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt @@ -0,0 +1,4 @@ +## docker_images_ls_repo: +## docker_login: +eval $(aws ecr get-login --profile am --no-include-email --region us-east-1) +docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt new file mode 100644 index 000000000..e2df28b1f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt @@ -0,0 +1,3 @@ +## docker_kill: all=True +docker ps -a +docker rm -f $(docker ps -a -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt new file mode 100644 index 000000000..44a4748dc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt @@ -0,0 +1,3 @@ +## docker_kill: all=False +docker ps -l +docker rm -f $(docker ps -l -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt new file mode 100644 index 000000000..613a41c2d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt @@ -0,0 +1 @@ +docker ps --format='table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}\t{{.Label "com.docker.compose.service"}}' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt new file mode 100644 index 000000000..0c262d7ea --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt @@ -0,0 +1,3 @@ +report_memory_usage=False report_cpu_usage=False +## git_clean: dry_run=False +find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt new file mode 100644 index 000000000..e8a2a8473 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt @@ -0,0 +1,3 @@ +report_memory_usage=False report_cpu_usage=False +## git_fetch_master: +git fetch origin master:master diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt new file mode 100644 index 000000000..36f22574b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt @@ -0,0 +1,4 @@ +report_memory_usage=False report_cpu_usage=False +## git_pull: +git pull --autostash +git submodule foreach 'git pull --autostash' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt new 
file mode 100644 index 000000000..06d15ab26 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt @@ -0,0 +1,3 @@ +## print_setup: +ECR_BASE_PATH=665840871993.dkr.ecr.us-east-1.amazonaws.com +BASE_IMAGE=amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt new file mode 100644 index 000000000..265ef5fcf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt @@ -0,0 +1,2 @@ +call('eval $(aws ecr get-login --no-include-email --region us-east-1)') +call('docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt new file mode 100644 index 000000000..202366437 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt @@ -0,0 +1,2 @@ +call('docker ps -a') +call('docker rm -f $(docker ps -a -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt new file mode 100644 index 000000000..4ee19d730 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt @@ -0,0 +1,2 @@ +call('docker ps -l') +call('docker rm -f $(docker ps -l -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt new file mode 100644 index 000000000..c8b46747d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt @@ -0,0 +1 @@ +call('eval $(aws ecr get-login --profile am --no-include-email --region us-east-1)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt new file mode 100644 index 000000000..614c9318f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt @@ -0,0 +1 @@ +call('docker ps --format=\'table {{.ID}}\\t{{.Label "user"}}\\t{{.Image}}\\t{{.Command}}\\t{{.RunningFor}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Label "com.docker.compose.service"}}\'') diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt new file mode 100644 index 000000000..029e8a64f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt @@ -0,0 +1,3 @@ +call('eval $(aws ecr get-login --no-include-email --region us-east-1)') +call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev', pty=True) +call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/helpers:prod', pty=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt new file mode 100644 index 000000000..7d238de7e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt @@ -0,0 +1 @@ +call("docker stats --no-stream --format='table {{.ID}}\\t{{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}\\t{{.PIDs}}'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt new file mode 100644 index 000000000..dc7c8a671 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo alphamatic/amp --draft --title "AmpTask1310_Implement_RH1E" --body ""') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt new file mode 100644 index 000000000..1aa1034a0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt new file mode 100644 index 000000000..d93250129 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo 
github.com/alphamatic/amp --draft --title "test" --body "hello_world\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt new file mode 100644 index 000000000..a7010f356 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt @@ -0,0 +1 @@ +call('gh pr create --repo github.com/alphamatic/amp --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt new file mode 100644 index 000000000..7e38db5a7 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt @@ -0,0 +1,2 @@ +call('gh workflow run fast_tests.yml --ref AmpTask1310_Implement_RH1E') +call('gh workflow run slow_tests.yml --ref AmpTask1310_Implement_RH1E') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt new file mode 100644 index 000000000..e79742c64 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash') +call('git checkout -b test') +call('git push --set-upstream origin test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt new file mode 100644 index 000000000..25c178bb7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash --rebase') +call('git checkout -b AmpTask123_test') +call('git push --set-upstream origin AmpTask123_test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt new file mode 100644 index 000000000..72eb80ddc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt @@ -0,0 +1,3 @@ +call('git pull --autostash --rebase') +call('git checkout -b CmampTask1_fix_amp_tmux_session_script') +call('git push --set-upstream origin CmampTask1_fix_amp_tmux_session_script') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt new file mode 100644 index 000000000..b7c58a3d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt @@ -0,0 +1,3 @@ +call('git clean -fd >/dev/null 2>&1') +call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") +call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt new file mode 100644 index 000000000..b7c58a3d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt @@ -0,0 +1,3 @@ +call('git clean -fd >/dev/null 2>&1') +call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") +call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt new file mode 100644 index 000000000..0241acc2e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt @@ -0,0 +1,2 @@ +call('git fetch origin master:master') +call("git submodule foreach 'git fetch origin master:master'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt new file mode 100644 index 000000000..d9d3fc510 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt @@ -0,0 +1,3 @@ +call('git fetch origin master:master') +call("git submodule foreach 'git fetch origin master:master'") +call('git merge master') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt new file mode 100644 index 000000000..78883f1ba --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt @@ -0,0 +1,2 @@ +call('git pull --autostash') +call("git submodule foreach 'git pull --autostash'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt new file mode 100644 index 000000000..70a06c388 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files ./helpers/lib_tasks.py ./helpers/test/TestDryRunTasks2.test_git_branch_create/output/test.txt ./helpers/test/TestDryRunTasks2.test_git_merge_master/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint1/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint2/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint3/output/test.txt ./helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt new file mode 100644 index 000000000..28b088e72 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files core/dataflow/builders.py core/dataflow/core.py core/dataflow/dataflow_design.md core/dataflow/runners.py core/dataflow/visualization.py core/test/test_core.py dev_scripts/client_setup/build.sh devops/docker_build/install_packages.sh devops/docker_build/install_requirements.sh devops/docker_build/poetry.lock devops/docker_build/pyproject.toml documentation/general/workflows.txt helpers/datetime_.py helpers/git.py helpers/lib_tasks.py helpers/test/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks1.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks1.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks1.test_git_clean/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks1.test_print_setup/output/test.txt helpers/test/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks2.test_docker_login/output/test.txt helpers/test/TestDryRunTasks2.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks2.test_docker_pull/output/test.txt helpers/test/TestDryRunTasks2.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks2.test_gh_create_pr/output/test.txt helpers/test/TestDryRunTasks2.test_gh_issue_title/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_list/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
helpers/test/TestDryRunTasks2.test_git_branch_files/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean2/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks2.test_print_setup/output/test.txt helpers/test/test_cache.py helpers/test/test_lib_tasks.py im/kibot/data/load/kibot_s3_data_loader.py im/kibot/data/load/test/test_s3_data_loader.py tasks.py test/test_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt new file mode 100644 index 000000000..9fac068a3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt @@ -0,0 +1 @@ +call('./linters/base.py --files /app/amp/helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt new file mode 100644 index 000000000..ac6627a2e --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt @@ -0,0 +1,101 @@ +,Name,Frequency,Country,Unit,Start Date,End Date,Commodity,Contracts,Business Category,is_alive,source_code,dataset_code,series_code,original_name,extracted_frequency,is_downloaded,WIND Commodity,Update,id_is_broken +0,Coal and coke CO2 emissions – Aruba – million metric tonnes carbon dioxide,Annual,Aruba,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ABW-MMTCD.A,"Coal and coke CO2 emissions, Aruba, Annual — million metric tonnes carbon dioxide",Annual,success,,, +1,Coal and coke CO2 emissions – Albania – million metric tonnes carbon dioxide,Annual,Albania,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ALB-MMTCD.A,"Coal and coke CO2 emissions, Albania, Annual — million metric tonnes carbon dioxide",Annual,success,,, +2,Coal and coke CO2 emissions – United Arab Emirates – million metric 
tonnes carbon dioxide,Annual,United Arab Emirates,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARE-MMTCD.A,"Coal and coke CO2 emissions, United Arab Emirates, Annual — million metric tonnes carbon dioxide",Annual,success,,, +3,Coal and coke CO2 emissions – Argentina – million metric tonnes carbon dioxide,Annual,Argentina,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARG-MMTCD.A,"Coal and coke CO2 emissions, Argentina, Annual — million metric tonnes carbon dioxide",Annual,success,,, +4,Coal and coke CO2 emissions – Armenia – million metric tonnes carbon dioxide,Annual,Armenia,"kw, in 
millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARM-MMTCD.A,"Coal and coke CO2 emissions, Armenia, Annual — million metric tonnes carbon dioxide",Annual,success,,, +5,Germany: Term Structure of Interest Rate on Listed Federal Securities: 1,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,G0008063,,,success,Gold,2019-12-20,False +6,Germany: Term Structure of Interest Rate on Listed Federal Securities: 10,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,SG000S6E,,,not_attempted,Gold,2019-12-20,True +7,France: Treasury Bills Reference Rate: 1Y,Daily,France,%,1989-01-03,2019-12-19,,,Upstream,True,WIND,Banque de France,G0008146,,,success,Gold,2019-12-20,False +8,France: Treasury Bills Reference Rate: 10Y,Daily,France,%,1987-01-02,2019-12-19,,,Upstream,True,WIND,Banque de France,G1400003,,,success,Gold,2019-12-20,False +9,Spain: Government Securities Yields: 12M,Daily,Spain,%,1987-07-01,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700068,,,success,Gold,2019-12-20,False +10,Spain: Government Securities Yields: 10Y,Daily,Spain,%,1989-07-18,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700075,,,success,Gold,2019-12-20,False +11,Italy: Government Securities Yields: 3Y_,Daily,Italy,%,1989-07-24,2019-12-19,,,Upstream,True,WIND,Bank of Italy,G1700018,,,success,Gold,2019-12-20,False +12,Italy: Government Securities Yields: 10,Daily,Italy,%,1991-03-05,2019-12-19,,,Upstream,True,WIND,Bank of 
Italy,G1700020,,,success,Gold,2019-12-20,False +13,Futures Closing Price (Active Contract): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0066358,,,success,Gold,2019-12-20,False +14,Futures Settlement Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0068142,,,success,Gold,2019-12-20,False +15,Futures Closing Price (Continuous): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0147027,,,success,Gold,2019-12-20,False +16,Futures Closing Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0031868,,,success,Gold,2019-12-20,False +17,Futures Settlement Price (Active Contract): 
Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0181376,,,success,Gold,2019-12-20,False +18,Futures Trading Volume: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6409,,,not_attempted,Gold,2019-12-20,True +19,Futures Turnover: Gold,Daily,China,"CNY, in 10,000s",2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6410,,,not_attempted,Gold,2019-12-20,True +20,Futures Position: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6411,,,not_attempted,Gold,2019-12-20,True +21,Futures Trading Volume (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0096581,,,success,Gold,2019-12-20,False +22,Futures Position (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00096614,,,not_attempted,Gold,2019-12-20,True +23,Closing Stock on Warrant: Gold,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049497,,,success,Gold,2019-12-20,False +24,Duplicate) Closing Stock on Warrant: Gold: Total,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049505,,,success,Gold,2019-12-20,False +25,Futures Closing Price (Continuous): COMEX Gold,Daily,United States,USD/ounce,1975-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0069669,,,success,Gold,2019-12-20,False +26,Futures Closing Price (Active Contract}: COMEX 
Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0180903,,,success,Gold,2019-12-20,False +27,Futures Closing Price (Continuous): COMEX Mini Gold,Daily,United States,USD/ounce,2010-12-06,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0147014,,,success,Gold,2019-12-19,False +28,Futures Settlement Price (Active Contract}: COMEX Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0945,,,not_attempted,Gold,2019-12-20,True +29,Futures Closing Price (Active Contract: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0906,,,not_attempted,Gold,2019-12-20,True +30,Futures Settlement Price (Active Contract}: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G094E,,,not_attempted,Gold,2019-12-20,True +31,Futures Trading Volume (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M00096642,,,not_attempted,Gold,2019-12-20,True +32,Futures Position (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M0096645,,,success,Gold,2019-12-20,False +33,COMEX: Silver: Inventory,Daily,United States,ozt,1992-09-01,2019-12-19,Silver,"COMEX:6Q,COMEX:QI,COMEX:SI,COMEX:SIL,COMEX:SIT,COMEX:SSP,COMEX:SV,COMEX:XY,COMEX:YV,DGCX:DS,ICEUS:YI,ICEUS:ZI,IFUS:HIO,IFUS:YI,IFUS:ZI,LME:AG,MCX:SILVER,SHFE:AG,TCE:12",Midstream,True,WIND,CME,S0114145,,,success,Gold,2019-12-20,False +34,SGE Gold: Closing Price: Au9995,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035818,,,success,Gold,2019-12-20,False +35,SGE Gold: Closing Price: 
Au9999,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035819,,,success,Gold,2019-12-20,False +36,SGE Gold: Closing Price: Au100G,Daily,China,yuan/g,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035820,,,success,Gold,2019-12-20,False +37,SGE Gold: Closing Price: AuT+D,Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035821,,,success,Gold,2019-12-20,False +38,SGE Gold: Settlement Price: Au (T+D),Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0202645,,,success,Gold,2019-12-20,False +39,SGE Gold: Volume: Au9995,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035824,,,success,Gold,2019-12-20,False +40,SGE Gold: Volume: Au9999,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035825,,,success,Gold,2019-12-20,False +41,SGE Gold: Volume: Au100g,Daily,China,kg,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035826,,,success,Gold,2019-12-20,False +42,SGE Gold: Volume: AuT+D,Daily,China,kg,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035827,,,success,Gold,2019-12-20,False +43,SGE Gold: Position: Au (T+D),Daily,China,kg,2008-08-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S5806075,,,success,Gold,2019-12-20,False +44,SGE Gold: Deferred Payment of Direction: Au 
(T+D),Daily,China,,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0182163,,,success,Gold,2019-12-20,False +45,SGE Gold: Delivery Volume: Au (T+D),Daily,China,kg,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0181748,,,success,Gold,2019-12-20,False +46,Loco London Gold: In USD,Daily,United Kingdom,USD/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031645,,,success,Gold,2019-12-20,False +47,Loco Londen Gold: In EUR,Daily,United Kingdom,EUR/ounce,1999-01-04,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031646,,,success,Gold,2019-12-20,False +48,Loco Londen Gold: In GBP,Daily,United Kingdom,GBP/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031647,,,success,Gold,2019-12-20,False +49,Closing Price: Paper Gold: Bank of China,Daily,China,yuan/g,2011-01-20,2014-10-31,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Company Website,S5806366,,,not_attempted,Gold,2014-11-03,False +50,Closing Price: Paper Gold: China Construction Bank,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806367,,,success,Gold,2019-12-20,False +51,Closing Price: Paper Gold: Industrial and Commercial Bank of China,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806365,,,success,Gold,2019-12-20,False +52,Price: Gold: 99.95,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801701,,,success,Gold,2019-12-20,False +53,Price: Gold: 
99.99,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801702,,,success,Gold,2019-12-20,False +54,SPDR Gold Shares: Total Net Asset Value Qunces in the Trust,Daily,United States,ozt,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105520,,,success,Gold,2019-12-20,False +55,SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105521,,,success,Gold,2019-12-20,False +56,SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United States,USD,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105522,,,success,Gold,2019-12-20,False +57,iShares: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United 
States,USD,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807688,,,success,Gold,2019-12-20,False +58,iShares: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United States,ozt,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807690,,,success,Gold,2019-12-20,False +59,iShares: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807691,,,success,Gold,2019-12-20,False +60,GBS: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2004-04-01,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807692,,,success,Gold,2019-12-20,False +61,GBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807693,,,success,Gold,2019-12-20,False +62,GBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807694,,,success,Gold,2019-12-20,False +63,PHAU: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2007-04-25,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807695,,,success,Gold,2019-12-20,False +64,PHAU: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807696,,,success,Gold,2019-12-20,False +65,PHAU: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807697,,,success,Gold,2019-12-20,False +66,SGBS: SPDR Gold Shares: Total 
Net Asset Value in the Trust,Daily,Switzerland,USD,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808213,,,success,Gold,2019-12-20,False +67,SGBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808214,,,success,Gold,2019-12-20,False +68,SGBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808215,,,success,Gold,2019-12-20,False +69,GOLD: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,AUD,2004-01-09,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807698,,,success,Gold,2019-12-20,False +70,GOLD: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807699,,,success,Gold,2019-12-20,False +71,GOLD: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807700,,,success,Gold,2019-12-20,False +72,SGOL: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,Switzerland,USD,2009-09-04,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807701,,,success,Gold,2019-10-31,False +73,SGOL: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807702,,,success,Gold,2019-10-31,False +74,SGOL: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807703,,,success,Gold,2019-10-31,False +75,Price: Chinese Major Ports FOB: 
Silicon: 98.5,Daily,United Kingdom,USD/ton,2006-06-02,2014-05-30,,,Upstream,False,WIND,According to the Press Finishing,S0149035,,,not_attempted,,2014-06-03,False +76,"Price: Silicon Powder: -200 Mesh,-300 Mesh: Shanghai-made",Daily,China,yuan/kg,2005-01-04,2019-12-24,,,Upstream,True,WIND,According to the Press Finishing,S5801759,,,success,,2019-12-24,False +77,Market Price: Secondary Metallurgical Coke: National,Daily,China,yuan/ton,2013-12-31,2019-12-20,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,National Bureau of Statistics of China,S5914487,,,success,,2019-12-24,False +78,"Ex-factory Price (Tax-inclusive): Metallurgical Coke Grade 3 (A15%,0.6%): Yunng",Daily,China,yuan/ton,2004-10-22,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S0033511,,,success,,2019-12-24,False +79,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Rizhao,Daily,China,yuan/ton,2012-03-08,2015-02-11,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,False,WIND,Wind,S5118432,,,success,,2015-02-11,False +80,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Zibo,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118277,,,success,,2019-12-24,False +81,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Yinchuan,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118276,,,success,,2019-12-24,False +82,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Xinjiang County,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118275,,,success,,2019-12-24,False +83,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Xuzhou,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118273,,,success,,2019-12-24,False +84,Exit Price (Tax-inclusive): Secondary Metallurgical Coke: Tianjin,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118270,,,success,,2019-12-24,False +85,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Shuangyashan,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118267,,,success,,2019-12-24,False +86,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Shijiazhuang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118266,,,success,,2019-12-24,False +87,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Shanghai,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118264,,,success,,2019-12-24,False +88,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Qigihar,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118263,,,success,,2019-12-24,False +89,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Panzhihua,Daily,China,yuan/ton,2010-04-13,2019-12-06,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118260,,,success,,2019-12-06,False +90,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Mudanjlang,Daily,China,yuan/ton,2011-09-01,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118259,,,success,,2019-12-24,False +91,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Lvliang,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118258,,,success,,2019-12-24,False +92,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Linyt,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118255,,,success,,2019-12-24,False +93,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Linfen,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118254,,,success,,2019-12-24,False +94,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Jinzhong,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118252,,,success,,2019-12-24,False +95,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Hancheng,Daily,China,yuan/ton,2012-03-06,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118247,,,success,,2019-12-24,False +96,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Fukang,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118246,,,success,,2019-12-24,False +97,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Fushun,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118245,,,success,,2019-12-24,False +98,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118243,,,success,,2019-12-24,False +99,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Anyang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118241,,,success,,2019-12-24,False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt new file mode 100644 index 000000000..2de8022c8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt @@ -0,0 +1,8 @@ + Name Frequency Country Unit Start Date End Date Commodity Contracts Business Category is_alive source_code dataset_code series_code original_name extracted_frequency is_downloaded WIND Commodity Update id_is_broken +5 Germany: Term Structure of Interest Rate on Listed Federal Securities: 1 Daily Germany % 1997-08-07 2019-12-19 NaN NaN Upstream True WIND Deutsche Bundesbank G0008063 NaN NaN success Gold 2019-12-20 False +7 France: Treasury Bills Reference Rate: 1Y Daily France % 1989-01-03 2019-12-19 NaN NaN Upstream True WIND Banque de France G0008146 NaN NaN success Gold 2019-12-20 False +8 France: Treasury Bills Reference Rate: 10Y Daily France % 1987-01-02 2019-12-19 NaN NaN Upstream True WIND Banque de France G1400003 NaN NaN success Gold 2019-12-20 False +... 
+97 Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Fushun Daily China yuan/ton 2011-09-09 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118245 NaN NaN success NaN 2019-12-24 False +98 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos Daily China yuan/ton 2010-04-20 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118243 NaN NaN success NaN 2019-12-24 False +99 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Anyang Daily China yuan/ton 2010-04-13 2019-12-24 Coal 
CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118241 NaN NaN success NaN 2019-12-24 False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt new file mode 100644 index 000000000..8c6bdf3cf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt @@ -0,0 +1,18 @@ +def func1(): + """ + First function. + + ``` + foo + ``` + """ + + +def func2(): + """ + Second function. + + ``` + foo + ``` + """ \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt new file mode 100644 index 000000000..3f4d616bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt @@ -0,0 +1,52 @@ +# Test created for __main__.plbck_sum. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. + a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test3(self) -> None: + # Define input variables. + a = 2 + b = 3 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test4(self) -> None: + # Define input variables. + a = 3 + b = 4 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt new file mode 100644 index 000000000..3f4d616bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt @@ -0,0 +1,52 @@ +# Test created for __main__.plbck_sum. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. 
+ a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. + a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test3(self) -> None: + # Define input variables. + a = 2 + b = 3 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test4(self) -> None: + # Define input variables. + a = 3 + b = 4 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt new file mode 100644 index 000000000..1a2ceab1a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt @@ -0,0 +1,30 @@ +# Test created for __main__.plbck_sum. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestPlbckSum(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 0 + b = 1 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) + +class TestPlbckSum(hunitest.TestCase): + def test2(self) -> None: + # Define input variables. 
+ a = 1 + b = 2 + # Call function to test. + actual = plbck_sum(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt new file mode 100644 index 000000000..b5439e39d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 3 + b = 2 + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = 5 + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt new file mode 100644 index 000000000..6631e9e27 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": 2} + b = {"3": 4} + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt new file mode 100644 index 000000000..80e85048a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) + b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = hpandas.df_to_str(actual, num_rows=None) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt new file mode 100644 index 000000000..1d91a4a88 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": ["a", 2]} + b = {"3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = {"1": ["a", 2], "3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt new file mode 100644 index 000000000..badcab6f7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) + b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = hpandas.df_to_str(actual, num_rows=None) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt new file mode 100644 index 000000000..6b92491e1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt @@ -0,0 +1,23 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) + b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = pd.Series(data=[20.0, 20.0, 20.5], index=RangeIndex(start=0, stop=3, step=1), name="None", dtype=float64) + expected = jsonpickle.decode(expected) + actual = hpandas.df_to_str(actual, num_rows=None) + expected = hpandas.df_to_str(expected, num_rows=None) + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt new file mode 100644 index 000000000..403295821 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") + b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt new file mode 100644 index 000000000..5a0f6c938 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt @@ -0,0 +1,21 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") + b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = cconfig.Config.from_python("Config({'meta': 'meta value 2', 'list': [1, 2]})") + expected = jsonpickle.decode(expected) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt new file mode 100644 index 000000000..1884fe5bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt @@ -0,0 +1,15 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string_none. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckStringNone(hunitest.TestCase): + def test1(self) -> None: + # Call function to test. + actual = get_result_check_string_none() + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt new file mode 100644 index 000000000..710587bb8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt @@ -0,0 +1,17 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal_none. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqualNone(hunitest.TestCase): + def test1(self) -> None: + # Call function to test. + actual = get_result_assert_equal_none() + # Define expected output. + expected = "Some string." + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt new file mode 100644 index 000000000..40dc558c5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = "test" + b = "case" + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = "testcase" + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt new file mode 100644 index 000000000..68b93d84d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = [1, 2, 3] + b = [4, 5, 6] + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = [1, 2, 3, 4, 5, 6] + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt new file mode 100644 index 000000000..faa6861c2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt @@ -0,0 +1,20 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = {"1": 2} + b = {"3": 4} + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = {"1": 2, "3": 4} + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt new file mode 100644 index 000000000..abfa197bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt @@ -0,0 +1,22 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) + b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) + # Call function to test. + actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = pd.DataFrame.from_dict({'Price': [701, 251, 801, 1201]}) + actual = hpandas.df_to_str(actual, num_rows=None) + expected = hpandas.df_to_str(expected, num_rows=None) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt new file mode 100644 index 000000000..f7fa7c8c9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt @@ -0,0 +1,23 @@ +# Test created for helpers.test.test_hplayback.get_result_assert_equal. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultAssertEqual(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}' + a = jsonpickle.decode(a) + b = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B9wBAQ=="]]}' + b = jsonpickle.decode(b) + # Call function to test. 
+ actual = get_result_assert_equal(a=a, b=b) + # Define expected output. + expected = r'{"py/reduce": [{"py/type": "datetime.timedelta"}, {"py/tuple": [1096, 0, 0]}]}' + expected = jsonpickle.decode(expected) + # Compare actual and expected output. + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt new file mode 100644 index 000000000..25588d901 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = 3 + b = 2 + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt new file mode 100644 index 000000000..cd51f2ced --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt @@ -0,0 +1,18 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = "test" + b = "case" + # Call function to test. + actual = get_result_check_string(a=a, b=b) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt new file mode 100644 index 000000000..c42805818 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt @@ -0,0 +1,19 @@ +# Test created for helpers.test.test_hplayback.get_result_check_string. 
+ +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest +import jsonpickle +import pandas as pd +import config_root.config as cconfi + + +class TestGetResultCheckString(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + a = [1, 2, 3] + b = [4, 5, 6] + # Call function to test. + actual = get_result_check_string(a=a, b=b) + actual = str(actual) + # Check output. + self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, + "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt new file mode 100644 index 000000000..4f0f96902 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt @@ -0,0 +1 @@ +INSERT INTO test_table(id,column_1,column_2) VALUES %s diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt new file mode 100644 index 000000000..c5faf0358 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt @@ -0,0 +1,20 @@ +original shape=(3, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":4, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "2":{ + "id":5, + "column_1":1001.0, + "column_2":"test_string_2" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, 
+ "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt new file mode 100644 index 000000000..8547d2955 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt @@ -0,0 +1,30 @@ +original shape=(5, 3) +Head: +{ + "0":{ + "id":1, + "column_1":1000.0, + "column_2":"test_string_1" + }, + "1":{ + "id":2, + "column_1":1001.0, + "column_2":"test_string_2" + }, + "2":{ + "id":3, + "column_1":1002.0, + "column_2":"test_string_3" + }, + "3":{ + "id":4, + "column_1":1003.0, + "column_2":"test_string_4" + }, + "4":{ + "id":5, + "column_1":1004.0, + "column_2":"test_string_5" + } +} +Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt new file mode 100644 index 000000000..cd2308af6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt @@ -0,0 +1,65 @@ +# Dir structure +. 
+dummy_value_1=1 +dummy_value_1=1/dummy_value_2=A +dummy_value_1=1/dummy_value_2=A/data.parquet +dummy_value_1=2 +dummy_value_1=2/dummy_value_2=B +dummy_value_1=2/dummy_value_2=B/data.parquet +dummy_value_1=3 +dummy_value_1=3/dummy_value_2=C +dummy_value_1=3/dummy_value_2=C/data.parquet +# File signatures +len(file_names)=3 +file_names=dummy_value_1=1/dummy_value_2=A/data.parquet, dummy_value_1=2/dummy_value_2=B/data.parquet, dummy_value_1=3/dummy_value_2=C/data.parquet +# dummy_value_1=1/dummy_value_2=A/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' +# dummy_value_1=2/dummy_value_2=B/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' +# dummy_value_1=3/dummy_value_2=C/data.parquet +num_lines=13 +''' +original shape=(1, 1) +Head: +{ + "0":{ + "dummy_value_3":0 + } +} +Tail: +{ + "0":{ + "dummy_value_3":0 + } +} +''' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt new file mode 100644 index 000000000..ca3ab848c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt @@ -0,0 +1,4 @@ + description comment is_ok +0 hello Number of not submitted OMS child orders=0 / 7... 
True +1 hello2 ok True +is_ok=True \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt new file mode 100644 index 000000000..b0e7738bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt @@ -0,0 +1,4 @@ + description comment is_ok +0 hello Number of not submitted OMS child orders=0 / 7... True +1 hello2 not_ok False +is_ok=False \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt new file mode 100644 index 000000000..393449cf4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt @@ -0,0 +1,19 @@ +Some text before + + + +:::: +::::{.column width=40%} + + + +Middle text + + + +:::columns +::::{.column width=60%} + + + +Some text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt new file mode 100644 index 000000000..f3bdbccbf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt @@ -0,0 +1,5 @@ + + +:::: +::: + diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt new file mode 100644 index 000000000..d5e54b365 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed assertion * +cond=False +df1.columns.difference(df2.columns)= +Index(['B'], dtype='object') +df2.columns.difference(df1.columns)= +Index(['C'], dtype='object') +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt new file mode 100644 index 000000000..464343e55 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt @@ -0,0 +1,9 @@ + 
+################################################################################ +* Failed assertion * +cond=False +df1.index.difference(df2.index)= +Index([1, 4], dtype='int64') +df2.index.difference(df1.index)= +Index([5, 6], dtype='int64') +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json new file mode 100644 index 000000000..1e4b47491 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json @@ -0,0 +1,10 @@ +{ + "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ + "25", + 3.195e-05 + ], + "{\"args\": [\"You are a calculator. 
Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ + "5", + 3.195e-05 + ] +} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt new file mode 100644 index 000000000..dbd21a9a0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 NaN +2010-02-28 NaN +2010-03-31 NaN +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 NaN +2010-07-31 NaN +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 NaN +2013-03-31 NaN +2013-04-30 NaN diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt new file mode 100644 index 000000000..6e33e1427 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt @@ -0,0 +1,33 @@ + 0 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt new file mode 100644 index 000000000..3a043159d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 NaN +2010-02-28 NaN +2010-03-31 NaN +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.146756 +2010-07-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 
+2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.686501 +2013-03-31 0.686501 +2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt new file mode 100644 index 000000000..200d35c7a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt @@ -0,0 +1,38 @@ + 0 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.146756 +2010-07-31 0.146756 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.686501 +2013-03-31 0.686501 +2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt new file mode 100644 index 000000000..590e9e5f7 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt @@ -0,0 +1,41 @@ + 0 +2010-01-31 0.000000 +2010-02-28 0.000000 +2010-03-31 0.000000 +2010-04-30 0.302333 +2010-05-31 0.146756 +2010-06-30 0.000000 +2010-07-31 0.000000 +2010-08-31 0.345561 +2010-09-30 0.396767 +2010-10-31 0.538817 +2010-11-30 0.419195 +2010-12-31 0.685220 +2011-01-31 0.204452 +2011-02-28 0.878117 +2011-03-31 0.027388 +2011-04-30 0.670468 +2011-05-31 0.417305 +2011-06-30 0.558690 +2011-07-31 0.140387 +2011-08-31 0.198101 +2011-09-30 0.800745 +2011-10-31 0.968262 +2011-11-30 0.313424 +2011-12-31 0.692323 +2012-01-31 0.876389 +2012-02-29 0.894607 +2012-03-31 0.085044 +2012-04-30 0.039055 +2012-05-31 0.169830 +2012-06-30 0.878143 +2012-07-31 0.098347 +2012-08-31 0.421108 +2012-09-30 0.957890 +2012-10-31 0.533165 +2012-11-30 0.691877 +2012-12-31 0.315516 +2013-01-31 0.686501 +2013-02-28 0.000000 +2013-03-31 0.000000 +2013-04-30 0.000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt new file mode 100644 index 000000000..9f8585df5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt @@ -0,0 +1,3 @@ +Consecutive headers increase by more than one level: + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt new file mode 100644 index 000000000..ce0136250 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt @@ -0,0 +1 @@ +hello diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt new file mode 100644 index 000000000..0c31b6c66 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt @@ -0,0 +1,3 @@ +,a,b,c +0,0,1,2 +1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv new file mode 100644 index 000000000..0ddcc75ab --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv @@ -0,0 +1,5 @@ +col1,col2,col3 +a,a,a +b,b,b +c,,c +d,, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt new file mode 100644 index 
000000000..4f8eb6107 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +cond=False +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt new file mode 100644 index 000000000..b31ec5ee2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt @@ -0,0 +1,6 @@ + +################################################################################ +* Failed assertion * +cond=False +hello +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt new file mode 100644 index 000000000..134e5b23c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt @@ -0,0 +1,6 @@ + +################################################################################ +* Failed assertion * +cond=False +hello world +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt new file mode 100644 index 000000000..f99e55fe0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +cond=False +Caught assertion while formatting message: +'not all arguments converted during string formatting' +hello %s world too_many +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt new file mode 100644 index 000000000..5ebc30e5f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +cond=False +Caught assertion while formatting message: +'not enough arguments for format string' +hello %s +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt new file mode 100644 index 000000000..c941ca91b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt @@ -0,0 +1 @@ +You passed '['hello']' or type '' instead of str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt new file mode 100644 index 000000000..41b8447e3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +hello world +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt new file mode 100644 index 000000000..41b8447e3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +hello world +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt new file mode 100644 index 000000000..3bdf77365 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt @@ -0,0 +1,10 @@ + +################################################################################ +* Failed assertion * +'1' +== +'2' +Caught assertion while formatting message: +'not enough arguments for format string' +hello %s +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt new file mode 100644 index 000000000..5e9f4aa95 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt @@ -0,0 +1 @@ +issubclass() arg 2 must be a class, a tuple of classes, or a union \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt new file mode 100644 index 000000000..3eeaf0ce1 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance '' of class '_Man' is not a subclass of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt new file mode 100644 index 000000000..e5b23c85f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance '' of class '_Man' is not a subclass of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt new file mode 100644 index 000000000..69b3f64e9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +val1=3 +[1, 2, 3] +val2=3 
+[1, 2, 4] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt new file mode 100644 index 000000000..11a472589 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +'a' in '['xyz']' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt new file mode 100644 index 000000000..bb58d202b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +'a' is 'None' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt 
new file mode 100644 index 000000000..fca016604 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance of 'a' is '' instead of '' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt new file mode 100644 index 000000000..b377f94fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt @@ -0,0 +1,5 @@ + +################################################################################ +* Failed assertion * +Instance of 'a' is '' instead of '(, )' +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt new file mode 100644 index 000000000..1c61bf06a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed 
assertion * +val1= +[1, 2, 4, 3] +is not sorted +sorted(val1)= +[1, 2, 3, 4] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt new file mode 100644 index 000000000..a13f9d582 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt @@ -0,0 +1,9 @@ + +################################################################################ +* Failed assertion * +val1= +[1, 2, 4, 3] +is not sorted +sorted(val1)= +[4, 3, 2, 1] +################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt new file mode 100644 index 000000000..9fe19e631 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt @@ -0,0 +1,8 @@ + +################################################################################ +* Failed assertion * +val1= +[1, 3, 3] +has duplicates +3 +################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt new file mode 100644 index 000000000..a1f1fdce9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt @@ -0,0 +1 @@ +date='2022-11-01' doesn't have the right format: time data '2022-11-01' does not match format '%Y%m%d' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt new file mode 100644 index 000000000..48cd44539 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt @@ -0,0 +1,28 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +filters +################################################################################ +{'col_0': (('gt', 1), ('lt', 7)), 'col_1': ('eq', 5)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +1 4 
5 6 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 4), + ('n_col_0_gt_1', np.int64(3)), + ('perc_col_0_gt_1', '3 / 4 = 75.00%'), + ('n_col_0_lt_7', np.int64(2)), + ('perc_col_0_lt_7', '2 / 4 = 50.00%'), + ('n_col_1_eq_5', np.int64(1)), + ('perc_col_1_eq_5', '1 / 4 = 25.00%'), + ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt new file mode 100644 index 000000000..c935f88e6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt @@ -0,0 +1,28 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +filters +################################################################################ +{'col_0': ('gt', 2), 'col_1': ('eq', 5)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +1 4 5 6 +2 7 8 9 +3 10 11 12 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 4), + 
('n_col_0_gt_2', np.int64(3)), + ('perc_col_0_gt_2', '3 / 4 = 75.00%'), + ('n_col_1_eq_5', np.int64(1)), + ('perc_col_1_eq_5', '1 / 4 = 25.00%'), + ('nrows_remaining', np.int64(3))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt new file mode 100644 index 000000000..456d06923 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt @@ -0,0 +1,26 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +filters +################################################################################ +{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 2), + ('n_col_0', np.int64(1)), + ('perc_col_0', '1 / 2 = 50.00%'), + ('n_col_1', np.int64(1)), + ('perc_col_1', '1 / 2 = 50.00%'), + ('n_col_2', np.int64(2)), + ('perc_col_2', '2 / 2 = 100.00%'), + ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt new file mode 100644 index 000000000..ae70053b9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt @@ -0,0 +1,27 @@ +################################################################################ +data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +filters +################################################################################ +{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} +################################################################################ +filtered_data +################################################################################ + col_0 col_1 col_2 +0 1 2 3 +1 4 5 6 +################################################################################ +info +################################################################################ +OrderedDict([('nrows', 2), + ('n_col_0', np.int64(1)), + ('perc_col_0', '1 / 2 = 50.00%'), + ('n_col_1', np.int64(1)), + ('perc_col_1', '1 / 2 = 50.00%'), + ('n_col_2', np.int64(2)), + ('perc_col_2', '2 / 2 = 100.00%'), + ('nrows_remaining', np.int64(2))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt new file mode 100644 index 000000000..a947c3402 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt @@ -0,0 +1 @@ +A fake check_string output to use for test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt new file mode 100644 index 000000000..62b216ee4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt @@ -0,0 +1 @@ +A fake check_string output to use for test2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt new file mode 100644 index 000000000..3b18e512d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt @@ -0,0 +1 @@ +hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv new file mode 100644 index 000000000..abc3dac80 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv @@ -0,0 +1,2 @@ +A,B,C,D,E +1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types new file mode 100644 index 000000000..81816c1d2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types @@ -0,0 +1 @@ +{'A': 'int64', 'B': 'float64', 'C': 'object', 'D': 'object', 'E': 'int64'} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt new file mode 100644 index 000000000..4a3a582fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt @@ -0,0 +1,58 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - 
CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt new file mode 100644 index 000000000..47371468a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt @@ -0,0 +1,58 @@ +stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs={'/data/shared': '/shared_data'}, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - 
CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + - /data/shared:/shared_data + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt new file mode 100644 index 000000000..5ebe91b26 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt @@ 
-0,0 +1,60 @@ +stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} +networks: + default: + name: main_network diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt new file mode 100644 index 000000000..eb8d4824a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt @@ -0,0 +1,57 @@ +stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + - ../../:/app + environment: + - MYPYPATH + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - 
${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt new file mode 100644 index 000000000..2c9d5ecf0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt @@ -0,0 +1,56 @@ +stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=0 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + - CSFY_CI=$CSFY_CI + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /app:/app + linter: + extends: base_app + volumes: + - /app:/src + - ../../../:/app + environment: + - MYPYPATH + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + 
jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt new file mode 100644 index 000000000..9ba5c60c9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: 
${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt new file mode 100644 index 000000000..91e37ffc3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app + - 
CSFY_USE_HELPERS_AS_NESTED_MODULE=0 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/helpers1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/helpers1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt new file mode 100644 index 000000000..a16d2f133 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - 
CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/cmamp1:/app + working_dir: /app/ck.infra + linter: + extends: base_app + volumes: + - /data/dummy/src/cmamp1/ck.infra:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt new file mode 100644 index 000000000..b4afb6c80 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt @@ -0,0 +1,63 @@ +stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True +version: '3' +services: + base_app: + cap_add: + - SYS_ADMIN + environment: + - CSFY_ENABLE_DIND=1 + - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL + - CSFY_REPO_CONFIG_CHECK=True + - CSFY_REPO_CONFIG_PATH= + - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID + - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION + - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE + - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET + - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY + - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN + - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH + - CSFY_GIT_ROOT_PATH=/app + - CSFY_HELPERS_ROOT_PATH=/app/amp/helpers_root + - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 + - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN + - CSFY_CI=$CSFY_CI + - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN + - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN + image: ${IMAGE} + restart: 'no' + volumes: + - ~/.aws:/home/.aws + - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ + - ~/.config/gh:/home/.config/gh + - ~/.ssh:/home/.ssh + privileged: true + network_mode: ${NETWORK_MODE:-host} + app: + extends: base_app + volumes: + - /data/dummy/src/orange1:/app + working_dir: /app + linter: + extends: base_app + volumes: + - /data/dummy/src/orange1:/src + working_dir: /src + environment: + - MYPYPATH + - CSFY_REPO_CONFIG_PATH=/app/repo_config.py + jupyter_server: + command: devops/docker_run/run_jupyter_server.sh + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - ${PORT}:${PORT} + jupyter_server_test: + command: jupyter notebook -h 2>&1 >/dev/null + environment: + - PORT=${PORT} + extends: app + network_mode: ${NETWORK_MODE:-bridge} + ports: + - 
${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl new file mode 100644 index 0000000000000000000000000000000000000000..25ffea79afb3dad6014da937fd8ff7c64cfbb55f GIT binary patch literal 405 zcmbu(u}TCn5C-5=POmty^Br!R#KqQ5tptU`K4g&`x>)b*xwLl zAPnC>A16QGM~haxHX(s9!SC>iPp{kDcc1UPQuAlsa1IP%@R`>S$wC5qgNF9qcEih^ zM)$?XXzv!h9D($~l)kwaw@T#&&7%2;aOs%O@yz+ry2;oJv-TCg^Z#rJt zOv+;7YpG+DdW`ZPBmWrrRNJ=rhZ*Uc+e;dwsBxOiGhf16goGn_QVDW#6MA: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt new file mode 100644 index 000000000..7aad26473 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt new file mode 100644 index 000000000..d491215bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False , b=hello , c=3.14 ) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt new file mode 100644 index 000000000..b5e297083 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, hello=. at 0x>) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' + hello='. 
at 0x>' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt new file mode 100644 index 000000000..b69634f84 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, _hello=under) +################################################################################ +repr: +################################################################################ +: + a='False' + b='hello' + c='3.14' + _hello='under' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt new file mode 100644 index 000000000..332cd0a1d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt @@ -0,0 +1,12 @@ +################################################################################ +str: +################################################################################ +_Object1 at 0x=(a=False, b=hello, c=3.14, _Object1__hello=double_dunder) +################################################################################ +repr: +################################################################################ +: + 
a='False' + b='hello' + c='3.14' + _Object1__hello='double_dunder' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt new file mode 100644 index 000000000..fccd31195 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) 
+################################################################################ +repr: +################################################################################ +: + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + p='p' + q='q' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt new file mode 100644 index 000000000..08aebee19 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: 
+################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt @@ -0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt new file mode 100644 index 000000000..28193b95d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt @@ 
-0,0 +1,11 @@ +################################################################################ +str: +################################################################################ +_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) +################################################################################ +repr: +################################################################################ +: + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt new file mode 100644 index 000000000..14a9380bb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt @@ -0,0 +1 @@ +xdg-open a.html diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt new file mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt new file 
mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt new file mode 100644 index 000000000..b0047fa49 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt @@ -0,0 +1 @@ +None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt new file mode 100644 index 000000000..38f3146a7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt @@ -0,0 +1,20 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + + + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` + + +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + ```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt new file mode 100644 index 000000000..dacb761b7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt @@ -0,0 +1,20 @@ +0:- Functions can be declared in the body of another function +1:- E.g., to hide utility functions in the scope of the function that uses them +2: + +3: ```python +4: def print_integers(values): +5: +6: def _is_integer(value): +7: try: +8: return value == int(value) +9: except: +10: return False +11: +12: for v in values: +13: if _is_integer(v): +14: print(v) +15: ``` +16: + +17:- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt new file mode 100644 index 000000000..52f34afc3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt @@ -0,0 +1,2 @@ +$AM_AWS_S3_BUCKET = $AM_AWS_S3_BUCKET +$CSFY_AWS_S3_BUCKET = $CSFY_AWS_S3_BUCKET diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed new file mode 100644 index 000000000..0850990c3 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed @@ -0,0 +1,12 @@ +{ + "dev_scripts/testing/test/test_run_tests.py": true, + "dev_scripts/testing/test/test_run_tests2.py": true, + "helpers/test/test_printing.py::Test_dedent1::test1": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true, + "helpers/test/test_printing.py::Test_dedent2::test1": true, + "documentation/scripts/test/test_all.py": true, + "documentation/scripts/test/test_render_md.py": true, + "helpers/test/helpers/test/test_list.py::Test_list_1": true, + "helpers/test/helpers/test/test_list.py::Test_list_2": true, + "helpers/test/test_cache.py::TestAmpTask1407": true +} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt new file mode 100644 index 000000000..61323668a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt @@ -0,0 +1,15 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 10 failed pytest 'tests' target(s); to reproduce run: +pytest dev_scripts/testing/test/test_run_tests.py +pytest dev_scripts/testing/test/test_run_tests2.py +pytest documentation/scripts/test/test_all.py 
+pytest documentation/scripts/test/test_render_md.py +pytest helpers/test/helpers/test/test_list.py::Test_list_1 +pytest helpers/test/helpers/test/test_list.py::Test_list_2 +pytest helpers/test/test_cache.py::TestAmpTask1407 +pytest helpers/test/test_printing.py::Test_dedent1::test1 +pytest helpers/test/test_printing.py::Test_dedent1::test2 +pytest helpers/test/test_printing.py::Test_dedent2::test1 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt new file mode 100644 index 000000000..9e66e81bd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt @@ -0,0 +1,325 @@ +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, timeout-2.0.2, rerunfailures-10.2, cov-3.0.0, instafail-0.4.2, xdist-2.5.0, forked-1.4.0 +collecting ... 
>>ENV<<: is_inside_container=True: code_version=1.0.6, container_version=1.0.6, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +# Git + branch_name='CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests' + hash='ca2dbf510' + # Last commits: + * ca2dbf510 Sonya Nikiforova Merge branch 'master' into CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests ( 2 hours ago) Mon Feb 14 16:25:29 2022 (HEAD -> CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests, origin/CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests) + |\ + * | 63a471cca sonniki CmTask695: Update for reproducibility ( 2 hours ago) Mon Feb 14 16:15:14 2022 + | * 0d236ad57 Nikola Jašek CMTask1103: Add tests for HistoricalPqByTileClient (#1176) ( 2 hours ago) Mon Feb 14 16:01:56 2022 (origin/master, origin/HEAD) +# Machine info + system=Linux + node name=61ceebd0998a + release=5.11.0-1028-aws + version=#31~20.04.1-Ubuntu SMP Fri Jan 14 14:37:50 UTC 2022 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=33295769600, available=23499386880, percent=29.4, used=9048117248, free=17212899328, active=2693218304, inactive=12081451008, buffers=651313152, cached=6383439872, shared=286130176, slab=934486016) + disk usage=sdiskusage(total=104021790720, used=40223850496, free=63781163008, percent=38.7) +# Packages + python: 3.8.10 + gluonnlp: ? 
+ gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.9.0 + numpy: 1.22.0 + pandas: 1.3.5 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.2 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest datapull/common/data/client/test/test_historical_pq_clients.py' +INFO: Saving log to file 'tmp.pytest.log' +collected 9 items + +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 (1.14 s) PASSED [ 11%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 (1.05 s) PASSED [ 22%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 (0.00 s) PASSED [ 33%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 (1.26 s) FAILED [ 44%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 (1.44 s) FAILED [ 55%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 (1.09 s) FAILED [ 66%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 (0.95 s) FAILED [ 77%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 (0.86 s) FAILED [ 88%] +datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 (1.05 s) PASSED [100%] + +=================================== FAILURES =================================== +________________ TestHistoricalPqByTileClient1.test_read_data1 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 89, in test_read_data1 + self._test_read_data1( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 44, in 
_test_read_data1 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data1 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(4320, 4) | df.shape=(4320, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +2021-12-30 00:02:00+00:00 1467591036 2 2021 12 ( +... 
( +2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 ( +2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data1 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(4320, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +2021-12-30 00:02:00+00:00 1467591036 2 2021 12 +... +2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 +2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data2 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 132, in test_read_data2 + self._test_read_data2( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 61, in _test_read_data2 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data2 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(8640, 4) | df.shape=(8640, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +... ( +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data2 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(8640, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +... 
+2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data3 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 176, in test_read_data3 + self._test_read_data3( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 79, in _test_read_data3 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data3 +-------------------------------------------------------------------------------- + +# df= ( +index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(2640, 4) | df.shape=(2640, 4) +full_symbol close year month ( +timestamp ( +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( +2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 ( +... 
( +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data3 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] +columns=full_symbol,close,year,month +shape=(2640, 4) + full_symbol close year month +timestamp +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 +2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 +... +2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 +2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 +2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data4 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 221, in test_read_data4 + self._test_read_data4( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 97, in _test_read_data4 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data4 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(6002, 4) | df.shape=(6002, 4) +full_symbol close year month ( +timestamp ( +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( +... ( +2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 ( +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data4 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] +columns=full_symbol,close,year,month +shape=(6002, 4) + full_symbol close year month +timestamp +2021-12-30 00:00:00+00:00 1467591036 0 2021 12 +2021-12-30 00:00:00+00:00 1508924190 0 2021 12 +2021-12-30 00:01:00+00:00 1467591036 1 2021 12 +... 
+2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 +2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 +2022-01-01 02:00:00+00:00 1508924190 3000 2022 1""" +________________ TestHistoricalPqByTileClient1.test_read_data5 _________________ +Traceback (most recent call last): + File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 267, in test_read_data5 + self._test_read_data5( + File "/app/datapull/common/data/client/test/im_client_test_case.py", line 114, in _test_read_data5 + self.check_df_output(actual_df, *args, **kwargs) + File "/app/helpers/hunit_test.py", line 1516, in check_df_output + self.assert_equal( + File "/app/helpers/hunit_test.py", line 1230, in assert_equal + is_equal = assert_equal( + File "/app/helpers/hunit_test.py", line 957, in assert_equal + diff_files( + File "/app/helpers/hunit_test.py", line 666, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data5 +-------------------------------------------------------------------------------- + +# df= ( +index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] | df.index in [2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] +columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month +shape=(242, 4) | df.shape=(242, 4) +full_symbol close year month ( +timestamp ( +2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 ( +2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 ( +2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 ( +... 
( +2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 ( +2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 ( +2022-01-01 01:00:00+00:00 1508924190 2940 2022 1 ( +Diff with: +> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data5 +-------------------------------------------------------------------------------- +exp = r"""# df= +index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] +columns=full_symbol,close,year,month +shape=(242, 4) + full_symbol close year month +timestamp +2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 +2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 +2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 +... +2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 +2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 +2022-01-01 01:00:00+00:00 1508924190 2940 2022 1""" +============================= slowest 3 durations ============================== +1.44s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +1.26s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +1.14s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 +=========================== short test summary info ============================ +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +FAILED 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 +FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 +========================= 5 failed, 4 passed in 10.94s ========================= diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt new file mode 100644 index 000000000..c297aad27 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt @@ -0,0 +1,10 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 5 failed pytest 'tests' target(s); to reproduce run: +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 +pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 +pytest 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt new file mode 100644 index 000000000..8c9d7793d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt @@ -0,0 +1,10 @@ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175206Z SKIPPED [1] core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175722Z SKIPPED [1] config_root/config/test/test_config.py:325: See AmpTask1573 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176275Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176859Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1177550Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1178650Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1179474Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 - Na... 
+Run_fast_tests Run fast tests 2022-02-19T16:53:07.1180384Z ^[[31m= ^[[31m^[[1m3 failed^[[0m, ^[[32m1511 passed^[[0m, ^[[33m155 skipped^[[0m, ^[[33m60 deselected^[[0m, ^[[33m2 xfailed^[[0m, ^[[33m1 rerun^[[0m^[[31m in 211.15s (0:03:31)^[[0m^[[31m =^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1367972Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[36mINFO ^[[0m hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=28.0 KB +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1381857Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[33mWARN ^[[0m hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt new file mode 100644 index 000000000..e16188c74 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt @@ -0,0 +1,8 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 +pytest 
dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt new file mode 100644 index 000000000..58f583b0e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt @@ -0,0 +1,61 @@ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0521158Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [ 99%]^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0932903Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [100%]^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933619Z +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933865Z =================================== FAILURES =================================== +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0934800Z ^[[31m^[[1m_____________________ TestRealTimeMvnReturnsWithOms1.test1 _____________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0935555Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0936347Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0937188Z market_data = self.get_market_data(event_loop) +Run_fast_tests Run fast tests 
2022-02-19T16:53:07.0938027Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939155Z df = self.get_market_data_df() +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939988Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0940754Z df = node.fit()["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941392Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941905Z self._lazy_load(fit=True) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0942562Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943252Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943957Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0944732Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0959832Z ^[[31m^[[1m____________________ TestMultivariateNormalDataSource.test1 ____________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0961700Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0968475Z File "/app/dataflow/core/nodes/test/test_sources.py", line 175, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0970838Z df = node.fit()["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0972952Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0973577Z self._lazy_load(fit=True) +Run_fast_tests Run fast 
tests 2022-02-19T16:53:07.0974176Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0976810Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0977529Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0978880Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0981739Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0982702Z ^[[31m^[[1m_________________________ TestMvnReturnsBuilder.test1 __________________________^[[0m +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985191Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985837Z File "/app/dataflow/core/test/test_builders.py", line 74, in test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0986469Z result_bundle = dag_runner.fit() +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987113Z File "/app/dataflow/core/dag_runner.py", line 170, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987711Z return self._run_dag(method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988321Z File "/app/dataflow/core/dag_runner.py", line 181, in _run_dag +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988936Z df_out, info = self._run_dag_helper(method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0989566Z File "/app/dataflow/core/dag_runner.py", line 110, in _run_dag_helper +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0990221Z df_out = self.dag.run_leq_node(nid, method)["df_out"] +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0991397Z File "/app/dataflow/core/dag.py", line 428, in run_leq_node +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0992521Z self._run_node(id_, pred_nid, method) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993298Z File 
"/app/dataflow/core/dag.py", line 593, in _run_node +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993800Z output = getattr(node, method)(**kwargs) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994361Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994834Z self._lazy_load(fit=True) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995336Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995859Z rets = self._generate_returns(fit) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0996779Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0997405Z vol = cofinanc.compute_annualized_volatility(avg_rets) +Run_fast_tests Run fast tests 2022-02-19T16:53:07.0998205Z NameError: name 'cofinanc' is not defined +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048290Z ============================= slowest 3 durations ============================== +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048893Z 26.48s setup oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1049478Z 8.44s call helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1050189Z 5.32s setup dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1116212Z =========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z SKIPPED [1] test/test_tasks.py:68: Test needs to be run outside Docker +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119804Z SKIPPED [1] test/test_tasks.py:60: Test needs to be run outside Docker +Run_fast_tests Run fast tests 
2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt new file mode 100644 index 000000000..bc2ab8612 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt @@ -0,0 +1,61 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 +pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 +pytest dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 + +# TestRealTimeMvnReturnsWithOms1.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 + market_data = 
self.get_market_data(event_loop) + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data + df = self.get_market_data_df() + File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df + df = node.fit()["df_out"] + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined +^[[31m^[[1m__________________ + +# TestMultivariateNormalDataSource.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/core/nodes/test/test_sources.py", line 175, in test1 + df = node.fit()["df_out"] + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined +^[[31m^[[1m_______________________ + +# TestMvnReturnsBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/core/test/test_builders.py", line 74, in test1 + result_bundle = dag_runner.fit() + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 170, in fit + return self._run_dag(method) + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 181, in _run_dag + df_out, info = self._run_dag_helper(method) + File "$GIT_ROOT/dataflow/core/dag_runner.py", line 110, in _run_dag_helper + df_out = self.dag.run_leq_node(nid, method)["df_out"] + File "$GIT_ROOT/dataflow/core/dag.py", line 428, in run_leq_node + 
self._run_node(id_, pred_nid, method) + File "$GIT_ROOT/dataflow/core/dag.py", line 593, in _run_node + output = getattr(node, method)(**kwargs) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit + self._lazy_load(fit=True) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load + rets = self._generate_returns(fit) + File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns + vol = cofinanc.compute_annualized_volatility(avg_rets) +NameError: name 'cofinanc' is not defined + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt new file mode 100644 index 000000000..b0f4950ce --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt @@ -0,0 +1,36 @@ +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] + +=================================== FAILURES =================================== +__________________________ TestE8c_ModelBuilder.test1 __________________________ +Traceback (most recent call last): + File "/app/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 + self.check_string(actual) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: 
saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +__________________________ TestE8a_ModelBuilder.test1 __________________________ +Traceback (most recent call last): + File "/app/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 + self.check_string(actual) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +============================= slowest 3 durations ============================== +10.36s call dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 +7.77s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +7.31s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +=========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 +Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt new file mode 
100644 index 000000000..063e0af62 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt @@ -0,0 +1,36 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 +pytest dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 + +# TestE8a_ModelBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 + self.check_string(actual) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +# TestE8c_ModelBuilder.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 + self.check_string(actual) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ 
+The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' +################################################################################ + +________________________ + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt new file mode 100644 index 000000000..a2ee5ad54 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt @@ -0,0 +1,2533 @@ +INFO: > cmd='/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke run_fast_slow_superslow_tests' +>>ENV<<: is_inside_container=False: code_version=1.0.3, container_version=None, is_inside_docker=False, is_inside_ci=False, CI_defined=False, CSFY_CI='nan' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=False +## run_fast_slow_superslow_tests:  +## run_fast_tests:  +15:12:49 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and not superslow" . 
-o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION 
+CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 5.0s +timeout method: signal +timeout func_only: True +collecting ...  
+collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 622 items  +collecting 801 items  +collecting 1084 items  +collecting 1419 items  +collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint ( 3 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint ( 3 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=5f6da4732626 + release=3.10.0-1160.36.2.el7.x86_64 + version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51710918656, percent=22.3, used=11804581888, free=14433091584, active=30353010688, inactive=18354896896, buffers=0, cached=40310579200, shared=2491396096, slab=2053443584) + disk usage=sdiskusage(total=107362627584, used=32545419264, free=74817208320, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? 
+ gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m not slow and not superslow . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 81 deselected / 1793 selected  + +amp/dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.02 s) PASSED [ 0%] +amp/core/finance/test/test_prediction_processing.py::TestStackPredictionDf::test1 (0.03 s) PASSED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call1 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call2 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call3 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call4 SKIPPED [ 0%] +amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser SKIPPED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_get_df1 (0.01 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_read_with_filter1 (0.03 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_everything1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_one_column1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_two_columns1 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_merge1 (0.08 s) PASSED [ 0%] 
+amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read1 (0.05 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read2 (0.06 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read3 (0.03 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read4 (0.02 s) PASSED [ 0%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_full1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_over_two_years1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_two_years1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns2 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns3 (0.00 s) PASSED [ 1%] 
+amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns4 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_get_test_data1 (0.00 s) PASSED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset SKIPPED [ 1%] +amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset_wrong_column (0.00 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test1 (0.03 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test2 (0.02 s) PASSED [ 1%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test3 (0.02 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test4 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test5 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test6 (0.02 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test7 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test8 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test1 (0.07 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test2 (0.10 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test1 (0.04 s) PASSED [ 2%] +amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test2 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.21 s) PASSED [ 2%] 
+amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 2%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_unadjusted_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_metadata1 (0.11 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 3%] 
+amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_expiry_data5 (0.09 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data5 (0.07 s) PASSED [ 3%] +amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_expiry_data5 (0.07 s) PASSED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_mixed_constraints SKIPPED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_only_gmv_constraint SKIPPED [ 3%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_restrictions SKIPPED [ 4%] +amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_short_ban SKIPPED [ 4%] 
+amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer2::test1 SKIPPED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse1 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse2 (0.38 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse3 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse4 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse_empty_traceback1 (0.00 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestGhLogin1::test_gh_login (0.23 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_images_ls_repo (0.56 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_all SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_last SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_ps (0.21 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_stats SKIPPED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean (0.22 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_fetch_master (0.22 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_pull (0.21 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_images_ls_repo (0.36 s) PASSED [ 4%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_all SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_last SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_login (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_ps (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_pull (0.00 s) PASSED [ 5%] 
+amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_stats SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_find_test_class1 (0.14 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr1 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr2 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr3 SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_issue_title (0.42 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_workflow_list SKIPPED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_files (0.15 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean2 (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create3 (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_fetch_master (0.00 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_merge_master (0.08 s) PASSED [ 5%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_pull (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint2 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_print_setup (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title1 (0.47 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title4 (0.44 s) PASSED [ 6%] 
+amp/helpers/test/test_lib_tasks.py::TestLibTasksRemoveSpaces1::test1 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash2 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash3 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash4 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash5 (0.02 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_jupyter1 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests1 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests2 (0.00 s) PASSED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests4 SKIPPED [ 6%] +amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests5 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class1 (0.14 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class2 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class3 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator2 SKIPPED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files1 (0.09 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files2 (0.09 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_diff_files_abort1 (0.16 s) PASSED [ 7%] 
+amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_branch1 (0.33 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_files1 (0.23 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_last_commit1 (0.37 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_modified1 (0.44 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test2 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test1 (0.15 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2 (0.14 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert1 (0.00 s) PASSED [ 7%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert3 (0.07 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_branch1 SKIPPED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files3 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_last_commit1 (0.03 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_modified1 (0.07 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files1 (0.00 s) PASSED [ 8%] 
+amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests1 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests2 (0.00 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test1 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test2 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test3 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test4 (0.22 s) PASSED [ 8%] +amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test5 (0.22 s) PASSED [ 9%] +amp/helpers/test/test_lib_tasks.py::TestFailing::test_failing (0.00 s) PASSED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data2 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input1 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input2 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input3 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input4 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book_invalid_input1 SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_get_exchange_currency_pairs SKIPPED [ 9%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_initialize_class SKIPPED [ 9%] 
+amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_1 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_2 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_3 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_1 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_2 (0.00 s) PASSED [ 9%] +amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_3tiles_1 (0.00 s) PASSED [ 9%] +amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique1 (0.00 s) PASSED [ 9%] +amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique2 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test1 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test2 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_to_series1::test3 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df1 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df2 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df3 (0.02 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df4 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types1 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types2 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types3 (0.01 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str1 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str2 (0.00 s) PASSED [ 
10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str3 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str4 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str5 (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_datetime (0.00 s) PASSED [ 10%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_timestamp (0.00 s) PASSED [ 11%] +amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_uuid (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_universe1 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data2 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data3 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data4 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data5 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data6 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] 
+amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_universe1 (0.00 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data1 (0.04 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data2 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data3 (0.07 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data4 (0.06 s) PASSED [ 11%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data5 (0.07 s) PASSED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data6 (0.00 s) PASSED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_end_ts_for_symbol1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_start_ts_for_symbol1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_universe1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data1 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data2 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data3 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data4 SKIPPED [ 12%] +amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data5 SKIPPED [ 12%] 
+amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data6 SKIPPED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_infs (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_false (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_true (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test1 (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test_heaviside1 (0.00 s) PASSED [ 12%] +amp/core/signal_processing/test/test_misc_transformations.py::Test_compute_weighted_sum1::test1 (0.00 s) PASSED [ 12%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal1 (0.00 s) PASSED [ 12%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal5 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal_fuzzy_match1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal1 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal2 (0.04 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir3 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir4 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_output_dir1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir1 (0.00 s) PASSED [ 13%] 
+amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space3 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal1 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal2 (0.00 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal1 (0.03 s) PASSED [ 13%] +amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal_debug SKIPPED [ 13%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string1 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing1 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing2 (0.00 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing3 (0.15 s) (WARNING: Test was updated) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal1 (0.04 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal2 (0.03 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal3 (0.04 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal1 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal2 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal3 (0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing1 +WARNING: Update golden outcome file 
'/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing1/output/test_df.txt'(0.02 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing2 (0.01 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing3 +WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing3/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal1 (0.06 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal2 (0.05 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal3 +WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt'(0.03 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal4 (0.05 s) PASSED [ 14%] +amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test1 (0.16 s) (WARNING: Test was updated) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test2 +WARNING: Update golden outcome file '/app/amp/helpers/test/Test_check_string_debug1.test2/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::TestSubsetDf1::test1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test1 (0.02 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test2 (0.02 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test1 (0.00 s) PASSED [ 15%] 
+amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test3 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test1 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test2 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test3 (0.00 s) PASSED [ 15%] +amp/helpers/test/test_unit_test.py::Test_purify_amp_reference1::test1 (0.00 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeReturnPipeline1::test1 (0.47 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimePipelineWithOms1::test1 (0.98 s) PASSED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 SKIPPED [ 15%] +amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms2::test1 SKIPPED [ 15%] +amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py::TestCsvToPq::test_csv_to_pq_script SKIPPED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 (0.00 s) FAILED [ 16%] +research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (6.31 s) RERUN [ 16%] +research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (1.61 s) PASSED [ 16%] +research/RH2E/test/test_RH2E_pipeline.py::TestRH2E_DagBuilder::test1 (4.68 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ec_pipeline.py::TestRH2Ec_DagBuilder::test1 (0.18 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ed_pipeline.py::TestRH2Ed_DagBuilder::test1 (0.19 s) PASSED [ 16%] +research/RH2E/test/test_RH2Ee_pipeline.py::TestRH2Ee_DagBuilder::test1 (0.39 s) PASSED [ 16%] 
+research/RH2E/test/test_RH2Ef_pipeline.py::TestRH2Ef_DagBuilder::test1 (4.61 s) PASSED [ 16%] +research/RH2E/test/test_RH2Eg_pipeline.py::TestRH2Eg_DagBuilder::test1 (3.68 s) PASSED [ 16%] +research/RH1E/test/test_RH1E_pipeline.py::TestRH1E_DagBuilder::test1 (2.02 s) PASSED [ 16%] +research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test1 (0.17 s) PASSED [ 16%] +research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test2 (1.97 s) PASSED [ 16%] +oms_lime/test/test_eg_broker.py::TestEgBroker1::test_place_order1 (0.94 s) PASSED [ 16%] +oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example1 (1.74 s) PASSED [ 16%] +oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example2 (0.06 s) PASSED [ 16%] +oms_lime/test/test_eg_restrictions.py::TestEgRestrictions1::test_get_trading_restrictions (0.02 s) PASSED [ 16%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test_save_data SKIPPED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_DagBuilder::test1 (0.47 s) PASSED [ 16%] +dataflow_lime/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_DagBuilder::test1 (4.82 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (5.13 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (6.41 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (2.90 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_predict (0.79 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (6.48 s) RERUN [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (4.11 s) PASSED [ 17%] 
+dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag1 (0.01 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag2 (0.01 s) PASSED [ 17%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder4::test_fit (2.90 s) PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove1 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove2 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove3 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove4 PASSED [ 17%] +amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove5 PASSED [ 17%] +amp/oms/test/test_oms_db.py::TestOmsDbRemoveAllTables1::test1 SKIPPED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio1::test_state (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics1 (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics2 (0.09 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics3 (0.01 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_cash1 (0.02 s) PASSED [ 17%] +amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_holdings1 (0.08 s) PASSED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test1 SKIPPED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test2 SKIPPED [ 18%] +amp/oms/test/test_portfolio.py::TestMockedPortfolio2::test1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestSimulatedProcessForecasts1::test_initialization1 (0.63 s) 
PASSED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts1::test_mocked_system1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system1 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system2 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system3 SKIPPED [ 18%] +amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system4 SKIPPED [ 18%] +amp/oms/test/test_restrictions.py::TestRestrictions1::test1 SKIPPED [ 18%] +amp/oms/test/test_restrictions.py::TestRestrictions1::test2 SKIPPED [ 18%] +amp/dataflow/system/test/test_real_time_dag_adapter.py::TestRealtimeDagAdapter1::testMvnReturnsBuilder1 (0.05 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes1 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes2 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes3 (0.01 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes4 (0.02 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes5 (0.13 s) PASSED [ 18%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes1 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes10 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes2 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes3 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes4 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes5 (0.00 s) PASSED [ 19%] 
+amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes6 (0.02 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes7 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes8 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes9 (0.01 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks2 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks3 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test2 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_from_config1 (0.01 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_columns_for_tag1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_tags_for_column1 (0.00 s) PASSED [ 19%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_pickle1 (0.05 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_dict_and_back (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_feature_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags2 (0.00 s) PASSED [ 20%] 
+amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags3 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_targets_and_predictions_for_tags1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_prediction_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_target_col_names1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_runners.py::TestRollingFitPredictDagRunner1::test1 (0.43 s) PASSED [ 20%] +amp/dataflow/core/test/test_runners.py::TestIncrementalDagRunner1::test1 (0.47 s) PASSED [ 20%] +amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test1 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test2 (0.00 s) PASSED [ 20%] +amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw1 (0.01 s) PASSED [ 20%] +amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw_to_file1 (0.01 s) PASSED [ 20%] +amp/core/plotting/test/test_portfolio_stats.py::Test_plot_portfolio_stats1::test1 PASSED [ 20%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test3 (0.00 s) PASSED [ 21%] +amp/config_root/config/test/test_config_builders.py::TestGetConfigsFromBuilder1::test1 (0.00 s) PASSED [ 21%] +amp/config_root/config/test/test_config_builders.py::TestGetConfigFromEnv::test_no_env_variables (0.00 s) PASSED [ 21%] 
+amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test1 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test2 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test4 (0.01 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test1 (0.02 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test2 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test3 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test4 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestComputeTurn1::test1 (0.00 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestMaximizeWeightEntropy1::test1 (0.13 s) PASSED [ 21%] +amp/core/statistics/test/test_turnover.py::TestFindNearestAffinePoint1::test1 (0.01 s) PASSED [ 22%] +research/returns/test/test_dataflow_lime_returns_pipeline.py::TestReturnsPipeline::test1 (0.11 s) PASSED [ 22%] +im_lime/eg/test/test_eg_transform_pq_by_date_to_by_asset.py::TestEgTransformByDateToByTile1::test_transform1 (4.30 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test1 (3.12 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache1 SKIPPED [ 22%] 
+core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache2 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache3 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache4 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache5 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache6 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_historical1 (0.70 s) PASSED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_real_time1 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_replayed_time1 SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_save_data SKIPPED [ 22%] +core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgMultipleInstrumentDataReader1::test_historical1 (0.75 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_incorrect_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_integer_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_string_datetime (0.00 s) PASSED [ 22%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_index_already_present (0.00 s) PASSED [ 23%] 
+amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_milliseconds (0.00 s) PASSED [ 23%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_seconds (0.00 s) PASSED [ 23%] +amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_wrong_column (0.00 s) PASSED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 SKIPPED [ 23%] +amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_command_line SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call1 SKIPPED [ 23%] 
+amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call2 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_process_chunk SKIPPED [ 23%] +amp/helpers/test/test_lib_tasks_find.py::Test_find_short_import1::test1 (0.00 s) PASSED [ 23%] +amp/helpers/test/test_lib_tasks_find.py::Test_find_func_class_uses1::test1 (0.00 s) PASSED [ 24%] +amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_replayed_time1 SKIPPED [ 24%] +amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_simulated_replayed_time1 (0.46 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test1 (0.16 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 (0.15 s) PASSED [ 24%] +amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit1 (1.13 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit_dag1 (1.07 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_csv1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_parquet1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_csv1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_parquet1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates1 (0.01 s) PASSED [ 24%] 
+amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates_open_boundary1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestArmaDataSource::test1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 (0.01 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test1 (0.07 s) PASSED [ 24%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test2 (0.02 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test3 (0.07 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test4 (0.08 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test5 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3 (0.16 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test01 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test02 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test03 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test04 (0.13 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test05 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test06 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test07 (0.19 s) PASSED [ 25%] 
+amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test08 SKIPPED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test09 (0.37 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test10 (0.10 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test11 (0.09 s) PASSED [ 25%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test12 (0.13 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test13 (0.16 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1 (0.20 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2 (0.25 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3 (0.34 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1 (0.01 s) PASSED [ 26%] +amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test2 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test3 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test4 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test5 (0.00 s) PASSED [ 26%] 
+amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test1 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test2 (0.01 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test3 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test4 (0.00 s) PASSED [ 26%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test5 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test1 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test10 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test11 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test12 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test2 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test3 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test4 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test5 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test6 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test7 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test8 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test9 (0.01 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test1 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test2 (0.00 s) PASSED [ 27%] 
+amp/core/statistics/test/test_t_test.py::TestTTest1samp::test3 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_t_test.py::TestTTest1samp::test4 (0.00 s) PASSED [ 27%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_high_sample_count (0.02 s) PASSED [ 27%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_moderate_sample_count (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_high_sample_count (0.41 s) PASSED [ 28%] +amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_moderate_sample_count (0.20 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test1 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test2 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test3 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test4 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test5 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test6 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test7 (0.01 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test1 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test2 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test3 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test4 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test5 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test6 
(0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test1 (0.00 s) PASSED [ 28%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test2 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test4 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test5 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test6 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeNumFiniteSamples::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeNumUniqueValues::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeDenominatorAndPackage::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test1 (0.01 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test2 (0.01 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test1 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test2 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test3 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test4 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test5 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test6 (0.00 s) PASSED [ 29%] +amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_2dof (0.00 s) PASSED [ 29%] 
+amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_4dof (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_almost_normal (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test1 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test10 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test11 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test12 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test2 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test3 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test5 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test6 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test7 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test8 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test9 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test1 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test2 (0.01 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test3 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test4 (0.00 s) PASSED [ 30%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test5 (0.00 s) PASSED [ 30%] 
+amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test6 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test7 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test3 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_drawdown::test1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test1 (0.01 s) PASSED [ 31%] +amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test2 (0.01 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed1 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed2 (0.00 s) PASSED [ 31%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed3 (0.00 s) PASSED [ 
32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_scale_invariance1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test4 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test5 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test3 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test1 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test2 (0.00 s) PASSED [ 32%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test3 (0.00 s) PASSED [ 
33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test4 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test5 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test6 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_df (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_series (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_user_supplied_pi0 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test0 SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test1 (0.38 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test_generate_input_data SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test0 SKIPPED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test1 (0.02 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test2 (0.05 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test3 (0.04 s) PASSED [ 33%] +amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test_generate_input_data SKIPPED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test1 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test2 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test1 (0.00 s) PASSED [ 33%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 (0.00 s) XFAIL [ 34%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test3 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test1 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test2 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test3 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test4 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test5 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test6 (0.02 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 (0.01 s) XFAIL [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test1 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test2 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test3 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test4 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test5 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test6 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test7 (0.01 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test8 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test1 (0.00 s) PASSED [ 34%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test3 (0.00 s) PASSED [ 35%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test4 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test5 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test6 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test7 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test8 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test1 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test3 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test4 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test5 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test6 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test7 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test8 SKIPPED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test9 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test1 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test2 (0.00 s) PASSED [ 35%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test4 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test5 (0.00 s) PASSED [ 36%] 
+amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test6 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test7 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test8 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_nan (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_smoke (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test1 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test2 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test3 (0.03 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test1 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test2 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test1 (0.01 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test2 (0.01 s) PASSED [ 36%] +amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test3 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatio::test1 (0.00 s) PASSED [ 36%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatioStandardError::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test1 (0.02 s) PASSED [ 37%] 
+amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test2 (0.09 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test3 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test1 (0.02 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test2 (0.09 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_summarize_sharpe_ratio::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test2 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test3 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test4 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_nans1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_oos_not_from_interval1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_zeros1 (0.01 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test1 (0.00 s) PASSED [ 37%] +amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test2 (0.00 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_derivative1::test1 (0.03 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_moving_average1::test1 (0.01 s) PASSED [ 37%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test1 (0.00 s) PASSED [ 38%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test2 (0.00 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test3 (0.00 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test4 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test5 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test6 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test7 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_moment1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_norm1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_var1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_std1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_demean1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_skew1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_kurtosis1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_sharpe_ratio1::test1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_corr1::test1 (0.02 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zcorr1::test1 (0.03 s) PASSED [ 38%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_atol1 (0.01 s) PASSED [ 38%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_clean1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_atol1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_clean1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan1 (0.01 s) PASSED [ 39%] 
+amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero2 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_atol1 (0.01 s) PASSED [ 39%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_clean1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_annualized_sharpe_ratio::test1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test1 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test2 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test3 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test4 (0.17 s) PASSED [ 
40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test5 (0.16 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test6 (0.17 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test1 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test2 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test3 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test4 (0.01 s) PASSED [ 40%] +amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test5 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan1 (0.14 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero1 (0.10 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize1 (0.11 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test1 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test2 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test3 (0.00 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_clean1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_depth (0.01 s) PASSED [ 41%] 
+amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode3 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode1 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode2 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode3 (0.01 s) PASSED [ 41%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test1 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test2 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test3 (0.03 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test_lag_1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_inverse::test1 (0.00 s) PASSED [ 42%] +amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_presudoinverse::test1 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_non_ath_to_nan1::test1 (0.01 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_bypass (0.00 s) PASSED [ 42%] 
+amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_remove (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test1 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test2 (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_bypass (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_remove (0.00 s) PASSED [ 42%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_ask_value (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_bid_value (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_centered_order_book_imbalance (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_geometric_mid (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_log_relative_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid_value (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_order_book_imbalance (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_quoted_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_relative_spread (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_weighted_mid (0.00 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test1 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test2 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test1 (0.02 s) PASSED [ 43%] 
+amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test2 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test3 (0.02 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans1 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans2 (0.01 s) PASSED [ 43%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans2 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_offset (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_daily (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_intraday (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_invariance (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_month1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_week1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_year1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_minute1 (0.01 s) PASSED [ 44%] 
+amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_month_to_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_business_day1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_month1 (0.01 s) PASSED [ 44%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_week1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_year1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_business_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_minute1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_month_to_day1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::TestComputeOvernightReturns::test1 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test1 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test2 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test3 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test4 (0.00 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test5 (0.01 s) PASSED [ 45%] +amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test6 (0.00 s) PASSED 
[ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_function (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_object (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_roundtrip_transform1 (0.00 s) PASSED [ 45%] +amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_set1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key4 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_in1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_not_in1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key4 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] 
+amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key1 (0.00 s) PASSED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key2 SKIPPED [ 46%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key3 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key4 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_not_existing_key1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_print1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_to_python1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_roundtrip_transform1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in3 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in4 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update3 
(0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten1 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten2 (0.00 s) PASSED [ 47%] +amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config.py::TestFromEnvVar1::test1 (0.44 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test_check_same_configs_error (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test1 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test1 (0.00 s) PASSED [ 48%] 
+amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test2 (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test_same_config (0.00 s) PASSED [ 48%] +amp/config_root/config/test/test_config_utils.py::Test_convert_to_dataframe1::test1 (0.01 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test1 (0.00 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test2 (0.00 s) PASSED [ 49%] +amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test3 (0.00 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test1 (0.47 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test2 (0.26 s) PASSED [ 49%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test3 (0.27 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_log_portfolio_read_portfolio (0.10 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_multiday_overnight_returns_injected (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_floating_gmv (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_floating_gmv (0.03 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_targeted_gmv (0.03 s) PASSED [ 49%] 
+amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_demean (0.04 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_side_preserving (0.04 s) PASSED [ 49%] +amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_multiday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] +im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data1 (1.75 s) PASSED [ 49%] +im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data2 (1.82 s) PASSED [ 49%] +market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_get_data_at_timestamp1 (2.07 s) PASSED [ 50%] +market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_should_be_online1 (0.00 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_at_timestamp1 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval1 SKIPPED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval2 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval3 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval4 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval5 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period1 (0.16 s) PASSED [ 50%] 
+amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period2 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period3 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period4 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period5 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period6 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period7 (0.16 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_end_time1 (0.06 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_price1 (0.23 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_twap_price1 (0.15 s) PASSED [ 50%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_is_online1 (0.06 s) PASSED [ 51%] +amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_should_be_online1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test3 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test4 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test5 (0.00 s) PASSED [ 51%] 
+amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test6 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test7 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test2 (0.00 s) PASSED [ 51%] +amp/datapull/common/universe/test/test_universe_utils.py::TestStringToNumericalId::test1 (0.00 s) PASSED [ 51%] +amp/datapull/common/universe/test/test_universe_utils.py::TestBuildNumericalToStringIdMapping::test1 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates1 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates2 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates3 (0.00 s) PASSED [ 51%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates4 (0.00 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_get_available_dates1 (0.00 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test1 (1.39 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test2 (2.44 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test3 (2.81 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla1 (1.61 s) PASSED [ 52%] 
+vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla2 (1.05 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_convert_string_to_timestamp1 (0.02 s) PASSED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_generate_raw_eg_data SKIPPED [ 52%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_process_bar_data1 (0.02 s) PASSED [ 52%] +vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data1 (0.82 s) PASSED [ 52%] +vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data2 (0.74 s) PASSED [ 52%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_replayed_time1 SKIPPED [ 52%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_save_data SKIPPED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_tiny1 (0.00 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v1 (0.00 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_all (0.20 s) PASSED [ 52%] +research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_top100 (0.08 s) PASSED [ 52%] +oms_lime/test/test_eg_portfolio.py::TestEgPortfolio1::test_send_orders1 SKIPPED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData1::test_should_be_online1 (0.02 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data1 (0.04 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data3 (0.05 s) PASSED [ 53%] 
+market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time1 (0.01 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_is_online1 (0.02 s) PASSED [ 53%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_sql_get_query1 (0.01 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData1::test_save_market_data1 SKIPPED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data1 (0.19 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp1 (0.19 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp2 (0.17 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_for_interval1 (0.18 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_print_info_for_serialized_data1 SKIPPED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_round_trip1 (0.16 s) PASSED [ 53%] +market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData3::test_get_data1 (0.50 s) PASSED [ 53%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period1 (0.00 s) SKIPPED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data1 (0.08 s) PASSED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data2 (0.08 s) PASSED [ 53%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data3 (0.12 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data4 (0.12 s) 
PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data5 (0.12 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_0 (0.02 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_1 (0.07 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_3 (0.08 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_6 (0.08 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_63 (0.11 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp1 (0.04 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp2 (0.02 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval1 (0.03 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval2 (0.04 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_get_last_end_time1 (0.03 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available1 (0.07 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available2 (0.05 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available3 (0.84 s) PASSED [ 54%] +amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData4::test_is_last_bar_available1 (0.08 s) PASSED [ 54%] 
+amp/dataflow/model/test/test_stats_computer.py::TestStatsComputer1::test_compute_portfolio_stats1 (0.04 s) PASSED [ 54%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_bash SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_cmd1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_images_ls_repo1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_jupyter1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_login1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_ps SKIPPED (T...) [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_docker_stats SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_list SKIPPED (Test n...) [ 55%] +amp/test/test_tasks.py::TestExecuteTasks1::test_print_setup1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only2 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_local_image SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_prod_image SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_jupyter1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_docker_pull1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_lint1 SKIPPED (Test ...) 
[ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_blank_tests1 SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests SKIPPED [ 55%] +amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests_failed SKIPPED [ 55%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order2 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout1 SKIPPED [ 56%] +amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test1 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test2 (0.00 s) PASSED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test1 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test3 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test4 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test1 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test2 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test3 SKIPPED [ 56%] +amp/datapull/test/test_im_lib_tasks.py::TestImDockerCmd::test1 SKIPPED [ 56%] +amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe1 (0.00 s) PASSED [ 57%] 
+amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe2 (0.00 s) PASSED [ 57%] +amp/datapull/ccxt/universe/test/test_universe.py::TestGetVendorUniverse::test1 (0.00 s) PASSED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data1 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data2 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data3 SKIPPED [ 57%] +amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data4 SKIPPED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test1 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test2 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test3 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test4 (0.00 s) PASSED [ 57%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test1 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test10 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test11 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test12 (0.00 s) 
PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test13 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test14 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test15 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test16 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test17 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test18 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test2 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test3 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test4 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test5 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test6 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test7 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test8 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test9 (0.00 s) PASSED [ 58%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_config1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_dataseries1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_df1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_dict1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_float3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int1 (0.00 s) 
PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_int3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_list1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str2 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestToPythonCode1::test_str3 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFilePath1::test1 (0.00 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test1 (0.50 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test2 (0.50 s) PASSED [ 59%] +amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test3 (0.49 s) PASSED [ 59%] +amp/helpers/test/test_printing.py::Test_printing1::test_color_highlight1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test3 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test4 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test5 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_to_str1::test6 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test3 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_log::test4 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_sort_dictionary::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_indent1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test2 (0.00 
s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_dedent1::test_roundtrip1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_align_on_left1::test1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame1 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame2 (0.00 s) PASSED [ 60%] +amp/helpers/test/test_printing.py::Test_logging1::test_log_frame3 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test1 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test2 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test3 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test4 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test5 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test6 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system1::test7 (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_os_name (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_server_name (0.05 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_system2::test_get_user_name (0.10 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test1 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test2 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test3 (0.00 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test1 (0.17 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test2 (0.18 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test3 (0.18 s) 
PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test4 (0.18 s) PASSED [ 61%] +amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test5 (0.18 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_Linux_commands1::test_du1 (0.19 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_not_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp3 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp4 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp5 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp2 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestCacheFunctions::test_get_cache_name1 (0.00 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_changed_function (0.12 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_redefined_function (0.12 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching1 (0.30 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching2 (0.30 s) PASSED [ 62%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching3 (0.30 s) PASSED [ 62%] 
+amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching4 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching5 (0.29 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_disk_reset (0.39 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset (0.40 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset2 (0.43 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestGlobalCache1::test_without_caching1 (0.00 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching1 (0.70 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching2 (0.64 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_dataframe (0.17 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_series (0.16 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function_no_mem (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestAmpTask1407::test1 (0.10 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestAmpTask1407::test2 (0.11 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 SKIPPED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_disk_cache1 (0.33 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_cache1 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_disk_cache1 (0.32 s) PASSED [ 63%] +amp/helpers/test/test_cache.py::TestCacheUpdateFunction1::test1 (0.01 s) PASSED [ 64%] +amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_disk_cache1 (0.32 s) PASSED [ 64%] 
+amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_cache1 (0.32 s) PASSED [ 64%] +amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_disk_cache1 (0.33 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test3 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test4 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test5 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test6 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert1::test7 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test3 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test4 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_eq1::test5 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all1 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all2 (0.00 s) PASSED [ 64%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance3 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance4 (0.00 s) 
PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance5 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted3 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted4 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates2 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection1 (0.00 s) PASSED [ 65%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert3 (0.00 s) PASSED [ 66%] 
+amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert4 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert2 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert3 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test1 (0.00 s) PASSED [ 66%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_callable1::test1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_dbg.py::Test_dassert_callable1::test_fail1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_branch_name1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root2 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_path_from_supermodule1 (0.11 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_project_dirname1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_get_submodule_paths1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_is_amp (0.11 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule1::test_is_inside_submodule1 (0.00 s) PASSED [ 67%] 
+amp/helpers/test/test_git.py::Test_git_submodule2::test_get_head_hash1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_get_remote_head_hash1 (0.05 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes1 (0.00 s) PASSED [ 67%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes3 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names1 (0.10 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names2 (0.11 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client1 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client2 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname1 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname2 (0.05 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name4 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name_rountrip1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_task_prefix_from_repo_short_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name1 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name2 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name3 (0.00 s) PASSED [ 68%] 
+amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name4 (0.00 s) PASSED [ 68%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root1 SKIPPED [ 68%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root2 SKIPPED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root3 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root4 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root5 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 (0.11 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files_in_branch1 (0.05 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_previous_committed_files1 (0.07 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_summary_files_in_branch1 (0.47 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_git_modified_files1::test_git_log1 (0.07 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test1 (0.13 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test2 (0.13 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test3 (0.17 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test4 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_find_docker_file1::test5 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_docker_base_image_name1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_host_name1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_repo_map1 (0.00 s) PASSED [ 69%] +amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_has_didn_support1 (0.00 s) PASSED [ 69%] 
+amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_real_time1 (1.00 s) PASSED [ 70%] +amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_logging1::test_logging_levels1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_real_time1 (1.00 s) PASSED [ 70%] +amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_find_all_files1::test1 (0.20 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_change_filename_extension1::test1 (0.00 s) PASSED [ 70%] +amp/helpers/test/test_io_.py::Test_load_df_from_json::test1 (0.01 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name2 (0.00 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_run1 (0.36 s) PASSED [ 70%] +amp/dev_scripts/test/test_amp_dev_scripts.py::Test_env1::test_get_system_signature1 (0.16 s) PASSED [ 70%] +amp/dev_scripts/infra/test/test_all.py::Test_ssh_tunnel::test1 SKIPPED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_caesar1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_author1 SKIPPED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_file_size1 (0.28 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_master1 (0.05 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_words_in_text1 (0.00 s) PASSED [ 70%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex1 (0.00 s) PASSED [ 71%] 
+amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex2 (0.00 s) PASSED [ 71%] +amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex3 (0.00 s) PASSED [ 71%] +amp/dataflow/model/test/test_forecast_mixer.py::TestForecastMixer1::test_generate_portfolio_bar_metrics_df (0.05 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_model_selection1 (1.77 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_model_return_correlation1 (0.28 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_multiple_tests_adjustment1 (0.18 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_positions1 (0.43 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_and_vol1 (0.71 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_signal_analysis1 (0.58 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_return_correlation1 (0.32 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_returns_and_predictions1 (1.13 s) PASSED [ 71%] +amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_sharpe_ratio_panel1 (0.40 s) PASSED [ 71%] +amp/dataflow/model/test/test_regression_analyzer.py::TestRegressionAnalyzer1::test_compute_moments (0.06 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_replayed_time1 (0.00 s) PASSED [ 71%] +amp/core/test/test_real_time.py::TestReplayedTime1::test1 (0.00 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_real_time1 (3.03 s) PASSED [ 71%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_replayed_time1 (4.01 s) PASSED [ 72%] 
+amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_replayed_time1 (0.00 s) PASSED [ 72%] +amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_time1 (0.00 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test1 (0.40 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test2 (0.39 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test3 (0.39 s) PASSED [ 72%] +amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test4 (0.39 s) PASSED [ 72%] +amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_precision_equivalency (0.00 s) PASSED [ 72%] +amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_toy_case (0.00 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_data1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_twap_price1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread2 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread3 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread4 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint1 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint2 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint3 (0.01 s) PASSED [ 72%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread1 (0.01 s) PASSED [ 73%] 
+amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread2 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread3 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread4 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread5 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread6 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price1 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price2 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price3 (0.01 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test1 (0.06 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random1 (0.06 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random2 (0.08 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random3 (0.10 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test1 (0.03 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test2 (0.03 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test3 (0.07 s) PASSED [ 73%] +amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test_perf1 SKIPPED [ 73%] +amp/oms/test/test_api.py::Test_Contract1::test1 (0.00 s) PASSED [ 73%] +amp/oms/test/test_api.py::Test_Contract1::test_cmp1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Contract1::test_cmp2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Order1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_OrderStatus1::test1 (0.00 s) PASSED [ 74%] 
+amp/oms/test/test_api.py::Test_Trade1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_cmp1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_cmp2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff2 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_Position1::test_diff3 (0.00 s) PASSED [ 74%] +amp/oms/test/test_api.py::Test_OMS1::test1 SKIPPED (unconditional skip) [ 74%] +amp/oms/test/test_api.py::Test_OMS1::test2 SKIPPED (unconditional skip) [ 74%] +amp/oms/test/test_broker.py::TestSimulatedBroker1::test_submit_and_fill1 (0.05 s) PASSED [ 74%] +amp/oms/test/test_broker.py::TestMockedBroker1::test1 SKIPPED (Need ...) [ 74%] +amp/oms/test/test_order.py::TestOrder1::test1 (0.00 s) PASSED [ 74%] +amp/oms/test/test_order.py::TestOrders1::test1 (0.00 s) PASSED [ 74%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 74%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 75%] 
+amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 75%] +amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract2 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract3 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol2 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol3 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol4 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract1 (0.00 s) PASSED [ 75%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract4 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract5 (0.00 s) PASSED [ 76%] 
+amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract6 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract7 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract1 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract1 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract2 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract3 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract4 (0.00 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contract_slow1 (0.66 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts1 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts2 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts3 (0.06 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures1 (0.05 s) PASSED [ 76%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures3 (0.05 s) PASSED [ 77%] 
+amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures4 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures5 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures6 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow1 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow2 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata1 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata2 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata3 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata4 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata5 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow1 (0.38 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow2 (0.40 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow3 (0.39 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element1 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element2 (0.05 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer1 (0.00 s) PASSED [ 77%] +amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer2 (0.00 s) 
PASSED [ 77%] +amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_parsing_logic (0.00 s) PASSED [ 78%] +amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_real_call SKIPPED [ 78%] +amp/im/kibot/metadata/test/test_load.py::TestAdjustmentsLoader::test_real_call SKIPPED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_etfs (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_forex (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_futures (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_stocks (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_sp500 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_daily (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_minutely (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_tick (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_continuous (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_expiry (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test1 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test10 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test11 (0.00 s) PASSED [ 78%] 
+amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test12 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test13 (0.00 s) PASSED [ 78%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test14 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test2 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test3 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test4 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test5 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test6 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test7 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test8 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test9 (0.00 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test1 (0.17 s) PASSED [ 79%] +amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test_read_data_with_start_end_ts SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_1 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_2 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_1 SKIPPED [ 79%] +amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_2 SKIPPED [ 79%] 
+amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol1 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol2 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol3 (0.00 s) PASSED [ 79%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol4 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name1 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name2 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name3 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name4 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name5 (0.00 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file1 (0.00 s) PASSED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 80%] 
+amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 80%] +amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 80%] +amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_dataset_links (0.03 s) PASSED [ 81%] +amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_payload_links (1.53 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path1 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path2 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path3 (0.00 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_get_latest_symbols_file1 (0.03 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_dtypes1 (0.04 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data1 (0.10 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data2 (0.04 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data3 (0.10 s) PASSED [ 81%] +amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_check_date_type (0.05 s) PASSED [ 81%] 
+amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_with_start_end_ts (1.51 s) PASSED [ 81%] +amp/im/eoddata/test/test_read_symbol_list.py::Test_read_symbols_from_file::test1 (0.00 s) PASSED [ 81%] +amp/im/ib/connect/test/test_im_tasks.py::TestImTwsStartIbInterface::test1 SKIPPED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table1 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table2 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_filter_table3 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_from_text1 (0.00 s) PASSED [ 81%] +amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_repr1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_str1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_unique1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_table.py::TestTable1::test_unique2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_timer.py::TestTimedScope::test_1 (1.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version1 SKIPPED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_check_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_get_changelog_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_versioning.py::TestVersioning1::test_get_container_version1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_find_duplicates1::test1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_find_duplicates1::test2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test1 
(0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test2 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test3 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_extract1::test1 (0.00 s) PASSED [ 82%] +amp/helpers/test/test_list.py::Test_list_extract1::test2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test4 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test5 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test6 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_extract1::test7 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test4 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list_chunk1::test5 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_find_duplicates1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_find_duplicates2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates2 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates3 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_extension1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_os1 (0.00 s) PASSED [ 83%] +amp/helpers/test/test_open.py::Test_open_html::test_linux1 SKIPPED (...) [ 84%] +amp/helpers/test/test_open.py::Test_open_html::test_mac1 SKIPPED (Se...) 
[ 84%] +amp/helpers/test/test_open.py::Test_open_html::test_windows1 SKIPPED [ 84%] +amp/helpers/test/test_open.py::Test_open_pdf::test_mac1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_get_credentials1::test1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_functions1::test_extract_bucket_from_path1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists1 (0.01 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists2 (0.05 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_exists3 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_glob1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_s3.py::Test_s3_1::test_ls1 (0.01 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_dry_run1 (0.00 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading2 (0.03 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky2 (1.97 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_serial1 (0.06 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading1 (0.02 s) PASSED [ 84%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading2 (0.02 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky1 (1.34 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky2 (1.19 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial1 (0.02 s) PASSED [ 85%] 
+amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial2 (0.02 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading1 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading2 (0.04 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading3 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading4 (0.04 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky1 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky2 (1.38 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky3 PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial1 (0.07 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial2 (0.08 s) PASSED [ 85%] +amp/helpers/test/test_joblib_helpers.py::Test_joblib_example1::test1 SKIPPED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime1 (0.00 s) PASSED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime_fail1 (0.00 s) PASSED [ 85%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime_fail1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime_assert1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_datetime_conversions (0.00 s) PASSED [ 86%] 
+amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime3 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert2 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_ET (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_UTC (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_ET (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_UTC (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_annual1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_bimonthly1 (0.00 s) PASSED [ 86%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_daily1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_index1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly3 (0.00 s) PASSED [ 87%] 
+amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly4 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly5 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly3 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_srs1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_weekly1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test2 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test3 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test1 (0.00 s) PASSED [ 87%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test3 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test3 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test4 (0.00 s) PASSED [ 88%] 
+amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test5 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test6 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test7 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test8 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test9 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_env.py::Test_env1::test_get_system_signature1 (0.17 s) PASSED [ 88%] +amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example2 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_convert_csv_to_dict::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_from_typed_csv::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_csv_helpers.py::Test_to_typed_csv::test1 (0.00 s) PASSED [ 88%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_conjunction1 (0.01 s) PASSED [ 88%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_disjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_conjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_disjunction1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::TestFilterDataByMethod::test1 (0.02 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test1 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test2 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test3 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test4 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test5 (0.00 s) PASSED [ 89%] 
+amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test6 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test1 (0.01 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test2 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test3 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test4 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test5 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test6 (0.00 s) PASSED [ 89%] +amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test7 (0.00 s) PASSED [ 89%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md1::test_uml_file_names1 (0.00 s) PASSED [ 89%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command1 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command2 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml1 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml2 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml3 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml4 (0.00 s) PASSED [ 90%] +amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml_playback1 (0.01 s) PASSED [ 90%] +amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test1 (0.00 s) PASSED [ 90%] +amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test2 (0.00 s) PASSED [ 90%] 
+amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test3 (0.00 s) PASSED [ 90%] +amp/dataflow/pipelines/features/test/test_feature_pipeline.py::TestFeaturePipeline::test1 (0.23 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_with_oos (0.02 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_without_oos (0.03 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_with_oos (0.04 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_without_oos (0.00 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2 (0.01 s) PASSED [ 90%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json3 (0.00 s) PASSED [ 91%] +amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_merge (0.01 s) PASSED [ 91%] +amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_aggregate_models1 (0.26 s) PASSED [ 91%] +amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_calculate_stats1 (1.41 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer1::test_column_arithmetic (0.03 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer2::test_resampling (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing1 (0.02 s) PASSED [ 91%] 
+amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing2 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans_without_reindexing (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer1::test1 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_then_join (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer1::test1 (0.02 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test1 (0.04 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test2 (0.51 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_then_join (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 92%] 
+amp/dataflow/core/nodes/test/test_transformers.py::TestFunctionWrapper::test1 (0.01 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test1 (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test2 (0.03 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test1 (0.08 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test2 (0.13 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test1 (0.02 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test2 (0.03 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test3 (0.05 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test1 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test2 (0.06 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test3 (0.08 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test1 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test2 (0.04 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test3 (0.06 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_local_level_model.py::TestLocalLevelModel::test1 (0.01 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test0 SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test1 (0.06 s) PASSED [ 93%] 
+amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test2 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test3 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test4 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test5 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test6 (0.05 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test7 (0.09 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test_generate_input_data SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1 (1.04 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2 (1.07 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_step_one1 SKIPPED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_with_constant1 (1.25 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict2 (1.07 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals1 (1.46 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals_no_x1 (1.28 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_with_nan (1.08 s) PASSED [ 93%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test1 (0.02 s) PASSED [ 94%] 
+amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_0_zscoring1 (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_3_zscoring1 (0.02 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through_no_writing (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_write (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through_no_writing (0.01 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_write (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_linearize_eigval_eigvec (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval1 (0.00 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval2 (0.00 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec1 (0.01 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec2 (0.02 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test1 (0.23 s) PASSED [ 94%] +amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test2 (0.35 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestTimeSeriesDailyStudy::test_usual_case (0.29 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestTimeSeriesMinutelyStudy::test_usual_case (0.58 s) PASSED [ 94%] +amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test1 (0.13 s) PASSED [ 95%] 
+amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test2 (0.13 s) PASSED [ 95%] +amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test3 (0.13 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_shape1 (0.00 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_truncate1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_local_ts (0.28 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_none_x_vars (0.00 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_series_target (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness_local_ts SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_artificial_ts SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_none_x_vars SKIPPED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromGluonForecasts::test_transform1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform_none_x_vars1 (0.01 s) PASSED [ 95%] +amp/core/test/test_data_adapters.py::TestTransformFromSklean::test_transform1 (0.01 s) PASSED [ 96%] +amp/core/test/test_explore.py::Test_explore1::test_ols_regress_series (0.20 s) PASSED [ 96%] 
+amp/core/test/test_explore.py::Test_explore1::test_rolling_pca_over_time1 SKIPPED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column1 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column2 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column3 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column4 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index1 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index2 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index3 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index4 (0.00 s) PASSED [ 96%] +amp/core/test/test_explore.py::TestFilterByTime::test_no_intersection (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pairs::test1 (0.01 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference1 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference2 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_mean (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference1 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference2 (0.00 s) PASSED [ 96%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_difference_of_logs (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_mean (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_mean_of_logs (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference1 (0.00 s) PASSED [ 97%] 
+amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_2 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_3 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compare_subspaces::test1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_effective_rank::test1 (0.00 s) PASSED [ 97%] +amp/core/test/test_features.py::Test_compute_effective_rank::test2 (0.00 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test1 (0.02 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test2 SKIPPED [ 98%] 
+amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test3 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test4 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test2 SKIPPED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test3 (0.01 s) PASSED [ 98%] +amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test4 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestResampleIndex1::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test1 (0.01 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test2 (0.09 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test3 (0.08 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test4 (0.13 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test5 (0.02 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_csv1 (0.10 s) PASSED [ 98%] +amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_parquet1 (1.08 s) PASSED [ 98%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test1 (0.00 s) PASSED [ 98%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test2 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test3 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test1 (0.01 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test2 (0.00 s) PASSED [ 99%] +amp/core/test/test_artificial_signal_generators.py::Test_generate_arima_signal_and_response::test1 (0.00 s) PASSED [ 99%] 
+amp/core/test/test_artificial_signal_generators.py::TestGenerateRecipeDataset::test1 (0.01 s) PASSED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test1 SKIPPED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test2 SKIPPED [ 99%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test3 SKIPPED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_dollar_bars (0.07 s) PASSED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_tick_bars (0.02 s) PASSED [ 99%] +amp/core/information_bars/test/test_bars.py::TestBars::test_get_volume_bars (0.07 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily_shift_freq1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_minutely1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_multiple_responses_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] +amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] + +=================================== FAILURES =================================== +__________________ Test_get_configs_from_command_line1.test1 ___________________ +Traceback (most recent call last): + File "/app/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in 
import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked +ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' +============================= slowest 3 durations ============================== +6.49s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution +6.41s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit +6.31s call research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit +=========================== short test summary info ============================ +SKIPPED [5] amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py: Need dind support +SKIPPED [1] amp/helpers/test/test_hparquet.py:741: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file +SKIPPED [5] amp/optimizer/test/test_single_period_optimization.py: Requires special docker container. 
+SKIPPED [1] amp/helpers/test/test_lib_tasks.py:200: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:192: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:184: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:263: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:271: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:287: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:298: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:307: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:316: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:332: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:390: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:399: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:408: AmpTask1347: Add support for mocking `system*()` functions to unit test +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:481: CmampTask #683. 
+SKIPPED [1] amp/helpers/test/test_lib_tasks.py:536: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:571: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:600: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:635: Only run in amp as supermodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:698: Only run in amp as submodule +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:792: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1003: Only run in amp +SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1343: This test makes sense for a branch +SKIPPED [9] amp/datapull/ccxt/data/extract/test/test_exchange_class.py: Enable after CMTask1292 is resolved. +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:789: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:769: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:809: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:530: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:573: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:620: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:666: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:711: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:757: Need dind support +SKIPPED [1] amp/helpers/test/test_unit_test.py:335: This is only used to debug the debugging the infrastructure +SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:380: Need dind support +SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:534: Need dind support +SKIPPED [1] 
amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py:60: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file +SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:130: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:291: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:320: Need dind support +SKIPPED [1] amp/oms/test/test_portfolio.py:412: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:119: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:238: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:243: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:248: Need dind support +SKIPPED [1] amp/oms/test/test_process_forecasts.py:253: Need dind support +SKIPPED [1] amp/oms/test/test_restrictions.py:18: Need dind support +SKIPPED [1] amp/oms/test/test_restrictions.py:45: Need dind support +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:57: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:75: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:93: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:124: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:150: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:198: This is for manual testing +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:274: Next PR will rewrite this +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:325: LimeTask296: Break 2022-01-06 +SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:311: Run 
manually +SKIPPED [9] amp/datapull/common/data/client/test/test_historical_pq_clients.py: Some tests are returning an empty df +SKIPPED [4] amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py: TODO(gp): Need to update this tests after transform v1.3 +SKIPPED [1] amp/dataflow/system/test/test_real_time_runner.py:39: Too slow for real time +SKIPPED [1] amp/dataflow/core/nodes/test/test_volatility_models.py:423: unconditional skip +SKIPPED [1] amp/core/statistics/test/test_regression.py:46: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/core/statistics/test/test_regression.py:17: This test generates the input data +SKIPPED [1] amp/core/statistics/test/test_regression.py:137: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/core/statistics/test/test_regression.py:108: This test generates the input data +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:270: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:283: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:296: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:303: cmamp #654. +SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. +SKIPPED [1] amp/config_root/config/test/test_config.py:325: See AmpTask1573 +SKIPPED [1] amp/market_data/test/test_market_data_im_client.py:134: CmTask882. 
+SKIPPED [1] vendors_lime/taq_bars/test/test_taq_bars_utils.py:304: This is used to generate the frozen input +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:52: LimeTask222 Use volume for volume everywhere +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:39: Run manually +SKIPPED [1] oms_lime/test/test_eg_portfolio.py:14: Finish this +SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:26: Run manually +SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:110: Run manually +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:36: Skip on Mondays +SKIPPED [1] amp/test/test_tasks.py:68: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:60: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:44: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:64: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:56: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:48: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:52: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:36: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:40: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:122: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:95: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:102: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:85: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:89: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:142: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:112: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:129: Test needs to be run outside Docker +SKIPPED [1] amp/test/test_tasks.py:134: 
Test needs to be run outside Docker +SKIPPED [1] amp/oms/test/test_order_processor.py:70: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:78: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:86: Need dind support +SKIPPED [1] amp/oms/test/test_order_processor.py:96: Need dind support +SKIPPED [7] amp/datapull/test/test_im_lib_tasks.py: CMTask #789. +SKIPPED [1] amp/datapull/test/test_im_lib_tasks.py:240: amp #1189 +SKIPPED [10] amp/im/kibot/data/load/test/test_sql_data_loader.py: CmTask666 +SKIPPED [1] amp/helpers/test/test_cache.py:731: See CMTask #952. +SKIPPED [1] amp/helpers/test/test_git.py:217: Run only in amp as super-module +SKIPPED [1] amp/helpers/test/test_git.py:229: Run only in amp as sub-module +SKIPPED [1] amp/dev_scripts/infra/test/test_all.py: unconditional skip +SKIPPED [1] amp/dev_scripts/git/git_hooks/test/test_install_hooks.py:21: There are no Git credentials inside Docker +SKIPPED [1] amp/oms/test/test_pnl_simulator.py:432: For performance measurement +SKIPPED [1] amp/oms/test/test_api.py:162: unconditional skip +SKIPPED [1] amp/oms/test/test_api.py:191: unconditional skip +SKIPPED [1] amp/oms/test/test_broker.py:55: Need dind support +SKIPPED [11] amp/im/kibot/test/test_kibot_sql_writer_backend.py: CmTask666 +SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:47: Disabled waiting for PTask4139 +SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:66: Disabled waiting for PTask4139 +SKIPPED [1] amp/im/kibot/data/load/test/test_s3_data_loader.py:23: Not implemented yet +SKIPPED [4] amp/im/ib/data/transform/test/test_transform.py: CmTask666 +SKIPPED [11] amp/im/ib/test/test_ib_sql_writer_backend.py: CmTask666 +SKIPPED [1] amp/im/ib/connect/test/test_im_tasks.py: unconditional skip +SKIPPED [1] amp/helpers/test/test_versioning.py:23: CmampTask570 +SKIPPED [3] amp/helpers/test/test_open.py: See cryptomtc/cmamp#321 +SKIPPED [1] amp/helpers/test/test_joblib_helpers.py: Just for experimenting with joblib +SKIPPED 
[1] amp/dataflow/core/nodes/test/test_regression_models.py:35: This test fails on some computers due to AmpTask1649 +SKIPPED [1] amp/dataflow/core/nodes/test/test_regression_models.py:18: This test generates the input data +SKIPPED [1] amp/dataflow/core/nodes/test/test_sarimax_models.py:39: cmamp #654. +SKIPPED [1] amp/core/test/test_data_adapters.py:146: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:161: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:118: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:177: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_data_adapters.py:132: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_explore.py:25: https://github.com/.../.../issues/3676 +SKIPPED [1] amp/core/test/test_features.py:510: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:517: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:524: Apparent instability +SKIPPED [1] amp/core/test/test_features.py:556: Apparent instability +SKIPPED [1] amp/core/test/test_backtest.py:27: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_backtest.py:69: Disabled because of PTask2440 +SKIPPED [1] amp/core/test/test_backtest.py:111: Disabled because of PTask2440 +XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 +XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 +FAILED dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 += 1 failed, 1581 passed, 209 skipped, 81 deselected, 2 xfailed, 4 rerun in 200.01s (0:03:20) = +15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=120.0 KB +15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' 
+15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... +15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +ERROR: 1 +15:16:15 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3720 Fast tests failed +## run_slow_tests:  +15:16:15 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... 
done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION +CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "slow and not superslow" . 
-o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 30.0s +timeout method: signal +timeout func_only: True +collecting ...  +collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 562 items  +collecting 794 items  +collecting 1037 items  +collecting 1375 items  +collecting 1424 items  +collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint ( 6 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint ( 7 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=d232c57e32e2 + 
release=3.10.0-1160.36.2.el7.x86_64 + version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51706417152, percent=22.3, used=11809091584, free=14425956352, active=30357913600, inactive=18355712000, buffers=0, cached=40313204736, shared=2491396096, slab=2054676480) + disk usage=sdiskusage(total=107362627584, used=32545501184, free=74817126400, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? + gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m slow and not superslow . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 1803 deselected / 71 selected  + +amp/helpers/test/test_sql.py::TestSql1::test_copy_rows_with_copy_from1 SKIPPED [ 1%] +amp/helpers/test/test_sql.py::TestSql1::test_create_database SKIPPED [ 2%] +amp/helpers/test/test_sql.py::TestSql1::test_create_insert_query SKIPPED [ 4%] +amp/helpers/test/test_sql.py::TestSql1::test_db_connection_to_tuple SKIPPED [ 5%] +amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal1 SKIPPED [ 7%] +amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal2 SKIPPED [ 8%] +amp/helpers/test/test_sql.py::TestSql1::test_execute_insert_query1 SKIPPED [ 9%] +amp/helpers/test/test_sql.py::TestSql1::test_remove_database1 SKIPPED [ 11%] +amp/helpers/test/test_sql.py::TestSql1::test_remove_database_invalid SKIPPED [ 12%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create1 (0.84 s) PASSED [ 14%] +amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create2 (0.47 s) PASSED [ 15%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_portfolio 
SKIPPED [ 16%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_vs_dataframe_portfolio SKIPPED [ 18%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_portfolio SKIPPED [ 19%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_vs_dataframe_portfolio SKIPPED [ 21%] +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_portfolio SKIPPED [ 22%] +amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data1 SKIPPED [ 23%] +amp/datapull/common/data/transform/test/test_extract_data_from_db.py::TestExtractDataFromDb1::test_extract_data_from_db SKIPPED [ 25%] +dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 (1.29 s) FAILED [ 26%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test1 SKIPPED [ 28%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 (19.17 s) PASSED [ 29%] +dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 (19.22 s) PASSED [ 30%] +research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_slow1 SKIPPED [ 32%] +amp/oms/test/test_oms_db.py::TestOmsDbSubmittedOrdersTable1::test_create_table1 SKIPPED [ 33%] +amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_create_table1 SKIPPED [ 35%] +amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_insert1 SKIPPED [ 36%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table1 SKIPPED [ 38%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table2 SKIPPED [ 39%] +amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table3 SKIPPED [ 40%] 
+amp/oms/test/test_oms_db.py::TestOmsDbCurrentPositionsTable1::test_create_table1 SKIPPED [ 42%] +amp/oms/test/test_oms_db.py::TestOmsDbRestrictionsTable1::test_create_table1 SKIPPED [ 43%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_parallel1 (8.08 s) PASSED [ 45%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_serial1 (11.31 s) PASSED [ 46%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel1 (11.48 s) PASSED [ 47%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel2 (11.42 s) PASSED [ 49%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial1 (14.94 s) PASSED [ 50%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial2 (15.19 s) PASSED [ 52%] +amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentArchiveOnS3::test_serial1 (8.62 s) PASSED [ 53%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_parallel1 (11.59 s) PASSED [ 54%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_serial1 (11.41 s) PASSED [ 56%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel1 (15.41 s) PASSED [ 57%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel2 (15.34 s) PASSED [ 59%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial1 (11.61 s) PASSED [ 60%] +amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial2 (12.15 s) PASSED [ 61%] +im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data_for_multiple_symbols1 (1.92 s) PASSED [ 63%] +amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_equities1 (1.31 s) PASSED [ 64%] +amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 (16.72 s) PASSED [ 66%] 
+amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_fit_dag1 (6.20 s) PASSED [ 67%] +amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_predict_dag1 (4.90 s) PASSED [ 69%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test1 (13.29 s) PASSED [ 70%] +vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test2 (7.24 s) PASSED [ 71%] +research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline2::test_real_time1 SKIPPED [ 73%] +market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time2 (0.03 s) PASSED [ 74%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period2 (0.00 s) SKIPPED [ 76%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period3 (0.00 s) SKIPPED [ 77%] +market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period_compare1 (0.00 s) SKIPPED [ 78%] +amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only1 SKIPPED [ 80%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_all_tables1 SKIPPED [ 81%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_im_database SKIPPED [ 83%] +amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_up1 SKIPPED [ 84%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time1 (3.62 s) PASSED [ 85%] +amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time2 (3.99 s) PASSED [ 87%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file2 (11.92 s) PASSED [ 88%] +amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky4 (1.33 s) PASSED 
[ 90%] +amp/dataflow/system/test/test_source_nodes.py::TestKibotEquityReader::test1 (7.63 s) PASSED [ 91%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1 (1.90 s) PASSED [ 92%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1 (1.48 s) PASSED [ 94%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict1 (1.49 s) PASSED [ 95%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_no_x1 (1.57 s) PASSED [ 97%] +amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_summary (2.32 s) PASSED [ 98%] +amp/core/test/test_backtest.py::TestGeneratePredictions::test4 SKIPPED [100%] + +=================================== FAILURES =================================== +_________________ Test_TiledBacktest_E8d.test_end_to_end_slow1 _________________ +Traceback (most recent call last): + File "/app/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 + self._test(config_builder, experiment_builder, run_model_extra_opts) + File "/app/amp/dataflow/model/run_prod_model_flow.py", line 175, in _test + self.check_string(configs_signature, fuzzy_match=True, tag=tag) + File "/app/amp/helpers/hunit_test.py", line 1360, in check_string + hdbg.dfatal(msg) + File "/app/amp/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '/app/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' +################################################################################ + +============================= slowest 3 durations ============================== +19.22s call 
dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 +19.17s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 +16.72s call amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 +=========================== short test summary info ============================ +SKIPPED [1] amp/helpers/test/test_sql.py:95: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:36: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:46: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:21: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:111: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:131: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:79: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:58: Need dind support +SKIPPED [1] amp/helpers/test/test_sql.py:71: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:126: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:210: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:162: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:226: Need dind support +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:200: Need dind support +SKIPPED [1] amp/datapull/ccxt/data/extract/test/test_exchange_class.py:35: Enable after CMTask1292 is resolved. 
+SKIPPED [1] amp/datapull/common/data/transform/test/test_extract_data_from_db.py:38: Need dind support +SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:250: Need dind support +SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:61: Disabled since cache was invalidated +SKIPPED [1] amp/oms/test/test_oms_db.py:46: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:127: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:136: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:192: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:203: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:223: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:292: Need dind support +SKIPPED [1] amp/oms/test/test_oms_db.py:310: Need dind support +SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:131: LimeTask222 Use volume for volume everywhere +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:59: Skip on Mondays +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:81: Skip on Mondays +SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:105: Skip on Mondays +SKIPPED [1] amp/test/test_tasks.py:116: Test needs to be run outside Docker +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:20: Need dind support +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:49: Need dind support +SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:12: Need dind support +SKIPPED [1] amp/core/test/test_backtest.py:153: Disabled because of PTask2440 +FAILED dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 +==== 1 failed, 35 passed, 35 skipped, 1803 deselected in 297.23s (0:04:57) ===== +15:21:15 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=35.7 MB +15:21:15 - WARN  hcache.py 
clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' +15:21:15 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... +15:21:15 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +ERROR: 1 +15:21:18 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3737 Slow tests failed +## run_superslow_tests:  +15:21:18 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' +IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ +NETWORK_MODE=bridge \ + docker-compose \ + --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + app \ + 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"'  +WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. +WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. +WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. +WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. +Creating compose_app_run ... + + +Creating compose_app_run ... 
done +##> devops/docker_run/entrypoint.sh +UID=0 +GID=0 +# Activate environment +##> devops/docker_build/entrypoint/patch_environment_variables.sh +# Set PATH +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +# Set PYTHONPATH +PYTHONPATH=/app/amp:/app: +# Configure env +Testing sudo +/app +Setting up Docker +{ "storage-driver": "vfs" } + * Starting Docker: docker  +[ OK ] + * Docker is running +# Check AWS authentication setup +AWS_DEFAULT_REGION='us-east-1' + Name Value Type Location + ---- ----- ---- -------- + profile am manual --profile +access_key ****************3J32 shared-credentials-file +secret_key ****************QpHW shared-credentials-file + region us-east-1 env AWS_DEFAULT_REGION +CONTAINER_VERSION='' +BUILD_TAG='' +which python: /venv/bin/python +python -V: Python 3.8.10 +docker -v: Docker version 20.10.12, build e91ed57 +docker-compose -v: docker-compose version 1.25.0, build unknown +PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +PYTHONPATH=/app/amp:/app: +entrypoint.sh: 'pytest -m "not slow and superslow" . 
-o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 +cachedir: .pytest_cache +rootdir: /app, configfile: pytest.ini +plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 +timeout: 3600.0s +timeout method: signal +timeout func_only: True +collecting ...  +collecting 0 items  +collecting 0 items  +collecting 67 items  +collecting 70 items  +collecting 230 items  +collecting 548 items  +collecting 641 items  +collecting 801 items  +collecting 1084 items  +collecting 1391 items  +collecting 1671 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' +>>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True +----------------------------------------------------------------------------- +This code is not in sync with the container: +code_version='1.0.3' != container_version='amp-1.0.3' +----------------------------------------------------------------------------- +You need to: +- merge origin/master into your branch with `invoke git_merge_master` +- pull the latest container with `invoke docker_pull` +# Git + branch_name='AmpTask2163_Implement_tiled_backtesting_5' + hash='29bdaf1' + # Last commits: + * 29bdaf1 saggese Lint (11 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) + * c26c937 saggese Checkpoint (12 minutes ago) Mon Mar 7 20:09:34 2022 + * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) +# Machine info + system=Linux + node name=61bb36f6d969 + release=3.10.0-1160.36.2.el7.x86_64 + 
version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 + machine=x86_64 + processor=x86_64 + cpu count=8 + cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) + memory=svmem(total=66548252672, available=51712106496, percent=22.3, used=11803402240, free=14392971264, active=30350835712, inactive=18393743360, buffers=0, cached=40351879168, shared=2491396096, slab=2055942144) + disk usage=sdiskusage(total=107362627584, used=32546025472, free=74816602112, percent=30.3) +# Packages + python: 3.8.10 + gluonnlp: ? + gluonts: 0.6.7 + joblib: 1.1.0 + mxnet: 1.8.0 + numpy: 1.21.1 + pandas: 1.3.4 + pyarrow: 6.0.1 + scipy: 1.6.1 + seaborn: 0.11.2 + sklearn: 1.0.1 + statsmodels: 0.13.1 +INFO: > cmd='/venv/bin/pytest -m not slow and superslow . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun Failed: Timeout' +INFO: Saving log to file 'tmp.pytest.log' + +collected 1874 items / 1864 deselected / 10 selected  + +amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_vs_dataframe_portfolio SKIPPED [ 10%] +dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 + + +(462.17 s) PASSED [ 20%] +research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_superslow1 SKIPPED [ 30%] +im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data1 (30.67 s) PASSED [ 40%] +amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 (47.16 s) PASSED [ 50%] +dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 (47.27 s) PASSED [ 60%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_1 (14.17 s) PASSED [ 70%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_2 (0.21 s) PASSED [ 80%] +amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_3 (0.26 s) PASSED [ 90%] 
+amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_4 (0.21 s) PASSED [100%] + +============================= slowest 3 durations ============================== +462.17s call dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 +47.27s call dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 +47.16s call amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 +=========================== short test summary info ============================ +SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:239: Need dind support +SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:88: Disabled since cache was invalidated +========== 8 passed, 2 skipped, 1864 deselected in 610.66s (0:10:10) =========== +15:31:32 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=0.0 b +15:31:32 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' +15:31:32 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... 
+15:31:32 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan +15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3758 Fast tests failed +15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3763 Slow tests failed +15:31:34 - INFO  lib_tasks.py run_fast_slow_superslow_tests:3770 Superslow tests passed +Traceback (most recent call last): + File "/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke", line 8, in + sys.exit(program.run()) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 384, in run + self.execute() + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 566, in execute + executor.execute(*self.tasks) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/executor.py", line 129, in execute + result = call.task(*args, **call.kwargs) + File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "/local/home/gsaggese/src/sasm-lime4/amp/helpers/lib_tasks.py", line 3772, in run_fast_slow_superslow_tests + raise RuntimeError("Some tests failed") +RuntimeError: Some tests failed diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt new file mode 100644 index 000000000..4168d0576 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt @@ -0,0 +1,41 @@ +HH:MM:SS - INFO lib_tasks_pytest.py 
pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 +pytest dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 + +# Test_get_configs_from_command_line1.test1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked +ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' + + +_______________ + +# Test_TiledBacktest_E8d.test_end_to_end_slow1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 + self._test(config_builder, experiment_builder, run_model_extra_opts) + File "$GIT_ROOT/dataflow/model/run_prod_model_flow.py", line 175, in _test + self.check_string(configs_signature, fuzzy_match=True, tag=tag) + File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string + 
hdbg.dfatal(msg) + File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal + raise assertion_type(ret) +AssertionError: +################################################################################ +The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' +################################################################################ + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt new file mode 100644 index 000000000..955be2326 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt @@ -0,0 +1,396 @@ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532280Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532780Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533026Z =================================== FAILURES =================================== +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533724Z _______________________ TestDryRunTasks1.test_git_clean ________________________ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4534485Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535021Z File "/app/helpers/test/test_lib_tasks.py", line 189, in test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535516Z 
self.dry_run(target) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535998Z File "/app/helpers/test/test_lib_tasks.py", line 170, in dry_run +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536460Z self.check_string(act) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536939Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537409Z is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537889Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538319Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538746Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539220Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539617Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540266Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540856Z ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541568Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542194Z report_memory_usage=False report_cpu_usage=False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542753Z ## git_clean: dry_run=False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4543426Z > git clean -fd >/dev/null 2>&1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544154Z > git submodule foreach 'git clean -fd >/dev/null 2>&1' +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544817Z > git clean -fd +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4545492Z > git submodule foreach 'git clean -fd' +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4546194Z find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4546664Z Diff with: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547307Z > vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547931Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548273Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548887Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4549484Z EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550166Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550742Z exp = r"""report_memory_usage=False report_cpu_usage=False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4551201Z ## git_clean: dry_run=False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4552300Z find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4553032Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4637180Z ____ Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 ____ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4638716Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4641551Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642218Z self._test_fit_over_backtest_period1(system, output_col_name) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642836Z File "/app/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4643452Z self.check_string(actual, fuzzy_match=True, purify_text=True) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644017Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644483Z is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644949Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645544Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645997Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646473Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646868Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4647564Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4648290Z FUZZY ACTUAL vs 
EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649091Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649668Z system_config ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650153Z dag_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650634Z filter_ath: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651136Z col_mode: replace_all ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651652Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652148Z start_time: 09:30:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652605Z end_time: 16:00:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653072Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653690Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654208Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654696Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655175Z rule: 5T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655784Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656444Z vwap_groups: [('close', 'volume', 'vwap')] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656971Z reindex_like_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657465Z join_output_with_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657953Z compute_ret_0: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4658550Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4659057Z out_col_group: () ( +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4659540Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660015Z mode: log_rets ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660489Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660968Z close: close.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661457Z vwap: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661945Z twap: twap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4662415Z compute_vol: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663124Z in_col_group: ('vwap.ret_0',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663731Z out_col_group: ('vwap.ret_0.vol',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664238Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664863Z permitted_exceptions: (,) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665372Z adjust_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665979Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666481Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666971Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4667587Z term1_col: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668076Z term2_col: vwap.ret_0.vol ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668576Z out_col: vwap.ret_0.vol_adj ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669067Z term2_delay: 2 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669552Z operation: div ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670017Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670500Z compress_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671111Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671611Z out_col_group: () ( +Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4672094Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4672591Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673126Z dag_builder_object: nid_prefix= ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673669Z dag_builder_class: Example1_DagBuilder < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674415Z system_class: Example1_ForecastSystem < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674950Z dag_config_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675420Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675903Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676387Z rule: 1T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676933Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4695757Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696094Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696806Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4697507Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698303Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698867Z exp = r"""################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699274Z system_config +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699701Z 
################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700106Z dag_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700449Z filter_ath: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700810Z col_mode: replace_all +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701206Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701589Z start_time: 09:30:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701975Z end_time: 16:00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702320Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702851Z in_col_groups: [('close',), ('volume',), ('feature1',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703292Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703670Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704018Z rule: 5T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704700Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705360Z vwap_groups: [('close', 'volume', 'vwap')] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705811Z reindex_like_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4706221Z join_output_with_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707219Z compute_ret_0: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707770Z in_col_groups: [('close',), ('vwap',), ('twap',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708195Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708558Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708947Z mode: log_rets +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709311Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709685Z close: close.ret_0 +Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4710049Z vwap: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710416Z twap: twap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710775Z compute_vol: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711241Z in_col_group: ('vwap.ret_0',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711723Z out_col_group: ('vwap.ret_0.vol',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712125Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712643Z permitted_exceptions: (,) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713077Z adjust_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713595Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714124Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714543Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714941Z term1_col: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715350Z term2_col: vwap.ret_0.vol +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715752Z out_col: vwap.ret_0.vol_adj +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716142Z term2_delay: 2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716723Z operation: div +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717096Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717445Z compress_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717947Z in_col_groups: [('vwap.ret_0.vol_adj',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718433Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718799Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719194Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719637Z dag_builder_object: nid_prefix= +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4720674Z dag_builder_class: Example1_DagBuilder 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721179Z system_class: Example1_ForecastSystem +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721575Z dag_config_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721934Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722300Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722662Z rule: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4723350Z dag_runner_object: > +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724172Z market_data_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724575Z asset_id_col_name: asset_id +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724989Z asset_ids: [1467591036, 3303714233] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725381Z backtest_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725884Z universe_str: example1_v1-top2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726310Z trading_period_str: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726712Z time_interval_str: Jan2000 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727275Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727822Z end_timestamp: 2000-01-31 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728377Z market_object: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728897Z dag_object: name=None +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4729261Z mode=strict +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4732047Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4734910Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 
'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4735787Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736220Z vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736654Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737097Z 1467591036 3303714233 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737459Z end_ts +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737924Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738406Z 2000-01-01 10:05:00-05:00 0.98 0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738892Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739261Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739878Z ________ Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 _________ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4740430Z Traceback (most recent call last): +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741028Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741597Z self._test_fit_over_period1( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742128Z File "/app/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742734Z self.check_string(actual, fuzzy_match=True, purify_text=True) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743389Z File "/app/helpers/hunit_test.py", line 1266, in check_string +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743841Z 
is_equal = assert_equal( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744328Z File "/app/helpers/hunit_test.py", line 881, in assert_equal +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744767Z diff_files( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745216Z File "/app/helpers/hunit_test.py", line 586, in diff_files +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745672Z raise RuntimeError(msg_as_str) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746068Z RuntimeError: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746708Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4747409Z FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748182Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748848Z system_config ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749340Z dag_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749833Z filter_ath: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750343Z col_mode: replace_all ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750846Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751336Z start_time: 09:30:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751805Z end_time: 16:00:00 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752323Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752956Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4753459Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754106Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754581Z rule: 5T ( +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4755213Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4755868Z vwap_groups: [('close', 'volume', 'vwap')] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756379Z reindex_like_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756884Z join_output_with_input: False ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757379Z compute_ret_0: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757975Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758480Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758952Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759435Z mode: log_rets ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759916Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4760412Z close: close.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4786435Z vwap: vwap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787169Z twap: twap.ret_0 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787693Z compute_vol: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4788439Z in_col_group: ('vwap.ret_0',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789095Z out_col_group: ('vwap.ret_0.vol',) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789615Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790296Z permitted_exceptions: (,) ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790847Z adjust_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4791745Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792292Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792798Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4793324Z term1_col: vwap.ret_0 ( 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794102Z term2_col: vwap.ret_0.vol ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794647Z out_col: vwap.ret_0.vol_adj ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795177Z term2_delay: 2 ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795669Z operation: div ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796310Z drop_nans: True ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796825Z compress_rets: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4797500Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798031Z out_col_group: () ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798529Z col_mapping: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799069Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799632Z dag_builder_object: nid_prefix= ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800220Z dag_builder_class: Example1_DagBuilder < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800794Z system_class: Example1_ForecastSystem < +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801362Z dag_config_config: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801873Z resample: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802385Z transformer_kwargs: ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802893Z rule: 1T ( +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4803458Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4817745Z or running: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818089Z > ./tmp_diff.sh +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818751Z 
-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4819543Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820358Z -------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820978Z exp = r"""################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821394Z system_config +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821833Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822241Z dag_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822588Z filter_ath: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822959Z col_mode: replace_all +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823370Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823780Z start_time: 09:30:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824182Z end_time: 16:00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824541Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825093Z in_col_groups: [('close',), ('volume',), ('feature1',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825551Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825951Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4826312Z rule: 5T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827017Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827800Z vwap_groups: [('close', 'volume', 'vwap')] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4828262Z reindex_like_input: False +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4828701Z join_output_with_input: False +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829101Z compute_ret_0: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829632Z in_col_groups: [('close',), ('vwap',), ('twap',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830079Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830488Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830867Z mode: log_rets +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831247Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831634Z close: close.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832038Z vwap: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832422Z twap: twap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832798Z compute_vol: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833272Z in_col_group: ('vwap.ret_0',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833972Z out_col_group: ('vwap.ret_0.vol',) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834393Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834954Z permitted_exceptions: (,) +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835410Z adjust_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835964Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836403Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836795Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837206Z term1_col: vwap.ret_0 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837629Z term2_col: vwap.ret_0.vol +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838047Z out_col: vwap.ret_0.vol_adj +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838451Z term2_delay: 2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838829Z operation: div +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4839211Z drop_nans: True +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4839571Z compress_rets: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840090Z in_col_groups: [('vwap.ret_0.vol_adj',)] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840527Z out_col_group: () +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840906Z col_mapping: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841320Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841792Z dag_builder_object: nid_prefix= +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842257Z dag_builder_class: Example1_DagBuilder +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842739Z system_class: Example1_ForecastSystem +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843303Z dag_config_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843654Z resample: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844032Z transformer_kwargs: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844412Z rule: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845150Z dag_runner_object: > +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845882Z market_data_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846282Z asset_id_col_name: asset_id +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846700Z asset_ids: [1467591036, 3303714233] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847096Z backtest_config: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847559Z universe_str: example1_v1-top2 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847978Z trading_period_str: 1T +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848377Z time_interval_str: Jan2000 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848941Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4849581Z end_timestamp: 2000-01-31 00:00:00+00:00 +Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4850153Z market_object: +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4850683Z dag_object: name=None +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4851051Z mode=strict +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4853864Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4856657Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857531Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857948Z vwap.ret_0.vol_adj.c +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858372Z ################################################################################ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858854Z 1467591036 3303714233 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859226Z end_ts +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859697Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860188Z 2000-01-01 10:05:00-05:00 0.98 0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860695Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4861053Z """ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4862316Z ============================= slowest 3 durations ============================== +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4866438Z 26.87s setup oms/test/test_broker.py::TestDatabaseBroker1::test1 
+Run_fast_tests Run fast tests 2022-07-10T12:40:40.4867269Z 6.46s setup datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868189Z 6.24s setup datapull/talos/data/client/test/test_talos_clients.py::TestTalosSqlRealTimeImClient1::test_build_numerical_to_string_id_mapping +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868892Z =========================== short test summary info ============================ +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4956618Z FAILED helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - Run... +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4957400Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4958274Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt new file mode 100644 index 000000000..94e600076 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt @@ -0,0 +1,399 @@ +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file 
'$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' +HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: +pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 +pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 +pytest helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean + +# TestDryRunTasks1.test_git_clean +Traceback (most recent call last): + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 189, in test_git_clean + self.dry_run(target) + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 170, in dry_run + self.check_string(act) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z +report_memory_usage=False report_cpu_usage=False ( +## git_clean: dry_run=False ( + > git clean -fd >/dev/null 2>&1 + > git submodule foreach 'git clean -fd >/dev/null 2>&1' + > git clean -fd + > git submodule foreach 'git clean -fd' +find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( +Diff with: +> vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean +-------------------------------------------------------------------------------- +exp = r"""report_memory_usage=False report_cpu_usage=False +## git_clean: dry_run=False +find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf +""" +__ + +# Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 + self._test_fit_over_backtest_period1(system, output_col_name) + File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 + self.check_string(actual, fuzzy_match=True, purify_text=True) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z +system_config ( +dag_config: ( +filter_ath: ( +col_mode: replace_all ( +transformer_kwargs: ( +start_time: 
09:30:00 ( +end_time: 16:00:00 ( +resample: ( +in_col_groups: [('close',), ('volume',), ('feature1',)] ( +out_col_group: () ( +transformer_kwargs: ( +rule: 5T ( +resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +vwap_groups: [('close', 'volume', 'vwap')] ( +reindex_like_input: False ( +join_output_with_input: False ( +compute_ret_0: ( +in_col_groups: [('close',), ('vwap',), ('twap',)] ( +out_col_group: () ( +transformer_kwargs: ( +mode: log_rets ( +col_mapping: ( +close: close.ret_0 ( +vwap: vwap.ret_0 ( +twap: twap.ret_0 ( +compute_vol: ( +in_col_group: ('vwap.ret_0',) ( +out_col_group: ('vwap.ret_0.vol',) ( +drop_nans: True ( +permitted_exceptions: (,) ( +adjust_rets: ( +in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +out_col_group: () ( +transformer_kwargs: ( +term1_col: vwap.ret_0 ( +term2_col: vwap.ret_0.vol ( +out_col: vwap.ret_0.vol_adj ( +term2_delay: 2 ( +operation: div ( +drop_nans: True ( +compress_rets: ( +in_col_groups: [('vwap.ret_0.vol_adj',)] ( +out_col_group: () ( +col_mapping: ( +vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +dag_builder_object: nid_prefix= ( +dag_builder_class: Example1_DagBuilder < +system_class: Example1_ForecastSystem < +dag_config_config: ( +resample: ( +transformer_kwargs: ( +rule: 1T ( +dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 +-------------------------------------------------------------------------------- +exp = r"""################################################################################ +system_config 
+################################################################################ +dag_config: + filter_ath: + col_mode: replace_all + transformer_kwargs: + start_time: 09:30:00 + end_time: 16:00:00 + resample: + in_col_groups: [('close',), ('volume',), ('feature1',)] + out_col_group: () + transformer_kwargs: + rule: 5T + resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] + vwap_groups: [('close', 'volume', 'vwap')] + reindex_like_input: False + join_output_with_input: False + compute_ret_0: + in_col_groups: [('close',), ('vwap',), ('twap',)] + out_col_group: () + transformer_kwargs: + mode: log_rets + col_mapping: + close: close.ret_0 + vwap: vwap.ret_0 + twap: twap.ret_0 + compute_vol: + in_col_group: ('vwap.ret_0',) + out_col_group: ('vwap.ret_0.vol',) + drop_nans: True + permitted_exceptions: (,) + adjust_rets: + in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] + out_col_group: () + transformer_kwargs: + term1_col: vwap.ret_0 + term2_col: vwap.ret_0.vol + out_col: vwap.ret_0.vol_adj + term2_delay: 2 + operation: div + drop_nans: True + compress_rets: + in_col_groups: [('vwap.ret_0.vol_adj',)] + out_col_group: () + col_mapping: + vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +dag_builder_object: nid_prefix= +dag_builder_class: Example1_DagBuilder +system_class: Example1_ForecastSystem +dag_config_config: + resample: + transformer_kwargs: + rule: 1T +dag_runner_object: > +market_data_config: + asset_id_col_name: asset_id + asset_ids: [1467591036, 3303714233] +backtest_config: + universe_str: example1_v1-top2 + trading_period_str: 1T + time_interval_str: Jan2000 + start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 + end_timestamp: 2000-01-31 00:00:00+00:00 +market_object: +dag_object: name=None +mode=strict +nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), 
('read_data', {'stage': })] +edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +################################################################################ +vwap.ret_0.vol_adj.c +################################################################################ + 1467591036 3303714233 +end_ts +2000-01-01 10:00:00-05:00 -0.98 -0.98 +2000-01-01 10:05:00-05:00 0.98 0.98 +2000-01-01 10:10:00-05:00 -0.98 -0.98 +""" +______ + +# Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +Traceback (most recent call last): + File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 + self._test_fit_over_period1( + File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 + self.check_string(actual, fuzzy_match=True, purify_text=True) + File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string + is_equal = assert_equal( + File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal + diff_files( + File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files + raise RuntimeError(msg_as_str) +RuntimeError: +-------------------------------------------------------------------------------- +FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +-------------------------------------------------------------------------------- +Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z +system_config ( +dag_config: ( +filter_ath: ( +col_mode: replace_all ( +transformer_kwargs: ( +start_time: 09:30:00 ( +end_time: 16:00:00 ( +resample: ( +in_col_groups: [('close',), ('volume',), ('feature1',)] ( +out_col_group: () ( +transformer_kwargs: ( +rule: 5T ( +resampling_groups: 
[({'close': 'close'}, 'last', {}), ({'close': 'twap', ( +vwap_groups: [('close', 'volume', 'vwap')] ( +reindex_like_input: False ( +join_output_with_input: False ( +compute_ret_0: ( +in_col_groups: [('close',), ('vwap',), ('twap',)] ( +out_col_group: () ( +transformer_kwargs: ( +mode: log_rets ( +col_mapping: ( +close: close.ret_0 ( +vwap: vwap.ret_0 ( +twap: twap.ret_0 ( +compute_vol: ( +in_col_group: ('vwap.ret_0',) ( +out_col_group: ('vwap.ret_0.vol',) ( +drop_nans: True ( +permitted_exceptions: (,) ( +adjust_rets: ( +in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( +out_col_group: () ( +transformer_kwargs: ( +term1_col: vwap.ret_0 ( +term2_col: vwap.ret_0.vol ( +out_col: vwap.ret_0.vol_adj ( +term2_delay: 2 ( +operation: div ( +drop_nans: True ( +compress_rets: ( +in_col_groups: [('vwap.ret_0.vol_adj',)] ( +out_col_group: () ( +col_mapping: ( +vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( +dag_builder_object: nid_prefix= ( +dag_builder_class: Example1_DagBuilder < +system_class: Example1_ForecastSystem < +dag_config_config: ( +resample: ( +transformer_kwargs: ( +rule: 1T ( +dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt +or running: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 +-------------------------------------------------------------------------------- +exp = r"""################################################################################ +system_config +################################################################################ +dag_config: + filter_ath: + col_mode: replace_all + transformer_kwargs: + start_time: 09:30:00 + end_time: 16:00:00 + resample: + in_col_groups: [('close',), ('volume',), 
('feature1',)] + out_col_group: () + transformer_kwargs: + rule: 5T + resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] + vwap_groups: [('close', 'volume', 'vwap')] + reindex_like_input: False + join_output_with_input: False + compute_ret_0: + in_col_groups: [('close',), ('vwap',), ('twap',)] + out_col_group: () + transformer_kwargs: + mode: log_rets + col_mapping: + close: close.ret_0 + vwap: vwap.ret_0 + twap: twap.ret_0 + compute_vol: + in_col_group: ('vwap.ret_0',) + out_col_group: ('vwap.ret_0.vol',) + drop_nans: True + permitted_exceptions: (,) + adjust_rets: + in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] + out_col_group: () + transformer_kwargs: + term1_col: vwap.ret_0 + term2_col: vwap.ret_0.vol + out_col: vwap.ret_0.vol_adj + term2_delay: 2 + operation: div + drop_nans: True + compress_rets: + in_col_groups: [('vwap.ret_0.vol_adj',)] + out_col_group: () + col_mapping: + vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c +dag_builder_object: nid_prefix= +dag_builder_class: Example1_DagBuilder +system_class: Example1_ForecastSystem +dag_config_config: + resample: + transformer_kwargs: + rule: 1T +dag_runner_object: > +market_data_config: + asset_id_col_name: asset_id + asset_ids: [1467591036, 3303714233] +backtest_config: + universe_str: example1_v1-top2 + trading_period_str: 1T + time_interval_str: Jan2000 + start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 + end_timestamp: 2000-01-31 00:00:00+00:00 +market_object: +dag_object: name=None +mode=strict +nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] +edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 
'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] +################################################################################ +vwap.ret_0.vol_adj.c +################################################################################ + 1467591036 3303714233 +end_ts +2000-01-01 10:00:00-05:00 -0.98 -0.98 +2000-01-01 10:05:00-05:00 0.98 0.98 +2000-01-01 10:10:00-05:00 -0.98 -0.98 +""" + +# To run the tests: +> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt new file mode 100644 index 000000000..d0b931699 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt @@ -0,0 +1,7 @@ + +```python + +def check_empty_lines(): + print("Check empty lines are present!") + +``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt new file mode 100644 index 000000000..34d8d7aa1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + 
```python + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + ``` +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt new file mode 100644 index 000000000..de229ba17 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt @@ -0,0 +1,16 @@ +- Functions can be declared in the body of another function +- E.g., to hide utility functions in the scope of the function that uses them + + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + +- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt new file mode 100644 index 000000000..fb18a0a9c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt @@ -0,0 +1,9 @@ +```python +def no_start_python(): + print("No mention of python at the start")``` +``` + +``` + A markdown paragraph contains + delimiters that needs to be removed. 
+``` \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt new file mode 100644 index 000000000..6c1304cfb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt @@ -0,0 +1,7 @@ +Text before +:::: +::::{.column width=40%} +Middle text +:::columns +::::{.column width=60%} +Text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt new file mode 100644 index 000000000..0ac895652 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt @@ -0,0 +1,2 @@ +:::: +::: \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt new file mode 100644 index 
000000000..9f8585df5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt @@ -0,0 +1,3 @@ +Consecutive headers increase by more than one level: + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt new file mode 100644 index 000000000..ab5bbf048 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt @@ -0,0 +1,71 @@ +################################################################################ +level=1, description='Chapter 1' +################################################################################ +- **Chapter 1** + - Section 1.1 + - Section 1.2 +- Chapter 2 +################################################################################ +level=2, description='Section 1.1' +################################################################################ +- Chapter 1 + - **Section 1.1** + - Subsection 1.1.1 + - Subsection 1.1.2 + - Section 1.2 +- Chapter 2 +################################################################################ +level=3, description='Subsection 1.1.1' +################################################################################ +- Chapter 1 + - Section 1.1 + - **Subsection 1.1.1** + - Subsection 1.1.2 + - Section 1.2 +- Chapter 2 +################################################################################ +level=3, description='Subsection 1.1.2' 
+################################################################################ +- Chapter 1 + - Section 1.1 + - Subsection 1.1.1 + - **Subsection 1.1.2** + - Section 1.2 +- Chapter 2 +################################################################################ +level=2, description='Section 1.2' +################################################################################ +- Chapter 1 + - Section 1.1 + - **Section 1.2** +- Chapter 2 +################################################################################ +level=1, description='Chapter 2' +################################################################################ +- Chapter 1 +- **Chapter 2** + - Section 2.1 + - Section 2.2 +################################################################################ +level=2, description='Section 2.1' +################################################################################ +- Chapter 1 +- Chapter 2 + - **Section 2.1** + - Subsection 2.1.1 + - Section 2.2 +################################################################################ +level=3, description='Subsection 2.1.1' +################################################################################ +- Chapter 1 +- Chapter 2 + - Section 2.1 + - **Subsection 2.1.1** + - Section 2.2 +################################################################################ +level=2, description='Section 2.2' +################################################################################ +- Chapter 1 +- Chapter 2 + - Section 2.1 + - **Section 2.2** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt new file mode 100644 index 000000000..df89fcd63 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt @@ -0,0 +1,40 @@ +################################################################################ +level=1, description='Models' +################################################################################ +- **Models** + - Naive Bayes + - Decision trees + - Random forests + - Linear models +################################################################################ +level=2, description='Naive Bayes' +################################################################################ +- Models + - **Naive Bayes** + - Decision trees + - Random forests + - Linear models +################################################################################ +level=2, description='Decision trees' +################################################################################ +- Models + - Naive Bayes + - **Decision trees** + - Random forests + - Linear models +################################################################################ +level=2, description='Random forests' +################################################################################ +- Models + - Naive Bayes + - Decision trees + - **Random forests** + - Linear models +################################################################################ +level=2, description='Linear models' +################################################################################ +- Models + - Naive Bayes + - Decision trees + - Random forests + - **Linear models** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt new file mode 100644 
index 000000000..1c6176761 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt @@ -0,0 +1,40 @@ +OrderedDict([('build-system', + OrderedDict([('build-backend', 'poetry.masonry.api'), + ('requires', ['poetry>=0.12'])])), + ('tool', + OrderedDict([('poetry', + OrderedDict([('authors', ['']), + ('dependencies', + OrderedDict([('awscli', '*'), + ('boto3', '*'), + ('bs4', '*'), + ('flaky', '*'), + ('fsspec', '*'), + ('gluonts', '*'), + ('invoke', '*'), + ('jsonpickle', '*'), + ('jupyter', '*'), + ('lxml', '*'), + ('matplotlib', '*'), + ('mxnet', '*'), + ('networkx', '*'), + ('pandas', '^1.1.0'), + ('psycopg2', '*'), + ('pyarrow', '*'), + ('pytest', '^6.0.0'), + ('pytest-cov', '*'), + ('pytest-instafail', + '*'), + ('pytest-xdist', '*'), + ('python', '^3.7'), + ('pywavelets', '*'), + ('requests', '*'), + ('s3fs', '*'), + ('seaborn', '*'), + ('sklearn', '*'), + ('statsmodels', '*'), + ('tqdm', '*')])), + ('description', ''), + ('dev-dependencies', OrderedDict()), + ('name', 'lm'), + ('version', '0.1.0')]))]))]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt new file mode 100644 index 000000000..66475c930 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt @@ -0,0 +1,4 @@ +time data "28-07-2023 15:05:13" doesn't match format "%Y%m%d_%H%M%S", at position 0. 
You might want to try: + - passing `format` if your strings have a consistent format; + - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format; + - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this. \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt new file mode 100644 index 000000000..41895df11 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt @@ -0,0 +1 @@ +Unknown datetime string format, unable to parse: qwe28abc07-201234, at position 0 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt new file mode 100644 index 000000000..0498168e2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt @@ -0,0 +1,16 @@ + +################################################################################ +################################################################################ +_system() failed +################################################################################ +################################################################################ +# _system: cmd='(ls this_file_doesnt_exist) 2>&1', 
abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cmd='(ls this_file_doesnt_exist) 2>&1' +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +- rc='' +- output=' +ls: cannot access 'this_file_doesnt_exist': No such file or directory +' +- Output saved in 'tmp.system_output.txt' +- Command saved in 'tmp.system_cmd.sh' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv new file mode 100644 index 000000000..abc3dac80 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv @@ -0,0 +1,2 @@ +A,B,C,D,E +1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py new file mode 100644 index 000000000..7b0473b8a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py @@ -0,0 +1,136 @@ +import filecmp +import os +import pathlib +import shutil +from typing import List, Tuple + +import dev_scripts_helpers.system_tools.create_links as dshstcrli +import helpers.hio as hio +import helpers.hunit_test as hunitest + + +# ############################################################################# +# Test_create_links +# 
############################################################################# + + +class Test_create_links(hunitest.TestCase): + """ + Unit tests for the `create_links.py` script. + """ + + def create_file( + self, dir_path: pathlib.Path, file_name: str, content: str + ) -> pathlib.Path: + """ + Create a file with the given content in the specified directory. + + This helper function ensures the directory exists before + creating the file and writing the specified content into it. + + :param dir_path: path to the directory where the file will be + created + :param file_name: name of the file to create + :param content: content to write into the file + :return: full path to the created file + """ + dir_path = pathlib.Path(dir_path) + file_path = dir_path / file_name + hio.to_file(file_name=str(file_path), txt=content) + return file_path + + def test__find_common_files(self) -> None: + """ + Test identifying common files between two directories. + + Create two directories, each containing identical files, + and checks that the `_find_common_files` function identifies these files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + src_dir.mkdir(parents=True, exist_ok=True) + dst_dir.mkdir(parents=True, exist_ok=True) + file1_src: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + file1_dst: pathlib.Path = shutil.copy(file1_src, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + str(src_dir), str(dst_dir) + ) + self.assertEqual(len(common_files), 1) + self.assertEqual(common_files[0], (str(file1_src), str(file1_dst))) + + def test__replace_with_links_absolute(self) -> None: + """ + Test replacing common files with absolute symbolic links. 
+ + Create identical files in two directories and replace the files + in the destination directory with absolute symbolic links + pointing to the source files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + shutil.copy(file1, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + str(src_dir), str(dst_dir) + ) + dshstcrli._replace_with_links(common_files, use_relative_paths=False) + for _, dst_file in common_files: + self.assertTrue(os.path.islink(dst_file)) + self.assert_equal(os.readlink(dst_file), str(file1)) + + def test__replace_with_links_relative(self) -> None: + """ + Test replacing common files with relative symbolic links. + + Create identical files in two directories and replace the files + in the destination directory with relative symbolic links + pointing to the source files. + """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + shutil.copy(file1, dst_dir) + common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( + src_dir, dst_dir + ) + dshstcrli._replace_with_links(common_files, use_relative_paths=True) + for src_file, dst_file in common_files: + self.assertTrue(os.path.islink(dst_file)) + expected_link: str = os.path.relpath( + src_file, os.path.dirname(dst_file) + ) + self.assert_equal(os.readlink(dst_file), expected_link) + + def test__stage_links(self) -> None: + """ + Test replacing symbolic links with writable file copies. + + Create symbolic links in a directory and then stage them by + replacing each link with a copy of the original file it points + to. 
+ """ + base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) + src_dir: pathlib.Path = base_dir / "test_src_dir" + dst_dir: pathlib.Path = base_dir / "test_dst_dir" + src_dir.mkdir(parents=True, exist_ok=True) + dst_dir.mkdir(parents=True, exist_ok=True) + file1: pathlib.Path = self.create_file( + src_dir, "file1.txt", "Hello, World!" + ) + link1: pathlib.Path = dst_dir / "file1.txt" + os.symlink(file1, link1) + symlinks: List[str] = dshstcrli._find_symlinks(dst_dir) + dshstcrli._stage_links(symlinks) + for link in symlinks: + self.assertFalse(os.path.islink(link)) + self.assertTrue(os.path.isfile(link)) + self.assertTrue(filecmp.cmp(link, file1, shallow=False)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py new file mode 100644 index 000000000..98994cb5a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py @@ -0,0 +1,96 @@ +import asyncio +import logging +from typing import Optional + +import helpers.hasyncio as hasynci +import helpers.hdatetime as hdateti +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_hasyncio1 +# ############################################################################# + + +class Test_hasyncio1(hunitest.TestCase): + """ + Execute a workload using different time semantics: + + - real time + - simulated time + """ + + @staticmethod + async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: + """ + Coroutine simulating a workload waiting for 1s. 
+ """ + + def _print_time() -> None: + true_wall_clock_time = hdateti.get_current_time("ET") + _LOG.debug("wall_clock_time=%s", true_wall_clock_time) + event_loop_time = get_wall_clock_time() + _LOG.debug("event_loop_time=%s", event_loop_time) + + _print_time() + # The execution here is just waiting. + _LOG.debug(" -> execute") + await asyncio.sleep(1.0) + # + _print_time() + + def run_test( + self, + event_loop: Optional[asyncio.AbstractEventLoop], + get_wall_clock_time: hdateti.GetWallClockTime, + ) -> None: + coroutine = self.workload(get_wall_clock_time) + hasynci.run(coroutine, event_loop=event_loop) + + def test_real_time1(self) -> None: + """ + Use real-time semantic. + + In this case: + ``` + wall_clock_time=2021-09-27 20:40:43.775683-04:00 + event_loop_time=2021-09-27 20:40:43.799074-04:00 + -> execute + wall_clock_time=2021-09-27 20:40:44.808990-04:00 + event_loop_time=2021-09-27 20:40:44.812472-04:00 + ``` + + - the wall clock time and the event loop time both advance + """ + # Use the wall clock time with no special event loop. + get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") + event_loop = None + # Run. + self.run_test(event_loop, get_wall_clock_time) + + def test_simulated_time1(self) -> None: + """ + Use simulated time semantic. + + In this case: + ``` + wall_clock_time=2021-09-27 20:38:47.843501-04:00 + event_loop_time=2021-09-27 20:38:47.841555-04:00 + -> execute + wall_clock_time=2021-09-27 20:38:47.868272-04:00 + event_loop_time=2021-09-27 20:38:48.841555-04:00 + ``` + + - the wall_clock time doesn't advance since the execution is instantaneous + - the event loop time moves forward 1 sec + """ + # Use the solipsistic event loop to simulate the real-time faster. + with hasynci.solipsism_context() as event_loop: + # Use the simulated wall clock time. + get_wall_clock_time = lambda: hdateti.get_current_time( + tz="ET", event_loop=event_loop + ) + # Run. 
+ self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py new file mode 100644 index 000000000..5469e009e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py @@ -0,0 +1,276 @@ +import os +import unittest.mock as umock +from typing import Optional + +import boto3 +import pytest +from botocore.client import BaseClient +from moto import mock_aws + +import helpers.haws as haws +import helpers.hunit_test as hunitest + + +# ############################################################################# +# Haws_test_case +# ############################################################################# + + +class Haws_test_case(hunitest.TestCase): + @pytest.fixture(autouse=True, scope="class") + def aws_credentials(self) -> None: + """ + Mocked AWS credentials for moto. + """ + os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "testing" + os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["MOCK_AWS_SECURITY_TOKEN"] = "testing" + os.environ["MOCK_AWS_SESSION_TOKEN"] = "testing" + os.environ["MOCK_AWS_DEFAULT_REGION"] = "us-east-1" + + +# ############################################################################# +# Test_get_session +# ############################################################################# + + +class Test_get_session(Haws_test_case): + @pytest.fixture(autouse=True) + def set_up_test(self) -> None: + os.environ["MOCK_AWS_S3_BUCKET"] = "mock_aws_bucket" + + @mock_aws + @umock.patch("boto3.Session") + def test_get_session1(self, mock_boto3_session: umock.Mock) -> None: + """ + Test that `haws.get_session` correctly return a session without region + parameter. + """ + aws_profile = "__mock__" + # Create a mock session. 
+ mock_session = umock.MagicMock() + mock_boto3_session.return_value = mock_session + # Test that get_session returns a session object. + session = haws.get_session(aws_profile) + self.assertEqual(session, mock_session) + # Verify that `boto3.Session` was called with the correct profile. + mock_boto3_session.assert_called_once_with(profile_name=aws_profile) + + @mock_aws + @umock.patch("boto3.Session") + def test_get_session2(self, mock_boto3_session: umock.Mock) -> None: + """ + Test that `haws.get_session` correctly return a session with region + parameter. + """ + aws_profile = "__mock__" + region = "us-east-1" + # Create a mock session + mock_session = umock.MagicMock() + mock_boto3_session.return_value = mock_session + # Test that `get_session` returns a session object with the specified region. + session = haws.get_session(aws_profile, region=region) + self.assertEqual(session, mock_session) + # Verify that `boto3.Session` was called with the correct profile and region. + mock_boto3_session.assert_called_once_with( + profile_name=aws_profile, region_name=region + ) + + +# ############################################################################# +# Test_get_service_client +# ############################################################################# + + +class Test_get_service_client(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test `haws.get_service_client()` returns a client for S3. + """ + aws_profile = "__mock__" + service_name = "s3" + region = "us-east-1" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name=region, + ) + mock_get_session.return_value = mock_session + # Create mock client for S3. 
+ client = haws.get_service_client( + aws_profile=aws_profile, service_name=service_name, region=region + ) + # Check that the returned client is for the S3 service. + self.assert_equal(client.meta.service_model.service_name, "s3") + # Check for region. + self.assert_equal(client.meta.region_name, region) + + +# ############################################################################# +# Test_get_service_resource +# ############################################################################# + + +class Test_get_service_resource(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_service_resource()` correctly retrieves a S3 + resource. + """ + aws_profile = "__mock__" + service_name = "s3" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name="us-east-1", + ) + mock_get_session.return_value = mock_session + # Create mock S3 bucket. + s3 = boto3.resource("s3") + s3.create_bucket(Bucket="my-test-bucket") + s3_resource = haws.get_service_resource( + aws_profile=aws_profile, service_name=service_name + ) + # Get all `S3` buckets. + buckets = list(s3_resource.buckets.all()) + bucket_names = [bucket.name for bucket in buckets] + # Check. + self.assertIn("my-test-bucket", bucket_names) + + +# ############################################################################# +# Test_get_task_definition_image_url +# ############################################################################# + + +class Test_get_task_definition_image_url(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_service_client") + def test1(self, mock_get_service_client: umock.Mock) -> None: + """ + Test that `get_task_definition_image_url` retrieves correct image URL. + """ + # Mock data. 
+ task_definition_name = "my-task-definition" + mock_image_url = "old_image_url" + region = "us-east-1" + # Mock the return value of `get_service_client`. + mock_client = boto3.client("ecs", region_name=region) + mock_get_service_client.return_value = mock_client + # Create a mock task definition. + mock_client.register_task_definition( + family=task_definition_name, + # The following are required parameters. + containerDefinitions=[ + {"name": "my-container", "image": mock_image_url, "memory": 512} + ], + ) + image_url = haws.get_task_definition_image_url( + task_definition_name, environment="test" + ) + self.assertEqual(image_url, mock_image_url) + + +# ############################################################################# +# Test_update_task_definition +# ############################################################################# + + +class Test_update_task_definition(Haws_test_case): + @mock_aws + @umock.patch("helpers.haws.get_ecs_client") + def test1(self, mock_get_ecs_client: BaseClient) -> None: + """ + Test updating a task definition with a new image URL. + """ + # Mock data. + task_definition_name = "my-task-definition" + old_image_url = "old_image_url" + new_image_url = "new_image_url" + region = "us-east-1" + # Mock the return value of `get_ecs_client`. + mock_client = boto3.client("ecs", region_name=region) + mock_get_ecs_client.return_value = mock_client + # Create a mock task definition. + mock_client.register_task_definition( + family=task_definition_name, + containerDefinitions=[ + {"name": "my-container", "image": old_image_url} + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + # Update task definition. + haws.update_task_definition( + task_definition_name, + new_image_url, + region=region, + environment="test", + ) + # Check if the task definition is updated. 
+ task_description = mock_client.describe_task_definition( + taskDefinition=task_definition_name + ) + updated_image_url = task_description["taskDefinition"][ + "containerDefinitions" + ][0]["image"] + self.assertEqual(updated_image_url, new_image_url) + + +# ############################################################################# +# Test_get_ecs_client +# ############################################################################# + + +class Test_get_ecs_client(Haws_test_case): + def mock_aws_client( + self, mock_get_session: umock.Mock, *, region: Optional[str] = None + ) -> None: + aws_profile = "__mock__" + test_cluster_name = "test-cluster" + # Create a mock session with the expected credentials. + mock_session = boto3.session.Session( + aws_access_key_id="testing", + aws_secret_access_key="testing", + region_name=region or "us-east-1", + ) + mock_get_session.return_value = mock_session + # Create mock ECS client. + ecs_client = boto3.client("ecs", region_name="us-east-1") + ecs_client.create_cluster(clusterName=test_cluster_name) + # Get ECS client. + if region: + test_client = haws.get_ecs_client(aws_profile, region=region) + else: + test_client = haws.get_ecs_client(aws_profile) + # Get the created cluster. + cluster_name = test_client.list_clusters()["clusterArns"][0] + # Check cluster name. + self.assertIn(test_cluster_name, cluster_name) + + @mock_aws + @umock.patch("helpers.haws.get_session") + def test1(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_ecs_client()` correctly return a client to work + with ECS within a specified region. + """ + self.mock_aws_client(mock_get_session, region="us-east-1") + + @mock_aws + @umock.patch("helpers.haws.get_session") + def test2(self, mock_get_session: umock.Mock) -> None: + """ + Test that `haws.get_ecs_client()` correctly return a client to work + with ECS without a specified region. 
+ """ + self.mock_aws_client(mock_get_session) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py new file mode 100644 index 000000000..1699e7bcd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py @@ -0,0 +1,1002 @@ +import logging +import tempfile +import time +from typing import Any, Callable, Generator, Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hcache as hcache +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Do not commit this. +# _LOG.debug = _LOG.info + + +# TODO(gp): Use hprint.log_frame +def _LOG_frame(txt: str) -> None: + _LOG.debug("\n%s", hprint.frame(txt)) + + +# ############################################################################# + + +def _get_add_function() -> Callable: + """ + Return a function with the ability to track state, used for testing. + """ + + def func(x: int, y: int) -> int: + func.executed = True # type: ignore[attr-defined] + return x + y + + func.executed = False # type: ignore[attr-defined] + return func + + +def _reset_add_function(func: Callable) -> None: + """ + Reset the function before another execution, so we can verify if it was + executed or not. + + We should do this every time we run the cached version of the + function. 
+ """ + func.executed = False # type: ignore[attr-defined] + hdbg.dassert(not func.executed) # type: ignore[attr-defined] + + +# ############################################################################# + + +# ############################################################################# +# _ResetGlobalCacheHelper +# ############################################################################# + + +class _ResetGlobalCacheHelper(hunitest.TestCase): + """ + Create a global cache for each test method and resets it at every test + method invocation. + """ + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def _remove_all_caches(self) -> None: + """ + Clean and remove all the caches for this test. + """ + cache_type = "all" + hcache.clear_global_cache(cache_type, tag=self.cache_tag, destroy=True) + + def set_up_test(self) -> None: + # Create a tag like "TestCacheFeatures::test_without_caching1". + self.cache_tag = f"{self.__class__.__name__}::{self._testMethodName}" + # Clean all the caches before this test method is run. + self._remove_all_caches() + + def tear_down_test(self) -> None: + # Clean and remove all the caches after the test method is run. + self._remove_all_caches() + + def _get_f_cf_functions( + self, **cached_kwargs: Any + ) -> Tuple[Callable, hcache._Cached]: + """ + Create the intrinsic function `f` and its cached version `cf`. + """ + # Make sure that we are using the unit test cache. + # disk_cache_name = hcache._get_global_cache_name("disk", self.cache_tag) + # _LOG.debug("disk_cache_name=%s", disk_cache_name) + # _LOG.debug( + # "disk_cache_path=%s", hcache._get_global_cache_path("disk", self.cache_tag) + # ) + # TODO(gp): Add an assertion. + # Create the intrinsic function. + f = _get_add_function() + # Create the cached function. 
+ cf = hcache._Cached(f, tag=self.cache_tag, **cached_kwargs) + # Reset all the caches. + hcache.clear_global_cache("all", self.cache_tag) + cf._reset_cache_tracing() + return f, cf + + def _execute_and_check_state( + self, + f: Callable, + cf: hcache._Cached, + val1: int, + val2: int, + exp_cf_state: str, + ) -> None: + """ + Call the function `f(val1, val2) and its cached function `cf(val1, + val2)` and check whether the intrinsic function was executed and what + caches were used, according to `exp_f_state` and `exp_cf_state`. + """ + # If there was no caching then we must have executed the function. + exp_f_state = exp_cf_state == "no_cache" + _LOG.debug( + "\n%s", + hprint.frame( + f"val1={val1}, val2={val2}, exp_f_state={exp_f_state}, " + f"exp_cf_state={exp_cf_state}", + char1="<", + ), + ) + # Reset the intrinsic function since we want to verify if it was called + # or not when we call the cached function. + _reset_add_function(f) + # Call the cached function. + actual = cf(val1, val2) + expected = val1 + val2 + # Check the result. + self.assertEqual(actual, expected) + # Check which function was executed and what caches were used. + _LOG.debug( + "f.executed=%s vs %s", + f.executed, # type: ignore[attr-defined] + exp_f_state, + ) + _LOG.debug( + "cf.get_last_cache_accessed=%s vs %s", + cf.get_last_cache_accessed(), + exp_cf_state, + ) + self.assertEqual(f.executed, exp_f_state) # type: ignore[attr-defined] + self.assertEqual(cf.get_last_cache_accessed(), exp_cf_state) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheFunctions +# ############################################################################# + + +class TestCacheFunctions(hunitest.TestCase): + def test_get_cache_name1(self) -> None: + """ + Make sure we are using the unit test cache and not the development + cache, by checking the name of the disk cache. 
+ """ + cache_tag = "unittest" + disk_cache_name = hcache._get_global_cache_name("disk", cache_tag) + _LOG.debug("disk_cache_name=%s", disk_cache_name) + self.assertIn(cache_tag, disk_cache_name) + + +# ############################################################################# + + +# ############################################################################# +# TestGlobalCache1 +# ############################################################################# + + +class TestGlobalCache1(_ResetGlobalCacheHelper): + def test_without_caching1(self) -> None: + """ + If we execute two times without caching, we get two executions of the + intrinsic function. + """ + f = _get_add_function() + self.assertFalse(f.executed) # type: ignore[attr-defined] + # Execute. + actual = f(3, 4) + self.assertEqual(actual, 7) + # The function was executed. + self.assertTrue(f.executed) # type: ignore[attr-defined] + # Reset. + _reset_add_function(f) + self.assertFalse(f.executed) # type: ignore[attr-defined] + # Execute again. + actual = f(3, 4) + self.assertEqual(actual, 7) + # Check that the function is executed again, since there is no caching. + self.assertTrue(f.executed) # type: ignore[attr-defined] + + def test_with_caching1(self) -> None: + """ + - Leave the caches enabled + - Show that the memory cache is used + """ + # Both memory and disk cache enabled. + f, cf = self._get_f_cf_functions() + # 1) Execute and verify that it is executed, since it was not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Execute and verify that it is not executed, since it's cached. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + + def test_with_caching2(self) -> None: + """ + - Leave the caches enabled + - Cache different values + """ + # Both memory and disk cache enabled. + f, cf = self._get_f_cf_functions() + # 1) Execute and verify that it is executed, since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Use a different workload. + _LOG.debug("\n%s", hprint.frame("Execute")) + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 3) Execute the second time: verify that it is not executed, since cached. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 4) Use a different workload: not executed since cached. + _LOG.debug("\n%s", hprint.frame("Execute")) + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") + + def test_with_caching3(self) -> None: + """ + - Disable both mem and disk cache + - Cache a single value + """ + # Disable both memory and disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=False + ) + # 1) Execute the first time: executed since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Execute the second time: executed since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + + def test_with_caching4(self) -> None: + """ + - Disable only the disk cache + - Cache different values + """ + # Use only memory cache. 
+ f, cf = self._get_f_cf_functions( + use_mem_cache=True, use_disk_cache=False + ) + # 1) Execute and verify that it is executed since not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Execute the second time: verify that it was cached from memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") + + def test_with_caching5(self) -> None: + """ + - Disable only the memory cache + - Cache different values + """ + # Use only disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=True + ) + # 1) Verify that it is executed since there is no cache. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="disk") + + # //////////////////////////////////////////////////////////////////////////// + + def test_with_caching_mem_reset(self) -> None: + """ + - Use only the memory cache + - Execute and cache + - Reset the mem cache + - Execute again + - Check that the cached function is recomputed + """ + # Use only memory cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=True, use_disk_cache=False + ) + # 1) Verify that it is executed, since it's not cached.
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Reset memory cache. + _LOG.debug("\n%s", hprint.frame("Reset memory cache")) + hcache.clear_global_cache("mem", self.cache_tag) + # 4) Verify that it is executed, since the cache was emptied. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + + def test_with_caching_disk_reset(self) -> None: + """ + Same as `test_with_caching_mem_reset()` but using the disk cache. + """ + # Use only disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, use_disk_cache=True + ) + # 1) Verify that it is executed, since it's not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since cached in disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Reset disk cache. + _LOG.debug("\n%s", hprint.frame("Reset memory cache")) + hcache.clear_global_cache("disk", self.cache_tag) + # 4) Verify that it is executed, since the cache was emptied. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + + def test_with_caching_mem_reset2(self) -> None: + """ + - Use both caches + - Execute and cache + - Reset the mem cache + - Execute again + - Check that the cached value is found in the disk cache + """ + # Use both memory and disk cache + f, cf = self._get_f_cf_functions(use_mem_cache=True, use_disk_cache=True) + # 1) Verify that it is executed. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Verify that it is not executed, since it's cached in memory. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 3) Reset memory cache. + hcache.clear_global_cache("mem", self.cache_tag) + # 4) Verify that it is not executed, since it's in the disk cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + # //////////////////////////////////////////////////////////////////////////// + + def test_redefined_function(self) -> None: + """ + If the cached function is redefined, but it's still the same, then the + intrinsic function should not be recomputed. + """ + # Define the function inline imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + add = _get_add_function() + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 3) Redefine the function inline. + _LOG.debug("\n%s", hprint.frame("Redefine function")) + add = _get_add_function() + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 4) Execute the third time. Should still use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 5) Execute the fourth time. Should still use memory cache. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 6) Check that a call with other arguments misses the cache. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state( + add, cached_add, 3, 4, exp_cf_state="no_cache" + ) + + def test_changed_function(self) -> None: + """ + If the function is redefined, but the code is not the same, then the + intrinsic function should be recomputed. + """ + # Define the function imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + + def add(x: int, y: int) -> int: + add.executed = True # type: ignore[attr-defined] + return x + y + + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 3) Redefine the function with different code. + _LOG.debug("\n%s", hprint.frame("Redefine function")) + + # pylint: disable=function-redefined + def add(x: int, y: int) -> int: # type: ignore[no-redef] + add.executed = True # type: ignore[attr-defined] + z = x + y + return z + + cached_add = hcache._Cached(add, tag=self.cache_tag) + # 4) Execute the third time. Must recompute, since the code changed. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 5) Execute the fourth time. Must use memory cache again. + _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") + # 6) Check that a call with other arguments misses the cache.
+ _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state( + add, cached_add, 3, 4, exp_cf_state="no_cache" + ) + + +# ############################################################################# + + +# ############################################################################# +# _ResetFunctionSpecificCacheHelper +# ############################################################################# + + +class _ResetFunctionSpecificCacheHelper(_ResetGlobalCacheHelper): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test2() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test2(self) -> None: + self.set_up_test() + # Create temp directories to store the cache. + self.disk_cache_dir = tempfile.mkdtemp() + # Clear global cache. + hcache.clear_global_cache("all", tag=self.cache_tag) + + +# ############################################################################# +# TestFunctionSpecificCache1 +# ############################################################################# + + +class TestFunctionSpecificCache1(_ResetFunctionSpecificCacheHelper): + def test_with_caching1(self) -> None: + """ + - Test using the function-specific disk cache + - Disable function-specific cache and switching to global cache + - Test using the global cache + """ + # Use a global cache and + _LOG.debug("\n%s", hprint.frame("Starting")) + _LOG.debug( + "# get_global_cache_info()=\n%s", + hcache.get_global_cache_info(tag=self.cache_tag), + ) + f, cf = self._get_f_cf_functions( + use_mem_cache=False, + use_disk_cache=True, + disk_cache_path=self.disk_cache_dir, + ) + _LOG.debug( + "# cf.get_function_cache_info()=\n%s", cf.get_function_cache_info() + ) + # 1) Execute and verify that it is executed. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("clear_global_cache")) + hcache.clear_global_cache("all") + # 4) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + def test_with_caching2(self) -> None: + """ + - Test using the function-specific disk cache + - Disable function-specific cache and switching to global cache + - Test using the global cache + """ + # Use only per-function disk cache. + f, cf = self._get_f_cf_functions( + use_mem_cache=False, disk_cache_path=self.disk_cache_dir + ) + # 1) Execute and verify that it is executed. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("clear_global_cache")) + hcache.clear_global_cache("all") + # 3) Execute and verify that it is not executed. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 4) Use the global cache. + _LOG.debug( + "\n%s", hprint.frame("Disable function cache and use global cache") + ) + cf.set_function_cache_path(None) + # 5) Execute and verify that function is executed with global cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 6) Execute. Now we get the value from the memory cache since disabling + # the function cache means enabling the memory cache. 
+ _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") + # 7) Restore back specific cache. + _LOG.debug("\n%s", hprint.frame("Restore function cache")) + cf.set_function_cache_path(self.disk_cache_dir) + # Verify that it is *NOT* executed with specific cache. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCachePerformance +# ############################################################################# + + +class TestCachePerformance(_ResetGlobalCacheHelper): + @staticmethod + # pylint: disable=unused-argument + def _computation(*args: Any) -> None: + """ + Simulate work. + + :param args: throw away arguments + """ + # Emulate small quantity of work. + time.sleep(0.01) + + @staticmethod + def _timeit(func: Callable, *args: Any) -> float: + """ + Get performance measure of the call to fn with args. + + :param fn: callable function + :param args: any arguments to pass to the function fn + :return: precise time in seconds + """ + perf_start = time.perf_counter() + func(*args) + perf_diff = time.perf_counter() - perf_start + return perf_diff + + def _test_performance(self, val: Any) -> None: + """ + Test performance of the cache over some argument val. + + :param val: any hashable argument + """ + # Create cached versions of the computation function. + _mem_cached_computation = hcache._Cached( + self._computation, + tag=self.cache_tag, + use_mem_cache=True, + use_disk_cache=False, + ) + _disk_cached_computation = hcache._Cached( + self._computation, + tag=self.cache_tag, + use_mem_cache=False, + use_disk_cache=True, + ) + # First step: no cache. 
+ no_cache_ct = self._timeit(lambda: self._computation(val)) + print(f"no cache run time={no_cache_ct}") + # Second step: memory cache. + memory_no_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) + print(f"empty memory cache run time={memory_no_cache_ct}") + print(f"empty memory cache overhead={memory_no_cache_ct - no_cache_ct}") + memory_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) + print(f"hot memory cache run time={memory_cache_ct}") + print(f"hot memory cache benefit={no_cache_ct - memory_cache_ct}") + # Third step: disk cache. + disk_no_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) + print(f"empty disk cache run time={disk_no_cache_ct}") + print(f"empty disk cache overhead={disk_no_cache_ct - no_cache_ct}") + disk_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) + print(f"hot disk cache run time={disk_cache_ct}") + print(f"hot disk cache benefit={no_cache_ct - disk_cache_ct}") + + def test_performance_dataframe(self) -> None: + """ + Test performance of the cache over pandas DataFrame. + """ + # Create a somewhat big DataFrame with random data. + df = pd.DataFrame( + np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD") + ) + print("testing pandas dataframe, with sample size", df.shape) + self._test_performance(df) + + def test_performance_series(self) -> None: + """ + Test performance of the cache over pandas Series. + """ + # Create a somewhat big DataFrame with random data. 
+ s = pd.Series(np.random.randint(0, 100, size=100)) + print("testing pandas series, with sample size", s.shape) + self._test_performance(s) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheDecorator +# ############################################################################# + + +class TestCacheDecorator(_ResetGlobalCacheHelper): + def test_decorated_function(self) -> None: + """ + Test decorator with both caches enabled. + """ + + # Define the function inline imitating working in a notebook. + @hcache.cache(tag=self.cache_tag) + def add(x: int, y: int) -> int: + add.__wrapped__.executed = True + return x + y + + # Execute the first time. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" + ) + # Execute the second time. Must use memory cache. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="mem" + ) + + def test_decorated_function_no_mem(self) -> None: + """ + Test decorator with only disk cache. + """ + + # Define the function inline imitating working in a notebook. + @hcache.cache(tag=self.cache_tag, use_mem_cache=False) + def add(x: int, y: int) -> int: + add.__wrapped__.executed = True + return x + y + + # Execute the first time. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" + ) + # Execute the second time. Must use disk cache. + self._execute_and_check_state( + add.__wrapped__, add, 1, 2, exp_cf_state="disk" + ) + + +# ############################################################################# + + +# ############################################################################# +# TestAmpTask1407 +# ############################################################################# + + +class TestAmpTask1407(_ResetGlobalCacheHelper): + def test1(self) -> None: + """ + A class method can't be cached. 
+ """ + + class _AmpTask1407Class: + def __init__(self, string: str) -> None: + self._string = string + + @hcache.cache(tag=self.cache_tag) + def print(self, n: int) -> str: + string = "" + for _ in range(n): + string += "hello" + ("o" * len(self._string)) + " " + return string + + obj = _AmpTask1407Class("test") + with self.assertRaises(ValueError): + obj.print(5) + + def test2(self) -> None: + """ + A static method can be cached. + """ + + class _AmpTask1407Class: + def __init__(self, string: str) -> None: + self._string = string + + @staticmethod + @hcache.cache(tag=self.cache_tag) + def static_print(n: int) -> str: + print("--> hello: ", n) + string = "" + for _ in range(n): + string += "hello" + ("o" * len("world")) + " " + return string + + @hcache.cache(tag=self.cache_tag) + def print(self, n: int) -> str: + string = "" + for _ in range(n): + string += "hello" + ("o" * len(self._string)) + " " + return string + + obj = _AmpTask1407Class("test") + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") + # + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + obj.static_print(5) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + # + obj.static_print(6) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") + obj.static_print(6) + self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") + + +# ############################################################################# + + +# ############################################################################# +# TestCachingOnS3 +# ############################################################################# + + +class TestCachingOnS3(_ResetFunctionSpecificCacheHelper): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test3() + yield + # Run after each test. 
+ self.tear_down_test() + + def set_up_test3(self) -> None: + self.set_up_test2() + # Get a directory to store the cache on S3. + self.disk_cache_dir = self.get_s3_scratch_dir() + self.aws_profile = "am" + # Clear global cache. + hcache.clear_global_cache("all", tag=self.cache_tag) + + @pytest.mark.skip(reason="See CMTask #952.") + def test_with_caching1(self) -> None: + """ + - Test using the function-specific cache + - Disable function-specific cache and switching to global cache + - Test using the global cache + """ + _LOG.debug("\n%s", hprint.frame("Starting")) + _LOG.debug( + "\n%s", + hcache.get_global_cache_info(tag=self.cache_tag, add_banner=True), + ) + f, cf = self._get_f_cf_functions( + use_mem_cache=False, + disk_cache_path=self.disk_cache_dir, + aws_profile=self.aws_profile, + ) + _LOG.debug("\n%s", cf.get_function_cache_info(add_banner=True)) + cf.clear_function_cache(destroy=False) + # 1) Execute and verify that it is executed, since the value is not cached. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 2) Execute and verify that it is not executed, since it's cached on disk. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 3) Clear the global cache. + _LOG.debug("\n%s", hprint.frame("Clear global cache")) + hcache.clear_global_cache("all") + # 4) Verify that it is *NOT* executed, since the S3 cache is used. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + # 5) Clear the function cache. + _LOG.debug("\n%s", hprint.frame("Clear function cache")) + cf.clear_function_cache() + # 6) Verify that it is executed, since the function cache was cleared. + _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # 7) Verify that it is *NOT* executed, since it's cached on disk again.
+ _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCacheEnableReadOnly1 +# ############################################################################# + + +class TestCacheEnableReadOnly1(_ResetGlobalCacheHelper): + def _helper(self, cache_from: str, **kwargs: Any) -> None: + """ + Test that when enabling read-only mode we get an assertion only if the + function invocation was not cached. + """ + # Both memory and disk cache enabled, although we use only memory. + f, cf = self._get_f_cf_functions(**kwargs) + # Execute and verify that it is executed, since it was not cached. + _LOG_frame("Execute the 1st time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") + # Execute and verify that it is not executed, since it's cached in memory. + _LOG_frame("Execute the 2nd time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + _LOG_frame("Execute the 3rd time") + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + # + # Enable the read-only mode. + # + _LOG_frame("Enable read-only mode") + cf.enable_read_only(True) + # This is cached so it doesn't raise. + self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) + # This is not cached so it should raise. + with self.assertRaises(hcache.NotCachedValueException) as cm: + self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + actual = str(cm.exception) + self.check_string(actual) + # + # Disable the read-only mode. + # + _LOG_frame("Disable read-only mode") + cf.enable_read_only(False) + # Now this doesn't assert even if it's not in the cache. 
+ self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") + + def test_mem_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) + + def test_disk_cache1(self) -> None: + self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) + + def test_mem_disk_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) + + +# ############################################################################# + + +# ############################################################################# +# TestCacheUpdateFunction1 +# ############################################################################# + + +class TestCacheUpdateFunction1(_ResetGlobalCacheHelper): + def test1(self) -> None: + # Define the function imitating working in a notebook. + _LOG.debug("\n%s", hprint.frame("Define function")) + + def add(x: int, y: int) -> int: + add.executed = True # type: ignore[attr-defined] + return x + y + + disk_cache_dir = self.get_scratch_space() + _LOG.debug("disk_cache_dir=%s", disk_cache_dir) + cached_add = hcache._Cached( + add, + use_mem_cache=False, + use_disk_cache=True, + disk_cache_path=disk_cache_dir, + ) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state( + add, cached_add, 1, 2, exp_cf_state="no_cache" + ) + # 2) Execute the second time. Must use memory cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + func_path = cached_add._get_function_specific_code_path() + code_before = hio.from_file(func_path) + _LOG.debug("code_before=\n%s", code_before) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") + # 3) Redefine the function with different code while running. + _LOG.debug("\n%s", hprint.frame("Update function")) + + # This function is redefined on purpose to test the code. 
+ def add(x: int, y: int) -> int: # type: ignore[no-redef] + add.executed = True # type: ignore[attr-defined] + return x * y + + cached_add._func = add + cached_add._disk_cached_func.func = add + cached_add.update_func_code_without_invalidating_cache() + # + code_after = hio.from_file(func_path) + _LOG.debug("code_after=\n%s", code_after) + self.assertNotEqual(code_before, code_after) + # 4) Execute the third time. Must still use the disk cache. + _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") + + +# ############################################################################# + + +# ############################################################################# +# TestCacheEnableCheckOnlyIfPresent1 +# ############################################################################# + + +class TestCacheEnableCheckOnlyIfPresent1(_ResetGlobalCacheHelper): + def _helper(self, cache_from: str, **kwargs: Any) -> None: + # The enabled caches are selected by the caller through `kwargs`. + f, cf = self._get_f_cf_functions(**kwargs) + # 1) Execute the first time. + _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state="no_cache") + # 2) Execute the second time. Must hit the `cache_from` cache. + _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + # 3) Enable the `check_only_if_present` mode. + _LOG.debug("\n%s", hprint.frame("Enable check_only_if_present")) + cf.enable_check_only_if_present(True) + # Since the value was cached, we should get an assertion. + with self.assertRaises(hcache.CachedValueException) as cm: + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + actual = str(cm.exception) + self.check_string(actual) + # 4) Try with a new value.
+ _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) + self._execute_and_check_state(f, cf, 2, 2, exp_cf_state="no_cache") + # 5) Disable the `check_only_if_present` mode. + _LOG.debug("\n%s", hprint.frame("Disable check_only_if_present")) + cf.enable_check_only_if_present(False) + # 6) Execute a value: we should get a cache hit. + _LOG.debug("\n%s", hprint.frame("Execute the 4rd time")) + self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) + # 7) Execute a value: we should get a cache hit. + _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) + self._execute_and_check_state(f, cf, 2, 2, exp_cf_state=cache_from) + + # TODO(gp): Add a test for verbose mode in __call__ + # TODO(gp): get_function_cache_info + def test_mem_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) + + def test_disk_cache1(self) -> None: + self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) + + def test_mem_disk_cache1(self) -> None: + self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py new file mode 100644 index 000000000..3555e378f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py @@ -0,0 +1,2606 @@ +import copy +import logging +import os +from typing import Any, Dict + +import pandas as pd +import pytest + +import helpers.hcache_simple as hcacsimp +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_json_double(x: int) -> int: + """ + Return double the input and cache it using JSON. 
+ + :param x: input integer to be doubled + :return: doubled value (x * 2) + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache(cache_type="pickle") +def _cached_pickle_square(x: int) -> int: + """ + Return the square of the input and cache it using pickle. + + :param x: input integer to be squared + :return: squared value (x**2) + """ + res = x**2 + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_multi_arg_sum(a: int, b: int) -> int: + """ + Return the sum of two numbers. + + :param a: first number + :param b: second number + :return: sum of a and b. + """ + res = a + b + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_refreshable_func(x: int) -> int: + """ + Return x multiplied by 10 and update the call count. + + :param x: The input integer + :return: The result of multiplying x by 10 + """ + _cached_refreshable_func.call_count += 1 + res = x * 10 + return res + + +# Initialize the call counter for the refreshable function. +_cached_refreshable_func.call_count = 0 + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_kwarg_diff(a: int, b: int = 0) -> int: + """ + Return the difference between a and b. + + :param a: The minuend + :param b: The subtrahend (defaults to 0) + :return: The difference (a - b) + """ + res = a - b + return res + + +@hcacsimp.simple_cache(cache_type="json") +def _cached_add_100(x: int) -> int: + """ + Return x plus 100. Used primarily for testing cache statistics. + + :param x: The input integer + :return: value (x + 100) + """ + res = x + 100 + return res + + +# ############################################################################# +# _BaseCacheTest +# ############################################################################# + + +class _BaseCacheTest(hunitest.TestCase): + """ + Base test class to provide common setup and teardown functionality. 
+ + Instead of using setUp/tearDown, we use set_up_test/tear_down_test + along with a pytest fixture that ensures these methods run before + and after each test. + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self, monkeypatch): + # Store monkeypatch for use in tests. + self.monkeypatch = monkeypatch + # Run common setup before each test. + self.set_up_test() + yield + # Run common teardown after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + """ + Setup operations to run before each test: + + - Isolate all global variables to prevent race conditions. + - Set cache directory to test scratch space. + """ + _LOG.debug("set_up_test") + super().setUp() + # Isolate configuration globals. + scratch_space = self.get_scratch_space() + self.monkeypatch.setattr(hcacsimp, "_CACHE_DIR", scratch_space) + self.monkeypatch.setattr( + hcacsimp, "_CACHE_FILE_PREFIX", hcacsimp._CACHE_FILE_PREFIX + ) + self.monkeypatch.setattr(hcacsimp, "_S3_BUCKET", hcacsimp._S3_BUCKET) + self.monkeypatch.setattr(hcacsimp, "_S3_PREFIX", hcacsimp._S3_PREFIX) + self.monkeypatch.setattr(hcacsimp, "_AWS_PROFILE", hcacsimp._AWS_PROFILE) + # Isolate data structure globals. + self.monkeypatch.setattr(hcacsimp, "_CACHE", {}) + # Use deepcopy for _CACHE_PROPERTY to preserve decorator-set properties. + self.monkeypatch.setattr( + hcacsimp, "_CACHE_PROPERTY", copy.deepcopy(hcacsimp._CACHE_PROPERTY) + ) + self.monkeypatch.setattr(hcacsimp, "_CACHE_PERF", {}) + self.monkeypatch.setattr(hcacsimp, "_S3_AUTO_PULL_ATTEMPTED", set()) + + def tear_down_test(self) -> None: + """ + Teardown operations to run after each test. + + All global variables are isolated via monkeypatch, so they are + automatically restored after each test. 
+ """ + _LOG.debug("tear_down_test") + + + +# ############################################################################# +# Test_get_cache +# ############################################################################# + + +class Test_get_cache(_BaseCacheTest): + """ + Test get_cache functionality for retrieving cached values. + """ + + def test1(self) -> None: + """ + Verify that get_cache returns a cache with the expected key and value. + """ + # Populate the cache by calling _cached_json_double. + _cached_json_double(2) + # Retrieve the in-memory cache for _cached_json_double. + cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") + # Assert that the key '{"args": [2], "kwargs": {}}' is in the cache and + # its value is 4. + self.assertIn('{"args": [2], "kwargs": {}}', cache) + self.assertEqual(cache['{"args": [2], "kwargs": {}}'], 4) + + +# ############################################################################# +# Test_flush_cache_to_disk +# ############################################################################# + + +class Test_flush_cache_to_disk(_BaseCacheTest): + """ + Test flush_cache_to_disk functionality for persisting cache to disk. + """ + + def test1(self) -> None: + """ + Verify that flushing creates a cache file on disk. + """ + # Call _cached_json_double to populate the cache. + _cached_json_double(3) + # Flush the cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Define expected cache file name. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + # Assert that the cache file now exists on disk. + self.assertTrue( + os.path.exists(cache_file), + f"Cache file {cache_file} should exist on disk.", + ) + + def test2(self) -> None: + """ + Verify that the disk cache file contains the expected key and value. + """ + # Populate cache and flush to disk. + _cached_json_double(3) + # Flush the cache to disk. 
+ hcacsimp.flush_cache_to_disk("_cached_json_double") + # Define the expected cache file name. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + # Open and load the disk cache file. + disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") + # Assert that the disk cache contains the key '{"args": [3], "kwargs": + # {}}' with the correct value. + self.assertIn('{"args": [3], "kwargs": {}}', disk_cache) + # Assert that the value for key '{"args": [3], "kwargs": {}}' is 6. + self.assertEqual(disk_cache['{"args": [3], "kwargs": {}}'], 6) + + +# ############################################################################# +# Test_reset_mem_cache +# ############################################################################# + + +class Test_reset_mem_cache(_BaseCacheTest): + """ + Test reset_mem_cache functionality for clearing in-memory cache. + """ + + def test1(self) -> None: + """ + Verify that the cache is empty after `reset_mem_cache` is called. + """ + # Populate the in-memory cache. + _cached_json_double(5) + # Reset the in-memory cache. + hcacsimp.reset_mem_cache("_cached_json_double") + # Retrieve the memory cache after reset. + cache_after: Dict[str, Any] = hcacsimp.get_mem_cache( + "_cached_json_double" + ) + # Verify that the key '{"args": [5], "kwargs": {}}' is no longer in the cache. + self.assertNotIn('{"args": [5], "kwargs": {}}', cache_after) + + +# ############################################################################# +# Test_force_cache_from_disk +# ############################################################################# + + +class Test_force_cache_from_disk(_BaseCacheTest): + """ + Test force_cache_from_disk functionality for loading cache from disk. + """ + + def test1(self) -> None: + """ + Verify that the memory cache is empty after a reset. + """ + # Populate cache and flush to disk. + _cached_json_double(7) + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Reset in-memory cache.
+ hcacsimp.reset_mem_cache("_cached_json_double") + mem_cache: Dict[str, Any] = hcacsimp.get_mem_cache("_cached_json_double") + # Ensure that the in-memory cache is empty. + self.assertNotIn( + '{"args": [7], "kwargs": {}}', + mem_cache, + "Memory cache should be empty after reset.", + ) + + def test2(self) -> None: + """ + Populate disk cache, reset memory, force reload, and verify that the + key appears. + """ + # Populate cache, flush to disk, and then reset in-memory cache. + _cached_json_double(7) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.reset_mem_cache("_cached_json_double") + _LOG.debug("Force reload disk cache for '_cached_json_double'") + # Force reload cache from disk. + hcacsimp.force_cache_from_disk("_cached_json_double") + full_cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") + # Assert that the key is restored in the in-memory cache. + self.assertIn( + '{"args": [7], "kwargs": {}}', + full_cache, + "After forcing, disk key should appear in memory.", + ) + + +# ############################################################################# +# Test_get_cache_perf +# ############################################################################# + + +class Test_get_cache_perf(_BaseCacheTest): + """ + Test cache performance tracking functionality. + """ + + def test1(self) -> None: + """ + Verify that performance tracking records hits and misses correctly. + """ + # Enable performance tracking. + hcacsimp.enable_cache_perf("_cached_json_double") + _LOG.debug("Call _cached_json_double(8) twice") + # First call should be a miss. + _LOG.debug("# First call should be a miss") + _cached_json_double(8) + # Second call should be a hit. + _LOG.debug("# Second call should be a hit") + _cached_json_double(8) + # Retrieve performance statistics. + stats: str = hcacsimp.get_cache_perf_stats("_cached_json_double") + # Verify that one hit and one miss are recorded. 
+ self.assertIn("hits=1", stats) + self.assertIn("misses=1", stats) + + def test2(self) -> None: + """ + Verify that disabling performance tracking returns None. + """ + # Disable performance tracking. + hcacsimp.disable_cache_perf("_cached_json_double") + # Assert that performance data is no longer available. + self.assertIsNone(hcacsimp.get_cache_perf("_cached_json_double")) + + +# ############################################################################# +# Test_set_cache_property +# ############################################################################# + + +class Test_set_cache_property(_BaseCacheTest): + """ + Test set_cache_property and get_cache_property functionality. + """ + + def test1(self) -> None: + """ + Verify that setting a valid cache property works and can be retrieved. + """ + # Set a valid cache property. + hcacsimp.set_cache_property( + "_cached_json_double", "report_on_cache_miss", True + ) + # Retrieve and verify the property. + val: bool = hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + self.assertTrue(val) + + def test2(self) -> None: + """ + Verify that resetting cache properties clears previously set + properties. + """ + # Set and verify the cache property. + hcacsimp.set_cache_property( + "_cached_json_double", "report_on_cache_miss", True + ) + self.assertTrue( + hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + ) + # Reset all cache properties. + hcacsimp.reset_cache_property() + # Verify that the property is no longer True. + self.assertFalse( + hcacsimp.get_cache_property( + "_cached_json_double", "report_on_cache_miss" + ) + ) + + def test3(self) -> None: + """ + Verify that setting an invalid cache property raises an error. + """ + # Verify that setting an invalid property raises an error. 
+ with self.assertRaises(AssertionError): + hcacsimp.set_cache_property( + "_cached_json_double", "invalid_prop", True + ) + + def test4(self) -> None: + """ + Verify return of a string containing the property value. + """ + # Set force_refresh property and verify that it appears in the properties string. + hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) + prop_str: str = hcacsimp.cache_property_to_str("_cached_json_double") + # Check output. + self.assertIn("force_refresh: True", prop_str) + + +# ############################################################################# +# Test_get_cached_func_names +# ############################################################################# + + +class Test_get_cached_func_names(_BaseCacheTest): + """ + Test get_cached_func_names functionality for retrieving cached function + names. + """ + + def test1(self) -> None: + """ + Verify that memory cache function names include `_cached_json_double`. + """ + # Populate in-memory cache. + _cached_json_double(9) + # Retrieve function names from the memory cache. + mem_funcs = hcacsimp.get_cached_func_names("mem") + # Check output. + self.assertIn("_cached_json_double", mem_funcs) + + def test2(self) -> None: + """ + Verify that all cache function names include both JSON and pickle + functions. + """ + # Populate and flush caches for JSON and pickle functions. + _cached_json_double(2) + # Flush _cached_json_double cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Call _cached_pickle_square with input 2. + _cached_pickle_square(2) + # Flush _cached_pickle_square cache to disk. + hcacsimp.flush_cache_to_disk("_cached_pickle_square") + # Retrieve all local cached function names (both memory and disk). + all_funcs = hcacsimp.get_cached_func_names("local") + # Check output. 
+ self.assertIn("_cached_json_double", all_funcs) + self.assertIn("_cached_pickle_square", all_funcs) + + def test3(self) -> None: + """ + Verify that disk cache function names include `_cached_json_double` + after flushing. + """ + # Flush JSON cache to disk and verify disk cache function names. + _cached_json_double(2) + # Flush _cached_json_double cache to disk. + hcacsimp.flush_cache_to_disk("_cached_json_double") + # Retrieve function names from the disk cache. + disk_funcs = hcacsimp.get_cached_func_names("disk") + # Check output. + self.assertIn("_cached_json_double", disk_funcs) + + def test4(self) -> None: + """ + Verify that disk-cached function names include functions with custom + cache_dir and cache_prefix. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + custom_cache_dir = os.path.join(scratch_dir, "custom_cache") + + # Create a cached function with custom cache location. + @hcacsimp.simple_cache( + cache_type="json", + cache_dir=custom_cache_dir, + cache_prefix="custom_prefix", + ) + def _custom_location_func(x: int) -> int: + return x * 3 + + # Run. + _custom_location_func(5) + hcacsimp.flush_cache_to_disk("_custom_location_func") + disk_funcs = hcacsimp.get_cached_func_names("disk") + # Check. + self.assertIn("_custom_location_func", disk_funcs) + + +# ############################################################################# +# Test_cache_stats_to_str +# ############################################################################# + + +class Test_cache_stats_to_str(_BaseCacheTest): + """ + Test cache_stats_to_str functionality for generating cache statistics. + """ + + def test1(self) -> None: + """ + Verify that cache_stats_to_str returns a DataFrame with 'memory' and + 'disk' columns. + """ + # Populate cache. + _cached_add_100(1) + stats_df: pd.DataFrame = hcacsimp.cache_stats_to_str("_cached_add_100") + # Assert that the returned object is a DataFrame. 
+ self.assertIsInstance(stats_df, pd.DataFrame) + # Verify that it contains the 'memory' and 'disk' columns. + self.assertIn("memory", stats_df.columns) + self.assertIn("disk", stats_df.columns) + + +# ############################################################################# +# Test__cached_kwarg_diff +# ############################################################################# + + +class Test__cached_kwarg_diff(_BaseCacheTest): + """ + Test caching behavior with keyword arguments. + """ + + def test1(self) -> None: + """ + Verify that keyword arguments are handled correctly by the + cache. + """ + # Call with different keyword argument values. + res1: int = _cached_kwarg_diff(5, b=3) + res2: int = _cached_kwarg_diff(5, b=10) + # The results must differ: a different value of `b` must not reuse the other call's entry. + self.assertNotEqual(res1, res2) + + +# ############################################################################# +# Test__cached_multi_arg_sum +# ############################################################################# + + +class Test__cached_multi_arg_sum(_BaseCacheTest): + """ + Test caching behavior with multiple positional arguments. + """ + + def test1(self) -> None: + """ + Verify that the cache for _cached_multi_arg_sum contains the correct + key. + """ + # Populate the cache. + _cached_multi_arg_sum(1, 2) + cache: Dict[str, Any] = hcacsimp.get_cache("_cached_multi_arg_sum") + _LOG.debug("cache=%s", cache) + # Verify that the cache key is formatted as '{"args": [1, 2], "kwargs": {}}'. + self.assertIn('{"args": [1, 2], "kwargs": {}}', cache) + + +# ############################################################################# +# Test__cached_pickle_square +# ############################################################################# + + +class Test__cached_pickle_square(_BaseCacheTest): + """ + Test caching with pickle serialization.
+ """ + + def test1(self) -> None: + """ + Ensure that _cached_pickle_square returns the correct value and disk + file. + """ + # Call the function to square the input. + res: int = _cached_pickle_square(4) + # Flush the cache to disk. + hcacsimp.flush_cache_to_disk("_cached_pickle_square") + cache_file = hcacsimp._get_cache_file_name("_cached_pickle_square") + # Open and load the pickle cache file. + func_cache_data = hcacsimp._load_func_cache_data_from_file( + cache_file, "pickle" + ) + _LOG.debug("func_cache_data=%s", func_cache_data) + # Verify the result and cache contents. + self.assertEqual(res, 16) + self.assertIn('{"args": [4], "kwargs": {}}', func_cache_data) + self.assertEqual(func_cache_data['{"args": [4], "kwargs": {}}'], 16) + + +# ############################################################################# +# Test__cached_refreshable_func +# ############################################################################# + + +class Test__cached_refreshable_func(_BaseCacheTest): + """ + Test force_refresh cache property functionality. + """ + + def test1(self) -> None: + """ + Verify that `_cached_refreshable_func` is called only once initially. + """ + # Reset call counter. + _cached_refreshable_func.call_count = 0 + # Call the function twice with the same input. + _cached_refreshable_func(3) + _cached_refreshable_func(3) + # Verify that the function was only called once (cache hit on the second + # call). + self.assertEqual( + _cached_refreshable_func.call_count, + 1, + "Function should be called only once initially.", + ) + + def test2(self) -> None: + """ + Verify that enabling `force_refresh` causes `_cached_refreshable_func` + to be re-called. + """ + # Call the function normally. + res: int = _cached_refreshable_func(3) + # Enable force_refresh so that the function will be re-called. + hcacsimp.set_cache_property( + "_cached_refreshable_func", "force_refresh", True + ) + # Verify that the function returns the correct value (3 * 10 = 30). 
+ self.assertEqual(res, 30) + # Verify that the function's call count has incremented, indicating it + # was re-called. + self.assertEqual( + _cached_refreshable_func.call_count, + 2, + "Function should be re-called when force_refresh is enabled.", + ) + + +# ############################################################################# +# Test_reset_cache_perf +# ############################################################################# + + +class Test_reset_cache_perf(_BaseCacheTest): + """ + Test reset_cache_perf functionality for resetting performance statistics. + """ + + def test1(self) -> None: + """ + Verify that reset_cache_perf resets stats for a single function. + """ + # Prepare inputs. + hcacsimp.enable_cache_perf("_cached_json_double") + _cached_json_double(5) + _cached_json_double(5) + # Run test. + hcacsimp.reset_cache_perf("_cached_json_double") + # Check outputs. + perf = hcacsimp.get_cache_perf("_cached_json_double") + self.assertEqual(perf["tot"], 0) + self.assertEqual(perf["hits"], 0) + self.assertEqual(perf["misses"], 0) + + def test2(self) -> None: + """ + Verify that reset_cache_perf with empty func_name resets all functions. + """ + # Prepare inputs. + hcacsimp.enable_cache_perf("_cached_json_double") + hcacsimp.enable_cache_perf("_cached_multi_arg_sum") + _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + # Run test. + hcacsimp.reset_cache_perf("") + # Check outputs. + perf1 = hcacsimp.get_cache_perf("_cached_json_double") + perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") + self.assertEqual(perf1["tot"], 0) + self.assertEqual(perf2["tot"], 0) + + +# ############################################################################# +# Test_disable_cache_perf +# ############################################################################# + + +class Test_disable_cache_perf(_BaseCacheTest): + """ + Test disable_cache_perf functionality for disabling performance tracking. 
+ """ + + def test1(self) -> None: + """ + Verify that disable_cache_perf with empty func_name disables all + functions. + """ + # Prepare inputs. + hcacsimp.enable_cache_perf("_cached_json_double") + hcacsimp.enable_cache_perf("_cached_multi_arg_sum") + _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + # Run test. + hcacsimp.disable_cache_perf("") + # Check outputs. + perf1 = hcacsimp.get_cache_perf("_cached_json_double") + perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") + # After disabling, perf should be None. + self.assertIsNone(perf1) + self.assertIsNone(perf2) + + +# ############################################################################# +# Test_get_cache_perf_stats +# ############################################################################# + + +class Test_get_cache_perf_stats(_BaseCacheTest): + """ + Test get_cache_perf_stats for retrieving performance statistics. + """ + + def test1(self) -> None: + """ + Verify that get_cache_perf_stats returns empty string when no stats + exist. + """ + # Prepare inputs. + # Ensure no perf stats exist for a non-tracked function. + hcacsimp.disable_cache_perf("_cached_json_double") + # Run test. + stats = hcacsimp.get_cache_perf_stats("_cached_json_double") + # Check outputs. + self.assertEqual(stats, "") + + +# ############################################################################# +# Test_cache_property_to_str +# ############################################################################# + + +class Test_cache_property_to_str(_BaseCacheTest): + """ + Test cache_property_to_str for converting properties to string. + """ + + def test1(self) -> None: + """ + Verify that cache_property_to_str with empty func_name returns all + functions. + """ + # Prepare inputs. + # Call functions to ensure they are cached. 
+ _cached_json_double(1) + _cached_multi_arg_sum(1, 2) + hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) + hcacsimp.set_cache_property( + "_cached_multi_arg_sum", "write_through", True + ) + # Run test. + result = hcacsimp.cache_property_to_str("") + # Check outputs. + self.assertIn("_cached_json_double", result) + self.assertIn("_cached_multi_arg_sum", result) + self.assertIn("force_refresh: True", result) + self.assertIn("write_through: True", result) + + + +# ############################################################################# +# Test_reset_mem_cache_all +# ############################################################################# + + +class Test_reset_mem_cache_all(_BaseCacheTest): + """ + Test reset_mem_cache with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that reset_mem_cache with empty func_name resets all caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + hcacsimp.reset_mem_cache("") + # Check outputs. + cache1 = hcacsimp.get_mem_cache("_cached_json_double") + cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") + self.assertEqual(len(cache1), 0) + self.assertEqual(len(cache2), 0) + + +# ############################################################################# +# Test_reset_disk_cache_all +# ############################################################################# + + +class Test_reset_disk_cache_all(_BaseCacheTest): + """ + Test reset_disk_cache with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that reset_disk_cache with empty func_name removes all cache + files. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") + # Run test. + hcacsimp.reset_disk_cache("", interactive=False) + # Check outputs. 
+ cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertFalse(os.path.exists(cache_file1)) + cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") + self.assertFalse(os.path.exists(cache_file2)) + + +# ############################################################################# +# Test_force_cache_from_disk_all +# ############################################################################# + + +class Test_force_cache_from_disk_all(_BaseCacheTest): + """ + Test force_cache_from_disk with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that force_cache_from_disk with empty func_name loads all + caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + hcacsimp.flush_cache_to_disk("_cached_json_double") + hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") + hcacsimp.reset_mem_cache("") + # Run test. + hcacsimp.force_cache_from_disk("") + # Check outputs. + cache1 = hcacsimp.get_mem_cache("_cached_json_double") + cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") + self.assertGreater(len(cache1), 0) + self.assertGreater(len(cache2), 0) + + +# ############################################################################# +# Test_flush_cache_to_disk_all +# ############################################################################# + + +class Test_flush_cache_to_disk_all(_BaseCacheTest): + """ + Test flush_cache_to_disk with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that flush_cache_to_disk with empty func_name flushes all + caches. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + hcacsimp.flush_cache_to_disk("") + # Check outputs. 
+ cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertTrue(os.path.exists(cache_file1)) + # + cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") + self.assertTrue(os.path.exists(cache_file2)) + + +# ############################################################################# +# Test_cache_stats_to_str_all +# ############################################################################# + + +class Test_cache_stats_to_str_all(_BaseCacheTest): + """ + Test cache_stats_to_str with empty func_name parameter. + """ + + def test1(self) -> None: + """ + Verify that cache_stats_to_str with empty func_name returns stats for + all functions. + """ + # Prepare inputs. + _cached_json_double(1) + _cached_multi_arg_sum(2, 3) + # Run test. + result = hcacsimp.cache_stats_to_str("") + # Check outputs. + self.assertIsNotNone(result) + self.assertIn("_cached_json_double", result.index) + self.assertIn("_cached_multi_arg_sum", result.index) + + +# ############################################################################# +# Test_get_cached_func_names_invalid +# ############################################################################# + + +class Test_get_cached_func_names_invalid(_BaseCacheTest): + """ + Test get_cached_func_names with invalid type parameter. + """ + + def test1(self) -> None: + """ + Verify that get_cached_func_names raises ValueError for invalid type. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp.get_cached_func_names("invalid_type") + self.assertIn("Invalid type", str(cm.exception)) + + +# ############################################################################# +# Test__get_cache_file_name +# ############################################################################# + + +class Test__get_cache_file_name(_BaseCacheTest): + """ + Test _get_cache_file_name for various configurations. 
+ """ + + def test1(self) -> None: + """ + Verify that _get_cache_file_name raises ValueError for invalid cache + type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp._get_cache_file_name("_cached_json_double") + self.assertIn("Invalid cache type", str(cm.exception)) + + def test2(self) -> None: + """ + Test global cache_dir + global cache_prefix (default fallback). + + Verifies that when no per-function properties are set, the + function falls back to global cache_dir and cache_prefix. + """ + # Prepare inputs. + func_name = "_cached_json_double" + # Run. + actual = hcacsimp._get_cache_file_name(func_name) + # Check. + global_cache_dir = hcacsimp.get_cache_dir() + global_cache_prefix = hcacsimp.get_cache_file_prefix() + expected = os.path.join( + global_cache_dir, f"{global_cache_prefix}.{func_name}.json" + ) + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + Test per-function cache_dir + global cache_prefix. + + Verifies that per-function cache_dir is used while falling back + to global cache_prefix. + """ + # Prepare inputs. + custom_dir = "/tmp/custom_test_dir" + func_name = "_cached_json_double" + hcacsimp.set_cache_property(func_name, "cache_dir", custom_dir) + # Run. + actual = hcacsimp._get_cache_file_name(func_name) + # Check. + global_cache_prefix = hcacsimp.get_cache_file_prefix() + expected = os.path.join( + custom_dir, f"{global_cache_prefix}.{func_name}.json" + ) + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test global cache_dir + per-function cache_prefix. + + Verifies that per-function cache_prefix is used while falling + back to global cache_dir. + """ + # Prepare inputs. + custom_prefix = "custom_prefix" + func_name = "_cached_json_double" + hcacsimp.set_cache_property(func_name, "cache_prefix", custom_prefix) + # Run. 
+ actual = hcacsimp._get_cache_file_name(func_name) + # Check. + global_cache_dir = hcacsimp.get_cache_dir() + expected = os.path.join( + global_cache_dir, f"{custom_prefix}.{func_name}.json" + ) + self.assertEqual(actual, expected) + + def test5(self) -> None: + """ + Test per-function cache_dir + per-function cache_prefix. + + Verifies that both per-function cache_dir and cache_prefix are + used when both are set (no fallback to global values). + """ + # Prepare inputs. + custom_dir = "/tmp/custom_test_dir_both" + custom_prefix = "custom_prefix_both" + func_name = "_cached_json_double" + hcacsimp.set_cache_property(func_name, "cache_dir", custom_dir) + hcacsimp.set_cache_property(func_name, "cache_prefix", custom_prefix) + # Run. + actual = hcacsimp._get_cache_file_name(func_name) + # Check. + expected = os.path.join(custom_dir, f"{custom_prefix}.{func_name}.json") + self.assertEqual(actual, expected) + + def test6(self) -> None: + """ + Test file path format for pickle cache type. + + Verifies that _get_cache_file_name returns correct file + extension for pickle (.pkl) cache type. + """ + # Prepare inputs. + func_name = "_cached_pickle_square" + # Run. + actual = hcacsimp._get_cache_file_name(func_name) + # Check. + self.assertTrue(actual.endswith(".pkl")) + self.assertIn(func_name, actual) + + def test7(self) -> None: + """ + Test file path format for json cache type. + + Verifies that _get_cache_file_name returns correct file + extensions for json (.json) cache type. + """ + # Prepare inputs. + func_name = "_cached_json_double" + # Run. + actual = hcacsimp._get_cache_file_name(func_name) + # Check. 
+ self.assertTrue(actual.endswith(".json")) + self.assertIn(func_name, actual) + + + +# ############################################################################# +# Test__save_cache_dict_to_disk +# ############################################################################# + + +class Test__save_cache_dict_to_disk(_BaseCacheTest): + """ + Test _save_cache_dict_to_disk for invalid cache type. + """ + + def test1(self) -> None: + """ + Verify that _save_cache_dict_to_disk raises ValueError for invalid + cache type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + data = {"key": "value"} + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp._save_cache_dict_to_disk("_cached_json_double", data) + self.assertIn("Invalid cache type", str(cm.exception)) + + + +# ############################################################################# +# Test_get_disk_cache_invalid +# ############################################################################# + + +class Test_get_disk_cache_invalid(_BaseCacheTest): + """ + Test get_disk_cache for invalid cache type. + """ + + def test1(self) -> None: + """ + Verify that get_disk_cache raises ValueError for invalid cache type. + """ + # Prepare inputs. + hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hcacsimp.get_disk_cache("_cached_json_double") + self.assertIn("Invalid cache type", str(cm.exception)) + +@hcacsimp.simple_cache(cache_type="json") +def _cache_mode_function(x: int) -> int: + """ + Test function to verify cache_mode parameter. 
+ + :param x: input integer + :return: x * 5 + """ + _cache_mode_function.call_count += 1 + res = x * 5 + return res + + +_cache_mode_function.call_count = 0 + + +# ############################################################################# +# Test_cache_mode +# ############################################################################# + + +class Test_cache_mode(_BaseCacheTest): + """ + Test cache_mode parameter functionality. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_cache_mode_function", "type", "json") + _cache_mode_function.call_count = 0 + + def test1(self) -> None: + """ + Verify that setting force_refresh property forces cache refresh. + """ + # Prepare inputs. + _cache_mode_function(10) + initial_count = _cache_mode_function.call_count + # Set force_refresh property. + hcacsimp.set_cache_property("_cache_mode_function", "force_refresh", True) + # Run test. + result = _cache_mode_function(10) + # Check outputs. + self.assertEqual(result, 50) + self.assertEqual(_cache_mode_function.call_count, initial_count + 1) + + def test2(self) -> None: + """ + Verify that setting abort_on_cache_miss property aborts on cache miss. + """ + # Prepare inputs. + hcacsimp.set_cache_property( + "_cache_mode_function", "abort_on_cache_miss", True + ) + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _cache_mode_function(99) + self.assertIn("Cache miss", str(cm.exception)) + + def test3(self) -> None: + """ + Verify that calling with different arguments bypasses cache. + """ + # Prepare inputs. + _cache_mode_function(15) + initial_count = _cache_mode_function.call_count + # Run test. + result1 = _cache_mode_function(16) + result2 = _cache_mode_function(17) + # Check outputs. 
+ self.assertEqual(result1, 80) + self.assertEqual(result2, 85) + self.assertEqual(_cache_mode_function.call_count, initial_count + 2) + + +@hcacsimp.simple_cache(cache_type="json") +def _abort_test_function(x: int) -> int: + """ + Test function to verify abort_on_cache_miss parameter. + + :param x: input integer + :return: x * 7 + """ + res = x * 7 + return res + + +# ############################################################################# +# Test_abort_on_cache_miss +# ############################################################################# + + +class Test_abort_on_cache_miss(_BaseCacheTest): + """ + Test abort_on_cache_miss functionality. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_abort_test_function", "type", "json") + + def test1(self) -> None: + """ + Verify that abort_on_cache_miss=True raises error on cache miss. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _abort_test_function(100, abort_on_cache_miss=True) + self.assertIn("Cache miss", str(cm.exception)) + + +@hcacsimp.simple_cache(cache_type="json") +def _report_test_function(x: int) -> int: + """ + Test function to verify report_on_cache_miss parameter. + + :param x: input integer + :return: x * 8 + """ + res = x * 8 + return res + + +# ############################################################################# +# Test_report_on_cache_miss +# ############################################################################# + + +class Test_report_on_cache_miss(_BaseCacheTest): + """ + Test report_on_cache_miss functionality. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_report_test_function", "type", "json") + + def test1(self) -> None: + """ + Verify that report_on_cache_miss=True returns '_cache_miss_' on miss. + """ + # Run test. 
+ result = _report_test_function(200, report_on_cache_miss=True) + # Check outputs. + self.assertEqual(result, "_cache_miss_") + + +@hcacsimp.simple_cache(cache_type="json", write_through=True) +def _write_through_function(x: int) -> int: + """ + Test function to verify write_through parameter. + + :param x: input integer + :return: x * 9 + """ + res = x * 9 + return res + + +# ############################################################################# +# Test_write_through +# ############################################################################# + + +class Test_write_through(_BaseCacheTest): + """ + Test write_through functionality for automatic disk caching. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_write_through_function", "type", "json") + + def test1(self) -> None: + """ + Verify that write_through=True automatically writes to disk. + """ + # Run test. + _write_through_function(11) + # Check outputs. + cache_file = hcacsimp._get_cache_file_name("_write_through_function") + self.assertTrue(os.path.exists(cache_file)) + # + disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") + self.assertIn('{"args": [11], "kwargs": {}}', disk_cache) + self.assertEqual(disk_cache['{"args": [11], "kwargs": {}}'], 99) + + +@hcacsimp.simple_cache(cache_type="json") +def _test_cache_mode_kwarg(x: int, **kwargs) -> int: + """ + Test function that accepts kwargs to test cache_mode parameter. 
+ + :param x: input integer + :param kwargs: additional keyword arguments + :return: x * 3 + """ + _test_cache_mode_kwarg.call_count += 1 + res = x * 3 + return res + + +_test_cache_mode_kwarg.call_count = 0 + + +# ############################################################################# +# Test_cache_mode_parameter +# ############################################################################# + + +class Test_cache_mode_parameter(_BaseCacheTest): + """ + Test cache_mode parameter as a keyword argument. + """ + + def set_up_test(self) -> None: + """ + Setup operations to run before each test. + """ + super().set_up_test() + hcacsimp.set_cache_property("_test_cache_mode_kwarg", "type", "json") + _test_cache_mode_kwarg.call_count = 0 + + def test1(self) -> None: + """ + Verify that cache_mode='REFRESH_CACHE' keyword forces refresh. + """ + # Prepare inputs. + _test_cache_mode_kwarg(20) + initial_count = _test_cache_mode_kwarg.call_count + # Run test. + result = _test_cache_mode_kwarg(20, cache_mode="REFRESH_CACHE") + # Check outputs. + self.assertEqual(result, 60) + self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 1) + + def test2(self) -> None: + """ + Verify that cache_mode='HIT_CACHE_OR_ABORT' raises error on miss. + """ + # Run test and check output. + with self.assertRaises(ValueError) as cm: + _test_cache_mode_kwarg(88, cache_mode="HIT_CACHE_OR_ABORT") + self.assertIn("Cache miss", str(cm.exception)) + + def test3(self) -> None: + """ + Verify that cache_mode='DISABLE_CACHE' bypasses cache. + """ + # Prepare inputs. + _test_cache_mode_kwarg(30) + initial_count = _test_cache_mode_kwarg.call_count + # Run test. + result1 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") + result2 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") + # Check outputs. 
+ self.assertEqual(result1, 90) + self.assertEqual(result2, 90) + self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 2) + + +# ############################################################################# +# Module-level helpers for new tests. +# ############################################################################# + + +@hcacsimp.simple_cache(cache_type="json") +def _test_intrinsic_func_intrinsic(x: int) -> int: + """ + Return x times 3. Named with `_intrinsic` suffix to test suffix stripping. + + :param x: input integer + :return: x * 3 + """ + res = x * 3 + return res + + +@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) +def _test_exclude_keys_func(x: int, *, session_id: str = "") -> int: + """ + Return x times 2, ignoring session_id for caching purposes. + + :param x: input integer + :param session_id: session identifier (excluded from cache key) + :return: x * 2 + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache(cache_type="json", write_through=False) +def _test_no_write_through(x: int) -> int: + """ + Return x plus 1, with write_through disabled. + + :param x: input integer + :return: x + 1 + """ + res = x + 1 + return res + + +# ############################################################################# +# Test_sanity_check_function_cache +# ############################################################################# + + +class Test_sanity_check_function_cache(_BaseCacheTest): + """ + Test sanity_check_function_cache for validating function cache dicts. + """ + + def test1(self) -> None: + """ + Verify that sanity_check_function_cache passes for valid cache data. + """ + # Prepare inputs. + func_cache_data = {'{"args": [1], "kwargs": {}}': 2} + # Run test. + hcacsimp.sanity_check_function_cache(func_cache_data) + # Check outputs (no exception raised). + + def test2(self) -> None: + """ + Verify that sanity_check_function_cache passes for empty dict when + assert_on_empty=False. 
+ """ + # Prepare inputs. + func_cache_data: dict = {} + # Run test. + hcacsimp.sanity_check_function_cache( + func_cache_data, assert_on_empty=False + ) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_sanity_check_cache +# ############################################################################# + + +class Test_sanity_check_cache(_BaseCacheTest): + """ + Test sanity_check_cache for validating nested cache dicts. + """ + + def test1(self) -> None: + """ + Verify that sanity_check_cache passes for valid nested cache data. + """ + # Prepare inputs. + cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} + # Run test. + hcacsimp.sanity_check_cache(cache_data) + # Check outputs (no exception raised). + + def test2(self) -> None: + """ + Verify that sanity_check_cache passes for empty dict when + assert_on_empty=False. + """ + # Prepare inputs. + cache_data: dict = {} + # Run test. + hcacsimp.sanity_check_cache(cache_data, assert_on_empty=False) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_cache_data_to_str +# ############################################################################# + + +class Test_cache_data_to_str(_BaseCacheTest): + """ + Test cache_data_to_str for converting cache data to a string. + """ + + def test1(self) -> None: + """ + Verify that cache_data_to_str returns a string with the function name + and cache key. + """ + # Prepare inputs. + cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} + # Run test. + result = hcacsimp.cache_data_to_str(cache_data) + # Check outputs. 
+ self.assertIn("my_func", result) + self.assertIn('{"args": [1], "kwargs": {}}', result) + self.assertIn("42", result) + + +# ############################################################################# +# Test_get_cache_property_system +# ############################################################################# + + +class Test_get_cache_property_system(_BaseCacheTest): + """ + Test get_cache_property for system properties on unknown functions. + """ + + def test1(self) -> None: + """ + Verify that get_cache_property returns None for a system property when + the function is not in the cache property dict. + """ + # Run test. + val = hcacsimp.get_cache_property("_nonexistent_func_xyz", "type") + # Check outputs. + self.assertIsNone(val) + + +# ############################################################################# +# Test_set_cache_property_new_func +# ############################################################################# + + +class Test_set_cache_property_new_func(_BaseCacheTest): + """ + Test set_cache_property for a brand new function not yet in cache property. + """ + + def test1(self) -> None: + """ + Verify that set_cache_property creates a new entry for a function that + was not previously registered. + """ + # Run test. + hcacsimp.set_cache_property("_brand_new_func_xyz", "force_refresh", True) + # Check outputs. + val = hcacsimp.get_cache_property("_brand_new_func_xyz", "force_refresh") + self.assertTrue(val) + + +# ############################################################################# +# Test_cache_property_to_str_no_props +# ############################################################################# + + +class Test_cache_property_to_str_no_props(_BaseCacheTest): + """ + Test cache_property_to_str for a function with no properties in the cache. + """ + + def test1(self) -> None: + """ + Verify that cache_property_to_str returns the function name header even + when the function has no registered cache properties. 
+ """ + # Run test with a function name not in _CACHE_PROPERTY. + result = hcacsimp.cache_property_to_str("_nonexistent_func_xyz") + # Check outputs. + self.assertIn("_nonexistent_func_xyz", result) + + +# ############################################################################# +# Test__get_cache_file_name_auto_detect +# ############################################################################# + + +class Test__get_cache_file_name_auto_detect(_BaseCacheTest): + """ + Test _get_cache_file_name when cache type is None (auto-detect from disk). + """ + + def test1(self) -> None: + """ + Verify that _get_cache_file_name infers .pkl extension when a .pkl file + exists on disk. + """ + # Prepare inputs: create a valid .pkl file in the cache dir. + cache_dir = hcacsimp.get_cache_dir() + func_name = "_auto_detect_pkl_func" + pkl_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.pkl") + hcacsimp._save_func_cache_data_to_file(pkl_path, "pickle", {}) + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. + self.assertTrue(file_name.endswith(".pkl")) + + def test2(self) -> None: + """ + Verify that _get_cache_file_name infers .json extension when a .json + file exists on disk. + """ + # Prepare inputs: create a valid .json file in the cache dir. + cache_dir = hcacsimp.get_cache_dir() + func_name = "_auto_detect_json_func" + json_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.json") + hcacsimp._save_func_cache_data_to_file(json_path, "json", {}) + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. + self.assertTrue(file_name.endswith(".json")) + + def test3(self) -> None: + """ + Verify that _get_cache_file_name defaults to .json when no file exists. + """ + # Prepare inputs: use a brand new function name with no disk file. + func_name = "_no_file_func_xyz" + # Run test. + file_name = hcacsimp._get_cache_file_name(func_name) + # Check outputs. 
+ self.assertTrue(file_name.endswith(".json")) + + +# ############################################################################# +# Test__save_func_cache_data_to_file_infer +# ############################################################################# + + +class Test__save_func_cache_data_to_file_infer(_BaseCacheTest): + """ + Test _save_func_cache_data_to_file when cache_type is None (inferred from + file extension). + """ + + def test1(self) -> None: + """ + Verify that _save_func_cache_data_to_file infers pickle format from + .pkl extension when cache_type is None. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + file_name = os.path.join(scratch_dir, "tmp_test_infer.pkl") + data = {'{"args": [1], "kwargs": {}}': 42} + # Run test. + hcacsimp._save_func_cache_data_to_file(file_name, None, data) + # Check outputs. + self.assertTrue(os.path.exists(file_name)) + loaded = hcacsimp._load_func_cache_data_from_file(file_name, "pickle") + self.assertEqual(loaded, data) + + +# ############################################################################# +# Test__load_func_cache_data_from_file_infer +# ############################################################################# + + +class Test__load_func_cache_data_from_file_infer(_BaseCacheTest): + """ + Test _load_func_cache_data_from_file when cache_type is None (inferred from + file extension). + """ + + def test1(self) -> None: + """ + Verify that _load_func_cache_data_from_file infers pickle format from + .pkl extension when cache_type is None. + """ + # Prepare inputs: save a pickle file. + scratch_dir = self.get_scratch_space() + file_name = os.path.join(scratch_dir, "tmp_test_load_infer.pkl") + data = {'{"args": [5], "kwargs": {}}': 25} + hcacsimp._save_func_cache_data_to_file(file_name, "pickle", data) + # Run test with None cache_type (should infer from .pkl). + result = hcacsimp._load_func_cache_data_from_file(file_name, None) + # Check outputs. 
+ self.assertEqual(result, data) + + +# ############################################################################# +# Test_reset_disk_cache_no_file +# ############################################################################# + + +class Test_reset_disk_cache_no_file(_BaseCacheTest): + """ + Test reset_disk_cache when the target function has no disk cache file. + """ + + def test1(self) -> None: + """ + Verify that reset_disk_cache does not raise when the function has no + cache file on disk. + """ + # Prepare inputs: use a function that has never been cached to disk. + func_name = "_cached_json_double" + # Ensure no disk file exists. + hcacsimp.reset_disk_cache(func_name, interactive=False) + cache_file = hcacsimp._get_cache_file_name(func_name) + self.assertFalse(os.path.exists(cache_file)) + # Run test: reset again when no file exists (should not raise). + hcacsimp.reset_disk_cache(func_name, interactive=False) + # Check outputs (no exception raised). + + +# ############################################################################# +# Test_mock_cache +# ############################################################################# + + +class Test_mock_cache(_BaseCacheTest): + """ + Test mock_cache for inserting values directly into the cache. + """ + + def test1(self) -> None: + """ + Verify that mock_cache inserts a value into the function cache that can + be retrieved as a cache hit. + """ + # Prepare inputs. + func_name = "_cached_json_double" + cache_key = '{"args": [99], "kwargs": {}}' + value = 198 + # Run test. + hcacsimp.mock_cache(func_name, cache_key, value) + # Check outputs. + cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache[cache_key], value) + + def test2(self) -> None: + """ + Verify that a mocked cache value causes a cache hit when the decorated + function is called. + """ + # Prepare inputs. + func_name = "_cached_json_double" + cache_key = '{"args": [77], "kwargs": {}}' + value = 154 + # Run test. 
+ hcacsimp.mock_cache(func_name, cache_key, value) + result = _cached_json_double(77, abort_on_cache_miss=True) + # Check outputs. + self.assertEqual(result, value) + + +# ############################################################################# +# Test_mock_cache_from_args_kwargs +# ############################################################################# + + +class Test_mock_cache_from_args_kwargs(_BaseCacheTest): + """ + Test mock_cache_from_args_kwargs for inserting values via args/kwargs. + """ + + def test1(self) -> None: + """ + Verify that mock_cache_from_args_kwargs inserts the correct value into + the cache for the given args and kwargs. + """ + # Prepare inputs. + func_name = "_cached_json_double" + args = (55,) + kwargs: dict = {} + value = 110 + # Run test. + hcacsimp.mock_cache_from_args_kwargs(func_name, args, kwargs, value) + # Check outputs. + expected_key = '{"args": [55], "kwargs": {}}' + cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache[expected_key], value) + + +# ############################################################################# +# Test_mock_cache_from_disk +# ############################################################################# + + +class Test_mock_cache_from_disk(_BaseCacheTest): + """ + Test mock_cache_from_disk for bulk-inserting cache data from a dict. + """ + + def test1(self) -> None: + """ + Verify that mock_cache_from_disk populates the cache from a dict of + pre-computed values. + """ + # Prepare inputs. + func_name = "_cached_json_double" + func_cache_data = { + '{"args": [33], "kwargs": {}}': 66, + '{"args": [44], "kwargs": {}}': 88, + } + # Run test. + hcacsimp.mock_cache_from_disk(func_name, func_cache_data) + # Check outputs. 
+ cache = hcacsimp.get_cache(func_name) + self.assertEqual(cache['{"args": [33], "kwargs": {}}'], 66) + self.assertEqual(cache['{"args": [44], "kwargs": {}}'], 88) + + +# ############################################################################# +# Test_simple_cache_intrinsic +# ############################################################################# + + +class Test_simple_cache_intrinsic(_BaseCacheTest): + """ + Test simple_cache decorator with a function whose name ends in _intrinsic. + """ + + def test1(self) -> None: + """ + Verify that the _intrinsic suffix is stripped and the cache key uses + the base function name. + """ + # Run test. + result = _test_intrinsic_func_intrinsic(5) + # Check outputs. + self.assertEqual(result, 15) + # Cache should be stored under the base name (without _intrinsic). + cache = hcacsimp.get_cache("_test_intrinsic_func") + self.assertIn('{"args": [5], "kwargs": {}}', cache) + + +# ############################################################################# +# Test_simple_cache_existing_type +# ############################################################################# + + +class Test_simple_cache_existing_type(_BaseCacheTest): + """ + Test that simple_cache preserves a pre-existing cache type setting. + """ + + def test1(self) -> None: + """ + Verify that applying simple_cache with cache_type='json' does not + override an existing 'pickle' type already set for the function. + """ + # Prepare inputs: set the type before decoration. + hcacsimp.set_cache_property("_inline_type_func", "type", "pickle") + + def _inline_type_func(x: int) -> int: + return x + + # Apply decorator with a different cache_type. + hcacsimp.simple_cache(cache_type="json")(_inline_type_func) + # Check outputs: type should remain 'pickle'. 
+ val = hcacsimp.get_cache_property("_inline_type_func", "type") + self.assertEqual(val, "pickle") + + +# ############################################################################# +# Test_simple_cache_exclude_keys +# ############################################################################# + + +class Test_simple_cache_exclude_keys(_BaseCacheTest): + """ + Test simple_cache decorator with exclude_keys parameter. + """ + + def test1(self) -> None: + """ + Verify that calls with the same primary arg but different excluded + kwargs produce a single cache entry (the excluded key is ignored). + """ + # Run test: two calls with same x but different session_id. + result1 = _test_exclude_keys_func(5, session_id="abc") + result2 = _test_exclude_keys_func(5, session_id="xyz") + # Check outputs. + self.assertEqual(result1, 10) + self.assertEqual(result2, 10) + # Only one cache entry should exist. + cache = hcacsimp.get_cache("_test_exclude_keys_func") + self.assertEqual(len(cache), 1) + + +# ############################################################################# +# Test_simple_cache_no_write_through +# ############################################################################# + + +class Test_simple_cache_no_write_through(_BaseCacheTest): + """ + Test simple_cache decorator with write_through=False. + """ + + def test1(self) -> None: + """ + Verify that with write_through=False the computed value is not + automatically persisted to disk after a function call. + """ + # Run test. + result = _test_no_write_through(7) + self.assertEqual(result, 8) + # Reset memory cache so that reading goes to disk. + hcacsimp.reset_mem_cache("_test_no_write_through") + # Check outputs: disk cache should not contain the computed value. 
+ disk_cache = hcacsimp.get_disk_cache("_test_no_write_through") + self.assertNotIn('{"args": [7], "kwargs": {}}', disk_cache) + + +# ############################################################################# +# Test_global_cache_file_prefix +# ############################################################################# + + +class Test_global_cache_file_prefix(_BaseCacheTest): + """ + Test global cache file prefix configuration. + """ + + def test1(self) -> None: + """ + Verify that set_cache_file_prefix changes the cache file prefix. + """ + # Prepare inputs. + custom_prefix = "my_test_cache" + # Run. + hcacsimp.set_cache_file_prefix(custom_prefix) + _ = _cached_json_double(5) + # Check. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertIn(custom_prefix, cache_file) + + def test2(self) -> None: + """ + Verify that get_cache_file_prefix returns the configured prefix. + """ + # Prepare inputs. + custom_prefix = "test_prefix" + hcacsimp._CACHE_FILE_PREFIX = custom_prefix + # Run. + actual = hcacsimp.get_cache_file_prefix() + # Check. + self.assertEqual(actual, custom_prefix) + + +# ############################################################################# +# Test helper functions for per-function configuration +# ############################################################################# + + +@hcacsimp.simple_cache( + cache_type="json", + cache_dir="/tmp/custom_cache", + cache_prefix="project_cache", +) +def _test_per_function_cache_dir_and_prefix(x: int) -> int: + """ + Test function with custom cache directory and prefix. + + :param x: input integer + :return: x * 2 + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache( + cache_type="json", + cache_dir="/tmp/custom_cache", +) +def _test_per_function_cache_dir(x: int) -> int: + """ + Test function with custom cache directory and default prefix. 
+ + :param x: input integer + :return: x * 2 + """ + res = x * 2 + return res + + +@hcacsimp.simple_cache( + cache_type="json", + cache_prefix="project_cache", +) +def _test_per_function_prefix(x: int) -> int: + """ + Test function with custom prefix and default directory. + + :param x: input integer + :return: x * 3 + """ + res = x * 3 + return res + + +@hcacsimp.simple_cache( + cache_type="json", + s3_bucket="s3://decorator-bucket", + s3_prefix="decorator/prefix", + aws_profile="decorator-profile", +) +def _test_per_function_s3_configs(x: int) -> int: + """ + Test function with all S3 parameters set via decorator. + + :param x: input integer + :return: x * 6 + """ + return x * 6 + + +# ############################################################################# +# Test_per_function_cache_dir +# ############################################################################# + + +class Test_per_function_cache_dir(_BaseCacheTest): + """ + Test per-function cache directory configuration. + """ + + def test1(self) -> None: + """ + Test cache_dir configured via decorator parameter. + + Verifies that when cache_dir is set in the @simple_cache + decorator, the cache file is created in the specified custom + directory. + """ + # Run. + _ = _test_per_function_cache_dir(10) + # Check. + # Verify cache file is in decorator-specified directory. + cache_file = hcacsimp._get_cache_file_name("_test_per_function_cache_dir") + self.assertIn("/tmp/custom_cache", cache_file) + # Flush to disk to verify file creation. + hcacsimp.flush_cache_to_disk("_test_per_function_cache_dir") + self.assertTrue(os.path.exists(cache_file)) + + def test2(self) -> None: + """ + Test that cache_dir can be retrieved. + + Verifies that cache_dir property set via decorator can be + retrieved using get_cache_property. + """ + # Run. + cache_dir = hcacsimp.get_cache_property( + "_test_per_function_cache_dir", "cache_dir" + ) + # Check. 
+ self.assertEqual(cache_dir, "/tmp/custom_cache") + + def test3(self) -> None: + """ + Test cache_dir configured via set_cache_property() function call. + + Verifies that cache_dir can be set manually via + set_cache_property() for functions without cache_dir in their + decorator. + """ + # Prepare inputs. + custom_dir = self.get_scratch_space() + "/manual_cache" + # Set cache_dir manually. + hcacsimp.set_cache_property( + "_cached_json_double", "cache_dir", custom_dir + ) + # Run. + _ = _cached_json_double(10) + # Check. + # Verify cache file is in manually-set directory. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertIn(custom_dir, cache_file) + # Flush to disk to verify file creation. + hcacsimp.flush_cache_to_disk("_cached_json_double") + self.assertTrue(os.path.exists(cache_file)) + + def test4(self) -> None: + """ + Test get/set cache_dir property API. + + Verifies that cache_dir can be stored and retrieved via + get/set_cache_property functions. + """ + # Prepare inputs. + custom_dir = "/tmp/test_cache_dir" + # Run. + hcacsimp.set_cache_property( + "_cached_json_double", "cache_dir", custom_dir + ) + actual = hcacsimp.get_cache_property("_cached_json_double", "cache_dir") + # Check. + self.assertEqual(actual, custom_dir) + + +# ############################################################################# +# Test_per_function_cache_prefix +# ############################################################################# + + +class Test_per_function_cache_prefix(_BaseCacheTest): + """ + Test per-function cache prefix configuration. + """ + + def test1(self) -> None: + """ + Test cache_prefix configured via decorator parameter. + + Verifies that when cache_prefix is set in the @simple_cache + decorator, the cache file name uses the specified custom prefix. + """ + # Run. + _ = _test_per_function_prefix(7) + # Check. 
+ cache_file = hcacsimp._get_cache_file_name("_test_per_function_prefix") + self.assertIn("project_cache", cache_file) + + def test2(self) -> None: + """ + Test that cache_prefix can be retrieved. + + Verifies that cache_prefix property set via decorator can be + retrieved using get_cache_property. + """ + # Run. + cache_prefix = hcacsimp.get_cache_property( + "_test_per_function_prefix", "cache_prefix" + ) + # Check. + self.assertEqual(cache_prefix, "project_cache") + + def test3(self) -> None: + """ + Test cache_prefix configured via set_cache_property() function call. + + Verifies that cache_prefix can be set manually via + set_cache_property() for functions without cache_prefix in their + decorator. + """ + # Prepare inputs. + custom_prefix = "test_prefix" + # Set cache_prefix manually. + hcacsimp.set_cache_property( + "_cached_json_double", "cache_prefix", custom_prefix + ) + # Run. + _ = _cached_json_double(7) + # Check. + cache_file = hcacsimp._get_cache_file_name("_cached_json_double") + self.assertIn(custom_prefix, cache_file) + + def test4(self) -> None: + """ + Test get/set cache_prefix property API. + + Verifies that cache_prefix can be stored and retrieved via + get/set_cache_property functions. + """ + # Prepare inputs. + custom_prefix = "my_project_cache" + # Run. + hcacsimp.set_cache_property( + "_cached_json_double", "cache_prefix", custom_prefix + ) + actual = hcacsimp.get_cache_property( + "_cached_json_double", "cache_prefix" + ) + # Check. + self.assertEqual(actual, custom_prefix) + + +# ############################################################################# +# Test_per_function_cache_dir_and_prefix +# ############################################################################# + + +class Test_per_function_cache_dir_and_prefix(_BaseCacheTest): + """ + Test per-function cache directory and prefix configured together. + """ + + def test1(self) -> None: + """ + Test both cache_dir and cache_prefix configured via decorator. 
+ + Verifies that when both cache_dir and cache_prefix are set in + the @simple_cache decorator, both are applied correctly to the + cache file path. + """ + # Run. + _ = _test_per_function_cache_dir_and_prefix(10) + # Check. + cache_file = hcacsimp._get_cache_file_name( + "_test_per_function_cache_dir_and_prefix" + ) + # Verify custom directory is used. + self.assertIn("/tmp/custom_cache", cache_file) + # Verify custom prefix is used. + self.assertIn("project_cache", cache_file) + # Flush to disk to verify file creation. + hcacsimp.flush_cache_to_disk("_test_per_function_cache_dir_and_prefix") + self.assertTrue(os.path.exists(cache_file)) + + def test2(self) -> None: + """ + Test that cache_dir and cache_prefix can be retrieved. + + Verifies that both cache_dir and cache_prefix properties set via + decorator can be retrieved using get_cache_property. + """ + # Run. + cache_dir = hcacsimp.get_cache_property( + "_test_per_function_cache_dir_and_prefix", "cache_dir" + ) + cache_prefix = hcacsimp.get_cache_property( + "_test_per_function_cache_dir_and_prefix", "cache_prefix" + ) + # Check. + self.assertEqual(cache_dir, "/tmp/custom_cache") + self.assertEqual(cache_prefix, "project_cache") + + +# ############################################################################# +# Test_s3_configuration +# ############################################################################# + + +class Test_s3_configuration(_BaseCacheTest): + """ + Test S3 configuration (global and per-function). + """ + + def test1(self) -> None: + """ + Verify that set_s3_bucket stores bucket with s3:// prefix. + """ + # Prepare inputs. + bucket = "my-test-bucket" + # Run. + hcacsimp.set_s3_bucket(bucket) + actual = hcacsimp.get_s3_bucket() + # Check. + self.assertEqual(actual, "s3://my-test-bucket") + + def test2(self) -> None: + """ + Verify that set_s3_bucket preserves existing s3:// prefix. + """ + # Prepare inputs. + bucket = "s3://my-test-bucket" + # Run. 
+ hcacsimp.set_s3_bucket(bucket) + actual = hcacsimp.get_s3_bucket() + # Check. + self.assertEqual(actual, "s3://my-test-bucket") + + def test3(self) -> None: + """ + Verify that set_s3_prefix and get_s3_prefix work correctly. + """ + # Prepare inputs. + prefix = "cache/project1" + # Run. + hcacsimp.set_s3_prefix(prefix) + actual = hcacsimp.get_s3_prefix() + # Check. + self.assertEqual(actual, prefix) + + def test4(self) -> None: + """ + Verify that set_aws_profile and get_aws_profile work correctly. + """ + # Prepare inputs. + profile = "my-aws-profile" + # Run. + hcacsimp.set_aws_profile(profile) + actual = hcacsimp.get_aws_profile() + # Check. + self.assertEqual(actual, profile) + + def test5(self) -> None: + """ + Verify that per-function s3_bucket can be set and retrieved. + """ + # Prepare inputs. + func_name = "_cached_json_double" + s3_bucket = "s3://function-specific-bucket" + # Run. + hcacsimp.set_cache_property(func_name, "s3_bucket", s3_bucket) + actual = hcacsimp.get_cache_property(func_name, "s3_bucket") + # Check. + self.assertEqual(actual, s3_bucket) + + def test6(self) -> None: + """ + Verify that per-function s3_prefix can be set and retrieved. + """ + # Prepare inputs. + func_name = "_cached_json_double" + s3_prefix = "custom/prefix" + # Run. + hcacsimp.set_cache_property(func_name, "s3_prefix", s3_prefix) + actual = hcacsimp.get_cache_property(func_name, "s3_prefix") + # Check. + self.assertEqual(actual, s3_prefix) + + def test7(self) -> None: + """ + Verify that per-function aws_profile can be set and retrieved. + """ + # Prepare inputs. + func_name = "_cached_json_double" + aws_profile = "function-aws-profile" + # Run. + hcacsimp.set_cache_property(func_name, "aws_profile", aws_profile) + actual = hcacsimp.get_cache_property(func_name, "aws_profile") + # Check. + self.assertEqual(actual, aws_profile) + + def test8(self) -> None: + """ + Verify that auto_sync_s3 property can be set and retrieved. + """ + # Prepare inputs. 
+ func_name = "_cached_json_double" + auto_sync = True + # Run. + hcacsimp.set_cache_property(func_name, "auto_sync_s3", auto_sync) + actual = hcacsimp.get_cache_property(func_name, "auto_sync_s3") + # Check. + self.assertEqual(actual, auto_sync) + + +# ############################################################################# +# Test_per_function_s3_decorator +# ############################################################################# + + +class Test_per_function_s3_decorator(_BaseCacheTest): + """ + Test S3 configuration set via decorator parameters. + """ + + def test1(self) -> None: + """ + Test that all S3 decorator parameters are stored correctly. + """ + # Run. + s3_bucket = hcacsimp.get_cache_property( + "_test_per_function_s3_configs", "s3_bucket" + ) + s3_prefix = hcacsimp.get_cache_property( + "_test_per_function_s3_configs", "s3_prefix" + ) + aws_profile = hcacsimp.get_cache_property( + "_test_per_function_s3_configs", "aws_profile" + ) + # Check. + self.assertEqual(s3_bucket, "s3://decorator-bucket") + self.assertEqual(s3_prefix, "decorator/prefix") + self.assertEqual(aws_profile, "decorator-profile") + + +# ############################################################################# +# Test__get_s3_cache_path +# ############################################################################# + + +class Test__get_s3_cache_path(_BaseCacheTest): + """ + Test _get_s3_cache_path function. + """ + + def test1(self) -> None: + """ + Test S3 path with global bucket and no prefix. + """ + # Prepare inputs. + hcacsimp.set_s3_bucket("s3://my-bucket") + # Run. + actual = hcacsimp._get_s3_cache_path("_cached_json_double") + # Check. + self.assertIn("s3://my-bucket", actual) + self.assertIn("_cached_json_double", actual) + + def test2(self) -> None: + """ + Test S3 path with global bucket and prefix. + """ + # Prepare inputs. + hcacsimp.set_s3_bucket("s3://my-bucket") + hcacsimp.set_s3_prefix("cache/data") + # Run. 
+ actual = hcacsimp._get_s3_cache_path("_cached_json_double") + # Check. + self.assertIn("s3://my-bucket/cache/data", actual) + self.assertIn("_cached_json_double", actual) + + def test3(self) -> None: + """ + Test S3 path with per-function bucket overriding global. + """ + # Prepare inputs. + hcacsimp.set_s3_bucket("s3://global-bucket") + hcacsimp.set_cache_property( + "_cached_json_double", "s3_bucket", "s3://function-bucket" + ) + # Run. + actual = hcacsimp._get_s3_cache_path("_cached_json_double") + # Check. + self.assertIn("s3://function-bucket", actual) + self.assertNotIn("global-bucket", actual) + + def test4(self) -> None: + """ + Test S3 path with per-function prefix overriding global. + """ + # Prepare inputs. + hcacsimp.set_s3_bucket("s3://my-bucket") + hcacsimp.set_s3_prefix("global/prefix") + hcacsimp.set_cache_property( + "_cached_json_double", "s3_prefix", "function/prefix" + ) + # Run. + actual = hcacsimp._get_s3_cache_path("_cached_json_double") + # Check. + self.assertIn("s3://my-bucket/function/prefix", actual) + self.assertNotIn("global/prefix", actual) + + def test5(self) -> None: + """ + Test S3 path with decorator-configured bucket and prefix. + """ + # Run. + actual = hcacsimp._get_s3_cache_path("_test_per_function_s3_configs") + # Check. + self.assertIn("s3://decorator-bucket/decorator/prefix", actual) + self.assertIn("_test_per_function_s3_configs", actual) + + def test6(self) -> None: + """ + Test that ValueError is raised when S3 bucket is not configured. + """ + # Run and check. 
+ with self.assertRaises(ValueError) as cm: + hcacsimp._get_s3_cache_path("_cached_json_double") + self.assertEqual(str(cm.exception), "S3 bucket not configured") + + +# ############################################################################# +# Test__extract_func_name_from_cache_file +# ############################################################################# + + +class Test__extract_func_name_from_cache_file(_BaseCacheTest): + """ + Test _extract_func_name_from_cache_file function. + """ + + def test1(self) -> None: + """ + Test extraction from JSON cache file with standard prefix. + """ + # Prepare inputs. + cache_file_name = "tmp.cache_simple._cached_json_double.json" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertEqual(actual, "_cached_json_double") + + def test2(self) -> None: + """ + Test extraction from pickle cache file. + """ + # Prepare inputs. + cache_file_name = "tmp.cache_simple._cached_pickle_square.pkl" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertEqual(actual, "_cached_pickle_square") + + def test3(self) -> None: + """ + Test extraction with custom prefix. + """ + # Prepare inputs. + cache_file_name = "my_project_cache._my_function.json" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertEqual(actual, "_my_function") + + def test4(self) -> None: + """ + Test extraction returns None for invalid file name. + """ + # Prepare inputs. + cache_file_name = "invalid_filename" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertIsNone(actual) + + def test5(self) -> None: + """ + Test extraction returns None for file without extension. + """ + # Prepare inputs. + cache_file_name = "cache.function_name" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. 
+ self.assertIsNone(actual) + + def test6(self) -> None: + """ + Test extraction with custom prefix and dir. + """ + # Prepare inputs. + cache_file_name = "my_dir/my_project_cache._my_function.json" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertEqual(actual, "_my_function") + + def test7(self) -> None: + """ + Test extraction when custom prefix has dots in it. + """ + # Prepare inputs. + cache_file_name = "dir1/dir2/my.project.cache._my_function.json" + # Run. + actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) + # Check. + self.assertEqual(actual, "_my_function") + + +# ############################################################################# +# Test__check_s3_configured +# ############################################################################# + + +class Test__check_s3_configured(_BaseCacheTest): + """ + Test _check_s3_configured function. + """ + + def test1(self) -> None: + """ + Test returns False when S3 bucket is not explicitly configured. + """ + # Run. + actual = hcacsimp._check_s3_configured() + # Check. + self.assertFalse(actual) + + def test2(self) -> None: + """ + Test returns True when global S3 bucket is configured. + """ + # Prepare inputs. + hcacsimp.set_s3_bucket("s3://my-bucket") + # Run. + actual = hcacsimp._check_s3_configured() + # Check. + self.assertTrue(actual) + + def test3(self) -> None: + """ + Test returns True when per-function S3 bucket is configured. + """ + # Prepare inputs. + func_name = "_cached_json_double" + hcacsimp.set_cache_property( + func_name, "s3_bucket", "s3://function-bucket" + ) + # Run. + actual = hcacsimp._check_s3_configured(func_name) + # Check. + self.assertTrue(actual) + + def test4(self) -> None: + """ + Test per-function bucket overrides missing global bucket. + """ + # Prepare inputs. + func_name = "_cached_json_double" + hcacsimp.set_cache_property( + func_name, "s3_bucket", "s3://function-bucket" + ) + # Run. 
+ actual_with_func = hcacsimp._check_s3_configured(func_name) + actual_without_func = hcacsimp._check_s3_configured() + # Check. + self.assertTrue(actual_with_func) + self.assertFalse(actual_without_func) + + def test5(self) -> None: + """ + Test with decorator-configured S3 bucket. + """ + # Run. + actual = hcacsimp._check_s3_configured("_test_per_function_s3_configs") + # Check. + self.assertTrue(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py new file mode 100644 index 000000000..4ab1219a4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py @@ -0,0 +1,335 @@ +import logging +import os +from typing import Any, List + +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hcfile as hcfile + +_LOG = logging.getLogger(__name__) + + +def _create_test_file(self_: Any, filename: str, content: str) -> str: + """ + Create a test file with given content in the scratch directory. + + :param self_: Test case whose scratch space receives the file + :param filename: Name of file to create + :param content: Content to write to file (dedented first) + :return: Full path to created file + """ + scratch_dir = self_.get_scratch_space() + file_path = os.path.join(scratch_dir, filename) + content = hprint.dedent(content) + hio.to_file(file_path, content) + return file_path + + +def _create_cfile(self_: Any, cfile_content: List[str]) -> str: + """ + Create a cfile with TODOs in the scratch directory.
+ + :param self_: Test case forwarded to `_create_test_file` + :param cfile_content: List of TODO lines to write + :return: Full path to created cfile + """ + content = "\n".join(cfile_content) + return _create_test_file(self_, "cfile.txt", content) + + +# ############################################################################# +# Test_parse_cfile1 +# ############################################################################# + + +class Test_parse_cfile1(hunitest.TestCase): + def helper(self, cfile_content: str, expected: str) -> None: + """ + Helper function to test parsing a cfile. + + :param cfile_content: Content to write to the test cfile + :param expected: Expected output from parse_cfile + """ + # Prepare inputs. + cfile_path = _create_test_file(self, "cfile.txt", cfile_content) + # Run function under test. + actual = hcfile.parse_cfile(cfile_path) + actual = "\n".join(map(str, actual)) + # Check output. + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test parsing a cfile with valid entries. + """ + cfile_content = r""" + file1.py:10: Add docstring + file2.py:20: Add type hints + file3.py:30: Fix formatting + """ + expected = r""" + ('file1.py', '10', ' Add docstring') + ('file2.py', '20', ' Add type hints') + ('file3.py', '30', ' Fix formatting') + """ + self.helper(cfile_content, expected) + + def test2(self) -> None: + """ + Test parsing linter-style entries; one without a line number is dropped.
+ """ + cfile_content = r""" + dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] + dev_scripts_helpers/llms/llm_cli.py:23: [C0301(line-too-long), ] Line too long (109/100) [pylint] + helpers/hio.py: 'pandas' is imported multiple times [normalize_imports] + helpers/hmarkdown.py:770:38: W605 invalid escape sequence '\S' [flake8] + """ + expected = r""" + ('dev_scripts_helpers/llms/llm_transform.py', '63', "33: F821 undefined name '_extract_bullet_points' [flake8]") + ('dev_scripts_helpers/llms/llm_cli.py', '23', ' [C0301(line-too-long), ] Line too long (109/100) [pylint]') + ('helpers/hmarkdown.py', '770', "38: W605 invalid escape sequence '\\S' [flake8]") + """ + self.helper(cfile_content, expected) + + def test_empty_file(self) -> None: + """ + Test parsing an empty cfile. + """ + self.helper("", "") + + def test_invalid_entries(self) -> None: + """ + Test parsing a cfile with invalid entries that should be skipped. + """ + cfile_content = r""" + file1.py:10: Valid entry + Invalid line without proper format + file2.py:20: Another valid entry + :30: Missing filename + file3.py:: Missing line number + """ + expected = r""" + ('file1.py', '10', ' Valid entry') + ('file2.py', '20', ' Another valid entry') + (' ', '30', ' Missing filename') + """ + self.helper(cfile_content, expected) + + +# ############################################################################# +# Test_inject_todos_from_cfile1 +# ############################################################################# + + +class Test_inject_todos_from_cfile1(hunitest.TestCase): + def _inject_todos(self, cfile_content: str) -> None: + """ + Helper to inject TODOs with standard parameters. + """ + todo_user = "user" + comment_prefix = "#" + hcfile.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) + + def test1(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file. + """ + # Create a test file. 
+ test_file_content = """ + def hello(msg): + print(msg) + + def world(): + print("world") + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:1: Add type hints.", + f"{file_path}:4: Add docstring.", + ] + _create_cfile(self, cfile_content) + # Run the function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add type hints. + def hello(msg): + print(msg) + + # TODO(user): Add docstring. + def world(): + print("world") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_one_line_file(self) -> None: + """ + Test injecting a TODO into a one-line file. + """ + # Create a one-line test file. + test_file_content = """ + print("hello") + """ + file_path = _create_test_file(self, "empty.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [f"{file_path}:1: Add content to empty file."] + _create_cfile(self, cfile_content) + # Run the function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add content to empty file. + print("hello") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_invalid_line_numbers(self) -> None: + """ + Test handling of TODOs with invalid line numbers. + """ + # Create a test file. + test_file_content = """ + line1 + line2 + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with invalid line numbers. + cfile_content = [ + f"{file_path}:999: This line number doesn't exist.", + ] + _create_cfile(self, cfile_content) + # This should raise an assertion error due to invalid line numbers. + with self.assertRaises(AssertionError) as err: + self._inject_todos("\n".join(cfile_content)) + # Check output.
+ expected = """ + ################################################################################ + * Failed assertion * + 998 < 2 + ################################################################################ + """ + self.assert_equal( + str(err.exception), expected, dedent=True, fuzzy_match=True + ) + + def test2(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file with a complex + class. + """ + # Create a test file. + test_file_content = """ + import logging + from typing import List, Optional + + class DataProcessor: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + def process_batch(self, items): + for item in items: + self.data.append(self._transform(item)) + + def _transform(self, item): + return item.upper() + + def get_results(self): + return self.data + + def clear(self): + self.data = [] + """ + file_path = _create_test_file(self, "test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:4: Add class docstring explaining purpose and usage", + f"{file_path}:5: Add type hints for instance variables", + f"{file_path}:9: Add type hints for items parameter", + f"{file_path}:10: Consider adding batch size validation", + f"{file_path}:13: Add error handling for non-string inputs", + f"{file_path}:16: Add return type hint and docstring", + f"{file_path}:19: Add docstring explaining clear behavior", + ] + _create_cfile(self, cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. 
+ actual = hio.from_file(file_path) + expected = """ + import logging + from typing import List, Optional + + # TODO(user): Add class docstring explaining purpose and usage + class DataProcessor: + # TODO(user): Add type hints for instance variables + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + # TODO(user): Add type hints for items parameter + def process_batch(self, items): + # TODO(user): Consider adding batch size validation + for item in items: + self.data.append(self._transform(item)) + + # TODO(user): Add error handling for non-string inputs + def _transform(self, item): + return item.upper() + + # TODO(user): Add return type hint and docstring + def get_results(self): + return self.data + + # TODO(user): Add docstring explaining clear behavior + def clear(self): + self.data = [] + """ + self.assert_equal(actual, expected, dedent=True) + + def test3(self) -> None: + """ + Test injecting TODOs from a cfile into multiple Python files. + """ + # Create first test file. + test_file1_content = """ + def foo(): + pass + """ + file_path1 = _create_test_file(self, "test1.py", test_file1_content) + # Create second test file. + test_file2_content = """ + def bar(): + return None + """ + file_path2 = _create_test_file(self, "test2.py", test_file2_content) + # Create cfile. + cfile_content = [ + f"{file_path1}:1: Add docstring for foo.", + f"{file_path2}:1: Add docstring for bar.", + f"{file_path2}:2: Add type hint for return.", + ] + _create_cfile(self, cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual1 = hio.from_file(file_path1) + expected1 = """ + # TODO(user): Add docstring for foo. + def foo(): + pass + """ + self.assert_equal(actual1, expected1, dedent=True) + # + actual2 = hio.from_file(file_path2) + expected2 = """ + # TODO(user): Add docstring for bar. + def bar(): + # TODO(user): Add type hint for return. 
+ return None + """ + self.assert_equal(actual2, expected2, dedent=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py new file mode 100644 index 000000000..d8f2c19e2 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py @@ -0,0 +1,81 @@ +import logging +import os + +import pandas as pd + +import helpers.hcsv as hcsv +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_convert_csv_to_dict +# ############################################################################# + + +class Test_convert_csv_to_dict(hunitest.TestCase): + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + actual_result = hcsv.convert_csv_to_dict(test_csv_path, remove_nans=True) + expected_result = { + "col1": ["a", "b", "c", "d"], + "col2": ["a", "b"], + "col3": ["a", "b", "c"], + } + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_from_typed_csv +# ############################################################################# + + +class Test_from_typed_csv(hunitest.TestCase): + """ + Check the opportunity to load correctly. + + .csv file with dtype param, which exist in .types prefix file. And + finally it checks that dtypes of loaded dataframe didn't change + compared with the original one. 
+ """ + + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + os.path.join(dir_name, "test.csv.types") + actual_result = ( + hcsv.from_typed_csv(test_csv_path) + .dtypes.apply(lambda x: x.name) + .to_dict() + ) + expected_result = { + "A": "int64", + "B": "float64", + "C": "object", + "D": "object", + "E": "int64", + } + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_to_typed_csv +# ############################################################################# + + +class Test_to_typed_csv(hunitest.TestCase): + """ + Check whether the function 'to_typed_csv' creates a file with the '.types' + suffix or not. + """ + + def test1(self) -> None: + dir_name = self.get_input_dir() + test_csv_path = os.path.join(dir_name, "test.csv") + test_csv_types_path = os.path.join(dir_name, "test.csv.types") + df = pd.read_csv(test_csv_path) + hcsv.to_typed_csv(df, test_csv_path) + self.assertTrue(os.path.exists(test_csv_types_path)) + os.remove(test_csv_types_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py new file mode 100644 index 000000000..aaa5c0c9e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py @@ -0,0 +1,299 @@ +""" +Import as: + +import helpers.test.test_hdataframe as httdat +""" + +import collections +import logging +import os + +import numpy as np +import pandas as pd + +import helpers.hdataframe as hdatafr +import helpers.hpandas as hpandas +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +#
############################################################################# +# Test_filter_data_by_values1 +# ############################################################################# + + +class Test_filter_data_by_values1(hunitest.TestCase): + def test_conjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + data = data.add_prefix("col_") + filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_values(data, filters, "and", info) + # TODO(gp): Factor out the common code. + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}\n" + f"{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + def test_disjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + data = data.add_prefix("col_") + filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_values(data, filters, "or", info) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}" + f"\n{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + +# ############################################################################# +# Test_filter_data_by_comparison +# ############################################################################# + + +class Test_filter_data_by_comparison(hunitest.TestCase): + def test_conjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + data = data.add_prefix("col_") + filters = {"col_0": (("gt", 1), ("lt", 7)), "col_1": ("eq", 5)} + info: 
collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_comparison( + data, filters, "and", info + ) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}\n" + f"{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + def test_disjunction1(self) -> None: + data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + data = data.add_prefix("col_") + filters = {"col_0": ("gt", 2), "col_1": ("eq", 5)} + info: collections.OrderedDict = collections.OrderedDict() + filtered_data = hdatafr.filter_data_by_comparison( + data, filters, "or", info + ) + str_output = ( + f"{hprint.frame('data')}\n" + f"{hpandas.df_to_str(data)}\n" + f"{hprint.frame('filters')}\n{filters}\n" + f"{hprint.frame('filtered_data')}" + f"\n{hpandas.df_to_str(filtered_data)}\n" + f"{hunitest.convert_info_to_string(info)}" + ) + self.check_string(str_output) + + +# ############################################################################# +# TestFilterDataByMethod +# ############################################################################# + + +class TestFilterDataByMethod(hunitest.TestCase): + """ + Test was generated automatically with Playback. + """ + + def test1(self) -> None: + # Define input variables. + input_path = os.path.join(self.get_input_dir(), "test.txt") + data = pd.read_csv(input_path, index_col=0) + filters = { + "Frequency": {"isin": {"values": ["Monthly", "Weekly", "Daily"]}}, + "source_code": {"isin": {"values": ["WIND"]}}, + "is_downloaded": {"isin": {"values": ["success"]}}, + } + mode = "and" + info: collections.OrderedDict = collections.OrderedDict() + # Call function to test. + actual = hdatafr.filter_data_by_method( + df=data, filters=filters, mode=mode, info=info + ) + actual = hpandas.df_to_str(actual, precision=3) + # Check output. 
+ self.check_string(actual, fuzzy_match=True) + + +# ############################################################################# +# Test_apply_nan_mode +# ############################################################################# + + +class Test_apply_nan_mode(hunitest.TestCase): + @staticmethod + def _get_series_with_nans(seed: int) -> pd.Series: + date_range = {"start": "1/1/2010", "periods": 40, "freq": "M"} + series = hpandas.get_random_df( + num_cols=1, + seed=seed, + date_range_kwargs=date_range, + )[0] + series[:3] = np.nan + series[-3:] = np.nan + series[5:7] = np.nan + return series + + def test1(self) -> None: + """ + Test for `mode=leave_unchanged`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series) + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test2(self) -> None: + """ + Test for `mode="drop"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="drop") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test3(self) -> None: + """ + Test for `mode="ffill"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="ffill") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test4(self) -> None: + """ + Test for `mode="ffill_and_drop_leading"`. + """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="ffill_and_drop_leading") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + def test5(self) -> None: + """ + Test for `mode="fill_with_zero"`. 
+ """ + series = self._get_series_with_nans(seed=1) + actual = hdatafr.apply_nan_mode(series, mode="fill_with_zero") + actual_string = hpandas.df_to_str(actual, num_rows=None) + self.check_string(actual_string) + + # Smoke test for empty input. + def test6(self) -> None: + series = pd.Series(dtype="float64") + hdatafr.apply_nan_mode(series) + + +# ############################################################################# +# Test_compute_points_per_year_for_given_freq +# ############################################################################# + + +class Test_compute_points_per_year_for_given_freq(hunitest.TestCase): + def test1(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("T") + np.testing.assert_equal(actual, 525780.125) + + def test2(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("B") + np.testing.assert_equal(actual, 260.875) + + def test3(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("D") + np.testing.assert_equal(actual, 365.25) + + def test4(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("W") + np.testing.assert_equal(actual, 52.25) + + def test5(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("M") + np.testing.assert_equal(actual, 12.0) + + def test6(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("Y") + np.testing.assert_equal(actual, 1.0) + + def test7(self) -> None: + actual = hdatafr.compute_points_per_year_for_given_freq("0D") + np.testing.assert_equal(actual, 0.0) + + +# ############################################################################# +# TestRemoveDuplicates +# ############################################################################# + + +class TestRemoveDuplicates(hunitest.TestCase): + def test_remove_duplicates1(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": 
[3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = ["dummy_value_1", "dummy_value_2"] + control_column = None + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 0 1 A 3 3 + 1 2 A 2 2""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_remove_duplicates2(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = None + control_column = "knowledge_timestamp" + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 0 1 A 3 3 + 1 2 A 2 2 + 2 1 A 1 1""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_remove_duplicates3(self) -> None: + test_data = { + "dummy_value_1": [1, 2, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [3, 2, 1], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + duplicate_columns = ["dummy_value_1", "dummy_value_2"] + control_column = "knowledge_timestamp" + actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) + actual = hpandas.df_to_str(actual) + expected = r""" + dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp + 1 2 A 2 2 + 2 1 A 1 1""" + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py new file mode 100644 index 000000000..fac073570 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py @@ -0,0 +1,932 @@ +import datetime +import logging + +import pandas as pd +import pytz + +import helpers.hdatetime as hdateti +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + +_STR_TS_NAIVE = "2021-01-04 09:30:00" +_STR_TS_UTC = "2021-01-04 09:30:00-00:00" +_STR_TS_ET = "2021-01-04 09:30:00-05:00" + +_PD_TS_NAIVE = pd.Timestamp("2021-01-04 09:30:00") +_PD_TS_UTC = pd.Timestamp("2021-01-04 09:30:00-00:00", tz="UTC") +_PD_TS_ET = pd.Timestamp("2021-01-04 09:30:00-05:00", tz="America/New_York") + +_DT_DT_NAIVE = datetime.datetime(2021, 1, 4, 9, 30, 0) +_DT_DT_UTC = pytz.timezone("UTC").localize(_DT_DT_NAIVE) +_DT_DT_ET = pytz.timezone("America/New_York").localize(_DT_DT_NAIVE) + + +# ############################################################################# +# Test_dassert_is_datetime1 +# ############################################################################# + + +class Test_dassert_is_datetime1(hunitest.TestCase): + def test_is_datetime1(self) -> None: + """ + Test valid datetime objects. + """ + objs = [ + _STR_TS_NAIVE, + _STR_TS_UTC, + _STR_TS_ET, + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + hdateti.dassert_is_datetime(obj) + + def test_is_datetime_fail1(self) -> None: + """ + Test invalid datetime objects. + """ + objs = [0, 0.0] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + with self.assertRaises(AssertionError): + hdateti.dassert_is_datetime(obj) + + def test_is_strict_datetime1(self) -> None: + """ + Test valid datetime objects. 
+ """ + objs = [ + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + hdateti.dassert_is_strict_datetime(obj) + + def test_is_strict_datetime_fail1(self) -> None: + """ + Test invalid datetime objects. + """ + objs = [0, _STR_TS_NAIVE, _STR_TS_UTC, _STR_TS_ET, "hello"] + for obj in objs: + _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) + with self.assertRaises(AssertionError): + hdateti.dassert_is_strict_datetime(obj) + + +# ############################################################################# +# Test_dassert_tz1 +# ############################################################################# + + +class Test_dassert_tz1(hunitest.TestCase): + def test_datetime_conversions(self) -> None: + # Get a tz-naive datetime. + dt = datetime.datetime(2020, 1, 5, 9, 30, 0) + hdateti.dassert_is_tz_naive(dt) + # Localize it to UTC. + dt_utc = pytz.timezone("UTC").localize(dt) + hdateti.dassert_has_tz(dt_utc) + hdateti.dassert_has_UTC_tz(dt_utc) + # Convert to ET. + dt_et = dt_utc.astimezone(pytz.timezone("US/Eastern")) + hdateti.dassert_has_tz(dt_et) + hdateti.dassert_has_ET_tz(dt_et) + # Convert it back to UTC. + dt_utc2 = dt_et.astimezone(pytz.timezone("UTC")) + hdateti.dassert_has_tz(dt_utc2) + hdateti.dassert_has_UTC_tz(dt_utc2) + self.assertEqual(dt_utc, dt_utc2) + # Make it naive. 
+ dt2 = dt_utc2.replace(tzinfo=None) + hdateti.dassert_is_tz_naive(dt2) + self.assertEqual(dt, dt2) + + def test_dassert_is_datetime1(self) -> None: + for obj in [ + _STR_TS_NAIVE, + _STR_TS_UTC, + _STR_TS_ET, + _PD_TS_NAIVE, + _PD_TS_UTC, + _PD_TS_ET, + _DT_DT_NAIVE, + _DT_DT_UTC, + _DT_DT_ET, + ]: + hdateti.dassert_is_datetime(obj) + + def test_dassert_is_datetime_assert1(self) -> None: + datetime_ = 5 + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_datetime(datetime_) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = r""" + * Failed assertion * + Instance of '5' is '' instead of '(, , )' + datetime_='5' of type '' is not a DateTimeType + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_to_datetime1(self) -> None: + """ + Apply `to_datetime` to a naive datetime. + """ + for obj in [ + _STR_TS_NAIVE, + _PD_TS_NAIVE, + _DT_DT_NAIVE, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_NAIVE + self.assertEqual(actual, expected) + # Check the tz info. + hdateti.dassert_is_tz_naive(actual) + with self.assertRaises(AssertionError): + hdateti.dassert_has_tz(actual) + hdateti.dassert_has_UTC_tz(actual) + hdateti.dassert_has_ET_tz(actual) + + def test_to_datetime2(self) -> None: + """ + Apply `to_datetime` to a UTC datetime. + """ + for obj in [ + _STR_TS_UTC, + _PD_TS_UTC, + _DT_DT_UTC, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_UTC + self.assertEqual(actual, expected) + # Check the tz info. + hdateti.dassert_has_tz(actual) + hdateti.dassert_has_UTC_tz(actual) + with self.assertRaises(AssertionError): + hdateti.dassert_is_tz_naive(actual) + hdateti.dassert_has_ET_tz(actual) + + def test_to_datetime3(self) -> None: + """ + Apply `to_datetime` to an ET datetime. 
+ """ + for obj in [ + _STR_TS_ET, + _PD_TS_ET, + _DT_DT_ET, + ]: + _LOG.debug("obj='%s' type='%s'", obj, type(obj)) + actual = hdateti.to_datetime(obj) + expected = _DT_DT_ET + self.assertEqual(str(actual), str(expected)) + + +# ############################################################################# +# Test_dassert_tz_compatible1 +# ############################################################################# + + +class Test_dassert_tz_compatible1(hunitest.TestCase): + def test_dassert_compatible_timestamp1(self) -> None: + """ + Both datetimes are naive. + """ + for datetime1 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: + for datetime2 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: + hdateti.dassert_tz_compatible(datetime1, datetime2) + + def test_dassert_compatible_timestamp2(self) -> None: + """ + Both datetimes have tz info. + """ + for datetime1 in [_PD_TS_UTC, _PD_TS_ET]: + for datetime2 in [_DT_DT_UTC, _DT_DT_ET]: + hdateti.dassert_tz_compatible(datetime1, datetime2) + + def test_dassert_compatible_timestamp_assert1(self) -> None: + """ + Test a single not compatible pair of datetimes and check the raised + exception. + """ + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_tz_compatible(_PD_TS_NAIVE, _DT_DT_UTC) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = """ + * Failed assertion * + 'False' + == + 'True' + datetime1='2021-01-04 09:30:00' and datetime2='2021-01-04 09:30:00+00:00' are not compatible + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_dassert_compatible_timestamp_assert2(self) -> None: + """ + Test a pairs of non-compatible datetimes making sure the assertion is + raised. 
+ """ + for datetime1 in [ + _PD_TS_NAIVE, + _DT_DT_NAIVE, + _PD_TS_NAIVE, + _DT_DT_NAIVE, + ]: + for datetime2 in [_PD_TS_UTC, _PD_TS_ET, _DT_DT_UTC, _DT_DT_ET]: + with self.assertRaises(AssertionError): + hdateti.dassert_tz_compatible(datetime1, datetime2) + + +# ############################################################################# +# Test_dassert_have_same_tz1 +# ############################################################################# + + +class Test_dassert_have_same_tz1(hunitest.TestCase): + """ + Test an assertion that checks that timezones are equal for input + timestamps. + """ + + def test1(self) -> None: + """ + Timezones are equal. + """ + hdateti.dassert_have_same_tz(_DT_DT_ET, _PD_TS_ET) + + def test2(self) -> None: + """ + Both timestamps are tz-naive. + """ + hdateti.dassert_have_same_tz(_PD_TS_NAIVE, _DT_DT_NAIVE) + + def test3(self) -> None: + """ + Different timezones. + """ + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_have_same_tz(_DT_DT_ET, _DT_DT_UTC) + actual = str(cm.exception) + # pylint: disable=line-too-long + expected = """ + * Failed assertion * + 'America/New_York' + == + 'UTC' + datetime1=2021-01-04 09:30:00-05:00 (datetime1.tzinfo=America/New_York) datetime2=2021-01-04 09:30:00+00:00 (datetime2.tzinfo=UTC) + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Same timezone but different DST mode (i.e. EST vs EDT). 
+ """ + ts_est = pd.Timestamp("2023-03-12 01:55:00-05:00", tz="America/New_York") + ts_edt = pd.Timestamp("2023-03-12 03:00:00-04:00", tz="America/New_York") + hdateti.dassert_have_same_tz(ts_est, ts_edt) + + +# ############################################################################# +# Test_get_current_time1 +# ############################################################################# + + +class Test_get_current_time1(hunitest.TestCase): + def test_get_current_time_UTC(self) -> None: + tz = "UTC" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_has_UTC_tz(dt) + + def test_get_current_time_ET(self) -> None: + tz = "ET" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_has_ET_tz(dt) + + def test_get_current_time_naive_UTC(self) -> None: + tz = "naive_UTC" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_is_tz_naive(dt) + + def test_get_current_time_naive_ET(self) -> None: + tz = "naive_ET" + dt = hdateti.get_current_time(tz) + _LOG.debug("tz=%s -> dt=%s", tz, dt) + hdateti.dassert_is_tz_naive(dt) + + +# ############################################################################# +# Test_to_generalized_datetime +# ############################################################################# + + +class Test_to_generalized_datetime(hunitest.TestCase): + def test_srs1(self) -> None: + srs = pd.Series(["2010-01-01", "2010-01-02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_index1(self) -> None: + idx = pd.Index(["2010-01-01", "2010-01-02"]) + actual = hdateti.to_generalized_datetime(idx) + expected = pd.Index( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_index_equal(actual, expected) + + def test_daily1(self) -> None: + srs = pd.Series(["1 Jan 
2010", "2 Jan 2010"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_weekly1(self) -> None: + srs = pd.Series(["2021-W14", "2021-W15"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-04-10"), pd.Timestamp("2021-04-17")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_semiannual1(self) -> None: + srs = pd.Series(["2021-S1", "2021-S2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_semiannual2(self) -> None: + srs = pd.Series(["2021/S1", "2021/S2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_bimonthly1(self) -> None: + srs = pd.Series(["2021-B1", "2021-B2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-01-01"), pd.Timestamp("2021-03-01")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly1(self) -> None: + srs = pd.Series(["2020-M1", "2020-M2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly2(self) -> None: + srs = pd.Series(["2020M01", "2020M02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly3(self) -> None: + srs = pd.Series(["2020-01", "2020-02"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), 
pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly4(self) -> None: + srs = pd.Series(["2020 Jan", "2020 Feb"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_monthly5(self) -> None: + srs = pd.Series(["January 2020", "February 2020"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly1(self) -> None: + srs = pd.Series(["2020-Q1", "2020-Q2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly2(self) -> None: + srs = pd.Series(["2020Q1", "2020Q2"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_quarterly3(self) -> None: + srs = pd.Series(["Q1 2020", "Q2 2020"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] + ) + pd.testing.assert_series_equal(actual, expected) + + def test_annual1(self) -> None: + srs = pd.Series(["2021", "2022"]) + actual = hdateti.to_generalized_datetime(srs) + expected = pd.Series( + [pd.Timestamp("2021-12-31"), pd.Timestamp("2022-12-31")] + ) + pd.testing.assert_series_equal(actual, expected) + + +# ############################################################################# +# Test_find_bar_timestamp1 +# ############################################################################# + + +class Test_find_bar_timestamp1(hunitest.TestCase): + """ + Use mode="round". 
+ """ + + def helper1(self, current_timestamp: pd.Timestamp) -> None: + bar_duration_in_secs = 15 * 60 + max_distance_in_secs = 10 + actual = hdateti.find_bar_timestamp( + current_timestamp, + bar_duration_in_secs, + max_distance_in_secs=max_distance_in_secs, + ) + expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test1(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.helper1(current_timestamp) + + def test2(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:00:05", tz="UTC") + self.helper1(current_timestamp) + + def test3(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") + self.helper1(current_timestamp) + + def test4(self) -> None: + current_timestamp = pd.Timestamp( + "2021-09-09 08:01:59.500000+0000", tz="UTC" + ) + bar_duration_in_secs = 1 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="round" + ) + expected = pd.Timestamp("2021-09-09T08:02:00+0000", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + # /////////////////////////////////////////////////////////////////////////// + + def test5(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:20", tz="UTC") + with self.assertRaises(AssertionError) as cm: + self.helper1(current_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 40 <= 10 + current_timestamp=2021-09-09 07:59:20+00:00 is too distant from bar_timestamp=2021-09-09 08:00:00+00:00 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test6(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:10:20", tz="UTC") + with self.assertRaises(AssertionError) as cm: + self.helper1(current_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 280 <= 10 + current_timestamp=2021-09-09 08:10:20+00:00 is too distant from bar_timestamp=2021-09-09 08:15:00+00:00 + 
""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_find_bar_timestamp2 +# ############################################################################# + + +class Test_find_bar_timestamp2(hunitest.TestCase): + """ + Use mode="floor". + """ + + def test1(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") + bar_duration_in_secs = 15 * 60 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T07:45:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + current_timestamp = pd.Timestamp("2021-09-09T08:01:55", tz="UTC") + bar_duration_in_secs = 15 * 60 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + current_timestamp = pd.Timestamp( + "2021-09-09 08:01:59.500000+0000", tz="UTC" + ) + bar_duration_in_secs = 1 + # + actual = hdateti.find_bar_timestamp( + current_timestamp, bar_duration_in_secs, mode="floor" + ) + expected = pd.Timestamp("2021-09-09T08:01:59+0000", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_convert_seconds_to_minutes +# ############################################################################# + + +class Test_convert_seconds_to_minutes(hunitest.TestCase): + def test1(self) -> None: + """ + Check that conversion is implemented correcty. + """ + num_secs = 300 + actual = hdateti.convert_seconds_to_minutes(num_secs) + expected = int(num_secs / 60) + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Check that an error is raised when input is not an integer number of + minutes. 
+ """ + num_secs = 10 + with self.assertRaises(AssertionError) as cm: + hdateti.convert_seconds_to_minutes(num_secs) + actual = str(cm.exception) + expected = """ + * Failed assertion * + '10' + == + '0' + num_secs=10 is not an integer number of minutes + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_convert_unix_epoch_to_timestamp +# ############################################################################# + + +class Test_convert_unix_epoch_to_timestamp(hunitest.TestCase): + def test1(self) -> None: + """ + Test with default parameter values. + """ + epoch = 1631145600000 + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch) + expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Test with specified unit. + """ + epoch = 1631145600 + unit = "s" + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, unit=unit) + expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Test with specified timezone. + """ + epoch = 1631145600000 + tz = "US/Pacific" + actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, tz=tz) + expected = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_convert_timestamp_to_unix_epoch +# ############################################################################# + + +class Test_convert_timestamp_to_unix_epoch(hunitest.TestCase): + def test1(self) -> None: + """ + Test with default parameter values. 
+ """ + timestamp = pd.Timestamp("2021-09-09") + actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) + expected = 1631145600000 + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Test with specified unit. + """ + timestamp = pd.Timestamp("2021-09-09") + unit = "s" + actual = hdateti.convert_timestamp_to_unix_epoch( + timestamp=timestamp, unit=unit + ) + expected = 1631145600 + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Test for a timestamp with specified timezone. + """ + timestamp = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") + actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) + expected = 1631145600000 + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# +# Test_str_to_timestamp1 +# ############################################################################# + + +class Test_str_to_timestamp1(hunitest.TestCase): + """ + Test if string representation of datetime is converted correctly. 
+ """ + + def test1(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has a valid pattern for `datetime_str` + """ + datetime_str = "20230728_150513" + timezone_info = "US/Eastern" + datetime_format = "%Y%m%d_%H%M%S" + actual = hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=datetime_format + ) + expected = pd.Timestamp("2023-07-28 15:05:13-0400", tz="US/Eastern") + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has an valid pattern for `datetime_str` + - `timezone_info` is UTC + """ + datetime_str = "20230728_150513" + timezone_info = "UTC" + format = "%Y%m%d_%H%M%S" + actual = hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=format + ) + expected = pd.Timestamp("2023-07-28 15:05:13+0000", tz="UTC") + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + - `datetime_str` has a valid format + - `datetime_format` has an invalid pattern for `datetime_str` + """ + datetime_str = "28-07-2023 15:05:13" + timezone_info = "US/Eastern" + datetime_format = "%Y%m%d_%H%M%S" + # The datetime format does not match the string representation of datetime. + with self.assertRaises(ValueError) as err: + hdateti.str_to_timestamp( + datetime_str, timezone_info, datetime_format=datetime_format + ) + actual = str(err.exception) + self.check_string(actual) + + def test4(self) -> None: + """ + - `datetime_str` has an invalid format + - `datetime_format` is not defined + """ + datetime_str = "qwe28abc07-201234" + timezone_info = "US/Eastern" + # Invalid datetime, should raise a ValueError. 
+ with self.assertRaises(ValueError) as err: + hdateti.str_to_timestamp(datetime_str, timezone_info) + actual = str(err.exception) + self.check_string(actual) + + +# ############################################################################# +# Test_dassert_str_is_date +# ############################################################################# + + +class Test_dassert_str_is_date(hunitest.TestCase): + """ + Test that the function checks a string representation of date correctly. + """ + + def test1(self) -> None: + """ + - date has a valid format + """ + date_str = "20221101" + hdateti.dassert_str_is_date(date_str) + + def test2(self) -> None: + """ + - date has an invalid format + """ + date = "2022-11-01" + with self.assertRaises(ValueError) as err: + hdateti.dassert_str_is_date(date) + actual = str(err.exception) + self.check_string(actual) + + +# ############################################################################# +# Test_dassert_is_valid_timestamp +# ############################################################################# + + +class Test_dassert_is_valid_timestamp(hunitest.TestCase): + def test1(self) -> None: + """ + Test should not raise an exception when timestamp has a timezone. + """ + timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + hdateti.dassert_is_valid_timestamp(timestamp) + + def test2(self) -> None: + """ + Test should raise an exception when timestamp is without timezone info. + """ + # Set inputs. + timestamp = pd.Timestamp("2021-01-04 09:30:00") + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_valid_timestamp(timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 'None' is not 'None' + datetime_='2021-01-04 09:30:00' doesn't have timezone info + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test should not raise an exception when timestamp is none. 
+ """ + timestamp = None + hdateti.dassert_is_valid_timestamp(timestamp) + + def test4(self) -> None: + """ + Test should raise an exception when timestamp is of type string. + """ + # Set input. + timestamp = "2021-01-04 09:30:00" + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_is_valid_timestamp(timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '2021-01-04 09:30:00' is '' instead of '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_timestamp_lt +# ############################################################################# + + +class Test_dassert_timestamp_lt(hunitest.TestCase): + def test1(self) -> None: + """ + Test with valid timestamps where start is less than end. + """ + start_timestamp = pd.Timestamp("2021-01-02 09:30:00-00:00", tz="UTC") + end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test2(self) -> None: + """ + Test with equal timestamps, this is should raise an exception. + """ + # Set inputs. + start_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2021-02-02 09:30:00+00:00 < 2021-02-02 09:30:00+00:00 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test with start timestamp greater than end timestamp, this is should + raise an exception. + """ + # Set inputs. 
+ start_timestamp = pd.Timestamp( + "2021-02-04 09:30:00-05:00", tz="America/New_York" + ) + end_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + # Run. + with self.assertRaises(AssertionError) as cm: + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2021-02-04 09:30:00-05:00 < 2021-01-04 09:30:00-05:00 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test with start timestamp as None. + """ + start_timestamp = None + end_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test5(self) -> None: + """ + Test with end timestamp as None. + """ + start_timestamp = pd.Timestamp( + "2021-01-04 09:30:00-05:00", tz="America/New_York" + ) + end_timestamp = None + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) + + def test6(self) -> None: + """ + Test with both timestamps as None. + """ + start_timestamp = None + end_timestamp = None + hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py new file mode 100644 index 000000000..9dd38d00e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py @@ -0,0 +1,934 @@ +import collections +import logging +from typing import List, Tuple + +import helpers.hdbg as hdbg +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# TODO(gp): Make sure the coverage is 100%. 
+ +# ############################################################################# + + +# ############################################################################# +# Test_dassert1 +# ############################################################################# + + +# TODO(gp): Use a self.assert_equal() instead of a check_string() since this +# code needs to be stable. +class Test_dassert1(hunitest.TestCase): + """ + Test `dassert()`. + """ + + def test1(self) -> None: + """ + An assertion that is verified. + """ + hdbg.dassert(True) + + def test2(self) -> None: + """ + An assertion that is not verified. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False) + self.check_string(str(cm.exception)) + + def test3(self) -> None: + """ + An assertion with a message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, msg="hello") + self.check_string(str(cm.exception)) + + def test4(self) -> None: + """ + An assertion with a message to format. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s", "world") + self.check_string(str(cm.exception)) + + def test5(self) -> None: + """ + Too many parameters. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s", "world", "too_many") + self.check_string(str(cm.exception)) + + def test6(self) -> None: + """ + Not enough parameters. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert(False, "hello %s") + self.check_string(str(cm.exception)) + + def test7(self) -> None: + """ + Common error of calling `dassert()` instead of `dassert_eq()`. + + According to the user's intention the assertion should trigger, + but, because of using `dassert()` instead of `dassert_eq()`, the + assertion will not trigger. We notice that the user passed a + list instead of a string as `msg` and raise. 
+ """ + with self.assertRaises(AssertionError) as cm: + y = ["world"] + hdbg.dassert(y, ["hello"]) + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_eq1 +# ############################################################################# + + +class Test_dassert_eq1(hunitest.TestCase): + def test1(self) -> None: + hdbg.dassert_eq(1, 1) + + def test2(self) -> None: + hdbg.dassert_eq(1, 1, msg="hello world") + + def test3(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, msg="hello world") + self.check_string(str(cm.exception)) + + def test4(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, "hello %s", "world") + self.check_string(str(cm.exception)) + + def test5(self) -> None: + """ + Raise assertion with incorrect message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_eq(1, 2, "hello %s") + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_misc1 +# ############################################################################# + + +# TODO(gp): Break it in piece. 
+class Test_dassert_misc1(hunitest.TestCase): + # dassert_in + + def test_in1(self) -> None: + hdbg.dassert_in("a", "abc") + + def test_in2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_in("a", "xyz".split()) + self.check_string(str(cm.exception)) + + # dassert_is + + def test_is1(self) -> None: + a = None + hdbg.dassert_is(a, None) + + def test_is2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is("a", None) + self.check_string(str(cm.exception)) + + # dassert_isinstance + + def test_is_instance1(self) -> None: + hdbg.dassert_isinstance("a", str) + + def test_is_instance2(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_isinstance("a", int) + self.check_string(str(cm.exception)) + + def test_is_instance3(self) -> None: + hdbg.dassert_isinstance("a", (str, int)) + + def test_is_instance4(self) -> None: + hdbg.dassert_isinstance(5.0, (float, int)) + + def test_is_instance5(self) -> None: + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_isinstance("a", (float, int)) + # TODO(gp): Replace all check_string with assert_equal + self.check_string(str(cm.exception)) + + # dassert_set_eq + + def test_set_eq1(self) -> None: + a = [1, 2, 3] + b = [2, 3, 1] + hdbg.dassert_set_eq(a, b) + + def test_set_eq2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [2, 2, 1] + hdbg.dassert_set_eq(a, b) + # Check. + actual = str(cm.exception) + expected = """ + * Failed assertion * + val1 - val2=[3] + val2 - val1=[] + val1=[1, 2, 3] + set eq + val2=[1, 2] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_is_subset + + def test_is_subset1(self) -> None: + a = [1, 2] + b = [2, 1, 3] + hdbg.dassert_is_subset(a, b) + + def test_is_subset2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [4, 2, 1] + hdbg.dassert_is_subset(a, b) + # Check. 
+ actual = str(cm.exception) + expected = """ + * Failed assertion * + val1=[1, 2, 3] + issubset + val2=[1, 2, 4] + val1 - val2=[3] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_not_intersection + + def test_not_intersection1(self) -> None: + a = [1, 2, 3] + b = [4, 5] + hdbg.dassert_not_intersection(a, b) + + def test_not_intersection2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [4, 2, 1] + hdbg.dassert_not_intersection(a, b) + actual = str(cm.exception) + expected = """ + * Failed assertion * + val1=[1, 2, 3] + has no intersection + val2=[1, 2, 4] + val1.intersection(val2)=[1, 2] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # dassert_no_duplicates + + def test_no_duplicates1(self) -> None: + a = [1, 2, 3] + hdbg.dassert_no_duplicates(a) + + def test_no_duplicates2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 3, 3] + hdbg.dassert_no_duplicates(a) + self.check_string(str(cm.exception)) + + # dassert_is_sorted + + def test_is_sorted1(self) -> None: + a = [1, 2, 3] + hdbg.dassert_is_sorted(a) + + def test_is_sorted2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 4, 3] + hdbg.dassert_is_sorted(a) + self.check_string(str(cm.exception)) + + def test_is_sorted3(self) -> None: + """ + Test an array that is sorted descending. + """ + a = [3, 2, 2] + hdbg.dassert_is_sorted(a, sort_kwargs={"reverse": True}) + + def test_is_sorted4(self) -> None: + """ + Test an array that is not sorted descending. 
+ """ + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 4, 3] + sort_kwargs = {"reverse": True} + hdbg.dassert_is_sorted(a, sort_kwargs=sort_kwargs) + self.check_string(str(cm.exception)) + + # dassert_eq_all + + def test_eq_all1(self) -> None: + a = [1, 2, 3] + b = [1, 2, 3] + hdbg.dassert_eq_all(a, b) + + def test_eq_all2(self) -> None: + with self.assertRaises(AssertionError) as cm: + a = [1, 2, 3] + b = [1, 2, 4] + hdbg.dassert_eq_all(a, b) + self.check_string(str(cm.exception)) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_lgt1 +# ############################################################################# + + +class Test_dassert_lgt1(hunitest.TestCase): + def test1(self) -> None: + """ + No assertion raised since `0 <= 0 <= 3`. + """ + hdbg.dassert_lgt( + 0, 0, 3, lower_bound_closed=True, upper_bound_closed=True + ) + + def test2(self) -> None: + """ + Raise assertion since it is not true that `0 < 0 <= 3`. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lgt( + 0, 0, 3, lower_bound_closed=False, upper_bound_closed=True + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 0 < 0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Raise assertion since it is not true that `0 < 100 <= 3`. + + The formatting of the assertion is correct. 
+ """ + with self.assertRaises(AssertionError) as cm: + lower_bound_closed = False + upper_bound_closed = True + hdbg.dassert_lgt( + 0, + 100, + 3, + lower_bound_closed, + upper_bound_closed, + "hello %s", + "world", + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 100 <= 3 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_proportion1 +# ############################################################################# + + +class Test_dassert_is_proportion1(hunitest.TestCase): + def test1(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(0.1, "hello %s", "world") + + def test2(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(0.0, "hello %s", "world") + + def test3(self) -> None: + """ + Passing assertion with correct message and format. + """ + hdbg.dassert_is_proportion(1.0, "hello %s", "world") + + def test_assert1(self) -> None: + """ + Failing assertion with correct message and format. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello %s", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Failing assertion with correct message. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Failing assertion with incorrect message formatting. 
+ """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + Caught assertion while formatting message: + 'not all arguments converted during string formatting' + hello world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert4(self) -> None: + """ + Failing assertion with incorrect message formatting. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_proportion(1.01, "hello %s %s", "world") + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 1.01 <= 1 + Caught assertion while formatting message: + 'not enough arguments for format string' + hello %s %s world + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_container_type1 +# ############################################################################# + + +class Test_dassert_container_type1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c".split() + hdbg.dassert_container_type(list_, List, str) + + def test_assert1(self) -> None: + """ + Check that assertion fails since a list is not a tuple. + """ + list_ = "a b c".split() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type(list_, Tuple, str) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '['a', 'b', 'c']' is '' instead of 'typing.Tuple' + obj='['a', 'b', 'c']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Check that assertion fails since a list contains strings and ints. 
+ """ + list_ = ["a", 2, "c", "d"] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type(list_, list, str) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '2' is '' instead of '' + obj='['a', 2, 'c', 'd']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Like `test_assert2()` but with a message. + """ + list_ = ["a", 2, "c", "d"] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_container_type( + list_, list, str, "list_ is %s homogeneous", "not" + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '2' is '' instead of '' + list_ is not homogeneous + obj='['a', 2, 'c', 'd']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# _Animal +# ############################################################################# + + +class _Animal: + pass + + +# ############################################################################# +# _Man +# ############################################################################# + + +class _Man(_Animal): + pass + + +# ############################################################################# +# _Vegetable +# ############################################################################# + + +class _Vegetable: + pass + + +# ############################################################################# +# Test_dassert_issubclass1 +# ############################################################################# + + +class Test_dassert_issubclass1(hunitest.TestCase): + def test_man1(self) -> None: + """ + An instance of `_Man` passes the check against its own class `_Man`. + """ + man = _Man() + hdbg.dassert_issubclass(man, _Man) + + def test_man2(self) -> None: + """ + An instance of `_Man` descends from object. 
+ """ + man = _Man() + hdbg.dassert_issubclass(man, object) + + def test_man_fail1(self) -> None: + """ + An instance of `_Man` doesn't descend from `_Vegetable`. + """ + man = _Man() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_issubclass(man, _Vegetable) + # We need to purify the text from object references. + self.check_string(str(cm.exception), purify_text=True) + + def test_man_fail2(self) -> None: + """ + An instance of `_Man` doesn't descend from `int`. + """ + man = _Man() + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_issubclass(man, int) + self.check_string(str(cm.exception), purify_text=True) + + def test1(self) -> None: + """ + In Python everything is an object. + """ + hdbg.dassert_issubclass(5, object) + hdbg.dassert_issubclass(int, object) + hdbg.dassert_issubclass(int, (object, int)) + + def test_fail1(self) -> None: + """ + `issubclass` only accepts classes and not instances as second argument. + """ + with self.assertRaises(Exception) as cm: + hdbg.dassert_issubclass(int, 5.0) + self.check_string(str(cm.exception), purify_text=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_callable1 +# ############################################################################# + + +class Test_dassert_callable1(hunitest.TestCase): + def test1(self) -> None: + func = lambda x: x + hdbg.dassert_callable(func) + + def test_fail1(self) -> None: + func = 4 + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_callable(func) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Obj '4' of type '' is not callable + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# 
Test_dassert_all_defined_or_all_None +# ############################################################################# + + +class Test_dassert_all_defined_or_all_None(hunitest.TestCase): + def test1(self) -> None: + """ + Verify that test passes when all the values are defined. + """ + vals = [1, 2, 3] + hdbg.dassert_all_defined_or_all_None(vals) + + def test2(self) -> None: + """ + Verify that assertion is raised when at least one of the values is not + defined. + """ + vals = [1, 2, None, None] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_defined_or_all_None(vals) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Some values in list are defined and some are None: '[1, 2, None, None]' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Verify that test passes when all the values are not defined. + """ + vals = [None, None, None] + hdbg.dassert_all_defined_or_all_None(vals) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_related_params1 +# ############################################################################# + + +class Test_dassert_related_params1(hunitest.TestCase): + def test1(self) -> None: + obj = {"val1": 1, "val2": 1, "val3": "hello"} + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test2(self) -> None: + obj = {"val1": 0, "val2": None, "val3": ""} + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test3(self) -> None: + obj = {"val1": 1, "val2": 0, "val3": "hello"} + with self.assertRaises(Exception) as cm: + mode = "all_or_none_non_null" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + actual = str(cm.exception) + expected = """ + * Failed assertion * + All or none parameter 
should be non-null: + val2=0 + params={'val1': 1, 'val2': 0, 'val3': 'hello'} + message 'hello world' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_related_params2 +# ############################################################################# + + +class Test_dassert_related_params2(hunitest.TestCase): + def test1(self) -> None: + obj = {"val1": 1, "val2": 1, "val3": "hello"} + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test2(self) -> None: + obj = { + "val1": None, + "val2": None, + "val3": None, + } + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + + def test3(self) -> None: + obj = {"val1": None, "val2": None, "val3": "hello"} + with self.assertRaises(Exception) as cm: + mode = "all_or_none_non_None" + hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") + actual = str(cm.exception) + expected = """ + * Failed assertion * + All or none parameter should be non-None: + val1=None + params={'val1': None, 'val2': None, 'val3': 'hello'} + message 'hello world' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_all_attributes_are_same1 +# ############################################################################# + + +class Test_dassert_all_attributes_are_same1(hunitest.TestCase): + def test1(self) -> None: + """ + Wrong type of object. 
+ """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same(5, "a") + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '5' is '' instead of '' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test2(self) -> None: + """ + Wrong type of attribute. + """ + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same([1, 2, 3], 1) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Instance of '1' is '' instead of '' + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test3(self) -> None: + """ + Attribute with different values. + """ + Obj = collections.namedtuple("Obj", ["a", "b"]) + list_ = [Obj(1, 2), Obj(1, 3)] + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_all_attributes_are_same(list_, "b") + actual = str(cm.exception) + expected = """ + * Failed assertion * + Elements in the list have different values for + attribute b: + {2, 3} + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test4(self) -> None: + """ + Attribute with same values. + """ + Obj = collections.namedtuple("Obj", ["a", "b"]) + list_ = [Obj(1, 2), Obj(1, 2)] + hdbg.dassert_all_attributes_are_same(list_, "b") + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_lt +# ############################################################################# + + +class Test_dassert_lt(hunitest.TestCase): + def test1(self) -> None: + """ + Test that the function doesn't raise an exception if first value is + less than second value. + """ + val1 = 1 + val2 = 2 + hdbg.dassert_lt(val1, val2) + + def test2(self) -> None: + """ + Test that the function raises an exception if first value is equal to + second value. + """ + # Set inputs. 
+ val1 = 2 + val2 = 2 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2 < 2 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test that the function raises an exception if first value is greater + than second value. + """ + # Set inputs. + val1 = 3 + val2 = 2 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 3 < 2 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test that the function doesn't raise an exception when we pass string + inputs. + """ + val1 = "a" + val2 = "b" + hdbg.dassert_lt(val1, val2) + + def test5(self) -> None: + """ + Test that the function raises an exception when first value is greater + than second value with floats. + """ + # Set inputs. + val1 = 2.0 + val2 = 1.0 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_lt(val1, val2) + actual = str(cm.exception) + expected = """ + * Failed assertion * + 2.0 < 1.0 + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_is_integer +# ############################################################################# + + +class Test_dassert_is_integer(hunitest.TestCase): + def test1(self) -> None: + """ + Test that the function does not raise an exception for integer values. + """ + val = 5 + hdbg.dassert_is_integer(val) + + def test2(self) -> None: + """ + Test that the function does not raise an exception for float values + that represent an integer. + """ + val = 5.0 + hdbg.dassert_is_integer(val) + + def test3(self) -> None: + """ + Test that the function raises an exception for float values that do not + represent an integer. 
+ """ + # Set inputs. + val = 5.5 + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_integer(val) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Invalid val='5.5' of type '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Test that the function raises an exception for non-integer and + non-float types. + """ + # Set inputs. + val = "5" + # Run. + with self.assertRaises(AssertionError) as cm: + hdbg.dassert_is_integer(val) + actual = str(cm.exception) + expected = """ + * Failed assertion * + Invalid val='5' of type '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py new file mode 100644 index 000000000..b3f6d7f04 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py @@ -0,0 +1,107 @@ +import logging + +import config_root.config as cconfig +import helpers.hdict as hdict +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_get_nested_dict_iterator +# ############################################################################# + + +class Test_get_nested_dict_iterator(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic case with no nesting. + """ + dict_ = {"key0": "value0", "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), "value0"), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test2(self) -> None: + """ + Test simple nested case. 
+ """ + dict_ = { + "key0": {"key00": "value00", "key01": "value01"}, + "key1": "value1", + } + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [ + (("key0", "key00"), "value00"), + (("key0", "key01"), "value01"), + (("key1",), "value1"), + ] + self.assertListEqual(actual_result, expected_result) + + def test3(self) -> None: + """ + Test multilevel nested case. + """ + dict_ = {"key0": {"key00": {"key000": "value000"}}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [ + (("key0", "key00", "key000"), "value000"), + (("key1",), "value1"), + ] + self.assertListEqual(actual_result, expected_result) + + def test4(self) -> None: + """ + Test flat case with `None` value. + """ + dict_ = {"key0": "value0", "key1": None} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), "value0"), (("key1",), None)] + self.assertListEqual(actual_result, expected_result) + + def test5(self) -> None: + """ + Test nested case with `None` value. + """ + dict_ = {"key0": {"key00": None}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), None), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test6(self) -> None: + """ + Test flat case with empty dict value. + """ + dict_ = {"key0": {}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), {}), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test7(self) -> None: + """ + Test nested case with empty dict value. 
+ """ + dict_ = {"key0": {"key00": {}}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), {}), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test8(self) -> None: + """ + Test flat case with empty Config value. + """ + config = cconfig.Config() + dict_ = {"key0": config, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0",), config), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) + + def test9(self) -> None: + """ + Test nested case with empty Config value. + """ + config = cconfig.Config() + dict_ = {"key0": {"key00": config}, "key1": "value1"} + actual_result = list(hdict.get_nested_dict_iterator(dict_)) + expected_result = [(("key0", "key00"), config), (("key1",), "value1")] + self.assertListEqual(actual_result, expected_result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py new file mode 100644 index 000000000..7220d1474 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py @@ -0,0 +1,624 @@ +import logging +import os +import unittest.mock as umock +from typing import List, Optional, Tuple + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_shared_root_path1 +# 
############################################################################# + + +class Test_replace_shared_root_path1(hunitest.TestCase): + def test1(self) -> None: + """ + Test replacing shared root path. + """ + # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. + mock_mapping = { + "/data/shared1": "/shared_folder1", + "/data/shared2": "/shared_folder2", + } + with umock.patch.object( + hserver, "get_shared_data_dirs", return_value=mock_mapping + ): + # Test replacing shared root path. + path1 = "/data/shared1/asset1" + act1 = hdocker.replace_shared_root_path(path1) + exp1 = "/shared_folder1/asset1" + self.assertEqual(act1, exp1) + # + path2 = "/data/shared2/asset2" + act2 = hdocker.replace_shared_root_path(path2) + exp2 = "/shared_folder2/asset2" + self.assertEqual(act2, exp2) + # + path3 = 'object("/data/shared2/asset2/item")' + act3 = hdocker.replace_shared_root_path(path3) + exp3 = 'object("/shared_folder2/asset2/item")' + self.assertEqual(act3, exp3) + + def test2(self) -> None: + """ + Test replacing shared root path with the `replace_ecs_tokyo` parameter. + """ + # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. + mock_mapping = { + "/data/shared": "/shared_folder", + } + with umock.patch.object( + hserver, "get_shared_data_dirs", return_value=mock_mapping + ): + # Test if `ecs_tokyo` is replaced if `replace_ecs_tokyo = True`. + path1 = 'object("/data/shared/ecs_tokyo/asset2/item")' + replace_ecs_tokyo = True + act1 = hdocker.replace_shared_root_path( + path1, replace_ecs_tokyo=replace_ecs_tokyo + ) + exp1 = 'object("/shared_folder/ecs/asset2/item")' + self.assertEqual(act1, exp1) + # Test if `ecs_tokyo` is not replaced if `replace_ecs_tokyo` is not + # defined. 
+ path2 = 'object("/data/shared/ecs_tokyo/asset2/item")' + act2 = hdocker.replace_shared_root_path(path2) + exp2 = 'object("/shared_folder/ecs_tokyo/asset2/item")' + self.assertEqual(act2, exp2) + + +# ############################################################################# +# Test_convert_to_docker_path1 +# ############################################################################# + + +class Test_convert_to_docker_path1(hunitest.TestCase): + @staticmethod + def convert_caller_to_callee_docker_path( + in_file_path: str, + is_caller_host: bool, + use_sibling_container_for_callee: bool, + check_if_exists: bool, + ) -> Tuple[str, str]: + """ + Prepare inputs and call the function to convert a file name to Docker + paths. + + :return: A tuple containing + - docker_file_path: the Docker file path + - mount: the Docker mount string + """ + ( + source_host_path, + callee_mount_path, + mount, + ) = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + docker_file_path = hdocker.convert_caller_to_callee_docker_path( + in_file_path, + source_host_path, + callee_mount_path, + check_if_exists=check_if_exists, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + return docker_file_path, mount + + def helper( + self, + in_file_path: str, + is_caller_host: bool, + use_sibling_container_for_callee: bool, + check_if_exists: bool, + exp_docker_file_path: str, + exp_mount: str, + ) -> None: + """ + Test converting a file name to Docker paths. + """ + # Run test. + docker_file_path, mount = self.convert_caller_to_callee_docker_path( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + ) + # Check output. + self.assert_equal(docker_file_path, exp_docker_file_path) + self.assert_equal(mount, exp_mount) + + def test1(self) -> None: + """ + Test converting a file name to Docker paths. + """ + # - Prepare inputs. 
+ dir_name = self.get_input_dir() + in_file_path = os.path.join(dir_name, "tmp.llm_transform.in.txt") + is_caller_host = True + use_sibling_container_for_callee = True + check_if_exists = False + # - Prepare outputs. + helpers_root_path = hgit.find_helpers_root() + exp_docker_file_path = os.path.join( + helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test1/input", + "tmp.llm_transform.in.txt", + ) + exp_mount = "type=bind,source=/app,target=/app" + self.helper( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + exp_docker_file_path, + exp_mount, + ) + + def test2(self) -> None: + """ + Test converting a file name of an existing file to a Docker path. + """ + # - Prepare inputs. + dir_name = self.get_input_dir() + # Create a file. + # E.g., in_file_path='/app/helpers/test/outcomes/Test_convert_to_docker_path1.test2/input/tmp.input.md' + in_file_path = os.path.join(dir_name, "tmp.input.md") + hio.to_file(in_file_path, "empty") + _LOG.debug(hprint.to_str("in_file_path")) + is_caller_host = True + use_sibling_container_for_callee = True + check_if_exists = True + # - Prepare outputs. + helpers_root_path = hgit.find_helpers_root() + exp_docker_file_path = os.path.join( + helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test2/input", + "tmp.input.md", + ) + exp_mount = "type=bind,source=/app,target=/app" + self.helper( + in_file_path, + is_caller_host, + use_sibling_container_for_callee, + check_if_exists, + exp_docker_file_path, + exp_mount, + ) + + +# ############################################################################# +# Test_is_path1 +# ############################################################################# + + +class Test_is_path1(hunitest.TestCase): + def helper(self, path: str, expected: bool) -> None: + """ + Test helper for `is_path()` function. + """ + # Run test. + actual = hdocker.is_path(path) + # Check outputs. 
+ _LOG.debug(hprint.to_str("path actual expected")) + self.assertEqual(actual, expected) + + def test_file_with_extension(self) -> None: + """ + Test paths with file extensions. + """ + # Prepare inputs. + test_cases = [ + ("file.txt", True), + ("document.pdf", True), + ("script.py", True), + ("data.csv", True), + ("image.jpg", True), + ("config.json", True), + ("readme.md", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_absolute_paths(self) -> None: + """ + Test absolute paths. + """ + # Prepare inputs. + test_cases = [ + ("/path/to/file.py", True), + ("/usr/bin/python", True), + ("/etc/config", True), + ("/home/user", True), + ("/", True), + ("/data/shared", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_relative_paths(self) -> None: + """ + Test relative paths starting with ./ or ../. + """ + # Prepare inputs. + test_cases = [ + ("./file.txt", True), + ("../data.csv", True), + ("./folder/subfolder", True), + ("../parent/file", True), + ("./", True), + ("../", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_trailing_slash_paths(self) -> None: + """ + Test paths ending with slash (indicating directories). + """ + # Prepare inputs. + test_cases = [ + ("folder/", True), + ("data/", True), + ("my_directory/", True), + ("nested/folder/", True), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + def test_non_path_strings(self) -> None: + """ + Test strings that should not be considered paths. + """ + # Prepare inputs. + test_cases = [ + ("readme", False), + ("hello", False), + ("command", False), + ("data", False), + ("test", False), + ("python", False), + ("docker", False), + ("", False), + ] + # Run tests. 
+ for path, expected in test_cases: + self.helper(path, expected) + + def test_edge_cases(self) -> None: + """ + Test edge cases and complex scenarios. + """ + # Prepare inputs. + test_cases = [ + # - Files with multiple extensions. + ("file.tar.gz", True), + ("backup.sql.bz2", True), + # - Hidden files. + (".hidden", True), + (".gitignore", True), + # - Complex paths. + ("./nested/folder/file.txt", True), + ("../parent/folder/", True), + ("/absolute/path/file.py", True), + # - Files without extension in paths. + # True because it contains a slash. + ("folder/README", True), + # True because starts with "./". + ("./config", True), + # True because starts with "/". + ("/usr/bin/python", True), + # - Strings that might be confused with paths. + # True because has extension. + ("folder.name", True), + # False because no extension, slash, or path prefix. + ("file-name", False), + # False because no extension, slash, or path prefix. + ("under_score", False), + ] + # Run tests. + for path, expected in test_cases: + self.helper(path, expected) + + +# ############################################################################# +# Test_convert_all_paths_from_caller_to_callee_docker_path1 +# ############################################################################# + + +class Test_convert_all_paths_from_caller_to_callee_docker_path1( + hunitest.TestCase +): + def helper( + self, + cmd_opts: List[str], + expected_str: str, + *, + is_caller_host: bool = True, + use_sibling_container_for_callee: bool = True, + create_files: Optional[List[str]] = None, + ) -> None: + """ + Helper for `convert_all_paths_from_caller_to_callee_docker_path()`. + """ + hdbg.dassert_isinstance(cmd_opts, list) + hdbg.dassert_isinstance(expected_str, str) + # Prepare inputs. + if create_files: + # Create temporary files for testing existing file paths. 
+ for file_path in create_files: + dir_name = os.path.dirname(file_path) + if dir_name: + hio.create_dir(dir_name, incremental=True) + hio.to_file(file_path, "test content") + # Get docker mount info for the test. + ( + caller_mount_path, + callee_mount_path, + _, + ) = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + # Run test. + actual = hdocker.convert_all_paths_from_caller_to_callee_docker_path( + cmd_opts, + caller_mount_path, + callee_mount_path, + is_caller_host, + use_sibling_container_for_callee, + ) + _LOG.debug("actual=\n%s", str(actual)) + # Check outputs. + actual_str = "\n".join(actual) + actual_str = huntepur.purify_text(actual_str) + expected_str = huntepur.purify_text(expected_str) + self.assert_equal(actual_str, expected_str, dedent=True) + + # ///////////////////////////////////////////////////////////////////////////// + + def test_mixed_options_with_paths_and_non_paths(self) -> None: + """ + Test converting mixed command options with paths and non-paths. + """ + # Prepare inputs. + cmd_opts = [ + "--verbose", + "file.txt", # Path-like (has extension) + "--output", + "./output.log", # Path-like (relative path) + "command", # Not a path + # "/absolute/path", # Path-like (absolute) + "--flag", + "folder/", # Path-like (trailing slash) + ] + expected_output = [ + "--verbose", + "/app/file.txt", # Converted + "--output", + "/app/output.log", # Converted + "command", # Not converted + # "/app/absolute/path", # Converted + "--flag", + "/app/folder", # Converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_existing_files_get_converted(self) -> None: + """ + Test that existing files are converted even without path-like + appearance. + """ + # Prepare inputs. 
+ temp_dir = self.get_scratch_space() + existing_file = os.path.join(temp_dir, "testfile") + cmd_opts = [ + "--input", + existing_file, # Will exist, should be converted + "nonexistent", # Doesn't exist and not path-like, won't be converted + ] + expected_output = [ + "--input", + f"/app/{os.path.relpath(existing_file, hgit.find_git_root())}", # Converted + "nonexistent", # Not converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output, create_files=[existing_file]) + + def test_path_like_strings_without_existing_files(self) -> None: + """ + Test that path-like strings are converted even if files don't exist. + """ + # Prepare inputs. + cmd_opts = [ + "script.py", # Path-like (extension) but doesn't exist + "./config.json", # Path-like (relative) but doesn't exist + # "/usr/bin/tool", # Path-like (absolute) but doesn't exist + "plain_word", # Not path-like and doesn't exist + ] + expected_output = [ + "/app/script.py", # Converted (has extension) + "/app/config.json", # Converted (relative path) + # "/app/usr/bin/tool", # Converted (absolute path) + "plain_word", # Not converted + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_empty_command_options(self) -> None: + """ + Test handling of empty command options list. + """ + # Prepare inputs. + cmd_opts = [] + expected_output = [] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_only_non_path_options(self) -> None: + """ + Test command options with no paths. + """ + # Prepare inputs. + cmd_opts = [ + "--verbose", + "--debug", + "command", + "argument", + "--flag", + ] + expected_output = [ + "--verbose", + "--debug", + "command", + "argument", + "--flag", + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. 
+ self.helper(cmd_opts, expected_output) + + def test_only_path_options(self) -> None: + """ + Test command options with only paths. + """ + # Prepare inputs. + cmd_opts = [ + "input.txt", + "./config.yaml", + # "/var/log/app.log", + "data/", + "./output.json", + ] + expected_output = [ + "/app/input.txt", + "/app/config.yaml", + # "/app/var/log/app.log", + "/app/data", + "/app/output.json", + ] + expected_output = "\n".join(expected_output) + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_complex_paths_with_extensions(self) -> None: + """ + Test complex paths with multiple extensions and special cases. + """ + # Prepare inputs. + cmd_opts = [ + "archive.tar.gz", # Multiple extensions + ".hidden", # Hidden file + "backup.sql.bz2", # Multiple extensions + ".gitignore", # Hidden config file + ] + expected_output = """ + $GIT_ROOT/archive.tar.gz + $GIT_ROOT/.hidden + $GIT_ROOT/backup.sql.bz2 + $GIT_ROOT/.gitignore + """ + # Run test and check outputs. + self.helper(cmd_opts, expected_output) + + def test_sibling_vs_child_container_modes(self) -> None: + """ + Test different container modes (sibling vs child). + """ + # Prepare inputs. + cmd_opts = ["input.txt", "output/"] + # Test sibling container mode. + expected_output = ["/app/input.txt", "/app/output"] + expected_output = "\n".join(expected_output) + self.helper( + cmd_opts, + expected_output, + is_caller_host=True, + use_sibling_container_for_callee=True, + ) + # Test child container mode. 
+ expected_output = ["/app/input.txt", "/app/output"] + expected_output = "\n".join(expected_output) + self.helper( + cmd_opts, + expected_output, + is_caller_host=True, + use_sibling_container_for_callee=False, + ) + + +# ############################################################################# +# Test_get_docker_mount_info1 +# ############################################################################# + + +class Test_get_docker_mount_info1(hunitest.TestCase): + def test1(self) -> None: + """ + With CSFY_ENABLE_DIND, sibling-style docker.sock must still bind the + repo root inside this container, not CSFY_HOST_GIT_ROOT_PATH. + """ + # - Prepare inputs. + git_root = hgit.find_git_root() + env = { + "CSFY_ENABLE_DIND": "1", + "CSFY_HOST_GIT_ROOT_PATH": "/path/only/on/outer/host", + } + # - Prepare outputs. + exp_target = "/app" + exp_mount = f"type=bind,source={git_root},target=/app" + # Run test. + with umock.patch.dict(os.environ, env, clear=False): + source, target, mount = hdocker.get_docker_mount_info( + is_caller_host=False, + use_sibling_container_for_callee=True, + ) + # Check outputs. + self.assert_equal(source, git_root) + self.assert_equal(target, exp_target) + self.assert_equal(mount, exp_mount) + + def test2(self) -> None: + """ + Without DinD, sibling mode uses CSFY_HOST_GIT_ROOT_PATH for bind + source. + """ + # - Prepare inputs. + host_root = "/tmp/explicit_host_git_root_for_test" + env = { + "CSFY_ENABLE_DIND": "0", + "CSFY_HOST_GIT_ROOT_PATH": host_root, + } + # - Prepare outputs. + exp_target = "/app" + exp_mount = f"type=bind,source={host_root},target=/app" + # Run test. + with umock.patch.dict(os.environ, env, clear=False): + source, target, mount = hdocker.get_docker_mount_info( + is_caller_host=False, + use_sibling_container_for_callee=True, + ) + # Check outputs. 
+ self.assert_equal(source, host_root) + self.assert_equal(target, exp_target) + self.assert_equal(mount, exp_mount) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py new file mode 100644 index 000000000..203ae012e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py @@ -0,0 +1,158 @@ +""" +Unit tests for hdocker_tests.py +""" + +import logging +import os + +import helpers.hdocker_tests as hdoctest +import helpers.hio as hio +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_get_docker_test_files +# ############################################################################# + + +class Test_get_docker_test_files(hunitest.TestCase): + """ + Test the get_docker_test_files function. + """ + + def test1(self) -> None: + """ + Test finding docker test files in a directory. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_2.py"), "") + hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 2) + self.assertTrue(any("docker_test_1.py" in f for f in actual)) + self.assertTrue(any("docker_test_2.py" in f for f in actual)) + + def test2(self) -> None: + """ + Test with no matching files. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create non-matching files. 
+ hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") + hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 0) + + def test3(self) -> None: + """ + Test with single docker test file. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_single.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 1) + self.assertTrue("docker_test_single.py" in actual[0]) + + def test4(self) -> None: + """ + Test that files are returned in sorted order. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_z.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_a.py"), "") + hio.to_file(os.path.join(scratch_dir, "docker_test_m.py"), "") + # Run test. + actual = hdoctest.get_docker_test_files(scratch_dir) + # Check outputs. + self.assertEqual(len(actual), 3) + basenames = [os.path.basename(f) for f in actual] + self.assertEqual( + basenames, + ["docker_test_a.py", "docker_test_m.py", "docker_test_z.py"], + ) + + +# ############################################################################# +# Test_run_docker_cmd +# ############################################################################# + + +class Test_run_docker_cmd(hunitest.TestCase): + """ + Test the run_docker_cmd function. + """ + + def test1(self) -> None: + """ + Test that error is raised when docker_cmd.sh does not exist in + script_dir. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Run test and check output. + with self.assertRaises(AssertionError): + hdoctest.run_docker_cmd(scratch_dir) + + def test2(self) -> None: + """ + Test that error is raised when script_dir does not exist. + """ + # Prepare inputs. 
+ nonexistent_dir = "/nonexistent_dir_that_does_not_exist" + # Run test and check output. + with self.assertRaises(AssertionError): + hdoctest.run_docker_cmd(nonexistent_dir) + + +# ############################################################################# +# Test_run_all_tests +# ############################################################################# + + +class Test_run_all_tests(hunitest.TestCase): + """ + Test the run_all_tests function. + """ + + def test1(self) -> None: + """ + Test with no docker test files returns 0. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create non-matching files. + hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") + # Run test. + actual = hdoctest.run_all_tests(scratch_dir) + # Check outputs. + self.assertEqual(actual, 0) + + def test2(self) -> None: + """ + Test with docker test files when docker_cmd_script doesn't exist. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") + nonexistent_docker_cmd = os.path.join( + scratch_dir, "nonexistent_docker_cmd.sh" + ) + # Run test and check output. 
+ with self.assertRaises(AssertionError): + hdoctest.run_all_tests( + scratch_dir, docker_cmd_script=nonexistent_docker_cmd + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py new file mode 100644 index 000000000..d1f229435 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py @@ -0,0 +1,17 @@ +import logging + +import helpers.henv as henv +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_env1 +# ############################################################################# + + +class Test_env1(hunitest.TestCase): + def test_get_system_signature1(self) -> None: + txt = henv.get_system_signature() + _LOG.debug(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py new file mode 100644 index 000000000..f50f79994 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py @@ -0,0 +1,347 @@ +import logging +import pathlib + +import helpers.hfile_tree as hfiltree +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_generate_tree +# ############################################################################# + + +class Test_generate_tree(hunitest.TestCase): + def test1(self) -> None: + """ + Test generating default tree. + """ + # Prepare inputs. 
+ path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + " - create_users.sh", + " - pip_list.txt", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Test generating default tree with depth. + """ + # Prepare inputs. + path = self.devops_dir + depth = 1 + include_tests = False + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + Test generating tree including test files and dirs. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = True + include_python = False + only_dirs = False + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + "- test", + " - test_docker.py", + ] + ) + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test generating tree including python files. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = False + include_python = True + only_dirs = False + output = "" + # Call tested function. 
+ actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- __init__.py", + "- compose", + "- docker_build", + "- docker_run", + " - execute.py", + "- user_credentials.py", + ] + ) + self.assertEqual(actual, expected) + + def test5(self) -> None: + """ + Test generating tree with only directories. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = True + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- compose", + "- docker_build", + "- docker_run", + ] + ) + self.assertEqual(actual, expected) + + def test6(self) -> None: + """ + Test generating tree including tests, python files, and only + directories. + """ + # Prepare inputs. + path = self.devops_dir + depth = 0 + include_tests = True + include_python = True + only_dirs = True + output = "" + # Call tested function. + actual = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + # Check output. + expected = "\n".join( + [ + "devops", + "- __init__.py", + "- compose", + "- docker_build", + "- docker_run", + " - execute.py", + "- test", + " - test_docker.py", + "- user_credentials.py", + ] + ) + self.assertEqual(actual, expected) + + def test7(self) -> None: + """ + Test writing tree to file. + """ + # Prepare inputs. + scratch = pathlib.Path(self.get_scratch_space()) + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = scratch / "TREE.md" + # Call tested function. 
+ _ = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + actual = output.read_text(encoding="utf-8") + # Check output. + expected = ( + "\n".join( + [ + "", + "devops", + "- compose", + "- docker_build", + " - create_users.sh", + " - pip_list.txt", + "- docker_run", + "", + ] + ) + + "\n" + ) + self.assertEqual(actual, expected) + + def test8(self) -> None: + """ + Test updating tree on existing file, preserving comments. + """ + # Prepare inputs. + scratch = pathlib.Path(self.get_scratch_space()) + path = self.devops_dir + depth = 0 + include_tests = False + include_python = False + only_dirs = False + output = scratch / "TREE.md" + # Create existing file. + content = ( + "\n".join( + [ + "", + "devops", + "- compose # compose-comment", + "- docker_build", + " - pip_list.txt # pip-comment", + "", + ] + ) + + "\n" + ) + output.write_text(content, encoding="utf-8") + # Call tested function. + _ = hfiltree.generate_tree( + path=path, + depth=depth, + include_tests=include_tests, + include_python=include_python, + only_dirs=only_dirs, + output=output, + ) + actual = output.read_text(encoding="utf-8") + # Check output. + expected = ( + "\n".join( + [ + "", + "devops", + "- compose # compose-comment", + "- docker_build", + " - create_users.sh", + " - pip_list.txt # pip-comment", + "- docker_run", + "", + ] + ) + + "\n" + ) + self.assertEqual(actual, expected) + + def setUp(self) -> None: + """ + Create a `devops` directory in scratch space. 
+ + Scratch directory layout: + ``` + devops + - __init__.py + - user_credentials.py + - compose + - docker_run + - execute.py + - docker_build + - create_users.sh + - pip_list.txt + - test + - TestDocker + - test_docker.py + ``` + """ + super().setUp() + scratch = self.get_scratch_space() + self.devops_dir = pathlib.Path(scratch) / "devops" + self.devops_dir.mkdir() + structure = { + "": ["__init__.py", "user_credentials.py"], + "compose": [], + "docker_run": ["execute.py"], + "docker_build": ["create_users.sh", "pip_list.txt"], + "test": ["TestDocker", "test_docker.py"], + } + # Create empty dirs and files. + for subdir, files in structure.items(): + folder = self.devops_dir / subdir if subdir else self.devops_dir + if subdir: + folder.mkdir() + for name in files: + (folder / name).touch() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py new file mode 100644 index 000000000..8a7135578 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py @@ -0,0 +1,822 @@ +import logging +import os +import tempfile +from typing import Generator, List, Optional + +import pytest + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# Unfortunately we can't check the outcome of some of these functions since we +# don't know in which dir we are running. Thus we just test that the function +# completes and visually inspect the outcome, if possible. 
# #############################################################################
# Test_git_submodule1
# #############################################################################


class Test_git_submodule1(hunitest.TestCase):
    """
    Smoke tests for the `hgit` client / submodule query functions.

    Each test only checks that the call completes without raising: the
    returned value is logged at debug level and is not asserted.
    """

    def test_get_client_root1(self) -> None:
        """
        Call `get_client_root()` with `super_module=True`.
        """
        actual = hgit.get_client_root(super_module=True)
        _LOG.debug("actual=%s", actual)

    def test_get_client_root2(self) -> None:
        """
        Call `get_client_root()` with `super_module=False`.
        """
        actual = hgit.get_client_root(super_module=False)
        _LOG.debug("actual=%s", actual)

    def test_get_project_dirname1(self) -> None:
        """
        Call `get_project_dirname()` with default arguments.
        """
        actual = hgit.get_project_dirname()
        _LOG.debug("actual=%s", actual)

    def test_get_branch_name1(self) -> None:
        """
        Call `get_branch_name()` with default arguments.
        """
        actual = hgit.get_branch_name()
        _LOG.debug("actual=%s", actual)

    def test_is_inside_submodule1(self) -> None:
        """
        Call `is_inside_submodule()` with default arguments.
        """
        actual = hgit.is_inside_submodule()
        _LOG.debug("actual=%s", actual)

    # Outside CK infra, the following call hangs, so we skip it.
    # TODO(gp): I don't see why it requires our infra.
    @pytest.mark.requires_ck_infra
    def test_is_amp(self) -> None:
        """
        Call `is_amp()`; marked as requiring CK infra.
        """
        actual = hgit.is_amp()
        _LOG.debug("actual=%s", actual)

    def test_get_path_from_supermodule1(self) -> None:
        """
        Call `get_path_from_supermodule()` with default arguments.
        """
        actual = hgit.get_path_from_supermodule()
        _LOG.debug("actual=%s", actual)

    def test_get_submodule_paths1(self) -> None:
        """
        Call `get_submodule_paths()` with default arguments.
        """
        actual = hgit.get_submodule_paths()
        _LOG.debug("actual=%s", actual)


# #############################################################################
# Test_git_submodule2
# #############################################################################


class Test_git_submodule2(hunitest.TestCase):
    """
    Tests for the hash-related helpers in `hgit`.

    The `get_head_hash()` tests are smoke tests (value logged, not asserted);
    the `_group_hashes()` tests compare against an exact expected string.
    """

    # def test_get_submodule_hash1(self) -> None:
    #     dir_name = "amp"
    #     _ = hgit._get_submodule_hash(dir_name)

    def test_get_remote_head_hash1(self) -> None:
        """
        Call `get_head_hash()` on the current directory.
        """
        # NOTE(review): the test name refers to the *remote* head hash, but the
        # call is `get_head_hash()`, exactly as in `test_get_head_hash1` --
        # confirm which function was meant to be exercised.
        dir_name = "."
        actual = hgit.get_head_hash(dir_name)
        _LOG.debug("actual=%s", actual)

    # def test_report_submodule_status1(self) -> None:
    #     dir_names = ["."]
    #     short_hash = True
    #     _ = hgit.report_submodule_status(dir_names, short_hash)

    def test_get_head_hash1(self) -> None:
        """
        Call `get_head_hash()` on the current directory.
        """
        dir_name = "."
        actual = hgit.get_head_hash(dir_name)
        _LOG.debug("actual=%s", actual)

    def _helper_group_hashes(
        self,
        head_hash: str,
        remh_hash: str,
        subm_hash: Optional[str],
        expected: str,
    ) -> None:
        """
        Run `hgit._group_hashes()` and compare its output with `expected`.

        :param head_hash: value passed as the head hash
        :param remh_hash: value passed as the remote head hash
        :param subm_hash: value passed as the submodule hash, or `None`
        :param expected: expected output, compared with `fuzzy_match=True`
        """
        actual = hgit._group_hashes(head_hash, remh_hash, subm_hash)
        self.assert_equal(actual, expected, fuzzy_match=True)

    def test_group_hashes1(self) -> None:
        """
        Head and remote hashes are equal and there is no submodule hash.
        """
        head_hash = "a2bfc704"
        remh_hash = "a2bfc704"
        subm_hash = None
        expected = "head_hash = remh_hash = a2bfc704"
        #
        self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected)

    def test_group_hashes2(self) -> None:
        """
        Remote and submodule hashes are equal, head hash differs.
        """
        head_hash = "22996772"
        remh_hash = "92167662"
        subm_hash = "92167662"
        expected = """
        head_hash = 22996772
        remh_hash = subm_hash = 92167662
        """
        #
        self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected)

    def test_group_hashes3(self) -> None:
        """
        All three hashes are equal.
        """
        head_hash = "7ea03eb6"
        remh_hash = "7ea03eb6"
        subm_hash = "7ea03eb6"
        expected = "head_hash = remh_hash = subm_hash = 7ea03eb6"
        #
        self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected)


# #############################################################################
# Test_git_repo_name1
# #############################################################################


class Test_git_repo_name1(hunitest.TestCase):
    """
    Tests for repo-name parsing and repo-name lookup in `hgit`.
    """

    def test_parse_github_repo_name1(self) -> None:
        """
        Parse an SSH-style URL on `github.com`.
        """
        # Prepare inputs.
        repo_url = "git@github.com:alphamatic/amp"
        # Run test.
        host_name, repo_name = hgit._parse_github_repo_name(repo_url)
        # Check outputs.
        self.assert_equal(host_name, "github.com")
        self.assert_equal(repo_name, "alphamatic/amp")

    def test_parse_github_repo_name2(self) -> None:
        """
        Parse an HTTPS URL on `github.com`.
        """
        # Prepare inputs.
        repo_url = "https://github.com/alphamatic/amp"
        # Run test.
        # The parser is called once: an earlier duplicate call whose result was
        # discarded has been removed.
        host_name, repo_name = hgit._parse_github_repo_name(repo_url)
        # Check outputs.
        self.assert_equal(host_name, "github.com")
        self.assert_equal(repo_name, "alphamatic/amp")

    def test_parse_github_repo_name3(self) -> None:
        """
        Parse an SSH-style URL on a non-default host.
        """
        # Prepare inputs.
        repo_url = "git@github.fake.com:alphamatic/amp"
        # Run test.
        host_name, repo_name = hgit._parse_github_repo_name(repo_url)
        # Check outputs.
        self.assert_equal(host_name, "github.fake.com")
        self.assert_equal(repo_name, "alphamatic/amp")

    def test_parse_github_repo_name4(self) -> None:
        """
        Parse an HTTPS URL on a non-default host.
        """
        # Prepare inputs.
        repo_url = "https://github.fake.com/alphamatic/amp"
        # Run test.
        host_name, repo_name = hgit._parse_github_repo_name(repo_url)
        # Check outputs.
        self.assert_equal(host_name, "github.fake.com")
        self.assert_equal(repo_name, "alphamatic/amp")

    def test_get_repo_full_name_from_dirname1(self) -> None:
        """
        Smoke test: look up the repo name of `.` without the host name.
        """
        actual = hgit.get_repo_full_name_from_dirname(
            dir_name=".", include_host_name=False
        )
        _LOG.debug("actual=%s", actual)

    def test_get_repo_full_name_from_dirname2(self) -> None:
        """
        Smoke test: look up the repo name of `.` including the host name.
        """
        actual = hgit.get_repo_full_name_from_dirname(
            dir_name=".", include_host_name=True
        )
        _LOG.debug("actual=%s", actual)

    def test_get_repo_full_name_from_client1(self) -> None:
        """
        Smoke test: look up the repo name with `super_module=True`.
        """
        actual = hgit.get_repo_full_name_from_client(super_module=True)
        _LOG.debug("actual=%s", actual)

    def test_get_repo_full_name_from_client2(self) -> None:
        """
        Smoke test: look up the repo name with `super_module=False`.
        """
        actual = hgit.get_repo_full_name_from_client(super_module=False)
        _LOG.debug("actual=%s", actual)


# #############################################################################
# Test_git_path1
# #############################################################################


# Outside CK infra, the following class hangs, so we skip it.
+@pytest.mark.requires_ck_infra +class Test_git_path1(hunitest.TestCase): + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + def test_get_path_from_git_root1(self) -> None: + file_name = "/app/helpers/test/test_hgit.py" + actual = hgit.get_path_from_git_root(file_name, super_module=True) + _LOG.debug("get_path_from_git_root()=%s", actual) + # Check. + expected = "helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), + reason="Run only in amp as sub-module", + ) + def test_get_path_from_git_root2(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + actual = hgit.get_path_from_git_root(file_name, super_module=True) + _LOG.debug("get_path_from_git_root()=%s", actual) + # Check. + expected = "amp/helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root3(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + git_root = "/app" + actual = hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + # Check. + expected = "amp/helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root4(self) -> None: + file_name = "/app/amp/helpers/test/test_hgit.py" + git_root = "/app/amp" + actual = hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + # Check. 
+ expected = "helpers/test/test_hgit.py" + self.assert_equal(actual, expected) + + def test_get_path_from_git_root5(self) -> None: + file_name = "helpers/test/test_hgit.py" + git_root = "/app/amp" + with self.assertRaises(ValueError): + hgit.get_path_from_git_root( + file_name, super_module=False, git_root=git_root + ) + + +# ############################################################################# +# Test_git_modified_files1 +# ############################################################################# + + +# Outside CK infra, the following class hangs, so we skip it. +@pytest.mark.requires_ck_infra +@pytest.mark.slow(reason="Around 7s") +@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class Test_git_modified_files1(hunitest.TestCase): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + + def set_up_test(self) -> None: + """ + All these tests need a reference to Git master branch. 
+ """ + hgit.fetch_origin_master_if_needed() + + def test_get_modified_files1(self) -> None: + actual = hgit.get_modified_files() + _LOG.debug("actual=%s", actual) + + def test_get_previous_committed_files1(self) -> None: + actual = hgit.get_previous_committed_files() + _LOG.debug("actual=%s", actual) + + def test_get_modified_files_in_branch1(self) -> None: + actual = hgit.get_modified_files_in_branch("master") + _LOG.debug("actual=%s", actual) + + def test_get_summary_files_in_branch1(self) -> None: + actual = hgit.get_summary_files_in_branch("master") + _LOG.debug("actual=%s", actual) + + def test_git_log1(self) -> None: + actual = hgit.git_log() + _LOG.debug("actual=%s", actual) + + +# ############################################################################# + + +# ############################################################################# +# Test_find_docker_file1 +# ############################################################################# + + +# Outside CK infra, the following class hangs, so we skip it. +@pytest.mark.requires_ck_infra +class Test_find_docker_file1(hunitest.TestCase): + def test1(self) -> None: + """ + Test for a file `amp/helpers/test/test_hgit.py` that is not from Docker + (i.e., it doesn't start with `/app`) and exists in the repo. + """ + amp_dir = hgit.get_amp_abs_path() + # Use this file since `find_docker_file()` needs to do a `find` in the + # repo, and we need to have a fixed file structure. + file_name = hgit.find_file_in_git_tree("test_hgit.py") + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + ) + expected = ["helpers/test/test_hgit.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test2(self) -> None: + """ + Test for a file `/app/amp/helpers/test/test_hgit.py` that is from + Docker (i.e., it starts with `/app`) and exists in the repo. 
+ """ + amp_dir = hgit.get_amp_abs_path() + # Use this file since `find_docker_file()` needs to do a `find` in the + # repo, and we need to have a fixed file structure. + file_name = hgit.find_file_in_git_tree("test_hgit.py") + expected = ["helpers/test/test_hgit.py"] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + ) + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test3(self) -> None: + """ + Test for a file `/venv/lib/python3.8/site-packages/invoke/tasks.py` + that is from Docker (e.g., it starts with `/app`), but doesn't exist in + the repo. + """ + file_name = "/venv/lib/python3.8/site-packages/invoke/tasks.py" + actual = hgit.find_docker_file(file_name) + expected: List[str] = [] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test4(self) -> None: + """ + Test for a file `./core/dataflow/utils.py` that is from Docker (i.e., + it starts with `/app`), but has multiple copies in the repo. + """ + amp_dir = hgit.get_amp_abs_path() + file_name = "/app/amp/core/dataflow/utils.py" + dir_depth = 1 + candidate_files = [ + "core/dataflow/utils.py", + "core/foo/utils.py", + "core/bar/utils.py", + ] + candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + dir_depth=dir_depth, + candidate_files=candidate_files, + ) + # Only one candidate file matches basename and one dirname. 
+ expected = ["core/dataflow/utils.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test5(self) -> None: + amp_dir = hgit.get_amp_abs_path() + file_name = "/app/amp/core/dataflow/utils.py" + dir_depth = -1 + candidate_files = [ + "core/dataflow/utils.py", + "bar/dataflow/utils.py", + "core/foo/utils.py", + ] + candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] + actual = hgit.find_docker_file( + file_name, + root_dir=amp_dir, + dir_depth=dir_depth, + candidate_files=candidate_files, + ) + # Only one file matches `utils.py` using all the 3 dir levels. + expected = ["core/dataflow/utils.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_extract_gh_issue_number_from_branch +# ############################################################################# + + +class Test_extract_gh_issue_number_from_branch(hunitest.TestCase): + def test_extract_gh_issue_number_from_branch1(self) -> None: + """ + Tests extraction from a branch name with a specific format. + """ + branch_name = "CmampTask10725_Add_more_tabs_to_orange_tmux" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "10725" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch2(self) -> None: + """ + Tests extraction from another branch name format. + """ + branch_name = "HelpersTask23_Add_more_tabs_to_orange_tmux" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "23" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch3(self) -> None: + """ + Tests extraction from a short branch name format. 
+ """ + branch_name = "CmTask3434" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "3434" + self.assert_equal(str(actual), expected) + + def test_extract_gh_issue_number_from_branch4(self) -> None: + """ + Tests behavior when no issue number is present in the branch name. + """ + branch_name = "NoTaskNumberHere" + actual = hgit.extract_gh_issue_number_from_branch(branch_name) + expected = "None" + self.assert_equal(str(actual), expected) + + +# ############################################################################# +# Test_find_git_root1 +# ############################################################################# + + +class Test_find_git_root1(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a super repo (e.g. //orange) + - the repo contains another super repo (e.g. //amp) as submodule (first level) + - the first level submodule contains another submodule (e.g. //helpers) (second level) + + Directory structure: + orange/ + |-- .git/ + `-- amp/ + |-- .git (points to ../.git/modules/amp) + |-- ck.infra/ + `-- helpers_root/ + `-- .git (points to ../../.git/modules/amp/modules/helpers_root) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `orange` repo. + self.repo_dir = os.path.join(temp_dir, "orange") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create `amp` submodule under `orange`. + self.submodule_dir = os.path.join(self.repo_dir, "amp") + hio.create_dir(self.submodule_dir, incremental=False) + submodule_git_file = os.path.join(self.submodule_dir, ".git") + txt = "gitdir: ../.git/modules/amp" + hio.to_file(submodule_git_file, txt) + submodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "amp" + ) + hio.create_dir(submodule_git_file_dir, incremental=False) + # Create `helpers_root` submodule under `amp`. 
+ self.subsubmodule_dir = os.path.join(self.submodule_dir, "helpers_root") + hio.create_dir(self.subsubmodule_dir, incremental=False) + subsubmodule_git_file = os.path.join(self.subsubmodule_dir, ".git") + txt = "gitdir: ../../.git/modules/amp/modules/helpers_root" + hio.to_file(subsubmodule_git_file, txt) + subsubmodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "amp", "modules", "helpers_root" + ) + hio.create_dir(subsubmodule_git_file_dir, incremental=False) + # Create `ck.infra` runnable dir under `amp`. + self.runnable_dir = os.path.join(self.submodule_dir, "ck.infra") + hio.create_dir(self.runnable_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in the super repo (e.g. //orange) + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in first level submodule (e.g. //amp) + """ + self.set_up_test() + with hsystem.cd(self.submodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test3(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in second level submodule (e.g. //helpers) + """ + self.set_up_test() + with hsystem.cd(self.subsubmodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test4(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in a runnable dir (e.g. ck.infra) under the + first level submodule (e.g. 
//amp) + """ + self.set_up_test() + with hsystem.cd(self.runnable_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root2 +# ############################################################################# + + +class Test_find_git_root2(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a super repo (e.g. //cmamp) + - the repo contains //helpers as submodule + + Directory structure: + cmamp/ + |-- .git/ + |-- ck.infra/ + `-- helpers_root/ + `-- .git (points to ../.git/modules/helpers_root) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `cmamp` repo. + self.repo_dir = os.path.join(temp_dir, "cmamp") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create `helpers_root` submodule under `cmamp`. + self.submodule_dir = os.path.join(self.repo_dir, "helpers_root") + hio.create_dir(self.submodule_dir, incremental=False) + submodule_git_file = os.path.join(self.submodule_dir, ".git") + txt = "gitdir: ../.git/modules/helpers_root" + hio.to_file(submodule_git_file, txt) + submodule_git_file_dir = os.path.join( + self.repo_dir, ".git", "modules", "helpers_root" + ) + hio.create_dir(submodule_git_file_dir, incremental=False) + # Create `ck.infra` runnable dir under `cmamp`. + self.runnable_dir = os.path.join(self.repo_dir, "ck.infra") + hio.create_dir(self.runnable_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in the super repo (e.g. 
//cmamp) + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the submodule (e.g. //helpers) + """ + self.set_up_test() + with hsystem.cd(self.submodule_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test3(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in a runnable dir (e.g. ck.infra) + """ + self.set_up_test() + with hsystem.cd(self.runnable_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root3 +# ############################################################################# + + +class Test_find_git_root3(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is //helpers + + Directory structure: + helpers/ + |-- .git/ + `-- arbitrary1/ + `-- arbitrary1a/ + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create `helpers` repo. + self.repo_dir = os.path.join(temp_dir, "helpers") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create arbitrary directory under `helpers`. 
+ self.arbitrary_dir = os.path.join( + self.repo_dir, "arbitrary1", "arbitrary1a" + ) + hio.create_dir(self.arbitrary_dir, incremental=False) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the root of repo + """ + self.set_up_test() + with hsystem.cd(self.repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + def test2(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is in an arbitrary directory under the repo + """ + self.set_up_test() + with hsystem.cd(self.arbitrary_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root4 +# ############################################################################# + + +class Test_find_git_root4(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a linked repo + + Directory structure: + repo/ + `-- .git/ + linked_repo/ + `-- .git (points to /repo/.git) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create repo. + self.repo_dir = os.path.join(temp_dir, "repo") + hio.create_dir(self.repo_dir, incremental=False) + self.git_dir = os.path.join(self.repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create linked repo. + self.linked_repo_dir = os.path.join(temp_dir, "linked_repo") + hio.create_dir(self.linked_repo_dir, incremental=False) + # Create pointer from linked repo to the actual repo. 
+ linked_git_file = os.path.join(self.linked_repo_dir, ".git") + txt = f"gitdir: {self.git_dir}\n" + hio.to_file(linked_git_file, txt) + + def test1(self) -> None: + """ + Check that the function returns the correct git root if + - the caller is the linked repo + """ + self.set_up_test() + with hsystem.cd(self.linked_repo_dir): + git_root = hgit.find_git_root(".") + self.assert_equal(git_root, self.repo_dir) + + +# ############################################################################# +# Test_find_git_root5 +# ############################################################################# + + +class Test_find_git_root5(hunitest.TestCase): + """ + Check that the error is raised when no .git directory is found. + + Directory structure: + arbitrary_dir/ + broken_repo/ + `-- .git (points to /nonexistent/path/to/gitdir) + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self): + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + # `self.get_scratch_space()` does not work in the case as it creates + # a temp directory within the repo where `.git` exists by default + # (e.g. /app/helpers/test/outcomes/Test_find_git_root5.test1/tmp.scratch) + # This preventing the exception from being raised. + # We need a structure without `.git` for this test. + self.temp_dir = tempfile.TemporaryDirectory() + # Create arbitrary directory that is not a git repo. + self.arbitrary_dir = os.path.join(self.temp_dir.name, "arbitrary_dir") + hio.create_dir(self.arbitrary_dir, incremental=False) + # Create arbitrary directory that is a submodule or linked repo that + # point to non existing super repo. + self.repo_dir = os.path.join(self.temp_dir.name, "broken_repo") + hio.create_dir(self.repo_dir, incremental=False) + # Create an invalid `.git` file with a non-existent `gitdir`. 
+ invalid_git_file = os.path.join(self.repo_dir, ".git") + txt = "gitdir: /nonexistent/path/to/gitdir" + hio.to_file(invalid_git_file, txt) + + def tear_down_test(self) -> None: + self.temp_dir.cleanup() + + def test1(self) -> None: + """ + Check that the error is raised when the caller is in a directory that + is not either a git repo or a submodule. + """ + with ( + hsystem.cd(self.arbitrary_dir), + self.assertRaises(AssertionError) as cm, + ): + _ = hgit.find_git_root(".") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '/' + != + '/' + No .git directory or file found in any parent directory. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test2(self) -> None: + """ + Check that the error is raised when the caller is in a submodule or + linked repo that points to non existing super repo. + """ + with hsystem.cd(self.repo_dir), self.assertRaises(AssertionError) as cm: + _ = hgit.find_git_root(".") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '/' + != + '/' + Top-level .git directory not found. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + +# ############################################################################# +# Test_find_git_root6 +# ############################################################################# + + +class Test_find_git_root6(hunitest.TestCase): + """ + Check that the function returns the correct git root if: + - the repo is a worktree + + Directory structure: + main_repo/ + `-- .git/ + |-- config + `-- worktrees/ + `-- csfy2/ + |-- HEAD + `-- config + csfy2/ (worktree) + `-- .git (points to /main_repo/.git/worktrees/csfy2) + """ + + def set_up_test(self) -> None: + temp_dir = self.get_scratch_space() + # Create main repo with a .git directory. 
+ self.main_repo_dir = os.path.join(temp_dir, "main_repo") + hio.create_dir(self.main_repo_dir, incremental=False) + self.git_dir = os.path.join(self.main_repo_dir, ".git") + hio.create_dir(self.git_dir, incremental=False) + # Create worktree git metadata directory. + self.worktree_git_dir = os.path.join(self.git_dir, "worktrees", "csfy2") + hio.create_dir(self.worktree_git_dir, incremental=False) + # Create worktree directory. + self.worktree_dir = os.path.join(temp_dir, "csfy2") + hio.create_dir(self.worktree_dir, incremental=False) + # Create pointer from worktree to the git directory. + worktree_git_file = os.path.join(self.worktree_dir, ".git") + txt = f"gitdir: {self.worktree_git_dir}\n" + hio.to_file(worktree_git_file, txt) + + def test1(self) -> None: + """ + Check that the function returns the worktree root when called from a worktree. + """ + self.set_up_test() + with hsystem.cd(self.worktree_dir): + git_root = hgit.find_git_root(".") + # For worktrees, the function should return the worktree root, + # not the main repository root. 
+ self.assert_equal(git_root, self.worktree_dir) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py new file mode 100644 index 000000000..2e4a97ca4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py @@ -0,0 +1,406 @@ +import logging +import os +import re +from typing import Any, Callable + +import helpers.hdbg as hdbg +import helpers.hintrospection as hintros +import helpers.hpickle as hpickle +import helpers.hstring as hstring +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_is_pickleable +# ############################################################################# + + +def hello() -> bool: + return False + + +# ############################################################################# +# _ClassPickleable +# ############################################################################# + + +class _ClassPickleable: + """ + Class with pickleable param values. + """ + + def __init__(self) -> None: + self._arg1 = 1 + self._arg2 = ["2", 3] + + @staticmethod + def say2(self) -> None: + print("Hello") + + def say(self) -> None: + print("Hello") + + +# ############################################################################# +# _ClassNonPickleable +# ############################################################################# + + +class _ClassNonPickleable: + """ + Class with non-pickleable param values. 
+ """ + + def __init__(self) -> None: + self._arg1 = lambda x: x + self._arg2 = 2 + + +# ############################################################################# +# Test_is_pickleable1 +# ############################################################################# + + +class Test_is_pickleable1(hunitest.TestCase): + def helper( + self, + obj: Any, + exp_str: str, + exp_bound: bool, + exp_lambda: bool, + exp_pickled: bool, + ) -> None: + _LOG.debug("obj=%s", obj) + # + act_str = str(obj) + _LOG.debug("act_str=%s", act_str) + _LOG.debug("exp_str=%s", exp_str) + self.assert_equal(act_str, exp_str, purify_text=True) + # + act_bound = hintros.is_bound_to_object(obj) + _LOG.debug("act_bound=%s", act_bound) + _LOG.debug("exp_bound=%s", exp_bound) + self.assertEqual(act_bound, exp_bound) + # + act_lambda = hintros.is_lambda_function(obj) + _LOG.debug("act_lambda=%s", act_lambda) + _LOG.debug("exp_lambda=%s", exp_lambda) + self.assertEqual(act_lambda, exp_lambda) + # Try to pickle. + try: + file_name = os.path.join(self.get_scratch_space(), "obj.pkl") + hpickle.to_pickle(obj, file_name) + act_pickled = True + except AttributeError as e: + _LOG.error("e=%s", e) + act_pickled = False + _LOG.debug("act_pickled=%s", act_pickled) + _LOG.debug("exp_pickled=%s", exp_pickled) + self.assertEqual(act_pickled, exp_pickled) + + def test_lambda1(self) -> None: + # Local lambda. + lambda_ = lambda: 0 + func = lambda_ + exp_str = r". at 0x>" + # A lambda is not bound to an object. + exp_bound = False + exp_lambda = True + # A lambda is not pickleable. + exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_lambda2(self) -> None: + lambda_ = lambda x: x + func = lambda_ + exp_str = r". at 0x>" + # A lambda is not bound to an object. + exp_bound = False + exp_lambda = True + # A lambda is not pickleable. 
+ exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_func1(self) -> None: + def _hello() -> bool: + return False + + # + func = _hello + exp_str = ( + r"._hello at 0x>" + ) + exp_bound = False + exp_lambda = False + # A local object is not pickleable. + exp_pickled = False + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_func2(self) -> None: + # Global function. + func = hello + exp_str = r"" + exp_bound = False + exp_lambda = False + # A global function is pickleable since it's not bound locally or + # to an object. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method1(self) -> None: + # A class method but unbound to an object. + func = _ClassPickleable.say + exp_str = r"" + exp_bound = False + exp_lambda = False + # A unbound class method is actually pickleable. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method2(self) -> None: + # A static class method. + func = _ClassPickleable.say2 + exp_str = r"" + exp_bound = False + exp_lambda = False + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method3(self) -> None: + # A bound method. + class_instance = _ClassPickleable() + func = class_instance.say + exp_str = r">" + exp_bound = True + exp_lambda = False + # A method bound to an object is just a function, so it's pickleable. + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + def test_method4(self) -> None: + # A static class method. 
+ class_instance = _ClassPickleable() + func = class_instance.say2 + exp_str = r"" + exp_bound = False + exp_lambda = False + exp_pickled = True + self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) + + +# ############################################################################# +# Test_is_pickleable2 +# ############################################################################# + + +class Test_is_pickleable2(hunitest.TestCase): + def helper( + self, + obj: Any, + mode: str, + expected: bool, + ) -> None: + """ + Check that picklebility is detected correctly for specified mode. + """ + _LOG.debug("obj=%s", obj) + actual = hintros.is_pickleable(obj, mode=mode) + _LOG.debug("actual=%s", actual) + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_non_callable1(self) -> None: + obj = [1, "2", 0.3] + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_non_callable2(self) -> None: + obj = [1, "2", 0.3] + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_lambda1(self) -> None: + obj = lambda x: x + mode = "type_search" + expected = False + self.helper(obj, mode, expected) + + def test_lambda2(self) -> None: + obj = lambda x: x + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + def test_local_object1(self) -> None: + def _hello() -> bool: + return False + + obj = _hello + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_local_object2(self) -> None: + def _hello() -> bool: + return False + + obj = _hello + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + def test_global_object1(self) -> None: + obj = hello + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_global_object2(self) -> None: + obj = hello + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def 
test_unbound_class_method1(self) -> None: + obj = _ClassPickleable.say + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_unbound_class_method2(self) -> None: + obj = _ClassPickleable.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_static_class_method1(self) -> None: + obj = _ClassPickleable.say + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_static_class_method2(self) -> None: + obj = _ClassPickleable.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_bound_to_object_method1(self) -> None: + class_instance = _ClassPickleable() + obj = class_instance.say + mode = "type_search" + expected = False + self.helper(obj, mode, expected) + + def test_bound_to_object_method2(self) -> None: + class_instance = _ClassPickleable() + obj = class_instance.say + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_pickleable_class1(self) -> None: + obj = _ClassPickleable() + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_pickleable_class2(self) -> None: + obj = _ClassPickleable() + mode = "try_and_catch" + expected = True + self.helper(obj, mode, expected) + + def test_nonpickleable_class1(self) -> None: + obj = _ClassNonPickleable() + mode = "type_search" + expected = True + self.helper(obj, mode, expected) + + def test_nonpickleable_class2(self) -> None: + obj = _ClassNonPickleable() + mode = "try_and_catch" + expected = False + self.helper(obj, mode, expected) + + +# ############################################################################# +# Test_get_function_name1 +# ############################################################################# + + +def test_function() -> None: + pass + + +# ############################################################################# +# Test_get_function_name1 +# 
############################################################################# + + +class Test_get_function_name1(hunitest.TestCase): + def test1(self) -> None: + actual = hintros.get_function_name() + expected = "test1" + self.assert_equal(actual, expected, purify_text=True) + + +# ############################################################################# +# Test_get_name_from_function1 +# ############################################################################# + + +class Test_get_name_from_function1(hunitest.TestCase): + def test1(self) -> None: + actual = hintros.get_name_from_function(test_function) + actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) + expected = "helpers.test.test_hintrospection.test_function" + self.assert_equal(actual, expected, purify_text=True) + + +# ############################################################################# +# Test_get_function_from_string1 +# ############################################################################# + + +def dummy_function() -> None: + pass + + +# ############################################################################# +# Test_get_function_from_string1 +# ############################################################################# + + +class Test_get_function_from_string1(hunitest.TestCase): + def test1(self) -> None: + """ + Test that function is correctly extracted from a string. + """ + func_str = "helpers.test.test_hintrospection.dummy_function" + # Compute the actual value. + act_func = hintros.get_function_from_string(func_str) + actual = hintros.get_name_from_function(act_func) + actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) + # Compute the expected value. + exp_func = dummy_function + expected = hintros.get_name_from_function(exp_func) + expected = hstring.remove_prefix(expected, "amp.", assert_on_error=False) + # Run. 
+ hdbg.dassert_isinstance(act_func, Callable) + # The function can have different names depending on whether `helpers` + # is a sub-repo or a super-repo: + # helpers.test.test_hintrospection.dummy_function + # helpers_root.helpers.test.test_hintrospection.dummy_function + # + actual = re.sub( + r"helpers_root\.helpers\.", "helpers.", actual, flags=re.MULTILINE + ) + expected = re.sub( + r"helpers_root\.helpers\.", "helpers.", expected, flags=re.MULTILINE + ) + self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py new file mode 100644 index 000000000..cbf1f16f3 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py @@ -0,0 +1,225 @@ +import logging +import os + +import numpy as np +import pandas as pd + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_rename_file_if_exists +# ############################################################################# + + +class Test_rename_file_if_exists(hunitest.TestCase): + """ + Test that the function renames existing files correctly. + """ + + def check_file( + self, + file_to_rename: str, + before_extension: bool, + expected_file_name: str, + ) -> None: + """ + Check that file is renamed correctly. + """ + # Create a target file to rename. + scratch_dir = self.get_scratch_space() + file_name = "test_file.txt" + file_path = os.path.join(scratch_dir, file_name) + lines = "" + hio.to_file(file_path, lines) + # Rename the file. 
+ file_to_rename = os.path.join(scratch_dir, file_to_rename) + suffix = "suffix" + hio.rename_file_if_exists( + file_to_rename, suffix, before_extension=before_extension + ) + # Check that file is renamed. + expected_file_path = os.path.join(scratch_dir, expected_file_name) + self.assertTrue(os.path.exists(expected_file_path)) + + def test1(self) -> None: + """ + Test that suffix is added before an extension. + """ + file_to_rename = "test_file.txt" + before_extension = True + expected_file_name = "test_file.suffix.txt" + self.check_file(file_to_rename, before_extension, expected_file_name) + + def test2(self) -> None: + """ + Test that suffix is added after an extension. + """ + file_to_rename = "test_file.txt" + before_extension = False + expected_file_name = "test_file.txt.suffix" + self.check_file(file_to_rename, before_extension, expected_file_name) + + def test3(self) -> None: + """ + Test that non-existing file is not renamed. + """ + file_to_rename = "not_exist.txt" + before_extension = False + expected_file_name = "not_exist.txt" + with self.assertRaises(AssertionError): + self.check_file(file_to_rename, before_extension, expected_file_name) + + +# ############################################################################# +# Test_find_all_files1 +# ############################################################################# + + +class Test_find_all_files1(hunitest.TestCase): + def test1(self) -> None: + dir_name = hgit.get_client_root(super_module=False) + # Check that there are files. + pattern = "*" + only_files = True + use_relative_paths = True + all_files = hio.listdir( + dir_name, pattern, only_files, use_relative_paths + ) + self.assertGreater(len(all_files), 0) + # Check that there are more files than Python files. 
+ exclude_paired_jupytext = False + py_files = hio.keep_python_files(all_files, exclude_paired_jupytext) + self.assertGreater(len(py_files), 0) + self.assertGreater(len(all_files), len(py_files)) + # Check that there are more Python files than not paired Python files. + exclude_paired_jupytext = True + not_paired_py_files = hio.keep_python_files( + all_files, exclude_paired_jupytext + ) + self.assertGreater(len(not_paired_py_files), 0) + self.assertGreater(len(py_files), len(not_paired_py_files)) + + +# ############################################################################# +# Test_change_filename_extension1 +# ############################################################################# + + +class Test_change_filename_extension1(hunitest.TestCase): + def test1(self) -> None: + file_name = "./core/dataflow_model/notebooks/Master_experiment_runner.py" + actual = hio.change_filename_extension(file_name, "py", "ipynb") + expected = ( + "./core/dataflow_model/notebooks/Master_experiment_runner.ipynb" + ) + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_load_df_from_json +# ############################################################################# + + +class Test_load_df_from_json(hunitest.TestCase): + def test1(self) -> None: + test_json_path = os.path.join(self.get_input_dir(), "test.json") + actual_result = hio.load_df_from_json(test_json_path) + expected_result = pd.DataFrame( + { + "col1": ["a", "b", "c", "d"], + "col2": ["a", "b", np.nan, np.nan], + "col3": ["a", "b", "c", np.nan], + } + ) + actual_result = hpandas.df_to_str(actual_result) + expected_result = hpandas.df_to_str(expected_result) + self.assertEqual(actual_result, expected_result) + + +# ############################################################################# +# Test_safe_rm_file +# ############################################################################# + + +class Test_safe_rm_file(hunitest.TestCase): 
+ def test_successful_removal_within_git_client(self) -> None: + """ + Test successful removal of directory within Git client. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + test_dir = os.path.join(scratch_dir, "test_dir_to_remove") + os.makedirs(test_dir) + # Create a test file in the directory to ensure it has content + test_file = os.path.join(test_dir, "test_file.txt") + hio.to_file(test_file, "test content") + # Verify directory exists before removal + self.assertTrue(os.path.exists(test_dir)) + # Run test. + hio.safe_rm_file(test_dir) + # Check output. + self.assertFalse(os.path.exists(test_dir)) + + def test_removal_of_nested_directory(self) -> None: + """ + Test removal of deeply nested directory structure. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + nested_dir = os.path.join(scratch_dir, "parent", "child", "grandchild") + os.makedirs(nested_dir) + # Create files at different levels + hio.to_file(os.path.join(nested_dir, "file1.txt"), "content1") + hio.to_file( + os.path.join(os.path.dirname(nested_dir), "file2.txt"), "content2" + ) + parent_dir = os.path.join(scratch_dir, "parent") + # Verify directory exists + self.assertTrue(os.path.exists(parent_dir)) + # Run test. + hio.safe_rm_file(parent_dir) + # Check output. + self.assertFalse(os.path.exists(parent_dir)) + + def test_directory_does_not_exist(self) -> None: + """ + Test that function raises assertion error for non-existent directory. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + non_existent_dir = os.path.join(scratch_dir, "non_existent_directory") + # Ensure directory doesn't exist + self.assertFalse(os.path.exists(non_existent_dir)) + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(non_existent_dir) + self.assertIn("does not exist", str(cm.exception)) + + def test_cannot_delete_git_root(self) -> None: + """ + Test that function prevents deletion of Git client root directory. 
+ """ + # Prepare inputs. + git_root = hgit.find_git_root() + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(git_root) + self.assertIn("Cannot delete Git client root", str(cm.exception)) + + def test_directory_outside_git_client_rejected(self) -> None: + """ + Test that function rejects directories outside Git client. + """ + # Prepare inputs. + # Use /tmp which should be outside any Git client + outside_dir = "/tmp" + # Run test and check output. + with self.assertRaises(AssertionError) as cm: + hio.safe_rm_file(outside_dir) + self.assertIn("is not within Git client root", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py new file mode 100644 index 000000000..70450e943 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py @@ -0,0 +1,665 @@ +""" +Unit tests for hlatex module. 
+ +This module tests LaTeX text processing utilities including: +- Removing LaTeX formatting commands +- Detecting LaTeX line separators +- Framing sections with separator lines +- Detecting LaTeX comments +- Extracting section headers and their hierarchy +""" + +import logging + +import helpers.hlatex as hlatex +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +# ############################################################################# +# Test_remove_latex_formatting1 +# ############################################################################# + + +class Test_remove_latex_formatting1(hunitest.TestCase): + """ + Test the remove_latex_formatting function. + """ + + def test1(self) -> None: + """ + Test removal of textcolor commands from LaTeX text. + """ + # Prepare inputs. + txt = r""" + - If there is \textcolor{red}{no pattern}, we can try learning: + - Measure if \textcolor{blue}{learning works}. + - In the \textcolor{orange}{worst case}, conclude that it + \textcolor{green}{does not work}. + - If we can find the \textcolor{purple}{solution in one step} or + \textcolor{cyan}{program the solution}: + - \textcolor{brown}{Machine learning} is not the \textcolor{teal}{recommended + technique}, but it still works. + - Without \textcolor{magenta}{data}, we cannot do anything: + \textcolor{violet}{data is all that matters}. + """ + txt = hprint.dedent(txt) + # Prepare outputs. + expected = r""" + - If there is no pattern, we can try learning: + - Measure if learning works. + - In the worst case, conclude that it + does not work. + - If we can find the solution in one step or + program the solution: + - Machine learning is not the recommended + technique, but it still works. 
+ - Without data, we cannot do anything: + data is all that matters.""" + expected = hprint.dedent(expected) + # Run test. + actual = hlatex.remove_latex_formatting(txt) + # Check outputs. + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_is_latex_line_separator1 +# ############################################################################# + + +class Test_is_latex_line_separator1(hunitest.TestCase): + """ + Test the _is_latex_line_separator function. + """ + + def test1(self) -> None: + """ + Test that a line with repeated # characters is recognized as separator. + """ + # Prepare inputs. + line = "% ##########" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test2(self) -> None: + """ + Test that a line with repeated = characters is recognized as separator. + """ + # Prepare inputs. + line = "% ==========" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test3(self) -> None: + """ + Test that a line with repeated - characters is recognized as separator. + """ + # Prepare inputs. + line = "% ----------" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertTrue(actual) + + def test4(self) -> None: + """ + Test that a line with too few repeated characters is not a separator. + """ + # Prepare inputs. + line = "% ####" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. + self.assertFalse(actual) + + def test5(self) -> None: + """ + Test that a regular comment is not recognized as separator. + """ + # Prepare inputs. + line = "% This is a regular comment" + # Run test. + actual = hlatex._is_latex_line_separator(line) + # Check outputs. 
+ self.assertFalse(actual) + + +# ############################################################################# +# Test_frame_sections1 +# ############################################################################# + + +class Test_frame_sections1(hunitest.TestCase): + """ + Test the frame_sections function. + """ + + def helper(self, input_txt: str, expected: str) -> None: + """ + Helper method to test frame_sections function. + + :param input_txt: Input LaTeX text + :param expected: Expected output after processing + """ + # Prepare inputs. + lines = hprint.dedent(input_txt) + lines = lines.split("\n") + # Run test. + actual = hlatex.frame_sections(lines) + actual = "\n".join(actual) + # Prepare outputs. + expected = hprint.dedent(expected) + # Check outputs. + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test adding separator before a single section command. + """ + # Prepare inputs. + input_txt = r""" + \section{Introduction} + This is the introduction. + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + This is the introduction. + """ + # Run test. + self.helper(input_txt, expected) + + def test2(self) -> None: + """ + Test adding separators before section, subsection, and subsubsection. + """ + # Prepare inputs. + input_txt = r""" + \section{Proposed framework} + + \subsection{Combining Physics-Informed and Data-Driven Approaches} + + \subsubsection{Detailed Analysis} + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Proposed framework} + + % ============================================================================== + \subsection{Combining Physics-Informed and Data-Driven Approaches} + + % ------------------------------------------------------------------------------ + \subsubsection{Detailed Analysis} + """ + # Run test. 
+ self.helper(input_txt, expected) + + def test3(self) -> None: + """ + Test that existing separators are removed and replaced with correct ones. + """ + # Prepare inputs. + input_txt = r""" + % ============== + \section{Introduction} + + % ############## + \subsection{Background} + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + + % ============================================================================== + \subsection{Background} + """ + # Run test. + self.helper(input_txt, expected) + + def test4(self) -> None: + """ + Test that multiple consecutive empty lines are reduced to one. + """ + # Prepare inputs. + input_txt = r""" + \section{Introduction} + + + + This is text after multiple empty lines. + """ + # Prepare outputs. + expected = r""" + % ############################################################################## + \section{Introduction} + + This is text after multiple empty lines. + """ + # Run test. + self.helper(input_txt, expected) + + def test5(self) -> None: + """ + Test with mixed content including text, sections, and empty lines. + """ + # Prepare inputs. + input_txt = r""" + This is some introductory text. + + \section{Methods} + + We describe the methods here. + + + \subsection{Data Collection} + + Details about data collection. + + \subsubsection{Sampling Strategy} + + Sampling details here. + """ + # Prepare outputs. + expected = r""" + This is some introductory text. + + % ############################################################################## + \section{Methods} + + We describe the methods here. + + % ============================================================================== + \subsection{Data Collection} + + Details about data collection. + + % ------------------------------------------------------------------------------ + \subsubsection{Sampling Strategy} + + Sampling details here. + """ + # Run test. 
+ self.helper(input_txt, expected) + + def test6(self) -> None: + """ + Test that lines without section commands are left unchanged. + """ + # Prepare inputs. + input_txt = r""" + This is regular text. + No sections here. + Just content. + """ + # Prepare outputs. + expected = r""" + This is regular text. + No sections here. + Just content. + """ + # Run test. + self.helper(input_txt, expected) + + +# ############################################################################# +# Test_is_latex_comment +# ############################################################################# + + +class Test_is_latex_comment(hunitest.TestCase): + """ + Test the _is_latex_comment function. + """ + + def test1(self) -> None: + """ + Test that a line starting with % is recognized as a comment. + """ + # Prepare inputs. + line = "% This is a comment" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + def test2(self) -> None: + """ + Test that a line with leading whitespace and % is a comment. + """ + # Prepare inputs. + line = " % This is a comment" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + def test3(self) -> None: + """ + Test that a regular line is not recognized as a comment. + """ + # Prepare inputs. + line = "This is regular text" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertFalse(actual) + + def test4(self) -> None: + """ + Test that a line with escaped % character is not a comment. + """ + # Prepare inputs. + line = r"The value is \% of the total" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertFalse(actual) + + def test5(self) -> None: + """ + Test that a line with % in the middle is not a comment. + """ + # Prepare inputs. + line = r"Text before \% and after" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. 
+ self.assertFalse(actual) + + def test6(self) -> None: + """ + Test that a line with only % is a comment. + """ + # Prepare inputs. + line = "%" + # Run test. + actual = hlatex._is_latex_comment(line) + # Check outputs. + self.assertTrue(actual) + + +# ############################################################################# +# Test_extract_latex_section +# ############################################################################# + + +class Test_extract_latex_section(hunitest.TestCase): + """ + Test the _extract_latex_section function. + """ + + def helper( + self, line: str, expected_level: int, expected_title: str + ) -> None: + """ + Helper method to test extraction of LaTeX section commands. + + :param line: LaTeX line to parse + :param expected_level: Expected section level (0 if no section) + :param expected_title: Expected title (empty string if no section) + """ + # Prepare inputs - line_number is arbitrary for testing. + line_number = 1 + # Run test. + header_info = hlatex._extract_latex_section(line, line_number) + # Check outputs. + if expected_level == 0: + # No section expected. + self.assertIsNone(header_info) + else: + # Section expected. + self.assertIsNotNone(header_info) + self.assert_equal(str(header_info.level), str(expected_level)) + self.assert_equal(header_info.description, expected_title) + + def test1(self) -> None: + """ + Test extraction of basic section command. + """ + line = r"\section{Introduction}" + self.helper(line, 1, "Introduction") + + def test2(self) -> None: + """ + Test extraction of basic subsection command. + """ + line = r"\subsection{Background}" + self.helper(line, 2, "Background") + + def test3(self) -> None: + """ + Test extraction of basic subsubsection command. + """ + line = r"\subsubsection{Details}" + self.helper(line, 3, "Details") + + def test4(self) -> None: + """ + Test extraction of section with nested LaTeX commands. 
+ """ + line = r"\section{Introduction to \textbf{Machine Learning}}" + self.helper(line, 1, r"Introduction to \textbf{Machine Learning}") + + def test5(self) -> None: + """ + Test extraction of section with optional short title. + """ + line = r"\section[Short Title]{Long Title for Table of Contents}" + # Should extract the long title (in curly braces). + self.helper(line, 1, "Long Title for Table of Contents") + + def test6(self) -> None: + """ + Test extraction of section with escaped special characters. + """ + line = r"\section{Cost Analysis: \$100 \& More}" + self.helper(line, 1, r"Cost Analysis: \$100 \& More") + + def test7(self) -> None: + """ + Test extraction of section with leading whitespace. + """ + line = r" \section{Methods}" + self.helper(line, 1, "Methods") + + def test8(self) -> None: + """ + Test that a regular line is not recognized as a section. + """ + line = "This is regular text" + self.helper(line, 0, "") + + def test9(self) -> None: + """ + Test that section with empty title is not extracted. + """ + line = r"\section{}" + # Sections with empty titles should not be extracted. + self.helper(line, 0, "") + + +# ############################################################################# +# Test_extract_headers_from_latex +# ############################################################################# + + +class Test_extract_headers_from_latex(hunitest.TestCase): + """ + Test the extract_headers_from_latex function. + """ + + def helper(self, lines: str, expected: str, *, max_level: int = 3) -> None: + """ + Helper method to test header extraction from LaTeX documents. + + :param lines: LaTeX document content as a string + :param expected: Expected string representation of header list + :param max_level: Maximum header level to extract (default: 3) + """ + # Prepare inputs. + lines_list = hprint.dedent(lines).split("\n") + # Run test. 
+ actual = hlatex.extract_headers_from_latex( + lines_list, max_level, sanity_check=False + ) + actual_str = hmarhead.header_list_to_str(actual) + # Prepare outputs. + expected = hprint.dedent(expected) + # Check outputs. + self.assert_equal(actual_str, expected) + + def test1(self) -> None: + """ + Test extraction from a basic LaTeX document with multiple section levels. + """ + # Prepare inputs. + lines = r""" + \section{Introduction} + This is the introduction. + + \subsection{Background} + Background information here. + + \section{Methods} + Methods description. + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Introduction', 1) + HeaderInfo(2, 'Background', 4) + HeaderInfo(1, 'Methods', 7)""" + # Run test. + self.helper(lines, expected) + + def test2(self) -> None: + """ + Test that commented-out sections are skipped. + """ + # Prepare inputs. + lines = r""" + \section{Introduction} + % \section{Old Section} + \subsection{Current Subsection} + % \subsection{Old Subsection} + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Introduction', 1) + HeaderInfo(2, 'Current Subsection', 3)""" + # Run test. + self.helper(lines, expected) + + def test3(self) -> None: + """ + Test that only headers up to max_level are extracted. + """ + # Prepare inputs. + lines = r""" + \section{Chapter 1} + \subsection{Section 1.1} + \subsubsection{Section 1.1.1} + """ + # Prepare outputs. + # Should only get section and subsection, not subsubsection. + expected = """ + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(2, 'Section 1.1', 2)""" + # Run test. + self.helper(lines, expected, max_level=2) + + def test4(self) -> None: + """ + Test extraction with nested LaTeX commands in titles. + """ + # Prepare inputs. + lines = r""" + \section{Introduction to \textbf{ML}} + \subsection{Using \emph{Neural Networks}} + """ + # Prepare outputs. + expected = r""" + HeaderInfo(1, 'Introduction to \textbf{ML}', 1) + HeaderInfo(2, 'Using \emph{Neural Networks}', 2)""" + # Run test. 
+ self.helper(lines, expected) + + def test5(self) -> None: + """ + Test that line numbers are correctly recorded. + """ + # Prepare inputs. + lines = r""" + Some text here. + + \section{First Section} + More text. + + \subsection{First Subsection} + Even more text. + """ + # Prepare outputs. + # Line numbers should be 3 and 6 (1-indexed). + expected = """ + HeaderInfo(1, 'First Section', 3) + HeaderInfo(2, 'First Subsection', 6)""" + # Run test. + self.helper(lines, expected) + + def test6(self) -> None: + """ + Test extraction from document with no sections. + """ + # Prepare inputs. + lines = """ + This is just regular text. + No sections here. + """ + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, expected) + + def test7(self) -> None: + """ + Test extraction with all three section levels. + """ + # Prepare inputs. + lines = r""" + \section{Chapter 1} + Introduction to chapter. + + \subsection{Section 1.1} + Section content. + + \subsubsection{Subsection 1.1.1} + Detailed content. + + \subsection{Section 1.2} + More content. + + \section{Chapter 2} + Second chapter. + """ + # Prepare outputs. + expected = """ + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(2, 'Section 1.1', 4) + HeaderInfo(3, 'Subsection 1.1.1', 7) + HeaderInfo(2, 'Section 1.2', 10) + HeaderInfo(1, 'Chapter 2', 13)""" + # Run test. 
+ self.helper(lines, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py new file mode 100644 index 000000000..f8d9b237d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py @@ -0,0 +1,176 @@ +import logging +from typing import List, Optional + +import helpers.hlist as hlist +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_list_find_duplicates1 +# ############################################################################# + + +class Test_list_find_duplicates1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(list_out, []) + + def test2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(set(list_out), set("a f".split())) + + +# ############################################################################# +# Test_list_remove_duplicates1 +# ############################################################################# + + +class Test_list_remove_duplicates1(hunitest.TestCase): + def test1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d".split()) + + def test2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d e f".split()) + + def test3(self) -> None: + list_ = "a b c a d e f f".split() + list_ = list(reversed(list_)) + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "f e d a c b".split()) + + +# 
############################################################################# +# Test_list_extract1 +# ############################################################################# + + +class Test_list_extract1(hunitest.TestCase): + def _helper( + self, + start_idx: Optional[int], + end_idx: Optional[int], + expected_list: List[str], + ) -> None: + list_ = "a b c d".split() + actual_list = hlist.extract(list_, start_idx, end_idx) + self.assertEqual(actual_list, expected_list) + + def test1(self) -> None: + start_idx = 0 + end_idx = 1 + expected_list = "a".split() + self._helper(start_idx, end_idx, expected_list) + + def test2(self) -> None: + start_idx = 1 + end_idx = None + expected_list = "b c d".split() + self._helper(start_idx, end_idx, expected_list) + + def test3(self) -> None: + start_idx = None + end_idx = None + expected_list = "a b c d".split() + self._helper(start_idx, end_idx, expected_list) + + def test4(self) -> None: + start_idx = None + end_idx = 2 + expected_list = "a b".split() + self._helper(start_idx, end_idx, expected_list) + + def test5(self) -> None: + start_idx = None + end_idx = 2 + expected_list = "a b".split() + self._helper(start_idx, end_idx, expected_list) + + def test6(self) -> None: + start_idx = 0 + end_idx = 4 + expected_list = "a b c d".split() + self._helper(start_idx, end_idx, expected_list) + + def test7(self) -> None: + start_idx = 0 + end_idx = 3 + expected_list = "a b c".split() + self._helper(start_idx, end_idx, expected_list) + + +# ############################################################################# +# Test_list_chunk1 +# ############################################################################# + + +class Test_list_chunk1(hunitest.TestCase): + def _helper(self, n: int, expected_list: List[List[str]]) -> None: + list_ = "a b c d e f".split() + actual_list = hlist.chunk(list_, n) + self.assertEqual(actual_list, expected_list) + + def test1(self) -> None: + n = 1 + expected_list = ["a b c d e f".split()] + 
self._helper(n, expected_list) + + def test2(self) -> None: + n = 2 + expected_list = [["a", "b", "c"], ["d", "e", "f"]] + self._helper(n, expected_list) + + def test3(self) -> None: + n = 3 + expected_list = [["a", "b"], ["c", "d"], ["e", "f"]] + self._helper(n, expected_list) + + def test4(self) -> None: + n = 4 + expected_list = [["a", "b"], ["c", "d"], ["e"], ["f"]] + self._helper(n, expected_list) + + def test5(self) -> None: + n = 6 + expected_list = [["a"], ["b"], ["c"], ["d"], ["e"], ["f"]] + self._helper(n, expected_list) + + +# ############################################################################# +# Test_list1 +# ############################################################################# + + +class Test_list1(hunitest.TestCase): + def test_find_duplicates1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(list_out, []) + + def test_find_duplicates2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.find_duplicates(list_) + self.assertEqual(set(list_out), set("a f".split())) + + def test_remove_duplicates1(self) -> None: + list_ = "a b c d".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d".split()) + + def test_remove_duplicates2(self) -> None: + list_ = "a b c a d e f f".split() + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "a b c d e f".split()) + + def test_remove_duplicates3(self) -> None: + list_ = "a b c a d e f f".split() + list_ = list(reversed(list_)) + list_out = hlist.remove_duplicates(list_) + self.assertEqual(list_out, "f e d a c b".split()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py new file mode 100644 index 000000000..820d21519 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py @@ -0,0 +1,361 @@ +import os +import types +import unittest.mock as umock +from typing import Any, Dict + +import pandas as pd +import pytest + +pytest.importorskip("openai") # noqa: E402 # pylint: disable=wrong-import-position +import helpers.hdbg as hdbg # noqa: E402 +import helpers.hllm as hllm # noqa: E402 +import helpers.hunit_test as hunitest # noqa: E402 + +_USER_PROMPT1 = "what is machine learning?" +_USER_PROMPT2 = _USER_PROMPT1.upper() + +_SYSTEM_PROMPT1 = "You are a helpful AI assistant." +_SYSTEM_PROMPT2 = ( + "You are a helpful AI assistant and excellent in explaining things." +) + +_TEMPERATURE1 = 0.1 +_TEMPERATURE2 = 0.2 + +_TOP_P1 = 0.5 + +_MODEL1 = "gpt-4o-mini" +_MODEL2 = "gpt-3.5-turbo" +_MODEL3 = "deepseek/deepseek-r1-0528-qwen3-8b:free" +_MODEL4 = "openai/gpt-4o-mini" + + +# Test functions for the unit tests. +def _get_completion_parameters1() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT1, + "system_prompt": _SYSTEM_PROMPT1, + "temperature": _TEMPERATURE1, + "model": _MODEL1, + } + return data + + +def _get_completion_parameters2() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT2, + "system_prompt": _SYSTEM_PROMPT2, + "temperature": _TEMPERATURE2, + "model": _MODEL2, + "top_p": _TOP_P1, + } + return data + + +def _get_completion_parameters3() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT2, + "system_prompt": _SYSTEM_PROMPT2, + "temperature": _TEMPERATURE2, + "model": _MODEL3, + "top_p": _TOP_P1, + } + return data + + +def _get_completion_parameters4() -> Dict[str, Any]: + data = { + "user_prompt": _USER_PROMPT1, + "system_prompt": _SYSTEM_PROMPT1, + "temperature": _TEMPERATURE1, + "model": _MODEL4, + } + return data + + +# ############################################################################# +# Test_get_completion +# 
############################################################################# + + +class Test_get_completion(hunitest.TestCase): + def test1(self) -> None: + """ + Verify that get_completion() returns response from cache with the + expected response. + """ + parameters1 = _get_completion_parameters1() + actual_response = hllm.get_completion( + **parameters1, cache_mode="HIT_CACHE_OR_ABORT" + ) + self.assertIsInstance(actual_response, str) + self.check_string(actual_response) + + def test2(self) -> None: + """ + Verify with different openai models. + """ + parameters2 = _get_completion_parameters2() + actual_response = hllm.get_completion( + **parameters2, cache_mode="HIT_CACHE_OR_ABORT" + ) + self.assertIsInstance(actual_response, str) + self.check_string(actual_response) + + def test3(self) -> None: + """ + Verify that hllm.get_completion() supports OpenRouter models. + """ + parameters3 = _get_completion_parameters3() + actual_response = hllm.get_completion( + **parameters3, cache_mode="HIT_CACHE_OR_ABORT" + ) + self.assertIsInstance(actual_response, str) + self.check_string(actual_response) + + def test4(self) -> None: + """ + Verify with OpenAI-prefixed models. + """ + parameters4 = _get_completion_parameters4() + actual_response = hllm.get_completion( + **parameters4, cache_mode="HIT_CACHE_OR_ABORT" + ) + self.assertIsInstance(actual_response, str) + self.check_string(actual_response) + + +# ############################################################################# +# Test_response_to_txt +# ############################################################################# + + +class Test_response_to_txt(hunitest.TestCase): + # Dummy classes to satisfy `isinstance` checks.
+ + class DummyChatCompletion: + def __init__(self, text: str = "") -> None: + msg = types.SimpleNamespace(content=text) + choice = types.SimpleNamespace(message=msg) + self.choices = [choice] + + class DummyThreadMessage: + def __init__(self, text: str = "") -> None: + # mimic .content[0].text.value + value_obj = types.SimpleNamespace(value=text) + text_obj = types.SimpleNamespace(text=value_obj) + self.content = [text_obj] + + @umock.patch( + "openai.types.chat.chat_completion.ChatCompletion", + new=DummyChatCompletion, + ) + def test_chat_completion_branch(self) -> None: + resp = Test_response_to_txt.DummyChatCompletion("hello chat") + actual = hllm.response_to_txt(resp) + expected = "hello chat" + self.assert_equal(actual, expected) + + @umock.patch( + "openai.types.beta.threads.message.Message", + new=DummyThreadMessage, + ) + def test_thread_message_branch(self) -> None: + resp = Test_response_to_txt.DummyThreadMessage("thread reply") + actual = hllm.response_to_txt(resp) + expected = "thread reply" + self.assert_equal(actual, expected) + + def test_str_pass_through(self) -> None: + actual = hllm.response_to_txt("just a string") + expected = "just a string" + self.assert_equal(actual, expected) + + def test_unknown_type_raises(self) -> None: + with self.assertRaises(ValueError) as cm: + hllm.response_to_txt(12345) + self.assertIn("Unknown response type", str(cm.exception)) + + +# ############################################################################# +# Test_retrieve_openrouter_model_info +# ############################################################################# + + +class Test_retrieve_openrouter_model_info(hunitest.TestCase): + @umock.patch("requests.get") + def test_retrieve_success(self, mock_get) -> None: + # Prepare dummy JSON data. 
+ data = [ + {"id": "model1", "name": "Model One"}, + {"id": "model2", "name": "Model Two"}, + ] + mock_response = umock.Mock() + mock_response.json.return_value = {"data": data} + mock_get.return_value = mock_response + # Call the function under test. + df = hllm._retrieve_openrouter_model_info() + # Build expected DataFrame. + expected_df = pd.DataFrame(data) + # Verify DataFrame content. + self.assertEqual( + df.to_dict(orient="records"), expected_df.to_dict(orient="records") + ) + # Ensure the correct URL was requested. + mock_get.assert_called_once_with("https://openrouter.ai/api/v1/models") + + @umock.patch("requests.get") + def test_missing_data_key_raises(self, mock_get) -> None: + # JSON missing the 'data' key. + mock_response = umock.Mock() + mock_response.json.return_value = {"wrong": []} + mock_get.return_value = mock_response + # Expect an assertion from hdbg.dassert_eq. + with self.assertRaises(AssertionError): + hllm._retrieve_openrouter_model_info() + + +# ############################################################################# +# Test_save_models_info_to_csv +# ############################################################################# + + +class Test_save_models_info_to_csv(hunitest.TestCase): + def get_temp_path(self) -> str: + """ + Helper function for creating temporary directory. + """ + self.tmp_dir = self.get_scratch_space() + tmp_file_name = "tmp.models_info.csv" + self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) + return self.tmp_path + + def test_save_models_info(self) -> None: + """ + Save Dataframe as a CSV and check. + """ + # Prepare a DataFrame with extra columns. 
+ data = [ + { + "id": "m1", + "name": "Model1", + "description": "desc1", + "pricing": {"prompt": 0.1, "completion": 0.2}, + "supported_parameters": ["a", "b"], + "extra_col": 123, + }, + { + "id": "m2", + "name": "Model2", + "description": "desc2", + "pricing": {"prompt": 0.3, "completion": 0.4}, + "supported_parameters": ["c"], + "extra_col": 456, + }, + ] + df = pd.DataFrame(data) + output_file: str = self.get_temp_path() + # Call the function under test. + returned_df = hllm._save_models_info_to_csv(df, output_file) + # The returned DataFrame should have only the selected columns. + expected_columns = [ + "id", + "name", + "description", + "prompt_pricing", + "completion_pricing", + "supported_parameters", + ] + hdbg.dassert_eq(list(returned_df.columns), expected_columns) + # Verify pricing values are extracted correctly. + self.assert_equal( + str(returned_df["prompt_pricing"]), + str(pd.Series([0.1, 0.3], name="prompt_pricing", dtype=float)), + ) + self.assert_equal( + str(returned_df["completion_pricing"]), + str(pd.Series([0.2, 0.4], name="completion_pricing", dtype=float)), + ) + # File should be created and readable. + hdbg.dassert_file_exists(output_file) + saved_df = pd.read_csv(output_file) + self.assert_equal( + str(returned_df["completion_pricing"]), + str(saved_df["completion_pricing"]), + ) + self.assert_equal( + str(returned_df["prompt_pricing"]), str(saved_df["prompt_pricing"]) + ) + + +# ############################################################################# +# Test_calculate_cost +# ############################################################################# + + +class Test_calculate_cost(hunitest.TestCase): + def get_tmp_path(self) -> str: + """ + Return temporary file path. 
+ """ + self.tmp_dir = self.get_scratch_space() + tmp_file_name: str = "tmp.models_info.csv" + self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) + return self.tmp_path + + def test_openai_cost(self) -> None: + """ + Known OpenAI model and token counts produce expected cost. + """ + comp = types.SimpleNamespace( + usage=types.SimpleNamespace( + prompt_tokens=1000000, completion_tokens=2000000 + ) + ) + llm_cost_tracker = hllm.LLMCostTracker() + cost = llm_cost_tracker.calculate_cost( + comp, model="gpt-3.5-turbo", models_info_file="" + ) + # 1000000*(0.5/1000000) + 20000000*(1.5/1000000) = 3.5 + self.assertAlmostEqual(cost, 3.5) + + def test_openai_unknown_model(self) -> None: + """ + Passing an unknown OpenAI model should raise an assertion or + ValueError. + """ + comp = types.SimpleNamespace( + usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) + ) + llm_cost_tracker = hllm.LLMCostTracker() + with pytest.raises(AssertionError): + llm_cost_tracker.calculate_cost( + comp, model="nonexistent-model", models_info_file="" + ) + + def test_openrouter_load_existing_csv(self) -> None: + """ + Assume that the CSV file exists for OpenRouter. + + Then we should load CSV and calculate cost without fetching. 
+ """ + # Write a tiny CSV: id,prompt_pricing,completion_pricing + temp_csv_file = self.get_tmp_path() + pd.DataFrame( + { + "id": ["deepseek/m1"], + "prompt_pricing": [0.1], + "completion_pricing": [0.2], + } + ).to_csv(temp_csv_file, index=False) + comp = types.SimpleNamespace( + usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) + ) + llm_cost_tracker = hllm.LLMCostTracker() + cost = llm_cost_tracker.calculate_cost( + comp, + model="deepseek/m1", + models_info_file=temp_csv_file, + ) + # 1*0.1 + 1*0.2 = 0.1 + 0.2 = 0.3 + self.assertAlmostEqual(cost, 0.3) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py new file mode 100644 index 000000000..fc684420b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py @@ -0,0 +1,1403 @@ +import logging +import os +import time +from typing import Callable, Dict, Optional + +import pandas as pd +import pytest + +import helpers.hcache_simple as hcacsimp +import helpers.hio as hio +import helpers.hllm_cli as hllmcli +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +from helpers.test.test_hcache_simple import _BaseCacheTest + +_LOG = logging.getLogger(__name__) + +# Disable calling LLM when testing. +_RUN_REAL_LLM = False +# _RUN_REAL_LLM = True + +# ############################################################################# +# Test_apply_llm_with_files +# ############################################################################# + +# Test cases shared across both library and executable tests. +# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. 
+_TEST_CASES = [ + # llm_cli.py --input_file input.txt --output_file output.txt + ( + "Basic usage with input file", + {}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful math assistant. Solve the problem step by step." + ( + "With custom system prompt", + { + "system_prompt": "You are a helpful math assistant. Solve the problem step by step." + }, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --model gpt-4 + ( + "With specific model selection", + {"model": "gpt-4"}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --expected_num_chars 500 + ( + "With progress bar (expected character count)", + {"expected_num_chars": 500}, + ), + # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful assistant that provides concise answers" --model gpt-4o-mini --expected_num_chars 1000 + ( + "Complete example with all options", + { + "system_prompt": "You are a helpful assistant that provides concise answers", + "model": "gpt-4o-mini", + "expected_num_chars": 1000, + }, + ), +] + +# Test cases for input_text functionality. +# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. +_TEST_CASES_INPUT_TEXT = [ + # llm_cli.py --input_text "2+2=" --output_file output.txt + ( + "Basic usage with input text", + { + "input_text": "2+2=", + }, + ), + # llm_cli.py --input_text "What is Python?" --output_file output.txt --system_prompt "You are a helpful assistant" + ( + "With input text and system prompt", + { + "input_text": "What is Python?", + "system_prompt": "You are a helpful assistant", + }, + ), + # llm_cli.py --input_text "Explain recursion" --output_file output.txt --model gpt-4o-mini + ( + "With input text and specific model", + { + "input_text": "Explain recursion", + "model": "gpt-4o-mini", + }, + ), +] + +# Test cases for print_only functionality. 
# Each tuple contains (description, kwargs) and corresponding llm_cli.py command.
_TEST_CASES_PRINT_ONLY = [
    # llm_cli.py --input_text "2+2=" --output_file -
    (
        "Print to screen with input text",
        {
            "input_text": "2+2=",
            "print_only": True,
        },
    ),
]


# #############################################################################
# TestApplyLlmBase
# #############################################################################


class TestApplyLlmBase(_BaseCacheTest):
    """
    Base class with helper methods for testing apply_llm functions.

    Provides common helper methods used across different test classes to
    reduce code duplication and maintain consistency.
    """

    def _run_test_cases(self, use_llm_executable: bool) -> None:
        """
        Run every scenario in `_TEST_CASES` through `apply_llm_with_files()`.

        A single input file containing "2+2=" is written to the scratch
        space and reused by all scenarios; each scenario writes to its own
        `output_<idx>.txt`. Only the existence and non-emptiness of the output
        file are checked.

        :param use_llm_executable: if True, use CLI executable; if False, use library
        """
        # Get scratch space for test files.
        scratch_dir = self.get_scratch_space()
        # Create input file.
        input_file = os.path.join(scratch_dir, "input.txt")
        hio.to_file(input_file, "2+2=")
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            output_file = os.path.join(scratch_dir, f"output_{idx}.txt")
            # Run test.
            hllmcli.apply_llm_with_files(
                input_file=input_file,
                output_file=output_file,
                use_llm_executable=use_llm_executable,
                **kwargs,
            )
            # Check that output file was created.
            self.assertTrue(os.path.exists(output_file))
            # Check that output file is not empty.
            output_content = hio.from_file(output_file)
            self.assertGreater(len(output_content), 0)

    def _run_test_cases_input_text(self, use_llm_executable: bool) -> None:
        """
        Run every scenario in `_TEST_CASES_INPUT_TEXT` through `apply_llm()`.

        The `input_text` entry is removed from a copy of the scenario kwargs
        and passed positionally; the remaining kwargs are forwarded. The
        response is written to `output_text_<idx>.txt` and only checked for
        being non-empty.

        :param use_llm_executable: if True, use CLI executable; if False, use library
        """
        # Get scratch space for test files.
        scratch_dir = self.get_scratch_space()
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES_INPUT_TEXT, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            output_file = os.path.join(scratch_dir, f"output_text_{idx}.txt")
            # Extract input_text from kwargs (on a copy, so that the shared
            # module-level table is not mutated).
            kwargs_copy = kwargs.copy()
            input_text = kwargs_copy.pop("input_text")
            # Run test using apply_llm directly.
            response = hllmcli.apply_llm(
                input_text,
                use_llm_executable=use_llm_executable,
                **kwargs_copy,
            )
            # Write output to file.
            hio.to_file(output_file, response)
            # Check that output file was created.
            self.assertTrue(os.path.exists(output_file))
            # Check that output file is not empty.
            output_content = hio.from_file(output_file)
            self.assertGreater(len(output_content), 0)


# #############################################################################
# Test_apply_llm_with_files1
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_apply_llm_with_files1(TestApplyLlmBase):
    """
    Test apply_llm_with_files using both library and executable interfaces.

    Tests run various command-line configurations to ensure they execute
    without errors. Does not verify output correctness.
    """

    def test_library(self) -> None:
        """
        Test multiple command-line configurations using library interface.

        Tests various command-line argument combinations to ensure they
        execute without errors. Does not verify output correctness.
        """
        self._run_test_cases(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test_executable(self) -> None:
        """
        Test multiple command-line configurations using executable interface.

        Tests various command-line argument combinations to ensure they
        execute without errors. Does not verify output correctness.
        """
        self._run_test_cases(use_llm_executable=True)


# #############################################################################
# Test_apply_llm_with_files2
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_apply_llm_with_files2(TestApplyLlmBase):
    """
    Test `apply_llm()` with inline input text and with print-only scenarios.

    Scenarios come from `_TEST_CASES_INPUT_TEXT` and `_TEST_CASES_PRINT_ONLY`.
    Does not verify output correctness.
    """

    def test1_library(self) -> None:
        """
        Test input_text parameter using library interface.

        Tests that input_text parameter works correctly when text is provided
        directly instead of from a file. Does not verify output correctness.
        """
        self._run_test_cases_input_text(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test1_executable(self) -> None:
        """
        Test input_text parameter using executable interface.

        Tests that input_text parameter works correctly when text is provided
        directly instead of from a file. Does not verify output correctness.
        """
        self._run_test_cases_input_text(use_llm_executable=True)

    # //////////////////////////////////////////////////////////////////////////

    def _run_test_cases_print_only(self, use_llm_executable: bool) -> None:
        """
        Run every scenario in `_TEST_CASES_PRINT_ONLY` through `apply_llm()`.

        The `input_text` and `print_only` entries are removed from a copy of
        the scenario kwargs; the response is printed instead of being written
        to a file. No assertion is made on the response.

        :param use_llm_executable: if True, use CLI executable; if False, use library
        """
        # Run each test case.
        for idx, (description, kwargs) in enumerate(_TEST_CASES_PRINT_ONLY, 1):
            _LOG.info("Running test case %d: %s", idx, description)
            # Extract parameters from kwargs.
            kwargs_copy = kwargs.copy()
            input_text = kwargs_copy.pop("input_text")
            kwargs_copy.pop("print_only")  # Not needed for apply_llm
            # Run test using apply_llm directly - this should print to stdout.
            response = hllmcli.apply_llm(
                input_text,
                use_llm_executable=use_llm_executable,
                **kwargs_copy,
            )
            # Print response to stdout (simulating print_only behavior).
            print(response)

    def test2_library(self) -> None:
        """
        Test print_only parameter using library interface.

        Tests that print_only parameter works correctly when output should be
        printed to screen instead of written to file. Does not verify output
        correctness.
        """
        self._run_test_cases_print_only(use_llm_executable=False)

    @pytest.mark.skipif(
        not hllmcli._check_llm_executable(), reason="llm executable not found"
    )
    def test2_executable(self) -> None:
        """
        Test print_only parameter using executable interface.

        Tests that print_only parameter works correctly when output should be
        printed to screen instead of written to file. Does not verify output
        correctness.
        """
        self._run_test_cases_print_only(use_llm_executable=True)


# #############################################################################
# Test_llm1
# #############################################################################


@pytest.mark.skipif(
    not _RUN_REAL_LLM,
    reason="Real LLM not enabled",
)
class Test_llm1(hunitest.TestCase):
    """
    Test _llm() function with different models and prompt lengths.

    Tests verify that _llm() correctly processes prompts of varying lengths
    across different models, and tracks timing and cost information.
    """

    @staticmethod
    def get_short_prompt() -> str:
        """
        Get a short test prompt.

        :return: short system prompt string
        """
        prompt = "You are a helpful assistant. Answer concisely."
        return prompt

    @staticmethod
    def get_medium_prompt() -> str:
        """
        Get a medium-length test prompt.

        :return: medium-length system prompt string
        """
        prompt = """
        You are a helpful assistant. Your task is to provide clear and
        accurate answers to questions. Always be concise but thorough in
        your explanations. If you don't know something, acknowledge it.
        Use simple language that anyone can understand.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    @staticmethod
    def get_long_prompt() -> str:
        """
        Get a long test prompt.

        :return: long system prompt string
        """
        prompt = """
        You are a highly knowledgeable AI assistant with expertise across
        multiple domains including technology, science, mathematics, and
        general knowledge. Your primary objectives are:

        1. Provide accurate and well-researched information
        2. Explain concepts clearly and thoroughly
        3. Use examples when they help clarify complex topics
        4. Cite sources or acknowledge uncertainty when appropriate
        5. Adapt your language to the user's level of understanding
        6. Break down complex problems into manageable steps
        7. Verify calculations and logical reasoning before responding
        8. Consider multiple perspectives when discussing controversial topics

        When answering questions:
        - Start with a direct answer to the question
        - Follow with supporting details and context
        - Use bullet points or numbered lists for clarity
        - Provide examples when helpful
        - Suggest follow-up resources if relevant

        Always maintain a professional, helpful, and respectful tone.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    def test1(self) -> None:
        """
        Test _llm() with multiple models and prompt lengths.

        Tests short, medium, and long prompts across different models to
        verify proper handling and cost calculation. Reports results in a
        comprehensive table with time, cost, and cost-per-character metrics.
        """
        # NOTE(review): the cache property is set for "_test_llm" while the
        # function called below is `hllmcli._llm` -- confirm that this is the
        # intended cache key.
        hcacsimp.set_cache_property("_test_llm", "mode", "DISABLE_CACHE")
        # Define test configurations with model-specific inputs.
        # Questions are designed to elicit longer responses for more accurate cost
        # comparisons.
        test_configs = [
            (
                "gpt-5-nano",
                "Explain the concept of machine learning and provide examples of its applications in real-world scenarios.",
            ),
            (
                "gpt-4o-mini",
                "Describe the history and culture of Paris, France, including its major landmarks and contributions to art and literature.",
            ),
            (
                "gpt-4o",
                "Explain what recursion is in computer science, provide multiple examples with code, and discuss when to use recursion versus iteration.",
            ),
        ]
        # Store results for tabular reporting.
        results = []
        # Run tests for each model and prompt type combination.
        for model, input_str in test_configs:
            for prompt_type, prompt_getter in [
                ("short", self.get_short_prompt),
                ("medium", self.get_medium_prompt),
                ("long", self.get_long_prompt),
            ]:
                _LOG.info("Testing model=%s with %s prompt", model, prompt_type)
                system_prompt = prompt_getter()
                # Run test.
                start_time = time.time()
                response, cost = hllmcli._llm(system_prompt, input_str, model)
                elapsed_time = time.time() - start_time
                # Check outputs.
                self.assertIsInstance(response, str)
                self.assertGreater(len(response), 0)
                self.assertIsInstance(cost, float)
                self.assertGreaterEqual(cost, 0.0)
                # Calculate cost per character and cost per 1M characters.
                response_len = len(response)
                cost_per_char = cost / response_len if response_len > 0 else 0.0
                # `cost_per_char` is already 0.0 for an empty response, so no
                # second guard is needed here.
                cost_per_1m_chars = cost_per_char * 1_000_000
                # Store results.
                results.append(
                    {
                        "Model": model,
                        "Prompt Type": prompt_type,
                        "Time (s)": elapsed_time,
                        "Cost ($)": cost,
                        "Response Length": response_len,
                        "Cost/Char ($)": cost_per_char,
                        "Cost/1M Chars ($)": cost_per_1m_chars,
                    }
                )
        # Create DataFrame for tabular display.
        results_df = pd.DataFrame(results)
        # Format numeric columns.
        results_df["Time (s)"] = results_df["Time (s)"].round(2)
        results_df["Cost ($)"] = results_df["Cost ($)"].round(6)
        results_df["Cost/Char ($)"] = results_df["Cost/Char ($)"].round(8)
        results_df["Cost/1M Chars ($)"] = results_df["Cost/1M Chars ($)"].round(
            2
        )
        # Log results table.
        _LOG.info("\n%s", hprint.frame("LLM Test Results"))
        # Lift all pandas display limits so that the full table is logged.
        with pd.option_context(
            "display.max_columns",
            None,
            "display.max_rows",
            None,
            "display.width",
            None,
            "display.max_colwidth",
            None,
        ):
            _LOG.info("\n%s", results_df.to_string(index=False))


# #############################################################################
# _eval_functor
# #############################################################################


def _eval_functor(input_str: str, *, delay: float = 0.0) -> str:
    """
    Evaluate the input string using eval and return the result as a string.

    This stands in for a real LLM call in the tests below.

    :param input_str: mathematical expression to evaluate
    :param delay: number of seconds to sleep before evaluating; no sleep when
        it is not positive
    :return: result of evaluation as a string
    """
    _LOG.debug("input_str='%s'", input_str)
    if delay > 0.0:
        time.sleep(delay)
    # NOTE: `eval` executes arbitrary code. In this file it only receives
    # hard-coded arithmetic expressions from the tests; never pass external
    # input to this function.
    result = eval(input_str)
    result_str = str(result)
    _LOG.debug("-> result_str='%s'", result_str)
    return result_str


# #############################################################################
# Test_apply_llm_batch1
# #############################################################################


class Test_apply_llm_batch1(hunitest.TestCase):
    """
    Test and compare three batch processing approaches.

    Tests:
    - apply_llm_batch_individual()
    - apply_llm_batch_with_shared_prompt()
    - apply_llm_batch_combined()
    to verify they return consistent results using a testing functor that uses
    eval.
    """

    @staticmethod
    def get_test_prompt() -> str:
        """
        Get a simple test prompt for batch processing.

        :return: system prompt string
        """
        prompt = "You are a calculator. Return only the numeric result."
        return prompt

    def helper(
        self,
        model: str,
        func: Callable,
        testing_functor: Optional[Callable[[str], str]],
    ) -> None:
        """
        Helper function to run a batch processing function with test inputs.

        :param model: model name forwarded to `func`
        :param func: batch processing function to test
        :param testing_functor: optional testing functor for mocking; when it
            is None the returned cost is expected to be positive, otherwise
            exactly 0.0
        """
        # NOTE(review): `trace` is not a method of the stdlib `logging.Logger`;
        # presumably it is registered by the project's logging helpers --
        # confirm.
        _LOG.trace(hprint.to_str("model func testing_functor"))
        # Create test inputs.
        prompt = self.get_test_prompt()
        input_list = ["2 + 2", "3 * 3", "10 - 5", "20 / 4"]
        expected_responses = ["4", "9", "5", "5"]
        # Run the function.
        responses, cost = func(
            prompt=prompt,
            input_list=input_list,
            model=model,
            testing_functor=testing_functor,
        )
        # Check basic properties.
        # Normalize numeric formatting first, e.g., "5.0" -> "5".
        responses = [str(int(float(r))) for r in responses]
        self.assertEqual(responses, expected_responses)
        if testing_functor is None:
            self.assertGreater(cost, 0.0)
        else:
            self.assertEqual(cost, 0.0)

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_individual1(self) -> None:
        """
        Test apply_llm_batch_individual without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        func = hllmcli.apply_llm_batch_individual
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_individual2(self) -> None:
        """
        Test apply_llm_batch_individual with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_individual
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_shared1(self) -> None:
        """
        Test apply_llm_batch_with_shared_prompt without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        func = hllmcli.apply_llm_batch_with_shared_prompt
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_shared2(self) -> None:
        """
        Test apply_llm_batch_with_shared_prompt with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_with_shared_prompt
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test_combined1(self) -> None:
        """
        Test apply_llm_batch_combined without testing_functor.

        This test uses the real LLM API.
        """
        model = "gpt-5-nano"
        # model = "gpt-4o-mini"
        func = hllmcli.apply_llm_batch_combined
        testing_functor = None
        self.helper(
            model,
            func,
            testing_functor,
        )

    def test_combined2(self) -> None:
        """
        Test apply_llm_batch_combined with testing_functor.

        This test uses a mock calculator instead of the real LLM API.
        """
        model = ""
        func = hllmcli.apply_llm_batch_combined
        testing_functor = _eval_functor
        self.helper(
            model,
            func,
            testing_functor,
        )


# #############################################################################
# Test_apply_llm_prompt_to_df1
# #############################################################################


class Test_apply_llm_prompt_to_df1(hunitest.TestCase):
    """
    Test apply_llm_prompt_to_df with testing_functor.

    This is used to test the logic around `apply_llm_batch_*()` functions.
    """

    @staticmethod
    def _extract_expression(obj) -> str:
        """
        Extract mathematical expression from a DataFrame row or string.

        :param obj: either a string or a pandas Series
        :return: extracted string for evaluation; an empty string when the
            row has no "expression" entry or the value is missing / empty
        """
        if isinstance(obj, pd.Series):
            # Extract from DataFrame row.
            if "expression" in obj.index:
                expr = obj["expression"]
                # Handle None, NaN, or empty string.
                if pd.isna(expr) or expr == "":
                    return ""
                return str(expr)
            return ""
        else:
            # Already a string.
            if pd.isna(obj) or obj == "":
                return ""
            return str(obj)

    def helper(
        self,
        df: pd.DataFrame,
        batch_size: int,
        expected_df: pd.DataFrame,
        expected_stats: Dict[str, int],
    ) -> None:
        """
        Test apply_llm_prompt_to_df with testing_functor that uses eval.

        :param df: input dataframe with an "expression" column
        :param batch_size: batch size forwarded to `apply_llm_prompt_to_df()`
        :param expected_df: expected output dataframe, compared through its
            string representation
        :param expected_stats: expected stats, excluding
            "elapsed_time_in_seconds", which is only checked to be positive
        """
        # Prepare inputs.
        prompt = "Dummy"
        extractor = self._extract_expression
        # To test the progress bar.
        # delay = 0.5
        delay = 0.0

        def testing_functor(input_str: str) -> str:
            # Bind `delay` so that the functor takes only the input string.
            return _eval_functor(input_str, delay=delay)

        # Run test.
        result_df, stats = hllmcli.apply_llm_prompt_to_df(
            prompt=prompt,
            df=df,
            extractor=extractor,
            target_col="result",
            batch_mode="individual",
            batch_size=batch_size,
            model="gpt-5-nano",
            testing_functor=testing_functor,
            use_sys_stderr=True,
        )
        # Check outputs.
        self.assert_equal(str(result_df), str(expected_df))
        # The elapsed time varies between runs, so remove it before comparing
        # the remaining stats exactly.
        elapsed_time = stats.pop("elapsed_time_in_seconds")
        self.assertGreater(elapsed_time, 0.0)
        self.assertEqual(stats, expected_stats)

    def helper_test1(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with testing_functor that uses eval.

        :param batch_size: batch size to use
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"],
                "result": ["5", "50", "75", "5.0"],
            }
        )
        num_items = len(df)
        expected_stats = {
            "num_items": num_items,
            "num_skipped": 0,
            # Ceiling division.
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test2(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with larger dataframe and batch_size > 1.

        :param batch_size: batch size to use
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": [
                    "1 + 1",
                    "2 * 3",
                    "10 - 5",
                    "20 / 4",
                    "3 ** 2",
                    "100 // 3",
                    "15 % 4",
                ],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": [
                    "1 + 1",
                    "2 * 3",
                    "10 - 5",
                    "20 / 4",
                    "3 ** 2",
                    "100 // 3",
                    "15 % 4",
                ],
                "result": ["2", "6", "5", "5.0", "9", "33", "3"],
            }
        )
        num_items = len(df)
        expected_stats = {
            "num_items": num_items,
            "num_skipped": 0,
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test3(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with pre-filled target column values.

        This test verifies that all rows are processed and pre-filled values
        are overwritten with computed results from the testing_functor.

        :param batch_size: batch size to use
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": [
                    "5 + 5",
                    "3 * 4",
                    "20 - 8",
                    "16 / 2",
                    "2 ** 3",
                ],
            }
        )
        # Pre-fill some values in the target column.
        df["result"] = [None, "12", None, None, "8"]
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": [
                    "5 + 5",
                    "3 * 4",
                    "20 - 8",
                    "16 / 2",
                    "2 ** 3",
                ],
                "result": ["10", "12", "12", "8.0", "8"],
            }
        )
        num_items = len(df)
        expected_stats = {
            "num_items": num_items,
            "num_skipped": 0,
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test4(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with rows that have empty extraction results.

        This test verifies that rows with empty or None expressions are skipped
        and marked with empty string in the result column.

        :param batch_size: batch size to use
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"],
                "result": ["10", "", "20", "", "30"],
            }
        )
        num_items = len(df)
        expected_stats = {
            "num_items": num_items,
            "num_skipped": 2,
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    def helper_test5(self, batch_size: int) -> None:
        """
        Test apply_llm_prompt_to_df with batch where all items have missing data.

        This test verifies that batches with all empty/None items are skipped
        entirely and the else branch is executed.

        :param batch_size: batch size to use
        """
        # Prepare inputs.
        df = pd.DataFrame(
            {
                "expression": ["1 + 1", "", None, "", "5 + 5"],
            }
        )
        # Prepare outputs.
        expected_df = pd.DataFrame(
            {
                "expression": ["1 + 1", "", None, "", "5 + 5"],
                "result": ["2", "", "", "", "10"],
            }
        )
        num_items = len(df)
        expected_stats = {
            "num_items": num_items,
            "num_skipped": 3,
            "num_batches": (num_items + batch_size - 1) // batch_size,
            "total_cost_in_dollars": 0.0,
        }
        # Run test.
        self.helper(df, batch_size, expected_df, expected_stats)

    # batch_size=1

    def test1_num_batch1(self) -> None:
        self.helper_test1(batch_size=1)

    def test2_num_batch1(self) -> None:
        self.helper_test2(batch_size=1)

    def test3_num_batch1(self) -> None:
        self.helper_test3(batch_size=1)

    def test4_num_batch1(self) -> None:
        self.helper_test4(batch_size=1)

    def test5_num_batch1(self) -> None:
        self.helper_test5(batch_size=1)

    # batch_size=2

    def test1_num_batch2(self) -> None:
        self.helper_test1(batch_size=2)

    def test2_num_batch2(self) -> None:
        self.helper_test2(batch_size=2)

    def test3_num_batch2(self) -> None:
        self.helper_test3(batch_size=2)

    def test4_num_batch2(self) -> None:
        self.helper_test4(batch_size=2)

    def test5_num_batch2(self) -> None:
        self.helper_test5(batch_size=2)

    # batch_size=3

    def test1_num_batch3(self) -> None:
        self.helper_test1(batch_size=3)

    def test2_num_batch3(self) -> None:
        self.helper_test2(batch_size=3)

    def test3_num_batch3(self) -> None:
        self.helper_test3(batch_size=3)

    def test4_num_batch3(self) -> None:
        self.helper_test4(batch_size=3)

    def test5_num_batch3(self) -> None:
        self.helper_test5(batch_size=3)

    # batch_size=10

    def test1_num_batch10(self) -> None:
        self.helper_test1(batch_size=10)

    def test2_num_batch10(self) -> None:
        self.helper_test2(batch_size=10)

    def test3_num_batch10(self) -> None:
        self.helper_test3(batch_size=10)

    def test4_num_batch10(self) -> None:
        self.helper_test4(batch_size=10)

    def test5_num_batch10(self) -> None:
        self.helper_test5(batch_size=10)


# #############################################################################
# Test_apply_llm_prompt_to_df2
# #############################################################################


# TODO(gp): Convert this into a unit test for apply_llm_prompt.
class Test_apply_llm_prompt_to_df2(_BaseCacheTest):
    """
    Test apply_llm_prompt_to_df with mocked cache.

    - `test1` records the `_llm` cache by calling the real LLM; it is skipped
      unless `_RUN_REAL_LLM` is set
    - `test2` replays the recorded cache with `abort_on_cache_miss=True`
    - `test3` checks that a cache miss raises when nothing is cached
    """

    @staticmethod
    def get_test_prompt() -> str:
        """
        Get a simple test prompt for LLM.

        This prompt asks the LLM to sum two numbers, providing a simple
        and predictable test case.

        :return: system prompt string
        """
        prompt = """
        You are a calculator. Given input in the format "a + b", return only
        the sum as a number.

        Return ONLY the numeric result, nothing else.
        """
        prompt = hprint.dedent(prompt)
        return prompt

    @staticmethod
    def extract_test_fields(obj) -> str:
        """
        Extract test fields from a DataFrame row or string.

        :param obj: either a string or a pandas Series
        :return: `"<num1> + <num2>"` for a Series that has both columns, an
            empty string for a Series missing either column, and `obj`
            itself for anything that is not a Series
        """
        if not isinstance(obj, pd.Series):
            # Already a string.
            return obj
        # Extract from DataFrame row.
        if "num1" in obj.index and "num2" in obj.index:
            num1 = obj["num1"]
            num2 = obj["num2"]
            return f"{num1} + {num2}"
        return ""

    def create_test_df(self) -> pd.DataFrame:
        """
        Create a minimal DataFrame with test data (2 rows).
        """
        df = pd.DataFrame(
            {
                "num1": [2, 10],
                "num2": [3, 15],
            }
        )
        return df

    def run_cached_apply_llm_prompt_to_df(self) -> None:
        """
        Run `apply_llm_prompt_to_df` on the test data and check the result.

        The call goes through the `_llm` cache, so whether the LLM is really
        contacted depends on how the calling test configured the cache.
        """
        # Prepare inputs.
        # The prompt is built once; the original fetched it twice.
        prompt = self.get_test_prompt()
        df = self.create_test_df()
        extractor = self.extract_test_fields
        # Run test.
        result_df, _ = hllmcli.apply_llm_prompt_to_df(
            prompt=prompt,
            df=df,
            extractor=extractor,
            target_col="sum",
            batch_mode="individual",
            model="gpt-5-nano",
            batch_size=10,
            use_sys_stderr=True,
        )
        _LOG.debug("result_df=%s", result_df)
        # Check outputs.
        expected_df = pd.DataFrame(
            {
                "num1": [2, 10],
                "num2": [3, 15],
                "sum": ["5", "25"],
            }
        )
        self.assert_equal(str(result_df), str(expected_df))

    @pytest.mark.skipif(
        not _RUN_REAL_LLM,
        reason="Real LLM not enabled",
    )
    def test1(self) -> None:
        """
        Warm up cache by calling apply_llm and save cache to file.

        This test creates a cache by calling apply_llm with test data,
        then saves the cache to a file for use in subsequent tests.
        """
        # Create a file with the cache content for test2 in the input directory.
        input_dir = self.get_input_dir(
            test_class_name=self.__class__.__name__,
            test_method_name="test2",
        )
        hcacsimp.set_cache_dir(input_dir)
        # Call apply_llm to warm up the cache for both inputs.
        self.run_cached_apply_llm_prompt_to_df()
        # Flush the cache to disk to ensure it's saved.
        hcacsimp.flush_cache_to_disk("_llm")
        func_cache_data = hcacsimp.get_disk_cache("_llm")
        # Check that the cache file exists and is not empty.
        hcacsimp.sanity_check_function_cache(
            func_cache_data, assert_on_empty=True
        )

    def test2(self) -> None:
        """
        Test apply_llm_prompt_to_df with mocked cache.

        This test
        - loads the cache file created in test1
        - mocks the cache with the data from the cache file
        - verifies that apply_llm_prompt_to_df uses the cached values without
          hitting the LLM API.
        """
        # Prepare inputs.
        # Set up temporary cache directory.
        scratch_dir = self.get_scratch_space()
        hcacsimp.set_cache_dir(scratch_dir)
        # Load the saved cache file from test2's input directory.
        input_dir = self.get_input_dir()
        # Load the cache data from the cache file.
        cache_file = os.path.join(input_dir, "tmp.cache_simple._llm.json")
        _LOG.debug("cache_file=%s", cache_file)
        func_cache_data = hcacsimp._load_func_cache_data_from_file(
            cache_file, "json"
        )
        _LOG.debug("func_cache_data=%s", func_cache_data)
        hcacsimp.sanity_check_function_cache(
            func_cache_data, assert_on_empty=True
        )
        _LOG.debug("Loaded func_cache_data=\n%s", func_cache_data)
        hcacsimp.mock_cache_from_disk("_llm", func_cache_data)
        try:
            # Set abort_on_cache_miss to ensure we don't hit the LLM API.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True)
            # Run apply_llm_prompt_to_df with mocked cache.
            self.run_cached_apply_llm_prompt_to_df()
        finally:
            # Reset the cache property.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False)

    def test3(self) -> None:
        """
        Test apply_llm_prompt_to_df without mocked cache.

        This test verifies that apply_llm_prompt_to_df raises an error when the
        cache is missed and abort_on_cache_miss=True.
        """
        # Set up temporary cache directory.
        scratch_dir = self.get_scratch_space()
        hcacsimp.set_cache_dir(scratch_dir)
        try:
            # Set abort_on_cache_miss to ensure we don't hit the LLM API.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True)
            with self.assertRaises(ValueError) as fail:
                # Run apply_llm_prompt_to_df without mocked cache.
                self.run_cached_apply_llm_prompt_to_df()
            self.assertIn("Cache miss", str(fail.exception))
        finally:
            # Reset the cache property.
            hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False)
@staticmethod
def get_person_industry_prompt() -> str:
    """
    Build the system prompt used for the industry classification tests.

    :return: dedented system prompt listing the allowed industry labels
    """
    return hprint.dedent(
        """
        Given the following list of industries with examples, classify the text into the
        corresponding industry:
        - Industrial & Built Environment
        - Transportation & Logistics
        - Consumer & Retail
        - Technology & Digital Services
        - Health & Life Sciences
        - Finance & Professional Services
        - Public & Social Sector
        - Media, Marketing & Experiences

        You MUST report the industry exactly as one of the options above. Do not
        include any other text.
        If you are not sure about the industry, return "unknown".
        """
    )
def helper(self, model: str, batch_size: int) -> None:
    """
    Compare costs and time of different batch modes in apply_llm_prompt_to_df.

    This test compares the performance of three batch modes:
    1. individual: processes each query separately
    2. shared_prompt: uses shared prompt context
    3. combined: combines all queries into single API call

    Differences between the labels returned by the batch modes are logged
    as errors but do not fail the test.

    :param model: name of the LLM model passed to `apply_llm_prompt_to_df`
    :param batch_size: batch size passed to `apply_llm_prompt_to_df`
    """
    # Reset the cache once, before running the batch modes.
    # NOTE(review): the cache is not reset between batch modes; confirm that
    # the modes do not share cache entries, otherwise timings are skewed.
    hcacsimp.set_cache_dir(self.get_scratch_space())
    _LOG.info("Cache directory: %s", hcacsimp.get_cache_dir())
    hcacsimp.reset_cache("", interactive=False)
    # Prepare inputs.
    prompt = self.get_person_industry_prompt()
    industries = self.get_test_industries()
    # Create DataFrame from test data.
    df = pd.DataFrame({"description": industries})

    # Extractor function to get text from DataFrame row.
    def extractor(obj):
        if isinstance(obj, pd.Series):
            return obj["description"]
        return str(obj)

    # Test each batch mode.
    batch_modes = ["individual", "shared_prompt", "combined"]
    results = []
    # Store result DataFrames to compare across batch modes.
    result_dfs = {}
    for batch_mode in batch_modes:
        _LOG.info(
            "\n%s", hprint.frame("Testing batch mode: %s" % batch_mode)
        )
        # Create a copy of the DataFrame for this batch mode.
        df_copy = df.copy()
        # Call apply_llm_prompt_to_df with the current batch mode.
        result_df, stats = hllmcli.apply_llm_prompt_to_df(
            prompt=prompt,
            df=df_copy,
            extractor=extractor,
            target_col="industry",
            batch_mode=batch_mode,
            model=model,
            batch_size=batch_size,
            testing_functor=None,
            use_sys_stderr=True,
        )
        # Get elapsed time from stats.
        elapsed_time = stats["elapsed_time_in_seconds"]
        # Print time and cost for this batch mode.
        _LOG.info(
            "Batch mode '%s': Time=%.2fs, Cost=$%.6f",
            batch_mode,
            elapsed_time,
            stats["total_cost_in_dollars"],
        )
        # Store results.
        results.append(
            {
                "Batch Mode": batch_mode,
                "Time (s)": elapsed_time,
                "Num Items": stats["num_items"],
                "Num Skipped": stats["num_skipped"],
                "Num Batches": stats["num_batches"],
                "Total Cost ($)": stats["total_cost_in_dollars"],
            }
        )
        # Store result DataFrame for comparison.
        result_dfs[batch_mode] = result_df
        # Verify results.
        self.assertEqual(len(result_df), len(industries))
        self.assertIn("industry", result_df.columns)
    # Check that all batch modes produce the same results.
    # Compare each batch mode's results with the first batch mode.
    first_batch_mode = batch_modes[0]
    first_result_df = result_dfs[first_batch_mode]["industry"].reset_index(
        drop=True
    )
    for batch_mode in batch_modes[1:]:
        compare_result_df = result_dfs[batch_mode]["industry"].reset_index(
            drop=True
        )
        # Create a comparison DataFrame between the two batch modes.
        match_df = pd.DataFrame(
            {
                first_batch_mode: first_result_df,
                batch_mode: compare_result_df,
            }
        )
        # Add a column with whether they match or not.
        match_df["Match"] = (
            match_df[first_batch_mode] == match_df[batch_mode]
        )
        all_match = match_df["Match"].all()
        if not all_match:
            _LOG.error(
                "Results mismatch between '%s' and '%s':\n%s",
                first_batch_mode,
                batch_mode,
                match_df,
            )
        else:
            # Report a match only when every row agrees; the original logged
            # this message even when the mismatch branch had fired.
            _LOG.info(
                "Results match between '%s' and '%s'",
                first_batch_mode,
                batch_mode,
            )
    # Create comparison DataFrame.
    comparison_df = pd.DataFrame(results)
    # Add relative metrics compared to individual mode.
    individual_time = comparison_df.loc[
        comparison_df["Batch Mode"] == "individual", "Time (s)"
    ].iloc[0]
    individual_cost = comparison_df.loc[
        comparison_df["Batch Mode"] == "individual", "Total Cost ($)"
    ].iloc[0]
    comparison_df["Time Ratio"] = comparison_df["Time (s)"] / individual_time
    comparison_df["Cost Ratio"] = (
        comparison_df["Total Cost ($)"] / individual_cost
    )
    # Format the DataFrame for better readability.
    comparison_df["Time (s)"] = comparison_df["Time (s)"].round(2)
    comparison_df["Total Cost ($)"] = comparison_df["Total Cost ($)"].round(
        6
    )
    comparison_df["Time Ratio"] = comparison_df["Time Ratio"].round(2)
    comparison_df["Cost Ratio"] = comparison_df["Cost Ratio"].round(2)
    # Print comparison_df without truncation.
    with pd.option_context(
        "display.max_columns",
        None,
        "display.max_rows",
        None,
        "display.width",
        None,
        "display.max_colwidth",
        None,
    ):
        _LOG.info("Batch mode comparison:\n%s", comparison_df)
class Test_hlogging_asyncio1(hunitest.TestCase):
    """
    Exercise logging from inside a coroutine, with and without an event loop
    passed to `hasynci.run()`.
    """

    @staticmethod
    async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None:
        """
        Coroutine simulating a workload waiting for 1s.

        The time from `hdateti.get_current_time("ET")` and the time returned
        by `get_wall_clock_time` are logged before and after the sleep.

        :param get_wall_clock_time: callable returning the time to log as
            `event_loop_time`
        """
        # Set the coroutine name.
        task = asyncio.current_task()
        task.set_name("workload")

        def _print_time() -> None:
            true_wall_clock_time = hdateti.get_current_time("ET")
            _LOG.debug("wall_clock_time=%s", true_wall_clock_time)
            event_loop_time = get_wall_clock_time()
            _LOG.debug("event_loop_time=%s", event_loop_time)

        _print_time()
        _LOG.debug(" -> wait")
        await asyncio.sleep(1.0)
        _print_time()

    def run_test(
        self,
        event_loop: Optional[asyncio.AbstractEventLoop],
        get_wall_clock_time: hdateti.GetWallClockTime,
    ) -> None:
        """
        Run the `workload` coroutine through `hasynci.run()`.

        :param event_loop: event loop forwarded to `hasynci.run()`, or `None`
        :param get_wall_clock_time: callable forwarded to `workload`
        """
        coroutine = self.workload(get_wall_clock_time)
        hasynci.run(coroutine, event_loop=event_loop)

    # pylint: disable=line-too-long
    def test_real_time1(self) -> None:
        """
        Use the logger.

        The output is like:

        ```
        07:55:54 hunit_test.py setUp:932 Resetting random.seed to 20000101
        07:55:54 hunit_test.py setUp:935 Resetting np.random.seed to 20000101
        07:55:54 hunit_test.py setUp:944 base_dir_name=/app/amp/helpers/test
        ```
        """

        # Use the wall clock time with no special event loop.
        # A `def` is used instead of binding a lambda to a name (PEP 8).
        def get_wall_clock_time():
            return hdateti.get_current_time(tz="ET")

        event_loop = None
        # Run.
        self.run_test(event_loop, get_wall_clock_time)

    # pylint: disable=line-too-long
    def test_simulated_time1(self) -> None:
        """
        Use the logger with event_loop and asyncio.

        The output is like:

        ```
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:28 wall_clock_time=2022-01-18 07:52:55.337574-05:00
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:30 event_loop_time=2022-01-18 07:52:55.310587-05:00
        07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py workload:33 -> wait
        ```
        """
        with hasynci.solipsism_context() as event_loop:

            # Use the simulated wall clock time.
            def get_wall_clock_time():
                return hdateti.get_current_time(
                    tz="ET", event_loop=event_loop
                )

            # NOTE(review): the getter installed here is not restored after
            # the test; confirm whether `hwacltim` needs to be reset so that
            # later tests do not read time from this event loop.
            hwacltim.set_wall_clock_time(get_wall_clock_time)
            # Run.
            self.run_test(event_loop, get_wall_clock_time)
def helper(self, text: str, expected: str) -> None:
    """
    Run `bold_first_level_bullets` on `text` and compare with `expected`.

    :param text: markdown input, dedented and split into lines before the call
    :param expected: expected markdown, compared with `dedent=True`
    """
    # Prepare inputs.
    input_lines = hprint.dedent(text).split("\n")
    # Run test.
    output_lines = hmarkdo.bold_first_level_bullets(input_lines)
    # Check outputs.
    self.assert_equal("\n".join(output_lines), expected, dedent=True)
+ """ + text = r""" + - Top level + - Second level + - Third level + - Back to second + - Another top + """ + expected = r""" + - **Top level** + - Second level + - Third level + - Back to second + - **Another top** + """ + self.helper(text, expected) + + def test4(self) -> None: + """ + Test with empty lines between bullets. + """ + text = r""" + - First item + + - Second item + - Sub item + + - Third item + """ + expected = r""" + - **First item** + + - **Second item** + - Sub item + + - **Third item** + """ + self.helper(text, expected) + + def test5(self) -> None: + """ + Test with text that already contains some bold markers. + """ + text = r""" + - First **important** point + - Sub point + - Second point with emphasis + """ + expected = r""" + - First **important** point + - Sub point + - **Second point with emphasis** + """ + self.helper(text, expected) + + +# ############################################################################# +# Test_colorize_bold_text1 +# ############################################################################# + + +class Test_colorize_bold_text1(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic case with single bold text. + """ + text = "This is **bold** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"This is **\red{bold}** text" + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Test multiple bold sections get different colors. + """ + text = "**First** normal **Second** text" + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r"**\red{First}** normal **\teal{Second}** text" + self.assert_equal(actual, expected) + + def test3(self) -> None: + """ + Test underscore style bold text. 
def test6(self) -> None:
    r"""
    Test with abbreviations=False uses full \textcolor syntax.
    """
    # The docstring is raw so that `\t` in `\textcolor` is not read as a TAB.
    text = "This is **bold** text"
    actual = hmarkdo.colorize_bold_text(
        text, color_sequence="equidistant", use_abbreviations=False
    )
    expected = r"This is **\textcolor{red}{bold}** text"
    self.assert_equal(actual, expected)
+ """ + text = """ + **List 1:** + - First item + - Second item + + **List 2:** + - Another item + - Final item + """ + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = r""" + **\red{List 1:}** + - First item + - Second item + + **\teal{List 2:}** + - Another item + - Final item + """ + self.assert_equal(actual, expected) + + def test8(self) -> None: + text = hprint.dedent( + r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **\blue{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **\violet{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + + - **\pink{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + + - **\olive{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + + - **\darkgray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + ) + actual = hmarkdo.colorize_bold_text( + text, color_sequence="equidistant", use_abbreviations=True + ) + expected = hprint.dedent( + r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key 
def test9(self) -> None:
    """
    Test that two bold sections on one line get different colors.

    NOTE(review): the input and expected output are the same as in `test2`;
    confirm whether this test is meant to cover a different case.
    """
    text = "**First** normal **Second** text"
    actual = hmarkdo.colorize_bold_text(
        text, color_sequence="equidistant", use_abbreviations=True
    )
    expected = r"**\red{First}** normal **\teal{Second}** text"
    self.assert_equal(actual, expected)
+ """ + text = """ + Some initial text + - First bullet + Some text in between + - Second bullet + Final text""" + expected = """ + Some initial text + + - First bullet + Some text in between + + - Second bullet + Final text""" + self.format_and_compare_markdown(text, expected) + + def test5(self) -> None: + """ + Test nested bullets with multiple levels. + """ + text = """ + - Level 1 + - Level 2 + - Level 3 + - Another level 1 + - Level 2 again""" + expected = """ + - Level 1 + - Level 2 + - Level 3 + + - Another level 1 + - Level 2 again""" + self.format_and_compare_markdown(text, expected) + + def test6(self) -> None: + """ + Test empty lines handling. + """ + text = """ + - First bullet + + - Second bullet + + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self.format_and_compare_markdown(text, expected) + + def test7(self) -> None: + """ + Test mixed content with bullets and text. + """ + text = """ + Some text here + - First bullet + More text + - Second bullet + - Nested bullet + Final paragraph + - Last bullet""" + expected = """ + Some text here + + - First bullet + More text + + - Second bullet + - Nested bullet + Final paragraph + + - Last bullet""" + self.format_and_compare_markdown(text, expected) + + def test8(self) -> None: + """ + Test bullets with inline formatting. + """ + text = """ + - **Bold bullet** point + - *Italic nested* bullet + - `Code bullet` here + - **_Mixed_** formatting""" + expected = """ + - **Bold bullet** point + - *Italic nested* bullet + + - `Code bullet` here + - **_Mixed_** formatting""" + self.format_and_compare_markdown(text, expected) + + def test9(self) -> None: + """ + Test bullets with special characters. 
+ """ + text = """ + - Bullet with (parentheses) + - Bullet with [brackets] + - Bullet with {braces} + - Bullet with $math$""" + expected = """ + - Bullet with (parentheses) + - Bullet with [brackets] + + - Bullet with {braces} + - Bullet with $math$""" + self.format_and_compare_markdown(text, expected) + + def test10(self) -> None: + text = hprint.dedent( + r""" + - **Objective** + + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + ) + expected = hprint.dedent( + r""" + - **Objective** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + 
""" + ) + self.format_and_compare_markdown(text, expected) + + +# ############################################################################# +# Test_process_lines1 +# ############################################################################# + + +class Test_process_lines1(hunitest.TestCase): + # TODO(gp): This doesn't seem correct. + def test1(self) -> None: + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + txt_in = hio.from_file(input_file_path) + txt_in = hprint.dedent(txt_in) + lines = txt_in.split("\n") + out = [] + for i, line in hmarkdo.process_lines(lines): + _LOG.debug(hprint.to_str("line")) + out.append(f"{i}:{line}") + actual = "\n".join(out) + self.check_string( + actual, dedent=True, remove_lead_trail_empty_lines=True + ) + + +# ############################################################################# +# Test_process_code_block1 +# ############################################################################# + + +class Test_process_code_block1(hunitest.TestCase): + def helper(self, txt: str) -> str: + out: List[str] = [] + in_code_block = False + lines = txt.split("\n") + for i, line in enumerate(lines): + _LOG.debug("%s:line=%s", i, line) + # Process the code block. + do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( + line, in_code_block, i, lines + ) + out.extend(out_tmp) + if do_continue: + continue + # + out.append(line) + return "\n".join(out) + + def test1(self) -> None: + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + txt_in = hio.from_file(input_file_path) + txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) + # Run function. + actual = self.helper(txt_in) + # Check output. 
class Test_process_color_commands1(hunitest.TestCase):
    def helper(self, txt_in: str, expected: str) -> None:
        """
        Run `process_color_commands()` on the input and compare the result
        with the expected string.

        :param txt_in: text containing the color commands to process
        :param expected: expected text after the processing
        """
        # Run function.
        actual = hmarkdo.process_color_commands(txt_in)
        # Check output.
        self.assert_equal(actual, expected)

    def test_text_content1(self) -> None:
        """
        Plain text content is expected to be wrapped in a text command.
        """
        self.helper(
            r"\red{Hello world}",
            r"\textcolor{red}{\text{Hello world}}",
        )

    def test_math_content1(self) -> None:
        """
        Mathematical content is expected to be left as it is.
        """
        self.helper(
            r"\blue{x + y = z}",
            r"\textcolor{blue}{x + y = z}",
        )

    def test_multiple_colors1(self) -> None:
        """
        Several color commands on the same line are all expected to be
        converted.
        """
        self.helper(
            r"The \red{quick} \blue{fox} \green{jumps}",
            r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}",
        )

    def test_mixed_content1(self) -> None:
        """
        Content mixing text and math is expected to be left as it is.
        """
        self.helper(
            r"\red{Result: x^2 + y^2}",
            r"\textcolor{red}{Result: x^2 + y^2}",
        )

    def test_nested_braces1(self) -> None:
        """
        Braces nested inside the color command are expected to be preserved.
        """
        self.helper(
            r"\blue{f(x) = {x + 1}}",
            r"\textcolor{blue}{f(x) = {x + 1}}",
        )
+ expected = r""" + - **\red{VC Theory}** + - Measures model + + - **\orange{Bias-Variance Decomposition}** + - Prediction error + - **\yellow{Bias}** + - **\lime{Variance}** + + - **\green{Computation Complexity}** + - Balances model + - Related to + - E.g., Minimum + + - **\teal{Bayesian Approach}** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model + + - **\cyan{Problem in ML Theory:}** + - Assumptions may not align with practical problems + """ + self.assert_equal(actual, expected) + + def test2(self) -> None: + # Prepare inputs. + text = r""" + * Machine Learning Flow + + ::: columns + :::: {.column width=90%} + - Question + - E.g., "How can we predict house prices?" + - Input data + - E.g., historical data of house sales + + - _"If I were given one hour to save the planet, I would spend 59 minutes + defining the problem and one minute resolving it"_ (Albert Einstein) + + - **Not all phases are equally important!** + - Question $>$ Data $>$ Features $>$ Algorithm + - Clarity of the question impacts project success + - Quality and relevance of data are crucial for performance + - Proper feature selection simplifies the model and improves accuracy + - Algorithm is often less important (contrary to popular belief!) + :::: + :::: {.column width=5%} + + ```graphviz[height=90%] + digraph BayesianFlow { + rankdir=TD; + splines=true; + ... + } + ``` + :::: + ::: + """ + # Run function. + actual = hmarkdo.colorize_bullet_points_in_slide(text) + # Check output. + expected = r""" + * Machine Learning Flow + + ::: columns + :::: {.column width=90%} + - Question + - E.g., "How can we predict house prices?" 
def _prepare_div_block_inputs(txt: str, expected: str) -> Tuple[List[str], str]:
    """
    Normalize the input and the expected text of a div-block test.

    :param txt: input text, dedented with
        `remove_lead_trail_empty_lines_=True` and split into lines
    :param expected: expected text, dedented with
        `remove_lead_trail_empty_lines_=False`; at most one newline is then
        dropped from its start and at most one from its end
    :return: lines of the input text and the normalized expected text
    """
    input_lines = hprint.dedent(txt, remove_lead_trail_empty_lines_=True).split(
        "\n"
    )
    expected_txt = hprint.dedent(expected, remove_lead_trail_empty_lines_=False)
    # Drop a single leading newline first, then a single trailing one: the
    # order matters when the text is just one newline.
    skip = 1 if expected_txt.startswith("\n") else 0
    expected_txt = expected_txt[skip:]
    if expected_txt.endswith("\n"):
        expected_txt = expected_txt[:-1]
    return input_lines, expected_txt
############################################################################# + + +class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): + """ + Test the function to add prettier-ignore comments around div blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. + lines, expected = _prepare_div_block_inputs(txt, expected) + # Run test. + actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.assert_equal(actual, expected) + + def test_simple_div_block(self) -> None: + """ + Test a simple div block with two colons. + """ + txt = """ + :::: + ::: + """ + # Add a leading empty line in expected since function adds it. + expected = """ + + + :::: + ::: + + + """ + self.helper(txt, expected) + + def test_div_block_with_attributes(self) -> None: + """ + Test a div block with column attributes. + """ + txt = """ + :::: + ::::{.column width=40%} + """ + expected = """ + + + :::: + ::::{.column width=40%} + + + """ + self.helper(txt, expected) + + def test_multiple_div_blocks(self) -> None: + """ + Test multiple div blocks in the same content. + """ + txt = """ + Some text before + + :::: + ::::{.column width=40%} + + Middle text + + :::columns + ::::{.column width=60%} + + Some text after + """ + expected = """ + Some text before + + + + :::: + ::::{.column width=40%} + + + + Middle text + + + + :::columns + ::::{.column width=60%} + + + + Some text after + """ + self.helper(txt, expected) + + def test_no_div_blocks(self) -> None: + """ + Test content with no div blocks. + """ + txt = """ + Some normal text + with no div blocks + at all + """ + expected = """ + Some normal text + with no div blocks + at all + """ + self.helper(txt, expected) + + def test_unclosed_div_block(self) -> None: + """ + Test a div block that is not closed. 
+ """ + txt = """ + Some text + + :::: + + More text + """ + expected = """ + Some text + + :::: + + More text + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_remove_prettier_ignore_from_div_blocks +# ############################################################################# + + +class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): + """ + Test the function to remove prettier-ignore comments from div blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. + lines, expected = _prepare_div_block_inputs(txt, expected) + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.assert_equal(actual, expected) + + def test_remove_simple_block(self) -> None: + """ + Test removing prettier-ignore from a simple div block. + """ + txt = """ + + + :::: + ::: + + + """ + expected = """ + :::: + ::: + """ + self.helper(txt, expected) + + def test_remove_block_with_content(self) -> None: + """ + Test removing prettier-ignore from a div block with content. + """ + txt = """ + Some text before + + + :::: + ::::{.column width=40%} + + + Some text after + """ + expected = """ + Some text before + :::: + ::::{.column width=40%} + Some text after + """ + self.helper(txt, expected) + + def test_remove_multiple_blocks(self) -> None: + """ + Test removing prettier-ignore from multiple div blocks. + """ + txt = """ + Text before + + + :::: + ::::{.column width=40%} + + + Middle text + + + :::columns + ::::{.column width=60%} + + + Text after + """ + expected = """ + Text before + :::: + ::::{.column width=40%} + Middle text + :::columns + ::::{.column width=60%} + Text after + """ + self.helper(txt, expected) + + def test_no_prettier_ignore_comments(self) -> None: + """ + Test content with no prettier-ignore comments. 
class Test_add_remove_prettier_ignore_roundtrip(hunitest.TestCase):
    """
    Check that adding and then removing the prettier-ignore comments gives
    back the original text.
    """

    def helper(self, txt: str) -> None:
        """
        Apply `add_prettier_ignore_to_div_blocks()` followed by
        `remove_prettier_ignore_from_div_blocks()` and compare the result
        with the dedented input.

        :param txt: text to round-trip
        """
        # Prepare inputs.
        expected = hprint.dedent(txt, remove_lead_trail_empty_lines_=True)
        original_lines = expected.split("\n")
        # Run test.
        annotated_lines = hmadiblo.add_prettier_ignore_to_div_blocks(
            original_lines
        )
        restored_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(
            annotated_lines
        )
        # Check outputs.
        self.assert_equal("\n".join(restored_lines), expected)

    def test_roundtrip_simple(self) -> None:
        """
        Round-trip a text made only of a div block.
        """
        txt = """
        ::::
        :::
        """
        self.helper(txt)

    def test_roundtrip_complex1(self) -> None:
        """
        Round-trip div blocks separated from the text by empty lines.
        """
        txt = """
        Text1

        ::::
        ::::{.column width=40%}

        Text2

        :::columns
        ::::{.column width=60%}

        Text3
        """
        self.helper(txt)

    def test_roundtrip_complex2(self) -> None:
        """
        Round-trip div blocks adjacent to the text, without empty lines.
        """
        txt = """
        Text1
        :::
        ::::{.column width=40%}
        Text2
        ::::
        ::::{.column width=40%}
        Text3
        :::columns
        ::::{.column width=60%}
        Text4
        """
        self.helper(txt)

    def test_roundtrip_complex3(self) -> None:
        """
        Round-trip div blocks where only some are surrounded by empty lines.
        """
        txt = """
        Text1

        :::
        ::::{.column width=40%}

        Text2
        ::::
        ::::{.column width=40%}

        Text3
        :::columns
        ::::{.column width=60%}
        Text4
        """
        self.helper(txt)
+ """ + # Do the round trip. + lines = text.split("\n") + actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) + act_text = hmarkdo.replace_tags_with_fenced_blocks( + actual_lines, fence_map + ) + # Check output. + act_text = "\n".join(act_text) + self.assert_equal(act_text, text) + + def test1(self) -> None: + """ + Test replacing fenced code blocks with tags. + """ + # Prepare inputs. + text = """ + Some text before + ```python + def foo(): + return 42 + ``` + Text between blocks + ```` + Plain code block + ```` + Some text after + """ + # Prepare outputs. + expected_lines = """ + Some text before + + Text between blocks + + Some text after + """ + # Check fence map. + expected_map = { + "1": "```python\ndef foo():\n return 42\n```", + "2": "````\nPlain code block\n````", + } + self.helper(text, expected_lines, expected_map) + + def test2(self) -> None: + """ + Test nested fenced blocks. + """ + text = """ + ```` + Outer block + ```python + def nested(): + pass + ``` + Still outer + ```` + """ + expected_lines = """ + + """ + expected_map = { + "1": "````\nOuter block\n```python\ndef nested():\n pass\n```\nStill outer\n````" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test3(self) -> None: + """ + Test empty fenced blocks. + """ + text = """ + Before + ``` + ``` + After + ```python + ``` + End + """ + expected_lines = """ + Before + + After + + End + """ + expected_map = {"1": "```\n```", "2": "```python\n```"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test4(self) -> None: + """ + Test blocks with different fence lengths. 
+ """ + text = """ + Start + ``` + Three + ``` + Middle + ````` + Five + ````` + End + """ + expected_lines = """ + Start + + Middle + + End + """ + expected_map = {"1": "```\nThree\n```", "2": "`````\nFive\n`````"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test5(self) -> None: + """ + Test blocks with language specifiers. + """ + text = """ + ```python + def foo(): pass + ``` + ```bash + echo hello + ``` + ```javascript + console.log('hi'); + ``` + """ + expected_lines = """ + + + + """ + expected_map = { + "1": "```python\ndef foo(): pass\n```", + "2": "```bash\necho hello\n```", + "3": "```javascript\nconsole.log('hi');\n```", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test6(self) -> None: + """ + Test blocks with indentation. + """ + text = """ + Outside + ``` + Indented block + More indent + ``` + ```python + def foo(): + pass + ``` + End + """ + expected_lines = """ + Outside + + + End + """ + expected_map = { + "1": " ```\n Indented block\n More indent\n ```", + "2": " ```python\n def foo():\n pass\n ```", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py new file mode 100644 index 000000000..91efef1f4 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py @@ -0,0 +1,449 @@ +import logging + +import helpers.hmarkdown_filtering as hmarfilt +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# 
class Test_filter_by_header1(hunitest.TestCase):
    def test_basic_header_extraction(self) -> None:
        """
        Extract the `Introduction` section: the expected text includes its
        nested `Section 1` but not the following `Conclusion` section.
        """
        # Prepare inputs.
        markdown = """
        # Introduction
        This is the introduction section.
        Some content here.

        ## Section 1
        Content for section 1.

        # Conclusion
        Final thoughts here.
        """
        lines = hprint.dedent(
            markdown, remove_lead_trail_empty_lines_=False
        ).split("\n")
        # Run test.
        actual = "\n".join(hmarfilt.filter_by_header(lines, "Introduction"))
        # Check outputs.
        expected = """
        # Introduction
        This is the introduction section.
        Some content here.

        ## Section 1
        Content for section 1.
        """
        self.assert_equal(actual, expected, dedent=True)

    def test_header_not_found(self) -> None:
        """
        Asking for a header that is not in the text raises `ValueError`.
        """
        # Prepare inputs.
        markdown = """
        # Introduction
        This is the introduction section.
        """
        lines = hprint.dedent(markdown).split("\n")
        # Run test and check outputs.
        with self.assertRaises(ValueError):
            hmarfilt.filter_by_header(lines, "NonExistent")
class Test_filter_by_lines1(hunitest.TestCase):
    def helper(self, test_content: str, line_range: str, expected: str) -> None:
        """
        Filter the dedented content with `filter_by_lines()` and compare the
        joined result with the expected text.

        :param test_content: text to filter
        :param line_range: range passed to `filter_by_lines()`, e.g., `1:3`
        :param expected: expected text after the filtering
        """
        # Prepare inputs.
        lines = hprint.dedent(test_content).split("\n")
        # Run test.
        result_content = "\n".join(hmarfilt.filter_by_lines(lines, line_range))
        # Check outputs.
        self.assertEqual(result_content, expected)

    def test_basic_line_filtering(self) -> None:
        """
        Range `1:3` is expected to select `Line 2` and `Line 3`.
        """
        test_content = """
        Line 1
        Line 2
        Line 3
        Line 4
        Line 5
        """
        self.helper(test_content, "1:3", "Line 2\nLine 3")

    def test_line_filtering_with_none(self) -> None:
        """
        Range `None:2` is expected to select `Line 1` and `Line 2`.
        """
        test_content = """
        Line 1
        Line 2
        Line 3
        Line 4
        Line 5
        """
        self.helper(test_content, "None:2", "Line 1\nLine 2")

    def test_line_filtering_to_end(self) -> None:
        """
        Range `1:None` is expected to select from `Line 2` to the last line.
        """
        test_content = """
        Line 1
        Line 2
        Line 3
        """
        self.helper(test_content, "1:None", "Line 2\nLine 3")

    def test_invalid_range_order(self) -> None:
        """
        A range whose start is after its end raises `AssertionError`.
        """
        # Prepare inputs.
        lines = "Line 1\nLine 2\nLine 3".split("\n")
        # Run test and check outputs.
        with self.assertRaises(AssertionError):
            hmarfilt.filter_by_lines(lines, "2:1")
+ self.assertIn("Slide 1", result_content) + self.assertNotIn("Slide 2", result_content) + + def test_slide_filtering_with_none_end(self) -> None: + """ + Test slide filtering to the end. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + + * Slide 2 + Content 2. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + result_lines = hmarfilt.filter_by_slides(lines, "0:None") + result_content = "\n".join(result_lines) + # Check outputs. + self.assertIn("Slide 1", result_content) + self.assertIn("Slide 2", result_content) + + def test_slide_filtering_invalid_range(self) -> None: + """ + Test that invalid slide ranges raise errors. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "1:0") + + def test_slide_filtering_beyond_slides(self) -> None: + """ + Test filtering with end beyond available slides. + """ + # Prepare inputs. + test_content = """ + * Slide 1 + Content 1. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs. + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "0:5") + + def test_no_slides_content(self) -> None: + """ + Test behavior with content that has no slides. + """ + # Prepare inputs. + test_content = """ + # Header 1 + Just regular content without slides. + """ + test_content = hprint.dedent(test_content) + lines = test_content.split("\n") + # Run test. + # Check outputs (should fail validation since there are no slides). + with self.assertRaises(AssertionError): + hmarfilt.filter_by_slides(lines, "0:1") + + def test_slide_filtering_single_slide(self) -> None: + """ + Test filtering a single slide when there's only one slide (0-indexed). + """ + # Prepare inputs. 
class Test_additional_edge_cases1(hunitest.TestCase):
    def test_filter_by_header_with_subsection(self) -> None:
        """
        Extract a second-level header and check that its title and content
        are in the result.
        """
        # Prepare inputs.
        markdown = """
        # Introduction
        This is the introduction.

        ## Subsection 1
        Content for subsection 1.

        ## Subsection 2
        Content for subsection 2.

        # Conclusion
        Final thoughts.
        """
        lines = hprint.dedent(markdown).split("\n")
        # Run test.
        result_content = "\n".join(
            hmarfilt.filter_by_header(lines, "Subsection 1")
        )
        # Check outputs.
        for fragment in ("## Subsection 1", "Content for subsection 1."):
            self.assertIn(fragment, result_content)

    def test_parse_range_edge_cases(self) -> None:
        """
        Parse an empty range `0:0` and a fully open range `None:None`.
        """
        # Empty range.
        start, end = hmarfilt._parse_range("0:0", 1)
        self.assertEqual(start, 0)
        self.assertEqual(end, 0)
        # Open range: the expected end is the max value passed in.
        start, end = hmarfilt._parse_range("None:None", 1000)
        self.assertEqual(start, 0)
        self.assertEqual(end, 1000)

    def test_filter_lines_single_line(self) -> None:
        """
        Filtering a single line with the empty range `0:0` gives no text.
        """
        # Prepare inputs.
        lines = "Single line content".split("\n")
        # Run test.
        result_content = "\n".join(hmarfilt.filter_by_lines(lines, "0:0"))
        # Check outputs.
        self.assertEqual(result_content, "")

    def test_filter_lines_exact_range(self) -> None:
        """
        Range `0:2` is expected to select `Line 1` and `Line 2`.
        """
        # Prepare inputs.
        text = """
        Line 1
        Line 2
        Line 3
        """
        lines = hprint.dedent(text).split("\n")
        # Run test.
        result_content = "\n".join(hmarfilt.filter_by_lines(lines, "0:2"))
        # Check outputs.
        self.assertEqual(result_content, "Line 1\nLine 2")

    def test_parse_range_invalid_formats(self) -> None:
        """
        Malformed ranges raise: `AssertionError` for `5` and the empty
        string, `ValueError` for `1:2:3`.
        """
        invalid_cases = [
            ("5", AssertionError),
            ("", AssertionError),
            ("1:2:3", ValueError),
        ]
        for range_str, exception_type in invalid_cases:
            with self.assertRaises(exception_type):
                hmarfilt._parse_range(range_str, 10)
+ End. + """ + expected_text = """ + Line 1 + Line 2 + End + """ + self.helper(input_text, expected_text) + + def test_empty_string(self) -> None: + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test_leading_and_trailing_periods(self) -> None: + input_text = """ + .Line 1. + .Line 2. + ..End.. + """ + expected_text = """ + .Line 1 + .Line 2 + ..End + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_md_clean_up1 +# ############################################################################# + + +class Test_md_clean_up1(hunitest.TestCase): + def test1(self) -> None: + # Prepare inputs. + txt = r""" + **States**: + - \( S = \{\text{Sunny}, \text{Rainy}\} \) + **Observations**: + - \( O = \{\text{Yes}, \text{No}\} \) (umbrella) + + ### Initial Probabilities: + \[ + P(\text{Sunny}) = 0.6, \quad P(\text{Rainy}) = 0.4 + \] + + ### Transition Probabilities: + \[ + \begin{aligned} + P(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad P(\text{Sunny} \to \text{Rainy}) = 0.3 \\ + P(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad P(\text{Rainy} \to \text{Rainy}) = 0.6 + \end{aligned} + \] + + ### Observation (Emission) Probabilities: + \[ + \begin{aligned} + P(\text{Yes} \mid \text{Sunny}) &= 0.1, \quad P(\text{No} \mid \text{Sunny}) = 0.9 \\ + P(\text{Yes} \mid \text{Rainy}) &= 0.8, \quad P(\text{No} \mid \text{Rainy}) = 0.2 + \end{aligned} + \] + """ + txt = hprint.dedent(txt) + actual = hmarform.md_clean_up(txt) + actual = hprint.dedent(actual) + expected = r""" + **States**: + - $S = \{\text{Sunny}, \text{Rainy}\}$ + **Observations**: + - $O = \{\text{Yes}, \text{No}\}$ (umbrella) + + ### Initial Probabilities: + $$ + \Pr(\text{Sunny}) = 0.6, \quad \Pr(\text{Rainy}) = 0.4 + $$ + + ### Transition Probabilities: + $$ + \begin{aligned} + \Pr(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad \Pr(\text{Sunny} \to \text{Rainy}) = 0.3 \\ + \Pr(\text{Rainy} \to 
\text{Sunny}) &= 0.4, \quad \Pr(\text{Rainy} \to \text{Rainy}) = 0.6 + \end{aligned} + $$ + + ### Observation (Emission) Probabilities: + $$ + \begin{aligned} + \Pr(\text{Yes} | \text{Sunny}) &= 0.1, \quad \Pr(\text{No} | \text{Sunny}) = 0.9 \\ + \Pr(\text{Yes} | \text{Rainy}) &= 0.8, \quad \Pr(\text{No} | \text{Rainy}) = 0.2 + \end{aligned} + $$""" + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_remove_code_delimiters1 +# ############################################################################# + + +class Test_remove_code_delimiters1(hunitest.TestCase): + def test1(self) -> None: + """ + Test a basic example. + """ + # Prepare inputs. + content = r""" + ```python + def hello_world(): + print("Hello, World!") + ``` + """ + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def hello_world(): + print("Hello, World!") + """ + self.assert_equal(actual, expected, dedent=True) + + def test2(self) -> None: + """ + Test an example with empty lines at the start and end. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def check_empty_lines(): + print("Check empty lines are present!") + """ + self.assert_equal(actual, expected, dedent=True) + + def test3(self) -> None: + """ + Test a markdown with headings, Python and yaml blocks. + """ + # Prepare inputs. + content = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." 
+ + ```python + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + ``` + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + ```yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + ``` + """ + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." + + + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + + """ + self.assert_equal(actual, expected, dedent=True) + + def test4(self) -> None: + """ + Test another markdown with headings and multiple indent Python blocks. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + content = hprint.dedent(content) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + self.check_string(actual, dedent=True) + + def test5(self) -> None: + """ + Test an empty string. + """ + # Prepare inputs. + content = "" + lines = content.split("\n") if content else [] + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. 
+ expected = "" + self.assert_equal(actual, expected, dedent=True) + + def test6(self) -> None: + """ + Test a Python and immediate markdown code block. + """ + # Prepare inputs. + in_dir_name = self.get_input_dir() + input_file_path = os.path.join(in_dir_name, "test.txt") + content = hio.from_file(input_file_path) + lines = content.split("\n") + # Call function. + actual_lines = hmarform.remove_code_delimiters(lines) + actual = "\n".join(actual_lines) + # Check output. + expected = r""" + def no_start_python(): + print("No mention of python at the start") + + + + A markdown paragraph contains + delimiters that needs to be removed. + """ + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_format_markdown_slide +# ############################################################################# + + +class Test_format_markdown_slide(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarform.format_markdown_slide(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + _LOG.debug("actual=\n%s", actual) + _LOG.debug("expected=\n%s", expected) + self.assert_equal(str(actual), str(expected)) + + def test1(self) -> None: + """ + Test formatting a simple slide with bullets. + """ + input_text = """ + * Slide title + - First bullet + - Second bullet + """ + expected_text = """ + * Slide Title + + - First bullet + + - Second bullet + """ + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test formatting multiple slides. 
+ """ + input_text = """ + * First slide + - Point A + - Point B + * Second slide + - Point X + - Point Y + """ + expected_text = """ + * First Slide + + - Point A + + - Point B + * Second Slide + + - Point X + + - Point Y + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test formatting slides with nested bullets. + """ + input_text = """ + * Main slide + - First level + - Nested point + - Another nested + - Second level + """ + expected_text = """ + * Main Slide + + - First level + - Nested point + - Another nested + + - Second level + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test formatting empty input. + """ + # Prepare inputs. + input_text = """ + """ + # Check outputs. + expected_text = """ + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test formatting slide title capitalization. + """ + input_text = """ + * mixed case slide title + - Point one + """ + expected_text = """ + * Mixed Case Slide Title + + - Point one + """ + self.helper(input_text, expected_text) + + def test6(self) -> None: + """ + Test formatting slide with only title, no bullet points. + """ + input_text = """ + * Solo slide title + """ + expected_text = """ + * Solo Slide Title + """ + self.helper(input_text, expected_text) + + def test7(self) -> None: + """ + Test formatting slide with deeply nested bullets. + """ + input_text = """ + * Main slide + - Level 1 + - Level 2 + - Level 3 + - Level 4 + - Back to level 1 + """ + expected_text = """ + * Main Slide + + - Level 1 + - Level 2 + - Level 3 + - Level 4 + + - Back to level 1 + """ + self.helper(input_text, expected_text) + + def test8(self) -> None: + """ + Test formatting slide with nested bullets and special formatting. + """ + input_text = r""" + * What Are Data Analytics? 
+ - **Collections of data** + + - Aggregated, organized data sets for analysis + + - E.g., customer purchase histories in a CRM system + - **Dashboards** + + - Visual displays of key metrics for insights + - E.g., dashboard showing quarterly revenue, expenses + + - **Descriptive statistics** + - Summary metrics: mean, median, mode, standard deviation + - E.g., average sales per quarter to understand trends + - **Historical reports** + + - Examination of past performance + - E.g., monthly sales reports for past fiscal year + - **Models** + - Statistical representations to forecast, explain phenomena + + - E.g., predictive model to anticipate customer churn based on behavioral data + """ + expected_text = r""" + * What Are Data Analytics? + + - **Collections of data** + - Aggregated, organized data sets for analysis + - E.g., customer purchase histories in a CRM system + + - **Dashboards** + - Visual displays of key metrics for insights + - E.g., dashboard showing quarterly revenue, expenses + + - **Descriptive statistics** + - Summary metrics: mean, median, mode, standard deviation + - E.g., average sales per quarter to understand trends + + - **Historical reports** + - Examination of past performance + - E.g., monthly sales reports for past fiscal year + + - **Models** + - Statistical representations to forecast, explain phenomena + - E.g., predictive model to anticipate customer churn based on behavioral data + """ + self.helper(input_text, expected_text) + + def test9(self) -> None: + """ + This reproduces a broken behavior of prettier with fenced divs. 
+ """ + input_text = r""" + * Incremental vs Iterative + ::: columns + :::: {.column width=55%} + + - **Incremental Development** + - Each increment adds functional components + - Require upfront planning to divide features meaningfully + - Integration of increments can be complex + + - **Iterative Development** + - Each increment delivers usable system + - Refine and improve product through repeated cycles + - Get feedback + - Uncover and adjust for unknown requirements + + - **Incremental $\gg$ Iterative** + + :::: + :::: {.column width=40%} + + ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} + + \small _Incremental + + \vspace{0.5cm} + + ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} + + \small _Iterative_ + + \vspace{0.5cm} + + ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} + + \small _Incremental vs Iterative_ + :::: + ::: + """ + expected_text = r""" + * Incremental vs Iterative + ::: columns + :::: {.column width=55%} + + - **Incremental Development** + - Each increment adds functional components + - Require upfront planning to divide features meaningfully + - Integration of increments can be complex + + - **Iterative Development** + - Each increment delivers usable system + - Refine and improve product through repeated cycles + - Get feedback + - Uncover and adjust for unknown requirements + + - **Incremental $\gg$ Iterative** + :::: + :::: {.column width=40%} + ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} + \small \_Incremental + \vspace{0.5cm} + ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} + \small _Iterative_ + \vspace{0.5cm} + ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} + \small _Incremental vs Iterative_ + :::: + ::: + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# 
Test_format_figures +# ############################################################################# + + +class Test_format_figures(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual_lines = hmarform.format_figures(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test_basic_text_with_figures(self) -> None: + """ + Test converting basic text with figures to column format. + """ + input_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Read / write large amounts of data infrequently + - Analytics requires a few columns + - Better data compression + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Read / write large amounts of data infrequently + - Analytics requires a few columns + - Better data compression + :::: + :::: {.column width=40%} + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_no_figures_no_change(self) -> None: + """ + Test that text without figures remains unchanged. 
+ """ + input_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Better data compression + """ + expected_text = """ + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - **Columnar DBs** + - E.g., Amazon Redshift, Snowflake + - Better data compression + """ + self.helper(input_text, expected_text) + + def test_already_in_columns_format_no_change(self) -> None: + """ + Test that text already in columns format remains unchanged. + """ + input_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + :::: + :::: {.column width=40%} + ![](some_image.png) + :::: + ::: + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + :::: + :::: {.column width=40%} + ![](some_image.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_single_figure(self) -> None: + """ + Test converting text with a single figure. + """ + input_text = """ + - **Important concept** + - This is the main point + - Supporting detail + + ![](path/to/image.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + - **Important concept** + - This is the main point + - Supporting detail + :::: + :::: {.column width=40%} + + ![](path/to/image.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_mixed_content_with_figures(self) -> None: + """ + Test converting mixed content including text and figures. + """ + input_text = """ + ## Section header + + Some introductory text here. + + - **Point one** + - Detail A + - Detail B + - **Point two** + - Detail X + - Detail Y + + ![](image1.png) + + Additional text between figures. + + ![](image2.png) + """ + expected_text = """ + ::: columns + :::: {.column width=65%} + ## Section header + + Some introductory text here. 
+ + - **Point one** + - Detail A + - Detail B + - **Point two** + - Detail X + - Detail Y + :::: + :::: {.column width=40%} + + ![](image1.png) + + Additional text between figures. + + ![](image2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + def test_empty_input(self) -> None: + """ + Test that empty input returns empty output. + """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test_with_slide_title(self) -> None: + """ + Test that slide title is left unchanged. + """ + input_text = """ + * VCS: How to Track Data + + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + """ + expected_text = """ + * VCS: How to Track Data + ::: columns + :::: {.column width=65%} + - **Row-based DBs** + - E.g., MySQL, Postgres + - Optimized for reading / writing rows + - Read / write small amounts of data frequently + :::: + :::: {.column width=40%} + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) + + ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) + :::: + ::: + """ + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_format_md_links_to_latex_format +# ############################################################################# + + +class Test_format_md_links_to_latex_format(hunitest.TestCase): + def helper(self, input_text: str, expected_text: str) -> None: + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual_lines = hmarform.format_md_links_to_latex_format(lines) + actual = "\n".join(actual_lines) + # Check outputs. 
+ expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + # ========================================================================= + # Edge cases. + # ========================================================================= + + def test_empty_input(self) -> None: + """ + Test empty input. + """ + # Prepare inputs. + input_text = "" + expected_text = "" + # Run test. + self.helper(input_text, expected_text) + + def test_no_links(self) -> None: + """ + Test content without any links. + """ + # Prepare inputs. + input_text = """ + # Important Notes + + - This is regular text + - No links here + - Just plain content + """ + expected_text = """ + # Important Notes + + - This is regular text + - No links here + - Just plain content + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Plain URL conversion: http://... or https://... + # ========================================================================= + + def test_plain_http_url(self) -> None: + """ + Test converting single plain HTTP URL. + """ + # Prepare inputs. + input_text = """ + Visit http://example.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{http://example.com}}](http://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_https_url(self) -> None: + """ + Test converting single plain HTTPS URL. + """ + # Prepare inputs. + input_text = """ + Visit https://example.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_path(self) -> None: + """ + Test converting plain URLs with paths. + """ + # Prepare inputs. 
+ input_text = """ + Check out https://ubuntu.com/tutorials/command-line-for-beginners + """ + expected_text = r""" + Check out [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_query_parameters(self) -> None: + """ + Test converting plain URL with query parameters. + """ + # Prepare inputs. + input_text = """ + Search: https://example.com/search?q=python&page=1 + """ + expected_text = r""" + Search: [\textcolor{blue}{\underline{https://example.com/search?q=python&page=1}}](https://example.com/search?q=python&page=1) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_with_fragment(self) -> None: + """ + Test converting plain URL with fragment. + """ + # Prepare inputs. + input_text = """ + Docs: https://docs.python.org/3/tutorial/index.html#tutorial-index + """ + expected_text = r""" + Docs: [\textcolor{blue}{\underline{https://docs.python.org/3/tutorial/index.html#tutorial-index}}](https://docs.python.org/3/tutorial/index.html#tutorial-index) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_at_line_start(self) -> None: + """ + Test plain URL at beginning of line. + """ + # Prepare inputs. + input_text = """ + https://example.com is a good site + """ + expected_text = r""" + [\textcolor{blue}{\underline{https://example.com}}](https://example.com) is a good site + """ + # Run test. + self.helper(input_text, expected_text) + + def test_plain_url_at_line_end(self) -> None: + """ + Test plain URL at end of line. + """ + # Prepare inputs. + input_text = """ + Check this link https://example.com + """ + expected_text = r""" + Check this link [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. 
+ self.helper(input_text, expected_text) + + # ========================================================================= + # URL in backticks conversion: `http://...` or `https://...` + # ========================================================================= + + def test_backtick_url(self) -> None: + """ + Test converting single URL in backticks. + """ + # Prepare inputs. + input_text = """ + Visit `https://example.com` for details + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) for details + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Markdown link conversion: [Text](URL) + # ========================================================================= + + def test_markdown_link_simple(self) -> None: + """ + Test converting simple markdown link [Text](URL). + """ + # Prepare inputs. + input_text = """ + Check out [this tutorial](https://example.com/tutorial) + """ + expected_text = r""" + Check out [\textcolor{blue}{\underline{this tutorial}}](https://example.com/tutorial) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_markdown_link_preserves_text(self) -> None: + """ + Test that markdown link preserves the display text. + """ + # Prepare inputs. + input_text = """ + See [documentation](https://docs.example.com) here + """ + expected_text = r""" + See [\textcolor{blue}{\underline{documentation}}](https://docs.example.com) here + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Email link conversion: [email@domain.com](email@domain.com) + # ========================================================================= + + def test_email_link_simple1(self) -> None: + """ + Test converting simple email link. + """ + # Prepare inputs. 
+ input_text = """ + Contact: [support@example.com](support@example.com) + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_email_link_simple2(self) -> None: + """ + Test converting simple email link. + """ + # Prepare inputs. + input_text = """ + Contact: [](support@example.com) + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Multiple URLs. + # ========================================================================= + + def test_multiple_urls_same_line(self) -> None: + """ + Test converting multiple URLs on same line. + """ + # Prepare inputs. + input_text = """ + Visit https://example.com and https://another.com + """ + expected_text = r""" + Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) and [\textcolor{blue}{\underline{https://another.com}}](https://another.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_multiple_urls_different_lines(self) -> None: + """ + Test converting multiple URLs on different lines. + """ + # Prepare inputs. + input_text = """ + Tutorial: https://ubuntu.com/tutorials/command-line-for-beginners + + Documentation: https://docs.python.org/3/ + """ + expected_text = r""" + Tutorial: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + + Documentation: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Mixed link types. 
+ # ========================================================================= + + def test_mixed_plain_and_backtick_urls(self) -> None: + """ + Test handling mixed plain and backtick URLs. + """ + # Prepare inputs. + input_text = """ + Plain: https://example.com + Backtick: `https://docs.example.com` + """ + expected_text = r""" + Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + Backtick: [\textcolor{blue}{\underline{https://docs.example.com}}](https://docs.example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_mixed_plain_and_markdown_links(self) -> None: + """ + Test handling mixed plain URLs and markdown links. + """ + # Prepare inputs. + input_text = """ + Plain: https://example.com + Markdown: [Click here](https://docs.example.com) + """ + expected_text = r""" + Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + Markdown: [\textcolor{blue}{\underline{Click here}}](https://docs.example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_mixed_all_types(self) -> None: + """ + Test handling all link types in same content. + """ + # Prepare inputs. 
+ input_text = r""" + ## Resources + + - Plain URL: https://ubuntu.com/tutorials/command-line-for-beginners + - Backtick URL: `https://docs.python.org/3/` + - Markdown link: [Click here](https://github.com) + - Email: [support@example.com](support@example.com) + - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) + """ + expected_text = r""" + ## Resources + + - Plain URL: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) + - Backtick URL: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) + - Markdown link: [\textcolor{blue}{\underline{Click here}}](https://github.com) + - Email: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) + """ + # Run test. + self.helper(input_text, expected_text) + + # ========================================================================= + # Complex scenarios. + # ========================================================================= + + def test_url_with_file_extension(self) -> None: + """ + Test URL pointing to file with extension. + """ + # Prepare inputs. + input_text = """ + Download: https://cdn.example.com/files/document.pdf + """ + expected_text = r""" + Download: [\textcolor{blue}{\underline{https://cdn.example.com/files/document.pdf}}](https://cdn.example.com/files/document.pdf) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_already_formatted_link_preserved(self) -> None: + """ + Test that already formatted links are preserved. + """ + # Prepare inputs. + input_text = r""" + Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) + """ + expected_text = r""" + Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) + """ + # Run test. 
+ self.helper(input_text, expected_text) + + # ========================================================================= + # Image/picture links should be left untouched. + # ========================================================================= + + def test_filter_image_simple(self) -> None: + """ + Test that simple image links are left untouched. + """ + # Prepare inputs. + input_text = """ + Check this image: ![](path/to/image.png) + """ + expected_text = """ + Check this image: ![](path/to/image.png) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_jpg_images(self) -> None: + """ + Test that JPG image links are left untouched. + """ + # Prepare inputs. + input_text = """ + ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) + """ + expected_text = """ + ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_mixed_images_and_emails(self) -> None: + """ + Test that image links are not processed while email links are. + """ + # Prepare inputs. + input_text = """ + Contact: [](support@example.com) + Image: ![](path/to/image.png) + Link: https://example.com + """ + expected_text = r""" + Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) + Image: ![](path/to/image.png) + Link: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_image_with_alt_text(self) -> None: + """ + Test that image links with alt text are left untouched. + """ + # Prepare inputs. + input_text = """ + ![Alt text](path/to/image.png) + """ + expected_text = """ + ![Alt text](path/to/image.png) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_filter_multiple_images(self) -> None: + """ + Test that multiple image links are left untouched. + """ + # Prepare inputs. 
+ input_text = """ + ![](image1.png) + ![](image2.jpg) + ![](image3.gif) + """ + expected_text = """ + ![](image1.png) + ![](image2.jpg) + ![](image3.gif) + """ + # Run test. + self.helper(input_text, expected_text) + + def test_markdown_link_with_escaped_underscores(self) -> None: + """ + Test markdown link with escaped underscores in the text. + """ + # Prepare inputs. + input_text = r""" + [tutorial\_docker\_compose](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) + """ + expected_text = r""" + [\textcolor{blue}{\underline{tutorial\_docker\_compose}}](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) + """ + # Run test. + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_add_prettier_ignore_to_div_blocks +# ############################################################################# + + +class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): + """ + Test the function to add prettier-ignore comments around div blocks. + """ + + def test_simple_div_block(self) -> None: + """ + Test a simple div block with two colons. + """ + # Prepare inputs. + txt = """ + :::: + ::: + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + def test_multiple_div_blocks(self) -> None: + """ + Test multiple div blocks in the same content. + """ + # Prepare inputs. + txt = """ + Some text before + + :::: + ::::{.column width=40%} + + Middle text + + :::columns + ::::{.column width=60%} + + Some text after + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. 
+ actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + +# ############################################################################# +# Test_remove_prettier_ignore_from_div_blocks +# ############################################################################# + + +class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): + """ + Test the function to remove prettier-ignore comments from div blocks. + """ + + def test_remove_simple_block(self) -> None: + """ + Test removing prettier-ignore from a simple div block. + """ + # Prepare inputs. + txt = """ + + + :::: + ::: + + + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. + self.check_string(actual) + + def test_remove_multiple_blocks(self) -> None: + """ + Test removing prettier-ignore from multiple div blocks. + """ + # Prepare inputs. + txt = """ + Text before + + + :::: + ::::{.column width=40%} + + + Middle text + + + :::columns + ::::{.column width=60%} + + + Text after + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + lines = txt.split("\n") + # Run test. + actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) + actual = "\n".join(actual_lines) + # Check outputs. 
+ self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py new file mode 100644 index 000000000..34ea20964 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py @@ -0,0 +1,2002 @@ +import logging +import os +import pprint +from typing import Any, List, Tuple, cast + +import helpers.hio as hio +import helpers.hmarkdown as hmarkdo +import helpers.hmarkdown_headers as hmarhead +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: + res = [ + hmarkdo.HeaderInfo(level, text, 5 * i + 1) + for i, (level, text) in enumerate(data) + ] + return res + + +def get_header_list1() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (2, "Section 1.1"), + (3, "Subsection 1.1.1"), + (3, "Subsection 1.1.2"), + (2, "Section 1.2"), + (1, "Chapter 2"), + (2, "Section 2.1"), + (3, "Subsection 2.1.1"), + (2, "Section 2.2"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list2() -> hmarkdo.HeaderList: + data = [ + (1, "Module Alpha"), + (2, "Lesson Alpha-1"), + (3, "Topic Alpha-1.a"), + (3, "Topic Alpha-1.b"), + (2, "Lesson Alpha-2"), + (3, "Topic Alpha-2.a"), + (1, "Module Beta"), + (2, "Lesson Beta-1"), + (3, "Topic Beta-1.a"), + (2, "Lesson Beta-2"), + (1, "Module Gamma"), + (2, "Lesson Gamma-1"), + (3, "Topic Gamma-1.a"), + (3, "Topic Gamma-1.b"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list3() -> hmarkdo.HeaderList: + data = [ + (1, "Topic A"), + (2, "Subtopic A.1"), + (3, "Detail A.1.i"), + (3, "Detail A.1.ii"), + (2, "Subtopic A.2"), 
+ (1, "Topic B"), + (2, "Subtopic B.1"), + (3, "Detail B.1.i"), + (2, "Subtopic B.2"), + (3, "Detail B.2.i"), + (3, "Detail B.2.ii"), + (2, "Subtopic B.3"), + (1, "Topic C"), + (2, "Subtopic C.1"), + (3, "Detail C.1.i"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list4() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (3, "Subsection 1.1.1"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_header_list5() -> hmarkdo.HeaderList: + data = [ + (1, "Chapter 1"), + (2, "Section 1.1"), + (3, "Subsection 1.1.1"), + (1, "Chapter 2"), + ] + header_list = _to_header_list(data) + return header_list + + +def _get_markdown_example1() -> str: + content = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + # Header3 + Content under header 3. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_example2() -> str: + content = r""" + # Header1 + Content under header 1. + ## Header2 + Content under subheader 2. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_no_header_example1() -> str: + content = r""" + This is some content without any headers. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_example4() -> str: + content = r""" + # Chapter 1 + + Welcome to the first chapter. This chapter introduces fundamental concepts and + lays the groundwork for further exploration. + + ## Section 1.1 + + This section discusses the initial principles and key ideas that are crucial for + understanding the topic. + + ### Subsection 1.1.1 + + The first subsection dives deeper into the details, providing examples and + insights that help clarify the concepts. + + Example: + ```python + def greet(name): + return f"Hello, {name}!" 
def _get_markdown_example5() -> str:
    """
    Return a Markdown fixture with one level-1 header and four level-2 headers.

    The first three headers are each followed by one line of text, while the
    last two level-2 headers are adjacent.

    :return: the Markdown text processed by `hprint.dedent()`
    """
    content = r"""
    # Models
    test
    ## Naive Bayes
    test2
    ## Decision trees
    test3
    ## Random forests
    ## Linear models
    """
    content = hprint.dedent(content)
    # The value is a string (not a header list): narrow the type for the
    # type checker.
    content = cast(str, content)
    return content
def _test_navigation_flow(
    self_: Any,
    txt: str,
    header_list_exp: str,
    header_tree_exp: str,
    level: int,
    description: str,
    nav_str_exp: str,
) -> None:
    """
    Check header extraction, tree building, and navigation rendering in turn.

    :param self_: test case object providing `assert_equal()`
    :param txt: Markdown text to process
    :param header_list_exp: expected `pprint` rendering of the extracted
        headers
    :param header_tree_exp: expected string rendering of the header tree
    :param level: level of the header to select in the navigation
    :param description: text of the header to select in the navigation
    :param nav_str_exp: expected rendering of the navigation for the selected
        header
    """

    def _check(actual: str, expected: str) -> None:
        # All three comparisons use the same normalization options.
        self_.assert_equal(
            actual, expected, dedent=True, remove_lead_trail_empty_lines=True
        )

    # 1) Extract the headers up to level 3 and compare their printed form.
    headers = hmarkdo.extract_headers_from_markdown(
        txt.split("\n"), max_level=3
    )
    _check(pprint.pformat(headers), header_list_exp)
    # 2) Build the header tree and compare its string form.
    header_tree = hmarkdo.build_header_tree(headers)
    tree_str = hmarkdo.header_tree_to_str(header_tree, ancestry=None)
    _check(tree_str, header_tree_exp)
    # 3) Render the navigation with the requested header selected.
    nav_str = hmarkdo.selected_navigation_to_str(
        header_tree, level, description
    )
    _check(nav_str, nav_str_exp)
def helper(
    self, headers: hmarkdo.HeaderList, mode: str, expected: str
) -> None:
    """
    Convert `headers` with `header_list_to_markdown()` and check the result.

    :param headers: header list to convert
    :param mode: conversion mode passed through to the function under test
        (the tests in this class use "list" and "headers")
    :param expected: expected output; it is compared with `dedent=True`
    """
    # Render the headers and join the resulting lines into a single string.
    rendered = "\n".join(hmarkdo.header_list_to_markdown(headers, mode))
    # Compare against the expected text.
    self.assert_equal(rendered, expected, dedent=True)
+ expected = r""" + - Chapter 1 + - Section 1.1 + - Subsection 1.1.1 + - Subsection 1.1.2 + - Section 1.2 + - Chapter 2 + - Section 2.1 + - Subsection 2.1.1 + - Section 2.2 + """ + # Run test. + self.helper(headers, mode, expected) + + def test2(self) -> None: + """ + Test conversion of header list to markdown headers format with + proper heading levels. + """ + # Prepare inputs. + headers = get_header_list1() + mode = "headers" + # Prepare outputs. + expected = r""" + # Chapter 1 + ## Section 1.1 + ### Subsection 1.1.1 + ### Subsection 1.1.2 + ## Section 1.2 + # Chapter 2 + ## Section 2.1 + ### Subsection 2.1.1 + ## Section 2.2 + """ + # Run test. + self.helper(headers, mode, expected) + + +# ############################################################################# +# Test_is_markdown_line_separator1 +# ############################################################################# + + +class Test_is_markdown_line_separator1(hunitest.TestCase): + def helper(self, line: str, expected: bool) -> None: + """ + Helper method to test is_markdown_line_separator function. + + :param line: input line to test + :param expected: expected boolean result + """ + # Call function. + actual = hmarkdo.is_markdown_line_separator(line) + # Check output. + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test that a line with only dashes is recognized as a separator. + """ + # Prepare inputs. + line = "-----------------------" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test2(self) -> None: + """ + Test that a line with hash prefix and dashes is a valid separator. + """ + # Prepare inputs. + line = "# ------" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test3(self) -> None: + """ + Test that a line with hash prefix and hash characters is a valid + separator. + """ + # Prepare inputs. + line = "# #########" + # Prepare outputs. + expected = True + # Run test. 
+ self.helper(line, expected) + + def test4(self) -> None: + """ + Test that a line with triple hash prefix and equals is a valid + separator. + """ + # Prepare inputs. + line = "### =====" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test5(self) -> None: + """ + Test that a line with hash and slashes is a valid separator. + """ + # Prepare inputs. + line = "#//////" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test6(self) -> None: + """ + Test that a line with hash, spaces, and slashes is a valid + separator. + """ + # Prepare inputs. + line = "# //////" + # Prepare outputs. + expected = True + # Run test. + self.helper(line, expected) + + def test7(self) -> None: + """ + Test that plain text is not recognized as a separator. + """ + # Prepare inputs. + line = "Not a separator" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test8(self) -> None: + """ + Test that a short dash line is not a valid separator. + """ + # Prepare inputs. + line = "# --" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test9(self) -> None: + """ + Test that mixed separator characters are not valid. + """ + # Prepare inputs. + line = "# ###---" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test10(self) -> None: + """ + Test that two equals signs alone are not a valid separator. + """ + # Prepare inputs. + line = "==" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test11(self) -> None: + """ + Test that dash prefix with slashes is not a valid separator. + """ + # Prepare inputs. + line = "- //////" + # Prepare outputs. + expected = False + # Run test. + self.helper(line, expected) + + def test12(self) -> None: + """ + Test that separators with trailing text are not valid. + """ + # Prepare inputs. 
def test2(self) -> None:
    """
    Test extracting a subheader section only.
    """
    # Prepare inputs.
    # `_get_markdown_example1()` already returns dedented text, so the fixture
    # is used directly, as in `test1`.
    content = _get_markdown_example1()
    # Prepare outputs.
    expected = r"""
    ## Header2
    Content under subheader 2.
    """
    # Run test.
    self.helper(content, "Header2", expected)
def test5(self) -> None:
    """
    Test that extracting a header that does not exist raises `ValueError`.
    """
    # Prepare inputs.
    content = _get_markdown_no_header_example1()
    lines = content.split("\n")
    # Call tested function.
    # Only the call under test is inside the context manager, so that an
    # exception raised while preparing the inputs cannot satisfy the
    # assertion.
    with self.assertRaises(ValueError) as fail:
        hmarkdo.extract_section_from_markdown(lines, "Header4")
    # Check output.
    actual = str(fail.exception)
    expected = r"Header 'Header4' not found"
    self.assert_equal(actual, expected)
def test3(self) -> None:
    """
    Test that content without any header yields an empty header list.
    """
    # Prepare inputs.
    # Reuse the shared no-header fixture instead of an inline copy of the
    # same text.
    content = _get_markdown_no_header_example1()
    max_level = 3
    # Prepare outputs.
    expected = r"""[]"""
    # Run test.
    self.helper(content, max_level, expected)
def test3(self) -> None:
    """
    Test extracting slides from content that contains no slide.
    """
    # Prepare inputs.
    content = _get_markdown_no_header_example1()
    # Prepare outputs.
    # The result is an empty list paired with the value 1.
    expected = r"""([], 1)"""
    # Run test.
    self.helper(content, expected)
def helper(
    self, input_lines: List[str], level: int, expected_lines: List[str]
) -> None:
    """
    Helper method to test `modify_header_level` function.

    :param input_lines: list of input text lines
    :param level: level adjustment to apply
    :param expected_lines: list of expected output lines
    """
    # Call tested function.
    actual_lines = hmarkdo.modify_header_level(input_lines, level)
    # Check output.
    # Compare the joined text so that a failure reports a single string diff.
    actual = "\n".join(actual_lines)
    expected = "\n".join(expected_lines)
    self.assertEqual(actual, expected)
+ input_lines = [ + "# Chapter 1", + "## Section 1.1", + "### Subsection 1.1.1", + "#### Sub-subsection 1.1.1.1", + ] + level = 1 + expected_lines = [ + "## Chapter 1", + "### Section 1.1", + "#### Subsection 1.1.1", + "##### Sub-subsection 1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test2(self) -> None: + """ + Test inputs to increase headings with level 5 becoming level 6. + """ + # Prepare inputs and outputs. + input_lines = ["# Chapter 1", "##### Sub-sub-subsection 1.1.1.1.1"] + level = 1 + expected_lines = ["## Chapter 1", "###### Sub-sub-subsection 1.1.1.1.1"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test3(self) -> None: + """ + Test inputs to increase headings including a paragraph which remains + unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["# Chapter 1", "Paragraph 1"] + level = 1 + expected_lines = ["## Chapter 1", "Paragraph 1"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test4(self) -> None: + """ + Test inputs of paragraphs which remain unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["Paragraph 1", "Paragraph 2"] + level = 1 + expected_lines = ["Paragraph 1", "Paragraph 2"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test5(self) -> None: + """ + Test to increase headings with mixed levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "# Chapter 1", + "##### Sub-sub-subsection 1.1.1.1.1", + "# Chapter 2", + "### Subsection 2.1", + "# Chapter 3", + ] + level = 1 + expected_lines = [ + "## Chapter 1", + "###### Sub-sub-subsection 1.1.1.1.1", + "## Chapter 2", + "#### Subsection 2.1", + "## Chapter 3", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test6(self) -> None: + """ + Test the inputs to decrease headings. + """ + # Prepare inputs and outputs. 
+ input_lines = [ + "## Section 1.1", + "### Subsection 1.1.1", + "#### Sub-subsection 1.1.1.1", + "##### Sub-sub-subsection 1.1.1.1.1", + ] + level = -1 + expected_lines = [ + "# Section 1.1", + "## Subsection 1.1.1", + "### Sub-subsection 1.1.1.1", + "#### Sub-sub-subsection 1.1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test7(self) -> None: + """ + Test inputs to decrease headings by one level. + """ + # Prepare inputs and outputs. + input_lines = [ + "## Chapter 1", + "##### Sub-subsection 1.1.1.1", + ] + level = -1 + expected_lines = [ + "# Chapter 1", + "#### Sub-subsection 1.1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test8(self) -> None: + """ + Test inputs of paragraphs which remain unchanged. + """ + # Prepare inputs and outputs. + input_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] + level = -1 + expected_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test9(self) -> None: + """ + Test increasing headers by 2 levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "# Chapter 1", + "## Section 1.1", + "### Subsection 1.1.1", + ] + level = 2 + expected_lines = [ + "### Chapter 1", + "#### Section 1.1", + "##### Subsection 1.1.1", + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test10(self) -> None: + """ + Test decreasing headers by 2 levels. + """ + # Prepare inputs and outputs. + input_lines = [ + "### Chapter 1", + "#### Section 1.1", + "##### Subsection 1.1.1", + ] + level = -2 + expected_lines = [ + "# Chapter 1", # 3-2=1 + "## Section 1.1", # 4-2=2 + "### Subsection 1.1.1", # 5-2=3 + ] + # Call the helper. + self.helper(input_lines, level, expected_lines) + + def test11(self) -> None: + """ + Test increasing headers by 2 levels. + """ + # Prepare inputs and outputs. 
def helper(
    self, input_text: List[str], expected: List[str], max_lev: int
) -> None:
    """
    Run `format_headers()` on the input lines and check the written file.

    :param input_text: lines of text to process
    :param expected: lines expected in the output file
    :param max_lev: maximum heading level to be formatted
    """
    # The function under test writes its result to a file: use a file in the
    # scratch space of the test.
    out_file = os.path.join(self.get_scratch_space(), "write_file.txt")
    # Call tested function.
    hmarkdo.format_headers(input_text, out_file, max_lev=max_lev)
    # Check output.
    expected_txt = "\n".join(expected)
    actual_txt = hio.from_file(out_file)
    self.assertEqual(actual_txt, expected_txt)
+ """ + input_text = [ + "# Chapter 1", + "## Section 1.1", + "### Section 1.1.1", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "## ############################################################################", + "## Section 1.1", + "## ############################################################################", + "### Section 1.1.1", + ] + self.helper(input_text, expected, max_lev=2) + + def test3(self) -> None: + """ + Test the inputs to check that markdown line separators are removed. + """ + input_text = [ + "# Chapter 1", + "-----------------", + "Text", + "############", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "Text", + ] + self.helper(input_text, expected, max_lev=1) + + def test4(self) -> None: + """ + Test inputs where max_level is inferred from the file content. + """ + input_text = [ + "# Chapter 1", + "max_level=1", + "## Section 1.1", + ] + expected = [ + "# #############################################################################", + "# Chapter 1", + "# #############################################################################", + "max_level=1", + "## Section 1.1", + ] + self.helper(input_text, expected, max_lev=2) + + def test5(self) -> None: + """ + Test inputs with no headers to ensure they remain unchanged. 
class Test_sanity_check_header_list1(hunitest.TestCase):
    """
    Test `sanity_check_header_list()` on valid and invalid header lists.
    """

    def test1(self) -> None:
        """
        Check that a list whose levels increase by at most one is accepted.
        """
        # The call must complete without raising.
        hmarkdo.sanity_check_header_list(get_header_list1())

    def test2(self) -> None:
        """
        Check that a jump from level 1 directly to level 3 raises an error.
        """
        # Prepare inputs.
        skipping_headers = get_header_list4()
        # Call function.
        with self.assertRaises(ValueError) as cm:
            hmarkdo.sanity_check_header_list(skipping_headers)
        # Check the error message against the golden outcome.
        self.check_string(str(cm.exception))

    def test3(self) -> None:
        """
        Check that a drop from level 3 back to level 1 is accepted.
        """
        # The call must complete without raising.
        hmarkdo.sanity_check_header_list(get_header_list5())
+ word = "SimpleFeedForward" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test2(self) -> None: + """ + Test word with multiple internal capital letters. + """ + # Prepare inputs. + word = "DeepNPTS" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test3(self) -> None: + """ + Test word with capital only at the start. + """ + # Prepare inputs. + word = "Machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test4(self) -> None: + """ + Test all lowercase word. + """ + # Prepare inputs. + word = "learning" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test5(self) -> None: + """ + Test all uppercase word. + """ + # Prepare inputs. + word = "ML" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test6(self) -> None: + """ + Test single lowercase character. + """ + # Prepare inputs. + word = "a" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test7(self) -> None: + """ + Test single uppercase character. + """ + # Prepare inputs. + word = "A" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test8(self) -> None: + """ + Test empty string. + """ + # Prepare inputs. + word = "" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test9(self) -> None: + """ + Test camelCase word. + """ + # Prepare inputs. + word = "camelCase" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + +# ############################################################################# +# Test_capitalize_header1 +# ############################################################################# + + +class Test_capitalize_header1(hunitest.TestCase): + def helper(self, txt: str, expected: str) -> None: + # Prepare inputs. 
+ txt = hprint.dedent(txt) + # Run function. + lines = txt.split("\n") + actual_lines = hmarkdo.capitalize_header(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test capitalizing a short two-word title. + """ + txt = r""" + * ML theory + """ + expected = r""" + * ML Theory + """ + self.helper(txt, expected) + + def test2(self) -> None: + """ + Test capitalizing a longer multi-word title. + """ + txt = r""" + * A map of machine learning + """ + expected = r""" + * A Map of Machine Learning + """ + self.helper(txt, expected) + + def test3(self) -> None: + """ + Test that strings inside backticks are preserved. + """ + txt = r""" + # Using `python` for Machine Learning + """ + expected = r""" + # Using `python` for Machine Learning + """ + self.helper(txt, expected) + + def test4(self) -> None: + """ + Test that strings inside single quotes are preserved. + """ + txt = r""" + * Working with 'machine learning' algorithms + """ + expected = r""" + * Working with 'machine learning' Algorithms + """ + self.helper(txt, expected) + + def test5(self) -> None: + """ + Test that strings inside double quotes are preserved. + """ + txt = r""" + # Understanding "deep learning" concepts + """ + expected = r""" + # Understanding "deep learning" Concepts + """ + self.helper(txt, expected) + + def test6(self) -> None: + """ + Test mixed usage of quotes and backticks. + """ + txt = r""" + * Using `python` and "machine learning" for 'data science' + """ + expected = r""" + * Using `python` and "machine learning" for 'data science' + """ + self.helper(txt, expected) + + def test7(self) -> None: + """ + Test complex title with various quote types. 
+ """ + txt = r""" + # Introduction to `sklearn` and "data preprocessing" in 'python' + """ + expected = r""" + # Introduction to `sklearn` and "data preprocessing" in 'python' + """ + self.helper(txt, expected) + + def test8(self) -> None: + """ + Test that words with internal capitals are preserved. + """ + txt = r""" + # SimpleFeedForward model + """ + expected = r""" + # SimpleFeedForward Model + """ + self.helper(txt, expected) + + def test9(self) -> None: + """ + Test multiple words with internal capitals. + """ + txt = r""" + * DeepNPTS and SimpleFeedForward models + """ + expected = r""" + * DeepNPTS and SimpleFeedForward Models + """ + self.helper(txt, expected) + + def test10(self) -> None: + """ + Test mixed normal words and words with internal capitals. + """ + txt = r""" + # Using SimpleFeedForward for machine learning + """ + expected = r""" + # Using SimpleFeedForward for Machine Learning + """ + self.helper(txt, expected) + + def test11(self) -> None: + """ + Test that headers inside fenced code blocks are not processed. + """ + txt = r""" + # Main header + + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + + ## Another header + """ + expected = r""" + # Main Header + + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + + ## Another Header + """ + self.helper(txt, expected) + + def test12(self) -> None: + """ + Test headers inside multiple fenced code blocks are not processed. + """ + txt = r""" + # First header + + ```python + # comment in code + x = 1 + ``` + + ## Second header + + ```bash + # shell comment + echo "hello" + ``` + """ + expected = r""" + # First Header + + ```python + # comment in code + x = 1 + ``` + + ## Second Header + + ```bash + # shell comment + echo "hello" + ``` + """ + self.helper(txt, expected) + + def test13(self) -> None: + """ + Test that the first word after a numeric prefix is capitalized. 
+ """ + txt = r""" + ## 4.4 the Victim Triangle + """ + expected = r""" + ## 4.4 The Victim Triangle + """ + self.helper(txt, expected) + + def test14(self) -> None: + """ + Test that "of", "a", "an" after a numeric prefix are capitalized. + """ + txt = r""" + ## 1.1 of mice and men + """ + expected = r""" + ## 1.1 Of Mice and Men + """ + self.helper(txt, expected) + + def test15(self) -> None: + """ + Test that "of", "a", "an" are capitalized. + """ + txt = r""" + ## of mice and men + """ + expected = r""" + ## Of Mice and Men + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_capitalize_header2 +# ############################################################################# + + +class Test_capitalize_header2(hunitest.TestCase): + """ + Test enhanced capitalize_header functionality for mixed case words and + fenced blocks. + """ + + def helper(self, txt: str, expected: str) -> None: + """ + Helper method to test capitalize_header function. + + :param txt: input text to process + :param expected: expected output after processing + """ + # Prepare inputs. + txt = hprint.dedent(txt) + # Run function. + lines = txt.split("\n") + actual_lines = hmarkdo.capitalize_header(lines) + actual = "\n".join(actual_lines) + # Check outputs. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test that SimpleFeedForward is preserved as-is. + """ + txt = r""" + # using SimpleFeedForward for predictions + """ + expected = r""" + # Using SimpleFeedForward for Predictions + """ + self.helper(txt, expected) + + def test2(self) -> None: + """ + Test that DeepNPTS is preserved as-is. + """ + txt = r""" + # training with DeepNPTS model + """ + expected = r""" + # Training with DeepNPTS Model + """ + self.helper(txt, expected) + + def test3(self) -> None: + """ + Test multiple mixed case words in the same header. 
+ """ + txt = r""" + # comparing SimpleFeedForward and DeepNPTS models + """ + expected = r""" + # Comparing SimpleFeedForward and DeepNPTS Models + """ + self.helper(txt, expected) + + def test4(self) -> None: + """ + Test mixed case words combined with all caps words. + """ + txt = r""" + # using API with SimpleFeedForward for ML tasks + """ + expected = r""" + # Using API with SimpleFeedForward for ML Tasks + """ + self.helper(txt, expected) + + def test5(self) -> None: + """ + Test mixed case word as the first word in header. + """ + txt = r""" + # SimpleFeedForward network architecture + """ + expected = r""" + # SimpleFeedForward Network Architecture + """ + self.helper(txt, expected) + + def test6(self) -> None: + """ + Test that headers inside fenced blocks are not capitalized. + """ + txt = r""" + # Main header + Some text + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + """ + expected = r""" + # Main Header + Some text + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + ``` + """ + self.helper(txt, expected) + + def test7(self) -> None: + """ + Test that multiple headers inside fenced blocks are not capitalized. 
+ """ + txt = r""" + # introduction to forecasting + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + q75 = forecast.quantile(0.75) + + # 90% confidence interval + q05 = forecast.quantile(0.05) + q95 = forecast.quantile(0.95) + + # mean and median + mean = forecast.mean + median = forecast.quantile(0.5) + ``` + # conclusion + """ + expected = r""" + # Introduction to Forecasting + ```python + # 50% confidence interval (interquartile range) + q25 = forecast.quantile(0.25) + q75 = forecast.quantile(0.75) + + # 90% confidence interval + q05 = forecast.quantile(0.05) + q95 = forecast.quantile(0.95) + + # mean and median + mean = forecast.mean + median = forecast.quantile(0.5) + ``` + # Conclusion + """ + self.helper(txt, expected) + + def test8(self) -> None: + """ + Test that headers in fenced blocks with language specifier are not + capitalized. + """ + txt = r""" + # data processing + ```bash + # run the script + python script.py + ``` + """ + expected = r""" + # Data Processing + ```bash + # run the script + python script.py + ``` + """ + self.helper(txt, expected) + + def test9(self) -> None: + """ + Test mixed case words inside fenced blocks are preserved. + """ + txt = r""" + # using SimpleFeedForward model + ```python + # SimpleFeedForward implementation + class SimpleFeedForward: + pass + ``` + """ + expected = r""" + # Using SimpleFeedForward Model + ```python + # SimpleFeedForward implementation + class SimpleFeedForward: + pass + ``` + """ + self.helper(txt, expected) + + def test10(self) -> None: + """ + Test multiple fenced blocks in the same document. 
+ """ + txt = r""" + # first section + ```python + # code block 1 + x = 1 + ``` + # second section + ```python + # code block 2 + y = 2 + ``` + """ + expected = r""" + # First Section + ```python + # code block 1 + x = 1 + ``` + # Second Section + ```python + # code block 2 + y = 2 + ``` + """ + self.helper(txt, expected) + + def test11(self) -> None: + """ + Test that slide titles (starting with *) also preserve mixed case. + """ + txt = r""" + * using SimpleFeedForward for predictions + """ + expected = r""" + * Using SimpleFeedForward for Predictions + """ + self.helper(txt, expected) + + def test12(self) -> None: + """ + Test mixed case words with punctuation. + """ + txt = r""" + # SimpleFeedForward: a neural network approach + """ + expected = r""" + # SimpleFeedForward: a Neural Network Approach + """ + self.helper(txt, expected) + + def test13(self) -> None: + """ + Test that normal words without mixed case are still capitalized + properly. + """ + txt = r""" + # introduction to machine learning + """ + expected = r""" + # Introduction to Machine Learning + """ + self.helper(txt, expected) + + def test14(self) -> None: + """ + Test empty fenced blocks don't cause issues. + """ + txt = r""" + # header before + ``` + ``` + # header after + """ + expected = r""" + # Header Before + ``` + ``` + # Header After + """ + self.helper(txt, expected) + + +# ############################################################################# +# Test_has_mixed_case1 +# ############################################################################# + + +class Test_has_mixed_case1(hunitest.TestCase): + """ + Test the _has_mixed_case helper function. + """ + + def helper(self, word: str, expected: bool) -> None: + """ + Test helper for has_mixed_case. + + :param word: word to test + :param expected: expected result + """ + # Call function. + actual = hmarkdo.has_mixed_case(word) + # Check output. 
+ self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test SimpleFeedForward has mixed case. + """ + # Prepare inputs. + word = "SimpleFeedForward" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test2(self) -> None: + """ + Test DeepNPTS has mixed case (all caps after first). + """ + # Prepare inputs. + word = "DeepNPTS" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test3(self) -> None: + """ + Test Machine does not have mixed case (only first char capital). + """ + # Prepare inputs. + word = "Machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test4(self) -> None: + """ + Test lowercase word has no mixed case. + """ + # Prepare inputs. + word = "machine" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test5(self) -> None: + """ + Test all caps word has mixed case (caps after first position). + """ + # Prepare inputs. + word = "API" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test6(self) -> None: + """ + Test single character has no mixed case. + """ + # Prepare inputs. + word = "A" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test7(self) -> None: + """ + Test two character word with first capital has no mixed case. + """ + # Prepare inputs. + word = "At" + # Prepare outputs. + expected = False + # Run test. + self.helper(word, expected) + + def test8(self) -> None: + """ + Test two character word with both caps has mixed case. + """ + # Prepare inputs. + word = "ML" + # Prepare outputs. + expected = True + # Run test. + self.helper(word, expected) + + def test9(self) -> None: + """ + Test camelCase word has mixed case. + """ + # Prepare inputs. + word = "camelCase" + # Prepare outputs. + expected = True + # Run test. 
+ self.helper(word, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py new file mode 100644 index 000000000..f12ae2d5a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py @@ -0,0 +1,377 @@ +import logging +from typing import List, Tuple, cast + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: + res = [ + hmarkdo.HeaderInfo(level, text, 5 * i + 1) + for i, (level, text) in enumerate(data) + ] + return res + + +def get_header_list6() -> hmarkdo.HeaderList: + """ + - Spelling + - All + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + - Docstrings + - LLM + - Linter + - Unit_tests + - All + - LLM + - Linter + """ + data = [ + (1, "Spelling"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + (1, "Python"), + (2, "Naming"), + (3, "LLM"), + (3, "Linter"), + (2, "Docstrings"), + (3, "LLM"), + (3, "Linter"), + (1, "Unit_tests"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + ] + header_list = _to_header_list(data) + return header_list + + +def get_guidelines_txt1() -> str: + txt = r""" + # General + + ## Spelling + + ### LLM + + ### Linter + + - Spell commands in lower case and programs with the first letter in upper case + - E.g., `git` as a command, `Git` as a program + - E.g., capitalize the first letter of `Python` + - Capitalize `JSON`, `CSV`, `DB` and other abbreviations + + # Python + + ## Naming + + ### LLM + + - Name functions using verbs and verbs/actions + - Good: `download_data()`, `process_input()`, `calculate_sum()` + - Good: Python internal functions as 
`__repr__`, `__init__` are valid + - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid + - Name classes using nouns + - Good: `Downloader()`, `DataProcessor()`, `User()` + - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` + + ### Linter + + - Name executable Python scripts using verbs and actions + - E.g., `download.py` and not `downloader.py` + + # Unit_tests + + ## Rules + + ### LLM + + - A test class should test only one function or class to help understanding + test failures + - A test method should only test a single case to ensures clarity and + precision in testing + - E.g., "for these inputs the function responds with this output" + """ + txt = hprint.dedent(txt) + txt = cast(str, txt) + return txt + + +# ############################################################################# +# Test_convert_header_list_into_guidelines1 +# ############################################################################# + + +class Test_convert_header_list_into_guidelines1(hunitest.TestCase): + def test1(self) -> None: + """ + Test converting a header list into guidelines. + """ + # Prepare inputs. + header_list = get_header_list6() + # Call function. + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. 
+ actual = "\n".join(map(str, guidelines)) + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + HeaderInfo(1, 'Unit_tests:All:LLM', 66) + HeaderInfo(1, 'Unit_tests:All:Linter', 71) + """ + self.assert_equal(actual, expected, dedent=True) + + +# ############################################################################# +# Test_extract_rules1 +# ############################################################################# + + +class Test_extract_rules1(hunitest.TestCase): + def helper(self, selection_rules: List[str], expected: str) -> None: + """ + Test extracting rules from a markdown file. + """ + # Prepare inputs. + guidelines = get_header_list6() + guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) + # Check output. + actual = "\n".join(map(str, selected_guidelines)) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:*:LLM"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + """ + self.helper(selection_rules, expected) + + def test2(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:NONE:LLM"] + expected = """ + """ + self.helper(selection_rules, expected) + + def test3(self) -> None: + """ + Test extracting rules from a markdown file. + """ + selection_rules = ["Spelling:All:*"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + """ + self.helper(selection_rules, expected) + + def test4(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + selection_rules = ["Spelling:All:*", "Python:*:*"] + expected = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + """ + self.helper(selection_rules, expected) + + +# ############################################################################# +# Test_parse_rules_from_txt1 +# ############################################################################# + + +class Test_parse_rules_from_txt1(hunitest.TestCase): + def helper(self, text: str, expected: List[str]) -> None: + # Prepare inputs. + text = hprint.dedent(text) + lines = text.split("\n") + # Call function. + actual = hmarkdo.parse_rules_from_txt(lines) + # Check output. + actual = str(actual) + expected = str(expected) + self.assert_equal(actual, expected, dedent=True) + + def test_basic_list1(self) -> None: + """ + Test extracting simple first-level bullet points. + """ + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + expected = ["- Item 1", "- Item 2", "- Item 3"] + self.helper(text, expected) + + def test_nested_list1(self) -> None: + """ + Test extracting bullet points with nested sub-items. + """ + text = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + expected = [ + "- Item 1", + "- Item 2\n - Sub-item 2.1\n - Sub-item 2.2", + "- Item 3", + ] + self.helper(text, expected) + + def test_empty_list1(self) -> None: + """ + Test handling empty input. + """ + text = "" + expected = [] + self.helper(text, expected) + + +# ############################################################################# +# Test_end_to_end_rules1 +# ############################################################################# + + +class Test_end_to_end_rules1(hunitest.TestCase): + def test_get_header_list1(self) -> None: + """ + Test extracting headers from a markdown file. 
+ """ + # Prepare inputs. + txt = get_guidelines_txt1() + max_level = 4 + # Run function. + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) + # Check output. + actual = "\n".join(map(str, header_list)) + expected = """ + HeaderInfo(1, 'General', 1) + HeaderInfo(2, 'Spelling', 3) + HeaderInfo(3, 'LLM', 5) + HeaderInfo(3, 'Linter', 7) + HeaderInfo(1, 'Python', 14) + HeaderInfo(2, 'Naming', 16) + HeaderInfo(3, 'LLM', 18) + HeaderInfo(3, 'Linter', 28) + HeaderInfo(1, 'Unit_tests', 33) + HeaderInfo(2, 'Rules', 35) + HeaderInfo(3, 'LLM', 37) + """ + self.assert_equal(actual, expected, dedent=True) + # Run function. + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. + actual = "\n".join(map(str, guidelines)) + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.assert_equal(actual, expected, dedent=True) + + def helper_extract_rules( + self, selection_rules: List[str], expected: str + ) -> None: + """ + Helper function to test extracting rules from a markdown file. + """ + # Prepare inputs. + txt = get_guidelines_txt1() + max_level = 4 + lines = txt.split("\n") + header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) + # Check output. + actual = "\n".join(map(str, selected_guidelines)) + self.assert_equal(actual, expected, dedent=True) + + def test_extract_rules1(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + selection_rules = ["General:*:LLM"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules2(self) -> None: + selection_rules = ["General:NONE:LLM"] + expected = """ + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules3(self) -> None: + selection_rules = ["*:*:LLM"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules4(self) -> None: + selection_rules = ["*:*:LLM", "General:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules5(self) -> None: + selection_rules = ["*:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) + + def test_extract_rules6(self) -> None: + selection_rules = ["*:*:*", "General:*:*"] + expected = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper_extract_rules(selection_rules, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py new file mode 100644 
index 000000000..39137551e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py @@ -0,0 +1,399 @@ +import logging +from typing import List + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_process_slides +# ############################################################################# + + +class Test_process_slides(hunitest.TestCase): + @staticmethod + def transform( + slide_text: List[str], + *, + slide_title: str = "", + slide_line_number: int = 0, + ) -> str: + """ + Example adding a `@` to the beginning of each line of the slide. + + :param slide_text: List of lines in the slide + :param slide_title: Title of the slide + :param slide_line_number: Line number of the slide + :return: Transformed text + """ + _LOG.debug("input=\n%s", "\n".join(slide_text)) + # Transform. + text_out = [f"@{line}" for line in slide_text] + _LOG.debug("output=\n%s", "\n".join(text_out)) + return text_out + + def helper(self, text: str, expected: str) -> None: + """ + Test helper for process_slides. + + :param text: Input text with slides + :param expected: Expected output after transformation + """ + # Prepare inputs. + text = hprint.dedent(text, remove_lead_trail_empty_lines_=False) + # Process. + actual = hmarkdo.process_slides(text, self.transform) + # Check output. + expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test multiple slides. 
+ """ + text = """ + * Slide 1 + - Point 1 + - Point 2 + + * Slide 2 + - Point A + - Point B + """ + expected = """ + @* Slide 1 + @ - Point 1 + @ - Point 2 + @ + @* Slide 2 + @ - Point A + @ - Point B + """ + self.helper(text, expected) + + def test2(self) -> None: + """ + Test single line slide. + """ + text = """ + * Single line slide + """ + expected = """ + @* Single line slide + """ + self.helper(text, expected) + + def test3(self) -> None: + """ + Test slide with inline comment. + """ + text = """ + * Slide with comment + # This is a comment + - Point 1 + """ + expected = """ + @* Slide with comment + @ # This is a comment + @ - Point 1 + """ + self.helper(text, expected) + + def test4(self) -> None: + """ + Test slide with comment block. + """ + text = """ + * Slide with block + + - Point 1 + """ + expected = """ + @* Slide with block + @ + @ - Point 1 + """ + self.helper(text, expected) + + def test5(self) -> None: + text = """ + * Slide 1 + * Slide 2 + """ + expected = """ + @* Slide 1 + @* Slide 2 + """ + self.helper(text, expected) + + def test6(self) -> None: + text = """ + + * Slide 1 + * Slide 2 + """ + expected = """ + + @* Slide 1 + @* Slide 2 + """ + self.helper(text, expected) + + def test7(self) -> None: + text = """ + + * Slide 1 + * Slide 2 + + """ + expected = """ + + @* Slide 1 + @* Slide 2 + @ + """ + self.helper(text, expected) + + def test8(self) -> None: + text = """ + //* Slide 1 + * Slide 2 + + """ + expected = """ + //* Slide 1 + @* Slide 2 + @ + """ + self.helper(text, expected) + + +# ############################################################################# +# Test_convert_slide_to_markdown +# ############################################################################# + + +class Test_convert_slide_to_markdown(hunitest.TestCase): + """ + Test converting slide bullets to markdown headers. + """ + + def helper(self, input_text, expected_text) -> None: + """ + Test helper for convert_slide_to_markdown. 
+ + :param input_text: Input text with slide bullets + :param expected_text: Expected output with markdown headers + """ + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarkdo.convert_slide_to_markdown(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test converting a simple slide bullet to markdown header. + """ + input_text = """* This is a slide title""" + expected_text = """##### This is a slide title""" + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test converting multiple slide bullets. + """ + input_text = """ + * First slide + - Some content + * Second slide + - More content + """ + expected_text = """ + ##### First slide + - Some content + ##### Second slide + - More content + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test converting slides mixed with other content. + """ + input_text = """ + Some intro text + * Slide title + - Point 1 + - Point 2 + Regular markdown text + * Another slide + """ + expected_text = """ + Some intro text + ##### Slide title + - Point 1 + - Point 2 + Regular markdown text + ##### Another slide + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test converting text with no slide bullets. + """ + input_text = """ + Regular text + More text + - Regular bullet point + """ + expected_text = """ + Regular text + More text + - Regular bullet point + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test converting empty input. 
+ """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + +# ############################################################################# +# Test_convert_markdown_to_slide +# ############################################################################# + + +class Test_convert_markdown_to_slide(hunitest.TestCase): + """ + Test converting markdown headers to slide bullets. + """ + + def helper(self, input_text: str, expected_text: str) -> None: + """ + Test helper for convert_markdown_to_slide. + + :param input_text: Input text with markdown headers + :param expected_text: Expected output with slide bullets + """ + # Prepare inputs. + lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + actual = hmarkdo.convert_markdown_to_slide(lines) + actual = "\n".join(actual) + # Check outputs. + expected = hprint.dedent(expected_text).strip() + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test converting a simple h5 header to slide bullet. + """ + input_text = """ + ##### This is a slide title + """ + expected_text = """ + * This is a slide title + """ + self.helper(input_text, expected_text) + + def test2(self) -> None: + """ + Test converting multiple h5 headers. + """ + input_text = """ + ##### First slide + - Some content + ##### Second slide + - More content + """ + expected_text = """ + * First slide + - Some content + * Second slide + - More content + """ + self.helper(input_text, expected_text) + + def test3(self) -> None: + """ + Test converting headers mixed with other content. + """ + input_text = """ + Some intro text + ##### Slide title + - Point 1 + - Point 2 + Regular markdown text + ##### Another slide + """ + expected_text = """ + Some intro text + * Slide title + - Point 1 + - Point 2 + Regular markdown text + * Another slide + """ + self.helper(input_text, expected_text) + + def test4(self) -> None: + """ + Test converting text with no h5 headers. 
+ """ + input_text = """ + Regular text + # H1 header + ## H2 header + #### H4 header + """ + expected_text = """ + Regular text + # H1 header + ## H2 header + #### H4 header + """ + self.helper(input_text, expected_text) + + def test5(self) -> None: + """ + Test converting empty input. + """ + input_text = "" + expected_text = "" + self.helper(input_text, expected_text) + + def test6(self) -> None: + """ + Test that converting slide to markdown and back gives original result. + """ + # Prepare inputs. + input_text = """ + * First slide + - Some content + * Second slide + Regular text + """ + original_lines = hprint.dedent(input_text).strip().split("\n") + # Run test. + markdown_lines = hmarkdo.convert_slide_to_markdown(original_lines) + roundtrip_lines = hmarkdo.convert_markdown_to_slide(markdown_lines) + # Check outputs. + self.assert_equal(str(roundtrip_lines), str(original_lines)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py new file mode 100644 index 000000000..f651aa3bf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py @@ -0,0 +1,196 @@ +import logging +import pprint +from typing import Dict, List + +import helpers.hmarkdown_tables as hmartabl +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_replace_tables_with_tags1 +# ############################################################################# + + +class Test_replace_tables_with_tags1(hunitest.TestCase): + def helper( + self, text: str, expected_lines: List[str], expected_map: Dict[str, str] + ) -> None: + """ + Test replacing 
markdown tables with tags. + """ + lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) + lines = lines.split("\n") + # Call function. + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + # Check output. + table_map_as_str = pprint.pformat(table_map) + expected_map_as_str = pprint.pformat(expected_map) + self.assert_equal(table_map_as_str, expected_map_as_str) + # + actual_lines = "\n".join(actual_lines) + expected_lines = hprint.dedent( + expected_lines, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual_lines, expected_lines) + + def helper_round_trip(self, text: str) -> None: + """ + Test the round trip. + """ + # Do the round trip. + lines = text.split("\n") + actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) + act_text = hmartabl.replace_tags_with_tables(actual_lines, table_map) + # Check output. + act_text = "\n".join(act_text) + self.assert_equal(act_text, text) + + def test1(self) -> None: + """ + Test replacing simple markdown table with tags. + """ + # Prepare inputs. + text = """ + Some text before + | Column 1 | Column 2 | + |----------|----------| + | Value 1 | Value 2 | + | Value 3 | Value 4 | + Text between tables + | Name | Age | City | + |------|-----|------| + | John | 25 | NYC | + Some text after + """ + # Prepare outputs. + expected_lines = """ + Some text before + + Text between tables + + Some text after + """ + # Check table map. + expected_map = { + "1": "| Column 1 | Column 2 |\n|----------|----------|\n| Value 1 | Value 2 |\n| Value 3 | Value 4 |", + "2": "| Name | Age | City |\n|------|-----|------|\n| John | 25 | NYC |", + } + self.helper(text, expected_lines, expected_map) + + def test2(self) -> None: + """ + Test table with alignment indicators. 
+ """ + text = """ + | Left | Center | Right | + |:-----|:------:|------:| + | L1 | C1 | R1 | + | L2 | C2 | R2 | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Left | Center | Right |\n|:-----|:------:|------:|\n| L1 | C1 | R1 |\n| L2 | C2 | R2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test3(self) -> None: + """ + Test table with minimal structure. + """ + text = """ + Before + | A | B | + |---|---| + | 1 | 2 | + After + """ + expected_lines = """ + Before + + After + """ + expected_map = {"1": "| A | B |\n|---|---|\n| 1 | 2 |"} + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test4(self) -> None: + """ + Test table with empty cells. + """ + text = """ + | Col1 | Col2 | Col3 | + |------|------|------| + | A | | C | + | | B | | + """ + expected_lines = """ + + """ + expected_map = { + "1": "| Col1 | Col2 | Col3 |\n|------|------|------|\n| A | | C |\n| | B | |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test5(self) -> None: + """ + Test multiple tables with different column counts. + """ + text = """ + First table: + | A | B | + |---|---| + | 1 | 2 | + + Second table: + | X | Y | Z | W | + |---|---|---|---| + | a | b | c | d | + | e | f | g | h | + """ + expected_lines = """ + First table: + + + Second table: + + """ + expected_map = { + "1": "| A | B |\n|---|---|\n| 1 | 2 |", + "2": "| X | Y | Z | W |\n|---|---|---|---|\n| a | b | c | d |\n| e | f | g | h |", + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) + + def test6(self) -> None: + """ + Test table with indentation. 
+ """ + text = """ + Outside + | Col1 | Col2 | + |------|------| + | Val1 | Val2 | + End + """ + expected_lines = """ + Outside + + End + """ + expected_map = { + "1": " | Col1 | Col2 |\n |------|------|\n | Val1 | Val2 |" + } + self.helper(text, expected_lines, expected_map) + # + self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py new file mode 100644 index 000000000..fc88b62a1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py @@ -0,0 +1,228 @@ +import logging + +import helpers.hmarkdown as hmarkdo +import helpers.hmarkdown_toc as hmartoc +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_extract_yaml_frontmatter1 +# ############################################################################# + + +class Test_extract_yaml_frontmatter1(hunitest.TestCase): + """ + Test the extract_yaml_frontmatter function. + """ + + def helper( + self, + txt: str, + expected_frontmatter: list, + expected_remaining: list, + ) -> None: + """ + Test helper for extract_yaml_frontmatter. + + :param txt: Input text to process + :param expected_frontmatter: Expected front matter lines + :param expected_remaining: Expected remaining lines + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + frontmatter, remaining = hmartoc.extract_yaml_frontmatter(lines) + # Check outputs. 
+ self.assertEqual(frontmatter, expected_frontmatter) + self.assertEqual(remaining, expected_remaining) + + def test1(self) -> None: + """ + Test extracting YAML front matter from a file. + """ + # Prepare inputs. + txt = """ + --- + title: My Document + date: 2024-01-01 + --- + # Content + This is the main content. + """ + # Prepare outputs. + expected_frontmatter = [ + "---", + "title: My Document", + "date: 2024-01-01", + "---", + ] + expected_remaining = ["# Content", "This is the main content."] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test2(self) -> None: + """ + Test processing a file without YAML front matter. + """ + # Prepare inputs. + txt = """ + # Content + This is the main content. + """ + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = ["# Content", "This is the main content."] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test3(self) -> None: + """ + Test handling incomplete YAML front matter (missing closing delimiter). + """ + # Prepare inputs. + txt = """ + --- + title: My Document + # Content without closing delimiter + """ + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = lines + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test4(self) -> None: + """ + Test extracting empty YAML front matter. + """ + # Prepare inputs. + txt = """ + --- + --- + # Content + """ + # Prepare outputs. + expected_frontmatter = ["---", "---"] + expected_remaining = ["# Content"] + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + def test5(self) -> None: + """ + Test that separators not at the beginning are not treated as front matter. + """ + # Prepare inputs. 
+ txt = """ + # Content + --- + More content + """ + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Prepare outputs. + expected_frontmatter = [] + expected_remaining = lines + # Run test. + self.helper(txt, expected_frontmatter, expected_remaining) + + +# ############################################################################# +# Test_remove_table_of_contents1 +# ############################################################################# + + +class Test_remove_table_of_contents1(hunitest.TestCase): + def test1(self) -> None: + """ + Test removing table of contents from markdown text. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + + - [Section 1](#section-1) + - [Section 2](#section-2) + + + ## Section 1 + + Content of section 1. + """ + expected = """ + # Introduction + + This is an introduction. + + + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Test text without table of contents remains unchanged. + """ + # Prepare inputs. + text = """ + # Introduction + + This is an introduction. + + ## Section 1 + + Content of section 1. + """ + text = hprint.dedent(text) + # Run test. + actual = hmarkdo.remove_table_of_contents(text) + # Check output. + self.assert_equal(actual, text) + + def test3(self) -> None: + """ + Test removing multi-line table of contents. + """ + # Prepare inputs. + text = """ + # Introduction + + + - [Section 1](#section-1) + - [Subsection 1.1](#subsection-11) + - [Section 2](#section-2) + - [Subsection 2.1](#subsection-21) + - [Subsection 2.2](#subsection-22) + + + ## Section 1 + """ + expected = """ + # Introduction + + + + ## Section 1 + """ + text = hprint.dedent(text) + # Run test. 
+ actual = hmarkdo.remove_table_of_contents(text) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py new file mode 100644 index 000000000..16f0f097a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py @@ -0,0 +1,394 @@ +import logging + +import helpers.hmkdocs as hmkdocs +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_dedent_python_code_blocks1 +# ############################################################################# + + +class Test_dedent_python_code_blocks1(hunitest.TestCase): + def test_simple_code_block(self) -> None: + """ + Test dedenting a simple Python code block. + """ + # Prepare inputs. + text = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + expected = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, expected) + + def test_multiple_code_blocks(self) -> None: + """ + Test dedenting multiple Python code blocks. + """ + # Prepare inputs. 
+ text = """ + # Example 1 + + ```python + def hello(): + print("Hello") + ``` + + # Example 2 + + ```python + def goodbye(): + print("Goodbye") + ``` + """ + expected = """ + # Example 1 + + ```python + def hello(): + print("Hello") + ``` + + # Example 2 + + ```python + def goodbye(): + print("Goodbye") + ``` + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, expected) + + def test_no_python_blocks(self) -> None: + """ + Test text without Python code blocks remains unchanged. + """ + # Prepare inputs. + text = """ + # Example + + This is just text. + + ```javascript + console.log("Hello"); + ``` + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, text) + + def test_already_aligned_code(self) -> None: + """ + Test code that is already aligned. + """ + # Prepare inputs. + text = """ + # Example + + ```python + def hello(): + print("Hello") + ``` + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.dedent_python_code_blocks(text) + # Check output. + self.assert_equal(actual, text) + + +# ############################################################################# +# Test_replace_indentation1 +# ############################################################################# + + +class Test_replace_indentation1(hunitest.TestCase): + def test_two_to_four_spaces(self) -> None: + """ + Test replacing 2-space indentation with 4-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. 
+ expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_four_to_two_spaces(self) -> None: + """ + Test replacing 4-space indentation with 2-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + - Sub item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=4, output_spaces=2 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_two_to_eight_spaces(self) -> None: + """ + Test replacing 2-space indentation with 8-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=8 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_three_to_six_spaces(self) -> None: + """ + Test replacing 3-space indentation with 6-space indentation. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + expected = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=3, output_spaces=6 + ) + # Check output. + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test_no_indentation(self) -> None: + """ + Test text without indentation remains unchanged. + """ + # Prepare inputs. + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. 
+ self.assert_equal(actual, text) + + def test_same_input_output_spaces(self) -> None: + """ + Test that using same input and output spaces leaves text unchanged. + """ + # Prepare inputs. + text = """ + - Item 1 + - Sub item 1 + - Sub sub item 1 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=2 + ) + # Check output. + self.assert_equal(actual, text) + + def test_empty_text(self) -> None: + """ + Test empty text handling. + """ + # Prepare inputs. + text = "" + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=2, output_spaces=4 + ) + # Check output. + self.assert_equal(actual, text) + + def test_zero_to_four_spaces(self) -> None: + """ + Test converting zero indentation to 4 spaces (edge case). + """ + # Prepare inputs. + text = """ + Item 1 + Item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.replace_indentation( + text, input_spaces=1, output_spaces=4 + ) + # Check output. + self.assert_equal(actual, text) + + +# ############################################################################# +# Test_preprocess_mkdocs_markdown1 +# ############################################################################# + + +class Test_preprocess_mkdocs_markdown1(hunitest.TestCase): + def test_full_preprocessing(self) -> None: + """ + Test the complete preprocessing pipeline. + """ + # Prepare inputs. 
+ text = """ + # Introduction + + + - [Section 1](#section-1) + - [Section 2](#section-2) + + + ## Section 1 + + Here is some Python code: + + ```python + def example(): + print("Hello") + if True: + print("World") + ``` + + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + """ + expected = """ + # Introduction + + + + ## Section 1 + + Here is some Python code: + + ```python + def example(): + print("Hello") + if True: + print("World") + ``` + + - Item 1 + - Sub item 1 + - Sub sub item 1 + - Item 2 + """ + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. + self.assert_equal(actual, expected) + + def test_empty_text(self) -> None: + """ + Test preprocessing empty text. + """ + # Prepare inputs. + text = "" + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. + self.assert_equal(actual, text) + + def test_text_without_preprocessing_needs(self) -> None: + """ + Test text that doesn't need any preprocessing. + """ + # Prepare inputs. + text = """ + # Simple Markdown + + This is just simple text. + + - Item 1 + - Item 2 + """ + text = hprint.dedent(text) + # Run test. + actual = hmkdocs.preprocess_mkdocs_markdown(text) + # Check output. 
+ self.assert_equal(actual, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py new file mode 100644 index 000000000..abb48a154 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py @@ -0,0 +1,25 @@ +import logging + +import helpers.hmodule as hmodule +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_hmodule1 +# ############################################################################# + + +class Test_hmodule1(hunitest.TestCase): + def test_has_module1(self) -> None: + """ + Check that the function returns true for the existing package. + """ + self.assertTrue(hmodule.has_module("numpy")) + + def test_has_not_module1(self) -> None: + """ + Check that the function returns false for the non-existing package. 
+ """ + self.assertFalse(hmodule.has_module("no_such_module")) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py new file mode 100644 index 000000000..4d6b7bceb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py @@ -0,0 +1,215 @@ +import logging + +import numpy as np +import collections + +import helpers.hnumpy as hnumpy +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestRandomSeedContext +# ############################################################################# + + +class TestRandomSeedContext(hunitest.TestCase): + def test_example1(self) -> None: + """ + Getting more random numbers without context manager changes the + sequence of random numbers. + """ + n = 3 + # First batch. + np.random.seed(0) + vals1a = np.random.randn(n) + vals2a = np.random.randn(n) + # Second batch. + np.random.seed(0) + vals1b = np.random.randn(n) + vals = np.random.randn(n) + _ = vals + vals2b = np.random.randn(n) + # Check. + self.assertEqual(str(vals1a), str(vals1b)) + # Of course this might fail with a vanishingly small probability. + self.assertNotEqual(str(vals2a), str(vals2b)) + + def test_example2(self) -> None: + """ + Getting more random numbers with context manager doesn't change the + sequence of random numbers. + """ + n = 3 + # First batch. + np.random.seed(0) + vals1a = np.random.randn(n) + vals2a = np.random.randn(n) + # Second batch. + np.random.seed(0) + vals1b = np.random.randn(n) + with hnumpy.random_seed_context(42): + vals = np.random.randn(n) + _ = vals + vals2b = np.random.randn(n) + # Check. 
+ self.assertEqual(str(vals1a), str(vals1b)) + self.assertEqual(str(vals2a), str(vals2b)) + + +# ############################################################################# +# TestFloorWithPrecision +# ############################################################################# + + +class TestFloorWithPrecision(hunitest.TestCase): + def _test_floor_with_precision( + self, + value: float, + precision: int, + expected: str, + ) -> None: + """Assert that `str()` of `hnumpy.floor_with_precision(value, precision)` equals `expected`.""" + actual = hnumpy.floor_with_precision(value, precision) + self.assert_equal(str(actual), expected) + + def test_floor_with_precision1(self) -> None: + """ + Test for negative float values as input. + """ + expected_as_str = "-4.63" + self._test_floor_with_precision(-4.6385, 2, expected_as_str) + + def test_floor_with_precision2(self) -> None: + """ + Test for zero precision. + """ + expected_as_str = "-4.0" + self._test_floor_with_precision(-4.6385, 0, expected_as_str) + + def test_floor_with_precision3(self) -> None: + """ + Test for negative precision. + """ + value = 4.6385 + amount_precision = -2 + with self.assertRaises(AssertionError) as cm: + hnumpy.floor_with_precision(value, amount_precision) + # Check. + actual = str(cm.exception) + expected = """ + * Failed assertion * + 0 <= -2 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_floor_with_precision4(self) -> None: + """ + Test for positive float values as input. + """ + expected_as_str = "4.63" + self._test_floor_with_precision(4.6385, 2, expected_as_str) + + def test_floor_with_precision5(self) -> None: + """ + Test for integer values as input. + """ + expected_as_str = "4.0" + self._test_floor_with_precision(4, 0, expected_as_str) + + def test_floor_with_precision6(self) -> None: + """ + Test for very small value as input. + """ + expected = 0.0000532 + self._test_floor_with_precision(0.0000532999, 7, str(expected)) + + def test_floor_with_precision7(self) -> None: + """ + Test for very large value as input.
+ """ + expected_as_str = "4289734.12345" + self._test_floor_with_precision(4289734.1234599999, 5, expected_as_str) + + +# ############################################################################# +# Test_OrderedDict_repr_str +# ############################################################################# + + +class Test_OrderedDict_repr_str(hunitest.TestCase): + """ + These tests gatekeep the expected behavior of the + dunder methods `__str__` and `__repr__` for the OrderedDict class. + + The tests stem from changes in Python 3.12. Observe below: + + Python 3.9.5: + >>> from collections import OrderedDict + >>> import numpy + >>> dct = OrderedDict({"test": numpy.int64(42)}) + >>> dct["test"] + 42 + >>> print(dct) + OrderedDict([('test', 42)]) + >>> str(dct) + "OrderedDict([('test', 42)])" + >>> repr(dct) + "OrderedDict([('test', 42)])" + >>> str(dct["test"]) + '42' + >>> repr(dct["test"]) + '42' + + Python 3.12.3: + >>> from collections import OrderedDict + >>> import numpy + >>> dct = OrderedDict({"test": numpy.int64(42)}) + >>> dct["test"] + np.int64(42) + >>> str(dct) + "OrderedDict({'test': np.int64(42)})" + >>> repr(dct) + "OrderedDict({'test': np.int64(42)})" + >>> str(dct["test"]) + '42' + >>> repr(dct["test"]) + 'np.int64(42)' + """ + + def test_str_single1(self) -> None: + """ + Test that the __str__ method on a single item in OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = str(d["test"]) + expected = "42" + self.assert_equal(actual, expected) + + def test_repr_single1(self) -> None: + """ + Test that the __repr__ method on a single item in OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = repr(d["test"]) + expected = "np.int64(42)" + self.assert_equal(actual, expected) + + def test_str_full1(self) -> None: + """ + Test that the __str__ method of OrderedDict returns the expected string.
+ """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = str(d) + expected = "OrderedDict({'test': np.int64(42)})" + self.assert_equal(actual, expected) + + def test_repr_full1(self) -> None: + """ + Test that the __repr__ method of OrderedDict returns the expected string. + """ + d = collections.OrderedDict({"test": np.int64(42)}) + actual = repr(d) + expected = "OrderedDict({'test': np.int64(42)})" + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py new file mode 100644 index 000000000..6106dd551 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py @@ -0,0 +1,392 @@ +import abc +import logging +from typing import Any, Callable, List, Optional + +import pandas as pd + +import helpers.hdbg as hdbg +import helpers.hobject as hobject +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _Obj_to_str_TestCase +# ############################################################################# + + +# Note that we can't derive this class from `hunitest.TestCase` otherwise the +# unit test framework will try to run the tests in this class. +class _Obj_to_str_TestCase(abc.ABC): + """ + Test case for testing `obj_to_str()` and `obj_to_repr()`. + """ + + @abc.abstractmethod + def get_object(self) -> Any: + """ + Build object to test. + """ + ... + + def helper(self, *, expected: Optional[str] = None, **kwargs: Any) -> None: + obj = self.get_object() + hdbg.dassert_is_not(obj, None) + # + txt: List[str] = [] + # Get `str()`. 
+ txt.append(hprint.frame("str:")) + txt.append(hobject.obj_to_str(obj, **kwargs)) + # Get `repr()`. + txt.append(hprint.frame("repr:")) + txt.append(hobject.obj_to_repr(obj, **kwargs)) + # Concat. + txt = "\n".join(txt) + # Check. + if expected is None: + self.check_string(txt, purify_text=True) + else: + hdbg.dassert_isinstance(expected, str) + self.assert_equal(txt, expected, purify_text=True, fuzzy_match=True) + + def test1(self, expected: str) -> None: + """ + Use `__dict__` to extract the attributes. + """ + self.helper(expected=expected, attr_mode="__dict__") + + def test2(self, expected: str) -> None: + """ + Use `dir` to extract the attributes. + """ + self.helper(expected=expected, attr_mode="dir") + + def test3(self, expected: str) -> None: + """ + Use `__dict__` and print the type of the attributes. + """ + self.helper(expected=expected, print_type=True) + + def test4(self) -> None: + """ + Print only callable attributes. + """ + self.helper(callable_mode="all") + + def test5(self) -> None: + """ + Print only private attributes. + """ + self.helper(private_mode="all") + + def test6(self) -> None: + """ + Print only dunder attributes. + """ + self.helper(dunder_mode="all") + + +# ############################################################################# +# _Object1 +# ############################################################################# + + +class _Object1: + """ + Object storing only scalar members and not other nested objects. 
+ """ + + def __init__(self) -> None: + self.a = False + self.b = "hello" + self.c = 3.14 + self._hello = "under" + self.__hello = "double_dunder" + self.hello = lambda x: x + 1 + + +# ############################################################################# +# Test_obj_to_str1 +# ############################################################################# + + +class Test_obj_to_str1(hunitest.TestCase, _Obj_to_str_TestCase): + def get_object(self) -> Any: + obj = _Object1() + return obj + + def test1(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False, b=hello, c=3.14) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + """ + super().test1(expected) + + def test2(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False, b=hello, c=3.14) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + """ + super().test2(expected) + + def test3(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object1 at 0x=(a=False , b=hello , c=3.14 ) + ################################################################################ + repr: + ################################################################################ + : + a='False' + b='hello' + c='3.14' + 
""" + super().test3(expected) + + +# ############################################################################# +# _Object2 +# ############################################################################# + + +class _Object2: + """ + Object using a `obj_to_str()` as repr. + """ + + def __init__(self) -> None: + self.x = True + self.y = "world" + self.z = 6.28 + self._hello = "under" + self.__hello = "double_dunder" + self.hello = lambda x: x + 1 + + def __repr__(self) -> str: + return hobject.obj_to_str(self) + + +# ############################################################################# +# _Object3 +# ############################################################################# + + +class _Object3: + """ + Object storing another object. + """ + + def __init__(self) -> None: + self.p = "p" + self.q = "q" + self.object2 = _Object2() + + +# ############################################################################# +# Test_obj_to_str2 +# ############################################################################# + + +class Test_obj_to_str2(hunitest.TestCase, _Obj_to_str_TestCase): + def get_object(self) -> Any: + obj = _Object3() + return obj + + def test1(self) -> None: + # TODO(gp): object2 in repr should be printed recursively as repr, but + # it's not. 
+ expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) + ################################################################################ + repr: + ################################################################################ + : + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + """ + super().test1(expected) + + def test2(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) + ################################################################################ + repr: + ################################################################################ + : + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + p='p' + q='q' + """ + super().test2(expected) + + def test3(self) -> None: + expected = r""" + ################################################################################ + str: + ################################################################################ + _Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) + ################################################################################ + repr: + ################################################################################ + : + p='p' + q='q' + object2='_Object2 at 0x=(x=True, y=world, z=6.28)' + """ + super().test3(expected) + + +# ############################################################################# +# _Abstract_ClassA +# ############################################################################# + + +class _Abstract_ClassA(abc.ABC, hobject.PrintableMixin): + """ + Abstract class descending from 
`PrintableMixin`. + """ + + def __init__(self) -> None: + self._arg0 = 0 + self._arg1 = "one" + self._arg2 = 2 + + @staticmethod + def get_config_attributes() -> List[str]: + return ["_arg1", "_arg2"] + + +# ############################################################################# +# _ClassB +# ############################################################################# + + +class _ClassB(hobject.PrintableMixin): + """ + Class descending from `PrintableMixin`. + """ + + def __init__(self, get_wall_clock_time: Callable) -> None: + self._arg5 = {"key1": "five", "key2": 5} + self._arg6 = "abc" + self._get_wall_clock_time = get_wall_clock_time + + @staticmethod + def get_config_attributes() -> List[str]: + return ["_arg5", "_get_wall_clock_time"] + + def get_wall_clock_time(self) -> pd.Timestamp: + """ + Return wall clock time in the timezone specified in the ctor. + + Initially wall clock time can be in any timezone, but cannot be + timezone-naive. + """ + wall_clock_time = self._get_wall_clock_time() + return wall_clock_time + + +# ############################################################################# +# _ClassA +# ############################################################################# + + +class _ClassA(_Abstract_ClassA): + """ + Class descending from `_AbstractClassA` and embedding `_ClassB`. 
+ """ + + def __init__(self) -> None: + super().__init__() + self._arg3 = [3, 3, 3] + get_wall_clock_time = lambda: pd.Timestamp( + "2022-04-23", tz="America/New_York" + ) + helper_class = _ClassB(get_wall_clock_time) + self._arg4 = helper_class + self._arg10 = { + "key": 1, + "get_wall_clock_time": helper_class.get_wall_clock_time, + } + + def get_config_attributes(self) -> List[str]: + config_attributes = super().get_config_attributes() + child_class_attributes = ["_arg3", "_arg4", "_arg10"] + config_attributes.extend(child_class_attributes) + return config_attributes + + +# ############################################################################# +# Test_PrintableMixin_to_config_str +# ############################################################################# + + +class Test_PrintableMixin_to_config_str(hunitest.TestCase): + def check_test_class_str(self, test_class: Any, expected: str) -> None: + actual = test_class.to_config_str() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Print `_Abstract_ClassA`. + """ + test_class = _Abstract_ClassA() + expected = r""" + : + _arg1='one' + _arg2='2' + """ + self.check_test_class_str(test_class, expected) + + def test2(self) -> None: + """ + Print `_ClassA`. + """ + test_class = _ClassA() + expected = r""" + : + _arg1='one' + _arg2='2' + _arg3='[3, 3, 3]' + _arg4=: + _arg5='{'key1': 'five', 'key2': 5}' + _get_wall_clock_time='. 
at 0x>' + _arg10= + {'get_wall_clock_time': : + _arg5='{'key1': 'five', 'key2': 5}' + _arg6='abc' >, + 'key': 1} + """ + self.check_test_class_str(test_class, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py new file mode 100644 index 000000000..9e9887915 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py @@ -0,0 +1,92 @@ +import logging + +import pytest + +import helpers.hopen as hopen +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): Some of these tests should be executed outside of the container to +# test other systems. + + +# ############################################################################# +# Test_open_unknown +# ############################################################################# + + +class Test_open_unknown(hunitest.TestCase): + """ + Test unknown extension and unknown systems. + """ + + def test_unknown_extension1(self) -> None: + """ + Test unknown extension raises an error. + """ + with self.assertRaises(AssertionError) as cm: + hopen.open_file("a.unknown_ext") + # Check error text. + self.assertIn("unknown_ext", str(cm.exception)) + + def test_unknown_os1(self) -> None: + """ + Test unknown OS raises an error. + """ + with self.assertRaises(AssertionError) as cm: + hopen._cmd_open_html("b.html", "UnknownOS") + # Check error text. 
+ self.assertIn("UnknownOS", str(cm.exception)) + + +# ############################################################################# +# Test_open_html +# ############################################################################# + + +@pytest.mark.skip(reason="See cryptomtc/cmamp#321") +class Test_open_html(hunitest.TestCase): + """ + Test different command correctness for opening html file. + """ + + def test_linux1(self) -> None: + """ + Test Linux. + """ + cmd = hopen._cmd_open_html("a.html", "Linux") + self.check_string(str(cmd)) + + def test_windows1(self) -> None: + """ + Test Windows. + """ + cmd = hopen._cmd_open_html("b.html", "Windows") + self.check_string(str(cmd)) + + def test_mac1(self) -> None: + """ + Test Darwin. + """ + cmd = hopen._cmd_open_html("c.html", "Darwin") + self.check_string(str(cmd)) + + +# ############################################################################# +# Test_open_pdf +# ############################################################################# + + +class Test_open_pdf(hunitest.TestCase): + """ + Test different command correctness for opening pdf file. + """ + + def test_mac1(self) -> None: + """ + Test Darwin. 
+ """ + cmd = hopen._cmd_open_html("a.pdf", "Darwin") + self.check_string(str(cmd)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py new file mode 100644 index 000000000..be5200d47 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py @@ -0,0 +1,42 @@ +import logging + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas_analysis as hpananal +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_explore1 +# ############################################################################# + + +class Test_explore1(hunitest.TestCase): + def test_ols_regress_series(self) -> None: + x = 5 * np.random.randn(100) + y = x + np.random.randn(*x.shape) + df = pd.DataFrame() + df["x"] = x + df["y"] = y + hpananal.ols_regress_series( + df["x"], df["y"], intercept=True, print_model_stats=False + ) + + @pytest.mark.skip(reason="https://github.com/.../.../issues/3676") + def test_rolling_pca_over_time1(self) -> None: + np.random.seed(42) + df = pd.DataFrame(np.random.randn(10, 5)) + df.index = pd.date_range("2017-01-01", periods=10) + corr_df, eigval_df, eigvec_df = hpananal.rolling_pca_over_time( + df, 0.5, "fill_with_zero" + ) + txt = ( + "corr_df=\n%s\n" % corr_df.to_string() + + "eigval_df=\n%s\n" % eigval_df.to_string() + + "eigvec_df=\n%s\n" % eigvec_df.to_string() + ) + self.check_string(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py new file mode 100644 index 000000000..595877a97 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py @@ -0,0 +1,67 @@ +import logging + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_CheckSummary +# ############################################################################# + + +class Test_CheckSummary(hunitest.TestCase): + def test1(self) -> None: + """ + All the tests have passed. + """ + # Prepare inputs. + obj = hpandas.CheckSummary() + obj.add( + "hello", + "Number of not submitted OMS child orders=0 / 73 = 0.00%", + True, + ) + obj.add("hello2", "ok", True) + # Check. + is_ok = obj.is_ok() + self.assertTrue(is_ok) + # + actual = obj.report_outcome(notebook_output=False, assert_on_error=False) + self.check_string(actual) + # No assertion expected. + obj.report_outcome() + + def test2(self) -> None: + """ + Not all the tests have passed. + """ + # Prepare inputs. + obj = hpandas.CheckSummary() + obj.add( + "hello", + "Number of not submitted OMS child orders=0 / 73 = 0.00%", + True, + ) + obj.add("hello2", "not_ok", False) + # Check. + is_ok = obj.is_ok() + self.assertFalse(is_ok) + # + actual = obj.report_outcome(notebook_output=False, assert_on_error=False) + self.check_string(actual) + # + with self.assertRaises(ValueError) as e: + actual = obj.report_outcome() + actual_exception = str(e.exception) + expected_exception = r""" + The checks have failed: + description comment is_ok + 0 hello Number of not submitted OMS child orders=0 / 7... 
True + 1 hello2 not_ok False + is_ok=False + """ + self.assert_equal(actual_exception, expected_exception, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py new file mode 100644 index 000000000..a65340957 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py @@ -0,0 +1,364 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestDropNa +# ############################################################################# + + +class TestDropNa(hunitest.TestCase): + def test_dropna1(self) -> None: + """ + Test if all types of NaNs are dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "B", None, "D"], + "dummy_value_3": [0, 0, pd.NA, 0, 0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.dropna(test_df, drop_infs=False) + # Prepare expected result. + expected = { + "dummy_value_1": [1, 0], + "dummy_value_2": ["A", "D"], + "dummy_value_3": [0, 0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64", "dummy_value_3": "object"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 4])) + # Check. + hunitest.compare_df(actual, expected) + + def test_dropna2(self) -> None: + """ + Test if infs are dropped. + """ + # Prepare actual result. 
+ test_data = { + "dummy_value_1": [-np.inf, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "B", "C", "D"], + "dummy_value_3": [0, 0, np.inf, 0, 0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.dropna(test_df, drop_infs=True) + # Prepare expected result. + expected = { + "dummy_value_1": [1, 2, 0], + "dummy_value_2": ["A", "C", "D"], + "dummy_value_3": [0, 0, 0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64", "dummy_value_3": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 3, 4])) + # Check. + hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestDropAxisWithAllNans +# ############################################################################# + + +class TestDropAxisWithAllNans(hunitest.TestCase): + def test_drop_rows1(self) -> None: + """ + Test if row full of nans is dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": [pd.NA, "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) + # Prepare expected result. + expected = { + "dummy_value_1": [2, 3], + "dummy_value_2": ["B", "C"], + "dummy_value_3": [1.0, 1.0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([1, 2])) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_rows2(self) -> None: + """ + Test if non fully nan row is not dropped. + """ + # Prepare actual result. 
+ test_data = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": ["A", "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) + # Prepare expected result. + expected = { + "dummy_value_1": [np.nan, 2, 3], + "dummy_value_2": ["A", "B", "C"], # type: ignore + "dummy_value_3": [None, 1.0, 1.0], + } + # Set the dtype of numeral columns to float to match the dataframe after NA dropping. + expected = pd.DataFrame(data=expected).astype( + {"dummy_value_1": "float64"} + ) + # Set the index of the rows that remained. + expected = expected.set_index(pd.Index([0, 1, 2])) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_columns1(self) -> None: + """ + Test if column full of nans is dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, pd.NA, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) + # Prepare expected result. + expected = { + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + expected = pd.DataFrame(data=expected) + # Check. + hunitest.compare_df(actual, expected) + + def test_drop_columns2(self) -> None: + """ + Test if column that is not full of nans is not dropped. + """ + # Prepare actual result. + test_data = { + "dummy_value_1": [np.nan, 2, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + test_df = pd.DataFrame(data=test_data) + # Drop NA. + actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) + # Prepare expected result. + expected = { + "dummy_value_1": [np.nan, 2, None], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [1.0, 1.0, 1.0], + } + expected = pd.DataFrame(data=expected) + # Check. 
+ hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestDropDuplicates +# ############################################################################# + + +class TestDropDuplicates(hunitest.TestCase): + """ + Test that duplicates are dropped correctly. + """ + + @staticmethod + def get_test_data() -> pd.DataFrame: + test_data = [ + (1, "A", 3.2), + (1, "A", 3.2), + (10, "B", 3.2), + (8, "A", 3.2), + (4, "B", 8.2), + (10, "B", 3.2), + ] + index = [ + "dummy_value1", + "dummy_value3", + "dummy_value2", + "dummy_value1", + "dummy_value1", + "dummy_value2", + ] + columns = ["int", "letter", "float"] + df = pd.DataFrame(data=test_data, index=index, columns=columns) + return df + + def test_drop_duplicates1(self) -> None: + """ + - use_index = True + - column_subset is not None + """ + # Prepare test data. + df = self.get_test_data() + use_index = True + column_subset = ["float"] + no_duplicates_df = hpandas.drop_duplicates( + df, use_index, column_subset=column_subset + ) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value3 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates2(self) -> None: + """ + - use_index = True + - column_subset = None + """ + # Prepare test data. + df = self.get_test_data() + use_index = True + no_duplicates_df = hpandas.drop_duplicates(df, use_index) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value3 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 8 A 3.2 + dummy_value1 4 B 8.2 + """ + # Check. 
+ self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates3(self) -> None: + """ + - use_index = False + - column_subset = None + """ + # Prepare test data. + df = self.get_test_data() + use_index = False + no_duplicates_df = hpandas.drop_duplicates(df, use_index) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 8 A 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + def test_drop_duplicates4(self) -> None: + """ + - use_index = False + - column_subset is not None + """ + # Prepare test data. + df = self.get_test_data() + use_index = False + column_subset = ["letter", "float"] + no_duplicates_df = hpandas.drop_duplicates( + df, use_index, column_subset=column_subset + ) + no_duplicates_df = hpandas.df_to_str(no_duplicates_df) + # Prepare expected result. + expected_signature = r""" + int letter float + dummy_value1 1 A 3.2 + dummy_value2 10 B 3.2 + dummy_value1 4 B 8.2 + """ + # Check. + self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# Test_impute_nans +# ############################################################################# + + +class Test_impute_nans(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic imputation of "nan" strings with empty string. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "nan", "value3"], + "col2": ["a", "b", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col1", "") + # Check output. + self.assertEqual(result_df["col1"].tolist(), ["value1", "", "value3"]) + self.assertEqual(result_df["col2"].tolist(), ["a", "b", "c"]) + + def test2(self) -> None: + """ + Test imputation with a custom value. 
+ """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "nan", "value3"], + "col2": ["a", "nan", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col2", "MISSING") + # Check output. + self.assertEqual(result_df["col1"].tolist(), ["value1", "nan", "value3"]) + self.assertEqual(result_df["col2"].tolist(), ["a", "MISSING", "c"]) + + def test3(self) -> None: + """ + Test with no "nan" values present. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": ["value1", "value2", "value3"], + "col2": ["a", "b", "c"], + } + ) + # Call function to test. + result_df = hpandas.impute_nans(df, "col1", "") + # Check output - should be unchanged. + self.assertEqual( + result_df["col1"].tolist(), ["value1", "value2", "value3"] + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py new file mode 100644 index 000000000..9567c91e5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py @@ -0,0 +1,650 @@ +import logging +from typing import Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestCompareDataframeRows +# ############################################################################# + + +class TestCompareDataframeRows(hunitest.TestCase): + def get_test_data(self) -> pd.DataFrame: + test_data = { + "dummy_value_1": [0, 1, 3, 2, 0], + "dummy_value_2": ["0", "A", "C", "B", "D"], + "dummy_value_3": [0, 0, 0, 0, 0], + } + df = pd.DataFrame(data=test_data) + df.index.name = "test" + 
return df + + def test_compare_dataframe_rows1(self) -> None: + """ + Verify that differences are caught and displayed properly. + """ + # Prepare inputs. + test_data = self.get_test_data() + edited_test_data = test_data.copy()[1:-1] + edited_test_data.loc[1, "dummy_value_2"] = "W" + edited_test_data.loc[2, "dummy_value_2"] = "Q" + edited_test_data.loc[2, "dummy_value_3"] = "1" + # Run. + data_difference = hpandas.compare_dataframe_rows( + test_data, edited_test_data + ) + # Check output. + actual = hpandas.df_to_str(data_difference) + expected = r""" dummy_value_2 dummy_value_3 test + self other self other + 0 W A 1 + 1 Q C 1 0 2""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_compare_dataframe_rows2(self) -> None: + """ + Verify that differences are caught and displayed properly without + original index. + """ + # Prepare inputs. + test_data = self.get_test_data() + test_data.index.name = None + edited_test_data = test_data.copy()[1:-1] + edited_test_data.loc[1, "dummy_value_2"] = "W" + edited_test_data.loc[2, "dummy_value_2"] = "Q" + edited_test_data.loc[2, "dummy_value_3"] = "1" + # Run. + data_difference = hpandas.compare_dataframe_rows( + test_data, edited_test_data + ) + # Check output. 
+ actual = hpandas.df_to_str(data_difference) + expected = r""" dummy_value_2 dummy_value_3 + self other self other + 0 W A NaN NaN + 1 Q C 1 0.0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_compare_dfs +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +class Test_compare_dfs(hunitest.TestCase): + """ + - Define two DataFrames that can be either equal or different in terms of columns or rows + - Compare its values by calculating the difference + """ + + @staticmethod + def get_test_dfs_equal() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Both DataFrames have only equal rows and columns names. + """ + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + values1 = { + "tsA": pd.Series([1, 2, 3]), + "tsB": pd.Series([4, 5, 6]), + "tsC": pd.Series([7, 8, 9]), + "timestamp": timestamp_index1, + } + df1 = pd.DataFrame(data=values1) + df1 = df1.set_index("timestamp") + # + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + values2 = { + "tsA": pd.Series([1.1, 1.9, 3.15]), + "tsB": pd.Series([0, 5, 5.8]), + "tsC": pd.Series([6.5, 8.6, 9.07]), + "timestamp": timestamp_index2, + } + df2 = pd.DataFrame(data=values2) + df2 = df2.set_index("timestamp") + return df1, df2 + + @staticmethod + def get_test_dfs_close_to_zero() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + DataFrames with values that are close to 0. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + ] + values1 = { + "tsA": [3e-9, -3e-9], + "tsB": [6e-3, 4e-9], + "timestamp": timestamp_index, + } + df1 = pd.DataFrame(data=values1) + df1 = df1.set_index("timestamp") + # + values2 = { + "tsA": [15e-3, -5e-9], + "tsB": [5e-9, 3e-9], + "timestamp": timestamp_index, + } + df2 = pd.DataFrame(data=values2) + df2 = df2.set_index("timestamp") + return df1, df2 + + def get_test_dfs_different(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + DataFrames have both unique and equal rows and columns. + """ + df1, df2 = self.get_test_dfs_equal() + df2 = df2.rename( + columns={"tsC": "extra_col"}, + index={ + pd.Timestamp("2022-01-01 21:03:00+00:00"): pd.Timestamp( + "2022-01-01 21:04:00+00:00" + ) + }, + ) + return df1, df2 + + def test1(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "diff" + """ + df1, df2 = self.get_test_dfs_equal() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff tsC.diff + timestamp + 2022-01-01 21:01:00+00:00 -0.10 4.0 0.50 + 2022-01-01 21:02:00+00:00 0.10 0.0 -0.60 + 2022-01-01 21:03:00+00:00 -0.15 0.2 -0.07 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - zero_vs_zero_is_zero = False + - remove_inf = False + """ + df1, df2 = self.get_test_dfs_equal() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=False, + remove_inf=False, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 
21:01:00+00:00 -9.090909 inf 7.692308 + 2022-01-01 21:02:00+00:00 5.263158 0.000000 -6.976744 + 2022-01-01 21:03:00+00:00 -4.761905 3.448276 -0.771775 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + - DataFrames are not equal + - Column and row modes are `inner` + - diff_mode = "diff" + """ + df1, df2 = self.get_test_dfs_different() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="inner", + column_mode="inner", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff + timestamp + 2022-01-01 21:01:00+00:00 -0.1 4.0 + 2022-01-01 21:02:00+00:00 0.1 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + - DataFrames are not equal + - Column and row modes are `inner` + - diff_mode = "pct_change" + """ + df1, df2 = self.get_test_dfs_different() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="inner", + column_mode="inner", + diff_mode="pct_change", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -9.090909 NaN + 2022-01-01 21:02:00+00:00 5.263158 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test5(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "diff" + - All values of the second DataFrame are zeros + + Check that if the second DataFrame consists of zeros, + the function will perform comparison to the initial DataFrame. + """ + df1, df2 = self.get_test_dfs_different() + # Create DataFrame with zeros. + df2 = df1 * 0 + # Compare. 
+ df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="diff", + assert_diff_threshold=None, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.diff tsB.diff tsC.diff + timestamp + 2022-01-01 21:01:00+00:00 1 4 7 + 2022-01-01 21:02:00+00:00 2 5 8 + 2022-01-01 21:03:00+00:00 3 6 9 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test6(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - close_to_zero_threshold = 1e-6 + - zero_vs_zero_is_zero = True + - remove_inf = True + + The second DataFrame has numbers below the close_to_zero_threshold. + """ + df1, df2 = self.get_test_dfs_close_to_zero() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=True, + remove_inf=True, + ) + # + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -100.0 NaN + 2022-01-01 21:02:00+00:00 0.0 0.0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test7(self) -> None: + """ + - DataFrames are equal + - Column and row modes are `equal` + - diff_mode = "pct_change" + - close_to_zero_threshold = 1e-6 + - zero_vs_zero_is_zero = False + - remove_inf = False + + The second DataFrame has numbers below the close_to_zero_threshold. 
+ """ + df1, df2 = self.get_test_dfs_close_to_zero() + df_diff = hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + assert_diff_threshold=None, + zero_vs_zero_is_zero=False, + remove_inf=False, + ) + # + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change + timestamp + 2022-01-01 21:01:00+00:00 -100.0 inf + 2022-01-01 21:02:00+00:00 NaN NaN + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test8(self) -> None: + """ + Test NaN comparison with NaNs present at different location in two + dataframes. + """ + # Build test dataframes. + df1 = pd.DataFrame( + data={ + "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df2 = pd.DataFrame( + data={ + "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + # Check. + with self.assertRaises(AssertionError) as cm: + compare_nans = True + hpandas.compare_dfs( + df1, df2, compare_nans=compare_nans, only_warning=False + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + DataFrame.iloc[:, 0] (column name="A") are different + + DataFrame.iloc[:, 0] (column name="A") values are different (66.66667 %) + [index]: [0, 1, 2, 3, 4, 5] + [left]: [1.1, nan, 3.1, nan, inf, inf] + [right]: [3.0, 2.2, nan, nan, nan, inf] + At positional index 0, first diff: 1.1 != 3.0 + df1= + A B + 0 1.1 0 + 1 NaN 0 + 2 3.1 0 + 3 NaN 0 + 4 inf 0 + 5 inf 0 + and df2= + A B + 0 3.0 0 + 1 2.2 0 + 2 NaN 0 + 3 NaN 0 + 4 NaN 0 + 5 inf 0 + are not equal. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test9(self) -> None: + """ + Test to verify the error when df1 and df2 have different index types. + """ + df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + # Create df2 with a DatetimeIndex. 
+ dates = pd.date_range("2021-01-01", periods=3) + df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "timestamp": dates}) + df2 = df2.set_index("timestamp") + with self.assertRaises(AssertionError) as cm: + hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + df1.index.difference(df2.index)= + RangeIndex(start=0, stop=3, step=1) + df2.index.difference(df1.index)= + DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None) + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test10(self) -> None: + """ + Check `assert_diff_threshold` functionality in presence of NaN values + in df_diff. + """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.000001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.nan + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.0001 0.0001 0.0001 + 2022-01-01 21:02:00+00:00 0.0001 0.0001 NaN + 2022-01-01 21:03:00+00:00 0.0001 0.0001 0.0001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test11(self) -> None: + """ + Check functionality for `remove_inf = False` in presence of `diff_mode + = 'pct_change'`. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.inf + with self.assertRaises(AssertionError) as cm: + hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + remove_inf=False, + only_warning=False, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + DataFrame.iloc[:, 0] (column name="tsA") are different + + DataFrame.iloc[:, 0] (column name="tsA") values are different (100.0 %) + [index]: [2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00, 2022-01-01 21:03:00+00:00] + [left]: [False, False, False] + [right]: [True, True, True] + df1= + tsA tsB tsC + timestamp + 2022-01-01 21:01:00+00:00 100.001 400.004 700.007 + 2022-01-01 21:02:00+00:00 200.002 500.005 inf + 2022-01-01 21:03:00+00:00 300.003 600.006 900.009 + and df2= + tsA tsB tsC + timestamp + 2022-01-01 21:01:00+00:00 100 400 700 + 2022-01-01 21:02:00+00:00 200 500 800 + 2022-01-01 21:03:00+00:00 300 600 900 + have pct_change more than `assert_diff_threshold`. + """ + self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) + + def test12(self) -> None: + """ + Check functionality for `remove_inf = True` in presence of `diff_mode = + 'pct_change'`. 
+ """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, 300], + "tsB": [400, 500, 600], + "tsC": [700, 800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df1.iloc[1, 2] = np.inf + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 + 2022-01-01 21:02:00+00:00 0.001 0.001 NaN + 2022-01-01 21:03:00+00:00 0.001 0.001 0.001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test13(self) -> None: + """ + Check test case when negative values in df2. + """ + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + ] + df2 = pd.DataFrame( + { + "tsA": [100, 200, -300], + "tsB": [400, -500, 600], + "tsC": [700, -800, 900], + "timestamp": timestamp_index, + } + ) + df2 = df2.set_index("timestamp") + adjustment_factor = 1.00001 + df1 = df2 * adjustment_factor + df_diff = hpandas.compare_dfs( + df1, + df2, + diff_mode="pct_change", + only_warning=True, + ) + actual = hpandas.df_to_str(df_diff) + expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change + timestamp + 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 + 2022-01-01 21:02:00+00:00 0.001 -0.001 -0.001 + 2022-01-01 21:03:00+00:00 -0.001 0.001 0.001 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_invalid_input(self) -> None: + """ + Put two different DataFrames with `equal` mode. 
+ """ + df1, df2 = self.get_test_dfs_different() + with self.assertRaises(AssertionError): + hpandas.compare_dfs( + df1, + df2, + row_mode="equal", + column_mode="equal", + diff_mode="pct_change", + ) + + +# ############################################################################# +# Test_compare_nans_in_dataframes +# ############################################################################# + + +class Test_compare_nans_in_dataframes(hunitest.TestCase): + def test1(self) -> None: + """ + Check that NaN differences are identified correctly. + """ + # Build test dataframes. + df1 = pd.DataFrame( + data={ + "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df2 = pd.DataFrame( + data={ + "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], + "B": [0, 0, 0, 0, 0, 0], + } + ) + df = hpandas.compare_nans_in_dataframes(df1, df2) + actual = hpandas.df_to_str(df) + expected = r""" + A + df1 df2 + 1 NaN 2.2 + 2 3.1 NaN + 4 inf NaN + """ + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py new file mode 100644 index 000000000..0bd4eaeee --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py @@ -0,0 +1,276 @@ +import logging + +import numpy as np +import pandas as pd +import pytest + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_to_series1 +# ############################################################################# + + +class Test_to_series1(hunitest.TestCase): + def helper(self, n: 
int, expected: str) -> None: + vals = list(range(n)) + df = pd.DataFrame([vals], columns=[f"a{i}" for i in vals]) + df = df.T + _LOG.debug("df=\n%s", df) + srs = hpandas.to_series(df) + _LOG.debug("srs=\n%s", srs) + actual = str(srs) + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test1(self) -> None: + n = 0 + expected = r""" + Series([], dtype: float64) + """ + self.helper(n, expected) + + def test2(self) -> None: + n = 1 + expected = r""" + a0 0 + dtype: int64""" + self.helper(n, expected) + + def test3(self) -> None: + n = 5 + expected = r""" + a0 0 + a1 1 + a2 2 + a3 3 + a4 4 + Name: 0, dtype: int64""" + self.helper(n, expected) + + +# ############################################################################# +# Test_cast_series_to_type +# ############################################################################# + + +class Test_cast_series_to_type(hunitest.TestCase): + """ + Test converting a series into a given type. + """ + + def test1(self) -> None: + series = pd.Series(["1", "2", "3"]) + series_type = int + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.int64) + + def test2(self) -> None: + series = pd.Series(["0.1", "0.2", "0.3"]) + series_type = float + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.float64) + + def test3(self) -> None: + series = pd.Series(["None", "None", "None"]) + series_type = None + actual = hpandas.cast_series_to_type(series, series_type) + for i in range(len(actual)): + self.assertIsNone(actual.iloc[i]) + + def test4(self) -> None: + series = pd.Series(["2020-01-01", "2020-02-02", "2020-03-03"]) + series_type = pd.Timestamp + actual = hpandas.cast_series_to_type(series, series_type) + self.assertEqual(actual.dtype.type, np.datetime64) + + def test5(self) -> None: + series = pd.Series(["{}", "{1: 2, 3: 4}", "{'a': 'b'}"]) + series_type = dict + actual = hpandas.cast_series_to_type(series, 
series_type) + for i in range(len(actual)): + self.assertEqual(type(actual.iloc[i]), dict) + + +# ############################################################################# +# Test_convert_to_type +# ############################################################################# + + +class Test_convert_to_type(hunitest.TestCase): + def test_convert_to_type_bool(self) -> None: + """ + Check converting to bool column. + """ + # Mix of booleans, truthy/falsy strings, numerics, and invalid values + data = [True, False, "True", "false", 1, 0, "1", "0", "yes", None] + series = pd.Series(data) + result = hpandas.convert_to_type(series, "is_bool") + expected = pd.Series( + [True, False, True, False, True, False, True, False, None, None] + ) + pd.testing.assert_series_equal(result, expected) + + def test_convert_to_type_int_and_numeric(self) -> None: + """ + Check converting to numeric and int column. + """ + # Strings that parse to numbers, floats, invalid strings, and ints + series = pd.Series(["1", "2", "3.5", "abc", 4], dtype=object) + # is_int should coerce numeric strings to numbers, invalid -> NaN + result_int = hpandas.convert_to_type(series, "is_int") + expected_int = pd.to_numeric(series, errors="coerce") + pd.testing.assert_series_equal(result_int, expected_int) + # is_numeric is the same as to_numeric + result_numeric = hpandas.convert_to_type(series, "is_numeric") + pd.testing.assert_series_equal(result_numeric, expected_int) + + def test_convert_to_type_string(self) -> None: + """ + Check converting to string column. + """ + # Strings vs non-strings + data = ["a", 1, None, "hello", True, 3.14] + series = pd.Series(data, dtype=object) + result = hpandas.convert_to_type(series, "is_string") + expected = pd.Series(["a", "1", "None", "hello", "True", "3.14"]) + pd.testing.assert_series_equal(result, expected) + + def test_convert_to_type_unknown(self) -> None: + "Check converting to invalid datatype column." 
+ series = pd.Series([1, 2, 3], dtype=object) + with pytest.raises(ValueError) as exc: + hpandas.convert_to_type(series, "invalid_type") + self.assertIn("Unknown column type: invalid_type", str(exc.value)) + + +# ############################################################################# +# Test_infer_column_types +# ############################################################################# + + +class Test_infer_column_types(hunitest.TestCase): + def test_numeric_dominance(self) -> None: + """ + Check with numeric dominant column. + """ + # 5 elements: '1','2',3 (numeric), 'a', None + col = pd.Series(["1", "2", 3, "a", None], dtype=object) + vals = hpandas.infer_column_types(col) + # is_numeric: True for "1","2",3 → 3/5 = 0.6 + assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.6 + # is_bool: none are bool → 0.0 + assert vals["is_bool"] == 0.0 + # is_string: "1","2","a" are str → 3/5 = 0.6 + assert pytest.approx(vals["is_string"], rel=1e-6) == 0.6 + # numeric ≥ string, and bool < numeric ⇒ type is numeric + self.assert_equal(vals["type"], "is_numeric") + + def test_bool_dominance(self) -> None: + """ + Check with bool dominant column. + """ + # 4 elements: True, False, True (bool), "x" + col = pd.Series([True, False, True, "x"], dtype=object) + vals = hpandas.infer_column_types(col) + # is_bool: 3/4 = 0.75 + assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.75 + # is_numeric: True→1, False→0, True→1, "x"→NaN → notna → 3/4 = 0.75 + assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.75 + # is_string: only "x" → 1/4 = 0.25 + assert pytest.approx(vals["is_string"], rel=1e-6) == 0.25 + # bool ≥ numeric ⇒ type is bool + self.assert_equal(vals["type"], "is_bool") + + def test_string_dominance(self) -> None: + """ + Check with string dominant column. 
+ """ + # 3 elements: 1.5 (numeric), "a","b" (strings) + col = pd.Series([1.5, "a", "b"], dtype=object) + vals = hpandas.infer_column_types(col) + # is_bool: none are bool → 0/3 = 0.0 + assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.0 + # is_numeric: 1/3 ≈ 0.333... + assert pytest.approx(vals["is_numeric"], rel=1e-6) == pytest.approx( + 1 / 3, rel=1e-6 + ) + # is_string: 2/3 ≈ 0.666... + assert pytest.approx(vals["is_string"], rel=1e-6) == pytest.approx( + 2 / 3, rel=1e-6 + ) + # bool < numeric < string ⇒ type is string + self.assert_equal(vals["type"], "is_string") + + +# ############################################################################# +# Test_convert_df +# ############################################################################# + + +class Test_convert_df(hunitest.TestCase): + def test_convert_df_all_bool(self) -> None: + """ + A column of pure booleans should stay booleans. + """ + df = pd.DataFrame({"flag": [True, False, True, False]}) + df_out = hpandas.convert_df(df) + # Expect a DataFrame back + assert isinstance(df_out, pd.DataFrame) + # Column dtype must be bool + self.assert_equal(df_out["flag"].dtype.name, "bool") + # Values preserved + self.assert_equal( + str(df_out["flag"].tolist()), str([True, False, True, False]) + ) + + def test_convert_df_all_numeric(self) -> None: + """ + A column of numeric strings and ints should become floats. + """ + df = pd.DataFrame({"score": ["1", 2, "3.5", 4]}, dtype=object) + df_out = hpandas.convert_df(df) + assert isinstance(df_out, pd.DataFrame) + # dtype should be float64 + assert df_out["score"].dtype == float + # Values converted correctly + assert df_out["score"].tolist() == [1.0, 2.0, 3.5, 4.0] + + def test_convert_df_all_string(self) -> None: + """ + A column of strings (and mixed non-numeric non-bool) stays as-is. 
+ """ + df = pd.DataFrame( + {"name": ["alice", "bob", "", "charlie"]}, dtype=object + ) + df_out = hpandas.convert_df(df) + print(df_out.head(5)) + assert isinstance(df_out, pd.DataFrame) + # dtype remains object (strings) + self.assert_equal(df_out["name"].dtype.name, "object") + self.assert_equal( + str(df_out["name"].tolist()), str(["alice", "bob", "", "charlie"]) + ) + + def test_convert_df_mixed_columns(self) -> None: + """ + Different datatype columns should convert accordingly. + """ + df = pd.DataFrame( + { + "flag": [True, False, False], + "value": [10, 20, "xyz"], + "text": ["one", "hello", 2], + }, + dtype=object, + ) + df_out = hpandas.convert_df(df) + # flag → bool + self.assert_equal(df_out["flag"].dtype.name, "bool") + self.assertIn("float", df_out["value"].dtype.name) + self.assert_equal(df_out["text"].dtype.name, "object") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py new file mode 100644 index 000000000..44b7c7b18 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py @@ -0,0 +1,448 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_dassert_is_unique1 +# ############################################################################# + + +class Test_dassert_is_unique1(hunitest.TestCase): + def get_df1(self) -> pd.DataFrame: + """ + Return a df without duplicated index. 
+ """ + num_rows = 5 + idx = [ + pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) + for i in range(num_rows) + ] + values = [[i] for i in range(len(idx))] + df = pd.DataFrame(values, index=idx) + _LOG.debug("df=\n%s", df) + # + actual = hpandas.df_to_str(df) + expected = r""" + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:01:00 1 + 2000-01-01 09:02:00 2 + 2000-01-01 09:03:00 3 + 2000-01-01 09:04:00 4""" + self.assert_equal(actual, expected, fuzzy_match=True) + return df + + def test_dassert_is_unique1(self) -> None: + df = self.get_df1() + hpandas.dassert_unique_index(df) + + def get_df2(self) -> pd.DataFrame: + """ + Return a df with duplicated index. + """ + num_rows = 4 + idx = [ + pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) + for i in range(num_rows) + ] + idx.append(idx[0]) + values = [[i] for i in range(len(idx))] + df = pd.DataFrame(values, index=idx) + _LOG.debug("df=\n%s", df) + # + actual = hpandas.df_to_str(df) + expected = r""" + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:01:00 1 + 2000-01-01 09:02:00 2 + 2000-01-01 09:03:00 3 + 2000-01-01 09:00:00 4""" + self.assert_equal(actual, expected, fuzzy_match=True) + return df + + def test_dassert_is_unique2(self) -> None: + df = self.get_df2() + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_unique_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Duplicated rows are: + 0 + 2000-01-01 09:00:00 0 + 2000-01-01 09:00:00 4 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_valid_remap +# ############################################################################# + + +class Test_dassert_valid_remap(hunitest.TestCase): + def test1(self) -> None: + """ + Check that the function works with correct inputs. + """ + # Set inputs. 
+ to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + } + # Check. + hpandas.dassert_valid_remap(to_remap, remap_dict) + + def test2(self) -> None: + """ + Check that an assertion is raised if dictionary keys are not a subset. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + "dummy_value_3": "A1, A2, A3", + } + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] + issubset + val2=['dummy_value_1', 'dummy_value_2'] + val1 - val2=['dummy_value_3'] + Keys to remap should be a subset of existing columns""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that an assertion is raised if the duplicate values are present + in the dict. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": 1, + "dummy_value_2": "A, B, C", + "dummy_value_3": "A, B, C", + } + # Run. + with self.assertRaises(AttributeError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + 'dict_values' object has no attribute 'count'""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Check that an assertion is raised if the input is not a list. + """ + # Set inputs. + to_remap = {"dummy_value_1"} + remap_dict = { + "dummy_value_1": "1, 2, 3", + } + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '{'dummy_value_1'}' is '' instead of '' + """ + # Check. 
+ self.assert_equal(actual, expected, fuzzy_match=True) + + def test5(self) -> None: + """ + Check that an assertion is raised if the input is not a dictionary. + """ + # Set inputs. + to_remap = ["dummy_value_1"] + remap_dict = [ + "dummy_value_1 : 1, 2, 3", + ] + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '['dummy_value_1 : 1, 2, 3']' is '' instead of '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_increasing_index +# ############################################################################# + + +class Test_dassert_increasing_index(hunitest.TestCase): + def test1(self) -> None: + """ + Check that a monotonically increasing index passes the assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_increasing_index(df) + + def test2(self) -> None: + """ + Check that an assert is raised when index is not monotonically + increasing. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:04"), + pd.Timestamp("2000-01-01 9:03"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hpandas.dassert_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Not increasing indices are: + 0 + 2000-01-01 09:04:00 0 + 2000-01-01 09:03:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that a monotonically increasing index with duplicates passes the + assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:00"), + pd.Timestamp("2000-01-01 9:00"), + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:01"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_increasing_index(df) + + +# ############################################################################# +# Test_dassert_strictly_increasing_index +# ############################################################################# + + +class Test_dassert_strictly_increasing_index(hunitest.TestCase): + def test1(self) -> None: + """ + Check that unique and monotonically increasing index passes the assert. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + hpandas.dassert_strictly_increasing_index(df) + + def test2(self) -> None: + """ + Check that an assert is raised for an increasing index with duplicates. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:03"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hpandas.dassert_strictly_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Duplicated rows are: + 0 + 2000-01-01 09:01:00 0 + 2000-01-01 09:01:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that an assert is raised for a not monotonically increasing + index. + """ + # Build test dataframe. + idx = [ + pd.Timestamp("2000-01-01 9:01"), + pd.Timestamp("2000-01-01 9:03"), + pd.Timestamp("2000-01-01 9:02"), + pd.Timestamp("2000-01-01 9:04"), + ] + values = [0, 0, 0, 0] + df = pd.DataFrame(values, index=idx) + # Run. + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_strictly_increasing_index(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + Not increasing indices are: + 0 + 2000-01-01 09:03:00 0 + 2000-01-01 09:02:00 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_dassert_index_is_datetime +# ############################################################################# + + +class Test_dassert_index_is_datetime(hunitest.TestCase): + @staticmethod + def get_multiindex_df( + index_is_datetime: bool, + ) -> pd.DataFrame: + """ + Helper function to get test multi-index dataframe. 
Example of dataframe + returned when `index_is_datetime = True`: + + ``` + column1 column2 + index timestamp + index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 + 2022-01-01 21:10:00+00:00 1.303778 -0.288235 + index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 + 2022-01-01 21:10:00+00:00 1.333692 1.708455 + ``` + + Example of dataframe returned when `index_is_datetime = False`: + + ``` + column1 column2 + index timestamp + index1 string1 -0.122140 -1.949431 + string2 1.303778 -0.288235 + index2 string1 1.237079 1.168012 + string2 1.333692 1.708455 + ``` + """ + if index_is_datetime: + index_inner = [ + pd.Timestamp("2022-01-01 21:00:00", tz="UTC"), + pd.Timestamp("2022-01-01 21:10:00", tz="UTC"), + ] + else: + index_inner = ["string1", "string2"] + index_outer = ["index1", "index2"] + iterables = [index_outer, index_inner] + index = pd.MultiIndex.from_product( + iterables, names=["index", "timestamp"] + ) + columns = ["column1", "column2"] + nums = np.random.uniform(-2, 2, size=(4, 2)) + df = pd.DataFrame(nums, index=index, columns=columns) + return df + + def test1(self) -> None: + """ + Check that multi-index dataframe index is datetime type. + """ + index_is_datetime = True + df = self.get_multiindex_df(index_is_datetime) + hpandas.dassert_index_is_datetime(df) + + def test2(self) -> None: + """ + Check that multi-index dataframe index is not datetime type. + """ + index_is_datetime = False + df = self.get_multiindex_df(index_is_datetime) + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_index_is_datetime(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + cond=False + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check for empty dataframe. 
+ """ + df = pd.DataFrame() + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_index_is_datetime(df) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of 'RangeIndex(start=0, stop=0, step=1)' is '' instead of '' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Check that single-indexed dataframe index is datetime type. + """ + index_is_datetime = True + df = self.get_multiindex_df(index_is_datetime) + df = df.loc["index1"] + hpandas.dassert_index_is_datetime(df) + + +# ############################################################################# +# Test_dassert_approx_eq1 +# ############################################################################# + + +class Test_dassert_approx_eq1(hunitest.TestCase): + def test1(self) -> None: + hpandas.dassert_approx_eq(1, 1.0000001) + + def test2(self) -> None: + srs1 = pd.Series([1, 2.0000001]) + srs2 = pd.Series([0.999999, 2.0]) + hpandas.dassert_approx_eq(srs1, srs2, msg="hello world") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py new file mode 100644 index 000000000..2c69e4fe7 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py @@ -0,0 +1,685 @@ +import datetime +import logging +import unittest.mock +import uuid +from typing import Optional, Union + +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hpandas_display as hpandisp +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestDataframeToJson +# ############################################################################# + + 
+class TestDataframeToJson(hunitest.TestCase): + """ + Test dataframe to JSON conversion. + """ + + def test1(self) -> None: + """ + Verify correctness of dataframe to JSON transformation. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + "col_2": [1, 2, 3, 4, 5, 6, 7], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=3, n_tail=3 + ) + # Check output. + self.check_string(output_str) + + def test2(self) -> None: + """ + Verify correctness of UUID-containing dataframe transformation. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + uuid.UUID("421470c7-7797-4a94-b584-eb83ff2de88a"), + uuid.UUID("22cde381-1782-43dc-8c7a-8712cbdf5ee1"), + ], + "col_2": [1, 2], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. + self.check_string(output_str) + + def test3(self) -> None: + """ + Verify correctness of transformation of a dataframe with Timestamps. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + pd.Timestamp("2020-01-01"), + pd.Timestamp("2020-05-12"), + ], + "col_2": [1.0, 2.0], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. + self.check_string(output_str) + + def test4(self) -> None: + """ + Verify correctness of transformation of a dataframe with datetime. + """ + # Prepare inputs. + test_dataframe = pd.DataFrame( + { + "col_1": [ + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 5, 12), + ], + "col_2": [1.0, 2.0], + } + ) + # Run test. + output_str = hpandas.convert_df_to_json_string( + test_dataframe, n_head=None, n_tail=None + ) + # Check output. 
+ self.check_string(output_str) + + +# ############################################################################# +# Test_list_to_str +# ############################################################################# + + +class Test_list_to_str(hunitest.TestCase): + """ + Test list to string conversion. + """ + + def test1(self) -> None: + """ + Check that a list is converted to string correctly. + """ + # Prepare inputs. + items = [1, "two", 3, 4, "five"] + # Run test. + actual = hprint.list_to_str2(items, enclose_str_char="|", sep_char=" ; ") + # Check output. + expected = "5 [|1| ; |two| ; |3| ; |4| ; |five|]" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Check that a list is converted to string and truncated correctly. + """ + # Prepare inputs. + items = list(range(15)) + # Run test. + actual = hprint.list_to_str2(items, enclose_str_char="", sep_char=" - ") + # Check output. + expected = "15 [0 - 1 - 2 - 3 - 4 - ... - 10 - 11 - 12 - 13 - 14]" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check that a list is converted to string correctly, without additional + parameters. + """ + # Prepare inputs. + items = [1, 2, 3, 4, "five"] + # Run test. + actual = hprint.list_to_str2(items) + # Check output. + expected = "5 ['1', '2', '3', '4', 'five']" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_display_df +# ############################################################################# + + +class Test_display_df(hunitest.TestCase): + """ + Test the display_df function. + """ + + def helper_test_display_df( + self, + df: Union[pd.DataFrame, pd.Series], + expected: Optional[str], + **kwargs, + ) -> None: + """ + Test helper for display_df. 
+ + :param df: Input dataframe or series + :param expected: Expected output to compare with actual output + :param kwargs: Keyword arguments to pass to display_df + """ + # Capture the output from print_or_display and logging. + outputs = [] + tag = kwargs.get("tag") + + def mock_print_or_display( + mock_df: pd.DataFrame, + *, + index: bool = True, + as_txt: bool = False, + log_level: int = logging.INFO, + ) -> None: + """ + Capture the dataframe string representation. + """ + if as_txt or not index: + output = mock_df.to_string(index=index) + else: + output = mock_df.to_html(index=index) + outputs.append(output) + + # Run test. + with unittest.mock.patch( + "helpers.hpandas_display.print_or_display", + side_effect=mock_print_or_display, + ): + with unittest.mock.patch( + "helpers.hpandas_display._LOG.log" + ) as mock_log: + hpandisp.display_df( + df, + log_level=logging.DEBUG, + **kwargs, + ) + # Capture tag logging if present. + if tag is not None and mock_log.called: + for call in mock_log.call_args_list: + if "tag=" in str(call): + outputs.append(f"tag={tag}") + # Check output if expected is provided. + if expected is not None: + expected = hprint.dedent(expected) + actual = "\n".join(outputs) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test display_df with small dataframe. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
01a
12b
23c
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected) + + def test2(self) -> None: + """ + Test display_df with large dataframe and max_lines. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(100)), + "col_2": [f"val_{i}" for i in range(100)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
9898val_98
9999val_99
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, max_lines=5) + + def test3(self) -> None: + """ + Test display_df with inline_index=True. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + . col_1 col_2 + 0 1 a + 1 2 b + 2 3 c + """ + # Run test. + self.helper_test_display_df( + df, expected=expected, inline_index=True, index=True + ) + + def test4(self) -> None: + """ + Test display_df with index=False. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + # Prepare outputs. + expected = """ + col_1 col_2 + 1 a + 2 b + 3 c + """ + # Run test. + self.helper_test_display_df(df, expected=expected, index=False) + + def test5(self) -> None: + """ + Test display_df with named index and inline_index=True. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + } + ) + df.index.name = "my_index" + # Prepare outputs. + expected = """ + my_index col_1 col_2 + 0 1 a + 1 2 b + 2 3 c + """ + # Run test. + self.helper_test_display_df( + df, expected=expected, inline_index=True, index=False + ) + + def test6(self) -> None: + """ + Test display_df with Pandas Series (should convert to DataFrame). + """ + # Prepare inputs. + series = pd.Series([1, 2, 3, 4, 5], name="my_series") + # Prepare outputs. + expected = """ + . my_series + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + + """ + # Run test. + self.helper_test_display_df( + series, expected=expected, inline_index=True, index=False + ) + + def test7(self) -> None: + """ + Test display_df with tag parameter. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1, 2, 3]}) + # Prepare outputs. + expected = """ + . col_1 + 0 1 + 1 2 + 2 3 + tag=my_tag + """ + # Run test. 
+ self.helper_test_display_df( + df, expected=expected, tag="my_tag", inline_index=True, index=False + ) + + def test8(self) -> None: + """ + Test display_df with mode='all_rows'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(50)), + "col_2": [f"val_{i}" for i in range(50)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all_rows") + + def test9(self) -> None: + """ + Test display_df with mode='all_cols'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": ["a", "b", "c"], + "col_3": [10.5, 20.5, 30.5], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2col_3
01a10.5
12b20.5
23c30.5
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all_cols") + + def test10(self) -> None: + """ + Test display_df with mode='all'. + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(50)), + "col_2": [f"val_{i}" for i in range(50)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all") + + def test11(self) -> None: + """ + Test display_df with invalid mode raises error. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1, 2, 3]}) + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hpandisp.display_df( + df, + mode="invalid_mode", + log_level=logging.DEBUG, + ) + self.assertIn("Invalid mode", str(cm.exception)) + + def test12(self) -> None: + """ + Test display_df with duplicate columns raises assertion. + """ + # Prepare inputs. + df = pd.DataFrame([[1, 2], [3, 4]]) + df.columns = ["col", "col"] + # Run test and check output. + with self.assertRaises(AssertionError): + hpandisp.display_df(df, log_level=logging.DEBUG) + + def test13(self) -> None: + """ + Test display_df with single row dataframe. + """ + # Prepare inputs. + df = pd.DataFrame({"col_1": [1], "col_2": ["a"]}) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + +
col_1col_2
01a
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, max_lines=5) + + def test14(self) -> None: + """ + Test display_df with max_lines=1 (edge case). + """ + # Prepare inputs. + df = pd.DataFrame( + { + "col_1": list(range(10)), + "col_2": [f"val_{i}" for i in range(10)], + } + ) + # Prepare outputs. + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
col_1col_2
00val_0
11val_1
.........
88val_8
99val_9
+ """ + # Run test. + self.helper_test_display_df(df, expected=expected, mode="all") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py new file mode 100644 index 000000000..c1f66b0d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py @@ -0,0 +1,43 @@ +import logging +import os + +import pytest + +import helpers.hpandas as hpandas +import helpers.hs3 as hs3 +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# TestReadDataFromS3 +# ############################################################################# + + +class TestReadDataFromS3(hunitest.TestCase): + def test_read_csv1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_name = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + # TODO(sonaal): Reorganize all s3 input data, CmampTask5650. 
+ "alphamatic-data", + "data/kibot/all_stocks_1min/RIMG.csv.gz", + ) + hs3.dassert_path_exists(file_name, s3fs) + stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) + hpandas.read_csv_to_df(stream, **kwargs) + + @pytest.mark.slow("~15 sec.") + def test_read_parquet1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_name = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/kibot/pq/sp_500_1min/AAPL.pq", + ) + hs3.dassert_path_exists(file_name, s3fs) + stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) + hpandas.read_parquet_to_df(stream, **kwargs) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py new file mode 100644 index 000000000..0e1b813fa --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py @@ -0,0 +1,680 @@ +import logging + +import numpy as np +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_subset_multiindex_df +# ############################################################################# + + +class Test_subset_multiindex_df(hunitest.TestCase): + """ + Filter Multiindex DataFrame with 2 column levels. 
+ """ + + @staticmethod + def get_multiindex_df() -> pd.DataFrame: + timestamp_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables = [["asset1", "asset2"], ["open", "high", "low", "close"]] + index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) + nums = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df = pd.DataFrame(nums, index=timestamp_index, columns=index) + return df + + def test1(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 1 columns + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:03:00+00:00"), + columns_level0=["asset1"], + columns_level1=["high", "low"], + ) + expected_length = 3 + expected_column_names = [("asset1", "high"), ("asset1", "low")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:03:00+00:00] + columns=('asset1', 'high'),('asset1', 'low') + shape=(3, 2) + asset1 + timestamp high low + 2022-01-01 21:01:00+00:00 0.124922 -0.359292 + 2022-01-01 21:02:00+00:00 0.791050 0.766129 + 
2022-01-01 21:03:00+00:00 0.452377 1.610513 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 1 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), + columns_level1=["close"], + ) + expected_length = 2 + expected_column_names = [("asset1", "close"), ("asset2", "close")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] + columns=('asset1', 'close'),('asset2', 'close') + shape=(2, 2) + asset1 asset2 + timestamp close close + 2022-01-01 21:01:00+00:00 1.041378 0.100234 + 2022-01-01 21:02:00+00:00 -1.499353 2.045787 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test3(self) -> None: + """ + Filter by: + + - Timestamp index range + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), + end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), + columns_level0=["asset2"], + ) + expected_length = 2 + expected_column_names = [ + ("asset2", "close"), + ("asset2", "high"), + ("asset2", "low"), + ("asset2", "open"), + ] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] + columns=('asset2', 'close'),('asset2', 'high'),('asset2', 'low'),('asset2', 'open') + shape=(2, 4) + asset2 + timestamp close high low open + 2022-01-01 21:01:00+00:00 0.100234 1.407860 -0.131710 0.200999 + 2022-01-01 21:02:00+00:00 2.045787 0.060399 -0.776521 -1.059238 + """ + 
self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test4(self) -> None: + """ + Filter by: + + - Level 1 columns + - Level 2 columns + """ + df = self.get_multiindex_df() + df_filtered = hpandas.subset_multiindex_df( + df, + columns_level0=["asset2"], + columns_level1=["low"], + ) + expected_length = 5 + expected_column_names = [("asset2", "low")] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:05:00+00:00] + columns=('asset2', 'low') + shape=(5, 1) + asset2 + timestamp low + 2022-01-01 21:01:00+00:00 -0.131710 + 2022-01-01 21:02:00+00:00 -0.776521 + 2022-01-01 21:03:00+00:00 1.030158 + 2022-01-01 21:04:00+00:00 1.508267 + 2022-01-01 21:05:00+00:00 1.140696 + """ + self.check_df_output( + df_filtered, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test_columns_level0_invalid_input(self) -> None: + df = self.get_multiindex_df() + with self.assertRaises(AssertionError): + hpandas.subset_multiindex_df( + df, + columns_level0=["invalid_input"], + ) + + def test_columns_level1_invalid_input(self) -> None: + df = self.get_multiindex_df() + with self.assertRaises(AssertionError): + hpandas.subset_multiindex_df( + df, + columns_level1=["invalid_input"], + ) + + +# ############################################################################# +# Test_compare_multiindex_dfs +# ############################################################################# + + +class Test_compare_multiindex_dfs(hunitest.TestCase): + """ + Subset Multiindex DataFrames with 2 column levels and compare its values. 
+ """ + + @staticmethod + def get_multiindex_dfs() -> pd.DataFrame: + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables1 = [["asset1", "asset2"], ["open", "high", "low", "close"]] + index1 = pd.MultiIndex.from_product( + iterables1, names=[None, "timestamp"] + ) + nums1 = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df1 = pd.DataFrame(nums1, index=timestamp_index1, columns=index1) + # + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:00:00+00:00"), + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + iterables2 = [ + ["asset1", "asset2", "asset3"], + ["open", "high", "low", "close", "volume"], + ] + index2 = pd.MultiIndex.from_product( + iterables2, names=[None, "timestamp"] + ) + nums2 = [ + [ + 0.79095104, + -0.10304008, + -0.69848962, + 0.50078409, + 0.41756371, + -1.33487885, + 1.04546138, + 0.191062, + 0.08841533, + 0.61717725, + -2.15558483, + 1.21036169, + 2.60355386, + 0.07508052, + 1.00702849, + ], 
+ [ + 0.56223723, + 0.97433151, + -1.40471182, + 0.53292355, + 0.24381913, + 0.64343069, + -0.46733655, + -1.20471491, + -0.08347491, + 0.33365524, + 0.04370572, + -0.53547653, + -1.07622168, + 0.7318155, + -0.47146482, + ], + [ + -0.48272741, + 1.17859032, + -0.40816664, + 0.46684297, + 0.42518077, + -1.52913855, + 1.09925095, + 0.48817537, + 1.2662552, + -0.59757824, + 0.23724902, + -0.00660826, + 0.09780482, + -0.17166633, + -0.54515917, + ], + [ + -0.37618442, + -0.3086281, + 1.09168123, + -1.1751162, + 0.38291194, + 1.80830268, + 1.28318855, + 0.75696503, + -1.04042572, + 0.06493231, + -0.10392893, + 1.89053412, + -0.21200498, + 1.61212857, + -2.00765278, + ], + [ + -0.19674075, + -1.02532132, + -0.22486018, + 0.37664998, + 0.35619408, + -0.77304675, + 0.59053699, + -1.53249898, + 0.57548424, + -0.32093537, + -0.52109972, + 1.70938034, + -0.55419632, + 0.45531674, + 0.66878119, + ], + [ + 0.05903553, + 1.2040308, + 0.62323671, + -0.23639535, + 0.87270792, + 2.60253287, + -0.77788842, + 0.80645833, + 1.85438743, + -1.77561587, + 0.41469478, + -0.29791883, + 0.75140743, + 0.50389702, + 0.55311024, + ], + [ + -0.97820763, + -1.32155197, + -0.6143911, + 0.01473404, + 0.87798665, + 0.1701048, + -0.75376376, + 0.72503616, + 0.5791076, + 0.43942739, + 0.62505817, + 0.44998739, + 0.37350664, + -0.73485633, + -0.70406184, + ], + [ + -1.35719477, + -1.82401288, + 0.77263763, + 2.36399552, + -0.45353019, + 0.33983713, + -0.62895329, + 1.34256611, + 0.2207564, + 0.24146184, + 0.90769186, + 0.57426869, + -0.04587782, + -1.6319128, + 0.38094798, + ], + ] + df2 = pd.DataFrame(nums2, index=timestamp_index2, columns=index2) + return df1, df2 + + def test1(self) -> None: + """ + - Subset by both columns and index + - Make inner intersection and compute pct_change + """ + df1, df2 = self.get_multiindex_dfs() + subset_multiindex_df_kwargs = { + "start_timestamp": pd.Timestamp("2022-01-01 21:02:00+00:00"), + "end_timestamp": pd.Timestamp("2022-01-01 21:04:00+00:00"), + 
"columns_level0": ["asset1", "asset2"], + "columns_level1": ["low", "high"], + } + compare_dfs_kwargs = { + "column_mode": "inner", + "row_mode": "inner", + "diff_mode": "pct_change", + "assert_diff_threshold": None, + } + df_diff = hpandas.compare_multiindex_dfs( + df1, + df2, + subset_multiindex_df_kwargs=subset_multiindex_df_kwargs, + compare_dfs_kwargs=compare_dfs_kwargs, + ) + expected_length = 3 + expected_column_names = [ + ("asset1.pct_change", "high.pct_change"), + ("asset1.pct_change", "low.pct_change"), + ("asset2.pct_change", "high.pct_change"), + ("asset2.pct_change", "low.pct_change"), + ] + expected_column_unique_values = None + expected_signature = r"""# df= + index=[2022-01-01 21:02:00+00:00, 2022-01-01 21:04:00+00:00] + columns=('asset1.pct_change', 'high.pct_change'),('asset1.pct_change', 'low.pct_change'),('asset2.pct_change', 'high.pct_change'),('asset2.pct_change', 'low.pct_change') + shape=(3, 4) + asset1.pct_change asset2.pct_change + timestamp high.pct_change low.pct_change high.pct_change low.pct_change + 2022-01-01 21:02:00+00:00 -32.881643 287.700041 -94.505475 -259.066028 + 2022-01-01 21:03:00+00:00 246.576815 47.525948 -137.632125 36.090517 + 2022-01-01 21:04:00+00:00 185.862978 -765.280229 -153.498432 198.418808 + """ + self.check_df_output( + df_diff, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + +# ############################################################################# +# Test_multiindex_df_info1 +# ############################################################################# + + +class Test_multiindex_df_info1(hunitest.TestCase): + @staticmethod + def get_multiindex_df_with_datetime_index() -> pd.DataFrame: + datetime_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + iterables = 
[["asset1", "asset2"], ["open", "high", "low", "close"]] + index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) + nums = np.array( + [ + [ + 0.77650806, + 0.12492164, + -0.35929232, + 1.04137784, + 0.20099949, + 1.4078602, + -0.1317103, + 0.10023361, + ], + [ + -0.56299812, + 0.79105046, + 0.76612895, + -1.49935339, + -1.05923797, + 0.06039862, + -0.77652117, + 2.04578691, + ], + [ + 0.77348467, + 0.45237724, + 1.61051308, + 0.41800008, + 0.20838053, + -0.48289112, + 1.03015762, + 0.17123323, + ], + [ + 0.40486053, + 0.88037142, + -1.94567068, + -1.51714645, + -0.52759748, + -0.31592803, + 1.50826723, + -0.50215196, + ], + [ + 0.17409714, + -2.13997243, + -0.18530403, + -0.48807381, + 0.5621593, + 0.25899393, + 1.14069646, + 2.07721856, + ], + ] + ) + df = pd.DataFrame(nums, index=datetime_index, columns=index) + return df + + @staticmethod + def get_multiindex_df_with_non_datetime_index() -> pd.DataFrame: + non_datetime_index = ["M", "N"] + index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"]]) + data = [[1, 2, 3, 4], [5, 6, 7, 8]] + df = pd.DataFrame(data, index=non_datetime_index, columns=index) + return df + + def test1(self) -> None: + """ + Test DataFrame with a datetime index. + """ + df = self.get_multiindex_df_with_datetime_index() + actual = hpandas.multiindex_df_info(df) + # This is required by `pandas` >= 2.2. + expected = """ + shape=2 x 4 x 5 + columns_level0=2 ['asset1', 'asset2'] + columns_level1=4 ['close', 'high', 'low', 'open'] + rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:03:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:05:00+00:00'] + start_timestamp=2022-01-01 21:01:00+00:00 + end_timestamp=2022-01-01 21:05:00+00:00 + frequency=min + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Test DataFrame with a non-frequency datetime index. 
+ """ + df = self.get_multiindex_df_with_datetime_index() + non_frequency_datetime_index = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:04:30+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + df.index = non_frequency_datetime_index + actual = hpandas.multiindex_df_info(df) + expected = """ + shape=2 x 4 x 5 + columns_level0=2 ['asset1', 'asset2'] + columns_level1=4 ['close', 'high', 'low', 'open'] + rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:04:30+00:00', '2022-01-01 21:06:00+00:00'] + start_timestamp=2022-01-01 21:01:00+00:00 + end_timestamp=2022-01-01 21:06:00+00:00 + frequency=None + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Test DataFrame with a non-datetime index. + """ + df = self.get_multiindex_df_with_non_datetime_index() + actual = hpandas.multiindex_df_info(df) + expected = """ + shape=2 x 2 x 2 + columns_level0=2 ['A', 'B'] + columns_level1=2 ['X', 'Y'] + rows=2 ['M', 'N'] + """ + self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py new file mode 100644 index 000000000..f0295958f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py @@ -0,0 +1,426 @@ +import logging +from typing import Dict, List + +import pandas as pd + +import helpers.hprint as hprint +import helpers.hpandas as hpandas +import helpers.hpandas_stats as hpanstat +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# 
############################################################################# +# Test_compute_duration_df +# ############################################################################# + + +class Test_compute_duration_df(hunitest.TestCase): + """ + Compute timestamp stats from dfs and check the intersection. + """ + + @staticmethod + def get_dict_with_dfs() -> Dict[str, pd.DataFrame]: + timestamp_index1 = [ + pd.Timestamp("2022-01-01 21:00:00+00:00"), + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + pd.Timestamp("2022-01-01 21:06:00+00:00"), + ] + timestamp_index2 = [ + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + pd.Timestamp("2022-01-01 21:05:00+00:00"), + ] + timestamp_index3 = [ + pd.Timestamp("2022-01-01 21:01:00+00:00"), + pd.Timestamp("2022-01-01 21:02:00+00:00"), + pd.Timestamp("2022-01-01 21:03:00+00:00"), + pd.Timestamp("2022-01-01 21:04:00+00:00"), + ] + # + value1 = {"value1": [None, None, 1, 2, 3, 4, 5, None]} + value2 = {"value2": [1, 2, 3, None]} + value3 = {"value3": [None, None, 1, 2]} + # + df1 = pd.DataFrame(value1, index=timestamp_index1) + df2 = pd.DataFrame(value2, index=timestamp_index2) + df3 = pd.DataFrame(value3, index=timestamp_index3) + # + tag_to_df = { + "tag1": df1, + "tag2": df2, + "tag3": df3, + } + return tag_to_df + + def helper( + self, + valid_intersect: bool, + expected_start_timestamp: pd.Timestamp, + expected_end_timestamp: pd.Timestamp, + ) -> None: + """ + Checks if the intersection is valid and the same amongst all dfs. + """ + tag_to_df = self.get_dict_with_dfs() + _, tag_dfs = hpandas.compute_duration_df( + tag_to_df, valid_intersect=valid_intersect, intersect_dfs=True + ) + # Collect all start timestamps. 
+ start_timestamps = [tag_dfs[tag].index.min() for tag in tag_dfs] + # Check that all start timestamps are equal. + start_equal = all( + element == start_timestamps[0] for element in start_timestamps + ) + self.assertTrue(start_equal) + # Check that start intersection is correct. + required_start_intersection = expected_start_timestamp + self.assertEqual(start_timestamps[0], required_start_intersection) + # Collect all end timestamps. + end_timestamps = [tag_dfs[tag].index.max() for tag in tag_dfs] + # Check that all end timestamps are equal. + end_equal = all( + element == end_timestamps[0] for element in end_timestamps + ) + self.assertTrue(end_equal) + # Check that end intersection is correct. + required_end_intersection = expected_end_timestamp + self.assertEqual(end_timestamps[0], required_end_intersection) + + def test1(self) -> None: + """ + Check only timestamp stats. + """ + tag_to_df = self.get_dict_with_dfs() + df_stats, _ = hpandas.compute_duration_df(tag_to_df) + expected_length = 3 + expected_column_names = [ + "max_index", + "max_valid_index", + "min_index", + "min_valid_index", + ] + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[tag1, tag3] + columns=min_index,max_index,min_valid_index,max_valid_index + shape=(3, 4) + min_index max_index min_valid_index max_valid_index + tag1 2022-01-01 21:00:00+00:00 2022-01-01 21:06:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:06:00+00:00 + tag2 2022-01-01 21:02:00+00:00 2022-01-01 21:05:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:04:00+00:00 + tag3 2022-01-01 21:01:00+00:00 2022-01-01 21:04:00+00:00 2022-01-01 21:03:00+00:00 2022-01-01 21:04:00+00:00 + """ + expected_signature = hprint.dedent(expected_signature) + self.check_df_output( + df_stats, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Modify initial DataFrames in dictionary with non-valid intersection + (incl NaNs). 
+ """ + valid_intersect = False + expected_start_timestamp = pd.Timestamp("2022-01-01 21:02:00+00:00") + expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") + self.helper( + valid_intersect, expected_start_timestamp, expected_end_timestamp + ) + + def test3(self) -> None: + """ + Modify initial DataFrames in dictionary with valid intersection + (excluding NaNs). + """ + valid_intersect = True + expected_start_timestamp = pd.Timestamp("2022-01-01 21:03:00+00:00") + expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") + self.helper( + valid_intersect, expected_start_timestamp, expected_end_timestamp + ) + + +# ############################################################################# +# Test_compute_weighted_sum +# ############################################################################# + + +class Test_compute_weighted_sum(hunitest.TestCase): + def helper( + self, + index1: List[int], + index2: List[int], + weights_data: Dict[str, List[float]], + index_mode: str, + expected_signature: str, + ) -> None: + """ + Build inputs and check that function output is correct. + """ + # Create test data. + data1 = {"A": [1, 2], "B": [3, 4]} + df1 = pd.DataFrame(data1, index=index1) + data2 = {"A": [5, 6], "B": [7, 8]} + df2 = pd.DataFrame(data2, index=index2) + dfs = {"df1": df1, "df2": df2} + # Create weights DataFrame. + weights = pd.DataFrame(weights_data, index=dfs.keys()) + # Run the function. + weighted_sums = hpandas.compute_weighted_sum( + dfs=dfs, weights=weights, index_mode=index_mode + ) + actual_signature = str(weighted_sums) + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + def test1(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "assert_equal". 
+ """ + index1 = [0, 1] + index2 = [0, 1] + weights_data = {"w1": [0.2, 0.8]} + index_mode = "assert_equal" + expected_signature = r""" + {'w1': A B + 0 4.2 6.2 + 1 5.2 7.2} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test2(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "intersect". + """ + index1 = [0, 1] + index2 = [0, 2] + weights_data = {"w1": [0.2, 0.8], "w2": [0.5, 0.5]} + index_mode = "intersect" + expected_signature = r""" + {'w1': A B + 0 4.2 6.2 + 1 NaN NaN + 2 NaN NaN, 'w2': A B + 0 3.0 5.0 + 1 NaN NaN + 2 NaN NaN} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test3(self) -> None: + """ + Check that weighted sums are computed correctly. + + index_mode = "leave_unchanged". + """ + index1 = [0, 1] + index2 = [2, 3] + weights_data = {"w1": [0.2, 0.8]} + index_mode = "leave_unchanged" + expected_signature = r""" + {'w1': A B + 0 NaN NaN + 1 NaN NaN + 2 NaN NaN + 3 NaN NaN} + """ + expected_signature = hprint.dedent(expected_signature) + self.helper(index1, index2, weights_data, index_mode, expected_signature) + + def test4(self) -> None: + """ + Check that an assertion is raised if input is an empty dict. 
+ """ + dfs: Dict[str, pd.DataFrame] = {} + weights_data = {"w1": [0.2, 0.8]} + index_mode = "assert_equal" + with self.assertRaises(AssertionError) as cm: + hpandas.compute_weighted_sum( + dfs=dfs, + weights=pd.DataFrame(weights_data), + index_mode=index_mode, + ) + actual_signature = str(cm.exception) + expected_signature = r""" + * Failed assertion * + cond={} + dictionary of dfs must be nonempty + """ + expected_signature = hprint.dedent(expected_signature) + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# Test_get_value_counts_stats_df +# ############################################################################# + + +class Test_get_value_counts_stats_df(hunitest.TestCase): + """ + Test value counts statistics computation. + """ + + def helper( + self, + category_data: List[str], + num_rows: int, + expected: str, + ) -> None: + """ + Test value counts with given parameters. + """ + # Prepare inputs. + df = pd.DataFrame({"category": category_data}) + # Run test. + result_df = hpandas.get_value_counts_stats_df( + df, "category", num_rows=num_rows + ) + # Check outputs. + actual = str(result_df) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test basic value counts with default parameters. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] + num_rows = 10 + # Prepare outputs. + expected = """ + count pct [%] + category + A 5 50.0 + B 2 20.0 + C 2 20.0 + D 1 10.0 + """ + # Run test. + self.helper(category_data, num_rows, expected) + + def test2(self) -> None: + """ + Test limiting the number of rows returned. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] + num_rows = 2 + # Prepare outputs. + expected = """ + count pct [%] + category + A 5 50.0 + B 2 20.0 + """ + # Run test. 
+ self.helper(category_data, num_rows, expected) + + def test3(self) -> None: + """ + Test with num_rows=0 to return all rows. + """ + # Prepare inputs. + category_data = ["A", "B", "A", "C", "A", "B"] + num_rows = 0 + # Prepare outputs. + expected = """ + count pct [%] + category + A 3 50.000000 + B 2 33.333333 + C 1 16.666667 + """ + # Run test. + self.helper(category_data, num_rows, expected) + + +# ############################################################################# +# Test__get_unique_values_stats +# ############################################################################# + + +class Test__get_unique_values_stats(hunitest.TestCase): + """ + Test unique values count and percentage computation. + """ + + def helper(self, df_data: Dict, expected: str) -> None: + """ + Test unique values stats computation. + """ + # Prepare inputs. + df = pd.DataFrame(df_data) + # Run test. + result_df = hpanstat._get_unique_values_stats(df) + # Check outputs. + actual = str(result_df) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected, dedent=True) + + def test1(self) -> None: + """ + Test basic unique values computation. + """ + df_data = { + "col1": [1, 2, 1, 3, 1], + "col2": ["a", "b", "a", "c", "d"], + "col3": [1.0, 1.0, 1.0, 1.0, 1.0], + } + expected = """ + num_unique unique [%] + col1 3 60.0 + col2 4 80.0 + col3 1 20.0 + """ + self.helper(df_data, expected) + + def test2(self) -> None: + """ + Test with NaN values. + """ + df_data = { + "col1": [1, 2, 1, None, 1], + "col2": ["a", "b", "a", None, "c"], + } + expected = """ + num_unique unique [%] + col1 2 40.0 + col2 3 60.0 + """ + self.helper(df_data, expected) + + def test3(self) -> None: + """ + Test with single unique value. 
+ """ + df_data = { + "col1": [5, 5, 5, 5], + "col2": ["x", "x", "x", "x"], + } + expected = """ + num_unique unique [%] + col1 1 25.0 + col2 1 25.0 + """ + self.helper(df_data, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py new file mode 100644 index 000000000..f11d6988a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py @@ -0,0 +1,1888 @@ +import csv +import io +import logging +import re +import time +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd +import pytest + +import helpers.hdatetime as hdateti +import helpers.hpandas as hpandas +import helpers.hpandas_transform as hpantran +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_trim_df1 +# ############################################################################# + + +class Test_trim_df1(hunitest.TestCase): + def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame: + """ + Return a df read from the CSV txt, with the first column as index. + + The `start_time` column is converted with `pd.to_datetime()`.
+ """ + txt = """ + ,start_time,egid,close + 4,2022-01-04 21:38:00.000000,13684,1146.48 + 8,2022-01-04 21:38:00.000000,17085,179.45 + 14,2022-01-04 21:37:00.000000,13684,1146.26 + 18,2022-01-04 21:37:00.000000,17085,179.42 + 24,2022-01-04 21:36:00.000000,13684,1146.0 + 27,2022-01-04 21:36:00.000000,17085,179.46 + 34,2022-01-04 21:35:00.000000,13684,1146.0 + 38,2022-01-04 21:35:00.000000,17085,179.42 + 40,2022-01-04 21:34:00.000000,17085,179.42 + 44,2022-01-04 21:34:00.000000,13684,1146.0 + """ + txt = hprint.dedent(txt) + df = pd.read_csv(io.StringIO(txt), *args, index_col=0, **kwargs) + df["start_time"] = pd.to_datetime(df["start_time"]) + return df + + def test_types1(self) -> None: + """ + Check the types of a df coming from `read_csv()`. + + The `start_time` column is converted to `datetime64[ns]` by `get_df()`. + """ + df = self.get_df() + # + actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def get_df_with_parse_dates(self) -> pd.DataFrame: + """ + Read the CSV parsing `start_time` as timestamps. + + The inferred type is a nasty `datetime64` which is not as well- + behaved as our beloved `pd.Timestamp`.
+ """ + df = self.get_df(parse_dates=["start_time"]) + return df + + def test_types2(self) -> None: + """ + Check the types of a df coming from `read_csv()` forcing parsing some + values as dates. + """ + df = self.get_df_with_parse_dates() + # Check. + actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def get_df_with_tz_timestamp(self) -> pd.DataFrame: + """ + Force the column parsed as `datetime64` into a tz-aware object. + + The resulting object is a `datetime64[ns, tz]`. + """ + df = self.get_df_with_parse_dates() + # Apply the tz. + col_name = "start_time" + df[col_name] = ( + df[col_name].dt.tz_localize("UTC").dt.tz_convert("America/New_York") + ) + df[col_name] = pd.to_datetime(df[col_name]) + return df + + def test_types3(self) -> None: + """ + Check the types of a df coming from `read_csv()` after conversion to + tz-aware objects. + """ + df = self.get_df_with_tz_timestamp() + # Check. 
+ actual = hpandas.df_to_str( + df, print_dtypes=True, print_shape_info=True, tag="df" + ) + expected = r"""# df= + index=[4, 44] + columns=start_time,egid,close + shape=(10, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 + 1 start_time datetime64[ns, America/New_York] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 + 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 + 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 + start_time egid close + 4 2022-01-04 16:38:00-05:00 13684 1146.48 + 8 2022-01-04 16:38:00-05:00 17085 179.45 + 14 2022-01-04 16:37:00-05:00 13684 1146.26 + ... + 38 2022-01-04 16:35:00-05:00 17085 179.42 + 40 2022-01-04 16:34:00-05:00 17085 179.42 + 44 2022-01-04 16:34:00-05:00 13684 1146.00""" + self.assert_equal(actual, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def helper( + self, + df: pd.DataFrame, + ts_col_name: Optional[str], + start_ts: Optional[pd.Timestamp], + end_ts: Optional[pd.Timestamp], + left_close: bool, + right_close: bool, + expected: str, + ) -> None: + """ + Run trimming and check the outcome. + + See param description in `hpandas.trim_df`. + + :param expected: the expected outcome of the trimming + """ + df_trim = hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + actual = hpandas.df_to_str(df_trim, print_shape_info=True, tag="df_trim") + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_trim_df1(self) -> None: + """ + Test trimming: baseline case. + """ + df = self.get_df() + # Run.
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df2(self) -> None: + """ + Trim a df with a column that is `datetime64` without tz using a + `pd.Timestamp` without tz. + + This operation is valid. + """ + df = self.get_df_with_parse_dates() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + ... + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df3(self) -> None: + """ + Trim a df with a column that is `datetime64` with tz vs a + `pd.Timestamp` with tz. + + This operation is valid. + """ + df = self.get_df_with_tz_timestamp() + # Run.
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00", tz="UTC") + end_ts = pd.Timestamp("2022-01-04 21:38:00", tz="UTC") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 4 2022-01-04 16:38:00-05:00 13684 1146.48 + 8 2022-01-04 16:38:00-05:00 17085 179.45 + 14 2022-01-04 16:37:00-05:00 13684 1146.26 + ... + 27 2022-01-04 16:36:00-05:00 17085 179.46 + 34 2022-01-04 16:35:00-05:00 13684 1146.00 + 38 2022-01-04 16:35:00-05:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + # pylint: disable=line-too-long + def test_trim_df4(self) -> None: + """ + Trim a df with a column that is `datetime64` with tz vs a + `pd.Timestamp` without tz. + + This operation is invalid and we expect a `TypeError`. + """ + df = self.get_df_with_tz_timestamp() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + with self.assertRaises(TypeError) as cm: + hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + # Check. + actual = str(cm.exception) + expected = r""" + Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_trim_df5(self) -> None: + """ + Test filtering on the index. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45 + 2022-01-04 21:37:00 13684 1146.26 + ...
+ 2022-01-04 21:36:00 17085 179.46 + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df6(self) -> None: + """ + Test excluding the lower boundary. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = False + right_close = True + expected = r"""# df_trim= + index=[4, 27] + columns=start_time,egid,close + shape=(6, 3) + start_time egid close + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45 + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + 27 2022-01-04 21:36:00 17085 179.46""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df7(self) -> None: + """ + Test excluding the upper boundary. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = False + expected = r"""# df_trim= + index=[14, 38] + columns=start_time,egid,close + shape=(6, 3) + start_time egid close + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + 27 2022-01-04 21:36:00 17085 179.46 + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df8(self) -> None: + """ + Test filtering on a sorted column. + """ + df = self.get_df() + # Run. 
+ ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + df = df.sort_values(ts_col_name) + expected = r"""# df_trim= + index=[4, 38] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 34 2022-01-04 21:35:00 13684 1146.00 + 38 2022-01-04 21:35:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + ... + 18 2022-01-04 21:37:00 17085 179.42 + 4 2022-01-04 21:38:00 13684 1146.48 + 8 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df9(self) -> None: + """ + Test filtering on a sorted index. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = True + right_close = True + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42 + 2022-01-04 21:36:00 13684 1146.00 + ... + 2022-01-04 21:37:00 17085 179.42 + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df10(self) -> None: + """ + Test filtering on a sorted index, excluding lower and upper boundaries. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. 
+ ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + left_close = False + right_close = False + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:36:00, 2022-01-04 21:37:00] + columns=egid,close + shape=(4, 2) + egid close + start_time + 2022-01-04 21:36:00 13684 1146.00 + 2022-01-04 21:36:00 17085 179.46 + 2022-01-04 21:37:00 13684 1146.26 + 2022-01-04 21:37:00 17085 179.42""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df11(self) -> None: + """ + Test filtering on a non-sorted column, with `start_ts` being None. + """ + df = self.get_df() + # Run. + ts_col_name = "start_time" + start_ts = None + end_ts = pd.Timestamp("2022-01-04 21:37:00") + left_close = True + right_close = True + expected = r"""# df_trim= + index=[14, 44] + columns=start_time,egid,close + shape=(8, 3) + start_time egid close + 14 2022-01-04 21:37:00 13684 1146.26 + 18 2022-01-04 21:37:00 17085 179.42 + 24 2022-01-04 21:36:00 13684 1146.00 + ... + 38 2022-01-04 21:35:00 17085 179.42 + 40 2022-01-04 21:34:00 17085 179.42 + 44 2022-01-04 21:34:00 13684 1146.00""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + def test_trim_df12(self) -> None: + """ + Test filtering on a sorted index, with `end_ts` being None. + """ + df = self.get_df() + df = df.set_index("start_time") + # Run. + ts_col_name = None + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = None + left_close = True + right_close = True + df = df.sort_index() + expected = r"""# df_trim= + index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] + columns=egid,close + shape=(8, 2) + egid close + start_time + 2022-01-04 21:35:00 13684 1146.00 + 2022-01-04 21:35:00 17085 179.42 + 2022-01-04 21:36:00 13684 1146.00 + ... 
+ 2022-01-04 21:37:00 17085 179.42 + 2022-01-04 21:38:00 13684 1146.48 + 2022-01-04 21:38:00 17085 179.45""" + self.helper( + df, ts_col_name, start_ts, end_ts, left_close, right_close, expected + ) + + +# ############################################################################# +# Test_trim_df2 +# ############################################################################# + + +@pytest.mark.skip( + "Used for comparing speed of different trimming methods (CmTask1404)." +) +class Test_trim_df2(Test_trim_df1): + """ + Test the speed of different approaches to df trimming. + """ + + def get_data( + self, set_as_index: bool, sort: bool + ) -> Tuple[pd.DataFrame, str, pd.Timestamp, pd.Timestamp]: + """ + Get the data for experiments. + + :param set_as_index: whether to set the filtering values as + index + :param sort: whether to sort the filtering values + :return: the df to trim, the parameters for trimming + """ + # Get a large df. + df = self.get_df() + df = df.loc[df.index.repeat(100000)].reset_index(drop=True) + # Define the params. + ts_col_name = "start_time" + start_ts = pd.Timestamp("2022-01-04 21:35:00") + end_ts = pd.Timestamp("2022-01-04 21:38:00") + # Prepare the data. + if set_as_index: + df = df.set_index(ts_col_name, append=True, drop=False) + if sort: + df = df.sort_index(level=ts_col_name) + elif sort: + df = df.sort_values(ts_col_name) + return df, ts_col_name, start_ts, end_ts + + def check_trimmed_df( + self, + df: pd.DataFrame, + ts_col_name: str, + start_ts: pd.Timestamp, + end_ts: pd.Timestamp, + ) -> None: + """ + Confirm that the trimmed df matches what is expected. + + The trimmed df is compared to the one produced by + `hpandas.trim_df()` with lower and upper boundaries included. + Thus, it is ensured that all the trimming methods produce the + same output. + + See param descriptions in `hpandas.trim_df()`. + + :param df: the df trimmed in a test, to compare with the + `hpandas.trim_df()` one + """ + # Clean up the df from the test. 
+ if df.index.nlevels > 1: + df = df.droplevel(ts_col_name) + df = df.reset_index(drop=True) + df = df.sort_values(by=[ts_col_name, "egid"], ascending=[False, True]) + # Get the reference trimmed df. + left_close = True + right_close = True + df_trim_for_comparison = hpandas.trim_df( + df, ts_col_name, start_ts, end_ts, left_close, right_close + ) + assert df.equals(df_trim_for_comparison) + + def test_simple_mask_col(self) -> None: + """ + Trim with a simple mask; filtering on a column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + mask = df[ts_col_name] >= start_ts + df = df[mask] + if not df.empty: + mask = df[ts_col_name] <= end_ts + df = df[mask] + end_time = time.time() + _LOG.info( + "Simple mask trim (column): %.2f seconds", (end_time - start_time) + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_simple_mask_idx(self) -> None: + """ + Trim with a simple mask; filtering on an index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + mask = df.index.get_level_values(ts_col_name) >= start_ts + df = df[mask] + if not df.empty: + mask = df.index.get_level_values(ts_col_name) <= end_ts + df = df[mask] + end_time = time.time() + _LOG.info( + "Simple mask trim (index): %.2f seconds", (end_time - start_time) + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_between_col(self) -> None: + """ + Trim using `pd.Series.between`; filtering on a column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. 
+ start_time = time.time() + df = df[df[ts_col_name].between(start_ts, end_ts, inclusive="both")] + end_time = time.time() + _LOG.info( + "`pd.Series.between` trim (column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_between_idx(self) -> None: + """ + Trim using `pd.Series.between`; filtering on an index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + filter_values = pd.Series( + df.index.get_level_values(ts_col_name) + ).between(start_ts, end_ts, inclusive="both") + df = df.droplevel(ts_col_name) + df = df[filter_values] + end_time = time.time() + _LOG.info( + "`pd.Series.between` trim (index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_non_sorted_col(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a non-sorted column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.set_index(df[ts_col_name], append=True).sort_index( + level=ts_col_name + ) + df = df.swaplevel() + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (non-sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_non_sorted_idx(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a non-sorted index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + df = df.swaplevel() + # Run. 
+ start_time = time.time() + df = df.sort_index(level=ts_col_name) + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (non-sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_sorted_col(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a sorted column. + """ + set_as_index = False + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.set_index(ts_col_name, drop=False) + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_truncate_sorted_idx(self) -> None: + """ + Trim using `pd.DataFrame.truncate`; filtering on a sorted index. + """ + set_as_index = True + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + df = df.swaplevel() + # Run. + start_time = time.time() + df = df.truncate(before=start_ts, after=end_ts) + end_time = time.time() + _LOG.info( + "`pd.DataFrame.truncate` trim (sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_non_sorted_col(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a non-sorted column. + """ + set_as_index = False + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. 
+ start_time = time.time() + df = df.sort_values(ts_col_name, ascending=True) + left_idx = df[ts_col_name].searchsorted(start_ts, side="left") + right_idx = df[ts_col_name].searchsorted(end_ts, side="right") + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (non-sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_non_sorted_idx(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a non-sorted index. + """ + set_as_index = True + sort = False + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + df = df.sort_index(level=ts_col_name) + left_idx = df.index.get_level_values(ts_col_name).searchsorted( + start_ts, side="left" + ) + right_idx = df.index.get_level_values(ts_col_name).searchsorted( + end_ts, side="right" + ) + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (non-sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_sorted_col(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a sorted column. + """ + set_as_index = False + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + left_idx = df[ts_col_name].searchsorted(start_ts, side="left") + right_idx = df[ts_col_name].searchsorted(end_ts, side="right") + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (sorted column): %.2f seconds", + (end_time - start_time), + ) + # Check. 
+ self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + def test_searchsorted_sorted_idx(self) -> None: + """ + Trim using `pd.Series.searchsorted`; filtering on a sorted index. + """ + set_as_index = True + sort = True + df, ts_col_name, start_ts, end_ts = self.get_data( + set_as_index=set_as_index, sort=sort + ) + # Run. + start_time = time.time() + left_idx = df.index.get_level_values(ts_col_name).searchsorted( + start_ts, side="left" + ) + right_idx = df.index.get_level_values(ts_col_name).searchsorted( + end_ts, side="right" + ) + df = df.iloc[left_idx:right_idx] + end_time = time.time() + _LOG.info( + "`pd.Series.searchsorted` trim (sorted index): %.2f seconds", + (end_time - start_time), + ) + # Check. + self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) + + +# ############################################################################# +# Test_assemble_df_rows +# ############################################################################# + + +class Test_assemble_df_rows(hunitest.TestCase): + """ + Test assembling df values into a column-row structure. + """ + + @staticmethod + def get_rows_values_example(df_as_str: str) -> hpantran.RowsValues: + """ + Prepare the input. + """ + # Separate the rows. + rows = df_as_str.split("\n") + # Clean up extra spaces. + rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] + # Identify individual values in the rows. + rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) + return rows_values + + def test1(self) -> None: + """ + Test unnamed index, compact df. + """ + # Get the input. + df_as_str = """ + col1 col2 col3 col4 + 0 0.1 0.1 0.1 0.1 + 1 0.2 0.2 0.2 0.2""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check.
+ expected = [ + ["", "col1", "col2", "col3", "col4"], + ["0", "0.1", "0.1", "0.1", "0.1"], + ["1", "0.2", "0.2", "0.2", "0.2"], + ] + self.assertListEqual(actual, expected) + + def test2(self) -> None: + """ + Test unnamed index, large df. + """ + # Get the input. + df_as_str = """ + column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 + 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 + 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. + expected = [ + [ + "", + "column_with_a_very_long_name_1", + "column_with_a_very_long_name_2", + "column_with_a_very_long_name_3", + "column_with_a_very_long_name_4", + "column_with_a_very_long_name_5", + ], + [ + "0", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + [ + "1", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + ] + self.assertListEqual(actual, expected) + + def test3(self) -> None: + """ + Test named index, compact df. + """ + # Get the input. + df_as_str = """ + col1 col2 col3 col4 + idx + 0 0.1 0.1 0.1 0.1 + 1 0.2 0.2 0.2 0.2""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. 
+ expected = [ + ["idx", "col1", "col2", "col3", "col4"], + ["0", "0.1", "0.1", "0.1", "0.1"], + ["1", "0.2", "0.2", "0.2", "0.2"], + ] + self.assertListEqual(actual, expected) + + def test4(self) -> None: + """ + Test named index, large df. + """ + # Get the input. + df_as_str = """ + column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 + idx + 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 + 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" + rows_values = self.get_rows_values_example(df_as_str) + # Run. + actual = hpantran._assemble_df_rows(rows_values) + # Check. + expected = [ + [ + "idx", + "column_with_a_very_long_name_1", + "column_with_a_very_long_name_2", + "column_with_a_very_long_name_3", + "column_with_a_very_long_name_4", + "column_with_a_very_long_name_5", + ], + [ + "0", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + [ + "1", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + "0.123456789123456789123456789", + ], + ] + self.assertListEqual(actual, expected) + + +# ############################################################################# +# Test_str_to_df +# ############################################################################# + + +class Test_str_to_df(hunitest.TestCase): + """ + Test converting a string representation of a dataframe into a Pandas df. + """ + + def test1(self) -> None: + # Prepare input. 
+ df_as_str = """ + col1 col2 col3 col4 + 0 0.1 a None 2020-01-01 + 1 0.2 "b c" None 2021-05-05""" + col_to_type = { + "__index__": int, + "col1": float, + "col2": str, + "col3": None, + "col4": pd.Timestamp, + } + col_to_name_type: Dict[str, type] = {} + # Run. + actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) + # Check. + expected = pd.DataFrame( + { + "col1": [0.1, 0.2], + "col2": ["a", "b c"], + "col3": [None, None], + "col4": [ + pd.Timestamp("2020-01-01"), + pd.Timestamp("2021-05-05"), + ], + }, + index=[0, 1], + ) + hunitest.compare_df(actual, expected) + + def test2(self) -> None: + """ + Run a full circle check. + + The df used for testing: + + 1 2 + end_timestamp + 2023-08-15 0.21 1.7 + 2023-08-16 0.22 1.8 + 2023-08-17 0.23 1.9 + """ + # Create a df from the data. + data = { + 1: [0.21, 0.22, 0.23], + 2: [1.7, 1.8, 1.9], + } + timestamps = [ + pd.Timestamp("2023-08-15"), + pd.Timestamp("2023-08-16"), + pd.Timestamp("2023-08-17"), + ] + expected = pd.DataFrame(data, index=timestamps) + expected.index.name = "end_timestamp" + # Convert the df into a string. + df_as_str = hpandas.df_to_str(expected) + # Convert the resulting string back into a df. + col_to_type = { + "__index__": pd.Timestamp, + "1": float, + "2": float, + } + col_to_name_type = { + "1": int, + "2": int, + } + actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) + # Check that the initial df and the final df are the same. + hunitest.compare_df(actual, expected) + + +# ############################################################################# +# TestFindGapsInDataframes +# ############################################################################# + + +class TestFindGapsInDataframes(hunitest.TestCase): + def test_find_gaps_in_dataframes(self) -> None: + """ + Verify that gaps are caught. + """ + # Prepare inputs. 
+ test_data = pd.DataFrame( + data={ + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + ) + # Run. + missing_data = hpandas.find_gaps_in_dataframes( + test_data.head(2), test_data.tail(2) + ) + # Check output. + actual = pd.concat(missing_data) + actual = hpandas.df_to_str(actual) + expected = r""" dummy_value_1 dummy_value_2 dummy_value_3 + 2 3 C 0 + 0 1 A 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestSubsetDf1 +# ############################################################################# + + +class TestSubsetDf1(hunitest.TestCase): + def test1(self) -> None: + # Generate some random data. + np.random.seed(42) + df = pd.DataFrame( + np.random.randint(0, 100, size=(20, 4)), columns=list("ABCD") + ) + # Subset. + df2 = hpandas.subset_df(df, nrows=5, seed=43) + # Check. + actual = hpandas.df_to_str(df2) + expected = r""" + A B C D + 0 51 92 14 71 + 1 60 20 82 86 + 3 23 2 21 52 + ... + 17 80 35 49 3 + 18 1 5 53 3 + 19 53 92 62 17 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestCheckAndFilterMatchingColumns +# ############################################################################# + + +class TestCheckAndFilterMatchingColumns(hunitest.TestCase): + """ + Test that matching columns are filtered correctly. 
+ """ + + @staticmethod + def get_test_data() -> pd.DataFrame: + df = pd.DataFrame( + data=[[3, 4, 5]] * 3, + columns=["col1", "col2", "col3"], + ) + return df + + def test_check_and_filter_matching_columns1(self) -> None: + """ + - required columns = received columns + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col2", "col3"] + filter_data_mode = "assert" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + self.assert_equal(str(actual_columns), str(columns)) + + def test_check_and_filter_matching_columns2(self) -> None: + """ + - received columns contain some columns apart from required ones + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col3"] + filter_data_mode = "assert" + with self.assertRaises(AssertionError): + hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + + def test_check_and_filter_matching_columns3(self) -> None: + """ + - received columns do not contain some of required columns + - `filter_data_mode` = "assert" + """ + df = self.get_test_data() + columns = ["col1", "col4"] + filter_data_mode = "assert" + with self.assertRaises(AssertionError): + hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + + def test_check_and_filter_matching_columns4(self) -> None: + """ + - received columns contain some columns apart from required ones + - `filter_data_mode` = "warn_and_trim" + """ + df = self.get_test_data() + columns = ["col1", "col3"] + filter_data_mode = "warn_and_trim" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + self.assert_equal(str(actual_columns), str(columns)) + + def test_check_and_filter_matching_columns5(self) -> None: + """ + - received columns do not contain some of required columns + - `filter_data_mode` = "warn_and_trim" + """ + df = 
self.get_test_data() + columns = ["col1", "col2", "col4"] + filter_data_mode = "warn_and_trim" + df = hpandas.check_and_filter_matching_columns( + df, columns, filter_data_mode + ) + actual_columns = df.columns.to_list() + expected_columns = ["col1", "col2"] + self.assert_equal(str(actual_columns), str(expected_columns)) + + +# ############################################################################# + + +# ############################################################################# +# Test_merge_dfs1 +# ############################################################################# + + +class Test_merge_dfs1(hunitest.TestCase): + """ + Test that 2 dataframes are merged correctly. + """ + + @staticmethod + def get_dataframe(data: Dict, index: List[int]) -> pd.DataFrame: + df = pd.DataFrame.from_dict(data) + index = pd.Index(index) + df = df.set_index(index, drop=True) + return df + + def test1(self) -> None: + """ + Overlap of `threshold_col` values is 100%. + """ + # Create test data. + data1 = { + "col1": [1, 10, 100], + "col2": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 70, 700], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + merged_df = hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + # Set expected values. 
+ expected_length = 3 + expected_column_names = [ + "col1", + "col2", + "col3", + "col4", + "col5", + "threshold_col", + ] + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[0, 2] + columns=col1,col2,col3,threshold_col,col4,col5 + shape=(3, 6) + col1 col2 col3 threshold_col col4 col5 + 0 1 2.0 3 7 4 5.0 + 1 10 NaN 30 70 40 NaN + 2 100 200.0 300 700 400 500.0 + """ + # Check. + self.check_df_output( + merged_df, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test2(self) -> None: + """ + Overlap of `threshold_col` values is below the threshold. + """ + # Create test data. + data1 = { + "col1": [1, 10, 100], + "col2": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 60, 600], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + # Check. + with self.assertRaises(AssertionError): + hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + + def test3(self) -> None: + """ + Overlap of `threshold_col` values is above the threshold. + """ + # Create test data. 
+ data1 = { + "col1": [1, 3, 5, 7, 10, 100, 100, 100, 100, 10, 10], + "col2": [2, 4, 6, 8, np.nan, 200, 200, np.nan, 10, 10, 100], + "col3": [1, 2, 3, 4, 30, 300, 300, np.nan, 300, 300, 30], + "threshold_col": [0, 1, 3, 5, 7, 9, 11, 13, 15, 70, 700], + } + index1 = range(0, 11) + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300, 1, 2, 3, 4, 30, 300, 300, np.nan], + "col4": [4, 40, 400, 2, 4, 6, 8, 11, 13, 15, 70], + "col5": [5, np.nan, 500, 5, 7, 10, 1, 2, 3, 4, 30], + "threshold_col": [1, 2, 3, 5, 7, 9, 11, 13, 15, 70, 700], + } + index2 = range(9, 20) + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + merged_df = hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + # Set expected values. + expected_length = 20 + expected_column_names = [ + "col1", + "col2", + "col3", + "col4", + "col5", + "threshold_col", + ] + expected_column_unique_values = None + # This is required by `pandas` >= 2.2. + expected_signature = r""" + # df= + index=[0, 19] + columns=col1,col2,col3,threshold_col,col4,col5 + shape=(20, 6) + col1 col2 col3 threshold_col col4 col5 + 0 1.0 2.0 1.0 0 NaN NaN + 1 NaN NaN 1.0 5 2.0 5.0 + 2 3.0 4.0 2.0 1 NaN NaN + ... + 17 10.0 10.0 300.0 70 15.0 4.0 + 18 100.0 NaN NaN 13 NaN NaN + 19 NaN NaN NaN 700 70.0 30.0 + """ + # Check. + self.check_df_output( + merged_df, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) + + def test4(self) -> None: + """ + There are common columns (besides columns to merge on) in dataframes. + """ + # Create test data. 
+ data1 = { + "col1": [1, 10, 100], + "col5": [2, np.nan, 200], + "col3": [3, 30, 300], + "threshold_col": [7, 70, 700], + } + index1 = [1, 2, 3] + df1 = self.get_dataframe(data1, index1) + # + data2 = { + "col3": [3, 30, 300], + "col4": [4, 40, 400], + "col5": [5, np.nan, 500], + "threshold_col": [7, 70, 700], + } + index2 = [3, 4, 5] + df2 = self.get_dataframe(data2, index2) + # + threshold_col_name = "threshold_col" + cols_to_merge_on = ["col3", "threshold_col"] + # Check. + with self.assertRaises(AssertionError): + hpandas.merge_dfs( + df1, + df2, + threshold_col_name, + how="outer", + on=cols_to_merge_on, + ) + + +# ############################################################################# +# Test_apply_index_mode +# ############################################################################# + + +class Test_apply_index_mode(hunitest.TestCase): + @staticmethod + def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Generate toy dataframes for the test. + """ + # Define common columns. + columns = ["A", "B"] + # Build dataframes with intersecting indices. + idx1 = [0, 1, 2, 3, 4] + data1 = [ + [0.21, 0.44], + [0.11, 0.42], + [1.99, 0.8], + [3.1, 0.91], + [3.5, 1.4], + ] + df1 = pd.DataFrame(data1, columns=columns, index=idx1) + # + idx2 = [0, 6, 2, 3, 5] + data1 = [ + [0.1, 0.4], + [0.11, 0.2], + [1.29, 0.38], + [0.1, 0.9], + [3.3, 2.4], + ] + df2 = pd.DataFrame(data1, columns=columns, index=idx2) + return df1, df2 + + def test1(self) -> None: + """ + Check that returned dataframes have indices that are equal to the + common index. + + - `mode="intersect"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + # Use an index intersection to transform dataframes. + mode = "intersect" + df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) + # Check that indices are common. 
+ common_index = df1_in.index.intersection(df2_in.index) + common_index = hpandas.df_to_str(common_index) + idx1 = hpandas.df_to_str(df1_out.index) + idx2 = hpandas.df_to_str(df2_out.index) + self.assert_equal(idx1, common_index) + self.assert_equal(idx2, common_index) + + def test2(self) -> None: + """ + Check that dataframe indices did not change after applying an index + mode. + + - `mode="leave_unchanged"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "leave_unchanged" + df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) + # Check that indices are as-is. + df1_in_idx = hpandas.df_to_str(df1_in.index) + df1_out_idx = hpandas.df_to_str(df1_out.index) + self.assert_equal(df1_in_idx, df1_out_idx) + # + df2_in_idx = hpandas.df_to_str(df2_in.index) + df2_out_idx = hpandas.df_to_str(df2_out.index) + self.assert_equal(df2_in_idx, df2_out_idx) + + def test3(self) -> None: + """ + Check that an assertion is raised when indices are not equal. + + - `mode="assert_equal"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "assert_equal" + # Check that both indices are equal, assert otherwise. + with self.assertRaises(AssertionError) as cm: + hpandas.apply_index_mode(df1_in, df2_in, mode) + actual = str(cm.exception) + # Check the error exception message. + self.check_string(actual) + + +# ############################################################################# +# Test_apply_column_mode +# ############################################################################# + + +class Test_apply_column_mode(hunitest.TestCase): + """ + Test that function applies column modes correctly. + """ + + @staticmethod + def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Generate toy dataframes for the test. + """ + # Build dataframes with intersecting columns. 
+ columns_1 = ["A", "B"] + data1 = [ + [0.21, 0.44], + [0.11, 0.42], + [1.99, 0.8], + [3.1, 0.91], + [3.5, 1.4], + ] + df1 = pd.DataFrame(data1, columns=columns_1) + # + columns_2 = ["A", "C"] + data2 = [ + [0.1, 0.4], + [0.11, 0.2], + [1.29, 0.38], + [0.1, 0.9], + [3.3, 2.4], + ] + df2 = pd.DataFrame(data2, columns=columns_2) + return df1, df2 + + def test1(self) -> None: + """ + Check that returned dataframes have columns that are equal to the + common ones. + + - `mode="intersect"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + # Use a column intersection mode to transform dataframes. + mode = "intersect" + df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) + # Check that dfs have equal column names. + common_columns = df1_in.columns.intersection(df2_in.columns) + common_columns = hpandas.df_to_str(common_columns) + columns1 = hpandas.df_to_str(df1_out.columns) + self.assert_equal(columns1, common_columns) + # + columns2 = hpandas.df_to_str(df2_out.columns) + self.assert_equal(columns2, common_columns) + + def test2(self) -> None: + """ + Check that dataframes' columns did not change after applying a column + mode. + + - `mode="leave_unchanged"` + """ + # Get test data. + df1_in, df2_in = self.get_test_data() + mode = "leave_unchanged" + df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) + # Check that columns are as-is. + df1_in_columns = hpandas.df_to_str(df1_in.columns) + df1_out_columns = hpandas.df_to_str(df1_out.columns) + self.assert_equal(df1_in_columns, df1_out_columns) + # + df2_in_columns = hpandas.df_to_str(df2_in.columns) + df2_out_columns = hpandas.df_to_str(df2_out.columns) + self.assert_equal(df2_in_columns, df2_out_columns) + + def test3(self) -> None: + """ + Check that an assertion is raised when columns are not equal. + + - `mode="assert_equal"` + """ + # Get test data. 
+ df1_in, df2_in = self.get_test_data() + mode = "assert_equal" + # Check that both dataframes columns are equal, assert otherwise. + with self.assertRaises(AssertionError) as cm: + hpandas.apply_columns_mode(df1_in, df2_in, mode) + actual = str(cm.exception) + # Compare the actual outcome with an expected one. + self.check_string(actual) + + +# ############################################################################# + + +# ############################################################################# +# Test_get_df_from_iterator +# ############################################################################# + + +class Test_get_df_from_iterator(hunitest.TestCase): + def test1(self) -> None: + """ + Check that a dataframe is correctly built from an iterator of + dataframes. + """ + # Build iterator of dataframes for the test. + data1 = { + "num_col": [1, 2], + "str_col": ["A", "B"], + } + df1 = pd.DataFrame(data=data1) + data2 = { + "num_col": [3, 4], + "str_col": ["C", "D"], + } + df2 = pd.DataFrame(data=data2) + data3 = { + "num_col": [5, 6], + "str_col": ["E", "F"], + } + df3 = pd.DataFrame(data=data3) + # Run. + iter_ = iter([df1, df2, df3]) + df = hpandas.get_df_from_iterator(iter_) + actual_signature = hpandas.df_to_str(df) + expected_signature = """ num_col str_col + 0 1 A + 0 3 C + 0 5 E + 1 2 B + 1 4 D + 1 6 F + """ + self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) + + +# ############################################################################# +# TestFilterByTime +# ############################################################################# + + +class TestFilterByTime(hunitest.TestCase): + @staticmethod + def _get_test_data() -> pd.DataFrame: + """ + Get data for testing. 
+ + :return: data for testing + """ + df = pd.DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [ + hdateti.to_datetime("2018-04-05"), + hdateti.to_datetime("2018-04-06"), + hdateti.to_datetime("2018-04-07"), + hdateti.to_datetime("2018-04-08"), + ], + } + ) + df.index = pd.date_range("2017-01-01", periods=4) + return df + + def test_filter_by_index1(self) -> None: + """ + Verify that `[lower_bound, upper_bound)` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="left", + ts_col_name=None, + ) + expected = df[1:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index2(self) -> None: + """ + Verify that `(lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="right", + ts_col_name=None, + ) + expected = df[2:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index3(self) -> None: + """ + Verify that `[lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name=None, + ) + expected = df[1:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_index4(self) -> None: + """ + Verify that `(lower_bound, upper_bound)` works. 
+ """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2017-01-02") + upper_bound = hdateti.to_datetime("2017-01-04") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="neither", + ts_col_name=None, + ) + expected = df[2:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column1(self) -> None: + """ + Verify that `[lower_bound, upper_bound)` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="left", + ts_col_name="col2", + ) + expected = df[1:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column2(self) -> None: + """ + Verify that `(lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="right", + ts_col_name="col2", + ) + expected = df[2:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column3(self) -> None: + """ + Verify that `[lower_bound, upper_bound]` works. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name="col2", + ) + expected = df[1:4] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_filter_by_column4(self) -> None: + """ + Verify that `(lower_bound, upper_bound)` works. 
+ """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2018-04-06") + upper_bound = hdateti.to_datetime("2018-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="neither", + ts_col_name="col2", + ) + expected = df[2:3] + self.assert_equal(actual.to_string(), expected.to_string()) + + def test_no_intersection(self) -> None: + """ + Verify that if time interval is not covered by data then empty + DataFrame is returned. + """ + df = self._get_test_data() + lower_bound = hdateti.to_datetime("2021-04-06") + upper_bound = hdateti.to_datetime("2021-04-08") + actual = hpantran.filter_by_time( + df=df, + lower_bound=lower_bound, + upper_bound=upper_bound, + inclusive="both", + ts_col_name=None, + ) + self.assertEqual(actual.shape[0], 0) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py new file mode 100644 index 000000000..67eddb250 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py @@ -0,0 +1,251 @@ +import logging + +import pandas as pd + +import helpers.hpandas as hpandas +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_df_to_str +# ############################################################################# + + +class Test_df_to_str(hunitest.TestCase): + @staticmethod + def get_test_data() -> pd.DataFrame: + test_data = { + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + df = pd.DataFrame(data=test_data) + return df + + def test_df_to_str1(self) -> None: + """ + Test common call to 
`df_to_str` with basic df. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str2(self) -> None: + """ + Test common call to `df_to_str` with tag. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, tag="df") + expected = r"""# df= + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str3(self) -> None: + """ + Test common call to `df_to_str` with print_shape_info. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_shape_info=True) + expected = r""" + index=[0, 2] + columns=dummy_value_1,dummy_value_2,dummy_value_3 + shape=(3, 3) + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str4(self) -> None: + """ + Test common call to `df_to_str` with print_dtypes. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_dtypes=True) + expected = r""" + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 + 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 + 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A + 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str5(self) -> None: + """ + Test common call to `df_to_str` with multiple args. 
+ """ + df = self.get_test_data() + actual = hpandas.df_to_str( + df, print_shape_info=True, print_dtypes=True, tag="df" + ) + expected = r""" + # df= + index=[0, 2] + columns=dummy_value_1,dummy_value_2,dummy_value_3 + shape=(3, 3) + * type= + col_name dtype num_unique num_nans first_elem type(first_elem) + 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 + 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 + 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A + 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str6(self) -> None: + """ + Test common call to `df_to_str` with `pd.Series`. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df["dummy_value_2"]) + expected = r""" + dummy_value_2 + 0 A + 1 B + 2 C + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str7(self) -> None: + """ + Test common call to `df_to_str` with `pd.Index`. + """ + df = self.get_test_data() + index = df.index + index.name = "index_name" + actual = hpandas.df_to_str(index) + expected = r""" + index_name + 0 0 + 1 1 + 2 2 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str8(self) -> None: + """ + Test that `-0.0` is replaced with `0.0`. + """ + test_data = { + "dummy_value_1": [1, 2, 3, 4], + "dummy_value_2": ["A", "B", "C", "D"], + "dummy_value_3": [0, 0, 0, 0], + "dummy_value_4": [+0.0, -0.0, +0.0, -0.0], + } + df = pd.DataFrame(data=test_data) + actual = hpandas.df_to_str(df, handle_signed_zeros=True) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 dummy_value_4 + 0 1 A 0 0.0 + 1 2 B 0 0.0 + 2 3 C 0 0.0 + 3 4 D 0 0.0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str9(self) -> None: + """ + Test that `-0.0` is replaced with `0.0` in a multi-index dataframe. 
+ """ + test_data = { + ("A", "X"): [-0.0, 5.0, -0.0], + ("A", "Y"): [2, 6, 0], + ("B", "X"): [0, 7, 3], + ("B", "Y"): [4.4, -0.0, 5.1], + } + df = pd.DataFrame(data=test_data) + actual = hpandas.df_to_str(df, handle_signed_zeros=True) + expected = r""" + A B + X Y X Y + 0 0.0 2 0 4.4 + 1 5.0 6 7 0.0 + 2 0.0 0 3 5.1""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_df_to_str10(self) -> None: + """ + Test common call to `df_to_str` with `print_memory_usage = True`. + """ + df = self.get_test_data() + actual = hpandas.df_to_str(df, print_memory_usage=True) + # This is required by `numpy` >= 2.1.0 + expected = r""" + * memory= + shallow deep + Index 132.0 b 132.0 b + dummy_value_1 24.0 b 24.0 b + dummy_value_2 24.0 b 150.0 b + dummy_value_3 24.0 b 24.0 b + total 204.0 b 330.0 b + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_head +# ############################################################################# + + +class Test_head(hunitest.TestCase): + def test1(self) -> None: + """ + Test basic head functionality without seed. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col2": ["a", "b", "c", "d", "e"], + } + ) + hpandas.head(df, num_rows=2) + + def test2(self) -> None: + """ + Test head with a seed for reproducible sampling. + """ + # Prepare input. + df = pd.DataFrame( + { + "col1": list(range(10)), + "col2": list("abcdefghij"), + } + ) + hpandas.head(df, seed=42, num_rows=3) + + def test3(self) -> None: + """ + Test head with different num_rows parameter. + """ + # Prepare input. 
def _get_df(date: datetime.date, seed: int = 42) -> pd.DataFrame:
    """
    Create random intraday data for 5 instruments, like:

    ```
                               idx instr  val1  val2
    2020-01-01 09:30:00-05:00    0     A    81    35
    2020-01-01 09:35:00-05:00    0     A    14    58
    2020-01-01 09:40:00-05:00    0     A     3    81
    ```

    :param date: day to generate the data for
    :param seed: seed for the random number generator
    :return: data for instruments A to E stacked one after the other, each
        indexed by 5-minute timestamps from 09:30 to 16:00 New York time
    """
    instruments = "A B C D E".split()
    # Build the intraday index in the New York timezone.
    day = pd.Timestamp(date, tz="America/New_York")
    start_date = day.replace(hour=9, minute=30)
    end_date = day.replace(hour=16, minute=0)
    # Use the "min" alias since "T" is deprecated in `pandas` >= 2.2.
    df_idx = pd.date_range(start_date, end_date, freq="5min")
    _LOG.debug("df_idx=[%s, %s]", min(df_idx), max(df_idx))
    _LOG.debug("len(df_idx)=%s", len(df_idx))
    random.seed(seed)
    # For each instrument generate random data.
    dfs = []
    for idx, inst in enumerate(instruments):
        # The values of `val1` are drawn before the ones of `val2`, so the
        # order of the keys matters for reproducibility.
        df_tmp = pd.DataFrame(
            {
                "idx": idx,
                "instr": inst,
                "val1": [random.randint(0, 100) for _ in range(len(df_idx))],
                "val2": [random.randint(0, 100) for _ in range(len(df_idx))],
            },
            index=df_idx,
        )
        dfs.append(df_tmp)
    # Create a single df for all the instruments.
    df = pd.concat(dfs)
    return df
+ pd.testing.assert_frame_equal( + df1, df2, check_dtype=False, check_categorical=False + ) + return df1_as_str + + +# ############################################################################# + + +# ############################################################################# +# TestParquet1 +# ############################################################################# + + +class TestParquet1(hunitest.TestCase): + def test_get_df1(self) -> None: + """ + Check the output of `_get_df()`. + """ + # Prepare data. + df = _get_df_example1() + # Check. + actual = hpandas.df_to_str(df, print_shape_info=True, tag="df") + expected = r"""# df= + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(395, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ... + 2020-01-01 15:50:00-05:00 4 E 57 3 + 2020-01-01 15:55:00-05:00 4 E 33 50 + 2020-01-01 16:00:00-05:00 4 E 96 75""" + self.assert_equal(actual, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def get_file_name(self) -> str: + dir_name = self.get_scratch_space() + file_name = os.path.join(dir_name, "df.parquet") + return file_name + + def write_data_as_parquet(self) -> Tuple[pd.DataFrame, str]: + # Prepare data. + df = _get_df_example1() + # Save data. + file_name = self.get_file_name() + hparque.to_parquet(df, file_name, log_level=logging.INFO) + return df, file_name + + def write_and_read_helper(self, columns: List[str]) -> None: + """ + - Save a dataframe as Parquet + - Read back certain columns of the data from the file + - Check that the df is what expected + """ + df, file_name = self.write_data_as_parquet() + # Read back one column of the data. + df2 = hparque.from_parquet( + file_name, columns=columns, log_level=logging.INFO + ) + _LOG.debug("df2=\n%s", df2.head(3)) + # Check. 
+ df = df[columns] + _compare_dfs(self, df, df2) + + def test_write_and_read_everything1(self) -> None: + """ + Read all the columns from the file. + """ + df, file_name = self.write_data_as_parquet() + # Read data back. + df2 = hparque.from_parquet(file_name, log_level=logging.INFO) + _LOG.debug("df2=\n%s", df2.head(3)) + # Check. + _compare_dfs(self, df, df2) + + def test_write_and_read_one_column1(self) -> None: + """ + - Read back one column of the data from the file. + """ + # Read back one column of the data. + columns = ["val1"] + self.write_and_read_helper(columns) + + def test_write_and_read_two_columns1(self) -> None: + """ + Read back one column of the data from the file. + """ + # Read back two columns of the data. + columns = ["idx", "val1"] + self.write_and_read_helper(columns) + + # ////////////////////////////////////////////////////////////////////////////// + + def read_filtered_parquet( + self, file_name: str, filters: Any + ) -> pd.DataFrame: + filesystem = None + dataset = parquet.ParquetDataset( + file_name, + filesystem=filesystem, + filters=filters, + ) + columns = None + table = dataset.read(columns=columns) + df = table.to_pandas() + _LOG.debug("df=\n%s", df.head(3)) + return df + + def test_read_with_filter1(self) -> None: + """ + Read only a subset of the rows. + """ + _, file_name = self.write_data_as_parquet() + # Read. + filters = [] + filters.append([("idx", "=", 0)]) + df2 = self.read_filtered_parquet(file_name, filters) + # Check. + actual = hpandas.df_to_str(df2, print_shape_info=True, tag="df") + expected = r"""# df= + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(79, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ... 
+ 2020-01-01 15:50:00-05:00 0 A 29 76 + 2020-01-01 15:55:00-05:00 0 A 12 8 + 2020-01-01 16:00:00-05:00 0 A 48 49""" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_write_and_read_partition_parquet_files_with_unit(self) -> None: + """ + Write the Pandas DataFrame to partitioned Parquet files and read it + back, verifying the retention of time unit information in the index. + """ + # Prepare test data. + dst_dir = os.path.join(self.get_scratch_space(), "tmp.partition_parquet") + initial_df = _get_test_df_with_timestamps() + initial_df.index = initial_df.index.as_unit("us") + partition_columns = initial_df.columns.tolist() + # The `to_partitioned_parquet` saves the given dataframe as Parquet + # files partitioned along the given columns. + hparque.to_partitioned_parquet(initial_df, partition_columns, dst_dir) + df_from_parquet_files = hparque.from_parquet(dst_dir) + # Check that the time unit is ns. + self.assert_equal("ns", df_from_parquet_files.index.unit) + # TODO(Vlad): Refactor after CmampTask7331 is resolved. + # self.assert_equal(initial_df.index.unit, df.index.unit) + + def test_write_and_read_parquet_file_with_unit(self) -> None: + """ + Write the provided DataFrame to Parquet file and read it back, + verifying the retention of time unit information in the index. + """ + test_parquet_file = os.path.join( + self.get_scratch_space(), "tmp_dummy.parquet" + ) + initial_df = _get_test_df_with_timestamps() + initial_df.index = initial_df.index.as_unit("us") + # The `to_parquet` function writes a DF to a single parquet file without + # any partition. + hparque.to_parquet(initial_df, test_parquet_file) + df = hparque.from_parquet(test_parquet_file) + self.assert_equal("ns", df.index.unit) + # TODO(Vlad): Refactor after CmampTask7331 is resolved. 
+ # self.assert_equal(initial_df.index.unit, df.index.unit) + + @pytest.mark.skip(reason="TODO(Juraj): HelpersTask21.") + def test_save_read_concat_data(self) -> None: + """ + Verify that data produced by different version of Pandas preserves + types when reading/writing to/from Parquet. + """ + # Copy sample data that saved with the Pandas v.1.5.1 from S3 to the + # scratch dir. + s3_path = self.get_s3_input_dir() + local_path = self.get_scratch_space() + aws_profile = "ck" + hs3.copy_data_from_s3_to_local_dir(s3_path, local_path, aws_profile) + # Read sample data from the scratch dir. + sample_data = hparque.from_parquet(local_path) + # Generate artificial test data. + data = { + "timestamp": [1696896000000], + "open": [27578.4], + "high": [27584.3], + "low": [27571.2], + "close": [27571.3], + "volume": [154.933], + "exchange_id": ["binance"], + "knowledge_timestamp": [ + pd.Timestamp("2023-11-06 14:15:11.241716+0000", tz="UTC") + ], + } + index = pd.Series( + [pd.Timestamp("2023-10-10T00:00:00+00:00")], name="timestamp" + ) + test_data = pd.DataFrame(data, index=index) + # Concatenate sample and test data and save it to the scratch dir. + combined_test_data = pd.concat([sample_data, test_data]) + local_combined_file_path = os.path.join( + local_path, "combined_dummy.parquet" + ) + hparque.to_parquet(combined_test_data, local_combined_file_path) + # Read the data back from the scratch dir. + actual_df = hparque.from_parquet(local_combined_file_path) + # Check that the data types the same as in the sample data. 
+ dtypes_sample = str(sample_data.dtypes) + dtypes_actual = str(actual_df.dtypes) + self.assert_equal(dtypes_sample, dtypes_actual, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# TestPartitionedParquet1 +# ############################################################################# + + +class TestPartitionedParquet1(hunitest.TestCase): + # From https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data + # A dataset can exploit a nested structure, where the sub-dir names hold + # information about which subset of the data is stored in that dir + # E.g., "Hive" partitioning scheme "key=vale" dir names + + def write_partitioned_dataset_and_check( + self, + df: pd.DataFrame, + partition_cols: List[str], + exp_dir_signature: Optional[str], + ) -> str: + """ + - Write df as a partitioned dataset + - (Optional) Check the signature of the directory + + :param partition_cols: columns used for + :param exp_dir_signature: expected signature of the written directory + :return path to the saved Parquet data + """ + _LOG.debug(hprint.to_str("partition_cols")) + # Prepare data. + dir_name = os.path.join(self.get_scratch_space(), "data.parquet") + table = pyarrow.Table.from_pandas(df) + # Write partitioned dataset. + parquet.write_to_dataset( + table, + dir_name, + partition_cols, + ) + # Check dir signature. 
+ if exp_dir_signature is not None: + include_file_content = False + remove_dir_name = True + dir_signature = hunitest.get_dir_signature( + dir_name, include_file_content, remove_dir_name=remove_dir_name + ) + self.assert_equal( + dir_signature, + exp_dir_signature, + fuzzy_match=True, + purify_text=True, + ) + return dir_name + + def write_and_read_helper( + self, + df: pd.DataFrame, + partition_cols: List[str], + exp_dir_signature: Optional[str], + columns_to_read: Optional[List[str]], + ) -> str: + """ + - Write df as a partitioned dataset using `partitioned_cols` + - Read certain column back + + :param partition_cols: columns used for + :param exp_dir_signature: expected signature of the written directory + :return: read df as string + """ + _LOG.debug(hprint.to_str("partition_cols columns_to_read")) + # Write and check. + dir_name = self.write_partitioned_dataset_and_check( + df, partition_cols, exp_dir_signature + ) + # Read back certain columns. + df2 = hparque.from_parquet( + dir_name, columns=columns_to_read, log_level=logging.INFO + ) + # Compare. + if columns_to_read is not None: + df = df[columns_to_read] + # + hdbg.dassert_set_eq(df.columns, df2.columns) + df2 = df2[df.columns] + df_as_str = _compare_dfs(self, df, df2) + return df_as_str + + # ////////////////////////////////////////////////////////////////////////////// + + def test_write_and_read1(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read everything back + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = r""" + # Dir structure + . 
+ idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet + idx=2 + idx=2/data.parquet + idx=3 + idx=3/data.parquet + idx=4 + idx=4/data.parquet""" + columns_to_read = None + self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + + def test_write_and_read2(self) -> None: + """ + - Write a partitioned dataset with two partitioning columns + - Read everything back + """ + df = _get_df_example1() + partition_cols = ["idx", "instr"] + exp_dir_signature = r"""# Dir structure + . + idx=0 + idx=0/instr=A + idx=0/instr=A/data.parquet + idx=1 + idx=1/instr=B + idx=1/instr=B/data.parquet + idx=2 + idx=2/instr=C + idx=2/instr=C/data.parquet + idx=3 + idx=3/instr=D + idx=3/instr=D/data.parquet + idx=4 + idx=4/instr=E + idx=4/instr=E/data.parquet""" + # Read back everything. + columns_to_read = None + self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + + def test_write_and_read3(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read two columns back + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = None + columns_to_read = ["idx", "instr"] + df_as_str = self.write_and_read_helper( + df, partition_cols, exp_dir_signature, columns_to_read + ) + expected = r"""# = + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr + shape=(395, 2) + idx instr + 2020-01-01 09:30:00-05:00 0 A + 2020-01-01 09:35:00-05:00 0 A + 2020-01-01 09:40:00-05:00 0 A + ... + 2020-01-01 15:50:00-05:00 4 E + 2020-01-01 15:55:00-05:00 4 E + 2020-01-01 16:00:00-05:00 4 E""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + def test_write_and_read4(self) -> None: + """ + - Write a partitioned dataset with one partitioning column + - Read two columns back filtering by the one of the partitioned column + """ + df = _get_df_example1() + partition_cols = ["idx"] + exp_dir_signature = None + # Write and check. 
+ dir_name = self.write_partitioned_dataset_and_check( + df, partition_cols, exp_dir_signature + ) + # Read back everything. + columns_to_read = ["idx", "instr"] + filters = [] + filters.append(("idx", "=", 0)) + # Note that `from_parquet` doesn't work with filters. + # df2 = hparque.from_parquet( + # dir_name, + # columns=columns_to_read, + # filters=filters, + # log_level=logging.INFO, + # ) + filesystem = None + dataset = parquet.ParquetDataset( + dir_name, + filesystem=filesystem, + filters=filters, + ) + table = dataset.read(columns=columns_to_read) + df2 = table.to_pandas() + # Compare. + df_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="df") + expected = r"""# df= + index=[0, 78] + columns=idx,instr + shape=(79, 2) + idx instr + 0 0 A + 1 0 A + 2 0 A + ... + 76 0 A + 77 0 A + 78 0 A""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + # ////////////////////////////////////////////////////////////////////////////// + + def test_merge1(self) -> None: + """ + - Write a partitioned dataset in multiple chunks using the same partitioning + column + - Make sure that reading it back we get the original data. + """ + df = _get_df_example1() + # + partition_cols = ["idx"] + # Write the first chunk. + df_chunk1 = df[df["idx"].isin([0, 1])] + exp_dir_signature = """ + # Dir structure + . + idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet""" + # Write and check. + _ = self.write_partitioned_dataset_and_check( + df_chunk1, partition_cols, exp_dir_signature + ) + # Write the second chunk. + df_chunk2 = df[df["idx"].isin([2, 3, 4])] + exp_dir_signature = """ + # Dir structure + . + idx=0 + idx=0/data.parquet + idx=1 + idx=1/data.parquet + idx=2 + idx=2/data.parquet + idx=3 + idx=3/data.parquet + idx=4 + idx=4/data.parquet""" + # Write and check. + dir_name = self.write_partitioned_dataset_and_check( + df_chunk2, partition_cols, exp_dir_signature + ) + # Read everything. 
+ columns_to_read = None + df2 = hparque.from_parquet( + dir_name, columns=columns_to_read, log_level=logging.INFO + ) + # Compare. + hdbg.dassert_set_eq(df.columns, df2.columns) + df2 = df2[df.columns] + df_as_str = _compare_dfs(self, df, df2) + expected = r""" + # = + index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] + columns=idx,instr,val1,val2 + shape=(395, 4) + idx instr val1 val2 + 2020-01-01 09:30:00-05:00 0 A 81 35 + 2020-01-01 09:35:00-05:00 0 A 14 58 + 2020-01-01 09:40:00-05:00 0 A 3 81 + ... + 2020-01-01 15:50:00-05:00 4 E 57 3 + 2020-01-01 15:55:00-05:00 4 E 33 50 + 2020-01-01 16:00:00-05:00 4 E 96 75""" + self.assert_equal(df_as_str, expected, fuzzy_match=True) + self.assert_equal(df_as_str, expected, fuzzy_match=True) + + def _run_write_and_read_mixed_units_partitioned_dataset( + self, first_unit: str, second_unit: str + ) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + :param first_unit: time unit of the first DataFrame + :param second_unit: time unit of the second DataFrame + """ + initial_df = _get_test_df_with_timestamps() + partition_columns = ["n_legs", "animal", "year"] + dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") + # Write first DF as partitioned parquet. + first_df = initial_df.copy() + first_df.index = first_df.index.as_unit(first_unit) + first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( + f"datetime64[{first_unit}, UTC]" + ) + hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) + # Write second DF as partitioned parquet. + second_df = initial_df.copy() + second_df.index = second_df.index.as_unit(second_unit) + second_df["knowledge_timestamp"] = second_df[ + "knowledge_timestamp" + ].astype(f"datetime64[{second_unit}, UTC]") + hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) + # Read it back. 
+ _ = hparque.from_parquet(dst_dir) + + def test_write_and_read_mixed_units_partition_dataset_1(self) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + The combination `ns` and `us` should not raise an error. + See CmampTask7331 for details. + """ + self._run_write_and_read_mixed_units_partitioned_dataset("ns", "us") + + @pytest.mark.skip( + reason="Since names and order the files is not guaranteed, the test is " + "flaky, decided to skip it for now.", + ) + def test_write_and_read_mixed_units_partition_dataset_2(self) -> None: + """ + Write two DataFrames with different time units to a partitioned Parquet + dataset and read it back. + + The combination `ms` and `us` should raise an error. + """ + with self.assertRaises(pyarrow.lib.ArrowInvalid): + self._run_write_and_read_mixed_units_partitioned_dataset("ms", "us") + + +# ############################################################################# + + +# ############################################################################# +# TestGetParquetFiltersFromTimestampInterval1 +# ############################################################################# + + +class TestGetParquetFiltersFromTimestampInterval1(hunitest.TestCase): + def test_no_interval(self) -> None: + """ + No timestamps provided. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = None + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + self.assertIsNone(filters) + + def test_by_month_half1(self) -> None: + """ + Test a left-bound interval [..., None]. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = None + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = r"[[('year', '==', 2020), ('month', '>=', 1)], [('year', '>', 2020)]]" + self.assert_equal(actual, expected) + + def test_by_month_half2(self) -> None: + """ + Test a right-bound interval [None, ...]. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = r"[[('year', '==', 2020), ('month', '<=', 1)], [('year', '<', 2020)]]" + self.assert_equal(actual, expected) + + def test_by_month_one_year1(self) -> None: + """ + Test an interval contained in a whole year. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_one_year2(self) -> None: + """ + Test an interval contained in a whole year. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 1)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_invalid1(self) -> None: + """ + Test an invalid interval. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:30:00+00:00") + with self.assertRaises(AssertionError) as fail: + hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(fail.exception) + expected = r""" + * Failed assertion * + 2020-01-02 09:31:00+00:00 <= 2020-01-02 09:30:00+00:00 + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_by_month_invalid2(self) -> None: + """ + Test an invalid partition mode. + """ + partition_mode = "new_mode" + start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") + end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") + with self.assertRaises(ValueError) as fail: + hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(fail.exception) + expected = r"Unknown partition mode `new_mode`!" + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_by_month_two_years1(self) -> None: + """ + Test an interval spanning two years. + """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2021-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 6)], " + r"[('year', '==', 2021), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_by_month_over_two_years1(self) -> None: + """ + Test an interval longer than two years. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, start_ts, end_ts + ) + actual = str(filters) + expected = ( + r"[[('year', '==', 2020), ('month', '>=', 6)], " + r"[('year', '>', 2020), ('year', '<', 2022)], " + r"[('year', '==', 2022), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + def test_additional_filters1(self) -> None: + """ + No timestamps provided while a single additional filter is provided. + """ + partition_mode = "by_year_month" + start_ts = None + end_ts = None + additional_filters = [ + ( + "currency_pair", + "in", + ("BTC_USDT",), + ) + ] + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, + start_ts, + end_ts, + additional_filters=additional_filters, + ) + actual = str(filters) + expected = r"[('currency_pair', 'in', ('BTC_USDT',))]" + self.assert_equal(actual, expected) + + def test_additional_filters2(self) -> None: + """ + Test an interval with multiple additional filters. 
+ """ + partition_mode = "by_year_month" + start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + additional_filters = [ + ("exchange_id", "in", ("binance")), + ("currency_pairs", "in", ("ADA_USDT", "BTC_USDT")), + ] + filters = hparque.get_parquet_filters_from_timestamp_interval( + partition_mode, + start_ts, + end_ts, + additional_filters=additional_filters, + ) + actual = str(filters) + expected = ( + r"[[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '==', 2020), ('month', '>=', 6)], " + r"[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '>', 2020), ('year', '<', 2022)], " + r"[('exchange_id', 'in', 'binance'), " + r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " + r"('year', '==', 2022), ('month', '<=', 12)]]" + ) + self.assert_equal(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestAddDatePartitionColumns +# ############################################################################# + + +class TestAddDatePartitionColumns(hunitest.TestCase): + def add_date_partition_columns_helper( + self, partition_mode: str, expected: str + ) -> None: + # Prepare inputs. + test_data = { + "dummy_value": [1, 2, 3], + "dummy_timestamp": [1638646800000, 1638646860000, 1638646960000], + } + start_timestamp = "2021-12-04 19:40:00+00:00" + end_timestamp = "2021-12-04 19:42:00+00:00" + index = pd.date_range(start_timestamp, end_timestamp, freq="1T") + df = pd.DataFrame(index=index, data=test_data) + # Run. + hparque.add_date_partition_columns(df, partition_mode) + # Check output. 
+ actual = hpandas.df_to_str(df) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_add_date_partition_columns1(self) -> None: + partition_mode = "by_date" + expected = r""" dummy_value dummy_timestamp date + 2021-12-04 19:40:00+00:00 1 1638646800000 20211204 + 2021-12-04 19:41:00+00:00 2 1638646860000 20211204 + 2021-12-04 19:42:00+00:00 3 1638646960000 20211204""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns2(self) -> None: + partition_mode = "by_year" + expected = r""" dummy_value dummy_timestamp year + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns3(self) -> None: + partition_mode = "by_year_month_day" + # pylint: disable=line-too-long + expected = r""" dummy_value dummy_timestamp year month day + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 12 4 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 12 4 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021 12 4""" + self.add_date_partition_columns_helper(partition_mode, expected) + + def test_add_date_partition_columns4(self) -> None: + partition_mode = "by_year_week" + expected = r""" dummy_value dummy_timestamp year weekofyear + 2021-12-04 19:40:00+00:00 1 1638646800000 2021 48 + 2021-12-04 19:41:00+00:00 2 1638646860000 2021 48 + 2021-12-04 19:42:00+00:00 3 1638646960000 2021 48""" + self.add_date_partition_columns_helper(partition_mode, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestToPartitionedDataset +# ############################################################################# + + +class TestToPartitionedDataset(hunitest.TestCase): + @staticmethod + def get_test_data1() -> pd.DataFrame: + 
test_data = { + "dummy_value_1": [1, 2, 3], + "dummy_value_2": ["A", "B", "C"], + "dummy_value_3": [0, 0, 0], + } + df = pd.DataFrame(data=test_data) + return df + + def test_get_test_data1(self) -> None: + test_data = self.get_test_data1() + actual = hpandas.df_to_str(test_data) + expected = r""" + dummy_value_1 dummy_value_2 dummy_value_3 + 0 1 A 0 + 1 2 B 0 + 2 3 C 0""" + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.skip( + reason="CmTask1305: after removing circular dependencies in " + "`hio.from_file`, this test fails reading a parquet file" + ) + def test_to_partitioned_dataset(self) -> None: + """ + Test partitioned Parquet datasets with existing columns. + """ + # Prepare inputs. + test_dir = self.get_scratch_space() + df = self.get_test_data1() + # Run. + partition_cols = ["dummy_value_1", "dummy_value_2"] + hparque.to_partitioned_parquet(df, partition_cols, test_dir) + # Check output. + include_file_content = False + remove_dir_name = True + dir_signature = hunitest.get_dir_signature( + test_dir, include_file_content, remove_dir_name=remove_dir_name + ) + expected = r""" + # Dir structure + . + dummy_value_1=1 + dummy_value_1=1/dummy_value_2=A + dummy_value_1=1/dummy_value_2=A/data.parquet + dummy_value_1=2 + dummy_value_1=2/dummy_value_2=B + dummy_value_1=2/dummy_value_2=B/data.parquet + dummy_value_1=3 + dummy_value_1=3/dummy_value_2=C + dummy_value_1=3/dummy_value_2=C/data.parquet""" + self.assert_equal( + dir_signature, expected, purify_text=True, fuzzy_match=True + ) + # + include_file_content = True + dir_signature = hunitest.get_dir_signature( + test_dir, include_file_content, remove_dir_name=remove_dir_name + ) + self.check_string(dir_signature, purify_text=True, fuzzy_match=True) + + def test_to_partitioned_dataset_wrong_column(self) -> None: + """ + Assert that wrong columns are detected before partitioning. + """ + # Prepare inputs. + test_dir = self.get_scratch_space() + df = self.get_test_data1() + # Run. 
+ partition_cols = ["void_column", "dummy_value_2"] + # Check output. + with self.assertRaises(AssertionError) as cm: + hparque.to_partitioned_parquet(df, partition_cols, test_dir) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + val1=['dummy_value_2', 'void_column'] + issubset + val2=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] + val1 - val2=['void_column'] + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# TestListAndMergePqFiles +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestListAndMergePqFiles(hmoto.S3Mock_TestCase): + def generate_test_data(self) -> hs3.AwsProfile: + """ + Upload test daily Parquet files for 3 days to the mocked S3 bucket. + """ + start_date = "2022-02-02" + end_date = "2022-02-04" + assets = ["A", "B", "C", "D", "E", "F"] + asset_col_name = "asset" + test_dir = self.get_scratch_space() + partition_mode = "by_year_month" + custom_partition_cols = "asset,year,month" + hparque.generate_parquet_files( + start_date, + end_date, + assets, + asset_col_name, + test_dir, + partition_mode=partition_mode, + custom_partition_cols=custom_partition_cols, + ) + s3fs_ = hs3.get_s3fs(self.mock_aws_profile) + s3_bucket = f"s3://{self.bucket_name}" + s3fs_.put(test_dir, s3_bucket, recursive=True) + return s3fs_ + + @pytest.mark.slow("~7 seconds.") + def test_list_and_merge_pq_files(self) -> None: + """ + Check if predefined generated Parquet files are properly merged. + """ + s3fs_ = self.generate_test_data() + # Prepare common `hs3.listdir` params. 
+ s3_bucket = f"s3://{self.bucket_name}" + pattern = "*.parquet" + only_files = True + use_relative_paths = True + # Check bucket content before merge. + parquet_path_list_before = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + self.assertEqual(len(parquet_path_list_before), 6) + # Add extra parquet files and rename existing one. + # e.g., `dummy.parquet`, `dummy_new.parquet`. + # Every second file is left intact to replicate ready out-of-the-box folder. + # e.g., `asset=A/year=2022/month=2/77a2534aaf9649fab6511cea53a6bf7f-0.parquet`. + for path in parquet_path_list_before[::2]: + original_path = f"{s3_bucket}/{path}" + original_file_name = os.path.basename(original_path) + renamed_path = original_path.replace( + original_file_name, "dummy.parquet" + ) + additional_path = original_path.replace( + original_file_name, "dummy_new.parquet" + ) + s3fs_.rename(original_path, renamed_path) + s3fs_.copy(renamed_path, additional_path) + # Check if edits are in place. + updated_parquet_path_list = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + data_parquet_path_list = [ + path for path in updated_parquet_path_list if "dummy" not in path + ] + self.assertEqual(len(updated_parquet_path_list), 9) + self.assertEqual(len(data_parquet_path_list), 3) + # Check bucket content after merge. 
+ hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) + parquet_path_list_after = hs3.listdir( + s3_bucket, + pattern, + only_files, + use_relative_paths, + aws_profile=s3fs_, + ) + parquet_path_list_after.sort() + expected_list = [ + "tmp.scratch/asset=A/year=2022/month=2/data.parquet", + "tmp.scratch/asset=B/year=2022/month=2/data.parquet", + "tmp.scratch/asset=C/year=2022/month=2/data.parquet", + "tmp.scratch/asset=D/year=2022/month=2/data.parquet", + "tmp.scratch/asset=E/year=2022/month=2/data.parquet", + "tmp.scratch/asset=F/year=2022/month=2/data.parquet", + ] + self.assertListEqual(parquet_path_list_after, expected_list) + + def test_list_and_merge_pq_files_duplicate_drop(self) -> None: + # Prepare test data. + test_data = { + "dummy_value_1": [1, 1, 1], + "dummy_value_2": ["A", "A", "A"], + "knowledge_timestamp": [1, 2, 3], + "end_download_timestamp": [3, 2, 1], + } + df = pd.DataFrame(data=test_data) + # Save test data to s3 bucket. + s3fs_ = hs3.get_s3fs(self.mock_aws_profile) + s3_bucket = f"s3://{self.bucket_name}" + original_sample_path = f"{s3_bucket}/dummy/data.parquet" + dummy_sample_path = original_sample_path.replace( + "data.parquet", "dummy.parquet" + ) + hparque.to_parquet(df, dummy_sample_path, aws_profile=s3fs_) + # Check if new columns are in place. + df = hparque.from_parquet(dummy_sample_path, aws_profile=s3fs_) + self.assertIn("knowledge_timestamp", df.columns) + self.assertIn("end_download_timestamp", df.columns) + self.assertEqual(len(df), 3) + # Check if duplicates are dropped after merge. 
+ hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) + df = hparque.from_parquet(original_sample_path, aws_profile=s3fs_) + self.assertEqual(len(df), 1) + + +# ############################################################################# + + +# ############################################################################# +# TestListAndMergePqFilesMixedUnits +# ############################################################################# + + +class TestListAndMergePqFilesMixedUnits(hunitest.TestCase): + def _list_and_merge_mixed_units_pq_files( + self, first_unit: str, second_unit: str + ) -> None: + """ + Run `list_and_merge_pq_files` with different time units in the same + column and index. + + :param first_unit: first time unit. + :param second_unit: second time unit. + """ + # Prepare test data. + dst_dir = os.path.join(self.get_scratch_space(), "tmp.list_and_merge") + first_file_name = os.path.join(dst_dir, "tmp.1first.parquet") + second_file_name = os.path.join(dst_dir, "tmp.2second.parquet") + merged_file_name = os.path.join(dst_dir, "tmp.merged.parquet") + # Write first DF with the `first_unit`. + initial_df = _get_test_df_with_timestamps() + first_df = initial_df.copy() + first_df.index = first_df.index.as_unit(first_unit) + first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( + f"datetime64[{first_unit}, UTC]" + ) + hparque.to_parquet(first_df, first_file_name) + # Write second DF with the `second_unit`. + second_df = initial_df.copy() + second_df.index = second_df.index.as_unit(second_unit) + second_df["knowledge_timestamp"] = second_df[ + "knowledge_timestamp" + ].astype(f"datetime64[{second_unit}, UTC]") + hparque.to_parquet(second_df, second_file_name) + # List and merge. + hparque.list_and_merge_pq_files(dst_dir, file_name="tmp.merged.parquet") + # Read it back. 
+ _ = hparque.from_parquet(merged_file_name) + + def test_parquet_files_with_mixed_time_units_1(self) -> None: + """ + Test merging Parquet files with the `ns` and `us`. + """ + first_unit = "ns" + second_unit = "us" + self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) + + # TODO(Nina): @Samarth fix the test. + @pytest.mark.skip(reason="Broken.") + def test_parquet_files_with_mixed_time_units_2(self) -> None: + """ + Test merging Parquet files with the `ms` and `ns`. + + It should raise an error. See CmampTask7331 for details. + + The test will not raise an assertion when the time units are `ms` and + `us`. The reason is that we do not lose data when converting from + the first time unit, which is `ms`, to the second time unit, which + is `us`, transitioning from low resolution to high resolution. + """ + first_unit = "us" + second_unit = "ms" + with self.assertRaises(pyarrow.lib.ArrowInvalid): + self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) + + +# ############################################################################# + + +# ############################################################################# +# TestYieldParquetTiles +# ############################################################################# + + +class TestYieldParquetTiles(hunitest.TestCase): + def generate_test_data(self) -> None: + """ + Generate test data and write it to a scratch dir. + + Data has the following structure: + + ``` + asset_id ... year month + end_ts + 2021-11-01 100 2021 11 + 2021-11-01 200 2021 11 + 2021-11-01 300 2021 11 + ... + 2022-02-01 200 2022 2 + 2022-02-01 300 2022 2 + 2022-02-01 400 2022 2 + ``` + """ + # Generate synthetic data. 
+ asset_ids = [100, 200, 300, 400] + prices = list(range(1, 17)) + volatility = list(range(17, 33)) + dates = ["2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01"] + dates = map(pd.Timestamp, dates) + index_ = [dates, asset_ids] + multi_index = pd.MultiIndex.from_product( + index_, names=["end_ts", "asset_id"] + ) + df = pd.DataFrame( + {"price": prices, "volatility": volatility}, index=multi_index + ) + df["year"] = df.index.get_level_values(0).year + df["month"] = df.index.get_level_values(0).month + df = df.reset_index(level=1) + _LOG.debug("Test data: df=\n%s", hpandas.df_to_str(df)) + # Write the data to a scratch dir. + partition_columns = ["asset_id", "year", "month"] + dst_dir = self.get_scratch_space() + hparque.to_partitioned_parquet(df, partition_columns, dst_dir) + + def test_yield_tiles_by_asset(self) -> None: + """ + Test reading only certain asset ids. + """ + self.generate_test_data() + # Read data. + file_name = self.get_scratch_space() + asset_ids = [100, 200] + asset_id_col = "asset_id" + asset_batch_size = 1 + columns = [asset_id_col, "price"] + generator_ = hparque.yield_parquet_tiles_by_assets( + file_name, asset_ids, asset_id_col, asset_batch_size, columns + ) + df = pd.concat(generator_) + _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) + # Check asset ids filtering. + actual = str(asset_ids) + expected = str(df[asset_id_col].unique().tolist()) + self.assert_equal(actual, expected) + + def test_yield_tiles_by_year(self) -> None: + """ + Test reading only certain asset ids and dates. + """ + self.generate_test_data() + # Read data. 
+ file_name = self.get_scratch_space() + start_year = 2021 + start_month = 12 + start_date = datetime.date(start_year, start_month, 1) + end_year = 2022 + end_month = 1 + end_date = datetime.date(end_year, end_month, 2) + asset_ids = [300, 400] + asset_id_col = "asset_id" + columns = [asset_id_col, "price"] + generator_ = hparque.yield_parquet_tiles_by_year( + file_name, + start_date, + end_date, + columns, + asset_ids=asset_ids, + asset_id_col=asset_id_col, + ) + df = pd.concat(generator_) + _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) + # Check asset ids filtering. + actual = str(asset_ids) + expected = str(df[asset_id_col].unique().tolist()) + self.assert_equal(actual, expected) + # Check start date filtering. + min_date = df.index.min() + self.assertEqual(min_date.month, start_month) + self.assertEqual(min_date.year, start_year) + # Check end date filtering. + max_date = df.index.max() + self.assertEqual(max_date.month, end_month) + self.assertEqual(max_date.year, end_year) + + +# ############################################################################# + + +# ############################################################################# +# TestBuildFilterWithOnlyEqualities +# ############################################################################# + + +class TestBuildFilterWithOnlyEqualities(hunitest.TestCase): + def test_year_month_day_equality(self) -> None: + """ + Test interval with same year, month and day. + """ + start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = ( + r"[('year', '==', 2022), ('month', '==', 12), ('day', '==', 2)]" + ) + self.assert_equal(actual, expected) + + def test_year_month_equality(self) -> None: + """ + Test interval with same year and month. 
+ """ + start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-28 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[('year', '==', 2022), ('month', '==', 12)]" + self.assert_equal(actual, expected) + + def test_year_equality(self) -> None: + """ + Test interval with same year. + """ + start_ts = pd.Timestamp("2022-10-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[('year', '==', 2022)]" + self.assert_equal(actual, expected) + + def test_no_equality(self) -> None: + """ + Test interval with different start and end years. + """ + start_ts = pd.Timestamp("2021-10-02 09:31:00+00:00") + end_ts = pd.Timestamp("2022-10-02 21:31:00+00:00") + filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) + actual = str(filters) + expected = r"[]" + self.assert_equal(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# TestPartitionedParquet2 +# ############################################################################# + + +class TestPartitionedParquet2(hunitest.TestCase): + """ + Test case for writing and reading partitioned Parquet datasets with mixed + timestamp formats. + """ + + def _get_test_df(self) -> pd.DataFrame: + """ + Create a DataFrame with timestamps. + """ + # Mock the get_current_time method. 
+ timestamp = pd.Timestamp("2024-05-20 00:00:00", tz="UTC") + index = [timestamp for _ in range(4)] + df = pd.DataFrame( + { + "bids": [200, 123, 263, 167], + "asks": [150, 120, 240, 150], + "symbol": ["BTC_USDT" for _ in range(4)], + }, + index=index, + ) + end_download_timestamp = "2024-06-04 20:38:43.467599+00:00" + df["end_download_timestamp"] = end_download_timestamp + return df + + def _run_write_and_read_mixed_timestamp_partitioned_dataset(self) -> None: + """ + Write two DataFrames with different timestamp formats to a partitioned + Parquet dataset and read it back. + """ + initial_df = self._get_test_df() + partition_columns = ["bids", "asks", "symbol"] + dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") + # Write first DF as partitioned parquet. + first_df = initial_df.copy() + hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) + # Write second DF as partitioned parquet. + second_df = initial_df.copy() + second_df["end_download_timestamp"] = pd.to_datetime( + second_df["end_download_timestamp"] + ) + hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) + # Read it back and verify the output. + combined_df = hparque.from_parquet(dst_dir) + combined_df["end_download_timestamp"] = pd.to_datetime( + combined_df["end_download_timestamp"] + ).dt.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + actual = hpandas.df_to_str(combined_df) + expected = r""" + end_download_timestamp bids asks symbol + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 167 150 BTC_USDT + ... 
+ 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 200 150 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT + 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test writing and reading a partitioned Parquet dataset with mixed + timestamp formats. + """ + self._run_write_and_read_mixed_timestamp_partitioned_dataset() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py new file mode 100644 index 000000000..8e65eeb2e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py @@ -0,0 +1,398 @@ +import argparse +import os + +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestParseLimitRange +# ############################################################################# + + +class TestParseLimitRange(hunitest.TestCase): + def test_parse_limit_range_valid1(self) -> None: + """ + Test parsing valid range format. + """ + limit_str = "1:5" + expected = (1, 5) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_valid2(self) -> None: + """ + Test parsing valid range format with same start and end. + """ + limit_str = "3:3" + expected = (3, 3) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_valid3(self) -> None: + """ + Test parsing valid range format with larger numbers. 
+ """ + limit_str = "10:100" + expected = (10, 100) + actual = hparser.parse_limit_range(limit_str) + self.assertEqual(actual, expected) + + def test_parse_limit_range_no_colon(self) -> None: + """ + Test that missing colon raises assertion error. + """ + limit_str = "15" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_multiple_colons(self) -> None: + """ + Test that multiple colons raise assertion error. + """ + limit_str = "1:2:3" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_invalid_start(self) -> None: + """ + Test that non-integer start raises fatal error. + """ + limit_str = "abc:5" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_invalid_end(self) -> None: + """ + Test that non-integer end raises fatal error. + """ + limit_str = "1:xyz" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_start_zero(self) -> None: + """ + Test that start index of 0 raises assertion error. + """ + limit_str = "0:5" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_end_zero(self) -> None: + """ + Test that end index of 0 raises assertion error. + """ + limit_str = "1:0" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + def test_parse_limit_range_start_greater_than_end(self) -> None: + """ + Test that start greater than end raises assertion error. 
+ """ + limit_str = "5:3" + with self.assertRaises(AssertionError): + hparser.parse_limit_range(limit_str) + + +# ############################################################################# +# TestApplyLimitRange +# ############################################################################# + + +class TestApplyLimitRange(hunitest.TestCase): + def test_apply_limit_range_no_limit(self) -> None: + """ + Test that None limit range returns original items. + """ + items = ["a", "b", "c", "d", "e"] + actual = hparser.apply_limit_range(items, None) + self.assertEqual(actual, items) + + def test_apply_limit_range_valid_range(self) -> None: + """ + Test applying valid range to items. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (1, 3) + expected = ["b", "c", "d"] # 0-indexed, inclusive + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_single_item(self) -> None: + """ + Test applying range that selects single item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (2, 2) + expected = ["c"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_first_item(self) -> None: + """ + Test applying range starting from first item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (0, 1) + expected = ["a", "b"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_last_item(self) -> None: + """ + Test applying range ending at last item. + """ + items = ["a", "b", "c", "d", "e"] + limit_range = (3, 4) + expected = ["d", "e"] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + def test_apply_limit_range_start_exceeds_length(self) -> None: + """ + Test that start index exceeding items length raises assertion error. 
+ """ + items = ["a", "b", "c"] + limit_range = (5, 6) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_end_exceeds_length(self) -> None: + """ + Test that end index exceeding items length raises assertion error. + """ + items = ["a", "b", "c"] + limit_range = (1, 5) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_custom_item_name(self) -> None: + """ + Test that custom item name doesn't affect functionality. + """ + items = [1, 2, 3, 4, 5] + limit_range = (0, 2) + expected = [1, 2, 3] + actual = hparser.apply_limit_range( + items, limit_range, item_name="numbers" + ) + self.assertEqual(actual, expected) + + def test_apply_limit_range_empty_list(self) -> None: + """ + Test applying limit range to empty list. + """ + items = [] + limit_range = (0, 1) + with self.assertRaises(AssertionError): + hparser.apply_limit_range(items, limit_range) + + def test_apply_limit_range_complex_objects(self) -> None: + """ + Test applying limit range to complex objects. + """ + items = [{"id": i, "value": f"item{i}"} for i in range(10)] + limit_range = (2, 4) + expected = [ + {"id": 2, "value": "item2"}, + {"id": 3, "value": "item3"}, + {"id": 4, "value": "item4"}, + ] + actual = hparser.apply_limit_range(items, limit_range) + self.assertEqual(actual, expected) + + +# ############################################################################# +# Test_add_multi_file_args +# ############################################################################# + + +class Test_add_multi_file_args(hunitest.TestCase): + def test_adds_correct_arguments(self) -> None: + """ + Test that add_multi_file_args adds the correct arguments to parser. + """ + # Prepare inputs. + parser = argparse.ArgumentParser() + # Run function. + hparser.add_multi_file_args(parser) + # Check that the arguments were added. 
+ namespace = parser.parse_args([]) + self.assertTrue(hasattr(namespace, "files")) + self.assertTrue(hasattr(namespace, "from_files")) + self.assertTrue(hasattr(namespace, "input")) + + +# ############################################################################# +# Test_parse_multi_file_args +# ############################################################################# + + +class Test_parse_multi_file_args(hunitest.TestCase): + # Helper method. + def _create_test_file(self, file_path: str, content: str = "test") -> None: + """ + Create a test file with given content. + """ + hio.create_dir(os.path.dirname(file_path), incremental=True) + hio.to_file(file_path, content) + + def test_files_comma_separated(self) -> None: + """ + Test parsing comma-separated file list. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + file3 = f"{scratch_dir}/file3.txt" + self._create_test_file(file1) + self._create_test_file(file2) + self._create_test_file(file3) + # Create namespace with files argument. + args = argparse.Namespace() + args.files = f"{file1},{file2},{file3}" + args.from_files = None + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2, file3] + self.assert_equal(str(actual), str(expected)) + + def test_from_files(self) -> None: + """ + Test parsing file containing list of files. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + file3 = f"{scratch_dir}/file3.txt" + self._create_test_file(file1) + self._create_test_file(file2) + self._create_test_file(file3) + # Create file list. + list_file = f"{scratch_dir}/list.txt" + content = f"{file1}\n{file2}\n{file3}\n" + self._create_test_file(list_file, content) + # Create namespace with from_files argument. 
+ args = argparse.Namespace() + args.files = None + args.from_files = list_file + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2, file3] + self.assert_equal(str(actual), str(expected)) + + def test_from_files_with_empty_lines(self) -> None: + """ + Test parsing file with empty lines and comments. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + self._create_test_file(file1) + self._create_test_file(file2) + # Create file list with empty lines and comments. + list_file = f"{scratch_dir}/list.txt" + content = f""" + # This is a comment + {file1} + + # Another comment + {file2} + + """ + self._create_test_file(list_file, content) + # Create namespace with from_files argument. + args = argparse.Namespace() + args.files = None + args.from_files = list_file + args.input = None + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2] + self.assert_equal(str(actual), str(expected)) + + def test_input_multiple(self) -> None: + """ + Test parsing repeated --input arguments. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test files. + file1 = f"{scratch_dir}/file1.txt" + file2 = f"{scratch_dir}/file2.txt" + self._create_test_file(file1) + self._create_test_file(file2) + # Create namespace with input argument. + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = [file1, file2] + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1, file2] + self.assert_equal(str(actual), str(expected)) + + def test_backward_compatibility_single_file(self) -> None: + """ + Test that single -i/--input still works. + """ + # Prepare inputs. + scratch_dir = self.get_scratch_space() + # Create test file. 
+ file1 = f"{scratch_dir}/file1.txt" + self._create_test_file(file1) + # Create namespace with input argument (single file, not list). + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = file1 # Single file as string, not list + # Run function. + actual = hparser.parse_multi_file_args(args) + # Check outputs. + expected = [file1] + self.assert_equal(str(actual), str(expected)) + + def test_file_validation(self) -> None: + """ + Test that non-existent files raise error. + """ + # Create namespace with non-existent file. + args = argparse.Namespace() + args.files = "/nonexistent/file1.txt,/nonexistent/file2.txt" + args.from_files = None + args.input = None + # Run function and check that it raises error. + with self.assertRaises(AssertionError): + hparser.parse_multi_file_args(args) + + def test_empty_file_list(self) -> None: + """ + Test empty file list handling. + """ + # Prepare inputs. + + # Create namespace with no files. + args = argparse.Namespace() + args.files = None + args.from_files = None + args.input = None + # Run function and check that it raises error. + with self.assertRaises(AssertionError) as cm: + hparser.parse_multi_file_args(args) + # Check the error message. 
+ act = str(cm.exception) + self.assertIn("No input files specified", act) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py new file mode 100644 index 000000000..8064ddbe1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py @@ -0,0 +1,97 @@ +import logging + +import helpers.hpickle as hpickle +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestToPickleable +# ############################################################################# + + +class TestToPickleable(hunitest.TestCase): + def test_list1(self) -> None: + """ + Test that a list is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = [1, "2", [3, 0.4], (5, None)] + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = [1, "2", [3, 0.4], (5, None)] + self.assertEqual(actual, expected) + + def test_list2(self) -> None: + """ + Test that a list is converted to a pickleable correctly. + + force_values_to_string = True + """ + _obj = [1, "2", [3, 0.4], (5, None)] + force_values_to_string = True + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ["1", "2", ["3", "0.4"], ("5", "None")] + self.assertEqual(actual, expected) + + def test_tuple1(self) -> None: + """ + Test that a tuple is converted to a pickleable correctly. 
+ + force_values_to_string = False + """ + _obj = (1, "2", [3, 0.4], (5, None)) + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = (1, "2", [3, 0.4], (5, None)) + self.assertEqual(actual, expected) + + def test_dict1(self) -> None: + """ + Test that a dict is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = {"a": 1, 2: ["b", 3], "c": {0.4: None}} + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = {"a": 1, 2: ["b", 3], "c": {0.4: None}} + self.assertEqual(actual, expected) + + def test_iterable1(self) -> None: + """ + Test that an iterable is converted to a pickleable correctly. + + force_values_to_string = False + """ + _obj = {1, 2, 3} + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = [1, 2, 3] + self.assertEqual(actual, expected) + + def test_unpickleable1(self) -> None: + """ + Test that an unpickleable object is converted to a string. + + force_values_to_string = False + """ + _obj = lambda x: x + force_values_to_string = False + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ". at 0x>" + self.assert_equal(actual, expected, purify_text=True) + + def test_unpickleable2(self) -> None: + """ + Test that an unpickleable object is converted to a string. + + force_values_to_string = True + """ + _obj = lambda x: x + force_values_to_string = True + actual = hpickle.to_pickleable(_obj, force_values_to_string) + expected = ". 
at 0x>" + self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py new file mode 100644 index 000000000..a829ea82f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py @@ -0,0 +1,506 @@ +import datetime +import logging +import os +from typing import Any, Optional + +import pandas as pd +import pytest + +import config_root.config as cconfig +import helpers.hio as hio +import helpers.hplayback as hplayba +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestJsonRoundtrip1 +# ############################################################################# + + +class TestJsonRoundtrip1(hunitest.TestCase): + """ + Test roundtrip conversion through jsonpickle for different types. 
+ """ + + def test1(self) -> None: + obj = 3 + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test2(self) -> None: + obj = "hello" + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test3(self) -> None: + data = { + "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], + "Price": [700, 250, 800, 1200], + } + df = pd.DataFrame(data, columns=["Product", "Price"]) + df.index.name = "hello" + # + obj = df + hplayba.round_trip_convert(obj, logging.DEBUG) + + def test4(self) -> None: + obj = datetime.date(2015, 1, 1) + # + hplayba.round_trip_convert(obj, logging.DEBUG) + + +# ############################################################################# +# TestPlaybackInputOutput1 +# ############################################################################# + + +class TestPlaybackInputOutput1(hunitest.TestCase): + """ + Freeze the output of Playback. + """ + + def helper(self, mode: str, *args: Any, **kwargs: Any) -> None: + # TODO(gp): Factor out the common code. + # Define a function to generate a unit test for. 
+ def get_result_assert_equal(a: Any, b: Any) -> Any: + p = hplayba.Playback("assert_equal") + if isinstance(a, datetime.date) and isinstance(b, datetime.date): + return p.run(abs(a - b)) + if isinstance(a, dict) and isinstance(b, dict): + c = {} + c.update(a) + c.update(b) + return p.run(c) + if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): + c = cconfig.Config(update_mode="overwrite") + c.update(a) + c.update(b) + return p.run(c) + return p.run(a + b) + + def get_result_check_string(a: Any, b: Any) -> Any: + p = hplayba.Playback("check_string") + if isinstance(a, datetime.date) and isinstance(b, datetime.date): + return p.run(abs(a - b)) + if isinstance(a, dict) and isinstance(b, dict): + c = {} + c.update(a) + c.update(b) + return p.run(c) + if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): + c = cconfig.Config(update_mode="overwrite") + c.update(a) + c.update(b) + return p.run(c) + return p.run(a + b) + + def get_result_assert_equal_none() -> Any: + p = hplayba.Playback("assert_equal") + return p.run("Some string.") + + def get_result_check_string_none() -> Any: + p = hplayba.Playback("check_string") + return p.run("Some string") + + if mode == "assert_equal": + if not args and not kwargs: + code = get_result_assert_equal_none() + else: + code = get_result_assert_equal(*args, **kwargs) + elif mode == "check_string": + if not args and not kwargs: + code = get_result_check_string_none() + else: + code = get_result_check_string(*args, **kwargs) + else: + raise ValueError("Invalid mode ") + self.check_string(code, purify_text=True) + _LOG.debug("Testing code:\n%s", code) + exec(code, locals()) # pylint: disable=exec-used + + def test1(self) -> None: + """ + Test for int inputs. + """ + # Create inputs. + a = 3 + b = 2 + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test2(self) -> None: + """ + Test for string inputs. + """ + # Create inputs. 
+ a = "test" + b = "case" + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test3(self) -> None: + """ + Test for list inputs. + """ + # Create inputs. + a = [1, 2, 3] + b = [4, 5, 6] + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test4(self) -> None: + """ + Test for dict inputs. + """ + # Create inputs. + a = {"1": 2} + b = {"3": 4} + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test5(self) -> None: + """ + Test for pd.DataFrame inputs. + """ + # Create inputs. + a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test6(self) -> None: + """ + Test for datetime.date inputs (using `jsonpickle`). + """ + # Create inputs. + a = datetime.date(2015, 1, 1) + b = datetime.date(2012, 1, 1) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test7(self) -> None: + """ + Test for int inputs with check_string. + """ + # Create inputs. + a = 3 + b = 2 + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test8(self) -> None: + """ + Test for string inputs with check_string. + """ + # Create inputs. + a = "test" + b = "case" + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test9(self) -> None: + """ + Test for list inputs with check_string. + """ + # Create inputs. + a = [1, 2, 3] + b = [4, 5, 6] + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test10(self) -> None: + """ + Test for dict inputs with check_string. + """ + # Create inputs. + a = {"1": 2} + b = {"3": 4} + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test11(self) -> None: + """ + Test for pd.DataFrame inputs with check_string. 
+ """ + # Create inputs. + a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) + b = pd.DataFrame({"Price": [1, 1, 1, 1]}) + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test12(self) -> None: + """ + Test for dict inputs with data structures recursion. + """ + # Create inputs. + a = {"1": ["a", 2]} + b = {"3": pd.DataFrame({"Price": [700, 250, 800, 1200]}), "4": {"5": 6}} + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test13(self) -> None: + """ + Test for pd.Series inputs with check_string. + """ + # Create inputs. + a = pd.Series([10, 20, 15], name="N Numbers") + b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test14(self) -> None: + """ + Test for pd.Series inputs with assert_equal. + """ + # Create inputs. + a = pd.Series([10, 20, 15], name="N Numbers") + b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test15(self) -> None: + """ + Test for cconfig.Config inputs with check_string. + """ + # Create inputs. + a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) + b = cconfig.Config([("meta", "meta value 2")]) + # Generate, freeze and execute a unit test. + self.helper("check_string", a=a, b=b) + + def test16(self) -> None: + """ + Test for cconfig.Config inputs with assert_equal. + """ + # Create inputs. + a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) + b = cconfig.Config([("meta", "meta value 2")]) + # Generate, freeze and execute a unit test. + self.helper("assert_equal", a=a, b=b) + + def test17(self) -> None: + """ + Test if testing function has no args with check_string. + """ + self.helper("check_string") + + def test18(self) -> None: + """ + Test if testing function has no args with assert_equal. 
+ """ + self.helper("assert_equal") + + +# ############################################################################# +# TestToPythonCode1 +# ############################################################################# + + +class TestToPythonCode1(hunitest.TestCase): + """ + Test to_python_code() for different types. + """ + + def _check(self, input_obj: Any, expected: str) -> None: + res = hplayba.to_python_code(input_obj) + self.assert_equal(res, expected) + + def test_float1(self) -> None: + """ + Test float without first zero. + """ + self._check(0.1, "0.1") + + def test_float2(self) -> None: + """ + Test positive float. + """ + self._check(1.0, "1.0") + + def test_float3(self) -> None: + """ + Test negative float. + """ + self._check(-1.1, "-1.1") + + def test_int1(self) -> None: + """ + Test zero. + """ + self._check(0, "0") + + def test_int2(self) -> None: + """ + Test positive int. + """ + self._check(10, "10") + + def test_int3(self) -> None: + """ + Test negative int. + """ + self._check(-10, "-10") + + def test_str1(self) -> None: + """ + Test str simple. + """ + self._check("a", '"a"') + + def test_str2(self) -> None: + """ + Test str with double quotes. + """ + self._check('"b"', '"\\"b\\""') + + def test_str3(self) -> None: + """ + Test str with single quotes. + """ + self._check("'c'", "\"'c'\"") + + def test_list1(self) -> None: + """ + Test List. + """ + self._check([1, 0.2, "3"], '[1, 0.2, "3"]') + + def test_dict1(self) -> None: + """ + Test Dist. + """ + self._check({"a": 0.2, 3: "b"}, '{"a": 0.2, 3: "b"}') + + def test_df1(self) -> None: + """ + Test pd.DataFrame (single quotes expected in field names) + """ + self._check( + pd.DataFrame.from_dict({"a": [0.2, 0.1]}), + "pd.DataFrame.from_dict({'a': [0.2, 0.1]})", + ) + + def test_dataseries1(self) -> None: + """ + Test pd.Series. 
+ """ + self._check( + pd.Series([0.2, 0.1], name="a"), + "pd.Series(data=[0.2, 0.1], index=RangeIndex(start=0, stop=2, step=1), " + 'name="a", dtype=float64)', + ) + + def test_config1(self) -> None: + """ + Test cconfig.Config. + """ + config = cconfig.Config() + config["var1"] = "val1" + config["var2"] = cconfig.Config([("var3", 10), ("var4", "val4")]) + self._check( + config, + "cconfig.Config.from_python(\"Config({'var1': 'val1', " + "'var2': Config({'var3': 10, 'var4': 'val4'})})\")", + ) + + +# ############################################################################# +# TestPlaybackFilePath1 +# ############################################################################# + + +class TestPlaybackFilePath1(hunitest.TestCase): + """ + Test file mode correctness. + """ + + def test1(self) -> None: + """ + Test writing to file when number of tests is more than generated (10). + """ + test_file = hplayba.Playback._get_test_file_name( + "./path/to/somewhere.py" + ) + self.assert_equal( + test_file, "./path/to/test/test_by_playback_somewhere.py" + ) + + +# ############################################################################# +# TestPlaybackFileMode1 +# ############################################################################# + + +class TestPlaybackFileMode1(hunitest.TestCase): + """ + Test file mode correctness. + """ + + def get_code(self, max_tests: Optional[int] = None) -> str: + """ + Return a code for executable file to run. + """ + max_tests_str = "" if max_tests is None else f", max_tests={max_tests}" + code = ( + "\n".join( + [ + "import helpers.hplayback as hplayba", + "def plbck_sum(a: int, b: int) -> int:", + ' hplayba.Playback("check_string", to_file=True%s).run(None)', + " return a + b", + "", + "[plbck_sum(i, i + 1) for i in range(4)]", + ] + ) + % max_tests_str + ) + return code + + def helper(self, max_tests: Optional[int] = None) -> Any: + """ + Return generated by playback code. + """ + # Get file paths. 
+ tmp_dir = self.get_scratch_space() + # File with code. + code_basename = "code_.py" + tmp_py_file = os.path.join(tmp_dir, code_basename) + # File with test. + tmp_test_file = os.path.join( + tmp_dir, "test", "test_by_playback_" + code_basename + ) + # Save the code to the file. + hio.to_file(tmp_py_file, self.get_code(max_tests)) + # Executes the code. + hsystem.system(f"python {tmp_py_file}") + playback_code = hio.from_file(tmp_test_file) + return playback_code + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test1(self) -> None: + """ + Test writing to file when number of tests is more than generated. + """ + max_tests = 100 + self.check_string(self.helper(max_tests)) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test2(self) -> None: + """ + Test writing to file when number of tests is default. + """ + self.check_string(self.helper()) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~10 seconds.") + def test3(self) -> None: + """ + Test writing to file when number of tests is lower than generated. 
+ """ + max_tests = 2 + self.check_string(self.helper(max_tests)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py new file mode 100644 index 000000000..395138e7a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py @@ -0,0 +1,844 @@ +import logging +import pprint +from typing import List + +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_printing1 +# ############################################################################# + + +class Test_printing1(hunitest.TestCase): + def test_color_highlight1(self) -> None: + for c in hprint._COLOR_MAP: + _LOG.debug(hprint.color_highlight(c, c)) + + +# ############################################################################# +# Test_to_str1 +# ############################################################################# + + +class Test_to_str1(hunitest.TestCase): + def test1(self) -> None: + x = 1 + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x=1" + self.assertEqual(actual, expected) + + def test2(self) -> None: + x = "hello world" + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x='hello world'" + self.assertEqual(actual, expected) + + def test3(self) -> None: + x = 2 + # To disable linter complaints. + _ = x + actual = hprint.to_str("x*2") + expected = "x*2=4" + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test printing multiple values separated by space. + """ + x = 1 + y = "hello" + # To disable linter complaints. 
+ _ = x, y + actual = hprint.to_str("x y") + expected = "x=1, y='hello'" + self.assertEqual(actual, expected) + + def test5(self) -> None: + """ + Test printing multiple strings separated by space. + """ + x = "1" + y = "hello" + # To disable linter complaints. + _ = x, y + actual = hprint.to_str("x y") + expected = "x='1', y='hello'" + self.assertEqual(actual, expected) + + def test6(self) -> None: + """ + Test printing a list. + """ + x = [1, "hello", "world"] + # To disable linter complaints. + _ = x + actual = hprint.to_str("x") + expected = "x=[1, 'hello', 'world']" + self.assertEqual(actual, expected) + + +# ############################################################################# + + +def example_func1(x: int, y: str) -> str: + _ = x, y + ret = hprint.func_signature_to_str() + return ret # type: ignore[no-any-return] + + +def example_func2() -> str: + ret = hprint.func_signature_to_str() + return ret # type: ignore[no-any-return] + + +def example_func3(x: int, y: str) -> str: + _ = x, y + ret = hprint.func_signature_to_str("y") + return ret # type: ignore[no-any-return] + + +def example_func4(x: int, y: str, z: float) -> str: + _ = x, y, z + ret = hprint.func_signature_to_str("x z") + return ret # type: ignore[no-any-return] + + +def example_func5(x: int, y: str, z: float) -> str: + _ = x, y, z + ret = hprint.func_signature_to_str(["y", "z"]) + return ret # type: ignore[no-any-return] + + +# ############################################################################# +# Test_func_signature_to_str1 +# ############################################################################# + + +class Test_func_signature_to_str1(hunitest.TestCase): + def test1(self) -> None: + actual = example_func1(1, "hello") + expected = "# example_func1: x=1, y='hello'" + self.assert_equal(actual, expected) + + def test2(self) -> None: + actual = example_func2() + expected = "# example_func2:" + self.assert_equal(actual, expected) + + def test3(self) -> None: + actual = 
example_func3(1, "hello") + expected = "# example_func3: x=1" + self.assert_equal(actual, expected) + + def test4(self) -> None: + actual = example_func4(1, "hello", 3.14) + expected = "# example_func4: y='hello'" + self.assert_equal(actual, expected) + + def test5(self) -> None: + actual = example_func5(1, "hello", 3.14) + expected = "# example_func5: x=1" + self.assert_equal(actual, expected) + + +# ############################################################################# +# Test_log +# ############################################################################# + + +class Test_log(hunitest.TestCase): + def test2(self) -> None: + x = 1 + # To disable linter complaints. + _ = x + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x") + + def test3(self) -> None: + x = 1 + y = "hello" + # To disable linter complaints. + _ = x, y + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x y") + + def test4(self) -> None: + """ + The command: + + > pytest -k Test_log::test4 -o log_cli=true --dbg_verbosity DEBUG + + should print something like: + + DEBUG test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] + INFO test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] + """ + x = 1 + y = "hello" + z = ["cruel", "world"] + # To disable linter complaints. 
+ _ = x, y, z + for verb in [logging.DEBUG, logging.INFO]: + hprint.log(_LOG, verb, "x y z") + + +# ############################################################################# +# Test_sort_dictionary +# ############################################################################# + + +class Test_sort_dictionary(hunitest.TestCase): + def test1(self) -> None: + dict_ = { + "tool": { + "poetry": { + "name": "lm", + "version": "0.1.0", + "description": "", + "authors": [""], + "dependencies": { + "awscli": "*", + "boto3": "*", + "flaky": "*", + "fsspec": "*", + "gluonts": "*", + "invoke": "*", + "jupyter": "*", + "matplotlib": "*", + "mxnet": "*", + "networkx": "*", + "pandas": "^1.1.0", + "psycopg2": "*", + "pyarrow": "*", + "pytest": "^6.0.0", + "pytest-cov": "*", + "pytest-instafail": "*", + "pytest-xdist": "*", + "python": "^3.7", + "pywavelets": "*", + "s3fs": "*", + "seaborn": "*", + "sklearn": "*", + "statsmodels": "*", + "bs4": "*", + "jsonpickle": "*", + "lxml": "*", + "tqdm": "*", + "requests": "*", + }, + "dev-dependencies": {}, + } + }, + "build-system": { + "requires": ["poetry>=0.12"], + "build-backend": "poetry.masonry.api", + }, + } + actual = hprint.sort_dictionary(dict_) + self.check_string(pprint.pformat(actual)) + + +# ############################################################################# +# Test_indent1 +# ############################################################################# + + +class Test_indent1(hunitest.TestCase): + def test1(self) -> None: + txt = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar +""" + num_spaces = 2 + actual = hprint.indent(txt, num_spaces=num_spaces) + expected = """ foo + + klass TestHelloWorld(hunitest.TestCase): + bar +""" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_dedent1 +# ############################################################################# + + +class 
Test_dedent1(hunitest.TestCase): + def test1(self) -> None: + txt = """ + foo + + klass TestHelloWorld(hunitest.TestCase): + bar +""" + actual = hprint.dedent(txt) + expected = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar""" + self.assert_equal(actual, expected, fuzzy_match=False) + + def test2(self) -> None: + txt = r""" + read_data: + file_name: foo_bar.txt + nrows: 999 + single_val: hello + zscore: + style: gaz + com: 28""" + actual = hprint.dedent(txt) + expected = """read_data: + file_name: foo_bar.txt + nrows: 999 +single_val: hello +zscore: + style: gaz + com: 28""" + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_roundtrip1(self) -> None: + """ + Verify that `indent` and `dedent` are inverse of each other. + """ + txt1 = """foo + + +# ############################################################################# +# TestHelloWorld +# ############################################################################# + + +class TestHelloWorld(hunitest.TestCase): + bar""" + num_spaces = 3 + txt2 = hprint.indent(txt1, num_spaces=num_spaces) + txt3 = hprint.dedent(txt2) + self.assert_equal(txt1, txt3, fuzzy_match=False) + + +# ############################################################################# +# Test_align_on_left1 +# ############################################################################# + + +class Test_align_on_left1(hunitest.TestCase): + def test1(self) -> None: + txt = """foo + +klass TestHelloWorld(hunitest.TestCase): + bar +""" + actual = hprint.align_on_left(txt) + expected = """foo + +klass TestHelloWorld(hunitest.TestCase): +bar +""" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_logging1 +# ############################################################################# + + +class Test_logging1(hunitest.TestCase): + def test_log_frame1(self) -> None: + hprint.log_frame(_LOG, "%s %s", "hello", "world") + + def 
test_log_frame2(self) -> None: + hprint.log_frame(_LOG, "%s", "hello", level=1) + + def test_log_frame3(self) -> None: + hprint.log_frame(_LOG, "%s", "hello", level=2, verbosity=logging.INFO) + + +# ############################################################################# +# Test_remove_lead_trail_empty_lines1 +# ############################################################################# + + +class Test_remove_lead_trail_empty_lines1(hunitest.TestCase): + def helper(self, input_str: str, expected_output: List[str]) -> None: + """ + Test the `remove_lead_trail_empty_lines` function. + + :param input_str: The input string to be processed. + :param expected_output: The expected output list of strings. + + Example: + input_str = "line1\n\n\nline2" + expected_output = ["line1", "", "", "line2"] + """ + # Test as string. + actual = hprint.remove_lead_trail_empty_lines(input_str) + expected = "\n".join(expected_output) + self.assertEqual(actual, expected) + # Test as list of strings. + input_str = input_str.splitlines() + actual = hprint.remove_lead_trail_empty_lines(input_str) + self.assertEqual(actual, expected_output) + + def test_empty_string_returns_empty_list(self) -> None: + input_str: str = "" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_single_line_string_returns_single_line_list(self) -> None: + input_str: str = "line" + expected_output = ["line"] + self.helper(input_str, expected_output) + + def test_multiple_lines_with_no_empty_lines_returns_same_lines( + self, + ) -> None: + input_str: str = "line1\nline2\nline3" + expected_output = ["line1", "line2", "line3"] + self.helper(input_str, expected_output) + + def test_leading_empty_lines_are_removed(self) -> None: + input_str: str = "\n\nline1\nline2" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_trailing_empty_lines_are_removed(self) -> None: + input_str: str = "line1\nline2\n\n" + expected_output = ["line1", 
"line2"] + self.helper(input_str, expected_output) + + def test_leading_and_trailing_empty_lines_are_removed(self) -> None: + input_str: str = "\n\nline1\nline2\n\n" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_consecutive_empty_lines_in_middle_are_not_removed(self) -> None: + input_str: str = "line1\n\n\nline2" + expected_output = ["line1", "", "", "line2"] + self.helper(input_str, expected_output) + + def test_only_empty_lines_returns_empty_list(self) -> None: + input_str: str = "\n\n\n" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_mixed_content_with_leading_trailing_and_middle_empty_lines( + self, + ) -> None: + input_str: str = "\n\nline1\n\nline2\n\n" + expected_output = ["line1", "", "line2"] + self.helper(input_str, expected_output) + + def test_single_empty_line_returns_empty_list(self) -> None: + input_str: str = "\n" + expected_output: List[str] = [] + self.helper(input_str, expected_output) + + def test_multiple_consecutive_empty_lines_at_beginning_and_end( + self, + ) -> None: + input_str: str = "\n\n\nline1\nline2\n\n\n" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_input_with_only_spaces_and_tabs_as_empty_lines(self) -> None: + input_str: str = " \n\t\nline1\nline2\n \n\t" + expected_output = ["line1", "line2"] + self.helper(input_str, expected_output) + + def test_input_with_mixed_line_endings_unix_and_windows(self) -> None: + input_str: str = "line1\n\nline2\r\n\r\nline3" + expected_output = ["line1", "", "line2", "", "line3"] + self.helper(input_str, expected_output) + + def test_input_with_special_characters(self) -> None: + input_str: str = "line1\n\n!@#$%^&*()\n\nline2" + expected_output = ["line1", "", "!@#$%^&*()", "", "line2"] + self.helper(input_str, expected_output) + + +# ############################################################################# +# Test_remove_empty_lines +# 
############################################################################# + + +class Test_remove_empty_lines(hunitest.TestCase): + """ + Test remove_empty_lines function with different modes. + """ + + def helper(self, lines: str, mode: str, expected: str) -> None: + """ + Test helper for remove_empty_lines. + + :param lines: Input text as string (will be split into list) + :param mode: Mode parameter for remove_empty_lines + :param expected: Expected output as string (will be split into list) + """ + # Prepare inputs. + lines_str = hprint.dedent(lines) + if lines_str: + lines_list = lines_str.split("\n") + else: + lines_list = [] + # Prepare outputs. + expected_str = hprint.dedent(expected) + if expected_str: + expected_list = expected_str.split("\n") + else: + expected_list = [] + # Run test. + actual = hprint.remove_empty_lines(lines_list, mode=mode) + # Check outputs. + self.assert_equal(str(actual), str(expected_list)) + + def test1(self) -> None: + """ + Test no_empty_lines mode with an empty list. + """ + # Prepare inputs. + lines = "" + mode = "no_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test2(self) -> None: + """ + Test no_empty_lines mode with no empty lines in the input. + """ + # Prepare inputs. + lines = """ + line1 + line2 + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test3(self) -> None: + """ + Test no_empty_lines mode with all lines being empty. + """ + # Prepare inputs. + lines = """ + + + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test4(self) -> None: + """ + Test no_empty_lines mode removes leading empty lines. + """ + # Prepare inputs. + lines = """ + + line1 + line2 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + """ + # Run test. 
+ self.helper(lines, mode, expected) + + def test5(self) -> None: + """ + Test no_empty_lines mode removes trailing empty lines. + """ + # Prepare inputs. + lines = """ + line1 + line2 + + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test6(self) -> None: + """ + Test no_empty_lines mode removes empty lines in the middle. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test7(self) -> None: + """ + Test no_empty_lines mode removes lines with only whitespace. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + \t + line3 + """ + mode = "no_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test8(self) -> None: + """ + Test no_consecutive_empty_lines mode with empty list. + """ + # Prepare inputs. + lines = "" + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = "" + # Run test. + self.helper(lines, mode, expected) + + def test9(self) -> None: + """ + Test no_consecutive_empty_lines mode with no empty lines. + """ + # Prepare inputs. + lines = """ + line1 + line2 + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + line2 + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test10(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps single empty line. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test11(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps one of two consecutive empty lines. 
+ """ + # Prepare inputs. + lines = """ + line1 + + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test12(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps one of multiple consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + + + + line2 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + # Run test. + self.helper(lines, mode, expected) + + def test13(self) -> None: + """ + Test no_consecutive_empty_lines mode with multiple groups of consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + + line2 + + + + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test14(self) -> None: + """ + Test no_consecutive_empty_lines mode keeps all non-consecutive empty lines. + """ + # Prepare inputs. + lines = """ + line1 + + line2 + + line3 + """ + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + + line3 + """ + # Run test. + self.helper(lines, mode, expected) + + def test15(self) -> None: + """ + Test that invalid mode raises ValueError. + """ + # Prepare inputs. + lines = ["line1", "line2"] + mode = "invalid_mode" + # Run test and check output. + with self.assertRaises(ValueError) as cm: + hprint.remove_empty_lines(lines, mode=mode) + actual = str(cm.exception) + expected = "Invalid mode='invalid_mode'" + self.assert_equal(actual, expected) + + def test16(self) -> None: + """ + Test remove_empty_lines with string input (decorator functionality). + """ + # Prepare inputs. + text = """ + line1 + + line2 + + line3 + """ + text = hprint.dedent(text) + mode = "no_empty_lines" + # Prepare outputs. 
+ expected = """ + line1 + line2 + line3 + """ + expected = hprint.dedent(expected) + # Run test. + actual = hprint.remove_empty_lines(text, mode=mode) + # Check outputs. + self.assert_equal(actual, expected) + + def test17(self) -> None: + """ + Test no_consecutive_empty_lines with string input (decorator functionality). + """ + # Prepare inputs. + text = """ + line1 + + + line2 + """ + text = hprint.dedent(text) + mode = "no_consecutive_empty_lines" + # Prepare outputs. + expected = """ + line1 + + line2 + """ + expected = hprint.dedent(expected) + # Run test. + actual = hprint.remove_empty_lines(text, mode=mode) + # Check outputs. + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py new file mode 100644 index 000000000..652fdf47a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py @@ -0,0 +1,228 @@ +import io +import os +import pprint +import re +from contextlib import redirect_stdout + +import pytest + +# TODO(heanh): add `junitparser` in `//helpers` image. +pytest.importorskip("junitparser") + +import helpers.hio as hio # noqa: E402 +import helpers.hpytest as hpytest # noqa: E402 +import helpers.hunit_test as hunitest # noqa: E402 + + +def _strip_color_codes(text: str) -> str: + """ + Remove ANSI color escape codes from text. + + :param text: text to strip the color codes from + :return: text with the color codes removed + """ + # Remove ANSI escape codes. 
+ txt = re.sub(r"\033\[[0-9;]*m", "", text) + return txt + + +# ############################################################################# +# Test_JUnitReporter +# ############################################################################# + + +class Test_JUnitReporter(hunitest.TestCase): + """ + Test scenario where there are passed, skipped tests with leads to `PASSED` + result. + """ + + def helper(self) -> hpytest.JUnitReporter: + """ + Helper function to create a `JUnitReporter` object. + + :return: `JUnitReporter` object + """ + xml_str = """ + + + + + /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. + + + + + """ + input_dir = self.get_scratch_space() + input_file_path = os.path.join(input_dir, "test.xml") + hio.to_file(input_file_path, xml_str) + reporter = hpytest.JUnitReporter(input_file_path) + return reporter + + def test_parse(self) -> None: + """ + Test parsing the JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + actual = pprint.pformat(reporter.overall_stats) + expected = r""" + {'error': 0, + 'failed': 0, + 'passed': 1, + 'skipped': 1, + 'total_tests': 2, + 'total_time': 3.0} + """ + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test_print_summary(self) -> None: + """ + Test printing the summary of the results from JUnit XML file. 
+ """ + reporter = self.helper() + reporter.parse() + captured_output = io.StringIO() + with redirect_stdout(captured_output): + reporter.print_summary() + actual = captured_output.getvalue() + actual = _strip_color_codes(actual) + expected = r""" + ====================================================================== + collected 2 items + + ====================================================================== + Test: dummy-test-suite-1 + Timestamp: 2025-01-01T12:00:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) + Summary: 1 passed, 1 skipped in 2.000s + + ====================================================================== + Test: dummy-test-suite-2 + Timestamp: 2025-01-01T12:01:00.000000+00:00 + ---------------------------------------------------------------------- + Summary: no tests in 1.000s + + ====================================================================== + Summary: 1 passed, 1 skipped in 3.00s + Result: PASSED + """ + self.assert_equal( + actual, + expected, + dedent=True, + fuzzy_match=True, + ) + + +# ############################################################################# +# Test_JUnitReporter2 +# ############################################################################# + + +class Test_JUnitReporter2(hunitest.TestCase): + """ + Test scenario where there are passed, error, failed, and skipped tests with + leads to `FAILED` result. + """ + + def helper(self) -> hpytest.JUnitReporter: + """ + Helper function to create a `JUnitReporter` object. + + :return: `JUnitReporter` object + """ + xml_str = """ + + + + + /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. + + + + + + /app/dummy/test/test_module.py:30: Dummy failure message for testing purposes. 
+ + + /app/dummy/test/test_module.py:35: Dummy error message for testing purposes. + + + + + """ + input_dir = self.get_scratch_space() + input_file_path = os.path.join(input_dir, "test.xml") + hio.to_file(input_file_path, xml_str) + reporter = hpytest.JUnitReporter(input_file_path) + return reporter + + def test_parse(self) -> None: + """ + Test parsing the JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + actual = pprint.pformat(reporter.overall_stats) + expected = r""" + {'error': 1, + 'failed': 1, + 'passed': 2, + 'skipped': 1, + 'total_tests': 5, + 'total_time': 6.0} + """ + self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) + + def test_print_summary(self) -> None: + """ + Test printing the summary of the results from JUnit XML file. + """ + reporter = self.helper() + reporter.parse() + captured_output = io.StringIO() + with redirect_stdout(captured_output): + reporter.print_summary() + actual = captured_output.getvalue() + actual = _strip_color_codes(actual) + expected = r""" + ====================================================================== + collected 5 items + + ====================================================================== + Test: dummy-test-suite-1 + Timestamp: 2025-01-01T12:00:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) + Summary: 1 passed, 1 skipped in 2.000s + + ====================================================================== + Test: dummy-test-suite-2 + Timestamp: 2025-01-01T12:01:00.000000+00:00 + ---------------------------------------------------------------------- + dummy.test.test_module.DummyTestCase::test_passed_function PASSED (1.000s) + dummy.test.test_module.DummyTestCase::test_failed_function FAILED (1.000s) + dummy.test.test_module.DummyTestCase::test_error_function ERROR (1.000s) + 
Summary: 1 passed, 1 failed, 1 error in 3.000s + + ====================================================================== + Test: dummy-test-suite-3 + Timestamp: 2025-01-01T12:02:00.000000+00:00 + ---------------------------------------------------------------------- + Summary: no tests in 1.000s + + ====================================================================== + Summary: 2 passed, 1 failed, 1 error, 1 skipped in 6.00s + Result: FAILED + """ + self.assert_equal( + actual, + expected, + dedent=True, + fuzzy_match=True, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py new file mode 100644 index 000000000..d64310202 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py @@ -0,0 +1,154 @@ +import asyncio +import logging + +import pytest + +import helpers.hretry as hretry +import helpers.htimer as htimer +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +EXCEPTIONS = (AttributeError, ValueError) + + +# ############################################################################# +# Test_retry +# ############################################################################# + + +class Test_retry(hunitest.TestCase): + def test_retry1(self) -> None: + """ + Test normal case. 
+ """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> bool: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + self.assertTrue(func()) + self.assertEqual(self.exception_count, num_attempts - 1) + + def test_retry2(self) -> None: + """ + Test when the number of exceptions is greater than the number of + retries. + """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> bool: + if self.exception_count < num_attempts: + self.exception_count += 1 + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with self.assertRaises(ValueError): + func() + + def test_retry3(self) -> None: + """ + Test when the raised exception is not in the list of expected + exceptions. + """ + self.exception_count = 0 + num_attempts = 3 + + @hretry.sync_retry(num_attempts, EXCEPTIONS) + def func() -> None: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + raise IndexError("Simulated non expected error") + _LOG.debug("All good") + + with self.assertRaises(IndexError): + func() + + +# ############################################################################# +# Test_retry2 +# ############################################################################# + + +class Test_retry2(hunitest.TestCase): + def test_async_retry1(self) -> None: + """ + Test normal case. 
+ """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> bool: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: + result = asyncio.run(func()) + self.assertEqual(round(ts.elapsed_time, 1), 2.2) + self.assertTrue(result) + self.assertEqual(self.exception_count, num_attempts - 1) + + @pytest.mark.skip(reason="See CmTask11013") + def test_async_retry2(self) -> None: + """ + Test when the number of exceptions is greater than the number of + retries. + """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> bool: + if self.exception_count < num_attempts: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise ValueError("Simulated expected error") + _LOG.debug("All good") + return True + + with self.assertRaises(ValueError) as fail: + with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: + asyncio.run(func()) + self.assertEqual(round(ts.elapsed_time, 1), 3.3) + actual = str(fail.exception) + expected = "Simulated expected error" + self.assert_equal(actual, expected) + + def test_async_retry3(self) -> None: + """ + Test when the raised exception is not in the list of expected + exceptions. 
+ """ + self.exception_count = 0 + num_attempts = 3 + retry_delay_in_sec = 1 + + @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) + async def func() -> None: + if self.exception_count < num_attempts - 1: + self.exception_count += 1 + await asyncio.sleep(0.1) + raise IndexError("Simulated non expected error") + _LOG.debug("All good") + + with self.assertRaises(IndexError) as fail: + asyncio.run(func()) + actual = str(fail.exception) + expected = "Simulated non expected error" + self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py new file mode 100644 index 000000000..8f9dd84df --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py @@ -0,0 +1,597 @@ +import logging +import os +from typing import Generator, Tuple + +import pytest + +import helpers.hio as hio +import helpers.hmoto as hmoto +import helpers.hs3 as hs3 +import helpers.hserver as hserver +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestReplaceStarWithDoubleStar +# ############################################################################# + + +class TestReplaceStarWithDoubleStar(hunitest.TestCase): + def test1(self) -> None: + """ + Test non replacement of a single asterisk at the end of the path. + """ + pattern_to_modify = "s3://bucket/path/*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/*") + + def test2(self) -> None: + """ + Test replacement of a single asterisk within the path. 
+ """ + pattern_to_modify = "s3://bucket/path/*/file" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/**/*/file") + + def test3(self) -> None: + """ + Test no replacement when there are no asterisks in the path. + """ + pattern_to_modify = "s3://bucket/path/file" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/path/file") + + def test4(self) -> None: + """ + Test replacement when multiple asterisk are in the path. + """ + pattern_to_modify = "s3://bucket/*/path/*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/**/*/path/*") + + def test5(self) -> None: + """ + Test non-replacement of asterisk at the end of the path in a special + case. + """ + pattern_to_modify = "s3://bucket/*/path/csv*" + new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) + self.assert_equal(new_pattern, "s3://bucket/**/*/path/csv*") + + +# ############################################################################# +# TestToFileAndFromFile1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestToFileAndFromFile1(hmoto.S3Mock_TestCase): + def write_read_helper(self, file_name: str, force_flush: bool) -> None: + # Prepare inputs. + file_content = "line_mock1\nline_mock2\nline_mock3" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{file_name}" + # Save file. + # TODO(Nikola): Is it possible to verify `force_flush`? + hs3.to_file( + file_content, + s3_path, + aws_profile=moto_s3fs, + force_flush=force_flush, + ) + # Read file. + saved_content = hs3.from_file(s3_path, aws_profile=moto_s3fs) + # Check output. 
+ expected = r"""line_mock1 + line_mock2 + line_mock3""" + self.assert_equal(saved_content, expected, fuzzy_match=True) + + # ######################################################################### + + def test_to_file_and_from_file1(self) -> None: + """ + Verify that regular `.txt` file is saved/read on S3. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + force_flush = False + self.write_read_helper(regular_file_name, force_flush) + + def test_to_file_and_from_file2(self) -> None: + """ + Verify that compressed (e.g,`.gz`,`gzip`) file is saved/read on S3. + """ + # Prepare inputs. + gzip_file_name = "mock.gzip" + force_flush = True + self.write_read_helper(gzip_file_name, force_flush) + + def test_to_file_invalid1(self) -> None: + """ + Verify that only binary mode is allowed. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + regular_file_content = "line_mock1\nline_mock2\nline_mock3" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{regular_file_name}" + # Save file with `t` mode. + with self.assertRaises(ValueError) as fail: + hs3.to_file( + regular_file_content, s3_path, mode="wt", aws_profile=moto_s3fs + ) + # Check output. + actual = str(fail.exception) + expected = r"S3 only allows binary mode!" + self.assert_equal(actual, expected) + + def test_from_file_invalid1(self) -> None: + """ + Verify that encoding is not allowed. + """ + # Prepare inputs. + regular_file_name = "mock.txt" + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + s3_path = f"s3://{self.bucket_name}/{regular_file_name}" + # Read with encoding. + with self.assertRaises(ValueError) as fail: + hs3.from_file(s3_path, encoding=True, aws_profile=moto_s3fs) + # Check output. + actual = str(fail.exception) + expected = r"Encoding is not supported when reading from S3!" 
+ self.assert_equal(actual, expected) + + +# ############################################################################# +# TestListdir1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestListdir1(hmoto.S3Mock_TestCase): + def prepare_test_data(self) -> Tuple[str, hs3.AwsProfile]: + bucket_s3_path = f"s3://{self.bucket_name}" + depth_one_s3_path = f"{bucket_s3_path}/depth_one" + # Prepare test files. + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + first_s3_path = f"{depth_one_s3_path}/mock1.txt" + lines = [b"line_mock1"] + with moto_s3fs.open(first_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + second_s3_path = f"{depth_one_s3_path}/mock2.gzip" + with moto_s3fs.open(second_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + # Prepare test directories. + # `moto_s3fs.mkdir` is useless as empty directory is not visible. + # There must be at least one file in the directory to be visible. + regular_dir_s3_path = f"{depth_one_s3_path}/mock" + additional_file_s3_path = f"{regular_dir_s3_path}/regular_mock3.txt" + with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + git_dir_s3_path = f"s3://{bucket_s3_path}/.git" + additional_file_s3_path = f"{git_dir_s3_path}/git_mock3.txt" + with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + return bucket_s3_path, moto_s3fs + + # ######################################################################### + + def test_listdir1(self) -> None: + """ + Verify that all paths are found. 
+ """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = False + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock_bucket/.git", + "mock_bucket/.git/git_mock3.txt", + "mock_bucket/depth_one", + "mock_bucket/depth_one/mock", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir2(self) -> None: + """ + Verify that all relative paths are found. + """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + # Exclude `.git` by going level below. + bucket_s3_path = os.path.join(bucket_s3_path, "depth_one") + pattern = "*" + only_files = False + use_relative_paths = True + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock", + "mock/regular_mock3.txt", + "mock1.txt", + "mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir3(self) -> None: + """ + Verify that all paths are found, except `.git` ones. + """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = False + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + ) + paths.sort() + expected_paths = [ + "mock_bucket/depth_one", + "mock_bucket/depth_one/mock", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + def test_listdir4(self) -> None: + """ + Verify that all file paths are found. 
+ """ + bucket_s3_path, moto_s3fs = self.prepare_test_data() + pattern = "*" + only_files = True + use_relative_paths = False + paths = hs3.listdir( + bucket_s3_path, + pattern, + only_files, + use_relative_paths, + aws_profile=moto_s3fs, + exclude_git_dirs=False, + ) + paths.sort() + expected_paths = [ + "mock_bucket/.git/git_mock3.txt", + "mock_bucket/depth_one/mock/regular_mock3.txt", + "mock_bucket/depth_one/mock1.txt", + "mock_bucket/depth_one/mock2.gzip", + ] + self.assertListEqual(paths, expected_paths) + + +# ############################################################################# +# TestDu1 +# ############################################################################# + + +@pytest.mark.requires_ck_infra +@pytest.mark.requires_aws +@pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", +) +class TestDu1(hmoto.S3Mock_TestCase): + def test_du1(self) -> None: + """ + Verify that total file size is returned. + """ + bucket_s3_path = f"s3://{self.bucket_name}" + depth_one_s3_path = f"{bucket_s3_path}/depth_one" + # Prepare test files. + moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) + first_s3_path = f"{bucket_s3_path}/mock1.txt" + lines = [b"line_mock\n"] * 150 + with moto_s3fs.open(first_s3_path, "wb") as s3_file: + s3_file.writelines(lines) + second_s3_path = f"{depth_one_s3_path}/mock2.txt" + with moto_s3fs.open(second_s3_path, "wb") as s3_file: + # One level deeper to test recursive `du`. + s3_file.writelines(lines) + # Get multiple files. + size = hs3.du(bucket_s3_path, aws_profile=moto_s3fs) + expected_size = 3000 + self.assertEqual(size, expected_size) + size = hs3.du(depth_one_s3_path, aws_profile=moto_s3fs) + expected_size = 1500 + self.assertEqual(size, expected_size) + # Get exactly one file. + size = hs3.du(second_s3_path, aws_profile=moto_s3fs) + self.assertEqual(size, expected_size) + # Verify size in human-readable form. 
+ size = hs3.du(bucket_s3_path, human_format=True, aws_profile=moto_s3fs) + expected_size = r"2.9 KB" + self.assert_equal(size, expected_size) + + +# ############################################################################# +# TestGenerateAwsFiles +# ############################################################################# + + +class TestGenerateAwsFiles(hunitest.TestCase): + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + self.setUp() + os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "mock_access_key" + os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "mock_secret_access_key" + os.environ["MOCK_AWS_SESSION_TOKEN"] = "mock_session_token" + os.environ["MOCK_AWS_S3_BUCKET"] = "mock_s3_bucket" + os.environ["MOCK_AWS_DEFAULT_REGION"] = "mock_default_region" + # + os.environ["TEST_AWS_ACCESS_KEY_ID"] = "test_access_key" + os.environ["TEST_AWS_SECRET_ACCESS_KEY"] = "test_secret_access_key" + os.environ["TEST_AWS_SESSION_TOKEN"] = "test_session_token" + os.environ["TEST_AWS_S3_BUCKET"] = "test_s3_bucket" + os.environ["TEST_AWS_DEFAULT_REGION"] = "test_default_region" + # Generate AWS files with mock AWS profiles. 
+ self._scratch_test_dir = self.get_scratch_space() + aws_profiles = ["mock", "test"] + hs3.generate_aws_files( + home_dir=self._scratch_test_dir, aws_profiles=aws_profiles + ) + + def tear_down_test(self) -> None: + del os.environ["MOCK_AWS_ACCESS_KEY_ID"] + del os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] + del os.environ["MOCK_AWS_SESSION_TOKEN"] + del os.environ["MOCK_AWS_S3_BUCKET"] + del os.environ["MOCK_AWS_DEFAULT_REGION"] + # + del os.environ["TEST_AWS_ACCESS_KEY_ID"] + del os.environ["TEST_AWS_SECRET_ACCESS_KEY"] + del os.environ["TEST_AWS_SESSION_TOKEN"] + del os.environ["TEST_AWS_S3_BUCKET"] + del os.environ["TEST_AWS_DEFAULT_REGION"] + + def helper(self, file_name: str, expected: str) -> None: + # Check. + target_dir = os.path.join(self._scratch_test_dir, ".aws") + actual = hio.from_file(os.path.join(target_dir, file_name)) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Check that AWS credentials file is generated correctly. + """ + file_name = "credentials" + expected = r""" + [mock] + aws_access_key_id=mock_access_key + aws_secret_access_key=mock_secret_access_key + aws_session_token=mock_session_token + aws_s3_bucket=mock_s3_bucket + + [test] + aws_access_key_id=test_access_key + aws_secret_access_key=test_secret_access_key + aws_session_token=test_session_token + aws_s3_bucket=test_s3_bucket + """ + self.helper(file_name, expected) + + def test2(self) -> None: + """ + Check that AWS config file is generated correctly. 
+ """ + file_name = "config" + expected = """ + [profile mock] + region=mock_default_region + + [profile test] + region=test_default_region + """ + self.helper(file_name, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_get_s3_bucket_from_stage +# ############################################################################# + + +class Test_get_s3_bucket_from_stage(hunitest.TestCase): + def test1(self) -> None: + """ + Check for a valid stage. + """ + # Define arguments. + stage = "test" + # Run. + actual = hs3.get_s3_bucket_from_stage(stage) + expected = "cryptokaizen-data-test" + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Check for a valid stage and optional suffix. + """ + # Define arguments. + stage = "preprod" + suffix = "suffix_test" + # Run. + actual = hs3.get_s3_bucket_from_stage(stage, add_suffix=suffix) + expected = "cryptokaizen-data.preprod/suffix_test" + self.assert_equal(actual, expected) + + def test3(self) -> None: + """ + Check Invalid stage. + """ + # Define arguments. + stage = "Invalid" + # Run. 
+ with self.assertRaises(AssertionError) as cm: + hs3.get_s3_bucket_from_stage(stage) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + 'Invalid' in '{'test': 'cryptokaizen-data-test', 'preprod': 'cryptokaizen-data.preprod', 'prod': 'cryptokaizen-data'}' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + +_AWS_PROFILE = "ck" + + +# ############################################################################# +# Test_s3_get_credentials1 +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class Test_s3_get_credentials1(hunitest.TestCase): + def test1(self) -> None: + res = hs3.get_aws_credentials(_AWS_PROFILE) + _LOG.debug("res=%s", str(res)) + + +# ############################################################################# +# Test_s3_functions1 +# ############################################################################# + + +class Test_s3_functions1(hunitest.TestCase): + def test_extract_bucket_from_path1(self) -> None: + path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "tmp/TestCachingOnS3.test_with_caching1/joblib", + ) + bucket, path = hs3.split_path(path) + self.assert_equal(bucket, "cryptokaizen-unit-test") + self.assert_equal(path, "/tmp/TestCachingOnS3.test_with_caching1/joblib") + + +# ############################################################################# +# Test_s3_1 +# ############################################################################# + + +@pytest.mark.requires_aws +@pytest.mark.requires_ck_infra +class Test_s3_1(hunitest.TestCase): + def test_ls1(self) -> None: + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README.md", + ) + _LOG.debug("file_path=%s", file_path) + # > aws s3 ls s3://***** + # PRE data/ + # 2021-04-06 1:17:44 48 README.md + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_names = s3fs.ls(file_path) + 
_LOG.debug("file_names=%s", file_names) + self.assertGreater(len(file_names), 0) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_glob1(self) -> None: + # > aws s3 ls s3://alphamatic-data/data/ib/metadata/ + # 2021-04-26 08:39:00 18791 exchanges-2021-04-01-134738089177.csv + # 2021-04-26 08:39:00 18815 exchanges-2021-04-01-143112738505.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-134738089177.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/ib/metadata", + ) + glob_pattern = file_path + "/exchanges-*" + _LOG.debug("glob_pattern=%s", glob_pattern) + file_names = s3fs.glob(glob_pattern) + _LOG.debug("file_names=%s", file_names) + self.assertGreater(len(file_names), 0) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists1(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README.md", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = True + self.assertEqual(actual, expected) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists2(self) -> None: + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "README_does_not_exist.md", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = False + self.assertEqual(actual, expected) + + @pytest.mark.requires_aws + @pytest.mark.requires_ck_infra + def test_exists3(self) -> None: + # > aws s3 ls alphamatic-data/data/ib/metadata/symbols-2021-04-01-143112738505.csv + # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv + s3fs = hs3.get_s3fs(_AWS_PROFILE) + file_path = os.path.join( + 
hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), + "alphamatic-data", + "data/ib/metadata/symbols-2021-04-01-143112738505.csv", + ) + _LOG.debug("file_path=%s", file_path) + actual = s3fs.exists(file_path) + expected = True + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py new file mode 100644 index 000000000..cc046ddac --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py @@ -0,0 +1,209 @@ +# TODO(gp): Use pytest.import_skip instead of all this machinery. +_HAS_MOTO = True +try: + import moto +except ImportError: + # `moto` may not be installed in a non-cmamp repo, so we skip it (see "DevTools376: + # Break 2022-02-22"). + import helpers.hgit as hgit + + assert not hgit.is_cmamp(), ( + "`cmamp` should have moto, while other repos are allowed to not have it)" + ) + _HAS_MOTO = False + +if _HAS_MOTO: + import json + import logging + import unittest.mock as umock + + import boto3 + import botocore + import pytest + + import helpers.hgit as hgit + import helpers.hs3 as hs3 + import helpers.hsecrets as hsecret + import helpers.hserver as hserver + import helpers.hunit_test as hunitest + + _LOG = logging.getLogger(__name__) + + # The `mock_aws` decorator ensures the calls to the AWS API are + # mocked. + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestCreateClient(hunitest.TestCase): + def test_create_client1(self) -> None: + """ + Simple smoke test to verify connection to AWS. 
+ """ + client = hsecret.get_secrets_client(aws_profile="ck") + self.assertIsInstance(client, botocore.client.BaseClient) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestGetSecret(hunitest.TestCase): + @moto.mock_aws + def test_get_secret(self) -> None: + """ + Verify that the secret can be retrieved correctly. + """ + # Make sure the region name matches the one used in `hsecret` profile. + client = boto3.client( + "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 + ) + secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) + self.assertDictEqual(hsecret.get_secret(secret_name), secret) + + @moto.mock_aws + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + def test_trading_key(self) -> None: + """ + Verify locking mechanism for trading key is processed correctly. + """ + # Define test params. + secret_value = {"test.trading.key": "test.trading.value"} + secret_name = "test.trading.sandbox.1" + usedBy = "pytest" + hsecret.store_secret(secret_name, secret_value) + # Define expected values. + usedBy = hsecret._get_flag_value(usedBy) + expected = f"Secret key is already in use by {usedBy}" + # Call get secret to lock the key. + _ = hsecret.get_secret(secret_name) + # Recall get secret for same key to verify the lock. + try: + hsecret.get_secret(secret_name) + except RuntimeError as rte: + actual = str(rte) + self.assert_equal(actual, expected, fuzzy_match=True) + + @moto.mock_aws + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + def test_lock_for_different_script(self) -> None: + """ + Verify locking mechanism for access to trading key is passed if + scripts are different. + """ + # Define test params. 
+ secret_value = {"test.trading.key": "test.trading.value"} + secret_name = "test.trading.sandbox.1" + script1 = "pytest" + script2 = "run_system_observer.py" + hsecret.store_secret(secret_name, secret_value) + # Call get secret to lock the key with testing script. + _ = hsecret.get_secret(secret_name) + usedBy1 = hsecret._get_flag_value(script1) + # Define expected values. + usedBy2 = hsecret._get_flag_value(script2) + # Update secret value with expected usedBy script names. + secret_value["usedBy"] = [usedBy1, usedBy2] + # Call get secret for same key to verify the lock for mocked script. + with umock.patch("sys.argv", [script2]): + actual = hsecret.get_secret(secret_name) + self.assert_equal( + str(actual), expected=str(secret_value), fuzzy_match=True + ) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + class TestStoreSecret(hunitest.TestCase): + @moto.mock_aws + def test_store_secret1(self) -> None: + """ + Verify that a secret can be stored correctly. + """ + secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + hsecret.store_secret(secret_name, secret) + # Make sure the region name matches the one used in `hsecret`. + client = boto3.client( + "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 + ) + test_secret_value = json.loads( + client.get_secret_value(SecretId=secret_name)["SecretString"] + ) + self.assertDictEqual(test_secret_value, secret) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + class TestLockSecret(hunitest.TestCase): + @moto.mock_aws + def test_lock_secret(self) -> None: + """ + Verify that the lock secret function locks the key. + """ + # Define test params. 
+ secret = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + hsecret.store_secret(secret_name, secret) + usedBy = "pytest" + # Lock the stored secret. + hsecret.lock_secret(secret_name, secret) + # Retry locking the same secret. + try: + hsecret.lock_secret(secret_name, secret) + except RuntimeError as rte: + usedBy = hsecret._get_flag_value(usedBy) + expected = f"Secret key is already in use by {usedBy}" + actual = str(rte) + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.requires_ck_infra + @pytest.mark.requires_aws + @pytest.mark.skipif( + not hserver.is_CK_S3_available(), + reason="Run only if CK S3 is available", + ) + @pytest.mark.skip( + reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." + ) + class TestUpdateUsedby(hunitest.TestCase): + @moto.mock_aws + def test1(self) -> None: + """ + Verify that update_usedby updates value in secrets manager. + """ + # Define test params. + secret_value = {"testkey": "testvalue"} + secret_name = "test.local.sandbox.1" + usedBy = "pytest" + hsecret.store_secret(secret_name, secret_value) + # Define expected value. + expected = r""" + {'testkey': 'testvalue', 'usedBy': ['pytest']} + """ + # Run. + hsecret.update_usedby(secret_name, secret_value, usedBy) + actual = hsecret.get_secret(secret_name) + # Verify. 
+ self.assert_equal(str(actual), expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py new file mode 100644 index 000000000..3e6a1ba7d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py @@ -0,0 +1,321 @@ +import logging + +import pytest + +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# _TestCase1 +# ############################################################################# + + +class _TestCase1: + # def test_config_func_to_str1(self) -> None: + # val = hserver.config_func_to_str() + # _LOG.info("val=\n%s", val) + # if self.exp_config_func_to_str is not None: + # self.assert_equal(val, self.exp_config_func_to_str) + + def test_consistency1(self) -> None: + hserver._dassert_setup_consistency() + + def test_is_host_csfy_server1(self) -> None: + val = hserver.is_host_csfy_server() + _LOG.info("val=\n%s", val) + if self.exp_is_host_csfy_server is not None: + self.assertEqual(val, self.exp_is_host_csfy_server) + + def test_is_host_mac1(self) -> None: + val = hserver.is_host_mac() + _LOG.info("val=\n%s", val) + if self.exp_is_host_mac is not None: + self.assertEqual(val, self.exp_is_host_mac) + + def test_get_docker_info1(self) -> None: + val = hserver.get_docker_info() + _LOG.info("val=\n%s", val) + # Remove the docker version since it is not stable. 
+ val = hprint.filter_text("docker_version=", val) + if self.exp_get_docker_info is not None: + self.assert_equal(val, self.exp_get_docker_info) + + def test_get_setup_settings1(self) -> None: + setups = hserver._get_setup_settings() + val = hserver._setup_to_str(setups) + _LOG.info("val=\n%s", val) + if self.exp_get_setup_settings is not None: + self.assert_equal(val, self.exp_get_setup_settings) + + # def test_get_setup_signature1(self) -> None: + # val = hserver._get_setup_signature() + # _LOG.info("val=\n%s", val) + # if self.exp_get_setup_signature is not None: + # self.assert_equal(val, self.exp_get_setup_signature) + + def test_is_inside_ci1(self) -> None: + val = hserver.is_inside_ci() + _LOG.info("val=\n%s", val) + if self.exp_is_inside_ci is not None: + self.assertEqual(val, self.exp_is_inside_ci) + + +# ############################################################################# +# Test_hserver1 +# ############################################################################# + + +class Test_hserver1(_TestCase1, hunitest.TestCase): + """ + Smoke test without checking anything. + """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = None + self.exp_get_docker_info = None + self.exp_get_setup_settings = None + self.exp_get_setup_signature = None + self.exp_is_host_csfy_server = None + self.exp_is_host_mac = None + self.exp_is_inside_ci = None + + +# ############################################################################# +# Test_hserver_inside_ci1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="Config not matching", +) +class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): + """ + Run tests inside CI. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = None + self.exp_get_docker_info = hprint.dedent(r""" + Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci True + """) + self.exp_get_setup_signature = None + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = False + self.exp_is_inside_ci = True + + +# ############################################################################# +# Test_hserver_inside_docker_container_on_csfy_server1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_inside_docker_container_on_csfy_server(), + reason="Config not matching", +) +class Test_hserver_inside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): + """ + Run tests inside Docker container on a Causify dev server. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server True + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = True + self.exp_is_host_mac = False + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_outside_docker_container_on_csfy_server1 +# ############################################################################# + + +@pytest.mark.skipif( + not hserver.is_outside_docker_container_on_csfy_server(), + reason="Config not matching", +) +class Test_hserver_outside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): + """ + Run tests outside Docker container on a Causify dev server. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=False + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server True + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = True + self.exp_is_host_mac = False + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_inside_docker_container_on_gp_mac1 +# ############################################################################# + + +@pytest.mark.skipif( + not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", +) +class Test_hserver_inside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): + """ + Run tests inside Docker container on GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac True + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False + + +# ############################################################################# +# Test_hserver_outside_docker_container_on_gp_mac1 +# ############################################################################# + + +@pytest.mark.skipif( + not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", +) +class Test_hserver_outside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): + """ + Run tests outside Docker container on GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=False + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* + """) + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac True + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + is_inside_ci False + """) + self.exp_get_setup_signature = "" + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False + + +# ############################################################################# + + +# TODO(gp): Add test mocking the environment variables in _get_setup_signature. +# We should have one class for each set up (e.g., outside Mac, outside Linux, +# inside Docker, inside CI, etc.) 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py new file mode 100644 index 000000000..998b65c86 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py @@ -0,0 +1,81 @@ +import os +import unittest.mock as umock + +import helpers.hslack as hslack +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestSlackNotifier +# ############################################################################# + + +class TestSlackNotifier(hunitest.TestCase): + def test1(self) -> None: + """ + Check that `SlackNotifier` initializes with provided bot token. + """ + # Create notifier with explicit token. + notifier = hslack.SlackNotifier(bot_token="xoxb-test1-token") + self.assertEqual(notifier.bot_token, "xoxb-test1-token") + + def test2(self) -> None: + """ + Check that `SlackNotifier` initializes with environment variable token. + """ + # Mock environment variable and create notifier. + with umock.patch.dict( + os.environ, {"SLACK_BOT_TOKEN": "xoxb-test2-token"} + ): + notifier = hslack.SlackNotifier() + self.assertEqual(notifier.bot_token, "xoxb-test2-token") + + def test3(self) -> None: + """ + Check that `SlackNotifier` raises `ValueError` when no token is + provided. + """ + # Clear environment and verify initialization fails. + with umock.patch.dict(os.environ, {}, clear=True): + with self.assertRaises(ValueError) as cm: + hslack.SlackNotifier() + self.assertIn("No bot token provided", str(cm.exception)) + + def test4(self) -> None: + """ + Check that `send_message()` successfully sends message to Slack + channel. + """ + # Mock successful Slack API response. 
+ with umock.patch("helpers.hslack.requests.post") as mock_post: + mock_response = umock.MagicMock() + mock_response.json.return_value = {"ok": True} + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + # Send message and verify API call. + notifier = hslack.SlackNotifier(bot_token="xoxb-test4-token") + notifier.send_message("#test4", "test4 message content") + # Verify request parameters. + mock_post.assert_called_once() + _, kwargs = mock_post.call_args + self.assertEqual(kwargs["json"]["channel"], "#test4") + self.assertEqual(kwargs["json"]["text"], "test4 message content") + + def test5(self) -> None: + """ + Check that `send_message()` raises `ValueError` on Slack API error. + """ + # Mock Slack API error response. + with umock.patch("helpers.hslack.requests.post") as mock_post: + mock_response = umock.MagicMock() + mock_response.json.return_value = { + "ok": False, + "error": "channel_not_found", + } + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + # Verify error is raised with correct message. 
+ notifier = hslack.SlackNotifier(bot_token="xoxb-test5-token") + with self.assertRaises(ValueError) as cm: + notifier.send_message("#test5", "test5 message content") + self.assertIn("channel_not_found", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py new file mode 100644 index 000000000..f6adba2f6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py @@ -0,0 +1,29 @@ +import helpers.hsql as hsql +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestCreateInOperator +# ############################################################################# + + +class TestCreateInOperator(hunitest.TestCase): + def test_create_in_operator1(self) -> None: + """ + Test creating IN operator for more than one value. + """ + values = ["binance", "ftx"] + column = "exchange_id" + actual = hsql.create_in_operator(values, column) + expected = "exchange_id IN ('binance','ftx')" + self.assertEqual(actual, expected) + + def test_create_in_operator2(self) -> None: + """ + Test creating IN operator for one value. 
+ """ + values = ["ftx"] + column = "exchange_id" + actual = hsql.create_in_operator(values, column) + expected = "exchange_id IN ('ftx')" + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py new file mode 100644 index 000000000..1e5b4ff01 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py @@ -0,0 +1,270 @@ +import os +from typing import List, Tuple + +import helpers.hio as hio +import helpers.hstring as hstring +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestExtractVersionFromFileName +# ############################################################################# + + +class TestExtractVersionFromFileName(hunitest.TestCase): + def _test_extract_version_from_file_name( + self, version: str, expected: Tuple[int, int] + ) -> None: + """ + Verify function provides expected output on valid inputs. + + :param version: version in string format to input, e.g. 1.0 + :param expected: expected output version in (major, minor) + format + """ + fn = f"/app/datapull/ccxt/universe/download/universe_v{version}.json" + self.assertEqual(hstring.extract_version_from_file_name(fn), expected) + + def test_extract_version_from_file_name1(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("1.1", (1, 1)) + + def test_extract_version_from_file_name2(self) -> None: + """ + Verify function provides expected output on valid input. 
+ """ + self._test_extract_version_from_file_name("4", (4, 0)) + + def test_extract_version_from_file_name3(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("1.0", (1, 0)) + + def test_extract_version_from_file_name4(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("3.11", (3, 11)) + + def test_extract_version_from_file_name5(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("16.2", (16, 2)) + + def test_extract_version_from_file_name6(self) -> None: + """ + Verify function provides expected output on valid input. + """ + self._test_extract_version_from_file_name("25.11", (25, 11)) + + def _test_extract_version_from_file_name_incorrect_format( + self, file_name: str + ) -> None: + """ + Helper function to verify function raises AssertionError on incorrect + input format. + + :param file_name: incorrect file_name to test + """ + expected_fail = "Can't parse file" + with self.assertRaises(AssertionError) as fail: + _ = hstring.extract_version_from_file_name(file_name) + self.assertIn(expected_fail, str(fail.exception)) + + def test_extract_version_from_file_name_incorrect_format1(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format("incorrect") + + def test_extract_version_from_file_name_incorrect_format2(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_vxx.json" + ) + + def test_extract_version_from_file_name_incorrect_format3(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. 
+ """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_v.1.json" + ) + + def test_extract_version_from_file_name_incorrect_format4(self) -> None: + """ + Verify function raises AssertionError on incorrect input format. + """ + self._test_extract_version_from_file_name_incorrect_format( + "universe_11.json" + ) + + +# ############################################################################# +# TestGetDocstringLineIndices +# ############################################################################# + + +class TestGetDocstringLineIndices(hunitest.TestCase): + """ + Test determining which code lines are inside (doc)strings. + """ + + def helper(self, code: str, expected: List[str]) -> None: + lines = code.split("\n") + actual_idxs = hstring.get_docstring_line_indices(lines) + actual = [lines[i].strip() for i in actual_idxs] + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test one type of quotes. + """ + code = """ + def test_assert_equal1(self) -> None: + ''' + Test one. + ''' + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = ''' + Inside a string. + ''' + d = '''Does not count''' + self.check_string(actual) + + """ + expected = ["'''", "Test one.", "s = '''", "Inside a string."] + self.helper(code, expected) + + def test2(self) -> None: + """ + Test the second type of quotes. + """ + code = ''' + def test_assert_equal1(self) -> None: + """ + Test one. + """ + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = """ + Inside a string. + """ + d = """Does not count""" + self.check_string(actual) + + ''' + expected = ['"""', "Test one.", 's = """', "Inside a string."] + self.helper(code, expected) + + def test3(self) -> None: + """ + Test quotes within quotes. 
+ """ + code = """ + def test_assert_equal1(self) -> None: + ''' + Test one. + """ + code += '''\ +""" + String within "Test one". + """ + ''' + code += """\ +''' + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + s = ''' + Inside a string. + ''' + d = '''Does not count''' + self.check_string(actual) + + """ + expected = [ + "'''", + "Test one.", + '"""', + 'String within "Test one".', + '"""', + "s = '''", + "Inside a string.", + ] + self.helper(code, expected) + + +# ############################################################################# +# TestGetCodeBlockLineIndices +# ############################################################################# + + +class TestGetCodeBlockLineIndices(hunitest.TestCase): + def helper(self, code: str, expected: List[str]) -> None: + lines = code.split("\n") + actual_idxs = hstring.get_code_block_line_indices(lines) + actual = [lines[i].strip() for i in actual_idxs] + self.assertEqual(actual, expected) + + def test1(self) -> None: + """ + Test getting code block line indices. + """ + code = """ + def test_assert_equal1(self) -> None: + ``` + Test one. + ``` + d = ```Does not count``` + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + """ + expected = ["```", "Test one."] + self.helper(code, expected) + + +# ############################################################################# +# TestGetDocstrings +# ############################################################################# + + +class TestGetDocstrings(hunitest.TestCase): + def test1(self) -> None: + """ + Test that grouped lines within docstrings are correctly returned. + """ + # Prepare inputs. + test_get_docstring_lines_input_dir = self.get_input_dir() + text_file_path = os.path.join( + test_get_docstring_lines_input_dir, "test.txt" + ) + text = hio.from_file(text_file_path) + lines = text.splitlines() + # Run. 
+ actual = hstring.get_docstrings(lines) + # Check. + expected = [ + [1, 2, 3, 4, 5, 6], + [11, 12, 13, 14, 15, 16], + ] + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py new file mode 100644 index 000000000..4d2431bca --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py @@ -0,0 +1,494 @@ +import logging +import os +import platform +import re +import tempfile +from typing import List + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +def _get_ls_error_message(filename: str = "this_file_doesnt_exist") -> str: + """ + Get the expected error message for ls command for the current OS. + + :param filename: The filename that doesn't exist + """ + if platform.system() == "Darwin": + return f"ls: {filename}: No such file or directory" + elif platform.system() == "Linux": + return f"ls: cannot access '{filename}': No such file or directory" + raise RuntimeError(f"Unsupported OS: {platform.system()}") + +# ############################################################################# + + +# ############################################################################# +# Test_system1 +# ############################################################################# + + +class Test_system1(hunitest.TestCase): + def test1(self) -> None: + hsystem.system("ls") + + def test2(self) -> None: + hsystem.system("ls /dev/null", suppress_output=False) + + def test3(self) -> None: + """ + Output to a file. 
+ """ + with tempfile.NamedTemporaryFile() as fp: + temp_file_name = fp.name + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name) + hdbg.dassert_path_exists(temp_file_name) + + def test4(self) -> None: + """ + Tee to a file. + """ + with tempfile.NamedTemporaryFile() as fp: + temp_file_name = fp.name + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name, tee=True) + hdbg.dassert_path_exists(temp_file_name) + + def test5(self) -> None: + """ + Test dry_run. + """ + temp_file_name = tempfile._get_default_tempdir() # type: ignore + candidate_name = tempfile._get_candidate_names() # type: ignore + temp_file_name += "/" + next(candidate_name) + _LOG.debug("temp_file_name=%s", temp_file_name) + hsystem.system("ls", output_file=temp_file_name, dry_run=True) + hdbg.dassert_path_not_exists(temp_file_name) + + def test6(self) -> None: + """ + Test abort_on_error=False. + """ + hsystem.system("ls this_file_doesnt_exist", abort_on_error=False) + + def test7(self) -> None: + """ + Test abort_on_error=True (default). + """ + with self.assertRaises(RuntimeError) as cm: + hsystem.system("ls this_file_doesnt_exist") + actual = str(cm.exception) + # Different systems return different rc. + actual = re.sub(r"rc='\d+'", "rc=''", actual) + # Use OS-specific expected error message.
+ error_msg = _get_ls_error_message() + expected = f""" + ################################################################################ + ################################################################################ + _system() failed + ################################################################################ + ################################################################################ + # _system: cmd='(ls this_file_doesnt_exist) 2>&1', print_command=False, abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + cmd='(ls this_file_doesnt_exist) 2>&1' + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + - rc='' + - output=' + {error_msg} + ' + - Output saved in 'tmp.system_output.txt' + - Command saved in 'tmp.system_cmd.sh' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test8(self) -> None: + """ + Check that an assert error is raised when `tee` is passed without a log + file. + """ + with self.assertRaises(AssertionError) as cm: + _ = hsystem.system("ls this_should_fail", tee=True) + actual = str(cm.exception) + expected = r""" + ################################################################################ + * Failed assertion * + 'True' implies 'False' + ################################################################################ + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test9(self) -> None: + """ + Check that the failing command fails and logs are stored in the log + file. 
+ + - `allow_errors = False` + - `tee = True` + - Log file path is passed + """ + log_dir = self.get_scratch_space() + log_file_path = os.path.join(log_dir, "tee_log") + with self.assertRaises(RuntimeError) as cm: + _ = hsystem.system( + "ls this_should_fail", tee=True, output_file=log_file_path + ) + actual = str(cm.exception) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # Normalize rc value (differs across systems). + actual = re.sub(r"rc='\d+'", "rc=''", actual) + # Check log output contains the OS-specific error message. + actual = hio.from_file(log_file_path) + error_msg = _get_ls_error_message("this_should_fail") + expected = error_msg + "\n" + self.assert_equal(actual, expected) + + def test10(self) -> None: + """ + Check that the failing command passes and logs are stored in the log + file. + + - `allow_errors = True` + - `tee = True` + - Log file path is passed + """ + log_dir = self.get_scratch_space() + log_file_path = os.path.join(log_dir, "tee_log") + rc = hsystem.system( + "ls this_should_fail", + tee=True, + abort_on_error=False, + output_file=log_file_path, + ) + self.assertNotEqual(rc, 0) + # Check log output. + actual = hio.from_file(log_file_path) + # Use OS-specific expected error message. 
+ error_msg = _get_ls_error_message("this_should_fail") + expected = error_msg + "\n" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# + + +# ############################################################################# +# Test_system2 +# ############################################################################# + + +class Test_system2(hunitest.TestCase): + def test_get_user_name(self) -> None: + actual = hsystem.get_user_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("whoami")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + # + expected = hsystem.system_to_one_line("whoami")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_get_server_name(self) -> None: + actual = hsystem.get_server_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("uname -n")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + def test_get_os_name(self) -> None: + actual = hsystem.get_os_name() + _LOG.debug("actual=%s", actual) + # + expected = hsystem.system_to_string("uname -s")[1] + _LOG.debug("expected=%s", expected) + self.assertEqual(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_compute_file_signature1 +# ############################################################################# + + +class Test_compute_file_signature1(hunitest.TestCase): + def test1(self) -> None: + """ + Compute the signature of a file using 1 enclosing dir. + """ + file_name = ( + "/app/amp/core/test/TestCheckSameConfigs." 
+ + "test_check_same_configs_error/output/test.txt" + ) + dir_depth = 1 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = ["output", "test.txt"] + self.assert_equal(str(actual), str(expected)) + + def test2(self) -> None: + """ + Compute the signature of a file using 2 enclosing dirs. + """ + file_name = ( + "/app/amp/core/test/TestCheckSameConfigs." + + "test_check_same_configs_error/output/test.txt" + ) + dir_depth = 2 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = [ + "TestCheckSameConfigs.test_check_same_configs_error", + "output", + "test.txt", + ] + self.assert_equal(str(actual), str(expected)) + + def test3(self) -> None: + """ + Compute the signature of a file using 4 enclosing dirs. + """ + file_name = "/app/amp/core/test/TestApplyAdfTest.test1/output/test.txt" + dir_depth = 4 + actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) + expected = [ + "core", + "test", + "TestApplyAdfTest.test1", + "output", + "test.txt", + ] + self.assert_equal(str(actual), str(expected)) + + +# ############################################################################# + + +# ############################################################################# +# Test_find_file_with_dir1 +# ############################################################################# + + +class Test_find_file_with_dir1(hunitest.TestCase): + def test1(self) -> None: + """ + Check whether we can find this file using one enclosing dir. + """ + # Use this file. + file_name = "helpers/test/test_hsystem.py" + dir_depth = 1 + actual = hsystem.find_file_with_dir(file_name, dir_depth=dir_depth) + expected = r"""['helpers/test/test_hsystem.py']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + + def _helper(self, dir_depth: int, mode: str) -> List[str]: + """ + Test helper for find_file_with_dir. 
+ + :param dir_depth: Number of directory levels to use for matching + :param mode: Search mode for matching + :return: List of matching files + """ + # Create a fake golden outcome to be used in this test. + golden_content = "hello world" + self.check_string(golden_content) + # E.g., helpers/test/test_hsystem.py::Test_find_file_with_dir1::test2/test.txt + file_name = os.path.join(self.get_output_dir(), "test.txt") + _LOG.debug("file_name=%s", file_name) + actual = hsystem.find_file_with_dir( + file_name, dir_depth=dir_depth, mode=mode + ) + _LOG.debug("Found %d matching files", len(actual)) + return actual + + def test2(self) -> None: + """ + Check whether we can find a test golden output using different number + of enclosing dirs. + + With only 1 enclosing dir, we can't find it. + """ + # Use only one dir which is not enough to identify the file. + # E.g., .../test/TestSqlWriterBackend1.test_insert_tick_data1/output/test.txt + dir_depth = 1 + mode = "return_all_results" + actual = self._helper(dir_depth, mode) + # For sure there are more than 100 tests. + self.assertGreater(len(actual), 100) + + def test3(self) -> None: + """ + Like `test2`, but using 2 levels for sure we are going to identify the + file. + """ + dir_depth = 2 + mode = "return_all_results" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + # There should be a single match. + expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + def test4(self) -> None: + """ + Like `test2`, but using 2 levels for sure we are going to identify the + file and asserting in case we don't find a single result. + """ + dir_depth = 2 + mode = "assert_unless_one_result" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + # There should be a single match. 
+ expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + def test5(self) -> None: + """ + Like `test2`, using more level than 2, again, we should have a single + result. + """ + dir_depth = 3 + mode = "assert_unless_one_result" + actual = self._helper(dir_depth, mode) + _LOG.debug("Found %d matching files", len(actual)) + expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt']""" + self.assert_equal(str(actual), str(expected), purify_text=True) + self.assertEqual(len(actual), 1) + + +# ############################################################################# + + +# ############################################################################# +# Test_Linux_commands1 +# ############################################################################# + + +class Test_Linux_commands1(hunitest.TestCase): + def test_du1(self) -> None: + hsystem.du(".") + + +# ############################################################################# + + +# ############################################################################# +# Test_has_timestamp1 +# ############################################################################# + + +class Test_has_timestamp1(hunitest.TestCase): + def test_has_not_timestamp1(self) -> None: + """ + No timestamp. + """ + file_name = "patch.amp.8c5a2da9.tgz" + actual = hsystem.has_timestamp(file_name) + expected = False + self.assertEqual(actual, expected) + + def test_has_timestamp1(self) -> None: + """ + Valid timestamp. + """ + file_name = "patch.amp.8c5a2da9.20210725_225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp2(self) -> None: + """ + Valid timestamp. 
+ """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725-22_58_57.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp3(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp4(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_22_58_57.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + def test_has_timestamp5(self) -> None: + """ + Valid timestamp. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" + actual = hsystem.has_timestamp(file_name) + expected = True + self.assertEqual(actual, expected) + + +# ############################################################################# +# Test_append_timestamp_tag1 +# ############################################################################# + + +class Test_append_timestamp_tag1(hunitest.TestCase): + def test_no_timestamp1(self) -> None: + """ + Invalid timestamp, with no tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" + tag = "" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.tgz + expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.tgz" + self.assertRegex(actual, expected) + + def test_no_timestamp2(self) -> None: + """ + Invalid timestamp, with no tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" + tag = "hello" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.hello.tgz + expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.hello.tgz" + self.assertRegex(actual, expected) + + def test1(self) -> None: + """ + Valid timestamp, with no tag. 
+ """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + tag = "" + actual = hsystem.append_timestamp_tag(file_name, tag) + # /foo/bar/patch.amp.8c5a2da9.20210725_225857.20210726-15_11_25.tgz + expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Valid timestamp, with a tag. + """ + file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" + tag = "hello" + actual = hsystem.append_timestamp_tag(file_name, tag) + expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.hello.tgz" + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py new file mode 100644 index 000000000..385de303a --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py @@ -0,0 +1,159 @@ +import logging + +import helpers.hprint as hprint +import helpers.htable as htable +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestTable1 +# ############################################################################# + + +class TestTable1(hunitest.TestCase): + # ######################################################################### + + @staticmethod + def _get_table() -> htable.Table: + txt = """completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow"] + # table = [line for line in csv.reader(txt.split("\n"), delimiter=' ')] + # _LOG.debug(hprint.to_str("table")) + # _LOG.debug("size=%s", str(htable.size(table))) + table = htable.Table.from_text(cols, txt, delimiter=" ") + return 
table + + def test_from_text1(self) -> None: + table = self._get_table() + self.assertIsInstance(table, htable.Table) + _LOG.debug(hprint.to_str("table")) + + def test_from_text_invalid1(self) -> None: + txt = """completed failure Lint Run_linter +completed success Lint +completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow"] + with self.assertRaises(AssertionError) as cm: + htable.Table.from_text(cols, txt, delimiter=" ") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '3' + == + '4' + Invalid row='['completed', 'success', 'Lint']' for cols='['status', 'outcome', 'descr', 'workflow']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_from_text_invalid2(self) -> None: + txt = """completed failure Lint Run_linter + completed success Lint Fast_tess + completed success Lint Slow_tests""" + cols = ["status", "outcome", "descr", "workflow", "EXTRA"] + with self.assertRaises(AssertionError) as cm: + htable.Table.from_text(cols, txt, delimiter=" ") + actual = str(cm.exception) + expected = """ + * Failed assertion * + '4' + == + '5' + Invalid row='['completed', 'failure', 'Lint', 'Run_linter']' for cols='['status', 'outcome', 'descr', 'workflow', 'EXTRA']' + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + # ######################################################################### + + def test_repr1(self) -> None: + table = self._get_table() + actual = repr(table) + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= +['completed', 'failure', 'Lint', 'Run_linter'] +['completed', 'success', 'Lint', 'Fast_tests'] +['completed', 'success', 'Lint', 'Slow_tests'] +size=(3, 4) +""" + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_str1(self) -> None: + table = self._get_table() + actual = str(table) + expected = r""" +status | outcome | descr | workflow | +--------- | ------- | ----- | ---------- | 
+completed | failure | Lint | Run_linter | +completed | success | Lint | Fast_tests | +completed | success | Lint | Slow_tests | +""" + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + # ######################################################################### + + def test_filter_table1(self) -> None: + """ + Filter resulting in a single matching row. + """ + table = self._get_table() + # + table_filter = table.filter_rows("outcome", "failure") + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= +['completed', 'failure', 'Lint', 'Run_linter'] +size=(1, 4) +""" + actual = repr(table_filter) + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_filter_table2(self) -> None: + """ + Filter resulting in no matches. + """ + table = self._get_table() + # + table_filter = table.filter_rows("status", "in progress") + expected = r""" +cols=['status', 'outcome', 'descr', 'workflow'] +table= + +size=(0, 4) +""" + actual = repr(table_filter) + expected = expected.rstrip().lstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + + def test_filter_table3(self) -> None: + """ + Filter with a column constant using the constant value. 
+ """ + table = self._get_table() + # + table_filter = table.filter_rows("descr", "Lint") + actual = repr(table_filter) + expected = repr(table) + self.assert_equal(actual, expected, fuzzy_match=False) + + # ######################################################################### + + def test_unique1(self) -> None: + table = self._get_table() + # + actual = table.unique("descr") + expected = ["Lint"] + self.assert_equal(str(actual), str(expected), fuzzy_match=False) + + def test_unique2(self) -> None: + table = self._get_table() + # + actual = table.unique("workflow") + expected = ["Fast_tests", "Run_linter", "Slow_tests"] + self.assert_equal(str(actual), str(expected), fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py new file mode 100644 index 000000000..fa2059b0b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py @@ -0,0 +1,578 @@ +import logging + +import helpers.hprint as hprint +import helpers.htext_protect as htexprot +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test__extract_protected_content +# ############################################################################# + + +class Test__extract_protected_content(hunitest.TestCase): + """ + Test the extract_protected_content function. + """ + + def helper( + self, + txt: str, + file_type: str, + expected_txt: str, + expected_map_size: int, + ) -> None: + """ + Test helper for extract_protected_content. 
+ + :param txt: Input text to process + :param file_type: File type ('md', 'txt', or 'tex') + :param expected_txt: Expected output text with placeholders + :param expected_map_size: Expected number of protected items + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + actual_lines, protected_map = htexprot.extract_protected_content( + lines, file_type + ) + # Check outputs. + actual = "\n".join(actual_lines) + expected = hprint.dedent( + expected_txt, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual, expected) + self.assertEqual(len(protected_map), expected_map_size) + + def test1(self) -> None: + """ + Test extracting single fenced block with content. + """ + # Prepare inputs. + txt = """ + Some text here. + ```python + def foo(): + return 42 + ``` + More text. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Some text here. + ```python + <<>> + ``` + More text. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test2(self) -> None: + """ + Test extracting multiple fenced blocks. + """ + # Prepare inputs. + txt = """ + Text. + ```python + code1 + ``` + Middle. + ```javascript + code2 + ``` + End. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text. + ```python + <<>> + ``` + Middle. + ```javascript + <<>> + ``` + End. + """ + expected_map_size = 2 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test3(self) -> None: + """ + Test extracting empty fenced block. + """ + # Prepare inputs. + txt = """ + Text before. + ```python + ``` + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + ```python + <<>> + ``` + Text after. + """ + expected_map_size = 1 + # Run test. 
+ self.helper(txt, file_type, expected, expected_map_size) + + def test4(self) -> None: + """ + Test extracting fenced blocks with different languages. + """ + # Prepare inputs. + txt = """ + ```python + python_code + ``` + ```javascript + js_code + ``` + ```bash + bash_code + ``` + """ + file_type = "md" + # Prepare outputs. + expected = """ + ```python + <<>> + ``` + ```javascript + <<>> + ``` + ```bash + <<>> + ``` + """ + expected_map_size = 3 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test5(self) -> None: + """ + Test extracting HTML single-line comment. + """ + # Prepare inputs. + txt = """ + Text before. + + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + <<>> + Text after. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test6(self) -> None: + """ + Test extracting HTML multi-line comment. + """ + # Prepare inputs. + txt = """ + Text before. + + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + <<>> + Text after. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test7(self) -> None: + """ + Test extracting LaTeX comment. + """ + # Prepare inputs. + txt = """ + Some LaTeX text. + % This is a LaTeX comment + More text. + """ + file_type = "tex" + # Prepare outputs. + expected = """ + Some LaTeX text. + <<>> + More text. + """ + expected_map_size = 1 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test8(self) -> None: + """ + Test extracting math block. + """ + # Prepare inputs. + txt = """ + Text before. + $$ + E = mc^2 + $$ + Text after. + """ + file_type = "md" + # Prepare outputs. + expected = """ + Text before. + $$ + <<>> + $$ + Text after. + """ + expected_map_size = 1 + # Run test. 
+ self.helper(txt, file_type, expected, expected_map_size) + + def test9(self) -> None: + """ + Test fenced block not extracted for tex files. + """ + # Prepare inputs. + txt = """ + LaTeX text. + ``` + This should not be extracted for tex files + ``` + More text. + """ + file_type = "tex" + # Prepare outputs. + expected = """ + LaTeX text. + ``` + This should not be extracted for tex files + ``` + More text. + """ + expected_map_size = 0 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + def test10(self) -> None: + """ + Test mixed content (fenced blocks + comments + normal text). + """ + # Prepare inputs. + txt = """ + # Title + Some text. + ```python + code here + ``` + + $$ + math here + $$ + End. + """ + file_type = "md" + # Prepare outputs. + expected = """ + # Title + Some text. + ```python + <<>> + ``` + <<>> + $$ + <<>> + $$ + End. + """ + expected_map_size = 3 + # Run test. + self.helper(txt, file_type, expected, expected_map_size) + + +# ############################################################################# +# Test__restore_protected_content +# ############################################################################# + + +class Test__restore_protected_content(hunitest.TestCase): + """ + Test the restore_protected_content function. + """ + + def helper( + self, + txt: str, + protected_map: dict, + expected_txt: str, + ) -> None: + """ + Test helper for restore_protected_content. + + :param txt: Input text with placeholders + :param protected_map: Mapping of placeholders to original content + :param expected_txt: Expected output with restored content + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + # Run test. + actual_lines = htexprot.restore_protected_content(lines, protected_map) + # Check outputs. 
+ actual = "\n".join(actual_lines) + expected = hprint.dedent( + expected_txt, remove_lead_trail_empty_lines_=True + ) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test restoring single placeholder. + """ + # Prepare inputs. + txt = """ + Text before. + ```python + <<>> + ``` + Text after. + """ + protected_map = { + "<<>>": "def foo():\n return 42" + } + # Prepare outputs. + expected = """ + Text before. + ```python + def foo(): + return 42 + ``` + Text after. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test2(self) -> None: + """ + Test restoring multiple placeholders. + """ + # Prepare inputs. + txt = """ + ```python + <<>> + ``` + <<>> + ``` + <<>> + ``` + """ + protected_map = { + "<<>>": "code1", + "<<>>": "", + "<<>>": "code2", + } + # Prepare outputs. + expected = """ + ```python + code1 + ``` + + ``` + code2 + ``` + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test3(self) -> None: + """ + Test restoring multi-line content from single placeholder. + """ + # Prepare inputs. + txt = """ + Text. + <<>> + More text. + """ + protected_map = { + "<<>>": "" + } + # Prepare outputs. + expected = """ + Text. + + More text. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test4(self) -> None: + """ + Test with empty map (no-op). + """ + # Prepare inputs. + txt = """ + Text line 1. + Text line 2. + Text line 3. + """ + protected_map = {} + # Prepare outputs. + expected = """ + Text line 1. + Text line 2. + Text line 3. + """ + # Run test. + self.helper(txt, protected_map, expected) + + def test5(self) -> None: + """ + Test restoring empty content. + """ + # Prepare inputs. + txt = """ + Before. + ``` + <<>> + ``` + After. + """ + protected_map = {"<<>>": ""} + # Prepare outputs. + expected = """ + Before. + ``` + + ``` + After. + """ + # Run test. 
+ self.helper(txt, protected_map, expected) + + +# ############################################################################# +# Test_extract_restore_roundtrip +# ############################################################################# + + +class Test_extract_restore_roundtrip(hunitest.TestCase): + """ + Test that extract followed by restore is identity operation. + """ + + def helper(self, txt: str, file_type: str) -> None: + """ + Test helper for roundtrip (extract then restore). + + :param txt: Input text + :param file_type: File type ('md', 'txt', or 'tex') + """ + # Prepare inputs. + lines = txt.split("\n") + lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) + original = "\n".join(lines) + # Run test. + extracted_lines, protected_map = htexprot.extract_protected_content( + lines, file_type + ) + restored_lines = htexprot.restore_protected_content( + extracted_lines, protected_map + ) + # Check outputs. + actual = "\n".join(restored_lines) + self.assert_equal(actual, original) + + def test1(self) -> None: + """ + Test roundtrip with fenced blocks. + """ + # Prepare inputs. + txt = """ + # Title + Some text. + ```python + def foo(): + return 42 + ``` + More text. + """ + file_type = "md" + # Run test. + self.helper(txt, file_type) + + def test2(self) -> None: + """ + Test roundtrip with mixed content. + """ + # Prepare inputs. + txt = """ + Text. + ```python + code + ``` + + $$ + E = mc^2 + $$ + End. + """ + file_type = "md" + # Run test. + self.helper(txt, file_type) + + def test3(self) -> None: + """ + Test roundtrip with LaTeX comments. + """ + # Prepare inputs. + txt = """ + LaTeX text. + % Comment 1 + More text. + % Comment 2 + End. + """ + file_type = "tex" + # Run test. + self.helper(txt, file_type) + + def test4(self) -> None: + """ + Test roundtrip with no protected content. + """ + # Prepare inputs. + txt = """ + Just regular text. + No special content here. + Just plain paragraphs. + """ + file_type = "md" + # Run test. 
+ self.helper(txt, file_type) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py new file mode 100644 index 000000000..ff57a87c0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py @@ -0,0 +1,24 @@ +import logging +import time + +import helpers.htimer as htimer +import helpers.hunit_test as hunitest + + +# ############################################################################# +# TestTimedScope +# ############################################################################# + + +class TestTimedScope(hunitest.TestCase): + def test_1(self) -> None: + """ + Test that elapsed time is correctly computed. + """ + # Run the function to test. + with htimer.TimedScope(logging.INFO, "Test") as ts: + time.sleep(1) + # Round actual time up to 1 decimal and compare it with expected. 
+ actual_rounded_time = round(ts.elapsed_time, 1) + expected_rounded_time = 1.0 + self.assertEqual(actual_rounded_time, expected_rounded_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py new file mode 100644 index 000000000..808a2221e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py @@ -0,0 +1,474 @@ +import logging +from typing import List + +import helpers.hdbg as hdbg +import helpers.hprint as hprint +import helpers.htraceback as htraceb +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_Traceback1 +# ############################################################################# + + +class Test_Traceback1(hunitest.TestCase): + def test_parse0(self) -> None: + txt = """ + + TEST + Traceback + TEST + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + txt = hprint.dedent(txt) + _LOG.debug("txt=\n%s", txt) + purify_from_client = False + # Run the function under test. + act_cfile, act_traceback = htraceb.parse_traceback( + txt, purify_from_client=purify_from_client + ) + # Check. 
+ exp_traceback = """Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": +NameError: name 'repo_short_name' is not defined + TEST TEST TEST""" + self.assertEqual(act_traceback, exp_traceback) + + # pylint: disable=line-too-long + # TODO(gp): Add test and fix for the following traceback: + + # Bug1: + # Traceback (most recent call last): + # File "/Users/saggese/src/venv/amp.client_venv/bin/invoke", line 8, in + # sys.exit(program.run()) + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 373, in run + # self.parse_collection() + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 465, in parse_collection + # self.load_collection() + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 696, in load_collection + # module, parent = loader.load(coll_name) + # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/loader.py", line 76, in load + # module = imp.load_module(name, fd, path, desc) + # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 234, in load_module + # return load_source(name, filename, file) + # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 171, in load_source + # module = _load(spec) + # File "", line 711, in _load + # File "", line 680, in _load_unlocked + # File "", line 855, in exec_module + # File "", line 228, in _call_with_frames_removed + # File 
"/Users/saggese/src/lem1/amp/tasks.py", line 8, in + # from helpers.lib_tasks import set_default_params # This is not an invoke target. + # File "/Users/saggese/src/lem1/amp/helpers/lib_tasks.py", line 23, in + # import helpers.hgit as hgit + # File "/Users/saggese/src/lem1/amp/helpers/git.py", line 16, in + # import helpers.hsystem as hsystem + # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 + # signature2 = _compute_file_signature(file_name, dir_depth) + # ^ + # SyntaxError: invalid syntax + # Traceback (most recent call last): + # File "/Users/saggese/src/lem1/amp/dev_scripts/tg.py", line 21, in + # import helpers.hsystem as hsystem + # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 + # signature2 = _compute_file_signature(file_name, dir_depth) + # ^ + # SyntaxError: invalid syntax + + # Bug2: + # Traceback (most recent call last): + # File "/app/amp/dataflow/pipelines/real_time/test/test_dataflow_amp_real_time_pipeline.py", line 46, in test1 + # ) = mdmdinex.get_ReplayedTimeMarketData_example2( + # TypeError: get_ReplayedTimeMarketData_example2() got an unexpected keyword argument 'df' + # + # 13:34:45 INFO traceback_to_cfile : _main : 76 : in_file_name=log.txt + # 13:34:45 INFO parser : read_file : 304 : Reading from 'log.txt' + # 13:34:45 ERROR traceback_to_cfile : _main : 87 : Can't find traceback in the file + + # Bug3: + # =================================== FAILURES =================================== + # _________________________ TestGetDataForInterval.test1 _________________________ + # Traceback (most recent call last): + # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3361, in get_loc + # return self._engine.get_loc(casted_key) + # File "pandas/_libs/index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc + # File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc + # File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in 
pandas._libs.hashtable.PyObjectHashTable.get_item + # File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item + # KeyError: 'end_ts' + # + # The above exception was the direct cause of the following exception: + # + # Traceback (most recent call last): + # File "/app/amp/market_data/test/test_market_data_client.py", line 46, in test1 + # data = market_data_client.get_data_for_interval( + # File "/app/amp/market_data/market_data.py", line 212, in get_data_for_interval + # df = self._get_data( + # File "/app/amp/market_data/market_data_client.py", line 93, in _get_data + # market_data["start_ts"] = market_data["end_ts"] - pd.Timedelta( + # File "/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__ + # indexer = self.columns.get_loc(key) + # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc + # raise KeyError(key) from err + # KeyError: 'end_ts' + + # Bug4: + # dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.01 s) FAILED [100%] + # + # =================================== FAILURES =================================== + # __________________ Test_get_configs_from_command_line1.test1 ___________________ + # Traceback (most recent call last): + # File "/app/dataflow/model/test/test_experiment_utils.py", line 35, in test1 + # configs = dtfmoexuti.get_configs_from_command_line(args) + # File "/app/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + # configs = cconfig.get_configs_from_builder(config_builder) + # File "/app/config_root/config/builder.py", line 48, in get_configs_from_builder + # imp = importlib.import_module(import_) + # File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + # return _bootstrap._gcd_import(name[level:], package, level) + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 961, in 
_find_and_load_unlocked + # File "", line 219, in _call_with_frames_removed + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 961, in _find_and_load_unlocked + # File "", line 219, in _call_with_frames_removed + # File "", line 1014, in _gcd_import + # File "", line 991, in _find_and_load + # File "", line 973, in _find_and_load_unlocked + # ModuleNotFoundError: No module named 'research' + # ============================= slowest 3 durations ============================== + + # pylint: enable=line-too-long + + def _parse_traceback_helper( + self, + txt: str, + purify_from_client: bool, + exp_cfile: str, + exp_traceback: str, + ) -> None: + hdbg.dassert_isinstance(txt, str) + hdbg.dassert_isinstance(exp_cfile, str) + hdbg.dassert_isinstance(exp_traceback, str) + txt = hprint.dedent(txt) + _LOG.debug("txt=\n%s", txt) + # Run the function under test. + act_cfile, act_traceback = htraceb.parse_traceback( + txt, purify_from_client=purify_from_client + ) + _LOG.debug("act_cfile=\n%s", act_cfile) + _LOG.debug("act_traceback=\n%s", act_traceback) + # Compare cfile. + act_cfile = htraceb.cfile_to_str(act_cfile) + exp_cfile = hprint.dedent(exp_cfile) + _LOG.debug(hprint.to_str("exp_cfile act_cfile")) + self.assert_equal( + act_cfile, exp_cfile, fuzzy_match=True, purify_text=True + ) + # Compare traceback. + # Handle `None`. + act_traceback = str(act_traceback) + exp_traceback = hprint.dedent(exp_traceback) + _LOG.debug(hprint.to_str("exp_traceback act_traceback")) + self.assert_equal( + act_traceback, exp_traceback, fuzzy_match=True, purify_text=True + ) + + def test_parse1(self) -> None: + """ + Parse traceback with all files from Docker that actually exist in the + current repo. 
+ """ + txt = """ + + TEST + Traceback + TEST + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + purify_from_client = False + # pylint: disable=line-too-long + exp_cfile = [ + ( + "$GIT_ROOT/helpers/test/test_lib_tasks.py", + 27, + "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)", + ), + ( + "$GIT_ROOT/helpers/lib_tasks.py", + 1265, + "_get_gh_issue_title:task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name)", + ), + ( + "$GIT_ROOT/helpers/git.py", + 397, + 'get_task_prefix_from_repo_short_name:if repo_short_name == "amp":', + ), + ] + exp_cfile = htraceb.cfile_to_str(exp_cfile) + # pylint: enable=line-too-long + exp_traceback = """ + Traceback (most recent call last): + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 + actual = ltasks._get_gh_issue_title(issue_id, repo) + File "$GIT_ROOT/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title + task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) + File "$GIT_ROOT/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name + if repo_short_name == "amp": + NameError: name 'repo_short_name' is not defined + TEST TEST TEST + """ + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse_empty_traceback1(self) -> None: + """ + Parse an empty traceback file. 
+ """ + txt = """ + + TEST + Traceback + TEST TEST TEST + """ + purify_from_client = True + exp_cfile: List[htraceb.CfileRow] = [] + exp_cfile = htraceb.cfile_to_str(exp_cfile) + exp_traceback = "None" + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse2(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # Use references to this file so that we are independent of the file + # layout. + # pylint: disable=line-too-long + txt = """ + Traceback (most recent call last): + File "./helpers/test/test_htraceback.py", line 146, in + _main(_parse()) + File "./helpers/test/test_htraceback.py", line 105, in _main + configs = cdtfut.get_configs_from_command_line(args) + File "/app/amp/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line + "config_builder": args.config_builder, + """ + purify_from_client = True + exp_cfile = """ + helpers/test/test_htraceback.py:146::_main(_parse()) + helpers/test/test_htraceback.py:105:_main:configs = cdtfut.get_configs_from_command_line(args) + helpers/test/test_htraceback.py:228:get_configs_from_command_line:"config_builder": args.config_builder, + """ + exp_traceback = """ + Traceback (most recent call last): + File "./helpers/test/test_htraceback.py", line 146, in + _main(_parse()) + File "./helpers/test/test_htraceback.py", line 105, in _main + configs = cdtfut.get_configs_from_command_line(args) + File "$GIT_ROOT/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line + "config_builder": args.config_builder, + """ + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse3(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # Use references to this file so that we are independent from the file + # layout. 
+ # pylint: disable=line-too-long + txt = """ + collected 6 items + + helpers/test/test_lib_tasks.py::Test_pytest_failed1::test_classes1 (0.02 s) FAILED [ 16%] + + =================================== FAILURES =================================== + ______________________ Test_pytest_failed1.test_classes1 _______________________ + Traceback (most recent call last): + File "/app/amp/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 + self._helper(file_name, target_type, expected) + File "/app/amp/helpers/test/test_lib_tasks.py", line 1440, in _helper + actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "/app/amp/helpers/lib_tasks.py", line 2140, in pytest_failed + hdbg.dassert(m, "Invalid test='%s'", test) + File "/app/amp/helpers/dbg.py", line 129, in dassert + _dfatal(txt, msg, *args) + File "/app/amp/helpers/dbg.py", line 117, in _dfatal + dfatal(dfatal_txt) + File "/app/amp/helpers/dbg.py", line 63, in dfatal + raise assertion_type(ret) + AssertionError: + * Failed assertion * + cond=None + Invalid test='dev_scripts/testing/test/test_run_tests.py' + """ + # pylint: enable=line-too-long + purify_from_client = False + exp_cfile = """ + $GIT_ROOT/helpers/test/test_lib_tasks.py:1460:test_classes1:self._helper(file_name, target_type, expected) + $GIT_ROOT/helpers/test/test_lib_tasks.py:1440:_helper:actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + /venv/lib/python3.8/site-packages/invoke/tasks.py:127:__call__:result = self.body(*args, **kwargs) + $GIT_ROOT/helpers/lib_tasks.py:2140:pytest_failed:hdbg.dassert(m, "Invalid test='%s'", test) + $GIT_ROOT/helpers/dbg.py:129:dassert:_dfatal(txt, msg, *args) + $GIT_ROOT/helpers/dbg.py:117:_dfatal:dfatal(dfatal_txt) + $GIT_ROOT/helpers/dbg.py:63:dfatal:raise assertion_type(ret)""" + exp_traceback = r""" + Traceback (most recent call last): + File 
"$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 + self._helper(file_name, target_type, expected) + File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1440, in _helper + actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, + File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ + result = self.body(*args, **kwargs) + File "$GIT_ROOT/helpers/lib_tasks.py", line 2140, in pytest_failed + hdbg.dassert(m, "Invalid test='%s'", test) + File "$GIT_ROOT/helpers/dbg.py", line 129, in dassert + _dfatal(txt, msg, *args) + File "$GIT_ROOT/helpers/dbg.py", line 117, in _dfatal + dfatal(dfatal_txt) + File "$GIT_ROOT/helpers/dbg.py", line 63, in dfatal + raise assertion_type(ret) + AssertionError: + * Failed assertion * + cond=None + Invalid test='dev_scripts/testing/test/test_run_tests.py' + """ + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse4(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. 
+ """ + # pylint: disable=line-too-long + txt = """ + =================================== FAILURES =================================== + ____________ TestEgSingleInstrumentDataReader2.test_true_real_time1 ____________ + Traceback (most recent call last): + File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, in test_true_real_time1 + self._execute_node(node) + File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node + dict_ = node.fit() + File "/app/amp/core/dataflow/nodes/sources.py", line 385, in fit + self.df = self._get_data_until_current_time() + File "/app/amp/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time + df = self._get_data() + File "/app/amp/core/dataflow/nodes/sources.py", line 574, in _get_data + hdbg.dassert_lte(df.index.max(), current_time) + File "/app/amp/helpers/dbg.py", line 172, in dassert_lte + cond = val1 <= val2 + TypeError: '<=' not supported between instances of 'float' and 'Timestamp' + ============================= slowest 3 durations ============================== + """ + purify_from_client = False + exp_cfile = r""" + $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:182:test_true_real_time1:self._execute_node(node) + $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:238:_execute_node:dict_ = node.fit() + $GIT_ROOT/core/dataflow/nodes/sources.py:385:fit:self.df = self._get_data_until_current_time() + $GIT_ROOT/core/dataflow/nodes/sources.py:429:_get_data_until_current_time:df = self._get_data() + $GIT_ROOT/core/dataflow/nodes/sources.py:574:_get_data:hdbg.dassert_lte(df.index.max(), current_time) + $GIT_ROOT/helpers/dbg.py:172:dassert_lte:cond = val1 <= val2/TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" + exp_traceback = r""" + Traceback (most recent call last): + File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, 
in test_true_real_time1 + self._execute_node(node) + File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node + dict_ = node.fit() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 385, in fit + self.df = self._get_data_until_current_time() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time + df = self._get_data() + File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 574, in _get_data + hdbg.dassert_lte(df.index.max(), current_time) + File "$GIT_ROOT/helpers/dbg.py", line 172, in dassert_lte + cond = val1 <= val2 + TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) + + def test_parse5(self) -> None: + """ + Parse a traceback file with both files from Docker and local files. + """ + # pylint: disable=line-too-long + txt = """ + Traceback (most recent call last): + File "/app/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked + ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' + """ + purify_from_client = False + exp_cfile = """ + $GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py:37:test1:configs = dtfmoexuti.get_configs_from_command_line(args) + 
$GIT_ROOT/dataflow/model/experiment_utils.py:195:get_configs_from_command_line:configs = cconfig.get_configs_from_builder(config_builder) + $GIT_ROOT/config_root/config/builder.py:46:get_configs_from_builder:imp = importlib.import_module(import_) + /usr/lib/python3.8/importlib/__init__.py:127:import_module:return _bootstrap._gcd_import(name[level:], package, level) + :1014:_gcd_import: + :991:_find_and_load: + :973:_find_and_load_unlocked: + """ + exp_traceback = """ + Traceback (most recent call last): + File "$GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 + configs = dtfmoexuti.get_configs_from_command_line(args) + File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line + configs = cconfig.get_configs_from_builder(config_builder) + File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder + imp = importlib.import_module(import_) + File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1014, in _gcd_import + File "", line 991, in _find_and_load + File "", line 973, in _find_and_load_unlocked + ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' + """ + # pylint: enable=line-too-long + self._parse_traceback_helper( + txt, purify_from_client, exp_cfile, exp_traceback + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py new file mode 100644 index 000000000..a6e1e2ef6 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py @@ -0,0 +1,954 @@ +""" +Import as: + +import helpers.test.test_unit_test as ttutes +""" + +import logging +import tempfile +from 
typing import Optional, Tuple + +import pandas as pd +import pytest + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur + +_LOG = logging.getLogger(__name__) + + +def _git_add(file_name: str) -> None: + # pylint: disable=unreachable + cmd = f"git add -u {file_name}" + _LOG.debug("> %s", cmd) + rc = hsystem.system(cmd, abort_on_error=False) + if rc: + _LOG.warning( + "Can't run '%s': you need to add the file manually", + cmd, + ) + + +def _to_skip_on_update_outcomes() -> bool: + """ + Determine whether to skip on `--update_outcomes`. + + Some tests can't pass with `--update_outcomes`, since they exercise + the logic in `--update_outcomes` itself. + + We can't always use `@pytest.mark.skipif(hunitest.get_update_tests)` + since pytest decides which tests need to be run before the variable + is actually set. + """ + to_skip = False + if hunitest.get_update_tests(): + _LOG.warning( + "Skip this test since it exercises the logic for --update_outcomes" + ) + to_skip = True + return to_skip + + +# ############################################################################# +# TestTestCase1 +# ############################################################################# + + +class TestTestCase1(hunitest.TestCase): + """ + Test free-standing functions in unit_test.py. + """ + + def test_get_input_dir1(self) -> None: + """ + Test hunitest.get_input_dir(). 
+ """ + actual = self.get_input_dir() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir1/input" + self.assertEqual(actual, expected) + + def test_get_input_dir2(self) -> None: + use_only_test_class = False + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/input" + self.assertEqual(actual, expected) + + def test_get_input_dir3(self) -> None: + use_only_test_class = False + test_class_name = None + test_method_name = None + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir3/input" + self.assertEqual(actual, expected) + + def test_get_input_dir4(self) -> None: + use_only_test_class = True + test_class_name = None + test_method_name = None + actual = self.get_input_dir( + use_only_test_class=use_only_test_class, + test_class_name=test_class_name, + test_method_name=test_method_name, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1/input" + self.assertEqual(actual, expected) + + def test_get_output_dir1(self) -> None: + """ + Test hunitest.get_output_dir(). 
+ """ + actual = self.get_output_dir() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_output_dir1/output" + self.assertEqual(actual, expected) + + def test_get_scratch_space1(self) -> None: + """ + Test hunitest.get_scratch_space(). + """ + actual = self.get_scratch_space() + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ( + "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_scratch_space1" + "/tmp.scratch" + ) + self.assertEqual(actual, expected) + + def test_get_scratch_space2(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_scratch_space( + test_class_name=test_class_name, test_method_name=test_method_name + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ( + "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/tmp.scratch" + ) + self.assertEqual(actual, expected) + + def test_get_scratch_space3(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + use_absolute_path = False + actual = self.get_scratch_space( + test_class_name=test_class_name, + test_method_name=test_method_name, + use_absolute_path=use_absolute_path, + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = "outcomes/test_class.test_method/tmp.scratch" + self.assertEqual(actual, expected) + + def test_get_s3_scratch_dir1(self) -> None: + actual = self.get_s3_scratch_dir() + _LOG.debug("actual=%s", actual) + # It is difficult to test, so we just execute. 
+ + def test_get_s3_scratch_dir2(self) -> None: + test_class_name = "test_class" + test_method_name = "test_method" + actual = self.get_s3_scratch_dir( + test_class_name=test_class_name, test_method_name=test_method_name + ) + _LOG.debug("actual=%s", actual) + # It is difficult to test, so we just execute. + + def test_assert_equal1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_assert_not_equal1(self) -> None: + actual = "hello world" + expected = "hello world w" + tmp_dir = tempfile.mkdtemp() + with self.assertRaises(RuntimeError): + self.assert_equal(actual, expected, dst_dir=tmp_dir) + + def test_assert_not_equal2(self) -> None: + actual = "hello world" + expected = "hello world w" + # Create a dir like `/var/tmp/tmph_kun9xq`. + tmp_dir = tempfile.mkdtemp() + self.assert_equal( + actual, expected, abort_on_error=False, dst_dir=tmp_dir + ) + # Compute the signature from the dir. + actual = hunitest.get_dir_signature( + tmp_dir, include_file_content=True, num_lines=None + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + actual = actual.replace(tmp_dir, "$TMP_DIR") + # pylint: disable=line-too-long + expected = """ + # Dir structure + $TMP_DIR + $TMP_DIR/tmp_diff.sh + # File signatures + len(file_names)=1 + file_names=$TMP_DIR/tmp_diff.sh + # $TMP_DIR/tmp_diff.sh + num_lines=8 + ''' + #!/bin/bash + if [[ $1 == "wrap" ]]; then + cmd='vimdiff -c "windo set wrap"' + else + cmd='vimdiff' + fi; + cmd="$cmd helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.actual.txt helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.expected.txt" + eval $cmd + + ''' + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert_equal_fuzzy_match1(self) -> None: + actual = "hello world" + expected = "hello world " + is_equal = self.assert_equal(actual, expected, fuzzy_match=True) + 
self.assertTrue(is_equal) + + def test_assert_equal5(self) -> None: + actual = "hello world" + expected = "hello world2" + with self.assertRaises(RuntimeError): + self.assert_equal(actual, expected, fuzzy_match=True) + + def _remove_lines1(self) -> None: + txt = r""" + # ##################################################################### + * Failed assertion * + 'in1' not in '{'in1': 'out1'}' + ## + `in1` already receiving input from node n1 + # ##################################################################### + # ##################################################################### + """ + actual = hunitest._remove_spaces(txt) + expected = r""" + * Failed assertion * + 'in1' not in '{'in1': 'out1'}' + ## + `in1` already receiving input from node n1 + # ##################################################################### + """ + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# Test_AssertEqual1 +# ############################################################################# + + +class Test_AssertEqual1(hunitest.TestCase): + def test_equal1(self) -> None: + """ + Matching actual and expected without fuzzy matching. + """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + is_equal = hunitest.assert_equal(actual, expected, test_name, test_dir) + _LOG.debug(hprint.to_str("is_equal")) + self.assertTrue(is_equal) + + def test_equal2(self) -> None: + """ + Matching actual and expected with fuzzy matching. 
+ """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + fuzzy_match = True + is_equal = hunitest.assert_equal( + actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match + ) + _LOG.debug(hprint.to_str("is_equal")) + self.assertTrue(is_equal) + + def test_not_equal1(self) -> None: + """ + Mismatching actual and expected. + """ + actual = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +""" + test_name = self._get_test_name() + test_dir = self.get_scratch_space() + fuzzy_match = False + with self.assertRaises(RuntimeError) as cm: + hunitest.assert_equal( + actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match + ) + # Check that the assertion is what expected. 
+ actual = str(cm.exception) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + expected = ''' +-------------------------------------------------------------------------------- +ACTUAL vs EXPECTED: Test_AssertEqual1.test_not_equal1 +-------------------------------------------------------------------------------- + + ( +completed failure Lint Run_linter | completed failure Lint Run_linter +completed success Lint Fast_tests ( +completed success Lint Slow_tests ( +Diff with: +> ./tmp_diff.sh +-------------------------------------------------------------------------------- +ACTUAL VARIABLE: Test_AssertEqual1.test_not_equal1 +-------------------------------------------------------------------------------- +expected = r""" +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests +"""''' + if actual != expected: + hio.to_file("actual.txt", actual) + hio.to_file("expected.txt", expected) + self.assert_equal(actual, expected, fuzzy_match=False) + # We don't use self.assert_equal() since this is exactly we are testing, + # so we use a trusted function. + self.assertEqual(actual, expected) + + # For debugging: don't commit code with this test enabled. + @pytest.mark.skip( + reason="This is only used to debug the debugging the infrastructure" + ) + def test_not_equal_debug(self) -> None: + """ + Create a mismatch on purpose to see how the suggested updated to + expected variable looks like. 
+ """ + actual = r"""empty +start + +completed failure Lint Run_linter +completed success Lint Fast_tests +completed success Lint Slow_tests + +end + +""" + expected = "hello" + self.assert_equal(actual, expected, fuzzy_match=False) + + +# ############################################################################# +# TestCheckString1 +# ############################################################################# + + +class TestCheckString1(hunitest.TestCase): + def test_check_string1(self) -> None: + """ + Compare the actual value to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Overwrite the golden file, so that --update_golden doesn't matter. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string(actual) + # Actual match the golden outcome and it wasn't updated. + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_string_not_equal1(self) -> None: + """ + Compare the actual value to a mismatching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + # Actual doesn't match the golden outcome. 
+ self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + + def test_check_string_not_equal2(self) -> None: + """ + Compare the actual value to a mismatching golden outcome and udpate it. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # Force updating the golden outcomes. + self.mock_update_tests() + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + new_golden = hio.from_file(file_name) + _git_add(file_name) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + # The golden outcome was updated. + self.assertEqual(new_golden, "hello world") + + def test_check_string_not_equal3(self) -> None: + """ + Like test_check_string_not_equal1() but raising the exception. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + golden_outcome = "hello world2" + # + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.to_file(file_name, golden_outcome) + try: + # Check. + with self.assertRaises(RuntimeError): + self.check_string(actual) + finally: + # Clean up. + hio.to_file(file_name, golden_outcome) + _git_add(file_name) + + def test_check_string_missing1(self) -> None: + """ + When running with --update_outcomes, the golden outcome was missing and + so it was added. + + This tests the code path when action_on_missing_golden="update". + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + # Force updating the golden outcomes. 
+ self.mock_update_tests() + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False + ) + hdbg.dassert_file_exists(file_name) + new_golden = hio.from_file(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + def test_check_string_missing2(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="assert", and the unit test framework + asserted. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False, action_on_missing_golden="assert" + ) + hdbg.dassert_file_exists(file_name + ".tmp") + new_golden = hio.from_file(file_name + ".tmp") + finally: + # Clean up. + hio.delete_file(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertFalse(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + def test_check_string_missing3(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="update", and the unit test framework updates + the golden. + """ + if _to_skip_on_update_outcomes(): + return + actual = "hello world" + tag = "test" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. 
+ outcome_updated, file_exists, is_equal = self.check_string( + actual, abort_on_error=False, action_on_missing_golden="update" + ) + hdbg.dassert_file_exists(file_name) + new_golden = hio.from_file(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # + self.assertEqual(new_golden, "hello world") + + +# ############################################################################# +# TestCheckDataFrame1 +# ############################################################################# + + +class TestCheckDataFrame1(hunitest.TestCase): + """ + Some of these tests can't pass with `--update_outcomes`, since they + exercise the logic in `--update_outcomes` itself. + """ + + def _check_df_helper( + self, actual: pd.DataFrame, abort_on_error: bool, err_threshold: float + ) -> Tuple[bool, bool, Optional[bool]]: + golden_outcomes = pd.DataFrame( + [[0, 1, 2], [3, 4, 5]], columns="a b c".split() + ) + # + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + # Overwrite the golden file, so that --update_golden doesn't matter. + hio.create_enclosing_dir(file_name, incremental=True) + golden_outcomes.to_csv(file_name) + try: + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, + abort_on_error=abort_on_error, + err_threshold=err_threshold, + ) + finally: + # Clean up. + golden_outcomes.to_csv(file_name) + _git_add(file_name) + return outcome_updated, file_exists, is_equal + + def test_check_df_equal1(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. 
+ """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.0001 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_equal2(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.01, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_equal3(self) -> None: + """ + Compare the actual value of a df to a matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.05, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome matches the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertTrue(is_equal) + + def test_check_df_not_equal1(self) -> None: + """ + Compare the actual value of a df to a non-matching golden outcome. 
+ """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = False + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome doesn't match the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + exp_error_msg = """ + actual= + a b c + 0 0 1.06 2 + 1 3 4.00 5 + expected= + a b c + 0 0 1 2 + 1 3 4 5 + actual_masked= + [[ nan 1.06 nan] + [ nan nan nan]] + expected_masked= + [[nan 1. nan] + [nan nan nan]] + err= + [[ nan 0.06 nan] + [ nan nan nan]] + max_err=0.060 + """ + self.assert_equal(self._error_msg, exp_error_msg, fuzzy_match=True) + + def test_check_df_not_equal2(self) -> None: + """ + Compare the actual value of a df to a not matching golden outcome. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a d c".split()) + abort_on_error = False + err_threshold = 0.05 + outcome_updated, file_exists, is_equal = self._check_df_helper( + actual, abort_on_error, err_threshold + ) + # Actual outcome doesn't match the golden outcome and it wasn't updated. + self.assertFalse(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + + def test_check_df_not_equal3(self) -> None: + """ + Compare the actual value to a mismatching golden outcome and update it. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + golden_outcome = pd.DataFrame( + [[0, 2, 2], [3, 4, 5]], columns="a b c".split() + ) + # Force updating the golden outcomes. + self.mock_update_tests() + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + # Modify the golden. + hio.create_enclosing_dir(file_name, incremental=True) + golden_outcome.to_csv(file_name) + try: + # Check. 
+ outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False + ) + # + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.to_file(file_name, str(golden_outcome)) + _git_add(file_name) + # Actual doesn't match the golden outcome and it was updated. + self.assertTrue(outcome_updated) + self.assertTrue(file_exists) + self.assertFalse(is_equal) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_not_equal4(self) -> None: + """ + Like `test_check_df_not_equal1()` but raising the exception. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) + abort_on_error = True + err_threshold = 0.05 + with self.assertRaises(RuntimeError): + self._check_df_helper(actual, abort_on_error, err_threshold) + + def test_check_df_missing1(self) -> None: + """ + When running with --update_outcomes, the golden outcome was missing and + so it was added. + + This tests the code path when action_on_missing_golden="update". + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + # Force updating the golden outcomes. + self.mock_update_tests() + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + _LOG.debug(hprint.to_str("file_name")) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False + ) + hdbg.dassert_file_exists(file_name) + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.delete_file(file_name) + _git_add(file_name) + # Expected outcome doesn't exists and it was updated. + self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertFalse(is_equal) + # Check golden. 
+ self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_missing2(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="assert", and the unit test framework + asserted. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False, action_on_missing_golden="assert" + ) + hdbg.dassert_file_exists(file_name + ".tmp") + new_golden = pd.read_csv(file_name + ".tmp", index_col=0) + hdbg.dassert_path_not_exists(file_name) + finally: + # Clean up. + hio.delete_file(file_name) + # Expected outcome doesn't exists and it was not updated. + self.assertFalse(outcome_updated) + self.assertFalse(file_exists) + self.assertIs(is_equal, None) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + def test_check_df_missing3(self) -> None: + """ + Without running with --update_outcomes, the golden outcome was missing, + action_on_missing_golden="update", and the unit test framework updates + the golden. + """ + if _to_skip_on_update_outcomes(): + return + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + tag = "test_df" + _, file_name = self._get_golden_outcome_file_name(tag) + try: + # Remove the golden. + hio.delete_file(file_name) + # Check. + outcome_updated, file_exists, is_equal = self.check_dataframe( + actual, abort_on_error=False, action_on_missing_golden="update" + ) + hdbg.dassert_file_exists(file_name) + new_golden = pd.read_csv(file_name, index_col=0) + finally: + # Clean up. + hio.delete_file(file_name) + # Expected outcome doesn't exists and it was not updated. 
+ self.assertTrue(outcome_updated) + self.assertFalse(file_exists) + self.assertIs(is_equal, None) + # Check golden. + self.assert_equal(str(new_golden), str(actual)) + + +# ############################################################################# +# Test_check_string_debug1 +# ############################################################################# + + +class Test_check_string_debug1(hunitest.TestCase): + def test1(self) -> None: + actual = "hello" + # action_on_missing_golden = "assert" + action_on_missing_golden = "update" + self.check_string( + actual, action_on_missing_golden=action_on_missing_golden + ) + + def test2(self) -> None: + actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) + # action_on_missing_golden = "assert" + action_on_missing_golden = "update" + self.check_dataframe( + actual, action_on_missing_golden=action_on_missing_golden + ) + + +# ############################################################################# +# Test_get_dir_signature1 +# ############################################################################# + + +class Test_get_dir_signature1(hunitest.TestCase): + def helper(self, include_file_content: bool) -> str: + in_dir = self.get_input_dir() + actual = hunitest.get_dir_signature( + in_dir, include_file_content, num_lines=None + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + return actual # type: ignore[no-any-return] + + def test1(self) -> None: + """ + Test dir signature excluding the file content. 
+ """ + include_file_content = False + actual = self.helper(include_file_content) + # pylint: disable=line-too-long + expected = r""" + # Dir structure + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0 + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1 + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt + $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log + """ + # pylint: enable=line-too-long + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Test dir signature including the file content. + """ + include_file_content = True + actual = self.helper(include_file_content) + # The golden outcome is long and uninteresting so we use check_string. 
+ self.check_string(actual, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py new file mode 100644 index 000000000..14910d1f5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py @@ -0,0 +1,288 @@ +import logging +import unittest.mock as umock +from typing import Any + +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +def _check(self: Any, str_to_eval: str, exp_val: str) -> None: + """ + Evaluate `str_to_eval` and compare it to expected value `exp_val`. + """ + # The variable lives 3 levels in the stack trace from here. + act_val = hprint.to_str(str_to_eval, frame_level=3) + _LOG.debug("%s", act_val) + self.assert_equal(act_val, exp_val, purify_text=True) + + +# ############################################################################# +# _Class +# ############################################################################# + + +class _Class: + def __init__(self) -> None: + self.a = 3 + self.b = 14 + + def get_a(self) -> int: + return self.a + + def get_b(self) -> int: + return self.b + + +# ############################################################################# +# _TestCase +# ############################################################################# + + +class _TestCase(hunitest.TestCase): + def check(self, *args, **kwargs) -> None: + _check(self, *args, **kwargs) + + +# ############################################################################# +# Test_Mock1 +# ############################################################################# + + +# References +# - https://docs.python.org/3/library/unittest.mock.html +# - 
https://realpython.com/python-mock-library/ +# +# - Mocks are used to imitate objects in the code base and need to have the same +# interface of objects they are replacing +# - `Mock` and `MagicMock` objects +# - avoid to create stubs by creating attributes and methods as they are +# accessed +# - accessing the same attribute returns the same mock +# - can be configured to specify return values +# - store details of how they have been used +# - After execution, one can make assertions about how mocks have been used + +# umock.Mockspec +# :param spec: specification for the mock object, e.g., using a class to create +# the proper interface + + +# ############################################################################# +# Test_Mock1 +# ############################################################################# + + +class Test_Mock1(_TestCase): + """ + - A `Mock` creates attributes / methods as you access them + - The return value of a mocked attribute / method is also a `Mock` + """ + + def test_lazy_attributes1(self) -> None: + """ + Assigning a class attribute on a Mock creates a Mock. + """ + obj = umock.Mock() + # obj is a Mock object. + self.check("obj", "obj=") + # Calling an attribute creates a Mock. + self.check("obj.a", "obj.a=") + # Assigning an attribute in the mock creates an attribute. + obj.a = 3 + self.check("obj.a", "obj.a=3") + + def test_lazy_methods1(self) -> None: + """ + Calling a method on a Mock creates a Mock. + """ + # Mock json module `import json`. + json = umock.Mock() + self.check("json", "json=") + # Create a function on the fly that returns a mock. + v = json.dumps() + self.assertTrue(isinstance(v, umock.Mock)) + self.check("json.dumps", "json.dumps=") + # The mocked function and the returned value from a mock function are + # different mocks. 
+ self.check("v", "v=") + self.check("type(v)", "type(v)=") + self.check( + "json.dumps()", "json.dumps()=" + ) + self.assertTrue(isinstance(json.dumps, umock.Mock)) + self.assertNotEqual(id(v), id(json.dumps)) + + def test_assert1(self) -> None: + """ + Check what function was called. + """ + json = umock.Mock() + json.loads("hello") + # Check that the mocked function was called as expected. + json.loads.assert_called() + json.loads.assert_called_once() + json.loads.assert_called_with("hello") + self.assertEqual(json.loads.call_count, 1) + + def test_str1(self) -> None: + mock = umock.Mock() + # Calling `str()` on a mock creates a mock on the fly. + self.check("str(mock)", "str(mock)=\"\"") + # Assign a mocked function returning "hello" to mock.__str__. + mock.__str__ = umock.Mock(return_value="hello") + self.assertEqual(str(mock), "hello") + # One can't assign the return value, like one would do with a MagicMock. + # mock.__str__.return_value = "hello" + + def test_spec1(self) -> None: + # Create a Mock based on the class `_Class`. + mock = umock.Mock(spec=_Class) + # + self.assertTrue(isinstance(mock, _Class)) + mock.get_a = umock.Mock(return_value=3) + self.assertEqual(mock.get_a(), 3) + + +# ############################################################################# +# Test_MagicMock1 +# ############################################################################# + + +class Test_MagicMock1(_TestCase): + """ + A `MagicMock` is a subclass of `Mock` with some magic methods already + created. + """ + + def test_get1(self) -> None: + """ + Assign a MagicMock using array notation. + """ + mock = umock.MagicMock() + # MagicMock automatically infer `__get_item__()`. + mock[3] = "fish" + # Check. + mock.__setitem__.assert_called_with(3, "fish") + + def test_get2(self) -> None: + mock = umock.MagicMock() + mock.__getitem__.return_value = "result" + + def test_str1(self) -> None: + """ + Mock `str()` method. + """ + mock = umock.MagicMock() + # Mock `str()`. 
+ mock.__str__.return_value = "foobar" + # Check. + self.assertEqual(str(mock), "foobar") + mock.__str__.assert_called_with() + + +# ############################################################################# +# Test_Mock_Class1 +# ############################################################################# + + +class Test_Mock_Class1(_TestCase): + def test_without_mock1(self) -> None: + obj = _Class() + self.assertEqual(obj.get_a(), 3) + self.assertEqual(obj.get_b(), 14) + + def test_with_mock1(self) -> None: + obj = _Class() + # Mock method `get_a()`. + obj.get_a = umock.MagicMock(return_value=4) + # Check. + self.assertEqual(obj.get_a(), 4) + obj.get_a.assert_called() + + def test_with_mock2(self) -> None: + obj = _Class() + # Mock method `get_a()`. + obj.get_a = umock.MagicMock(side_effect=KeyError("foo")) + # Check. + with self.assertRaises(KeyError) as cm: + obj.get_a() + # + actual = str(cm.exception) + expected = "'foo'" + self.assert_equal(actual, expected) + obj.get_a.assert_called() + + +# ############################################################################# +# Test_Mock_Class_with_decorator1 +# ############################################################################# + +# `umock.patch()` +# - replaces classes in a particular module with a Mock object +# - by default creates a MagicMock + +# `umock.patch.object(target, attribute)` patches the named member "attribute" +# on the object "target" with a mock object. + + +# ############################################################################# +# Test_Mock_Class_with_decorator1 +# ############################################################################# + + +class Test_Mock_Class_with_decorator1(_TestCase): + @umock.patch.object(_Class, "get_a", return_value=4) + def test1(self, mock_method: umock.MagicMock) -> None: + """ + Patch method of an object using a decorator. + """ + obj = _Class() + # Check. 
+ # self.assertIs(mock_method, umock.MagicMock) + self.check( + "mock_method", "mock_method=" + ) + self.assertEqual(obj.get_a(), 4) + mock_method.assert_called() + obj.get_a.assert_called() + + +# ############################################################################# +# Test_Mock_Class_with_context_manager1 +# ############################################################################# + + +class Test_Mock_Class_with_context_manager1(_TestCase): + def test1(self) -> None: + """ + Patch an object method using a context manager. + """ + # Inside the context manager, the method is mocked. + with umock.patch.object(_Class, "get_a", return_value=4): + obj = _Class() + # Check. + self.check( + "obj.get_a", "obj.get_a=" + ) + self.assertEqual(obj.get_a(), 4) + obj.get_a.assert_called() + # Outside the context manager everything is normal. + obj = _Class() + # Check. + self.check( + "obj.get_a", + "obj.get_a=>", + ) + self.assertEqual(obj.get_a(), 3) + + def test_dict1(self) -> None: + """ + Patch a dictionary. + """ + foo = {"key": "value"} + with umock.patch.dict(foo, {"key": "new_value"}, clear=True): + self.assertEqual(foo["key"], "new_value") + # Outside the context manager everything is normal. 
+ self.assertEqual(foo["key"], "value") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py new file mode 100644 index 000000000..6488621a1 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py @@ -0,0 +1,1065 @@ +""" +Import as: + +import helpers.test.test_hunit_test_purification as thuntepur +""" + +import datetime +import logging +import os +import unittest.mock as umock +from typing import Any, List + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_purify_text1 +# ############################################################################# + + +class Test_purify_text1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str, **kwargs: Any) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(txt) + self.assert_equal(actual, expected, **kwargs) + + def test1(self) -> None: + txt = "amp/helpers/test/test_system_interaction.py" + expected = "helpers/test/test_system_interaction.py" + self.check_helper(txt, expected) + + def test2(self) -> None: + txt = "amp/helpers/test/test_system_interaction.py" + expected = "helpers/test/test_system_interaction.py" + self.check_helper(txt, expected) + + def test3(self) -> None: + txt = "['amp/helpers/test/test_system_interaction.py']" + expected = "['helpers/test/test_system_interaction.py']" + 
self.check_helper(txt, expected) + + def test4(self) -> None: + txt = "app.helpers.test.test_system_interaction.py" + expected = "helpers.test.test_system_interaction.py" + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test that longer paths are processed before shorter ones. + """ + txt = "/home/user/project/src/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user" + expected = "$GIT_ROOT/src/file.py" + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test that paths with multiple occurrences of the same pattern are + processed correctly. + """ + txt = "/home/user/project/src/project/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user" + expected = "$GIT_ROOT/src/project/file.py" + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test that paths with multiple patterns are processed in the correct + order. + """ + txt = "/home/user/project/src/project/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user/project/src" + expected = "$GIT_ROOT/src/project/file.py" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test that paths with no matching patterns are left unchanged. 
+ """ + txt = "/home/user/other/file.py" + with ( + umock.patch("helpers.hgit.get_client_root") as mock_git_root, + umock.patch("os.getcwd") as mock_pwd, + ): + mock_git_root.return_value = "/home/user/project" + mock_pwd.return_value = "/home/user/project/src" + expected = "/home/user/other/file.py" + self.check_helper(txt, expected) + + def test9(self) -> None: + super_module_path = hgit.get_client_root(super_module=True) + # TODO(gp): We should remove the current path. + # pylint: disable=line-too-long + txt = r""" + ************* Module input [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ + txt = hprint.dedent(txt) + txt = txt.replace("$SUPER_MODULE", super_module_path) + expected = r""" + ************* Module input [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] + $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] + cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. [pylint] + dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] + """ + # pylint: enable=line-too-long + self.check_helper(txt, expected, dedent=True) + + def test10(self) -> None: + """ + Test case when client root path is equal to `/` + """ + # pylint: disable=redefined-outer-name + hgit = umock.Mock() + hgit.get_client_root.return_value = "/" + txt = "/tmp/subdir1" + expected = txt + self.check_helper(txt, expected) + + def test11(self) -> None: + """ + Test the correct order of `app` -> `amp` purification with multiple + import statements. 
+ """ + txt = """ + import app.amp.helpers_root.helpers.test.test_file + from app.amp.helpers_root.helpers.hprint import dedent + import app.amp.helpers.config + from amp.app.helpers.config import get_config + import amp.app.helpers_root.config + """ + expected = """ + import helpers.test.test_file + from helpers.hprint import dedent + import helpers.config + from helpers.config import get_config + import helpers.config + """ + self.check_helper(txt, expected) + + def test12(self) -> None: + """ + Test amp and app purification in file path strings. + """ + txt = """ + app/amp/helpers_root/helpers/test/test_file.py + amp/app/helpers_root/helpers/test/test_file.py + """ + expected = """ + helpers/test/test_file.py + helpers/test/test_file.py + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_directory_paths1 +# ############################################################################# + + +class Test_purify_directory_paths1(hunitest.TestCase): + def check_helper(self, input_: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_directory_paths(input_) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test1(self) -> None: + """ + Test the replacement of `GIT_ROOT`. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/gitroot/src/subdir/file.py" + expected = "$GIT_ROOT/src/subdir/file.py" + self.check_helper(input_, expected) + + def test2(self) -> None: + """ + Test the replacement of `CSFY_HOST_GIT_ROOT_PATH`. 
+ """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/csfy_host_git_root/other/file.py" + expected = "$CSFY_HOST_GIT_ROOT_PATH/other/file.py" + self.check_helper(input_, expected) + + def test3(self) -> None: + """ + Test the replacement of `PWD`. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/documents/file.py" + expected = "$PWD/documents/file.py" + self.check_helper(input_, expected) + + def test4(self) -> None: + """ + Test the replacement when `GIT_ROOT`, `CSFY_HOST_GIT_ROOT_PATH` and + current working directory are the same. + """ + with ( + umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user" + ), + umock.patch.dict( + "os.environ", + {"CSFY_HOST_GIT_ROOT_PATH": "/home/user"}, + clear=True, + ), + umock.patch("os.getcwd", return_value="/home/user"), + ): + input_ = "/home/user/file.py" + expected = "$GIT_ROOT/file.py" + self.check_helper(input_, expected) + + +# ############################################################################# +# Test_purify_from_environment1 +# ############################################################################# + + +class Test_purify_from_environment1(hunitest.TestCase): + def check_helper(self, input_: str, expected: str) -> None: + try: + # Manually set a user name to test the behaviour. + hsystem.set_user_name("root") + # Run. 
+ text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_from_environment(input_) + self.assert_equal(actual, expected, fuzzy_match=True) + finally: + # Reset the global user name variable regardless of a test results. + hsystem.set_user_name(None) + + def test1(self) -> None: + input_ = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-root-1.0.0" + expected = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0" + self.check_helper(input_, expected) + + def test2(self) -> None: + input_ = "--name root.amp_test.app.app" + expected = "--name $USER_NAME.amp_test.app.app" + self.check_helper(input_, expected) + + def test3(self) -> None: + input_ = "run --rm -l user=root" + expected = "run --rm -l user=$USER_NAME" + self.check_helper(input_, expected) + + def test4(self) -> None: + input_ = "run_docker_as_root='True'" + expected = "run_docker_as_root='True'" + self.check_helper(input_, expected) + + def test5(self) -> None: + input_ = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" + expected = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" + self.check_helper(input_, expected) + + +# ############################################################################# +# Test_purify_amp_reference1 +# ############################################################################# + + +class Test_purify_amp_reference1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + txt = hprint.dedent(txt) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_amp_references(txt) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Remove the reference to `amp.`. 
+ """ + txt = """ + * Failed assertion * + Instance '' + of class '_Man' is not a subclass of '' + """ + expected = r""" + * Failed assertion * + Instance '' + of class '_Man' is not a subclass of '' + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test removing multiple amp references in a single string. + """ + txt = """ + ImportError: No module named 'amp.helpers.test.test_file' + """ + expected = r""" + ImportError: No module named 'helpers.test.test_file' + """ + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test removing amp references in file paths. + """ + txt = """ + File "/home/user/amp/helpers/test/test_dbg.py", line 10 + File "/home/user/amp/helpers/test/test_file.py", line 20 + """ + expected = r""" + File "/home/user/helpers/test/test_dbg.py", line 10 + File "/home/user/helpers/test/test_file.py", line 20 + """ + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test removing amp references in import statements. + """ + txt = """ + from amp.helpers.test import test_dbg + import amp.helpers.test.test_file + from amp.helpers.test.test_dbg import _Man + """ + expected = r""" + from helpers.test import test_dbg + import helpers.test.test_file + from helpers.test.test_dbg import _Man + """ + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test removing amp references in docstrings and comments. + """ + txt = """ + # This is a test for amp.helpers.test.test_dbg + """ + expected = r""" + # This is a test for helpers.test.test_dbg + """ + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test removing amp references in error messages with multiple + occurrences. 
+ """ + txt = """ + Error in amp.helpers.test.test_dbg: Invalid input + Error in amp.helpers.test.test_file: File not found + Error in amp.helpers.test.test_dbg: Permission denied + """ + expected = r""" + Error in helpers.test.test_dbg: Invalid input + Error in helpers.test.test_file: File not found + Error in helpers.test.test_dbg: Permission denied + """ + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test that longer amp paths are processed before shorter ones. + """ + txt = "amp/helpers/amp/test/test_file.py" + expected = "helpers/test/test_file.py" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test that nested amp references are processed correctly. + """ + txt = "amp.helpers.test.amp.TestClass" + expected = "helpers.test.amp.TestClass" + self.check_helper(txt, expected) + + def test9(self) -> None: + """ + Test removing amp references from test creation comments with various + module paths. + """ + txt = """ + # Test created for amp.helpers.test.test_file + # Test created for amp.core.dataflow.model + # Test created for amp.helpers.test.test_dbg._Man + """ + expected = r""" + # Test created for helpers.test.test_file + # Test created for core.dataflow.model + # Test created for helpers.test.test_dbg._Man + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_app_references1 +# ############################################################################# + + +class Test_purify_app_references1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_app_references(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test app.helpers reference removal. 
+ """ + txt = "app.helpers.test.test_file" + expected = "helpers.test.test_file" + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test app.amp.helpers reference removal. + """ + txt = "app.amp.helpers.test.test_file" + expected = "amp.helpers.test.test_file" + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test app.amp.helpers_root.helpers reference removal. + """ + txt = "app.amp.helpers_root.helpers.test.test_file" + expected = "amp.helpers.test.test_file" + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test multiple app references in the same string. + """ + txt = """ + app.helpers.test.test_file + app.amp.helpers.test.test_file + app.amp.helpers_root.helpers.test.test_file + """ + expected = """ + helpers.test.test_file + amp.helpers.test.test_file + amp.helpers.test.test_file + """ + self.check_helper(txt, expected) + + def test5(self) -> None: + """ + Test that longer app paths are processed before shorter ones. + """ + txt = "app/helpers/app/test/test_file.py" + expected = "helpers/test/test_file.py" + self.check_helper(txt, expected) + + def test6(self) -> None: + """ + Test that app.amp.helpers_root references are processed before app.amp. + """ + txt = "app.amp.helpers_root.helpers.test.TestClass" + expected = "amp.helpers.test.TestClass" + self.check_helper(txt, expected) + + def test7(self) -> None: + """ + Test string with no app references. + """ + txt = "path/to/file.txt" + expected = "path/to/file.txt" + self.check_helper(txt, expected) + + def test8(self) -> None: + """ + Test removing app references from test creation comments with various + module paths. 
+ """ + txt = """ + # Test created for app.helpers.test.test_file + # Test created for app.core.dataflow.model + # Test created for app.helpers.test.test_dbg._Man + """ + expected = r""" + # Test created for helpers.test.test_file + # Test created for core.dataflow.model + # Test created for helpers.test.test_dbg._Man + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_from_env_vars +# ############################################################################# + + +# TODO(ShaopengZ): numerical issue. (arm vs x86) +@pytest.mark.requires_ck_infra +class Test_purify_from_env_vars(hunitest.TestCase): + """ + Test purification from env vars. + """ + + def check_helper(self, env_var: str) -> None: + env_var_value = os.environ[env_var] + input_ = f"s3://{env_var_value}/" + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_from_env_vars(input_) + expected = f"s3://${env_var}/" + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == "//cmamp", + reason="Run only in //cmamp", + ) + def test1(self) -> None: + """ + - $CSFY_AWS_S3_BUCKET + """ + env_var = "CSFY_AWS_S3_BUCKET" + self.check_helper(env_var) + + +# TODO(gp): HelpersTask1 +# @pytest.mark.skipif( +# not hrecouti.get_repo_config().get_name() == "//cmamp", +# reason="Run only in //cmamp", +# ) +# def test_end_to_end(self) -> None: +# """ +# - Multiple env vars. 
+# """ +# #am_aws_s3_bucket = os.environ["AM_AWS_S3_BUCKET"] +# csfy_aws_s3_bucket = os.environ["CSFY_AWS_S3_BUCKET"] +# # +# text = f""" +# $AM_AWS_S3_BUCKET = {am_aws_s3_bucket} +# $CSFY_AWS_S3_BUCKET = {csfy_aws_s3_bucket} +# """ +# # +# text_purifier = huntepur.TextPurifier() +# actual = text_purifier.purify_from_env_vars(text) +# self.check_string(actual, fuzzy_match=True) + + +# ############################################################################# +# Test_purify_object_representation1 +# ############################################################################# + + +class Test_purify_object_representation1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + txt = hprint.dedent(txt) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_object_representation(txt) + expected = hprint.dedent(expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + txt = """ + load_prices: {'source_node_name': 'RealTimeDataSource object + at 0x7f571c329b50 + """ + expected = r""" + load_prices: {'source_node_name': 'RealTimeDataSource object + at 0x""" + self.check_helper(txt, expected) + + def test2(self) -> None: + txt = """ + load_prices: {'source_node_name at 0x7f571c329b51': + 'RealTimeDataSource object at 0x7f571c329b50 + """ + expected = r""" + load_prices: {'source_node_name at 0x': + 'RealTimeDataSource object at 0x""" + self.check_helper(txt, expected) + + def test3(self) -> None: + txt = """ + load_prices: {'source_node_name': 'RealTimeDataSource', + 'source_node_kwargs': {'market_data': + , 'period': 'last_5mins', 'asset_id_col': 'asset_id', + 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', + 'execution_mode': 'real_time', 'process_forecasts_config': + {'market_data': + ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': + datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), + 'ath_end_time': datetime.time(16, 40), 'trading_end_time': + 
datetime.time(16, 4 0)}} + """ + expected = r""" + load_prices: {'source_node_name': 'RealTimeDataSource', + 'source_node_kwargs': {'market_data': + , 'period': 'last_5mins', 'asset_id_col': 'asset_id', + 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', + 'execution_mode': 'real_time', 'process_forecasts_config': + {'market_data': + ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': + datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), + 'ath_end_time': datetime.time(16, 40), 'trading_end_time': + datetime.time(16, 4 0)}}""" + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test replacing wall_clock_time=Timestamp('..., tz='America/New_York')) + """ + txt = """ + _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' + >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , + _dst_dir=None , _fit_at_beginning=False , + _wake_up_timestamp=None , _bar_duration_in_secs=300 , + _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 + 10:05:00-0500', tz='America/New_York'), + wall_clock_time=Timestamp('2022-08-04 09:29:13.441715-0400', + tz='America/New_York')), Event(num_it=2, + current_time=Timestamp('2000-01-01 10:10:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 + 09:29:13.892793-0400', tz='America/New_York')), Event(num_it=3, + current_time=Timestamp('2000-01-01 10:15:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 + 09:29:14.131619-0400', tz='America/New_York'))] ) + """ + expected = """ + _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' + >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , + _dst_dir=None , _fit_at_beginning=False , + _wake_up_timestamp=None , _bar_duration_in_secs=300 , + _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 + 10:05:00-0500', tz='America/New_York'), + wall_clock_time=Timestamp('xxx', tz='America/New_York')), + Event(num_it=2, 
current_time=Timestamp('2000-01-01 10:10:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('xxx', + tz='America/New_York')), Event(num_it=3, + current_time=Timestamp('2000-01-01 10:15:00-0500', + tz='America/New_York'), wall_clock_time=Timestamp('xxx', + tz='America/New_York'))] ) + """ + txt = " ".join(hprint.dedent(txt).split("\n")) + expected = " ".join(hprint.dedent(expected).split("\n")) + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_today_date1 +# ############################################################################# + + +class Test_purify_today_date1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_today_date(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test replacing today's date and time with placeholders. + """ + today = datetime.date.today() + today_str = today.strftime("%Y%m%d") + txt = f""" + Report generated on {today_str}_103045. + Next run scheduled at {today_str}_235959. + """ + expected = """ + Report generated on YYYYMMDD_HHMMSS. + Next run scheduled at YYYYMMDD_HHMMSS. + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test replacing today's date only with placeholder. + """ + today = datetime.date.today() + today_str = today.strftime("%Y%m%d") + txt = f""" + Backup completed: {today_str}. + Last modified: {today_str}. + """ + expected = """ + Backup completed: YYYYMMDD. + Last modified: YYYYMMDD. + """ + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test to check that non-date-like numbers are not replaced. 
+ """ + txt = """ + ID: 20000319_123456 + Code: 20000321 + Reference: 20000320_999999 + """ + expected = """ + ID: 20000319_123456 + Code: 20000321 + Reference: 20000320_999999 + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_white_spaces1 +# ############################################################################# + + +class Test_purify_white_spaces1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_white_spaces(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test removing trailing spaces and tabs. + """ + txt = "Line 1 \nLine 2\t\nLine 3 \t \n" + expected = "Line 1\nLine 2\nLine 3\n" + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test removing trailing spaces and preserving empty lines. + """ + txt = "Line 1\n\n\nLine 2\n\n\n\nLine 3 " + expected = "Line 1\n\n\nLine 2\n\n\n\nLine 3" + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test removing trailing whitespace and preserving leading whitespace. + """ + txt = " \n Line 1\nLine 2\n Line 3 \n " + expected = " \n Line 1\nLine 2\n Line 3\n" + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test preserving intentional whitespace within lines. 
+ """ + txt = "Line 1 with spaces\nLine 2\twith\ttabs" + expected = "Line 1 with spaces\nLine 2\twith\ttabs\n" + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_parquet_file_names1 +# ############################################################################# + + +class Test_purify_parquet_file_names1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_parquet_file_names(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test purification of Parquet file names with the path. + + The Parquet file names with the + GUID have to be replaced with the `data.parquet` string. + """ + txt = """ + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/ea5e3faed73941a2901a2128abeac4ca-0.parquet + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/f7a39fefb69b40e0987cec39569df8ed-0.parquet + """ + expected = """ + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/data.parquet + s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/data.parquet + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test purification of Parquet file name without the path. 
+ """ + txt = """ + ffa39fffb69b40e0987cec39569df8ed-0.parquet + """ + expected = """ + data.parquet + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_helpers1 +# ############################################################################# + + +class Test_purify_helpers1(hunitest.TestCase): + def check_helper(self, txt: str, expected: str) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_helpers(txt) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test replacing helpers references in import statements. + """ + txt = """ + import helpers_root.helpers.hdbg as hdbg + from helpers_root.helpers.hprint import dedent + import helpers_root.config_root.config as config + """ + expected = """ + import helpers.hdbg as hdbg + from helpers.hprint import dedent + import config_root.config as config + """ + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test replacing helpers references in file paths. + """ + txt = """ + /path/to/helpers/hdbg.py + /path/to/helpers/hprint.py + /path/to/config_root/config.py + """ + expected = """ + /path/to/helpers/hdbg.py + /path/to/helpers/hprint.py + /path/to/config_root/config.py + """ + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test replacing helpers references in docstrings and comments. + """ + txt = """ + import helpers_root.helpers.hdbg + from /path/to/helpers_root/helpers/hprint import dedent + import helpers_root.config_root.config + from /path/to/helpers_root/config_root/config import settings + """ + expected = """ + import helpers.hdbg + from /path/to/helpers/hprint import dedent + import config_root.config + from /path/to/config_root/config import settings + """ + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test that non-matching patterns are not replaced. 
+ """ + txt = """ + import other_module + from other_package import helpers + import helpers_utils + path/to/other/helpers/file.py + """ + expected = """ + import other_module + from other_package import helpers + import helpers_utils + path/to/other/helpers/file.py + """ + self.check_helper(txt, expected) + + +# ############################################################################# +# Test_purify_docker_image_name1 +# ############################################################################# + + +class Test_purify_docker_image_name1(hunitest.TestCase): + def test1(self) -> None: + txt = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.edb567be pdflatex -output-directory + """ + expected = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.xxxxxxxx pdflatex -output-directory + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_docker_image_name(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test2(self) -> None: + """ + Test patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. 
+ """ + txt = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.2f590c86.2f590c86 pdflatex -output-directory + """ + expected = r""" + docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_docker_image_name(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_purify_line_number1 +# ############################################################################# + + +class Test_purify_line_number1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that the text is purified from line numbers correctly. + """ + txt = """ + dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): + in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=list): [('close',), ('volume',)] + out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=tuple): () + """ + expected = r""" + dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): + in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=list): [('close',), ('volume',)] + out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=tuple): () + """ + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_line_number(txt) + self.assert_equal(actual, expected, fuzzy_match=True) + + +# 
############################################################################# +# Test_purify_file_names1 +# ############################################################################# + + +class Test_purify_file_names1(hunitest.TestCase): + def check_helper(self, file_names: List[str], expected: List[str]) -> None: + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(file_names) + actual = "\n".join(str(path) for path in actual) + expected = "\n".join(str(path) for path in expected) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test basic file name purification with relative paths. + """ + with umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ): + txt = [ + "/home/user/gitroot/helpers/test/test_file.py", + "/home/user/gitroot/amp/helpers/test/test_dbg.py", + ] + expected = [ + "helpers/test/test_file.py", + "helpers/test/test_dbg.py", + ] + self.check_helper(txt, expected) + + def test2(self) -> None: + """ + Test file name purification with nested amp references. + """ + with umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ): + txt = [ + "/home/user/gitroot/amp/helpers/amp/test/test_file.py", + "/home/user/gitroot/amp/helpers/test/amp/test_dbg.py", + ] + expected = [ + "helpers/test/test_file.py", + "helpers/test/test_dbg.py", + ] + self.check_helper(txt, expected) + + def test3(self) -> None: + """ + Test file name purification with app references to ensure that they are + not replaced. + """ + with umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ): + txt = [ + "/home/user/gitroot/app/helpers/test/test_file.py", + "/home/user/gitroot/app/amp/helpers/test/test_dbg.py", + ] + expected = [ + "app/helpers/test/test_file.py", + "app/helpers/test/test_dbg.py", + ] + self.check_helper(txt, expected) + + def test4(self) -> None: + """ + Test file name purification with empty list. 
+ """ + with umock.patch( + "helpers.hgit.get_client_root", return_value="/home/user/gitroot" + ): + txt = [] + expected = [] + self.check_helper(txt, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py new file mode 100644 index 000000000..35421d368 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py @@ -0,0 +1,553 @@ +import os + +import helpers.hio as hio +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.hunit_test_utils as hunteuti + + +# ############################################################################# +# TestUnitTestRenamer +# ############################################################################# + + +class TestUnitTestRenamer(hunitest.TestCase): + """ + Test class renaming functionality. + """ + + +# ############################################################################# +# TestCases +# ############################################################################# + + + @staticmethod + def helper() -> str: + """ + Create file content. + """ + content = """ +class TestCases(hunitest.TestCase): + def test_assert_equal1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + return content + + +# ############################################################################# +# TestNewCase +# ############################################################################# + + + def test_rename_class1(self) -> None: + """ + Test renaming of existing class. 
+ """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer("TestCases", "TestNewCase", root_dir) + actual, _ = renamer._rename_class(content) + expected = """ +class TestNewCase(hunitest.TestCase): + def test_assert_equal1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test_check_string1(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + self.assert_equal(actual, expected) + + def test_rename_class2(self) -> None: + """ + Test renaming of non existing class. + """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer("TestCase", "TestNewCase", root_dir) + actual, _ = renamer._rename_class(content) + # Check if the content of the file was not changed. + self.assert_equal(actual, content) + + +# ############################################################################# +# TestPytestRenameMethod +# ############################################################################# + + +class TestPytestRenameMethod(hunitest.TestCase): + """ + Test method renaming functionality. + """ + + +# ############################################################################# +# TestCases +# ############################################################################# + + + @staticmethod + def helper() -> str: + """ + Create file content. 
+ """ + content = """ +class TestCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + + +# ############################################################################# +# TestOtherCases +# ############################################################################# + + +class TestOtherCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + return content + + +# ############################################################################# +# TestCases +# ############################################################################# + + + def test_rename_method1(self) -> None: + """ + Test renaming of existing method. + """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer( + "TestCases.test1", "TestCases.test_new", root_dir + ) + actual, _ = renamer._rename_method(content) + expected = """ +class TestCases(hunitest.TestCase): + def test_new(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + + +# ############################################################################# +# TestOtherCases +# ############################################################################# + + +class TestOtherCases(hunitest.TestCase): + def test1(self) -> None: + actual = "hello world" + expected = actual + self.assert_equal(actual, expected) + + def test10(self) -> None: + actual = "hello world" + self.check_string(actual) + """ + self.assert_equal(actual, expected) + + def test_rename_method2(self) -> None: + """ + Test renaming of non existing method. 
+ """ + content = self.helper() + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer( + "TestOtherCases.test5", "TestOtherCases.test6", root_dir + ) + actual, _ = renamer._rename_method(content) + # Check if the content of the file was not changed. + self.assert_equal(actual, content) + + def test_rename_method3(self) -> None: + """ + Test renaming of invalid method names. + """ + self.helper() + root_dir = os.getcwd() + with self.assertRaises(AssertionError): + hunteuti.UnitTestRenamer( + "TestCases.test10", "TestOtherCases.test6", root_dir + ) + + +# ############################################################################# +# TestPytestRenameOutcomes +# ############################################################################# + + +class TestPytestRenameOutcomes(hunitest.TestCase): + """ + Test golden outcomes directory renaming. + """ + + @staticmethod + def helper(toy_test: str) -> None: + """ + Create the temporary outcome to rename. + + :param toy_test: the name of the toy directory + """ + outcomes_paths = [ + "TestCase.test_check_string1", + "TestCase.test_rename", + "TestCase.test_rename3", + "TestCases.test_rename2", + "TestRename.test_rename1", + ] + for path in outcomes_paths: + outcomes_dir = os.path.join(toy_test, "test/outcomes", path) + hio.create_dir(outcomes_dir, incremental=False) + hio.to_file(f"{outcomes_dir}/test.txt", "Test files.") + cmd = f"git add {toy_test}/" + hsystem.system(cmd, abort_on_error=False, suppress_output=False) + + def _clean_up(self, toy_test: str) -> None: + """ + Remove temporary test directory. + + :param toy_test: the name of the toy directory + """ + cmd = f"git reset {toy_test}/ && rm -rf {toy_test}/" + hsystem.system(cmd, abort_on_error=False, suppress_output=False) + + def test_rename_class_outcomes(self) -> None: + """ + Rename outcome directory. + """ + toy_test = "toyCmTask1279." + self._testMethodName + # Create outcomes directory. 
+ test_path = os.path.join(toy_test, "test") + # Create the toy outcomes. + self.helper(toy_test) + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer( + "TestCase", "TestRenamedCase", root_dir + ) + renamer.rename_outcomes( + test_path, + ) + # Check if the dirs were renamed. + outcomes_path = os.path.join(test_path, "outcomes") + outcomes_dirs = os.listdir(outcomes_path) + actual = sorted( + [ + ent + for ent in outcomes_dirs + if os.path.isdir(os.path.join(outcomes_path, ent)) + ] + ) + expected = [ + "TestCases.test_rename2", + "TestRename.test_rename1", + "TestRenamedCase.test_check_string1", + "TestRenamedCase.test_rename", + "TestRenamedCase.test_rename3", + ] + self.assertEqual(actual, expected) + self._clean_up(toy_test) + + def test_rename_method_outcomes(self) -> None: + """ + Rename outcome directory. + """ + toy_test = "toyCmTask1279." + self._testMethodName + # Create outcomes directory. + test_path = os.path.join(toy_test, "test") + # Create the toy outcomes. + self.helper(toy_test) + root_dir = os.getcwd() + renamer = hunteuti.UnitTestRenamer( + "TestCase.test_rename", + "TestCase.test_method_renamed", + root_dir, + ) + renamer.rename_outcomes( + test_path, + ) + # Check if the dirs were renamed. + outcomes_path = os.path.join(test_path, "outcomes") + outcomes_dirs = os.listdir(outcomes_path) + actual = sorted( + [ + ent + for ent in outcomes_dirs + if os.path.isdir(os.path.join(outcomes_path, ent)) + ] + ) + expected = [ + "TestCase.test_check_string1", + "TestCase.test_method_renamed", + "TestCase.test_rename3", + "TestCases.test_rename2", + "TestRename.test_rename1", + ] + self.assertEqual(actual, expected) + self._clean_up(toy_test) + + +# ############################################################################# +# Test_get_test_file_for_source +# ############################################################################# + + +class Test_get_test_file_for_source(hunitest.TestCase): + """ + Test mapping source files to test files. 
+ """ + + def test1(self) -> None: + """ + Source file with existing test file returns the test path. + """ + actual = hunteuti.get_test_file_for_source("helpers/hdbg.py") + expected = "helpers/test/test_hdbg.py" + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Source file without test file returns None. + """ + actual = hunteuti.get_test_file_for_source("tasks.py") + self.assertIsNone(actual) + + def test3(self) -> None: + """ + Test file as input returns None. + """ + actual = hunteuti.get_test_file_for_source("helpers/test/test_hdbg.py") + self.assertIsNone(actual) + + +# ############################################################################# +# TestIsTestFile +# ############################################################################# + + +class TestIsTestFile(hunitest.TestCase): + """ + Test test file detection. + """ + + def test_path_with_test_dir(self) -> None: + """ + Path containing /test/ is detected as test file. + """ + actual = hunteuti.is_test_file("helpers/test/test_hdbg.py") + self.assertTrue(actual) + + def test_path_with_test_prefix(self) -> None: + """ + Basename starting with test_ is detected as test file. + """ + actual = hunteuti.is_test_file("helpers/test_hdbg.py") + self.assertTrue(actual) + + def test_path_with_test_suffix(self) -> None: + """ + Basename ending with _test.py is detected as test file. + """ + actual = hunteuti.is_test_file("helpers/hdbg_test.py") + self.assertTrue(actual) + + def test_source_file(self) -> None: + """ + Source file path is not detected as test file. + """ + actual = hunteuti.is_test_file("helpers/hdbg.py") + self.assertFalse(actual) + + def test_nested_path_with_test(self) -> None: + """ + Path with /test/ anywhere is detected as test file. 
+ """ + actual = hunteuti.is_test_file( + "dev_scripts_helpers/scraping/test/__init__.py" + ) + self.assertTrue(actual) + + +# ############################################################################# +# TestGetTestFilesForSources +# ############################################################################# + + +class TestGetTestFilesForSources(hunitest.TestCase): + """ + Test mapping lists of source files to test files. + """ + + def test_mixed_files(self) -> None: + """ + Mixed source and test files returns only matched test files. + """ + files = [ + "helpers/hdbg.py", + "helpers/test/test_hdbg.py", + "helpers/hio.py", + ] + actual = hunteuti.get_test_files_for_sources(files) + expected = [ + "helpers/test/test_hdbg.py", + "helpers/test/test_hio.py", + ] + self.assertEqual(sorted(actual), sorted(expected)) + + def test_only_test_files(self) -> None: + """ + Only test files as input returns empty list. + """ + files = [ + "helpers/test/test_hdbg.py", + "helpers/test/test_hio.py", + ] + actual = hunteuti.get_test_files_for_sources(files) + expected = [] + self.assertEqual(actual, expected) + + def test_only_source_files_with_tests(self) -> None: + """ + Source files with existing tests return matching test files. + """ + files = [ + "helpers/hdbg.py", + "helpers/hio.py", + ] + actual = hunteuti.get_test_files_for_sources(files) + expected = [ + "helpers/test/test_hdbg.py", + "helpers/test/test_hio.py", + ] + self.assertEqual(sorted(actual), sorted(expected)) + + def test_source_without_test(self) -> None: + """ + Source file without test file is skipped. + """ + files = ["tasks.py"] + actual = hunteuti.get_test_files_for_sources(files) + expected = [] + self.assertEqual(actual, expected) + + def test_empty_list(self) -> None: + """ + Empty input returns empty list. 
+ """ + files = [] + actual = hunteuti.get_test_files_for_sources(files) + expected = [] + self.assertEqual(actual, expected) + + +# ############################################################################# +# TestGetParentDirs +# ############################################################################# + + +class TestGetParentDirs(hunitest.TestCase): + """ + Test extracting minimal parent directories from file list. + """ + + def test_single_file(self) -> None: + """ + Single file returns its parent directory. + """ + files = ["helpers/hdbg.py"] + actual = hunteuti.get_parent_dirs(files) + expected = ["helpers"] + self.assertEqual(actual, expected) + + def test_files_in_same_dir(self) -> None: + """ + Multiple files in same directory return that directory once. + """ + files = [ + "helpers/hdbg.py", + "helpers/hio.py", + ] + actual = hunteuti.get_parent_dirs(files) + expected = ["helpers"] + self.assertEqual(actual, expected) + + def test_files_in_different_dirs(self) -> None: + """ + Files in different directories return all distinct dirs. + """ + files = [ + "dev_scripts_helpers/scraping/process_hn_article.py", + "helpers/hgit.py", + "helpers/lib_tasks_utils.py", + ] + actual = hunteuti.get_parent_dirs(files) + expected = [ + "dev_scripts_helpers/scraping", + "helpers", + ] + self.assertEqual(sorted(actual), sorted(expected)) + + def test_nested_dirs_dedup(self) -> None: + """ + Nested directories are deduplicated to keep only parent. + """ + files = [ + "dev_scripts_helpers/scraping/process_hn_article.py", + "dev_scripts_helpers/scraping/test/__init__.py", + "helpers/hgit.py", + "helpers/lib_tasks_utils.py", + ] + actual = hunteuti.get_parent_dirs(files) + expected = [ + "dev_scripts_helpers/scraping", + "helpers", + ] + self.assertEqual(sorted(actual), sorted(expected)) + + def test_empty_list(self) -> None: + """ + Empty file list returns empty directory list. 
+ """ + files = [] + actual = hunteuti.get_parent_dirs(files) + expected = [] + self.assertEqual(actual, expected) + + def test_root_level_files(self) -> None: + """ + Files at root level are handled correctly. + """ + files = [ + "tasks.py", + "pyproject.toml", + ] + actual = hunteuti.get_parent_dirs(files) + expected = ["."] + self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py new file mode 100644 index 000000000..79aa3ab80 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py @@ -0,0 +1,74 @@ +import logging + +import helpers.hunit_test as hunitest +import helpers.hversion as hversio + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestVersioning1 +# ############################################################################# + + +class TestVersioning1(hunitest.TestCase): + def test_get_changelog_version1(self) -> None: + """ + Test `cmamp` version. + """ + container_dir_name = "." + code_version = hversio.get_changelog_version(container_dir_name) + _LOG.debug("code_version=%s", code_version) + + def test_get_container_version1(self) -> None: + container_version = hversio.get_container_version() + _LOG.debug("container_version=%s", container_version) + + def test_check_version1(self) -> None: + container_dir_name = "." 
+ hversio.check_version(container_dir_name) + + def test__check_version1(self) -> None: + code_version = "1.0.0" + container_version = "1.0.2" + is_ok = hversio._check_version(code_version, container_version) + self.assertFalse(is_ok) + + def test__check_version2(self) -> None: + code_version = "1.0.0" + container_version = "1.0.0" + is_ok = hversio._check_version(code_version, container_version) + self.assertTrue(is_ok) + + def test__check_version3(self) -> None: + code_version = "1.0.0" + container_version = "amp-1.0.0" + is_ok = hversio._check_version(code_version, container_version) + self.assertTrue(is_ok) + + def test_bump_version1(self) -> None: + """ + Test major version bump. + """ + version = "2.2.0" + result = hversio.bump_version(version, bump_type="major") + expected = "3.0.0" + self.assertEqual(result, expected) + + def test_bump_version2(self) -> None: + """ + Test minor version bump. + """ + version = "2.2.0" + result = hversio.bump_version(version, bump_type="minor") + expected = "2.3.0" + self.assertEqual(result, expected) + + def test_bump_version3(self) -> None: + """ + Test patch version bump. 
+ """ + version = "2.2.0" + result = hversio.bump_version(version, bump_type="patch") + expected = "2.2.1" + self.assertEqual(result, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py new file mode 100644 index 000000000..987b30476 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py @@ -0,0 +1,569 @@ +import logging +import os +import time +from typing import Any, List, Optional, Union + +import pytest + +import helpers.hjoblib as hjoblib +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + +# ############################################################################# + + +def workload_function( + val1: int, + val2: str, + # + **kwargs: Any, +) -> str: + """ + Execute the test workload. + """ + _LOG.info("Starting workload %s", val1) + incremental = kwargs.pop("incremental") + num_attempts = kwargs.pop("num_attempts") + _ = val1, val2, incremental, num_attempts + res: str = hprint.to_str("val1 val2 incremental num_attempts kwargs") + _LOG.debug("res=%s", res) + sleep = 0.01 + # sleep = 2 + time.sleep(sleep) + _LOG.info("Ending workload %s", val1) + if val1 == -1: + raise ValueError(f"Error: {res}") + return res + + +# ############################################################################# +# Test_parallel_execute1 +# ############################################################################# + + +def get_workload1( + randomize: bool, *, seed: Optional[int] = None +) -> hjoblib.Workload: + """ + Return a workload for `workload_function()` with 5 tasks that succeeds. 
+ """ + tasks = [] + for i in range(5): + # val1, val2 + task = ((i, 2 * i), {f"hello{i}": f"world{2 * i}", "good": "bye"}) + tasks.append(task) + workload: hjoblib.Workload = (workload_function, "workload_function", tasks) + if randomize: + # Randomize workload. + workload = hjoblib.randomize_workload(workload, seed=seed) + return workload + + +# ############################################################################# + + +def _outcome_to_string(outcome: List[str]) -> str: + outcome = "\n".join(sorted(map(str, outcome))) + return outcome + + +def _helper_success( + self_: Any, + workload: hjoblib.Workload, + num_threads: Union[str, int], + abort_on_error: bool, + expected_return: str, + backend: str, +) -> None: + """ + Run a workload that is supposed to succeed and check its result. + """ + dry_run = False + incremental = True + num_attempts = 1 + log_file = os.path.join(self_.get_scratch_space(), "log.txt") + # + res = hjoblib.parallel_execute( + workload, + dry_run, + num_threads, + incremental, + abort_on_error, + num_attempts, + log_file, + backend=backend, + ) + # Check. + _LOG.debug("res=%s", str(res)) + actual = _outcome_to_string(res) + self_.assert_equal(actual, expected_return) + + +# ############################################################################# +# Test_parallel_execute1 +# ############################################################################# + + +class Test_parallel_execute1(hunitest.TestCase): + """ + Execute a workload of 5 tasks that all succeed. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_RETURN = r"""val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} +val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} +val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} +val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} +val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" + + def test_dry_run1(self) -> None: + """ + Dry-run a workload. + """ + workload = get_workload1(randomize=True) + dry_run = True + num_threads = "serial" + incremental = True + num_attempts = 1 + abort_on_error = True + log_file = os.path.join(self.get_scratch_space(), "log.txt") + res = hjoblib.parallel_execute( + workload, + dry_run, + num_threads, + incremental, + abort_on_error, + num_attempts, + log_file, + ) + _LOG.debug("res=%s", str(res)) + self.assertIs(res, None) + + def _run_test(self, num_threads: Union[str, int], backend: str) -> None: + workload = get_workload1(randomize=True) + abort_on_error = True + # + expected_return = self.EXPECTED_RETURN + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + + # pylint: enable=line-too-long + + def test_serial1(self) -> None: + num_threads = "serial" + backend = "" + self._run_test(num_threads, backend) + + def test_parallel_loky1(self) -> None: + num_threads = "1" + backend = "loky" + self._run_test(num_threads, backend) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~6 seconds, see CmTask4951.") + def test_parallel_loky2(self) -> None: + num_threads = "3" + backend = "loky" + self._run_test(num_threads, backend) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = "1" + backend = "asyncio_threading" + self._run_test(num_threads, backend) + + def test_parallel_asyncio_threading2(self) -> None: + 
num_threads = "3" + backend = "asyncio_threading" + self._run_test(num_threads, backend) + + +# ############################################################################# +# Test_parallel_execute2 +# ############################################################################# + + +def get_workload2() -> hjoblib.Workload: + """ + Return a workload for `workload_function()` with 1 task that fails. + """ + task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) + tasks = [task] + workload: hjoblib.Workload = (workload_function, "workload_function", tasks) + return workload + + +def _helper_fail( + self_: Any, + workload: hjoblib.Workload, + num_threads: Union[str, int], + abort_on_error: bool, + expected_assertion: str, + backend: str, +) -> None: + dry_run = False + incremental = True + num_attempts = 1 + log_file = os.path.join(self_.get_scratch_space(), "log.txt") + # + with self_.assertRaises(ValueError) as cm: + res = hjoblib.parallel_execute( + workload, + dry_run, + num_threads, + incremental, + abort_on_error, + num_attempts, + log_file, + backend=backend, + ) + # Print result if it succeeds. + _LOG.debug("res=%s", str(res)) + # Check. + actual = str(cm.exception) + self_.assert_equal(actual, expected_assertion) + + +# # To observe the output in real-time. 
+# if __name__ == "__main__": +# hdbg.init_logger(verbosity=logging.INFO) +# workload = get_workload1(randomize=True) +# # num_threads = "serial" +# num_threads = "1" +# # num_threads = "5" +# # backend = "loky" +# backend = "asyncio_threading" +# # backend = "asyncio_multiprocessing" +# abort_on_error = True +# # +# dry_run = False +# incremental = True +# num_attempts = 1 +# log_file = "./log.txt" +# # +# _LOG.info("\n" + hprint.frame("Start workload")) +# with htimer.TimedScope(logging.INFO, "Execute workload"): +# res = hjoblib.parallel_execute( +# workload, +# dry_run, +# num_threads, +# incremental, +# abort_on_error, +# num_attempts, +# log_file, +# backend=backend, +# ) +# _LOG.info("\n" + hprint.frame("Results")) +# import pprint +# +# print(pprint.pformat(res)) + + +# ############################################################################# +# Test_parallel_execute2 +# ############################################################################# + + +class Test_parallel_execute2(hunitest.TestCase): + """ + Execute a workload of 1 task that fails. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_STRING = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" + + # pylint: enable=line-too-long + + def _run_test( + self, + abort_on_error: bool, + num_threads: Union[str, int], + backend: str, + should_succeed: bool, + ) -> None: + workload = get_workload2() + # + expected_return = self.EXPECTED_STRING + if should_succeed: + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + else: + _helper_fail( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + + def test_serial1(self) -> None: + num_threads = "serial" + abort_on_error = True + backend = "" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_serial2(self) -> None: + num_threads = "serial" + abort_on_error = False + backend = "" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky1(self) -> None: + num_threads = 2 + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky2(self) -> None: + num_threads = 2 + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = 2 + abort_on_error = True + backend = "asyncio_threading" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading2(self) -> None: + num_threads = 2 + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + 
self._run_test(abort_on_error, num_threads, backend, should_succeed) + + +# ############################################################################# +# Test_parallel_execute3 +# ############################################################################# + + +def get_workload3( + randomize: bool, seed: Optional[int] = None +) -> hjoblib.Workload: + """ + Return a workload for `workload_function()` with 5 tasks succeeding and one + task failing. + """ + workload: hjoblib.Workload = get_workload1(randomize=True) + # Modify the workflow in place. + (workload_func, func_name, tasks) = workload + _ = workload_func, func_name + task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) + tasks.append(task) + if randomize: + # Randomize workload. + workload = hjoblib.randomize_workload(workload, seed=seed) + return workload + + +# ############################################################################# +# Test_parallel_execute3 +# ############################################################################# + + +class Test_parallel_execute3(hunitest.TestCase): + """ + Execute a workload with 5 tasks that succeed and 1 task that fails. 
+ """ + + # pylint: disable=line-too-long + EXPECTED_STRING1 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" + + EXPECTED_STRING2 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'} +val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} +val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} +val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} +val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} +val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" + + # pylint: enable=line-too-long + + def _run_test( + self, + abort_on_error: bool, + num_threads: Union[str, int], + backend: str, + should_succeed: bool, + ) -> None: + workload = get_workload3(randomize=False) + # Since there is an error and `abort_on_error=True` we only get information + # about the failed task. + if should_succeed: + expected_return = self.EXPECTED_STRING2 + _helper_success( + self, + workload, + num_threads, + abort_on_error, + expected_return, + backend, + ) + else: + # Since there is an error and `abort_on_error=True` we only get information + # about the failed task. 
+ expected_exception = self.EXPECTED_STRING1 + _helper_fail( + self, + workload, + num_threads, + abort_on_error, + expected_exception, + backend, + ) + + def test_serial1(self) -> None: + num_threads = "serial" + abort_on_error = True + backend = "" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_serial2(self) -> None: + """ + Execute: + - a workload with 5 tasks that succeed and 1 task that fails + - serially + - don't abort because abort_on_error=False + """ + num_threads = "serial" + abort_on_error = False + backend = "" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_loky1(self) -> None: + num_threads = "1" + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.requires_ck_infra + @pytest.mark.slow("~7 seconds.") + def test_parallel_loky2(self) -> None: + num_threads = "3" + abort_on_error = True + backend = "loky" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_loky3(self) -> None: + num_threads = "1" + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + @pytest.mark.slow("~5 seconds.") + def test_parallel_loky4(self) -> None: + num_threads = "3" + abort_on_error = False + backend = "loky" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading1(self) -> None: + num_threads = "1" + abort_on_error = True + backend = "asyncio_threading" + # + should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading2(self) -> None: + num_threads = "3" + abort_on_error = True + backend = "asyncio_threading" + # + 
should_succeed = False + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading3(self) -> None: + num_threads = "1" + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + def test_parallel_asyncio_threading4(self) -> None: + num_threads = "3" + abort_on_error = False + backend = "asyncio_threading" + # + should_succeed = True + self._run_test(abort_on_error, num_threads, backend, should_succeed) + + +# ############################################################################# + + +# ############################################################################# +# Test_joblib_example1 +# ############################################################################# + + +@pytest.mark.skip(reason="Just for experimenting with joblib") +class Test_joblib_example1(hunitest.TestCase): + @staticmethod + def func(val: int) -> int: + print(f"val={val}") + if val == -1: + raise ValueError(f"val={val}") + print(f" out={val}") + return val + + def test1(self) -> None: + """ + Show that when a job fails the entire `joblib.Parallel` fails without + returning anything, but just propagating the exception. 
+ """ + # num_threads = 5 + num_threads = 1 + vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + # vals[1] = -1 + vals[5] = -1 + import joblib + + backend = "loky" + res = joblib.Parallel(n_jobs=num_threads, backend=backend, verbose=200)( + joblib.delayed(Test_joblib_example1.func)(val) for val in vals + ) + print(f"res={str(res)}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py new file mode 100644 index 000000000..12f04c506 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py @@ -0,0 +1,540 @@ +# This should only test helper functions from `lib_tasks.py`. +# `test_tasks.py` associated to `tasks.py` should test specific task targets. + +import logging +import os +import re +import unittest.mock as umock +from typing import Dict, Generator + +import invoke +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks as hlibtask +import helpers.lib_tasks_gh as hlitagh +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +def _get_default_params() -> Dict[str, str]: + """ + Get fake params pointing to a different image so we can test the code + without affecting the official images. 
+ """ + ecr_base_path = os.environ["CSFY_ECR_BASE_PATH"] + default_params = { + "CSFY_ECR_BASE_PATH": ecr_base_path, + "BASE_IMAGE": "amp_test", + "HELPERS_IMAGE_PROD": f"{ecr_base_path}/helpers:prod", + } + return default_params + + +# ############################################################################# +# _LibTasksTestCase +# ############################################################################# + + +class _LibTasksTestCase(hunitest.TestCase): + """ + Test class injecting default parameters in the `lib_tasks` singleton in + `set_up_test()` and cleaning up the singleton in `tear_down_test()`. + """ + + # This will be run before and after each test. + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + # Run before each test. + self.set_up_test() + yield + # Run after each test. + self.tear_down_test() + + def set_up_test(self) -> None: + params = _get_default_params() + hlitauti.set_default_params(params) + + def tear_down_test(self) -> None: + hlitauti.reset_default_params() + + +# ############################################################################# + + +# TODO(gp): Make it public. +def _build_mock_context_returning_ok() -> invoke.MockContext: + """ + Build a MockContext catching any command and returning rc=0. + """ + ctx = invoke.MockContext( + repeat=True, run={re.compile(".*"): invoke.Result(exited=0)} + ) + return ctx + + +# ############################################################################# +# _CheckDryRunTestCase +# ############################################################################# + + +class _CheckDryRunTestCase(hunitest.TestCase): + """ + Test class running an invoke target with/without dry-run and checking that + the issued commands are what is expected. + """ + + def _check_calls(self, ctx: invoke.MockContext) -> None: + """ + `check_string()` the sequence of commands issued in the context. 
+ """ + actual = "\n".join(map(str, ctx.run.mock_calls)) + actual = hprint.remove_non_printable_chars(actual) + self.check_string(actual) + + def _check_output(self, target: str, check: bool = True) -> None: + """ + Dry run target checking that the sequence of commands issued is the + expected one. + """ + ctx = _build_mock_context_returning_ok() + # pylint: disable=exec-used + exec(f"hlibtask.{target}") + # pylint: enable=exec-used + # Check the outcome. + if check: + self._check_calls(ctx) + + +# TODO(gp): We should group the tests by what is tested and not how it's +# tested. E.g. TestDryRunTasks1::test_print_setup and +# TestDryRunTasks2::test_print_setup should go together in a class. + + +# ############################################################################# +# TestDryRunTasks1 +# ############################################################################# + + +class TestDryRunTasks1(hunitest.TestCase): + """ + - Run invoke in dry-run mode from command line + - Compare the output to the golden outcomes + """ + + # TODO(gp): -> TestGitCommands1 + + def dry_run( + self, target: str, dry_run: bool = True, check_string: bool = True + ) -> None: + """ + Invoke the given target with dry run. + + This is used to test the commands that we can't actually + execute. + """ + opts = "--dry" if dry_run else "" + # + # TODO(vitalii): While deploying the container versioning + # we disable the check in the unit tests. Remove `SKIP_VERSION_CHECK=1` + # after CmampTask570 is fixed. + cmd = f"SKIP_VERSION_CHECK=1 invoke {opts} {target} | grep -v INFO | grep -v '>>ENV<<:'" + _, actual = hsystem.system_to_string(cmd) + # + actual = hprint.remove_non_printable_chars(actual) + # docker_ps: sudo=False + regex = r"# \S+:" + actual = hunitest.filter_text(regex, actual) + # + regex = r"(WARN|INFO)\s+hcache.py" + actual = hunitest.filter_text(regex, actual) + # Filter out `no module` warnings. 
+ # TODO(Grisha): add the "no module warning" filtering + # to `purify_text()` in `check_string()`. + regex = "WARN.*No module" + actual = hunitest.filter_text(regex, actual) + if check_string: + self.check_string(actual) + + # ######################################################################### + + # TODO(gp): We can't test this since amp and cmamp have now different base image. + # def test_print_setup(self) -> None: + # target = "print_setup" + # self.dry_run(target) + + # The problem is that we use system and not ctx to execute the command, so that + # --dry-run doesn't work. + @pytest.mark.skip(reason="This is actually run") + def test_git_pull(self) -> None: + target = "git_pull" + self.dry_run(target) + + @pytest.mark.skip(reason="This is actually run") + def test_git_fetch_master(self) -> None: + target = "git_fetch_master" + self.dry_run(target) + + @pytest.mark.skip(reason="This is actually run deleting files") + def test_git_clean(self) -> None: + target = "git_clean" + self.dry_run(target) + + # ######################################################################### + # TODO(gp): -> TestDockerCommands1 + + @pytest.mark.slow("~6 sec.") + @pytest.mark.skipif( + hserver.is_inside_ci(), reason="In CI the output is different" + ) + def test_docker_images_ls_repo(self) -> None: + target = "docker_images_ls_repo" + # TODO(gp): amp and cmamp have different version of aws cli and so the + # output is different. + check_string = False + self.dry_run(target, check_string=check_string) + + @pytest.mark.slow("~6 sec.") + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. 
Different golden outcomes in helpers and other repos.", + ) + def test_docker_ps(self) -> None: + target = "docker_ps" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_stats(self) -> None: + target = "docker_stats" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_last(self) -> None: + target = "docker_kill" + self.dry_run(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_all(self) -> None: + target = "docker_kill --all" + self.dry_run(target) + + +# ############################################################################# + + +# ############################################################################# +# TestDryRunTasks2 +# ############################################################################# + + +# Outside CK infra, the class hangs, so we skip it. +@pytest.mark.requires_ck_infra +@pytest.mark.slow(reason="Around 7s") +@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class TestDryRunTasks2(_LibTasksTestCase, _CheckDryRunTestCase): + """ + - Call the invoke task directly from Python + - `check_string()` that the sequence of commands issued by the target is the + expected one using mocks to return ok for every system call. + """ + + def test_print_setup(self) -> None: + target = "print_setup(ctx)" + self._check_output(target) + + def test_git_pull(self) -> None: + target = "git_pull(ctx)" + self._check_output(target) + + def test_git_fetch_master(self) -> None: + target = "git_fetch_master(ctx)" + self._check_output(target) + + def test_git_clean(self) -> None: + target = "git_clean(ctx)" + self._check_output(target) + + # TODO(Grisha): is not it the same as `test_git_clean()`? 
+ def test_git_clean2(self) -> None: + target = "git_clean(ctx, dry_run=False)" + self._check_output(target) + + # ######################################################################### + + def test_docker_images_ls_repo(self) -> None: + target = "docker_images_ls_repo(ctx)" + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_all(self) -> None: + target = "docker_kill(ctx, all=True)" + self._check_output(target) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_kill_last(self) -> None: + target = "docker_kill(ctx)" + self._check_output(target) + + def test_docker_ps(self) -> None: + target = "docker_ps(ctx)" + self._check_output(target) + + def test_docker_pull(self) -> None: + target = "docker_pull(ctx)" + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_docker_stats(self) -> None: + target = "docker_stats(ctx)" + self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestGhCommands1 + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr1(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, title='test')" + self._check_output(target) + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. 
+ @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr2(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, body='hello_world', title='test')" + self._check_output(target) + + # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_gh_create_pr3(self) -> None: + with ( + umock.patch.object( + hgit, "get_branch_name", return_value="AmpTask1_test_branch" + ), + umock.patch.object( + hlitagh, + "_get_repo_full_name_from_cmd", + return_value=("github.com/alphamatic/amp", "amp"), + ), + ): + target = "gh_create_pr(ctx, draft=False, title='test')" + self._check_output(target) + + # TODO(*): Remove skip after migration to `csfy`.` + @pytest.mark.skip( + reason="migration to new repo " + "ref: https://github.com/causify-ai/cmamp/issues/13063" + ) + def test_gh_issue_title(self) -> None: + target = "gh_issue_title(ctx, 1)" + self._check_output(target) + + # TODO(Shaopengz): Outside CK infra, the test hangs, so skip. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_gh_workflow_list(self) -> None: + target = "gh_workflow_list(ctx, filter_by_branch='master')" + self._check_output(target) + + # This is an action with side effects so we can't test it. 
+ # def test_gh_workflow_run(self) -> None: + # target = "gh_workflow_run(ctx)" + # self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestGitCommands1 + def test_git_branch_files(self) -> None: + # This test needs a reference to Git master branch. + hgit.fetch_origin_master_if_needed() + # + target = "git_branch_files(ctx)" + self._check_output(target) + + @pytest.mark.skip( + reason="HelpersTask638: Skip Failing test to merge the PR in cmamp" + ) + def test_git_branch_create1(self) -> None: + target = ( + "git_branch_create(ctx, branch_name='AmpTask123_test', " + "only_branch_from_master=False)" + ) + self._check_output(target) + + # TODO(*): Remove skip after migration to `csfy`.` + @pytest.mark.skip( + reason="migration to new repo " + "ref: https://github.com/causify-ai/cmamp/issues/13063" + ) + def test_git_branch_create2(self) -> None: + # Difference between `cmamp` and `kaizenflow`. + target = ( + "git_branch_create(ctx, issue_id=1, only_branch_from_master=False)" + ) + self._check_output(target) + + def test_git_branch_create3(self) -> None: + with self.assertRaises(AssertionError): + target = ( + "git_branch_create(ctx, branch_name='test', issue_id=1, " + "only_branch_from_master=False)" + ) + self._check_output(target, check=False) + + # This is an action with side effects so we can't test it. 
+ # def test_git_branch_delete_merged(self) -> None: + # target = "git_branch_delete_merged(ctx)" + # self._check_output(target) + + def test_git_merge_master(self) -> None: + target = "git_merge_master(ctx, abort_if_not_clean=False)" + self._check_output(target) + + # ######################################################################### + # TODO(gp): -> TestLintCommands1 + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint1(self) -> None: + target = "lint(ctx, modified=True)" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint2(self) -> None: + target = "lint(ctx, branch=True)" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + @pytest.mark.skip( + reason="AmpTask1347: Add support for mocking `system*()` " + "functions to unit test" + ) + def test_lint3(self) -> None: + file = __file__ + target = f"lint(ctx, files='{file}')" + # The output depends on the client, so don't check it. + self._check_output(target, check=False) + + def test_find_test_class1(self) -> None: + class_name = self.__class__.__name__ + target = f"find_test_class(ctx, class_name='{class_name}')" + self._check_output(target) + + # ######################################################################### + + @pytest.mark.skipif( + hserver.is_inside_ci(), reason="In CI the output is different" + ) + def test_docker_login(self) -> None: + """ + Instead of using _build_mock_context_returning_ok(), set the return + values more explicitly. 
+ """ + stdout = "aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49\n" + ctx = invoke.MockContext( + run={ + "aws --version": invoke.Result(stdout), + re.compile("^docker login"): invoke.Result(exited=0), + re.compile("^eval"): invoke.Result(exited=0), + } + ) + hlibtask.docker_login(ctx) + # Check the outcome. + # self._check_calls(ctx) + + +# ############################################################################# + +# TODO(gp): Run test coverage with +# > i run_fast_slow_tests \ +# --pytest-opts="helpers/test/test_lib_tasks.py test/test_tasks.py" \ +# --coverage + +# TODO(gp): Add tests for: +# - print_tasks +# - git_files +# - git_last_commit_files +# - check_python_files +# - docker_stats +# - traceback (with checked in file) +# - lint + + +# ############################################################################# + + +# ############################################################################# +# TestFailing +# ############################################################################# + + +class TestFailing(hunitest.TestCase): + """ + Run a test that fails based on CSFY_FORCE_TEST_FAIL environment variable. 
+ """ + + def test_failing(self) -> None: + if os.environ.get("CSFY_FORCE_TEST_FAIL", "") == "1": + self.fail("test failed succesfully") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py new file mode 100644 index 000000000..80ea28ffb --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py @@ -0,0 +1,494 @@ +import logging +import os +import re +import unittest.mock as umock +from typing import Dict, Optional + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.lib_tasks_docker as hlitadoc +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# pylint: disable=protected-access + + +# ############################################################################# +# Test_generate_compose_file1 +# ############################################################################# + + +class Test_generate_compose_file1(hunitest.TestCase): + def helper( + self, + stage: str, + *, + use_privileged_mode: bool = False, + use_sibling_container: bool = False, + shared_data_dirs: Optional[Dict[str, str]] = None, + mount_as_submodule: bool = False, + use_network_mode_host: bool = True, + use_main_network: bool = False, + ) -> None: + txt = [] + # + params = [ + "stage", + "use_privileged_mode", + "use_sibling_container", + "shared_data_dirs", + "mount_as_submodule", + "use_network_mode_host", + ] + txt_tmp = hprint.to_str(" ".join(params)) + txt.append(txt_tmp) + # + file_name = None + txt_tmp = hlitadoc._generate_docker_compose_file( + stage, + use_privileged_mode, + use_sibling_container, + 
shared_data_dirs, + mount_as_submodule, + use_network_mode_host, + use_main_network, + file_name, + ) + # Remove all the env variables that are function of the host. + txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) + txt_tmp = hunitest.filter_text("CSFY_GIT_ROOT_PATH", txt_tmp) + txt_tmp = hunitest.filter_text("CSFY_HELPERS_ROOT_PATH", txt_tmp) + txt_tmp = hunitest.filter_text( + "CSFY_USE_HELPERS_AS_NESTED_MODULE", txt_tmp + ) + txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) + txt.append(txt_tmp) + # + txt = "\n".join(txt) + txt = hunitest.filter_text(r"working_dir", txt) + self.check_string(txt) + + def test1(self) -> None: + self.helper(stage="prod", use_privileged_mode=True) + + def test2(self) -> None: + self.helper( + stage="prod", shared_data_dirs={"/data/shared": "/shared_data"} + ) + + def test3(self) -> None: + self.helper(stage="prod", use_main_network=True) + + # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + hgit.is_in_amp_as_submodule(), reason="Only run in amp directly" + ) + def test4(self) -> None: + self.helper(stage="dev") + + # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
+ @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test5(self) -> None: + self.helper(stage="dev") + + +# ############################################################################# +# Test_generate_compose_file2 +# ############################################################################# + + +class Test_generate_compose_file2(hunitest.TestCase): + def helper( + self, + mock_getcwd: str, + mock_find_git_root: str, + mock_find_helpers_root: str, + mock_is_in_helpers_as_supermodule: bool, + *, + stage: str = "prod", + use_privileged_mode: bool = True, + use_sibling_container: bool = False, + shared_data_dirs: Optional[Dict[str, str]] = None, + mount_as_submodule: bool = False, + use_network_mode_host: bool = True, + use_main_network: bool = False, + ) -> None: + txt = [] + # + params = [ + "stage", + "use_privileged_mode", + "use_sibling_container", + "shared_data_dirs", + "mount_as_submodule", + "use_network_mode_host", + ] + txt_tmp = hprint.to_str(" ".join(params)) + txt.append(txt_tmp) + # + file_name = None + with ( + umock.patch.object(os, "getcwd", return_value=mock_getcwd), + umock.patch.object( + hgit, "find_git_root", return_value=mock_find_git_root + ), + umock.patch.object( + hgit, "find_helpers_root", return_value=mock_find_helpers_root + ), + umock.patch.object( + hgit, + "is_in_helpers_as_supermodule", + return_value=mock_is_in_helpers_as_supermodule, + ), + ): + txt_tmp = hlitadoc._generate_docker_compose_file( + stage, + use_privileged_mode, + use_sibling_container, + shared_data_dirs, + mount_as_submodule, + use_network_mode_host, + use_main_network, + file_name, + ) + # Remove all the env variables that are function of the host. 
+ txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) + txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) + txt.append(txt_tmp) + # + txt = "\n".join(txt) + self.check_string(txt) + + def test1(self) -> None: + """ + Check that file is generated correctly when the repo is `//cmamp`. + """ + self.helper( + mock_getcwd="/data/dummy/src/cmamp1", + mock_find_git_root="/data/dummy/src/cmamp1", + mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + def test2(self) -> None: + """ + Check that file is generated correctly when the repo is `//helpers`. + """ + self.helper( + mock_getcwd="/data/dummy/src/helpers1", + mock_find_git_root="/data/dummy/src/helpers1", + mock_find_helpers_root="/data/dummy/src/helpers1", + mock_is_in_helpers_as_supermodule=True, + ) + + def test3(self) -> None: + """ + Check that file is generated correctly when the repo is `//cmamp` and + `//cmamp/ck.infra` is a runnable dir. + """ + self.helper( + mock_getcwd="/data/dummy/src/cmamp1/ck.infra", + mock_find_git_root="/data/dummy/src/cmamp1", + mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + def test4(self) -> None: + """ + Check that file is generated correctly when the repo is `//orange`. + """ + self.helper( + mock_getcwd="/data/dummy/src/orange1", + mock_find_git_root="/data/dummy/src/orange1", + mock_find_helpers_root="/data/dummy/src/orange1/amp/helpers_root", + mock_is_in_helpers_as_supermodule=False, + ) + + +# ############################################################################# + + +# ############################################################################# +# TestLibTasksGetDockerCmd1 +# ############################################################################# + + +# TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
+@pytest.mark.requires_ck_infra +class TestLibTasksGetDockerCmd1(httestlib._LibTasksTestCase): + """ + Test `_get_docker_compose_cmd()`. + """ + + def check(self, actual: str, expected: str) -> None: + # Remove current timestamp (e.g., `20220317_232120``) from the `--name` + # so that the tests pass. + timestamp_regex = r"\.\d{8}_\d{6}" + actual = re.sub(timestamp_regex, "", actual) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_txt_from_client(actual) + # This is required when different repos run Docker with user vs root / remap. + actual = hunitest.filter_text("--user", actual) + self.assert_equal(actual, expected, fuzzy_match=True) + + @pytest.mark.requires_ck_infra + # TODO(gp): After using a single docker file as part of AmpTask2308 + # "Update_amp_container" we can probably run these tests in any repo, so + # we should be able to remove this `skipif`. + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash1(self) -> None: + """ + Command for docker_bash target. + """ + base_image = "" + stage = "dev" + version = "1.0.0" + cmd = "bash" + service_name = "app" + use_entrypoint = False + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + service_name=service_name, + use_entrypoint=use_entrypoint, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + --entrypoint bash \ + app + """ + self.check(actual, expected) + + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash2(self) -> None: + """ + Command for docker_bash with entrypoint. 
+ """ + base_image = "" + stage = "local" + version = "1.0.0" + cmd = "bash" + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + print_docker_config=print_docker_config, + ) + expected = r"""IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + app \ + bash """ + self.check(actual, expected) + + @pytest.mark.requires_ck_infra + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_bash3(self) -> None: + """ + Command for docker_bash with some env vars. + """ + base_image = "" + stage = "local" + version = "1.0.0" + cmd = "bash" + extra_env_vars = ["PORT=9999", "SKIP_RUN=1"] + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + extra_env_vars=extra_env_vars, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ + PORT=9999 \ + SKIP_RUN=1 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + app \ + bash + """ + self.check(actual, expected) + + if False: + + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_docker_bash4(self) -> None: + base_image = "" + stage = "dev" + version = "1.0.0" + cmd = "bash" + entrypoint = False + print_docker_config = False + actual = hlitadoc._get_docker_compose_cmd( + base_image, + stage, + version, + cmd, + entrypoint=entrypoint, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + docker compose \ + --file 
$GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.app.app \ + --entrypoint bash \ + app + """ + self.check(actual, expected) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_docker_jupyter1(self) -> None: + base_image = "" + stage = "dev" + version = "1.0.0" + port = 9999 + self_test = True + print_docker_config = False + actual = hlitadoc._get_docker_jupyter_cmd( + base_image, + stage, + version, + port, + self_test, + print_docker_config=print_docker_config, + ) + expected = r""" + IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ + PORT=9999 \ + docker compose \ + --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ + --env-file devops/env/default.env \ + run \ + --rm \ + --name $USER_NAME.amp_test.jupyter_server_test.app \ + --service-ports \ + jupyter_server_test + """ + self.check(actual, expected) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_image_name_valid1 +# ############################################################################# + + +class Test_dassert_is_image_name_valid1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that valid images pass the assertion. + """ + valid_images = [ + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev", + "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0", + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0", + "sorrentum/cmamp", + ] + for image in valid_images: + hlitadoc.dassert_is_image_name_valid(image) + + def test2(self) -> None: + """ + Check that invalid images do not pass the assertion. + """ + invalid_images = [ + # Missing required parts. + "invalid-image-name", + # Missing stage/version. + "12345.dkr.ecr.us-east-1.amazonaws.com/amp:", + # Invalid version. 
+ "12345.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-invalid", + ] + # TODO(gp): Add a check for the output. + for image in invalid_images: + with self.assertRaises(AssertionError): + hlitadoc.dassert_is_image_name_valid(image) + + +# ############################################################################# + + +# ############################################################################# +# Test_dassert_is_base_image_name_valid1 +# ############################################################################# + + +class Test_dassert_is_base_image_name_valid1(hunitest.TestCase): + def test1(self) -> None: + """ + Check that valid base images pass the assertion. + """ + valid_base_images = [ + "12345.dkr.ecr.us-east-1.amazonaws.com/amp", + "sorrentum/cmamp", + "ghcr.io/cryptokaizen/cmamp", + ] + for base_image in valid_base_images: + hlitadoc._dassert_is_base_image_name_valid(base_image) + + def test2(self) -> None: + """ + Check that invalid base images do not pass the assertion. + """ + invalid_base_images = [ + # Missing required parts. + "invalid-base-image", + # Extra character at the end. + "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:", + # Extra part in the name. 
+ "ghcr.io/cryptokaizen/cmamp/invalid", + ] + for base_image in invalid_base_images: + with self.assertRaises(AssertionError): + hlitadoc._dassert_is_base_image_name_valid(base_image) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py new file mode 100644 index 000000000..ff430ed24 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py @@ -0,0 +1,1530 @@ +import logging +import os +import unittest.mock as umock +from typing import Generator, List + +import boto3 +import moto +import pytest + +import helpers.hgit as hgit +import helpers.hunit_test as hunitest +import helpers.lib_tasks_docker as hlitadoc +import helpers.lib_tasks_docker_release as hltadore +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +def _extract_commands_from_call(calls: List[umock._Call]) -> List[str]: + """ + Extract command strings from a list of mock call arguments. + + Example: + calls = [ + ( + # args tuple: (context, command) + (mock_ctx, "docker build --no-cache image1"), + # kwargs dictionary + {"pty": True} + ) + ] + After extraction: + ["docker build --no-cache image1"] + + :param calls: list of mock call objects containing (args, kwargs) + :return: list of command strings + """ + # Each mock call is a (args, kwargs) tuple, extract the command string + # from args[1] in each call. 
+ call_list = [args_[1] for args_, kwargs_ in calls] + return call_list + + +# ############################################################################# +# _DockerFlowTestHelper +# ############################################################################# + + +class _DockerFlowTestHelper(hunitest.TestCase): + """ + Helper test class to perform common setup, teardown logic and assertion + checks for Docker flow tests. + """ + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + self.set_up_test() + yield + self.tear_down_test() + + def set_up_test(self) -> None: + # Mock system calls. + self.system_patcher = umock.patch("helpers.hsystem.system") + self.mock_system = self.system_patcher.start() + # Mock run. + self.run_patcher = umock.patch("helpers.lib_tasks_utils.run") + self.mock_run = self.run_patcher.start() + # Mock version validation. + self.version_patcher = umock.patch( + "helpers.lib_tasks_docker.dassert_is_subsequent_version" + ) + self.mock_version = self.version_patcher.start() + # Mock docker login. + self.docker_login_patcher = umock.patch( + "helpers.lib_tasks_docker.docker_login" + ) + self.mock_docker_login = self.docker_login_patcher.start() + # Mock environment variable. 
+ self.env_patcher = umock.patch.dict( + "os.environ", {"CSFY_ECR_BASE_PATH": "test.ecr.path"} + ) + self.get_default_param_patcher = umock.patch( + "helpers.lib_tasks_utils.get_default_param", + side_effect=lambda param: { + "CSFY_ECR_BASE_PATH": "test.ecr.path", + "BASE_IMAGE": "test-image", + }.get(param, ""), + ) + self.mock_get_default_param = self.get_default_param_patcher.start() + self.env_patcher.start() + self.get_docker_base_image_name_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_docker_base_image_name" + ) + self.mock_get_docker_base_image_name = ( + self.get_docker_base_image_name_patcher.start() + ) + # + self.patchers = { + "system": self.system_patcher, + "run": self.run_patcher, + "version": self.version_patcher, + "docker_login": self.docker_login_patcher, + "env": self.env_patcher, + "docker_base_image_name": self.get_docker_base_image_name_patcher, + "default_param": self.get_default_param_patcher, + } + # Test inputs. + self.mock_ctx = httestlib._build_mock_context_returning_ok() + self.test_version = "1.0.0" + self.test_base_image = "test-registry.com/test-image" + self.test_multi_arch = "linux/amd64,linux/arm64" + self.mock_get_docker_base_image_name.return_value = "test-image" + + def tear_down_test(self) -> None: + """ + Clean up test environment by stopping all mocks after each test case. + """ + for patcher in self.patchers.values(): + patcher.stop() + + def _check_docker_command_output( + self, expected: str, call_args_list: List[umock._Call] + ) -> None: + """ + Verify that the sequence of Docker commands from mock calls matches the + expected string. 
+ + :param expected: expected command string + :param call_args_list: list of mock call objects + """ + actual_cmds = _extract_commands_from_call(call_args_list) + actual_cmds = "\n".join(actual_cmds) + _LOG.debug("Actual Docker commands:\n%s", actual_cmds) + self.assert_equal( + actual_cmds, + expected, + purify_text=True, + purify_expected_text=True, + fuzzy_match=True, + remove_lead_trail_empty_lines=True, + dedent=True, + ) + + +# ############################################################################# +# Test_docker_build_local_image1 +# ############################################################################# + + +class Test_docker_build_local_image1(_DockerFlowTestHelper): + """ + Test building a local Docker image. + """ + + def test_single_arch1(self) -> None: + """ + Test building with single architecture. + + This test checks: + - Single architecture build + - No-cache build options + - Custom build arguments + - Local user-specific tagging + """ + # Call tested function. + hltadore.docker_build_local_image( + self.mock_ctx, + self.test_version, + cache=False, + base_image=self.test_base_image, + poetry_mode="update", + ) + # The output is a list of strings, each representing a command. + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multi_arch1(self) -> None: + """ + Test building with multiple architectures. + + This test checks: + - Multi-architecture build (amd64, arm64) + - Buildx driver setup + - Platform-specific build options + - Image pushing to registry + """ + # Call tested function. + hltadore.docker_build_local_image( + self.mock_ctx, + self.test_version, + cache=False, + base_image=self.test_base_image, + poetry_mode="update", + multi_arch=self.test_multi_arch, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test-registry.com/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_build_prod_image1 +# ############################################################################# + + +class Test_docker_build_prod_image1(_DockerFlowTestHelper): + """ + Test building a prod Docker image. + """ + + def test_single_arch_prod_image1(self) -> None: + """ + Test building with single architecture. + + This test checks: + - Production build workflow + - Single architecture build + - Build arguments for prod environment + - Prod image versioning + - Default and versioned tagging + """ + # Call tested function. + hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-1.0.0 \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker tag test-registry.com/test-image:prod-1.0.0 test-registry.com/test-image:prod + docker image ls test-registry.com/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multi_arch_prod_image1(self) -> None: + """ + Test building with multiple architectures. + + This test checks: + - Multi-architecture production build + - Buildx setup for multi-platform builds + - Push to registry during build + - Production build arguments + - Multi-arch specific options + """ + # Call tested function. 
+ hltadore.docker_build_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + multi_arch=self.test_multi_arch, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ + --tag test-registry.com/test-image:prod-1.0.0 \ + --file devops/docker_build/prod.Dockerfile \ + - + docker pull test-registry.com/test-image:prod-1.0.0 + docker image ls test-registry.com/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + # TODO(gp): Is the assertion too strict? + reason="Needs to run inside a super module", + ) + def test_candidate_tag1(self) -> None: + """ + Test building with candidate mode using tag. + + This test checks: + - Production build using candidate mode + - Custom tag specification + - Build arguments + - Non-default image tagging + """ + test_tag = "test_tag" + # Call tested function. 
+ hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + candidate=True, + tag=test_tag, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-test_tag \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker image ls test-registry.com/test-image:prod-test_tag + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_candidate_user_tag1(self) -> None: + """ + Test building with candidate mode using user tag. + + This test checks: + - Production build using candidate mode + - Combined user and custom tag parameters + - Custom tag format (prod-user-tag) + - Build arguments + """ + test_user_tag = "test_user" + test_tag = "test_tag" + # Call tested function. 
+ hltadore.docker_build_prod_image( + self.mock_ctx, + self.test_version, + base_image=self.test_base_image, + cache=False, + candidate=True, + user_tag=test_user_tag, + tag=test_tag, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test-registry.com/test-image:prod-test_user-test_tag \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker image ls test-registry.com/test-image:prod-test_user-test_tag + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_tag_push_multi_arch_prod_image1 +# ############################################################################# + + +class Test_docker_tag_push_multi_arch_prod_image1(_DockerFlowTestHelper): + """ + Test tagging and pushing a multi-architecture Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Multi-arch image tagging + - AWS ECR target registry + - Production image versioning + """ + # Call tested function. + target_registry = "aws_ecr.ck" + hltadore.docker_tag_push_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dockerhub1(self) -> None: + """ + Test pushing to DockerHub from AWS ECR. + + This test checks: + - Multi-arch image tagging + - DockerHub registry (differs from AWS ECR test) + - Version and latest tagging + - Cross-registry image copying + """ + # Call tested function. 
+ target_registry = "dockerhub.causify" + hltadore.docker_tag_push_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_tag_push_multi_build_local_image_as_dev1 +# ############################################################################# + + +class Test_docker_tag_push_multi_build_local_image_as_dev1( + _DockerFlowTestHelper +): + """ + Test tagging and pushing a multi-arch local Docker image as dev. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Multi-arch image tagging + - AWS ECR target registry + - Dev image versioning + - Default and versioned tagging + """ + # Call tested function. + target_registry = "aws_ecr.ck" + hltadore.docker_tag_push_multi_build_local_image_as_dev( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dockerhub1(self) -> None: + """ + Test pushing to DockerHub from AWS ECR. + + This test checks: + - Multi-arch image tagging + - DockerHub registry (differs from AWS ECR test) + - Version and latest tagging + - Cross-registry image copying + """ + # Call tested function. 
+ target_registry = "dockerhub.causify" + hltadore.docker_tag_push_multi_build_local_image_as_dev( + self.mock_ctx, + self.test_version, + target_registry=target_registry, + ) + expected = r""" + docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_dev_image1 +# ############################################################################# + + +class Test_docker_release_dev_image1(_DockerFlowTestHelper): + """ + Test releasing a dev Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test releasing the dev image to AWS ECR. + + This test checks: + - Build workflow + - No-cache build options + - Dev image versioning + - Default and versioned tagging + - Registry target selection + - Architecture support + - Tagging and versioning + """ + # Call tested function. + hltadore.docker_release_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + push_to_repo=True, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_prod_image1 +# ############################################################################# + + +class Test_docker_release_prod_image1(_DockerFlowTestHelper): + """ + Test releasing a prod Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test releasing the prod image to AWS ECR. + + This test checks: + - Build workflow + - No-cache build options + - Prod image versioning + - Default and versioned tagging + - Registry target selection + - Architecture support + - Tagging and versioning + """ + # Call tested function. 
+ hltadore.docker_release_prod_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + push_to_repo=True, + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + DOCKER_BUILDKIT=0 \ + time \ + docker build \ + --no-cache \ + --tag test.ecr.path/test-image:prod-1.0.0 \ + --file /app/devops/docker_build/prod.Dockerfile \ + --build-arg VERSION=1.0.0 \ + --build-arg ECR_BASE_PATH=test.ecr.path \ + --build-arg IMAGE_NAME=test-image \ + /app + docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod + docker image ls test.ecr.path/test-image:prod + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_multi_build_dev_image1 +# ############################################################################# + + +class Test_docker_release_multi_build_dev_image1(_DockerFlowTestHelper): + """ + Test releasing a multi-arch dev Docker image. + """ + + def test_single_registry1(self) -> None: + """ + Test releasing to a single registry. + + This test checks: + - Multi-arch build setup + - Build and push workflow + - Dev image tagging + - Test skipping options + - Single registry target + """ + # Call tested function. + hltadore.docker_release_multi_build_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + target_registries="aws_ecr.ck", + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_multiple_registries1(self) -> None: + """ + Test releasing to multiple registries. + + This test checks: + - Multi-arch build workflow + - Multiple registry targets (AWS ECR and DockerHub) + - Parallel image tagging + - Image retagging for different registries + """ + # Call tested function. + hltadore.docker_release_multi_build_dev_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + target_registries="aws_ecr.ck,dockerhub.causify", + ) + expected = r""" + cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 + invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 + docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_rollback_dev_image1 +# ############################################################################# + + +class Test_docker_rollback_dev_image1(_DockerFlowTestHelper): + """ + Test rolling back a dev Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test rolling back and pushing to AWS ECR. + + This test checks: + - Dev image rollback workflow + - Version-specific image pull + - Retagging as latest + - Repository pushing + """ + # Call tested function. 
+ hltadore.docker_rollback_dev_image( + self.mock_ctx, + self.test_version, + push_to_repo=True, + ) + expected = r""" + docker pull test.ecr.path/test-image:dev-1.0.0 + docker tag test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_rollback_prod_image1 +# ############################################################################# + + +class Test_docker_rollback_prod_image1(_DockerFlowTestHelper): + """ + Test rolling back a prod Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test rolling back and pushing to AWS ECR. + + This test checks: + - Production image rollback workflow + - Version-specific image pull + - Retagging as latest production + - Repository pushing + """ + # Call tested function. + hltadore.docker_rollback_prod_image( + self.mock_ctx, + self.test_version, + push_to_repo=True, + ) + expected = r""" + docker pull test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_push_prod_candidate_image1 +# ############################################################################# + + +class Test_docker_push_prod_candidate_image1(_DockerFlowTestHelper): + """ + Test pushing a prod candidate Docker image. + """ + + def test_aws_ecr1(self) -> None: + """ + Test pushing to AWS ECR. + + This test checks: + - Candidate image pushing + - AWS ECR target registry + - Hash-based image tagging + """ + # Call tested function. 
+ candidate = "4759b3685f903e6c669096e960b248ec31c63b69" + hltadore.docker_push_prod_candidate_image( + self.mock_ctx, + candidate=candidate, + ) + expected = r""" + docker push test.ecr.path/test-image:prod-4759b3685f903e6c669096e960b248ec31c63b69 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_release_multi_arch_prod_image1 +# ############################################################################# + + +class Test_docker_release_multi_arch_prod_image1(_DockerFlowTestHelper): + """ + Test releasing a multi-arch prod Docker image. + """ + + def test_multiple_registries1(self) -> None: + """ + Test releasing to AWS ECR and DockerHub. + + This test checks: + - Multi-arch build workflow + - AWS ECR and DockerHub target registries + - Test skipping options + - Image tagging and pushing + """ + # Call tested function. + hltadore.docker_release_multi_arch_prod_image( + self.mock_ctx, + self.test_version, + cache=False, + skip_tests=True, + fast_tests=False, + slow_tests=False, + superslow_tests=False, + qa_tests=False, + docker_registry=["aws_ecr.ck", "dockerhub.causify"], + ) + expected = r""" + cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore + docker buildx create \ + --name multiarch_builder \ + --driver docker-container \ + --bootstrap \ + && \ + docker buildx use multiarch_builder + tar -czh . 
| DOCKER_BUILDKIT=0 \ + time \ + docker buildx build \ + --no-cache \ + --push \ + --platform linux/amd64,linux/arm64 \ + --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ + --tag test.ecr.path/test-image:prod-1.0.0 \ + --file devops/docker_build/prod.Dockerfile \ + - + docker pull test.ecr.path/test-image:prod-1.0.0 + docker image ls test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 + docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_create_candidate_image1 +# ############################################################################# + + +class Test_docker_create_candidate_image1(_DockerFlowTestHelper): + """ + Test creating a candidate Docker image. + """ + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks specific to this test + class. + """ + self.set_up_test() + # Mock git hash. + self.git_hash_patcher = umock.patch( + "helpers.hgit.get_head_hash", + return_value="4759b3685f903e6c669096e960b248ec31c63b69", + ) + self.mock_git_hash = self.git_hash_patcher.start() + self.patchers["git_hash"] = self.git_hash_patcher + # Mock workspace size check. + self.workspace_check_patcher = umock.patch( + "helpers.lib_tasks_docker_release._check_workspace_dir_sizes" + ) + self.mock_workspace_check = self.workspace_check_patcher.start() + self.patchers["workspace_check"] = self.workspace_check_patcher + # Mock file existence check to handle both paths. 
+ self.file_exists_patcher = umock.patch( + "helpers.hdbg.dassert_file_exists" + ) + self.mock_file_exists = self.file_exists_patcher.start() + self.patchers["file_exists"] = self.file_exists_patcher + # Mock `docker_build_prod_image()`. + self.build_prod_patcher = umock.patch( + "helpers.lib_tasks_docker_release.docker_build_prod_image" + ) + self.mock_build_prod = self.build_prod_patcher.start() + self.patchers["build_prod"] = self.build_prod_patcher + # Mock `docker_push_prod_candidate_image()`. + self.push_prod_patcher = umock.patch( + "helpers.lib_tasks_docker_release.docker_push_prod_candidate_image" + ) + self.mock_push_prod = self.push_prod_patcher.start() + self.patchers["push_prod"] = self.push_prod_patcher + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + def test_aws_ecr1(self) -> None: + """ + Test creating and pushing to AWS ECR. + + This test checks: + - Task definition update with correct parameters + - Proper command construction for aws_update_task_definition.py + """ + # Call tested function. + hltadore.docker_create_candidate_image( + self.mock_ctx, + user_tag="test_user", + ) + # Verify the mocks were called with correct parameters. 
+ self.mock_build_prod.assert_called_once_with( + self.mock_ctx, + container_dir_name=".", + version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, + candidate=True, + tag="test_user-4759b3685f903e6c669096e960b248ec31c63b69", + ) + self.mock_push_prod.assert_called_once_with( + self.mock_ctx, + "test_user-4759b3685f903e6c669096e960b248ec31c63b69", + ) + + +# ############################################################################# +# Test_docker_update_prod_task_definition1 +# ############################################################################# + + +class Test_docker_update_prod_task_definition1(_DockerFlowTestHelper): + """ + Test updating a prod task definition to the desired version. + """ + + @pytest.fixture(autouse=True) + def aws_credentials(self) -> None: + """ + Mocked AWS credentials for moto. + """ + os.environ["DOCKER_MOCK_AWS_ACCESS_KEY_ID"] = "testing" + os.environ["DOCKER_MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["DOCKER_MOCK_AWS_SECURITY_TOKEN"] = "testing" + os.environ["DOCKER_MOCK_AWS_SESSION_TOKEN"] = "testing" + os.environ["DOCKER_MOCK_AWS_DEFAULT_REGION"] = "us-east-1" + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks specific to this test + class. + """ + self.set_up_test() + # Mock AWS and S3 functionality. + self.aws_patcher = umock.patch( + "helpers.haws.get_task_definition_image_url" + ) + self.mock_aws = self.aws_patcher.start() + self.mock_aws.return_value = ( + "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69" + ) + self.patchers["aws"] = self.aws_patcher + self.s3_patcher = umock.patch("helpers.hs3.get_s3fs") + self.mock_s3 = self.s3_patcher.start() + self.mock_s3.return_value.cat.return_value = b"test_content" + self.patchers["s3"] = self.s3_patcher + # Mock file operations. 
+ self.file_patcher = umock.patch( + "helpers.hs3.from_file", return_value="test_content" + ) + self.mock_file = self.file_patcher.start() + self.patchers["file"] = self.file_patcher + # Mock listdir to return test DAG files. + self.listdir_patcher = umock.patch( + "helpers.hs3.listdir", + return_value=["/app/im_v2/airflow/dags/test_dag.py"], + ) + self.mock_listdir = self.listdir_patcher.start() + self.patchers["listdir"] = self.listdir_patcher + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + # Clean up environment variables. + for key in [ + "DOCKER_MOCK_AWS_ACCESS_KEY_ID", + "DOCKER_MOCK_AWS_SECRET_ACCESS_KEY", + "DOCKER_MOCK_AWS_SECURITY_TOKEN", + "DOCKER_MOCK_AWS_SESSION_TOKEN", + "DOCKER_MOCK_AWS_DEFAULT_REGION", + ]: + if key in os.environ: + del os.environ[key] + # Call parent teardown. + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + @moto.mock_aws + @umock.patch("helpers.haws.update_task_definition") + @umock.patch("helpers.haws.get_ecs_client") + def test_promotion_to_prod( + self, + mock_get_ecs_client: umock.Mock, + mock_update_task_definition: umock.Mock, + ) -> None: + """ + Test the promotion of a preprod Docker image and DAGs to production. + + This test checks: + - Task definition update workflow + - Preprod to prod image conversion. + - DAG file synchronization + - Image tagging and pushing + """ + # Mock AWS ECS client using moto and register a task definition. 
+ region = "us-east-1" + mock_ecs_client = boto3.client("ecs", region_name=region) + mock_ecs_client.register_task_definition( + family="test_task", + containerDefinitions=[ + { + "name": "test-container", + "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", + } + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + mock_get_ecs_client.return_value = mock_ecs_client + # Add mock client to patchers for cleanup. + self.ecs_client_patcher = umock.patch( + "boto3.client", return_value=mock_ecs_client + ) + self.mock_ecs_client = self.ecs_client_patcher.start() + self.patchers["ecs_client_test1"] = self.ecs_client_patcher + # Call tested function. + hltadore.docker_update_prod_task_definition( + self.mock_ctx, + version=self.test_version, + preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", + airflow_dags_s3_path="s3://test-bucket/dags/", + task_definition="test_task", + ) + expected = r""" + docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod + docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker push test.ecr.path/test-image:prod-1.0.0 + docker push test.ecr.path/test-image:prod + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + # Check whether `update_task_definition` was called with the expected arguments. 
+ expected_image_url = "test.ecr.path/test-image:prod-1.0.0" + mock_update_task_definition.assert_called_once_with( + "test_task", expected_image_url, environment="prod" + ) + + @moto.mock_aws + @umock.patch("helpers.haws.get_ecs_client") + def test_promotion_to_prod_exception_handling( + self, mock_get_ecs_client: umock.Mock + ) -> None: + """ + Test exception handling and rollback behavior when updating prod task + definition. + + This test checks: + - Exception handling during task definition update + - Rollback of task definition to original image + - Rollback of S3 DAG files + - Proper error propagation + """ + # Mock AWS ECS client using moto and register a task definition. + region = "us-east-1" + mock_ecs_client = boto3.client("ecs", region_name=region) + mock_ecs_client.register_task_definition( + family="test_task", + containerDefinitions=[ + { + "name": "test-container", + "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", + } + ], + executionRoleArn="__mock__", + networkMode="bridge", + requiresCompatibilities=["EC2"], + cpu="256", + memory="512", + ) + mock_get_ecs_client.return_value = mock_ecs_client + # Add mock client to patchers for cleanup. + self.ecs_client_patcher = umock.patch( + "boto3.client", return_value=mock_ecs_client + ) + self.mock_ecs_client = self.ecs_client_patcher.start() + self.patchers["ecs_client_test2"] = self.ecs_client_patcher + # Mock S3 bucket operations to simulate a failure. + self.mock_s3.return_value.put.side_effect = Exception("S3 upload failed") + # Call tested function and verify exception is raised. + with self.assertRaises(Exception) as cm: + hltadore.docker_update_prod_task_definition( + self.mock_ctx, + version=self.test_version, + preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", + airflow_dags_s3_path="s3://test-bucket/dags/", + task_definition="test_task", + ) + # Check the error message. 
+ self.assertIn("S3 upload failed", str(cm.exception)) + # Check whether rollback commands were executed. + expected = r""" + docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 + docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod + docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + # Check whether task definition was rolled back. + self.mock_aws.assert_called_with("test_task") + + +# ############################################################################# +# Test_docker_tag_push_dev_image1 +# ############################################################################# + + +class Test_docker_tag_push_dev_image1(_DockerFlowTestHelper): + """ + Test tagging and pushing dev image from a base registry to multiple registries. + """ + + def set_up_test2(self) -> None: + """ + Set up test environment with additional mocks for GHCR workflow. + """ + super().set_up_test() + # Mock version retrieval from changelog. + self.changelog_version_patcher = umock.patch( + "helpers.hversion.get_changelog_version" + ) + self.mock_changelog_version = self.changelog_version_patcher.start() + self.mock_changelog_version.return_value = self.test_version + # Mock repo config for GHCR registry URL and image name. + self.get_container_registry_url_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_container_registry_url" + ) + self.mock_get_container_registry_url = ( + self.get_container_registry_url_patcher.start() + ) + # Use side_effect to return different values based on registry. 
+ self.mock_get_container_registry_url.side_effect = lambda registry: { + "ghcr": "ghcr.io/causify-ai", + "ecr": "test.ecr.path", + }.get(registry, "ghcr.io/causify-ai") + # Add new patchers to cleanup list. + self.patchers.update( + { + "changelog_version": self.changelog_version_patcher, + "container_registry_url": self.get_container_registry_url_patcher, + } + ) + + def tear_down_test2(self) -> None: + """ + Clean up test environment. + """ + self.tear_down_test() + + @pytest.fixture(autouse=True) + def setup_teardown_test(self) -> Generator: + """ + Set up and tear down test environment for each test. + """ + self.set_up_test2() + yield + self.tear_down_test2() + + def test_normal_execution1(self) -> None: + """ + Test normal execution without dry_run. + + This test checks: + - GHCR image pulling + - Tagging for GHCR and AWS ECR + - Pushing to both registries + - Versioned and latest image handling + """ + # Call tested function. + hltadore.docker_tag_push_dev_image( + self.mock_ctx, + target_registries="ghcr,ecr", + container_dir_name=".", + dry_run=False, + ) + # Verify expected Docker commands were executed. + expected = r""" + docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev + docker push ghcr.io/causify-ai/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 + docker push ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_dry_run1(self) -> None: + """ + Test dry_run mode execution. 
+ + This test checks: + - No actual Docker commands are executed when dry_run=True + - All operations are simulated + - Function completes without errors + - Mock calls should include dry_run parameter + """ + # Call tested function with dry_run enabled. + hltadore.docker_tag_push_dev_image( + self.mock_ctx, + target_registries="ghcr,ecr", + container_dir_name=".", + dry_run=True, + ) + # Verify expected Docker commands were executed. + expected = r""" + docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev + docker push ghcr.io/causify-ai/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 + docker push ghcr.io/causify-ai/test-image:dev-1.0.0 + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev + docker push test.ecr.path/test-image:dev + docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 + docker push test.ecr.path/test-image:dev-1.0.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + +# ############################################################################# +# Test_docker_build_test_dev_image1 +# ############################################################################# + + +class Test_docker_build_test_dev_image1(_DockerFlowTestHelper): + """ + Test the complete periodic dev image release workflow. + """ + + def set_up_test(self) -> None: + """ + Set up test environment with additional mocks for the dev image + workflow. + """ + super().set_up_test() + # Mock version operations. 
+ self.get_changelog_version_patcher = umock.patch( + "helpers.hversion.get_changelog_version" + ) + self.mock_get_changelog_version = ( + self.get_changelog_version_patcher.start() + ) + self.mock_get_changelog_version.return_value = "2.3.0" + self.bump_version_patcher = umock.patch("helpers.hversion.bump_version") + self.mock_bump_version = self.bump_version_patcher.start() + self.mock_bump_version.return_value = "2.4.0" + # Mock repo config methods. + self.get_release_team_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_release_team" + ) + self.mock_get_release_team = self.get_release_team_patcher.start() + self.mock_get_release_team.return_value = "dev_system" + self.get_issue_prefix_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_issue_prefix" + ) + self.mock_get_issue_prefix = self.get_issue_prefix_patcher.start() + self.mock_get_issue_prefix.return_value = "TestTask" + self.get_container_registry_url_patcher = umock.patch( + "helpers.repo_config_utils.RepoConfig.get_container_registry_url" + ) + self.mock_get_container_registry_url = ( + self.get_container_registry_url_patcher.start() + ) + self.mock_get_container_registry_url.return_value = "ghcr.io/causify-ai" + # Mock GitHub operations. + self.gh_get_team_member_names_patcher = umock.patch( + "helpers.lib_tasks_gh.gh_get_team_member_names" + ) + self.mock_gh_get_team_member_names = ( + self.gh_get_team_member_names_patcher.start() + ) + self.mock_gh_get_team_member_names.return_value = ["user1", "user2"] + self.gh_create_pr_patcher = umock.patch( + "helpers.lib_tasks_gh.gh_create_pr" + ) + self.mock_gh_create_pr = self.gh_create_pr_patcher.start() + # Mock file operations. 
+ self.get_client_root_patcher = umock.patch( + "helpers.hversion._get_client_root" + ) + self.mock_get_client_root = self.get_client_root_patcher.start() + self.mock_get_client_root.return_value = "/test/root" + self.from_file_patcher = umock.patch("helpers.hio.from_file") + self.mock_from_file = self.from_file_patcher.start() + self.mock_from_file.return_value = "# Existing changelog content\n" + self.to_file_patcher = umock.patch("helpers.hio.to_file") + self.mock_to_file = self.to_file_patcher.start() + # Mock file existence check for dassert_file_exists (changelog validation). + self.file_exists_patcher = umock.patch( + "helpers.hdbg.dassert_file_exists" + ) + self.mock_file_exists = self.file_exists_patcher.start() + # Mock os.path.exists selectively for file staging logic. + # Store the original function before patching + original_exists = os.path.exists + # Define which files should exist for staging + staged_files = { + "/test/root/./devops/docker_build/poetry.lock", + "/test/root/./devops/docker_build/pip_list.txt", + "/test/root/./changelog.txt", + } + + def selective_exists(path): + # Return True for staged files, use original function for everything else + if path in staged_files: + return True + return original_exists(path) + + self.path_exists_patcher = umock.patch( + "os.path.exists", side_effect=selective_exists + ) + self.mock_path_exists = self.path_exists_patcher.start() + # Mock date operations. + self.date_patcher = umock.patch("datetime.date") + self.mock_date = self.date_patcher.start() + # Set up strftime to return different formats based on the format string. + # Branch name uses %Y%m%d, changelog uses %Y-%m-%d + self.mock_date.today.return_value.strftime.side_effect = lambda fmt: { + "%Y%m%d": "20251023", + "%Y-%m-%d": "2025-10-23", + }.get(fmt, "2025-10-23") + # Mock Docker image operations. 
+ self.get_image_patcher = umock.patch( + "helpers.lib_tasks_docker.get_image" + ) + self.mock_get_image = self.get_image_patcher.start() + self.mock_get_image.return_value = ( + "test.ecr.path/test-image:local-testuser-2.4.0" + ) + # Mock _run_tests to prevent actual test execution. + self.run_tests_patcher = umock.patch( + "helpers.lib_tasks_docker_release._run_tests" + ) + self.mock_run_tests = self.run_tests_patcher.start() + # Mock is_inside_ci to control CI-specific behavior. + self.is_inside_ci_patcher = umock.patch("helpers.hserver.is_inside_ci") + self.mock_is_inside_ci = self.is_inside_ci_patcher.start() + # Default to True to simulate CI environment. + self.mock_is_inside_ci.return_value = True + # Add all new patchers to cleanup list. + self.patchers.update( + { + "get_changelog_version": self.get_changelog_version_patcher, + "bump_version": self.bump_version_patcher, + "get_release_team": self.get_release_team_patcher, + "get_issue_prefix": self.get_issue_prefix_patcher, + "container_registry_url": self.get_container_registry_url_patcher, + "gh_get_team_member_names": self.gh_get_team_member_names_patcher, + "gh_create_pr": self.gh_create_pr_patcher, + "get_client_root": self.get_client_root_patcher, + "from_file": self.from_file_patcher, + "to_file": self.to_file_patcher, + "file_exists": self.file_exists_patcher, + "path_exists": self.path_exists_patcher, + "date": self.date_patcher, + "get_image": self.get_image_patcher, + "run_tests": self.run_tests_patcher, + "is_inside_ci": self.is_inside_ci_patcher, + } + ) + + def test_complete_workflow1(self) -> None: + """ + Test the complete periodic dev image release workflow. + """ + # Call the tested function. + hltadore.docker_build_test_dev_image( + self.mock_ctx, + reviewers="", # Empty to trigger team lookup + container_dir_name=".", + ) + # Verify version operations were called. 
+ self.mock_bump_version.assert_called_once_with( + "2.3.0", bump_type="minor" + ) + # Verify GitHub team lookup was performed. + self.mock_get_release_team.assert_called_once() + self.mock_gh_get_team_member_names.assert_called_once_with("dev_system") + # Verify issue prefix was fetched for branch creation. + self.mock_get_issue_prefix.assert_called() + # Verify PR was created with team members as reviewers. + self.mock_gh_create_pr.assert_called_once() + pr_call_args = self.mock_gh_create_pr.call_args + self.assertIn("reviewer", pr_call_args.kwargs) + self.assertEqual(pr_call_args.kwargs["reviewer"], "user1,user2") + # Verify expected Docker and Git commands were executed. + expected = r""" + git checkout -b TestTask_Periodic_image_release_20251023 + cp -f devops/docker_build/dockerignore.dev /app/.dockerignore + tar -czh . | DOCKER_BUILDKIT=0 \ + time \ + docker build \ + \ + --build-arg AM_CONTAINER_VERSION=2.4.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ + --tag test.ecr.path/test-image:local-testuser-2.4.0 \ + --file devops/docker_build/dev.Dockerfile \ + - + invoke docker_cmd --stage local --version 2.4.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull + cp -f poetry.lock.out ./devops/docker_build/poetry.lock + cp -f pip_list.txt ./devops/docker_build/pip_list.txt + docker image ls test.ecr.path/test-image:local-testuser-2.4.0 + sudo chmod -R 777 .git/objects/ + git add /test/root/./devops/docker_build/poetry.lock + git add /test/root/./devops/docker_build/pip_list.txt + git add /test/root/./changelog.txt + git commit -m "Poetry output from the v2.4.0 build" --no-verify + git push origin TestTask_Periodic_image_release_20251023 + docker tag test.ecr.path/test-image:local-testuser-2.4.0 ghcr.io/causify-ai/test-image:dev-2.4.0 + docker push ghcr.io/causify-ai/test-image:dev-2.4.0 + """ + self._check_docker_command_output(expected, self.mock_run.call_args_list) + + def test_with_existing_reviewers1(self) -> None: + """ + Test the workflow when reviewers is already provided. + """ + # Call the tested function with a specific reviewer. + hltadore.docker_build_test_dev_image( + self.mock_ctx, + reviewers="specific_user", + container_dir_name=".", + ) + # Verify PR was created with the provided reviewer. + self.mock_gh_create_pr.assert_called_once() + pr_call_args = self.mock_gh_create_pr.call_args + self.assertIn("reviewer", pr_call_args.kwargs) + self.assertEqual(pr_call_args.kwargs["reviewer"], "specific_user") + # Verify team lookup was NOT performed since reviewers was provided. 
+ self.mock_gh_get_team_member_names.assert_not_called() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py new file mode 100644 index 000000000..886e1dc36 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py @@ -0,0 +1,267 @@ +import logging +import os + +import pytest + +import helpers.hgit as hgit +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.hunit_test_purification as huntepur +import helpers.lib_tasks_find as hlitafin +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_find_short_import1 +# ############################################################################# + + +class Test_find_short_import1(hunitest.TestCase): + def test1(self) -> None: + iterator = [ + ("file1.py", 10, "import dataflow.core.dag_runner as dtfcodarun"), + ("file1.py", 11, "import helpers.hpandas as hpandas"), + ] + results = hlitafin._find_short_import(iterator, "dtfcodarun") + actual = "\n".join(map(str, results)) + # pylint: disable=line-too-long + expected = r"""('file1.py', 10, 'import dataflow.core.dag_runner as dtfcodarun', 'dtfcodarun', 'import dataflow.core.dag_runner as dtfcodarun')""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# Test_find_func_class_uses1 +# ############################################################################# + + +class Test_find_func_class_uses1(hunitest.TestCase): + def test1(self) -> None: + iterator = [ + ( + "file1.py", + 10, + "dag_runner = 
dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)", + ), + ( + "file1.py", + 11, + "This test is similar to `TestRealTimeDagRunner1`. It uses:", + ), + ("file1.py", 12, "dag_builder: dtfcodabui.DagRunner,"), + ("file1.py", 13, ":param dag_builder: `DagRunner` instance"), + ] + results = hlitafin._find_func_class_uses(iterator, "DagRunner") + actual = "\n".join(map(str, results)) + expected = r""" + ('file1.py', 10, 'dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)', 'dtfamsys', 'RealTimeDagRunner') + ('file1.py', 12, 'dag_builder: dtfcodabui.DagRunner,', 'dtfcodabui', 'DagRunner')""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestLibTasksRunTests1 +# ############################################################################# + + +class TestLibTasksRunTests1(hunitest.TestCase): + """ + Test `_find_test_files()`, `_find_test_decorator()`. + """ + + def test_find_test_files1(self) -> None: + """ + Find all the test files in the current dir. + """ + files = hlitafin._find_test_files() + # For sure there are more than 1 test files: at least this one. + self.assertGreater(len(files), 1) + + def test_find_test_files2(self) -> None: + """ + Find all the test files from the top of the super module root. + """ + git_root = hgit.get_client_root(super_module=True) + files = hlitafin._find_test_files(git_root) + # For sure there are more than 1 test files: at least this one. + self.assertGreater(len(files), 1) + + def test_find_test_class1(self) -> None: + """ + Find the current test class. 
+ """ + git_root = hgit.get_client_root(super_module=True) + file_names = hlitafin._find_test_files(git_root) + # + file_names = hlitafin._find_test_class( + "TestLibTasksRunTests1", file_names + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(file_names) + expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test_find_test_class2(self) -> None: + """ + Find the current test class. + """ + file_names = [__file__] + # + file_names = hlitafin._find_test_class( + "TestLibTasksRunTests1", file_names + ) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(file_names) + expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + def test_find_test_class3(self) -> None: + """ + Create synthetic code and look for a class. + """ + scratch_space = self.get_scratch_space() + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # + file_names = hlitafin._find_test_files(dir_name) + act_file_names = [os.path.relpath(d, scratch_space) for d in file_names] + exp_file_names = ["test/test_that.py", "test/test_this.py"] + self.assert_equal(str(act_file_names), str(exp_file_names)) + # + actual = hlitafin._find_test_class("TestHelloWorld", file_names) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(actual) + expected = [ + "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_class3/tmp.scratch/" + "test/test_this.py::TestHelloWorld" + ] + self.assert_equal(str(actual), str(expected), 
purify_text=True) + + def test_find_test_decorator1(self) -> None: + """ + Find test functions in the "no_container" in synthetic code. + """ + scratch_space = self.get_scratch_space() + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + @pytest.mark.no_container + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # + file_names = hlitafin._find_test_files(dir_name) + actual = hlitafin._find_test_decorator("no_container", file_names) + text_purifier = huntepur.TextPurifier() + actual = text_purifier.purify_file_names(actual) + expected = [ + "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_decorator1/" + "tmp.scratch/test/test_that.py" + ] + self.assert_equal(str(actual), str(expected), purify_text=True) + + # TODO(gp): This test can run in amp. + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_find_test_decorator2(self) -> None: + """ + Find test functions in the "no_container" test list. + """ + file_name = hgit.find_file_in_git_tree("hunit_test.py") + file_names = [file_name] + actual = hlitafin._find_test_decorator("qa", file_names) + expected = ["$GIT_ROOT/helpers/hunit_test.py"] + self.assert_equal(str(actual), str(expected), purify_text=True) + + +# ############################################################################# +# Test_find_check_string_output1 +# ############################################################################# + + +class Test_find_check_string_output1(hunitest.TestCase): + def helper(self, expected: str, fuzzy_match: bool) -> None: + # Look for the `check_string()` corresponding to this test. 
+ ctx = httestlib._build_mock_context_returning_ok() + class_name = self.__class__.__name__ + method_name = self._testMethodName + as_python = True + # We don't want to copy but just print. + pbcopy = False + actual = hlitafin.find_check_string_output( + ctx, class_name, method_name, as_python, fuzzy_match, pbcopy + ) + # Check that it matches exactly. + self.assert_equal(actual, expected, fuzzy_match=False) + + def test1(self) -> None: + """ + Test `find_check_string_output()` by searching the `check_string` of + this test. + """ + # Force to generate a `check_string` file so we can search for it. + actual = "A fake check_string output to use for test1" + self.check_string(actual) + # Check. + expected = ''' + actual = + expected = r""" + A fake check_string output to use for test1 + """.lstrip().rstrip() + self.assert_equal(actual, expected, fuzzy_match=False) + ''' + self.helper(expected, fuzzy_match=False) + + def test2(self) -> None: + """ + Like test1 but using `fuzzy_match=True`. + """ + # Force to generate a `check_string` file so we can search for it. + actual = "A fake check_string output to use for test2" + self.check_string(actual) + # Check. 
+ expected = ''' + actual = + expected = r""" +A fake check_string output to use for test2 + + """.lstrip().rstrip() + self.assert_equal(actual, expected, fuzzy_match=True) + ''' + self.helper(expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py new file mode 100644 index 000000000..a5ee64c9e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py @@ -0,0 +1,133 @@ +import logging +import unittest.mock as umock + +import pytest + +import helpers.hgit as hgit +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_gh as hlitagh + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +# ############################################################################# +# TestLibTasks1 +# ############################################################################# + + +class TestLibTasks1(hunitest.TestCase): + """ + Test some auxiliary functions, e.g., `_get_gh_issue_title()`. + """ + + @pytest.mark.skip("CmTask #2362.") + def test_get_gh_issue_title1(self) -> None: + issue_id = 1 + repo = "amp" + actual = hlitagh._get_gh_issue_title(issue_id, repo) + expected = ( + "AmpTask1_Bridge_Python_and_R", + "https://github.com/alphamatic/amp/issues/1", + ) + self.assert_equal(str(actual), str(expected)) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="""Skip unless helpers is the supermodule. Fails when updating submodules; + passes in fast tests super-repo run. 
See CmTask10845.""", + ) + def test_get_gh_issue_title4(self) -> None: + cmd = "invoke gh_login" + hsystem.system(cmd) + # + issue_id = 1 + repo = "current" + _ = hlitagh._get_gh_issue_title(issue_id, repo) + + def test_get_org_name1(self) -> None: + """ + Test _get_org_name when org_name is provided. + """ + org_name = "test-org" + result = hlitagh._get_org_name(org_name) + expected = "test-org" + self.assertEqual(result, expected) + + @umock.patch.object(hgit, "get_repo_full_name_from_dirname") + def test_get_org_name2(self, mock_get_repo: umock.Mock) -> None: + """ + Test _get_org_name when org_name is empty (infers from repo). + """ + mock_get_repo.return_value = "causify-ai/helpers" + result = hlitagh._get_org_name("") + expected = "causify-ai" + self.assertEqual(result, expected) + mock_get_repo.assert_called_once_with(".", include_host_name=False) + + +# ############################################################################# +# TestGhOrgTeamFunctions +# ############################################################################# + + +class TestGhOrgTeamFunctions(hunitest.TestCase): + """ + Test gh_get_org_team_names and gh_get_team_member_names with mocked data. + """ + + @umock.patch.object(hlitagh, "_gh_run_and_get_json") + @umock.patch.object(hlitagh, "_get_org_name") + def test_gh_get_org_team_names1( + self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock + ) -> None: + """ + Test gh_get_org_team_names with sorted team names. + """ + # Setup mocks. + mock_get_org_name.return_value = "test-org" + mock_gh_run.return_value = [ + {"slug": "dev_backend", "id": 1}, + {"slug": "dev_frontend", "id": 2}, + {"slug": "qa_team", "id": 3}, + ] + # Call function. + result = hlitagh.gh_get_org_team_names("test-org", sort=True) + # Verify result. + expected = ["dev_backend", "dev_frontend", "qa_team"] + self.assertEqual(result, expected) + # Verify mocks were called correctly. 
+ mock_get_org_name.assert_called_once_with("test-org") + mock_gh_run.assert_called_once_with( + "gh api /orgs/test-org/teams --paginate" + ) + + @umock.patch.object(hlitagh, "_gh_run_and_get_json") + @umock.patch.object(hlitagh, "_get_org_name") + def test_gh_get_team_member_names1( + self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock + ) -> None: + """ + Test gh_get_team_member_names with member list. + """ + # Setup mocks. + mock_get_org_name.return_value = "test-org" + mock_gh_run.return_value = [ + {"login": "user1", "id": 101}, + {"login": "user2", "id": 102}, + {"login": "user3", "id": 103}, + ] + # Call function. + result = hlitagh.gh_get_team_member_names( + "dev_team", org_name="test-org" + ) + # Verify result. + expected = ["user1", "user2", "user3"] + self.assertEqual(result, expected) + # Verify mocks were called correctly. + mock_get_org_name.assert_called_once_with("test-org") + mock_gh_run.assert_called_once_with( + "gh api /orgs/test-org/teams/dev_team/members --paginate" + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py new file mode 100644 index 000000000..e60ea8f36 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py @@ -0,0 +1,267 @@ +from typing import List + +import pytest + +import helpers.hgit as hgit +import helpers.hunit_test as hunitest +import helpers.lib_tasks_git as hlitagit +import helpers.test.test_lib_tasks as httestlib + +# pylint: disable=protected-access + + +# ############################################################################# +# TestLibTasksGitCreatePatch1 +# ############################################################################# + + +@pytest.mark.slow(reason="Around 7s") 
+@pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", +) +class TestLibTasksGitCreatePatch1(hunitest.TestCase): + """ + Test `git_patch_create()`. + """ + + @staticmethod + def helper( + modified: bool, branch: bool, last_commit: bool, files: str + ) -> None: + ctx = httestlib._build_mock_context_returning_ok() + # + mode = "tar" + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + # + mode = "diff" + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + + def test1(self) -> None: + """ + Test modified files mode. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + modified = True + branch = False + last_commit = False + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test2(self) -> None: + """ + Test branch mode. + """ + # Prepare inputs. + modified = False + branch = True + last_commit = False + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test3(self) -> None: + """ + Test last commit mode. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + modified = False + branch = False + last_commit = True + files = "" + # Run test. + self.helper(modified, branch, last_commit, files) + + def test4(self) -> None: + """ + Test with specific files. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + modified = True + branch = False + last_commit = False + files = __file__ + # Run test. + self.helper(modified, branch, last_commit, files) + + def test5(self) -> None: + """ + Test with all flags False raises AssertionError. + """ + hgit.fetch_origin_master_if_needed() + # Prepare inputs. + ctx = httestlib._build_mock_context_returning_ok() + mode = "diff" + modified = False + branch = False + last_commit = False + files = __file__ + # Run test and check output. 
+ with self.assertRaises(AssertionError) as cm: + hlitagit.git_patch_create( + ctx, mode, modified, branch, last_commit, files + ) + actual = str(cm.exception) + expected = """ +* Failed assertion * +'0' +== +'1' +Specify only one among --modified, --branch, --last-commit +""" + self.assert_equal(actual, expected, fuzzy_match=True) + + +# ############################################################################# +# TestFilterGitFilesByType +# ############################################################################# + + +class TestFilterGitFilesByType(hunitest.TestCase): + """ + Test _filter_git_files_by_type() function. + """ + + def helper( + self, + files: List[str], + keep_python: bool, + keep_jupyter: bool, + keep_markdown: bool, + expected: List[str], + ) -> None: + """ + Test helper for _filter_git_files_by_type. + + :param files: List of files to filter + :param keep_python: include Python files + :param keep_jupyter: include Jupyter notebooks + :param keep_markdown: include Markdown files + :param expected: Expected filtered result + """ + # Run test. + result = hlitagit._filter_git_files_by_type( + files, keep_python, keep_jupyter, keep_markdown + ) + # Check outputs. + self.assertEqual(result, expected) + + def test1(self) -> None: + """ + Test filtering to include only Python files. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + keep_python = True + keep_jupyter = False + keep_markdown = False + # Prepare outputs. + expected = ["foo.py"] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test2(self) -> None: + """ + Test filtering to include only Jupyter notebooks. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + keep_python = False + keep_jupyter = True + keep_markdown = False + # Prepare outputs. + expected = ["bar.ipynb"] + # Run test. 
+ self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test3(self) -> None: + """ + Test filtering to include only Markdown files. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + keep_python = False + keep_jupyter = False + keep_markdown = True + # Prepare outputs. + expected = ["baz.md"] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test4(self) -> None: + """ + Test filtering with multiple file types. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md", "qux.txt"] + keep_python = True + keep_jupyter = False + keep_markdown = True + # Prepare outputs. + expected = ["foo.py", "baz.md"] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test5(self) -> None: + """ + Test filtering with all file types. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + keep_python = True + keep_jupyter = True + keep_markdown = True + # Prepare outputs. + expected = files + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test6(self) -> None: + """ + Test filtering with empty file list. + """ + # Prepare inputs. + files: List[str] = [] + keep_python = True + keep_jupyter = True + keep_markdown = False + # Prepare outputs. + expected: List[str] = [] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test7(self) -> None: + """ + Test filtering when no files match. + """ + # Prepare inputs. + files = ["foo.py", "bar.ipynb", "baz.md"] + keep_python = False + keep_jupyter = False + keep_markdown = False + # Prepare outputs. + expected: List[str] = [] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) + + def test8(self) -> None: + """ + Test that filtering preserves file order. + """ + # Prepare inputs. 
+ files = ["c.py", "a.ipynb", "b.md", "d.py"] + keep_python = True + keep_jupyter = False + keep_markdown = True + # Prepare outputs. + expected = ["c.py", "b.md", "d.py"] + # Run test. + self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py new file mode 100644 index 000000000..47a41e0d8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py @@ -0,0 +1,27 @@ +import helpers.hunit_test as hunitest +import helpers.lib_tasks_integrate as hlitaint + + +# ############################################################################# +# Test_infer_dst_dir1 +# ############################################################################# + + +class Test_infer_dst_dir1(hunitest.TestCase): + def test1(self) -> None: + # Define input variables. + src_dir = "/src/cmamp1/oms/broker/broker.py" + # Call function to test. + actual = hlitaint._infer_dst_file_path( + src_dir, + default_src_dir_basename="cmamp1", + default_dst_dir_basename="amp1", + check_exists=False, + ) + # Define expected output. + expected = ( + "/src/amp1/oms/broker/broker.py", + "oms/broker/broker.py", + ) + # Compare actual and expected output. 
+ self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py new file mode 100644 index 000000000..cb40f72a5 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py @@ -0,0 +1,32 @@ +import logging + +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_lint as hlitalin +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_lint_check_if_it_was_run +# ############################################################################# + + +class Test_lint_check_if_it_was_run(hunitest.TestCase): + """ + Test `lint_check_if_it_was_run()`. + """ + + def test1(self) -> None: + # Build a mock context. + ctx = httestlib._build_mock_context_returning_ok() + # Stash the leftover changes from the previous tests. + cmd = "git stash --include-untracked" + hsystem.system(cmd) + # Simple check that the function does not fail. + _ = hlitalin.lint_check_if_it_was_run(ctx) + # Pop the stashed changes to restore the original state. + cmd = "git stash pop" + # Do not abort on error because the stash may be empty. 
+ hsystem.system(cmd, abort_on_error=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py new file mode 100644 index 000000000..321f7f515 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py @@ -0,0 +1,1163 @@ +import logging +import os +import re +import unittest.mock as umock +from typing import List + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest +import helpers.lib_tasks_pytest as hlitapyt +import helpers.test.test_lib_tasks as httestlib + +_LOG = logging.getLogger(__name__) + +# pylint: disable=protected-access + + +def _remove_junit_suite_name(text: str) -> str: + """ + Remove the junit suite name from the input text. + - E.g. '-o junit_suite_name="helpers"' -> '-o junit_suite_name=""' + + :param text: input text to process + :return: text with the junit suite name removed + """ + txt = re.sub(r'(-o\s*junit_suite_name=)"[^"]*"', r'\1""', text) + return txt + + +def _purify_pytest_command(text: str) -> str: + """ + Purify the pytest command by removing environment-specific values. 
+ + :param text: input text to process + :return: text with environment-specific values removed + """ + txt = _remove_junit_suite_name(text) + return txt + + +# ############################################################################# +# Test_build_run_command_line1 +# ############################################################################# + + +class Test_build_run_command_line1(hunitest.TestCase): + def run_fast_tests1_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Basic run fast tests. + + :param is_dev_csfy_return_value: mocking the return_value of + `hserver.is_dev_csfy()` + :param is_inside_ci_return_value: mocking the return_value of + `hserver.is_inside_ci()` + :param expected: expected output string + """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests1_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . 
' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests1_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests1_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_inside_ci_return_value = False + is_dev_csfy_return_value = False + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests2_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Coverage and collect-only. + + See `run_fast_tests1_helper()` for params description. 
+ """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = True + collect_only = True + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests2_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + r'pytest -m "not slow and not superslow" . ' + r"-o timeout_func_only=true --timeout 5 --reruns 2 " + r'--only-rerun "Failed: Timeout" --cov=.' + r" --cov-branch --cov-report term-missing --cov-report html " + r"--collect-only -n 1 " + r"--junit-xml=tmp.junit.xml " + r'-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests2_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests2_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests2_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + r'pytest -m "not slow and not superslow" . 
' + r"-o timeout_func_only=true --timeout 50 --reruns 2 " + r'--only-rerun "Failed: Timeout" --cov=.' + r" --cov-branch --cov-report term-missing --cov-report html " + r"--collect-only -n 1 " + r"--junit-xml=tmp.junit.xml " + r'-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests2_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + @pytest.mark.skip(reason="Fix support for pytest_mark") + @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") + def test_run_fast_tests4(self) -> None: + """ + Select pytest_mark. + """ + scratch_space = self.get_scratch_space(use_absolute_path=False) + dir_name = os.path.join(scratch_space, "test") + file_dict = { + "test_this.py": hprint.dedent( + """ + foo + + class TestHelloWorld(hunitest.TestCase): + bar + """ + ), + "test_that.py": hprint.dedent( + """ + foo + baz + + @pytest.mark.no_container + class TestHello_World(hunitest.): + bar + """ + ), + } + incremental = True + hunitest.create_test_dir(dir_name, incremental, file_dict) + # + test_list_name = "fast_tests" + custom_marker = "" + pytest_opts = "" + skip_submodules = True + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + actual = hlitapyt._build_run_command_line( + test_list_name, + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + expected = ( + "pytest Test_build_run_command_line1.test_run_fast_tests4/tmp.scratch/" + "test/test_that.py" + ) + self.assert_equal(actual, expected) + + def run_fast_tests5_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Basic run fast tests tee-ing to a file. Mock depending on + `is_dev_csfy_return_value`. + + See `run_fast_tests1_helper()` for params description. 
+ """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = True + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests5_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + " 2>&1" + " | tee tmp.pytest.fast_tests.log" + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests5_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests5_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests5_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . 
' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + " 2>&1" + " | tee tmp.pytest.fast_tests.log" + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests5_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests6_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Run fast tests with a custom test marker. + + See `run_fast_tests1_helper()` for params description. + """ + custom_marker = "optimizer" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests6_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. + """ + expected = ( + 'pytest -m "optimizer and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests6_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests6_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. 
+ """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests6_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "optimizer and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests6_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def run_fast_tests7_helper( + self, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Run fast tests with parallelization. + + See `run_fast_tests1_helper()` for params description. + """ + custom_marker = "" + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "auto" + # + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_run_fast_tests7_inside_ck_infra(self) -> None: + """ + Mock test for running fast tests inside the CK infra. 
+ """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n auto ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + self.run_fast_tests7_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests7_inside_ci(self) -> None: + """ + Mock test for running fast tests inside CI flow only. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = True + self.run_fast_tests1_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def test_run_fast_tests7_outside_ck_infra(self) -> None: + """ + Mock test for running fast tests outside the CK infra. + """ + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n auto ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + self.run_fast_tests7_helper( + is_dev_csfy_return_value, is_inside_ci_return_value, expected + ) + + def get_custom_marker_helper( + self, + run_only_test_list: str, + skip_test_list: str, + is_dev_csfy_return_value: bool, + is_inside_ci_return_value: bool, + expected: str, + ) -> None: + """ + Check that a correct cmd line is generated with custom marker string. 
+ + :param run_only_test_list: a string of comma-separated markers + to run + :param skip_test_list: a string of comma-separated markers to + skip + :param is_dev_csfy_return_value: see `run_fast_tests1_helper()` + :param is_inside_ci_return_value: see `run_fast_tests1_helper()` + :param expected: expected output string + """ + # Mock settings. + pytest_opts = "" + skip_submodules = False + coverage = False + collect_only = False + tee_to_file = False + n_threads = "1" + # Mock test. + with ( + umock.patch.object( + hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value + ), + umock.patch.object( + hserver, "is_inside_ci", return_value=is_inside_ci_return_value + ), + ): + custom_marker = hlitapyt._get_custom_marker( + run_only_test_list=run_only_test_list, + skip_test_list=skip_test_list, + ) + actual = hlitapyt._build_run_command_line( + "fast_tests", + custom_marker, + pytest_opts, + skip_submodules, + coverage, + collect_only, + tee_to_file, + n_threads, + ) + actual = _purify_pytest_command(actual) + expected = _purify_pytest_command(expected) + self.assert_equal(actual, expected) + + def test_get_custom_marker1_full(self) -> None: + # Input params. + run_only_test_list = "run_marker_1,run_marker_2" + skip_test_list = "skip_marker_1,skip_marker_2" + is_dev_csfy_return_value = False + is_inside_ci_return_value = False + # Expected output. + expected = ( + 'pytest -m "' + "run_marker_1 and run_marker_2 " + "and not requires_ck_infra " + "and not skip_marker_1 and not skip_marker_2 " + 'and not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 50 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1 ' + "--junit-xml=tmp.junit.xml " + '-o junit_suite_name="helpers"' + ) + # Mock check. + self.get_custom_marker_helper( + run_only_test_list, + skip_test_list, + is_dev_csfy_return_value, + is_inside_ci_return_value, + expected, + ) + + def get_custom_marker2_empty(self) -> None: + # Input params. 
+ run_only_test_list = "" + skip_test_list = "" + is_dev_csfy_return_value = True + is_inside_ci_return_value = True + # Expected output. + expected = ( + 'pytest -m "not slow and not superslow" . ' + "-o timeout_func_only=true --timeout 5 --reruns 2 " + '--only-rerun "Failed: Timeout" -n 1' + ) + # Mock check. + self.get_custom_marker_helper( + run_only_test_list, + skip_test_list, + is_dev_csfy_return_value, + is_inside_ci_return_value, + expected, + ) + + +# ############################################################################# +# Test_pytest_repro1 +# ############################################################################# + + +class Test_pytest_repro1(hunitest.TestCase): + def helper(self, file_name: str, mode: str, expected: List[str]) -> None: + script_name = os.path.join( + self.get_scratch_space(), "tmp.pytest_repro.sh" + ) + ctx = httestlib._build_mock_context_returning_ok() + actual = hlitapyt.pytest_repro( + ctx, mode=mode, file_name=file_name, script_name=script_name + ) + hdbg.dassert_isinstance(actual, str) + expected = "\n".join(["pytest " + x for x in expected]) + self.assert_equal(actual, expected) + + # //////////////////////////////////////////////////////////////////////////// + + def _build_pytest_filehelper(self, txt: str) -> str: + txt = hprint.dedent(txt) + file_name = os.path.join(self.get_scratch_space(), "cache/lastfailed") + hio.to_file(file_name, txt) + return file_name + + def _build_pytest_file1(self) -> str: + txt = """ + { + "dev_scripts/testing/test/test_run_tests.py": true, + "dev_scripts/testing/test/test_run_tests2.py": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true, + "documentation/scripts/test/test_all.py": true, + "documentation/scripts/test/test_render_md.py": true, + "helpers/test/helpers/test/test_list.py::Test_list_1": true, + "helpers/test/test_cache.py::TestAmpTask1407": true + } + """ + return self._build_pytest_filehelper(txt) + + def test_tests1(self) -> None: + file_name = 
self._build_pytest_file1() + mode = "tests" + expected = [ + "dev_scripts/testing/test/test_run_tests.py", + "dev_scripts/testing/test/test_run_tests2.py", + "documentation/scripts/test/test_all.py", + "documentation/scripts/test/test_render_md.py", + "helpers/test/helpers/test/test_list.py::Test_list_1", + "helpers/test/test_cache.py::TestAmpTask1407", + "helpers/test/test_printing.py::Test_dedent1::test2", + ] + self.helper(file_name, mode, expected) + + def test_files1(self) -> None: + file_name = self._build_pytest_file1() + mode = "files" + expected = [ + "dev_scripts/testing/test/test_run_tests.py", + "dev_scripts/testing/test/test_run_tests2.py", + "documentation/scripts/test/test_all.py", + "documentation/scripts/test/test_render_md.py", + "helpers/test/helpers/test/test_list.py", + "helpers/test/test_cache.py", + "helpers/test/test_printing.py", + ] + self.helper(file_name, mode, expected) + + def test_classes1(self) -> None: + file_name = self._build_pytest_file1() + mode = "classes" + expected = [ + "helpers/test/helpers/test/test_list.py::Test_list_1", + "helpers/test/test_cache.py::TestAmpTask1407", + "helpers/test/test_printing.py::Test_dedent1", + ] + self.helper(file_name, mode, expected) + + def _build_pytest_file2(self) -> str: + # pylint: disable=line-too-long + txt = """ + { + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1": true, + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2": true, + 
"core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13": true, + 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1": true, + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1": true, + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1": true, + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1": true, + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1": true, + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2": true, + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3": true, + "core/test/test_config.py::Test_subtract_config1::test_test1": true, + "core/test/test_config.py::Test_subtract_config1::test_test2": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1": true, + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2": true, + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3": true, + "helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2": true, + "helpers/test/test_printing.py::Test_dedent1::test2": true + } + """ + # pylint: enable=line-too-long + return self._build_pytest_filehelper(txt) + + def test_tests2(self) -> None: + file_name = self._build_pytest_file2() + mode = "tests" + # pylint: disable=line-too-long 
+ expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1", + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07", + 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1", + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1", + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3", + "core/test/test_config.py::Test_subtract_config1::test_test1", + "core/test/test_config.py::Test_subtract_config1::test_test2", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3", + 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2", + "helpers/test/test_printing.py::Test_dedent1::test2", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + def test_files2(self) -> None: + file_name = self._build_pytest_file2() + mode = "files" + # pylint: disable=line-too-long + expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py", + "core/dataflow/nodes/test/test_volatility_models.py", + "core/dataflow/test/test_builders.py", + "core/dataflow/test/test_runners.py", + "core/dataflow_model/test/test_model_evaluator.py", + "core/dataflow_model/test/test_run_experiment.py", + "core/test/test_config.py", + "core/test/test_dataframe_modeler.py", + "dev_scripts/test/test_run_notebook.py", + "helpers/test/test_lib_tasks.py", + "helpers/test/test_printing.py", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + def test_classes2(self) -> None: + file_name = self._build_pytest_file2() + mode = "classes" + # pylint: disable=line-too-long + expected = [ + "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel", + "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator", + "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder", + "core/dataflow/test/test_runners.py::TestIncrementalDagRunner", + "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator", + "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1", + "core/test/test_config.py::Test_subtract_config1", + "core/test/test_dataframe_modeler.py::TestDataFrameModeler", + "dev_scripts/test/test_run_notebook.py::TestRunNotebook1", + 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1", + "helpers/test/test_printing.py::Test_dedent1", + ] + # pylint: enable=line-too-long + self.helper(file_name, mode, expected) + + +# ############################################################################# +# Test_pytest_repro_end_to_end +# ############################################################################# + + +@pytest.mark.slow("~6 sec.") +class Test_pytest_repro_end_to_end(hunitest.TestCase): + """ + - Run the `pytest_repro` invoke from command line + - A fixed file imitating the pytest output file is used + - Compare the output to the golden outcome + """ + + def helper(self, cmd: str) -> None: + # Save output in tmp dir. + script_name = os.path.join( + self.get_scratch_space(), "tmp.pytest_repro.sh" + ) + cmd += f" --script-name {script_name}" + # Run the command. + _, actual = hsystem.system_to_string(cmd) + # Filter out the "No module named ..." warnings. + # TODO(Grisha): add the "no module warning" filtering to + # `purify_text()` in `check_string()`. + regex = "WARN.*No module" + actual = hunitest.filter_text(regex, actual) + # Remove "Encountered unexpected exception importing solver GLPK" + # generated on Mac. + regex = "Encountered unexpected exception importing solver GLPK" + actual = hunitest.filter_text(regex, actual) + # ImportError("cannot import name 'glpk' from 'cvxopt' (/venv/lib/python3.9/site-packages/cvxopt/__init__.py)") + regex = r"""ImportError\("cannot import name""" + actual = hunitest.filter_text(regex, actual) + # Modify the outcome for reproducibility. + actual = hprint.remove_non_printable_chars(actual) + actual = re.sub(r"[0-9]{2}:[0-9]{2}:[0-9]{2} - ", r"HH:MM:SS - ", actual) + actual = actual.replace("/app/amp/", "/app/") + actual = re.sub( + r"lib_tasks_pytest.py pytest_repro:[0-9]+", + r"lib_tasks_pytest.py pytest_repro:{LINE_NUM}", + actual, + ) + # Remove unstable content. 
+ lines = actual.split("\n") + line_cmd = lines[0] + _LOG.debug("%s", "\n".join(lines)) + for i, line in enumerate(lines): + m = re.search("# pytest_repro: ", line) + if m: + test_output_start = i + 1 + break + lines_test_output = lines[test_output_start:] + # + actual = "\n".join([line_cmd] + lines_test_output) + regex = "init_logger" + actual = hunitest.filter_text(regex, actual) + regex = r"(WARN|INFO)\s+hcache.py" + actual = hunitest.filter_text(regex, actual) + # Check the outcome. + self.check_string(actual, purify_text=True, fuzzy_match=True) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test1(self) -> None: + file_name = f"{self.get_input_dir()}/cache/lastfailed" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test2(self) -> None: + """ + The tests are different since the input depends on the test and it's + different for different tests. + """ + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test3(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}'" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test4(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. 
See CmTask10739", + ) + def test5(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test6(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + @pytest.mark.skipif( + not hgit.is_in_helpers_as_supermodule(), + reason="Run only in helpers as super module. See CmTask10739", + ) + def test7(self) -> None: + file_name = f"{self.get_input_dir()}/log.txt" + cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" + self.helper(cmd) + + +# ############################################################################# +# Test_pytest_failed1 +# ############################################################################# + + +class Test_pytest_failed1(hunitest.TestCase): + def get_pytest_text1(self) -> str: + txt = """ + 20:48:15 - ^[[36mINFO ^[[0m hdbg.py init_logger:1018 > cmd='/venv/bin/pytest helpers_root/dev_scripts_helpers/documentation/' + collected 47 items + + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 (2.07 s) FAILED [ 2%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question1 (0.00 s) PASSED [ 4%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question2 (0.00 s) PASSED [ 6%] + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question3 (0.00 s) PASSED [ 8%] + + + =================================== FAILURES =================================== + _________________________ Test_preprocess_notes1.test1 _________________________ + + 
FAILED helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - AttributeError: 'list' object has no attribute 'split' + FAILED helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - RuntimeError: cmd='(/app/helpers_root/dev_scripts_helpers/documentation/notes_to_pdf.py --input /app/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes + + ======================== 4 failed, 43 passed in 40.48s ========================= + """ + txt = hprint.dedent(txt) + return txt + + def helper( + self, + txt: str, + only_file: bool, + only_class: bool, + exp_failed_tests: str, + exp_num_failed: int, + exp_num_passed: int, + ) -> None: + act_failed_tests, act_num_failed, act_num_passed = ( + hlitapyt._parse_failed_tests(txt, only_file, only_class) + ) + act_failed_tests = "\n".join(act_failed_tests) + self.assert_equal( + act_failed_tests, + exp_failed_tests, + dedent=True, + remove_lead_trail_empty_lines=True, + ) + self.assertEqual(act_num_failed, exp_num_failed) + self.assertEqual(act_num_passed, exp_num_passed) + + def test1(self) -> None: + # Prepare inputs and outputs. + txt = self.get_pytest_text1() + only_file = False + only_class = False + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) + + def test2(self) -> None: + # Prepare inputs and outputs. 
+ txt = self.get_pytest_text1() + only_file = True + only_class = False + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) + + def test3(self) -> None: + # Prepare inputs and outputs. + txt = self.get_pytest_text1() + only_file = False + only_class = True + exp_failed_tests = """ + helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1 + helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3 + """ + exp_num_failed = 4 + exp_num_passed = 43 + # Check. + self.helper( + txt, + only_file, + only_class, + exp_failed_tests, + exp_num_failed, + exp_num_passed, + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py new file mode 100644 index 000000000..ac2b17b42 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py @@ -0,0 +1,301 @@ +import logging +import os + +import pytest + +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hunit_test as hunitest +import helpers.lib_tasks_utils as hlitauti + +_LOG = logging.getLogger(__name__) + + +# pylint: disable=protected-access + + +# ############################################################################# +# Test_get_files_to_process1 +# ############################################################################# + + 
+class Test_get_files_to_process1(hunitest.TestCase): + """ + We can't check the outcome so we just execute the code. + """ + + def test_modified1(self) -> None: + """ + Retrieve files modified in this client. + """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + @pytest.mark.skipif( + hgit.get_branch_name() != "master", + reason="This test makes sense for a branch", + ) + def test_branch1(self) -> None: + """ + Retrieved files modified in this client. + """ + # This test needs a reference to Git master branch. + hgit.fetch_origin_master_if_needed() + # + modified = False + branch = True + last_commit = False + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + def test_last_commit1(self) -> None: + """ + Retrieved files modified in the last commit. + """ + modified = False + branch = False + last_commit = True + all_ = False + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + _ = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + + def test_files1(self) -> None: + """ + Pass through files from user. + """ + modified = False + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = True + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, [__file__]) + + def test_files2(self) -> None: + """ + Pass through files from user. 
+ + Use two types of paths we don't want to process: + - non-existent python file + - pattern "/*" that matches no files + """ + modified = False + branch = False + last_commit = False + all_ = False + files_from_user = "testfile1.py testfiles1/*" + mutually_exclusive = True + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, []) + + def test_files3(self) -> None: + """ + Pass through files from user. + + Use the sequence of paths separated by newlines. + """ + modified = False + branch = False + last_commit = False + all_ = False + # Specify the number of toy files. + n_toy_files = 4 + files_from_user = [] + # Get root directory. + root_dir = hgit.get_client_root(super_module=False) + # Generate toy files and store their paths. + for file_num in range(n_toy_files): + # Build the name of the test file. + file_name = f"test_toy{str(file_num)}.tmp.py" + # Build the path to the test file. + test_path = os.path.join(root_dir, file_name) + # Create the empty toy file. + hio.to_file(test_path, "") + files_from_user.append(test_path) + mutually_exclusive = True + remove_dirs = True + # Join the names with `\n` separator. + joined_files_from_user = "\n".join(files_from_user) + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + joined_files_from_user, + mutually_exclusive, + remove_dirs, + ) + # Remove the toy files. + for path in files_from_user: + hio.delete_file(path) + self.assertEqual(files, files_from_user) + + def test_assert1(self) -> None: + """ + Test that --modified and --branch together cause an assertion. 
+ """ + modified = True + branch = True + last_commit = False + all_ = True + files_from_user = "" + mutually_exclusive = True + remove_dirs = True + with self.assertRaises(AssertionError) as cm: + hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + '3' + == + '1' + Specify only one among --modified, --branch, --last-commit, --all_files, and --files + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert2(self) -> None: + """ + Test that --modified and --files together cause an assertion if + `mutually_exclusive=True`. + """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = True + remove_dirs = True + with self.assertRaises(AssertionError) as cm: + hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + '2' + == + '1' + Specify only one among --modified, --branch, --last-commit, --all_files, and --files + """ + self.assert_equal(actual, expected, fuzzy_match=True) + + def test_assert3(self) -> None: + """ + Test that --modified and --files together don't cause an assertion if + `mutually_exclusive=False`. 
+ """ + modified = True + branch = False + last_commit = False + all_ = False + files_from_user = __file__ + mutually_exclusive = False + remove_dirs = True + files = hlitauti._get_files_to_process( + modified, + branch, + last_commit, + all_, + files_from_user, + mutually_exclusive, + remove_dirs, + ) + self.assertEqual(files, [__file__]) + + +# ############################################################################# + + +# ############################################################################# +# TestLibTasksRemoveSpaces1 +# ############################################################################# + + +class TestLibTasksRemoveSpaces1(hunitest.TestCase): + def test1(self) -> None: + txt = r""" + IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev \ + docker-compose \ + --file $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml \ + run \ + --rm \ + -l user=$USER_NAME \ + --entrypoint bash \ + user_space + """ + actual = hlitauti._to_single_line_cmd(txt) + expected = ( + "IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev" + " docker-compose --file" + " $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml" + " run --rm -l user=$USER_NAME --entrypoint bash user_space" + ) + self.assert_equal(actual, expected, fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py new file mode 100644 index 000000000..ac46b6c17 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py @@ -0,0 +1,74 @@ +import os + +import pytest + +import config_root.config as cconfig +import dev_scripts_helpers.notebooks.run_notebook_test_case as dshnrntca +import helpers.hgit as hgit +import 
helpers.hserver as hserver +import helpers.lib_tasks_gh as hlitagh + + +def build_config() -> cconfig.ConfigList: + """ + Get an empty config for the test. + """ + config = {} + config = cconfig.Config() + config_list = cconfig.ConfigList([config]) + return config_list + + +# ############################################################################# +# Test_Master_buildmeister_dashboard_notebook +# ############################################################################# + + +class Test_Master_buildmeister_dashboard_notebook( + dshnrntca.Test_Run_Notebook_TestCase +): + @pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="No access to data from `lemonade` repo locally", + ) + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + @pytest.mark.superslow("~42 sec.") + def test1(self) -> None: + amp_dir = hgit.get_amp_abs_path() + notebook_path = os.path.join( + amp_dir, + "devops", + "notebooks", + "Master_buildmeister_dashboard.ipynb", + ) + config_builder = ( + "helpers.test.test_master_buildmeister_dashboard.build_config()" + ) + self._test_run_notebook(notebook_path, config_builder) + + @pytest.mark.skipif( + not hserver.is_inside_ci(), + reason="No access to data from `lemonade` repo locally", + ) + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Run only in amp as super-module", + ) + @pytest.mark.superslow("~30 sec.") + def test2(self) -> None: + """ + Check that we can get status for all the workflows. 
+ """ + repo_list = [ + "causify-ai/cmamp", + "causify-ai/orange", + "causify-ai/lemonade", + "causify-ai/kaizenflow", + "causify-ai/helpers", + "causify-ai/quant_dashboard", + ] + for repo_name in repo_list: + hlitagh.gh_get_workflow_type_names(repo_name) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py new file mode 100644 index 000000000..ced80844b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py @@ -0,0 +1,284 @@ +import logging + +import pytest + +import helpers.hgit as hgit +import helpers.hserver as hserver +import helpers.hunit_test as hunitest +import helpers.hunit_test_utils as hunteuti +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# TestRepoConfig_Amp +# ############################################################################# + + +class TestRepoConfig_Amp(hunitest.TestCase): + # Difference between `cmamp` and `kaizenflow`. + expected_repo_name = "//cmamp" + + def test_repo_name1(self) -> None: + """ + Show that when importing repo_config, one doesn't get necessarily the + outermost repo_config (e.g., for lime one gets amp.repo_config). + """ + + actual = hrecouti.get_repo_config().get_name() + _LOG.info( + "actual=%s expected_repo_name=%s", actual, self.expected_repo_name + ) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_supermodule(), + reason="Only run in amp as supermodule", + ) + def test_repo_name2(self) -> None: + """ + If //amp is a supermodule, then repo_config should report //amp. 
+ """ + actual = hrecouti.get_repo_config().get_name() + self.assertEqual(actual, self.expected_repo_name) + + @pytest.mark.skipif( + not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" + ) + def test_repo_name3(self) -> None: + """ + If //amp is a supermodule, then repo_config should report something + different than //amp. + """ + actual = hrecouti.get_repo_config().get_name() + self.assertNotEqual(actual, self.expected_repo_name) + + def test_config_func_to_str(self) -> None: + _LOG.info(hserver.config_func_to_str()) + + def test_is_dev4(self) -> None: + """ + Amp could run on dev4 or not. + """ + _ = hserver.is_dev4() + + def test_is_CK_S3_available(self) -> None: + """ + When running Amp on dev_csfy, the CSFY bucket should be available. + """ + if hserver.is_dev_csfy(): + actual = hserver.is_CK_S3_available() + expected = True + self.assertEqual(actual, expected) + + +# ############################################################################# +# TestRepoConfig_Amp_signature +# ############################################################################# + + +# > pytest ./amp/helpers/test/test_repo_config_amp.py + + +# ############################################################################# +# TestRepoConfig_Amp_signature1 +# ############################################################################# + + +class TestRepoConfig_Amp_signature1(hunitest.TestCase): + def test_dev_csfy_server(self) -> None: + target_name = "amp" + hunteuti.execute_only_in_target_repo(target_name) + # + hunteuti.execute_only_on_dev_csfy() + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='True' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='{'/data/shared': '/shared_data'}' + has_dind_support='True' + has_docker_sudo='True' + is_CK_S3_available='True' + run_docker_as_root='False' + 
skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + # Server config: + # hserver.config + is_AM_S3_available()='True' + is_dev4()='False' + is_dev_csfy()='True' + is_inside_ci()='False' + is_inside_docker()='True' + is_mac(version='Catalina')='False' + is_mac(version='Monterey')='False' + is_mac(version='Sequoia')='False' + is_mac(version='Ventura')='False' + # Env vars: + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + CSFY_CI='' + GH_ACTION_ACCESS_TOKEN=empty + """ + hunteuti.check_env_to_str(self, expected) + + def test_mac(self) -> None: + target_name = "amp" + hunteuti.execute_only_in_target_repo(target_name) + # + hunteuti.execute_only_on_mac(version="Catalina") + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='False' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='None' + has_dind_support='False' + has_docker_sudo='True' + is_CK_S3_available='False' + run_docker_as_root='False' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='True' + use_docker_network_mode_host='False' + use_docker_sibling_containers='True' + # Server config: + # hserver.config + is_AM_S3_available='True' + is_dev4='False' + is_dev_csfy='False' + is_inside_ci='False' + is_inside_docker='True' + is_mac='True' + # Env vars: + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='False' + CSFY_REPO_CONFIG_PATH='' + CSFY_CI='' + GH_ACTION_ACCESS_TOKEN=empty + """ + hunteuti.check_env_to_str(self, expected) + # + exp_enable_privileged_mode = True + exp_has_dind_support = True + hrecouti.assert_setup( + self, exp_enable_privileged_mode, exp_has_dind_support + ) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == 
"//amp", + reason="Run only in //amp", + ) + def test_amp_ci(self) -> None: + hunteuti.execute_only_on_ci() + # + expected = r""" + # Repo config: + # repo_config.config + enable_privileged_mode='True' + get_docker_base_image_name='amp' + get_docker_shared_group='' + get_docker_user='' + get_host_name='github.com' + get_invalid_words='[]' + get_shared_data_dirs='None' + has_dind_support='True' + has_docker_sudo='False' + is_CK_S3_available='False' + run_docker_as_root='True' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + # Server config: + # hserver.config + is_AM_S3_available()='True' + is_dev4()='False' + is_dev_csfy()='False' + is_inside_ci()='True' + is_inside_docker()='True' + is_mac(version='Catalina')='False' + is_mac(version='Monterey')='False' + is_mac(version='Ventura')='False' + is_mac(version='Sequoia')='False' + # Env vars: + CSFY_CI='true' + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + """ + # We ignore the AWS vars, since GH Actions does some replacement to mask + # the env vars coming from secrets. 
+ skip_secrets_vars = True + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) + + @pytest.mark.skipif( + not hrecouti.get_repo_config().get_name() == "//cmamp", + reason="Run only in //cmamp", + ) + def test_cmamp_ci(self) -> None: + hunteuti.execute_only_on_ci() + # + expected = r""" + # Repo config + get_host_name='github.com' + get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' + get_invalid_words='[]' + get_docker_base_image_name='cmamp' + # Server config + enable_privileged_mode='True' + get_docker_shared_group='' + get_docker_user='' + get_host_user_name='runner' + get_shared_data_dirs='None' + has_dind_support='True' + has_docker_sudo='False' + is_AM_S3_available='True' + is_CK_S3_available='True' + is_dev4='False' + is_dev_csfy='False' + is_external_linux='False' + is_host_mac='False' + is_ig_prod='False' + is_inside_ci='True' + is_inside_docker='True' + is_inside_ecs_container='False' + is_inside_unit_test='True' + is_prod_csfy='False' + run_docker_as_root='True' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + use_main_network='False' + # Env vars + CSFY_CI='true' + CSFY_ECR_BASE_PATH='$CSFY_ECR_BASE_PATH' + CSFY_ENABLE_DIND='1' + CSFY_FORCE_TEST_FAIL='' + CSFY_REPO_CONFIG_CHECK='True' + CSFY_REPO_CONFIG_PATH='' + """ + # We ignore the AWS vars, since GH Actions does some replacement to mask + # the env vars coming from secrets. 
+ skip_secrets_vars = True + hunteuti.check_env_to_str( + self, expected, skip_secrets_vars=skip_secrets_vars + ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py new file mode 100644 index 000000000..f5b284c58 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py @@ -0,0 +1,65 @@ +import logging +import os + +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hunit_test as hunitest +import helpers.repo_config_utils as hrecouti + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_repo_config1 +# ############################################################################# + + +class Test_repo_config1(hunitest.TestCase): + def create_test_file(self) -> str: + yaml_txt = """ + repo_info: + repo_name: helpers + github_repo_account: causify-ai + github_host_name: github.com + invalid_words: + issue_prefix: HelpersTask + + docker_info: + docker_image_name: helpers + + s3_bucket_info: + unit_test_bucket_name: s3://cryptokaizen-unit-test + html_bucket_name: s3://cryptokaizen-html + html_ip: http://172.30.2.44 + + container_registry_info: + ecr: 623860924167.dkr.ecr.eu-north-1.amazonaws.com + ghcr: ghcr.io/cryptokaizen + + runnable_dir_info: + use_helpers_as_nested_module: False + venv_tag: helpers + dir_suffix: helpers + """ + yaml_txt = hprint.dedent(yaml_txt) + file_name = os.path.join(self.get_scratch_space(), "yaml.txt") + hio.to_file(file_name, yaml_txt) + return file_name + + def test1(self) -> None: + file_name = self.create_test_file() + repo_config = hrecouti.RepoConfig.from_file(file_name) + actual = repo_config.get_name() + expected = 
"//helpers" + self.assert_equal(actual, expected) + + def test2(self) -> None: + file_name = self.create_test_file() + repo_config = hrecouti.RepoConfig.from_file(file_name) + actual = repo_config.get_repo_map() + expected = { + "helpers": "causify-ai/helpers", + } + self.assert_equal(str(actual), str(expected)) + + # TODO(gp): Test all the methods of the RepoConfig class. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index f46201cbb..24f1adaef 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -1,7 +1,7 @@ --- # ────────────────────────────────────────────────────────────── # playbook.yaml -# Ansible playbook – train, deploy, and test the House Price +# Ansible playbook - train, deploy, and test the House Price # Prediction API running in Docker. 
# # Usage (from your Mac, inside the project folder): @@ -19,7 +19,7 @@ # ── Container settings ──────────────────────────────────── container_name: house-price image_name: house-price-project - host_port: 5001 + host_port: 5000 container_port: 5000 # ── Paths ───────────────────────────────────────────────── @@ -35,17 +35,17 @@ tasks: # ── SETUP ───────────────────────────────────────────────── - - name: "Setup – verify Docker is installed" + - name: "Setup - verify Docker is installed" ansible.builtin.command: docker --version changed_when: false tags: [setup] - - name: "Setup – verify Docker daemon is running" + - name: "Setup - verify Docker daemon is running" ansible.builtin.command: docker info changed_when: false tags: [setup] - - name: "Setup – display deployment config" + - name: "Setup - display deployment config" ansible.builtin.debug: msg: - "Project : {{ project_root }}" @@ -55,21 +55,32 @@ tags: [setup] # ── TRAIN MODEL ─────────────────────────────────────────── - - name: "Train – check if model already exists" + - name: "Train - check if model already exists" ansible.builtin.stat: path: "{{ model_pkl }}" register: model_stat tags: [train] - - name: "Train – run template.example.py to train and save model" - ansible.builtin.command: - cmd: python3 template.example.py - chdir: "{{ project_root }}" + - name: "Train – run template.example.py inside Docker container" + ansible.builtin.command: > + docker run --rm + --name {{ container_name }}-train + -v {{ project_root }}:/project + {{ image_name }} + bash -c "cd /project && python template.example.py" when: not model_stat.stat.exists register: train_out tags: [train] - - name: "Train – show output" + # - name: "Train - run template.example.py to train and save model" + # ansible.builtin.command: + # cmd: python3 template.example.py + # chdir: "{{ project_root }}" + # when: not model_stat.stat.exists + # register: train_out + # tags: [train] + + - name: "Train - show output" ansible.builtin.debug: var: 
train_out.stdout_lines when: @@ -77,7 +88,7 @@ - train_out is defined tags: [train] - - name: "Train – confirm model file exists" + - name: "Train - confirm model file exists" ansible.builtin.stat: path: "{{ model_pkl }}" register: model_check @@ -85,19 +96,19 @@ tags: [train] # ── DEPLOY ──────────────────────────────────────────────── - - name: "Deploy – stop existing container (if running)" + - name: "Deploy - stop existing container (if running)" ansible.builtin.command: docker stop {{ container_name }} ignore_errors: true changed_when: false tags: [deploy] - - name: "Deploy – remove existing container (if any)" + - name: "Deploy - remove existing container (if any)" ansible.builtin.command: docker rm {{ container_name }} ignore_errors: true changed_when: false tags: [deploy] - - name: "Deploy – start container with Flask API" + - name: "Deploy - start container with Flask API" ansible.builtin.command: > docker run -d --name {{ container_name }} @@ -109,7 +120,7 @@ bash -c "PORT={{ container_port }} python /project/app.py" tags: [deploy] - - name: "Deploy – wait for API to become healthy" + - name: "Deploy - wait for API to become healthy" ansible.builtin.uri: url: "{{ api_base }}/health" method: GET @@ -120,13 +131,13 @@ until: health_resp.status == 200 tags: [deploy] - - name: "Deploy – confirm API is up" + - name: "Deploy - confirm API is up" ansible.builtin.debug: msg: "API is live at {{ api_base }} status={{ health_resp.json.status }}" tags: [deploy] # ── TESTING ─────────────────────────────────────────────── - - name: "Test 1 – GET /health returns ok" + - name: "Test 1 - GET /health returns ok" ansible.builtin.uri: url: "{{ api_base }}/health" method: GET @@ -134,14 +145,14 @@ register: t_health tags: [testing] - - name: "Test 1 – assert status is ok" + - name: "Test 1 - assert status is ok" ansible.builtin.assert: that: t_health.json.status == "ok" success_msg: "✅ /health → ok" fail_msg: "❌ /health returned {{ t_health.json }}" tags: [testing] - - name: 
"Test 2 – POST /predict returns a price" + - name: "Test 2 - POST /predict returns a price" ansible.builtin.uri: url: "{{ api_base }}/predict" method: POST @@ -156,7 +167,7 @@ register: t_predict tags: [testing] - - name: "Test 2 – assert predicted_price is positive" + - name: "Test 2 - assert predicted_price is positive" ansible.builtin.assert: that: - t_predict.json.predicted_price is defined @@ -165,7 +176,7 @@ fail_msg: "❌ /predict returned {{ t_predict.json }}" tags: [testing] - - name: "Test 3 – POST /predict/batch returns two prices" + - name: "Test 3 - POST /predict/batch returns two prices" ansible.builtin.uri: url: "{{ api_base }}/predict/batch" method: POST @@ -178,7 +189,7 @@ register: t_batch tags: [testing] - - name: "Test 3 – assert batch count and ordering" + - name: "Test 3 - assert batch count and ordering" ansible.builtin.assert: that: - t_batch.json.count == 2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv deleted file mode 100644 index 442d29f5a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/leaderboard.csv +++ /dev/null @@ -1,4 +0,0 @@ -Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s) -RNN,relu,adam,50,Yes,0.7541,0.7536,6.39 -RNN,relu,adam,50,Yes,0.7541,0.7536,6.36 -RNN,relu,adam,50,Yes,0.7541,0.7536,6.28 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv deleted file mode 100644 index b0f2ad24e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ 
-Model,Activation,Optimizer,Seq Length,Grad Clipping,Accuracy,F1,Epoch Time (s),Epochs,Hardware -RNN,relu,adam,50,Yes,0.7541,0.7536,6.39,5,CPU -RNN,relu,adam,50,Yes,0.7541,0.7536,6.36,5,CPU -RNN,relu,adam,50,Yes,0.7541,0.7536,6.28,5,CPU diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_activation.png deleted file mode 100644 index f0a0df7bbce4c69ba5c06d3180e8098884dada64..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16500 zcmdse2UJwqx+dl}w*dnZ+7?kki$=*&ZAAeIQiWttOF(kY$u_ovpahYO0VqH~C1)cj zK_m+lNhAwM&M@D$``vrr>-+A^teIJFy}9dlnTo1&_WAe!e`()TQ97}9727Hn7M8Vg zCy%MJuq+l~VOdo1!*cx1y{7w5@Qaw;aSc1FrJFtD*T zwzN1z5u+U3|I-hgF@;d7}_hmRlEaq=qvxNc`a6vyGV`MEaVdZjDHdqtnRNGF-< zs@s<5)y_q1x7Tax(=#dT(z?GYu-SJ!DvVr_3MH)P{TVH_U+Ys8 z+&>tm7~>7B;`38)ii#5K`)YY9lyQx6i!L8_mZh4VAbTJm-2h2C)= zhsry;D;~KFWOZ%k;z~BDne3WT9qX?6RfK7ECBki3PC4kZO<~JAnu~11x22%KV|M7oiOBXMU(?FRa@oVhd>vg~vD0IH366u!4H=e^ zj~BuV>oct%RXtnJCA}XH;M(e~;tjIi+a81nnf4|m=c}GN<(-uIc@?=ozkraC_?&@k zh1)ws?iCaigos!sJ);-#?%7imJ>Whw-qY+lsCM$?@3XVB)sN*it=%DfYT2@7;%<{q zbMx{7@7!s9-qKkXYLHStI*>D{_f2YcZlZ;C>(|(0Ip=CX^k6g+MWBrZlZEbDQ5ivcV zo;|{C4LZ#Zm+<$+gj%T1O^>NPl3lB7TP8#vm|bu6z7vraP1(jZ@oM5WUw)^5O)28* zPpz)g_e>fM3=4bm^2&;w(uUG7F)D|I11((Ke)G0%jPd?PJ~1&(qXo)yi9vq>Svfg5le(nv z#fulyavaRZ23z!3tmm|ANY>7UOjSC+5( z;Rlt3Gl_x*#hd4FSCsaG@di_qiig+8GH2VH=FN>avI(5O8X|1oJIq-cBBY?M@1BN# zq$TS~k##Ea;jry>#EmOPNNPTK@E~3^IRv`=-%(k%SFr^QU!!es(%?88tQ!HD}Au!-ho5D($fsh0*^d_!uzO< zZE}XMudh`qlCkT)bN9lQ{Ti(ocSyTs;s*p2LxqJ_u3Tx@lx_e0hYPnIavad{zjNp8 znI{_Ogv}c+G&{Agtx7l7u6k~uK-JLb7A;KD%{f&TBBY4TUdET4d%iYN+o;*4@4%t6 zFIfB|BQ3oJI+H!;5{xS!-xPD7PNgbNFl~K}@j%7wd#lA-3jFFeZP{WtkljZ~vFVT> zsnxa&#uEvB@#3&;>x)HnZvhlCkDy>R)3L=}X(m$KzF5%g(?_Olxm4F&nD@Sda$bJ^ z^8Ff5Uz|MXIn&ruil<+O{}g3quj77us$xS^7J6J3<|b})`-g<|3VvwLbG^+y^4W`D 
zyn#n4>|*DKaMRi+n)KRaeM>#3wq={x+0A5q*n7Isn_BKubhGbclN5bw%&>Hnb7AXa3B)HpFbj_Ry6|ofdD@D2KM(jZG$SEla$Hm3*2np3lj#X26e){PJ>Urd0bL`Au zLCfUiLY7+;1)z7>G?bxs8;?ppEhD-(NaA)3PAEL)h$> zP+{|fh1*7|smj4y^_*Tl(Ml^uPp`;D2jcnp=T~-v%}vI$sMDI02eG0VQ>|ClmwtZh zrHK1D<>ZvPa{YS8Q9fSYgBeW@p>d}lT@MzsjgeDS6iP0b&o-`R+%;~>PMboNa=71E z&t+jI9O^tfR8)ptt9btWL-K}|Yt{(9zOmlXdBJ6@*8n}ivw4C`ZFSr7z=ns%y}Y?c ziusba(Y?j2KVOa0Oo{1fNK2p=poq6~adD*>l}Ffe@<~hQS9>zcJ1sKrUg#{3^lYwE z(a|x(of`PCi=F2Vo@nuG8DbvDV4g?6Hh7+r!mm-~IX}X6Lfg`1w8^I%G! zqy#ezWp?%I)%)l#5@Vr~&d-O&#)QPg#G-vp(`XmSg={&Xbs;x5w^UhX?u%2Ba-_79 z{u84adggY)i+6@bMh^A#^eD}d1&CKqw(#MSvGAU1F{;RMw2bvo2%-r2g)OETlVaKR zexFX*p#9sTqR?#nK5^-Z=lRO0H4(qkk#d>AYW`ijUN2p`^!|JQJrWHT`BR3w-apaK z7|0b0oaOkGYV3dag2P~QKSKx`#O~{-inxWlyH0rf`ubKGJ-zULG@M(;+PbYbWn5RO8%V-H_%EBa_hjWvcsI7>NR*BoaXAj>`KOMfZ ze61gM%@a*3*{|tl^$!R0XFgO6zFsdwn;2+94^Ob=>?40d;n<-u-!r{U>PbP{!6E%T zozA*t3y-CxC1%K8Nl6{QRL4O*Iip%0vxZc;CCk^Sv+R6fj>TRuLu*Vpb~H(xJ38O1H8SY9w%sgT_nVro~VD57iS zJv{q_+1B5mxbxTR7pI0hic3oTxmj6Rd$4<}bsd|c$*NArL`V00kQk()v2}EG>_#z= z5qbRh@#*nKOMa8tvA(*56TY0QRW|MqPjaaJI7YFn z?P&MudNbi=D^>()y-v(%j*p7^sim5MRyE{9@(edc3 z*REA-OgGn6s;^Db)y`nrcqu4i0#&!Zyu!-H*30O@_`Nq;P_SfXVo?3enS0CEY->%m z^yg8mj#k(jn~wWFoMuu}$>GRsqp9aO^lv6Vl<=i|)P16#Zp4&Pso9u>4|J)Ae)o50J3GH;Vimph{fgf(~j`YgGhu1_MSrRmk@(_=ZX{`1DHpewCG_kuv&)+W@Owqnl$pZi_U3e4?51rK2MnO`_)p zmxt)!P;q7K7p|ffxK_Z8&hE9u~xIl<*lv&^50Y4%)jPt;72 zMK#$>$+T7mOc-?}VP5xL-(m6b^0L)}4qr76XgocPy-)uDD-E?MvQ}14v2Vso_zM`ax7mEgv1v|7OG{5)%JPU* z?UD7Zt!HZzwbe14t$X<7)OD<{RSBd#e-4bct7s%vInrTe$Zn76o;^j|o-Q}EP}xT| zJ(g7^#jq^(-0S5*=D^*kqv>z2*YB1?zo|r-d~mIiOV-O91$i;Sw|9M|+SnJOAbYCw z&%eEOuugB-caE!Ish<0oT5Qp7^#ge`136u5ly}e0&5gZSvc8*qKE@oTUDrW#Pic&H z>uj6OcXxL+q??oe-C51g%PU_|QQ?BBOLH1Je|ww2s~)dZLDnJ7KzreKvt1{As$(NP z;uEzqOgf4K*#Nw%_Mb40^b&}vPPjm-SJJ_kQ;J=`(?~n@;#~|6J&T}3eN277$4geM z6HRM06LxlWmB_1Y@t9G>Lp6*m%KA8CH`AX{vh5MJiGBCSmj*d#j}#44&4qQTX7#E7 zo?6*bU9sMu5wgyN}&So}PFW2c=)jVcX4KR~J;sm-=JP>l+ zp_7be)KxHtF;tpY1X-GF2It`WPumFXHqI@`I&Z(={_J!Zp*)g0mLp 
zXLOV9#bdYwx7^draY%GVA5zrO32QgmNB7PetJPNQZ%8XcV>LueG>vfjywbKtO+7F> zDib9{NyG360bdOM_RVSp4LX#dpBtPm?@}*TyfP3ld0(TduIC&bEMgf!V6Eo!3pcZ{ zuQ=2d0w>W`yfZQ~?q1OIoEtls=j)lP75~K~fIv#Vq|8@txDd)r!2X}p>&-gR&hlEf z;n5IuPk@(QPvu*)PfuUE%};gl96D6BtsPhN25nC9@Zn3wAc=P`j1RU9j?NMc2@ug! zn;5E@=aQzG@0Qh5pAy@iBIUZl`TaI9j?OA&8KFqe`KNsiFqnJg8MbjyKTo}bwD$t$v??q<$H?TEqrjX9I3RcM$Zc3Zu{ zXtG9KKUiWQ^YzkY%eG^bkmnTi)F!U2a2p-Nuh%9mP@oXwqXnC}l3W;QRwXw#_x&$l zzNq5%w|Yd%`;fJuf^Kj6@yD|e5Dd}mI!C)Yb;yU{QVhi%2h0c*1@AC|T0_sD{OqNw z-+QckIToo-k4AZnmfe_=YP{SanVOcn-s*{5gpGT=Pa zt%}N79>tIg6Yix54xx}aH9s?`khf~JT4>q*l+O4?G_Csj`c>=K*X{C^ zeApG?n)IqrGs`A=Aa_J5Q7cUz^fl?Jb!)ys$vxxN7rzm8pvq-qf|$o_rdI032l?(Z zCe69dtm3r`b1e(F&g0@%tuMrqV~*DRnz@KK3(WWk0aP*zQ_6|@1$t3`=hCxVIfzQZ=pMy`NpPJQK+j78$#@9v zbOZual@EtRb&O)@svR?vL+wPy85{Im$T_vT>$LsTsBMfdXfyVxF2xqNk=tAN^>ofH z6iHh!MVlZE#p~vz-`lTw^l98x;95P`ejy<+*MkMC2A~9ilbA9yt$Wcm3Vpef@7(#( z9R20!mD}7L;&vyYN!6eM^VzZMc$6|B?J1n={JXSQ9BEDHV^Vc3Ef{3kz*S z;kIy5>!X<6v^0~h$k3^(YoXBYvS(j#u&^-L4jw#6nVhln%(kKq&+LMpMUip(frW)P zc6Qe7+O=z9`9wve&l@d^T$`0y=7r}q51=4)c@PBxl+0;DitA0 znjJkGS!j1&myZtFtz=xipS1{D?a|Etn%B=mR{jA2kAMIDhpCwvuwZ#0PBEjF4ZCCk z=(Efqu4H+$E@8P;T>7YuXZP;on|MxrOf{|wxOXoG-GWSOd1Yn(w1qG++Y1?u9NG{e zdaL6exkCRssJ;%%&F6cah&Q)6rL-Vuo+j#&>$n*nfbjq`D|&qWs#U6h&Gg1+^cw_c zLqr-5qi^24d9RF2i}4Z`mfDVIR?uA-;S!F#JUoSQQan7e;1+yU0)GDfL?7dol$;H7 z-?(uje(1}mSFc`C;_xt4vYhub(JmMa#>t&KcPjPcrMS2_yLH|nM;F`Y0a}7>vj@sf zO;0a^Uh?i;=Hu)|EQdQf`+F9nc&q4iZv}1REfgJwR+>qiWlMfne^*!69vPVeg?$@P zeLFcrvsSKKcP}U?sARt)C}4@%2d?dOLC<-&wH)F!^j=m?9#T}2TP)n%-GSAjtE*|T z<9Im^nbffb(o+v8Kpg}}zURkq#mk&WQ)z$<*Vl2xuj3GB*miw@>KRd4RmHkv2VYOA zOEA#R0j<<`#sy!#T(JN8>Ga*ZcO&-hz{Q;BTq^>wM6<9+`rO`bkkQDxeR~`Tjzp3N z03l@}_kBqT1gsK4qYv!L0c-GB#6krMow9!~*(W2Tf8@v!kVq=XP;ALdgO7H_yvji= zQJ2|0Vtiv2UKo7-Fp*K@>4Ex;lH7o$y%hc9{}-=Ll1%R1yZ8P4^HI>2Teogi-aDZE zp|Kz;>M)c&D!MwrY4sD$LuHXN3kTI!W6>lmmDff1`8`~@iD&P=eR3Fm6_Bl=ak?0n zM@X3fe#gzM+4jp}!je#(HM!2VV90$CV1lx2@Psw%y#)%XR~+%^Al1|{vurucF%49q 
zt`W()(i7B+4RXplPDzHieXmaS{i&&`RoEdYQK-TE*$y-g#n>mqb}LzU-?bS{J7q7` z;TdrAbp4u2J9qAZ!vHhx{{3yS<9LC{kXY>w5%U2`sXMDH0;BSF^~2+5HpFOFYY}2_)$N=uNpq4 zI@7v+RpM5B=~_;y1H==69pbJqGcn~?mXMT;U_wqs*#9O`zrSiX*(0Z|0XR*6Lh-@${b3F^sbP)FSF z@kc4CriubUe0fF18iEPP4u=}V_RHBz_}h-SAj~}8?Y_-jg_|bU@dJgKNI1#N^B`u4? zg7`+T#V{wDE1@KK$X(gP!(%eqUBPz9&>NjSx<18-Z~y*NXtq*=9#F*~<}m0Z!^7E+ z#nI~QGdV1X8Uci;1bvE(szM)*gmUSb(*ECI$)wqKDZx7tO)Gly<|4*YSJOmxUlJ8k zV)dgF>&-tsZOe6Ey=qlJWaMBf>b%cJf_Y3|`a zsu~tov0WJG3+BZ>sNQi>+}y_?aYPSPLHH>P6;XzsP&M39LP#v}$)K%EcZtHnAiKou z+_`fGp3hK^#aZ3U-ZnQkPxt9B#78Rx_0}6j*8H|)#poxxZyYu-ILQ644z9sI^pyHA z&K-1}26;l9!4e9fUnhSEm*7K#bDtiqD3~8hw&nE1P{)k0H3N<^@VxuSH&~_6i`Ky= zQ(DU=NJIio*RKK`qE?4N%hVxD!{JC88Q@}P-_Os_AEX)b^{ZJ>eb`7(b(}w(f*f1v zL7+mo9}Wb;P7QNx&?C_~s289+0gJ+EBAq-_5$Y{h#c1 zTPb5>W0Q|jzvb`)HfRK7$Gx`5`5Bf+B%;*F!{Ob(zgpWeFPZ@M`sW4%`52;S zPopkJ!WZ+l_4OsRy}qIUwB^NbO9Def6W}G*_d6Vyl{EqP1q%q`#vfx{n>TOS()+B4 zlZw@WIR}`%<4-c+Fkyf_?)7{--CIHe-p*kN0M$|x%=YSe_?NxtPzR`tp~rx(Fo*W> z^ZSm8cy9b>qaY`*vy+&$(5Mt#A^42>dd|LXdX3r(;LCVjh8~8+Y^&ttIsoE@#eAaN4{9~T3Qc#+$?J=7(?qiTpqeTO-1D=IbzJNV;608qU_ld zJIjl^%lyG2Ig1pv^!y|}&7w(*?3%}N{-dZ?W9akPIn?9T_#73|#sG!GC7o6M1nGrq z$Zycl!1S`PbUW0+oGyhj*NMPaWBiT=$WnZ*v8@wwebV3lWQfhuRki=;{|L54j7S2rdgqTD~bV>9d-#UKzb7rQf)a2_ScDP|Enn0Jf& zMEK)F1$)#|3>D=np>zFKwIMl)Ex=IWcUQ$zt5<6bi%asz5rayK+;W-7%2;vn8=;E%^`7y zlxx?AaDGwQe?+2K)YQ~e)j`{;IO-mVrU|7*`_Ozbl2|uwYOm(pw25ls#QW^*%f)2Y zRK+UuSN*&Bmqgf^l*Nr5NSJKeqm-H2)zJ|L8dzHN{{4G^jJrpV9t~@NMH3JhsMae@ zZa?N!@WZ!WTT2>q%uFAkfc^$fJJ~=9YuX($iDwIn8Fu*;J^;TVGdo-NQkkKxi-Fpi<#obGI&nVQ=7%P$}Q;IHG+t>sb^VHVjvJZYG*x;YMk+~)Pqj|1Cl z!N3akg#rJMBa&TC98R&r*G~@saY&{3J^uXp{B}A#uS5u=;sZHY^7&k5L>6(T^Jj+@ z)`D|DZX!Ph6)XlFaNog$&r6Ho;i^IOQKeGD8>mz&LLdE=GQa$y0(CtQqG87;3_JoP zu*wS1{p=>9u>n`xxOU3=q0AVbv+Z0;=2=-;A<(+D3)XM~D?j-4Mm(TcFEc9z3Uln} zsNLJbLV58NJ*dZ27^A8{*gCJMm^>YWgM()nD70FK(`_+V1!PU@lGG4PBAyHsmu%L5 ziI?@iML>dKF-pNi%#q-bcGDmT{nfSAff&v(sZHSx^)~0_z>(&C_`wETAQ{di3iDa! 
zr1Ha_0JR9pqQlF?W>0Y`2VE+86FxY2Fga`A-n}3Fb}JBQM)(A=nl^6Qq?V{91d&xO zquIqYdOg_Ci|JxFt#~jG6Rhv`HQOIz%PW(x4tJC5U>@rkc5Km{Vo6&njmC|x6Ixcd z23?PlTFh<%017*I(E-ip$6!Ka1LllHF3iSO?-;;5b&`j6i@*;ix-?Pk$YUT76mmP^ zh%VDTv4IF|)Ryz(&S^_CEI;p&10$^ZbF>nufa#x&430ux$}hOi4m-Jtthh6YArQr* zu{#k-qPrl)QIbF*oy+F*A(3d(Nk4!7T-;|xnrk$Z@Q1?&hyNlseIBn-MY8}ydw}^L zi^dIVBq3D*b=!uIOc`BlOyaAp-b$fGO1p2~zWqJNEo_hI&kmW;{v1>7dQ?I4a(?c` zt!8p#D3bESu;6>R1y`ehJ{l}&?1!}5F8ZnC)u>@BIi~c1H2c0Y1Y8hJ2EnIYa4}=M zg823IEZ&mODe1SOCY(7iH8UT9pFJ?cBpQ~qk4<682xGr=iA5JiqD$Mg ztxV2M-&w{4>ICA27RFjtgp@1vwkKHD1W<4bu04O~n<}cRewg+T5g8)vvJ!725yPN) zq|817lRhEw2=aUUpb!k?8pi2g*Tb87n~VE9DkAK}2{EZZ*e2C~PY==1K} z`QTJhcU^L%Nj2jEyqAoMoZ=-E%;@R)-kWkYePE-a?eCeaPnQQI4=PJqzCn z{caypXmL_PLTa$E4WxYp3XK)fASZ2e7z!_=55@XJ3icL2yf>dgfAjXOareguy&oT3 zfw%O;d889)NcY*ZXMP{TRFU+By5_|#$6qk}JVem=A%sD#%HWyiSupTQ2tGE?zSB}~ z?Y0;wj3q(*b+c0=r1>>@%=EkTLsm^{Q$bG7_V*Sc44zm%lIn@T@%c(_>AVO zQlC|@3Un-Llt%IIzc@<^j1 zOEj<~jIWYx7OJYMg6Cc>g*u=J>4C&&$Q&LW9aWvNY|hmLyCDybNJRF7&AKG}T3K01 zvF*T;Jts^*cKP6c+9obm#oQ)(B7EpUgI<*OpYR5-^4r5+*_z6Ks?r-;VDSA%P0B!A z7E=T>>H#$6uoU9${$2K6ErO6ASiLGDK~z_~K#(f7^G5bZkK|zBQ(JwI{s(y}hz3~3 zU`pmkeS(=QC0v-98fXY}m^2Km2Mu_^c05$vK7M$3m>q6V;qE$OH}G33uVw%DFgHv_ zUQVt8>g*U%IFN#sveZY&0;pKmCJ#$w^6gV8acLhue26S~Z*>F$;oaTcwgVZ+`5|R; z6kI)hdi2~*=&x3t&w_Wz{V$0+;+xC}Mn+3x!2I#ijKq+@+;jeF51df^`ca!9{Mh2M zGBr21T%ft1ckN1Z88boaRPe(etE4PRR0fd&t@To)cSu_P#0+$YG^@5(CA!jBnyZp7 zkvo)IrA0iYnO)1DQU+{Cx=!yNAUcT3hW{6~;(zUDN^)BKg=}_xHO*)gLY|S(9pomf zK&C1B5AaOozg^5kr?0?Y2qKae9UUD9R)9?s1eZ&pUtdKf3R}pS1mGLf`wfx+9BIqo zZ9(~AlX6MLM0$i6fiZ|O>bwg6i9&x%sxg%$;k%uLcOSF!=AaEStqa%r@!f z2+E6pD}LQof7EPX;}9BU2kI~sF)rlrvPvs27#Zc1b^^-;1QxyeYbjq`ueJ;`Ropk? 
z-oL0^O@9ABC4ICOk&C_BxHme+Tr6lT~uqjL5 zg;`L$!_Ghdw{7}=9asN!3}h@oc-hR#x*K8&L9s3!f$9`0h;kI~zb8oC2;haU7Bo*%j>N!2Rz$o3 zOg+tY!onQyJVo#9>f+nG_av%p)tWWAqg#KIPyT1mXt0Fl@Ac}{(SBwYl^S!(+Qj$f z%?i}Ne3KD^9*Brqo6MBx&p(8lj8Kp~qCJ!unCOhCs24+`{QSz5!6GX?*S%d<;F7(R zh4rdu$%3T3bl>yR_fG%JTIaQoeDdUp>I}G{7G?@j;!DWnAgYLhtA@s>-L&;Aj5U`H zU9%VKSnN>>!*2cPJcG{)cv^2pq&a`hFp{bb8coT!VhPkcd-kWc; z)g|dBKt&A5Zbna;opKsg{RR-5hm$HHNDqw%6lifvahY$k zV^L-)A}wb%$$>Ov=29(Nq?Lmfw;nv-x%`p=O11|hKZ3I4g+bxv^DJu3w^;dz0fq3$ zSfv7g9QZqD{HjZ_R$8kL2dXEAjBuxx8B#&R>`D$~bVE~Chqoq4+s>l=)A!EvU2uN7 zxt7Vo@FOw@Q$#@E*UlFavIMR*{hnKRN-l)NA_=|tp70j8 zl@q-r!}V(IC0>5uZYh^h6-ZgtIJ*GbMy~R{09_Cs!g}=;tu}zVjA3w&z0fIO6cfbf zGY-6N0-=WU$(l=hkdZnZnBmZ`MN-}5m>qRSO#g)BN{*$R~oQcHw70u`lt%(;-% z)ih{5G*_7(D7*Tj;673eQo748$zN7@xrpYBY3TwZ1tQh= zfC}iJQu)a1;1`iN>vA}70%v3rpgsCM1~RMd5%Un;G&%D3=YvSsUt2EzhEe@z$VP2a z6bEeXl^IqcS;XtMpM}-SYDpB6To~A2c7J*!vmqJhjEEXZZN%2mr3~iNcFB6HAU_e_ zfaDK~2*DT4B>JMvS*&Mq6DZc$yIsB)p$ULD6+EUx=wLwq>7dnTp%nKC3euXJne(d| zz2xmR;BF6zS&HKf)rd$WkpqfS1?S(c0?|5xkjSca>&^h+a?9ZiKovNZAkqTRFN(@0 zK0wHV6oNk>*iqhdAArTAAvF%v5r!EGo)wH0K$8F>+s;z!(Qv?Jan54DHiBt%;eviTG5XSUVh)%| zg!MoImq;rhNBod@0PzJXsmZjaAJEOFxsHFW!0|grsi`miBwEegL!2eZO@y_I72fpm zaoA2YCd{f@ltFS+j-x5e3V9_Z$Pp=F(|JRMJD{juCyEbjOB=JKaI!3+qB%pz32VUpodrT+_uaHrazG z{89}LNVrF6JQ^Q~sd!AbtvJyLrL8YjAKw%Z9_}u{zxJ@$xeoSAHvTvO3d17-(zFlY z8jf&`;>;AzF17jRC3w&P$D0d|sY*j3Fg#MXVVm>3$e?xot8thv-h8h}n1CONuMCB7pWC`K$DPnDoaqSfIF z$-(FFk!_fpDDv!9jfp15NKn93_ZbH`GcN?F+;RvLl4HY#w|B&n1O;T(I`m2=XDogc zs#WN&k0*^N)&;nvuva@S72m*q*Kzyyas@FDg!JWJ zz@{dbzC;qda2^1Mg~6FaPUFHtQ|CMLL@vcC)I#DlAE7E;D z7z>yHG43{!mE;^K)7_Ls$57aIJd6*(oVBPFI@bSDTqs~+{Q{qYO!NbBUcd_8Gh5EMi}q}u>DNbg9AqV(Q7b|WGPf=E~CUFjVgpj7G7 zMS7Rs;mzeaCpYhud+!4@GgUdhLUbVC`Ff-+1 z=VL#9<)zB}_Pm3tsWyQfi1?`WKf{M(tOmCi5D0+q#EgLINj8_RlZvZowF`-Y|+ z^D3oV+9szRg}f^E=zLG1{I*RilpVMAlzW>gg>o(Or+?$syeMyBFg~Tp*svzzUbN(4 z72)~r9Zkk3@@LzfRJ!d2>_+QeU0FbMTRf>zzDe+W)@!NIjFyWDYz0{_uR;wz$hN}F+ zFy{&CVdb$w3A(;By;Wh!gZ1%j92`|TJiaQKt(NyqvwL=xEEKpbcH=t91q-ECqR&)s 
z?bqP6>Z|l)bXgq9T=;UILr`Oj`*EX&BSr7t_21_x`1tYJ+3C)^42DJSwEIpNKPgzA zxtOkBeP(fXY}PtJ+Jkejqp)zaV9}Iz*Dk|}?hgxdU0&0@L1uAksk-})U-?~LUf#W} zz+tj?VQI3eKF?wDc;ci4UGzz2nb10}D#7sRFjsY2Hsz`FTa@JsU0sCcKHuCY?3@)X zd0OsA^L5ViJ^o@o1I_92-dE^d=Pt?kD<*WfE>_esom4Ay&G){ti$hBzs-!8+;L-TR z#Bs7tAF8T2QmUSd-Y>5X7w5vd$aOul{yt_^Ma^bArNJ}O!{;@M`$uJ4>z9PIh%vH7X z_GM1Xn{1aWgZX?SHtsmY!p<%|RCbOHHTYZe&S-Vz?yEDU02V>_tSI^$v`aw6;GJ{+t92oq%n5I$rPK=MsSz4x)*@n3;#2TLn!n!$E&wlk1S^4<;wu3u& z?TUK!>QuT}ht|NrfM;We{u=6U;Kp{Efd!8?%t027nja1 zcQLH8q{yhMJ~1tr@4Y-LSJ&6m>Of*|@YBoi8#it!<~!O5t;}_?hg}S+h$>-l(@52= zkY8Sy?!iUH9_$-vv+FoIZc(th;?!nQLKi3d?8ck7Z+R@c&*Mo2Ox(KPZ)`MB$saaM@_r7NC`hyzI2%|Va<`|bcF}|cy{jG84)EFG}z;> zZJXoEYy9chx^?Rg^YS)cE!?j{9u33)2Bak znYNxxb(}U#z4Bo@mvz5vs%aa)PWdXsfGRSmc7=vsP@ge?x8442^~}e!G<&tRwG}c= zH6H~B8#g3r8Yw4hnRsyO7JKYH#K+fkYLI}T2IjWw1LxP%4sM9E4S67$LXx^l> z|MKm-cY_dr0k=2r4!m0SIG{2_AlD;9v;QQvt+!)!C37B?uR>&XUPdRiDOl{ebnwYb zNY4v?tV-om1GN<>#uZ;X-%4G%5`kc32?z)f^Py6y77iwJUpvYA_J(^2A0KK+-jZ4XNGWwDy{>-7BA@8=d40uhgV&7|IU_4Vm}E4mjA z4dcA8oG|_*JItAqlH$QQ`D!gqR-SEd`ChAqDT9)UzI_s1%SKHpk+DkgkLQ3ipDANNtU@%u$s&5>KB{hQl$7-1<{y8wURdjZOVy&b?VzPq zdfs(JJ?;I#NmAAgK0dv!QC7yK6t8TvFzk%dE#?!zZQxT|z{SNSh5T?BjE(l- z)O)LrzLDJ*>J+@-GSj3dChy(Zo8G9!<#S+_^0xkZnLLY?Rd2Z*K0t5(c@0(m_9oi8 zaB&)w)-1UWm)Vf;jyR?Gx@TuMh>DBv&P~R*#iCW^jAXP(q!%xa=2k@>6%e?Hz}`ed zQ{8@Mun%P1cmQz)~8?M`D#cwC(p^v}Vh+|ir}N6Ff6HKaY)&Wvj5Sj7~J zwn}Pzf|HXI+b_SI?r>Yp_dLL7`C8|b@d;a@3ukp`m!RUfP{I^*ah&JYAO;XB&Gfg3|5A zw8UHXo8>**byWRi=rxUe$8^*}(xpTAtcqWL|NZwdE{ijw!iR17!a@K9W1VJ4rTyjD z6k|S5xVp}D-UXf$f}0h()iu7Dj|0EN{gk*5GNT_MfL8X zuRp%oQd)&GcpxUvxMx;(8yI;79a>JCMpsahG4Slf~2-L2Zxg{+}K zqa-C5dO%)ix=uko*ETW3v@P!F#f>!-RO z$rz%OHs{(U1Cb`8Qc00shiWaMs3*6Ke_JojS(G@Uq-@{5og2`SEKFZzNOP7&I6ZED z^JuPmy5T00OE(=W1;+BiadCMRDK^WNGMCj&n>XJV%Dj){^JEl1_mWG zfcJqbOQEaT`HU`Ab`?C0C)Wv3CuV0G8QhjSRYhEvvJ~w3qhm6j2Rjv3iAaly-D%D+ ziAV7;L?0kuUa&MFDgJ{5S`P+jcg=3L12y_I5`YCqkU=sCH^zydn|s` z5ZaS{I^Zx1%f}$If>_7tK?49p_Okch2d`$f(8>I2Jy0V$)R=6yZ$?<#pItM}tV1~Q zVrJ4O;XcYMMndgP+>jz0HcWt+CiCu 
zK%a=WQZP!CgiqN4QPJgK4^C-+p49}m)fEEV(=2-=@`1*3NZk)D$ia_2U0vZQOl&7k zWW4XW+&Vvu%^xgWT~3w`;qTEGvG{UJcC;^Do`I) zJCIAyli;Zb7%K1O4*iIMbDknM9TDV3$AJqf_T66lvd;beEYyOl2P)~)X`FKyX-JmTD< zS#8vVD4DP%H0v`6lg5Kyg3kfP%gV|mZF4mxA0P2%^e_8C)S_?Msq^;kx@;>wo54CN z5SZfW)7KA42P1HPE-7^9=te=vwH<3u)C+S_zL;sMG}fL^OHZE^9DKlKVIVqC$SEV# zZN(w;Mg-&h{Ja5rA3rt?F!ve0)d}t9yL(B4H;kW*Apv@EXw{tBZXE zoLm>L!a*9tLYJ2aw@&eo%UCYaQD$LbEktPp*3Z|^ug97y!M{jz-=eG70Mh}#`R8l= zcB2WUer!X)=XDsT&<7&=kkL2Nb0(Yi@|p(c*p9@tm=$WGj45F;R;H8P$Pg?Y7IMO* zxgWqlZgpij-K6CNzC{`tmC`bTn0gd=!X!?d!9~%JO$F^ZoEueU%RcV&LXK0iy1G$- z@!6L-BvGkFd0WkIvZ^FGzn*+U+Z!toaLJ`D*Nzu+>~3fG{_7j)j|mG4dooKM=i(xx z)CEkb1hE3*9w%VSaiPy&+m#bH$AcZ?L{o^uKoDirn50`3D)N}&}sn;^$SjEwI9 zWJ@?Z8|GysF;3E|wIpfgUWk?sxqu;As;AUXiVQ$vVq)HpULk19ruwVjDkiGekN;p% z=(=1^T3AZQMbqpe#aQ{GU_plzMzip)!$Fztj%9-L-H)PB^olz=I?_y98jM>q65pFl zOin()*gpUyea~}TPr!Eg7}A^9+uNJG57^=f0{zjGC;FJ`hUzlY+6&1PGn8D|KHe_% z~Zl>#V77cMiAI?VImjbXP zYf%B*q^F#uF);DHoW;V-h!J{IcTIuwyk%{I8rP1We)=sbe@1u=$*_ z(|jG^=eZ4A4}2_KndWeCa4_iq`0T1>FfiNzCZm0PmJy~|o$GA|Y9cRq-efaF3iwoN zW$?Kyq#>!IRkU4{Fm)WRb`g5{^5tMtYP6Pp=Q@8U`O?(uYd6FxL`#F4i^3?xja?mS z&%cPj9IfpN<+Bn4L00#KQzwhjhW6K1ipncQM-tLE*0e2$5RTZ0=Hq;oq&zn1g~bA> zI8IjZN=^^_V&8ye30Ki_5JfeVcXchmgMFj+R%hK-Rlc}f2;`=csRF&~YJngZ2Zv0! 
z*n=7XQ%2AY7OrSHJO26xGsgOWXb zYyW?|x@GTo-@JMA^jjK{n9D`(t;QQEr}!>;kZ%DcWmA2RZPl`~7qhV}xP|C4P`Um; zb8$^RuVq;3c~g;H!hp$0lKX)mh4MA8wN(QXzQ6vyn}r%#e0GOfPo8{Yz82S0@VO|F z^=uD?qPnRQg!lj9S9gF#N~wBVHJhO3ch`Yq`O@xK=T(?KxoujGIDzy2ynZ;IUA zxp!~uwQCem!1@@}i)KauY(N^8)nr+8v8i%WC|Q9H+^rE&QRvf`_6gV~)YR72XP8_> zNQ&;=yH|83jNc}1*fv+^LwUI(2oMaAUThkC_?$XEcOg_zZS97wbx%*<+(}25cvvBd zMNm)!>>Nvq+xhdqV_-N?2FhTnjH56H_(dhll0q?0pHR)>vl;ZwFzZN~o3oFe!HY?I zUiP`gCx8^Dxg#gg{&%)8i->4rdY9fzp}b)Cx*de!E*3!S*|TTTBX|L3&gq{)8z%4s z-_QPugO5)QB%3Fbpx{!;bCL{>fp(}I^1@n8q*^!Dx|jz4~>n19b7F-+D)PO z($MeE9-o|4xNyOvrluyixwfVT5Yq0o35dt1V}+|)Sehec+t(oUx?g(lr|#ptd<$hU z{?jMbbRh0jy((o?P>|J(A^8#3Q~h!kVImy{xqMc=F(~LnZeT7YkHP76CkU_v*j)pA z_^8cP<hxDI5_klwB%&Ny#yhmoY1yfINFDoPCIX*tV zOa_o|&tK6hw)%rmgm`SAefbl%ul{YMaa(qpjI0G3q2PC&#;AD<~-NfHsbdh#-?fAdk`G z9P5GSsMoOo)1s!esd_R5ViNERO3*hXB#}$6QlqnU4dqpRsjdfSD8D^g2hlmFH&RO9 zKijA&r1@RaPj)M=PQxi6W~0+w7Hx}4N(9nNN=l5tm5G|B8aEv;0@epV(g5NoeqnmZ zs4hls0X;LB#KF@8Uq4+XtjOOb4y$DlLQ@~a7uM$E;#gra-ed6o;V%myuI}fL9YX41 zp**KsTU%=cHezk+$LF>bod~KhXxr0OV=E0#fhZ40RG$W!=1>Vkc1R{P0bPVvz!7wF z!o!C@m29V{*Bb0YSS8WZZ{`JbeFSE#j717lQJH=vA7K2B$tcq@HaViFjh9 zq8OoMdagiMkX2U?47LBV5xWzI21Z(Uu@}QwtOC|UWv^GIRoBLHtI5W2y z8P2{zyOwe-KA@$u7oS)%SyN@^RGHn)@NHD!#Y_E1UjHo3KICSDmGbk zrUfetMs4<84+thh-})xtw&m21tVIKqXvs8FadysAaKg(8vv%j#CT4l#V)Y4X3efB7 zkairOc>mn4GT%y}L~~<2r4w>g#Cz`mq*()D#UpG2F#@<^?At!l_vxh$D_8*VX$nqw z!JFJ$qcFOXiSY3ewdawN{&C>JiJ&IDJXL$y>(9wG@n{`|(o@OQAv+8L0Hcxk{ar_E zkPPW=tF92kB#;ZrmoGmbwspEULzIkfZLp79|Klkl76kH|B@rPEMH1z$9(=*r-6tQHmzVp5F;XZjcIql<1sKhHv zwYRskhjTYIHL-{D?XCg2fPuiSs9+S87KlC?x2x*k(y0q=nNYr8zI-9+tG}`|&+k0< z0{KCt#&m;PG3bOfz`i!Gmx0<3gAz6{FpwU>i~IfiY!*TgHVN*{jb4LpRa(-vMYsIP zvo&ki6Nmwdku60AsGM7`GWh91vJXGi*Y>4>=%h;C6av79vVbLTXsjCSD9rGYJ-WXx^Owu+WqB>W`!|Hl$? 
z_)q@$1QApEk(DHzm1Jd~-nj7{m^Nv`L$0L}l-sH>rx@2wb z>7hoB3UplxdkfR;AW$ayO(|DP-@pHeMi;CVBL9Pd7k%;IZ=}#8> z+1a~SJmk;>%btrNfwx;N8pMLBBg4i*$LdNnATI$#nAHqB3i8VmittS;cz)g4(KgRy z)vx2@)c|NokhJt$vn&EnT3n9Z{R4p0R%EWM>_bf8OjM8sHE0KP$;k19DONOqtnW=b ztZGSILvAswi}5Bb2Nc{vaF^9Jk&P4 zu~@wGg-G1+oLrRyHhy*0N}x4w?#q3IfG^?snnrAa`uNfND52dmTZ%B?KV~G72B=t1 zTOAKHZg58u$tZjj-fTKkC^Fi+Z19WkCOkZdva;v+qHFSVD{LvF*aQ* z>#@zs?s2SNy*pN1EW2v$ty6|L1xXlBC z_Oe9vi%9KYR(f`39ks@;!zCKoot>WipY&Ta1KfrjQouUX8FkA7YXp_h0IA15qU}6> z8`Yw(p@6_YP!iycPrG}1HlUuuHIsyA&@j)1RvM4}HEhdCzgU7SYUs1gqRPl{ScV3H zqBKAmVQR1XJAAu)Z)l}M*8^TCC$2(p^)T^EgNm@UvI^OSo6t19yC0SkQ1AXT7=1uK z`r3=&ItpMKVXg{kK7018pP!#n_X+aEgc)A9dDr`r=qs~ln3jAPMBVPCu@^FZ4{A#RGa=tRajeMA} z%M|qHSjcHU5nB0`NZg_hK9f=P)cvKJes4* zgKZ_eH=VGvDw}eGEaDzAOXaV6yHMt`eAtab_j7P?5Pn88$GRHCTLhTo__RS%!5ICy zEfsvhe-hjinJz#|m%Je%yFMG9%EAgiKJ%y7FEh3Bpnw4cL%}4+gYkg#k3YWv9HBfL zX*S5SAnb)9Y;1!t?geriy!jEG^mE)7+bjn_JGcjbCDyI0ax|n7SuBR`hfxka&QFxI zAODIGb1n>ZQX%|G#ZOXVHsGTvSuSdZ7(CVCSRw)iky9EQ8waKVbswvLTV@n1#W+Rk zY^#-wS{`cPD{{xG%&@Sg7OR;mF9y0kjE4X=&;I zc$*w#!OeM&27rTaPPCb@3>%)lo4!G}-LXtk12h*;W?V{G8_yv5z z{n`hkcQsrztDop8W1!ddIb!VqVJrvlkqwYJwT39jXlH(ge_Poj zKt-?=<&sAT4NY2Fa;D0IWJUHz%5(gLU8KWujCYk7A$c8P5ehEYCdkh2*>rCWP(MUgf>`eSxL5vf(6X~PHab)u2(2DMMaW5uGf-1%LG6)Qj2H<`cR%#;c@7i^ z!Hxw}!G2!TBN<}=Lv>M7`(Ywbx_e^%1gz9<;8!TF4?n8 zu%^?HsR!WeavaSj)8Ny~i@h*h9@^(yc}RqGSEjAJ2slS1xXtHRYqkKLkeDKjRW2o{ zqBqUd!@@wI1DR{{<=sXi#-ST1LvRF}rgWtP{gbpxe@$jgj|>t^X^N03!61jaE*L;5 z*XZo*By`Ob#$&nqh{ zD0TkeUemC(WOyS(D*TuL+no*9m;Zq?^0OBy-VoRR2dP z>K>esi}E5|Joa#&0h^>`Y@7fSUSd`t^-f^PgA-9fd<|x9FU5^;Gh93`8ufbqzc4}T;YbcE@9I5jK`!fqI`Ax!hze|_rg?5qfjIl;p>H_+HFqG!Ddln|_{ zAQH}K?pPe?N#fsz{VJTaa~qdPPY2T#N6<UmD%WJ*5PU+i*f3cj@Q7;5_+pNk*SYoft1?)*imgYZi5Maz0OL}8U&jQhK#9Hny`oGCfLYiBXuKhDSBD9hQuH%#ed;}DVmy|Jo$r0Pn zG7etE2#jW(h0&J?7NHKkdxhZaQBS$@fi~#?LMPF!p_1FoOiCIyrM!0QaVC$=Ya#x^UT-dwf@dx!1gYr zumgej&ouS-Cc>*rR{kpl@`IF!s*RTMS4#vzx%3rkw-=p26lmVtckb|~-@S8(m5nX_ zy$KN^hTVu)RKWHEGSs^14>=HA3srB&&nx>jP0Bz!rd++%(c?zf=i0!O{ka?2=vkp_ 
zrOsQNm8O~CCO6WD6dQt|XlFn}Lt~ezNO`4n=!$zZ+bh4vXj2a#KHNMruA23C#E4!c z4DoC!h47n`(%pSRr`V$!RK$THoG2jjq!MV^&39NRJu=_`a9+qSAmD?L&9$OEI=;9n z9H*LT$t7E4*f)D!0@H1 zHBLz~1VF#F|J4ol6PK+##TnKyK)NT#c8C`^*KVuqaUst}FDNjgrS-**VE^b#+IMwloLivHw%>Bmu1$im5fgq^;ptFMV&eHH)|F!Y- zx3T`q_}F8NTr5(5w$}Xdb58@IydN>IuG-xt0%}@48 z`VeDzI${+XmVBmZ8?gn_sBCzp0251TB90qMAkh67i6a_z`+D3gQBm@7N|HWl>$PY| zn7$Wr;z<#L2UEy$XtvW>KCG=;Kn8wW8H9&B2PyV2;GD{)rKODoAyZgdUS0DRx8mXzN zL3AN5`n9>M8809Z^XA>_Q~ zxAUGzjcH=#0$ml#PJ3;Gc@-A051J5hmZNv_8~oJ++cPqfNKhX!Y|}dpb3(MZknxR#M_{YytGjgsc)p@Lrib z0t2eL5fn4RTVTI>x0_Oyc*cV$r?RJ-jgk$Z%#focQ_vY@;7XnS?RN(OQFQDq=f9dkuEk5hZCQuICh{Fs6G? z#EBp&P$IGzv35cO1#xp!EAP|z!n}XvU(Zk{YUV1#*_hKCXhh8S*gXCbJNTW!(cEEG zPKbf_Z2&thIf=?{-%gS6G++Cw2<%k4ZpA*KnNK=+9@Z2u(skUR$kiua+M3|d$9c$CHMk!cp~-SCc&fd1g20!P{U9bD!mz=&tcKFKvd-uttdWGjC#^zF z;r#jY_o^nfR~P(=Jm)f9uR=KEJ2VVQ#D^lhI#(iM4S`{RsGOjTiIW=clEoW~L@*@7 z6*lKXNr^Z-B<69egoPsFJX|~&8pVl8&Z2@M@rv@wA;Y-oGGhQ3Q2ulk7>B=yEf*}) zWSxQxUo%qx31sU+P#c_JWWsdv!L#YUxwX-A8qGWi=cau(ZM(w5L>w7rfTB zIn&GxK|ZL8lUK(e0@^mC`@F_^iXxnpW@KXGn>K(7Ku-SQd>_=IkLQm=SjX{H2!dve z0kBbgs&+#O$MHRKlCDJr>n*)#3`@L0Lqd~i)y7jFj#;jKs@msOY222h1edZb8E3=7 z#-`Lkos`YM7^R3G;#87}&e{*Nj{;0LjE6ygA#os!*iOxkj~})aantp%h#it&p8wR> zP{rVPm>M>z>9#!Wxtau(pYbpbq4}iYz*U=dt(5d~mG%a62Q(rL=x*eQH3(4Y5lr17 zuwx<0`co?A{ga_VMvxx*>e?D2MA#00dIlPJ2*K{JbP)$kIQFr#i&JmZ7A-8z>keAE zzizH$P-@UN7uU_UKrX_3F2msb^?I36F>zG*W)+JtEVeqLDYcDuc@iB4ZAuyu zVv^gvyvAHKYmICKXlNWC6GKK|%C3iRjGQwghdr88^}L`Sab&rGW+ONbWx@yt(iTcY zR-WOARZVOa${f$7H{X+EmE_$h56^K%efq}Y2*@_L6-ab4F*6(C3yIr;D8+5IEhgaM zUG7zlr&bBq!lao%Diwf-dY=7D5<7KzI7tJIHECv~CHk+27wW-@YJJfO9=pdJ`Z1W# zY6|+&VD4B!EvU$JkX`b*b?dJ=m{N9zqZJX>9p}s=095*MrUys5RN#t$VVVUeEV^&B zFf%a`0Z`)nd5F3SIEqJB7{|d%tM$+@(A5bCke8Ro{o5-_n^l$2?gK6_n~WX7rE%j7 zD?wwR+sWa?m6^=K!w@O~be-;1S>VIiid=H4LMcI24nW4T3Fj!o8gZlXznr~8&YNuB zbwn07BjM~ZN&a62ym(0L!nSb MrOzdu`R&I40;*>nEdT%j diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/accuracy_by_model.png deleted file mode 100644 index 8afa714d04b52320b661311940fb8a368b5f3e43..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16115 zcmdse2UL~Uy6rZ`9yKZoA|+8!5Cu_+bfW^&dy%RjNKrZ>9ivF3NazXziqe}>q=OMb z1d$HXMSAbOy}7*S-g`>UJ@<|G#(3ktjFGLv-v3_zT3?ypob!JmFDt%tJMDG~g|buP z(nUoI0Y&Djjl~Ufn;XV@Kix90vNkq1 zJHyV$e)_~uMm9DU*8ChCH~-@e?B-V2IewPi*o=#8v$%BGnnKxqmHfXUP9)ZtLJ__q zaq$=B+fPQ@9koCH18j^jZ?_6JU={}fhvG^{d4PGq%y&{^O6?c2B4(wlAi!|j3xa&Gd3 zML3S}h0III$OKgy*8RF^hi8LpU5w1s~X($FFWta_Ea$+T8E+d*cq&tjf11Az4s~I;kGwZug%&M!UgOS z9OuXUm3u<2+fNRs7UkxBpXKS}(~xFR5f~8AT`g?hAt)*;8h^F;zFES^P}1devDPf} zrhRgPU1N&u2~n$~B_$=S+}xS<;`_9?0tX@L^-Cd69OHYgt`g-R!_~ z-{WV`s&{mBBB#k@;gmet!otGsJ9gAf zbQRlveYY((*L9(5%j{sC_?pRZPt)io&BYEkPMay`4m2d!# zBX7LWvAJquq&eNw*H=wm{`ZxYmHKBFwnV>p@wrns|MsgJJYj_kUw6bhACn1vAsKXh zaG>USh>%>Qurrf{R0w-!mcjMwrp;*vw5P9K@>_l)Xs>ug%v&Do?in3TF7-KN`u5(o z^2$ovfslZJ=i&_Z7c)&7$Sv7c3C$0@XLLO=^$4e+(pl)Nl4Yj7xUj$?D5#ZMw6+74DX6^hQt0W%r_M4rB+Cq+V%09H* zHmkEuQvB;8n;quIk`Xf>s;cDhV@2GhXMXr=`)NZ2!iT0NwVkwF)n}>%pXXTjmm8+z z@EZm-o<~I~M@#NU5UObB+E^|eI-YUmkMFo|w$!iXaA;)2l~r+_Iwg;AYeWcnIx#l! 
zyxn;M2kKH+8!d&MU=zJ~(WvbApZT4aGU})u92{~rGEJiAJ6&}rCnq%u95Q2M!eZRq zD61>eRsJvPV&#S(3XE0b#JF!yTuIi-QA{_iR17#O6X@q>DLUKR+dEjeJXWV|*B-rF zc;Or}Dxb-CeQnj#)3f&D<1?AJMm2<%N79#u5>sv7)7uT;i?YUxtovngFp|11i_u4M z&<%-dY(M|}OHNKsc}0cNb1@&APUm@J+(3C%mEGBS)6{~PrP<-(wdL`m8{w|Yt(`Vs z-fXB07f`eA|I|?PT-+APKlJ9#M*mN~+23#4#J9-}zc8K~Y0h0=TUIG@S*?h8k7L31 zXLO1Me%eC6d-v{=(o+6e5y!bC{#zq)qFx7Bg@tt(ooD>;MX_o+_M*E5C)^4mImLt= zW~#rIcyT+-7!}}ZCUr4Bxwl4#Y(8q|-HJ!FERE%j&!oDpN-?@DJj=BkPw_h<-m>nj zPK3b*S}s0`(9h?b&`bvq(KmWb*%gB7$~ zIB#N-5Xy5i!MHAFu-jLpv4W#0$=mw}Y;*a~7oDdod579^(p^?()lO<<%i|1d=SEv+ zzj|@-hxbQ^=#bc}Pr7`jVEU7Q2J%ogQaJh6Xp3=EiuRn#jsqvEzx;7`KLbOGK}GQ3 zxa)eJ&h4&E$tW;oRdb&N#_RhdT~kmhv{x7@O-DDXe~uFM@@6_?+IT|Wn=@7}k_Y?5 zjT^Ea;JsG%qf)#gbGm6$iWh@v?F$Jpe+gOHLZir?hj?ft3{gIIjc(b^z~%pOWpR4+ z=Eg0%xcsH{^u98uK70D~%?d)Gulm^qG5^IccXyABx7#*p$2iOm9X)eqI*ps0&)n;o z8Jq6jUV{kNwMD6jJd4f)Cp8}Z`s;U&j@Y1h<#_*tJU1Q{-2D3PL1}Ai>k3Ns-0K%F zn2vn7(Uv{w-~U*|wb1Wc;o6evGGEd1m;i1dStIi_D$MCpn!0>@>pGOHWfyGVY5$J` z<(j8_^j(o^RxbIrkaheo8@vJhDV{V z$XfQ~>@4>&xd^UOs#SB1z|_K}@=K@9i&p2Dm-nv;n73uuTskdaH zdiwiYTUC=a4459HywK`${Ce@o2SFrW zNYV!y{ZHWn*FQbs(@#*2|2nZKG*f*c2DPqsqD #GK>S}#@bECv#=+&BcBzzy1J-^8qseXUP$a~cb;!&*66;w zTX>*8`0?ZT?Ci6fwrr`WsE`Ueu97BqvAw`C>&3J+6OUP2oKAs5JSr=-&>9jTUehe6 zO?7dpi&0;?_cu5PSFS8?{z=$S9J6NF4KjFW|Cr`+=Mh zxMDK9_U+rL_U(KAT-JIWWC09Z94LB)#dv9SSi+(`Kj4iTO$$cK5ty-n69p_ zOoZTNnNTi;-tr*Zg^8;|6K^)otunoz5-RmIC8ItdgIn*MkOSTYD(hG4Bv*5vgF#e%44Xa+LY-EX?v zqE;uAh2jK9-UqO+W!>tcV;Ec)t`lk8bm`t9b#*ipXUH3Iaes&(Uu$*0+V$aBWIejy>>pbgjZkY6 zbP96oj3g|7pyj$$CA^$uSQ)C)L-+7JR$i;7>&&e0U3~p9vKj5SS$sckCmMfaWn-&& z*0&=y5m0ApW=0w4fMaR#-u?TzHYeS=f#GVOIj=6w@;l7L39rs5EgSi~INa|`)yU<% zWE0MBD`{RZ8*F85P4GwT^a_qnA&lpyXHxImFP2@My0>n<7G9aSAoJv`0uWkyR)1d~ ze}itK>Pe3ebMEXb(mRI9ma#9=ICT|Ol&m#+Dyr8piu+O?Y%6&ghoVW^VUwBi|O@>+h|8ENPAJ=W<=NOLboq{9CP&FrG*H^MpXa&<(C$p*~k*$f`$;NTeTELxAUl~BZxg-aB8ttIb|wm9#I4#{q> zoxM57`t5Fc4b>DaW2VQbk~wAbzKkMfPEJnJz6?U%oTYSpR_{=;s*n#&(qTN6Nt#)o zPH4XVC^%Ji#BhGBy_81Z%F0T5y020O=(V!vLqH``v~mlh)0MB?-IaiAqwlfhc``uI 
z9}k)5Uu{g$HZ6TfZ)%p^b^q`&Dz$jUlK9v8`GVF!-C^io(~X+ukC6nA-ZMCU( zXFN5Gc$dc2We`D?Px!1q;;NO{BN?C~X<6Ba#g@Kwe0nG+05x`FZFolrS2mn|BJwI&VL=r}YNUG?3VVVL{g?@=mPOvMg)B;U6n` zJ8xjjFpQB6H$f+@eD`3VOtfT>be38B$SM$^%<|l*kFT$<{a|#+M{=v=4Om#XaNT&( zdIz9mahuEXsNvemB7sYWC7$~Y5gai{-#M3(aQPeFoG6$)&Py|vMgo>!@5L)Tzn{|^ ztod=Ox1zExg=XNMZj0LAkHuVu`) zzS8#~_y$RmE+Hv-noTw76dLE5pMU;Y%%9I@;P--YDUO0`*w2p`BP)UG%}L@11O{Fc z^JN%46>mS*mLulBYCej+;<|eE>d~`jv&*)X$O%r~1LO`a{{Gq<8vU)jFJCYSInbD! zk4p)Q*5}!o6WE871O?HD>l$LnIm*IPrktSsp*~f&b7rWaqH8cR(siv_aIq&K20bW7 zF!t3({DGGzl$KQE3~S zjK^pg7b3sj55u9Bk~oTwIjQ;jQCu9067URJ&Y2sZPo)C|crn zj$xP;AH92b@r=cnbFmgm^72>CpFjU3;_Y`RBZeOtOV=sts)mL!_~FdEeR2aqYC5c$ z(@7Xvv5-jNrGXF#~9F(6ud!~_Lq~JF^t1cle?X9S!#F2YWPvUZxSvV#x zLI4T0MQmhxV6joX4I>?yI9IzF*Q-$)W|7qiDAUX?5vZ7hm`5C@J{(iQRE{CPpln-R zQ^G6#_a?}R7G%Qewr8H~7%sEgZ-pV>Y^Qo8PF_yCSP{a}ly5)HzO>*XG5Gn_a}08H z!3>n`lNnT3EXiE^LLvZXbOi5xq5P{4lMfB&dBkoG7X1vf;NBbT6Z1vu+JtJr437)9 zYs)?;tEy@OfJ;Oz_+XeGe>vTd40BkrT0;NoVt-@f{T7C~)2|MKO^^87-ymUUOmglb?>kx18Oire;>vr3YW z@U-=|pY9t3#h`lWQkm#76B855Pd~jW#IUBT(oUf~b+XpX=0lr7=h_b>Tg^a2 z6Nm8AzO@eaAz7zD1FOlqRy5iDo=rfYwUA!eX(3?<^mlJqNZyW@%{O+X`f$qL6utcN z${ir<6jXGrv`AD5AcT~di`bGo**AM)z(oK$+R#RS`1sL=wle2;PO%3M9t?FB73CKc zNTo~TeSc)1Y(tS?JAGOaeC4iDynq`m>?tI)EoLz;n}O=u6MHBW<#T2QC-*Zlrh>~& z1ffBwDm4AT;NTccp%VT56)$3AO>3e=cP&?j@yaD@W~pAhcu%05Ec%gVu?#vSU^|cl zlfkuXEQb%DPgG4#6Ilfz)|JsG13Zdtivb#Lpl9AlakKh_el*4&b)vVY2RnDJxw(02 ze%{WGk7c7zsdLZRUP_>UJ!^Zr_IKsKv2}KutuG=S-TW_RZ=g_u&;Nh%`gP9YjDG14 z?)UGPgVgl%^TV`J$Gi-rZUVY?`?hURBou zab5s&Zor(^02B@yNeT`2gjFPHfx*~Fms(U&ssjgNK7N!&3rz85+k$fns-)3xvz=;8 z)=UJq7s_WHQ*aU-E(Txu^XGq6T1S76vm5W=xA@|Q-fS=0|M9~Ig>&cJ($dp=$H!eu zGM|4n;urxu6+1 zt|LnmKylFu-hPRMkH&;4O@PAv9}XS{yJV-0^|Oe#<$n6p6jZ!)={F|UBS$WvQqHY` zaa6IgN{hJtbzA0*WD2)z7MndxC`Mj6%%4>J{aSMSF+ zJmvb)3>g^x|I=%>P}@hO5ik#U4wY-yUVuumsVRgs6b~k)ZmxK@NZ{2H+`FJPUmG2^ z|D9|^VBiad=VHdlP)P|11x$wZ$iC?4Xl<(^pfi>uM~VwcfH14>qfp{LYwh9%P#*%2 z>2R8s3CBAUWQG4(KbrPmCJGd&p1I1py6J==NQI4ER@u!Ue$4VldB_-!U(yOYWv>C` z(Ng2y$-Dd=)Nu$syhw$nk 
zAPF~?H(-xkr26s4E1kexUaqgOX>W7BZYzP*MVP@^(BT7MW*gC5xD7wjKp3&5J$1$H z^y$-{wsilQw4mu7di=!eqeORL3O6qGJ~-4-khiv2<*GBY(EZ+54SmE}z-hr^l4CDg z)>$<=Ga@7IIU+{rP;yuJ$n(|Zc~gw!l5%oQ7)AS~#&q*;-2@ZG?^Aot(2(D!PfHS9 zNd_vNSK)Yf_YqfDR}UZ{^^l4#GBf55#)J{R6vUkg`W+a;-;c^gBvn_-gR4*l@zDs8 zfWw~7+}xay6d3lN3Gwk=uBoXhNjb*MEDD5cJ$o9-B5I$LTMW5}8}vTm7;t!^bH<-v zJ_9jq*Q{Ui{(YkB`kF~|T69!Y6c^@A#1ShyyKL>ErsUw@ATX^TlQFsik!8v8;J6DG z`@%F{-*}8>+Bal?0P(WoYttG!ApbK~vy<-ZMA|Co!Jwd^Xw+Jg_Bd>id_h3}OcLO5 zcA0h%ID@dq?u0}}Mw09L3NKy4B>I;{H)C!Hq94_R>I`} z-FM#whKEBmu+-~=+85YZe3GyaJl zfU_(L>>gpDcv!i(>f?o&n0nGg!iXN;f~dq4(m?p5`a;NHz{`mjJyDUe74eCV0!P!H zGQXCWSMLH(1tBhHhnt2$!YN~DmwGOSt=6+%G>-oeaBLW32jPVB^FNL2fG*y!aq}rs zzOutBx}a=mN=QgJbh%S!;5$IP;-JmaL<({CUu5w;=Lxk55l=Gh04|w$OSG%6%H~|qih^Zk}S9f`SJgdkKa*cl?hJ`QNvjzJBY*^YN;BYYbn za3=Ovge2lu(F#W>lyk4}U7%PJz;0afCmJkTlNdIIh}Y2j$MAXz41 zE12S~ddovO^~K_;u$30fTA|k6gM;zx+PMSRk7(sx_(+4L{rO2kaf9m!6g!DiRjcf^ zn1+;Qf?!*kDBcZ({_M#UI+AH`!nGpgoghA6@+bQT9oR9CQw;4IXTq#TkL~qBM4))F z`0(ROlJ{`;2%BgHG~06zEcn`-=9;gEI2BCUi+==e4LL>yLQN6kvYmE@f7v!d3-Y*`K#`a>{3LU9qGWanT|Y36n9>f2pZxEgA(l z$i^8LX1C;P`{Cf}i1?&JOZ`?L8>-3TAlR?gk_5;y3_E=@}-JW;~ zMT~85Wn6;j_x>o%I?*vPxi(2qAi-Zs?mH?iFDdDXdJ>zWop=1?$x;7h;&{L^NUkUL zqO^FSMHBvA^Ysl&lXd9oTt?MDg*@c`FPiO6Mqw>PkczT$pj1CD(b#X5jZ|Q@3P*Qn zF?#=yzC>77M<)Wi9s^m!WA4$TM~91*o-I!In}V$mJ!`4=>Qg{iSmIxzn)HuS!}P(K z!8+Md3^b7GIp!>7pPYS$T-vmGGk?aG&6|M*^hL+gaf?L10X+2!QJd^Lf9yX8Na{Qk zH2KMFIx}*^_wIe~@lyaw9{$p9`JSGg`bv5;iNPl=+pB7_%yos*^723FdxMqCfN5blSHQIg9TR35HFh#IUSaob<1%fO?sS^= z0T&ld#tmU-jg6assD_K?>2JUNrpNdmbr<7}q^#^MBRN@FRd8-fXt}P=7y}3u#OCs3o7tmwoOpBPu?mas1bg{ zig9Qpji}}^n2#YF{}QtV7XsD(_HCn;g~?p-_7%=Au?dZ8BdvU3f1*|CNSE>yvO;)pd~<)5Zbflr$?~K(1F)JQLd#%nGzgAb zNSAT&m=u+5n#b+5MSt!p#mtEldfAw&n@Yr9EQ6NM>IF3MSZZA-1<(ZSt1H;IbH*0- zdqgCNF3gQF0bT17@q^$7W@ANGC@qN@+1UL))W&oS-=i3#D|Y9+mVys(j=zp?pf zkdKD~7MT~U~S6r7SUnfPfySs{BB(Z(APsQT_sm} z+v^twb=DMOcYC3=VgyaC?NhIXHRe%c$YlO6U<8CsFG3L=asQ!16-&gUII88MVFJ<{lKt;Yu-e5# z+ic0VPai9o*TFx7Zr{O|q@W|;zJ0r_xO;uG6R85g;z35=I^2-wfyq`_9zc-Bf>Do- 
zjg|68WF&z?3NiZk90sHR9glc~0o<9w-Qdve!%OF~ zN06UCwBf-9id!oV29~DhfaC(1%gf8dKqSlx7{1=@M@N8!CWqQ*ZPJM~!{w)SSd>I0 zYe;b=w2;}J06BDuLVtMh;Frq0_F=Ej!82m;&awE=;(eY3!JsRu5P}40Zrk_qAaoub zaCgM%Hw)!6PwS2M!825}w8x>2V;X=FN``tg4~zww6)<1>p6lSWmgkfNY{TE&p%V=pm z>2O)TNq^Sj9Mm2KnAwz3BNN%*7^d^(^OT;=)HcFN@WZ)KxH2V0S}mEw0j>O;arM-N zh1<)+F1AP(N&cd-N|E(NIibmS`_vo`lJL;l&I?>eW_TQCCW3W!P|mfM*nx<> zhJk@SbxYjm&*}yS2F5@Z=!5d0dQqgJArwm}Vv8^jjjZn2zMTt@gI@5qJQ?#zhcGCo z0;+23wMLOQJwC_Ji(IOP_bU1R&O;_BdP7aAkr2XET4saJHs<8az5KNEiguE<^(ae_ ztr<#7G~Ra{asVNxp{{w2WH~KbH6`HGS$=`q2w0Ila-;<%XRNRGunYd|4IfT-!(Nwj z{$nTkpY@Y}qJ=C@^%9FBa4w-Kh&2H;B6~4f&^i15MhmFJf6hnkZ(av4ql8+HB6OTk zv*2**0U#0s0|QT|06GM!@#uS7HhqBhkqkGWQQybM-&oGsftm~3wrwln7lZzSMv!zvM1i{_?Z%Oi}LdpJNnsp2$Gw^H1!D&Z58h{c2 z|LyVtIyxs6TPO%qm81JSVQeop)%RA?R0w9*PCy3A0>GoQj)^Y&FpB5^gwKtlI;v@( zK?D`$x7mJ07FNLCWgG}|WSucr|9O0Xmft3>!+BntxbH$nhQHz4$ix(C;M*U}4&^|+ zRs}hT=1boa5D>7Rp5D1;m3$w>&OwYait}UAObGagZ9M4y@Cy?G1|&^98eZk&GvZ)g zJ-xi3{{<^Tlf+gt4LZF%2=+_Ic~&LUxUN)qsJp*E7I&AWji$^Z(nf3cuSlGa5kh2g zt<(1viXo1T4q>RjC)Hm*O{%~pdVkk!%CX$?HyIZ9<8T`iCUEorthD&=toF}ShZpP@ z8Sl^sdI3$r+6HMqL#aP+VVVv9MQxnIbFQmzzlSv_4tn)aOJ*Xfn&(I)20DJ*AtNxn zx$uXSX9G3L@5$zFgI_=eBt*l@tHo6C{9;mOMTbChJ(1=LbM_371R0FCZ{5n$P0~~J z%WI22%Y|3dJ$MY@*s=Fl5i^awuPvl0oT*dnty^zqHLs7OPJGKCh!n6IRHWJ9He?9_*)AFw{WrMe zpG{M(qjF?i3}pK61j_%E_xx!6=-~Tj;@D)(_LPok@TPT(N z2YyU(RTy82&+6aMGbHga`sTt!$A8BwtH+qxc`V5Vzf!7DQqbc#p_~ zR{tT{o3rkuIrXMP1d@WsE_y|fexR9?VHF|*zJ4j1KQo3_tI}laf7x%~OC`AN+jCq? 
z!>Nfbt4pE6&bbUu6Td(?bh83172kJE)+TrV&Mz%r8zCNowhe7{bwO@EW!i&({wwJ) zd)4F2hA)zK+lw{$LLe4NBThG4XzQ3)6a^fn+ z7lN$TW@1I+!PmBAnMV>r{MVfpI}f)};;J!G#sUO`k= zsIoeF%nB$eC_FqF=`%#Y0Fty_`v8Sk|I<#l?G?~_VQtBUDUhSt9FwSl@*cM-x7cWW zmTVV>tn#15zU|I8nL>Sg7WoyR+6gQ7uOe{4XuvpsQ04olf7X3uPPL_C<$zTZ*YQUXZM;`JB*5Y;=wuvhTi4;y;P@OKzGOp{( zoy3S+Ug5RDy>E@aai8Q7qXCqW)?rG*ekCMwMS zg8_2F3MeYy?za0i)xX`Xi;D{pD8Url)VLfGe|RDd~b7>7Bd^iu`@`XEbiixOHiKOj%w1QJ&NW`Oo}?ltoara`zg00FJHpM}58PTZc- zcFzK_OU&oR!$_r^_`Q_gZMYJ$m6*&yIuU7_gmU*offxgEB;JV;H`kz19@{3%+&mN6P(H^P!n2O!7jcakEiC@jR!N0b`)qr zAMvX+O#fEp&sO0*8SqQPdte6 t>5sp*Jw?97{X~<4e)WIkFPmDYM7_Lo=c5Wgp>+PePg^k-gtM6WJfof{r~@3Uzy*WbLFn0ywv6mEE^~k z%4X>c=aeaw)j|}?s;=+X;T;oi4rl!5r2Y9z_9|A!_D%-2Mie;%d+Tdf_SZ}=|Kw<7 zYiDX@$;W+?oA>BXSMBYs?Sy%FEdKrhZYx_8p8fJ`TJa@6SYObzqfi(O$iJ)NCE`pe zl(Pq<&z)Ai9y;3TrlV@KyfQKS{c4U|w>Fr4AD?hU{a(y3#%9mrA3u@F?=mkdi1~f& z@NjP7+h=XQ$(yWNX#Digsjk@-DU@S(S!%d(Uy(m=wWLsPCj9t+@T$mF`1OPL z^GioJY~JkP$lhGx#p-^ajQ> z;2RPU5FN;=(U#3B=AydOJEqD_BZ>EQ^7jb|3ARp7^bEuDP+gg9KF5(uYuB#T)L;2r z%8;m$7T$HecXM3xZx!eBPG_gzn6s5`2-bH`P!OMc^|m?kC{NK8wZL&SvA5iRxYZ=0 zc|GfmNRQ>oh>Dt;meJhq)U_LT>P){2-XP?=Td1X+)gyU%X+i&NU!X*PHt(jh85tQa z?|)stgNfMNovu8h>1mBi9m3KODb}%l&Y-M?=MMAJLh|A9Q zeDF-8g2YMMcX98ETyrlQ8aAitOT@&+nl;5M74-Uv@Y{a8fWIc{m$*OSHM-|G-lh9= zz`@B$BbZyaDczuyL##h=adEP?prdxtqO(A4VP>@PfWN41rb;cvlxz&93il~Q&htK0S7@3(9 z3LJ*9)_T^>b0*SK_zZvNZkI@{jri2_qn;bx1F83 z)lUQ(ai40JF2zSl`ByYItNrl9502$-Q}t2^53LgSB}R!-efPQZHa0e2W@j~0w6dFu zT%BU#;KIA-gA0-dS7N{THZW3;PB<6w0TKn!a+E7bM5`I%`VI)&$sA{W$>)Vc2b_S@ChKFsqI<*X@g{k0K1 zcoM6Ur54K$DONXCOG`_^?n(ynucwM!rm4Zf!DjT-lvG`sS$mF!V0obXYZ4@NPtU5A zxVvRvt$#Vs+~L&iwqe5tWi0EC9Xp~I#!E`7DotsQqsrH>=ZT7nHf5Qrh|Rn+)GDw~ z=p4|R*DEb8Rg06~>+Ir^s1PA8*xl3ZUYw$x8$_eg&YU@uW!9X?E1@DGAu&DHsqMX! 
zi^EPLQRwC7#HYI90%S7KeAqA#n2-at)Q=ECjpu&@+ut0J|ulI4OAA3kIm zRqm{gkkIey>YDHIVm&C0Pc@PpEc4!F-jQeHvM`#9TmL!RtXbyd$&(st`btU<)@|7n zA1>xPR4vrq6nfI`pmev}Y}m`v(roLVGh~Tf7AMN5-?Bw~nrXM9<3eU)?#RS~=@#v5 zr*XaNFcA&yoNG#F&fMCtaif~5YBaWT^PUrmxPN6En~duGT}LlP9+rD@_4E5bG?FLB zJyvMEyu4ZVgUVdG1*&ZFp><(ST?sxuo1~TP?Xwn_mZrZ{^0e=9>Up`AORwOLCsv2GPiY z2oF7#IQizt)3=*SJUoh)=ZA+{GvoHTO(|KlWeq+z3=Ge;Z^z}H2wHHUuFie^dV%UV z^6n{uWN4%{Q!DH0gRUEsa-Fu7T=)F^xF@M?UnhFiGL7X@kZZdIuPGwSDn*^J=Hg%9 zubKW-UuxMc)O{n$eQDmb^|?`7fkP5XS+-@TaHrdBXN&j&DZhY_kPzL?Wo2b9)6E(V zvz<;!r*E&LMr-K1sXch`;LDdU4V-C(&k*Zx%gd$Q=BF|?o}ltydGllTwN~X}dRngM zZzv2T7oJFX6nD9fi-}EE@m0KiTaQpO+x~3Jo)eK>MXtkf5$@l<=)Ct%)9zw7moIPG zMr$UUR1^l67sfS^0OY2~Qy`#JaxFXUKfgb1UV>I)hN#-`!w>bZeRofLtjv12e7MV& z<+d=xE72}8UZ_HI8gD^4tnBbNtD+>PBMF(QKDY=ox*)!aPgG3I*Rr#qb!s>ASP3iD z`~HvH!F$ZdI`Taa9yoNUkLrd6Hn_k`UK`y%zS(+~nsK>ogL(U-u`Q+_p!g9 zh3c~~MjL;geNIYhH#NDp78`p1%Ed6@=Gg8h0_M1*q_ko)3*>58Jqj)Dc;&nk``+37 zf$+>h*GjHjCaRBIh@fV>`_i;VltHQI>-+o9a*Om^>E>Dgkv91#H>~*Ebe5yl)c2j+FyWQK5Z7ThW8n~I2m9-a7lf#^w zm-jAju0O=L-I7+_rasRsvHZ1HBY6e2^!422fUmH_5ED<~=Udrs@6rtH#271pBs6*OnrKK z|NebhziiYuQoEcw9W?H=Tpur*ZOiWK>tjEBSaQ{>RkjWeq>rU+JY6zB#3{Xteq^L0 zU!~NGp=4yKmU9N6(&7rX|C2I z=-?7C$nzYoOX;UZ+jHrAojR0KCw2TLR_FnXIcVjL5zAL5UA}(q!DX`(M$I-c9edI5 zaN)x5XnW6|JxjJoO-(gxeI9ex=_j$p&!sNN-E8+I7l)B|O4KRTWE7;cEwi>LbHgnF zFMS_Bl71-I8!%nOE<4!wLeiI|b+q!$o4Vh(9{4yu&a-RRuI6M-l}?&d@Knk8 z7oU$usbtsC(8w<+sAz0d!EQ&%$%#0Psn&*zrDRG{Rih*~Z)O$mFfN?!a#h&Hty2e- zVKMaO^Jm4`vCfp>`wXn&$=liFb-vK_f*NZ1MMP*-OYy}zX&q~={m@a>S>2`{j1|r5 zAimTod`>1z)L9w3l~l)qY~kYJ;R(sX?VgvE96}bl zjAWEI6}!1MC#XmrI&^5LA;#N%dCo+7mz5M3H+S*nPAb3imn(9iCzMdc)hGL_r_(%^ zWRWj{B_$sDLr0S$N=iy*#=EWDMh6DsK7G1MS_z+VRUHAYO--u!>(}RoLRaffWt-Dm z9vqP3ZXd@ZChF31rg>{(%cB@`v$eYUu@G};VEi1 zy0fIiP*eVhE7S4IzvWo8$CmGNy_nnOl2lk&h>&hV9ECi=O>+T5(0V<()wSrT^IFDo z0%lF=er&q)wB-c>(S{ZA#_pE2c0YAQR%jL9$lp|2=ec=zMPXD=poU&O?(gG=56|$< z5V}|J-ZW7+nH9s?g|aXsuJ3|IO!glf^yVq91BlNC3?p&Yw!E-5Ok7;t*2X6PvRp%% z1QQ7{J@+{yhrx#P`T6~he8hBC_cHKP3r 
zxG!E0af``hn3$NrvR-&xzT$q|TC+Rsd4QiAQX*sqc|45Zn#Wj^k|{u~(2)RUdZrX5 zIasz!zX^S68t6p(&Ye3fLYASO&U!9`hwub3Ra)+>qV6mEzD!O|*NBW?ZAsRAa@^1} zyC^-qF4bLMUq7U5ImUfOmn+2_7&o|RbMe%IZNj*Vri}Ug=*ZTpUn`a4srS6Srz<$q z;?extG7P;s1u-EY=dU7ssNBmEjeA8L-^nelsH>_56a#q3a&reiOn8OAN&TigF9%CZ}Y^wFtUDRI0*`h!p$!{NAcR0`R8?Mj$BT1NUQAp z=CAKE4BIkI{KpC>*k?9p(D-2BxqMvWR_GeAg%rtHt#T(g{g%gsa z-6i^H5FB=h_c66F+LZDzP_jv*Ue~y0f=%!1L1bV(&^ig8J6>K%XooFJxM4MbZlH=k zWQ*4e3FTI{m}OkL!sMwdCmYP2y`X200j@pQDtB%yI3VC%=g~S~I~1=W+ITVTif%)h z$87)*{cqK1?KHYl(a6G?>C&priP2ozIiE;D>3?i8vP_XxzIru&CjhBYWsoVFYKw^P zSYGdzB@{>vz+w4k0`BxwT>+2f8)dIwlS|M`86=VcY}onuRCr1j zpNr3Z+>ZI6q$-;Mw@1Zn=Q2B%f}be+i_JW;ES@tC$=*e}oq1YO?8lE+$OyY{-#)YM z;-a$=e>4l*e*7JEA-2CdgiI4{G^e!cJNa-=`Rj z7~hwslRFaJ=egTb5v7_;4RYb4t!?`-PGd6WMEAx6k!Fx>T+I?9>s!(c^n>8w+`s=c zYNXclD|f5ifsBf#8f01cjD2cCPewgG>y?Pi6?Xm-jXIwOJYv@K@=kC{b7iHhQ-_V$ z@`S%fJg_q6vxb^55iBxqGvHwCc-3RRX)H|=X6+{@?P4)=#ULj&>C*vvmgK`7`E~+! z{TGd{T#2{pE(U{8T)eXEx-=lMqJ+5SH>ugv9Itd%L3HA`!}6h5mr^6dT-7s8YKuSi zwHLY0R?z9Q1#ZFCdrw-h^I0?BUbmTGNp9C&^+5C{c?AV5sD}hCsjCMaN5LxX?(UA) z%#6kMZ8rp%5Oi7rSYUV^z^>whjblcZ^0Nq-U6~s1?mmIxKU+aw-tV~mC&kk8^779! zGlG(SdrWVDn=tC)!e|-BYc6vAb7Vcu$tnuCT5@TA+7~dB`SIh&0E{SHj&5F?cN>45 zpJ&0iOP1XSyTwwn_{F<3&#B8fuBW%xa1oP!-yBnllPpSf+apou=k@6ZXG=RfJ3qCg zgiL^n339$@VDPkOY|L^ByQ?$sHN{_I`5hDbe9yqZx2N=F5#aUQ#C#cbxjfZsaIcW5 zsp;!Zr*4buE-ox6CIW=T2=U-6&AVPMd*w>BQm&=$S?Y96aAhagbUV#@Q*XnH9AP01E;sNqM)Oi0Hg>5qmKgv!Op2+ zB2Fwz3$sGtDSQCwSx}$^=4Z#PlJ!fv+6I&(G7OkrPxO}ix{epo;}IR?&Z7`1uXpL+ zF!*lWIu^s%J{f}{Po9|Bk9D+KJ2?pfnE940Pp7>`*)_btTN5s3-Z<+k)O8HF*5JCc zGc#Bs$zOhnB-`41n?XE%Fc;Ht@9W)G{A3PMPtjto4CWC6b7AO|t12yhcPkUqRi|A$ ztNysm6zqKT@J)^bJUj}&{`#xS#|Kg)`$RP!bh$5BkQscucp()TreQkpSj;sCb59iV z>R%f>ns@=@ zf5TWOtr_E5(U%I&zT%Me^zxOZ?v(_`(KfJ*>LB4VD-(VsQ!=Lez>pA55IcZJX46B> zEGKQR#DvcRQ4#1`SrogenISlaP_B;*Ew@uB?^h0VfQMC?D^1B~XJ@a!yPZuHJKC~? 
zLQ#}s@_Y(%ECJ(-hK}lr*G3`G700SCi&7}dSD!zBp87T9hDV;Y%KR&Oa`I6ctLS$W zikiyl)4#4>y}Dvr(a{Iju3ukST6z}fLq<-n=t?(gPQ~Sn zcNR!TnjhmD%FV>8gr37EPile1x^XOceRm)`dw6(QO)<)GtUh-di zmp-g>l0u133}vz0wtc&R!OQQ+FtC6Be!_EL`LnEhUh*!uy1G`Pz!tcnKoUZXACO!& zJSsh15&X8`ZY&H>+?^2c%rP-B?7X}Uv0}Wu%C0jb>~tpJvPNB}&X^Z3UdYZ!U$}4| z)TP75@4wHBEm%c4)!d(DZ8y}!&bVt=Wo>P3Y-eq)0t&l^s5l#&6zH&eKn|XE9&vF! zFtsgQoSef?hOqH!AxW7V(=QzrOxs+VZq8X*5y+{QrYE+Uh5ys@rH;IBQWY<5M4&gzy0^bSHa-1VOmy_S zthngtBR~KA;dwc7NTu6-A?5JJM~aWXedU{k;27XtezT?{7$T%m4oPF><>O=AzWu=9 z2baeqBiB$6D*E#PjuH^}L|%}DcoMn6&Mfb)pI-|ww}8`_4q)pMety*uA*-COrGSSI z4QH!D78h?=bmS?KjpCCbPa0YVmZ9~$jsjL19I?8)dohNTP~9!5wDMmV_Oe?`O${|5wG_eOXZZ6skd(5?gNicb|^MhFUJF&)3_!y z0ay`OItkbVGuWI%;!X-h)giS;)Y;bdIRT!KY6$r}@u3XEVZ$G{*JJw6*QI7ajwUr2a-kCWE=0u&lGkXi3f?V)ed5r|7zV&hYlWmH{A_}5rmI8 zzzq=TKw^!iqT)|hR#trj1C^DP!-?in(_8&edt4p_7u zIB*~vLIAJ7Gl=%41Qj+y-J7*#C1_=v#UMzA!K4;{bS8YTqT(-^sCARJZAK6<6-!pb5r+2x+_OPkb$=6@^*`R%vwsvq;ky&NVF_pou- z4(+2mL+G9c@3I3q*x4Td^`b}Ew_D^UK$9G6$! zD<6_?S{M1Nj*iZVu_+>J6y-lfFIiT@ z=t~5f&}l&mA)fjZZ|JQc(?eu&^Y8)8jmLOp%H#jH;@-Xzl@&j5_e? zfz&=Fph5DrJ*8geU4_myE*o)6_iTpjEJJ@C=vbWi9J>2O;q{toR;ldkHRukfjDGso z2>1UFdbM%Mla`TDwzf`(8na{P&e*1=CfS)K0Boa-OCvepi*(EHp^B>YTj!mGWOye` z^j|cg$>-&J#cy1KXwotYV3wTz1S>X;UjuO;L-@Wna=2IIDE z&%mE@LaBh>RXAdU_@DxURfNF*b zQ>22bqhk&s-8wpSI^7pW!Ci%+Q9`DBYwPR#PH4^e^gu+LL?=ICFv=S3=JisFqwB2IFR{z3i#d>10Yc@`CX>2qE7^v z@9{3XJlS?^6D>y>tWFEjSpXL~L`8L^rKN|uOG=hOw(8j~e*JnKRY*HGIXU@Di8R31 zsE$M493~L~(}sg2Q}5or`$XKWpmkZ6UAgnG6$vzAYHW3Q3 zJE>b>Uk|F+?(GtQHFEtqde}|plY0nKXa=cx{@gi237}X7666YTi2cNgOW+`yKa_cA z1IkzW{c}rw)-fF&7e|uRuNc|#i*l;pzH}&LM2iD+_Yl0}V>eWzSTs*vn`ERt_F4uL zBhdmeV?0B8mc4p)q2RhSyNHN(l6ngDAd%`$pFU0Y1~qC zpx=$Qty!}s?Q7-_Xz78~J=Tqw)b^gRev#@1fm58!#|7=MWzpeE^y&-L|x?o=jY0$?K~9iIFGJ<-DG=yOoEjH|I1*1}$;zTju6 z4vMO=|$b0b^SrJ2e6V^oS3KzbTK9cyd8uJW9Oe;c;0BH&? 
zAUrsukKsv@daUzWb1XuLGKwjQBtDW#*kvkyQ>kLP-(JorNqoxI?yF@2!WJz=PXETl z9h8_Xn_fvHBclY@*)c~2E+Cn_E?ipQZ8~t8ZKsZ|?(}4>L?sA!fU9N&aWCV}oifxX{HD(^wbc}iqpvb*Fyrpktglzg!MBj*I%}D2(XNi+l%8eUI5jsA zVcr-k2X0)}sOfuTjO7-ukXNtHg@NOz^1IGlBY%JZPDF@skL9@yJU`;wwr;r-f^5NE z4#tL9QV$Xy8krcV%!D5{TVJjA1>__w4mb^UWngRy{dLdUGK~2el9G}}O}Js}YCII& zAJK&F25BILs>pHVNIKGt68aV&hW;0Lkj$Suz02AxnLMG0fd1r6MZu%2M;=1VQ(|_& zUsS*MDwV@n#u>GR1Q3>e4&9HNw&Eu;mN#@6pGXZvjDU2_GG7>N+ucHXlZh?Xoz?y8 zAI88ejV&qKMD8D2n(a>WVL^;nRQEjIjao&^fv{N#W(sSorlqBIX9xDfT3}CjetrNU zw;{GL3hW{4YjeH7pYW^z<%&q7aQAuB&Kr}}jOu?b$%&2*2)gi7@NO!NEdn10yykp| z@y~DDA|&OvC&O76f(c|RMZg$Fb&QTLnr%;XfDBNV+>1MAe2GQD!(w7ZRRbON5GEqrW;FB5+5ZvO zR&G*=A#D>mC=YgwgYLX-+qRFRqZXZHrq?}ynbRH7ztj6X*xO8V%zhdmi}TA*It&FM z#?it|8pj|xfwPVr#X z;G_vT$SxqDhV6te!ZpAAKZ4$agWxykQSJl7!&5?2zFY_~`EdU^;yY{-gJQKbp!A8Ec~g z0a366oqdX!f7GsaCqx;@9r;gxZd5siB?sYB`P6#*f7UB&7$cm~VLxl{gq$wLKuUWM zkA5oDBn~uZ1Wz6O1%%+w&&yMl3}cLqj64NXO+{_3iOmG-xjG*w46Zk2Y^Mg7(IM$~ z4|tSR7Lh4j5vmJD`l1~`emuor&j7U$t_(_w16|iiZSEIbwJNP6I0hb)L_$%0eDk0! 
z-QYIV10LxDxNSI7&`T*&fv)|r=!J;0o!)ntSy0`L0!2DLLL`RktO=xO-}rbMC`b+X z16&?6bl_{PgKWKaLk(IQRPbm3xjBsn!HKbk6dokT6mp&8(4n%Y5&F15{H84uuN0`T ztULw&8H7kg1ZrX=TeW6gz}yuB)Z^j7C*gJ~sBDCX;oy*AQwVPwYEI%zgmLj^lC2t_ zkdf)T^rzqyILrtDNK!T+b9*GmKMxW%DB0*bDLVPegj&SV3eU#18DOoVm-ooM1L_z< z%h1ZVjRD!329r~`;wHe%9bTo?M2tXilNm2i3G>(s0h{a9q~Sxw)@07c)9CQwW27g( z+{p9hm`_DK3dj2=CvIp00qGLC4Ss`XAPW<~77H#2S+uDdl%Z9es6gi>{L4PkJYF-r zk!Oq+%E+m%`e)g`<@FVBs-QneQU$NQBj&Teg9R+G#{N6_vCd{M6HC1t6X%H_kI+_!8us;cR;(nzkQ`)u zqH)Ml!Tv;IN|i@`m`+GcO5hsXqd5RjB;Q}C=c0KKO?di3aTFu^MX zSB;nSU6K2`2_*nFinO|KsI5g!>=h4c4BSboz_!mbGY?5aO~L)qII_M4FP>D;vLjyH zZ9X1|OGZIK1DwaV%HuMW5XVt?ibOaDALvMN;6DcSSLTDOd2UIaJNMZhP5-?;b^+HL z1`0?wpM}Q>(k$n*!z9GW#MP0PQM@#xO`8E^PeAlCD*vtHicZy?C4Nt$bHN}OcEaY2 zc}sFVx+tcI=!*EQh|Ug_Z;yrcZbFK{f^~MFHiDQto`U%O=Cg!kHe$UBkZ8k}EkP(V zFJHb4c=E*MZ87onA)NX!Qv*vT0QR{o&kgJqXCpMCSA^XGX@ZFewFA#uBJe4sz(Anv z(%yq?Y@C9E>SW+)%TC(Kqn8{Xe`K%db>auh9M%W+BMy-Q05Vnlnb08&VpI2FgN=f@ z8>fAAA@19qzzYYC0vlC$M1FKKGe3-CSk&8qA{H`54-n6%CBd9R`6eJcX6@6EI zo!)wg5`R!CVr+b5n|0WUFSE1`J|U0{JQ^in6@!zA7qFqBfi$d;nKy4PjKce-V~C;0 z9O3jt$g1nm0!C=9h_VB^i`Li+XB4DlBs7Ev;$^wBI6sR}Q`ONq3im;XBVM5Q>&?8| zZJBqPln&G&f=a>DPEg&pZ`%ebEz~>}U_q6kZ;KEZ5f@mP0w6E|mz$$3!u6TozZ%%F z+Pw7GNSix}IkH)Rb$^ph(XlXf_4f8+bTDz#RaA_;dGn@uDlosQTwk#^B3v5S)CG1} zk`81HM_AEOHl5V?PW}_U3YIzv(3RMc`5lH-tHZ?>sBTcb;4m`!lf?R@4n4=I(_>{x zMq0X_aB``-B80EP8W0lbESsa9oyw(4kD&8jYg18H-m!J7(wNPr(sWG~RJrAMsYB}4 zgCF`1dY7T+qVw9S8NMnjGXO1bL>tOvWiaKEG4^7g( zUlW!T8oK9>r>7Z4stubq1so)NE+1cGTM1$o-U*`eVpeH}crAv((B;!puRY=@9LM5p zB?gQeqNT|`H6?5Ek%C7oifL(9Ms$rH!X+`(=C7*q2EAmXT486olS?a_sLzzl2`u@;~tMsfm`VJg51(92|e3aJ;RLTM(grr^DvAxa8PY1@jH53p<#ZO&j;Y zs17X^^#)!oD5dhS{REF7;{Od}H@>X?=Q0pv?n7n}Vl;yz(EY7AZV0MCjwSR!x+;Pz zzAdyk@*OmxLcvX?sVPSRqne=|6Z-{72HofSH*OTdf0F_Qs6~KIkO8-bNmw2Pez=Y@ zLSo)lR%V7&&lSP6Aou9lX;@zYw>lkKi$MY5XhTWA3qjlOvmWmj5)y*LhaE^LR_xHB z^INxW4HmUT&1J!mu?!zbl847mD%z|zDB}mw{}mp`{`fneePUWf`zWvE7>WzQFjWd1(`+H(dZT?(u z&q<-1&J&>_5`JbAp~#S%6{r3i#Kiv&f%3opogb#GA42{B_2PW}`Vp}1IG++c5(17J 
zZcQ@h5J(M;Bu)P*8oHLXS)R1Ky!y3k$)x$>XaYV#9Agz#RYx<4+(HfrJmxbQ!fa4n z(+qiS=mq4)e?`;ccN7)x>s{|z>x*3l>hBYl!k0jiz6K`#7d`onM3|J3l_d(foGV^1 zo2nQL{5z2_K0O1w)(ma#FQ`^V$MAngc5c{Bx|LXJ4fl(`MgOQ)9*qX(qUhyo6hgI- zMq^7B{;NO107}z*QdlTTYXxm_^NX!kp3OgXo1W-I#GJ!GQ>8NIQi2=d-HrbI*_u$h zC?ogo-hE(iQ*@EA!zo%uV*Q!5;tvlJriip?lL)uN$PchgEzDNP0oCKENdK~N*iSrl zEAi-;10kKfA_A|{%JOUpBNG!ji=tQCK;rgWNpjpuC!#nqA}}HXMo%7`>KXp&ugFFD zl{s{VoA=KB4+`nO4Cc`ixU^A*-a-|JhC_!XIaLL&gh=84G?-U_@`=fl1o+_K;5>{j zsc#>$nS(W^=i6dPRMXs4dMwo83tDZGVY#1Hp7qmj(iS*!;!)FGAqQPxCk!ISEOYIP z7k!BM0P_xUsvbE)L~c}B03L@?tJP|He>Laorm4s|g02Y&3W{asGk%vPB`IkJglM#H z5}tBSqN1nZa0_O$$He6h)v|Hy5s@f>9rXtW24oAq?(gw`8vU8=n@{0$_ZJDj29Mz{ zRc}vlb%Y8#$iT*dd&o4Mot#v`l=Qe9zWbn7;zdU&2f&(n&FLbZ1#T8Y`o153wBFpj zhLX5?e6A*yLT}Hv8vtA5)+&xHjrluvOl>Mjo>2z20Y#Hp%nh3d$^pbN*Vak!ImqlC zAdg_-;1*eP*pSH+s}~0|q)tX!<4d`Jlov!)D!RK%s+M8y29M1($pkzI8Mi$Pgw^=a z#`TJQ=;}Dqx4odjM~*$NlM5L5KZx_sstf`2M(0Z~?F03xJRY{dRL1JBVzt)a+q)5T zZZ^)?x#7IW6JdwMzw7gdV(O!$v=_+HG)(JwkVc{>5Ps{=>q; zT5$>yEueY=Q8Y136U+dNN{t5e3o+^XZvA?5pg;2c#HNek=p4w3XT-5>QnZ$$_B(v0 z0B%N3>?|)>y;UOYP=^%jLzgF^{7@M0!p%atc;{cdmJOD0t)VF&ciQ`XN zZI$)TaJw~;TEwq@`SNA#tnuvaJg^+h%*^u1TW<2<9Hgo_KmqEM5?a1l(|Z{a3TWT> ze)su7HjJol-|eQTFVZQevISxKNP#J<)rb;*ChzFlNG(oo?iLs`r@*okAfE_dUG*{; zaW=JeFEsPCH=N3zAxBz(w^Tq(9eQhd`uI(Wea7UHkVxqGwKPZ!!n7R}weQTyy8sVl zV#}t%V0K92BGRQKxytyW9{cR&=2V7SGJRV_F2Iyiax#7#Fd(<7Nq zWvdfd$P9UYO5r=Z^w^jOfQ)ExzYmEZ} zViyzB&Fypyx#8wUJXn|?O@i0I_~8;+OsD|9*sU0xKhsBLXofYrxzO283}-lq*9vym z_aoa@>6lbu8C(7pw=i7QKsoV4&$?6D{eQ(Ryn4m|xg2F#F=ZOfj#ya_n!fz)2NFTV zLJEgH7UJF8=2h0~$ZoZO-XX-I2Hji8BUL+Bo(#{h)DWu)8VsAp_wQZ)^j!hZKulb- zeoFvtTu!J{^==>~{vH-eaTG(*SwsfRvzIe;a;$7@CL9tsoKpMa)@4d_4GJL{6)}EX zMe)O^t`4UKY(oSz!*GN{Zf785+B+stLT^6aO=)f;=3USTNg$B~M`2xQG4vOU&C25c z78~SxDQ=kN@kKf23|qFeV73cW+)6RQO~$tn3kBT%OE{R;l7mu)imd>vk$f_C*;MF4`^-#}kT5Yee{RK~hcyI(P z@baS$^p0QyR`smA$-W2709rL17tc*uPgqAjB9&*dRY-JT z-Pc|N1hJU~xHH^)3kG5mhQq|6k1B@a6Krl@d%SA68E$U>cBd`S?TCCT)M@_^4SAZ} 
zIMHPhdF0G0h6xecISQSb54(~L#tugnWeS+=;TPO5jbX6~Zh|mP77Cj|_2!#qTyO!c z-?E38dYkj@Oo$5v#-v8-Az5e=cHaW@TV7(|o}I$fVfNXQd0TgQELRv?E@~mf;a7N|r zHxp5yB40x1B5`Wg5PcVvJWd(M<4naT5JFpg4RHntQvSOvxd?Gxu#&Xcd2sy1Nl{l< zXU?+zR5OY9r#P%gEG}+vr)bd}QwZ}!j(rds6YLkP$aZqQt}ZUb`1|CwwT;aK==(al zoCEtu(a69+Bm`uv(qTqo-66*yen#|@LzP2aMKq!oBaz)c|FNE&mBz?-NE&i5IYP&O z`PKL2lqj5sE`u?_$-UpLT`Nsp$FT1xgAr3sE?5dnQvjK?ox48!=fe?&0A@||6JM3Hj{0jCEn8;wH~7pSBnLT?(*rt3BSx@JAhfXB!or|uh*z79SS z?(@=^Ey;;nVqS${@M%cm0yIp>i_bxno~2H};ToJTar}7w{VO+CmS&0bwm#QNKV%6& zR}vQXX$MV15ObB}KwL^1<}gDDc`Vq>mK=zUyJ2?7EH1Y0xdnsGUPBCyaC5&#Ioc#G z3FcQ9q=+RytbD#WWdv=Zx815l06649SeQj)8a(Hc=xPQ`p5zcC?giRN;L;zIywtC1 z))cz$#EQu^8IMV@0v*O4h%TH_9{-$&YpD3Ltc3hN16ZVabmYT_pK<5}AWhdX6w?lw zQi!?VvSi6ox}n%@0p}(_8W}a_;(UtnC^@-gSul9m%Y41!1lh=osKcj69ifoGK)B6$T6VuabU^s|@4W}hqjrA=pQ;FXfJ;MC^cQ`X37XhQZ7CryM zuCuZjQex;!l{_V%!d=IdK#t%H%hk*@s7xH(+yp!TN1A5hn1zOm1!a#ENl0>h14Vdx zun}N(>H#7p%VAiZ9GM0s+x$>L+;o%%shm_J;$|ktiOu1IC%mu&ne2!l3Retq0R^g0 z61NSIUlIO~3gj#S>KFXONk|cL&d>v={D_l<5NptR>pfN$^vMC*Hk7xR=I`DQk&+(2 z+p16rahE{E6g(v9Jvc=kNnAld)|Wt$v=k%PRA8SYN7&iIub-(En+=7?ZrHifVOI+- z;yfS^AyH$R3o#ggkRy!&XKOPY+HrO#+S}P_U&n28=N<#NYqV5-9!<%Qm2_Q(mYd3jT|)%dFeV8l_-SBOy-a&vqEW*CJiJqWKNVJ^AMU1h0IfB zo-$^9uWxIg^X~IG@80|D{l|IV_pHxa>sc$0=lT7<-|v0j*L_{r{kw8p^$_DKu2mEY zh4HYmqB?~_El;6PmDADUPq>(rit)17`Jj%ohW#05H{)~Wlw-!u4z~8rwpJ!QuIA^Q ztnAN<3-1-)Dad2#?CjtqB_d+??{5&cKW8Drt4h;|4_WD;e9DPJVKOHFP$kI4TTv*- z)DJ7}*F67dsMYqq}0`06Z2F!O>3YjbAWZh>1J5i z)y5YV>rcn4CyrLEx8L_=#&dc&Ff?@Cr^hmL$Gf+m(9~S(*Lne{69WC)YxoF&&$lp>hC+>y=io8Eb-R#@ii6}7AlYTaW2jG z$eqyAI@5oye*--hC$s$m%?gKaZ&)V!KCf6>oG!X`_ijK(hmpp8^XlOB5=N$`ru{or z`UldAl(e+iT!)$h&$i~ituL>rh|bS%Q4Bp{H{I&i-dX+RfKHK5LE_OwO}n{`%o{fb zzjEp^`TX?Q`R}3HD*CDsrJOt-mWK}?rua>_c``*vxs+aFT<_3b!C2tq?(WVZ=d&-r zpx}vCa_9H&f&2YwJLi9mt49iGKQnrFcZX$5j#x@c%JCu}?}_2o^}dV0wo#0#9?Kk0 z)cpFwBE)u3+~SjboI3B8EeEZyUA=lSB4VS@(t=w+U|>mG8@EXQC^dygMrPx}?Bs#6 zON>uyEURByY2?rLC+D0Wi#BK_pJ(9n%RDD9uk78sRl!`lOW)kud_q_EDn7Tkw3K?~ 
z%9S3?3lpE!@P#w818I!h+4Stf@~K*;71udw+co0Ujd27h!}GO11F6{^Jv}Rem1x-6 z*_SO}-uYgnXqTkx$*WhdQYeofKc;O6fB4o{=#iAmso}zu6ky3|w6(Pr4;?a{n;P3_cv$RrJGi+_IJ^ma@X3veS2|oTH#f!xI(H{SydI2 z(~l1&L(SQY+%Hz7J9aAt1Oyb<*R#30y7o4{*tU9;tl)sgn7yQc0DXby^d&4O_D0P7 z-P%(H6o=uKy$oEt=%%Ko9L}999qlX|Yql?X>m!UyjIC0QQ$PIvp}2*a>B$~Rt_e7+8(kBNli&|FmNJ>k`9KN#^%O2AF z+_*`Jlp3XTwKhqsz(p?d3^5KD4*Xt{+3Us_jjvP@;)=EWoNDT@J zVQ=?cD)^f3{^G$dtJ_~QZSFHG1hwY5(Nj#Ohg&noUc7j*oRV%`$De5TCE-MAZLK

Ot)>^`TIA^*$snu4_pemeR~aW0Q17!G&>IVnr!>_dv&|cj~XG)wjh#E_Kgj+ z7Zr_8O|eNusMaq|HQUo;6NC-&w+AqH)oB~9!w#5qmIjiGH9@xlMG-6XTB6-+bHQa^yn*$`jT_M}1N9~!!*+LF-sHoq@l=K7%$YMLQ-h7_MHeqp zpFkMbMD3>`_k1fXjQzh-8FPQ)nn6=+*pCeSf5h8h?a`RKp2X;7WA z-zp|{LQ+cVD!$U-?*02my{YJMw>9aehsJP~SH67FtjV;|Ha0fqU0j^APc(FqRZG

%f5n2eD5{gW~pW`<|#LzJ4HP`ufI3 z>DM?0ipt7C&oeXE;g0UiV&#q`)LD0qjEKn2j~*E74w1WxQ@$$n$QGpA&69bqLt&Op zFB2Uc9a$p1rykBM&W)6RdwVA^GBR?ohfhG@C_+Y=!dx`}^V-XneD}`@-ImH$R*BEE zve@j47P8uM-P*miy>Rfm+LsoyNmA3vvVAx@KF->j>vCuIbdJM*d_N02`?lt(siEf5 z_wSD~aEQuV>*cwoA}OKNDcCk<9JFmO)X$&kx>nlKa+;h=^`cWR&a9-AR#qO$AO9rH zCnKY8i#?~~ecfQ_D|F`5qaB66eyqQB>lW`G+pjzd2Mz>=hll4lbY9$5aclFT0Hqif zyQY^c>-X6HK6%ImY=wn|)P8S1ecChYq@QUWO5vHE9N3m+-!5D@(?wr{wA$C+TF~aZ z`(y21%PyS4<1^E_A~Cg#eX;lgO3msvDcv0fpjCg|vW;g!4r=X)aD z0*=J3L7C0)k@1)~-$A*1^Cr)S4xrecQG}IyyQ*2q2AmpUM7{ot>S$ zml!ztGEdI0U+hm#GxG*&5DJs97cN*Fd{MQGa_io`r`aOYi>Fl{i0;>%S}LrQxp8TJ zx@ZY&gAAWEs694*4_iukv-pt_hh&%S*W*)i%Mue4sr|kddgn2BsfxPdpnA_ug$5^o z?o&ioIf!E(mvxo03x$E-W=+?ps@6=K29m&(RaEXatzhC7w6(J{f1(`!`3i#L9rbY+ zIqeA5ZTb26iYQX%yo(A?mRm1s`X;0_mAT!R>SGJ1;sl z+5=f%(Oz;ZlkoTBEIdZYQ$9;ZAuO?j!*NC6_Ag`*r8n|l= zi(dNa)9Uy4gbo-PMLFvZ{rDLE^_A1M+369c_3PKS6ng9FdQ1-ohfCOZA&!<)2HOhO zr|M=ZU?I&%#zh+h2fwD%P*G7?Hav?MEbyEfv>M%Ii34n8Z!c2d>gIL`NF+{!t8QVR z+u+v=$igLv=cz*MJjL(&_3JCzwsm~!@tm8oyS`zs`kq3inEl9yZ9a*b{1ZdX+V%BM zkF9S^47+}v;cIhqjCPtqhL1?md`8V=>xiV*Ryny%6oQlrT;kOtD+JQ=sj&iN`Jxyko#lUAzka=fQ+W~4cr>AtXjX~KyZv&yvm{0#*j zlchM|Pu;G5sUGpOX?WHd;nubvs1A9Sik_YxHK!8^AR`PLGKw5qhMH(CE-qenllOpG z_sMA|8TX%m{2U)2dnb?vfOfcfZe}J5n@<~JS1$JP{d@78$J`7e7oV=U7aAHE=`*{M zX2t3NW@_zcr>Q6eWFR>!oI97{Z1*9Mp1m5Uh`%z=p_3ZbVnW&GWqSGre}Dg!R9q;o z=S@{r<#kT+>RcC_uT4z=ryHvFjbC3Yj3mSmq3YJ2OJ#)e%pq}hgX-gbS1N+no0eZ) z$Fa+jdEdT$NiClM^NakG|(`&r`sPPt59^I>ou*{#eeJyE)rF zZEB7yxhpJt^+qX<)4n6Sbx@G2Eq7(?0d7Po(eTjM)LiD~=2no-kl9+baKqG`zrI~> z0O4Ex%E`iV7uSafX?8W4$+fn&wl~AWuTjy^zNuhjV!9k3zYRMd2v~Ny(2Emiw=DMX z9iy{nS9dIhv`6jt=TH4!8+Q~nFw|guZrf0(wn53K2x-I+1Hp8wSFc9l>21x+PI=nT z-Q4f?H;4ep*w9`i7Uvl16bfI{?E0)Jkw3W2Zbt3Y{GtFT8s7 z%iD%}cz&lha`I@SMP%ngi^u!-@2A?T^1z{e$WMByVXkDwCL;iS4k_pDS$55r9z5Xa zW0$4iRG55vwD*Cy#p+Lyav~pTClO4!^7Fe`h^o=yrT4BbE`gbunG->Qo~^CfQb}{}xw*N~$HMnK zkg$JTGHGC7pj43dWQ@uP(GoJKHRdtdZ@jd);4yURhVQY&uW2YOZ=2lEPR-AIp(q>* zo224dVEWw3zgR#{8$zH2k%cCNKX4+i-D0e({QQrH77X?0>?L1KPs=+xin$_ppu8LR 
ze@RsL1eh>b04%)InsB1C%~Y?oDwEGhYr1DSHo|(a$UTXRhFOL=LiI9AFG0;Zug=A_ z%_0|e#DvHyARiidjU5cQeA#rUDO1w@XKdR>zICN#WnxZ0R0f+e>8s?H@=IjbF|CyL zm`Jd_WQNmH_pG4n{*%zq(3FMoD!KawSy@?1mX_NK7iVoggiEBoGnC~DrtPi_sf>^G zxr$ozL_g1!K>6g8Aw@-U7ybPS{_Ay?DlnXxK8Ku$X86X2y%*JFf4%N)_Na=KW5VGm zcu0$DM#du9v#Z{J$6m#z_g{#AhA9-Dxu!6_ne|D*N5t%YG4!( zCcSyItPVfF9c~o!87pTO^Ii1%lB`|TF?c&2dj+&|P5IcdgLk$b>lhrIuO9JCdo1Hw ziX`B>w2)n*AODhL=!iOPZQyZ{^ET`X;7;?S*X8 zhO6`fYhn(WmRwvu^YgvPt=qRR-n_~D(y~TDcL_DS^zGY~l_5CKm-i%T^3(B7SnIwz zx5Zg1B1H(_g2J5Phx78x&CS9^t^viS40*@U=hqe!Q`5L^r>VhHgU#7hM#Zw;v%4r! zC~_4ef+w$11#v55-$-^vN*ZX*WuTzs%%Dpp9Xz|C7t7e#nDWu12awT6(|i}`Q}wbd zLIt!((XR9E*~5lYCaH?vSmNEhnMR~=hA-D;fEINDHF+aC zUZO z#Wj(_Anb8*VUiBfkFNwT=6 zh6OEJ*}G5y$HA{+A0F-1SS5Ys$`x`&BqQceHQ9_#PL?AOkaq*o1)BmvQ2dZ3{@AuH zMlDkIfwY@&OKWTT*;Xn47aFJv9AYNR415<{7&b^UBkz}ATuwjn>^(lS1e;Y8ca)tF zD?rNo-QD+|J9lohj7g5r7D{sVNm7v(US9iZKYy-C)Yzu-NRkD;a3%h6{PBSkc+sRW z!+csMSW8{q#6j!J$B&Gt(153hPM_YO8Yvr$+*5-4@2QHcTwIuoYm_y;&b52hCf|86 zrRy9Qz_5_D#U7VUxBD{5`{<^k?*hTn?z;(CLR3Pc$i6uB1~6f?V|T^OWP_oYW0zuL z0m2EJspRau>&(Y66rQ6gdfBT1fyE?MudxX+uUT^u-RHL-KTLl8{GKrh*hZ?NQZQE_ za+Yy*)cyp+B3a?nIewcD-RLL{q(^gV6`Cm}DH(;IlbYDk-@gjInVFdx&yF1{5w#hG zj*gCip3(;AJD0yMFGu`v0bSjBCY~gjg=nptJ^k^+htc7@5fe~bz3oMEi(?_a9oWGK zGM=t+@z;G9Cbbd0YO{mAEMh~uziR3oR8*wP)Ybpqx(kV(6#y|hDoUP?SMluGXB^^Z zuA>Su-@AA3cb~+$Q}L0UuR+s~pWok`)W#l;MvF<>>@R7Ch9JET&Hw6&RNsVVk(QC` zkJG152gJn0V8;W1Lj6keih+9>>s{w(91-A5#z1h7A|lFha1W}g-k|Vc1>+jrMmtKP zu~#5hu3??7BqVI_tMANn8EQJ2lamv)UgE4~kLZ!B*RGL@LE+I#(Rq^XgMj1PzFiqC z20bcyO^&0fnEOw&MxnuV+4XJS4E)X0J2ajKMMd#+{`j#KeN6y}0`$b|fWqnqA|oP% z5H*`YZ14yQ36ZvqL=H~wv$h;GD9l{JO604-0bK=v(;5+e{`2R#bfii0MPR@bj~~BD zxQI4=BZZb%1)j4LeS}TG9URU6ruKN>sadxzuOssAqpC(}=RJ}Blr-*cfG!kA_{7Yw zUn_BHI=i~~sA*^po<993$8|_Iv{GW%F3sG$;kNI?3(dJM$w<`U4h{~?T3TA90VN%< z>v}iyrAq?CXLI$idK{uge$QXLUVqw}|=*{=XABtbUex*Zw5Ifr<>DXKSL=Wv{&6ladJS?m`51zo|Tn$8&=fO;Q(2&ReGH= zQ%HPq5O*ySg2U{DmDgy=3Q>S?z&REDH9M+NfLC>V+7XNIGe)Y@d1=Xl^f9JW%H+^P 
zJ%+r8dd=j&f~h|5Q%cs=;NbGlYI2PB_V%Z9omURjrylU|$ema_AGXVF_%s0arOtR- zu;?SimI7@8=4RHmu5D4~{wvB6Wyvu`FHd{}4ix?JOudY6xzc@R98$MF5 zVFV&SR?@}P=XOv~&>K1iLhQ#%{^cc~uE`Ha{98{?SzB9MhxQI;7M7Yjhv+TW$ykG>JDKKi17{bp(;(u`~4U-QwnEy{X*? zLyajrqq<(+_xGov!RMV2(ujDkeV%0(h=d)h$)Ei9zub6BQ+<2O(LYIM|Kei#K|t`@ z!V@P>0FHYcMaL8?3q+)VsuQ-$iaJ=yA9c4YZ=}$3L!JT_EHmIRO*I0H1@b^nj^qYe zFOCRlH>LCEB~iH;rqona0<@At?}0!<{xCvfQ;)>1b&~?yResG8c@Rx`jPKH-q}R-7 zcasoVvC_l<=2A_2Zq$2V0MowOIQ4K~JpAb9&6@z_xSp$MJ9r>e$jZu6C?X;vr~<~N z0V~yuf`aHs7l~@0pryZM%N7S`XGMV94fw1U0er&F<)({1;kL91DJcw~A#ms)q{s*g zGJthRuIB*1(A!p!x7LRe_44IfB)$47)E2p=d8fGUyZ7!z$HppVk&SCL&!9MubsYrW z1k3-vskOEB#PQ>}G(<2Eo+#4DDNiU0=?SG69cyYTIs89ET8 z}ToI7>u6o<5{ZJhUC$Z+(Prl7xj8`9c0Cx(;jkfYZqbhot?ybB_(upbk*y}i7#gV#W8;Hoq zr(sMXFXjJZU&P!gx*29GQ4pePESWhtg0M1LO%nY4tN25Y*jv>lg{HJ`i}bokS_d3| z*(uAmSBv;*o0c(nL0_5jCU zi+p`UUmSIVmUST}W;3Fq6Giu%0a0mpaEwnkn~BSto106XALT*iGaDiedbYh`W`**R zBMQ(W;znsGRWj2XR&(zQg1A9LA$S=F3q`c2W}O;2En$I)Z8Jz()qTzmZf^H8LCnTP zMRgQ5+hpp-#>E+fh0B;SF1v(wg`RW=JORb%2F99f(#Wlc-Z%*b5a6ND*Cz zz$E@~xeW;7_!73?{nuCCtk?OOm+*r_?RkD^c!1W#owgC0@}lJq(BxBR)`ywDq= zH*Zo79Xez+hN2hc&CACZ-Bg0xd-m)ZFnUAy&<;VtFX8KQ?=w@sefu`(`t<|^hGThWnRWdT- zLBC->)YsPsOsEmsda}q<7fWZx|M5#6TkwB*#lW` zDdd)pA3rV?de51(<-6}Ll*7JiN_&07bs-?Vy&infH zYaU_Y)t8wzDWOGX;pV>a=+OrM1yn3#W>S^d0hmG3KmTHZ{wzb*cYbWehmRl4y|L2D zjhYH{#z_)Pt5++b_@+bKP5jv2z6k*fX)3me;38zy@CGHiI# zZY6c~HMwph239kPz2dfCpYxZgSy}CXxN1g^FRjzxN-xhYVi2Gj?~lk%K}p}ZeEIU} zR)gd^J|Q7SNV3TgG-?@Euv#$s9N^!4!wW> zGU$;vSx1p09UMfdsj2xlO5;|rTnW$#f`@6y!pZ*EYjWs~IVc>R5M3*XXcaTb3NSTf zR@+07Nt_|oH@YcaYaa>3pf{Qvq%12t{*Qf8;r6>Hc#2w1P7bX4GFn=bk+y=+sbCmR zj>OgK>1JAQ8F2glZXIMfD);e6BA?6P1Vvp=j4RO`XYkisQ0^f@2WvMJlg9-8@|V`^zEH(=_ci?(9Cu? 
z4zb}QWjaYnexOAQ2GWB8qINZ?^2;9MsH89L+quEVgf`7DEa0sdVDXqcTjVmJ1>Km^ z*w~0Jm5Ks6mSL0EDAnTpxEdO7+2j8afpc3jkmQ|z{nwn`1Vi=R9U7xU+3kc(B3=nd z#(pS|G1mnDPVfo`ZlVycl7fPQsB!5sfbY#bJQQ@hmsR7%bCpOGhF@PTZ(sOpE2-q@ zLZd-&K9F?e+b1If4c5K`&Z0W*p1)JZK^bLC0WzARBBIoT)-4AJ7^^w7$rv@`&oT}Z zL?h{bTpDN$X}&-$M4{nBHG6U9V=21&nkNTVytHd(L-|b@ez+U5zNb(?wT<{6Ted6* ze}!XxGxPra`!OI&bqc-q0%D8VeicOz^H%jms{k)A74lkb*U*suU($m|2gP}yUdU^* zPWyac+_T z8%PqwP+(aT9ry*hIDIS1{jE#>cEu6ZihldnY2JP=zaFx7y_pTKAZn9ox zZtw-nTr%(8zc)VPx~(0giQ(wP#25Ob>)^sNGB-c@-E$3Ts*EQXjsxsC^ncj_0JxU0 zt}uLTr9kfU`=dMVCmkP&(YX$yVvkri;~Z904czo5ALZ7;>l+&mB`AJSZ= zF1Q=-!-z|k8dD2rN`Fnv<%yO@tBC4Tv^Xg;(Oc7fiw$5}QIqxhT2>m3V(!j>~3|u~u3U1^%)Ffl1gHC@Iu1LtTd^2@&!qjqjWk`L6B> zdG;ZqgFzw>$t7K5bY<6tG+kMrdD8w*6dDcyw|VqNlZ0+vOCz%~7sq417miAZc;dYoEg2K2F;e=7pJRKMOnbH z1mVaI*2U<|8Crl67!&i%b*1P9|c76Rmfu$Kj;n+ zZGTxNZzc(Y3Lm$8*)npk#2HAWBoLeM$B>Q{p2cTZxJd8Gy(5OE8YhJSXG$>y$FZ(! zBGFL2Q3|;sd(6C{|9u-@G^4xrJiF}BSGxsd;8C}Wb*}$`hfvX9jkP=JFXmemoJ-pa zhU5A3=fxo5d2QSr9TUYnF9SO3zbY2-W$$eMuy6jS8;kRIriCS6Ma`p}E~`F3R08Ya zRnNpyC)K|2r2NrP1OtTVBeLb6G^;(=Ru zHL_UUl_|3NZ7CCF?b`O&HijEY-Fz*NX>zhWyjsJ)Uj$Midi&9zKQCg9h(%+7X`|G{ zo2#4oJalr;UD(?kQda#|m`Z-R(Wj~I-kMsPA7}Q3s>QCwalsOCZTAmKI5SvP7%MUE zBta#w=fyhrLWDQ=a6zRu2g}2|HB^@9`FEdK!Ez#ad%mvwjQdR=MNJ6~Z$@U86L#wD z2IIJQbRqKfYRw&*Qf9gwuui7~19K5{w2bVyQbi6ZRjHyI-l##q#iG z4OMTrec`QwLYDzHBI7}ZF@-7{k|N3%eW;{GCGeED9=YTF%iky*FJF51vA_8?yFBZT z51ZZB?4qO?7ICA75)aVa_DkYBWLJhI40nPL=$8deB$q*%fcOyuz*ajQPZWy1>wbKcE;FNKQ5cv95m)$<^}M{&YbK{6HlTZ9s=WQ1OW^$2noCTNXou6sE~| zcR%8HVuoXZ=k>{CtxjlO^)^dzkETNkKBT@DZ#{8rrxXQ#I!^}|mpYp?$SOaoBA1k} z;*j$t9mW>H@K{<^rIyx;78+DcNbGj}Bs$E@nUaJz-{6)>YVd(|_e*kR^HCi6-nYml zTzhOewyHiN7Q+MapZCXqCY&#pI3_6W(E$VxS12QZ+EBqX^GVv{)?+j!&GIq2W2h9p&N^P%=9}y41ACsoH zN_I|A7Q7oYmkzi91%{y#9Th1J-!WubO z()g~cfKrG3D52kADwjW9}$aC)owx(#sUyKFl==tF#8e{)JZ8V1qEujOykO% zoAa9H$cf1esiE2q$`0~Hjm=11GP}=g-?kX~oH?7_e$wqiuJ+-)u%W_x3p^8$vHX7^iDXSc1NP$j{DHx?`qnny#tuveE1L$A8(OqgIpe+`>~fZia2wj|IEyd6pg_|L_;AOGd#@% 
z&?yD4`{x>-iHb)sco3)jE}MD=Byu8m>=YLtgH?nxFPZG&A{f6-7F?n)A4{5 zY@Bv*$%Nq92aos+3UoDGsitnBSf~Dk6+Wwv2NEAUjGUmZN&$JIKKF5-wA)(NLkx*^ zvK8|SduF+!R>4GCik<9$0Ulh+TEc$9n$PVlNn*aTB0<9l0PDb!BdcJiAvKMGbLTQJ z>!o1*f^#Wh__jB9lVoVVfDRx+3JeE?v4`)?@cRoF{50sXX5C||+0M(GL?Htc8JuZy z9S0Uf@u}7jXB57#tG_N8IES>3vI`R$aGgHcO16nFiKT9}JTEV=PP)nRm%2g*-jgTM zk5d6Z$wTr6+vEuANm$CC1>8NMxdRH~6>x^ckd7tVAHps39H;-#smQ~ld(Pu_ntIY( zXqhNP42PIbJmp@$zTamOCt0z^biA5LA?2FnpGa$87AN6#qML%K$W5%h{QvAr?;CLtXPws{N) zA>3a*b@RuXSaaT?jec<%kb~rI=o4O2_q7?--&0zoy}Z24hF~mlaB?a`v#{w@0f{I#od^BF zA_n<%5vnU-MLPvPMtaq%Rl|7&VESNEsZTQ$v019}UG&GH0C?5PyNAvZ0?m;$P!&l9 zLcarHYZ0Apocdb>dftg91}Hlh;O`xz?ty@3@;ZoxoW0Zf`pNnS{yJqg6VN_nn!)*Z zMUJ8{M z=s<^H+=?SK&BQ=M+P~2>ac;qCNbIA?TJM{PYJuzI6BA3XJ#K1lzFa^cc|{8=2^~Ag zG87&dZeS!K!G(#0dm@H}c!Q8elOryE=RdPHW@@-SLeMsEV`$tHlj~~e^ zD3p3taGZ_&udX9`9lXYnHADL z-ULpQ+`AW9M!`O!pg@MyWC(?Xqe1AELqFl)XoEN;_In~foBo;FSRhKg`a4Xku6@D7 zhnx^rS5aWM-blnMxR;!Vn(8{v|H-;~Jm2^`wN-*X{r8Z*pBWDhgg~&f;kRyGNJ$aA zzEL_$cVtU|yp>fSO~VnSHJ58|&-}g6b+YFFKM5iaZ{GBSZD*HN?J@{hgpL9THFFC? zRNnfhbdYf@^?8sljwp{hdU`Pjud%_lC?X2`Em!*{l3O;a%x;Lso(b3Z% zFfeHQm#h%9b1)=;bm&IAQ4Tj_@=5K(hgYH>0LWxmvEp+75Ccxl!@Hm!j_sk*)Y7^H zf|O5CP^F%mjqMV$!AYCzsHnKDf?DXI5@%bA;qvV0>Qc9gBn9^F-T!S3z)08MD$l6@ z+U7;Xs)?9}pTaZF`=1d@EdFA9O*enfX1Ocu$lPBGCPTA?wVO)+$ASq224(`sF{ee0 zpPG9gV^kJ|MypG2K}x}TvAakE* zg7*(2M(}hVadWZC)Ge@}Mv^2Z3rx`LlKcVB&T;G?7Wi$G#mBHfOa?c+@gZq)$mIWT zIYm%Kzxa+a9TjE!jvZo-TL>Q1U+0@Guy{i|5auetB5Dfe@Q$4(G8E?kdJI@;QZho0 zXl~oOLk0Z5IM! zbf*hE*1<0=DxN&czv&Z&6MW(&kQ-sS*SFok?{xX*aIg5wFLXHic%4rf$!Y(fw%lOZ zFx|66h>n;=Wkz`xLSwWW2kidwynAM~>s)-v7J(Q2$%9^3Ls` z|AFYm#KhE_(O46sLlo#Fj6Gd~2kM@%zI86&0OdgCFHAHCK`Xj{pB)~#YWNLdG*}te zKmk+_2Rwo)%Ln6%mAa;OZjef_KP3?Fp$(T|iK&PfN2DDX+rBh_V}S!4MwiIMcd2#! 
z()AOToKBCfUAqSU9afWxnN=y~9Dd3fm`2zjPc>X z)Zcch^DLU9QfN3=(fF5uzm!{;U?nDc%z$$}665opmi`2XHfdTaZfvB-v^;dK7>qj7 zF)>Ntq8eH&lVL)_E$+vV#&eHJ>wV)N#3>YLbInS0Pa2e_wd>Mv1PAXnHg`Gd5;Bgu zCkjmtvW+Qv@Tc1|>y0RUlw#8ra;Gg01I{T;zbo5_n|X3HrHTr>IS2TSfPNN zUyErmjeZRsodgL1#frQ+9gUA-&X4R{a|1DjO{|aLce)0vyhqn^P*I2xejcV`kYRW4 z0W30XlHGuShJ2X=Ui`y{Z%bzIU7Rp5I2)a06FIlh0t8)R8hlcaPl(*MLDGQ&`;xfv z_e?#Q1b}*};P-`xJsW-6yU z?}UiUFQYuq&ORhB@0aos4*?)UL`|5C9z(m%3OhB4eI+GYazH>aghhYTO_Zwq1vdLq zc=&ou;ssy?W+S$U(2;}D`bkZC_631xvx&_f)48u9yqr*1zli@kT%{i-=PL^|dwg`0$1Z{n`xYE48xw`kQ%ZS}5@{;1ggDa6hiOU`3xBSp1?>``A9@944=rH1_ zs-m(MeC1l4J#vL6#Q6U5 zHvIDsANk$MhpvLRX15EcVkRw+Jyp)Qc~n~AEI$^184RH3&!7K2jR;(-qTj^YeVC3D z6-5chO+}aHgnsb|^=1SDW}qD*Nv9YTa6uxgfL=$Q5`YDN`y~$g;kicuS*Kr}TMm1v zIGNvue$9xaL1uyoi;N&qUVuqU%xC^Q1}8RwOE{05<_PtnQD>xJnj2XHMLY!F6%%+_ zGB~e2)6e7Ex>YF)MRyr_Hbetr=sao+8U2R@$tNkvfyvlg_3@aq1kr(0Bm>^c*uvs6 zCMotO*S)Ja1}z*(AA<989PWI$0hETWrITq@UteEXFo?qza>lLt2pA+#5%scc_k$3{ z^D~I&x&PzyRGoBcUR&h`s@2-{7M zQ&$G+wxh3a2R&8YZZ!RP=*m(FFp9%O?+FI(eH>)47Wp$)mOyrB=0QLXT|_vAnTK$D zRtp%t6hp-!jz1hWY245y?1Pr4;q^>-K!|-Bjk6@?p&=QC5C$D_o8bd<;CVcP1VYM{ z_{Scr$N=iu_CiL=4B_5-do4+RMxG(|4baBh?4-eTt32Gf9MaE4t>i$hwKag*fn zAQ<5#S4$qaQg)fChk^Tqxyqb=isD4<*Oyjzpm(u>qk?bgFa*nEWcW_E2nPx#P97Lf zcQikPu1?0X*cIr=+!h>4<^up{2eGtq4XANs5+_3zIgAw}0-!y z2l90RGfVb^2AFYtxd@h$+7Anb$60)m@R`OG$es#78iD{OJdo8gy4~IEU>~_}V3_B6 z3`{NN>$Un8nRjqn_yy4wN7(oQpQhV2@0l2EWW|Zy5BL$MAqX$}Yi!!_Gy@r{3Oo>D zHOe?qFsLY)R+GoWBWznd4cn{mtP9WroX~i zhunR?Vk_TX0$10-?hou01-j|MUZ<_->n;P|^1>(#00axiUHxksPrA55`!OEk&#X}I zij2@#g#1PfBWNv&pRK;?(W+t}IZn=^)38xC_b4LI3f1%Qw`mY2;wGe) zXZ5llAIq$-si`5;bik%iP)`2N5>UrarFti-xVr8EMP_b?X1W~eD;eE`)t_hIz76DI z84ev3g5@v>%Cph7T6AgcG^3oc2AC`hMkD6iogiftM$kZ-Q3Iiahjc#faXC}1%zwDJTGfBIVG562qZ~e{ Ls`yO7=;HqZQMqKs diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/best_loss.png deleted file mode 100644 index b7ae2425ffaaff8826b86a13d55d39f00f3537dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 26093 zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* 
zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# 
z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z 
zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w 
z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr 
zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_activation.png deleted file mode 100644 index 91dbf117d63f3a125f330ca17f2dc05f09f9e712..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14468 zcmdsecU)BWzU?T9MvV{)g47r_D##!pRRJpsNRcj8Md=J3q>ix{N~Dc+l-^Z(R|z1( zh;)GgM5H(A(%;&i+;h)6H}AZAKkxl@@(EkY%$~h}zi(MIPs(fbiMvjdn5^1yi z*;8sH(pqs6X-(Y^>+qZ4j>oC^N7C`Mj-$GbiKELU`>Q0SOOCcyHjY;2mw$G?YVTlf zLlGfMl7$ccZ06`_>mVg4X#MvWkZtTu1$QYkwc;cjY|mbBAdy%u5npQ(WfROvB=HIP zQ^z%~g^zYn^@C=XS10a%zG1)z=)EgWUA|eNz$eI4a)ObC!V1@L zGFhSVi{}n=T3GcZ9?ty3+nnQ0ys)EnDE-`}jUlpYv)A69n`4m;SE*#$xREE%gJF|9 z?A-2ZG(PNBny#2L#aLXhv#MaOIyMv9kN`fOlVHM{iE-noYdAH*c+RWVEHawk~MwP=FLqXo*P?Qk5tPmDQO-% zc3nA2W;BOmDCNj_k-AH#YeLTy&B*J3prGLL@;KGS==`%=cgD!1M9W|Sz4&%YAw6ld zg44!vso$l`Q@cIaMmv|%aV57sC?di$O))h%FmStkeO17|Y!f<1d3m{}J8u_twK$Tx z;wo>yAnYIe>gIaO_8jYt8#fjV>GB6eMyB81#HPh=+vz!HFu$5)W`|lN0R9XyNI^t4O55~pD>YvZc54?Vz^!<0=H4ez6#OUQxD4dHv z;+^mBZ`*hH((CVXn`|YCYvmB9jEo5feh=^RxY|XOoRiIg`tE&^=JM_P+_MO6l4J^%1 zg^F2+-r39<@53oR^rgFuyj-$4c0_u4x}jh&J`8Kv^x~@GW{$&WjXWl9GjmIQ{z0%- zcD1NS&0sz*`k|nrQiPPIjEoE)AD{isePpuoZ@>K}L=LMKgYBb}^7oJVv7^z&5x!^y0{e%f_clNbzrA1+BX0ll2S4vO~T?5L>An0`=D zkmSPB^w#bo*QxD0cEtax6uMtZN*B9ZI+7n0d-~xH^EbcmD(Jq$8Q#W5)=xY#mUogk zgP)(Dl+#cw;-%G&(IlcBF?<3gkEK zIQSl6txtZ%VH_A9o|b7Q)t zUY@u64+{&cjkf2ud@6s)ug{`!f6Ya2A(Pe@SLOU$OFRoDv3?C|A^Re)z1HY2l{2C) zXFs}f~3YyNZfRKu}QcKzo{=ctfF!4b5|UpsoyGF$z7| zmM!nT`~Lfoss7rq(&bSbIsXt5Q(yI{r%!)rcy4^Ar>Dnw>$YulY<*3#W=c)q0reV$ zb&a3wikZyPmy^4zym@6it=eJQng@sr2O*Nz=;}TeD`3$;7AfzA_fs!vXtL#QbaixWCO{ 
zMc~BAll0=bPRr)h3&zA3cKJ%EsHF)4wooy$y{PEu?y5ce_7$dvA*3&4nLV2SQpQ3j zZYm|C#A0cAnVx6Y!{wRm5J}{Tab|0cv7fYw%-4@hnQk){+qk){Jv=;WN>^qC&y)%o zR}ibajf2C&-`~FmnMACVw@}4F-K;lz_U;YA@`fVX9*cKgD{~yIFTZv7ZkWIS)=HV? z3?mzk!)6!9il&sO#tO$nbGy9Sr-z!VZ{4|5u#~nsNh>`Z85!9(m=Gn#>-F`C>l9s_ zZk^jG<*AV!B3JGr6g>YL}~PgHQyS!9qsxpn(?2%XqOX<+YIXNaIqT>+(#C&z?-1t`bF_^{P8jY1z=8n%+em zi3q;4*>ZL^>r&aBs`mEwiG1^hcn;w!_hjZjZwm?zR`BO>{k$dHsn{D(Znsgay{C|>C>kr z1w_+u`>Z)Zfq?@xAtL^JP>3sesroi0^YKL6@?0FvbsJ90B|2y8=&a({Dk8-Vzii4h zRY8v(C>$?oKt?10kQj-D(YJW)8Em&H)kMLtnEpz%^43vj0izOq|16hr!%;@QdQXLq z!?dJBpZCJ_U`(yd;yJ;hFE_+&I#VN#xLC{#HTlOPhr1tC$k=^4bz6lJBs<~!>5mNw zv&52ml4WygoTVPiDkqa2yS<`hJQoJuJld`M?BZ|2A|lxi{mN&~E)RO8*hiMk_i@Ur zAzVKX4i09;#>M3`L>lt!jA;p#Z{NP%&dHh5l%hQxVj5Y8LX>S>$y`}{x>qzp%1NPu z{rh$63b@A0t-Cy?d}7ASovgb`I?ij)&!>;rl>E}$KdrF3juaWZmJyBrUW;Kv(DnYP z6T|fuPuDBz7C0smb(q-ciHQkX$LiZ4{uy8$OH^Z`kyS4ckSakrLM_vTzWt}4qF0yt zWwl+7v6CzpYX>JOYtyO1ddWY9F!loDa`tMhp0=nKE(@PtXbPS^ej z1yd=nW@1O}^#lBa@pSu=4MbXIjtPNaTxs zeqTbeRM-R&E)4+3K|qkDOTB^xt1F8Uk$ilsTT8w^oXBo5EKHtQoEy(5cAqOK`XPPk zK&IF7g62FsIc~||tdZwTkh=Tn%B$XW=(2|0USyX@0(|mP<_>Nyt9~qZ$P(Z zK&zorPRaH4^^RShbLJJ^JBL~`6Ob_Lty^E5mR+5%ZOSl`CDs=pzzs#av6M;`wdvHy zUb3!~j@bi2S~SG_0Q$w=xUn`tQK0^|I~hGc{_EFk^hjwvR2|*tmv7IGwD!Enm=t{5 zwLIp+Az>Sh*3pQHm#}!^EdkDxl9D)%UV8?-!@70rQ>WqP1Co*#&`LE5okz*;%qdT` zGYl;n6CdT+bfp_DePZ*SUBk5QO?9=R-_gl?M1LSc1_AsCeM|G(#sG^1J${^cA=6|K zCAMI$)4eJ6f`})2Ik6bP5qwL9^GRSPO5$A}ndq~-jJ-p}ZPMqus750}v@+@xg4*MQ5kcw(Qpz&O|FPT*XCH zX@RmcnSlO$Gbv{CgX%|E4^U6^yOb_QxUS6iW0kDEX*Akkb6Qx>qmBmvE1|Qa?b?G9 z@yZbvdqO%2U9{U0mhL|t8L>jM+@~9VF644sj+(fy&%hPoI= zoaTA^sB)!lU4vS0r2Cj;Wz2Y7TpW2g^TmsEj*eO8jJ)R?e)!?gwqMU30&DC-o3&~c zK2!SX(`6Cd%eJ3>x`@nfxW*NZaMEmV3dcj<-Z<1${_MkSu}7}kNAe{ zl;pp!j4Wo+xWD1a$@R2ZEOQru%C5P)hc&0^g&D0bw3wjN0<>T!Q#>Pqx9Opx@kXUy z6)i0-z^~i3ZoN!^_Nm;`WqO?{z)MACWy0&xxv>t*bw9FS1WmFaz$TB|z^;I>Fum^Q z1ni7QnpIR+S7%wbHy5Ft_9Z1m3gg5Mt>g7^ib@%V#onMJ<_N9)`MxkZfH@r`fy)yq zbxuvqe*-hKn4zIzJa(lIHCLR#c&@rb1iZ29sUWLdTGMn%q~ft9X97v1Qg&s6#c8nq 
z^d>fPW2}4t$e-8cmUM%mx|q9&+p)(6nNM+^B+Hg`i;jY9nT3HEU2gupdrzy!pF3}8 z$Ri@2clj1)wC}=DN(Pbf(sK-lw{&{bZ%pflnk=U?*zD}=q}*pytECUjw0@oF5xw}5 z1iqRS9ewN}n_x4DHu^Jf(*Qpc*?DEjl|UQD72b8J7cwoZxpa8Nw=+34V(axbvmZKU z`YyZT|98>+p(p?oq|PkoE|uja*_2~+;uJKdO;<7Ohbao zFqo{y&2_ZMu>7&tbd+CKE-sK|66qx?a=370yxYK~J01wlpBx_kkgCs2A~hNL`pPGwpZMMyCIPjcQK%u{C_^04~(t7-nK)^IHFLIq^ihV zeE37h-e~5_eg9)&=6@>W{N?K(w3jK_+rN1I`ZdwK-@G|{)x<=r&1K}nbM#WOB0xaK zrE(G}n(SbiC$Fg3(3qr-zH<;1IuYARlv*Hj{*ls)uhybE9SH^rBkDH`ijinK7ZzHR zvRQnpkKRB;6cRhzrppk(UmLI-5J}?_RaRCONbv3Rk+y6_w5)niv~;^(d}6x2UfzKN z2hKY?=YXF_v>6*4LlMx7^Nfv+9RTpqI(znPoacjw4>dJ3K4)R0hVceT&~%hiR(AG* zMjm$d#2YtmR8yj&qB3TNTP$DS+CUqj1qSZHXI5_I;_}dRVnPtnN5!|dTZtvqRb@lw!Ov&h4q)+yMrD*>TZmLzVP_*V?|2g)#@GG z61K;X3K{gRIJNZWoDfcVg|lbhC!UWX8vMY3nZSh?r}FdjANcvv9J~GNAMHL97#cd1 zWuTz&z-?wI?%us0Tmmq#W1AUeA-L-ZQDLu0YK3!2)Sx@It(&D>HN^78WJ zWm)Ji(o0{;q^^C5259g)ZM=h(wW_vO<@W8{lQT2LpW4n-Z@ts$AKOZ5^gk)M+1X`7 zo?PBMV*R0cm+1q5}v z1Ox`g_D4}mDu#)h3uw35pVMlk9OWZKw(dpEz5>PZa{0!YkyiDa>o(Uz%>#s=R4}zM zc^97a`t^xQe;Q4@rxF=F#GFfKeEp~7>O~GW7C2cVNG03Ab_I>S0XbI(n)Pg)S}VR~ zT8q^LV)LE-UuN3I+`*2*TwfmPWF)ghTglR-A_0KWM3S1^u${{vo2%6}Y}l~S5VaDE zO}6MN@gy`+)HPOKUctfFd*jejjx6}G3!T7Ly+?r}$f4(C)xL$93~Cxd=Y2Rt?OP!D zUzhqLZ9M<}!-owJJOY6aL3lvk_e)C~0O0z^;>X^;KB)z92#-L4Za4=k5*?M$3{nOG z?GQvD(4TFTf9{JcRDh7DEFY%3p-VRqJ{iz5_$4HC9_|n*`0NY? 
zPXlF<>*3wIm7>Y)=vPjZP2&GGhsgQXm?&)DzCCtjd2zVMPqySr7oaLOPf`H8QNY+i z`OCaP>}Fl5+6w(nAw@ew>)N%v1<*egnfcgyJLrm^`}_NKpzK;sOOQy%I_lqdeE;D; zI*~mn+C~-@7Y|=wUo8M6FQDXM-y>s*wqX!*`F8DkHIbi~c<(w|*zJ7lzKX6cBOV?e za^_+5Y=;nfejost!RpFfDZhbDZhP`Lu$_!VmW;C;%W!}F`hXj|-z`#H=&=@KE&-c(lV|}@g z0KYmxLjApvc@1_%NX*#V&SguB8miRN(vpUb&YNZNmizB_4l@kaUe~oBh+=zZI70P_FqF|Oh@y~K0Tu|Y|^sc{USl780bHX-?P%Xs9)JEQd3p61ZX zHa>CU_1+dLG$(BXMBvM&Wc$1eP<#rUNBNA5x;C~%Njk(=JV3S0Q`2M?&+T+0rb+SW z>{@wVARFjFsn1^%r#}^hQi6Wu?CcyjY%Lb%EhMV89cK(mo~)rm&(qY`*B@LU8x$m_ zrl!jM{fvmUWV#eiU~wu)P*K33VA@{<6@%n-JgAH=NS#+io=M4cUF-3YavV^#Zu|}x zUY^WFA(veE3gW>q#?lROdFi3u>nH@3`Or-QFyosRL4X>l_9n!63lVbVZes8uhkZg# ze>XpXMj)!U3ojAXkYpUx%Sja*#z!V@!5xAmB>nArzWdjxI>|W?LnBQ<8#!TLS6af8^08t?!7ve@`mZ#MTT?CyhZT2ydfyqnzujL<5k2jm2>?OE9 zW^n040_EoB=7a))^3sI)Nbzi&C4pII$2wASFefW80JxL$<4Gg3I?Sdd|I@@8()F1S zHRYxETz5i*K#(^@AHSLI_QkN+Es2=lKm>?M15e|Sa!f=9^L31M-{oyi%kAW2VPPSJ ztU{OZ^eh`YyXSzW+))o7JlM{~m5Me8eqlHx6g{>n;`zIdy`;pwcMUm4#FHG*xIS5!!4S5{Q;@$;wDj}r6FjFJTfi1O+65cAGhdj$o@lZU`Wob6#2)#cp<#g7*}-C6I4@sI%@}s^ehXr9hTXf%4jjCC<;noqx5ME0@Nfz~nvo;`JqHR#J;uc`&z?QwNJ1HCv}s}%7d^0R z*KrIW)ig8$pT@*wqZp$D^U25<*56*d>dPhhw6Rfxzyn*hY*B8{wQ5xZ3M7w`NFNuR zPIvc3xOCQ`o5Z2?L35T!TU_i~HSAsfqr7|&Gbx30=QO;$N)T#+5d1Jckypcy01g?k zr;t?<$+Hb=)URc{hbasR3T0;-}8rzon5#)a%M zB?=`Sz$L^Y@0G_2)I->{%t2weZjtPqXWoOCAWf4nogfd5jrnG)q6Q z;MOfPk1!ZcEe39@%X53>K~P$<%o`3v55Vla>CK`IQJv(nqk?r`T)7W`w?`hzu|;Q5 zeh#H0dDqE15atUU(RUaiBsI6+$0;mM;uOg2h|XX1-?g-8LxMC=`m^3~Q_z|n z2?co;xOON$jFSB5DREyKcn|t}I!eg^M*XOLgrJ)Z0IL)wV-WDGQnC*R&Yhgv=66GO zsz!wG;6aU2ujOgHX9(T-cd!LiQBl{)&uxiY&;j2fBlirJvl%r)DItI(NNm!{yUZ9y z%PX~Dom8#0w^Q~cLGMAUO|H@B;(zA~VA*GKV)?#9fcQtACo_Q4z5D)|kY#QaMHD0<}0=yyW z@zvJ)!?WK)z|d7qHHM|doeK>JtrM7lxJIXu59e{KYG=>hgFi!E?^|HCvM%%G?P;|V z!~Z*tHMF#}0z`?;d~R_&3|J^lKVOXyjDc}vR+k)>C;X@y6bgAfAFpvb5oEJJ^02x8 z%0f$NvUPh7G3EO9J*INm7ohMQ%hp8Wfbxz%YU=1b&arBJN`SKuHW;#qYT-JMIpG!F zJkq*iVQFHPIBA|mliPh>{K3GbTtyMTXgvKlt?0jhq_Bwpm*!&Lg@8zQ8Sk{HdwQDy 
zB!q~Ik~<`tih9)Oq`5MGyY2Vj;P@o<1U{4p{y`X7-O#Fz>mgoUOIuiiVmu{BOU|B?l2wpBj6RQ9>2Cx(#8HHVBeGzi@S&HHUj z%iMz@x#g|@yN2|UBkA2-ZaY~Kg=+tiBYKd|l}B+vf+p9?*xTC&1_i|dz-YB4?NxBHvkQe`c^{l6$E zo6z9*I1M$*6JxBtnvfXGkz!w$AS|5=i%lL7Eh)0ygaUbG`_7$7-rheZ%;He~#~&K5 zvK8#CY-}x#u1JVT`{9_6AajHi^YrP{?cCgHdliHCi;HWcMER5T)rd-?rmntM9xAE^ zCOl5<*17TpUpm9O0F?o(WW6W z7Y_e~$mH&6o@3-OjpygDe2L_IS#YhEJ`-bD)Q{VV7W;P+QT}!>e<7uq#Cc-2l7?~0 z{-1w728HQ?uWvJ)6zP|;u?Fe?T2)-F+81T^^7P~7rAh$AjdqJ&PF@}zLx?A#pf?%li92kDr;>zg=`gHebkdMbxUbGw>5 zFsZHjTgoga4}T9Hck{1Prl12@Kokb3VhklXjy~qB!hML~#?Pu@+$ZP55to>_|MZzN zmR0_HF(T0P@Fe~Y4H|6 zw~0u!iR*302rv|U;@{}_Y1TFX=CNnb4#6YF4`*0NNJ!8uy|%XH-j?0ld3e$RqRgN* zwxp|~TMOoiGGGuO#7B&yf>GBS6I6INb4jQ|CuW@$gqjEYB!*jYV21iVgkb6UvDsF{ zrt_!|udE^FZK{O7j=JLU{rdHmbx%*T&a%LL30?TG+q4-(HcfNye^NHU>;nN!KYY+h zn8o4h)vHY({*ii!<^U0@&gK1O9{Bi(IP{$(I45QUMI7w}OL6BYC?ti^`=(pjd+MD< zN1O{soZWp!0X-Y7DPj2&{}kkCmdS6eyD?`|eDvti+c$4)Eo$GqQ9XCgJ1iN9cP8Rr zs}-d+f6C?`Ou(=GV<7_$KpSa-nTY_=@HFQ2{8_ARx3RG$U@p90Na#G4;=#Rps%~xt zCr+H$V{py9t9-oZ+jvyg)YNnslCTEg5#H@6phyI5AkoeZ{>aw&$41@K(fg6(hks^# z8vTpyqq3#AFV%p5a~*HWe;K6eDF#aj?qR^}dUzf)&c~tn%h>+Qk<#i+i;?B~=;MUM zhbW(BIMfbex=LtEe-4OV>@@Z+TsPj7LMs6029o@y4`)0%d6)0NfvUJw!l?^>NeH^W zIWP>2wq~9|%TK9)4D&OZq1 zIL!keA18~#{z&&ge}7`|TGi0daFqbU03%0a*mmqtD6sXn_1w?D()o?@ZAuFHUuHX_6(K?mUih`YR+9XCF6FPVX4Ar;Fm2tsl`EJOB@~4Z{qJ zw!-T<{Rn7`uc!;aN1w`A*nbUuW@S1-)*NFqh_X?H1oofuqB*Q+zAHd%QXXHQ;ZCw_ zx?~_^XyIBJI$dEsjmK{Y z3kWeahxIX0cBz}i?c;Cj388)HC-}!&GffNiAFQ+WBs6!Vq(x{u)eAl5zc=g3TfQr@ z=A#8nP=@x)DqW7uFBh)e=e_Qh0cVJERBgnO1W+!F_zf*!cuo#3dcP zm!Ywf=o9bk&Oo7!FL)h8R62+IK-NZ@RpfG{o_G^L4ia`}QR$EL(}`323j4 zPXEsrzZ7QXl8D~SDK>DI*Xwu3?nkzy7GB~FZVnRL6-8fscd07H~{)kHUJe0IzO`4zBCtoy(+|&NIB8u{eDqUa34vh@^yD zGCrAt{@aOb;OYpnTYp}`+%w41~ zefsoCpJ%*87fM_c9>$1hgD5~~1o!UUxH(EL^X4ESIiP9|S+64a6EKU| zef#cR%U3tP_bqYgtEuC(DSCq2sug+%xr4vxU^WKo#E=nn3iz8S*qFGE>rne6Tp*A| z){E30jlnBZP}V|4OqJ;o=SbSOutdaX96;g2yoE#3K9*2eCtYr*>*XrLaNGc2Q&2!a z44ha1{0jbA@DFC2H^}`tWry5XT>>irD@Z9Tzm`OHDS@bkF5iNA3520BUWv1Krdf}e 
z3wuq4bqx>?kYI`S$`f@AdRV&qcyWu6GN4Sg&KX~>I%v&=;g{%C7*MG}3pc#h{e6O$ zP0_?1n98PGO``@89$F}PkZLSG>#xl_v5sUxV4)CMY6&db=tMiIRBFJZM{sukDkar+ z6h@#R3McGMgfTxM=W2EEPQn1uk3Yf zKb}5ZYZ@fnY(Eq(Q4@Ry;f&+)`g%!T4GW7dnSg1lf*UU2_hI;f7oZ^am<==X zGaH-{0vNh~2;$Ly6sU*Em5FKF%wD!0Rx~UL4qX;0xB5<38IoSlA{SIvjhQ&A~~yM5KvK( zBpHN55G7~HnKu{Rw_o?|y8Z4Q@4i2Gyc(lUX~Efh?Y+J*zd7f+BY$3smSz_Xg+if~ zK66TuLRl?Hp{%O-c^&@Gw-SxB_>Yjy=?gYW7Wy{!y4HG>bGkN`rWQ7)MwfrH)3d&2 zWMR(BCB${~&~Jt|HkQ|fxw*~$_6uAV)&|^r&aY|0MK)NTQNKo^(Cd2N&8sHE4SQRs-qhE6~I z?u-{!rs)iIC9>U%&GFTWhj_~ww^TosE*taYyn6X&z-o`l`68D2^sc=e*_URW#Fz(u zSh`NzCr_#eiw;e@G-|Y)@op;mg+jT0TA3#g>$>i9Dti@$;(zkL@OeY;kWPW7K=9#q zy@i<{Q*O&M#o~N6z495x^)g!zs`^#v9~?V!K2(@3xFV)-X)-n1ipDWS#8I(QeEIe2 z)vJ|rto3qoa!T6Ub#ASvZ%EO~vq{~*qtewvE>u`0YH7oU4ZC^Iqx()pXV|=<|BIJVoTz8hW&f?{GO7MBJ4VsZp81W zR^&w=P*_;#y4Vx$u8L(%EX|Jut8OXXW{|d#BGddzudFUfo!2Wf*LI+JIbW}A+YmKN z&a64zJ25eF^Oh|mws%gCy%b7oPI?@h=sDM3)E$pG^X2+XrIF~6+dLEZC{$oa><(f z@uNCG$1UEIcDKLrgX#WbYmaP_@b>ne`_8h=AucYiWce9%|E5*A$%!a=$@nxr#{y5l_Hj@4M`3#=* zfy|QwJBOMxcx!5F$A{8P8o9n#MM!uI_u%4eigLrw&SRY=C+CxO3fmrEy}ygU^ixSm z$+(lGq~}Z5dAs@C@bK^>re9ucj(;iAlxY^cr(~u(*rJ>(`UKl#6DMtvCoS*7!UAKn zqLLD=fq}u5`UI7329Kj2Ws2D&%5j#{Eb}2DjQLZ|CJ8!)1uZQt3=9lWa$>XZmz++X zylE;B+>oU1Yd}S)e9}+rOH3^|V7Y8jFdKtNi23s6f}?{&C$;BgF|*P@L-NpoQtbIB z75cSrZ>^uKQ<95!n(UwQzO!aMUGH?^az4VLYj~K;-QAs*@fc6L?B4mQAtf(5rq1r} z7;LWbxA%8)-4>m^+XDJ(V-xXRr-9_GvHTXb3}Y1!5090bE)&aSD;jk&?73@rtMOohEDUY+=8e*2HJqUq>yFv$C=> zZcGUc5-{tpDll&q#zIPpi=~E#hiQKPx$5?YEsR`U6`{w~1cJ}CyDz(jEf!4F$XZ)j zReNmQK4_K`>CDW`oS+=JNhL*7T_r(TAhDjuew~th;pN`;={h_{L_~x} zwxw=C>i4CS**Chb8W;>9;OS(ggT>to#Fl2Rkfj||OHuZq*;*GVMejP>72p%~km-2B zxnKc}7ngo7pk|sho=5Z@v|Rr27JA&i zZP=1+b-F+VhsHd+7P?~lY)ahHZvom708I=_ioQ1`_F z9U5BNdX$fe;k*&!j&~ly4x`Vsorf|+iX9gx8%eUyHacje(eu> zVq=@Gk#;Ma9B3SG?LN;fTc3Qfai%LkXK-le&YhAEcJbdwR(8j;b8?onWx6lBp#Wi7 z+ng8$1qHu*OEH(!hJ}WjX%@P*7A#C{ruuOzZIb?oly;(yXIp*uwCXC|8fo2C`th~n z_~1}SM~7!#UY>VQP=8gHdYXP#{vo_wF$td|y6I69$ewyW36Rq7>CQ~7S<_#v054nC_H9*T)%urQAI_C+fy?X3{Td5 
zTwI?cbk3f=zo!IsStg=qq0)V+_VLvut-Q=CHz6URk9gV#*wWoT_t7Y*IwUX^Qnf|B z3f+gJQ|QGPs(9`P2Rt#Zi_@JLZl&rLHZ?U#;?fViSlo_P=CoHB^z_As3mi~OF+)BotEqCXxz5@g-U;yO`nR=K9(<0slf2Kd=re_|@*HiI6YeER~OLN`>Mv{bwa zm3-&v26VKP!9i2$nm0GrsN~t1c~u8zwYyzJlT&nXm|i_h?Je_i}Oc6N4^qzek6!nQuF zUWK)P{JL+Vh+fXNL(2M@}srD%G`#>Q%D zX-$^%RvLZ|5egD@&P>rMOhFJG6c*M1>hKsYbX%O8ZL8%m_{^9;R(jA!`K#Vx7M5@k zvs>$EYuLkGo})u2YUSCDjgCI9&{smOo-E@o{4yUM9lh1TJWHw2%~e{EMTeUlVf8CMqA8_(G!z3 zGHUQ>HfRf*3rF_z^Q+K{j9zzLL`TBADBB`QlepYaVE?YHOjgjmrRu}I?M$oBE41yC zp7MThv3*`nd{LR~x?5OG*;^Ky8a&G^P;R0&m6)1+-&nsKBrNFAfuvUO`TnwHVvmzTMK zuUhrSX-!vmNQPBEw{LSScH3}R4a=P@VGlp%BCTg&VA7gvds?`mv&46j>>9~x`Y1ftM4F>l2KrnL|uW^I6unijzl5 zkc{`zT~D02&e}Kg<2&6@&f9Mdg|*L5-y3VKB#AhXT4;N<; zsysQMQiD((YYA3(92@SY;ma=9aDON7M1^3xVO^ZUl=Q+9z=v`5^V2l+^xP9xqc=_4 zUaO-{lCIsT8tl>LVo9=a0VI zSo-Oc{H06J3Kqs>=l7Ln1sgxp&UbPeeY>7?M=$4?lP6Cq=R4V+K6T2l)Q>aa!prBX z$r?}CHz z&ai#Enql&X*}=0xe9|m#GZ6%9N&D_EX>HZoxScEA-OjpwJ23URa4{`Z6M|mr@}2Ef zQnj_jmu4cTM%q&WYD}8bjS}P|Zt9l@MpQ>i4IpakSe9p`NLm4f9UeW}SKvC|+0_+| z)oyjt&ZZA^7Uz|>zAJo3M^{dz)%;{mORQ;|!*|*%xRNAb@}}+Ew?|@=>3nNjOWWJq zfByMrjwvC;Q;WlJPS5?AbHTd+l07k`b@cQYhybI1^gn!&F>G&%>$k2fxx~)74lkDb zON;Zn5&MD(s>#MSBWEC?{R09vvO!!K;1HlxDCSP-x2SgQLcQBS)%xTl2CMQ0*d-S|8DaAF#MD zRCi>%xw%QEYUkfa2=6+qkdU6ckP$Y0Yc=nT}52fJ(yEx&)Q79imR@R-<3viez+` zF6L$DvK-zzh-vt5of8>iXd(aM+7v51+TIlAr8g-dpvU5dIoud_;N1@%9<@-5!uF32`( zPcx|81q7Vs%Gg4KXMBKq9WZBQH{4>R94G(wnA3z|Jj+HsmVoH%iq;D=R-t098r+2o zuUuSQ7~Ou`+(ku7+e{6ruP_P+l&cU&d}!06bG?yk#bOpNBEM7sxPglKMG*yTM@{V1 ze_>)u2Au!Y*r>LhTO02zk%5IJ74Y0|?xn%!ka+KXQr)v!LM`Dx3!dP7dgO{{oAZb$ zLCUCI(ML_V%M~yaCTr!XB9rO}*U*u{!7J25K=evG6?IlEC@6@&FqH2b8k$T>MSOhx zt0q$d{>Y_hnMJadAI+mqCnEZH@#>$AXI|gsRlzgn7zKvqS;(u87dR-CH_K9gN3~eD zx5|eA*kB5ZM@%@z>{?Hu==tjIKzDf6-;nG~W#2TCXL$`HN14b|e0#CG{3&mOTB>%0 z>u{c>5|%72*dFrytQI|mQXb&#y+=n!XMWm27eDlM<}58Lx@EbBazaSS@1jh0#10DO z0!;^~t^elFyJ<~pDbW?~?^M%dDe^MsEBf{9HgMe9k@H_=mi}*l|FTo|od*w8fVv0) z@aD}4Hda=KFZMH|9oQ&G|80jgT8i$jqMT6IH_leiG7sImZy&!=4J#WPTRdhc0y=$s 
zeaDByE!sq)qoTe9AQa*-1UaE-j#zbJ6O)eex4fS14#v>tC;5mu8Iv;^hY=WHsLHYm z0TqF8ujtS*9(xw@G^ zEK-}DoqZyC2e)9f78rQ5ol8^R&MxzHem+xket!OwpdcsekdmU}M70!)5r$Kbsj~8N zWdIjf(S7^Af2P5ncHQg~WT2V~c-a>5jHP!OU~WbQ44Ru7x@cG!7`Ov0Me}wYo%TG= z?G%ciRg6b4k_^q2vy!7?cD!f&es@bL&nAE(WeLws-uLdwM!2${4~d(Yu$i|;(B-8t z;cHqk9=lPb*RET4KuoMHLR3sl8|<8ic^-PI>+<3>-bm;;XB7nzJlYQPYDI z1Sk}aqO-Ju81{hlYA5icb?P)d&l z>+bMuI^80Rnmsl#0YXMNAZ~ce(w591t<@+t#;V+DvTsBcLq;rW!~Iv|CepWeQz(}> z|0jLMcm;z;<2S52VQy|tvuTqu07S$hFE1|%b_7~UeZ2|+PBqc8BMKQLtgi`-f5n8W zgra)+$`yi3(8SJ&I!#oeY${{Camm^VQr2zZhZKQMm?divGq&G9K5*6)!b?mv_0-*= z&-5Cd2K$rz?c29OvsJ5B^;5G_FJHbqHn9x8baK9ZSsNiGk8cn62L5e>asknXv+ev? z{&bVLWcBp)IFB4r(ACwY8^T93xRe5_3(ye_4NZ;C(%5!Uv^_%Re0;=`s^c~r{pR~s zef%{}qa2!8=v8}C*ZU`ST=gw8w2XA3NZ-_~ZaC!(Un~ryDAME6n zMDI8>S`dtcL0l!jA6Ti!|BzY88E+7hXfP9_Mf8aXU#Ia3!3@Cp)lwGw|1M=Vi{s1k zc7X!cN@@D%fM~mRMhm$wxr9}RXaWy9eZ8^1v#-y{V&l(0Ki1truuC0gG7)NaZf9`vlQR$u7FMyq0jr2X=F;HAcdVPS}1p1mpc>i{o~Kfd`c1o`K+eVw{f+ zb>W$OrHrH_{hXvUMtv1!Wj|E6t-t=dr>~nWIzqy$d7ge3-x+Of?OjJM-+ss_^c)1e zCgGj=tvk=;rgqV)# z026&EE*{9~2~(1nfBgFOYd$a^6(K^kNlMTe^zO%xw*=mBG#?f(zqaPW_VJb$@zM_> z5L+nGH_)OehdllVeSVm*+_HOj;yT)0pD>i)xN$?~%o&asBN#*XgT*pH1F;y^{eYKy zh?l=5>0;J-XaGTcM&LMo>Rw)vA_F@Jc@OjHp{(`;Y-}G2_{>{om{mxD6ILp!2(~DQ zSBR7}n&_=2j4@c2u7QEX?Ck85SzsA5S2|EgNXW1PxkVE1;g+laoI(-QOru&!gdveI z?1q|E4BN}g&+1*hdh8HjM(){-g8$ruf<^?$9P`@YCE-mgf&~*XSEial(=>|M?JB`Zf(~Xos`PLOMGcqQjwf3gEFD5dB`*O*8 zdd%wi^T^1d0aS&b7!eKS>E8d;pBTD9I9A)YyQ+O10EP#dVtD}ymhIDR=78G_J9fm) z%s6;W;R8f3gQK{#v@`}#&b~1U5RhKKUVLw_A??nir+)wacTnXGz~WCrLX!UY<7a6> zZq88PyMwApk6B4SUs_tigy-=Jgf9qN2t(4r0%k|Y`Ea3~myNSG(9^4FXao_($Ip); z!?Z~eJ;@iM2`eWyOZRhdQqrLZEaKWI4Nt&#YNQ)RBwx%*z+d%4Nz1viQZ2VzapQFp zfvc~tuf_dZ_-Ocx`oYZ9!0hrPeKxCbwdZ$yr;fo=J3&ync&?_inh_G;ORiisFP#za>##i&LLM`RNf^voPts|Ws z9o{1YAjvpiJ|`DY_AWYyrmz(>2Lzj*`8kvsY|Mqr9tek)JA2!&ZNWCZfB$~9PF9oY z(M^%3?&5`HE1hP}dCF_>@F*|uMwfh`dxA^6|8_+TYcgqc*y_y+on?w(mk!%>VkOTG zaad>1LONRkDNf)@K-Ao{TZwpxvb+4tqSGo!EkoA zrnMB@u3r7c5-WOv6=5*z1G({fU`-=591w_(l187y#lsEwq2Ye66Af|VUXy|CG 
zzOjW6AD0$K+>fV!`1r9OY)w73z8-w_=6JjsKZ_4^f^3{1oPo}vr#%KYk>T*;G8;Sl z!M%G=g8!EJD~?uH`MJXNBKxRyVQ*MiF`pd7Q7R94DC|Mg`J6>%I(VV}Z{vM=K2$bxLKg;5D2z$z=EEpj&$90_ znUj7AMUoI~=t`yK<$gBfS>f95i<6tV;l_A&48Nlfk^%~E|A_l?UJ>_AFZ3`IyUVD0 zHS&X&<3Iw!<$FWQpd3P9K5!?9l-mn0iyl0B^r&EYX9d13EqQfuQ8xCUX4 zlBmYDAiCt5Oa(%Z+n-NVO|Aj)%Ma47Chk|<`P1D+jv0_Pq=rJ-R4-hbBRyFGL2C?m zhZA;AHc}F+*n7|}+dHaJ+xZ-!1Thx)fwusUn+QIeO$#&BiI<2m?o&Ou1|s91W_q$_3gIgL&kXY zk3`Jg(dsLg-a4x=ikSved2e)pb`6Mj4lo_Gkf>0^3|J8gemg$|^?bx+gLEWNnE+bk zyZ}c5U1WE5mLb2NVb=58kt-j`I*GRdvOxrLp|q?lke&su zk+x(Ii#=2eR5UhK0Gkt5c+yQM&jP{8e1CaGAA;V4ym^+C_dPZm@27O{J6aSThX4F$ zZP7WX3{cm8gD6Wz^F4ba-V~4!CQgX1E;f66dw!FKv+ncM_F>(XdGFv@CAXg^8ZGD1 zqlzzIz9cL^URD&ds8jDJ?|pbSx3DF=vWB!uu|zLsr!(ZwXc`j`!Q!Cj&V?_}vTXT` z<@)gp6!C0IqiZX0b;eMj3Uk9$CTNk#A*$MuBS%g*OJRB>mY@9THs{m*;h?C15u;;c z5y{^l_Djxw6W2{_Vxw@d#p+KU&HJfk--p>HfuUE*s;d65;%wcvt-FdDQa`lMZAL6e z9?XBK!nXa&m%{&#cvYh}6I_We_~M&4Z}QmIc!Puy_U_W{6t{g@n|9{qLQ@Zn+i{N&@oYe?*}mCwy6vP8?9J+r-heFb>y2yXYDmOoDU(WkKDk`>8>> zFi0HvkAcFK4|WFjbT1W&2wt~6QdzMWsfjrbKic&(is&1~|4E-`=>?GzB!7u_33&DP z?c1y8%^~xkE@15U2Vp@r1g5d}B4a`)8vIK-aT&VZL5Nux5|l1qevYX<2~rq5M}5p{ zXb`0b2xq0f-lDxAA07eBamSo5LZJh5mlzlrxOXV>^u28mn;M~fYeX&mBX1IC?#KP7 zyosbarWe1!z_=GLc)X_YF$Ce_%sh-<)qs;KfM=`c|A8ja$jDQ3s_V*TIYBtvbg{fxNw zV~X+o*Q^I)_iOcZLvQnz>j( z4+Dg(JrbS}+t0jx`Lop7|}T5r3kj+(8{BO9h@oemI%A6~4_AkR(E=(jI;3)wblZ=c^ z4Xi&&uyJVv{pS8v9Ma$6soU@$%;WpzmWh9h#bxxyLuOH9i+g|kQPol8H3^sAw!VKi zlrOQ$chAawF}B-4ktvB+&oAd%HGRgapzz%8oRZQ~MQk=4K#a{r@7}?j+=f|ttc-hn zVx%ECm?%~-!Ubp~1B)(L{;389{tuZkJ5>m0AD3qKZ^Yt6B~65vx3(ZGER3s9fA#yt zJ?!H_D}ZhxYhlTIK8ve#Eqm>|b6mj{-7n1Hb;+boR;G%o!Jr30-#@msu`w$2! 
zeCa$osW1v&F@OB?aSa$Nu|o-PF8*ySR|r@k+NDmN>V<>_YM}vxa)wQxB9T$P7e5jl z04sH3=aqiZKUcc`4W=C{cZ7Fs4H?Z96cko4Uqf;te8{qk^V5I32>5h)ACPFo5Q5Iz zox@1`b?NsNz4`K-g)F8VKh{5+E3R7)z?NQ}VX{0;)g@LPP|6usis`l_uD&Gjj`q5B zqFlI`1_mTn_yJ^%0aTy}MvA%4aezAN?56~Rynoh0jH<$RgB;S$AQ~WD+RjZF!B>3J zB7f4qq<$68#*TF-a%_j1`BmU$tf^>!N{pAJEuj6L!d(S&gem%aS|}V@kP)M%+ue1r zf7LJtV+hzUE$qG|A|A4#r5I4NbVYy1>mx3I!a_h*l+6{o3)Aav-^4YSwz;{jI+5ca z6qG>tS@Twntp{2bmQCiTM*_gIpg`+e+fvF;WUrDRCaR^d+d}rr%$t{2#bb;t&5i34&m$+jDpHCGIv=C?e5g!vHffc-F5Yq#bCpUp%Iilv1R9rf>D7h zS~i32dv51@JTQ_D5%(~6B4(~yU!Cmbyb@QFVJ7_*t|R{?=2ha&X^ zoI;b{YMu>bfzfb4k)BH&#$-gquH!($^`kej<(kA34s$ibFTea|U{L(F7c5AS@>UA3 zl{yW!8v9SUb7;cy;bN|1kXWbLmt-l=H0K}%rVye}E6=VTiW3VSqioQJ;`b|lr|Ued zQ5oh}fO+K4W2ES9BC$h^7sMd#sK0+5<<4#>`PV=2NPtxM^J)3|Q9UUC&>$`D zjI5j|xO%lyYh*eL3>7(=f=fT?bz9ZKgaBV19TQ^=Yd9X%8=$`vT;uXw>T=CHkBuZ? zYx|FGk^oty3VMmSX8}&mK}85UZf}0W)(nFIuYNg4wXln;>t97`|CI}O=d601in%Hd zw<20R_#RtgX(3icV8(Qm0r;)eV52=%K-DgE)54L6LmACPDTlZ+@I+0!8e=6vLQG;V z7vK?jQdsC-uumK>l;ejm_DnhUeCXzO?(2C)pJ=bU5cv`eGMQZoNrfSt+zj=h5$Kv& z(4BSg*o2-yHYaH2ydsPfyu{FBWk{!kRDm-I36A64LX7*si~>ZHRYS^~_;!!G-%iN_ z2S&6BgMBWgi84KvMSDUg45gI@>?PXISDJN1%CPD(Ln70ALsr>z{#IDh^j zPQSz=#0S7|I8D~8P2v$uV3D0NBh?H9m2rs=TXUMhBQEXNafdHvGH6gS?bSiGn(V7( zZx6!Jkb5p+Kwu5T@{CX?$4hW*fKa#+;l%d`!?m!>%!_sO%t<%atncrwj`SF|uZfZl zg0_JMiySd{z$jD`u|#Mzi`PFq!EmU+{EL^eHAkOv`V!^MVbnW=Ge-q1+6JqJySnt0 zQ#706uvb5@1O;YNM-*%Y8WMo8SR0G_OlktL#N1yD<;v91pCA zcc{!JUtZ|a&G(^g9x-n@wsN$m8eTpxvv);BphKL#G_vgkr9kHj=Y_>WpPV~f={)Dd zM2yZHg&K)J-$#z_^tli6uzB91HMjop)djq_sUbK}2VPVHhVCE`eMo2($1X{83PW#0 z^;3l00L~i`2@TDNNqZv=O=;R9a%n3L0%>Fz$AcqWDFI-jr4?~z2E;Q4YSWRC!NHIb zH@7mL`y#fj@$JmV?H3yqo<5(*smB|?x}1f&#M+0H_=}$#PW=}W%NHB}lENZr8esKe zaFXa_#3D{_;rz{m&f(!OgW!wYIwW$@IClwIN0CrLLK?mQC8Q~ylUrssGil2<<7uSST|w2oX~wT6M2IR#&W*dw`~ zjbe0p%!EV z$nYKY@k&ytY>MD%@44K+xY$AOUIhfj=eFRm2uC$Jk?2X!5@x`+{!M7wPR+r!mMmka ze88Cz;w6{Tns8FcfAg%+AMb8^dM2R35D7qL^4Ocjo8G%A*VMg~SGv5$ YSulvYd{7+pGle33`uwSslb3G(FY3L}e*gdg diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_model.png deleted file mode 100644 index 3e8a73d0228380c795d220a1c8de772cd9a20e26..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13683 zcmdse2UL_zKr^f-G-`zG1as)#xH1ROlUiEf|K)#Hy%82be`|qkX9aS3lT4~r)FGb zxVyjhT&tK28I`!6+cFzwR}ff~CZLEl-7+#E;kFJA8ZWgCTZVdHBy3gw5@?bw$sH?5b6`1JQ*mEd})6KM7^HhU(2VTQF;!Ztc3 zr*HrC^z{37>zY|Ew~7A8MeUWZZ>|acyk4)xW7bk@puWDIK9N1$72IhpUXyLtEidiV zT^=gxI@V*?z-+fR9c;=T@5{C8R+b1=&YvC3?Y2)@Q{R+r*StVg36+c~5iZob|D9l3 z>b|xk*9L?rKa`DhmefX_;$UPx+QA3Ru1pI7mm@hzAeQS9vKh_`AjIIKBSU7%J(Ywzx2Fnb<$K@w@N(Qa$IBxLtd8 z@N1>`;k_$)t_|Pl=9F@NzH{eJKY#yD$H7hfnx-}Js&s>r_r7~Y?YhpKJaww1wbc*{ zs*YChweNVhj^Pof99kXaEt5Dg(3s@Fs>{LojW5g&F?CKUDBRt1=1D|}Q0|$9g@t<$ zAJ$#=mX1wKOytSC)o{hwIMID}Vk_>a>to@J)!i(Y*F&2~xq zceUN*ak(d~=S2I;)OWo1nh8*<*HKra!$xs(dkgaqiUJ zyLZWJ8|W5s3ch>Hif*xCe_Uq=7|k;a^Fy9YK}J(#M8x-WYO~J#>}2YA(BZ3}A}mt# zH4z$BRaFix>&GW2wQOzE0)+G%B9C#Fp2%9VRolUVj>l~ZcsqeRnSXifrITU$*uc4c zIo-VWw1R>{O82Q1Yqvxg(vC!}E(bs;jFrF1=aZd~LKWF+SdUJb$QQ zHp9C8m=~9XT57>e;$YsWG8>PKUaQ9}t?_{3>C^Vj4$NLJv6jmI`t)%|;k*)-r|#;S zgHS41G2c}F2^XbiwXg8D&O{RJ=H=aY=#aWq!E~%~MHtOory3-hzAcaL}Jq+Pjd;68p%-)(??T3I<|Tsszr z^r50cr9yTw&%mW~6H=`AQ$?%9_={Zw6t7y;VVb8P4V`e zsn7T6#${Ut&p($F&L2~wF@+SwTAZUM2Ah+xj=9^OtK`(vC?fYdO9FL$%=ML#z$_Wx zJxXa?M=Go*2b%+UlP(uOtV81I4Rm&PYX11+9Z}QD@4a1DuU@TZIaIhP}`(j57^&eUhg~yTj>*N=jbjuq=jJ@^P11Ekg%O zMMcH*rOTGlcAa{d)sSh~XsnPH=F-KfrR&Wtos5`tnOm^R9XOnH{5HELBH2iLaWNOA zxkK9m>qcCs?dRvGQJZtJ=jS}W-ZwnYe!FO(`=WM$=#_VWQuG-oj~`&9UF^-nICkt< za-&sYGtCg!aVQgIo@eSig(lHd!ety5Xtfhgx$d)$ia!2NpHAOkE)m%+PrWXmiNJRs zezlT%Kh^4sOs?#;DeL9me3MltTrhQsB(H0CsFU6<0jE}X#_QLwo8+y?+UXWd<@fL3 zkIQJ;+hPl_{dtmN1) zDtd0+x^;+A|M2ke$jC_EP!Hwq2MFxGU+-@C@cz9G9a*7=A~D%y=gn(Y{BZlskmn*R 
z=j~9P2aCo?%*y&?*>Tc`WT$r!_H^vm4Jth!_*4A~3k$EgyLS!^r4BSQa%(fpKJIz* z_H7@2W#71n9Cg%f&2dO#=aETPjgrr<)lgTD;#r(3oR~*~&4N;i8U~zJq^5hFKKa*7!gplOE1H-e0dy3S!2!Y1?Mc#redPl)5#-c@akj zvyP+or{u(Zv@&vc+J8O0w&mIJmEJODW@b8h&M#tB!c%b7J}mznl19X1&Y2N(T2XP+ z_U!{NJe{4h8fTv7^v!HPUEN-E8>xx?Q}1rou`HiEQ>eT7!phfdl}r8o{AwDya02&* za^BTWyj;d%x-d8W;lqb&w8L}f&c)Q!)T{`%ni_5o5;5~3B?~Y^1sg-3us{tXr&%V} znzfv(Buzt2?Xzon_nGUc)~e{3dh__L-mC5hm6d1pQPo}Nz7|_ZcRLxTr+VW@3URV8 z3t6KTvP^7sB=C{fR?!c&1t>f^-iM4!w)?crtNZd5E6gYQ>q&x+Vbf~rhXQn|RS0{x{NdMYf+QUh zliUXy=#n0DS=8*#?-b&Ar2M0CN_;{><+VdGfd(Val-bNisydaLB2+kMtMA$y!hrL`nfe|?YsBv$;osYYSBl#|A-Cz zyzb1_*X`EMH5nI+Rt&Tj8X{C3=xki#XV+{NszKpvqAJm})8AUQl2+xTC{%@?nY(|Q zTI=cQi8A3pYLqcRiBE@(#adJ9FWVxX4m9U7$G1*A<@ls!;BvCMx88+ir&v;4d0_EyA_`MX}V3%-nEM-^Vy_-rDH^Mfd2K7-ZEzvO9r{JC5Hnof+%h zFD$GDm^a=V7j9l2BGFfy6sV@A#v5AT;53v!XA4%@Esl+rI0H@e4tt;!epy*VM9pwk7oU3UK7d%Doggu* zu-8RJH1 z3vQj0M;G@NbsoMTJ@RfBsRTnFlSk-gAKzZ?06qd%pPriPxwVZt^@_DL$gf#NWLd2E z=-Uz?+q0=2bir14#rVS}EdevlLmnp56Q3^E83pPaPtVTk=DUukwK#wAlAL5UWv6f2 zut9yuVtTMq&17_*5hGBNrW_)EQ7K%e`QE*IX+ZYL?As;Jb4U$eA4kaNj3>p%Kj(Jo zSjR6QPzo^md}d<6f-nwE4Gq8Iigv)!7dV`&0MEJ}9v%cSY}&lpq%26ZMAEVUNm6*< z*GxYn?YcCh5&<3_9v1Kob)YfRG99sTmGF%^q=Y%pn!cv0s%mI4*&6-ATJKzPniU9&P;}VUn<&0Jj3qYyz)oa$I>oBO6`kwRSbzNn_ zG?Yi3Wh>e3S+j^I-5V>|(v~h=n*8X|pDdcPZ0hc9;s*jD*;D8AF;=Or^4W13en;-n zSCil?YTt9#yLB1}*qGF>0JN)3oSbH5;j*nkhfI`dR*Gy}et7vsP~asyd&#BD-7l|% zd(0%HWOr>9G{`%@VXtyVE?27cNQc5ndHKG_r?{)edOr>b#VL>)PiP5BHF{s`mG1g% zySN+&RxChT(Pi5c5!vggl;ZC&aKkrLqzR|~ocsd>EXRE7mpNZ+CMoMYg}eTapO3$KWr#>&07{rB4q9k4ee7AFw6V`1RU1|Ah&ESUxFd19-;HB_45KxFD-8 zp?nGD`s51%#Z%tg=kRFKdrIdD)(ACmiCfD7Y&ZsjT$jHpaG&+C)*Qjl^$;UI%8S+o zvSmjolwneH?%la#?fx*lZ24wtdDA^W)1Bbrm9J~MAFt=$2jcx7a`XR;%HK zjXq#=45UQd?SCp;Gq}F(=9cXLTm<;fZuD2p;9tBv^LFrq2WydQ{NH~sXJcd2kmq6- z>W-Zhyj|4(Vhm)5lOG$+2hZ>SS7%ti6BVzRK%MmihVlMWf5;%*u|}sP7jN zaul4M7*I#T4g#_pr*Kgy&iuMXWxgsh2{vEen$1mpy?6I63iJJq>()IhEiKiVUAb~) z5>Qo2XAy^N4N@=Zz~#3hcAYqK-TLpS0L;LNslWbuf-!+lmWWw{;`5LbR5IyB_ 
zdK5}q=d}Sz8&lJGG;}F>Gj?vNo&!On?&u4s*#IjelasO*hH)SH;y${xj4!2J4|tXN zcpKWun>TOxgoP_;(!#=F8>5X=w*(`nj4a(&K*HcO$WcyPu;_=icekFfd4IYrEj@@6Wey z-@8HE%C(XepDsA9q6EDQn?61uw4^M0M|f*yq2O|4#FKxnOZ|%&`y(0af5OY}4Ycps zwd*+a2%?GT>4hA&YPnXofC?ETY7v0I?y#41nC=YZ=jTs)nuzCbDQA}Zc6W4`e(>Y_ z@b2AN0zaDUDsb)!Xh7;$u3VwbKG<^Ln4#x9og5JWR$tqz8>n^WT_12v4BPVoM)eLO zIJ$TMt`DW9_VhQu{%Q*C;IxX$WKW^Z7y0>q6WNb=xGg-TF{mTzuKD3!BCrC1GOU zQ+brAdDwv@!99EA1B`l;QVU4mkeaZO47HUE_0pBXn|1K!so7b6At7a;JL@rYu9G54nncr{K6qDxTE8W ze?WkFOP&jgq2|@A$v~BerriqUTUFJ^NsA{p>qqD1NlA}Y9>3@9twpYS>DO=PCm4lv z)_Pnf5SQ6@KDVeR+M!>C>mxR8_0ACuf0{$Deg^$!=Uqp_CfwW=YuhJW5PT+QiL0SjWxHo#Nx; zqchoC`RoQ%%|vJ`09g)wE%~{Gw&jnOA8`oVD=n@6>eZ{1-8*+is;1)coHrZhuCW&9 zqOt;vTrxtd^4**qhFY5GVUHd~pfQ?(x8$_n+GgHt(pCBtD9?@-7#O(08WVvhmb=8B zJ$p9t_3PIyF%r(h5yhtE^=k^f+P7pr<;yj)E8 zpxa95PoXAPpXYuBe@5Z(G~0Vk#tow8i|65{L!=uE(* z___f=Qj3g8%y!Dp)&^kwcD_Ha~xo@B;yDd zyLLeBzLVq^Nv_^abL||SXR(E$jj3P-wKX*&r&V#y?!=b%SOkiaHcGHuRQ#kR_BJ{F zA@D23`G)K5V4|@=)k_%%)-bOKlU7pQfK^H)Ib}^R6ukz}>*U0gsm)=E%7y zw~Kb%E>i85#D0p!Pd2Kd>fYl|v0feoJtaXa)xf;=gEY@}nQK`#{NGS0rq^*3sJ~)H z+i^>;$d-0v%s~FUoVK4X{o@4duP0$5Tco!_kOaTl zi0vo~7Gqizdd{;9p&Qnsr8TzMApz}bXU`2E;`W%jXk=ny(Np>Co|l(&s`foQTU!yM z4;ziHT#5HwTyP!C?j|r5g`4l-!L;&uv{DhMKVf5@1Cw}>J z&u(E-hL|pISL7;EDo;$5@|ZiVGTj5p0sSzA+3j@0N)e1Y$%2{Et=Ghf=z_wp1yx?x z4WBYGKI!%-2^40QhaM3%DGzZC{hI>vIj@f(bK0x+F2V@aRitB9uunOQ|4~t@FL2K_ zE`9P0veA1c!^5e@xI82Qn2?!SM&4pNRK#lOvZD+z`!`jm_<>ZsB1E_{f8KtU@r5ZrZxPjd67LmJ@N@_uz;&r zfEUqV0n12_J$CG8QjQ}dcgDoVo|c@?EB!N!CPSqcFJ4e*Z{NJh_x<-TqY%J)S>=39 zc_SbAp`ir=AVq)paGGJs<36UE@9i5Bk__<{;iv*Mk7zpw!HsCx3iuXNGs(d9ODHdw zC%&>$l%5~^NCr>nHK@W7m|rL(0BZrJ5Q{Z!OZO;;#7nzP#79OR185@!Uav7NJ$-zl z(TbTpgsE$VWHF|`S;HdYO+b~Vz11+8aG0&?2L6z(=FW>InCzG^NsZD*z%dPRRek&9E1qJV*ptpy9};9lc{tveI9F-QZ>LJ$VgwWDant z)@QfkSe)$q`+;q*W}%ix)ZfJ#gO1%OD%)&$t^M%*Va}sRk3y9D@p28Slyx~Gis-LU zHk>o8>eEh=vhMeLRmOXWj6Yn&!D?MxU|@Pmv<0M!8N~Gl2iuQi8qBAqrIlxI!Lg2>8?yEd4F%}f zhpGB~t3{-{K~hWm9!yaz>XNGvP9l!|=SeHUVqPtiLVJ=!m#luo~LSlvel|+6Bcdth0twQ 
zhx^Q!Dn^Q!-1VAcu2l8>Vv%21m_h30wU{-NcVLKNn7Z(Lf6zprVLDr$K+O>NL+C?Q z#4u_+SP=dKM&3AumNOF+4OqLWf`P7|iwRUVpzsYGsF0p@-foY~wXabEFAFz@!$g+< zy@Thk_~$<}{QOUQ`6aik_y2&;>YYG!h&hEgLSXzL(-YJi)3@eC_JBIo@w;k5gca0v zkqa*=yLazKY{u6u5I#-jI-m%h-QBF%U}`m5N(v(X&k`ab1NwW1Fb#5@9Hc?9*3vz5 z=D{ttZLwoxW3JnyKp)_csDoC?sx8dR(*wZ*F;PEzwo;?$Uu7$f-q?Bw42g^S@%Ikf9Rr-z=LiL|L~0N;Vl_*-LiX zOIG9>ppqW^blCO_fI(K4ILbygy}PqB!LZQ7VUU5ld2vhYzyT0>i@ozGWQw+OWR(H} z{oiIxul;jQ*vi6O`}oLcS+s8hMv15gjVV~AGsnNlnA|d#Q3zuRiHWC4L4o;6ZvZX~ zHK;#!fgISEN3_wVPd~X(g`^;EJh_iAxCAeS0YhAmmHTg);r^%P#y=YQu5yTB+zT_8 zCNY$K{+!`5+9f&r-R8{%LaV|c1A&ZR>FL3k>D5G$`Wf7L@7`1Dv1h81wbKc9T(WG% zKIn@mdGv`?)N)K}hz_D}S0-AfCkXzv>MEOkr%RQozs}9xymD^470kpJCst24#&8NT|A3_w?-K5c7jSzy9nspe~0Q(Ijt*8_}d!usJSzO(?0CQtHJdYk;l@bw;S}A1}PkuDYW#|g}L?rhA{DuQTG2r z8vFzJG^#yo)sUfpu@Mo_F)+-y^4`ZdaU)pRH`wWr5H3Xa$#ml~wWCLWBF;Tb#c^u2 z4W0=69Pmt0^Z`Ny0}JP3$Uq*N+2P-(KN5gT{gN(lveiItF)}vJX~1lenD{WjaT~+W za}3-DL@`v)X;4#FABVQw)9lnrV@i~ilx*VSN(Q-w)iiYRpXlP-Y8zjcMf@MQ_=W}dECKOaunBdS|Rov^=*>Kk`qvRbO z-kHlUXLkf02&r1pP>il~>o23Jf!j08I*+$#8PJ%!bW#%%s^G<`h4&&oY^7X@I_5Zc z?tBY7k#}SH^@{=})o}XNLipzCx6sX6#m46E?d@H>uMCJ?THd7Gw6(yUHt+84PA;G` z`Ma7Tue+a>|yC3G8*U4$HNx&H;|0fp(Iap^F(4Hgx-h~M6*@|3H%5}@Gn@(z3bme zh<8y^!M_PjeRF>^F&Vnf^u{^BMAP7~3L+%DSs@a3Qq+^nvYvmEtUxoCyPQ7H+Dxd$ zhQFAp|3&}E^t9OnkKpm6IX&!U{@Pu^A^mQ zFZ?n$ca4o(>O7;_F&K`BBoI~N$^K?r+fsOADWkCk&1lz##X{fXou(NTOn(kYnKTm&Q_d%Oc*G*CzTyRI7n2C4FMHC(1i0 zVJD^KIwOXXao(=0^aZq41M(O!onxwxv3Gms#_Qh^M3(4q>+=w(*bKN))tOC8hF5@m z3D!Z3)9ua#NTM0!>mWVKwEjd%eR)WX@<0uKFPsbOYcUTQV0nxNxAvIG&OU86T4G9w z+uQSWbAMuu1xf7DDxC2jEL`-Mo;fJTre`{(#S!tQG0P^3C|1q6P8Oiy1hf2RP*`(a z3Dr&~d9u~hkPuvu-#AQb=CQR}n9Jh&&pCg4ARtZcxt1&`u9_z85SDg z@EY(;0Di>qNyGzec@^9%148Srv;Fo==r*L_z*|xcZeK&U&{7JM(k8b1ToKA)coIxs z5DSIVkby)0Csm&s!uA#yW+6qDOf znNKvH2QJLoU2Vy_jPi#gluy4{BQmy%n)@}*K$@{o-%ZI|RY7=5)Nm=X=70a?!QUp! 
z4=wPg!XoetX4o3HaBT3Qj|VD`VQW88$(_WxFtZ;2bQlpS!Nm>~nlrs-S>;ar-uWh~ z6zpU2dzC}1>0}!qQfWd4@bX_`YKCpdI|hObb4>5y;9)3_#3}%DZFU0`HW8PREAa6_ ztdoODGGAt@Z5iWoXIEDr^y#xWYof}aT^E13rput#vWYcCynHEl^27;K96juF&c6CQ z*Sn5qEol=awoHiDxpgQ~L_=Z7c+1Sh!x5JR6F(9|Mva*2aZb>=&--LpwaO4PIHSci zNoHZ{G8rquS7?3Ytx33hEb*hkNl2Xb{;){k1c|VDb>{vzl^x`HcK>~{j^Nn0csW#(h}LNr!PV!#K(ssthZoF4oELR5~^Pznh-?h2Lw zjM&5*8++zyB)r4~>61D`RBP;InuIW?+=jN?7>GRZMY3&|)Zo`lhIm74(-61y8=VH5 zP>+=pAdROSmX(zyem%@R{diG_6QKp;dg?ZmL*T6U+-GpX@Hf65c|8n4z#gpUsar^!X72;e zLMrg_UIj#DErJytG#0ez7@CDGKLnRQEQXhMHds;f*-AP2c@PO&<3t23sn3DNb!IED~4W#0v)PkWe0ObM6X2hOj87;57Qn0$5x;Pj$20q0?ZEF S#c+c{Q8=l5BIVeHpZ^#CITcv| diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/f1_by_optimizer.png deleted file mode 100644 index 2cbd7fe2e68c8088f8ac6af4483e324a3457a602..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14784 zcmeHu2UJvPx@}=>QKS_xAVRBT1Ox;G2_{gIl94PZAUT6%6Q~HH2qGW?lCxSM8B_$6 zoRKUdIVn-{+o!wlym>Qo=id9)4QsvidUaQKTUDq2^Z#Gi-`@M&Q#^l`ijtm^L?Thi zoI9gTBCQi9ktn)<*@QnaxzFi>e~CCqU35^fHg<3}urnel7&zEiSUXsl8vf>FWM^+` zZ6&}X!oz>~x2p~gHuj>tyq16Y0FSku3GadP>)UXVEjH&g?MWnR1L7ZwM9BnGT$o$t z%t_U2&&Iplbkx*V*M7|J5ZTOon=9e zSFhi2 zIqKy4+3^kyCim~}1pSv!qB&v;9-A&CqnMq;P(a znaM~~5|5WPk4{e16K?G%KLs`hsVR`Rl9I!WDxV}-cD&)S^xr4!v^t+ZS(FlS+&S5P zu*U8fgRHD<^4Lx}qpmzhdj2bSwKA_p7q5(4n>EJ!pMQ2Nn)t4DPm$}_du%gPmAv}R zpREgLpUjPSBy6E!rEQyE7>rCYZH#Zd=usNrlE!-Bd1`l|8`t5(Rb$G@;?B;_;?q?E zVaIG=_-xzsePo_eU(YZaRl7RRHTH}q~Z)8SV89@o*deo;Lse;1bBJUS1n zja90{O4n#|z1BPn=eyk&zn4&RWu?D9<~;FCOiZleDEVsq9Y)u4e0+Sk%SX>W7fs2P zCk4xmIHLS0Z&hcpv$NN2Q9k9;aAIUOAxy*LyT@N6mDvV%3eIzE_7W0#22!c_n3 zHjDNwbB%j_Yb%pptr^9u$qUni`5VGdkxgpV3teYt-f!hS)HN_U+@Nsku4z z2vNtQm)?@%;^Re^RwkT_8xal;Geb%ta?aj7#kWAJRuD*;gGdY>u_&{V3a~mhWWQKQJ)z<=riM5xZEeY^yZvT#C$N 
zR{A5Ct`{u~gdZGSo9nQqV`NOn8>40WteZZ+x}0TD;&U)Eg?VLAa;;w1xr^n(g$vE; zy4vEdGwgHXmY-jq6z*{+Gi<-JZvA?>0%r5<4!XU2lS=RJzCUSSmW6Z;`BYomivQPr zGd0(4o-tfPE!otWYoBFUO4*#Kbb7~*9W}H2#NFmpvF`iJ2YJNio@iUub7yxdfBkvy z-o2^mX)fEc7^m@eN^0uXB9En@(9q%6yquh}f@V!N=_w0HgxUO_mGhEo%VcH_9-i}9 znPQ0#)hx2%cy&#Ub7Z=m!WVOXn{k7!mZdW<4#m2~{41Gz*ktUhRg6Ri7)J*AZ zWE1n%MCXD55~i;9Z0-dJcpvZdL%vnft)x8vAnrNEgVbacb?(Tlnen8&iXYU=Y zmzQqf29m70@`jfti=7$6A3ciJD|99PN5kE{;0dpOx`0W|)y^DSsTwj4#rw{k%Em^O zwdFxcB8*(z+)^$?O7Kt&#so+-c+7Vt?6b-_iJ)vq)g)&kJq>&E9XO6LiMy&U&5d*Y zAa9l}C0*L%V{lmeP1QryF{j@5-i85tk2|F*Jt>yHL(L?_yceZUs3zk0phfMcPkaT1 zh5o35`RBwxefm`MhPQaR;o+6#>BzN$uP&$*e%h#Uz8=rrr({kC{wUq8pYzPRF6t!D zip0{G0RCioy7qX-;_Rr&h7B8NkEb*DwV6b6Pf0Mkjq!zeuHU@F}E|{2T z%gXxV!;)S_EAJ!NZaes`$$&4thE7hJ3VSXn86jI@-0 zcC5Z_^#jd?JwoBtKaq{Pi}S;cOO=C*JBrfoLEA-IskNYkgG z_N)|gns&Ao%WCTG`S}49ARp#K5)!MzJ)3P7UlgW_42~BW1qh;CqMsGnE8=a>yfo7!M)Uy%P%WO zC%W>ve*5hNlh^VO!H5j6wN;*sTQ_eq%d9NUay59)4Am@ZOPsoUSc>q&Ltf7h1`R8IuLqk^= zmX=ZwV2E#x6DO|6%8fcu`qK-_Vlx{@no}YyE1z&bF6pg%c;Ga%%w}rFCg&cH!R6_k zW$`OluH;k|1kEt==oKXH)^}GLZ_j$%Wu;LaCTNu1;rQh(MM&5itFGiPUoK^tx7PIk zL?`;#Kl@<+afihjQz|Bj&bauSH`jI9*NUSOhIlUY?_NQf!GCMqlN>GOSe6gie_Vac zGb;0<;x-*tcJ?9Pt-h6~QFQKGR#sLvqCWKZ_p{t7iN-yvq7$_sR~&lZZ%or9OQYkV z(`zskYm$vc-G6?kTVfEgI}`YAa#BCJ{q^hDo_)Hyq1oBlmFD?fdWK8bBRRnVUplUt znZ^sFp(f9jR8@n>;#^#EyLt7Ra7=OqA7+b3FEJ-6#Yo4A4(8eqGJopd{l>v`zuL8H zIS-g5(+LPsQ+Z|M#*KNiE!q*~GRnwQ$4c((t8K51T)#a!8x$FtLC2*&RJ1f<@5#t^ z{@Fu^;q=rs589KOJ*Va6{dOI`7{wOpu8hW~j8j&Nk*4xz6mO4rv{%&4QQPD-=^2x6JY~oh|jdtZv47BQdsTmr+xOwZ={pA-N zNpIBCw1NlH)WSY|I737Pup0YopLoxeMLT=@#=7X!f=*+f4Figoce|KQa+=je-H4Sd ze!E6=k4)!DJyg!CKfm|Z%mwaZ4PX#azFg*4zq+y%92S=B=eNzDLFDyE7sJb!9oJUo zcORDy7qax67EitS>J-km@jh?SP=n`46GwB3M(sj-d3pI>oz>QUNosG$>PX4r4}!X$ zo}M+wYh)K)IM4T*XkFX>#cYQai6&J$`#cbn-D>1WsDMeP=Zfp>SeuHKl~s)=?x0~k zwOCY6&M`!d>D%iRH5u&28hpHe5Cy@PL50=xia(kYhT}ehNXdU>pY!q zbmu!LLCN(oXODNF11GIH7n;;EuB^@%w?;U36HRz`n?Blpmu>le+4JYs%OC8IAfM?! 
zfb2!7Y5=;^Y2SLQw4ZEte-}5W)3f*5k?1}_*audHFm^Gq&fH>!a3Qm+(-fG6<;Vr6BOM(DT> z$AttveylR4E7n&Yppk9W#hmmZU!JG+wUJyQ0F5iU8xh6qtgK}VzZS>Ig^b*ymZG6Hte6riT6=GD2TI~DvhQ>xgqY7F<(}puRYTLQty69UiY1#=te_r#xeLGRwpPtT> zBSZ20`3H-0<0)EMW~LYv+6 _2pgKA{;-4^%f=W_QIu7k3*Wpl_J4HPJMOfTgsG zJqzDhwArZ*W_OHJZr#fD+Bg9v->mI*+;i6vj=eqytgWrn^o#X?WlA5YWax=g(H}jx zWy=<#rwh7FeLLu^&#jet4mfC}F(E+OZ%;Hfe`sd7vAx)Nt8itmqgc~=ZFMF1 z;lrpmmL18L%O7A;AR4f*7_II5+gphIL7hG7vFNa{urQpK-Q9p<$Tqwtxh@)wKMakC zz&MQw7e=YQe%=Byo$U*dGy~jR1j2}jUdSDM-Y7ob8H3$7Z-|vaFKCtHEsVu@LmRpf z7>hKlsIFE3lv%Pa;{D=VrB}aAmCoDw*%&&H>WXg%uI^cSdF$j8;?VqnT=yby1abKF zx%MWP8{-vN7+VWm><_WCCss`kcUTt-;u}^+E!&%HoSlUU2;er6TmJc_!RdRG=Gg5r zB(XojXfM%HaVq8v$86rlO%69E_)9MLQcI%}Z40j+up7KDo1;B4RXdBWFUY)Ca`k77 z+|u{n5`U~bCv#=;sduD-jCQZCxVOhye|n>%rVSs6AG9j?Asyp3Kf&YdN!QIii#{7N zA|SY}#rOSqUC#4hR7tPibr|&xTiLII^;gtB)$3t4Vc5!G* z6|+;_{eZ)<$QyGUrxfFZUxMgD;;wU!;G{%dXA+4C4v735YVnbL=4B>oYHEal0)DYM z$F|ScgVMo~2v0$)&Wi~5I<&~y)uqW%tg2OiZLyV-N+@s!SZ*{uf8yZhD4k!eaA_hc z31k};^&61{$-4P(+E{wM#IP;FO}Or8(@_2ez9|j~385FV80sV@Cxfbly+Qri?b{oG94=rmDKISl{Q07hiOKQonVA`Lj@|gMtzfmN z?X68z-v$QMuRIw*E0Z#|=P}vi^Mpq?8ST2lDqH_mMoI6(lHQyFSKPX+Alq~2 z?(U?a`JFpxE_9pcV$63ORR_1AsHCJsr+E-N+xCMO8tkpj9<~ExjN7 zFE{jnN9nD$?p9nh5tjG;D6h<_{2Lul$LTFIikqO$VjRw3Qr`@q5Iw*)%(7Yw}^JR3K zR2sJPS8+~e&B?*UxCG>^Zr4wI|1~Jnr;bx-`$=*kJjes*Hvm(nU*6sbVXqCzH zXEdYx_miN%Dmprrl50Z$`iQ2J&Ckx(FDFqMhmfLfGK99(j_p*GshC>o$gEUGWCM37 zW2}&K!wW#R(iq^E^)woIBG_#O zH)qE0aq?BQvAv7s9_dGKPT{`5|G7?A3pZmlBRE>37MwbD>K<7b#lE7Z#>r}Uc({L+ z5y91L&|anRx4A`0v@ERp5h?|lo1OifkHHW+Y6fO;-EVvDlpMHIqEuA6o^+kFGBb1^ z#+HW4vhDA3o_Nc|@BPMGHPWyZ;FF%X=qzdXlVr51}{Kp3x zdO@=w9I#PK2S{(VyGgaV_x$`+)zzQGUx-XcJ>0l?vw2^cKWf!OW}_=t*txl@+lq1U zvhwmxmjlG(>*^F2e*V1r{mVPo)rG-o*R9w>U%pupVdF0%!mUit?bl!2U7MThsd|5q zAklKfm$%P$|mo@W;voLf(>E^6B%vHjznljpbRI&hf=ePcT4h{~P-rioGalEv@x?D~d zFy5I<#!1KKPvprFqzc{Az`y_}8(X;%@JS0Am5Ad=5HhBJaB#3;VsJ2_s;bH`z!XH% zE?&J<05}Y2j)X)okvGaiFgYyGP0IZCPEAeOS)mj5FG>*6uiQAcj$n?>OOE(v(h(mA zYuV?%f7h<~nHdLSipvAUx-ZTuB&3djt!YKtZeRdTs>%!v-$X_KvAR0# 
zolh7slF`J1a(a4gKBBV!%UWMy2->nm3B$Ua8(#7-ZjaaoK7I_UH)e{58vCXaulL0F z-o1L@Mxt!Ie0hXhui&Dosj2US2TvS^0~@d@neGdAG7YF8%@`W8tb6oOd9+>`dbgzO zrr?Id7SNGT&>5QL{L_5M5u0Y^<-PEj?LrH#shXnBOkN`p!U{A}ntJ0Q2GD74#^&d;&l(X;OV4%y z*Nft0cg$I6j9iT!8?zW3E%Vj~rBchWTinO6%UX)EL z@vk<13`XIHATFq;IFcW?t)vd^3%q56Mu%HeVP;nD(3Z!4kM^UV& zLMo}G8lxW9>+RSuyLUGYfzmi`qtpDOSEKF1=sQ}N?|ioHeSx&s23?*|sjhBoYfHIp zThN_5chEO7Q67}{&tsTO!Yq^r%DB?~JOTdVis%aJFdPsQIA|u0W5<4mJy$Mpoi$T< zdgLUoF;X9MXBW3t?A^P+#Kp#fTsCH542g(H!-frGq-nTE!DC_UJyReV` z=+&tH%8(T3d9CjMTDQJeF8=e&ErNjQ?ChNBcAGSR>3} zWgmR?S$J{rnqV_JL-HH`JK@laOuGwQIQwp2*pYBPLfO-^XnA>Ar+sC48On_27~kgY z40S3O#N!|fsR)`snef%Jut-H-MQFRaF8<4PGIHCBJn{*K-Q)Ya&1OACg`Lo1=Q6aj z)8+egi;T;U_!+kT_%=9b{_)`fbki`ayy1A|fJvKU-)xRms3NWsiz*nbKm#X$Qr4Zm zx9{lH--Ly;50a0vU3h+&Km3r4NzJo*P}RZDo~7FMmB|EBQc_;o<3mUxK&kr8U0OH{ zEnK5wwBy~lQ$9{V=;_m;olk(H3Hgm~cyf3+$+~EvERGv63|mu=p+4sA+qZT}=%cFE zh^T%JFjWwK)vjJmeEXQoH;_oJN&G$Bvm+@Pzl)l) zvubvBuR*jCvMzuf=fQ(7sy6!V6_7srfI$hN5*Qfh03qY}Jh;!H#}_0xetb?+NznVT z7bEDGMU?>?n%%n-z!#2Ywd#Q>Z`EI&+6|mD0(MBl$;nBoiAX;)+%oF)NOxgAa}p?6 zQTqWuX49d%XpjU(3+gBfiqiN9A!GuhFmwF&+lk}O6G6=TCES08F^%DUn>1|rM8WiV zf)ER&rK5{OUL-qDcKiDI9S#}nL0aqF$BkuSCsBCL1p*}XkB_H~cVwFryr^~#%3hW7 zfQd1r?Ee%6nIy}D)zftj`s&@bsL+g9*t+N4M%O(B$-sPj0 z#V639gsI*}BEUz`-OusfTTEPh7OH>9Sp}jvo@3Ae3Y*0nt4c%>6SQe*aj?B4piWo*johPcYy$HCAV$X^= zhR&yKW|jmCmYN&J$Tcn#yM?@^qL&4ag52bwkV~0W+zY=dW z>&|w81R|;J=*5*TQvSPNeTRElQ88}ZxIw7#n9MQqsT=kX!GLx?_2UN-bxm(9wO3$S z$oODJpihmF4E@Ak{$xOCk&BGtu2<_{ND&MO1_D$G^;j>$w_w$Ij8KN5T-PJKW?>9S zhC)qZs)30nNWU{w0;=tgSAoC7j<+Fz9h3k^krHHzGaT@b91flKBAaK$~oz zMIHVi0i+8QWefk%b;rveF zp1lIbkUuMP!Iy=|{r!|ziJq*0+-pH93OY~dr0L|o{!sMSl= zXq<@4)RjkV&nlr#$p(r#3`G&+GAtCXL(zA9$4CEYBMDb=%>AK4_LpH=*tzosf;40f zFOCSeroKHs(P+B=f?2yM8EI%@K*4YxJ*sM8V4ziS?G+{i+Wq^vhZ|Vpz#W;wjS@&$ zJ&5LShA|^nuC@?{B8{2PmjgoQejh>qEyViA1Q!)`0=EZk@Fmf$;6*WmMCOXp@&6~d zzf5q*Vig<{p;1>rpuve?oKay>M{7$0u@5Lrqr%(Z`|zD4EO|Zm`EklWh_VSf0c13#ddU+L-P(ZD&YC zs0Nw&A@R8Q`IT$Mrsb0s2fuwo^vc0J0H;tva-LG*Q7^MwZ7ih3-_9D(3FL234Sc=4 
zDr7l8z7X@+4|+7()z+llR?Lw82o(T0#TPq!q>uw8`Vfq6Q? zQc}lPcYYps0-ejw$5$UG&c~Yd zvVX!P7F|jqLSQX$nRdYUMNK@we6Zl=8oN=~5=BAuK6>=1qN1V!(pWG&ED-N4J9CsM zC@2DBF`0*$bRUL{Mu_6jMIiuRG$Di_0CvT;exjp&Y#Qu*dSkFt$Lm@R)g@iA!GT33 zZcrfDgdP7CO#J`sHRY*=|8BC`xnz9h%2fbrV){30N{j}@sHzu@uAy~AurpP!P>Y6D zzAlJEg@8oRkANai0q>R0)mB$G!?u*?vm ziJtu%E**%Mg@uI!B6FO$n3&FSr!n^IY0!SB@#&iCo}S{)GPGT_0Mp(9INu0!N@QfD zTs~g-1q3R`#s0bh2!RerbcOVB{V&|0dG8-J-ck(bn}r(R_&2=)Nbq_CKleWfWZ@)E ze5WhaRg?4I0q-Nq$J)qe1A%8WSKtL}Cz}Ld%7RtIq)eMY4}M9#tSAx33=wut0vy4pSaBF4ld6(X$&qd3`-$>kV?gLA z0}=vnR`YBQj#^OJSH(kdVxtH@IkJrs6j6_)0*Vnh_J7-9&El(>)2(Yo8 zMeu@kev*(dm{q(*)ulSJ{gTrb66eQ9p({A7;Ml{(#pdeD3Q~U(ffvMQgZ_$2kMvs2 z{aucN$`ZpO_hbl=2ks|cJ9#2`?Cm2m$Ye5$?&3om5B85M&4j}D>neA#C+(_ci1GQd zOdER`Q$Q0|!j2QyRa<)@G&Ho7@iE47i!}%YgoPAouk`5hpQJjS>HPeBvCjwvN$p0f z4w>tO(Q%A)nEBH`muK3jz^$5FAha#!T!-~6Z+f93=6FLx!)F=T5W*(~YFX!p7%4gO z53Q3+Ts)$(=6We`6RZ)57*mSdS~G3_Ht#}Dp(GrB;{`8Pe_x-<7$4x^_B$nUX%NFT z^p9QVtZi(3Y_tAioptqRTK;h>j(!zCv-w*pQnkVjV;O5jtnmxDtl;hDiF2`(NANpm2J+)=L9 zdm0#6;cQZfl+fL~cQ57k?F3Yy`}Nn~WFURnxC|&nrL3v%`9QuBIwdCE8Z#Z95}6#l9E`}H^1Em`l~stxVRW| zGu^&@DfEIl4co5^3X_s^^}SZqs3m^>_HNU4nl*Y-V#|vcFJ@qT2-n?oo%433GATI= zXq|APf_GE7vwi$Dt~ojVI{dg%Rs0=~WD16tCi0e}e8*4uxVG)Ofw=!CK+`Z6vi{LgGlO z=8$6=fN#I`^~C^G?bcK*u<8#^9`W(`5RZ|4>k7dgS^V=xX52nbMC zQK8$h!@$XfB>$FJq!ze>S#0!3^c}_qD&fv3Aj4$L?5+c$CPa`EE=J>!U!nr}h-=Ep z$*Igwbg{?~_9+mW%ranqnk$02st;3LmqFc2&VrYm6Xv`?+FySq+-tId+nFQ|v&P>T z#krrzavF?qHiLyOFg7ufUE$d=CH$QFl`F9z*ss_*lB)h#6F?!@Dpm48T(~4`4Kc*% zsFtGs$B7H)o+1VN+W@y5_J(Ye=v0J=SDqh7#B5Yw7sY8voCv{>v{?wjeH32pR9b`~ zELfH9dh$&e50WEF?oe|H3aY`2+k!qthq)NmGf3`S*d~Am!ny>a-P|b&dsKyJOGA0Y zlMc!7;fyvE!7~d#p%)>Y%LG##*(yNXHFz><47^h9>({Rj4h%FrB}J-|Ce9oZ-7vwU zz{VJ1iP?-0q7cwm42?)t?~OP}&F#<)lPK12O2pKI0XL9¥S=5luX%0Yb~n&YQ%! 
zkJ!5T(TrlVQeV17_+&ujVdV)>%?N+Camc2(8xZ+V9tH-!$jm$n7hP^<4;=BVY8&1j z7W+VbBI^mPA|MKf!>x-r=GuhC1~=ubzKq&)B1;Ht42HxJ@Z^NO1W}j(l7JZU2#u(% zpW+C`?~w|KT!%<)dUg3Mw1{Cm4WJ65Bh#pI7t});j2QnrNKkz>&_4iZMF~SI^TrJu zh7iTVnt}m(~&S-GX5`R^M|$ z3ffaK;Qkd?+>$2GAOCsP`dY=yA(ub4hm zxd>wpbOU#V%Y^+D8xDPdBP5KZ%8i}u=Mfjzg>`@^Z9p@CBuBAN(Eo!?8SwN4p695I zBc7%qp2h`nw~x;VEK+sI56ACcPIBwyC^C7>JOWdS z%o;YQC!|W;Ol~|M25mfKak@EGlaJaI%}e9t$&=oGR9~`xzZAq97#|2*7;ZZRTu3}v z_TQgJz&nIpL`E6!&34+?ngZN{{z|78wn|0B9WMt}oy%+rzc|>onpW&HJ*+3hBuBV{ zVc|&D$aoI#cRbM@@3B392;VK(=41#Pq5-+fubWWNu8`Y&Exda;pNE7b6fgJln!$U8 zP8x&7=eqY@J9X+NQKoQ4`h?3Ed>8DPl`G_pilW3u4iHAZOw-2@ricu_RCU z_F<+?+st_F#C)fHz;=dXUl{uEC(R&>XBI~@M&Zp1&d8W-_IgN4_opEBUqL(YVeZ32 z5!_B|&qVCE$&{h*`CY$G>U-`sVcW*EcdzN0GiQ$J?-p($*GiZZe)I)b*lVT1C&KAI zX#u~Oe?8gc!r&-&nI6m)pL)8EDV`8ukZ}L}@Cq{<+wLS=?43_Z?-4B4n0QC*THVVw zi!z+A&SIBitFAP#2d45@7H#~e1788XeU;op3ih?$xLMKEZ!P>4$8t>q*xry85CLJw zovlL4EukrxHT_(psVD3@_r1G1JBc=f0awrV*|%?3h{0s*uETY(b+XDVf=EYAfylrb zQiLkSWA25p$GKb_Xr~C0d{1@Vt}$7S3l$YC=2ciJq~^SkiOKrKMbL2+zwXhPHVQ$% zlqzFSAD6xFE+8P_h(~oep$#L8YUW@j$5jdG7mi<^@}@NH)`cGQkgxYeZ*bm3f3EOH zZ*rZVNJAw?*QhblH#UwZs9`B-Y0=YfF(BwJj&wCt+fLKqqZ+=PDibA2gLH?|W6^${VP%daoTsW&} zA2ihFbX7^|`|@bjFSjGqFFt#G$k+d+y##}VW z#95)=9CpiFCyHNLUrxL0))Kc_TO#*~pXlOVTz?++%GGVgzPNM68U>&1jucvRRh_ES zP)5_WbzOBW^Ml%4#k*HfC|55D%i7_-zMeZw!C%V`{u8g^l^-iBpI7K6SO%Wbd6d?! 
zW>ygO?VU%$d5EXCeTdqw2r}hJRdHL-O49=CUp?K1{siB17w!F~s%%tMhRy7}?v0snMh9q`% z-QqF*^0`jqt=SpMx!`TPcEu!Xmv7!x2u(ul^tK%#g=87btLa#Q`VpCZwMK_oZ-4SsnR?&CGc9FO~S6- zAx2(7VRB)-YJkIS_Hj(!-41uj>)BmFKZ$18U_p3HS`&e!jYo@YIWzOcOROEiE2{+6#xpz3U5I=GFUyv#rw{Ck<`- zYAV?9Y z?;G13Xu%scY>>Zt^@W%heUz{7R%-4#I>!3mnur}+w;pnHbKA_ocO}nu$hShbZlbR) zcu08W>s{YStN`su8=c6^o2aCKa&4uaiolp3Kg=hGTAQMTPM=me-Qk=TaN3MLuPwQZ zqe$JZ!zF8Kq+=v8?dFFK&BnZ0{CqXZxMEDGYya{2uBWmS^Bv2Y2M-=hZ5_%TD`lJP zPe{pSu={>$DsQAPK`ufpPU7iN!_Tk6OG``Bt!Oe{4E*v*YFWzf9&Bz%ROP~2T?%!X z@^76j!ZRH?dNlsVy9b7~kq?V~_NQcZxaO7SBz&4@l`tz>%oMO2K8|Jo@cw;oUrcDS zrHqTSGq-i`m5NYNEoZ9zK0SAVo8R8v7xPs`bnrV)#Fi-sk9L*>wk*7m4H0VQ)~|l^ z?Af#IXH~(1O_xrEy3WOM7QgWH+|=3KtrIOBsN3$g>_YZ%yqv4*1p>0kQ9aLYgtxuD zz2^~I^2x`$IC!0=2IDf5-+enNCntAQK%hlvd11V#V7i^lM`SK`baIl1g@vWkv1noJ z@k1{!p0j7q_B88-@{Ht`K%}YPtdBT$^&HURry|(rOr$GG43pRTH>o4R|v$-!{ zzMUB9=$M(E_20v#`{K(N_3<;({aAl4J#UARg6ZVg?cC z&(_0VnxkZNb)(mB*l?;z%dH?TF;Qq}!FxC7Tm1Im%t}d)f?LMCffAQ5pDLqroHi?* zjn3(d4)ml_spOJDlVrV^{CQ1UUWhX~yzlDi3E;Qt*^S-dLikg2a(e=dJljl9`-m=O zJ50CPRe0{$VmvoFwuv8$giIjsw4)!lX+$R7x6M$orv#$e5 zuP#b*7j0m9fhJm9Qc^N8*j#s9Beyc2$UQv_)re^u@SkNZ4vS0R!?Q{!12f4-n_PXZBd~`OLGb9*RSU! 
z!JcMZA1j9<(CR$fO>L-@7883Ar;-xVjE_mqlEq>24+;u0^bzLtu3HoK&Vw%Sx8Hu| z(yu;19eea>Lmg^j9d<#l`5|Qj*&(+c(c$FPW@`7~HNv?S&)HL9wYTikijTXsLmc~A z4u2YKFKA6N;|VT_Uzq9i5%YCe7%h=iOG!(w8oseDdT;52C^5x!?2W0l)qB z8|(7o=hxceej^PS*nd#z2AMhvR_KHPH7=G`aH_y&dUR!h_MTDfv1Nwn?VLT)qf_E+Zmm=#PuzjOC4 z4_SCCyWuaY-@bh_r7|K%Chk0OjZdp}$X3JR!;ZP7`H^6q1m&bZuKfA)=em&7?CdhZ zf_Aq2W+>9}`CW2qq;lV0wT}Dd`=6*&Y;0@|MQ&~;U$Se4<5?5cP;D~hy4Y775RK(?3 zNJuh*%xA-vEsaQbv8bO4C>ctb7T+1Yzkk1Z|Ni})Av!ub(S$;YTJnaw>*z{LN@(dc zk??rgk27;~{p8FQwr81zqVBpDj}L#z8TZr{QDYwF$sNjUjZd=r`ep@gh+ivIfEOuN z!qPh_JNqiIipr%+ze*Oww?E~I`y`~?{eJ1`a;#*@h?b<3RDFoB)5MQ=bbmnC@o@zm zov7s8epae&YnE9`$KtTvwQTe7V()zk2-*{x`BzEVw=!n#F)Mw>!tN8n*Bmz@r!<`x z6&>Bbe|g4Z3Hda^Zlv9?KK4m}LqZIvc5(JZ9PWaXlhd>$R3MATMazUYkd=)sVWBN{ zc)I(2P)MKfn-5-;)j{9i#|WjSr2*g^FPNy6ynOkxe1w=+kWGJ6N=kjbBKZ{vYe&?T z32M>8#l^Wvg|laG1N6)U&es&WIJH@n>~rk#*PEy~-4UynWx8SWW+fZ|TrZK;hkumM zz1VB75%7fR{NqQDzLXBy=Cy9l_nqILR9$@q@HM+dcKe#utAi}0VhFHso%!Zb*SH?n zASa_>^fWhjdLu2Ve?~9VMOj6|M#;~{aIZT3Y12d;y}mcMSFsEwrLlvbOHww~QPvnASAuC`4r*@_qNsd^Y|2!|}+dD1#qge^<(^ ziI(0=ZBlaiZiHh+FXEi7TNN@SU+A{%N-bH%4UpZvc6OUgaBDzeintG>A1KIJLEP}7 z0B20mq#j+!MaoK0-xMAZ)08%CwR9n-s z@fbu#qPx30tBP{rTtAz{@&W1bduK5(W&(F#Nh=}levBScBLcvu%<)z4kbm!V+Rk07k3H zLES0&V~^C@6jXdF{d#-#k;%?lG>+fR;V>S05oxhjV$Aq)Qa$#D)YFBh;$OvI|^# zm5QdO$&M4f;iN)<+|)cdEIxrs&%(?sUL`yogba}k2r#g`G_uU?JY$Y^ktA?hM#h^| znyhhVX6AueT4(h!Leg#ww`4r89KZ0_ffOYlnch_YA9wT1kvHStt2EO34 z(MXL3V_l#6KfjJZ^!tT|rT|5#&b8g)h zhH8_nwrJos!;V51(5XO_<ek21E75`RS$x6|JOojP_@uqM{!8_%s0?CIWOxSh}R2 z_1LnHNy6XXpPuvPz5IfL23#ZUnw+3N$W^Z0NHUUQ-m@%|R%OD-&(jGrX?vbKcaCsi z*3+l6N^MaxZOTq$9TOB(BV-Bm>kDJW4{%STH|{HtkU~L=>%mL*@tL1NrfdQ`ngkIH zeqBT&09lL>aht)`d z{Cv}li_1ifIH{@NSY%<`Sy@>p-fxrrSXEVJvPRmsF-2=6f6z;yyw2{%Dju8f+n8cp z>0@&pl}rwcdB)b{;O$b1iqD?hs;#SQn_8YsapMtooH!XC9!?N$x_-@DPe$PuX9Rww z0CJ+m{Cim?FvD`_6BSrVleh2QNh7IBpE=_$0hB{<6Tj`?3&};7`6+*#vyS)g-|xmb z*tT`+P2d@6N5|Y*9HHPzq{?+x14&u*Ad}mE{`tCT!K8AUX`4Wn?Cm|;<*0x9H?_5a z?M_2p;6ilR%x{0eSt3fm2rwEb6D+VNugw$)RT^Ztf}mB~!O!0kckSC}vUUqYFK=VK 
zh=uZ%E7!qZb#RMbSy@>RJw3Sz>6;xb*&l4bt`A`Kci*{*s5G zIZs7D*`S&dma>h3!5BqGAKHfzw!nIU4K1!c}zcn>A zO?{MSkjeeFL#r=m6vLst%;xZBF>^TUNL2sSV4@o;P@kJfb6x)dbkBfkEQ2#k)nrW65KTY_3hWKyD5}c3?ZvGQz*whD69YS z8W@&v1-g>3)6}s82f|9lGgndEANz2M;n2zB-dk4*%3h?zzr|Vu;&%6$QYh|eP>#%t zIj$Y0D1Cc>xOd}*4POekKzvjWx-2QVW5*7qh!o$0)TO% z3gtTg>I47#Um5An0Ffw3NqGVY5qe@f)M7Xg9U1viH!a6xs5Q&3k!T~jl|gYxlAlbo z60il=3LGcz?}kqW%ucKQYhIUn>{&;6_{l&UAjVL z*Wfm57t&KTHckY+e)i;vMRBJ};?;0e7o+LnHdIiEcVc3!_~k(6c8$P`zR*&4a%ieY zxPnQs9y@jkC}_i`O^MyH*a7DHo{_8)Wp}WF41>C(kdShpF&{l@2$_?|@Gu*jq|4ld z#e@A4C{9m7L*tsl&aPUVE?Qn33WgF+y*C#kFuJrjuLSw@)vJ@#F}z^kf=BH=-i0eA zC>sD6v3DbV)&=of)hDQM5Pg;iaZ8IMZV4}6zHG^avZbi3+~cfq?b@}n8r)Lu`_9v6 zTa9_CO~t#jQsZyw>9GN%Nk~YrccXIQ&N7IC2+dFW#mTc!5J65=GE8)C-8y#c*vFiU z7cT}kT%}OXw0(U|*-lT-%E7^r)eutPG@YBYA-HsR#F~vekMZ-Xiin7?&!Y;zgs|ER z;zOw3B=Y^Ijt(uL@`g<6^XJi!1!+jK=2NRyQ7F5FebyYnul-O`!gl)fpX5GN6276= z4h{}2IaV=ZK8zYB`=I>kRs{4R#;#==`vVp97!J(M&9R<5S*?SsP}zOjRxwF{kzbaP zQEp1sXee;9BN`ki3Se_kWG5RJNY6Ept**U*4SwqdF4MH`MXp>Ob*(XCBdiEe9#E2XX_(1f0oVB2l zaGX$qw-GD!0mKQ21FFc-Dcd?WQZhYJQVkTwLAed_ij9VSrEEw?6)?7won; zpyk*b?!gTOkI)=!Eur(ZwYBW^uPBr^7b|r=IYR^-&YioHYu)z}XpX-O_`vVkvq(=* zPcFtH2*{fmg|0$VRkpTHXEJzuZ+%OlOCFJ3^$G~w+skXrH`HJIiKNc$+aqyy(Ozk^ zRK&%%8IDi4YO|f`?(S|$H+Z^r+cu5TZQHhGn6~B8XFT!qixP2}<9p}Bq+wOTL*Bq^ z{P}FX1&w z9KYicf$N|@TB;2-=h>yS##hIAauRsy`}8Rq7@esj-#qp!TUw?Td+s?b(GLk~%YKnW zlsJ+wC;M~sM4%9$Ewohp%L-iZEiQ;e0TitQVj=19Cw5`cqCGw~_J|XLGAla zO9+>@JfEx^nZ^?B`o$-SA|Y>R7?0oEkYS`^Gt#aBiBiSYwGechxo;*a4wMV<;bV7p zV&d^5Oiahv*(;MOAkrD2^paM9-*3N#L1^m74L6RsEjuB}X#8e@yLucbzN)%9{Jau4 zDiAXyAADAZpP#={)`p*9OP1bB92))RRGrfD^2F4NIE9!}-86&#`Z#0_Mf6CZ z8OZlpN1@bjW^~z7{oxS}ZN4kh^$*Eciv0#)2+Gwh|HEIsOIB6mtTV&~4nP01B|j$BvKT8dQ7)gMvvg zkaX{Om8hu6K60C+r)LF4D8BgsiP5hAI7TB42X5);u;BawFbJBc85kJgrY%gN^YQcx zqH+ZGqjvn4F{z*vtmQg;k;oZ9RSw04(DX{1nzDL=u^%=8z$Q>Gx9{Jdf{qS#3@?+t zHJ#}tJ>A^~Q26PE9j!BLG+ic@{QUg-lJkZIm>1We%5M2r0x-EHGc7e0U9Lz3X=oEC z$tfjB^}W^ML}r5iV&($UpbZXE`LiCA8mE^-8& 
ztTI|&0T|L%L!)RU+HWD?RW|sKukJBFqnf&R|IdB37ot=I3#tGpp>at>1{8~gq(EQa zLxB}NnHgps?NRN3q&%(*_MjdgO|#f={*j_+PRA&eFz+Aw6jJ<-N(V$vOjw&Vr(OhW zsCQqn>RD*0md+xWBoY}xUSa5tY~FL~y=n}a$EeE}IH817j}6_vd$+x^3ASis!rzZNAFhQ*s zAm_DTR1?(@z^e|*K2Wioi6G=aT-gR@k2g_e+AH*zmloq=W3Qm|Ea`w32$7Zc`R~3} z$15dp>z41PjzLrF1L0MH{6p$5IQ$}13-o)_;!W5NA3l%FP`2@;j|UGx1IkP?^dL5xCY*~oRjmK}oi)Iz&!G>M75@6`PYzB_ct@G) z&b@n4{SWz3sd)`lrhluM5B`MS|Dl>mG%8fLc=V}MrywOBJ9Ow}=?~<&#F7Z=8{%jn z^mgOU3pNZGV%HS$&sDAzN8e^s48Y#a$; z@{5UFg>qT<{^7Ps8i@Y?#j+{mLaaIL&mLWl*qleFKN5b}kp8+#){CDV8qyqc)hI$= zgVu)Kxw8`WK^jZPN3B@-#w>Hr%0oAQHj3ehc#zI7XKkhT*2^q*9uoS@MF}yN;kZq- zIu2MOa*Vf9T+_F`fD{d0NxbU?o8m{@djB6ADa`M z(tg*Ns$@YAmnwo+ zx?6on2$8N^oAK0IXfSB-{eZ#U7K>Hf*EusboPkc<2(L;|3iOP42uGtqb{i;_QA-xT z1lXzUy1y<*vu6Wcs!_X!26FuMv%CtA18qsK2Vp`yIKJC|{y7Z7Z(nK|yP(;=iF>uR z@~Dz=k&zeBA_y7Y;%;@1`RpocX3N52Pr^yc)dwoVd-vO{I}rDr`UYl#Rx4NgVLDV) zBkaR1Pr1vNA33|Y9FY+BW~f2eu!}~c$(zlNb<>hPK-6eglU7#$`paQ;%?|WLY3W?} z*{*Z=8O%^sDx&xs`wOc%lOOoT=JtOIlPdSVmThmzG8;*J8XT;l6MR^FCS*T_Vjctl z3R?tem_)_I7?nLg(chdFLloSFg$33DJzZUZQobyl#QXQ|>3{s~Ap7LXRjV}mk<^=q zoU8B@e3dAx0JfQEk4%czoi15E$5MXj7kZz49S1Uc|yL))4CjM2!}MINGP|-J;72X)hxp8qjuV zv#ygSs*1tivKhvWXXd9zNVa|Z_N{v9ySHx@6&0{H+QJA}`U*AZv87(GDt*oq6HaC9-wMM-(1wF(zYzV;k zQ%9I#&^T36QMb4cX}|eDCNv#ucK(+dMof}gil#6cl0||;8-7bkNg3~JgceXsd%ffD zX%#NT?nNy#pS2ZrY2^T4`=cZKnbBRM` zZ+yY2y&dIVFT_C%Wf5#f({J+bojcnZ8Hc}!%yw-f{R(-?wi_Uc{y zV13E1&4x-4`R*RDMd@y&_A;@;y7_a8am$}_jB(6Qtl0;n!m^i zra9d^^@yZEG{QQ@Kq@r`h$y$Mvkpx%Vnk)vfwtTL&_XbY2{Fj|rKF6gdP8%s=fS0X zp9KWO!$V~=)S}duXV!x9Xuao1Hq68 z53ssiB_2yLZ-zKDvHM|E%(?Oo@wAISkh9pnzYB)5{@SR==&;g`Klbucl9%^|)vwZi z;n5B@Zu=kChuRB>f2g5x0PjM_FczJwbg{qXI9DUIv2S_}f4>C~$Gfmwdjb;q0Nh2& zXluut{EfiS0O@FYq$33xfH-!ZdECDbBU!I!i6Fxk#KL9L(rs0dmwyRq9BxV21vSaS zqIsxhqK4RmrCMxg9k#Te=|7`0LmPkksT!@R42w=N(zi!s&3(N~2TFu61K+7k3JVL% zd`A$?uMiE+oj(t?vpcgT&2B_9ODSAok9ImN#YdQ#%h2{UaYM_-8cSuT(&y` zStxCo4Pr~Wz44k$zF;tZK|zV|d&k45KqwgWBiI)BBuY4>q2e3DhQeNgY{hc$;8_a` z3sV!_Teo73nvxA0;{CwF{GL9IAkphwi*UO7&nbRz6%N4v+yJ9v7#!cRK%K-Y2Zy%f 
zzq6tL52gLTFX~qcaOD5ELvR8naTwai0_UKS zuo;0G#QA^D3WHuYKr+9Ii>L`8s zGro;a8izxvzniMnh@_~K)tZn*h0q@sc7V1Mz1+?vG=y_Yv{xMC=7x%?X__F6(6CY4 zA>hX;2D*9$vOJ`|_{4Dl3M04{2?K9{HedJGy|^oRzawV}e9&I;v7|(Nbab?|qQZuy zs-i;4hf!F@$rwgt+u<*9cx;uN<+zxT+6W(!czA>o5Ll^zl{4dnd zj9K!KM3qOIqaOBU+J>j-u)?WPwG8zC=HfqxbsoL7p0|_hhT|av0?=@dKfQCNY9#; z^)e}un>I~Y+p(*bkKg}YNNYE~yi;HPi@%`1L2yw+B$1*N4oih$dwxz#o<+_2O`Fme zIGXOi`O^pRIll5q3%zTz{I>(c&`bHV1ZyrgBN9hJHQvvvdej3kMZX##IOa^g;3Kc0ewt5;i=ix?CrCyia5m}=&QguLC>up1}l!T zmh|fl!>Vn9TYJ`4Vue=E4tt8EYYnaVw{5P2mMjV$pWS^n%kVX@wiZo6!bS+3H zkTmQG(b^6n4AB&KX85DdCHwS@W7W9 zfv16EB1#wC=36`JlQjf+%-Yq#IQlz^iZHW~V3eHGvvNA!RIp8mxcq<*QC{jWK@kat zCcHG$iBN<2_Qx5sfoM<$^d~Qo=^L_uxSu#!YtaW$mXmwJ#B%(2dG`yrJ!m5JU;29J zX)-1~vk+-Wi+jRDbm2CKxbWt}&5XX0M2`&OlLJpmF6)2yX=y5}$N=j?q!j2flSrif z04qwE&hTRsGq;O&n3|eugi6EhIH3>u=wn$K@u1Y8IUrQsnmf=nOS-m1nSeOc2X9xs zl8VL=2@*FYyP<%a>&65Qz!yz7HCQW;eS=wttSi`=AzJ<+y@tVODR-D4i%YklK@43y zSOs6I%k4g-b*H~@{+G<_P4c!{GkqP)lznUZj6kVu7kr-mKe_3WDFWobX(LV(!5cMxFZY0k_lzF0nnYMiBllDJ# z5sXP$BeX{_(RI(FYG6 zH^uG0i?-*{BS$`55EG+^#}Y+XdugtzC;+vm%4TeeGg zRsN0L?G?Wqf7(*W^l0KN*I$E=(n#AD(gfweoBOoYNu(B!Z?9=unj2p^-cUPA_QVn71B&=zyHK>22DQAEZF$W;I{g+?9 zZCz<-*!A(rTyKQW1ctGCk!l*C1=PdyJoNeXJ5?IN@%Uj9Rnt*ozw1mmp^`eK+i6Sa zk{FOGglRmH`9ZEE^&EM;*TJhqUX4&4c$3K32(cSM*=u}9Cz`lR%ef{>6S`oul>bX4 zm43`-RzqBfL-a`aip=#M#tecCvD)Jq8?wx_K()&p27%Hh->jmGMGF?)GW$3< zOoU&IVh@lz_B!1jiz4~@&N{`Cw2luOzTOPEn8%JR+ zoOMK|U>|p|?1+{L?ss0qJ?{;y&_x+i8nUObX|MUoJ+gC}Lf|W#N-}1x7AMe^A`XyB z$>YTEUM0Gm2xEbuiYLedF@LyV?R+C4J|y5V*@r^r=A?juj%#}+XMf*79mnVtf7M|L zNUDlhBRtNbLnL8Jl$iClr|J3NKs{isl6(5Pz8HNeMKsHS{_@hNKWqEsWHg~QK@|z@ zO<{7uh-N9yVqpbD>u=3UVRY#F#cL0j7!Irg z+DiaQJ8(KJIV#sP56i_pbe!z}w7XN@7DMGUH}saFQb^*ld{Igyg<&D2xj8vGWE5eZ6o9rPu{dg0Vnl*h6{g%WeiB4YF!E#9ryygsI~p-+*AnYm@{*GH7q zVb=hCtXv24uwFnR@_+~pKmn`+B|y*k3u*eM8RHvv8&tXPmT8t~*oZ7ISYcGd#y1k^ zM;>*Y)N~Mh2j#^r7)u9SC235sRajNJSbi@W^M;Ly{%|}-;h+Y?uY&x5;;LRsWNc~^ zfD#(>d;?Y#K5qmy1XFrA)Nz2L$Z$N;SE%#YLjrNzoX0DCBSF++v2RFwwcBKc!jaqc 
z;q@*3^hpj{9Kb+4fT|%p_N0>n+(i!q;BqbEFT9LBuiPOSKvfeOmLtwU1N_yC9m|WM zn9=Y3;`U*OiayR9d16wR`jHdr;jx_zU0x6;lO%|=s}Y>AxoZH-D`2;XzZm~;iKyY2 z)-R;@iV}cNun{4wQOaJlU`gJKLDwQsU-DjJl_3y;0KfYBdiu#(BuE9Qdfa-I3}mJp zc6PUP!v@EJWaa07 z`C?Tie3XglJ*<-Tw{G3a{<7NTWhLxUH(_2iAOi|0`Z?Xt)ymNrH1y1uekEi$K((Q+7&l3?09jTA^G?N5RU^}H@&cmWHTHUVoZM z?~o}tD@gpu``y_BIWw}~Y@%dvyUbZ9U48x5q+vDBIFgba8PHad9<8s(NT#mS{yQ?=hF3z$y=jt tnY|iL!{Z+74uVJjQ~#R|qMXkz@44*Xv*~tf8$bs|;-buj5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu 
zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk 
z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 
zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) 
zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! 
z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ 
z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s 
zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x 
zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V 
zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= 
zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/plots/worst_loss.png deleted file mode 100644 index b7ae2425ffaaff8826b86a13d55d39f00f3537dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 26093 zcmb@u1z1=8wl(?}>5@i}1_41rkS>uwl1hg(f+!%}jdX~F0)ik)NyjTG4F(dTNF$AO zDxG&O+~4`m+2`Ev-upb)XK#IVvFaCdjycAdD^mTA0ueqfJ_>~*Qc^@~qEHx0DAc)+ zxY+P7bF_BI0Wnv3eOE09D_0M5=liJJ=B|#999$n+-(zyW@9bjjU@yok#w&1@>7lEu zql-8npWT1_0I!4d13u27Y)!~N|~ z=VQC-Z*1Nqqf{r6t#xqs4oyDv-i*nDc+S8^`?aJZ+dK3sOU-&b0KH6oKUC<1;} zv}f}M!k>A(_Y_&+SMOuyNcgKFP8b{VSNe+y$WOVN{XZO3!VOZ-zbq+v6uBc9aN-Mp 
z%W-RKYb(qN;~3r@C^oe9o6r`rx3~9PtsKnem3#YA#Nj3hW|3)=$&#k4H~gcSy}gNf zMsE}O`=n^6{~4X!PFZ^4?jA+*uu+88i)$H!)$5KH0Ny zT^=BckB@)&v&JcAu-5O;UEjb!*Lp1AWFO~}h&}pnXZ3rjsm!C1iu}~VtPU7TCD+AX z`DHh_C-bu}9ThTM92_{gxw(nsYCm#sf3L9q!PWoJ+IoM;JMUn@$KKY!xG-F~;^Ft{ z@&G;=xF2Vt^2*OzEpqLnqoZ$`@&s26s|Zz+uI6uwOSmqm|8lGISbyrXyLJxsHBGE7 zj*XePS|h#x-2*jGAD@|p?^(EngdzF)SH?YOh)~T*Jcc9`6qpJM3NK!~XzuJJ5Pa|z z?e1PRAnZz1U0YlGCgAjBsLq4W*U!)V-n}5*VuPu}^`^n4x3)FKuKJT10VlFe{>Lyu z#MIQ(sxO6!_V@PM5e z|68M<&(YpYtbt#jjSv-|389*r8a&2pjjUT6o0~1krvCYhF6-k`_1go6j!sUGHS^Ih zX5TAq_2@($UmGm>YJ1IBx#~Y0{^!1aqp#S{u`zRd`=^0{fdX<|2Wim-1LsjRG&C=y z{lt+&j~)>|cKtgfruH9v9Z@{}3Bg+Y`CCxLmjH z#q5#%L->?lLN|9kUBZLkvYkX)*$RG;k&|Du>ErBraqWDD z|3Us7W9*9;7q^xNFKFeeK2AxYDKTk?x~!5k*_W?%KWxQSEZ{^k_x8)M6u}3rD~n!#xo#a$PkovNQu($mK@H{U=;ms_tSq_~*J|78C`{mzJOjn6Kx zRo9DiDE;@YSLS*$Vq>KbaL4LA6wJ+;kteLExZYiLE0!U~W&Z2bGc{@1&Lp1rq@*zT zE3=SL<;apBtOY7mZrvgjsw-WB`uJeGE#{Jl+5P)Di+U+8n{3;&v$L-i zVi^<_6`NOv-%ZWT1gj+TPR+#1+|t&jAf^*l|0U*dP!n(}jWQdr_i}V}yrHX01yk|# zm6Xr)WJ~bE@H-9^lZXhl-@#T3tPBz|vLM(+_XbOh#RJyu$LsN66%$?(CKMJHR#a8} zu+(4pz1%{ zr1{GiJeb+y_>7MqBFV2k8sdMEn##h&go0Q8_(M&m*z5FQd%3;4J2W;{Eyc~x*O!Ef zDy+U)zz)>+`N~NUvBoIy5wNI-XSl{%QZu zywH00OA$5St-+uD{W{wD+SA2`)z|GHY-Gm7#O&>iI@pd7GfH2@Ai2OOU8KW-+Jvat zKVk(D0}X@Jz3kAqe+!doZTOu+;l!vz6AuPSz}{3ier&$$MAPw(VXa>n(Is}SMCEI| zymtzvL`C(l3kgyF=FA#>C)L#JN`b144GVNp`S_)* zQ1IYZ-ko<c$|oW!-&el{(0k zh|gnKRn&J%j~^6x?mYIx%Kg2)hs8d#?W7R1o~K-w_TAg~Y~S<`F}-*QJaF5`kMf1b zm4ha7ckVp3a`uIBcRV@rs{iv8ZZFdkkqEvQ9GrU;XRKwFpwyjz@gl+ypSC%#xCluhlu>e~_%6 zuB<#t4+uZlT`%1pfuwiiORaZBLxJ91uHH9tI7Ry980T?_(be{28-N66VEaM1?lThd z)0fxVF8lYvZ7O!wmzJU+w9!fX){N}P_J=b^E?rkp_$**| z_wL<=f#ORiCx;GKP3kXBO-+gV9bB)n8^wNY**@j;$j+{_;!0wNaU zlUH+YHa1)!U#f)w%xhGmKDT%E>Q$yF7RYd0<7zrrO&jUR*;Q}A@*}0Cje=R5hP1T+ z`);5oyM96f2Pa&>4pvh$?Ccn)nNQ{bqD0-6DDGrP$9#&pw0z`aW?_-FJu);@^K+-* z=%_FglHbBalMLMJb$;!Zhr*!xCL{}HTnt(v8}0nn8599%x!G&B_t$}sfJu}!^nuveP!q|Bs~@} zF&Y3*{j~uc92|gJ4CZ}ezdrb$BRuzeK~7H!LOB1mYgYvGPp5V}wP?o~($OY~X4`*y 
zWU?OgywY#*uGIChvdSG;?l>9Gf#?e3_Cmyg!Rl*TzI9Je&tO-807WJz#@@uf?RdQy zq-+vON}XH>NIkGVxuh8Lw{*|p;o;pc8PZKG2{@mSkl=a}dr2u)FQI+%`H6+Tx7(Y( zfRjIrS317uL)d7CcmsK$jL33N%!d<7N zMvtjhtl}Md3HJr#a32nH3a<%wJ5+;GzXp=sGMY5lRQiU77Ds!V$Us84Ds!5-l_nElvebM1(-WFtfRh*i zGFzuVM#J8EEakIf25S(-bxLV!>_LXDd~>MY>ra`>{GF|(egXueJbLs9xx~syB>`mq zeqVw|@Ms%5J7G;tGPEN0PYkQ=%|Av_I=Z<5u;Gb&_39~90OlVeNFVkWP{QVLJlL{c z8LQJPz3}|tVaM|;xI{$qyK7^cCx`28pFiW+RMI**ISn-h$RIEZV%M@!wZ-SB8(Ujv zfKOW+)7IAZ(L^Kt=UB$Ux~GuFEWT&TBk$(DweT2lz*VCflC`xpPA;zAEg#svn;)r7 zC;RnGKXPRF22t@Cn8D3bC+akll9Cq28ziJpk6ea^hg-V4i4>mFwY+y*=8~e&8Hi)S&H-aF4b;jVv=Ot1xO) zUs8x(k4sBS`+l9{+z&tg_D1f7z7eQKP$qpf`~u^x zo>x*%(;x7VolVYh2W@Z9TVG$l(RM-R;aEnFUa1L8&PR_4zwjqd2t`FjCx;(oe1yzv zZfS{`s2mEDGXoV~D`ZUo;t^KesZ(%fpBQr$^EE-iwsMQl8*qYocdAg0(+oC(uVL#z z_j!wcY3@Yu(?*Zeh=2BI#=RF;YSx6p>z~@md*&Jfmr*OFUV|^xh`r}*pJap z_#g5hh+fF+XI~!$63QU`UvrsL?Oq?7>wZak@nT+Ox2gX=7Q|Rs*MjPR3QLUZSgu}G zT^}h>fm+kV>sO+{DijAbUVqqqeSORBcT)Ktu2#2qbOgo35WVt0us+)Qp|&too;4s| zY*2aA(^FLXXhV*WLGmJgC5;+U*2DN7hYZULp zeK)@=Ff#MBIq`hZj6NT(&h-PSi|3)j^<*Zogg!dKLSv55^t#9yTNh;PpYmcHed{@xc zwMHiMjd=QZ-S+2AlWnu!2e;XwwH>xshRI0;RE3ys)q!uuJCs0+R1&m7Q%li(P(xK1}+pP(Mj2DRXmk z4HZhA()UMyVxyu8He}nWsHk8=7k!NS-_(f6GlUfBeN0|ms93B!18WO_2>*~$A{1mq zF`Q8lszaVV`(fd*(ix{M@3HnXzv{t02`Q<@YE*1&z{;xr#e{-9SC;#}zLHmYcrKTO zL3O|dz~($JFK^<(O-wmW%0%Mwy9NdVY@{$=FFI5c<9(-h3Umr5*Vc#u2g%6D&`G!# z{cJ34*rCOg<$MQ3!mUEnhYvZCqBch}66!_^2*+v_rtqLPo@_))2wni>%>Upk9X#c0 zhAlN&c~q}UF!Ayd&wP%>Q5X2&cDQ5rXsArqw!v+A;0=_Jknnilb$@xrkp*Z+ z{~t>{6W>kwqs`BZqK=cOr;IWti^pVeWhL5#FkI)EZb9Pl=iLovEB@oiA zKZF0Y^dy{?R)YUPyU{Q@8h*C?MLi|!eop(A^9$O}Hv$QAygo-fq%fX&-q+aruPJ1% zn@{u2SByNzr%`?Ki(e=~?!FP(E7d2n{MU4w`xJ@EX9AMLgZfVoo>y_3-Mbnb8J{`Z z5I%4%;ZG0?De3pDiwR3F?rDTX#?#e?eoU=8ViEcs-KQvL9<%a8kynzeh=TuGLQ9{% z2D_tmDBq02;CK4o=lFMX>`9c-XzgTvcm%!*msWz+wAZ@P?R=W7Z(rlWh1z*IZ`qq) zb3QqCu$TRkpr!xhXAsLQRbIT7en@?g!ogA9opILXISLBdNm`YR6ykTQT-!R zD%@x(+M=iqOx|MYSj@8en?mrBHS^7M`8qLRLDY_C(6|1JrVI(mOSvmzuE-L6`57L+5X^Fom4A$U 
zxuuEzzDr$HMK~AZQgXargoJgiqGw$6hb>78ihRv}?y84+PaBS)qohWTdJFo26ytSG}wBJ`sTTh2W5lq~Nw<17>WLB;@o_rp>3%3EHHLDmVLcUMt3Y*fP598FeZm znHW}3N3$P07FMhIhUUHPfqEcU1K0ignu2n#(CDyR89fdrN-PO4%5P%p84A{nq%Qi( zH053N=>6@Aofm>rTa%m;TXe)?bi3n2f3|A+XqozziV|rlagKuAxfqE00+1!e@8rj| zjeR9tbAKPUeHPjaCCg4 z_!^^XmOCp$iSFCjFqLLG|5hNOEh|z?GWEr>uMDi_YDPS@3#N(Qaas`-0m;$TZsDAl z1TAVLC7(ag5*I1RzI|U?aQ6=jO9B~;qSOPMq8kK01Qlh~-}e1={o@DF&a3>1(OW-n z{_NpxK3>5{Q&SwQC6aK-v0C{0aqf+|lr6cakbrpv3rp~aICNig3OD7$UuKcI#P2!y zt!620Ftm^4+PpV+RZdxpl}=g?!P6O{RBpCzB(B0=G(Bo6ey4WCDg>F1k$_}ogO zA@qIz&r9a~D9|k!)F|X7+pk=8*X*-r_woG*Ab{&4SAyuOBQmi>$C?;}d zINUI2tqo)}T@Sf&p>9N+j-|jtlp{Y%lq0fzBXM_Qs?$TjQRoV}``o7?vHVML9nEBA zbbp8oXgyplioC(mF;|Qpsgj}$Z~K^-Uo$aq!_XrW+*sqAFIz0;ttUFCasEToHX0qP zPV=SQJDcySHhx;|HY#Jamdi~E&2|ovj_7}`bZrEbR-8u_N%)_y<7HDeA|>q?hC`Xo zn3Yb=dko#@P=ZyjK5zTnCDTKrD-n>Ehw&rWAhT0TxSf~tQ%FV+%iGtAO9>HqNiadA zIyab;&3|nWKfL$mLuK9F7Y#i4N1FT^ZOw6L^w!1OSGD)NL-=L|&NV5FxAX*jRx&0d z?Z-gwr@Sz0AAGl^7=z6x7Ze_94w+w>Yd%pwH8kp3)RNMch9l|Fk*>QDMElWGvb z)UxzyapS0j5ZVx+_I{fYwq`xMTmo?kLynA8gR;Kw&6Qj3_ShPjxsM{zt-*BpDoC7x zSJ0Mcyzr1HBUe|k^U8-0G4pYz+E-yTwPV;WC>81!2i4U{Xme459OMYJJpSSQ{JaGe zAW*@E0-y!pzInVmVLQGKG%tb+B~Oq3q)Pvp@8SE?7#u7YL(fXuucjPIS9t3nGd`d6 zD@V4)=2vdbWHh>8k@?rx>PN#tSG01?`U`XxMk={a{$!joiHcrK<}v3r2fP=S=( zx=3{oJd4oHn>RbQCP12c3fO39NFNkH6zb>CpA`TAl1J*j1XXX}?hIiIz1dQzfY#>m z_7-l*eqdnstL0+X2o)Bef{LfOzM?S|sTA{y5)|EI|SX012hz|~z zTi^hgDu)72R6-)_erFOQB{{pebdL2vVaJb9@+m3Sm7hO+K*)Ni*^v_Po`nT>k%){8 zBS=pM1f&}IUm6Tw8xly&QYA^^iU^jf!=hclQ=&?M5?bBJh|tH!r`R2SW4nBL(TD@M zS>Odee~hLs2O$xmrvQ{A0)X3&4A5%4CiQu>nmPpl;D*Y~&jTeIUTHhXa^(u%J7(Z74wnhNwdsn{10OMGOJa~adgets28cVi8_3v9P8Mcn*+PdqcZh|7)tci<;02!P;b;}ly?Y%^ zE+v}zAvrlY12@rIdBojC*Qx_-QOys0N~X8E50PM@bNQ~KfUs~_Mh0VRYiq?wTWc!` z)YdNPP)tulRR+~^Z-sH4JCQ&N;q8~g{Y!^Xm9oHm0g2@OhzIn`8s}g5boBIvj%;Z3 z$Sn`2kI}2?>e;+rBx?;_zQ|i}#ktnl0ovY!;(r6S)=Wntx992cE-5v2_~y*#^PDu( z-ay<49V{&k){kK0{zHiwd2FR&%$ z_denyARQEi&rq9{gQ`*U{yhc?syKMCw~G$IdqYKNe6B70LipRa9H6XYT3A>B)vWo>6lU+fO;PibhCjbx(oE@)_L1q3 
z5DW*46dp^Hzs6-)?`8k|bJN>fe4c|;qt@M|CfFk&5``-A>u2n#8jzY#h_DHl&X`CD zsEdinlU`n*2^p+=e-4#5b8HOe@Ql%*5 zyd>A{vbG8)HB|FY8Q!VsRo6&vmdq$5bM$B8Nu_3a6tZ)2YR;rXXnoa&Gey>~LkOiw zr6N0dnXjTx&Cm;`HGksVV=BDLRF8Yi1<_~sOG(Tp)s5W>F25OeUN2#D_4_gsGsTgs zDm9M|;}e)ZJde_^;-lp#d8-H+P6~MxMHV@U_mh-Ox$4o)J}+6~p18Pd^lG;+;*_U=}D zO?o5ePy_T$kH<73k&S>tl1e&B-=}jkKG$(jz8j-nMor0=o#fGgztGwWsuye`dDVuQ z>Rw+Y_m_@N3u1xEwE2d-+7fQ4QCMP|#OugVvcWYg^PMWr2o{IyN^F!!@)OUF48=E% z-3T>epWJPi*xPrI#!Xh_LPpF@mBZsw7rs3fN`G%cCE|(Nvnr-c?6WB#rai2X2x_2L z*S~si`jbmdhA|VoI1w@Ol=N)47)iuCkL%KD?BD*n*O8=1@dsYK^6bQ?|ESZLZ*!XF zJ+Tz^US(($r(i`+#6LT6preH>qp^<)wE=-WM1e!ah*YDk1$n>EM8EB#;?rNsQarw5 z$P&N7qyQ_0784og^BbFi0~^*hp%rO2ReyRC@5_ILnbTfJk~a-1=jKy~TYLXC6aMQM zF7+iX-)j|8nCJEuXL*hooDAWbOG0PY?CpGq$1e2)1I2G^NRjzW01|uN3F4a(;D6>* zeWRMny`AT(7m2-<$s0fe{VK?CVc+5<-}qHV?D&r+v+(6hjA15|!H$tO$jrr``{$bA zClOXZb2jQS2`;?lrxS7S*)qLBSc+)B;r7rfruNwIJW6`hh4QV#cybL(=0PZOC@^Mi zI*NKk?2V%FZ>FC(>;Vt)$0CDIRCutL(c^+D~xyEMHag z|NG`Hpgvm<=VICC^aX}+P-Wsr;;pTjYF#!SL!q<+Vm=i2J+dZX z5pYBzCAb|Huf!$FSL8c3KCz zE+&%;@~_9L!24O2{U5k2quOOc8Uak43k)A1X_J5h0iymJAd?OJfM<(;@glDh6myOd zKGOI%0GSaK8Hd-LB)GY^?OG2rMZ+fhe8Xf1L?uKu*DW9g zBI5jqPoG*peY!O-JXmTP3vh{vho^XCr%>hAE$pJAB2W>CRa82ai{;STFB`>SuzcnpW*3CXG=k) zK;j{T^TR(T#Go7Es!q}@c{P^oH*X%0i>fq1D+yvU15eLFUp_fpJp5N@vIST+QVW%g z-@CSeHt?;)SOW1^HJu!A8&o}t1Sd^F$01fAXw=M7QpWE-e0F(~5wRrAt;Gt#5yv8w z5T}G~W;AOshc~j_Opz*KWo3maxwF%#LFvvNUYEqT&$F^v)6&z;?{7Cbx$*JwMPx>V zgkYvGw_6Iht_*3oT+Zh@#6sn0;;E>p0Cm_-;wfQhK1E=MLUr!Q&wSXXfXpF4m=MYa zd2w%lpBd=+LMhOL0e^`PE&@IVMJ>3f=<4lJ0A@_jzI>~&)=FQ7yR^3}ii(TtdNlzL zVE<5onsCP!zY@2;`U-~9qF1bGWZvUv|G%ILsAQz#suKrf0NIe}-Jdgz*0b=22+wvy<6%T1}k7eiP-kWL-1>;Icb+w55`Z$rEo*wGP zy?dDh4bffQ-K5|#QF_LXAP4|07X>VFLCh~&dTz!ZU{DbZ0P2P#5bPiw98_*21ZhRJtQdI)aI?_?=>$JU^hz&zszb*uVn)ANvOe{lkozECgq z@rXE-g&M2ZA`T&V#a|B^5uIFRP&61!R3;%?$#AMjmZ@P7CR(B5owbXP&P755dcD7kN4R4GvV7V@*OFm zBWTO+tO>suuBJqpO$G2V0}U|}6tRRP=}c^UBN2}+yO}^B^T@-nX zl8t?CX~$~8w$%zF)nT&8dl2*@ATURu2oeD;{0{9<49gl^n<|vM29b+R49OiBs0!z@ 
z4{!M@u~5EV{Mb|9zd-(G<3SGfUYs4JV3iOF=ETsdLbFWCK828^6oy=|!dLROkZ-C1 zGtXNRcn~>%jdJ8z(AhCgN@rieR_Ykl7)NPl`e+Z6c!)7dNE*3n-_3}eh-h`G9KR<2AyI{GY)Em0%GRX_&u26?;Ulxw^kR%YXj$=SS-c?JlM&`1|5oj6koQx?b+aIpUks=$!`g`>0p zN1>Q&Lr^oizqO+ecjD>q{|ZO9&}UOL@;^keg7-FfO4ioa{pB?9DmY>8t}SN&21|7^ zJroT<8>o6ToarS6bw1)}93G~b@5v|~n2v~O-#`#6pLbj5cd{#Yu{?K^T{vfh9lxL! z=@Ss1p!wrREEHl(gSrEZZa7ds?o9??C}_9=ut<{q(_cWF^Pd21{P-5J637gxaTOrh zBZ(W5AXr+k!BAqniUqjH-1(hg+bNMX4octS2-{uZGOW>fe>iJkU?3uvuQqE5GV=Gg zYK*8r&L~daITxRZh=qWEj~)pCm^yw)un~dj4zaJi>yeHIkpmM4=RAsql@*)>8h`J0 z$ka}rK4uqZto{T~CPV&?Y&`51ov3l<#c009=DPL9Xpq%dApw934W%45HSRS}-$0;W zX+sH~Q}DP>&d-N~YNIm*nhvAOT-W2mLhk3vms3R?o`N1^^5+DYk?+NZBH(do0|5*% z73OQbt*E+o=9GS2399viu>|avS2uav^jO9J9g@3c+?=2UR|G$tDX|J=;)qwCi5ZQF7ZJc2-|Dp4bdP4wY0UJ|MKMvC)dHmX<|rtxCK~P zals(0oYUHyqe2R@J9s2ohKKL|CgxP%RG;uH8HCYKEMO%I?NZyOw> zF4QX>sPOXgGM{XIjEjeNz3~O0fqaPmWVHRadEM%qN3=xJB06qN(GC}FKgUDukP9TGL6(-2aIK7wzYpW;8U{- z?#a-|NIcYJY12t7VqD5mNp1zt15Aj2PPLZ0dLVctSvWZr3#GuG!f_|8#dSmw;0TL&Syu-7BF6qWxJ(EUpE_^a{Xb6pqi~IbQ!(0Eqsg-Q$%K$Q`_cmw1 z$;pB^QV})`SU1FnR(qHt=E4rZMrfk-ymE<|-B8&%(0bcobs?@}(B&YrC)K6@iycbp z9c3aR)%b-M6xd_B`EPZXwgPNl$n`?z_fAiaQ#W80bJb^RF@XAR5}m1^Tm0OhIE)E* zsK$v^+-)hu|MMetSNS{Wv0G z%i@hd>ykzzaK?^XBT~L@oVOrNxb{l0Ngb}dyC`B$2VSyp+rbj+Id%YscwkFO9Dkag zPCx7Hl~Q;M7V!NN-b80t*N}Jb1OaYew=*#@A(1BqCzXL;GLK;x;%PYCT@MGVY-lBr zc5lER8WDNrl&|1F*d(;hkN=^TybE&#E>-Mv=g!r5tTWt{lLKEA0Sb|!!!9tIEUsR> zbg8YU=K@#+3|8Sc*gJi-cfbqmH2;;}^8WFq)VSSyIBM5#4N5Aa?>2%n z77XELU`x@jbLZhTevb(ltt(ZCpxR;L6F6p3C@2axe*eCi%xjF8bU^4v^lh;I;|!D- z6TG&Jumg~~_bb`-8(8VP8&G!j>zKA_-(P4gb)DDeS z<1H<_D0lLQnwKp*D_w>4=)j$tld}FYK|h_JbklaD8W{@6C3Dc@5e5xYHZVDv8oc9$ zLWH_Bh-n^ZU@9=4eE5I{LeTMd6jknp~_GNj+Ym^QUpEMYUjH#Du=$TrutS>*gjwaQInjhQU(f>#pt@FBPzIvQFA86T)jg4m5UvJc(8%2Og zxE1^&2<8TfU$?>ed@E8NHzcjce(I1rZT_J6RAziNDrtubaMQMWOc)!q3_#X?jTEA* zz@;PZGYBMGr9&tLKIoL*tBe@*7Y((zcEz*fD`7zWC?ypaYkp^@ckZqA8ar$2d?B?w zZNw~c^~C`&TsEL?m=Z6e6u2qxY5lWlzJLY5lP$+Xkrr$gFbmNTvY4c$8DL9*Ax;qX 
z)7wEF6v*og_bsfRINUmDRpd4E&?aj<{gc~wS>t@}ZuKa*g@AS3SSl>d%*(??nORzf z;E}UWLar>_!GL?3Z`xVK#J+Nf@si+Ww%nzxu(iPosP2&JOUo12*_T!l6BDcPKNd%8 zKw`mx92Ez%4a-Z%BgTzv9$vDaPJ`l_mkXPFTGM$SmCNpTmcl+!DI@Rr`qn&KqJWT) zJh*YdqJLgXOA9g31I4a0MhDdZgdbd*J}QRWT}~pbjO%t$H#Eu(mj20mobL=siKRfv zN*Sw>{uhp*h1f~d?T^39Y+=}m2oZ$Ue;3Xww{-j>*P-_xAdm>}?2z?;1%V^{Yo8T* z3%9CA5ohew;rH*~)r<(?_Pp08^M_@ z!4~&zLL3zKTlAUR9&>-$+gWhSSIP7~t6~Tfs^3vk)qlfAl|3@Oe#b~HzJ{g10l}E< zW<<*iK`s8b-%vdUa+>Lil-g249z#Bip0eA*w{t;E&#hrRfHz=ZU_e^EzCqr%xe^Dj zED0pQ_vmo|!t{F21UCPrCGscyp+!lFH3UQ-?5swE!h^(Zu=F9_3J|iKjt|_RVkZT# z2=<=~Cm=(l<|PA?kB%&-LvV1Y=Hq|Wn!p7tMJi7?)%Cq}XpDpvXb$nuU=;~*fJe&Q zR}H#eid+CSqZJIm1(ieZLp*-6vlJNa7%R7q%A{kVif9+3{q5+1(pzFdLLIRZ*prYP z0H)oFk<E@P=KmPQ}OwU@f|$idaE)s5Z+1VdLJe*XwnO9@#NAhLp_6H;?}{(jZxOpD3em{GsU=rQ4W=7y(tnk}8&MWD zsm~)Fh>wHwWnXC~%VR6APYI)tJPy7^TeO10c@((au37i8J-K)d0~Po3W#N96q2I?T zVV2DIwu7N))uUf@6awdvU}TrJ=PSackp1(oQ+w!)_#+w@^_GfD{WYNeIis&d!dAmLO*W zc%Fj3Di4p{)I2a%DLTaPJb!VzZlR*f5GCuXd3^jysGqQb!@dc6htxSKkmf0nY~05E zUc!3f^?Z{~e&TaNOl+j|=}GZCsW>wUK5?DKWxzE6?wvsCgC?N@_9bQr6~aMGlh2Yz zru$-8MKj`_u0NeX^k+b^gdyWjz!(lFDos600%nze$gqI_Ol^X2vd1B&G9A^fzCfRl zboFi!ji42iq$C}f2HV@)zZoEovsBm$-?Q#pfG6de#j+Pg;r>T^Z^;ISb4X_COmf_pH{`Ooj@jqiU~YZdx`-GJ3WA zus!p^M$xk3Z}Or+3cqtlAuBdn3iR}B-smO%an*mKJTJquZUEt=v3&9=<1{<-SsjsZ z(=e6*O;gTToc3X2HUTwn^XTX$(7(V3npai|py(I(Wf%&17>akY(w_DoHPYTYG0L3! 
z5tL%rC>c}rYb2)Dj6t_vx5&sywL>iQe7yEM+_|4rAlc^*y0D{%N4RoM5J+xF!<_jk zKwrqRIAB!;Z`(sKx;H|1OR@BuH*X$}y%&P=2ONnppi{|g0;ikj4>qEY;eIOi7Jr6c z6-G2=usX?W>*;W@jnz1v2T^k1NDO3Aki|IP0RUdA`~#`X42;(=1Rp>oWcdcw6Vz>b zd!Emrxo#4C#`@Lv^qE;%<<>vgi+zNTw+BrTuOZSs2BNnGbcG_k)d~2u5!RQqpApCg zWR#roqprD(VmZ#?EwU`8W&gjcPO@BHBG?9K-kp`gw7+I`P0g4lN1i)SWLcf)+VwuwFa=sx69=yP)3mwdD&w85z?* zsko*NHWW%gtv+pJL~CUi-Fh%6@jE3u`^_$WH8nH?sY&l+0(BVi%hJ+~ znx7RwgGx4HH(Q)^=htm_oE?!0c~!>)i7y;>DubkFAeaz-MS47fw*>$2+|q5pTq%W; zIS@RiR#phPxw&t@1lWq!K;OKHfog7PdDCjAd%p?wTARxpFjplBY5dfmhxeZD9#&wY zt{$yl1`6sKq(E*M!P5HQ_&e(prynGs@#g=aKOaoA;guTKVW+5uK`D%Z!oa}jc;teV zTF9*+U^g`kchj!|v&UMB>r!PpTRnz*gL00|3z{h*afy2UW*e!pBLH!@cz42Qy+PT* z_Wywn)enS;^WA8{@tlbfxlUfqq4w$t7&|S#BwY4{mIEl> z1E3y14-@Tq=G`QU1_GkDW%b)>p@pd9FJ1s`!PKJ?D#lbws_TZRn!38=rfag_4owh8 z3PK)$w1Vs4;NS}vL;4g@lh7R223C7gXx~{3*k00s7CAgHwQ@iY&o$tdpgT&RJOPDN zw0jF#u?Um~xIY8Uo-MCsgSi?eIwldox|ETKV^Jo#LQ9)M%e->T@bJ-_=kM7HtRSEM z8Mf0o(>6~js0ufy2Lq-d$sAO=H5bJD1ODq*1^ZnC0z66k^=>)SNC$kx!A}Vs(@+|s zPXutp_C~o|DHYmgaucc)Cv(j``f24`0HDbo+gH$wEUdl120ll z*)>669uscl+|)#)G4TlqgaUXkI5f)x(cbdmfn?dPCKIA6X<&d*lCD2b}MA0_Ij(~*h>)j4vtxRUGM>V*cc`< zIQl^I3DWx(5gjcjEBkoB2LN*6-^UZqQ6pGsN(&kC8P>1)BXHuNC;_m|AbU448%8|~ z_BY}|Kz3|v>rDhr154lQZG;Qk^NkkGm(6`MMqCUMZ{Bm^SH}^ zj{5I!gV1gVMB(m40Hf_dQ9e>TL}Uv~wUgUdQkt zP{)JU1sA2Lq5=(tceZt5V{7BQxY$`fd!G^2oU64d%J7T@^28FZxanSmYs zg`}4N(u;!hsz8-pa(1(&ps#Af)BPwtRCD+n>bOKxHaPRgR-;-kG&{v7C*we`7XUA$ zksfRi;ZUuBdzrx2b~&}6^3D`!6A`qpR*qOCtEV}^Ac|b*s_%EueihY(b^=E;Gg%(} ztRY1@%joE6cWzfAaF{k%=2WSrefZG z_jCo!-V;uT=^o=PVDksg2nGBi;znv%pPA9J`-rS@`6Mn@#4!S8WDMh-pUr=yi8ov- zk$7*F`0Cl#r%MiI(B}kSP|z5T!Nq_ojR}{N*ZyKMy%+~fQ0}QagJBQ*Wd0|sp<@$~ zyeG4;g<>^W$*X@ki-D&E;l9je^z;Th^lX}uH|CUjOVZ=-gb8j%&$`cAFDO?oyD4p5 zgng;!T~mBrMd&#Rvb05Yb8qd<=2UsQw9ynv(MFp&C{%b@vq9lhu2lwXfepzsb z$Gx(%i-q_i%Y(sqHoB=>so!zxsz(!!i$O#!Eo_>leuuK&TQXaS(Z#4bi) z%6NeDBS;dKZ^2^|X6u$9ERdrL@)1Brm{6H=jjCZG2Jcdjxz3)OJue&C` zs^NL|B@SpZ_S>m_uD7wFha)m#K?6~DO7hU2Vg}Q(k;djl-wxeFwA*ZEpN^)*!kcK9 
zWARlzuFNMbVY4!G+jGnBe8Zu=7?(Zo;t0b>ODExSd9DcxrH$>Wi(u%Dt6}jbOC(mZ zsZ5mvG6fyGc~gQza&&|t^dk#MS{PAL{V>2fA6hh_y0Za8gfz_pkxC+lM)i;KIsGE2 z$tjWAja26y*g`@2u^h6K*C#wt`uZHw4?ZF*WYTW@Qz{5Xw_CF1w2Y4$0`CdU@6giz zJ;d}^G=ZWhqUQS1#yn!jk~+I!KZyc2d6!q??3-#>TAyN`KLQ=b50Q#U`(MAIH{PfS zrw6P2p5roBp4f^NKQh-5HtLIIFZ0Xk-rA3UR^xC0L!}Pbf?=f+xd50en$$%FUiK+v zIq`oAi~0WFiv`7}8-E`j!xqE(h=@cPmgcCx*LtkVZuX^98e#vYy}A6wC^?XYg9t(B z$`9RN2^l5z1Xig2IphL!2(bM+=-+XG-?<>=$mM9Z0Q&|d7k9M-v^ybQ0iyqXrOJv|DyZiRul4y~;- z^U7VJE>{=Y6D&vW|8HCaGjIo_+vNIn3dlym;2ZPaU5f!4Y-uF`s!}8P<_A3f z51aL$wSvNrh615-Fkc<1R8&^JX=cU*z0IA)A^;Ylq)r94M81cWorUFq5sKCxQ)?WH zT0)#s_Ln@p$64C!I| z{<~vQe{mJofAjb6(9sNtEeam@39Zml`DlAj&u3`XceM|PQkR|P%l$oHM+r_#j2#o@ zRtQ&+^8>DA|J;Y~d0g|}dboFZ$O^6CkauOF$@x<(;{_BnXHwZuG{z8J5(Xi(NdKIO zhzKMRY+zaUAY&ta-rv4`6Bspt#ez*thihm!DzJV#Lz@0PHsKAh{KloFJppR=KbaFj zr+!oU?DF{Hug3t+5LXVaX9rc)9B_Fy8{p0PAK721^s!KE?CdsUg88psqkgRDf(>Q{FY~rrmNokcrZ;3!RbEwa<;`}fs95G)n>+bf6o%4 z3~Y@t!g~l|Mrv(4+K2Dp7J!f9e{d+D{fk1;NaL@xsvWKh;y2KS@d=5sp%Q#5#&?3x zG*|6#NQ?U7LpTEGhTPYLiH#e$-IR!^&H^F*%xD*_MTR>Ls%zYxl*TKROs|jsZ@`t+ z%GeHhS$riy;P=KC#A*G5>0bvx61__QlQ;LCR%1lc@!&~AeqX|vke4vCJzgWx#*P73zv?X$?~_9DL=Lj`*uh4PoC2`3-aVk^m; zI{Q>p!3+k-&zUZQsLO5kVpwnn(b*Z;X1b2tqIdHVO7IM1q+y?Z@Tnc8pZA31xAT{> z(F1akze0V%=~?I|M8_t@Y(|-CMX>N&EMv`5QmGhgHk^%(cqxZ~!exboi@zaPOm&w-0U)ig`QP0ngmpZ-`#!2-F>-(kjq zwh(@Y8(>dP#<@r0BBC`NMaX!zHl#K+aUMwiD8Pf}EqFRCibDB(6@iCmciILRwl{m( zL7$Tn{F80(XXxC0$o?LQ$V$YMURuXO?KK?_lKAie14Vc?a2isAA11I6cSe(942>b* zjc)U5uD`3}0jBJY_SaR2Ww)ysXK%CgrDn2a+}T7dy~!QOeKA}LNMdpP;ZNo6q#BmC zGtYyzTFgoxq4BGKWTCfBn($ZI%f`QFTuFi4mOR=5a>ldr-ObK?QFw07zfY$xXsFe- zu#AVo9rnY*yWq@L((d>-@w-@H{G433iCGD{hj{NbX3Gr!1qoaPQUY^={a$OgJ6>hS zYI+q3HjiMWX#?GdlI$O%`w?V-fudTk56a$>jN~pj{GZH&&WB;);c_G)0-exvD&z+r zb}D_RX7Xx>FIXD9M=S*wK1m-_fC7Be%AeHAVZLm12D%Es(Qww(0BsV0W^TZzJ`jbx zrY7pa1bhn%z9XUW4!*c`bJ-8Ef$f4;10v`vPbkjOVXENSgj(rh=Y3IEA(~NEdsy=6 z?AD3~@ChnJXaPN{1#Il_S&6{tXaX<{8c%uOPQCs(I5^lUzVnyCIziCuZKUy{as}w> z_HUJC)y=cgm#L|#j&=nS1VvYL8K7BJR`%T2S5nchUo#c&z;9`yPMNK4@O>{7Jl@vY 
z*d=Iw=l$akpC$rD06rO}g>>&Di0vQf#LYaIEM8M<8675DtPR~;*o~%h=MJ}Iz5-Db z5^Py@^`K34b+v+~X2MrDAU?{ix@n<>^ByE`Xlh0yUlRfI*6j@V*q1q&@xT<00}&Z% zm`D22+1Yau1K$5ux6xtkio9*}!K2U}WOn+iZpe!nOxa6pp?z4m@TLY79|i1kfLESc zT+H{Pht5>cypc)^zV3vBf-j+Afuj3;3$f0D`nCxlIxzp$k*G7mhk~|YGeGc(GQfIf zig$s>gxtr=?BnB$U87ETkGt*6ZA+fNFA|rB&S3Z~M_lKx`z7OS9308D9a{0AiasnM zTVw$P9(>y1c1FQ}Zq%*+j0t5|m4~%?FzXzZDsp$F&A+pqCg*=YWoo?Eo#4q+Bx zl>XXbCMS!4faZ_41&7eL3vPqHv&td`OsV%vD}sS{0+M{Cq20VXTuJ<=u-lUM69y^T zlamu%T-;{(00YVYY7nUPcKO?odk%Gm1K~q!V6Wf=4A&!b`sCX;b(jyEs#*|lkpTkW zc>@SWq+K2ieZGIXMH0t>xYUpX#V2%A>Q$>qc7#*rDyHRKCAFyd|f{gf!s;qu(pGr?-A3RxXg<1lbTj8s?$gQFAq#6qex*n&Kl zi>sj~!URgp733TE*oZuQ(}$}8SzlqZ4dXmz>%GQD&(cC6Am*ES`mTQk2=!JD$zw=`?Frvop_(lXG|#oX>&{CJJdUb1l07PV8+}jHgC*P%n#GAj*I8nN`=gRX{z zWIq(D>4-j@{4`t9DE|6cxXXJII9GMCKZgc2*=9!_myXZ|mH$_3=N=DrzW?!2N{LYI zidB}S6_w4UM5J@_FB0ujeUM@ZxJilGp4!Q5o%2B+#M0m+vH+v!_RBEEy{D6 zeUBR#t~(m!c6+0LEgqXA8r5QzZ0#)A2{Uz+*AWo7A+73t$DSeE6GRrpif5^6#oe_V zC^VbMnFrZ=efT~CEghyc+!8;d7aJC+L)?8RrYk@7>eYR*5}|tW%pk3= z=hS8u>f}?oYl+Knml>z*}>gs`Mk!iAllfEp62$=OK10y+SB<^_^*?!Ih2sf zAyv=pqdN}r5(P(tN!4WT(G=cLV(j2lU-Vl$E%NI*Y)9jZJO2|A092jmfy zelHgF=2Pov}F zOrTIc>c(S+zo{4=_9D$c1jsrF#=?o71uCwR6UXDv+yN2e_;;3;mfAjK(qPSGoKx-^ zw0WPRAI4E8BGXc46h7)7EQgTMRAZLX-P?N>*@bPXL*lBP-QC?+_rLPF`=$alNH1D& zTM6Pb&qnQ*efhVLw8Lu9(jKubv#H(WzsvTwVO@Q#at?8q17WZJq_^&8dy8lsOk+nO7cgg5boeD^XLbtXXp&OufCqzx*Nw4j7OhaZc7GlTPYD)#U#$=JR~UH36g{W=G)hQtw~_=8V<=;kQ9J6rGi&Bs_n< zB5;eHgU~IZ{@YF}TE#D?Ad%k||As=l8+Nb;dgu#k%{vwRF!6V-_|~}imjkN-`wI*T zqB@8O3lj|rgiPBZ|9SMm5wJ(37Uyxwa;#N%277rGz2QQdWo&3jAqq)M!=zw5Y`r@K z-P)RcMTUjWKi~tOuNwP!fBMnSrT!ZiEt(Eq2;B?3L@s9j%Joq30Vnm?<{B`M8n(%* zm_ta{)YK%oIo@%XAt=3p7gh2G1_s}FvVlgzN44HfvqDV^+gjPt3-KH89cu!UrS%)w zVKXE=MN%kJQl~_ml84Ogtz`aG^v~$BK<6K)i`7m@Ok}u|L7t3 z7Sa+cJ4W(U;za@7kX=(FOw7GV5}Uxg-I+mvO(^WoiAc1yKLv#ACrDZ;hM22CIY*|3 zTArQDiOmBL5^a*HGuz>qemQCz^3y~|RneIHob+*TBX#a7t7M3XjkcGn2d+rEcu@!m z37n?BZ)4G`S-5s;$R%WkFOJ@L(a@JaimYW?ABIBad3mZ>3Acrj{AT__+XH))uJa*K 
zYULE}UNjrkd{AkJY{+@l@%A^1+8%D0l~RE~-$L)FlbHWJ`$*_885JPryXns^d&eG& zd6JM%SQE;@*(>~KV}dRlzA2;+-!GuiF8%0jX8fLW373$R{y^Wi{yI7=k#ws2I=2<~ z0QA6YGOk5v;a6TVF}45aFlCak{I3}1t+?|d%$FHckTOQpFbHADg%RqR92#eP9_#}R zO-&&RY&uJ9$+9uo04foKVH18@xLCt7s$uud)>2>oVuAo%u#M%ZH;W&l$HzxzXS=XG z%hz>}$W%^-#*BTLpiPFwY&Kxq?%}9JDPpTVuU-kK-0IzLdhe38@vOHtpLZH4{`92bFU63+Si3DzR38RkTeW1el*Z$%Z3 z=MK02(;LgYVy=`z9b=R=w~%B96R;B1jCw*Cb@$islUdHML9MNy6E zxO{mdM2*m7i=djhksc%pDwoF-=}F-a`gK@@fTlirH)xDHxa?LWRvaQP0_n-MIcp*M zV01vDI0eVd_(iH5S2MmB8n>Vn^E$7tLw_y!<3O(0itV~#v}y9u21i zM(5lHxs1#-HnfY$xvgZX?3bkzCiXLH4S2ceg|eYSw$Gi0#=#J9RHA6WH#!7GLEuou zxTLJC9hGZ{Y6uw{MDKDmo?6hj_05U=L}gXzE*Bg zhC;gO)%P6RhJ;2ZD0**GW22%}OA3$hcPZ6VBvCW~$qm^H9lo$;YA`TlwWzsT4!N}6 z(BK443_D5DCLqPI%gxnQ{_)3XN(~IhNgbRfu9Hg~FM*q>62)Y(SQ8*s_JwepxQ@9| zalGAJN3av00Y$bRwyFW%wMfjmU-^8N?LF1PpTHMt1Z~zGox$wSJN%{tm2=~WcN=WD ztk#d6zT6&YP7o)Pa`IAnz<9!K#S9qg(g~R;YI$vK9u%ZYxnEazz0)8fbg`OmOg6R$ zle{zV2oD<@O$-fbjd*2RRq)Ex&MP`nD8)7dp*s4kG9qy0*rrOWh?9$*4-}TiU_GPwBTfKR%>qQ*^YV?= zY&NeVI3V|tTCz$Mfm9NH8-%gR9oz&Qq?FO zl#_6TB1~*})4_=!OgqFGXxgJh*DjnpXO5)2d^}hOt@Tx%*EA~9xMlo!wuj#pP-$UByxk6r1h8}`uMbW#;Xt5YxNxy7#s_Vbg?j!2_t63- zm~_$4vi+M{_{%YR<2PYE4PhbF!=`3t3!tir1c%q^81^}AQtsM1zfV(XuvJ1SK0G>l zZd)b|Ls+Z?#Qv_bOceft_Lue!V12L0XJ)YTK(NIpI!hHCO1-wivwBA*vAzcL^k}B*1b^4E zu`MQ84qu=8(BU}Nv)N$4n{&*S?}aYI;lRLBsOHtuK@~tYR^i<&merQ|PA|Vi z-QV|AOWq#I$q!LXl{gtq6S5Gr>B5q87~?D>d@&w4|CsXm4N4b?oR0T~V&yRIVq*q$ zWd{yKL+gLL^!U?tZc$-k_MCZ!Q$3OLTBpUnkmXRkPp)#L?mH%6&#vz7Yc_ax?gPrR zWYuOmvnOMVWy=iyVv51%Nm|M`zGSZq2#N4tj1R@Ks~d%siJ6#~r~{(*2yUg+@plcM zmKu$`NZBO_KM(#jlKpv5`qzkMsVyZe1o(4#iwginDAt%kJrK&A18Vg$Oq}NSvQjcC z2HltG{jvCOZ=`%^6LpU&V~Ndnqlh@j)N{HZu>+;N$quqJp(L^Je1{5I_^coaI|ChM z=;y(W`D53Z%;CqH-22GT;Mx>)b{LH8RzA>urdZn{ZtB4d51dTfcVDn>-TTnI1N>Nm zKP`fHDOKN9F>gJ>YRe&#UgEyeApAd&8wJ37X#FYQ;|T^B>xHve+JyXzs46b9SguKQ zD>S}!pqGra9XO$&sw#?`mj-!7n*q@u5w%`*_#(m^F^QAA0Ds2Qe(xog+PNd9Zvoml zyWY8u?sNB`mZs+4>|h)~#886bTT=P?`L*!eKKTp{?-Cb{ zlwXsQlJm5JX-W>i1Ho 
zV+Q>IR~eLZb{+tHq6tc&kwcG$W8h@pjbiXCuju-TLQplA5+k>132qWLymTZF@5J*C zP|8+gtpE@aGr=uk1{e;;LjghHJ|MqOfN2aVa8|qiZpFNBaQ3ZF;8u|ati{d)>whVp zg+aS?BHaVHKl0!NNM8t57q}Q%${}Dp$&mN`t9rmle*lPQ1dL#3ELEuY5|C2xd-|o79$P=fw*uLv}?ES&_10KCX`PtaQs9@Wnv;PJ2%xmQU diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png index 4cc2a85c7ab98f436dce590ed10cd451d76ab61d..45c1135ed7dad317826c7a8ec9efdc67ba53c71d 100644 GIT binary patch literal 27162 zcmd442UM2nmL-hYQh_on2&jOHk_<=&%LE@NNJgRvsANTQw8|372!5z25)4QVf+Wcn zg$M?6mL#HNkSqxN`%t%gdQDHS`Fp0;A~=V&T`zu;k>n-gSEx!%}(a_ z7cJ~;_X>*(iwSN%=iqSuqLhe;&A)zuu$}!`k#C`)C-EUaomV`0k%eWI1N}3v`3#Q> z3yXy>^D7=YEX~=r<-u3>&?cU@wjBIFeVB4&`|9uB|KR(( zcOUwVq~D+3;IjokpM_oTB*kNWL-gy-KP(`1u{>wC)@CK4VTIX7M?EW z@&55`LyVhDu#nZ!rZX2VT;P!N$hfvfU@JR2`$@rdLMM~?a^98<-r|HKA|2^0 zU=hIx*tU#~P21Yq+AZ+6*$wkW{a@jWv%DYdF<1W(EUESMDC_%&;vzY3-39m9A9%j_ zi~Fprt84VR@+aMuk$x({61SWAb=U6HR2;CM32647^~@Qq(Jy%@p17E8Q&d}lZ>(<8X^TB}d#B%w8xPj#+jYIU z%{$ro)UP6I&5xpt2Srbf+nD_`Zn@3_MnZ;}npQP&eKJr z6+Aq$BaT0ecyi?WP8k`QoVTv$@D>|~9KE`$1UlZ_zC3t+O;=TPfOeejW7pv?(eC5@ zU50Ekyb_BzTn<*pozy72wl=_ILqoFpb-t;dlV9IIjJ&pX*UpPy-Z`~Re0l$nWAB+8 zcsYI^9v<@#!6)zT(y96JeK_R9_Wk>{?eeD6e!G0Rqpj`n!7IzUMr)0Ajvc#EIm0m2 z$ijv>_BCkz^2;wngM%jy9Jmyt9Ul{|5veqpS6Ru|mEPj@Li+v03=hkGyXLriLi#(M z`#;({JL~*9e^J$ov#KFdjwKd`-ZS4Lad{#IPbR;qt<}MTSG>N-8K74f{prh>2yR(7 zMP^g#<(kTurNNR<``=Q1$+G1h{3`0-zO$^=%-m*)+aw>+az(SHuCtx{kUtBed#U1D(w&La&H%& zR%^-i)R1YxN<>>!M(E{v<>Y?<@=&u{#(T!&$dMz(rKPH&p`mV*U6HhVk%`rRsl1T) zoJ`4d=z4PL5(`}&&1m%?S$C)8T8-wH7Zsi=_@x=v>OML!FK4X5ieZ%Hg#B{tEPJ&- zG1>g>Hn)?94j%lCHg>4v?Jn%b(6?`D--kNH9J@97`S|KakN4}n`kb(Sn%?ob$szO7 z(55b{i&JBLmUYh`|M>Cab6;Qln>UI!b8h5$>tp$9nbKnc!suLjmBfO<14h>C_iv@Y24x zs7Rr$txYe>DW0zjo58W~od5g<3+N1#m7NeW$Pg`ejrbz=u)+Lzuh`VrFz*=SJTJE| 
z56<4fySuu%#VFq1${~8{lCWXsXVDOMRZgQ9XKu#mB-*<3;8`D!_0|V;4mG`#x$E$yB4{~Ve5vJXc<}A(XPm_Z8_?f zG+UR74ldlg;t+FF?z`OCnJK+g%ex%nwsBEWJMfK1m=52E%&Vi+4&ehd5Rj`n2S4~Pvn(Td1nsV{$2hHkC>-s&oFiN|3 z@5b_mHl$cQeH$oZRdkCBKPlMArESsOomxr-zU;LKyK6UWI5)oNQ}gqo`m>INcGapUeU0g{h<`C^ zPmjjj;}y7S<9B_%NR0Y3rIKLD7onbGCtqe{?9fRtV5sy2sLGWu7s-Bf?7^OBWTBnv z;eSw4iq}uK#NBl=uF%MmV-Dl;St2x9HfO)Y<0s(rdvFuWgT$;-@Y7{St2CJ5>y2D~ z!Qm~tv2pL3jT^f}L%J#>m6>>qyPsT@D>5#$r(g|Oxn*>Z2kpE4@S%Qky8}W}nD@-f zp|7Qh>xA^);MzXiZ&Sf(m;D&OeG-SB8EaL+Tn1xq7)CFL4zjvF?(xoUIkll$Wuy2Zn`((e1~g_w*#@X=ge0 zIShSfuyX8sli-a&zR#{%X6=p>2b!{+DT{w9yk4E1)!N(kT;(GQMK2q$=yA4MU zAC9uBO}KXJR+_YWx?_*_O-^wY#9{66{-)7^yxDH7i`x4ju~g3~r$(zonNwaG&y?0q zBkSxm&dr`2A4o((^qd~8jnRm_6`>Lm6DsR2S7q-t(dO&caaZT_*RPScxTH(46!J3@ zPyZ|^xGQE=qbZa%`qoXo_P&0gfbu;-%gTtG!baIqixWh420lA;61Pqe|4H}&=Tj?M{h4*;%XFV5)lZHrr_TTy(Rp#^{hFVD-pl}u z%F~Qg;WEs2QFH96seVx^7mMWqNIJZ7sUU9GT!kD=`NpOt*AU^|E2^XaBILFSM@di$hvR6 zuz~2Z^}^PQd$4VN5$5qGI9q9H!(U4|anyYtK79BMYbaIa_+#RNuX(O_rZ$q!-j5$Y z-ar&;|Lf{%rvmLnBb$tjj1Bn47pd6w4XU!EceWJkCYxO~oc(F_wz`^uoY8HCnI4Ef z=~jN%ukW?2iZW5$gA`+#F37yW#wE1@IF_yQg>3+@!mYXnr-5dhjLb|9WU0MCQXQfn za(Z^9_V5bOU5;&lL(JV5J{{H%!_U~jys=X&Mj}2780j^DZ=cV}^xEHlTHV_alJlCL zOL32p=crcJ7y=Pze}BJ9y1SWK_?F@w8WA@xwEekAe+sAUCRU)Ju&_`9iN7G5e`CB} z>Yh*S?KXYB>^%FiduK9UzTAkbpPX9yP`uRN?2fQuLxxjdP?Jo_Js~T_JzqYA^gIv6 z5iyf6jsI>`Gb&Pif4@yofy6i)?m=mv503YHf3xhtKbJf8$-0e*@@=I5U$ttLb>GzZ zfVhXJC#$owb5`y2Wx+^n)NLXC{aIaq6%}!o%1v2683?%=t(){@OL1GOwRwmU0Bv7d z<9cHctUNv3KFg{%{W(Jj^WuDxy_cgs-^;y8)=RISPOQgi<^VpspqmTO%E7#mWLhK@ zFQyT3>^0C?pASN8UDsJ9b>M)T`S}NY$CkK?xR;r~oE5X~=$)?HIhUZ7?(8rLmiKP9 z)ip2(Ub$(LwTp|3gvVHjjBCXk)C9Msoo(B(x+*;{10D13ncdj1`%l}tlDoTD8Aqol zNBbI(lx(c>#)rQGs2@Fb>JiRUo|k9x+jEYyX;~iMlTDIzZ{Rhgn%KtsmDh_H?E?t9 zu}RW?1E6SHPx{5LDg{MFMLP^LQ!2lY4YaiQn+fWtz3V!^Idy6NfyHO1^EpvA?e>^B zZ`xV*_~`X@LN}10HZpGn8hh2()J_&s*kP-@dh6DHWPh_eyL3Kud8FC3*edQ>%*tVA z6{{6{Z%gq+N1*YhN&P&Kyi_(yyrU7v9^62z zBUa>uOGO0B_t+(oqryu%jV%oUt{eK1{6#%S~FIKoR5pD|o-L7{q7_6jWDNXE<~*D>E;Ceq(Y+ 
z#JD+OdZtzvpuVl6!`$qBP<2;kcXN(A)7VQVUJut0si$kCXra}836F{A1rjy2wK>M; z0|QO>(?9!2?&Jd?$+7VY1h;50{2c#Elf_RHZ<>T4MhkRG8UmvKss8z;wXoIb-?Nzq) znGBcNo^}8TpV-*gEycDi>C2Wa+wC~~$&4*T})+f}2M)$7-9XE@bfL^3zFM}%}!I%UbNkz!G~jiHW9q9LPl zIRS zzJVpK@8wF&ZvyI!bXBIFdNrRA7IN{mrqOC_uY;74rEz=o=;i@)_Dn&)%P%~OGLHdl zUz~htm6P}Y8dlHKbZexePu+jrVor%(PiFEa6?l8*EXcFylS9EX~Sx~#| z<8_u5GFxuUdV5+Zef|*A__*)SE6bVvi`&9&voB0fIyTNGq7*~~mCjjQSueBdA_@(L z8=v1~l5mmxKu(~Ue2=|uqPI$KP-B%~QH~EgPiW`OgWOKs(I<_g4S57^f6Poy&|na3 zYe=_CL@8{6B!BJNwF>7J&q!1WQHYX5BO}MYc?^sXy%n)S!A+D-6|!xlCu%;v0#S#V zAFhplhuOEjtYz5OT1#j7l(|ix@$m4t7WvhhOMm>(S?BkTj*ePjp&Wrt_R~rDGz{3}gizYp z!afw_dFO2dK$(zHwl3{G0MuQi8dnz=Mdr3+_x(FrGt{RzROA0nS+1LylOKJ^|V> z`}?2fKLzbLbGbk7r+^V}X{A}jcUnDObnf`3b~)|W8W#(BN074hnIrngp^=1g^g zy;84gDJX-}L8EsYMsKewj*rE2HL~tHnH(OY z8U3X2`g$u=Bzie+sWDoy5s_hDlaH;s${t@<3KWk2KK#`Rd77;NM?4NZM_+S}J}`zy z>-ROgw2uMTPfiWwf$tLYZS$LS90DNSHW{_H*o!Mr*iaA4GC4io!ZnFXf8d&*UwYQX zn9+UB?L~glBYAG4F@2f^MRU9bav)y0ZJ$Kk$rneNz1=E(_S;hJc;r0lx`wtFPCk=K zDoT>sNoQK~t=nMhFAQ~*=$rbl&43uFyOr|H7xj?rn`B*8E|uP4EPqk`l&U><(bPpH z^J3-^aG+AIL*WIyy?kG%PN~23#wTR$)d&_i@#qrbP$YO{HIyHE>9!$4*>dkP>p*Dm z)i=H5blI@*R6gJQ`SYt145e+FGPQ{Js}x}&Hu5&Fg0;7P*aHdcI?*hR1tLb-=f*R_ zKRa|+F;Fc8Njb(;illE>34T5$t#E78RjOf7Df;pq^Gy?aB^CW&*S9gPKXS3 zQ_7p2$wD1yS?fIXxtQHydZ2ym&*dU-s;lRIA{ZMU-?s6D#Hs|pJ99uQRKUVeM@>yF zt;c&HQ4)kC(z(CMhG?%5wbO#@uD|x<5h)PeXZGgjNKYfzj1J~4fT_o3~)%^T6IXR=moUx!Lf$Mnaaxgx8IyyR8R#T|qcQQEk znEhefoNZy1=QR$EairOjfd>Q9{gC~I-!|(pR-=E*yK2*`f^C&pUsK|ZRB2(JA zzY2AjM#`pfOAt?BXh**l@mPU9RGG5=m{Hz8I(?Cgh4pfrdKqGA|? z-&0h&=VpF*9&+lWKR`pQD(7qMvC2?pOgb;b@++67d2 zVwB?+jdh@9hT`&M`Fi1VZ|?~W0cuXWg;A72BG*P#sm^x2*qEu8boxs2+0q07jR*;@ zNIaC=_l`hEkU~I48Yt0vc~7pdXnL5s?_AmDauI*SET?J|xY79X9U%WfS7_n29KU@! 
z!X;^cm@W(POIRX)$uLx8-KbQ`iS%+EK6Ay_w+Ap~5_CuPqhA()-uTPuT5G{h#5&^J zQ76Tygh-`*WdXc7+5Y;b#`*Kl(iB7uJyb^3C>^4}nZ=qFGbyNQ7}()JRp)toPfxV_q4=2M zHC#Wg+-EhWCPYV?V4f!qFBmtS!-s$W5GorBZi$#~b(|BP9{cV5seok3f?!7h#;8GJ?iIg(f4s$ODjpX+)`yi7 z0*k>jrMpag%q<@I1RUGW!L|a*mj%b4a$os%!4je@^)elzLtVdY6gyXTfLI|QFyI); zoXPH3C-IjK7cXk$c}^t0@<@uC&Ai%-C3aG8-U6ic)TVa7wi=^J$?EH zqC3ZxWo!3*Y-*yyo+{(lAV4&JY~YK<>Dq)UtJg=ctM$F6ZE(LpPz~Y>=P*@uDC*-L}cHDqg?# z4Jc!(1)rj@c#UOOPx%lmWURy^^vRm5X;*ZStu;gq7ZA^^-rn8?@87?F17$}o-PX_) zY#o~?=;Pp*<14q|O6RCWtMeB$Ha5Nq5{vx?YMcYbfwH|vkE!4)_s@r%dbJO4d=YpoDCVD;GMWIMPDYvqYU~#)pHQcBI{Ev8pa-{8c94iJ?Mi8m* zh#do&+bJp-?J1emA$}v=m^a$>J;%}Rdtym8PGGazudFu`1)IYP3Hj;ywm)C{Z#i^> zQ`}a<@YZK;t~(#yKTb3CbbM}Sjwe?MUpLwC8hL|M!i{ora%rWt?pXTiapU%vUqkm2 zv*hCD#sM~cFG*%*JeTj>d2zOsgA(jY4h~BfS6BMBql$`ulq1-hVsTZ%R(j_xam{{! zPbRw3x!JX7DJ!cOB$S))9_;1Bk>3p@{?$PW(CRf*-5WUWoZthF(iukTlr)S4@Y-|q z)+UR?M~-X+rzb{7_~XlqoTwXLXS)su%W;t0d^AkX^9C&>*;jgzj_FLaS>c<^V z#4e~|Xsp};tepF!uqunci-$cAB=!paa$t%DAzC%#(7?xh@ zeCs8WLVy2#;qfrJkU^{Y$fLDq!#pEEj7oTZ{`pc0&hKMY`RO}@07Sg`ESHw7TeBv7 z^>!6{6?hVUZvd0r%rAQf0mBl+iIm$&RDxmFHn8#(o=`WWH1WfrA@g!+KDjYiG+HJ9 z@h_WM7SL}T`;}z{{VKX>z9{{gzs6;S`eG-Pr_+d(I{{I_8sM$y&lE3$Fp~26kpY6b zI!tJdeDled~sHqHva*vpIew~{i%wZx${sg{I{;uV<#gdS1-ZNsY3CU8xA&H4vgg#i}cK^H#STK+dgZl<>hXs%U zg5?E00<{C$uX)Zg0>_?n(WtgD~yYiqkb^l9#`RBx5^WEfd*Zgjj%by#&#FzjfVah7>9Tq(rVi6ZsY` zTv#{V*UQlgc`)|qO^%_VA$34xyj;}b1rrFjnyHprWLzL|4!Sv#SV&1Ky#|X*RmJqu zLBydoTwFq+Td@4lKN+=&#yZG;eUoG@$@H8`11Q*n_zsTD5FBt?&&8e^Au*cn$pM;0k$vfA=B0E1T!ncMqN-W<_J= z+{T-n9f!Xhr`Q8ojiLh>S#>cxdcORp0a$2=UryfcE-sxAzcc`}`x;b(B_3a1!d}wS z(o*9zF=zs=$~Z3p?`{F1a}vb5CK&3*=RjF8v`bnscfbukLaiQyTa3MCLg8hO=1lcx zNqNs?6U>EeN0EslBYf+TYjvYps9eZ@<2cZ4xcAJv=c&#pQc0OY*tV(l%EL}}0SG$T zEMez9mb{-H^+3rpcNP>kep)CK7!@GUNAVmn@X!zgMzXek9%SUYD=qG%+dFnwmBZIG z3Vm!RR47{)yjSXl_MK{ZL{8C{Q5| zykFOV^?61x7?KY$HK}Jy?l1sss}SOt2JGN)%aOhUdgp?t%lR(uOS?3qPC2iRgP|!3 zxUw5%cGL0aSb;m8`$z)@J7$GM#@1FFCub~*-gDnR6;2KouvaIWE0b~w 
z9>iIy8TAAuBi}L9TzCRU&1ReuhP~VScr74#Voq;zNoyN>d^^JQ+f9}qLh(3kYlZYQ0c^^V z%f&n<27O~1-8AAPFMinp<%z8zUVK#P7y?BB>r47MEPg6J%kwLmiZ(Hv6Jt{sw}z8M zj15qOwsDtVc^!_P*pcBRZdWJ^cKi~ zs!P~5m9@UQ+|kn$3pqW)MbC3;tQsId42%jz8c+mx`En*cUC#vF5F~DwfG{Zlu7h*G zb(xN_=QE11&>hMxa#3Ie?LB*o3{$uu_1u?1*hisEY^*5-r3_9`3yh6s#IcRCC})Sp z$7}Itu?V%%z$ih$77G&i=_&IIl*UM07q4zg4C`6F^k)Eavmf*ZK1BBB~9qX*s92-{8Dj>6>xl!iOvc1EZn z-zQMbc^Cgde>RXa9(wxpX~<4;TJd^f#r~}HI4(Z~eO_F=e0xhTI5!g$lak6&)0b0A zBAg}ZPiLOqx@B{}nV+vD=sJ|)yPX=|>N^Vz7tC#Je*1s3`;(sVKX#zb<@@FsovmvD zI{t<&`KxaNUeW(F{zG8&FA(bg{tM0@ADHi--N5r>oAd0`E<-ZPUYd=1YRVbm{FB9R z|HKFR&%cVs!dBEpC8v|lA@vuJ^}%R?48!eyoYwAA%4pq!L6E4RFNmH1SoB42bO_MC19?&k95s_@9fXDDM4zGJfYIXf{>F;1KNT+%I`s=TxV}aD<{s*#Ay|+(#Q6}`x$9jEYUuX)MHn=gN#`Q-4EEZkBKQTQ`4Fe-dgO|T698@nYg zJxOf9!Nng6Dl0W$ajXTwNV#6${d6sh2>dL#D^OxIb#?DkG2@U)QWpZi=boNjz~u;) z3{sM$-XoF|;3^4~%|d~th0?$i>ASdwjh4eDyF!p8^NoJq_i3EtJ?_NZca3 zP_)K&T0trBCrHwtMc)(Ff#TKh;;bJ@jHs}TQJxUEd(y^1PvgJ8qLdmfZKxs~PI>;I4&YL8$9*fhu{_&(BZC!679)OThvUs6j+F3bZ?c zfjZ38YqJZkQx<5Q^U_)L2!C#`25`5?_$0rH{RUw~;~~ z6hv}c>rVshxb>yhf|nUS+#?AA_CuJwU9Hn#>npy%Ie6%AZ!x(c96>V{aXo0asgFvS zT%Ra1atAY9$H=uX!IMPQaO_8Y>HxCD%*+fTwGI)N&F%x_mn)X*?*z5ikn5R6PLbt# zOwBAOo3$cF9}@=f3LT)4XQv0e)Ap}lzaGYd7V@>;*bCq#>4Vtm~s9W zazFu31vf8hB=6m_<;!18)#Cm60_SiA9(HC^E5&j$f4B(^0u_Az^yvu-=BzOk({BND zA#uL2Vqdnb0zhm8B`keTNuWp=UR*>~wUp25xk@A~pKJx8ZruXDxvoQ>Z-YKFoSGKn zmT@`ZW72t3y!rVR`<^@iCuL@nWAS)TQ&u`cx%s#qDoKX$95&zFQ8BXSTvxwZD=Z>b zuWxMRka2llDYx!A|B*^^#=>c1?X457e#E@wPC2CQQIPhsOgD{^$=)^sjKHKSQy+PP z2Ix>+x^$^Xos-VfX0G|q9maPIf;o+bfLWU$D=K;doB~zEpjwnL2tS*KK zfIKD969ktPYyq_Im0mSnn8VX@oW%E%Ypx8Gt^?F1;MHR-p5q}P zn|2h>IP?J~>Eeb!?fiXaJa0A{!GoM3oClERYt;C`IH7jZyfpR%4KqYS1S{|N6GCVihpFRl3O8IP!u8ZQhL6}0BicS^&+3Fd3vJO z@)w{7!<|>EKH*os7X?a*gfbxaA>A)YFyun55Kyi4>*x;~87-x67<_c=*6|tp%8}FW zkAOE%4w3!wsG@J_V8Cbmd?#;sk*KJJRG;uh#{`_Fukf8o6T)9>1LdvPn$Yw_u%H$VYoL)idH z$`OZ=!NiG9I2-Ev`f({#dN49`7HM$casl9BK<1g%LQ=Cw(@)3yhdKy)e&^;649hIP zU&qq(e*>V0W?{JMC}Wkug(O)a-Q__1OErJH4$(()1>Go84{GJ0NTRQ$i!>)x$lUJ_ 
z(%EB~(co5Q5k{p>Ng3q-&T={2e=sR!2z9*;pd=E%hPtcZs8=AYS%_OVWjc~Zm26gg z+A8ZpyAQd7krt0Yu&qJD;pzMH@6TP0&;18J3Xoa0Y30f-UAC>M#A?h;_K3(%^}g6? zknz0fIb5q&voq6a-{OFQ$gvPjsv8K9QCN4okxSTyU+4-_vAj;$APVdB7PbUXCp92^ z_!*Uvsyet(go?1JWJ@sNcqneC4c+h#gd_9_?Ao)ZBH<_l2GJlwb%13smh9NP`2g~4 z4K$>?Kd#MXQkngJ{QnB0hISzEU7@N6XWHP+d$R@ z@9!P!@zTdhBZXQe<~DMUB0dEUe2+;_E%;{QX`r2#L5A3OzV!fru?0MB(fU_uwJ#}@ zj9xhIQuo|s^3!#Zk^Za!%TIq@Rha_5JwncH*jY%shW|D8#N&h1Wdjen*y+EP>Kj{~ z$Q5S^lXg~t)v=~3T7#bf@0=#0ni$>=Vo`~S7rC|IC|tW{4Icw8*oR~*2PbAhyQUG_ z4*UxXJeFP)_U}C_4q@rlH8t;`n+fQ0D+5(&Bp9<%=$wtV?^0p+2lSb(L`1ZEI1rnd z8`L{VHn@L9VP-{h+Y$cG)P05eKfK*kx<;yoIW=tGy7kX~y%xO4F6>1`(|HfTX>>p2 z?~kJwgGtrq2cG{8$PFyk^Hh+DM|Ww*mEmx1`T~dfas7$A;h^S}m;k>f&W1^;(^zn3 zIQ1?-{I+aNix)P?cm(W)I-&;~6m#-LgxBn}b7NEF^p79VZNKm(dOMx@w@6I$_}@q@ zqiFJ9kr-n{uhl(q!TuG8zyX{<7Y84Mu>R<1H4OBQQ)4#Mfa7eD^4R&nm=lkGqaO-l zts5-EPAH4$lu13`cF_0xT0z~i{((8TyKK#l)|4iWC-Gad!4m*+rca4OhJsj8*MCOV z_M-%a0C4Chpa$-vt@T$?0>s8C7tZnq1<7!dp`&u3L?%t+(X%XtUqLq|l0^{6m#rWS zYA88S;GogK*>r${-MI1;@F@wd^qas$c&fhngTQJjxDE{U+gp!pijc)A8SN}DNDuW*Q{E#wRmQ- z1vGLAG-32(U0C`YPhLsgKoGlX+oA?8F9o;DN8btVu6V2z7lEh{9PS>3uAOiyfjkc3 zTQH|@w0?7o0~&NLFvSHR$OdBdwTTNU*kSUC=#nr1c)D}_?d;t(Fvp;9wCJjMR&txy zmtbk*b38|kWp#{YQ)ViRXcpskFmf=%;nI+2nxiu*4xSG&uOBXkd0tH>8KYmcU>{A> zcpQ%yMRG`_fT4a`b2e_X9NE zh*2LN*!>!~prcgfzLTS%26(Ir1e=2Ta(bH9SK$m6wx51d!=i++#exx+ZX311a5K2*2`()$-5uxbcF^a=zr}Ps_nSNDPyZDi?5r_J z!P&>F%2lOQd|o=Yrz~AJ|7Q{FKb_G3oecJ0_X&TqJz9x}!A#91x&AVHJo$D@O8}_d8)EAm*D`-dzOn%gy)e~#1+esd!7fFD}YfaYq!oLjE< zia&Vp;NMIMt~)thcEP@c4qQ0%D^Q%Hq!~UiY=rJ>KvZG8Kj5kbREX}sNjz&$(r$>C z23})p(_upS-G^Y-*n&^fM&C6$RJV}1=47&&8Y(O9C^=gE{DR^W3YS1>yRQ{C&_%!k zD+oI1OlfF5meCTl3i-G}9XyqFA0yEtI%ojq1Zvp2yF=|D_`Px@a`;cg5vNKMTEGWv zXLRFnuSI&oOAJT84zMc+nkf)SG_>aUgVyplqVpyYc*s}@u{p@P<$upaDSgZKU9s^A zI3Ejm_NhOTFB%FKohec&rcs*mGUgn?kz6SLu>ThC?(b%d!PfUbfXtfGIbOs+WyQs1 zBiMX?23CWSHUPbppOKh7bj@%IML3^;R_#w=QjSXaUQLwOHdswCTU@`{T3Tk;g2wW< z4kLG&py0cHV+xjHJhYStKJ7WiGB!GTmk+w1P<<}GGvNC5>)6%a1}Gdtpbznyf`!AW 
zV>#Y>9~#s(p58_G1)3_utn-lXV6QWDn!P7@lKJU7eI5(*o{7q@_&_a?r@#yeYy3=; zbh%BB+#V+n-+uM2sKOE13WIx)9@!eH)ppmNy>qS(XnQcMOc? zRXOg?Auw2*OrQy3A6mfZG%@#?{JAizjmKl~SH5U?egs0#;%V2eTNeSb_#h5C8AJ3`kBejZ7R1A zj6+A=*56hG?otBJi6&^R%y-^kn88D8geLI3ZIbV(6{DziX|F<2#1H%O8-LEJJkQF? z%6x;geKHqI7h;u@4F+q(l3|?WFI6@A5tkHp7KF?X;l>n&Xnc4yLM(y!vHqr5 zm?o*R!q@7!192lB=a6guLy6WJh@pi_~ zoHx$>q@q8p{&{)y--%kt6)ep3?z@V>BN2Z;(ct0szd=d=%_Z#MnD+^`#Zv96NpMS!G{r~hJ+ zzQ|}Uv9}iAis{luD}DyqcT37+B+d_BY_=Cn9^JQViKF^2g81!D1M zMvymSKqr#C0+}d+24#T9o5X;U)N|!qg>;`EK(mr}l3(vW}G*Hprjyf`%zI#2$853t%=d-)a)!^m)AryB_)0^!Ob<1_#EoOG0exz zjd%kCgA@=dS^KS~$2p{(=4c`mp_Xv|VPZl6RGz>XsksE=S7R!_XcB=R;r0&Ydb2$E zbGx$!a!FL#kW6Vi^>JR5NA<1e7LTfj{&29paUgaNxKYg z-ZIB;bKfrFz64wo19w9w43jF!Ep#~my^3pV$1enaKL1zE0}z~^7*rsI4%V6k!&vB8 zkd=3A-Fgr}g#gQ4fB)8z%r8(pB}2aCh*xKlUk#PCdhXA2CsmUX@bDB-aMUY;D;~@S zn|nMu@nhg!lH}s1uE{hFzjze(VV~8s&Q>xM70=)Qfc+$8snH{RR@%lD-3c^;r*8nK z(*o~F^Hq?3G@&f6DV+ml{$3S)EbPGjFk2Dr1y<*jcrPeRY-S)XCos(B$o5){<$w{E zbmb%#ZMTS#yY{5Tn)e?*3!gXuzMUV|r;q6?tpa0B<;AjL#gz1ws?ZN)*hj@~bFlUO*Gk#v(q%I%B9?gf~ zD9N6CILvCUOnu`bcvRi_%)!it9QSlwv^$YKMfT(D$GjHO1Npr#Pl}eF@KGrEx9}v$ z;BObo0XN?N7c=0T_@O_l6b}CMAH}%KlM4&~`*fi>df);q4a{(`l@S;D=k{7I&h<@@ zANS&U5(^puz4@XccXa75r9TUQ0RvJj?j^ez>-_VJwdXu~P^;r{DIzb5x%^CA6>dLu zu)ysc?kwqYz`AHVPvc;5(f%KHr=;b+*ZHXCZok0!`met_C%&~`yK%##(_-bNy`t!W zVyGk9k~8N+b*stmzyJQ6b#_7BEmgU+wg5kyiI#jPkD(;iFb~y+{T_9^dD18S&>_VII~6+r#S99~X+Q;LW*CFUQh) z;Xi}mHlBcBW8~f^Mz91Cg4hMzkh{(1udX7@MqyAIT6*rn1A+dFC=y*TFe1ID7BvVc zK3{M2ttdy9n2>c*s%byw)L|@6a!(dU?VwldO=F`z6#>d3IpN3$ks2KtI;W@ZkG7x? 
z;b#CtJ4lwM->t2IgU0j+$RC>5`=_M$322JBR!%F zEeJhJWaXt%Ok~HA_nt8|T6Iym*y$MlkObLq#%h|B11|_FgGyLti9_+V*p|lb$0fc0 z=DiXF^-Ph7FYYU*PB$oZ?LXGNHyH+kG%p0G<0&n_Em~f2zL=Yb4*V^s0dZ)kjz-#} z!9$2S&k!EZkw@f@KY;Mh`=RdFp`-_Mqb5pfGQ7e+q>V5{efB8f>L=Y?T+X3HaB4q` z?~7MHx|M9DcWGLJ5VF|VEC~Ntd@_}NG^wr=&3MEczzRuoZ^9j4W~f45fhZ%CwZA~L z3IYtYGXe0aX=CukSfSc?@lI3Qj!WE<)ukn%9zcYQeElZ?K!L(FfeJnI|a$1q>GWCwOgrRB4BU7PJ_w(MW#}&( zpiY!~NxtvLoWsD{~ENn;96+m|nA@GV6(_x$}&X##}40S>+Z%DE_AblW{< z--gwzP2-I7noT}ZqZIQ7cC(E+Y4jgN;wTJIU@HJ4UeMLk z^#-gWZna8U3#My41y+d$oJvchN*UgLr>Q2iN3UJKJ`#jZV*`LZjSQoM2X(+BWjPB& zpz#(UDEjW4&*2!o z$kJQ-}d*;qNfM@0gi2+(CeS)J$qPg(8V8vIqw| z-N$+{gOc4if3H8zE)536=sg&~D`2d!DF6bF2Hh2fscuf~Xf%V0ppJvDuNQdTjK~5+ zIBge~44M$))7O$m4l^=y6A%z@b~yy+O_#=wRn(OUy77ZaUO>Osw&tuBtlV*YDcxzV z$&R&gFg#L;XfYSKxy!>NW3j|#Nx0y^!?u^%+DM~H(hK}uGf3Jwfw+tGX1BLB_qMBY zQwt$UE^sTRURTw`lWiDB=!!$k zI)Q2Kg4~JE_wjyU{HkC)&vJqqNc9`GSqHWR2l)Rz;fccVz+CDDCNp%%PA`Xvnvho? zGaZ%jGuZmD>uGIoGKkNC$vX-jA3~P#(=(j?GH^KW)Jut?0u1Nsg2o>X(9vw2-Hf1J z(GR4q;X^(v%JB|1Q--IfwoRG8-iwKB@N0F!C-=f?76p3WOh5YnaW@~6Tm{!rGIbr? 
zGvE4Cai4RI93H~MNzQ}_I^0Jq++(U;IMckQ-Lmo!z+#9>f?=9?ZG@PL=jyu8C?7fD_IhQ1GE_Cr~tQX85l_9lRu zivl@GEhBw;{o&a^hTdWjOgTQUGU{NsCfo`rO=D2%?~s#gPHW*$vr9HUBS_RdZnG9# zV;Bj_TR*d4@d|&wvBDMGUQYKn%i%2~;U3?W1M>u0UG=Zs>deH&0cOa=V5J!-1^c3Y z7U<~x``J-gmm|ZimYx3YMMWt(?MOL9X!x{SXM6(i37w2Xbt+StfvGnY9L=blRRhya z)lR&@%RNULi(#^$4ptZwD<2&Ur+F-E(2RhT70CtdmYkwfW=5wB zItvwJ&s>6ME4&n0KAa*~9aR(`P3&-Aaf)<)kCopjt{>kpR(N9o7mW<` z5MkOz97g~85jb`TV7C%yxDXaH&>CrJ6kKuH3G?;Ut~Xl&wU?A(^3_3wlJYz-f&C%} ziSHQZuIl@kV6d>8h1$+l7hq!cO~cA{g2vE5x+Qlf!Wk*plMqe&(mK#^?*L2(v&K## z<)f=eoyK90MLxmw%QQ}>(QYkYjo}pqWq@UT4C)JiA$5BXnMv5?f1Da$nh#^oNy3_q z12~yG(TtChtsJghjfPxtIV5u#Kj?2&gcrxRSk z*oDAl+tr5&bhPV(+@^5@vdSg(UM`5$o@fUq=TIf*YB`IHpR^aF*Z&9}T8S<>vK$2b zCpfG$`jbuE?w3hUj3Av0ESR}RE=D*&EyT0I5YxG$i67KQgYuHu-~1R|h#l1YPoxj` zv-VjltS#;{3G)wFgDI(NFE{uoR+ zJ5j!(){P?55`s3V2z13U!6$MRzfzQPpm3T+2Tbc^mZaara6!ORBN#Qx!4$!K^U-Xe z_|lTVk4_F3E~vqDMI9uhDS}*k!6`2xQ8(6(u%Hq$Hw*}cig}%7E|=u9K{@KDUo^j$a5$qUcEty(S~V3XnHO>2S{|GHGiIOEK$BS~=2%}qo#GJ1aTUXPnJa((`4o&AMJ9EyBHzRf*a)`N2dTQPug~U=kbVu# z;=7IE9BkD=`ln8v!aO}NK1rjE;NIY+<)S|G!QtV(SmCp{HLp$Fa1USOz)|Zwb!k%z zGv?d8-Dj}%qLQsNx1>E^acMDH=ISFxF>Ycbz}$Yo5F6^INAt{4+{YWZJ(APw$`&#k zeWEFA-XBw+^vc9=WgzL2U5R{B@wkaJJZ>*)Ojb-O;XV5y_$Xr9O#t}4toQhAbXTS3sb`!QCN-$i)azFTFHAf+=_KPK^gdgN?)`!6rV7W+ijOKdJofBqL8p<(p*tu zsbaJZxT1Ha50}7ZLLIIV-!L!O0*rSM8t=)n2H2&QU~m!+?qkDxGy{nIf&_;-s7a3l z8Ot1}K?kW~S_DR4@ZmBB<);T)lezy2i0%?+%s5LY@qg z^UOp;^jKNtd=?q0g{N39SwJ8+Mp)K?sz5^9=s=4x5r|0BtGT`+6Zt(`hj&7{DJkqI z{;@+CXolS9ApSewi?>wa`Ou3tH^EARjK2_PGlixQ#sqK(FiEs7K^A!ld7j3b?SrO) zldep}0EAY2#>jzT%kn2z&~B9i89_l!ilud>>mGT>c`Pmq(dV~?A%!Z1kG7NqYJ5XD z%H+9bQ;g5#6F?Wo>TSnPz)Fr?MHwbcjt2mN6NMj zwu>cb3YQ;In<%c~iYV7^L7)*Vp4n#>t6*qqc?SR#%bOAp5KZC;f@CHoP<)sn%zwY$ zYqEs#jOM>!k_wpzsDy;c-0+&E0Xn{E&yj%}QxSUgNXwOS>Wu;e*^N0iG!ghS28H3J z_$7~{@P4Ymj`{69Fo~z8E?KxR3hV=82CaIE?xd)(uyhr=If!6W($c7)K#-!jdEkjU z`uc>r`UT=iF~>_|!lMWc6QFa1A4N{cw=jt@N6;vPwN#@*n4I)bD3z&05wR+HlpqU+ zIICmAJ6R<>-e1zt`Xq*|+84!h@VKpRfgsl_X_627H6JMBfIy}ab*x=!* 
z6XDv`tGBbVrWOcg@zGo=5V&f9&@}0uq+qId@Sr0&G~`;pT7y#+Nux_p`JUf^!?CA2 z!B8Lm`zp9et;H9(V=PL5!Iy~%7jpPhx+3c=fOG?^2%Kf4>VMvczv=;#=2%2;VoJsi zDJeaiz#7!>G=ZDI9bPPX^y<~CYSbZWdnyHv6%6;&z`|?6Q)1Hxs+JNer7zkWkRCLX zni>m0(l*l~r1;(NKL#|Q`+Z2ZLpi#6yl#hX3*GBD3u!QC$+Wtt1`esD; z03AL6BdEt|m?3>Psvov#^d9XL5lQsbb7G)I2_p^+OF$JcvkIOta9o|KwMNQ&SZ0xA zshbOpv!6u;VLOb@W%~hMg$irJ3)C7#bCeH-fr{9LHwPP_ap1s#m zkf4uw-m_+8!+D7Aizl!LH7+{9aH>=~rdQ>X2u52IvN?PbC&1oOXw4Za*~M0gY5y9` zHXWtr8*i-I`hxvtxPTG@73_Zsh#*aWqJBK)^QZSG+jFw~-`|tDc7k-d!t8(HWEz&PDGojZQxOGQI zZnFLBb9Z+Pwze^qCWzy=86!JFN2SWaw7{IcM&PvgM`U4(!OXx>hu-JUpWhKV!BOjy zNL~k29#CSbEC)H=0TXapY5mIDsb^;O?A$Ut3Pfp;7Bvo?Z}^Ig%_e{5<3KZSi8kmF zP2>i69QbTLX3*iQ0IrP%wzMREf1VeZHJV>Ve-{EOrtQ*+cYYudD00pPWB`X>z*Z?I z`@s-#XorfTBK@@mKr=lKo4mbe6|~zgEA?=IwM;}#;BORp`7}?!Tz0E84AU?N)mns2 zCCcXpj!F$o*)2VTjJ%8BCyls6Ja9yV3RzOAp27>BsRhrIsrS%`=Q1x`9y#X|uSSk^qg3$P79)!I2)MG4+9;yyuTBJLkyo0stq$`8Rh98re+ zSI|9vC^=&6K1nr)ejJ$3l5w4E4T`TXmdZvVAmm^c#~gh`ljM~%-ESsg=q79(X3jA- z>ZPw!iJQR138xXy5oa9-QsN>vwzO;)vh)Dg*b7e&d7QwmCBRrOeXJd4gYg0LdyZq( zD}jEweE8S>fDd)m^zO%aBr>pI-c~e{k3`Oy33y`3IUW$VoW(zlnlldU0W_?}-xWda z^(eq}sD6PF9lh%3;9emv5;e8}DlgVe5+~sAf`8~{6?(3S%As^Uwoezaki75cNYut? z&Z=k1>pO}A6wu#a4OxE_9Z^b|Q|GuCZWx0GYR-^c!gvmkpsIx(p)*L6l;w%i_*pp95~n? 
zC4W{9ByFu6EV9OOk%NP@h-nuZiHqK5DMFcp({3(Pq-4$Kd21)-;<9$wY47{{{l3rl z`97at`!Ll^gj2y9jiRG|+IFxlXv&=REb9TS>@$(vF>!y6sfhB{LQRqcw04zef63VURCE(hIJk>k_ zJ7NXvDSSr%tBnyb5$n8kYBI#j>A+2jQW@6cxohYt%qRoT&dkO*U1TTp_LwEVboQ3$ z+vGdf$&62Kff^K{K61X$nmgruQoV{Z5TQ|2FiOKxoGc1}Sxb;?GC@Q)f2nc8M&xf0 zW%NPGZ+qY>qnJL?N$2`r0S3LIZ{BC^%Ib@aH3+DkWWZD+P^mN5XrY$xu!N4_OcO@m ydvB@I6_heM<Q-E!pAr3?RWF>ZydYk{*J%x^E_bespodw)5rRxEuWUPr@OP8 zr?bP+_1?B8JsjLzr6gq}cW+y7@9F98A-jFM%fJ4Bq}xfm?VqEfOz)U z9$KcMzhy(>dFMp6@P~&?F6cJ@aPDgVF}r@wi(C7Ol_t|ToRbz=JEc}ucLuSZuwP6R z)a*+iyX_%Sv+r)%4t`~PKOdj`hc-px; z-!=b>eqi8E+0nx<@9gHbge(=7yjb?oD(G9;#@%rd$sBHN+{JAxE-qOs`++gi;oAM; zcBb*RJNrE1lMhyUR*$k-hqI1rnQU7Tks`Ns>Pyvj{Hc)QsmI9&)w5XN*lM+Q(Se)r zflAX~MSe_rzNWg|zj(U(haY~Bc7C&C`F2ZlOG``qifMswMsijD;96NvwG|f+Smb-2 z@DekLcc_bBBp_gL>Qq7U!Kcb0eEe-acoufCb91JcTYEv0L2`I^btI?n%dWDiHUCIC z^KCR(QBhI+@WbE7hkIjURKxP~|60Hpsmd=q`npa>iJKiZviOL;zWxfM1zC?zc3lYJ z7ks!$>(0kNHb=R8?OPf*)*hhaR`e}?(TZ&eXXY$$eDla!+$8f@cuK2g@P~j^1>d^w z%KCn~uQ>Th*(l<-&`Yf3#e7%yl!gdg_vEDa%uG+Dhi@}^*pO`GG}Qgle`adT!kj(R z+nSdi<<~8^Tuk?`hPY!%A3xfgsfAs>+z_TToltpwGbcH{sCKY;ChgU$SJD!8!yjJ@ z$@@N9v|Rkg9}SAeE4CTm?+Bi;Fi6s`%=YSU$vIe5RCGJ{l>9Avib#1s-LngpdV7hD z_2VU8rCG3U$hcV=MI7y!7%Vm#?QhOG*`?v<@1LK)rmf<6xJ2^!2#3SX>o6Q_FZ8QX z3E99mXX$x49h-?>X0$$2d+lXmCClvvpMLx0mtV|vnb1>CDGkc!?{rzV&lfFFtQAj9hYGTakZOIu^mbXiM>gMDt zd*401cI#F`q8>Bp*FS!^aOsk#*S@hAO-*A(Z8c%SN@?snOkApt5=8o{QMH_UqAHc<}@u^xw0oa zSfiAl7oGg@Y(sExa6_7f+3G#cY7Rd#FNTJO z`VW8HpzhtY#8jt8(eF!azRzIVFXtACndLg?>V4A9pZ^u7;LzPYF8T?FHqbNfKQlYY zEHABSyl-o0Q7?O4qDj7I;!h$XT|GTa^Cu@04U!F8SU%|%(YY?I&yQw&Y_baW%D;ps zHO0LNoEX&_@=w-Je0coj9S3&4*Z4Rmz0P2$z;9^Tk3YtoKF#MB5WvZ(SXcM#nn>h8 za%JT4;P-zm^y0P{{QUFJUp{|6+?;8PtxnNTXX1C_RW1wJveGi#-~H*|b<;?LBkqnRPGv7%a$vIaNB4RmqX=pi+3iaR{NA*P|Bk;%E+lJi@l#rE+E z;oyYBF)Gg^C0#(D}3kGu&}V-e*3LH!={4OGH8@$o+zSvMKoYhkyrFSuf7}k zUYz(--@a7hAY8k4%{1%yWzpWghGZdW`>Vg5J==v%{`ARR!uZkpX0Ld=Xstox>#cS( zlS4WqoF*M?#N(4a)e>%QZWNg2%;$ggZK!8$xW|M;X|hqeA>yOUV9aHq-L<%7jI|xy 
zltHfrOV`!ED-CrSSXXr7#0ep(Ru_X)CGf21GAjxbVc+z_N-8PU%u5{cm5o zXbr!OkBf_n{)(sa7?+(H>j-X$JGgSWq`7faRMhi`?a?hcPE0n-N6x}7>-ftLTyCn4 z;y2N*%d=zWsGdHv;Fk3&#)nU~qlIJbbvFxwEKE5xz$cSCMQZm6X)wCtk z_98wg-Ibj$CMKq?H2LW)eJ1X+}l{%|fNB+0F#=mPE}Gdd`V+nUEr zZj3t^k>tf`ym0ZN-qiR=L#~UN#bfuMTXS9N-elN}w0YDdBGwD-alWN;Y1M!pM!bwzeDFt%{aKjC`&T z<~-?%(#v&jrY)fp!PJRv?0$K7x8IjztnIyYH<1tAvQK?GnPdO@7hIeA##FPBktQpr zk-i4g;-Es>XZ2c-8U-c}k`q$Wg)3LQjjOXA>O|EG{YNy0@;5s4zsWGg4WGYsT}r&x z-RGsHA`O0Yifa@nG{fY-oHxyNPQ<;Xck-VaX&ONqbLe`0^Q4c?H<7{N-nt~yoTMef ziYY?79k=Y-rMp=tN*_yMkmt%?*5lHY7TK6$5;I>w=vhaHRaLkIt*Y1fumjU>&ON1I zOWct~-+gxo2i5D{pYsAo8jKt}iVCl+m5WD|Zyp)MO-FuB#`-1)j15qp&bGhiKQ@qx zb#<)1w=dB(CL1~OEXCi1ge}-et&EHeZ`Hm%ZtW?!qDffkzV<>E9$qOBPw)Swa&wLs z$FQ|%1KYni(J=MS^tT>iq&9UV4I^jg6bgF{sb)rq!G{rG2OaJSpZavh(!Y1(yt#7| z^qAUs+*eDLChtFcrq=c1wv>fUUXY1SuaF1l7Q*iX#KgX~Cmwf{f{SqLnF6c#B-qtN z$@&c36OwUF`Th5~FYn6exU^&+e)s3Owc5QpgNk<#+f04!48sQgbU;-#*_+#fz*QR^ zJe`P}VpSEfy{Y{2>YE~}OG3XD%}fVKm}LGcAS8V#-Li1FMrp<#;j$&$;X2+TTEAw} zp}pVilI97>r1rQBI5hPN zmstlRB=5+$Mg8>CPxXmj#*{hpsS!mi-6_ICFl$Vz$uAjgIEGxHLYL0P)+@JHgTU|f4Hy|<1{7LO2<82{j zM|T2E9$OV+&A)oji>t@Jj*klkPmM&B%HwtShzbrC3|+C9Kt#Tbz$UojV%Zx6nAAWV zw-C`7WS4UMkqa05x*M;3Ty59Gm>6!jZK^e1H8)qvu6ot<^mGU^@8_!R#e3_$K0NzA zbZ5sC^JE0M)$7)+Q?a*CI5GV3Fv|g%Wfv~|CEV%O76yZHNmV$o{_3%9mIZIpX0QS- ztz)D9dPmoGJhCqBZ~5_uA1(m`2r@1q&^2E^z<8Zuqbjmt!&7U&#*WITXBW7*$oULN zx3Srmk*5Xay!+j6^^9_FwsWsBaG75i4*GNi+R41kPsOFMZNihd`uh0^(tY7hj19^F z*nTf4cst$;XRGDvv23@F_8t7CAM2Q58!B(@u>LKz2Hzo|u9D-}aIca5@WA3u?2LC^ zN^$z<&WhF?xz-AV4J?Oc&^KGwXsTJB5CXh*@Zjs$ud!}OisgVtE?lI{oxdn63&_aG zbpG|nk4re=$Fg17Eoz#Y**U^{-AkU`+#2!j-8<|4j^d6+MSwA7#^E&ceVu@2-kgL( z4}QbbNZDN}^~Pb#dUq+OyVWGGSk$JkK5@xVo8nYfjbhf_JzjnK%8bv5-px5_1K5!H>>R19%rxvy4;GVj;o#Y6^QqVF1(vz8f@n?kQKUQ?Jn2$ zg5DiPf>Os=QJKmwPMN5%YRv+gV-1Qd%P~0^Mvprbt_ZPRfjGGqk9U1KCu-=DbrGnIT2~Q?#QG>jM z&$<8eS!WkSZrm}YnaM2WV9v@vvgf|V>)=qYwDjHZ(ned7&rIpNVnjM--CY?s*JD-T zFCTmMGOSmH;Fy}?EX1e?gcixTwM*j=4T|%rs;agIZ3gb#ePQKxSG8S$D^4pdMFny8 
z%7OUrN!B#4OasCZVq8K%Rci3BWhv+$or-K1JSk8bWM(q+_UNjRoes64%8Z?^ZF^bM zjoZtCLo~b!L*A`Ajfmy)bz)+*-D5zty&^$B zkzp|rIbm#We(zmpr_F~Kx9fUyk#_8Pt<2!(VznEKG*3G|I6zE$`pI=LhBz z2^j774qo+$Y6*Vz^7G$|4EWyz43!oYbSwBAqCA6Sw%XOzwRcg;yhY1)$ji$!B3-9C z-$zbVApZ#X`1mL@*iIclWq*CQaAAhi!9m3U85=E$7j3{#mAD;wt>qz~g9jHkHg$8C ztl4`+Z5L1gA_KktUXhL}-PH$=&wVR5Q{sqi@%?-hYv=xUKa-O+ZM{?CDbD4RB}=@8 z_FP;hcJu?*Crx78wik%J<-o0L#?5BbUf~evSJ->?3=ROug@}sm+EqIcW?pSvA&Z@0 z)}FeJZ9qfo6kPVs|F>1|8Z{*Y?E5vP34oN^& z;vJ~`%P-E%(zpZuxkmnZs;05IsW|lyal6%Znc9)kzMqb0MJrkW%U|_$vLf077wcyK z$c@B>{QUKApPt=)?AbpGr$$?aK7;M4PEBcv$SPDBmC7Tu*P<{3>JclZxhMWskYIa(jz~Rbc;{oy?&)A7P#JzhmFA`F81RIkx~H`-eT6{H@Meu)O2G zNq_;Rb-0 z28I4!c*^E?S6()@U?ICb&H9_ORBKaR4kzi0u^bx5uD*HmhLyJb@!DCdeD|-KnHjIm zrjV-KisZDBz35Ldwr?lujNBQk0^h>`=idGT@mXG?Jy;3GjSXSr8CN>lJ{?*a(OzpB zInmcy-Fp-|AG4958HI`m0eh_ zhZuN6DL4pKqB^5IOgOq<;BhT0jM>C4h+uP#cPVQ`v&7inX=!v!cxa?QZ7rQ)7t9|l zkFW|#^lA4~_2{Y2QAm5;+G>_+np6AuM5psaikI1`$PWp`e)&!LJ8%}q?r<`KjtI+nYa%yl51p!P z5M__MkByD#c+3Ei?~9CzI?*O!R~&Yut*B5p;F9`3FXF)D!x=yT2S5W9X?KnuJ?bn| zoY}s7!WATh6G}ru5L~rSktsPhwi;D?-{^NW&a*gaqmwq}8fUkfIV?*Q#8Dd?ZqOK_ zX|6+c?C?36!U@NdDI=T5#$`{u|J_1~GgBv3-D=ee$g+f^Hs~a`Ws4e06|em1OP(?l z&MKF^fT;;EJ2s`I;uN>Ck8MSiZI+VA{KdbcYM0G6u|jX%#TUc^2PA>a>B{&hjJsMNxdY~M-qgwv+(G`9DJ5?()+NV5Gojddmc6W#O ze?O0tSu4AQvvy;oLegfOQN8Hh0eQ7qg-DgPKjpaKLhWNY*gn5;81W$+g$l0uVPIEX zP!Yp#Y{J+c5^LqW^`D&Vk_PGJ(q7PP?GG+y`MPy!0MuAy51+y);Q!|p6{^8AQ+bkB zMg2PzV#cn6uiW0OnyNP1`WT3n>K=fq?pHH;9YK0Wk3M8a>ZLM2uiJO>uLm>Od1t23 z@pvF)eejRC+jr~UKJp)NRL@M7E9}(l%?U(eWp?WPF>=uFWSC9m7N4w|x8G}`VrwW044C>Fv+0HH~(gP3gjz+<--j-*_6y%-!awfPsBaSS(IzUVANqZRd`uzj6Nxf?1_f8&g}wQIB~~4My(9MYlo$X8qRmf}=jq-=i`Zc^4}D9Fb?BWg7K~I)gqC!Te=o5VvJFkgqXgxIe3=rNskf zG7f_y$U*N39c(kcGj7|$eE@7!`{Qt=AS<(7Td;1t&nzuf#_`I|Vfx$yWfK9szDfI@ z{aD5Cp#D&^ZfWkBzq!Jnn=v9Z`Qk=<;GURr$G{LyWN?Xm;J9aIgy6B8!$lG7m&RW{ zREqRA3g5q%VObH-@+r{Ed zDtGYXTb>O_Dec)~=vFuqo*HHVHn?kWFr7BR!qt6h+^g`@pNp4NDS32Xs#E>Rve4fE zsP7suB9{>Y%p<;j{eBUZORt23C*SSR(bd(>3^oQg-$9Z4z^}i?y1d;lzIUF`j%(`I 
zH#vYj@$bAWBONg6eBi(VF{`3evO$xN!3)G8HlV=w%e_04?b2!pg(R+MYSauLkYJK^ zTtiLm6>^k8qHdgYXaxe7<3QUJ;+YWk>hJ$N=cbsRN`$0EEjYYXK*cxn1~*5a(nWSQ zy1q$Ezvz^opRaEM=+>meF(De^+Y*W4!WQZI`abS!N>Be`qsEpE8-B)9M-!n_QnJ6* z>XO8Yi;CYq{MeAFrz&BVn;o#^6MiceNW}>mM6ftx@Mn4&1aMTw2LXF1spuE@o`_Tk zFa+bwz|rTXkAcT>c#WE#I4oqJM=r?-lUscT+V05vW!Cwb=DDUK+8wUAvQ`goKT@x! zWBV-ksw+Ds&bl!i@)5G=jN6-2?ndT*)G}uL9;&J z%ZYLh)`9k^GfXMXr8U>~*0q5kKn*39eMiOZP|evv15h(uI0Kpjs_rX9Vfqj zmJJw9;pVe@qJu5>d30wNHdTYWQVm;YfSnw{S2jnjooq0&igVHgpd=UBlYsYg!X5Gl9%y)Et&A|CwRpw0SAgLo zV2@haA8K*wLC4mSc0w{3N{-}%Q; z^6fnO*LN|Hc0!m;=J#vXI8$AWpsI35`0TM9n~EhdcvzLEzs(asKo{Av&@yN~?iiHfx!jxEtrTN$z{+1eP+q%k4x??b3&flX-s}>BkPsklIgY#Q zi+lU(j2|62v!U|q+p|mi$K?WIuNs^TGtWqy03)n2G0MG+m{M9*Rkg(^?H=31)h*)I zE&Ef$uT@pfEn3bG*jkFrwZ*EafNFJ?U2VH-`6WRW#24NwAxeS*R=5OkzfAb-jkci3 z_KFzeNTuL*-)~(JmmfXaQCe45_YC*uFjPTim~4LQiRwtXCi=2>U0t>xUfyl8NA@`Z zHhfvIOeGPi^zZM<`BYn*YNU64D0f>WHYkD%Eg%wTqcz~;J=$;534UG*WRAf0?b~CZ zpp+vbIGPvwH}vk+4e&hVIC#QG!@*2YWxTzrR&GV zGvq6UPv$hGuLds2{hnC8fPmF>IKV0#e}R|mJxSamPS9$EWht-$mvCKHgDg6kuT>MR zwXt zo*$2ZkAL=HhF)837ivsK-=5jE&yTL^eVnt6kFOGWDL!a|E5mIsG(&jw@!BWo`0>>P zkiIvF*hL-vB!A5@2I$mel%OP6kAOsiQe09Grb9o)c&EbX8yo!5&E%@5%D=vTB4l66qhcXeg! 
z^EpU?7#CQzV?VLI>w+gw7)6{7{jLethk+Jw44x=XNl^x0z+&2cZeNPcNWgj{!#;za z2UOpH7<2Q+jab+Mz7Ntm()ReoIKG3>9W!jr?5jl84MQ!4n*46>Y&@+KVz;dSa01ce zQ0bBp=+Y#6qp;w=pdM}nf(Hz@C1nA$vd~`l`&`MVe0*=89^fMz3g4~SHM9Qt53^R$ zzc{jJ?h*R?X|aEto<;uT#>lIQm@cSXu`Y)olZkT}bCN_Z=RTdY~}N4)e(Bi;{~Krjn* z#BGoeW-p%oDe#4Vi-)oCtv^dkEzAYzYaVN>n6G%Q^@#qNFMj{OJox|ZdH>&k;O#)) z?5p{*gEQhaCCY!lMu~FYt6M5*k{b@X&;7rw)BpST{>Od7pUT3gA#o>ys%Z&Q^!#`g z%=_``d%c-=c#CKIp8m!ptqOos3^Zd)VL)DYO>{bbW~-uNyX^??*xh5L&;CT1|Akb| zmc4t82~sUuw&lp(My8H1K)MbD9WU#1x&WNnwpA-6%pV^jfOvNlHIV}%fLL|#NT0#C zZ{KW>|G|6G%>&7L_i=HPw;H8=yso2|vAEa}@dn56 z>IMk5tYok4#*g;LY20K$_JMdrwW(Kc?E&1nTt%!{XDA93={loK#|ej_h-sI>?FczS9IDrCfVEA5;?M8K}@ z?sx?U8W>1ZUFRp-5L6g2)`<1!pN8hR_a%pS z>|*~Hkzy&}DzPnF zK1TKNo=0@BXy5E@>cIVt*!&z+56dRzEm#0LMjf#|$4LqP4I=q{*shK#fsqz&0d{7< z<)Z3VAn5KyETyG-tkh;S>uXnpRd{rCG;I$NP?{aZ!Nr?2Z{0@?p+Rh!o6Ho-y9iOB z;VmyY+l(LG$LXZ9nM2g#s8#Tk0jkeDjv3H4T{@}bFWmk8Pr@hwbbPDZi}$#h)qx(1 zvPI!q;FWTyE7k?{@AT@cSO5O|@5#h~6qo}jM<*UwnJBZ}sCcO;faEp~)Fq8$OFkqE zpVQ&7Kx>Jp6R8k@2%Ch;+6{lfY4j~16P=-HA{?D_-zk-kb$`ODBy9VCq1!PBQA z_n!E3z@&s0;Pgak;PTTO6OjVP!KRN4um&9gC@Y%;+6%E5d`TG9q=aJfpg}}$qAbB zr>c%h-H;_~lfHiS#eOB@s}UHiLYx=S1MM%*gp&I>|NMjj`1<9mRwcl6l7DOpcvb(` zUw>cZIiVJH#-TV(kbbzf^VO|vE0sG9DNpPXZcq&rGi}*MO?)g3u#J%J9GXU<+S=p( zkx#?v!fg7jV+j)j%j}Kz;u;f+2NEX%MbkL2+inoMT{Y2GFjJiL@}r+U&A*>79CV^2 z6_420S9tjka3!I%c6f1X=W_^c$yL1lJ;pctQS7~a9)0*iPcPo%o!0zocL{yUf2|?2 z&BB6DH}6ZF{khOkRzA-$>U;mhe`0DKNR#ACOdgs}|H3c-p>G#0i2UzME<1ea`yrEG z^`Fq^KQ1B9{*OZ|W43%z5P6W%aY}ADc3i#s58TgyH;ns2b}i5|>VhaUWL zj!b{3XOqA&wmk8$3)!XwqtuFv8ykYb@xHkFcd}i#X22xpA#7VJ;0z4F>QG8cLcIw9 zewFwaO0PuZARP@jtVNt|!VPd%+HRJs3GTUWf$*vOD8*V@L`0zMD9~b_UZ4{`_bpVy zWR#{rxwi-;`^Pj;|Po4V7qBa2Yc zxBGJ>5aFIzRUIUrW7o0sK>p4=9+Il}5bHNBQNh!G@b?COeHWUWi*Q?cG{YV>KO+`l z;Q%~`R)Hfo+k+-t6uvwY`7%1%NLdgI8pkrE=i6Ui$>5`eE{3JWoB7BV;wmm$xbQV{ z<8_O22(u5sH(HSSt{rc&e5;`*IG+amXq~}9fa`w0Bs z2a5-=x5Q>wwo_GAEP*-)N z88B0(Sq(B_Hq20P=V~y(vcv^VjXPlDkrUiaTY>nz@Fc7~SA-~BVI7A>N*edy1$nq^ 
z!h~s+?^$Wxf)aC?u&|q)Kyy;?%ye&(EkZEEHS+x_)~q@6)fn9GJi$GHPIOqzg4>6@ zY|{3x)EY2f*9dT!-YJ#e78K~x=QC+D1%dag(Zs3Nu8T)2|A{`AwI109h~(!5|f zU>PqwD|fz`ovOE6gzh25Q(qs6Ci;GU@6A1P<_wam(H*CK_;+yAEL*YScvBRlE0k4l z8b#QPQX5MO@Y-Zx#OCVb0_D&uR7L0@%ImSbxoaBrV%5H9MW^n@uPHXSkiQIuh1MM4 z!mxkv!k?gM-5hu5aR&GI&!W^WHadY#MTp-3|D&C^`Vv*=$UiPzsD;loWzK>n_ApqH zIE^sy@ZrO3eBJVG#)n9NNq4Fam$+MJ+a9EsDhEUdc(R3imW~Bq?7{H1xydj?ki)w% z&4S}O4lw8W@zsXiu5E96he=b;jP!TLuH0T0mLzMYt*gr@J*gcj8wZjLuR7J?PzC&4 z4<6B3sVU*GGK!4X_y|frun)xgP4T{`2^T^hy8xRN&zuIDkDU$5DG|5z%g9IrTpIVP ztw5825w-{M<*j%~EG%CiJ&O<(tQhqPhegcHkke@i0zW-n09x}95V9zHQ8`MN zCRCyZVBDUt>?&h!@UYw1mV>osC^mUe8UhIfoknwqcjzh zmagxe#I{mFOvXMO?PP#8lC;COTcwdG_v8vz+8pvpBOHii9fWs|gDdh59pY9f6c^ghsQn^-{i8m5Jr!~@L|B0 z&*<+wlcI8EjTp?Y2+ElVlN!B;;1&j_wBNm zrtdf%svf^)))g>8u+r%E|G0d491a#~dEQ(^R>f)ftvc5h zNTdd+2T);`HhQDpC+F$*!eIu&H4}=$vHm0UgnUcSX)Y{L4>Hs`i5F8Q5D8MG6dM zT-#ouP>N$l`D=i;%(_c?kqh_Up6CVy4dOv^T-l~%Q72&ou7**|EM0eyk zjSiK$^zg^FjJ+`Zgn&Cj)o7$o`)hDY_eRMHP4?oAqN2|ry-=>E;ErquQq>XugWRIV z`=d-rQ_c46G7lN0+M6ILY+y3XNX`QC@CAc9NC>a z52GWBSkGlx+tMBN$wu1|sO{lIz`13_Mkxe{%gdXm7mnWHmYah@Y5+g3MCD}eNYUh% zszlfHe?wk)DbgG5Z#mXtbCt?WTo_0AXULaPrw9<_-87ztLy5g$xSs38uaZYU*8%s( z0BVfO5V&tE_&R8@Qd5DgO^Z%F0eds}s0h+%?TVm{i2y~6*l-E677HV17bW+%r$x!Z zPwg)_+srJx8gXdCa60Duk0eE%`l1dWms>7NXTjXL_BdK%mNWe?;D1~G{rA>m>Bwxo zraZo9z2EKeHE)|N1GHdc)EqT^+nPRS55F=fd?QMsRNj9X8*2is;|ZfDb@wb`Dg}Qo zTj~G)kIkOY9f)BA6{82o1<+SI*pR&`>1x-0@>U}23`761yET*B_zxF>cgT>2Kw#`* zWUDQblGWZa9GDv%0rJE_^+x%=Unl_o=!#gqS`bX!HyNb^9S9A2kvy*B>Ym%)i?TgA zTinUQ1Zsi&tJXv;q^;R^k{B@br#&DDEH}F!#18H?(Te|Un6uz($2uXnh^kXiuZxM|p?UL^T?{CP52iaJgW zUB+d2>uIUJXwf3wLx&D!*8PnvHtEfIgs1#Oo*4XOg*D8I%_R7N_0)#H?Iv_)bgQ`B zT_-4*xV?B%7tA7~KYb<=He#qDq_l_f;f|P6i7}2wxHVVC+b^SKOPv&IRQ!>SGqdL{ zO2BRGW3%$KqZL!3ol+B<_Rhc4+b*N{sM!+iMPaDxUZK0f6QxpUFOHxr907_G{Zg>d zn7F#m80j867E8AT^6?g(D0%<>j0)#E;JFxd3nU|UkYLX)hH|XVK+P4g!G;Xa5S@Vl z_gXwTS!SI-(ro~TW3JwVRCFQfKRWVrSy`E_ngrd1)2!>gvyAh-R6in|Lvv}hzB7_A zfH>jYG-C>502ND+oLFe)a`CZhM@QKxx0MOBC~IMbVw6k(FKU}CsUL!^xQd$ 
zwc1Sj{(*tK>c4U=XPn49f$~KC3RCHzE+{sJg&qDc5qJcy$MjhS0MH=l<`ZZqX&iAc z9Yl(F@bsiVF2Y82cz|dxa$VU;fQG$gE3N7W*evRdDIBgnD5%S$hTaYs^K&Aj+&h|| zcrd}h>ECZv{ld@h?X{1dZ{4oYa_N&^3llT{4cPi8z2Lt4zgdecJ@UUIhQG1SR;vq! z(1w$%=+90s5c*GKM*fc~%!j{Kn5zFws_6eWB>qio_9vvGV;RQM9^EhWC%pd_4sKiW ziuCnPw*LRZHgUZdHOVO~x8+cS|cd-NbMVj;7Tq5P_OWQ#Dtx+YfDc3AovZN6`6!0$FbL9!LJIfb3i3UtsJm}{$)U8!G(rAY8d=pX-wF$v`jOY=Uzl8ex&;teJ(}=R)g#v?0*r4MausPS%65Le_lQy9>gt70DwIjKiq`2eK*Hj6Dr4 zy!d>3Yt?Lbpb6N7nEGL}5k)P?rA{4tOBKKF2Ukt92^y4dOQ%){dC!?WTep7fFnoHS>96sw?t7B6-5Ogc#YH_H)|)Lf?ZMtuOTxD-KPLj4j`5N2Mfoj z`b8|=ygH5dG$tZdX|uv(vGaWhDot{O1u+YM+_YD-1CBz{UJyk%`0{YkkpZ?DdQzc< z-7`$LOucn$mD$W+fBjlkUapbvbz>AP(`$Te0}5fl%vj?pIN=_GCUQUqB!7;mn5?WR zN=pN@V5Cw~rEo^g&7kV9izY#`yOeR496Y)wS@ldeU~IhPxGRG_hI*>=k|>z0@>#lBa0 zXix@Ck>gaHRSr@O_PoOBaZc_b*C`ov^s8sGB)@6@8}aC0pX8vCz~AjEwB?M zLgKARbHm2nxO(*nODw{ZV?PLA?Xjlq@Gc|9q209Zo@K|W9)UF8E1FboR6QMM=JPx5 ze=eaH6!_}31Ost;sH-&J{oQXcjEn%T*@Mp{y8wIb{p)%t!FD^pIYcT5$|vfx-^-PO zc1k8OfFtUB$0so13?tl+N6Q$2eE5V1OX1QWhzn_H1VD>S#EgEOrfF10%Wy+L>2TW*4I&795nsSu5i3B z8I77jcOn$PhGFFm5Jog}b#OsRR@=B~Qy&pJG99O4s23JDE}Ij9RwvTa3GIMtlJ@*~ z2+ZCq@3~(`SG%G_e?Gd3)Gq3Dyd&$Cgp)oD`Z)GU5G~~X@~pp=ss*nR4^YG|PPJee zqT;T);nfdO0C86Q6<5S>e*CE^Df)%jiU&Irt&aLDJ>nshH`VKk%p~PzPp*Fg|2(?} zIPlKi6Ave+rVMaqR~qn8m)g<@i#e0){m>EWg%a5y)zkp;FRxJ&Y8p6Y4GgGAll&Xt z&!78NIswIFd%&PVAfkBwiO!He|NK+>#QR@IxUEeMalDXOd!Fy!e1tXPO8@3n_};E_ z7x=FuRQ`LSpZ7fU^K>IQfAGKmb~9G!q5c&>zB{v|q(>Oc!K1WHg*>jR11Y5EL! 
zwAHTq9(6|ptn|mDMW$uIZu|E1?3KL3?|Y=pCsY<_<3nesh|ic=^CVai!W(!jNcic0 zkL>70C-w>1V7NTV6a%%^5#fOm3+ZetOnDn%gn;(KuBlF&)eh)^@@*p^At_PdJ&tA0 z0wCxjAppel4fy6L@IjZ>?BPjMe8;3Cgl84`etw7^6ZHE~VToAShdLUR_*si$3Ok^| z4#C+-N+Dt(hcZbZQEZ6l4%Ar!bDX$AlBhaCL6myg@S6g*#U>Y_j{rKG3KoRg%xEwM zpj{PAln(~s9c%!2gIuITISUCV9)!*o_=3@Lby!VJE&rbzzgKc;30n1XCz_$pMF)*1 zg7UIy3PSTTQ4pZ$b913~yqON}dcGVbsxjWJ9*wdo=-z6H7eLmeo^sHNBj`m*fZdPU z@0b7hqsY>wVaqrHJ&``1;Pj}QgnGzD69M6fDzGRf7tpztmSkflOM_Ve90BB2#?+3( z-drD9uu#+-)szc&+A=2|IUXBXHRltuSS+0CY77!ZC?(_I7@=)u7^d;&@f_#oTFJ?F zYB{6{CunTEPZA=mvEFr@`otLc5(uM@gLxx!FllgjMKUo^`;c=WAeJ5$+xeXl1AvN} zL4Z`68*?$>hD;Zv&SHW%HOLJz~hDlQb<-~iu@9fNRY28DZkj4>U)8oKx0O;y! z_s9WJ!4TR1igIJiQ~`j8x-Pn|?nzd3bSO2CZ%hQ@CVtMM6>YE(M~0U5{JmnAv0{+~ z#xR)$7}y@{7(5@>MVU=)rhoyO)Vhb#fxIEiud77hMyBV%3h4gv!$$JdV~)vwWL_3C zKUJpe4#2^eo|?fOY=6D<4{+Y>YrDFb-udc@0T$mjw*QN3o}7GBCq8X;oLkI?upr|K z8suKVdRd#jj&rRC`B$`cGu%two@3rDpTXRTebUnUwW;d^dc0oJNAWeW2G^YhY`G7{ z`!G62h(M{8w}MfY_)8d!pHa0`S{-n3BV1y1(O z2t=JIFOOoR{-5!>hvhFo?sPW-7&+`TPd*5>~CLlR*!{*8;ZYt#1@IUW4(2%pd;%*Qd4&e^`n0N{8?N#_B<(_#6zN& zwo)=3Sdq+S6ml=4PlBW{NNB_{DNOWddGq=ni0Fom#ead6VdnKqwk!I#}Y(t>W-#CKr z_J7gjF?#NQ&5HlqGyIo3Vg5b6!Q)*D(2}5Di`#5&zL1{(amenaCPS|a*YkB$AS6nv z69RWy%sa>{Hxm!q>rpLvXLZ^jr*8`avwuA;K#1Bo?tC$ zZTEUJnpT?YBavIodBn8{b%|C4jFiP^0q76m?c_96CqI#ONkiY>4pYvROuu%4+&f3JYN$^ihIvTlh<9_6-E2hloAR zEy8dVU0$cqlI8|UCo8skfdx`<`>7!<47>Z4^wOYjT`QSWH0Fv16af%YiJWHsW8QxBN$$ zp;ZuxE+HDQafMzP`Szh&IaExN)N(j*1M~zB+MWZ_C_TplO;L2sEAH%9aiF zQ@ymxT5T7)x*VD$D_vVf{2bV0b>rWj7e#wJut%%qk#=ZW4-ni@CpoxcN_}+qMl=rI zEa>SF-aBtxG0XkkFJK9^FpBF@4m|K2JX`}%Few)+4aYv6hXy;b^{(i049^10L+*`G zCK3gO*8fQMoy|;6(WmM*%VS zYQ`Mj(>P24{{HAdtbI@jt&8zt*keBf$bMjWnZsms_P~T;Vqj6`Ak)G>uh0N1Rx~~C zL@t+Ws_Ra)^k7$E0WNO(LDnV{ujtVo}udv=<-f#vo7t zBwZM@xh?4ytWCke!5Ceo2e#HQS~2juxq|m0l0k8dKpBTpEZcABcxyHQJ}nK^SZsM9 zEn<6H3F&jgehCz*pp?eaL#=8!>od}{1S?NjQCawnxRMWEYe~|MXWexXH^ufIL zA-9g2Jm==0@S{YHEtyKGJ29voB`>DFBzg{mN<(x6R6?%?6IqL5Ckw|{+f>o2c}%JU z6@@2kK4`6O?;Qq_vjx2gF#8$eM~88PkDoZVG+vH(V`kn8y`6Z0Itn8pXN7t@wPHF^ 
zR=#K*4Gls(jR&7o*Jy}8D}Oxi0wc5_*nkZ+l9VoXuqT{J-;iH zPJ_a*%i1{5w;nFxKpIfwP+Ng^x*uw8>m6^})g_7RcQ2u)wZ zpwrOgt0I~7^{IQ$m`3~1pzx`H;u`bfAPWSLC0rz!+}>xt1bo5m#URoo;rQo{%DM3k zqu(O|eI{f_#5+CH`flJCD4OiVP-vIn0#bSYZ@>%FjHBmVlynAiN;vB~(Sv4$7EvOP zxh+aCtg@n!5)+^Wf5g@`V(LQzO4wtzx0lA%k9s%>BV8!2&MZNjWVU02IPDAYsdpVL zd3IwSm@B9M;Fo#C(Bc+f3k%a(VL0xt#-Y&KO}>b5v|}Iy$o7v>4#%jrBTbDMl!bsz z?HlX)m?h1Bzx2J?fziOhhjF)*StEBu6pu-@!*@uGKCVZdw}7_>P3Cy={-5vOQfuGyCGYRx^m?RIGtOi=TyjSlz7ky8`NrU9CS7Yb9& zw1KBL+Wvlc;4e*#J#}WPwNU2t#DSm#LzU=O!xXi;(Gk2G<~d@-(-VBYJfJ}e z4D{eR8@w;*oJ^l*xj@hchCDO_nBFP=x(ovu8E?j<)Ep=L@Eb0wd}!#o4-(<|4*t-f zAEE`uj>7#$-5*Gxv7mC|{S+;i3f4697XA({*U9b|iR41{TQ@rOA#S*kF*Z4|04-;5 zeVUMO=iQa_=*h{}QY}YwYzPf*LC*RJ8>JDtIBTUl02!9U0a#K}qM7!}9M$=H$Wk;H z5xnS0AD=wd&TvucTmo7sShDLGPCIqI>_#K4))W}}_3Hf-g0XN_FXHF-d{eWr7uo+& zYYk~=2ygq^B5&1%1Z1HtmkKkGx%FtDxtTU0`^o>AZ#6H-)F5bNW26M}1iihvmDU|T zk~zF&qx4(p!DBUtX&|6F7^o8N*KL}`mK!8{n)R-fw4hIDX0Kbw2~1wj(I08 zjT}Bj2Qvzhoj157>-0VD1Pml~G(&Tj?eo2bx38PTbp^!zg(URb-6L!}Le;peJUb*f zr{ybFkYl+Q^x6`cxToyZ=qRF>M~C~bs1xg8+pv}?gh@aa<7ttEnPBmgj#V-kuI!yY z$Y|uyf{V5LLanebcz0a+FgO@6rZ5&lhAOrrB0W7FmTMc%7;%0`O299DuVX?9^wB_N zsBm3jO9kJe&!QfxVPw$fZ%9#aDwu&(p*FL8(COsi#1AH-!JU%AV)39)n!vGWt$f5z z9fAoZ%R*`lSCu$LvZ>QpWN3oqm#6DbE)|FpbkuTfiT@*&f&Ap470_L+LGz_)Z~|C| zSpUJzbVW#E*$?GoWWM*7IhdQef?uBAeZI)oaLP4C2N!KLz2&gnYQYHZt-F%H1|{2Gj%4f0-*k0Sa|UL_wtX?1a(CI^It!zAcWAIF+!Uvs4p%71<4Lu z0ApgrpaV51gIQAn`nUvAVBpkX^hH@K?W_yW%`gmtG!3Jb-H)z zlx>F&E4yfL5;rYGv=be28r~+zjv$Z{tmn?3FElVPpaP(E8OLcAj;GypYz{%Pv{kDj=0vL1a7$c!0NlQ{3sFrXP{;e?5nPkvVUi}%kn zg!i5N6&W5=H6rVJOGB47p}vgNM)5;ks0iN??`5eso4_yrWXx2OZ~fWt(Q&i15V{uT zJ-?a$UwTbZK6dqesU_Dx4JbP3^f=HgAIBQqQD9gH0+)ARAHrY=nw5>dGt>Cg7!moC+V5Y$I!98#Y|l~W?7B@p3FvC63kXx{E3m$G&7L{V;97323hebT9ufRucG=SO{%nB>jVNEwTZ&OH5hF(p#03&8j z)l|*qJGKTEpAu=9%I;H37djHCz8xPMOvglXYHF*j_JHiGnOFuL9BX#cBVdQpg@ZS? 
zJfI&(=M2qh$p-r?81euhbF&;ZRKO4WAv1&AddOKcTcy9Hv$J#ekrF=Sii0sKmvvFT zLUM?~``Cl+^~p&?e?0 zYvWi#tcK#5X+SK!Qf+)9m6yrba$+$oCII7xA?73__mGv&()ZmwV%DKT zg2wl}P$kW;Op}x`JA-VzW&zMsz+qm0FKooeXHo+N31$}Y#+U%mkr_Ftz5Z~jtr3JdK>!@;M@QY{!p=89l+ASru~uW?S|>qqu_tT@&5Au~a=4RAPXV73t_5AB zzfDN`fNg8XF&xYUJ3l7vtxmn&l-y7`R6i~a4~oHyg4x(;gF2KSzLf9T8Vri$+i@BDxOK;lLzp1g zNUm#yG$)8GbcijA<-&fD#9JdB>w~93=ZVvV5muGpCNzO?Q3xm+C=Q6s=W1&WpusmO z2cpaY94~#`TJG%oqAc5uo3}a^uwfQ{h}k7%6vv^bz-oypTA^JuTea-d);O1bLOCRh z0Wq1vW0iZE6gZkwM1lh1$VS!`1TtO^R@V-i$VeYTQ}@RzZj|kAPXlD5aRSH+G}syu2F3ZlVGr-W zMzkFSCqO>a(6HbsFLK{#PuzpL`Yue+Cx7aErU4ELv6`^H9mao%;1w|4NVVJ`H5(C8 zrhHl$mM)^;fcTR^izcRRl?X0@H(h`ikBPXTxd&y6RE|E_i}q~zu&BbQ^Mmdtl~T^S z*|1v?7%K@GB56ujZX7NhkxaUQFtywpeKIuO*=mwLR5TQF#E^JN96d6x5~!w*MDl#0 zW6B=IjCUXhW4cid*kkYub^HQtL`v4>^O}t0_hN_!{`(YsS1+KPrxSrX@Zio@=s`Z1 z?x-H}cwQ63=78DE6GjB2aKZ0J@1!qtgbS-Tu$t7m}?RTellSVnNpO z$TGxY7{OPdD@(qi?g+cBP1^SPGAap>;SDgmfY)gUl!d-n-mEM2ymhG}>hxa*LG{hV z2;N>aVOBDPqz;-^Oam{ALA|D)GHMwha~v42T4DmBRfAfUC>udDye5ha{Kp3(J`4`J z{g2wt=as<#jN|WIT;w2!*~*Vx9JL#pi}?}TtZ>nL@cXb7rYsIak~EXrjuKiakur?z zuw2Ye>qgF_xn80=lMS0jjZ~7om6*OMAA9T3b#Qez8qB(_(N;BiM*=SSj=(50}Of9+5=z>tG^1 z--sILG31+5)}^(8T|aY|o~hu-5#t!{ov}pvX+fo#&zh@CL$EqQZnx9t-FsA7--Wds zvxkr%i3LXOOkm3~fwWX7lavTDt!i)2oa^<&r_rbpDT<8ro{n!xC|ihuT5_BvWhn&t zxgZ{P1&pA^kanOi2da0YUqBCIz|RDcOtQNikUpG_rTti5kpul=Fjd6`k!3-$dLaR= z+$Fl}z1Ye*!xhs>#??Um=wevSoflT5wFyNjTkhJ_DRV9o+U+RXHJrPfFVNl-$MPlS zy3|gJEP}$k<@qZndwgn=K+0X4P8llrHE&N9l}Nw+*T(7JT{;iny8hhSl3_gW$W_{! 
LJDT1(yfFvDpD$-G}1^&Hz=q`cL_*$ z_niyz{O<3@^W49#&-uwV`t=NMy-xdQIXOA`}N6QEEi;=6Ywl~AZt)hHCk zC>{>{jp-zo5B!gy-EB3y2Ud^m9QAD+DD_djEmAE){_!4~UJL*qSC~`aGAB^G0bWSK#H2+=6Tgp!F=Z75C z&ZrLHEW0|Ym$nDdNGjF@QeJalEb27~Veis0c-=#rC}qgkkk%HR!d)b3IIo?jYM^@N zR$e>Vo61Qy&+UbJV#4QaY=m1&rJLnWj#!}zrUG` zNuhWAiwtPW|BqkU@KTOh;@x$N^;xmKdET!^JQoFR>o0Rps@en>3pBoY$0!|olR13s zbE_Dy{S_mF4AmSzIp(_Au8fV@it_T&$;n5rqX{I8j4m7TocAN4%T~SNwrvexBPk{I z_HuR8H0{!}upkx|7It!V9pC8DZF(>Gl$3(PaF+CG^4{v<@4DJrOj=sn>Hb32&G|m~ zYuWcNUy_y|Y!0Mp6r#<|&F>ePt9Pb9z>RlX#itc@F`8Wt%*xH>3O>(^bIq*Vr=mi1 zet!P`=g0SgX@t+hHm02BRIrtayrC2YMm*0%=NJv_@O6_j=|5=abc@$VP^@NYwZEvACO|zH-?s{~7KA1zh zbQJDf(2`YDl;-o}AL1d@*O0|mm90@#9Wt85c2~*u|=&Lt%|#%aHTDkM+7U z)pLuAer`0;YHDiA#R^f*_2e||tW5<87@#@W3kOGR!VXnZ@~#!m|@V4B-(H657r8lDY3~-d0gLD<3Q5n^U<<&=SSjWL0(G z2Y1%dId+y+Hj=F->2COkF4MMBdo#PaZepGoqNDval1NV7P&&_jvJuZC(P7Uc_XjV~ zJHLZCxgIs2QBJ#G)A`Zp>1^lK`Tjz}wLd?+v$C>kCJ*+un(AJVjE#*wgWx;fF4Mv< zo=0NOy}k2n#--fWe(?M5`nthl-I0~$?cZG-Eb*3jarzbgf zUAlBBn%fWu4oZ1N#n|ZRay#!E&JMUk?;A$N=6yVxv80%%aEPiQ2ow{gh><`G2ng67 z54-W}(?iyMi0bLK#23R)d3br-@(iWli#U_RR@eIAQ!J0xoHFk(z%O%HsD+ER6+G#{ zT=3>Ldh}=vB5(TF$5Y)|no&7Zp;X+MpN(4^*JnENJdX~=178MSyQM6HLF%W2iAzFz z22L6rPaD{a1QI!suq^wz?#G{XBp&~G%Y^bD9UT=Z%u&gDi=tP_(Xp`JnQVz_faM9| zOW7}V-O5JMe^O3=ZPAxgb%;jc!BKqm_N~U10nU4mTjSVKc$}P^HVgfQ<}LhI!)%#Z zS-#>QNneoAUBBhi2&Wf~aWhk+P$s8RCIXGKrz!8Tx4@5aQ$ScaC{a4>v8Km-FZbB| zJR8ct>Toy1OK~cG@fx3bFIQp_yPP);F}ZDtmWb0b{Xo81Pj&#$qh`DkEe=>l7Fep! 
zg^C$n1|2E)67IdbzBBAF*PTVUA9GxTG_|ea4Z5R)n-El!oHlw{PP9 zo<};xRwF;RN@0_+QQT(TnIbogA@5*%nGBbg+)+@7nr)30W?^Gz_kH*7ozw1m`tZ{~ zt?`^Fe+a<`CVbTC9&i~-X%~-O9uHE*s{>tIrQV?XV&Nrw$T-tn(pRItR)Rmq@qaB#r%5^!96 zPb%&Yzlx87LMYCro-YY`OAwj08e@=nCcJ@(ABqc-}1>A8A^j&=v_F(1_5N8kILu<$a1#k#Rd%o7&kl9 z6eV_RHyh6B;-L&yemCI6ufbwLfs(BABi@*Oh>qBX#kdU-hpd@TNJvPUZdIjPsoj$z zPH#@;$HkUIL zxAo{v8@P`7+}Eq=T@@;JH)LjYTKs4;RI!`3)a)>69}hJypUrb{>Qmp#Lu z6JNLvi}AZY0B6A1$OunbT6%0|CWifbgbb62sOas!{71JnG~Sx@=E5?a`3)(m#8Q}- z_rdo#QO)h$T>@EI*@~()Z(NdkNRU3o#l^_=fByWL05{scHyN}#kP{e`An&E?x^NAn zdNj;S!jFgyRgYwvt|x^WiY2y>Fm4W(JY~_H_!s%)kJZK<^*fvT8sEq&9!wf&!D2|Vi^-OxQwd1y+d}EwFd-l%VySJ2; z&ooD{Mx{=sjbjV)o!MJGMRg%!u1>k zPy>AaSFXN9F!eP8jjXf!`uf|O16JI&6L)(1`Y=$7LuD@@!035+$a5+-sh>Z8{_^eH zz*u2NWQkZ+v)yKrvnkQaH(#8l$@M()(5i5uw4d#4)1;-O^o14t02w1qyX-n@w298M z-k*$dbY><1zO$~Yi!v`S?~Cmufl8K!Z(E}D)Ld_cN!O=`_cOYtdvbJ()O6N|U6=HK zz8CyrKX*2iRx}V2g2C69m!ieog#aLV<5Msjx&G`gH2c_QvaZ8e>XBzPA_lv33O4+C zVc|6>58Rf60`Em#&jPk`EwaBYDft0X6ax7)ttx!i*XZz>gZr8c`iSfYd>NhYQ{D|r(gMxWXson3j2qJy+}@;WNpgq_-w2o zARwRzsYk8AM6qbW3ZUcNyLY#z6T|Y^2P)l#6A}}Va#7dVhy%!~>5I+yGMqMpKi^{n zEk8r16Yx9~5prA%juUmQ9U7uRS&!9XmmluV-I0@%OqGxQ1u3w;sR>yPlfHbW!^1r* zh_>4r3i=zYPf2NMLm~H%&2*$Ludmme+&CToE@5h_M#vwtP3=~;p+oU8Xf4m)Z7AV_IzzA)S z6YwrsFAa+#sMxAe7ua$E z*4{@)2w0DzDq-=owtQvi!&62inSCz)M9~RY-z>!v;^9$(;@pP=sN80(*4MnX?O=9$ zGD>&03yM?_AYS|!Ic5%>@+)0An#J-cQox-Z!ga4-zNCkncss>gZDV7TVBVK6o~|4S z+|4Lxt}1HcyhhH?EX`scrSvmmVq$$B4L6*YKLEiqn}f! 
zTurFbxq>eTwgS@&=+P(G$5#RXGGsX^L;d|pHL|X29sn`ILLmUS!bPHqia}VGq&jJWF@6ElHGZfCEAX(wE;Zs;x=pP#DF;h9ik-T>tMVeo%KceU( z+0+@ju(7c4pnQ-4*W+q@4w)_%Menh{(iBxh!)NjmMgQk#ENfQF7ASW_EwTDe~3Gik$5-Ep(cSbrV}IGZ<~p|EDK;Nc zI=AI#D0)cmud>{uIJB`)-m2<$vz>QQq;Lkv#YK+h^PxyS{G`n7NxO9^)p^ryrwFAwOS6`FkAfR`3lk3=~f}`QCc`IAwTb z>0u?s}iDTzOL{v3`Kn}zg(^Bz7c_~AzB>gfXh&Jq$;#Nc%2dsrv>F-JmR65GyIFYlWDFclAZSp$B@mpmPhubBt`%k z(sl2}iKyEx4yyO3E>-Tyo965(#dl18@gpAv-2gyfj_y(Nmj2A{@H?x@C)Op4aoOcs zuir;TGD9*2JZW51|6^oC_ez9J1!oZk21adnclYRIId*c{ZGE_JPKylL`#N$HcDhAk zhuek~MWviR5bCdI`KUvz9UQJZolhfe^}GWDK!VjsrBZA*9pLkcy;Y4Y6;=fILgLR* zwF<^k>mgCiR9k5;Ds5@fEzendNIp7fw=}E`0T7Ux$(H^z8sFN@tsG(ck*L9P=Le{- z4#Bzz{>DbxtWJD0Z>ij0mBqNZzBDp2`m_q<5c7bppuJSrgDHo(Tem1i;S5r{h4}{t zk`wsWj)r-@di6@2BPxjcn(1`coJaj2q{7gF!=;NXy-@B1vxXY%VNij-ZW;MbnquLBFs0uy~} zcYp}Mo;EfjX6=?jtSUAlYo_K$s&rALfP{8s$Ge!S1rPAH6I6HPjbITR=j2UHrez0| zzSYWpNbh_n>U!O4Dk@n>Na%NAk4|0LQl*)7=ojXDZ;dj9p|dQx8u;JbePEOiJD+!El|OB`BXP@_Bk zZ;q1hUxDJ+;&w0f_r!z`+!i6MO>eOkow&HT_2Is=R*5aPYPJ@rkF2@CG5IV9)m-FF zP0^Kmi^BV$S%H>O2V6X7CP6{mf*)&M|l%TVR4X!|tcGRKZUWT2;YUKpo-fdo-t(uN6y#JRt<7HB3X zQox5w?Ezyvv-`FK+6oTJCF;ftU*AjI++=`cd&)w_ZePEC9Vbj5!q&FDTqRjaALvu> z7aK;)!7tAyCX4{%JaM*(slCd`m{>j6TWEI9V|xtO$H#|WOpMmFP{Ya@0CJp_>sOL< z=guKQ738HFAeX<&ou8swIa_hPUNK6ab-I9$kB^G0@|^7X-4J90HMgP^auHRz^5}K2>DLHdG#`qge{bfA3PZ_?N@NWf}KU*GxawMy}1LJB?fhK^jcPGd(0uV(Bb z;V;nEp2pTPFfrZU^|ZC+c^?}KymGA|#bOz4V ztC*Ob_Jx_vz0`P*=E9*M&#I_&rDOsqYiom*xrcvB-Gu>$oW3aV(y*+2aR8vs3Ib9g zDTY`2&%SO1P(4!^%lCv_OIth7ZO0baI9AX(P7r@30C?q@v@!6Rc3x2!;H&TGpa6~0 z2{b6#f!Y2yoVq%nT)AULsyuHXAP%w+RC5f7Ef?Kk`=;g1g@N`kYCkF=zmfy(y{)A% z0L~Xcyto}kD+<8!NuADr4EJA#(j2Q|Op{MOY6P>jJb1JN`Al-1nALSIm! z0k7EXtWJvesm21jHv}l8R$|KpTqgAUce#C_7}k!C(m6U61mzwVE?hwQd0zQNv(ywu zcfoBjXo(Ey=1&`Rl&U-Dj|_NP{oVB$Bz&P9VUO<2X{0L~{Q7Xq#o`(@vB$xND#!;! 
z=XoTp)B%qJrOh*JIxQ(JeIZ!5xb+uQxJl}%pd9W8TN%HT0DMvLnUF&KyfADEy^Ei_ zSm`)qyx}=BOG87GT&n6UZ)t-h8jxyhVKop+f%3}8V3i7^MU>e)@jZ>Z@^5r!W@b=_ z?lcc(YOtcO&=K9s%-*ZsUaliq6S2zm_C7^ULGe*3J-=cMxb~DF?a+%~kgaTKh_2*t zW3=Azw>z~lk?bxf7hE&B(-$_VoT)~3f#1?6Je&d{n@}4;S~CP}3!=+KknKZZ2WCWB z&}ejz(d6yYgD-A7JfN+Zy@_6HxnI@%Ey!~%s%=5j;}7l6dmxP}R=C)kk5tkCtk3=Y z8Dx(OAkLdEf8YG|QG(lU`u=4mrZJFiw)d8*Rsh;3K%xSc`G=K76+{I|U=zSDqvudM zkXEHbFJhn|`-ofh!Eyp8)!+6`E4Q*ZafuCi^!>YrIijaDOuPrL3n9T~mm? zYD(^PnO;M*DJK_~-*Alkem2hh+^0-Qx%V#NmN!njo|^0C{4H^DOh9So%PkHbGdMQw zOg5bD8zoTsMcjA!6qYjlCZ-O?9zsN50r{>+#Am>RW00c0xNM*iF~;KbDo_j=eD{Vx z%BNaW2!jV=_$Z@wf!7Y!9ij0$)H!*3U6AFHwgSr0md3&MOUdc!=}NI-VI*e=ut7*Z zRj3hbLC-rBW3Q{Lo1P7!i8`!Uov9)cW#Ok}RjKE!+Q+p*KMWsDii}rHK(u+NW^IuDecE#p__=JmEUmOzFbd}(qyj-cx z^=30S5p*b^)Oj8q@Mr5*#Tl0AdLB?h0|B1;TE6zDk%ZfX$~ihTuy_)-wzi?h`A{(M zA^PEwrAlKzez0@j3gI9Ax|zHVN>IBxzTwQsY#LGP6SqA^QiF$bLN5wF2QV}>r#Lum8PXQ=i3kNq5mY?E_V#unTFs#? zzjMas%PT9lpt^!=><60UpBX`CxR24Lr7)ZE#=A?R&;^T#yf7#HN?vE=g5^*tHt;Yl z&+i|}o0A;rDPl-hxuY9e{uFfmNaZ*FdC{A(%wuy_giAzjO2jPVL{QNecSDFuWoA5c)eq}9I{6Ngjm)ja- zOO0I$hZK`bguo6ks!qJiLSc^&AoZ&K)u0*+z)uaMcYX+#cpg5y$w z1WpVX;ZJE696A|Br>bdCt6aFt&8<^_84#ip9G8Y&Hh|7C(9>7z(eX6WtBK>~xontX z-275n8c4w+Z=~q~8rj(FEE?qx5`*!80yG*xX-nZ3cNG_Z&PUoBlp`1-#b|C}aU0^G z9szi}Py(Mw&K_+Z?GruUnD66-y(2nZzm?G0sSI5p`sOIk_$dsx)mZ?>C8w$FKtk5H zvf`YuEkWQ{&9fNb1k=M6u0>lZ<=^)-e%;Eo>(16@T|V-@R$5j@sH)myo(Ix}F7PWT zkw(^W_OX%dngl3=qr(H|eg_qmNC+P?0BeG$?SLg-O>eD`sX}uQ@Zfa=CL!)V0xbrF zXduQd=6jhJDym?mT|7TS9!9wG*q=YlufxMP;&?p|-Ja+boWt#ECsz40HMP}_LFMZt zW)k(W)lMQ&p{m(2VQ|acH^)bDx+eVgT?xE+bMJe!8?8{sqw~;H4&#b zP$|1}bi<(0LIKj^?Wv!!ip7@Nke1INXd8<9x<`Y>NTuj?%Rzro0U7D(Q8LNoAnS{R z@{Kfb_jl)V5W0_?A81p8{w$uXGAc7|HVP@5&_GKkjOC>I8%8jG$WZ0f5dW1@H(A z2v}BDR?450XCi(hLho_IwQqZ6TvsT}84xYvR$$X9r-Gva>(k=!unBxr!j7k*s{p|J zQ4vNb4iS|XKs-Ibh$g>2V3ay8h5GnlfgDE%54Sy&8lSKI_69VzkW+x1X=swQLC_C^ z@(Lc5=g@JtBiU}bC}533d>ELgPC@I25R42+%K-|%w-!eeOS~IH;k9l@hFA!HL-+!u z*#Uq86Cl2Fy3*~X$_+SMr<}z2uSJM6%U1`;;Yf*`Vyj={O#*$6^4YUzh&)=`-mV18 
zdlycOJmRNtSqq%)2Vq7N{3Eh!Eb^dPLDiqwnNR*%Xx3Bz@{)ukFb!W2P!|^$696q6 zKs|O@FfcHv2QkIBtgH-b6izD#RIS@Y7p}`GXKSgeX=o6zu&^ArLZPK<1ATb&0RS9i zum$80M76`iIt5lEI&fal3Qcm-iEaK6*8U(82Y^=L4-oDdB$ss|L@08PzZ}se|_cwH-I5BQc`*lauES`d3BXs zyVQ;h+A(LYUcKrZs5%88JWA&VqR-V_XI050g?m3UJUm?6*eC_*FaXY25CkR9!26)m zs+NrJInDrx5OOd!Imv|d|6^lgm$$agLBBzIFDm>)JmsVEm4E_xAbo=YNB<;w&_lUj8ADzv1Z-nj;A9%F4rCGsO%Q zc6|#Ab`cQ~Wa(~0BvhtcZ=NZ%g$mY|rOA(25$=6_Z1R?Q=#3r(0q!&}qYPTYJ4 zVe&FEfol0i0n@#CiOb8&q%z4$NE;H`Ep0ivH;^g^)I5Fb{r-E5p9>#dk6ijXxgcj} zuOo(}ibvETQb=5)NTWD)e{MKU|NH{yc4qcu4DBO3~ z$$&(c4Ue!_9W=ku+1&c}L?iD=1Hc1BoHv}+1knE?hxvi$ae!Iq&G?!!^r)ftgebng zu({TDc4vUBTo@KeIc9&UL5+O#QpM>LaxMPg8rBY6Ry8;&kszXXm z9SGR@Y_k_XVi$rwGbuTF0>ABbGRpUP6r;d<5H5)H0aI_HlbuKqs~*xZMJzRNoR*=* z58p5QpffrVaNsD^2Bb?0t&TvD0?ziELHj1(3bf<@hyBBcoLBPpy_E7HYo9+RztIg2xM(u?g&TIo5AqP$ob;=`FnmjdPyCKP-%Uv~f$x8WG64bfX4Z0~G6b-}C`3^` z6vK`Frp89-%_HZYOzdF&0h>kv9(c0y3ig41K<}*)m(I-#csvHmhtvcFx5eQKa3jfDd z(5oMg9-F$4?ePmPiR0DR*H?qIpK}NhcN=2X`gc7(czV2BE1Xss5R)KekBVKm{!kFYgB`&KsHJ^twyU6q}kn{PUlUEPIW=4W5}2 zy8vBGh)9_dm0Qv7nC0LG5^-6_gAUxQSszj=e#?uyAe>cK-vpB-wVNdrZpc*mOkW`r zokEEKitVw0MMebnoJ9ZxC@;8!>Yg6{sxpbkcd}F%0jYwZ4DQCgs524d^TmI!o7?)j zgbzCDAz^jE=G7v`}NMv_6kbWi{)jXgGTJiC5ahA*U8eq?9dHNY7 zLU3~ojOGIc0i#a2`MESK=(9U*&Y$-oZCL#A{0htea-r*jf_)})Xt)dsctH{GAXK4T z=e5UAazDZGM-ut=M7V;EOW+`MieqA8`eu1~c#1G&+g`w!1%1 z9&@!&gE&S`h0T+lu^E4lJ?6ru`rkzjd>U^LpsK$#uqSR(5wurCmju3l@rK7mfS^&4 z?i*dIUz?l7fiNY@#?y(p;h~&XC-6ZlRjhP#hBBT2WgYMIX+Ki&C7$WDv@57znmLyM z2!OuG|3xlc`D~flRQfUyFu+I4R`QR14upD z+HFsa!soxw66H!I-hvyq*~{g4pm>^bgc3T&{gm%?Sm}g{ZIC#8H0@)q!N6WxT>J z3a-kuuC1;aA@{scPAC#kqUP?8* zmPu^rD+_QU-NTBAYyQ>QlvPiw314{pv}tyVPYTp*E@PU>OEZWu$$oPDTwMFI8co&J zm|D+l=KEtgui`;M82d>gMn@`Bvx})wJRjAxd3j*0?k2-rDM;!kOY%C`_D&snR2pSb zyNkK(&p1*fH7N$<|GtCu+^PWsb?7RQnC-Uxn9W0gTrtTe0xNl=ca;b!Gh}3x!sw8* zaD8#VdHjCQ1GnECvnsY;zVD~DR>iZren(L_>h#Ow=>3>+rBZ*+;rsZ+Gu|K%Pg~*Y z@ihE>c5SY1>p5G=4n(E zLRr!|Zxm&3s+denDl31a6#CB>UZJ!lB3`fM$E~%sU_lX)tUuR0KbL@?=|eRvV%6ke 
zq8CBFQ78VbkK*K}#7rJiZmRgT_RajswovV$VI0^#qkW&LKJB`g3H&Q6|Hmn<*J13f ze@*s^c?FZTj7dhHh&i4X7T5l&Ml+cP#w8>vf6cSKBe8Fd?Ba-omP|@P=4Cp~|J>B& z)Qsh>LrtZ=mWH!WT|1>rBdJeck3{bM_v_CIDav2Dv)%23&Bwhzli4_tzVnw^{An^f zu43?CyY3Kqu{5N<(=&A>rtb$y&Q56zSHf%A^p}b0#k`*)D8T{Fk@tX7Lz=Dnr+_at|kPKE~pLq^}_ z3D~mAqe+M}pPMc|lA|JvxO{>R($u~xnQ=Sz5U0-@Wz^SiRx$_s%TWJ&PPR-zngC&& zbS3BZxoe|(8GQPZ%I*K=55j{KzSTRvu#}yA8~2DS*M^BgN|K4}B&vBza<*QKsrzZ5 zJ^O!9zoEI?`Jpr>51uc6zAOz|$*UXKr~9D_u1&Hm6wrM`+_ zN~z-X$F=@xNrorE&Aif)C+zg*4MzkEZkJDNgfBj0!t2nJ5emwZv z+ji+*x>nmaoTZ}m>#+3wCrh8eu;Okl@H_62%;b!=X^loY*Z=%OaYxQelvNk2SR>v& zw3oVXp#b}m{BK{-c54p446=#XsIQNzQ-}61h1>KqqBZ_K=>6C)G+_t)21%>c%n@U% zm6^zs2L5|e>O!d?LE#+P$^7oK(VcBQu=G#J3i#hE|cbvw~YpZ?d z-S_wtr)&vF*TVG6J1z!>lNB;D636>KNvy3yb(UR<#n0L zUYbC;LKSrD>>_SVa0jj8O2f%XTU{6&lAJFmXNuUnp6OpoNptTUsUq#a_;hPC!G65- zg(G5tjdJ^PnjiA07f!zQ4d**?+R@$dR3&Gvy;*BcLyKB}|5d+}NSW;GAfl}7_moAe z+T=A1u$Rlh_lo`dUJ*?Pfr~3w)s>uGqBL^z??-bnB$WSKeP>%14(ts1;fQATjFN=J z_!r1k>Hb}n1>#;jnGA*U)$)rO5?COMIrr}|m82~je*gNRmM-$;$)nC?Hbdlw!v5Wm zOmdsS;?E08eQhcMB*MFI;gYI#C#zt)u{Y!D?=KtC94@q5+t%F%c766FkQ75J1+57ccsrGLIxG)!?{kIFuNkR`ZUIUI28%FqMDqnt zZ6cU|@d)>}&Rxw?2|`FMw4`n(%S7nGkAeM+gEc`~O3JTh<8ZZw6HNbSK?y;+schP% z1!)c^^_L*TC2+%dNp>>SgVscS2ni6%U_fNL9y?6nWa9+Uh<3z%iv}_3Ulevg7~2q=(WE}o*Ls_TJJ8gos=mDWe|jyruJl6GQjnq z@{YG~R`E4(9Fr+ZLO7>=HmCZJj&{w{UmC*Br4R?EdBI4~O%J)ZeE2XY^dfn(ihpox zX@m~?31I0V%yn4M0GN!BNyLEO`3DsK7g15?5GOPAX+dc|jq(CBEKDZQAmTUl#*>Sn zZ3{hy2B2Cf7M9e6p}R=6NifhG*)Vxu8BQND@??gXQ&kwYLD~ATJr2@9cKODMLVADz zmZ58aje={=GF1tIa)pUXr$J%i;xdAR6&PZ-negN9R7oi*1|W4d4rBlQYECFOQ)Pz- z!}BVQ1cn0-dEr<#>Z{=Ap0K*EYg?{l;7-N3nKRT7bPfXr+F839=*tq&{eL2e*M|Ep&{*@ESQA@xBW6GxmrEvFI))v@erVWS6mcR2x#LA*Vxlt8wYY`XrZk`$CZ(oc&`SAVIUALh6jP8!RwHQq z^gdp`zUoy;X=n7Sn4Z>&$^L14zc;_{?)WM)`Bn3{#+$lumHh+~63gcm$m&AX_}!<-x&3y}i8?)O8@H zw1Fx9cp=v3`zTJIJ`I5CTNCZ85?Bi`!GR@?!@=sTq~L$+Bve~Al3NV|o8N^o0B6iou_ z@CFKHZk{>(=oeU?@7%fb)uuuS*j*n-e77Px0$QLuy{tUO(3=LKd;eXy`hV_Xa5qzh zmCH&YTDeFO&6SlA-&e1It*W(`$vw_pb}RRPqkFu)i$LF|yE4v!fzAh76dj{ERN%|P 
zMS(ABy6c6H&&`Z3nU>=;AXAzZMLX8+eK}p^QwT}3;DpER%dbkfGqBVW+dg<69U|=^ za6|Kd$jY)8p`pT z=8n?N=Y{RdRKk*fzGsq;g2aS@LR?KW!VX5q3={MNkX=EyxcVT)$cz`}yefE`;8go5 zrPo9ED+KIYi68+T4-x3s`qcK}|2^Lmw+yH~ggjKy{42InQ9^MLplQp|4>8>)zP^eo zT0w{-gYKOB(ZLqtKd&-SKGv-Mm>n+OH%c!=p<;-(3(7!3iveVUzWCVwSOg>^JKvh_ z?=)X>mOTQOFWaY|GMImoNYUn`sHB}yGAoGwJnpB=Vfg4-m`;v9lp_MVzvv`E$z*&7 z_sjVMIkdv`eH!3P$G3EX1kjIOcPMYW9I>Zsb#>=J`Bc36GPxVd=l%3~bK?N~g$w|2 z2?;06B<;-hg7W3X##EWfFr6)5?<}NzQi`5jmPrekFpwnn2s4++8n4C(z+tbRoFD=8 zSmm%MVJlCUlCz4V=amT_Na{w^C#Xgy*DUNc@QYLFgC#{QnC@B#4x3Rs(;YZIJ`PrG zk{CH0RaMn5UzTr6AseRr1}Mkp|Cw^U-}IN^Ok}DC^Gt8S6ADC{1O@UE7!a&Vpj`}Y zq&65zSYH@82ZLsf{d|AHYVMrrnV+|rWhO~svy6-k?oIFGaMyVmmt1qM z%u41_SiTBq2~DtAF*?sE^k5EBIYWgQ#1ip%G566wek-*UL)Wsig3&%I=enMYXzp-ML(Z|wBO^Av=;Dj1;8wB zbMHH*;{>x&pz*9v*z(RJO<8Pa+%BM^k_|y^6r+Fz5Ss<`LtpAtsAL?E$skrBXirt- z@gLum@~lPWGN~#Zlk6n7MMh0xW+6aBv`(-X(I7Yj{K4CYXjx#=szJs(z+HjtEb^~! zubC}(Yg(Om{e1jv$^D;+D9(4kL$e`|2B;U#c(<1Yd&`0jQddApCKs831-_38XdP60kdfM@KTN|^cjny1CE0ryE zJ7}`phmd^X%r&bZIn^334bzoL`J|Z8r>sv?Ow6?yxDH>7yHvh*8W|}zw2VZSBCV5D zT<_=4xnCyOsGI38^<8>2m_q}HK^P}ji{2Kb$&tFN~g zhSZeMFgr#E;{@y`RDW-%78YLMd>lYl%dwD8mYYZ>aig-Jz8o>TbASuZH>DwvDnBTr zfh4eWC8wl_C$=HeZsK@QuqpYlj;oi_r3YDnPQ{b+Gb)%Gi!wkA@&7LSjA9f_6L*im z94@G(t({y77B)sMuKPJZ;J}1H6Kxb+yX?~`e<4GJu7c!m4}-VLhe;2J-xR6<`(ao! 
z=)9FX?6GHo&;c;&!jyX5w{Vt~sd&%X!alH%Kyxg(ME&?4$T>Hk;6&0?LK0WK62yN& zg$Iu9pQbeYTuYxR*kV9*dVnaVJ5s|6M6O=N2P0tK$%V%uHqUN}qB3@mnOm41y~KKqBuS=D#;x-U8a`PW%g{63ro7lle(=k)eMcy=#b(x zOyEXht)@l=LuuZZB>ZlsD<^<`6FN#>pwR!fIsnLR6xIOXI+c*!6-W%m!6z8&6)3&W zrc=)=Ihz9g%~J;Y%dn<0iuDD-F5rb&h7saX=%OPX&Rpo904kp={G69}3mVqTU|2JR zNks5-v;N!NOx%DcYR>ba%9hFv_(OA%2z~j}Y65n>($e3Eq7SAcyh4o-um~VD2sQfq z_j4#>@al^+Z~5bz5TQo6IVhQ;L5OV^dE zs8r{JbdhZBYhR-BKn;^*!ac!61`LAQzB!viubXu^il~NbTL8U8aQ*veK=KPq#!J^^ z(XG?7xU7V*;YMu#e)i1&%b83u%BvB&8H<6Ng@Z^H!dFhiw}vx{*qS>#J8K7tko$Yp z_pX@rj45bM05;tEe}r|=lATxoc)1#JkAjs`5n2||#~6j&fb!bjc7h=k6twx+ga0O~ znB-B-JOMXe6*S2c73?n2=HqAoHjTdwZ3|@n5bS^kFd#}Qey0Uar6TlC;OzzfY9Eg* zsQFJF4}}+AC1#OR2rKg=j~@la54hn6SZ2hR1C%J-!dXv`9%lJWPReXd^0m~ZG5Nu& zVj_>uwsWCHkx0dw;Bmop1N#=tNN_ey!YefVLqdMooUqYXFJA{V;MOJC5{L0n^(N&b zm9j554mHr<|3_%L3+fX@fx*A;kIfTrIJSG9dEmV+k5#JSf zYq<;?i2#`3f{^g`TFVI_;JJY43eGe~H|EILC~f7IejE3H@Rz|zWT}SX!|DJE79?E3 zz$uRR44U}*hx@^YX4!K z&4h5?Wp`^Ip+6S@e#58Ftf2d+>?UsUcuPPx34g4eriW`2me1*EVql8J@I z5E2I%2m4sF_CQOj2j@1DVaJil4Wyk9a}bxnhY49fydU1hK#I6Aoty;Vl^J1x=%~PB zAr2nER}m4(#XkZ28p326*k>4FY%&lMm?3lmVKQT!wc!r380)&Q5I#^el2)g$i!Iyb zUe~@Unf#{h#XGpGv&j2xzI0aukUIhYT?h#b;G+;9h}*$N4;AlYB5>K_ zpq!Q}wvYiTx7BYf2mwAr(7;MZh(uh%8@X$+%@5G3)&(_tS)Y(Y)7~;|>;+D@K;XJJ z@t(na`3G1Vxq{q?we*dlnr{H6ynMw3`%kDzGpTS&lMRO8h$3uq@<-ijk=$%4FAa~dSIji4j(>h;!e}TbcsvNyeD1?dFWLB9JyCr z*UPd!*H2;eqyU8~A&Tzqi#mu7O#ZzuX7;`OQ#JlF*mqDU2*`HUs^Dz*TQDR^Vb~aa z3o0bkR4t-cL{uR`#?8CJKCq~Y46+U1l-3Jj$?CDnCG4NLyYY?M=5eV|h% zW@S{ncK>}n7T|z7uqUFYB=O+ASU4e4;fC+Z|GTs%>-`YGC<=wV-$s;$kx?QC3}N@; z`X`Mb1|SS`bhKexVcs7fW?8>{`7-e|=j+AvE>oZxpbDKU<0njb|5sQ{{w)uRdFQUI z0QK3h_m%i8MtE=R?Vdw0={xxoWC~QZ3JC~MS!iPqe+FR<^%5twJ%aGT|G`8t!=c-) zO>mF^_6WFytg(Ab1nUd<$knT!c{Hes4eH;Ei>JZL>D}^1hUtyWqeF+_?~&pkL1)B1 z`eUT>ccqkUihlQ6TqF*{MfqRse;<6Oo|9$C5E>7+&rvCgo$1+9H*9kw5PR6M zn_>z8z+X2-!eAPo2J}zy&)dp-VBGxMZ0~D)Vf-vWu0AcN9B3w@r+s_P)Tosu`QIeC zH{*6PV8W{bXpZ=4(-&b13Z^?d%8W`tH~{%jt7?ulR3Cq7>%zT 
z8KHw~L+2o>rtJ;9`z3bCo(@!vRtuQTggLp&sG{(eNtn+^gVFHqlG8Djvy+u+scmIh zoJh|i^2(^7q;?jhw7f6*h7phj@S`_}H=h2H^R@`)CO&5MudZsly!7Fo;}}&`a*69F zL`j^=utu>lP#`F4dwERjgz54C5CNHqeWUukh{-_Fo+MO4w4LkC*;tT; zQ=;W(vyfqEbb;=Dh<}}TucWfAqtpXBe&$Na1|LQ8QN$w$vMQ-~HM|1_3OVBELwXZX z=cx@Tkie5L zmQw?{hM6xCa6)0jTU7vq-Ucka0*xLOnw8xD<& zE9{?{sw9=m&og0ELX!%aa)fv|g@PF2f(`{FyK$)3jlEb1;81);5ajGfI}%?%a*l4M zE6hTZcy8E?U@O0yIWX8?Cqj|qS!ie|xTK18v;lHppun=t+sKS8y?@|$-6N6u;H;jP z*}IOj@+_K6l{30@7G}_e=3zkNC0?!;_cwn)H`6SRC!uf&sfuU^9%bt9{SGvqJ|e#| zNYdv0A9-WmA8u=ud`0$(o66$MG?d)mDn1(u zkgb`vKWELWaYWF#rMuR6^xV{zi#359+2X^4J>+d`<{L-%JTU&srOM^=<*QWT{JOVH zyQ`u^qVkHWd)(*7>^-6TFD2P%^ae{UqpPy~7=tNtho^Vz9cJI(ci4^7agQZ9z3p)y zg)ZxTdj+10NJ{wB@f#$K%f>a)7QG!!jI`Ou zRtaDI)2{rLbQ6@uY?Oo=v#Y#-d9Tmz@qhB^6W!O(nqX>`WIC}5pjSc%0~=m=YSo7G zMJO9*f$=!be)(}-AagKuj>!MgL3AF7QfKMPLzcpPB>X+++z01#_p|)c#?%|_Y9Jm? z+Qm#t^BqLNBTi(-iq{;^2J-gyn|Z7id8eD2il zH>;`sxL)pTrf^*boz2E!&^ZDKP@9wT;t@AwAaqh>yZ~L}w`#y8~JLDbIk^#(y82hp|9^L*>AyMfH z2qtFwP7)?#8p@Oq2m0v}m>VIf^W%%!ffCoUK1On}rb}AGrqo?y&N{I0nY-xAvr7pp z|M0N}^@tZn&$ESB4lsgTz8$K)o1e#UsDf1ccjXd)TuAMMHKswQ1cDj9DN#Hmb(@6hw9R>3n z7>%RW)s(&ENRU=@9--wP5CUqg!WR^EOqcmcR z7O6A;E)3OZ_~}|=iUYwC@lzWgK5-JDyS}V2CA28!FxOgEcGm!@Ig$ZLLQ+iBCFor< zP@ooB3Skqp*S2P7so`5K-;!HIV`fO?f^q1(6yMb6qQyT>t|x69M4#6xs1{4dHDei= z`SkTuHY0$7_BY@=qSQ1E9$_C$ld3S5NHB5U$yBKzKDHQgTlC3Ci5`e1r- z@U!i=R-UE&9YkM8Ha1X>MoTRPidlox$DzM|EgXls>CoSVR!sEb~y_(~yQo*(Br073MdoLztg}wbSUsQ^=NK&)S-{0eFqbq8cKkQ61 zuP9*fm|Q8c^VEkpinc?Jq^5+9|9d!k^umqJpG;8gb0szzj6CY=8#19ScOH=Y}p$T^8IbpW@~p}+wC%XplFN)T{VS-u4Ay^%K+`2FKPj6 zaZGsn+B$OZS|_}-8m;)D1+_=kx_!clwsBf3$1u7Mi#GWF@AqBmC`pzU~j zx@ObHM`sSXw5Z8{ok|KV4i$+e=d)diBO`kM*|@s6W5OG5)@}pUIC;LeqPCW?1irOU zmYIf7(K$`YM~%z9vKKh6q$)g)n%8y7uE>)VN8Hj3-(JVvjil5RMGydfNQOvd?d9z~ z*^ZD;6cZq^LoW7AQkL3is?z?Ry+WhSf${P1#)0M%&GcQx1I7&KvB`3XvTBBfjMcvf z(nBZ&6yivSZ4w7CzFJ>~oXgSC?^zkSN=5|;vCrk!a``hwJ%RfUi#YA#5|H9fSeVFz ztLfiq;cGu#(TrXzI?@Gg=d4yq{eJdZpZ&kbhriNodTWSbNINg=6@&^Q5cmJHBb9=a 
zPq=wbx{I|xj-98U@jTT(eG55RlKwUiT7pwlG&BGf^zrc_-NVo#Dv&P~4w7U1&|jZX z)NO7)@$&R^&fWF8E1375uuWu40DF#rkwHPrn&2y2(}yGn38DYr-8U7DWfxcU6-9xAy76YUvK+^UqPfT8(8;acCWsM{qJ@dV; zVxzO+1tW_}&GM7NGQws0ghjY9AS%~fgfED;qe`B$l{2sR+lt+l` zWe%xDn0UXkNz>mIx7+APs>T{_R?SXsMH+fPV8H^rcJaXuGJ?Kv3=(~qLNX{3eudQb z2bxDEJw7e{7oX>O3KHBDiSqfd#T!1ys?#%&Z(iiolna#gXXhe`IV$yH;!Mi2T*xL?T3u5?|(6L;E0kEAORzG&55U%Qi@0uRGTlbO@bKL|RTG=1pR zar3V&L(@a$`=LAOtk+k_pxISta2M%eyq+GaWvGQgq5+Zti@rqd`d}7X#6Od*w&;WZ2*9~y}o^3QqS+1^%JaCO&RUqT5^TuAS ztcM<8nnyTglr^i?di)sDbF9qTxAItHZC=v;O#)KSm3Zl{QGbgQzqP8xf5Wsc-yhEk z1245(CQ2(P!#N|>5%n@=)@ADVgOegcKTGVfzq#V?EqlD65>3T3|Ox(ZMb;H`>0r+v#c*Z9Ojg5_o zAhWW%nw+`-y5lkz?@Z$0tK**sQsqET3H{xgFh2S5%f!>D9b^k0Sl>u$7xb)No_8ne zbM&(GOnlYUmJ?bYn$N&9AT>9G?wh{j5Bg0+`OUX>Z4YpG6gj-R=AJg&WIFUHK@tm1 zF%K}8DnMc;@&`zcp>^npUeEBgRg%5GSm<6W`e_brHJ$6Ld3nrsJa-)f?<;sq2fcI# zqzywnb3E|!TVV#e18;xQ{Wm(Ix3^m|gS4axEi+$-xbgEf5Saynt3i-hGywz<9Ef8E zAl8ZYBVY}Jba4O(M5gn%u@5+4a-@1yM9|C15Ff?~NMc6NfFryPqNai_i=LA2Jts>5XXs{3 zk@=<<{(-!XAdsNV;gH{lgnVV5;dAFL#ElcU>A}m+MJPIwjy<3yy%SiW@c!SS!!_jW z(dI(WAY<4=ddlcpIr#B!zdAbEvvV5KHBVK(cb1Wjc@aD0vDELuMV&Zphe31)+_97m zwDD)Cc;ZCa`}gCCKko1LUJhAr5$h9@79c?c$?l*F zQBSKSmV;`Sr~sT9>O3>R>V!mQkau>bZj|hA?pMG4rLmI&idqxSkNO->v$J)J*au2F zu1fiEgA{_uG3%Tl_d6FWc9>^}(x-|(OZgTSZs)b!esfTe#>)YOH6qI-KCs}~!R}!E zBsa(fm%w%a5|O)2HX5PGjwC$byRIQ{9UNlFq5}a1cD2vOb+lK7oYUxmcr@4(Y&nKn zxJF-Mq3y}s(&i-2f8lL=C< zjks0NZfJ%w7`l#%Y#{>tRj_fTUcQ(o(%Yw1@^JhtD|A_ta)o%p0Td^Xd z9emBQiVDFFqw2mo#n|cJV93g!6O@(Jy)&%883CVo;^RLdSrXcXz*J%PjRaOJP$|Na zDBRSvV%e2In6_;56{Md#SJ{2U%OK3|v{angY1P7?{TEw*f7C1+?MnIRIJZtJE0VL* zPLrGS=VtybhnK1vZlV?0x%;SBE>BVpPr^?vYwbI;Vv#GV3ZNiM7Xc`c&}a&{4^p%P z2q>gC4Zht5JS(uqRRW`^yGJwsW(sQU7JA=Z5Z|Wx+vV^GCP`-ul0{`FC#-Zrz;{T41h9KR9(HRTuHD?8AqGwxSa(eg4#Y`krkQ%F!@ukipp7 zoHn#xsI7>9@}57_2_{7-sqw4@Bb2GRfOWbbnnZsVkEngBDk?M-;sW#e0?g4Tr-oHv zSfaLRPmCH9CS$+?;Ln@drv$Cv!ssmvl1S|c*oZu#OCCrbRC_g(wHpkp_7(?KJf}IS z-Q|GYK^-#m#5S zly9=ONYPL`s+;RD3*-h-1oFh2gNWRs;-u8|(6K&Y!&kr_SkxMS9xAruQclGsX-{*P 
zDond?c-gf&)$5aK>KE-0FscCoK+ebA+1V+-b-|w2fQ!Ue$n~%aBQbvip_V;>ym z)9_ey`O`sKb*8XVq#62%kaN@1#F-C=O^I(wXLO5RCgv)@jqH%1^e|FBn4!>7$Sy-; zm{`D!Ab(DP83(bDfZp~B)Sy5!*MdA4*`o7*!p$!$m7O%f#SQea9hW-aeP9K`cr{p} zNzwu}d3}6O8*95(qxlsd9<_aq>IAlJs`l_aiA zzrKEL)jR1O0NDFbAMwyY^rrZM23r9YIvBir0UnH?xq6V%o+vEOyy3~QX;!oMX5tX@ zgRPrxno%&Ig#Jeb zy+;0yHTc%Fr*VBp(LW%t0HSx#z`!yJ@v(yiO`L9uBpe6}=YGCe+I|4G80sAmx|$-1 zhWO#TdPiNKBPNLG48JC7#&G^$tKR|sUNdEKy}D}dP(fa5k0$-T+}M+yM(?v%+%ypo zSdA=P3`Sa@8JIzq-d5kT=w;QI)m*J0KJYRC@Fn&{eK42=mG%7B+b3k=AID?M&rZiKG8 zXPS!naIO`UHw~GoN?Z3kt>+}iq3WJhAhv8RKNuV|pu_!}Z2+lCFEsLm5*k`$Lq0zN z%wRds9jtfNGgA`-_)?#|5J>*LyHKr8H%CgmhQ?QZiM_med@LPaJ|Pb=E;0wg(MIEV`hJPA|2foZU2xz zTTS4OfD_4Uw2G5hbi?F_1~UbUWRVjCcdG6r6vOa+|MhWhmd_KnwY$jdvu@egFJ;IU zfikFYZ2?742rvWsTRkibNL(x&Yrl!`6?0!!)Jr%oEyj$VNSGb)s>a3!>W)95!>MoZ z^4hlJfT2Xt+JW_v-FF>i7n)lh%`VJ2udm#_FY&RI^yK-=$IayS9X?oF`G;4s%~PD` zu|V&GcR_ruhV^AU_vT|a7eT=L;Kn$E7q}+%R*Viv6$rBzCJyE3YM|kyf@X0@t_M~~ zRXy57ZuZovJEr3t6_FJ1P$sJTbLkMN%RPWx?cj{VPdmoE- z$LdKDg^?#R0Q0Ja869x05(7zmic(O2tU_L19#QTqI1iYHkSa}cOFVFrvO6qvT~{l7 zwVI|QG`yTPtmxRPcnL(Pt$SX38-??%JIyk&;55-PAy)A)Dky2qb~V~vaO8tppU||( z#0g?kPFV9mN>SZxqjoTo)|`L}x$Idlg|eXdAVpNQGV$y5ya)T*x5;^%FU?bTyu&55 z^Rvw(=-eRrCVVrPJ%}dPS5y#3`f`AB$1pg?{OlFHsTvI`CYi^A{QIZA?AR>D{WkyV z_DeRN@mh9A0jO(4r3~GY(l;aryN6PM}ax~)v zfr`VwJvk<3-fQpGj%W4&lO+x3z%wwnBM`RQG9c?#`bNd~KbxALIM~PneW-a3og0#y z+wx9l@@8?M4@CyBAoqkQX0hQCNolO*zCF>uDdy_Ims+0O6g^K-=`lx;=M7NkxbM=L7#f>Zt`HQlOSN1hm3fe({wiVTh%&_!% z_OHpoS3C79-k0$pGds<@(m;<08i_R7EC-Lk&yWj@NUyU%HOA=*SzJ23GAtLMa)thaxD z$$WwPTY=9eRj(a!yPxE@o%Ren&abLvZ>cRRip+;#C_yIN)MkqaTcCY)_szq4H*U-h z?<$ev-uCFQjK;vRd%@y(%oV7uDeWZm#KhX1R;L_Q8ElSn%BV&upMa{ zB>gNt)@=UDwS4w*jr3ymwMV@gQ?~M*jU=#SkeVR%oh+QuLB%4BUkDUX|2@1{PgZT| znUbFCK>XB}7-RX^*%x1@&A26cy6dZ`obz_w^x`8+8K`&i`~v|n1VnyNC3xYDd`#CW zRPSyT{Q03|e5%f(lp*y=Z$g-isdd@KG|;x3sV~#Kc>0Nqe$ForpWWy6(mNq5CICKR ztdaMM_oXT?;%OPLdtVvl-P!|+V$m!_cGFo$jpaH{P+uhAO{E(SL@wjc==r#WaMrk= z_%b}t(yfzBNLk-$V-QWQUNf_tO1&QYama?_(b>lfMKx%I 
zAU~b#?{St`yOOXRu~2P$BF(Osk$hUfiN*v%2ux-1@U2igqGi_JlG5g~NiDszwPKw* zz3kq@4O!I78faeJXlLs`9j>el-XQ5s?E#>3UgBAXJw)Xb^-dHr-`s79LQ1N89NlQCm zag?8o)K`_G^?M3<7E$K9s0FM54X^heN9T=Ki=ViMx}u3~5T5S-K$5EHv!i&11YS4Nsmign-M-DwV`3;ShBq^0}Q1vonU8B=Ri2W`nqT zXu;0?nLQu9i)Pj8W%WOn(D3MAp&1v5&5+Mc$MwG5%n9!t~rx-wB&zjnqqL-$VThUmru?*DL3n z6!Zdr{{d9esJnsgaVBv<;Fn>Q8RI^2@YKF#~7ln8+8Q z5DctHnx;0kEueGZdEK@4DoD29nxkAZs(WcxzOC%|u(%2g)O~ekgY@7Qvg#SP*A&gB zxx6y-X;5&;=-aD@Z$^T8GA41YRx+JTKIoO%FN7yHxf7XKL+6NU*A;fi^Us~cf)`T# z1hh4(9$WO@JRHU9C3+;i_?!v}Y&3<&KL~~l$pZDNv{$SxV83>KhcQ?{kF+=6q!Bcn z>#krf&dBMee&!5S!kxrbt8F{dci(V&@{4hiM?+Q%*?!W$&dX3&{_EoQlRS@p9ai4| zZPH_p$$RsP8F8-0avv7s8o1j8N(m2UL^|Bq@sdS&}6X$d?1qQ?y;{ON=cTDOh zly5M;SqYUY24ZpY%AOUzRKySFq_BEw73YinlG9Py9<}M9tF#opr`!H~Msfx+01>(~ zuJm#+mWM|Yamod*sE9$98*B22@`I>#pgTH+g5@A4G(y_T%FCP5dJc}?L|@X0abJYN z42Y}(R+yysWLoN{)JdYl;h&X>rsdV@`Ln_&(u<7`yNY>bZ~eOsz^Ykmd4bUZSttuT z`%+A;1Pw3_uq}PZZ=k8f8Y<)TD~^f5X3?5S=!%wsy(t6Q_hqWN60+Jz`Uaf3u!@=!E zTkm|)#-geE3u2B{w%g|`O$d`0-*uoSbT7IxBCBvO4 zemgqdh?Nq7<`e1U02t`%eqi9sgvs8mehYLR$so3})2Ow8{0qPJx#9k=5f<>YaEUD< z7jaelQ(33G!W?3*dR<*GDdmg(D_}Z8tmyd}ow`$BhfEwYZ;o#lh$VhtfBSO5E^DSQ zbS%m3M|3z4T?z$8dx;nWIBxlZ?$#a2i;XvxE8~;y;*r)#i z5o?jVwN$lR`->LE?`7gWBYF+SCl}4$i0t3mUsRla=|3&aT9~5drS;}o9ykt*EQ%lb zc1}a|1xhX+u>e?`fzM0mpKu-6dO7Us)pv$b|JgAM_G4{L*0PmngW5H!czabqqTy#>!$W)GjaaMkhx$0NT&^_rbo?*&z(ke|K%{u zW?64-Z4HNjW?NT?(x}_hckPN$gji~?1+~CGy_TRLM?Hbnrj&uMl0C#i4vfKrF6d8? 
z0dAnFF%-7r!kUc;~Y5gj~=&Ju{sVjGIs`*U3|;+QT?E2pNo8(K93}l zUI7pGGtm@ZSe)g%4&T@A1nng6><3y3*Dkkh1RTX+Q(|Zc*=aTSqD=5MhN7J8CiI2g z8+O6np&Nr2V7hn;f01NTqyY12;y}v&%?GE+w?vC4=f%XB8SE>&M^g1SCbFL$T>4Kz zCUwLQ&-p~%YblzrkdW6}F8~Cp1)o3n#Zey3u&dC-aiH}tAf<|OPL7M`@{%2=925j- zPTYy<|C+1)va#?ElS0#2DVh@VZu3XdhBzo!=yiK2`gqeSzZ29WXenrnZw4&o90chZ z_M1EOnf)ARQfL=xOuMpEVb;A{=H!kxL&b$@ZR?z6*VW5$^qR9iQ^OW7$K+8Gf-u zjSUx$fg8ldqrViT;mCIhR0WdM#9Hm?V{Q??BKAPs5sQ~7M}t(8d|qTNQO-6#xW!=d zYkaSat7p+nk5l}Acu&Ez>)?@wOP518fx;>|N=~&iB=^(|9uwZOmSJH*Y&d!+<1uT; z`^l|!N@2*J11c?@yVK_ zPIkkmEQ2q|#CA1wWIY!>D?X!%y`ya~q7@6l-H}@c*C4EU_;81aykP5;&ZqBh8yl9a zwRMl%cY3q%7m1P_(!E_g_$@dfTqiPefKhTDqRd;O3-F#9S*sOh71vkR)x0TQCWyQ3)9m+0?5WG}wT zt)QLVjJ&n>U!?Ad7hvU%oyDiVe+5{T+>H?vIMJH&Ht;Ml7rI4&w`>$3)E$z8Hh z#q;dTQP1~Y-pIGYdv}nI+gY`=ABuGb5AY2+PmJ%)ezJA(T0*E6LfnTvZqaY9_)L&v z=1(D10{9Q2n;%;6-D3}i=Z#voJ=jjH?9@a{d&B6KXgR)FFQ3lO&pzLH@a}6{3y{e7 zPwEGb4PhN4W9+Da+uy~qW^q8xqk~bhCxKoHM9sOs(mxUBvT_Hx4&IZla&02lwjLv} zBIh=mt(y>Xl>}lp$SX1}I-=1fDxDk5czyrBTIPk;G#_lUWNJv6Fbvi0F8{r`p})U~ z_43ZqzLI!#R-OK)+^ci^(aze&<;?fDN(&f`~a!3RU)Bo$)wx#D3jJJ;?s zS2v?jKkvMeFVDIjolZs!Sw|jVbQajOoJwM!T?oi}w?P+)=a>%=V^DQ(Q=&|vt6Tq+ zao>z zdAG}+RXOQ>7stY6171*>&`T@n^63o!Mc#@OkLvCA+Rj0q^8BghYtph#)&IV&-T2+i zg0#ozdbBq?t1{rNrd;-O+jcgPmhRWfj1G*6@2oOGY_NFw=kzZ~P2D+xJ(OOuY%)duCW%SFRSK*ZtiZjC*e`6jedwjLKr!uSSm$1a^cprt;L4n z$FG;bIjmFk2zruLpKUC5=3xU~|3CV%W}V^RCZH`NmB7@b!1o zKcQHKC#vA{c{ncT1q9Ym*2 zhzkw6%Z-dnUH*) z_j*crMgzw>h=){h(1NuA2bzn=lcinclqIU*fAdHV+LN#Pg$(%-s4J=Fid>uiccg7& zc@UnERdWE-SR-}kTlQm9;3tum3Tu!}%udA;$QO`Y`nM-lklIq}L53Go7lD`ms3T^}pMk7;6}#sAq!Hsw{SGhAf1(a2_@y+NTs z5@g-J7w?82wHx>=Sb4_n9lF8D?;qaFPa#%05Oxs9TC@zb0{5%nllVZ1AhRLK>{t9% z#D6n#-M)P_#%J$^+b6n4`f)C!D>B066%+`);t}&@jX3Zbp~c(b!lgV@?xX(o%D;JJ zyCsvySG~Z?rsPilPpcxb0b@j!Y0GvU97b05+6P^g>o2;?@Q~KqCl;{0}*e)%6^NwjZAq%O$>LXYE*oZwk=|W zP>9s`Zu0(z$Ho3zIbCV`0u$Sb`@MsM197Mg|LdgTe(WiLEtk>8iHROP4W*^ zb&zQdu*w-e_lbsNJ~aC(1XZU7usL^N)mpxi@CdtMm0^UPW;^YMs2q9`(Cl9e_vz~Aag?a6h@o#z{ 
zW5BvU5CkvoE$rcDWcDx=X2hDUI!tNe7~JWFMMW#R7XG;2j*jjD>zgz`!c>}`5=k*- zq)F?jbuLr^<*#3V4?ZcMu=y@zbTAfMCw~q@njt>BdG#cPu$Ewykbw`G3Tfllr>xFp z<)^EwtLui-_jh6CH>i|@f`iLI62puF`|onft6VVWgz#@3KBPliJOHLqFtpx=_H;C<)zy4zL4-Dkpxm+`wcvqpMf>S|Rn`Y3uZ`e*&XU|r`8I2WI?ch&*Zq_?l z1z{VN=3gWDP7;o8#2Ko)>@sLN{*NCMS_0ZtmtDKOTiZtQ78syI`T4`Z?FNWs7>W}7 z{Q2{+u;xcFbJye*78WjrTAc-OLZsSh+^dIFi~=LHg=7eU+}v3kQZ-*QxNK13xdRKD((mMl^+B!OBc)pXKKCQ&m3|fRxb5((NLs)6@gb}J8WTtD_ z*&ie&Edv$E%F1f5rY0*^&ieapWrI6^G`vSHD#0n7LgahoC|*HI37rmv!$6 z;Dim*r>@H1Hzzt;awZF`MJIm44}ZC*=iFwGNlsH54JdMRbD@6}789!^0SWUDZ~->D zhYm49uZz#8px^(^1=Z{t4h~}XkHWh;?9jv0V5U)i{$B7b!SzwPb$te%v1`FXaB1h| z;h|8VbzQqQI7tO;88Q?Qv(*e94`V_gsEz~E$LvCuZqpP+0~|X|P(orAQI{V(b|X`e zRZ$yV(@YH9z83|nd@{uke^ype;od0}ehcAs2?dpLd^54cCi1y=t6hT?$b9W?@Un3a zO*UN+V7`9+nv9eY+$1RV5uXWVm(LnbPIWtZd3kc54-ef8x&74cr9Vq~8-LJ!X-I&k z`}Zs`CLZ7Nd&v(Uu(!4=iNxPx0)@-nUA`Dj^yaGk??k~fT$pVw+2%fGL8hc5%vNK( z`pC1A-z(w5Rvo7Lun*L|>CfvImO;@AY+yaW12X*Q&s^668J#`deR);L+Hj4Jz*IFc z{S_;B_G3OPsCauJWu|Np5~7E|qpGS3&yF&_H$v>x*3&abQI9DVrMUapY4Nw%ss{Yf z`@+r1Ixn^)b>L7L?QMk@SFX@wmt%J2QV@tS3Z$GoNxZza->sBA?jbaT$6bzU9{C6j zzEOY|WLl*0^O2cs3nKt!m?oIjS&S7egUJAe3`Qg4yyY%@=Yl2ORUoEju;fr^uyA9` zq%0oe0yqD$QVx2#G2=h}l9nnz0N0R~35^Z%BgKeorlvFhUtegm8LNzWmK7lXCg$M) ztmx?Iu!=T-f_WLJ5@Zrz2LKDe!3ts(h`<=ca(!@cka=hSXC5hwWtg#a>N^a89x{+i z8O*7_uXv4srI=U_5DvaA6*#m*;HAj3(;$#ViFk7oRtJ_IqeWIi(qB09^=`iBue8k1 zhcL`B{NBA!U&o&`+dMTYV8aM93=)cL--9WI*q;w@f^OTf!}sZ`H3m{q2NKFa=Ua(| zCTcQbz*X%1q8=h|IWlMy9t*ZEXl~oAC+m`=H48t#Z4TXgzkK;ZY#0yX0Y>|M-qy1srD2*F@v zDe7WUoDBm!H{m&iyu40N0QQ959dmsUFj}Q`W z<1;fgZRZg*1E5RBF`QbD#j?9D3ap~(8<8`>&Wt#kA{0Gr?jINkLQ(>$83S=*rECxt zWx{5{^VAqE3Q`>YOq|Y|bdMeLi!$+G1g?%L8I?FN12HGwW^L4+J0fr7K^6fyy8^Th zio?vXDcOTrPsFnp_6KCZs;5OB!!XUQT8n(Ae&)P$pO(XT z0Kbi=LU6|pPK;z9R`Uh{2l*;S(%w-#3NM7+GhbGt<1sK%P*AXMYt$a86#f{ks+!zT9SnRhQd*E6N zw)to9&nSL~3uRzsK@QWzeK$Iq5t4q4sa{`NG->Xnw5Na0d+O?rEF??`}eO6mkfIV3O(#$p}Go8P^ zQyISfD76uqCEL;YJ-;VbXJu!TiHl?w!Dw$4(NKd`8vv;&#~UaG=t0aRnm!=S#8L~7 
z5-TuqXu5bXxmBSCy5LH{VYhL~IhbBcPgg`%v>Yy|Zc`sDbK6maiA{Fd zlOxX$+SM#`S~jq=CKgh`OtaXiqoX4%E?$MKXuZO@MYBB#GEoICAp4TF1+cr2A@6?u z<_!&TSE;~U!yH`|P7HoQJ1SB6nLX9p>(^J%o%5!-S#bypyPZ2PU81G< zUB7<4>_q!}q~fEVgP;=Egq4D7{y7*39(5ChsEu*7r9+}+vI)v#xQ4|PE`Y*aK&Ov1*-9JRHzH%LpbA=Yq* z4h6Tklj$0%y_^(`$9lMM4jGS%%F_GpU%83jTo+u@hPKY{zEa3=*!!@#AqMLxiTGez zEau{xqLhPLGVjnIA&6O%Qd0Ko>fUO)1yw47)V)(P``Fppsk1e5PTa52)Y204Hg()X zJZ2X5q4&4f-QE3#B^rtqF^T9AUB*vVGDlK)GCf+49a~Goyd(oL}ZKuD7TpR{K-O-l!C9RnGl~d|dbyEmr zQk33elz=#cgU-1fJ=nxQ42=8=6FYB$c+`lb5>G5UNXhWy{Lwh+qL?kIDKuQ?p0u8AxdML4^WIG->YP-4JLX zM*GHhow#SelPLyk(W6GXXb*4)uPlx_eQ?<#5x;&DKJ`0*at4i0wlI}O-yh+7IO zKZqW8@35Y*j;=1|C1$Ghlbu(!7@II6hs*&YnF>4#)7~$@u`Lei0Z#Q68<-)~;LXEt z-n@)hv~L-|oiDwQI7gpjLFZ#8UJ*Q+_%IZz;q5lIwn)i749XAhfR`=VX*euJlB7ntgZQ>PoITs0sL zV}C6vFiM-;ZXK4ME6i5%SgBF9O&KE9_L4}wM>T*@7yK~8%6=pb{vt?XCG*=K!O+9UA7rIIPRyW+A zo8~f-&{o@kVjQ#3QslnF{uuS{6PXKeFjXj+=ib3)!DfcMkASU zZbS?<$0I_|{sHVpGPH0M_y0`fCQP*9=`vCLQ^Itm4CEV-(h-igozgO5;fXFJn2|{> z)1*62ac&3CF+ZQo8Mt=@RW}(;Q#e2N3hSkcLj$Mt!vMffB=4Ec}TM{ zREqpPWG@@-I_#K$43*&|9fdb3KME4G+x)TGpyFSFaZ_1C!%<7#VH}j?Y{Id93hLGA zpD%`qLTPYK44(6#>(^@_fhWGzuuG!H`9g2^xWy7j25eMW(2rw6X*Uh(JGN3$(|T_1 zEs8QbQCG*wi;IhoKHl@2k?2D(V^{a2k5BPL(Hk5Pdv$c!$wWuWulr%C+9E6K&kYzY zddpRzjBb4U)(-}e>ppz=Kq$mF?%z+#R`Z4L0A^-mX7Jl?F(f}Uu92pxc@kk?D z06usD7~Tsm$0->BpbYR~KNK0$4S+iytOyDr%@Zotpw6uON-@J)z#}GS| zB2ojhGCZN^IkV&IS&iMdnnFimWn&W{5v)3;*@l&qb2DUTWX>Hr!tA*9x3xNkhN6E^ zZY@U~BjXPtYZF3VLmWfNe;=NQCo3C88I$~)^5A1L+P!D}^5x4bUxo`J2aBY{xoqY4 z#}Tj^k)36EfD(KMxtxC}EgNlAUC$V;Aw=wjQl}oph8DM8?)Qnf^*T{EMFp8EE|(bP zutThS3S<)nC#3simN2N}E%9VU|5zyHjE!Ce7TolJhldA_p(MH>Lo&l-P#tD*17}S> zvaMw8!H=%<+tBaFPOJD*dvm3U{LdB0SBS?1GG@)$`{d;0qmHp_T&=V<%ACML%a)a* zJZ+!Y>Di%m;Q|Ka=3jjI@+HSw%X8<#0CAA=p}D#FM?~6>qRAFRsBd;(JBh<8h@`s8 zO10} {'Area (sqft)':>12} {'Predicted Price':>16}\")\n", "print(\"-\" * 44)\n", "for inst, price in zip(instances, batch[\"predictions\"]):\n", - " print(\n", - " f\"{inst['OverallQual']:>10} \"\n", - " f\"{inst['GrLivArea']:>12,} \"\n", - " 
f\"${price:>15,.0f}\"\n", - " )" + " print(f\"{inst['OverallQual']:>10} {inst['GrLivArea']:>12,} ${price:>15,.0f}\")" ] }, { @@ -210,25 +272,44 @@ "source": [ "## Price sensitivity analysis\n", "\n", - "Hold all features at their default values and vary `OverallQual` from 1\n", + "Hold all features at their default values and vary OverallQual from 1\n", "to 10 to visualise how quality drives price." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "e4a3468a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4RElEQVR4nO3dd1hT1xsH8G/C3ggCAUVEcYCouPeqA3er1rpHtcvirrbaWq2T1rZWO9RqW/XnqKta9957oyIOUBQtU5Elm5zfH5RbI8OEAAnw/TwPj95zT27ee3MIeXPPkAkhBIiIiIiIiLQg13UARERERERU+jGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxICIiIiIirTGxoHKnatWqGDlypLR9/PhxyGQyHD9+XGcxverVGEvK6tWrIZPJ8PDhwxJ/7vx89dVXkMlkug6DNCCTyfDVV19J2/rYrgqij+8RSqUS3t7emD9/vs5iAErfa0mlw6VLl9CyZUtYWFhAJpMhICAg37r79++HpaUlYmJiSi5AUhsTCypROX+Ucn5MTU1Rs2ZNjB07FlFRUboOTyN79+5V+fCkC0lJSZg1axa8vb1hYWEBe3t7+Pj4YMKECQgPD9dpbMB/H8hyfoyMjFCtWjUMHz4cDx480HV4eunZs2eYOnUqatWqBVNTU9jZ2cHX1xe7d+/WdWgl5syZM+jTpw+cnJxgYmKCqlWr4qOPPsLjx491HZpkw4YNWLx4cYk9359//onHjx9j7NixufaFhoZi7NixqFmzJszNzWFubg4vLy/4+fnhxo0baj/HyJEjYWlpWZRh5ykrKwsuLi6QyWTYt29fsT+fPpg/fz569+4NJyenXIn367z6Pvryz/nz53PVP3v2LFq3bg1zc3MoFAqMHz8eSUlJueqlpaXhs88+g4uLC8zMzNCsWTMcOnRIm9MslIyMDPTv3x+xsbH44YcfsHbtWri5uWHp0qVYvXp1rvpdu3aFh4cH/P39SzxWej1DXQdA5dOcOXPg7u6O1NRUnD59GsuWLcPevXsRGBgIc3PzEo2lbdu2SElJgbGxsUaP27t3L3755RedJRcZGRlo27Yt7ty5gxEjRmDcuHFISkrCrVu3sGHDBvTp0wcuLi46ie1V48ePR5MmTZCRkYGrV69ixYoV2LNnD27evPnaGGfMmIFp06aVUKS6dffuXXTs2BExMTF499130bhxY8TFxWH9+vXo1asXpkyZgm+//VbXYRarn376CRMmTEC1atUwbtw4ODs74/bt2/jtt9+wadMm7Nu3D82bNy/RmPJ6j9iwYQMCAwMxceLEEonh22+/xcCBA2FjY6NSvnv3bgwYMACGhoYYMmQI6tevD7lc
jjt37mDbtm1YtmwZQkND4ebmViRxDBs2DAMHDoSJiUmhj3H06FFERESgatWqWL9+Pbp161YksemzGTNmQKFQoEGDBjhw4EChjpHzPvoyDw8Ple2AgAB07NgRnp6eWLRoEZ48eYLvvvsOwcHBuZK4kSNHYuvWrZg4cSJq1KiB1atXo3v37jh27Bhat25dqBgL4/79+3j06BFWrlyJ9957TypfunQpKlasmOfd+w8//BBTpkzB7NmzYWVlVWKxkhoEUQlatWqVACAuXbqkUj558mQBQGzYsCHfxyYlJRVJDG5ubmLEiBFaH8fPz08U16+QOjFu3rxZABDr16/PtS8lJUXEx8dr/Lw5r09oaKjGj83LsWPHBACxZcsWlfIff/xRABALFizI97FF9XqXFunp6cLb21uYm5uL8+fPq+zLzMwUAwYMEADExo0bSzSujIwMkZaWptFjAIhZs2ZJ2+q2q9OnTwu5XC7atGkjXrx4obIvJCREODk5CRcXF/H8+XON4tGUOr9/PXr0EG5ubsUaR46rV68KAOLw4cMq5SEhIcLCwkJ4enqK8PDwXI/LyMgQS5YsEWFhYQUeP+d3bcSIEcLCwqLoAs/H8OHDRcOGDcWSJUuEhYWF2r/rpfk9Iaftx8TE5Pr9eJ383kfz0q1bN+Hs7Kzy/r9y5UoBQBw4cEAqu3DhggAgvv32W6ksJSVFVK9eXbRo0ULt2IrCiRMn8jy/OnXqiHbt2uX5mKioKGFgYCB+//33EoiQNMGuUKQX3njjDQDZt/SB/27J379/H927d4eVlRWGDBkCILuv8eLFi1GnTh2YmprCyckJH374IZ4/f65yTCEE5s2bh8qVK8Pc3BwdOnTArVu3cj13fv2nL1y4gO7du6NChQqwsLBAvXr1sGTJEim+X375BQBUbkvnKOoY83L//n0AQKtWrXLtMzU1hbW1tbR948YNjBw5EtWqVYOpqSkUCgVGjRqFZ8+eqfVc+/btQ5s2bWBhYQErKyv06NFD7Tjz8urrnTOOIigoCIMHD0aFChWkb8zyG2Oxbt06NG3aFObm5qhQoQLatm2LgwcPah335cuXIZPJsGbNmlz7Dhw4AJlMJnVLSkxMxMSJE1G1alWYmJjA0dERnTt3xtWrVzW+Jn/99RcCAwMxbdo0NGvWTGWfgYEBfv31V9ja2kp3yKKiomBoaIjZs2fnOtbdu3chk8nw888/S2VxcXGYOHEiXF1dYWJiAg8PD3zzzTdQKpVSnYcPH0Imk+G7777D4sWLUb16dZiYmCAoKAjp6emYOXMmGjVqBBsbG1hYWKBNmzY4duyYxuean7lz50rX/tU7l9WrV8fChQsRHh6OFStWSOXt27dH+/btcx1r5MiRqFq1qkrZd999h5YtW8Le3h5mZmZo1KgRtm7d+tq4Xn2PaN++Pfbs2YNHjx5Jv/tVq1ZFUlISLCwsMGHChFzHePLkCQwMDArVfePvv/+GsbEx2rZtq1K+cOFCvHjxAqtWrYKzs3OuxxkaGmL8+PFwdXWVygp6b1XHq2MsevbsiWrVquVZt0WLFmjcuLFKWUpKCrZv346BAwfinXfeQUpKCnbs2JHrsUXxN2DHjh3o0aMHXFxcYGJigurVq2Pu3LnIyspS+3yLyqttsbASExORmZmZ576EhAQcOnQIQ4cOVXn/Hz58OCwtLbF582apbOvWrTAwMMAHH3wglZmammL06NE4d+7ca7sdBgcHo1+/flAoFDA1NUXlypUxcOBAxMfHS3XS0tIwadIkODg4wMrKCr1798aTJ09UuoKNHDkS7dq1AwD0798fMpkM7du3R9WqVXHr1i2cOHFC+h17+ffc0dER9erVy7PtkG6xKxTphZwPyfb29lJZZmYmfH190bp1a3z33XfSB40PP/wQq1evxrvvvovx48cjNDQUP//8M65du4YzZ87AyMgIADBz5kzMmzcP3bt3R/fu3XH16lV06dIF6enpr43n0KFD6NmzJ5ydnTFh
wgQoFArcvn0bu3fvxoQJE/Dhhx8iPDwchw4dwtq1a3M9viRizOna8L///Q8zZswocIDzoUOH8ODBA7z77rtQKBS4desWVqxYgVu3buH8+fMFPnbt2rUYMWIEfH198c033yA5ORnLli1D69atce3atUL9wczr9Qay/7DUqFEDCxYsgBAi38fPnj0bX331FVq2bIk5c+bA2NgYFy5cwNGjR9GlSxet4m7cuDGqVauGzZs3Y8SIESr7Nm3ahAoVKsDX1xcA8NFHH2Hr1q0YO3YsvLy88OzZM5w+fRq3b99Gw4YNNbomu3btApD9ISAvNjY2ePPNN7FmzRqEhITAw8MD7dq1w+bNmzFr1qxccRoYGKB///4AgOTkZLRr1w7//PMPPvzwQ1SpUgVnz57F9OnTERERkWuswKpVq5CamooPPvgAJiYmsLOzQ0JCAn777TcMGjQI77//PhITE/H777/D19cXFy9ehI+Pj0bn+6rk5GQcOXIEbdq0gbu7e551BgwYgA8++AC7du3Cp59+qvFzLFmyBL1798aQIUOQnp6OjRs3on///ti9ezd69Oih9nG++OILxMfH48mTJ/jhhx8AAJaWlrC0tESfPn2wadMmLFq0CAYGBtJj/vzzTwghNPoQn+Ps2bPw9vaW3jdy7N69Gx4eHrkS0dfJ7721MAYMGIDhw4fj0qVLKt10Hj16hPPnz+fqurdz504kJSVh4MCBUCgUaN++PdavX4/BgwerHae676+rV6+GpaUlJk+eDEtLSxw9ehQzZ85EQkLCa7sUZmRkqHxILoidnR3k8uL/nvbdd99FUlISDAwM0KZNG3z77bcqidvNmzeRmZmZK5kzNjaGj48Prl27JpVdu3YNNWvWVElAAKBp06YAsrtUvZyQviw9PR2+vr5IS0vDuHHjoFAo8M8//2D37t2Ii4uTuuu99957WLduHQYPHoyWLVvi6NGjuX7PPvzwQ1SqVAkLFiyQuno5OTnhxYsXGDduHCwtLfHFF18AAJycnFQe26hRI/z9998aXEEqEbq9YULlTU6XiMOHD4uYmBjx+PFjsXHjRmFvby/MzMzEkydPhBDZt+QBiGnTpqk8/tSpU3l2/9m/f79KeXR0tDA2NhY9evQQSqVSqvf5558LACrdHHJuMx87dkwIkd3txN3dXbi5ueXqcvHysfLrClUcMeYlOTlZ1KpVSwAQbm5uYuTIkeL3338XUVFRedZ91Z9//ikAiJMnT0plr3ZZSUxMFLa2tuL9999XeWxkZKSwsbHJVf6qnGv7xx9/iJiYGBEeHi727NkjqlatKmQymdQlbtasWQKAGDRoUK5j5OzLERwcLORyuejTp4/IyspSqZtzHbWNe/r06cLIyEjExsZKZWlpacLW1laMGjVKKrOxsRF+fn4FHktdPj4+wsbGpsA6ixYtEgDEzp07hRBC/PrrrwKAuHnzpko9Ly8v8cYbb0jbc+fOFRYWFuLevXsq9aZNmyYMDAykrjKhoaECgLC2thbR0dEqdTMzM3N1iXr+/LlwcnJSuSZCFK4rVEBAgAAgJkyYUOA1qFevnrCzs5O227Vrl2d3iREjRuTqqvTq70FO97OXr5UQubtCvfoeIUT+XaEOHDggAIh9+/bliju/bh2vU7lyZdGvXz+Vsvj4eAFAvPXWW7nqP3/+XMTExEg/L593fu+tOfte1xXq1dcyPj5emJiYiE8++USl3sKFC4VMJhOPHj1SKe/Zs6do1aqVtL1ixQphaGiYq71p+zdAiLzf9z788ENhbm4uUlNTCzzPnNdcnR9Nuo4WpivUmTNnRL9+/cTvv/8uduzYIfz9/YW9vb0wNTUVV69elept2bIl13t6jv79+wuFQiFt16lTJ1e7F0KIW7duCQBi+fLl+cZz7dq113bNyvl9/vjjj1XKBw8enOv88+vqVVBXKCGEWLBggQCQ59880h12hSKd6NSpExwcHODq6oqBAwfC0tIS27dvR6VKlVTqjRkzRmV7
y5YtsLGxQefOnfH06VPpp1GjRrC0tJS6ZRw+fBjp6ekYN26cyrfx6gy0vHbtGkJDQzFx4kTY2tqq7FNn2tOSiBEAzMzMcOHCBUydOhVA9rdzo0ePhrOzM8aNG4e0tDSVujlSU1Px9OlTaQBsQd12Dh06hLi4OAwaNEjlXAwMDNCsWTO1u8GMGjUKDg4OcHFxQY8ePfDixQusWbMm1zdrH3300WuP9ffff0OpVGLmzJm5viXMuY7axj1gwABkZGRg27ZtUtnBgwcRFxeHAQMGSGW2tra4cOFCkczAlZiY+NpBiDn7ExISAAB9+/aFoaEhNm3aJNUJDAxEUFCQSpxbtmxBmzZtUKFCBZXr0alTJ2RlZeHkyZMqz9OvXz84ODiolBkYGEiDl5VKJWJjY6VvRwvT9etViYmJKueYHysrK6mupl7+PXj+/Dni4+PRpk2bIok/R6dOneDi4oL169dLZYGBgbhx4waGDh1aqGM+e/YMFSpUUCnLaQN5zeLUvn17ODg4SD853TZf9up7a2FZW1ujW7du2Lx5s8pdxk2bNqF58+aoUqWKynkcOHAAgwYNksr69esHmUym0k2noDjVfX8FVF/vxMREPH36FG3atEFycjLu3LlT4HnVr18fhw4dUutHoVCod7EKqWXLlti6dStGjRqF3r17Y9q0adKd5unTp0v1UlJSACDPgfWmpqbS/py6+dV7+Vh5ybkjceDAASQnJ+dZZ+/evQCyB5y/rCgnO8j5nXj69GmRHZO0x65QpBO//PILatasCUNDQzg5OaFWrVq5PiQaGhqicuXKKmXBwcGIj4+Ho6NjnseNjo4GkH0bHgBq1Kihst/BwSHXH+hX5XTT8fb2Vv+ESjjGHDY2Nli4cCEWLlyIR48e4ciRI/juu+/w888/w8bGBvPmzQMAxMbGYvbs2di4caP0/DkKut0fHBwM4L8xEa969TZ6fmbOnIk2bdrAwMAAFStWhKenJwwNc7/95NcF5mX379+HXC6Hl5dXscVdv3591K5dG5s2bcLo0aMBZH9QqlixosoxFy5ciBEjRsDV1RWNGjVC9+7dMXz48Hz7nBfEysrqtX8gX/3wXbFiRXTs2BGbN2/G3LlzpTgNDQ3Rt29f6XHBwcG4ceNGrmQhx6ttIr/XYc2aNfj+++9x584dZGRkvLa+JnLO6XVJQ2JiYr6/W6+ze/duzJs3DwEBASqJd1GukyKXyzFkyBAsW7YMycnJMDc3x/r162Fqaip1TSsM8UrXwJzrldc0or/++isSExMRFRWVZzKT13urNgYMGIC///4b586dQ8uWLXH//n1cuXIlVxe7TZs2ISMjAw0aNEBISIhU3qxZM6xfvx5+fn6vjVPd91cAuHXrFmbMmIGjR49KiViO13VzqlChAjp16lRgHV3y8PDAm2++iW3btiErKwsGBgZSIvVy286RmpqqkmiZmZnlWy9nf37c3d0xefJkLFq0COvXr0ebNm3Qu3dvDB06VEo6Hj16BLlcjurVq6s8tlatWpqfbD5yfie4zpF+YWJBOtG0adNc31a/ysTEJFeyoVQq4ejoqPJt4Mvy++BUknQVo5ubG0aNGoU+ffqgWrVqWL9+vZRYvPPOOzh79iymTp0KHx8fWFpaQqlUomvXriqDd/M6FyB7vEJe38rllRzkpW7dumr9kS7oj5kmiiLuAQMGYP78+Xj69CmsrKywc+dODBo0SOWx77zzDtq0aYPt27fj4MGD+Pbbb/HNN99g27ZtGk+h6enpiYCAAISFhal8y/uynDUJXk6qBg4ciHfffRcBAQHw8fHB5s2b0bFjR1SsWFGqo1Qq0blz53zHJdSsWVNlO6/XYd26dRg5ciTeeustTJ06FY6OjtJg5JxkXBs1atSAoaFhgesupKWl4e7du1I/cCD7Q8WrH7oB5Bqge+rUKfTu3Rtt27bF0qVL4ezsDCMjI6xatQobNmzQOv6XDR8+HN9++y3+/vtvDBo0CBs2bEDPnj1z
TRWrLnt7+1wDk21sbODs7IzAwMBc9XPGXOS3iF1e763a6NWrF8zNzbF582a0bNkSmzdvhlwuz5VI5bwn5jXhBAA8ePBAJSnX5m9AXFwc2rVrB2tra8yZMwfVq1eHqakprl69is8++6zA9z0gexxBbGxswSf+0nO+PJ6mpLi6uiI9PR0vXryAtbW1NIA/IiIiV92IiAiVqb2dnZ3xzz//5FkPwGunAf/+++8xcuRI7NixAwcPHsT48ePh7++P8+fPF2nSWpCc34mX3+tI95hYUKlSvXp1HD58GK1atSrwQ2jOwObg4GCVP1QxMTG5/kDn9RxAdveFgj4M5/ctSUnEWJAKFSqgevXq0geO58+f48iRI5g9ezZmzpwp1cv5Vr8gOdfC0dFRb769q169OpRKJYKCgvIdMFwUcQ8YMACzZ8/GX3/9BScnJyQkJGDgwIG56jk7O+Pjjz/Gxx9/jOjoaDRs2BDz58/XOLHo2bMn/vzzT2kw/qsSEhKwY8cO1K5dW2Xu+rfeegsffvih1B3q3r17Kt0jgOzrkZSUpNVruHXrVlSrVg3btm1TafuvDhwvLHNzc3Ts2BGHDx/Go0eP8lx3YfPmzUhLS1P5wFqhQoU8F1vMuSOY46+//oKpqSkOHDig0gVk1apVhYq3oG9Jvb290aBBA6xfvx6VK1dGWFgYfvrpp0I9DwDUrl1bmkHtZT169MBvv/2GixcvqiRbJc3CwgI9e/bEli1bsGjRImzatAlt2rRR+XAaGhqKs2fPYuzYsdIsQDmUSiWGDRuGDRs25Nn2X6bu++vx48fx7NkzbNu2TWU2rbyuY17Onj2LDh06qFU3NDS0yGZ90sSDBw9gamoqdYfz9vaGoaEhLl++jHfeeUeql56ejoCAAJUyHx8fHDt2DAkJCSp3cC9cuCDtf526deuibt26mDFjBs6ePYtWrVph+fLlmDdvHtzc3KBUKnH//n2VuxR3795V+/xedyciNDQUFStW1IsvFOk/HGNBpco777yDrKwsqdvHyzIzMxEXFwcgu5+zkZERfvrpJ5VvM9VZKbdhw4Zwd3fH4sWLpePlePlYFhYWAJCrTknECADXr1/Ps+vMo0ePEBQUJL2Z53yT9uq3uuo8j6+vL6ytrbFgwQKVri85YmJi1Iq1KL311luQy+WYM2dOrm8dc86xKOL29PRE3bp1sWnTJmzatAnOzs4qH1CysrJydadwdHSEi4uLSheDp0+f4s6dO/n2Rc7x9ttvw8vLC19//TUuX76ssk+pVGLMmDF4/vx5rg/ytra28PX1xebNm7Fx40YYGxvjrbfeUqnzzjvv4Ny5c3kuzBUXF5fv9JUvy6sdXbhwAefOnXvtY9U1Y8YMCCEwcuTIXH28Q0ND8emnn8LV1RXDhg2TyqtXr447d+6ovKbXr1/HmTNncsUvk8lU7mQ8fPiw0LPKWFhYFNidZtiwYTh48CAWL14Me3t7rRaBa9GiBQIDA3N1Xfn0009hbm6OUaNGISoqKtfj8rqTU1wGDBiA8PBw/Pbbb7h+/brKGB/gv7sVn376Kd5++22Vn3feeQft2rXL9y7Ey9R9f82rvaanp2Pp0qVqnY8+jbHI6/3q+vXr2LlzJ7p06SLd1bGxsUGnTp2wbt06lS6Fa9euRVJSkkpC/vbbbyMrK0tl6ua0tDSsWrUKzZo1y3dGKCD7S45X3zPq1q0LuVwutdGc9v7jjz+q1NNktXoLC4tcf19fduXKFbRo0ULt41HJ4B0LKlXatWuHDz/8EP7+/ggICECXLl1gZGSE4OBgbNmyBUuWLMHbb78NBwcHTJkyBf7+/ujZsye6d++Oa9euYd++fa+9bSqXy7Fs2TL06tULPj4+ePfdd+Hs7Iw7d+7g1q1b0oezRo0aAcgenObr6wsDAwMMHDiwRGIEsgcoz5o1C71790bz5s1haWmJBw8e4I8//kBaWpo0T7i1tTXatm2LhQsXIiMjA5UqVcLBgwfV+ubO2toay5Yt
w7Bhw9CwYUMMHDgQDg4OCAsLw549e9CqVSuVtRJKgoeHB7744gvMnTsXbdq0Qd++fWFiYoJLly7BxcUF/v7+RRb3gAEDMHPmTGl+95e7ZSQmJqJy5cp4++23Ub9+fVhaWuLw4cO4dOkSvv/+e6nezz//jNmzZ+PYsWN5rreQw9jYGFu3bkXHjh3RunVrlZW3N2zYgKtXr+KTTz7J867JgAEDMHToUCxduhS+vr65Jh2YOnUqdu7ciZ49e2LkyJFo1KgRXrx4gZs3b2Lr1q14+PDha9tcz549sW3bNvTp0wc9evRAaGgoli9fDi8vrzz7+RdG69at8cMPP2DixImoV68eRo4cKf3urVy5EnK5HH///bfK+Y0aNQqLFi2Cr68vRo8ejejoaCxfvhx16tRR6Vffo0cPLFq0CF27dsXgwYMRHR2NX375BR4eHgV2v8pPo0aNsGnTJkyePBlNmjSBpaUlevXqJe0fPHgwPv30U2zfvh1jxozJNVWsJt58803MnTsXJ06ckKZTBrK7j23YsAGDBg1CrVq1pJW3hRAIDQ3Fhg0bIJfLNeqakpGRIXWhfJmdnR0+/vjjfB+Xs9bElClTYGBggH79+qnsX79+PXx8fPL9wNq7d2+MGzcOV69eLXCqZnXfX1u2bIkKFSpgxIgRGD9+PGQyGdauXat2slXUYyzWrl2LR48eSV8wnDx5UrrOw4YNK3Bl9AEDBsDMzAwtW7aEo6MjgoKCsGLFCpibm+Prr79WqTt//ny0bNkS7dq1wwcffIAnT57g+++/R5cuXdC1a1epXrNmzdC/f39Mnz4d0dHR8PDwwJo1a/Dw4UP8/vvvBZ7L0aNHMXbsWPTv3x81a9ZEZmYm1q5dq/K6+/j4YNCgQVi6dCni4+PRsmVLHDlyRGVszes0atQIy5Ytw7x58+Dh4QFHR0dpjFt0dDRu3LiRa1wO6QEdzERF5Vh+K2+/6nXTHq5YsUI0atRImJmZCSsrK1G3bl3x6aefqqw+m5WVJWbPni2cnZ2FmZmZaN++vQgMDFRrKkkhslcB7ty5s7CyshIWFhaiXr164qeffpL2Z2ZminHjxgkHBwchk8lyTT1blDHm5cGDB2LmzJmiefPmwtHRURgaGgoHBwfRo0cPcfToUZW6T548EX369BG2trbCxsZG9O/fX4SHh6s9LeixY8eEr6+vsLGxEaampqJ69epi5MiR4vLlywXGqO6KsTlTysbExOS771V//PGHaNCggTAxMREVKlQQ7dq1E4cOHSqSuHMEBwdLU0qePn1aZV9aWpqYOnWqqF+/vtRG6tevL5YuXZpn/K+2r/xER0eLyZMnCw8PD2FiYiJsbW1Fp06dpClm85KQkCDMzMwEALFu3bo86yQmJorp06cLDw8PYWxsLCpWrChatmwpvvvuO5Geni6E+G+62ZdX482hVCrFggULhJubmzAxMRENGjQQu3fvznNaV3XbVX5OnTol3nzzTVGxYkXpd8vR0VFERETkWX/dunWiWrVqwtjYWPj4+IgDBw7kGdfvv/8uatSoIUxMTETt2rXFqlWr8mxf6rxHJCUlicGDBwtbW1tpyudXde/eXQAQZ8+eVeu8C1KvXj0xevToPPeFhISIMWPGCA8PD2FqairMzMxE7dq1xUcffSQCAgJU6hb03pozxWteP9WrVxdCFPxaDhkyRAAQnTp1Uim/cuWKACC+/PLLfM/v4cOHAoCYNGnSa+MUQr331zNnzojmzZsLMzMz4eLiIj799FNpOmB1fx+LSrt27fK9tq+LZcmSJaJp06bCzs5OGBoaCmdnZzF06FARHBycZ/1Tp06Jli1bClNTU+Hg4CD8/PxEQkJCrnopKSliypQpQqFQCBMTE9GkSROxf//+157LgwcPxKhRo0T16tWFqampsLOzEx06dMi1MnxKSooYP368sLe3FxYWFqJXr17i8ePHak83GxkZKXr06CGsrKwEAJWpZ5ctWybMzc3zPC/SLZkQJXiv
lIiISENz587FzJkz8cUXX+T5bbq+6tOnD27evKnRt7T5Wbt2Lfz8/BAWFpbrjhRRaSKTyTBr1izprnphNGjQAO3bt5cWqCT9wTEWRESk17788kt89NFHmD9/vkqfcH0WERGBPXv2qIwH0caQIUNQpUqVPNekICpP9u/fj+Dg4FyTVJB+4B0LIiKiIhIaGoozZ87gt99+w6VLl3D//v1iH9xLVJoUxR0L0l+8Y0FERFRETpw4gWHDhiE0NBRr1qxhUkFE5QrvWBARERERkdZ4x4KIiIiIiLTGxIKIiIiIiLSm0wXy/P39sW3bNty5c0da/OWbb75RWf49hxAC3bt3x/79+7F9+3aVlWXDwsIwZswYHDt2DJaWlhgxYgT8/f1haKje6SmVSoSHh8PKyuq1S8gTEREREZUXQggkJibCxcVFZaHYvOg0sThx4gT8/PzQpEkTZGZm4vPPP0eXLl0QFBQECwsLlbqLFy/O80N/VlYWevToAYVCgbNnzyIiIgLDhw+HkZERFixYoFYc4eHhBS5fT0RERERUnj1+/BiVK1cusI5eDd6OiYmBo6MjTpw4gbZt20rlAQEB6NmzJy5fvgxnZ2eVOxb79u1Dz549ER4eDicnJwDA8uXL8dlnnyEmJgbGxsavfd74+HjY2tri8ePHsLa2LpZzo8JTKpWIiYmBg4PDazNlorywDZE22H5IG2w/pC1dt6GEhAS4uroiLi4ONjY2BdbV6R2LV8XHxwMA7OzspLLk5GQMHjwYv/zyS57T9p07dw5169aVkgoA8PX1xZgxY3Dr1i00aNAg12PS0tKQlpYmbScmJgIALC0tYWlpWWTnQ0VDqVQiJSUFlpaWfFOmQmEbIm2w/ZA22H5IW7puQ0qlEgDUGi6gN4mFUqnExIkT0apVK3h7e0vlkyZNQsuWLfHmm2/m+bjIyEiVpAKAtB0ZGZnnY/z9/TF79uxc5TExMUhNTS3sKVAxUSqViI+PhxCCb8pUKGxDpA22H9IG2w9pS9dtKOcLeHXoTWLh5+eHwMBAnD59WirbuXMnjh49imvXrhXpc02fPh2TJ0+WtnNu8Tg4OLArlB5SKpWQyWS8jUyFxjZE2mD7IW2w/ZC2dN2GTE1N1a6rF4nF2LFjsXv3bpw8eVJlUMjRo0dx//592NraqtTv168f2rRpg+PHj0OhUODixYsq+6OiogAg3xVPTUxMYGJikqtcLpfzl15PyWQyvj6kFbYh0gbbD2mD7Ye0pcs2pMlz6rSFCyEwduxYbN++HUePHoW7u7vK/mnTpuHGjRsICAiQfgDghx9+wKpVqwAALVq0wM2bNxEdHS097tChQ7C2toaXl1eJnQsRERERUXmm0zsWfn5+2LBhA3bs2AErKytpTISNjQ3MzMygUCjyvOtQpUoVKQnp0qULvLy8MGzYMCxcuBCRkZGYMWMG/Pz88rwrQURERERERU+ndyyWLVuG+Ph4tG/fHs7OztLPpk2b1D6GgYEBdu/eDQMDA7Ro0QJDhw7F8OHDMWfOnGKMnIiIiIiIXqbTOxaFWUIjr8e4ublh7969RRESEREREZWALKXAxdBYRCemwtHKFE3d7WAgf/2UpuVJllLgwoNnCHkSC48kAzSrVlGvr5FeDN4mIiIiovJjf2AEZu8KQkT8f9P8O9uYYlYvL3T1dtZhZPoj9zUK1ftrxOkJiIiIiKjE7A+MwJh1V1WSCgCIjE/FmHVXsT8wQkeR6Y/Seo14x4KIiIiISkSWUmD2riDk1Rk+p+yLvwNhb2mi111+ilOWUuCL7YH5XiMZgNm7gtDZS6F314iJBRERERGViIuhsbm+hX/Vs6R09F9+roQiKn0EgIj4VFwMjUWL6va6DkcFu0IRERERUYmITiw4qSD16eO15B0LIiIiIioRjlamatXr5q2As41ZMUejnyLiU7AvMPK19dS9liWJiQURERERlYim7nZwtjHNtzuUDIDCxhQ/D26od+MHSkqWUqD1N0cRGZ+a
5ziLnGvU1N2upEN7LXaFIiIiIqISYSCX4f021fLcl5NGzOrlVW6TCiD7Gs3q5QXgv2uSQ9+vERMLIiIiIioRQggcuxud5z6FjSmWDW2ot2s0lKSu3s5YNrQhFDaq3Z30/RqxKxQRERERlYijd6JxKvgpAMDZ2gRf96uHuJQMrrydh67ezujspcCFB08R8iQGHpUduPI2EREREVF6phLz9tyWtj/v4YV2tRx1GJH+M5DL0LyaPapZZsHR0R5yPU4qAHaFIiIiIqISsObsQ4Q+fQEAaFK1AnrW08/uPFR4TCyIiIiIqFjFJKbhxyPBAACZDJjVqw5kMv3+9p00x8SCiIiIiIrV9wfvIjEtEwAwoLErvCvZ6DgiKg5MLIiIiIio2AT+E49Nlx8DACxNDPFJl1o6joiKCxMLIiIiIioWQgjM3nUL4t+V3sZ39ICDlYlug6Jiw8SCiIiIiIrFnpsRuPTwOQDAvaIFRrZ013FEVJyYWBARERFRkUtJz4L/3jvS9owenjA25EfPsoyvLhEREREVuRUnH+CfuBQAQNuaDnijNtesKOuYWBARERFRkQqPS8GyEyEAshd5m9nTk9PLlgNMLIiIiIioSH297w5SM5QAgOEt3ODhaKXjiKgkMLEgIiIioiJz+WEsdl4PBwBUMDfCxI41dRwRlRQmFkRERERUJJRKgdm7gqTtyV1qwcbcSIcRUUliYkFERERERWLr1Se4+U88AKC2wgqDmrjqOCIqSUwsiIiIiEhriakZWLj/rrQ9s6cXDA34UbM84atNRERERFr7+VgInialAQC61lGgpUdFHUdEJY2JBRERERFpJfTpC/xxOhQAYGwox+fdPXUcEekCEwsiIiIi0sr8PbeRkSUAAO+1dkcVe3MdR0S6wMSCiIiIiArtVHAMDt+OAgA4Wpng4w4eOo6IdIWJBREREREVSmaWEnNeml72s661YWliqMOISJeYWBARERFRoay/EIbg6CQAQH1XW/RpUEnHEZEuMbEgIiIiIo09f5GORYfuSduzenlBLpfpMCLSNSYWRERERKSxHw7fQ3xKBgCgb4NKaFilgo4jIl1jYkFEREREGrkTmYB15x8BAMyNDfBp19o6joj0ARMLIiIiIlKbEAJzdwdBmT27LD5uXx0KG1PdBkV6gYkFEREREantUFAUzoQ8AwBUrmCG99pU03FEpC+YWBARERGRWtIyszBvz21p+4vunjA1MtBhRKRPmFgQERERkVr+OP0QYbHJAIDm1ezQ1Vuh44hInzCxICIiIqLXik5Ixc9HgwEAchkws2cdyGScXpb+w8SCiIiIiF5r4YG7eJGeBQAY1LQKvFysdRwR6RsmFkRERERUoOuP47D1yhMAgLWpISZ3rqnjiEgfMbEgIiIionwJITB71y1pe0KnmrC3NNFhRKSvmFgQERERUb52Xg/H1bA4AEB1BwsMb+Gm24BIbzGxICIiIqI8Jadnwn/vHWn7y55eMDLgx0fKG1sGEREREeVp+fH7iExIBQC8UdsR7Ws56jgi0mdMLIiIiIgol8exyfj15AMAgKFchhk9PHUcEek7JhZERERElMvX++4gLVMJAHi3VVVUc7DUcUSk75hYEBEREZGKCw+eYc/NCACAvYUxxnWsoeOIqDRgYkFEREREkiylwOxdQdL2FN9asDY10mFEVFowsSAiIiIiyebLjxEUkQAA8HK2xjuNXXUcEZUWTCyIiIiICAAQn5KB7w7clbZn9fKCgVymw4ioNGFiQUREREQAgJ+OBOPZi3QAQI96zmhWzV7HEVFpwsSCiIiIiHA/Jgmrzz4EAJgYyjG9W23dBkSlDhMLIiIiIsL8PbeRqRQAgA/bVUflCuY6johKG50mFv7+/mjSpAmsrKzg6OiIt956C3fv/tevLzY2FuPGjUOtWrVgZmaGKlWqYPz48YiPj1c5TlhYGHr06AFzc3M4Ojpi6tSpyMzMLOnTISIiIiqVjt2NxtE70QAAhbUpPmpX
TccRUWmk08TixIkT8PPzw/nz53Ho0CFkZGSgS5cuePHiBQAgPDwc4eHh+O677xAYGIjVq1dj//79GD16tHSMrKws9OjRA+np6Th79izWrFmD1atXY+bMmbo6LSIiIqJSIyNLibm7/5tednr32jA3NtRhRFRayYQQQtdB5IiJiYGjoyNOnDiBtm3b5llny5YtGDp0KF68eAFDQ0Ps27cPPXv2RHh4OJycnAAAy5cvx2effYaYmBgYGxu/9nkTEhJgY2OD+Ph4WFtbF+k5kfaUSiWio6Ph6OgIuZy990hzbEOkDbYf0kZpaD+/nw6VEotGbhWw9aMWkMk4E5S+0HUb0uRzsl618JwuTnZ2dgXWsba2hqFhdiZ97tw51K1bV0oqAMDX1xcJCQm4detW8QZMREREVIo9S0rD4sP3pO1ZvbyYVFCh6c19LqVSiYkTJ6JVq1bw9vbOs87Tp08xd+5cfPDBB1JZZGSkSlIBQNqOjIzM8zhpaWlIS0uTthMSEqQYlEqlVudBRU+pVEIIwdeGCo1tiLTB9kPa0Pf2893Bu0hMzR6X+najSvB2sdbbWMsrXbchTZ5XbxILPz8/BAYG4vTp03nuT0hIQI8ePeDl5YWvvvpKq+fy9/fH7Nmzc5XHxMQgNTVVq2NT0VMqlYiPj4cQQm9vI5N+YxsibbD9kDb0uf0ExyRj06XHAABzIznebWiP6OhoHUdFr9J1G0pMTFS7rl4kFmPHjsXu3btx8uRJVK5cOdf+xMREdO3aFVZWVti+fTuMjIykfQqFAhcvXlSpHxUVJe3Ly/Tp0zF58mRpOyEhAa6urnBwcOAYCz2kVCohk8ng4OCgd2/KVDqwDZE22H5IG/rafoQQmLjjIv6dXRbjOtaAp3sl3QZFedJ1GzI1NVW7rk4TCyEExo0bh+3bt+P48eNwd3fPVSchIQG+vr4wMTHBzp07c51cixYtMH/+fGlQCwAcOnQI1tbW8PLyyvN5TUxMYGJikqtcLpfr1S89/Ucmk/H1Ia2wDZE22H5IG/rYfvbdjMD50FgAgJu9OUa1dter+EiVLtuQJs+p08TCz88PGzZswI4dO2BlZSWNibCxsYGZmRkSEhLQpUsXJCcnY926dUhISJDGQzg4OMDAwABdunSBl5cXhg0bhoULFyIyMhIzZsyAn59fnskDERERUXmWmpGF+XtvS9tfdPeEiaGBDiOiskKnicWyZcsAAO3bt1cpX7VqFUaOHImrV6/iwoULAAAPDw+VOqGhoahatSoMDAywe/dujBkzBi1atICFhQVGjBiBOXPmlMg5EBEREZUmv516gCfPUwAArT0qorOX02seQaQenXeFKkj79u1fWwcA3NzcsHfv3qIKi4iIiKhMioxPxS/H7gMADOQyfNmT08tS0WFnOiIiIqJyYuH+O0jJyAIADG1WBbUUVjqOiMoSJhZERERE5cDVsOfYdu0fAICtuREmda6p44iorGFiQURERFTGKZUCs3cFSduTO9eErbmxDiOisoiJBREREVEZt/3aP7j+OA4AUNPJEoObVtFtQFQmMbEgIiIiKsOS0jLxzf470vbMnnVgaMCPgFT02KqIiIiIyrClx0IQnZgGAOjs5YTWNSrqOCIqq5hYEBEREZVRYc+S8dvpUACAsYEcX3T31HFEVJYxsSAiIiIqoxbsvY30TCUAYFRrd1StaKHjiKgsY2JBREREVAadDXmK/bciAQAOViYY+4aHjiOiso6JBREREVEZk5mlxJzd/00vO9W3FixNDHUYEZUHTCyIiIiIypg/Lz3GnchEAEDdSjZ4u2FlHUdE5YFGqatSqcSJEydw6tQpPHr0CMnJyXBwcECDBg3QqVMnuLq6FlecRERERKSG+OQMLDp4V9r+qrcX5HKZDiOi8kKtOxYpKSmYN28eXF1d0b17d+zbtw9xcXEwMDBASEgIZs2aBXd3d3Tv3h3nz58v7piJiIiIKB+Lj9zD8+QMAMCbPi5o5Gan44iovFDr
jkXNmjXRokULrFy5Ep07d4aRkVGuOo8ePcKGDRswcOBAfPHFF3j//feLPFgiIiIiyl9wVCL+d+4RAMDMyADTutXWcURUnqiVWBw8eBCengXPe+zm5obp06djypQpCAsLK5LgiIiIiEg9QgjM2R2ELKUAAIxpXx3ONmY6jorKE7W6Qr2cVGRkZORb7+nTpzAyMkL16tW1j4yIiIiI1Hb0TjROBT8FAFSyNcMHbavpOCIqbzSeFWrgwIEQQuQqj4qKQvv27YsiJiIiIiLSQHqmEnNfml52evfaMDUy0GFEVB5pnFiEhYXhvffeUymLjIxE+/btUbs2+/ERERERlbTVZ0Px8FkyAKBpVTv0qOus44ioPNI4sdi7dy/Onj2LyZMnAwDCw8PRrl071K1bF5s3by7yAImIiIgofzGJafjpSAgAQCYDZvbygkzG6WWp5Gm8BKODgwMOHjyI1q1bAwB2796Nhg0bYv369ZDLud4eERERUUn6/uBdJKZlAgAGNnGFdyUbHUdE5VWh1nZ3dXXFoUOH0KZNG3Tu3Blr165lZkxERERUwgL/icemy48BAFYmhvikSy0dR0TlmVqJRYUKFfJMHJKTk7Fr1y7Y29tLZbGxsUUXHRERERHlSQiB2btuIWdOnQmdaqCipYlug6JyTa3EYvHixcUcBhERERFpYveNCFx6+BwAUK2iBYa3qKrbgKjcUyuxGDFiRHHHQURERERqSknPgv/e29L2jJ6eMDbkWFfSLa1boFKpRGhoKLKysooiHiIiIiJ6jRUnHyA8PhUA0K6mAzrUctRxREQaJhbbt29XmVL2wYMHqFatGqpXrw4XFxdcunSpyAMkIiIiov+Ex6Vg2Yns6WUN5TJ82dOTk+iQXtAosfj222+RkZEhbX/55Zfw9PTEjRs30Lt3b3zyySdFHiARERER/efrfXeQmqEEAAxvURUejlY6jogom1pjLMLCwiCEQEhICCpUqCBt79+/HytWrIC1tTU++OADdOrUCWFhYQCAKlWqFGvgREREROXNpYex2Hk9HABgZ2GMCR1r6Dgiov+olVisWrUKQPb0svv27cOlS5fw5MkTpKWl4datWwgMDIRSqURKSgpWr14NAJg5c2axBU1ERERU3iiV2dPL5vikS03YmBvpMCIiVWolFrNmzQIAac2KWbNmYerUqWjbtq2UQISGhuKPP/5gQkFERERUDLZeeYLAfxIAALUVVhjYhL1DSL9otPL2Z599hsGDB+Prr7+GXC7HoUOHpH07duxA69atizxAIiIiovIuMTUDCw/clbZn9vKCgZwDtkm/aJRY9O/fHz4+Prhx4wYaNWqEqlWrSvs8PT3RrVu3oo6PiIiIqNz7+VgInialAQC6eSvQsnpFHUdElJtGiQUA1KhRAzVq5B4o5OvrWyQBEREREdF/Qp++wB+nQwEAxoZyfN7dU8cREeVNrelmc2Z6Utc///xTqGCIiIiISNX8PbeRkSUAAB+0qQZXO3MdR0SUN7USiyZNmuDDDz8scAG8+Ph4rFy5Et7e3vjrr7+KLEAiIiKi8urkvRgcvh0FAHCyNsGY9tV1HBFR/tTqChUUFIT58+ejc+fOMDU1RaNGjeDi4gJTU1M8f/4cQUFBuHXrFho2bIiFCxeie/fuxR03ERERUZmWmaXE3N1B0va0brVhYaJxL3aiEqPWHQt7e3ssWrQIERER+Pnnn1GjRg08ffoUwcHBAIAhQ4bgypUrOHfuHJMKIiIioiKw/kIYgqOTAAA+rrZ4s34lHUdEVDCN0l4zMzO8/fbbePvtt4srHiIiIqJy7/mLdCw6dE/antXLC3JOL0t6Tq07FkRERERUcn44fA/xKRkAgL4NK6FBlQo6jojo9ZhYEBEREemRO5EJWHf+EQDA3NgAn3WtreOIiNTDxIKIiIhITwghMGdXEJTZs8vCr4MHnKxNdRsUkZqYWBARERHpiYNBUTh7/xkAwNXODKNbu+s4IiL1MbEgIiIi0gNp
mVmYv+e2tP1Fdy+YGhnoMCIizRQqsVi7di1atWoFFxcXPHqU3Qdw8eLF2LFjR5EGR0RERFRe/HH6IcJikwEALarZw7eOk44jItKMxonFsmXLMHnyZHTv3h1xcXHIysoCANja2mLx4sVFHR8RERFRmRedkIqfj2avDyaXATN7eUEm4/SyVLponFj89NNPWLlyJb744gsYGPx3e65x48a4efNmkQZHREREVB4sPHAXL9Kzv6wd3KwKPJ2tdRwRkeY0TixCQ0PRoEGDXOUmJiZ48eJFkQRFREREVF5cfxyHrVeeAACsTQ0xuXMtHUdEVDgaJxbu7u4ICAjIVb5//354enoWRUxERERE5YIQArN33ZK2J3WuCTsLYx1GRFR4hpo+YPLkyfDz80NqaiqEELh48SL+/PNP+Pv747fffiuOGImIiIjKpJ3Xw3E1LA4A4OFoiaHN3XQbEJEWNE4s3nvvPZiZmWHGjBlITk7G4MGD4eLigiVLlmDgwIHFESMRERFRmZOcngn/vXek7S97esHIgCsBUOmlcWIBAEOGDMGQIUOQnJyMpKQkODo6FnVcRERERGXa8uP3EZmQCgDoWNsR7Wo66DgiIu1onFiEhoYiMzMTNWrUgLm5OczNzQEAwcHBMDIyQtWqVYs6RiIiIqIy5XFsMn49+QAAYGQgwxc9OE6VSj+N77eNHDkSZ8+ezVV+4cIFjBw5sihiIiIiIirTvt53B2mZSgDAu63cUc3BUscREWlP48Ti2rVraNWqVa7y5s2b5zlbFBERERH95/yDZ9hzMwIAUNHSGGPf8NBxRERFQ+PEQiaTITExMVd5fHy8tAo3EREREeWWpRSYsytI2p7qWwvWpkY6jIio6GicWLRt2xb+/v4qSURWVhb8/f3RunVrjY7l7++PJk2awMrKCo6Ojnjrrbdw9+5dlTqpqanw8/ODvb09LC0t0a9fP0RFRanUCQsLQ48ePWBubg5HR0dMnToVmZmZmp4aERERUbHafPkxgiISAADelazxdiNXHUdEVHQ0Hrz9zTffoG3btqhVqxbatGkDADh16hQSEhJw9OhRjY514sQJ+Pn5oUmTJsjMzMTnn3+OLl26ICgoCBYWFgCASZMmYc+ePdiyZQtsbGwwduxY9O3bF2fOnAGQndT06NEDCoUCZ8+eRUREBIYPHw4jIyMsWLBA09MjIiIiKhbxKRn47sB/X6DO7FkHBnKZDiMiKloyIYTQ9EHh4eH4+eefcf36dZiZmaFevXoYO3Ys7OzstAomJiYGjo6OOHHiBNq2bYv4+Hg4ODhgw4YNePvttwEAd+7cgaenJ86dO4fmzZtj37596NmzJ8LDw+Hk5AQAWL58OT777DPExMTA2Pj1q1cmJCTAxsYG8fHxsLa21uocqOgplUpER0fD0dERcjnn9ybNsQ2RNth+SBsvt58Fe+/gt9OhAICe9Zzx8+CGOo6OSgNdvwdp8jm5UOtYuLi4FMvdgPj4eACQEpQrV64gIyMDnTp1kurUrl0bVapUkRKLc+fOoW7dulJSAQC+vr4YM2YMbt26hQYNGuR6nrS0NKSlpUnbCQnZtySVSiWUSmWRnxdpR6lUQgjB14YKjW2ItMH2Q9rIaT8hUYlYffYhAMDEUI7PutZimyK16Po9SJPnVSuxuHHjBry9vSGXy3Hjxo0C69arV0/tJ3+ZUqnExIkT0apVK3h7ewMAIiMjYWxsDFtbW5W6Tk5OiIyMlOq8nFTk7M/Zlxd/f3/Mnj07V3lMTAxSU1MLFT8VH6VSifj4eAgh+G0hFQrbEGmD7YcKK0spcO1JAh7HJGDXvSRkKrM7iQxt5ASj9ERER+eeDIfoVbp+D8pr0qb8qJVY+Pj4IDIyEo6OjvDx8YFMJkNePahkMlmhZ4by8/NDYGAgTp8+XajHa2L69OmYPHmytJ2QkABXV1c4ODiwK5QeUiqVkMlkcHBw4B91KhS2IdIG2w8Vxv7ASMzZfVtaWTuHrbkRJnerCzNj
Ax1FRqWNrt+DTE1N1a6rVmIRGhoKBwcH6f9FbezYsdi9ezdOnjyJypUrS+UKhQLp6emIi4tTuWsRFRUFhUIh1bl48aLK8XJmjcqp8yoTExOYmJjkKpfL5fyjoadkMhlfH9IK2xBpg+2HNLE/MAJ+G64hr0GscckZOBXyFF29nUs8Liq9dPkepMlzqlXTzc0NMpkMGRkZmD17NpRKJdzc3PL80YQQAmPHjsX27dtx9OhRuLu7q+xv1KgRjIyMcOTIEans7t27CAsLQ4sWLQAALVq0wM2bNxEdHS3VOXToEKytreHl5aVRPERERETayFIKzN4VlGdSAQAyALN3BSFLqfHcOUR6T6O0x8jICH/99VeRPbmfnx/WrVuHDRs2wMrKCpGRkYiMjERKSgoAwMbGBqNHj8bkyZNx7NgxXLlyBe+++y5atGiB5s2bAwC6dOkCLy8vDBs2DNevX8eBAwcwY8YM+Pn55XlXgoiIiKi4XAyNRUR8/uM1BYCI+FRcDI0tuaCISojG91Peeust/P3330Xy5MuWLUN8fDzat28PZ2dn6WfTpk1SnR9++AE9e/ZEv3790LZtWygUCmzbtk3ab2BggN27d8PAwAAtWrTA0KFDMXz4cMyZM6dIYiQiIiJSV3SiepPAqFuPqDTReLrZGjVqYM6cOThz5gwaNWokLWSXY/z48WofS50lNExNTfHLL7/gl19+ybeOm5sb9u7dq/bzEhERERUHdZe7c7RSf0AsUWmhcWLx+++/w9bWFleuXMGVK1dU9slkMo0SCyIiIqKy4lBQFL7YfrPAOjIAChtTNHXXblFhIn2kcWJRHLNCEREREZVWGVlKfHfgLn49+aDAejl3M2b18oKBXN17G0Slh0aJxfnz57Fr1y6kp6ejY8eO6Nq1a3HFRURERKT3IuJTMHbDNVx59Fwq615XgU6eTvj2wF2VgdwKG1PM6uXFqWapzFI7sdi6dSsGDBgAMzMzGBkZYdGiRfjmm28wZcqU4oyPiIiISC+duBeDSZsCEPsiHQBgZCDDF909MaJlVchkMrzpUwkXHjxFyJMYeFR2QLNqFXmngso0tWeF8vf3x/vvv4/4+Hg8f/4c8+bNw4IFC4ozNiIiIiK9k6UU+P7gXYxcdVFKKirZmmHLRy0xspU7ZLLs5MFALkPzavboUtsOzavZM6mgMk/txOLu3buYMmUKDAyyl6D/5JNPkJiYqLIwHREREVFZFp2YiqG/XcBPR0OQM7llx9qO2DO+NXxcbXUaG5Guqd0VKjk5GdbW1tK2sbExTE1NkZSUBEdHx2IJjoiIiEhfnL3/FOP/DMDTpDQA2XckPvWthffbVIOcdyOINBu8/dtvv8HS0lLazszMxOrVq1GxYkWpjNPNEhERUVmiVAosPR6CRYfuQfnvXQonaxP8PLghmlTltLFEOdROLKpUqYKVK1eqlCkUCqxdu1ba5joWREREVJbEvkjHpE0BOHEvRiprU6MiFg/wgb2liQ4jI9I/aicWDx8+LMYwiIiIiPTLlUexGLvhmjRlrFwGTOxUE34dPDgQmygPGi+QR0RERFSWCSHw26lQfLP/DjL/7ftU0dIEPw70QUuPiq95NFH5xcSCiIiI6F/xyRmYsvU6DgVFSWXN3O3w06AGcLQ21WFkRPqPiQURERERgBtP4vDx+qt48jxFKvPrUB2TOtWEoYHaM/QTlVtMLIiIiKhcE0Jg7flHmLf7NtKzlAAAW3Mj/DDABx1qcUp9InUxsSAiIqJyKzE1A9O23cSeGxFSWcMqtvh5cEO42JrpMDKi0ketxCIhIUHtA768iB4RERGRvgoKT4DfhqsIffpCKnu/jTs+7VobRuz6RKQxtRILW1tbyGTqTauWlZWlVUBERERExUkIgc2XH2PmjltIy8zu+mRlaojv+teHbx2FjqMjKr3USiyOHTsm/f/hw4eYNm0aRo4ciRYtWgAAzp07hzVr1sDf3794oiQiIiIqAsnpmZjxdyC2Xf1H
KqtbyQa/DG6IKvbmOoyMqPRTK7Fo166d9P85c+Zg0aJFGDRokFTWu3dv1K1bFytWrMCIESOKPkoiIiIiLYVEJ2LMuqsIjk6SyoY1d8MXPTxhamSgw8iIygaNOxCeO3cOjRs3zlXeuHFjXLx4sUiCIiIiIipK2689Qa+fzkhJhYWxAX4c1ABz3/JmUkFURDROLFxdXbFy5cpc5b/99htcXV2LJCgiIiKiopCakYXp225i0qbrSMnIHgdaW2GFneNao3d9Fx1HR1S2aDzd7A8//IB+/fph3759aNasGQDg4sWLCA4Oxl9//VXkARIREREVxsOnL/Dx+qsIivhvdssBjV3xVe86MDPmXQqioqbxHYvu3bvj3r176NWrF2JjYxEbG4tevXrh3r176N69e3HESERERKSRvTcj0POn01JSYWokx3f96+Obt+sxqSAqJoVaIM/V1RULFiwo6liIiIiItJKeqcSCvbex+uxDqay6gwWWDmmEWgor3QVGVA4UavWXU6dOYejQoWjZsiX++Sd7ura1a9fi9OnTRRocERERkboexyaj/6/nVJKKN31csHNsayYVRCVA48Tir7/+gq+vL8zMzHD16lWkpaUBAOLj43kXg4iIiHTicFAUev50GtcfxwEAjA3lmN/HG4sH+MDCpFAdNIhIQxonFvPmzcPy5cuxcuVKGBkZSeWtWrXC1atXizQ4IiIiooJkZCnhv/c23vvfZcSnZAAA3OzNsW1MSwxp5gaZTKbjCInKD41T+Lt376Jt27a5ym1sbBAXF1cUMRERERG9VmR8Ksb9eRWXHj6Xyrp5K/DN2/VgbWpUwCOJqDhonFgoFAqEhISgatWqKuWnT59GtWrViiouIiIionydvBeDiZsCEPsiHQBgZCDD5909MbJlVd6lINIRjROL999/HxMmTMAff/wBmUyG8PBwnDt3DlOmTMGXX35ZHDESERERAQCylAJLjgTjp6PBECK7rJKtGX4e3AANqlTQbXBE5ZzGicW0adOgVCrRsWNHJCcno23btjAxMcGUKVMwbty44oiRiIiICDGJaZiw8RrO3n8mlb1R2xGL3qkPW3NjHUZGREAhEguZTIYvvvgCU6dORUhICJKSkuDl5QVLS8viiI+IiIgI5+4/w/iN1xCTmD0bpYFchqm+tfBBm2qQy9n1iUgfaDwr1KhRo5CYmAhjY2N4eXmhadOmsLS0xIsXLzBq1KjiiJGIiIjKKaVS4JdjIRjy23kpqXCyNsGf7zfHR+2qM6kg0iMaJxZr1qxBSkpKrvKUlBT873//K5KgiIiIiJ6/SMeoNZfw7YG7UP47nqK1R0XsGd8GTd3tdBscEeWidleohIQECCEghEBiYiJMTU2lfVlZWdi7dy8cHR2LJUgiIiIqX648eo5xG64iPD4VACCTARM71sTYNzxgwLsURHpJ7cTC1tYWMpkMMpkMNWvWzLVfJpNh9uzZRRocERERlS9CCPx+OhRf77uDzH9vU1S0NMaSgQ3QyqOijqMjooKonVgcO3YMQgi88cYb+Ouvv2Bn998tSGNjY7i5ucHFxaVYgiQiIqKyLz4lA1O3XMfBoCiprKm7HX4a1ABO1qYFPJKI9IHaiUW7du0AAKGhoahSpQoXnyEiIqIic/NJPD7ecAWPY/8bx/lx++qY3LkmDA00HhJKRDqg8XSzR48ehaWlJfr3769SvmXLFiQnJ2PEiBFFFhwRERGVbUIIrDv/CHN330Z6lhIAYGtuhB/e8UGH2hy7SVSaaPwVgL+/PypWzN3H0dHREQsWLCiSoIiIiKjsS0rLxLg/r+HLHbekpKJBFVvsGd+GSQVRKaTxHYuwsDC4u7vnKndzc0NYWFiRBEVERERl2+2IBPitv4oHT19IZaNbu+OzrrVhbMiuT0SlkcaJhaOjI27cuIGqVauqlF+/fh329vZFFRcRERGVUZsvP8aXfwciLTP7LoWVqSG+fbs+unordBwZEWlD48Ri0KBBGD9+PKys
rNC2bVsAwIkTJzBhwgQMHDiwyAMkIiKisiE5PRNf/n0Lf119IpXVcbHG0iEN4WZvocPIiKgoaJxYzJ07Fw8fPkTHjh1haJj9cKVSieHDh3OMBREREeUpJDoRH6+/intRSVLZ0OZVMKOHF0yNDHQYGREVFY0TC2NjY2zatAlz587F9evXYWZmhrp168LNza044iMiIqJSbkfAP5i+7SaS07MAAObGBvDvWxdv+lTScWREVJQ0Tixy1KxZM88VuImIiIgAIDUjC3N2B2HDhf8md6nlZIVfhjSEh6OlDiMjouKgVmIxefJkzJ07FxYWFpg8eXKBdRctWlQkgREREVHp9fDpC3y8/iqCIhKksv6NKmPOm94wM2bXJ6KySK3E4tq1a8jIyJD+nx+uxk1ERET7bkbg0603kJiWCQAwNZJjzpveeKexq44jI6LipFZicezYsTz/T0REROVXllLgYmgsohNT4WhlCh9XWyw8cAerzjyU6lRzsMDSIQ1RW2Gtu0CJqEQUeowFERERlV/7AyMwe1cQIuJTpTIjAxkysoS03au+C/z71oWlCT9uEJUHav2m9+3bV+0Dbtu2rdDBEBERkf7bHxiBMeuuQrxSnpNUGMpl+Kp3HQxpVoXdpInKEbk6lWxsbKQfa2trHDlyBJcvX5b2X7lyBUeOHIGNjU2xBUpERES6l6UUmL0rKFdS8TJbcyMMasqkgqi8UeuOxapVq6T/f/bZZ3jnnXewfPlyGBhkz+qQlZWFjz/+GNbW7D9JRERUVimVAuvOP1Lp/pSXp0npuBgaixbV7UsoMiLSBxp3evzjjz9w+vRpKakAAAMDA0yePBktW7bEt99+W6QBEhERke5kZilx8WEsDgRG4sCtKEQmFJxU5IhOVK8eEZUdanWFellmZibu3LmTq/zOnTtQKpUaHevkyZPo1asXXFxcIJPJ8Pfff6vsT0pKwtixY1G5cmWYmZnBy8sLy5cvV6mTmpoKPz8/2Nvbw9LSEv369UNUVJSmp0VERET/Ss9U4tjdaHy29QaaLjiCwSsvYM25R2onFQDgaGVajBESkT7S+I7Fu+++i9GjR+P+/fto2rQpAODChQv4+uuv8e6772p0rBcvXqB+/foYNWpUngPEJ0+ejKNHj2LdunWoWrUqDh48iI8//hguLi7o3bs3AGDSpEnYs2cPtmzZAhsbG4wdOxZ9+/bFmTNnND01IiKicislPQsn7sVgf2AEjtyOltageJmxgRytPexxJSwOCSkZeY6zkAFQ2JiiqbtdscdMRPpF48Tiu+++g0KhwPfff4+IiAgAgLOzM6ZOnYpPPvlEo2N169YN3bp1y3f/2bNnMWLECLRv3x4A8MEHH+DXX3/FxYsX0bt3b8THx+P333/Hhg0b8MYbbwDIHg/i6emJ8+fPo3nz5pqeHhERUbmRmJqBo3eisT8wEsfvxiAlIytXHTMjA3So7YCu3s7oUMsBVqZG0qxQMkAlucgZqj2rlxcM5By4TVTeaJxYyOVyfPrpp/j000+RkJAAAMU2aLtly5bYuXMnRo0aBRcXFxw/fhz37t3DDz/8ACB7NqqMjAx06tRJekzt2rVRpUoVnDt3jokFERHRK2JfpONwUBT2BUbgTMgzpGfl7sZsZWqIzp5O8PVWoF1NB5gaGajs7+rtjGVDG+Zax0JhY4pZvbzQ1du52M+DiPRPoVasyczMxPHjx3H//n0MHjwYABAeHg5ra2tYWloWWXA//fQTPvjgA1SuXBmGhoaQy+VYuXIl2rZtCwCIjIyEsbExbG1tVR7n5OSEyMjIfI+blpaGtLQ0aTsnQVIqlRqPE6Hip1QqIYTga0OFxjZE2igL7ScqIRUHg6KwPzAKFx/GIkuZuxOTvYUxOns5oWsdJzSvZg9jw/+GYeZ17l28nNCxtiMuPYxFdGIaHK1M0KSqHQzkslJ9rYpaWWg/pFu6bkOaPK/GicWjR4/QtWtXhIWFIS0tDZ07d4aVlRW++eYbpKWl
5RpcrY2ffvoJ58+fx86dO+Hm5oaTJ0/Cz88PLi4uKncpNOXv74/Zs2fnKo+JiUFqKmex0DdKpRLx8fEQQkAu13i+ASK2IdJKaW0/4fFpOBYSh+MhzxEY8SLP8RAOlkZo72GLDh4VUN/F8t/uSwJxsU/Vfp5qlkA1S0MAWXj2NKaowi8zSmv7If2h6zaUmJiodl2NE4sJEyagcePGuH79Ouzt/5ufuk+fPnj//fc1PVy+UlJS8Pnnn2P79u3o0aMHAKBevXoICAjAd999h06dOkGhUCA9PR1xcXEqdy2ioqKgUCjyPfb06dMxefJkaTshIQGurq5wcHDgWhx6SKlUQiaTwcHBgW/KVChsQ6SN0tR+7kcnYf+tSOy/FYVb4Ql51nGzM4evtxO61lGgXiUbyDkWoliVpvZD+knXbcjUVP0Z3jROLE6dOoWzZ8/C2NhYpbxq1ar4559/ND1cvjIyMpCRkZHrAhoYGEi3ZBo1agQjIyMcOXIE/fr1AwDcvXsXYWFhaNGiRb7HNjExgYmJSa5yuVzOX3o9JZPJ+PqQVtiGSBv62n6EELgVnoADtyKxLzASIdFJedar6WSJrt7O6FpHAU9nK66IXcL0tf1Q6aHLNqTJc2qcWCiVSmRl5Z414smTJ7CystLoWElJSQgJCZG2Q0NDERAQADs7O1SpUgXt2rXD1KlTYWZmBjc3N5w4cQL/+9//sGjRIgCAjY0NRo8ejcmTJ8POzg7W1tYYN24cWrRowYHbRERUJimVAtcex2F/YAT234rE49iUPOvVq2wD3zoKdPVWoLpD0Y1/JCLKj8aJRZcuXbB48WKsWLECQHYGlZSUhFmzZqF79+4aHevy5cvo0KGDtJ3TPWnEiBFYvXo1Nm7ciOnTp2PIkCGIjY2Fm5sb5s+fj48++kh6zA8//AC5XI5+/fohLS0Nvr6+WLp0qaanRUREpLdyVr/eHxiJA7ciEZWQlquOTAY0dquArt7O8K3jhMoVzHUQKRGVZzIhRF7jufL1+PFjdO3aFUIIBAcHo3HjxggODkbFihVx8uRJODo6FlesxSYhIQE2NjaIj4/nGAs9pFQqER0dDUdHR95GpkJhGyJt6Kr9pGVm4WzIM+wPjMSh21GIfZGeq46BXIaW1e3hW0eBLnWcuNq1HuL7D2lL121Ik8/JGt+xcHV1xfXr17Fp0yZcv34dSUlJGD16NIYMGQIzM7NCB01ERFTeZa9+nb1gXb6rXxvK0bZGRfjWUaCzlxNszY3zOBIRUcnTKLHIyMhA7dq1sXv3bgwZMgRDhgwprriIiIjKhYTUDBy7E419NyNx/F40UjNyzxlvbmyADrUc0dVbgQ61HWFpUqhlqIiIipVG70xGRkZc54GIiEhLmqx+3dVbgbZ5rH5NRKRvNP7Kw8/PD9988w1+++03GBryGxMiIiJ1RCWk4sCtSOwPjMSF0LxXv65oaYzOXtkzObV4ZfVrIiJ9p3FmcOnSJRw5cgQHDx5E3bp1YWFhobJ/27ZtRRYcERFRafY4Nhn7AyOxLzACV8Pi8qzjbGMK3zoKdPNWoHFVu39XvyYiKn00TixsbW2lxeiIiIhIVUh04r/JRGT+q1/bm6OrtwLdvJ1Rv7INF6wjojJB48Ri1apVxREHERGR3slSClx48AwhT2LhkWSAZtUq5rqjkLP69f7ASOy/lf/q17WcrNDVO7ubU20FV78morJH7cRCqVTi22+/xc6dO5Geno6OHTti1qxZnGKWiIjKpP2BEZi9KwgR8TmTloTC2cYUs3p5oYuXAtceP5eSiYJWv+7qrUDXOgpU4+rXRFTGqZ1YzJ8/H1999RU6deoEMzMzLFmyBNHR0fjjjz+KMz4iIqIStz8wAmPWXcWrw6sj4lPx0bqrsDEzRHxK7jUmZDKgiZsdfP+9M1HJll++EVH5oXZi8b///Q9Lly7Fhx9+CAA4fPgwevTogd9++40rSRIRUZmRpRSYvSsoV1LxspeTCkO5DC2q26Or
d/aCdVz9mojKK7UTi7CwMHTv3l3a7tSpE2QyGcLDw1G5cuViCY6IiKikXQyNfan7U/4aVqmAwc2qoJOnI1e/JiKCBolFZmYmTE1Vv4UxMjJCRkZGkQdFRESkK9GJ6i0EO6KlG970qVTM0RARlR5qJxZCCIwcORImJiZSWWpqKj766COVtSy4jgUREZVWWUqB08FP1arLLk9ERKrUTixGjBiRq2zo0KFFGgwREZGuPHmejEmbAnDp4fMC68kAKGxM0dTdrmQCIyIqJdROLLh+BRERlVW7rofj8+03kZiaPShbBkC89G+OnJUnZvXy4grZRESv0HiBPCIiorIiKS0TX+28ha1XnkhllWzNsGSgD54mpb2yjkX2nYpZvbzQ1dtZF+ESEek1JhZERFQuBTyOw4SN1/DoWbJU1ru+C+b18Ya1qREAoLOXAhcePEXIkxh4VHbIc+VtIiLKxsSCiIjKlSylwK8n72PRwXvIVGZ3dLIwNsDct7zRp0ElyGT/JQ4GchmaV7NHNcssODraQ86kgogoX0wsiIio3IiIT8HkTddx7sEzqczH1RZLBvrAzd6igEcSEdHrMLEgIqJyYX9gBD776ybiU7LXX5LJAL/2HpjQqQaMDOQ6jo6IqPRjYkFERGVacnom5u6+jT8vhkllzjam+GGAD5pXs9dhZEREZQsTCyIiKrMC/4nH+I3X8CDmhVTWva4C/n3qwcbcSIeRERGVPUwsiIiozFEqBX4/HYqFB+4gIyt7gLaZkQFm966D/o0rqwzQJiKiosHEgoiIypTohFR8suU6TgU/lcrqVrLBkoE+qOZgqcPIiIjKNiYWRERUZhwOisKnf91A7It0ANkDtD9oWw2fdK4FY0MO0CYiKk5MLIiIqNRLzcjCgr238b9zj6QyJ2sTLHrHB608KuowMiKi8oOJBRERlWq3IxIwYeM13ItKksq6eDnhm371UMHCWIeRERGVL0wsiIioVBJCYPXZh/DfdwfpmUoAgKmRHF/29MLgplU4QJuIqIQxsSAiolLnaVIapm65jmN3Y6QyT2dr/DjQBzWcrHQYGRFR+cXEgoiISpXjd6MxZct1PE1Kl8pGt3bHp11rwcTQQIeRERGVb0wsiIioVEjNyMLC/Xfxx5lQqayipQm+618P7Ws56jAyIiICmFgQEVEpEByViPEbA3A7IkEq61DLAd/2r4+KliY6jIyIiHIwsSAiIr0lhMD6C2GYuzsIaf8O0DY2lOPzbrUxomVVDtAmItIjTCyIiEgvxb5Ix2d/3cChoCiprKaTJX4c1AC1FdY6jIyIiPLCxIKIiPTOmZCnmLQpANGJaVLZiBZumN7dE6ZGHKBNRKSPmFgQEZHeSM9U4vuDd7Hi1AMIkV1mZ2GMb9+uh46eTroNjoiICsTEgoiI9MKDmCRM2BiAm//ES2VtalTE9/3rw9HaVIeRERGROphYEBGRTgkhsPnyY3y1MwgpGVkAACMDGT7rWhujWrlDLucAbSKi0oCJBRER6Ux8cgamb7+BvTcjpbLqDhZYMrABvCvZ6DAyIiLSFBMLIiLSifMPnmHSpgBExKdKZYObVcGXPbxgZswB2kREpQ0TCyIiKlEZWUosPnwPS4/flwZo25ob4eu+9dDVW6Hb4IiIqNCYWBARUYl59OwFxm8MwPXHcVJZi2r2+GGADxQ2HKBNRFSaMbEgIqJiJ4TAtqv/YOaOQLxIzx6gbSiX4ZMutfBB22ow4ABtIqJSj4kFEREVq4TUDMzYHoid18Olsqr25lgysAHqu9rqLjAiIipSTCyIiKjYXHkUi/F/BuCfuBSprH+jyviqdx1YmPBPEBFRWcJ3dSIiKnKZWUr8fCwEPx4JhvLfAdpWpobw71sXPeu56DY4IiIqFkwsiIioSD2OTcakTQG4/Oi5VNakagX8MMAHlSuY6zAyIiIqTkwsiIioyOy8Ho4vtt1EYlomAMBALsPEjjXwcQcPDtAmIirjmFgQEZHWktIyMXNHILZd
/Ucqc7Uzw+IBDdDIrYIOIyMiopLCxIKIiLQS8DgOEzZew6NnyVJZnwaVMOfNOrAyNdJhZEREVJKYWBARUaFkKQWWn7iPHw7dQ+a/I7QtTQwx7y1vvNWgko6jIyKiksbEgoiINBYel4JJmwJwITRWKmtQxRZLBjRAFXsO0CYiKo+YWBARkUb23ozA9G03EZ+SAQCQy4CxHTwwvmMNGBrIdRwdERHpChMLIiJSS3J6JmbvDMKmy4+lskq2ZvhhgA+autvpMDIiItIHTCyIiOi1bj6Jx4SN1/Dg6QuprEc9ZyzoUxc2ZhygTUREgE7vWZ88eRK9evWCi4sLZDIZ/v7771x1bt++jd69e8PGxgYWFhZo0qQJwsLCpP2pqanw8/ODvb09LC0t0a9fP0RFRZXgWRARlV1KpcCvJ+6j77IzUlJhbmyAb9+uh58HNWBSQUREEp0mFi9evED9+vXxyy+/5Ln//v37aN26NWrXro3jx4/jxo0b+PLLL2FqairVmTRpEnbt2oUtW7bgxIkTCA8PR9++fUvqFIiIyqyohFQM++MC/PfdQUZW9qxP9SrbYM/4Nujf2BUyGRe8IyKi/+i0K1S3bt3QrVu3fPd/8cUX6N69OxYuXCiVVa9eXfp/fHw8fv/9d2zYsAFvvPEGAGDVqlXw9PTE+fPn0bx58+ILnoioDDsUFIVPt17H8+TsAdoyGfBRu+qY1KkmjA05QJuIiHLT278OSqUSe/bsQc2aNeHr6wtHR0c0a9ZMpbvUlStXkJGRgU6dOklltWvXRpUqVXDu3DkdRE1EVLpkKQXO3X+GHQH/4Nz9Z0hKzcSMv2/i/f9dlpIKhbUp1r/XDJ91rc2kgoiI8qW3g7ejo6ORlJSEr7/+GvPmzcM333yD/fv3o2/fvjh27BjatWuHyMhIGBsbw9bWVuWxTk5OiIyMzPfYaWlpSEtLk7YTEhIAZCczSqWyWM6HCk+pVEIIwdeGCo1tKG/7AyMxZ/dtRCakSmWGcpm02B0AdPFygn9fb1QwNy6314/th7TB9kPa0nUb0uR59TaxyDmJN998E5MmTQIA+Pj44OzZs1i+fDnatWtX6GP7+/tj9uzZucpjYmKQmpqaxyNIl5RKJeLj4yGEgFzOb0tJc2xDuR0LeY7pux/kKs9JKgzlwJQOVfCmd0VkJMUhOqmkI9QfbD+kDbYf0pau21BiYqLadfU2sahYsSIMDQ3h5eWlUu7p6YnTp08DABQKBdLT0xEXF6dy1yIqKgoKhSLfY0+fPh2TJ0+WthMSEuDq6goHBwdYW1sX7YmQ1pRKJWQyGRwcHPimTIXCNqQqSymw5I9bBdapYG6C0R28YCDnAG22H9IG2w9pS9dt6OVJk15HbxMLY2NjNGnSBHfv3lUpv3fvHtzc3AAAjRo1gpGREY4cOYJ+/foBAO7evYuwsDC0aNEi32ObmJjAxMQkV7lcLucvvZ6SyWR8fUgrbEPZnr9Ix8pTD1S6P+UlJikNlx/FoUV1+xKKTL+x/ZA22H5IW7psQ5o8p04Ti6SkJISEhEjboaGhCAgIgJ2dHapUqYKpU6diwIABaNu2LTp06ID9+/dj165dOH78OADAxsYGo0ePxuTJk2FnZwdra2uMGzcOLVq04IxQRET/evTsBQ4FReFQUBQuP3qOrJfGUBQkOpFdQ4mISH06TSwuX76MDh06SNs53ZNGjBiB1atXo0+fPli+fDn8/f0xfvx41KpVC3/99Rdat24tPeaHH36AXC5Hv379kJaWBl9fXyxdurTEz4WISF8olQIBT+Jw+N9kIriQAyQcrdS//U1ERCQTQqj31VUZlpCQABsbG8THx3OMhR5SKpWIjo6Go6MjbyNToZSHNpSakYUzIU9xKCgKh29H42lSWp71qlW0QEdPR2y7+g9iX6Qjrz8AMgAKG1Oc/uwNjrFA+Wg/VHzYfkhbum5DmnxO1tsxFkREVLCnSWk4eicah4OicCr4KVIysnLVkcmARlUqoJOX
Ezp7OaG6gyUAoJFbBYxZdxUyQCW5yEkjZvXiwG0iItIMEwsiolLkfkxS9l2JoChcCXuOvO45mxrJ0aaGAzp7OeGN2o6oaJl7soqu3s5YNrQhZu8KQkT8f2MpFDammNXLC129nYvzNIiIqAxiYkFEpMeylAJXw55L4yUePH2RZ72Klibo5OmITp5OaF2jIkyNDF577K7ezujspcDF0FhEJ6bC0coUTd3teKeCiIgKhYkFEZGeSU7PxKng7PESR+9EI/ZFep71ajhaSl2cfCrbQl6IhMBALuOUskREVCSYWBAR6YHoxFQcuZ09XuJ0yFOkZSpz1ZHLgMZV7dDFywkdPZ3gXtFCB5ESERHljYkFEZEOCCEQHJ0krS8R8Dguz3rmxgZoV9MBnTyzx0tUsDAu2UCJiIjUxMSCiKiEZGYpcfnR83+nhI3Co2fJedZztDKRuji1qGav1ngJIiIiXWNiQURUjJLSMnHyXgwOB0Xh6N1oxCVn5FmvtsIKnb2c0MnTCXUr2RRqvAQREZEuMbEgIipikfGpOHQ7e0rYc/efIT0r93gJA7kMzdzt0Mkz+86Eq525DiIlIiIqOkwsiIi0JITA7YhEHL6dPV7i5j/xedazNDFEu1oO6OLlhPY1HWFjblTCkRIRERUfJhZERIWQkaXExdBYafD1P3EpedZztjGVujg1r2YPY0N5CUdKRERUMphYEBGpKSE1A8fvZo+XOHY3GompmXnWq+NiLXVxquNiDZmM4yWIiKjsY2JBRFSAJ8+TceR2NA4FReH8g2fIVIpcdYwMZGhezR6d/11fopKtmQ4iJSIi0i0mFkRELxFC4FZ4Ag4GZQ++DopIyLOelakh3qjtiE6eTmhXywHWphwvQURE5RsTCx3LUgpcDI1FdGIqHK1M0dTdDgacZlKSpRS48OAZQp7EwiPJAM2qVeT1eQXbUMHUaUNpmVk4/yAWh/9dXyIiPjXPY1WyNUNnLyd08XJCE3c7GBlwvAQREVEOJhY6tD8wArN3Bal8iHG2McWsXl7o6u2sw8j0Q+7rE8rr8wq2oYIV1IaaV7PH8bsxOBQUhRP3YpCUlvd4iXqVbdDZ0wmdvJxQW2HF8RJERET5kAkhcncYLmcSEhJgY2OD+Ph4WFtbl8hz7g+MwJh1V/Hqxc/5yLJsaMNy/cGQ1+f1eI0Klt/1ySGXAXkMl4CxgRwtPezRyTN7JieFjWmxxkn6T6lUIjo6Go6OjpDLeZeKNMP2Q9rSdRvS5HMy71joQJZSYPauoDw/8OSUTdlyA4HhCZCXw29HlUJg9ZmHvD4F4DUqWEHX5786//3f1twIb9RyRGcvJ7Sp6QBLE741EhERaYp/PXXgYmhsvn24cySlZeLnoyElFFHpw+vzerxGr9fdW4HhLauisVsFGHK8BBERkVaYWOhAdGLBSQURlQxfbwWaV7PXdRhERERlAhMLHXC0Uq/P9vRuteHpXDJjPvTJ7YgE+O+789p65fX6ALxGr6Pu9VH3d5GIiIhej4mFDjR1t4OzjSki41Pz7AMuA6CwMcV7baqVy2lDW3lUxOqzD3l9CsBrVDB1r09Td7uSDo2IiKjMYqdiHTCQyzCrlxeA/2bwyZGzPauXV7n8QAjw+qiD16hgvD5EREQlj4mFjnT1dsayoQ1zTWWpsDEt99OEArw+6uA1KhivDxERUcniOhbQzToWObhqcsGyV01+ipAnMfCo7MCVt/PANlQwtiHSlq7nkKfSje2HtKXrNsR1LEoRA7kMLapzVpr8GMhlaF7NHtUss+DoaA85PxDmwjZUMLYhIiKiksHUmYiIiIiItMbEgoiIiIiItMbEgoiIiIiItMbEgoiIiIiItMbEgoiIiIiItMbEgoiIiIiItMbpZgHkLOWRkJCg40goL0qlEomJiTA1NeUc4FQobEOkDbYf0gbbD2lL120o5/OxOkvfMbEAkJiYCABwdXXVcSRERERERPonMTERNjY2BdbhytvIzgTDw8NhZWUF
mYyLZ+mbhIQEuLq64vHjxyW+MjqVDWxDpA22H9IG2w9pS9dtSAiBxMREuLi4vPaOCe9YAJDL5ahcubKuw6DXsLa25psyaYVtiLTB9kPaYPshbemyDb3uTkUOdvYjIiIiIiKtMbEgIiIiIiKtMbEgvWdiYoJZs2bBxMRE16FQKcU2RNpg+yFtsP2QtkpTG+LgbSIiIiIi0hrvWBARERERkdaYWBARERERkdaYWBARERERkdaYWJDe8vf3R5MmTWBlZQVHR0e89dZbuHv3rq7DolLq66+/hkwmw8SJE3UdCpUi//zzD4YOHQp7e3uYmZmhbt26uHz5sq7DolIgKysLX375Jdzd3WFmZobq1atj7ty54NBWysvJkyfRq1cvuLi4QCaT4e+//1bZL4TAzJkz4ezsDDMzM3Tq1AnBwcG6CbYATCxIb504cQJ+fn44f/48Dh06hIyMDHTp0gUvXrzQdWhUyly6dAm//vor6tWrp+tQqBR5/vw5WrVqBSMjI+zbtw9BQUH4/vvvUaFCBV2HRqXAN998g2XLluHnn3/G7du38c0332DhwoX46aefdB0a6aEXL16gfv36+OWXX/Lcv3DhQvz4449Yvnw5Lly4AAsLC/j6+iI1NbWEIy0YZ4WiUiMmJgaOjo44ceIE2rZtq+twqJRISkpCw4YNsXTpUsybNw8+Pj5YvHixrsOiUmDatGk4c+YMTp06petQqBTq2bMnnJyc8Pvvv0tl/fr1g5mZGdatW6fDyEjfyWQybN++HW+99RaA7LsVLi4u+OSTTzBlyhQAQHx8PJycnLB69WoMHDhQh9Gq4h0LKjXi4+MBAHZ2djqOhEoTPz8/9OjRA506ddJ1KFTK7Ny5E40bN0b//v3h6OiIBg0aYOXKlboOi0qJli1b4siRI7h37x4A4Pr16zh9+jS6deum48iotAkNDUVkZKTK3zEbGxs0a9YM586d02FkuRnqOgAidSiVSkycOBGtWrWCt7e3rsOhUmLjxo24evUqLl26pOtQqBR68OABli1bhsmTJ+Pzzz/HpUuXMH78eBgbG2PEiBG6Do/03LRp05CQkIDatWvDwMAAWVlZmD9/PoYMGaLr0KiUiYyMBAA4OTmplDs5OUn79AUTCyoV/Pz8EBgYiNOnT+s6FColHj9+jAkTJuDQoUMwNTXVdThUCimVSjRu3BgLFiwAADRo0ACBgYFYvnw5Ewt6rc2bN2P9+vXYsGED6tSpg4CAAEycOBEuLi5sP1RmsSsU6b2xY8di9+7dOHbsGCpXrqzrcKiUuHLlCqKjo9GwYUMYGhrC0NAQJ06cwI8//ghDQ0NkZWXpOkTSc87OzvDy8lIp8/T0RFhYmI4iotJk6tSpmDZtGgYOHIi6deti2LBhmDRpEvz9/XUdGpUyCoUCABAVFaVSHhUVJe3TF0wsSG8JITB27Fhs374dR48ehbu7u65DolKkY8eOuHnzJgICAqSfxo0bY8iQIQgICICBgYGuQyQ916pVq1xTXN+7dw9ubm46iohKk+TkZMjlqh+zDAwMoFQqdRQRlVbu7u5QKBQ4cuSIVJaQkIALFy6gRYsWOowsN3aFIr3l5+eHDRs2YMeOHbCyspL6EdrY2MDMzEzH0ZG+s7KyyjUex8LCAvb29hynQ2qZNGkSWrZsiQULFuCdd97BxYsXsWLFCqxYsULXoVEp0KtXL8yfPx9VqlRBnTp1cO3aNSxatAijRo3SdWikh5KSkhASEiJth4aGIiAgAHZ2dqhSpQomTpyIefPmoUaNGnB3d8eXX34JFxcXaeYofcHpZklvyWSyPMtXrVqFkSNHlmwwVCa0b9+e082SRnbv3o3p06cjODgY7u7umDx5Mt5//31dh0WlQGJiIr788kts374d0dHRcHFxwaBBgzBz5kwYGxvrOjzSM8ePH0eHDh1ylY8YMQKrV6+GEAKzZs3CihUrEBcXh9atW2Pp0qWoWbOmDqLNHxMLIiIiIiLSGsdYEBERERGR
1phYEBERERGR1phYEBERERGR1phYEBERERGR1phYEBERERGR1phYEBERERGR1phYEBERERGR1phYEBERERGR1phYEBGRivbt22PixInSdtWqVfVqtfLVq1fD1tZW2v7qq6/g4+NTbM/Xtm1bbNiwodiOXxjTpk3DuHHjdB0GEZEKJhZERCXo8ePHGDVqFFxcXGBsbAw3NzdMmDABz54903VoWomNjcXEiRPh5uYGY2NjuLi4YNSoUQgLCyv2554yZQqOHDkibY8cORJvvfVWkRx7586diIqKwsCBA6WyFStWoH379rC2toZMJkNcXJxGxxw/fjwaNWoEExOTfBOiGzduoE2bNjA1NYWrqysWLlyosn/KlClYs2YNHjx4oOkpEREVGyYWREQl5MGDB2jcuDGCg4Px559/IiQkBMuXL8eRI0fQokULxMbGFuvzZ2RkFMtxY2Nj0bx5cxw+fBjLly9HSEgINm7ciJCQEDRp0qTYP/xaWlrC3t6+WI79448/4t1334Vc/t+fy+TkZHTt2hWff/55oY87atQoDBgwIM99CQkJ6NKlC9zc3HDlyhV8++23+Oqrr7BixQqpTsWKFeHr64tly5YVOgYioiIniIioRHTt2lVUrlxZJCcnq5RHREQIc3Nz8dFHHwkhhJg+fbpo2rRprsfXq1dPzJ49W9peuXKlqF27tjAxMRG1atUSv/zyi7QvNDRUABAbN24Ubdu2FSYmJmLVqlXi6dOnYuDAgcLFxUWYmZkJb29vsWHDBpXnadeunZgwYYK07ebmJn744Yd8z+ujjz4SFhYWIiIiQqU8OTlZVKpUSXTt2rXAY9WvX1/MmjVL2v7++++Ft7e3MDc3F5UrVxZjxowRiYmJ0v5Vq1YJGxsbaXvWrFmifv360v8BqPwcO3ZMdOjQQfj5+ak8b3R0tDAyMhKHDx/O87yio6OFTCYTgYGBee4/duyYACCeP3+ez5Up2Mtxv2zp0qWiQoUKIi0tTSr77LPPRK1atVTqrVmzRlSuXLlQz01EVBx4x4KIqATExsbiwIED+Pjjj2FmZqayT6FQYMiQIdi0aROEEBgyZAguXryI+/fvS3Vu3bqFGzduYPDgwQCA9evXY+bMmZg/fz5u376NBQsW4Msvv8SaNWtUjj1t2jRMmDABt2/fhq+vL1JTU9GoUSPs2bMHgYGB+OCDDzBs2DBcvHixUOelVCqxceNGDBkyBAqFQmWfmZkZPv74Yxw4cECjuzFyuRw//vgjbt26hTVr1uDo0aP49NNP1XrslClT8M4776Br166IiIhAREQEWrZsiffeew8bNmxAWlqaVHfdunWoVKkS3njjjTyPdfr0aZibm8PT01Pt2IvCuXPn0LZtWxgbG0tlvr6+uHv3Lp4/fy6VNW3aFE+ePMHDhw9LND4iovwwsSAiKgHBwcEQQuT7IdXT0xPPnz9HTEwM6tSpg/r166sMGF6/fj2aNWsGDw8PAMCsWbPw/fffo2/fvnB3d0ffvn0xadIk/PrrryrHnThxolTH2dkZlSpVwpQpU+Dj44Nq1aph3Lhx6Nq1KzZv3lyo84qJiUFcXFyB5yWEQEhIiNrHnDhxIjp06ICqVavijTfewLx589SOz9LSEmZmZjAxMYFCoYBCoYCxsTH69u0LANixY4dUd/Xq1Rg5ciRkMlmex3r06BGcnJxUukGVhMjISDg5OamU5WxHRkZKZS4uLgCy4yQi0gdMLIiISpAQQq16Q4YMkRILIQT+/PNPDBkyBADw4sUL3L9/H6NHj4alpaX0M2/ePJW7HADQuHFjle2srCzMnTsXdevWhZ2dHSwtLXHgwAGtB1m/7rxe/vb9dQ4fPoyOHTuiUqVKsLKywrBhw/Ds2TMkJycXOj5TU1MMGzYMf/zxBwDg6tWrCAwMxMiRI/N9TEpKCkxNTTV+rm7duqm8LpaWlqhTp05hQ89Xzp0vba4LEVFRMtR1AERE5YGHhwdkMhlu376NPn365Np/+/Zt
VKhQAQ4ODgCAQYMG4bPPPsPVq1eRkpKCx48fS4N9k5KSAAArV65Es2bNVI5jYGCgsm1hYaGy/e2332LJkiVYvHgx6tatCwsLC0ycOBHp6emFOi8HBwfY2tri9u3bee6/ffs2DA0N4e7uDiC7m9OrScjLg8ofPnyInj17YsyYMZg/fz7s7Oxw+vRpjB49Gunp6TA3Ny9UnADw3nvvwcfHB0+ePMGqVavwxhtvwM3NLd/6FStWVOl6pK7ffvsNKSkpKmVGRkZqP16hUCAqKkqlLGf75e5mOd3LctoMEZGuMbEgIioB9vb26Ny5M5YuXYpJkyapjLOIjIzE+vXrMXz4cKlbTuXKldGuXTusX78eKSkp6Ny5MxwdHQFkd4txcXHBgwcPpLsY6jpz5gzefPNNDB06FED2GIl79+7By8urUOcll8vxzjvvYP369ZgzZ47KB9+UlBQsXboUffr0gY2NDYDsD8ERERFSnYSEBISGhkrbV65cgVKpxPfffy91QdK0m5axsTGysrJyldetWxeNGzfGypUrsWHDBvz8888FHqdBgwaIjIzE8+fPUaFCBbWfv1KlShrF+6oWLVrgiy++QEZGhpSQHDp0CLVq1VKJIzAwEEZGRsVyN4SIqDDYFYqIqIT8/PPPSEtLg6+vL06ePInHjx9j//796Ny5MypVqoT58+er1B8yZAg2btyILVu25EogZs+eDX9/f/z444+4d+8ebt68iVWrVmHRokUFxlCjRg0cOnQIZ8+exe3bt/Hhhx/m+nZcU/Pnz4dCoUDnzp2xb98+PH78GCdPnoSvry/kcjmWLFki1X3jjTewdu1anDp1Cjdv3sSIESNU7rJ4eHggIyMDP/30Ex48eIC1a9di+fLlGsVTtWpV3LhxA3fv3sXTp09V7oi89957+PrrryGEyPPO0csaNGiAihUr4syZMyrlkZGRCAgIkMaN3Lx5EwEBAWoPUA8JCUFAQAAiIyORkpKCgIAABAQESHeNBg8eDGNjY4wePRq3bt3Cpk2bsGTJEkyePFnlOKdOnUKbNm1yTQZARKQzupuQioio/Hn48KEYMWKEcHJyEkZGRsLV1VWMGzdOPH36NFfd58+fCxMTE2Fubq4y3WqO9evXCx8fH2FsbCwqVKgg2rZtK7Zt2yaE+G+62WvXrqk85tmzZ+LNN98UlpaWwtHRUcyYMUMMHz5cvPnmm1IdTaebFUKImJgYMW7cOOHq6ioMDAwEANGyZUvx7NkzlXrx8fFiwIABwtraWri6uorVq1fnmm520aJFwtnZWZiZmQlfX1/xv//9T2Va14KmmxUie5rYzp07C0tLS2m62RyJiYnC3NxcfPzxxwWeT45PP/1UDBw4UKUsryltAYhVq1apdcx27drl+fjQ0FCpzvXr10Xr1q2FiYmJqFSpkvj6669zHadWrVrizz//VOs5iYhKgkwINUcSEhERqen333/Hxx9/jE2bNhXZKthF4eHDh6hevTouXbqEhg0bvrZ+ZGQk6tSpg6tXrxY4HqOk7du3D5988glu3LgBQ0P2aiYi/cCuUEREVORGjx6NjRs34vbt27kGMutCRkYGIiMjMWPGDDRv3lytpALIHiz9+++/az1rVlF78eIFVq1axaSCiPQK71gQEVGZd/z4cXTo0AE1a9bE1q1bUbduXV2HRERU5jCxICIiIiIirbErFBERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERaY2JBRERERERae3/NlrlbJaKjYUAAAAASUVORK5CYII=", + "text/plain": [ + "

" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:Plot saved to results/price_vs_quality.png.\n" + ] + } + ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "# Build instances across the full quality range.\n", "qual_range = list(range(1, 11))\n", - "instances = [{\"OverallQual\": q, \"GrLivArea\": 1500} for q in qual_range]\n", + "instances = [{\"OverallQual\": q, \"GrLivArea\": 1500} for q in qual_range]\n", "# Fetch batch predictions for all quality levels.\n", "prices = cpptteut.api_predict_batch(instances)[\"predictions\"]\n", "# Plot price vs quality.\n", + "os.makedirs(\"results\", exist_ok=True)\n", "plt.figure(figsize=(8, 4))\n", "plt.plot(qual_range, [p / 1000 for p in prices], marker=\"o\", linewidth=2)\n", "plt.xlabel(\"Overall Quality (1–10)\")\n", @@ -240,6 +321,14 @@ "plt.show()\n", "_LOG.info(\"Plot saved to results/price_vs_quality.png.\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52316b9e-8b3a-4efc-bb77-7bc3580f5c95", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -250,6 +339,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" } }, "nbformat": 4, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py index 0ff3c8690..2926fcfd2 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py +++ 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.API.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.19.0 +# jupytext_version: 1.19.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -151,4 +151,6 @@ plt.tight_layout() plt.savefig("results/price_vs_quality.png", dpi=120) plt.show() -_LOG.info("Plot saved to results/price_vs_quality.png.") \ No newline at end of file +_LOG.info("Plot saved to results/price_vs_quality.png.") + +# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb index 0e080328b..e26393f47 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.ipynb @@ -11,10 +11,10 @@ "the Kaggle House Prices regression task without requiring a running server.\n", "\n", "- Loads (or generates) the dataset via `template_utils`.\n", - "- Trains a PyCaret regression model and compares multiple algorithms.\n", - "- Finalizes and saves the best model to `ml_model/`.\n", + "- Compares multiple sklearn regression models using cross-validation.\n", + "- Trains the best model on the full dataset and saves it to `ml_model/`.\n", "- Runs direct in-process predictions using the saved model.\n", - "- Produces feature importance and neighbourhood price charts.\n", + "- Produces a neighbourhood price comparison chart saved to `results/`.\n", "- Reference: (house_price.example.md)\n", "\n", "Follow the reference to write notebooks in a clear manner:\n", @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "0f747b0b", 
"metadata": {}, "outputs": [], @@ -43,16 +43,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "10c685e6", "metadata": {}, "outputs": [], "source": [ "import logging\n", + "import sys\n", + "import os\n", "\n", - "import helpers.hdbg as hdbg\n", - "import helpers.hnotebook as hnotebo\n", - "import class_project.project_template.template_utils as cpptteut" + "sys.path.insert(0, \"/project\")\n", + "\n", + "import template_utils as cpptteut" ] }, { @@ -65,16 +67,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "53dd637e", "metadata": {}, "outputs": [], "source": [ - "hdbg.init_logger(verbosity=logging.INFO)\n", - "\n", "_LOG = logging.getLogger(__name__)\n", - "\n", - "hnotebo.config_notebook()" + "logging.basicConfig(level=logging.INFO)" ] }, { @@ -90,10 +89,185 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c170517a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Loading dataset from 'ml_model/train.csv'.\n", + "INFO:__main__:Dataset shape: (1460, 16)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LotAreaOverallQualOverallCondYearBuiltTotalBsmtSFGrLivAreaFullBathBedroomAbvGrGarageCarsGarageAreaNeighborhoodHouseStyleRoofStyleExterQualKitchenQualSalePrice
084507520038561710232548CollgCr2StoryGableGdGd208500
1960068197612621262232460Veenker1StoryGableTATA181500
2112507520019201786232608CollgCr2StoryGableGdGd223500
395507519157561717133642Crawfor2StoryGableTAGd140000
41426085200011452198243836NoRidge2StoryGableGdGd250000
\n", + "
" + ], + "text/plain": [ + " LotArea OverallQual OverallCond YearBuilt TotalBsmtSF GrLivArea \\\n", + "0 8450 7 5 2003 856 1710 \n", + "1 9600 6 8 1976 1262 1262 \n", + "2 11250 7 5 2001 920 1786 \n", + "3 9550 7 5 1915 756 1717 \n", + "4 14260 8 5 2000 1145 2198 \n", + "\n", + " FullBath BedroomAbvGr GarageCars GarageArea Neighborhood HouseStyle \\\n", + "0 2 3 2 548 CollgCr 2Story \n", + "1 2 3 2 460 Veenker 1Story \n", + "2 2 3 2 608 CollgCr 2Story \n", + "3 1 3 3 642 Crawfor 2Story \n", + "4 2 4 3 836 NoRidge 2Story \n", + "\n", + " RoofStyle ExterQual KitchenQual SalePrice \n", + "0 Gable Gd Gd 208500 \n", + "1 Gable TA TA 181500 \n", + "2 Gable Gd Gd 223500 \n", + "3 Gable TA Gd 140000 \n", + "4 Gable Gd Gd 250000 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Load or generate the House Prices dataset.\n", "DATA_PATH = \"ml_model/train.csv\"\n", @@ -115,10 +289,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "60dfc66a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target column statistics:\n", + "count 1460.000000\n", + "mean 180921.195890\n", + "std 79442.502883\n", + "min 34900.000000\n", + "25% 129975.000000\n", + "50% 163000.000000\n", + "75% 214000.000000\n", + "max 755000.000000\n", + "Name: SalePrice, dtype: float64\n", + "\n", + "Missing values per column:\n", + "Series([], dtype: int64)\n" + ] + } + ], "source": [ "# Display summary statistics for the target and key numeric features.\n", "print(\"Target column statistics:\")\n", @@ -136,16 +330,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "90b7bc24", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Splitting data into train and test sets.\n", + "INFO:__main__:Train: 1168 rows | Test: 292 rows\n" + ] + } 
+ ], "source": [ "# Split into train and test sets for offline evaluation.\n", "X_train, X_test, y_train, y_test = cpptteut.split_data(df)\n", - "_LOG.info(\n", - " \"Train: %d rows | Test: %d rows\", len(X_train), len(X_test)\n", - ")" + "_LOG.info(\"Train: %d rows | Test: %d rows\", len(X_train), len(X_test))" ] }, { @@ -153,27 +354,172 @@ "id": "5d0fb4a9", "metadata": {}, "source": [ - "## Train model with PyCaret\n", + "## Compare models\n", "\n", - "PyCaret's `compare_models` benchmarks multiple regression algorithms in a\n", - "single call and selects the best performer by RMSE." + "Cross-validate GradientBoosting, RandomForest, and Ridge and display the\n", + "leaderboard sorted by RMSE." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "d4a2d510", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Cross-validating GradientBoosting (5 folds)…\n", + "INFO:template_utils:Cross-validating RandomForest (5 folds)…\n", + "INFO:template_utils:Cross-validating Ridge (5 folds)…\n", + "INFO:template_utils:Leaderboard:\n", + " Model RMSE MAE R2\n", + "GradientBoosting 27551.49 16785.83 0.8765\n", + " RandomForest 31235.81 19090.38 0.8431\n", + " Ridge 34684.92 20517.56 0.8054\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelRMSEMAER2
0GradientBoosting27551.4916785.830.8765
1RandomForest31235.8119090.380.8431
2Ridge34684.9220517.560.8054
\n", + "
" + ], + "text/plain": [ + " Model RMSE MAE R2\n", + "0 GradientBoosting 27551.49 16785.83 0.8765\n", + "1 RandomForest 31235.81 19090.38 0.8431\n", + "2 Ridge 34684.92 20517.56 0.8054" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Run the PyCaret regression experiment and retrieve the best model.\n", - "best_model = cpptteut.run_pycaret_regression(df, n_select=3, fold=5)\n", - "# Pull and display the full leaderboard.\n", - "leaderboard = cpptteut.get_model_results()\n", - "_LOG.info(\"PyCaret leaderboard retrieved.\")\n", + "# Compare all candidate models using 5-fold cross-validation.\n", + "leaderboard = cpptteut.compare_models(df, fold=5)\n", "leaderboard" ] }, + { + "cell_type": "markdown", + "id": "22725b0d", + "metadata": {}, + "source": [ + "## Train best model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fa0724e0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:Best model: GradientBoosting\n", + "INFO:template_utils:Training GradientBoosting on full dataset (1460 rows)…\n" + ] + } + ], + "source": [ + "# Train the top-ranked model (GradientBoosting) on the full dataset.\n", + "best_model_name = leaderboard.iloc[0][\"Model\"]\n", + "_LOG.info(\"Best model: %s\", best_model_name)\n", + "pipeline = cpptteut.train_best_model(df, model_name=best_model_name)" + ] + }, + { + "cell_type": "markdown", + "id": "664f1775", + "metadata": {}, + "source": [ + "## Evaluate model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bb424d37", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Test metrics: {'RMSE': 13232.96, 'MAE': 9994.75, 'R2': 0.9772}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test RMSE : $13,233\n", + "Test MAE : $9,995\n", + "Test R² : 0.9772\n" + ] + } + ], + "source": [ + "# 
Evaluate the fitted pipeline on the held-out test set.\n", + "metrics = cpptteut.evaluate_model(pipeline, X_test, y_test)\n", + "print(f\"Test RMSE : ${metrics['RMSE']:,.0f}\")\n", + "print(f\"Test MAE : ${metrics['MAE']:,.0f}\")\n", + "print(f\"Test R² : {metrics['R2']:.4f}\")" + ] + }, { "cell_type": "markdown", "id": "8000f7e7", @@ -184,14 +530,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "34ae9f8b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Model saved to '/project/ml_model/house_price_model.pkl'.\n", + "INFO:__main__:Model saved.\n" + ] + } + ], "source": [ - "# Finalize on the full dataset and persist to disk.\n", - "cpptteut.finalize_and_save(best_model)\n", - "_LOG.info(\"Model finalized and saved.\")" + "# Persist the trained pipeline to disk for the Flask API to load.\n", + "cpptteut.finalize_and_save(pipeline)\n", + "_LOG.info(\"Model saved.\")" ] }, { @@ -199,26 +554,39 @@ "id": "99d094b8", "metadata": {}, "source": [ - "## Run in-process predictions\n", - "\n", - "Load the saved model and predict directly without a running Flask server." 
+ "## Run in-process predictions" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "cedab673", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:template_utils:Loading model from '/project/ml_model/house_price_model.pkl'.\n", + "INFO:__main__:Predicted price: $195245\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted sale price: $195,245\n" + ] + } + ], "source": [ - "# Load the saved model artifact.\n", + "# Load the saved artifact and run a single in-process prediction.\n", "model = cpptteut.load_model_artifact()\n", - "# Predict the price of a representative house.\n", "house = {\n", " \"OverallQual\": 7,\n", - " \"GrLivArea\": 1800,\n", - " \"GarageCars\": 2,\n", - " \"YearBuilt\": 2005,\n", + " \"GrLivArea\": 1800,\n", + " \"GarageCars\": 2,\n", + " \"YearBuilt\": 2005,\n", " \"Neighborhood\": \"CollgCr\",\n", "}\n", "price = cpptteut.predict_price(house, model=model)\n", @@ -236,10 +604,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "e6c05122", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validation errors:\n", + " ✗ OverallQual must be between 1 and 10 (got 15).\n", + " ✗ GrLivArea must be greater than 0.\n", + " ✗ ExterQual must be one of ['Ex', 'Fa', 'Gd', 'Po', 'TA'] (got 'ZZ').\n" + ] + } + ], "source": [ "# Demonstrate validation with an intentionally bad payload.\n", "bad_payload = {\"OverallQual\": 15, \"GrLivArea\": -50, \"ExterQual\": \"ZZ\"}\n", @@ -256,31 +635,60 @@ "source": [ "## Show results\n", "\n", - "Compare predicted prices across neighbourhoods and plot feature importance." + "Compare predicted prices across neighbourhoods and save the chart." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "fdb8d334", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUEUlEQVR4nO3dd3QU5f7H8c8mIZuQSmhJMAQILUDovYUSCIhI80eRFgT0Kog0BVR6CVxEEUTwKgICIqCIXlCQLqCiVAVCpIMSipTQAyTz+8OTvSwpJEzCSni/ztlz2GeemfnOs7PLfnZKLIZhGAIAAAAAE5wcXQAAAACARx/BAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQJwkCJFiigqKsr2fOPGjbJYLNq4caPDarrXvTVmF0ds+7Fjx2SxWPTWW289tHVmtaioKBUpUuSB5/X09Lxvv3/qOM2dO1cWi0Xbt293dCk2mXk9kpKSVK5cOY0fPz57i8oki8WiUaNG2Z4nj/OxY8ceei137tzRa6+9pqCgIDk5Oal169YPvQZHa9CggcqVK+foMkwbOnSoatSo4egy8BAQLPBYSv7PMvnh5uamkiVLqm/fvjpz5oyjy8uUb775xu6LgCPcPZZOTk4KDAxU06ZN/1EhyVGS9zU3Nzf9+eefKabnlC8OyJxFixbp5MmT6tu3r6NL+cf6+OOPNXnyZD3zzDOaN2+eBgwYkC3r+fTTTzV16tRsWTb+1r9/f+3Zs0dff/21o0tBNnNxdAGAI40ZM0ZFixbVzZs3tWXLFs2cOVPffPON9u7dq9y5cz/UWurXr68bN27I1dU1U/N98803mjFjhsPDRZMmTdStWzcZhqGjR4/q/fffV6NGjbRy5Uo1b9483XkfdNsfJQkJCZo4caKmT5+eZcv88MMPlZSUlGXLw8MzefJkdezYUT4+Po4uJV1du3ZVx44dZbVaH/q6169fr0KFCumdd97J1vV8+umn2rt3r/r375+t63mc+fv7q1WrVnrrrbf09NNPO7ocZCOOWOCx1rx5c3Xp0kW9evXS3Llz1b9/fx09elRfffVVmvNcu3YtW2pxcnKSm5ubnJwezbdlyZIl1aVLF3Xt2lUjRozQmjVrZBhGur8E3rx5U0lJSY/8tmdExYoV9eGHH+rUqVNZtsxcuXI55AufIyUlJenmzZuOLsOUXbt2ac+ePWrfvr2jS7kvZ2dnubm5yWKxPPR1nz17Vr6+vg99vVnl+vXrji7hH6V9+/basmWLjhw54uhSkI1y7v/iwANo1KiRJOno0aOS/nce+uHDh/Xkk0/Ky8tLnTt3lvT3F5ypU6eqbNmycnNzU8GCBfXCCy/o4sWLdss0DEPjxo3TE088ody5c6thw4bat29finWndZ3Btm3b9OSTTypPnjzy8PBQ+fLl9e6779rqmzFjhiT705GSZXWNmREWFqZ8+fLZxjJ5+z777DO9+eabKlSokHLnzq3Lly8/0LYnO3DggJ555hn5+fnJzc1NVatWzfTh9nfeeUfBwcFyd3dXeHi49u7da5s2Z84cWSwW7dq1K8V8EyZMkLOzc6qnON3r9ddfV2JioiZOnJihmhYsWKAqVarI3d1dfn5+6tixo06ePGnXJ7Vz+s+fP6+uXbvK29tbvr6+6t69u/bs2SOLxaK5c+emWM+ff/6p1q1by9PTU/nz59fgwYOVmJiYak3pjVOy9evXq169evLw8JCvr69atWqlmJiY+9YtSaNGjUrxBdZisahv375auHChypYtK6
vVqlWrVtmmJyQkaODAgcqfP788PDzUpk0bnTt3LsWy33//fdv8gYGB6tOnjy5dupSi39KlS23jni9fPnXp0iXV13f58uUqV66c3NzcVK5cOX355Zepjllqli9fLldXV9WvXz/V7f/999/VpUsX+fj4KH/+/Bo+fLgMw9DJkyfVqlUreXt7y9/fX1OmTEmx7ISEBI0cOVLFixeX1WpVUFCQXnvtNSUkJKToN2DAAOXPn19eXl56+umn9ccff6RYXmrXWHz11Vdq0aKFAgMDZbVaFRISorFjx6bYb5JP89u/f78aNmyo3Llzq1ChQvr3v/+d7vgkX9ezYcMG7du3z/a5lvz5kNHPtYzU2aBBA61cuVLHjx+3rSd530zr+pLUPq+St3XHjh2qX7++cufOrddffz1Tr0l6MjKGZ8+eVc+ePVWwYEG5ubmpQoUKmjdv3n1rv3vM7/6MOH36tHr06KEnnnhCVqtVAQEBatWqVYrx+Pbbb23veS8vL7Vo0SLV/z8iIiIkKd0f7vDo41Qo4C6HDx+WJOXNm9fWdufOHUVGRqpu3bp66623bKdIvfDCC5o7d6569Oihfv366ejRo3rvvfe0a9cubd26Vbly5ZIkjRgxQuPGjdOTTz6pJ598Ujt37lTTpk1169at+9azZs0aPfXUUwoICNArr7wif39/xcTEaMWKFXrllVf0wgsv6NSpU1qzZo3mz5+fYv6HUWNaLl68qIsXL6p48eJ27WPHjpWrq6sGDx6shISENE9/ut+2S9K+fftUp04dFSpUSEOHDpWHh4eWLFmi1q1b64svvlCbNm3uW+cnn3yiK1euqE+fPrp586beffddNWrUSL/99psKFiyoZ555Rn369NHChQtVqVIlu3kXLlyoBg0aqFChQvddT9GiRdWtWzd9+OGHGjp0qAIDA9PsO378eA0fPlzt27dXr169dO7cOU2fPl3169fXrl270vwVNykpSS1bttTPP/+sF198UaVLl9ZXX32l7t27p9o/MTFRkZGRqlGjht566y2tXbtWU6ZMUUhIiF588cVMjZMkrV27Vs2bN1exYsU0atQo3bhxQ9OnT1edOnW0c+fOB77QfP369VqyZIn69u2rfPnyqUiRItq9e7ck6eWXX1aePHk0cuRIHTt2TFOnTlXfvn21ePFi2/yjRo3S6NGjFRERoRdffFGxsbGaOXOmfvnlF7v3QfJ7pVq1aoqOjtaZM2f07rvvauvWrXbj/t1336ldu3YqU6aMoqOjdf78edsXsIz44YcfVK5cOdt679WhQweFhoZq4sSJWrlypcaNGyc/Pz998MEHatSokSZNmqSFCxdq8ODBqlatmi2gJCUl6emnn9aWLVv0/PPPKzQ0VL/99pveeecd/f7771q+fLltHb169dKCBQv07LPPqnbt2lq/fr1atGiRofrnzp0rT09PDRw4UJ6enlq/fr1GjBihy5cva/LkyXZ9L168qGbNmqlt27Zq3769Pv/8cw0ZMkRhYWFpniKZP39+zZ8/X+PHj9fVq1cVHR0tSQoNDZWU8c+1jNT5xhtvKD4+Xn/88YftlKuM3NQgNefPn1fz5s3VsWNHdenSRQULFszUa5KWjIzhjRs31KBBAx06dEh9+/ZV0aJFtXTpUkVFRenSpUu2z8zMaNeunfbt26eXX35ZRYoU0dmzZ7VmzRqdOHHC9l6eP3++unfvrsjISE2aNEnXr1/XzJkzVbduXe3atcvuPe/j46OQkBBt3bo1266XwT+AATyG5syZY0gy1q5da5w7d844efKk8dlnnxl58+Y13N3djT/++MMwDMPo3r27IckYOnSo3fybN282JBkLFy60a1+1apVd+9mzZw1XV1ejRYsWRlJSkq3f66+/bkgyunfvbmvbsGGDIcnYsGGDYRiGcefOHaNo0aJGcHCwcfHiRbv13L2sPn36GKm9lbOjxrRIMnr27GmcO3fOOHv2rLFt2zajcePGhiRjypQpdttXrFgx4/
r163bzP+i2N27c2AgLCzNu3rxpN7127dpGiRIl0q356NGjhiS719swDGPbtm2GJGPAgAG2tk6dOhmBgYFGYmKirW3nzp2GJGPOnDnprid5X/vll1+Mw4cPGy4uLka/fv1s08PDw42yZcvanh87dsxwdnY2xo8fb7ec3377zXBxcbFr7969uxEcHGx7/sUXXxiSjKlTp9raEhMTjUaNGqWoNXnfHjNmjN16KlWqZFSpUuWBxqlixYpGgQIFjPPnz9va9uzZYzg5ORndunVLs+5kI0eOTLEvSzKcnJyMffv22bUnj2tERITdPjFgwADD2dnZuHTpkmEY/9u/mzZtavf6vffee4Yk4+OPPzYMwzBu3bplFChQwChXrpxx48YNW78VK1YYkowRI0bYbWdAQIBtHYZhGN99950hKdXtutcTTzxhtGvXLs3tf/75521td+7cMZ544gnDYrEYEydOtLVfvHjRcHd3t3t/zp8/33BycjI2b95st9xZs2YZkoytW7cahmEYu3fvNiQZL730kl2/Z5991pBkjBw50taWPM5Hjx61td37/jUMw3jhhReM3Llz270Xw8PDDUnGJ598YmtLSEgw/P39U93+e9373jCMjH+uZabOFi1apPq6pbbthpHy8+rubZ01a5Zd34y+JmnJ6BhOnTrVkGQsWLDA1nbr1i2jVq1ahqenp3H58uU0azeM/73Pkz8jLl68aEgyJk+enGZtV65cMXx9fY3evXvbtZ8+fdrw8fFJ0W4YhtG0aVMjNDQ03W3Go41TofBYi4iIUP78+RUUFKSOHTvK09NTX375ZYpfoO/99Xbp0qXy8fFRkyZN9Ndff9keVapUkaenpzZs2CDp719wb926pZdfftnuFI+MXCS4a9cuHT16VP3790/xC3VGznd+GDXebfbs2cqfP78KFCigGjVqaOvWrRo4cGCK5XTv3l3u7u7pLisj237hwgWtX79e7du315UrV2zbd/78eUVGRurgwYMZOkWpdevWdq939erVVaNGDX3zzTe2tm7duunUqVO2MZP+Plrh7u6udu3a3XcdyYoVK6auXbvqP//5j+Li4lLts2zZMiUlJal9+/Z2r5u/v79KlChhV8O9Vq1apVy5cql37962NicnJ/Xp0yfNef71r3/ZPa9Xr16q50Dfb5zi4uK0e/duRUVFyc/Pz9avfPnyatKkid14ZlZ4eLjKlCmT6rTnn3/ebr+tV6+eEhMTdfz4cUn/27/79+9vdw1P79695e3trZUrV0qStm/frrNnz+qll16Sm5ubrV+LFi1UunRpW7/k7ezevbvdhddNmjRJs8Z7nT9/Xnny5Elzeq9evWz/dnZ2VtWqVWUYhnr27Glr9/X1ValSpexeq6VLlyo0NFSlS5e223eST/FM3neSX4t+/frZrTej7/m737/J77169erp+vXrOnDggF1fT09PdenSxfbc1dVV1atXf+Dz7DP6uZbZOrOC1WpVjx49UtSbkdckPRkZw2+++Ub+/v7q1KmTrS1Xrlzq16+frl69qk2bNmVqW9zd3eXq6qqNGzemOMUs2Zo1a3Tp0iV16tTJbtucnZ1Vo0aNVLctT548+uuvvzJVCx4tnAqFx9qMGTNUsmRJubi4qGDBgipVqlSKC4hdXFxSnOJw8OBBxcfHq0CBAqku9+zZs5Jk+3JTokQJu+n58+dP94uF9L/Tsh70VqQPo8a7tWrVSn379pXFYpGXl5fKli0rDw+PFP2KFi1632VlZNsPHTokwzA0fPhwDR8+PNU+Z8+eve9pSvdut/T3hehLliyxPW/SpIkCAgK0cOFCNW7cWElJSVq0aJFatWolLy+v+27P3d58803Nnz9fEydOTHG9iPT362YYRqp1SUrz9Bnp79cyICAgxR3N7j0dLZmbm5vy589v15YnT55Uv0jcb5yS96NSpUql6BcaGqrVq1fr2rVrqe4T95PePlO4cGG758n7bPI2pFWXq6urihUrZpueXv2lS5
fWli1b7PqlNh6lSpXSzp07779B+vu6prTcu00+Pj5yc3NTvnz5UrSfP3/e9vzgwYOKiYlJ8Zomu/s97+TkpJCQkBT1Z8S+ffv05ptvav369bp8+bLdtPj4eLvnTzzxRIofQvLkyaNff/01Q+u6V0Y/1zJbZ1YoVKhQilM7M/qapCcjY3j8+HGVKFEixf9fyaePJe+3GWW1WjVp0iQNGjRIBQsWVM2aNfXUU0+pW7du8vf3t22b9L9rE+/l7e2dos0wDIfcCAAPD8ECj7Xq1auratWq6faxWq0pPqyTkpJUoEABLVy4MNV50vpP5GF62DU+8cQTtovz0nO/oxUZlXyb1cGDBysyMjLVPml9oc4sZ2dnPfvss/rwww/1/vvva+vWrTp16pTdr4gZVaxYMXXp0kX/+c9/NHTo0BTTk5KSZLFY9O2338rZ2TnF9Ac9/zs1qS3/YUjri0VaF42nt8+ktQ3pfXF3tLx586b5K7CU+jZlZDuTkpIUFhamt99+O9W+QUFBmaw0pUuXLik8PFze3t4aM2aMQkJC5Obmpp07d2rIkCEpbn+c1a9PRj/XMltnarJiP82K1yQrxzAz29S/f3+1bNlSy5cv1+rVqzV8+HBFR0dr/fr1qlSpkm0M58+fbwsbd3NxSfkV8+LFiykCMnIWggXwAEJCQrR27VrVqVMn3S89wcHBkv7+ZadYsWK29nPnzqX7xSJ5HZK0d+/edL+wp/UfxcOoMbtkZNuTa82VK1eGAk1akn91u9vvv/+e4kLjbt26acqUKfrvf/+rb7/9Vvnz508z0NzPm2++qQULFmjSpEkppoWEhMgwDBUtWlQlS5bM1HKDg4O1YcMGXb9+3e6oxaFDhx6ozrvdb5yS96PY2NgU/Q4cOKB8+fLZjlbkyZMn1TsyZfZX1Yy4u6679+9bt27p6NGjtn3n7n73/gIbGxtrm373++VeqW17akqXLm27W1pWCgkJ0Z49e9S4ceN0fxUODg5WUlKSDh8+bHeUIiP1b9y4UefPn9eyZcvs7mqVHduTmox+rmWmzrTGKvno1737amb204y+JmYFBwfr119/td2+O1nyKV/J+21mtykkJESDBg3SoEGDdPDgQVWsWFFTpkzRggULbJ/TBQoUyPBn8NGjR1WhQoVMbRseLVxjATyA9u3bKzExUWPHjk0x7c6dO7YP7YiICOXKlUvTp0+3+3UpI3/ltXLlyipatKimTp2a4j+Bu5eV/GXt3j4Po8bskpFtL1CggBo0aKAPPvgg1esVUrvlaGqWL19udy3Gzz//rG3btqW4Y0358uVVvnx5ffTRR/riiy/UsWPHVH+Ry4iQkBB16dJFH3zwgU6fPm03rW3btnJ2dtbo0aNT/CJpGIbdqS/3ioyM1O3bt/Xhhx/a2pKSkmy3JDbjfuMUEBCgihUrat68eXav2d69e/Xdd9/pySeftLWFhIQoPj7e7lSOuLi4TN2yNaMiIiLk6uqqadOm2Y3n7NmzFR8fb7sTUtWqVVWgQAHNmjXL7jag3377rWJiYmz97t7Ou0+nWbNmjfbv35+hmmrVqqW9e/dm6najGdG+fXv9+eefdq9/shs3btj+Bk/yazZt2jS7Phl5zyf/en73WN66dUvvv//+g5adKRn9XMtMnR4eHqmeGpX8xfn777+3tSUmJuo///lPpurNyGti1pNPPqnTp0/b3Q3tzp07mj59ujw9PRUeHi7p74Dh7Oxst02SUozL9evXU/y9mJCQEHl5edn228jISHl7e2vChAm6fft2ipru/QyOj4/X4cOHVbt27QffUPzjccQCeADh4eF64YUXFB0drd27d6tp06bKlSuXDh48qKVLl+rdd9/VM888Y/u7ANHR0Xrqqaf05JNPateuXfr222/vezjYyclJM2fOVMuWLVWxYkX16NFDAQEBOnDggPbt26fVq1dLkqpUqSLp7wsxIyMj5ezsrI4dOz6UGrNLRrd9xowZqlu3rsLCwtS7d28VK1
ZMZ86c0Y8//qg//vhDe/bsue+6ihcvrrp16+rFF19UQkKCpk6dqrx58+q1115L0bdbt24aPHiwJD3QaVB3e+ONNzR//nzFxsaqbNmytvaQkBCNGzdOw4YN07Fjx9S6dWt5eXnp6NGj+vLLL/X888/barhX69atVb16dQ0aNEiHDh1S6dKl9fXXX+vChQuSMnbRf1oyMk6TJ09W8+bNVatWLfXs2dN2u1kfHx+7vwzfsWNHDRkyRG3atFG/fv1st6gsWbJkhq9RyKj8+fNr2LBhGj16tJo1a6ann35asbGxev/991WtWjXb65grVy5NmjRJPXr0UHh4uDp16mS73WyRIkXsbo8ZHR2tFi1aqG7dunruued04cIFTZ8+XWXLltXVq1fvW1OrVq00duxYbdq0SU2bNs2ybe3atauWLFmif/3rX9qwYYPq1KmjxMREHThwQEuWLNHq1atVtWpVVaxYUZ06ddL777+v+Ph41a5dW+vWrcvQka3atWsrT5486t69u/r16yeLxaL58+c/tFPPMvq5lpk6q1SposWLF2vgwIGqVq2aPD091bJlS5UtW1Y1a9bUsGHDdOHCBfn5+emzzz7TnTt3MlxvRl8Ts55//nl98MEHioqK0o4dO1SkSBF9/vnn2rp1q6ZOnWq7FszHx0f/93//p+nTp8tisSgkJEQrVqxIca3H77//rsaNG6t9+/YqU6aMXFxc9OWXX+rMmTPq2LGjpL+voZg5c6a6du2qypUrq2PHjsqfP79OnDihlStXqk6dOnrvvfdsy1y7dq0Mw1CrVq1Mby/+wR7mLaiAf4q7bwGanu7duxseHh5pTv/Pf/5jVKlSxXB3dze8vLyMsLAw47XXXjNOnTpl65OYmGiMHj3aCAgIMNzd3Y0GDRoYe/fuNYKDg9O93WyyLVu2GE2aNDG8vLwMDw8Po3z58sb06dNt0+/cuWO8/PLLRv78+Q2LxZLidp1ZWWNaJBl9+vRJt0/y9i1dujTNaZnddsMwjMOHDxvdunUz/P39jVy5chmFChUynnrqKePzzz9Pt57k2ytOnjzZmDJlihEUFGRYrVajXr16xp49e1KdJy4uznB2djZKliyZ7rLvlt6+lnzL13tvqWkYf986tm7duoaHh4fh4eFhlC5d2ujTp48RGxtrN/+9t8k8d+6c8eyzzxpeXl6Gj4+PERUVZWzdutWQZHz22Wd286a2b997y9fMjtPatWuNOnXqGO7u7oa3t7fRsmVLY//+/Sn6fffdd0a5cuUMV1dXo1SpUsaCBQvSvN1savtWWuOa1r703nvvGaVLlzZy5cplFCxY0HjxxRdT3MrYMAxj8eLFRqVKlQyr1Wr4+fkZnTt3trvNbrIvvvjCCA0NNaxWq1GmTBlj2bJlad5GNzXly5c3evbsadeWvP3nzp2za0/rtUrtdqy3bt0yJk2aZJQtW9awWq1Gnjx5jCpVqhijR4824uPjbf1u3Lhh9OvXz8ibN6/h4eFhtGzZ0jh58mSGbje7detWo2bNmoa7u7sRGBhovPbaa8bq1atTvQVravt2RscprfkNI2Ofaxmt8+rVq8azzz5r+Pr6prhl8OHDh42IiAjDarUaBQsWNF5//XVjzZo1Gd5Ww8j4a5KZMUhtDM+cOWP06NHDyJcvn+Hq6mqEhYWlejvsc+fOGe3atTNy585t5MmTx3jhhReMvXv32t1u9q+//jL69OljlC5d2vDw8DB8fHyMGjVqGEuWLEmxvA0bNhiRkZGGj4+P4ebmZoSEhBhRUVHG9u3b7fp16NDBqFu3brrbi0efxTD+wVe4AcA/yF9//aWAgACNGDEizTtR/RMtX75cbdq00ZYtW1SnTh1HlwP9fcFrnz59dOLEiTT/4CGQU5w+fVpFixbVZ599xhGLHI5rLAAgg+bOnavExER17drV0aWk6caNG3bPExMTNX36dHl7e6ty5coOqgr36ty5swoXLpwl178A/3RTp05VWFgYoeIxwBELAL
iP9evXa//+/Ro+fLgaNmyoZcuWObqkNPXq1Us3btxQrVq1lJCQoGXLlumHH37QhAkTNGzYMEeXBwDIwQgWAHAfDRo00A8//KA6depowYIF9/2je4706aefasqUKTp06JBu3ryp4sWL68UXX1Tfvn0dXRoAIIcjWAAAAAAwjWssAAAAAJhGsAAAAABgGn8gLwskJSXp1KlT8vLyMvUHqAAAAIB/EsMwdOXKFQUGBsrJKf1jEgSLLHDq1CkFBQU5ugwAAAAgW5w8eVJPPPFEun0IFlnAy8tL0t8D7u3t7eBqAAAAgKxx+fJlBQUF2b7vpodgkQWST3/y9vYmWAAAACDHycjp/ly8DQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMc3F0ATlJuZGr5WTN7egyAAAAkEMcm9jC0SVkGEcsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmPaPCRZz586Vr6+vqWWMGjVKFStWTLdPVFSUWrdubWo9AAAAAOxlebCIioqSxWKxPfLmzatmzZrp119/Nb3sL7/8UjVr1pSPj4+8vLxUtmxZ9e/f3zZ98ODBWrdunen1AAAAAMicbDli0axZM8XFxSkuLk7r1q2Ti4uLnnrqqTT73759+77LXLdunTp06KB27drp559/1o4dOzR+/Hi7eT09PZU3b94s2QYAAAAAGZctwcJqtcrf31/+/v6qWLGihg4dqpMnT+rcuXM6duyYLBaLFi9erPDwcLm5uWnhwoUplnHu3DlVrVpVbdq0UUJCgv773/+qTp06evXVV1WqVCmVLFlSrVu31owZM2zz3HsqVGJiogYOHChfX1/lzZtXr732mgzDsFtPUlKSoqOjVbRoUbm7u6tChQr6/PPPs2NYAAAAgBwr26+xuHr1qhYsWKDixYvbHU0YOnSoXnnlFcXExCgyMtJunpMnT6pevXoqV66cPv/8c1tQ2bdvn/bu3ZvhdU+ZMkVz587Vxx9/rC1btujChQv68ssv7fpER0frk08+0axZs7Rv3z4NGDBAXbp00aZNm9JcbkJCgi5fvmz3AAAAAB5nLtmx0BUrVsjT01OSdO3aNQUEBGjFihVycvpfjunfv7/atm2bYt7Y2Fg1adJEbdq00dSpU2WxWCRJL7/8sjZv3qywsDAFBwerZs2aatq0qTp37iyr1ZpqHVOnTtWwYcNs65k1a5ZWr15tm56QkKAJEyZo7dq1qlWrliSpWLFi2rJliz744AOFh4enutzo6GiNHj36AUYGAAAAyJmy5YhFw4YNtXv3bu3evVs///yzIiMj1bx5cx0/ftzWp2rVqinmu3HjhurVq6e2bdvq3XfftYUKSfLw8NDKlSt16NAhvfnmm/L09NSgQYNUvXp1Xb9+PcWy4uPjFRcXpxo1atjaXFxc7NZ76NAhXb9+XU2aNJGnp6ft8cknn+jw4cNpbt+wYcMUHx9ve5w8eTLTYwQAAADkJNlyxMLDw0PFixe3Pf/oo4/k4+OjDz/8UL169bL1uZfValVERIRWrFihV199VYUKFUrRJyQkRCEhIerVq5feeOMNlSxZUosXL1aPHj0yXefVq1clSStXrkyxrrSOgiRPS286AAAA8Lh5KH/HwmKxyMnJSTdu3Ei/GCcnzZ8/X1WqVFHDhg116tSpdPsXKVJEuXPn1rVr11JM8/HxUUBAgLZt22Zru3Pnjnbs2GF7XqZMGVmtVp04cULFixe3ewQFBWVyKwEAAIDHV7YcsUhISNDp06clSRcvXtR7772nq1evqmXLlved19nZWQsXLlSnTp3UqFEjbd
y4Uf7+/ho1apSuX7+uJ598UsHBwbp06ZKmTZum27dvq0mTJqku65VXXtHEiRNVokQJlS5dWm+//bYuXbpkm+7l5aXBgwdrwIABSkpKUt26dRUfH6+tW7fK29tb3bt3z5LxAAAAAHK6bAkWq1atUkBAgKS/v7yXLl1aS5cuVYMGDXTs2LH7F+XiokWLFqlDhw62cBEeHq4ZM2aoW7duOnPmjPLkyaNKlSrpu+++U6lSpVJdzqBBgxQXF6fu3bvLyclJzz33nNq0aaP4+Hhbn7Fjxyp//vyKjo7WkSNH5Ovrq8qVK+v111/PkrEAAAAAHgcW494/7IBMu3z5snx8fBTUf4mcrLkdXQ4AAAByiGMTWzh0/cnfc+Pj4+Xt7Z1u34dyjQUAAACAnI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAExzcXQBOcne0ZHy9vZ2dBkAAADAQ8cRCwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgmoujC8hJyo1cLSdrbkeXAQAAgBzi2MQWji4hwzhiAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAw7ZEJFhs3bpTFYtGlS5fS7DN37lz5+vo+tJoAAAAA/M1UsIiKipLFYtHEiRPt2pcvXy6LxZLh5TRo0EAWi0UWi0Vubm4qWbKkoqOjZRiGrU/t2rUVFxcnHx8fMyUDAAAAyAamj1i4ublp0qRJunjxoqnl9O7dW3FxcYqNjdWwYcM0YsQIzZo1yzbd1dVV/v7+mQosAAAAAB4O08EiIiJC/v7+io6OTrPPF198obJly8pqtapIkSKaMmVKij65c+eWv7+/goOD1aNHD5UvX15r1qyxTU/tVKi5c+eqcOHCyp07t9q0aaPz58+nWO64ceNUoEABeXl5qVevXho6dKgqVqxo1+ejjz5SaGio3NzcVLp0ab3//vuZHwgAAADgMWY6WDg7O2vChAmaPn26/vjjjxTTd+zYofbt26tjx4767bffNGrUKA0fPlxz585NdXmGYWjz5s06cOCAXF1d01zvtm3b1LNnT/Xt21e7d+9Ww4YNNW7cOLs+Cxcu1Pjx4zVp0iTt2LFDhQsX1syZM1P0GTFihMaPH6+YmBhNmDBBw4cP17x58zI/GAAAAMBjymLcfSFDJkVFRenSpUtavny5atWqpTJlymj27Nlavny52rRpI8Mw1LlzZ507d07fffedbb7XXntNK1eu1L59+yT9fY3FDz/8IFdXV926dUu3b9+Wm5ub1q1bp9q1a0v6+4hFw4YNdfHiRfn6+urZZ59VfHy8Vq5caVtux44dtWrVKttRjZo1a6pq1ap67733bH3q1q2rq1evavfu3ZKk4sWLa+zYserUqZOtz7hx4/TNN9/ohx9+SHW7ExISlJCQYHt++fJlBQUFKaj/EjlZcz/ocAIAAAB2jk1s4dD1X758WT4+PoqPj5e3t3e6fbPsrlCTJk3SvHnzFBMTY9ceExOjOnXq2LXVqVNHBw8eVGJioq2tc+fO2r17t7Zu3armzZvrjTfesIWK1MTExKhGjRp2bbVq1bJ7Hhsbq+rVq9u13f382rVrOnz4sHr27ClPT0/bY9y4cTp8+HCa646OjpaPj4/tERQUlGZfAAAA4HHgklULql+/viIjIzVs2DBFRUVlen4fHx8VL15ckrRkyRIVL15cNWvWVERERFaVmMLVq1clSR
9++GGKkOLs7JzmfMOGDdPAgQNtz5OPWAAAAACPqywLFpI0ceJEVaxYUaVKlbK1hYaGauvWrXb9tm7dqpIlS6b55d3T01OvvPKKBg8erF27dqV6J6jQ0FBt27bNru2nn36ye16qVCn98ssv6tatm63tl19+sf27YMGCCgwM1JEjR9S5c+cMb6fVapXVas1wfwAAACCny9JgERYWps6dO2vatGm2tkGDBqlatWoaO3asOnTooB9//FHvvffefe+89MILL2js2LH64osv9Mwzz6SY3q9fP9WpU0dvvfWWWrVqpdWrV2vVqlV2fV5++WX17t1bVatWVe3atbV48WL9+uuvKlasmK3P6NGj1a9fP/n4+KhZs2ZKSEjQ9u3bdfHiRbujEgAAAADSluV/eXvMmDFKSkqyPa9cubKWLFmizz77TOXKldOIESM0ZsyY+54u5efnp27dumnUqFF2y0tWs2ZNffjhh3r33XdVoUIFfffdd3rzzTft+nTu3FnDhg3T4MGDVblyZR09elRRUVFyc3Oz9enVq5c++ugjzZkzR2FhYQoPD9fcuXNVtGhRcwMBAAAAPEZM3RXqUdSkSRP5+/tr/vz5WbbM5KvluSsUAAAAstKjdFeoLD0V6p/m+vXrmjVrliIjI+Xs7KxFixZp7dq1dn94DwAAAIB5OTpYWCwWffPNNxo/frxu3rypUqVK6YsvvsjWO00BAAAAj6McHSzc3d21du1aR5cBAAAA5HhZfvE2AAAAgMcPwQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaS6OLiAn2Ts6Ut7e3o4uAwAAAHjoOGIBAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA01wcXUBOUm7kajlZczu6DAAAADyCjk1s4egSTOGIBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMy1SwiIqKksVi0cSJE+3aly9fLovFoqtXrypXrlz67LPP7KZ37NhRFotFx44ds2svUqSIhg8fbtdWunRpWa1WnT59OjOlAQAAAHCgTB+xcHNz06RJk3Tx4sUU0zw9PVW1alVt3LjRrn3jxo0KCgqyaz969KiOHz+uRo0a2dq2bNmiGzdu6JlnntG8efMyWxoAAAAAB8l0sIiIiJC/v7+io6NTnd6wYUO7ABETE6ObN2/qxRdftGvfuHGjrFaratWqZWubPXu2nn32WXXt2lUff/xximUXKVJE48aNU7du3eTp6ang4GB9/fXXOnfunFq1aiVPT0+VL19e27dvt5tvy5Ytqlevntzd3RUUFKR+/frp2rVrtunvv/++SpQoITc3NxUsWFDPPPNMZocFAAAAeKxlOlg4OztrwoQJmj59uv74448U0xs2bKjY2FjFxcVJkjZs2KC6deuqUaNGdsFiw4YNqlWrltzc3CRJV65c0dKlS9WlSxc1adJE8fHx2rx5c4rlv/POO6pTp4527dqlFi1aqGvXrurWrZu6dOminTt3KiQkRN26dZNhGJKkw4cPq1mzZmrXrp1+/fVXLV68WFu2bFHfvn0lSdu3b1e/fv00ZswYxcbGatWqVapfv35mhwUAAAB4rD3Qxdtt2rRRxYoVNXLkyBTT6tSpI1dXV1uI2Lhxo8LDw1WlShX99ddfOnr0qCRp06ZNatiwoW2+zz77TCVKlFDZsmXl7Oysjh07avbs2SmW/+STT+qFF15QiRIlNGLECF2+fF
nVqlXT//3f/6lkyZIaMmSIYmJidObMGUlSdHS0OnfurP79+6tEiRKqXbu2pk2bpk8++UQ3b97UiRMn5OHhoaeeekrBwcGqVKmS+vXrl+72JyQk6PLly3YPAAAA4HH2wHeFmjRpkubNm6eYmBi79ty5c6tatWq2YLFp0yY1aNBALi4uql27tjZu3KgjR47oxIkTdsHi448/VpcuXWzPu3TpoqVLl+rKlSt2yy9fvrzt3wULFpQkhYWFpWg7e/asJGnPnj2aO3euPD09bY/IyEglJSXp6NGjatKkiYKDg1WsWDF17dpVCxcu1PXr19Pd9ujoaPn4+NgeQUFBGR02AAAAIEd64GBRv359RUZGatiwYSmmNWzYUBs2bNC+fft048YNVa5cWZIUHh6uDRs2aMOGDcqdO7dq1KghSdq/f79++uknvfbaa3JxcZGLi4tq1qyp69evp7jDVK5cuWz/tlgsabYlJSVJkq5evaoXXnhBu3fvtj327NmjgwcPKiQkRF5eXtq5c6cWLVqkgIAAjRgxQhUqVNClS5fS3PZhw4YpPj7e9jh58uQDjCAAAACQc7iYmXnixImqWLGiSpUqZdfesGFDjRs3Tp9++qnq1q0rZ2dnSX+Hkf/85z8yDMN2ypT090Xb9evX14wZM+yWM2fOHM2ePVu9e/d+4BorV66s/fv3q3jx4mn2cXFxUUREhCIiIjRy5Ej5+vpq/fr1atu2bar9rVarrFbrA9cEAAAA5DSm/kBeWFiYOnfurGnTptm1165dW1arVdOnT1d4eLitvXr16jp79qy++uor22lQt2/f1vz589WpUyeVK1fO7tGrVy9t27ZN+/bte+AahwwZoh9++EF9+/bV7t27dfDgQX311Ve2i7dXrFihadOmaffu3Tp+/Lg++eQTJSUlpQhLAAAAANJm+i9vjxkzxnbaUTI3NzfVrFlTV65cUYMGDWztVqvV1p4cLL7++mudP39ebdq0SbHs0NBQhYaGpnoRd0aVL19emzZt0u+//6569eqpUqVKGjFihAIDAyVJvr6+WrZsmRo1aqTQ0FDNmjVLixYtUtmyZR94nQAAAMDjxmIk35cVD+zy5ct/X8Tdf4mcrLkdXQ4AAAAeQccmtnB0CSkkf8+Nj4+Xt7d3un1NH7EAAAAAAIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0F0cXkJPsHR0pb29vR5cBAAAAPHQcsQAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpLo4uICcpN3K1nKy5HV0GAAAAHkHHJrZwdAmmcMQCAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaf/4YDFq1ChVrFjR9jwqKkqtW7d2WD0AAAAAUsr2YHH69Gm9/PLLKlasmKxWq4KCgtSyZUutW7cuu1dt59ChQ+rRo4eeeOIJWa1WFS1aVJ06ddL27dsfah0AAABATuSSnQs/duyY6tSpI19fX02ePFlhYWG6ffu2Vq9erT59+ujAgQPZuXqb7du3q3HjxipXrpw++OADlS5dWleuXNFXX32lQYMGadOmTanOd/v2beXKleuh1AgAAAA8yrL1iMVLL70ki8Win3/+We3atVPJkiVVtmxZDRw4UD/99JMk6cSJE2rVqpU8PT3l7e2t9u3b68yZMxlex5UrV9S5c2d5eHgoICBA77zzjho0aKD+/f
tLkgzDUFRUlEqUKKHNmzerRYsWCgkJUcWKFTVy5Eh99dVXkv4OQRaLRYsXL1Z4eLjc3Ny0cOHCLB8TAAAAICfKtmBx4cIFrVq1Sn369JGHh0eK6b6+vkpKSlKrVq104cIFbdq0SWvWrNGRI0fUoUOHDK9n4MCB2rp1q77++mutWbNGmzdv1s6dO23Td+/erX379mnQoEFyckq5ub6+vnbPhw4dqldeeUUxMTGKjIxMdZ0JCQm6fPmy3QMAAAB4nGXbqVCHDh2SYRgqXbp0mn3WrVun3377TUePHlVQUJAk6ZNPPlHZsmX1yy+/qFq1aumu48qVK5o3b54+/fRTNW7cWJI0Z84cBQYG2vocPHhQktKt4279+/dX27Zt0+0THR2t0aNHZ2h5AAAAwOMg245YGIZx3z4xMTEKCgqyhQpJKlOmjHx9fRUTE3Pf+Y8cOaLbt2+revXqtjYfHx+VKlUqU3XcrWrVqvftM2zYMMXHx9seJ0+ezNQ6AAAAgJwm24JFiRIlZLFYHtoF2mkpWbKkJGW4jtRO27qX1WqVt7e33QMAAAB4nGVbsPDz81NkZKRmzJiha9eupZh+6dIlhYaG6uTJk3a/+O/fv1+XLl1SmTJl7ruOYsWKKVeuXPrll19sbfHx8fr9999tzytWrKgyZcpoypQpSkpKSrUOAAAAAOZk612hZsyYocTERFWvXl1ffPGFDh48qJiYGE2bNk21atVSRESEwsLC1LlzZ+3cuVM///yzunXrpvDw8AydkuTl5aXu3bvr1Vdf1YYNG7Rv3z717NlTTk5OslgskiSLxaI5c+bo999/V7169fTNN9/oyJEj+vXXXzV+/Hi1atUqO4cAAAAAeCxka7AoVqyYdu7cqYYNG2rQoEEqV66cmjRponXr1mnmzJmyWCz66quvlCdPHtWvX18REREqVqyYFi9enOF1vP3226pVq5aeeuopRUREqE6dOgoNDZWbm5utT/Xq1bV9+3YVL15cvXv3VmhoqJ5++mnt27dPU6dOzYYtBwAAAB4vFiOzVzf/w127dk2FChXSlClT1LNnz4eyzsuXL8vHx0dB/ZfIyZr7oawTAAAAOcuxiS0cXUIKyd9z4+Pj73tdcbb+5e2HYdeuXTpw4ICqV6+u+Ph4jRkzRpI4xQkAAAB4iB75YCFJb731lmJjY+Xq6qoqVapo8+bNypcvn6PLAgAAAB4bj3ywqFSpknbs2OHoMgAAAIDHWrZevA0AAADg8UCwAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmObi6AJykr2jI+Xt7e3oMgAAAICHjiMWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0F0cXkJOUG7laTtbcji4DAAAA9zg2sYWjS8jxOGIBAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADDtoQSLIkWKaOrUqen2sVgsWr58+cMoBwAAAEAWMx0sTp48qeeee06BgYFydXVVcHCwXnnlFZ0/f/6Bljd37lxZLJZ0H8eOHTNbNgAAAIAsZCpYHDlyRFWrVtXBgwe1aNEiHTp0SLNmzdK6detUq1YtXbhwIdPL7NChg+Li4myPWrVqqXfv3nZtQUFBZsoGAAAAkMVMBYs+ffrI1dVV3333ncLDw1W4cGE1b95ca9eu1Z9//qk33ngj1fkOHjyo+vXry83NTWXKlNGaNWts09zd3eXv7297uLq6Knfu3Lbnt27dUtu2beXp6S
lvb2+1b99eZ86ckSTFx8fL2dlZ27dvlyQlJSXJz89PNWvWtC1/wYIFtmBy7NgxWSwWLVu2TA0bNlTu3LlVoUIF/fjjj2aGBQAAAHjsPHCwuHDhglavXq2XXnpJ7u7udtP8/f3VuXNnLV68WIZh2E1LSkpS27Zt5erqqm3btmnWrFkaMmRIhtaZlJSkVq1a6cKFC9q0aZPWrFmjI0eOqEOHDpIkHx8fVaxYURs3bpQk/fbbb7JYLNq1a5euXr0qSdq0aZPCw8PtlvvGG29o8ODB2r17t0qWLKlOnTrpzp07DzIsAAAAwGPpgYPFwYMHZRiGQkNDU50eGhqqixcv6ty5c3bta9eu1YEDB/TJJ5+oQoUKql+/viZMmJChda5bt06//fabPv30U1WpUkU1atTQJ598ok2bNumXX36RJDVo0MAWLDZu3KgmTZooNDRUW7ZssbXdGywGDx6sFi1aqGTJkho9erSOHz+uQ4cOpVlHQkKCLl++bPcAAAAAHmemL96+94jE/cTExCgoKEiBgYG2tlq1amVq3ruvsShTpox8fX0VExMjSQoPD9eWLVuUmJioTZs2qUGDBrawcerUKR06dEgNGjSwW2758uVt/w4ICJAknT17Ns06oqOj5ePjY3twzQcAAAAedw8cLIoXLy6LxWL7Qn+vmJgY5cmTR/nz53/g4h5E/fr1deXKFe3cuVPff/+9XbDYtGmTAgMDVaJECbt5cuXKZfu3xWKR9PdpV2kZNmyY4uPjbY+TJ09mz8YAAAAAj4gHDhZ58+ZVkyZN9P777+vGjRt2006fPq2FCxeqQ4cOti/qyUJDQ3Xy5EnFxcXZ2n766acMrTN53ru/yO/fv1+XLl1SmTJlJEm+vr4qX7683nvvPeXKlUulS5dW/fr1tWvXLq1YsSLFaVAPwmq1ytvb2+4BAAAAPM5MnQr13nvvKSEhQZGRkfr+++918uRJrVq1Sk2aNFGhQoU0fvz4FPNERESoZMmS6t69u/bs2aPNmzenefeo1OYNCwtT586dtXPnTv3888/q1q2bwsPDVbVqVVu/Bg0aaOHChbYQ4efnp9DQUC1evDhLggUAAAAAe6aCRYkSJbR9+3YVK1ZM7du3V0hIiJ5//nk1bNhQP/74o/z8/FKu0MlJX375pW7cuKHq1aurV69eqQaQ1FgsFn311VfKkyeP6tevr4iICBUrVkyLFy+26xceHq7ExES7aykaNGiQog0AAABA1rAYmb36Gilcvnz574u4+y+RkzW3o8sBAADAPY5NbOHoEh5Jyd9z4+Pj73v6v+m7QgEAAAAAwQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYJqLowvISfaOjpS3t7ejywAAAAAeOo5YAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATHNxdAE5gWEYkqTLly87uBIAAAAg6yR/v03+vpsegkUWOH/+vCQpKCjIwZUAAAAAWe/KlSvy8fFJtw/BIgv4+flJkk6cOHHfAUfWuXz5soKCgnTy5El5e3s7upzHCmPvGIy7YzDujsPYOwbj7jj/xLE3DENXrlxRYGDgffsSLLKAk9Pfl6r4+Pj8Y3aCx4m3tzfj7iCMvWMw7o7BuDsOY+8YjLvj/NPGPqM/nHPxNgAAAADTCBYAAAAATCNYZAGr1aqRI0fKarU6upTHCuPuOIy9YzDujsG4Ow5j7xiMu+M86mNvMTJy7ygAAAAASAdHLAAAAACYRrAAAAAAYBrBAgAAAIBpBAuTZsyYoSJFisjNzU01atTQzz//7OiScpTo6GhVq1ZNXl
5eKlCggFq3bq3Y2Fi7Pg0aNJDFYrF7/Otf/3JQxTnHqFGjUoxr6dKlbdNv3rypPn36KG/evPL09FS7du105swZB1acMxQpUiTFuFssFvXp00cS+3tW+v7779WyZUsFBgbKYrFo+fLldtMNw9CIESMUEBAgd3d3RURE6ODBg3Z9Lly4oM6dO8vb21u+vr7q2bOnrl69+hC34tGT3rjfvn1bQ4YMUVhYmDw8PBQYGKhu3brp1KlTdstI7X0yceLEh7wlj5777fNRUVEpxrVZs2Z2fdjnM+9+457aZ77FYtHkyZNtfR6VfZ5gYcLixYs1cOBAjRw5Ujt37lSFChUUGRmps2fPOrq0HGPTpk3q06ePfvrpJ61Zs0a3b99W06ZNde3aNbt+vXv3VlxcnO3x73//20EV5yxly5a1G9ctW7bYpg0YMED//e9/tXTpUm3atEmnTp1S27ZtHVhtzvDLL7/YjfmaNWskSf/3f/9n68P+njWuXbumChUqaMaMGalO//e//61p06Zp1qxZ2rZtmzw8PBQZGambN2/a+nTu3Fn79u3TmjVrtGLFCn3//fd6/vnnH9YmPJLSG/fr169r586dGj58uHbu3Klly5YpNjZWTz/9dIq+Y8aMsXsfvPzyyw+j/Efa/fZ5SWrWrJnduC5atMhuOvt85t1v3O8e77i4OH388ceyWCxq166dXb9HYp838MCqV69u9OnTx/Y8MTHRCAwMNKKjox1YVc529uxZQ5KxadMmW1t4eLjxyiuvOK6oHGrkyJFGhQoVUp126dIlI1euXMbSpUttbTExMYYk48cff3xIFT4eXnnlFSMkJMRISkoyDIP9PbtIMr788kvb86SkJMPf39+YPHmyre3SpUuG1Wo1Fi1aZBiGYezfv9+QZPzyyy+2Pt9++61hsViMP//886HV/ii7d9xT8/PPPxuSjOPHj9vagoODjXfeeSd7i8vhUhv77t27G61atUpzHvZ58zKyz7dq1cpo1KiRXdujss9zxOIB3bp1Szt27FBERIStzcnJSREREfrxxx8dWFnOFh8fL0ny8/Oza1+4cKHy5cuncuXKadiwYbp+/bojystxDh48qMDAQBUrVkydO3fWiRMnJEk7duzQ7du37fb/0qVLq3Dhwuz/WejWrVtasGCBnnvuOVksFls7+3v2O3r0qE6fPm23j/v4+KhGjRq2ffzHH3+Ur6+vqlatausTEREhJycnbdu27aHXnFPFx8fLYrHI19fXrn3ixInKmzevKlWqpMmTJ+vOnTuOKTCH2bhxowoUKKBSpUrpxRdf1Pnz523T2Oez35kzZ7Ry5Ur17NkzxbRHYZ93cXQBj6q//vpLiYmJKliwoF17wYIFdeDAAQdVlbMlJSWpf//+qlOnjsqVK2drf/bZZxUcHKzAwED9+uuvGjJkiGJjY7Vs2TIHVvvoq1GjhubOnatSpUopLi5Oo0ePVr169bR3716dPn1arq6uKf6jL1iwoE6fPu2YgnOg5cuX69KlS4qKirK1sb8/HMn7cWqf8cnTTp8+rQIFCthNd3FxkZ+fH++DLHLz5k0NGTJEnTp1kre3t629X79+qly5svz8/PTDDz9o2LBhiouL09tvv+3Aah99zZo1U9u2bVW0aFEdPnxYr7/+upo3b64ff/xRzs7O7PMPwbx58+Tl5ZXi1OJHZZ8nWOCR0adPH+3du9fuPH9Jdud2hoWFKSAgQI0bN9bhw4cVEhLysMvMMZo3b277d/ny5VWjRg0FBwdryZIlcnd3d2Blj4/Zs2erefPmCgwMtLWxv+Nxcfv2bbVv316GYWjmzJl20wYOHGj7d/ny5eXq6qoXXnhB0dHRj+xfLP4n6Nixo+3fYWFhKl++vEJCQrRx40Y1btzYgZU9Pj7++GN17txZbm5udu2Pyj7PqVAPKF++fHJ2dk5xF5wzZ87I39/fQVXlXH379tWKFSu0YcMGPfHEE+n2rVGjhiTp0KFDD6O0x4avr69KliypQ4
cOyd/fX7du3dKlS5fs+rD/Z53jx49r7dq16tWrV7r92N+zR/J+nN5nvL+/f4qbddy5c0cXLlzgfWBScqg4fvy41qxZY3e0IjU1atTQnTt3dOzYsYdT4GOiWLFiypcvn+3zhX0+e23evFmxsbH3/dyX/rn7PMHiAbm6uqpKlSpat26drS0pKUnr1q1TrVq1HFhZzmIYhvr27asvv/xS69evV9GiRe87z+7duyVJAQEB2Vzd4+Xq1as6fPiwAgICVKVKFeXKlctu/4+NjdWJEyfY/7PInDlzVKBAAbVo0SLdfuzv2aNo0aLy9/e328cvX76sbdu22fbxWrVq6dKlS9qxY4etz/r165WUlGQLfMi85FBx8OBBrV27Vnnz5r3vPLt375aTk1OK03Rgzh9//KHz58/bPl/Y57PX7NmzVaVKFVWoUOG+ff+p+zynQpkwcOBAde/eXVWrVlX16tU1depUXbt2TT169HB0aTlGnz599Omnn+qrr76Sl5eX7RxOHx8fubu76/Dhw/r000/15JNPKm/evPr11181YMAA1a9fX+XLl3dw9Y+2wYMHq2XLlgoODtapU6c0cuRIOTs7q1OnTvLx8VHPnj01cOBA+fn5ydvbWy+//LJq1aqlmjVrOrr0R15SUpLmzJmj7t27y8Xlfx/T7O9Z6+rVq3ZHeo4ePardu3fLz89PhQsXVv/+/TVu3DiVKFFCRYsW1fDhwxUYGKjWrVtLkkJDQ9WsWTP17t1bs2bN0u3bt9W3b1917NjR7vQ12Etv3AMCAvTMM89o586dWrFihRITE22f+35+fnJ1ddWPP/6obdu2qWHDhvLy8tKPP/6oAQMGqEuXLsqTJ4+jNuuRkN7Y+/n5afTo0WrXrp38/f11+PBhvfbaaypevLgiIyMlsc8/qPt91kh//3CxdOlSTZkyJcX8j9Q+7+jbUj3qpk+fbhQuXNhwdXU1qlevbvz000+OLilHkZTqY86cOYZhGMaJEyeM+vXrG35+fobVajWKFy9uvPrqq0Z8fLxjC88BOnToYAQEBBiurq5GoUKFjA4dOhiHDh2yTb9x44bx0ksvGXny5DFy585ttGnTxoiLi3NgxTnH6tWrDUlGbGysXTv7e9basGFDqp8v3bt3Nwzj71vODh8+3ChYsKBhtVqNxo0bp3hNzp8/b3Tq1Mnw9PQ0vL29jR49ehhXrlxxwNY8OtIb96NHj6b5ub9hwwbDMAxjx44dRo0aNQwfHx/Dzc3NCA0NNSZMmGDcvHnTsRv2CEhv7K9fv240bdrUyJ8/v5ErVy4jODjY6N27t3H69Gm7ZbDPZ979PmsMwzA++OADw93d3bh06VKK+R+lfd5iGIaR7ekFAAAAQI7GNRYAAAAATCNYAAAAADCNYAEAAADANIIFAAAAANMIFgAAAABMI1gAAAAAMI1gAQAAAMA0ggUAAAAA0wgWAIAUoqKi1Lp1a9vzBg0aqH///g+9jo0bN8pisejSpUtZutxjx47JYrFo9+7dWbrce50/f14FChTQsWPHUkwbNWpUqu1z586Vr69vmsvcv3+/nnjiCV27di3rCgWALECwAIBHRFRUlCwWiywWi1xdXVW8eHGNGTNGd+7cyfZ1L1u2TGPHjs1Q3+wKA2kpUqSIbVw8PDxUuXJlLV26NN15goKCFBcXp3LlymVrbePHj1erVq1UpEiRLFtmmTJlVLNmTb399ttZtkwAyAoECwB4hDRr1kxxcXE6ePCgBg0apFGjRmny5Mmp9r1161aWrdfPz09eXl5ZtrysNmbMGMXFxWnXrl2qVq2aOnTooB9++CHVvrdu3ZKzs7P8/f3l4uKSbTVdv35ds2fPVs+ePe3aly5dqipVqmjy5MmqVq2aatasqS+++CJTy+7Ro4dmzpz5UEIlAGQUwQIAHiFWq1X+/v4KDg7Wiy++qIiICH399deS/nf60vjx4xUYGKhSpUpJkk6ePKn27dvL19dXfn5+atWqld0pOImJiRo4cK
B8fX2VN29evfbaazIMw269954KlZCQoCFDhigoKEhWq1XFixfX7NmzdezYMTVs2FCSlCdPHlksFkVFRUmSkpKSFB0draJFi8rd3V0VKlTQ559/breeb775RiVLlpS7u7saNmyY6qlCqfHy8pK/v79KliypGTNmyN3dXf/9738l/X1EY+zYserWrZu8vb31/PPPp3oq1L59+/TUU0/J29tbXl5eqlevng4fPmyb/tFHHyk0NFRubm4qXbq03n///XRr+uabb2S1WlWzZk1b2++//65OnTqpRYsWioqK0pw5c9SnTx/dvn07zeWcO3dOVatWVZs2bZSQkCBJatKkiS5cuKBNmzZlaHwA4GEgWADAI8zd3d3uyMS6desUGxurNWvWaMWKFbp9+7YiIyPl5eWlzZs3a+vWrfL09FSzZs1s802ZMkVz587Vxx9/rC1btujChQv68ssv011vt27dtGjRIk2bNk0xMTH64IMP5OnpqaCgINuv77GxsYqLi9O7774rSYqOjtYnn3yiWbNmad++fRowYIC6dOli+3J88uRJtW3bVi1bttTu3bvVq1cvDR06NNNj4uLioly5ctmNy1tvvaUKFSpo165dGj58eIp5/vzzT9WvX19Wq1Xr16/Xjh079Nxzz9mOCCxcuFAjRozQ+PHjFRMTowkTJmj48OGaN29emnVs3rxZVapUsWv79ddf5eTkpNGjRyt//vwqV66cunbtqo4dO6a6jJMnT6pevXoqV66cPv/8c1mtVkmSq6urKlasqM2bN2d6fAAgu2TfMWAAQLYxDEPr1q3T6tWr9fLLL9vaPTw89NFHH8nV1VWStGDBAiUlJemjjz6SxWKRJM2ZM0e+vr7auHGjmjZtqqlTp2rYsGFq27atJGnWrFlavXp1muv+/ffftWTJEq1Zs0YRERGSpGLFitmm+/n5SZIKFChguwg5ISFBEyZM0Nq1a1WrVi3bPFu2bNEHH3yg8PBwzZw5UyEhIZoyZYokqVSpUvrtt980adKkDI/LrVu3NGXKFMXHx6tRo0a29kaNGmnQoEG25/ceCZkxY4Z8fHz02WefKVeuXJKkkiVL2qaPHDlSU6ZMsY1R0aJFtX//fn3wwQfq3r17qrUcP35cgYGBdm1VqlSRk5OTXn311ftegxIbG6smTZqoTZs2mjp1qu31SxYYGKjjx4+nuwwAeJgIFgDwCFmxYoU8PT11+/ZtJSUl6dlnn9WoUaNs08PCwmyhQpL27NmjQ4cOpbg+4ubNmzp8+LDi4+MVFxenGjVq2Ka5uLioatWqKU6HSrZ79245OzsrPDw8w3UfOnRI169fV5MmTezab926pUqVKkmSYmJi7OqQZAsh9zNkyBC9+eabunnzpjw9PTVx4kS1aNHCNr1q1arpzr97927Vq1fPFirudu3aNR0+fFg9e/ZU7969be137tyRj49Pmsu8ceOG3Nzc7NqKFi2qNWvWaPz48dq8ebOWLVumZs2aaezYsQoJCbGbt169enr22Wc1derUVJfv7u6u69evp7tdAPAwESwA4BHSsGFDzZw5U66urgoMDExx8bGHh4fd86tXr6pKlSpauHBhimXlz5//gWpwd3fP9DxXr16VJK1cuVKFChWym5Z8eo8Zr776qqKiouTp6amCBQum+HX/3nG5V3rblFz7hx9+mCL4ODs7pzlfvnz5dPHixRTt9erV06pVqzRq1CiVLVtWs2fPVqNGjXT48GHb62m1WhUREaEVK1bo1VdfTTFmknThwgW7MAIAjsY1FgDwCPHw8FDx4sVVuHDhDN3RqHLlyjp48KAKFCig4sWL2z18fHzk4+OjgIAAbdu2zTbPnTt3tGPHjjSXGRYWpqSkpDQvHE4+YpKYmGhrK1OmjKxWq06cOJGijqCgIElSaGiofv75Z7tl/fTTT/fdRunvL/HFixeXv79/ilCREeXLl9fmzZtTvYi6YMGCCgwM1JEjR1LUXrRo0TSXWalSJe3fvz/d9VarVk
2TJ0/WiRMn7E5rcnJy0vz581WlShU1bNhQp06dSjHv3r17bUd7AOCfgGABADlY586dlS9fPrVq1UqbN2/W0aNHtXHjRvXr109//PGHJOmVV17RxIkTtXz5ch04cEAvvfRSuuf/FylSRN27d9dzzz2n5cuX25a5ZMkSSVJwcLAsFotWrFihc+fO6erVq/Ly8tLgwYM1YMAAzZs3T4cPH9bOnTs1ffp02wXQ//rXv3Tw4EG9+uqrio2N1aeffqq5c+dm9xBJkvr27avLly+rY8eO2r59uw4ePKj58+crNjZWkjR69GhFR0dr2rRp+v333/Xbb79pzpw56f4ticjISO3bt8/uqMWqVav0zjvv6MiRI0pKStLZs2c1bdo05cuXT4ULF7ab39nZWQsXLlSFChXUqFEjnT592jbt2LFj+vPPP23XuADAPwHBAgBysNy5c+v7779X4cKF1bZtW4WGhqpnz566efOmvL29JUmDBg1S165d1b17d9WqVUteXl5q06ZNusudOXOmnnnmGb300ksqXbq0evfubftL0IUKFdLo0aM1dOhQFSxYUH379pUkjR07VsOHD1d0dLRCQ0PVrFkzrVy50varf+HChfXFF19o+fLlqlChgmbNmqUJEyZk4+j8T968ebV+/XpdvXpV4eHhqlKlij788EPbNRe9evXSRx99pDlz5igsLEzh4eGaO3duukcswsLCVLlyZVvgkv4OZdu2bVP9+vU1YcIENW7cWDExMVqxYkWq13e4uLho0aJFKlu2rBo1aqSzZ89KkhYtWqSmTZsqODg4i0cCAB6cxUjr6jwAAGDKypUr9eqrr2rv3r1ycrL/LW/UqFGKiorK9F/lvnXrlkqUKKFPP/1UderUycJqAcAcLt4GACCbtGjRQgcPHtSff/5pu5bErBMnTuj1118nVAD4x+GIBQAAAADTuMYCAAAAgGkECwAAAACmESwAAAAAmEawAAAAAGAawQIAAACAaQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAApv0/UhyGdMyeAJkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:Plot saved to results/price_by_neighborhood.png.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Neighborhood PredictedPrice\n", + " OldTown 163890.32\n", + " CollgCr 174784.64\n", + " NWAmes 174832.51\n", + " NoRidge 176042.92\n", + " BrkSide 176053.85\n" + ] + } + ], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", "# Build one instance per neighbourhood using default feature values.\n", "neighborhoods = [\"OldTown\", \"BrkSide\", \"CollgCr\", \"NWAmes\", \"NoRidge\"]\n", - "instances = [\n", - " {**cpptteut.FEATURE_DEFAULTS, \"Neighborhood\": n} for n in neighborhoods\n", - "]\n", + "instances = [{**cpptteut.FEATURE_DEFAULTS, \"Neighborhood\": n} for n in neighborhoods]\n", "# Predict prices for all neighbourhoods.\n", "prices = [cpptteut.predict_price(inst, model=model) for inst in instances]\n", "result_df = (\n", " pd.DataFrame({\"Neighborhood\": neighborhoods, \"PredictedPrice\": prices})\n", " .sort_values(\"PredictedPrice\")\n", ")\n", - "# Plot the neighbourhood comparison bar chart.\n", + "# Plot and save the neighbourhood comparison chart.\n", + "os.makedirs(\"results\", exist_ok=True)\n", "plt.figure(figsize=(8, 4))\n", "plt.barh(result_df[\"Neighborhood\"], result_df[\"PredictedPrice\"] / 1000)\n", "plt.xlabel(\"Predicted Price ($k)\")\n", @@ -291,6 +699,14 @@ "_LOG.info(\"Plot saved to results/price_by_neighborhood.png.\")\n", "print(result_df.to_string(index=False))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5eda1c92-cf37-4fe5-9838-a002cb7e12b0", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -301,6 +717,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" } }, "nbformat": 4, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py index 1c5ff6c4e..c9a5a0593 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template.example.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.19.0 +# jupytext_version: 1.19.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -180,4 +180,6 @@ plt.savefig("results/price_by_neighborhood.png", dpi=120) plt.show() _LOG.info("Plot saved to results/price_by_neighborhood.png.") -print(result_df.to_string(index=False)) \ No newline at end of file +print(result_df.to_string(index=False)) + +# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py index e01af3b37..c4090d516 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/template_utils.py @@ -208,6 +208,56 @@ def split_data( return train_test_split(X, y, test_size=test_size, random_state=42) +def load_test_data(test_data_path: str) -> Tuple[pd.DataFrame, pd.Series]: + """ + Load test data from a CSV file and separate features from target. + + If the file does not exist, generate synthetic test data. 
+ + :param test_data_path: path to test.csv (Kaggle House Prices test data) + :return: (X_test, y_test) tuple with features and target values + """ + if os.path.exists(test_data_path): + logger.info("Loading test dataset from '%s'.", test_data_path) + df = pd.read_csv(test_data_path) + # Keep only the columns required for this project. + available = [c for c in ALL_FEATURES + [TARGET_COLUMN] if c in df.columns] + df = df[available] + else: + logger.warning("File '%s' not found – generating synthetic test data.", test_data_path) + df = _generate_synthetic_data(n=500) + + X_test = df.drop(columns=[TARGET_COLUMN]) + y_test = df[TARGET_COLUMN] + logger.info("Test set: %d rows", len(X_test)) + return X_test, y_test + + +# ----------------------------------------------------------------------------- +# Example 2: PyCaret classification pipeline +# ----------------------------------------------------------------------------- + + +def run_pycaret_classification( + df: pd.DataFrame, target_column: str +) -> pd.DataFrame: + """ + Run a basic PyCaret classification experiment. + + :param df: dataset containing features and target + :param target_column: name of the target column + + :return: comparison of top-performing models + """ + logger.info("Initializing PyCaret classification setup") + ... + + logger.info("Comparing models") + results = compare_models() + ... + + return results + # ----------------------------------------------------------------------------- # Sklearn pipeline builder # ----------------------------------------------------------------------------- @@ -288,6 +338,56 @@ def compare_models( return leaderboard +def run_pycaret_regression( + df: pd.DataFrame, + n_select: int = 3, + fold: int = 5, + target_column: str = TARGET_COLUMN, +) -> Pipeline: + """ + Run a PyCaret-style regression experiment: compare models and train the best. + + This is a convenience wrapper that: + 1. Runs cross-validation on all candidate models (compare_models) + 2. 
Selects the top performer by RMSE + 3. Trains the best model on the full dataset + 4. Stores the leaderboard for later retrieval via get_model_results() + + :param df: dataset containing features and target column + :param n_select: (unused, for PyCaret compatibility) number of top models + :param fold: number of cross-validation folds + :param target_column: name of the target column + :return: fitted sklearn Pipeline for the best model + """ + global _model_leaderboard, _best_model_pipeline + + # Run comparison and get leaderboard + leaderboard = compare_models(df, target_column=target_column, fold=fold) + _model_leaderboard = leaderboard + + # Train the best model (top row after sorting by RMSE) + best_model_name = leaderboard.iloc[0]["Model"] + logger.info("Training best model: %s", best_model_name) + best_pipeline = train_best_model(df, target_column=target_column, model_name=best_model_name) + _best_model_pipeline = best_pipeline + + return best_pipeline + + +def get_model_results() -> pd.DataFrame: + """ + Retrieve the leaderboard from the last run_pycaret_regression() call. + + :return: DataFrame with columns Model, RMSE, MAE, R2 sorted by RMSE + :raises RuntimeError: if run_pycaret_regression() has not been called yet + """ + if _model_leaderboard is None: + raise RuntimeError( + "No model results available. Call run_pycaret_regression() first." 
+ ) + return _model_leaderboard + + def train_best_model( df: pd.DataFrame, target_column: str = TARGET_COLUMN, From 0def1077cb07f3c04376e92cf445a29f1ea1996a Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 6 May 2026 20:56:25 -0400 Subject: [PATCH 55/58] removing the helpers folder, not necessary --- .../helpers/README.md | 285 - .../helpers/__init__.py | 0 .../helpers/asana_utils.py | 1156 -- .../helpers/github_utils.py | 2110 --- .../helpers/hasyncio.py | 508 - .../helpers/haws.py | 266 - .../helpers/hcache.py | 1086 -- .../helpers/hcache_simple.py | 1963 --- .../helpers/hcfile.py | 135 - .../helpers/hchatgpt.py | 549 - .../helpers/hchatgpt_instructions.py | 32 - .../helpers/hcoverage.py | 183 - .../helpers/hcsv.py | 365 - .../helpers/hdataframe.py | 309 - .../helpers/hdatetime.py | 909 -- .../helpers/hdbg.py | 1134 -- .../helpers/hdict.py | 119 - .../helpers/hdocker.py | 871 -- .../helpers/hdocker_tests.py | 197 - .../helpers/hemail.py | 47 - .../helpers/henv.py | 541 - .../helpers/hfile_tree.py | 232 - .../helpers/hgit.py | 1869 --- .../helpers/hgoogle_drive_api.py | 1183 -- .../helpers/hintrospection.py | 284 - .../helpers/hio.py | 1046 -- .../helpers/hjoblib.py | 880 -- .../helpers/hjupyter.py | 383 - .../helpers/hlatex.py | 334 - .../helpers/hlint.py | 29 - .../helpers/hlist.py | 78 - .../helpers/hllm.py | 680 - .../helpers/hllm_cli.py | 840 - .../helpers/hllm_cost.py | 233 - .../helpers/hlogging.py | 809 - .../helpers/hlogging.pyi | 14 - .../helpers/hmarkdown.py | 18 - .../helpers/hmarkdown_bullets.py | 248 - .../helpers/hmarkdown_coloring.py | 286 - .../helpers/hmarkdown_comments.py | 66 - .../helpers/hmarkdown_div_blocks.py | 132 - .../helpers/hmarkdown_fenced_blocks.py | 131 - .../helpers/hmarkdown_filtering.py | 109 - .../helpers/hmarkdown_formatting.py | 530 - .../helpers/hmarkdown_headers.py | 841 - .../helpers/hmarkdown_rules.py | 367 - .../helpers/hmarkdown_slides.py | 201 - .../helpers/hmarkdown_tables.py | 121 - 
.../helpers/hmarkdown_toc.py | 164 - .../helpers/hmatplotlib.py | 106 - .../helpers/hmkdocs.py | 170 - .../helpers/hmodule.py | 121 - .../helpers/hmoto.py | 111 - .../helpers/hnetwork.py | 97 - .../helpers/hnotebook.py | 105 - .../helpers/hnumba.py | 43 - .../helpers/hnumpy.py | 57 - .../helpers/hobject.py | 500 - .../helpers/hopen.py | 106 - .../helpers/hpandas.py | 18 - .../helpers/hpandas.py.old | 2684 ---- .../helpers/hpandas_analysis.py | 628 - .../helpers/hpandas_check_summary.py | 111 - .../helpers/hpandas_clean.py | 282 - .../helpers/hpandas_compare.py | 289 - .../helpers/hpandas_conversion.py | 221 - .../helpers/hpandas_dassert.py | 371 - .../helpers/hpandas_display.py | 302 - .../helpers/hpandas_io.py | 128 - .../helpers/hpandas_multiindex.py | 183 - .../helpers/hpandas_stats.py | 527 - .../helpers/hpandas_transform.py | 1023 -- .../helpers/hpandas_utils.py | 649 - .../helpers/hparquet.py | 1309 -- .../helpers/hparser.py | 1176 -- .../helpers/hpickle.py | 253 - .../helpers/hplayback.py | 495 - .../helpers/hprint.py | 1076 -- .../helpers/hpytest.py | 266 - .../helpers/hretry.py | 94 - .../helpers/hs3.py | 1129 -- .../helpers/hsecrets.py | 233 - .../helpers/hserver.py | 1167 -- .../helpers/hsftp.py | 204 - .../helpers/hslack.py | 66 - .../helpers/hsql.py | 36 - .../helpers/hsql_implementation.py | 954 -- .../helpers/hsql_test.py | 273 - .../helpers/hstring.py | 176 - .../helpers/hsystem.py | 1097 -- .../helpers/htable.py | 180 - .../helpers/htest_logger.py | 48 - .../helpers/htext_protect.py | 262 - .../helpers/hthreading.py | 43 - .../helpers/htimer.py | 275 - .../helpers/htqdm.py | 48 - .../helpers/htraceback.py | 228 - .../helpers/htranslate.py | 109 - .../helpers/htypes.py | 11 - .../helpers/hunit_test.py | 1876 --- .../helpers/hunit_test_purification.py | 450 - .../helpers/hunit_test_utils.py | 658 - .../helpers/hversion.py | 300 - .../helpers/hwall_clock_time.py | 125 - .../helpers/hwarnings.py | 156 - .../helpers/lib_tasks.py | 37 - 
.../helpers/lib_tasks_aws.py | 407 - .../helpers/lib_tasks_bash.py | 104 - .../helpers/lib_tasks_docker.py | 1590 -- .../helpers/lib_tasks_docker_release.py | 1890 --- .../helpers/lib_tasks_find.py | 606 - .../helpers/lib_tasks_gh.py | 1252 -- .../helpers/lib_tasks_git.py | 1502 -- .../helpers/lib_tasks_integrate.py | 837 - .../helpers/lib_tasks_lint.py | 443 - .../helpers/lib_tasks_perms.py | 380 - .../helpers/lib_tasks_print.py | 103 - .../helpers/lib_tasks_pytest.py | 1743 --- .../helpers/lib_tasks_utils.py | 397 - .../helpers/logging_testing/__init__.py | 0 .../helpers/logging_testing/logging_main.py | 81 - .../helpers/logging_testing/logging_module.py | 10 - .../helpers/notebooks/conftest.py | 17 - .../helpers/notebooks/hcache.tutorial.ipynb | 638 - .../helpers/notebooks/hcache.tutorial.py | 274 - .../notebooks/hcache_simple.tutorial.ipynb | 858 - .../notebooks/hcache_simple.tutorial.py | 486 - .../hgoodle_drive_api.tutorial.ipynb | 424 - .../notebooks/hgoodle_drive_api.tutorial.py | 107 - .../helpers/notebooks/hllm.tutorial.ipynb | 13040 ---------------- .../helpers/notebooks/hllm.tutorial.py | 118 - .../notebooks/hplayback.tutorial.ipynb | 993 -- .../helpers/notebooks/hplayback.tutorial.py | 374 - .../helpers/notebooks/parquet.tutorial.ipynb | 1774 --- .../helpers/notebooks/parquet.tutorial.py | 304 - .../helpers/notebooks/s3.tutorial.ipynb | 210 - .../helpers/notebooks/s3.tutorial.py | 44 - .../helpers/notebooks/sage.tutorial.ipynb | 448 - .../helpers/notebooks/sage.tutorial.py | 98 - .../helpers/old/__init__.py | 0 .../helpers/old/conda.py | 192 - .../helpers/old/conftest.py | 17 - .../helpers/old/env2.py | 75 - .../helpers/old/tunnels.py | 267 - .../helpers/old/user_credentials.py | 208 - .../pandoc_docker_files/install-texlive.sh | 113 - .../helpers/pandoc_docker_files/packages.txt | 115 - .../pandoc_docker_files/texlive.profile | 32 - .../helpers/repo_config_utils.py | 411 - .../helpers/stage_linked_file.py | 83 - .../helpers/telegram_notify/__init__.py 
| 0 .../helpers/telegram_notify/config.py | 30 - .../helpers/telegram_notify/get_chat_id.py | 76 - .../telegram_notify/telegram_notify.py | 155 - .../helpers/test/__init__.py | 0 .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test_df.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../TestDataframeToJson.test1/output/test.txt | 31 - .../TestDataframeToJson.test2/output/test.txt | 13 - .../TestDataframeToJson.test3/output/test.txt | 13 - .../TestDataframeToJson.test4/output/test.txt | 13 - .../output/test.txt | 31 - .../output/test.txt | 13 - .../output/test.txt | 13 - .../output/test.txt | 13 - .../output/test.txt | 4 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 4 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 2 - .../output/test.txt | 2 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test.txt | 0 .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 0 .../output/test.txt | 0 .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 0 .../output/test.txt | 3 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 2 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 0 .../input/test.txt | 101 - .../output/test.txt | 8 - .../TestGetDocstrings.test1/input/test.txt | 18 - 
.../output/test.txt | 52 - .../output/test.txt | 52 - .../output/test.txt | 30 - .../output/test.txt | 20 - .../output/test.txt | 19 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 23 - .../output/test.txt | 19 - .../output/test.txt | 21 - .../output/test.txt | 15 - .../output/test.txt | 17 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 20 - .../output/test.txt | 22 - .../output/test.txt | 23 - .../output/test.txt | 19 - .../output/test.txt | 18 - .../output/test.txt | 19 - .../output/test.txt | 30 - .../output/test.txt | 1 - .../output/test.txt | 20 - .../output/test.txt | 30 - .../output/test.txt | 30 - .../output/test.txt | 65 - .../Test_CheckSummary.test1/output/test.txt | 4 - .../Test_CheckSummary.test2/output/test.txt | 4 - .../output/test.txt | 19 - .../output/test.txt | 5 - .../output/test.txt | 9 - .../output/test.txt | 9 - .../input/tmp.cache_simple._llm.json | 10 - .../Test_apply_nan_mode.test1/output/test.txt | 41 - .../Test_apply_nan_mode.test2/output/test.txt | 33 - .../Test_apply_nan_mode.test3/output/test.txt | 41 - .../Test_apply_nan_mode.test4/output/test.txt | 38 - .../Test_apply_nan_mode.test5/output/test.txt | 41 - .../output/test.txt | 3 - .../output/test.txt | 1 - .../output/test_df.txt | 3 - .../input/test.csv | 5 - .../Test_dassert1.test2/output/test.txt | 5 - .../Test_dassert1.test3/output/test.txt | 6 - .../Test_dassert1.test4/output/test.txt | 6 - .../Test_dassert1.test5/output/test.txt | 8 - .../Test_dassert1.test6/output/test.txt | 8 - .../Test_dassert1.test7/output/test.txt | 1 - .../Test_dassert_eq1.test3/output/test.txt | 8 - .../Test_dassert_eq1.test4/output/test.txt | 8 - .../Test_dassert_eq1.test5/output/test.txt | 10 - .../output/test.txt | 1 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 8 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt | 5 - .../output/test.txt 
| 9 - .../output/test.txt | 9 - .../output/test.txt | 8 - .../output/test.txt | 1 - .../output/test.txt | 28 - .../output/test.txt | 28 - .../output/test.txt | 26 - .../output/test.txt | 27 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../output/test.txt | 1 - .../Test_from_typed_csv.test1/input/test.csv | 2 - .../input/test.csv.types | 1 - .../output/test.txt | 58 - .../output/test.txt | 58 - .../output/test.txt | 60 - .../output/test.txt | 57 - .../output/test.txt | 56 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../output/test.txt | 63 - .../input/result_0/config.pkl | Bin 405 -> 0 bytes .../input/result_0/config.txt | 7 - .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.pkl | Bin 405 -> 0 bytes .../input/result_1/config.txt | 7 - .../input/result_1/run_notebook.1.log | 0 .../input/result_0/config.txt | 7 - .../input/result_0/run_notebook.0.log | 0 .../input/result_1/config.txt | 7 - .../input/result_1/run_notebook.1.log | 0 .../output/test.txt | 45 - .../input/test.json | 17 - .../Test_obj_to_str1.test1/output/test.txt | 11 - .../Test_obj_to_str1.test2/output/test.txt | 11 - .../Test_obj_to_str1.test3/output/test.txt | 11 - .../Test_obj_to_str1.test4/output/test.txt | 12 - .../Test_obj_to_str1.test5/output/test.txt | 12 - .../Test_obj_to_str1.test6/output/test.txt | 12 - .../Test_obj_to_str2.test1/output/test.txt | 11 - .../Test_obj_to_str2.test2/output/test.txt | 11 - .../Test_obj_to_str2.test3/output/test.txt | 11 - .../Test_obj_to_str2.test4/output/test.txt | 11 - .../Test_obj_to_str2.test5/output/test.txt | 11 - .../Test_obj_to_str2.test6/output/test.txt | 11 - .../output/test.txt | 1 - .../Test_open_html.test_mac1/output/test.txt | 1 - .../output/test.txt | 1 - .../Test_open_pdf.test_mac1/output/test.txt | 1 - .../input/test.txt | 16 - .../output/test.txt | 20 - .../Test_process_lines1.test1/input/test.txt | 16 - 
.../Test_process_lines1.test1/output/test.txt | 20 - .../output/test.txt | 2 - .../input/cache/lastfailed | 12 - .../output/test.txt | 15 - .../input/log.txt | 325 - .../output/test.txt | 10 - .../input/log.txt | 10 - .../output/test.txt | 8 - .../input/log.txt | 61 - .../output/test.txt | 61 - .../input/log.txt | 36 - .../output/test.txt | 36 - .../input/log.txt | 2533 --- .../output/test.txt | 41 - .../input/log.txt | 396 - .../output/test.txt | 399 - .../input/test.txt | 7 - .../input/test.txt | 16 - .../output/test.txt | 16 - .../input/test.txt | 9 - .../output/test.txt | 7 - .../output/test.txt | 2 - .../output/test.txt | 3 - .../output/test.txt | 71 - .../output/test.txt | 40 - .../output/test.txt | 40 - .../output/test.txt | 4 - .../output/test.txt | 1 - .../Test_system1.test7/output/test.txt | 16 - .../Test_to_typed_csv.test1/input/test.csv | 2 - .../helpers/test/test_create_link.py | 136 - .../helpers/test/test_hasyncio.py | 96 - .../helpers/test/test_haws.py | 276 - .../helpers/test/test_hcache.py | 1002 -- .../helpers/test/test_hcache_simple.py | 2606 --- .../helpers/test/test_hcfile.py | 335 - .../helpers/test/test_hcsv.py | 81 - .../helpers/test/test_hdataframe.py | 299 - .../helpers/test/test_hdatetime.py | 932 -- .../helpers/test/test_hdbg.py | 934 -- .../helpers/test/test_hdict.py | 107 - .../helpers/test/test_hdocker.py | 624 - .../helpers/test/test_hdocker_tests.py | 158 - .../helpers/test/test_henv.py | 17 - .../helpers/test/test_hfile_tree.py | 347 - .../helpers/test/test_hgit.py | 822 - .../helpers/test/test_hintrospection.py | 406 - .../helpers/test/test_hio.py | 225 - .../helpers/test/test_hlatex.py | 665 - .../helpers/test/test_hlist.py | 176 - .../helpers/test/test_hllm.py | 361 - .../helpers/test/test_hllm_cli.py | 1403 -- .../helpers/test/test_hlogging.py | 103 - .../helpers/test/test_hmarkdown_bullets.py | 716 - .../helpers/test/test_hmarkdown_coloring.py | 205 - .../helpers/test/test_hmarkdown_div_blocks.py | 355 - 
.../test/test_hmarkdown_fenced_blocks.py | 218 - .../helpers/test/test_hmarkdown_filtering.py | 449 - .../helpers/test/test_hmarkdown_formatting.py | 1403 -- .../helpers/test/test_hmarkdown_headers.py | 2002 --- .../helpers/test/test_hmarkdown_rules.py | 377 - .../helpers/test/test_hmarkdown_slides.py | 399 - .../helpers/test/test_hmarkdown_tables.py | 196 - .../helpers/test/test_hmarkdown_toc.py | 228 - .../helpers/test/test_hmkdocs.py | 394 - .../helpers/test/test_hmodule.py | 25 - .../helpers/test/test_hnumpy.py | 215 - .../helpers/test/test_hobject.py | 392 - .../helpers/test/test_hopen.py | 92 - .../helpers/test/test_hpandas_analysis.py | 42 - .../test/test_hpandas_check_summary.py | 67 - .../helpers/test/test_hpandas_clean.py | 364 - .../helpers/test/test_hpandas_compare.py | 650 - .../helpers/test/test_hpandas_conversion.py | 276 - .../helpers/test/test_hpandas_dassert.py | 448 - .../helpers/test/test_hpandas_display.py | 685 - .../helpers/test/test_hpandas_io.py | 43 - .../helpers/test/test_hpandas_multiindex.py | 680 - .../helpers/test/test_hpandas_stats.py | 426 - .../helpers/test/test_hpandas_transform.py | 1888 --- .../helpers/test/test_hpandas_utils.py | 251 - .../helpers/test/test_hparquet.py | 1468 -- .../helpers/test/test_hparser.py | 398 - .../helpers/test/test_hpickle.py | 97 - .../helpers/test/test_hplayback.py | 506 - .../helpers/test/test_hprint.py | 844 - .../helpers/test/test_hpytest.py | 228 - .../helpers/test/test_hretry.py | 154 - .../helpers/test/test_hs3.py | 597 - .../helpers/test/test_hsecrets.py | 209 - .../helpers/test/test_hserver.py | 321 - .../helpers/test/test_hslack.py | 81 - .../helpers/test/test_hsql.py | 29 - .../helpers/test/test_hstring.py | 270 - .../helpers/test/test_hsystem.py | 494 - .../helpers/test/test_htable.py | 159 - .../helpers/test/test_htext_protect.py | 578 - .../helpers/test/test_htimer.py | 24 - .../helpers/test/test_htraceback.py | 474 - .../helpers/test/test_hunit_test.py | 954 -- 
.../helpers/test/test_hunit_test_mock.py | 288 - .../test/test_hunit_test_purification.py | 1065 -- .../helpers/test/test_hunit_test_utils.py | 553 - .../helpers/test/test_hversion.py | 74 - .../helpers/test/test_joblib_helpers.py | 569 - .../helpers/test/test_lib_tasks.py | 540 - .../helpers/test/test_lib_tasks_docker.py | 494 - .../test/test_lib_tasks_docker_release.py | 1530 -- .../helpers/test/test_lib_tasks_find.py | 267 - .../helpers/test/test_lib_tasks_gh.py | 133 - .../helpers/test/test_lib_tasks_git.py | 267 - .../helpers/test/test_lib_tasks_integrate.py | 27 - .../helpers/test/test_lib_tasks_lint.py | 32 - .../helpers/test/test_lib_tasks_pytest.py | 1163 -- .../helpers/test/test_lib_tasks_utils.py | 301 - .../test_master_buildmeister_dashboard.py | 74 - .../helpers/test/test_repo_config_amp.py | 284 - .../helpers/test/test_repo_config_utils.py | 65 - .../results/price_by_neighborhood.png | Bin 27162 -> 27259 bytes helpers_root | 2 +- 455 files changed, 1 insertion(+), 129611 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_aws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py delete mode 100755 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt delete mode 
100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_0/run_notebook.0.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/config.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/input/result_1/run_notebook.1.log delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_load_df_from_json.test1/input/test.json delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt 
delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py delete mode 100644 
class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md deleted file mode 100644 index 8578eccd3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/README.md +++ /dev/null @@ -1,285 +0,0 @@ -# Summary - -The `helpers/` directory is the core Python library providing utilities, -development tools, and infrastructure components for the helpers ecosystem. -Modules follow the `h` naming convention and are organized by domain. - -# Directory Structure - -- `logging_testing/` - - Utilities for testing logging behavior across modules -- `notebooks/` - - Jupyter notebooks and tutorials (e.g., hcache_simple usage) -- `old/` - - Deprecated and archived modules (conda, tunnels, user_credentials) -- `pandoc_docker_files/` - - Docker setup files and package lists for pandoc and texlive -- `telegram_notify/` - - Telegram bot notification module with config and chat ID utilities -- `test/` - - Unit tests for all modules (90+ test files organized by module name) - -# Files - -## Core Infrastructure - -- `hdbg.py` - - Debugging utilities with specialized assertions, logging, and fatal error handling -- `hio.py` - - Filesystem operations, file read/write, and directory management utilities -- `hsystem.py` - - System interaction: shell commands, environment variables, process management -- `henv.py` - - Environment variable checks and module installation management -- `hserver.py` - - Identify which server the code is running on -- `hversion.py` - - Code version control and Docker container compatibility checking -- `hlogging.py` - - Logging configuration, custom formatters, and logging utilities -- `hwarnings.py` - - Suppress annoying Python warnings when imported -- `htraceback.py` - - Traceback parsing, formatting, and manipulation utilities -- `hprint.py` - - Debugging and pretty-printing utilities for Python objects -- `hparser.py` - - Argparse helpers: 
verbosity, action, limit-range, and other standard arguments -- `hobject.py` - - Introspect and print the state of a Python object -- `hintrospection.py` - - Python introspection and module analysis utilities -- `hmodule.py` - - Dynamic module installation and import management utilities -- `htimer.py` - - Timer class for measuring and reporting elapsed time -- `htqdm.py` - - tqdm progress bar stream redirected to Python logger -- `hthreading.py` - - Timeout decorator to enforce execution time limits on functions -- `hretry.py` - - Retry decorators for synchronous and asynchronous functions -- `hasyncio.py` - - Async/await utilities and coroutine management for asyncio -- `hnetwork.py` - - Network utilities including URL availability checking -- `hopen.py` - - Cross-platform file opening utility -- `htypes.py` - - General type aliases and type utilities based on standard Python libraries -- `hwall_clock_time.py` - - Wall clock time simulation and management for testing and replays - -## Data Processing - -- `hpandas.py` - - Pandas utilities aggregating all hpandas_* submodules -- `hpandas_analysis.py` - - Statistical analysis and ML-related functions for pandas DataFrames -- `hpandas_check_summary.py` - - DataFrame check and summary reporting utilities -- `hpandas_clean.py` - - DataFrame cleaning operations (deduplicate, fill NaN, sanitize) -- `hpandas_compare.py` - - DataFrame comparison utilities for diffing and equality checks -- `hpandas_conversion.py` - - DataFrame and Series conversion and casting utilities -- `hpandas_dassert.py` - - Pandas-specific assertions and validation functions -- `hpandas_display.py` - - DataFrame display formatting and signature generation -- `hpandas_io.py` - - Pandas I/O operations for local and S3 storage -- `hpandas_multiindex.py` - - MultiIndex creation, manipulation, and access operations -- `hpandas_stats.py` - - Pandas statistics, duration computation, and time-series helpers -- `hpandas_transform.py` - - DataFrame 
transformation operations (pivot, reshape, normalize) -- `hpandas_utils.py` - - General-purpose pandas utilities and helper functions -- `hdataframe.py` - - Lower-level helper functions for processing pandas DataFrames -- `hnumpy.py` - - NumPy utilities, array helpers, and random seed management -- `hnumba.py` - - Numba JIT compilation wrapper and acceleration utilities -- `hparquet.py` - - Parquet file read/write operations using pyarrow -- `hcsv.py` - - CSV file operations and DataFrame I/O utilities -- `hdatetime.py` - - Date/time manipulation, parsing, and timezone handling utilities -- `hdict.py` - - Dictionary manipulation and nested dictionary operation utilities -- `hlist.py` - - List manipulation, deduplication, and membership utilities -- `hstring.py` - - String manipulation, formatting, and transformation utilities -- `htable.py` - - Lightweight rectangular table class with no pandas dependency - -## Caching and Performance - -- `hcache.py` - - Advanced function caching using joblib with S3 and git integration -- `hcache_simple.py` - - Simple caching with JSON or pickle file-based storage backends -- `hjoblib.py` - - Joblib parallelization, memory caching, and job management -- `hpickle.py` - - Pickle and JSON serialization and deserialization routines - -## Testing Framework - -- `hunit_test.py` - - Enhanced unit testing framework built on unittest and pytest with golden files -- `hunit_test_purification.py` - - Text purification utilities to sanitize test output for comparison -- `hunit_test_utils.py` - - Unit test utilities including test renaming and helpers -- `hpytest.py` - - Pytest integration utilities and test artifact handling -- `hcoverage.py` - - Code coverage utilities and test coverage analysis helpers -- `hplayback.py` - - Automatically generate unit tests by recording and replaying function calls -- `htest_logger.py` - - Test logging script template -- `hmoto.py` - - AWS service mocking with moto for unit testing - -## Markdown Processing 
- -- `hmarkdown.py` - - Markdown processing entry point aggregating all hmarkdown_* submodules -- `hmarkdown_bullets.py` - - Markdown bullet point processing and formatting -- `hmarkdown_coloring.py` - - Markdown text coloring utilities for LaTeX and HTML output -- `hmarkdown_comments.py` - - Markdown comment detection, extraction, and removal utilities -- `hmarkdown_div_blocks.py` - - Utilities for handling HTML div blocks within markdown files -- `hmarkdown_fenced_blocks.py` - - Fenced code block parsing and manipulation in markdown -- `hmarkdown_filtering.py` - - Markdown section extraction and content filtering utilities -- `hmarkdown_formatting.py` - - Markdown text formatting and whitespace normalization utilities -- `hmarkdown_headers.py` - - Markdown header manipulation, extraction, and level adjustment -- `hmarkdown_rules.py` - - Markdown rule validation and processing utilities -- `hmarkdown_slides.py` - - Markdown slide extraction, splitting, and processing for presentations -- `hmarkdown_tables.py` - - Markdown table parsing, formatting, and manipulation utilities -- `hmarkdown_toc.py` - - Markdown table of contents generation and YAML frontmatter handling -- `hlint.py` - - Linting utilities for text and code files -- `htext_protect.py` - - Utilities for protecting content regions during text processing - -## External Services and Cloud - -- `haws.py` - - AWS services integration with boto3 client and resource management -- `hs3.py` - - S3 file operations, listing, and S3-backed filesystem utilities -- `hsecrets.py` - - AWS Secrets Manager integration for secret retrieval -- `htranslate.py` - - AWS Translate service wrapper for text translation -- `hgit.py` - - Git repository operations, branch management, and diff utilities -- `hdocker.py` - - Docker container operations, image management, and Docker utilities -- `hdocker_tests.py` - - Utilities for running tests inside Docker containers -- `hdockerized_executables.py` - - Wrappers for Dockerized 
executables: prettier, pandoc, latex, and others -- `hgoogle_drive_api.py` - - Google Drive and Google Sheets API integration utilities -- `hchatgpt.py` - - OpenAI API integration with file management and chat utilities -- `hchatgpt_instructions.py` - - ChatGPT system instructions and prompt templates -- `hllm.py` - - LLM API integration with caching, cost tracking, and response handling -- `hllm_cli.py` - - LLM CLI interaction wrapper and cost estimation utilities -- `hllm_cost.py` - - LLM cost calculation for OpenRouter and other APIs -- `hslack.py` - - Slack notification utilities for sending messages to channels -- `hemail.py` - - Email sending utilities via SMTP -- `hsftp.py` - - SFTP file transfer operations using pysftp -- `hsql.py` - - SQL database operations as a PostgreSQL wrapper -- `hsql_implementation.py` - - Low-level SQL implementation with psycopg2 driver -- `hsql_test.py` - - SQL testing utilities, fixtures, and database test helpers -- `asana_utils.py` - - Enhanced Asana analytics with time estimation and team grouping -- `github_utils.py` - - GitHub API utilities for caching and repository data retrieval - -## Notebooks and Visualization - -- `hnotebook.py` - - Jupyter notebook configuration and display setup utilities -- `hjupyter.py` - - Jupyter notebook execution and output capture utilities -- `hmatplotlib.py` - - Matplotlib utilities, figure management, and plotting helpers -- `hmkdocs.py` - - MkDocs-specific markdown generation and documentation utilities -- `hlatex.py` - - LaTeX conversion utilities using pandoc - -## Miscellaneous - -- `hfile_tree.py` - - Directory tree building and formatted output utilities -- `hcfile.py` - - C file parsing and transformation utilities -- `repo_config_utils.py` - - Repository configuration utilities loaded from YAML -- `stage_linked_file.py` - - Symbolic link staging utility for git operations - -## Task System (`lib_tasks_*.py`) - -- `lib_tasks.py` - - Entry point that aggregates all invoke task 
modules -- `lib_tasks_aws.py` - - Invoke tasks for AWS operations and deployments -- `lib_tasks_bash.py` - - Invoke tasks for bash script execution -- `lib_tasks_docker.py` - - Invoke tasks for Docker build, run, and management operations -- `lib_tasks_docker_release.py` - - Invoke tasks for Docker image release and publishing workflows -- `lib_tasks_find.py` - - Invoke tasks for searching and finding files in the repo -- `lib_tasks_gh.py` - - Invoke tasks for GitHub pull requests and issues -- `lib_tasks_git.py` - - Invoke tasks for git branch, merge, and commit operations -- `lib_tasks_integrate.py` - - Invoke tasks for integrating changes between repositories -- `lib_tasks_lint.py` - - Invoke tasks for linting and code quality checks -- `lib_tasks_perms.py` - - Invoke tasks for managing file permissions -- `lib_tasks_print.py` - - Invoke tasks for printing setup and environment info -- `lib_tasks_pytest.py` - - Invoke tasks for running pytest suites (fast, slow, superslow) -- `lib_tasks_utils.py` - - Shared utilities and helpers used across task modules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py deleted file mode 100644 index 0aa7f7f4b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/asana_utils.py +++ /dev/null @@ -1,1156 +0,0 @@ -""" -Enhanced Asana Analytics with Time Estimation and Team Grouping. 
- -Import as: - -import helpers.asana_utils as hasautil -""" - -import datetime as datetime_lib -import json -import logging -import os -from typing import Any, Dict, List, Optional - -import asana -import asana.rest as arest -import dateutil.parser as dateutil_parser -import pandas as pd - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# EnhancedAsanaAnalytics -# ############################################################################# - - -class EnhancedAsanaAnalytics: - def __init__(self, access_token: Optional[str] = None) -> None: - # Get token from parameter or environment variable. - token = access_token or os.getenv("ASANA_ACCESS_TOKEN") - if not token: - raise ValueError( - "Asana access token must be provided or set in ASANA_ACCESS_TOKEN" - ) - # Initialize Asana API client with access token. - configuration = asana.Configuration() - configuration.access_token = token - self.api_client = asana.ApiClient(configuration) - # Initialize API endpoints. - self.workspaces_api = asana.WorkspacesApi(self.api_client) - self.users_api = asana.UsersApi(self.api_client) - self.tasks_api = asana.TasksApi(self.api_client) - self.stories_api = asana.StoriesApi(self.api_client) - self.projects_api = asana.ProjectsApi(self.api_client) - self.custom_fields_api = asana.CustomFieldsApi(self.api_client) - - def get_workspace_gid(self, workspace_name: Optional[str] = None) -> str: - """ - Get the workspace GID by name or return the first available workspace. - - Retrieve the GID (Global ID) for an Asana workspace. If no - workspace name is provided, return the GID of the first - workspace available to the user. - - :param workspace_name: name of the workspace to find. - :return: workspace GID as a string - """ - _LOG.info( - "Fetching workspace GID for workspace: %s", - workspace_name or "first available", - ) - # Fetch all available workspaces. 
- opts: Dict[str, Any] = {} - workspaces = self.workspaces_api.get_workspaces(opts) - # Convert to list if needed. - workspace_list = list(workspaces) if workspaces else [] - _LOG.info("Found %s workspaces", len(workspace_list)) - # Check if any workspaces exist. - if not workspace_list: - raise ValueError("No workspaces found") - result = None - # Search for specific workspace by name if provided. - if workspace_name: - for ws in workspace_list: - if ws["name"].lower() == workspace_name.lower(): - _LOG.info( - "Found workspace '%s' with GID: %s", - workspace_name, - ws["gid"], - ) - result = str(ws["gid"]) - break - if result is None: - raise ValueError(f"Workspace '{workspace_name}' not found") - else: - # Return first workspace if no name specified. - _LOG.info( - "Using first workspace: %s (GID: %s)", - workspace_list[0]["name"], - workspace_list[0]["gid"], - ) - result = str(workspace_list[0]["gid"]) - return result - - def get_team_members(self, workspace_gid: str) -> List[Dict[str, Any]]: - """ - Get all team members in a workspace. - - :param workspace_gid: workspace GID to query for users - :return: user information with keys 'gid','name', and 'email' - """ - _LOG.info("Fetching team members for workspace: %s", workspace_gid) - # Fetch all users in the workspace. - opts: Dict[str, Any] = {} - users = self.users_api.get_users_for_workspace(workspace_gid, opts) - # Convert to list if needed. - users_list = list(users) if users else [] - _LOG.info("Found %s team members", len(users_list)) - # Extract relevant user information. - result = [ - {"gid": u["gid"], "name": u["name"], "email": u.get("email", "N/A")} - for u in users_list - ] - # Log member names. - member_names = [r["name"] for r in result] - _LOG.debug("Team members: %s", ", ".join(member_names)) - return result - - def get_user_by_name( - self, workspace_gid: str, username: str - ) -> Optional[Dict[str, Any]]: - """ - Get a specific user by their name in a workspace. 
- - Search for a user by their display name (case-insensitive - partial match). - - :param workspace_gid: workspace GID to search in - :param username: username or partial name to search for - :return: user with 'gid', 'name', and 'email' - """ - _LOG.info("Searching for user: %s", username) - team_members = self.get_team_members(workspace_gid) - res = None - # Search for exact match first. - for team_member in team_members: - if team_member["name"].lower() == username.lower(): - _LOG.info("Found exact match: %s", team_member["name"]) - res = team_member - # Search for partial match. - for team_member in team_members: - if username.lower() in team_member["name"].lower(): - _LOG.info("Found partial match: %s", team_member["name"]) - res = team_member - if res is None: - _LOG.warning("User '%s' not found in workspace", username) - return res - - def get_user_tasks_detailed( - self, - workspace_gid: str, - user_identifier: str, - *, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - ) -> List[Dict[str, Any]]: - """ - Get detailed task information including estimated time. - - Fetch all tasks for a user with extended fields including custom - fields for time estimates, projects, tags, sections, and dates. - - :param workspace_gid: workspace GID to query - :param user_identifier: user GID or username to retrieve tasks - for - :param start_date: start date for filtering tasks by creation - date. - :param end_date: end date for filtering tasks by creation date. - :return: data with name, completion status, timestamps, custom - fields, and project associations - """ - # Resolve username to GID if needed. 
- if not user_identifier.isdigit(): - _LOG.info("Resolving username '%s' to GID", user_identifier) - user = self.get_user_by_name(workspace_gid, user_identifier) - if not user: - _LOG.error("User '%s' not found", user_identifier) - return [] - user_gid = user["gid"] - _LOG.debug("Resolved '%s' to GID: %s", user_identifier, user_gid) - else: - user_gid = user_identifier - _LOG.info("Fetching detailed tasks for user GID: %s", user_gid) - try: - # Define query parameters for task retrieval with extended fields. - opts = { - "assignee": user_gid, - "workspace": workspace_gid, - "opt_fields": ( - "name,completed,completed_at,created_at,modified_at," - "projects.name,projects.gid,num_subtasks,memberships.section.name," - "custom_fields,custom_fields.name,custom_fields.display_value," - "custom_fields.number_value,due_on,due_at,start_on," - "assignee.name,tags.name" - ), - } - # Fetch all tasks for the user. - _LOG.debug("Querying Asana API for detailed tasks...") - tasks = self.tasks_api.get_tasks(opts) - # Convert to list if generator. - tasks_list = list(tasks) if tasks else [] - _LOG.info( - "Retrieved %d tasks from API for user GID: %s", - len(tasks_list), - user_gid, - ) - # Make start_date and end_date timezone-aware if they aren't already. - if start_date and start_date.tzinfo is None: - start_date = start_date.replace(tzinfo=datetime_lib.timezone.utc) - if end_date and end_date.tzinfo is None: - end_date = end_date.replace(tzinfo=datetime_lib.timezone.utc) - # Filter tasks by date range if specified. - filtered_tasks = [] - for task in tasks_list: - # Parse creation date. - created_at = ( - dateutil_parser.parse(task["created_at"]) - if task.get("created_at") - else None - ) - # Apply start date filter. - if start_date and created_at and created_at < start_date: - continue - # Apply end date filter. - if end_date and created_at and created_at > end_date: - continue - # Add task to filtered results. 
- filtered_tasks.append(task) - _LOG.info( - "Filtered to %d tasks within date range for user GID: %s", - len(filtered_tasks), - user_gid, - ) - return filtered_tasks - except arest.ApiException as e: - _LOG.error("API error fetching detailed tasks: %s", e) - raise - except Exception as e: - _LOG.error("Unexpected error fetching detailed tasks: %s", e) - return [] - - def extract_time_estimate(self, task: Dict[str, Any]) -> Optional[float]: - """ - Extract time estimate from custom fields. - - Search through task custom fields for time estimation values. - Looks for common field names like 'estimated time', 'estimate', - 'hours', etc. - - :param task: tasks data containing custom_fields - :return: estimated hours as float, or None if not found - """ - result = None - if not task.get("custom_fields"): - _LOG.debug( - "No custom fields found for task: %s", task.get("gid", "unknown") - ) - return result - # Common field names for time estimates. - time_field_names = [ - "estimated time", - "estimate", - "time estimate", - "hours", - "estimated hours", - "effort", - ] - for field in task["custom_fields"]: - field_name = field.get("name", "").lower() - # Check if field name matches any time estimation pattern. - if any(time_name in field_name for time_name in time_field_names): - # Try number_value first, then display_value. 
- if field.get("number_value") is not None: - result = float(field["number_value"]) / 60.0 - _LOG.debug( - "Found time estimate %s hours in field '%s' for task: %s", - result, - field.get("name"), - task.get("gid", "unknown"), - ) - break - elif field.get("display_value"): - try: - result = float(field["display_value"]) / 60.0 - _LOG.debug( - "Found time estimate %s hours in field '%s' for task: %s", - result, - field.get("name"), - task.get("gid", "unknown"), - ) - break - except (ValueError, TypeError): - _LOG.warning( - "Could not parse display_value '%s' as float for task: %s", - field.get("display_value"), - task.get("gid", "unknown"), - ) - return result - - def get_task_stories(self, task_gid: str) -> List[Dict[str, Any]]: - """ - Get all stories (comments and activity) for a task. - - Fetch all stories including comments, task updates, and system - activities for a specific task. - - :param task_gid: task GID to fetch stories for - :return: data of type, text, created_at, and creator information - """ - _LOG.info("Fetching stories for task: %s", task_gid) - try: - opts = { - "opt_fields": ( - "type,text,created_at,created_by.name,created_by.email," - "resource_subtype,is_edited" - ) - } - stories = self.stories_api.get_stories_for_task(task_gid, opts) - stories_list = list(stories) if stories else [] - _LOG.debug( - "Found %d stories for task %s", len(stories_list), task_gid - ) - return stories_list - except arest.ApiException as e: - _LOG.error("API error fetching stories for task %s: %s", task_gid, e) - return [] - except Exception as e: - _LOG.error( - "Unexpected error fetching stories for task %s: %s", task_gid, e - ) - return [] - - def extract_comment_metrics(self, task_gid: str) -> Dict[str, Any]: - """ - Extract comment and activity metrics for a task. 
- - Analyze all stories for a task to extract metrics including: - - Total comment count - - Unique commenters - - Activity count (system updates) - - Last activity timestamp - - Comment frequency - - :param task_gid: task GID to analyze - :return: comment metrics - """ - stories = self.get_task_stories(task_gid) - # Initialize counters. - num_comments = 0 - num_activities = 0 - unique_commenters = set() - last_activity_at = None - for story in stories: - # Parse created timestamp. - created_at = ( - dateutil_parser.parse(story["created_at"]) - if story.get("created_at") - else None - ) - # Track last activity. - if created_at: - if last_activity_at is None or created_at > last_activity_at: - last_activity_at = created_at - # Categorize story type. - story_type = story.get("type", "") - if story_type == "comment": - num_comments += 1 - # Track unique commenters. - if story.get("created_by") and story["created_by"].get("name"): - unique_commenters.add(story["created_by"]["name"]) - else: - # System activities (status changes, assignments, etc). - num_activities += 1 - result = { - "num_comments": num_comments, - "num_activities": num_activities, - "total_stories": len(stories), - "unique_commenters": len(unique_commenters), - "unique_commenter_names": list(unique_commenters), - "last_activity_at": last_activity_at, - } - _LOG.debug( - "Task %s metrics: %d comments, %d activities, %d unique commenters", - task_gid, - num_comments, - num_activities, - len(unique_commenters), - ) - return result - - def calculate_activity_rate( - self, - created_at: datetime_lib.datetime, - last_activity_at: Optional[datetime_lib.datetime], - num_comments: int, - num_activities: int, - ) -> Dict[str, float]: - """ - Calculate activity rate metrics for a task. - - Compute various activity rate metrics based on task timeline and - activity counts. 
- - :param created_at: task creation timestamp - :param last_activity_at: timestamp of last activity/comment - :param num_comments: total number of comments - :param num_activities: total number of system activities - :return: activity rate metric - """ - now = datetime_lib.datetime.now(datetime_lib.timezone.utc) - - # Calculate task age in days. - task_age_days = (now - created_at).total_seconds() / 86400 - - # Calculate days since last activity. - days_since_activity = None - if last_activity_at: - days_since_activity = ( - now - last_activity_at - ).total_seconds() / 86400 - - # Calculate activity rates (avoid division by zero). - if task_age_days > 0: - comments_per_day = num_comments / task_age_days - activities_per_day = num_activities / task_age_days - total_activity_per_day = ( - num_comments + num_activities - ) / task_age_days - else: - comments_per_day = 0.0 - activities_per_day = 0.0 - total_activity_per_day = 0.0 - - result = { - "task_age_days": task_age_days, - "comments_per_day": comments_per_day, - "activities_per_day": activities_per_day, - "total_activity_per_day": total_activity_per_day, - "days_since_activity": days_since_activity, - } - - return result - - def get_user_tasks_with_activity( - self, - workspace_gid: str, - user_identifier: str, - *, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - include_comments: bool = True, - ) -> List[Dict[str, Any]]: - """ - Get detailed task information including comments and activity metrics. - - Extended version of get_user_tasks_detailed that also fetches - comment and activity data for each task. - - :param workspace_gid: workspace GID to query - :param user_identifier: user GID or username to retrieve tasks - for - :param start_date: start date for filtering tasks by creation - date - :param end_date: end date for filtering tasks by creation date - :param include_comments: if True, fetch comment/activity data - for each task (default: True). 
Set to False for faster - execution - :return: task data with comment and activity metrics included - """ - # Get detailed tasks first. - tasks = self.get_user_tasks_detailed( - workspace_gid, - user_identifier, - start_date=start_date, - end_date=end_date, - ) - - if not include_comments: - return tasks - - _LOG.info("Fetching comment/activity data for %d tasks", len(tasks)) - - # Enhance each task with comment metrics. - for i, task in enumerate(tasks): - if (i + 1) % 10 == 0: - _LOG.info( - "Processing task %d/%d for comments...", i + 1, len(tasks) - ) - - # Get comment metrics. - comment_metrics = self.extract_comment_metrics(task["gid"]) - - # Add metrics to task. - task["num_comments"] = comment_metrics["num_comments"] - task["num_activities"] = comment_metrics["num_activities"] - task["total_stories"] = comment_metrics["total_stories"] - task["unique_commenters"] = comment_metrics["unique_commenters"] - task["unique_commenter_names"] = comment_metrics[ - "unique_commenter_names" - ] - task["last_activity_at"] = comment_metrics["last_activity_at"] - - # Calculate activity rates if we have created_at. - if task.get("created_at"): - created_at = dateutil_parser.parse(task["created_at"]) - activity_rates = self.calculate_activity_rate( - created_at, - comment_metrics["last_activity_at"], - comment_metrics["num_comments"], - comment_metrics["num_activities"], - ) - task.update(activity_rates) - - _LOG.info("Comment/activity data added to all tasks") - return tasks - - def create_task_dataframe( - self, - workspace_gid: str, - user_identifiers: Optional[List[str]] = None, - *, - project_names: Optional[List[str]] = None, - start_date: Optional[datetime_lib.datetime] = None, - end_date: Optional[datetime_lib.datetime] = None, - team_mapping: Optional[Dict[str, str]] = None, - include_comments: bool = False, - ) -> pd.DataFrame: - """ - Create comprehensive task DataFrame for all users. 
- - Build a detailed DataFrame containing all task information for - specified users, with optional filtering by project and date - range. Includes time estimates, sprint information, and team - assignments. - - :param workspace_gid: workspace GID to query - :param user_identifiers: usernames or GIDs to analyze. - :param project_names: project names to filter by and use - as team names (e.g., ["tech-now", "tech-next"]). If - provided, team will be determined from project name - :param start_date: start date for filtering tasks by creation - date - :param end_date: end date for filtering tasks by creation date - :param team_mapping: username to team name. Only - used if project_names is not provided - - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} - :param include_comments: if True, fetch comment/activity data - (default: False). Set to True to include activity metrics - :return: data with columns including user info, task - details, dates, completion status, time estimates, project, - sprint, section, tags, and subtasks - """ - _LOG.info("Creating comprehensive task DataFrame") - # Get users to analyze. - team_members = [] - if user_identifiers: - for user_id in user_identifiers: - if user_id.isdigit(): - # If GID, fetch user info. - opts = {"opt_fields": "name,email"} - user_info = self.users_api.get_user(user_id, opts) - team_members.append( - { - "gid": user_id, - "name": user_info["name"], - "email": user_info.get("email", "N/A"), - } - ) - else: - # If username, resolve to user. - user = self.get_user_by_name(workspace_gid, user_id) - if user: - team_members.append(user) - else: - # Get all team members if no specific users provided. - team_members = self.get_team_members(workspace_gid) - all_task_data = [] - # Process tasks for each team member. - for member in team_members: - _LOG.info("Processing tasks for: %s", member["name"]) - # Fetch detailed tasks for this user. 
- if include_comments: - tasks = self.get_user_tasks_with_activity( - workspace_gid, - member["gid"], - start_date=start_date, - end_date=end_date, - include_comments=True, - ) - else: - tasks = self.get_user_tasks_detailed( - workspace_gid, - member["gid"], - start_date=start_date, - end_date=end_date, - ) - # Process each task. - for task in tasks: - # Parse dates. - created_at = ( - dateutil_parser.parse(task["created_at"]) - if task.get("created_at") - else None - ) - completed_at = ( - dateutil_parser.parse(task["completed_at"]) - if task.get("completed_at") - else None - ) - due_at = ( - dateutil_parser.parse(task["due_at"]) - if task.get("due_at") - else None - ) - # Check if task is overdue. - is_overdue = False - if not task.get("completed") and due_at: - is_overdue = due_at < datetime_lib.datetime.now( - datetime_lib.timezone.utc - ) - # Extract time estimate from custom fields. - estimated_hours = self.extract_time_estimate(task) - # Calculate actual hours if task is completed. - actual_hours = None - if completed_at and created_at: - actual_hours = ( - completed_at - created_at - ).total_seconds() / 3600 - # Extract projects, tags, and sections. - projects = [p["name"] for p in task.get("projects", [])] - project_gids = [p["gid"] for p in task.get("projects", [])] - tags = [t["name"] for t in task.get("tags", [])] - # Extract sections (sprints in Asana). - sections = [] - sprints = [] - if task.get("memberships"): - for membership in task["memberships"]: - if membership.get("section"): - section_name = membership["section"]["name"] - sections.append(section_name) - # Identify sprint sections using common patterns. - if any( - keyword in section_name.lower() - for keyword in [ - "sprint", - "iteration", - "cycle", - "week", - ] - ): - sprints.append(section_name) - # Build task data dictionary. - task_data = { - # User info. - "user_name": member["name"], - "user_email": member["email"], - "user_gid": member["gid"], - # Task info. 
- "task_name": task.get("name", "Untitled"), - "task_gid": task["gid"], - # Dates. - "created_at": created_at, - "completed_at": completed_at, - "due_on": task.get("due_on"), - "due_at": due_at, - "start_on": task.get("start_on"), - # Status. - "is_completed": task.get("completed", False), - "is_overdue": is_overdue, - # Time tracking. - "estimated_hours": estimated_hours, - "actual_hours": actual_hours, - # Organization. - "project": projects[0] if projects else None, - "all_projects": ", ".join(projects) if projects else None, - "project_gid": project_gids[0] if project_gids else None, - "tags": ", ".join(tags) if tags else None, - "section": sections[0] if sections else None, - "sprint": sprints[0] if sprints else None, - "all_sprints": ", ".join(sprints) if sprints else None, - "num_subtasks": task.get("num_subtasks", 0), - } - # Add comment/activity metrics if included. - if include_comments: - task_data.update( - { - "num_comments": task.get("num_comments", 0), - "num_activities": task.get("num_activities", 0), - "total_stories": task.get("total_stories", 0), - "unique_commenters": task.get( - "unique_commenters", 0 - ), - "last_activity_at": task.get("last_activity_at"), - "task_age_days": task.get("task_age_days", 0), - "comments_per_day": task.get( - "comments_per_day", 0.0 - ), - "activities_per_day": task.get( - "activities_per_day", 0.0 - ), - "total_activity_per_day": task.get( - "total_activity_per_day", 0.0 - ), - "days_since_activity": task.get( - "days_since_activity" - ), - } - ) - # Add team - either from project name or mapping. - if project_names: - # Determine team from project name. - task_data["team"] = task_data["project"] - elif team_mapping: - task_data["team"] = team_mapping.get( - member["name"], "Unassigned" - ) - else: - # No team mapping, use project as team (default). - task_data["team"] = task_data["project"] - all_task_data.append(task_data) - # Create DataFrame. 
- df = pd.DataFrame(all_task_data) - # Filter by project if specified. - if project_names and len(df) > 0: - df = df[df["project"].isin(project_names)] - _LOG.info( - "Filtered to %d tasks from projects: %s", len(df), project_names - ) - _LOG.info("Created DataFrame with %d tasks", len(df)) - result = df - return result - - def create_team_comparison_df( - self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None - ) -> pd.DataFrame: - """ - Create team-level comparison DataFrame from task DataFrame. - - Aggregate task-level data to team-level metrics for comparison - across teams. Requires task DataFrame to have 'team' column. - - :param task_df: data with 'team' column - :param metrics: metrics to calculate. If None, calculate all - :return: data with team-level aggregated metrics - """ - if "team" not in task_df.columns: - _LOG.error( - "task_df missing 'team' column. Available columns: %s", - task_df.columns.tolist(), - ) - raise ValueError( - "task_df must have 'team' column. Pass team_mapping or " - "project_names to create_task_dataframe()" - ) - - _LOG.info("Creating team comparison DataFrame") - _LOG.info("Found %d unique teams in data", task_df["team"].nunique()) - - # Set default metrics if not provided. - if metrics is None: - metrics = [ - "total_tasks", - "completed_tasks", - "in_progress_tasks", - "completion_rate", - "total_estimated_hours", - "avg_estimated_hours", - "total_actual_hours", - "overdue_tasks", - "overdue_rate", - "unique_users", - ] - team_stats = [] - # Calculate metrics for each team. - for team_name in task_df["team"].unique(): - if team_name is None or ( - isinstance(team_name, float) and pd.isna(team_name) - ): - _LOG.warning("Skipping None/NaN team name") - continue - - team_data = task_df[task_df["team"] == team_name] - _LOG.debug( - "Processing team: %s (%d tasks)", team_name, len(team_data) - ) - - stats = {"team": team_name} - # Calculate each requested metric. 
- if "total_tasks" in metrics: - stats["total_tasks"] = len(team_data) - if "completed_tasks" in metrics: - stats["completed_tasks"] = team_data["is_completed"].sum() - if "in_progress_tasks" in metrics: - stats["in_progress_tasks"] = (~team_data["is_completed"]).sum() - if "completion_rate" in metrics: - if len(team_data) > 0: - stats["completion_rate"] = ( - stats["completed_tasks"] / len(team_data) - ) * 100 - else: - stats["completion_rate"] = 0.0 - if "total_estimated_hours" in metrics: - stats["total_estimated_hours"] = team_data[ - "estimated_hours" - ].sum() - if "avg_estimated_hours" in metrics: - stats["avg_estimated_hours"] = team_data[ - "estimated_hours" - ].mean() - if "total_actual_hours" in metrics: - stats["total_actual_hours"] = team_data["actual_hours"].sum() - if "overdue_tasks" in metrics: - stats["overdue_tasks"] = team_data["is_overdue"].sum() - if "overdue_rate" in metrics: - active_tasks = (~team_data["is_completed"]).sum() - if active_tasks > 0: - stats["overdue_rate"] = ( - stats["overdue_tasks"] / active_tasks - ) * 100 - else: - stats["overdue_rate"] = 0.0 - if "unique_users" in metrics: - stats["unique_users"] = team_data["user_name"].nunique() - team_stats.append(stats) - - _LOG.info("Team comparison completed for %d teams", len(team_stats)) - result = pd.DataFrame(team_stats) - return result - - def create_user_comparison_df( - self, task_df: pd.DataFrame, metrics: Optional[List[str]] = None - ) -> pd.DataFrame: - """ - Create user-level comparison DataFrame with aggregated metrics. - - Aggregate task-level data to user-level metrics for individual - performance comparison. - - :param task_df: tasks data - :param metrics: metrics to calculate. If None, calculate all - :return: data with user-level aggregated metrics - """ - # Set default metrics if not provided. 
- if metrics is None: - metrics = [ - "total_tasks", - "completed_tasks", - "completion_rate", - "total_estimated_hours", - "avg_estimated_hours", - "overdue_tasks", - "unique_projects", - ] - user_stats = [] - # Calculate metrics for each user. - for user_name in task_df["user_name"].unique(): - user_data = task_df[task_df["user_name"] == user_name] - stats = { - "user_name": user_name, - "user_email": user_data["user_email"].iloc[0], - } - # Add team if available. - if "team" in task_df.columns: - stats["team"] = user_data["team"].iloc[0] - # Calculate each requested metric. - if "total_tasks" in metrics: - stats["total_tasks"] = len(user_data) - if "completed_tasks" in metrics: - stats["completed_tasks"] = user_data["is_completed"].sum() - if "completion_rate" in metrics: - if len(user_data) > 0: - stats["completion_rate"] = ( - stats["completed_tasks"] / len(user_data) - ) * 100 - else: - stats["completion_rate"] = 0.0 - if "total_estimated_hours" in metrics: - stats["total_estimated_hours"] = user_data[ - "estimated_hours" - ].sum() - if "avg_estimated_hours" in metrics: - stats["avg_estimated_hours"] = user_data[ - "estimated_hours" - ].mean() - if "overdue_tasks" in metrics: - stats["overdue_tasks"] = user_data["is_overdue"].sum() - if "unique_projects" in metrics: - projects = user_data["all_projects"].dropna() - unique_projects = set() - for proj_str in projects: - unique_projects.update(proj_str.split(", ")) - stats["unique_projects"] = len(unique_projects) - user_stats.append(stats) - result = pd.DataFrame(user_stats) - return result - - -# ############################################################################# -# Convenience functions -# ############################################################################# - - -def list_workspace_users( - workspace_name: str, *, access_token: Optional[str] = None -) -> List[str]: - """ - Get all usernames in a workspace. - - Convenience function to quickly see all available users in a - workspace. 
- - :param workspace_name: name of workspace to query - :param access_token: Asana access token - :return: usernames (display names) - """ - # Initialize analytics instance. - analytics_instance = EnhancedAsanaAnalytics(access_token) - # Get workspace GID. - workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) - # Get team members. - team_members = analytics_instance.get_team_members(workspace_gid_local) - # Extract usernames. - result = [member["name"] for member in team_members] - return result - - -def get_user_by_name( - workspace_name: str, - username: str, - *, - access_token: Optional[str] = None, -) -> Optional[Dict[str, Any]]: - """ - Get a specific user by their name in a workspace. - - Convenience function to find a user without instantiating the class. - - :param workspace_name: name of workspace to search in - :param username: username or partial name to search for - :param access_token: Asana access token - :return: user with 'gid', 'name', and 'email', or None if not found - """ - # Initialize analytics instance. - analytics_instance = EnhancedAsanaAnalytics(access_token) - # Get workspace GID. - workspace_gid_local = analytics_instance.get_workspace_gid(workspace_name) - # Find user. - result = analytics_instance.get_user_by_name(workspace_gid_local, username) - return result - - -def create_kibana_ready_dataset( - workspace_name: str, - start_date: datetime_lib.datetime, - end_date: datetime_lib.datetime, - *, - project_names: Optional[List[str]] = None, - team_mapping: Optional[Dict[str, str]] = None, - access_token: Optional[str] = None, - user_list: Optional[List[str]] = None, - include_comments: bool = False, -) -> Dict[str, pd.DataFrame]: - """ - Create Kibana-ready datasets with all metrics. - - Generate three DataFrames suitable for Kibana visualization: detailed - task-level data, user-level aggregates, and team-level aggregates. - By default, extracts ALL tasks from ALL users and ALL projects. 
- The 'project' column can be used for filtering in Kibana. - - :param workspace_name: Asana workspace name to analyze - :param start_date: start date for analysis period - :param end_date: end date for analysis period - :param project_names: project names to filter by - (e.g., ["tech-now", "tech-next"]). If None, extract ALL projects. - When provided, also uses project names as team names - :param team_mapping: usernames to team names. - Alternative to project_names. If both are None, uses project as - team - - Example: {"John Doe": "tech-now", "Jane Smith": "tech-next"} - :param access_token: Asana access token. If None, reads from - environment variable ASANA_ACCESS_TOKEN - :param user_list: specific usernames or GIDs to analyze. If - None, analyze ALL team members - :param include_comments: if True, fetch comment/activity data - (default: False). Set to True to include activity metrics - :return: data with three DataFrames: - - 'tasks': detailed task-level data with sprint/section info - - 'users': user-level aggregated metrics - - 'teams': team-level aggregated metrics - """ - _LOG.info("=" * 70) - _LOG.info("STARTING KIBANA DATASET CREATION") - _LOG.info("=" * 70) - _LOG.info("Workspace: %s", workspace_name) - _LOG.info("Date range: %s to %s", start_date.date(), end_date.date()) - _LOG.info("Project filter: %s", project_names if project_names else "ALL") - _LOG.info("User filter: %s", user_list if user_list else "ALL") - _LOG.info("Include comments: %s", include_comments) - - # Initialize analytics instance. - _LOG.info("Initializing Asana Analytics client...") - analytics = EnhancedAsanaAnalytics(access_token) - - # Get workspace GID. - _LOG.info("Resolving workspace GID for: %s", workspace_name) - workspace_gid = analytics.get_workspace_gid(workspace_name) - _LOG.info("Workspace GID resolved: %s", workspace_gid) - - # Create detailed task DataFrame. 
- _LOG.info("-" * 70) - _LOG.info("STEP 1/3: Creating detailed task DataFrame...") - _LOG.info("-" * 70) - task_df = analytics.create_task_dataframe( - workspace_gid, - user_identifiers=user_list, - project_names=project_names, - start_date=start_date, - end_date=end_date, - team_mapping=team_mapping, - include_comments=include_comments, - ) - _LOG.info("Task DataFrame created with %d rows", len(task_df)) - - # Create user-level comparison DataFrame. - _LOG.info("-" * 70) - _LOG.info("STEP 2/3: Creating user-level aggregates...") - _LOG.info("-" * 70) - user_df = analytics.create_user_comparison_df(task_df) - _LOG.info("User DataFrame created with %d rows", len(user_df)) - - # Create team-level comparison DataFrame. - _LOG.info("-" * 70) - _LOG.info("STEP 3/3: Creating team-level aggregates...") - _LOG.info("-" * 70) - team_df = analytics.create_team_comparison_df(task_df) - _LOG.info("Team DataFrame created with %d rows", len(team_df)) - - _LOG.info("=" * 70) - _LOG.info("DATASET CREATION COMPLETE!") - _LOG.info("=" * 70) - _LOG.info("Summary:") - _LOG.info(" Tasks: %d rows", len(task_df)) - _LOG.info(" Users: %d rows", len(user_df)) - _LOG.info(" Teams: %d rows", len(team_df)) - _LOG.info("=" * 70) - - result = {"tasks": task_df, "users": user_df, "teams": team_df} - return result - - -def save_to_ndjson( - df: pd.DataFrame, filepath: str, index_name: Optional[str] = None -) -> None: - """ - Save DataFrame to NDJSON format for Kibana/OpenSearch bulk upload. - - Convert DataFrame to newline-delimited JSON format suitable for - Elasticsearch/OpenSearch bulk API ingestion. - - :param df: data to save - :param filepath: output file path (e.g., 'asana_tasks.ndjson') - :param index_name: optional index name to include in bulk action - metadata. 
If None, only document data is written - """ - _LOG.info("Saving DataFrame to NDJSON: %s", filepath) - _LOG.info("DataFrame shape: %d rows, %d columns", len(df), len(df.columns)) - - # Convert DataFrame to records (list of dicts). - records = df.to_dict(orient="records") - - # Open file for writing. - with open(filepath, "w") as f: - for record in records: - # Convert timestamps to ISO format strings. - for key, value in record.items(): - if pd.isna(value): - # Convert NaN/None to null. - record[key] = None - elif isinstance(value, pd.Timestamp): - # Convert pandas Timestamp to ISO string. - record[key] = value.isoformat() - - if index_name: - # Write bulk API metadata line. - action = {"index": {"_index": index_name}} - f.write(json.dumps(action) + "\n") - - # Write document data line. - f.write(json.dumps(record) + "\n") - - _LOG.info("Successfully saved %d records to %s", len(records), filepath) - - -def save_datasets_for_kibana( - datasets: Dict[str, pd.DataFrame], - output_dir: str = ".", - *, - use_ndjson: bool = True, - index_prefix: str = "asana", -) -> Dict[str, str]: - """ - Save all datasets to files for Kibana ingestion. - - Save task, user, and team DataFrames to either NDJSON or CSV format - for Kibana/OpenSearch ingestion. - - :param datasets: dictionary with 'tasks', 'users', 'teams' - DataFrames from create_kibana_ready_dataset() - :param output_dir: directory to save files (default: current - directory) - :param use_ndjson: if True, save as NDJSON format. 
If False, save as - CSV (default: True) - :param index_prefix: prefix for index names when using NDJSON - (default: 'asana') - :return: dataset names to saved file paths - """ - _LOG.info("=" * 70) - _LOG.info("SAVING DATASETS FOR KIBANA") - _LOG.info("=" * 70) - _LOG.info("Output directory: %s", output_dir) - _LOG.info("Format: %s", "NDJSON" if use_ndjson else "CSV") - - saved_files = {} - extension = "ndjson" if use_ndjson else "csv" - - for dataset_name, df in datasets.items(): - # Construct file path. - filename = "{}_{}_{}.{}".format( - index_prefix, dataset_name, "kibana", extension - ) - filepath = "{}/{}".format(output_dir, filename) - - _LOG.info("Saving %s dataset (%d rows)...", dataset_name, len(df)) - - if use_ndjson: - # Save as NDJSON with index name. - index_name = "{}-{}".format(index_prefix, dataset_name) - save_to_ndjson(df, filepath, index_name=index_name) - else: - # Save as CSV. - df.to_csv(filepath, index=False) - _LOG.info("Saved to CSV: %s", filepath) - - saved_files[dataset_name] = filepath - - _LOG.info("=" * 70) - _LOG.info("ALL DATASETS SAVED!") - _LOG.info("=" * 70) - for dataset_name, filepath in saved_files.items(): - _LOG.info(" %s: %s", dataset_name, filepath) - _LOG.info("=" * 70) - - result = saved_files - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py deleted file mode 100644 index 318897d3e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/github_utils.py +++ /dev/null @@ -1,2110 +0,0 @@ -""" -Import as: - -import helpers.github_utils as hgitutil -""" - -import collections -import datetime -import functools -import itertools -import json -import logging -import os -import time -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple - 
-import github -import matplotlib.pyplot as plt -import pandas as pd -from tqdm import tqdm - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Why not using helpers.hcache_simple as hcacsimp. -def github_cached(cache_type: str = "json", write_through: bool = True): - """ - Cache decorator specifically for GitHub API functions. - - Automatically excludes the 'client' parameter (first positional arg) - from cache keys since client instances change across sessions. - - :param cache_type: Type of cache ('json' or 'pickle') - :param write_through: If True, write to disk after each cache update - :return: Decorated function with caching - """ - - def decorator(func: Callable) -> Callable: - # Get function name for cache. - func_name = func.__name__ - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Set cache type property. - existing_type = hcacsimp.get_cache_property(func_name, "type") - if not existing_type: - hcacsimp.set_cache_property(func_name, "type", cache_type) - - # Create a cached version that only uses args after client. - @functools.wraps(func) - def wrapper(client, *args, **kwargs): - # Create cache key from everything EXCEPT client. - cache_key = json.dumps( - {"args": args, "kwargs": kwargs}, - sort_keys=True, - default=str, - ) - # Get cache. - cache = hcacsimp.get_cache(func_name) - # Check if we have cached value. - if cache_key in cache: - _LOG.debug("Cache hit for %s", func_name) - return cache[cache_key] - # Cache miss - call the actual function. - _LOG.debug("Cache miss for %s, fetching from API", func_name) - result = func(client, *args, **kwargs) - # Store in cache - cache[cache_key] = result - # Write to disk if enabled. 
- if write_through: - hcacsimp.flush_cache_to_disk(func_name) - return result - - return wrapper - - return decorator - - -# ############################################################################# -# GitHubAPI -# ############################################################################# - - -class GitHubAPI: - """ - Initialize and manage authentication with the GitHub API using PyGithub. - """ - - def __init__( - self, - *, - access_token: Optional[str] = None, - base_url: Optional[str] = None, - ): - """ - Initialize the GitHub API client. - - :param access_token: GitHub personal access token; if not provided, it - is fetched from the environment variable `GITHUB_ACCESS_TOKEN` - :param base_url: optional custom GitHub Enterprise base URL - """ - self.access_token = access_token or os.getenv("GITHUB_ACCESS_TOKEN") - if not self.access_token: - raise ValueError( - "GitHub Access Token is required. Set it as an environment variable or pass it explicitly." - ) - auth = github.Auth.Token(self.access_token) - self.github = ( - github.Github(base_url=base_url, auth=auth) - if base_url - else github.Github(auth=auth) - ) - - def get_client(self) -> github.Github: - """ - Return the authenticated GitHub client. - - :return: an instance of the authenticated PyGithub client - """ - return self.github - - def close_connection(self) -> None: - """ - Close the GitHub API connection. - """ - self.github.close() - - -# ############################################################################# -# Utility APIs -# ############################################################################# - - -def get_repo_names(client: github.Github, org_name: str) -> Dict[str, List[str]]: - """ - Retrieve a list of repositories under a specific organization. 
- - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :return: a dictionary containing: - - owner: name of the organization - - repositories: repository names - """ - owner = client.get_organization(org_name) - hdbg.dassert_is_not( - owner, - None, - "'%s' is not a valid GitHub organization", - org_name, - ) - repos = [repo.name for repo in owner.get_repos()] - result = {"owner": org_name, "repositories": repos} - return result - - -def get_github_contributors( - client: github.Github, repo_names: List[str] -) -> Dict[str, List[str]]: - """ - Retrieve GitHub usernames contributing to specified repositories. - - :param client: authenticated instance of the PyGithub client - :param repo_names: repository names in the format 'owner/repo' to fetch - contributor usernames - :return: a dictionary containing: - - repository: repository name - - contributors: contributor GitHub usernames - """ - result = {} - for repo_name in repo_names: - repo = client.get_repo(repo_name) - hdbg.dassert_is_not(repo, None, "Could not fetch repo: %s", repo_name) - contributors = [ - contributor.login for contributor in repo.get_contributors() - ] - result[repo_name] = contributors - return result - - -def normalize_period_to_utc( - period: Optional[Tuple[datetime.datetime, datetime.datetime]], -) -> Tuple[Optional[datetime.datetime], Optional[datetime.datetime]]: - """ - Convert a datetime period to UTC and ensure both dates are timezone-aware. 
- - :param period: start and end datetime - :return: UTC-aware start and end datetime, or (None, None) if period - is None - """ - - def to_utc(dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: - res = None - if dt is None: - return res - else: - res = ( - dt.replace(tzinfo=datetime.timezone.utc) - if dt.tzinfo is None - else dt.astimezone(datetime.timezone.utc) - ) - return res - - norm = ( - tuple(to_utc(dt) for dt in period) - if period is not None - else (None, None) - ) - return norm - - -# ############################################################################# -# Global Metrics APIs -# ############################################################################# - - -def get_total_commits( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the number of commits made in the repositories of the specified - organization, optionally filtered by GitHub usernames and a specified time - period. 
- - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter commits; if None, fetches for - all users - :param period: start and end datetime for filtering commits - :return: a dictionary containing: - - total_commits (int): total number of commits across all repositories - - period (str): the time range considered - - commits_per_repository (Dict[str, int]): repository names as keys and - commit counts as values - """ - # Retrieve organization repositories - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", - repos_info, - "Missing 'repositories' key in get_repo_names() output", - ) - repositories = repos_info["repositories"] - total_commits = 0 - commits_per_repository = {} - since, until = period if period else (None, None) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not(repo, None, "Could not retrieve repo: %s", repo_name) - repo_commit_count = 0 - if usernames: - for username in usernames: - commits = repo.get_commits( - author=username, since=since, until=until - ) - hdbg.dassert_is_not( - commits, - None, - "Failed to get commits by '%s' in %s", - username, - repo_name, - ) - repo_commit_count += commits.totalCount - else: - commits = repo.get_commits(since=since, until=until) - hdbg.dassert_is_not( - commits, None, "Failed to get commits in %s", repo_name - ) - repo_commit_count = commits.totalCount - commits_per_repository[repo_name] = repo_commit_count - total_commits += repo_commit_count - result = { - "total_commits": total_commits, - "period": f"{since} to {until}" if since and until else "All time", - "commits_per_repository": commits_per_repository, - } - return result - - -def get_total_prs( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: 
Optional[Tuple[datetime.datetime, datetime.datetime]] = None, - state: str = "all", -) -> Dict[str, Any]: - """ - Fetch the number of pull requests made in the repositories of the specified - organization, optionally filtered by GitHub usernames, a specified time - period, and the state of the pull requests. - - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter pull requests; if None, fetches - for all users - :param period: start and end datetime for filtering pull requests - :param state: the state of the pull requests to fetch; can be 'open', 'closed', or 'all' - :return: a dictionary containing: - - total_prs (int): total number of pull requests - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and pull - request counts as values - """ - # Retrieve repositories for the organization - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", repos_info, "Missing 'repositories' key in repo info" - ) - repositories = repos_info["repositories"] - total_prs = 0 - prs_per_repository = {} - since, until = normalize_period_to_utc(period) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not( - repo, None, "Could not retrieve repository: %s", repo_name - ) - repo_pr_count = 0 - pulls = repo.get_pulls(state=state) - for pr in pulls: - hdbg.dassert_is_not( - pr, None, "PR could not be fetched in %s", repo_name - ) - if usernames and pr.user.login not in usernames: - continue - pr_created_at = ( - pr.created_at.replace(tzinfo=datetime.timezone.utc) - if pr.created_at.tzinfo is None - else pr.created_at.astimezone(datetime.timezone.utc) - ) - if since and until and not (since <= pr_created_at <= until): - continue - repo_pr_count += 1 - prs_per_repository[repo_name] = 
repo_pr_count - total_prs += repo_pr_count - result = { - "total_prs": total_prs, - "period": f"{since} to {until}" if since and until else "All time", - "prs_per_repository": prs_per_repository, - } - return result - - -def get_prs_not_merged( - client: github.Github, - org_name: str, - *, - usernames: Optional[List[str]] = None, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the count of closed but unmerged pull requests in the specified - repositories and by the specified GitHub users within a given period. - - :param client: authenticated instance of the PyGithub client - :param org_name: name of the GitHub organization - :param usernames: GitHub usernames to filter pull requests; if None, fetches for all users - :param period: start and end datetime for filtering pull requests - :return: a dictionary containing: - - prs_not_merged (int): total number of closed but unmerged pull requests - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and - unmerged pull request counts as values - """ - # Fetch all repositories in the org. - repos_info = get_repo_names(client, org_name) - hdbg.dassert_in( - "repositories", - repos_info, - "Missing 'repositories' in get_repo_names() output", - ) - repositories = repos_info["repositories"] - total_unmerged_prs = 0 - prs_per_repository = {} - since, until = normalize_period_to_utc(period) - for repo_name in tqdm( - repositories, desc="Processing repositories", unit="repo" - ): - # Fetch repo object. 
- repo = client.get_repo(f"{org_name}/{repo_name}") - hdbg.dassert_is_not( - repo, - None, - "Could not fetch repo: %s/%s", - org_name, - repo_name, - ) - repo_unmerged_pr_count = 0 - issues = repo.get_issues(state="closed", since=since) - pulls = [] - for issue in issues: - if issue.pull_request: - pull = repo.get_pull(issue.number) - hdbg.dassert_is_not( - pull, - None, - "Could not fetch pull request #%d in %s", - issue.number, - repo_name, - ) - pulls.append(pull) - for pr in pulls: - _LOG.debug("Processing PR #%d from %s", pr.number, repo_name) - pr_created_at = pr.created_at or datetime.datetime.min - pr_created_at = ( - pr_created_at.replace(tzinfo=datetime.timezone.utc) - if pr_created_at.tzinfo is None - else pr_created_at.astimezone(datetime.timezone.utc) - ) - if pr.merged: - continue - if usernames and pr.user.login not in usernames: - continue - if since and until and not (since <= pr_created_at <= until): - continue - repo_unmerged_pr_count += 1 - prs_per_repository[repo_name] = repo_unmerged_pr_count - total_unmerged_prs += repo_unmerged_pr_count - result = { - "prs_not_merged": total_unmerged_prs, - "period": f"{since} to {until}" if since and until else "All time", - "prs_per_repository": prs_per_repository, - } - return result - - -# ############################################################################# -# Individual User Metrics APIs -# ############################################################################# - - -def get_commits_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Retrieve the number of commits made by a specific GitHub user. 
- - :param client: authenticated instance of the PyGithub client - :param username: GitHub username to fetch commit data for - :param org_name: name of the GitHub organization - :param period: start and end datetime for filtering commits - :return: a dictionary containing: - - user (str): GitHub username - - total_commits (int): total number of commits made by the user - - period (str): the time range considered - - commits_per_repository (Dict[str, int]): repository names as keys and - commit counts as values - """ - result = get_total_commits( - client=client, org_name=org_name, usernames=[username], period=period - ) - res_dict = { - "user": username, - "total_commits": result["total_commits"], - "period": result["period"], - "commits_per_repository": result["commits_per_repository"], - } - return res_dict - - -def get_prs_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, - state: str = "all", -) -> Dict[str, Any]: - """ - Fetch the number of pull requests created by a specific GitHub user in the - given repositories and time period. 
- - :param client: authenticated instance of the PyGithub client - :param username: GitHub username to fetch pull request data for - :param org_name: name of the GitHub organization - :param period: start and end datetime for filtering pull requests - :param state: state of the pull requests to fetch; can be 'open', 'closed', - or 'all' - :return: a dictionary containing: - - user (str): GitHub username - - total_prs (int): total number of pull requests created - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and pull - request counts as values - """ - result = get_total_prs( - client=client, - org_name=org_name, - usernames=[username], - period=period, - state=state, - ) - res_dict = { - "user": username, - "total_prs": result["total_prs"], - "period": result["period"], - "prs_per_repository": result["prs_per_repository"], - } - return res_dict - - -def get_prs_not_merged_by_user( - client: github.Github, - username: str, - org_name: str, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, Any]: - """ - Fetch the number of closed but unmerged pull requests created by a specific - GitHub user in the given repositories and time period. 
- - :param client: authenticated instance of the PyGithub client - :param username: GitHub username to fetch unmerged pull request data for - :param org_name: name of the GitHub organization - :param period: start and end datetime for filtering pull requests - :return: a dictionary containing: - - user (str): GitHub username - - prs_not_merged (int): total number of closed but unmerged pull requests - - period (str): the time range considered - - prs_per_repository (Dict[str, int]): repository names as keys and - unmerged PR counts as values - """ - result = get_prs_not_merged( - client=client, org_name=org_name, usernames=[username], period=period - ) - res_dict = { - "user": username, - "prs_not_merged": result["prs_not_merged"], - "period": result["period"], - "prs_per_repository": result["prs_per_repository"], - } - return res_dict - - -def days_between( - period: Tuple[datetime.datetime, datetime.datetime], -) -> List[datetime.date]: - """ - Generate each date in time span. - - :param period: start and end datetime - :return: date span - """ - start_date = period[0].date() - end_date = period[1].date() - days: List[datetime.date] = [] - current = start_date - while current <= end_date: - days.append(current) - current += datetime.timedelta(days=1) - _LOG.debug("Generated %d days in period.", len(days)) - return days - - -@github_cached(cache_type="json", write_through=True) -def get_commit_datetimes_by_repo_period_intrinsic( - client, - org: str, - repo: str, - username: Optional[str], - since: datetime.datetime, - until: datetime.datetime, -) -> List[str]: - """ - Fetch commit timestamps for user in repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param since: start datetime - :param until: end datetime - :return: commit timestamps in ISO format - """ - timestamps: List[str] = [] - # Fetch the repository object. 
- repo_obj = client.get_repo(f"{org}/{repo}") - # Retrieve all commits in the specified time period. - commits = repo_obj.get_commits(since=since, until=until) - # Iterate through each commit to find ones by the specified user. - for c in commits: - # Skip commits with incomplete metadata. - if not c.commit or not c.commit.author or not c.commit.author.date: - continue - # Extract author and committer logins. - author_login = c.author.login if c.author else None - committer_login = c.committer.login if c.committer else None - # Check if this commit belongs to the target user. - if username in (author_login, committer_login): - # Convert commit date to UTC timezone. - dt = c.commit.author.date - dt_utc = ( - dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc) - ) - # Add timestamp to results list. - timestamps.append(dt_utc.isoformat()) - # Log the results summary. - if not timestamps: - _LOG.info( - "No commits found for %s/%s user=%s in %s to %s - possibly outdated or inactive.", - org, - repo, - username, - since.date(), - until.date(), - ) - else: - _LOG.info( - "Fetched %d commits for %s/%s user=%s.", - len(timestamps), - org, - repo, - username, - ) - return timestamps - - -@github_cached(cache_type="json", write_through=True) -def get_pr_datetimes_by_repo_period_intrinsic( - client, - org: str, - repo: str, - username: str, - since: datetime.datetime, - until: datetime.datetime, -) -> List[str]: - """ - Fetch pull request timestamps for user in repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param since: start datetime - :param until: end datetime - :return: PR created timestamps in ISO format - """ - timestamps: List[str] = [] - # Format dates for GitHub search query. - since_date = since.date().isoformat() - until_date = until.date().isoformat() - # Build search query for PRs authored by the user. 
- query = f"repo:{org}/{repo} is:pr author:{username} created:{since_date}..{until_date}" - # Execute the search query. - results = client.search_issues(query) - # Process each PR from search results. - for issue in results: - # Convert PR creation date to UTC timezone. - dt = issue.created_at - dt_utc = dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc) - # Add timestamp to results list. - timestamps.append(dt_utc.isoformat()) - # Log the results summary. - if not timestamps: - _LOG.debug( - "No PRs found for %s/%s user=%s in %s to %s - possibly inactive or outdated.", - org, - repo, - username, - since_date, - until_date, - ) - else: - _LOG.info( - "Found %d PRs for %s/%s user=%s.", - len(timestamps), - org, - repo, - username, - ) - return timestamps - - -@github_cached(cache_type="json", write_through=True) -def get_issue_datetimes_by_repo_intrinsic( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> Dict[str, List[str]]: - """ - Fetch opened and closed issue timestamps for a user in a repo over a given - period. - - :param client: authenticated PyGithub client - :param org: GitHub organization name - :param repo: repository name - :param username: GitHub username - :param period: time window to filter issues - :return: 'assigned' and 'closed' issues containing ISO timestamps - """ - # Extract and format the time period. - since_date = period[0].date().isoformat() - until_date = period[1].date().isoformat() - # Build search query for issues assigned to the user. - query = ( - f"repo:{org}/{repo} type:issue assignee:{username} " - f"created:{since_date}..{until_date}" - ) - # Execute the search query. - issues = client.search_issues(query) - # Initialize lists for assigned and closed issues. - assigned: List[str] = [] - closed: List[str] = [] - # Process each issue from search results. - for issue in issues: - # Skip pull requests that appear in issue search. 
- if issue.pull_request is not None: - continue - # Add issue creation timestamp to assigned list. - assigned.append(issue.created_at.isoformat()) - # Check if issue was closed within the period. - if issue.closed_at: - # Convert closed date to UTC timezone. - closed_dt = issue.closed_at - dt_utc = ( - closed_dt - if closed_dt.tzinfo - else closed_dt.replace(tzinfo=datetime.timezone.utc) - ) - # Add to closed list if within the specified period. - if period[0] <= dt_utc <= period[1]: - closed.append(dt_utc.isoformat()) - # Log the results summary. - _LOG.info( - "Found %d opened and %d closed issues for %s/%s user=%s", - len(assigned), - len(closed), - org, - repo, - username, - ) - # Return the results dictionary. - result_dict = {"assigned": assigned, "closed": closed} - return result_dict - - -@github_cached(cache_type="json", write_through=True) -def get_loc_stats_by_repo_period_intrinsic( - client, - org: str, - repo: str, - username: str, - since: datetime.datetime, - until: datetime.datetime, -) -> List[Dict[str, int]]: - """ - Fetch commit LOC stats for user in repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param since: start datetime - :param until: end datetime - :return: additions, deletions in code - """ - stats_list: List[Dict[str, int]] = [] - # Fetch the repository object. - repo_obj = client.get_repo(f"{org}/{repo}") - # Retrieve all commits in the specified time period. - commits = repo_obj.get_commits(since=since, until=until) - # Track number of commits processed for safety limit. - commit_count = 0 - # Process each commit to extract LOC statistics. - for c in commits: - # Extract author and committer logins. - author_login = c.author.login if c.author else None - committer_login = c.committer.login if c.committer else None - # Skip commits not by the target user. 
- if username not in (author_login, committer_login): - continue - # Fetch commit statistics. - s = c.stats - # Skip if statistics are not available. - if s is None: - _LOG.debug("No stats available for commit %s", c.sha) - continue - # Convert commit date to UTC timezone. - dt = c.commit.author.date - dt_utc = dt if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc) - iso = dt_utc.date().isoformat() - # Add statistics to results list. - stats_list.append( - {"date": iso, "additions": s.additions, "deletions": s.deletions} - ) - # Increment commit counter and check safety limit. - commit_count += 1 - if commit_count > 1000: - _LOG.warning("Processed 1000 commits, stopping to avoid timeout") - break - # Log the results summary. - if not stats_list: - _LOG.info( - "No LOC stats found for %s/%s user=%s in %s to %s - possibly inactive or outdated.", - org, - repo, - username, - since.date(), - until.date(), - ) - else: - _LOG.info( - "Fetched LOC stats for %s/%s user=%s entries=%d.", - org, - repo, - username, - len(stats_list), - ) - return stats_list - - -@github_cached(cache_type="json", write_through=True) -def get_issue_comment_datetimes_by_repo_period_intrinsic( - client, - org: str, - repo: str, - username: str, - since: datetime.datetime, - until: datetime.datetime, -) -> List[str]: - """ - Fetch issue comment timestamps for user in repo over period using search - API. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param since: start datetime - :param until: end datetime - :return: comment timestamps in ISO format - """ - timestamps: List[str] = [] - # Format dates for GitHub search query. - since_date = since.date().isoformat() - until_date = until.date().isoformat() - # Build search query for issues where user has commented. - query = f"repo:{org}/{repo} is:issue commenter:{username} updated:{since_date}..{until_date}" - # Execute the search query. 
- results = client.search_issues(query) - # Process each issue to find user's comments. - for issue in results: - # Skip pull requests that appear in issue search. - if issue.pull_request: - continue - # Fetch all comments for this issue. - comments = issue.get_comments() - # Filter comments by the target user. - for comment in comments: - # Skip comments by other users. - if comment.user.login != username: - continue - # Convert comment date to UTC timezone. - comment_dt = comment.created_at - comment_dt_utc = ( - comment_dt - if comment_dt.tzinfo - else comment_dt.replace(tzinfo=datetime.timezone.utc) - ) - # Add timestamp if within the specified period. - if since <= comment_dt_utc <= until: - timestamps.append(comment_dt_utc.isoformat()) - # Log the results summary. - if not timestamps: - _LOG.info( - "No issue comments found for %s/%s user=%s in %s to %s - possibly inactive or outdated.", - org, - repo, - username, - since.date(), - until.date(), - ) - else: - _LOG.info( - "Fetched %d issue comments for %s/%s user=%s.", - len(timestamps), - org, - repo, - username, - ) - return timestamps - - -@github_cached(cache_type="json", write_through=True) -def get_pr_review_datetimes_by_repo_period_intrinsic( - client, - org: str, - repo: str, - username: str, - since: datetime.datetime, - until: datetime.datetime, -) -> List[str]: - """ - Fetch PR review timestamps for user in repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param since: start datetime - :param until: end datetime - :return: review timestamps in ISO format - """ - timestamps: List[str] = [] - # Fetch the repository object. - repo_obj = client.get_repo(f"{org}/{repo}") - # Format dates for GitHub search query. - since_date = since.date().isoformat() - until_date = until.date().isoformat() - # Build search query for PRs reviewed by the user. 
- query = f"repo:{org}/{repo} is:pr reviewed-by:{username} updated:{since_date}..{until_date}" - # Execute the search query. - results = client.search_issues(query) - # Process each PR to find user's reviews. - for issue in results: - # Fetch the full PR object. - pr = repo_obj.get_pull(issue.number) - # Fetch all reviews for this PR. - reviews = pr.get_reviews() - # Filter reviews by the target user. - for review in reviews: - # Skip reviews by other users. - if review.user.login != username: - continue - # Convert review date to UTC timezone. - review_dt = review.submitted_at - review_dt_utc = ( - review_dt - if review_dt.tzinfo - else review_dt.replace(tzinfo=datetime.timezone.utc) - ) - # Add timestamp if within the specified period. - if since <= review_dt_utc <= until: - timestamps.append(review_dt_utc.isoformat()) - # Log the results summary. - if not timestamps: - _LOG.info( - "No PR reviews found for %s/%s user=%s in %s to %s - possibly inactive or outdated.", - org, - repo, - username, - since.date(), - until.date(), - ) - else: - _LOG.info( - "Fetched %d PR reviews for %s/%s user=%s.", - len(timestamps), - org, - repo, - username, - ) - return timestamps - - -def build_daily_commit_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily commit counts for user and repo over period. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with date, commits, repo, user - """ - since, until = period - timestamps = get_commit_datetimes_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - df = pd.DataFrame({"ts": pd.to_datetime(timestamps)}) - df["date"] = df.ts.dt.date - daily = df.groupby("date").size().reset_index(name="commits") - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily, on="date", how="left") - daily["commits"] = daily["commits"].fillna(0).astype(int) - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily commit DataFrame rows=%d.", len(daily)) - return daily - - -def slice_by_date(df, start, end, date_col="date"): - """ - Slice DataFrame by date range. - - :param df: input DataFrame - :param start: start date (inclusive) - :param end: end date (inclusive) - :param date_col: name of the date column in df - :return: filtered DataFrame - """ - out = df.copy() - out[date_col] = pd.to_datetime(out[date_col], errors="coerce") - res = out.loc[(out[date_col] >= start) & (out[date_col] <= end)].copy() - return res - - -def build_daily_issue_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily assigned / closed issue counts for a user-repo pair. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with columns date, issues_assigned, issues_closed, - repo, user - """ - issue_data = get_issue_datetimes_by_repo_intrinsic( - client, org, repo, username, period - ) - df_assigned = pd.DataFrame( - {"ts": pd.to_datetime(issue_data["assigned"]), "issues_assigned": 1} - ) - df_assigned["date"] = df_assigned.ts.dt.date - df_closed = pd.DataFrame( - {"ts": pd.to_datetime(issue_data["closed"]), "issues_closed": 1} - ) - df_closed["date"] = df_closed.ts.dt.date - # Daily counts. - daily_assigned = ( - df_assigned.groupby("date")["issues_assigned"].sum().reset_index() - ) - daily_closed = df_closed.groupby("date")["issues_closed"].sum().reset_index() - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily_assigned, on="date", how="left").merge( - daily_closed, on="date", how="left" - ) - daily[["issues_assigned", "issues_closed"]] = ( - daily[["issues_assigned", "issues_closed"]].fillna(0).astype(int) - ) - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily issue DataFrame rows=%d.", len(daily)) - return daily - - -def build_daily_pr_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily PR counts for user and repo over period. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with date, prs, repo, user - """ - since, until = period - timestamps = get_pr_datetimes_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - df = pd.DataFrame({"ts": pd.to_datetime(timestamps)}) - df["date"] = df.ts.dt.date - daily = df.groupby("date").size().reset_index(name="prs") - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily, on="date", how="left") - daily["prs"] = daily["prs"].fillna(0).astype(int) - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily PR DataFrame rows=%d.", len(daily)) - return daily - - -def build_daily_loc_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily LOC additions and deletions for user and repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with date, additions, deletions, repo, user - """ - since, until = period - # Fetch raw LOC stats list. - stats_list = get_loc_stats_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - # If no stats, return zeros for full range. - if not stats_list: - all_days = pd.DataFrame({"date": days_between(period)}) - # Initialize zeroes. - all_days["additions"] = all_days["date"].apply(lambda _: 0) - all_days["deletions"] = all_days["date"].apply(lambda _: 0) - # Format signs. - all_days["additions"] = ( - all_days["additions"].astype(str).apply(lambda x: "+" + x) - ) - all_days["deletions"] = ( - all_days["deletions"].astype(str).apply(lambda x: "-" + x) - ) - # Add context. 
- all_days["repo"] = repo - all_days["user"] = username - # TODO(*): Logging-248: Use `_LOG.debug()` instead of `_LOG.info()` for tracing execution. - _LOG.debug("Built daily LOC DataFrame rows=%d (no data).", len(all_days)) - return all_days - # Otherwise build from stats_list. - df = pd.DataFrame(stats_list) - df["date"] = pd.to_datetime(df["date"]).dt.date - # Sum per date. - daily = df.groupby("date")[["additions", "deletions"]].sum().reset_index() - # Ensure full date coverage. - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily, on="date", how="left") - # Fill missing and integerize. - daily[["additions", "deletions"]] = ( - daily[["additions", "deletions"]].fillna(0).astype(int) - ) - # Apply sign formatting. - daily["additions"] = daily["additions"].astype(str).apply(lambda x: "+" + x) - daily["deletions"] = daily["deletions"].astype(str).apply(lambda x: "-" + x) - # Add context. - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily LOC DataFrame rows=%d.", len(daily)) - return daily - - -def get_total_loc_for_period( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> Dict[str, int]: - """ - Get total LOC additions and deletions for user and repo over period. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: additions and deletions totals - """ - since, until = period - stats = get_loc_stats_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - total_add = sum(item["additions"] for item in stats) - total_del = sum(item["deletions"] for item in stats) - _LOG.info( - "Total LOC for %s/%s user=%s => +%d -%d.", - org, - repo, - username, - total_add, - total_del, - ) - return {"additions": total_add, "deletions": total_del} - - -def prefetch_periodic_user_repo_data( - client, - org: str, - repos: List[str], - users: List[str], - period: Tuple[datetime.datetime, datetime.datetime], -) -> None: - """ - Prefetch and cache commits, PRs, LOC, issues, comments, and reviews for - each user and repo over period. - - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repos: repository names - :param users: GitHub usernames - :param period: start and end datetime objects - """ - # Validate input types. - if not isinstance(org, str): - raise ValueError(f"org must be a string, got {type(org).__name__}") - if not isinstance(repos, list) or not all(isinstance(r, str) for r in repos): - raise ValueError("repos must be a list of strings") - if not isinstance(users, list) or not all(isinstance(u, str) for u in users): - raise ValueError("users must be a list of strings") - # Initialize timer and pair up (repo, user) combinations. - start = time.time() - count = 0 - since, until = period - user_repo_pairs = list(itertools.product(repos, users)) - # Prefetch and cache GitHub data for each user-repo pair. 
- for repo, user in tqdm(user_repo_pairs, desc="Prefetching user-repo data"): - commits = get_commit_datetimes_by_repo_period_intrinsic( - client, org, repo, user, since, until - ) - prs = get_pr_datetimes_by_repo_period_intrinsic( - client, org, repo, user, since, until - ) - locs = get_loc_stats_by_repo_period_intrinsic( - client, org, repo, user, since, until - ) - issues = get_issue_datetimes_by_repo_intrinsic( - client, org, repo, user, period - ) - # issue_comments = get_issue_comment_datetimes_by_repo_period_intrinsic( - # client, org, repo, user, since, until - # ) - # pr_reviews = get_pr_review_datetimes_by_repo_period_intrinsic( - # client, org, repo, user, since, until - # ) - issue_comments = [] - pr_reviews = [] - _LOG.info( - "%s/%s: %d commits, %d PRs, %d LOC entries, %d issues assigned, " - "%d closed, %d issue comments, %d PR reviews", - repo, - user, - len(commits), - len(prs), - len(locs), - len(issues["assigned"]), - len(issues["closed"]), - len(issue_comments), - len(pr_reviews), - ) - count += 1 - # Report overall prefetch duration. - elapsed = time.time() - start - _LOG.info( - "Prefetched %d user-repo combos in %.2f seconds for period %s to %s.", - count, - elapsed, - period[0], - period[1], - ) - - -def build_daily_issue_comment_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily issue comment counts for user and repo over period. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with date, issue_comments, repo, user - """ - since, until = period - timestamps = get_issue_comment_datetimes_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - df = pd.DataFrame({"ts": pd.to_datetime(timestamps)}) - df["date"] = df.ts.dt.date - daily = df.groupby("date").size().reset_index(name="issue_comments") - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily, on="date", how="left") - daily["issue_comments"] = daily["issue_comments"].fillna(0).astype(int) - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily issue comment DataFrame rows=%d.", len(daily)) - return daily - - -def build_daily_pr_review_df( - client, - org: str, - repo: str, - username: str, - period: Tuple[datetime.datetime, datetime.datetime], -) -> pd.DataFrame: - """ - Build daily PR review counts for user and repo over period. 
- - :param client: authenticated PyGithub client - :param org: GitHub org name - :param repo: repository name - :param username: GitHub username - :param period: start and end datetime objects - :return: data with date, pr_reviews, repo, user - """ - since, until = period - timestamps = get_pr_review_datetimes_by_repo_period_intrinsic( - client, org, repo, username, since, until - ) - df = pd.DataFrame({"ts": pd.to_datetime(timestamps)}) - df["date"] = df.ts.dt.date - daily = df.groupby("date").size().reset_index(name="pr_reviews") - all_days = pd.DataFrame({"date": days_between(period)}) - daily = all_days.merge(daily, on="date", how="left") - daily["pr_reviews"] = daily["pr_reviews"].fillna(0).astype(int) - daily["repo"] = repo - daily["user"] = username - _LOG.debug("Built daily PR review DataFrame rows=%d.", len(daily)) - return daily - - -def collect_all_metrics( - client, - org: str, - repos: List[str], - users: List[str], - period: Tuple[datetime.datetime, datetime.datetime], - *, - skip_issue_comments: bool = True, - skip_pr_reviews: bool = True, -) -> pd.DataFrame: - """ - Collect daily metrics for all user-repo combinations. - - :param client: authenticated PyGithub client - :param org: Github org name - :param repos: repository names - :param users: github usernames - :param period: start and end datetime - :param skip_issue_comments: skip fetching issue comments (for speed) - :param skip_pr_reviews: skip fetching PR reviews (for speed) - :return: concatenated data with date, commits, prs, additions, - deletions, issues_assigned, issues_closed, issue_comments - (optional), pr_reviews (optional), repo, user - """ - combined_frames: List[pd.DataFrame] = [] - # Track progress. - total_combinations = len(repos) * len(users) - current = 0 - for repo in repos: - # Ensure repo is a string. - if not isinstance(repo, str): - raise ValueError(f"Expected repo to be a string but got {repo!r}") - for user in users: - # Ensure user is a string. 
- if not isinstance(user, str): - raise ValueError( - f"Expected user to be a string but got {user!r}" - ) - current += 1 - _LOG.info( - "Processing %d/%d: %s/%s", - current, - total_combinations, - repo, - user, - ) - # Build each metric DataFrame. - df_c = build_daily_commit_df(client, org, repo, user, period) - df_p = build_daily_pr_df(client, org, repo, user, period) - df_l = build_daily_loc_df(client, org, repo, user, period) - df_i = build_daily_issue_df(client, org, repo, user, period) - # Start merging with required metrics. - df = ( - df_c.merge(df_p, on=["date", "repo", "user"], how="inner") - .merge(df_l, on=["date", "repo", "user"], how="inner") - .merge(df_i, on=["date", "repo", "user"], how="inner") - ) - # Optionally add issue comments. - if not skip_issue_comments: - df_ic = build_daily_issue_comment_df( - client, org, repo, user, period - ) - df = df.merge(df_ic, on=["date", "repo", "user"], how="inner") - else: - # Add dummy column for consistency. - df["issue_comments"] = 0 - # Optionally add PR reviews. - if not skip_pr_reviews: - df_pr = build_daily_pr_review_df(client, org, repo, user, period) - df = df.merge(df_pr, on=["date", "repo", "user"], how="inner") - else: - # Add dummy column for consistency. - df["pr_reviews"] = 0 - combined_frames.append(df) - # Concatenate all DataFrames or return empty. - combined = ( - pd.concat(combined_frames, ignore_index=True) - if combined_frames - else pd.DataFrame() - ) - _LOG.info("Collected metrics for %d daily records", len(combined)) - return combined - - -def summarize_user_metrics_for_repo( - combined: pd.DataFrame, repo: str -) -> pd.DataFrame: - """ - Summarize total commits, PRs, LOC, issues, comments, and reviews per user - in a specific repository. 
- - :param combined: data with all metrics - :param repo: repository name - :return: data with columns user, commits, prs, additions, deletions, - issues_assigned, issues_closed, issue_comments, pr_reviews - """ - df = combined[combined["repo"] == repo].copy() - df["additions"] = df["additions"].str.replace("+", "").astype(int) - df["deletions"] = df["deletions"].str.replace("-", "").astype(int) - df["issues_assigned"] = df["issues_assigned"].astype(int) - df["issues_closed"] = df["issues_closed"].astype(int) - # df["issue_comments"] = df["issue_comments"].astype(int) - # df["pr_reviews"] = df["pr_reviews"].astype(int) - summary = ( - df.groupby("user") - .agg( - commits=pd.NamedAgg(column="commits", aggfunc="sum"), - prs=pd.NamedAgg(column="prs", aggfunc="sum"), - additions=pd.NamedAgg(column="additions", aggfunc="sum"), - deletions=pd.NamedAgg(column="deletions", aggfunc="sum"), - issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"), - issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"), - # issue_comments=pd.NamedAgg(column="issue_comments", aggfunc="sum"), - # pr_reviews=pd.NamedAgg(column="pr_reviews", aggfunc="sum"), - ) - .reset_index() - ) - return summary - - -def summarize_repo_metrics_for_user( - combined: pd.DataFrame, user: str -) -> pd.DataFrame: - """ - Summarize total commits, PRs, LOC, issues, comments, and reviews per repo - for a user. 
- - :param combined: data with all metrics - :param user: GitHub username - :return: columns repo, commits, prs, additions, deletions, - issues_assigned, issues_closed, issue_comments, pr_reviews - """ - df = combined[combined["user"] == user].copy() - df["additions"] = df["additions"].str.replace("+", "").astype(int) - df["deletions"] = df["deletions"].str.replace("-", "").astype(int) - df["issue_comments"] = df["issue_comments"].astype(int) - df["pr_reviews"] = df["pr_reviews"].astype(int) - summary = ( - df.groupby("repo") - .agg( - commits=pd.NamedAgg(column="commits", aggfunc="sum"), - prs=pd.NamedAgg(column="prs", aggfunc="sum"), - additions=pd.NamedAgg(column="additions", aggfunc="sum"), - deletions=pd.NamedAgg(column="deletions", aggfunc="sum"), - issues_assigned=pd.NamedAgg(column="issues_assigned", aggfunc="sum"), - issues_closed=pd.NamedAgg(column="issues_closed", aggfunc="sum"), - issue_comments=pd.NamedAgg(column="issue_comments", aggfunc="sum"), - pr_reviews=pd.NamedAgg(column="pr_reviews", aggfunc="sum"), - ) - .reset_index() - ) - return summary - - -def summarize_users_across_repos( - combined: pd.DataFrame, - users: List[str], - repos: List[str], -) -> pd.DataFrame: - """ - Aggregate commit / PR / LOC / issue / comment / review totals per-user - across a repo subset. - - :param combined: output of `collect_all_metrics` - :param users: GitHub usernames - :param repos: repository names - :return: data with columns user, commits, prs, additions, deletions, - issues_assigned, issues_closed, issue_comments, pr_reviews - """ - # Filter to requested slice. - df = combined[ - combined["user"].isin(users) & combined["repo"].isin(repos) - ].copy() - # Normalise numeric columns. 
- df["additions"] = df["additions"].str.replace("+", "").astype(int) - df["deletions"] = df["deletions"].str.replace("-", "").astype(int) - df["issue_comments"] = df["issue_comments"].astype(int) - df["pr_reviews"] = df["pr_reviews"].astype(int) - df.rename( - columns={ - "issues_assigned": "issues_assigned", - "issues_closed": "issues_closed", - }, - inplace=True, - errors="ignore", - ) - # Aggregate across repos. - summary = ( - df.groupby("user") - .agg( - commits=("commits", "sum"), - prs=("prs", "sum"), - additions=("additions", "sum"), - deletions=("deletions", "sum"), - issues_assigned=("issues_assigned", "sum"), - issues_closed=("issues_closed", "sum"), - issue_comments=("issue_comments", "sum"), - pr_reviews=("pr_reviews", "sum"), - ) - .reset_index() - ) - return summary - - -def _filter_period( - df: pd.DataFrame, - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, -) -> pd.DataFrame: - """ - Slice a DataFrame by date using optional start and end boundaries. - - :param df: data with a 'date' column - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :return: filtered data such that start ≤ date ≤ end - """ - if not pd.api.types.is_datetime64_any_dtype(df["date"]): - df = df.copy() - df["date"] = pd.to_datetime(df["date"]) - if start is not None: - df = df[df["date"] >= start] - if end is not None: - df = df[df["date"] <= end] - return df - - -def _plot_grouped_bars( - summary: pd.DataFrame, - index_col: str, - title: str, - *, - metrics: Optional[List[str]] = None, -) -> None: - """ - Internal helper to render grouped bar plots. - - :param summary: data with one row per category (user or repo), and - one column per metric - :param index_col: column name(e.g., "user" or "repo") - :param metrics: subset of metrics to plot (e.g., ["commits", "prs"]) - :param title: chart title - """ - # Validate and prepare the list of metrics to plot. 
- default_metrics = [ - "commits", - "prs", - "additions", - "deletions", - "issues_assigned", - "issues_closed", - "issue_comments", - "pr_reviews", - ] - to_plot = metrics if metrics else default_metrics - for m in to_plot: - if m not in default_metrics: - raise ValueError(f"Unsupported metric '{m}'") - # Filter to only metrics that exist in the summary. - to_plot = [m for m in to_plot if m in summary.columns] - # Compute layout parameters. - categories = summary[index_col].tolist() - x = range(len(to_plot)) - n_cat = len(categories) - width = 0.8 / n_cat if n_cat else 0.8 - # Plot bars for each category (user or repo). - fig_width = max(12, len(to_plot) * 1.5) - fig, ax = plt.subplots(figsize=(fig_width, 5)) - for idx, cat in enumerate(categories): - values = ( - summary.loc[summary[index_col] == cat, to_plot].astype(int).iloc[0] - ) - pos = [i + idx * width for i in x] - bars = ax.bar(pos, values, width=width, label=str(cat)) - for b in bars: - ax.text( - b.get_x() + b.get_width() / 2, - b.get_height(), - str(int(b.get_height())), - ha="center", - va="bottom", - fontsize=8, - ) - # Finalize plot aesthetics. - ax.set_xticks([i + width * (n_cat - 1) / 2 for i in x]) - ax.set_xticklabels( - [m.replace("_", " ").title() for m in to_plot], rotation=45, ha="right" - ) - ax.set_ylabel("Count") - ax.set_title(title) - ax.legend(title=index_col.replace("_", " ").title()) - plt.tight_layout() - plt.show() - - -def plot_metrics_by_user( - combined: pd.DataFrame, - repo: str, - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - users: Optional[List[str]] = None, - metrics: Optional[List[str]] = None, -) -> None: - """ - Plot selected metrics for users in one repo. 
- - :param combined: output from `collect_all_metrics` - :param repo: repository name - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param users: optional subset of GitHub usernames to show - :param metrics: list of metrics to plot; defaults to all numeric columns - :return: grouped bar chart where each group = metric, each bar = user - """ - df_period = _filter_period(df=combined, start=start, end=end) - summary = summarize_user_metrics_for_repo(df_period, repo) - if users is not None: - summary = summary[summary["user"].isin(users)] - _plot_grouped_bars( - summary, - index_col="user", - metrics=metrics, - title=f"Metric comparison for {repo} " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})", - ) - - -def plot_metrics_by_repo( - combined: pd.DataFrame, - user: str, - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - repos: Optional[List[str]] = None, - metrics: Optional[List[str]] = None, -) -> None: - """ - Plot specified metrics for repos for a single user as grouped bar chart. 
- - :param combined: data from `collect_all_metrics` - :param user: GitHub username - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param repos: repos to include - :param metrics: metrics to plot; defaults to all numeric columns - :return: grouped bar chart where each group = metric, each bar = repo - """ - df_period = _filter_period(df=combined, start=start, end=end) - summary = summarize_repo_metrics_for_user(df_period, user) - if repos is not None: - summary = summary[summary["repo"].isin(repos)] - _plot_grouped_bars( - summary, - index_col="repo", - metrics=metrics, - title=f"Metric comparison for {user} " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})", - ) - - -def plot_multi_metrics_totals_by_user( - combined: pd.DataFrame, - metrics: List[str], - *, - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, - users: Optional[List[str]] = None, - repos: Optional[List[str]] = None, -) -> None: - """ - Plot multiple metrics (summed across repos) per user as grouped bars. - - :param combined: data from `collect_all_metrics` - :param metrics: metrics to plot, e.g. ["commits", "prs", "additions"] - :param start: start datetime (inclusive) - :param end: end datetime (inclusive) - :param users: users to include - :param repos: repos to include - :return: grouped bar chart where each group = user, each bar = one metric - """ - df_period = _filter_period(df=combined, start=start, end=end) - # Aggregate totals for each user across the selected repos. - summary = summarize_users_across_repos( - df_period, - users or df_period["user"].unique().tolist(), - repos or df_period["repo"].unique().tolist(), - ) - if users is not None: - summary = summary[summary["user"].isin(users)] - # Validate metrics exist. - for metric in metrics: - if metric not in summary.columns: - raise ValueError(f"Metric '{metric}' not found in summary columns") - # Set up bar positions and sizing. 
- users_sorted = summary["user"].tolist() - x = range(len(users_sorted)) - width = 0.8 / len(metrics) if metrics else 0.8 - fig_width = max(10, len(users_sorted) * 0.7) - fig, ax = plt.subplots(figsize=(fig_width, 5)) - # Draw bars for each metric across users - for i, metric in enumerate(metrics): - offsets = [pos + i * width for pos in x] - values = ( - summary.set_index("user") - .loc[users_sorted, metric] - .astype(int) - .tolist() - ) - bars = ax.bar( - offsets, values, width=width, label=metric.replace("_", " ").title() - ) - for bar in bars: - ax.text( - bar.get_x() + bar.get_width() / 2, - bar.get_height(), - str(int(bar.get_height())), - ha="center", - va="bottom", - fontsize=8, - ) - # Final plot styling. - ax.set_xticks([pos + width * (len(metrics) - 1) / 2 for pos in x]) - ax.set_xticklabels(users_sorted, rotation=15, ha="right") - ax.set_ylabel("Total count across repos") - ax.set_title( - f"Metric totals across repos by user " - f"({start.date() if start else 'ALL'} -> {end.date() if end else 'ALL'})" - ) - ax.legend() - plt.tight_layout() - plt.show() - - -def get_contributors_for_repo( - client, - org: str, - repo: str, - *, - top_n: Optional[int] = None, -) -> List[str]: - """ - Fetch GitHub usernames of contributors to a repository. - - :param client: authenticated PyGithub client - :param org: GitHub organization name - :param repo: repository name - :param top_n: if specified, return only the top N contributors by - commit count - :return: GitHub usernames - """ - repo_obj = client.get_repo(f"{org}/{repo}") - contributors = repo_obj.get_contributors() - usernames = list() - for idx, user in enumerate(contributors): - if top_n and idx >= top_n: - break - usernames.append(user.login) - _LOG.info("Fetched %d contributors for %s/%s", len(usernames), org, repo) - return usernames - - -def utc_period( - start: str, end: str -) -> Tuple[datetime.datetime, datetime.datetime]: - """ - Construct a UTC datetime period from string inputs. 
- - :param start: start date e.g. '2025-01-01' - :param end: end date e.g. '2025-05-24' - """ - date = ( - datetime.datetime.fromisoformat(start).replace( - tzinfo=datetime.timezone.utc - ), - datetime.datetime.fromisoformat(end).replace( - tzinfo=datetime.timezone.utc - ), - ) - return date - - -def slice_period( - df: pd.DataFrame, - start: datetime.date, - end: datetime.date, -) -> pd.DataFrame: - """ - Filter a DataFrame by date range. - - :param df: data with a `date` column of type `datetime.date` - :param start: start date for the filtering window - :param end: end date for the filtering window - :return: filtered data within the specified date range - """ - req_period = df[(df["date"] >= start) & (df["date"] <= end)] - return req_period - - -def compute_z_scores(summary: pd.DataFrame, metrics: List[str]) -> pd.DataFrame: - """ - Compute z-score (standardized score) for specified metrics across users. - - This helps assess how far a user's metric is from the group mean in units - of standard deviation. - - :param summary: data with users and raw metric values - :param metrics: metric column names to compute z-scores for - :return: data with added z-score columns suffixed with `_z` - """ - z_df = summary.copy() - for metric in metrics: - mean = z_df[metric].mean() - std = z_df[metric].std() - z_df[metric + "_z"] = (z_df[metric] - mean) / std - return z_df - - -def compute_percentile_ranks( - summary: pd.DataFrame, metrics: List[str] -) -> pd.DataFrame: - """ - Compute percentile rank for each user for the specified metrics. - - Percentile rank reflects the relative standing of a user compared to the - group. For example, a percentile of 0.8 means the user is ahead of 80% - of the group for that metric. 
- - :param summary: data with users and raw metric values - :param metrics: metric column names - :return: data with added percentile columns suffixed with `_pctile` - """ - perc_df = summary.copy() - for metric in metrics: - perc_df[metric + "_pctile"] = perc_df[metric].rank(pct=True) - return perc_df - - -def visualize_user_metric_comparison( - stats: pd.DataFrame, - *, - score_type: Literal["z", "percentile"] = "z", - top_n: Optional[int] = None, -) -> None: - """ - Visualize user performance across all available metrics using z-scores or - percentiles. - - :param stats: data with user metrics and their standardized scores - :param score_type: "z" for z-scores or "percentile" for relative - percentiles - :param top_n: number of top users to show in leaderboard bar chart - """ - suffix = "_z" if score_type == "z" else "_pctile" - score_cols = [col for col in stats.columns if col.endswith(suffix)] - if not score_cols: - raise ValueError( - f"No columns ending with '{suffix}' found in input DataFrame." - ) - # Stylized table. - import IPython - - IPython.display.display( - stats[["user"] + score_cols] - .set_index("user") - .style.format("{:.2f}") - .background_gradient( - axis=0, cmap="Greens" if score_type == "percentile" else "RdYlGn" - ) - ) - # Leaderboard chart (by average score). 
- stats["__score_avg__"] = stats[score_cols].mean(axis=1) - if top_n is None: - top_users = stats.sort_values("__score_avg__", ascending=False) - top_n_display = len(top_users) - else: - top_users = stats.sort_values("__score_avg__", ascending=False).head( - top_n - ) - top_n_display = top_n - fig, ax = plt.subplots(figsize=(max(8, 0.5 * len(top_users)), 4)) - ax.bar(top_users["user"], top_users["__score_avg__"], color="skyblue") - ax.set_ylabel( - "Average Score" - + (" (Z-score)" if score_type == "z" else " (Percentile)") - ) - ax.set_title(f"Top {top_n_display} Users by Average {score_type.title()}") - ax.axhline(0 if score_type == "z" else 0.5, color="gray", linestyle="--") - plt.xticks(rotation=15, ha="right") - plt.tight_layout() - plt.show() - stats.drop(columns="__score_avg__", inplace=True) - - -def compute_engagement_score( - summary: pd.DataFrame, - weights: Optional[Dict[str, float]] = None, -) -> pd.DataFrame: - """ - Compute a weighted engagement score for each user based on all metrics. - - :param summary: data with user metrics - :param weights: optional dictionary of metric weights; if None, uses - defaults - :return: summary with an added 'engagement_score' column - """ - # Default weights emphasizing collaboration and code quality. - default_weights = { - "commits": 1.0, - "prs": 2.0, - "additions": 0.001, - "deletions": 0.0005, - "issues_assigned": 0.5, - "issues_closed": 1.5, - "issue_comments": 0.3, - "pr_reviews": 2.5, - } - weights = weights or default_weights - summary = summary.copy() - summary["engagement_score"] = 0 - for metric, weight in weights.items(): - if metric in summary.columns: - summary["engagement_score"] += summary[metric] * weight - # Normalize to 0-100 scale. 
- max_score = summary["engagement_score"].max() - if max_score > 0: - summary["engagement_score"] = ( - summary["engagement_score"] / max_score * 100 - ).round(2) - summary_sorted = summary.sort_values("engagement_score", ascending=False) - return summary_sorted - - -# ############################################################################# -# PR Statistics -# ############################################################################# - - -def count_open_prs_by_author( - repo_obj, -) -> Dict[str, Dict[str, int]]: - """ - Count open PRs grouped by author and draft/ready status. - - :param repo_obj: PyGithub repository object - :return: dict mapping author -> {"ready": int, "draft": int} - """ - stats: Dict[str, Dict[str, int]] = collections.defaultdict( - lambda: {"ready": 0, "draft": 0} - ) - pulls = repo_obj.get_pulls(state="open") - for pr in pulls: - author = pr.user.login - status = "draft" if pr.draft else "ready" - stats[author][status] += 1 - _LOG.debug("Open PR #%d by %s status=%s", pr.number, author, status) - return dict(stats) - - -def count_closed_prs_by_author( - repo_obj, - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> Dict[str, int]: - """ - Count closed PRs grouped by author, optionally filtered by period. - - :param repo_obj: PyGithub repository object - :param period: optional (start, end) UTC-aware datetimes for filtering - :return: dict mapping author -> count of closed PRs - """ - stats: Dict[str, int] = collections.defaultdict(int) - since, until = normalize_period_to_utc(period) - pulls = repo_obj.get_pulls(state="closed") - for pr in pulls: - # Normalize the PR closed_at timestamp to UTC. - closed_at = pr.closed_at - if closed_at is None: - continue - if closed_at.tzinfo is None: - closed_at = closed_at.replace(tzinfo=datetime.timezone.utc) - else: - closed_at = closed_at.astimezone(datetime.timezone.utc) - # Filter by period if specified. 
- if since is not None and until is not None: - if not (since <= closed_at <= until): - continue - author = pr.user.login - stats[author] += 1 - _LOG.debug("Closed PR #%d by %s at %s", pr.number, author, closed_at) - return dict(stats) - - -def print_open_pr_stats( - open_stats: Dict[str, Dict[str, int]], -) -> None: - """ - Print open PR statistics by author and draft/ready status. - - :param open_stats: dict mapping author -> {"ready": int, "draft": int} - """ - if not open_stats: - _LOG.info("No open PRs found.") - return - # Sort by total PR count descending. - sorted_authors = sorted( - open_stats.items(), - key=lambda item: item[1]["ready"] + item[1]["draft"], - reverse=True, - ) - total_ready = 0 - total_draft = 0 - header = f"{'Author':<25} {'Ready':>7} {'Draft':>7} {'Total':>7}" - separator = "-" * len(header) - _LOG.info("Open PRs by author:") - _LOG.info(separator) - _LOG.info(header) - _LOG.info(separator) - for author, counts in sorted_authors: - ready = counts["ready"] - draft = counts["draft"] - total = ready + draft - total_ready += ready - total_draft += draft - _LOG.info("%-25s %7d %7d %7d", author, ready, draft, total) - _LOG.info(separator) - _LOG.info( - "%-25s %7d %7d %7d", - "TOTAL", - total_ready, - total_draft, - total_ready + total_draft, - ) - - -def print_closed_pr_stats( - closed_stats: Dict[str, int], - *, - period: Optional[Tuple[datetime.datetime, datetime.datetime]] = None, -) -> None: - """ - Print closed PR statistics by author. - - :param closed_stats: dict mapping author -> count of closed PRs - :param period: optional period used for filtering (for display only) - """ - if not closed_stats: - _LOG.info("No closed PRs found.") - return - # Sort by count descending. 
- sorted_authors = sorted( - closed_stats.items(), key=lambda item: item[1], reverse=True - ) - period_str = "all time" - if period is not None: - since, until = period - period_str = f"{since.date()} to {until.date()}" - header = f"{'Author':<25} {'Closed':>7}" - separator = "-" * len(header) - _LOG.info("Closed PRs by author (%s):", period_str) - _LOG.info(separator) - _LOG.info(header) - _LOG.info(separator) - total = 0 - for author, count in sorted_authors: - total += count - _LOG.info("%-25s %7d", author, count) - _LOG.info(separator) - _LOG.info("%-25s %7d", "TOTAL", total) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py deleted file mode 100644 index 96c8af1da..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hasyncio.py +++ /dev/null @@ -1,508 +0,0 @@ -""" -Import as: - -import helpers.hasyncio as hasynci -""" - -import asyncio -import contextlib -import datetime -import logging -import math -import time -from typing import ( - Any, - Callable, - Coroutine, - Dict, - Iterator, - List, - Optional, - Tuple, - Union, - cast, -) - -import async_solipsism # type: ignore[import-not-found] -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint - -# Avoid dependency from other `helpers` modules, such as `helpers.hsql`, to prevent -# import cycles. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _EventLoop -# ############################################################################# - - -# TODO(gp): We could make this a mixin and add this behavior to both asyncio and -# async_solipsism event loop. 
-# TODO(gp): -> _AsyncSolipsismEventLoop -# TODO(gp): Consider injecting a `get_wall_clock_time: hdatetim.GetWallClockTime` -# in the event loop so we can simplify the interfaces. An event loop always needs -# a function to get the wall clock. -class _EventLoop(async_solipsism.EventLoop): - """ - An `async_solipsism.EventLoop` returning also the wall-clock time. - """ - - # TODO(gp): If we pass an `initial_replayed_timestamp` we could incorporate here also - # the replayed time approach and can remove `ReplayedTime` object. - def __init__(self) -> None: - super().__init__() - self._initial_dt = datetime.datetime.utcnow() - - def get_current_time(self) -> datetime.datetime: - # `loop.time()` returns the number of seconds as `float` from when the event - # loop was created. - try: - num_secs = super().time() - except AttributeError: - # Sometimes we call the logger before `async_solipsism` is fully initialized. - # File "/app/amp/helpers/hdatetime.py", line 255, in get_current_time - # timestamp = event_loop.get_current_time() - # File "/app/amp/helpers/hasyncio.py", line 60, in get_current_time - # num_secs = super().time() - # File "/venv/lib/python3.8/site-packages/async_solipsism/loop.py", line 39, in time - # return self._selector.clock.time() - # AttributeError: 'NoneType' object has no attribute 'clock' - # Call stack: - # File "/app/amp/helpers/hcache.py", line 311, in clear_global_cache - # _LOG.info("After clear_global_cache: %s", info_after) - # Message: 'After clear_global_cache: %s' - # Arguments: ("'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan",) - # To avoid the error above we just set the `num_secs` to 0. - num_secs = 0 - return self._initial_dt + datetime.timedelta(seconds=num_secs) - - -# From https://stackoverflow.com/questions/49555991 -@contextlib.contextmanager -def solipsism_context() -> Iterator: - """ - Context manager to isolate an `asyncio_solipsism` event loop. - """ - # Use the variation of solipsistic `EventLoop` above. 
- event_loop = _EventLoop() - asyncio.set_event_loop(event_loop) - try: - yield event_loop - finally: - asyncio.set_event_loop(None) - - -async def gather_coroutines_with_wall_clock( - event_loop: asyncio.AbstractEventLoop, - *coroutines: Callable[[Any], Coroutine[Any, Any, Any]], -) -> List[Any]: - """ - Inject a wall clock associated to `event_loop` in all the coroutines and - then gathers them in a single coroutine. - """ - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - # Construct the coroutines here by passing the `get_wall_clock_time()` - # function. - coros_list = [coro(get_wall_clock_time) for coro in coroutines] - # - result: List[Any] = await asyncio.gather(*coros_list) - return result - - -# TODO(gp): For some reason `asyncio.run()` doesn't seem to pick up the new event -# loop. So we use a re-implementation of `run` that does that. -def run( - coroutine: Coroutine, - event_loop: Optional[asyncio.AbstractEventLoop], - *, - close_event_loop: bool = True, -) -> Any: - """ - `asyncio.run()` wrapper that allows to use a specified `EventLoop`. - - :param coroutine: the coroutine to run - :param event_loop: the event loop to use. `None` means the standard `asyncio` - event loop - :param close_event_loop: if False the event loop is not closed, so that we can - run multiple times in the same event loop - :return: same output of `run_until_complete()` - """ - if event_loop is None: - # Use a normal `asyncio` EventLoop. - event_loop = asyncio.new_event_loop() - hdbg.dassert_issubclass(event_loop, asyncio.AbstractEventLoop) - hprint.log_frame(_LOG, "asyncio.run") - try: - ret = event_loop.run_until_complete(coroutine) - finally: - if close_event_loop: - event_loop.close() - return ret - - -# ############################################################################# -# Synchronous / asynchronous polling. 
-# ############################################################################# - - -# The result of a polling function in terms of a bool indicating success (which -# when True stops the polling) and a result. -PollOutput = Tuple[bool, Any] - -# A polling function accepts any inputs and returns a `PollOutput` in terms of -# (success, result). Typically polling functions don't accept any inputs and are -# built through lambdas and closures. -PollingFunction = Callable[[], PollOutput] - - -def _get_max_num_iterations( - sleep_in_secs: float, - timeout_in_secs: float, -) -> int: - hdbg.dassert_lt(0, sleep_in_secs) - hdbg.dassert_lt(0, timeout_in_secs) - max_num_iter = int(math.ceil(timeout_in_secs / sleep_in_secs)) - hdbg.dassert_lte(1, max_num_iter) - return max_num_iter - - -# TODO(gp): This is probably better implemented with an iterator. -def _poll_iterate( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - num_iter: int, - max_num_iter: int, - tag: str, -) -> Tuple[int, PollOutput]: - """ - Execute an iteration of the polling loop. - - :return: the number of iterations executed and the output of the - polling function (sucess, return value) - :raises: TimeoutError in case of timeout - """ - _LOG.debug( - "\n## %s: wall clock time=%s: iter=%s/%s", - tag, - get_wall_clock_time(), - num_iter, - max_num_iter, - ) - hdbg.dassert_callable(get_wall_clock_time) - # Poll. - success, value = polling_func() - _LOG.debug("success=%s, value=%s", success, value) - if success: - # If success, then exit. - hprint.log_frame( - _LOG, - "%s: wall clock time=%s: poll done", - tag, - get_wall_clock_time(), - ) - else: - # Otherwise update state. 
- num_iter += 1 - if num_iter > max_num_iter: - msg = "Timeout for " + hprint.to_str( - "polling_func sleep_in_secs timeout_in_secs tag" - ) - _LOG.error(msg) - raise TimeoutError(msg) - return num_iter, (success, value) - - -# TODO(ai_gp): -> async_poll -async def poll( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> Tuple[int, Any]: - """ - Call `polling_func()` every `sleep_in_secs` secs until the polling function - returns success or there is a timeout. A timeout happens if no success is - achieved within `timeout_in_secs` secs. - - :param polling_func: function returning a tuple (success, value) - :return: - - number of iterations before a successful call to `polling_func` - - result from `polling_func` - :raises: TimeoutError in case of timeout - """ - _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag")) - if tag is None: - # Use the function calling this function. - tag = hintros.get_function_name(count=0) - max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs) - num_iter = 1 - while True: - num_iter, (success, value) = _poll_iterate( - polling_func, - sleep_in_secs, - timeout_in_secs, - get_wall_clock_time, - num_iter, - max_num_iter, - tag, - ) - if success: - return num_iter, value - _LOG.debug("sleep for %s secs", sleep_in_secs) - await asyncio.sleep(sleep_in_secs) - - -def sync_poll( - polling_func: PollingFunction, - sleep_in_secs: float, - timeout_in_secs: float, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> Tuple[int, Any]: - """ - Same interface and behavior of `poll()` but using a synchronous - implementation. - """ - _LOG.debug(hprint.to_str("polling_func sleep_in_secs timeout_in_secs tag")) - if tag is None: - # Use the function calling this function. 
- tag = hintros.get_function_name(count=0) - max_num_iter = _get_max_num_iterations(sleep_in_secs, timeout_in_secs) - num_iter = 1 - while True: - num_iter, (success, value) = _poll_iterate( - polling_func, - sleep_in_secs, - timeout_in_secs, - get_wall_clock_time, - num_iter, - max_num_iter, - tag, - ) - if success: - return success, value - _LOG.debug("sleep for %s secs", sleep_in_secs) - time.sleep(sleep_in_secs) - - -def get_poll_kwargs( - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(ai_gp): Avoid using defaults. - sleep_in_secs: float = 1.0, - timeout_in_secs: float = 10.0, -) -> Dict[str, Any]: - hdbg.dassert_lt(0, sleep_in_secs) - hdbg.dassert_lt(0, timeout_in_secs) - hdbg.dassert_callable(get_wall_clock_time) - poll_kwargs = { - "sleep_in_secs": sleep_in_secs, - "timeout_in_secs": timeout_in_secs, - "get_wall_clock_time": get_wall_clock_time, - } - return poll_kwargs - - -# ############################################################################# -# Wait. -# ############################################################################# - - -# Represent a deterministic, if float, or random delay in [a, b] if a Tuple. -# All values are in seconds. -WaitInSecs = Union[float, Tuple[float, float]] - - -async def sleep( - delay_in_secs: WaitInSecs, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(gp): -> msg - tag: Optional[str] = None, - # TODO(gp): How to handle random seed here? - seed: int = 42, -) -> None: - """ - Wait a deterministic or a randomized delay. - """ - if tag is None: - # Use the name of the function calling this function. - tag = hintros.get_function_name(count=0) - # Extract or compute the delay. - if isinstance(delay_in_secs, (int, float)): - # Deterministic delay. - pass - elif isinstance(delay_in_secs, tuple): - # Randomized delay. 
- hdbg.dassert_eq(len(delay_in_secs), 2) - min_, max_ = delay_in_secs - hdbg.dassert_lte(0, min_) - hdbg.dassert_lte(min_, max_) - delay_in_secs = np.random.rand(min_, max_) - else: - raise ValueError(f"Invalid delay_in_secs='{delay_in_secs}'") - # Wait. - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: started waiting for %s secs", - tag, - get_wall_clock_time(), - delay_in_secs, - ) - hdbg.dassert_lte(0, delay_in_secs) - delay_in_secs = cast(float, delay_in_secs) - await asyncio.sleep(delay_in_secs) - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: done waiting for %s secs", - tag, - get_wall_clock_time(), - delay_in_secs, - ) - - -# ////////////////////////////////////////////////////////////////////////////////// - - -def get_seconds_to_align_to_grid( - bar_duration_in_secs: int, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - add_buffer_in_secs: int = 0, -) -> Tuple[pd.Timestamp, float]: - """ - Given the current time return the amount of seconds to wait to align on a - grid with period `bar_duration_in_secs`. - - E.g., current_time=9:31:02am, bar_duration_in_secs=120 -> return 58 - - :param add_buffer_in_secs: number of seconds to add to make sure we - are right after the grid time - """ - hdbg.dassert_lte(0, add_buffer_in_secs) - current_time = get_wall_clock_time() - _LOG.debug("current_time=%s ...", current_time) - # Align on the time grid. - hdbg.dassert_isinstance(bar_duration_in_secs, int) - hdbg.dassert_lt(0, bar_duration_in_secs) - freq = f"{bar_duration_in_secs}S" - target_time = current_time.ceil(freq) - hdbg.dassert_lte(current_time, target_time) - _LOG.debug("target_time=%s", target_time) - secs_to_wait = (target_time - current_time).total_seconds() - # E.g., for - # target_time=2022-07-11 11:30:00-04:00 - # curr_time=2022-07-11 11:29:15.129365-04:00 - # The difference is 44secs, so we need to add 1 sec to make sure we pass - # the target time. 
- secs_to_wait += add_buffer_in_secs - return target_time, secs_to_wait - - -def _wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, -) -> float: - """ - Return amount of seconds to wait for. - - More accurate version of _wait_until, uses total_seconds() which - allows for returning fractional second values. - """ - if tag is None: - # Use the name of the function calling this function. - tag = hintros.get_function_name(count=2) - curr_timestamp = get_wall_clock_time() - _LOG.debug( - "wait_until_timestamp=%s, curr_timestamp=%s", - wait_until_timestamp, - curr_timestamp, - ) - # We can only wait for times in the future. - if curr_timestamp > wait_until_timestamp: - _LOG.warning( - "curr_timestamp=%s, wait_until_timestamp=%s is in the future: " - "continuing ", - curr_timestamp, - wait_until_timestamp, - ) - time_in_secs = 0 - else: - time_in_secs = (wait_until_timestamp - curr_timestamp).total_seconds() - _LOG.debug( - "%s: wall_clock_time=%s: sleep for %s secs", - tag, - get_wall_clock_time(), - time_in_secs, - ) - return time_in_secs - - -def sync_wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - tag: Optional[str] = None, - log_verbosity: int = logging.DEBUG, -) -> None: - """ - Synchronous wait until the wall clock time is `timestamp`. - - More accurate version of sync_wait_until allowing to wait for - fractional seconds. - """ - # Sync wait. - time_in_secs = _wait_until( - wait_until_timestamp, get_wall_clock_time, tag=tag - ) - hdbg.dassert_lte(0, time_in_secs) - # TODO(gp): Consider using part of align_on_time_grid for high-precision clock. 
- time.sleep(time_in_secs) - # - hprint.log_frame( - _LOG, - "%s: wall_clock_time=%s: done waiting", - tag, - get_wall_clock_time(), - verbosity=log_verbosity, - ) - - -async def async_wait_until( - wait_until_timestamp: pd.Timestamp, - get_wall_clock_time: hdateti.GetWallClockTime, - *, - # TODO(gp): -> msg - tag: Optional[str] = None, -) -> None: - """ - Asynchronous wait until the wall clock time is `timestamp`. - """ - _LOG.debug(hprint.to_str("wait_until_timestamp")) - time_in_secs = _wait_until( - wait_until_timestamp, get_wall_clock_time, tag=tag - ) - # Async wait. - hdbg.dassert_lte(0, time_in_secs) - await asyncio.sleep(time_in_secs) - # - hprint.log_frame( - _LOG, "%s: wall_clock_time=%s: done waiting", tag, get_wall_clock_time() - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py deleted file mode 100644 index e010f5b08..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/haws.py +++ /dev/null @@ -1,266 +0,0 @@ -""" -Import as: - -import helpers.haws as haws -""" - -import logging -from typing import Dict, List, Optional - -import boto3 -import boto3.session -from boto3.resources.base import ServiceResource -from botocore.client import BaseClient - -import helpers.hdbg as hdbg -import helpers.hserver as hserver - -_LOG = logging.getLogger(__name__) - - -# AWS profile is used as a mechanism to differentiate between different AWS accounts. -# See CmampTask12943. -# `test` and `preprod` environments are in the same account using `ck` profile. -# `prod` environment is in the different account using `csfy` profile. 
-AWS_PROFILE = { - "test": "ck", - "preprod": "ck", - "prod": "csfy", -} - -# ############################################################################# -# Utils -# ############################################################################# - - -def get_session( - aws_profile: str, *, region: Optional[str] = None -) -> boto3.session.Session: - """ - Return connected Boto3 session. - - :param aws_profile: AWS profile name to use for the session. - :param region: AWS region, if None get region from AWS credentials. - :return: Boto3 session object. - """ - hdbg.dassert_isinstance(aws_profile, str) - # When deploying jobs via ECS the container obtains credentials based on - # passed task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if aws_profile in ["ck", "csfy"] and hserver.is_inside_ecs_container(): - _LOG.info("Fetching credentials from task IAM role") - session = boto3.session.Session() - else: - # We do not need to extract the credential from the file because - # the credential is already set and `boto3` know where to find them. - if region: - session = boto3.Session(profile_name=aws_profile, region_name=region) - else: - session = boto3.Session(profile_name=aws_profile) - return session - - -def get_service_client( - aws_profile: str, service_name: str, *, region: Optional[str] = None -) -> BaseClient: - """ - Return client to work with desired service in the specific region. - - For params look at `get_session()` - """ - session = get_session(aws_profile, region=region) - client = session.client(service_name=service_name) - return client - - -def get_service_resource(aws_profile: str, service_name: str) -> ServiceResource: - """ - Return resource to work with desired service in the specific region. 
- """ - session = get_session(aws_profile) - resource = session.resource(service_name=service_name) - return resource - - -# ############################################################################# -# ECS -# ############################################################################# - - -# TODO(Toma): Deprecate in favor of `get_service_client`. -def get_ecs_client( - aws_profile: str, *, region: Optional[str] = None -) -> BaseClient: - """ - Return client to work with Elastic Container Service in the specific - region. - - For params look at `get_session()` - """ - session = get_session(aws_profile, region=region) - client = session.client(service_name="ecs") - return client - - -def get_task_definition_image_url( - task_definition_name: str, environment: str, *, region: Optional[str] = None -) -> str: - """ - Get ECS task definition by name and return only image URL. - - :param task_definition_name: The name of the ECS task definition, - e.g., `cmamp-test`. - :param region: AWS region, if None get region from AWS credentials. - :param region: look at `get_session()` - """ - aws_profile = AWS_PROFILE[environment] - service_name = "ecs" - client = get_service_client(aws_profile, service_name, region=region) - # Get the last revision of the task definition. - task_description = client.describe_task_definition( - taskDefinition=task_definition_name - ) - task_definition_json = task_description["taskDefinition"] - image_url = task_definition_json["containerDefinitions"][0]["image"] - return image_url - - -def is_task_definition_exists( - task_definition_name: str, *, region: Optional[str] = None -) -> bool: - """ - Check if a task definition exists in the specified region. 
- - :param task_definition_name: the name of the ECS task definition - :param region: region of the task definition - :return: whether the task definition exists - """ - client = get_ecs_client("ck", region=region) - try: - client.describe_task_definition(taskDefinition=task_definition_name) - return True - except client.exceptions.ClientError as e: - _LOG.warning( - "Failed to describe task definition '%s': %s", - task_definition_name, - e, - ) - return False - - -# TODO(Nikola): Pass a dict config instead, so any part can be updated. -def update_task_definition( - task_definition_name: str, - new_image_url: str, - *, - region: Optional[str] = None, - environment: str, -) -> None: - """ - Create the new revision of specified ECS task definition. - - If region is different then the default one, it is assumed that ECR - replication is enabled from the default region to the target region. - - :param task_definition_name: The name of the ECS task definition for - which an update to container image URL is made, e.g., `cmamp- - test`. - :param new_image_url: New image URL for task definition. e.g., - `***.dkr.ecr.***/cmamp:prod`. - :param region: AWS region, if None get region from AWS credentials. - """ - aws_profile = AWS_PROFILE[environment] - client = get_ecs_client(aws_profile, region=region) - # Get the last revision of the task definition. - task_description = client.describe_task_definition( - taskDefinition=task_definition_name - ) - task_definition_json = task_description["taskDefinition"] - # Set new image. - old_image_url = task_definition_json["containerDefinitions"][0]["image"] - if old_image_url == new_image_url: - _LOG.info( - "New image url `%s` is already set for task definition `%s`!", - new_image_url, - task_definition_name, - ) - return - task_definition_json["containerDefinitions"][0]["image"] = new_image_url - # Register the new revision with the new image. 
- response = client.register_task_definition( - family=task_definition_name, - taskRoleArn=task_definition_json.get("taskRoleArn", ""), - executionRoleArn=task_definition_json["executionRoleArn"], - networkMode=task_definition_json["networkMode"], - containerDefinitions=task_definition_json["containerDefinitions"], - volumes=task_definition_json["volumes"], - placementConstraints=task_definition_json["placementConstraints"], - requiresCompatibilities=task_definition_json["requiresCompatibilities"], - cpu=task_definition_json["cpu"], - memory=task_definition_json["memory"], - ) - updated_image_url = response["taskDefinition"]["containerDefinitions"][0][ - "image" - ] - # Check if the image URL is updated. - hdbg.dassert_eq(updated_image_url, new_image_url) - _LOG.info( - "The image URL of `%s` task definition is updated to `%s`", - task_definition_name, - updated_image_url, - ) - - -def list_all_objects( - s3_client: BaseClient, bucket_name: str, prefix: str -) -> List[Dict]: - """ - List all objects in the specified S3 bucket under the given prefix, - handling pagination. - - :param s3_client: Instance of boto3 S3 client. - :param bucket_name: The name of the S3 bucket e.g., `cryptokaizen-data-test`. - :param prefix: Prefix to filter the S3 objects e.g., `binance/historical_bid_ask/`. - :return: A list of dictionaries containing metadata about each object. 
E.g., - ``` - [ - { - 'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-27/data.tar.gz', - 'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()), - 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"', - 'Size': 0, - 'StorageClass': 'STANDARD' - }, - { - 'Key': 'binance/historical_bid_ask/S_DEPTH/1000BONK_USDT/2023-05-28/data.tar.gz', - 'LastModified': datetime.datetime(2024, 5, 30, 17, 12, 12, tzinfo=tzlocal()), - 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"', - 'Size': 0, - 'StorageClass': 'STANDARD' - } - ] - ``` - """ - objects = [] - continuation_token = None - while True: - # If there's a continuation token, include it in the request to fetch - # the next page of results. - if continuation_token: - response = s3_client.list_objects_v2( - Bucket=bucket_name, - Prefix=prefix, - ContinuationToken=continuation_token, - ) - else: - response = s3_client.list_objects_v2( - Bucket=bucket_name, Prefix=prefix - ) - # Extend the objects list with the contents of the current page. - objects.extend(response.get("Contents", [])) - # Check if there are more pages. - if response.get("IsTruncated"): - continuation_token = response.get("NextContinuationToken") - else: - break - return objects diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py deleted file mode 100644 index d72a2f708..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache.py +++ /dev/null @@ -1,1086 +0,0 @@ -""" -See `docs/coding/all.hcache.explanation.md` for implementation details. 
- -Import as: - -import helpers.hcache as hcache -""" - -import atexit -import copy -import functools -import logging -import os -import time -from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast - -import joblib -import joblib.func_inspect as jfunci -import joblib.memory as jmemor - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hintrospection as hintros -import helpers.hlogging as hloggin -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsystem as hsystem -import helpers.htimer as htimer - -_LOG = hloggin.getLogger(__name__) -# Enable extra verbose debugging. Do not commit. -_TRACE = False - -# ############################################################################# - - -_IS_CACHE_ENABLED: bool = True - - -def enable_caching(val: bool) -> None: - """ - Enable or disable all caching, i.e., global, tagged global, function- - specific. - """ - global _IS_CACHE_ENABLED - if _TRACE: - _LOG.trace("") - _LOG.warning("Setting caching to %s -> %s", _IS_CACHE_ENABLED, val) - _IS_CACHE_ENABLED = val - - -def is_caching_enabled() -> bool: - """ - Check if cache is enabled. - - :return: whether the cache is enabled or not - """ - if _TRACE: - _LOG.trace("") - return _IS_CACHE_ENABLED - - -# Global switch to allow or prevent clearing the cache. -_IS_CLEAR_CACHE_ENABLED: bool = True - - -def enable_clear_cache(val: bool) -> None: - """ - Enable or disable clearing a cache (both global and function-specific). 
- """ - global _IS_CLEAR_CACHE_ENABLED - if _TRACE: - _LOG.trace("") - _LOG.warning( - "Enabling clear cache to %s -> %s", _IS_CLEAR_CACHE_ENABLED, val - ) - _IS_CLEAR_CACHE_ENABLED = val - - -# ############################################################################# -# Global cache interface -# ############################################################################# - - -def _get_cache_types() -> List[str]: - """ - Return the types (aka levels) of the cache. - """ - return ["mem", "disk"] - - -def _dassert_is_valid_cache_type(cache_type: str) -> None: - """ - Assert that `cache_type` is a valid cache type. - """ - hdbg.dassert_in(cache_type, _get_cache_types()) - - -def _get_global_cache_name(cache_type: str, tag: Optional[str] = None) -> str: - """ - Get the canonical cache name for a type of cache and tag, both global and - function-specific. - - E.g., `tmp.cache.{cache_type}.{tag}` like `tmp.cache.mem.unit_tests` - - :param cache_type: type of a cache - :param tag: optional unique tag of the cache - :return: name of the folder for a cache - """ - _dassert_is_valid_cache_type(cache_type) - cache_name = f"tmp.cache.{cache_type}" - if tag is not None: - cache_name += f".{tag}" - return cache_name - - -def _get_global_cache_path(cache_type: str, tag: Optional[str] = None) -> str: - """ - Get path to the directory storing the cache. - - For a memory cache, the path is in a predefined RAM disk. - For a disk cache, the path is on the file system relative to Git root. - - :return: the file system path to the cache - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - # Get the cache name. - cache_name = _get_global_cache_name(cache_type, tag) - # Get the enclosing directory path. 
- if cache_type == "mem": - if hsystem.get_os_name() == "Darwin": - root_path = "/tmp" - else: - root_path = "/mnt/tmpfs" - elif cache_type == "disk": - root_path = hgit.get_client_root(super_module=True) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - # Compute path. - file_name = os.path.join(root_path, cache_name) - file_name = os.path.abspath(file_name) - return file_name - - -def _get_cache_size(path: str, description: str) -> str: - """ - Report information about a cache (global or function) stored at a given - path. - """ - if _TRACE: - _LOG.trace("") - if path is None: - txt = f"'{description}' cache: path='{path}' doesn't exist yet" - else: - if os.path.exists(path): - size_in_bytes = hsystem.du(path) - if isinstance(size_in_bytes, str): - size_as_str = size_in_bytes - else: - size_as_str = hintros.format_size(size_in_bytes) - else: - size_as_str = "nan" - # TODO(gp): Compute number of files. - txt = f"'{description}' cache: path='{path}', size={size_as_str}" - return txt - - -def get_global_cache_info( - tag: Optional[str] = None, add_banner: bool = False -) -> str: - """ - Report information on global cache. - """ - if _TRACE: - _LOG.trace("") - txt = [] - if add_banner: - txt.append(hprint.frame("get_global_cache_info()", char1="<")) - txt.append(f"is global cache enabled={is_caching_enabled()}") - # - cache_types = _get_cache_types() - txt.append(f"cache_types={str(cache_types)}") - for cache_type in cache_types: - path = _get_global_cache_path(cache_type, tag=tag) - description = f"global {cache_type}" - cache_info = _get_cache_size(path, description) - txt.append(cache_info) - txt = "\n".join(txt) - return txt - - -# This is the global memory cache. -_MEMORY_CACHE: Optional[joblib.Memory] = None - - -# This is the global disk cache. 
-_DISK_CACHE: Optional[joblib.Memory] = None - - -def _create_global_cache_backend( - cache_type: str, tag: Optional[str] = None -) -> joblib.Memory: - """ - Create a Joblib memory object storing a cache. - - :return: cache backend object - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - dir_name = _get_global_cache_path(cache_type, tag) - _LOG.debug( - "Creating cache for cache_type='%s' and tag='%s' at '%s'", - cache_type, - tag, - dir_name, - ) - cache_backend = joblib.Memory(dir_name, verbose=0, compress=True) - return cache_backend - - -# TODO(gp): -> _get_global_cache -def get_global_cache( - cache_type: str, tag: Optional[str] = None -) -> joblib.Memory: - """ - Get global cache by cache type. - - :return: caching backend - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - global _MEMORY_CACHE - global _DISK_CACHE - if tag is None: - if cache_type == "mem": - # Create global memory cache if it doesn't exist. - if _MEMORY_CACHE is None: - _MEMORY_CACHE = _create_global_cache_backend(cache_type) - global_cache = _MEMORY_CACHE - elif cache_type == "disk": - # Create global disk cache if it doesn't exist. - if _DISK_CACHE is None: - _DISK_CACHE = _create_global_cache_backend(cache_type) - global_cache = _DISK_CACHE - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - else: - # Build a one-off cache using tag. - global_cache = _create_global_cache_backend(cache_type, tag) - return global_cache - - -def set_global_cache(cache_type: str, cache_backend: joblib.Memory) -> None: - """ - Set global cache by cache type. 
- - :param cache_type: type of a cache - :param cache_backend: caching backend - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - global _MEMORY_CACHE - global _DISK_CACHE - if cache_type == "mem": - _MEMORY_CACHE = cache_backend - elif cache_type == "disk": - _DISK_CACHE = cache_backend - - -def clear_global_cache( - cache_type: str, tag: Optional[str] = None, destroy: bool = False -) -> None: - """ - Reset the global cache by cache type. - - :param cache_type: type of a cache. `None` to clear all the caches. - :param tag: optional unique tag of the cache, empty by default - :param destroy: remove physical directory - """ - if _TRACE: - _LOG.trace("") - if cache_type == "all": - for cache_type_tmp in _get_cache_types(): - clear_global_cache(cache_type_tmp, tag=tag, destroy=destroy) - return - _dassert_is_valid_cache_type(cache_type) - # Clear and / or destroy the cache `cache_type` with the given `tag`. - cache_path = _get_global_cache_path(cache_type, tag) - if not _IS_CLEAR_CACHE_ENABLED: - hdbg.dfatal(f"Trying to delete cache '{cache_path}'") - description = f"global {cache_type}" - try: - # TODO(ShaopengZ): in some test run outside CK infra, the - # _get_cache_size() hangs. - info_before = _get_cache_size(cache_path, description) - except ValueError: - _LOG.warning("Cache has already been deleted by another process.") - return - _LOG.info("Before clear_global_cache: %s", info_before) - _LOG.warning("Resetting 'global %s' cache '%s'", cache_type, cache_path) - if hs3.is_s3_path(cache_path): - # For now we only allow to delete caches under the unit test path. 
- _, abs_path = hs3.split_path(cache_path) - hdbg.dassert( - abs_path.startswith("/tmp/cache.unit_test/"), - "The path '%s' is not valid", - abs_path, - ) - if destroy: - _LOG.warning("Destroying '%s' ...", cache_path) - hio.delete_dir(cache_path) - else: - cache_backend = get_global_cache(cache_type, tag) - try: - cache_backend.clear(warn=True) - except FileNotFoundError as e: - # A race condition can cause: - # FileNotFoundError: [Errno 2] No such file or directory: '/app/tmp.cache.disk/joblib' - _LOG.error("Caught %s: continuing", str(e)) - # Report stats before and after. - try: - info_after = _get_cache_size(cache_path, description) - except ValueError: - _LOG.warning("Cache has already been deleted by another process.") - return - _LOG.info("After clear_global_cache: %s", info_after) - - -# ############################################################################# -# CachedValueException -# ############################################################################# - - -class CachedValueException(RuntimeError): - """ - A cached function is run for a value present in the cache. - - This exception is thrown when the `check_only_if_present` mode is - used. - """ - - -# ############################################################################# -# NotCachedValueException -# ############################################################################# - - -class NotCachedValueException(RuntimeError): - """ - A cached function is run for a value not present in the cache. - - This exception is thrown when the `enable_read_only` mode is used. - """ - - -# ############################################################################# -# _Cached -# ############################################################################# - - -class _Cached: - # pylint: disable=protected-access - """ - Implement a cache in memory and disk for a function. 
- - If the function value was not cached either in memory or on disk, the function - `f()` is executed and the value is stored in both caches for future calls. - - This class uses 2 levels of caching: - - memory cache: useful for caching across multiple executions of a function in - a process or in notebooks without resetting the state - - disk cache: useful for retrieving the state among different executions of a - process or when a notebook is reset - """ - - def _create_function_memory_cache(self) -> joblib.Memory: - """ - Initialize Joblib object storing a memory cache for this function. - """ - if _TRACE: - _LOG.trace("") - _LOG.debug("Create memory cache") - # For memory always use the global cache. - cache_type = "mem" - memory_cache = get_global_cache(cache_type, self._tag) - # Get the Joblib object corresponding to the cached function. - return memory_cache.cache(self._func) - - def _create_function_disk_cache( - self, - ) -> Tuple[joblib.Memory, joblib.memory.MemorizedFunc]: - """ - Initialize Joblib object storing a disk cache for this function. - """ - if _TRACE: - _LOG.trace("") - if self.has_function_cache(): - hdbg.dassert( - not self._use_mem_cache, - "When using function cache the memory cache needs to be disabled", - ) - # Create a function-specific cache. - memory_kwargs: Dict[str, Any] = { - "verbose": 0, - "compress": True, - } - if hs3.is_s3_path(self._disk_cache_path): - import helpers.hjoblib as hjoblib - - # Register the S3 backend. - hjoblib.register_s3fs_store_backend() - s3fs = hs3.get_s3fs(self._aws_profile) - bucket, path = hs3.split_path(self._disk_cache_path) - # Remove the initial `/` from the path that makes the path - # absolute, since `Joblib.Memory` wants a path relative to the - # bucket. 
- hdbg.dassert( - path.startswith("/"), - "The path should be absolute instead of %s", - path, - ) - path = path[1:] - memory_kwargs.update( - { - "backend": "s3", - "backend_options": {"s3fs": s3fs, "bucket": bucket}, - } - ) - else: - path = self._disk_cache_path - _LOG.debug("path='%s'\nmemory_kwargs=\n%s", path, str(memory_kwargs)) - disk_cache = joblib.Memory(path, **memory_kwargs) - else: - # Use the global cache. - cache_type = "disk" - disk_cache = get_global_cache(cache_type, self._tag) - # Get the Joblib object corresponding to the cached function. - disk_cached_func = disk_cache.cache(self._func) - return disk_cache, disk_cached_func - # - - # /////////////////////////////////////////////////////////////////////////// - - def _reset_cache_tracing(self) -> None: - """ - Reset the values used to track which cache we are hitting when - executing the cached function. - """ - if _TRACE: - _LOG.trace("") - # The reset values depend on which caches are enabled. - self._last_used_disk_cache = self._use_disk_cache - self._last_used_mem_cache = self._use_mem_cache - - # TODO(gp): Either allow users to initialize `mem_cache_path` here or with - # `set_function_cache_path()` but not both code paths. It's unclear which option - # is better. On the one side `set_function_cache_path()` is more explicit, but - # it can't be changed. On the other side the wrapper needs to be initialized in - # one shot. - def __init__( - self, - func: Callable, - *, - use_mem_cache: bool = True, - use_disk_cache: bool = True, - verbose: bool = False, - tag: Optional[str] = None, - disk_cache_path: Optional[str] = None, - aws_profile: Optional[str] = "am", - ): - """ - Construct the class. 
- - :param func: function to cache - :param use_mem_cache, use_disk_cache: whether we allow memory and disk caching - :param verbose: print high-level information about the cache - behavior, e.g., - - whether a function was cached or not - - from which level the data was retrieved - - the execution time - - the amount of data retrieved - :param tag: a tag added to the global cache path to make it specific (e.g., - when running unit tests we want to use a different cache) - :param disk_cache_path: path of the function-specific cache - :param aws_profile: the AWS profile to use in case of S3 backend - """ - # Make the class have the same attributes (e.g., `__name__`, `__doc__`, - # `__dict__`) as the called function. - functools.update_wrapper(self, func) - if _TRACE: - _LOG.trace("") - # Save interface parameters. - hdbg.dassert_callable(func) - self._func = func - # TODO(gp): We should use memory cache only inside Jupyter notebooks. - self._use_mem_cache = use_mem_cache - self._use_disk_cache = use_disk_cache - self._is_verbose = verbose - self._tag = tag - self._disk_cache_path = disk_cache_path - self._aws_profile = aws_profile - # - self._reset_cache_tracing() - # Create the memory and disk cache objects for this function. - # TODO(gp): We might simplify the code by using a dict instead of 2 variables. - # Store the Joblib memory cache object for this function. - self._memory_cached_func = self._create_function_memory_cache() - # Store the Joblib memory object and the Joblib memory cache object for - # this function. - ( - self._disk_cache, - self._disk_cached_func, - ) = self._create_function_disk_cache() - # Enable a mode where an exception `NotCachedValueException` is thrown if - # the value is not in the cache. - self._enable_read_only = False - # Enable a mode where an exception `NotCachedValueException` is thrown if - # the value is in the cache, instead of accessing the value. 
- self._check_only_if_present = False - - def get_function_cache_info(self, add_banner: bool = False) -> str: - """ - Return info about the caching properties for this function. - """ - if _TRACE: - _LOG.trace("") - txt = [] - if add_banner: - txt.append(hprint.frame("get_global_cache_info()", char1="<")) - has_func_cache = self.has_function_cache() - txt.append(f"has function-specific cache={has_func_cache}") - if has_func_cache: - # Function-specific cache: print the paths of the local cache. - cache_type = "disk" - txt.append(f"local {cache_type} cache path={self._disk_cache_path}") - txt = "\n".join(txt) - return txt - - def get_last_cache_accessed(self) -> str: - """ - Get the cache used in the latest call of the wrapped function. - - :return: type of cache used in the last call - """ - if _TRACE: - _LOG.trace("") - if self._last_used_mem_cache: - ret = "mem" - elif self._last_used_disk_cache: - # If the disk cache was used, then the memory cache should not been used. - hdbg.dassert(not self._last_used_mem_cache) - ret = "disk" - else: - ret = "no_cache" - return ret - - def enable_read_only(self, val: bool) -> None: - """ - If set to True, the cached function can only read from the cache but - not execute for new values. - - Otherwise a `NotCachedValueException` is thrown. - """ - if _TRACE: - _LOG.trace("") - _LOG.warning( - "Setting enable_read_only to %s -> %s", self._enable_read_only, val - ) - self._enable_read_only = val - - def enable_check_only_if_present(self, val: bool) -> None: - """ - If set to True, the cached function a `CachedValueException` is thrown - if a function invocation was cached, instead of executing it. - - This can be used to check if a value was already cached without - triggering retrieving the value from the cache, e.g., when - probing the content of the cache. 
- """ - _LOG.warning( - "Setting check_only_if_present to %s -> %s", - self._check_only_if_present, - val, - ) - self._check_only_if_present = val - - def _get_memorized_result(self, cache_type: str) -> joblib.MemorizedResult: - """ - Get the instance of a cache by type. - - From https://github.com/joblib/joblib/blob/master/joblib/memory.py - A `MemorizedResult` is an object representing a cached value - - :param cache_type: type of a cache - :return: instance of the Joblib cache - """ - if _TRACE: - _LOG.trace("") - _dassert_is_valid_cache_type(cache_type) - if cache_type == "mem": - memorized_result = self._memory_cached_func - elif cache_type == "disk": - memorized_result = self._disk_cached_func - _LOG.debug("memorized_result=%s", memorized_result) - return memorized_result - - def _get_function_specific_code_path(self) -> str: - if _TRACE: - _LOG.trace("") - # Get the store backend. - cache_type = "disk" - memorized_result = self._get_memorized_result(cache_type) - store_backend = memorized_result.store_backend - # Get the function id (which is the full path). - func_id = jmemor._build_func_identifier(self._func) - # Assemble the path. - func_path = os.path.join(store_backend.location, func_id, "func_code.py") - _LOG.debug("func_path='%s'", func_path) - hdbg.dassert( - store_backend._item_exists(func_path), "Can't find '%s'", func_path - ) - return func_path - - def update_func_code_without_invalidating_cache(self) -> None: - """ - Update the Python code stored in the cache. - - This is used when we make changes to the cached function but we don't want - to invalidate the cache. - - NOTE: here the caller must guarantee that the new function yields exactly - the same results than the previous ones. Use carefully. 
- """ - if _TRACE: - _LOG.trace("") - hdbg.dassert( - self.has_function_cache(), - "This is used only for function-specific caches", - ) - # From `store_cached_func_code` in - # https://github.com/joblib/joblib/tree/master/joblib/_store_backends.py - func_path = self._get_function_specific_code_path() - # Archive old code. - new_func_path = ( - func_path + "." + hdateti.get_current_timestamp_as_string(tz="ET") - ) - _LOG.debug("new_func_path='%s'", new_func_path) - # Get the store backend. - cache_type = "disk" - memorized_result = self._get_memorized_result(cache_type) - store_backend = memorized_result.store_backend - hdbg.dassert( - not store_backend._item_exists(new_func_path), - "'%s' already exists", - new_func_path, - ) - store_backend._move_item(func_path, new_func_path) - # Write out function code to the cache. - func_code, _, first_line = jfunci.get_func_code(memorized_result.func) - memorized_result._write_func_code(func_code, first_line) - _LOG.debug("Updated func_path='%s'", func_path) - - # /////////////////////////////////////////////////////////////////////////// - # Function-specific cache. - # /////////////////////////////////////////////////////////////////////////// - - def has_function_cache(self) -> bool: - """ - Return whether this function has a function-specific cache or uses the - global cache. - """ - if _TRACE: - _LOG.trace("") - has_func_cache = self._disk_cache_path is not None - return has_func_cache - - # TODO(gp): Can we reuse the same code for `clear_function_cache` as above? - def clear_function_cache(self, destroy: bool = False) -> None: - """ - Clear a function-specific cache. - """ - if _TRACE: - _LOG.trace("") - hdbg.dassert( - self.has_function_cache(), - "This function has no function-specific cache", - ) - # Get the path for the disk cache. 
- cache_path = self._disk_cache_path - hdbg.dassert_is_not(cache_path, None) - cache_path = cast(str, cache_path) - if not _IS_CLEAR_CACHE_ENABLED: - hdbg.dfatal(f"Trying to delete function cache '{cache_path}'") - # Collect info before. - cache_type = "disk" - description = f"function {cache_type}" - info_before = _get_cache_size(cache_path, description) - _LOG.info("Before clear_function_cache: %s", info_before) - # Clear / destroy the cache. - _LOG.warning( - "Resetting '%s' cache for function '%s' in dir '%s'", - cache_type, - self._func.__name__, - cache_path, - ) - if hs3.is_s3_path(cache_path): - # For now we only allow to delete caches under the unit test path. - _, abs_path = hs3.split_path(cache_path) - hdbg.dassert( - abs_path.startswith("/tmp/"), - "The path '%s' is not valid", - abs_path, - ) - if destroy: - _LOG.warning("Destroying '%s' ...", cache_path) - hio.delete_dir(cache_path) - else: - self._disk_cache.clear() - # Print stats. - info_after = _get_cache_size(cache_path, description) - _LOG.info("After clear_function_cache: %s", info_after) - - def set_function_cache_path(self, cache_path: Optional[str]) -> None: - """ - Set the path for the function-specific cache for a cache type. - - :param cache_path: cache directory or `None` to use global cache - """ - if _TRACE: - _LOG.trace("") - if cache_path: - hdbg.dassert_dir_exists(cache_path) - # We need to disable the memory cache. - if cache_path: - self._use_mem_cache = False - else: - self._use_mem_cache = True - self._disk_cache_path = cache_path - ( - self._disk_cache, - self._disk_cached_func, - ) = self._create_function_disk_cache() - - # /////////////////////////////////////////////////////////////////////////// - - # TODO(gp): We should use the actual stored dir. - def _get_cache_dir(self, cache_type: str, tag: Optional[str]) -> str: - """ - Return the dir of the cache corresponding to `cache_type` and `tag`. 
- """ - if _TRACE: - _LOG.trace("") - if cache_type == "no_cache": - return "no_cache" - if self.has_function_cache(): - hdbg.dassert_eq(cache_type, "disk") - ret = self._disk_cache_path - else: - ret = _get_global_cache_path(cache_type, tag=tag) - ret = cast(str, ret) - return ret - - def _get_identifiers( - self, cache_type: str, *args: Any, **kwargs: Any - ) -> Tuple[str, str]: - """ - Get digests for current function and arguments to be used in cache. - - :param cache_type: type of a cache - :param args: original arguments of the call - :param kwargs: original kw-arguments of the call - :return: digests of the function and current arguments - """ - memorized_result = self._get_memorized_result(cache_type) - _LOG.debug("memorized_result=%s", memorized_result) - hdbg.dassert_is_not( - memorized_result, - None, - "Cache backend not initialized for %s", - cache_type, - ) - # This is needed for joblib >= 1.4.2. - func_id = memorized_result.func_id - args_id = memorized_result._get_args_id(*args, **kwargs) - _LOG.debug("func_id=%s args_id=%s", func_id, args_id) - return func_id, args_id - - def _has_cached_version( - self, cache_type: str, func_id: str, args_id: str - ) -> bool: - """ - Check if a cache contains an entry for a corresponding function and - arguments digests, and that function source has not changed. - - :param cache_type: type of a cache - :param func_id: digest of the function obtained from _get_identifiers - :param args_id: digest of arguments obtained from _get_identifiers - :return: whether there is an entry in a cache - """ - if _TRACE: - _LOG.trace("") - memorized_result = self._get_memorized_result(cache_type) - has_cached_version = memorized_result.store_backend.contains_item( - [func_id, args_id] - ) - _LOG.debug("has_cached_version=%s", has_cached_version) - if has_cached_version: - # We must check that the source of the function is the same, otherwise, - # cache tracing will not be correct. - # First, try faster check via joblib hash. 
- if self._func in jmemor._FUNCTION_HASHES: - func_hash = memorized_result._hash_func() - if func_hash == jmemor._FUNCTION_HASHES[self._func]: - return True - # Otherwise, check the the source of the function is still the same. - func_code, _, _ = jmemor.get_func_code(self._func) - old_func_code_cache = ( - memorized_result.store_backend.get_cached_func_code([func_id]) - ) - old_func_code, _ = jmemor.extract_first_line(old_func_code_cache) - if func_code == old_func_code: - return True - return False - - def _store_cached_version( - self, cache_type: str, func_id: str, args_id: str, obj: Any - ) -> None: - """ - Store returned value from the intrinsic function in the cache. - - :param cache_type: type of a cache - :param func_id: digest of the function obtained from `_get_identifiers()` - :param args_id: digest of arguments obtained from `_get_identifiers()` - :param obj: return value of the intrinsic function - """ - if _TRACE: - _LOG.trace("") - # This corresponds to - # /venv/lib/python3.8/site-packages/joblib/memory.py - # __call__ - if self._enable_read_only: - raise NotCachedValueException - memorized_result = self._get_memorized_result(cache_type) - # Write out function code to the cache. - func_code, _, first_line = jfunci.get_func_code(memorized_result.func) - memorized_result._write_func_code(func_code, first_line) - # Store the returned value into the cache. - memorized_result.store_backend.dump_item([func_id, args_id], obj) - - def _execute_func_from_disk_cache(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - # Get the function signature. 
- func_id, args_id = self._get_identifiers("disk", *args, **kwargs) - if self._has_cached_version("disk", func_id, args_id): - _LOG.debug("There is a disk cached version") - with htimer.TimedScope( - logging.INFO, "Loading cached version from disk" - ): - obj = self._disk_cached_func(*args, **kwargs) - if self._check_only_if_present: - raise CachedValueException(func_info) - else: - # INV: we didn't hit neither memory nor the disk cache. - self._last_used_disk_cache = False - # - _LOG.debug( - "%s: execute the intrinsic function", - func_info, - ) - # If the cache was read-only, then assert. - if self._enable_read_only: - msg = f"{func_info}: trying to execute" - raise NotCachedValueException(msg) - with htimer.TimedScope( - logging.INFO, "Updating cached version on disk" - ): - obj = self._disk_cached_func(*args, **kwargs) - # obj = self._execute_intrinsic_function(*args, **kwargs) - # The function was not cached in disk, so now we need to update the - # memory cache. - # self._store_cached_version("disk", func_id, args_id, obj) - return obj - - def _execute_intrinsic_function(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - with htimer.TimedScope(logging.INFO, "Executing intrinsic function"): - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - _LOG.debug("%s: execute intrinsic function", func_info) - if self._enable_read_only: - msg = f"{func_info}: trying to execute" - raise NotCachedValueException(msg) - obj = self._func(*args, **kwargs) - return obj - - def _execute_func_from_mem_cache(self, *args: Any, **kwargs: Any) -> Any: - """ - Execute the function from memory cache and if not possible try the - lower cache levels. - """ - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - # Get the function signature. 
- func_id, args_id = self._get_identifiers("mem", *args, **kwargs) - if self._has_cached_version("mem", func_id, args_id): - _LOG.debug("There is a mem cached version") - if self._check_only_if_present: - raise CachedValueException(func_info) - # The function execution was cached in the mem cache. - with htimer.TimedScope( - logging.INFO, "Loading cached version from memory" - ): - obj = self._memory_cached_func(*args, **kwargs) - else: - # INV: we know that we didn't hit the memory cache, but we don't know - # about the disk cache. - _LOG.debug("There is not a mem cached version") - self._last_used_mem_cache = False - # - if self._use_disk_cache: - # Try the disk cache. - _LOG.debug( - "Trying to retrieve from disk", - ) - obj = self._execute_func_from_disk_cache(*args, **kwargs) - else: - _LOG.warning("Skipping disk cache") - obj = self._execute_intrinsic_function(*args, **kwargs) - # The function was not cached in memory, so now we need to update the - # memory cache. - self._store_cached_version("mem", func_id, args_id, obj) - return obj - - def _execute_func(self, *args: Any, **kwargs: Any) -> Any: - if _TRACE: - _LOG.trace("") - func_info = ( - f"{self._func.__name__}(args={str(args)} kwargs={str(kwargs)})" - ) - _LOG.debug( - "%s: use_mem_cache=%s use_disk_cache=%s", - func_info, - self._use_mem_cache, - self._use_disk_cache, - ) - if self._use_mem_cache: - _LOG.debug("Trying to retrieve from memory") - obj = self._execute_func_from_mem_cache(*args, **kwargs) - else: - if self.has_function_cache(): - # For function-specific cache, skipping the memory cache is the - # normal behavior. 
- _LOG.debug( - "Function has function-specific cache: skipping memory cache" - ) - else: - _LOG.warning("Skipping memory cache") - self._last_used_mem_cache = False - if self._use_disk_cache: - obj = self._execute_func_from_disk_cache(*args, **kwargs) - else: - _LOG.warning("Skipping disk cache") - self._last_used_disk_cache = False - obj = self._execute_intrinsic_function(*args, **kwargs) - return obj - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - """ - Execute the wrapped function using the caches, if needed. - - :return: object returned by the wrapped function - """ - if _TRACE: - _LOG.trace("") - perf_counter_start: float - if self._is_verbose: - perf_counter_start = time.perf_counter() - # Execute the cached function. - if not is_caching_enabled(): - # No caching is allowed: execute the function. - _LOG.warning("All caching is disabled") - self._last_used_disk_cache = self._last_used_mem_cache = False - obj = self._func(*args, **kwargs) - else: - # Caching is allowed. - self._reset_cache_tracing() - obj = self._execute_func(*args, **kwargs) - _LOG.debug( - "%s: executed from '%s'", - self._func.__name__, - self.get_last_cache_accessed(), - ) - # TODO(gp): Not sure making a deep copy is a good idea. In the end, - # the client should not modify a cached value. - obj = copy.deepcopy(obj) - # Print caching info. - if self._is_verbose: - # Get time. - elapsed_time = time.perf_counter() - perf_counter_start - # Get memory. - # TODO(gp): This is very slow. 
- # obj_size = hintros.get_size_in_bytes(obj) - # obj_size_as_str = hintros.format_size(obj_size) - obj_size_as_str = "nan" - last_cache = self.get_last_cache_accessed() - cache_dir = self._get_cache_dir(last_cache, self._tag) - _LOG.info( - " --> Cache data for '%s' from '%s' cache " - "(size=%s, time=%.2f s, tag=%s, loc=%s)", - self._func.__name__, - last_cache, - obj_size_as_str, - elapsed_time, - self._tag, - cache_dir, - ) - return obj - - -# ############################################################################# -# Decorator -# ############################################################################# - - -def cache( - use_mem_cache: bool = True, - use_disk_cache: bool = True, - set_verbose_mode: bool = False, - tag: Optional[str] = None, - disk_cache_path: Optional[str] = None, - aws_profile: Optional[str] = None, -) -> Union[Callable, _Cached]: - """ - Decorate a function with a cache. - - The parameters are the same as `hcache._Cached`. - - Usage examples: - ``` - import helpers.hcache as hcache - - @hcache.cache() - def add(x: int, y: int) -> int: - return x + y - - @hcache.cache(use_mem_cache=False) - def add(x: int, y: int) -> int: - return x + y - ``` - """ - - def wrapper(func: Callable) -> _Cached: - return _Cached( - func, - use_mem_cache=use_mem_cache, - use_disk_cache=use_disk_cache, - verbose=set_verbose_mode, - tag=tag, - disk_cache_path=disk_cache_path, - aws_profile=aws_profile, - ) - - return wrapper - - -# ############################################################################# - -# Clean up the memory cache on-exit. -# TODO(gp): Add another function and make it silent. 
-atexit.register(clear_global_cache, cache_type="mem", destroy="true") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py deleted file mode 100644 index 0b3804436..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcache_simple.py +++ /dev/null @@ -1,1963 +0,0 @@ -""" -Detailed documentation at. - -- //helpers/docs/tools/helpers/all.hcache_simple.explanation.md -- //helpers/notebooks/hcache_simple.tutorial.ipynb - -Import as: - -import helpers.hcache_simple as hcacsimp -""" - -import functools -import glob -import json -import logging -import os -import pickle -import re -from typing import Any, Callable, Dict, List, Optional, Union, cast - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# Disable tracing for production code. -_LOG.trace = lambda *args, **kwargs: None -# _LOG.trace = _LOG.debug - -# ############################################################################# -# Memory cache. -# ############################################################################# - -# Type for the cache of a single function: key -> value properties. E.g., -# ``` -# { -# "{\"args\": [4], \"kwargs\": {}}": 16 -# } -# ``` -_FunctionCacheType = Dict[str, Any] - -# Basic type for caching data: func_name -> key -> value properties. E.g., -# ``` -# { -# "slow_square": { -# "{\"args\": [4], \"kwargs\": {}}": 16 -# } -# } -# ``` -_CacheType = Dict[str, _FunctionCacheType] - -# Type for cache property storage: func_name -> property_name -> property_value. 
E.g., -# ``` -# { -# "slow_square": { -# "type": "json", -# "cache_dir": "/tmp/cache", -# "write_through": True -# } -# } -# ``` -_CachePropertyType = Dict[str, Dict[str, Any]] - -# Create global variable for the memory cache. -if "_CACHE" not in globals(): - _LOG.trace("Creating _CACHE") - _CACHE: _CacheType = {} - -# Process-wide default `cache_mode` applied to every `@simple_cache` function -# when no explicit `cache_mode` is passed at the call site. Used by CLI scripts -# to flip all cached functions into refresh/disable/hit-or-abort mode from a -# single switch (see `hparser.add_cache_control_arg`). -_VALID_CACHE_MODES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") -_GLOBAL_CACHE_MODE: Optional[str] = None - - -def set_global_cache_mode(mode: Optional[str]) -> None: - """ - Set the process-wide default `cache_mode`. - - :param mode: one of `REFRESH_CACHE`, `DISABLE_CACHE`, - `HIT_CACHE_OR_ABORT`, or `None` to clear - """ - global _GLOBAL_CACHE_MODE - if mode is not None: - hdbg.dassert_in(mode, _VALID_CACHE_MODES) - _GLOBAL_CACHE_MODE = mode - - -def get_global_cache_mode() -> Optional[str]: - """ - Return the process-wide default `cache_mode`, or `None` if unset. - """ - return _GLOBAL_CACHE_MODE - - -# When enabled, every `@simple_cache` call emits a WARNING describing whether -# the result came from the cache, was computed on miss, or was recomputed -# because of an active `cache_mode`. -_CACHE_DEBUG: bool = False - - -def set_cache_debug(enabled: bool) -> None: - """ - Enable or disable process-wide cache-decision logging at WARNING level. - """ - global _CACHE_DEBUG - hdbg.dassert_isinstance(enabled, bool) - _CACHE_DEBUG = enabled - - -def get_cache_debug() -> bool: - """ - Return True if cache-decision logging is enabled. - """ - return _CACHE_DEBUG - - -def sanity_check_function_cache( - func_cache_data: _FunctionCacheType, *, assert_on_empty: bool = True -) -> None: - """ - Sanity check the function cache data. 
- - :param func_cache_data: The function cache data to check. - :param assert_on_empty: If True, assert that the function cache data - is not empty. - """ - hdbg.dassert_isinstance(func_cache_data, dict) - if assert_on_empty: - hdbg.dassert_ne(len(func_cache_data), 0, "Function data is empty") - for cache_key, cached_value in func_cache_data.items(): - hdbg.dassert_isinstance(cache_key, str) - hdbg.dassert_ne(cache_key, "", "Cache key is empty") - # cached_value can be any type, so no type check needed. - _ = cached_value - - -def sanity_check_cache( - cache_data: _CacheType, *, assert_on_empty: bool = True -) -> None: - """ - Sanity check the cache data. - - :param cache_data: The cache data to check. - :param assert_on_empty: If True, assert that the cache data is not - empty. - """ - hdbg.dassert_isinstance(cache_data, dict) - if assert_on_empty: - hdbg.dassert_ne(len(cache_data), 0, "Cache data is empty") - for func_name, func_cache_data in cache_data.items(): - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_ne(func_name, "", "Function name is empty") - sanity_check_function_cache( - func_cache_data, assert_on_empty=assert_on_empty - ) - - -def cache_data_to_str(cache_data: _CacheType) -> str: - """ - Convert cache data to a human-readable string. - - :param cache_data: The cache data to convert. - :return: A string representation of the cache data. - """ - txt = [] - txt.append(hprint.frame("Cache data")) - hdbg.dassert_isinstance(cache_data, dict) - for func_name, func_data in cache_data.items(): - txt.append(f"# func_name={func_name}") - hdbg.dassert_isinstance(func_data, dict) - for cache_key, cached_value in func_data.items(): - txt.append(f" cache_key={cache_key} cached_value={cached_value}") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Cache properties. 
-# ############################################################################# - -# There are several ways to control caching behavior: -# - By passing special control parameters to the decorated function: -# `force_refresh`, `abort_on_cache_miss`, `report_on_cache_miss`, `cache_mode` -# - By setting cache properties: -# - E.g., set_cache_property("func_name", "write_through", False) - -# - There are two types of properties: -# - `User Properties`: Configurable by the user to alter caching behavior. -# E.g., -# - `abort_on_cache_miss`: Whether to raise an error if a cache miss occurs -# - `report_on_cache_miss`: Whether to return a special value ("_cache_miss_") -# on a cache miss -# - `force_refresh`: Whether to bypass the cache and refresh the value -# - `System Properties`: -# - cache type (e.g., "json" or "pickle") -# - write through (e.g., True or False) -# - exclude keys (e.g., ["password", "api_key"]) -# - per-function cache location (cache_dir, cache_prefix) -# - per-function S3 configuration (s3_bucket, s3_prefix, aws_profile, auto_sync_s3) - -_SYSTEM_PROPERTIES = [ - "type", - "write_through", - "exclude_keys", - "cache_dir", - "cache_prefix", - "s3_bucket", - "s3_prefix", - "aws_profile", - "auto_sync_s3", -] - - -def get_main_cache_dir() -> str: - """ - Get the main cache directory (git root). - - :return: The absolute path to the main cache directory. - """ - git_dir = hgit.find_git_root() - cache_dir = os.path.abspath(git_dir) - return cache_dir - - -# Create global variable for the cache directory. -if "_CACHE_DIR" not in globals(): - _LOG.trace("Creating _CACHE_DIR") - _CACHE_DIR = get_main_cache_dir() - - -def set_cache_dir(cache_dir: str) -> None: - """ - Set the cache directory. 
- """ - global _CACHE_DIR - hdbg.dassert_isinstance(cache_dir, str) - _CACHE_DIR = os.path.abspath(cache_dir) - hio.create_dir(_CACHE_DIR, incremental=True) - _LOG.trace("Setting _CACHE_DIR to %s", _CACHE_DIR) - - -def get_cache_dir() -> str: - """ - Get the cache directory. - """ - return _CACHE_DIR - - -# Create global variable for the cache file prefix. -if "_CACHE_FILE_PREFIX" not in globals(): - _LOG.trace("Creating _CACHE_FILE_PREFIX") - _CACHE_FILE_PREFIX = "tmp.cache_simple" - - -def set_cache_file_prefix(prefix: str) -> None: - """ - Set the cache file prefix. - - :param prefix: prefix to use for cache files - """ - global _CACHE_FILE_PREFIX - hdbg.dassert_isinstance(prefix, str) - hdbg.dassert_ne(prefix, "", "Cache file prefix cannot be empty") - if prefix.endswith("."): - _LOG.warning( - "Prefix '%s' ends with '.' - cache files will have '..' in names", - prefix, - ) - _CACHE_FILE_PREFIX = prefix - _LOG.trace("Setting _CACHE_FILE_PREFIX to %s", _CACHE_FILE_PREFIX) - - -def get_cache_file_prefix() -> str: - """ - Get the cache file prefix. - - :return: cache file prefix - """ - return _CACHE_FILE_PREFIX - - -# ############################################################################# -# S3 cache configuration. -# ############################################################################# - -# Create global variable for S3 bucket. -if "_S3_BUCKET" not in globals(): - _LOG.trace("Creating _S3_BUCKET") - _S3_BUCKET: Optional[str] = None - -# Create global variable for S3 prefix. -if "_S3_PREFIX" not in globals(): - _LOG.trace("Creating _S3_PREFIX") - _S3_PREFIX: str = "cache" - -# Create global variable for AWS profile. -if "_AWS_PROFILE" not in globals(): - _LOG.trace("Creating _AWS_PROFILE") - _AWS_PROFILE: str = "ck" - -# Create global variable to track S3 auto-pull attempts. 
-if "_S3_AUTO_PULL_ATTEMPTED" not in globals(): - _LOG.trace("Creating _S3_AUTO_PULL_ATTEMPTED") - _S3_AUTO_PULL_ATTEMPTED: set = set() - - -def set_s3_bucket(bucket: str) -> None: - """ - Set the S3 bucket for cache storage. - - :param bucket: S3 bucket name (e.g., "my-bucket" or "s3://my- - bucket") - """ - global _S3_BUCKET - hdbg.dassert_isinstance(bucket, str) - hdbg.dassert_ne(bucket, "", "S3 bucket cannot be empty") - # Keep s3:// prefix if present, otherwise add it. - if not bucket.startswith("s3://"): - bucket = f"s3://{bucket}" - _S3_BUCKET = bucket - _LOG.trace("Setting _S3_BUCKET to %s", _S3_BUCKET) - - -def get_s3_bucket() -> Optional[str]: - """ - Get the S3 bucket for cache storage. - - :return: S3 bucket name with s3:// prefix, or None if not configured - """ - return _S3_BUCKET - - -def set_s3_prefix(prefix: str) -> None: - """ - Set the S3 prefix for cache files. - - :param prefix: S3 prefix path (e.g., "cache" or "app/cache") - """ - global _S3_PREFIX - hdbg.dassert_isinstance(prefix, str) - # Remove leading/trailing slashes. - prefix = prefix.strip("/") - _S3_PREFIX = prefix - _LOG.trace("Setting _S3_PREFIX to %s", _S3_PREFIX) - - -def get_s3_prefix() -> str: - """ - Get the S3 prefix for cache files. - - :return: S3 prefix path - """ - return _S3_PREFIX - - -def set_aws_profile(profile: str) -> None: - """ - Set the AWS profile for S3 access. - - :param profile: AWS profile name (e.g., "ck", "csfy") - """ - global _AWS_PROFILE - hdbg.dassert_isinstance(profile, str) - hdbg.dassert_ne(profile, "", "AWS profile cannot be empty") - _AWS_PROFILE = profile - _LOG.trace("Setting _AWS_PROFILE to %s", _AWS_PROFILE) - - -def get_aws_profile() -> str: - """ - Get the AWS profile for S3 access. - - :return: AWS profile name - """ - return _AWS_PROFILE - - -def get_cache_property_file() -> str: - """ - Get the cache property file name. - - :return: The cache property file name. 
- """ - prefix = get_cache_file_prefix() - val = os.path.join(get_cache_dir(), f"{prefix}_property.pkl") - return val - - -def _get_initial_cache_property() -> _CachePropertyType: - """ - Get the initial cache property from disk or create an empty one. - - :return: A dictionary containing cache properties. - """ - file_name_ = get_cache_property_file() - if os.path.exists(file_name_): - _LOG.trace("Loading from %s", file_name_) - # TODO(gp): Use _load_data_from_file, if possible. - with open(file_name_, "rb") as file: - val = pickle.load(file) - else: - # func_name -> property_name -> value. - val = {} - val = cast(_CachePropertyType, val) - return val - - -# Create global variables for the cache properties. -if "_CACHE_PROPERTY" not in globals(): - _LOG.trace("Creating _CACHE_PROPERTY") - _CACHE_PROPERTY: _CachePropertyType = _get_initial_cache_property() - - -def _check_valid_cache_property(property_name: str) -> None: - """ - Verify that a cache property name is valid for the given type. - - :param property_name: The property name to validate. - """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(property_name, str) - valid_properties = [ - # Abort if there is a cache miss. This is used to make sure everything - # is cached. - "abort_on_cache_miss", - # Report if there is a cache miss and return `_cache_miss_` instead of - # accessing the real value. - "report_on_cache_miss", - # Force to refresh the value. - "force_refresh", - # TODO(gp): "force_refresh_once" - # json or pickle cache type. - "type", - # Write-through mode: flush cache to disk after each update. - "write_through", - # List of keys to exclude from cache key generation. - "exclude_keys", - # Per-function cache directory. - "cache_dir", - # Per-function cache file prefix. - "cache_prefix", - # Per-function S3 bucket. - "s3_bucket", - # Per-function S3 prefix. - "s3_prefix", - # Per-function AWS profile. - "aws_profile", - # Auto-sync to S3 after cache updates. 
- "auto_sync_s3", - ] - hdbg.dassert_in(property_name, valid_properties) - - -def _infer_cache_type_from_path(file_path: str) -> str: - """ - Infer cache type from file path extension. - - :param file_path: path to cache file (local or S3) - :return: inferred type ("pickle" or "json") - """ - if file_path.endswith(".pkl"): - out = "pickle" - elif file_path.endswith(".json"): - out = "json" - else: - # Default to json. - out = "json" - return out - - -def _save_func_cache_data_to_file( - file_name: str, - cache_type: Optional[str], - func_cache_data: _FunctionCacheType, -) -> None: - """ - Save the function cache data to a file. - - :param file_name: The name of the file. - :param func_cache_data: The function cache data to save. - """ - # Infer cache type from file extension if not set. - if cache_type is None: - cache_type = _infer_cache_type_from_path(file_name) - hio.create_enclosing_dir(file_name, incremental=True) - _LOG.trace("Saving to '%s'", file_name) - # Save data. - if cache_type == "pickle": - with open(file_name, "wb") as file: - pickle.dump(func_cache_data, file) - elif cache_type == "json": - with open(file_name, "w", encoding="utf-8") as file: - json.dump( - func_cache_data, - file, - indent=4, - sort_keys=True, - ensure_ascii=False, - ) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - - -def set_cache_property(func_name: str, property_name: str, val: Any) -> None: - """ - Set a property for the cache of a given function name. - - :param func_name: The name of the function whose cache property is - to be set. - :param property_name: The name of the property to set. - :param val: The value to set for the property. - """ - _LOG.trace(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(property_name, str) - _check_valid_cache_property(property_name) - # Assign value. 
- cache_property = _CACHE_PROPERTY - if func_name not in cache_property: - cache_property[func_name] = {} - dict_ = cache_property[func_name] - dict_[property_name] = val - # Update values on the disk. - file_name = get_cache_property_file() - _LOG.trace("Updating %s", file_name) - # Make sure the dict is well-formed. - for func_name_tmp in cache_property: - hdbg.dassert_isinstance(func_name_tmp, str) - _LOG.trace( - "func_name_tmp='%s' -> %s", - func_name_tmp, - cache_property[func_name_tmp], - ) - hio.create_enclosing_dir(file_name, incremental=True) - _save_func_cache_data_to_file(file_name, "pickle", cache_property) - - -def get_cache_property( - func_name: str, property_name: str -) -> Optional[Union[bool, Any]]: - """ - Get the value of a property for the cache of a given function name. - - :return: The property value, which can be of any type depending on - the property. Returns None if the property is not set (for - system properties), or False (for user properties). - """ - _LOG.trace(hprint.func_signature_to_str()) - _check_valid_cache_property(property_name) - # Read from in-memory property storage. - cache_property = _CACHE_PROPERTY - if property_name in _SYSTEM_PROPERTIES: - if func_name not in cache_property: - return None - value = cache_property[func_name].get(property_name) - else: - value = cache_property.get(func_name, {}).get(property_name, False) - return value - - -def reset_cache_property() -> None: - """ - Reset the cache property for the given type. - """ - file_name = get_cache_property_file() - _LOG.warning("Resetting %s", file_name) - # Empty the values. - global _CACHE_PROPERTY - cache_property = _CACHE_PROPERTY - # Empty the values excluding the system properties like `type` and - # `write_through`. - _LOG.trace("before cache_property=%s", cache_property) - # Iterate over a list of keys to avoid modifying the dictionary during iteration. 
- for func_name_tmp in list(cache_property.keys()): - # Only remove non-system properties from the function's property dict. - func_prop = cache_property[func_name_tmp] - for property_name_tmp in list(func_prop.keys()): - if property_name_tmp not in _SYSTEM_PROPERTIES: - del func_prop[property_name_tmp] - _LOG.trace("after cache_property=%s", cache_property) - # Update values on the disk. - _LOG.trace("Updating %s", file_name) - hio.create_enclosing_dir(file_name, incremental=True) - _save_func_cache_data_to_file(file_name, "pickle", cache_property) - - -# ############################################################################# -# Get cache. -# ############################################################################# - -# Functions to retrieve cache (both memory and disk). - - -def _get_valid_cache_prefixes() -> set: - """ - Get all valid cache file prefixes. - - :return: set of valid prefixes (global + per-function custom - prefixes) - """ - global_prefix = get_cache_file_prefix() - valid_prefixes = {global_prefix} - for func_name_tmp in _CACHE_PROPERTY: - func_prefix = get_cache_property(func_name_tmp, "cache_prefix") - if func_prefix: - valid_prefixes.add(func_prefix) - return valid_prefixes - - -def _extract_func_names_from_cache_files( - file_paths: List[str], valid_prefixes: set -) -> set: - """ - Extract function names from cache file paths. - - :param file_paths: list of file paths to process - :param valid_prefixes: set of valid cache prefixes to filter by - :return: set of function names - """ - func_names = set() - pattern = r"^(.+)\.([^\.]+)\.(?:json|pkl)$" - for file_path in file_paths: - base_name = os.path.basename(file_path) - match = re.match(pattern, base_name) - if match: - file_prefix = match.group(1) - # Only include if prefix is valid for this project. 
- if file_prefix in valid_prefixes: - func_name = match.group(2) - func_names.add(func_name) - return func_names - - -def get_cached_func_names(type_: str) -> List[str]: - """ - Retrieve the function names cached with the specified type. - - :param type_: the type of cache to retrieve: - - 'mem': memory cache only - - 'disk': disk cache only (includes global and custom local cache - directories) - - 's3': S3 cache only (includes global and custom S3 buckets) - - 'local': local caches (mem + disk) - - 'all': all caches (mem + disk + s3) - :return: names of functions cached with the specified type - """ - if type_ == "mem": - # Only include functions with non-empty cache dicts. - out = sorted([fn for fn in _CACHE.keys() if len(_CACHE[fn]) > 0]) - elif type_ == "disk": - all_func_names = set() - cache_dir = get_cache_dir() - # Collect all valid prefixes. - valid_prefixes = _get_valid_cache_prefixes() - # Search global cache directory. - disk_files = glob.glob(os.path.join(cache_dir, "*.json")) - disk_files += glob.glob(os.path.join(cache_dir, "*.pkl")) - property_file_name = os.path.basename(get_cache_property_file()) - # Filter out property file. - disk_files = [ - f for f in disk_files if os.path.basename(f) != property_file_name - ] - # Extract function names from disk files. - all_func_names.update( - _extract_func_names_from_cache_files(disk_files, valid_prefixes) - ) - # Search custom cache directories. - for func_name_tmp in _CACHE_PROPERTY: - func_cache_dir = get_cache_property(func_name_tmp, "cache_dir") - if func_cache_dir: - # Function has custom cache directory. - file_name = _get_cache_file_name(func_name_tmp) - if os.path.exists(file_name): - all_func_names.add(func_name_tmp) - out = sorted(all_func_names) - elif type_ == "s3": - all_func_names = set() - # Search global S3 bucket. 
- if _check_s3_configured(): - bucket = get_s3_bucket() - prefix = get_s3_prefix() - aws_profile = get_aws_profile() - func_names = _list_s3_cached_func_names(bucket, prefix, aws_profile) - all_func_names.update(set(func_names)) - # Search custom S3 buckets. - s3_configs = set() - for func_name_tmp in _CACHE_PROPERTY: - func_s3_bucket = get_cache_property(func_name_tmp, "s3_bucket") - if func_s3_bucket: - func_s3_prefix = get_cache_property(func_name_tmp, "s3_prefix") - if not func_s3_prefix: - func_s3_prefix = get_s3_prefix() - func_aws_profile = get_cache_property( - func_name_tmp, "aws_profile" - ) - if not func_aws_profile: - func_aws_profile = get_aws_profile() - config_key = ( - func_s3_bucket, - func_s3_prefix, - func_aws_profile, - ) - s3_configs.add(config_key) - # List files from each unique S3 bucket config. - for bucket, prefix, aws_profile in s3_configs: - func_names = _list_s3_cached_func_names(bucket, prefix, aws_profile) - all_func_names.update(set(func_names)) - out = sorted(all_func_names) - elif type_ == "local": - mem_func_names = get_cached_func_names("mem") - disk_func_names = get_cached_func_names("disk") - out = sorted(set(mem_func_names + disk_func_names)) - elif type_ == "all": - local_func_names = get_cached_func_names("local") - s3_func_names = get_cached_func_names("s3") - out = sorted(set(local_func_names + s3_func_names)) - else: - raise ValueError( - f"Invalid type '{type_}'. Valid types: 'mem', 'disk', 's3', " - "'local', 'all'" - ) - return out - - -def cache_property_to_str(func_name: str = "") -> str: - """ - Convert cache properties to a string representation. - - :param func_name: the name of the function whose cache properties - are to be converted - :return: a string representation of the cache properties. 
E.g., - ``` - # func_name=slow_square - type: json - write_through: False - exclude_keys: [] - ``` - """ - txt: List[str] = [] - if func_name == "": - func_names = get_cached_func_names("all") - for func_name_tmp in func_names: - txt.append(cache_property_to_str(func_name_tmp)) - result = "\n".join(txt) - return result - # - txt.append(f"# func_name={func_name}") - cache_property = _CACHE_PROPERTY - _LOG.trace("cache_property=%s", cache_property) - if func_name in cache_property: - for k, v in cache_property[func_name].items(): - txt.append(f"{k}: {v}") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Cache performance. -# ############################################################################# - - -# Create global variable for the cache performance. -if "_CACHE_PERF" not in globals(): - _LOG.trace("Creating _CACHE_PERF") - # func_name -> perf properties (such as tot, hits, misses). - # Note: Values can be None when performance tracking is disabled. - _CACHE_PERF: Dict[str, Optional[Dict[str, int]]] = {} - - -def enable_cache_perf(func_name: str) -> None: - """ - Enable cache performance statistics for a given function. - """ - _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0} - - -def disable_cache_perf(func_name: str = "") -> None: - """ - Disable cache performance statistics for a given function. - - If `func_name` is empty, disable cache performance statistics for all - functions. - """ - if func_name == "": - for func_name_tmp in get_cached_func_names("all"): - disable_cache_perf(func_name_tmp) - return - _CACHE_PERF[func_name] = None - - -def reset_cache_perf(func_name: str = "") -> None: - """ - Reset cache performance statistics for a given function. 
- """ - if func_name == "": - for func_name_tmp in get_cached_func_names("all"): - reset_cache_perf(func_name_tmp) - return - _CACHE_PERF[func_name] = {"tot": 0, "hits": 0, "misses": 0} - - -def get_cache_perf(func_name: str) -> Optional[Dict[str, int]]: - """ - Get the cache performance object for a given function. - """ - if func_name in _CACHE_PERF: - return _CACHE_PERF[func_name] - return None - - -def get_cache_perf_stats(func_name: str) -> str: - """ - Get the cache performance statistics for a given function. - - :param func_name: The name of the function whose cache performance - stats are to be retrieved. - :return: A string with the cache performance statistics. E.g., - `slow_square: hits=2 misses=0 tot=2 hit_rate=1.00`. - """ - perf = get_cache_perf(func_name) - if perf is None: - _LOG.warning("No cache performance stats for '%s'", func_name) - return "" - hits = perf["hits"] - misses = perf["misses"] - tot = perf["tot"] - hit_rate = hits / tot if tot > 0 else 0 - txt = ( - f"{func_name}: hits={hits} misses={misses} tot={tot}" - f" hit_rate={hit_rate:.2f}" - ) - return txt - - -# ############################################################################# -# Disk cache. -# ############################################################################# - -# Functions to save and retrieve cache from disk. -# ``` -# { -# "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ -# "25", -# 3.195e-05 -# ], -# "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ -# "5", -# 3.195e-05 -# ] -# } -# ``` - - -def _get_cache_file_name(func_name: str) -> str: - """ - Get the cache file name for a given function. 
- - The function returns the full cache file path including the local - directory, configured globally or per-function. - - :param func_name: the name of the function - :return: the cache file name with appropriate extension - """ - _LOG.trace("func_name='%s'", func_name) - hdbg.dassert_isinstance(func_name, str) - # Check for per-function cache dir, otherwise use global. - func_cache_dir = get_cache_property(func_name, "cache_dir") - if func_cache_dir: - cache_dir = func_cache_dir - else: - cache_dir = get_cache_dir() - # Check for per-function cache file prefix, otherwise use global. - func_cache_prefix = get_cache_property(func_name, "cache_prefix") - if func_cache_prefix: - prefix = func_cache_prefix - else: - prefix = get_cache_file_prefix() - file_name = os.path.join(cache_dir, f"{prefix}.{func_name}") - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("cache_type")) - if cache_type == "pickle": - file_name += ".pkl" - elif cache_type == "json": - file_name += ".json" - elif cache_type is None: - # Try to infer cache type from existing files. - if os.path.exists(file_name + ".pkl"): - file_name += ".pkl" - elif os.path.exists(file_name + ".json"): - file_name += ".json" - else: - # Default to json if no file exists. - file_name += ".json" - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - return file_name - - -def _save_cache_dict_to_disk( - func_name: str, func_cache_data: _FunctionCacheType -) -> None: - """ - Save a cache dictionary to the disk cache. - - :param func_name: The name of the function. - :param func_cache_data: The function cache data to save. - """ - # Get the filename for the disk cache. 
- file_name = _get_cache_file_name(func_name) - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("file_name cache_type")) - _save_func_cache_data_to_file(file_name, cache_type, func_cache_data) - - -def _load_func_cache_data_from_file( - file_name: str, cache_type: Optional[str] -) -> _FunctionCacheType: - """ - Load the function cache data from a file. - - :param file_name: the name of the file - :param cache_type: the type of the cache - :return: the function cache data - """ - # Infer cache type from file extension if not set. - if cache_type is None: - cache_type = _infer_cache_type_from_path(file_name) - # Load data. - _LOG.trace("Loading from '%s'", file_name) - hdbg.dassert_file_exists(file_name) - if cache_type == "pickle": - with open(file_name, "rb") as file: - func_cache_data = pickle.load(file) - elif cache_type == "json": - with open(file_name, "r", encoding="utf-8") as file: - func_cache_data = json.load(file) - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - return func_cache_data - - -# TODO(gp): Maybe private? -def get_disk_cache(func_name: str) -> _FunctionCacheType: - """ - Retrieve the disk cache for a given function. - - :param func_name: the name of the function - :return: cache data, if it exists - """ - file_name = _get_cache_file_name(func_name) - # Return empty cache if the disk cache does not exist. - if not os.path.exists(file_name): - _LOG.trace("No cache file on disk") - return {} - # Load data from existing file. - cache_type = get_cache_property(func_name, "type") - _LOG.trace(hprint.to_str("cache_type")) - func_cache_data = _load_func_cache_data_from_file(file_name, cache_type) - return func_cache_data - - -# ############################################################################# -# S3 cache. -# ############################################################################# - -# Functions to save and retrieve cache from S3. 
- - -def _build_s3_cache_path_for_type(func_name: str, cache_type: str) -> str: - """ - Build S3 cache path for a specific cache type. - - :param func_name: the name of the function - :param cache_type: the cache type ("json" or "pickle") - :return: the S3 path with appropriate extension - """ - # Check for per-function S3 bucket, otherwise use global. - bucket = get_cache_property(func_name, "s3_bucket") - if bucket: - # Ensure s3:// prefix. - if not bucket.startswith("s3://"): - bucket = f"s3://{bucket}" - else: - bucket = get_s3_bucket() - if bucket is None: - raise ValueError("S3 bucket not configured") - # Check for per-function S3 prefix, otherwise use global. - s3_prefix = get_cache_property(func_name, "s3_prefix") - if not s3_prefix: - s3_prefix = get_s3_prefix() - # Build cache file name with explicit type. - func_cache_prefix = get_cache_property(func_name, "cache_prefix") - if func_cache_prefix: - prefix = func_cache_prefix - else: - prefix = get_cache_file_prefix() - # Build filename with appropriate extension. - if cache_type == "pickle": - base_name = f"{prefix}.{func_name}.pkl" - elif cache_type == "json": - base_name = f"{prefix}.{func_name}.json" - else: - raise ValueError(f"Invalid cache type '{cache_type}'") - # Construct S3 path. - if s3_prefix: - s3_path = f"{bucket}/{s3_prefix}/{base_name}" - else: - s3_path = f"{bucket}/{base_name}" - return s3_path - - -def _get_s3_cache_path(func_name: str) -> str: - """ - Get the full S3 path for a cache file. - - :param func_name: the name of the function - :return: the S3 path (e.g., "s3://bucket/prefix/cache_file.json") - """ - # Check for per-function S3 bucket, otherwise use global. - bucket = get_cache_property(func_name, "s3_bucket") - if bucket: - # Ensure s3:// prefix. - if not bucket.startswith("s3://"): - bucket = f"s3://{bucket}" - else: - bucket = get_s3_bucket() - if bucket is None: - raise ValueError("S3 bucket not configured") - # Check for per-function S3 prefix, otherwise use global. 
- s3_prefix = get_cache_property(func_name, "s3_prefix") - if not s3_prefix: - s3_prefix = get_s3_prefix() - base_name = os.path.basename(_get_cache_file_name(func_name)) - if s3_prefix: - s3_path = f"{bucket}/{s3_prefix}/{base_name}" - else: - s3_path = f"{bucket}/{base_name}" - return s3_path - - -def _extract_func_name_from_cache_file(cache_file_name: str) -> Optional[str]: - """ - Extract function name from cache file name. - - Cache file names follow the format: .. - - :param cache_file_name: the cache file name (e.g., - "cache.my_func.json") - :return: the function name, or None if pattern does not match - """ - pattern = r"^(.+)\.([^\.]+)\.(?:json|pkl)$" - match = re.match(pattern, cache_file_name) - if match: - return match.group(2) - return None - - -def _list_s3_cached_func_names( - bucket: str, - prefix: Optional[str], - aws_profile: str, -) -> List[str]: - """ - List names of functions cached in S3 bucket. - - :param bucket: S3 bucket path (e.g., "s3://my-bucket") - :param prefix: S3 prefix path (e.g., "cache/shared") - :param aws_profile: AWS profile name - :return: names of functions cached in S3 bucket - """ - # Build S3 directory path. - if prefix: - s3_dir = f"{bucket}/{prefix}" - else: - s3_dir = bucket - # List files in S3 directory. - try: - s3_files = hs3.listdir( - s3_dir, - pattern="*", - only_files=True, - use_relative_paths=False, - aws_profile=aws_profile, - ) - except Exception as e: - _LOG.warning("Failed to list S3 directory '%s': %s", s3_dir, e) - return [] - # Collect all valid cache file prefixes. - valid_prefixes = _get_valid_cache_prefixes() - # Extract function names from S3 file names. - func_names = _extract_func_names_from_cache_files(s3_files, valid_prefixes) - out = sorted(func_names) - return out - - -def _check_s3_configured(func_name: Optional[str] = None) -> bool: - """ - Check if S3 is properly configured. 
- - :param func_name: the name of the function to check per-function S3 - settings - :return: True if S3 is configured, False otherwise - """ - # Check if per-function S3 bucket is defined. - if func_name: - func_s3_bucket = get_cache_property(func_name, "s3_bucket") - if func_s3_bucket: - return True - # Check if global bucket is defined. - bucket = get_s3_bucket() - if bucket is None: - _LOG.warning("S3 bucket not configured - use set_s3_bucket()") - return False - return True - - -def _upload_cache_to_s3(func_name: str) -> None: - """ - Upload a cache file to S3. - - :param func_name: the name of the function - """ - if not _check_s3_configured(func_name): - return - # Get local file. - local_file = _get_cache_file_name(func_name) - if not os.path.exists(local_file): - _LOG.debug("No local cache file to upload for '%s'", func_name) - return - # Get S3 path. - s3_path = _get_s3_cache_path(func_name) - # Check for per-function AWS profile, otherwise use global. - func_aws_profile = get_cache_property(func_name, "aws_profile") - if func_aws_profile: - aws_profile = func_aws_profile - else: - aws_profile = get_aws_profile() - _LOG.info("Uploading cache to %s", s3_path) - # Read local file and write to S3. - cache_type = get_cache_property(func_name, "type") - # Infer cache type from file extension if not set. - if cache_type is None: - cache_type = _infer_cache_type_from_path(local_file) - if cache_type == "pickle": - # Read pickle files as bytes and write. - with open(local_file, "rb") as f: - data = f.read() - s3fs_ = hs3.get_s3fs(aws_profile) - with s3fs_.open(s3_path, "wb") as f: - f.write(data) - else: - # Read JSON files as string and write. - data = hio.from_file(local_file) - hs3.to_file(data, s3_path, aws_profile=aws_profile) - - -def _download_cache_from_s3(func_name: str) -> bool: - """ - Download a cache file from S3. - - The function downloads the cache file from S3 to the local cache - directory, configured globally or per-function. 
- - :param func_name: the name of the function - :return: True if download is successful, False otherwise - """ - if not _check_s3_configured(func_name): - return False - # Check for per-function AWS profile, otherwise use global. - func_aws_profile = get_cache_property(func_name, "aws_profile") - if func_aws_profile: - aws_profile = func_aws_profile - else: - aws_profile = get_aws_profile() - s3fs_ = hs3.get_s3fs(aws_profile) - # Check cache type to determine file extension. - cache_type = get_cache_property(func_name, "type") - # If type is unknown, try both extensions in S3. - if cache_type is None: - # Try both .json and .pkl extensions. - for ext_type in ["json", "pickle"]: - # Build S3 path for this type. - s3_path_candidate = _build_s3_cache_path_for_type(func_name, ext_type) - if s3fs_.exists(s3_path_candidate): - # Set type property and use this path. - cache_type = ext_type - s3_path = s3_path_candidate - set_cache_property(func_name, "type", cache_type) - _LOG.debug("Found S3 cache with type=%s", ext_type) - break - else: - # Neither extension found in S3. - _LOG.debug("No S3 cache found for '%s'", func_name) - return False - else: - # Type is known, get paths normally. - s3_path = _get_s3_cache_path(func_name) - if not s3fs_.exists(s3_path): - _LOG.debug("No S3 cache found for '%s'", func_name) - return False - # Get local file path. - local_file = _get_cache_file_name(func_name) - _LOG.info("Downloading cache from %s", s3_path) - # Download from S3. - cache_type = get_cache_property(func_name, "type") - # Infer cache type from file extension if not set. - if cache_type is None: - cache_type = _infer_cache_type_from_path(s3_path) - hio.create_enclosing_dir(local_file, incremental=True) - if cache_type == "pickle": - # Read pickle files as bytes and write. - with s3fs_.open(s3_path, "rb") as f: - data = f.read() - with open(local_file, "wb") as f: - f.write(data) - else: - # Read JSON files as string and write. 
- data = hs3.from_file(s3_path, aws_profile=aws_profile) - hio.to_file(local_file, data) - return True - - -def push_cache_to_s3(func_name: str = "") -> None: - """ - Push local cache to S3 for a given function. - - :param func_name: the name of the function. If empty, push all - caches - """ - # Flush memory cache to disk. - flush_cache_to_disk(func_name) - funcs_to_push = [func_name] if func_name else get_cached_func_names("disk") - for func_name_tmp in funcs_to_push: - _LOG.info("Pushing cache to S3 for '%s'", func_name_tmp) - # Upload to S3. - _upload_cache_to_s3(func_name_tmp) - - -def pull_cache_from_s3(func_name: str = "") -> None: - """ - Pull cache from S3 to local storage for a given function. - - If no function name is provided, pulls all functions cached on S3 and - specified in _CACHE_PROPERTY and/or found in the global S3 bucket. - - Functions cached in a custom S3 bucket using another machine cannot be - pulled without sharing the _CACHE_PROPERTY file. - - Without it, the pull only retrieves cache files from the global bucket - - For more info, see `docs/tools/helpers/all.hcache_simple.explanation.md` - - :param func_name: the name of the function. If empty, pull all - discoverable caches - """ - if func_name != "": - _LOG.info("Pulling cache from S3 for '%s'", func_name) - # Download from S3. - success = _download_cache_from_s3(func_name) - if success: - # Load into memory cache. - force_cache_from_disk(func_name) - else: - _LOG.warning("Failed to pull cache from S3 for '%s'", func_name) - return - # Discover all cached functions and pull each one. - all_funcs = get_cached_func_names("s3") - for func_name_tmp in all_funcs: - pull_cache_from_s3(func_name_tmp) - _LOG.info("Pulled %d functions from S3", len(all_funcs)) - - -def sync_cache_with_s3(func_name: str = "") -> None: - """ - Sync cache between local and S3 (bidirectional merge). - - Downloads S3 cache, merges with local, and uploads result to S3. 
- - If no function name is provided, syncs all discoverable functions. - - :param func_name: the name of the function. If empty, sync all - caches - """ - if func_name == "": - # Discover all cached functions and sync each one. - all_funcs = get_cached_func_names("all") - for func_name_tmp in all_funcs: - sync_cache_with_s3(func_name_tmp) - _LOG.info("Synced %d functions with S3", len(all_funcs)) - return - _LOG.info("Syncing cache with S3 for '%s'", func_name) - # Get current local cache (disk + memory, memory takes precedence). - local_cache = get_disk_cache(func_name).copy() - local_cache.update(get_mem_cache(func_name).copy()) - # Download cache from S3. - success = _download_cache_from_s3(func_name) - if success: - # Load S3 cache. - s3_cache = get_disk_cache(func_name) - # Merge; if available, local takes precedence over what was downloaded - # from S3. - s3_cache.update(local_cache) - # Only save, upload, and store if merged cache is non-empty. - # Do not create empty cache files or entries. - if len(s3_cache) > 0: - # Save merged cache. - _save_cache_dict_to_disk(func_name, s3_cache) - # Upload back to S3. - _upload_cache_to_s3(func_name) - # Update memory cache. - global _CACHE - _CACHE[func_name] = s3_cache - else: - # Upload local cache to S3. - push_cache_to_s3(func_name) - - -# ############################################################################# -# Stats. -# ############################################################################# - - -def cache_stats_to_str( - func_name: Optional[str] = "", -) -> Optional["pd.DataFrame"]: # noqa: F821 - """ - Print the cache stats. - - If `func_name` is empty or None, returns stats for all functions with local cache - (mem + disk). - - E.g., - ``` - find_email: - memory: - - disk: 1044 - - verify_email: - memory: - - disk: 2322 - ``` - """ - # We want to limit the dependency from pandas in the cache. - import pandas as pd - - # Handle None as empty string. 
- if func_name is None: - func_name = "" - if func_name == "": - result = [] - for func_name_tmp in get_cached_func_names("local"): - result_tmp = cache_stats_to_str(func_name_tmp) - result.append(result_tmp) - if result: - result = pd.concat(result) - else: - result = None - return result - result = {} - # Memory cache. - if func_name in _CACHE: - result["memory"] = len(_CACHE[func_name]) - else: - result["memory"] = "-" - # Disk cache. - file_name = _get_cache_file_name(func_name) - if os.path.exists(file_name): - disk_cache = get_disk_cache(func_name) - result["disk"] = len(disk_cache) - else: - result["disk"] = "-" - result = pd.Series(result).to_frame().T - result.index = [func_name] - return result - - -def force_cache_from_disk(func_name: Optional[str] = "") -> None: - """ - Force loading the cache from disk and update the memory cache. - - :param func_name: the name of the function. If empty or None, apply - to all discoverable functions with cache on local disk - """ - # Handle None as empty string. - if func_name is None: - func_name = "" - if func_name == "": - _LOG.info("Before:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cached_func_names("disk"): - force_cache_from_disk(func_name_tmp) - _LOG.info("After:\n%s", cache_stats_to_str()) - return - _LOG.trace("func_name='%s'", func_name) - # Get disk cache. - disk_cache = get_disk_cache(func_name) - _LOG.trace("disk_cache=%s", len(disk_cache)) - # Update the memory cache only if non-empty. - # Do not store empty dicts to avoid phantom cached functions. - if len(disk_cache) > 0: - global _CACHE - _CACHE[func_name] = disk_cache - - -def get_mem_cache(func_name: str) -> _FunctionCacheType: - """ - Retrieve the memory cache for a given function. 
- - :param func_name: the name of the function - :return: memory cache data - """ - mem_cache = _CACHE.get(func_name, {}) - return mem_cache - - -def flush_cache_to_disk(func_name: Optional[str] = "") -> None: - """ - Flush the memory cache to disk and update the memory cache. - - This merges memory cache with disk cache (memory takes precedence) - and saves to disk, then updates memory with the merged result. - - :param func_name: the name of the function. If empty or None, apply - to all functions with memory cache - """ - # Handle None as empty string. - if func_name is None: - func_name = "" - if func_name == "": - _LOG.info("Before:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cached_func_names("mem"): - flush_cache_to_disk(func_name_tmp) - _LOG.info("After:\n%s", cache_stats_to_str()) - return - _LOG.trace("func_name='%s'", func_name) - # Get memory cache. - mem_cache = get_mem_cache(func_name) - _LOG.trace("mem_cache=%s", len(mem_cache)) - # Get disk cache. - disk_cache = get_disk_cache(func_name) - _LOG.trace("disk_cache=%s", len(disk_cache)) - # Merge disk cache with memory cache. - disk_cache.update(mem_cache) - # Save merged cache to disk only if non-empty. - # Do not create empty cache files. - if len(disk_cache) > 0: - _save_cache_dict_to_disk(func_name, disk_cache) - # Update the memory cache. - global _CACHE - _CACHE[func_name] = disk_cache - - -def get_cache(func_name: str) -> _FunctionCacheType: - """ - Retrieve the cache for a given function name. - - This function implements a three-tier cache lookup: - 1. Memory cache (fastest) - 2. Disk cache (persistent) - 3. S3 cache (shared, if configured) - - If S3 is configured and cache is not in memory/disk, attempts to pull - from S3 automatically (once per function per session). 
- - :param func_name: the name of the function whose cache is to be - retrieved - :return: cache data - """ - global _CACHE - global _S3_AUTO_PULL_ATTEMPTED - if func_name in _CACHE: - _LOG.trace("Loading mem cache for '%s'", func_name) - cache = get_mem_cache(func_name) - # Return cache from memory. - if cache: - return cache - # Try loading cache from local disk. - _LOG.trace("Loading disk cache for '%s'", func_name) - func_cache_data = get_disk_cache(func_name) - if func_cache_data: - _CACHE[func_name] = func_cache_data - return func_cache_data - # Try S3 auto-pull if configured. - if func_name not in _S3_AUTO_PULL_ATTEMPTED: - _S3_AUTO_PULL_ATTEMPTED.add(func_name) - if _check_s3_configured(func_name): - _LOG.trace( - "Cache not in memory/disk for '%s', attempting S3 pull", - func_name, - ) - success = _download_cache_from_s3(func_name) - if success: - _LOG.trace("S3 pull succeeded for '%s'", func_name) - # Reload from disk after S3 pull. - func_cache_data = get_disk_cache(func_name) - # Store in memory only if non-empty. - if len(func_cache_data) > 0: - _CACHE[func_name] = func_cache_data - return func_cache_data - # Return empty dict without storing it in _CACHE. - # Only store when we have actual cached data. - empty_cache: _FunctionCacheType = {} - return empty_cache - - -# ############################################################################# -# Reset cache. -# ############################################################################# - -# Functions to reset cache (both memory and disk). - - -def reset_mem_cache(func_name: Optional[str] = "") -> None: - """ - Reset the memory cache for a given function. - - :param func_name: The name of the function. If empty or None, reset - all memory caches (for functions currently in memory). - """ - _LOG.trace(hprint.func_signature_to_str()) - # Handle None as empty string. 
- if func_name is None: - func_name = "" - hdbg.dassert_isinstance(func_name, str) - if func_name == "": - _LOG.trace("Before resetting memory cache:\n%s", cache_stats_to_str()) - for func_name_tmp in get_cached_func_names("mem"): - reset_mem_cache(func_name=func_name_tmp) - _LOG.trace("After:\n%s", cache_stats_to_str()) - return - # Delete if present. - if func_name in _CACHE: - del _CACHE[func_name] - - -def reset_disk_cache( - func_name: Optional[str] = "", interactive: bool = True -) -> None: - """ - Reset the disk cache for a given function name. - - If `func_name` is empty or None, reset all discoverable disk cache files: - - All files in global cache directory matching global prefix - - All files for functions with custom cache_dir/cache_prefix tracked in - _CACHE_PROPERTY - - Note: This cannot discover orphaned cache files in custom directories - for functions not tracked in _CACHE_PROPERTY. - - :param func_name: The name of the function whose disk cache is to - be reset. If empty or None, reset all discoverable disk cache files. - :param interactive: If True, prompt the user for confirmation before - resetting the disk cache. - """ - _LOG.trace(hprint.func_signature_to_str()) - # Handle None as empty string. - if func_name is None: - func_name = "" - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(interactive, bool) - if interactive: - hsystem.query_yes_no( - f"Are you sure you want to reset the disk cache for func_name={func_name}?" - ) - if func_name == "": - _LOG.trace("Before resetting disk cache:\n%s", cache_stats_to_str()) - _LOG.warning("Resetting disk cache") - # Reset files in global cache directory. - prefix = get_cache_file_prefix() - cache_files = glob.glob(os.path.join(get_cache_dir(), f"{prefix}.*")) - for file_name in cache_files: - if os.path.isfile(file_name): - os.remove(file_name) - # Reset files in per-function cache directories. 
- cache_property = _CACHE_PROPERTY - for func_name_tmp in cache_property: - func_props = cache_property[func_name_tmp] - # Check if function has per-function cache dir or prefix. - if "cache_dir" in func_props or "cache_prefix" in func_props: - # Get cache file for this function. - func_cache_file = _get_cache_file_name(func_name_tmp) - if os.path.exists(func_cache_file): - _LOG.debug( - "Removing per-function cache file '%s'", func_cache_file - ) - os.remove(func_cache_file) - _LOG.trace("After:\n%s", cache_stats_to_str()) - return - # - file_name = _get_cache_file_name(func_name) - if os.path.exists(file_name): - _LOG.warning("Removing cache file '%s'", file_name) - os.remove(file_name) - - -def reset_cache(func_name: Optional[str] = "", interactive: bool = True) -> None: - """ - Reset both memory and disk cache for a given function. - - If `func_name` is empty or None, reset all discoverable caches: - - All memory caches (for functions currently in memory) - - All disk cache files in global cache directory matching global prefix - - All disk cache files for functions with custom cache_dir/cache_prefix - tracked in _CACHE_PROPERTY - - Note: This cannot discover orphaned cache files in custom directories - for functions not tracked in _CACHE_PROPERTY. - - :param func_name: The name of the function. If empty or None, reset all - discoverable caches. - :param interactive: If True, prompt the user for confirmation before - resetting the disk cache. - """ - _LOG.trace(hprint.func_signature_to_str()) - # Handle None as empty string. - if func_name is None: - func_name = "" - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_isinstance(interactive, bool) - reset_mem_cache(func_name=func_name) - reset_disk_cache(func_name=func_name, interactive=interactive) - - -# ############################################################################# -# Mock / unit test cache. 
-# ############################################################################# - - -def _get_cache_key(args: Any, kwargs: Any) -> str: - cache_key = json.dumps( - {"args": args, "kwargs": kwargs}, - sort_keys=True, - default=str, - ) - _LOG.trace("cache_key=%s", cache_key) - return cache_key - - -def mock_cache(func_name: str, cache_key: str, value: Any) -> None: - """ - Mock the function cache for a given function and cache key. - - :param func_name: The name of the function. - :param cache_key: The cache key. - :param value: The value to store in the cache. - """ - # We should not use the main cache directory for mocking. - hdbg.dassert_ne( - get_cache_dir(), - get_main_cache_dir(), - msg="Do not use the main cache directory for mocking", - ) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_ne(func_name, "", "Function name is empty") - hdbg.dassert_isinstance(cache_key, str) - hdbg.dassert_ne(cache_key, "", "Cache key is empty") - # Get the function cache. - func_cache_data = get_cache(func_name) - # Update the function cache. - func_cache_data[cache_key] = value - # Ensure the cache dict is stored in memory. - global _CACHE - _CACHE[func_name] = func_cache_data - - -def mock_cache_from_args_kwargs( - func_name: str, args: Any, kwargs: Any, value: Any -) -> None: - """ - Mock the function cache for a given function and args/kwargs. - - E.g., when testing a cached expensive function (e.g., an LLM call or - downloading data) we can mock the cache to return a fixed value, - instead of calling the function. - - :param func_name: The name of the function. - :param args: The arguments for the function. - :param kwargs: The keyword arguments for the function. - :param value: The value to store in the cache. - """ - hdbg.dassert_isinstance(args, tuple, "args is not a tuple: %s", args) - hdbg.dassert_isinstance(kwargs, dict, "kwargs is not a dict: %s", kwargs) - # Get the cache key. - cache_key = _get_cache_key(args, kwargs) - # Mock the cache. 
- mock_cache(func_name, cache_key, value) - - -def mock_cache_from_disk( - func_name: str, func_cache_data: _FunctionCacheType -) -> None: - """ - Mock the function cache from disk data. - - :param func_name: The name of the function. - :param cache_data: The cache data to mock. - """ - hdbg.dassert_isinstance(func_name, str) - sanity_check_function_cache(func_cache_data, assert_on_empty=True) - for cache_key, cached_value in func_cache_data.items(): - mock_cache(func_name, cache_key, cached_value) - - -# ############################################################################# -# Decorator -# ############################################################################# - -# - Decorated functions accept special keyword arguments to control caching -# behavior: -# - `force_refresh=True`: Bypass cache and recompute the result -# - `abort_on_cache_miss=True`: Raise an exception if cache miss occurs -# - `report_on_cache_miss=True`: Return "_cache_miss_" instead of computing on -# cache miss -# - `cache_mode`: Alternative way to control caching with predefined modes: -# - `"REFRESH_CACHE"`: Force cache refresh (same as `force_refresh=True`) -# - `"HIT_CACHE_OR_ABORT"`: Abort on cache miss (same as -# `abort_on_cache_miss=True`) -# - `"DISABLE_CACHE"`: Completely disable caching for this call - - -# TODO(gp): Not sure that cache_mode is worth having the duplication. -def simple_cache( - *, - cache_type: str = "json", - write_through: bool = True, - exclude_keys: Optional[List[str]] = None, - cache_dir: Optional[str] = None, - cache_prefix: Optional[str] = None, - s3_bucket: Optional[str] = None, - s3_prefix: Optional[str] = None, - aws_profile: str = "ck", - auto_sync_s3: bool = False, -) -> Callable[..., Any]: - """ - Decorate a function to cache its results. - - The cache is stored in memory and on disk, with optional S3 support. - - All decorator parameters are stored as properties and persisted to disk. 
- This allows runtime modification via `set_cache_property(func_name, - property_name, value)`. - - Note: The cache type is only set during first decoration to prevent - accidental cache corruption (e.g., changing from json to pickle would - orphan existing cache files). To change cache type for an existing - function, first clear the property via reset_cache_property() or - manually set it via set_cache_property(). - - :param cache_type: type of cache to use ('json' or 'pickle') - :param write_through: if True, the cache is written to disk after - each access - :param exclude_keys: keys to exclude from the cache key - :param cache_dir: directory for this function's cache files. If - None, uses global cache directory - :param cache_prefix: prefix for this function's cache files. If - None, uses global cache prefix - :param s3_bucket: S3 bucket for this function's cache (e.g., - "s3://my-bucket"). If specified, enables S3 cache syncing for - this function - :param s3_prefix: S3 prefix path for this function's cache - :param aws_profile: AWS profile for S3 access - :param auto_sync_s3: if True, automatically sync to S3 after each - cache update - :return: a decorator that can be applied to a function - """ - - def decorator(func: Callable[..., Any]) -> Callable[..., Any]: - """ - Decorate a function to cache its results. - """ - hdbg.dassert_in(cache_type, ("json", "pickle")) - func_name = getattr(func, "__name__", "unknown_function") - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Store function-specific properties. - # Note: cache type is only set if not already set to prevent accidental - # cache corruption (e.g., changing from json to pickle would orphan - # existing cache files). To change cache type, use reset_cache_property() - # first or manually set it via set_cache_property(). 
- existing_type = get_cache_property(func_name, "type") - if not existing_type: - set_cache_property(func_name, "type", cache_type) - # Store caching behavior settings. - set_cache_property(func_name, "write_through", write_through) - # Store exclude_keys as empty list if None for consistency. - exclude_keys_list: List[str] = ( - exclude_keys if exclude_keys is not None else [] - ) - set_cache_property(func_name, "exclude_keys", exclude_keys_list) - # Store per-function cache settings. - if cache_dir is not None: - set_cache_property(func_name, "cache_dir", cache_dir) - if cache_prefix is not None: - set_cache_property(func_name, "cache_prefix", cache_prefix) - # Store per-function S3 settings. - if s3_bucket is not None: - set_cache_property(func_name, "s3_bucket", s3_bucket) - if s3_prefix is not None: - set_cache_property(func_name, "s3_prefix", s3_prefix) - if aws_profile is not None: - set_cache_property(func_name, "aws_profile", aws_profile) - set_cache_property(func_name, "auto_sync_s3", auto_sync_s3) - - @functools.wraps(func) - def wrapper( - *args: Any, - force_refresh: bool = False, - abort_on_cache_miss: bool = False, - report_on_cache_miss: bool = False, - **kwargs: Any, - ) -> Any: - """ - Cache the results of the decorated function. - - :param args: Positional arguments for the function. - :param force_refresh: If True, the cache is refreshed - regardless of whether the key exists in the cache. - :param abort_on_cache_miss: If True, an exception is raised - if the key is not found in the cache. - :param report_on_cache_miss: If True, a message is logged if - the key is not found in the cache, and the function - returns "_cache_miss_" instead of accessing the real - value. - :param kwargs: Keyword arguments for the function. - :return: The cached value or the result of the function. - """ - # Get the function name. 
- func_name = getattr(func, "__name__", "unknown_function") - if func_name.endswith("_intrinsic"): - func_name = func_name[: -len("_intrinsic")] - # Get the cache. - cache = get_cache(func_name) - # Remove keys that should not be part of the cache key. - # Read from properties first, fall back to closure. - exclude_keys_prop = get_cache_property(func_name, "exclude_keys") - exclude_keys_to_use = ( - exclude_keys_prop - if exclude_keys_prop is not None - else exclude_keys_list - ) - # Also exclude cache_mode since it's a control parameter. - excluded_keys = set(exclude_keys_to_use) | {"cache_mode"} - kwargs_for_cache_key = { - k: v for k, v in kwargs.items() if k not in excluded_keys - } - # Prepare kwargs for the actual function call. - # Keep cache_mode since the wrapped function may need it in its signature. - kwargs_for_func = kwargs.copy() - # Resolve effective cache_mode: explicit kwarg wins, otherwise - # fall back to the process-wide global (set via - # `set_global_cache_mode`). Do NOT inject into kwargs_for_func, as - # the wrapped function may not accept a `cache_mode` parameter. - if "cache_mode" in kwargs: - cache_mode = kwargs.get("cache_mode") - else: - cache_mode = _GLOBAL_CACHE_MODE - # `cache_mode` is a special keyword argument to control caching - # behavior. - if cache_mode is not None: - _LOG.trace("cache_mode=%s", cache_mode) - if cache_mode == "REFRESH_CACHE": - # Force to refresh the cache. - _LOG.trace("Forcing cache refresh") - force_refresh = True - if cache_mode == "HIT_CACHE_OR_ABORT": - # Abort if the cache is not hit. - _LOG.trace("Abort on cache miss") - abort_on_cache_miss = True - if cache_mode == "DISABLE_CACHE": - # Disable the cache. - _LOG.trace("Disabling cache") - if _CACHE_DEBUG: - _LOG.warning( - "cache[%s]: COMPUTE (cache disabled by cache_mode=DISABLE_CACHE)", - func_name, - ) - value = func(*args, **kwargs_for_func) - return value - # Get the key. 
- cache_key = _get_cache_key(args, kwargs_for_cache_key) - # Update the performance stats. - cache_perf = get_cache_perf(func_name) - _LOG.trace("cache_perf is None=%s", cache_perf is None) - if cache_perf: - hdbg.dassert_in("tot", cache_perf) - cache_perf["tot"] += 1 - # Handle a forced refresh. - force_refresh = force_refresh or get_cache_property( - func_name, "force_refresh" - ) - _LOG.trace("force_refresh=%s", force_refresh) - if cache_key in cache and not force_refresh: - _LOG.trace("Cache hit for key='%s'", cache_key) - if _CACHE_DEBUG: - _LOG.warning("cache[%s]: HIT", func_name) - # Update the performance stats. - if cache_perf: - cache_perf["hits"] += 1 - # Retrieve the value from the cache. - value = cache[cache_key] - else: - _LOG.trace("Cache miss for key='%s'", cache_key) - # Update the performance stats. - if cache_perf: - cache_perf["misses"] += 1 - # Abort on cache miss. - abort_on_cache_miss = abort_on_cache_miss or get_cache_property( - func_name, "abort_on_cache_miss" - ) - _LOG.trace("abort_on_cache_miss=%s", abort_on_cache_miss) - if abort_on_cache_miss: - raise ValueError(f"Cache miss for key='{cache_key}'") - # Report on cache miss. - report_on_cache_miss = report_on_cache_miss or get_cache_property( - func_name, "report_on_cache_miss" - ) - _LOG.trace("report_on_cache_miss=%s", report_on_cache_miss) - if report_on_cache_miss: - _LOG.trace("Cache miss for key='%s'", cache_key) - return "_cache_miss_" - if _CACHE_DEBUG: - if force_refresh: - _LOG.warning( - "cache[%s]: RECOMPUTE (cache_mode=REFRESH_CACHE)", - func_name, - ) - else: - _LOG.warning("cache[%s]: COMPUTE (miss)", func_name) - # Access the intrinsic function. - value = func(*args, **kwargs_for_func) - # Update cache. - cache[cache_key] = value - # Ensure the cache dict is stored in memory. - global _CACHE - _CACHE[func_name] = cache - _LOG.trace( - "Updating cache with key='%s' value='%s'", cache_key, value - ) - # Check if write-through is enabled. 
- write_through_prop = get_cache_property( - func_name, "write_through" - ) - write_through_enabled = ( - write_through_prop - if write_through_prop is not None - else write_through - ) - if write_through_enabled: - _LOG.trace("Writing through to disk") - flush_cache_to_disk(func_name) - # Check if auto-sync to S3 is enabled. - auto_sync = get_cache_property(func_name, "auto_sync_s3") - if auto_sync: - _LOG.debug("Auto-syncing cache to S3 for '%s'", func_name) - _upload_cache_to_s3(func_name) - return value - - return wrapper - - return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py deleted file mode 100644 index e2f54a02c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcfile.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Import as: - -import helpers.hcfile as hcfile -""" - -import logging -import re -from typing import List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hio as hio - -_LOG = logging.getLogger(__name__) - - -def parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: - """ - Read and parse a cfile. - - :param cfile: path to the cfile - :return: list of tuples, each containing a line number and a transform, e.g., - [(file_name, line_number, transform), ...] - """ - # Read the cfile. - cfile_lines = hio.from_file(cfile) - cfile_lines = cfile_lines.split("\n") - # - ret = [] - # Parse the cfile. - for line in cfile_lines: - _LOG.debug("line=%s", line) - hdbg.dassert_isinstance(line, str) - # Parse the lines of the cfile, like - # ``` - # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: ... - # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for ... 
- # dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] - # ``` - # extracting the file name, line number, and transform. - regex = r"^([^:]+):(\d+):(.*)$" - match = re.match(regex, line) - if match is None: - _LOG.debug("Failed to parse line '%s'", line) - continue - # Extract the file name, line number, and transform. - file_name = match.group(1) - line_number = match.group(2) - transform = match.group(3) - # Add values to the list. - ret.append((file_name, line_number, transform)) - return ret - - -# ############################################################################# - - -def inject_todos_from_cfile( - cfile_txt: str, todo_user: str, comment_prefix: str -) -> None: - """ - Inject the TODOs from a cfile in the corresponding files. - - Given a cfile with the following content: - the function will inject the TODO in the corresponding file and line - - :param cfile_txt: The content of the cfile. - :param todo_user: The user to use in the TODO. - :param comment_prefix: The prefix to use for the comment (e.g., "#") - """ - # For each file, store - # - the current file content - # - the offset (i.e., how many lines we inserted in the file so far, so - # we can inject the TODO at the correct line number) - # - the index of the last line modified to make sure the TODOs are for - # increasing line numbers. - file_content = {} - for todo_line in cfile_txt.split("\n"): - _LOG.debug("\n%s", hprint.frame(f"todo line='{todo_line}'")) - if todo_line.strip() == "": - continue - # dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic for extracting required status checks and pull request reviews is repeated. Consider creating a helper function to handle this extraction to reduce redundancy. 
- m = re.match(r"^\s*(\S+):(\d+):\s*(.*)$", todo_line) - if not m: - _LOG.warning("Can't parse line='%s': skipping", todo_line) - continue - file_name, todo_line_number, todo = m.groups() - todo_line_number = int(todo_line_number) - _LOG.debug(hprint.to_str("file_name todo_line_number todo")) - # Update the state if needed. - if file_name not in file_content: - _LOG.debug("Reading %s", file_name) - hdbg.dassert_path_exists(file_name) - txt = hio.from_file(file_name).split("\n") - offset = 0 - last_line_modified = 0 - file_content[file_name] = (txt, offset, last_line_modified) - # Extract the info for the file to process. - txt, offset, last_line_modified = file_content[file_name] - _LOG.debug(hprint.to_str("offset last_line_modified")) - hdbg.dassert_lt( - last_line_modified, - todo_line_number, - "The TODOs don't look like they are increasing line numbers: " - "TODO at line %d is before the last line modified %d", - todo_line_number, - last_line_modified, - ) - # We subtract 1 from the line number since TODOs count from 1, while - # Python arrays count from 0. - act_line_number = todo_line_number - 1 + offset - hdbg.dassert_lte(0, act_line_number) - hdbg.dassert_lt(act_line_number, len(txt)) - insert_line = txt[act_line_number] - _LOG.debug(hprint.to_str("act_line_number insert_line")) - # Extract how many spaces there are at place where the line to insert - # the TODO. - m = re.match(r"^(\s*)\S", insert_line) - hdbg.dassert(m, "Can't parse insert_line='%s'", insert_line) - spaces = len(m.group(1)) * " " # type: ignore[union-attr] - # Build the new line to insert. - new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}" - _LOG.debug(hprint.to_str("new_line")) - # Insert the new line in txt at the correct position. - txt = txt[:act_line_number] + [new_line] + txt[act_line_number:] - # Update the state. - offset += 1 - file_content[file_name] = (txt, offset, todo_line_number) - # Write updated files back. 
- for file_name, (txt, offset, last_line_modified) in file_content.items(): - _ = last_line_modified - _LOG.info("Writing %d lines in %s", offset, file_name) - txt = "\n".join(txt) - hio.to_file(file_name, txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py deleted file mode 100644 index 675ba557d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt.py +++ /dev/null @@ -1,549 +0,0 @@ -""" -Import as: - -import helpers.hchatgpt as hchatgp -""" - -import logging -import math -import os -import sys -import time -from typing import Dict, List, Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio - -# import helpers.henv as henv -# henv.install_module_if_not_present("openai") -import openai # noqa: E402 - -_LOG = logging.getLogger(__name__) - -# Setting API as env var in your terminal is the correct approach. -# NEVER upload any OpenAI API key to GitHub, OpenAI will revoke it. - -client = openai.OpenAI() - -# The OpenAI File ID cache will be saved as `prefix_to_root/gpt_id.json` -# Only files under the given root directory may be uploaded to OpenAI. -prefix_to_root = os.path.join(os.path.dirname(__file__), "..") - -# ############################################################################# -# Create/update/delete Assistant. -# ############################################################################# - - -def create_assistant( - assistant_name: str, - instructions: str, - *, - model: str = "gpt-3.5-turbo-1106", - use_retrieval: bool = True, - use_code_interpreter: bool = True, - use_function: Optional[Dict] = None, -) -> str: - """ - Create an OpenAI Assistant for your OpenAI Organization. All configs can - still be updated after creation. 
- - This method should only be used when a new Assistant is needed. - Otherwise, use the Assistant name to retrieve an existing Assistant. - - :param assistant_name: name of the Assistant to be created - :param instructions: instruction string that describes the expected - behavior of assistant - :param model: GPT model used by the assistant - :param use_retrieval: enable the retrieval tool from OpenAI - :param use_code_interpreter: enable the code interpreter tool from - OpenAI - :param use_function: enable the function tool from OpenAI (To be - implemented) - """ - # Create the assistant - tools = [] - if use_retrieval: - tools.append({"type": "retrieval"}) - if use_code_interpreter: - tools.append({"type": "code_interpreter"}) - if use_function: - tools.append(use_function) - if not model: - model = "gpt-3.5-turbo-1106" - assistant = client.beta.assistants.create( - instructions=instructions, - name=assistant_name, - model=model, - tools=tools, - ) - return assistant.id - - -def update_assistant_by_id( - assistant_id: str, - *, - instructions: str = "", - name: str = "", - tools: Optional[List[Dict[str, str]]] = None, - model: str = "", - file_ids: Optional[List[str]] = None, -) -> str: - """ - Update an existing OpenAI Assistant in our OpenAI Organization. 
- - :param assistant_id: Assistant to be updated - :param instructions: instruction string that describes the expected - behavior of assistant - :param name: change the name of assistant, no change when empty - :param tools: change the tools of assistant, no change when empty - :param model: change the model of assistant, no change when empty - :param file_ids: change the files linked to assistant, no change - when empty - """ - if tools is None: - tools = [] - if file_ids is None: - file_ids = [] - update_config = { - "instructions": instructions, - "name": name, - "tools": tools, - "model": model, - "file_ids": file_ids, - } - not_empty_params = {k: v for k, v in update_config.items() if v} - updated_assistant = client.beta.assistants.update( - assistant_id, **not_empty_params - ) - return updated_assistant.id - - -def delete_assistant_by_id(assistant_id: str) -> None: - """ - Delete an Assistant from our OpenAI Organization. - """ - client.beta.assistants.delete(assistant_id) - - -def get_all_assistants() -> List[openai.types.beta.assistant.Assistant]: - """ - Get all available Assistant objects in our OpenAI Organization. - """ - list_assistants_response = client.beta.assistants.list( - order="desc", - limit="100", - ) - assistants = list_assistants_response.data - return assistants - - -def get_all_assistant_names() -> List[str]: - """ - Get all available Assistant names in our OpenAI Organization. - """ - assistants = get_all_assistants() - return [assistant.name for assistant in assistants] - - -def get_assistant_id_by_name(assistant_name) -> str: - """ - Get the id of an Assistant by its name. 
- """ - assistant = None - assistants = get_all_assistants() - for cur_assistant in assistants: - if cur_assistant.name == assistant_name: - assistant = cur_assistant - break - hdbg.dassert_is_not( - assistant, None, f"Assistant '{assistant_name}' not found" - ) - assert assistant is not None - return assistant.id - - -# ############################################################################# -# Create directory structure storing gpt file ids -# ############################################################################# - - -def _path_to_dict(path: str) -> Dict: - """ - Generate a dictionary of all files under a given folder. - """ - for root, dirs, files in os.walk(path): - tree = {d: _path_to_dict(os.path.join(root, d)) for d in dirs} - tree.update({f: {"name": f} for f in files}) - return tree - return {} - - -# TODO(Henry): We use fileIO here to store the directory structure, which may -# not be thread-safe. Should change to use DAO if we have any. -def _dump_gpt_ids(dictionary: Dict) -> None: - """ - Dump a given OpenAI File ID dictionary into a cache file for furture use. - """ - file_path = os.path.join(prefix_to_root, "gpt_id.json") - hio.to_json(file_path, dictionary) - return - - -def _load_gpt_ids() -> Dict: - """ - Load the OpenAI File ID dictionary from the cache file. - """ - file_path = os.path.join(prefix_to_root, "gpt_id.json") - if os.path.exists(file_path) and os.path.isfile(file_path): - return hio.from_json(file_path) - else: - directory_dict = _path_to_dict(prefix_to_root) - _dump_gpt_ids(directory_dict) - return directory_dict - - -# ############################################################################# -# Upload file to OpenAI account -# ############################################################################# - - -def _upload_to_gpt_no_set_id(path_from_root: str) -> str: - """ - Upload a file to OpenAI. - - This method will NOT set File ID to cache. 
- """ - _LOG.info("Uploading file %s to chatgpt", path_from_root) - upload_file_response = client.files.create( - # Must use 'rb' regardless of file type. - file=open(os.path.join(prefix_to_root, path_from_root), "rb"), - purpose="assistants", - ) - gpt_id = upload_file_response.id - return gpt_id - - -def _get_gpt_id_file(dictionary: Dict, path_from_root: str) -> Dict[str, str]: - """ - Get the OpenAI File ID for a given file using a specific cache. - - If this file has not been uploaded to OpenAI, this method will - upload it and generate its OpenAI File ID. - """ - cur = dictionary - path_list = path_from_root.split("/") - for level in path_list: - cur = cur[level] - if "gpt_id" not in cur: - cur["gpt_id"] = _upload_to_gpt_no_set_id(path_from_root) - _dump_gpt_ids(dictionary) - return cur - - -def _set_gpt_id(path_from_root: str, gpt_id: str) -> None: - """ - Manually set the cached OpenAI File ID of a given file. - - This method should ONLY be called if a file manually uploaded to - OpenAI. It will NOT upload the given file to OpenAI. - """ - gpt_id_dict = _load_gpt_ids() - item = _get_gpt_id_file(gpt_id_dict, path_from_root) - item["gpt_id"] = gpt_id - _dump_gpt_ids(gpt_id_dict) - - -def _remove_gpt_id(path_from_root: str): - """ - Remove the cached ID of a given file. - - It does NOT fully remove a file from OpenAI. Use `remove_from_gpt` - to fully remove a file. - """ - gpt_id_dict = _load_gpt_ids() - item = _get_gpt_id_file(gpt_id_dict, path_from_root) - if "gpt_id" in item: - del item["gpt_id"] - _dump_gpt_ids(gpt_id_dict) - - -def get_gpt_id(path_from_root: str) -> str: - """ - Get the OpenAI File ID from cache for a given file. - - If this file has not been uploaded to OpenAI, this method will - upload it and generate its OpenAI File ID. - """ - gpt_id_dict = _load_gpt_ids() - return _get_gpt_id_file(gpt_id_dict, path_from_root)["gpt_id"] - - -def upload_to_gpt(path_from_root: str) -> str: - """ - Upload a file to OpenAI and set its File ID to cache. 
- """ - gpt_id = _upload_to_gpt_no_set_id(path_from_root) - _set_gpt_id(path_from_root, gpt_id) - return gpt_id - - -def remove_from_gpt(path_from_root: str) -> None: - """ - Fully remove a file from OpenAI. - - This method will first delete the file from OpenAI account, then - remove its OpenAI File ID from the cache. - """ - gpt_id = get_gpt_id(path_from_root) - client.files.delete(gpt_id) - _remove_gpt_id(path_from_root) - - -def get_gpt_file_from_id(gpt_id: str) -> openai.types.file_object.FileObject: - """ - Get a OpenAI File Object using its OpenAI File ID. - """ - return client.files.retrieve(gpt_id) - - -def get_gpt_file_from_path( - path_from_root: str, -) -> openai.types.file_object.FileObject: - """ - Get a OpenAI File Object using its file path. - """ - gpt_id = get_gpt_id(path_from_root) - return get_gpt_file_from_id(gpt_id) - - -# ############################################################################# -# Add/Remove files for an assistant -# ############################################################################# - -# Note that files for Assistant means files constantly used by this assistant -# (like guidelines). For one-time used files, add them to a message instead. -# One Assistant can have up to 20 files linked to it. - - -def set_assistant_files_by_name( - assistant_name: str, file_path_list: List[str] -) -> str: - """ - Use the given file list to overwrite the file list linked to an assistant. - """ - assistant_id = get_assistant_id_by_name(assistant_name) - file_ids = [get_gpt_id(path) for path in file_path_list] - return update_assistant_by_id(assistant_id, file_ids=file_ids) - - -def add_files_to_assistant_by_name( - assistant_name: str, file_path_list: List[str] -) -> str: - """ - Link all given files to an assistant. - - An Assistant can hold only 20 files, the oldest files will be - unlinked automatically. 
- """ - assistant_id = get_assistant_id_by_name(assistant_name) - assistant_files = client.beta.assistants.files.list( - assistant_id=assistant_id - ).data - existing_file_ids = [file.id for file in assistant_files] - new_file_ids = [get_gpt_id(path) for path in file_path_list] - file_ids = list(set(existing_file_ids + new_file_ids)) - file_ids = file_ids[-20:] - return update_assistant_by_id(assistant_id, file_ids=file_ids) - - -def delete_file_from_assistant_by_id(assistant_id: str, file_id: str) -> None: - """ - Unlink a file from an Assistant using Assistant id and file id. - - This method does NOT remove the file from OpenAI account. - """ - client.beta.assistants.files.delete( - assistant_id=assistant_id, file_id=file_id - ) - - -def delete_file_from_assistant_by_name( - assistant_name: str, file_path: str -) -> None: - """ - Unlink a file from an Assistant using Assistant name and file path. - - This method does NOT remove the file from OpenAI account. - """ - gpt_id = get_gpt_id(file_path) - assistant_id = get_assistant_id_by_name(assistant_name) - delete_file_from_assistant_by_id(assistant_id, gpt_id) - - -# ############################################################################# -# Create Thread and Message from user input -# ############################################################################# - - -def create_thread() -> str: - message_thread = client.beta.threads.create() - return message_thread.id - - -def create_message_on_thread( - thread_id: str, content: str, file_ids: List[str] -) -> str: - """ - Create a message on a thread, then link files to the message using file id. - - Files linked to a message can only be used by ChatGPT in the thread - that holds this message. - """ - if not content: - _LOG.error( - "Message content must not be empty. This will cause an OpenAI error." 
- ) - if file_ids: - message = client.beta.threads.messages.create( - thread_id=thread_id, - role="user", - content=content, - file_ids=file_ids, - ) - else: - message = client.beta.threads.messages.create( - thread_id=thread_id, - role="user", - content=content, - ) - return message.id - - -def create_message_on_thread_with_file_names( - thread_id: str, content: str, file_names: List[str] -) -> str: - """ - Create a message on a thread, then link files to the message using file - name. - - Files linked to a message can only be used by ChatGPT in the thread - that holds this message. - """ - if file_names: - file_ids = [get_gpt_id(file) for file in file_names] - else: - file_ids = [] - return create_message_on_thread(thread_id, content, file_ids) - - -# ############################################################################# -# Run thread on certain assistant -# ############################################################################# - - -def run_thread_on_assistant(assistant_id, thread_id, model: str = "") -> str: - """ - Run a thread on a given Assistant id. - - This is similar to sending a message to ChatGPT. - """ - if model: - run = client.beta.threads.runs.create( - thread_id=thread_id, assistant_id=assistant_id, model=model - ) - else: - run = client.beta.threads.runs.create( - thread_id=thread_id, assistant_id=assistant_id - ) - return run.id - - -def run_thread_on_assistant_by_name( - assistant_name: str, thread_id: str, model: str = "" -) -> str: - """ - Run a thread on a given Assistant name. - - This is similar to sending a message to ChatGPT. - """ - assistant_id = get_assistant_id_by_name(assistant_name) - if model: - return run_thread_on_assistant(assistant_id, thread_id, model) - else: - return run_thread_on_assistant(assistant_id, thread_id) - - -def wait_for_run_result(thread_id: str, run_id: str, timeout: int = 180) -> List: - """ - Wait for the thread to be processed. - - This is similar to waiting for ChatGPT's typing. 
- """ - finished = False - _LOG.info("Waiting for chatgpt response...") - for i in range(math.ceil(timeout / 5)): - _LOG.info("%s/%s seconds before timeout", i * 5, timeout) - time.sleep(5) - run = client.beta.threads.runs.retrieve( - thread_id=thread_id, run_id=run_id - ) - finished = run.status == "completed" - if finished: - break - if not finished: - raise TimeoutError("Failed to retrieve response from OpenAI.") - messages = client.beta.threads.messages.list(thread_id=thread_id).data - return messages - - -# ############################################################################# -# ChatGPT runner -# ############################################################################# - - -def e2e_assistant_runner( - assistant_name: str, - user_input: str = "", - *, - model: str = "", - input_file_names: Optional[List[str]] = None, - output_file_path: str = "", - vim_mode: bool = False, -) -> str: - """ - Send a message with files to an Assistant and wait for its reply. - - :param assistant_name: Assistant that should process this message - :param user_input: message to be sent to ChatGPT assistant - :param model: change the GPT model used by the assistant, no change - when empty this WILL update the configuration of the assistant - :param input_file_names: files to be used in this conversation - :param output_file_path: redirect ChatGPT's output to the given file - :param vim_mode: if True, take input from stdin and output to stdout - forcely - """ - if input_file_names is None: - input_file_names = [] - if not assistant_name: - _LOG.error("No Assistant name provided.") - return "" - if vim_mode: - user_input = "".join(sys.stdin.readlines()) - thread_id = create_thread() - create_message_on_thread_with_file_names( - thread_id, user_input, input_file_names - ) - if model: - run_id = run_thread_on_assistant_by_name( - assistant_name, thread_id, model - ) - else: - run_id = run_thread_on_assistant_by_name(assistant_name, thread_id) - messages = 
wait_for_run_result(thread_id, run_id) - output = messages[0].content[0].text.value - if vim_mode or not output_file_path: - sys.stdout.write(output) - if output_file_path: - with open(output_file_path, "w", encoding="utf-8") as fp: - fp.write(output) - return output diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py deleted file mode 100644 index 18ce63d7d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hchatgpt_instructions.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Import as: - -import helpers.hchatgpt_instructions as hchainst -""" - -instructions = { - "MarkdownLinter": """ -You are a markdown linter. -If you are given a piece of text under markdown format, treat these text as the -content of the markdown content you need to lint. -If you are given a filename, you should find the file in your linked files, use -it as the markdown content you need to lint. -After get the markdown content, find and fix grammatical errors in that content -with the minimum amount of changes possible and preserve the formatting. -You don't need to add periods at the end of each sentence. -You should not add ```markdown ``` around the output content. -Your only output message should be the linted result of that file, no additional -explanations should be added in your output. - """, - "DocWriter": """ -You are a documentation writer. -If you are given several python code files, try to understand these files and -how they may work. -You should write a markdown document about these files for users that have not -read the codes to know the basic workflow of them, your can use examples to show -the user how they can easily use those codes. 
-For the format of markdown document, you can use files linked to you as -reference. You don't need to strictly follow the format, the goal is to make the -document easy to understand - """, -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py deleted file mode 100644 index 2fd175bf4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcoverage.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Import as: - -import helpers.hcoverage as hcovera -""" - -import glob -import logging -import os -import pathlib -import site -import subprocess -import sysconfig - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def _detect_site_packages() -> pathlib.Path: - """ - Return the Path to the site-packages directory for the active interpreter. - - - Try sysconfig first - - Fall back to site.getsitepackages() or user-site. - """ - try: - purelib = sysconfig.get_path("purelib") - if purelib: - return pathlib.Path(purelib) - except (KeyError, IOError): - _LOG.debug( - "sysconfig.get_path('purelib') failed, falling back to site packages" - ) - try: - sp_dirs = site.getsitepackages() - except AttributeError: - sp_dirs = [] - for d in sp_dirs: - if "site-packages" in d: - return pathlib.Path(d) - return pathlib.Path(site.getusersitepackages()) - - -def inject(coveragerc: str = ".coveragerc") -> None: - """ - Install the coverage startup hook into this env site-packages. 
- """ - rc = pathlib.Path(coveragerc).resolve() - os.environ["COVERAGE_PROCESS_START"] = str(rc) - _LOG.debug("Set COVERAGE_PROCESS_START to %s", rc) - sp = _detect_site_packages() - target = sp / "coverage.pth" - hook_line = "import coverage; coverage.process_startup()" - cmd = f'echo "{hook_line}" | sudo tee "{target}" > /dev/null' - try: - hsystem.system(cmd) - _LOG.debug("Installed coverage hook to %s via sudo tee", target) - except (OSError, subprocess.SubprocessError) as e: - hdbg.dassert(False, f"Failed to install coverage hook via sudo tee: {e}") - - -def remove() -> None: - """ - Remove the coverage startup hook from this env site-packages. - """ - sp = _detect_site_packages() - target = sp / "coverage.pth" - if target.is_file(): - cmd = f'sudo rm -f "{target}"' - try: - hsystem.system(cmd) - _LOG.info("Removed coverage hook from %s via sudo rm", target) - except Exception as e: - _LOG.error("Failed to remove coverage hook via sudo rm: %s", e) - raise - else: - # TODO(Maddy): Is this acceptable? - _LOG.warning("No coverage.pth found in %s", sp) - # Remove coverage environment variables. - try: - if "COVERAGE_PROCESS_START" in os.environ: - del os.environ["COVERAGE_PROCESS_START"] - _LOG.info("Removed COVERAGE_PROCESS_START from environment") - else: - _LOG.debug("COVERAGE_PROCESS_START not found in environment") - except Exception as e: - _LOG.error("Failed to remove COVERAGE_PROCESS_START: %s", e) - raise - - -def generate_coverage_dockerfile() -> str: - """ - Build a Dockerfile string that appends coverage support. - """ - # This requires to: - # - Install coverage, pytest, pytest-cov at build time - # - Create /coverage_data and writes .coveragerc - # - Set ENV COVERAGE_PROCESS_START to /coverage_data/.coveragerc - # - Write a coverage.pth into site-packages so coverage auto-starts - txt = """ - # Install coverage and testing dependencies. 
- RUN pip install --no-cache-dir coverage pytest pytest-cov - - # Create coverage data directory with proper permissions. - RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data - - # Setup coverage configuration. - COPY .coveragerc /app/coverage_data/.coveragerc - ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc - - # Create coverage.pth file for automatic startup. - # This ensures coverage tracking starts automatically when Python runs. - RUN python - < None: - """ - Execute shell commands to run coverage steps in a Docker container. - - Assumes: - - A valid .coveragerc exists in the current working directory. - - coverage_data/ is the mounted folder inside the container. - """ - commands = [ - "mkdir -p coverage_data", - "chmod 777 coverage_data", - "cp .coveragerc coverage_data/.coveragerc", - "chmod 644 coverage_data/.coveragerc", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - - -def coverage_combine() -> None: - """ - Execute shell commands to combine coverage data. - - Assumes: - - .coverage.* files are present in the current directory or coverage_data/. - """ - # Check if there are any coverage files in coverage_data/ and copy them. - if os.path.exists("coverage_data"): - coverage_files_cmd = ( - "find coverage_data -name '.coverage.*' 2>/dev/null | wc -l" - ) - rc = hsystem.system(coverage_files_cmd, abort_on_error=False) - if rc == 0: - # Use a simple existence check instead of parsing command output. - coverage_files = glob.glob("coverage_data/.coverage.*") - if coverage_files: - _LOG.info( - "Found coverage files in coverage_data/, copying to current directory" - ) - commands = [ - "cp coverage_data/.coverage.* . 2>/dev/null || true", - "rm -rf coverage_data/.coverage.* 2>/dev/null || true", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - # Check if there are any .coverage.* files to combine. 
- coverage_files = glob.glob(".coverage.*") - num_files = len(coverage_files) - if num_files > 0: - _LOG.info("Found %d coverage data files to combine", num_files) - commands = [ - "coverage combine", - "coverage report --skip-empty", - ] - for cmd in commands: - hsystem.system(cmd, suppress_output=False) - else: - _LOG.warning("No .coverage.* files found to combine") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py deleted file mode 100644 index 6c64659c0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hcsv.py +++ /dev/null @@ -1,365 +0,0 @@ -""" -Import as: - -import helpers.hcsv as hcsv -""" - -import ast -import logging -import os -from typing import Any, Callable, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hs3 as hs3 - -_LOG = logging.getLogger(__name__) - - -def _append_csv( - df: pd.DataFrame, path: str, *, index: bool = False, **kwargs: Any -) -> None: - """ - Append a df to the CSV file `path` without header. - """ - with open(path, "a") as f: - df.to_csv(f, header=False, index=index, **kwargs) - - -def _read_csv_range( - csv_path: str, from_: int, to: int, **kwargs: Any -) -> pd.DataFrame: - """ - Read a specified row range of a CSV file and convert to a DataFrame. - - This function: - - assumes the CSV file to have header, considered to be row 0. - - reads [from_, to), e.g., (to - from_) lines following list slicing semantics. 
- - :param csv_path: location of CSV file - :param from_: first line to read (header is row 0 and is always read) - :param to: last line to read, not inclusive - :return: DataFrame with columns from CSV line 0 (header) - """ - hdbg.dassert_lt(0, from_, msg="Row 0 assumed to be header row") - hdbg.dassert_lt(from_, to, msg="Empty range requested!") - skiprows = list(range(1, from_)) - nrows = to - from_ - df = pd.read_csv(csv_path, skiprows=skiprows, nrows=nrows, **kwargs) - if df.shape[0] < to: - _LOG.warning("Number of df rows = %i vs requested = %i", df.shape[0], to) - return df - - -# TODO(gp): There is no use of this function. -def build_chunk( - csv_path: str, - col_name: str, - start: int, - *, - nrows_at_a_time: int = 1000, - **kwargs: Any, -) -> pd.DataFrame: - """ - Build a DataFrame from a CSV subset as follows: - - - Names the columns using the header line (row 0) - - Reads the value in (row, col) coordinates (`start`, `col_name`) (if it - exists) as `value` - - Adds row `start` and all subsequent contiguous rows with `value` in - column `col_name` - - For memory efficiency, the CSV is processed in chunks of size `nrows_at_a_time`. - - :param csv_path: location of CSV file - :param col_name: name of column whose values define chunks - :param start: first row to process - :param nrows_at_a_time: size of chunks to process - :return: DataFrame with columns from CSV line 0 - """ - hdbg.dassert_lt(0, start) - stop = False - dfs: List[pd.DataFrame] = [] - init_df = _read_csv_range(csv_path, start, start + 1, **kwargs) - if init_df.shape[0] < 1: - return init_df - val = init_df[col_name].iloc[0] - _LOG.debug("Building chunk for %s", val) - counter = 0 - while not stop: - from_ = start + counter * nrows_at_a_time - df = _read_csv_range(csv_path, from_, from_ + nrows_at_a_time) - # Break if there are no matches. - if df.shape[0] == 0: - break - if not (df[col_name] == val).any(): - break - # Stop if we have run out of rows to read. 
- if df.shape[0] < nrows_at_a_time: - stop = True - idx_max = (df[col_name] == val)[::-1].idxmax() - # Stop if we have reached a new value. - if idx_max < (df.shape[0] - 1): - stop = True - dfs.append(df.iloc[0 : idx_max + 1]) - counter += 1 - if not dfs: - return pd.DataFrame() - return pd.concat(dfs, axis=0).reset_index(drop=True) - - -# TODO(gp): There is no use of this function. -def find_first_matching_row( - csv_path: str, - col_name: str, - val: str, - *, - start: int = 1, - nrows_at_a_time: int = 1000000, - **kwargs: Any, -) -> Optional[int]: - """ - Find first row in CSV where value in column `col_name` equals `val`. - - :param csv_path: location of CSV file - :param col_name: name of column whose values define chunks - :param val: value to match on - :param start: first row (inclusive) to start search on - :param nrows_at_a_time: size of chunks to process - :return: line in CSV of first matching row at or past start - """ - curr = start - while True: - _LOG.debug("Start of current chunk = line %i", curr) - df = _read_csv_range(csv_path, curr, curr + nrows_at_a_time, **kwargs) - if df.shape[0] < 1: - _LOG.info("Value %s not found", val) - break - matches = df[col_name] == val - if matches.any(): - idx_max = matches.idxmax() - return int(curr + idx_max) - curr += nrows_at_a_time - return None - - -# ############################################################################# -# CSV to PQ conversion -# ############################################################################# - - -def _csv_mapreduce( - csv_path: str, - out_dir: str, - key_func: Callable, - chunk_preprocessor: Optional[Callable], - *, - chunk_size: int = 1000000, -) -> None: - """ - Map-reduce-type processing of CSV. 
- - The phases are: - - Read the CSV in chunks as DataFrame - - Key each row of the DataFrame using a `groupby` - - "Reduce" keyed groups by writing and appending to a CSV - - :param csv_path: input CSV path - :param out_dir: output dir for CSV with filenames corresponding to keys - :param key_func: function to apply to each chunk DataFrame to key rows - Should return an iterable with elements like (key, df) - :param chunk_preprocessor: function to apply to each chunk DataFrame before - applying key_func - :param chunk_size: chunk_size of input to process - """ - # Read CSV data in chunks. - chunks = pd.read_csv(csv_path, chunksize=chunk_size) - # Preprocess chunk, if needed. - if chunk_preprocessor is not None: - chunks = map(chunk_preprocessor, chunks) - # Apply key_func to each chunk. - keyed_group_blocks = map(key_func, chunks) - # Append results. - for block in keyed_group_blocks: - for idx, df in block: - file_name = os.path.join(out_dir, idx + ".csv") - _append_csv(df, file_name) - - -def convert_csv_to_pq( - csv_path: str, - pq_path: str, - *, - normalizer: Optional[Callable] = None, - header: Optional[int] = 0, - compression: Optional[str] = "gzip", -) -> None: - """ - Convert CSV file to Parquet file. - - Output of `csv_map_reduce()` is typically header-less to support append mode, - and so `normalizer` may be used to add appropriate headers. Note that Parquet - requires string column names, whereas Pandas by default uses integer column - names. - - :param csv_path: full path of CSV - :param pq_path: full path of parquet - :param header: header specification of CSV - :param normalizer: function to apply to df before writing to PQ - """ - df = pd.read_csv(csv_path, header=header) - # TODO(Paul): Ensure that one of header, normalizer is not None. 
- if normalizer is not None: - df = normalizer(df) - df.to_parquet(pq_path, compression=compression) - - -def convert_csv_dir_to_pq_dir( - csv_dir: str, - pq_dir: str, - *, - normalizer: Optional[Callable] = None, - header: Optional[int] = None, -) -> None: - """ - Apply `convert_csv_to_pq()` to all files in `csv_dir`. - - :param csv_dir: directory storing CSV files on S3 or local - :param pq_dir: target directory to save PQ files (only local - filesystem) - :param header: header specification of CSV - :param normalizer: function to apply to df before writing to PQ - """ - # Get the filenames in `csv_dir`. - if hs3.is_s3_path(csv_dir): - # TODO(gp): Pass aws_profile. - s3fs = hs3.get_s3fs("am") - filenames = s3fs.ls(csv_dir) - else: - # Local filesystem. - hdbg.dassert_dir_exists(csv_dir) - # TODO(Paul): check .endswith(".csv") or do glob(csv_dir + "/*.csv") - filenames = os.listdir(csv_dir) - hdbg.dassert(filenames, "No files in the directory '%s'", csv_dir) - # Process all the filenames. - # TODO(gp): Add tqdm. - # TODO(gp): Consider parallelizing. - for filename in filenames: - # Remove .csv/.csv.gz. - csv_stem = hio.remove_extension( - filename, ".csv", check_file_exists=True, check_has_extension=False - ) - if csv_stem is None: - csv_stem = hio.remove_extension( - filename, - ".csv.gz", - check_file_exists=True, - check_has_extension=False, - ) - if csv_stem is None: - _LOG.warning( - "Skipping filename=%s since it has invalid extension", csv_stem - ) - continue - # Convert file to PQ. - pq_filename = csv_stem + ".pq" - convert_csv_to_pq( - os.path.join(csv_dir, filename), - os.path.join(pq_dir, pq_filename), - normalizer=normalizer, - header=header, - ) - - -# ############################################################################# -# CSV-JSON dict conversion -# ############################################################################# - - -# TODO(gp): convert_csv_to_json_dict? 
-# TODO(gp): path_to_csv -> file_name -def convert_csv_to_dict(path_to_csv: str, remove_nans: bool) -> Dict[Any, Any]: - """ - Convert a CSV file storing a dataframe into a JSON-compatible dict. - - :param path_to_csv: path to the CSV file - :param remove_nans: whether to remove NaNs from the dictionary - :return: a JSON-compatible dict with the dataframe data - """ - hdbg.dassert_file_exists(path_to_csv) - # Load the dataframe from a CSV file. - df = pd.read_csv(path_to_csv) - # Transform the dataframe into a dict. - dict_df = df.to_dict(orient="list") - if remove_nans: - # Remove NaNs from the dict. - for key in dict_df: - dict_df[key] = [x for x in dict_df[key] if not pd.isnull(x)] - return dict_df # type: ignore - - -# TODO(gp): path_to_csv -> file_name -def save_csv_as_json( - path_to_csv: str, remove_nans: bool, path_to_json: Optional[str] = None -) -> None: - """ - Convert the df from a CSV into a dict and save it into a JSON file. - - If the `path_to_json` is not provided, the JSON is saved in the folder where - the CSV file is located. - - :param path_to_csv: path to the CSV file - :param remove_nans: whether to remove NaNs from the dictionary - :param path_to_json: path to save the JSON file - """ - # Convert the df from the CSV into a JSON-compatible dict. - dict_df = convert_csv_to_dict(path_to_csv, remove_nans) - # Determine the JSON destination path. - if path_to_json is None: - path_to_json = hio.change_filename_extension( - path_to_csv, ".csv", ".json" - ) - # Save the dict into a JSON file. - hio.to_json(path_to_json, dict_df) - - -# ############################################################################# -# CSV files with types -# ############################################################################# - - -def to_typed_csv(df: pd.DataFrame, file_name: str) -> str: - """ - Convert df into CSV and creates a file with the dtypes of columns. 
- - This function creates a file containing the types with the same name - and suffix e.g., `foobar.csv.types`. - """ - # Save the types. - dtypes_filename = file_name + ".types" - hio.create_enclosing_dir(dtypes_filename, incremental=True) - dtypes_dict = str(df.dtypes.apply(lambda x: x.name).to_dict()) - # Save the data. - df.to_csv(file_name, index=False) - with open(dtypes_filename, "w") as dtypes_file: - dtypes_file.write(dtypes_dict) - return dtypes_filename - - -def from_typed_csv(file_name: str) -> pd.DataFrame: - """ - Load CSV file as df applying the original types of columns. - - This function uses a file with name `file_name.types` to load - information about the column types. - """ - # Load the types. - dtypes_filename = file_name + ".types" - hdbg.dassert_path_exists(dtypes_filename) - with open(dtypes_filename) as dtypes_file: - dtypes_dict = ast.literal_eval(list(dtypes_file)[0]) - # Load the data, applying the types. - df = pd.read_csv(file_name, dtype=dtypes_dict) - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py deleted file mode 100644 index 2849dfb10..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdataframe.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -Helper functions for processing pandas dataframes. - -Import as: - -import helpers.hdataframe as hdatafr -""" - -# TODO(gp): Consider merging with `helpers/pandas_helpers.py`. 
- -import collections -import functools -import logging -import operator -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -_METHOD_TO_APPLY = Dict[str, Dict[str, Any]] - - -def _combine_masks( - masks: pd.DataFrame, mode: str, info: collections.OrderedDict -) -> pd.Series: - if mode == "and": - combined_mask = masks.all(axis=1) - elif mode == "or": - combined_mask = masks.any(axis=1) - else: - raise ValueError(f"Invalid `mode`='{mode}'") - if combined_mask.sum() == 0: - _LOG.warning("No data remaining after filtering.") - info["nrows_remaining"] = combined_mask.sum() - return combined_mask - - -def filter_data_by_values( - df: pd.DataFrame, - filters: Dict[Union[int, str], Tuple[Any, ...]], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe rows based on column values. - - :param df: dataframe - :param filters: `{col_name: (possible_values)}` - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, vals in filters.items(): - hdbg.dassert_isinstance(vals, tuple) - mask = df[col_name].isin(vals) - info[f"n_{col_name}"] = mask.sum() - info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -def filter_data_by_comparison( - df: pd.DataFrame, - filters: Dict[ - Union[int, str], Union[Tuple[str, Any], Tuple[Tuple[str, Any], ...]] - ], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe by comparing columns to values. - - :param df: dataframe - :param filters: `{col_name: (comparison_method, value)}` or - `{col_name: ((comparison_method_i, value_i))}`. - `comparison_method` is one of the ("eq", "ne", "le", "lt", "ge", "gt") - pandas method names. - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, tuple_ in filters.items(): - if not isinstance(tuple_[0], tuple): - tuple_ = (tuple_,) # type: ignore - for comparison_method, val in tuple_: - hdbg.dassert_in( - comparison_method, ("eq", "ne", "le", "lt", "ge", "gt") - ) - mask = getattr(df[col_name], comparison_method)(val) - info[f"n_{col_name}_{comparison_method}_{val}"] = mask.sum() - info[f"perc_{col_name}_{comparison_method}_{val}"] = hprint.perc( - mask.sum(), df.shape[0] - ) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -def filter_data_by_method( - df: pd.DataFrame, - filters: Dict[Union[int, str], _METHOD_TO_APPLY], - mode: str, - info: Optional[collections.OrderedDict] = None, -) -> pd.DataFrame: - """ - Filter dataframe by calling a method specified for each column. - - :param df: dataframe - :param filters: `{col_name: {method: kwargs}}`, where `method` is the - method called on the dataframe column, e.g. "isin" or "str.contains", - and `kwargs` are the kwargs for this method - :param mode: `and` for conjunction and `or` for disjunction of filters - :param info: information storage - :return: filtered dataframe - """ - if info is None: - info = collections.OrderedDict() - info["nrows"] = df.shape[0] - if not filters: - info["nrows_remaining"] = df.shape[0] - return df.copy() - # Create filter masks for each column. 
- masks = [] - for col_name, method_dict in filters.items(): - for method, kwargs in method_dict.items(): - mask = operator.attrgetter(method)(df[col_name])(**kwargs) - info[f"n_{col_name}"] = mask.sum() - info[f"perc_{col_name}"] = hprint.perc(mask.sum(), df.shape[0]) - masks.append(mask) - masks = pd.concat(masks, axis=1) - combined_mask = _combine_masks(masks, mode, info) - filtered_df = df.loc[combined_mask].copy() - return filtered_df - - -# ############################################################################# - - -def apply_nan_mode( - srs: pd.Series, - mode: str = "leave_unchanged", - info: Optional[dict] = None, -) -> pd.Series: - """ - Process NaN values in a series according to the parameters. - - :param srs: pd.Series to process - :param mode: method of processing NaNs - - "leave_unchanged" - no transformation - - "drop" - drop all NaNs - - "ffill" - forward fill not leading NaNs - - "ffill_and_drop_leading" - do ffill and drop leading NaNs - - "fill_with_zero" - fill NaNs with 0 - - "strict" - raise ValueError that NaNs are detected - :param info: information storage - :return: transformed copy of input series - """ - hdbg.dassert_isinstance(srs, pd.Series) - if srs.empty: - _LOG.warning("Empty input series `%s`", srs.name) - if mode == "leave_unchanged": - res = srs.copy() - elif mode == "drop": - res = srs.dropna().copy() - elif mode == "ffill": - res = srs.ffill().copy() - elif mode == "ffill_and_drop_leading": - res = srs.ffill().dropna().copy() - elif mode == "fill_with_zero": - res = srs.fillna(0).copy() - elif mode == "strict": - res = srs.copy() - if srs.isna().any(): - raise ValueError(f"NaNs detected in mode `{mode}`") - else: - raise ValueError(f"Unrecognized mode `{mode}`") - # - if info is not None: - hdbg.dassert_isinstance(info, dict) - # Dictionary should be empty. 
- hdbg.dassert(not info) - info["series_name"] = srs.name - info["num_elems_before"] = len(srs) - info["num_nans_before"] = np.isnan(srs).sum() - info["num_elems_removed"] = len(srs) - len(res) - info["num_nans_imputed"] = ( - info["num_nans_before"] - info["num_elems_removed"] - ) - info["percentage_elems_removed"] = ( - 100.0 * info["num_elems_removed"] / info["num_elems_before"] - ) - info["percentage_elems_imputed"] = ( - 100.0 * info["num_nans_imputed"] / info["num_elems_before"] - ) - return res - - -@functools.lru_cache() -def compute_points_per_year_for_given_freq(freq: str) -> float: - """ - Return the number of index time points per year. - - :param freq: string identifier of date frequency - :return: number of time points per year (approximate) - """ - # `pd.date_range` breaks for zero-period frequencies, so we need to work - # around that. - try: - # Leap years: 2012, 2016. - points_in_span = pd.date_range( - freq=freq, start="2012-01-01", end="2019-12-31" - ).size - span_in_years = 8 - points_per_year: float = points_in_span / span_in_years - return points_per_year - except ZeroDivisionError: - return 0.0 - - -# ############################################################################# - - -def infer_sampling_points_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: - """ - Return the number of index time points per year. - - TODO(*): Consider extending to all frequencies and count points by - explicitly building indices of the given frequency. - - :param df: series or dataframe with non-null `df.index.freq` - :return: number of time points per year (approximate) - """ - hdbg.dassert(hasattr(df.index, "freq") and df.index.freq is not None) - freq = df.index.freq - # TODO(*): Make start, end dates parameters that can be passed in. - return compute_points_per_year_for_given_freq(freq) - - -def compute_count_per_year(df: Union[pd.Series, pd.DataFrame]) -> float: - """ - Return df.count() divided by the length of `df` in years. 
- """ - hdbg.dassert( - hasattr(df.index, "freq") and df.index.freq is not None, - msg="`df` must have a `DatetimeIndex` with a `freq`", - ) - assert hasattr(df.index, "freq") and df.index.freq is not None - freq = df.index.freq - # Calculate the time span of `df` in years. - points_per_year = compute_points_per_year_for_given_freq(freq) - span_in_years = df.size / points_per_year - # Determine the number of non-NaN/inf/etc. data points per year. - count_per_year = df.count() / span_in_years - count_per_year = cast(float, count_per_year) - return count_per_year - - -# ############################################################################# - - -def remove_duplicates( - df: pd.DataFrame, - duplicate_columns: Optional[List[str]], - control_column: Optional[str], -) -> pd.DataFrame: - """ - Remove duplicates from DataFrame. - - :param df: DataFrame to process - :param duplicate_columns: subset of column names, None for all - :param control_column: column max value of which determines the kept - row - :return: DataFrame with removed duplicates - """ - # Fix maximum value of control column at the bottom. - if control_column: - df = df.sort_values(by=control_column) - duplicate_columns = duplicate_columns or df.columns - df = df.drop_duplicates(subset=duplicate_columns) - # Sort by index to return to original view. 
- df = df.sort_index() - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py deleted file mode 100644 index e63152593..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdatetime.py +++ /dev/null @@ -1,909 +0,0 @@ -""" -Import as: - -import helpers.hdatetime as hdateti -""" - -import asyncio -import calendar -import datetime -import logging -import re -from typing import Callable, Iterable, Optional, Tuple, Union - -# TODO(gp): Use hdbg.WARNING -_WARNING = "\033[33mWARNING\033[0m" - -# Avoid dependency from other `helpers` modules to prevent import cycles. - -import pandas as pd # noqa: E402 # pylint: disable=wrong-import-position - -# TODO(gp): Check if dateutils is equivalent to `pytz` or better so we can simplify -# the dependencies. -try: - import pytz -except ModuleNotFoundError: - _module = "pytz" - print(_WARNING + f": Can't find {_module}: continuing") - - -import helpers.hdbg as hdbg # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hprint as hprint # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hwall_clock_time as hwacltim # noqa: E402 # pylint: disable=wrong-import-position - -_LOG = logging.getLogger(__name__) - -# We use the type `Datetime` to allow flexibility in the interface exposed to client. 
-# The typical pattern is: -# - we call `to_datetime()`, as soon as we enter functions exposed to users, -# to convert the user-provided datetime into a `datetime.datetime` -# - we use only `datetime.datetime` in the private interfaces -# TODO(gp): In practice we are using `pd.Timestamp` -# -# It's often worth to import this file even for just the type `Datetime`, -# since typically as soon as the caller uses this type, they also want to use -# `to_datetime()` and `dassert_*()` functions. -# TODO(gp): It would be better to call this `GeneralDateTime`, `FlexibleDateTime`, -# and rename `StrictDateTime` -> `DateTime`. -Datetime = Union[str, pd.Timestamp, datetime.datetime] - -# The type `StrictDateTime` is for stricter interfaces, although it is a bit of a -# compromise. -# Either one wants to allow everything that can be interpreted as a datetime (and -# then use `Datetime`), or strict (and then use only `datetime.datetime`). -StrictDatetime = Union[pd.Timestamp, datetime.datetime] - - -def dassert_is_datetime(datetime_: Datetime) -> None: - """ - Assert that `datetime_` is of type `Datetime`. - """ - hdbg.dassert_isinstance( - datetime_, - (str, pd.Timestamp, datetime.datetime), - "datetime_='%s' of type '%s' is not a DateTimeType", - datetime_, - str(type(datetime_)), - ) - - -def dassert_is_strict_datetime(datetime_: StrictDatetime) -> None: - """ - Assert that `datetime_` is of type `StrictDatetime`. - """ - hdbg.dassert_isinstance( - datetime_, - (pd.Timestamp, datetime.datetime), - "datetime_='%s' of type '%s' is not a StrictDateTimeType", - datetime_, - str(type(datetime_)), - ) - - -def dassert_str_is_date(date: str) -> None: - """ - Check if an input string is a date. - - :param date: date as string, e.g., "20221101" - """ - hdbg.dassert_isinstance(date, str) - try: - _ = datetime.datetime.strptime(date, "%Y%m%d") - except ValueError as e: - raise ValueError(f"date='{date}' doesn't have the right format: {e}") - - -# TODO(Grisha): also pass timezone. 
-def to_datetime(datetime_: Datetime) -> datetime.datetime: - """ - Convert a `Datetime` into a `datetime.datetime`. - - :return: tz-aware or naive datetime.datetime - """ - dassert_is_datetime(datetime_) - if isinstance(datetime_, str): - datetime_ = pd.Timestamp(datetime_) - if isinstance(datetime_, pd.Timestamp): - datetime_ = datetime_.to_pydatetime() - return datetime_ # type: ignore - - -def to_timestamp(datetime_: Datetime) -> pd.Timestamp: - """ - Convert a `Datetime` into a `pd.Timestamp`. - - :return: tz-aware or naive datetime.datetime - """ - dassert_is_datetime(datetime_) - timestamp = pd.Timestamp(datetime_) - return timestamp - - -# //////////////////////////////////////////////////////////////////////////////////O - - -def dassert_is_tz_naive(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is tz-naive, i.e., doesn't have timezone - info. - """ - hdbg.dassert_is( - datetime_.tzinfo, None, "datetime_='%s' is not tz naive", datetime_ - ) - - -def dassert_has_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp has timezone info. - """ - hdbg.dassert_is_not( - datetime_.tzinfo, - None, - "datetime_='%s' doesn't have timezone info", - datetime_, - ) - - -def dassert_has_specified_tz( - datetime_: StrictDatetime, tz_zones: Iterable[str] -) -> None: - """ - Assert that the passed timestamp has the timezone passed in `tz_zones`. - """ - # Make sure that the passed timestamp has timezone information. - dassert_has_tz(datetime_) - # Get the timezone. - tz_info = datetime_.tzinfo - # Unlike other timezones UTC is a `datetime.timezone` object not a - # `pytz.tzfile`. See CmTask5895 for details. 
- if ( - isinstance(tz_info, datetime.timezone) - and tz_info == datetime.timezone.utc - ): - tz_zone = "UTC" - else: - tz_zone = tz_info.zone # type: ignore - has_expected_tz = tz_zone in tz_zones - hdbg.dassert( - has_expected_tz, - "datetime_=%s (type=%s) tz_info=%s tz_info.zone=%s instead of tz_zones=%s", - datetime_, - type(datetime_), - tz_info, - tz_zone, - tz_zones, - ) - - -def dassert_has_UTC_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is UTC. - """ - tz_zones = (pytz.timezone("UTC").zone,) - dassert_has_specified_tz(datetime_, tz_zones) - - -def dassert_has_ET_tz(datetime_: StrictDatetime) -> None: - """ - Assert that the passed timestamp is Eastern Time (ET). - """ - tz_zones = ( - pytz.timezone("US/Eastern").zone, - pytz.timezone("America/New_York").zone, - ) - dassert_has_specified_tz(datetime_, tz_zones) - - -def dassert_tz_compatible( - datetime1: StrictDatetime, datetime2: StrictDatetime -) -> None: - """ - Assert that two timestamps are both naive or both have timezone info. - """ - dassert_is_strict_datetime(datetime1) - dassert_is_strict_datetime(datetime2) - has_tz1 = datetime1.tzinfo is not None - has_tz2 = datetime2.tzinfo is not None - hdbg.dassert_eq( - has_tz1, - has_tz2, - "datetime1='%s' and datetime2='%s' are not compatible", - str(datetime1), - str(datetime2), - ) - - -def dassert_have_same_tz( - datetime1: StrictDatetime, datetime2: StrictDatetime -) -> None: - """ - Assert that both timestamps have the same tz. - - The timezones are compared regardless of a DST mode. - """ - dassert_tz_compatible(datetime1, datetime2) - # Convert to string to remove DST mode info. - tz1_as_str = str(datetime1.tzinfo) - tz2_as_str = str(datetime2.tzinfo) - hdbg.dassert_eq( - tz1_as_str, - tz2_as_str, - "datetime1=%s (datetime1.tzinfo=%s) datetime2=%s (datetime2.tzinfo=%s) ", - datetime1, - tz1_as_str, - datetime2, - tz2_as_str, - ) - - -# TODO(gp): Replace this check with compatibility between series vs scalar. 
-# def dassert_srs_tz_compatible( -# def dassert_srs_has_tz -# def dassert_srs_is_tz_naive -def dassert_tz_compatible_timestamp_with_df( - datetime_: StrictDatetime, - df: pd.DataFrame, - col_name: Optional[str], -) -> None: - """ - Assert that timestamp and a df column are both naive or both have timezone - info. - - :param col_name: col_name. `None` represents the index. - """ - dassert_is_strict_datetime(datetime_) - hdbg.dassert_isinstance(df, pd.DataFrame) - if df.empty: - return - if col_name is None: - # We assume that the first element in the index is representative. - df_datetime = df.index[0] - else: - hdbg.dassert_in(col_name, df.columns) - df_datetime = df[col_name].iloc[0] - dassert_tz_compatible(df_datetime, datetime_) - - -# //////////////////////////////////////////////////////////////////////////////////O - - -def dassert_is_valid_timestamp(timestamp: Optional[pd.Timestamp]) -> None: - """ - Assert that a timestamp is `None` or a `pd.Timestamp` with timezone. - """ - if timestamp is not None: - hdbg.dassert_isinstance(timestamp, pd.Timestamp) - dassert_has_tz(timestamp) - - -def dassert_timestamp_lte( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], -) -> None: - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - if start_timestamp is not None and end_timestamp is not None: - hdbg.dassert_lte(start_timestamp, end_timestamp) - - -def dassert_timestamp_lt( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], -) -> None: - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - if start_timestamp is not None and end_timestamp is not None: - hdbg.dassert_lt(start_timestamp, end_timestamp) - - -def dassert_is_valid_interval( - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> None: - """ - Assert that an interval has valid start and end 
timestamps. - """ - _LOG.debug( - hprint.to_str("start_timestamp end_timestamp left_close right_close") - ) - dassert_is_valid_timestamp(start_timestamp) - dassert_is_valid_timestamp(end_timestamp) - # Check the requested interval. - if start_timestamp is not None and end_timestamp is not None: - if left_close and right_close: - # If they are both closed, an interval like [a, a] makes sense, - # otherwise it doesn't. - hdbg.dassert_lte(start_timestamp, end_timestamp) - else: - hdbg.dassert_lt(start_timestamp, end_timestamp) - - -# ############################################################################# - - -def get_UTC_tz() -> datetime.tzinfo: - """ - Return the UTC timezone. - """ - return pytz.timezone("UTC") - - -def get_ET_tz() -> datetime.tzinfo: - """ - Return the US Eastern Time timezone. - """ - # TODO(Grisha): -> `US/Eastern`? - # It appears that "America/New_York" is to be preferred over "US/Eastern". - # https://www.iana.org/time-zones - # https://en.wikipedia.org/wiki/Tz_database - return pytz.timezone("America/New_York") - - -# Function returning the current (true, replayed, simulated) wall-clock time as a -# timestamp. -# TODO(gp): maybe GetWallClockTimeFunc is better to clarify that this is a function -# and not time. We often pass -GetWallClockTime = Callable[[], pd.Timestamp] - - -# TODO(gp): -> get_wall_clock_time -# TODO(gp): tz -> tz_mode since we are not passing neither a timezone or a -# timezone_as_str. -def get_current_time( - tz: str, - # TODO(gp): Add * - # *, - event_loop: Optional[asyncio.AbstractEventLoop] = None, -) -> pd.Timestamp: - """ - Return current time in UTC / ET timezone or as a naive time. - - This should be the only way to get the current wall-clock time, - since it handles both wall-clock time and "simulated" wall-clock - time through asyncio. - - :param tz: how to represent the returned time (e.g., "UTC", "ET", - "naive") - """ - if event_loop is not None: - # We accept only `hasyncio.EventLoop` here. 
If we are using standard asyncio - # EventLoop we rely on wall-clock time instead of `loop.time()`. - hdbg.dassert_isinstance(event_loop, asyncio.AbstractEventLoop) - hdbg.dassert(hasattr(event_loop, "get_current_time")) - timestamp = event_loop.get_current_time() - else: - # Use true real-time. - timestamp = datetime.datetime.utcnow() - # Convert it into the right - timestamp = pd.Timestamp(timestamp, tz=get_UTC_tz()) - if tz == "UTC": - pass - elif tz == "ET": - timestamp = timestamp.tz_convert(get_ET_tz()) - elif tz == "naive_UTC": - timestamp = timestamp.replace(tzinfo=None) - elif tz == "naive_ET": - timestamp = timestamp.tz_convert(get_ET_tz()) - timestamp = timestamp.replace(tzinfo=None) - else: - raise ValueError(f"Invalid tz='{tz}'") - return timestamp - - -def get_current_timestamp_as_string(tz: str) -> str: - """ - Return the current time in the format `YYYYMMDD_HHMMSS` (e.g., - 20210728_221734). - - Note that no information about the timezone is returned. Thus the - same time corresponds to `20210728_171749` for tz="ET" and - `20210728_221749` for tz="UTC". - """ - timestamp = get_current_time(tz) - ret = timestamp.strftime("%Y%m%d-%H%M%S") - return ret - - -def get_current_date_as_string(tz: str) -> str: - """ - Return the current date in the format `YYYYMMDD` (e.g., 20210728). - """ - timestamp = get_current_time(tz) - ret = timestamp.strftime("%Y%m%d") - return ret - - -# ############################################################################# -# Bar-related utilities -# ############################################################################# - - -def convert_seconds_to_minutes(num_secs: int) -> int: - hdbg.dassert_lt(0, num_secs) - hdbg.dassert_eq( - num_secs % 60, - 0, - "num_secs=%s is not an integer number of minutes", - num_secs, - ) - num_mins = int(num_secs / 60) - hdbg.dassert_lt(0, num_mins) - _LOG.debug(hprint.to_str("num_secs num_mins")) - return num_mins - - -# TODO(Dan): Unit test. 
-def convert_seconds_to_pandas_minutes(val: int) -> str: - """ - Convert a number of seconds to its Pandas delay representation in minutes. - - E.g. 300 -> '5T' - - :param val: number of seconds to convert - :return: Pandas delay representation - """ - res = convert_seconds_to_minutes(val) - res = f"{res}T" - return res - - -def convert_minutes_to_seconds(num_minutes: int) -> int: - """ - Convert minutes to seconds. - - E.g., 5 (minutes) -> 300 (seconds). - - :param num_minutes: the number of minutes to convert - :return: the number of seconds - """ - hdbg.dassert_isinstance(num_minutes, int) - hdbg.dassert_lt(0, num_minutes) - num_seconds = num_minutes * 60 - _LOG.debug(hprint.to_str("num_minutes num_seconds")) - return num_seconds - - -# TODO(gp): bar_duration_in_secs -> bar_{length,period}_in_secs -def find_bar_timestamp( - current_timestamp: pd.Timestamp, - bar_duration_in_secs: int, - *, - mode: str = "round", - max_distance_in_secs: int = 10, -) -> pd.Timestamp: - """ - Compute the bar (a, b] with period `bar_duration_in_secs` including - `current_timestamp`. - - :param current_timestamp: current timestamp - :param bar_duration_in_secs: bar duration in seconds - :param mode: how to compute the bar - - `round`: snap to the closest bar extreme - - `floor`: pick timestamp to the bar that includes it, returning the lower - bound. E.g., For `9:13am` and 5 mins bars returns `9:10am` - :param max_distance_in_secs: number of seconds representing the maximal distance - that it's allowed from the start of the bar - """ - _LOG.debug( - hprint.to_str( - "current_timestamp bar_duration_in_secs mode max_distance_in_secs" - ) - ) - hdbg.dassert_isinstance(current_timestamp, pd.Timestamp) - # Align. 
- reference_timestamp = f"{bar_duration_in_secs}S" - if mode == "round": - bar_timestamp = current_timestamp.round(reference_timestamp) - elif mode == "floor": - bar_timestamp = current_timestamp.floor(reference_timestamp) - hdbg.dassert_lte(bar_timestamp, current_timestamp) - else: - raise ValueError(f"Invalid mode='{mode}'") - _LOG.debug( - hprint.to_str("current_timestamp bar_duration_in_secs bar_timestamp") - ) - # Sanity check. - if mode == "round": - hdbg.dassert_lte(1, max_distance_in_secs) - if bar_timestamp >= current_timestamp: - distance_in_secs = (bar_timestamp - current_timestamp).seconds - else: - distance_in_secs = (current_timestamp - bar_timestamp).seconds - hdbg.dassert_lte(0, distance_in_secs) - hdbg.dassert_lte( - distance_in_secs, - max_distance_in_secs, - "current_timestamp=%s is too distant from bar_timestamp=%s", - current_timestamp, - bar_timestamp, - ) - _LOG.debug(hprint.to_str("bar_timestamp")) - return bar_timestamp - - -# This can't go in `helpers.hwall_clock_time` since it has a dependency from -# `find_bar_timestamp()` and might introduce an import loop. -def set_current_bar_timestamp( - current_timestamp: pd.Timestamp, - bar_duration_in_secs: int, -) -> None: - """ - Compute the current bar by snapping the current timestamp to the grid. - """ - mode = "round" - # E.g., `current_timestamp` is 09:26 and the next bar is at 09:30, so - # the distance is 4 minutes, i.e. max distance should be within a bar's - # length. 
- max_distance_in_secs = bar_duration_in_secs - bar_timestamp = find_bar_timestamp( - current_timestamp, - bar_duration_in_secs, - mode=mode, - max_distance_in_secs=max_distance_in_secs, - ) - _LOG.debug(hprint.to_str("current_timestamp bar_timestamp")) - hwacltim.set_current_bar_timestamp(bar_timestamp) - - -# ############################################################################# - - -def str_to_timestamp( - timestamp_as_str: str, tz: str, *, datetime_format: Optional[str] = None -) -> pd.Timestamp: - """ - Convert timestamp as string to `pd.Timestamp`. - - Localize input time to the specified timezone. - - E.g., `timestamp_as_str = "20230523_150513"`: - - `tz = "UTC"` -> "2023-05-23 15:05:13+0000" - - `tz = "US/Eastern"` -> "2023-05-23 15:05:13-0400" - - :param timestamp_as_str: string datetime (e.g., 20230523_150513) - :param tz: timezone info (e.g., "US/Eastern") - :param datetime_format: datetime format (e.g., %Y%m%d_%H%M%S) - If None, infer automatically - :return: pd.Timestamp with a specified timezone - """ - hdbg.dassert_isinstance(timestamp_as_str, str) - hdbg.dassert_isinstance(tz, str) - msg = "timestamp_as_str must be nonempty." - hdbg.dassert_is_not(timestamp_as_str, "", msg=msg) - _LOG.debug(hprint.to_str("timestamp_as_str tz datetime_format")) - if datetime_format is None: - # Try to infer the format automatically. - timestamp = pd.to_datetime(timestamp_as_str, infer_datetime_format=True) - else: - # Convert using the provided format. - timestamp = pd.to_datetime(timestamp_as_str, format=datetime_format) - # Convert to the specified timezone - timestamp = timestamp.tz_localize(tz) - return timestamp - - -def _handle_incorrect_conversions( - date: str, -) -> Optional[Tuple[Optional[str], Callable[[str], str]]]: - """ - Change data pre-processing for cases when `pd.to_datetime` is mistaken. 
- - :param date: string date - :return: date format and a function to apply to string dates before - passing them into `pd.to_datetime()` - """ - if len(date) in [7, 8]: - # "2021-M2" is transformed to '2020-01-01 00:00:01' by - # `pd.to_datetime`. - if date[:4].isdigit() and date[4] in ["-", ".", "/"] and date[5] == "M": - - def modify_monthly_date(x: str) -> str: - year_number = int(x[:4]) - month_number = x[6:] - num_days_in_month = calendar.monthrange( - year_number, int(month_number) - )[1] - modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" - return modified_x - - return "%Y-%m-%d", modify_monthly_date - return None - - -def _shift_to_period_end( # pylint: disable=too-many-return-statements - date: str, -) -> Optional[Callable[[StrictDatetime], StrictDatetime]]: - """ - Get function to shift the dates to the end of period. - - :param date: string date - :return: a function to shift the dates to the end of period. If `None`, no - shift is needed - """ - - def shift_to_month_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.MonthEnd(0) - - def shift_to_quarter_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.QuarterEnd(0) - - def shift_to_year_end(x: StrictDatetime) -> StrictDatetime: - return x + pd.offsets.YearEnd(0) - - if date[:4].isdigit(): - if len(date) == 7: - if date[5:].isdigit(): - # "2020-12" format. - return shift_to_month_end - if date[5] == "Q": - # "2021-Q1" format. - return shift_to_quarter_end - elif len(date) == 6: - # "2021Q1" format. - if date[4] == "Q": - return shift_to_quarter_end - elif len(date) == 4: - # "2021" format. - return shift_to_year_end - # "September 2020" or "Sep 2020" format. - # Get a flat list of month aliases. The full month name comes first. - # Since the `calendar` is using the natural month order, we need to - # shift the month aliases by one to get the correct order. 
- # E.g., `calendar.month_name[1:]` is `['January', 'February', ...]` and - # `calendar.month_abbr[1:]` is `['Jan', 'Feb', ...]`. - month_aliases = list(calendar.month_name[1:]) + list(calendar.month_abbr[1:]) - pattern = re.compile("|".join(month_aliases), re.IGNORECASE) - match = pattern.search(date) - if match is None: - return None - span = match.span() - date_without_month = f"{date[: span[0]]}{date[span[1] :]}".strip() - if len(date_without_month) == 4 and date_without_month.isdigit(): - return shift_to_month_end - return None - - -def _determine_date_format( - date: str, date_standard: Optional[str] = None -) -> Optional[Tuple[str, Callable[[str], str]]]: - """ - Determine date format for cases when `pd.to_datetime` fails. - - :param date: date string - :param date_standard: "standard" or "ISO_8601", `None` defaults to - "standard" - :return: date format and a function to transform date strings before - converting them to datetime using `pd.to_datetime` - """ - date_standard = date_standard or "standard" - if date_standard == "standard": - year_format = "%Y" - week_format = "%W" - day_of_week_format = "%w" - elif date_standard == "ISO_8601": - year_format = "%G" - week_format = "%V" - day_of_week_format = "%u" - else: - raise ValueError(f"Invalid `date_standard`='{date_standard}'") - # Determine format and original `date` modification function. - format_ = "" - if date[:4].isdigit(): - format_ += year_format - elif date[0] == "Q" and len(date) == 7 and date[-4:].isdigit(): - # "Q1 2020" format. 
- - def modify_quarterly_data(x: str) -> str: - year_number = x[-4:] - quarter = int(x[1:2]) - last_month_of_quarter = 3 * quarter - last_day_of_quarter = calendar.monthrange( - int(year_number), last_month_of_quarter - )[1] - modified_x = ( - f"{year_number}-{last_month_of_quarter}-{last_day_of_quarter}" - ) - return modified_x - - format_ = f"{year_format}-%m-%d" - return format_, modify_quarterly_data - else: - _LOG.error("This format is not supported: '%s'", date) - return None - next_char = date[4] - if next_char in ["-", ".", "/", " "]: - if len(date) not in [7, 8]: - _LOG.error("This format is not supported: '%s'", date) - return None - format_ += "-" - next_char = date[5] - if next_char == "W": - # "2020-W14" format. - - def modify_weekly_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - return x + "-6" - - date_modification_func = modify_weekly_date - format_ += f"W{week_format}-{day_of_week_format}" - elif next_char == "S": - # "2020-S1" - semi-annual format. - def modify_semiannual_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - return x.replace("S1", "06-30").replace("S2", "12-31") - - date_modification_func = modify_semiannual_date - format_ += "%m-%d" - elif next_char == "B": - # "2020-B1" - bi-monthly format (every other month). - # We'll index by the start of the month starting with January - # based on PiT. - - def modify_bimonthly_date(x: str) -> str: - x = re.sub(r"[//.\s]", "-", x) - bimonth_number = x[6] - month_number = int(bimonth_number) * 2 - 1 - modified_x = f"{x[:5]}{month_number}-01" - return modified_x - - date_modification_func = modify_bimonthly_date - format_ += "%m-%d" - else: - _LOG.error("This format is not supported: '%s'", date) - return None - elif next_char == "M" and len(date) == 7: - # "1959M01" format. 
- - def modify_monthly_date(x: str) -> str: - year_number = int(x[:4]) - month_number = x[5:] - num_days_in_month = calendar.monthrange( - year_number, int(month_number) - )[1] - modified_x = f"{x[:4]}-{month_number}-{num_days_in_month}" - return modified_x - - date_modification_func = modify_monthly_date - format_ += "-%m-%d" - else: - _LOG.error("This format is not supported: '%s'", date) - return None - return format_, date_modification_func - - -def to_generalized_datetime( - dates: Union[pd.Series, pd.Index], date_standard: Optional[str] = None -) -> Union[pd.Series, pd.Index]: - """ - Convert string dates to datetime. - - This works like `pd.to_datetime`, but supports more date formats and shifts - the dates to the end of period instead of the start. - - :param dates: series or index of dates to convert - :param date_standard: "standard" or "ISO_8601", `None` defaults to - "standard" - :return: datetime dates - """ - # This function doesn't deal with mixed formats. - hdbg.dassert_isinstance(dates, Iterable) - hdbg.dassert(not isinstance(dates, str)) - # Try converting to datetime using `pd.to_datetime`. - format_example_index = -1 - date_example = dates.tolist()[format_example_index] - format_fix = _handle_incorrect_conversions(date_example) - if format_fix is not None: - format_, date_modification_func = format_fix - dates = dates.map(date_modification_func) - date_example = dates.tolist()[format_example_index] - else: - format_ = None - datetime_dates = pd.to_datetime(dates, format=format_, errors="coerce") - # Shift to end of period if conversion has been successful. - # Handle both scalar and array cases for `pd.isna()`. 
- if hasattr(datetime_dates, "all"): - # datetime_dates is a Series or array-like - all_na = pd.isna(datetime_dates).all() - datetime_example = ( - datetime_dates.tolist()[format_example_index] - if hasattr(datetime_dates, "tolist") - else datetime_dates - ) - else: - # datetime_dates is a scalar - all_na = pd.isna(datetime_dates) - datetime_example = datetime_dates - if not all_na: - if ( - not pd.isna(datetime_example) - and hasattr(datetime_example, "strftime") - and datetime_example.strftime("%Y-%m-%d") == date_example - ): - return datetime_dates - shift_func = _shift_to_period_end(date_example) - if shift_func is not None: - if hasattr(datetime_dates, "map"): - datetime_dates = datetime_dates.map(shift_func) - else: - # For scalar case, apply the shift function directly - datetime_dates = shift_func(datetime_dates) - return datetime_dates - # If standard conversion fails, attempt our own conversion. - date_standard = date_standard or "standard" - format_determination_output = _determine_date_format( - date_example, date_standard - ) - if format_determination_output is None: - return datetime_dates - format_, date_modification_func = format_determination_output - dates = dates.map(date_modification_func) - return pd.to_datetime(dates, format=format_) - - -# ############################################################################# -# Unix to epoch conversion -# ############################################################################# - - -def convert_unix_epoch_to_timestamp( - epoch: int, unit: str = "ms", tz: str = "UTC" -) -> pd.Timestamp: - """ - Convert Unix epoch to timestamp. - - :param epoch: Unix time epoch - :param unit: epoch's time unit - :param tz: resulting timestamp timezone - :return: timestamp - """ - timestamp = pd.Timestamp(epoch, unit=unit, tz=tz) - return timestamp - - -def convert_timestamp_to_unix_epoch( - timestamp: pd.Timestamp, unit: str = "ms" -) -> int: - """ - Convert timestamp to Unix epoch. 
- - :param timestamp: timestamp - :param unit: epoch's time unit - :return: Unix time epoch - """ - # Make timestamp tz-naive if it is not. Converted to UTC tz before becoming - # naive automatically. - if timestamp.tz: - timestamp = timestamp.tz_convert(None) - # Convert to epoch. - epoch: int = (timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta( - "1" + unit - ) - return epoch - - -# TODO(Sameep): Reuse this function across the code base (`jackpy strftime`) when -# it doesn't make the import graph too complicated. -# TODO(gp): This seems redundant with get_timestamp() in `hwall_clock_time`. -def timestamp_to_str( - timestamp: pd.Timestamp, *, include_msec: bool = False -) -> str: - """ - Convert timestamp to string. - - :param timestamp: timestamp to convert - :param include_msec: whether to include milliseconds e.g. - `20230727_111057_123` - :return: timestamp in string format e.g. `20230727_111057`. - """ - hdbg.dassert_isinstance(timestamp, pd.Timestamp) - # Convert timestamp to string. - if include_msec: - # %f is the format code for microseconds. We truncate the last 3 digits - # to get milliseconds. - # This results in a string like "20230426_153042_123". 
- timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3] - else: - timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") - return timestamp_str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py deleted file mode 100644 index a11dfb243..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdbg.py +++ /dev/null @@ -1,1134 +0,0 @@ -""" -Import as: - -import helpers.hdbg as hdbg -""" - -import functools -import logging -import os -import pprint -import sys -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union - -# This module can depend only on: -# - Python standard modules -# - `helpers/hserver.py` -# See `helpers/dependencies.txt` for more details - -_LOG = logging.getLogger(__name__) - - -# Enforce that certain warnings are disabled. -import helpers.hwarnings as hwarnin # # isort:skip # noqa: E402,F401,F403 # pylint: disable=unused-import - - -# TODO(gp): Make these generate from MAPPING below. -INFO = "\033[36mINFO\033[0m" -WARNING = "\033[33mWARNING\033[0m" -ERROR = "\033[31mERROR\033[0m" - - -# ############################################################################# -# dfatal. -# ############################################################################# - -# Copied from printing.py to avoid cyclical dependencies. - - -def _line(chars: str = "#", num_cols: int = 80) -> str: - line_ = chars * num_cols + "\n" - return line_ - - -def _frame(x: str, chars: str = "#", num_cols: int = 80) -> str: - """ - Return a string with a frame of num_cols chars around the object x. - - :param x: object to print through str() - :param num_cols: number - """ - line_ = _line(chars=chars, num_cols=num_cols) - ret = "" - ret += line_ - ret += str(x) + "\n" - ret += line_ - return ret - - -# End of copy. 
- - -def dfatal(message: str, assertion_type: Optional[Any] = None) -> None: - """ - Print an error message and exits. - """ - ret = "" - message = str(message) - ret = "\n" + _frame(message, "#", 80) - if assertion_type is None: - assertion_type = AssertionError - raise assertion_type(ret) - - -# ############################################################################# -# dassert. -# ############################################################################# - -# TODO(gp): Would be nice to have a way to disable the assertions in certain -# builds, or at least know how much time is spent in the assertions. -# To disable we could have a fake_dbg.py that has all `dassert_*`, `logging` -# defined as `lambda x: 0`. - - -# INVARIANTS: -# - `dassert_COND()` checks that COND is true, and raises if COND is False -# - For this reason the condition inside the `dassert` is typically in the form -# `if not (...):`, even this might annoy the linter or look weird -# - The parameter `only_warning` is to report a problem but keep going. -# This can be used (sparingly) for production when we want to be aware of -# certain conditions without aborting. - - -def _to_msg(msg: Optional[str], *args: Any) -> str: - """ - Format error message `msg` using the params in `args`, like `msg % args`. - """ - if msg is None: - # If there is no message, we should have no arguments to format. - assert not args, f"args={str(args)}" - res = "" - else: - try: - res = msg % args - except TypeError as e: - # The arguments didn't match the format string: report error and - # print the result somehow. - res = f"Caught assertion while formatting message:\n'{str(e)}'" - _LOG.warning(res) - res += "\n" + msg + " " + " ".join(map(str, args)) - # res = "(" + res + ") " - return res - - -def _dfatal( - txt: Union[str, Iterable[str]], - msg: Optional[str], - *args: Any, - only_warning: bool = False, -) -> None: - """ - Abort execution. 
- - :param only_warning: issue a warning instead of aborting - """ - dfatal_txt = "* Failed assertion *\n" - # TODO(gp): This should be an iterable. - if isinstance(txt, list): - dfatal_txt += "\n".join(txt) - else: - dfatal_txt += str(txt) - msg = _to_msg(msg, *args) - if msg: - if not dfatal_txt.endswith("\n"): - dfatal_txt += "\n" - dfatal_txt += msg - if only_warning: - # Only warn. - dfatal_txt += "\nContinuing as per user request with only_warning=True" - _LOG.warning(dfatal_txt) - else: - # Abort. - dfatal(dfatal_txt) - - -def dassert( - cond: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Handle the somehow frequent case of using `dassert` instead of another - # one, e.g., `dassert(y, list)` - if msg is not None: - assert isinstance(msg, str), ( - f"You passed '{msg}' or type '{type(msg)}' instead of str" - ) - if not cond: - txt = f"cond={cond}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_eq( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 == val2 - if not cond: - txt = f"'{val1}'\n==\n'{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_ne( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 != val2 - if not cond: - txt = f"'{val1}'\n!=\n'{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_imply( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = not val1 or val2 - if not cond: - txt = f"'{val1}' implies '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Comparison related. 
-# ############################################################################# - - -def dassert_lt( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 < val2 - if not cond: - txt = f"{val1} < {val2}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_lte( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 <= val2 - if not cond: - txt = f"{val1} <= {val2}" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_lgt( - lower_bound: float, - x: float, - upper_bound: float, - lower_bound_closed: bool, - upper_bound_closed: bool, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that `lower_bound <= x <= upper_bound`. - - :param lower_bound_closed, upper_bound_closed: control the open- - ness/close-ness of the interval extremes. - """ - # `lower_bound <= or < x`. - if lower_bound_closed: - dassert_lte(lower_bound, x, msg, *args, only_warning=only_warning) - else: - dassert_lt(lower_bound, x, msg, *args, only_warning=only_warning) - # `x <= or < upper_bound`. - if upper_bound_closed: - dassert_lte(x, upper_bound, msg, *args, only_warning=only_warning) - else: - dassert_lt(x, upper_bound, msg, *args, only_warning=only_warning) - - -def dassert_is_proportion( - x: float, msg: Optional[str] = None, *args: Any, only_warning: bool = False -) -> None: - """ - Assert that `0 <= x <= 1`. - """ - lower_bound_closed = True - upper_bound_closed = True - dassert_lgt( - 0, - x, - 1, - lower_bound_closed, - upper_bound_closed, - msg, - *args, - only_warning=only_warning, - ) - - -# ############################################################################# -# Membership. 
-# ############################################################################# - - -def dassert_in( - value: Any, - valid_values: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = value in valid_values - if not cond: - txt = f"'{value}' in '{valid_values}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_not_in( - value: Any, - valid_values: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = value not in valid_values - if not cond: - txt = f"'{value}' not in '{valid_values}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Type related. -# ############################################################################# - - -def dassert_is( - val1: Optional[str], - val2: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 is val2 - if not cond: - txt = f"'{val1}' is '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_not( - val1: Any, - val2: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = val1 is not val2 - if not cond: - txt = f"'{val1}' is not '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_type_is( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # pylint: disable=unidiomatic-typecheck - cond = type(val1) is val2 - if not cond: - txt = f"Type of '{val1}' is '{type(val1)}' instead of '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): This is redundant with dassert_isinstance(..., (str, float)). 
-def dassert_type_in( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # pylint: disable=unidiomatic-typecheck - cond = type(val1) in val2 - if not cond: - txt = f"Type of '{val1}' is '{type(val1)}' not in '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_isinstance( - val1: Any, - val2: Union[type, Iterable[type]], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - cond = isinstance(val1, val2) # type: ignore[arg-type] - if not cond: - txt = f"Instance of '{val1}' is '{type(val1)}' instead of '{val2}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_issubclass( - val1: Any, - val2: Union[type, Iterable[type]], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that an object `val1` is a subclass of `val2`. - """ - cond = issubclass(val1.__class__, val2) # type: ignore[arg-type] - if not cond: - txt = ( - f"Instance '{str(val1)}' of class '{val1.__class__.__name__}' is " - f"not a subclass of '{val2}'" - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_integer( - val: Union[int, float], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that val represents an integer number, independently of the type. - """ - if isinstance(val, int): - pass - elif isinstance(val, float): - cond = val == int(val) - if not cond: - txt = f"Invalid val='{val}' of type '{type(val)}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - else: - txt = f"Invalid val='{val}' of type '{type(val)}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_callable( - func: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert that an object `val1` is callable. 
- """ - cond = callable(func) - if not cond: - txt = f"Obj '{str(func)}' of type '{str(type(func))}' is not callable" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Set related. -# ############################################################################# - - -# TODO(gp): A more general solution is to have a function that traverses an obj -# and creates a corresponding obj only with deterministic data structures (e.g., -# converting sets and dicts to sorted lists). Then we can print with `pprint`. -def _set_to_str(set_: Set[Any], thr: Optional[int] = 20) -> str: - """ - Return a string with the ordered content of a set. - - This is useful when printing assertions that we want to be deterministic (e.g., - if we use it inside unit tests like: - ``` - with self.assertRaises(AssertionError) as cm: - ... - actual = str(cm.exception) - expected = r - self.assert_equal(actual, expected, fuzzy_match=True) - ``` - """ - try: - list_ = sorted(list(set_)) - # If sets have less than `thr` elements print them as well, otherwise - # print the beginning / end. - if thr is not None and len(list_) > thr: - txt = f"{len(list_)} [{min(list_)}, ... {max(list_)}]" - else: - txt = str(list_) - except TypeError: - # Sometimes the set has elements of different types and we can't easily - # sort them. In these cases we just skip the sorting. - txt = str(list(set_)) - return txt - - -def dassert_set_eq( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has the same elements as `val2`, raise otherwise. 
- - :param only_warning: issue a warning instead of aborting - """ - val1 = set(val1) - val2 = set(val2) - # pylint: disable=superfluous-parens - if not (val1 == val2): - txt = [] - txt.append("val1 - val2=" + _set_to_str(val1.difference(val2))) - txt.append("val2 - val1=" + _set_to_str(val2.difference(val1))) - txt.append("val1=" + _set_to_str(val1)) - txt.append("set eq") - txt.append("val2=" + _set_to_str(val2)) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): -> dassert_issubset to match Python set function. -def dassert_is_subset( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` is a subset of `val2`, raise otherwise. - """ - val1 = set(val1) - val2 = set(val2) - if not val1.issubset(val2): - txt = [] - txt.append("val1=" + _set_to_str(val1)) - txt.append("issubset") - txt.append("val2=" + _set_to_str(val2)) - txt.append("val1 - val2=" + _set_to_str(val1.difference(val2))) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# TODO(gp): -> dassert_no_intersection to match other functions. -def dassert_not_intersection( - val1: Any, - val2: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has no intersection `val2`, raise otherwise. - """ - val1 = set(val1) - val2 = set(val2) - if val1.intersection(val2): - txt = [] - txt.append("val1=" + _set_to_str(val1)) - txt.append("has no intersection") - txt.append("val2=" + _set_to_str(val2)) - txt.append( - "val1.intersection(val2)=" + _set_to_str(val1.intersection(val2)) - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_iterable( - val: Any, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val` is an iterable (excluding strings, bytes), raise otherwise. 
- """ - cond = isinstance(val, Iterable) and not isinstance( - val, (str, bytes, bytearray) - ) - if not cond: - txt = f"Val '{val}' of type '{type(val)}' is not an iterable" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# Array related. -# ############################################################################# - - -def dassert_no_duplicates( - val1: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val1` has no duplicates, raise otherwise. - """ - dassert_is_iterable(val1) - cond = len(set(val1)) == len(val1) - if not cond: - # Count the occurrences of each element of the seq. - v_to_num = [(v, val1.count(v)) for v in set(val1)] - # Build list of elements with duplicates. - dups = [v for v, n in v_to_num if n > 1] - txt = [] - txt.append("val1=\n" + pprint.pformat(val1)) - txt.append("has duplicates") - txt.append(",".join(map(str, dups))) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_is_sorted( - val1: Union[List, Tuple], - sort_kwargs: Optional[Dict[Any, Any]] = None, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that `val` is sorted, raise otherwise. - """ - # TODO(gp): Extend for pd.Series using the proper method. 
- dassert_isinstance(val1, (list, tuple)) - sort_kwargs = {} if sort_kwargs is None else sort_kwargs - sorted_val1 = sorted(val1, **sort_kwargs) - cond = sorted_val1 == val1 - if not cond: - txt = [] - txt.append("val1=\n" + pprint.pformat(val1)) - txt.append("is not sorted") - txt.append("sorted(val1)=\n" + pprint.pformat(sorted_val1)) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_eq_all( - val1: Iterable[Any], - val2: Iterable[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that two iterables `val1` and `val2` are equal, raise otherwise. - """ - dassert_is_iterable(val1) - val1 = list(val1) - dassert_is_iterable(val2) - val2 = list(val2) - cond = val1 == val2 - if not cond: - # mask = val1 != val2 - txt = [] - txt.append(f"val1={len(val1)}\n{val1}") - txt.append(f"val2={len(val2)}\n{val2}") - # txt += "\ndiff=%s" % mask.sum() - # txt += "\n%s" % val1[mask] - # txt += "\n%s" % val2[mask] - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def _get_first_type(obj: Iterable, tag: str) -> Type: - obj_types = {type(v) for v in obj} - dassert_eq( - len(obj_types), - 1, - "More than one type for elem of %s=%s", - tag, - map(str, obj_types), - ) - return list(obj_types)[0] - - -# TODO(gp): IMO a bit overfit to the use case. Move this to the files that are -# using is. -def dassert_all_attributes_are_same( - list_: List[Any], - attribute_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check if all the elements in the list have the same attribute value. 
- - :param list_: list of objects - :param attribute_name: name of the attribute to check - """ - dassert_isinstance(list_, list) - dassert_isinstance(attribute_name, str) - attribute_values = [getattr(element, attribute_name) for element in list_] - if len(set(attribute_values)) != 1: - txt = [] - txt.append("Elements in the list have different values for ") - txt.append(f"attribute {attribute_name}:\n\t{set(attribute_values)}") - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_array_has_same_type_element( - obj1: Any, - obj2: Any, - only_first_elem: bool, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that two objects iterables like arrays (e.g., pd.Index) have elements - of the same type. - - :param only_first_elem: whether to check only the first element or - all the elements of the iterable. - """ - # Get the types to compare. - if only_first_elem: - obj1_first_type = type(obj1[0]) - obj2_first_type = type(obj2[0]) - else: - obj1_first_type = _get_first_type(obj1, "obj1") - obj2_first_type = _get_first_type(obj2, "obj2") - # - if obj1_first_type != obj2_first_type: - txt = [] - num_elems = 5 - txt.append(f"obj1=\n{obj1[:num_elems]}") - txt.append(f"obj2=\n{obj2[:num_elems]}") - txt.append( - f"type(obj1)='{obj1_first_type}' is different from type(obj2)='{obj2_first_type}'" - ) - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_container_type( - obj: Any, - container_type: Optional[Any], - elem_type: Optional[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert `obj` is a certain type of container containing certain type of - objects. - - E.g., `obj` is a list of strings. - """ - # Add information about the obj. - if not msg: - msg = "" - msg = msg.rstrip("\n") + f"\nobj='{str(obj)}'" - # Check container. 
- if container_type is not None: - dassert_isinstance( - obj, container_type, msg, *args, only_warning=only_warning - ) - # Check the elements of the container. - if elem_type is not None: - for elem in obj: - dassert_isinstance( - elem, elem_type, msg, *args, only_warning=only_warning - ) - - -# TODO(gp): @all Replace calls to this with calls to `dassert_container_type()`. -def dassert_list_of_strings( - list_: List[str], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # TODO(gp): Allow iterable? - dassert_isinstance(list_, list, msg, *args, only_warning=only_warning) - for elem in list_: - dassert_isinstance(elem, str, msg, *args, only_warning=only_warning) - - -def dassert_all_defined_or_all_None( - vals: List[Any], - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check that all the values in a list are either all defined or all None. - """ - all_defined_cond = all(val is not None for val in vals) - all_none_cond = all(val is None for val in vals) - cond = all_defined_cond or all_none_cond - if not cond: - txt = f"Some values in list are defined and some are None: '{vals}'" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -# ############################################################################# -# File related. -# ############################################################################# - - -def dassert_path_exists( - path: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - dassert_isinstance(path, str) - path = os.path.abspath(path) - if not os.path.exists(path): - txt = f"Path '{path}' doesn't exist!" 
- _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_path_not_exists( - path: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - dassert_isinstance(path, str) - dassert_ne(path, "") - path = os.path.abspath(path) - if os.path.exists(path): - txt = f"Path '{path}' already exist!" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_file_exists( - file_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert unless `file_name` exists and it's a file and not a directory. - """ - dassert_isinstance(file_name, str) - dassert_ne(file_name, "") - file_name = os.path.abspath(file_name) - # `file_name` exists. - exists = os.path.exists(file_name) - if not exists: - txt = f"File '{file_name}' doesn't exist" - _dfatal(txt, msg, *args, only_warning=only_warning) - # `file_name` is a file. - is_file = os.path.isfile(file_name) - if not is_file: - txt = f"'{file_name}' is not a file" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_dir_exists( - dir_name: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Assert unless `dir_name` exists and it's a directory. - """ - dassert_isinstance(dir_name, str) - dassert_ne(dir_name, "") - dir_name = os.path.abspath(dir_name) - # `dir_name` exists. - exists = os.path.exists(dir_name) - if not exists: - txt = f"Dir '{dir_name}' doesn't exist" - _dfatal(txt, msg, *args, only_warning=only_warning) - # `dir_name` is a directory. - is_dir = os.path.isdir(dir_name) - if not is_dir: - txt = f"'{dir_name}' is not a dir" - _dfatal(txt, msg, *args, only_warning=only_warning) - - -def dassert_file_extension( - file_name: str, - extensions: Union[str, List[str]], - only_warning: bool = False, -) -> None: - """ - Ensure that file has one of the given extensions. 
- - :param extensions: don't need to start with `.`, e.g., use `csv` instead of - `.csv` - """ - # Handle single extension case. - if isinstance(extensions, str): - extensions = [extensions] - # Make sure extension starts with . - extensions = [f".{e}" if not e.startswith(".") else e for e in extensions] - # Check. - name, act_ext = os.path.splitext(file_name) - if act_ext == ".gz": - # Concatenate with the preceding extension, e.g., `.csv.gz`. - ext = os.path.splitext(name)[-1] - act_ext = (ext + act_ext).lower() - dassert_in( - act_ext, - extensions, - "Invalid extension '%s' for file '%s'", - act_ext, - file_name, - only_warning=only_warning, - ) - - -def dassert_is_path_abs(path: str, only_warning: bool = False) -> None: - """ - Assert that `path` is an absolute path. - """ - dassert_isinstance(path, str) - dassert_ne(path, "") - dassert( - os.path.isabs(path), - "Path '%s' is not absolute", - path, - only_warning=only_warning, - ) - - -def dassert_related_params( - params: Dict[str, Any], - mode: str, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - """ - Check whether `params` have a certain relationship. - - :params params: dictionary of parameter name, value - :params mode: - - `all_or_none_non_null`: either all params are null (i.e., `bool` evaluate - to false) or are non-null - - `all_or_none_non_None`: either all params are None or all params are not - None. This is useful when passing set of params that are optional - """ - # TODO(gp): Allow iterable? - dassert_isinstance(params, dict, msg, *args, only_warning=only_warning) - if mode == "all_or_none_non_null": - # Find out if at least one value is set. 
- is_non_null = map(bool, params.values()) - one_is_non_null = functools.reduce(lambda x, y: x or y, is_non_null) - for k, v in params.items(): - if bool(v) != one_is_non_null: - txt = f"All or none parameter should be non-null:\n{k}={v}\nparams={pprint.pformat(params)}\n" - _dfatal(txt, msg, *args, only_warning=only_warning) - elif mode == "all_or_none_non_None": - # Find out if at least one value is not None. - is_non_None = map(lambda x: x is not None, params.values()) - one_is_non_None = functools.reduce(lambda x, y: x or y, is_non_None) - for k, v in params.items(): - if (v is not None) != one_is_non_None: - txt = f"All or none parameter should be non-None:\n{k}={v}\nparams={pprint.pformat(params)}\n" - _dfatal(txt, msg, *args, only_warning=only_warning) - else: - raise ValueError(f"Invalid mode='{mode}'") - - -# ############################################################################# -# Command line. -# ############################################################################# - - -# Sample at the beginning of time before we start fiddling with command line -# args. -_CMD_LINE = " ".join(arg for arg in sys.argv) - - -def get_command_line() -> str: - return _CMD_LINE - - -# ############################################################################# -# Logger. -# ############################################################################# - - -# TODO(gp): Move this to helpers/hlogging.py and change all the callers. 
- - -# TODO(gp): maybe replace "force_verbose_format" and "force_print_format" with -# a "mode" in ("auto", "verbose", "print") -def init_logger( - verbosity: int = logging.INFO, - use_exec_path: bool = False, - log_filename: Optional[str] = None, - force_verbose_format: bool = False, - force_print_format: bool = False, - force_white: bool = True, - force_no_warning: bool = False, - in_pytest: bool = False, - report_memory_usage: bool = False, - report_cpu_usage: bool = False, - report_command_line: bool = True, -) -> None: - """ - Send stderr and stdout to logging (optionally teeing the logs to file). - - - Note that: - - logging.DEBUG = 10 - - logging.INFO = 20 - - :param verbosity: verbosity to use - :param use_exec_path: use the name of the executable - :param log_filename: log to that file - :param force_verbose_format: use the verbose format for the logging - :param force_print_format: use the print format for the logging - :param force_white: use white color for printing. This can pollute the - output of a script when redirected to file with echo characters - :param in_pytest: True when we are running through pytest, so that we - can overwrite the default logger from pytest - :param report_memory_usage: turn on reporting memory usage - :param report_cpu_usage: turn on reporting CPU usage - :param report_command_line: turn on reporting command line - """ - # Try to minimize dependencies. - import helpers.hlogging as hloggin - - # TODO(gp): Print the stacktrace every time is called. - if force_white: - sys.stdout.write("\033[0m") - if isinstance(verbosity, str): - # pylint: disable=protected-access - dassert(hasattr(logging, "_checkLevel")) - assert hasattr(logging, "_checkLevel") - verbosity = logging._checkLevel(verbosity) - # From https://stackoverflow.com/questions/14058453 - root_logger = logging.getLogger() - # Set verbosity for all loggers. 
- root_logger.setLevel(verbosity) - # if False: - # eff_level = root_logger.getEffectiveLevel() - # print( - # "effective level= %s (%s)" - # % (eff_level, logging.getLevelName(eff_level)) - # ) - # if False: - # # dassert_eq(root_logger.getEffectiveLevel(), verbosity) - # for handler in root_logger.handlers: - # handler.setLevel(verbosity) - # Exit to avoid to replicate the same output multiple times. - if not in_pytest and root_logger.handlers: - print(WARNING + ": Logger already initialized: skipping") - if False: - # Print info about the caller. - import traceback - - traceback.print_stack() - return - # - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(verbosity) - # Set the formatter. - # formatter = hloggin.set_v1_formatter( - dassert(hasattr(hloggin, "set_v2_formatter")) - assert hasattr(hloggin, "set_v2_formatter") - formatter = hloggin.set_v2_formatter( - ch, - root_logger, - force_no_warning, - force_print_format, - force_verbose_format, - report_memory_usage, - report_cpu_usage, - ) - # Find name of the log file. - if use_exec_path and log_filename is None: - dassert_is(log_filename, None, msg="Can't specify conflicting filenames") - # Use the name of the executable. - import inspect - - frame = inspect.stack()[1] - module = inspect.getmodule(frame[0]) - if not hasattr(module, __file__): - if module is None: - filename = "none" - else: - filename = str(module.__file__) - else: - filename = "unknown_module" - log_filename = os.path.realpath(filename) + ".log" - # Handle teeing to a file. - if log_filename: - # Create a dir (and all its missing parent dirs) if it doesn't exist. - log_dirname = os.path.dirname(log_filename) - if log_dirname != "" and not os.path.exists(log_dirname): - os.makedirs(log_dirname) - # Delete the file since we don't want to append. - if os.path.exists(log_filename): - try: - os.unlink(log_filename) - except FileNotFoundError as e: - print(e) - # Tee to file. 
- file_handler = logging.FileHandler(log_filename) - root_logger.addHandler(file_handler) - file_handler.setFormatter(formatter) - # - _LOG.info("Saving log to file '%s'", log_filename) - # - _LOG.debug("Effective logging level=%s", _LOG.getEffectiveLevel()) - # Shut up chatty modules. - dassert(hasattr(hloggin, "shutup_chatty_modules")) - assert hasattr(hloggin, "shutup_chatty_modules") - hloggin.shutup_chatty_modules(verbose=False) - if report_command_line: - _LOG.info("> cmd='%s'", get_command_line()) - # - # test_logger() - - -def set_logger_verbosity( - verbosity: int, module_name: Optional[str] = None -) -> None: - """ - Change the verbosity of the logging after the initialization. - - Passing a module_name (e.g., matplotlib) one can change the logging - of that specific module. - - E.g., set_logger_verbosity(logging.WARNING, "matplotlib") - """ - logger = logging.getLogger(module_name) - if module_name is None and not logger.handlers: - assert 0, "ERROR: Logger not initialized" - logger.setLevel(verbosity) - eff_level = logger.getEffectiveLevel() - print(f"effective level= {eff_level} ({logging.getLevelName(eff_level)})") - dassert_eq(logger.getEffectiveLevel(), verbosity) - - -def get_logger_verbosity() -> int: - root_logger = logging.getLogger() - if not root_logger.handlers: - assert 0, "ERROR: Logger not initialized" - return root_logger.getEffectiveLevel() - - -# ############################################################################# -# Command line. -# ############################################################################# - - -# Sample at the beginning of time before we start fiddling with command line -# args. 
-_CMD_LINE = " ".join(arg for arg in sys.argv) -_EXEC_NAME = os.path.abspath(sys.argv[0]) - - -def get_command_line() -> str: - return _CMD_LINE - - -def get_exec_name() -> str: - return _EXEC_NAME diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py deleted file mode 100644 index 13d388249..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdict.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Import as: - -import helpers.hdict as hdict -""" - -import logging -from typing import ( - Any, - Dict, - Generator, - Iterable, - Mapping, - Optional, - Tuple, - Union, -) - -try: - from collections.abc import Mapping as AbcMapping -except ImportError: - from collections import Mapping as AbcMapping - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -def get_nested_dict_iterator( - nested: Mapping[Any, Any], - path: Optional[Iterable[Any]] = None, -) -> Generator[Tuple[Tuple, Any], None, None]: - """ - Return nested mapping iterator that iterates in a depth-first fashion. - - :param nested: nested dictionary - :param path: path to node to start the visit from or `None` to start from - the root - :return: path to leaf node, value - """ - if path is None: - path = [] - if not isinstance(path, tuple): - path = tuple(path) - if not nested.items(): - yield path, nested - for key, value in nested.items(): - local_path = path + (key,) - if isinstance(value, AbcMapping): - yield from get_nested_dict_iterator(value, local_path) - else: - yield local_path, value - - -def extract_leaf_values(nested: Dict[Any, Any], key: Any) -> Dict[Any, Any]: - """ - Extract leaf values with key matching `key`. 
- - :param nested: nested dictionary - :param key: leaf key value to match - :return: dict with key = path as tuple, value = leaf value - """ - d = {} - for k, v in get_nested_dict_iterator(nested): - if k[-1] == key: - d[k] = v - return d - - -_NO_VALUE_SPECIFIED = "__NO_VALUE_SPECIFIED__" - - -def typed_get( - dict_: Union[Dict, "Config"], # noqa: F821 - key: Any, - default_value: Optional[Any] = _NO_VALUE_SPECIFIED, - *, - expected_type: Optional[Any] = None, -) -> Any: - """ - Equivalent to `dict.get(key, default_val)` and check the type of the - output. - - :param default_value: default value to return if key is not in `config` - :param expected_type: expected type of `value` - :return: config[key] if available, else `default_value` - """ - hdbg.dassert_isinstance(dict_, dict) - if default_value == _NO_VALUE_SPECIFIED: - # No value is specified so check that the key is present with dassert_in - # to report a decent error. - hdbg.dassert_in(key, dict_) - try: - ret = dict_.__getitem__(key) - except KeyError as e: - # No key: use the default val if it was passed or asserts. - _LOG.debug("e=%s", e) - # We can't use None since None can be a valid default value, so we use - # another value. - if default_value != _NO_VALUE_SPECIFIED: - ret = default_value - else: - # No default value found, then raise. - raise e - if expected_type is not None: - hdbg.dassert_isinstance(ret, expected_type) - return ret - - -def checked_get( - dict_: Dict, - key: Any, -) -> Any: - """ - Ensure that the key exists and print a decent error message in case of - error, instead of a generic `TypeError`. 
- """ - hdbg.dassert_in(key, dict_) - return dict_[key] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py deleted file mode 100644 index 44f973a89..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker.py +++ /dev/null @@ -1,871 +0,0 @@ -""" -Import as: - -import helpers.hdocker as hdocker -""" - -import argparse -import copy -import hashlib -import logging -import os -import platform -import subprocess -import time -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Docker utilities -# ############################################################################# - - -# TODO(gp): This is a function of the architecture. Move to the repo_config.py -# or the config file. -def get_use_sudo() -> bool: - """ - Check if Docker commands should be run with sudo. - - :return: Whether to use sudo for Docker commands. - """ - use_sudo = False - # if hserver.is_inside_docker(): - # use_sudo = True - return use_sudo - - -# TODO(gp): use_sudo should be set to None and the correct value inferred from -# the repo config. -def get_docker_executable(use_sudo: bool) -> str: - """ - Get the Docker executable with / without sudo, if needed. - """ - executable = "sudo " if use_sudo else "" - executable += "docker" - return executable - - -def process_docker_cmd( - docker_cmd: str, container_image: str, dockerfile: str, mode: str -) -> str: - """ - Process a Docker command according to the mode. 
- - :param docker_cmd: The Docker command to process. - :param container_image: The name of the Docker container. - :param dockerfile: The content of the Dockerfile. - :param mode: The mode to process the Docker command. - - "return_cmd": return the command as is. - - "system": execute the command. - - "save_to_file": save the command to a file. - :return: The output of the Docker command. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(docker_cmd, str) - hdbg.dassert_isinstance(container_image, str) - hdbg.dassert_isinstance(dockerfile, str) - if mode == "return_cmd": - ret = docker_cmd - elif mode == "system": - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd, suppress_output=False) - ret = "" - elif mode == "system_without_output": - hsystem.system(docker_cmd, suppress_output=True) - ret = "" - elif mode == "save_to_file": - file_name = f"tmp.process_docker_cmd.{container_image}.txt" - txt = [] - txt.append(f"docker_cmd={docker_cmd}") - txt.append(f"container_image={container_image}") - txt.append(f"dockerfile={dockerfile}") - txt = "\n".join(txt) - hio.to_file(file_name, txt) - ret = "" - else: - raise ValueError(f"Invalid mode='{mode}'") - return ret - - -def container_exists(container_name: str, use_sudo: bool) -> Tuple[bool, str]: - """ - Check if a Docker container is running by executing a command like: - - ``` - > docker container ls --filter=tmp.prettier -aq - aed8a5ce33a9 - ``` - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} container ls --filter name=/{container_name} -aq" - _, container_id = hsystem.system_to_one_line(cmd) - container_id = container_id.rstrip("\n") - exists = container_id != "" - _LOG.debug(hprint.to_str("exists container_id")) - return exists, container_id - - -def image_exists(image_name: str, use_sudo: bool) -> Tuple[bool, str]: - """ - Check if a Docker image already exists by 
executing a command like: - - ``` - > docker images tmp.prettier -aq - aed8a5ce33a9 - ``` - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} image ls --filter reference={image_name} -q" - _, image_id = hsystem.system_to_one_line(cmd) - image_id = image_id.rstrip("\n") - exists = image_id != "" - _LOG.debug(hprint.to_str("exists image_id")) - return exists, image_id - - -def container_rm(container_name: str, use_sudo: bool) -> None: - """ - Remove a Docker container by its name. - - :param container_name: Name of the Docker container to remove. - :param use_sudo: Whether to use sudo for Docker commands. - :raises AssertionError: If the container ID is not found. - """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - # Find the container ID from the name. - # Docker filter refers to container names using a leading `/`. - cmd = f"{executable} container ls --filter name=/{container_name} -aq" - _, container_id = hsystem.system_to_one_line(cmd) - container_id = container_id.rstrip("\n") - hdbg.dassert_ne(container_id, "") - # Delete the container. - _LOG.debug(hprint.to_str("container_id")) - cmd = f"{executable} container rm --force {container_id}" - hsystem.system(cmd) - _LOG.debug("docker container '%s' deleted", container_name) - - -def volume_rm(volume_name: str, use_sudo: bool) -> None: - """ - Remove a Docker volume by its name. - - :param volume_name: Name of the Docker volume to remove. - :param use_sudo: Whether to use sudo for Docker commands. 
- """ - _LOG.debug(hprint.func_signature_to_str()) - # - executable = get_docker_executable(use_sudo) - cmd = f"{executable} volume rm {volume_name}" - hsystem.system(cmd) - _LOG.debug("docker volume '%s' deleted", volume_name) - - -# ############################################################################# - - -def get_current_arch() -> str: - """ - Return the architecture that we are running on (e.g., arm64, aarch64, - x86_64). - """ - cmd = "uname -m" - _, current_arch = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("current_arch")) - return current_arch - - -def _is_compatible_arch(val1: str, val2: str) -> bool: - valid_arch = ["x86_64", "amd64", "aarch64", "arm64"] - hdbg.dassert_in(val1, valid_arch) - hdbg.dassert_in(val2, valid_arch) - if val1 == val2: - return True - compatible_sets = [{"x86_64", "amd64"}, {"aarch64", "arm64"}] - for comp_set in compatible_sets: - if {val1, val2}.issubset(comp_set): - return True - return False - - -def check_image_compatibility_with_current_arch( - image_name: str, - *, - use_sudo: Optional[bool] = None, - pull_image_if_needed: bool = True, - assert_on_error: bool = True, -) -> None: - """ - Check if the Docker image is compatible with the current architecture. - - :param image_name: Name of the Docker image to check. - :param use_sudo: Whether to use sudo for Docker commands. - :param pull_image_if_needed: Whether to pull the image if it doesn't - exist. - :param assert_on_error: Whether to raise an error if the image is - not compatible with the current architecture. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_ne(image_name, "") - if use_sudo is None: - use_sudo = get_use_sudo() - # Get the architecture that we are running on. - current_arch = get_current_arch() - # > docker image inspect \ - # 623860924167.dkr.ecr.eu-north-1.amazonaws.com/helpers:local-saggese-1.1.0 \ - # --format '{{.Architecture}}' - # arm64 - # Check and pull the image if needed. 
- has_image, _ = image_exists(image_name, use_sudo) - if not has_image: - _LOG.warning("Image '%s' not found: trying to pull it", image_name) - if pull_image_if_needed: - cmd = f"docker pull {image_name}" - hsystem.system(cmd) - else: - hdbg.dfatal("Image '%s' not found", image_name) - # Check the image architecture. - executable = get_docker_executable(use_sudo) - cmd = f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'" - _, image_arch = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("image_arch")) - # Check architecture compatibility. - if not _is_compatible_arch(current_arch, image_arch): - msg = f"Running architecture '{current_arch}' != image architecture '{image_arch}'" - if assert_on_error: - hdbg.dfatal(msg) - else: - _LOG.warning(msg) - _LOG.debug( - "Running architecture '%s' and image architecture '%s' are compatible", - current_arch, - image_arch, - ) - - -# ############################################################################# - - -def wait_for_file_in_docker( - container_id: str, - docker_file_path: str, - out_file_path: str, - *, - check_interval_in_secs: float = 0.5, - timeout_in_secs: int = 10, -) -> None: - """ - Wait for a file to be generated inside a Docker container and copy it to - the host. - - This function periodically checks for the existence of a file inside - a Docker container. Once the file is found, it copies the file to - the specified output path on the host. - - :param container_id: ID of the Docker container. - :param docker_file_path: Path to the file inside the Docker - container. - :param out_file_path: Path to copy the file to on the host. - :param check_interval_in_secs: Time in seconds between checks. - :param timeout_in_secs: Maximum time to wait for the file in - seconds. - :raises ValueError: If the file is not found within the timeout - period. 
- """ - _LOG.debug("Waiting for file: %s:%s", container_id, docker_file_path) - start_time = time.time() - while not os.path.exists(out_file_path): - cmd = f"docker cp {container_id}:{docker_file_path} {out_file_path}" - hsystem.system(cmd) - if time.time() - start_time > timeout_in_secs: - raise ValueError( - "Timeout reached. File not found: " - f"{container_id}:{docker_file_path}" - ) - time.sleep(check_interval_in_secs) - _LOG.debug("File generated: %s", out_file_path) - - -def replace_shared_root_path( - path: str, *, replace_ecs_tokyo: Optional[bool] = False -) -> str: - """ - Replace root path of the shared directory based on the mapping. - - :param path: path to replace, e.g., `/data/shared` - :param replace_ecs_tokyo: if True replace `ecs_tokyo` to `ecs` in the path - :return: replaced shared data dir root path, e.g., - - `/data/shared/ecs_tokyo/.../20240522_173000.20240522_182500/` -> - `/shared_data/ecs/.../20240522_173000.20240522_182500/` - - `/data/shared/ecs/.../20240522_173000.20240522_182500` -> - `/shared_data/ecs/.../20240522_173000.20240522_182500` - """ - # Inside ECS, we keep the original shared data path and replace it only when - # running inside Docker on the dev server. - if hserver.is_inside_docker() and not hserver.is_inside_ecs_container(): - shared_data_dirs = hserver.get_shared_data_dirs() - if shared_data_dirs is not None: - if replace_ecs_tokyo: - # Make a copy to avoid modifying the original one. 
- shared_data_dirs = copy.deepcopy(shared_data_dirs) - shared_data_dirs["ecs_tokyo"] = "ecs" - for shared_dir, docker_shared_dir in shared_data_dirs.items(): - path = path.replace(shared_dir, docker_shared_dir) - _LOG.debug( - "Running inside Docker on the dev server, thus replacing %s " - "with %s", - shared_dir, - docker_shared_dir, - ) - else: - _LOG.debug("No replacement found, returning path as-is: %s", path) - return path - - -# ############################################################################# -# Dockerized executable utils. -# ############################################################################# - -# See `docs/tools/docker/all.dockerized_flow.explanation.md` for details -# about the Dockerized flow. - - -def get_docker_base_cmd(use_sudo: bool) -> List[str]: - """ - Get the base command for running a Docker container. - - E.g., - ``` - docker run --rm --user $(id -u):$(id -g) \ - -e CSFY_AWS_PROFILE -e CSFY_ECR_BASE_PATH \ - ... - -e OPENAI_API_KEY - ``` - - :param use_sudo: Whether to use sudo for Docker commands. - :return: The base command for running a Docker container. - """ - docker_executable = get_docker_executable(use_sudo) - # Get the env vars to pass to the Docker container. - vars_to_pass = henv.get_csfy_env_vars() + henv.get_api_key_env_vars() - vars_to_pass = sorted(vars_to_pass) - vars_to_pass_as_str = " ".join(f"-e {v}" for v in vars_to_pass) - # Build the command as a list. - docker_cmd = [ - docker_executable, - "run --rm", - "--user $(id -u):$(id -g)", - vars_to_pass_as_str, - ] - # Handle coverage. - # TODO(gp): Is this env var standard, or should it be - # CSFY_COVERAGE_PROCESS_START? - # if os.environ.get("COVERAGE_PROCESS_START"): - # _LOG.debug("Enabling coverage") - # host_cov_dir = os.path.abspath("coverage_data") - # # TODO(gp): Use `hio.create_dir()` instead. 
- # os.makedirs(host_cov_dir, exist_ok=True) - # os.chmod(host_cov_dir, 0o777) - # coverage_dir_container = "/app/coverage_data" - # docker_cmd.extend( - # [ - # f"-e COVERAGE_FILE={coverage_dir_container}/.coverage", - # f"-e COVERAGE_PROCESS_START={coverage_dir_container}/.coveragerc", - # f"-v {host_cov_dir}:{coverage_dir_container}", - # ] - # ) - return docker_cmd - - -def get_container_image_name( - image_name: str, dockerfile: str -) -> Tuple[str, str]: - """ - Get the name of the container image. - - :param image_name: Name of the Docker container to build. - :param dockerfile: Content of the Dockerfile for building the - container. - :return: Name of the container image. - """ - _LOG.debug(hprint.func_signature_to_str("image_name dockerfile")) - hdbg.dassert_ne(image_name, "") - hdbg.dassert_ne(dockerfile, "") - dockerfile = hprint.dedent(dockerfile) - # if os.environ.get("COVERAGE_PROCESS_START"): - # _LOG.debug("Enabling coverage") - # # Check if this is a Python-based Dockerfile. - # if any( - # keyword in dockerfile.lower() - # for keyword in ["python", "pip", "python3"] - # ): - # coverage_dockerfile = hcovera.generate_coverage_dockerfile() - # _LOG.debug("Coverage Dockerfile content:\n%s", coverage_dockerfile) - # dockerfile = dockerfile.strip() + "\n" + coverage_dockerfile - # _LOG.debug("Coverage support added to Dockerfile") - # else: - # _LOG.warning( - # "Skipping coverage addition - not a Python-based Dockerfile" - # ) - _LOG.debug("Final Dockerfile:\n%s", dockerfile) - # Get the current architecture. - current_arch = get_current_arch() - sha256_hash = hashlib.sha256(dockerfile.encode()).hexdigest() - short_hash = sha256_hash[:8] - # Build the name of the container image. 
- image_name_out = f"{image_name}.{current_arch}.{short_hash}" - return image_name_out, dockerfile - - -def build_container_image( - image_name: str, - dockerfile: str, - force_rebuild: bool, - use_sudo: bool, - *, - use_cache: bool = True, - incremental: bool = True, -) -> str: - """ - Build a Docker image from a Dockerfile. - - :param image_name: Name of the Docker container to build. - :param dockerfile: Content of the Dockerfile for building the - container. - :param force_rebuild: Whether to force rebuild the Docker container. - There are two level of caching. The first level of caching is - our approach of skipping `docker build` if the image already - exists and the Dockerfile hasn't changed. The second level is - the Docker cache itself, which is invalidated by `--no-cache`. - :param use_sudo: Whether to use sudo for Docker commands. - :return: Name of the built Docker container. - :raises AssertionError: If the container ID is not found. - """ - _LOG.debug(hprint.func_signature_to_str("dockerfile")) - # - image_name_out, dockerfile = get_container_image_name(image_name, dockerfile) - # Check if the container already exists. If not, build it. - has_container, _ = image_exists(image_name_out, use_sudo) - coverage_enabled = os.environ.get("COVERAGE_PROCESS_START") - # if coverage_enabled: - # # Add coverage suffix to image name for tracking. - # image_name_out += ".coverage" - # # Force rebuild when coverage is enabled. 
- # has_container = False - # _LOG.debug( - # "Coverage enabled - forcing rebuild of image: {image_name_out}" - # ) - if bool(os.environ.get("CSFY_DOCKER_FORCE_REBUILD", False)): - _LOG.warning( - "CSFY_DOCKER_FORCE_REBUILD forcing to rebuild container without cache" - ) - force_rebuild = True - if force_rebuild: - _LOG.warning( - "Forcing to rebuild of container '%s' without cache", - image_name, - ) - has_container = False - use_cache = False - _LOG.debug(hprint.to_str("has_container use_cache")) - # # Always prepare coverage files when coverage is enabled, regardless of container existence. - # if coverage_enabled: - # # Create build context directory for coverage files. - # build_context_dir = "tmp.docker_build" - # hio.create_dir(build_context_dir, incremental=incremental) - # # Always copy .coveragerc when coverage is enabled. - # coveragerc_src = ".coveragerc" - # coveragerc_dst = os.path.join(build_context_dir, ".coveragerc") - # if os.path.exists(coveragerc_src): - # shutil.copy2(coveragerc_src, coveragerc_dst) - # _LOG.debug( - # "Coverage enabled - copied {coveragerc_src} to {coveragerc_dst}" - # ) - # else: - # _LOG.warning( - # "Coverage enabled but .coveragerc not found at {coveragerc_src}" - # ) - if not has_container: - # Create a temporary Dockerfile. - _LOG.warning("Building Docker container...") - build_context_dir = "tmp.docker_build" - if not coverage_enabled: - # Only create build context if not already created for coverage - hio.create_dir(build_context_dir, incremental=incremental) - temp_dockerfile = os.path.join(build_context_dir, "Dockerfile") - hio.to_file(temp_dockerfile, dockerfile) - # Build the container. 
- docker_executable = get_docker_executable(use_sudo) - cmd = [ - f"{docker_executable} build", - f"-f {temp_dockerfile}", - f"-t {image_name_out}", - # "--platform linux/aarch64", - ] - if not use_cache: - cmd.append("--no-cache") - cmd.append(build_context_dir) - cmd = " ".join(cmd) - hsystem.system(cmd, suppress_output=False) - _LOG.info("Building Docker container... done") - return image_name_out - - -# ############################################################################# - - -def get_host_git_root() -> str: - """ - Get the Git root path on the host machine, when inside a Docker container. - """ - hdbg.dassert_in("CSFY_HOST_GIT_ROOT_PATH", os.environ) - host_git_root_path = os.environ["CSFY_HOST_GIT_ROOT_PATH"] - return host_git_root_path - - -def get_docker_mount_info( - is_caller_host: bool, use_sibling_container_for_callee: bool -) -> Tuple[str, str, str]: - """ - Get the Docker mount information for the current environment. - - This function determines the appropriate source and target paths for - mounting a directory in a Docker container. - - Same inputs as `convert_caller_to_callee_docker_path()`. - - :return: A tuple containing - - caller_mount_path: the mount path on the caller filesystem, e.g., - `/app` or `/Users/.../src/cmamp1` - - callee_mount_path: the mount path inside the called Docker container, - e.g., `/app` - - the mount string, e.g., - `source={caller_mount_path},target={callee_mount_path}` - type=bind,source=/app,target=/app - """ - _LOG.debug(hprint.func_signature_to_str()) - # Compute the mount path on the caller filesystem. - if is_caller_host: - # On the host machine, the mount path is the Git root. - caller_mount_path = hgit.find_git_root() - else: - # Inside a Docker container, the mount path depends on the container - # style. - use_host_git_root = ( - use_sibling_container_for_callee - and not hserver.is_csfy_dind_enabled() - ) - if use_host_git_root: - # For sibling containers, we need to get the Git root on the host. 
- caller_mount_path = get_host_git_root() - else: - # For children containers, we need to get the local Git root on the - # host. - caller_mount_path = hgit.find_git_root() - # The target mount path is always `/app` inside the Docker container. - callee_mount_path = "/app" - # Build the Docker mount string. - mount = f"type=bind,source={caller_mount_path},target={callee_mount_path}" - _LOG.debug(hprint.to_str("caller_mount_path callee_mount_path mount")) - return caller_mount_path, callee_mount_path, mount - - -def get_docker_mount_context() -> Tuple[bool, bool, str, str, str]: - """ - Return Docker mount context for container operations. - - :return: (is_caller_host, use_sibling_container_for_callee, - caller_mount_path, callee_mount_path, mount) - """ - is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = hserver.use_docker_sibling_containers() - caller_mount_path, callee_mount_path, mount = get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - return ( - is_caller_host, - use_sibling_container_for_callee, - caller_mount_path, - callee_mount_path, - mount, - ) - - -def build_and_run_docker_cmd( - use_sudo: bool, - callee_mount_path: str, - mount: str, - container_image: str, - dockerfile: str, - tool_cmd: str, - mode: str, - *, - override_entrypoint: bool = False, - wrap_in_bash: bool = False, -) -> str: - """ - Build and execute a Docker command. - """ - docker_cmd = get_docker_base_cmd(use_sudo) - if override_entrypoint: - docker_cmd.append("--entrypoint ''") - # Check that the container image exists. 
- hdbg.dassert( - image_exists(container_image, use_sudo)[0], - "Container image '%s' does not exist", - container_image, - ) - docker_cmd.extend( - [ - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - ] - ) - if wrap_in_bash: - docker_cmd.append(f'bash -c "{tool_cmd}"') - else: - docker_cmd.append(tool_cmd) - docker_cmd_str = " ".join(docker_cmd) - return process_docker_cmd(docker_cmd_str, container_image, dockerfile, mode) - - -# TODO(gp): Move to helpers.hdbg. -def _dassert_valid_path(file_path: str, is_input: bool) -> None: - """ - Assert that a file path is valid, based on it being input or output. - - For input files, it ensures that the file or directory exists. For - output files, it ensures that the enclosing directory exists. - - :param file_path: The file path to check. - :param is_input: Whether the file path is an input file. - """ - if is_input: - # If it's an input file, then `file_path` must exist as a file or a dir. - hdbg.dassert_path_exists(file_path) - else: - # If it's an output, we might be writing a file that doesn't exist yet, - # but we assume that the including directory is already present. - dir_name = os.path.normpath(os.path.dirname(file_path)) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert( - os.path.exists(file_path) or os.path.exists(dir_name), - "Invalid path: '%s' and '%s' don't exist", - file_path, - dir_name, - ) - - -# TODO(gp): Move to helpers.hdbg. -def _dassert_is_path_included(file_path: str, including_path: str) -> None: - """ - Assert that a file path is included within another path. - - This function checks if the given file path starts with the - specified including path. If not, it raises an assertion error. - - :param file_path: The file path to check. - :param including_path: The path that should include the file path. - """ - # TODO(gp): Maybe we need to normalize the paths. 
- hdbg.dassert( - file_path.startswith(including_path), - "'%s' needs to be underneath '%s'", - file_path, - including_path, - ) - - -def convert_caller_to_callee_docker_path( - caller_file_path: str, - caller_mount_path: str, - callee_mount_path: str, - check_if_exists: bool, - is_input: bool, - is_caller_host: bool, - use_sibling_container_for_callee: bool, -) -> str: - """ - Convert a file path from the (current) caller filesystem to the called - Docker container path. - - :param caller_file_path: The file path on the caller filesystem. - :param caller_mount_path: The source mount path on the host machine. - :param callee_mount_path: The target mount path inside the Docker - container. - :param check_if_exists: Whether to check if the file path exists. - :param is_input: Whether the file path is an input file (used only if - `check_if_exists` is True). - :param is_caller_host: Whether the caller is running on the host - machine or inside a Docker container. - :param use_sibling_container_for_callee: Whether to use a sibling - container or a children container - :return: The converted file path inside the Docker container. - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_ne(caller_file_path, "") - hdbg.dassert_ne(caller_mount_path, "") - hdbg.dassert_ne(callee_mount_path, "") - if check_if_exists: - _dassert_valid_path(caller_file_path, is_input) - # Make the path absolute with respect to the (current) caller filesystem. - abs_caller_file_path = os.path.abspath(caller_file_path) - if is_caller_host: - # On the host, the path needs to be underneath the caller mount point. - caller_mount_point = caller_mount_path - else: - # We are inside a Docker container, so the path needs to be under - # the local Git root, since this is the mount point. 
- caller_mount_point = hgit.find_git_root() - _ = use_sibling_container_for_callee - # This is not always possible, e.g., '/var/log/app.log' needs to be - # underneath '/app' - _dassert_is_path_included(abs_caller_file_path, caller_mount_point) - # Make the path relative to the caller mount point. - _LOG.debug(hprint.to_str("caller_file_path caller_mount_point")) - rel_path = os.path.relpath(caller_file_path, caller_mount_point) - docker_path = os.path.join(callee_mount_path, rel_path) - docker_path = os.path.normpath(docker_path) - # - _LOG.debug( - " Converted %s -> %s -> %s", caller_file_path, rel_path, docker_path - ) - return docker_path - - -def is_path(path: str) -> bool: - """ - Check if `path` can be considered a file or a directory using heuristics. - - - return: True if the string looks like a path, False otherwise. - """ - # E.g., - # ``` - # is_path("file.txt") # True, since it has an extension - # is_path("/path/to/file.py") # True, since it has an absolute path - # is_path("/path/to") # True, since it has an absolute path - # is_path("../data.csv") # True, since it has an relative path - # is_path("folder/") # True, since it has a trailing slash - # is_path(".hidden") # True, since it has a leading dot - # is_path("readme") # False, since it has no extension and no path - # ``` - # Check if it has a file extension (e.g., .txt, .csv). - if os.path.splitext(path)[1]: - return True - # Check if it is an absolute or relative path (e.g., starts with "/" or "./" - # or "../") - if path.startswith("/") or path.startswith("./") or path.startswith("../"): - return True - # Check if it ends with a slash. - if path.endswith("/"): - return True - # Check if it has a hidden file. - basename = os.path.basename(path) - if basename.startswith(".") and basename.count(".") == 1: - return True - # Check if it contains a slash. 
- if "/" in path: - return True - return False - - -def convert_all_paths_from_caller_to_callee_docker_path( - cmd_opts: List[str], - caller_mount_path: str, - callee_mount_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, -) -> List[str]: - """ - Convert all the paths from the caller to the callee Docker container path. - - The paths are recognized by checking whether they point to an existing file - or directory. - - The limitation of this approach is that output files are not recognized. To - work around this problem: - - Create output dirs - - Explicitly parse options that are outputs (e.g., `-o `) - - :param cmd_opts: List of command options. - :param caller_mount_path: See `get_docker_mount_info()`. - :param callee_mount_path: See `get_docker_mount_info()`. - :param is_caller_host: See `get_docker_mount_info()`. - :param use_sibling_container_for_callee: See `get_docker_mount_info()`. - :return: List of converted command options. - """ - _LOG.debug(hprint.func_signature_to_str()) - # Converted command options. - cmd_opts_out = [] - # Scan the list of command option. 
- for cmd_opt_in in cmd_opts: - exists = os.path.exists(cmd_opt_in) - is_path_ = is_path(cmd_opt_in) - _LOG.debug(hprint.to_str("cmd_opt_in exists is_path_")) - if exists or is_path_: - check_if_exists = False - is_input = False - cmd_opt_out = convert_caller_to_callee_docker_path( - cmd_opt_in, - caller_mount_path, - callee_mount_path, - check_if_exists, - is_input, - is_caller_host, - use_sibling_container_for_callee, - ) - _LOG.debug(hprint.to_str("cmd_opt_in -> cmd_opt_out")) - cmd_opts_out.append(cmd_opt_out) - else: - _LOG.debug("File does not exist: %s", cmd_opt_in) - cmd_opts_out.append(cmd_opt_in) - _LOG.debug(hprint.to_str("cmd_opts_out")) - return cmd_opts_out - - -# ############################################################################# -# CLI utilities -# ############################################################################# - - -def add_open_arg(parser: argparse.ArgumentParser) -> None: - """ - Add --open option to parser for opening output files on macOS. - - :param parser: ArgumentParser instance to add the option to - """ - parser.add_argument( - "--open", - action="store_true", - default=False, - help="Open the output file on macOS", - ) - - -def open_file_on_macos(file_path: str) -> None: - """ - Open a file on macOS using the 'open' command. 
- - :param file_path: Path to the file to open - :raises subprocess.CalledProcessError: If open command fails - """ - if platform.system() != "Darwin": - _LOG.warning("--open flag only works on macOS") - return - subprocess.run(["open", file_path], check=True) - _LOG.info("Opened file with macOS 'open' command: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py deleted file mode 100644 index 0ab2f2f2f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hdocker_tests.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Utilities for running docker tests. - -Import as: - -import helpers.hdocker_tests as hdoctest -""" - -import glob -import logging -import os -from typing import List - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Constants -# ############################################################################# - - -# Pattern for docker test files. -DOCKER_TEST_PATTERN = "docker_test_*.py" - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def get_docker_test_files(test_dir: str) -> List[str]: - """ - Find all docker test files in the specified directory. 
- - :param test_dir: directory to search for test files - :return: sorted list of test file paths - """ - pattern = os.path.join(test_dir, DOCKER_TEST_PATTERN) - files = sorted(glob.glob(pattern)) - _LOG.info("Found %d docker test files", len(files)) - for file in files: - _LOG.debug(" - %s", file) - return files - - -def _run_docker_pytest_cmd( - test_file: str, *, docker_cmd_script: str = "./docker_cmd.sh" -) -> int: - """ - Run a test file through docker_cmd.sh with pytest. - - :param test_file: path to the test file - :param docker_cmd_script: path to docker_cmd.sh script - :return: return code from the command - """ - hdbg.dassert_file_exists(test_file) - hdbg.dassert_file_exists(docker_cmd_script) - cmd = f'{docker_cmd_script} "pytest {test_file}"' - _LOG.info("Running: %s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - return rc - - -def run_docker_cmd(script_dir: str, *, shell_cmd: str = "ls /git_root") -> None: - """ - Run an arbitrary shell command inside Docker via docker_cmd.sh. - - :param script_dir: directory containing docker_cmd.sh - :param shell_cmd: shell command to run inside the container - """ - hdbg.dassert_path_exists(script_dir) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - hdbg.dassert_file_exists(docker_cmd_script) - cmd = f"cd {script_dir} && bash {docker_cmd_script} '{shell_cmd}'" - hsystem.system(cmd) - - -def run_all_tests( - test_dir: str, *, docker_cmd_script: str = "./docker_cmd.sh" -) -> int: - """ - Find and run all docker test files in the directory. 
- - :param test_dir: directory containing test files - :param docker_cmd_script: path to docker_cmd.sh script - :return: 0 if all tests passed, non-zero otherwise - """ - test_files = get_docker_test_files(test_dir) - if not test_files: - _LOG.warning("No docker test files found in %s", test_dir) - return 0 - failed_tests = [] - for test_file in test_files: - return_code = _run_docker_pytest_cmd( - test_file, docker_cmd_script=docker_cmd_script - ) - if return_code != 0: - failed_tests.append(test_file) - if failed_tests: - _LOG.error("Failed tests: %s", failed_tests) - return 1 - _LOG.info("All tests passed") - return 0 - - -# ############################################################################# -# DockerTestCase -# ############################################################################# - - -# TODO(gp): Can this be used for run_dockerized_* tests? -class DockerTestCase(hunitest.TestCase): - """ - Base test class for Docker tests. - - Subclasses must set `_test_file = __file__` and may add notebook test - methods that call `self._helper(notebook_name)`. - """ - - _test_file: str = "" - - @pytest.mark.slow - def test_docker_build(self) -> None: - """ - Test that docker_build.sh runs without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_build_script = os.path.join(script_dir, "docker_build.sh") - hdbg.dassert_file_exists(docker_build_script) - # Run test. - cmd = f"cd {script_dir} && bash {docker_build_script}" - hsystem.system(cmd) - - @pytest.mark.slow - def test_docker_cmd(self) -> None: - """ - Test that docker_cmd.sh 'ls /git_root' runs without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - hdbg.dassert_file_exists(docker_cmd_script) - # Run test. 
- cmd = f"cd {script_dir} && bash {docker_cmd_script} 'ls /git_root'" - hsystem.system(cmd) - - def test_docker_bash(self) -> None: - """ - Test that docker_bash.sh runs 'ls /git_root' and exits without error. - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_bash_script = os.path.join(script_dir, "docker_bash.sh") - if not os.path.exists(docker_bash_script): - pytest.skip("docker_bash.sh not found in " + script_dir) - # Run test. - shell_cmd = "ls /git_root" - cmd = f"echo '{shell_cmd}' | bash {docker_bash_script}" - hsystem.system(cmd) - - def _run_notebook(self, notebook_name: str) -> None: - """ - Run a single notebook inside Docker. - - :param notebook_name: notebook filename relative to the project dir - """ - # Prepare inputs. - script_dir = os.path.dirname( - os.path.dirname(os.path.abspath(self._test_file)) - ) - docker_cmd_script = os.path.join(script_dir, "docker_cmd.sh") - notebook_path = os.path.join(script_dir, notebook_name) - hdbg.dassert_file_exists(notebook_path) - # Compute the notebook path inside the container via /git_root. 
- git_root = hgit.find_git_root(script_dir) - rel_path = os.path.relpath(script_dir, git_root) - container_notebook_path = f"/git_root/{rel_path}/{notebook_name}" - cmd = ( - f"cd {script_dir} && " - f"bash {docker_cmd_script} " - f"'jupyter nbconvert --execute --to html " - f"--ExecutePreprocessor.timeout=-1 {container_notebook_path}'" - ) - hsystem.system(cmd) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py deleted file mode 100644 index f52fc9230..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hemail.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Import as: - -import helpers.hemail as hemail -""" - -import email.mime.multipart as emmult -import email.mime.text as emtext -import os -import smtplib -from typing import Optional - - -def send_email( - subject: str, - message: str, - to_adr: str, - email_address: Optional[str] = None, - email_password: Optional[str] = None, - html: bool = False, -) -> None: - """ - Send mail to specified e-mail addresses. 
- - :param message: Message to be sent - :param to_adr: Mail to which to send messages - :type list - :return: None - """ - server = smtplib.SMTP("smtp.gmail.com", 587) - server.starttls() - if email_address is None: - email_address = os.environ["AM_EMAIL_ADDRESS"] - if email_password is None: - email_password = os.environ["AM_EMAIL_PASSWORD"] - server.login(email_address, email_password) - msg = emmult.MIMEMultipart() - msg["From"] = email_address - msg["To"] = ", ".join(to_adr) - msg["Subject"] = subject - if html: - msg.attach(emtext.MIMEText(message, "html")) - else: - msg.attach(emtext.MIMEText(message, "plain")) - - text = msg.as_string() - server.sendmail(email_address, to_adr, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py deleted file mode 100644 index f2e0719bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/henv.py +++ /dev/null @@ -1,541 +0,0 @@ -""" -Import as: - -import helpers.henv as henv -""" - -import logging -import os -from typing import Any, List, Tuple, Union - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.repo_config_utils as hrecouti - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - - -_WARNING = "\033[33mWARNING\033[0m" - - -# All printing functions should: -# - Return a string and not a list of strings -# - Add a newline at the end of the string (i.e., the string should end with -# `\n`) - - -# ############################################################################# -# Get env vars info. 
-# ############################################################################# - - -def get_env_var( - env_name: str, - *, - as_bool: bool = False, - default_value: Any = None, - abort_on_missing: bool = True, -) -> Union[str, bool, Any]: - """ - Get an environment variable by name. - - :param env_name: name of the env var - :param as_bool: convert the value into a Boolean - :param default_value: the default value to use in case it's not - defined - :param abort_on_missing: if the env var is not defined aborts, - otherwise use the default value - :return: value of env var - """ - if env_name not in os.environ: - if abort_on_missing: - hdbg.dassert_in( - env_name, - os.environ, - "Can't find env var '%s' in '%s'", - env_name, - str(os.environ), - ) - else: - return default_value - value = os.environ[env_name] - if as_bool: - # Convert the value into a boolean. - if value in ("0", "", "None", "False"): - value = False - else: - value = True - return value - - -def get_csfy_env_vars() -> List[str]: - """ - Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. - """ - # TODO(gp): We should only pass the `CSFY_` vars. - env_var_names = [ - v - for v in os.environ.keys() - if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") - ] - return env_var_names - - -# TODO(gp): Extract all the env vars that start with AM_, CK_, CSFY_ and make -# sure they have a description here. -def get_env_vars() -> List[str]: - """ - Return all the env vars that are expected to be set in Docker. - """ - # Keep in sync with `lib_tasks.py:_generate_compose_file()`. - env_var_names = [ - # Force enabling Docker-in-Docker. - "CSFY_ENABLE_DIND", - # Enable forcing certain unit tests to fail to check that unit test - # failures are caught. - "CSFY_FORCE_TEST_FAIL", - # The name of the host running Docker. - "CSFY_HOST_NAME", - # The OS of the host running Docker. - "CSFY_HOST_OS_NAME", - # The version of the host running Docker. 
- "CSFY_HOST_OS_VERSION", - # The name of the user running the host. - "CSFY_HOST_USER_NAME", - # Whether to check if certain property of the repo are as expected or not. - "CSFY_REPO_CONFIG_CHECK", - # Path to use for `repo_config.py`. E.g., used when running `helpers` - # container to avoid using the `repo_config.py` corresponding to the - # container launching the linter. - "CSFY_REPO_CONFIG_PATH", - "GH_ACTION_ACCESS_TOKEN", - # Whether we are running inside GH Actions. - "CSFY_CI", - # TODO(gp): Difference between amp and cmamp. - # CK AWS credentials. - "CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_DEFAULT_REGION", - "CSFY_AWS_SECRET_ACCESS_KEY", - "CSFY_AWS_SESSION_TOKEN", - # S3 bucket to use for CK. - "CSFY_AWS_S3_BUCKET", - # Path to the ECR for the Docker images for CK. - "CSFY_ECR_BASE_PATH", - ] - # No duplicates. - # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq( - len(set(env_var_names)), - len(env_var_names), - "There are duplicates", - str(env_var_names), - ) - # Sort. - env_var_names = sorted(env_var_names) - return env_var_names - - -def get_secret_env_vars() -> List[str]: - """ - Return the list of env vars that are secrets. - """ - secret_env_var_names = [ - # TODO(gp): Difference between amp and cmamp. - "CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_SECRET_ACCESS_KEY", - "GH_ACTION_ACCESS_TOKEN", - ] - # No duplicates. - # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq( - len(set(secret_env_var_names)), - len(secret_env_var_names), - "There are duplicates", - str(secret_env_var_names), - ) - # Secret env vars are a subset of the env vars. - env_vars = get_env_vars() - # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead. - if not set(secret_env_var_names).issubset(set(env_vars)): - diff = set(secret_env_var_names).difference(set(env_vars)) - cmd = f"Secret vars in `{str(diff)} are not in '{str(env_vars)}'" - assert 0, cmd - # Sort. 
- secret_env_var_names = sorted(secret_env_var_names) - return secret_env_var_names - - -def get_api_key_env_vars() -> List[str]: - """ - Return the list of env vars that are API keys. - """ - # Find all the env vars that end with "_API_KEY". - env_var_names = [ - env_var for env_var in os.environ.keys() if env_var.endswith("_API_KEY") - ] - return env_var_names - - -def check_env_vars() -> None: - """ - Make sure all the expected env vars are defined. - """ - env_vars = get_env_vars() - for env_var in env_vars: - hdbg.dassert_in( - env_var, - os.environ, - "env_var='%s' is not in env_vars='%s'", - env_var, - str(os.environ.keys()), - ) - - -def env_vars_to_string() -> str: - """ - Return a string with the signature of all the expected env vars (including - the secret ones). - """ - txt: List[str] = [] - # Get the expected env vars and the secret ones. - env_vars = get_env_vars() - secret_env_vars = get_secret_env_vars() - # Print a signature. - for env_name in env_vars: - is_defined = env_name in os.environ - is_empty = is_defined and os.environ[env_name] == "" - if not is_defined: - txt.append(f"{env_name}=undef") - else: - if env_name in secret_env_vars: - # Secret env var: print if it's empty or not. - if is_empty: - txt.append(f"{env_name}=empty") - else: - txt.append(f"{env_name}=***") - else: - # Not a secret var: print the value. - txt.append(f"{env_name}='{os.environ[env_name]}'") - result = "\n".join(txt) - return result - - -# ############################################################################# -# Get Git info. -# ############################################################################# - - -# Copied from helpers.hgit to avoid circular dependencies. - - -def _git_log(num_commits: int = 5, my_commits: bool = False) -> str: - """ - Return the output of a pimped version of git log. 
- - :param num_commits: number of commits to report - :param my_commits: True to report only the current user commits - :return: string - """ - cmd = [] - cmd.append("git log --date=local --oneline --graph --date-order --decorate") - cmd.append( - "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" - ) - cmd.append(f"-{num_commits}") - if my_commits: - # This doesn't work in a container if the user relies on `~/.gitconfig` to - # set the user name. - # TODO(gp): We should use `get_git_name()`. - cmd.append("--author $(git config user.name)") - cmd = " ".join(cmd) - data: Tuple[int, str] = hsystem.system_to_string(cmd) - _, txt = data - return txt - - -# End copy. - - -def _get_git_signature(git_commit_type: str = "all") -> str: - """ - Get information about current branch and latest commits. - """ - txt: List[str] = [] - # Get the branch name. - cmd = "git branch --show-current" - _, branch_name = hsystem.system_to_one_line(cmd) - txt.append(f"branch_name='{branch_name}'") - # Get the short Git hash of the current branch. - cmd = "git rev-parse --short HEAD" - _, hash_ = hsystem.system_to_one_line(cmd) - txt.append(f"hash='{hash_}'") - # Add info about the latest commits. - num_commits = 3 - if git_commit_type == "all": - txt.append("# Last commits:") - log_txt = _git_log(num_commits=num_commits, my_commits=False) - txt.append(hprint.indent(log_txt)) - elif git_commit_type == "mine": - txt.append("# Your last commits:") - log_txt = _git_log(num_commits=num_commits, my_commits=True) - txt.append(hprint.indent(log_txt)) - elif git_commit_type == "none": - pass - else: - raise ValueError(f"Invalid value='{git_commit_type}'") - # - result = "\n".join(txt) + "\n" - hdbg.dassert(result.endswith("\n"), "result='%s'", result) - return result - - -# def _get_submodule_signature( -# partial_signature: List[str], *, git_commit_type: str = "all" -# ) -> str: -# """ -# Add git signature for all submodules. 
-# :param partial_signature: the signature to append to -# `git_commit_type` the type of git commit to include in the -# signature -# :return: system signature enhanced by git submodule info -# """ -# # TODO(Juraj): Think of a better generalisation rather listing all the options. -# submodule_options = ["amp", "amp/helpers_root", "helpers_root"] -# signature = partial_signature -# prev_cwd = os.getcwd() -# for submodule in submodule_options: -# if os.path.exists(submodule): -# try: -# # Temporarily descend into submodule. -# os.chdir(submodule) -# signature.append(f"# Git {submodule}") -# git_amp_sig = _get_git_signature(git_commit_type) -# signature = _append(signature, git_amp_sig) -# # In case there is a runtime error we want to end up in a consistent -# # state (the original path). -# finally: -# os.chdir(prev_cwd) -# hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) -# return signature - - -# ############################################################################# -# Get system info. -# ############################################################################# - - -def _get_platform_info() -> str: - """ - Get platform information as a list of strings. - """ - import platform - - txt_tmp: List[str] = [] - uname = platform.uname() - txt_tmp.append(f"system={uname.system}") - txt_tmp.append(f"node name={uname.node}") - txt_tmp.append(f"release={uname.release}") - txt_tmp.append(f"version={uname.version}") - txt_tmp.append(f"machine={uname.machine}") - txt_tmp.append(f"processor={uname.processor}") - # - txt = hprint.to_info("Platform info", txt_tmp) - return txt - - -def _get_psutil_info() -> str: - """ - Get system resource information using psutil. 
- """ - try: - import psutil - - has_psutil = True - except ModuleNotFoundError as e: - _LOG.warning("psutil is not installed: %s", str(e)) - has_psutil = False - txt_tmp = [] - if has_psutil: - txt_tmp.append(f"cpu count={psutil.cpu_count()}") - if hasattr(psutil, "cpu_freq") and psutil.cpu_freq is not None: - txt_tmp.append(f"cpu freq={str(psutil.cpu_freq())}") - else: - txt_tmp.append("cpu freq=unavailable") - # TODO(gp): Report in MB or GB. - txt_tmp.append(f"memory={str(psutil.virtual_memory())}") - txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") - else: - txt_tmp.append("psutil is not installed") - # - txt = hprint.to_info("psutils info", txt_tmp) - return txt - - -# ############################################################################# -# Get package info. -# ############################################################################# - - -def _get_library_version(lib_name: str) -> str: - try: - cmd = f"import {lib_name}" - # pylint: disable=exec-used - exec(cmd) - except ImportError: - version = "?" - else: - cmd = f"{lib_name}.__version__" - version = eval(cmd) - return version - - -def _get_package_info() -> Tuple[str, int]: - """ - Get package version information. 
- - Returns: - Tuple containing: - - List of strings with package info - - Number of failed imports - """ - import platform - - txt_tmp = [] - packages = [] - packages.append(("python", platform.python_version())) - # import sys - # print(sys.version) - libs = [ - "cvxopt", - "cvxpy", - "gluonnlp", - "gluonts", - "joblib", - "mxnet", - "numpy", - "pandas", - "pyarrow", - "scipy", - "seaborn", - "sklearn", - "statsmodels", - ] - libs = sorted(libs) - failed_imports = 0 - for lib in libs: - # This is due to Cmamp4924: - # WARNING: libarmpl_lp64_mp.so: cannot open shared object file: No such - # file or directory - try: - version = _get_library_version(lib) - except OSError as e: - print(_WARNING + ": " + str(e)) - if version.startswith("ERROR"): - failed_imports += 1 - packages.append((lib, version)) - txt_tmp.extend([f"{lib}: {version}" for (lib, version) in packages]) - # - txt = hprint.to_info("Packages", txt_tmp) - return txt, failed_imports - - -# ############################################################################# - - -def _get_git_info(git_commit_type: str) -> str: - txt_tmp: List[str] = [] - try: - txt_tmp.append(_get_git_signature(git_commit_type)) - # If there are any submodules, fetch their git signature. - # txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) - except RuntimeError as e: - _LOG.warning(str(e)) - txt_tmp.append("No git info") - # - txt = hprint.to_info("Git info", txt_tmp) - return txt - - -# ############################################################################# -# Get system signature. -# ############################################################################# - - -def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: - """ - Return a string with the system signature. - - :param git_commit_type: the type of git commit to include in the - signature - :return: the system signature and the number of failed imports - """ - txt: List[str] = [] - # Add container version. 
- txt_tmp = hversio.get_container_version_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add Git signature. - txt_tmp = _get_git_info(git_commit_type) - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add platform info. - txt_tmp = _get_platform_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add psutil info. - txt_tmp = _get_psutil_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add Docker info. - txt_tmp = hserver.get_docker_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # Add package info. - txt_tmp, failed_imports = _get_package_info() - hprint.dassert_one_trailing_newline(txt_tmp) - txt.append(txt_tmp) - # - txt_str: str = hprint.to_info("System signature", txt) - return txt_str, failed_imports - - -# ############################################################################# -# Package all the information into a string. -# ############################################################################# - - -def env_to_str( - repo_config: bool = True, - server_config: bool = True, - system_signature: bool = True, - env_vars: bool = True, -) -> str: - """ - Package all the information into a string. 
- """ - # - msg = "" - # - if repo_config: - repo_config_str = hrecouti.get_repo_config().config_func_to_str() - msg += hprint.to_info("Repo config", repo_config_str) + "\n" - # - if server_config: - server_config_str = hserver.config_func_to_str() - msg += hprint.to_info("Server config", server_config_str) + "\n" - # - if system_signature: - msg += get_system_signature()[0] + "\n" - # - if env_vars: - env_vars_str = env_vars_to_string() - msg += hprint.to_info("Env vars", env_vars_str) + "\n" - return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py deleted file mode 100644 index d758ff16b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hfile_tree.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Import as: - -import helpers.hfile_tree as hfiltree -""" - -import logging -import os -import pathlib -import re -from typing import Dict, List - -_LOG = logging.getLogger(__name__) - - -def _build_tree_lines( - dir_name: str, - nodes: List[pathlib.Path], - comments: Dict[str, str], -) -> str: - """ - Build the text lines for the directory tree while preserving inline - comments. 
- - :param dir_name: the directory name - :param nodes: relative paths under the given directory - :param comments: inline comments from existing file - :return: a formatted tree - - Example output: - ``` - devops - - __init__.py - - compose - - __init__.py - - tmp.docker-compose.yml - - docker_build - - create_users.sh - - dev.Dockerfile - - dockerignore.dev - - dockerignore.prod - - etc_sudoers - - fstab - - install_cprofile.sh - - install_dind.sh - - install_os_packages.sh - - install_publishing_tools.sh - - install_python_packages.sh - - pip_list.txt - - poetry.lock - - poetry.toml - - prod.Dockerfile - - pyproject.python_data_stack.toml - - pyproject.toml - - update_os.sh - - utils.sh - - docker_run - - bashrc - - docker_setenv.sh - - entrypoint.sh - - run_jupyter_server.sh - - env - - default.env - ``` - """ - lines = [dir_name] - for rel in nodes: - indent = " " * (len(rel.parts) - 1) - key = "/".join(rel.parts) - suffix = comments.get(key, "") - lines.append(f"{indent}- {rel.name}{suffix}".rstrip()) - return "\n".join(lines) - - -def _parse_comments(old_tree: List[str]) -> Dict[str, str]: - """ - Parse existing tree lines to extract inline comments. - - :param old_tree: the existing tree block - :return: inline comments and indentations - """ - comments: Dict[str, str] = {} - stack: List[str] = [] - for line in old_tree: - # Find indents, bullet points, name, and inline comments. - match = re.match(r"^(\s*)-\s+([^\s#]+)(\s*#.*)?$", line) - if not match: - continue - indent, name, suffix = match.groups() - level = len(indent) // 2 - stack = stack[:level] - stack.append(name) - key = "/".join(stack) - comments[key] = suffix or "" - return comments - - -def _get_tree_nodes( - dir_path: pathlib.Path, - depth: int, - include_tests: bool, - include_python: bool, - only_dirs: bool, -) -> List[pathlib.Path]: - """ - Get relative paths under the given directory based on filters. 
- - Filters include: - - Test files and directories - - Python files - - :param dir_path: the directory path - :param depth: maximum depth to traverse - :param include_tests: include test files or directories - :param include_python: only show python files - :param only_dirs: only show directories - :return: all relative paths that match the specified flags - """ - nodes: List[pathlib.Path] = [] - for dirpath, dirnames, filenames in os.walk(dir_path): - rel_dir = pathlib.Path(dirpath).relative_to(dir_path) - level = len(rel_dir.parts) - if 0 < depth <= level: - # Stop pruning on given depth. - dirnames[:] = [] - continue - if not include_tests: - # Prune out test directories. - filtered = [] - for d in dirnames: - dir_lower = d.lower() - if not ( - dir_lower.startswith("test_") - or dir_lower in {"test", "tests"} - ): - filtered.append(d) - dirnames[:] = filtered - candidates = dirnames + filenames - for name in candidates: - full_path = pathlib.Path(dirpath) / name - rel_path = full_path.relative_to(dir_path) - name_lower = name.lower() - is_dir = full_path.is_dir() - is_test_name = name_lower.startswith("test_") or name_lower in { - "test", - "tests", - } - is_test = is_test_name or name_lower.endswith("_test.py") - is_python = full_path.suffix in {".py", ".ipynb"} - if is_dir: - # Always include directories. - nodes.append(rel_path) - continue - # Flag filter to include test or python files. - allowed_by_flag = (include_tests and is_test) or ( - include_python and is_python - ) - if only_dirs: - include_file = allowed_by_flag - else: - include_file = allowed_by_flag or ( - not is_test - and not is_python - and not include_tests - and not include_python - ) - if include_file: - nodes.append(rel_path) - nodes.sort() - return nodes - - -def generate_tree( - path: str, - depth: int, - include_tests: bool, - include_python: bool, - only_dirs: bool, - output: str, -) -> str: - """ - Generate a directory tree, and optionally update or create a markdown file. 
- - :param path: directory path to traverse - :param depth: maximum depth to traverse - :param include_tests: include test files or directories - :param include_python: include show python files - :param only_dirs: only show directories - :param output: path of the markdown file to create or update - """ - dir_path = pathlib.Path(path).resolve() - nodes = _get_tree_nodes( - dir_path, depth, include_tests, include_python, only_dirs - ) - _LOG.debug("Collected %d nodes under '%s'", len(nodes), dir_path) - if output: - output_path = pathlib.Path(output) - start_marker = f"" - end_marker = "" - prefix = [] - suffix = [] - comments = {} - if output_path.exists(): - # Parse inline comments. - file = output_path.read_text(encoding="utf-8") - lines = file.splitlines() - _LOG.debug("Reading existing file '%s' for markers", output_path) - try: - idx_start = lines.index(start_marker) - idx_end = lines.index(end_marker) - _LOG.debug("Markers found at lines %d–%d", idx_start, idx_end) - except ValueError as exc: - raise RuntimeError( - "Couldn't find tree markers in output file." - ) from exc - # Parse existing file. - prefix = lines[:idx_start] - old_tree = lines[idx_start + 1 : idx_end] - suffix = lines[idx_end + 1 :] - comments = _parse_comments(old_tree) - # Build the directory tree. - tree_block = _build_tree_lines(dir_path.name, nodes, comments) - # Build the content of the file. - content = ( - "\n".join(prefix + [start_marker, tree_block, end_marker] + suffix) - + "\n" - ) - output_path.write_text(content, encoding="utf-8") - _LOG.debug("Writing updated tree to '%s'", output_path) - # Return tree without markers. 
- tree_block = _build_tree_lines(dir_path.name, nodes, {}) - return tree_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py deleted file mode 100644 index d63d59cea..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgit.py +++ /dev/null @@ -1,1869 +0,0 @@ -""" -Import as: - -import helpers.hgit as hgit -""" - -import collections -import functools -import logging -import os -import random -import re -import string -from typing import cast, List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# We refer to "Git" when we talk about the control system (e.g., "in a Git -# repository") and `git` when we refer to implementation of Git as a program -# installed in a computer. - -# TODO(gp): Check -# https://git-scm.com/book/en/v2/Appendix-B%3A-Embedding-Git-in-your-Applications-Dulwich - -# TODO(gp): Avoid "stuttering": the module is already called "git", so no need -# to make reference to git again. - -# TODO(gp): Add mem caching to some functions below. We assume that one doesn't -# change dir (which is a horrible idea) and thus we can memoize. - -# TODO(gp): Spell super_module and sub_module always in the same way in both -# comments and code. For simplicity (e.g., instead of `super_module` in code and -# `super-module` in comment) we might want to spell `supermodule` everywhere. 
- -# ############################################################################# -# Git branch functions -# ############################################################################# - - -def extract_gh_issue_number_from_branch(branch_name: str) -> Optional[int]: - """ - Extract the GitHub issue number from a branch name. - - Example: - CmampTask10725_Add_more_tabs_to_orange_tmux -> 10725 - HelpersTask23_Add_more_tabs_to_orange_tmux -> 23. - - Works only if `invoke gh_branch_create` was used to create the branch. - or the name was retrieved using `invoke gh_issue_title`. - - :param branch_name: the name of the branch - :return: the issue number or None if it can't be extracted - """ - match = re.match(r".*Task_?(\d+)(?:_\w+)?", branch_name) - if match: - # Return the captured number. - return int(match.group(1)) - return None - - -def get_branch_name(dir_name: str = ".") -> str: - """ - Return the name of the Git branch in a directory. - - E.g., `master` or `AmpTask672_Add_script_to_check_and_merge_PR` - - :param dir_name: directory containing the git repository - :return: the name of the current branch - """ - hdbg.dassert_path_exists(dir_name) - # > git rev-parse --abbrev-ref HEAD - # master - cmd = f"cd {dir_name} && git rev-parse --abbrev-ref HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - return output - - -def _get_branch_next_name_via_github_api( - curr_branch_name: str, - *, - max_num_ids: int = 100, -) -> Optional[str]: - """ - Find the next available branch name using GitHub API (fast method). - - Uses `gh pr list` to query merged branches and extract the highest number. - - :param curr_branch_name: current branch name (e.g., "gp_scratch") - :param max_num_ids: maximum number of IDs to check - :return: next available branch name or None if GitHub API is not available - """ - try: - # Query all PRs (merged, closed, open) and extract branch names - # matching pattern. 
- cmd = ( - "gh pr list --state all --json headRefName " - "| jq -r '.[].headRefName | select(test(\"^{branch}_[0-9]+$\"))' " - "| sed 's/.*_//' | sort -rn | head -1" - ).format(branch=re.escape(curr_branch_name)) - _LOG.debug("Running GitHub API query: %s", cmd) - ret, output = hsystem.system_to_one_line(cmd, suppress_output=True) - if ret != 0: - _LOG.debug("GitHub API query failed, falling back to linear scan") - return None - # Extract the highest number from all branches. - output = output.strip() - if output: - highest_num = int(output) - next_num = highest_num + 1 - new_branch_name = f"{curr_branch_name}_{next_num}" - _LOG.info( - "Found highest number '%s' in all branches, next is '%s'", - highest_num, - next_num, - ) - return new_branch_name - # No existing numbered branches found. - _LOG.debug("No existing numbered branches found, starting at 1") - return f"{curr_branch_name}_1" - except Exception as e: - _LOG.debug( - "Error querying GitHub API: %s, falling back to linear scan", - e, - ) - return None - - -@functools.lru_cache() -def _get_gh_pr_list() -> str: - """ - Get a cached list of all pull requests from GitHub (merged and open). - - Results are cached via functools.lru_cache to avoid repeated GitHub API calls. - - :return: raw output from `gh pr list` command - """ - cmd = "gh pr list -s all --limit 1000" - rc, txt = hsystem.system_to_string(cmd) - _ = rc - return txt - - -def does_branch_exist( - branch_name: str, - mode: str, - *, - dir_name: str = ".", -) -> bool: - """ - Check if a branch with the given name exists in local git or on GitHub. - - Supports checking in local git repository or on GitHub via the `gh` CLI. 
- - :param branch_name: the name of the branch to check - :param mode: where to check ("all" checks all, "git_local", "git_remote", "github") - :param dir_name: directory containing the git repository - :return: True if the branch exists in the specified location - """ - _LOG.debug(hprint.to_str("branch_name mode dir_name")) - # Handle the "all" case by recursion on all the possible modes. - if mode == "all": - exists = False - for mode_tmp in ("git_local", "git_remote", "github"): - exists_tmp = does_branch_exist( - branch_name, mode_tmp, dir_name=dir_name - ) - exists = exists or exists_tmp - return exists - # - hdbg.dassert_in(mode, ("git_local", "git_remote", "github")) - exists = False - if mode in ("git_local", "git_remote"): - # From https://stackoverflow.com/questions/35941566 - cmd = f"cd {dir_name} && git fetch --prune" - hsystem.system(cmd, abort_on_error=False) - # From https://stackoverflow.com/questions/5167957 - # > git rev-parse --verify LimeTask197_Get_familiar_with_CF2 - # f03bfa0b4577c2524afd6a1f24d06013f8aa9f1a - # > git rev-parse --verify I_dont_exist - # fatal: Needed a single revision - git_branch_name = branch_name - if mode == "git_remote": - git_branch_name = f"origin/{git_branch_name}" - cmd = f"cd {dir_name} && git rev-parse --verify {git_branch_name}" - rc = hsystem.system(cmd, abort_on_error=False) - exists = rc == 0 - _LOG.debug("branch_name='%s' on git: exists=%s", branch_name, exists) - # Check on GitHub. - if mode == "github": - txt = _get_gh_pr_list() - # ``` - # > gh pr list -s all --limit 10000 | grep AmpTask2163 - # 347 AmpTask2163_Implement_tiled_backtesting_1 AmpTask2163 ... MERGED - # ``` - # The text is separated by tabs. - # - # If there are no issues on the GitHub repo, just return. - # ``` - # > gh pr list -s all --limit 1000 - # no pull requests match your search in causify-ai/sports_analytics - # ``` - if txt == "": - return False - for line in txt.split("\n"): - # number, GH branch name, Git branch name, status. 
- fields = line.split("\t") - # fields=['179', - # 'CmTask2914: Add end-to-end unit test for prod reconcile', - # 'CmTask2914_Add_end_to_end_unit_test_around_the_prod_reconciliation', - # 'DRAFT', '2022-09-27 19:56:50 +0000 UTC'] - hdbg.dassert_lte(4, len(fields), "fields=%s", fields) - number, gh_branch_name, git_branch_name = fields[:3] - _ = number, gh_branch_name - if branch_name == git_branch_name: - exists = True - _LOG.debug( - "branch_name='%s' on github: exists=%s", branch_name, exists - ) - return exists - - -def _get_branch_next_name_linear_scan( - dir_name: str, - curr_branch_name: str, - *, - max_num_ids: int = 100, - log_verb: int = logging.DEBUG, -) -> str: - """ - Find the next available branch name using linear scanning (fallback method). - - Tries branch names sequentially until finding one that doesn't exist. - - :param dir_name: directory containing the git repository - :param curr_branch_name: current branch name (e.g., "gp_scratch") - :param max_num_ids: maximum number of IDs to check - :param log_verb: logging verbosity level - :return: next available branch name - """ - for i in range(1, max_num_ids): - new_branch_name = f"{curr_branch_name}_{i}" - _LOG.info("Trying branch name '%s' ...", new_branch_name) - mode = "all" - exists = does_branch_exist(new_branch_name, mode, dir_name=dir_name) - _LOG.log(log_verb, "-> exists=%s", exists) - if not exists: - _LOG.log(log_verb, "new_branch_name='%s'", new_branch_name) - return new_branch_name - raise ValueError( - f"Can't find the next branch name for '{curr_branch_name}' " - f"within {max_num_ids} ids" - ) - - -def get_branch_next_name( - dir_name: str = ".", - *, - curr_branch_name: Optional[str] = None, - log_verb: int = logging.DEBUG, - method: str = "auto", -) -> str: - """ - Return a name derived from the branch so that the branch doesn't exist. 
- - E.g., `AmpTask1903_Implemented_system_Portfolio` -> - `AmpTask1903_Implemented_system_Portfolio_3` - - :param dir_name: directory containing the git repository - :param curr_branch_name: branch name to use (if None, gets current branch) - :param log_verb: logging verbosity level - :param method: method to use ('auto' tries fast first, 'github_api', 'linear_scan') - :return: next available branch name - """ - if curr_branch_name is None: - curr_branch_name = get_branch_name(dir_name=dir_name) - hdbg.dassert_ne( - curr_branch_name, "master", "Cannot get next name for 'master' branch" - ) - _LOG.log(log_verb, "curr_branch_name='%s'", curr_branch_name) - max_num_ids = 100 - hdbg.dassert_in( - method, ["auto", "github_api", "linear_scan"], "Invalid method specified" - ) - # Try GitHub API method first (faster) if requested or on auto mode. - next_name: Optional[str] = None - if method in ("auto", "github_api"): - next_name = _get_branch_next_name_via_github_api( - curr_branch_name, - max_num_ids=max_num_ids, - ) - if next_name is None and method == "github_api": - raise ValueError("GitHub API method requested but failed") - # Fall back to linear scanning if GitHub API failed in auto mode. - if next_name is None and method == "auto": - _LOG.warning("GitHub API method failed, falling back to linear scan") - next_name = _get_branch_next_name_linear_scan( - dir_name, - curr_branch_name, - max_num_ids=max_num_ids, - log_verb=log_verb, - ) - else: - # Fall back to linear scanning method when explicitly requested. - next_name = _get_branch_next_name_linear_scan( - dir_name, - curr_branch_name, - max_num_ids=max_num_ids, - log_verb=log_verb, - ) - hdbg.dassert_ne(next_name, None) - return cast(str, next_name) - - -def get_branch_hash(dir_name: str = ".") -> str: - """ - Return the hash of the commit right before the branch was created. - - This finds the merge-base between the current branch and master, which is - the commit where the branch was created. 
- - :param dir_name: directory containing the git repository - :return: the hash of the commit where the branch diverged from master - """ - curr_branch_name = get_branch_name(dir_name=dir_name) - hdbg.dassert_ne( - curr_branch_name, "master", "Cannot get branch hash for 'master' branch" - ) - _LOG.debug("curr_branch_name=%s", curr_branch_name) - cmd = f"cd {dir_name} && git merge-base master {curr_branch_name}" - _, hash_ = hsystem.system_to_string(cmd) - hash_ = hash_.rstrip("\n").lstrip("\n") - hdbg.dassert_eq( - len(hash_.split("\n")), 1, "Expected single hash line from merge-base" - ) - return hash_ - - -# ############################################################################# - - -@functools.lru_cache() -def is_inside_submodule(git_dir: str = ".") -> bool: - """ - Return whether a dir is inside a Git submodule or a Git supermodule. - - We determine this by checking if the current Git repo is included inside another Git repo. - - :param git_dir: directory to check - :return: True if the directory is inside a submodule - """ - cmd = [] - # Go to the directory. - cmd.append(f"cd {git_dir}") - # > cd im/ - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd.append('cd "$(git rev-parse --show-toplevel)/.."') - # > git rev-parse --is-inside-work-tree - # true - cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") - # Execute the command chain and check the return code. - cmd_as_str = " && ".join(cmd) - rc = hsystem.system(cmd_as_str, abort_on_error=False) - ret: bool = rc == 0 - return ret - - -# ############################################################################# -# Git submodule functions -# ############################################################################# - - -@functools.lru_cache() -def get_client_root(super_module: bool) -> str: - """ - Return the full path of the root of the Git client. - - E.g., `/Users/saggese/src/.../amp`. 
- - :param super_module: if True use the root of the Git super_module, - if we are in a submodule. Otherwise use the Git sub_module root - """ - if super_module and is_inside_submodule(): - # https://stackoverflow.com/questions/957928 - # > cd /Users/saggese/src/.../amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/... - cmd = "git rev-parse --show-superproject-working-tree" - else: - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd = "git rev-parse --show-toplevel" - # TODO(gp): Use system_to_one_line(). - _, out = hsystem.system_to_string(cmd) - out = out.rstrip("\n") - hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") - client_root: str = os.path.realpath(out) - return client_root - - -# TODO(gp): Replace `get_client_root` with this. -# TODO(gp): -> get_client_root2() or get_outermost_supermodule_root() -def find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - import helpers.hio as hio - - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. 
- git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - txt = hio.from_file(git_dir) - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root` (submodule) - # or `gitdir: /path/to/.git/worktrees/name` (worktree). - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # For worktrees, the current path is the root of the worktree. - # The worktree's `.git` file points to the shared git directory - # (e.g., main_repo/.git/worktrees/worktree_name). - if ".git/worktrees/" in git_dir_path: - git_root_dir = path - else: - # For other linked setups (submodules, custom .git directory), - # traverse up to find the root of the target repository. - abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. - hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. 
- path = parent - hdbg.dassert_is_not( - git_root_dir, None, "Git root directory should have been found" - ) - return str(git_root_dir) - - -# ############################################################################# - - -# TODO(gp): There are several functions doing the same work. -# helpers_root/helpers/hgit.py:827:def find_file_in_git_tree( -# helpers_root/helpers/hsystem.py:757:def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: -def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: - """ - Find a file within a directory hierarchy, excluding version control and cache dirs. - - Searches for the file starting from a directory, skipping .git and .mypy_cache - to avoid expensive traversals. - - :param file_name: the name of the file to find - :param dir_path: the directory to start the search from (defaults to git root) - :return: the first absolute path to the file found - """ - if dir_path is None: - dir_path = find_git_root() - _LOG.debug(hprint.to_str("dir_path")) - cmd = ( - rf"find {dir_path} " - + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " - + rf'-o -name "{file_name}" -print' - ) - _LOG.debug(hprint.to_str("cmd")) - _, res = hsystem.system_to_one_line(cmd) - hdbg.dassert_ne(res, "Can't find file '%s' in '%s'", file_name, dir_path) - return res - - -def _is_repo(repo_short_name: str) -> bool: - """ - Check if the current directory is in a repository with the given short name. - - Uses repo config to determine the repository type without relying on directory names. 
- - :param repo_short_name: the short name of the repository to check (e.g., "helpers", "amp") - :return: True if the current directory is in the specified repository - """ - import helpers.repo_config_utils as hrecouti - - curr_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - is_repo = bool(curr_repo_short_name == repo_short_name) - return is_repo - - -def is_helpers() -> bool: - """ - Return whether we are inside `helpers` repo. - - Either as super module, or a sub module depending on a current - working directory. - """ - return _is_repo("helpers") - - -def find_helpers_root(dir_path: str = ".") -> str: - """ - Find the root directory of the `helpers` repository. - - If the current directory is within the `helpers` repository, the root of the - repository is returned. Otherwise, the function searches for the `helpers_root` - directory starting from the root of the repository. - - :param dir_path: starting directory for the search - :return: absolute path to the `helpers_root` directory - """ - with hsystem.cd(dir_path): - git_root = find_git_root() - if is_helpers(): - # If we are in `helpers` repo as supermodule, its root is the helpers_root. - cmd = "git rev-parse --show-toplevel" - _, helpers_root = hsystem.system_to_one_line(cmd) - else: - # Search for the `helpers_root` directory from the root of the supermodule. - helpers_root = find_file("helpers_root", dir_path=git_root) - helpers_root = os.path.abspath(helpers_root) - # Verify that the directory and `helpers` subdirectory exist. - hdbg.dassert_dir_exists( - helpers_root, "helpers_root directory must exist" - ) - hdbg.dassert_dir_exists( - os.path.join(helpers_root, "helpers"), - "helpers subdirectory must exist within helpers_root", - ) - return helpers_root - - -# ############################################################################# - - -def resolve_git_client_dir(git_client_name: str) -> str: - """ - Resolve the absolute path of the Git client directory. 
- - Supports both relative names (assumed to be in ~/src/) and absolute paths. - - :param git_client_name: the name of the Git client (e.g., "helpers1" - or "/Users/saggese/src/helpers1") - :return: the absolute path of the Git client directory - """ - if not os.path.isabs(git_client_name): - # Relative names are resolved relative to ~/src/ directory for convenience. - git_client_dir = os.path.join(os.environ["HOME"], "src", git_client_name) - else: - # Absolute paths are used as-is. - git_client_dir = git_client_name - _LOG.debug(hprint.to_str("git_client_dir")) - hdbg.dassert_dir_exists(git_client_dir, "Git client directory must exist") - return git_client_dir - - -def project_file_name_in_git_client( - file_name: str, - git_src_dir: str, - git_dst_dir: str, - *, - check_src_file_exists: bool = False, - check_dst_file_exists: bool = False, -) -> str: - """ - Find the file corresponding to `file_name` in `git_src_dir` for the client - `git_dst_dir`. - - This is useful when we want to find the file in a destination Git client - directory corresponding to a file in a source Git client directory. 
- - E.g., for: - ``` - file_name = '/Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py' - git_src_dir = '/Users/saggese/src/helpers1' - git_dst_dir = '/Users/saggese/src/helpers2' - ``` - the output is - `/Users/saggese/src/helpers2/dev_scripts_helpers/system_tools/path.py` - - :param file_name: the name of the file to find (which is under `git_src_dir`) - :param git_src_dir: the directory of the Git client from which `file_name` is - :param git_dst_dir: the directory of the Git client to which find the - corresponding file - :param check_src_file_exists: if True, check that `file_name` exists in - `git_src_dir` - :param check_dst_file_exists: if True, check that the file in `git_dst_dir` - exists - :return: the absolute path of the file in `git_dst_dir` - """ - if not os.path.isabs(file_name): - file_name = os.path.abspath(file_name) - if check_src_file_exists: - hdbg.dassert_file_exists(file_name) - if not os.path.isabs(git_src_dir): - git_src_dir = os.path.abspath(git_src_dir) - if not os.path.isabs(git_dst_dir): - git_dst_dir = os.path.abspath(git_dst_dir) - # Compute the relative path of the file in the source git client. - hdbg.dassert_is_path_abs(file_name) - hdbg.dassert_is_path_abs(git_src_dir) - rel_path = os.path.relpath(file_name, git_src_dir) - # Compute the absolute path of the file in the destination git client. - hdbg.dassert_is_path_abs(git_dst_dir) - dst_file_path = os.path.join(git_dst_dir, rel_path) - dst_file_path = os.path.abspath(dst_file_path) - if check_dst_file_exists: - hdbg.dassert_file_exists(dst_file_path) - return dst_file_path - - -def get_project_dirname(only_index: bool = False) -> str: - """ - Return the name of the project directory (e.g., `/Users/saggese/src/amp1` -> `amp1`). - - NOTE: This works properly only outside Docker. Inside Docker the Git client is - mapped to `/app`, so the result might be incorrect. 
- - :param only_index: if True, return only the numeric suffix (e.g., "1" from "amp1") - :return: the directory name or numeric index suffix - """ - # git_dir = get_client_root(super_module=True) - git_dir = find_git_root() - _LOG.debug("git_dir=%s", git_dir) - ret = os.path.basename(git_dir) - if only_index: - last_char = ret[-1] - hdbg.dassert( - last_char.isdigit(), - "The last char `%s` of the git dir `%s` is not a digit", - last_char, - git_dir, - ) - ret = last_char - _LOG.debug("ret=%s", ret) - return ret - - -def is_amp() -> bool: - """ - Return whether we are inside `amp` repo. - - Either as super module or a sub module depending on a current - working directory. - """ - return _is_repo("amp") or _is_repo("cmamp") or _is_repo("sorr") - - -def is_in_helpers_as_supermodule() -> bool: - """ - Return whether we are in the `helpers` repo and it's a super-module, i.e., - `helpers` by itself. - """ - return is_helpers() and not is_inside_submodule(".") - - -# TODO(gp): Be consistent with submodule and sub-module in the code. Same for -# supermodule. -def is_in_amp_as_submodule() -> bool: - """ - Return whether we are in the `amp` repo and it's a sub-module, e.g., of - `lm`. - """ - return is_amp() and is_inside_submodule(".") - - -def is_in_amp_as_supermodule() -> bool: - """ - Return whether we are in the `amp` repo and it's a super-module, i.e., - `amp` by itself. - """ - return is_amp() and not is_inside_submodule(".") - - -def is_amp_present(*, dir_name: str = ".") -> bool: - """ - Return whether the `amp` dir exists. - - This is a bit of an hacky way of knowing if there is the amp - submodule. - - :param dir_name: path to the directory where we want to - check the existence of `amp`. - """ - amp_path = os.path.join(dir_name, "amp") - return os.path.exists(amp_path) - - -# Using these functions is the last resort to skip / change the tests depending -# on the repo. 
We should control the tests through what functionalities they -# have, rather than the name of the repo. - - -def is_cmamp() -> bool: - """ - Return whether we are inside `cmamp` repo. - """ - return _is_repo("cmamp") - - -def is_lem() -> bool: - """ - Return whether we are inside `lem` repo. - """ - return _is_repo("lem") - - -def is_lime() -> bool: - """ - Return whether we are inside `lime` repo. - """ - return _is_repo("lime") - - -# ############################################################################# - - -def _get_submodule_hash(dir_name: str) -> str: - """ - Report the Git hash that a submodule is at from the supermodule perspective. - - Uses git ls-tree to get the submodule commit hash from the parent repository. - > git ls-tree master | grep - 160000 commit 0011776388b4c0582161eb2749b665fc45b87e7e amp - - :param dir_name: the name of the submodule directory - :return: the git commit hash of the submodule - """ - hdbg.dassert_path_exists(dir_name) - # Use git ls-tree to get the submodule entry which includes its hash. - cmd = f"git ls-tree master | grep {dir_name}" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - _LOG.debug("output=%s", output) - # Parse the output; format is: "160000 commit ". - data: List[str] = output.split() - _LOG.debug("data=%s", data) - # Extract the hash from the third field (index 2). - git_hash = data[2] - return git_hash - - -@functools.lru_cache() -def get_path_from_supermodule() -> Tuple[str, str]: - """ - Return the path to the Git repo including the Git submodule for a submodule. - - Returns the superproject path and submodule path, or empty for a supermodule. - E.g., - - for amp included in another repo returns 'amp' - - for amp without supermodule returns '' - - :return: tuple of (superproject_path, submodule_path) - """ - # Get the superproject working tree path. 
- cmd = "git rev-parse --show-superproject-working-tree" - # > cd /Users/saggese/src/.../lm/amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/.../lm - # - # > cd /Users/saggese/src/.../lm - # > git rev-parse --show-superproject-working-tree - # (No result) - superproject_path: str = hsystem.system_to_one_line(cmd)[1] - _LOG.debug("superproject_path='%s'", superproject_path) - # Query the .gitmodules file to get the path for the current submodule. - cmd = ( - f"git config --file {superproject_path}/.gitmodules --get-regexp path" - '| grep $(basename "$(pwd)")' - "| awk '{ print $2 }'" - ) - # > git config --file /Users/saggese/src/.../.gitmodules --get-regexp path - # submodule.amp.path amp - submodule_path: str = hsystem.system_to_one_line(cmd)[1] - _LOG.debug("submodule_path='%s'", submodule_path) - return superproject_path, submodule_path - - -@functools.lru_cache() -def get_submodule_paths() -> List[str]: - """ - Return the path of the submodules in this repo. - - :return: list of submodule paths, e.g., ["amp"] or [] - """ - # Query .gitmodules to get submodule paths. - # > git config --file .gitmodules --get-regexp path - # submodule.amp.path amp - cmd = "git config --file .gitmodules --get-regexp path | awk '{ print $2 }'" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("txt=%s", txt) - # Convert the output string to a list of paths. - files: List[str] = hsystem.text_to_list(txt) - _LOG.debug("files=%s", files) - return files - - -def has_submodules() -> bool: - """ - Return whether the repository has any submodules configured. - - :return: True if the repository contains submodules - """ - return len(get_submodule_paths()) > 0 - - -# ############################################################################# - - -def _get_hash(git_hash: str, short_hash: bool, num_digits: int = 8) -> str: - """ - Return the git hash, optionally shortened. 
- - :param git_hash: the full git hash - :param short_hash: if True, return only the first num_digits characters - :param num_digits: number of digits for short hash - :return: the git hash or shortened version - """ - hdbg.dassert_lte(1, num_digits) - # Return shortened hash if requested, otherwise return full hash. - if short_hash: - ret = git_hash[:num_digits] - else: - ret = git_hash - return ret - - -def _group_hashes(head_hash: str, remh_hash: str, subm_hash: str) -> str: - """ - Group multiple hashes and display which ones are equal. - - Transform three hashes into a string that shows which ones are identical. - For example, if head_hash == remh_hash, display "head_hash = remh_hash = ". - - :param head_hash: the head hash - :param remh_hash: the remote head hash - :param subm_hash: the submodule hash - :return: formatted string showing hash equality - """ - # Build a mapping from hash names to their values. - map_ = collections.OrderedDict() - map_["head_hash"] = head_hash - map_["remh_hash"] = remh_hash - if subm_hash: - map_["subm_hash"] = subm_hash - # Invert the mapping to group identical hashes together. - inv_map = collections.OrderedDict() - for k, v in map_.items(): - if v not in inv_map: - inv_map[v] = [k] - else: - inv_map[v].append(k) - # Format the output so equal hashes are grouped together. - txt = [] - for k, v in inv_map.items(): - # Transform: - # ('a2bfc704', ['head_hash', 'remh_hash']) - # into - # 'head_hash = remh_hash = a2bfc704' - txt.append(f"{' = '.join(v)} = {k}") - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# GitHub repository name -# ############################################################################# - - -# All functions should take as input `repo_short_name` and have a switch `mode` -# to distinguish full vs short repo name. - -# TODO(gp): Maybe rename full -> long to keep it more symmetric "short vs long". 
- - -def _parse_github_repo_name(repo_name: str) -> Tuple[str, str]: - """ - Parse a repo name from `git remote`. - - The supported formats are both SSH and HTTPS, e.g., - - `git@github.com:alphamatic/amp` - - `https://github.com/alphamatic/amp` - - For both of these strings the function returns ("github.com", "alphamatic/amp"). - """ - # Try to parse the SSH format, e.g., `git@github.com:alphamatic/amp` - m = re.match(r"^git@(\S+.com):(\S+)$", repo_name) - if not m: - # Try tp parse the HTTPS format, e.g., `https://github.com/alphamatic/amp` - m = re.match(r"^https://(\S+.com)/(\S+)$", repo_name) - hdbg.dassert(m, "Can't parse '%s'", repo_name) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. - assert m is not None - host_name = m.group(1) - repo_name = m.group(2) - _LOG.debug("host_name=%s repo_name=%s", host_name, repo_name) - # We expect something like "alphamatic/amp". - m = re.match(r"^\S+/\S+$", repo_name) - hdbg.dassert(m, "repo_name='%s'", repo_name) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. - assert m is not None - # origin git@github.com:.../ORG_....git (fetch) - suffix_to_remove = ".git" - if repo_name.endswith(suffix_to_remove): - repo_name = repo_name[: -len(suffix_to_remove)] - return host_name, repo_name - - -def get_repo_full_name_from_dirname( - dir_name: str, include_host_name: bool -) -> str: - """ - Return the full name of the repo in a directory. - - E.g., "alphamatic/amp" or "github.com/alphamatic/amp" (if hostname included). - - This function relies on `git remote` to extract the origin URL. - - :param dir_name: directory containing the git repository - :param include_host_name: if True, prepend the GitHub hostname (e.g., - "github.com/alphamatic/amp") - :return: the full name of the repo - - E.g., "alphamatic/amp", "github.com/alphamatic/amp". 
- """ - hdbg.dassert_path_exists(dir_name) - cmd = f"cd {dir_name}; (git remote -v | grep origin | grep fetch)" - _, output = hsystem.system_to_string(cmd) - # > git remote -v - # origin git@github.com:alphamatic/amp (fetch) - # origin git@github.com:alphamatic/amp (push) - data: List[str] = output.split() - _LOG.debug("data=%s", data) - hdbg.dassert_eq(len(data), 3, "Expected 3 fields from git remote output") - # Extract the origin URL (second field). - repo_name = data[1] - # Parse SSH/HTTPS URL into host and org/repo parts. - host_name, repo_name = _parse_github_repo_name(repo_name) - if include_host_name: - res = f"{host_name}/{repo_name}" - else: - res = repo_name - return res - - -# ############################################################################# -# Git hash -# ############################################################################# - - -def get_head_hash(dir_name: str = ".", short_hash: bool = False) -> str: - """ - Return the git commit hash of a repository with submodule/random suffix. - - Gets the HEAD commit hash and appends either the amp submodule hash (if present) - or a random suffix to make the hash unique across different module configurations. - - ``` - > git rev-parse HEAD - 4759b3685f903e6c669096e960b248ec31c63b69 - ``` - - :param dir_name: directory containing the git repository - :param short_hash: if True, return abbreviated hash (useful when combined with suffix) - :return: the commit hash with submodule/random suffix (e.g., "4759b36-abc123") - """ - hdbg.dassert_path_exists(dir_name) - # Get the commit hash, optionally abbreviated to 7 characters. - opts = "--short " if short_hash else " " - cmd = f"cd {dir_name} && git rev-parse {opts}HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - # Check whether we are building an orange image. If the condition - # is True, add './amp' hash to the tag as well. 
- if is_amp_present(dir_name=dir_name): - amp_hash = get_head_hash(os.path.join(dir_name, "amp"), short_hash=True) - output = output + "-" + amp_hash - else: - # Use random suffix when no submodule exists (needed for Docker image tags). - random_string = "".join( - random.choices(string.ascii_lowercase + string.digits, k=3) - ) - output = output + "-" + random_string - return output - - -def get_remote_head_hash(dir_name: str) -> str: - """ - Return the commit hash that the remote repository's HEAD points to. - - Queries the remote origin to get the current HEAD hash without fetching. - - :param dir_name: directory containing the git repository - :return: the remote HEAD commit hash - """ - hdbg.dassert_path_exists(dir_name) - sym_name = get_repo_full_name_from_dirname(dir_name, include_host_name=False) - cmd = f"git ls-remote git@github.com:{sym_name} HEAD 2>/dev/null" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - # > git ls-remote git@github.com:alphamatic/amp HEAD 2>/dev/null - # 921676624f6a5f3f36ab507baed1b886227ac2e6 HEAD - return output - - -def report_submodule_status(dir_names: List[str], short_hash: bool) -> str: - """ - Return a formatted string reporting the status of git repositories. - - Reports whether each directory is a submodule, current branch, and commit hashes - (local, remote, and submodule hash if applicable). - - :param dir_names: list of directory paths to report on - :param short_hash: if True, truncate hashes to 8 characters - :return: formatted string with status information for each directory - """ - txt = [] - for dir_name in dir_names: - txt.append(f"dir_name='{dir_name}'") - txt.append(f" is_inside_submodule: {is_inside_submodule(dir_name)}") - # Get branch name, highlighting if not on master (likely indicates incomplete work). - branch_name = get_branch_name(dir_name) - if branch_name != "master": - branch_name = f"!!! {branch_name} !!!" 
- txt.append(f" branch: {branch_name}") - # Get local and remote commit hashes. - head_hash = get_head_hash(dir_name) - head_hash = _get_hash(head_hash, short_hash) - txt.append(f" head_hash: {head_hash}") - remh_hash = get_remote_head_hash(dir_name) - remh_hash = _get_hash(remh_hash, short_hash) - txt.append(f" remh_hash: {remh_hash}") - # Get submodule hash if this is not the root directory. - if dir_name != ".": - subm_hash = _get_submodule_hash(dir_name) - subm_hash = _get_hash(subm_hash, short_hash) - txt.append(f" subm_hash: {subm_hash}") - txt_as_str = "\n".join(txt) - return txt_as_str - - -def get_repo_full_name_from_client(super_module: bool) -> str: - """ - Return the full name of the repo (e.g., "alphamatic/amp") from a Git - client. - - :param super_module: like in get_client_root() - """ - # Get the Git remote in the dir containing the Git repo. - git_dir = get_client_root(super_module) - repo_name = get_repo_full_name_from_dirname(git_dir, include_host_name=False) - return repo_name - - -def is_cwd_git_repo() -> bool: - """ - Return whether the current directory is a git repository root. - - Checks for the presence of a .git file or directory in the current location. - - :return: True if .git exists in the current directory - """ - return os.path.exists(".git") - - -# ############################################################################# -# Git path -# ############################################################################# - - -# TODO(gp): Use find_file -@functools.lru_cache() -def find_file_in_git_tree( - file_name: str, super_module: bool = True, remove_tmp_base: bool = False -) -> str: - """ - Find the path of a file in a Git tree. - - We get the Git root and then search for the file from there. 
- """ - root_dir = get_client_root(super_module=super_module) - cmd = rf"find {root_dir} -name '{file_name}' -not -path '*/.git/*'" - if remove_tmp_base: - cmd += r" -not -path '*/tmp\.base/*'" - _, file_name_out = hsystem.system_to_one_line(cmd) - _LOG.debug(hprint.to_str("file_name_out")) - hdbg.dassert_ne( - file_name_out, - "", - "Can't find file '%s' in dir '%s'", - file_name, - root_dir, - ) - file_name_out: str = os.path.abspath(file_name_out) - hdbg.dassert_path_exists(file_name_out) - return file_name_out - - -def get_path_from_git_root( - file_name: str, - super_module: bool, - *, - git_root: Optional[str] = None, -) -> str: - """ - Get the path of `file_name` from the root of the Git client. - - E.g., in Docker: - - `super_module=True` -> git_root=/app - - `super_module=False` -> git_root=/app/amp - - :param super_module: like get_client_root() - """ - # Get the root of the Git client. - if git_root is None: - git_root = get_client_root(super_module) - # - git_root = os.path.normpath(git_root) - _LOG.debug("git_root=%s", git_root) - file_name = os.path.normpath(file_name) - _LOG.debug("file_name=%s", file_name) - if file_name.startswith(git_root): - # Remove the `git_root` from file_name. - ret = os.path.relpath(file_name, git_root) - else: - # If the file is not under the root, we can't normalize it. - raise ValueError( - f"Can't normalize file_name='{file_name}' for git_root='{git_root}'" - ) - _LOG.debug( - "file_name=%s, git_root=%s (super_module=%s) -> ret=%s", - file_name, - git_root, - super_module, - ret, - ) - return str(ret) - - -# TODO(gp): Rewrite this function in a better way. -@functools.lru_cache() -def get_amp_abs_path() -> str: - """ - Return the absolute path of `amp` dir. 
- """ - repo_sym_name = get_repo_full_name_from_client(super_module=False) - _LOG.debug("repo_sym_name=%s", repo_sym_name) - # - repo_sym_names = ["alphamatic/amp"] - import helpers.repo_config_utils as hrecouti - - extra_amp_repo_sym_name = ( - hrecouti.get_repo_config().get_extra_amp_repo_sym_name() - ) - repo_sym_names.append(extra_amp_repo_sym_name) - _LOG.debug("repo_sym_names=%s", repo_sym_names) - # - if repo_sym_name in repo_sym_names: - # If we are in the amp repo, then the git client root is the amp - # directory. - git_root = get_client_root(super_module=False) - amp_dir = git_root - else: - # If we are not in the amp repo, then look for the amp dir. - amp_dir = find_file_in_git_tree( - "amp", super_module=True, remove_tmp_base=True - ) - git_root = get_client_root(super_module=True) - amp_dir = os.path.join(git_root, amp_dir) - amp_dir = os.path.abspath(amp_dir) - # Sanity check. - hdbg.dassert_dir_exists(amp_dir) - return amp_dir - - -# TODO(gp): Is this needed? -def get_repo_dirs() -> List[str]: - """ - Return the list of the repo repositories, e.g., `[".", "amp", "infra"]`. - """ - dir_names = ["."] - dirs = ["amp"] - for dir_name in dirs: - if os.path.exists(dir_name): - dir_names.append(dir_name) - return dir_names - - -# TODO(gp): It should go in hdocker? -# TODO(gp): There are functions in hdocker.py that might be more general than -# this. -def find_docker_file( - file_name: str, - *, - root_dir: str = ".", - dir_depth: int = -1, - mode: str = "return_all_results", - candidate_files: Optional[List[str]] = None, -) -> List[str]: - """ - Convert a file or dir that was generated inside Docker to a file in the - current Git client. - - This operation is best-effort since it might not be able to find the - corresponding file in the current repo. 
- - E.g., - - A file like '/app/amp/core/dataflow_model/utils.py', in a Docker container - with Git root in '/app' becomes 'amp/core/dataflow_model/utils.py' - - For a file like '/app/amp/core/dataflow_model/utils.py' outside Docker, we - look for the file 'dataflow_model/utils.py' in the current client and - then normalize with respect to the - - :param dir_depth: same meaning as in `find_file_with_dir()` - :param mode: same as `system_interaction.select_result_file_from_list()` - :param candidate_files: list of results from the `find` command for unit - test mocking - :return: the best guess for the file name corresponding to `file_name` - """ - _LOG.debug(hprint.func_signature_to_str()) - hdbg.dassert_isinstance(file_name, str) - # Clean up file name. - file_name = os.path.normpath(file_name) - _LOG.debug("file_name=%s", file_name) - # Find the file in the dir. - file_names = hsystem.find_file_with_dir( - file_name, - root_dir=root_dir, - dir_depth=dir_depth, - mode=mode, - candidate_files=candidate_files, - ) - # Purify. - _LOG.debug("Purifying file_names=%s", file_names) - file_names = [ - os.path.relpath(file_name, root_dir) for file_name in file_names - ] - return file_names - - -# TODO(gp): Use get_head_hash() and remove this. -def get_current_commit_hash(dir_name: str = ".") -> str: - """ - Return the full SHA-1 hash of the current HEAD commit. 
- - :param dir_name: directory containing the git repository - :return: the full commit hash (e.g., "0011776388b4c0582161eb2749b665fc45b87e7e") - """ - hdbg.dassert_path_exists(dir_name) - cmd = f"cd {dir_name} && git rev-parse HEAD" - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, sha = data - # 0011776388b4c0582161eb2749b665fc45b87e7e - _LOG.debug("sha=%s", sha) - return sha - - -# ############################################################################# -# Modified files -# ############################################################################# - - -def get_modified_files( - dir_name: str = ".", remove_files_non_present: bool = True -) -> List[str]: - """ - Return the files that are added and modified in the Git client. - - In other words the files that will be committed with a `git commit -am ...`. - Equivalent to `dev_scripts/git_files.sh` - - :param dir_name: directory with Git client - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - # If the client status is: - # > git status -s - # AM dev_scripts/infra/ssh_tunnels.py - # M helpers/git.py - # ?? linter_warnings.txt - # - # The result is: - # > git diff --cached --name-only - # dev_scripts/infra/ssh_tunnels.py - # - # > git ls-files -m - # dev_scripts/infra/ssh_tunnels.py - # helpers/git.py - cmd = "(git diff --cached --name-only; git ls-files -m) | sort | uniq" - files: List[str] = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present - ) - return files - - -# TODO(gp): -> ...previously... -def get_previous_committed_files( - dir_name: str = ".", - num_commits: int = 1, - remove_files_non_present: bool = True, -) -> List[str]: - """ - Return files changed in the Git client in the last `num_commits` commits. 
- - Equivalent to `dev_scripts/git_previous_commit_files.sh` - - :param dir_name: directory with Git client - :param num_commits: how many commits in the past to consider - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - cmd = [] - cmd.append('git show --pretty="" --name-only') - cmd.append(f'$(git log --author "$(git config user.name)" -{num_commits}') - cmd.append(r"""| \grep "^commit " | perl -pe 's/commit (.*)/$1/')""") - cmd_as_str = " ".join(cmd) - files: List[str] = hsystem.system_to_files( - cmd_as_str, dir_name, remove_files_non_present - ) - return files - - -def get_modified_files_in_branch( - dst_branch: str, dir_name: str = ".", remove_files_non_present: bool = True -) -> List[str]: - """ - Return files modified in the current branch with respect to `dst_branch`. - - Equivalent to `git diff --name-only master...` - Please remember that there is a difference between `master` and `origin/master`. - See https://stackoverflow.com/questions/18137175 - - :param dir_name: directory with Git client - :param dst_branch: branch to compare to, e.g., `master`, `HEAD` - :param remove_files_non_present: remove the files that are not - currently present in the client - :return: list of files - """ - if dst_branch == "HEAD": - target = dst_branch - else: - target = f"{dst_branch}..." - cmd = f"git diff --name-only {target}" - files: List[str] = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present - ) - return files - - -def get_modified_and_untracked_files( - repo_path: str = ".", *, mode: str = "all" -) -> List[str]: - """ - Get list of modified and untracked files in a git repository. - - Excludes files from submodules and deleted files. 
- - Mode options: - - "all": Both modified and untracked files (default, current behavior) - - "modified": Only files with changes (staged, modified, added, renamed, copied) - - "untracked": Only untracked files - - This includes (when mode="all"): - - Modified files (both staged and unstaged) - - Untracked files - - Cached/staged files - - The function uses `git status --porcelain -u` which shows all changes - including cached (staged) files. - - :param repo_path: Path to the git repository - :param mode: Filter mode: "all", "modified", or "untracked" - :return: List of file paths relative to repo_path - """ - hdbg.dassert_dir_exists(repo_path) - # Validate mode. - valid_modes = ["all", "modified", "untracked"] - hdbg.dassert_in( - mode, - valid_modes, - "Invalid mode '%s'; must be one of: %s", - mode, - ", ".join(valid_modes), - ) - # Get modified and untracked files, excluding submodules. - # The command uses: - # - git status --porcelain -u: Get status in machine-readable format with untracked files - # This includes both cached (staged) and modified files - # Status codes: ?? = untracked, M/A/R/C/D = modified/added/renamed/copied/deleted - cmd = f"cd {repo_path} && git status --porcelain -u" - _, output = hsystem.system_to_string(cmd, abort_on_error=False) - # Get submodule paths to exclude. - submodule_cmd = ( - f"cd {repo_path} && " - "git config -f .gitmodules --get-regexp path 2>/dev/null || true" - ) - _, submodule_output = hsystem.system_to_string( - submodule_cmd, abort_on_error=False - ) - submodule_paths = set() - for line in submodule_output.strip().split("\n"): - if line: - # Format: "submodule..path " - parts = line.split() - if len(parts) >= 2: - submodule_paths.add(parts[-1]) - # Parse output. - files = [] - for line in output.strip().split("\n"): - line = line.strip() - if not line: - continue - # Extract status code (first 2 characters) and filename (from position 3). 
- status_code = line[:2] if len(line) >= 2 else "" - file_name = line[3:].strip() if len(line) > 3 else "" - # Filter by mode. - if mode == "untracked": - # Untracked files have status "??" - if status_code != "??": - continue - elif mode == "modified": - # Modified files have any status other than "??" - if status_code == "??": - continue - # Skip submodule paths. - is_in_submodule = any( - file_name.startswith(subpath + "/") or file_name == subpath - for subpath in submodule_paths - ) - if is_in_submodule: - _LOG.debug("Skipping submodule file: %s", file_name) - continue - # Check if file exists (exclude deleted files). - file_path = os.path.join(repo_path, file_name) - if os.path.exists(file_path) and os.path.isfile(file_path): - files.append(file_name) - else: - _LOG.debug("Skipping non-existent or non-file: %s", file_path) - return files - - -def get_summary_files_in_branch( - dst_branch: str, - *, - dir_name: str = ".", -) -> str: - """ - Report summary of files in the current branch with respect to `dst_branch'. - - Same interface as `get_modified_files_in_branch`. - """ - # File types (from https://git-scm.com/docs/git-diff). - file_types = [ - ("added", "A"), - ("copied", "C"), - ("deleted", "D"), - ("modified", "M"), - ("renamed", "R"), - ("type changed", "T"), - ("unmerged", "U"), - ("unknown", "X"), - ("broken pairing", "B"), - ] - res = "" - for tag, diff_type in file_types: - cmd = f"git diff --diff-filter={diff_type} --name-only {dst_branch}..." - files = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present=False - ) - _LOG.debug("files=%s", "\n".join(files)) - if files: - res += f"# {tag}: {len(files)}\n" - res += hprint.indent("\n".join(files)) + "\n" - res = res.rstrip("\n") - return res - - -# ############################################################################# -# Git commands. 
-# ############################################################################# - - -# TODO(gp): -> get_user_name() -@functools.lru_cache() -def get_git_name() -> str: - """ - Return the configured git user name from git config. - - Caches the result to avoid repeated config lookups. - - :return: the configured git user name (e.g., from user.name setting) - """ - cmd = "git config --get user.name" - # For some reason data is annotated as Any by mypy, instead of - # Tuple[int, str] so we need to cast it to the right value. - data: Tuple[int, str] = hsystem.system_to_one_line(cmd) - _, output = data - return output - - -def git_log(num_commits: int = 5, my_commits: bool = False) -> str: - """ - Return a formatted git log with graph, timestamps, and author information. - - Uses a custom pretty format to display commits in a user-friendly layout - with graph visualization, relative time, and author name. - - :param num_commits: number of commits to report - :param my_commits: if True, filter to only commits by the current git user - :return: formatted git log output - """ - cmd = [] - cmd.append("git log --date=local --oneline --graph --date-order --decorate") - cmd.append( - "--pretty=format:'%h %<(8)%aN% %<(65)%s (%>(14)%ar) %ad %<(10)%d'" - ) - cmd.append(f"-{num_commits}") - if my_commits: - # This doesn't work in a container if the user relies on `~/.gitconfig` to - # set the user name. - # TODO(gp): We should use `get_git_name()`. - cmd.append("--author $(git config user.name)") - cmd = " ".join(cmd) - data: Tuple[int, str] = hsystem.system_to_string(cmd) - _, txt = data - return txt - - -def git_stash_push( - prefix: str, msg: Optional[str] = None, log_level: int = logging.DEBUG -) -> Tuple[str, bool]: - """ - Stash current changes with a timestamped, labeled message. - - Creates a unique stash name from prefix, username, server, and timestamp to - enable tracking of which changes were stashed when and by whom. 
- - :param prefix: prefix for the stash tag (e.g., "backup", "work") - :param msg: optional message to append to the stash description - :param log_level: logging level for system output - :return: tuple of (stash_tag, was_stashed) indicating success - """ - import helpers.hdatetime as hdateti - - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - timestamp = hdateti.get_current_timestamp_as_string("naive_ET") - # Build unique tag from context to identify who stashed what when. - tag = f"{user_name}-{server_name}-{timestamp}" - tag = prefix + "." + tag - _LOG.debug("tag='%s'", tag) - cmd = "git stash push" - _LOG.debug("msg='%s'", msg) - push_msg = tag[:] - if msg: - push_msg += ": " + msg - cmd += f" -m '{push_msg}'" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - # Verify that something was actually stashed (git stash push is silent on no-op). - cmd = rf"git stash list | \grep '{tag}' | wc -l" - _, output = hsystem.system_to_string(cmd) - was_stashed = int(output) > 0 - if not was_stashed: - msg = "Nothing was stashed" - _LOG.warning(msg) - # raise RuntimeError(msg) - return tag, was_stashed - - -def git_stash_apply(mode: str, log_level: int = logging.DEBUG) -> None: - """ - Apply or pop the most recent git stash. - - Displays the stash list before applying to help the user verify they're applying - the correct stash. - - :param mode: "apply" to keep the stash or "pop" to remove after applying - :param log_level: logging level for system output - """ - _LOG.debug("# Checking stash head ...") - cmd = "git stash list | head -3" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - # Restore the stashed changes, either keeping or removing the stash. 
- _LOG.debug("# Restoring local changes...") - if mode == "pop": - cmd = "git stash pop --quiet" - elif mode == "apply": - cmd = "git stash apply --quiet" - else: - raise ValueError(f"mode='{mode}'") - hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -# TODO(gp): Consider using this everywhere. Maybe it can simplify handling issues -# stemming from the super-module / sub-module repo. -def _get_git_cmd(super_module: bool) -> str: - """ - Build a git command prefix with explicit repository and working tree paths. - - Useful for running git commands from outside the repository or when working - with specific submodules/supermodules. - - :param super_module: if True, use supermodule root; else use current module root - :return: git command prefix (e.g., "git --git-dir=... --work-tree=...") - """ - cmd = [] - cmd.append("git") - client_root = get_client_root(super_module=super_module) - # Set the path to the repository (".git" directory), avoiding Git to search for - # it (from https://git-scm.com/docs/git) - cmd.append(f"--git-dir='{client_root}/.git'") - # Explicitly specify working tree location. - cmd.append(f"--work-tree='{client_root}'") - cmd = " ".join(cmd) - return cmd - - -def git_tag( - tag_name: str, super_module: bool = True, log_level: int = logging.DEBUG -) -> None: - """ - Create a git tag on the current commit (locally, not pushed). - - Overwrites existing tags with the same name (using -f flag). 
- - :param tag_name: the name of the tag to create - :param super_module: if True, tag the supermodule; else tag the current module - :param log_level: logging level for system output - """ - _LOG.debug("# Tagging current commit ...") - git_cmd = _get_git_cmd(super_module) - cmd = f"{git_cmd} tag -f {tag_name}" - _ = hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def git_push_tag( - tag_name: str, - remote: str = "origin", - super_module: bool = True, - log_level: int = logging.DEBUG, -) -> None: - """ - Push a git tag to the remote repository. - - :param tag_name: the name of the tag to push - :param remote: the remote name to push to (default: origin) - :param super_module: if True, tag the supermodule; else tag the current module - :param log_level: logging level for system output - """ - _LOG.debug("# Pushing current commit ...") - git_cmd = _get_git_cmd(super_module) - cmd = f"{git_cmd} push {remote} {tag_name}" - _ = hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def git_describe( - match: Optional[str] = None, log_level: int = logging.DEBUG -) -> str: - """ - Return the most recent git tag, or abbreviated commit hash if no tags exist. - - Useful for version identification and release tracking. - - :param match: optional glob pattern to filter tags (e.g., "cmamp-*") - :param log_level: logging level for system output - :return: the closest tag (e.g., "1.0.0") or short commit hash - """ - _LOG.debug("# Looking for version ...") - cmd = "git describe --tags --always --abbrev=0" - if match is not None: - hdbg.dassert_isinstance(match, str, "match pattern must be a string") - hdbg.dassert_ne(match, "", "match pattern cannot be empty") - cmd = f"{cmd} --match '{match}'" - num, tag = hsystem.system_to_one_line(cmd, log_level=log_level) - _ = num - return tag - - -def git_add_update( - file_list: Optional[List[str]] = None, log_level: int = logging.DEBUG -) -> None: - """ - Add files to the git staging area. 
- - If no file list is provided, adds all modified and deleted files (git add -u). - - :param file_list: list of specific files to add; if None, add all modified files - :param log_level: logging level for system output - """ - _LOG.debug("# Adding all changed files to staging ...") - cmd = f"git add {' '.join(file_list) if file_list is not None else '-u'}" - hsystem.system(cmd, suppress_output=False, log_level=log_level) - - -def fetch_origin_master_if_needed() -> None: - """ - Fetch the master branch from origin if running in a CI environment. - - In CI, master may not be fetched when testing a branch, but it's often needed - for tests that compare against baseline or merge behavior. This ensures master - is available if needed. - """ - if hserver.is_inside_ci(): - _LOG.warning("Running inside CI so fetching master") - cmd = "git branch -a" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("%s=%s", cmd, txt) - cmd = r'git branch -a | egrep "\s+master\s*$" | wc -l' - # * (HEAD detached at pull/1337/merge) - # master - # remotes/origin/master - # remotes/pull/1337/merge - _, num = hsystem.system_to_one_line(cmd) - num = int(num) - _LOG.debug("num=%s", num) - if num == 0: - # See AmpTask1321 and AmpTask1338 for details. - cmd = "git fetch origin master:refs/remotes/origin/master" - hsystem.system(cmd) - cmd = "git branch --track master origin/master" - hsystem.system(cmd) - - -def is_client_clean( - dir_name: str = ".", - abort_if_not_clean: bool = False, -) -> bool: - """ - Return whether there are files modified, added, or removed in a directory. - - Ignores submodule changes (amp, helpers_root) to focus on actual code changes. 
- - :param dir_name: directory containing the git repository - :param abort_if_not_clean: if True and the client is not clean, - abort with a detailed message showing the modified files - :return: True if no files are modified (excluding submodules) - """ - _LOG.debug(hprint.to_str("abort_if_not_clean")) - files = get_modified_files(dir_name) - # Exclude submodule directories from consideration since their changes - # are tracked separately and don't affect code cleanliness. - if "amp" in files: - _LOG.warning("Skipping 'amp' in modified files") - files = [f for f in files if "amp" != f] - elif "helpers_root" in files: - _LOG.warning("Skipping 'helpers_root' in modified files") - files = [f for f in files if "helpers_root" != f] - # A Git client is clean iff there are no files in the index. - is_clean = len(files) == 0 - if abort_if_not_clean: - hdbg.dassert( - is_clean, "The Git client is not clean:\n%s", "\n".join(files) - ) - return is_clean - - -def delete_branches( - dir_name: str, - mode: str, - branches: List[str], - confirm_delete: bool, - abort_on_error: bool = True, -) -> None: - """ - Delete local or remote git branches. - - Optionally prompts the user for confirmation before performing deletion. - - :param dir_name: directory containing the git repository - :param mode: "local" for local branches or "remote" for remote branches - :param branches: list of branch names to delete - :param confirm_delete: if True, prompt user for confirmation before deletion - :param abort_on_error: if True, abort on any deletion error - """ - hdbg.dassert_isinstance( - branches, list, "branches must be a list, got type %s", type(branches) - ) - delete_cmd = f"cd {dir_name} && " - if mode == "local": - delete_cmd += "git branch -d" - elif mode == "remote": - delete_cmd += "git push origin --delete" - else: - raise ValueError(f"Invalid mode='{mode}'") - # Prompt for confirmation to prevent accidental deletion of important branches. 
- if confirm_delete: - branches_as_str = " ".join(branches) - msg = ( - hdbg.WARNING - + f": Delete {len(branches)} {mode} branch(es) '{branches_as_str}'?" - ) - hsystem.query_yes_no(msg, abort_on_no=True) - for branch in branches: - if mode == "remote": - prefix = "origin/" - hdbg.dassert( - branch.startswith(prefix), - "Remote branch '%s' needs to start with '%s'", - branch, - prefix, - ) - branch = branch[len(prefix) :] - cmd = f"{delete_cmd} {branch}" - hsystem.system( - cmd, - suppress_output=False, - log_level="echo", - abort_on_error=abort_on_error, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py deleted file mode 100644 index e796b865f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hgoogle_drive_api.py +++ /dev/null @@ -1,1183 +0,0 @@ -""" -Use cases for this module are at: -helpers/notebooks/Master_how_to_use_hgoogle_drive_api.ipynb - -Import as: - -import helpers.hgoogle_drive_api as hgodrapi -""" - -import datetime -import importlib -import logging -import os -import re -import sys -from typing import List, Optional, Union - -# Keep try-except to avoid `ModuleNotFoundError` in CI/CD (HelpersTask #1183). -try: - # Authentication for Google API to produce credentials. - import google.oauth2.service_account as goasea - - # Google API client for service objects (e.g., Drive, Sheets, etc.) - import googleapiclient.discovery as godisc - - # Built on top of Google API to simplify interactions with Google Sheets. - import gspread - - _GOOGLE_API_AVAILABLE = True -except ImportError: - # If Google API packages are not installed, set placeholders. 
- _GOOGLE_API_AVAILABLE = False - -import pandas as pd - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hmodule as hmodule -import helpers.hpandas as hpandas - -_LOG = logging.getLogger(__name__) - - -def install_needed_modules( - *, use_sudo: bool = True, venv_path: Optional[str] = None -) -> None: - """ - Install needed modules for Google Drive API. - - :param use_sudo: whether to use sudo to install the module - :param venv_path: path to the virtual environment E.g., - /Users/saggese/src/venv/client_venv.helpers - """ - hmodule.install_module_if_not_present( - "google", - package_name="google-auth", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "googleapiclient", - package_name="google-api-python-client", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "gspread", - package_name="gspread", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - # Reload this module (hgoogle_drive_api) if already imported - this_module_name = __name__ - if this_module_name in sys.modules: - importlib.reload(sys.modules[this_module_name]) - - -# ############################################################################# -# Credentials -# ############################################################################# - - -def get_credentials( - *, - service_key_path: Optional[str] = None, -) -> "goasea.Credentials": - """ - Get credentials for Google API with service account key. - - :param service_key_path: service account key file path. - :return: Google credentials. 
- """ - # service_key_path = "/home/.config/gspread_pandas/google_secret.json" - if not service_key_path: - service_key_path = os.path.join( - os.path.expanduser("~"), - ".config", - "gspread_pandas", - "google_secret.json", - ) - service_key_path = os.path.join(os.path.dirname(__file__), service_key_path) - # Download service.json from Google API, then save it as - # /home/.config/gspread_pandas/google_secret.json - # Instructions: https://gspread-pandas.readthedocs.io/en/latest/getting_started.html#client-credentials" - hdbg.dassert_file_exists( - service_key_path, - "Failed to read service key file: %s", - service_key_path, - ) - # Scopes required for making API calls. - scopes = [ - "https://www.googleapis.com/auth/drive", - "https://www.googleapis.com/auth/spreadsheets", - ] - creds = goasea.Credentials.from_service_account_file( - service_key_path, scopes=scopes - ) - return creds - - -# ############################################################################# -# Google Sheets API -# ############################################################################# - - -# TODO(gp): Extend this to work with v3, v4, etc. -# TODO(ai_gp): Make it private if it's not called by anybody else. -def get_sheets_service(credentials: "goasea.Credentials") -> "godisc.Resource": - """ - Get Google Sheets service with provided credentials. - - :param credentials: Google credentials object. - :return: Google Sheets service instance. - """ - # Ensure credentials are provided. - hdbg.dassert(credentials, "The 'credentials' parameter must be provided") - # Build the Sheets service. - sheets_service = godisc.build( - "sheets", "v4", credentials=credentials, cache_discovery=False - ) - return sheets_service - - -def _get_gsheet_id( - credentials: "goasea.Credentials", - sheet_id: str, - *, - tab_name: Optional[str] = None, -) -> str: - """ - Get the sheet ID from the sheet name in a Google Sheets document. - - :param credentials: Google credentials object. 
- :param sheet_id: ID of the Google Sheet document. - :param tab_name: Name of the sheet (tab) in the Google Sheets - document. - :return: Sheet ID of the sheet with the given name or the first - sheet if the name is not provided. - """ - sheets_service = get_sheets_service(credentials) - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheets = sheet_metadata.get("sheets", []) - if tab_name: - for sheet in sheets: - properties = sheet.get("properties", {}) - if properties.get("title") == tab_name: - return properties.get("sheetId") - raise ValueError(f"Sheet with name '{tab_name}' not found.") - # Return the ID of the first sheet if no sheet name is provided. - first_sheet_id = sheets[0].get("properties", {}).get("sheetId") - return first_sheet_id - - -def get_gsheet_name( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Get the name of a Google Sheet from its URL. - - E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI - -> pitchbook.Outreach_AI_companies - - :param url: URL of the Google Sheets file. - :param credentials: Google credentials object. - :return: Name of the Google Sheet (spreadsheet title). - """ - if credentials is None: - credentials = get_credentials() - # TODO(ai): Should we use the Sheets API instead? - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - tab_name = spreadsheet.title - _LOG.debug("Retrieved sheet name: '%s'", tab_name) - return tab_name - - -def get_tabs_from_gsheet( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> List[str]: - """ - Get all the tabs (worksheets) from a Google Sheet. - - :param url: URL of the Google Sheet. - :param credentials: Google credentials object. - :return: List of tab names. 
- """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - return [sheet.title for sheet in spreadsheet.worksheets()] - - -# ############################################################################# - - -def _extract_file_id_from_url(url: str) -> str: - """ - Extract the file ID from a Google Docs/Sheets/Drive URL. - - E.g., - https://docs.google.com/spreadsheets/d/FILE_ID/... - https://docs.google.com/document/d/FILE_ID/... - https://drive.google.com/file/d/FILE_ID/... - - :param url: URL of the Google Docs/Sheets/Drive file. - :return: File ID extracted from the URL. - """ - # Handle URLs like: - # https://docs.google.com/spreadsheets/d/FILE_ID/... - # https://docs.google.com/document/d/FILE_ID/... - # https://drive.google.com/file/d/FILE_ID/... - pattern = r"/d/([a-zA-Z0-9-_]+)" - match = re.search(pattern, url) - hdbg.dassert(match, "Invalid URL format: %s", url) - file_id = match.group(1) - _LOG.debug("Extracted file ID: '%s' from URL: '%s'", file_id, url) - return file_id - - -def get_gsheet_tab_url( - url: str, - tab_name: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Generate the full URL for a specific tab in a Google Sheet. - - E.g., - - Input URL: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI - - Tab name: Sheet3 - - Output: https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=229426446#gid=229426446 - - :param url: URL of the Google Sheets file. - :param tab_name: Name of the tab to generate the URL for. - :param credentials: Google credentials object. - :return: Full URL with the gid parameter for the specified tab. - """ - if credentials is None: - credentials = get_credentials() - hdbg.dassert(tab_name, "tab_name parameter must be provided") - # Extract the spreadsheet ID from the URL. 
- sheet_id = _extract_file_id_from_url(url) - _LOG.debug("Extracted sheet_id: '%s' from URL: '%s'", sheet_id, url) - # Get the gid for the specified tab. - gid = _get_gsheet_id(credentials, sheet_id, tab_name=tab_name) - _LOG.debug("Retrieved gid: '%s' for tab: '%s'", gid, tab_name) - # Construct the full URL with the gid parameter. - full_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/edit?gid={gid}#gid={gid}" - _LOG.debug("Generated full URL: '%s'", full_url) - return full_url - - -def _freeze_rows_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - num_rows_to_freeze: int, - *, - tab_name: Optional[str] = None, - bold: bool = True, -) -> None: - """ - Freeze specified rows in the given sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). - :param num_rows_to_freeze: Number of rows to freeze (starting from - row 0). - :param tab_name: Name of the sheet (tab) to freeze rows in. Defaults - to the first tab if not provided. - :param bold: If True, make the frozen rows bold. - """ - hdbg.dassert_lt(0, num_rows_to_freeze) - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - # Build the batch update request. - requests = [] - # Add freeze rows request. - requests.append( - { - "updateSheetProperties": { - "properties": { - "sheetId": tab_id, - "gridProperties": {"frozenRowCount": num_rows_to_freeze}, - }, - "fields": "gridProperties.frozenRowCount", - } - } - ) - # Add bold formatting request if requested. 
- if bold: - requests.append( - { - "repeatCell": { - "range": { - "sheetId": tab_id, - "startRowIndex": 0, - "endRowIndex": num_rows_to_freeze, - }, - "cell": { - "userEnteredFormat": { - "textFormat": { - "bold": True, - } - } - }, - "fields": "userEnteredFormat.textFormat.bold", - } - } - ) - _LOG.debug( - "Adding bold formatting to %s frozen rows", num_rows_to_freeze - ) - # Execute the batch update. - freeze_request = {"requests": requests} - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=freeze_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def _set_row_height_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - height: int, - *, - start_index: Optional[int] = None, - end_index: Optional[int] = None, - tab_name: Optional[str] = None, -) -> None: - """ - Set the height for rows in the given Google sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). - :param height: Height of the rows in pixels. - :param start_index: Starting index of the rows (zero-based). If - None, applies to all rows. - :param end_index: Ending index of the rows (zero-based). If None, - applies to all rows. - :param tab_name: Name of the sheet (tab) to set row height in. - Defaults to the first tab if not provided. 
- """ - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - if start_index is None and end_index is None: - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - start_index, end_index = 0, grid_properties.get("rowCount", 1000) - elif start_index is None: - start_index = 0 - elif end_index is None: - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - end_index = grid_properties.get("rowCount", 1000) - elif start_index >= end_index: - raise ValueError( - f"Invalid params: start_index ({start_index}) must be less than end_index ({end_index})." - ) - # Create request. - set_row_height_request = { - "requests": [ - { - "updateDimensionProperties": { - "range": { - "sheetId": tab_id, - "dimension": "ROWS", - "startIndex": start_index, - "endIndex": end_index, - }, - "properties": {"pixelSize": height}, - "fields": "pixelSize", - } - } - ] - } - # Get response. - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=set_row_height_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def _set_text_wrapping_clip_in_gsheet( - credentials: "goasea.Credentials", - sheet_id: str, - *, - tab_name: Optional[str] = None, -) -> None: - """ - Set text wrapping to "CLIP" for all columns in the given Google sheet. - - :param credentials: Google credentials object. - :param sheet_id: ID of the Google Sheet (spreadsheet ID). 
- :param tab_name: Name of the sheet (tab) to set text wrapping in. - Defaults to the first tab if not provided. - """ - tab_id = _get_gsheet_id(credentials, sheet_id=sheet_id, tab_name=tab_name) - sheets_service = get_sheets_service(credentials) - # Get sheet metadata to determine the range. - sheet_metadata = ( - sheets_service.spreadsheets().get(spreadsheetId=sheet_id).execute() - ) - sheet_properties = next( - sheet - for sheet in sheet_metadata.get("sheets", []) - if sheet.get("properties", {}).get("sheetId") == tab_id - ).get("properties", {}) - grid_properties = sheet_properties.get("gridProperties", {}) - row_count = grid_properties.get("rowCount", 1000) - col_count = grid_properties.get("columnCount", 26) - _LOG.debug( - "Setting text wrapping to CLIP for sheet with %s rows and %s columns", - row_count, - col_count, - ) - # Create request to set text wrapping to CLIP. - set_wrapping_request = { - "requests": [ - { - "repeatCell": { - "range": { - "sheetId": tab_id, - "startRowIndex": 0, - "endRowIndex": row_count, - "startColumnIndex": 0, - "endColumnIndex": col_count, - }, - "cell": { - "userEnteredFormat": { - "wrapStrategy": "CLIP", - } - }, - "fields": "userEnteredFormat.wrapStrategy", - } - } - ] - } - # Execute the batch update. - response = ( - sheets_service.spreadsheets() - .batchUpdate(spreadsheetId=sheet_id, body=set_wrapping_request) - .execute() - ) - _LOG.debug("response: %s", response) - - -def from_gsheet( - url: str, - *, - tab_name: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> pd.DataFrame: - """ - Read data from a Google Sheet. - - :param url: URL of the Google Sheets file. - :param tab_name: Name of the tab to read (default: first sheet if - not specified). - :param credentials: Google credentials object. - :return: pandas DataFrame with the sheet data. 
- """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - if tab_name is None: - # Read the first sheet. - worksheet = spreadsheet.get_worksheet(0) - else: - # Read the specified sheet. - worksheet = spreadsheet.worksheet(tab_name) - data = worksheet.get_all_records() - hdbg.dassert(data, "The sheet '%s' is empty", tab_name) - df = pd.DataFrame(data) - _LOG.debug("Data fetched") - return df - - -def to_gsheet( - df: pd.DataFrame, - url: str, - *, - tab_name: Optional[str] = "new_data", - freeze_rows: bool = False, - set_text_wrapping_clip: bool = False, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Write data to a specified Google Sheet and tab. - - :param df: Data to be written. - :param url: URL of the Google Sheet. - :param tab_name: Name of the tab where the data will be written. - :param freeze_rows: If True, freeze the header row. - :param set_text_wrapping_clip: If True, set text wrapping to CLIP. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - client = gspread.authorize(credentials) - spreadsheet = client.open_by_url(url) - # Try to get existing worksheet or create new one. - try: - worksheet = spreadsheet.worksheet(tab_name) - except gspread.exceptions.WorksheetNotFound: - _LOG.debug( - "Tab '%s' not found, creating a new tab with that name", - tab_name, - ) - worksheet = spreadsheet.add_worksheet( - title=tab_name, rows="100", cols="20" - ) - # - if freeze_rows: - _freeze_rows_in_gsheet( - credentials, - spreadsheet.id, - num_rows_to_freeze=1, - tab_name=tab_name, - ) - # - _set_row_height_in_gsheet( - credentials, - spreadsheet.id, - height=20, - tab_name=tab_name, - ) - # Clear and write data. - worksheet.clear() - # Replace NaN/inf values with empty strings for JSON compatibility. 
- df_clean = df.fillna("").replace([float("inf"), float("-inf")], "") - values = [df_clean.columns.values.tolist()] + df_clean.values.tolist() - worksheet.update("A1", values) - # - if set_text_wrapping_clip: - _set_text_wrapping_clip_in_gsheet( - credentials, - spreadsheet.id, - tab_name=tab_name, - ) - _LOG.info("Data written to:\ntab '%s'\nGoogle Sheet '%s'", tab_name, url) - _LOG.info( - "url=%s", get_gsheet_tab_url(url, tab_name, credentials=credentials) - ) - - -# ############################################################################# -# Google file API -# ############################################################################# - - -def _get_gdrive_service(credentials: "goasea.Credentials") -> "godisc.Resource": - """ - Get Google Drive service with provided credentials. - - :param credentials: Google credentials object. - :return: Google Drive service instance. - """ - # Ensure credentials are provided. - hdbg.dassert(credentials, "The 'credentials' parameter must be provided") - # Build the drive service. - gdrive_service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - return gdrive_service - - -def _create_new_google_document( - credentials: "goasea.Credentials", - doc_name: str, - doc_type: str, -) -> str: - """ - Create a new Google document (Sheet or Doc). - - :param credentials: Google credentials object. - :param doc_name: The name of the new Google document. - :param doc_type: The type of the Google document ('sheets' or - 'docs'). - :return: doc_id. The ID of the created document in Google Drive. - """ - if doc_type not in ["sheets", "docs"]: - raise ValueError("Invalid doc_type. Must be 'sheets' or 'docs'.") - # Build the service for the respective document type. - service = godisc.build( - doc_type, - "v4" if doc_type == "sheets" else "v1", - credentials=credentials, - cache_discovery=False, - ) - # Create the document with the specified name. 
- document = {"properties": {"title": doc_name}} - create_method = ( - service.spreadsheets().create - if doc_type == "sheets" - else service.documents().create - ) - response = create_method( - body=document, - fields="spreadsheetId" if doc_type == "sheets" else "documentId", - ).execute() - # Extract the document ID. - doc_id = response.get( - "spreadsheetId" if doc_type == "sheets" else "documentId" - ) - return doc_id - - -def move_gfile_to_dir( - gfile_id: str, - folder_id: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> dict: - """ - Move a Google file to a specified folder in Google Drive. - - :param gfile_id: The ID of the Google file. - :param folder_id: The ID of the folder. - :param credentials: Google credentials object. - :return: The response from the API after moving the file. - """ - if credentials is None: - credentials = get_credentials() - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - res = ( - service.files() - .update( - fileId=gfile_id, - body={}, - addParents=folder_id, - removeParents="root", - supportsAllDrives=True, - ) - .execute() - ) - return res - - -def share_google_file( - gfile_id: str, - user: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Share a Google file with a user. - - :param gfile_id: The ID of the Google file. - :param user: The email address of the user. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Create the permission. 
- parameters = {"role": "reader", "type": "user", "emailAddress": user} - new_permission = ( - service.permissions().create(fileId=gfile_id, body=parameters).execute() - ) - _LOG.debug( - "The new permission ID of the document is: '%s'", - new_permission.get("id"), - ) - _LOG.debug("The Google file is shared with '%s'", user) - - -def create_empty_google_file( - gfile_type: str, - gfile_name: str, - gdrive_folder_id: str, - *, - user: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create a new Google file (sheet or doc) and move it to a specified folder. - - :param gfile_type: the type of the Google file ('sheet' or 'doc'). - :param gfile_name: the name of the new Google file. - :param gdrive_folder_id: the ID of the Google Drive folder. - :param user: the email address of the user to share the Google file. - :param credentials: Google credentials object for API access. - :return: the ID of the created Google file, or None if an error - occurred. - """ - if credentials is None: - credentials = get_credentials() - # Create the new Google file (either Sheet or Doc). - if gfile_type == "sheet": - gfile_id = _create_new_google_document( - credentials, - doc_name=gfile_name, - doc_type="sheets", - ) - elif gfile_type == "doc": - gfile_id = _create_new_google_document( - credentials, - doc_name=gfile_name, - doc_type="docs", - ) - else: - raise ValueError(f"Invalid gfile_type={gfile_type}") - _LOG.debug("Created a new Google %s '%s'", gfile_type, gfile_name) - # Move the Google file to the specified folder. - if gdrive_folder_id: - move_gfile_to_dir(gfile_id, gdrive_folder_id, credentials=credentials) - # Share the Google file to the user and send an email. - if user: - share_google_file(gfile_id, user, credentials=credentials) - _LOG.debug( - "The new Google '%s': '%s' is shared with '%s'", - gfile_type, - gfile_name, - user, - ) - # Return the file ID. 
- return gfile_id - - -def create_or_overwrite_with_timestamp( - file_name: str, - folder_id: str, - *, - file_type: str = "sheets", - overwrite: bool = False, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create or overwrite a Google Sheet or Google Doc with a timestamp in a - specific Google Drive folder. - - :param file_name: Name for the file (timestamp will be added). - :param folder_id: Google Drive folder ID where the file will be - created or updated. - :param file_type: Type of file to create ('sheets' or 'docs'). - :param overwrite: If True, overwrite an existing file. Otherwise, - create a new file. - :param credentials: Google credentials object. - :return: The ID of the created or overwritten file. - """ - if credentials is None: - credentials = get_credentials() - # Authenticate with Google APIs using the provided credentials. - # TODO(gp): -> get_gdrive_service - drive_service = godisc.build("drive", "v3", credentials=credentials) - if file_type == "sheets": - mime_type = "application/vnd.google-apps.spreadsheet" - elif file_type == "docs": - mime_type = "application/vnd.google-apps.document" - else: - raise ValueError("Invalid file_type. Must be 'sheets' or 'docs'.") - query = ( - f"'{folder_id}' in parents and mimeType = '{mime_type}'" - f" and name contains '{file_name}'" - ) - response = ( - drive_service.files() - .list( - q=query, - fields="files(id, name)", - includeItemsFromAllDrives=True, - supportsAllDrives=True, - ) - .execute() - ) - files = response.get("files", []) - # Check if overwriting or creating new file. - if files and overwrite: - file_id = files[0]["id"] - _LOG.debug("Overwriting existing file '%s'", files[0]["name"]) - else: - # Create new file with timestamp. 
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - new_file_name = f"{file_name}_{timestamp}" - file_metadata = { - "name": new_file_name, - "mimeType": mime_type, - "parents": [folder_id], - } - file = ( - drive_service.files() - .create(body=file_metadata, fields="id", supportsAllDrives=True) - .execute() - ) - file_id = file.get("id") - _LOG.debug( - "New file '%s' created successfully in folder '%s'", - new_file_name, - folder_id, - ) - return file_id - - -# ############################################################################# -# Google folder API -# ############################################################################# - - -def create_google_drive_folder( - folder_name: str, - parent_folder_id: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> str: - """ - Create a new Google Drive folder inside the given folder. - - :param folder_name: the name of the new Google Drive folder. - :param parent_folder_id: the ID of the parent folder. - :param credentials: Google credentials object. - :return: the ID of the created Google Drive folder. - """ - if credentials is None: - credentials = get_credentials() - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Define the metadata for the new folder. - file_metadata = { - "name": folder_name, - "mimeType": "application/vnd.google-apps.folder", - "parents": [parent_folder_id], - } - # Create the folder in Google Drive. - folder = service.files().create(body=file_metadata, fields="id").execute() - # Log and return the folder ID. - _LOG.debug("Created a new Google Drive folder '%s'", folder_name) - _LOG.debug("The new folder id is '%s'", folder.get("id")) - return folder.get("id") - - -def _get_folders_in_gdrive(*, credentials: "goasea.Credentials") -> list: - """ - Get a list of folders in Google Drive. 
- - :param credentials: Google credentials object. - :return: A list of folders (each containing an ID and name). - """ - # Build the Google Drive service using the provided credentials. - # TODO(gp): -> get_gdrive_service - service = godisc.build( - "drive", "v3", credentials=credentials, cache_discovery=False - ) - # Make the API request to list folders. - response = ( - service.files() - .list( - q="mimeType='application/vnd.google-apps.folder' and trashed=false", - spaces="drive", - fields="nextPageToken, files(id, name)", - ) - .execute() - ) - # Return the list of folders (id and name). - return response.get("files", []) - - -def get_folder_id_by_name( - credentials: "goasea.Credentials", - name: str, -) -> dict: - """ - Get the folder id by the folder name. - - :param credentials: Google credentials object. - :param name: The name of the folder. - :return: Dictionary with folder id and name. - """ - folders = _get_folders_in_gdrive(credentials=credentials) - folder_list = [] - # Find all folders matching the name. - for folder in folders: - if folder.get("name") == name: - folder_list.append(folder) - if len(folder_list) == 1: - _LOG.debug("Found folder: %s", folder_list[0]) - elif len(folder_list) > 1: - for folder in folder_list: - _LOG.debug( - "Found folder: '%s', '%s'", - folder.get("name"), - folder.get("id"), - ) - _LOG.debug( - "Return the first found folder. '%s' '%s' ", - folder_list[0].get("name"), - folder_list[0].get("id"), - ) - _LOG.debug( - "if you want to use another '%s' folder, " - "please change the folder id manually.", - name, - ) - else: - raise ValueError(f"Can't find the folder '{name}'.") - return folder_list[0] - - -def _get_folder_path_list( - service: "godisc.Resource", - file_id: str, -) -> List[str]: - """ - Get the full folder path as a list of folder names. - - :param service: Google Drive service instance. - :param file_id: The ID of the file. - :return: List of folder names from root to immediate parent folder. 
- Returns empty list if file is at root level. - """ - # Get file metadata with parents. - file_metadata = ( - service.files() - .get( - fileId=file_id, - fields="parents", - supportsAllDrives=True, - ) - .execute() - ) - parents = file_metadata.get("parents", []) - # If no parents, file is at root level. - if not parents: - _LOG.debug("File is at root level") - return [] - # Build the path by traversing up the folder hierarchy. - path_list = [] - current_id = parents[0] # Files typically have one parent in Google Drive. - while current_id: - folder_metadata = ( - service.files() - .get( - fileId=current_id, - fields="name,parents", - supportsAllDrives=True, - ) - .execute() - ) - folder_name = folder_metadata.get("name") - path_list.insert(0, folder_name) - parents = folder_metadata.get("parents", []) - current_id = parents[0] if parents else None - _LOG.debug("Folder path: %s", path_list) - return path_list - - -def get_google_path_from_url( - url: str, - *, - credentials: Optional["goasea.Credentials"] = None, -) -> List[str]: - """ - Get the full folder path from a Google Docs/Sheets/Drive URL. - - E.g., https://docs.google.com/spreadsheets/d/1GnnmtGTrHDwMP77VylEK0bSF_RLUV5BWf1iGmxuBQpI - -> ['My Drive', 'Folder1', 'Folder2'] - - :param url: URL of the Google Docs/Sheets/Drive file. - :param credentials: Google credentials object. - :return: List of folder names from root to immediate parent folder. - Returns empty list if file is at root level. - """ - if credentials is None: - credentials = get_credentials() - # Extract file ID from URL. - file_id = _extract_file_id_from_url(url) - # Get Google Drive service. - service = _get_gdrive_service(credentials) - # Get folder path as list. 
- path_list = _get_folder_path_list(service, file_id) - _LOG.debug("Retrieved folder path for URL '%s': %s", url, path_list) - return path_list - - -def print_info_about_google_url( - url: str, - *, - tab_name: Optional[str] = None, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Print information about a Google Sheet URL. - - :param url: URL of the Google Sheets file. - :param tab_name: Optional tab name to display full URL for. - :param credentials: Google credentials object. - """ - if credentials is None: - credentials = get_credentials() - print("url: '%s'" % url) - print("file name: '%s'" % get_gsheet_name(url, credentials=credentials)) - print("tab names: '%s'" % get_tabs_from_gsheet(url, credentials=credentials)) - if tab_name is not None: - print( - "full url: '%s'" - % get_gsheet_tab_url(url, tab_name, credentials=credentials) - ) - print( - "folder path: '%s'" - % "/".join(get_google_path_from_url(url, credentials=credentials)) - ) - - -# TODO(gp): Add clean up -# TODO(gp): Make url mandatory and when url = "tmp" use the hardcored value. -# TODO(gp): -> save_df_to_gsheet -def save_df_to_tmp_gsheet( - df: pd.DataFrame, - *, - url: str = "", - tab_name: str = "", - remove_empty_columns: bool = False, - remove_stable_columns: bool = False, - verbose: bool = True, - credentials: Optional["goasea.Credentials"] = None, -) -> None: - """ - Save a DataFrame to a Google Sheet. - - :param df: The DataFrame to save. - :param url: URL of the Google Sheet (empty means default temp - sheet). - :param tab_name: The name of the tab to save the DataFrame to. - :param remove_empty_columns: Whether to remove empty columns. - :param remove_stable_columns: Whether to remove stable columns. - :param verbose: Whether to print verbose output. - :param credentials: Google credentials object. 
- """ - if credentials is None: - credentials = get_credentials() - if remove_stable_columns: - df = hpandas.remove_stable_columns(df, verbose=verbose) - if remove_empty_columns: - df = hpandas.remove_empty_columns(df, verbose=verbose) - if url == "": - url = "https://docs.google.com/spreadsheets/d/1NLY7dTmkXmllYfewDH53z-uSRpC9-zBTTmAOB_O30DI/edit?gid=0#gid=0" - if tab_name == "": - # Find the first tab name that is not empty. - tab_names = get_tabs_from_gsheet(url, credentials=credentials) - for i in range(0, 100): - tab_name = "Sheet" + str(i) - if tab_name not in tab_names: - break - hdbg.dassert_ne(tab_name, "No empty tab name found") - to_gsheet( - df, - url, - tab_name=tab_name, - freeze_rows=True, - set_text_wrapping_clip=True, - credentials=credentials, - ) - - -def _get_gsheet_to_df(url: str, tab_name: Optional[str]) -> pd.DataFrame: - credentials = get_credentials() - file_name = get_gsheet_name(url, credentials=credentials) - _LOG.info( - "Reading data:\n url='%s'\n file_name='%s'\n tab_name='%s'" - % (url, file_name, tab_name) - ) - df = from_gsheet(url, tab_name=tab_name, credentials=credentials) - return df - - -get_cached_gsheet_to_df = hcacsimp.simple_cache( - cache_type="pickle", write_through=True -)(_get_gsheet_to_df) - - -# TODO(gp): This is redundant with disable cache. -# TODO(gp): Create a function to normalize the column names. -def get_gsheet_to_df( - url: str, - tab_name: Optional[str], - *, - remove_spaces_in_cols: bool = True, - force_no_cache: bool = False, -) -> pd.DataFrame: - """ - Get a Google Sheet as a DataFrame with optional caching. - - :param url: The URL of the Google Sheet. - :param tab_name: The name of the tab to read - - `None` means the first sheet - :param remove_spaces_in_cols: Whether to remove spaces in the column names. - :param force_no_cache: Whether to bypass the cache and fetch fresh data. - :return: DataFrame containing the sheet data. 
- """ - if force_no_cache: - df = get_gsheet_to_df(url, tab_name) - else: - df = get_cached_gsheet_to_df(url, tab_name) - if remove_spaces_in_cols: - df.columns = df.columns.str.replace(" ", "") - return df - - -def read_all_gsheets( - url: str, *, tab_names: Union[str, List[str]], concat: bool = False -) -> Union[pd.DataFrame, List[pd.DataFrame]]: - """ - Read all the sheets from a Google Sheet. - - :param url: The URL of the Google Sheet. - :param tab_names: The names of the sheets to read. - :param concat: Whether to concatenate the DataFrames. - :return: A list of DataFrames, one for each sheet. - """ - dfs = [] - # TODO(ai_gp): -> _all_ - if tab_names == "all": - tab_names = get_tabs_from_gsheet(url) - for tab_name in tab_names: - df = get_cached_gsheet_to_df(url, tab_name) - dfs.append(df) - if len(dfs) > 1 and concat: - # Assert if the columns are the same. - for df in dfs[1:]: - hdbg.dassert_eq(df.columns, dfs[0].columns) - # Concatenate the DataFrames. - df = pd.concat(dfs) - df.reset_index(drop=True, inplace=True) - return df - return dfs diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py deleted file mode 100644 index fdc7ed66c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hintrospection.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Import as: - -import helpers.hintrospection as hintros -""" - -import collections.abc as cabc -import importlib -import inspect -import logging -import pickle -import re -import sys -import types -from typing import Any, Callable, List, Optional, cast - -import helpers.hdbg as hdbg - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - -_LOG = logging.getLogger(__name__) - - -# Copied from 
`hstring` to avoid import cycles. - - -def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str: - if string.startswith(prefix): - res = string[len(prefix) :] - else: - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't start with prefix ='{prefix}'" - ) - return res - - -# End copy. - -# TODO(gp): object -> Any? - - -# ############################################################################# -# Function introspection -# ############################################################################# - - -def get_function_name(count: int = 0) -> str: - """ - Return the name of the function calling this function. - """ - ptr = inspect.currentframe() - # count=0 corresponds to the calling function, so we need to add an extra - # step walking the call stack. - count += 1 - for _ in range(count): - hdbg.dassert_is_not(ptr, None) - ptr = ptr.f_back # type: ignore - func_name = ptr.f_code.co_name # type: ignore - return func_name - - -def get_name_from_function(func: Callable) -> str: - """ - Return the name of the passed function. - - E.g., amp.helpers.test.test_hintrospection.test_function - """ - func_name = func.__name__ - # - module = inspect.getmodule(func) - hdbg.dassert_is_not( - module, None, f"Could not get module for function {func}" - ) - assert module is not None - module_name = module.__name__ - # Remove `app.` if needed from the module name, e.g., - # `app.amp.helpers.test.test_hintrospection`. - prefix = "app." - if module_name.startswith(prefix): - module_name = remove_prefix(module_name, prefix) - return f"{module_name}.{func_name}" - - -def get_function_from_string(func_as_str: str) -> Callable: - """ - Return the function from its name including the import. - - E.g., `import im.scripts.AmpTask317_transform_pq_by_date_to_by_asset` - """ - # Split txt in an import and function name. 
- m = re.match(r"^(\S+)\.(\S+)$", func_as_str) - hdbg.dassert(m, "txt='%s'", func_as_str) - m = cast(re.Match, m) - import_, function = m.groups() - _LOG.debug("import=%s", import_) - _LOG.debug("function=%s", function) - # Import the needed module. - imp = importlib.import_module(import_) - # Force the linter not to remove this import which is needed in the following - # eval. - _ = imp - python_code = f"imp.{function}" - func: Callable = eval(python_code) - _LOG.debug("%s -> func=%s", func_as_str, func) - return func - - -def get_methods(obj: Any, access: str = "all") -> List[str]: - """ - Return list of names corresponding to class methods of an object `obj`. - - :param obj: class or class object - :param access: allows to select private, public or all methods of - the object. - """ - methods = [method for method in dir(obj) if callable(getattr(obj, method))] - if access == "all": - pass - elif access == "private": - methods = [method for method in methods if method.startswith("_")] - elif access == "public": - methods = [method for method in methods if not method.startswith("_")] - else: - raise ValueError(f"Invalid access='{access}'") - return methods - - -# ############################################################################# - - -def is_iterable(obj: object) -> bool: - """ - Return whether obj can be iterated upon or not. - - Note that a string is iterable in Python, but typically we refer to - iterables as lists, tuples, so we exclude strings. - """ - # From https://stackoverflow.com/questions/1952464 - return not isinstance(obj, str) and isinstance(obj, cabc.Iterable) - - -# From https://stackoverflow.com/questions/53225 -def is_bound_to_object(method: object) -> bool: - """ - Return whether a method is bound to an object. 
- """ - _LOG.debug("method=%s", method) - if not hasattr(method, "__self__"): - _LOG.debug("hasattr(im_self)=False") - val = False - else: - # val = method.im_self is not None - val = True - return val - - -# From https://stackoverflow.com/questions/23852423 -def is_lambda_function(method: object) -> bool: - _LOG.debug("type(method)=%s", str(type(method))) - return isinstance(method, types.LambdaType) and method.__name__ == "" - - -def is_pickleable(obj: object, *, mode: str = "try_and_catch") -> bool: - """ - Return if an object is a bound method. - - :param obj: object to process - :param mode: approach to detect non-pikleable objects - - "type_search": detect non-pickleable objects by type, e.g., lambda - functions are not Pickleable - - "try_and_catch": try to pickle an object directly, if it fails, - an object is non-pickleable then - """ - _LOG.debug("obj=%s", obj) - if mode == "type_search": - _LOG.debug("callable=%s", callable(obj)) - if not callable(obj): - return True - # - is_bound = is_bound_to_object(obj) - _LOG.debug("is_bound=%s", is_bound) - if is_bound: - return False - # - is_lambda = is_lambda_function(obj) - _LOG.debug("is_lambda=%s", is_lambda) - if is_lambda: - return False - return True - elif mode == "try_and_catch": - try: - _ = pickle.dumps(obj) - return True - # `AttributeError` is raised when obj is a class with lambda param - # values, and `TypeError`is raised when the class has DB connection - # object as value. 
- except (AttributeError, TypeError) as e: - _LOG.debug("Cannot pickle object=%s, the error is %s", obj, str(e)) - return False - else: - raise ValueError(f"Invalid mode='{mode}'") - - -# ############################################################################# -# Object size -# ############################################################################# - - -# https://code.activestate.com/recipes/577504/ -# https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python - - -def get_size_in_bytes(obj: object, seen: Optional[set] = None) -> int: - """ - Recursively find size of an object `obj` in bytes. - """ - # From https://github.com/bosswissam/pysize - # getsizeof() returns the size in bytes. - size = sys.getsizeof(obj) - if seen is None: - seen = set() - obj_id = id(obj) - if obj_id in seen: - return 0 - # Mark as seen *before* entering recursion to gracefully handle - # self-referential objects. - seen.add(obj_id) - if hasattr(obj, "__dict__"): - for cls in obj.__class__.__mro__: - if "__dict__" in cls.__dict__: - d = cls.__dict__["__dict__"] - if inspect.isgetsetdescriptor(d) or inspect.ismemberdescriptor( - d - ): - size += get_size_in_bytes(obj.__dict__, seen) - break - if isinstance(obj, dict): - size += sum((get_size_in_bytes(v, seen) for v in obj.values())) - size += sum((get_size_in_bytes(k, seen) for k in obj.keys())) - elif isinstance(obj, cabc.Iterable) and not isinstance( - obj, (str, bytes, bytearray) - ): - size += sum((get_size_in_bytes(i, seen) for i in obj)) - if hasattr(obj, "__slots__"): # can have __slots__ with __dict__ - slots = getattr(obj, "__slots__", None) - if slots is not None: - size += sum( - get_size_in_bytes(getattr(obj, s), seen) - for s in slots - if hasattr(obj, s) - ) - return size - - -# TODO(gp): -> move to helpers/hprint.py -def format_size(num: float) -> str: - """ - Return a human-readable string for a filesize (e.g., "3.5 MB"). 
- """ - # From http://stackoverflow.com/questions/1094841 - for x in ["b", "KB", "MB", "GB", "TB"]: - if num < 1024.0: - return f"%3.1f {x}" % num - num /= 1024.0 - assert 0, f"Invalid num='{num}'" - - -# ############################################################################# -# Stacktrace -# ############################################################################# - - -def stacktrace_to_str() -> str: - """ - Print the stack trace. - """ - import traceback - - txt = traceback.format_stack() - txt = "".join(txt) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py deleted file mode 100644 index bc2f71ab7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hio.py +++ /dev/null @@ -1,1046 +0,0 @@ -""" -Functions to handle filesystem operations. - -Import as: - -import helpers.hio as hio -""" - -import datetime -import gzip -import json -import logging -import os -import re -import shlex -import shutil -import time -import uuid -from typing import Any, Dict, List, Optional, Union - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Set logging level of this file. -_LOG.setLevel(logging.INFO) - -# ############################################################################# -# Glob. -# ############################################################################# - - -def purify_file_name(file_name: str) -> str: - """ - Remove non-Linux friendly characters from the basename. 
- """ - basename = os.path.basename(file_name) - for char in (" ", "_", "'", '"', "`", "/"): - basename = basename.replace(char, "_") - # - dir_name = os.path.dirname(file_name) - file_name_out = os.path.join(dir_name, basename) - file_name_out: str = os.path.normpath(file_name_out) - return file_name_out - - -def listdir( - dir_name: str, - pattern: str, - only_files: bool, - use_relative_paths: bool, - *, - exclude_git_dirs: bool = True, - maxdepth: Optional[int] = None, -) -> List[str]: - """ - Find all files and subdirectories under `directory` that match `pattern`. - - :param dir_name: path to the directory where to look for files - :param pattern: pattern to match a filename against (e.g., `*.py`) - :param only_files: look for only files instead of both files and directories - :param use_relative_paths: remove `dir_name` from path - :param exclude_git_dirs: skip `.git` dirs - :param maxdepth: limit the depth of directory traversal - """ - hdbg.dassert_dir_exists(dir_name) - # Escape the directory path. - dir_name = shlex.quote(dir_name) - cmd = [f"find {dir_name}", f'-name "{pattern}"'] - if maxdepth is not None: - cmd.append(f'-maxdepth "{maxdepth}"') - if only_files: - cmd.append("-type f") - if exclude_git_dirs: - cmd.append(r'-not -path "*/\.git/*"') - cmd = " ".join(cmd) - _, output = hsystem.system_to_string(cmd) - # TODO(gp): -> system_to_files - paths = [path for path in output.split("\n") if path != ""] - _LOG.debug("Found %s paths in %s", len(paths), dir_name) - _LOG.debug("\n".join(paths)) - if use_relative_paths: - paths = [os.path.relpath(path, start=dir_name) for path in paths] - return paths - - -def is_valid_filename_extension(ext: str) -> bool: - """ - By convention extensions don't include the initial `.`. - - E.g., "tgz" is valid, but not ".tgz". - """ - valid = not ext.startswith(".") - return valid - - -def change_filename_extension(filename: str, old_ext: str, new_ext: str) -> str: - """ - Change extension of a filename (e.g. 
"data.csv" to "data.json"). - - :param filename: the old filename (including extension) - :param old_ext: the extension of the old filename (e.g., "csv") - - If empty, it is extracted from the filename - :param new_ext: the extension to replace the old extension (e.g., "json") - :return: a filename with the new extension - """ - # If the old extension is empty, extract it from the filename. - if old_ext == "": - _, old_ext = os.path.splitext(filename) - # Remove the leading dot. - old_ext = old_ext.lstrip(".") - hdbg.dassert( - is_valid_filename_extension(old_ext), "Invalid extension '%s'", old_ext - ) - hdbg.dassert( - is_valid_filename_extension(new_ext), "Invalid extension '%s'", new_ext - ) - hdbg.dassert( - filename.endswith(old_ext), - "Extension '%s' doesn't match file '%s'", - old_ext, - filename, - ) - # Remove the old extension. - len_ext = len(old_ext) - new_filename = filename[:-len_ext] - hdbg.dassert(new_filename.endswith("."), "new_filename='%s'", new_filename) - # Add the new extension. - new_filename += new_ext - return new_filename - - -def is_paired_jupytext_python_file(py_filename: str) -> bool: - """ - Return if a Python file has a paired Jupyter notebook. - """ - hdbg.dassert( - py_filename.endswith("py"), "Invalid python filename='%s'", py_filename - ) - hdbg.dassert_file_exists(py_filename) - # Check if a corresponding ipynb file exists. - ipynb_filename = change_filename_extension(py_filename, "py", "ipynb") - is_paired = os.path.exists(ipynb_filename) - _LOG.debug( - "Checking ipynb file='%s' for py file='%s': is_paired=%s", - py_filename, - ipynb_filename, - is_paired, - ) - return is_paired - - -def keep_python_files( - file_names: List[str], exclude_paired_jupytext: bool -) -> List[str]: - """ - Return a list with all Python file names (i.e., with the `py` extension). 
- - :param exclude_paired_jupytext: exclude Python file that are associated to - notebooks (i.e., that have a corresponding `.ipynb` file) - """ - hdbg.dassert_isinstance(file_names, list) - # Check all the files. - py_file_names = [] - for file_name in file_names: - if file_name.endswith(".py"): - if exclude_paired_jupytext: - # Include only the non-paired Python files. - is_paired = is_paired_jupytext_python_file(file_name) - add = not is_paired - else: - # Include all the Python files. - add = True - else: - add = False - _LOG.debug("file_name='%s' -> add='%s'", file_name, add) - if add: - py_file_names.append(file_name) - _LOG.debug("Found %s python files", len(py_file_names)) - return py_file_names - - -def delete_file(file_name: str) -> None: - _LOG.debug("Deleting file '%s'", file_name) - # hs3.dassert_is_not_s3_path(file_name) - if not os.path.exists(file_name) or file_name == "/dev/null": - # Nothing to delete. - return - try: - os.unlink(file_name) - except OSError as e: - # It can happen that we try to delete the file, while somebody already - # deleted it, so we neutralize the corresponding exception. - if e.errno == 2: - # OSError: [Errno 2] No such file or directory. - pass - else: - raise e - - -def _create_dir( - dir_name: str, - incremental: bool, - abort_if_exists: bool = False, - ask_to_delete: bool = False, -) -> None: - """ - Create a directory `dir_name` if it doesn't exist. - - Same interface as `create_dir()` but without handling - `backup_dir_if_exists`. 
- """ - _LOG.debug( - hprint.to_str("dir_name incremental abort_if_exists ask_to_delete") - ) - hdbg.dassert_is_not(dir_name, None) - dir_name = os.path.normpath(dir_name) - if os.path.normpath(dir_name) == ".": - _LOG.debug("Can't create dir '%s'", dir_name) - exists = os.path.exists(dir_name) - is_dir = os.path.isdir(dir_name) - _LOG.debug(hprint.to_str("dir_name exists is_dir")) - if abort_if_exists: - hdbg.dassert_path_not_exists(dir_name) - # dir exists / dir does not exist - # incremental no-op mkdir - # not incremental rm+mkdir mkdir - if exists: - if incremental and is_dir: - # The dir exists and we want to keep it (i.e., incremental), so we - # are done. - # os.chmod(dir_name, 0755) - _LOG.debug( - "The dir '%s' exists and incremental=True: exiting", dir_name - ) - return - if ask_to_delete: - hsystem.query_yes_no( - f"Do you really want to delete dir '{dir_name}'?", - abort_on_no=True, - ) - # The dir exists and we want to create it from scratch (i.e., not - # incremental), so we need to delete the dir. - _LOG.debug("Deleting dir '%s'", dir_name) - if os.path.islink(dir_name): - delete_file(dir_name) - else: - hdbg.dassert_ne(os.path.normpath(dir_name), ".") - shutil.rmtree(dir_name) - _LOG.debug("Creating directory '%s'", dir_name) - # NOTE: `os.makedirs` raises `OSError` if the target directory already exists. - # A race condition can happen when another process creates our target - # directory, while we have just found that it doesn't exist, so we need to - # handle this situation gracefully. - try: - os.makedirs(dir_name) - except OSError as e: - _LOG.error(str(e)) - # It can happen that we try to create the directory while somebody else - # created it, so we neutralize the corresponding exception. - if e.errno == 17: - # OSError: [Errno 17] File exists. 
- pass - else: - raise e - - -def create_dir( - dir_name: str, - incremental: bool, - *, - abort_if_exists: bool = False, - ask_to_delete: bool = False, - backup_dir_if_exists: bool = False, -) -> None: - """ - Create a directory. - - :param incremental: if False then the directory is deleted and re- - created, otherwise the same directory is reused as it is - :param abort_if_exists: abort if the target directory already exists - :param ask_to_delete: if it is not incremental and the dir exists, - asks before deleting. This option is used when we want to start - with a clean dir (i.e., incremental=False) but, at the same - time, we want to make sure that the user doesn't want to delete - the content of the dir. Another approach is to automatically - rename the old dir with backup_dir_if_exists. - :param backup_dir_if_exists: if the target dir already exists, then - rename it using a timestamp (e.g., dir_20231003_080000) and - create a new target dir - """ - if backup_dir_if_exists: - if not os.path.exists(dir_name): - # Create new dir. - _LOG.debug("Creating dir '%s'", dir_name) - _create_dir(dir_name, incremental=True) - else: - _LOG.debug("Dir '%s' already exists", dir_name) - # Get dir timestamp. - dir_timestamp = os.path.getmtime(dir_name) - dir_datetime = datetime.datetime.fromtimestamp(dir_timestamp) - # Build new dir name with timestamp. - dir_name_new = ( - dir_name + "." + dir_datetime.strftime("%Y%m%d_%H%M%S") - ) - # Rename dir. - if not os.path.exists(dir_name_new): - _LOG.warning("Renaming dir '%s' -> '%s'", dir_name, dir_name_new) - os.rename(dir_name, dir_name_new) - else: - _LOG.warning("Dir '%s' already exists", dir_name_new) - # Create new dir. - _LOG.debug("Creating dir '%s'", dir_name) - _create_dir(dir_name, incremental=True) - else: - _create_dir( - dir_name, - incremental, - abort_if_exists=abort_if_exists, - ask_to_delete=ask_to_delete, - ) - - -# ############################################################################# -# Filesystem. 
-# ############################################################################# - - -def create_soft_link(src: str, dst: str) -> None: - """ - Create a soft-link to called (where and are files - or directories as in a Linux ln command). - - This is equivalent to a command like "cp " but creating a - soft link. - """ - _LOG.debug("# CreateSoftLink") - # hs3.dassert_is_not_s3_path(src) - # hs3.dassert_is_not_s3_path(dst) - # Create the enclosing directory, if needed. - enclosing_dir = os.path.dirname(dst) - _LOG.debug("enclosing_dir=%s", enclosing_dir) - create_dir(enclosing_dir, incremental=True) - # Create the link. Note that the link source needs to be an absolute path. - src = os.path.abspath(src) - cmd = f"ln -s {src} {dst}" - hsystem.system(cmd) - - -def delete_dir( - dir_: str, - change_perms: bool = False, - errnum_to_retry_on: int = 16, - num_retries: int = 1, - num_secs_retry: int = 1, -) -> None: - """ - Delete a directory. - - :param change_perms: change permissions to -R rwx before deleting to deal with - incorrect permissions left over - :param errnum_to_retry_on: specify the error to retry on, e.g., - ``` - OSError: [Errno 16] Device or resource busy: - 'gridTmp/.nfs0000000002c8c10b00056e57' - ``` - """ - _LOG.debug("Deleting dir '%s'", dir_) - # hs3.dassert_is_not_s3_path(dir_) - if not os.path.isdir(dir_): - # No directory so nothing to do. - return - if change_perms and os.path.isdir(dir_): - cmd = "chmod -R +rwx " + dir_ - hsystem.system(cmd) - i = 1 - while True: - try: - shutil.rmtree(dir_) - # Command succeeded: exit. - break - except OSError as e: - if errnum_to_retry_on is not None and e.errno == errnum_to_retry_on: - # TODO(saggese): Make it less verbose once we know it's working - # properly. 
- _LOG.warning( - "Couldn't delete %s: attempt=%s / %s", dir_, i, num_retries - ) - i += 1 - if i > num_retries: - hdbg.dfatal( - f"Couldn't delete {dir_} after {num_retries} attempts ({str(e)})" - ) - else: - time.sleep(num_secs_retry) - else: - # Unforeseen error: just propagate it. - raise e - - -def backup_file_or_dir_if_exists(path: str) -> None: - """ - Create a timestamped backup of a file or directory if it exists. - - If the path exists, it is moved to a new location with a timestamp - appended to the name (e.g., path.20231003_080000.backup). - - :param path: path to the file or directory to back up - """ - if not os.path.exists(path): - # Nothing to back up. - return - _LOG.warning("Path '%s' already exists: making a backup", path) - # Get current timestamp. - timestamp = datetime.datetime.now() - timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S") - # Build backup path. - backup_path = f"{path}.{timestamp_str}.backup" - # Move the file or directory to backup. - shutil.move(path, backup_path) - _LOG.info("Backed up '%s' -> '%s'", path, backup_path) - - -def dassert_is_valid_file_name(file_name: str) -> None: - hdbg.dassert_isinstance(file_name, str) - hdbg.dassert_ne(file_name, "") - - -# TODO(gp): Don't use default incremental. -def create_enclosing_dir(file_name: str, incremental: bool = False) -> str: - """ - Create the dir enclosing file_name, if needed. 
- - :param incremental: same meaning as in `create_dir()` - """ - _LOG.debug(hprint.to_str("file_name incremental")) - dassert_is_valid_file_name(file_name) - # hs3.dassert_is_not_s3_path(file_name) - # - dir_name = os.path.dirname(file_name) - _LOG.debug(hprint.to_str("dir_name")) - if dir_name != "": - _LOG.debug( - "Creating dir_name='%s' for file_name='%s'", dir_name, file_name - ) - create_dir(dir_name, incremental=incremental) - hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) - return dir_name - - -# ############################################################################# -# File. -# ############################################################################# - - -# TODO(saggese): We should have `lines` first since it is an input param. -# TODO(Nikola): Remove `use_gzip` and use `file_name` extension instead. -def to_file( - file_name: str, - txt: str, - use_gzip: bool = False, - mode: Optional[str] = None, - force_flush: bool = False, -) -> None: - """ - Write the content of txt into file_name, creating the enclosing directory - if needed. - - :param file_name: name of written file - :param txt: content of the file - :param use_gzip: whether the file should be compressed as gzip - :param mode: file writing mode - :param force_flush: whether to forcibly clear the file buffer - """ - _LOG.debug(hprint.to_str("file_name use_gzip mode force_flush")) - dassert_is_valid_file_name(file_name) - hdbg.dassert_isinstance(txt, str) - # Choose default writing mode based on compression. - if mode is None: - if use_gzip: - # Override default binary mode for `gzip`. - mode = "wt" - else: - mode = "w" - # Create the enclosing dir, if needed. - create_enclosing_dir(file_name, incremental=True) - if use_gzip: - # Check if user provided correct file name. - if not file_name.endswith(("gz", "gzip")): - _LOG.warning("The provided file extension is not for a gzip file.") - # Open gzipped file. 
- f = gzip.open(file_name, mode) - else: - # Open regular text file. - # buffering = 0 if mode == "a" else -1 - buffering = 0 if force_flush else -1 - f = open( # pylint: disable=consider-using-with,assignment - file_name, mode, buffering=buffering - ) - # Write file contents. - f.write(txt) # type: ignore - f.close() - # Clear internal buffer of the file. - if force_flush: - f.flush() - os.fsync(f.fileno()) - - -def _raise_file_decode_error(error: Exception, file_name: str) -> None: - """ - Raise UnicodeDecodeError with detailed error message. - - :param error: raised UnicodeDecodeError - :param file_name: name of read file that raised the exception - """ - msg = [] - msg.append(f"error={error}") - msg.append(f"file_name='{file_name}'") - msg_as_str = "\n".join(msg) - _LOG.error(msg_as_str) - raise RuntimeError(msg_as_str) - - -def from_file( - file_name: str, - *, - encoding: Optional[Any] = None, -) -> str: - """ - Read contents of a file as string. - - :param file_name: path to .txt,.gz or .pq file - :param encoding: encoding to use when reading the string - :return: contents of file as string - """ - dassert_is_valid_file_name(file_name) - hdbg.dassert_path_exists(file_name) - data: str = "" - if file_name.endswith((".gz", ".gzip")): - # Open gzipped file. - f = gzip.open(file_name, "rt", encoding=encoding) - else: - # Open regular text file. - f = open( # pylint: disable=consider-using-with - file_name, "r", encoding=encoding - ) - try: - # Read data. - data = f.read() - except UnicodeDecodeError as e: - # Raise unicode decode error message. 
- _raise_file_decode_error(e, file_name) - finally: - f.close() - hdbg.dassert_isinstance(data, str) - return data - - -# TODO(gp): Use hintro.format_size -def get_size_as_str(file_name: str) -> str: - if os.path.exists(file_name): - size_in_bytes = os.path.getsize(file_name) - if size_in_bytes < (1024**2): - size_in_kb = size_in_bytes / 1024.0 - res = "%.1f KB" % size_in_kb - elif size_in_bytes < (1024**3): - size_in_mb = size_in_bytes / (1024.0**2) - res = "%.1f MB" % size_in_mb - else: - size_in_gb = size_in_bytes / (1024.0**3) - res = "%.1f GB" % size_in_gb - else: - res = "nan" - return res - - -def remove_extension( - filename: str, - extension: str, - *, - check_file_exists: bool = False, - check_has_extension: bool = True, -) -> Optional[str]: - """ - Attempt to remove `extension` from `filename`. - - :param filename: str filename - :param extension: file extension starting with a dot. E.g., ".csv" - :return: filename without `extension`, if applicable, else returns `None`. - """ - hdbg.dassert_isinstance(filename, str) - hdbg.dassert(filename) - if check_file_exists: - hdbg.dassert_file_exists(filename) - # - hdbg.dassert_isinstance(extension, str) - hdbg.dassert( - extension.startswith("."), - "Filename extension=`%s` expected to start with `.`", - extension, - ) - # - ret: Optional[str] = None - if check_has_extension: - hdbg.dassert( - filename.endswith(extension), - "Filename '%s' doesn't have extension=`%s`", - filename, - extension, - ) - if filename.endswith(extension): - ret = filename[: -len(extension)] - return ret - - -# TODO(gp): @all Use msg in all uses of this script `jackpyc "create_executable"` -# TODO(gp): `file_name` should go last. -def create_executable_script( - file_name: str, content: str, *, msg: str = "" -) -> None: - # Write the file. - hdbg.dassert_isinstance(content, str) - to_file(file_name, content) - # Make it executable. 
- cmd = "chmod +x " + file_name - hsystem.system(cmd) - if msg: - print(f"# {msg}:\n> {file_name}") - - -def add_suffix_to_filename( - file_name: str, - suffix: Union[int, str], - *, - before_extension: bool = True, - with_underscore: bool = True, -) -> str: - """ - Add a suffix to a file name, with or without changing the extension. - - E.g., {base_name}.{ext} -> {file_name}.{suffix}.{ext} - - :param file_name: file name to modify - :param suffix: index to add to the file name - :param before_extension: whether to insert the index before the file - extension - :param with_underscore: whether to separate the index with an - underscore - :return: modified file name with an index - """ - suffix = str(suffix) - if with_underscore: - suffix = "_" + suffix - _LOG.debug(hprint.to_str("suffix")) - # - if before_extension: - # Add the suffix to the file name before the extension. - data = file_name.rsplit(".", 1) - if len(data) == 1: - # E.g., `system_log_dir` -> `system_log_dir_1` - ret = file_name + suffix - else: - # E.g., `dir/file.txt` -> `dir/file_1.txt`. - hdbg.dassert_eq(len(data), 2, "Invalid file_name='%s'", file_name) - file_name_no_ext, ext = data - ret = file_name_no_ext + suffix + "." + ext - else: - # Add the suffix after the name of the file. - # E.g., `dir/file.txt` -> `dir/file.txt_1`. - ret = file_name + suffix - _LOG.debug(hprint.to_str("ret")) - return ret - - -def rename_file_if_exists( - file_path: str, - suffix: str, - *, - before_extension: bool = True, -) -> None: - """ - Rename a file if it exists using provided suffix. - - Used to avoid overwriting if writing multiple files with the same name. 
- - :param file_path: a file path to modify - :param suffix: index to add to the file name - :param before_extension: whether to insert the suffix before the file extension - - if True, {file_path}.{ext} -> {file_path}.{suffix}.{ext} - - if False, {file_path}.{ext} -> {file_path}.{ext}.{suffix} - """ - if os.path.exists(file_path): - # Add a suffix to a file name. - if before_extension: - # Add a suffix before an extension, e.g., `file.suffix.csv`. - dir_path, file_name = os.path.split(file_path) - file_name, ext = os.path.splitext(file_name) - hdbg.dassert(ext.startswith("."), "Invalid extension='%s'", ext) - new_file_path = f"{file_name}.{suffix}{ext}" - new_file_path = os.path.join(dir_path, new_file_path) - else: - # Add a suffix after an extension, e.g., `file.csv.suffix`. - new_file_path = f"{file_path}.{suffix}" - hdbg.dassert_path_not_exists(new_file_path) - _LOG.debug("renaming %s to %s", file_path, new_file_path) - os.rename(file_path, new_file_path) - - -def change_file_extension(file_path: str, new_extension: str) -> str: - """ - Change the extension of a file path. - - :param file_path: The path of the file to change the extension of. - :param new_extension: The new extension to use, starting with `.` - :return: The new file path with the new extension. - """ - # Make sure the new extension starts with a dot - hdbg.dassert( - new_extension.startswith("."), "Invalid extension='%s'", new_extension - ) - # Split the file path into root and extension - file_name, _ = os.path.splitext(file_path) - # Create the new file path - new_file_path = file_name + new_extension - return new_file_path - - -def wait_for_file( - file_path: str, - *, - check_interval_in_secs: float = 0.5, - timeout_in_secs: int = 10, -) -> None: - """ - Wait until a specified file is generated or until the timeout is reached. - - :param file_path: The path of the file to wait for. 
- :param check_interval_in_secs: Time in seconds between checks - :param timeout_in_secs: Maximum time to wait for the file in seconds - """ - _LOG.debug("Waiting for file: %s", file_path) - start_time = time.time() - while not os.path.exists(file_path): - if time.time() - start_time > timeout_in_secs: - raise ValueError(f"Timeout reached. File not found: {file_path}") - time.sleep(check_interval_in_secs) - _LOG.debug("File generated: %s", file_path) - - -# ############################################################################# -# JSON -# ############################################################################# - - -def serialize_custom_types_for_json_encoder(obj: Any) -> Any: - """ - Serialize DataFrame and other objects for JSON. - - E.g. dataframe {"A": [0, 1], "B": [0, 1]} will go to a list of dictionaries: - [{"A": 0, "B": 0}, {"A": 1, "B": 1}] - each dictionary is for one row. - """ - import numpy as np - import pandas as pd - - result = None - if isinstance(obj, pd.DataFrame): # type: ignore - result = obj.to_dict("records") - elif isinstance(obj, pd.Series): # type: ignore - result = obj.to_dict() - elif isinstance(obj, np.int64): # type: ignore - result = int(obj) - elif isinstance(obj, np.float64): # type: ignore - result = float(obj) - elif isinstance(obj, uuid.UUID): - result = str(obj) - elif isinstance(obj, datetime.date): - result = obj.isoformat() - elif isinstance(obj, type(pd.NaT)): - result = None - elif isinstance(obj, type(pd.NA)): - result = None - else: - raise TypeError(f"Can not serialize {obj} of type {type(obj)}") - return result - - -def to_json(file_name: str, obj: dict, *, use_types: bool = False) -> None: - """ - Write an object into a JSON file. - - :param obj: data for writing - :param file_name: name of file - :param use_types: whether to use jsonpickle to save the file - """ - if not file_name.endswith(".json"): - _LOG.warning("The file '%s' doesn't end in .json", file_name) - # Create dir. 
- dir_name = os.path.dirname(file_name) - if dir_name != "" and not os.path.isdir(dir_name): - create_dir(dir_name, incremental=True) - # Write data as JSON. - with open(file_name, "w") as outfile: - if use_types: - # Use jsonpickle to save types. - import jsonpickle # type: ignore[import-untyped] - - txt = jsonpickle.encode(obj, indent=4) - outfile.write(txt) - else: - json.dump( - obj, - outfile, - indent=4, - default=serialize_custom_types_for_json_encoder, - ) - - -def from_json(file_name: str, *, use_types: bool = False) -> Dict: - """ - Read object from JSON file. - - :param file_name: name of file - :param use_types: whether to use jsonpickle to load the file - :return: dict with data - """ - hdbg.dassert(file_name) - if not file_name.endswith(".json"): - _LOG.warning("The file '%s' doesn't end in .json", file_name) - # Read file as text. - hdbg.dassert_file_exists(file_name) - txt = from_file(file_name) - # Remove comments (which are not supported natively by JSON). - txt_tmp = [] - for line in txt.split("\n"): - if re.match(r"^\s*#", line): - continue - txt_tmp.append(line) - txt_tmp = "\n".join(txt_tmp) - _LOG.debug("txt_tmp=\n%s", txt_tmp) - # Convert text into Python data structures. - data = {} - if use_types: - import jsonpickle # type: ignore - - data = jsonpickle.decode(txt_tmp) - else: - data = json.loads(txt_tmp) - return data - - -# TODO(gp): -> pandas_helpers.py -def load_df_from_json(path_to_json: str) -> "pd.DataFrame": # noqa: F821 # type: ignore - """ - Load a dataframe from a json file. - - :param path_to_json: path to the json file - :return: - """ - import pandas as pd - - # Load the dict with the data. - data = from_json(path_to_json) - # Preprocess the dict to handle arrays with different length. - data = {k: pd.Series(v) for k, v in data.items()} - # Package into a dataframe. 
- df = pd.DataFrame(data) - return df - - -# ############################################################################# -# Directory operations -# ############################################################################# - -# Copied from `hgit.py` to avoid import cycles. - - -def _find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. - git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - # Using the `open()` to avoid import cycles with the `hio` module. - with open(git_dir, "r") as f: - txt = f.read() - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root`. - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # Resolve the relative path to the absolute path of the Git directory. 
- abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. - hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. - path = parent - return git_root_dir - - -# End copy. - - -def safe_rm_file(dir_path: str) -> None: - """ - Safely remove a file after ensuring it's within our Git client. - - This function provides a safety check to prevent accidental deletion - of files outside our Git repository. - - :param dir_path: Path to the directory to delete - :raises AssertionError: If dir_path is not within the Git client - :raises OSError: If directory doesn't exist or can't be deleted - """ - # Convert to absolute path for comparison. - dir_path = os.path.abspath(dir_path) - # Get the Git client root. - git_root = _find_git_root() - git_root = os.path.abspath(git_root) - # Ensure the directory is within our Git client. - hdbg.dassert( - dir_path.startswith(git_root), - "Directory '%s' is not within Git client root '%s'", - dir_path, - git_root, - ) - # Additional safety check: prevent deletion of Git root itself. 
- hdbg.dassert_ne( - dir_path, - git_root, - "Cannot delete Git client root directory '%s'", - git_root, - ) - # Verify directory exists before attempting deletion. - hdbg.dassert( - os.path.exists(dir_path), - "Directory '%s' does not exist", - dir_path, - ) - hdbg.dassert( - os.path.isdir(dir_path), - "Path '%s' is not a directory", - dir_path, - ) - # Perform the deletion. - _LOG.debug("Safely removing directory: %s", dir_path) - shutil.rmtree(dir_path) - _LOG.debug("Successfully removed directory: %s", dir_path) - - -# TODO(ai_gp): Add unit tests. -def is_subdir(dir1: str, dir2: str) -> bool: - """ - Check if `dir1` is a subdirectory of `dir2`. - - :param dir1: First directory - :param dir2: Second directory - :return: True if `dir1` is a subdirectory of `dir2`, False otherwise - """ - # Resolve to absolute and normalized paths. - abs_dir1 = os.path.abspath(dir1) - abs_dir2 = os.path.abspath(dir2) - # Get the common path prefix. - common = os.path.commonpath([abs_dir1, abs_dir2]) - # It's a subdir if they share the same common path as the parent. - return common == abs_dir2 - - -def write_file_back( - file_name: str, txt_old: List[str], txt_new: List[str] -) -> None: - """ - Write new text to file only if it differs from the old text. - - :param file_name: Path to the file to write to - :param txt_old: Original text as a list of strings - :param txt_new: New text as a list of strings - """ - # Process old text. - hdbg.dassert_list_of_strings(txt_old) - txt_as_str = "\n".join(txt_old) - # Process new text. - hdbg.dassert_list_of_strings(txt_new) - txt_new_as_str = "\n".join(txt_new) - # Write file back, if needed. 
- if txt_as_str != txt_new_as_str: - to_file(file_name, txt_new_as_str) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py deleted file mode 100644 index d11ecbafc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjoblib.py +++ /dev/null @@ -1,880 +0,0 @@ -""" -Import as: - -import helpers.hjoblib as hjoblib -""" - -import concurrent.futures -import logging -import math -import os -import pprint -import random -import sys -import traceback -from functools import wraps -from multiprocessing import Process, Queue -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import joblib -from joblib._store_backends import StoreBackendBase, StoreBackendMixin -from tqdm.autonotebook import tqdm - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.htimer as htimer -import helpers.htqdm as htqdm - -# Avoid dependency from other `helpers` modules, such as `helpers.hcache`, to -# prevent import cycles. 
- - -_LOG = logging.getLogger(__name__) - -# - Assume one wants to execute `n` invocations of a given `func` -# - E.g., `func(param_1), func(param_2), ..., func(param_n)` -# - Each `param` is a tuple of `*args` and `**kwargs` to apply to `func` -# - A `Workload` is composed of: -# - `workload_func`: the function to execute -# - `func_name`: the name / description of the function `func` -# - `tasks`: a list of `n` set of parameters `*args`, `**kwargs` to apply -# to the function (e.g., `param_1`, ..., `param_n`) -# - Each `Task` executes a subset of the functions -# - `Tasks` are a partition of the function invocations, i.e., each function -# invocation is executed by one and only one task -# - The `n` `Tasks` are then executed by `k` threads in parallel or serially -# - Note that a single task can correspond to processing of multiple logical -# chunks of work, because they need to be processed together or because we -# want to enforce that it is executed on a single processor -# - E.g., if we want to concatenate files we can map multiple filenames in a -# single `Task`. In this case the `Task` contains a list of filenames to -# concatenate together - -# ############################################################################# -# Task -# ############################################################################# - -# A `Task` contains the parameters to pass to the function that needs to be -# executed. 
-# A `Task` is represented by a tuple of `*args` and `**kwargs`, e.g., -# ``` -# args=() -# kwargs={ -# 'asset_col_name': 'asset', -# 'dst_dir': './tmp.s3_out', -# 'parquet_file_names': [ -# './tmp.s3/20220110/data.parquet', -# './tmp.s3/20220111/data.parquet', -# './tmp.s3/20220112/data.parquet'] -# } -# ``` -Task = Tuple[Tuple[Any], Dict[str, Any]] - - -# TODO(gp): @Nikola add unit tests -def split_list_in_tasks( - list_in: List[Any], - n: int, - *, - keep_order: bool = False, - num_elems_per_task: Optional[int] = None, -) -> List[List[Any]]: - """ - Split a list in tasks based on the number of threads or elements per - partition. - - :param num_elems_per_task: force each task to have the given number of elements - :param keep_order: split the list so that consecutive elements of the list - are in different tasks. This favors executing the workload in order on `n` - threads - :return: list of lists of elements, where each list can be assigned to an - execution thread - - - E.g., [a, b, c, d, e] executed on 3 threads [1, 2, 3] gives the allocation - for `keep_order=True`: - ``` - 1 -> [a, d] - 2 -> [b, e] - 3 -> [c] - ``` - - For `keep_order=False` the allocation is: - ``` - 1 -> [a, b] - 2 -> [c, d] - 3 -> [e] - ``` - - For `num_elems_per_task=3` the allocation is: - ``` - 1 -> [a, b, c] - 2 -> [d, e] - 3 -> [] - ``` - """ - hdbg.dassert_lte(1, n) - hdbg.dassert_lte(n, len(list_in), "There are fewer tasks than threads") - if keep_order: - hdbg.dassert_is( - num_elems_per_task, - None, - "Can't specify num_elems_per_task with keep_order", - ) - list_out: List[list] = [[] for _ in range(n)] - for i, elem in enumerate(list_in): - _LOG.debug("%s: %s -> %s", i, elem, i % n) - list_out[i % n].append(elem) - else: - if num_elems_per_task is None: - k = int(math.ceil(len(list_in) / n)) - else: - k = num_elems_per_task - hdbg.dassert_lte(1, k) - list_out = [list_in[i : i + k] for i in range(0, len(list_in), k)] - # Ensure that the elements are all distributed. 
- hdbg.dassert_eq(sum(len(l_) for l_ in list_out), len(list_in)) - return list_out - - -def apply_incremental_mode( - src_dst_file_name_map: List[Tuple[str, str]], -) -> List[Tuple[str, str]]: - """ - Apply incremental mode to a map of source to destination files. - - Often the function in a `Workload` corresponds to reading a file, processing it, - and writing the output in a file. In this case, applying the incremental mode - means removing the tuples in the src_file -> dst_file mapping where the dst file - already exists. - - :return: filtered mapping - """ - hdbg.dassert_container_type(src_dst_file_name_map, list, tuple) - # - src_dst_file_name_map_tmp = [] - for src_dst_file_name in src_dst_file_name_map: - # Parse the element of the mapping. - hdbg.dassert_eq(len(src_dst_file_name), 2) - src_file_name, dst_file_name = src_dst_file_name - _LOG.debug("%s -> %s", src_file_name, dst_file_name) - # Discard the mapping element if the destination file already exists. - hdbg.dassert_path_exists(src_file_name) - if os.path.exists(dst_file_name): - _LOG.debug("Skipping %s -> %s", src_file_name, dst_file_name) - else: - src_dst_file_name_map_tmp.append((src_file_name, dst_file_name)) - _LOG.info( - "After applying incremental mode, there are %s / %s files to process", - len(src_dst_file_name_map_tmp), - len(src_dst_file_name_map), - ) - return src_dst_file_name_map_tmp - - -def validate_task(task: Task) -> bool: - """ - Assert if `Task` is malformed, otherwise return True. - - A valid `Task` is a tuple `(*args, **kwargs)`. - """ - # A `Task` is a tuple. - hdbg.dassert_isinstance(task, tuple) - hdbg.dassert_eq(len(task), 2) - # Parse the `Task`. 
- args, kwargs = task - _LOG.debug("task.args=%s", pprint.pformat(args)) - hdbg.dassert_isinstance(args, tuple) - _LOG.debug("task.kwargs=%s", pprint.pformat(kwargs)) - hdbg.dassert_isinstance(kwargs, dict) - return True - - -def task_to_string(task: Task, *, use_pprint: bool = True) -> str: - hdbg.dassert(validate_task(task)) - args, kwargs = task - txt = [] - if use_pprint: - txt.append(f"args={pprint.pformat(args)}") - txt.append(f"kwargs={pprint.pformat(kwargs)}") - else: - txt.append(f"args={str(args)}") - txt.append(f"kwargs={str(kwargs)}") - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# Workload -# ############################################################################# - -# A `Workload` consists of multiple executions of a function with different -# parameters represented by `Tasks`. -# Note: `joblib_helper` can be used together with caching. The workload function -# doesn't have to be the one that is cached, but it can trigger caching of function -# results in the call stack. -Workload = Tuple[ - # `func`: the function representing the workload to execute - Callable, - # `func_name`: the mnemonic name of the function, which is used for debugging - # info and for naming the directory storing the cache - # - E.g., `vltbut.get_cached_bar_data_for_date_interval` - # - Note that the `func_name` can be different than the name of `func` - # - E.g., we can call - # `vltbut.get_cached_bar_data_for_date_interval_for_interval` inside `func`, - # in order to create a cache for - # `vltbut.get_cached_bar_data_for_date_interval`, so the cache name - # should be for `vltbut.get_cached_bar_data_for_date_interval` - str, - # `tasks`: a list of (*args, **kwargs) to pass to `func` - List[Task], -] - - -def validate_workload(workload: Workload) -> bool: - """ - Assert if the `Workload` is malformed, otherwise return True. - - A valid `Workload` is a triple `(func, func_name, List[Task])`. 
- """ - # A valid workload` is a triple. - hdbg.dassert_isinstance(workload, tuple) - hdbg.dassert_eq(len(workload), 3) - # Parse. - workload_func, func_name, tasks = workload - # Check each component. - hdbg.dassert_callable(workload_func) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert_container_type(tasks, List, tuple) - hdbg.dassert(all(validate_task(task) for task in tasks)) - return True - - -def randomize_workload( - workload: Workload, *, seed: Optional[int] = None -) -> Workload: - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Randomize `tasks`. - seed = seed or 42 - random.seed(seed) - random.shuffle(tasks) - # Build a new workload. - workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def reverse_workload( - workload: Workload, *, seed: Optional[int] = None -) -> Workload: - """ - Reverse the workload. - - Typically we generate workload in chronological order, but sometimes - we want to run from most recent data to least recent, so that we - have the results about the most recent periods first, which is what - we care most about. - """ - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Reverse. - _LOG.warning("Reversing the workload as per user request") - tasks = list(reversed(tasks)) - # Build a new workload. - workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def truncate_workload( - workload: Workload, - max_num: int, -) -> Workload: - """ - Limit the workload to the first `max_num` tasks. - """ - validate_workload(workload) - # Parse the workload. - workload_func, func_name, tasks = workload - # Truncate the workload. - _LOG.warning("Considering only the first %d / %d tasks", max_num, len(tasks)) - hdbg.dassert_lte(1, max_num) - hdbg.dassert_lte(max_num, len(tasks)) - tasks = tasks[:max_num] - # Build a new workload. 
- workload = (workload_func, func_name, tasks) - validate_workload(workload) - return workload - - -def workload_to_string(workload: Workload, *, use_pprint: bool = True) -> str: - """ - Print the workload. - - E.g., - - ``` - workload_func=_LimeTask317_process_chunk - func_name=_LimeTask317_process_chunk - # task 1 / 3 - args=([('./tmp.s3/20220110/data.parquet', - './tmp.s3_out/./tmp.s3/20220110/data.parquet')],) - kwargs={} - # task 2 / 3 - args=([('./tmp.s3/20220111/data.parquet', - './tmp.s3_out/./tmp.s3/20220111/data.parquet')],) - kwargs={} - # task 3 / 3 - args=([('./tmp.s3/20220112/data.parquet', - './tmp.s3_out/./tmp.s3/20220112/data.parquet')],) - kwargs={} - ``` - """ - validate_workload(workload) - workload_func, func_name, tasks = workload - txt = [] - workload_func_str = getattr(workload_func, "__name__", "unknown_function") - txt.append(f"workload_func={workload_func_str}") - txt.append(f"func_name={func_name}") - for i, task in enumerate(tasks): - txt.append(f"# task {i + 1} / {len(tasks)}") - txt.append(task_to_string(task, use_pprint=use_pprint)) - txt = "\n".join(txt) - return txt - - -# ############################################################################# -# Template for functions to execute in parallel. -# ############################################################################# - -# NOTE: the workload function: -# - asserts if there is an error, since the return value is a string with a summary -# of the execution -# - doesn't have to be the function that we intend to cache - - -def _workload_function(*args: Any, **kwargs: Any) -> str: - """ - Execute the function task. 
- - :raises: in case of error - :return: string representing information about the cached function - execution - """ - _ = args - incremental = kwargs.pop("incremental") - num_attempts = kwargs.pop("num_attempts") - _ = incremental, num_attempts - func_output: List[str] = [] - result = "\n".join(func_output) - return result - - -def _get_workload( - # args: argparse.Namespace -) -> None: - """ - Prepare the workload using the parameters from command line. - """ - # _ = args - - -# ############################################################################# -# Layer passing information from `parallel_execute` to the function to execute -# in parallel. -# ############################################################################# - - -def get_num_executing_threads(args_num_threads: Union[str, int]) -> int: - """ - Return the number of executing threads based on the value of - `args.num_threads`. - - E.g., - - `serial` corresponds to 1 - - `-1` corresponds to all available CPUs - """ - if args_num_threads == "serial": - num_executing_threads = 1 - elif args_num_threads == -1: - # All CPUs available. - num_executing_threads = joblib.cpu_count() - else: - # Assume it's an int. - num_executing_threads = int(args_num_threads) - hdbg.dassert_lte(1, num_executing_threads) - return num_executing_threads - - -def _run_in_process(func: Callable, q: Queue, *args: Any, **kwargs: Any) -> None: - """ - Run function as a process and store output in the input Queue. - """ - _LOG.debug("pid after processify=", os.getpid()) - try: - ret = func(*args, **kwargs) - except Exception: - # Store error logs in the queue. - ex_type, ex_value, tb = sys.exc_info() - error = ex_type, ex_value, "".join(traceback.format_tb(tb)) - ret = None - else: - error = None - q.put((ret, error)) - - -# TODO(grisha): Add type hints, add unit test to understand the behavior. 
-# From https://gist.github.com/schlamar/2311116 -# Note that this is not going to work with joblib.parallel with -# backend="multiprocessing" returning an error -# AssertionError: daemonic processes are not allowed to have children -def processify(func): - """ - Decorator to run a function as a process. - - Be sure that every argument and the return value is *pickable*. The - created process is joined, so the code does not run in parallel. - """ - - @wraps(func) - def wrapper(*args, **kwargs): - q = Queue() - p = Process( - target=_run_in_process, args=[func] + [q] + list(args), kwargs=kwargs - ) - p.start() - ret, error = q.get() - p.join() - if error: - ex_type, ex_value, tb_str = error - message = f"{ex_value.message} (in subprocess)\n{tb_str}" - raise ex_type(message) - return ret - - return wrapper - - -def _parallel_execute_decorator( - task_idx: int, - task_len: int, - incremental: bool, - abort_on_error: bool, - num_attempts: int, - log_file: str, - # TODO(gp): Pass these parameters first. - workload_func: Callable, - func_name: str, - processify_func: bool, - task: Task, - enable_file_logging: bool, - verbose_log: bool, -) -> Any: - """ - Parameters have the same meaning as in `parallel_execute()`. - - :param abort_on_error: control whether to abort on `workload_func` function - that is failing and asserting - - If `workload_func` fails: - - if `abort_on_error=True` the exception from `workload_func` is - propagated and the return value is `None` - - if `abort_on_error=False` the exception is not propagated, but the - return value is the string representation of the exception - :param processify_func: switch to enable wrapping a function into a process - :param enable_file_logging: see same parameter in `parallel_execute()` - :param verbose_log: see same parameter in `parallel_execute()` - :return: the return value of the workload function or the exception string - """ - # Validate very carefully all the parameters. 
- hdbg.dassert_lte(0, task_idx) - hdbg.dassert_lt(task_idx, task_len) - hdbg.dassert_isinstance(incremental, bool) - hdbg.dassert_isinstance(abort_on_error, bool) - hdbg.dassert_lte(1, num_attempts) - hdbg.dassert_isinstance(log_file, str) - hdbg.dassert_callable(workload_func) - hdbg.dassert_isinstance(func_name, str) - hdbg.dassert(validate_task(task)) - # Redirect the logging output of each task to a different file. - # TODO(gp): This file should go in the `task_dst_dir`. - # log_to_file = True - log_to_file = False - if log_to_file: - dst_dir = os.path.dirname(os.path.abspath(log_file)) - print(dst_dir) - hio.create_dir(dst_dir, incremental=True) - file_name = os.path.join( - dst_dir, f"{func_name}.{task_idx + 1}_{task_len}.log" - ) - _LOG.warning("Logging to %s", file_name) - file_handler = logging.FileHandler(file_name) - root_logger = logging.getLogger() - root_logger.addHandler(file_handler) - # Save information about the function to be executed. - txt = [] - # `start_ts` needs to be before running the function. - start_ts = hdateti.get_current_timestamp_as_string("naive_ET") - tag = f"{task_idx + 1}/{task_len} ({start_ts})" - txt.append("\n" + hprint.frame(tag) + "\n") - txt.append(f"tag={tag}") - workload_func_str = getattr(workload_func, "__name__", "unknown_function") - txt.append(f"workload_func={workload_func_str}") - txt.append(f"func_name={func_name}") - txt.append(task_to_string(task)) - # Run the workload. - args, kwargs = task - kwargs.update({"incremental": incremental, "num_attempts": num_attempts}) - with htimer.TimedScope( - logging.DEBUG, f"Execute '{workload_func_str}'" - ) as ts: - try: - if processify_func: - _LOG.debug("Using processify") - # Wrap the function into a process to enforce de-allocating - # memory at the end of the execution (see - # CmampTask5854: Resolve backtest memory leakage). 
- _LOG.debug("pid before processify=%s", os.getpid()) - workload_func = processify(workload_func) - res = workload_func(*args, **kwargs) - error = False - except Exception as e: # pylint: disable=broad-except - exception = e - txt.append(f"exception='{str(e)}'") - res = None - error = True - _LOG.error("Execution failed") - # Save information about the execution of the function. - elapsed_time = ts.elapsed_time - end_ts = hdateti.get_current_timestamp_as_string("naive_ET") - # TODO(gp): -> func_result - if verbose_log: - txt.append(f"func_res=\n{hprint.indent(str(res))}") - else: - txt.append("func_res=") - txt.append(f"elapsed_time_in_secs={elapsed_time}") - txt.append(f"start_ts={start_ts}") - txt.append(f"end_ts={end_ts}") - txt.append(f"error={error}") - # Update log file. - txt = "\n".join(txt) - _LOG.debug("txt=\n%s", hprint.indent(txt)) - if enable_file_logging: - hio.to_file(log_file, txt, mode="a") - if error: - # The execution wasn't successful. - _LOG.error(txt) - if abort_on_error: - _LOG.error("Aborting since abort_on_error=%s", abort_on_error) - raise exception # noqa: F821 - _LOG.error( - "Continuing execution since abort_on_error=%s", abort_on_error - ) - res = str(exception) - else: - # The execution was successful. - pass - return res - - -# TODO(gp): Pass a `task_dst_dir` to each task so it can write there. -# This is a generalization of `experiment_result_dir` for `run_config_list` and -# `run_notebook`. -def parallel_execute( - workload: Workload, - # Options for the `parallel_execute` framework. - dry_run: bool, - num_threads: Union[str, int], - incremental: bool, - abort_on_error: bool, - num_attempts: int, - log_file: str, - *, - backend: str = "loky", - enable_file_logging: bool = True, - verbose_log: bool = False, -) -> Optional[List[Any]]: - """ - Run a workload in parallel using joblib or asyncio. 
- - Note: - - if `abort_on_error=True` and a task fails early, `joblib` does not return partial results - - use `enable_logging=False` to disable logging entirely (useful for large results) - - use `verbose_log=False` to keep logging enabled but skip verbose output per task - - :param workload: the workload to execute - :param dry_run: if True, print the workload and exit without executing it - :param num_threads: joblib parameter to control how many threads to use - :param incremental: parameter passed to the function to execute to control if - we want to re-execute tasks already executed or not - :param abort_on_error: when True, if one task asserts then stop executing the - workload and return the exception of the failing task - - If False, the execution continues - :param num_attempts: number of times to attempt running a function before - declaring an error - :param log_file: file used to log information about the execution - :param backend: specify the backend type (e.g., joblib `loky` or `asyncio_process_executor`) - :param enable_file_logging: if False, skip writing any log file - :param verbose_log: if True, write detailed task results to the log file - - If False, large outputs will be omitted from the log to reduce file size - :return: results from executing `func` or the exception of the failing function - """ - # Print the parameters. - _LOG.info(hprint.frame("Workload")) - # It's too verbose to print all the workload. - # print(workload_to_string(workload, use_pprint=False)) - _LOG.info( - hprint.to_str( - "dry_run num_threads incremental num_attempts abort_on_error" - ) - ) - # Parse the workload. 
- validate_workload(workload) - workload_func, func_name, tasks = workload - _LOG.info("Saving log info in '%s'", log_file) - _LOG.info( - "Number of executing threads=%s (%s)", - get_num_executing_threads(num_threads), - num_threads, - ) - _LOG.info("Number of tasks=%s", len(tasks)) - # - if dry_run: - file_name = "./tmp.parallel_execute.workload.txt" - workload_as_str = workload_to_string(workload, use_pprint=False) - hio.to_file(file_name, workload_as_str) - _LOG.warning("Workload saved at '%s'", file_name) - _LOG.warning("Exiting without executing workload, as per user request") - return None - # Run. - task_len = len(tasks) - tqdm_out = htqdm.TqdmToLogger(_LOG, level=logging.INFO) - tqdm_iter = tqdm( - enumerate(tasks), - total=task_len, - file=tqdm_out, - desc=f"num_threads={num_threads} backend={backend}", - ) - if backend == "threading": - # Enable wrapping a function into a process for threading backend - # to force memory de-allocation. - # TODO(Grisha): unclear if there are cases when we want to use - # `False` with `threading` backends, consider exposing to the - # interface. - # TODO(Grisha): should we enable the switch for `num_threads="serial"`? will it work? - processify_func = True - else: - processify_func = False - if num_threads == "serial": - # Execute the tasks serially. - res = [] - for task_idx, task in tqdm_iter: - _LOG.debug("\n%s", hprint.frame(f"Task {task_idx + 1} / {task_len}")) - # Execute. - res_tmp = _parallel_execute_decorator( - task_idx, - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - task, - enable_file_logging, - verbose_log, - ) - res.append(res_tmp) - else: - # Execute the tasks in parallel. - num_threads = int(num_threads) - # -1 is interpreted by joblib like for all cores. 
- _LOG.info("Using %d threads, backend='%s'", num_threads, backend) - if backend in ("loky", "threading", "multiprocessing"): - # from joblib.externals.loky import set_loky_pickler - # set_loky_pickler('cloudpickle') - # Removed `verbose` param which causes issues in HelpersTask715. - res = joblib.Parallel(n_jobs=num_threads, backend=backend)( - joblib.delayed(_parallel_execute_decorator)( - task_idx, - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - task, - enable_file_logging, - verbose_log, - ) - # We can't use `tqdm_iter` since this only shows the submission of - # the jobs but not their completion. - for task_idx, task in enumerate(tasks) - ) - elif backend in ("asyncio_threading", "asyncio_multiprocessing"): - if backend == "asyncio_threading": - executor = concurrent.futures.ThreadPoolExecutor - elif backend == "asyncio_multiprocessing": - executor = concurrent.futures.ProcessPoolExecutor - else: - raise ValueError(f"Invalid backend='{backend}'") - func = lambda args_: _parallel_execute_decorator( - args_[0], - task_len, - incremental, - abort_on_error, - num_attempts, - log_file, - # - workload_func, - func_name, - processify_func, - args_[1], - enable_file_logging, - verbose_log, - ) - args = list(enumerate(tasks)) - use_progress_bar = True - if not use_progress_bar: - # Implementation without progress bar. - with executor(max_workers=num_threads) as executor_: - res = list(executor_.map(func, args)) - else: - # Implementation with progress bar. 
- res = [] - with tqdm_iter as pbar: - with executor(max_workers=num_threads) as executor_: - futures = { - executor_.submit(func, arg): arg for arg in args - } - _LOG.debug("done submitting") - for future in concurrent.futures.as_completed(futures): - res_tmp = future.result() - res.append(res_tmp) - pbar.update(1) - else: - raise ValueError(f"Invalid backend='{backend}'") - _LOG.info("Saved log info in '%s'", log_file) - return res - - -# ############################################################################# -# joblib storage backend for S3. -# ############################################################################# - -# This allows to store a joblib cache on S3. - -# Adapted from https://github.com/aabadie/joblib-s3 - - -# ############################################################################# -# _S3FSStoreBackend -# ############################################################################# - - -class _S3FSStoreBackend(StoreBackendBase, StoreBackendMixin): - """ - A StoreBackend for S3 cloud storage file system. - """ - - def __init__(self) -> None: - super().__init__() - self._objs: List[Any] = [] - - def _flush(self) -> None: - _ = self - - def clear_location(self, location: str) -> None: - """ - Check if object exists in store. - """ - if self.storage.exists(location): - self._flush() - self.storage.rm(location, recursive=True) - - def _mkdirp(self, directory: str) -> None: - """ - Create recursively a directory on the S3 store. - """ - # Remove root cachedir from input directory to create as it should - # have already been created in the configure function. - if directory.startswith(self.location): - directory = directory.replace(self.location + "/", "") - current_path = self.location - for sub_dir in directory.split("/"): - current_path = os.path.join(current_path, sub_dir) - self.storage.mkdir(current_path) - - def create_location(self, location: str) -> None: - """ - Create object location on store. 
- """ - self._mkdirp(location) - - def get_items(self) -> List[Any]: - """ - Return the whole list of items available in cache. - """ - _ = self - return [] - - def configure( - self, - location: str, - backend_options: Dict[str, Any], - verbose: int = 0, - ) -> None: - """ - Configure the store backend. - """ - options = backend_options - hdbg.dassert_in("s3fs", options) - self.storage = options["s3fs"] - hdbg.dassert_in("bucket", options) - bucket = options["bucket"] - # Ensure the given bucket exists. - root_bucket = os.path.join("s3://", bucket) - if not self.storage.exists(root_bucket): - self.storage.mkdir(root_bucket) - if location.startswith("/"): - location.replace("/", "") - self.location = os.path.join(root_bucket, location) - if not self.storage.exists(self.location): - self.storage.mkdir(self.location) - # Computation results can be stored compressed for faster I/O. - self.compress = backend_options["compress"] - # Memory map mode is not supported. - self.mmap_mode = None - # TODO(gp): No need to flush for now. - # for fd in self._objs: - # fd.flush(force=True) - - def _open_item(self, fd: Any, mode: str) -> Any: - self._objs.append(fd) - return self.storage.open(fd, mode) - - def _item_exists(self, path: str) -> bool: - self._flush() - ret: bool = self.storage.exists(path) - return ret - - def _move_item(self, src: str, dst: str) -> None: - self.storage.mv(src, dst) - - -_REGISTER_S3FS_STORE = False - - -def register_s3fs_store_backend() -> None: - """ - Register the S3 store backend for joblib memory caching. 
- """ - global _REGISTER_S3FS_STORE - if not _REGISTER_S3FS_STORE: - joblib.register_store_backend("s3", _S3FSStoreBackend) - _REGISTER_S3FS_STORE = True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py deleted file mode 100644 index 5b8aa72aa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hjupyter.py +++ /dev/null @@ -1,383 +0,0 @@ -""" -Import as: - -import helpers.hjupyter as hjupyte -""" - -import logging -import os -from typing import Dict, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -def run_notebook( - file_name: str, - scratch_dir: str, - *, - pre_cmd: str = "", -) -> None: - """ - Run jupyter notebook. - - Assert if the notebook doesn't complete successfully. - - :param file_name: path to the notebook to run. If this is a .py - file, convert to .ipynb first - :param scratch_dir: temporary dir storing the output - :param pre_cmd: - """ - file_name = os.path.abspath(file_name) - hdbg.dassert_path_exists(file_name) - hio.create_dir(scratch_dir, incremental=True) - # Build command line. - cmd = [] - if pre_cmd: - cmd.append(f"{pre_cmd} &&") - # Convert .py file into .ipynb if needed. - root, ext = os.path.splitext(file_name) - if ext == ".ipynb": - notebook_name = file_name - elif ext == ".py": - cmd.append(f"jupytext --update --to notebook {file_name};") - notebook_name = f"{root}.ipynb" - else: - raise ValueError(f"Unsupported file format for file_name='{file_name}'") - # Execute notebook. 
- cmd.append(f"cd {scratch_dir} &&") - cmd.append(f"jupyter nbconvert {notebook_name}") - cmd.append("--execute") - cmd.append("--to html") - cmd.append("--ExecutePreprocessor.kernel_name=python") - # No time-out. - cmd.append("--ExecutePreprocessor.timeout=-1") - # Execute. - cmd_as_str = " ".join(cmd) - hsystem.system(cmd_as_str, abort_on_error=True, suppress_output=False) - - -def run_notebook_cells( - notebook_path: str, - dst_notebook_path: str, - *, - num_cells: Optional[int] = None, - kernel_name: str = "python3", - timeout: int = 30, -) -> None: - """ - Execute the first N cells of a notebook and save the result. - - :param notebook_path: path to the source notebook to execute - :param dst_notebook_path: path where the executed notebook will be saved - :param num_cells: number of cells to execute from the beginning; if None, - execute all cells - :param kernel_name: name of the Jupyter kernel to use - :param timeout: execution timeout in seconds per cell - """ - import nbformat - from nbconvert.preprocessors import ExecutePreprocessor - - hdbg.dassert_path_exists(notebook_path) - # Read the notebook. - _LOG.info("Reading notebook '%s'", notebook_path) - with open(notebook_path) as f: - nb = nbformat.read(f, as_version=4) - # Truncate to first N cells if requested. - total_cells = len(nb.cells) - if num_cells is not None: - hdbg.dassert_lte(1, num_cells, "num_cells must be >= 1") - hdbg.dassert_lte( - num_cells, - total_cells, - "num_cells=%d exceeds total cells=%d in notebook", - num_cells, - total_cells, - ) - _LOG.info("Executing first %d of %d cells", num_cells, total_cells) - nb.cells = nb.cells[:num_cells] - else: - _LOG.info("Executing all %d cells", total_cells) - # Execute the cells. - ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) - ep.preprocess(nb) - # Save the executed notebook. 
- _LOG.info("Saving executed notebook to '%s'", dst_notebook_path) - with open(dst_notebook_path, "w") as f: - nbformat.write(nb, f) - - -def build_run_notebook_cmd( - config_builder: str, - dst_dir: str, - notebook_path: str, - *, - extra_opts: str = "", -) -> str: - """ - Construct a command string to run dev_scripts/notebooks/run_notebook.py - with specified configurations. - - :param config_builder: the configuration builder to use for the - notebook execution - :param dst_dir: the destination directory where the notebook results - will be saved - :param notebook_path: the path to the notebook that should be - executed - :param extra_opts: options for "run_notebook.py", e.g., "-- - publish_notebook" - """ - # Importing inside func to avoid error while creating dockerized executable. - # TODO(Shaunak): debug why. - import helpers.hgit as hgit - - # TODO(Vlad): Factor out common code with the - # `helpers.lib_tasks_gh.publish_buildmeister_dashboard_to_s3()`. - run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") - cmd_run_txt = [ - run_notebook_script_path, - f"--notebook {notebook_path}", - f"--config_builder '{config_builder}'", - f"--dst_dir '{dst_dir}'", - f"{extra_opts}", - ] - cmd_run_txt = " ".join(cmd_run_txt) - return cmd_run_txt - - -# ############################################################################# - - -def find_paired_files( - directory: str, - *, - pattern: str = "*.py", - exclude_pattern: str = None, -) -> tuple: - """ - Find Python files and paired Jupyter notebooks in a directory. 
- - :param directory: path to the directory to search - :param pattern: glob pattern for Python files (default: "*.py") - :param exclude_pattern: suffix pattern to exclude (e.g., "_utils.py") - :return: tuple of (python_files, paired_notebooks, unpaired_notebooks) - - python_files: list of .py files matching pattern - - paired_notebooks: list of .ipynb files with corresponding .py - - unpaired_notebooks: list of .ipynb files without corresponding .py - """ - hdbg.dassert_path_exists(directory) - # Find Python files matching pattern. - py_files = hio.listdir( - directory, - pattern, - only_files=True, - use_relative_paths=False, - maxdepth=1, - ) - # Exclude files matching exclude_pattern. - if exclude_pattern: - py_files = [f for f in py_files if not f.endswith(exclude_pattern)] - py_files = sorted(py_files) - # Find notebook files. - nb_pattern = pattern.replace(".py", ".ipynb") - nb_files = hio.listdir( - directory, - nb_pattern, - only_files=True, - use_relative_paths=False, - maxdepth=1, - ) - nb_files = sorted(nb_files) - # Build set of base names from Python files. - py_basenames = set() - for py_file in py_files: - basename = os.path.basename(py_file) - basename = os.path.splitext(basename)[0] - py_basenames.add(basename) - # Check which notebooks have corresponding .py files. - paired_notebooks = [] - unpaired_notebooks = [] - for nb_file in nb_files: - basename = os.path.basename(nb_file) - basename = os.path.splitext(basename)[0] - if basename in py_basenames: - paired_notebooks.append(nb_file) - else: - unpaired_notebooks.append(nb_file) - return py_files, paired_notebooks, unpaired_notebooks - - -def execute_file_with_docker( - file_path: str, - *, - working_dir: str, - is_notebook: bool, -) -> Tuple[bool, str, float]: - """ - Execute a Python file or notebook using docker_cmd. 
- - :param file_path: path to the file to execute - :param working_dir: directory to cd into before execution - :param is_notebook: True if file is a notebook, False if Python script - :return: tuple of (success, error_message, elapsed_time) - """ - timer = htimer.Timer() - success = False - error_msg = "" - try: - if is_notebook: - # For notebooks, use hjupyter.run_notebook via docker_cmd. - scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") - # Build Python command to run notebook. - cmd = ( - f'python -c "' - f"import helpers.hjupyter as hjupyte; " - f"import helpers.hio as hio; " - f"hio.create_dir('{scratch_dir}', incremental=True); " - f"hjupyte.run_notebook('{file_path}', '{scratch_dir}')\"" - ) - else: - # For Python scripts, execute directly. - cmd = f"python {file_path}" - # Build invoke docker_cmd command. - docker_cmd = f'invoke docker_cmd --cmd "{cmd}"' - # Execute in the working directory. - hsystem.system( - docker_cmd, - abort_on_error=False, - suppress_output=False, - ) - success = True - except Exception as e: - error_msg = str(e) - elapsed = timer.get_elapsed() - return success, error_msg, elapsed - - -def execute_file_directly( - file_path: str, - *, - working_dir: str, - is_notebook: bool, -) -> Tuple[bool, str, float]: - """ - Execute a Python file or notebook directly (inside container). - - :param file_path: path to the file to execute - :param working_dir: directory to cd into before execution - :param is_notebook: True if file is a notebook, False if Python script - :return: tuple of (success, error_message, elapsed_time) - """ - timer = htimer.Timer() - success = False - error_msg = "" - try: - if is_notebook: - # For notebooks, use hjupyter.run_notebook. - scratch_dir = os.path.join(working_dir, "tmp.notebook_scratch") - hio.create_dir(scratch_dir, incremental=True) - run_notebook( - file_path, - scratch_dir, - pre_cmd=f"cd {working_dir}", - ) - else: - # For Python scripts, execute directly. 
- cmd = f"cd {working_dir} && python {file_path}" - hsystem.system( - cmd, - abort_on_error=True, - suppress_output=False, - ) - success = True - except Exception as e: - error_msg = str(e) - elapsed = timer.get_elapsed() - return success, error_msg, elapsed - - -def report_execution_results( - py_results: Dict[str, Tuple[bool, str, float]], - nb_results: Dict[str, Tuple[bool, str, float]], -) -> Tuple[int, str]: - """ - Report execution results and return failure information. - - :param py_results: results from Python file execution - :param nb_results: results from notebook execution - :return: tuple of (total_failures, error_message) - """ - # Collect failures. - py_failures = [f for f, (success, _, _) in py_results.items() if not success] - nb_failures = [f for f, (success, _, _) in nb_results.items() if not success] - # Calculate statistics. - py_total = len(py_results) - py_success = py_total - len(py_failures) - nb_total = len(nb_results) - nb_success = nb_total - len(nb_failures) - total_files = py_total + nb_total - total_success = py_success + nb_success - total_failures = len(py_failures) + len(nb_failures) - # Calculate timing statistics. - py_times = [elapsed for _, _, elapsed in py_results.values()] - nb_times = [elapsed for _, _, elapsed in nb_results.values()] - py_total_time = sum(py_times) if py_times else 0.0 - nb_total_time = sum(nb_times) if nb_times else 0.0 - total_time = py_total_time + nb_total_time - # Report summary. 
- _LOG.info("=" * 80) - _LOG.info("EXECUTION SUMMARY") - _LOG.info("=" * 80) - _LOG.info( - "Python scripts: %d total, %d success, %d failed", - py_total, - py_success, - len(py_failures), - ) - if py_total > 0: - _LOG.info(" Total time: %.2f seconds", py_total_time) - _LOG.info(" Average time: %.2f seconds", py_total_time / py_total) - _LOG.info( - "Notebooks: %d total, %d success, %d failed", - nb_total, - nb_success, - len(nb_failures), - ) - if nb_total > 0: - _LOG.info(" Total time: %.2f seconds", nb_total_time) - _LOG.info(" Average time: %.2f seconds", nb_total_time / nb_total) - _LOG.info("-" * 80) - _LOG.info( - "TOTAL: %d files, %d success, %d failed", - total_files, - total_success, - total_failures, - ) - _LOG.info("Total execution time: %.2f seconds", total_time) - # Build error message if failures exist. - error_message = "" - if total_failures > 0: - _LOG.error("=" * 80) - _LOG.error("FAILURES DETECTED") - _LOG.error("=" * 80) - if py_failures: - _LOG.error("Failed Python scripts:") - for file_path in py_failures: - basename = os.path.basename(file_path) - _, error, _ = py_results[file_path] - _LOG.error(" - %s: %s", basename, error) - if nb_failures: - _LOG.error("Failed notebooks:") - for file_path in nb_failures: - basename = os.path.basename(file_path) - _, error, _ = nb_results[file_path] - _LOG.error(" - %s: %s", basename, error) - _LOG.error("=" * 80) - error_message = ( - f"{total_failures} file(s) failed to execute. See log for details." 
- ) - return total_failures, error_message diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py deleted file mode 100644 index 5e0ec6214..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlatex.py +++ /dev/null @@ -1,334 +0,0 @@ -""" -Import as: - -import helpers.hlatex as hlatex -""" - -import logging -import re -from typing import List, Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Consider using `pypandoc` instead of calling `pandoc` directly. -# https://boisgera.github.io/pandoc - - -# TODO(gp): Add a switch to keep the tmp files or delete them. -def convert_pandoc_md_to_latex(txt: str) -> str: - """ - Run pandoc to convert a markdown file to a latex file. - """ - hdbg.dassert_isinstance(txt, str) - # Save to tmp file. - in_file_name = "./tmp.run_pandoc_in.md" - hio.to_file(in_file_name, txt) - # Run Pandoc. - out_file_name = "./tmp.run_pandoc_out.tex" - cmd = ( - f"pandoc {in_file_name} -o {out_file_name} --read=markdown --write=latex" - ) - container_type = "pandoc_only" - - # To minimze the dependency. - import dev_scripts_helpers.dockerize.lib_pandoc as dshdlipa - - dshdlipa.run_dockerized_pandoc(cmd, container_type) - # Read tmp file. - res = hio.from_file(out_file_name) - # Remove lines that contain \tightlist. - res = "\n".join( - [line for line in res.splitlines() if "\\tightlist" not in line] - ) - return res - - -def markdown_list_to_latex(markdown: str) -> str: - """ - Convert a Markdown list to LaTeX format. 
- - :param markdown: The Markdown text to convert - :return: The converted LaTeX text - """ - hdbg.dassert_isinstance(markdown, str) - markdown = hprint.dedent(markdown) - # Remove the first line if it's a title. - markdown_lines = markdown.split("\n") - m = re.match(r"^(\*+ )(.*)", markdown_lines[0]) - if m: - title = m.group(2) - markdown_lines = markdown_lines[1:] - else: - title = "" - markdown = "\n".join(markdown_lines) - # Convert. - txt = convert_pandoc_md_to_latex(markdown) - # Remove `\tightlist` and empty lines. - lines = txt.splitlines() - lines = [line for line in lines if "\\tightlist" not in line] - lines = [line for line in lines if line.strip() != ""] - txt = "\n".join(lines) - # Add the title frame. - if title: - txt = f"\\begin{{frame}}{{{title}}}" + "\n" + txt + "\n" + "\\end{frame}" - return txt - - -def remove_latex_formatting(latex_string: str) -> str: - r""" - Remove LaTeX formatting such as \textcolor{color}{content} and retains only - the content. - """ - cleaned_string = re.sub( - r"\\textcolor\{[^}]*\}\{([^}]*)\}", r"\1", latex_string - ) - return cleaned_string - - -def format_latex(txt: str) -> str: - """ - Format LaTeX text using `prettier`. - - :param txt: input LaTeX text to format - :return: formatted LaTeX text - """ - file_type = "tex" - # To minimize the dependency. - import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr - - txt = dshdlipr.prettier_on_str(txt, file_type) - return txt - - -# ############################################################################# -# Frame Latex sections -# ############################################################################# - - -def _is_latex_line_separator(line: str, *, min_repeats: int = 5) -> bool: - """ - Check if the given line is a LaTeX comment separator. - - This function determines if a line consists of a comment character - `%` followed by repeated characters (`#`, `=`, `-`) that would - indicate a section separator. 
- - :param line: current line of text being processed - :param min_repeats: minimum number of times the characters have to - be repeated to be considered a separator - :return: whether the line is a separator - """ - separator_pattern = rf""" - ^\s*%\s* # % - ([#=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it - # (`min_repeats` - 1) times. - \s*$ # Match only whitespace characters - # until the end of the line. - """ - res = bool(re.match(separator_pattern, line, re.VERBOSE)) - return res - - -def frame_sections(lines: List[str]) -> List[str]: - r""" - Add line separators before LaTeX section commands. - - This function adds comment separators before \section, \subsection, and - \subsubsection commands in LaTeX files. The separators are: - ``` - % #####... - \section - - % =====... - \subsection: - - % -----... - \subsubsection - ``` - - If a separator comment already exists immediately before the section command, - no separator is added. - - :param lines: list of strings representing the LaTeX file content - :return: list of strings with separators added before section commands - """ - hdbg.dassert_isinstance(lines, list) - # Loop 1: Remove existing latex separators. - txt_tmp: List[str] = [] - for line in lines: - if not _is_latex_line_separator(line): - txt_tmp.append(line) - # Loop 2: Remove consecutive empty lines, leaving only one. - txt_tmp2: List[str] = [] - prev_was_empty = False - for line in txt_tmp: - is_empty = line.strip() == "" - if is_empty: - if not prev_was_empty: - txt_tmp2.append(line) - prev_was_empty = True - else: - txt_tmp2.append(line) - prev_was_empty = False - # Loop 3: Add correct LaTeX separator based on section commands. - txt_new: List[str] = [] - # Define the section patterns and their corresponding separators. - # Total line length is 80 characters, "% " is 2 characters, so 78 separator chars. 
- prefix = "% " - section_patterns = [ - (r"^\\section\{", prefix + "#" * 78), - (r"^\\subsection\{", prefix + "=" * 78), - (r"^\\subsubsection\{", prefix + "-" * 78), - ] - for i, line in enumerate(txt_tmp2): - _LOG.debug("line=%d:%s", i, line) - txt_processed = False - # Check if the line matches any section command. - for pattern, separator in section_patterns: - m = re.match(pattern, line.strip()) - if m: - _LOG.debug(" -> Found section command") - txt_new.append(separator) - _LOG.debug(" -> Added separator: %s", separator) - txt_new.append(line) - txt_processed = True - break - if not txt_processed: - txt_new.append(line) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -# ############################################################################# -# LaTeX Header Extraction -# ############################################################################# - - -def _is_latex_comment(line: str) -> bool: - r""" - Check if a line is a LaTeX comment. - - A LaTeX comment line starts with the `%` character. This function - handles the edge case where `%` is escaped (e.g., `\%`), which - should not be treated as a comment. - - :param line: line of text to check - :return: True if the line is a comment, False otherwise - """ - hdbg.dassert_isinstance(line, str) - # Strip leading whitespace to check the first non-whitespace character. - stripped_line = line.lstrip() - # Check if line starts with %. - if not stripped_line.startswith("%"): - return False - # Check if the % is escaped by looking at the character before it in the - # original line. - # Find the position of % in the original line. - percent_pos = line.find("%") - # If there's a character before %, check if it's a backslash. - if percent_pos > 0 and line[percent_pos - 1] == "\\": - # Check if the backslash itself is escaped. - if percent_pos > 1 and line[percent_pos - 2] == "\\": - # Double backslash before %, so % is not escaped. - return True - # Single backslash before %, so % is escaped. 
- return False - # % is at the beginning or has no backslash before it. - return True - - -def _extract_latex_section( - line: str, line_number: int -) -> Optional[hmarhead.HeaderInfo]: - r""" - Parse a LaTeX section command and extract section information. - - This function identifies LaTeX section commands (\section{}, \subsection{}, - \subsubsection{}) and extracts the section title. It handles several edge - cases including: - - Regex parsing of `\section[Short]{Long Title}` (extracts "Long Title") - - Handles nested braces within titles (e.g., `\section{Intro to \textbf{ML}}`) - - Does not handle multi-line section titles - - :param line: line of text to parse - :param line_number: line number in the original file - :return: HeaderInfo object if section found, None otherwise - """ - hdbg.dassert_isinstance(line, str) - hdbg.dassert_isinstance(line_number, int) - # Define section patterns with their corresponding levels. - # Pattern supports optional [short title] before {long title}. - regex = r"(?:\[.*?\])?\{(.*)\}" - section_patterns = [ - (r"\\section" + regex, 1), - (r"\\subsection" + regex, 2), - (r"\\subsubsection" + regex, 3), - ] - line_stripped = line.strip() - # Try to match each section pattern. - for pattern, level in section_patterns: - # Check if line starts with the section command. - match = re.match(pattern, line_stripped) - if match: - # Extract the title from the first capture group. - title = match.group(1) - # Skip sections with empty titles. - if not title: - return None - # Return HeaderInfo with level, title, and line number. - return hmarhead.HeaderInfo(level, title, line_number) - # No section command found. - return None - - -def extract_headers_from_latex( - lines: List[str], max_level: int, *, sanity_check: bool = True -) -> hmarhead.HeaderList: - r""" - Extract headers from a LaTeX file and return a HeaderList. 
- - This function processes a LaTeX file line by line, identifies section - commands (\section, \subsection, \subsubsection), and creates a list - of HeaderInfo objects. It skips commented-out lines (lines starting - with %) and only includes headers up to the specified maximum level. - - :param lines: content of the input LaTeX file as list of strings - :param max_level: maximum header levels to parse (e.g., '3' parses - \section, \subsection, and \subsubsection, but not deeper levels) - :param sanity_check: whether to check that the header list is valid - using the same validation as Markdown headers - :return: list of HeaderInfo objects, each containing (level, title, - line_number), e.g.: - ``` - [ - HeaderInfo(1, "Introduction", 5), - HeaderInfo(2, "Background", 10), - ... - ] - ``` - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_level) - header_list: hmarhead.HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - # Skip LaTeX comment lines. - if _is_latex_comment(line): - continue - # Check if this line contains a section command. - header_info = _extract_latex_section(line, line_number) - if header_info and header_info.level <= max_level: - # Add HeaderInfo to list. - header_list.append(header_info) - # Check the header list. - if sanity_check: - hmarhead.sanity_check_header_list(header_list) - else: - _LOG.debug("Skipping sanity check") - hdbg.dassert_isinstance(header_list, list) - return header_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py deleted file mode 100644 index 8f857d385..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlint.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Linting utilities for text and code files. 
- -Import as: - -import helpers.hlint as hlint -""" - -import logging - -import helpers.hgit as hgit -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def lint_file(file_path: str) -> None: - """ - Run lint_txt.py on the file to ensure proper formatting. - - :param file_path: path to the file to lint - """ - _LOG.info("Linting file: %s", file_path) - lint_script = hgit.find_file_in_git_tree("lint_txt.py", super_module=True) - # Run lint_txt.py. - cmd = f"{lint_script} -i {file_path} -v CRITICAL" - _LOG.debug("Running command: %s", cmd) - hsystem.system(cmd, suppress_output=True) - _LOG.info("File linted successfully: %s", file_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py deleted file mode 100644 index c13ed1255..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlist.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Import as: - -import helpers.hlist as hlist -""" - -from typing import Any, List, Optional, Set - -import helpers.hdbg as hdbg - - -# TODO(gp): -> return_single_element, return_single_element_or_assert? -def assert_single_element_and_return(list_: List[Any]) -> Any: - """ - Assert that the passed list has a single element and return that single - element. - - :return: return the unique element in the list - """ - hdbg.dassert_isinstance(list_, list) - hdbg.dassert_eq(len(list_), 1, "List has %d elements!", len(list_)) - return list_[0] - - -def find_duplicates(list_: List[Any]) -> List[Any]: - """ - Find the elements duplicated in a list. - """ - hdbg.dassert_isinstance(list_, list) - # Count the occurrences of each element of the seq. - set_l = set(list_) - v_to_num = [(v, list_.count(v)) for v in set_l] - # Build list of elems with duplicates. 
- res = [v for v, n in v_to_num if n > 1] - return res - - -def remove_duplicates(list_: List[Any]) -> List[Any]: - """ - Remove the elements duplicated in a list, without changing the order. - """ - hdbg.dassert_isinstance(list_, list) - list_out = [] - set_l: Set[Any] = set() - for v in list_: - if v not in set_l: - set_l.add(v) - list_out.append(v) - return list_out - - -def extract( - list_: List[Any], start_idx: Optional[int], end_idx: Optional[int] -) -> List[Any]: - """ - Filter the list using [start_idx, end_idx). - """ - if start_idx is not None: - hdbg.dassert_lte(0, start_idx) - else: - start_idx = 0 - if end_idx is not None: - hdbg.dassert_lte(end_idx, len(list_)) - else: - end_idx = len(list_) - if list_: - hdbg.dassert_lt(start_idx, end_idx) - list_ = list_[start_idx:end_idx] - return list_ - - -def chunk(list_: List[Any], n: int) -> List[Any]: - hdbg.dassert_lte(1, n) - hdbg.dassert_lte(n, len(list_)) - k, m = divmod(len(list_), n) - return [ - list_[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n) - ] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py deleted file mode 100644 index f821d4f76..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm.py +++ /dev/null @@ -1,680 +0,0 @@ -""" -Import as: - -import helpers.hllm as hllm -""" - -import functools -import logging -import os -import re -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union - -import openai -import tqdm -from pydantic import BaseModel - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hllm_cost as hllmcost -import helpers.hprint as hprint -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -# Create a generic type variable. 
-T = TypeVar("T", bound=BaseModel) - -# ############################################################################# -# Update LLM cache -# ############################################################################# - - -_UPDATE_LLM_CACHE = False - - -def set_update_llm_cache(update: bool) -> None: - """ - Set whether to update the LLM cache. - - :param update: True to update the cache, False otherwise - """ - global _UPDATE_LLM_CACHE - _UPDATE_LLM_CACHE = update - - -def get_update_llm_cache() -> bool: - """ - Get whether to update the LLM cache. - - :return: True if the cache should be updated, False otherwise - """ - return _UPDATE_LLM_CACHE - - -# ############################################################################# -# Utility Functions -# ############################################################################# - - -def _get_llm_provider_and_model(model: str) -> Tuple[str, str]: - """ - Get the provider and model names from a model string. - - The model can be specified as: - - "gpt-4o-mini" - - "openai/gpt-4o-mini" - - "deepseek/deepseek-r1-0528-qwen3-8b:free/" - - :param model: model to use for the completion - :return: tuple of provider name and model name - """ - if "/" in model: - if model.startswith("openai/"): - provider_name = "openai" - model = model.split("/")[1] - else: - provider_name = "openrouter" - else: - provider_name = "openai" - hdbg.dassert_in( - provider_name, - ("openai", "openrouter"), - "Unknown provider: %s", - provider_name, - ) - return provider_name, model - - -def response_to_txt(response: Any) -> str: - """ - Convert an OpenAI API response to a text string. 
- - :param response: API response object - :return: extracted text contents as a string - """ - if isinstance(response, openai.types.chat.chat_completion.ChatCompletion): - ret = response.choices[0].message.content - elif isinstance(response, openai.types.responses.Response): - ret = response.output_text - # elif isinstance(response, openai.pagination.SyncCursorPage): - # ret = response.data[0].content[0].text.value - elif isinstance(response, openai.types.beta.threads.message.Message): - ret = response.content[0].text.value - elif isinstance(response, str): - ret = response - elif isinstance(response, dict): - # Handle Chat Completions dict form. - if "choices" in response and "message" in response["choices"][0]: - ret = response["choices"][0]["message"]["content"] - # Handle Responses API dict form. - elif "output_text" in response: - ret = response["output_text"] - else: - raise ValueError( - f"Unknown dict structure in response: {response.keys()}" - ) - else: - raise ValueError(f"Unknown response type: {type(response)}") - hdbg.dassert_isinstance(ret, str) - return ret - - -def build_chat_completion_messages( - system_prompt: str, - user_prompt: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, -) -> List[Dict[str, Any]]: - """ - Construct the standard messages payload for the Chat Completions API. - - :param system_prompt: system prompt - :param user_prompt: user prompt - :param images_as_base64: base64-encoded images - :return: messages in the format expected by the Chat Completions API - """ - hdbg.dassert_isinstance(system_prompt, str) - hdbg.dassert_isinstance(user_prompt, str) - ret = [{"role": "system", "content": system_prompt}] - # Build user message content. 
- if images_as_base64: - # Multi-modal message with text and images - user_content = [{"type": "text", "text": user_prompt}] - for image_b64 in images_as_base64: - user_content.append( - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}, - } - ) - ret.append({"role": "user", "content": user_content}) - else: - # Text-only message. - ret.append({"role": "user", "content": user_prompt}) - return ret - - -def build_responses_input( - user_prompt: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, -) -> List[Dict[str, Any]]: - """ - Construct the user input payload for the Responses API. - - :param user_prompt: user prompt - :param images_as_base64: base64-encoded images - :return: input in the format expected by the Responses API - """ - hdbg.dassert_isinstance(user_prompt, str) - # Build user message content. - content_blocks = [{"type": "input_text", "text": user_prompt}] - if images_as_base64: - # Add image input. - for image_b64 in images_as_base64: - content_blocks.append( - { - "type": "input_image", - "image_url": f"data:image/jpeg;base64,{image_b64}", - } - ) - responses_input = [ - { - "role": "user", - "content": content_blocks, - } - ] - return responses_input - - -# ############################################################################# - - -@hcacsimp.simple_cache( - write_through=True, exclude_keys=["client", "cache_mode", "cost_tracker"] -) -def _call_api_sync( - # pylint: disable=unused-argument - # This is needed to support caching. - cache_mode: str, - client: openai.OpenAI, - user_prompt: str, - system_prompt: str, - temperature: float, - model: str, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - use_responses_api: bool = False, - **create_kwargs, -) -> Dict[Any, Any]: - """ - Make a non-streaming API call. - - See `get_completion()` for other parameter descriptions. 
- - :param client: LLM client - :param cost_tracker: LLMCostTracker instance to track costs - :param use_responses_api: whether to use the Responses API instead - of Chat Completions - :return: OpenAI API result as a dictionary - """ - if not use_responses_api: - messages = build_chat_completion_messages( - system_prompt, user_prompt, images_as_base64=images_as_base64 - ) - completion = client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - **create_kwargs, - ) - else: - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - completion = client.responses.create( - model=model, - instructions=system_prompt, - input=user_input, - temperature=temperature, - **create_kwargs, - ) - completion_obj = completion.to_dict() - if isinstance(completion, openai.types.responses.Response): - # Store the output of the Responses API. - completion_obj["output_text"] = completion.output_text - if cost_tracker is not None: - # Calculate the cost of the completion. - hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) - cost = cost_tracker.calculate_cost(completion, model) - cost_tracker.accumulate_cost(cost) - # Store the cost in the completion object. - completion_obj["cost"] = cost - return completion_obj - - -@hcacsimp.simple_cache( - cache_type="pickle", - write_through=True, - exclude_keys=["client", "cache_mode", "cost_tracker"], -) -def _call_structured_api_sync( - # pylint: disable=unused-argument - # This is needed to support caching. - cache_mode: str, - client: openai.OpenAI, - model: str, - user_prompt: str, - system_prompt: str, - temperature: float, - response_format: type[T], - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - print_cost: bool = False, - **create_kwargs, -) -> T: - """ - Make a non-streaming structured API call. - - See `get_structured_completion()` for parameter descriptions. 
- - :param client: LLM client - :param response_format: expected structured output format - :return: parsed output as the specified Pydantic model - """ - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - response = client.responses.parse( - model=model, - instructions=system_prompt, - input=user_input, - temperature=temperature, - text_format=response_format, - **create_kwargs, - ) - # Extract the parsed output. - parsed_output: T = response.output_parsed - # Track costs. - if cost_tracker is not None: - hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) - cost = cost_tracker.calculate_cost(response) - cost_tracker.accumulate_cost(cost) - if print_cost: - _LOG.info("cost=%.6f", cost) - return parsed_output - - -# ############################################################################# -# LLMClient -# ############################################################################# - - -class LLMClient: - """ - Class to handle LLM API client creation and requests. - """ - - def __init__( - self, - model: str, - ) -> None: - """ - Initialize the LLMClient. - - The model can be specified as: - - "gpt-4o-mini" - - "openai/gpt-4o-mini" - - "deepseek/deepseek-r1-0528-qwen3-8b:free/" - - :param model: model to use for the completion. - """ - hdbg.dassert_isinstance(model, str) - if model == "": - provider_name, model = self.get_default_model() - else: - provider_name, model = _get_llm_provider_and_model(model) - - self.provider_name = provider_name - self.model = model - self.client = None - - def get_default_model(self) -> Tuple[str, str]: - """ - Get the default provider and model for the client. - - :return: default provider and model used in the client - """ - provider_name = "openai" - model = self._get_default_model(provider_name) - return provider_name, model - - def create_client(self) -> None: - """ - Create an LLM client. 
- """ - if self.provider_name == "openai": - base_url = "https://api.openai.com/v1" - api_key = os.environ.get("OPENAI_API_KEY") - elif self.provider_name == "openrouter": - base_url = "https://openrouter.ai/api/v1" - api_key = os.environ.get("OPENROUTER_API_KEY") - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - _LOG.debug(hprint.to_str("self.provider_name base_url")) - client = openai.OpenAI(base_url=base_url, api_key=api_key) - self.client = client - - def call_llm( - self, - cache_mode: str, - user_prompt: str, - system_prompt: str, - temperature: float, - *, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - use_responses_api: bool = False, - **create_kwargs, - ) -> Dict[Any, Any]: - """ - Call the LLM API. - - Check `_call_api_sync()` params for more details. - """ - return _call_api_sync( - cache_mode=cache_mode, - client=self.client, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - model=self.model, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - use_responses_api=use_responses_api, - **create_kwargs, - ) - - def _get_default_model(self, provider_name: str) -> str: - """ - Get the default model for a provider. 
- - :return: default model for the provider - """ - if provider_name == "openai": - model = "gpt-4o" - elif provider_name == "openrouter": - model = "openai/gpt-4o" - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - return model - - -# ############################################################################# - - -@functools.lru_cache(maxsize=1024) -def get_completion( - user_prompt: str, - *, - system_prompt: str = "", - model: str = "", - report_progress: bool = False, - print_cost: bool = False, - cache_mode: str = "DISABLE_CACHE", - temperature: float = 0.1, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional["hllmcost.LLMCostTracker"] = None, - use_responses_api: bool = False, - return_raw: bool = False, - **create_kwargs, -) -> Union[str, Dict[Any, Any]]: - """ - Generate a completion using OpenAI's API. - - :param user_prompt: user input message - :param system_prompt: system instruction - :param model: model to use or empty string to use the default model - :param report_progress: whether to report progress running the API - call - :param cache_mode: - - "DISABLE_CACHE": No caching - - "REFRESH_CACHE": Make API calls and save responses to cache - - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache - - "NORMAL": Use cached responses if available, otherwise make API call - :param cache_file: file to save/load completion cache - :param temperature: adjust an LLM's sampling diversity: lower values make it - more deterministic, while higher values foster creative variation. - 0 < temperature <= 2, 0.1 is default value in OpenAI models. 
- :param images_as_base64: base64-encoded images to include in the user message - :param cost_tracker: LLMCostTracker instance to track costs - :param use_responses_api: whether to use the Responses API instead of Chat - Completions - :param return_raw: whether to return the raw API response instead of - extracting the text content - :param create_kwargs: additional params for the API call - :return: API response or its text content - """ - hdbg.dassert_in( - cache_mode, - ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), - ) - update_llm_cache = get_update_llm_cache() - if update_llm_cache: - cache_mode = "REFRESH_CACHE" - # Initialize LLM client. - # Skip client creation for HIT_CACHE_OR_ABORT mode since: - # - If cache hits, we never use the client - # - If cache misses, we abort before calling the function - llm_client = LLMClient(model=model) - if cache_mode != "HIT_CACHE_OR_ABORT": - llm_client.create_client() - if use_responses_api and llm_client.provider_name != "openai": - raise ValueError( - "Responses API is only supported for the 'openai' provider." - ) - if report_progress and return_raw: - raise ValueError( - "Streaming mode is only supported while returning text content." - ) - if report_progress and cache_mode == "HIT_CACHE_OR_ABORT": - raise ValueError( - "Streaming mode (report_progress=True) is not supported with " - "cache_mode='HIT_CACHE_OR_ABORT'." - ) - # Construct messages in OpenAI API request format. - _LOG.info("LLM API call ... 
") - memento = htimer.dtimer_start(logging.DEBUG, "LLM API call") - if not report_progress: - completion = llm_client.call_llm( - cache_mode=cache_mode, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - use_responses_api=use_responses_api, - **create_kwargs, - ) - if not use_responses_api: - txt_response = completion["choices"][0]["message"]["content"] - else: - txt_response = completion["output_text"] - else: - # TODO(gp): This is not working. It doesn't show the progress and it - # doesn't show the cost. - # Stream the output to show progress. - collected_messages = [] - if not use_responses_api: - # Stream Chat Completions API. - messages = build_chat_completion_messages( - system_prompt, user_prompt, images_as_base64=images_as_base64 - ) - completion = llm_client.client.chat.completions.create( - model=model, - messages=messages, - stream=True, - **create_kwargs, - ) - for chunk in tqdm.tqdm( - completion, desc="Generating completion", unit=" chunks" - ): - if chunk.choices[0].delta.content is not None: - collected_messages.append(chunk.choices[0].delta.content) - else: - # Stream Responses API. - user_input = build_responses_input( - user_prompt, images_as_base64=images_as_base64 - ) - completion = llm_client.client.responses.create( - model=model, - instructions=system_prompt, - input=user_input, - stream=True, - **create_kwargs, - ) - for event in tqdm.tqdm( - completion, desc="Generating response", unit=" events" - ): - if event.type == "response.output_text.delta": - collected_messages.append(event.delta.value) - txt_response = "".join(collected_messages) - # Report the time taken. - msg, _ = htimer.dtimer_stop(memento) - _LOG.info(msg) - if print_cost and "cost" in completion: - _LOG.info("cost=%.6f", completion["cost"]) - if return_raw: - # Return the full completion/response object. 
- return completion - return txt_response - - -@functools.lru_cache(maxsize=1024) -def get_structured_completion( - user_prompt: str, - response_format: type[T], - *, - system_prompt: str = "", - model: str = "", - cache_mode: str = "DISABLE_CACHE", - temperature: float = 0.1, - images_as_base64: Optional[Tuple[str, ...]] = None, - cost_tracker: Optional[hllmcost.LLMCostTracker] = None, - print_cost: bool = False, - **create_kwargs, -) -> T: - """ - Generate a Structured Output using OpenAI's API. - - See `get_completion()` for other parameter descriptions. - - :param response_format: expected structured output format - :param cache_mode: - - "DISABLE_CACHE": No caching - - "REFRESH_CACHE": Make API calls and save responses to cache - - "HIT_CACHE_OR_ABORT": Use cached responses, fail if not in cache - - "NORMAL": Use cached responses if available, otherwise make API call - :return: output parsed into the specified format - """ - hdbg.dassert_in( - cache_mode, - ("DISABLE_CACHE", "REFRESH_CACHE", "HIT_CACHE_OR_ABORT", "NORMAL"), - ) - update_llm_cache = get_update_llm_cache() - if update_llm_cache: - cache_mode = "REFRESH_CACHE" - # Initialize LLM client. - # Skip client creation for HIT_CACHE_OR_ABORT mode since: - # - If cache hits, we never use the client - # - If cache misses, we abort before calling the function - if cache_mode == "HIT_CACHE_OR_ABORT": - # Don't create the client; pass None since it won't be used. - llm_client = LLMClient(model=model) - client = None - model_to_use = llm_client.model - else: - llm_client = LLMClient(model=model) - llm_client.create_client() - if llm_client.provider_name != "openai": - raise ValueError( - "`get_structured_completion()` currently only supports the " - "'openai' provider (Responses API + Structured Outputs). " - f"Got provider_name='{llm_client.provider_name}'." - ) - client = llm_client.client - model_to_use = llm_client.model - # Retrieve a structured response. 
- parsed_output: T = _call_structured_api_sync( - cache_mode=cache_mode, - client=client, - model=model_to_use, - user_prompt=user_prompt, - system_prompt=system_prompt, - temperature=temperature, - response_format=response_format, - images_as_base64=images_as_base64, - cost_tracker=cost_tracker, - print_cost=print_cost, - **create_kwargs, - ) - return parsed_output - - -# ############################################################################# - - -def apply_prompt_to_dataframe( - df, - prompt, - model: str, - input_col, - response_col, - *, - chunk_size=50, - allow_overwrite: bool = False, -): - _LOG.debug(hprint.to_str("prompt model input_col response_col chunk_size")) - hdbg.dassert_in(input_col, df.columns) - if not allow_overwrite: - hdbg.dassert_not_in(response_col, df.columns) - response_data = [] - for start in tqdm.tqdm( - range(0, len(df), chunk_size), desc="Processing chunks" - ): - end = start + chunk_size - chunk = df.iloc[start:end] - _LOG.debug("chunk.size=%s", chunk.shape[0]) - data = chunk[input_col].astype(str).tolist() - data = [f"{i + 1}: {val}" for i, val in enumerate(data)] - user = "\n".join(data) - _LOG.debug("user=\n%s", user) - try: - response = get_completion(user, system_prompt=prompt, model=model) - except Exception as e: - _LOG.error( - f"Error processing column {input} in chunk {start}-{end}: {e}" - ) - raise e - # processed_response = response.split("\n") - processed_response = [ - ln.rstrip() for ln in response.splitlines() if ln.strip() - ] - _LOG.debug(hprint.to_str("processed_response")) - _LOG.debug("len(processed_response)=%s", len(processed_response)) - hdbg.dassert_eq(len(processed_response), chunk.shape[0]) - for i in range(len(processed_response)): - m = re.match(r"\d+: (.*)\s*", processed_response[i]) - hdbg.dassert(m, f"Invalid response: {processed_response[i]}") - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. 
- assert m is not None - processed_response[i] = m.group(1).rstrip().lstrip() - _LOG.debug(hprint.to_str("processed_response")) - response_data.extend(processed_response) - df[response_col] = response_data - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py deleted file mode 100644 index bc42d6816..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cli.py +++ /dev/null @@ -1,840 +0,0 @@ -""" -Import as: - -import helpers.hllm_cli as hllmcli -""" - -import json -import logging -import shlex -import subprocess -import sys -import importlib -import pprint -import time -from typing import Callable, Dict, List, Optional, Tuple, Union - -try: - import llm - import tokencost - - _LLM_AVAILABLE = True -except ImportError: - _LLM_AVAILABLE = False - -import pandas as pd -from tqdm import tqdm - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hmodule as hmodule -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -# _LOG.trace = lambda *args, **kwargs: None -_LOG.trace = _LOG.debug - - -def install_needed_modules( - *, use_sudo: bool = True, venv_path: Optional[str] = None -) -> None: - """ - Install needed modules for LLM CLI. 
- - :param use_sudo: whether to use sudo to install the module - :param venv_path: path to the virtual environment - E.g., /Users/saggese/src/venv/client_venv.helpers - """ - hmodule.install_module_if_not_present( - "llm", - package_name="llm", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - hmodule.install_module_if_not_present( - "tokencost", - package_name="tokencost", - use_sudo=use_sudo, - use_activate=True, - venv_path=venv_path, - ) - # Reload this module if already imported. - this_module_name = __name__ - if this_module_name in sys.modules: - importlib.reload(sys.modules[this_module_name]) - - -def shutup_llm_logging() -> None: - """ - Shut up OpenAI logging. - """ - # OpenAI client logging. - logging.getLogger("openai").setLevel(logging.WARNING) - # Common HTTP logging sources - logging.getLogger("httpx").setLevel(logging.WARNING) - logging.getLogger("httpcore").setLevel(logging.WARNING) - logging.getLogger("urllib3").setLevel(logging.WARNING) - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def _check_llm_executable() -> bool: - """ - Check if the llm command-line executable is available. - - :return: True if llm executable exists, False otherwise - """ - try: - hsystem.system("which llm", suppress_output=True) - _LOG.debug("llm command found") - return True - except Exception: - _LOG.debug("llm command not found") - return False - - -def _apply_llm_via_executable( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply LLM using the llm CLI executable. 
- - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: tuple of (LLM response as string, cost in dollars) - """ - # Build command. - cmd = ["llm"] - if system_prompt: - cmd.extend(["--system", system_prompt]) - if model: - cmd.extend(["--model", model]) - # Add the user prompt. - cmd.append(input_str) - _LOG.debug("Running command: %s", " ".join(cmd)) - # Execute command. - if expected_num_chars: - # Use streaming with progress bar. - proc = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - response_parts = [] - with tqdm(total=expected_num_chars, unit="char") as pbar: - for line in proc.stdout: - response_parts.append(line) - pbar.update(len(line)) - # Wait for process to complete. - proc.wait() - if proc.returncode != 0: - error_msg = proc.stderr.read() if proc.stderr else "" - hdbg.dfatal( - f"llm command failed with return code: {proc.returncode} error: {error_msg}" - ) - response = "".join(response_parts) - else: - # Run without progress bar. - cmd_str = " ".join(shlex.quote(arg) for arg in cmd) - _, response = hsystem.system_to_string(cmd_str) - # Cost calculation not available when using executable. - cost = 0.0 - _LOG.debug("Cost calculation not available when using llm executable") - return response, cost - - -def _calculate_cost_from_usage( - usage: object, - model: str, -) -> float: - """ - Calculate LLM cost from usage object. 
- - :param usage: usage object from LLM result containing input/output token counts - :param model: model name for cost calculation - :return: total cost in dollars - """ - input_tokens = usage.input - output_tokens = usage.output - prompt_cost = tokencost.calculate_cost_by_tokens( - num_tokens=input_tokens, model=model, token_type="input" - ) - completion_cost = tokencost.calculate_cost_by_tokens( - num_tokens=output_tokens, model=model, token_type="output" - ) - cost = float(prompt_cost + completion_cost) - return cost - - -def _apply_llm_via_library( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply LLM using the llm Python library. - - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: tuple of (LLM response as string, cost in dollars) - """ - # Get the model. - if model: - llm_model = llm.get_model(model) - else: - llm_model = llm.get_model() - _LOG.debug("Using model: %s", llm_model.model_id) - # Execute with or without progress bar. - if expected_num_chars: - # Use streaming with progress bar. - response_parts = [] - with tqdm(total=expected_num_chars, unit="char") as pbar: - for chunk in llm_model.prompt( - input_str, system=system_prompt, stream=True - ): - chunk_str = str(chunk) - response_parts.append(chunk_str) - pbar.update(len(chunk_str)) - response = "".join(response_parts) - # Streaming doesn't provide usage info, so we can't calculate cost. - cost = 0.0 - _LOG.debug("Cost calculation not available for streaming mode") - else: - # Run without progress bar. 
- _LOG.trace("system_prompt=\n%s", system_prompt) - _LOG.trace("input_str=\n%s", input_str) - result = llm_model.prompt(input_str, system=system_prompt) - response = result.text() - _LOG.trace("response=\n%s", response) - # Calculate cost. - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=llm_model.model_id, - ) - _LOG.debug( - "Cost: $%.6f (input: %d tokens, output: %d tokens)", - cost, - usage.input, - usage.output, - ) - return response, cost - - -# ############################################################################# -# Main functions -# ############################################################################# - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def apply_llm( - input_str: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - use_llm_executable: bool = False, - expected_num_chars: Optional[int] = None, -) -> Tuple[str, float]: - """ - Apply an LLM to process input text using either CLI executable or library. - - This function provides a unified interface to call LLMs either through the - llm command-line executable or through the llm Python library. It supports - optional system prompts, model selection, and progress bars for long outputs. 
- - :param input_str: the input text to process with the LLM - :param system_prompt: optional system prompt to guide the LLM's behavior - :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") - :param use_llm_executable: if True, use the llm CLI executable; if False, - use the llm Python library - :param expected_num_chars: optional expected number of characters in - output; if provided, displays a progress bar during generation - :return: tuple of (LLM response as string, cost in dollars) - """ - hdbg.dassert_isinstance(input_str, str) - hdbg.dassert_ne(input_str, "", "Input string cannot be empty") - if system_prompt is not None: - hdbg.dassert_isinstance(system_prompt, str) - if model is not None: - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty string") - if expected_num_chars is not None: - hdbg.dassert_isinstance(expected_num_chars, int) - hdbg.dassert_lt(0, expected_num_chars) - _LOG.debug("Applying LLM to input text") - _LOG.debug("use_llm_executable=%s", use_llm_executable) - # Route to appropriate implementation. - if use_llm_executable: - # Check that llm executable exists. - hdbg.dassert( - _check_llm_executable(), - "llm executable not found. Install it using: pip install llm", - ) - response, cost = _apply_llm_via_executable( - input_str, - system_prompt=system_prompt, - model=model, - expected_num_chars=expected_num_chars, - ) - else: - response, cost = _apply_llm_via_library( - input_str, - system_prompt=system_prompt, - model=model, - expected_num_chars=expected_num_chars, - ) - _LOG.debug("LLM processing completed") - return response, cost - - -def apply_llm_with_files( - input_file: str, - output_file: str, - *, - system_prompt: Optional[str] = None, - model: Optional[str] = None, - use_llm_executable: bool = False, - expected_num_chars: Optional[int] = None, -) -> float: - """ - Apply an LLM to process text from an input file and save to output file. 
- - This is a convenience wrapper around apply_llm() that handles reading from - and writing to files. It reads the input file, processes the content using - the LLM, and writes the result to the output file. - - :param input_file: path to the input file containing text to process - :param output_file: path to the output file where result will be saved - :param system_prompt: optional system prompt to guide the LLM's behavior - :param model: optional model name to use (e.g., "gpt-4", "claude-3-opus") - :param use_llm_executable: if True, use the llm CLI executable; if False, - use the llm Python library - :param expected_num_chars: optional expected number of characters in - output; if provided, displays a progress bar during generation - :return: cost in dollars - """ - hdbg.dassert_isinstance(input_file, str) - hdbg.dassert_ne(input_file, "", "Input file cannot be empty") - hdbg.dassert_isinstance(output_file, str) - hdbg.dassert_ne(output_file, "", "Output file cannot be empty") - _LOG.debug("Reading input from file: %s", input_file) - # Read input file. - input_str = hio.from_file(input_file) - _LOG.debug("Read %d characters from input file", len(input_str)) - # Process with LLM. - response, cost = apply_llm( - input_str, - system_prompt=system_prompt, - model=model, - use_llm_executable=use_llm_executable, - expected_num_chars=expected_num_chars, - ) - # Write output file. - _LOG.debug("Writing output to file: %s", output_file) - hio.to_file(output_file, response) - _LOG.debug("Wrote %d characters to output file", len(response)) - return cost - - -# ############################################################################# -# Batch processing -# ############################################################################# - - -def _validate_batch_inputs( - prompt: str, - input_list: List[str], -) -> None: - """ - Validate prompt and input list for batch processing. 
- - :param prompt: System prompt to validate - :param input_list: List of inputs to validate - :raises: Assertion errors if validation fails - """ - hdbg.dassert_isinstance(prompt, str) - hdbg.dassert_isinstance(input_list, list) - hdbg.dassert_lt(0, len(input_list), "Input list cannot be empty") - for idx, input_str in enumerate(input_list): - hdbg.dassert_isinstance( - input_str, - str, - "Input at index %d must be a string", - idx, - ) - hdbg.dassert_ne( - input_str, - "", - "Input at index %d cannot be empty", - idx, - ) - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def _llm( - system_prompt: str, - input_str: str, - model: str, -) -> Tuple[str, float]: - """ - Apply LLM using the llm Python library. - - :param input_str: the input text to process - :param system_prompt: optional system prompt to use - :param model: optional model name to use - :param expected_num_chars: optional expected number of characters in - output (used for progress bar) - :return: LLM response as string - """ - hdbg.dassert_isinstance(system_prompt, str) - _LOG.trace("system_prompt=\n%s", system_prompt) - # - hdbg.dassert_isinstance(input_str, str) - _LOG.trace("input_str=\n%s", input_str) - # - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty") - llm_model = llm.get_model(model) - _LOG.debug("model=%s", llm_model.model_id) - # Call the LLM. - result = llm_model.prompt(input_str, system=system_prompt) - response = result.text() - _LOG.trace("response=\n%s", response) - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=model, - ) - return response, cost - - -def _call_llm_or_test_functor( - input_str: str, - system_prompt: Optional[str], - model: str, - testing_functor: Optional[Callable[[str], str]], -) -> Tuple[str, float]: - """ - Call LLM or testing functor if provided. 
- - :param input_str: Input text to process - :param system_prompt: System prompt (can be None) - :param model: Model name (required for cost calculation) - :param testing_functor: Optional testing functor - :return: Tuple of (response, cost) where cost is 0.0 if not calculated - """ - if testing_functor is None: - response, cost = _llm(system_prompt, input_str, model) - # # Calculate cost for this call. - # # Build full prompt for cost calculation. - # if system_prompt: - # full_prompt = system_prompt + "\n" + input_str - # else: - # full_prompt = input_str - # cost = _calculate_llm_cost(full_prompt, response, model) - else: - response = testing_functor(input_str) - cost = 0.0 - return response, cost - - -def _calculate_llm_cost( - prompt: str, - completion: str, - model: str, -) -> float: - """ - Calculate the cost of an LLM call using tokencost library. - - :param prompt: the prompt sent to the LLM - :param completion: the completion returned by the LLM - :param model: the model name used - :return: total cost in dollars - """ - prompt_cost = tokencost.calculate_prompt_cost(prompt, model) - completion_cost = tokencost.calculate_completion_cost(completion, model) - total_cost = prompt_cost + completion_cost - # Convert to float to ensure consistent type. - return float(total_cost) - - -def apply_llm_batch_individual( - prompt: str, - input_list: List[str], - *, - model: str, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch of inputs one at the time. - """ - _validate_batch_inputs(prompt, input_list) - _LOG.debug("Processing batch of %d inputs individually", len(input_list)) - # Process each input sequentially with progress bar and error handling. - responses = [] - # Initialize total cost accumulator. 
- total_cost = 0.0 - for input_str in input_list: - response, cost = _call_llm_or_test_functor( - input_str=input_str, - system_prompt=prompt, - model=model, - testing_functor=testing_functor, - ) - total_cost += cost - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - _LOG.debug("Batch processing completed") - _LOG.debug("Total cost for batch with individual prompt: $%.6f", total_cost) - return responses, total_cost - - -def apply_llm_batch_with_shared_prompt( - prompt: str, - input_list: List[str], - *, - model: str, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch of input texts using the same system prompt. - """ - _validate_batch_inputs(prompt, input_list) - _LOG.debug("Processing batch of %d inputs", len(input_list)) - # Process each input sequentially with progress bar. - responses = [] - total_cost = 0.0 - if testing_functor is None: - # TODO(gp): Factor this out and use a cache. 
- llm_model = llm.get_model(model) - conv = llm.Conversation(model=llm_model) - for input_str in input_list: - result = conv.prompt(input_str, system=prompt) - response = result.text() - usage = result.usage() - cost = _calculate_cost_from_usage( - usage=usage, - model=model, - ) - total_cost += cost - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - else: - for input_str in input_list: - response = testing_functor(input_str) - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - _LOG.debug("Batch processing completed") - _LOG.debug("Total cost for batch with shared prompt: $%.6f", total_cost) - return responses, total_cost - - -def apply_llm_batch_combined( - prompt: str, - input_list: List[str], - *, - model: str, - max_retries: int = 3, - testing_functor: Optional[Callable[[str], str]] = None, - progress_bar_object: Optional[tqdm] = None, -) -> Tuple[List[str], float]: - """ - Apply an LLM to process a batch using a single combined prompt. - - This function combines all queries into a single prompt and expects - structured JSON output. It includes retry logic for failed JSON parsing. - """ - _validate_batch_inputs(prompt, input_list) - hdbg.dassert_isinstance(max_retries, int) - hdbg.dassert_lt(0, max_retries) - _LOG.debug( - "Processing batch of %d inputs with combined prompt", len(input_list) - ) - # Build combined prompt. - - combined_prompt = f"{prompt}\n\n" - instruction = """ - Return the results only as a valid JSON object with string values, using - zero-based numeric keys that match the item numbers. - - Output format: - '{"0": "result1", "1": "result2", ...} - - """ - combined_prompt += hprint.dedent(instruction) - for idx, input_str in enumerate(input_list): - combined_prompt += f"{idx}: {input_str}\n" - combined_prompt += "\nReturn ONLY the JSON object, no other text." - _LOG.debug("Combined prompt:\n%s", combined_prompt) - # You are a calculator. 
Return only the numeric result. - # ``` - # Process the following items and return results as JSON in the format: - # {"0": "result1", "1": "result2", ...} - # 0: 2 + 2 - # 1: 3 * 3 - # 2: 10 - 5 - # 3: 20 / 4 - # Return ONLY the JSON object, no other text. - # ``` - # Process with retries for JSON parsing. - total_cost = 0.0 - if testing_functor is None: - for retry_num in range(max_retries): - _LOG.debug( - "Processing batch of %d inputs with combined prompt (attempt %d/%d)", - len(input_list), - retry_num + 1, - max_retries, - ) - system_prompt = combined_prompt - user_prompt = "Process the items listed above." - response, cost = _llm(system_prompt, user_prompt, model) - total_cost += cost - try: - # Parse JSON response. - # E.g., - # ``` - # {"0": "4", "1": "9", "2": "5", "3": "5"} - # ``` - _LOG.debug("Parsing JSON response:\n%s", response) - # Extract JSON from response (handle cases where LLM adds extra text). - response_stripped = response.strip() - # Find JSON object boundaries. - json_start = response_stripped.find("{") - json_end = response_stripped.rfind("}") + 1 - hdbg.dassert_lte(0, json_start) - hdbg.dassert_lt(json_start, json_end) - json_str = response_stripped[json_start:json_end] - result_dict = json.loads(json_str) - # Convert dict to list in order. 
- responses = [] - for idx in range(len(input_list)): - key = str(idx) - if key in result_dict: - responses.append(result_dict[key]) - else: - _LOG.warning("Missing result for index %d", idx) - responses.append("") - _LOG.debug("Successfully parsed JSON response") - if progress_bar_object is not None: - progress_bar_object.update(len(input_list)) - _LOG.debug( - "Total cost for batch with combined prompt: $%.6f", - total_cost, - ) - return responses, total_cost - except (json.JSONDecodeError, ValueError) as e: - _LOG.debug( - "JSON parsing failed (attempt %d/%d): %s", - retry_num + 1, - max_retries, - e, - ) - if retry_num == max_retries - 1: - hdbg.dfatal( - "Failed to parse JSON after %d retries", max_retries - ) - # Add instruction to retry. - combined_prompt += "\n\nPrevious response had invalid JSON format. Please return ONLY a valid JSON object." - else: - responses = [] - for input_str in input_list: - response = testing_functor(input_str) - responses.append(response) - if progress_bar_object is not None: - progress_bar_object.update(1) - total_cost = 0.0 - return responses, total_cost - # Should not reach here. - raise RuntimeError("Unexpected error in apply_llm_batch_combined") - - -# ############################################################################# - - -# TODO(gp): Move it somewhere else. -def get_tqdm_progress_bar() -> tqdm: - # Use appropriate tqdm for notebook or terminal - try: - from IPython import get_ipython - - if get_ipython() is not None: - from tqdm.notebook import tqdm as notebook_tqdm - - tqdm_progress = notebook_tqdm - else: - tqdm_progress = tqdm - except ImportError: - tqdm_progress = tqdm - return tqdm_progress - - -# TODO(gp): Skip values that already have a value in the target column. 
-# TODO(gp): Parallelize -def apply_llm_prompt_to_df( - prompt: str, - df: pd.DataFrame, - extractor: Callable[[Union[str, pd.Series]], str], - target_col: str, - batch_mode: str, - *, - model: str, - batch_size: int = 50, - dump_every_batch: Optional[str] = None, - tag: str = "Processing", - testing_functor: Optional[Callable[[str], str]] = None, - use_sys_stderr: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, int]]: - """ - Apply an LLM to process a dataframe column using the same system prompt. - - This function processes text from dataframe rows using an extractor function, - applies the LLM to each item in batches, and stores the results in a target - column. It can optionally save progress to a file after each batch. - - :param prompt: system prompt to guide the LLM's behavior - :param df: dataframe to process - :param extractor: callable that extracts text from a row or string - :param target_col: name of column to store results - :param batch_mode: batch mode to use (individual, shared_prompt, combined) - :param model: model name to use (e.g., "gpt-4", "claude-3-opus") - :param batch_size: number of items to process in each batch - :param dump_every_batch: optional file path to dump the dataframe after each batch - :param tag: description tag for progress bar - :param testing_functor: optional functor to use for testing - :return: tuple of (dataframe with results, statistics dict) - """ - start_time = time.time() - hdbg.dassert_isinstance(prompt, str) - hdbg.dassert_ne(prompt, "", "Prompt cannot be empty") - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_lt(0, len(df), "Dataframe cannot be empty") - hdbg.dassert_isinstance(target_col, str) - hdbg.dassert_ne(target_col, "", "Target column cannot be empty") - hdbg.dassert_isinstance(model, str) - hdbg.dassert_ne(model, "", "Model cannot be empty") - hdbg.dassert_isinstance(batch_size, int) - hdbg.dassert_lt(0, batch_size) - if dump_every_batch is not None: - 
hdbg.dassert_isinstance(dump_every_batch, str) - hdbg.dassert_ne(dump_every_batch, "", "Dump file path cannot be empty") - # Create target column if it doesn't exist. - if target_col not in df.columns: - df[target_col] = None - # Process items in batches with progress bar for entire workload. - num_items = len(df) - num_batches = (num_items + batch_size - 1) // batch_size - _LOG.info( - "Processing %d items in %d batches of %d items each", - num_items, - num_batches, - batch_size, - ) - _LOG.info(hprint.to_str("model batch_mode")) - num_skipped = 0 - progress_bar_ctor = get_tqdm_progress_bar() - progress_bar_object = progress_bar_ctor( # type: ignore - total=num_items, - desc=tag, - dynamic_ncols=True, - # Workaround for unit tests. - file=sys.__stderr__ if use_sys_stderr else None, - ) - total_cost = 0.0 - # TODO(gp): Precompute the batch indices that needs to be processed. - for batch_num in range(num_batches): - # Get batch rows. - start_idx = batch_num * batch_size - end_idx = min(start_idx + batch_size, len(df)) - rows = df.iloc[start_idx:end_idx] - # Extract items from rows, filtering out invalid ones. - batch_items = [] - batch_indices = [] - for idx, row in rows.iterrows(): - extracted_text = extractor(row) - # Check if extraction returned valid text (not NaN/None/empty). - if extracted_text != "": - batch_items.append(extracted_text) - batch_indices.append(idx) - else: - # Set NaN for rows with missing company information. - df.at[idx, target_col] = "" - num_skipped += 1 - progress_bar_object.update(1) - # Call LLM only if there are valid items in this batch. 
- if batch_items: - _LOG.debug( - "Processing batch %d/%d (%d items, %d skipped)", - batch_num + 1, - num_batches, - len(batch_items), - len(rows) - len(batch_items), - ) - if batch_mode == "individual": - func = apply_llm_batch_individual - elif batch_mode == "shared_prompt": - func = apply_llm_batch_with_shared_prompt - elif batch_mode == "combined": - func = apply_llm_batch_combined - else: - hdbg.dfatal("Invalid batch mode: %s", batch_mode) - batch_responses, batch_cost = func( - prompt=prompt, - input_list=batch_items, - model=model, - testing_functor=testing_functor, - progress_bar_object=progress_bar_object, - ) - # Update total_cost. - total_cost += batch_cost - # Store results back into dataframe. - for idx, response in zip(batch_indices, batch_responses): - df.at[idx, target_col] = response - else: - _LOG.debug( - "Skipping batch %d/%d (all %d items have missing data)", - batch_num + 1, - num_batches, - len(rows), - ) - # Dump dataframe to file after batch if requested. - if dump_every_batch is not None: - _LOG.debug("Dumping dataframe to file: %s", dump_every_batch) - df.to_csv(dump_every_batch, index=False) - # Calculate elapsed time. 
- elapsed_time = time.time() - start_time - stats = { - "num_items": num_items, - "num_skipped": num_skipped, - "num_batches": num_batches, - "total_cost_in_dollars": total_cost, - "elapsed_time_in_seconds": elapsed_time, - } - _LOG.info("Processing completed:\n%s", pprint.pformat(stats)) - return df, stats diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py deleted file mode 100644 index 3d33b17d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hllm_cost.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Import as: - -import helpers.hllm_cost as hllmcost -""" - -import logging -import os -from typing import Any - -import requests - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# OpenRouter API Helpers -# ############################################################################# - - -def _get_models_info_file() -> str: - """ - Get the path to the file for storing OpenRouter models info. - """ - dir_path = hgit.get_helpers_root_dir() - file_path = os.path.join( - dir_path, "dev_scripts_helpers/llms", "openrouter_models_info.csv" - ) - return file_path - - -def _retrieve_openrouter_model_info() -> "pd.DataFrame": - """ - Retrieve OpenRouter models info from the OpenRouter API. 
- """ - import pandas as pd - - response = requests.get("https://openrouter.ai/api/v1/models") - # {'architecture': {'input_modalities': ['text', 'image'], - # 'instruct_type': None, - # 'modality': 'text+image->text', - # 'output_modalities': ['text'], - # 'tokenizer': 'Mistral'}, - # 'context_length': 131072, - # 'created': 1746627341, - # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' - # 'language model designed to deliver frontier-level ' - # ... - # 'broad compatibility across cloud environments.', - # 'id': 'mistralai/mistral-medium-3', - # 'name': 'Mistral: Mistral Medium 3', - # 'per_request_limits': None, - # 'pricing': {'completion': '0.000002', - # 'image': '0', - # 'internal_reasoning': '0', - # 'prompt': '0.0000004', - # 'request': '0', - # 'web_search': '0'}, - # 'supported_parameters': ['tools', - # 'tool_choice', - # 'max_tokens', - # 'temperature', - # 'top_p', - # 'stop', - # 'frequency_penalty', - # 'presence_penalty', - # 'response_format', - # 'structured_outputs', - # 'seed'], - # 'top_provider': {'context_length': 131072, - # 'is_moderated': False, - # 'max_completion_tokens': None}} - response_json = response.json() - # There is only one key in the response. - hdbg.dassert_eq(list(response_json.keys()), ["data"]) - response_json = response_json["data"] - model_info_df = pd.DataFrame(response_json) - return model_info_df - - -def _save_models_info_to_csv( - model_info_df: "pd.DataFrame", - file_name: str, -) -> "pd.DataFrame": - """ - Save models info to a CSV file. - """ - hdbg.dassert_isinstance(file_name, str) - hdbg.dassert_ne(file_name, "") - # TODO(*): Save all the data. - # Extract prompt, completion pricing from pricing column. 
- model_info_df["prompt_pricing"] = model_info_df["pricing"].apply( - lambda x: x["prompt"] - ) - model_info_df["completion_pricing"] = model_info_df["pricing"].apply( - lambda x: x["completion"] - ) - required_columns = [ - "id", - "name", - "description", - "prompt_pricing", - "completion_pricing", - "supported_parameters", - ] - # Take only relevant columns. - model_info_df = model_info_df.loc[:, required_columns] - # Save to CSV file. - model_info_df.to_csv(file_name, index=False) - return model_info_df - - -# ############################################################################# -# LLMCostTracker -# ############################################################################# - - -class LLMCostTracker: - """ - Track the costs of LLM API calls through one of the providers. - """ - - def __init__(self, provider_name: str, model: str) -> None: - """ - Initialize the class. - """ - self.current_cost: float = 0.0 - self.provider_name = provider_name - self.model = model - - def end_logging_costs(self) -> None: - """ - End logging costs by resetting the current cost to 0. - """ - self.current_cost = 0.0 - - def accumulate_cost(self, cost: float) -> None: - """ - Accumulate the cost. - - :param cost: The cost to accumulate - """ - self.current_cost += cost - - def get_current_cost(self) -> float: - """ - Get the current accumulated cost. - - :return: The current cost - """ - return self.current_cost - - def calculate_cost( - self, - completion: Any, - *, - models_info_file: str = "", - ) -> float: - """ - Calculate the cost of an API call, based on the provider. - - :param completion: the completion response from API - :return: the calculated cost in dollars - """ - import pandas as pd - - # Get the number of input and output tokens. 
- usage = getattr(completion, "usage", None) - hdbg.dassert( - usage is not None, - "Completion/response object has no 'usage' attribute", - ) - if hasattr(usage, "prompt_tokens") and hasattr( - usage, "completion_tokens" - ): - prompt_tokens = usage.prompt_tokens - completion_tokens = usage.completion_tokens - elif hasattr(usage, "input_tokens") and hasattr(usage, "output_tokens"): - prompt_tokens = usage.input_tokens - completion_tokens = usage.output_tokens - else: - raise ValueError( - f"Unknown usage structure on completion object: {usage}" - ) - # Get the provider and model details. - if self.provider_name == "openai": - # Get the pricing for the selected model. - # TODO(gp): Use pricing from OpenAI or Openrouter API. - # https://openai.com/api/pricing/ - # https://gptforwork.com/tools/openai-chatgpt-api-pricing-calculator - # Cost per 1M tokens. - pricing = { - "gpt-3.5-turbo": {"prompt": 0.5, "completion": 1.5}, - "gpt-4o-mini": {"prompt": 0.15, "completion": 0.60}, - "gpt-4o": {"prompt": 2.5, "completion": 10}, - "gpt-5.2": {"prompt": 1.75, "completion": 14.0}, - "gpt-5.1": {"prompt": 1.25, "completion": 10.0}, - "gpt-5-mini": {"prompt": 0.25, "completion": 2.00}, - } - hdbg.dassert_in(self.model, pricing) - model_pricing = pricing[self.model] - # Calculate the cost. - cost = (prompt_tokens / 1e6) * model_pricing["prompt"] + ( - completion_tokens / 1e6 - ) * model_pricing["completion"] - elif self.provider_name == "openrouter": - # If the model info file doesn't exist, download one. - if models_info_file == "": - models_info_file = _get_models_info_file() - _LOG.debug(hprint.to_str("models_info_file")) - if not os.path.isfile(models_info_file): - model_info_df = _retrieve_openrouter_model_info() - _save_models_info_to_csv(model_info_df, models_info_file) - else: - model_info_df = pd.read_csv(models_info_file) - # Extract pricing for this model. 
- hdbg.dassert_in(self.model, model_info_df["id"].values) - row = model_info_df.loc[model_info_df["id"] == self.model].iloc[0] - prompt_price = row["prompt_pricing"] - completion_price = row["completion_pricing"] - # Compute cost. - cost = ( - prompt_tokens * prompt_price - + completion_tokens * completion_price - ) - else: - raise ValueError(f"Unknown provider: {self.provider_name}") - _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost")) - return cost diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py deleted file mode 100644 index 94738202c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.py +++ /dev/null @@ -1,809 +0,0 @@ -""" -Import as: - -import helpers.hlogging as hloggin -""" - -import asyncio -import contextlib -import copy -import datetime -import logging -from typing import Any, Iterable, List, Optional, Tuple, Union - -# Avoid dependency from other helpers modules since this is used when the code -# is bootstrapped. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# Copied from `helpers/hsystem.py` to avoid circular imports. -def _is_running_in_ipynb() -> bool: - try: - _ = get_ipython().config # type: ignore - res = True - except NameError: - res = False - return res - - -# Copied from `helpers/hsystem.py` to avoid circular dependencies. 
-def get_user_name() -> str: - import getpass - - res = getpass.getuser() - return res - - -# ############################################################################# -# Memory usage -# ############################################################################# - -# TODO(gp): Consider moving to hmemory.py - - -MemoryUsage = Tuple[float, float, float] - - -def get_memory_usage(process: Optional[Any] = None) -> MemoryUsage: - """ - Return the memory usage in terms of resident, virtual, and percent of total - used memory. - """ - if process is None: - import psutil - - process = psutil.Process() - rss_in_GB = process.memory_info().rss / (1024**3) - vms_in_GB = process.memory_info().vms / (1024**3) - mem_pct = process.memory_percent() - return (rss_in_GB, vms_in_GB, mem_pct) - - -def memory_to_str(resource_use: MemoryUsage, *, verbose: bool = True) -> str: - (rss_in_GB, vms_in_GB, mem_pct) = resource_use - if verbose: - txt = "rss=%.3fGB vms=%.3fGB mem_pct=%.0f%%" % ( - rss_in_GB, - vms_in_GB, - mem_pct, - ) - else: - txt = "%.3fGB %.3fGB %.0f%%" % (rss_in_GB, vms_in_GB, mem_pct) - return txt - - -def get_memory_usage_as_str(process: Optional[Any] = None) -> str: - """ - Like `get_memory_usage()` but returning a formatted string. - """ - resource_use = get_memory_usage(process) - txt = memory_to_str(resource_use) - return txt - - -# ############################################################################# -# Utils. -# ############################################################################# - -# White: 37. -# Red: 31 -# Green: 32 -# Yellow: 33 -# Blu: 34 -# Cyan: 36 -# White on red background: 41 - -_COLOR_MAPPING = { - # Green. - "TRACE": (32, "TRACE"), - # Blu. - "DEBUG": (34, "DEBUG"), - # Cyan. - "INFO": (36, "INFO "), - # White on red background. 
- "WARNING": (41, "WARN "), - "ERROR": (41, "ERROR"), - "CRITICAL": (41, "CRTCL"), -} - - -def reset_logger() -> None: - import importlib - - print("Resetting logger...") - logging.shutdown() - importlib.reload(logging) - - -def get_all_loggers() -> List: - """ - Return list of all registered loggers. - """ - logger_dict = logging.root.manager.loggerDict # type: ignore # pylint: disable=no-member - loggers = [logging.getLogger(name) for name in logger_dict] - return loggers - - -def get_matching_loggers( - module_names: Union[str, Iterable[str]], verbose: bool -) -> List: - """ - Find loggers that match a name or a name in a set. - """ - if isinstance(module_names, str): - module_names = [module_names] - loggers = get_all_loggers() - if verbose: - print("loggers=\n", "\n".join(map(str, loggers))) - # - sel_loggers = [] - for module_name in module_names: - if verbose: - print(f"module_name={module_name}") - # TODO(gp): We should have a regex. - # str(logger) looks like `` - sel_loggers_tmp = [ - logger - for logger in loggers - if str(logger).startswith(" None: - """ - Reduce the verbosity for external modules that are very chatty. - - :param verbosity: level of verbosity used for chatty modules: the - higher the better - :param verbose: print extra information - """ - module_names = [ - "aiobotocore", - "asyncio", - "boto", - "boto3", - "botocore", - "ccxt", # CCXT also needs to be shut up after the `exchange` is built. 
- "fsspec", - "hooks", - "httpcore", - "httpx", - "invoke", - "matplotlib", - "nose", - "openai", - "s3fs", - "s3transfer", - "urllib3", - # "ib_insync", - ] - # verbose = True - loggers = get_matching_loggers(module_names, verbose) - loggers = sorted(loggers, key=lambda logger: logger.name) - for logger in loggers: - logger.setLevel(verbosity) - if len(loggers) > 0: - logger_names = list({logger.name for logger in loggers}) - _LOG.debug( - "Shut up %d modules: %s", len(loggers), ", ".join(logger_names) - ) - # if _LOG.getEffectiveLevel() < logging.DEBUG: - # print(WARNING + - # " Shutting up %d modules: %s" - # % (len(loggers), ", ".join([logger.name for logger in loggers])) - # ) - - -# ############################################################################# -# _LocalTimeZoneFormatter -# ############################################################################# - - -# From https://stackoverflow.com/questions/32402502 -class _LocalTimeZoneFormatter: - """ - Override logging.Formatter to use an aware datetime object. - """ - - def __init__(self, *args: Any, **kwargs: Any): - super().__init__(*args, **kwargs) # type: ignore[call-arg] - try: - # TODO(gp): Automatically detect the time zone. It might be complicated in - # Docker. - import pytz - - self._tzinfo = pytz.timezone("America/New_York") - except ModuleNotFoundError: - # print(f"Can't import pytz: using UTC\n{str(e)}") - self._tzinfo = None - - def converter(self, timestamp: float) -> datetime.datetime: - # To make the linter happy and respecting the signature of the - # superclass method. - _ = self - # timestamp=1622423570.0147252 - dt = datetime.datetime.utcfromtimestamp(timestamp) - # Convert it to an aware datetime object in UTC time. - dt = dt.replace(tzinfo=datetime.timezone.utc) - if self._tzinfo is not None: - # Convert it to desired timezone. 
- dt = dt.astimezone(self._tzinfo) - return dt - - def formatTime( - self, record: logging.LogRecord, datefmt: Optional[str] = None - ) -> str: - dt = self.converter(record.created) - if datefmt: - s = dt.strftime(datefmt) - else: - try: - s = dt.isoformat(timespec="milliseconds") - except TypeError: - s = dt.isoformat() - return s - - -# ############################################################################# -# _ColoredFormatter -# ############################################################################# - - -# [mypy] error: Definition of "converter" in base class -# "_LocalTimeZoneFormatter" is incompatible with definition in base class -# "Formatter" -class _ColoredFormatter( # type: ignore[misc] - _LocalTimeZoneFormatter, logging.Formatter -): - """ - Logging formatter using colors for different levels. - """ - - _SKIP_DEBUG = True - - def format(self, record: logging.LogRecord) -> str: - colored_record = copy.copy(record) - # `levelname` is the internal name and can't be changed to `level_name` - # as per our conventions. - levelname = colored_record.levelname - if _ColoredFormatter._SKIP_DEBUG and levelname == "DEBUG": - colored_levelname = "" - else: - # Use white as default. - prefix = "\033[" - suffix = "\033[0m" - assert levelname in _COLOR_MAPPING, "Can't find info '%s'" - color_code, tag = _COLOR_MAPPING[levelname] - # Align the level name. - colored_levelname = f"{prefix}{color_code}m{tag}{suffix}" - colored_record.levelname = colored_levelname - return logging.Formatter.format(self, colored_record) - - -# From https://stackoverflow.com/questions/2183233 -def addLoggingLevel(levelName, levelNum, methodName=None): - """ - Comprehensively adds a new logging level to the `logging` module and the - currently configured logging class. - - `levelName` becomes an attribute of the `logging` module with the value - `levelNum`. 
`methodName` becomes a convenience method for both `logging` - itself and the class returned by `logging.getLoggerClass()` (usually just - `logging.Logger`). If `methodName` is not specified, `levelName.lower()` is - used. - - To avoid accidental clobberings of existing attributes, this method will - raise an `AttributeError` if the level name is already an attribute of the - `logging` module or if the method name is already present - - Example - ------- - >>> addLoggingLevel('TRACE', logging.DEBUG - 5) - >>> logging.getLogger(__name__).setLevel("TRACE") - >>> logging.getLogger(__name__).trace('that worked') - >>> logging.trace('so did this') - >>> logging.TRACE - 5 - """ - if not methodName: - methodName = levelName.lower() - - if hasattr(logging, levelName): - raise AttributeError( - "{} already defined in logging module".format(levelName) - ) - if hasattr(logging, methodName): - raise AttributeError( - "{} already defined in logging module".format(methodName) - ) - if hasattr(logging.getLoggerClass(), methodName): - raise AttributeError( - "{} already defined in logger class".format(methodName) - ) - - # This method was inspired by the answers to Stack Overflow post - # http://stackoverflow.com/q/2183233/2988730, especially - # http://stackoverflow.com/a/13638084/2988730 - def logForLevel(self, message, *args, **kwargs): - if self.isEnabledFor(levelNum): - self._log(levelNum, message, args, **kwargs) - - def logToRoot(message, *args, **kwargs): - logging.log(levelNum, message, *args, **kwargs) - - logging.addLevelName(levelNum, levelName) - setattr(logging, levelName, levelNum) - setattr(logging.getLoggerClass(), methodName, logForLevel) - setattr(logging, methodName, logToRoot) - - -addLoggingLevel("TRACE", 5) - - -# Note that this doesn't avoid evaluating the call. -# The only way to be completely sure that there is no evaluation is: -# ``` -# if False: _LOG.debug(...) 
-# ``` -def shut_up_log_debug(logger: logging.Logger) -> None: - logging.disable(logging.DEBUG) - # logger.debug = lambda *_: 0 - # logger.trace = lambda *_: 0 - - -# ############################################################################# -# ResourceUsageFilter -# ############################################################################# - - -# From https://stackoverflow.com/questions/10848342 -# and https://docs.python.org/3/howto/logging-cookbook.html#filters-contextual -class ResourceUsageFilter(logging.Filter): - """ - Add fields to the logger about memory and CPU use. - """ - - def __init__(self, report_cpu_usage: bool): - super().__init__() - import psutil - - self._process = psutil.Process() - self._report_cpu_usage = report_cpu_usage - if self._report_cpu_usage: - # Start sampling the CPU usage. - self._process.cpu_percent(interval=1.0) - - def filter(self, record: logging.LogRecord) -> bool: - """ - Override `logging.Filter()`, adding several fields to the logger. - """ - p = self._process - # Report memory usage. - resource_use = get_memory_usage_as_str(p) - # Report CPU usage. - if self._report_cpu_usage: - # CPU usage since the previous call. - cpu_use = p.cpu_percent(interval=None) - resource_use += " cpu=%.0f%%" % cpu_use - record.resource_use = resource_use # type: ignore - return True - - -# ############################################################################# - - -# TODO(gp): Replace `force_print_format` and `force_verbose_format` with `mode`. -def _get_logging_format( - force_print_format: bool, - force_verbose_format: bool, - force_no_warning: bool, - report_memory_usage: bool, - date_format_mode: str = "time", -) -> Tuple[str, str]: - """ - Compute the logging format depending whether running on notebook or in a - shell. 
- - The logging format can be: - - print: looks like a `print` statement - - :param force_print_format: force to use the non-verbose format - :param force_verbose_format: force to use the verbose format - """ - if _is_running_in_ipynb() and not force_no_warning: - print("WARNING: Running in Jupyter") - verbose_format = not _is_running_in_ipynb() - # - assert not (force_verbose_format and force_print_format), ( - f"Can't use both force_verbose_format={force_verbose_format} " - + f"and force_print_format={force_print_format}" - ) - if force_verbose_format: - verbose_format = True - if force_print_format: - verbose_format = False - # - if verbose_format: - # TODO(gp): We would like to have filename:name:funcName:lineno all - # justified on 15 chars. - # See https://docs.python.org/3/howto/logging-cookbook.html#use-of - # -alternative-formatting-styles - # Something like: - # {{asctime}-5s {{filename}{name}{funcname}{linedo}d}-15s {message} - # - # %(pathname)s Full pathname of the source file where the logging call was - # issued (if available). - # %(filename)s Filename portion of pathname. - # %(module)s Module (name portion of filename). - if True: - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - ) - if report_memory_usage: - # rss=0.3GB vms=2.0GB mem_pct=2% cpu=91% - log_format += " [%(resource_use)-40s]" - log_format += ( - # lib_tasks _delete_branches - " %(module)-20s: %(funcName)-30s:" - # 142: ... - " %(lineno)-4d:" - " %(message)s" - ) - else: - # Super verbose: to help with debugging print more info without trimming. - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - # .../src/lem1/amp/helpers/system_interaction.py - # _system : - " %(pathname)s %(funcName)-20s " - # 199: ... 
- " %(lineno)d:" - " %(message)s" - ) - if date_format_mode == "time": - date_fmt = "%H:%M:%S" - elif date_format_mode == "date_time": - date_fmt = "%m-%d_%H:%M" - elif date_format_mode == "date_timestamp": - date_fmt = "%Y-%m-%d %I:%M:%S %p" - else: - raise ValueError(f"Invalid date_format_mode='{date_format_mode}'") - else: - # Make logging look like a normal print(). - # TODO(gp): We want to still prefix with WARNING and ERROR. - log_format = "%(message)s" - date_fmt = "" - return date_fmt, log_format - - -def set_v1_formatter( - ch: Any, - root_logger: Any, - force_no_warning: bool, - force_print_format: bool, - force_verbose_format: bool, - report_cpu_usage: bool, - report_memory_usage: bool, -) -> _ColoredFormatter: - # Decide whether to use verbose or print format. - date_fmt, log_format = _get_logging_format( - force_print_format, - force_verbose_format, - force_no_warning, - report_memory_usage, - ) - # Use normal formatter. - # formatter = logging.Formatter(log_format, datefmt=date_fmt) - # Use formatter with colors. - formatter = _ColoredFormatter(log_format, date_fmt) - ch.setFormatter(formatter) - root_logger.addHandler(ch) - # Report resource usage. - if report_memory_usage: - # Get root logger. - log = logging.getLogger("") - # Create filter. - f = ResourceUsageFilter(report_cpu_usage) - # The ugly part:adding filter to handler. - log.handlers[0].addFilter(f) - return formatter - - -# ############################################################################# -# CustomFormatter -# ############################################################################# - - -# pylint: disable=line-too-long -class CustomFormatter(logging.Formatter): - """ - Override `format` to implement a completely custom logging formatting. 
- - The logging output looks like: - ``` - 07:37:17 /app/amp/helpers/hunit_test.py setUp 932 - Resetting random.seed to 20000101 - ``` - or for simulated time: - ``` - 07:43:17 @ 2022-01-18 02:43:17 workload /app/amp/helpers/test/test_hlogging.py workload:33 - -> wait - ``` - """ - - def __init__( - self, - *args: Any, - date_format_mode: str = "time", - report_memory_usage: bool = False, - report_cpu_usage: bool = False, - **kwargs: Any, - ): - super().__init__(*args, **kwargs) - self._date_fmt = self._get_date_format(date_format_mode) - # - try: - # TODO(gp): Automatically detect the time zone. It might be complicated - # in Docker. - import pytz - - self._tzinfo = pytz.timezone("America/New_York") - except ModuleNotFoundError: - # print(f"Can't import pytz: using UTC\n{str(e)}") - self._tzinfo = None - # - self._report_memory_usage = report_memory_usage - self._report_cpu_usage = report_cpu_usage - if self._report_memory_usage or self._report_cpu_usage: - import psutil - - self._process = psutil.Process() - if self._report_cpu_usage: - # Start sampling the CPU usage. - self._process.cpu_percent(interval=1.0) - - def format(self, record: logging.LogRecord) -> str: - # record = copy.copy(record) - # print(pprint.pformat(record.__dict__)) - # `record` looks like: - # {'args': (30,), - # 'created': 1642456725.5569131, - # 'exc_info': None, - # 'exc_text': None, - # 'filename': 'logging_main.py', - # 'funcName': 'test_logger', - # 'levelname': 'WARNING', - # 'levelno': 30, - # 'lineno': 105, - # 'module': 'logging_main', - # 'msecs': 556.9131374359131, - # 'msg': 'WARNING=%s', - # 'name': '__main__', - # 'pathname': 'helpers/logging_testing/logging_main.py', - # 'process': 16484, - # 'processName': 'MainProcess', - # 'relativeCreated': 29.956817626953125, - # 'stack_info': None, - # 'thread': 140250120021824, - # 'threadName': 'MainThread'} - msg = "" - # Add the wall clock time. - msg += self._get_wall_clock_time() - # Report memory usage, if needed. 
- # rss=0.240GB vms=1.407GB mem_pct=2% cpu=92% - if self._report_memory_usage: - msg_tmp = get_memory_usage_as_str(self._process) - # Escape the % to avoid confusing for a string to expand. - msg_tmp = msg_tmp.replace("%", "%%") - msg += " " + msg_tmp - # Report CPU usage, if needed. - if self._report_cpu_usage: - # CPU usage since the previous call. - msg_tmp = " cpu=%.0f" % self._process.cpu_percent(interval=None) - # Escape the % to avoid confusing for a string to expand. - msg_tmp += "%%" - msg += msg_tmp - # Get the (typically) simulated wall clock time. - import helpers.hwall_clock_time as hwacltim - - simulated_wall_clock_time = hwacltim.get_wall_clock_time() - if simulated_wall_clock_time is not None: - date_fmt = "%Y-%m-%d %I:%M:%S" - msg += " @ " + self._convert_time_to_string( - simulated_wall_clock_time, date_fmt - ) - # Colorize / shorten the logging level if it's not DEBUG. - if record.levelno != logging.DEBUG: - msg += f" - {self._colorize_level(record.levelname)}" - # Add information about which coroutine we are running in. - try: - asyncio.get_running_loop() - task = asyncio.current_task() - if task is not None: - msg += f" {task.get_name()}" - except (RuntimeError, AttributeError): - pass - # Add information about the caller. - # ``` - # /helpers/hunit_test.py setUp:932 - # ``` - # pathname = record.pathname.replace("/amp", "") - # msg += f" {pathname} {record.funcName}:{record.lineno}" - # ``` - # test_hlogging.py _print_time:28 - # ``` - msg += f" {record.filename} {record.funcName}:{record.lineno}" - # Indent. - if len(msg) < 50: - msg = "%-60s" % msg - else: - msg = "%-80s" % msg - # Add the caller string. 
- msg += f" {record.msg}" - record.msg = msg - return super().format(record) - - @staticmethod - def _get_date_format(date_format_mode: str) -> str: - if date_format_mode == "time": - date_fmt = "%H:%M:%S" - elif date_format_mode == "date_time": - date_fmt = "%m-%d_%H:%M" - elif date_format_mode == "date_timestamp": - date_fmt = "%Y-%m-%d %I:%M:%S %p" - else: - raise ValueError("Invalid date_format") - return date_fmt - - def _convert_time_to_string( - self, now: datetime.datetime, date_fmt: str - ) -> str: - # Convert it to an tz-aware datetime object in UTC time. - dt = now.replace(tzinfo=datetime.timezone.utc) - if self._tzinfo is not None: - # Convert it to desired timezone. - dt = dt.astimezone(self._tzinfo) - time_as_str = dt.strftime(date_fmt) - return time_as_str - - def _get_wall_clock_time(self) -> str: - dt = datetime.datetime.utcnow() - return self._convert_time_to_string(dt, self._date_fmt) - - def _colorize_level(self, level_name: str) -> str: - # Use white as default. - prefix = "\033[" - suffix = "\033[0m" - # Print stacktrace to debug. - if False: - import traceback - - txt = traceback.format_stack() - txt = "".join(txt) - print(txt) - - assert level_name in _COLOR_MAPPING, "Can't find info '%s'" - color_code, tag = _COLOR_MAPPING[level_name] - colored_level_name = f"{prefix}{color_code}m{tag}{suffix}" - return colored_level_name - - -def set_v2_formatter( - ch: Any, - root_logger: Any, - force_no_warning: bool, - force_print_format: bool, - force_verbose_format: bool, - report_memory_usage: bool, - report_cpu_usage: bool, -) -> Union[logging.Formatter, CustomFormatter]: - """ - See params in `init_logger()`. - """ - assert not (force_verbose_format and force_print_format), ( - f"Can't use both force_verbose_format={force_verbose_format} " - + f"and force_print_format={force_print_format}" - ) - # When running in a notebook make logging behave like a `print`. 
- verbose_format = True - if _is_running_in_ipynb(): - verbose_format = False - if not force_no_warning: - print("WARNING: Running in Jupyter") - # - if force_verbose_format: - verbose_format = True - if force_print_format: - verbose_format = False - # - if verbose_format: - # Force to report memory / CPU usage. - # report_memory_usage = report_cpu_usage = True - # print( - # "report_memory_usage=%s report_cpu_usage=%s" - # % (report_memory_usage, report_cpu_usage) - # ) - formatter: Union[logging.Formatter, CustomFormatter] = CustomFormatter( - report_memory_usage=report_memory_usage, - report_cpu_usage=report_cpu_usage, - ) - else: - # Make logging look like a normal `print()`. - log_format = "%(levelname)-5s %(message)s" - date_fmt = "" - formatter = logging.Formatter(log_format, datefmt=date_fmt) - ch.setFormatter(formatter) - root_logger.addHandler(ch) - return formatter - - -# TODO(gp): Not sure it works properly. -@contextlib.contextmanager -def set_level(logger: Any, level: int) -> None: - """ - Context manager changing the verbosity level. - """ - previous_level = logger.getEffectiveLevel() - try: - logger.setLevel(level) - yield - finally: - logger.setLevel(previous_level) - assert logger.getEffectiveLevel() == previous_level - - -# ############################################################################# - - -def getLogger(name: str) -> logging.Logger: - """ - Get logger with custom trace method support. - - This function provides the same functionality as `logging.getLogger()` - but with proper type hints that include the custom trace method. - - Usage: - ``` - # Instead of `import logging`. 
- import helpers.hlogging as hlogging - - _LOG = hlogging.getLogger(__name__) - _LOG.trace("This works without type checker errors") - _LOG.debug("Standard logging methods also work") - ``` - """ - return logging.getLogger(name) - - -def test_logger() -> None: - print("# Testing logger ...") - print("effective level=", _LOG.getEffectiveLevel()) - # - if hasattr(_LOG, "trace"): - if hasattr(logging, "TRACE"): - _LOG.trace("TRACE=%s", logging.TRACE) - else: - _LOG.trace("TRACE level not available") - # - _LOG.debug("DEBUG=%s", logging.DEBUG) - # - _LOG.info("INFO=%s", logging.INFO) - # - _LOG.warning("WARNING=%s", logging.WARNING) - # - _LOG.error("ERROR=%s", logging.ERROR) - # - _LOG.critical("CRITICAL=%s", logging.CRITICAL) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi deleted file mode 100644 index 993f9cc14..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hlogging.pyi +++ /dev/null @@ -1,14 +0,0 @@ -""" -Type stub for hlogging module with custom Logger that includes trace method. -""" - -import logging -from typing import Any - -class Logger(logging.Logger): - """ - Custom Logger class that includes trace method. - """ - def trace(self, msg: str, *args: Any, **kwargs: Any) -> None: ... - -def getLogger(name: str) -> Logger: ... 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py deleted file mode 100644 index 07fe8d14f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown as hmarkdo -""" - -from helpers.hmarkdown_bullets import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_coloring import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_comments import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_div_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_fenced_blocks import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_filtering import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_formatting import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_headers import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_rules import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_slides import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_tables import * # 
isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hmarkdown_toc import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py deleted file mode 100644 index 0edb705a4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_bullets.py +++ /dev/null @@ -1,248 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_bullets as hmarbull -""" - -import logging -import re -from typing import Generator, List, Tuple - -from helpers.hmarkdown_comments import ( - process_comment_block, - process_single_line_comment, -) - -_LOG = logging.getLogger(__name__) - -_TRACE = False - -# ############################################################################# -# Formatting markdown -# ############################################################################# - - -# These are the colors that are supported by Latex / markdown, are readable on -# white, and form an equidistant color palette. -_ALL_COLORS = [ - "red", - "orange", - "brown", - "olive", - "green", - "teal", - "cyan", - "blue", - "violet", - "darkgray", - "gray", -] - - -# TODO(gp): -> hmarkdown_color.py? -# TODO(gp): This seems the same as `_colorize_bullet_points()`. -def colorize_bold_text( - markdown_text: str, color_sequence: bool, *, use_abbreviations: bool = True -) -> str: - r""" - Add colors to bold text in markdown using equidistant colors from an array. - - The function finds all bold text (enclosed in ** or __) and adds - LaTeX color commands while preserving the rest of the markdown - unchanged. 
- - :param markdown_text: Input markdown text - :param color_sequence: Sequence of colors to use - :param use_abbreviations: Use LaTeX abbreviations for colors, - `\red{text}` instead of `\textcolor{red}{text}` - :return: Markdown text with colored bold sections - """ - # Remove any existing color formatting. - # Remove \color{text} format. - markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) - # Remove \textcolor{color}{text} format. - markdown_text = re.sub( - r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text - ) - # Find all bold text (both ** and __ formats). - bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" - # matches will look like: - # For **text**: group(1)='text', group(2)=None. - # For __text__: group(1)=None, group(2)='text'. - matches = list(re.finditer(bold_pattern, markdown_text)) - if not matches: - return markdown_text - result = markdown_text - # Calculate color spacing to use equidistant colors. - if color_sequence == "equidistant": - color_step = len(_ALL_COLORS) / len(matches) - elif color_sequence == "fixed": - color_step = 1 - else: - raise ValueError(f"Invalid color sequence: {color_sequence}") - # Process matches in reverse to not mess up string indices. - for i, match in enumerate(reversed(matches)): - # Get the matched bold text (either ** or __ format). - bold_text = match.group(1) or match.group(2) - # Calculate `color_idx` using equidistant spacing. - color_idx = int((len(matches) - 1 - i) * color_step) % len(_ALL_COLORS) - color = _ALL_COLORS[color_idx] - # Create the colored version. - if use_abbreviations: - # E.g., \red{text} - colored_text = f"\\{color}{{{bold_text}}}" - else: - # E.g., \textcolor{red}{text} - colored_text = f"\\textcolor{{{color}}}{{{bold_text}}}" - # Apply bold. - colored_text = f"**{colored_text}**" - # Replace in the original text. 
- result = result[: match.start()] + colored_text + result[match.end() :] - return result - - -def remove_bullets(markdown_text: str) -> str: - """ - Remove bullet points (dashes) and leading spaces from markdown text. - - This function removes all leading dashes (`-`) from lines and removes - leading whitespace. Empty lines are preserved. - - :param markdown_text: Input markdown text - :return: Markdown text with bullets removed - """ - lines = markdown_text.split("\n") - result = [] - for line in lines: - # Check if line is not empty. - if line.strip(): - # Remove leading whitespace. - stripped_line = line.lstrip() - # Check if line starts with a bullet point. - if stripped_line.startswith("- "): - # Remove the bullet and the space after it. - result.append(stripped_line[2:]) - else: - # Keep the line as is (no leading whitespace). - result.append(stripped_line) - else: - # Preserve empty lines. - result.append("") - return "\n".join(result) - - -def format_first_level_bullets(markdown_text: str) -> str: - """ - Add empty lines only before first level bullets and remove all empty lines - from markdown text. - - :param markdown_text: Input markdown text - :return: Formatted markdown text - """ - # Split into lines and remove empty ones. - lines = [line for line in markdown_text.split("\n") if line.strip()] - # Add empty lines only before first level bullets. - result = [] - for i, line in enumerate(lines): - # Check if current line is a first level bullet (no indentation). - if re.match(r"^- ", line): - # Add empty line before first level bullet if not at start. - if i > 0: - result.append("") - result.append(line) - return "\n".join(result) - - -def process_code_block( - line: str, in_code_block: bool, i: int, lines: List[str] -) -> Tuple[bool, bool, List[str]]: - """ - Process lines of text to handle code blocks that start and end with '```'. 
- - The transformation is to: - - add an empty line before the start/end of the code - - indent the code block with four spaces - - replace '//' with '# ' to comment out lines in Python code - - :param line: The current line of text being processed. - :param in_code_block: A flag indicating if the function is currently - inside a code block. - :param i: The index of the current line in the list of lines. - :param lines: the lines of text to process - :return: tuple containing: - - `do_continue`: whether to continue processing the current line or skip - it - - `in_code_block`: boolean indicating whether the function is currently - inside a code block - - list of processed lines of text - """ - out: List[str] = [] - do_continue = False - # Look for a code block. - if re.match(r"^(\s*)```", line): - _LOG.debug(" -> code block") - in_code_block = not in_code_block - # Add empty line before the start of the code block. - if ( - in_code_block - and (i + 1 < len(lines)) - and re.match(r"\s*", lines[i + 1]) - ): - out.append("\n") - out.append(" " + line) - if ( - not in_code_block - and (i + 1 < len(lines)) - and re.match(r"\s*", lines[i + 1]) - ): - out.append("\n") - do_continue = True - return do_continue, in_code_block, out - if in_code_block: - line = line.replace("// ", "# ") - out.append(" " + line) - # We don't do any of the other post-processing. - do_continue = True - return do_continue, in_code_block, out - return do_continue, in_code_block, out - - -# TODO(gp): -> iterator -# TODO(gp): where is this used? -def process_lines(lines: List[str]) -> Generator[Tuple[int, str], None, None]: - """ - Process lines of text to handle comment blocks, code blocks, and single - line comments. 
- - :param lines: list of all the lines of text being processed - :return: generator of processed lines of text - """ - out: List[str] = [] - in_skip_block = False - in_code_block = False - for i, line in enumerate(lines): - _LOG.debug("%s:line=%s", i, line) - # 1) Remove comment block. - if _TRACE: - _LOG.debug("# 1) Process comment block.") - do_continue, in_skip_block = process_comment_block(line, in_skip_block) - if do_continue: - continue - # 2) Remove code block. - if _TRACE: - _LOG.debug("# 2) Process code block.") - do_continue, in_code_block, out_tmp = process_code_block( - line, in_code_block, i, lines - ) - out.extend(out_tmp) - if do_continue: - continue - # 3) Remove single line comment. - if _TRACE: - _LOG.debug("# 3) Process single line comment.") - do_continue = process_single_line_comment(line) - if do_continue: - continue - out.append(line) - # - yield from enumerate(out) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py deleted file mode 100644 index ba7278726..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_coloring.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Utilities for colorizing markdown and LaTeX text with color commands. 
- -Import as: - -import helpers.hmarkdown_coloring as hmarcolo -""" - -import logging -import re -from typing import Dict, List, Optional - -import helpers.hdbg as hdbg -from helpers.hmarkdown_fenced_blocks import ( - replace_fenced_blocks_with_tags, - replace_tags_with_fenced_blocks, -) -from helpers.hmarkdown_tables import ( - replace_tables_with_tags, - replace_tags_with_tables, -) - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Colorize -# ############################################################################# - -# Mapping of markdown color names to their LaTeX color equivalents for use in -# \textcolor{} commands. -_MD_COLORS_LATEX_MAPPING = { - "red": "red", - "orange": "orange", - "yellow": "yellow", - "lime": "lime", - "green": "darkgreen", - "teal": "teal", - "cyan": "cyan", - "blue": "blue", - "purple": "purple", - "violet": "violet", - "magenta": "magenta", - "pink": "pink", - "brown": "brown", - "olive": "olive", - "gray": "gray", - "darkgray": "darkgray", - "lightgray": "lightgray", - "black": "black", - "white": "white", -} - - -def get_md_colors_latex_mapping() -> Dict[str, str]: - """ - Get a copy of the markdown-to-LaTeX color mapping. - - :return: Dict mapping color names (e.g., 'red', 'blue') to LaTeX color names - """ - return dict(_MD_COLORS_LATEX_MAPPING) - - -# Curated list of colors that are visually distinguishable and work well in -# both markdown and LaTeX contexts (excludes ones which are too light or have -# poor contrast). -_MD_COLORS = [ - "red", - "orange", - # "yellow", - # "lime", - "green", - "teal", - "cyan", - "blue", - # "purple", - "violet", - "magenta", - # "pink", - "brown", - "olive", - "gray", - "darkgray", - # "lightgray", - "black", - # "white", -] - - -def get_md_colors() -> List[str]: - """ - Get a copy of the curated list of markdown colors. 
- - :return: List of color names suitable for colorizing markdown/LaTeX - """ - return list(_MD_COLORS) - - -def process_color_commands(in_line: str) -> str: - r""" - Transform color commands like `\red{xyz}` into valid LaTeX syntax. - - If the content is text (not math), wraps it in `\text{}`. - - E.g.: - - `\red{abc}` -> `\textcolor{red}{\text{abc}}` - - `\blue{x + y}` -> `\textcolor{blue}{x + y}` - - :param in_line: input line to process - :return: line with color commands transformed - """ - for md_color, latex_color in get_md_colors_latex_mapping().items(): - # This regex matches color commands like \red{content}, \blue{content}, - # etc. - pattern = re.compile( - rf""" - \\{md_color} # Match the color command (e.g., \red, \blue, etc.). - \{{ # Match the opening curly brace. - ([^}}]*) # Capture everything inside the curly braces. - \}} # Match the closing curly brace. - """, - re.VERBOSE, - ) - - def _replacement(match: re.Match, latex_color: str) -> str: - """ - Replace a color command with LaTeX \textcolor directive. - """ - content = match.group(1) - # Math expressions (containing operators, brackets, etc.) render - # directly; plain text needs \text{} wrapper for proper LaTeX rendering. - is_math_expr = any(c in content for c in "+-*/=<>{}[]()^_") - if is_math_expr: - ret = rf"\textcolor{{{latex_color}}}{{{content}}}" - else: - ret = rf"\textcolor{{{latex_color}}}{{\text{{{content}}}}}" - return ret - - # Replace the color command with the LaTeX color command. - in_line = re.sub( - pattern, lambda m: _replacement(m, latex_color), in_line - ) - return in_line - - -def has_color_command(text: str) -> bool: - """ - Check if text contains any color commands like `\\red{...}` or `\\blue{...}`. 
- - :param text: text to check - :return: True if text contains at least one color command - """ - hdbg.dassert_isinstance(text, str) - # hdbg.dassert_not_in("\n", line) - for color in _MD_COLORS_LATEX_MAPPING.keys(): - # This regex matches LaTeX color commands like \red{content}, - # \blue{content}, etc. - pattern = re.compile( - rf""" - \\{color} # Match the color command (e.g., \red, \blue, etc.). - \{{ # Match the opening curly brace. - ([^}}]*) # Capture everything inside the curly braces. - \}} # Match the closing curly brace. - """, - re.VERBOSE, - ) - if re.search(pattern, text): - return True - return False - - -# TODO(gp): -> List[str] -# TODO(gp): Use hmarkdown.process_lines() and test it. -def colorize_bullet_points_in_slide( - txt: str, - *, - use_abbreviations: bool = True, - interpolate_colors: bool = False, - all_md_colors: Optional[List[str]] = None, -) -> str: - r""" - Colorize bold markdown items `**text**` with color commands. - - Scans the text line-by-line for bold markdown items and wraps each in a - color command (e.g., `**\red{text}**`). Skips code blocks and tables to - preserve their formatting. Bold items are colored sequentially using the - provided color list. 
- - :param txt: Markdown text containing bold items to colorize - :param use_abbreviations: - - If True, use abbreviated color syntax (e.g., `\red{foo}`) - - If False, use full LaTeX syntax (e.g., `\textcolor{red}{foo}`) - :param interpolate_colors: - - If True, evenly space selected colors across all bold items - - If False, use a predefined sequence for common counts (1-4 items get - fixed color sets, more items cycle through all_md_colors) - :param all_md_colors: List of available colors to cycle through - - Default: curated list from `get_md_colors()` - :return: Markdown text with bold items wrapped in color commands - """ - hdbg.dassert_isinstance(txt, str) - if all_md_colors is None: - all_md_colors = list(get_md_colors()) - # Strip code blocks and tables to avoid colorizing content inside them. - lines = txt.split("\n") - lines, fence_map = replace_fenced_blocks_with_tags(lines) - _LOG.debug("Found %s fenced blocks", len(fence_map)) - lines, table_map = replace_tables_with_tags(lines) - _LOG.debug("Found %s tables", len(table_map)) - # Count bold markers (**) to determine how many bold items exist. - tot_bold = 0 - # Scan the text line by line and count how many bold items there are. - for line in lines: - # Count the number of bold items. - num_bold = len(re.findall(r"\*\*", line)) - tot_bold += num_bold - _LOG.debug("tot_bold=%s", tot_bold) - if tot_bold == 0: - return txt - # Divide by 2 since each bold item is wrapped with ** on both sides. - # hdbg.dassert_eq(tot_bold % 2, 0, "tot_bold=%s needs to be even", tot_bold) - num_bolds = tot_bold // 2 - - def _interpolate_colors(num_bolds: int) -> List[str]: - """ - Sample colors evenly spaced to cover all bold items distinctly. 
- """ - step = len(all_md_colors) // num_bolds - colors = list(all_md_colors)[::step][:num_bolds] - return colors - - if interpolate_colors: - colors = _interpolate_colors(num_bolds) - else: - # Use fixed color sequences for small numbers of bold items; for larger - # counts, cycle through the available colors. - if num_bolds == 1: - colors = ["red"] - elif num_bolds == 2: - colors = ["red", "blue"] - elif num_bolds == 3: - colors = ["red", "green", "blue"] - elif num_bolds == 4: - colors = ["red", "green", "blue", "violet"] - else: - colors = all_md_colors[:num_bolds] - _LOG.debug("colors=%s", colors) - hdbg.dassert_lte( - num_bolds, len(colors), "Number of bold items exceeds available colors" - ) - color_idx = 0 - txt_out = [] - for line in lines: - - def color_replacer(match: re.Match[str]) -> str: - """ - Replace strings like "**foo**" with strings like "**\red{foo}**". - """ - nonlocal color_idx - text = match.group(1) - hdbg.dassert_lte( - color_idx, - len(colors), - "Color index out of bounds; not enough colors assigned", - ) - color_to_use = colors[color_idx] - hdbg.dassert_in( - color_to_use, - get_md_colors_latex_mapping(), - "Selected color is not in the color mapping", - ) - latex_color = get_md_colors_latex_mapping()[color_to_use] - color_idx += 1 - if use_abbreviations: - ret = f"**\\{color_to_use}{{{text}}}**" - else: - ret = f"**\\textcolor{{{latex_color}}}{{{text}}}**" - return ret - - line = re.sub(r"\*\*([^*]+)\*\*", color_replacer, line) - txt_out.append(line) - # Restore code blocks and tables that were temporarily replaced with tags. 
- txt_out = replace_tags_with_fenced_blocks(txt_out, fence_map) - txt_out = replace_tags_with_tables(txt_out, table_map) - txt_out = "\n".join(txt_out) - return txt_out diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py deleted file mode 100644 index 5b626a15a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_comments.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_comments as hmarcomm -""" - -import logging -import re -from typing import Tuple - -import helpers.hdbg as hdbg -from helpers.hmarkdown_headers import is_markdown_line_separator - -_LOG = logging.getLogger(__name__) - - -def process_single_line_comment(line: str) -> bool: - """ - Handle single line comment. - - We need to do it after the '//' in code blocks have been handled. - - :param line: line of text to process - :return: whether to continue processing the line or skip it - """ - do_continue = False - if line.startswith(r"%%") or line.startswith(r"//"): - do_continue = True - _LOG.debug(" -> do_continue=True") - return do_continue - # Skip frame. - if is_markdown_line_separator(line): - do_continue = True - _LOG.debug(" -> do_continue=True") - return do_continue - # Nothing to do. - return do_continue - - -def process_comment_block(line: str, in_skip_block: bool) -> Tuple[bool, bool]: - """ - Process lines of text to identify blocks that start with '' or '*/'. 
- - :param line: current line of text being processed - :param in_skip_block: flag indicating if the function is currently - inside a comment block - :return: tuple containing: - - `do_continue`: whether to continue processing the current line or skip - it - - `in_skip_block`: boolean indicating whether the function is currently - inside a comment block - """ - do_continue = False - if line.startswith(r"") or re.search(r"^\s*\*\/", line): - # End skipping comments. - in_skip_block = False - # Skip comment. - _LOG.debug(" -> skip") - do_continue = True - return do_continue, in_skip_block diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py deleted file mode 100644 index 169e06624..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_div_blocks.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Utilities for handling div blocks in markdown files. - -This module provides functions to add and remove prettier-ignore comments -around div blocks in markdown files. - -Import as: - -import helpers.hmarkdown_div_blocks as hmadiblo -""" - -from typing import List, Tuple - - -def _split_lines_into_chunks( - lines: List[str], -) -> List[Tuple[bool, List[str]]]: - """ - Split lines into chunks of div blocks and non-div blocks. - - A div block starts with a line containing ::: and ends with another - line containing :::. - - :param lines: List of strings representing lines in a markdown file. - :return: List of tuples (is_div_block, chunk_lines) where is_div_block - indicates if the chunk is a div block. - """ - chunks = [] - i = 0 - while i < len(lines): - line = lines[i] - # Check if this line starts a div block. - if line.strip().startswith(":::"): - # Look ahead to find the closing div block. 
- j = i + 1 - while j < len(lines): - if lines[j].strip().startswith(":::"): - # Found the end of the div block. - chunk_lines = lines[i : j + 1] - chunks.append((True, chunk_lines)) - i = j + 1 - break - j += 1 - else: - # No closing div block found, treat as regular line. - chunks.append((False, [line])) - i += 1 - else: - # Start a non-div block chunk. - chunk_lines = [line] - i += 1 - # Continue collecting non-div lines. - while i < len(lines) and not lines[i].strip().startswith(":::"): - chunk_lines.append(lines[i]) - i += 1 - chunks.append((False, chunk_lines)) - return chunks - - -def add_prettier_ignore_to_div_blocks(lines: List[str]) -> List[str]: - """ - Add prettier-ignore comments around div blocks. - - A div block starts with a line containing ::: and has another line - with ::: following it. - - Examples of div blocks: - - :::: - ::::{.column width=40%} - - :::columns - ::::{.column width=60%} - - :::: - ::: - - :param lines: List of strings representing lines in a markdown file. - :return: List of strings with prettier-ignore comments added. - """ - # Step 1: Split into chunks. - chunks = _split_lines_into_chunks(lines) - # Step 2: Process chunks and add prettier-ignore comments. - result = [] - for is_div_block, chunk_lines in chunks: - if is_div_block: - # Add prettier-ignore comments around div blocks. - result.append("") - result.append("") - result.extend(chunk_lines) - result.append("") - result.append("") - else: - # Add non-div block lines as-is. - result.extend(chunk_lines) - return result - - -def remove_prettier_ignore_from_div_blocks(lines: List[str]) -> List[str]: - """ - Remove all prettier-ignore comments from lines. - - This function removes: - - lines - - lines - - Empty lines before prettier-ignore-start - - Empty lines after prettier-ignore-end - - :param lines: List of strings representing lines in a markdown file. - :return: List of strings with prettier-ignore comments removed. 
- """ - result = [] - i = 0 - while i < len(lines): - line = lines[i] - # Check if this is a prettier-ignore-start comment. - if line.strip() == "": - # Remove empty line before prettier-ignore-start if present. - if result and result[-1] == "": - result.pop() - # Skip the prettier-ignore-start line. - i += 1 - continue - # Check if this is a prettier-ignore-end comment. - if line.strip() == "": - # Skip the prettier-ignore-end line. - i += 1 - # Skip empty line after prettier-ignore-end if present. - if i < len(lines) and lines[i] == "": - i += 1 - continue - # Add all other lines. - result.append(line) - i += 1 - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py deleted file mode 100644 index 8d3614b9b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_fenced_blocks.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_fenced_blocks as hmafeblo -""" - -import logging -import pprint -import re -from typing import Dict, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Add a decorator like in hprint to process both strings and lists -# of strings. - - -def replace_fenced_blocks_with_tags( - lines: List[str], -) -> Tuple[List[str], Dict[str, str]]: - """ - Replace fenced blocks with a tag and return the mapping from tags to the - fenced block text. 
- - E.g., - ```` - hello - world - ```python - foo - ``` - bye - ```` - is replaced with: - ``` - hello - world - - bye - ``` - - :param lines: list of lines to process - :return: tuple containing: - - list of lines with the fenced blocks replaced by tags - - mapping from tags to the fenced block text - """ - hdbg.dassert_isinstance(lines, list) - result = [] - # True if we are inside a fenced block. - in_fenced_block = False - # Count the number of fenced blocks found. - fenced_block_count = 0 - # Store the mapping between the block number and the fence type. - fence_map = {} - # Store the text of the fenced block. - fence_depth = 0 - fence_text = [] - for i, line in enumerate(lines): - _LOG.debug("%d:line='%s'", i, line) - _LOG.debug( - " " - + hprint.to_str("fenced_block_count in_fenced_block fence_depth") - ) - # Look for the start of a fenced block. - fence_match = re.match(r"^\s*(`{3,})", line) - if fence_match: - _LOG.debug(" -> fence_match") - curr_fence_depth = len(fence_match.group(0)) - if not in_fenced_block: - # Start of a fenced block. - _LOG.debug(" -> start of fenced block") - in_fenced_block = True - fence_depth = curr_fence_depth - fenced_block_count += 1 - fence_text.append(line) - else: - # We are already in a fenced block. - fence_text.append(line) - if curr_fence_depth == fence_depth: - # End of block found. - _LOG.debug(" -> end of fenced block") - in_fenced_block = False - # Replace nested code block markers with tag. - result.append(f"") - fence_map[str(fenced_block_count)] = "\n".join(fence_text) - _LOG.debug(" -> added to fence_map") - # Reset state. - fence_depth = 0 - fence_text = [] - else: - if in_fenced_block: - _LOG.debug(" -> in_fenced_block") - fence_text.append(line) - else: - result.append(line) - return result, fence_map - - -def replace_tags_with_fenced_blocks( - lines: List[str], fence_map: Dict[str, str] -) -> List[str]: - """ - Replace tags with fenced blocks. 
- - :param lines: list of lines to process - :param fence_map: mapping from tags to fenced block text - :return: list of lines with tags replaced by fenced blocks - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(fence_map, dict) - result = [] - for line in lines: - if line.startswith("")[0] - hdbg.dassert_in(tag, fence_map, "Found unmatched tag %s", tag) - result.append(fence_map[tag]) - del fence_map[tag] - else: - result.append(line) - hdbg.dassert_eq( - len(fence_map), - 0, - "Found %s unmatched tags:\n%s", - len(fence_map), - pprint.pformat(fence_map), - ) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py deleted file mode 100644 index 666c3d03b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_filtering.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_filtering as hmarfilt -""" - -import logging -import re -from typing import List, Tuple - -import helpers.hdbg as hdbg -from helpers.hmarkdown_headers import ( - extract_section_from_markdown, -) -from helpers.hmarkdown_slides import extract_slides_from_markdown - -_LOG = logging.getLogger(__name__) - - -def filter_by_header(lines: List[str], header: str) -> List[str]: - """ - Extract a specific header from markdown text. - - :param lines: list of markdown lines to be processed - :param header: header to filter by (e.g., `# Introduction`) - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - # Filter by header. 
- txt_lines = extract_section_from_markdown(lines, header) - hdbg.dassert_isinstance(txt_lines, list) - return txt_lines - - -def _parse_range(range_as_str: str, max_value: int) -> Tuple[int, int]: - """ - Parse a 0-indexed range string like '0:10' into start and end indices. - - :param range_as_str: string in format 'start:end' where start/end - can be numbers or 'None' (None means 0 for start, max_value for end) - :param max_value: maximum value to use when 'None' is specified for end - :return: tuple of '(start_index, end_index)' as 0-indexed integers - """ - m = re.match(r"^(\S+):(\S+)$", range_as_str) - hdbg.dassert(m, "Invalid range_as_str='%s'", range_as_str) - assert m is not None - start_value, end_value = m.groups() - if start_value.lower() == "none": - start_value = 0 - else: - start_value = int(start_value) - if end_value.lower() == "none": - end_value = max_value - else: - end_value = int(end_value) - return start_value, end_value - - -def filter_by_lines(lines: List[str], filter_by_lines: str) -> List[str]: - """ - Filter the lines of text in `[start_line, end_line[` (0-indexed). - - :param lines: list of lines to be processed - :param filter_by_lines: 0-indexed range string like `0:10`, `0:None`, or `None:10` - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - start_line, end_line = _parse_range(filter_by_lines, len(lines)) - hdbg.dassert_lte(start_line, end_line) - txt = lines[start_line:end_line] - _LOG.warning( - "filter_by_lines='%s' -> lines=[%s:%s]", - filter_by_lines, - start_line, - end_line, - ) - hdbg.dassert_isinstance(txt, list) - return txt - - -def filter_by_slides(lines: List[str], filter_by_slides: str) -> List[str]: - """ - Filter the lines of text in `[start_slide, end_slide[` (0-indexed). 
- - :param lines: list of lines to be processed - :param filter_by_slides: 0-indexed range string like `0:10`, `0:None`, or `None:10` - :return: filtered lines - """ - hdbg.dassert_isinstance(lines, list) - slides_info, last_line_number = extract_slides_from_markdown(lines) - _LOG.debug("slides_info=%s\n%s", len(slides_info), slides_info) - start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) - _LOG.debug("start_slide=%s, end_slide=%s", start_slide, end_slide) - hdbg.dassert_lte(start_slide, end_slide) - hdbg.dassert_lte(end_slide, len(slides_info)) - start_line = slides_info[start_slide].line_number - if end_slide == len(slides_info): - end_line = last_line_number - else: - end_line = slides_info[end_slide].line_number - _LOG.warning( - "filter_by_slides='%s' -> lines=[%s:%s]", - filter_by_slides, - start_line, - end_line, - ) - txt = lines[start_line - 1 : end_line - 1] - hdbg.dassert_isinstance(txt, list) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py deleted file mode 100644 index f3fd1b4a9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_formatting.py +++ /dev/null @@ -1,530 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_formatting as hmarform -""" - -import logging -import re -from typing import List - -import helpers.hdbg as hdbg -import helpers.hmarkdown_headers as hmarhead -import helpers.hmarkdown_slides as hmarslid -import dev_scripts_helpers.dockerize.lib_prettier as dshdlipr - -_LOG = logging.getLogger(__name__) - - -def remove_end_of_line_periods(lines: List[str]) -> List[str]: - """ - Remove periods at the end of each line in the given text. 
- - :param lines: list of input lines to process - :return: lines with end-of-line periods removed - """ - hdbg.dassert_isinstance(lines, list) - txt_out = [line.rstrip(".") for line in lines] - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -def remove_empty_lines(lines: List[str]) -> List[str]: - """ - Remove empty lines from the given text. - - :param lines: list of input lines to process - :return: lines with empty lines removed - """ - hdbg.dassert_isinstance(lines, list) - txt_out = [line for line in lines if line != ""] - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -# def remove_gdoc_artifacts(lines: List[str]) -> List[str]: -# """ -# Remove empty lines from the given text. - -# :param lines: list of input lines to process -# :return: lines with empty lines removed -# """ -# hdbg.dassert_isinstance(lines, list) -# # Remove “” and …. -# lines = re.sub(r"“", '"', lines) -# lines = re.sub(r"”", '"', lines) -# lines = re.sub(r"’", "'", lines) -# lines = re.sub(r"…", "", lines) -# hdbg.dassert_isinstance(lines, list) -# return lines - - -# TODO(gp): Add tests. -def remove_code_delimiters(lines: List[str]) -> List[str]: - """ - Remove ```python and ``` delimiters from a given text. - - :param lines: list of input lines containing code delimiters - :return: lines with the code delimiters removed - """ - hdbg.dassert_isinstance(lines, list) - # Join lines back to text, apply regex logic, then split again. - txt = "\n".join(lines) - # Replace the ```python and ``` delimiters with empty strings. - txt_out = txt.replace("```python", "").replace("```", "") - txt_out = txt_out.strip() - # Remove the numbers at the beginning of the line, if needed - # E.g., `3: """` -> `"""`. - txt_out = re.sub(r"(^\d+: )", "", txt_out, flags=re.MULTILINE) - # Split back into lines. 
- result = txt_out.split("\n") if txt_out else [] - hdbg.dassert_isinstance(result, list) - return result - - -def add_line_numbers(lines: List[str]) -> List[str]: - """ - Add line numbers to each line of text. - - :param lines: list of input lines to process - :return: lines with line numbers added - """ - hdbg.dassert_isinstance(lines, list) - numbered_lines = [] - for i, line in enumerate(lines, 1): - numbered_lines.append(f"{i}: {line}") - hdbg.dassert_isinstance(numbered_lines, list) - return numbered_lines - - -def remove_formatting(txt: str) -> str: - """ - Remove markdown and LaTeX formatting from text. - - :param txt: input text to process - :return: text with formatting removed - """ - # Replace bold markdown syntax with plain text. - txt = re.sub(r"\*\*(.*?)\*\*", r"\1", txt) - # Replace italic markdown syntax with plain text. - txt = re.sub(r"\*(.*?)\*", r"\1", txt) - # Remove \textcolor{red}{ ... }. - txt = re.sub(r"\\textcolor\{(.*?)\}\{(.*?)\}", r"\2", txt) - # Remove \red{ ... }. - txt = re.sub(r"\\\S+\{(.*?)\}", r"\1", txt) - return txt - - -def md_clean_up(txt: str) -> str: - """ - Clean up a Markdown file copy-pasted from Google Docs, ChatGPT. - - :param txt: input text to process - :return: text with the cleaning up applied - """ - # 0) General formatting. - # Remove dot at the end of each line. - txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) - # 1) ChatGPT formatting. - # E.g.,`` • Description Logics (DLs) are a family`` - # Replace `•` with `-` - txt = re.sub(r"•\s+", r"- ", txt) - # Replace `\t` with 2 spaces - txt = re.sub(r"\t", r" ", txt) - # Remove `⋅`. - txt = re.sub(r"⸻", r"", txt) - # “ - txt = re.sub(r"“", r'"', txt) - # ” - txt = re.sub(r"”", r'"', txt) - # ’ - txt = re.sub(r"’", r"'", txt) - # … - txt = re.sub(r"…", r"...", txt) - # 2) Latex formatting. - # Replace \( ... \) math syntax with $ ... $. - txt = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", txt) - # Replace \[ ... \] math syntax with $$ ... 
$$, handling multiline equations. - txt = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", txt, flags=re.DOTALL) - # Replace `P(.)`` with `\Pr(.)`. - txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) - # - txt = re.sub(r"\\left\[", r"[", txt) - txt = re.sub(r"\\right\]", r"]", txt) - # - txt = re.sub(r"\\mid", r"|", txt) - # - txt = re.sub(r"→", r"$\\rightarrow$", txt) - # Remove empty spaces at beginning / end of Latex equations $...$. - # E.g., $ \text{Student} $ becomes $\text{Student}$ - # txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) - # Transform `Example: Training a deep` into `E.g., training a deep`, - # converting the word after `Example:` to lower case. - txt = re.sub(r"\bExample:", "E.g.,", txt) - txt = re.sub(r"\bE.g.,\s+(\w)", lambda m: "E.g., " + m.group(1).lower(), txt) - return txt - - -def remove_empty_lines_from_markdown(lines: List[str]) -> List[str]: - """ - Remove all empty lines from markdown text. - - :param lines: list of input markdown lines - :return: formatted markdown lines - """ - hdbg.dassert_isinstance(lines, list) - # Remove empty lines. - result = [line for line in lines if line.strip()] - hdbg.dassert_isinstance(result, list) - return result - - -def prettier_markdown(txt: str) -> str: - """ - Format markdown text using `prettier`. - - :param txt: input text to format - :return: formatted text - """ - file_type = "md" - txt = dshdlipr.prettier_on_str(txt, file_type) - return txt - - -def format_markdown(txt: str) -> str: - """ - Format markdown text. - - :param txt: input text to format - :return: formatted text - """ - file_type = "md" - txt = dshdlipr.prettier_on_str(txt, file_type) - lines = txt.split("\n") - clean_lines = remove_empty_lines_from_markdown(lines) - txt = "\n".join(clean_lines) - return txt - - -def bold_first_level_bullets( - lines: List[str], *, max_length: int = 30 -) -> List[str]: - """ - Make first-level bullets bold in markdown text. 
- - :param lines: list of input markdown lines - :param max_length: max length of the bullet text to be bolded. The - value '-1' means no limit - :return: formatted markdown lines with first-level bullets in bold - """ - hdbg.dassert_isinstance(lines, list) - result = [] - for line in lines: - # Check if this is a first-level bullet point. - if re.match(r"^\s*- ", line): - # Check if the line has already bold text it in it. - if not re.search(r"\*\*", line): - # Bold first-level bullets. - indentation = len(line) - len(line.lstrip()) - if indentation == 0: - # First-level bullet, add bold markers. - m = re.match(r"^(\s*-\s+)(.*)", line) - hdbg.dassert(m, "Can't parse line='%s'", line) - bullet_text = m.group(2) # type: ignore[union-attr] - if max_length > -1 and len(bullet_text) <= max_length: - spaces = m.group(1) # type: ignore[union-attr] - line = spaces + "**" + bullet_text + "**" - result.append(line) - hdbg.dassert_isinstance(result, list) - return result - - -def format_figures(lines: List[str]) -> List[str]: - """ - Convert markdown slides with figures to use fenced div syntax with column - layout. - - If the input already uses column format or contains no figures, - returns unchanged. - - :param lines: list of input markdown lines - :return: formatted markdown lines with figures in column layout - """ - hdbg.dassert_isinstance(lines, list) - # Check if already in column format. - text = "\n".join(lines) - if "::: columns" in text and ":::: {.column" in text: - return lines - # Find first figure line to split content. - first_figure_idx = -1 - for i, line in enumerate(lines): - if re.match(r"^\s*!\[.*\]\(.*\)\s*$", line.strip()): - first_figure_idx = i - break - # If no figures found, return original lines unchanged. - if first_figure_idx == -1: - return lines - # Split content: slide titles (lines starting with *) stay outside columns, - # other content before first figure goes to left column, - # everything from first figure onwards goes to right column. 
- pre_figure_lines = lines[:first_figure_idx] - figure_content = lines[first_figure_idx:] - # Separate slide titles from other content - slide_titles = [] - text_lines = [] - for line in pre_figure_lines: - if line.strip().startswith("*"): - slide_titles.append(line) - else: - text_lines.append(line) - # Remove empty lines at the beginning and end of text_lines. - while text_lines and not text_lines[0].strip(): - text_lines.pop(0) - while text_lines and not text_lines[-1].strip(): - text_lines.pop() - # Build the column format. - result = [] - # Add slide titles first (outside columns) - result.extend(slide_titles) - result.append("::: columns") - result.append(":::: {.column width=65%}") - result.extend(text_lines) - result.append("::::") - result.append(":::: {.column width=40%}") - result.append("") - result.extend(figure_content) - result.append("::::") - result.append(":::") - hdbg.dassert_isinstance(result, list) - return result - - -def format_md_links_to_latex_format(lines: List[str]) -> List[str]: - r""" - Convert markdown links to formatted links with LaTeX styling. - - Convert markdown links: - - Plain URLs: - http://... or https://... - to the format: - [\textcolor{blue}{\underline{URL}}](URL) - - - Existing formatted links: - [Text](URL) - to the format: - [\textcolor{blue}{\underline{Text}}](URL) - - - Email links: - [](email@domain.com) or [](http://...) or [](https://...) - to the format: - [\textcolor{blue}{\underline{URL}}](URL) - - - Picture links - ![](lectures_source/.../lec_4_1_slide_5_image_1.png) - are left untouched - - :param lines: list of input markdown lines - :return: formatted markdown lines with styled links - """ - hdbg.dassert_isinstance(lines, list) - result = [] - # URL regex pattern. - url_pattern = r"https?://[^\s)}\]`]+" - # Pattern for URLs in backticks. - backtick_url_pattern = r"`(https?://[^\s`]+)`" - # Pattern for existing formatted links that need normalization. 
- # This matches [\textcolor{blue}{\underline{Text}}](URL) where Text != URL. - formatted_link_pattern = ( - r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\((https?://[^)]+)\)" - ) - # Pattern for markdown links: [Text](URL). - # Matches text that can include escaped underscores (\_ ). - markdown_link_pattern = r"\[((?:[^\]\\]|\\[_])+)\]\((https?://[^\)]+)\)" - # Pattern for email links: [email@domain.com](email@domain.com). - email_link_pattern = r"\[([^\]\\]+@[^\]\\]+)\]\(([^)]+@[^)]+)\)" - # Pattern for empty bracket links: [](URL) or [](email). - empty_bracket_pattern = r"\[\]\(([^\)]+)\)" - # Pattern for image links: ![...](...). - image_link_pattern = r"!\[.*?\]\([^\)]+\)" - for line in lines: - # Process the line for all URL patterns. - processed_line = line - # Store image links temporarily to avoid processing them. - image_placeholders = [] - - def store_image_link(match): - placeholder = f"__IMAGE_LINK_{len(image_placeholders)}__" - image_placeholders.append(match.group(0)) - return placeholder - - processed_line = re.sub( - image_link_pattern, store_image_link, processed_line - ) - - # Convert empty bracket links [](URL) or [](email). - def convert_empty_bracket_link(match): - target = match.group(1) - return rf"[\textcolor{{blue}}{{\underline{{{target}}}}}]({target})" - - processed_line = re.sub( - empty_bracket_pattern, convert_empty_bracket_link, processed_line - ) - - # Convert URLs in backticks. - def convert_backtick_url(match): - url = match.group(1) - return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" - - processed_line = re.sub( - backtick_url_pattern, convert_backtick_url, processed_line - ) - - # Normalize existing formatted links to keep existing display text. 
- def normalize_formatted_link(match): - text = match.group(1) - url = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" - - processed_line = re.sub( - formatted_link_pattern, normalize_formatted_link, processed_line - ) - - # Convert markdown links [Text](URL) to formatted links. - def convert_markdown_link(match): - text = match.group(1) - url = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{text}}}}}]({url})" - - processed_line = re.sub( - markdown_link_pattern, convert_markdown_link, processed_line - ) - - # Convert email links [email@domain.com](email@domain.com) to formatted links. - def convert_email_link(match): - email = match.group(2) - return rf"[\textcolor{{blue}}{{\underline{{{email}}}}}]({email})" - - processed_line = re.sub( - email_link_pattern, convert_email_link, processed_line - ) - # Convert plain URLs (but avoid converting URLs that are already part - # of formatted links). - # First, temporarily replace formatted links to avoid interfering with - # them. - temp_placeholders = [] - # Store existing correctly formatted links temporarily. - correct_formatted_link_pattern = ( - r"\[\\textcolor\{blue\}\{\\underline\{([^}]+)\}\}\]\(([^)]+)\)" - ) - - def store_formatted_link(match): - placeholder = f"__FORMATTED_LINK_{len(temp_placeholders)}__" - temp_placeholders.append(match.group(0)) - return placeholder - - temp_line = re.sub( - correct_formatted_link_pattern, store_formatted_link, processed_line - ) - - # Convert remaining plain URLs. - def convert_plain_url(match): - url = match.group(0) - return rf"[\textcolor{{blue}}{{\underline{{{url}}}}}]({url})" - - temp_line = re.sub(url_pattern, convert_plain_url, temp_line) - # Restore formatted links. - for i, placeholder in enumerate(temp_placeholders): - temp_line = temp_line.replace(f"__FORMATTED_LINK_{i}__", placeholder) - # Restore image links. 
- for i, image_link in enumerate(image_placeholders): - temp_line = temp_line.replace(f"__IMAGE_LINK_{i}__", image_link) - result.append(temp_line) - hdbg.dassert_isinstance(result, list) - return result - - -# TODO(gp): -> format_first_level_bullets_in_slide -def format_first_level_bullets(lines: List[str]) -> List[str]: - """ - Add empty lines to separate first level bullets and remove all remaining - empty lines. - - This is the formatting we use in the slides. - - :param lines: list of input markdown lines - :return: formatted markdown lines - """ - hdbg.dassert_isinstance(lines, list) - # Remove empty lines. - lines_clean = [line for line in lines if line.strip()] - # Handle special case: if input was only empty lines, preserve structure. - if not lines_clean and lines: - return lines - # Add empty lines only before first level bullets. - result = [] - for i, line in enumerate(lines_clean): - # Check if current line is a first level bullet (no indentation). - if re.match(r"^- ", line): - # Add empty line before first level bullet if not at start. - if i > 0: - result.append("") - result.append(line) - hdbg.dassert_isinstance(result, list) - return result - - -# TODO(gp): Implement and add tests. -def format_column_blocks(lines: List[str]) -> List[str]: - """ - # Make sure that there is a single empty line before and after the following - # block: - # - # 1) - # ``` - # ::: columns - # :::: {.column width=55%} - # ``` - # 2) - # ``` - # :::: - # :::: {.column width=40%} - # ``` - # 3) - # ``` - # :::: - # ::: - # ``` - - # - """ - return lines - - -def format_markdown_slide(lines: List[str]) -> List[str]: - """ - Format markdown text for a slide. - - :param lines: input lines to format - :return: formatted slide text - """ - hdbg.dassert_isinstance(lines, list) - if False: - lines = bold_first_level_bullets(lines) - txt = "\n".join(lines) - # Format the markdown slides. 
- # TODO(gp): Maybe the conversion should be done inside `prettier_on_str` - # passing a marker to indicate that the text is a slide. - lines = hmarslid.convert_slide_to_markdown(lines) - # lines = format_column_blocks() - # - file_type = "md" - txt = "\n".join(lines) - txt = dshdlipr.prettier_on_str(txt, file_type) - # - lines = txt.split("\n") - lines = hmarslid.convert_markdown_to_slide(lines) - # Format the first level bullets. - lines = format_first_level_bullets(lines) - # - lines = hmarhead.capitalize_header(lines) - return lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py deleted file mode 100644 index 532de2aee..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_headers.py +++ /dev/null @@ -1,841 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_headers as hmarhead -""" - -import dataclasses -import logging -import re -from typing import List, Optional, Tuple, cast - -import helpers.hdbg as hdbg -import helpers.hparser as hparser -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - -_TRACE = False - - -def is_markdown_line_separator(line: str, *, min_repeats: int = 5) -> bool: - """ - Check if the given line is a Markdown separator. - - This function determines if a line consists of repeated characters - (`#`, `/`, `-`, `=`) that would indicate a markdown separator. 
- - :param line: current line of text being processed - :param min_repeats: minimum number of times the characters have to - be repeated to be considered a separator, e.g., if `min_repeats - = 2`, then `##`, `###`, `//` are considered to be line - separators, but `#`, `/` are not - :return: whether the line is a separator - """ - separator_pattern = rf""" - \#*\s* # Optional leading `#` and whitespace. - ([#/=\-])\1{{{min_repeats - 1},}} # Capture a character, then repeat it - # (`min_repeats` - 1) times. - \s*$ # Match only whitespace characters - # until the end of the line. - """ - res = bool(re.match(separator_pattern, line, re.VERBOSE)) - return res - - -def is_header(line: str) -> Tuple[bool, int, str]: - """ - Check if the given line is a Markdown header. - - :param line: line to check - :return: tuple containing: - - boolean indicating if the line is a header - - level of the header (`0` if not a header) - - title of the header (empty string if not a header) - """ - # hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) - m = re.match(r"(#+)\s+(.*)", line) - is_header_ = bool(m) - if m: - level = len(m.group(1)) - title = m.group(2) - else: - level = 0 - title = "" - return is_header_, level, title - - -# ############################################################################# -# Frame chapters -# ############################################################################# - - -def _has_internal_capitals(word: str) -> bool: - """ - Check if a word has capital letters within it (not just at the start). - - This function detects words like `SimpleFeedForward`, `DeepNPTS` that - should be preserved without title case transformation. - - Note: uppercase letters immediately after an apostrophe are excluded - from this check, since they are not "internal capitals" but rather - normal English capitalization (e.g., "Won'T" has a capital T that is - not internal but rather a grammatical artifact of title case tools). 
- - :param word: word to check - :return: `True` if the word has internal capitals, `False` otherwise - """ - hdbg.dassert_isinstance(word, str) - # A word has internal capitals if it contains at least one uppercase letter - # after the first character, excluding uppercase letters immediately after - # an apostrophe. - if len(word) <= 1: - return False - for i in range(1, len(word)): - if word[i].isupper() and word[i - 1] != "'": - return True - return False - - -def frame_chapters(lines: List[str], *, max_lev: int = 4) -> List[str]: - """ - Add the frame around each chapter. - """ - hdbg.dassert_isinstance(lines, list) - txt_new: List[str] = [] - # _LOG.debug("lines=%s", lines) - for i, line in enumerate(lines): - _LOG.debug("line=%d:%s", i, line) - m = re.match(r"^(\#+) ", line) - txt_processed = False - if m: - comment = m.group(1) - lev = len(comment) - _LOG.debug(" -> lev=%s", lev) - if lev < max_lev: - sep = comment + " " + "#" * (80 - 1 - len(comment)) - txt_new.append(sep) - txt_new.append(line) - txt_new.append(sep) - txt_processed = True - else: - _LOG.debug( - " -> Skip formatting the chapter frame: lev=%d, max_lev=%d", - lev, - max_lev, - ) - if not txt_processed: - txt_new.append(line) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -def has_mixed_case(word: str) -> bool: - """ - Check if a word has capital letters in positions other than the first. - - This detects words like "SimpleFeedForward", "DeepNPTS", etc. that should - be preserved as-is. - - :param word: word to check - :return: True if the word has capital letters after the first position - """ - if len(word) <= 1: - return False - # Check if any character after the first position is uppercase. - return any(c.isupper() for c in word[1:]) - - -def _capitalize_title_word(word: str) -> str: - """ - Capitalize the first letter of a word without capitalizing after apostrophes. 
- - Python's `str.title()` capitalizes the first letter after ANY non-alphanumeric - character, including apostrophes. For example, `"won't".title()` returns - `"Won'T"` instead of the expected `"Won't"`. - - This function instead capitalizes only the first letter of the word and - lowercases any uppercase letters that follow an apostrophe. - - :param word: word to capitalize - :return: word with proper title case (first letter capitalized, no capitals - after apostrophes) - """ - if not word: - return word - chars = list(word) - chars[0] = chars[0].upper() - for i in range(1, len(chars)): - if chars[i - 1] == "'": - chars[i] = chars[i].lower() - return "".join(chars) - - -def capitalize_header(lines: List[str]) -> List[str]: - """ - Improve the header and slide titles. - - - Headers start with one or more `#`s - - Slide titles start with one `*` - - - The title is transformed to title case as below: - - ML theory -> ML Theory - - A map of machine learning -> A Map of Machine Learning - - Business strategists -> - Business Strategists - - Establish a phased, collaborative approach -> - Establish a Phased, Collaborative Approach - - - Strings inside backticks, single quotes, and double quotes are preserved, - with careful handling to avoid matching apostrophes in contractions. - - Words with internal capital letters are preserved (e.g., SimpleFeedForward, - DeepNPTS). - - Contractions and words with apostrophes are properly capitalized - (e.g., "won't" becomes "Won't", not "Won'T"). - - Headers inside fenced code blocks are not processed. - """ - import helpers.hmarkdown_fenced_blocks as hmafeblo - - hdbg.dassert_isinstance(lines, list) - # Replace fenced blocks with tags to prevent processing headers inside them. - lines_without_fenced, fence_map = hmafeblo.replace_fenced_blocks_with_tags( - lines - ) - txt_new: List[str] = [] - for i, line in enumerate(lines_without_fenced): - # Parse header (starting with `#`) and slide title (starting with `*`). 
- m = re.match(r"^(\#+|\*) (.*)$", line) - if m: - # Parse the title. - title = m.group(2) - # Transform to title case, leaving words that are all capitalized - # and conjunctions as is, while preserving quoted strings. - non_cap_words = { - "a", - "an", - "and", - "as", - "at", - "but", - "by", - "for", - "in", - "of", - "on", - "or", - "the", - "to", - "vs", - "with", - } - # Find and temporarily replace quoted strings to preserve them. - quoted_strings = [] - placeholders = [] - # Pattern to match strings inside backticks, single quotes, or double quotes. - # Single quotes are matched only when not preceded or followed by word - # characters, to avoid matching apostrophes in contractions like "don't". - # Backtick and double-quote patterns are simpler since they're less likely - # to be used in natural text. - quote_pattern = r""" - ( # Start of alternation - `[^`]*` # Backtick-quoted string - | # OR - (? str: - quoted_strings.append(match.group(0)) - placeholder = f"__QUOTED_{len(quoted_strings) - 1}__" - placeholders.append(placeholder) - return placeholder - - # Replace quoted strings with placeholders. - title_with_placeholders = re.sub( - quote_pattern, replace_quoted, title, flags=re.VERBOSE - ) - # Split into words. - words = title_with_placeholders.split() - # Find the first non-numeric word index to always capitalize it, - # even if it's in non_cap_words (e.g., "4.4 the Victim" -> "4.4 The Victim"). - first_text_word_idx = None - for j, word in enumerate(words): - if word.startswith("__QUOTED_") and word.endswith("__"): - continue - # Skip numeric/punctuation-only prefixes like "4.4", "1.", "1.2.3". - if not re.match(r"^[\d\.\-]+$", word): - first_text_word_idx = j - break - # If all words are numeric, fall back to index 0. - if first_text_word_idx is None and words: - first_text_word_idx = 0 - # Process each word. 
- for i, word in enumerate(words): - if word.startswith("__QUOTED_") and word.endswith("__"): - # Skip placeholder words, they will be restored later. - continue - elif i == first_text_word_idx and not word.isupper(): - # Capitalize the first text word (may follow numeric prefix - # like "4.4") even if it's in non_cap_words. - if _has_internal_capitals(word): - # Preserve words with internal capitals. - pass - else: - words[i] = _capitalize_title_word(word) - elif word.isupper(): - # Skip words that are all caps (e.g. ML, API). - continue - elif _has_internal_capitals(word): - # Preserve words with internal capitals (e.g., SimpleFeedForward). - pass - elif word.lower() in non_cap_words: - # Don't capitalize conjunctions and other minor words. - words[i] = word.lower() - else: - # Capitalize other words. - words[i] = _capitalize_title_word(word) - title = " ".join(words) - # Restore quoted strings. - for i, placeholder in enumerate(placeholders): - title = title.replace(placeholder, quoted_strings[i]) - # Reconstruct the line. - line = m.group(1) + " " + title - txt_new.append(line) - else: - txt_new.append(line) - # Restore fenced blocks. - txt_new = hmafeblo.replace_tags_with_fenced_blocks(txt_new, fence_map) - hdbg.dassert_isinstance(txt_new, list) - return txt_new - - -# ############################################################################# -# Header processing -# ############################################################################# - - -# TODO(gp): This could be done by processing `HeaderList`. -def extract_section_from_markdown( - lines: List[str], header_name: str -) -> List[str]: - """ - Extract a section of text from a Markdown document based on the header - name. - - The function identifies a section by locating the specified header - and captures all lines until encountering another header of the same - or higher level. Headers are identified by the '#' prefix, and their - level is determined by the number of '#' characters. 
- - :param lines: markdown content as a list of strings - :param header_name: exact header name to extract (excluding `#` - symbols) - :return: extracted section as a list of strings, including the header line - itself and all lines until the next header of the same or higher - level - """ - hdbg.dassert_isinstance(lines, list) - _LOG.debug(hprint.to_str("lines")) - extracted_lines = [] - # Level of the current header being processed. - current_level: Optional[int] = None - # Flag to indicate if we're inside the desired section. - inside_section: bool = False - found = False - # Process each line in the markdown content. - for line in lines: - _LOG.debug(hprint.to_str("line")) - # Check if the line is a markdown header. - if line.strip().startswith("#"): - # Determine the level of the header by counting leading '#' - # characters. - header_level = len(line) - len(line.lstrip("#")) - # Extract the actual header text by stripping '#' and surrounding - # whitespace. - header_text = line.strip("#").strip() - _LOG.debug(hprint.to_str("header_level, header_text")) - # Handle the end of the desired section when encountering another - # header. - if inside_section: - hdbg.dassert_is_not(current_level, None) - current_level = cast(int, current_level) - if header_level <= current_level: - break - # Check if the current line is the desired header. - if header_text == header_name: - found = True - # Set the level of the matched header. - current_level = header_level - # Mark that we are now inside the desired section. - inside_section = True - # Add the line to the output if inside the desired section. 
- if inside_section: - extracted_lines.append(line) - _LOG.debug(hprint.to_str("extracted_lines")) - if not found: - raise ValueError(f"Header '{header_name}' not found") - hdbg.dassert_isinstance(extracted_lines, list) - return extracted_lines - - -# ############################################################################# -# HeaderInfo -# ############################################################################# - - -@dataclasses.dataclass -class HeaderInfo: - """ - Store the header level, the description, and the line number in the - original file. - - E.g., `(1, "Chapter 1", 5)` and `(2, "Section 1.1", 10)` - """ - - level: int - description: str - line_number: int - - def __init__(self, level: int, description: str, line_number: int): - hdbg.dassert_isinstance(level, int) - hdbg.dassert_lte(1, level) - self.level = level - # - hdbg.dassert_isinstance(description, str) - hdbg.dassert_ne( - description, - "", - "Invalid HeaderInfo: %s, %s, %s", - level, - description, - line_number, - ) - self.description = description - # - hdbg.dassert_isinstance(line_number, int) - hdbg.dassert_lte(1, line_number) - self.line_number = line_number - # - self.children: List[HeaderInfo] = [] - - def as_tuple(self) -> Tuple[int, str, int]: - return (self.level, self.description, self.line_number) - - def __repr__(self) -> str: - return ( - f"HeaderInfo({self.level}, '{self.description}', {self.line_number})" - ) - - -HeaderList = List[HeaderInfo] - - -def header_list_to_str(header_list: HeaderList) -> str: - """ - Convert a list of headers into a string. - - :param header_list: list of headers - :return: string representation of the header list - """ - return "\n".join([str(header) for header in header_list]) - - -def sanity_check_header_list(header_list: HeaderList) -> None: - """ - Check that the header list is valid. - - 1) The first header should be level 1. - 2) All level 1 headers are unique. 
- 3) Check that consecutive elements in the header list only increase by at - most one level at a time (even if it can decrease by multiple levels). - - E.g., the following is valid: - ``` - # Header 1 - # Header 2 - ## Header 2.1 - ## Header 2.2 - # Header 3 - ``` - - E.g., the following is valid: - ``` - # Header1 - ## Header 1.1 - ### Header 1.1.1 - # Header 2 - ``` - - E.g., the following is not valid: - ``` - # Header 1 - ### Header 1.0.1 - # Header 2 - ``` - - :param header_list: list of headers to validate - """ - # 1) The first header should be level 1. - if header_list and header_list[0].level > 1: - _LOG.warning( - "First header '%s' at line %s is not level 1, but %s", - header_list[0].description, - header_list[0].line_number, - header_list[0].level, - ) - # 2) All level 1 headers are unique. - level_1_headers = [ - header.description for header in header_list if header.level == 1 - ] - hdbg.dassert_no_duplicates(level_1_headers) - # 3) Check that consecutive elements in the header list only increase by at - # most one level at a time (even if it can decrease by multiple levels). - if len(header_list) > 1: - for i in range(1, len(header_list)): - hdbg.dassert_isinstance(header_list[i - 1], HeaderInfo) - hdbg.dassert_isinstance(header_list[i], HeaderInfo) - if header_list[i].level - header_list[i - 1].level > 1: - msg = [] - msg.append( - "Consecutive headers increase by more than one level:" - ) - msg.append(f" {header_list[i - 1]}") - msg.append(f" {header_list[i]}") - msg = "\n".join(msg) - raise ValueError(msg) - - -# TODO(gp): Move sanity check outside? -def extract_headers_from_markdown( - lines: List[str], max_level: int, *, sanity_check: bool = True -) -> HeaderList: - """ - Extract headers from Markdown file and return an `HeaderList`. 
- - :param lines: content of the input Markdown file as list of strings - :param max_level: maximum header levels to parse (e.g., '3' parses all levels - included `###`, but not `####`) - :param sanity_check: whether to check that the header list is valid - :return: generated `HeaderList`, e.g., - ``` - [ - (1, "Chapter 1", 5), - (2, "Section 1.1", 10), ...] - ``` - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_level) - header_list: HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - # TODO(gp): Use the iterator. - # Skip the visual separators. - if is_markdown_line_separator(line): - continue - # Get the header level and title. - is_header_, level, title = is_header(line) - if is_header_ and level <= max_level: - header_info = HeaderInfo(level, title, line_number) - header_list.append(header_info) - # Check the header list. - if sanity_check: - sanity_check_header_list(header_list) - else: - _LOG.debug("Skipping sanity check") - hdbg.dassert_isinstance(header_list, list) - return header_list - - -def header_list_to_vim_cfile( - markdown_file: str, header_list: HeaderList -) -> List[str]: - """ - Convert a list of headers into a Vim cfile format. - - Use the generated file in Vim as: - `:cfile ` - Use `:cnext` and `:cprev` to navigate between headers. - - :param markdown_file: path to the input Markdown file - :param header_list: list of headers, where each header is a tuple containing - the line number, level, and title - :return: generated cfile content as a list of strings in the format: - ``` - ... - ::
- ... - ``` - """ - hdbg.dassert_isinstance(markdown_file, str) - hdbg.dassert_isinstance(header_list, list) - _LOG.debug(hprint.to_str("header_list")) - output_lines = [ - f"{markdown_file}:{header_info.line_number}:{header_info.description}" - for header_info in header_list - ] - hdbg.dassert_isinstance(output_lines, list) - return output_lines - - -def header_list_to_markdown(header_list: HeaderList, mode: str) -> List[str]: - """ - Convert a list of headers into a Markdown format. - - :param header_list: list of headers, where each header is a tuple - containing the level, title, and line number - :param mode: format of the output: - - `list`: indents headers to create a nested list - - `headers`: uses Markdown header syntax (e.g., '#', '##', '###') - :return: generated Markdown content as a list of strings - """ - hdbg.dassert_isinstance(header_list, list) - _LOG.debug(hprint.to_str("header_list mode")) - output_lines = [] - for header_info in header_list: - level, title, line_number = header_info.as_tuple() - _ = line_number - if mode == "list": - header_prefix = " " * (level - 1) + "-" - elif mode == "headers": - header_prefix = "#" * level - else: - raise ValueError(f"Invalid mode '{mode}'") - output_lines.append(f"{header_prefix} {title}") - hdbg.dassert_isinstance(output_lines, list) - return output_lines - - -# ############################################################################# -# Process headers. -# ############################################################################# - - -def format_headers(lines: List[str], out_file_name: str, max_lev: int) -> None: - """ - Format the headers in the input lines and write the formatted text to the - output file. 
- - :param lines: list of input lines to process - :param out_file_name: name of the output file to write the formatted - text to - :param max_lev: maximum level of headings to include in the - formatted text - """ - hdbg.dassert_isinstance(lines, list) - txt = lines[:] - # - for line in txt: - m = re.search(r"max_level=(\d+)", line) - if m: - max_lev = int(m.group(1)) - _LOG.warning("Inferred max_level=%s", max_lev) - break - hdbg.dassert_lte(1, max_lev) - # Remove all headings. - txt_tmp = [] - for line in txt: - # Keep the comments. - if not is_markdown_line_separator(line): - txt_tmp.append(line) - txt = txt_tmp[:] - # Add proper heading of the correct length. - txt_tmp = [] - for line in txt: - # Keep comments. - found = False - for i in range(1, max_lev + 1): - if line.startswith("#" * i + " "): - row = "#" * i + " " + "#" * (79 - 1 - i) - txt_tmp.append(row) - txt_tmp.append(line) - txt_tmp.append(row) - found = True - if not found: - txt_tmp.append(line) - # TODO(gp): Remove all empty lines after a heading. - # TODO(gp): Format title (first line capital and then small). - hparser.to_file(txt_tmp, out_file_name) - - -def modify_header_level(lines: List[str], level: int) -> List[str]: - """ - Increase or decrease the level of headings by the specified amount. - - :param lines: input lines to modify - :param level: amount to adjust header levels (positive increases, - negative decreases) - :return: modified lines with header levels adjusted - """ - hdbg.dassert_isinstance(lines, list) - txt_tmp = [] - for line in lines: - # TODO(gp): Use the iterator. - line = line.rstrip(r"\n") - is_header_, current_level, title = is_header(line) - if is_header_: - modified_level = current_level + level - # Ensure modified level is within valid range (1-6 for markdown headers). 
- hdbg.dassert_lte(1, modified_level) - hdbg.dassert_lte(modified_level, 6) - line = "#" * modified_level + " " + title - txt_tmp.append(line) - hdbg.dassert_isinstance(txt_tmp, list) - return txt_tmp - - -# ############################################################################# -# _HeaderTreeNode -# ############################################################################# - - -# This is a different representation of the data than the one in `HeaderList` -# because it is a tree structure. So we use a different type hint. -_HeaderTree = List[HeaderInfo] - - -def build_header_tree(header_list: HeaderList) -> _HeaderTree: - """ - Build a tree (list of Node objects) from the flat list. - - We assume that the level changes never jump by more than 1. - - :param header_list: flat list of headers - :return: tree structure of headers - """ - tree: _HeaderTree = [] - stack: _HeaderTree = [] - for node in header_list: - if node.level == 1: - tree.append(node) - stack = [node] - else: - # Pop until we find the proper parent: one with level < current - # level. - while stack and stack[-1].level >= node.level: - stack.pop() - if stack: - stack[-1].children.append(node) - else: - tree.append(node) - stack.append(node) - # hdbg.dassert_eq(len(header_list), len(tree)) - # hdbg.dassert_eq(len(stack), 0) - return tree - - -def _find_header_tree_ancestry( - tree: _HeaderTree, level: int, description: str -) -> Optional[_HeaderTree]: - """ - Recursively search for the node matching (level, description). - - If found, return the ancestry as a list from the root down to that - node. Otherwise return None. 
- - :param tree: header tree to search - :param level: header level to match - :param description: header description to match - :return: ancestry list from root to matching node, or None if not - found - """ - for node in tree: - if node.level == level and node.description == description: - return [node] - result = _find_header_tree_ancestry(node.children, level, description) - if result: - return [node] + result - return None - - -def header_tree_to_str( - tree: _HeaderTree, - ancestry: Optional[_HeaderTree], - *, - open_modifier: str = "**", - close_modifier: str = "**", - indent: int = 0, -) -> str: - """ - Return the tree as a string. - - Only expand (i.e. recursively include children) for a node if it is part of - the ancestry of the selected node. - - :param tree: tree to convert to a string - :param ancestry: ancestry of the selected node - :param open_modifier: modifier to use for the open of the selected node - :param close_modifier: modifier to use for the close of the selected node - :param indent: indent of the tree - :return: string representation of the tree - - - Nodes not in the ancestry are included on one line (even if they have - children). - - The selected node (last in the ancestry) is included highlighted. - """ - prefix = " " * indent + "- " - result = [] - for node in tree: - _LOG.debug(hprint.to_str("node")) - # Check if this node is the next expected one in the ancestry branch. - if ancestry and node is ancestry[0]: - # If this is the last in the ancestry, it is the selected node. - val = prefix - if len(ancestry) == 1: - val += open_modifier + node.description + close_modifier - else: - val += node.description - _LOG.debug("-> %s", hprint.to_str("val")) - if val: - result.append(val) - # Expand this node’s children using the rest of the ancestry. 
- val = header_tree_to_str( - node.children, - ancestry[1:], - indent=indent + 1, - open_modifier=open_modifier, - close_modifier=close_modifier, - ) - else: - # For nodes not on the selected branch, include them without - # expanding. - val = prefix + node.description - _LOG.debug("-> %s", hprint.to_str("val")) - if val: - result.append(val) - return "\n".join(result) - - -def selected_navigation_to_str( - tree: _HeaderTree, - level: int, - description: str, - *, - open_modifier: str = "**", - close_modifier: str = "**", -) -> str: - """ - Given a level and description for the selected node, print the navigation. - - :param tree: header tree - :param level: level of the selected node - :param description: description of the selected node - :param open_modifier: modifier for opening the selected node - :param close_modifier: modifier for closing the selected node - :return: navigation string with selected node highlighted - """ - ancestry = _find_header_tree_ancestry(tree, level, description) - hdbg.dassert_ne( - ancestry, - None, - "Node (%s, '%s') not found", - level, - description, - ) - _LOG.debug(hprint.to_str("ancestry")) - txt = header_tree_to_str( - tree, - ancestry, - open_modifier=open_modifier, - close_modifier=close_modifier, - ) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py deleted file mode 100644 index a471a44cc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_rules.py +++ /dev/null @@ -1,367 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_rules as hmarrule -""" - -import logging -import re -from typing import Dict, List - -import helpers.hdbg as hdbg -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -from 
helpers.hmarkdown_headers import ( - extract_headers_from_markdown, - sanity_check_header_list, -) - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Add a decorator like in hprint to process both strings and lists -# of strings. - -# ############################################################################# -# Rules processing. -# ############################################################################# - -# Rules are organized in 4 levels of a markdown file: -# -# 1) Rule sets (level 1) -# - E.g., `General`, `Python`, `Notebooks`, `Markdown` -# - Level 1 is a set of rules determined mainly by the type of the file we -# are processing -# - Several sets of rules can be applied to a given file type -# - E.g., rules in `Python` and `Notebooks` apply to all Python files -# 2) Sections (level 2) -# - E.g., `Naming`, `Comments`, `Code_design`, `Imports`, `Type_annotations` -# 3) Targets (level 3) -# - E.g., LLM vs Linter -# 4) Atomic rules (level 4) -# - This is the set of rules that are applied to the file -# ``` -# - Spell commands in lower case and programs with the first letter in upper case -# - E.g., `git` as a command, `Git` as a program -# - E.g., capitalize the first letter of `Python` -# ``` - -# Extract the rules from the markdown file: -# ``` -# > extract_toc_from_txt.py \ -# -i docs/code_guidelines/all.coding_style_guidelines.reference.md \ -# --max_level 2 -# - General -# - Spelling -# - LLM -# - Linter -# - Python -# - Naming -# - LLM -# - Linter -# - Docstrings -# - ... 
-# - Comments -# - Code_implementation -# - Code_design -# - Imports -# - Type_annotations -# - Functions -# - Scripts -# - Logging -# - Misc -# - Unit_tests -# - All -# - Notebooks -# - General -# - Plotting -# - Jupytext -# - Markdown -# - Naming -# - General -# ``` - -# - The rules to apply to a Python file are automatically extractedas: -# `([`General:*`, `Python:*`], `LLM`)` -# - The rules to apply to a Notebook file are automatically extracted as: -# `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` -# - A user can specify to apply a subset of rules like -# `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` -# - Atomic rules are the first-level bullets of the markdown file, e.g., -# ``` -# - Spell commands in lower case and programs with the first letter in upper case -# - E.g., `git` as a command, `Git` as a program -# - E.g., capitalize the first letter of `Python` -# ``` - - -def sanity_check_rules(lines: List[str]) -> None: - """ - Sanity check the rules. - - :param lines: list of text lines to check - """ - header_list = extract_headers_from_markdown(lines, max_level=5) - # 1) Start with level 1 headers. - # 2) All level 1 headers are unique. - # 3) Header levels are increasing / decreasing by at most 1. - sanity_check_header_list(header_list) - # 4) Level 3 headers are always `LLM` or `Linter`. - # for header in header_list: - # if header.level != 3: - # hdbg.dassert_in(header.description, ["LLM", "Linter"]) - # TODO(gp): Implement this. - # 5) All headers have no spaces. - # TODO(gp): Implement this. 
- - -# A `Rule` is a string separated by `:` characters, where each part can be: -# - `*` (which means "match any string") -# - a `string` (e.g., `Spelling`) -# - a list of strings separated by `|` (e.g., `LLM|Linter`) -# -# E.g., valid rules are: -# - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` -# - For a Python file -> `General|Python:*:LLM` -# - For a Notebook file -> `General|Python|Notebooks:*:LLM` -# - `Python:Naming|Docstrings|Comments:LLM` -SelectionRule = str - - -# A `Guidelines`` is a header list with only level 1 headers storing the full -# hierarchy of the rules as a description, e.g., -# `(1, "Spelling:All:LLM", xyz)` -# TODO(gp): Make Guidelines descend from HeaderList. - -HeaderInfo = hmarhead.HeaderInfo -HeaderList = hmarhead.HeaderList -Guidelines = HeaderList - - -def convert_header_list_into_guidelines( - header_list: HeaderList, -) -> Guidelines: - """ - Convert the header list into a `Guidelines` object with only level 1 - headers and full hierarchy of the rules as description. - - Expand a header list like: - ``` - - General - - Spelling - - LLM - - Linter - - Python - - Naming - - LLM - - Linter - ``` - represented internally as: - ``` - (1, "General", xyz), - (2, "Spelling", xyz), - (3, "LLM", xyz), - (3, "Linter", xyz), - (1, "Python", xyz), - (2, "Naming", xyz), - (3, "LLM", xyz), - (3, "Linter", xyz), - ``` - into: - ``` - [ - (1, "Spelling:All:LLM", xyz), - (1, "Spelling:All:Linter", xyz), - (1, "Python:Naming:LLM", xyz), - (1, "Python:Naming:Linter", xyz), - ] - ``` - - :param header_list: input header list to convert - :return: guidelines with flattened hierarchy - """ - hdbg.dassert_isinstance(header_list, list) - # Store the last level headers. - level_1 = "" - level_2 = "" - # Accumulate the last level headers. - level_3_headers = [] - # Scan the header list. - for header_info in header_list: - level, description, line_number = header_info.as_tuple() - # Store the headers found at each level. 
- if level == 1: - level_1 = description - elif level == 2: - level_2 = description - elif level == 3: - # Store the level 3 header. - hdbg.dassert_ne(level_1, "") - hdbg.dassert_ne(level_2, "") - full_level_3 = f"{level_1}:{level_2}:{description}" - header_info_tmp = HeaderInfo(1, full_level_3, line_number) - level_3_headers.append(header_info_tmp) - else: - raise ValueError(f"Invalid header info={header_info}") - return level_3_headers - - -def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: - r""" - Convert a rule into an actual regular expression. - - E.g., - - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` - - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` - - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` - - `Python:*:Linter` -> `Python:(\S*):Linter` - - :param selection_rule: rule to convert to regex - :return: regex pattern string - """ - hdbg.dassert_isinstance(selection_rule, SelectionRule) - # Parse the rule into tokens. - selection_rule_parts = selection_rule.split(":") - hdbg.dassert_eq(len(selection_rule_parts), 3) - # Process each part of the rule regex. - rule_parts_out = [] - for rule_part_in in selection_rule_parts: - hdbg.dassert_not_in(" ", rule_part_in) - if rule_part_in == "*": - # Convert `*` into `\S*`. - rule_part_out = r"(\S*)" - elif "|" in rule_part_in: - # Convert `LLM|Linter` into `(LLM|Linter)`. - rule_part_out = "(" + rule_part_in + ")" - else: - # Keep the string as is. - rule_part_out = rule_part_in - rule_parts_out.append(rule_part_out) - # Join the parts of the rule back together. - rule_out = ":".join(rule_parts_out) - return rule_out - - -def extract_rules( - guidelines: Guidelines, selection_rules: List[SelectionRule] -) -> Guidelines: - """ - Extract the set of rules from the `guidelines` that match the rule regex. 
- - :param guidelines: guidelines to extract the rules from - :param selection_rules: selection rules to use to extract the rules - :return: extracted rules - """ - hdbg.dassert_isinstance(guidelines, list) - hdbg.dassert_isinstance(selection_rules, list) - # A rule regex is a string separated by `:` characters, where each part is - # - `*` (meaning "any string") - # - a `string` (e.g., `Spelling`) - # - a list of strings separated by `|` (e.g., `LLM|Linter`) - # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. - # Convert each rule regex into a regular expression. - rule_regex_map: Dict[str, str] = {} - for rule_regex_str in selection_rules: - hdbg.dassert_isinstance(rule_regex_str, SelectionRule) - regex = _convert_rule_into_regex(rule_regex_str) - _LOG.debug(hprint.to_str("rule_regex_str regex")) - hdbg.dassert_not_in(rule_regex_str, rule_regex_map) - rule_regex_map[rule_regex_str] = regex - # Extract the set of rules from the `guidelines` that match the rule regex. - rule_sections = [] - for guideline in guidelines: - # A guideline description is a string separated by `:` characters, where each part is - # (1, "Python:Naming:Linter", xyz), - for k, v in rule_regex_map.items(): - if re.match(v, guideline.description): - _LOG.debug("%s matches %s", k, guideline.description) - if guideline not in rule_sections: - rule_sections.append(guideline) - # Select the rules. - _LOG.debug( - "Selected %s sections:\n%s", - len(rule_sections), - "\n".join([r.description for r in rule_sections]), - ) - return rule_sections - - -# TODO(gp): This seems private? -def parse_rules_from_txt(lines: List[str]) -> List[str]: - """ - Parse rules from a chunk of markdown text. - - - Extract first-level bullet point list items from text until the next one. - - Sub-lists nested under first-level items are extracted together with the - first-level items. 
- - :param lines: list of text lines to process - ``` - - Item 1 - - Item 2 - - Item 3 - - Item 4 - ``` - :return: extracted bullet points - """ - hdbg.dassert_isinstance(lines, list) - # Store the first-level bullet points. - bullet_points = [] - # Store the current item including the first level bullet point and all - # its sub-items. - current_item = "" - for line in lines: - line = line.rstrip() - if not line: - continue - if re.match(r"^- ", line): - # Match first-level bullet point item. - if current_item: - # Store the previous item, if any. - bullet_points.append(current_item) - # Start a new first-level bullet point item. - current_item = line - elif re.match(r"^\s+- ", line): - # Match a sub-item (non first-level bullet point item). - # Append a sub-item to the current item. - current_item += "\n" + line - elif len(line.strip()) != 0 and current_item: - # Append a line to the current item. - current_item += "\n" + line - # Add the last item if there is one. - if current_item: - bullet_points.append(current_item) - hdbg.dassert_isinstance(bullet_points, list) - return bullet_points - - -def extract_rules_from_section( - lines: List[str], start_line_number: int -) -> List[str]: - """ - Extract rules from a section of a markdown file. - - :param lines: list of markdown text lines to extract the rules from - :param start_line_number: line number of the section to start extracting - the rules from - :return: extracted rules - """ - hdbg.dassert_isinstance(lines, list) - # Find the line number of the next header. - end_line_number = start_line_number - while True: - hdbg.dassert_lt(end_line_number, len(lines)) - line = lines[end_line_number] - if line.startswith("#"): - break - end_line_number += 1 - _LOG.debug("end_line_number=%s", end_line_number) - # Parse the markdown text into a list of bullet points. - bullet_points = parse_rules_from_txt( - lines[start_line_number:end_line_number] - ) - # Extract the rules from the bullet points. 
- rules = [] - for bullet_point in bullet_points: - rules.append(bullet_point) - hdbg.dassert_isinstance(rules, list) - return rules diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py deleted file mode 100644 index 2cefec7a8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_slides.py +++ /dev/null @@ -1,201 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_slides as hmarslid -""" - -import logging -import re -from typing import Any, Callable, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -from helpers.hmarkdown_comments import process_comment_block -from helpers.hmarkdown_headers import ( - HeaderInfo, - HeaderList, - is_markdown_line_separator, -) - -_LOG = logging.getLogger(__name__) - - -_TRACE = True - - -def extract_slides_from_markdown( - lines: List[str], -) -> Tuple[HeaderList, int]: - """ - Extract slides (i.e., sections prepended by `*`) from Markdown file and - return an `HeaderList`. - - :param lines: content of the input Markdown file as list of strings - :return: tuple containing: - - generated `HeaderList` - ``` - [ - (1, "Slide 1", 5), - (1, "Slide 2", 10), ...] - ``` - - last line number of the file, e.g., '100' - """ - hdbg.dassert_isinstance(lines, list) - header_list: HeaderList = [] - # Process the input file to extract headers. - for line_number, line in enumerate(lines, start=1): - _LOG.debug("%d: %s", line_number, line) - # TODO(gp): Use the iterator. - # Skip the visual separators. - if is_markdown_line_separator(line): - continue - # Get the header level and title. 
- m = re.match(r"^\* (.*)$", line) - if m: - title = m.group(1) - header_info = HeaderInfo(1, title, line_number) - header_list.append(header_info) - last_line_number = len(lines) - # Return results. - hdbg.dassert_isinstance(header_list, list) - return header_list, last_line_number - - -# TODO(gp): Consider passing and returning List[str] -def process_slides(txt: str, transform: Callable[..., Any]) -> str: - """ - Process markdown text by applying a transform function to each slide. - - - Slides are sections prepended by `*` - - The text is processed by: - - Extracting the slides one by one - - Calling a `transform()` function on each slide (defined by the user) - - Joining the transformed slides back together - - Comments are left untouched. - - :param txt: markdown text to process - :param transform: function to transform each slide - :return: transformed text - """ - hdbg.dassert_isinstance(txt, str) - # Text of the current slide. - slide_txt: List[str] = [] - # Store all the transformed slides. - transformed_txt: List[str] = [] - # True inside a block to skip. - in_skip_block = False - # True inside a slide. - in_slide = False - # Track line number where slide started. - slide_start_line = 0 - lines = txt.splitlines() - for i, line in enumerate(lines): - _LOG.debug("%s:line='%s'", i, line) - # 1) Remove comment block. - do_continue, in_skip_block = process_comment_block(line, in_skip_block) - if _TRACE: - _LOG.debug(" -> %s", hprint.to_str("do_continue in_skip_block")) - if do_continue: - transformed_txt.append(line) - continue - # 2) Process slide. - if _TRACE: - _LOG.debug(" -> %s", hprint.to_str("in_slide")) - if line.startswith("* ") or line.startswith("#### "): - _LOG.debug("### Found slide") - # Found a slide or the end of the file. - if slide_txt: - _LOG.debug("# Transform slide") - # Transform the slide. 
- slide_title = slide_txt[0] - transformed_slide = transform( - slide_txt, - slide_title=slide_title, - slide_line_number=slide_start_line, - ) - hdbg.dassert_isinstance(transformed_slide, list) - transformed_txt.extend(transformed_slide) - else: - _LOG.debug("# First slide") - # Start a new slide. - slide_txt = [] - slide_txt.append(line) - slide_start_line = i - in_slide = True - elif in_slide: - _LOG.debug("# Accumulate slide") - slide_txt.append(line) - else: - _LOG.debug("# Accumulate txt outside slide") - transformed_txt.append(line) - # Process the last slide, if needed. - if slide_txt: - hdbg.dassert(in_slide) - in_slide = False - # Transform the slide. - slide_title = slide_txt[0] - transformed_slide = transform( - slide_txt, - slide_title=slide_title, - slide_line_number=slide_start_line, - ) - hdbg.dassert_isinstance(transformed_slide, list) - transformed_txt.extend(transformed_slide) - # - hdbg.dassert( - not in_skip_block, - "Found end of file while still parsing a comment block", - ) - hdbg.dassert(not in_slide, "Found end of file while still parsing a slide") - # Join the transformed slides back together. - result = "\n".join(transformed_txt) - return result - - -# ############################################################################# -# Slides conversion to markdown and back -# ############################################################################# - - -def convert_slide_to_markdown(lines: List[str], *, level: int = 5) -> List[str]: - """ - Convert slide to standard markdown. - - - Handle * bullets to markdown headers level 5 - - :param lines: list of lines to convert - :param level: level of the markdown headers to convert to - :return: list of converted lines - """ - hdbg.dassert_isinstance(lines, list) - converted_lines = [] - for line in lines: - if line.startswith("* "): - # Convert slide bullet to markdown header level 5. 
- converted_line = "#" * level + " " + line[2:] - converted_lines.append(converted_line) - else: - converted_lines.append(line) - return converted_lines - - -def convert_markdown_to_slide(lines: List[str], *, level: int = 5) -> List[str]: - """ - Convert standard markdown back to slide. - - - Handle markdown headers level 5 to * bullets - - :param lines: list of lines to convert - :param level: level of the markdown headers to convert to - :return: list of converted lines - """ - hdbg.dassert_isinstance(lines, list) - converted_lines = [] - for line in lines: - if line.startswith("#" * level + " "): - # Convert markdown header level 5 back to slide bullet. - converted_line = "* " + line[6:] - converted_lines.append(converted_line) - else: - converted_lines.append(line) - return converted_lines diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py deleted file mode 100644 index becc00b09..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_tables.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_tables as hmartabl -""" - -import logging -from typing import Dict, List, Tuple - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -def replace_tables_with_tags( - lines: List[str], -) -> Tuple[List[str], Dict[str, str]]: - """ - Replace markdown tables with tag and return mapping from tags to the table. 
- - E.g., - ``` - Some text before - | Column 1 | Column 2 | - |----------|----------| - | Value 1 | Value 2 | - | Value 3 | Value 4 | - More text after - ``` - is replaced with: - ``` - Some text before - - More text after - ``` - - :param lines: list of lines to process - :return: tuple containing: - - list of lines with the tables replaced by tags - - mapping from tags to the table text - """ - hdbg.dassert_isinstance(lines, list) - result = [] - table_map = {} - table_count = 0 - i = 0 - while i < len(lines): - line = lines[i].strip() - # Check if this line starts a table (contains |). - if "|" in line and line.strip(): - # Look ahead to see if next line is a separator. - if i + 1 < len(lines): - next_line = lines[i + 1].strip() - # Check if next line is a table separator (contains --- and |). - if "|" in next_line and "-" in next_line: - # Found a table, collect all table lines. - table_lines = [] - # Add header line. - table_lines.append(lines[i]) - i += 1 - # Add separator line. - table_lines.append(lines[i]) - i += 1 - # Add data rows (continue while lines contain |). - while ( - i < len(lines) - and "|" in lines[i].strip() - and lines[i].strip() - ): - table_lines.append(lines[i]) - i += 1 - # Store the table. - table_count += 1 - table_text = "\n".join(table_lines) - table_map[str(table_count)] = table_text - result.append(f"") - continue - # Not a table line, add as-is. - result.append(lines[i]) - i += 1 - return result, table_map - - -def replace_tags_with_tables( - lines: List[str], table_map: Dict[str, str] -) -> List[str]: - """ - Replace tags with markdown tables. - - :param lines: list of lines to process - :param table_map: mapping from tags to table text - :return: list of lines with tags replaced by tables - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(table_map, dict) - # Initialize output. - result = [] - table_map_copy = table_map.copy() - # Parse data. 
- for line in lines: - if line.startswith(""): - # Extract table number from tag like . - tag_match = line[6:-1] # Remove '' - hdbg.dassert_in( - tag_match, table_map_copy, f"Found unmatched tag {tag_match}" - ) - # Split table text into lines and add them. - table_text = table_map_copy[tag_match] - table_lines = table_text.split("\n") - result.extend(table_lines) - # Remove used tag from map. - del table_map_copy[tag_match] - else: - result.append(line) - # Ensure all tags were used. - hdbg.dassert_eq( - len(table_map_copy), - 0, - f"Found {len(table_map_copy)} unmatched tags: {list(table_map_copy.keys())}", - ) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py deleted file mode 100644 index 7d8cb8d75..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmarkdown_toc.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Import as: - -import helpers.hmarkdown_toc as hmartoc -""" - -import logging -import os -import re -import tempfile -from typing import Any, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import dev_scripts_helpers.dockerize.lib_markdown_toc as dshdlmato - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# YAML preamble -# ############################################################################# - - -def extract_yaml_frontmatter(lines: List[str]) -> Tuple[List[str], List[str]]: - """ - Extract YAML front matter from the beginning of the file. - - YAML front matter is delimited by `---` at the beginning and end. 
- Example: - ``` - --- - title: My Document - date: 2024-01-01 - --- - ``` - - :param lines: The lines to be processed. - :return: A tuple of (frontmatter_lines, remaining_lines). - """ - _LOG.debug("lines=%s", lines) - # Check if file starts with YAML front matter. - if len(lines) < 3: - # Not enough lines for front matter. - return [], lines - if not re.match(r"^---\s*$", lines[0]): - # No front matter marker at the beginning. - return [], lines - # Find the closing --- marker. - for i in range(1, len(lines)): - if re.match(r"^---\s*$", lines[i]): - # Found closing marker. - frontmatter = lines[: i + 1] - remaining = lines[i + 1 :] - _LOG.debug("Found YAML front matter: %d lines", len(frontmatter)) - return frontmatter, remaining - # No closing marker found, treat as no front matter. - _LOG.debug("No closing YAML front matter marker found") - return [], lines - - -def reattach_yaml_frontmatter( - yaml_frontmatter: List[str], lines: List[str] -) -> List[str]: - """ - Reattach YAML front matter to the beginning of the content lines. - - :param yaml_frontmatter: The YAML front matter lines to reattach. - :param lines: The content lines to prepend the front matter to. - :return: Combined lines with YAML front matter reattached. - """ - if not yaml_frontmatter: - return lines - # Add an empty line after the front matter if the remaining content doesn't - # start with one. - if lines and lines[0] != "": - return yaml_frontmatter + [""] + lines - return yaml_frontmatter + lines - - -# ############################################################################# -# TOC -# ############################################################################# - - -def refresh_toc( - lines: List[str], - *, - use_dockerized_markdown_toc: bool = True, - # TODO(gp): Remove this. - **kwargs: Any, -) -> List[str]: - """ - Refresh the table of contents (TOC) in the given text. - - :param lines: The lines to be processed. 
- :param use_dockerized_markdown_toc: if True, run markdown-toc in a - Docker container - :return: The lines with the updated TOC. - """ - _LOG.debug("lines=%s", lines) - # Check whether there is a TOC otherwise add it. - # Add `` comment in the doc to generate the TOC after that - # line. By default, it will generate at the top of the file. - # This workaround is useful to generate the TOC after the heading of the doc - # at the top and not include it in the TOC. - if "" not in lines: - _LOG.warning("No tags for table of content in md file: adding it") - lines = [""] + lines - txt = "\n".join(lines) - # Write file. - curr_dir = os.getcwd() - tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name - hio.to_file(tmp_file_name, txt) - # Process TOC. - cmd_opts: List[str] = [] - if use_dockerized_markdown_toc: - # Run `markdown-toc` in a Docker container. - use_sudo = hdocker.get_use_sudo() - force_rebuild = False - dshdlmato.run_dockerized_markdown_toc( - tmp_file_name, - cmd_opts, - use_sudo=use_sudo, - force_rebuild=force_rebuild, - ) - else: - # Run `markdown-toc` installed on the host directly. - executable = "markdown-toc" - cmd = [executable] + cmd_opts - cmd.append("-i " + tmp_file_name) - # - cmd_as_str = " ".join(cmd) - _, output_tmp = hsystem.system_to_string(cmd_as_str, abort_on_error=True) - _LOG.debug("output_tmp=%s", output_tmp) - # Read file. - txt = hio.from_file(tmp_file_name) - # Clean up. - os.remove(tmp_file_name) - # Remove empty lines introduced by `markdown-toc`. - txt = hprint.remove_lead_trail_empty_lines(txt) - ret = txt.split("\n") - hdbg.dassert_isinstance(ret, list) - return ret - - -def remove_table_of_contents(txt: str) -> str: - """ - Remove the table of contents from the text of a markdown file. - - The table of contents is stored between - ``` - - ... 
- - ``` - - :param txt: Input markdown text - :return: Text with table of contents removed - """ - txt = re.sub(r".*?", "", txt, flags=re.DOTALL) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py deleted file mode 100644 index b8087b9fd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmatplotlib.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Matplotlib utilities and plotting helpers. - -Import as: - -import helpers.hmatplotlib as hmatplo -""" - -import logging -import math -from typing import Any, Optional, Tuple - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np - -import helpers.hdbg as hdbg -import helpers.hio as hio - -_LOG = logging.getLogger(__name__) - -# Default figure size for plots. -# TODO(gp): Is this used? -FIG_SIZE = (20, 5) - - -def get_multiple_plots( - num_plots: int, - num_cols: int, - y_scale: Optional[float] = None, - *args: Any, - **kwargs: Any, -) -> Tuple[mpl.figure.Figure, np.array]: - """ - Create figure to accommodate `num_plots` plots. - - The figure is arranged in rows with `num_cols` columns. - - :param num_plots: number of plots - :param num_cols: number of columns to use in the subplot - :param y_scale: the height of each plot. If `None`, the size of the whole - figure equals the default `figsize` - :return: figure and array of axes - """ - hdbg.dassert_lte(1, num_plots) - hdbg.dassert_lte(1, num_cols) - # Heuristic to find the dimension of the fig. 
- if y_scale is not None: - hdbg.dassert_lt(0, y_scale) - ysize = math.ceil(num_plots / num_cols) * y_scale - figsize: Optional[Tuple[float, float]] = (20, ysize) - else: - figsize = None - if "tight_layout" not in kwargs and not kwargs.get( - "constrained_layout", False - ): - kwargs["tight_layout"] = True - fig, ax = plt.subplots( - math.ceil(num_plots / num_cols), - num_cols, - figsize=figsize, - *args, - **kwargs, - ) - if isinstance(ax, np.ndarray): - ax = ax.flatten() - else: - ax = np.array([ax]) - # Remove extra axes that can appear when `num_cols` > 1. - empty_axes = ax[num_plots:] - for empty_ax in empty_axes: - empty_ax.remove() - return fig, ax[:num_plots] - - -def save_fig( - fig: Optional[mpl.figure.Figure], - file_name: str, - *, - print_markdown: bool = False, - path_prefix: Optional[str] = None, -) -> None: - """ - Save matplotlib figure to file and optionally print markdown reference. - - :param fig: Matplotlib figure. If None, uses the active figure. - :param file_name: Output filename - :param print_markdown: If True, print markdown image reference - :param path_prefix: Path prefix for markdown reference (e.g., "msml610/lectures_source") - """ - if fig is None: - fig = plt.gcf() - hdbg.dassert_isinstance(fig, mpl.figure.Figure) - hdbg.dassert_isinstance(file_name, str) - hio.create_enclosing_dir(file_name, incremental=True) - fig.savefig(file_name, dpi=300, bbox_inches="tight") - # Use print instead of _LOG.info. 
- print(f"Saved figure to '{file_name}'") - # - if print_markdown: - if path_prefix: - markdown_path = f"{path_prefix}/{file_name}" - else: - markdown_path = file_name - markdown_ref = f"![]({markdown_path})" - print(markdown_ref) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py deleted file mode 100644 index 27e5130ca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmkdocs.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Import as: - -import helpers.hmkdocs as hmkdocs -""" - -import re - -import helpers.hdbg as hdbg -import helpers.hmarkdown as hmarkdo - -# TODO(ai): Make function private. -# TODO(ai): Convert str to List[str] -# TODO(ai): Add unit tests. - - -# TODO(gp): -> hmarkdown_?.py -def dedent_python_code_blocks(txt: str) -> str: - """ - Dedent Python code blocks so they are aligned to column 0. - - This is needed by mkdocs to render a Python code block correctly. - - :param txt: Input markdown text - :return: Text with Python code blocks dedented - """ - import textwrap - - lines = txt.split("\n") - result = [] - # Store whether the parser is inside a code block. - in_python_block = False - # Store the current Python code block. - code_block_lines = [] - # Track whether current block is indented (inside a list item). - block_is_indented = False - for line in lines: - if line.strip() == "```python": - in_python_block = True - # Only dedent top-level blocks (fence at column 0). - block_is_indented = line != line.lstrip() - result.append(line) - elif line.strip() == "```" and in_python_block: - if code_block_lines and not block_is_indented: - # Dedent only top-level code blocks. 
- code_text = "\n".join(code_block_lines) - dedented_code = textwrap.dedent(code_text) - result.extend(dedented_code.split("\n")) - code_block_lines = [] - elif code_block_lines: - # Indented block: pass through unchanged. - result.extend(code_block_lines) - code_block_lines = [] - result.append(line) - in_python_block = False - block_is_indented = False - elif in_python_block: - code_block_lines.append(line) - else: - result.append(line) - return "\n".join(result) - - -def replace_indentation(txt: str, input_spaces: int, output_spaces: int) -> str: - """ - Replace indentation from input_spaces to output_spaces. - - :param txt: Input markdown text - :param input_spaces: Number of spaces to detect as one indentation - level - :param output_spaces: Number of spaces to replace each indentation - level with - :return: Text with indentation replaced - """ - hdbg.dassert_lte(1, input_spaces) - hdbg.dassert_lte(1, output_spaces) - lines = txt.split("\n") - result = [] - for line in lines: - # Count leading spaces. - leading_spaces = len(line) - len(line.lstrip()) - if leading_spaces > 0 and leading_spaces % input_spaces == 0: - # Calculate indentation level and convert to output spaces. - indentation_level = leading_spaces // input_spaces - new_indentation = " " * (indentation_level * output_spaces) - result.append(new_indentation + line.lstrip()) - else: - result.append(line) - return "\n".join(result) - - -def replace_indentation_with_four_spaces(txt: str) -> str: - """ - Replace 2 spaces indentation with 4 spaces since this is what mkdocs needs. - - :param txt: Input markdown text - :return: Text with 2-space indentation replaced with 4-space - indentation - """ - return replace_indentation(txt, input_spaces=2, output_spaces=4) - - -def convert_slides_to_markdown(txt: str, level: int) -> str: - """ - Convert strings storing "slides", i.e., `* ...` to markdown headers. 
- - E.g., - ``` - * Tools for Vision component - ``` - to: - ``` - #### Tools for Vision component - ``` - """ - lines = txt.split("\n") - result = [] - for line in lines: - if line.startswith("* "): - result.append("#" * level + " " + line[2:]) - else: - result.append(line) - return "\n".join(result) - - -def rewrite_absolute_doc_links(txt: str) -> str: - """ - Rewrite absolute /docs/ markdown links to root-relative HTML links. - - MkDocs only converts relative `.md` links to `.html`. Absolute links - like `/docs/path/file.md` are left unchanged and 404 at serve time. - This converts them to `/path/file.html` so they resolve correctly. - - :param txt: Input markdown text - :return: Text with absolute /docs/ links rewritten - """ - - def _replace(m: re.Match) -> str: - path = m.group(1) - # Strip /docs/ prefix and convert .md → .html. - path = re.sub(r"^/docs/", "/", path) - path = re.sub( - r"\.md(#[^)]*)?$", lambda h: ".html" + (h.group(1) or ""), path - ) - return f"({path})" - - # Match markdown links: ([text](/docs/...md)) including optional anchors. - txt = re.sub(r"\((/docs/[^)]+\.md(?:#[^)]*)?)\)", _replace, txt) - return txt - - -def preprocess_mkdocs_markdown(txt: str) -> str: - """ - Preprocess markdown text for mkdocs. - - This function applies the following transformations: - 1. Remove table of contents - 2. Dedent Python code blocks - 3. Replace 2 spaces indentation with 4 spaces - 4. 
Rewrite absolute /docs/ links to root-relative HTML links - - :param txt: Input markdown text - :return: Preprocessed markdown text - """ - txt = hmarkdo.remove_table_of_contents(txt) - txt = dedent_python_code_blocks(txt) - txt = replace_indentation_with_four_spaces(txt) - txt = convert_slides_to_markdown(txt, level=4) - txt = rewrite_absolute_doc_links(txt) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py deleted file mode 100644 index 66ed59b39..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmodule.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Import as: - -import helpers.hmodule as hmodule -""" - -import logging -import os -import subprocess -import textwrap -from typing import Any, Dict, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hserver as hserver - -_LOG = logging.getLogger(__name__) - -_WARNING = "\033[33mWARNING\033[0m" - - -# Use this to avoid extra dependencies from `hsystem`. -def _system_to_string(cmd: str) -> Tuple[int, str]: - """ - Run a command and return the output and the return code. - - :param cmd: command to run - :return: tuple of (return code, output) - """ - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, - shell=True, - text=True, - ) - rc = result.returncode - output = result.stdout - output = output.strip() - return rc, output - - -def has_module(module: str) -> bool: - """ - Return whether a Python module can be imported or not. - """ - if module == "gluonts" and hserver.is_host_mac(): - # Gluonts and mxnet modules are not properly supported on the ARM - # architecture yet, see CmTask4886 for details. 
- return False - code = f""" - try: - import {module} - has_module_ = True - except ImportError as e: - _LOG.warning("%s: %s", _WARNING, str(e)) - has_module_ = False - """ - code = textwrap.dedent(code) - # To make the linter happy. - has_module_ = True - locals_: Dict[str, Any] = {} - # Need to explicitly declare and pass `locals_`: - # https://docs.python.org/3/library/functions.html#exec - # `Pass an explicit locals dictionary if you need to see effects - # of the code on locals after function exec() returns.` - exec(code, globals(), locals_) - has_module_ = locals_["has_module_"] - return has_module_ - - -def install_module_if_not_present( - import_name: str, - *, - package_name: Optional[str] = None, - use_sudo: bool = True, - use_activate: bool = False, - venv_path: Optional[str] = None, - quiet: bool = True, -) -> None: - """ - Install a Python module if it is not already installed. - - :param import_name: name used to import the module (e.g., "openai") - :param package_name: name of the package on PyPI (if different from `import_name`) - :param use_sudo: whether to use sudo to install the module - :param use_activate: whether to use the activate script to install the module - (e.g., "source /venv/bin/activate; pip install --quiet --upgrade openai") - :param venv_path: path to the virtual environment - E.g., /Users/saggese/src/venv/client_venv.helpers - :param quiet: whether to install the module quietly - """ - _has_module = has_module(import_name) - if _has_module: - print(f"Module '{import_name}' is already installed.") - return - print(f"Installing module '{import_name}'...") - # Sometime the package name is different from the import name. - # E.g., we import using `import dash_bootstrap_components` but the package - # name is `dash-bootstrap-components`. - if package_name is None: - package_name = import_name - # Sometime the package name is different from the import name. 
- # E.g., we import using `import dash_bootstrap_components` but the package - # name is `dash-bootstrap-components`. - if quiet: - quiet_flag = "--quiet" - else: - quiet_flag = "" - if venv_path is None: - venv_path = "/venv" - venv_path = os.path.join(venv_path, "bin/activate") - hdbg.dassert_file_exists(venv_path, "Can't find venv_path='{venv_path}'") - if use_activate: - cmd = f'/bin/bash -c "(source {venv_path}; pip install {quiet_flag} --upgrade {package_name})"' - else: - cmd = f"pip install {quiet_flag} {package_name}" - if use_sudo: - cmd = f"sudo {cmd}" - _, output = _system_to_string(cmd) - print(output) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py deleted file mode 100644 index 525673032..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hmoto.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Import as: - -import helpers.hmoto as hmoto -""" - -import unittest.mock as umock -from typing import Generator, Union - -import pytest # isort:skip # noqa: E402 # pylint: disable=wrong-import-position - -# Equivalent to `import moto`, but skip this module if the module is not present. -# `moto` must be imported before `boto3` to properly mock it. -moto = pytest.importorskip("moto") - -# It is necessary that boto3 is imported after moto. -# If not, boto3 will access real AWS. 
-import boto3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - -import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hs3 as hs3 # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hunit_test as hunitest # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - - -# ############################################################################# -# S3Mock_TestCase -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class S3Mock_TestCase(hunitest.TestCase): - # Mocked AWS credentials. - mock_aws_credentials_patch = umock.patch.dict( - hs3.os.environ, - { - "MOCK_AWS_ACCESS_KEY_ID": "mock_key_id", - "MOCK_AWS_SECRET_ACCESS_KEY": "mock_secret_access_key", - "MOCK_AWS_DEFAULT_REGION": "us-east-1", - }, - ) - mock_aws_credentials = None - mock_aws_profile = "__mock__" - # Mocked bucket. - mock_s3 = moto.mock_aws() - bucket_name = "mock_bucket" - # TODO(Nikola): Temporary here to ensure it is called only once. - # Used in some tests that are obtaining data from 3rd party providers. - binance_secret = None - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def _mock_get_s3fs( - self, aws_profile: Union[str, hs3.S3FileSystem] - ) -> hs3.S3FileSystem: - """ - Mock implementation of `get_s3fs` to use the mocked environment - variables from `moto`. 
- """ - from s3fs import S3FileSystem - - hdbg.dassert_isinstance(aws_profile, (str, S3FileSystem)) - aws_profile = S3FileSystem(anon=False) - return aws_profile - - def set_up_test(self) -> None: - # Getting necessary secret before boto3 is mocked. - if self.binance_secret is None: - import helpers.hsecrets as hsecret - - self.binance_secret = hsecret.get_secret("binance.preprod.trading.1") - # Start boto3 mock. - self.mock_s3.start() - # Start AWS credentials mock. Must be started after moto mock, - # or it will be overridden by moto with `foobar` values. - self.mock_aws_credentials = self.mock_aws_credentials_patch.start() - # Initialize boto client and create bucket for testing. - s3_client = boto3.client("s3") - s3_client.create_bucket(Bucket=self.bucket_name) - # Precaution to ensure that we are using mocked botocore. - s3_test_client = boto3.client("s3") - buckets = s3_test_client.list_buckets()["Buckets"] - self.assertEqual(len(buckets), 1) - self.assertEqual(buckets[0]["Name"], self.bucket_name) - # Patch `get_s3fs` that uses the mocked environment variables. - self.mock_get_s3fs = umock.patch.object( - hs3, "get_s3fs", side_effect=self._mock_get_s3fs - ) - self.mock_get_s3fs.start() - - def tear_down_test(self) -> None: - # Empty the bucket otherwise deletion will fail. - s3_client = boto3.resource("s3") - hdbg.dassert_eq(self.bucket_name, "mock_bucket") - bucket = s3_client.Bucket(self.bucket_name) - bucket.objects.all().delete() - # Delete bucket. - bucket.delete() - # Stop mocked `get_s3fs`. - if hasattr(self, "mock_get_s3fs"): - self.mock_get_s3fs.stop() - # Stop moto. 
- self.mock_aws_credentials_patch.stop() - self.mock_s3.stop() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py deleted file mode 100644 index 13ae41c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnetwork.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Import as: - -import helpers.hnetwork as hnetwor -""" - -import logging -import os -import re -from typing import Optional, Tuple - -import requests - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def check_url(url: str) -> None: - """ - Check that an URL responds. - """ - try: - request = requests.get(url) - exists = request.status_code == 200 - # pylint: disable=broad-except - except Exception: - # TODO(gp): RuntimeError doesn't seem to catch. Find a narrower - # exception to catch. - exists = False - if not exists: - _LOG.warning("url '%s' doesn't exist", url) - - -def get_prefixes(jupyter_port: Optional[int] = None) -> Tuple[str, str]: - """ - Return the prefixes that a file should have under a GitHub repo and a - Jupyter notebook. - """ - hsystem.get_user_name() - if jupyter_port is None: - jupyter_port = 10001 - _LOG.warning( - "jupyter_port not available: using the default one %s", jupyter_port - ) - repo_name = hgit.get_repo_full_name_from_client(super_module=False) - _LOG.debug("repo_name=%s", repo_name) - github_prefix = f"https://github.com/{repo_name}/blob/master" - jupyter_prefix = f"http://localhost:{jupyter_port}/tree" - return github_prefix, jupyter_prefix - - -# TODO(gp): -> get_canonical_file_name_from_url -def get_file_name(url: str) -> str: - """ - Given an URL from GitHub or from Jupyter server extract the path - corresponding to the file. 
- - E.g., - - http://localhost:10001/notebooks/research/... - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - -> - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - - - https://github.com/.../.../blob/master/... - oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb - -> - oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb - """ - # "http://localhost:10001/notebooks/... - # oil/ST/Task229_Exploratory_analysis_of_ST_data_part1.ipynb" - ret = None - if ret is None: - m = re.search(r"http.*://localhost:\d+/(.*)", url) - if m: - ret = m.group(1) - to_remove = "notebooks/" - idx = ret.index(to_remove) - if idx >= 0: - end_idx = idx + len(to_remove) - ret = ret[end_idx:] - if ret is None: - # https://github.com/.../.../blob/master/... - # oil/ST/Task229_Exploratory_analysis_of_ST_data.ipynb - m = re.search(r"http.*://.*github.com/(.*)", url) - if m: - ret = m.group(1) - # Remove ".../.../blob/master" - ret = "/".join(ret.split("/")[4:]) - if ret is None: - if os.path.exists(url): - ret = url - if ret is None: - hdbg.dassert_is_not(ret, None, "url=%s", url) - return ret # type: ignore diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py deleted file mode 100644 index 75ecabfe4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnotebook.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Import as: - -import helpers.hnotebook as hnotebo -""" - -import logging - - -def config_notebook(sns_set: bool = True) -> None: - """ - Configure the notebook for plotting. - """ - import helpers.hmodule as hmodule - - # Matplotlib. - module = "matplotlib" - if hmodule.has_module(module): - # Matplotlib. 
- import matplotlib.pyplot as plt - - # plt.rcParams - plt.rcParams["figure.figsize"] = (20, 5) - plt.rcParams["legend.fontsize"] = 14 - plt.rcParams["font.size"] = 14 - plt.rcParams["image.cmap"] = "rainbow" - if False: - # Tweak the size of the plots to make it more readable when embedded in - # documents or presentations. - # font = {'family' : 'normal', - # #'weight' : 'bold', - # 'size' : 32} - # matplotlib.rc('font', **font) - scale = 3 - small_size = 8 * scale - medium_size = 10 * scale - bigger_size = 12 * scale - # Default text sizes. - plt.rc("font", size=small_size) - # Fontsize of the axes title. - plt.rc("axes", titlesize=small_size) - # Fontsize of the x and y labels. - plt.rc("axes", labelsize=medium_size) - # Fontsize of the tick labels. - plt.rc("xtick", labelsize=small_size) - # Fontsize of the tick labels. - plt.rc("ytick", labelsize=small_size) - # Legend fontsize. - plt.rc("legend", fontsize=small_size) - # Fontsize of the figure title. - plt.rc("figure", titlesize=bigger_size) - else: - print("No module '{module}'") - # Seaborn. - module = "seaborn" - if hmodule.has_module(module): - import seaborn as sns - - if sns_set: - sns.set() - else: - print("No module '{module}'") - # Pandas. - module = "pandas" - if hmodule.has_module(module): - import pandas as pd - - pd.set_option("display.max_rows", 500) - pd.set_option("display.max_columns", 500) - pd.set_option("display.width", 1000) - else: - print("No module '{module}'") - # Warnings. - import helpers.hwarnings as hwarnin - - # Force the linter to keep this import. - _ = hwarnin - - -def _info_print(msg: str, *args, **kwargs) -> None: - """ - Print a message with optional formatting arguments. - """ - if args: - msg = msg % args - print(msg) - - -def set_logger_to_print(log) -> None: - """ - Replace logger.info method with a print function. 
- - :param log: logger object to modify - """ - log.info = _info_print - - -def set_all_loggers_to_print() -> None: - """ - Replace all loggers' info method with a print function. - """ - for name in logging.root.manager.loggerDict: - logger = logging.getLogger(name) - # print("Setting logger %s to print" % name) - set_logger_to_print(logger) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py deleted file mode 100644 index 47fc37975..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumba.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Import as: - -import helpers.hnumba as hnumba -""" - -import logging -from typing import Any, Callable, TypeVar - -try: - import numba - - numba_available = True -except ImportError: - numba_available = False - -_LOG = logging.getLogger(__name__) - -# Switch to enable numba at run-time. -# For using in notebooks you need to force a reload of the library, like: -# import importlib -# importlib.reload(numba_) -# numba_.USE_NUMBA = False - -USE_NUMBA = True -RT = TypeVar("RT") # Return type for decorator. 
- - -def jit(f: Callable[..., RT]) -> Callable[..., RT]: - if USE_NUMBA and not numba_available: - _LOG.warning("numba is not installed") - use_numba = USE_NUMBA and numba_available - - if use_numba: - _LOG.debug("Using numba!") - wrapper: Callable[..., RT] = numba.jit(f) - else: - - def wrapper(*args: Any, **kwargs: Any) -> RT: - _LOG.debug("Not using numba!") - return f(*args, **kwargs) - - return wrapper diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py deleted file mode 100644 index 4cd0e8c4d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hnumpy.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Import as: - -import helpers.hnumpy as hnumpy -""" - -import contextlib -from typing import Iterator - -import numpy as np - -import helpers.hdbg as hdbg - - -# From https://stackoverflow.com/questions/49555991 -@contextlib.contextmanager -def random_seed_context(seed: int) -> Iterator: - """ - Context manager to isolate a numpy random seed. - """ - state = np.random.get_state() - np.random.seed(seed) - try: - yield - finally: - np.random.set_state(state) - - -# TODO(Juraj): unit test in CmTask5092. -def floor_with_precision(value: float, amount_precision: int) -> float: - """ - Floor a value using desired precision. - - The invariant for this function is that negative number are floored based - on their absolute value: e.g floor_with_precision(-4.6, 0) == -4. This is - useful for calculating share size where there are decimal precision - limitations. The desired behavior is to rather round down than overfill. 
- - Other examples: - floor_with_precision(0.125, 2) == 0.12 - floor_with_precision(0.4, 0) == 0.0 - - :param value: value to floor with desire - :param amount_precision: number of decimal points to floor to - :return: value floored using desired precision. - """ - # Custom solution to allow flooring using precision. - # https://stackoverflow.com/questions/58065055/floor-and-ceil-with-number-of-decimals/58065394#58065394 - # Precision < 0 does not make sense. - hdbg.dassert_lte(0, amount_precision) - # Store sign and get absolute value to get the desire - sign = -1 if value < 0 else 1 - value_abs = abs(value) - value_floored = np.true_divide( - np.floor(value_abs * 10**amount_precision), 10**amount_precision - ) - return value_floored * sign diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py deleted file mode 100644 index e9424b8cc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hobject.py +++ /dev/null @@ -1,500 +0,0 @@ -""" -Methods to introspect and print the state of an object. - -Import as: - -import helpers.hobject as hobject -""" - -import abc -import logging -import pprint -from typing import Any, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hstring as hstring - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - -# ############################################################################# -# _to_skip* -# ############################################################################# - - -def _to_skip(is_: bool, mode: str) -> bool: - """ - Return whether to skip the attribute. 
- - :param is_: if `True` the attribute is of the type we are checking - :param mode: how to handle the attribute - :return: whether to skip the attribute - """ - hdbg.dassert_in(mode, ("skip", "only", "all")) - skip = False - if mode == "skip": - if is_: - # Skip everything. - skip = True - elif mode == "only": - if not is_: - # Keep only the callables. - skip = True - elif mode == "all": - # Keep everything. - skip = False - else: - raise ValueError(f"Invalid mode='{mode}'") - return skip - - -def _to_skip_callable_attribute(attr_name: Any, mode: str) -> bool: - """ - Decide whether to skip a callable attribute. - """ - # Check whether the attribute is callable. - is_callable = callable(attr_name) - skip = _to_skip(is_callable, mode) - return skip - - -def _to_skip_private_attribute(attr_name: str, mode: str) -> bool: - """ - Decide whether to skip a private attribute. - """ - # _Object__hello - # TODO(gp): This can be improved by passing the name of the object. - is_dunder = attr_name.startswith("_") and "__" in attr_name - # We assume that private attributes start with `_` and are not dunder. - is_private = not is_dunder and attr_name.startswith("_") - skip = _to_skip(is_private, mode) - return skip - - -def _to_skip_dunder_attribute(attr_name: str, mode: str) -> bool: - """ - Decide whether to skip a double under attribute. - """ - # Check if it is a dunder (i.e., double under method). E.g., `__hello__`. - is_dunder = attr_name.startswith("_") and "__" in attr_name - skip = _to_skip(is_dunder, mode) - return skip - - -def _to_skip_attribute( - attr_name: Any, - attr_value: Any, - callable_mode: str, - private_mode: str, - dunder_mode: str, - attr_names_to_skip: Optional[List[str]], -) -> bool: - """ - Decide whether to skip an attribute. 
- - :param attr_name: name of the attribute - :param attr_value: value of the attribute - :param callable_mode: how to handle attributes that are callable methods - :param private_mode: how to handle attributes that are private (e.g., - `_hello`) - :param dunder_mode: how to handle attributes that are dunder (e.g., - `__hello`) - :param attr_names_to_skip: a list of attributes (e.g., private, callable, dunder) - to skip. `None` to skip nothing. - :return: whether to skip the attribute - """ - # Check whether the attribute is one that was requested explicitly to skip. - if attr_names_to_skip is not None: - if attr_name in attr_names_to_skip: - skip = True - return skip - # Handle callable methods. - skip = _to_skip_callable_attribute(attr_value, callable_mode) - if skip: - _LOG.debug("Skip callable") - return skip - # Handle private methods. - skip = _to_skip_private_attribute(attr_name, private_mode) - if skip: - _LOG.debug("Skip private") - return skip - # Handle dunder methods. - skip = _to_skip_dunder_attribute(attr_name, dunder_mode) - if skip: - _LOG.debug("Skip dunder") - return skip - return False - - -# ############################################################################# -# obj_to_str -# ############################################################################# - - -def _type_to_str(attr_value: Any) -> str: - """ - Print the attribute value together with its type. - - E.g., `a=False , b=hello , c=3.14 ` - """ - type_as_str = str(type(attr_value)) - # Convert from `` to `str`. - type_as_str = hstring.remove_prefix(type_as_str, "") - # Add `<` and `>` around the type. - type_as_str = f"<{type_as_str}>" - return type_as_str - - -def _attr_to_str(attr_value: Any, print_type: bool) -> str: - """ - Print the attribute value handling different types. 
- """ - _LOG.debug("type(attr_value)=%s", type(attr_value)) - if isinstance(attr_value, pd.DataFrame): - res = f"pd.df({attr_value.shape}" - elif isinstance(attr_value, pd.Series): - res = f"pd.srs({attr_value.shape}" - elif isinstance(attr_value, dict): - res = str(attr_value) - else: - res = str(attr_value) - # Add the type, if needed. - if print_type: - res += " " + _type_to_str(attr_value) - return res - - -def obj_to_str( - obj: Any, - *, - attr_mode: str = "__dict__", - sort: bool = False, - print_type: bool = False, - callable_mode: str = "skip", - private_mode: str = "skip", - dunder_mode: str = "skip", - attr_names_to_skip: Optional[List[str]] = None, -) -> str: - """ - Print the attributes of an object. - - An object is printed as name of its class and its attributes, e.g., - ``` - _Object1 at 0x...=(a=False, b=hello, c=3.14) - ``` - - :param attr_mode: use `__dict__` or `dir()` - - It doesn't seem to make much difference - :sort: sort the attributes in order of name, or not - :param print_type: print the type of the attribute - :param callable_mode: how to handle attributes that are callable (i.e., - methods) - - `skip`: skip the callable methods - - `only`: print only the callable methods - - `all`: always print - :param private_mode: how to handle private attributes. Same params as - `callable_mode` - :param dunder_mode: how to handle double under attributes. Same params as - `callable_mode` - :param attr_names_to_skip: a list of attributes (e.g., private, callable, - dunder) to skip. This is used to avoid to print data that is redundant - (e.g., a cached value) - """ - ret = [] - if attr_mode == "__dict__": - # Use `__dict__` to get the attributes of the object. - values = obj.__dict__ - elif attr_mode == "dir": - # Use `dir()` to get the attributes of the object. - values = dir(obj) - elif attr_mode == "config": - # Use object method to get the attributes to print info for. 
- values = obj.get_config_attributes() - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - if sort: - values = sorted(values) - for attr_name in values: - if attr_mode == "__dict__": - attr_value = obj.__dict__[attr_name] - elif attr_mode in ["dir", "config"]: - attr_value = getattr(obj, attr_name) - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - skip = _to_skip_attribute( - attr_name, - attr_value, - callable_mode, - private_mode, - dunder_mode, - attr_names_to_skip, - ) - # `attr_value` can be callable object and needs to be properly handled - # for string conversion and formatting. - _LOG.debug(hprint.to_str("attr_name attr_value skip")) - if skip: - continue - # - out = f"{attr_name}=" + _attr_to_str(attr_value, print_type) - ret.append(out) - # - txt = hprint.to_object_str(obj) + "=" - txt += "(" + ", ".join(ret) + ")" - return txt - - -# ############################################################################# -# obj_to_repr -# ############################################################################# - - -def _attr_to_repr(attr_name: Any, attr_value: Any, print_type: bool) -> str: - """ - Print an object as name of its class and its attributes. 
- - E.g., - ``` - : - a='False' - b='hello' - c='3.14' - ``` - """ - _LOG.debug("type(attr_value)=%s", type(attr_value)) - if isinstance(attr_value, (pd.DataFrame, pd.Series)): - attr_value_as_str = hpandas.df_to_str(attr_value) - elif isinstance(attr_value, dict): - attr_value_as_str = pprint.pformat(attr_value) - else: - attr_value_as_str = repr(attr_value) - # - if len(attr_value_as_str.split("\n")) > 1: - # The string representing the attribute value spans multiple lines, so - # print like: - # ``` - # attr_name= (type) - # attr_value - # ``` - out = f"{attr_name}=" - if print_type: - out += " " + _type_to_str(attr_value) - out += "\n" + hprint.indent(attr_value_as_str) - else: - # The string representing the attribute value is a single line, so print - # like: - # ``` - # attr_name='attr_value' (type) - # ``` - out = f"{attr_name}='{str(attr_value)}'" - if print_type: - out += " " + _type_to_str(attr_value) - return out - - -# TODO(gp): Merge the code with obj_to_repr() using a switch for the different -# code. -def obj_to_repr( - obj: Any, - *, - attr_mode: str = "__dict__", - sort: bool = False, - print_type: bool = False, - callable_mode: str = "skip", - private_mode: str = "skip", - dunder_mode: str = "skip", - attr_names_to_skip: Optional[List[str]] = None, -) -> str: - """ - Same interface and behavior as `obj_to_str()`. - - Use `_attr_to_repr()` instead of a simple `attr_name = attr_value` - like in `obj_to_str()`. - """ - ret = [] - # TODO(Grisha): factor out the logic in a function `get_class_attributes(attr_mode)`. 
- if attr_mode == "__dict__": - values = obj.__dict__ - elif attr_mode == "dir": - values = dir(obj) - elif attr_mode == "config": - values = obj.get_config_attributes() - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - if sort: - values = sorted(values) - for attr_name in values: - if attr_mode == "__dict__": - attr_value = obj.__dict__[attr_name] - elif attr_mode in ["dir", "config"]: - attr_value = getattr(obj, attr_name) - else: - raise ValueError(f"Invalid attr_mode='{attr_mode}'") - skip = _to_skip_attribute( - attr_name, - attr_value, - callable_mode, - private_mode, - dunder_mode, - attr_names_to_skip, - ) - # `attr_value` can be callable object and needs to be properly handled - # for string conversion and formatting. - _LOG.debug(hprint.to_str("attr_name attr_value skip")) - if skip: - continue - # - out = _attr_to_repr(attr_name, attr_value, print_type) - ret.append(out) - # - txt = [] - txt.append(hprint.to_object_repr(obj) + ":") - txt.append(hprint.indent("\n".join(ret))) - return "\n".join(txt) - - -# ############################################################################# -# PrintableMixin -# ############################################################################# - - -class PrintableMixin: - """ - Implement `__str__()` and `__repr__()` to print the state of an object. - - These methods can be overridden with more specific methods by - derived classes. - """ - - @staticmethod - @abc.abstractmethod - def get_config_attributes() -> List[str]: - """ - Get list of attributes that are relevant to the configuration of each - block. - """ - ... - - # TODO(Grisha): decide if we need this method: what are the use-cases? - # Ideally we should just save `SystemConfig` and load it when needed. - def to_config_dict(self) -> Dict[str, Any]: - """ - Get class configuration as dict. - """ - res_dict = {} - # Get class attribute names to print. 
- attributes = self.get_config_attributes() - hdbg.dassert_is_subset(attributes, self.__dict__.keys()) - # Iterate over attributes and add their state to the dict. - for attr in attributes: - value = getattr(self, attr) - # Get a list of types the value class is derived from. - value_parent_classes = value.__class__.__mro__ - if any( - "helpers.hobject.PrintableMixin" in str(parent_class) - for parent_class in value_parent_classes - ): - # Call the function recursively if value is also - # a `PrintableMixin` descendant. - dict_val = value.to_config_dict() - else: - # Get attribute value representation. - dict_val = _attr_to_repr(attr, value, print_type=True) - # Put value in the result dict. - res_dict[attr] = dict_val - return res_dict - - def to_config_str(self) -> str: - """ - Get class configuration as string. - """ - ret = [] - attributes = self.get_config_attributes() - hdbg.dassert_is_subset(attributes, self.__dict__.keys()) - # Iterate over attributes and add their state to the dict. - for attr in attributes: - value = getattr(self, attr) - if isinstance(value, PrintableMixin): - # Call the function recursively if value is also - # a `PrintableMixin` descendant. - dict_val = value.to_config_str() - # Add attribute name for string representation. - dict_val = f"{attr}={dict_val}" - else: - dict_val = _attr_to_repr(attr, value, print_type=True) - # Put value in the result dict. - ret.append(dict_val) - txt = [] - txt.append(hprint.to_object_repr(self) + ":") - txt.append(hprint.indent("\n".join(ret))) - txt = "\n".join(txt) - return txt - - def __repr__( - self, - *, - attr_names_to_skip: Optional[List[str]] = None, - ) -> str: - """ - Used for debugging and development and need to be unambiguous. 
- """ - txt = obj_to_repr( - self, - print_type=True, - private_mode="all", - attr_names_to_skip=attr_names_to_skip, - ) - return txt - - def __str__( - self, - *, - attr_names_to_skip: Optional[List[str]] = None, - ) -> str: - """ - Used for creating output for end user and need to be readable. - """ - txt = obj_to_str( - self, - print_type=True, - private_mode="all", - attr_names_to_skip=attr_names_to_skip, - ) - return txt - - -# ############################################################################# - - -# TODO(gp): CleanUp. This is for testing and should be in hobject_test.py. -# TODO(gp): -> check_object_signature -def test_object_signature( - self_: Any, obj: Any, *, remove_lines_regex: Optional[str] = None -) -> None: - """ - Print a string representation of an object using both `str()` and `repr()`. - - :param obj: the object to print - :param remove_lines_regex: a regex to remove certain lines from the - output - """ - txt = [] - # - txt.append(hprint.frame("str:")) - txt.append(str(obj)) - # - txt.append(hprint.frame("repr:")) - txt.append(repr(obj)) - # - txt = "\n".join(txt) - # Remove certain lines, if needed. - if remove_lines_regex: - txt = hprint.filter_text(remove_lines_regex, txt) - # - self_.check_string(txt, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py deleted file mode 100644 index 2c6d9c729..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hopen.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Support opening a file. 
- -Import as: - -import helpers.hopen as hopen -""" - -# TODO(gp): -> open_file or move it to system_interaction.py - -import logging -import os -from typing import Optional - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - - -def _cmd_open_html(file_name: str, os_name: str) -> Optional[str]: - """ - Get OS-specific command to open an HTML file. - """ - # Retrieve the executable. - os_cmds = { - "Darwin": "open", - "Windows": "start", - "Linux": "xdg-open", - } - hdbg.dassert_in(os_name, os_cmds) - exec_name = os_cmds[os_name] - if not hsystem.check_exec(exec_name): - _LOG.warning( - "Can't execute the command '%s' on this platform", exec_name - ) - return None - # Build the command. - full_cmd = f"{exec_name} {file_name}" - if os_name == "Linux": - _LOG.warning( - "To open files faster launch in background '%s &'", exec_name - ) - return full_cmd - - -def _cmd_open_pdf(file_name: str, os_name: str) -> Optional[str]: - """ - Get OS-specific command to open a PDF file. - """ - os_cmds = { - "Darwin": ( - "/usr/bin/osascript << EOF\n" - f'set theFile to POSIX file "{file_name}" as alias\n' - 'tell application "Skim"\n' - "activate\n" - "set theDocs to get documents whose path is " - "(get POSIX path of theFile)\n" - "if (count of theDocs) > 0 then revert theDocs\n" - "open theFile\n" - "end tell\n" - "EOF\n" - ) - } - if os_name not in os_cmds: - _LOG.warning("Opening PDF files on '%s' is not supported yet", os_name) - full_cmd = None - else: - full_cmd = os_cmds[os_name] - return full_cmd - - -def open_file(file_name: str) -> None: - """ - Open file locally if its extension is supported. - """ - # Detect file format by the (last) extension. - # E.g., 'hello.html.txt' is considered a txt file. 
- extension = os.path.split(file_name)[-1].split(".")[-1] - extension = extension.lower() - # Make sure file exists. - _LOG.info( - "\n%s", - hprint.frame( - f"Opening {extension} file '{file_name}'", char1="<", char2=">" - ), - ) - hdbg.dassert_path_exists(file_name) - # Get opening command. - os_name = hsystem.get_os_name() - cmd: Optional[str] - if extension == "pdf": - cmd = _cmd_open_pdf(file_name, os_name) - elif extension == "html": - cmd = _cmd_open_html(file_name, os_name) - else: - hdbg.dfatal(f"Opening '{extension}' files is not supported yet") - # Run command. - if cmd is not None: - _LOG.info("%s", cmd) - hio.to_file("open_file_cmd.sh", cmd) - hsystem.system("source open_file_cmd.sh", suppress_output=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py deleted file mode 100644 index 535e7f081..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Import as: - -import helpers.hpandas as hpandas -""" - -from helpers.hpandas_analysis import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_check_summary import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_clean import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_compare import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_conversion import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_dassert import * # isort:skip # 
noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_display import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_io import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_multiindex import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_stats import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_transform import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import -from helpers.hpandas_utils import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old deleted file mode 100644 index 5be1b281a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas.py.old +++ /dev/null @@ -1,2684 +0,0 @@ -""" -Import as: - -import helpers.hpandas as hpandas -""" - -import csv -import dataclasses -import logging -import helpers.hlogging as hlogging -import random -import re -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd - -# Handle different versions of s3fs where core module may be at different -# locations. -try: - import s3fs - - # Try to access s3fs.core to check if it exists - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module. 
- try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ImportError: - # If s3fs is not available, define dummy classes for type hints. - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -# Avoid the following dependency from other `helpers` modules to prevent import -# cycles: -# import helpers.hs3 as hs3 -# import helpers.hsql as hsql -# import helpers.hunit_test as hunitest - - -_LOG = hlogging.getLogger(__name__) - -# Enable extra verbose debugging. Do not commit. -_TRACE = False - -RowsValues = List[List[str]] - - -# ############################################################################# - - -def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: - """ - Convert a pd.DataFrame with a single column into a pd.Series. The problem - is that empty df or df with a single row are not converted correctly to a - pd.Series. - - :param df: dataframe with a single column to convert to a series - :param series_dtype: dtype of the desired series in case a DataFrame - is empty, otherwise inherit dtype from a DataFrame - """ - # See https://stackoverflow.com/questions/33246771 - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) - if df.empty: - srs = pd.Series(dtype=series_dtype) - elif df.shape[0] > 1: - srs = df.squeeze() - else: - srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) - srs.name = df.index.name - hdbg.dassert_isinstance(srs, pd.Series) - return srs - - -def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: - """ - Convert a single-column dataframe to a series or no-op if already a series. 
- """ - if isinstance(data, pd.Series): - return data - return to_series(data) - - -def dassert_is_days( - timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None -) -> None: - hdbg.dassert( - (timedelta / pd.Timedelta(days=1)).is_integer(), - "timedelta='%s' is not an integer number of days", - timedelta, - ) - if min_num_days is not None: - hdbg.dassert_lte(1, timedelta.days) - - -# ############################################################################# - - -def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index: - """ - Return the index of a Pandas object. - """ - if isinstance(obj, pd.Index): - index = obj - else: - hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame)) - index = obj.index - return index - - -# TODO(gp): Maybe for symmetry with the other functions, rename to -# dassert_datetime_index -def dassert_index_is_datetime( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the dataframe has an index containing datetimes. - - It works for both single and multi-indexed dataframes. - """ - index = _get_index(obj) - if isinstance(index, pd.MultiIndex): - # In case of multi index check that at least one level is a datetime. - is_any_datetime = any( - isinstance(level, pd.DatetimeIndex) for level in index.levels - ) - hdbg.dassert(is_any_datetime, msg, *args) - else: - hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args) - - -def dassert_unique_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a unique index. - """ - index = _get_index(obj) - if not index.is_unique: - dup_indices = index.duplicated(keep=False) - df_dup = obj[dup_indices] - dup_msg = f"Duplicated rows are:\n{df_to_str(df_dup)}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - hdbg.dassert(index.is_unique, msg=msg, *args) - - -# TODO(gp): @all Add unit tests. 
-def dassert_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has an increasing index. - """ - index = _get_index(obj) - if not index.is_monotonic_increasing: - # Print information about the problematic indices like: - # ``` - # Not increasing indices are: - # full_symbol open high - # timestamp - # 2018-08-17 01:39:00+00:00 binance::BTC_USDT 6339.250000 6348.910000 - # 2018-08-17 00:01:00+00:00 kucoin::ETH_USDT 286.712987 286.712987 - # ``` - # Find the problematic indices. - mask = np.diff(index) <= pd.Timedelta(seconds=0) - mask = np.insert(mask, 0, False) - # TODO(gp): We might want to specify an integer with how many rows before - # after we want to show. - # Shift back to get the previous index that was creating the issue. - mask_shift = np.empty_like(mask) - mask_shift[: len(mask) - 1] = mask[1 : len(mask)] - mask_shift[len(mask) - 1] = False - # - mask = mask | mask_shift - dup_msg = f"Not increasing indices are:\n{df_to_str(obj[mask])}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - # Dump the data to file for further inspection. - # obj.to_csv("index.csv") - hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args) - - -# TODO(gp): @all Add more info in case of failures and unit tests. -def dassert_strictly_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a strictly increasing index. - """ - dassert_unique_index(obj, msg, *args) - dassert_increasing_index(obj, msg, *args) - - -# TODO(gp): Not sure it's used or useful? -def dassert_monotonic_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a monotonic (i.e., strictly increasing or - decreasing index). 
- """ - dassert_unique_index(obj, msg, *args) - index = _get_index(obj) - cond = index.is_monotonic_increasing or index.is_monotonic_decreasing - hdbg.dassert(cond, msg=msg, *args) - - -# TODO(Paul): @gp -> dassert_datetime_indexed_df -def dassert_time_indexed_df( - df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool -) -> None: - """ - Validate that input dataframe is time indexed and well-formed. - - It works for both single and multi-indexed dataframes. - - :param df: dataframe to validate - :param allow_empty: allow empty data frames - :param strictly_increasing: if True the index needs to be strictly - increasing, instead of just increasing - """ - # Verify that Pandas dataframe is passed as input. - hdbg.dassert_isinstance(df, pd.DataFrame) - if not allow_empty: - # Verify that a non-empty dataframe is passed as input. - hdbg.dassert_lt(0, df.shape[0]) - # Verify that the dataframe has at least 1 column. - hdbg.dassert_lte(1, len(df.columns)) - # Verify that the index is increasing. - if strictly_increasing: - dassert_strictly_increasing_index(df) - else: - dassert_increasing_index(df) - # Check that the index is in datetime format. - dassert_index_is_datetime(df) - # Check that the passed timestamp has timezone info. - index_item = df.index[0] - if isinstance(index_item, tuple): - # In case of multi index assume that the first level is a datetime. - index_item = index_item[0] - hdateti.dassert_has_tz(index_item) - - -def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: - """ - Ensure that remapping rows / columns is valid. - """ - hdbg.dassert_isinstance(to_remap, list) - hdbg.dassert_isinstance(remap_dict, dict) - # All the rows / columns to remap, should exist. - hdbg.dassert_is_subset( - remap_dict.keys(), - to_remap, - "Keys to remap should be a subset of existing columns", - ) - # The mapping is invertible. 
- hdbg.dassert_no_duplicates(remap_dict.keys()) - hdbg.dassert_no_duplicates(remap_dict.values()) - # Rows / columns should not be remapped on existing rows / columns. - hdbg.dassert_not_intersection(remap_dict.values(), to_remap) - - -def dassert_series_type_is( - srs: pd.Series, - type_: type, - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is `type_`. - - Examples of valid series types are - - np.float64 - - np.int64 - - pd.Timestamp - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_isinstance(type_, type) - hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) - - -def dassert_series_type_in( - srs: pd.Series, - types: List[type], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is one of the types in `types`. - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_container_type(types, list, type) - hdbg.dassert_in(srs.dtype.type, types, msg, *args) - - -def dassert_indices_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - allow_series: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` share a common index. - - Print the symmetric difference of indices if equality does not hold. - """ - if allow_series: - if isinstance(df1, pd.Series): - df1 = df1.to_frame() - if isinstance(df2, pd.Series): - df2 = df2.to_frame() - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert( - df1.index.equals(df2.index), - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1.index.difference(df2.index), - df2.index.difference(df1.index), - only_warning=only_warning, - ) - - -def dassert_columns_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - sort_cols: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` have the same columns. 
- - Print the symmetric difference of columns if equality does not hold. - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - if sort_cols: - _LOG.debug("Sorting dataframe columns.") - df1 = df1.sort_index(axis=1) - df2 = df2.sort_index(axis=1) - hdbg.dassert( - df1.columns.equals(df2.columns), - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - only_warning=only_warning, - ) - - -def dassert_axes_equal( - df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False -) -> None: - """ - Ensure that `df1` and `df2` have the same index and same columns. - """ - dassert_indices_equal(df1, df2) - dassert_columns_equal(df1, df2, sort_cols=sort_cols) - - -# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. -def dassert_approx_eq( - val1: Any, - val2: Any, - rtol: float = 1e-05, - atol: float = 1e-08, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Approximate comparison is not applicable for strings. - hdbg.dassert_is_not(type(val1), str) - hdbg.dassert_is_not(type(val2), str) - # Convert iterable inputs to list in order to comply with numpy. - if isinstance(val1, Iterable): - val1 = list(val1) - if isinstance(val2, Iterable): - val2 = list(val2) - cond = np.allclose( - np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True - ) - if not cond: - txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" - hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore - - -# ############################################################################# - - -def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex: - """ - Resample `DatetimeIndex`. 
- - :param index: `DatetimeIndex` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DatetimeIndex` - """ - _LOG.debug(hprint.to_str("index frequency")) - hdbg.dassert_isinstance(index, pd.DatetimeIndex) - dassert_unique_index(index, msg="Index must have only unique values") - min_date = index.min() - max_date = index.max() - _LOG.debug("min_date=%s max_date=%s", min_date, max_date) - # TODO(gp): Preserve the index name. - # index_name = index.name - resampled_index = pd.date_range( - start=min_date, - end=max_date, - freq=frequency, - ) - # Enable detailed debugging. - if False: - if len(resampled_index) > len(index): - # Downsample. - _LOG.debug( - "Index length increased by %s = %s - %s", - len(resampled_index) - len(index), - len(resampled_index), - len(index), - ) - elif len(resampled_index) < len(index): - # Upsample. - _LOG.debug( - "Index length decreased by %s = %s - %s", - len(index) - len(resampled_index), - len(index), - len(resampled_index), - ) - else: - _LOG.debug("Index length=%s has not changed", len(index)) - # resampled_index.name = index_name - return resampled_index - - -def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame: - """ - Resample `DataFrame` by placing NaN in missing locations in the index. - - :param df: `DataFrame` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DataFrame` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Preserve the index name. - index_name = df.index.name - resampled_index = resample_index(df.index, frequency) - df_reindex = df.reindex(resampled_index) - df_reindex.index.name = index_name - return df_reindex - - -def find_gaps_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Find data present in one dataframe and missing in the other one. 
- - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: two dataframes with missing data - """ - # Get data present in first, but not present in second dataframe. - first_missing_indices = df2.index.difference(df1.index) - first_missing_data = df2.loc[first_missing_indices] - # Get data present in second, but not present in first dataframe. - second_missing_indices = df1.index.difference(df2.index) - second_missing_data = df1.loc[second_missing_indices] - return first_missing_data, second_missing_data - - -# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. -def apply_index_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the index mode. - - :param df1: first input df - :param df2: second input df - :param mode: method of processing indices - - "assert_equal": check that both indices are equal, assert otherwise - - "intersect": restrict both dfs to a common index - - "leave_unchanged": ignore any indices mismatch and return dfs as-is - :return: transformed copy of the inputs - """ - _LOG.debug("mode=%s", mode) - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - dassert_indices_equal(df1_copy, df2_copy) - elif mode == "intersect": - # TODO(Grisha): Add sorting on demand. 
- common_index = df1_copy.index.intersection(df2_copy.index) - df1_copy = df1_copy[df1_copy.index.isin(common_index)] - df2_copy = df2_copy[df2_copy.index.isin(common_index)] - elif mode == "leave_unchanged": - _LOG.debug( - "Ignoring any index missmatch as per user's request.\n" - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1_copy.index.difference(df2_copy.index), - df2_copy.index.difference(df1_copy.index), - ) - else: - raise ValueError(f"Unsupported index_mode={mode}") - return df1_copy, df2_copy - - -def apply_columns_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the column mode. - - :param df1: first input df - :param df2: second input df - :param mode: method of processing columns - - "assert_equal": check that both dfs have equal columns, assert otherwise - - "intersect": restrict both dfs to only include common columns - - "leave_unchanged": ignore any column mismatches and return dfs as-is - :return: transformed copy of the inputs - """ - _LOG.debug("mode=%s", mode) - # Input validation. - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - # Check if columns are equal or not. - dassert_columns_equal(df1_copy, df2_copy) - elif mode == "intersect": - # Filter dataframes based on its common columns. - common_columns = df1_copy.columns.intersection(df2_copy.columns) - df1_copy = df1_copy[common_columns] - df2_copy = df2_copy[common_columns] - # Log the string representation of 2 dfs. - _LOG.debug("df1 after filtering=\n%s", df_to_str(df1)) - _LOG.debug("df2 after filtering=\n%s", df_to_str(df2)) - elif mode == "leave_unchanged": - # Ignore mismatch. 
- _LOG.debug( - "Ignoring any column missmatch as per user's request.\n" - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - ) - else: - raise ValueError(f"Unsupported column mode: {mode}") - return df1_copy, df2_copy - - -def find_gaps_in_time_series( - time_series: pd.Series, - start_timestamp: pd.Timestamp, - end_timestamp: pd.Timestamp, - freq: str, -) -> pd.Series: - """ - Find missing points on a time interval specified by [start_timestamp, - end_timestamp], where point distribution is determined by . - - If the passed time series is of a unix epoch format. It is - automatically tranformed to pd.Timestamp. - - :param time_series: time series to find gaps in - :param start_timestamp: start of the time interval to check - :param end_timestamp: end of the time interval to check - :param freq: distance between two data points on the interval. - Aliases correspond to pandas.date_range's freq parameter, i.e. - "S" -> second, "T" -> minute. - :return: pd.Series representing missing points in the source time - series. - """ - _time_series = time_series - if str(time_series.dtype) in ["int32", "int64"]: - _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) - correct_time_series = pd.date_range( - start=start_timestamp, end=end_timestamp, freq=freq - ) - return correct_time_series.difference(_time_series) - - -def check_and_filter_matching_columns( - df: pd.DataFrame, required_columns: List[str], filter_data_mode: str -) -> pd.DataFrame: - """ - Check that columns are the required ones and if not filter data depending - on `filter_data_mode`. 
- - :param df: data to check columns for - :param required_columns: columns to return, skipping columns that are not required - :param filter_data_mode: control behaviour with respect to extra or missing columns - - "assert": raise an error if required columns do not match received columns - - "warn_and_trim": return the intersection of required and received columns and - issue a warning - :return: input data as it is if required columns match received columns otherwise - processed data, see `filter_data_mode` - """ - received_columns = df.columns.to_list() - hdbg.dassert_lte(1, len(received_columns)) - # - if filter_data_mode == "assert": - # Raise an assertion. - only_warning = False - elif filter_data_mode == "warn_and_trim": - # Just issue a warning. - only_warning = True - # Get columns intersection while preserving the order of the columns. - columns_intersection = [ - col_name - for col_name in required_columns - if col_name in received_columns - ] - hdbg.dassert_lte(1, len(columns_intersection)) - df = df[columns_intersection] - else: - raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") - hdbg.dassert_set_eq( - required_columns, - received_columns, - only_warning=only_warning, - msg="Received columns do not match required columns.", - ) - return df - - -def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: - """ - Compare contents of rows with same indices. - - Index is set to default sequential integer values because compare is - sensitive to multi index (probably because new multi indexes are created - for each difference in `compare`). Multi index columns are regular columns now. - Excess columns are removed so both dataframes are always same shape because - `compare` expects identical dataframes (same number of rows, columns, etc.). 
- - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: dataframe with data with same indices and different contents - """ - # Get rows on which the two dataframe indices match. - idx_intersection = df1.index.intersection(df2.index) - # Remove excess columns and reset indexes. - trimmed_second = df2.loc[idx_intersection].reset_index() - trimmed_first = df1.loc[idx_intersection].reset_index() - # Get difference between second and first dataframe. - data_difference = trimmed_second.compare(trimmed_first) - # Update data difference with original dataframe index names - # for easier identification. - index_names = tuple(df2.index.names) - # If index or multi index is named, it will be visible in data difference. - if index_names != (None,): - for index in data_difference.index: - for column in index_names: - data_difference.loc[index, column] = trimmed_second.loc[index][ - column - ] - data_difference = data_difference.convert_dtypes() - return data_difference - - -def drop_duplicates( - data: Union[pd.Series, pd.DataFrame], - use_index: bool, - column_subset: Optional[List[str]] = None, - *args: Any, - **kwargs: Any, -) -> Union[pd.Series, pd.DataFrame]: - """ - Wrap `pandas.drop_duplicates()`. - - See the official docs: - - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html - - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html - - :param use_index: - - if `True`, use index values together with a column subset for - identifying duplicates - - if `False`, duplicated rows are with the exact same values in a subset - and different indices - :param column_subset: a list of columns to consider for identifying duplicates - :return: data without duplicates - """ - _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) - num_rows_before = data.shape[0] - # Get all columns list for subset if no subset is passed. 
- if column_subset is None: - column_subset = data.columns.tolist() - else: - hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") - if use_index: - # Add dummy index column to use it for duplicates detection. - index_col_name = "use_index_col" - hdbg.dassert_not_in(index_col_name, data.columns.tolist()) - column_subset.insert(0, index_col_name) - data[index_col_name] = data.index - # - data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) - # - if use_index: - # Remove dummy index column. - data_no_dups = data_no_dups.drop([index_col_name], axis=1) - # Report the change. - num_rows_after = data_no_dups.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return data_no_dups - - -def dropna( - df: pd.DataFrame, - *args: Any, - drop_infs: bool = False, - report_stats: bool = False, - **kwargs: Any, -) -> pd.DataFrame: - """ - Create a wrapper around pd.dropna() reporting information about the removed - rows. - - :param df: dataframe to process - :param drop_infs: if +/- np.inf should be considered as nans - :param report_stats: if processing stats should be reported - :return: dataframe with nans dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - num_rows_before = df.shape[0] - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - df = df.dropna(*args, **kwargs) - if report_stats: - num_rows_after = df.shape[0] - pct_removed = hprint.perc( - num_rows_before - num_rows_after, num_rows_before - ) - _LOG.info("removed rows with nans: %s", pct_removed) - return df - - -def drop_axis_with_all_nans( - df: pd.DataFrame, - drop_rows: bool = True, - drop_columns: bool = False, - drop_infs: bool = False, - report_stats: bool = False, -) -> pd.DataFrame: - """ - Remove columns and rows not containing information (e.g., with only nans). - - The operation is not performed in place and the resulting df is - returned. 
Assume that the index is timestamps. - - :param df: dataframe to process - :param drop_rows: remove rows with only nans - :param drop_columns: remove columns with only nans - :param drop_infs: remove also +/- np.inf - :param report_stats: report the stats of the operations - :return: dataframe with specific nan axis dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - if drop_columns: - # Remove columns with all nans, if any. - cols_before = df.columns[:] - df = df.dropna(axis=1, how="all") - if report_stats: - # Report results. - cols_after = df.columns[:] - removed_cols = set(cols_before).difference(set(cols_after)) - pct_removed = hprint.perc( - len(cols_before) - len(cols_after), len(cols_after) - ) - _LOG.info( - "removed cols with all nans: %s %s", - pct_removed, - hprint.list_to_str(removed_cols), - ) - if drop_rows: - # Remove rows with all nans, if any. - rows_before = df.index[:] - df = df.dropna(axis=0, how="all") - if report_stats: - # Report results. - rows_after = df.index[:] - removed_rows = set(rows_before).difference(set(rows_after)) - if len(rows_before) == len(rows_after): - # Nothing was removed. - min_ts = max_ts = None - else: - # TODO(gp): Report as intervals of dates. - min_ts = min(removed_rows) - max_ts = max(removed_rows) - pct_removed = hprint.perc( - len(rows_before) - len(rows_after), len(rows_after) - ) - _LOG.info( - "removed rows with all nans: %s [%s, %s]", - pct_removed, - min_ts, - max_ts, - ) - return df - - -def reindex_on_unix_epoch( - df: pd.DataFrame, in_col_name: str, unit: str = "s" -) -> pd.DataFrame: - """ - Transform the column `in_col_name` into a datetime index. `in_col_name` - contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. - - :param df: dataframe with a unix epoch - :param in_col_name: column containing unix epoch - :param unit: the unit of unix epoch - """ - # Convert. 
- temp_col_name = in_col_name + "_tmp" - hdbg.dassert_in(in_col_name, df.columns) - hdbg.dassert_not_in(temp_col_name, df.columns) - # Save. - df[temp_col_name] = pd.to_datetime(df[in_col_name], unit=unit, utc=True) - df.set_index(temp_col_name, inplace=True, drop=True) - df.index.name = None - return df - - -def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: - """ - Compute a simple signature of a dataframe in string format. - - The signature contains metadata about dataframe size and certain - amount of rows from start and end of a dataframe. It is used for - testing purposes. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - text: List[str] = [f"df.shape={str(df.shape)}"] - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - # If dataframe size exceeds number of rows, show only subset in form of - # first and last rows. Otherwise, whole dataframe is shown. - if len(df) > num_rows: - text.append(f"df.head=\n{df.head(num_rows // 2)}") - text.append(f"df.tail=\n{df.tail(num_rows // 2)}") - else: - text.append(f"df.full=\n{df}") - text: str = "\n".join(text) - return text - - -# ############################################################################# - - -def trim_df( - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> pd.DataFrame: - """ - Trim the dataframe using values in `ts_col_name`. - - The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. - - :param df: the dataframe to trim - :param ts_col_name: the name of the column; `None` means index - :param start_ts: the start boundary for trimming - :param end_ts: the end boundary for trimming - :param left_close: whether to include the start boundary of the interval - - True: [start_ts, ... - - False: (start_ts, ... 
- :param right_close: whether to include the end boundary of the interval - - True: ..., end_ts] - - False: ..., end_ts) - :return: the trimmed dataframe - """ - if _TRACE: - _LOG.trace( - df_to_str(df, print_dtypes=True, print_shape_info=True, tag="df") - ) - _LOG.debug( - hprint.to_str("ts_col_name start_ts end_ts left_close right_close") - ) - if _TRACE: - _LOG.trace("df=\n%s", df_to_str(df)) - if df.empty: - # If the df is empty, there is nothing to trim. - return df - if start_ts is None and end_ts is None: - # If no boundaries are specified, there are no points of reference to trim - # to. - return df - num_rows_before = df.shape[0] - if start_ts is not None and end_ts is not None: - # Confirm that the interval boundaries are valid. - hdateti.dassert_tz_compatible(start_ts, end_ts) - hdbg.dassert_lte(start_ts, end_ts) - # Get the values to filter by. - if ts_col_name is None: - values_to_filter_by = pd.Series(df.index, index=df.index) - else: - hdbg.dassert_in(ts_col_name, df.columns) - values_to_filter_by = df[ts_col_name] - if values_to_filter_by.is_monotonic_increasing: - _LOG.trace("df is monotonic") - # The values are sorted; using the `pd.Series.searchsorted()` method. - # Find the index corresponding to the left boundary of the interval. - if start_ts is not None: - side = "left" if left_close else "right" - left_idx = values_to_filter_by.searchsorted(start_ts, side) - else: - # There is nothing to filter, so the left index is the first one. - left_idx = 0 - _LOG.debug(hprint.to_str("start_ts left_idx")) - # Find the index corresponding to the right boundary of the interval. - if end_ts is not None: - side = "right" if right_close else "left" - right_idx = values_to_filter_by.searchsorted(end_ts, side) - else: - # There is nothing to filter, so the right index is None. 
- right_idx = df.shape[0] - _LOG.debug(hprint.to_str("end_ts right_idx")) - # - hdbg.dassert_lte(0, left_idx) - hdbg.dassert_lte(left_idx, right_idx) - hdbg.dassert_lte(right_idx, df.shape[0]) - _LOG.debug(hprint.to_str("start_ts left_idx")) - if right_idx < df.shape[0]: - _LOG.debug(hprint.to_str("end_ts right_idx")) - df = df.iloc[left_idx:right_idx] - else: - _LOG.trace("df is not monotonic") - # The values are not sorted; using the `pd.Series.between` method. - if left_close and right_close: - inclusive = "both" - elif left_close: - inclusive = "left" - elif right_close: - inclusive = "right" - else: - inclusive = "neither" - epsilon = pd.DateOffset(minutes=1) - if start_ts is None: - start_ts = values_to_filter_by.min() - epsilon - if end_ts is None: - end_ts = values_to_filter_by.max() + epsilon - df = df[ - values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) - ] - # Report the changes. - num_rows_after = df.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return df - - -# TODO(Nina): Add `filter_data_mode`. -def merge_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - threshold_col_name: str, - *, - threshold: float = 0.9, - intersecting_columns: Optional[List[str]] = None, - **pd_merge_kwargs: Any, -) -> pd.DataFrame: - """ - Wrap `pd.merge`. - - :param threshold_col_name: a column's name to check the minimum - overlap on - :param threshold: minimum overlap of unique values in a specified - column to perform the merge - :param intersecting_columns: allow certain columns to appear in both - dataframes; store both in the resulting df with corresponding - suffixes - """ - _LOG.debug( - hprint.to_str( - "threshold_col_name threshold intersecting_columns pd_merge_kwargs" - ) - ) - # Sanity check column types. 
- threshold_col1 = df1[threshold_col_name] - threshold_col2 = df2[threshold_col_name] - only_first_elem = False - hdbg.dassert_array_has_same_type_element( - threshold_col1, threshold_col2, only_first_elem - ) - # TODO(Grisha): @Dan Implement asserts for each asset id. - # Check that an overlap of unique values is above the specified threshold. - threshold_unique_values1 = set(threshold_col1) - threshold_unique_values2 = set(threshold_col2) - threshold_common_values = set(threshold_unique_values1) & set( - threshold_unique_values2 - ) - threshold_common_values_share1 = len(threshold_common_values) / len( - threshold_unique_values1 - ) - threshold_common_values_share2 = len(threshold_common_values) / len( - threshold_unique_values2 - ) - hdbg.dassert_lte(threshold, threshold_common_values_share1) - hdbg.dassert_lte(threshold, threshold_common_values_share2) - # Use an empty set instead of None to perform set difference further. - intersecting_columns_set = ( - set() if intersecting_columns is None else set(intersecting_columns) - ) - # Check that there are no common columns except for the ones in `intersecting_columns`. - df1_cols = ( - set(df1.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - df2_cols = ( - set(df2.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - hdbg.dassert_not_intersection(df1_cols, df2_cols) - # - res_df = df1.merge(df2, **pd_merge_kwargs) - return res_df - - -# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? -def drop_duplicated( - df: pd.DataFrame, *, subset: Optional[List[str]] = None -) -> pd.DataFrame: - """ - Implement `df.duplicated` but considering also the index and ignoring nans. - """ - _LOG.debug("before df=\n%s", df_to_str(df)) - # Move the index to the df. 
- old_index_name = df.index.name - new_index_name = "_index.tmp" - hdbg.dassert_not_in(new_index_name, df.columns) - df.index.name = new_index_name - df.reset_index(drop=False, inplace=True) - # Remove duplicates by ignoring nans. - if subset is not None: - hdbg.dassert_isinstance(subset, list) - subset = [new_index_name] + subset - duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") - # Report the result of the operation. - if duplicated.sum() > 0: - num_rows_before = df.shape[0] - _LOG.debug("Removing duplicates df=\n%s", df_to_str(df.loc[duplicated])) - df = df.loc[~duplicated] - num_rows_after = df.shape[0] - _LOG.warning( - "Removed repeated rows num_rows=%s", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - _LOG.debug("after removing duplicates df=\n%s", df_to_str(df)) - # Set the index back. - df.set_index(new_index_name, inplace=True) - df.index.name = old_index_name - _LOG.debug("after df=\n%s", df_to_str(df)) - return df - - -# ############################################################################# - - -def infer_column_types(col: pd.Series): - """ - Determine which data type is most prevalent in a column. - - Examine the values in the given pandas Series and decides whether the - majority of entries are strings, numeric values, or booleans. - - :param col: The column to inspect. - :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, representing - the predominant type. 
- """ - vals = { - "is_numeric": pd.to_numeric(col, errors="coerce").notna(), - #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - "is_bool": col.map(lambda x: isinstance(x, bool)), - "is_string": col.map(lambda x: isinstance(x, str)), - } - vals = {k: float(v.mean()) for k, v in vals.items()} - # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", - # (vals["is_numeric"] >= vals["is_string"], "is_numeric", - # "is_string")) - if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): - type_ = "is_bool" - elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): - type_ = "is_numeric" - else: - type_ = "is_string" - vals["type"] = type_ - return vals - - -def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Identify the predominant data type for each column in a DataFrame. - - :param df: The DataFrame whose columns will be analyzed. - :return: A DataFrame with two columns: - - `column`: the name of each original column. - - `predominant_type`: the most frequent type in that column, - one of `"string"`, `"numeric"`, or `"bool"`. - """ - return df.apply(lambda x: pd.Series(infer_column_types(x))).T - - -def convert_to_type(col: pd.Series, type_: str) -> pd.Series: - """ - Convert a pandas Series to a specified data type. - - :param col: The input column to be converted. - :param type_: The target data type. Expected values include: - - `"is_bool"`: convert values to booleans. - - `"is_int"`: convert values to integers. - - `"is_numeric"`: convert values to float. - - `"is_string"`: convert values to strings. - :return: A new Series with the same index as `col`, cast to the requested - type. 
- """ - if type_ == "is_bool": - return col.map( - lambda x: ( - True - if x in ["True", 1, "1", "true", True] - else False - if x in [0, "0", "False", False, "false"] - else None - ) - ) - elif type_ == "is_int": - return pd.to_numeric(col, errors="coerce", downcast="integer") - elif type_ == "is_numeric": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_string": - return col.astype(str) - else: - raise ValueError(f"Unknown column type: {type_}") - - -def convert_col_to_int( - df: pd.DataFrame, - col: str, -) -> pd.DataFrame: - """ - Convert a column to an integer column. - - Example use case: Parquet uses categoricals. If supplied with a - categorical-type column, this function will convert it to an integer - column. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(col, str) - hdbg.dassert_in(col, df.columns) - # Attempt the conversion. - df[col] = df[col].astype("int64") - # Trust, but verify. - dassert_series_type_is(df[col], np.int64) - return df - - -def cast_series_to_type( - series: pd.Series, series_type: Optional[type] -) -> pd.Series: - """ - Convert a Pandas series to a given type. - - :param series: the input series - :param series_type: the type to convert the series into - - if None, then the series values are turned into Nones - :return: the series in the required type - """ - if series_type is None: - # Turn the series values into None. - series[:] = None - elif series_type is pd.Timestamp: - # Convert to timestamp. - series = pd.to_datetime(series) - elif series_type is dict: - # Convert to dict. - series = series.apply(eval) - else: - # Convert to the specified type. - series = series.astype(series_type) - return series - - -def _display(log_level: int, df: pd.DataFrame) -> None: - """ - Display a df in a notebook at the given log level. - - The behavior is similar to a command like `_LOG.log(log_level, ...)` but - for a notebook `display` command. 
- - :param log_level: log level at which to display a df. E.g., if `log_level = - logging.DEBUG`, then we display the df only if we are running with - `-v DEBUG`. If `log_level = logging.INFO` then we don't display it - """ - from IPython.display import display - - if ( - hsystem.is_running_in_ipynb() - and log_level >= hdbg.get_logger_verbosity() - ): - display(df) - - -def _df_to_str( - df: pd.DataFrame, - num_rows: Optional[int], - max_columns: int, - max_colwidth: int, - max_rows: int, - precision: int, - display_width: int, - use_tabulate: bool, - log_level: int, -) -> str: - is_in_ipynb = hsystem.is_running_in_ipynb() - out = [] - # Set dataframe print options. - with pd.option_context( - "display.max_colwidth", - max_colwidth, - # "display.height", 1000, - "display.max_rows", - max_rows, - "display.precision", - precision, - "display.max_columns", - max_columns, - "display.width", - display_width, - ): - if use_tabulate: - import tabulate - - out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) - # TODO(Grisha): Add an option to display all rows since if `num_rows` - # is `None`, only first and last 5 rows are displayed. Consider using - # `df.to_string()` instead of `str(df)`. - if num_rows is None or df.shape[0] <= num_rows: - # Print the entire data frame. - if not is_in_ipynb: - out.append(str(df)) - else: - # Display dataframe. - _display(log_level, df) - else: - nr = num_rows // 2 - if not is_in_ipynb: - # Print top and bottom of df. - out.append(str(df.head(nr))) - out.append("...") - tail_str = str(df.tail(nr)) - # Remove index and columns from tail_df. - skipped_rows = 1 - if df.index.name: - skipped_rows += 1 - tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) - out.append(tail_str) - else: - # TODO(gp): @all use this approach also above and update all the - # unit tests. 
- df = [ - df.head(nr), - pd.DataFrame( - [["..."] * df.shape[1]], index=[" "], columns=df.columns - ), - df.tail(nr), - ] - df = pd.concat(df) - # Display dataframe. - _display(log_level, df) - if not is_in_ipynb: - txt = "\n".join(out) - else: - txt = "" - return txt - - -# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that -# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. -# TODO(gp): We should make sure this works properly in a notebook, although -# it's not easy to unit test. -def df_to_str( - df: Union[pd.DataFrame, pd.Series, pd.Index], - *, - # TODO(gp): Remove this hack in the integration. - # handle_signed_zeros: bool = False, - handle_signed_zeros: bool = True, - num_rows: Optional[int] = 6, - print_dtypes: bool = False, - print_shape_info: bool = False, - print_nan_info: bool = False, - print_memory_usage: bool = False, - memory_usage_mode: str = "human_readable", - tag: Optional[str] = None, - max_columns: int = 10000, - max_colwidth: int = 2000, - max_rows: int = 500, - precision: int = 6, - display_width: int = 10000, - use_tabulate: bool = False, - log_level: int = logging.DEBUG, -) -> str: - """ - Print a dataframe to string reporting all the columns without trimming. - - Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works - properly when called from outside a notebook, i.e., the dataframe is printed - But it won't display the dataframe in a notebook, since the default level at - which the dataframe is displayed is `logging.DEBUG`. - - In this case to get the correct behavior one should do: - - ``` - log_level = ... 
- _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) - ``` - - :param: handle_signed_zeros: convert `-0.0` to `0.0` - :param: num_rows: max number of rows to print (half from the top and half from - the bottom of the dataframe) - - `None` to print the entire dataframe - :param print_dtypes: report dataframe types and information about the type of - each column by looking at the first value - :param print_shape_info: report dataframe shape, index and columns - :param print_memory_usage: report memory use for each - """ - if df is None: - return "" - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - elif isinstance(df, pd.Index): - df = df.to_frame(index=False) - hdbg.dassert_isinstance(df, pd.DataFrame) - # For some reason there are so-called "negative zeros", but we consider - # them equal to `0.0`. - df = df.copy() - if handle_signed_zeros: - for col_name in df.select_dtypes(include=[np.float64, float]).columns: - df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) - out = [] - # Print the tag. - if tag is not None: - out.append(f"# {tag}=") - if not df.empty: - # Print information about the shape and index. - # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info - if print_shape_info: - # TODO(gp): Unfortunately we can't improve this part of the output - # since there are many golden inside the code that would need to be - # updated. Consider automating updating the expected values in the code. - txt = f"index=[{df.index.min()}, {df.index.max()}]" - out.append(txt) - txt = f"columns={','.join(map(str, df.columns))}" - out.append(txt) - txt = f"shape={str(df.shape)}" - out.append(txt) - # Print information about the types. - if print_dtypes: - out.append("* type=") - - table = [] - - def _report_srs_stats(srs: pd.Series) -> List[Any]: - """ - Report dtype, the first element, and its type of series. 
- """ - row: List[Any] = [] - first_elem = srs.values[0] - num_unique = srs.nunique() - num_nans = srs.isna().sum() - row.extend( - [ - srs.dtype, - hprint.perc(num_unique, len(srs)), - hprint.perc(num_nans, len(srs)), - first_elem, - type(first_elem), - ] - ) - return row - - row = [] - col_name = "index" - row.append(col_name) - row.extend(_report_srs_stats(df.index)) - row = map(str, row) - table.append(row) - for col_name in df.columns: - row_: List[Any] = [] - row_.append(col_name) - row_.extend(_report_srs_stats(df[col_name])) - row_ = map(str, row_) - table.append(row_) - # - columns = [ - "col_name", - "dtype", - "num_unique", - "num_nans", - "first_elem", - "type(first_elem)", - ] - df_stats = pd.DataFrame(table, columns=columns) - stats_num_rows = None - df_stats_as_str = _df_to_str( - df_stats, - stats_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(df_stats_as_str) - # Print info about memory usage. - if print_memory_usage: - out.append("* memory=") - mem_use_df = pd.concat( - [df.memory_usage(deep=False), df.memory_usage(deep=True)], - axis=1, - keys=["shallow", "deep"], - ) - # Add total row. - mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) - mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) - # Convert into the desired format. - if memory_usage_mode == "bytes": - pass - elif memory_usage_mode == "human_readable": - import helpers.hintrospection as hintros - - mem_use_df = mem_use_df.applymap(hintros.format_size) - else: - raise ValueError( - f"Invalid memory_usage_mode='{memory_usage_mode}'" - ) - memory_num_rows = None - memory_usage_as_txt = _df_to_str( - mem_use_df, - memory_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(memory_usage_as_txt) - # Print info about nans. 
- if print_nan_info: - num_elems = df.shape[0] * df.shape[1] - num_nans = df.isna().sum().sum() - txt = f"num_nans={hprint.perc(num_nans, num_elems)}" - out.append(txt) - # - num_zeros = df.isnull().sum().sum() - txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" - out.append(txt) - # TODO(gp): np can't do isinf on objects like strings. - # num_infinite = np.isinf(df).sum().sum() - # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) - # out.append(txt) - # - num_nan_rows = df.dropna().shape[0] - txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" - out.append(txt) - # - num_nan_cols = df.dropna(axis=1).shape[1] - txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" - out.append(txt) - if hsystem.is_running_in_ipynb(): - if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): - print("\n".join(out)) - txt = None - # Print the df. - df_as_str = _df_to_str( - df, - num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - if not hsystem.is_running_in_ipynb(): - out.append(df_as_str) - txt = "\n".join(out) - return txt - - -def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: - """ - Organize dataframe values into a column-row structure. - - - Indentation artifacts are removed - - The index placement is handled, i.e. - - if the index is named, the name is located and moved to the same - row as the column names - - if the index is not named, the row with the column names receives - a placeholder empty value in its place - - Empty columns are dropped - - :param rows_values: row values extracted from a string df representation - :return: row values assembled into a valid column-row structure - """ - # Clean up indentation artifacts. - if all(row[0] == "" for row in rows_values): - # Remove the first empty cell in each row. 
- for row in rows_values: - del row[0] - # If the index is named, its name is located in the second row, - # with an optional extra empty value cell value next to it. - if len(rows_values[1]) == 1 or ( - len(rows_values[1]) == 2 and rows_values[1][1] == "" - ): - # Move the index name to the row with all the column names. - if rows_values[0][0] == "": - rows_values[0][0] = rows_values[1][0] - else: - rows_values[0].insert(0, rows_values[1][0]) - # Drop the former index name row. - del rows_values[1] - else: - # Add an empty cell for the absent index name. - rows_values[0].insert(0, "") - # Identify and remove empty columns. - min_len_row = min(len(row) for row in rows_values) - idxs_to_delete = [] - for i in range(min_len_row): - if all(row[i] == "" for row in rows_values): - idxs_to_delete.append(i) - for idx in idxs_to_delete: - for row in rows_values: - del row[idx] - # Confirm that all the rows have the same number of values. - hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) - return rows_values - - -def str_to_df( - df_as_str: str, - col_to_type: Dict[str, Optional[type]], - col_to_name_type: Dict[str, type], -) -> pd.DataFrame: - """ - Convert a string representation of a dataframe into a Pandas df. - - :param df_as_str: a df as a string - - the format of the string is the same as the output of - `hpandas.df_to_str()` on a pd.DataFrame, e.g. - ``` - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05 - ``` - - values (including column names) that contain spaces need - to be enclosed in double quotation marks, e.g. - "2023-03-15 16:35:41.205000+00:00" - :param col_to_type: a mapping between the column names and the - types of the values in these columns - - if a column is not present in the mapping, its values will - remain strings - - to indicate the type of index values, use {"__index__": ...} - mapping, e.g. 
{"__index__": pd.Timestamp} - :param col_to_name_type: a mapping between the column names and - the required types of these column names - - same conventions apply as for `col_to_type` (see above) - :return: a converted Pandas dataframe - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - # Remove the placeholder ["..."] row. - rows_values = [row for row in rows_values if row != ["..."]] - # Organize values into a proper column-row structure. - rows_values = _assemble_df_rows(rows_values) - # Get the column names. - column_names = rows_values[0][1:] - # Get the index. - index_values = [row[0] for row in rows_values[1:]] - index_name = rows_values[0][0] - # Construct the df. - df = pd.DataFrame( - [row[1:] for row in rows_values[1:]], - columns=column_names, - index=index_values, - ) - if index_name != "": - df.index.name = index_name - # Cast the columns into appropriate types. - for col, col_type in col_to_type.items(): - if col == "__index__": - df.index = cast_series_to_type(df.index, col_type) - else: - df[col] = cast_series_to_type(df[col], col_type) - # Cast the column names into appropriate types. - for col, col_name_type in col_to_name_type.items(): - if col == "__index__": - df.index = df.index.rename(col_name_type(df.index.name)) - else: - df = df.rename(columns={col: col_name_type(col)}) - return df - - -def convert_df_to_json_string( - df: pd.DataFrame, - n_head: Optional[int] = 10, - n_tail: Optional[int] = 10, - columns_order: Optional[List[str]] = None, -) -> str: - """ - Convert dataframe to pretty-printed JSON string. - - To select all rows of the dataframe, pass `n_head` as None. 
- - :param df: dataframe to convert - :param n_head: number of printed top rows - :param n_tail: number of printed bottom rows - :param columns_order: order for the KG columns sort - :return: dataframe converted to JSON string - """ - # Append shape of the initial dataframe. - shape = f"original shape={df.shape}" - # Reorder columns. - if columns_order is not None: - hdbg.dassert_set_eq(columns_order, df.cols) - df = df[columns_order] - # Select head. - if n_head is not None: - head_df = df.head(n_head) - else: - # If no n_head provided, append entire dataframe. - head_df = df - # Transform head to json. - head_json = head_df.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - if n_tail is not None: - # Transform tail to json. - tail = df.tail(n_tail) - tail_json = tail.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - else: - # If no tail specified, append an empty string. - tail_json = "" - # Join shape and dataframe to single string. - output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) - return output_str - - -def convert_df( - df: pd.DataFrame, *, print_invalid_values: bool = False -) -> pd.DataFrame: - """ - Convert each DataFrame column to its predominant type. - - This function inspects every column in `df`, determines whether the - majority of its values are boolean, numeric, or string, and then - casts the column to that type using `convert_to_type`. - - :param df: The input DataFrame whose columns will be converted. - :param print_invalid_values: If True, print any original values that could - not be converted (they become NaN after conversion) - :return: a new DataFrame with each column cast to its detected predominant - type. - """ - df_out = pd.DataFrame(index=df.index) - for col in df.columns: - series = df[col] - # Determine the dominant datatype. 
- col_type = infer_column_types(series)["type"] - hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) - # Convert the column to dominant datatype. - converted = convert_to_type(series, col_type) - if print_invalid_values: - invalid_mask = series.notna() & converted.isna() - if invalid_mask.any(): - invalid = series[invalid_mask].tolist() - print(f"Column {col} dropped invalid values: {invalid}") - df_out[col] = converted - return df_out - - -# ############################################################################# - - -def read_csv_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a CSV file into a `pd.DataFrame`. - """ - # Gets filename from stream if it is not already a string, - # so it can be inspected for extension type. - file_name = stream if isinstance(stream, str) else vars(stream)["path"] - # Handle zipped files. - if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "gzip" - elif file_name.endswith(".zip"): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "zip" - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_csv(stream, *args, **kwargs) - return df - - -def read_parquet_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a Parquet file into a `pd.DataFrame`. - """ - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_parquet(stream, *args, **kwargs) - return df - - -# ############################################################################# - - -# TODO(Paul): Add unit tests. -def compute_weighted_sum( - dfs: Dict[str, pd.DataFrame], - weights: pd.DataFrame, - *, - index_mode: str = "assert_equal", -) -> Dict[str, pd.DataFrame]: - """ - Compute weighted sums of `dfs` using `weights`. 
- - :param dfs: dataframes keyed by id; all dfs should have the same cols, - indices are handled based on the `index_mode` - :param weights: float weights indexed by id with unique col names - :param index_mode: same as `mode` in `apply_index_mode()` - :return: weighted sums keyed by weight col names - """ - hdbg.dassert_isinstance(dfs, dict) - hdbg.dassert(dfs, "dictionary of dfs must be nonempty") - # Get a dataframe from the dictionary and record its index and columns. - id_ = list(dfs)[0] - hdbg.dassert_isinstance(id_, str) - df = dfs[id_] - hdbg.dassert_isinstance(df, pd.DataFrame) - cols = df.columns - # Sanity-check dataframes in dictionary. - for key, value in dfs.items(): - hdbg.dassert_isinstance(key, str) - hdbg.dassert_isinstance(value, pd.DataFrame) - # The reference df is not modified. - _, value = apply_index_mode(df, value, index_mode) - hdbg.dassert( - value.columns.equals(cols), - "Column equality fails for keys=%s, %s", - id_, - key, - ) - # Sanity-check weights. - hdbg.dassert_isinstance(weights, pd.DataFrame) - hdbg.dassert_eq(weights.columns.nlevels, 1) - hdbg.dassert(not weights.columns.has_duplicates) - hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) - # Create a multiindexed dataframe to facilitate computing the weighted sums. - weighted_dfs = {} - combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) - # TODO(Paul): Consider relaxing the NaN-handling. - for col in weights.columns: - weighted_combined_df = combined_df.multiply(weights[col], level=0) - weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( - min_count=len(dfs) - ) - weighted_dfs[col] = weighted_sums - return weighted_dfs - - -def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: - """ - Remove N rows from the input data and shuffle the remaining ones. 
- - :param df: input data - :param nrows: the number of rows to remove from the original data - :param seed: see `random.seed()` - :return: shuffled data with removed rows - """ - hdbg.dassert_lte(1, nrows) - hdbg.dassert_lte(nrows, df.shape[0]) - idx = list(range(df.shape[0])) - random.seed(seed) - random.shuffle(idx) - idx = sorted(idx[nrows:]) - return df.iloc[idx] - - -def remap_obj( - obj: Union[pd.Series, pd.Index], - map_: Dict[Any, Any], - **kwargs: Any, -) -> pd.Series: - """ - Substitute each value of an object with another value from a dictionary. - - :param obj: an object to substitute value in - :param map_: values to substitute with - :return: remapped pandas series - """ - hdbg.dassert_lte(1, obj.shape[0]) - # TODO(Grisha): consider extending for other mapping types supported by - # `pd.Series.map`. - hdbg.dassert_isinstance(map_, dict) - # Check that every element of the object is in the mapping. - hdbg.dassert_is_subset(obj, map_.keys()) - new_srs = obj.map(map_, **kwargs) - return new_srs - - -def get_random_df( - num_cols: int, - seed: Optional[int] = None, - date_range_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - Compute df with random data with `num_cols` columns and index obtained by - calling `pd.date_range(**kwargs)`. 
- - :param num_cols: the number of columns in a DataFrame to generate - :param seed: see `random.seed()` - :param date_range_kwargs: kwargs for `pd.date_range()` - """ - if seed: - np.random.seed(seed) - dt = pd.date_range(**date_range_kwargs) - df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) - return df - - -# ############################################################################# - -# TODO(gp): -> AxisNameSet -ColumnSet = Optional[Union[str, List[str]]] - - -# TODO(gp): -> _resolve_axis_names -def _resolve_column_names( - column_set: ColumnSet, - columns: Union[List[str], pd.Index], - *, - keep_order: bool = False, -) -> List[str]: - """ - Change format of the columns and perform some sanity checks. - - :param column_set: columns to proceed - :param columns: all columns available - :param keep_order: preserve the original order or allow sorting - """ - # Ensure that `columns` is well-formed. - if isinstance(columns, pd.Index): - columns = columns.to_list() - hdbg.dassert_isinstance(columns, list) - hdbg.dassert_lte(1, len(columns)) - # - if column_set is None: - # Columns were not specified, thus use the list of all the columns. - column_set = columns - else: - if isinstance(column_set, str): - column_set = [column_set] - hdbg.dassert_isinstance(column_set, list) - hdbg.dassert_lte(1, len(column_set)) - hdbg.dassert_is_subset(column_set, columns) - if keep_order: - # Keep the selected columns in the same order as in the original - # `columns`. - column_set = [c for c in columns if c in column_set] - return column_set - - -# TODO(Grisha): finish the function. -# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? -def remove_outliers( - df: pd.DataFrame, - lower_quantile: float, - *, - column_set: ColumnSet, - # TODO(Grisha): the params are not used. 
- fill_value: float = np.nan, - mode: str = "remove_outliers", - axis: Any = 0, - upper_quantile: Optional[float] = None, -) -> pd.DataFrame: - hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") - # - hdbg.dassert_lte(0.0, lower_quantile) - if upper_quantile is None: - upper_quantile = 1.0 - lower_quantile - hdbg.dassert_lte(lower_quantile, upper_quantile) - hdbg.dassert_lte(upper_quantile, 1.0) - # - df = df.copy() - if axis == 0: - all_columns = df.columns - columns = _resolve_column_names(column_set, all_columns) - hdbg.dassert_is_subset(columns, df.columns) - for column in all_columns: - if column in columns: - df[column] = df[column].quantile( - [lower_quantile, upper_quantile] - ) - elif axis == 1: - all_rows = df.rows - rows = _resolve_column_names(column_set, all_rows) - hdbg.dassert_is_subset(rows, df.rows) - for row in all_rows: - if row in rows: - df[row] = df[row].quantile([lower_quantile, upper_quantile]) - else: - raise ValueError(f"Invalid axis='{axis}'") - return df - - -# ############################################################################# - - -# TODO(Grisha): add assertions/logging. -def get_df_from_iterator( - iter_: Iterator[pd.DataFrame], - *, - sort_index: bool = True, -) -> pd.DataFrame: - """ - Concat all the dataframes in the iterator in one dataframe. - - :param iter_: dataframe iterator - :param sort_index: whether to sort output index or not - :return: combined iterator data - """ - # TODO(gp): @all make a copy of `iter_` so we don't consume it. - dfs = list(iter_) - df_res = pd.concat(dfs) - if sort_index: - df_res = df_res.sort_index() - return df_res - - -def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> pd.DataFrame: - """ - Colorize a df with a heatmap depending on the numeric values. - - :param axis: along which axis to compute the heatmap - - 0 colorize along rows - - 1 colorize along columns - - None: colorize everything - """ - # Keep it here to avoid long start up times. 
- import seaborn as sns - - cm = sns.diverging_palette(5, 250, as_cmap=True) - df = df.style.background_gradient(axis=axis, cmap=cm) - return df - - -def compare_nans_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> pd.DataFrame: - """ - Compare equality of DataFrames in terms of NaNs. - - For example: - - `5 vs np.nan` is a mismatch - - `np.nan vs 5` is a mismatch - - `np.nan vs np.nan` is a match - - `np.nan vs np.inf` is a mismatch - - :param df1: dataframe to compare - :param df2: dataframe to compare with - :return: dataframe that shows the differences stacked side by side, see - `pandas.DataFrame.compare()` for an example - """ - dassert_axes_equal(df1, df2) - # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. - mask1 = df1.isna() & ~df2.isna() - mask2 = ~df1.isna() & df2.isna() - mask3 = mask1 | mask2 - # Compute a dataframe with the differences. - nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) - return nan_diff_df - - -# TODO(Grisha): -> `compare_dataframes()`? -def compare_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - row_mode: str = "equal", - column_mode: str = "equal", - # TODO(Grisha): should be True by default? - compare_nans: bool = False, - diff_mode: str = "diff", - assert_diff_threshold: float = 1e-3, - close_to_zero_threshold: float = 1e-6, - zero_vs_zero_is_zero: bool = True, - remove_inf: bool = True, - log_level: int = logging.DEBUG, - only_warning: bool = True, -) -> pd.DataFrame: - """ - Compare two dataframes. - - This works for dataframes with and without multi-index. 
- - :param row_mode: control how the rows are handled - - "equal": rows need to be the same for the two dataframes - - "inner": compute the common rows for the two dataframes - :param column_mode: same as `row_mode` - :param compare_nans: include NaN comparison if True otherwise just - compare non-NaN values - :param diff_mode: control how the dataframes are compared in terms of - corresponding elements - - "diff": use the difference - - "pct_change": use the percentage difference - :param assert_diff_threshold: maximum allowed total difference - - do not assert if `None` - - works when `diff_mode` is "pct_change" - :param close_to_zero_threshold: round numbers below the threshold to 0 - :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 - if True, otherwise keep the actual result - :param remove_inf: replace +-inf with `np.nan` - :param log_level: logging level - :param only_warning: when `True` the function issues a warning instead of aborting - :return: a singe dataframe with differences as values - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - # Check value of `assert_diff_threshold`, if it was passed. - if assert_diff_threshold: - hdbg.dassert_lte(assert_diff_threshold, 1.0) - hdbg.dassert_lte(0.0, assert_diff_threshold) - # TODO(gp): Factor out this logic and use it for both compare_visually_dfs - # and - if row_mode == "equal": - dassert_indices_equal(df1, df2) - elif row_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - same_rows = list((set(df1.index)).intersection(set(df2.index))) - df1 = df1[df1.index.isin(same_rows)] - df2 = df2[df2.index.isin(same_rows)] - else: - raise ValueError(f"Invalid row_mode='{row_mode}'") - # - if column_mode == "equal": - hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) - elif column_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. 
- col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) - df1 = df1[col_names] - df2 = df2[col_names] - else: - raise ValueError(f"Invalid column_mode='{column_mode}'") - # Round small numbers to 0 to exclude them from the diff computation. - close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold - df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( - 0 - ) - df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( - 0 - ) - # Compute the difference df. - if diff_mode == "diff": - # Test and convert the assertion into a boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - df1, df2, check_like=True, check_dtype=False - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise an assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n are not equal.", - df_to_str(df1, log_level=log_level), - df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Calculate the difference. - df_diff = df1 - df2 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - elif diff_mode == "pct_change": - # Compare NaN values in dataframes. - nan_diff_df = compare_nans_in_dataframes(df1, df2) - _LOG.debug("Dataframe with NaN differences=\n%s", df_to_str(nan_diff_df)) - msg = "There are NaN values in one of the dataframes that are not in the other one." - hdbg.dassert_eq( - 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning - ) - # Compute pct_change. - df_diff = 100 * (df1 - df2) / df2.abs() - if zero_vs_zero_is_zero: - # When comparing 0 to 0 set the diff (which is NaN by default) to 0. - df1_mask = df1 == 0 - df2_mask = df2 == 0 - zero_vs_zero_mask = df1_mask & df2_mask - df_diff[zero_vs_zero_mask] = 0 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - # Check if `df_diff` values are less than `assert_diff_threshold`. 
- if assert_diff_threshold is not None: - nan_mask = df_diff.isna() - within_threshold = ( - df_diff.abs() <= assert_diff_threshold - ) | nan_mask - expected = pd.DataFrame( - True, - index=within_threshold.index, - columns=within_threshold.columns, - ) - # Test and convert the assertion into boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - within_threshold, expected, check_exact=True - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", - df_to_str(df1, log_level=log_level), - df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Report max diff. - max_diff = df_diff.abs().max().max() - _LOG.log( - log_level, - "Maximum percentage difference between the two dataframes = %s", - max_diff, - ) - else: - raise ValueError(f"diff_mode={diff_mode}") - df_diff = df_diff.add_suffix(f".{diff_mode}") - return df_diff - - -# ############################################################################# -# Multi-index dfs -# ############################################################################# - - -# TODO(Grisha): should be a more elegant way to add a column. -def add_multiindex_col( - df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str -) -> pd.DataFrame: - """ - Add column to a multiindex DataFrame. - - Note: each column in a multiindex DataFrame is a DataFrame itself. - - :param df: multiindex df - :param multiindex_col: column (i.e. 
singleindex df) of a multiindex df - :param col_name: name of a new column - :return: a multiindex DataFrame with a new column - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) - hdbg.dassert_isinstance(col_name, str) - hdbg.dassert_not_in(col_name, df.columns) - for col in multiindex_col.columns: - df[col_name, col] = multiindex_col[col] - return df - - -def list_to_str( - vals: List[Any], - *, - sep_char: str = ", ", - enclose_str_char: str = "'", - max_num: Optional[int] = 10, -) -> str: - """ - Convert a list of values into a formatted string representation. - - E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" - - :param vals: values to be converted - :param sep_char: separator to use between elements - :param enclose_str_char: character to enclose each element's string - representation; if empty, elements are not enclosed - :param max_num: maximum number of elements to display in the output - :return: the formatted string representing the list - """ - vals_as_str = list(map(str, vals)) - # Add a str around. - if enclose_str_char: - vals_as_str = [ - enclose_str_char + v + enclose_str_char for v in vals_as_str - ] - # - ret = f"{len(vals)} [" - if max_num is not None and len(vals) > max_num: - hdbg.dassert_lt(1, max_num) - ret += sep_char.join(vals_as_str[: int(max_num / 2)]) - ret += sep_char + "..." + sep_char - ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) - else: - ret += sep_char.join(vals_as_str) - ret += "]" - return ret - - -def multiindex_df_info( - df: pd.DataFrame, - *, - log_level: int = logging.INFO, - **list_to_str_kwargs: Dict[str, Any], -) -> str: - """ - Report information about a multi-index df. 
- """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - columns_level0 = df.columns.levels[0] - columns_level1 = df.columns.levels[1] - rows = df.index - ret = [] - ret.append( - f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" - ) - ret.append( - "columns_level0=" + list_to_str(columns_level0, **list_to_str_kwargs) - ) - ret.append( - "columns_level1=" + list_to_str(columns_level1, **list_to_str_kwargs) - ) - ret.append("rows=" + list_to_str(rows, **list_to_str_kwargs)) - if isinstance(df.index, pd.DatetimeIndex): - # Display timestamp info. - start_timestamp = df.index.min() - end_timestamp = df.index.max() - frequency = df.index.freq - if frequency is None: - # Try to infer frequency. - frequency = pd.infer_freq(df.index) - ret.append(f"start_timestamp={start_timestamp}") - ret.append(f"end_timestamp={end_timestamp}") - ret.append(f"frequency={frequency}") - ret = "\n".join(ret) - _LOG.log(log_level, ret) - return ret - - -def subset_multiindex_df( - df: pd.DataFrame, - *, - # TODO(gp): Consider passing trim_df_kwargs as kwargs. - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, - columns_level0: ColumnSet = None, - columns_level1: ColumnSet = None, - keep_order: bool = False, -) -> pd.DataFrame: - """ - Filter multi-index DataFrame by timestamp index and column levels. - - :param start_timestamp: see `trim_df()` - :param end_timestamp: see `trim_df()` - :param columns_level0: column names that corresponds to `df.columns.levels[0]` - - `None` means no filtering - :param columns_level1: column names that corresponds to `df.columns.levels[1]` - - `None` means no filtering - :param keep_order: see `_resolve_column_names()` - :return: filtered DataFrame - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - # Filter by timestamp. 
- allow_empty = False - strictly_increasing = False - dassert_time_indexed_df(df, allow_empty, strictly_increasing) - df = trim_df( - df, - ts_col_name=None, - start_ts=start_timestamp, - end_ts=end_timestamp, - left_close=True, - right_close=True, - ) - # Filter level 0. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level0 = df.columns.levels[0] - columns_level0 = _resolve_column_names( - columns_level0, all_columns_level0, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) - df = df[columns_level0] - # Filter level 1. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level1 = df.columns.levels[1] - columns_level1 = _resolve_column_names( - columns_level1, all_columns_level1, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) - df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) - return df - - -# ############################################################################# - - -def compare_multiindex_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, - compare_dfs_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - - Subset both multi-index dfs, if needed - - Compare dfs - - :param subset_multiindex_df: params for `subset_multiindex_df()` - :param compare_dfs_kwargs: params for `compare_dfs()` - :return: df with differences as values - """ - # Subset dfs. - if subset_multiindex_df_kwargs is None: - subset_multiindex_df_kwargs = {} - subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) - subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) - # Compare dfs. 
- if compare_dfs_kwargs is None: - compare_dfs_kwargs = {} - diff_df = compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) - return diff_df - - -# ############################################################################# - - -def compute_duration_df( - tag_to_df: Dict[str, pd.DataFrame], - *, - intersect_dfs: bool = False, - valid_intersect: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: - """ - Compute a df with some statistics about the time index. - - E.g., - ``` - min_index max_index min_valid_index max_valid_index - tag1 - tag2 - ``` - - :param intersect_dfs: return a transformed dict with the intersection of - indices of all the dfs if True, otherwise return the input data as is - :param valid_intersect: intersect indices without NaNs if True, otherwise - intersect indices as is - :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param - """ - hdbg.dassert_isinstance(tag_to_df, Dict) - # Create df and assign columns. - data_stats = pd.DataFrame() - min_col = "min_index" - max_col = "max_index" - min_valid_index_col = "min_valid_index" - max_valid_index_col = "max_valid_index" - # Collect timestamp info from all dfs. - for tag in tag_to_df.keys(): - # Check that the passed timestamp has timezone info. - hdateti.dassert_has_tz(tag_to_df[tag].index[0]) - dassert_index_is_datetime(tag_to_df[tag]) - # Compute timestamp stats. - data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() - data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() - data_stats.loc[tag, min_valid_index_col] = ( - tag_to_df[tag].dropna().index.min() - ) - data_stats.loc[tag, max_valid_index_col] = ( - tag_to_df[tag].dropna().index.max() - ) - # Make a copy so we do not modify the original data. - tag_to_df_updated = tag_to_df.copy() - # Change the initial dfs with intersection. - if intersect_dfs: - if valid_intersect: - # Assign start, end date column according to specs. 
- min_col = min_valid_index_col - max_col = max_valid_index_col - # The start of the intersection will be the max value amongt all start dates. - intersection_start_date = data_stats[min_col].max() - # The end of the intersection will be the min value amongt all end dates. - intersection_end_date = data_stats[max_col].min() - for tag in tag_to_df_updated.keys(): - df = trim_df( - tag_to_df_updated[tag], - ts_col_name=None, - start_ts=intersection_start_date, - end_ts=intersection_end_date, - left_close=True, - right_close=True, - ) - tag_to_df_updated[tag] = df - return data_stats, tag_to_df_updated - - -# ############################################################################# - - -# TODO(gp): Remove this since it's in Google API. -def to_gsheet( - df: pd.DataFrame, - gsheet_name: str, - gsheet_sheet_name: str, - overwrite: bool, -) -> None: - """ - Save a dataframe to a Google sheet. - - :param df: the dataframe to save to a Google sheet - :param gsheet_name: the name of the Google sheet to save the df - into; the Google sheet with this name must already exist on the - Google Drive - :param gsheet_sheet_name: the name of the sheet in the Google sheet - :param overwrite: if True, the contents of the sheet are erased - before saving the dataframe into it; if False, the dataframe is - appended to the contents of the sheet - """ - import gspread_pandas - - spread = gspread_pandas.Spread( - gsheet_name, sheet=gsheet_sheet_name, create_sheet=True - ) - if overwrite: - spread.clear_sheet() - else: - sheet_contents = spread.sheet_to_df(index=None) - combined_df = pd.concat([sheet_contents, df]) - df = combined_df.drop_duplicates() - spread.df_to_sheet(df, index=False) - - -# ############################################################################# -# _SummaryRow -# ############################################################################# - - -@dataclasses.dataclass -class _SummaryRow: - """ - Output of a check corresponding to a row of the summary df. 
- """ - - # Description of the check. - description: str - # Description of the output. - comment: str - # Whether the check was successful or not. - is_ok: bool - - -# ############################################################################# -# CheckSummary -# ############################################################################# - - -class CheckSummary: - """ - Collect and report the results of several checks performed in a notebook. - """ - - def __init__(self, *, title: Optional[str] = ""): - self.title = title - # - self._array: List[_SummaryRow] = [] - - def add(self, description: str, comment: str, is_ok: bool) -> None: - """ - Add the result of a single check. - """ - summary_row = _SummaryRow(description, comment, is_ok) - self._array.append(summary_row) - - def is_ok(self) -> bool: - """ - Compute whether all the checks were succesfull or not. - """ - is_ok = all(sr.is_ok for sr in self._array) - return is_ok - - def report_outcome( - self, *, notebook_output: bool = True, assert_on_error: bool = True - ) -> Optional[str]: - """ - Report the result of the entire check. - - :param notebook_output: report the result of the checks for a - notebook or as a string - :param assert_on_error: assert if one check failed - """ - df = pd.DataFrame(self._array) - - # Compute result as a string. - result = [] - if self.title: - result.append("# " + self.title) - result.append(str(df)) - is_ok = self.is_ok() - result.append(f"is_ok={is_ok}") - result = "\n".join(result) - # Display on a notebook, if needed. - if notebook_output: - if self.title: - print(self.title) - - # Convert DataFrame to HTML with colored rows based on 'is_ok' column. - def _color_rows(row: bool) -> str: - """ - Apply red/green color based on boolean value in `row["is_ok"]`. 
- """ - is_ok = row["is_ok"] - color = "#FA6B84" if not is_ok else "#ACF3AE" - return [f"background-color: {color}"] * len(row) - - df_html = df.style.apply(_color_rows, axis=1) - from IPython.display import display - - display(df_html) - print(f"is_ok={is_ok}") - # Assert if at least one of the check failed. - if not is_ok and assert_on_error: - raise ValueError("The checks have failed:\n" + result) - # For notebooks, we want to return None, since the outcome was - # already displayed. - if notebook_output: - result = None - return result - - -# ############################################################################# - - -def add_end_download_timestamp( - obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" -) -> Union[pd.DataFrame, Dict]: - """ - Add a column 'end_download_timestamp' to the DataFrame with the current - time. - - :param obj: The DataFrame to which the column will be added. - :param timezone: The timezone for the current time. Defaults to - 'UTC'. - """ - # Get current timestamp. - current_ts = hdateti.get_current_time(timezone) - # Set value of end_download_timestamp. - obj["end_download_timestamp"] = current_ts - return obj - - -def filter_df( - df: pd.DataFrame, - col_name: str, - value: Any, - *, - invert: bool = False, - check_value: bool = True, - print_info: bool = True, -) -> pd.DataFrame: - hdbg.dassert_in(col_name, df.columns) - if isinstance(value, list): - mask = df[col_name].isin(value) - else: - if check_value: - hdbg.dassert_in(value, df[col_name].unique()) - mask = df[col_name] == value - if invert: - mask = ~mask - if print_info: - _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0])) - return df[mask] - - -def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Dict[str, Any]) -> str: - """ - Report percentage of True for a list / series. 
- """ - if isinstance(vals, list): - vals = pd.Series(vals) - ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) - return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py deleted file mode 100644 index 54ca04c93..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_analysis.py +++ /dev/null @@ -1,628 +0,0 @@ -""" -Statistical analysis and ML functions for pandas DataFrames. - -Import as: - -import helpers.hpandas_analysis as hpananal -""" - -import datetime -import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -# Lazy imports to avoid slow module loading. -# When a type checker analyzes the code: it pretends the imports exist, so you -# can use those names in type annotations without “unknown name” errors. -# These heavy dependencies are only imported when functions are actually called. -if TYPE_CHECKING: - import matplotlib as mpl - -_LOG = logging.getLogger(__name__) - - -def _get_num_pcs_to_plot(num_pcs_to_plot: int, max_pcs: int) -> int: - """ - Get the number of principal components to plot. - - :param num_pcs_to_plot: requested number of PCs to plot, use -1 for - all - :param max_pcs: maximum number of available principal components - :return: validated number of PCs to plot - """ - if num_pcs_to_plot == -1: - num_pcs_to_plot = max_pcs - hdbg.dassert_lte(0, num_pcs_to_plot) - hdbg.dassert_lte(num_pcs_to_plot, max_pcs) - return num_pcs_to_plot - - -def rolling_corr_over_time( - df: pd.DataFrame, com: float, nan_mode: str -) -> pd.DataFrame: - """ - Compute rolling correlation over time. 
- - :return: corr_df is a multi-index df storing correlation matrices - with labels - """ - import helpers.hpandas_dassert as hpandass - - hpandass.dassert_strictly_increasing_index(df) - # Handle NaNs based on mode. - if nan_mode == "drop": - df = df.dropna(how="any") - elif nan_mode == "fill_with_zero": - df = df.fillna(0.0) - elif nan_mode == "abort": - num_nans = np.isnan(df).sum().sum() - if num_nans > 0: - raise ValueError("df has %d nans\n%s" % (num_nans, df)) - else: - raise ValueError("Invalid nan_mode='%s'" % nan_mode) - corr_df = df.ewm(com=com, min_periods=3 * com).corr() - return corr_df - - -def _get_eigvals_eigvecs( - df: pd.DataFrame, dt: datetime.date, sort_eigvals: bool -) -> Tuple[np.array, np.array]: - """ - Compute eigenvalues and eigenvectors for a correlation matrix at a specific - date. - - :param df: correlation matrix dataframe with multiindex (date, - columns) - :param dt: date for which to compute eigenvalues/eigenvectors - :param sort_eigvals: whether to sort eigenvalues in descending order - :return: tuple of (eigenvalues array, eigenvectors array) - """ - hdbg.dassert_isinstance(dt, datetime.date) - df_tmp = df.loc[dt].copy() - # Compute rolling eigenvalues and eigenvectors. - # TODO(gp): Count and report inf and nans as warning. - df_tmp.replace([np.inf, -np.inf], np.nan, inplace=True) - df_tmp.fillna(0.0, inplace=True) - eigval, eigvec = np.linalg.eigh(df_tmp) - # Sort eigenvalues, if needed. 
- if not (sorted(eigval) == eigval).all(): - _LOG.debug("eigvals not sorted: %s", eigval) - if sort_eigvals: - _LOG.debug( - "Before sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec - ) - _LOG.debug("eigvals: %s", eigval) - idx = eigval.argsort()[::-1] - eigval = eigval[idx] - eigvec = eigvec[:, idx] - _LOG.debug( - "After sorting:\neigval=\n%s\neigvec=\n%s", eigval, eigvec - ) - # - if (eigval == 0).all(): - eigvec = np.nan * eigvec - return eigval, eigvec - - -def rolling_pca_over_time( - df: pd.DataFrame, com: float, nan_mode: str, sort_eigvals: bool = True -) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """ - Compute rolling PCAs over time. - - :param sort_eigvals: sort the eigenvalues in descending orders - :return: - - eigval_df stores eigenvalues for the different components indexed by - timestamps - - eigvec_df stores eigenvectors as multiindex df - """ - import tqdm.autonotebook as tauton - - import helpers.hpandas_dassert as hpandass - - # Compute rolling correlation. - corr_df = rolling_corr_over_time(df, com, nan_mode) - # Compute eigvalues and eigenvectors. - timestamps = corr_df.index.get_level_values(0).unique() - eigval = np.zeros((timestamps.shape[0], df.shape[1])) - eigvec = np.zeros((timestamps.shape[0], df.shape[1], df.shape[1])) - for i, dt in tauton.tqdm( - enumerate(timestamps), - total=timestamps.shape[0], - desc="Computing rolling PCA", - ): - eigval[i], eigvec[i] = _get_eigvals_eigvecs(corr_df, dt, sort_eigvals) - # Package results. - eigval_df = pd.DataFrame(eigval, index=timestamps) - hdbg.dassert_eq(eigval_df.shape[0], len(timestamps)) - hpandass.dassert_strictly_increasing_index(eigval_df) - # Normalize by sum. - # TODO(gp): Move this up. 
- eigval_df = eigval_df.multiply(1 / eigval_df.sum(axis=1), axis="index") - # - # pylint ref: github.com/PyCQA/pylint/issues/3139 - eigvec = eigvec.reshape((-1, eigvec.shape[-1])) # pylint: disable=unsubscriptable-object - idx = pd.MultiIndex.from_product( - [timestamps, df.columns], names=["datetime", None] - ) - eigvec_df = pd.DataFrame(eigvec, index=idx, columns=range(df.shape[1])) # pylint: disable=unsubscriptable-object - hdbg.dassert_eq( - len(eigvec_df.index.get_level_values(0).unique()), len(timestamps) - ) - return corr_df, eigval_df, eigvec_df - - -def plot_pca_over_time( - eigval_df: pd.DataFrame, - eigvec_df: pd.DataFrame, - num_pcs_to_plot: int = 0, - num_cols: int = 2, -) -> None: - """ - Similar to plot_pca_analysis() but over time. - """ - import helpers.hmatplotlib as hmatplo - - # Plot eigenvalues. - eigval_df.plot(title="Eigenvalues over time", ylim=(0, 1)) - # Plot cumulative variance. - eigval_df.cumsum(axis=1).plot( - title="Fraction of variance explained by top PCs over time", ylim=(0, 1) - ) - # Plot eigenvalues. - max_pcs = eigvec_df.shape[1] - num_pcs_to_plot = _get_num_pcs_to_plot(num_pcs_to_plot, max_pcs) - _LOG.info("num_pcs_to_plot=%s", num_pcs_to_plot) - if num_pcs_to_plot > 0: - _, axes = hmatplo.get_multiple_plots( - num_pcs_to_plot, - num_cols=num_cols, - y_scale=4, - sharex=True, - sharey=True, - ) - for i in range(num_pcs_to_plot): - eigvec_df[i].unstack(1).plot( - ax=axes[i], ylim=(-1, 1), title="PC%s" % i - ) - - -def plot_time_distributions( - dts: List[Union[datetime.datetime, pd.Timestamp]], - mode: str, - density: bool = True, -) -> "mpl.axes.Axes": - """ - Compute distribution for an array of timestamps `dts`. 
- - - mode: see below - """ - hdbg.dassert_type_in(dts[0], (datetime.datetime, pd.Timestamp)) - hdbg.dassert_in( - mode, - ( - "time_of_the_day", - "weekday", - "minute_of_the_hour", - "day_of_the_month", - "month_of_the_year", - "year", - ), - ) - if mode == "time_of_the_day": - # Convert in minutes from the beginning of the day. - data = [dt.time() for dt in dts] - data = [t.hour * 60 + t.minute for t in data] - # 1 hour bucket. - step = 60 - bins = np.arange(0, 24 * 60 + step, step) - vals = pd.cut( - data, - bins=bins, - include_lowest=True, - right=False, - retbins=False, - labels=False, - ) - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = [ - "%02d:%02d" % (bins[k] / 60, bins[k] % 60) for k in count.index - ] - elif mode == "weekday": - data = [dt.date().weekday() for dt in dts] - bins = np.arange(0, 7 + 1) - vals = pd.cut( - data, - bins=bins, - include_lowest=True, - right=False, - retbins=False, - labels=False, - ) - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = "Mon Tue Wed Thu Fri Sat Sun".split() - elif mode == "minute_of_the_hour": - vals = [dt.time().minute for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = list(map(str, list(range(1, 60 + 1)))) - elif mode == "day_of_the_month": - vals = [dt.date().day for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = list(map(str, list(range(1, 31 + 1)))) - elif mode == "month_of_the_year": - vals = [dt.date().month for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. - yticks = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split() - elif mode == "year": - vals = [dt.date().year for dt in dts] - # Count. - count = pd.Series(vals).value_counts(sort=False) - # Compute the labels. 
- yticks = pd.Series(vals).unique().tolist() - else: - raise ValueError("Invalid mode='%s'" % mode) - hdbg.dassert_eq(count.sum(), len(dts)) - # - if density: - count /= count.sum() - label = "num points=%s" % len(dts) - ax = count.plot(kind="bar", label=label, figsize=(20, 7)) - ax.set_xticklabels(yticks) - if density: - ax.set_ylabel("Probability") - else: - ax.set_ylabel("Count") - ax.legend(loc="best") - return ax - - -# TODO(gp): It can't accept ax. Remove this limitation. -def jointplot( - df: pd.DataFrame, - predicted_var: str, - predictor_var: str, - height: Optional[int] = None, - *args: Any, - **kwargs: Any, -) -> None: - """ - Perform a scatterplot of two columns of a dataframe using - seaborn.jointplot(). - - :param df: dataframe - :param predicted_var: y-var - :param predictor_var: x-var :param args, kwargs: arguments passed to - seaborn.jointplot() - """ - import seaborn as sns - - hdbg.dassert_in(predicted_var, df.columns) - hdbg.dassert_in(predictor_var, df.columns) - df = df[[predicted_var, predictor_var]] - # Remove non-finite values. - # TODO(gp): Use explore.dropna(). - mask = np.all(np.isfinite(df.values), axis=1) - df = df[mask] - # Plot. - sns.jointplot( - x=predictor_var, y=predicted_var, data=df, height=height, *args, **kwargs - ) - - -def _preprocess_regression( - df: pd.DataFrame, - intercept: bool, - predicted_var: str, - predicted_var_delay: int, - predictor_vars: Union[str, List[str]], - predictor_vars_delay: int, -) -> Optional[Tuple[pd.DataFrame, List[str], List[str]]]: - """ - Preprocess data in dataframe form in order to perform a regression. - """ - # Sanity check vars. - hdbg.dassert_type_is(df, pd.DataFrame) - hdbg.dassert_lte(1, df.shape[0]) - if isinstance(predictor_vars, str): - predictor_vars = [predictor_vars] - hdbg.dassert_type_is(predictor_vars, list) - # hdbg.dassert_type_is(predicted_var, str) - hdbg.dassert_not_in(predicted_var, predictor_vars) - if not predictor_vars: - # No predictors. 
- _LOG.warning("No predictor vars: skipping") - return None - # - col_names = [predicted_var] + predictor_vars - hdbg.dassert_is_subset(col_names, df.columns) - df = df[col_names].copy() - num_rows = df.shape[0] - # Shift. - if predicted_var_delay != 0: - df[predicted_var] = df[predicted_var].shift(predicted_var_delay) - _LOG.warning("Shifting predicted_var=%s", predicted_var_delay) - if predictor_vars_delay != 0: - df[predictor_vars] = df[predictor_vars].shift(predictor_vars_delay) - _LOG.warning("Shifting predictor_vars=%s", predictor_vars_delay) - # Remove non-finite values. - # TODO(gp): Use the function. - df.dropna(how="all", inplace=True) - num_rows_after_drop_nan_all = df.shape[0] - if num_rows_after_drop_nan_all != num_rows: - _LOG.info( - "Removed %s rows with all nans", - hprint.perc(num_rows - num_rows_after_drop_nan_all, num_rows), - ) - # - df.dropna(how="any", inplace=True) - num_rows_after_drop_nan_any = df.shape[0] - if num_rows_after_drop_nan_any != num_rows_after_drop_nan_all: - _LOG.warning( - "Removed %s rows with any nans", - hprint.perc(num_rows - num_rows_after_drop_nan_any, num_rows), - ) - # Prepare data. - if intercept: - if "const" not in df.columns: - df.insert(0, "const", 1.0) - predictor_vars = ["const"] + predictor_vars[:] - param_names = predictor_vars[:] - hdbg.dassert(np.all(np.isfinite(df[predicted_var].values))) - hdbg.dassert( - np.all(np.isfinite(df[predictor_vars].values)), - msg="predictor_vars=%s" % predictor_vars, - ) - # Perform regression. 
- if df.shape[0] < 1: - return None - return df, param_names, predictor_vars - - -def ols_regress( - df: pd.DataFrame, - predicted_var: str, - predictor_vars: str, - intercept: bool, - print_model_stats: bool = True, - tsplot: bool = False, - tsplot_figsize: Optional[Any] = None, - jointplot_: bool = True, - jointplot_height: Optional[Any] = None, - predicted_var_delay: int = 0, - predictor_vars_delay: int = 0, - max_nrows: float = 1e4, -) -> Optional[Dict[str, Any]]: - """ - Perform OLS on columns of a dataframe. - - :param df: dataframe - :param predicted_var: y variable - :param predictor_vars: x variables - :param intercept: - :param print_model_stats: print or return the model stats - :param tsplot: plot a time-series if possible - :param tsplot_figsize: - :param jointplot_: plot a scatter plot - :param jointplot_height: - :param predicted_var_delay: - :param predictor_vars_delay: - :param max_nrows: do not plot if there are too many rows, since - notebook can be slow or hang - :return: - """ - import statsmodels.api - - import helpers.hmatplotlib as hmatplo - - obj = _preprocess_regression( - df, - intercept, - predicted_var, - predicted_var_delay, - predictor_vars, - predictor_vars_delay, - ) - if obj is None: - return None - df, param_names, predictor_vars = obj - hdbg.dassert_lte(1, df.shape[0]) - model = statsmodels.api.OLS( - df[predicted_var], df[predictor_vars], hasconst=intercept - ).fit() - regr_res = { - "param_names": param_names, - "coeffs": model.params, - "pvals": model.pvalues, - # pylint: disable=no-member - "rsquared": model.rsquared, - "adj_rsquared": model.rsquared_adj, - "model": model, - } - if print_model_stats: - # pylint: disable=no-member - _LOG.info(model.summary().as_text()) - if tsplot or jointplot_: - if max_nrows is not None and df.shape[0] > max_nrows: - _LOG.warning( - "Skipping plots since df has %d > %d rows", - df.shape[0], - max_nrows, - ) - else: - predictor_vars = [p for p in predictor_vars if p != "const"] - if 
len(predictor_vars) == 1: - if tsplot: - # Plot the data over time. - if tsplot_figsize is None: - tsplot_figsize = hmatplo.FIG_SIZE - df[[predicted_var, predictor_vars[0]]].plot( - figsize=tsplot_figsize - ) - if jointplot_: - # Perform scatter plot. - if jointplot_height is None: - jointplot_height = hmatplo.FIG_SIZE[1] - jointplot( - df, - predicted_var, - predictor_vars[0], - height=jointplot_height, - ) - else: - _LOG.warning( - "Skipping plots since there are too many predictors" - ) - if print_model_stats: - return None - return regr_res - - -def ols_regress_series( - srs1: pd.Series, - srs2: pd.Series, - intercept: bool, - srs1_name: Optional[Any] = None, - srs2_name: Optional[Any] = None, - convert_to_dates: bool = False, - **kwargs: Any, -) -> Dict[str, Any]: - """ - Regress two series against each other. - - Wrapper around regress() to regress series against each other. - """ - # Validate inputs are Series. - hdbg.dassert_isinstance(srs1, pd.Series) - hdbg.dassert_isinstance(srs2, pd.Series) - srs1 = srs1.copy() - srs2 = srs2.copy() - # - if convert_to_dates: - _LOG.warning("Sampling to date") - srs1.index = [pd.to_datetime(dt).date() for dt in srs1.index] - srs2.index = [pd.to_datetime(dt).date() for dt in srs2.index] - # - hdbg.dassert_array_has_same_type_element(srs1, srs2, only_first_elem=True) - # Check common indices. - common_idx = srs1.index.intersection(srs2.index) - hdbg.dassert_lte(1, len(common_idx)) - # Merge series into a dataframe. 
- if srs1_name is None: - srs1_name = srs1.name if srs1.name is not None else "" - if srs2_name is None: - srs2_name = srs2.name if srs2.name is not None else "" - if srs1_name == srs2_name: - srs1_name += "_1" - srs2_name += "_2" - _LOG.warning("Series have the same name: adding suffix to distinguish") - df = pd.concat([srs1, srs2], axis=1, join="outer") - df.columns = [srs1_name, srs2_name] - # - val = ols_regress(df, srs1_name, srs2_name, intercept=intercept, **kwargs) - val = cast(Dict[str, Any], val) - return val - - -def robust_regression( - df: pd.DataFrame, - predicted_var: str, - predictor_vars: str, - intercept: bool, - jointplot_: bool = True, - jointplot_figsize: Optional[Any] = None, - predicted_var_delay: int = 0, - predictor_vars_delay: int = 0, -) -> None: - """ - Perform robust regression using RANSAC algorithm to handle outliers. - - :param df: dataframe with data - :param predicted_var: dependent variable column name - :param predictor_vars: independent variable column name(s) - :param intercept: whether to include intercept in regression - :param jointplot_: whether to create a scatter plot - :param jointplot_figsize: size of the joint plot - :param predicted_var_delay: shift predicted variable by this many - periods - :param predictor_vars_delay: shift predictor variables by this many - periods - """ - import matplotlib.pyplot as plt - import sklearn.linear_model - - import helpers.hmatplotlib as hmatplo - - obj = _preprocess_regression( - df, - intercept, - predicted_var, - predicted_var_delay, - predictor_vars, - predictor_vars_delay, - ) - if obj is None: - return - # From http://scikit-learn.org/stable/auto_examples/linear_model/ - # plot_robust_fit.html#sphx-glr-auto-examples-linear-model-plot-robust-fit-py - # TODO(gp): Add also TheilSenRegressor and HuberRegressor. - - hdbg.dassert_eq(len(predictor_vars), 1) - y = df[predicted_var] - X = df[predictor_vars] - # Fit line using all data. 
- lr = sklearn.linear_model.LinearRegression() - lr.fit(X, y) - # Robustly fit linear model with RANSAC algorithm. - ransac = sklearn.linear_model.RANSACRegressor() - ransac.fit(X, y) - inlier_mask = ransac.inlier_mask_ - outlier_mask = np.logical_not(inlier_mask) - # Predict data of estimated models. - line_X = np.linspace(X.min().values[0], X.max().values[0], num=100)[ - :, np.newaxis - ] - line_y = lr.predict(line_X) - line_y_ransac = ransac.predict(line_X) - # Compare estimated coefficients - _LOG.info("Estimated coef for linear regression=%s", lr.coef_) - _LOG.info("Estimated coef for RANSAC=%s", ransac.estimator_.coef_) - if jointplot_: - if jointplot_figsize is None: - jointplot_figsize = hmatplo.FIG_SIZE - plt.figure(figsize=jointplot_figsize) - plt.scatter( - X[inlier_mask], - y[inlier_mask], - color="red", - marker="o", - label="Inliers", - ) - plt.scatter( - X[outlier_mask], - y[outlier_mask], - color="blue", - marker="o", - label="Outliers", - ) - plt.plot(line_X, line_y, color="green", linewidth=2, label="OLS") - plt.plot( - line_X, line_y_ransac, color="black", linewidth=3, label="RANSAC" - ) - plt.legend(loc="best") - plt.xlabel(", ".join(predictor_vars)) - plt.ylabel(predicted_var) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py deleted file mode 100644 index 0604afd67..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_check_summary.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Import as: - -import helpers.hpandas_check_summary as hpachsum -""" - -import dataclasses -from typing import List, Optional - -import pandas as pd - -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - - -# 
############################################################################# -# _SummaryRow -# ############################################################################# - - -@dataclasses.dataclass -class _SummaryRow: - """ - Output of a check corresponding to a row of the summary df. - """ - - # Description of the check. - description: str - # Description of the output. - comment: str - # Whether the check was successful or not. - is_ok: bool - - -# ############################################################################# -# CheckSummary -# ############################################################################# - - -class CheckSummary: - """ - Collect and report the results of several checks performed in a notebook. - """ - - def __init__(self, *, title: Optional[str] = ""): - self.title = title - # Initialize the array for storing summary rows. - self._array: List[_SummaryRow] = [] - - def add(self, description: str, comment: str, is_ok: bool) -> None: - """ - Add the result of a single check. - """ - summary_row = _SummaryRow(description, comment, is_ok) - self._array.append(summary_row) - - def is_ok(self) -> bool: - """ - Compute whether all the checks were successful or not. - """ - is_ok = all(sr.is_ok for sr in self._array) - return is_ok - - def report_outcome( - self, *, notebook_output: bool = True, assert_on_error: bool = True - ) -> Optional[str]: - """ - Report the result of the entire check. - - :param notebook_output: report the result of the checks for a - notebook or as a string - :param assert_on_error: assert if one check failed - """ - df = pd.DataFrame(self._array) - - # Compute result as a string. - result = [] - if self.title: - result.append("# " + self.title) - result.append(str(df)) - is_ok = self.is_ok() - result.append(f"is_ok={is_ok}") - result = "\n".join(result) - # Display on a notebook, if needed. 
- if notebook_output: - if self.title: - print(self.title) - - # Convert DataFrame to HTML with colored rows based on 'is_ok' column. - def _color_rows(row: bool) -> str: - """ - Apply red/green color based on boolean value in `row["is_ok"]`. - """ - is_ok = row["is_ok"] - color = "#FA6B84" if not is_ok else "#ACF3AE" - return [f"background-color: {color}"] * len(row) - - df_html = df.style.apply(_color_rows, axis=1) - from IPython.display import display - - display(df_html) - print(f"is_ok={is_ok}") - # Assert if at least one of the check failed. - if not is_ok and assert_on_error: - raise ValueError("The checks have failed:\n" + result) - # For notebooks, we want to return None, since the outcome was - # already displayed. - if notebook_output: - result = None - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py deleted file mode 100644 index c421095a3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_clean.py +++ /dev/null @@ -1,282 +0,0 @@ -""" -Import as: - -import helpers.hpandas_clean as hpanclea -""" - -from typing import Any, List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_utils as hpanutil -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - - -def drop_duplicates( - data: Union[pd.Series, pd.DataFrame], - use_index: bool, - column_subset: Optional[List[str]] = None, - *args: Any, - **kwargs: Any, -) -> Union[pd.Series, pd.DataFrame]: - """ - Wrap `pandas.drop_duplicates()` with additional index handling. 
- - See the official docs: - - https://pandas.pydata.org/docs/reference/api/pandas.Series.drop_duplicates.html - - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html - - :param data: input series or dataframe - :param use_index: whether to consider index values when identifying duplicates - - if `True`, use index values together with a column subset for - identifying duplicates - - if `False`, duplicated rows are with the exact same values in a subset - and different indices - :param column_subset: a list of columns to consider for identifying duplicates - :param args: additional arguments passed to pandas.drop_duplicates() - :param kwargs: additional keyword arguments passed to pandas.drop_duplicates() - :return: data without duplicates - """ - _LOG.debug(hprint.to_str("use_index column_subset args kwargs")) - num_rows_before = data.shape[0] - # Get all columns list for subset if no subset is passed. - if column_subset is None: - column_subset = data.columns.tolist() - else: - hdbg.dassert_lte(1, len(column_subset), "Columns subset cannot be empty") - if use_index: - # Add dummy index column to use it for duplicates detection. - index_col_name = "use_index_col" - hdbg.dassert_not_in(index_col_name, data.columns.tolist()) - column_subset.insert(0, index_col_name) - data[index_col_name] = data.index - # Drop duplicates based on the column subset. - data_no_dups = data.drop_duplicates(subset=column_subset, *args, **kwargs) - # Clean up the temporary index column if it was added. - if use_index: - # Remove dummy index column. - data_no_dups = data_no_dups.drop([index_col_name], axis=1) - # Report the change. 
- num_rows_after = data_no_dups.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return data_no_dups - - -def dropna( - df: pd.DataFrame, - *args: Any, - drop_infs: bool = False, - report_stats: bool = False, - **kwargs: Any, -) -> pd.DataFrame: - """ - Create a wrapper around pd.dropna() reporting information about the removed - rows. - - :param df: dataframe to process - :param drop_infs: if +/- np.inf should be considered as nans - :param report_stats: if processing stats should be reported - :return: dataframe with nans dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - num_rows_before = df.shape[0] - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - df = df.dropna(*args, **kwargs) - if report_stats: - num_rows_after = df.shape[0] - pct_removed = hprint.perc( - num_rows_before - num_rows_after, num_rows_before - ) - _LOG.info("removed rows with nans: %s", pct_removed) - return df - - -def drop_axis_with_all_nans( - df: pd.DataFrame, - drop_rows: bool = True, - drop_columns: bool = False, - drop_infs: bool = False, - report_stats: bool = False, -) -> pd.DataFrame: - """ - Remove columns and rows not containing information (e.g., with only nans). - - The operation is not performed in place and the resulting df is - returned. Assume that the index is timestamps. - - :param df: dataframe to process - :param drop_rows: remove rows with only nans - :param drop_columns: remove columns with only nans - :param drop_infs: remove also +/- np.inf - :param report_stats: report the stats of the operations - :return: dataframe with specific nan axis dropped - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - if drop_infs: - df = df.replace([np.inf, -np.inf], np.nan) - if drop_columns: - # Remove columns with all nans, if any. - cols_before = df.columns[:] - df = df.dropna(axis=1, how="all") - if report_stats: - # Report results. 
- cols_after = df.columns[:] - removed_cols = set(cols_before).difference(set(cols_after)) - pct_removed = hprint.perc( - len(cols_before) - len(cols_after), len(cols_after) - ) - _LOG.info( - "removed cols with all nans: %s %s", - pct_removed, - hprint.list_to_str(removed_cols), - ) - if drop_rows: - # Remove rows with all nans, if any. - rows_before = df.index[:] - df = df.dropna(axis=0, how="all") - if report_stats: - # Report results. - rows_after = df.index[:] - removed_rows = set(rows_before).difference(set(rows_after)) - if len(rows_before) == len(rows_after): - # Nothing was removed. - min_ts = max_ts = None - else: - # TODO(gp): Report as intervals of dates. - min_ts = min(removed_rows) - max_ts = max(removed_rows) - pct_removed = hprint.perc( - len(rows_before) - len(rows_after), len(rows_after) - ) - _LOG.info( - "removed rows with all nans: %s [%s, %s]", - pct_removed, - min_ts, - max_ts, - ) - return df - - -def drop_duplicated( - df: pd.DataFrame, *, subset: Optional[List[str]] = None -) -> pd.DataFrame: - """ - Implement `df.duplicated` but considering also the index and ignoring nans. - """ - _LOG.debug("before df=\n%s", hpanutil.df_to_str(df)) - # Move the index to the df. - old_index_name = df.index.name - new_index_name = "_index.tmp" - hdbg.dassert_not_in(new_index_name, df.columns) - df.index.name = new_index_name - df.reset_index(drop=False, inplace=True) - # Remove duplicates by ignoring nans. - if subset is not None: - hdbg.dassert_isinstance(subset, list) - subset = [new_index_name] + subset - duplicated = df.fillna(0.0).duplicated(subset=subset, keep="first") - # Report the result of the operation. 
- if duplicated.sum() > 0: - num_rows_before = df.shape[0] - _LOG.debug( - "Removing duplicates df=\n%s", - hpanutil.df_to_str(df.loc[duplicated]), - ) - df = df.loc[~duplicated] - num_rows_after = df.shape[0] - _LOG.warning( - "Removed repeated rows num_rows=%s", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - _LOG.debug("after removing duplicates df=\n%s", hpanutil.df_to_str(df)) - # Set the index back. - df.set_index(new_index_name, inplace=True) - df.index.name = old_index_name - _LOG.debug("after df=\n%s", hpanutil.df_to_str(df)) - return df - - -def impute_nans(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame: - """ - Assign `value` to the `column` of `df` where the value is "nan". - - :param df: The DataFrame to modify. - :param column: The column in which to replace "nan" values. - :param value: The value to assign to "nan" entries. - :return: The DataFrame with the "nan" values assigned. - """ - df[column] = df[column].astype(str) - mask = df[column] == "nan" - # Assign the new value or keep the original value. - df[column] = np.where(mask, value, df[column]) - # There should be no more nans. - mask = df[column] == "nan" - hdbg.dassert_eq(mask.sum(), 0) - # - return df - - -# ############################################################################# - - -def remove_outliers( - df: pd.DataFrame, - lower_quantile: float, - *, - column_set: hpanutil.ColumnSet, - # TODO(Grisha): the params are not used. - fill_value: float = np.nan, - mode: str = "remove_outliers", - axis: Any = 0, - upper_quantile: Optional[float] = None, -) -> pd.DataFrame: - """ - Remove outliers from a dataframe based on quantile thresholds. 
- - :param df: input dataframe - :param lower_quantile: lower quantile threshold (0.0 to 1.0) - :param column_set: columns to apply outlier removal to - :param fill_value: value to use for filling outliers (currently unused) - :param mode: outlier removal mode (currently unused) - :param axis: axis along which to compute quantiles (0 for columns, 1 for rows) - :param upper_quantile: upper quantile threshold, defaults to 1 - lower_quantile - :return: dataframe with outliers removed based on quantile thresholds - """ - hdbg.dassert_eq(len(df.shape), 2, "Multi-index dfs not supported") - # Validate quantile parameters. - hdbg.dassert_lte(0.0, lower_quantile) - if upper_quantile is None: - upper_quantile = 1.0 - lower_quantile - hdbg.dassert_lte(lower_quantile, upper_quantile) - hdbg.dassert_lte(upper_quantile, 1.0) - # Create a copy of the dataframe to avoid modifying the original. - df = df.copy() - if axis == 0: - all_columns = df.columns - columns = hpanutil.resolve_column_names(column_set, all_columns) - hdbg.dassert_is_subset(columns, df.columns) - for column in all_columns: - if column in columns: - df[column] = df[column].quantile( - [lower_quantile, upper_quantile] - ) - elif axis == 1: - all_rows = df.rows - rows = hpanutil.resolve_column_names(column_set, all_rows) - hdbg.dassert_is_subset(rows, df.rows) - for row in all_rows: - if row in rows: - df[row] = df[row].quantile([lower_quantile, upper_quantile]) - else: - raise ValueError(f"Invalid axis='{axis}'") - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py deleted file mode 100644 index b40308daa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_compare.py +++ /dev/null @@ -1,289 +0,0 @@ -""" -Import as: - -import 
helpers.hpandas_compare as hpancomp -""" - -import logging -from typing import List - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_dassert as hpandass -import helpers.hpandas_utils as hpanutil - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - - -def compare_dataframe_rows(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: - """ - Compare contents of rows with same indices. - - Index is set to default sequential integer values because compare is - sensitive to multi index (probably because new multi indexes are created - for each difference in `compare`). Multi index columns are regular columns now. - Excess columns are removed so both dataframes are always same shape because - `compare` expects identical dataframes (same number of rows, columns, etc.). - - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: dataframe with data with same indices and different contents - """ - # Get rows on which the two dataframe indices match. - idx_intersection = df1.index.intersection(df2.index) - # Remove excess columns and reset indexes. - trimmed_second = df2.loc[idx_intersection].reset_index() - trimmed_first = df1.loc[idx_intersection].reset_index() - # Get difference between second and first dataframe. - data_difference = trimmed_second.compare(trimmed_first) - # Update data difference with original dataframe index names - # for easier identification. - index_names = tuple(df2.index.names) - # If index or multi index is named, it will be visible in data difference. 
- if index_names != (None,): - for index in data_difference.index: - for column in index_names: - data_difference.loc[index, column] = trimmed_second.loc[index][ - column - ] - data_difference = data_difference.convert_dtypes() - return data_difference - - -def compare_nans_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> pd.DataFrame: - """ - Compare equality of DataFrames in terms of NaNs. - - For example: - - `5 vs np.nan` is a mismatch - - `np.nan vs 5` is a mismatch - - `np.nan vs np.nan` is a match - - `np.nan vs np.inf` is a mismatch - - :param df1: dataframe to compare - :param df2: dataframe to compare with - :return: dataframe that shows the differences stacked side by side, see - `pandas.DataFrame.compare()` for an example - """ - hpandass.dassert_axes_equal(df1, df2) - # Keep rows where df1's value is NaN and df2's value is not NaN and vice versa. - mask1 = df1.isna() & ~df2.isna() - mask2 = ~df1.isna() & df2.isna() - mask3 = mask1 | mask2 - # Compute a dataframe with the differences. - nan_diff_df = df1[mask3].compare(df2[mask3], result_names=("df1", "df2")) - return nan_diff_df - - -# TODO(Grisha): -> `compare_dataframes()`? - - -def compare_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - row_mode: str = "equal", - column_mode: str = "equal", - # TODO(Grisha): should be True by default? - compare_nans: bool = False, - diff_mode: str = "diff", - assert_diff_threshold: float = 1e-3, - close_to_zero_threshold: float = 1e-6, - zero_vs_zero_is_zero: bool = True, - remove_inf: bool = True, - log_level: int = logging.DEBUG, - only_warning: bool = True, -) -> pd.DataFrame: - """ - Compare two dataframes. - - This works for dataframes with and without multi-index. 
- - :param row_mode: control how the rows are handled - - "equal": rows need to be the same for the two dataframes - - "inner": compute the common rows for the two dataframes - :param column_mode: same as `row_mode` - :param compare_nans: include NaN comparison if True otherwise just - compare non-NaN values - :param diff_mode: control how the dataframes are compared in terms of - corresponding elements - - "diff": use the difference - - "pct_change": use the percentage difference - :param assert_diff_threshold: maximum allowed total difference - - do not assert if `None` - - works when `diff_mode` is "pct_change" - :param close_to_zero_threshold: round numbers below the threshold to 0 - :param zero_vs_zero_is_zero: replace the diff with 0 when comparing 0 to 0 - if True, otherwise keep the actual result - :param remove_inf: replace +-inf with `np.nan` - :param log_level: logging level - :param only_warning: when `True` the function issues a warning instead of aborting - :return: a singe dataframe with differences as values - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - # Check value of `assert_diff_threshold`, if it was passed. - if assert_diff_threshold: - hdbg.dassert_lte(assert_diff_threshold, 1.0) - hdbg.dassert_lte(0.0, assert_diff_threshold) - # TODO(gp): Factor out this logic and use it for both compare_visually_dfs - # and - if row_mode == "equal": - hpandass.dassert_indices_equal(df1, df2) - elif row_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - same_rows = list((set(df1.index)).intersection(set(df2.index))) - df1 = df1[df1.index.isin(same_rows)] - df2 = df2[df2.index.isin(same_rows)] - else: - raise ValueError(f"Invalid row_mode='{row_mode}'") - # Handle column comparison mode. 
- if column_mode == "equal": - hdbg.dassert_eq(sorted(df1.columns), sorted(df2.columns)) - elif column_mode == "inner": - # TODO(gp): Add sorting on demand, otherwise keep the columns in order. - col_names = sorted(list(set(df1.columns).intersection(set(df2.columns)))) - df1 = df1[col_names] - df2 = df2[col_names] - else: - raise ValueError(f"Invalid column_mode='{column_mode}'") - # Round small numbers to 0 to exclude them from the diff computation. - close_to_zero_threshold_mask = lambda x: abs(x) < close_to_zero_threshold - df1[close_to_zero_threshold_mask] = df1[close_to_zero_threshold_mask].round( - 0 - ) - df2[close_to_zero_threshold_mask] = df2[close_to_zero_threshold_mask].round( - 0 - ) - # Compute the difference df. - if diff_mode == "diff": - # Test and convert the assertion into a boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - df1, df2, check_like=True, check_dtype=False - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise an assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n are not equal.", - hpanutil.df_to_str(df1, log_level=log_level), - hpanutil.df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Calculate the difference. - df_diff = df1 - df2 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - elif diff_mode == "pct_change": - # Compare NaN values in dataframes. - nan_diff_df = compare_nans_in_dataframes(df1, df2) - _LOG.debug( - "Dataframe with NaN differences=\n%s", - hpanutil.df_to_str(nan_diff_df), - ) - msg = "There are NaN values in one of the dataframes that are not in the other one." - hdbg.dassert_eq( - 0, nan_diff_df.shape[0], msg=msg, only_warning=only_warning - ) - # Compute pct_change. - df_diff = 100 * (df1 - df2) / df2.abs() - if zero_vs_zero_is_zero: - # When comparing 0 to 0 set the diff (which is NaN by default) to 0. 
- df1_mask = df1 == 0 - df2_mask = df2 == 0 - zero_vs_zero_mask = df1_mask & df2_mask - df_diff[zero_vs_zero_mask] = 0 - if remove_inf: - df_diff = df_diff.replace([np.inf, -np.inf], np.nan) - # Check if `df_diff` values are less than `assert_diff_threshold`. - if assert_diff_threshold is not None: - nan_mask = df_diff.isna() - within_threshold = ( - df_diff.abs() <= assert_diff_threshold - ) | nan_mask - expected = pd.DataFrame( - True, - index=within_threshold.index, - columns=within_threshold.columns, - ) - # Test and convert the assertion into boolean. - is_ok = True - try: - pd.testing.assert_frame_equal( - within_threshold, expected, check_exact=True - ) - except AssertionError as e: - is_ok = False - _ = e - # Check `is_ok` and raise assertion depending on `only_warning`. - if not is_ok: - hdbg._dfatal( - _, - "df1=\n%s\n and df2=\n%s\n have pct_change more than `assert_diff_threshold`.", - hpanutil.df_to_str(df1, log_level=log_level), - hpanutil.df_to_str(df2, log_level=log_level), - only_warning=only_warning, - ) - # Report max diff. - max_diff = df_diff.abs().max().max() - _LOG.log( - log_level, - "Maximum percentage difference between the two dataframes = %s", - max_diff, - ) - else: - raise ValueError(f"diff_mode={diff_mode}") - df_diff = df_diff.add_suffix(f".{diff_mode}") - return df_diff - - -def find_common_columns( - names: List[str], dfs: List[pd.DataFrame] -) -> pd.DataFrame: - """ - Find common columns across multiple dataframes. 
- - :param names: list of names for each dataframe - :param dfs: list of dataframes to compare - :return: dataframe showing common columns between each pair of dataframes - """ - df = [] - for i, df1 in enumerate(dfs): - df1 = dfs[i].columns - for j in range(i + 1, len(dfs)): - df2 = dfs[j].columns - common_cols = [c for c in df1 if c in df2] - df.append( - ( - names[i], - len(df1), - names[j], - len(df2), - len(common_cols), - ", ".join(common_cols), - ) - ) - df = pd.DataFrame( - df, - columns=[ - "table1", - "num_cols1", - "num_cols2", - "table2", - "num_comm_cols", - "common_cols", - ], - ) - return df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py deleted file mode 100644 index c9443c888..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_conversion.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -Import as: - -import helpers.hpandas_conversion as hpanconv -""" - -from typing import List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - -# ############################################################################# -# DataFrame/Series Conversion -# ############################################################################# - - -def to_series(df: pd.DataFrame, *, series_dtype: str = "float64") -> pd.Series: - """ - Convert a pd.DataFrame with a single column into a pd.Series. The problem - is that empty df or df with a single row are not converted correctly to a - pd.Series. 
- - :param df: dataframe with a single column to convert to a series - :param series_dtype: dtype of the desired series in case a DataFrame - is empty, otherwise inherit dtype from a DataFrame - """ - # See https://stackoverflow.com/questions/33246771 - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_eq(df.shape[1], 1, "df=%s doesn't have a single column", df) - if df.empty: - srs = pd.Series(dtype=series_dtype) - elif df.shape[0] > 1: - srs = df.squeeze() - else: - srs = pd.Series(df.iloc[0, 0], index=[df.index.values[0]]) - srs.name = df.index.name - hdbg.dassert_isinstance(srs, pd.Series) - return srs - - -def as_series(data: Union[pd.DataFrame, pd.Series]) -> pd.Series: - """ - Convert a single-column dataframe to a series or no-op if already a series. - """ - if isinstance(data, pd.Series): - return data - return to_series(data) - - -# ############################################################################# -# Infer type -# ############################################################################# - - -def infer_column_types(col: pd.Series): - """ - Determine which data type is most prevalent in a column. - - Examine the values in the given pandas Series and decides whether - the majority of entries are strings, numeric values, or booleans. - - :param col: The column to inspect. - :return: One of `"is_string"`, `"is_numeric"`, or `"is_bool"`, - representing the predominant type. 
- """ - vals = { - "is_numeric": pd.to_numeric(col, errors="coerce").notna(), - #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - "is_bool": col.map(lambda x: isinstance(x, bool)), - "is_string": col.map(lambda x: isinstance(x, str)), - } - vals = {k: float(v.mean()) for k, v in vals.items()} - # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", - # (vals["is_numeric"] >= vals["is_string"], "is_numeric", - # "is_string")) - if vals["is_bool"] >= vals["is_numeric"] and (vals["is_bool"] != 0): - type_ = "is_bool" - elif vals["is_numeric"] >= vals["is_string"] and (vals["is_numeric"] != 0): - type_ = "is_numeric" - else: - type_ = "is_string" - vals["type"] = type_ - return vals - - -def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Identify the predominant data type for each column in a DataFrame. - - :param df: The DataFrame whose columns will be analyzed. - :return: A DataFrame with two columns: - - `column`: the name of each original column. - - `predominant_type`: the most frequent type in that column, - one of `"string"`, `"numeric"`, or `"bool"`. - """ - return df.apply(lambda x: pd.Series(infer_column_types(x))).T - - -def convert_to_type(col: pd.Series, type_: str) -> pd.Series: - """ - Convert a pandas Series to a specified data type. - - :param col: The input column to be converted. - :param type_: The target data type. Expected values include: - - `"is_bool"`: convert values to booleans. - - `"is_int"`: convert values to integers. - - `"is_numeric"`: convert values to float. - - `"is_string"`: convert values to strings. - :return: A new Series with the same index as `col`, cast to the requested - type. 
- """ - if type_ == "is_bool": - return col.map( - lambda x: ( - True - if x in ["True", 1, "1", "true", True] - else False - if x in [0, "0", "False", False, "false"] - else None - ) - ) - elif type_ == "is_int": - return pd.to_numeric(col, errors="coerce", downcast="integer") - elif type_ == "is_numeric": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_string": - return col.astype(str) - else: - raise ValueError(f"Unknown column type: {type_}") - - -def convert_col_to_int( - df: pd.DataFrame, - col: str, -) -> pd.DataFrame: - """ - Convert a column to an integer column. - - Example use case: Parquet uses categoricals. If supplied with a - categorical-type column, this function will convert it to an integer - column. - """ - import helpers.hpandas_dassert as hpandass - - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(col, str) - hdbg.dassert_in(col, df.columns) - # Attempt the conversion. - df[col] = df[col].astype("int64") - # Trust, but verify. - hpandass.dassert_series_type_is(df[col], np.int64) - return df - - -def cast_series_to_type( - series: pd.Series, series_type: Optional[type] -) -> pd.Series: - """ - Convert a Pandas series to a given type. - - :param series: the input series - :param series_type: the type to convert the series into - - if None, then the series values are turned into Nones - :return: the series in the required type - """ - if series_type is None: - # Turn the series values into None. - series[:] = None - elif series_type is pd.Timestamp: - # Convert to timestamp. - series = pd.to_datetime(series) - elif series_type is dict: - # Convert to dict. - series = series.apply(eval) - else: - # Convert to the specified type. - series = series.astype(series_type) - return series - - -def convert_df( - df: pd.DataFrame, *, print_invalid_values: bool = False -) -> pd.DataFrame: - """ - Convert each DataFrame column to its predominant type. 
- - This function inspects every column in `df`, determines whether the - majority of its values are boolean, numeric, or string, and then - casts the column to that type using `convert_to_type`. - - :param df: The input DataFrame whose columns will be converted. - :param print_invalid_values: If True, print any original values that could - not be converted (they become NaN after conversion) - :return: a new DataFrame with each column cast to its detected predominant - type. - """ - df_out = pd.DataFrame(index=df.index) - for col in df.columns: - series = df[col] - # Determine the dominant datatype. - col_type = infer_column_types(series)["type"] - hdbg.dassert_in(col_type, ("is_bool", "is_numeric", "is_string")) - # Convert the column to dominant datatype. - converted = convert_to_type(series, col_type) - if print_invalid_values: - invalid_mask = series.notna() & converted.isna() - if invalid_mask.any(): - invalid = series[invalid_mask].tolist() - _LOG.info("Column %s dropped invalid values: %s", col, invalid) - df_out[col] = converted - return df_out - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py deleted file mode 100644 index 7d62b84b3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_dassert.py +++ /dev/null @@ -1,371 +0,0 @@ -""" -Import as: - -import helpers.hpandas_dassert as hpandass -""" - -from typing import Any, Dict, Iterable, List, Optional, Union - -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -_LOG = hloggin.getLogger(__name__) - - -RowsValues = List[List[str]] - -# 
############################################################################# -# Index/Axis Validation & Assertions -# ############################################################################# - - -def _get_index(obj: Union[pd.Index, pd.DataFrame, pd.Series]) -> pd.Index: - """ - Return the index of a Pandas object. - - :param obj: pandas Index, DataFrame, or Series - :return: the index of the object - """ - if isinstance(obj, pd.Index): - index = obj - else: - hdbg.dassert_isinstance(obj, (pd.Series, pd.DataFrame)) - index = obj.index - return index - - -# TODO(gp): Maybe for symmetry with the other functions, rename to -# dassert_datetime_index - - -def dassert_index_is_datetime( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the dataframe has an index containing datetimes. - - It works for both single and multi-indexed dataframes. - """ - index = _get_index(obj) - if isinstance(index, pd.MultiIndex): - # In case of multi index check that at least one level is a datetime. - is_any_datetime = any( - isinstance(level, pd.DatetimeIndex) for level in index.levels - ) - hdbg.dassert(is_any_datetime, msg, *args) - else: - hdbg.dassert_isinstance(index, pd.DatetimeIndex, msg, *args) - - -def dassert_unique_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a unique index. - """ - import helpers.hpandas_utils as hpanutil - - index = _get_index(obj) - if not index.is_unique: - dup_indices = index.duplicated(keep=False) - df_dup = obj[dup_indices] - df_dup_as_str = hpanutil.df_to_str(df_dup) - dup_msg = f"Duplicated rows are:\n{df_dup_as_str}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - hdbg.dassert(index.is_unique, msg=msg, *args) - - -# TODO(gp): @all Add unit tests. 
- - -def dassert_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has an increasing index. - """ - import helpers.hpandas_utils as hpanutil - - index = _get_index(obj) - if not index.is_monotonic_increasing: - # Print information about the problematic indices like: - # ``` - # Not increasing indices are: - # full_symbol open high - # timestamp - # 2018-08-17 01:39:00+00:00 binance::BTC_USDT 6339.250000 6348.910000 - # 2018-08-17 00:01:00+00:00 kucoin::ETH_USDT 286.712987 286.712987 - # ``` - # Find the problematic indices. - mask = np.diff(index) <= pd.Timedelta(seconds=0) - mask = np.insert(mask, 0, False) - # TODO(gp): We might want to specify an integer with how many rows before - # after we want to show. - # Shift back to get the previous index that was creating the issue. - mask_shift = np.empty_like(mask) - mask_shift[: len(mask) - 1] = mask[1 : len(mask)] - mask_shift[len(mask) - 1] = False - # - mask = mask | mask_shift - df_dup_as_str = hpanutil.df_to_str(obj[mask]) - dup_msg = f"Not increasing indices are:\n{df_dup_as_str}\n" - if msg is None: - msg = dup_msg - else: - msg = dup_msg + msg - # Dump the data to file for further inspection. - # obj.to_csv("index.csv") - hdbg.dassert(index.is_monotonic_increasing, msg=msg, *args) - - -# TODO(gp): @all Add more info in case of failures and unit tests. - - -def dassert_strictly_increasing_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a strictly increasing index. - """ - dassert_unique_index(obj, msg, *args) - dassert_increasing_index(obj, msg, *args) - - -# TODO(gp): Not sure it's used or useful? 
- - -def dassert_monotonic_index( - obj: Union[pd.Index, pd.DataFrame, pd.Series], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that a Pandas object has a monotonic (i.e., strictly increasing or - decreasing index). - """ - dassert_unique_index(obj, msg, *args) - index = _get_index(obj) - cond = index.is_monotonic_increasing or index.is_monotonic_decreasing - hdbg.dassert(cond, msg=msg, *args) - - -# TODO(Paul): @gp -> dassert_datetime_indexed_df - - -def dassert_time_indexed_df( - df: pd.DataFrame, allow_empty: bool, strictly_increasing: bool -) -> None: - """ - Validate that input dataframe is time indexed and well-formed. - - It works for both single and multi-indexed dataframes. - - :param df: dataframe to validate - :param allow_empty: allow empty data frames - :param strictly_increasing: if True the index needs to be strictly - increasing, instead of just increasing - """ - # Verify that Pandas dataframe is passed as input. - hdbg.dassert_isinstance(df, pd.DataFrame) - if not allow_empty: - # Verify that a non-empty dataframe is passed as input. - hdbg.dassert_lt(0, df.shape[0]) - # Verify that the dataframe has at least 1 column. - hdbg.dassert_lte(1, len(df.columns)) - # Verify that the index is increasing. - if strictly_increasing: - dassert_strictly_increasing_index(df) - else: - dassert_increasing_index(df) - # Check that the index is in datetime format. - dassert_index_is_datetime(df) - # Check that the passed timestamp has timezone info. - index_item = df.index[0] - if isinstance(index_item, tuple): - # In case of multi index assume that the first level is a datetime. - index_item = index_item[0] - hdateti.dassert_has_tz(index_item) - - -def dassert_indices_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - allow_series: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` share a common index. - - Print the symmetric difference of indices if equality does not hold. 
- """ - if allow_series: - if isinstance(df1, pd.Series): - df1 = df1.to_frame() - if isinstance(df2, pd.Series): - df2 = df2.to_frame() - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert( - df1.index.equals(df2.index), - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1.index.difference(df2.index), - df2.index.difference(df1.index), - only_warning=only_warning, - ) - - -def dassert_columns_equal( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - sort_cols: bool = False, - only_warning: bool = False, -) -> None: - """ - Ensure that `df1` and `df2` have the same columns. - - Print the symmetric difference of columns if equality does not hold. - """ - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - if sort_cols: - _LOG.debug("Sorting dataframe columns.") - df1 = df1.sort_index(axis=1) - df2 = df2.sort_index(axis=1) - hdbg.dassert( - df1.columns.equals(df2.columns), - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - only_warning=only_warning, - ) - - -def dassert_axes_equal( - df1: pd.DataFrame, df2: pd.DataFrame, *, sort_cols: bool = False -) -> None: - """ - Ensure that `df1` and `df2` have the same index and same columns. - """ - dassert_indices_equal(df1, df2) - dassert_columns_equal(df1, df2, sort_cols=sort_cols) - - -# TODO(Grisha): instead of passing `rtol` and `atol` use `**allclose_kwargs: Dict[str, Any]`. - - -def dassert_series_type_is( - srs: pd.Series, - type_: type, - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is `type_`. 
- - Examples of valid series types are - - np.float64 - - np.int64 - - pd.Timestamp - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_isinstance(type_, type) - hdbg.dassert_eq(srs.dtype.type, type_, msg, *args) - - -def dassert_series_type_in( - srs: pd.Series, - types: List[type], - msg: Optional[str] = None, - *args: Any, -) -> None: - """ - Ensure that the data type of `srs` is one of the types in `types`. - """ - hdbg.dassert_isinstance(srs, pd.Series) - hdbg.dassert_container_type(types, list, type) - hdbg.dassert_in(srs.dtype.type, types, msg, *args) - - -def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None: - """ - Ensure that remapping rows / columns is valid. - """ - hdbg.dassert_isinstance(to_remap, list) - hdbg.dassert_isinstance(remap_dict, dict) - # All the rows / columns to remap, should exist. - hdbg.dassert_is_subset( - remap_dict.keys(), - to_remap, - "Keys to remap should be a subset of existing columns", - ) - # The mapping is invertible. - hdbg.dassert_no_duplicates(remap_dict.keys()) - hdbg.dassert_no_duplicates(remap_dict.values()) - # Rows / columns should not be remapped on existing rows / columns. - hdbg.dassert_not_intersection(remap_dict.values(), to_remap) - - -def dassert_approx_eq( - val1: Any, - val2: Any, - rtol: float = 1e-05, - atol: float = 1e-08, - msg: Optional[str] = None, - *args: Any, - only_warning: bool = False, -) -> None: - # Approximate comparison is not applicable for strings. - hdbg.dassert_is_not(type(val1), str) - hdbg.dassert_is_not(type(val2), str) - # Convert iterable inputs to list in order to comply with numpy. 
- if isinstance(val1, Iterable): - val1 = list(val1) - if isinstance(val2, Iterable): - val2 = list(val2) - cond = np.allclose( - np.array(val1), np.array(val2), rtol=rtol, atol=atol, equal_nan=True - ) - if not cond: - txt = f"'{val1}'\n==\n'{val2}' rtol={rtol}, atol={atol}" - hdbg._dfatal(txt, msg, *args, only_warning=only_warning) # type: ignore - - -# ############################################################################# - - -def dassert_is_days( - timedelta: pd.Timedelta, *, min_num_days: Optional[int] = None -) -> None: - """ - Assert that a timedelta represents an integer number of days. - - :param timedelta: the timedelta to check - :param min_num_days: optional minimum number of days to enforce - """ - hdbg.dassert( - (timedelta / pd.Timedelta(days=1)).is_integer(), - "timedelta='%s' is not an integer number of days", - timedelta, - ) - if min_num_days is not None: - hdbg.dassert_lte(1, timedelta.days) - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py deleted file mode 100644 index 6c73c8988..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_display.py +++ /dev/null @@ -1,302 +0,0 @@ -""" -Import as: - -import helpers.hpandas_display as hpandisp -""" - -import logging -import os -from typing import List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hlogging as hloggin -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - - -# Invariant: -# - When we are in a notebook we want to: -# - Convert `_LOG.info()` in `print()` using `hnotebo.set_logger_to_print()` -# - Display any dataframe 
using the `hpandas.display` function -# - Do not return any value -# -# - When we are not in a notebook we want to: -# - Use `_LOG.info()` and `_LOG.debug()` to log messages -# - Print the dataframe with `_LOG.debug()` -# - Return the result through a `return` statement -# -# - Each function should have a `log_level` parameter to control the logging level. -# - If `log_level` is not provided, it should be set to `logging.DEBUG` if we are not in a notebook, -# and `logging.INFO` if we are in a notebook. - - -def get_df_signature(df: pd.DataFrame, num_rows: int = 6) -> str: - """ - Compute a simple signature of a dataframe in string format. - - The signature contains metadata about dataframe size and certain - amount of rows from start and end of a dataframe. It is used for - testing purposes. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - text: List[str] = [f"df.shape={str(df.shape)}"] - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - # If dataframe size exceeds number of rows, show only subset in form of - # first and last rows. Otherwise, whole dataframe is shown. - if len(df) > num_rows: - text.append(f"df.head=\n{df.head(num_rows // 2)}") - text.append(f"df.tail=\n{df.tail(num_rows // 2)}") - else: - text.append(f"df.full=\n{df}") - text: str = "\n".join(text) - return text - - -# ############################################################################# - - -def convert_df_to_json_string( - df: pd.DataFrame, - n_head: Optional[int] = 10, - n_tail: Optional[int] = 10, - columns_order: Optional[List[str]] = None, -) -> str: - """ - Convert dataframe to pretty-printed JSON string. - - To select all rows of the dataframe, pass `n_head` as None. 
- - :param df: dataframe to convert - :param n_head: number of printed top rows - :param n_tail: number of printed bottom rows - :param columns_order: order for the KG columns sort - :return: dataframe converted to JSON string - """ - # Append shape of the initial dataframe. - shape = f"original shape={df.shape}" - # Reorder columns. - if columns_order is not None: - hdbg.dassert_set_eq(columns_order, df.columns) - df = df[columns_order] - # Select head. - if n_head is not None: - head_df = df.head(n_head) - else: - # If no n_head provided, append entire dataframe. - head_df = df - # Transform head to json. - head_json = head_df.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - if n_tail is not None: - # Transform tail to json. - tail = df.tail(n_tail) - tail_json = tail.to_json( - orient="index", - force_ascii=False, - indent=4, - default_handler=str, - date_format="iso", - date_unit="s", - ) - else: - # If no tail specified, append an empty string. - tail_json = "" - # Join shape and dataframe to single string. - output_str = "\n".join([shape, "Head:", head_json, "Tail:", tail_json]) - return output_str - - -# ############################################################################# - - -def convert_df_to_png( - df: pd.DataFrame, - file_path: str, - index: bool = True, - table_conversion: str = "kaleido", - dpi: int = 300, - print_markdown: bool = False, - markdown_path_prefix: Optional[str] = None, -) -> None: - """ - Convert a dataframe to a PNG image file. - - Uses the dataframe_image library to render the DataFrame as an image - with HTML styling. 
- - :param df: dataframe to convert - :param file_path: path where the PNG image will be saved - :param index: whether to include the index in the image - :param table_conversion: conversion method ('kaleido', 'chrome', or 'playwright') - :param dpi: resolution in dots per inch (default: 300 for print quality, - higher values = higher resolution and larger file size) - :param print_markdown: if True, print markdown image reference like - ![](path/to/image.png) - :param markdown_path_prefix: optional path to prepend to the image path in - the markdown reference (e.g., '../figures/' or 'assets/') - """ - # Keep this import here since it's an optional one. - import dataframe_image as dfi - - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(file_path, str) - # Ensure the output directory exists. - hio.create_enclosing_dir(file_path, incremental=True) - # Prepare dataframe for export, handling index parameter. - export_df = df - if not index: - # Reset index to exclude it from the image. - export_df = df.reset_index(drop=True) - dfi.export(export_df, file_path, table_conversion=table_conversion, dpi=dpi) - # Use print instead of _LOG.info. - print(f"PNG image saved to: '{file_path}'") - if print_markdown: - # Construct the markdown path. - markdown_path = file_path - if markdown_path_prefix: - markdown_path = os.path.join(markdown_path_prefix, file_path) - markdown_ref = f"![]({markdown_path})" - # Use print instead of _LOG.info. - print(markdown_ref) - - -# ############################################################################# - - -def print_or_display( - df: pd.DataFrame, - *, - index: bool = True, - as_txt: bool = False, - log_level: int = logging.INFO, -) -> None: - """ - Print or display a dataframe in a notebook at the given log level. 
- - :param df: dataframe to print - :param index: whether to show the index or not - :param as_txt: print if True, otherwise render as usual HTML table - :param log_level: log level at which to print the dataframe - """ - # print(_LOG.getEffectiveLevel()) - # print(log_level) - # print(_LOG.isEnabledFor(log_level)) - if hsystem.is_running_in_ipynb() and not as_txt: - from IPython.display import display, HTML - - if _LOG.isEnabledFor(log_level): - display(HTML(df.to_html(index=index))) - else: - _LOG.log(log_level, "%s", df.to_string(index=index)) - - -def display_df( - df: pd.DataFrame, - *, - index: bool = True, - inline_index: bool = False, - max_lines: Optional[int] = 5, - tag: Optional[str] = None, - mode: Optional[str] = None, - as_txt: bool = False, - log_level: int = logging.INFO, -) -> None: - """ - Display a Pandas object (series, df, panel) in a better way than the - ipython display, e.g., by printing head and tail of the dataframe, and - other formatting options. - - :param index: whether to show the index or not - :param inline_index: make the index part of the dataframe. This is used - when cutting and pasting to other applications, which are not happy - with the output pandas HTML form - :param max_lines: number of lines to print - :param mode: use different formats temporarily overriding the default, e.g., - - "all_rows": print all the rows - - "all_cols": print all the columns - - "all": print the entire df (it could be huge) - :param as_txt: print if True, otherwise render as usual html table - :param log_level: log level at which to print the dataframe - """ - # Convert Series to DataFrame if needed. 
- if isinstance(df, pd.Series): - df = pd.DataFrame(df) - # - hdbg.dassert_type_is(df, pd.DataFrame) - hdbg.dassert_eq( - hlist.find_duplicates(df.columns.tolist()), - [], - msg="Find duplicated columns", - ) - if tag is not None: - _LOG.log(log_level, "tag=%s", tag) - # Shrink the dataframe to the number of lines specified by `max_lines`, - # if needed. - if max_lines is not None: - hdbg.dassert_lte(1, max_lines) - if df.shape[0] > max_lines: - # log.error("Printing only top / bottom %s out of %s rows", - # max_lines, df.shape[0]) - ellipses = pd.DataFrame( - [["..."] * len(df.columns)], columns=df.columns, index=["..."] - ) - df = pd.concat( - [ - df.head(int(max_lines / 2)), - ellipses, - df.tail(int(max_lines / 2)), - ], - axis=0, - ) - # Inline the index, if needed. - if inline_index: - df = df.copy() - # Copy the index to a column and don't print the index. - if df.index.name is None: - col_name = "." - else: - col_name = df.index.name - df.insert(0, col_name, df.index) - df.index.name = None - index = False - # Print or display the dataframe. 
- if mode is None: - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all_rows": - with pd.option_context( - "display.max_rows", None, "display.max_columns", 3 - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all_cols": - with pd.option_context( - "display.max_colwidth", int(1e6), "display.max_columns", None - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - elif mode == "all": - with pd.option_context( - "display.max_rows", - int(1e6), - "display.max_columns", - 3, - "display.max_colwidth", - int(1e6), - "display.max_columns", - None, - ): - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - else: - print_or_display(df, index=index, as_txt=as_txt, log_level=log_level) - raise ValueError("Invalid mode=%s" % mode) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py deleted file mode 100644 index a1049d77f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_io.py +++ /dev/null @@ -1,128 +0,0 @@ -""" -Import as: - -import helpers.hpandas_io as hpanio -""" - -from typing import Any, Union - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hprint as hprint - -# Handle different versions of s3fs where core module may be at different -# locations. -try: - import s3fs - - # Try to access s3fs.core to check if it exists - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module. 
- try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ImportError: - # If s3fs is not available, define dummy classes for type hints. - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -_LOG = hloggin.getLogger(__name__) - - -def read_csv_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a CSV file into a `pd.DataFrame`. - - :param stream: file path, S3File, or S3FileSystem object - :param args: additional arguments passed to pd.read_csv() - :param kwargs: additional keyword arguments passed to pd.read_csv() - :return: dataframe with CSV contents - """ - # Gets filename from stream if it is not already a string, - # so it can be inspected for extension type. - file_name = stream if isinstance(stream, str) else vars(stream)["path"] - # Handle zipped files. - if any(file_name.endswith(ext) for ext in (".gzip", ".gz", ".tgz")): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "gzip" - elif file_name.endswith(".zip"): - hdbg.dassert_not_in("compression", kwargs) - kwargs["compression"] = "zip" - # Read. - _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_csv(stream, *args, **kwargs) - return df - - -def read_parquet_to_df( - stream: Union[str, S3File, S3FileSystem], - *args: Any, - **kwargs: Any, -) -> pd.DataFrame: - """ - Read a Parquet file into a `pd.DataFrame`. - - :param stream: file path, S3File, or S3FileSystem object - :param args: additional arguments passed to pd.read_parquet() - :param kwargs: additional keyword arguments passed to pd.read_parquet() - :return: dataframe with Parquet contents - """ - # Read. 
- _LOG.debug(hprint.to_str("args kwargs")) - df = pd.read_parquet(stream, *args, **kwargs) - return df - - -# ############################################################################# - - -# TODO(Paul): Remove this since it's a dup of hgoogle_drive_api.py. - - -def to_gsheet( - df: pd.DataFrame, - tab_name: str, - gsheet_tab_name: str, - overwrite: bool, -) -> None: - """ - Save a dataframe to a Google sheet. - - :param df: the dataframe to save to a Google sheet - :param tab_name: the name of the Google sheet to save the df - into; the Google sheet with this name must already exist on the - Google Drive - :param gsheet_tab_name: the name of the sheet in the Google sheet - :param overwrite: if True, the contents of the sheet are erased - before saving the dataframe into it; if False, the dataframe is - appended to the contents of the sheet - """ - import gspread_pandas - - spread = gspread_pandas.Spread( - tab_name, sheet=gsheet_tab_name, create_sheet=True - ) - if overwrite: - spread.clear_sheet() - else: - sheet_contents = spread.sheet_to_df(index=None) - combined_df = pd.concat([sheet_contents, df]) - df = combined_df.drop_duplicates() - spread.df_to_sheet(df, index=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py deleted file mode 100644 index f139a3ba9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_multiindex.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Import as: - -import helpers.hpandas_multiindex as hpanmult -""" - -import logging -from typing import Any, Dict, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_compare as hpancomp -import helpers.hpandas_dassert as hpandass -import 
helpers.hpandas_transform as hpantran -import helpers.hpandas_utils as hpanutil -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - -RowsValues = List[List[str]] - -# ############################################################################# -# Functions -# ############################################################################# - - -def add_multiindex_col( - df: pd.DataFrame, multiindex_col: pd.DataFrame, col_name: str -) -> pd.DataFrame: - """ - Add column to a multiindex DataFrame. - - Note: each column in a multiindex DataFrame is a DataFrame itself. - - :param df: multiindex df - :param multiindex_col: column (i.e. singleindex df) of a multiindex df - :param col_name: name of a new column - :return: a multiindex DataFrame with a new column - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - hdbg.dassert_isinstance(multiindex_col, pd.DataFrame) - hdbg.dassert_isinstance(col_name, str) - hdbg.dassert_not_in(col_name, df.columns) - for col in multiindex_col.columns: - df[col_name, col] = multiindex_col[col] - return df - - -def multiindex_df_info( - df: pd.DataFrame, - *, - log_level: int = logging.INFO, - **list_to_str_kwargs: Dict[str, Any], -) -> str: - """ - Report information about a multi-index df. - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - columns_level0 = df.columns.levels[0] - columns_level1 = df.columns.levels[1] - rows = df.index - ret = [] - ret.append( - f"shape={len(columns_level0)} x {len(columns_level1)} x {len(rows)}" - ) - ret.append( - "columns_level0=" - + hprint.list_to_str2(columns_level0, **list_to_str_kwargs) - ) - ret.append( - "columns_level1=" - + hprint.list_to_str2(columns_level1, **list_to_str_kwargs) - ) - ret.append("rows=" + hprint.list_to_str2(rows, **list_to_str_kwargs)) - if isinstance(df.index, pd.DatetimeIndex): - # Display timestamp info. 
- start_timestamp = df.index.min() - end_timestamp = df.index.max() - frequency = df.index.freq - if frequency is None: - # Try to infer frequency. - frequency = pd.infer_freq(df.index) - ret.append(f"start_timestamp={start_timestamp}") - ret.append(f"end_timestamp={end_timestamp}") - ret.append(f"frequency={frequency}") - ret = "\n".join(ret) - _LOG.log(log_level, ret) - return ret - - -def subset_multiindex_df( - df: pd.DataFrame, - *, - # TODO(gp): Consider passing trim_df_kwargs as kwargs. - start_timestamp: Optional[pd.Timestamp] = None, - end_timestamp: Optional[pd.Timestamp] = None, - columns_level0: hpanutil.ColumnSet = None, - columns_level1: hpanutil.ColumnSet = None, - keep_order: bool = False, -) -> pd.DataFrame: - """ - Filter multi-index DataFrame by timestamp index and column levels. - - :param start_timestamp: see `trim_df()` - :param end_timestamp: see `trim_df()` - :param columns_level0: column names that corresponds to `df.columns.levels[0]` - - `None` means no filtering - :param columns_level1: column names that corresponds to `df.columns.levels[1]` - - `None` means no filtering - :param keep_order: see `hpandas_utils.resolve_column_names()` - :return: filtered DataFrame - """ - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_eq(2, len(df.columns.levels)) - # Filter by timestamp. - allow_empty = False - strictly_increasing = False - hpandass.dassert_time_indexed_df(df, allow_empty, strictly_increasing) - df = hpantran.trim_df( - df, - ts_col_name=None, - start_ts=start_timestamp, - end_ts=end_timestamp, - left_close=True, - right_close=True, - ) - # Filter level 0. 
- hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level0 = df.columns.levels[0] - columns_level0 = hpanutil.resolve_column_names( - columns_level0, all_columns_level0, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level0, df.columns.levels[0]) - df = df[columns_level0] - # Filter level 1. - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - all_columns_level1 = df.columns.levels[1] - columns_level1 = hpanutil.resolve_column_names( - columns_level1, all_columns_level1, keep_order=keep_order - ) - hdbg.dassert_isinstance(df.columns, pd.MultiIndex) - hdbg.dassert_is_subset(columns_level1, df.columns.levels[1]) - df = df.swaplevel(axis=1)[columns_level1].swaplevel(axis=1) - return df - - -# ############################################################################# - - -def compare_multiindex_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - *, - subset_multiindex_df_kwargs: Optional[Dict[str, Any]] = None, - compare_dfs_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - - Subset both multi-index dfs, if needed - - Compare dfs - - :param subset_multiindex_df: params for `subset_multiindex_df()` - :param compare_dfs_kwargs: params for `compare_dfs()` - :return: df with differences as values - """ - # Subset dfs. - if subset_multiindex_df_kwargs is None: - subset_multiindex_df_kwargs = {} - subset_df1 = subset_multiindex_df(df1, **subset_multiindex_df_kwargs) - subset_df2 = subset_multiindex_df(df2, **subset_multiindex_df_kwargs) - # Compare dfs. 
- if compare_dfs_kwargs is None: - compare_dfs_kwargs = {} - diff_df = hpancomp.compare_dfs(subset_df1, subset_df2, **compare_dfs_kwargs) - return diff_df - - -# ############################################################################# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py deleted file mode 100644 index b0a6bf9d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_stats.py +++ /dev/null @@ -1,527 +0,0 @@ -""" -Import as: - -import helpers.hpandas_stats as hpanstat -""" - -import logging -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hpandas_dassert as hpandass -import helpers.hpandas_transform as hpantran -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - - -def compute_duration_df( - tag_to_df: Dict[str, pd.DataFrame], - *, - intersect_dfs: bool = False, - valid_intersect: bool = False, -) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: - """ - Compute a df with some statistics about the time index. - - E.g., - ``` - min_index max_index min_valid_index max_valid_index - tag1 2022-01-01 21:00:00+00:00 ... - tag2 2022-01-01 21:02:00+00:00 ... - tag3 2022-01-01 21:01:00+00:00 ... 
- ``` - - :param intersect_dfs: return a transformed dict with the intersection of - indices of all the dfs if True, otherwise return the input data as is - :param valid_intersect: intersect indices without NaNs if True, otherwise - intersect indices as is - :return: timestamp stats and updated dict of dfs, see `intersect_dfs` param - """ - hdbg.dassert_isinstance(tag_to_df, Dict) - # Create df and assign columns. - data_stats = pd.DataFrame() - min_col = "min_index" - max_col = "max_index" - min_valid_index_col = "min_valid_index" - max_valid_index_col = "max_valid_index" - # Collect timestamp info from all dfs. - for tag in tag_to_df.keys(): - # Check that the passed timestamp has timezone info. - first_idx = tag_to_df[tag].index[0] - hdateti.dassert_has_tz(cast(pd.Timestamp, first_idx)) - hpandass.dassert_index_is_datetime(tag_to_df[tag]) - # Compute timestamp stats. - data_stats.loc[tag, min_col] = tag_to_df[tag].index.min() - data_stats.loc[tag, max_col] = tag_to_df[tag].index.max() - data_stats.loc[tag, min_valid_index_col] = ( - tag_to_df[tag].dropna().index.min() - ) - data_stats.loc[tag, max_valid_index_col] = ( - tag_to_df[tag].dropna().index.max() - ) - # Make a copy so we do not modify the original data. - tag_to_df_updated = tag_to_df.copy() - # Change the initial dfs with intersection. - if intersect_dfs: - if valid_intersect: - # Assign start, end date column according to specs. - min_col = min_valid_index_col - max_col = max_valid_index_col - # The start of the intersection will be the max value amongt all start dates. - intersection_start_date = cast(pd.Timestamp, data_stats[min_col].max()) - # The end of the intersection will be the min value amongt all end dates. 
- intersection_end_date = cast(pd.Timestamp, data_stats[max_col].min()) - for tag in tag_to_df_updated.keys(): - df = hpantran.trim_df( - tag_to_df_updated[tag], - ts_col_name=None, - start_ts=intersection_start_date, - end_ts=intersection_end_date, - left_close=True, - right_close=True, - ) - tag_to_df_updated[tag] = df - return data_stats, tag_to_df_updated - - -# ############################################################################# - - -# TODO(gp): Remove this since it's in Google API. - - -def compute_weighted_sum( - dfs: Dict[str, pd.DataFrame], - weights: pd.DataFrame, - *, - index_mode: str = "assert_equal", -) -> Dict[str, pd.DataFrame]: - """ - Compute weighted sums of `dfs` using `weights`. - - :param dfs: dataframes keyed by id; all dfs should have the same cols, - indices are handled based on the `index_mode` - :param weights: float weights indexed by id with unique col names - :param index_mode: same as `mode` in `apply_index_mode()` - :return: weighted sums keyed by weight col names - """ - hdbg.dassert_isinstance(dfs, dict) - hdbg.dassert(dfs, "dictionary of dfs must be nonempty") - # Get a dataframe from the dictionary and record its index and columns. - id_ = list(dfs)[0] - hdbg.dassert_isinstance(id_, str) - df = dfs[id_] - hdbg.dassert_isinstance(df, pd.DataFrame) - cols = df.columns - # Sanity-check dataframes in dictionary. - for key, value in dfs.items(): - hdbg.dassert_isinstance(key, str) - hdbg.dassert_isinstance(value, pd.DataFrame) - # The reference df is not modified. - _, value = hpantran.apply_index_mode(df, value, index_mode) - hdbg.dassert( - value.columns.equals(cols), - "Column equality fails for keys=%s, %s", - id_, - key, - ) - # Sanity-check weights. 
- hdbg.dassert_isinstance(weights, pd.DataFrame) - hdbg.dassert_eq(weights.columns.nlevels, 1) - hdbg.dassert(not weights.columns.has_duplicates) - hdbg.dassert_set_eq(weights.index.to_list(), list(dfs)) - # Create a multiindexed dataframe to facilitate computing the weighted sums. - weighted_dfs = {} - combined_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys()) - # TODO(Paul): Consider relaxing the NaN-handling. - for col in weights.columns: - weighted_combined_df = combined_df.multiply(weights[col], level=0) - weighted_sums = weighted_combined_df.groupby(axis=1, level=1).sum( - min_count=len(dfs) - ) - weighted_dfs[col] = weighted_sums - return weighted_dfs - - -def remap_obj( - obj: Union[pd.Series, pd.Index], - map_: Dict[Any, Any], - **kwargs: Any, -) -> pd.Series: - """ - Substitute each value of an object with another value from a dictionary. - - :param obj: a Series or Index to remap values in - :param map_: dictionary mapping old values to new values - :param kwargs: additional keyword arguments passed to pd.Series.map() - :return: remapped pandas series - """ - hdbg.dassert_lte(1, obj.shape[0]) - # TODO(Grisha): consider extending for other mapping types supported by - # `pd.Series.map`. - hdbg.dassert_isinstance(map_, dict) - # Check that every element of the object is in the mapping. - hdbg.dassert_is_subset(obj, map_.keys()) - new_srs = obj.map(map_, **kwargs) - return cast(pd.Series, new_srs) - - -def get_random_df( - num_cols: int, - seed: Optional[int] = None, - date_range_kwargs: Optional[Dict[str, Any]] = None, -) -> pd.DataFrame: - """ - Compute df with random data with `num_cols` columns and index obtained by - calling `pd.date_range(**kwargs)`. 
- - :param num_cols: the number of columns in a DataFrame to generate - :param seed: see `random.seed()` - :param date_range_kwargs: kwargs for `pd.date_range()` - """ - if seed: - np.random.seed(seed) - if date_range_kwargs is None: - date_range_kwargs = {} - dt = pd.date_range(**date_range_kwargs) - df = pd.DataFrame(np.random.rand(len(dt), num_cols), index=dt) - return df - - -# ############################################################################# - - -def heatmap_df(df: pd.DataFrame, *, axis: Any = None) -> Any: - """ - Colorize a df with a heatmap depending on the numeric values. - - :param axis: along which axis to compute the heatmap - - 0 colorize along rows - - 1 colorize along columns - - None: colorize everything - """ - # Keep it here to avoid long start up times. - import seaborn as sns - - cm = sns.diverging_palette(5, 250, as_cmap=True) - return df.style.background_gradient(axis=axis, cmap=cm) - - -def to_perc(vals: Union[List, pd.Series], **perc_kwargs: Any) -> str: - """ - Report percentage of True values in a list or series. - - :param vals: list or series of boolean values - :param perc_kwargs: additional keyword arguments passed to hprint.perc() - :return: formatted percentage string - """ - if isinstance(vals, list): - vals = pd.Series(vals) - ret = hprint.perc(vals.sum(), len(vals), **perc_kwargs) - return cast(str, ret) - - -def add_end_download_timestamp( - obj: Union[pd.DataFrame, Dict], *, timezone: str = "UTC" -) -> Union[pd.DataFrame, Dict]: - """ - Add a column 'end_download_timestamp' to the DataFrame with the current - time. - - :param obj: The DataFrame to which the column will be added. - :param timezone: The timezone for the current time. Defaults to - 'UTC'. - """ - # Get current timestamp. - current_ts = hdateti.get_current_time(timezone) - # Set value of end_download_timestamp. 
- obj["end_download_timestamp"] = current_ts - return obj - - -def get_value_counts_stats_df( - df: pd.DataFrame, col_name: str, *, num_rows: int = 10 -) -> pd.DataFrame: - """ - Get the value counts of `col_name` in `df`. - - :param df: The DataFrame to get the value counts of `col_name` from. - :param col_name: The column name to get the value counts of. - :param num_rows: The number of rows to return. - :return: A DataFrame with the value counts of `col_name` in `df`. E.g., - ``` - count pct [%] - Venture Fund 1004 25.100 - Financial Services 274 6.850 - Venture Capital & Private Equity 176 4.400 - Computer Software 163 4.075 - Higher Education 133 3.325 - Information Technology & Services 73 1.825 - ``` - """ - hdbg.dassert_in(col_name, df.columns) - stats_df = df[col_name].value_counts().to_frame() - stats_df["pct [%]"] = stats_df["count"] / len(df) * 100 - if num_rows > 0: - stats_df = stats_df.head(num_rows) - return stats_df - - -def display_value_counts_stats_df( - df: pd.DataFrame, col_names: Union[str, List[str]], *, num_rows: int = 10 -) -> None: - if isinstance(col_names, list): - for col_name in col_names: - display_value_counts_stats_df(df, col_name, num_rows=num_rows) - return - import IPython.display - - hdbg.dassert_isinstance(col_names, str) - _LOG.info("# %s", col_names) - stats_df = get_value_counts_stats_df(df, col_names, num_rows=num_rows) - IPython.display.display(stats_df) - - -# ############################################################################# -# Functions moved from core/explore.py -# ############################################################################# - - -def report_zero_nan_inf_stats( - df: pd.DataFrame, - *, - zero_threshold: float = 1e-9, - verbose: bool = False, - as_txt: bool = False, - dbg_log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Report count and percentage about zeros, nans, infs for a df. 
- - :param df: dataframe to report the stats of - :param zero_threshold: threshold for classifying values as "zero" - :param verbose: if True, print the stats - :param as_txt: if True, print the stats as text - :param dbg_log_level: log level at which to print the debug info - :return: a DataFrame with the stats - """ - # Convert Series to DataFrame if needed. - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - # Print stats about the input dataframe. - _LOG.log(dbg_log_level, "index in [%s, %s]", df.index.min(), df.index.max()) - num_rows = df.shape[0] - _LOG.log(dbg_log_level, "num_rows=%s", hprint.thousand_separator(num_rows)) - _LOG.log(dbg_log_level, "data=") - import helpers.hpandas_display as hpandisp - - hpandisp.display_df(df, as_txt=as_txt, log_level=dbg_log_level) - # Compute date-based stats only if index is datetime. - if isinstance(df.index, pd.DatetimeIndex): - # TODO(gp): Can we do this faster? - dates = [d.date() for d in df.index] - num_days = len(set(dates)) - _LOG.log(dbg_log_level, "num_days=%s", num_days) - num_weekdays = len(set(d for d in dates if d.weekday() < 5)) - _LOG.log(dbg_log_level, "num_weekdays=%s", num_weekdays) - # - stats_df = pd.DataFrame(None, index=df.columns) - if False: - # Find the index of the first non-nan value. - df = df.applymap(lambda x: not np.isnan(x)) - min_idx = df.idxmax(axis=0) - min_idx.name = "min_idx" - # Find the index of the last non-nan value. 
- max_idx = df.reindex(index=df.index[::-1]).idxmax(axis=0) - max_idx.name = "max_idx" - stats_df["num_rows"] = num_rows - # - num_zeros = (np.abs(df) < zero_threshold).sum(axis=0) - if verbose: - stats_df["num_zeros"] = num_zeros - stats_df["zeros [%]"] = (100.0 * num_zeros / num_rows).apply( - hprint.round_digits - ) - # - num_nans = np.isnan(df).sum(axis=0) - if verbose: - stats_df["num_nans"] = num_nans - stats_df["nans [%]"] = (100.0 * num_nans / num_rows).apply( - hprint.round_digits - ) - # - num_infs = np.isinf(df).sum(axis=0) - if verbose: - stats_df["num_infs"] = num_infs - stats_df["infs [%]"] = (100.0 * num_infs / num_rows).apply( - hprint.round_digits - ) - # - num_valid = df.shape[0] - num_zeros - num_nans - num_infs - if verbose: - stats_df["num_valid"] = num_valid - stats_df["valid [%]"] = (100.0 * num_valid / num_rows).apply( - hprint.round_digits - ) - # - _LOG.log(dbg_log_level, "stats_df=\n%s", stats_df) - return stats_df - - -def pvalue_to_stars(pval: Optional[float]) -> str: - """ - Convert p-value to star notation for statistical significance. - - :param pval: p-value to convert - :return: star notation (* to ****) or ? for non-significant, NA for NaN - """ - if pval is None or np.isnan(pval): - stars = "NA" - else: - hdbg.dassert_lte(0.0, pval) - hdbg.dassert_lte(pval, 1.0) - if pval < 0.005: - # More than 99.5% confidence. - stars = "****" - elif pval < 0.01: - # More than 99% confidence. - stars = "***" - elif pval < 0.05: - # More than 95% confidence. - stars = "**" - elif pval < 0.1: - # More than 90% confidence. - stars = "*" - else: - stars = "?" - return stars - - -def format_ols_regress_results(regr_res: Optional[pd.DataFrame]) -> pd.DataFrame: - """ - Format OLS regression results into a readable DataFrame. - - :param regr_res: regression results dictionary with coeffs, pvals, rsquared, etc. 
- :return: formatted DataFrame with coefficients and statistics - """ - if regr_res is None: - _LOG.warning("regr_res=None: skipping") - df = pd.DataFrame(None) - return df - row: List[Union[float, str]] = [ - "%.3f (%s)" % (coeff, pvalue_to_stars(pval)) - for (coeff, pval) in zip(regr_res["coeffs"], regr_res["pvals"]) - ] - row.append(float("%.2f" % (regr_res["rsquared"] * 100.0))) - row.append(float("%.2f" % (regr_res["adj_rsquared"] * 100.0))) - col_names = regr_res["param_names"] + ["R^2 [%]", "Adj R^2 [%]"] - df = pd.DataFrame([row], columns=col_names) - return df - - -# ############################################################################# -# Exploratory analysis functions -# ############################################################################# - - -def _get_unique_values_stats(df: pd.DataFrame) -> pd.DataFrame: - """ - Get unique values count and percentage for each column. - - :param df: dataframe to analyze - :return: DataFrame with num_unique and unique [%] columns - """ - stats_df = pd.DataFrame(None, index=df.columns) - num_unique = df.nunique() - stats_df["num_unique"] = num_unique - stats_df["unique [%]"] = (100.0 * num_unique / df.shape[0]).apply( - hprint.round_digits - ) - return stats_df - - -def explore_dataframe( - df: pd.DataFrame, - *, - show_distributions: bool = False, - show_correlations: bool = False, - zero_threshold: float = 1e-9, - dbg_log_level: int = logging.DEBUG, -) -> Optional[pd.DataFrame]: - """ - Perform comprehensive exploratory analysis of a DataFrame. - - Computes data quality metrics (zeros, NaNs, infinities, valid data), - optionally plots distributions of high-variability columns, and - optionally displays a correlation matrix. 
- - :param df: Input dataframe to analyze - :param show_distributions: If True, plots distributions of top-variability - columns in a 3-column grid - :param show_correlations: If True, displays correlation matrix as a heatmap - :param zero_threshold: Threshold for classifying values as "zero" in - quality report - :return: Statistics DataFrame from report_zero_nan_inf_stats with columns: - num_rows, zeros [%], nans [%], infs [%], valid [%] - """ - import matplotlib.pyplot as plt - from IPython.display import display - - hdbg.dassert_lt(0, len(df), "Dataframe is empty") - # Compute and display data quality statistics. - stats_df = report_zero_nan_inf_stats( - df, zero_threshold=zero_threshold, dbg_log_level=dbg_log_level - ) - # Add information about the number of unique values and percentage of unique values for each column. - unique_stats_df = _get_unique_values_stats(df) - stats_df = pd.concat([stats_df, unique_stats_df], axis=1) - if hsystem.is_running_in_ipynb(): - _LOG.info("stats_df=") - display(stats_df) - _LOG.debug("stats_df=\n%s", stats_df) - # Plot distributions if requested. - if hsystem.is_running_in_ipynb(): - if show_distributions: - _LOG.info("Univariate distributions:") - numeric_cols = df.select_dtypes(include="number").columns.tolist() - if len(numeric_cols) > 0: - # Compute standard deviation and select top columns. - std_vals = df[numeric_cols].std().sort_values(ascending=False) - num_to_plot = len(numeric_cols) - top_cols = std_vals.head(num_to_plot).index.tolist() - # Create grid of subplots. 
- import helpers.hmatplotlib as hmatplo - - fig, axes = hmatplo.get_multiple_plots( - num_to_plot, 3, y_scale=3.5 - ) - _ = fig - for i, col in enumerate(top_cols): - ax = axes[i] - col_data = df[col].dropna() - weights = np.ones_like(col_data) / len(col_data) * 100 - ax.hist(col_data, bins=30, weights=weights, edgecolor="k") - ax.set_title(col) - ax.set_xlabel("Value") - ax.set_ylabel("Percentage [%]") - plt.tight_layout() - plt.show() - # Display correlation matrix if requested. - if show_correlations: - numeric_df = df.select_dtypes(include="number") - if len(numeric_df.columns) >= 2: - corr_matrix = numeric_df.corr() - _LOG.info("Correlation matrix:") - # TODO(gp): Improve the plot changing the number of digits. - corr_heatmap = heatmap_df(corr_matrix) - display(corr_heatmap) - if hsystem.is_running_in_ipynb(): - return None - return stats_df diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py deleted file mode 100644 index 6eae1fa57..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_transform.py +++ /dev/null @@ -1,1023 +0,0 @@ -""" -Import as: - -import helpers.hpandas_transform as hpantran -""" - -import csv -import logging -import math -import random -import re -from typing import ( - Any, - Callable, - Collection, - Dict, - Iterator, - List, - Optional, - Tuple, - Union, -) - -import pandas as pd - -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -# TODO(ai_gp): Import the file and not the package to avoid cyclic imports. -import helpers.hpandas_conversion as hpanconv -import helpers.hprint as hprint - -_LOG = hloggin.getLogger(__name__) - -# Enable extra verbose debugging. Do not commit. 
-_TRACE = False - -RowsValues = List[List[str]] - -# ############################################################################# -# Resampling & Time Series Operations -# ############################################################################# - - -def resample_index(index: pd.DatetimeIndex, frequency: str) -> pd.DatetimeIndex: - """ - Resample `DatetimeIndex`. - - :param index: `DatetimeIndex` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DatetimeIndex` - """ - # Import locally to avoid cyclic import. - import helpers.hpandas_dassert as hpandass - - _LOG.debug(hprint.to_str("index frequency")) - hdbg.dassert_isinstance(index, pd.DatetimeIndex) - hpandass.dassert_unique_index( - index, msg="Index must have only unique values" - ) - min_date = index.min() - max_date = index.max() - _LOG.debug("min_date=%s max_date=%s", min_date, max_date) - # TODO(gp): Preserve the index name. - # index_name = index.name - resampled_index = pd.date_range( - start=min_date, - end=max_date, - freq=frequency, - ) - # Enable detailed debugging. - if False: - if len(resampled_index) > len(index): - # Downsample. - _LOG.debug( - "Index length increased by %s = %s - %s", - len(resampled_index) - len(index), - len(resampled_index), - len(index), - ) - elif len(resampled_index) < len(index): - # Upsample. - _LOG.debug( - "Index length decreased by %s = %s - %s", - len(index) - len(resampled_index), - len(index), - len(resampled_index), - ) - else: - _LOG.debug("Index length=%s has not changed", len(index)) - # resampled_index.name = index_name - return resampled_index - - -def resample_df(df: pd.DataFrame, frequency: str) -> pd.DataFrame: - """ - Resample `DataFrame` by placing NaN in missing locations in the index. 
- - :param df: `DataFrame` to resample - :param frequency: frequency from `pd.date_range()` to resample to - :return: resampled `DataFrame` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Preserve the index name. - index_name = df.index.name - resampled_index = resample_index(df.index, frequency) - df_reindex = df.reindex(resampled_index) - df_reindex.index.name = index_name - return df_reindex - - -def reindex_on_unix_epoch( - df: pd.DataFrame, in_col_name: str, unit: str = "s" -) -> pd.DataFrame: - """ - Transform the column `in_col_name` into a datetime index. `in_col_name` - contains Unix epoch (e.g., 1638194400) and it is converted into a UTC time. - - :param df: dataframe with a unix epoch - :param in_col_name: column containing unix epoch - :param unit: the unit of unix epoch - """ - # Convert. - temp_col_name = in_col_name + "_tmp" - hdbg.dassert_in(in_col_name, df.columns) - hdbg.dassert_not_in(temp_col_name, df.columns) - # Save. - df[temp_col_name] = pd.to_datetime(df[in_col_name], unit=unit, utc=True) - df.set_index(temp_col_name, inplace=True, drop=True) - df.index.name = None - return df - - -def find_gaps_in_dataframes( - df1: pd.DataFrame, df2: pd.DataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Find data present in one dataframe and missing in the other one. - - :param df1: first dataframe for comparison - :param df2: second dataframe for comparison - :return: two dataframes with missing data - """ - # Get data present in first, but not present in second dataframe. - first_missing_indices = df2.index.difference(df1.index) - first_missing_data = df2.loc[first_missing_indices] - # Get data present in second, but not present in first dataframe. - second_missing_indices = df1.index.difference(df2.index) - second_missing_data = df1.loc[second_missing_indices] - return first_missing_data, second_missing_data - - -# TODO(Grisha): use this idiom everywhere in the codebase, e.g., in `compare_dfs()`. 
- - -def find_gaps_in_time_series( - time_series: pd.Series, - start_timestamp: pd.Timestamp, - end_timestamp: pd.Timestamp, - freq: str, -) -> pd.Series: - """ - Find missing points on a time interval specified by [start_timestamp, - end_timestamp], where point distribution is determined by . - - If the passed time series is of a unix epoch format. It is - automatically tranformed to pd.Timestamp. - - :param time_series: time series to find gaps in - :param start_timestamp: start of the time interval to check - :param end_timestamp: end of the time interval to check - :param freq: distance between two data points on the interval. - Aliases correspond to pandas.date_range's freq parameter, i.e. - "S" -> second, "T" -> minute. - :return: pd.Series representing missing points in the source time - series. - """ - _time_series = time_series - if str(time_series.dtype) in ["int32", "int64"]: - _time_series = _time_series.map(hdateti.convert_unix_epoch_to_timestamp) - correct_time_series = pd.date_range( - start=start_timestamp, end=end_timestamp, freq=freq - ) - return correct_time_series.difference(_time_series) - - -# ############################################################################# -# DataFrame Transformation -# ############################################################################# - - -def apply_index_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the index mode. 
- - :param df1: first input df - :param df2: second input df - :param mode: method of processing indices - - "assert_equal": check that both indices are equal, assert otherwise - - "intersect": restrict both dfs to a common index - - "leave_unchanged": ignore any indices mismatch and return dfs as-is - :return: transformed copy of the inputs - """ - # Import locally to avoid cyclic import - import helpers.hpandas_dassert as hpandass - - _LOG.debug("mode=%s", mode) - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - hpandass.dassert_indices_equal(df1_copy, df2_copy) - elif mode == "intersect": - # TODO(Grisha): Add sorting on demand. - common_index = df1_copy.index.intersection(df2_copy.index) - df1_copy = df1_copy[df1_copy.index.isin(common_index)] - df2_copy = df2_copy[df2_copy.index.isin(common_index)] - elif mode == "leave_unchanged": - _LOG.debug( - "Ignoring any index missmatch as per user's request.\n" - "df1.index.difference(df2.index)=\n%s\ndf2.index.difference(df1.index)=\n%s", - df1_copy.index.difference(df2_copy.index), - df2_copy.index.difference(df1_copy.index), - ) - else: - raise ValueError(f"Unsupported index_mode={mode}") - return df1_copy, df2_copy - - -def apply_columns_mode( - df1: pd.DataFrame, - df2: pd.DataFrame, - mode: str, -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Process DataFrames according to the column mode. 
- - :param df1: first input df - :param df2: second input df - :param mode: method of processing columns - - "assert_equal": check that both dfs have equal columns, assert otherwise - - "intersect": restrict both dfs to only include common columns - - "leave_unchanged": ignore any column mismatches and return dfs as-is - :return: transformed copy of the inputs - """ - # Import locally to avoid cyclic import - import helpers.hpandas_dassert as hpandass - import helpers.hpandas_utils as hpanutil - - _LOG.debug("mode=%s", mode) - # Input validation. - hdbg.dassert_isinstance(df1, pd.DataFrame) - hdbg.dassert_isinstance(df2, pd.DataFrame) - hdbg.dassert_isinstance(mode, str) - # Copy in order not to modify the inputs. - df1_copy = df1.copy() - df2_copy = df2.copy() - if mode == "assert_equal": - # Check if columns are equal or not. - hpandass.dassert_columns_equal(df1_copy, df2_copy) - elif mode == "intersect": - # Filter dataframes based on its common columns. - common_columns = df1_copy.columns.intersection(df2_copy.columns) - df1_copy = df1_copy[common_columns] - df2_copy = df2_copy[common_columns] - # Log the string representation of 2 dfs. - _LOG.debug("df1 after filtering=\n%s", hpanutil.df_to_str(df1)) - _LOG.debug("df2 after filtering=\n%s", hpanutil.df_to_str(df2)) - elif mode == "leave_unchanged": - # Ignore mismatch. - _LOG.debug( - "Ignoring any column missmatch as per user's request.\n" - "df1.columns.difference(df2.columns)=\n%s\ndf2.columns.difference(df1.columns)=\n%s", - df1.columns.difference(df2.columns), - df2.columns.difference(df1.columns), - ) - else: - raise ValueError(f"Unsupported column mode: {mode}") - return df1_copy, df2_copy - - -def trim_df( - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, -) -> pd.DataFrame: - """ - Trim the dataframe using values in `ts_col_name`. 
- - The dataframe is trimmed in the interval bounded by `start_ts` and `end_ts`. - - :param df: the dataframe to trim - :param ts_col_name: the name of the column; `None` means index - :param start_ts: the start boundary for trimming - :param end_ts: the end boundary for trimming - :param left_close: whether to include the start boundary of the interval - - True: [start_ts, ... - - False: (start_ts, ... - :param right_close: whether to include the end boundary of the interval - - True: ..., end_ts] - - False: ..., end_ts) - :return: the trimmed dataframe - """ - if _TRACE: - # Import locally to avoid cyclic import - import helpers.hpandas_utils as hpanutil - - _LOG.trace( - hpanutil.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - ) - _LOG.debug( - hprint.to_str("ts_col_name start_ts end_ts left_close right_close") - ) - if _TRACE: - # Import locally to avoid cyclic import - import helpers.hpandas_utils as hpanutil - - _LOG.trace("df=\n%s", hpanutil.df_to_str(df)) - if df.empty: - # If the df is empty, there is nothing to trim. - return df - if start_ts is None and end_ts is None: - # If no boundaries are specified, there are no points of reference to trim - # to. - return df - num_rows_before = df.shape[0] - if start_ts is not None and end_ts is not None: - # Confirm that the interval boundaries are valid. - hdateti.dassert_tz_compatible(start_ts, end_ts) - hdbg.dassert_lte(start_ts, end_ts) - # Get the values to filter by. - if ts_col_name is None: - values_to_filter_by = pd.Series(df.index, index=df.index) - else: - hdbg.dassert_in(ts_col_name, df.columns) - values_to_filter_by = df[ts_col_name] - if values_to_filter_by.is_monotonic_increasing: - _LOG.trace("df is monotonic") - # The values are sorted; using the `pd.Series.searchsorted()` method. - # Find the index corresponding to the left boundary of the interval. 
- if start_ts is not None: - side = "left" if left_close else "right" - left_idx = values_to_filter_by.searchsorted(start_ts, side) - else: - # There is nothing to filter, so the left index is the first one. - left_idx = 0 - _LOG.debug(hprint.to_str("start_ts left_idx")) - # Find the index corresponding to the right boundary of the interval. - if end_ts is not None: - side = "right" if right_close else "left" - right_idx = values_to_filter_by.searchsorted(end_ts, side) - else: - # There is nothing to filter, so the right index is None. - right_idx = df.shape[0] - _LOG.debug(hprint.to_str("end_ts right_idx")) - # - hdbg.dassert_lte(0, left_idx) - hdbg.dassert_lte(left_idx, right_idx) - hdbg.dassert_lte(right_idx, df.shape[0]) - _LOG.debug(hprint.to_str("start_ts left_idx")) - if right_idx < df.shape[0]: - _LOG.debug(hprint.to_str("end_ts right_idx")) - df = df.iloc[left_idx:right_idx] - else: - _LOG.trace("df is not monotonic") - # The values are not sorted; using the `pd.Series.between` method. - if left_close and right_close: - inclusive = "both" - elif left_close: - inclusive = "left" - elif right_close: - inclusive = "right" - else: - inclusive = "neither" - epsilon = pd.DateOffset(minutes=1) - if start_ts is None: - start_ts = values_to_filter_by.min() - epsilon - if end_ts is None: - end_ts = values_to_filter_by.max() + epsilon - df = df[ - values_to_filter_by.between(start_ts, end_ts, inclusive=inclusive) - ] - # Report the changes. - num_rows_after = df.shape[0] - if num_rows_before != num_rows_after: - _LOG.debug( - "Removed %s rows", - hprint.perc(num_rows_before - num_rows_after, num_rows_before), - ) - return df - - -def _assemble_df_rows(rows_values: RowsValues) -> RowsValues: - """ - Organize dataframe values into a column-row structure. - - - Indentation artifacts are removed - - The index placement is handled, i.e. 
- - if the index is named, the name is located and moved to the same - row as the column names - - if the index is not named, the row with the column names receives - a placeholder empty value in its place - - Empty columns are dropped - - :param rows_values: row values extracted from a string df representation - :return: row values assembled into a valid column-row structure - """ - # Clean up indentation artifacts. - if all(row[0] == "" for row in rows_values): - # Remove the first empty cell in each row. - for row in rows_values: - del row[0] - # If the index is named, its name is located in the second row, - # with an optional extra empty value cell value next to it. - if len(rows_values[1]) == 1 or ( - len(rows_values[1]) == 2 and rows_values[1][1] == "" - ): - # Move the index name to the row with all the column names. - if rows_values[0][0] == "": - rows_values[0][0] = rows_values[1][0] - else: - rows_values[0].insert(0, rows_values[1][0]) - # Drop the former index name row. - del rows_values[1] - else: - # Add an empty cell for the absent index name. - rows_values[0].insert(0, "") - # Identify and remove empty columns. - min_len_row = min(len(row) for row in rows_values) - idxs_to_delete = [] - for i in range(min_len_row): - if all(row[i] == "" for row in rows_values): - idxs_to_delete.append(i) - for idx in idxs_to_delete: - for row in rows_values: - del row[idx] - # Confirm that all the rows have the same number of values. - hdbg.dassert_eq(len({len(row) for row in rows_values}), 1) - return rows_values - - -# TODO(Nina): Add `filter_data_mode`. - - -def str_to_df( - df_as_str: str, - col_to_type: Dict[str, Optional[type]], - col_to_name_type: Dict[str, type], -) -> pd.DataFrame: - """ - Convert a string representation of a dataframe into a Pandas df. - - :param df_as_str: a df as a string - - the format of the string is the same as the output of - `hpandas_utils.df_to_str()` on a pd.DataFrame, e.g. 
- ``` - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05 - ``` - - values (including column names) that contain spaces need - to be enclosed in double quotation marks, e.g. - "2023-03-15 16:35:41.205000+00:00" - :param col_to_type: a mapping between the column names and the - types of the values in these columns - - if a column is not present in the mapping, its values will - remain strings - - to indicate the type of index values, use {"__index__": ...} - mapping, e.g. {"__index__": pd.Timestamp} - :param col_to_name_type: a mapping between the column names and - the required types of these column names - - same conventions apply as for `col_to_type` (see above) - :return: a converted Pandas dataframe - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - # Remove the placeholder ["..."] row. - rows_values = [row for row in rows_values if row != ["..."]] - # Organize values into a proper column-row structure. - rows_values = _assemble_df_rows(rows_values) - # Get the column names. - column_names = rows_values[0][1:] - # Get the index. - index_values = [row[0] for row in rows_values[1:]] - index_name = rows_values[0][0] - # Construct the df. - df = pd.DataFrame( - [row[1:] for row in rows_values[1:]], - columns=column_names, - index=index_values, - ) - if index_name != "": - df.index.name = index_name - # Cast the columns into appropriate types. - # Import locally to avoid cyclic import - import helpers.hpandas_conversion as hpanconv - - for col, col_type in col_to_type.items(): - if col == "__index__": - df.index = hpanconv.cast_series_to_type(df.index, col_type) - else: - df[col] = hpanconv.cast_series_to_type(df[col], col_type) - # Cast the column names into appropriate types. 
- for col, col_name_type in col_to_name_type.items(): - if col == "__index__": - df.index = df.index.rename(col_name_type(df.index.name)) - else: - df = df.rename(columns={col: col_name_type(col)}) - return df - - -# ############################################################################# -# Column Operations -# ############################################################################# - - -def check_and_filter_matching_columns( - df: pd.DataFrame, required_columns: List[str], filter_data_mode: str -) -> pd.DataFrame: - """ - Check that columns are the required ones and if not filter data depending - on `filter_data_mode`. - - :param df: data to check columns for - :param required_columns: columns to return, skipping columns that are not required - :param filter_data_mode: control behaviour with respect to extra or missing columns - - "assert": raise an error if required columns do not match received columns - - "warn_and_trim": return the intersection of required and received columns and - issue a warning - :return: input data as it is if required columns match received columns otherwise - processed data, see `filter_data_mode` - """ - received_columns = df.columns.to_list() - hdbg.dassert_lte(1, len(received_columns)) - # - if filter_data_mode == "assert": - # Raise an assertion. - only_warning = False - elif filter_data_mode == "warn_and_trim": - # Just issue a warning. - only_warning = True - # Get columns intersection while preserving the order of the columns. - columns_intersection = [ - col_name - for col_name in required_columns - if col_name in received_columns - ] - hdbg.dassert_lte(1, len(columns_intersection)) - df = df[columns_intersection] - else: - raise ValueError(f"Invalid filter_data_mode='{filter_data_mode}'") - hdbg.dassert_set_eq( - required_columns, - received_columns, - only_warning=only_warning, - msg="Received columns do not match required columns.", - ) - return df - - -# TODO(Grisha): finish the function. 
-# TODO(Grisha): merge with the one in `dataflow.model.correlation.py`? - - -# ############################################################################# -# Merge -# ############################################################################# - - -def merge_dfs( - df1: pd.DataFrame, - df2: pd.DataFrame, - threshold_col_name: str, - *, - threshold: float = 0.9, - intersecting_columns: Optional[List[str]] = None, - **pd_merge_kwargs: Any, -) -> pd.DataFrame: - """ - Wrap `pd.merge`. - - :param threshold_col_name: a column's name to check the minimum - overlap on - :param threshold: minimum overlap of unique values in a specified - column to perform the merge - :param intersecting_columns: allow certain columns to appear in both - dataframes; store both in the resulting df with corresponding - suffixes - """ - _LOG.debug( - hprint.to_str( - "threshold_col_name threshold intersecting_columns pd_merge_kwargs" - ) - ) - # Sanity check column types. - threshold_col1 = df1[threshold_col_name] - threshold_col2 = df2[threshold_col_name] - only_first_elem = False - hdbg.dassert_array_has_same_type_element( - threshold_col1, threshold_col2, only_first_elem - ) - # TODO(Grisha): @Dan Implement asserts for each asset id. - # Check that an overlap of unique values is above the specified threshold. - threshold_unique_values1 = set(threshold_col1) - threshold_unique_values2 = set(threshold_col2) - threshold_common_values = set(threshold_unique_values1) & set( - threshold_unique_values2 - ) - threshold_common_values_share1 = len(threshold_common_values) / len( - threshold_unique_values1 - ) - threshold_common_values_share2 = len(threshold_common_values) / len( - threshold_unique_values2 - ) - hdbg.dassert_lte(threshold, threshold_common_values_share1) - hdbg.dassert_lte(threshold, threshold_common_values_share2) - # Use an empty set instead of None to perform set difference further. 
- intersecting_columns_set = ( - set() if intersecting_columns is None else set(intersecting_columns) - ) - # Check that there are no common columns except for the ones in `intersecting_columns`. - df1_cols = ( - set(df1.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - df2_cols = ( - set(df2.columns.to_list()) - - set(pd_merge_kwargs["on"]) - - intersecting_columns_set - ) - hdbg.dassert_not_intersection(df1_cols, df2_cols) - # - res_df = df1.merge(df2, **pd_merge_kwargs) - return res_df - - -# TODO(gp): Is this (ironically) a duplicate of drop_duplicates? - - -def get_df_from_iterator( - iter_: Iterator[pd.DataFrame], - *, - sort_index: bool = True, -) -> pd.DataFrame: - """ - Concat all the dataframes in the iterator in one dataframe. - - :param iter_: dataframe iterator - :param sort_index: whether to sort output index or not - :return: combined iterator data - """ - # TODO(gp): @all make a copy of `iter_` so we don't consume it. - dfs = list(iter_) - df_res = pd.concat(dfs) - if sort_index: - df_res = df_res.sort_index() - return df_res - - -# ############################################################################# -# Filter -# ############################################################################# - - -def subset_df(df: pd.DataFrame, nrows: int, seed: int = 42) -> pd.DataFrame: - """ - Remove N rows from the input data and shuffle the remaining ones. 
- - :param df: input data - :param nrows: the number of rows to remove from the original data - :param seed: see `random.seed()` - :return: shuffled data with removed rows - """ - hdbg.dassert_lte(1, nrows) - hdbg.dassert_lte(nrows, df.shape[0]) - idx = list(range(df.shape[0])) - random.seed(seed) - random.shuffle(idx) - idx = sorted(idx[nrows:]) - return df.iloc[idx] - - -def filter_df( - df: pd.DataFrame, - col_name: str, - value: Any, - *, - invert: bool = False, - check_value: bool = True, - # TODO(gp): -> verbose - print_info: bool = True, -) -> pd.DataFrame: - """ - Filter a dataframe based on a column value. - - :param df: dataframe to filter - :param col_name: column name to filter on - :param value: value to filter on - :param invert: whether to invert the filter - :param check_value: whether to check that the value is in the column - :param print_info: whether to print information about the filter - :return: filtered dataframe - """ - hdbg.dassert_in(col_name, df.columns) - if isinstance(value, list): - mask = df[col_name].isin(value) - else: - if check_value: - hdbg.dassert_in(value, df[col_name].unique()) - mask = df[col_name] == value - if invert: - mask = ~mask - if print_info: - _LOG.info("selected=%s", hprint.perc(mask.sum(), df.shape[0])) - return df[mask] - - -def remove_empty_columns( - df: pd.DataFrame, *, verbose: bool = True -) -> pd.DataFrame: - """ - Remove empty columns from a dataframe. 
- - :param df: dataframe to remove empty columns from - :return: dataframe with empty columns removed - """ - mask = df.apply(lambda col: col.notna() & (col != "")).any() - non_empty_columns = df.columns[mask] - empty_columns = df.columns[~mask] - if verbose: - _LOG.info( - "kept %s columns: %s", - hprint.perc(len(non_empty_columns), len(df.columns)), - hprint.list_to_str(non_empty_columns), - ) - _LOG.info( - "removed %s columns: %s", - hprint.perc(len(empty_columns), len(df.columns)), - hprint.list_to_str(empty_columns), - ) - df = df[non_empty_columns] - return df - - -def remove_stable_columns( - df: pd.DataFrame, *, threshold: float = 0.9, verbose: bool = True -) -> pd.DataFrame: - """ - Remove columns from a dataframe that have less than threshold unique - values. - - :param df: dataframe to remove stable columns from - :param threshold: threshold for the percentage of stable columns to - remove - :return: dataframe with stable columns removed - """ - high_variability_columns = [] - for col in df.columns: - unique_values = df[col].unique() - if len(unique_values) / len(df) >= threshold: - high_variability_columns.append(col) - # Compute the columns to remove. - columns_to_remove = df.columns[~df.columns.isin(high_variability_columns)] - if verbose: - _LOG.info( - "kept %s columns: %s", - hprint.perc(len(high_variability_columns), len(df.columns)), - hprint.list_to_str(high_variability_columns), - ) - _LOG.info( - "removed %s columns: %s", - hprint.perc(len(columns_to_remove), len(df.columns)), - hprint.list_to_str(columns_to_remove), - ) - df = df[high_variability_columns] - return df - - -def adapt_to_series(f: Callable) -> Callable: - """ - Extend a function working on dataframes so that it can work on series. - """ - - def wrapper( - obj: Union[pd.Series, pd.DataFrame], *args: Any, **kwargs: Any - ) -> Any: - # Convert a pd.Series to a pd.DataFrame. 
- was_series = False - if isinstance(obj, pd.Series): - obj = pd.DataFrame(obj) - was_series = True - hdbg.dassert_isinstance(obj, pd.DataFrame) - # Apply the function. - res = f(obj, *args, **kwargs) - # Transform the output, if needed. - if was_series: - if isinstance(res, tuple): - res_obj, res_tmp = res[0], res[1:] - res_obj_srs = hpanconv.to_series(res_obj) - res_obj_srs = [res_obj_srs] - res_obj_srs.extend(res_tmp) - res = tuple(res_obj_srs) - else: - res = hpanconv.to_series(res) - return res - - return wrapper - - -# ############################################################################# - - -def add_pct( - df: pd.DataFrame, - col_name: str, - total: int, - dst_col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Add to df a column "dst_col_name" storing the percentage of values in - column "col_name" with respect to "total". The rest of the parameters are - the same as hprint.round_digits(). - - :return: updated df - """ - # Add column with percentage right after col_name. - pos_col_name = df.columns.tolist().index(col_name) - df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) - # Format. - df[col_name] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in df[col_name] - ] - df[dst_col_name] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in df[dst_col_name] - ] - return df - - -# ############################################################################# - - -def remove_columns( - df: pd.DataFrame, cols: Collection[str], log_level: int = logging.DEBUG -) -> pd.DataFrame: - """ - Remove specified columns from a dataframe. 
- - :param df: dataframe to remove columns from - :param cols: collection of column names to remove - :param log_level: logging level for reporting removed columns - :return: dataframe with specified columns removed - """ - to_remove = set(cols).intersection(set(df.columns)) - _LOG.log(log_level, "to_remove=%s", hprint.list_to_str(to_remove)) - df.drop(to_remove, axis=1, inplace=True) - _LOG.debug("df=\n%s", df.head(3)) - _LOG.log(log_level, hprint.list_to_str(df.columns)) - return df - - -def filter_with_df( - df: pd.DataFrame, filter_df: pd.DataFrame, log_level: int = logging.DEBUG -) -> pd.Series: - """ - Compute a mask for DataFrame df using common columns and values in - "filter_df". - """ - mask = None - for c in filter_df: - hdbg.dassert_in(c, df.columns) - vals = filter_df[c].unique() - if mask is None: - mask = df[c].isin(vals) - else: - mask &= df[c].isin(vals) - mask: pd.DataFrame - _LOG.log(log_level, "after filter=%s", hprint.perc(mask.sum(), len(mask))) - return mask - - -def filter_by_time( - df: pd.DataFrame, - lower_bound: hdateti.StrictDatetime, - upper_bound: hdateti.StrictDatetime, - inclusive: str, - ts_col_name: Optional[str], - log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Filter data by time between `lower_bound` and `upper_bound`. - - Pass `None` to `ts_col_name` to filter by `DatetimeIndex`. - - :param df: data to filter - :param lower_bound: left limit point of the time interval - :param upper_bound: right limit point of the time interval - :param inclusive: include boundaries - - "both": `[lower_bound, upper_bound]` - - "neither": `(lower_bound, upper_bound)` - - "right": `(lower_bound, upper_bound]` - - "left": `[lower_bound, upper_bound)` - :param ts_col_name: name of a timestamp column to filter with, or None to - use the DatetimeIndex - :param log_level: the level of logging, e.g. 
`DEBUG` - :return: dataframe filtered by time - """ - hdateti.dassert_is_strict_datetime(lower_bound) - hdateti.dassert_is_strict_datetime(upper_bound) - # Time filtering is not working if timezones are different. - hdateti.dassert_tz_compatible_timestamp_with_df(lower_bound, df, ts_col_name) - hdateti.dassert_tz_compatible_timestamp_with_df(upper_bound, df, ts_col_name) - # - if ts_col_name is None: - # Filter data by index. - hdbg.dassert_isinstance(df.index, pd.DatetimeIndex) - # Cast index to `pd.Series` to use the `between` method. - mask = df.index.to_series().between(lower_bound, upper_bound, inclusive) - else: - # Filter data by a specified column. - hdbg.dassert_in(ts_col_name, df.columns) - mask = df[ts_col_name].between(lower_bound, upper_bound, inclusive) - # - _LOG.log( - log_level, - "Filtering between %s and %s with inclusive=`%s`, selected rows=%s", - lower_bound, - upper_bound, - inclusive, - hprint.perc(mask.sum(), df.shape[0]), - ) - return df[mask] - - -def filter_by_val( - df: pd.DataFrame, - col_name: str, - min_val: float, - max_val: float, - use_thousands_separator: bool = True, - log_level: int = logging.DEBUG, -) -> pd.DataFrame: - """ - Filter out rows of df where df[col_name] is not in [min_val, max_val]. - """ - # TODO(gp): If column is ordered, this can be done more efficiently with - # binary search. 
- num_rows = df.shape[0] - if min_val is not None and max_val is not None: - hdbg.dassert_lte(min_val, max_val) - mask = None - if min_val is not None: - mask = min_val <= df[col_name] - if max_val is not None: - mask2 = df[col_name] <= max_val - if mask is None: - mask = mask2 - else: - mask &= mask2 - res = df[mask] - hdbg.dassert_lt(0, res.shape[0]) - _LOG.log( - log_level, - "Rows kept %s, removed %s rows", - hprint.perc( - res.shape[0], - num_rows, - use_thousands_separator=use_thousands_separator, - ), - hprint.perc( - num_rows - res.shape[0], - num_rows, - use_thousands_separator=use_thousands_separator, - ), - ) - return res - - -# ############################################################################# -# PCA -# ############################################################################# - - -def sample_rolling_df( - rolling_df: pd.DataFrame, periods: int -) -> Tuple[pd.DataFrame, pd.DatetimeIndex]: - """ - Given a rolling metric stored as multiindex (e.g., correlation computed by - pd.ewm) sample `periods` equispaced samples. 
- - :return: sampled df, array of timestamps selected - """ - timestamps = rolling_df.index.get_level_values(0) - ts = timestamps[:: math.ceil(len(timestamps) / periods)] - _LOG.debug("timestamps=%s", str(ts)) - # rolling_df_out = rolling_df.unstack().reindex(ts).stack(dropna=False) - rolling_df_out = rolling_df.loc[ts] - return rolling_df_out, ts diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py deleted file mode 100644 index aaacb290a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpandas_utils.py +++ /dev/null @@ -1,649 +0,0 @@ -""" -Import as: - -import helpers.hpandas_utils as hpanutil -""" - -import logging -from typing import Any, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd -import tqdm.autonotebook as tauton - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = hloggin.getLogger(__name__) - -# Import add_pct for use in this module. - - -# TODO(gp): -> AxisNameSet -ColumnSet = Optional[Union[str, List[str]]] - - -# ############################################################################# - - -# TODO(gp): Maybe we can have a `_LOG_df_to_str(log_level, *args, **kwargs)` that -# calls `_LOG.log(log_level, hpandas.df_to_str(*args, **kwargs, log_level=log_level))`. -# TODO(gp): We should make sure this works properly in a notebook, although -# it's not easy to unit test. - - -def _display(log_level: int, df: pd.DataFrame) -> None: - """ - Display a dataframe in a notebook at the given log level. - - The behavior is similar to a command like `_LOG.log(log_level, ...)` but - for a notebook `display` command. - - :param log_level: log level at which to display a df. 
E.g., if `log_level = - logging.DEBUG`, then we display the df only if we are running with - `-v DEBUG`. If `log_level = logging.INFO` then we don't display it - :param df: dataframe to display - """ - from IPython.display import display - - if ( - hsystem.is_running_in_ipynb() - and log_level >= hdbg.get_logger_verbosity() - ): - display(df) - - -def _df_to_str( - df: pd.DataFrame, - num_rows: Optional[int], - max_columns: int, - max_colwidth: int, - max_rows: int, - precision: int, - display_width: int, - use_tabulate: bool, - log_level: int, -) -> str: - """ - Convert a DataFrame to a string representation. - - :param df: The DataFrame to convert to a string. - :param num_rows: The number of rows to display. - :param max_columns: The maximum number of columns to display. - :param max_colwidth: The maximum width of each column. - :param max_rows: The maximum number of rows to display. - :param precision: The precision of the numbers. - :param display_width: The width of the display. - :param use_tabulate: Whether to use the tabulate library to format - the DataFrame. - :param log_level: The log level to use. - :return: A string representation of the DataFrame. - """ - is_in_ipynb = hsystem.is_running_in_ipynb() - out = [] - # Set dataframe print options. - with pd.option_context( - "display.max_colwidth", - max_colwidth, - # "display.height", 1000, - "display.max_rows", - max_rows, - "display.precision", - precision, - "display.max_columns", - max_columns, - "display.width", - display_width, - ): - if use_tabulate: - import tabulate - - out.append(tabulate.tabulate(df, headers="keys", tablefmt="psql")) - # TODO(Grisha): Add an option to display all rows since if `num_rows` - # is `None`, only first and last 5 rows are displayed. Consider using - # `df.to_string()` instead of `str(df)`. - if num_rows is None or df.shape[0] <= num_rows: - # Print the entire data frame. - if not is_in_ipynb: - out.append(str(df)) - else: - # Display dataframe. 
- _display(log_level, df) - else: - nr = num_rows // 2 - if not is_in_ipynb: - # Print top and bottom of df. - out.append(str(df.head(nr))) - out.append("...") - tail_str = str(df.tail(nr)) - # Remove index and columns from tail_df. - skipped_rows = 1 - if df.index.name: - skipped_rows += 1 - tail_str = "\n".join(tail_str.split("\n")[skipped_rows:]) - out.append(tail_str) - else: - # TODO(gp): @all use this approach also above and update all the - # unit tests. - df = [ - df.head(nr), - pd.DataFrame( - [["..."] * df.shape[1]], index=[" "], columns=df.columns - ), - df.tail(nr), - ] - df = pd.concat(df) - # Display dataframe. - _display(log_level, df) - if not is_in_ipynb: - txt = "\n".join(out) - else: - txt = "" - return txt - - -def _report_srs_stats(srs: pd.Series) -> List[Any]: - """ - Report dtype, the first element, and its type of series. - - :param srs: The series to report the stats of. - :return: A list of the stats. - """ - row: List[Any] = [] - first_elem = srs.values[0] - num_unique = srs.nunique() - num_nans = srs.isna().sum() - row.extend( - [ - srs.dtype, - hprint.perc(num_unique, len(srs)), - hprint.perc(num_nans, len(srs)), - first_elem, - type(first_elem), - ] - ) - return row - - -def df_to_str( - df: Union[pd.DataFrame, pd.Series, pd.Index], - *, - # TODO(gp): Remove this hack in the integration. - # handle_signed_zeros: bool = False, - handle_signed_zeros: bool = True, - num_rows: Optional[int] = 6, - print_dtypes: bool = False, - print_shape_info: bool = False, - print_nan_info: bool = False, - print_memory_usage: bool = False, - memory_usage_mode: str = "human_readable", - tag: Optional[str] = None, - max_columns: int = 10000, - max_colwidth: int = 2000, - max_rows: int = 500, - precision: int = 6, - display_width: int = 10000, - use_tabulate: bool = False, - log_level: int = logging.DEBUG, -) -> str: - """ - Print a dataframe to string reporting all the columns without trimming. 
- - Note that code like: `_LOG.info(hpandas.df_to_str(df, num_rows=3))` works - properly when called from outside a notebook, i.e., the dataframe is printed - But it won't display the dataframe in a notebook, since the default level at - which the dataframe is displayed is `logging.DEBUG`. - - In this case to get the correct behavior one should do: - ``` - log_level = ... - _LOG.log(log_level, hpandas.df_to_str(df, num_rows=3, log_level=log_level)) - ``` - - :param: handle_signed_zeros: convert `-0.0` to `0.0` - :param: num_rows: max number of rows to print (half from the top and half from - the bottom of the dataframe) - - `None` to print the entire dataframe - :param print_dtypes: report dataframe types and information about the type of - each column by looking at the first value - :param print_shape_info: report dataframe shape, index and columns - :param print_memory_usage: report memory use for each - """ - if df is None: - return "" - if isinstance(df, pd.Series): - df = pd.DataFrame(df) - elif isinstance(df, pd.Index): - df = df.to_frame(index=False) - hdbg.dassert_isinstance(df, pd.DataFrame) - # Convert "negative zeros" to `0.0`. - df = df.copy() - if handle_signed_zeros: - for col_name in df.select_dtypes(include=[np.float64, float]).columns: - df[col_name] = df[col_name].where(df[col_name] != -0.0, 0.0) - out = [] - # Print the tag. - if tag is not None: - out.append(f"# {tag}=") - if not df.empty: - # Print information about the shape and index. - # TODO(Nikola): Revisit and rename print_shape_info to print_axes_info - if print_shape_info: - # TODO(gp): Unfortunately we can't improve this part of the output - # since there are many golden inside the code that would need to be - # updated. Consider automating updating the expected values in the code. 
- txt = f"index=[{df.index.min()}, {df.index.max()}]" - out.append(txt) - txt = f"columns={','.join(map(str, df.columns))}" - out.append(txt) - txt = f"shape={str(df.shape)}" - out.append(txt) - # Print information about the types. - if print_dtypes: - out.append("* type=") - table = [] - row = [] - col_name = "index" - row.append(col_name) - row.extend(_report_srs_stats(df.index)) - row = map(str, row) - table.append(row) - for col_name in df.columns: - row_: List[Any] = [] - row_.append(col_name) - row_.extend(_report_srs_stats(df[col_name])) - row_ = map(str, row_) - table.append(row_) - # - columns = [ - "col_name", - "dtype", - "num_unique", - "num_nans", - "first_elem", - "type(first_elem)", - ] - df_stats = pd.DataFrame(table, columns=columns) - stats_num_rows = None - df_stats_as_str = _df_to_str( - df_stats, - stats_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(df_stats_as_str) - # Print info about memory usage. - if print_memory_usage: - out.append("* memory=") - mem_use_df = pd.concat( - [df.memory_usage(deep=False), df.memory_usage(deep=True)], - axis=1, - keys=["shallow", "deep"], - ) - # Add total row. - mem_use_df_total = pd.DataFrame({"total": mem_use_df.sum(axis=0)}) - mem_use_df = pd.concat([mem_use_df, mem_use_df_total.T]) - # Convert into the desired format. - if memory_usage_mode == "bytes": - pass - elif memory_usage_mode == "human_readable": - import helpers.hintrospection as hintros - - mem_use_df = mem_use_df.applymap(hintros.format_size) - else: - raise ValueError( - f"Invalid memory_usage_mode='{memory_usage_mode}'" - ) - memory_num_rows = None - memory_usage_as_txt = _df_to_str( - mem_use_df, - memory_num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - out.append(memory_usage_as_txt) - # Print info about nans. 
- if print_nan_info: - num_elems = df.shape[0] * df.shape[1] - num_nans = df.isna().sum().sum() - txt = f"num_nans={hprint.perc(num_nans, num_elems)}" - out.append(txt) - # - num_zeros = df.isnull().sum().sum() - txt = f"num_zeros={hprint.perc(num_zeros, num_elems)}" - out.append(txt) - # TODO(gp): np can't do isinf on objects like strings. - # num_infinite = np.isinf(df).sum().sum() - # txt = "num_infinite=" + hprint.perc(num_infinite, num_elems) - # out.append(txt) - # - num_nan_rows = df.dropna().shape[0] - txt = f"num_nan_rows={hprint.perc(num_nan_rows, num_elems)}" - out.append(txt) - # - num_nan_cols = df.dropna(axis=1).shape[1] - txt = f"num_nan_cols={hprint.perc(num_nan_cols, num_elems)}" - out.append(txt) - if hsystem.is_running_in_ipynb(): - if len(out) > 0 and log_level >= hdbg.get_logger_verbosity(): - print("\n".join(out)) - txt = None - # Print the df. - df_as_str = _df_to_str( - df, - num_rows, - max_columns, - max_colwidth, - max_rows, - precision, - display_width, - use_tabulate, - log_level, - ) - if not hsystem.is_running_in_ipynb(): - out.append(df_as_str) - txt = "\n".join(out) - return txt - - -# ############################################################################# - - -def head( - df: pd.DataFrame, - *, - print_columns: bool = False, - num_rows: int = 2, - seed: Union[int, None] = None, -) -> str: - """ - Display a sample of rows from a DataFrame. - - By default shows the first `num_rows` rows. When a seed is provided, - randomly samples `num_rows` rows instead. - - :param df: The DataFrame to sample from. - :param num_rows: Number of rows to display. - :param seed: Optional random seed for reproducible sampling. If None, shows - first rows. 
- """ - txt = "" - if print_columns: - txt += "columns=%s\n" % ",".join(df.columns.tolist()) - txt += "shape=%s\n" % str(df.shape) - # - if seed is not None: - np.random.seed(seed) - index = np.random.choice(df.index, num_rows, replace=False) - index = sorted(index) - df = df.loc[index] - else: - df = df.head(num_rows) - with pd.option_context( - "display.width", - 200, - "display.max_columns", - None, - "display.max_colwidth", - None, - ): - txt += "\n" + str(df) - return txt - - -# ############################################################################# - - -def resolve_column_names( - column_set: ColumnSet, - columns: Union[List[str], pd.Index], - *, - keep_order: bool = False, -) -> List[str]: - """ - Change format of the columns and perform some sanity checks. - - :param column_set: columns to proceed - :param columns: all columns available - :param keep_order: preserve the original order or allow sorting - """ - # Ensure that `columns` is well-formed. - if isinstance(columns, pd.Index): - columns = columns.to_list() - hdbg.dassert_isinstance(columns, list) - hdbg.dassert_lte(1, len(columns)) - # - if column_set is None: - # Columns were not specified, thus use the list of all the columns. - column_set = columns - else: - if isinstance(column_set, str): - column_set = [column_set] - hdbg.dassert_isinstance(column_set, list) - hdbg.dassert_lte(1, len(column_set)) - hdbg.dassert_is_subset(column_set, columns) - if keep_order: - # Keep the selected columns in the same order as in the original - # `columns`. - column_set = [c for c in columns if c in column_set] - return column_set - - -def _get_unique_elements_in_column(df: pd.DataFrame, col_name: str) -> List[Any]: - """ - Get unique elements in a column, handling unhashable types. 
- - :param df: dataframe containing the column - :param col_name: name of the column to get unique elements from - :return: list of unique elements - """ - try: - vals = df[col_name].unique() - except TypeError: - # TypeError: unhashable type: 'list' - _LOG.error("Column '%s' has unhashable types", col_name) - vals = list(set(map(str, df[col_name]))) - cast(List[Any], vals) - return vals - - -def _get_variable_cols( - df: pd.DataFrame, threshold: int = 1 -) -> Tuple[List[str], List[str]]: - """ - Return columns of a df that contain less than unique values. - - :return: (variable columns, constant columns) - """ - var_cols = [] - const_cols = [] - for col_name in df.columns: - unique_elems = _get_unique_elements_in_column(df, col_name) - num_unique_elems = len(unique_elems) - if num_unique_elems <= threshold: - const_cols.append(col_name) - else: - var_cols.append(col_name) - return var_cols, const_cols - - -def remove_columns_with_low_variability( - df: pd.DataFrame, threshold: int = 1, log_level: int = logging.DEBUG -) -> pd.DataFrame: - """ - Remove columns of a df that contain less than unique values. - - :return: df with only columns with sufficient variability - """ - var_cols, const_cols = _get_variable_cols(df, threshold=threshold) - _LOG.log(log_level, "# Constant cols") - for col_name in const_cols: - unique_elems = _get_unique_elements_in_column(df, col_name) - _LOG.log( - log_level, - " %s: %s", - col_name, - hprint.list_to_str(list(map(str, unique_elems))), - ) - _LOG.log(log_level, "# Var cols") - _LOG.log(log_level, hprint.list_to_str(var_cols)) - return df[var_cols] - - -# Start copy-paste From helpers/hpandas_transform.py - - -def add_pct( - df: pd.DataFrame, - col_name: str, - total: int, - dst_col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Add to df a column "dst_col_name" storing the percentage of values in - column "col_name" with respect to "total". 
The rest of the parameters are - the same as hprint.round_digits(). - - :return: updated df - """ - # Add column with percentage right after col_name. - pos_col_name = df.columns.tolist().index(col_name) - df.insert(pos_col_name + 1, dst_col_name, (100.0 * df[col_name]) / total) - # Format. - df[col_name] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in df[col_name] - ] - df[dst_col_name] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in df[dst_col_name] - ] - return df - - -# End copy-paste. - - -def print_column_variability( - df: pd.DataFrame, - max_num_vals: int = 3, - num_digits: int = 2, - use_thousands_separator: bool = True, -) -> pd.DataFrame: - """ - Print statistics about the values in each column of a data frame. - - This is useful to get a sense of which columns are interesting. - """ - print(("# df.columns=%s" % hprint.list_to_str(df.columns))) - res = [] - for c in tauton.tqdm(df.columns, desc="Computing column variability"): - vals = _get_unique_elements_in_column(df, c) - try: - min_val = min(vals) - except TypeError as e: - _LOG.debug("Column='%s' reported %s", c, e) - min_val = "nan" - try: - max_val = max(vals) - except TypeError as e: - _LOG.debug("Column='%s' reported %s", c, e) - max_val = "nan" - if len(vals) <= max_num_vals: - txt = ", ".join(map(str, vals)) - else: - txt = ", ".join(map(str, [min_val, "...", max_val])) - row = ["%20s" % c, len(vals), txt] - res.append(row) - res = pd.DataFrame(res, columns=["col_name", "num", "elems"]) - res.sort_values("num", inplace=True) - # TODO(gp): Fix this. 
- # res = add_count_as_idx(res) - res = add_pct( - res, - "num", - df.shape[0], - "[diff %]", - num_digits=num_digits, - use_thousands_separator=use_thousands_separator, - ) - res.reset_index(drop=True, inplace=True) - return res - - -def breakdown_table( - df: pd.DataFrame, - col_name: str, - num_digits: int = 2, - use_thousands_separator: bool = True, - verbosity: bool = False, -) -> pd.DataFrame: - """ - Create a breakdown table showing value counts and percentages for a column. - - :param df: dataframe to analyze - :param col_name: column name to create breakdown for - :param num_digits: number of decimal digits for percentages - :param use_thousands_separator: whether to use thousands separator - in counts - :param verbosity: whether to print additional details - :return: breakdown table with counts and percentages - """ - if isinstance(col_name, list): - for c in col_name: - print(("\n" + hprint.frame(c).rstrip("\n"))) - res = breakdown_table(df, c) - print(res) - return None - # - if verbosity: - print(("# col_name=%s" % col_name)) - first_col_name = df.columns[0] - res = df.groupby(col_name)[first_col_name].count() - res = pd.DataFrame(res) - res.columns = ["count"] - res.sort_values(["count"], ascending=False, inplace=True) - res = pd.concat( - [res, pd.DataFrame([df.shape[0]], index=["Total"], columns=["count"])] - ) - res["pct"] = (100.0 * res["count"]) / df.shape[0] - # Format. 
- res["count"] = [ - hprint.round_digits( - v, num_digits=None, use_thousands_separator=use_thousands_separator - ) - for v in res["count"] - ] - res["pct"] = [ - hprint.round_digits( - v, num_digits=num_digits, use_thousands_separator=False - ) - for v in res["pct"] - ] - if verbosity: - for k, df_tmp in df.groupby(col_name): - print((hprint.frame("%s=%s" % (col_name, k)))) - cols = [col_name, "description"] - with pd.option_context( - "display.max_colwidth", 100000, "display.width", 130 - ): - print((df_tmp[cols])) - return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py deleted file mode 100644 index 319c6cf44..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparquet.py +++ /dev/null @@ -1,1309 +0,0 @@ -""" -Import as: - -import helpers.hparquet as hparque -""" - -import collections -import datetime -import glob -import logging -import os -from typing import Any, Callable, Iterator, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import pyarrow as pa -import pyarrow.dataset as ds -import pyarrow.fs as pafs -import pyarrow.parquet as pq - -# Check if S3FileSystem is available in `pyarrow.fs`. -if hasattr(pafs, "S3FileSystem"): - S3FileSystemAvailable = True - PyArrowS3FileSystem = pafs.S3FileSystem -else: - S3FileSystemAvailable = False - - # Define a dummy class for type hints when S3FileSystem is not available. 
- class PyArrowS3FileSystem: - def __init__(self, *args, **kwargs): - raise ImportError( - "S3FileSystem is not available in this version of pyarrow.fs" - ) - - -from tqdm.autonotebook import tqdm - -import helpers.hdataframe as hdatafr -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# ParquetDataFrameGenerator -# ############################################################################# - - -class ParquetDataFrameGenerator: - # Allowed types. - OUTPUT_TYPES = ("basic", "verbose_open", "cm_task_1103") - - @staticmethod - def _wrap_all_assets_df(df: List[pd.DataFrame]) -> pd.DataFrame: - # Create a single dataframe for all the assets. - df = pd.concat(df) - _LOG.debug(hpandas.df_to_str(df, print_shape_info=True, tag="df")) - return df - - def _get_core_dataframes(self) -> List[pd.DataFrame]: - """ - Create core dataframes that are updated according to the output type. - - :return: list of core dataframes for specified assets with string values - Example: - - ``` - asset - 2000-01-01 A - 2000-01-02 A - 2000-01-03 A - ``` - """ - # Generate core dataframe for each asset. - df = [] - for asset in self._assets: - asset_df = pd.DataFrame( - {self._asset_col_name: asset}, - index=self._dataframe_index, - ) - _LOG.debug( - hpandas.df_to_str( - asset_df, print_shape_info=True, tag="asset_df" - ) - ) - df.append(asset_df) - return df - - def _get_daily_basic_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. 
- - :return: updated core dataframe as presented below - Example: - - ``` - idx asset val1 val2 - 2000-01-01 0 A 00 00 - 2000-01-02 0 A 01 01 - 2000-01-03 0 A 02 02 - ``` - """ - asset_dataframes = self._get_core_dataframes() - for idx, asset_dataframe in enumerate(asset_dataframes): - # Positioned left from `asset` column. - asset_dataframe.insert(loc=0, column="idx", value=idx) - # Positioned right from `asset` column. - asset_dataframe.insert( - loc=2, - column="val1", - value=list(range(len(self._dataframe_index))), - ) - asset_dataframe.insert( - loc=3, - column="val2", - value=list(range(len(self._dataframe_index))), - ) - return self._wrap_all_assets_df(asset_dataframes) - - def _get_verbose_open_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. - - :return: update core dataframe as presented below - Example: - - ``` - vendor_date interval start_time end_time ticker currency open id - 2021-11-24 60 1637762400 1637762460 A USD 100 1 - 2021-11-24 60 1637762400 1637762460 A USD 200 2 - ``` - """ - interval = self._dataframe_index[1] - self._dataframe_index[0] - interval = interval.seconds - asset_dataframes = self._get_core_dataframes() - for id_, asset_dataframe in enumerate(asset_dataframes): - start_time = ( - asset_dataframe.index - pd.Timestamp("1970-01-01") - ) // pd.Timedelta("1s") - end_time = start_time + interval - # Positioned left from `ticker` column. - asset_dataframe.insert( - loc=0, - column="vendor_date", - value=asset_dataframe.index.date.astype(str), - ) - asset_dataframe.insert(loc=1, column="interval", value=interval) - asset_dataframe.insert(loc=2, column="start_time", value=start_time) - asset_dataframe.insert(loc=3, column="end_time", value=end_time) - # Positioned right from `ticker` column. 
- asset_dataframe.insert(loc=5, column="currency", value="USD") - asset_dataframe.insert( - loc=6, - column="open", - value=list(range(len(self._dataframe_index))), - ) - asset_dataframe.insert(loc=7, column="id", value=id_) - return self._wrap_all_assets_df(asset_dataframes) - - # TODO(Dan): CmTask1490. - def _get_cm_task_1103_dataframe(self) -> pd.DataFrame: - """ - Update core dataframes with additional columns. - - :return: updated core dataframe as presented below - Example: - - ``` - full_symbol close - 2000-01-01 10689 100 - 2000-01-02 10689 200 - 2000-01-03 10689 300 - ``` - """ - asset_dataframes = self._get_core_dataframes() - for asset_dataframe in asset_dataframes: - # Positioned right from asset column. - asset_dataframe.insert( - loc=1, - column="close", - value=list(range(len(self._dataframe_index))), - ) - return self._wrap_all_assets_df(asset_dataframes) - - def __init__( - self, - start_date: str, - end_date: str, - output_type: str, - assets: List[Union[str, int]], - asset_col_name: str, - freq: str, - ) -> None: - """ - Constructor. 
- - :param start_date: start of date range including start_date - :param end_date: end of date range excluding end_date - :param output_type: type of data that is generated - :param assets: list of desired assets that can be names or ids - :param asset_col_name: name of the column that stores assets - :param freq: frequency of steps between start and end date - """ - self._start_date = start_date - self._end_date = end_date - self._output_type = output_type - self._assets = assets - self._asset_col_name = asset_col_name - self._freq = freq - self._dataframe_index = pd.date_range( - self._start_date, - self._end_date, - freq=self._freq, - inclusive="left", - tz="UTC", - ) - self._OUTPUT_TYPE_FUNCTION_MAP = { - "basic": self._get_daily_basic_dataframe, - "verbose_open": self._get_verbose_open_dataframe, - "cm_task_1103": self._get_cm_task_1103_dataframe, - } - - @property - def output_type_function(self) -> Callable: - """ - Return proper function for data generation depending on output type. - """ - return self._OUTPUT_TYPE_FUNCTION_MAP[self._output_type] - - def generate(self) -> pd.DataFrame: - """ - Generate specific dataframe based on inputs provided in instance - creation. - """ - if self._output_type not in self.OUTPUT_TYPES: - raise ValueError(f"Unsupported data type `{self._output_type}`!") - return self.output_type_function() - - -def add_date_partition_columns( - df: pd.DataFrame, partition_mode: str -) -> Tuple[pd.DataFrame, List[str]]: - """ - Add partition columns like year, month, day from datetime index. 
- - :param df: dataframe indexed by timestamp - :param partition_mode: - - "by_date": extract the date from the index - - E.g., an index like `2022-01-10 14:00:00+00:00` is transform to a - column `20220110` - - "by_year_month_day": split the index in year, month, day columns - - "by_year_month": split by year and month - - "by_year_week": split by year and week of the year - - "by_year": split by year - :return: - - df with additional partitioning columns - - list of partitioning columns - """ - with htimer.TimedScope(logging.DEBUG, "# add_date_partition_cols"): - if partition_mode == "by_date": - df["date"] = df.index.strftime("%Y%m%d") - partition_columns = ["date"] - else: - if partition_mode == "by_year_month_day": - partition_columns = ["year", "month", "day"] - elif partition_mode == "by_year_month": - partition_columns = ["year", "month"] - elif partition_mode == "by_year_week": - partition_columns = ["year", "weekofyear"] - elif partition_mode == "by_year": - partition_columns = ["year"] - elif partition_mode == "by_month": - partition_columns = ["month"] - else: - raise ValueError(f"Invalid partition_mode='{partition_mode}'") - # Add date columns chosen by partition mode. - for column_name in partition_columns: - # Extract data corresponding to `column_name` (e.g., - # `df.index.year`). - if column_name == "weekofyear": - # The `weekofyear` attribute has been deprecated in Pandas - # 2.1.0, so weeks are extracted using a function instead of - # the attribute name. - df["weekofyear"] = df.index.isocalendar().week - else: - df[column_name] = getattr(df.index, column_name) - return df, partition_columns - - -def to_partitioned_parquet( - df: pd.DataFrame, - partition_columns: List[str], - dst_dir: str, - *, - aws_profile: hs3.AwsProfile = None, - basename_template: str = None, -) -> None: - """ - Save the given dataframe as Parquet file partitioned along the given - columns. 
- - :param df: dataframe - :param partition_columns: partitioning columns - :param dst_dir: location of partitioned dataset - :param aws_profile: the name of an AWS profile or a s3fs filesystem - - E.g., in case of partition using `date`, the file layout looks like: - ``` - dst_dir/ - date=20211230/ - data.parquet - date=20211231/ - data.parquet - date=20220101/ - data.parquet - ``` - - In case of multiple columns like `asset`, `year`, `month`, the file layout - looks like: - ``` - dst_dir/ - asset=A/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... - asset=B/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - """ - # Use either S3 or local filesystem. - filesystem = None - if aws_profile is not None: - filesystem = hs3.get_s3fs(aws_profile) - # ParquetDataset appends an extra "/", creating an empty-named folder - # when saving on S3. - dst_dir = dst_dir.rstrip("/") - with htimer.TimedScope(logging.DEBUG, "# partition_dataset"): - # Read. - table = pa.Table.from_pandas(df) - # Write using partition. - # TODO(gp): add this logic to hparquet.to_parquet as a possible option. - _LOG.debug(hprint.to_str("partition_columns dst_dir")) - hdbg.dassert_is_subset(partition_columns, df.columns) - # TODO(gp): We would like to avoid overriding existing tiles. It's not clear - # how to do it. Either setting permissions to read-only before writing. - # Or having a list of files that will be written and ensure that none of - # those files already existing. 
- pq.write_to_dataset( - table, - dst_dir, - partition_cols=partition_columns, - filesystem=filesystem, - basename_template=basename_template, - ) - - -def generate_parquet_files( - start_date: str, - end_date: str, - assets: List[Union[str, int]], - asset_col_name: str, - dst_dir: str, - *, - freq: str = "1H", - output_type: str = "basic", - partition_mode: str = "by_date", - custom_partition_cols: Optional[str] = None, - reset_index: bool = False, -) -> None: - """ - Generate parquet files for testing. - - :param start_date: date from which the data is generated, value - included - :param end_date: date until which the data is generated, value - excluded - :param assets: list of assets that can be either names or ids - :param asset_col_name: name of the column that stores assets - :param dst_dir: destination dir for generated data - :param freq: frequency of data generation - :param output_type: type of data that is generated - :param partition_mode: Partition mode for parquet DataFrame, default - by date - :param custom_partition_cols: overrides default partition by time - :param reset_index: reset dataframe index to default sequential - integer values - """ - # Generate timespan. - hdbg.dassert_lt(start_date, end_date) - timespan = pd.date_range(start_date, end_date) - hdbg.dassert_lt(2, len(timespan)) - # Run dataframe generation. - pdg = ParquetDataFrameGenerator( - start_date, end_date, output_type, assets, asset_col_name, freq - ) - parquet_df = pdg.generate() - # Add partition columns to the dataframe. - df, partition_cols = add_date_partition_columns(parquet_df, partition_mode) - if custom_partition_cols: - # If custom partition is provided, it will override date partition. - # Sample: `["asset", "year", "month"]` - custom_partition_cols = custom_partition_cols.split(",") - # Ensure that date partition columns are present. 
- hdbg.dassert_is_subset(partition_cols, custom_partition_cols) - partition_cols = custom_partition_cols - # Partition and write dataset. - if reset_index: - df = df.reset_index(drop=True) - # TODO(Nikola): When direct run is possible, expose usage of `aws_profile` - # so generator can be used in conjunction with `helpers.hmoto.S3Mock_TestCase`. - # Will probably be part of CMTask #1490. - to_partitioned_parquet(df, partition_cols, dst_dir) - - -def get_pyarrow_s3fs(*args: Any, **kwargs: Any) -> PyArrowS3FileSystem: - """ - Return an Pyarrow S3Fs object from a given AWS profile. - - Same as `hs3.get_s3fs`, used specifically for accessing Parquet - datasets. - """ - # Check if S3FileSystem is available - hdbg.dassert( - S3FileSystemAvailable, - "S3FileSystem is not available in this version of pyarrow.fs", - ) - # When deploying jobs via ECS the container obtains credentials based on passed - # task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if hserver.is_inside_ecs_container(): - _LOG.info("Fetching credentials from task IAM role") - s3fs_ = PyArrowS3FileSystem() - else: - aws_credentials = hs3.get_aws_credentials(*args, **kwargs) - s3fs_ = PyArrowS3FileSystem( - access_key=aws_credentials["aws_access_key_id"], - secret_key=aws_credentials["aws_secret_access_key"], - session_token=aws_credentials["aws_session_token"], - region=aws_credentials["aws_region"], - ) - return s3fs_ - - -def _get_parquet_tiles_from_file_path(file_path: str) -> List[Tuple[str, Any]]: - """ - Hacky function to help get tile values from parquet file path. - - Used by from_parquet when loading first n rows of a dataset only. 
- - Example - input: ...ccxt/binance/v1_0_0/currency_pair=CTK_USDT/ - year=2023/month=3/26dc59f62b87403d9a3e9f04c7c21382-0.parquet - output: [("currency_pair", "CTK_USDT"), ("year", 2023), ("month", 3)] - """ - path_parts = file_path.split("/") - tiles = [] - for part in path_parts: - if "=" in part: - col, value = part.split("=") - value = int(value) if value.isdigit() else value - tiles.append((col, value)) - return tiles - - -# TODO(Dan): Add mode to allow querying even when some non-existing columns are passed. -def from_parquet( - file_name: str, - *, - columns: Optional[List[str]] = None, - filters: Optional[List[Any]] = None, - n_rows: Optional[int] = None, - schema: Optional[List[Tuple[str, pa.DataType]]] = None, - log_level: int = logging.DEBUG, - report_stats: bool = False, - aws_profile: hs3.AwsProfile = None, -) -> pd.DataFrame: - """ - Load a dataframe from a Parquet file. - - The difference with `pd.read_pq` is that here we use Parquet - Dataset. - - :param file_name: path to a Parquet dataset - :param columns: columns to return, skipping reading columns that are not requested - - `None` means return all available columns - :param filters: Parquet query - :param n_rows: the number of rows to load, load all data if `None` - :param schema: see `pyarrow.Schema`, e.g., `schema = - [("int_col", pa.int32()), ("str_col", pa.string())]` - :param log_level: logging level to execute at - :param report_stats: whether to report Parquet file size or not - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - :return: data from Parquet dataset - """ - _LOG.debug(hprint.to_str("file_name columns filters schema")) - hdbg.dassert_isinstance(file_name, str) - hs3.dassert_is_valid_aws_profile(file_name, aws_profile) - if hs3.is_s3_path(file_name): - if isinstance(aws_profile, str): - filesystem = get_pyarrow_s3fs(aws_profile) - else: - # Note: `s3fs` filesystem is only to be used on exact file path - # as 
`pq.ParquetDataset` is not properly handling directory path. - filesystem = aws_profile - # Pyarrow S3FileSystem does not have `exists` method. - s3_filesystem = hs3.get_s3fs(aws_profile) - hs3.dassert_path_exists(file_name, s3_filesystem) - file_name = file_name.lstrip("s3://") - else: - filesystem = None - hdbg.dassert_path_exists(file_name) - # Load data. - with htimer.TimedScope( - logging.DEBUG, f"# Reading Parquet file '{file_name}'" - ) as ts: - if n_rows: - # Get the latest parquet file in the directory. - hdbg.dassert_isinstance( - aws_profile, - str, - "aws_profile must be a string for S3 operations", - ) - last_pq_file = hs3.get_latest_pq_in_s3_dir(file_name, aws_profile) - file = s3_filesystem.open(last_pq_file, "rb") - # Load the data. - parquet_file = pq.ParquetFile(file) - # Get the head of the data. - df = ( - parquet_file.read_row_group(0, columns=parquet_file.schema.names) - .to_pandas() - .head(n_rows) - ) - if columns: - # Note: `schema.names` also includes and index. - hdbg.dassert_is_subset(columns, parquet_file.schema.names) - df = df[columns] - # Hacky way to append tile values lost when obtaining particular .pq file. - tiles = _get_parquet_tiles_from_file_path(last_pq_file) - for col, value in tiles: - df[col] = value - else: - if schema is not None: - # Pass partition columns types explicitly. - schema = pa.schema(schema) - partitioning = ds.partitioning(schema, flavor="hive") - dataset = pq.ParquetDataset( - # Replace URI with path. - file_name, - filesystem=filesystem, - filters=filters, - partitioning=partitioning, - ) - if columns: - # Note: `schema.names` also includes and index. - hdbg.dassert_is_subset(columns, dataset.schema.names) - # To read also the index we need to use `read_pandas()`, instead of - # `read_table()`. - # See https://arrow.apache.org/docs/python/parquet.html#reading-and-writing-single-files. 
- table = dataset.read_pandas(columns=columns) - # Convert the Pandas Dataframe timestamp columns and index to `ns` - # resolution. The general approach is to preserve the time unit - # information after reading data back from Parquet files. - # Currently, it's challenging to resolve this issue since Parquet - # data is mixed with data from CSV files, which convert the time - # unit to `ns` by default. Refer to CmampTask7331 for details. - # https://github.com/cryptokaizen/cmamp/issues/7331 - df = table.to_pandas(coerce_temporal_nanoseconds=True) - if isinstance(df.index, pd.DatetimeIndex): - df.index = df.index.as_unit("ns") - # Report stats about the df. - _LOG.debug("df.shape=%s", str(df.shape)) - mem = df.memory_usage().sum() - _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) - # Report stats about the Parquet file size. - if report_stats: - file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) - _LOG.log( - log_level, - "Loaded '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - return df - - -# Copied from `hio.create_enclosing_dir()` to avoid circular dependencies. -def _create_enclosing_dir(file_name: str) -> Optional[str]: - dir_name = os.path.dirname(file_name) - if dir_name != "": - _LOG.debug( - "Creating dir_name='%s' for file_name='%s'", dir_name, file_name - ) - hdbg.dassert_is_not(dir_name, None) - dir_name = os.path.normpath(dir_name) - if os.path.normpath(dir_name) == ".": - _LOG.debug("Can't create dir '%s'", dir_name) - if os.path.exists(dir_name): - # The dir exists and we want to keep it, so we are done. - _LOG.debug("The dir '%s' exists: exiting", dir_name) - return None - _LOG.debug("Creating directory '%s'", dir_name) - try: - os.makedirs(dir_name) - except OSError as e: - _LOG.error(str(e)) - # It can happen that we try to create the directory while somebody else - # created it, so we neutralize the corresponding exception. - if e.errno == 17: - # OSError: [Errno 17] File exists. 
- pass - else: - raise e - hdbg.dassert_dir_exists(dir_name, "file_name='%s'", file_name) - return dir_name - - -def to_parquet( - df: pd.DataFrame, - file_name: str, - *, - log_level: int = logging.DEBUG, - report_stats: bool = False, - aws_profile: hs3.AwsProfile = None, -) -> None: - """ - Save a dataframe as Parquet. - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_isinstance(file_name, str) - hs3.dassert_is_valid_aws_profile(file_name, aws_profile) - if hs3.is_s3_path(file_name): - filesystem = hs3.get_s3fs(aws_profile) - hs3.dassert_path_not_exists(file_name, filesystem) - file_name = file_name.lstrip("s3://") - else: - filesystem = None - hdbg.dassert_path_not_exists(file_name) - hdbg.dassert_file_extension(file_name, ["parquet", "pq"]) - # There is no concept of directory on S3. - # Only applicable to local filesystem. - if aws_profile is None: - _create_enclosing_dir(file_name) - # Report stats about the df. - _LOG.debug("df.shape=%s", str(df.shape)) - mem = df.memory_usage().sum() - _LOG.debug("df.memory_usage=%s", hintros.format_size(mem)) - # Save data. - with htimer.TimedScope( - logging.DEBUG, f"# Writing Parquet file '{file_name}'" - ) as ts: - table = pa.Table.from_pandas(df) - # This is needed to handle: - # ``` - # pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=America/New_York] - # to timestamp[us] would lose data: 1663595160000000030 - # ``` - # No need to cast to `us` since pyarrow >= 15.0.0. - # See - # https://github.com/cryptokaizen/cmamp/blob/master/docs/infra/all.parquet.explanation.md#time-unit-conversion-when-writing-to-parquet - # for details. - # parquet_args = { - # "coerce_timestamps": "us", - # "allow_truncated_timestamps": True, - # } - # pq.write_table(table, file_name, filesystem=filesystem, **parquet_args) - pq.write_table(table, file_name, filesystem=filesystem) - # Report stats about the Parquet file size. 
- if report_stats: - file_size = hs3.du(file_name, human_format=True, aws_profile=aws_profile) - _LOG.log( - log_level, - "Saved '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - - -# ############################################################################# - - -def _yield_parquet_tile( - file_name: str, - columns: Optional[List[str]], - filters: List[Any], - asset_id_col: str, -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in a single tile given the filters. - - It is assumed that data is partitioned by asset_id, year and month, i.e. - the file layout is: - - ``` - file_name/ - asset_id=1032127330/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... - asset_id=2133227690/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - - :param file_name: see `from_parquet()` - :param columns: see `from_parquet()` - :param filters: see `from_parquet()` - :param asset_id_col: name of the column with asset ids - :return: a generator of `from_parquet()` dataframe - """ - # Without the schema being provided `pyarrow` incorrectly infers - # type of the asset id column, i.e. `pyarrow` reads assets as - # strings instead of integers. See the related discussion at - # `https://issues.apache.org/jira/browse/ARROW-6114`. - int_type = np.int64 - pyarrow_int_type = pa.from_numpy_dtype(int_type) - schema = [ - (asset_id_col, pyarrow_int_type), - # TODO(Grisha): consider passing year and month column names as params. 
- ("year", pyarrow_int_type), - ("month", pyarrow_int_type), - ] - tile = from_parquet( - file_name, - columns=columns, - filters=filters, - schema=schema, - ) - hpandas.dassert_series_type_is(tile[asset_id_col], int_type) - yield tile - - -def build_asset_id_filter( - asset_ids: List[int], - asset_id_col: str, -) -> List[List[Tuple[str, str, int]]]: - filters = [] - for asset_id in asset_ids: - filters.append([(asset_id_col, "==", asset_id)]) - return filters - - -def build_year_month_filter( - start_date: datetime.date, - end_date: datetime.date, -) -> list: - """ - Use the year/months to build a Parquet filter. - - If `start_date.year == end_date.year`, then return a list of - three tuples (to be "ANDed" together) based on the year and months. - Else, return a list of list of tuples: - - the inner lists consist of AND filters; the inner lists are ORed - together if used as a single filter - - each inner list filter represents a calendar year or part thereof - - One use case of this function is to generate a filter whose OR - components can be processed one-by-one. For example, if memory constraints - prevent loading an entire tile at once, then one could instead attempt to - load one-year tiles one at a time. - - NOTE: `start_date.day` and `end_date.day` are ignored. - - TODO(Paul): Consider adding a switch to support smaller AND filter chunks - (e.g., at monthly instead of yearly granularity). 
- """ - hdbg.dassert_isinstance(start_date, datetime.date) - hdbg.dassert_isinstance(end_date, datetime.date) - hdbg.dassert_lte(start_date, end_date) - start_year = start_date.year - end_year = end_date.year - filter_ = [] - # - if start_year == end_year: - filter_.append(("year", "==", start_year)) - filter_.append(("month", ">=", start_date.month)) - filter_.append(("month", "<=", end_date.month)) - else: - start_year_filter = [] - start_year_filter.append(("year", "==", start_year)) - start_year_filter.append(("month", ">=", start_date.month)) - end_year_filter = [] - end_year_filter.append(("year", "==", end_year)) - end_year_filter.append(("month", "<=", end_date.month)) - filter_.append(start_year_filter) - filter_.append(end_year_filter) - for year in range(start_year + 1, end_year): - year_filter = [] - year_filter.append(("year", "==", year)) - filter_.append(year_filter) - return filter_ - - -def yield_parquet_tiles_by_year( - file_name: str, - start_date: datetime.date, - end_date: datetime.date, - cols: List[Union[int, str]], - *, - asset_ids: Optional[List[int]] = None, - asset_id_col: str = "asset_id", -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in tiles up to one year in length. - - :param file_name: as in `from_parquet()` - :param start_date: first date to load; day is ignored - :param end_date: last date to load; day is ignored - :param cols: if an `int` is supplied, it is cast to a string before reading - :param asset_ids: asset ids to load - :param asset_id_col: see `_yield_parquet_tile()` - :return: a generator of `from_parquet()` dataframes - """ - time_filters = build_year_month_filter(start_date, end_date) - hdbg.dassert_isinstance(time_filters, list) - # The list should not be empty. 
- hdbg.dassert(time_filters) - if not isinstance(time_filters[0], list): - time_filters = [time_filters] - columns = [str(col) for col in cols] - if asset_ids is None: - asset_ids = [] - asset_id_filter = build_asset_id_filter(asset_ids, asset_id_col) - for time_filter in time_filters: - if asset_id_filter: - combined_filter = [ - id_filter + time_filter for id_filter in asset_id_filter - ] - else: - combined_filter = time_filter - yield from _yield_parquet_tile( - file_name, columns, combined_filter, asset_id_col - ) - - -# TODO(Paul): Add additional time-restriction filter. -def yield_parquet_tiles_by_assets( - file_name: str, - asset_ids: List[int], - asset_id_col: str, - asset_batch_size: int, - cols: Optional[List[Union[int, str]]], -) -> Iterator[pd.DataFrame]: - """ - Yield Parquet data in tiles batched by asset ids. - - :param file_name: as in `from_parquet()` - :param asset_ids: asset ids to load - :param asset_id_col: see `_yield_parquet_tile()` - :param asset_batch_size: the number of asset to load in a single batch - :param cols: if an `int` is supplied, it is cast to a string before reading - :return: a generator of `from_parquet()` dataframes - """ - hdbg.dassert_isinstance(asset_id_col, str) - hdbg.dassert(asset_id_col, "`asset_id_col` must be nonempty") - batches = [ - asset_ids[i : i + asset_batch_size] - for i in range(0, len(asset_ids), asset_batch_size) - ] - columns: Optional[List[str]] = None - if cols: - columns = [str(col) for col in cols] - for batch in tqdm(batches): - _LOG.debug("assets=%s", batch) - filter_ = build_asset_id_filter(batch, asset_id_col) - yield from _yield_parquet_tile(file_name, columns, filter_, asset_id_col) - - -def build_filter_with_only_equalities( - start_timestamp: pd.Timestamp, end_timestamp: pd.Timestamp -) -> list: - """ - Build a list of Parquet filters based on equality conditions for partition - columns. 
- - This function creates a filter for each partition column (year, month, day) based on the - equality conditions between components of the timestamp arguments when possible. - - Example: - Input args: - start_timestamp: 2022-08-31T00:01:00+00:00 - end-timestamp: 2022-08-31T23:59:59+00:00 - Output: - [("year", "=", 2022), ("month", "=", 8), ("day", "=", 31)] - - These filters enhance performance by allowing to load data quicker when used in tandem with timestamp filters. - Less memory will be used because less `.pq` need to be loaded. - - :param start_timestamp: start of the interval. - :param end_timestamp: end of the interval: - """ - hdbg.dassert_isinstance(start_timestamp, pd.Timestamp) - hdbg.dassert_isinstance(end_timestamp, pd.Timestamp) - filters = [] - if start_timestamp.year == end_timestamp.year: - filters.append(("year", "==", start_timestamp.year)) - if start_timestamp.month == end_timestamp.month: - filters.append(("month", "==", start_timestamp.month)) - if start_timestamp.day == end_timestamp.day: - filters.append(("day", "==", start_timestamp.day)) - return filters - - -# TODO(Paul): The `int` assumption is baked in. We can generalize to strings -# if needed, but if we do, then we should continue to handle string ints as -# ints as we do here (e.g., there are sorting advantages, among others). -def _process_walk_triple( - triple: tuple, start_depth: int -) -> Tuple[Tuple[str, ...], Tuple[int, ...]]: - """ - Process a triple returned by `os.walk()` - - :param triple: (dirpath: str, dirnames: List[str], filenames: List[str]) - :param start_depth: the "depth" of `path` used in the call - `os.walk(path)` - :return: tuple(lhs_vals), tuple(rhs_vals) - """ - lhs_vals: List[str] = [] - rhs_vals: List[int] = [] - # If there are subdirectories, do not process. 
- if triple[1]: - return tuple(lhs_vals), tuple(rhs_vals) - depth = len(triple[0].split("/")) - rel_depth = depth - start_depth - key = tuple(triple[0].split("/")[start_depth:]) - if len(key) == 0: - return tuple(lhs_vals), tuple(rhs_vals) - hdbg.dassert_eq(len(key), rel_depth) - lhs_vals = [] - rhs_vals = [] - for string in key: - lhs, rhs = string.split("=") - lhs_vals.append(lhs) - rhs_vals.append(int(rhs)) - hdbg.dassert_eq(len(lhs_vals), len(rhs_vals)) - return tuple(lhs_vals), tuple(rhs_vals) - - -def collate_parquet_tile_metadata( - path: str, -) -> pd.DataFrame: - """ - Report stats in a dataframe on Parquet file partitions. - - The directories should be of the form `lhs=rhs` where "rhs" is a string - representation of an `int`. - - :param path: path to top-level Parquet directory - :return: dataframe with two file size columns and a multiindex reflecting - the Parquet path structure. - """ - hdbg.dassert_dir_exists(path) - # Remove the trailing slash to simplify downstream accounting. - if path.endswith("/"): - path = path[:-1] - hdbg.dassert(not path.endswith("/")) - # Walk the path. - # os.walk() yields a 3-tuple of the form - # (dirpath: str, dirnames: List[str], filenames: List[str]) - start_depth = len(path.split("/")) - headers_set = set() - dict_ = collections.OrderedDict() - for triple in os.walk(path): - # If the walk has taken us to, e.g., - # asset_id=100/year=2010/month=1/data.parquet - # then we expect - # lhs = ("asset_id", "year", "month") - # rhs = (100, 2010, 1) - lhs, rhs = _process_walk_triple(triple, start_depth) - # If the walkabout has not yet taken us to a file, continue. - if not lhs: - continue - # The tuple `lhs` is to become the index headers. We check later - # for uniqueness. - headers_set.add(lhs) - # Get the file name and full path. - file_name = triple[2][0] - file_path = os.path.join(triple[0], file_name) - # Record the size of the file. We keep this in bytes for easy - # join aggregations. 
- size_in_bytes = os.path.getsize(file_path) - dict_[rhs] = size_in_bytes - # Ensure that headers are unambiguous. - hdbg.dassert_eq(len(headers_set), 1) - # Convert to a multiindexed dataframe. - df = pd.DataFrame(dict_.values(), index=dict_.keys()) - df.rename(columns={0: "file_size_in_bytes"}, inplace=True) - headers = headers_set.pop() - df.index.names = headers - df.sort_index(inplace=True) - # Add a more human-readable file size column. Keep the original numerical - # one for downstream aggregations. - file_size = df["file_size_in_bytes"].apply(hintros.format_size) - df["file_size"] = file_size - return df - - -# ############################################################################# - -# A Parquet filtering condition. e.g., `("year", "=", year)` -ParquetFilter = Tuple[str, str, Any] -# The AND of Parquet filtering conditions, e.g., -# `[("year", "=", year), ("month", "=", month)]` -ParquetAndFilter = List[ParquetFilter] -# A OR-AND Parquet filtering condition, e.g., -# ``` -# [[('year', '=', 2020), ('month', '=', 1)], -# [('year', '=', 2020), ('month', '=', 2)], -# [('year', '=', 2020), ('month', '=', 3)]] -# ``` -ParquetOrAndFilter = List[ParquetAndFilter] - - -# TODO(gp): @Nikola add light unit tests for `by_year_week` and for additional_filter. -# TODO(gp): Can we return a single type? -def get_parquet_filters_from_timestamp_interval( - partition_mode: str, - start_timestamp: Optional[pd.Timestamp], - end_timestamp: Optional[pd.Timestamp], - *, - additional_filters: Optional[List[ParquetFilter]] = None, -) -> Union[ParquetOrAndFilter, ParquetAndFilter]: - """ - Convert a constraint on a timestamp [start_timestamp, end_timestamp] into a - Parquet filters expression, based on the passed partitioning / tiling - criteria. - - :param partition_mode: control filtering of Parquet datasets. It needs to be - in sync with the way the data was saved - :param start_timestamp: start of the interval. 
`None` means no bound - :param end_timestamp: end of the interval. `None` means no bound - :param additional_filters: AND conditions to add to the final filter. - E.g., if we want to constraint also on `exchange_id` and 'currency_pair`, - we can specify - `[("exchange_id", "in", (...)),("currency_pair", "in", (...))]` - :return: list of OR-AND predicates - """ - # Check timestamp interval. - left_close = True - right_close = True - hdateti.dassert_is_valid_interval( - start_timestamp, - end_timestamp, - left_close=left_close, - right_close=right_close, - ) - or_and_filter = [] - if partition_mode == "by_year_month": - # Handle the first and last year of the interval. - if start_timestamp: - # `[('year', '==', 2020), ('month', '>=', 6)]` - and_filter = [ - ("year", "==", start_timestamp.year), - ("month", ">=", start_timestamp.month), - ] - or_and_filter.append(and_filter) - if end_timestamp: - # `[('year', '==', 2021), ('month', '<=', 3)]` - and_filter = [ - ("year", "==", end_timestamp.year), - ("month", "<=", end_timestamp.month), - ] - or_and_filter.append(and_filter) - if start_timestamp and end_timestamp: - number_of_years = len( - range(start_timestamp.year, end_timestamp.year + 1) - ) - if number_of_years == 1: - # For a one-year range, we overwrite the result with a single AND - # statement, e.g., `[Jan 2020, Mar 2020]` corresponds to - # `[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 3)]]`. - # Note that this interval is different from and OR-AND form as - # `[[('year', '==', 2020), ('month', '>=', 1)], - # [('year', '==', 2020), ('month', '<=', 3)]]` - # since the first AND clause include months <= 3 and the second one - # include months >= 1, and the OR corresponds to the entire year, - # instead of the interval `[Jan 2020, Mar 2020]`. 
- and_filter = [ - ("year", "==", start_timestamp.year), - ("month", ">=", start_timestamp.month), - ("month", "<=", end_timestamp.month), - ] - or_and_filter = [and_filter] - elif number_of_years > 2: - # For ranges over two years, one OR statement is necessary to bridge - # the gap between first and last AND statement. - # `[('year', '>', 2020), ('year', '<', 2023)]` - # Inserted in middle as bridge between AND statements. - and_filter = [ - ("year", ">", start_timestamp.year), - ("year", "<", end_timestamp.year), - ] - or_and_filter.insert(1, and_filter) - else: - # For intervals of exactly two years the two AND conditions are - # enough to select the desired period of time. - pass - elif len(or_and_filter) == 1: - # Handle the case when exactly one of the interval bounds is passed, - # e.g., [June 2020, None]. - # In this case the first year was covered by the code above (i.e., - # `year >= 2020 and month == 6`) and we need to specify the rest of - # the years (i.e., `year > 2020`). - operator = ">" if start_timestamp else "<" - timestamp = start_timestamp if start_timestamp else end_timestamp - hdbg.dassert_is_not(timestamp, None, "timestamp should not be None") - extra_filter = [("year", operator, timestamp.year)] - or_and_filter.append(extra_filter) - else: - # If there is no interval provided, leave empty `or_and_filter` as is. - pass - elif partition_mode == "by_year_week": - # TODO(gp): Consider using the same approach above for months also here. - # Partition by year and week. - hdbg.dassert_is_not( - end_timestamp, - None, - "Parquet backend can't determine the boundaries of the data", - ) - # Include last week in the interval. - end_timestamp += pd.DateOffset(weeks=1) - # Get all weeks in the interval. 
- hdbg.dassert_is_not( - start_timestamp, - None, - "start_timestamp should not be None for by_year_week partition mode", - ) - dates = pd.date_range( - start_timestamp.date(), end_timestamp.date(), freq="W" - ) - for date in dates: - year = date.year - # https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar - weekofyear = date.isocalendar().week - and_filter = [("year", "=", year), ("weekofyear", "=", weekofyear)] - or_and_filter.append(and_filter) - else: - raise ValueError(f"Unknown partition mode `{partition_mode}`!") - if additional_filters: - hdbg.dassert_isinstance(additional_filters, list) - if or_and_filter: - # Append additional filters for every present timestamp filter. - or_and_filter = [ - additional_filters + and_filter for and_filter in or_and_filter - ] - else: - # If no timestamp filters are provided, use additional filters. - or_and_filter = additional_filters - _LOG.debug("or_and_filter=%s", str(or_and_filter)) - if len(or_and_filter) == 0: - # Empty list is not acceptable value for pyarrow dataset. - # Only logical expression or `None`. - or_and_filter = None - return or_and_filter - - -def list_and_merge_pq_files( - root_dir: str, - *, - file_name: str = "data.parquet", - aws_profile: hs3.AwsProfile = None, - drop_duplicates_mode: Optional[str] = None, -) -> None: - """ - Merge all files of the Parquet dataset. - - Can be generalized to any used partition. - - The standard partition (also known as "by-tile") assumed is: - - ``` - root_dir/ - currency_pair=ADA_USDT/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ... 
- currency_pair=EOS_USDT/ - year=2021/ - month=12/ - data.parquet - year=2022/ - month=01/ - data.parquet - ``` - - :param root_dir: root directory of Parquet dataset - :param file_name: name of the single resulting file - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - if aws_profile is not None: - filesystem = hs3.get_s3fs(aws_profile) - else: - filesystem = None - # Get full paths to each Parquet file inside root dir. - if filesystem: - # Use specialized S3 filesystem function to list Parquet files efficiently. - # since glob.glob() is very slow as it does a lot of accesses to S3. - # The extra `**/*` is needed by `pyarrow` >= 17. - parquet_files = filesystem.glob(f"{root_dir}/**/*.parquet") - else: - # For local filesystem, use glob.glob - parquet_files = glob.glob(f"{root_dir}/**/*.parquet", recursive=True) - _LOG.debug("Parquet files: '%s'", parquet_files) - # Get paths only to the lowest level of dataset folders. - dataset_folders = {f.rsplit("/", 1)[0] for f in parquet_files} - for folder in dataset_folders: - # Get files per folder and merge if there are multiple ones. - if filesystem: - # Use specialized S3 filesystem function to list Parquet files efficiently. - folder_files = filesystem.ls(folder) - else: - # For local filesystem, use os.listdir - folder_files = [os.path.join(folder, f) for f in os.listdir(folder)] - hdbg.dassert_ne( - len(folder_files), 0, msg=f"Empty folder `{folder}` detected!" - ) - if len(folder_files) == 1 and folder_files[0].endswith("/data.parquet"): - # If there is already single `data.parquet` file, no action is required. - continue - # Read all files in target folder. - # `partitioning=None` is required to read the dataset without - # partitioning columns. See CmTask7324 for details. 
- # https://github.com/cryptokaizen/cmamp/issues/7324 - data = pq.ParquetDataset( - folder_files, filesystem=filesystem, partitioning=None - ).read() - data = data.to_pandas() - # Drop duplicates on all non-metadata columns. - # TODO(gp): hparquet is general and we should pass the columns to remove - # or perform the transform after. - if drop_duplicates_mode is None: - duplicate_columns = data.columns.to_list() - for col_name in ["knowledge_timestamp", "end_download_timestamp"]: - if col_name in duplicate_columns: - duplicate_columns.remove(col_name) - control_column = None - elif drop_duplicates_mode == "bid_ask": - # Drop duplicates on timestamp index. - duplicate_columns = ["timestamp", "exchange_id"] - control_column = None - elif drop_duplicates_mode == "ohlcv": - # Drop duplicates on timestamp and keep one with largest volume. - duplicate_columns = ["timestamp", "exchange_id"] - control_column = "volume" - else: - hdbg.dfatal("Supported drop duplicates modes: ohlcv, bid_ask") - data = hdatafr.remove_duplicates(data, duplicate_columns, control_column) - # Remove all old files and write the new, merged one. - if filesystem: - filesystem.rm(folder, recursive=True) - pq.write_table( - pa.Table.from_pandas(data), - folder + "/" + file_name, - filesystem=filesystem, - ) - else: - # Use os.remove for local filesystem to remove files. - for file_path in folder_files: - os.remove(file_path) - data.to_parquet(os.path.join(folder, file_name)) - - -def maybe_cast_to_int(string: str) -> Union[str, int]: - """ - Return `string` as an `int` if convertible, otherwise a no-op. - - This is useful for parsing mixed-type dataframe columns that may - contain strings and ints. For example, a dataframe with columns - `feature1, feature2, 1, 2, 3` will be written and read back with - columns `1`, `2`, `3` as the strings "1", "2", "3" rather than the - ints. This function can be used to rectify that in a post-processing - column rename. 
- """ - hdbg.dassert_isinstance(string, str) - try: - val = int(string) - except ValueError: - val = string - return val diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py deleted file mode 100644 index bb04164ea..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hparser.py +++ /dev/null @@ -1,1176 +0,0 @@ -""" -Import as: - -import helpers.hparser as hparser -""" - -import argparse -import logging -import os -import sys -from typing import Any, Dict, List, Optional, Tuple, Union - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# TODO(gp): arg -> args - - -# ############################################################################# - - -def add_bool_arg( - parser: argparse.ArgumentParser, - name: str, - *, - default_value: bool = False, - help_: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add options to a parser like `--xyz` and `--no_xyz`, controlled by - `args.xyz`. - - E.g., `add_bool_arg(parser, "run_diff_script", default_value=True)` adds - two options: - ``` - --run_diff_script Run the diffing script or not - --no_run_diff_script - ``` - corresponding to `args.run_diff_script`, where the default behavior is to have - that value equal to True unless one specifies `--no_run_diff_script`. 
- """ - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument("--" + name, dest=name, action="store_true", help=help_) - group.add_argument("--no_" + name, dest=name, action="store_false") - parser.set_defaults(**{name: default_value}) - return parser - - -# ############################################################################# - - -def add_verbosity_arg( - parser: argparse.ArgumentParser, *, log_level: str = "INFO" -) -> argparse.ArgumentParser: - parser.add_argument( - "-v", - dest="log_level", - default=log_level, - # TRACE=5 - # DEBUG=10 - # INFO=20 - # WARNING=30 - # CRITICAL=50 - choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - help="Set the logging level", - ) - parser.add_argument( - "--no_report_command_line", - action="store_true", - help="Disable printing of executed commands", - ) - return parser - - -# TODO(gp): Use this everywhere. -def parse_verbosity_args( - args: argparse.Namespace, *args_: Any, **kwargs: Any -) -> None: - if hasattr(args, "no_report_command_line") and args.no_report_command_line: - report_command_line = False - else: - report_command_line = True - kwargs["report_command_line"] = report_command_line - # if args.log_level == "VERB_DEBUG": - # args.log_level = 5 - hdbg.init_logger(verbosity=args.log_level, *args_, **kwargs) - - -# ############################################################################# -# Command line for `@hcache_simple.simple_cache` functions. -# ############################################################################# - - -# TODO(gp): Use the ones from hcache_simple.py for DRY. -_CACHE_MODE_CHOICES = ("REFRESH_CACHE", "DISABLE_CACHE", "HIT_CACHE_OR_ABORT") - - -def add_cache_control_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add `--cache_mode` switch controlling every - `@hcache_simple.simple_cache`-decorated function in the process. 
- - The resolved mode is applied globally via - `hcache_simple.set_global_cache_mode` in `parse_cache_control_args()`. - """ - parser.add_argument( - "--cache_mode", - action="store", - default=None, - choices=list(_CACHE_MODE_CHOICES), - help=( - "Override cache behavior for all @simple_cache functions. " - "REFRESH_CACHE repopulates, DISABLE_CACHE bypasses, " - "HIT_CACHE_OR_ABORT raises on miss." - ), - ) - parser.add_argument( - "--cache_debug", - action="store_true", - help=( - "Log at WARNING level for every @simple_cache call whether the " - "result was served from cache, computed on miss, or recomputed " - "because of `cache_mode`" - ), - ) - return parser - - -def parse_cache_control_args(args: argparse.Namespace) -> None: - """ - Apply `--cache_mode`, `--cache_debug` by setting the `hcache_simple` - process-wide globals. - """ - # Import lazily to avoid a circular dependency at module load time. - import helpers.hcache_simple as hcacsimp - - mode = getattr(args, "cache_mode", None) - if mode is not None: - _LOG.info("Setting global cache_mode=%s", mode) - hcacsimp.set_global_cache_mode(mode) - cache_debug = bool(getattr(args, "cache_debug", False)) - if cache_debug: - _LOG.info("Enabling cache_debug logging") - hcacsimp.set_cache_debug(cache_debug) - - -# ############################################################################# -# Command line options for handling the destination dir. -# ############################################################################# - - -def add_dst_dir_arg( - parser: argparse.ArgumentParser, - dst_dir_required: bool, - dst_dir_default: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add command line options related to destination dir. - - E.g., `--dst_dir`, `--clean_dst_dir` - """ - # TODO(gp): Add unit test to check this. - # A required dst_dir implies no default dst_dir. 
- hdbg.dassert_imply( - dst_dir_required, - not dst_dir_default, - "Since dst_dir_required='%s', you need to specify a default " - "destination dir, instead of dst_dir_default='%s'", - dst_dir_required, - dst_dir_default, - ) - # If dst_dir is not required, then a default dst_dir must be specified. - hdbg.dassert_imply( - not dst_dir_required, - dst_dir_default, - "Since dst_dir_required='%s', you can't specify a default " - "destination dir, dst_dir_default='%s'", - dst_dir_required, - dst_dir_default, - ) - parser.add_argument( - "--dst_dir", - action="store", - default=dst_dir_default, - required=dst_dir_required, - help="Directory storing the results", - ) - parser.add_argument( - "--clean_dst_dir", - action="store_true", - help="Delete the destination dir before running", - ) - parser.add_argument( - "--no_confirm", - action="store_true", - help="Do not confirm before deleting dst dir", - ) - return parser - - -def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: - """ - Process the command line options related to destination dir. - - :return: a tuple (dst_dir, clean_dst_dir) - - dst_dir: the destination dir - - clean_dst_dir: whether to clean the destination dir or not - """ - dst_dir = args.dst_dir - _LOG.debug("dst_dir=%s", dst_dir) - # TODO(Dan): Fix `clean_dst_dir` usage since it is always `False` now. - clean_dst_dir = False - if args.clean_dst_dir: - _LOG.info("Cleaning dst_dir='%s'", dst_dir) - if os.path.exists(dst_dir): - _LOG.warning("Dir '%s' already exists", dst_dir) - if not args.no_confirm: - hsystem.query_yes_no( - f"Do you want to delete the dir '{dst_dir}'", - abort_on_no=True, - ) - hio.create_dir(dst_dir, incremental=False) - hio.create_dir(dst_dir, incremental=True) - _LOG.debug("clean_dst_dir=%s", clean_dst_dir) - return dst_dir, clean_dst_dir - - -# ############################################################################# -# Command line options related to selection actions. 
-# ############################################################################# - -# # Define valid and default actions. -# valid_actions = ["download", "process", "upload", "cleanup"] -# default_actions = ["download", "process"] -# # Create parser and add action arguments. -# parser = argparse.ArgumentParser(... -# hparser.add_action_arg(parser, valid_actions, default_actions) -# args = parser.parse_args() -# # Select which actions to execute based on CLI arguments. -# actions = hparser.select_actions(args, valid_actions, default_actions) -# # Display the selected actions in a formatted table. -# print(hparser.actions_to_string(actions, valid_actions, add_frame=True)) -# # mark_action() handles tracking which actions remain and logs skipped ones. -# while actions: -# # Current action to check -# action = actions[0] -# # Determine if this action should execute and get remaining actions -# # to_execute: True if action is in the list, False otherwise -# # actions: updated list with current action removed if to_execute=True -# to_execute, actions = hparser.mark_action(action, actions) -# if to_execute: -# # Execute the action -# if action == "download": -# print("Downloading data...") -# elif action == "process": - - -def add_action_arg( - parser: argparse.ArgumentParser, - valid_actions: List[str], - default_actions: Optional[List[str]], -) -> argparse.ArgumentParser: - """ - Add command line options to select actions to execute, skip, or enable. - - The function creates a mutually exclusive group with three options: - - `-a/--action`: specify exact actions to execute - - `-sa/--skip_action`: skip specific actions from default set - - `-e/--enable`: enable additional actions on top of defaults - - Available actions are listed once in the help epilog to avoid repetition. 
- - :param parser: parser to add the option to - :param valid_actions: list of valid actions - :param default_actions: list of default actions to execute - :return: parser with the option added - """ - # Add epilog with list of available actions to avoid repeating them. - actions_list = ", ".join(valid_actions) - if parser.epilog: - parser.epilog += f"\n\nAvailable actions: {actions_list}" - else: - parser.epilog = f"Available actions: {actions_list}" - # Create mutually exclusive group for action selection. - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument( - "-a", - "--action", - action="append", - dest="action", - help="Actions to execute (see available actions below)", - ) - group.add_argument( - "-sa", - "--skip_action", - action="append", - dest="skip_action", - help="Actions to skip from default set (see available actions below)", - ) - group.add_argument( - "-e", - "--enable", - action="append", - dest="enable_action", - help="Enable additional actions on top of defaults (see available actions below)", - ) - if default_actions is not None: - hdbg.dassert_is_subset(default_actions, valid_actions) - parser.add_argument( - "--all", - action="store_true", - help=f"Run all the actions ({' '.join(default_actions)})", - ) - return parser - - -def actions_to_string( - actions: List[str], valid_actions: List[str], add_frame: bool -) -> str: - """ - Convert a list of actions to a string. 
- - :param actions: list of actions to convert - :param valid_actions: list of valid actions - :param add_frame: if `True`, add a frame around the actions - :return: string of the actions - """ - space = max(len(a) for a in valid_actions) + 2 - format_ = "%" + str(space) + "s: %s" - actions = [ - format_ % (a, "Yes" if a in actions else "-") for a in valid_actions - ] - actions_as_str = "\n".join(actions) - if add_frame: - ret = hprint.frame("# Action selected:") + "\n" - ret += hprint.indent(actions_as_str) - else: - ret = actions_as_str - return ret # type: ignore - - -def select_actions( - args: argparse.Namespace, - valid_actions: List[str], - default_actions: List[str], -) -> List[str]: - """ - Select actions based on the command line arguments. - - Supports three mutually exclusive modes: - - `--action`: run only specified actions - - `--skip_action`: run default actions minus specified ones - - `--enable`: run default actions plus specified additional ones - - :param args: command line arguments - :param valid_actions: list of valid actions - :param default_actions: list of default actions to execute - :return: list of selected actions - """ - hdbg.dassert( - not (args.action and args.all), - "You can't specify together --action and --all", - ) - hdbg.dassert( - not (args.action and args.skip_action), - "You can't specify together --action and --skip_action", - ) - # TODO(ai_gp): Is this still needed? - # Check for enable_action attribute (added for backward compatibility). - has_enable = hasattr(args, "enable_action") - if has_enable: - hdbg.dassert( - not (args.action and args.enable_action), - "You can't specify together --action and --enable", - ) - hdbg.dassert( - not (args.skip_action and args.enable_action), - "You can't specify together --skip_action and --enable", - ) - # Select actions. 
- if not args.action or args.all: - if default_actions is None: - default_actions = valid_actions[:] - hdbg.dassert_is_subset(default_actions, valid_actions) - # Convert it into list since through some code paths it can be a tuple. - actions = list(default_actions) - else: - # Validate actions specified by user. - for action in args.action: - hdbg.dassert_in( - action, - valid_actions, - "Invalid action '%s'", - action, - ) - actions = args.action[:] - hdbg.dassert_isinstance(actions, list) - hdbg.dassert_no_duplicates(actions) - # Remove actions, if needed. - if args.skip_action: - hdbg.dassert_isinstance(args.skip_action, list) - for skip_action in args.skip_action: - # Validate that skip_action is a valid action. - hdbg.dassert_in( - skip_action, - valid_actions, - "Invalid action '%s'", - skip_action, - ) - # Validate that skip_action is in the current action list. - if skip_action not in actions: - _LOG.warning( - "Skipping action '%s' since it's already not in actions='%s'", - skip_action, - actions, - ) - actions = [a for a in actions if a != skip_action] - # Add enabled actions on top of defaults. - if has_enable and args.enable_action: - hdbg.dassert_isinstance(args.enable_action, list) - for enable_action in args.enable_action: - hdbg.dassert_in( - enable_action, - valid_actions, - "Invalid action '%s'", - enable_action, - ) - if enable_action not in actions: - actions.append(enable_action) - # Reorder actions according to 'valid_actions'. - actions = [action for action in valid_actions if action in actions] - return actions - - -def mark_action( - action: str, actions: Optional[List[str]] -) -> Tuple[bool, Optional[List[str]]]: - """ - Mark an action as to be executed or skipped. - - :param action: action to mark - :param actions: list of actions, or None to execute all actions - :return: tuple of (to_execute, actions) - """ - if actions is None: - # If actions is None, execute all actions. 
- to_execute = True - else: - to_execute = action in actions - _LOG.debug("\n%s", hprint.frame(f"action={action}")) - if to_execute: - if actions is not None: - actions = [a for a in actions if a != action] - else: - _LOG.warning("Skip action='%s'", action) - return to_execute, actions - - -# ############################################################################# -# Command line options for input/output processing. -# ############################################################################# - -# For non-dockerized scripts the following idiom is used: -# -# ```python -# # Add input/output arguments to parser. -# hparser.add_input_output_args(parser) -# # Handle input/output arguments, including stdin/stdout. -# in_file_name, out_file_name = hparser.parse_input_output_args(args) -# ... -# # Read input file, handling stdin. -# in_lines = hparser.from_file(in_file_name) -# ... -# # Write output, handling stdout. -# hparser.to_file(txt, out_file_name) -# ``` -# See helpers_root/dev_scripts_helpers/coding_tools/transform_template.py as an -# example. - -# For dockerized scripts the following idiom is used inside the wrapper, which -# calls the dockerized script: -# -# ```python -# # Add input/output arguments to parser. -# hparser.add_input_output_args(parser) -# # Handle input/output arguments, including stdin/stdout. -# in_file_name, out_file_name = hparser.parse_input_output_args(args) -# tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( -# in_file_name, "llm_transform") -# ... -# # For stdin/stdout, suppress the output of the container. -# suppress_output = in_file_name == "-" or out_file_name == "-" -# _run_dockerized_llm_transform( -# tmp_in_file_name, -# cmd_line_opts, -# tmp_out_file_name, -# mode="system", -# force_rebuild=args.dockerized_force_rebuild, -# use_sudo=args.dockerized_use_sudo, -# suppress_output=suppress_output, -# ) -# ... -# # Write output, handling stdout. 
-# hparser.to_file(txt, out_file_name) -# ``` -# -# See helpers_root/dev_scripts_helpers/llms/llm_transform.py as an example. - - -def add_input_output_args( - parser: argparse.ArgumentParser, - *, - in_default: Optional[str] = None, - in_required: bool = True, - out_default: Optional[str] = None, - out_required: bool = False, -) -> argparse.ArgumentParser: - """ - Add options to parse input and output file name, and handle stdin / stdout. - - :param in_default: default file to be used for input - - If `None`, it must be specified by the user - :param in_required: whether the input file is required - :param out_default: default file to be used for output - - If `None`, it must be specified by the user - :param out_required: whether the output file is required - """ - parser.add_argument( - "-i", - "--input", - dest="input", - required=in_required, - type=str, - default=in_default, - help="Input file or `-` for stdin", - ) - parser.add_argument( - "-o", - "--output", - dest="output", - required=out_required, - type=str, - default=out_default, - help="Output file or `-` for stdout", - ) - return parser - - -def parse_input_output_args( - args: argparse.Namespace, *, clear_screen: bool = False -) -> Tuple[str, str]: - """ - Parse input and output file name, handling stdin / stdout. - - :return input and output file name. - """ - in_file_name = args.input - out_file_name = args.output - if out_file_name is None: - # If the output file is not specified, use the input file name, i.e., - # in place. - out_file_name = in_file_name - # Print summary. If we are using stdin / stdout, don't print anything since - # we don't want to pollute the output. 
- if in_file_name != "-": - if clear_screen: - os.system("clear") - _LOG.info(hprint.to_str("in_file_name")) - _LOG.info(hprint.to_str("out_file_name")) - - return in_file_name, out_file_name - - -def init_logger_for_input_output_transform( - args: argparse.Namespace, *, verbose: bool = True -) -> None: - """ - Initialize the logger when input/output transformation is used. - - :param verbose: if `False`, set the log level to `CRITICAL` so that no - output is printed and avoid to print: - ``` - 09:34:24 - INFO hdbg.py init_logger:1013 Saving log to file '/User... - 09:34:24 - INFO hdbg.py init_logger:1018 > cmd='/Users/saggese/src... - 09:34:24 - INFO hparser.py parse_input_output_args:368 in_file_name='lectures_source/Les... - 09:34:24 - INFO hparser.py parse_input_output_args:369 out_file_name='-' - ``` - """ - verbosity = args.log_level - if not verbose: - # Unless user has specified DEBUG level, set the log level to `CRITICAL` - # so that no output is printed. - if args.log_level == "INFO": - verbosity = "CRITICAL" - else: - # If the input is stdin, we don't want to print the command line or any - # other log messages, unless the user specified a more verbose log level. - if args.input == "-": - if args.log_level == "INFO": - verbosity = "CRITICAL" - else: - print("cmd line: " + hdbg.get_command_line()) - hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) - - -def from_file(file_name: str) -> List[str]: - """ - Read file or stdin (represented by `-`), returning an array of lines. - - If file_name is "pb" and the platform is macOS, read from clipboard. - """ - if file_name == "-": - _LOG.info("Reading from stdin") - # Read. - txt = [] - for line in sys.stdin: - txt.append(line.rstrip("\n")) - elif file_name == "pb": - # Read from clipboard (macOS only). 
- if hserver.is_host_mac(): - _LOG.info("Reading from clipboard") - cmd = "pbpaste" - rc, txt_str = hsystem.system_to_string(cmd) - txt = txt_str.split("\n") - else: - hdbg.dfatal("Reading from clipboard (pb) only works on macOS") - else: - txt = hio.from_file(file_name) - txt = txt.split("\n") - return txt - - -def to_file(txt: Union[str, List[str]], file_name: str) -> None: - """ - Write txt in a file or stdout (represented by `-`). - - If file_name is "pb" and the platform is macOS, write to clipboard. - """ - if isinstance(txt, str): - txt = [txt] - if file_name == "-": - _LOG.debug("Saving to stdout") - print("\n".join(txt)) - elif file_name == "pb": - # Write to clipboard (macOS only). - if hserver.is_host_mac(): - _LOG.info("Writing to clipboard") - txt_str = "\n".join(txt) - # Use echo with pbcopy, escaping single quotes. - txt_str_escaped = txt_str.replace("'", "'\\''") - cmd = f"echo -n '{txt_str_escaped}' | pbcopy" - hsystem.system(cmd) - _LOG.info("Written to clipboard") - else: - hdbg.dfatal("Writing to clipboard (pb) only works on macOS") - else: - _LOG.debug("Saving to file") - with open(file_name, "w") as f: - f.write("\n".join(txt)) - _LOG.info("Written file '%s'", file_name) - - -def adapt_input_output_args_for_dockerized_scripts( - in_file_name: str, tag: str -) -> Tuple[str, str]: - """ - Adapt input and output file name for dockerized scripts. - - Since we need to call a container and passing stdin/stdout is tricky, - we read the input and save it in a temporary file. - - :param tag: tag to be used for the temporary file name (e.g., `llm_transform`) - """ - # Since we need to call a container and passing stdin/stdout is tricky, - # we read the input and save it in a temporary file. 
- in_lines = from_file(in_file_name) - if in_file_name == "-": - tmp_in_file_name = f"tmp.{tag}.in.txt" - in_txt = "\n".join(in_lines) - hio.to_file(tmp_in_file_name, in_txt) - else: - tmp_in_file_name = in_file_name - # - tmp_out_file_name = f"tmp.{tag}.out.txt" - return tmp_in_file_name, tmp_out_file_name - - -# ############################################################################# -# Command line options for parallel processing. -# ############################################################################# - - -# pylint: disable=line-too-long -# TODO(gp): These should go in hjoblib.py -def add_parallel_processing_arg( - parser: argparse.ArgumentParser, - *, - num_threads_default: Optional[str] = None, -) -> argparse.ArgumentParser: - """ - Add parallel processing args. - - The "incremental idiom" means skipping processing computation that has - already been performed. E.g., if we need to transform files from one dir to - another we skip the files already processed (assuming that a file present - in the destination dir is an indication that it has already been - processed). - - The default behavior should always be incremental since "incremental mode" - is not destructive like the non-incremental, i.e., delete and restart - - The incremental behavior is disabled with `--no_incremental`. This implies - performing the computation in any case - - It is often implemented by deleting the destination dir and then running - again, even in incremental mode - - If the destination dir already exists, then we require the user to - explicitly use `--force` to confirm that the user knows what is doing - """ - parser.add_argument( - "--dry_run", - action="store_true", - help="Print the workload and exit without running it", - ) - parser.add_argument( - "--no_incremental", - action="store_true", - help="Skip workload already performed", - ) - parser.add_argument( - "--force", - action="store_true", - help="Confirm that one wants to remove the previous results. 
It works only together with --no_incremental", - ) - # - help = """ - Number of threads to use: - - '-1' to use all CPUs; - - '1' to use one-thread at the time but using the parallel execution (mainly used - for debugging) - - 'serial' to serialize the execution without using parallel execution""" - if num_threads_default is None: - parser.add_argument( - "--num_threads", - action="store", - help=help, - required=True, - ) - else: - parser.add_argument( - "--num_threads", - action="store", - help=help, - default=num_threads_default, - ) - parser.add_argument("--no_keep_order", action="store_true", help="") - parser.add_argument( - "--num_func_per_task", - action="store", - type=int, - default=None, - help="Number of function execute in a (parallel) task of the workload. `None` means automatically decided by the function", - ) - parser.add_argument( - "--skip_on_error", - action="store_true", - help="Continue execution after encountering an error", - ) - parser.add_argument( - "--num_attempts", - default=1, - type=int, - help="Repeat running an experiment up to `num_attempts` times", - required=False, - ) - return parser - - -def create_incremental_dir(dst_dir: str, args: argparse.Namespace) -> None: - """ - Create a dir using the "incremental idiom". - - If the dir already exists and the user requested the not - incremental, we require `--force` to confirm deleting the dir. - """ - if args.force: - hdbg.dassert( - args.no_incremental, "--force only works with --no_incremental" - ) - _LOG.debug(hprint.to_str("dst_dir args")) - if args.no_incremental: - # Create the dir from scratch. - _LOG.debug("No incremental mode") - if os.path.exists(dst_dir): - _LOG.debug("Dir '%s' already exists", dst_dir) - hdbg.dassert_dir_exists(dst_dir, "'%s' must be a directory") - if not args.force: - _LOG.warning( - "The directory '%s' already exists. 
To confirm deleting it use --force", - dst_dir, - ) - sys.exit(-1) - _LOG.warning("Deleting %s", dst_dir) - hio.create_dir(dst_dir, incremental=False) - else: - _LOG.debug("Incremental mode") - hio.create_dir(dst_dir, incremental=True) - - -# ############################################################################# -# Command line options for metadata output. -# ############################################################################# - - -def add_json_output_metadata_args( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add arguments related to storing the output metadata from a script. - - This data can be read / used by other scripts to post-process a - script results. - """ - parser.add_argument( - "--json_output_metadata", - type=str, - action="store", - help="File storing the output metadata of this script in JSON format", - ) - return parser - - -# Store the metadata about the output of a script. -OutputMetadata = Dict[str, str] - - -def process_json_output_metadata_args( - args: argparse.Namespace, - output_metadata: OutputMetadata, -) -> Optional[str]: - """ - Save the output metadata according to the command line options. - - :return: file name with the output metadata - """ - hdbg.dassert_isinstance(output_metadata, dict) - if args.json_output_metadata is None: - return None - file_name: str = args.json_output_metadata - _LOG.info("Saving output metadata into file '%s'", file_name) - if not file_name.endswith(".json"): - _LOG.warning( - "The output metadata file '%s' doesn't end in .json: adding it", - file_name, - ) - file_name += ".json" - hio.to_json(file_name, output_metadata) - _LOG.info("Saved output metadata into file '%s'", file_name) - return file_name - - -def read_output_metadata(output_metadata_file: str) -> OutputMetadata: - """ - Read the output metadata. 
- """ - output_metadata: OutputMetadata = hio.from_json(output_metadata_file) - return output_metadata - - -def str_to_bool(value: str) -> bool: - """ - Convert string representing true or false to the corresponding bool. - """ - if value.lower() == "true": - ret = True - elif value.lower() == "false": - ret = False - else: - raise argparse.ArgumentTypeError( - f"Invalid boolean value {value}. Use 'true' or 'false'." - ) - return ret - - -# ############################################################################# -# Command line options for dockerized scripts. -# ############################################################################# - - -def add_dockerized_script_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add common command line arguments for dockerized scripts. - """ - parser.add_argument( - "--dockerized_force_rebuild", - action="store_true", - help="Force to rebuild the Docker container", - ) - parser.add_argument( - "--dockerized_use_sudo", - action="store_true", - help="Use sudo inside the container", - ) - return parser - - -def add_llm_prompt_arg( - parser: argparse.ArgumentParser, - *, - default_prompt: str = "", - is_required: bool = True, -) -> argparse.ArgumentParser: - """ - Add common command line arguments for `*llm_transform.py` scripts. 
- - :param default_prompt: default prompt to use - :param is_required: whether the prompt is required - :return: parser with the option added - """ - parser.add_argument( - "--debug", - action="store_true", - help="Print before/after the transform", - ) - if default_prompt != "": - is_required = False - parser.add_argument( - "-p", - "--prompt", - required=is_required, - type=str, - help="Prompt to apply", - default=default_prompt, - ) - parser.add_argument( - "-f", - "--fast_model", - action="store_true", - help="Use a fast LLM model vs a high-quality one", - ) - return parser - - -# ############################################################################# -# Command line options for limit range processing. -# ############################################################################# - - -def add_limit_range_arg( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add argument for limiting processing to a range of items. - - The range format is X:Y where X and Y are 1-indexed integers. - """ - parser.add_argument( - "--limit", - action="store", - help="Limit processing to item range X:Y (integers >= 1, inclusive)", - ) - return parser - - -def parse_limit_range(limit_str: str) -> Tuple[int, int]: - """ - Parse limit string in format "X:Y" and return tuple (start, end). 
- - :param limit_str: string in format "X:Y" where X and Y are integers >= 1 - :return: tuple in [start_index, end_index] - """ - hdbg.dassert( - ":" in limit_str, "Limit format must be X:Y, got: %s", limit_str - ) - parts = limit_str.split(":") - hdbg.dassert_eq( - len(parts), 2, "Limit format must be X:Y, got: %s", limit_str - ) - try: - start = int(parts[0]) - end = int(parts[1]) - except ValueError as e: - hdbg.dfatal("Invalid limit format, must be integers: %s" % str(e)) - hdbg.dassert_lte(1, start, "Start index must be >= 1, got: %s", start) - hdbg.dassert_lte(1, end, "End index must be >= 1, got: %s", end) - hdbg.dassert_lte( - start, end, "Start index must be <= end index, got: %s:%s", start, end - ) - return start, end - - -def parse_limit_range_args( - args: argparse.Namespace, -) -> Optional[Tuple[int, int]]: - """ - Parse limit range from command line arguments and log the result. - - :param args: parsed command line arguments containing 'limit' - attribute - :return: tuple of (start_index, end_index) as 0-indexed integers, or - None if no limit - """ - limit_range = None - if args.limit: - limit_range = parse_limit_range(args.limit) - _LOG.warning( - "Using limit range: [%s:%s]", limit_range[0], limit_range[1] - ) - return limit_range - - -def apply_limit_range( - items: List[Any], - limit_range: Optional[Tuple[int, int]] = None, - *, - item_name: str = "items", -) -> List[Any]: - """ - Apply limit range filtering to a list of items. 
- - :param items: list of items to filter - :param limit_range: optional tuple (start, end) for 0-indexed range - filtering - :param item_name: name of items for logging purposes - :return: filtered list of items - """ - if limit_range is not None: - start_idx, end_idx = limit_range - total_items = len(items) - hdbg.dassert_lt( - start_idx, - total_items, - "Start index %s exceeds available %s %s", - start_idx, - item_name, - total_items, - ) - hdbg.dassert_lt( - end_idx, - total_items, - "End index %s exceeds available %s %s", - end_idx, - item_name, - total_items, - ) - items = items[start_idx : end_idx + 1] - _LOG.warning( - "Found %s %s, limited to range %s:%s (%s %s)", - total_items, - item_name, - start_idx, - end_idx, - len(items), - item_name, - ) - else: - _LOG.info("Found %s %s to process", len(items), item_name) - # Print the items that will be processed. - _LOG.debug("Items to process:") - for i, item in enumerate(items): - _LOG.debug(" [%s]: %s", i, item) - return items - - -# ############################################################################# -# Command line options for multiple file input. -# ############################################################################# - - -def add_multi_file_args( - parser: argparse.ArgumentParser, -) -> argparse.ArgumentParser: - """ - Add command line options for specifying multiple input files. - - Three mutually exclusive methods are supported: - - `--files="file1,file2,..."`: comma-separated list of files - - `--from_files="file.txt"`: file containing one file per line - - `--input file1 --input file2`: repeated argument - - These options work alongside the existing `-i/--input` for backward - compatibility. 
- - :param parser: parser to add the options to - :return: parser with the options added - """ - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument( - "--files", - type=str, - help="Comma-separated list of files to process (e.g., 'file1.txt,file2.txt,file3.txt')", - ) - group.add_argument( - "--from_files", - type=str, - help="Path to file containing one file path per line", - ) - group.add_argument( - "-i", - "--input", - action="append", - help="File to process (can be specified multiple times)", - ) - return parser - - -def parse_multi_file_args( - args: argparse.Namespace, -) -> List[str]: - """ - Parse multi-file command line arguments and return list of file paths. - - Handles three input methods: - - `--files="file1,file2,..."`: comma-separated list - - `--from_files="file.txt"`: file containing one file per line - - `--input file1 --input file2`: repeated argument - - If none of the multi-file options are specified, falls back to the single - `-i/--input` argument for backward compatibility. - - :param args: parsed command line arguments - :return: list of file paths to process - """ - file_list: List[str] = [] - # Check which multi-file option was specified. - if hasattr(args, "files") and args.files: - # Parse comma-separated list. - _LOG.debug("Using --files option") - file_list = [f.strip() for f in args.files.split(",")] - # Remove empty strings. - file_list = [f for f in file_list if f] - elif hasattr(args, "from_files") and args.from_files: - # Read file containing list of files. - _LOG.debug("Using --from_files option") - hdbg.dassert_path_exists(args.from_files) - content = hio.from_file(args.from_files) - lines = content.split("\n") - for line in lines: - # Strip whitespace. - line = line.strip() - # Skip empty lines and comments. 
- if line and not line.startswith("#"): - file_list.append(line) - elif hasattr(args, "input") and args.input: - # Check if args.input is a list (from --input repeated argument) or a string (from -i/--input single file). - if isinstance(args.input, list): - # Use repeated argument from add_multi_file_args. - _LOG.debug("Using --input option (repeated argument)") - file_list = args.input - else: - # Backward compatibility: support single file via -i/--input from add_input_output_args. - _LOG.debug( - "Using -i/--input option (single file, backward compatibility)" - ) - file_list = [args.input] - else: - # No file specified. - hdbg.dfatal("No input files specified") - # Validate that we have at least one file. - hdbg.dassert_isinstance(file_list, list) - hdbg.dassert_lt( - 0, len(file_list), "No input files specified after parsing arguments" - ) - # Validate that all files exist. - for file_path in file_list: - hdbg.dassert_path_exists(file_path) - _LOG.info("Found %s file(s) to process", len(file_list)) - return file_list diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py deleted file mode 100644 index e46fc8143..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpickle.py +++ /dev/null @@ -1,253 +0,0 @@ -""" -Pickle and JSON serialization/deserialization routines. - -Import as: - -import helpers.hpickle as hpickle -""" - -import gzip -import json -import logging -import marshal -import os -import pickle -import types -from typing import Any, Callable, Optional - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hio as hio - -# TODO(Grisha): Can this module depend on hs3? 
-import helpers.hs3 as hs3 -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - - -def to_pickleable(obj: Any, force_values_to_string: bool) -> Any: - """ - Convert an object into an object with the same nested structure (e.g., - lists and dicts), but where all values are replaced with their pickleable - representations. - - :param obj: object to convert - :param force_values_to_string: if True, store all the object values - as strings - :return: pickleable object - """ - if isinstance(obj, list): - # Process list values recursively. - out = [to_pickleable(k, force_values_to_string) for k in obj] - elif isinstance(obj, tuple): - # Process tuple values recursively. - out = tuple([to_pickleable(k, force_values_to_string) for k in obj]) - elif isinstance(obj, dict): - # Process dict keys and values recursively. - out = {} - for k, v in obj.items(): - k = to_pickleable(k, force_values_to_string) - v = to_pickleable(v, force_values_to_string) - out[k] = v - elif hintros.is_iterable(obj): - # TODO(Grisha): is it ok that we convert any Iterable (e.g., set) to list? - # This means that input and output data types do not match. - # Process other iterable values recursively. - out = [to_pickleable(v, force_values_to_string) for v in obj] - else: - # We need to use try_and_catch mode because of CmTask7713. - if hintros.is_pickleable(obj, mode="try_and_catch"): - # Store a pickleable object. - if force_values_to_string: - # Store as string if specified. - out = str(obj) - else: - out = obj - else: - # Store a string representation of an unpickleable object. 
- out = str(obj) - return out - - -# ############################################################################# -# pickle -# ############################################################################# - - -def to_pickle( - obj: Any, - file_name: str, - *, - backend: str = "pickle", - log_level: int = logging.DEBUG, - aws_profile: Optional[hs3.AwsProfile] = None, -) -> None: - """ - Pickle object `obj` into file `file_name`. - - :param file_name: the file_name is not changed, but it is checked for - consistency with the backend (e.g., `pickle_gzip` needs a `.pkl.gz` - extension) - :param backend: pickle, dill, pickle_gzip - """ - hdbg.dassert_type_is(file_name, str) - hio.create_enclosing_dir(file_name, incremental=True) - with htimer.TimedScope(logging.DEBUG, f"Pickling to '{file_name}'") as ts: - # We assume that the user always specifies a .pkl extension and then we - # change the extension based on the backend. - if backend in ("pickle", "dill"): - hdbg.dassert_file_extension(file_name, "pkl") - if backend == "pickle": - # Use S3 file system. - if hs3.is_s3_path(file_name): - s3fs_ = hs3.get_s3fs(aws_profile) - with s3fs_.open(file_name, "wb") as s3_file: - pickler = pickle.Pickler( - s3_file, pickle.HIGHEST_PROTOCOL - ) - pickler.fast = True - pickler.dump(obj) - # Use local file system. - else: - with open(file_name, "wb") as fd: - pickler = pickle.Pickler(fd, pickle.HIGHEST_PROTOCOL) - pickler.fast = True - pickler.dump(obj) - elif backend == "dill": - import dill - - with open(file_name, "wb") as fd: - dill.dump(obj, fd) - else: - raise ValueError(f"Invalid backend='{backend}'") - elif backend == "pickle_gzip": - hdbg.dassert_file_extension(file_name, "pkl.gz") - with gzip.open(file_name, "wb") as zfd: - pickler = pickle.Pickler(zfd, pickle.HIGHEST_PROTOCOL) - pickler.fast = True - pickler.dump(obj) - else: - raise ValueError(f"Invalid backend='{backend}'") - # Report time and size. 
- if hs3.is_s3_path(file_name): - file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True) - else: - file_size = hintros.format_size(os.path.getsize(file_name)) - _LOG.log( - log_level, - "Saved '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - - -def from_pickle( - file_name: str, - backend: str = "pickle", - *, - log_level: int = logging.DEBUG, - aws_profile: Optional[hs3.AwsProfile] = None, -) -> Any: - """ - Unpickle and return object stored in `file_name`. - """ - hdbg.dassert_isinstance(file_name, str) - with htimer.TimedScope( - logging.DEBUG, f"Unpickling from '{file_name}'" - ) as ts: - # We assume that the user always specifies a .pkl extension and then we - # change the extension based on the backend. - if backend in ("pickle", "dill"): - hdbg.dassert_file_extension(file_name, "pkl") - if backend == "pickle": - # Use S3 file system. - if hs3.is_s3_path(file_name): - s3fs_ = hs3.get_s3fs(aws_profile) - with s3fs_.open(file_name) as s3_file: - unpickler = pickle.Unpickler(s3_file) - obj = unpickler.load() - else: - with open(file_name, "rb") as fd: - unpickler = pickle.Unpickler(fd) - obj = unpickler.load() - elif backend == "dill": - import dill - - with open(file_name, "rb") as fd: - obj = dill.load(fd) - else: - raise ValueError(f"Invalid backend='{backend}'") - elif backend == "pickle_gzip": - hdbg.dassert_file_extension(file_name, "pkl.gz") - with gzip.open(file_name, "rb") as zfd: - unpickler = pickle.Unpickler(zfd) - obj = unpickler.load() - else: - raise ValueError(f"Invalid backend='{backend}'") - # Report time and size. 
- if hs3.is_s3_path(file_name): - file_size = hs3.du(file_name, aws_profile=aws_profile, human_format=True) - else: - file_size = hintros.format_size(os.path.getsize(file_name)) - _LOG.log( - log_level, - "Read '%s' (size=%s, time=%.1fs)", - file_name, - file_size, - ts.elapsed_time, - ) - return obj - - -# ############################################################################# - - -# TODO(gp): -> to_pickle_function -def pickle_function(func: Callable) -> str: - """ - Pickle a function into bytecode stored into a string. - - - return: string - """ - hdbg.dassert_callable(func) - hdbg.dassert(hasattr(func, "__code__")) - assert hasattr(func, "__code__") - code_as_bytes = marshal.dumps(func.__code__) - return code_as_bytes.decode() - - -# TODO(gp): -> from_pickle_function -def unpickle_function(code_as_str: str, func_name: str) -> Callable: - """ - Unpickle a function saved into string . The function is - injected in the global namespace as . - - - return: function - """ - hdbg.dassert_isinstance(code_as_str, str) - code = marshal.loads(code_as_str.encode()) - func = types.FunctionType(code, globals(), name=func_name) - return func - - -# ############################################################################# -# JSON -# ############################################################################# - -# TODO(gp): Maybe move helpers/hjson.py? - - -# TODO(gp): Switch file_name and obj to be consistent with the pickle functions. 
-def to_json(file_name: str, obj: object) -> None: - hdbg.dassert_file_extension(file_name, "json") - with open(file_name, "w") as outfile: - json.dump(obj, outfile) - - -def from_json(file_name: str) -> object: - hdbg.dassert_path_exists(file_name) - hdbg.dassert_file_extension(file_name, "json") - obj = json.loads(hio.from_file(file_name)) - return obj diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py deleted file mode 100644 index 5e1df13c8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hplayback.py +++ /dev/null @@ -1,495 +0,0 @@ -""" -Code to automatically generate unit tests for functions. - -Import as: - -import helpers.hplayback as hplayba -""" - -import inspect -import json -import logging -import os -from typing import Any, Callable, List, Optional - -import jsonpickle # type: ignore -import jsonpickle.ext.pandas as jepand # type: ignore -import pandas as pd - -import config_root.config as cconfig -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint - -jepand.register_handlers() - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Use repr to serialize: -# >>> a = {"hello": [1, 2, (3, 4)]} -# >>> repr(a) -# "{'hello': [1, 2, (3, 4)]}" -# TODO(gp): Add more types. -# TODO(gp): -> _to_python_code -def to_python_code(obj: Any) -> str: - """ - Serialize an object into a string of Python code. - - :param obj: an object to serialize - :return: a string of Python code building the object - """ - output = [] - if isinstance(obj, (int, float)): - # Float 2.5 -> "2.5". - output.append(str(obj)) - elif isinstance(obj, str): - # String test -> '"test"'. - # Use jsonpickle to handle double quotes. 
- output.append(jsonpickle.encode(obj)) - elif isinstance(obj, list): - # List ["a", 1] -> '["a", 1]'. - output_tmp = "[" - for el in obj: - output_tmp += to_python_code(el) + ", " - output_tmp = output_tmp.rstrip(", ") + "]" - output.append(output_tmp) - elif isinstance(obj, tuple): - # Tuple ["a", 1] -> '["a", 1]'. - output_tmp = "(" - for el in obj: - output_tmp += to_python_code(el) + ", " - output_tmp = output_tmp.rstrip(", ") + ")" - output.append(output_tmp) - elif isinstance(obj, dict): - # Dict {"a": 1} -> '{"a": 1}'. - output_tmp = "{" - for key in obj: - output_tmp += ( - to_python_code(key) + ": " + to_python_code(obj[key]) + ", " - ) - output_tmp = output_tmp.rstrip(", ") + "}" - output.append(output_tmp) - elif isinstance(obj, pd.DataFrame): - # Dataframe with a column "a" and row values 1, 2 -> - # "pd.DataFrame.from_dict({'a': [1, 2]})". - vals = obj.to_dict(orient="list") - output.append(f"pd.DataFrame.from_dict({vals})") - elif isinstance(obj, pd.Series): - # Series init as pd.Series([1, 2]) - output.append( - f'pd.Series(data={obj.tolist()}, index={obj.index}, name="{obj.name}", ' - f"dtype={obj.dtype})" - ) - elif isinstance(obj, cconfig.Config): - # Config -> python_code -> "cconfig.Config.from_python(python_code)" - val = obj.to_python() - output.append(f'cconfig.Config.from_python("{val}")') - else: - # Use `jsonpickle` for serialization. - _LOG.warning( - "Type %s not found in serialization function: using jsonpickle.", - type(obj), - ) - output.append(f"r'{jsonpickle.encode(obj)}'") - output = "\n".join(output) - return output - - -# ############################################################################# -# Playback -# ############################################################################# - - -class Playback: - @staticmethod - def _get_test_file_name(file_with_code: str) -> str: - """ - Construct the test file name based on the file with the code to test. - - :param file_with_code: path to file with code to test. 
- :return: path to the file with generated test. - """ - # Get directory and filename of the testing code. - dirname_with_code, filename_with_code = os.path.split(file_with_code) - dirname_with_test = os.path.join(dirname_with_code, "test") - # Construct test file. - test_file = os.path.join( - dirname_with_test, f"test_by_playback_{filename_with_code}" - ) - return test_file - - def _update_code_to_existing(self) -> None: - """ - Get existing content from the file with test. - - If the file doesn't exist - creates it. - """ - # Create test file if it doesn't exist. - if not os.path.exists(self._test_file): - hio.create_enclosing_dir(self._test_file, True) - hio.to_file(self._test_file, "", mode="w") - else: - # Get already existing content in the test file. - self._code = hio.from_file(self._test_file).split("\n") - self._file_exists = True - - def _append(self, string: str, num_tabs: int = 0) -> None: - """ - Add indented line to the code. - """ - num_spaces = num_tabs * 4 - self._code.append(hprint.indent(string, num_spaces=num_spaces)) - - def __init__( - self, - mode: str, - to_file: Optional[bool] = None, - max_tests: Optional[int] = None, - ) -> None: - """ - Initialize the class variables. - - :param mode: the type of unit test to be generated (e.g. "assert_equal") - :param to_file: save playback output to the file - test/test_by_playback_.py - :param max_tests: limit a number of generated tests for the testing - function. Can be useful if the function is called a lot of times - during the execution. - """ - _LOG.debug(hprint.to_str("mode to_file max_tests")) - hdbg.dassert_in(mode, ("check_string", "assert_equal")) - self.mode = mode - # TODO(gp): Factor out in a function but need to discard one more level - # in the stack trace. - cur_frame = inspect.currentframe() - self._func_name = cur_frame.f_back.f_code.co_name # type: ignore - # We can use kw arguments for all args. Python supports this. 
- self._kwargs = cur_frame.f_back.f_locals.copy() # type: ignore - # It treats all arguments defined before itself as arguments. If this - # is done, it will mess up the function call that will be created in - # `Playback.run`. - expected_arg_count = cur_frame.f_back.f_code.co_argcount # type: ignore - if "kwargs" in self._kwargs: - expected_arg_count += 1 - _LOG.debug(hprint.to_str("expected_arg_count")) - # TODO(gp): Is this necessary? - # hdbg.dassert_eq( - # expected_arg_count, - # len(cur_frame.f_back.f_locals), # type: ignore - # msg="the Playback class should be the first thing instantiated in" - # " a function.", - # ) - # If the function is a method, store the parent class so we can also - # create that in the test. - if "self" in self._kwargs: - x = self._kwargs.pop("self") - self._parent_class = x - self._code = [ - f"# Test created for {cur_frame.f_back.f_globals['__name__']}" # type: ignore - f".{x.__class__.__name__}.{self._func_name}." - ] - else: - self._parent_class = None - self._code = [ - # pylint: disable=line-too-long - f"# Test created for {cur_frame.f_back.f_globals['__name__']}.{self._func_name}." # type: ignore - ] - self._append("") - # Check if need to write the code directly to file. - self._to_file = to_file if to_file is not None else False - # Find filename to write the code. - file_with_code = cur_frame.f_back.f_code.co_filename # type: ignore - self._test_file = self._get_test_file_name(file_with_code) - # Check if file exists, need to keep code already here. - self._file_exists = False - if self._to_file: - self._update_code_to_existing() - # Limit number of tests per tested function. - self._max_tests = max_tests or float("+inf") - - @staticmethod - def test_code(output: str) -> None: - # Try to execute in a fake environment. 
- # ``` - # local_env = {} - # _ = exec(output, local_env) - # ``` - _ = exec(output) # pylint: disable=exec-used - - def _check_code(self, func_output: Any) -> None: - """ - Generate test code that makes an assertion. - """ - if self.mode == "check_string": - if isinstance(func_output, (pd.DataFrame, pd.Series, str)): - if not isinstance(func_output, str): - self._append( - "actual = hpandas.df_to_str(actual, num_rows=None)", 2 - ) - if not isinstance(func_output, (str, bytes)): - self._append("actual = str(actual)", 2) - self._append("# Check output.", 2) - self._append("self.check_string(actual)", 2) - elif self.mode == "assert_equal": - self._append("# Define expected output.", 2) - func_output_as_code = to_python_code(func_output) - self._append(f"expected = {func_output_as_code}", 2) - if not isinstance( - func_output, (int, float, str, list, dict, pd.DataFrame) - ): - self._append("expected = jsonpickle.decode(expected)", 2) - - if isinstance(func_output, (pd.DataFrame, pd.Series)): - self._append( - "actual = hpandas.df_to_str(actual, num_rows=None)", 2 - ) - self._append( - "expected = hpandas.df_to_str(expected, num_rows=None)", 2 - ) - self._append("# Compare actual and expected output.", 2) - self._append("self.assertEqual(actual, expected)", 2) - else: - raise ValueError(f"Invalid mode='{self.mode}'") - - def _add_imports(self, additional: Optional[List[str]] = None) -> None: - """ - Add the code with imports. - """ - # Add imports. - self._append("import helpers.hpandas as hpandas") - self._append("import helpers.hunit_test as hunitest") - self._append("import jsonpickle") - self._append("import pandas as pd") - self._append("import config_root.config as cconfi") - for a in additional or []: - self._append(a) - self._code.extend(["", ""]) - - def _get_class_name_string(self) -> str: - """ - Get a string for the test code with the name of the test class. - - I.e. "class TestMyMethod(hunitest.TestCase):". 
- """ - test_name = ( - self._parent_class.__class__.__name__ - if self._parent_class is not None - else "" - ) - test_name += "".join( - [x.capitalize() for x in self._func_name.split("_")] - ) - class_string = f"class Test{test_name}(hunitest.TestCase):" - return class_string - - def _get_class_count(self) -> int: - """ - Find a number of already generated tests for the method. - """ - class_string = self._get_class_name_string() - count = 0 - for line in self._code: - count += line == class_string - return count - - def _add_test_class(self) -> None: - """ - Add the code with the test class definition and the test method - definition. - """ - # Add test class and test method. - class_string = self._get_class_name_string() - # Find how many times method was tested. - count = self._get_class_count() - if count >= self._max_tests: - # If it was already tested enough times, raise. - raise IndexError(f"{self._max_tests} tests already generated") - # Otherwise, continue to create a test code. - self._append(class_string) - self._append(f"def test{count + 1}(self) -> None:", 1) - - def _add_function_call(self) -> None: - """ - Add a call of the function to test to the test code. - """ - self._append("# Call function to test.", 2) - if self._parent_class is None: - fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] - self._append(f"actual = {self._func_name}({', '.join(fnc_call)})", 2) - else: - var_code = to_python_code(self._parent_class) - # Re-create the parent class. - self._append(f"cls = {var_code}", 2) - self._append("cls = jsonpickle.decode(cls)", 2) - fnc_call = [f"{k}={k}" for k in self._kwargs.keys()] - # Call the method as a child of the parent class. - self._append( - f"actual = cls.{self._func_name}({', '.join(fnc_call)})", 2 - ) - - def _add_var_definitions(self) -> None: - """ - Add variables definitions for the function to test. 
- """ - if self._kwargs: - self._append("# Define input variables.", 2) - for key in self._kwargs: - as_python = to_python_code(self._kwargs[key]) - self._append(f"{key} = {as_python}", 2) - # Decode back to an actual Python object, if necessary. - if not isinstance( - self._kwargs[key], - ( - int, - float, - str, - list, - dict, - pd.DataFrame, - pd.Series, - cconfig.Config, - ), - ): - self._append(f"{key} = jsonpickle.decode({key})", 2) - - def _gen_code(self) -> str: - """ - Construct string with all generated test code. - """ - code = "\n".join(self._code) + "\n" - _LOG.debug("code=\n%s", code) - if self._to_file: - hio.to_file(self._test_file, code) - return code - - def run(self, func_output: Any) -> str: - """ - Generate a unit test for the function. - - The unit test compares the actual function output with the expected - `func_output`. - - :param func_output: the expected function output - :return: the code of the unit test - """ - if self._to_file and self._file_exists: - # Imports were added before, so skip. - pass - else: - # Start with imports. - self._add_imports() - # Count if we reached max number of tests generated for a single function. - try: - self._add_test_class() - except IndexError as exception: - # If there are already enough tests, not add anything. - _LOG.warning(str(exception)) - return "" - self._add_var_definitions() - self._add_function_call() - self._check_code(func_output) - return self._gen_code() - - -# ############################################################################# - - -def json_pretty_print(parsed: Any) -> str: - """ - Pretty print a JSON object. 
- - :param parsed: a JSON object - :return: a prettified JSON object - """ - if isinstance(parsed, str): - parsed = json.loads(parsed) - # `ret = pprint.pformat(parsed) - ret = json.dumps(parsed, indent=4, sort_keys=True) - return ret - - -def round_trip_convert(obj1: Any, log_level: int) -> Any: - """ - Encode and decode with `jsonpickle` ensuring the object remains the same. - - :param obj1: the initial object - :param log_level: the level of logging - :return: the object after encoding and decoding - """ - _LOG.log(log_level, "# obj1=\n%s", obj1) - _LOG.log(log_level, "class=%s", type(obj1)) - # Encode. - frozen = jsonpickle.encode(obj1) - _LOG.log(log_level, "# frozen=\n%s", json_pretty_print(frozen)) - # Decode. - obj2 = jsonpickle.decode(frozen) - _LOG.log(log_level, "# obj2=\n%s", obj2) - _LOG.log(log_level, "class=%s", type(obj1)) - # Check whether the decoded version is the same as the initial object. - if str(type(obj1)).startswith(" Callable: - def wrapper(*args: Any, **kwargs: Any) -> Any: - import helpers.hplayback as hplayba - - playback = hplayba.Playback("assert_equal") - res = func(*args, **kwargs) - code = playback.run(res) - print(code) - return res - - return wrapper(func) - - -# Inline the decorator as: -# -# 1) Rename `target_func` -> `target_func_tmp` -# ``` -# def target_function_tmp(...): -# ... -# ``` -# -# 2) Add wrapper: -# ``` -# def target_function_tmp(...): -# ... 
-# -# from typing import Any -# -# def target_function(*args: Any, **kwargs: Any) -> Any: -# import helpers.hplayback as hplayb -# playback = hplayb.Playback("assert_equal") -# res = target_func_tmp(*args, **kwargs) -# code = playback.run(res) -# print(code) -# return res -# ``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py deleted file mode 100644 index 29a504226..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hprint.py +++ /dev/null @@ -1,1076 +0,0 @@ -""" -Import as: - -import helpers.hprint as hprint -""" - -import functools -import inspect -import logging -import pprint -import re -import sys -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union - -import helpers.hdbg as hdbg - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - - -# ############################################################################# -# Debug output -# ############################################################################# - -_COLOR_MAP = { - "bold": 1, - # Colors. - "blue": 94, - "green": 92, - "white": 0, - "purple": 95, - "red": 91, - "yellow": 33, - # Blue. - "DEBUG": 34, - # Cyan. - "INFO": 36, - # Yellow. - "WARNING": 33, - # Red. - "ERROR": 31, - # White on red background. - "CRITICAL": 41, -} - - -def color_highlight(text: str, color: str) -> str: - """ - Return a colored string. 
- """ - prefix = "\033[" - suffix = "\033[0m" - hdbg.dassert_in(color, _COLOR_MAP) - color_code = _COLOR_MAP[color] - txt = f"{prefix}{color_code}m{text}{suffix}" - return txt - - -def clear_screen() -> None: - print((chr(27) + "[2J")) - - -def line(char: Optional[str] = None, num_chars: Optional[int] = None) -> str: - """ - Return a line with the desired character. - """ - char = "#" if char is None else char - num_chars = 80 if num_chars is None else num_chars - return char * num_chars - - -def pprint_pformat(obj: Any, *, sort_dicts: bool = False) -> str: - """ - Pretty-print in color. - """ - from pygments import highlight - from pygments.formatters import Terminal256Formatter - from pygments.lexers import PythonLexer - - txt = pprint.pformat(obj, sort_dicts=sort_dicts) - txt = highlight(txt, PythonLexer(), Terminal256Formatter()) - txt = txt.rstrip() - return txt - - -def pprint_color(obj: Any, *, tag: Optional[str] = None, sep: str = "") -> None: - """ - Pretty-print in color. - """ - txt = "" - if tag is not None: - txt += tag + "= " + sep - txt += pprint_pformat(obj) - print(txt) - - -# TODO(gp): -> Use *args instead of forcing to build a string to simplify the caller. -def frame( - message: str, - *, - char1: Optional[str] = None, - num_chars: Optional[int] = None, - char2: Optional[str] = None, - thickness: int = 1, - level: int = 0, -) -> str: - """ - Print a frame around a message. - - :param message: message to print - :param char1: char for top line of the frame - :param num_chars: how many chars in each line (by default 80 chars) - :param char2: char for bottom line of the frame - :param thickness: how many overlapping lines - - E.g., thickness = 2 - ``` - # #######... - # #######... - # hello - # #######... - # #######... - ``` - :param level: level of framing indent based on `#` char: - - E.g., level = 0 - ``` - #######... - hello - #######... - ``` - - E.g., level = 1 - ``` - # #######... - # hello - # #######... 
- ``` - """ - hdbg.dassert_isinstance(message, str) - # Fill in the default values. - if char1 is None: - # User didn't specify any char. - char1 = char2 = "#" - elif char1 is not None and char2 is None: - # User specified only one char. - char2 = char1 - elif char1 is None and char2 is not None: - # User specified the second char, but not the first one. - hdbg.dfatal(f"Invalid char1='{char1}' char2='{char2}'") - else: - # User specified both chars. Nothing to do. - pass - num_chars = 80 if num_chars is None else num_chars - # Sanity check. - hdbg.dassert_eq(len(char1), 1) - hdbg.dassert_lte(1, num_chars) - hdbg.dassert_eq(len(char2), 1) - hdbg.dassert_lte(1, thickness) - hdbg.dassert_lte(0, level) - # Build the return value. - prefix = "" - if level: - prefix = "#" * level + " " - ret = ( - (prefix + (line(char1, num_chars) + "\n") * thickness) - + (prefix + message + "\n") - + (prefix + (line(char2, num_chars) + "\n") * thickness) - ).rstrip("\n") - return ret - - -# ############################################################################# - - -StrOrList = Union[str, List[str]] - - -# TODO(gp): Use this everywhere in the codebase to avoid back-and-forth -# transforms between strings and lists of strings. -def split_lines(func: Callable) -> Callable: - """ - A decorator that splits a string input into lines before passing it to the - decorated function which expects a list of lines. - """ - - @functools.wraps(func) - def wrapper(txt: StrOrList, *args: Any, **kwargs: Any) -> StrOrList: - if isinstance(txt, str): - # Split the txt into lines. - lines = txt.splitlines() - is_str = True - else: - # The txt is already a list of lines: pass it as is. - hdbg.dassert_isinstance(txt, list) - lines = txt - is_str = False - # Call the function. - lines = func(lines, *args, **kwargs) - if is_str: - # Join the lines back together. - out = "\n".join(lines) - else: - # The output is already a list of lines. 
- hdbg.dassert_isinstance(lines, list) - out = lines - return out - - return wrapper - - -@split_lines -def prepend(lines: List[str], prefix: str) -> List[str]: - """ - Add `prefix` before each line of the string `txt`. - """ - hdbg.dassert_isinstance(lines, list) - lines_out = [prefix + curr_line for curr_line in lines] - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -@split_lines -def indent(lines: List[str], *, num_spaces: int = 2) -> List[str]: - """ - Add `num_spaces` spaces before each line of the passed string. - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(num_spaces, int) - hdbg.dassert_lte(0, num_spaces) - spaces = " " * num_spaces - txt_out = [] - for curr_line in lines: - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def strict_split(lines: List[str], max_length: int) -> List[str]: - """ - Split a string into chunks of `max_length` characters. - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_lte(1, max_length) - lines_out = [] - for line in lines: - for i in range(0, len(line), max_length): - lines_out.append(line[i : i + max_length]) - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -@split_lines -def remove_lead_trail_empty_lines(lines: List[str]) -> List[str]: - """ - Remove consecutive empty lines only at the beginning / end of a string. - """ - hdbg.dassert_isinstance(lines, list) - # Remove leading empty lines. - while lines and not lines[0].strip(): - lines.pop(0) - # Remove trailing empty lines. 
- while lines and not lines[-1].strip(): - lines.pop() - hdbg.dassert_isinstance(lines, list) - return lines - - -@split_lines -def dedent( - lines: List[str], *, remove_lead_trail_empty_lines_: bool = True -) -> List[str]: - """ - Remove from each line the minimum number of spaces to align the text on the - left. - - It is the opposite of `indent()`. - - :param txt: multi-line string - :param txt: multi-line string - :param remove_lead_trail_empty_lines_: if True, remove all the empty - lines at the beginning and at the end - """ - if remove_lead_trail_empty_lines_: - lines = remove_lead_trail_empty_lines(lines) - # Find the minimum number of leading spaces. - min_num_spaces = None - for curr_line in lines: - _LOG.debug( - "min_num_spaces=%s: curr_line='%s'", min_num_spaces, curr_line - ) - # Skip empty lines. - if curr_line.lstrip().rstrip() == "": - _LOG.debug(" -> Skipping empty line") - continue - m = re.search(r"^(\s*)", curr_line) - hdbg.dassert(m) - # The linter doesn't understand that `dassert` is equivalent to an - # `assert`. - assert m is not None - curr_num_spaces = len(m.group(1)) - _LOG.debug(" -> curr_num_spaces=%s", curr_num_spaces) - if min_num_spaces is None or curr_num_spaces < min_num_spaces: - min_num_spaces = curr_num_spaces - _LOG.debug("min_num_spaces=%s", min_num_spaces) - # Process each line and remove the minimum indentation. - txt_out = [] - for curr_line in lines: - _LOG.debug("curr_line='%s'", curr_line) - # Skip empty lines. - if curr_line.lstrip().rstrip() == "": - txt_out.append("") - continue - hdbg.dassert_lte(min_num_spaces, len(curr_line)) - txt_out.append(curr_line[min_num_spaces:]) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def align_on_left(lines: List[str]) -> List[str]: - """ - Remove all leading/trailing spaces for each line. 
- """ - hdbg.dassert_isinstance(lines, list) - txt_out = [] - for curr_line in lines: - curr_line = curr_line.rstrip(" ").lstrip(" ") - txt_out.append(curr_line) - hdbg.dassert_isinstance(txt_out, list) - return txt_out - - -@split_lines -def remove_empty_lines( - lines: List[str], *, mode: str = "no_empty_lines" -) -> List[str]: - """ - Remove empty lines from a multi-line string. - - :param lines: list of input lines to process - :param mode: - - no_empty_lines: remove all empty lines - - no_consecutive_empty_lines: remove consecutive empty lines - :return: lines with empty lines removed - """ - hdbg.dassert_isinstance(lines, list) - if mode == "no_empty_lines": - lines_out = [line for line in lines if line.rstrip().lstrip() != ""] - elif mode == "no_consecutive_empty_lines": - # If there are two or more consecutive empty lines, remove all but the last one. - lines_out = [] - prev_empty = False - for line in lines: - if re.search(r"^\s*$", line): - if prev_empty: - continue - prev_empty = True - else: - prev_empty = False - lines_out.append(line) - else: - raise ValueError(f"Invalid mode='{mode}'") - hdbg.dassert_isinstance(lines_out, list) - return lines_out - - -def vars_to_debug_string(vars_as_str: List[str], locals_: Dict[str, Any]) -> str: - """ - Create a string with var name -> var value. - - E.g., ["var1", "var2"] is converted into: ``` var1=... var2=... ``` - """ - txt = [] - for var in vars_as_str: - txt.append(var + "=") - txt.append(indent(str(locals_[var]))) - return "\n".join(txt) - - -# ############################################################################# -# Pretty print data structures. 
-# ############################################################################# - - -def to_object_str(obj: Any) -> str: - class_name = str(obj.__class__.__name__) - hex_str = str(hex(id(obj))) - return f"{class_name} at {hex_str}" - - -def to_object_repr(obj: Any) -> str: - class_module = str(obj.__class__.__module__) - class_name = str(obj.__class__.__name__) - hex_str = str(hex(id(obj))) - return f"<{class_module}.{class_name} at {hex_str}>" - - -def thousand_separator(v: float) -> str: - v = "{0:,}".format(v) - return v - - -# TODO(gp): -> to_percentage -def perc( - a: float, - b: float, - *, - invert: bool = False, - num_digits: int = 2, - only_perc: bool = False, - use_float: bool = False, - only_fraction: bool = False, - use_thousands_separator: bool = False, -) -> Union[str, float]: - """ - Calculate percentage a / b as a string. - - Asserts 0 <= a <= b. If true, returns a/b to `num_digits` decimal places. - - :param a: numerator - :param b: denominator - :param invert: assume the fraction is (b - a) / b - This is useful when we want to compute the complement of a count. - :param num_digits: number of digits to represent the percentage - :param only_perc: return only the percentage, without the fraction - - E.g., "50.00%" vs "10 / 20 = 50.00%" - :param use_float: return the percentage as a float. It requires - `only_perc = True` - :param only_fraction: return only the fraction, without the percentage - - E.g., "10 / 20" vs "10 / 20 = 50.00%" - :param use_thousands_separator: report the numbers using thousands separator - :return: string with a/b - """ - hdbg.dassert_lte(0, a) - hdbg.dassert_lte(a, b) - if invert: - a = b - a - if use_thousands_separator: - a_str = str("{0:,}".format(a)) - b_str = str("{0:,}".format(b)) - else: - a_str = str(a) - b_str = str(b) - # Validate and format the percentage. - hdbg.dassert_lte(0, num_digits) - if only_perc: - fmt = "%." 
+ str(num_digits) + "f" - ret = fmt % (float(a) / b * 100.0) - if use_float: - # 57.27 - ret = float(ret) - else: - # 57.27% - hdbg.dassert_isinstance(ret, str) - ret += "%" - elif only_fraction: - # 4225 / 7377 - ret = f"{a_str} / {b_str}" - else: - # 4225 / 7377 = 57.27% - fmt = "%s / %s = %." + str(num_digits) + "f%%" - ret = fmt % (a_str, b_str, float(a) / b * 100.0) - return ret - - -def round_digits( - v: float, *, num_digits: int = 2, use_thousands_separator: bool = False -) -> str: - """ - Round digit returning a string representing the formatted number. - - :param v: value to convert - :param num_digits: number of digits to represent v on None is - (Default value = 2) - :param use_thousands_separator: use "," to separate thousands - (Default value = False) - :return: str with formatted value - """ - if (num_digits is not None) and isinstance(v, float): - fmt = "%0." + str(num_digits) + "f" - res = float(fmt % v) - else: - res = v - if use_thousands_separator: - res = "{0:,}".format(res) # type: ignore - res_as_str = str(res) - return res_as_str - - -# ############################################################################# -# Logging helpers -# ############################################################################# - - -# TODO(gp): Move this to hdbg.hlogging, but there are dependencies from this file. - -# https://stackoverflow.com/questions/2749796 has some solutions to find the -# name of variables from the caller. - - -_VarNamesType = Optional[Union[str, List[str]]] - - -def _to_var_list(expression: _VarNamesType) -> List[str]: - if isinstance(expression, List): - return expression - hdbg.dassert_isinstance(expression, str) - # If expression is a list of space-separated expressions, convert each in a - # string. - exprs = [v.lstrip().rstrip() for v in expression.split(" ")] - # Remove empty var names. 
- exprs = [v for v in exprs if v.strip().rstrip() != ""] - hdbg.dassert_isinstance(exprs, list) - hdbg.dassert_lte(1, len(exprs)) - return exprs - - -def to_str( - expression: str, - *, - frame_level: int = 1, - print_lhs: bool = True, - char_separator: str = ",", - mode: str = "repr", -) -> str: - """ - Return a string with the value of a variable / expression / multiple - variables. - - If expression is a space-separated compound expression, convert it into - `exp1=val1, exp2=val2, ...`. - - This is similar to Python 3.8 f-string syntax `f"{foo=} {bar=}"`. - We don't want to force to use Python 3.8 just for this feature. - ``` - > x = 1 - > to_str("x+1") - x+1=2 - ``` - - :param expression: the variable / expression to evaluate and print. - E.g., `to_str("exp1")` is converted into `exp1=val1`. - If expression is a space-separated compound expression, e.g., - `to_str("exp1 exp2 ...")`, it is converted into `exp1=val1, exp2=val2, ...` - :param frame_level: level of the frame to inspect - :param print_lhs: whether we want to print the left hand side (i.e., `exp1`) - :param char_separator: separator between the values of the expressions - when printed (e.g., `,`) - :param mode: select how to print the value of the expressions (e.g., `str`, - `repr`, `pprint`, `pprint_color`) - """ - # TODO(gp): If we pass an object it would be nice to find the name of it. - # E.g., https://github.com/pwwang/python-varname - hdbg.dassert_isinstance(expression, str) - if " " in expression: - exprs = _to_var_list(expression) - # Convert each expression into a value. - _to_str = lambda x: to_str(x, frame_level=frame_level + 2) - values = list(map(_to_str, exprs)) - # Assemble in a return value. - hdbg.dassert_lte(len(char_separator), 1) - sep = char_separator + " " - txt = sep.join(values) - return txt - # Certain expressions are evaluated as literals. - if expression in ("", "->", ":", "=", "\n"): - return expression - # Evaluate the expression. 
- frame_ = sys._getframe(frame_level) # pylint: disable=protected-access - ret = "" - if print_lhs: - ret += expression + "=" - try: - eval_ = eval(expression, frame_.f_globals, frame_.f_locals) - except Exception as e: - print("expression=''", expression) - raise e - if mode == "str": - ret += str(eval_) - elif mode == "repr": - ret += repr(eval_) - elif mode == "pprint": - ret += "\n" + indent(pprint.pformat(eval_)) - elif mode == "pprint_color": - ret += "\n" + indent(pprint_pformat(eval_)) - else: - raise ValueError(f"Invalid mode='{mode}'") - return ret - - -# TODO(gp): Extend this to work on class methods, static and not. -def _func_signature_to_str( - skip_vars: _VarNamesType, - assert_on_skip_vars_error: bool, - frame_level: int, -) -> Tuple[str, str]: - """ - Return the variables of the caller function as a string. - - Same params as `func_signature_to_str()`. - :return: function name and string with the variables of the caller function - as `var1 var2 ...` - """ - if skip_vars is not None: - skip_vars = _to_var_list(skip_vars) - # Get the caller's frame (i.e., the function that called this function). - caller_frame = inspect.currentframe() - for _ in range(frame_level): - hdbg.dassert_is_not( - caller_frame, None, "caller_frame should not be None" - ) - caller_frame = caller_frame.f_back - hdbg.dassert_is_not( - caller_frame, - None, - "caller_frame should not be None after traversing frames", - ) - caller_function_name = caller_frame.f_code.co_name - # _LOG.debug("caller_function_name=%s", caller_function_name) - # Retrieve the function object from the caller's frame. 
- caller_function = caller_frame.f_globals.get(caller_function_name, None) - if caller_function: - # Get the function's signature - sig = inspect.signature(caller_function) - var_names = list(sig.parameters.keys()) - if skip_vars: - if assert_on_skip_vars_error: - hdbg.dassert_is_subset(skip_vars, var_names) - var_names = [ - var_name for var_name in var_names if var_name not in skip_vars - ] - vars_str = " ".join(var_names) - else: - raise ValueError("Unable to determine caller function") - return caller_function_name, vars_str - - -def func_signature_to_str( - # We don't use * since we want to keep it simple to call this function. - skip_vars: _VarNamesType = None, - *, - assert_on_skip_vars_error: bool = True, - frame_level: int = 2, -) -> str: - r""" - Return the variables of the caller function as a string. - - Use like: - ``` - _LOG.debug("\n%s", hprint.func_signature_to_str()) - ``` - - :param skip_vars: list of variables to skip - :param assert_on_skip_vars_error: whether to assert if the variables to skip - are not found in the function signature - :param frame_level: level of the frame to inspect. By default we need to - access the frame of the caller of the caller, so frame_level = 2 - """ - # Get the variables. - func_name, func_signature = _func_signature_to_str( - skip_vars, - assert_on_skip_vars_error, - frame_level, - ) - # Get the value of the variables. - val = to_str(func_signature, frame_level=frame_level) - val = f"# {func_name}: {val}" - return val - - -# ############################################################################# - - -def log(logger: logging.Logger, verbosity: int, *vals: Any) -> None: - """ - Log at a certain verbosity. 
- - `log(_LOG, logging.DEBUG, "ticker", "exchange")` - - is equivalent to statements like: - - ``` - _LOG.debug("%s, %s", to_str("ticker"), to_str("exchange")) - _LOG.debug("ticker=%s, exchange=%s", ticker, exchange) - ``` - """ - logger_verbosity = hdbg.get_logger_verbosity() - # print("verbosity=%s logger_verbosity=%s" % (verbosity, logger_verbosity)) - # We want to avoid the overhead of converting strings, so we evaluate the - # expressions only if we are going to print. - if verbosity >= logger_verbosity: - # We need to increment frame_lev since we are 2 levels deeper in the stack. - _to_str = lambda x: to_str(x, frame_level=3) - num_vals = len(vals) - if num_vals == 1: - fstring = "%s" - vals = _to_str(vals[0]) # type: ignore - else: - fstring = ", ".join(["%s"] * num_vals) - vals = list(map(_to_str, vals)) # type: ignore - logger.log(verbosity, fstring, vals) - - -# TODO(gp): Replace calls to `_LOG.debug("\n%s", hprint.frame(...)` with this. -# TODO(gp): Consider changing the signature from -# _log_frame(_LOG, "hello", verbosity=logger.INFO)) -# to -# _log_frame(_LOG.info, "hello", ...) -# by using the first element as a Callable -def log_frame( - logger: logging.Logger, - fstring: str, - *args: Any, - level: int = 1, - char: str = "#", - verbosity: int = logging.DEBUG, -) -> None: - """ - Log using a frame around the text with different number of leading `#` (or - `char`) to organize the log visually. 
- - The logging output looks like: - _log_frame(_LOG, "hello", verbosity=logger.INFO)) - ``` - 07:44:51 printing : log_frame : 390 : - # ######################################################################### - # hello - # ######################################################################### - ``` - - :param txt: text to print in a frame - :param level: number of `#` (or `char`) to prepend the logged text - :param char: char to prepend the logged text with - :param verbosity: logging verbosity - """ - hdbg.dassert_isinstance(logger, logging.Logger) - hdbg.dassert_isinstance(fstring, str) - msg = fstring % args - msg = msg.rstrip().lstrip() - msg = frame(msg) - # Prepend a `# `, if needed. - if level > 0: - prefix = level * char + " " - msg = prepend(msg, prefix=prefix) - # Add an empty space. - msg = "\n" + msg - logger.log(verbosity, "%s", msg) - - -# ############################################################################# - - -def type_to_string(type_as_str: str) -> str: - """ - Return a short string representing the type of an object, e.g., - "dataflow.Node" (instead of "class <'dataflow.Node'>") - """ - if isinstance(type_as_str, type): - type_as_str = str(type_as_str) - hdbg.dassert_isinstance(type_as_str, str) - # Remove the extra string from: - # - prefix = " str: - ret = f"({type(obj)}) {obj}" - return ret - - -# ############################################################################# - - -def format_list( - list_: List[Any], - *, - sep: str = " ", - max_n: Optional[int] = None, - tag: Optional[str] = None, -) -> str: - # sep = ", " - if max_n is None: - max_n = 10 - hdbg.dassert_lte(1, max_n) - n = len(list_) - txt = "" - if tag is not None: - txt += f"{tag}: " - txt += f"({n}) " - if n < max_n: - txt += sep.join(map(str, list_)) - else: - num_elems = int(max_n / 2) - hdbg.dassert_lte(1, num_elems) - txt += sep.join(map(str, list_[:num_elems])) - txt += " ... 
" - # pylint: disable=invalid-unary-operand-type - txt += sep.join(map(str, list_[-num_elems:])) - return txt - - -# TODO(gp): Use format_list(). -def list_to_str( - list_: List, - *, - tag: str = "", - sort: bool = False, - axis: int = 0, - to_string: bool = False, -) -> str: - """ - Print list / index horizontally or vertically. - """ - # TODO(gp): Fix this. - _ = to_string - txt = "" - if axis == 0: - if list_ is None: - txt += f"{tag}: (0) None\n" - else: - # hdbg.dassert_in(type(l), (list, pd.Index, pd.Int64Index)) - vals = list(map(str, list_)) - if sort: - vals = sorted(vals) - txt += f"{tag}: ({len(list_)}) {' '.join(vals)}\n" - elif axis == 1: - txt += f"{tag} ({len(list_)}):\n" - vals = list(map(str, list_)) - if sort: - vals = sorted(vals) - txt += "\n".join(vals) + "\n" - else: - raise ValueError(f"Invalid axis='{axis}'") - return txt - - -def list_to_str2( - vals: List[Any], - *, - sep_char: str = ", ", - enclose_str_char: str = "'", - max_num: Optional[int] = 10, -) -> str: - """ - Convert a list of values into a formatted string representation. - - E.g., [1, "two", 3, 4, 5] -> "5 ['1', 'two', '3', '4', '5']" - - :param vals: values to be converted - :param sep_char: separator to use between elements - :param enclose_str_char: character to enclose each element's string - representation; if empty, elements are not enclosed - :param max_num: maximum number of elements to display in the output - :return: the formatted string representing the list - """ - vals_as_str = list(map(str, vals)) - # Add a str around. - if enclose_str_char: - vals_as_str = [ - enclose_str_char + v + enclose_str_char for v in vals_as_str - ] - # Build the output string with optional truncation. - ret = f"{len(vals)} [" - if max_num is not None and len(vals) > max_num: - hdbg.dassert_lt(1, max_num) - ret += sep_char.join(vals_as_str[: int(max_num / 2)]) - ret += sep_char + "..." 
+ sep_char - ret += sep_char.join(vals_as_str[-int(max_num / 2) :]) - else: - ret += sep_char.join(vals_as_str) - ret += "]" - return ret - - -def set_diff_to_str( - obj1: Iterable, - obj2: Iterable, - *, - obj1_name: str = "obj1", - obj2_name: str = "obj2", - sep_char: str = " ", - add_space: bool = False, -) -> str: - """ - Compute the difference between two sequences of data and return a formatted - string. - - :param obj1: The first iterable object. - :param obj2: The second iterable object. - :param obj1_name: The name to use for the first object in the output string. - :param obj2_name: The name to use for the second object in the output string. - :param sep_char: The character to use for separating elements in the output - string. - :param add_space: Whether to add empty lines to make the output more readable. - :return: A formatted string showing the differences between the two objects. - - Example: - ``` - >>> obj1 = [1, 2, 3, 4] - >>> obj2 = [3, 4, 5, 6] - >>> set_diff_to_str(obj1, obj2, obj1_name="list1", obj2_name="list2") - * list1: (4) 1 2 3 4 - * list2: (4) 3 4 5 6 - * intersect=(2) 3 4 - * list1-list2=(2) 1 2 - * list2-list1=(2) 5 6 - ``` - """ - - def _to_string(obj: Iterable) -> str: - obj = sorted(list(obj)) - if sep_char == "\n": - txt = indent("\n" + sep_char.join(map(str, obj))) - else: - txt = sep_char.join(map(str, obj)) - return txt - - res: List[str] = [] - # obj1. - obj1 = set(obj1) - hdbg.dassert_lte(1, len(obj1)) - res.append(f"* {obj1_name}: ({len(obj1)}) {_to_string(obj1)}") - if add_space: - res.append("") - # obj2. - obj2 = set(obj2) - hdbg.dassert_lte(1, len(obj2)) - res.append(f"* {obj2_name}: ({len(obj2)}) {_to_string(obj2)}") - if add_space: - res.append("") - # obj1 intersect obj2. - intersection = obj1.intersection(obj2) - res.append(f"* intersect=({len(intersection)}) {_to_string(intersection)}") - if add_space: - res.append("") - # obj1 - obj2. 
- diff = obj1 - obj2 - res.append(f"* {obj1_name}-{obj2_name}=({len(diff)}) {_to_string(diff)}") - if add_space: - res.append("") - # obj2 - obj1. - diff = obj2 - obj1 - res.append(f"* {obj2_name}-{obj1_name}=({len(diff)}) {_to_string(diff)}") - if add_space: - res.append("") - # Join all result lines. - result = "\n".join(res) - return result - - -# ############################################################################# - - -def remove_non_printable_chars(txt: str) -> str: - # From https://stackoverflow.com/questions/14693701 - # 7-bit and 8-bit C1 ANSI sequences - ansi_escape = re.compile( - r""" - \x1B # ESC - (?: # 7-bit C1 Fe (except CSI) - [@-Z\\-_] - | # or [ for CSI, followed by a control sequence - \[ - [0-?]* # Parameter bytes - [ -/]* # Intermediate bytes - [@-~] # Final byte - ) - """, - re.VERBOSE, - ) - txt = ansi_escape.sub("", txt) - return txt - - -# TODO(gp): Maybe move to helpers/hpython.py since it's not about printing. -def sort_dictionary(dict_: Dict) -> Dict: - """ - Sort a dictionary recursively using nested OrderedDict. - """ - import collections - - res = collections.OrderedDict() - for k, v in sorted(dict_.items()): - if isinstance(v, dict): - res[k] = sort_dictionary(v) - else: - res[k] = v - return res - - -def to_pretty_str(obj: Any) -> str: - if isinstance(obj, dict): - res = pprint.pformat(obj) - # import json - # res = json.dumps(obj, indent=4, sort_keys=True) - else: - res = str(obj) - return res - - -# TODO(gp): GSI -> rename remove_lines()? -def filter_text(regex: str, txt: str) -> str: - """ - Remove lines in `txt` that match the regex `regex`. - """ - _LOG.debug("Filtering with '%s'", regex) - if regex is None: - return txt - txt_out = [] - txt_as_arr = txt.split("\n") - for line_ in txt_as_arr: - if re.search(regex, line_): - _LOG.debug("Skipping line='%s'", line_) - continue - txt_out.append(line_) - # We can only remove lines. 
- hdbg.dassert_lte( - len(txt_out), - len(txt_as_arr), - "txt_out=\n'''%s'''\ntxt=\n'''%s'''", - "\n".join(txt_out), - "\n".join(txt_as_arr), - ) - txt = "\n".join(txt_out) - return txt - - -def dassert_one_trailing_newline(txt: str) -> None: - match = re.search(r"\n*$", txt) - hdbg.dassert(match) - assert match is not None - num_newlines = len(match.group()) - hdbg.dassert_eq( - num_newlines, 0, "num_newlines='%s' txt='%s'", num_newlines, txt - ) - - -def to_info(tag: str, txt: Union[str, List[str]]) -> str: - """ - Return a string with a tag and the text indented. - - :param tag: the tag to add to the text - :param txt: the text to indent - :return: the string with the tag and the text indented - """ - hdbg.dassert_isinstance(tag, str) - hdbg.dassert_isinstance(txt, (str, list)) - txt_tmp = "" - txt_tmp += "# " + tag + "\n" - # Indent the text. - if not isinstance(txt, str): - for t in txt: - hdbg.dassert_isinstance(t, str) - txt = "\n".join(txt) - txt_tmp += indent(txt) - # Ensure that there is a single trailing newline. 
- txt_tmp = txt_tmp.rstrip("\n") - # txt_tmp += "\n" - # _dassert_one_trailing_newline(txt_tmp) - _LOG.debug("'%s'", txt_tmp) - return txt_tmp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py deleted file mode 100644 index c9cdd7be4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hpytest.py +++ /dev/null @@ -1,266 +0,0 @@ -""" -Import as: - -import helpers.hpytest as hpytest -""" - -import logging -import os -import shutil -import sys -from typing import List, Optional - -import junitparser - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def _pytest_show_artifacts( - dir_name: str, *, tag: Optional[str] = None -) -> List[str]: - hdbg.dassert_ne(dir_name, "") - hdbg.dassert_dir_exists(dir_name) - cd_cmd = f"cd {dir_name} && " - # There might be no pytest artifacts. - abort_on_error = False - file_names: List[str] = [] - # Find pytest artifacts. - cmd = 'find . -name ".pytest_cache" -type d' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # - cmd = 'find . -name "__pycache__" -type d' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # Find .pyc artifacts. - cmd = 'find . -name "*.pyc" -type f' - _, output_tmp = hsystem.system_to_string( - cd_cmd + cmd, abort_on_error=abort_on_error - ) - file_names.extend(output_tmp.split()) - # Remove empty lines. 
- file_names = hprint.remove_empty_lines(file_names) - # - if tag is not None: - num_files = len(file_names) - _LOG.info("%s: %d", tag, num_files) - _LOG.debug("\n%s", hprint.indent("\n".join(file_names))) - return file_names # type: ignore - - -def pytest_clean(dir_name: str, preview: bool = False) -> None: - """ - Clean pytest artifacts. - """ - _LOG.warning("Cleaning pytest artifacts") - hdbg.dassert_ne(dir_name, "") - hdbg.dassert_dir_exists(dir_name) - if preview: - _LOG.warning("Preview only: nothing will be deleted") - # Show before cleaning. - file_names = _pytest_show_artifacts(dir_name, tag="Before cleaning") - # Clean. - for f in file_names: - exists = os.path.exists(f) - _LOG.debug("%s -> exists=%s", f, exists) - if exists: - if not preview: - if os.path.isdir(f): - shutil.rmtree(f) - elif os.path.isfile(f): - os.remove(f) - else: - raise ValueError(f"Can't delete {f}") - else: - _LOG.debug("rm %s", f) - # Show after cleaning. - file_names = _pytest_show_artifacts(dir_name, tag="After cleaning") - hdbg.dassert_eq(len(file_names), 0) - - -# ############################################################################# -# JUnitReporter -# ############################################################################# - - -class JUnitReporter: - def __init__(self, xml_file: str): - self.xml_file = xml_file - self.xml_data = None - self.overall_stats = { - "passed": 0, - "failed": 0, - "error": 0, - "skipped": 0, - "total_time": 0.0, - "total_tests": 0, - } - - def _load(self) -> None: - """ - Load the JUnit XML file. - """ - self.xml_data = junitparser.JUnitXml.fromfile(self.xml_file) - - def parse(self): - """ - Parse the JUnit XML file. - """ - try: - self._load() - # Calculate overall statistics. 
- for suite in self.xml_data: - if isinstance(suite, junitparser.TestSuite): - self.overall_stats["total_time"] += suite.time or 0 - self.overall_stats["total_tests"] += suite.tests or 0 - self.overall_stats["passed"] += ( - (suite.tests or 0) - - (suite.failures or 0) - - (suite.errors or 0) - - (suite.skipped or 0) - ) - self.overall_stats["failed"] += suite.failures or 0 - self.overall_stats["error"] += suite.errors or 0 - self.overall_stats["skipped"] += suite.skipped or 0 - except Exception as e: - print(hprint.color_highlight(f"Error parsing XML file: {e}", "red")) - sys.exit(1) - - def _get_colored_status(self, case: junitparser.TestCase) -> str: - """ - Get the colored status representation of test case. - """ - if not case.result or len(case.result) == 0: - return hprint.color_highlight("PASSED", "green") - result_type = case.result[0].__class__.__name__ - if result_type == "Failure": - return hprint.color_highlight("FAILED", "red") - elif result_type == "Error": - return hprint.color_highlight("ERROR", "red") - elif result_type == "Skipped": - return hprint.color_highlight("SKIPPED", "yellow") - else: - return hprint.color_highlight("PASSED", "green") - - def _print_detailed_results(self): - print(hprint.color_highlight("=" * 70, "bold")) - print( - hprint.color_highlight( - f"collected {self.overall_stats['total_tests']} items", "bold" - ) - ) - for _, suite in enumerate(self.xml_data): - if not isinstance(suite, junitparser.TestSuite): - continue - # Print suite header. - print(f"\n{hprint.color_highlight('=' * 70, 'blue')}") - print(hprint.color_highlight(f"Test: {suite.name}", "bold")) - print( - hprint.color_highlight( - f"Timestamp: {getattr(suite, 'timestamp', 'Unknown')}", - "bold", - ) - ) - print(hprint.color_highlight("-" * 70, "blue")) - # Print each test case. 
- for case in suite: - if isinstance(case, junitparser.TestCase): - status_display = self._get_colored_status(case) - test_time = getattr(case, "time", 0) or 0 - print( - f" {case.classname}::{case.name} {status_display} ({test_time:.3f}s)" - ) - # Print suite summary. - suite_passed = ( - (suite.tests or 0) - - (suite.failures or 0) - - (suite.errors or 0) - - (suite.skipped or 0) - ) - summary_parts = [] - if suite_passed > 0: - summary_parts.append( - hprint.color_highlight(f"{suite_passed} passed", "green") - ) - if suite.failures and suite.failures > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.failures} failed", "red") - ) - if suite.errors and suite.errors > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.errors} error", "red") - ) - if suite.skipped and suite.skipped > 0: - summary_parts.append( - hprint.color_highlight(f"{suite.skipped} skipped", "WARNING") - ) - suite_summary = ( - ", ".join(summary_parts) if summary_parts else "no tests" - ) - suite_time = getattr(suite, "time", 0) or 0 - print( - hprint.color_highlight( - f"Summary: {suite_summary} in {suite_time:.3f}s", "INFO" - ) - ) - - def _print_final_summary(self): - summary_parts = [] - if self.overall_stats["passed"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['passed']} passed", "green" - ) - ) - if self.overall_stats["failed"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['failed']} failed", "red" - ) - ) - if self.overall_stats["error"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['error']} error", "red" - ) - ) - if self.overall_stats["skipped"] > 0: - summary_parts.append( - hprint.color_highlight( - f"{self.overall_stats['skipped']} skipped", "yellow" - ) - ) - summary_text = ", ".join(summary_parts) if summary_parts else "no tests" - time_text = "in " + hprint.color_highlight( - f"{self.overall_stats['total_time']:.2f}s", "bold" - ) - # Determine overall 
status - if self.overall_stats["failed"] > 0 or self.overall_stats["error"] > 0: - status_indicator = hprint.color_highlight("FAILED", "red") - elif ( - self.overall_stats["skipped"] > 0 - and self.overall_stats["passed"] == 0 - ): - status_indicator = hprint.color_highlight("SKIPPED", "yellow") - else: - status_indicator = hprint.color_highlight("PASSED", "green") - # Print summary. - print(f"\n{hprint.color_highlight('=' * 70, 'bold')}") - print( - hprint.color_highlight( - f"Summary: {summary_text} {time_text}", "INFO" - ) - ) - print(hprint.color_highlight(f"Result: {status_indicator}", "INFO")) - - def print_summary(self): - self._print_detailed_results() - self._print_final_summary() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py deleted file mode 100644 index 2ee2166f9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hretry.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Import as: - -import helpers.hretry as hretry -""" - -import asyncio -import functools -import logging -import time -from typing import Any, Tuple - -_LOG = logging.getLogger(__name__) - - -def sync_retry( - num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 -) -> object: - """ - Decorator retrying the wrapped function/method num_attempts times if the - `exceptions` listed in exceptions are thrown. - - :param num_attempts: the number of times to repeat the wrapped function/method - - The function will be called `num_attempts` times. 
- :param exceptions: list of exceptions that trigger a retry attempt - :param retry_delay_in_sec: the number of seconds to wait between retry attempts - :return: the result of the wrapped function/method - """ - - def decorator(func) -> object: - @functools.wraps(func) - def retry_wrapper(*args, **kwargs): - attempts_count = 1 - last_exception = None - while attempts_count < num_attempts + 1: - try: - return func(*args, **kwargs) - except exceptions as e: - last_exception = e - _LOG.warning( - "Exception %s thrown when attempting to run %s, attempt " - "%d of %d", - e, - func, - attempts_count, - num_attempts, - ) - attempts_count += 1 - time.sleep(retry_delay_in_sec) - _LOG.error( - "Function %s failed after %d attempts", func, num_attempts - ) - raise last_exception - - return retry_wrapper - - return decorator - - -def async_retry( - num_attempts: int, exceptions: Tuple[Any], retry_delay_in_sec: int = 0 -) -> object: - """ - Same as `sync_retry` decorator but for `async` functions. - """ - - def decorator(func) -> object: - @functools.wraps(func) - async def retry_wrapper(*args, **kwargs): - attempts_count = 1 - last_exception = None - while attempts_count < num_attempts + 1: - try: - return await func(*args, **kwargs) - except exceptions as e: - last_exception = e - _LOG.warning( - "Exception %s thrown when attempting to run %s, attempt " - "%d of %d", - e, - func, - attempts_count, - num_attempts, - ) - attempts_count += 1 - await asyncio.sleep(retry_delay_in_sec) - _LOG.error( - "Function %s failed after %d attempts", func, num_attempts - ) - raise last_exception - - return retry_wrapper - - return decorator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py deleted file mode 100644 index a28914cb7..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hs3.py +++ /dev/null @@ -1,1129 +0,0 @@ -""" -Import as: - -import helpers.hs3 as hs3 -""" - -import argparse -import configparser -import copy -import functools -import gzip -import logging -import os -import pathlib -import re -from typing import Any, Dict, List, Optional, Tuple, Union - -_WARNING = "\033[33mWARNING\033[0m" - -try: - import s3fs - - # Handle different versions of s3fs where core module may be at different locations - if hasattr(s3fs, "core"): - from s3fs.core import S3File, S3FileSystem - else: - # In newer versions, classes might be directly in s3fs module - try: - from s3fs import S3File, S3FileSystem - except ImportError: - # Fallback to dynamic import - S3File = getattr(s3fs, "S3File", None) - S3FileSystem = getattr(s3fs, "S3FileSystem", None) -except ModuleNotFoundError: - _module = "s3fs" - print(_WARNING + f": Can't find {_module}: continuing") - # Define dummy classes for type hints when s3fs is not available - s3fs = None - - class S3File: - pass - - class S3FileSystem: - pass - - -# Avoid the following dependency from other `helpers` modules to prevent import cycles. -# import helpers.hpandas as hpandas -# import helpers.hsql as hsql -# import helpers.hunit_test as hunitest - -# To enforce this order of the imports we use the directive for the linter below. 
-import helpers.hdbg as hdbg # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hintrospection as hintros # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hio as hio # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hprint as hprint # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hserver as hserver # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.hsystem as hsystem # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position -import helpers.htimer as htimer # noqa: E402 module level import not at top of file # pylint: disable=wrong-import-position - -_LOG = logging.getLogger(__name__) - -# AWS Region global constants -# Moved to hs3.py from haws.py due to cyclic imports detected in -# build https://github.com/cryptokaizen/cmamp/actions/runs/10729983412/job/29757600889 -AWS_EUROPE_REGION_1 = "eu-north-1" -AWS_TOKYO_REGION_1 = "ap-northeast-1" -AWS_US_REGION_1 = "us-east-1" -AWS_REGIONS = [AWS_EUROPE_REGION_1, AWS_TOKYO_REGION_1, AWS_US_REGION_1] - -# TODO(gp): @all separate S3 code in `helpers/hs3.py` from authentication and -# AWS profile code in `helpers/aws_authentication.py`. - -# ############################################################################# -# Basic utils. -# ############################################################################# - -AwsProfile = Optional[Union[str, S3FileSystem]] - - -def is_s3_path(s3_path: str) -> bool: - """ - Return whether a path is on an S3 bucket, i.e., if it starts with `s3://`. 
- """ - hdbg.dassert_isinstance(s3_path, str) - valid = s3_path.startswith("s3://") - if s3_path.startswith("s3://s3://"): - valid = False - return valid - - -def dassert_is_s3_path(s3_path: str) -> None: - """ - Assert if a file is not a S3 path. - """ - hdbg.dassert( - is_s3_path(s3_path), - "Invalid S3 file='%s'", - s3_path, - ) - - -def dassert_is_not_s3_path(s3_path: str) -> None: - """ - Assert if a file is a S3 path. - """ - hdbg.dassert( - not is_s3_path(s3_path), - "Passed an S3 file='%s' when it was not expected", - s3_path, - ) - - -def dassert_is_valid_aws_profile(path: str, aws_profile: AwsProfile) -> None: - """ - Check that the value of `aws_profile` is compatible with the S3 or local - file `path`. - - :param path: S3 or local path - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - """ - if is_s3_path(path): - hdbg.dassert_is_not( - aws_profile, None, "path=%s aws_profile=%s", path, aws_profile - ) - else: - hdbg.dassert_is( - aws_profile, None, "path=%s aws_profile=%s", path, aws_profile - ) - - -# /////////////////////////////////////////////////////////////////////////////// - - -def get_s3fs(aws_profile: AwsProfile) -> S3FileSystem: - """ - Return a `s3fs` object from a given AWS profile. - - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - if hserver.is_ig_prod(): - # On IG prod machines we let the Docker container infer the right AWS - # account. - _LOG.warning("Not using AWS profile='%s'", aws_profile) - s3fs_ = S3FileSystem() - else: - if isinstance(aws_profile, str): - # When deploying jobs via ECS the container obtains credentials - # based on passed task role specified in the ECS task-definition, - # refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - if ( - # TODO(heanh): Centralize the list of supported profiles. 
- aws_profile in ["ck", "csfy"] - and hserver.is_inside_ecs_container() - ): - _LOG.info("Fetching credentials from task IAM role") - s3fs_ = S3FileSystem() - else: - # TODO(heanh): Make this manual extraction of credentials - # code obsoleted. - # From https://stackoverflow.com/questions/62562945 - # aws_credentials = get_aws_credentials(aws_profile) - # _LOG.debug("%s", pprint.pformat(aws_credentials)) - # s3fs_ = S3FileSystem( - # anon=False, - # key=aws_credentials["aws_access_key_id"], - # secret=aws_credentials["aws_secret_access_key"], - # token=aws_credentials["aws_session_token"], - # client_kwargs={"region_name": aws_credentials["aws_region"]}, - # ) - # - # We do not need to extract the credential from the file because - # the config (`~/.aws/config`) and credential - # (`~/.aws/credentials`) are already set. - s3fs_ = S3FileSystem(anon=False, profile=aws_profile) - elif isinstance(aws_profile, S3FileSystem): - s3fs_ = aws_profile - else: - raise ValueError(f"Invalid aws_profile='{aws_profile}'") - return s3fs_ - - -def dassert_path_exists( - path: str, aws_profile: Optional[AwsProfile] = None -) -> None: - """ - Assert if S3 or local path doesn't exist. `aws_profile` is specified if and - only if path is an S3 path. - - :param path: S3 or local path - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - hdbg.dassert(s3fs_.exists(path), f"S3 path '{path}' doesn't exist!") - else: - hdbg.dassert_path_exists(path) - - -def dassert_path_not_exists( - path: str, aws_profile: Optional[AwsProfile] = None -) -> None: - """ - Assert if S3 or local path exist. `aws_profile` is specified if and only if - path is an S3 path. 
- - :param path: S3 or local path - :param aws_profile: the name of an AWS profile or a s3fs filesystem - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - hdbg.dassert(not s3fs_.exists(path), f"S3 path '{path}' already exist!") - else: - hdbg.dassert_path_not_exists(path) - - -# TODO(gp): Consider using `s3fs.split_path`. -def split_path(s3_path: str) -> Tuple[str, str]: - """ - Separate an S3 path in the bucket and the rest of the path as absolute from - the root. - - E.g., for `s3://alphamatic-data/tmp/hello` returns (`alphamatic- - data`, /tmp/hello`) - """ - dassert_is_s3_path(s3_path) - # Remove the s3 prefix. - prefix = "s3://" - hdbg.dassert(s3_path.startswith(prefix)) - s3_path = s3_path[len(prefix) :] - # Break the path into dirs. - dirs = s3_path.split("/") - bucket = dirs[0] - abs_path = os.path.join("/", *dirs[1:]) - hdbg.dassert( - abs_path.startswith("/"), - "The path should be absolute instead of %s", - abs_path, - ) - return bucket, abs_path - - -def _replace_star_with_double_star(pattern_to_modify: str) -> str: - """ - Replace a single star with a double star in a pattern. - - Originally we simply used to do `pattern.replace("*", "**")`. - but in the newer versions of `s3fs` this is not allowed: - `ValueError: Invalid pattern: '**' can - only be an entire path component` - - We also need to take care of special such as: - *.csv* -> **/*.csv* - - Examples: - s3://bucket/*/path/* -> s3://bucket/**/*/path/**/* - s3://bucket/*/path/csv* -> s3://bucket/**/*/path/csv* - - :param pattern_to_modify: pattern to replace wildcards in - :return: pattern with wildcards replaced - """ - append_wildcard = False - # Handle the special case of ending with wildcard - # (e.g.: *.csv*). 
- if re.match(r"(?=.*[a-zA-Z0-9]).*\*$", pattern_to_modify): - pattern_to_modify = pattern_to_modify[:-1] - append_wildcard = True - new_pattern = pattern_to_modify.replace("*", "**/*") - new_pattern = new_pattern + "*" if append_wildcard else new_pattern - return new_pattern - - -def listdir( - dir_name: str, - pattern: str, - only_files: bool, - use_relative_paths: bool, - *, - exclude_git_dirs: bool = True, - aws_profile: Optional[AwsProfile] = None, - maxdepth: Optional[int] = None, -) -> List[str]: - """ - Counterpart to `hio.listdir` with S3 support. - - :param dir_name: S3 or local path - :param aws_profile: AWS profile to use if and only if using an S3 path, - otherwise `None` for local path - :param maxdepth: limit the depth of directory traversal - """ - dassert_is_valid_aws_profile(dir_name, aws_profile) - _LOG.debug("pattern=%s", pattern) - if is_s3_path(dir_name): - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(dir_name, s3fs_) - # Ensure that there are no multiple stars in pattern. - hdbg.dassert_not_in("**", pattern) - # `hio.listdir` is using `find` which looks for files and directories - # descending recursively in the directory. - # One star in glob will use `maxdepth=1`. - pattern = _replace_star_with_double_star(pattern) - _LOG.debug("pattern=%s", pattern) - # Detailed S3 objects in dict form with metadata. - path_objects = s3fs_.glob( - f"{dir_name}/{pattern}", detail=True, maxdepth=maxdepth - ) - if only_files: - # Original `path_objects` must not be changed during loop. - temp_path_objects = copy.deepcopy(list(path_objects.values())) - # Use metadata to distinguish files from directories without - # calling `s3fs_.isdir/isfile`. 
- for path_object in temp_path_objects: - if path_object["type"] != "file": - path_objects.pop(path_object["Key"]) - paths = list(path_objects.keys()) - if exclude_git_dirs: - paths = [ - path for path in paths if ".git" not in pathlib.Path(path).parts - ] - bucket, absolute_path = split_path(dir_name) - # Basically the goal is to remove `s3://` from the full S3 path. - root_path = f"{bucket}{absolute_path}" - # Remove redundant separators. - paths = {os.path.normpath(path) for path in paths} - # Remove special entries such as `.` (`root_path` in this case) and - # bucket name to keep the same return format as in `hio.listdir()`. - paths_to_exclude = [bucket, root_path] - paths = [path for path in paths if path not in paths_to_exclude] - if use_relative_paths: - paths = [os.path.relpath(path, start=root_path) for path in paths] - else: - paths = hio.listdir( - dir_name, - pattern, - only_files, - use_relative_paths, - exclude_git_dirs=exclude_git_dirs, - maxdepth=maxdepth, - ) - return paths - - -def du( - path: str, - *, - human_format: bool = False, - aws_profile: Optional[AwsProfile] = None, -) -> Union[int, str]: - """ - Counterpart to `hsystem.du` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. - """ - dassert_is_valid_aws_profile(path, aws_profile) - if is_s3_path(path): - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(path, s3fs_) - size: Union[int, str] = s3fs_.du(path) - if human_format: - size = hintros.format_size(size) - else: - size = hsystem.du(path, human_format=human_format) - return size - - -def to_file( - lines: str, - file_name: str, - *, - mode: Optional[str] = None, - force_flush: bool = False, - aws_profile: Optional[AwsProfile] = None, -) -> None: - """ - Counterpart to `hio.to_file` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. 
- """ - dassert_is_valid_aws_profile(file_name, aws_profile) - if is_s3_path(file_name): - # Ensure that `bytes` is used. - if mode is not None and "b" not in mode: - raise ValueError("S3 only allows binary mode!") - hdbg.dassert_isinstance(lines, str) - # Convert lines to bytes, only supported mode for S3. - # Also create a list of new lines as raw bytes is not supported. - os_sep = os.linesep - lines_lst = [f"{line}{os_sep}".encode() for line in lines.split(os_sep)] - # Inspect file name and path. - hio.dassert_is_valid_file_name(file_name) - s3fs_ = get_s3fs(aws_profile) - mode = "wb" if mode is None else mode - # Open S3 file. `rb` is the default mode for S3. - with s3fs_.open(file_name, mode) as s3_file: - if file_name.endswith((".gz", ".gzip")): - # Open and decompress gzipped file. - with gzip.GzipFile(fileobj=s3_file) as gzip_file: - gzip_file.writelines(lines_lst) - else: - # Any other file. - s3_file.writelines(lines_lst) - if force_flush: - # TODO(Nikola): Investigate S3 alternative for `os.fsync(f.fileno())`. - s3_file.flush() - else: - use_gzip = file_name.endswith((".gz", ".gzip")) - hio.to_file( - file_name, - lines, - mode=mode, - use_gzip=use_gzip, - force_flush=force_flush, - ) - - -def from_file( - file_name: str, - encoding: Optional[Any] = None, - aws_profile: Optional[AwsProfile] = None, -) -> str: - """ - Counterpart to `hio.from_file` with S3 support. - - If and only if `aws_profile` is specified, S3 is used instead of - local filesystem. - """ - dassert_is_valid_aws_profile(file_name, aws_profile) - if is_s3_path(file_name): - if encoding: - raise ValueError("Encoding is not supported when reading from S3!") - # Inspect file name and path. - hio.dassert_is_valid_file_name(file_name) - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(file_name, s3fs_) - # Open s3 file. - with s3fs_.open(file_name) as s3_file: - if file_name.endswith((".gz", ".gzip")): - # Open and decompress gzipped file. 
- with gzip.GzipFile(fileobj=s3_file) as gzip_file: - data = gzip_file.read().decode() - else: - # Any other file. - data = s3_file.read().decode() - else: - data = hio.from_file(file_name, encoding=encoding) - return data - - -# TODO(Nina): consider adding support for handling dirs. -# TODO(Grisha): consider extending for the regular file system. -def copy_file_to_s3( - file_path: str, - s3_dst_file_path: str, - aws_profile: str, -) -> None: - """ - Copy a local file to S3. - - :param file_path: path to a file to copy - :param s3_dst_file_path: S3 path to copy to - :param aws_profile: aws profile - """ - hdbg.dassert_file_exists(file_path) - dassert_is_s3_path(s3_dst_file_path) - dassert_is_valid_aws_profile(s3_dst_file_path, aws_profile) - aws_s3_cp_cmd = f"aws s3 cp {file_path} {s3_dst_file_path}" - if not hserver.is_inside_ecs_container(): - # There is no `~/.aws/credentials` file inside an ECS container - # but the AWS credentials are received via a task role. So - # no need to pass the profile option. - aws_s3_cp_cmd += f" --profile {aws_profile}" - _LOG.info("Copying from %s to %s", file_path, s3_dst_file_path) - hsystem.system(aws_s3_cp_cmd, suppress_output=False) - - -def get_local_or_s3_stream( - file_name: str, **kwargs: Any -) -> Tuple[Union[S3FileSystem, str], Any]: - """ - Get S3 stream for desired file or simply returns file name. - - :param file_name: file name or full path to file - """ - _LOG.debug(hprint.to_str("file_name kwargs")) - # Handle the s3fs param, if needed. - if is_s3_path(file_name): - # For S3 files we need to have an `s3fs` parameter. 
- hdbg.dassert_in( - "s3fs", - kwargs, - "Credentials through s3fs are needed to access an S3 path", - ) - s3fs_ = kwargs.pop("s3fs") - hdbg.dassert_isinstance(s3fs_, S3FileSystem) - dassert_path_exists(file_name, s3fs_) - stream = s3fs_.open(file_name) - else: - if "s3fs" in kwargs: - _LOG.warning("Passed `s3fs` without an S3 file: ignoring it") - _ = kwargs.pop("s3fs") - hdbg.dassert_file_exists(file_name) - stream = file_name - return stream, kwargs - - -# ############################################################################# -# AWS. -# ############################################################################# - - -def _get_aws_config(file_name: str) -> configparser.RawConfigParser: - """ - Return a parser to the config in `~/.aws/{file_name}`. - """ - file_name = os.path.join(os.path.expanduser("~"), ".aws", file_name) - hdbg.dassert_file_exists(file_name) - # Read the config. - config = configparser.RawConfigParser() - config.read(file_name) - _LOG.debug("config.sections=%s", config.sections()) - return config - - -# ############################################################################# -# Authentication. 
-# ############################################################################# - -# Architecture of the AWS authentication -# -# - There can be two or more AWS S3 systems with different credentials, paths to -# bucket, and other properties -# - Some code needs to refer always and only to a specific S3 bucket -# - E.g., AM S3 bucket for Kibot data -# - Other code needs to work with different AWS S3 systems -# - E.g., `publish_notebooks`, saving / retrieving experiments, caching -# -# - The desired AWS S3 systems are selected through an `aws_profile` parameter -# (e.g., `ck`) -# - The value of AWS profile is obtained from -# - the `--aws_profile` command line option; or -# - a client specifying the needed `aws_profile` -# -# - The AWS profile is then used to access the `~/.aws` files and extract: -# - the credentials (e.g., `aws_access_key_id`, `aws_secret_access_key`, -# `aws_region`) -# - other variables (e.g., `aws_s3_bucket`) -# - The variables that are extracted from the files are passed through env vars -# directly for GitHub Actions CI -# - One can specify env vars conditioned to different profiles using the AWS -# profile -# - E.g., `ck` profile for `AWS_ACCESS_KEY_ID` corresponds to -# `CSFY_AWS_ACCESS_KEY_ID` - - -@functools.lru_cache() -def get_aws_credentials( - aws_profile: str, -) -> Dict[str, Optional[str]]: - """ - Read the AWS credentials for a given profile from `~/.aws` or from env - vars. - - :return: a dictionary with `access_key_id`, `aws_secret_access_key`, - `aws_region` and optionally `aws_session_token` - """ - _LOG.debug("Getting credentials for aws_profile='%s'", aws_profile) - if aws_profile == "__mock__": - # `mock` profile is artificial construct used only in tests. - aws_profile = aws_profile.strip("__") - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. 
- # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - result: Dict[str, Optional[str]] = {} - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - key_to_env_var: Dict[str, str] = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", - # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant - # that the var is simply the capitalized version of the key. - "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", - } - else: - key_to_env_var: Dict[str, str] = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - # TODO(gp): AWS_DEFAULT_REGION -> AWS_REGION so we can use the invariant - # that the var is simply the capitalized version of the key. - "aws_region": f"{profile_prefix}_AWS_DEFAULT_REGION", - } - # If all the AWS credentials are passed through env vars, they override the - # config file. - env_var_override = False - set_env_vars = [ - (env_var in os.environ and os.environ[env_var] != "") - for env_var in sorted(key_to_env_var.values()) - ] - if any(set_env_vars): - if not all(set_env_vars): - _LOG.warning( - "Some but not all AWS env vars are set (%s): ignoring", - str(set_env_vars), - ) - else: - env_var_override = True - if env_var_override: - _LOG.debug("Using AWS credentials from env vars") - # If one variable is defined all should be defined. 
- for key, env_var in key_to_env_var.items(): - _LOG.debug("'%s' in env vars=%s", env_var, env_var in os.environ) - _LOG.debug( - "'%s' != ''=%s", env_var, os.environ.get(env_var, None) != "" - ) - hdbg.dassert_in(env_var, os.environ) - result[key] = os.environ[env_var] - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - result["aws_session_token"] = os.environ[ - f"{profile_prefix}_AWS_SESSION_TOKEN" - ] - else: - result["aws_session_token"] = None - else: - _LOG.debug("Using AWS credentials from files") - # > more ~/.aws/credentials - # [am] - # aws_access_key_id=AKI... - # aws_secret_access_key=mhg.. - # aws_session_token = Fwo... - file_name = "credentials" - config = _get_aws_config(file_name) - # - key = "aws_access_key_id" - result[key] = config.get(aws_profile, key) - # - key = "aws_secret_access_key" - result[key] = config.get(aws_profile, key) - # - key = "aws_session_token" - if config.has_option(aws_profile, key): - result[key] = config.get(aws_profile, key) - else: - result[key] = None - # - key = "aws_s3_bucket" - if config.has_option(aws_profile, key): - result[key] = config.get(aws_profile, key) - else: - result[key] = None - # > more ~/.aws/config - # [am] - # region = us-east-1 - file_name = "config" - config = _get_aws_config(file_name) - key = "aws_region" - # For ~/.aws/config the tag is `profile am` instead of `am`. - result[key] = config.get(f"profile {aws_profile}", "region") - # - hdbg.dassert_is_subset(key_to_env_var.keys(), result.keys()) - return result - - -# ############################################################################# -# Bucket -# ############################################################################# - - -# TODO(Nikola): CmTask #1810 "Increase test coverage in helpers/hs3.py" -def get_s3_bucket_path(aws_profile: str, add_s3_prefix: bool = True) -> str: - """ - Return the S3 bucket from environment variable corresponding to a given - `aws_profile`. 
- - E.g., `aws_profile="am"` uses the value in `AM_AWS_S3_BUCKET` which - is usually set to `s3://alphamatic-data`. - """ - hdbg.dassert_type_is(aws_profile, str) - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - prefix = aws_profile.upper() - prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - env_var = f"{prefix}_AWS_S3_BUCKET" - if env_var in os.environ: - _LOG.debug("No env var '%s'", env_var) - s3_bucket = os.environ[env_var] - else: - # Fall-back to local credentials. - _LOG.debug("Checking credentials") - aws_credentials = get_aws_credentials(aws_profile) - _LOG.debug("%s", aws_credentials) - s3_bucket = aws_credentials.get("aws_s3_bucket", "") - hdbg.dassert_ne(s3_bucket, "") - hdbg.dassert( - not s3_bucket.startswith("s3://"), - "Invalid %s value '%s'", - env_var, - s3_bucket, - ) - if add_s3_prefix: - s3_bucket = "s3://" + s3_bucket - return s3_bucket - - -# TODO(sonaal): Do we really need aws profile as argument or -# we can use default? Ref. https://github.com/cryptokaizen/cmamp/pull/6045#discussion_r1380392748 -def get_s3_bucket_path_unit_test( - aws_profile: str, *, add_s3_prefix: bool = True -) -> str: - if aws_profile == "ck": - s3_bucket = "cryptokaizen-unit-test" - else: - hdbg.dfatal(f"Invalid aws_profile={aws_profile}") - if add_s3_prefix: - s3_bucket = "s3://" + s3_bucket - return s3_bucket - - -def get_latest_pq_in_s3_dir(s3_path: str, aws_profile: str) -> str: - """ - Get the latest Parquet file in the specified directory. - - :param s3_path: the path to s3 directory, e.g. - `cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance` - :param aws_profile: AWS profile to use - :return: the path to the latest Parquet file in the directory, - E.g. 
`cryptokaizen-data/reorg/daily_staged.airflow.pq/bid_ask/crypto_chassis.downloaded_1sec/binance/ - currency_pair=ETH_USDT/year=2022/month=12/data.parquet` - """ - hdbg.dassert_type_is(aws_profile, str) - s3fs_ = get_s3fs(aws_profile) - dir_name = f"{s3_path}/**/*.parquet" - pq_files = s3fs_.glob(dir_name, detail=True) - hdbg.dassert_lte(1, len(pq_files), "dir_name=%s", dir_name) - _LOG.debug("pq_files=%s", pq_files) - # Sort the files by the date they were modified for the last time. - sorted_files = sorted( - pq_files.items(), key=lambda t: t[1]["LastModified"], reverse=True - ) - # Get the path to the latest file. - latest_file_path = sorted_files[0][0] - return latest_file_path - - -# ############################################################################# -# Parser. -# ############################################################################# - - -def add_s3_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: - """ - Add the command line options for the AWS credentials. - """ - parser.add_argument( - "--aws_profile", - action="store", - type=str, - help="The AWS profile to use for `.aws/credentials` or for env vars", - ) - parser.add_argument( - "--s3_path", - action="store", - type=str, - default=None, - help="Full S3 dir path to use (e.g., `s3://alphamatic-data/foobar/`), " - "overriding any other setting", - ) - return parser - - -def _dassert_all_env_vars_set(key_to_env_var: Dict[str, str]) -> None: - """ - Check that the required AWS env vars are set and are not empty strings. - """ - for v in key_to_env_var.values(): - hdbg.dassert_in(v, os.environ) - hdbg.dassert_ne(v, "") - - -def _get_aws_file_text(key_to_env_var: Dict[str, str]) -> List[str]: - """ - Generate text from env vars for AWS files. 
- - E.g.: - ``` - aws_access_key_id=*** # gitleaks:allow - aws_secret_access_key=*** # gitleaks:allow - aws_s3_bucket=*** - ``` - :param key_to_env_var: aws settings names to the corresponding env - var names mapping - :return: AWS file text - """ - txt = [] - for k, v in key_to_env_var.items(): - line = f"{k}={os.environ[v]}" - txt.append(line) - return txt - - -def _get_aws_config_text(aws_profile: str) -> str: - """ - Generate text for the AWS config file, i.e. ".aws/config". - """ - # Set which env vars we need to get. - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - region_env_var = f"{profile_prefix}_AWS_DEFAULT_REGION" - key_to_env_var = {"region": region_env_var} - # Check that env vars are set. - _dassert_all_env_vars_set(key_to_env_var) - text = _get_aws_file_text(key_to_env_var) - text.insert(0, f"[profile {aws_profile}]") - text = "\n".join(text) - return text - - -def _get_aws_credentials_text(aws_profile: str) -> str: - """ - Generate text for the AWS credentials file, i.e. ".aws/credentials". - """ - # Set which env vars we need to get. - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" if aws_profile.upper() in ["AM", "CK"] else aws_profile.upper() - ) - # Check if AWS session token is set in environment variable. - if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - key_to_env_var = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - "aws_session_token": f"{profile_prefix}_AWS_SESSION_TOKEN", - # TODO(heanh): Is this needed? 
- "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", - } - else: - key_to_env_var = { - "aws_access_key_id": f"{profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - "aws_secret_access_key": f"{profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - # TODO(heanh): Is this needed? - "aws_s3_bucket": f"{profile_prefix}_AWS_S3_BUCKET", - } - # Check that env vars are set. - _dassert_all_env_vars_set(key_to_env_var) - text = _get_aws_file_text(key_to_env_var) - text.insert(0, f"[{aws_profile}]") - text = "\n".join(text) - return text - - -def generate_aws_files( - home_dir: str = "~", - aws_profiles: Optional[List[str]] = None, -) -> None: - """ - Generate AWS configuration files. - - This is needed to use the AWS CLI and the `boto3` library when we are in CI. - """ - if home_dir == "~": - home_dir = os.path.expanduser(home_dir) - config_file_name = os.path.join(home_dir, ".aws", "config") - credentials_file_name = os.path.join(home_dir, ".aws", "credentials") - # Check if the files already exist. - if os.path.exists(credentials_file_name) and os.path.exists( - config_file_name - ): - _LOG.info( - "Both files exist: %s and %s; exiting", - credentials_file_name, - config_file_name, - ) - return - if aws_profiles is None: - aws_profiles = ["ck"] - config_file_text = [] - credentials_file_text = [] - # Get text with settings for both files. - for profile in aws_profiles: - current_config_text = _get_aws_config_text(profile) - config_file_text.append(current_config_text) - current_credentials_text = _get_aws_credentials_text(profile) - credentials_file_text.append(current_credentials_text) - # Create both files. 
- config_file_text = "\n\n".join(config_file_text) - hio.to_file(config_file_name, config_file_text) - _LOG.debug("Saved AWS config to %s", config_file_name) - - # - credentials_file_text = "\n\n".join(credentials_file_text) - hio.to_file(credentials_file_name, credentials_file_text) - _LOG.debug("Saved AWS credentials to %s", credentials_file_name) - - -# ############################################################################# -# Archive and retrieve data from S3. -# ############################################################################# - - -# TODO(gp): -> helpers/aws_utils.py - - -def archive_data_on_s3( - src_dir: str, s3_path: str, aws_profile: Optional[str], tag: str = "" -) -> str: - """ - Compress dir `src_dir` and save it on AWS S3 under `s3_path`. - - A timestamp and a tag is added to make the name more informative. - The tgz is created so that when expanded a dir with the name `src_dir` is - created. - - :param src_dir: directory that will be compressed - :param s3_path: full S3 path starting with `s3://` - :param aws_profile: the profile to use. We use a string and not an - `AwsProfile` since this is typically the outermost caller in the stack, - and it doesn't reuse an S3 fs object - :param tag: a tag to add to the name of the file - """ - _LOG.info( - "# Archiving '%s' to '%s' with aws_profile='%s'", - src_dir, - s3_path, - aws_profile, - ) - hdbg.dassert_dir_exists(src_dir) - dassert_is_s3_path(s3_path) - _LOG.info( - "The size of '%s' is %s", - src_dir, - hsystem.du(src_dir, human_format=True), - ) - # Add a timestamp if needed. - dst_path = hsystem.append_timestamp_tag(src_dir, tag) + ".tgz" - # Compress the dir. - # > (cd .../TestRunExperimentArchiveOnS3.test_serial1; \ - # tar cvzf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz experiment.RH1E) - # experiment.RH1E/ - # experiment.RH1E/log.20210802-123758.txt - # experiment.RH1E/output_metadata.json - # ... 
- _LOG.debug("Destination path is '%s'", dst_path) - with htimer.TimedScope(logging.INFO, "Compressing"): - dir_name = os.path.dirname(src_dir) - base_name = os.path.basename(src_dir) - hdbg.dassert_ne(base_name, "", "src_dir=%s", src_dir) - cmd = "" - if dir_name != "": - cmd += f"cd {dir_name} && " - cmd += f"tar czf {dst_path} {base_name}" - hsystem.system(cmd) - _LOG.info( - "The size of '%s' is %s", - dst_path, - hsystem.du(dst_path, human_format=True), - ) - # Test expanding the tgz. The package should expand to the original dir. - # > tar tf /app/.../TestRunExperimentArchiveOnS3.test_serial1.tgz - # experiment.RH1E/ - # experiment.RH1E/log.20210802-123758.txt - # experiment.RH1E/output_metadata.json - _LOG.info("Testing archive") - cmd = f"tar tvf {dst_path}" - hsystem.system(cmd, log_level=logging.INFO, suppress_output=False) - # Copy to S3. - s3_file_path = os.path.join(s3_path, os.path.basename(dst_path)) - _LOG.info("Copying '%s' to '%s'", dst_path, s3_file_path) - hdbg.dassert_file_exists(dst_path) - s3fs_ = get_s3fs(aws_profile) - # TODO(gp): Make sure the S3 dir exists. - s3fs_.put(dst_path, s3_file_path) - _LOG.info("Data archived on S3 to '%s'", s3_file_path) - return s3_file_path - - -def copy_data_from_s3_to_local_dir( - src_s3_dir: str, dst_local_dir: str, aws_profile: str -) -> None: - """ - Copy data from S3 to a local dir. - - :param src_s3_dir: path on S3 storing the data to copy - :param scratch_space_path: local path on scratch space - :param aws_profile: AWS profile to use - """ - _LOG.debug( - "Copying input data from %s to %s", - src_s3_dir, - dst_local_dir, - ) - cmd = f"aws s3 sync {src_s3_dir} {dst_local_dir} --profile {aws_profile}" - hsystem.system(cmd, suppress_output=False, log_level="echo") - - -def retrieve_archived_data_from_s3( - s3_file_path: str, - dst_dir: str, - aws_profile: Optional[str] = None, - incremental: bool = True, -) -> str: - """ - Retrieve tgz file from S3, unless it's already present (incremental mode). 
- - :param s3_file_path: path to the S3 file with the archived data. E.g., - `s3://.../experiment.20210802-121908.tgz` - :param dst_dir: destination directory where to save the data - :param aws_profile: the profile to use. We use a string and not an - `AwsProfile` since this is typically the outermost caller in the stack, - and it doesn't reuse an S3 fs object - :param incremental: skip if the tgz file is already present locally - :return: path with the local tgz file - """ - _LOG.info( - "# Retrieving archive from '%s' to '%s' with aws_profile='%s'", - s3_file_path, - dst_dir, - aws_profile, - ) - dassert_is_s3_path(s3_file_path) - # Download the tgz file. - hio.create_dir(dst_dir, incremental=True) - dst_file = os.path.join(dst_dir, os.path.basename(s3_file_path)) - _LOG.debug(hprint.to_str("s3_file_path dst_dir dst_file")) - if incremental and os.path.exists(dst_file): - _LOG.warning("Found '%s': skipping downloading", dst_file) - else: - # Download. - s3fs_ = get_s3fs(aws_profile) - dassert_path_exists(s3_file_path, s3fs_) - _LOG.debug("Getting from s3: '%s' -> '%s", s3_file_path, dst_file) - s3fs_.get(s3_file_path, dst_file) - _LOG.info("Saved to '%s'", dst_file) - return dst_file - - -def expand_archived_data(src_tgz_file: str, dst_dir: str) -> str: - """ - Expand an S3 tarball storing results of an experiment. - - E.g., - - given a tgz file like `s3://.../experiment.20210802-121908.tgz` (which is the - result of compressing a dir like `/app/.../experiment.RH1E`) - - expand it into a dir `{dst_dir}/experiment.RH1E` - - :param src_tgz_file: path to the local file with the archived data. E.g., - `/.../experiment.20210802-121908.tgz` - :param dst_dir: directory where expand the archive tarball - :return: dir with the expanded data (e.g., `{dst_dir/experiment.RH1E`) - """ - _LOG.debug("Expanding '%s'", src_tgz_file) - # Get the name of the including dir, e.g., `experiment.RH1E`. 
- cmd = f"cd {dst_dir} && tar tzf {src_tgz_file} | head -1" - rc, enclosing_tgz_dir_name = hsystem.system_to_one_line(cmd) - _ = rc - _LOG.debug(hprint.to_str("enclosing_tgz_dir_name")) - tgz_dst_dir = os.path.join(dst_dir, enclosing_tgz_dir_name) - if os.path.exists(tgz_dst_dir): - hdbg.dassert_dir_exists(dst_dir) - _LOG.info( - "While expanding '%s' dst dir '%s' already exists: skipping", - src_tgz_file, - tgz_dst_dir, - ) - else: - # Expand the tgz file. - # The output should be the original compressed dir under `{dst_dir}`. - # E.g., - # > tar tzf /app/.../experiment.20210802-133901.tgz - # experiment.RH1E/ - # experiment.RH1E/log.20210802-133859.txt - # experiment.RH1E/result_0/ - with htimer.TimedScope(logging.INFO, "Decompressing"): - hdbg.dassert_file_exists(src_tgz_file) - cmd = f"cd {dst_dir} && tar xzf {src_tgz_file}" - hsystem.system(cmd) - hdbg.dassert_dir_exists(tgz_dst_dir) - # Return `{dst_dir}/experiment.RH1E`. - return tgz_dst_dir - - -def get_s3_bucket_from_stage( - stage: str, *, add_suffix: Optional[str] = None -) -> str: - """ - Retrieve the S3 bucket name based on the provided deployment stage. - - :param stage: the deployment stage, which can be 'test', 'preprod', - or 'prod'. - :param add_suffix: optional suffix to append to the bucket name. - :return: return corresponding S3 bucket name. - """ - # Mapping of stages to their respective S3 bucket names. - _S3_BUCKET_BY_STAGE = { - "test": "cryptokaizen-data-test", - "preprod": "cryptokaizen-data.preprod", - "prod": "cryptokaizen-data", - } - # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. - # Retrieve the region from the environment variable or use the default region 'eu-north-1'. - region = os.environ.get("CSFY_AWS_DEFAULT_REGION", "eu-north-1") - # TODO(Juraj): hack applied until a solution for #CmTask6620 is found. - if region == "ap-northeast-1": - _S3_BUCKET_BY_STAGE["preprod"] = "cryptokaizen-data-tokyo.preprod" - # Ensure the provided stage is valid. 
- hdbg.dassert_in(stage, _S3_BUCKET_BY_STAGE) - s3_bucket = _S3_BUCKET_BY_STAGE[stage] - # Append the suffix to the bucket name if provided. - if add_suffix: - s3_bucket = os.path.join(s3_bucket, add_suffix) - return s3_bucket diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py deleted file mode 100644 index f86f50342..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsecrets.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Import as: - -import helpers.hsecrets as hsecret -""" - -import atexit -import json -import sys -import warnings -from typing import Any, Dict, Optional - -from botocore.client import BaseClient -from botocore.exceptions import ClientError - -import helpers.hdbg as hdbg - - -def get_secrets_client(aws_profile: str) -> BaseClient: - """ - Return client to work with AWS Secrets Manager in the specified region. - """ - import helpers.haws as haws - - session = haws.get_session(aws_profile) - client = session.client(service_name="secretsmanager") - return client - - -def _get_flag_value(flag: str) -> str: - """ - Return flag value with concatenated date string. - - E.g., for flag = 'pytest' return 'pytest_20240619'. - """ - # Import here to avoid import extra dependencies in the thin environment. - import helpers.hdatetime as hdateti - - timestamp = hdateti.get_current_date_as_string("naive_ET") - updated_flag = "_".join([flag, timestamp]) - return updated_flag - - -def update_usedby( - secret_name: str, - secret_value: Dict[str, Any], - usedBy: str, - *, - remove: bool = False, -) -> Dict[str, Any]: - """ - Update the value of `usedBy` attribute from `secret_value` in AWS secrets - manager to lock the key. Unlock the key at the end of process using default - value of `usedBy`. 
- - :param secret_name: SecretId of record to be updated. - :param secret_value: Current value of SecretString. - :param usedBy: value of `usedBy` to be updated. Used to remove from - list on deallocation of resource, i.e., when remove is True. - :param remove: Boolean to decide addition or removal of `usedBy` value - in the secret value list of scripts. Default is False. - :return secret_value: SecretString with updated `usedBy` script. - """ - hdbg.dassert_isinstance(secret_name, str) - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # Modify value of used by in secret value. - if not remove: - try: - secret_value["usedBy"].append(usedBy) - except KeyError: - secret_value["usedBy"] = [usedBy] - else: - secret_value["usedBy"].remove(usedBy) - # Update the modified secret value in AWS secret manager. - client.update_secret( - SecretId=secret_name, SecretString=json.dumps(secret_value) - ) - return secret_value - - -def lock_secret( - secret_name: str, secret_value: Dict[str, Any] -) -> Optional[Dict[str, Any]]: - """ - Lock access to a secret to the current script. - - Lock access to secret key with trading keyword in `secret_name`, for a - runtime instance of a script, to avoid parallel run. - Add the script name to `usedBy` list in the AWS secret manager. - Raise error if the same script tries to access a locked key. - - :param secret_name: SecretId of record to be updated. - :param secret_value: Current value of SecretString. - :return secret_value: SecretString with updated `usedBy` script if not - already locked. - """ - current_script = sys.argv[0].split("/")[-1] - # Check if the current script is already using this secret. - current_usedBy = list( - filter(lambda x: current_script in x, secret_value.get("usedBy", [])) - ) - # Check current value of usedBy to determine further action. - if not current_usedBy: - # Fetch and update value of usedBy if not locked. 
- usedBy = _get_flag_value(current_script) - secret_value = update_usedby(secret_name, secret_value, usedBy) - # Release secret key lock on termination. - atexit.register( - update_usedby, secret_name, secret_value, usedBy, remove=True - ) - else: - # Raise warning of locked resource with current use info. - # raise RuntimeError() - warnings.warn( - f"Secret key is already in use by {current_usedBy[0]}", - RuntimeWarning, - ) - return secret_value - - -# TODO(Juraj): add support to access secrets for different profiles, not important rn -def get_secret(secret_name: str) -> Optional[Dict[str, Any]]: - """ - Fetch secret values(s) from AWS secrets manager. - - :return a dictionary of key-value pairs. E.g., `get_secret('binance')` returns - ``` - { - 'apiKey': '', - 'secret': '' - } - ``` - """ - # TODO(Juraj): This assertion can't be applied universally. - # Check if the secret name format is valid. - # dassert_valid_secret(secret_name) - hdbg.dassert_isinstance(secret_name, str) - # Create a AWS Secrets Manager client. - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html - # for the full list of exceptions. - # Define access key to check the entity requesting for secret key. - access_key = "trading" - try: - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - secret_string = get_secret_value_response["SecretString"] - hdbg.dassert_isinstance(secret_string, str) - secret_val = json.loads(secret_string) - # Check access entity value to lock secret key to avoid parallel run. - if access_key in secret_name: - # TODO(Juraj): Temporarily disabled in #Cmtask10068. - # secret_val = lock_secret(secret_name, secret_val) - pass - except ClientError as e: - if e.response["Error"]["Code"] == "ResourceNotFoundException": - # Let user know the secret does not exist. 
- raise ValueError(f"No such secret: {secret_name}") from e - # If not yet implemented handler then just re-raise. - raise e - return secret_val - - -# TODO(Juraj): add support to store secrets in different regions, not important rn. -def store_secret( - secret_name: str, secret_value: Dict[str, str], *, description: str = "" -) -> Optional[bool]: - """ - Store secret values(s) into AWS secrets manager, specify secret as a dict - of key-value pairs. - - :return: bool representing whether writing was successful or not - """ - hdbg.dassert_isinstance(secret_name, str) - # Create a AWS Secrets Manager client. - aws_profile = "ck" - client = get_secrets_client(aws_profile) - # See - # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_CreateSecret.html - # for the full list of exceptions. - try: - create_secret_value_response = client.create_secret( - Name=secret_name, - Description=description, - SecretString=json.dumps(secret_value), - ) - # If no exception was thrown and we get back the name we passed in the - # response then the secret was stored successfully. - return_name = create_secret_value_response["Name"] - hdbg.dassert_isinstance(return_name, str) - res: bool = create_secret_value_response["Name"] == secret_name - return res - except ClientError as e: - if e.response["Error"]["Code"] == "ResourceExistsException": - # Let user know the secret with this name already exists. - raise ValueError( - "Secret with this name already exists:", secret_name - ) from e - # If not yet implemented handler then just re-raise. - raise e - # If we did not return inside try block then something went wrong. - return False - - -# TODO(Juraj): this might be deprecated since this is only fit for exchange API keys -def dassert_valid_secret(secret_id: str) -> None: - """ - Enforce that the valid format is `exchange_id.stage.account_type.num`. 
- """ - values = secret_id.split(".") - hdbg.dassert_eq(len(values), 4) - hdbg.dassert_in( - values[0], - [ - "binance", - "bitfinex", - "coinbase", - "coinbaseprime", - "coinbasepro", - "ftx", - "gateio", - "huobi", - "kraken", - "kucoin", - "test", - ], - ) - hdbg.dassert_in(values[1], ["local", "preprod"]) - hdbg.dassert_in(values[2], ["trading", "sandbox"]) - hdbg.dassert( - values[3].isnumeric(), "values[3] should be numeric, got: %s", values[3] - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py deleted file mode 100644 index 8e3a0cffb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hserver.py +++ /dev/null @@ -1,1167 +0,0 @@ -""" -Identify on which server we are running. - -Import as: - -import helpers.hserver as hserver -""" - -import functools -import logging -import os -import shutil -import subprocess -from typing import Dict, List, Optional, Tuple - -# This module should depend only on: -# - Python standard modules -# See `helpers/dependencies.txt` for more details - -_LOG = logging.getLogger(__name__) - -_WARNING = "\033[33mWARNING\033[0m" - - -def _print(msg: str) -> None: - _ = msg - # _LOG.info(msg) - if False: - print(msg) - - -# Copied from hprint to avoid import cycles. -def _indent(txt: str, *, num_spaces: int = 2) -> str: - """ - Add `num_spaces` spaces before each line of the passed string. - """ - spaces = " " * num_spaces - txt_out = [] - for curr_line in txt.split("\n"): - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - res = "\n".join(txt_out) - return res - - -# We can't use `hsystem` to avoid import cycles. 
-def _system_to_string(cmd: str) -> Tuple[int, str]: - """ - Run a command and return the output and the return code. - - :param cmd: command to run - :return: tuple of (return code, output) - """ - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, - shell=True, - text=True, - ) - rc = result.returncode - output = result.stdout - output = output.strip() - return rc, output - - -# ############################################################################# -# Host -# ############################################################################# - - -# We can't rely only on the name / version of the host to infer where we are -# running, since inside Docker the name of the host is like `01a7e34a82a5`. Of -# course, there is no way to know anything about the host for security reason, -# so we pass this value from the external environment to the container, through -# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_OS_VERSION`). - - -# Sometimes we want to know if: -# - The processor is x86_64 or arm64 -# - The host is Mac or Linux -# - We are running on a Causify machine or on an external machine -# - We are inside CI or not -# TODO(gp): Grep all the use cases in the codebase and use the right function. - - -def get_host_user_name() -> Optional[str]: - """ - Return the name of the user running the host. - """ - return os.environ.get("CSFY_HOST_USER_NAME", None) - - -def get_dev_csfy_host_names() -> Tuple[str]: - """ - Return the names of the Causify dev servers. - """ - host_names = ("dev1", "dev2", "dev3") - return list(host_names) - - -# TODO(gp): -> is_inside_docker_container() -def is_inside_docker() -> bool: - """ - Return whether we are inside a container or not. 
- """ - # From https://stackoverflow.com/questions/23513045 - ret = os.path.exists("/.dockerenv") - return ret - - -def _get_host_name() -> str: - """ - Return the name of the host (not the machine) on which we are running. - - If we are inside a Docker container, we use the name of the host passed - through the `CSFY_HOST_NAME` env var. - """ - if is_inside_docker(): - host_name = os.environ["CSFY_HOST_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_name = os.uname()[1] - _LOG.debug("host_name=%s", host_name) - return host_name - - -def _get_host_os_name() -> str: - """ - Return the name of the OS on which we are running (e.g., "Linux", - "Darwin"). - - If we are inside a Docker container, we use the name of the OS passed - through the `CSFY_HOST_OS_NAME` env var. - """ - if is_inside_docker(): - host_os_name = os.environ["CSFY_HOST_OS_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_name = os.uname()[0] - _LOG.debug("host_os_name=%s", host_os_name) - return host_os_name - - -def _get_host_os_version() -> str: - """ - Return the version of the OS on which we are running. - - If we are inside a Docker container, we use the version of the OS passed - through the `CSFY_HOST_OS_VERSION` env var. - """ - if is_inside_docker(): - host_os_version = os.environ["CSFY_HOST_OS_VERSION"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_version = os.uname()[2] - _LOG.debug("host_os_version=%s", host_os_version) - return host_os_version - - -def is_host_csfy_server() -> bool: - """ - Return whether we are running on a Causify dev server. 
- """ - host_name = _get_host_name() - ret = host_name in get_dev_csfy_host_names() - return ret - - -_MAC_OS_VERSION_MAPPING = { - "Catalina": "19.", - "Monterey": "21.", - "Ventura": "22.", - "Sequoia": "24.", - # macOS 26 Tahoe uses Darwin 25.x (see `uname -r`). - "Tahoe": "25.", -} - - -def get_host_mac_version() -> str: - """ - Get the macOS version (e.g., "Catalina", "Monterey", "Ventura", "Tahoe"). - """ - host_os_version = _get_host_os_version() - for version, tag in _MAC_OS_VERSION_MAPPING.items(): - if tag in host_os_version: - return version - raise ValueError(f"Invalid host_os_version='{host_os_version}'") - - -def is_host_mac_version(version: str) -> bool: - """ - Return whether we are running on a Mac with a specific version (e.g., - "Catalina", "Monterey", "Ventura"). - """ - assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'" - host_mac_version = get_host_mac_version() - ret = version.lower() == host_mac_version.lower() - return ret - - -def is_host_gp_mac() -> bool: - """ - Return whether we are running on a Mac owned by GP. - - This is used to check if we can use a specific feature before - releasing it to all the users. - """ - host_name = _get_host_name() - ret = host_name.startswith("gpmac.") - return ret - - -# ############################################################################# -# Detect server. -# ############################################################################# - - -def is_inside_ci() -> bool: - """ - Return whether we are running inside the Continuous Integration flow. - """ - if "CSFY_CI" not in os.environ: - ret = False - else: - ret = os.environ["CSFY_CI"] != "" - return ret - - -def is_inside_unit_test() -> bool: - """ - Return whether we are running code insider the regressions. - """ - ret = "PYTEST_CURRENT_TEST" in os.environ - return ret - - -# TODO(gp): Remove! 
-def is_dev_csfy() -> bool: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws', - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', - # machine='x86_64' - host_name = os.uname()[1] - host_names = ("dev1", "dev2", "dev3") - csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names - return is_dev_csfy_ - - -# TODO(gp): This is obsolete and should be removed. -def is_dev4() -> bool: - """ - Return whether it's running on dev4. - """ - host_name = os.uname()[1] - csfy_host_name = os.environ.get("CSFY_HOST_NAME", None) - dev4 = "cf-spm-dev4" - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev4_ = dev4 in (host_name, csfy_host_name) - # - if not is_dev4_: - dev4 = "cf-spm-dev8" - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev4_ = dev4 in (host_name, csfy_host_name) - return is_dev4_ - - -def is_host_mac(*, version: Optional[str] = None) -> bool: - """ - Return whether we are running on macOS and, optionally, on a specific - version. - - :param version: check whether we are running on a certain macOS version (e.g., - `Catalina`, `Monterey`) - """ - _LOG.debug("version=%s", version) - host_os_name = os.uname()[0] - _LOG.debug("os.uname()=%s", str(os.uname())) - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - _LOG.debug( - "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name - ) - is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" - if version is None: - # The user didn't request a specific version, so we return whether we - # are running on a Mac or not. - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - else: - # The user specified a version: if we are not running on a Mac then we - # return False, since we don't even have to check the macOS version. 
- if not is_mac_: - _LOG.debug("is_mac_=%s", is_mac_) - return False - # Check the macOS version we are running. - if version == "Catalina": - # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: - # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 - macos_tag = "19.6" - elif version == "Monterey": - # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: - # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 - macos_tag = "21." - elif version == "Ventura": - macos_tag = "22." - elif version == "Sequoia": - # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: - # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 - macos_tag = "24." - elif version == "Tahoe": - # Darwin … 25.1.0 Darwin Kernel Version 25.1.0: … /RELEASE_ARM64_… arm64 - macos_tag = "25." - else: - raise ValueError(f"Invalid version='{version}'") - _LOG.debug("macos_tag=%s", macos_tag) - host_os_version = os.uname()[2] - # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; - # root:xnu-6153.141.2~1/RELEASE_X86_64' - csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") - _LOG.debug( - "host_os_version=%s csfy_host_os_version=%s", - host_os_version, - csfy_host_os_version, - ) - is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - - -def is_prod_csfy() -> bool: - """ - Detect whether we are running in a Causify production container. - - This env var is set inside `devops/docker_build/prod.Dockerfile`. - """ - # TODO(gp): CK -> CSFY - return bool(os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", False)) - - -# TODO(gp): Obsolete. -def is_ig_prod() -> bool: - """ - Detect whether we are running in an IG production container. - - This env var is set inside `//lime/devops_cf/setenv.sh` - """ - # CF sets up `DOCKER_BUILD` so we can use it to determine if we are inside - # a CF container or not. 
- # print("os.environ\n", str(os.environ)) - return bool(os.environ.get("DOCKER_BUILD", False)) - - -# TODO(Grisha): consider adding to `setup_to_str()`. -def is_inside_ecs_container() -> bool: - """ - Detect whether we are running in an ECS container. - """ - # When deploying jobs via ECS the container obtains credentials based - # on passed task role specified in the ECS task-definition, refer to: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html - ret = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ - return ret - - -# ############################################################################# - - -def is_external_linux() -> bool: - """ - Detect whether we are running on a non-server/non-CI Linux machine. - - This returns true when we run on the machine of an intern, or a non- - CSFY contributor. - """ - if is_host_csfy_server() or is_inside_ci(): - # Dev servers and CI are not external Linux systems. - ret = False - else: - # We need to check if the host is Linux. - host_os_name = _get_host_os_name() - ret = host_os_name == "Linux" - return ret - - -def is_external_dev() -> bool: - """ - Detect whether we are running on an system outside of Causify. - - E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non- - CSFY machine. - """ - ret = is_host_mac() or is_external_linux() - return ret - - -# ############################################################################# -# Set up consistency. -# ############################################################################# - - -# TODO(gp): Update this. -def _get_setup_signature() -> str: - """ - Dump all the variables that are used to make a decision about the values of - the functions in `_get_setup_settings()`. - - This function is used to mock the state of the system for testing - purposes. 
- """ - cmds = [] - # is_prod_csfy() - cmds.append('os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", "*undef*")') - # is_dev4() - # is_dev_csfy() - # is_ig_prod() - cmds.append('os.environ.get("CSFY_HOST_NAME", "*undef*")') - # is_inside_ci() - cmds.append('os.environ.get("CSFY_CI", "*undef*")') - # is_mac() - cmds.append("os.uname()[0]") - cmds.append("os.uname()[2]") - # is_external_linux() - cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "*undef*")') - # Build an array of strings with the results of executing the commands. - results = [] - for cmd in cmds: - result_tmp = cmd + "=" + str(eval(cmd)) - results.append(result_tmp) - # Join the results into a single string. - result = "\n".join(results) - return result - - -# The valid set ups are: -# - Running on a Causify server (e.g., `dev1`, `dev2`, `dev3`) -# - Container -# - Host -# - External Mac (GP, Paul, interns, contributors) -# - Container -# - Host -# - External Linux (interns, contributors) -# - Container -# - Host -# - Prod container on Linux -# - Container -# - CI -# - Container - - -def is_inside_docker_container_on_csfy_server() -> bool: - """ - Return whether we are running on a Docker container on a Causify server. - """ - ret = is_inside_docker() and is_host_csfy_server() - return ret - - -def is_outside_docker_container_on_csfy_server() -> bool: - """ - Return whether we are running outside a Docker container on a Causify - server. - """ - ret = not is_inside_docker() and is_host_csfy_server() - return ret - - -def is_inside_docker_container_on_host_mac() -> bool: - """ - Return whether we are running on a Docker container on a Mac host. - """ - ret = is_inside_docker() and is_host_mac() - return ret - - -def is_outside_docker_container_on_host_mac() -> bool: - """ - Return whether we are running outside of a Docker container on a Mac host. 
- """ - ret = not is_inside_docker() and is_host_mac() - return ret - - -def is_inside_docker_container_on_external_linux() -> bool: - """ - Return whether we are running on a Docker container on an external Linux. - """ - ret = is_inside_docker() and is_external_linux() - return ret - - -def is_outside_docker_container_on_external_linux() -> bool: - """ - Return whether we are outside of a Docker container on an external Linux. - """ - ret = not is_inside_docker() and is_external_linux() - return ret - - -def _get_setup_settings() -> List[Tuple[str, bool]]: - """ - Return a list of tuples with the name and value of the current server - setup. - - E.g., - ```bash - is_inside_docker_container_on_csfy_server=True - is_outside_docker_container_on_csfy_server=False - is_inside_docker_container_on_host_mac=False - is_outside_docker_container_on_host_mac=True - is_inside_docker_container_on_external_linux=False - is_outside_docker_container_on_external_linux=True - is_dev4=False - is_ig_prod=False - is_prod_csfy=False - is_inside_ci=False - ``` - """ - func_names = [ - "is_inside_docker_container_on_csfy_server", - "is_outside_docker_container_on_csfy_server", - # - "is_inside_docker_container_on_host_mac", - "is_outside_docker_container_on_host_mac", - # - "is_inside_docker_container_on_external_linux", - "is_outside_docker_container_on_external_linux", - # - "is_dev4", - "is_ig_prod", - "is_prod_csfy", - "is_inside_ci", - ] - # Store function name / value pairs as tuples. - setups = [] - for func_name in func_names: - val = eval(f"{func_name}()") - setups.append((func_name, val)) - return setups - - -def _setup_to_str(setups: List[Tuple[str, bool]]) -> str: - """ - Return a string representation of the current server setup configuration. - - :return: string with each setting on a new line, aligned with - padding - """ - # Find maximum length of setting names. - max_len = max(len(name) for name, _ in setups) + 1 - # Format each line with computed padding. 
- txt = [] - for name, value in setups: - txt.append(f"{name:<{max_len}}{value}") - return "\n".join(txt) - - -def _dassert_setup_consistency() -> None: - """ - Check that one and only one setup configuration is true. - - This is used to ensure that the setup configuration is one of the - expected ones and uniquely defined. - """ - setups = _get_setup_settings() - # One and only one set-up should be true. - sum_ = sum([value for _, value in setups]) - if sum_ != 1: - msg = "One and only one set-up config should be true:\n" - msg += _setup_to_str(setups) + "\n" - msg += "_get_setup_signature() returns:\n" - msg += _indent(_get_setup_signature()) - raise ValueError(msg) - - -# If the env var is not defined then we want to check. The only reason to skip -# it's if the env var is defined and equal to False. -check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False" -_is_called = False -if check_repo: - # The repo check is executed at import time, before the logger is initialized. - # To debug the repo check, enable the following block. - if False: - import helpers.hdbg as hdbg - - hdbg.init_logger(verbosity=logging.DEBUG) - # Compute and cache the result. - if not _is_called: - _dassert_setup_consistency() - _is_called = True -else: - _LOG.warning("Skipping repo check in %s", __file__) - - -# ############################################################################# -# Detect Docker functionalities. -# ############################################################################# - - -# Each function below should run without asserting. E.g., when we check if -# docker supports privileged mode, we should check if `docker` is available, -# and then if docker supports privileged mode, instead of asserting if `docker` -# doesn't exist on the system. - - -@functools.lru_cache() -def has_docker() -> bool: - """ - Return whether we have Docker installed. 
- """ - return shutil.which("docker") is not None - - -@functools.lru_cache() -def docker_needs_sudo() -> bool: - """ - Return whether Docker commands need to be run with sudo. - """ - if not has_docker(): - return False - # This check is required to ensure it does not cause issues when running on ECS - # Fargate through Airflow, since ECS Fargate does not support either DinD - # or sibling containers. - # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/fargate-security-considerations.html - # TODO(heanh): Check if we can use `is_inside_ecs_container()` to check if - # we are inside Airflow. - if not has_dind_support() and not use_docker_sibling_containers(): - return False - # Another way to check is to see if your user is in the docker group: - # > groups | grep docker - rc = os.system("docker run hello-world 2>&1 >/dev/null") - if rc == 0: - return False - # - rc = os.system("sudo docker run hello-world 2>&1 >/dev/null") - if rc == 0: - return True - assert False, "Failed to run docker" - - -def get_docker_executable() -> str: - """ - Return the docker executable, wrapper with `sudo` if needed. - """ - docker_needs_sudo_ = docker_needs_sudo() - executable = "docker" - if docker_needs_sudo_: - executable = "sudo " + executable - return executable - - -@functools.lru_cache() -def has_docker_privileged_mode() -> bool: - """ - Return whether the current container supports privileged mode. - - Docker privileged mode gives containers nearly all the same capabilities as - the host system's kernel. 
- - Privileged mode allows to: - - run Docker-in-Docker - - mount filesystems - """ - if not has_docker(): - return False - docker_executable = get_docker_executable() - cmd = f"{docker_executable} run --privileged hello-world 2>&1 >/dev/null" - rc = os.system(cmd) - _print(f"cmd={cmd} -> rc={rc}") - has_privileged_mode = rc == 0 - return has_privileged_mode - - -def has_docker_sibling_containers_support() -> bool: - """ - Return whether the current container supports running sibling containers. - """ - # We need to be inside a container to run sibling containers. - if not is_inside_docker(): - return False - # We assume that if the socket exists then we can run sibling containers. - if os.path.exists("/var/run/docker.sock"): - return True - return False - - -def has_docker_children_containers_support() -> bool: - """ - Return whether the current container supports Docker-in-Docker. - """ - # We need to be inside a container to run docker-in-docker. - if not is_inside_docker(): - return False - # We assume that if we have privileged mode then we can run docker-in-docker. - return has_docker_privileged_mode() - - -def is_csfy_dind_enabled() -> bool: - """ - Return whether `CSFY_ENABLE_DIND` is enabled (e.g. users opt-in to use - Docker-in-Docker). - """ - val = os.environ.get("CSFY_ENABLE_DIND", "0") - return val == "1" or val.lower() in ("true", "yes") - - -def can_run_docker_from_docker() -> bool: - """ - Return whether we can run docker from docker, either as children or sibling - container. 
- """ - return ( - has_docker_children_containers_support() - or has_docker_sibling_containers_support() - ) - - -def get_docker_info() -> str: - txt_tmp: List[str] = [] - # - has_docker_ = has_docker() - txt_tmp.append(f"has_docker={has_docker_}") - # - cmd = r"docker version --format '{{.Server.Version}}'" - _, docker_version = _system_to_string(cmd) - txt_tmp.append(f"docker_version='{docker_version}'") - # - docker_needs_sudo_ = docker_needs_sudo() - txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo_}") - # - has_privileged_mode_ = has_docker_privileged_mode() - txt_tmp.append(f"has_privileged_mode={has_privileged_mode_}") - # - is_inside_docker_ = is_inside_docker() - txt_tmp.append(f"is_inside_docker={is_inside_docker_}") - # - if is_inside_docker_: - has_docker_sibling_containers_support_ = ( - has_docker_sibling_containers_support() - ) - has_docker_children_containers_support_ = ( - has_docker_children_containers_support() - ) - else: - has_docker_sibling_containers_support_ = "*undef*" - has_docker_children_containers_support_ = "*undef*" - txt_tmp.append( - f"has_docker_sibling_containers_support={has_docker_sibling_containers_support_}" - ) - txt_tmp.append( - f"has_docker_children_containers_support={has_docker_children_containers_support_}" - ) - # Format as title with indented items. - txt = "Docker info" + "\n" + _indent("\n".join(txt_tmp)) - return txt - - -def _is_mac_version_with_sibling_containers() -> bool: - if not is_host_mac(): - return False - mac_version = get_host_mac_version() - return mac_version in ("Monterey", "Ventura", "Sequoia", "Tahoe") - - -# ############################################################################# -# Detect Docker functionalities, based on the set-up. -# ############################################################################# - - -# TODO(gp): These approach is sub-optimal. We deduce what we can do based on the -# name of the set-up. 
We should base our decisions on the actual capabilities of -# the system. - - -# TODO(gp): -> has_docker_privileged_mode -@functools.lru_cache() -def has_dind_support() -> bool: - """ - Return whether the current container supports privileged mode. - - This is needed to use Docker-in-Docker. - """ - _print(f"is_inside_docker()={is_inside_docker()}") - if not is_inside_docker(): - # Outside Docker there is no privileged mode. - _print("-> ret = False") - return False - # TODO(gp): Not sure this is really needed since we do this check - # after enable_privileged_mode controls if we have dind or not. - if _is_mac_version_with_sibling_containers(): - return False - # TODO(gp): This part is not multi-process friendly. When multiple - # processes try to run this code they interfere. A solution is to run `ip - # link` in the entrypoint and create a `has_docker_privileged_mode` file - # which contains the value. - # We rely on the approach from https://stackoverflow.com/questions/32144575 - # to check if there is support for privileged mode. - # Sometimes there is some state left, so we need to clean it up. - # TODO(Juraj): this is slow and inefficient, but works for now. - cmd = "sudo docker run hello-world" - rc = os.system(cmd) - _print(f"cmd={cmd} -> rc={rc}") - has_dind = rc == 0 - # dind is supported on both Mac and GH Actions. - # TODO(Juraj): HelpersTask16. - # if check_repo: - # if hserver.is_inside_ci(): - # # Docker-in-docker is needed for GH actions. For all other builds is optional. 
- # assert has_dind, ( - # f"Expected privileged mode: has_dind={has_dind}\n" - # + hserver.setup_to_str() - # ) - # else: - # only_warning = True - # _raise_invalid_host(only_warning) - # return False - # else: - # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") - # print( - # _WARNING - # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" - # + f"'{csfy_repo_config}'" - # ) - return has_dind - - -def _raise_invalid_host(only_warning: bool) -> None: - host_os_name = os.uname()[0] - am_host_os_name = os.environ.get("AM_HOST_OS_NAME", None) - msg = ( - f"Don't recognize host: host_os_name={host_os_name}, " - f"am_host_os_name={am_host_os_name}" - ) - if only_warning: - _LOG.warning(msg) - else: - raise ValueError(msg) - - -# TODO(gp): -> use_docker_in_docker_support -def enable_privileged_mode() -> bool: - """ - Return whether a host supports privileged mode for its containers. - """ - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//dev_tools",): - ret = False - else: - # Keep this in alphabetical order. - if is_dev_csfy(): - ret = True - elif is_inside_ci(): - ret = True - elif is_external_linux(): - ret = True - elif is_host_mac(): - mac_version = get_host_mac_version() - if mac_version == "Catalina": - # Docker for macOS Catalina supports dind. - ret = True - elif mac_version in ("Monterey", "Ventura", "Sequoia", "Tahoe"): - # Docker doesn't seem to support dind for these versions of macOS. - ret = False - else: - raise ValueError(f"Invalid version='{mac_version}'") - # Docker doesn't seem to support dind for these versions of macOS. 
- ret = False - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): -> use_docker_sudo_in_commands -def has_docker_sudo() -> bool: - """ - Return whether Docker commands should be run with `sudo` or not. - """ - # Keep this in alphabetical order. - if is_dev_csfy(): - ret = True - elif is_external_linux(): - ret = True - elif is_inside_ci(): - ret = False - elif is_host_mac(): - # macOS runs Docker with sudo by default. - # TODO(gp): This is not true. - ret = True - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): -> use_docker_sibling_container_support -def use_docker_sibling_containers() -> bool: - """ - Return whether to use Docker sibling containers. - - Using sibling containers requires that all Docker containers are in - the same network so that they can communicate with each other. - """ - if is_dev_csfy() or _is_mac_version_with_sibling_containers(): - return True - return has_docker_sibling_containers_support() - # if is_dev_csfy(): - # val = True - # else: - # val = is_dev4() or _is_mac_version_with_sibling_containers() - # return val - - -# TODO(gp): -> use_docker_main_network -def use_main_network() -> bool: - # TODO(gp): Replace this. - return use_docker_sibling_containers() - - -# TODO(gp): -> get_docker_shared_data_dir_map -def get_shared_data_dirs() -> Optional[Dict[str, str]]: - """ - Get path of dir storing data shared between different users on the host and - Docker. - - E.g., one can mount a central dir `/data/shared`, shared by multiple - users, on a dir `/shared_data` in Docker. - """ - # TODO(gp): Keep this in alphabetical order. 
- if is_dev4(): - shared_data_dirs = { - "/local/home/share/cache": "/cache", - "/local/home/share/data": "/data", - } - elif is_dev_csfy(): - shared_data_dirs = { - "/data/shared": "/shared_data", - "/data/shared2": "/shared_data2", - "/data/shared_k8s": "/shared_k8s", - "/data/shared_test": "/shared_test", - } - elif is_external_dev() or is_inside_ci() or is_prod_csfy(): - shared_data_dirs = None - else: - shared_data_dirs = None - only_warning = True - _raise_invalid_host(only_warning) - return shared_data_dirs - - -def use_docker_network_mode_host() -> bool: - # TODO(gp): Not sure this is needed any more, since we typically run in - # bridge mode. - ret = is_host_mac() or is_dev_csfy() - ret = False - if ret: - assert use_docker_sibling_containers() - return ret - - -def use_docker_db_container_name_to_connect() -> bool: - """ - Connect to containers running DBs just using the container name, instead of - using port and localhost / hostname. - """ - if _is_mac_version_with_sibling_containers(): - # New Macs don't seem to see containers unless we connect with them - # directly with their name. - ret = True - else: - ret = False - if ret: - # This implies that we are using Docker sibling containers. - assert use_docker_sibling_containers() - return ret - - -# TODO(gp): This seems redundant with use_docker_sudo_in_commands -def run_docker_as_root() -> bool: - """ - Return whether Docker should be run with root user. - - I.e., adding `--user $(id -u):$(id -g)` to docker compose or not. - """ - # Keep this in alphabetical order. - if is_dev4() or is_ig_prod(): - # //lime runs on a system with Docker remap which assumes we don't - # specify user credentials. - ret = True - elif is_dev_csfy(): - # On dev1 / dev2 we run as users specifying the user / group id as - # outside. 
- ret = False - elif is_external_linux(): - ret = False - elif is_inside_ci(): - # When running as user in GH action we get an error: - # ``` - # /home/.config/gh/config.yml: permission denied - # ``` - # see https://github.com/alphamatic/amp/issues/1864 - # So we run as root in GH actions. - ret = True - elif is_host_mac(): - ret = False - elif is_prod_csfy(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -# TODO(gp): Probably obsolete -def get_docker_user() -> str: - """ - Return the user that runs Docker, if any. - """ - if is_dev4(): - val = "spm-sasm" - else: - val = "" - return val - - -# TODO(gp): Probably obsolete -def get_docker_shared_group() -> str: - """ - Return the group of the user running Docker, if any. - """ - if is_dev4(): - val = "sasm-fileshare" - else: - val = "" - return val - - -# TODO(gp): -> repo_config.yaml -def skip_submodules_test() -> bool: - """ - Return whether the tests in the submodules should be skipped. - - E.g. while running `i run_fast_tests`. - """ - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Why do we want to skip running tests? - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//dev_tools",): - # Skip running `amp` tests from `dev_tools`. - return True - return False - - -# ############################################################################# -# S3 buckets. -# ############################################################################# - - -def is_AM_S3_available() -> bool: - # AM bucket is always available. - val = True - _LOG.debug("val=%s", val) - return val - - -def is_CK_S3_available() -> bool: - val = True - if is_inside_ci(): - import helpers.repo_config_utils as hrecouti - - repo_name = hrecouti.get_repo_config().get_name() - # TODO(gp): Remove this dependency from a repo. - if repo_name in ("//amp", "//dev_tools"): - # No CK bucket. 
- val = False - # TODO(gp): We might want to enable CK tests also on lemonade. - if repo_name in ("//lemonade",): - # No CK bucket. - val = False - elif is_dev4(): - # CK bucket is not available on dev4. - val = False - _LOG.debug("val=%s", val) - return val - - -# ############################################################################# -# Functions. -# ############################################################################# - - -def config_func_to_str() -> str: - """ - Print the value of all the config functions. - """ - ret: List[str] = [] - # Get the functions with: - # grep "def " helpers/hserver.py | sort | awk '{ print $2 }' | perl -i -ne 'print "$1\n" if /^([^\(]+)/' - function_names = [ - "enable_privileged_mode", - "get_docker_shared_group", - "get_docker_user", - "get_host_user_name", - "get_shared_data_dirs", - "has_dind_support", - "has_docker_sudo", - "is_AM_S3_available", - "is_CK_S3_available", - "is_csfy_dind_enabled", - "is_dev4", - "is_dev_csfy", - "is_external_linux", - "is_host_mac", - "is_ig_prod", - "is_inside_ci", - "is_inside_docker", - "is_inside_ecs_container", - "is_inside_unit_test", - "is_prod_csfy", - "run_docker_as_root", - "skip_submodules_test", - "use_docker_db_container_name_to_connect", - "use_docker_network_mode_host", - "use_docker_sibling_containers", - "use_main_network", - ] - for func_name in sorted(function_names): - try: - _LOG.debug("func_name=%s", func_name) - func_value = eval(f"{func_name}()") - except NameError: - func_value = "*undef*" - msg = f"{func_name}='{func_value}'" - ret.append(msg) - # Package. 
- result = "\n".join(ret) - return result diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py deleted file mode 100644 index b960bd8bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsftp.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Import as: - -import helpers.hsftp as hsftp -""" - -import logging -import os -import subprocess -import sys -from io import BytesIO -from typing import List - -import helpers.haws as haws -import helpers.hmodule as hmodule -import helpers.hsecrets as hsecret - -hmodule.install_module_if_not_present("pysftp") - -import pysftp # noqa: E402 - -# Create a logger instance. -_LOG = logging.getLogger(__name__) - - -def install_lftp(): - """ - Install `lftp` using the system package manager. - """ - try: - subprocess.run(["sudo", "apt-get", "update"], check=True) - subprocess.run(["sudo", "apt-get", "install", "-y", "lftp"], check=True) - _LOG.info("`lftp` successfully installed using `apt`.") - except Exception as e: - _LOG.error("Failed to install `lftp`: %s", e) - sys.exit(1) - - -def check_lftp_connection(): - """ - Check if `lftp` is installed. - - If not, install it using the package manager. - """ - try: - # Check if `lftp` is available by trying to run it. - subprocess.run( - ["lftp", "--version"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - _LOG.info("`lftp` is already installed.") - except subprocess.CalledProcessError: - _LOG.error("Error occurred while checking `lftp` version.") - sys.exit(1) - except FileNotFoundError: - _LOG.warning("`lftp` is not installed. 
Attempting to install it...") - install_lftp() - - -def download_file_using_lftp( - remote_data_path: str, save_path: str, hostname: str, secret_name: str -) -> None: - """ - Download files from a remote SFTP server using `lftp` and a private SSH - key. - - :param remote_data_path: path to the remote directory on the SFTP - server from which files should be downloaded. - :param save_path: local directory where the downloaded files will be - saved. - :param hostname: hostname of the SFTP server. - :param secret_name: Name of the secret in AWS Secrets Manager that - stores the SFTP credentials, including the username and private - key. - :return: None. - """ - # Fetch the private key from AWS Secrets Manager - secret_dict = hsecret.get_secret(secret_name) - username = secret_dict["username"] - private_key = secret_dict["private_key"] - # Write the private key to a temporary file - with open("/tmp/temp_key.pem", "w") as temp_key_file: - temp_key_file.write(private_key) - # Ensure the key file has the correct permissions - os.chmod("/tmp/temp_key.pem", 0o600) - private_key_path = "/tmp/temp_key.pem" - # Construct the lftp command. - # The 'set sftp:connect-program' allows specifying custom SSH options for the SFTP connection. - # -o GSSAPIAuthentication=no: Disables GSSAPI to avoid unnecessary authentication mechanisms. - # -o StrictHostKeyChecking=no: Bypasses the host key verification prompt for new hosts. - # -a: Enables SSH agent forwarding for more seamless authentication. - # -x: Disables X11 forwarding (not needed for file transfer). - # -i {private_key_path}: Specifies the private key for SSH authentication. - # 'mirror --parallel=10': Downloads files from the remote server, with 10 parallel downloads to speed up the process. 
- lftp_cmd = ( - f"lftp -u {username}, -e \"set sftp:connect-program 'ssh -o GSSAPIAuthentication=no " - f"-o StrictHostKeyChecking=no -a -x -i {private_key_path}'; " - f'mirror --parallel=10 {remote_data_path} {save_path}; quit" ' - f"sftp://{hostname}" - ) - try: - _LOG.info("Executing lftp command: %s", lftp_cmd) - subprocess.run( - lftp_cmd, - shell=True, - check=True, - capture_output=True, - text=True, - ) - except subprocess.CalledProcessError as e: - _LOG.error( - "lftp command failed with error: %s", - e.stderr, - ) - - -def get_sftp_connection(hostname: str, secret_name: str) -> pysftp.Connection: - """ - Return SFTP connection object using a private key stored in AWS Secrets - Manager. - - :param hostname: hostname of the SFTP server. - :param secret_name: name of the secret in AWS Secrets Manager - containing the private key. - :return: active SFTP connection object. - """ - # Fetch the private key from AWS Secrets Manager - secret_dict = hsecret.get_secret(secret_name) - username = secret_dict["username"] - private_key = secret_dict["private_key"] - # Write the private key to a temporary file - with open("/tmp/temp_key.pem", "w") as temp_key_file: - temp_key_file.write(private_key) - # Ensure the key file has the correct permissions - os.chmod("/tmp/temp_key.pem", 0o600) - # Ensure pysftp is installed before attempting connection. - cnopts = pysftp.CnOpts() - # Disable host key checking. - cnopts.hostkeys = None - sftp = pysftp.Connection( - hostname, - username=username, - private_key="/tmp/temp_key.pem", - cnopts=cnopts, - ) - # Remove the temporary key file after establishing the connection - os.remove("/tmp/temp_key.pem") - return sftp - - -def download_file_to_s3( - sftp: pysftp.Connection, - s3_client: haws.BaseClient, - remote_dir: str, - filename: str, - s3_bucket: str, - s3_prefix: str, -) -> None: - """ - Download data from an SFTP server and upload it to an S3 bucket. - - :param sftp: An active SFTP Connection object. 
- :param s3_client: An AWS Base client object to interact with S3. - :param remote_dir: The directory on the SFTP server where the file - is located. - :param filename: The name of the file to download from the SFTP - server. - :param s3_bucket: The name of the S3 bucket to upload the file to. - :param s3_prefix: The prefix (path) in the S3 bucket where the file - will be stored. - :return: None. - """ - remote_path = f"{remote_dir}/{filename}" - s3_key = f"{s3_prefix}/{filename}" - with sftp.open(remote_path) as file_obj: - # Download data from sftp server. - file_data = file_obj.read() - try: - # Upload data to S3. - s3_client.upload_fileobj(BytesIO(file_data), s3_bucket, s3_key) - _LOG.info( - "Uploaded: %s to s3://%s/%s", remote_path, s3_bucket, s3_key - ) - except Exception as e: - _LOG.error("Failed to upload file to S3. Error: %s", str(e)) - raise e - - -def get_file_names(sftp: pysftp.Connection, sftp_remote_dir: str) -> List[str]: - """ - Retrieve all file names from a specified directory on a remote SFTP server. - - :param sftp: An active SFTP Connection object. - :param sftp_remote_dir: The directory on the SFTP server from which - to list file names. - :return: A list of file names present in the specified directory on - the SFTP server. - """ - file_names = [] - for item in sftp.listdir_attr(sftp_remote_dir): - file_names.append(item.filename) - return file_names diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py deleted file mode 100644 index 41c4cf571..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hslack.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Slack notification utilities for sending messages to Slack channels. 
- -Import as: - -import helpers.hslack as hslack -""" - -import logging -import os -from typing import Optional - -import requests - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# SlackNotifier -# ############################################################################# - - -class SlackNotifier: - """ - Send notifications to Slack channels using bot tokens. - """ - - def __init__(self, bot_token: Optional[str] = None) -> None: - """ - Initialize Slack notifier. - - :param bot_token: Slack bot token (starts with 'xoxb-') - """ - self.bot_token = bot_token or os.environ.get("SLACK_BOT_TOKEN") - if not self.bot_token: - raise ValueError( - "No bot token provided via parameter or SLACK_BOT_TOKEN env var" - ) - - def send_message( - self, - channel: str, - message: str, - ) -> None: - """ - Send a message to a Slack channel. - - :param channel: Slack channel ID (e.g., 'C1234567890') or - channel name (e.g., '#notifications') - :param message: Message text to send - """ - URL = "https://slack.com/api/chat.postMessage" - headers = { - "Authorization": f"Bearer {self.bot_token}", - "Content-Type": "application/json", - } - payload = { - "channel": channel, - "text": message, - } - response = requests.post(URL, headers=headers, json=payload, timeout=30) - response.raise_for_status() - result = response.json() - if not result.get("ok"): - raise ValueError(f"Slack API error: {result.get('error')}") - _LOG.info("Message sent successfully to %s", channel) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py deleted file mode 100644 index 4c3f6a748..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Import as: - 
-import helpers.hsql as hsql -""" - -import helpers.hmodule as hmodule - -# The problem here is that part of the code base end up including `hsql` which -# requires `psycopg2` even though it's not called at run-time. -# To simplify the dependency management we include the code of `hsql` only if -# `psycopg2` is present. If not, we just create a stub for the needed type hints. -if hmodule.has_module("psycopg2"): - from helpers.hsql_implementation import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import - -else: - from typing import Any, List - - DbConnection = Any - - -def create_in_operator(values: List[str], column_name: str) -> str: - """ - Transform a list of possible values into an IN operator clause. - - :param values: a list of possible values for the given column, e.g. `["binance", "ftx"]` - :param column_name: the name of the column, e.g. 'exchange_id' - :return: IN operator clause with specified values, - e.g. `"exchange_id IN ('binance', 'ftx')"` - """ - in_operator = ( - f"{column_name} IN (" - + ",".join([f"'{value}'" for value in values]) - + ")" - ) - return in_operator diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py deleted file mode 100644 index ddd48d1e4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_implementation.py +++ /dev/null @@ -1,954 +0,0 @@ -""" -Import as: - -import helpers.hsql_implementation as hsqlimpl -""" - -import collections -import io -import logging -import os -import re -import time -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import numpy as np -import pandas as pd -import psycopg2 as psycop -import psycopg2.extras as extras -import psycopg2.sql as psql - -import 
helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsecrets as hsecret -import helpers.htimer as htimer - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Connection -# ############################################################################# - -DbConnection = Any - -# Invariant: keep the arguments in the interface in the same order as: -# host, dbname, port, user, password. -DbConnectionInfo = collections.namedtuple( - "DbConnectionInfo", ["host", "dbname", "port", "user", "password"] -) - - -def get_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, - autocommit: bool = True, -) -> DbConnection: - """ - Create a connection and cursor for a SQL database. - """ - _LOG.debug(hprint.to_str("host dbname port user")) - connection = psycop.connect( - host=host, dbname=dbname, port=port, user=user, password=password - ) - if autocommit: - connection.autocommit = True - return connection - - -def get_connection_from_aws_secret( - aws_region: str, - *, - stage: str = "prod", -) -> DbConnection: - """ - Create an SQL connection using credentials obtained from AWS - SecretsManager. - - The function uses `ck` AWS profile on the backend. - The intended usage is obtaining connection to a DB on RDS instances. - - :param aws_region: AWS DB region, e.g. "eu-north-1", "ap-northeast-1" - :param stage: DB stage to connect to. For "prod" stage it is only possible to obtain a read-only connection via this method. 
- """ - hdbg.dassert_in(stage, ["prod", "preprod", "test"]) - hdbg.dassert_in(aws_region, hs3.AWS_REGIONS) - dbname = f"{stage}.im_data_db" - if stage == "prod": - secret_name = f"{dbname}.read_only" - else: - secret_name = ( - dbname - if aws_region == hs3.AWS_EUROPE_REGION_1 - else f"{dbname}.{aws_region}" - ) - _LOG.info("Fetching secret: %s", secret_name) - db_creds = hsecret.get_secret(secret_name) - connection = get_connection( - host=db_creds["host"], - dbname=dbname, - port=db_creds["port"], - user=db_creds["username"], - password=db_creds["password"], - ) - return connection - - -def get_connection_from_env_vars() -> DbConnection: - """ - Create a SQL connection with the information from the environment - variables. - """ - # Get values from the environment variables. - host = os.environ["POSTGRES_HOST"] - dbname = os.environ["POSTGRES_DB"] - port = int(os.environ["POSTGRES_PORT"]) - user = os.environ["POSTGRES_USER"] - password = os.environ["POSTGRES_PASSWORD"] - # Build the - connection = get_connection( - host=host, - dbname=dbname, - port=port, - user=user, - password=password, - ) - return connection - - -def get_connection_from_string( - conn_as_str: str, - autocommit: bool = True, -) -> DbConnection: - """ - Create a connection from a string. - - E.g., `host=localhost dbname=im_db_local port=5432 user=... - password=...` - """ - regex = r"host=\w+ dbname=\w+ port=\d+ user=\w+ password=\w+" - m = re.match(regex, conn_as_str) - hdbg.dassert(m, "Invalid connection string: '%s'", conn_as_str) - connection = psycop.connect(conn_as_str) - if autocommit: - connection.autocommit = True - return connection - - -def get_connection_info_from_env_file(env_file_path: str) -> DbConnectionInfo: - """ - Get connection parameters from environment file. 
- - :param env_file_path: path to an environment file that contains db - connection parameters - """ - import dotenv - - db_config = dotenv.dotenv_values(env_file_path) - params = { - "host": db_config["POSTGRES_HOST"], - "dbname": db_config["POSTGRES_DB"], - "user": db_config["POSTGRES_USER"], - "password": db_config["POSTGRES_PASSWORD"], - } - key = "POSTGRES_PORT" - if key in db_config: - params["port"] = int(db_config[key]) - else: - params["port"] = 5432 - # The parameters' names are fixed and cannot be changed, see - # `https:://hub.docker.com/_/postgres`. - connection_parameters = DbConnectionInfo(**params) - return connection_parameters - - -def check_db_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, -) -> Tuple[bool, Optional[psycop.OperationalError]]: - """ - Check whether a connection to a DB exists, in a non-blocking way. - """ - try: - get_connection( - host=host, dbname=dbname, port=port, user=user, password=password - ) - connection_exist = True - error = None - except psycop.OperationalError as e: - connection_exist = False - error = e - return connection_exist, error - - -def wait_db_connection( - host: str, - dbname: str, - port: int, - user: str, - password: str, - *, - timeout_in_secs: int = 30, -) -> None: - """ - Wait until the database is available. - - :param timeout_in_secs: secs before timing out with `RuntimeError`. 
- """ - hdbg.dassert_lte(1, timeout_in_secs) - _LOG.debug("dbname=%s, port=%s, host=%s", dbname, port, host) - elapsed_secs = 0 - while True: - _LOG.info("Waiting for PostgreSQL to become available...") - conn_exists = check_db_connection(host, dbname, port, user, password) - if conn_exists[0]: - _LOG.info("PostgreSQL is available (after %s seconds)", elapsed_secs) - break - if elapsed_secs > timeout_in_secs: - raise psycop.OperationalError( - f"Cannot connect to db host={host} dbname={dbname} port={port} " - f"due to timeout={timeout_in_secs} seconds" - f"\n{conn_exists[1]}" - ) - elapsed_secs += 1 - time.sleep(1) - - -def db_connection_to_tuple(connection: DbConnection) -> DbConnectionInfo: - """ - Get database connection details using connection. Connection details - include: - - - Host - - Database name - - Port - - Username - - Password - - :param connection: a database connection - :return: database connection details - """ - info = connection.info - ret = DbConnectionInfo( - host=info.host, - dbname=info.dbname, - port=info.port, - user=info.user, - password=info.password, - ) - return ret - - -# ############################################################################# -# State of the whole DB -# ############################################################################# - - -def get_engine_version(connection: DbConnection) -> str: - """ - Report information on the SQL engine. - - E.g., ``` PostgreSQL 11.5 on x86_64-pc-linux-gnu compiled by gcc - (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit ``` - """ - query = "SELECT version();" - df = pd.read_sql_query(query, connection) - # pylint: disable=no-member - info: str = df.iloc[0, 0] - return info - - -# ############################################################################# -# Tables -# ############################################################################# - - -def get_table_names(connection: DbConnection) -> List[str]: - """ - Report the name of the tables. 
- - E.g., tables=['entities', 'events', 'stories', 'taxonomy'] - """ - query = """ - SELECT table_name - FROM information_schema.tables - WHERE table_type = 'BASE TABLE' - AND table_schema = 'public' - """ - cursor = connection.cursor() - cursor.execute(query) - tables = [x[0] for x in cursor.fetchall()] - return tables - - -# TODO(gp): Test / fix this. -def get_indexes(connection: DbConnection) -> pd.DataFrame: - res = [] - tables = get_table_names(connection) - cursor = connection.cursor() - for table in tables: - query = f"""SELECT * FROM pg_indexes WHERE tablename = '{table}' """ - cursor.execute(query) - z = cursor.fetchall() - res.append(pd.DataFrame(z)) - tmp: pd.DataFrame = pd.concat(res) - tmp["index_type"] = tmp[4].apply( - lambda w: w.split("USING")[1].lstrip().split(" ")[0] - ) - tmp.columns = [ - "type: public/private", - "table_name", - "key_name", - "None", - "Statement", - "index_type", - ] - tmp["columns"] = tmp["Statement"].apply(lambda w: w.split("(")[1][:-1]) - - return tmp - - -def disconnect_all_clients(connection: DbConnection) -> None: - # From https://stackoverflow.com/questions/36502401 - # Not sure this will work in our case, since it might kill our own connection. - dbname = connection.info.host - query = f""" - SELECT pg_terminate_backend(pid) - FROM pg_stat_activity - WHERE datname = '{dbname}';""" - connection.cursor().execute(query) - - -# ############################################################################# -# Database -# ############################################################################# - - -def get_db_names(connection: DbConnection) -> List[str]: - """ - Return the names of the available DBs. 
- - E.g., ['postgres', 'rdsadmin', 'template0', 'template1'] - """ - query = "SELECT datname FROM pg_database;" - cursor = connection.cursor() - cursor.execute(query) - dbs = list(zip(*cursor.fetchall()))[0] - dbs = sorted(dbs) - return dbs - - -def create_database( - connection: DbConnection, - dbname: str, - *, - overwrite: Optional[bool] = None, -) -> None: - """ - Create empty database. - - :param connection: database connection - :param dbname: database to create - :param overwrite: overwrite existing database - """ - _LOG.debug("connection=%s", connection) - with connection.cursor() as cursor: - if overwrite: - cursor.execute( - psql.SQL("DROP DATABASE IF EXISTS {} WITH (FORCE);").format( - psql.Identifier(dbname) - ) - ) - else: - if dbname in get_table_names(connection): - raise ValueError(f"Database {dbname} already exists") - cursor.execute( - psql.SQL("CREATE DATABASE {};").format(psql.Identifier(dbname)) - ) - - -def remove_database(connection: DbConnection, dbname: str) -> None: - """ - Remove database in current environment. - - :param connection: a database connection - :param dbname: database name to drop, e.g. `im_db_local` - """ - # Drop database. - # From https://stackoverflow.com/questions/36502401 - connection.cursor().execute( - psql.SQL("DROP DATABASE {} WITH (FORCE);").format( - psql.Identifier(dbname) - ) - ) - - -def get_tables_size( - connection: DbConnection, - only_public: bool = True, - summary: bool = True, -) -> pd.DataFrame: - """ - Report the size of each table. 
- - E.g., - - ``` - table_name row_estimate total index toast table - 0 events 0.0 26 GB 0 bytes 192 bytes 26 GB - 1 stories 0.0 15 GB 43 GB 192 bytes 12 GB - 2 entities 10823400.0 76 MB 0 bytes 192 bytes 76 MB - 3 taxonomy 20691.0 690 kB 0 bytes 192 bytes 652 kB - ``` - """ - q = """SELECT *, pg_size_pretty(total_bytes) AS total - , pg_size_pretty(index_bytes) AS INDEX - , pg_size_pretty(toast_bytes) AS toast - , pg_size_pretty(table_bytes) AS TABLE - FROM ( - SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( - SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME - , c.reltuples AS row_estimate - , pg_total_relation_size(c.oid) AS total_bytes - , pg_indexes_size(c.oid) AS index_bytes - , pg_total_relation_size(reltoastrelid) AS toast_bytes - FROM pg_class c - LEFT JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE relkind = 'r' - ) a - ) a - ORDER by total_bytes DESC""" - df = pd.read_sql_query(q, connection) - if only_public: - df = df[df["table_schema"] == "public"] - if summary: - cols = "table_name row_estimate total index toast table".split() - df = df[cols] - return df - - -# ############################################################################# -# Query -# ############################################################################# - - -# TODO(gp): -> as_df -def execute_query_to_df( - connection: DbConnection, - query: str, - limit: Optional[int] = None, - offset: Optional[int] = None, - use_timer: bool = False, - profile: bool = False, - verbose: bool = False, -) -> pd.DataFrame: - """ - Execute a query. - """ - if False: - # Ask the user before executing a query. - print(f"query=\n{query}") - import helpers.hsystem as hsystem - - hsystem.query_yes_no("Ok to execute?") - if limit is not None: - query += f" LIMIT {limit}" - if offset is not None: - query += f" OFFSET {offset}" - if profile: - query = "EXPLAIN ANALYZE " + query - if verbose: - _LOG.info("> %s", query) - # Compute. 
- if use_timer: - idx = htimer.dtimer_start(0, "Sql time") - cursor = connection.cursor() - try: - df = pd.read_sql_query(query, connection) - except psycop.OperationalError: - # Catch error and execute query directly to print error. - try: - cursor.execute(query) - except psycop.Error as e: - print(e.pgerror) - raise e - if use_timer: - htimer.dtimer_stop(idx) - if profile: - _LOG.info("df=%s", df) - return df - - -def head_table( - connection: DbConnection, - table: str, - limit: int = 5, -) -> str: - """ - Report the head of the table as str. - """ - txt = [] - query = f"SELECT * FROM {table} LIMIT {limit} " - df = execute_query_to_df(connection, query) - # pd.options.display.max_columns = 1000 - # pd.options.display.width = 130 - txt.append(str(df)) - txt = "\n".join(txt) - return txt - - -def head_tables( - connection: DbConnection, - tables: Optional[List[str]] = None, - limit: int = 5, -) -> str: - txt = [] - if tables is None: - tables = get_table_names(connection) - for table in tables: - txt.append("\n" + "#" * 80 + "\n" + table + "\n" + "#" * 80) - txt_tmp = head_table(connection, table, limit=limit) - txt.append(txt_tmp) - txt = "\n".join(txt) - return txt - - -def get_table_columns(connection: DbConnection, table_name: str) -> List[str]: - """ - Get column names for given table. 
- """ - query = f""" - SELECT column_name - FROM information_schema.columns - WHERE TABLE_NAME = '{table_name}'""" - cursor = connection.cursor() - cursor.execute(query) - columns = [x[0] for x in cursor.fetchall()] - return columns - - -def find_tables_common_columns( - connection: DbConnection, - tables: List[str], - as_df: bool = False, -) -> Optional[pd.DataFrame]: - limit = 5 - df = [] - for i, table in enumerate(tables): - table = tables[i] - query = f"SELECT * FROM {table} LIMIT {limit} " - df1 = execute_query_to_df(connection, query, verbose=False) - if df1 is None: - continue - for j in range(i + 1, len(tables)): - table = tables[j] - query = f"SELECT * FROM {table} LIMIT {limit} " - df2 = execute_query_to_df(connection, query, verbose=False) - if df2 is None: - continue - common_cols = [c for c in df1 if c in df2] - if as_df: - df.append( - ( - tables[i], - tables[j], - len(common_cols), - " ".join(common_cols), - ) - ) - else: - print(f"'{tables[i]}' vs '{tables[j]}'") - print(f" ({len(common_cols)}): {' '.join(common_cols)}") - obj = None - if as_df: - obj = pd.DataFrame( - df, columns=["table1", "table2", "num_comm_cols", "common_cols"] - ) - return obj - - -def remove_table( - connection: DbConnection, table_name: str, cascade: bool = False -) -> None: - """ - Remove a table from a database. - - :param connection: database connection - :param table_name: table name - :param cascade: whether to drop the objects dependent on the table - """ - query = f"DROP TABLE IF EXISTS {table_name}" - if cascade: - query = " ".join([query, "CASCADE"]) - connection.cursor().execute(query) - - -def remove_all_tables(connection: DbConnection, cascade: bool = False) -> None: - """ - Remove all the tables from a database. 
- - :param connection: database connection - :param cascade: whether to drop the objects dependent on the tables - """ - table_names = get_table_names(connection) - _LOG.warning("Deleting all the tables: %s", table_names) - for table_name in table_names: - _LOG.warning("Deleting %s ...", table_name) - remove_table(connection, table_name, cascade) - - -# ############################################################################# -# Insert -# ############################################################################# - - -def csv_to_series(csv_as_txt: str, sep: str = ",") -> pd.Series: - """ - Convert a text with (key, value) separated by `sep` into a `pd.Series`. - - :param csv_as_txt: a string containing csv data - E.g., - ``` - tradedate,2021-11-12 - targetlistid,1 - ``` - :param sep: csv separator, e.g. `,` - :return: series - """ - lines = hprint.dedent(csv_as_txt).split("\n") - tuples = [tuple(line.split(sep)) for line in lines] - # Remove empty tuples. - tuples = [t for t in tuples if t[0] != ""] - # Build series. - index, data = zip(*tuples) - # _LOG.debug("index=%s", index) - # _LOG.debug("data=%s", data) - srs = pd.Series(data, index=index) - return srs - - -def copy_rows_with_copy_from( - connection: DbConnection, df: pd.DataFrame, table_name: str -) -> None: - """ - Copy dataframe contents into DB directly from buffer. - - This function works much faster for large dataframes (>10000 rows). - - :param connection: DB connection - :param df: data to insert - :param table_name: name of the table for insertion - """ - # The target table needs to exist. - hdbg.dassert_in(table_name, get_table_names(connection)) - # Read the data. - buffer = io.StringIO() - df.to_csv(buffer, index=False, header=False) - buffer.seek(0) - # Copy the data to the DB. - cur = connection.cursor() - cur.copy_from(buffer, table_name, sep=",") - # TODO(gp): CmampTask413, is this still needed because the autocommit. 
- connection.commit() - - -# TODO(gp): -> table_name, df -def create_insert_query(df: pd.DataFrame, table_name: str) -> str: - """ - Create an INSERT query to insert data into a DB. - - :param df: data to insert into DB - :param table_name: name of the table for insertion - :return: sql query, e.g., - ``` - INSERT INTO ccxt_ohlcv_spot(timestamp,open,high,low,close) VALUES %s - ``` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - columns = ",".join(list(df.columns)) - query = f"INSERT INTO {table_name}({columns}) VALUES %s" - _LOG.debug("query=%s", query) - return query - - -# TODO(gp): -> table_name, df -def create_insert_on_conflict_do_nothing_query( - df: pd.DataFrame, table_name: str, unique_columns: List[str] -) -> str: - """ - Create an INSERT query to insert data into a DB. If a unique constraint is - violated for a provided set of columns, duplicates are not inserted. - - :param df: data to insert into DB - :param table_name: name of the table for insertion - :param unique_columns: set of columns which should be unique record-wise. - :return: sql query, e.g., - ``` - INSERT INTO ccxt_bid_ask(timestamp,bid_size,bid_price,ask_size, - ask_price,exchange_id,currency_pair) VALUES %s - ON CONFLICT (timestamp, exchange_id, currency_pair) DO NOTHING; - ``` - """ - hdbg.dassert_isinstance(df, pd.DataFrame) - # Check that the constraint is actually applied to columns - # of the DataFrame. - hdbg.dassert_is_subset(unique_columns, list(df.columns)) - columns = ",".join(list(df.columns)) - unique_columns_str = ",".join(unique_columns) - query = f"INSERT INTO {table_name}({columns}) VALUES %s ON CONFLICT ({unique_columns_str}) \ - DO NOTHING" - _LOG.debug("query=%s", query) - return query - - -# TODO(gp): -> connection, table_name, obj -def execute_insert_query( - connection: DbConnection, - obj: Union[pd.DataFrame, pd.Series], - table_name: str, -) -> None: - """ - Insert a DB as multiple rows into the database. 
- - :param connection: connection to the DB - :param obj: data to insert - :param table_name: name of the table for insertion - """ - if isinstance(obj, pd.Series): - df = obj.to_frame().T - else: - df = obj - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_in(table_name, get_table_names(connection)) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Ensure the DataFrame has compatible types with - # downstream consumers (e.g., database). - df = df.applymap(lambda x: float(x) if isinstance(x, np.float64) else x) - # Transform dataframe into list of tuples. - values = [tuple(v) for v in df.to_numpy()] - # Generate a query for multiple rows. - query = create_insert_query(df, table_name) - # Execute query for each provided row. - cur = connection.cursor() - extras.execute_values(cur, query, values) - connection.commit() - - -# TODO(gp): -> connection, table_name, obj -def execute_insert_on_conflict_do_nothing_query( - connection: DbConnection, - obj: Union[pd.DataFrame, pd.Series], - table_name: str, - unique_columns: List[str], -) -> None: - """ - Insert a DB as multiple rows into the database. If a a UNIQUE constraint is - violated for a provided set of columns, duplicates are not inserted. - - :param connection: connection to the DB - :param obj: data to insert - :param table_name: name of the table for insertion - :param unique_columns: set of columns which should be unique record-wise. - If unique_columns is an empty list, a regular DB insert is executed - without the UNIQUE constraint. - """ - if isinstance(obj, pd.Series): - df = obj.to_frame().T - else: - df = obj - hdbg.dassert_isinstance(df, pd.DataFrame) - hdbg.dassert_in(table_name, get_table_names(connection)) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Transform dataframe into list of tuples. - values = [tuple(v) for v in df.to_numpy()] - # Generate a query for multiple rows. 
- if not unique_columns: - # If unique_columns is an empty list, currently used when saving - # bid/ask RT data, to experiment with using no uniqueness constraints. - query = create_insert_query(df, table_name) - else: - query = create_insert_on_conflict_do_nothing_query( - df, table_name, unique_columns - ) - # Execute query for each provided row. - cur = connection.cursor() - try: - extras.execute_values(cur, query, values) - connection.commit() - except Exception as e: - _LOG.error( - "Failed to insert data with the '%s'. Query %s. Values: %s", - str(e), - query, - values, - ) - raise e - - -def execute_query(connection: DbConnection, query: str) -> List[tuple]: - """ - Use for generic simple operations. - - :param connection: connection to the DB - :param query: generic query that can be: insert, update, delete, etc. - :return: list of tuples with the results of the query - """ - _LOG.debug(hprint.to_str("query")) - with connection.cursor() as cursor: - cursor.execute(query) - if not connection.autocommit: - connection.commit() - try: - result = cursor.fetchall() - except psycop.ProgrammingError: - result = [()] - return result - - -# ############################################################################# -# Build more complex SQL queries. -# ############################################################################# - - -# Invariants for functions with SQL queries -# -# - Functions creating tables -# - accept a parameter `incremental that has the same behavior as in -# `hio.create_dir(..., incremental)` -# - It controls the behavior of this function if the target table already exists. -# If `incremental` is True, then skip creating it and reuse it as it is; if -# False delete it and create it from scratch. 
-# -# - Function creating / execution SQL queries -# - We prefer functions that directly perform SQL queries implementing a given -# functionality (e.g., `get_num_rows()`) -# - Use `get_..._query()` returning the query text only when we want to freeze -# the query in a test, e.g., because it is complex - - -def get_remove_duplicates_query( - table_name: str, id_col_name: str, column_names: List[str] -) -> str: - """ - Get a query to remove duplicates from table, keeping last duplicated row. - - :param table_name: name of table - :param id_col_name: name of unique id column - :param column_names: names of columns to compare on - :return: query to execute duplicate removal - """ - # TODO(*): Add a "limit" parameter if possible, to check only in top N rows. - remove_statement = [] - remove_statement.append(f"DELETE FROM {table_name} a USING {table_name} b") - remove_statement.append(f"WHERE a.{id_col_name} < b.{id_col_name}") - for c in column_names: - remove_statement.append(f"AND a.{c} = b.{c}") - remove_statement = " ".join(remove_statement) - return remove_statement - - -def get_num_rows(connection: DbConnection, table_name: str) -> int: - """ - Return the number of rows in a DB table. - """ - cursor = connection.cursor() - query = f"SELECT COUNT(*) FROM {table_name}" - cursor.execute(query) - vals = cursor.fetchall() - # The return value is like: vals=[(0,)] - hdbg.dassert_eq(len(vals), 1) - return vals[0][0] # type: ignore[no-any-return] - - -# ############################################################################# -# Polling functions -# ############################################################################# - - -def is_row_with_value_present( - connection: DbConnection, - table_name: str, - field_name: str, - target_value: str, - *, - show_db_state: bool = True, -) -> hasynci.PollOutput: - """ - Check with a polling function if a row with `field_name` == `target_value` - is present in the table `table_name` of the DB. 
- - E.g., this can be used with polling to wait for the target value - "hello_world.txt" in the "filename" field of the table "table_name" to appear - - :return: - - success if the value is present - - result: None - """ - _LOG.debug(hprint.to_str("connection table_name field_name target_value")) - # Print the state of the DB, if needed. - if show_db_state: - query = f"SELECT * FROM {table_name} ORDER BY filename" - df = execute_query_to_df(connection, query) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Check if the required row is available. - query = f"SELECT {field_name} FROM {table_name} WHERE {field_name}='{target_value}'" - df = execute_query_to_df(connection, query) - _LOG.debug("df=\n%s", hpandas.df_to_str(df, use_tabulate=False)) - # Package results. - success = df.shape[0] > 0 - result = None - return success, result - - -# TODO(gp): Add unit test. -async def wait_for_change_in_number_of_rows( - get_wall_clock_time: hdateti.GetWallClockTime, - db_connection: DbConnection, - table_name: str, - poll_kwargs: Dict[str, Any], - *, - tag: Optional[str] = None, -) -> int: - """ - Wait until the number of rows in a table changes. - - :param get_wall_clock_time: a function to get current time - :param db_connection: connection to the target DB - :param table_name: name of the table to poll - :param poll_kwargs: a dictionary with the kwargs for `poll()` - :param tag: name of the caller function - :return: number of new rows found - """ - num_rows = get_num_rows(db_connection, table_name) - - def _is_number_of_rows_changed() -> hasynci.PollOutput: - new_num_rows = get_num_rows(db_connection, table_name) - _LOG.debug("new_num_rows=%s num_rows=%s", new_num_rows, num_rows) - success = new_num_rows != num_rows - diff_num_rows = new_num_rows - num_rows - return success, diff_num_rows - - # Poll. - if tag is None: - # Use name of the caller function. 
- tag = hintros.get_function_name(count=0) - if poll_kwargs is None: - poll_kwargs = hasynci.get_poll_kwargs(get_wall_clock_time) - num_iters, diff_num_rows = await hasynci.poll( - _is_number_of_rows_changed, - tag=tag, - **poll_kwargs, - ) - _ = num_iters - diff_num_rows = cast(int, diff_num_rows) - return diff_num_rows diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py deleted file mode 100644 index 2aeff7c6c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsql_test.py +++ /dev/null @@ -1,273 +0,0 @@ -""" -Import as: - -import helpers.hsql_test as hsqltest -""" - -import abc -import logging -import os - -import pytest - -import helpers.hdocker as hdocker -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsql as hsql -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestDbHelper -# ############################################################################# - - -@pytest.mark.requires_docker_in_docker -@pytest.mark.skipif( - not hserver.can_run_docker_from_docker(), - reason="Need docker children / sibling support", -) -class TestDbHelper(hunitest.TestCase, abc.ABC): - """ - Allow testing code that interacts with a DB. - - It creates / destroys a test DB during setup / teardown of the class. This means - that the same DB is reused for multiple test methods of the same class. - - The invariant is that each test method should: - - (ideally) find a clean DB to work with - - not assume that the DB is clean. 
If the DB is not clean, tests should clean it - before starting, or work around it - - E.g., if a test needs to write a table, but the table is already present and - partially filled as a leftover from a previous test, the new test should - delete the table and create it again - - clean the DB after themselves, i.e., undo the work that has been done - - E.g., if a test creates a table, then the test should delete the table at - the end of the test - - - An existing DB can be reused - - A user can create a persistent local DB in the Docker container, e.g. for OMS: - ``` - docker> (cd oms; sudo docker-compose \ - --file /app/oms/devops/compose/tmp.docker-compose.yml up \ - -d \ - oms_postgres) - ``` - or - ``` - docker> invoke oms_docker_up - ``` - - Then this class skips creating / destructing the DB, making the tests faster - and allowing easier debugging. - """ - - @classmethod - def setUpClass(cls) -> None: - """ - Initialize the test database inside test container. - """ - _LOG.info("\n%s", hprint.frame("setUpClass")) - cls._create_docker_files() - # Read the connection parameters from the env file. - cls.db_env_file = cls._get_db_env_path() - connection_info = hsql.get_connection_info_from_env_file(cls.db_env_file) - _LOG.debug("connection_info=%s", connection_info) - conn_exists = hsql.check_db_connection(*connection_info)[0] - if conn_exists: - _LOG.warning("DB is already up: skipping docker compose") - # Since we have found the DB already up, we assume that we need to - # leave it running after the tests - cls.bring_down_db = False - else: - # Start the service. - cls.docker_compose_file_path = os.path.join( - hgit.get_amp_abs_path(), cls._get_compose_file() - ) - # TODO(Grisha): use invoke task CMTask #547. 
- cmd = ( - "sudo docker-compose " - f"--file {cls.docker_compose_file_path} " - f"--env-file {cls.db_env_file} " - f"up -d {cls._get_service_name()}" - ) - _LOG.debug("cmd=%s", cmd) - hsystem.system(cmd, suppress_output=False) - # Wait for the DB to be available. - hsql.wait_db_connection(*connection_info) - cls.bring_down_db = True - # Save connection info. - # TODO(gp): -> db_connection - cls.connection = hsql.get_connection(*connection_info, autocommit=True) - - # TODO(Grisha): difference between cmamp and kaizenflow. - @classmethod - def tearDownClass(cls) -> None: - """ - Bring down the test container. - """ - _LOG.info("\n%s", hprint.frame("tearDown")) - docker_compose_cleanup = cls.bring_down_db - if docker_compose_cleanup: - if hserver.use_main_network(): - # When using sibling containers `docker-compose down` tries to shut - # down also the `main_network`, while it is attached to the Docker - # container running the tests - # So we clean up the containers and volumes directly. - # TODO(gp): This could become an invoke target. - # Remove the container, e.g., `compose-oms_postgres7482-1`. - service_name = cls._get_service_name() - container_name = f"compose-{service_name}-1" - use_sudo = hdocker.get_use_sudo() - hdocker.container_rm(container_name, use_sudo) - # Remove the volume, e.g., `compose_oms_postgres7482_data`. - volume_name = f"compose_{service_name}_data" - hdocker.volume_rm(volume_name, use_sudo) - else: - # TODO(Grisha): use invoke task CMTask #547. - cmd = ( - "sudo docker-compose " - f"--file {cls.docker_compose_file_path} " - f"--env-file {cls.db_env_file} " - "down -v" - ) - hsystem.system(cmd, suppress_output=False) - else: - _LOG.warning("Leaving DB up") - if not hunitest.get_incremental_tests(): - os.unlink(cls._get_compose_file()) - os.unlink(cls._get_db_env_path()) - - @classmethod - @abc.abstractmethod - def get_id(cls) -> int: - """ - Return a unique ID to create an OMS instance. 
- - This ID is used to generate Docker compose / env files and - services, so that we can avoid collisions in case of parallel - execution. - - This function is specified by the unit test in a way that is - unique to each test. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_compose_file(cls) -> str: - """ - Get path to Docker compose file. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_service_name(cls) -> str: - """ - Get service name. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_db_env_path(cls) -> str: - """ - Get path to env file that contains DB connection parameters. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _create_docker_files(cls) -> str: - """ - Create the compose and env file for the DB run. - """ - raise NotImplementedError - - @classmethod - @abc.abstractmethod - def _get_postgres_db(cls) -> str: - """ - Return the name of the postgres DB to use (e.g., im_postgres_db_local). - """ - raise NotImplementedError - - -# ############################################################################# -# TestImOmsDbHelper -# ############################################################################# - - -class TestImOmsDbHelper(TestDbHelper, abc.ABC): - # TODO(gp): Rewrite building a YAML with a package. - @classmethod - def _create_docker_files(cls) -> None: - # Create compose file. - service_name = cls._get_service_name() - idx = cls.get_id() - host_port = 5432 + idx - txt = f"""version: '3.5' -services: - # Docker container running Postgres DB. - {service_name}: - image: postgres:13 - restart: "no" - environment:""" - if not hserver.use_docker_db_container_name_to_connect(): - # Use the port to connect. 
- txt += f""" - - POSTGRES_HOST=${{POSTGRES_HOST}} - - POSTGRES_DB=${{POSTGRES_DB}} - - POSTGRES_PORT=${{POSTGRES_PORT}} - - POSTGRES_USER=${{POSTGRES_USER}} - - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} - volumes: - - {service_name}_data:/var/lib/postgresql/data - ports: - - {host_port}:5432""" - else: - # Do not use the port to connect. - txt += f""" - - POSTGRES_HOST=${{POSTGRES_HOST}} - - POSTGRES_DB=${{POSTGRES_DB}} - - POSTGRES_USER=${{POSTGRES_USER}} - - POSTGRES_PASSWORD=${{POSTGRES_PASSWORD}} - volumes: - - {service_name}_data:/var/lib/postgresql/data""" - # - txt += f""" -volumes: - {service_name}_data: {{}} - -networks: - default: - #name: {service_name}_network - name: main_network""" - compose_file_name = cls._get_compose_file() - hio.to_file(compose_file_name, txt) - # Create env file. - txt = [] - if not hserver.use_docker_db_container_name_to_connect(): - if hserver.is_dev4(): - host = "cf-spm-dev4" - else: - # host = os.environ["CSFY_HOST_NAME"] - host = "localhost" - else: - # Use the service name, e.g., `im_postgres...`. 
- host = service_name - postgres_db = cls._get_postgres_db() - txt.append(f"POSTGRES_HOST={host}") - txt.append(f"POSTGRES_DB={postgres_db}") - if not hserver.use_docker_db_container_name_to_connect(): - txt.append(f"POSTGRES_PORT={host_port}") - txt.append("POSTGRES_USER=aljsdalsd") - txt.append("POSTGRES_PASSWORD=alsdkqoen") - txt = "\n".join(txt) - env_file_name = cls._get_db_env_path() - hio.to_file(env_file_name, txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py deleted file mode 100644 index a56f9b0a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hstring.py +++ /dev/null @@ -1,176 +0,0 @@ -""" -Import as: - -import helpers.hstring as hstring -""" - -import logging -import os -import re -import tempfile -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def remove_prefix(string: str, prefix: str, assert_on_error: bool = True) -> str: - if string.startswith(prefix): - res = string[len(prefix) :] - else: - res = string - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't start with prefix ='{prefix}'" - ) - return res - - -def remove_suffix(string: str, suffix: str, assert_on_error: bool = True) -> str: - if string.endswith(suffix): - res = string[: -len(suffix)] - else: - res = string - if assert_on_error: - raise RuntimeError( - f"string='{string}' doesn't end with suffix='{suffix}'" - ) - return res - - -def diff_strings( - txt1: str, - txt2: str, - txt1_descr: Optional[str] = None, - txt2_descr: Optional[str] = None, - width: int = 130, -) -> str: - # Write file. 
- def _to_file(txt: str, txt_descr: Optional[str]) -> str: - file_name = tempfile.NamedTemporaryFile().name - if txt_descr is not None: - txt = "# " + txt_descr + "\n" + txt - hio.to_file(file_name, txt) - return file_name - - file_name1 = _to_file(txt1, txt1_descr) - file_name2 = _to_file(txt2, txt2_descr) - # Get the difference between the files. - cmd = f"sdiff --width={width} {file_name1} {file_name2}" - _, txt = hsystem.system_to_string( - cmd, - # We don't care if they are different. - abort_on_error=False, - ) - return txt - - -# TODO(gp): GFI. Move to hpython_code.py -def get_docstring_line_indices(lines: List[str]) -> List[int]: - """ - Get indices of lines of code that are inside (doc)strings. - - :param lines: the code lines to check - :return: the indices of docstrings - """ - docstring_line_indices = [] - quotes = {'"""': False, "'''": False, "```": False} - for i, line in enumerate(lines): - # Determine if the current line is inside a (doc)string. - for quote in quotes: - quotes_matched = re.findall(quote, line) - for q in quotes_matched: - # Switch the docstring flag. - # pylint: disable=modified-iterating-dict - quotes[q] = not quotes[q] - if q in ('"""', "'''") and not quotes[q]: - # A triple-quote has just been closed. - # Reset the triple backticks flag. - quotes["```"] = False - if any(quotes.values()): - # Store the index if the quotes have been opened but not closed yet. - docstring_line_indices.append(i) - return docstring_line_indices - - -def get_docstrings(lines: List[str]) -> List[List[int]]: - """ - Get line indices grouped together by the docstring they belong to. - - :param lines: lines from the file to process - :return: grouped lines within docstrings - """ - # Get indices of lines that are within docstrings. - doc_indices = get_docstring_line_indices(lines) - # Group these indices into consecutive docstrings. 
- docstrings = [] - if doc_indices: - current_docstring = [doc_indices[0]] - for idx in doc_indices[1:]: - if idx == current_docstring[-1] + 1: - current_docstring.append(idx) - else: - docstrings.append(current_docstring) - current_docstring = [idx] - docstrings.append(current_docstring) - return docstrings - - -# TODO(gp): GFI. Move to hpython_code.py -def get_code_block_line_indices(lines: List[str]) -> List[int]: - """ - Get indices of lines that are inside code blocks. - - Code blocks are lines surrounded by triple backticks, e.g., - ``` - This line. - ``` - Note that the backticks need to be the leftmost element of their line. - - :param lines: the lines to check - :return: the indices of code blocks - """ - code_block_line_indices = [] - quotes = {"```": False} - for i, line in enumerate(lines): - # Determine if the current line is inside a code block. - for quote in quotes: - quotes_matched = re.findall(rf"^\s*({quote})", line) - for q in quotes_matched: - # Switch the flag. - # pylint: disable=modified-iterating-dict - quotes[q] = not quotes[q] - if any(quotes.values()): - # Store the index if the quotes have been opened but not closed yet. - code_block_line_indices.append(i) - return code_block_line_indices - - -def extract_version_from_file_name(file_name: str) -> Tuple[int, int]: - """ - Extract version number from filename_vXX.json file. - - E.g. - - 'universe_v3.1.json' -> (3, 1) - - 'universe_v1.json' -> (1, 0) - - 'dataset_schema_v3.json' -> (3, 0) - - Currently only JSON file extension is supported. - - :param file_name: file to extract version part from - :return: file version tuple in format (major, minor) - """ - basename = os.path.basename(file_name).rstrip(".json") - m = re.search(r"v(\d+(\.\d+)?)$", basename) - hdbg.dassert( - m, - "Can't parse file '%s', correct format is e.g. 'universe_v03.json'.", - basename, - ) - # Groups return tuple. 
- version = m.groups(1)[0].split(".") # type: ignore[arg-type, union-attr] - major, minor = int(version[0]), 0 if len(version) == 1 else int(version[1]) - return major, minor diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py deleted file mode 100644 index b63bd34f4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hsystem.py +++ /dev/null @@ -1,1097 +0,0 @@ -""" -Contain all the code needed to interact with the outside world, e.g., through -system commands, env vars, ... - -Import as: - -import helpers.hsystem as hsystem -""" - -import contextlib -import datetime -import getpass -import glob -import logging -import os -import re -import signal -import subprocess -import sys -import time -from typing import ( - Any, - Callable, - Generator, - List, - Match, - Optional, - Tuple, - Union, - cast, -) - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint -import helpers.hserver as hserver - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - - -_LOG = logging.getLogger(__name__) - -# Set logging level of this file higher to avoid too much chatter. -_LOG.setLevel(logging.INFO) - -# ############################################################################# - - -# TODO(gp): Move to hdatetime.py and maybe merge with `timestamp_to_str()`. 
-def get_timestamp() -> str: - timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - return timestamp - - -# TODO(gp): Maybe move to hserver.py -def is_running_in_ipynb() -> bool: - # From https://stackoverflow.com/questions/15411967 - try: - _ = get_ipython().config # type: ignore - res = True - except NameError: - res = False - return res - - -# ############################################################################# - -_USER_NAME = None - - -def set_user_name(user_name: str) -> None: - """ - To impersonate a user. - - To use only in rare cases for testing or back-door. - """ - _LOG.warning("Setting user to '%s'", user_name) - global _USER_NAME - _USER_NAME = user_name - - -def get_user_name() -> str: - if _USER_NAME is None: - res = getpass.getuser() - else: - res = _USER_NAME - hdbg.dassert_ne(res, "") - return res - - -def get_server_name() -> str: - res = os.uname() - # posix.uname_result( - # sysname='Darwin', - # nodename='gpmac.lan', - # release='18.2.0', - # version='Darwin Kernel Version 18.2.0: Mon Nov 12 20:24:46 PST 2018; - # root:xnu-4903.231.4~2/RELEASE_X86_64', - # machine='x86_64') - # This is not compatible with python2.7 - # return res.nodename - return res[1] - - -def get_os_name() -> str: - res = os.uname() - # This is not compatible with python2.7 - # return res.sysname - return res[0] - - -def get_env_var(env_var_name: str) -> str: - if env_var_name not in os.environ: - msg = f"Can't find '{env_var_name}': re-run dev_scripts/setenv.sh?" 
- _LOG.error(msg) - raise RuntimeError(msg) - return os.environ[env_var_name] - - -# ############################################################################# -# system(), system_to_string() -# ############################################################################# - - -# pylint: disable=too-many-branches,too-many-statements,too-many-arguments,too-many-locals -def _system( - cmd: str, - print_command: bool, - abort_on_error: bool, - suppress_error: Optional[Any], - suppress_output: Union[bool, str], - blocking: bool, - wrapper: Optional[Any], - output_file: Optional[Any], - num_error_lines: Optional[int], - tee: bool, - dry_run: bool, - log_level: Union[int, str], -) -> Tuple[int, str]: - """ - Execute a shell command. - - To print the command and see the output call this as: - ``` - _system(cmd, suppress_output=False, log_level="echo") - ``` - - See `system()` for options. - """ - _LOG.debug(hprint.func_signature_to_str()) - _LOG.debug("##> %s", cmd) - orig_cmd = cmd[:] - _LOG.debug("orig_cmd=%s", orig_cmd) - # Handle `suppress_output`. - hdbg.dassert_in(suppress_output, ("ON_DEBUG_LEVEL", True, False)) - if suppress_output == "ON_DEBUG_LEVEL": - # Show the output if we are at (or lower than) DEBUG level, since - # logging.DEBUG=10 and logging.INFO=20. - show_output = _LOG.getEffectiveLevel() <= logging.DEBUG - suppress_output = not show_output - _LOG.debug(hprint.to_str("suppress_output")) - # Prepare the command line. - cmd = f"({cmd})" - hdbg.dassert_imply(tee, output_file is not None) - if output_file is not None: - # Redirect to a file. - dir_name = os.path.dirname(output_file) - if not dir_name: - dir_name = "." - if not os.path.exists(dir_name): - _LOG.debug("Dir '%s' doesn't exist: creating", dir_name) - hdbg.dassert(bool(dir_name), "dir_name='%s'", dir_name) - os.makedirs(dir_name) - if tee: - cmd += f" 2>&1 | tee -a {output_file};" - cmd += " exit ${PIPESTATUS[0]}" - else: - cmd += f" 2>&1 >{output_file}" - else: - # Do not redirect to a file. 
- cmd += " 2>&1" - # Handle `wrapper`. - if wrapper: - cmd = wrapper + " && " + cmd - # Handle `log_level`. - # TODO(gp): Make it "ECHO" or "PRINT". - if isinstance(log_level, str): - hdbg.dassert_in(log_level, ("echo", "echo_frame")) - if log_level == "echo_frame": - print(hprint.frame(f"> {cmd}")) - elif log_level == "echo": - print(f"> {cmd}") - else: - raise ValueError(f"Invalid log_level='{log_level}'") - _LOG.debug("> %s", cmd) - else: - _LOG.log(log_level, "> %s", cmd) - output = "" - # Handle `dry_run`. - if dry_run: - _LOG.warning("As per user request, not executing command:\n%s", cmd) - rc = 0 - return rc, output - # Execute the command. - try: - stdout = subprocess.PIPE - stderr = subprocess.STDOUT - if print_command: - _LOG.info("> %s", cmd) - with subprocess.Popen( - cmd, - shell=True, - executable="/bin/bash", - stdout=stdout, - stderr=stderr, - ) as p: - output = "" - if blocking: - # Blocking call: get the output. - while True: - line = p.stdout.readline().decode("utf-8", errors="replace") # type: ignore - if not line: - break - if not suppress_output: - # print(" ==> " + line.rstrip("\n")) - print(" ... " + line.rstrip("\n")) - output += line - p.stdout.close() # type: ignore - rc = p.wait() - else: - # Not blocking. - # Wait until process terminates (without using p.wait()). - max_cnt = 20 - cnt = 0 - while p.poll() is None: - # Process hasn't exited yet, let's wait some time. - time.sleep(0.1) - cnt += 1 - _LOG.debug("cnt=%s, rc=%s", cnt, p.returncode) - if cnt > max_cnt: - break - if cnt > max_cnt: - # Timeout: we assume it worked. - rc = 0 - else: - rc = p.returncode - if suppress_error is not None: - hdbg.dassert_isinstance(suppress_error, set) - if rc in suppress_error: - rc = 0 - except OSError as e: - rc = -1 - _LOG.error("error=%s", str(e)) - _LOG.debug(" ==> rc=%s", rc) - if abort_on_error and rc != 0: - # Report the last `num_error_lines` of the output. 
- num_error_lines = num_error_lines or 30 - output_error = "\n".join(output.split("\n")[-num_error_lines:]) - msg = [] - msg.append("\n" + hprint.frame("_system() failed", thickness=2)) - msg.append(hprint.func_signature_to_str()) - msg.append(hprint.frame(f"cmd='{cmd}'", char1="%", thickness=1)) - msg.append(f"- rc='{rc}'") - msg.append(f"- output='\n{output_error}'") - # Save the output in a file. - file_name = "tmp.system_output.txt" - with open(file_name, "w") as f: - f.write(output) - msg.append(f"- Output saved in '{file_name}'") - # Save the command in an executable file. - file_name = "tmp.system_cmd.sh" - msg.append(f"- Command saved in '{file_name}'") - with open(file_name, "w") as f: - f.write(cmd) - os.chmod(file_name, 0o755) - # - msg = "\n".join(msg) - raise RuntimeError(msg) - # hdbg.dassert_type_in(output, (str, )) - return rc, output - - -# pylint: disable=too-many-arguments -def system( - cmd: str, - *, - print_command: bool = False, - abort_on_error: bool = True, - suppress_error: Optional[Any] = None, - suppress_output: Union[str, bool] = "ON_DEBUG_LEVEL", - blocking: bool = True, - wrapper: Optional[Any] = None, - output_file: Optional[Any] = None, - num_error_lines: Optional[int] = None, - tee: bool = False, - dry_run: bool = False, - log_level: Union[int, str] = logging.DEBUG, -) -> int: - """ - Execute a shell command, without capturing its output. 
- - :param cmd: string with command to execute - :param print_command: whether to print the command using `_LOG.info()` - :param abort_on_error: whether we should assert in case of error or not - :param suppress_error: set of error codes to suppress - :param suppress_output: whether to print the output or not - - If "ON_DEBUG_LEVEL" then print the output if the log level is DEBUG - :param blocking: blocking system call or not - :param wrapper: another command to prepend the execution of cmd - :param output_file: redirect stdout and stderr to this file - :param num_error_lines: number of lines of the output to display when - raising `RuntimeError` - :param tee: if True, tee append (i.e., `tee -a`) stdout and stderr to - `output_file` - :param dry_run: print the final command but not execute it - :param log_level: print the command to execute at level "log_level". - - If `echo` then print the command line to screen as `print()` and not - logging - :return: - - return code as int - - output of the command as str - """ - # print("cmd=", cmd) - # print("suppress_output=", suppress_output) - cmd = hprint.dedent(cmd) - rc, _ = _system( - cmd, - print_command=print_command, - abort_on_error=abort_on_error, - suppress_error=suppress_error, - suppress_output=suppress_output, - blocking=blocking, - wrapper=wrapper, - output_file=output_file, - num_error_lines=num_error_lines, - tee=tee, - dry_run=dry_run, - log_level=log_level, - ) - return rc - - -# def _system_to_string(cmd): -# py_ver = sys.version_info[0] -# if py_ver == 2: -# txt = subprocess.check_output(cmd) -# elif py_ver == 3: -# txt = subprocess.getoutput(cmd) -# else: -# raise RuntimeError("Invalid py_ver=" + py_ver) -# txt = [f for f in txt.split("\n") if f] -# hdbg.dassert_eq(len(txt), 1) -# return txt[0] - - -def system_to_string( - cmd: str, - *, - print_command: bool = False, - abort_on_error: bool = True, - suppress_output: Union[bool, str] = "ON_DEBUG_LEVEL", - wrapper: Optional[Any] = None, - dry_run: bool 
= False, - log_level: Union[int, str] = logging.DEBUG, -) -> Tuple[int, str]: - """ - Execute a shell command and capture its output. - - See _system() for options. - """ - rc, output = _system( - cmd, - print_command=print_command, - abort_on_error=abort_on_error, - suppress_error=None, - suppress_output=suppress_output, - # If we want to see the output the system call must be blocking. - blocking=True, - wrapper=wrapper, - output_file=None, - num_error_lines=None, - tee=False, - dry_run=dry_run, - log_level=log_level, - ) - output = output.rstrip("\n") - return rc, output - - -# ############################################################################# -# system_to_one_line() -# ############################################################################# - - -def get_first_line(output: str) -> str: - """ - Return the first (and only) line from a string. - - This is used when calling system_to_string() and expecting a single - line output. - """ - output = hprint.remove_empty_lines(output) - output_as_arr: List[str] = output.split("\n") - # Remove the annoying spurious matches under `tmp.base`. - output_as_arr = [line for line in output_as_arr if "/tmp.base/" not in line] - hdbg.dassert_eq(len(output_as_arr), 1, "output='%s'", output) - output = output_as_arr[0] - output = output.rstrip().lstrip() - return output - - -# TODO(gp): Move it to a more general file, e.g., `helpers/printing.py`? -def text_to_list(txt: str) -> List[str]: - """ - Convert a string (e.g., from system_to_string) into a list of lines. - """ - res = [line.rstrip().lstrip() for line in txt.split("\n")] - res = [line for line in res if line != ""] - return res - - -def system_to_one_line(cmd: str, *args: Any, **kwargs: Any) -> Tuple[int, str]: - """ - Execute a shell command, capturing its output (expected to be a single - line). - - This is a thin wrapper around system_to_string(). 
- """ - rc, output = system_to_string(cmd, *args, **kwargs) - output = get_first_line(output) - return rc, output - - -# ############################################################################# -# system_to_files() -# ############################################################################# - - -def to_normal_paths(files: List[str]) -> List[str]: - files = list(map(os.path.normpath, files)) - return files - - -def to_absolute_paths(files: List[str]) -> List[str]: - files = list(map(os.path.abspath, files)) - return files - - -def _remove_files_non_present(files: List[str]) -> List[str]: - """ - Return list of files from `files` excluding the files that don't exist. - """ - files_tmp = [] - for f in files: - if os.path.exists(f): - files_tmp.append(f) - else: - _LOG.warning("File '%s' doesn't exist: skipping", f) - return files_tmp - - -def remove_dirs(files: List[str]) -> List[str]: - """ - Return list of files from `files` excluding the files that are directories. - """ - files_tmp: List[str] = [] - dirs_tmp: List[str] = [] - for file in files: - if os.path.isdir(file): - _LOG.debug("file='%s' is a dir: skipping", file) - dirs_tmp.append(file) - else: - files_tmp.append(file) - if dirs_tmp: - _LOG.warning("Removed dirs: %s", ", ".join(dirs_tmp)) - return files_tmp - - -def select_result_file_from_list( - files: List[str], mode: str, file_name: str -) -> List[str]: - """ - Select a file from a list according to various approaches encoded in - `mode`. - - :param files: list of files to select from - :param file_name: name of the file we are looking for - :param mode: - - "return_all_results": return the list of files, whatever it is - - "assert_unless_one_result": assert unless there is a single file and return - the only file. Note that we still return a list to keep the interface - simple. - """ - res: List[str] = [] - if mode == "assert_unless_one_result": - # Expect to have a single result and return that. 
- if len(files) == 0: - hdbg.dfatal(f"mode={mode}: didn't find file {file_name}") - elif len(files) > 1: - hdbg.dfatal( - f"mode={mode}: found multiple files:\n" + "\n".join(files) - ) - res = [files[0]] - elif mode == "return_all_results": - # Return all files. - res = files - else: - hdbg.dfatal(f"Invalid mode='{mode}'") - return res - - -def system_to_files( - cmd: str, - dir_name: Optional[str] = None, - remove_files_non_present: bool = False, - mode: str = "return_all_results", -) -> List[str]: - """ - Execute command `cmd` in `dir_name` and return the output as a list of - strings. - - :param remove_files_non_present: remove files that don't exist on - the filesystem - :param mode: like in `select_result_file_from_list()` - """ - if dir_name is None: - dir_name = "." - hdbg.dassert_dir_exists(dir_name) - cmd = f"cd {dir_name} && {cmd}" - _, output = system_to_string(cmd) - # Remove empty lines. - _LOG.debug("output=\n%s", output) - files = output.split("\n") - files = [line.rstrip().rstrip() for line in files] - files = [line for line in files if line != ""] - _LOG.debug("files=%s", " ".join(files)) - # Convert to normalized paths. - files = [os.path.join(dir_name, f) for f in files] - files: List[str] = list(map(os.path.normpath, files)) # type: ignore - _LOG.debug(hprint.to_str("files")) - # Remove non-existent files, if needed. - if remove_files_non_present: - files = _remove_files_non_present(files) - # Process output. - files = select_result_file_from_list(files, mode, cmd) - return files - - -# ############################################################################# -# Functions handling processes -# ############################################################################# - - -def get_process_pids( - keep_line: Callable[[str], bool], -) -> Tuple[List[int], List[str]]: - """ - Find all the processes corresponding to `ps ax` filtered line by line with - `keep_line()`. 
- - :return: list of pids and filtered output of `ps ax` - """ - cmd = "ps ax" - rc, txt = system_to_string(cmd, abort_on_error=False) - _LOG.debug("txt=\n%s", txt) - pids: List[int] = [] - txt_out: List[str] = [] - if rc == 0: - for line in txt.split("\n"): - _LOG.debug("line=%s", line) - # PID TT STAT TIME COMMAND - if "PID" in line and "TT" in line and "STAT" in line: - txt_out.append(line) - continue - keep = keep_line(line) - _LOG.debug(" keep=%s", keep) - if not keep: - continue - # > ps ax | grep 'ssh -i' | grep localhost - # 19417 ?? Ss 0:00.39 ssh -i /Users/gp/.ssh/id_rsa -f -nNT \ - # -L 19999:localhost:19999 gp@54.172.40.4 - fields = line.split() - try: - pid = int(fields[0]) - except ValueError as e: - _LOG.error( - "Can't parse fields '%s' from line '%s'", fields, line - ) - raise e - _LOG.debug("pid=%s", pid) - pids.append(pid) - txt_out.append(line) - return pids, txt_out - - -def kill_process( - get_pids: Callable[[], Tuple[List[int], str]], - timeout_in_secs: int = 5, - polltime_in_secs: float = 0.1, -) -> None: - """ - Kill all the processes returned by the function `get_pids()`. 
- - :param timeout_in_secs: how many seconds to wait at most before - giving up - :param polltime_in_secs: how often to check for dead processes - """ - import tqdm - - pids, txt = get_pids() - _LOG.info("Killing %d pids (%s)\n%s", len(pids), pids, "\n".join(txt)) - if not pids: - return - for pid in pids: - try: - os.kill(pid, signal.SIGKILL) - except ProcessLookupError as e: - _LOG.warning(str(e)) - # - _LOG.info("Waiting %d processes (%s) to die", len(pids), pids) - for _ in tqdm.tqdm( - range(int(timeout_in_secs / polltime_in_secs)), desc="Polling process" - ): - time.sleep(polltime_in_secs) - pids, _ = get_pids() - if not pids: - break - pids, txt = get_pids() - hdbg.dassert_eq(len(pids), 0, "Processes are still alive:%s", "\n".join(txt)) - _LOG.info("Processes dead") - - -# ############################################################################# -# User interaction -# ############################################################################# - - -def query_yes_no(question: str, *, abort_on_no: bool = True) -> bool: - """ - Ask a yes/no question via `input()` and return their answer. - - :param question: string with the question presented to the user - :param abort_on_no: exit if the user answers "no" - :return: True for "yes" or False for "no" - """ - hdbg.dassert_isinstance(question, str) - hdbg.dassert_isinstance(abort_on_no, bool) - valid = { - "yes": True, - "y": True, - # - "no": False, - "n": False, - } - prompt = " [y/n] " - while True: - sys.stdout.write(question + prompt) - choice = input().lower() - if choice in valid: - ret = valid[choice] - break - _LOG.debug("ret=%s", ret) - if abort_on_no: - if not ret: - print("You answer no: exiting") - sys.exit(-1) - return ret - - -def press_enter_to_continue(prompt: str = "") -> None: - hdbg.dassert_isinstance(prompt, str) - if not prompt: - prompt = "Press Enter to continue..." 
- sys.stdout.write(prompt) - _ = input() - - -# ############################################################################# -# Functions similar to Linux commands. -# ############################################################################# - - -def check_exec(tool: str) -> bool: - """ - Check if an executable can be executed. - - :return: True if the executables "tool" can be executed. - """ - suppress_output = _LOG.getEffectiveLevel() > logging.DEBUG - cmd = f"which {tool}" - abort_on_error = False - rc = system( - cmd, - abort_on_error=abort_on_error, - suppress_output=suppress_output, - log_level=logging.DEBUG, - ) - return rc == 0 - - -def to_pbcopy(txt: str, pbcopy: bool) -> None: - """ - Save the content of txt in the system clipboard. - """ - txt = txt.rstrip("\n") - if not pbcopy: - print(txt) - return - if not txt: - print("Nothing to copy") - return - if hserver.is_host_mac(): - # -n = no new line - cmd = f"echo -n '{txt}' | pbcopy" - system(cmd) - _LOG.warning("\n# Copied to system clipboard:\n%s", txt) - else: - _LOG.warning("pbcopy works only on macOS") - print(txt) - - -# ############################################################################# - -# Copied from hgit to avoid import cycles. - - -def _find_git_root(path: str = ".") -> str: - """ - Find recursively the dir of the outermost super module. - - This function traverses the directory hierarchy upward from a specified - starting path to find the root directory of a Git repository. - It supports: - - standard git repository: where a `.git` directory exists at the root - - submodule: where repository is nested inside another, and the `.git` file contains - a `gitdir:` reference to the submodule's actual Git directory - - linked repositories: where the `.git` file points to a custom Git directory - location, such as in Git worktrees or relocated `.git` directories - - :param path: starting file system path. 
Defaults to the current directory (".") - :return: absolute path to the top-level Git repository directory - """ - path = os.path.abspath(path) - git_root_dir = None - while True: - git_dir = os.path.join(path, ".git") - _LOG.debug("git_dir=%s", git_dir) - # Check if `.git` is a directory which indicates a standard Git repository. - if os.path.isdir(git_dir): - # Found the Git root directory. - git_root_dir = path - break - # Check if `.git` is a file which indicates submodules or linked setups. - if os.path.isfile(git_dir): - # Using the `open()` to avoid import cycles with the `hio` module. - with open(git_dir, "r") as f: - txt = f.read() - lines = txt.split("\n") - for line in lines: - # Look for a `gitdir:` line that specifies the linked directory. - # Example: `gitdir: ../.git/modules/helpers_root`. - if line.startswith("gitdir:"): - git_dir_path = line.split(":", 1)[1].strip() - _LOG.debug("git_dir_path=%s", git_dir_path) - # Resolve the relative path to the absolute path of the Git directory. - abs_git_dir = os.path.abspath( - os.path.join(path, git_dir_path) - ) - # Traverse up to find the top-level `.git` directory. - while True: - # Check if the current directory is a `.git` directory. - if os.path.basename(abs_git_dir) == ".git": - git_root_dir = os.path.dirname(abs_git_dir) - # Found the root. - break - # Move one level up in the directory structure. - parent = os.path.dirname(abs_git_dir) - # Reached the filesystem root without finding the `.git` directory. - hdbg.dassert_ne( - parent, - abs_git_dir, - "Top-level .git directory not found.", - ) - # Continue traversing up. - abs_git_dir = parent - break - # Exit the loop if the Git root directory is found. - if git_root_dir is not None: - break - # Move up one level in the directory hierarchy. - parent = os.path.dirname(path) - # Reached the filesystem root without finding `.git`. 
- hdbg.dassert_ne( - parent, - path, - "No .git directory or file found in any parent directory.", - ) - # Update the path to the parent directory for the next iteration. - path = parent - return git_root_dir - - -# End copy. - - -def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: - """ - Find file in the repo. - """ - if root_dir is None: - root_dir = _find_git_root() - _, file_name_out = system_to_one_line( - rf"find {root_dir} -name {file_name} -not -path '*/\.git/*'" - ) - hdbg.dassert_ne(file_name_out, "", "File not found in repo: '%s'", file_name) - return file_name_out - - -# TODO(gp): Use find_file -def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: - """ - Find a file in a directory and report its absolute path. - - :param filename: the name of the file to find (e.g., "helpers_root") - :param search_path: the directory to search in (e.g., "/Users/saggese/src/helpers1") - :return: the absolute path of the file - """ - # Recursive glob. - search_path = os.path.join(search_path, "**", filename) - files = glob.glob(search_path, recursive=True) - if len(files) == 1: - return files[0] - elif len(files) > 1: - msg = f"Found multiple files with basename '{filename}' in directory '{search_path}':\n" - msg += "\n".join(files) - raise RuntimeError(msg) - else: - return None - - -# TODO(gp): -> find_path_greedily -def find_path( - path: str, *, dir_name: str = ".", abort_on_error: bool = False -) -> str: - """ - Find a path in a directory and report its absolute path. - - :param path: the path to find (e.g., "system_tools/path.py") - :param dir_name: the directory to search in (e.g., "/Users/saggese/src/helpers1") - :param abort_on_error: if True, raise an error if the path doesn't exist - :return: the absolute path of the path - """ - # Make the path absolute. - path_out = os.path.abspath(path) - # If the path exists, return it. - if os.path.exists(path_out): - return path_out - # If the path doesn't exist, abort. 
- if abort_on_error: - msg = f"path '{path}' doesn't exist in '{dir_name}'" - raise RuntimeError(msg) - # Look for a file with the same basename in ``dir_name``. - dir_name = os.path.abspath(dir_name) - basename = os.path.basename(path) - path_out = _find_file(basename, search_path=dir_name) - # If the file doesn't exist, abort. - if path_out is None: - msg = f"path '{path}' doesn't exist in '{dir_name}'" - raise RuntimeError(msg) - return path_out - - -# TODO(Nikola): Use filesystem's `du` and move to `hio` instead? -def du(path: str, human_format: bool = False) -> Union[int, str]: - """ - Return the size in bytes of a file or a directory (recursively). - - :param human_format: represent the size in KB, MB, ... instead of bytes - using `hintrospection.format_size()` - """ - hdbg.dassert_path_exists(path) - cmd = f"du -d 0 {path}" + " | awk '{print $1}'" - # > du -d 0 core - # 20 core - _, txt = system_to_one_line(cmd) - _LOG.debug("txt=%s", txt) - # `du` returns size in KB. - size_in_bytes = int(txt) * 1024 - size: Union[int, str] - if human_format: - size = hintros.format_size(size_in_bytes) - else: - size = size_in_bytes - return size - - -def _compute_file_signature(file_name: str, dir_depth: int) -> Optional[List]: - """ - Compute a signature for files using basename and `dir_depth` enclosing - dirs. 
- - :return: tuple of extracted enclosing dirs - - E.g., `("core", "dataflow_model", "utils.py")` - """ - # Split a file like: - # /app/amp/core/test/TestCheckSameConfigs.test_check_same_configs_error/output/test.txt - # into - # ['', 'app', 'amp', 'core', 'test', - # 'TestCheckSameConfigs.test_check_same_configs_error', 'output', 'test.txt'] - path = os.path.normpath(file_name) - paths = path.split(os.sep) - hdbg.dassert_lte(1, dir_depth) - if dir_depth > len(paths): - _LOG.warning( - "Can't compute signature of file_name='%s' with" - " dir_depth=%s, len(paths)=%s", - file_name, - dir_depth, - len(paths), - ) - signature = None - else: - signature = paths[-(dir_depth + 1) :] - return signature - - -# TODO(gp): -> hio.py -def find_file_with_dir( - file_name: str, - *, - root_dir: str = ".", - dir_depth: int = -1, - mode: str = "return_all_results", - candidate_files: Optional[List[str]] = None, -) -> List[str]: - """ - Find a file matching basename and several enclosing dir name starting from - `root_dir`. - - E.g., find a file matching `amp/core/dataflow_model/utils.py` with `dir_depth=1` - means looking for a file with basename 'utils.py' under a dir 'dataflow_model'. - - :param dir_depth: how many enclosing dirs in order to declare a match. - - `-1` to use as many enclosing dirs as possible. E.g., - `/app/amp/core/dataflow/utils.py` will use 3 levels, since `/app` is - removed - :param mode: control the returned list of files, like in - `select_result_file_from_list()` - :param candidate_files: list of results from the `find` command for unit test - mocking - :return: list of files found - """ - _LOG.debug(hprint.func_signature_to_str()) - # Find all the files in the dir with the same basename. - if candidate_files is None: - base_name = os.path.basename(file_name) - cmd = rf"find . -name '{base_name}' -not -path '*/\.git/*'" - # > find . 
-name "utils.py" - # ./amp/core/dataflow/utils.py - # ./amp/core/dataflow_model/utils.py - # ./amp/im/common/test/utils.py - mode_ = "return_all_results" - candidate_files = system_to_files(cmd, dir_name=root_dir, mode=mode_) - _LOG.debug("candidate files=\n%s", "\n".join(candidate_files)) - # - if dir_depth == -1: - # Remove "/app" if present. - prefix = "/app/" - if file_name.startswith(prefix): - file_name = file_name[len(prefix) :] - # Remove "amp" if present. - prefix = "amp/" - if file_name.startswith(prefix): - file_name = file_name[len(prefix) :] - # Count how many dirs levels there are. - dir_depth = len(os.path.normpath(file_name).split("/")) - 1 - _LOG.debug( - "inferred dir_depth=%s for file_name=%s", dir_depth, file_name - ) - # Check the matching files. - matching_files = [] - for candidate_file_name in sorted(candidate_files): - signature1 = _compute_file_signature(candidate_file_name, dir_depth) - signature2 = _compute_file_signature(file_name, dir_depth) - is_equal = signature1 == signature2 - _LOG.debug("found_file=%s -> is_equal=%s", candidate_file_name, is_equal) - if is_equal: - matching_files.append(candidate_file_name) - _LOG.debug( - "Found %d files:\n%s", len(matching_files), "\n".join(matching_files) - ) - # Select the result based on mode. - res = select_result_file_from_list(matching_files, mode, file_name) - _LOG.debug("-> res=%s", str(res)) - return res - - -# https://stackoverflow.com/questions/169070 -@contextlib.contextmanager -def cd(dir_name: str) -> Generator[None, None, None]: - """ - Context manager managing changing directory. 
- """ - hdbg.dassert_dir_exists(dir_name) - current_dir = os.getcwd() - _LOG.debug("Entering ctx manager: " + hprint.to_str("current_dir")) - try: - os.chdir(dir_name) - _LOG.debug("Switched to dir '%s'", os.getcwd()) - yield - finally: - _LOG.debug("Switching back to dir '%s'", current_dir) - os.chdir(current_dir) - _LOG.debug("Exiting ctx manager") - - -# ############################################################################# -# File timestamping. -# ############################################################################# - - -def has_timestamp(file_name: str) -> bool: - """ - Check whether `file_name` contains a timestamp. - - The timestamp is in the format `%Y%m%d-%H_%M_%S` (e.g., - 20210724-12_45_51). E.g., this function for - `experiment.RH1E.5T.20210724-12_45_51` returns True. - """ - file_name = os.path.basename(file_name) - # E.g., %Y%m%d-%H_%M_%S - # The separator is _, -, or nothing. - sep = "[-_]?" - regex = sep.join( - [r"\d{4}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}", r"\d{2}"] - ) - _LOG.debug("regex=%s", regex) - occurrences = re.findall(regex, file_name) - hdbg.dassert_lte( - len(occurrences), 1, "Found more than one timestamp", str(occurrences) - ) - m = re.search("(" + regex + ")", file_name) - has_timestamp_ = m is not None - if has_timestamp_: - m = cast(Match[str], m) - _LOG.debug("Found a timestamp '%s' in '%s'", m.group(1), file_name) - return has_timestamp_ - - -def append_timestamp_tag(file_name: str, tag: str) -> str: - """ - Add a tag and the current timestamp to a filename, before the extension. - - :return: new filename - """ - dir_name = os.path.dirname(file_name) - base_name = os.path.basename(file_name) - name, extension = os.path.splitext(base_name) - tag_ = "" - # E.g., 20210723-20_52_00 - if not has_timestamp(file_name): - import helpers.hdatetime as hdateti - - tag_ += "." + hdateti.get_current_timestamp_as_string(tz="ET") - # Add tag, if specified. - if tag: - # If the tag is specified prepend a `.` in the filename. 
- tag_ += "." + tag - new_file_name = os.path.join(dir_name, "".join([name, tag_, extension])) - _LOG.debug(hprint.to_str("file_name new_file_name")) - return new_file_name - - -def tee( - cmd: str, executable: str, abort_on_error: bool -) -> Tuple[int, List[str]]: - """ - Execute command and return its exit code and output lines. - - Captures output, removes empty lines, and optionally aborts on error. - - :param cmd: Command string to execute - :param executable: Executable to use for running the command - :param abort_on_error: Whether to abort execution if command fails - :return: Tuple of (exit code, list of non-empty output lines) - """ - _LOG.debug("cmd=%s executable=%s", cmd, executable) - rc, output = system_to_string(cmd, abort_on_error=abort_on_error) - hdbg.dassert_isinstance(output, str) - output1 = output.split("\n") - _LOG.debug("output1= (%d)\n'%s'", len(output1), "\n".join(output1)) - output2 = hprint.remove_empty_lines(output1) - _LOG.debug("output2= (%d)\n'%s'", len(output2), "\n".join(output2)) - hdbg.dassert_list_of_strings(output2) - return rc, output2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py deleted file mode 100644 index 5278e3984..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htable.py +++ /dev/null @@ -1,180 +0,0 @@ -""" -Import as: - -import helpers.htable as htable -""" - -import copy -import csv -import logging -from typing import Any, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hprint as hprint - -_LOG = logging.getLogger(__name__) - - -TableType = List[List[str]] - - -# ############################################################################# -# Table -# ############################################################################# - - -class Table: - """ - 
A simple (rectangular) table without introducing a dependency from Pandas. - - The element in the table can be anything. - """ - - @staticmethod - def _check_table(table: TableType, column_names: List[str]) -> None: - """ - Check that the table is well-formed (e.g., the list of lists is - rectangular). - """ - hdbg.dassert_isinstance(table, list) - hdbg.dassert_isinstance(column_names, list) - hdbg.dassert_no_duplicates(column_names) - # Columns have no leading or trailing spaces. - for column_name in column_names: - hdbg.dassert_eq(column_name, column_name.rstrip().lstrip()) - # Check that the list of lists is rectangular. - for row in table: - hdbg.dassert_isinstance(table, list) - hdbg.dassert_eq( - len(row), - len(column_names), - "Invalid row='%s' for cols='%s'", - row, - column_names, - ) - - def __repr__(self) -> str: - res = "" - res += f"cols={str(self._column_names)}" - res += "\ntable=\n" + "\n".join(map(str, self._table)) - res += "\n" + f"size={str(self.size())}" - return res - - def __init__(self, table: TableType, column_names: List[str]) -> None: - # Check that the inputs are well-formed. - self._check_table(table, column_names) - # Save state. - self._table = table - self._column_names = column_names - _LOG.debug("%s", self.__repr__()) - # Map a column name to the index of the corresponding column, to allow - # indexing by column. - self._col_to_idx = { - col: idx for idx, col in enumerate(self._column_names) - } - _LOG.debug("col_to_idx=%s", str(self._col_to_idx)) - - @classmethod - def from_text(cls, cols: List[str], txt: str, delimiter: str) -> "Table": - """ - Build a table from a list of columns and the body of a CSV file. - """ - hdbg.dassert_isinstance(txt, str) - table = list(csv.reader(txt.split("\n"), delimiter=delimiter)) - return cls(table, cols) - - def size(self) -> Tuple[int, int]: - """ - Return the size of the table. 
- - :return: number of rows x columns (i.e., numpy / Pandas convention) - """ - return len(self._table), len(self._column_names) - - def filter_rows(self, column_name: str, value: str) -> "Table": - """ - Return a Table filtered with rows filtered by the criteria "field == - value". - """ - _LOG.debug("self=\n%s", repr(self)) - # Filter the rows. - hdbg.dassert_in(column_name, self._col_to_idx.keys()) - rows_filter = [ - row - for row in self._table - if row[self._col_to_idx[column_name]] == value - ] - _LOG.debug(hprint.to_str("rows_filter")) - # Build the resulting table. - table_filter = Table(rows_filter, self._column_names) - _LOG.debug("table_filter=\n%s", repr(table_filter)) - return table_filter - - def get_column(self, column_name: str) -> List[Any]: - """ - Return the list of unique values for a row / field. - """ - hdbg.dassert_in(column_name, self._column_names) - column_idx = self._col_to_idx[column_name] - # Scan the rows to extract the column. - vals = [] - for row in self._table: - vals.append(row[column_idx]) - return vals - - def unique(self, column_name: str) -> List[Any]: - """ - Return a list of unique values for a field. - """ - vals = self.get_column(column_name) - vals = sorted(list(set(vals))) - return vals - - def remove_column(self, column_name: str) -> "Table": - """ - Return a new Table with the specified column removed. - - :param column_name: name of the column to remove - :return: new Table without the specified column - """ - hdbg.dassert_in(column_name, self._column_names) - # Find the index of the column to remove. - column_idx = self._col_to_idx[column_name] - # Create new column names list without the removed column. - new_column_names = [ - col for col in self._column_names if col != column_name - ] - # Create new table rows without the removed column. - new_table = [ - [val for idx, val in enumerate(row) if idx != column_idx] - for row in self._table - ] - # Build and return the new table. 
- return Table(new_table, new_column_names) - - def __str__(self) -> str: - """ - Return a string representing the table with columns aligned. - """ - table = copy.deepcopy(self._table) - table.insert(0, self._column_names) - # Convert the cells to strings. - table_as_str = [[str(cell) for cell in row] for row in table] - # Find the length of each columns. - lengths = [max(map(len, col)) for col in zip(*table_as_str)] - _LOG.debug(hprint.to_str("lengths")) - # Compute format for the columns. - fmt = " ".join(f"{{:{x}}} |" for x in lengths) - _LOG.debug(hprint.to_str("fmt")) - # Add the row separating the column names. - row_sep = ["-" * length for length in lengths] - table.insert(1, row_sep) - table_as_str = [[str(cell) for cell in row] for row in table] - # Format rows. - rows_as_str = [fmt.format(*row) for row in table_as_str] - # Remove trailing spaces. - rows_as_str = [row.rstrip() for row in rows_as_str] - # Create string. - res = "\n".join(rows_as_str) - # res += "\nsize=" + str(self.size()) - return res diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py deleted file mode 100644 index 8ef0e3a4f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htest_logger.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -""" -Add a description of what the script does and examples of command lines. - -Check dev_scripts/linter.py to see an example of a script using this -template. 
- -Import as: - -import dev_scripts_helpers.script_template as dscscske -""" - -import argparse -import logging - -import helpers.hlogging as hloggin -import helpers.hparser as hparser - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("positional", nargs="*", help="...") - parser.add_argument("--dst_dir", action="store", help="Destination dir") - hparser.add_verbosity_arg(parser) - return parser - - -def _main(parser: argparse.ArgumentParser) -> None: - args = parser.parse_args() - hparser.parse_verbosity_args(args, use_exec_path=True) - hloggin.test_logger() - # - # logging.disable(logging.WARNING) - hloggin.shut_up_log_debug(_LOG) - hloggin.test_logger() - - -if __name__ == "__main__": - _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py deleted file mode 100644 index 7b6506ce6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htext_protect.py +++ /dev/null @@ -1,262 +0,0 @@ -""" -Utilities for protecting content during text processing. - -Extract and restore content that should not be modified by formatters and text -transformations (code blocks, comments, etc.). 
- -Import as: - -import helpers.htext_protect as htexprot -""" - -import logging -import re -from typing import Dict, List, Optional, Tuple - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Helper functions -# ############################################################################# - - -def _is_fenced_block_delimiter(line: str) -> bool: - """ - Check if line is a fenced block delimiter (```). - - :param line: Line to check - :return: True if line matches fenced block delimiter pattern - """ - return bool(re.match(r"^\s*```", line)) - - -def _is_math_block_delimiter(line: str) -> bool: - """ - Check if line is a math block delimiter ($$). - - :param line: Line to check - :return: True if line matches math block delimiter pattern - """ - return bool(re.match(r"^\s*\$\$\s*$", line)) - - -def _extract_single_line_html_comment(line: str) -> Optional[str]: - """ - Extract single-line HTML comment from line if present. - - Skips TOC markers ( and ) as they need to be - processed by the TOC generation logic. - - :param line: Line to check - :return: Full comment string if found, None otherwise - """ - # Skip TOC markers: they are processed by `refresh_toc`. - if "" in line or "" in line: - return None - # Match on single line. - m = re.match(r"^(\s*\s*)$", line) - if m: - return m.group(1) - return None - - -def _is_html_comment_start(line: str) -> bool: - """ - Check if line starts an HTML comment. - - Skips TOC markers as they need to be processed by TOC generation logic. - - :param line: Line to check - :return: True if line contains - """ - # Skip TOC markers. - if "" in line or "" in line: - return False - return "" not in line - - -def _is_html_comment_end(line: str) -> bool: - """ - Check if line ends an HTML comment. 
- - :param line: Line to check - :return: True if line contains --> without opening " in line and ") for .md and .txt files - - LaTeX comments (% ...) for .tex files - - :param lines: The lines to be processed - :param file_type: File extension ('md', 'txt', or 'tex') - :return: Tuple of (lines with placeholders, mapping of placeholders to - original content) - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_in(file_type, ["md", "txt", "tex"]) - _LOG.debug("Extracting protected content for file_type=%s", file_type) - # - protected_map: Dict[str, str] = {} - counter = 1 - lines_new: List[str] = [] - # State tracking. - in_fenced_block = False - in_math_block = False - in_html_comment = False - fenced_block_lines: List[str] = [] - math_block_lines: List[str] = [] - html_comment_lines: List[str] = [] - # Process each line. - for line in lines: - # Handle fenced blocks (for .md and .txt files). - if file_type in ["md", "txt"] and _is_fenced_block_delimiter(line): - if not in_fenced_block: - # Opening delimiter. - in_fenced_block = True - lines_new.append(line) - fenced_block_lines = [] - else: - # Closing delimiter: protect only content, keep delimiters visible. - placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(fenced_block_lines) - counter += 1 - lines_new.append(placeholder) - lines_new.append(line) - in_fenced_block = False - fenced_block_lines = [] - continue - # Inside fenced block: accumulate. - if in_fenced_block: - fenced_block_lines.append(line) - continue - # Handle math blocks (for all file types). - if _is_math_block_delimiter(line): - if not in_math_block: - # Opening delimiter. - in_math_block = True - lines_new.append(line) - math_block_lines = [] - else: - # Closing delimiter: protect only content, keep delimiters visible. 
- placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(math_block_lines) - counter += 1 - lines_new.append(placeholder) - lines_new.append(line) - in_math_block = False - math_block_lines = [] - continue - # Inside math block: accumulate. - if in_math_block: - math_block_lines.append(line) - continue - # Handle HTML comments (for .md and .txt files). - if file_type in ["md", "txt"]: - # Single-line HTML comment. - single_line_comment = _extract_single_line_html_comment(line) - if single_line_comment: - placeholder = f"<<>>" - protected_map[placeholder] = single_line_comment - counter += 1 - lines_new.append(placeholder) - continue - # Multi-line HTML comment start. - if _is_html_comment_start(line): - in_html_comment = True - html_comment_lines = [line] - continue - # Multi-line HTML comment end. - if in_html_comment and _is_html_comment_end(line): - html_comment_lines.append(line) - placeholder = f"<<>>" - protected_map[placeholder] = "\n".join(html_comment_lines) - counter += 1 - lines_new.append(placeholder) - in_html_comment = False - html_comment_lines = [] - continue - # Inside multi-line HTML comment: accumulate. - if in_html_comment: - html_comment_lines.append(line) - continue - # Handle LaTeX comments (for .tex files). - if file_type == "tex" and _is_latex_comment(line): - placeholder = f"<<>>" - protected_map[placeholder] = line - counter += 1 - lines_new.append(placeholder) - continue - # Regular line: keep as-is. - lines_new.append(line) - # Check for unclosed blocks. 
- if in_fenced_block: - _LOG.warning("Unclosed fenced block detected") - if in_math_block: - _LOG.warning("Unclosed math block detected") - if in_html_comment: - _LOG.warning("Unclosed HTML comment detected") - _LOG.debug("Extracted %d protected content blocks", len(protected_map)) - return lines_new, protected_map - - -def restore_protected_content( - lines: List[str], - protected_map: Dict[str, str], -) -> List[str]: - """ - Restore protected content by replacing placeholders with original text. - - :param lines: Lines containing placeholders - :param protected_map: Mapping of placeholders to original content - :return: Lines with restored content - """ - hdbg.dassert_isinstance(lines, list) - hdbg.dassert_isinstance(protected_map, dict) - _LOG.debug("Restoring %d protected content blocks", len(protected_map)) - # - lines_new: List[str] = [] - for line in lines: - # Check if line contains any placeholder. - restored = False - for placeholder, original in protected_map.items(): - if placeholder in line: - if line.strip() == placeholder: - # Placeholder is entire line: replace with multi-line content. - lines_new.extend(original.split("\n")) - restored = True - break - else: - # Placeholder embedded in line: replace inline. - line = line.replace(placeholder, original) - if not restored: - lines_new.append(line) - return lines_new diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py deleted file mode 100644 index 31cd642cf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hthreading.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -""" -`timeout` decorator which is used to limit function execution time. 
- -Import as: - -import helpers.hthreading as hthread -""" - -import _thread -import sys -import threading -from typing import Any - - -def _timeout_handler() -> None: - sys.stderr.flush() - # Raise KeyboardInterrupt. - _thread.interrupt_main() - - -def timeout(timeout_sec: int) -> Any: - """ - Exit process if its execution takes longer than timeout_sec seconds. This - is a decorator that issue a KeyboardInterrupt, that will be raised if time - limit is exceed. - - :param timeout_sec: time limit - """ - - def outer(fn: Any) -> Any: - def inner(*args: Any, **kwargs: Any) -> Any: - timer = threading.Timer(timeout_sec, _timeout_handler) - timer.start() - try: - result = fn(*args, **kwargs) - finally: - timer.cancel() - return result - - return inner - - return outer diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py deleted file mode 100644 index c3aed5e80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htimer.py +++ /dev/null @@ -1,275 +0,0 @@ -""" -Import as: - -import helpers.htimer as htimer -""" - -import logging -import time -from typing import Any, Callable, Optional, Tuple, cast - -import helpers.hdbg as hdbg -import helpers.hlogging as hloggin - -# Avoid dependency from other `helpers` modules to prevent import cycles. - - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Timer -# ############################################################################# - - -class Timer: - """ - Measure time elapsed in one or more intervals. - """ - - def __init__(self, *, start_on_creation: bool = True): - """ - Create a timer. - - If "start_on_creation" is True start automatically the timer. 
- """ - self._stop: Optional[float] = None - # Store the time for the last elapsed interval. - self._last_elapsed: Optional[float] = None - # Store the total time for all the measured intervals. - self._total_elapsed = 0.0 - if start_on_creation: - # For better accuracy start the timer as last action, after all the - # bookkeeping. - self._start: Optional[float] = time.time() - else: - self._start = None - - def stop(self) -> None: - """ - Stop the timer and accumulate the interval. - """ - # Timer must have not been stopped before. - hdbg.dassert(self.is_started() and not self.is_stopped()) - # For better accuracy stop the timer as first action. - self._stop = time.time() - # Update the total elapsed time. - # Sometimes we get numerical error tripping this assertion - # (e.g., '1619552498.813126' <= '1619552498.805193') so we give - # a little slack to the assertion. - # hdbg.dassert_lte(self._start, self._stop + 1e-2) - self._last_elapsed = cast(float, self._stop) - cast(float, self._start) - self._total_elapsed += self._last_elapsed - # Stop. - self._start = None - self._stop = None - - def get_elapsed(self) -> float: - """ - Stop if not stopped already, and return the elapsed time. - """ - if not self.is_stopped(): - self.stop() - hdbg.dassert_is_not(self._last_elapsed, None) - return cast(float, self._last_elapsed) - - # ///////////////////////////////////////////////////////////////////////// - - def resume(self) -> None: - """ - Resume the timer after a stop. - """ - # Timer must have been stopped before. - hdbg.dassert(self.is_started() or self.is_stopped()) - self._stop = None - # Start last for better accuracy. - self._start = time.time() - - def is_started(self) -> bool: - return ( - self._start is not None and self._start >= 0 and self._stop is None - ) - - def is_stopped(self) -> bool: - return self._start is None and self._stop is None - - def get_total_elapsed(self) -> float: - """ - Stop if not stopped already, and return the total elapsed time. 
- """ - if not self.is_stopped(): - self.stop() - return self._total_elapsed - - def accumulate(self, *, timer: "Timer") -> None: - """ - Accumulate the value of a timer to the current object. - """ - # Both timers must be stopped. - hdbg.dassert(timer.is_stopped()) - hdbg.dassert(self.is_stopped()) - hdbg.dassert_lte(0.0, timer.get_total_elapsed()) - self._total_elapsed += timer.get_total_elapsed() - - def __repr__(self) -> str: - """ - Return string with the intervals measured so far. - """ - measured_time = self._total_elapsed - if self.is_started() and not self.is_stopped(): - # Timer still running. - measured_time += time.time() - cast(float, self._start) - ret = "%.3f secs" % measured_time - return ret - - -# ############################################################################# - - -_TimerMemento = Tuple[int, str, Timer] - - -def dtimer_start(log_level: int, message: str) -> _TimerMemento: - """ - Start measuring time. - - :return: memento of the timer. - """ - _LOG.log(log_level, "%s ...", message) - memento = log_level, message, Timer() - return memento - - -def dtimer_stop(memento: _TimerMemento) -> Tuple[str, float]: - """ - End measuring time. - - :return: - - message as as string - - time in seconds (int) - """ - log_level, message, timer = memento - timer.stop() - elapsed_time = round(timer.get_elapsed(), 3) - msg = f"{message} done (%.3f s)" % elapsed_time - _LOG.log(log_level, msg) - return msg, elapsed_time - - -# TODO(gp): Is this useful / used? -def stop_timer(timer: Timer) -> str: - timer.stop() - elapsed_time = round(timer.get_elapsed(), 3) - msg = "%.3f s" % elapsed_time - return msg - - -# ############################################################################# -# TimedScope -# ############################################################################# - - -class TimedScope: - """ - Measure the execution time of a block of code. - - ``` - with htimer.TimedScope(logging.INFO, "Work") as ts: - ... work work work ... 
- ``` - """ - - def __init__( - self, log_level: int, message: str, *, profile_memory: bool = False - ): - self._log_level = log_level - self._message = message - # TODO(gp): Implement profiling also memory using dmemory_start/end. - # State. - self._memento: Optional[_TimerMemento] = None - self.elapsed_time = None - - def get_result(self) -> str: - msg: str = f"{self._message} done (%.3f s)" % self.elapsed_time - return msg - - def __enter__(self) -> "TimedScope": - self._memento = dtimer_start(self._log_level, self._message) - return self - - def __exit__(self, *args: Any) -> None: - if self._memento is not None: - msg, self.elapsed_time = dtimer_stop(self._memento) - _ = msg - - -# ############################################################################# -# Decorator. -# ############################################################################# - - -def timed(f: Callable) -> Callable: - """ - Add a timer around the invocation of a function. - """ - - def wrapper(*args: Any, **kwargs: Any) -> Any: - func_name = getattr(f, "__name__", "unknown_function") - # - timer = dtimer_start(0, func_name) - v = f(*args, **kwargs) - dtimer_stop(timer) - return v - - return wrapper - - -# TODO(gp): Add an object that accumulates the times from multiple timers. -# E.g., use a dict for message -> time - - -# ############################################################################# - - -_MemoryMemento = Tuple[int, str, hloggin.MemoryUsage] - - -def dmemory_start(log_level: int, message: str) -> _MemoryMemento: - """ - Start measuring memory. - - :return: memento of the memory profile - """ - _LOG.log(log_level, "%s ...", message) - memory_usage = hloggin.get_memory_usage() - memento = (log_level, message, memory_usage) - return memento - - -def dmemory_stop(memento: _MemoryMemento, *, mode: str = "all") -> str: - """ - Stop measuring memory. 
- - :return: message as as string - """ - log_level, message, start_memory_usage = memento - end_memory_usage = hloggin.get_memory_usage() - verbose = False - start_mem = hloggin.memory_to_str(start_memory_usage, verbose=verbose) - end_mem = hloggin.memory_to_str(end_memory_usage, verbose=verbose) - diff_mem = tuple(x - y for x, y in zip(end_memory_usage, start_memory_usage)) - diff_mem = hloggin.memory_to_str(diff_mem, verbose=verbose) - # Package the output. - msg = [] - msg.append(f"{message} done:") - if mode == "all": - msg.append(f"start=({start_mem})") - msg.append(f"end=({end_mem})") - msg.append(f"diff=({diff_mem})") - elif mode == "only_diff": - msg.append(f"diff=({diff_mem})") - else: - raise ValueError(f"Invalid mode='{mode}'") - msg = " ".join(msg) - _LOG.log(log_level, msg) - return msg diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py deleted file mode 100644 index bb16ad381..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htqdm.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Import as: - -import helpers.htqdm as htqdm -""" - -import io -import logging -from typing import Any, Optional - -# Avoid dependency from other `helpers` modules, such as `helpers.hjoblib`, to -# prevent import cycles. - - -# ############################################################################# -# TqdmToLogger -# ############################################################################# - - -# From https://github.com/tqdm/tqdm/issues/313 -class TqdmToLogger(io.StringIO): - """ - Output stream for `tqdm` which will output to logger module instead of the - `stdout`. - - Use as: - ``` - from tqdm.autonotebook import tqdm - - tqdm_out = TqdmToLogger(_LOG, level=logging.INFO) - for ... 
tqdm(..., file=tqdm_out): - ``` - """ - - logger = None - level = None - buf = "" - - def __init__(self, logger: Any, level: Optional[int] = None): - super().__init__() - self.logger = logger - self.level = level or logging.INFO - - def write(self, buf: str) -> None: - self.buf = buf.strip("\r\n\t ") - - def flush(self) -> None: - self.logger.log(self.level, self.buf) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py deleted file mode 100644 index 03de65ce1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htraceback.py +++ /dev/null @@ -1,228 +0,0 @@ -""" -Import as: - -import helpers.htraceback as htraceb -""" - -import logging -import os -import re -from typing import Any, List, Match, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hgit as hgit - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Move some code to `hcfile.py`. - -# Store elements parsed from a line of a traceback: -# (file_name, line_num, text) -# E.g., -# ("test/test_lib_tasks.py", -# 27, -# "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)" -# ) -CfileRow = Tuple[str, int, str] - - -def cfile_row_to_str(cfile_row: CfileRow) -> str: - # helpers/git.py:295:def get_repo_long_name_from_client(super_module - hdbg.dassert_isinstance(cfile_row, tuple) - return ":".join(list(map(str, cfile_row))) - - -def cfile_to_str(cfile: List[CfileRow]) -> str: - hdbg.dassert_isinstance(cfile, list) - return "\n".join(map(cfile_row_to_str, cfile)) - - -def parse_traceback( - txt: str, *, purify_from_client: bool = True -) -> Tuple[List[CfileRow], Optional[str]]: - """ - Parse a string containing text including a Python traceback. 
- - :param txt: the text to parse - :param purify_from_client: express the files with respect to the Git root - :return: - - a list of `CFILE_ROW`, e.g., - ``` - ("test/test_lib_tasks.py", - 27, - "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)") - - a string storing the traceback, like: - ``` - Traceback (most recent call last): - File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - ``` - - A `None` value means that no traceback was found. - """ - # TODO(gp): Horrible hack to get the tests to pass. IMO this whole function - # needs to be rewritten using a proper parser or library. Now it's full - # of weird handling of edge cases. - txt += "\n" - # - lines = txt.split("\n") - # pylint: disable=line-too-long - # Remove the artifacts of a GH run. E.g., - # "Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined" -> - # -> "NameError: name 'cofinanc' is not defined". - lines = [ - re.split( - r"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+Z ", - line, - )[-1] - for line in lines - ] - state = "look_for" - cfile: List[CfileRow] = [] - i = 0 - start_idx = end_idx = 0 - while i < len(lines): - line = lines[i] - _LOG.debug("state=%-10s i=%d: line='%s'", state, i, line) - if state == "look_for": - if line.startswith("Traceback (most recent call last):"): - start_idx = i - # Update the state. 
- state = "parse" - i += 1 - continue - elif state == "parse": - # The file looks like: - # File "/app/amp/test/test_lib_tasks.py", line 27, in test_get_gh - # actual = ltasks._get_gh_issue_title(issue_id, repo) - regex = r"^\s*File \"(.+)\", line (\d+), in (\S+)$" - m = re.match(regex, line) - hdbg.dassert(m, "Can't parse '%s'", line) - m: Match[Any] - file_name = m.group(1) - line_num = int(m.group(2)) - func_name = m.group(3) - _LOG.debug(" -> %s %d %s", file_name, line_num, func_name) - # - # Parse the next line until the next `File...`. - _LOG.debug("Search end of snippet") - j = i + 1 - hdbg.dassert_lte(j, len(lines)) - while j < len(lines): - _LOG.debug(" j=%d: line='%s'", j, lines[j]) - if lines[j].startswith(' File "') or not lines[j].startswith( - " " - ): - _LOG.debug(" Found end of snippet") - break - j += 1 - # Concatenate the lines into a single line. - code = lines[i + 1 : j] - _LOG.debug(" -> code: [%d, %d]\n%s", i, j, "\n".join(code)) - code = map(lambda x: x.rstrip().lstrip(), code) - code_as_single_line = "/".join(code) - _LOG.debug(" -> code_as_single_line=\n%s", code_as_single_line) - # Assemble the result. - file_name = os.path.normpath(file_name) - cfile_row = ( - file_name, - line_num, - func_name + ":" + code_as_single_line, - ) - _LOG.debug(" => cfile_row='%s'", cfile_row_to_str(cfile_row)) - cfile.append(cfile_row) - # Update the state. - if not lines[j].startswith(" "): - _LOG.debug(" Found end of traceback") - end_idx = j - state = "end" - break - state = "parse" - i = j - continue - # - i += 1 - # - if state == "look_for": - # We didn't find a traceback. - cfile = [] - traceback = None - elif state == "end": - if ( - end_idx < len(lines) - 1 - and "Error:" not in lines[end_idx - 1] - and "Error:" in lines[end_idx] - ): - # Extend the traceback to the lines with the error description. 
- # E.g., for the snippet below: - # ``` - # if repo_short_name == "amp": - # NameError: name 'repo_short_name' is not defined - # ``` - # If the parsed traceback stops at 'if repo_short_name == "amp":', - # and thus, its last line does not include the error description - # ("NameError:..."), and the following line does include the error - # description, then the traceback will be extended to include the - # following line, making the parsed traceback end with the following - # two lines: - # ``` - # if repo_short_name == "amp": - # NameError: name 'repo_short_name' is not defined - # ``` - to_break = False - while end_idx < len(lines) - 1 and not to_break: - end_idx += 1 - line = lines[end_idx] - _LOG.debug( - "Extend traceback: to_break=%s, end_idx=%s, line='%s'", - to_break, - end_idx, - line, - ) - if ( - "________ Test" in line - or "====== slowest 3 durations" in line - ): - # Stop if we have reached the next traceback or the end of the - # pytest report. - to_break = True - hdbg.dassert_lte(0, start_idx) - hdbg.dassert_lte(start_idx, end_idx) - hdbg.dassert_lt(end_idx, len(lines)) - _LOG.debug("start_idx=%d end_idx=%d", start_idx, end_idx) - traceback = "\n".join(lines[start_idx:end_idx]) - else: - raise ValueError(f"Invalid state='{state}'") - _LOG.debug("traceback=\n%s", traceback) - _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) - # Purify filenames from client so that refer to files in this client. - if cfile and purify_from_client: - _LOG.debug("# Purifying from client") - cfile_tmp = [] - for cfile_row in cfile: - file_name, line_num, text = cfile_row - # Leave the files relative to the current dir. 
- root_dir = hgit.get_client_root(super_module=False) - mode = "return_all_results" - file_names = hgit.find_docker_file( - file_name, root_dir=root_dir, mode=mode - ) - if len(file_names) == 0: - _LOG.warning("Can't find file corresponding to '%s'", file_name) - elif len(file_names) > 1: - _LOG.warning( - "Found multiple potential files corresponding to '%s'", - file_name, - ) - else: - file_name = file_names[0] - cfile_tmp.append((file_name, line_num, text)) - cfile = cfile_tmp - _LOG.debug("# After purifying from client") - _LOG.debug("cfile=\n%s", cfile_to_str(cfile)) - return cfile, traceback diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py deleted file mode 100644 index d706292ed..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htranslate.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python - -""" -Allow translating text using AWS Translate. It can be used as a module or CLI -tool. - -Supported languages and languages codes: -https://docs.aws.amazon.com/translate/latest/dg/what-is.html - -Import as: - -import helpers.htranslate as htransl -""" - -import argparse -import configparser -import logging -import pathlib -import sys -from typing import Optional, Tuple - -import boto3 - -_LOG = logging.getLogger(__name__) - - -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "lang", - help=( - "source language code. 
" - "https://docs.aws.amazon.com/translate/latest/dg/what-is.html" - ), - ) - parser.add_argument("text", help="string to translate") - parser.add_argument( - "--aws", - type=pathlib.Path, - dest="credentials", - default=pathlib.Path().home() / ".aws/credentials", - help="Path to the aws credentials file.", - ) - return parser.parse_args() - - -def _load_credentials(conf_path: pathlib.Path) -> Tuple[str, str]: - """ - Load aws credentilas from config file. - - :param conf_path:credentials file path. - :return: A tuple consist of aws_access and aws_secret keys. - """ - config = configparser.ConfigParser() - config.read(conf_path) - try: - access = config.get("default", "aws_access_key_id") - secret = config.get("default", "aws_secret_access_key") - except configparser.NoOptionError as err: - _LOG.error("Unable to read option for: %s", err.args) - sys.exit(1) - else: - return access, secret - - -# ############################################################################# -# TranslateAPI -# ############################################################################# - - -class TranslateAPI: - def __init__( - self, - aws_access_key: str, - aws_secret_key: str, - region: Optional[str] = "us-east-2", - ) -> None: - self._translate = boto3.client( - service_name="translate", - region_name=region, - use_ssl=True, - aws_access_key_id=aws_access_key, - aws_secret_access_key=aws_secret_key, - ) - - def translate_text(self, text: str, lang_code: str) -> str: - """ - Translate given text into English. Amazon has a limit on text size: - 5,000 bytes. - - :param text: Foreing language text. - :param lang_code: Language code in accordance with supported - languages and code of Amazon. - :return: English text. 
- """ - tr = self._translate.translate_text( - Text=text, SourceLanguageCode=lang_code, TargetLanguageCode="en" - ) - return str(tr.get("TranslatedText")) - - -if __name__ == "__main__": - args = _parse_args() - aws_access, aws_secret = _load_credentials(args.credentials) - api = TranslateAPI(aws_access, aws_secret) - result = api.translate_text(args.text, args.lang) - print(result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py deleted file mode 100644 index 1bb3472d7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/htypes.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Contain general types based on standard Python libraries. - -Import as: - -import helpers.htypes as htypes -""" - -from typing import Any, Dict - -Kwargs = Dict[str, Any] diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py deleted file mode 100644 index d585faeef..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test.py +++ /dev/null @@ -1,1876 +0,0 @@ -""" -Enhanced unit testing framework built on top of unittest and pytest. 
- -This module provides: -- TestCase base class with golden file testing capabilities -- Utilities for comparing strings, dataframes, and other outputs -- Test outcome management with update and incremental modes -- Directory management for input, output, and scratch space -- Integration with Git for managing test outcomes - -Import as: - -import helpers.hunit_test as hunitest -""" - -import abc -import collections -import inspect -import logging -import os -import pprint -import random -import re -import sys -import traceback -import unittest -from typing import Any, Dict, List, Mapping, Optional, Tuple - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htimer as htimer -import helpers.hunit_test_purification as huntepur -import helpers.hwall_clock_time as hwacltim -import helpers.repo_config_utils as hrecouti - -# We use strings as type hints (e.g., 'pd.DataFrame') since we are not sure -# we have the corresponding libraries installed. - - -# Minimize dependencies from installed packages. - -# TODO(gp): Use `hprint.color_highlight`. -_WARNING = "\033[33mWARNING\033[0m" - -try: - import numpy as np - - _HAS_NUMPY = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_NUMPY = False -try: - import pandas as pd - - _HAS_PANDAS = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_PANDAS = False - -try: - import matplotlib.pyplot as plt - - _HAS_MATPLOTLIB = True -except ImportError as e: - print(_WARNING + ": " + str(e)) - _HAS_MATPLOTLIB = False - - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. -_LOG.setLevel(logging.INFO) - -# ############################################################################# - -# Global setter / getter for updating test. - -# This controls whether the output of a test is updated or not. 
-# Set by `conftest.py`. -_UPDATE_TESTS = False - - -# TODO(gp): -> ..._update_outcomes. -def set_update_tests(val: bool) -> None: - """ - Set the global flag for updating test outcomes. - - :param val: True to enable updating test outcomes, False otherwise - """ - global _UPDATE_TESTS - _UPDATE_TESTS = val - - -def get_update_tests() -> bool: - """ - Get the current state of the update tests flag. - - :return: True if test outcomes should be updated, False otherwise - """ - return _UPDATE_TESTS - - -# ############################################################################# - -# Global setter / getter for incremental mode. - -# This is useful when a long test wants to reuse some data already generated. -# Set by conftest.py. -_INCREMENTAL_TESTS = False - - -def set_incremental_tests(val: bool) -> None: - """ - Set the global flag for incremental test mode. - - :param val: True to enable incremental mode, False otherwise - """ - global _INCREMENTAL_TESTS - _INCREMENTAL_TESTS = val - - -def get_incremental_tests() -> bool: - """ - Get the current state of the incremental tests flag. - - :return: True if incremental mode is enabled, False otherwise - """ - return _INCREMENTAL_TESTS - - -# ############################################################################# - -_CONFTEST_IN_PYTEST = False - - -# TODO(gp): Use https://stackoverflow.com/questions/25188119 -# TODO(gp): -> is_in_unit_test() -def in_unit_test_mode() -> bool: - """ - Return True if we are inside a pytest run. - - This is set by `conftest.py`. - """ - return _CONFTEST_IN_PYTEST - - -# ############################################################################# - - -# Set by `conftest.py`. -_GLOBAL_CAPSYS = None - - -def pytest_print(txt: str) -> None: - """ - Print bypassing `pytest` output capture. - """ - with _GLOBAL_CAPSYS.disabled(): # type: ignore - sys.stdout.write(txt) - - -def pytest_warning(txt: str, prefix: str = "") -> None: - """ - Print a warning bypassing `pytest` output capture. 
- - :param prefix: prepend the message with a string - """ - txt_tmp = "" - if prefix: - txt_tmp += prefix - txt_tmp += hprint.color_highlight("WARNING", "yellow") + f": {txt}" - pytest_print(txt_tmp) - - -# ############################################################################# -# Generation and conversion functions. -# ############################################################################# - - -# TODO(gp): Is this dataflow Info? If so it should go somewhere else. -def convert_info_to_string(info: Mapping) -> str: - """ - Convert info to string for verifying test results. - - Info often contains `pd.Series`, so pandas context is provided to print all rows - and all contents. - - :param info: info to convert to string - :return: string representation of info - """ - output = [] - # Provide context for full representation of `pd.Series` in info. - with pd.option_context( - "display.max_colwidth", - int(1e6), - "display.max_columns", - None, - "display.max_rows", - None, - ): - output.append(hprint.frame("info")) - output.append(pprint.pformat(info)) - output_str = "\n".join(output) - return output_str - - -# TODO(gp): This seems the python3.9 version of `to_str`. Remove if possible. -def to_string(var: str) -> str: - """ - Generate an f-string expression for debugging variable values. - - :param var: the variable name to create an f-string for - :return: an f-string expression that will print the variable name and value - """ - return f"""f"{var}={{{var}}}""" - - -# ############################################################################# - - -def diff_files( - file_name1: str, - file_name2: str, - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", - error_msg: str = "", -) -> None: - """ - Compare the passed filenames and create script to compare them with - vimdiff. 
- - :param tag: add a banner the tag - :param abort_on_exit: whether to assert or not - :param dst_dir: dir where to save the comparing script - """ - _LOG.debug(hprint.func_signature_to_str()) - file_name1 = os.path.relpath(file_name1, os.getcwd()) - file_name2 = os.path.relpath(file_name2, os.getcwd()) - msg = [] - # Add tag. - if tag is not None: - msg.append("\n" + hprint.frame(tag, char1="-")) - # Diff to screen. - _, res = hsystem.system_to_string( - f"echo; sdiff --expand-tabs -l -w 150 {file_name1} {file_name2}", - abort_on_error=False, - log_level=logging.DEBUG, - ) - msg.append(res) - # Save a script to diff. - diff_script = os.path.join(dst_dir, "tmp_diff.sh") - vimdiff_cmd = f""" - #!/bin/bash - if [[ $1 == "wrap" ]]; then - cmd='vimdiff -c "windo set wrap"' - else - cmd='vimdiff' - fi; - cmd="$cmd {file_name1} {file_name2}" - eval $cmd - """ - vimdiff_cmd = hprint.dedent(vimdiff_cmd) - # TODO(gp): Use hio.create_executable_script(). - hio.to_file(diff_script, vimdiff_cmd) - cmd = "chmod +x " + diff_script - hsystem.system(cmd) - # Report how to diff. - msg.append("Diff with:") - msg.append("> " + diff_script) - msg_as_str = "\n".join(msg) - # Append also error_msg to the current message. - if error_msg: - msg_as_str += "\n" + error_msg - # Add also the stack trace to the logging error. - if False: - log_msg_as_str = ( - msg_as_str - + "\n" - + hprint.frame("Traceback", char1="-") - + "\n" - + "".join(traceback.format_stack()) - ) - _LOG.error(log_msg_as_str) - # Assert. - if abort_on_exit: - raise RuntimeError(msg_as_str) - - -# ############################################################################# - - -def _remove_spaces(txt: str) -> str: - """ - Remove leading / trailing spaces and empty lines. - - This is used to implement fuzzy matching. - """ - txt = txt.replace("\\n", "\n").replace("\\t", "\t") - # Convert multiple empty spaces (but not newlines) into a single one. - txt = re.sub(r"[^\S\n]+", " ", txt) - # Remove insignificant crap. 
- lines = [] - for line in txt.split("\n"): - # Remove leading and trailing spaces. - line = re.sub(r"^\s+", "", line) - line = re.sub(r"\s+$", "", line) - # Skip empty lines. - if line != "": - lines.append(line) - txt = "\n".join(lines) - return txt - - -def _remove_banner_lines(txt: str) -> str: - """ - Remove lines of separating characters long at least 20 characters. - """ - txt_tmp: List[str] = [] - for line in txt.split("\n"): - if re.match(r"^\s*[\#\-><=]{20,}\s*$", line): - continue - txt_tmp.append(line) - txt = "\n".join(txt_tmp) - return txt - - -def _fuzzy_clean(txt: str) -> str: - """ - Remove irrelevant artifacts to make string comparison less strict. - """ - hdbg.dassert_isinstance(txt, str) - # Ignore spaces. - txt = _remove_spaces(txt) - # Ignore separation lines. - txt = _remove_banner_lines(txt) - return txt - - -def _ignore_line_breaks(txt: str) -> str: - """ - Replace all line breaks with spaces for loose comparison. - - :param txt: the input text - :return: text with line breaks replaced by spaces - """ - # Ignore line breaks. - txt = txt.replace("\n", " ") - return txt - - -def _sort_lines(txt: str) -> str: - """ - Sort the lines in alphabetical order. - - This is used when we want to perform a comparison of equality but - without order. Of course there are false negatives, since the - relative order of lines might matter. - """ - lines = txt.split("\n") - lines.sort() - lines = "\n".join(lines) - return lines - - -def _save_diff( - actual: str, - expected: str, - tag: str, - test_dir: str, -) -> None: - """ - Save actual and expected strings to temporary files for comparison. - - :param actual: the actual test output - :param expected: the expected test output - :param tag: identifier tag for the files - :param test_dir: directory to save files in - """ - if tag != "": - tag += "." - # Save expected strings to dir. 
- for dst_dir in (".", test_dir): - act_file_name = f"{dst_dir}/tmp.{tag}actual.txt" - hio.to_file(act_file_name, actual) - exp_file_name = f"{dst_dir}/tmp.{tag}expected.txt" - hio.to_file(exp_file_name, expected) - - -def assert_equal( - actual: str, - expected: str, - full_test_name: str, - test_dir: str, - *, - check_string: bool = False, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - purify_expected_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - abort_on_error: bool = True, - dst_dir: str = ".", - error_msg: str = "", -) -> bool: - """ - See interface in `TestCase.assert_equal()`. - - :param full_test_name: e.g., `TestRunNotebook1.test2` - :param check_string: if it was invoked by `check_string()` or directly - """ - _LOG.debug(hprint.func_signature_to_str("actual expected")) - # Store a mapping tag after each transformation (e.g., original, sort, ...) to - # (actual, expected). - values: Dict[str, str] = collections.OrderedDict() - - def _append(tag: str, actual: str, expected: str) -> None: - _LOG.debug( - "tag=%s\n actual='\n%s'\n expected='\n%s'", tag, actual, expected - ) - hdbg.dassert_not_in(tag, values) - values[tag] = (actual, expected) - - # - _LOG.debug("Before any transformation:") - tag = "original" - _append(tag, actual, expected) - # 1) Remove white spaces. - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_white_spaces(actual) - expected = text_purifier.purify_white_spaces(expected) - tag = "purify_white_spaces" - _append(tag, actual, expected) - # Remove empty leading / trailing lines. 
- if remove_lead_trail_empty_lines: - tag = "remove_lead_trail_empty_lines" - actual = hprint.remove_lead_trail_empty_lines(actual) - expected = hprint.remove_lead_trail_empty_lines(expected) - _append(tag, actual, expected) - # Dedent only expected since we often align it to make it look more readable - # in the Python code, if needed. - if dedent: - tag = "dedent" - expected = hprint.dedent(expected) - _append(tag, actual, expected) - # Purify text, if needed. - if purify_text: - tag = "purify_text" - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - if purify_expected_text: - expected = text_purifier.purify_txt_from_client(expected) - _append(tag, actual, expected) - # Ensure that there is a single `\n` at the end of the strings. - actual = actual.rstrip("\n") + "\n" - expected = expected.rstrip("\n") + "\n" - # Sort the lines. - if sort: - tag = "sort" - actual = _sort_lines(actual) - expected = _sort_lines(expected) - _append(tag, actual, expected) - # Fuzzy match, if needed. - if fuzzy_match: - tag = "fuzzy_match" - actual = _fuzzy_clean(actual) - expected = _fuzzy_clean(expected) - _append(tag, actual, expected) - # Ignore line breaks, if needed. - if ignore_line_breaks: - tag = "ignore_line_breaks" - actual = _ignore_line_breaks(actual) - expected = _ignore_line_breaks(expected) - _append(tag, actual, expected) - # Split the strings into lines of at most `split_max_len` characters. - if split_max_len: - tag = "split_max_len" - actual = hprint.strict_split(actual, split_max_len) - expected = hprint.strict_split(expected, split_max_len) - _append(tag, actual, expected) - # Check. 
- tag = "final" - _append(tag, actual, expected) - # - is_equal = expected == actual - _LOG.debug(hprint.to_str("is_equal")) - if is_equal: - return is_equal - _LOG.error( - "%s", - "\n" - + hprint.frame( - f"Test '{full_test_name}' failed", char1="=", num_chars=80 - ), - ) - if not check_string: - # If this is a `self.assert_equal()` and not a `self.check_string()`, - # then print the correct output, like: - # expected = r'""" - # 2021-02-17 09:30:00-05:00 - # 2021-02-17 10:00:00-05:00 - # 2021-02-17 11:00:00-05:00 - # """ - txt = [] - txt.append(hprint.frame(f"ACTUAL VARIABLE: {full_test_name}", char1="-")) - # TODO(gp): Switch to expected or expected_result. - exp_var = "expected = r" - # We always return the variable exactly as this should be, even if we - # could make it look better through indentation in case of fuzzy match. - actual_orig = values["original"][0] - if actual_orig.startswith('"'): - sep = "'''" - else: - sep = '"""' - exp_var += sep - if fuzzy_match: - # We can print in a more readable way since spaces don't matter. - exp_var += "\n" - exp_var += actual_orig - if fuzzy_match: - # We can print in a more readable way since spaces don't matter. - exp_var += "\n" - exp_var += sep - # Save the expected variable to files. - exp_var_file_name = f"{test_dir}/tmp.exp_var.txt" - hio.to_file(exp_var_file_name, exp_var) - # - exp_var_file_name = "tmp.exp_var.txt" - hio.to_file(exp_var_file_name, exp_var) - _LOG.info("Saved exp_var in %s", exp_var_file_name) - # - txt.append(exp_var) - txt = "\n".join(txt) - error_msg += txt - # Save all the values after the transformations. - debug = False - if debug: - for idx, key in enumerate(values.keys()): - actual_tmp, expected_tmp = values[key] - tag = f"{idx}.{key}" - _save_diff(actual_tmp, expected_tmp, tag, test_dir) - else: - key = "final" - actual_tmp, expected_tmp = values[key] - _save_diff(actual_tmp, expected_tmp, key, test_dir) - # Compare the last values. 
- act_file_name = f"{test_dir}/tmp.final.actual.txt" - exp_file_name = f"{test_dir}/tmp.final.expected.txt" - if fuzzy_match: - msg = "FUZZY ACTUAL vs FUZZY EXPECTED" - else: - msg = "ACTUAL vs EXPECTED" - msg += f": {full_test_name}" - diff_files( - act_file_name, - exp_file_name, - tag=msg, - abort_on_exit=abort_on_error, - dst_dir=dst_dir, - error_msg=error_msg, - ) - return is_equal - - -# TODO(gp): @all move to hpandas -def compare_df(df1: "pd.DataFrame", df2: "pd.DataFrame") -> None: - """ - Compare two dfs including their metadata. - """ - if not df1.equals(df2): - print(df1.compare(df2)) - raise ValueError("Dfs are different") - - def _compute_df_signature(df: "pd.DataFrame") -> str: - txt = [] - txt.append(f"df1=\n{str(df)}") - txt.append(f"df1.dtypes=\n{str(df.dtypes)}") - if hasattr(df.index, "freq"): - txt.append(f"df1.index.freq=\n{str(df.index.freq)}") - return "\n".join(txt) - - full_test_name = "dummy" - test_dir = "." - assert_equal( - _compute_df_signature(df1), - _compute_df_signature(df2), - full_test_name, - test_dir, - ) - - -# ############################################################################# - - -def create_test_dir( - dir_name: str, incremental: bool, file_dict: Dict[str, str] -) -> None: - """ - Create a directory `dir_name` with the files from `file_dict`. - - `file_dict` is interpreted as pair of files relative to `dir_name` - and content. - """ - hdbg.dassert_no_duplicates(file_dict.keys()) - hio.create_dir(dir_name, incremental=incremental) - for file_name in file_dict: - dst_file_name = os.path.join(dir_name, file_name) - _LOG.debug("file_name=%s -> %s", file_name, dst_file_name) - hio.create_enclosing_dir(dst_file_name, incremental=incremental) - file_content = file_dict[file_name] - hio.to_file(dst_file_name, file_content) - - -# TODO(gp): Make remove_dir_name=True default. 
-def get_dir_signature( - dir_name: str, - include_file_content: bool, - *, - remove_dir_name: bool = False, - num_lines: Optional[int] = None, -) -> str: - """ - Compute a string with the content of the files in `dir_name`. - - :param include_file_content: include the content of the files, besides the - name of files and directories - :param remove_dir_name: use paths relative to `dir_name` - :param num_lines: number of lines to include for each file - - The output looks like: - ``` - # Dir structure - $GIT_ROOT/.../tmp.scratch - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1 - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A - $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet - ... - - # File signatures - len(file_names)=3 - file_names=$GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet, - $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet, ... - # $GIT_ROOT/.../tmp.scratch/dummy_value_1=1/dummy_value_2=A/data.parquet - num_lines=13 - ''' - original shape=(1, 1) - Head: - { - "0":{ - "dummy_value_3":0 - } - } - Tail: - { - "0":{ - "dummy_value_3":0 - } - } - ''' - # $GIT_ROOT/.../tmp.scratch/dummy_value_1=2/dummy_value_2=B/data.parquet - ``` - """ - - def _remove_dir_name(file_name: str) -> str: - if remove_dir_name: - res = os.path.relpath(file_name, dir_name) - else: - res = file_name - return res - - txt: List[str] = [] - # Find all the files under `dir_name`. - _LOG.debug("dir_name=%s", dir_name) - hdbg.dassert_path_exists(dir_name) - cmd = f'find {dir_name} -name "*"' - remove_files_non_present = False - dir_name_tmp = None - file_names = hsystem.system_to_files( - cmd, dir_name_tmp, remove_files_non_present - ) - file_names = sorted(file_names) - # Save the directory / file structure. - txt.append("# Dir structure") - txt.append("\n".join(map(_remove_dir_name, file_names))) - # - if include_file_content: - txt.append("# File signatures") - # Remove the directories. 
- file_names = hsystem.remove_dirs(file_names) - # Scan the files. - txt.append(f"len(file_names)={len(file_names)}") - txt.append(f"file_names={', '.join(map(_remove_dir_name, file_names))}") - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt.append("# " + _remove_dir_name(file_name)) - # Read file. - txt_tmp = hio.from_file(file_name) - # This seems unstable on different systems. - # txt.append("num_chars=%s" % len(txt_tmp)) - txt_tmp = txt_tmp.split("\n") - # Filter lines, if needed. - txt.append(f"num_lines={len(txt_tmp)}") - if num_lines is not None: - hdbg.dassert_lte(1, num_lines) - txt_tmp = txt_tmp[:num_lines] - txt.append("'''\n" + "\n".join(txt_tmp) + "\n'''") - else: - hdbg.dassert_is(num_lines, None) - # Concat everything in a single string. - result = "\n".join(txt) - return result - - -# TODO(gp): GSI. Use the copy in helpers/hprint.py -def filter_text(regex: str, txt: str) -> str: - """ - Remove lines in `txt` that match the regex `regex`. - """ - _LOG.debug("Filtering with '%s'", regex) - if regex is None: - return txt - txt_out = [] - txt_as_arr = txt.split("\n") - for line in txt_as_arr: - if re.search(regex, line): - _LOG.debug("Skipping line='%s'", line) - continue - txt_out.append(line) - # We can only remove lines. - hdbg.dassert_lte( - len(txt_out), - len(txt_as_arr), - "txt_out=\n'''%s'''\ntxt=\n'''%s'''", - "\n".join(txt_out), - "\n".join(txt_as_arr), - ) - txt = "\n".join(txt_out) - return txt - - -def diff_strings( - string1: str, - string2: str, - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", -) -> None: - """ - Compare two strings using the diff_files() flow by creating a script to - compare with vimdiff. - - :param dst_dir: where to save the intermediatary files - """ - _LOG.debug(hprint.to_str("tag abort_on_exit dst_dir")) - # Save the actual and expected strings to files. 
- file_name1 = f"{dst_dir}/tmp.string1.txt" - hio.to_file(file_name1, string1) - # - file_name2 = f"{dst_dir}/tmp.string2.txt" - hio.to_file(file_name2, string2) - # Compare with diff_files. - if tag is None: - tag = "string1 vs string2" - diff_files( - file_name1, - file_name2, - tag=tag, - abort_on_exit=abort_on_exit, - dst_dir=dst_dir, - ) - - -def diff_df_monotonic( - df: "pd.DataFrame", - *, - tag: Optional[str] = None, - abort_on_exit: bool = True, - dst_dir: str = ".", -) -> None: - """ - Check for a dataframe to be monotonic using the vimdiff flow from - diff_files(). - """ - _LOG.debug(hprint.to_str("abort_on_exit dst_dir")) - if not df.index.is_monotonic_increasing: - df2 = df.copy() - df2.sort_index(inplace=True) - diff_strings( - df.to_csv(), - df2.to_csv(), - tag=tag, - abort_on_exit=abort_on_exit, - dst_dir=dst_dir, - ) - - -# ############################################################################# - - -# pylint: disable=protected-access -def get_pd_default_values() -> "pd._config.config.DictWrapper": - """ - Get a deep copy of the current pandas default options. - - :return: a copy of pandas configuration options - """ - import copy - - vals = copy.deepcopy(pd.options) - return vals - - -def set_pd_default_values() -> None: - """ - Set pandas display options to standard default values for testing. - - This ensures consistent output across different test environments. 
- """ - # 'display': - default_pd_values = { - "chop_threshold": None, - "colheader_justify": "right", - "date_dayfirst": False, - "date_yearfirst": False, - "encoding": "UTF-8", - "expand_frame_repr": True, - "float_format": None, - "html": {"border": 1, "table_schema": False, "use_mathjax": True}, - "large_repr": "truncate", - "latex": { - "escape": True, - "longtable": False, - "multicolumn": True, - "multicolumn_format": "l", - "multirow": False, - "repr": False, - }, - "max_categories": 8, - "max_columns": 20, - "max_colwidth": 50, - "max_info_columns": 100, - "max_info_rows": 1690785, - "max_rows": 60, - "max_seq_items": 100, - "memory_usage": True, - "min_rows": 10, - "multi_sparse": True, - "notebook_repr_html": True, - "pprint_nest_depth": 3, - "precision": 6, - "show_dimensions": "truncate", - "unicode": {"ambiguous_as_wide": False, "east_asian_width": False}, - "width": 80, - } - section = "display" - for key, new_val in default_pd_values.items(): - if isinstance(new_val, dict): - continue - full_key = f"{section}.{key}" - old_val = pd.get_option(full_key) - if old_val != new_val: - _LOG.debug( - "-> Assigning a different value: full_key=%s, " - "old_val=%s, new_val=%s", - full_key, - old_val, - new_val, - ) - pd.set_option(full_key, new_val) - - -# If a golden outcome is missing asserts (instead of updating golden and adding -# it to Git repo, corresponding to "update"). -_ACTION_ON_MISSING_GOLDEN = "assert" - - -# ############################################################################# -# TestCase -# ############################################################################# - - -# TODO(gp): Remove all the calls to `dedent()` and use the `dedent` switch. -class TestCase(unittest.TestCase): - """ - Add some functions to compare actual results to a golden outcome. - """ - - def setUp(self) -> None: - """ - Execute before any test method. 
- """ - # Set up the base class in case it does something, current - # implementation does nothing, see - # https://docs.python.org/3/library/unittest.html#unittest.TestCase.setUp. - super().setUp() - # Print banner to signal the start of a new test. - func_name = f"{self.__class__.__name__}.{self._testMethodName}" - _LOG.info("\n%s", hprint.frame(func_name)) - # Set the random seed. - random_seed = 20000101 - _LOG.debug("Resetting random.seed to %s", random_seed) - random.seed(random_seed) - if _HAS_NUMPY: - _LOG.debug("Resetting np.random.seed to %s", random_seed) - np.random.seed(random_seed) - # Disable matplotlib plotting by overwriting the `show` function. - if _HAS_MATPLOTLIB: - plt.show = lambda: 0 - # Name of the dir with artifacts for this test. - self._scratch_dir: Optional[str] = None - # The base directory is the one including the class under test. - self._base_dir_name = os.path.dirname(inspect.getfile(self.__class__)) - _LOG.debug("base_dir_name=%s", self._base_dir_name) - # Store whether a test needs to be updated or not. - self._update_tests = get_update_tests() - self._overriden_update_tests = False - # Store whether the golden outcome of this test was updated. - self._test_was_updated = False - # Store whether the output files need to be added to hgit. - self._git_add = True - # Error message printed when comparing actual and expected outcome. - self._error_msg = "" - # Set the default pandas options (see AmpTask1140). - if _HAS_PANDAS: - self._old_pd_options = get_pd_default_values() - set_pd_default_values() - # Reset the timestamp of the current bar. - hwacltim.reset_current_bar_timestamp() - # Start the timer to measure the execution time of the test. - self._timer = htimer.Timer() - - def tearDown(self) -> None: - """ - Execute after each test method completes. - - Handles cleanup, timing, and restoration of default settings. - """ - # Stop the timer to measure the execution time of the test. 
- self._timer.stop() - pytest_print("(%.2f s) " % self._timer.get_total_elapsed()) - # Report if the test was updated - if self._test_was_updated: - if not self._overriden_update_tests: - pytest_warning("Test was updated) ", prefix="(") - else: - # We forced an update from the unit test itself, so no need - # to report an update. - pass - # Recover the original default pandas options. - if _HAS_PANDAS: - pd.options = self._old_pd_options - # Force matplotlib to close plots to decouple tests. - if _HAS_MATPLOTLIB: - plt.close() - plt.clf() - # Delete the scratch dir, if needed. - if self._scratch_dir and os.path.exists(self._scratch_dir): - if False: - # We want to keep this if the test failed, as an alternative - # to just re-running with --incremental. - result = self._outcome.result - # From https://stackoverflow.com/questions/4414234/getting-pythons-unittest-results-in-a-teardown-method - # https://github.com/pytest-dev/pytest/issues/10631 - # This doesn't work any longer. - # has_error = test_result.failures or test_result.errors - has_error = result._excinfo is not None - else: - # TODO(gp): The problem is that when there is a failure during - # the regressions, having artifacts in the scratch dir causes - # more tests to fail (especially the ones in the cycle detector). - # We need to make tests more robust to this and then we can enable - # the logic to keep files for the failed tests in the scratch dir. - has_error = False - if has_error or get_incremental_tests(): - _LOG.warning("Skipping deleting %s", self._scratch_dir) - else: - _LOG.debug("Deleting %s", self._scratch_dir) - hio.delete_dir(self._scratch_dir) - # Tear down the base class in case it does something, current - # implementation does nothing, see - # https://docs.python.org/3/library/unittest.html#unittest.TestCase.tearDown. - super().tearDown() - - def set_base_dir_name(self, base_dir_name: str) -> None: - """ - Set the base directory for the input, output, and scratch directories. 
- - This is used to override the standard location of the base - directory which is close to the class under test. - """ - self._base_dir_name = base_dir_name - _LOG.debug("Setting base_dir_name to '%s'", self._base_dir_name) - hio.create_dir(self._base_dir_name, incremental=True) - - def mock_update_tests(self) -> None: - """ - When unit testing the unit test framework we want to test updating the - golden outcome. - """ - self._update_tests = True - self._overriden_update_tests = True - self._git_add = False - - def _get_current_path( - self, - use_only_class_name: bool, - test_class_name: Optional[str], - test_method_name: Optional[str], - use_absolute_path: bool, - ) -> str: - """ - Return the name of the directory containing the input / output data. - - E.g., - ``` - ./core/dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare - ``` - - The parameters have the same meaning as in `get_input_dir()`. - """ - if test_class_name is None: - test_class_name = self.__class__.__name__ - if use_only_class_name: - # Use only class name. - dir_name = test_class_name - else: - # Use both class and test method. - if test_method_name is None: - test_method_name = self._testMethodName - dir_name = f"{test_class_name}.{test_method_name}" - if use_absolute_path: - # E.g., `.../dataflow/test/outcomes/TestContinuousSarimaxModel.test_compare`. - dir_name = os.path.join(self._base_dir_name, "outcomes", dir_name) - else: - # E.g., `outcomes/TestContinuousSarimaxModel.test_compare`. - dir_name = os.path.join("outcomes", dir_name) - return dir_name - - def get_input_dir( - self, - *, - use_only_test_class: bool = False, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = True, - ) -> str: - """ - Return the path of the directory storing input data for this test - class. - - E.g., `TestLinearRegression1.test1`. - - :param use_only_test_class: use only the name on the test class and not of - the method. 
E.g., when one wants all the test methods to use a single - file for testing - :param test_class_name: `None` uses the current test class name - :param test_method_name: `None` uses the current test method name - :param use_absolute_path: use the path from the file containing the test - :return: dir name - """ - # Get the dir of the test. - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `input` to the dir. - dir_name = os.path.join(dir_name, "input") - return dir_name - - def get_output_dir( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> str: - """ - Return the path of the directory storing output data for this test - class. - - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: dir name - """ - # The output dir is specific of this dir. - use_only_test_class = False - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `output` to the dir. - dir_name = os.path.join(dir_name, "output") - return dir_name - - # TODO(gp): -> get_scratch_dir(). - def get_scratch_space( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = True, - ) -> str: - """ - Return the path of the directory storing scratch data for this test. - - The directory is also created and cleaned up based on whether - the incremental behavior is enabled or not. - """ - if self._scratch_dir is None: - # Create the dir on the first invocation on a given test. - use_only_test_class = False - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Add `tmp.scratch` to the dir. 
- dir_name = os.path.join(dir_name, "tmp.scratch") - # On the first invocation create the dir. - incremental = get_incremental_tests() - hio.create_dir(dir_name, incremental=incremental) - # Store the value. - self._scratch_dir = dir_name - return self._scratch_dir - - def get_s3_scratch_dir( - self, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> str: - """ - Return the path of a directory storing scratch data on S3 for this - test. - - E.g., - s3://alphamatic-data/tmp/cache.unit_test/ - root.98e1cf5b88c3.amp.TestTestCase1.test_get_s3_scratch_dir1 - """ - # Make the path unique for the test. - use_only_test_class = False - use_absolute_path = False - test_path = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - # Make the path unique for the current user. - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - project_dirname = hgit.get_project_dirname() - dir_name = f"{user_name}.{server_name}.{project_dirname}" - # Assemble everything in a single path. - import helpers.hs3 as hs3 - - aws_profile = "ck" - s3_bucket = hs3.get_s3_bucket_path_unit_test(aws_profile) - scratch_dir = f"{s3_bucket}/tmp/cache.unit_test/{dir_name}.{test_path}" - return scratch_dir - - def get_s3_input_dir( - self, - *, - use_only_test_class: bool = False, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - use_absolute_path: bool = False, - ) -> str: - """ - Return the S3 path for storing input data for this test. 
- - :param use_only_test_class: use only the test class name, not method - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :param use_absolute_path: use the path from the file containing the test - :return: S3 path for test input data - """ - s3_bucket = hrecouti.get_repo_config().get_unit_test_bucket_path() - hdbg.dassert_isinstance(s3_bucket, str) - # Make the path unique for the test. - test_path = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - use_absolute_path=use_absolute_path, - ) - hdbg.dassert_isinstance(test_path, str) - # Assemble everything in a single path. - input_dir = os.path.join(s3_bucket, test_path) - return input_dir - - def _get_test_name(self) -> str: - """ - Return the full test name as `class.method`. - """ - return f"{self.__class__.__name__}.{self._testMethodName}" - - # /////////////////////////////////////////////////////////////////////// - - def assert_equal( - self, - actual: str, - expected: str, - *, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - purify_expected_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - abort_on_error: bool = True, - dst_dir: str = ".", - ) -> bool: - """ - Return if `actual` and `expected` are different and report the - difference. - - Implement a better version of `self.assertEqual()` that reports - mismatching strings with sdiff and save them to files for - further analysis with vimdiff. - - The interface is similar to `check_string()`. 
- """ - _LOG.debug(hprint.to_str("fuzzy_match abort_on_error dst_dir")) - hdbg.dassert_in(type(actual), (bytes, str), "actual=%s", str(actual)) - hdbg.dassert_in( - type(expected), (bytes, str), "expected=%s", str(expected) - ) - # Get the current dir name. - use_only_test_class = False - test_class_name = None - test_method_name = None - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - _LOG.debug("dir_name=%s", dir_name) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert_path_exists(dir_name) - # - test_name = self._get_test_name() - is_equal = assert_equal( - actual, - expected, - test_name, - dir_name, - check_string=False, - remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, - dedent=dedent, - purify_text=purify_text, - purify_expected_text=purify_expected_text, - fuzzy_match=fuzzy_match, - ignore_line_breaks=ignore_line_breaks, - split_max_len=split_max_len, - sort=sort, - abort_on_error=abort_on_error, - dst_dir=dst_dir, - ) - return is_equal - - def assert_dfs_close( - self, - actual: "pd.DataFrame", - expected: "pd.DataFrame", - **kwargs: Any, - ) -> None: - """ - Assert dfs have same indexes and columns and that all values are close. - - This is a more robust alternative to `compare_df()`. In - particular, it is less sensitive to floating point round-off - errors. - """ - self.assertEqual(actual.index.to_list(), expected.index.to_list()) - self.assertEqual(actual.columns.to_list(), expected.columns.to_list()) - # Often the output of a failing assertion is difficult to parse - # so we resort to our special `assert_equal()`. 
- if not np.allclose(actual, expected, **kwargs): - import helpers.hpandas as hpandas - - self.assert_equal( - hpandas.df_to_str(actual), hpandas.df_to_str(expected) - ) - np.testing.assert_allclose(actual, expected, **kwargs) - - # /////////////////////////////////////////////////////////////////////// - - # TODO(gp): This needs to be moved to `helper.git` and generalized. - def _git_add_file(self, file_name: str) -> None: - """ - Add to git repo `file_name`, if needed. - """ - _LOG.debug(hprint.to_str("file_name")) - if self._git_add: - # Find the file relative to here. - mode = "assert_unless_one_result" - # The problem is that when we run from an included repo, we look - # for files like: - # ``` - # helpers_root/helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt - # ``` - # but in our directory we find files like: - # ``` - # helpers/test/outcomes/TestCheckString1.test_check_string_missing3/output/test.txt - # ``` - # so we need to make the file relative to the innermost repo. - git_root = hgit.get_client_root(super_module=False) - rel_file_name = os.path.relpath(file_name, git_root) - _LOG.debug(hprint.to_str("rel_file_name")) - file_names_tmp = hgit.find_docker_file(rel_file_name, mode=mode) - hdbg.dassert_eq(len(file_names_tmp), 1) - file_name_tmp = file_names_tmp[0] - _LOG.debug(hprint.to_str("file_name_tmp")) - cmd = f"cd amp; git add -u {file_name_tmp}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc: - pytest_warning( - f"Can't git add file\n'{file_name}' -> '{file_name_tmp}'\n" - "You need to git add the file manually\n", - prefix="\n", - ) - pytest_print(f"> {cmd}\n") - - def _check_string_update_outcome( - self, file_name: str, actual: str, use_gzip: bool - ) -> None: - """ - Update the golden outcome file with actual test output. 
- - :param file_name: path to the golden outcome file - :param actual: the actual test output to save - :param use_gzip: whether to compress the file with gzip - """ - _LOG.debug(hprint.to_str("file_name")) - hio.to_file(file_name, actual, use_gzip=use_gzip) - # Add to git repo. - self._git_add_file(file_name) - - # /////////////////////////////////////////////////////////////////////// - - def _get_golden_outcome_file_name( - self, - tag: str, - *, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> Tuple[str, str]: - """ - Get the directory and file name for the golden outcome file. - - :param tag: identifier tag for the golden outcome file - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: tuple of (directory_path, file_path) - """ - # Get the current dir name. - use_only_test_class = False - use_absolute_path = True - dir_name = self._get_current_path( - use_only_test_class, - test_class_name, - test_method_name, - use_absolute_path, - ) - _LOG.debug("dir_name=%s", dir_name) - hio.create_dir(dir_name, incremental=True) - hdbg.dassert_path_exists(dir_name) - # Get the expected outcome. - file_name = ( - self.get_output_dir( - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - + f"/{tag}.txt" - ) - return dir_name, file_name - - # TODO(gp): There is a lot of similarity between `check_string()` and - # `check_df_string()` that can be factored out if we extract the code that - # reads and saves the golden file. 
- def check_string( - self, - actual: str, - *, - remove_lead_trail_empty_lines: bool = False, - dedent: bool = False, - purify_text: bool = False, - fuzzy_match: bool = False, - ignore_line_breaks: bool = False, - split_max_len: Optional[int] = None, - sort: bool = False, - use_gzip: bool = False, - tag: str = "test", - abort_on_error: bool = True, - action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, - test_class_name: Optional[str] = None, - test_method_name: Optional[str] = None, - ) -> Tuple[bool, bool, Optional[bool]]: - """ - Check the actual outcome of a test against the expected outcome - contained in the file. If `--update_outcomes` is used, updates the - golden reference file with the actual outcome. - - :param actual: actual outcome of the test - :param remove_lead_trail_empty_lines: remove leading and trailing empty - :param dedent: call `dedent` on the expected string to align it to the - beginning of the row - :param purify_text: remove some artifacts (e.g., usernames, - directories, reference to Git client) - :param fuzzy_match: ignore differences in spaces - :param ignore_line_breaks: ignore difference due to line breaks - :param split_max_len: split the string into lines of at most this length - :param sort: sort the text and then compare it. In other terms we check - whether the lines are the same although in different order - :param use_gzip: use gzip to compress/decompress the golden outcome - :param tag: tag to identify the golden outcome file - :param abort_on_error: whether to raise an exception if the outcome is - different from the golden outcome - :param action_on_missing_golden: what to do (e.g., "assert" or "update" - when the golden outcome is missing) - :param test_class_name: override the current test class name - :param test_method_name: override the current test method name - :return: outcome_updated, file_exists, is_equal - :raises: `RuntimeError` if there is a mismatch. 
If `abort_on_error` is False - (which should be used only for unit testing) return the result but do not - assert - """ - _LOG.debug( - hprint.to_str( - "remove_lead_trail_empty_lines dedent purify_text fuzzy_match " - "ignore_line_breaks split_max_len sort use_gzip tag " - "abort_on_error action_on_missing_golden test_class_name " - "test_method_name" - ) - ) - hdbg.dassert_in(type(actual), (bytes, str), "actual='%s'", actual) - # - dir_name, file_name = self._get_golden_outcome_file_name( - tag, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - if use_gzip: - file_name += ".gz" - _LOG.debug("file_name=%s", file_name) - # Remove reference from the current environment. - # TODO(gp): Not sure why we purify here and not delegate to `assert_equal`. - if purify_text: - _LOG.debug("Purifying actual outcome") - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - _LOG.debug("actual=\n%s", actual) - outcome_updated = False - file_exists = os.path.exists(file_name) - _LOG.debug("file_exists=%s", file_exists) - is_equal: Optional[bool] = None - if self._update_tests: - _LOG.debug("# Update golden outcomes") - # Determine whether outcome needs to be updated. - if file_exists: - expected = hio.from_file(file_name) - is_equal = expected == actual - if not is_equal: - outcome_updated = True - else: - # The golden outcome doesn't exist. - outcome_updated = True - _LOG.debug("outcome_updated=%s", outcome_updated) - if outcome_updated: - # Update the golden outcome. - self._check_string_update_outcome(file_name, actual, use_gzip) - else: - # Check the test result. - _LOG.debug("# Check golden outcomes") - if file_exists: - # Golden outcome is available: check the actual outcome against - # the golden outcome. 
- expected = hio.from_file(file_name) - test_name = self._get_test_name() - is_equal = assert_equal( - actual, - expected, - test_name, - dir_name, - check_string=True, - remove_lead_trail_empty_lines=remove_lead_trail_empty_lines, - dedent=dedent, - # We have handled the purification of the output earlier. - purify_text=False, - fuzzy_match=fuzzy_match, - ignore_line_breaks=ignore_line_breaks, - split_max_len=split_max_len, - sort=sort, - abort_on_error=abort_on_error, - ) - else: - # No golden outcome available. - _LOG.warning("Can't find golden outcome file '%s'", file_name) - if action_on_missing_golden == "assert": - # Save the result to a temporary file and assert. - file_name += ".tmp" - hio.to_file(file_name, actual, use_gzip=use_gzip) - msg = ( - "The golden outcome doesn't exist: saved the actual " - f"output in '{file_name}'" - ) - _LOG.error(msg) - if abort_on_error: - hdbg.dfatal(msg) - elif action_on_missing_golden == "update": - # Create golden file and add it to the repo. - _LOG.warning("Creating the golden outcome") - outcome_updated = True - self._check_string_update_outcome( - file_name, actual, use_gzip - ) - is_equal = None - else: - hdbg.dfatal( - "Invalid action_on_missing_golden=" - + f"'{action_on_missing_golden}'" - ) - self._test_was_updated = outcome_updated - _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) - return outcome_updated, file_exists, is_equal - - # /////////////////////////////////////////////////////////////////////// - - def _check_df_update_outcome( - self, - file_name: str, - actual: "pd.DataFrame", - ) -> None: - """ - Update the golden outcome file with actual dataframe output. - - :param file_name: path to the golden outcome file - :param actual: the actual dataframe to save - """ - _LOG.debug(hprint.to_str("file_name")) - hio.create_enclosing_dir(file_name) - actual.to_csv(file_name) - pytest_warning(f"Update golden outcome file '{file_name}'", prefix="\n") - # Add to git repo. 
- self._git_add_file(file_name) - - def _to_error(self, msg: str) -> None: - """ - Append error message to the accumulated error log. - - :param msg: error message to log and accumulate - """ - self._error_msg += msg + "\n" - _LOG.error(msg) - - def _check_df_compare_outcome( - self, file_name: str, actual: "pd.DataFrame", err_threshold: float - ) -> Tuple[bool, "pd.DataFrame"]: - """ - Compare actual dataframe with golden outcome from file. - - :param file_name: path to the golden outcome file - :param actual: the actual dataframe to compare - :param err_threshold: relative error threshold for numerical comparison - :return: tuple of (is_equal, expected_dataframe) - """ - _LOG.debug(hprint.to_str("file_name")) - _LOG.debug("actual_=\n%s", actual) - hdbg.dassert_lte(0, err_threshold) - hdbg.dassert_lte(err_threshold, 1.0) - # Load the expected df from file. - expected = pd.read_csv(file_name, index_col=0) - _LOG.debug("expected=\n%s", expected) - hdbg.dassert_isinstance(expected, pd.DataFrame) - ret = True - # Compare columns. - if actual.columns.tolist() != expected.columns.tolist(): - msg = f"Columns are different:\n{str(actual.columns)}\n{str(expected.columns)}" - self._to_error(msg) - ret = False - # Compare the values. 
- _LOG.debug("actual.shape=%s", str(actual.shape)) - _LOG.debug("expected.shape=%s", str(expected.shape)) - # From https://numpy.org/doc/stable/reference/generated/numpy.allclose.html - # absolute(a - b) <= (atol + rtol * absolute(b)) - # absolute(a - b) / absolute(b)) <= rtol - is_close = np.allclose( - actual, expected, rtol=err_threshold, equal_nan=True - ) - if not is_close: - _LOG.error("Dataframe values are not close") - if actual.shape == expected.shape: - close_mask = np.isclose(actual, expected, equal_nan=True) - # - msg = f"actual=\n{actual}" - self._to_error(msg) - # - msg = f"expected=\n{expected}" - self._to_error(msg) - # - actual_masked = np.where(close_mask, np.nan, actual) - msg = f"actual_masked=\n{actual_masked}" - self._to_error(msg) - # - expected_masked = np.where(close_mask, np.nan, expected) - msg = f"expected_masked=\n{expected_masked}" - self._to_error(msg) - # - err = np.abs((actual_masked - expected_masked) / expected_masked) - msg = f"err=\n{err}" - self._to_error(msg) - max_err = np.nanmax(np.nanmax(err)) - msg = "max_err=%.3f" % max_err - self._to_error(msg) - else: - msg = ( - "Shapes are different:\n" - f"actual.shape={str(actual.shape)}\nexpected.shape={str(expected.shape)}" - ) - self._to_error(msg) - ret = False - _LOG.debug("ret=%s", ret) - return ret, expected - - def check_dataframe( - self, - actual: "pd.DataFrame", - *, - err_threshold: float = 0.05, - dedent: bool = False, - tag: str = "test_df", - abort_on_error: bool = True, - action_on_missing_golden: str = _ACTION_ON_MISSING_GOLDEN, - ) -> Tuple[bool, bool, Optional[bool]]: - """ - Like `check_string()` but for pandas dataframes, instead of strings. 
- """ - _LOG.debug(hprint.to_str("err_threshold tag abort_on_error")) - hdbg.dassert_isinstance(actual, pd.DataFrame) - # - dir_name, file_name = self._get_golden_outcome_file_name(tag) - _LOG.debug("file_name=%s", file_name) - outcome_updated = False - file_exists = os.path.exists(file_name) - _LOG.debug(hprint.to_str("file_exists")) - is_equal: Optional[bool] = None - if self._update_tests: - _LOG.debug("# Update golden outcomes") - # Determine whether outcome needs to be updated. - if file_exists: - is_equal, _ = self._check_df_compare_outcome( - file_name, actual, err_threshold - ) - _LOG.debug(hprint.to_str("is_equal")) - if not is_equal: - outcome_updated = True - else: - # The golden outcome doesn't exist. - outcome_updated = True - _LOG.debug("outcome_updated=%s", outcome_updated) - if outcome_updated: - # Update the golden outcome. - self._check_df_update_outcome(file_name, actual) - else: - # Check the test result. - _LOG.debug("# Check golden outcomes") - if file_exists: - # Golden outcome is available: check the actual outcome against - # the golden outcome. - is_equal, expected = self._check_df_compare_outcome( - file_name, actual, err_threshold - ) - # If not equal, report debug information. - if not is_equal: - test_name = self._get_test_name() - assert_equal( - str(actual), - str(expected), - test_name, - dir_name, - check_string=True, - remove_lead_trail_empty_lines=False, - dedent=dedent, - purify_text=False, - fuzzy_match=False, - ignore_line_breaks=False, - split_max_len=None, - sort=False, - abort_on_error=abort_on_error, - error_msg=self._error_msg, - ) - else: - # No golden outcome available. - _LOG.warning("Can't find golden outcome file '%s'", file_name) - if action_on_missing_golden == "assert": - # Save the result to a temporary file and assert. 
- file_name += ".tmp" - hio.create_enclosing_dir(file_name) - actual.to_csv(file_name) - msg = ( - "The golden outcome doesn't exist: saved the actual " - f"output in '{file_name}'" - ) - _LOG.error(msg) - if abort_on_error: - hdbg.dfatal(msg) - elif action_on_missing_golden == "update": - # Create golden file and add it to the repo. - _LOG.warning("Creating the golden outcome") - outcome_updated = True - self._check_df_update_outcome(file_name, actual) - is_equal = None - else: - hdbg.dfatal( - "Invalid action_on_missing_golden=" - + f"'{action_on_missing_golden}'" - ) - self._test_was_updated = outcome_updated - # TODO(gp): Print the file with the updated test. - _LOG.debug(hprint.to_str("outcome_updated file_exists is_equal")) - return outcome_updated, file_exists, is_equal - - def check_df_output( - self, - actual_df: "pd.DataFrame", - expected_length: Optional[int], - expected_column_names: Optional[List[str]], - expected_column_unique_values: Optional[Dict[str, List[Any]]], - expected_signature: str, - ) -> None: - """ - Verify that actual outcome dataframe matches the expected one. - - :param actual_df: actual outcome dataframe - :param expected_length: expected outcome dataframe length - - If `None`, skip the check - :param expected_column_names: expected outcome dataframe column names - - If `None`, skip the check - :param expected_column_unique_values: dict of column names and unique values - that they should contain - - If `None`, skip the check - :param expected_signature: expected outcome dataframe as string - - If `__CHECK_STRING__` use the value in `self.check_string()` - """ - # TODO(Grisha): get rid of `hpandas` dependency. - import helpers.hpandas as hpandas - - hdbg.dassert_isinstance(actual_df, pd.DataFrame) - if expected_length: - # Verify that the output length is correct. - actual_length = actual_df.shape[0] - self.assert_equal(str(actual_length), str(expected_length)) - if expected_column_names: - # Verify that the column names are correct. 
- self.assert_equal( - str(sorted(actual_df.columns)), - str(sorted(expected_column_names)), - ) - if expected_column_unique_values: - hdbg.dassert_is_subset( - list(expected_column_unique_values.keys()), actual_df.columns - ) - # Verify that the unique values in specified columns are correct. - for column in expected_column_unique_values: - actual_one_column_unique_values = sorted( - list(actual_df[column].unique()) - ) - self.assert_equal( - str(actual_one_column_unique_values), - str(sorted(expected_column_unique_values[column])), - ) - # Build signature. - actual_signature = hpandas.df_to_str( - actual_df, - print_shape_info=True, - tag="df", - ) - _LOG.debug("\n%s", actual_signature) - # Check signature. - if expected_signature == "__CHECK_STRING__": - self.check_string(actual_signature, dedent=True, fuzzy_match=True) - else: - hdbg.dassert_isinstance(expected_signature, str) - self.assert_equal( - actual_signature, - expected_signature, - dedent=True, - fuzzy_match=True, - ) - - def check_srs_output( - self, - actual_srs: "pd.Series", - expected_length: Optional[int], - expected_unique_values: Optional[List[Any]], - expected_signature: str, - ) -> None: - """ - Verify that actual outcome series matches the expected one. - - :param actual_srs: actual outcome series - :param expected_length: expected outcome series length - - If `None`, skip the check - :param expected_unique_values: list of expected unique values in series - - If `None`, skip the check - :param expected_signature: expected outcome series as string - """ - # Import `hpandas` dynamically to exclude `pandas` from the thin client - # requirements. See CmTask6613 for details. - import helpers.hpandas as hpandas - - hdbg.dassert_isinstance(actual_srs, pd.Series) - if expected_length: - # Verify that output length is correct. - self.assert_equal(str(actual_srs.shape[0]), str(expected_length)) - if expected_unique_values: - # Verify that unique values in series are correct. 
- self.assert_equal( - str(sorted(list(actual_srs.unique()))), - str(sorted(expected_unique_values)), - ) - # Build signature. - actual_signature = hpandas.df_to_str(actual_srs, num_rows=None) - _LOG.debug("\n%s", actual_signature) - # Check signature. - if expected_signature == "__CHECK_STRING__": - self.check_string(actual_signature, dedent=True, fuzzy_match=True) - else: - hdbg.dassert_isinstance(expected_signature, str) - self.assert_equal( - actual_signature, - expected_signature, - dedent=True, - fuzzy_match=True, - ) - - -# ############################################################################# -# QaTestCase -# ############################################################################# - - -@pytest.mark.qa -@pytest.mark.skipif( - hserver.is_inside_docker(), reason="Test needs to be run outside Docker" -) -class QaTestCase(TestCase, abc.ABC): - """ - Use for QA to test functionalities (e.g., invoke tasks) that run the dev / - prod container. - """ - - # TODO(Grisha): Linter should not remove `pass` statement from an empty class - # DevToolsTask #476. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py deleted file mode 100644 index cf429b5ac..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_purification.py +++ /dev/null @@ -1,450 +0,0 @@ -""" -Import as: - -import helpers.hunit_test_purification as huntepur -""" - -import datetime -import logging -import os -import re -from typing import List, Tuple - -import helpers.hgit as hgit -import helpers.hintrospection as hintros -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - -# Mute this module unless we want to debug it. 
-_LOG.setLevel(logging.INFO) - - -# ############################################################################# -# TextPurifier -# ############################################################################# - - -# TODO(gp): Not sure the class is really needed since now it's in a separate -# file. -class TextPurifier: - """ - A class to purify text by removing environment-specific information and - standardizing output for test comparisons. - """ - - def purify_txt_from_client(self, txt: str) -> str: - """ - Apply all purification steps to the input text. - - :param txt: input text to purify - :return: purified text - """ - # The order of substitutions is important. We want to start from the "most - # specific" (e.g., `amp/helpers/test/...`) to the "least specific" (e.g., - # `amp`). - txt = self.purify_directory_paths(txt) - txt = self.purify_from_environment(txt) - # Correct order: -> `app` -> `amp` -> - # Start with `app.amp.helpers_root.helpers...` - # After purifying app references -> `amp.helpers_root.helpers...` - # After purifying amp references -> `helpers_root.helpers...` - # - # Incorrect order: -> `amp` -> `app` -> - # Start with `amp.helpers_root.helpers...` - # After purifying `amp` references -> `app.amp.helpers_root.helpers...` - # After purifying `app` references -> `amp.helpers_root.helpers...` - # - txt = self.purify_app_references(txt) - txt = self.purify_amp_references(txt) - txt = self.purify_from_env_vars(txt) - txt = self.purify_object_representation(txt) - txt = self.purify_today_date(txt) - txt = self.purify_white_spaces(txt) - txt = self.purify_parquet_file_names(txt) - txt = self.purify_helpers(txt) - txt = self.purify_docker_image_name(txt) - return txt - - def purify_directory_paths(self, txt: str) -> str: - """ - Replace known directory paths with standardized placeholders. - - Apply replacements in this order: - 1. Replace Git root paths with `$GIT_ROOT`. - 2. Replace `CSFY_HOST_GIT_ROOT_PATH` with `$CSFY_HOST_GIT_ROOT_PATH`. - 3. 
Replace current working directory with `$PWD`. - - :param txt: input text that needs to be purified - :return: purified text - """ - _LOG.debug("Before: txt='\n%s'", txt) - # Collect all paths to replace with their priorities. - replacements = [] - # 1. Git root paths. - # Remove references to Git modules starting from the innermost one. - for super_module in [False, True]: - # Replace the git root path with `$GIT_ROOT`. - git_root = hgit.get_client_root(super_module=super_module) - if git_root and git_root != "/": - replacements.append((git_root, "$GIT_ROOT")) - _LOG.debug("Added git root '%s' for replacement", git_root) - else: - # Skip git root path if it is `/`. - pass - # 2. CSFY_HOST_GIT_ROOT_PATH environment variable. - # Replace the CSFY_HOST_GIT_ROOT_PATH with `$CSFY_HOST_GIT_ROOT_PATH`. - csfy_git_root = os.environ.get("CSFY_HOST_GIT_ROOT_PATH") - if csfy_git_root: - replacements.append((csfy_git_root, "$CSFY_HOST_GIT_ROOT_PATH")) - _LOG.debug( - "Added CSFY_HOST_GIT_ROOT_PATH '%s' for replacement", - csfy_git_root, - ) - # 3. Current working directory. - # Replace the path of current working directory with `$PWD`. - pwd = os.getcwd() - if pwd and pwd != "/": - replacements.append((pwd, "$PWD")) - _LOG.debug("Added PWD '%s' for replacement", pwd) - # Apply replacements in order of priority. - for path, replacement in replacements: - # Use word boundaries to avoid replacing path fragments. - # E.g., To avoid replacing `app` in `application.py`. - pattern = rf"(? str: - """ - Replace environment-specific values with placeholders. - - Perform these transformations: - 1. Replace directory paths with standardized placeholders. - 2. Replace the current user name with $USER_NAME. - 3. Handle special cases like usernames in paths and commands. - - :param txt: input text that needs to be purified - :return: purified text - """ - # Replace current username with `$USER_NAME`. 
- user_name = hsystem.get_user_name() - # Set a regex pattern that finds a user name surrounded by dot, dash or space. - # E.g., `IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0`, - # `--name $USER_NAME.amp_test.app.app`, `run --rm -l user=$USER_NAME`. - regex = rf"([\s\n\-\.\=]|^)+{user_name}+([.\s/-]|$)" - # Use `\1` and `\2` to preserve specific characters around `$USER_NAME`. - target = r"\1$USER_NAME\2" - txt = re.sub(regex, target, txt) - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def _apply_regex_replacements( - self, txt: str, regex_patterns: List[Tuple[str, str]] - ) -> str: - """ - Apply a series of regex replacements to text. - - :param txt: input text to process - :param regex_patterns: list of (pattern, replacement) tuples to - apply in order - :return: text with all regex replacements applied - """ - # Apply regex replacements in order. - txt_out = txt - for regex_pattern, replacement in regex_patterns: - txt_out = re.sub(regex_pattern, replacement, txt_out) - _LOG.debug( - "Applying %s -> %s: before=%s, after=%s", - regex_pattern, - replacement, - txt, - txt_out, - ) - return txt_out - - def purify_amp_references(self, txt: str) -> str: - """ - Remove references to amp from text by applying a series of regex - substitutions. - - Handle these patterns: - 1. Replace path references - - E.g., "amp/helpers/test/..." -> "helpers/test/..." - 2. Replace class references - - E.g., "" -> "" - 3. Replace comment references - - E.g., "# Test created for amp.helpers.test" -> "# Test created for helpers.test" - 4. Replace module references - - E.g., "amp.helpers.test.TestClass" -> "helpers.test.TestClass" - - :param txt: input text containing amp references - :return: text with amp references removed - """ - amp_patterns = [ - # Remove 'amp/' prefix from quoted paths. - (r"'amp/", "'"), - # Remove 'amp/' prefix from path segments. 
- (r"(?m)(^\s*|\s+)amp/", r"\1"), - # Replace '/amp/' with '/' and '/amp:' with ':' in paths. - (r"(?m)/amp/", "/"), - (r"(?m)/amp:", ":"), - # Remove 'amp.' prefix from class representations and tracebacks. - (r" str: - """ - Remove references to `/app` from text by applying a series of regex - substitutions. - - :param txt: input text containing app references - :return: text with app references removed - """ - app_patterns = [ - # Remove trailing '/app/' references. - (r"(? str: - """ - Replace environment variable values with their variable names. - - :param txt: input text containing environment variable values - :return: text with environment variable values replaced - """ - for env_var in [ - "CSFY_AWS_S3_BUCKET", - "CSFY_ECR_BASE_PATH", - ]: - if env_var in os.environ: - val = os.environ[env_var] - if val == "": - _LOG.debug("Env var '%s' is empty", env_var) - else: - txt = txt.replace(val, f"${env_var}") - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def purify_object_representation(self, txt: str) -> str: - """ - Remove references like `at 0x7f43493442e0`. - - :param txt: input text containing object representations - :return: text with object representations standardized - """ - object_patterns = [ - (r"at 0x[0-9A-Fa-f]+", "at 0x"), - (r" id='\d+'>", " id='xxx'>"), - (r"port=\d+", "port=xxx"), - (r"host=\S+ ", "host=xxx "), - ( - r"wall_clock_time=Timestamp\('.*?',", - r"wall_clock_time=Timestamp('xxx',", - ), - ] - txt = self._apply_regex_replacements(txt, object_patterns) - _LOG.debug("After %s: txt='\n%s'", hintros.get_function_name(), txt) - return txt - - def purify_today_date(self, txt: str) -> str: - """ - Remove today's date like `20220810`. - - :param txt: input text containing dates - :return: text with dates standardized - """ - today_date = datetime.date.today() - today_date_as_str = today_date.strftime("%Y%m%d") - # Replace predict.3.compress_tails.df_out.20220627_094500.YYYYMMDD_171106.csv.gz. 
- txt = re.sub( - today_date_as_str + r"_\d{6}", - "YYYYMMDD_HHMMSS", - txt, - flags=re.MULTILINE, - ) - txt = re.sub(today_date_as_str, "YYYYMMDD", txt, flags=re.MULTILINE) - return txt - - def purify_white_spaces(self, txt: str) -> str: - """ - Remove trailing white spaces. - - :param txt: input text with whitespace - :return: text with standardized whitespace - """ - txt_new = [] - for line in txt.split("\n"): - line = line.rstrip() - txt_new.append(line) - txt = "\n".join(txt_new) - return txt - - def purify_line_number(self, txt: str) -> str: - """ - Replace line number with `$LINE_NUMBER`. - - :param txt: input text containing line numbers - :return: text with line numbers standardized - """ - txt = re.sub(r"\.py::\d+", ".py::$LINE_NUMBER", txt, flags=re.MULTILINE) - return txt - - def purify_parquet_file_names(self, txt: str) -> str: - """ - Replace UUIDs file names to `data.parquet` in the golden outcomes. - - :param txt: input text containing parquet file names - :return: text with standardized parquet file names - """ - pattern = r""" - [0-9a-f]{32}-[0-9].* # GUID pattern. - (?=\.parquet) # positive lookahead assertion that matches a - # position followed by ".parquet" without - # consuming it. - """ - # TODO(Vlad): Need to change the replacement to `$FILE_NAME` as in the - # `purify_from_environment()` function. For now, some tests are expecting - # `data.parquet` files. - replacement = "data" - # flags=re.VERBOSE allows us to use whitespace and comments in the pattern. - txt = re.sub(pattern, replacement, txt, flags=re.VERBOSE) - return txt - - def purify_helpers(self, txt: str) -> str: - """ - Replace the path `helpers_root.helpers` with `helpers`. 
- - :param txt: input text containing helper references - :return: text with standardized helper references - """ - txt = re.sub( - r"helpers_root\.helpers\.", "helpers.", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/helpers/", "helpers/", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root\.config_root", "config_root", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/config_root/", "config_root/", txt, flags=re.MULTILINE - ) - txt = re.sub( - r"helpers_root/dev_scripts_helpers/", - "dev_scripts_helpers/", - txt, - flags=re.MULTILINE, - ) - return txt - - def purify_docker_image_name(self, txt: str) -> str: - """ - Remove temporary docker image name. - - :param txt: input text containing docker image names - :return: text with standardized docker image names - """ - # Purify command like: - # > docker run --rm ... tmp.latex.edb567be .. - # > ... tmp.latex.aarch64.2f590c86.2f590c86 - pattern = r""" - ^ # Start of line - ( # Start capture group 1 - .*docker.* # Any text containing "docker" - \s+ # One or more whitespace - tmp\.\S+\. # tmp.something. - ) # End capture group 1 - [a-z0-9]{8} # 8 character hex hash - ( # Start capture group 2 - \s+ # One or more whitespace - .* # Rest of the line - ) # End capture group 2 - $ # End of line - """ - txt = re.sub( - pattern, - r"\1xxxxxxxx\2", - txt, - flags=re.MULTILINE | re.VERBOSE, - ) - # Handle patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. - pattern = r""" - ^ # Start of line - ( # Start capture group 1 - .*docker.* # Any text containing "docker" - \s+ # One or more whitespace - tmp\.\S+\.\S+\. # tmp.something.something. - ) # End capture group 1 - [a-z0-9]{8} # 8 character hex hash - \. 
# Literal dot - [a-z0-9]{8} # Another 8 character hex hash - ( # Start capture group 2 - \s+ # One or more whitespace - .* # Rest of the line - ) # End capture group 2 - $ # End of line - """ - txt = re.sub( - pattern, - r"\1xxxxxxxx\2", - txt, - flags=re.MULTILINE | re.VERBOSE, - ) - return txt - - def purify_file_names(self, file_names: List[str]) -> List[str]: - """ - Express file names in terms of the root of git repo, removing reference - to `amp`. - """ - git_root = hgit.get_client_root(super_module=True) - file_names = [os.path.relpath(f, git_root) for f in file_names] - # Apply amp reference purification to file paths. - file_names = list(map(self.purify_amp_references, file_names)) - return file_names - - -def purify_text(txt: str) -> str: - """ - Purify text by removing environment-specific information and standardizing - output for test comparisons. - """ - purifier = TextPurifier() - return purifier.purify_txt_from_client(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py deleted file mode 100644 index 5d00c50ad..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hunit_test_utils.py +++ /dev/null @@ -1,658 +0,0 @@ -""" -Import as: - -import helpers.hunit_test_utils as hunteuti -""" - -import abc -import contextlib -import glob -import logging -import os -import re -from typing import Any, Dict, Generator, List, Optional, Tuple -import unittest.mock as mock - -import pytest - -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hserver as hserver -import helpers.hstring as hstring -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def 
get_test_directories(root_dir: str) -> List[str]: - """ - Get paths of all the directories that contain unit tests. - - :param root_dir: the dir to start the search from, e.g. - `/src/cmamp1/helpers` - :return: paths of test directories - """ - paths = [] - for path, _, _ in os.walk(root_dir): - # Iterate over the paths to find the test directories. - if path.endswith("/test"): - paths.append(path) - hdbg.dassert_lte(1, len(paths)) - return paths - - -# ############################################################################# -# UnitTestRenamer -# ############################################################################# - - -class UnitTestRenamer: - """ - Rename a unit test in Python code and the corresponding directories - containing the inputs and the expected outputs. - """ - - @staticmethod - def _check_names(old_test_name: str, new_test_name: str) -> None: - """ - Check if the test names are valid. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - """ - # Assert if the classname does not start with `Test`. - for name in [old_test_name, new_test_name]: - hdbg.dassert( - name.startswith("Test"), - "Invalid test_class_name='%s'. A test class should start with `Test`", - name, - ) - # Assert if the names are the same. - hdbg.dassert_ne(old_test_name, new_test_name) - - @staticmethod - def _process_parameters( - old_test_name: str, - new_test_name: str, - ) -> Dict[str, str]: - """ - Build the processing config with the renaming parameters. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - :return: config for renaming process, i.e. a dictionary which includes the fields: - - `old_class`: old name of the class - - `new_class`: new name of the class - - `old_method`: new name of the method. If empty, only class should be renamed - - `new_method`: new name of the method - """ - # Build the processing config. - config: Dict[str, str] = {} - # Split by "." 
to separate class name and method name. - split_old_name = old_test_name.split(".") - split_new_name = new_test_name.split(".") - # Check the consistency of the names - they should have the same length. - hdbg.dassert_eq( - len(split_old_name), - len(split_new_name), - "The test names are not consistent; one has a method and the other does not.", - ) - # Check the format of the test name. - hdbg.dassert_in( - len(split_old_name), - [1, 2], - msg="Wrong test name format: it must contain no more than 1 dot", - ) - if len(split_old_name) == 1: - # Class name split by `.` is one element array, e.g. `["TestClassName"]`. - old_class_name, old_method_name = split_old_name[0], "" - new_class_name, new_method_name = split_new_name[0], "" - _LOG.debug( - "Trying to change the name of `{old_test_name}` unit test class to `%s`.", - new_test_name, - ) - else: - # Method name split by `.` is 2 element array, e.g. - # TestClassName.test2` - >`["TestClassName", "test2"]`. - old_class_name, old_method_name = split_old_name - new_class_name, new_method_name = split_new_name - hdbg.dassert_eq( - old_class_name, - new_class_name, - "To change the name of the method, specify the methods of the \ - same class. E.g. `--old TestCache.test1 --new TestCache.new_test1`", - ) - _LOG.debug( - "Trying to change the name of `%s` method of `%s` class to `%s`.", - old_method_name, - old_class_name, - new_method_name, - ) - # Fill the processing parameters. - config["old_class"] = old_class_name - config["old_method"] = old_method_name - config["new_class"] = new_class_name - config["new_method"] = new_method_name - return config - - def __init__( - self, old_test_name: str, new_test_name: str, root_dir: str - ) -> None: - """ - Construct the UnitTestRenamer. - - :param old_test_name: the old name of the test - :param new_test_name: the new name of the test - :param root_dir: the directory to start the search from - """ - # Check if the names of the test are valid. 
- self._check_names(old_test_name, new_test_name) - # Get the directories containing tests. - self.test_dirs = get_test_directories(root_dir) - # Construct the renaming config. - self.cfg = self._process_parameters(old_test_name, new_test_name) - - def _rename_class( - self, - content: str, - ) -> Tuple[str, int]: - """ - Rename a class in a Python file. - - :param content: the content of the file - :return: the content of the file with the class name replaced, - the number of substitutions replaced - """ - lines = content.split("\n") - docstring_line_indices = hstring.get_docstring_line_indices(lines) - num_replaced = 0 - for ind, line in enumerate(lines): - # Skip if the line is inside a docstring. - if ind not in docstring_line_indices: - # Rename the class. - new_line, num_replaced = re.subn( - rf"class {self.cfg['old_class']}\(", - rf"class {self.cfg['new_class']}(", - line, - ) - if num_replaced != 0: - lines[ind] = new_line - break - content = "\n".join(lines) - return content, num_replaced - - def _rename_method( - self, - content: str, - ) -> Tuple[str, int]: - """ - Rename the method of the class. - - :param content: the content of the file - :return: content of the file with the method renamed, the number - of substitutions made - """ - lines = content.split("\n") - # Flag that informs if the class border was found. - class_found = False - # The number of substitutions made in the content of the file. - num_replaced = 0 - class_pattern = rf"class {self.cfg['old_class']}\(" - method_pattern = rf"def {self.cfg['old_method']}\(" - docstring_line_indices = hstring.get_docstring_line_indices(lines) - for ind, line in enumerate(lines): - # Iterate over the lines of the file to find the specific method of the - # class that should be renamed. - # Skip if the line is inside a docstring. - if class_found and ind not in docstring_line_indices: - if line.startswith("class"): - # Break if the next class started and the method was not found. 
- break - # Rename the method. - new_line, num_replaced = re.subn( - method_pattern, f"def {self.cfg['new_method']}(", line - ) - if num_replaced != 0: - # Replace the line with method definition. - lines[ind] = new_line - break - else: - if re.search(class_pattern, line): - class_found = True - new_content = "\n".join(lines) - return new_content, num_replaced - - def _rename_in_file( - self, - test_dir: str, - file_path: str, - ) -> None: - """ - Process the file: - - - check if the content of the file contains target class - - change the class name, e.g. `TestClassName` -> `TestClassNameNew` - / change the method name `TestClassName.test2` -> `TestClassName.test_new` - - rename the outcomes if they exist - - :param test_dir: the path to the test directory containing the file, e.g. - `/src/cmamp1/helpers/test` - :param file_path: the path to the file, `/src/cmamp1/helpers/test/test_lib_tasks.py` - """ - content = hio.from_file(file_path) - if not re.search(rf"class {self.cfg['old_class']}\(", content): - # Return if target test class does not appear in file content. - return - if self.cfg["old_method"] == "": - # Rename the class. - content, n_replaced = self._rename_class(content) - if n_replaced != 0: - _LOG.info( - "%s: class `%s` was renamed to `%s`.", - file_path, - self.cfg["old_class"], - self.cfg["new_class"], - ) - else: - # Rename the method of the class. - content, n_replaced = self._rename_method(content) - if n_replaced != 0: - _LOG.info( - "%s: method `%s` of `%s` class was renamed to `%s`.", - file_path, - self.cfg["old_method"], - self.cfg["old_class"], - self.cfg["new_method"], - ) - # Rename the directories that contain target test outcomes. - self.rename_outcomes( - test_dir, - ) - # Write processed content back to file. - hio.to_file(file_path, content) - - def run(self) -> None: - """ - Run the renamer tool on the files under `root_dir`. - """ - # Iterate over test directories. 
- for path in self.test_dirs: - # Get all Python test files from this directory. - _LOG.debug("Scanning `%s` directory.", path) - search_pattern = os.path.join(path, "test_*.py") - files = glob.glob(search_pattern) - for test_file in files: - self._rename_in_file( - path, - test_file, - ) - - @staticmethod - def _rename_directory(outcome_path_old: str, outcome_path_new: str) -> None: - """ - Rename the outcomes directory and add it to git. - - :param outcome_path_old: the old name of outcome directory, e.g. - `/src/cmamp1/helpers/test/outcomes/TestRename.test_old` - :param outcome_path_new: the new name of outcome directory, e.g. - `/src/cmamp1/helpers/test/outcomes/TestRename.test_new` - """ - cmd = f"mv {outcome_path_old} {outcome_path_new}" - # Rename the directory. - rc = hsystem.system(cmd, abort_on_error=True, suppress_output=False) - _LOG.info( - "Renaming `%s` directory to `%s`. Output log: %s", - outcome_path_old, - outcome_path_new, - rc, - ) - # Add to git new outcome directory and remove the old one. - # The sequence of commands is used because `git mv` does not work - # properly while unit testing. - cmd = f"git add {outcome_path_new} && git rm -r {outcome_path_old}" - hsystem.system(cmd, abort_on_error=True, suppress_output=False) - - def _process_outcomes_dir( - self, outcome_dir: str, outcomes_path: str - ) -> bool: - """ - Process the directory containing target test outcomes. - - The stages of processing are: - - generate the new name of the directory - - rename and add it to git - - :param outcome_dir: the name of the directory containing the outcomes - :param outcomes_path: the path to the outcomes directory - :return: if the outcomes were renamed - """ - # Contruct the path to outcomes directory. - outcome_path_old = os.path.join(outcomes_path, outcome_dir) - # Construct old and new target dir names, e.g. 
- # `TestOldName.` and `TestNewName.` if class should be renamed or - # `TestOldName.test_old` and `TestOldName.test_new` if method should be renamed. - old_target = ".".join([self.cfg["old_class"], self.cfg["old_method"]]) - new_target = ".".join([self.cfg["new_class"], self.cfg["new_method"]]) - if self.cfg["old_method"] == "" and outcome_dir.startswith(old_target): - # Check if the class should be renamed, e.g. - # if `outcome_dir` is `TestOld.test1` and `old_target` is `TestOld.`. - # Split old directory name - the part before "." is the class name. - class_method = outcome_dir.split(".") - # Replace old class name with the new one, `["TestOld", "test1"]` - # -> `["TestNew", "test1"]`. - class_method[0] = self.cfg["new_class"] - # Construct the new outcome directory name -> `TestNew.test1`. - outcome_name_new = ".".join(class_method) - outcome_path_new = os.path.join(outcomes_path, outcome_name_new) - elif self.cfg["old_method"] != "" and outcome_dir == old_target: - # Check if the dir should be renamed. E.g. given that `old_target` - # is `TestOld.test1_new`, then if `outcome_dir` is `TestOld.test1`, - # it should not be renamed, and if `outcome_dir` is `TestOld.test1_new`, - # it should be renamed. - outcome_path_new = os.path.join(outcomes_path, new_target) - else: - return False - # Rename the directory and add it to git. - self._rename_directory(outcome_path_old, outcome_path_new) - return True - - def rename_outcomes( - self, - path: str, - ) -> None: - """ - Rename the directory that contains test outcomes. - - :param path: the path to the test directory, e.g. - `cmamp1/helpers/test/` - """ - outcomes_path = os.path.join(path, "outcomes") - dir_items = os.listdir(outcomes_path) - # Get the list of outcomes directories. 
- outcomes = [ - dir_name - for dir_name in dir_items - if os.path.isdir(os.path.join(outcomes_path, dir_name)) - ] - renamed = False - for outcome_dir in outcomes: - renamed = self._process_outcomes_dir(outcome_dir, outcomes_path) - if not renamed: - _LOG.info( - "No outcomes for `%s` were found in `%s`.", - self.cfg["old_class"], - outcomes_path, - ) - - -# ############################################################################# -# Obj_to_str_TestCase -# ############################################################################# - - -class Obj_to_str_TestCase(abc.ABC): - """ - Test case for testing `obj_to_str()` and `obj_to_repr()`. - """ - - def helper(self, obj: Any, method_name: str, expected_str: str) -> None: - """ - Common method for testing `__repr__` and `__str__`. - """ - hdbg.dassert_is_not(obj, None) - actual_str = getattr(obj, method_name)() - self.assert_equal( # type: ignore - actual_str, expected_str, purify_text=True, fuzzy_match=True - ) - - def run_test_repr(self, obj: Any, expected_str: str) -> None: - """ - Check that `__repr__` is printed correctly. - """ - method_name = "__repr__" - self.helper(obj, method_name, expected_str) - - def run_test_str(self, obj: Any, expected_str: str) -> None: - """ - Check that `__str__` is printed correctly. - """ - method_name = "__str__" - self.helper(obj, method_name, expected_str) - - def run_test_to_config_str(self, obj: Any, expected_str: str) -> None: - """ - Check that `to_config_str()` is printed correctly. - """ - method_name = "to_config_str" - self.helper(obj, method_name, expected_str) - - -# ############################################################################# - - -def _get_repo_short_name() -> str: - dir_name = "." 
- include_host_name = False - repo_name = hgit.get_repo_full_name_from_dirname(dir_name, include_host_name) - _LOG.debug("repo_name=%s", repo_name) - # ck/cmamp - short_repo_name = repo_name.split("/")[1] - _LOG.debug("short_repo_name=%s", short_repo_name) - return short_repo_name - - -def execute_only_in_target_repo(target_name: str) -> None: - repo_short_name = _get_repo_short_name() - if repo_short_name != target_name: - pytest.skip(f"Only run on {target_name} and not {repo_short_name}") - - -# TODO(gp): Remove and use pytest.skipif(). -def execute_only_on_ci() -> None: - is_inside_ci_ = hserver.is_inside_ci() - if not is_inside_ci_: - pytest.skip("Only run in CI") - - -def execute_only_on_dev4() -> None: - is_dev4_ = hserver.is_dev4() - if not is_dev4_: - pytest.skip("Only run on dev4") - - -def execute_only_on_dev_csfy() -> None: - is_dev_csfy_ = hserver.is_dev_csfy() - if not is_dev_csfy_: - pytest.skip("Only run on dev CSFY") - - -def execute_only_on_mac(*, version: Optional[str] = None) -> None: - is_host_mac_ = hserver.is_host_mac() - if version: - is_host_mac_ = hserver.is_host_mac_version(version) - if not is_host_mac_: - pytest.skip(f"Only run on Mac with version={version}") - - -def check_env_to_str( - self_: Any, expected: str, *, skip_secrets_vars: bool = False -) -> None: - actual = henv.env_to_str(system_signature=False) - actual = hunitest.filter_text("get_name", actual) - actual = hunitest.filter_text("get_repo_map", actual) - actual = hunitest.filter_text("CSFY_HOST_", actual) - if skip_secrets_vars: - # TODO(gp): Difference between amp and cmamp. - actual = hunitest.filter_text( - "AM_AWS_|CSFY_AWS_|GH_ACTION_ACCESS_TOKEN", actual - ) - self_.assert_equal(actual, expected, fuzzy_match=True, purify_text=True) - - -def is_test_file(file_path: str) -> bool: - """ - Check if a file is a test file. 
- - A file is considered a test file if: - - It contains "/test/" in its path, OR - - Its basename starts with "test_", OR - - Its basename ends with "_test.py" - - :param file_path: path to check - :return: True if file_path is a test file, False otherwise - """ - return ( - "/test/" in file_path - or file_path.split("/")[-1].startswith("test_") - or file_path.endswith("_test.py") - ) - - -def get_test_file_for_source(source_file: str) -> Optional[str]: - """ - Map a source Python file to its corresponding test file. - - E.g., helpers/hdbg.py -> helpers/test/test_hdbg.py - - :param source_file: path to a source Python file - :return: path to corresponding test file if it exists and source is not - already a test file; None otherwise - """ - if is_test_file(source_file): - return None - base_name = os.path.basename(source_file) - dir_name = os.path.dirname(source_file) - test_file = os.path.join(dir_name, "test", f"test_{base_name}") - if os.path.exists(test_file): - return test_file - return None - - -def get_test_files_for_sources(files: List[str]) -> List[str]: - """ - Map a list of source files to their corresponding test files. - - Filters out test files from the input list, then maps each source file - to its corresponding test file using `get_test_file_for_source`. - - :param files: list of file paths (may include both source and test files) - :return: list of test files that exist for the source files - """ - source_files = [f for f in files if not is_test_file(f)] - test_files = [] - for file_path in source_files: - test_file = get_test_file_for_source(file_path) - if test_file: - test_files.append(test_file) - return test_files - - -def get_parent_dirs(files: List[str]) -> List[str]: - """ - Get the minimal set of parent directories that contain all given files. - - Extracts the parent directory of each file, removes duplicates, and then - removes any directory that is a subdirectory of another directory in the - set. 
Files at the root level (with empty parent dir) are assigned to ".". - - Example: - Input: ["dev_scripts_helpers/scraping/process_hn_article.py", - "dev_scripts_helpers/scraping/test/__init__.py", - "helpers/hgit.py", - "helpers/lib_tasks_utils.py"] - Output: ["dev_scripts_helpers/scraping", "helpers"] - - :param files: list of file paths - :return: list of minimal parent directories - """ - if not files: - return [] - dirs = set() - for file_path in files: - dir_path = os.path.dirname(file_path) - if not dir_path: - dir_path = "." - dirs.add(dir_path) - dirs = sorted(dirs) - minimal_dirs = [] - for d in dirs: - is_subdir = False - for other_d in dirs: - if d != other_d and d.startswith(other_d + "/"): - is_subdir = True - break - if not is_subdir: - minimal_dirs.append(d) - return minimal_dirs - - -# ############################################################################# -# System call capture utilities -# ############################################################################# - - -@contextlib.contextmanager -def capture_system_calls( - side_effect: Optional[Any] = None, -) -> Generator[List[Dict[str, Any]], None, None]: - """ - Context manager that captures all system calls to `subprocess.run()` and - `hsystem._system()`, returning them as a list of invocations. - - Each invocation is a dict with 'function', 'args', and 'kwargs' keys. - - :param side_effect: Exception or return value to use for mocked calls - :return: List of invocations, each as {'function': str, 'args': tuple, - 'kwargs': dict} - - Example: - ``` - with capture_system_calls() as invocations: - my_function() - # Check captured calls. 
- assert len(invocations) == 1 - assert invocations[0]['function'] == 'subprocess.run' - ``` - """ - invocations: List[Dict[str, Any]] = [] - - def mock_subprocess_run(*args: Any, **kwargs: Any) -> Any: - invocations.append( - { - "function": "subprocess.run", - "args": args, - "kwargs": kwargs, - } - ) - if side_effect is not None: - if isinstance(side_effect, type) and issubclass( - side_effect, BaseException - ): - raise side_effect() - elif isinstance(side_effect, BaseException): - raise side_effect - return None - - def mock_hsystem(*args: Any, **kwargs: Any) -> Any: - invocations.append( - { - "function": "hsystem._system", - "args": args, - "kwargs": kwargs, - } - ) - if side_effect is not None: - if isinstance(side_effect, type) and issubclass( - side_effect, BaseException - ): - raise side_effect() - elif isinstance(side_effect, BaseException): - raise side_effect - return (0, "") # Return code and output - - with mock.patch("subprocess.run", side_effect=mock_subprocess_run): - with mock.patch("helpers.hsystem._system", side_effect=mock_hsystem): - yield invocations diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py deleted file mode 100644 index 18aea68c5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hversion.py +++ /dev/null @@ -1,300 +0,0 @@ -""" -Import as: - -import helpers.hversion as hversio -""" - -# This code implements version control for code -# The code version is used in two circumstances: -# 1) when any code using `hdbg.py` (which is included everywhere) starts in -# order to verify that the running code and the container in which the code -# is running are compatible -# 2) when a container is built to know what version of the code was used to build -# it - -import functools -import 
logging -import os -import re -from typing import List, Optional, cast - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -# This module can depend only on: -# - Python standard modules -# - a few helpers as described in `helpers/dependencies.txt` - -_LOG = logging.getLogger(__name__) - - -_INFO = "\033[36mINFO\033[0m" -_WARNING = "\033[33mWARNING\033[0m" -_ERROR = "\033[31mERROR\033[0m" -# -_VERSION_RE = r"\d+\.\d+\.\d+" - - -# Copied from helpers.hgit to avoid circular dependencies. - - -@functools.lru_cache() -def _is_inside_submodule(git_dir: str = ".") -> bool: - """ - Return whether a dir is inside a Git submodule or a Git supermodule. - - We determine this checking if the current Git repo is included - inside another Git repo. - """ - cmd = [] - # - Find the git root of the current directory - # - Check if the dir one level up is a valid Git repo - # Go to the dir. - cmd.append(f"cd {git_dir}") - # > cd im/ - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd.append('cd "$(git rev-parse --show-toplevel)/.."') - # > git rev-parse --is-inside-work-tree - # true - cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") - cmd_as_str = " && ".join(cmd) - rc = hsystem.system(cmd_as_str, abort_on_error=False) - ret: bool = rc == 0 - return ret - - -@functools.lru_cache() -def _get_client_root(super_module: bool) -> str: - """ - Return the full path of the root of the Git client. - - E.g., `/Users/saggese/src/.../amp`. - - :param super_module: if True use the root of the Git super_module, - if we are in a submodule. Otherwise use the Git sub_module root - """ - if super_module and _is_inside_submodule(): - # https://stackoverflow.com/questions/957928 - # > cd /Users/saggese/src/.../amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/... 
- cmd = "git rev-parse --show-superproject-working-tree" - else: - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd = "git rev-parse --show-toplevel" - # TODO(gp): Use system_to_one_line(). - _, out = hsystem.system_to_string(cmd) - out = out.rstrip("\n") - hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") - client_root: str = os.path.realpath(out) - return client_root - - -# End copy. - - -def get_changelog_version( - container_dir_name: str, *, file_name: str = None -) -> Optional[str]: - """ - Return latest version from changelog.txt file. - - :param container_dir_name: container directory relative to the root - directory - :param file_name: changelog file name - """ - version: Optional[str] = None - supermodule = True - root_dir = _get_client_root(supermodule) - # Note: for `amp` as submodule one should pass `container_dir_name` relative - # to the root, e.g., `amp/optimizer` and not just `optimizer`. - hdbg.dassert_ne(container_dir_name, "") - if file_name is None: - file_name = "changelog.txt" - changelog_file = os.path.join(root_dir, container_dir_name, file_name) - hdbg.dassert_file_exists(changelog_file) - changelog = hio.from_file(changelog_file) - match = re.search(_VERSION_RE, changelog) - if match: - version = match.group() - return version - - -def get_container_version() -> Optional[str]: - """ - Return the container version. - - :return: container code version from the env var - """ - container_version: Optional[str] = None - if hserver.is_inside_docker(): - env_var = "AM_CONTAINER_VERSION" - if env_var not in os.environ: - # This can happen when GH Actions pull the image using invoke - # inside their container (but not inside ours), thus there is no - # AM_CONTAINER_VERSION. - print( - _WARNING - + f": The env var {env_var} should be defined when running inside a" - " container" - ) - else: - # We are running inside a container. 
- # Keep the code and the container in sync by versioning both and - # requiring to be the same. - container_version = os.environ["AM_CONTAINER_VERSION"] - return container_version - - -def _check_version(code_version: str, container_version: str) -> bool: - """ - Check whether the code version and the container version are the same. - - :param code_version: code version from the changelog - :param container_version: container code version from the env var - :return: whether the versions are the same or not - """ - # Since the code version from the changelog is extracted with the - # `_VERSION_RE` regex, we apply the same regex to the container version - # to keep the representations comparable. - match = re.search(_VERSION_RE, container_version) - hdbg.dassert( - match, - ( - "Invalid format of the container code version '%s'; " - "it should contain a number like '1.0.0'" - ), - container_version, - ) - container_version = match.group() # type: ignore - # Check if the versions are the same. - is_ok = container_version == code_version - if not is_ok: - msg = f""" - ----------------------------------------------------------------------------- - This code is not in sync with the container: - code_version='{code_version}' != container_version='{container_version}' - ----------------------------------------------------------------------------- - You need to: - - merge origin/master into your branch with `invoke git_merge_master` - - pull the latest container with `invoke docker_pull` - """ - msg = hprint.dedent(msg) - # Highlight in red. - # TODO(gp): Use the proper function, if dependencies allow it. - msg = f"\033[31m{msg}\033[0m" - print(msg) - if False: - raise RuntimeError(msg) - return is_ok - - -def check_version(container_dir_name: str) -> None: - """ - Check that the code and container code have compatible version, otherwise - raises `RuntimeError`. 
- - :param container_dir_name: container directory relative to the root - directory - """ - # TODO(gp): -> CK_SKIP_VERSION_CHECK. - if "SKIP_VERSION_CHECK" in os.environ: - # Skip the check altogether. - return - # Get code version. - code_version = get_changelog_version(container_dir_name) - container_version = get_container_version() - # Check version, if possible. - if container_version is None: - # No need to check. - return - code_version = cast(str, code_version) - _check_version(code_version, container_version) - - -def get_latest_changelog_entry( - changelog_path: str, -) -> dict: - """ - Parse the latest changelog entry from a changelog file. - - :param changelog_path: path to the changelog.txt file - :return: dict with keys: 'version', 'date', 'changes' (list of - change lines) - """ - hdbg.dassert_file_exists(changelog_path) - changelog = hio.from_file(changelog_path) - lines = changelog.split("\n") - version = None - date = None - changes = [] - in_entry = False - for line in lines: - line = line.rstrip() - # Check for version header (e.g., "# csfy-2.2.0"). - version_match = re.match(r"^#\s+(.+)$", line) - if version_match: - if version is None: - # This is the first (latest) entry. - version = version_match.group(1) - in_entry = True - else: - # We've reached the next entry, stop. - break - elif in_entry: - # Check for date (e.g., "- 2025-10-06"). - date_match = re.match(r"^-\s+(\d{4}-\d{2}-\d{2})$", line) - if date_match and date is None: - date = date_match.group(1) - # Collect change lines. - elif line.startswith("- ") and not date_match: - changes.append(line) - return {"version": version, "date": date, "changes": changes} - - -def bump_version(version: str, *, bump_type: str = "minor") -> str: - """ - Bump a semantic version number. 
- - :param version: version string in format X.Y.Z (e.g., "2.2.0") - :param bump_type: type of version bump - "major", "minor", or "patch" - :return: bumped version string - """ - hdbg.dassert_in(bump_type, ("major", "minor", "patch")) - # Parse version using regex. - match = re.match(r"^(\d+)\.(\d+)\.(\d+)$", version) - hdbg.dassert( - match, - f"Invalid version format: '{version}'. Expected X.Y.Z format.", - ) - major, minor, patch = map(int, match.groups()) - # Bump according to type. - if bump_type == "major": - major += 1 - minor = 0 - patch = 0 - elif bump_type == "minor": - minor += 1 - patch = 0 - else: # patch - patch += 1 - return f"{major}.{minor}.{patch}" - - -def get_container_version_info() -> str: - txt_tmp: List[str] = [] - # - container_version = str(get_container_version()) - txt_tmp.append(f"container_version='{container_version}'") - # - container_dir_name = "." - changelog_version = str(get_changelog_version(container_dir_name)) - txt_tmp.append(f"changelog_version='{changelog_version}'") - # - txt = hprint.to_info("Container version", txt_tmp) - return txt diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py deleted file mode 100644 index ea8392f6e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwall_clock_time.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Import as: - -import helpers.hwall_clock_time as hwacltim -""" - -# This should have no dependencies besides Python standard libraries since it's used -# in `helpers/hlogging.py`. 
- -import datetime -import logging -from typing import Callable, Optional, Union - -_LOG = logging.getLogger(__name__) - -# ############################################################################# -# Simulated real time -# ############################################################################# - -# Copied from `helpers/hdatetime.py` -# -# Function returning the current (true, replayed, simulated) wall-clock time as a -# timestamp. -_GetWallClockTime = Callable[[], "pd.Timestamp"] # noqa: F821 - -_get_wall_clock_time_func: Optional[_GetWallClockTime] = None - - -def set_wall_clock_time(get_wall_clock_time_func_: _GetWallClockTime) -> None: - """ - Set the global function to retrieve the wall clock time. - """ - assert callable(get_wall_clock_time_func_) - global _get_wall_clock_time_func - _get_wall_clock_time_func = get_wall_clock_time_func_ - - -def get_wall_clock_time_func() -> Optional[_GetWallClockTime]: - """ - Retrieve the global function retrieve the wall clock time. - """ - return _get_wall_clock_time_func - - -# We don't want to import `Pandas` just for a type. -def get_wall_clock_time() -> Optional["pd.Timestamp"]: # noqa: F821 - """ - Return the wall clock time (according to the set function) or `None` if no - function was set. - """ - func = _get_wall_clock_time_func - if func is None: - timestamp = None - else: - timestamp = func() - return timestamp - - -# ############################################################################# -# Real-world / machine real time. -# ############################################################################# - - -# TODO(Sameep): Redundant fuction replace by `hdatetime.timestamp_to_str()`. -def to_timestamp_str( - timestamp: "pd.Timestamp", # noqa: F821 - include_msec: bool = False, -) -> str: - if include_msec: - # Chop the last 4 miliseconds digits. This is needed for CcxtBroker_v2. 
- return timestamp.strftime("%Y%m%d_%H%M%S%f")[:-4] - else: - return timestamp.strftime("%Y%m%d_%H%M%S") - - -# This is redundant with `hdatetime.get_current_time()` and -# `hdateti.get_current_timestamp_as_string()` but we keep them to simplify -# dependencies. -def get_machine_wall_clock_time( - *, - as_str: bool = False, - include_msec: bool = False, -) -> Union[str, datetime.datetime]: - ret = datetime.datetime.utcnow() - if as_str: - ret = to_timestamp_str(ret, include_msec) - return ret - - -# ############################################################################# -# Current bar being processed. -# ############################################################################# - - -_CURR_BAR_TIMESTAMP: Optional["pd.Timestamp"] = None # noqa: F821 - - -def reset_current_bar_timestamp() -> None: - global _CURR_BAR_TIMESTAMP - _LOG.debug("Reset") - _CURR_BAR_TIMESTAMP = None - - -def set_current_bar_timestamp(timestamp: "pd.Timestamp") -> None: # noqa: F821 - _LOG.debug("timestamp=%s", timestamp) - global _CURR_BAR_TIMESTAMP - if _CURR_BAR_TIMESTAMP is not None: - # TODO(Grisha): should we relax the check by using - # `<=` instead of `<`? 
- assert _CURR_BAR_TIMESTAMP < timestamp, ( - "Bar timestamp can only move forward: " - + f"{_CURR_BAR_TIMESTAMP} <= {timestamp}" - ) - _CURR_BAR_TIMESTAMP = timestamp - - -def get_current_bar_timestamp( - *, - as_str: bool = False, - include_msec: bool = False, -) -> Optional[Union[str, "pd.Timestamp"]]: # noqa: F821 - ret = _CURR_BAR_TIMESTAMP - if _CURR_BAR_TIMESTAMP and as_str: - ret = to_timestamp_str(ret, include_msec=include_msec) - return ret diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py deleted file mode 100644 index 4f740f572..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/hwarnings.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -When this module is imported certain annoying warnings are disabled. - -Import as: - -import helpers.hwarnings as hwarnin -""" - -if False: - _WARNING = "\033[33mWARNING\033[0m" - print(f"{_WARNING}: Disabling annoying warnings") - -# Avoid dependency from other `helpers` modules, such as `helpers.hprint`, to -# prevent import cycles. - -import warnings - -# From https://docs.python.org/3/library/warnings.html - -# TODO(gp): For some reason "once" doesn't work, so we ignore all of the warnings. -action = "ignore" - -try: - import statsmodels # noqa: F401 - - _HAS_STATSMODELS = True -except ImportError: - _HAS_STATSMODELS = False - - -if _HAS_STATSMODELS: - # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1910: - # InterpolationWarning: The test statistic is outside of the range of p-values - # available in the look-up table. The actual p-value is greater than the - # p-value returned. 
- from statsmodels.tools.sm_exceptions import InterpolationWarning - - # warnings.simplefilter("ignore", category=InterpolationWarning) - - # /venv/lib/python3.8/site-packages/statsmodels/tsa/stattools.py:1906: - # InterpolationWarning: The test statistic is outside of the range of p-values - # available in the look-up table. The actual p-value is smaller than the - # p-value returned. - warnings.filterwarnings( - action, - category=InterpolationWarning, - module=".*statsmodels.*", - lineno=1906, - append=False, - ) - - warnings.filterwarnings( - action, - category=InterpolationWarning, - module=".*statsmodels.*", - lineno=1910, - append=False, - ) - - -# /venv/lib/python3.8/site-packages/ipykernel/ipkernel.py:283: -# DeprecationWarning: `should_run_async` will not call `transform_cell` -# automatically in the future. Please pass the result to `transformed_cell` -# argument and any exception that happen during thetransform in -# `preprocessing_exc_tuple` in IPython 7.17 and above. -# and should_run_async(code) -warnings.filterwarnings( - action, - category=DeprecationWarning, - module=".*ipykernel.*", - lineno=283, - append=False, -) - - -# TODO(gp): Add this TqdmExperimentalWarning - -try: - import pandas as pd - - _HAS_PANDAS = True -except ImportError: - _HAS_PANDAS = False - - -if _HAS_PANDAS: - pd.set_option("mode.chained_assignment", None) - # TODO(gp): We should fix the issues and re-enable. 
- # See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - # row["net_cost"] -= cost - # /app/amp/oms/order_processing/order_processor.py:376: SettingWithCopyWarning: - # A value is trying to be set on a copy of a slice from a DataFrame - - # /venv/lib/python3.8/site-packages/pandas/io/sql.py:761: UserWarning: pandas - # only support SQLAlchemy connectable(engine/connection) ordatabase string URI or - # sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider - # using SQLAlchemy - # - # This seems a false alarm: - # https://github.com/pandas-dev/pandas/issues/45660#issuecomment-1077355514 - warnings.filterwarnings( - action, - category=UserWarning, - module=".*pandas.*", - lineno=761, - append=False, - ) - - # run_leq_node: 38%|███▊ | 3/8 [00:05<00:09, 1.98s/it]/app/amp/helpers/hdbg.py:309: PerformanceWarning: indexing past lexsort depth may impact performance. - # cond = value in valid_values - warnings.filterwarnings( - action, - category=pd.errors.PerformanceWarning, - module=".*hdbg.py.*", - lineno=309, - append=False, - ) - - # run_leq_node: 0%| | 0/8 [00:00 str: - """ - Get the shared configs S3 bucket. - - :param environment: environment to get the shared configs for - :return: shared configs S3 bucket - """ - hdbg.dassert_in(environment, ["prod", "preprod", "test"]) - bucket_name = hrecouti.get_repo_config().get_shared_configs_bucket_name( - environment - ) - hdbg.dassert_is_not( - bucket_name, - None, - f"Shared configs bucket is not defined in `repo_config.yaml` for environment: {environment}", - ) - return bucket_name - - -def _get_ecs_task_definition_template(environment: str) -> Dict[str, Any]: - """ - Get the ECS task definition template. 
- - :return: ECS task definition template - """ - s3_bucket = _get_shared_configs_s3_bucket(environment) - s3_path = f"{s3_bucket}/{environment}/templates/ecs/ecs_task_definition_template.json" - hs3.dassert_is_s3_path(s3_path) - task_definition_config = hs3.from_file( - s3_path, aws_profile=haws.AWS_PROFILE[environment] - ) - task_definition_config = json.loads(task_definition_config) - return task_definition_config - - -def _get_efs_mount_config_template(environment: str) -> Dict[str, Any]: - """ - Get the EFS mount config template. - - :return: EFS mount config template - """ - s3_bucket = _get_shared_configs_s3_bucket(environment) - s3_path = ( - f"{s3_bucket}/{environment}/templates/efs/efs_mount_config_template.json" - ) - hs3.dassert_is_s3_path(s3_path) - efs_config = hs3.from_file( - s3_path, aws_profile=haws.AWS_PROFILE[environment] - ) - efs_config = json.loads(efs_config) - return efs_config - - -def _set_task_definition_config( - task_definition_config: Dict, - task_definition_name: str, - region: str, - environment: str, -) -> Dict[str, Any]: - """ - Update template of ECS task definition with concrete values. - - :param task_definition_config: task definition config template - :param task_definition_name: name of the task definition - :param region: region to create the task definition in - :return: full formed task definition config dictionary - """ - # Replace placeholder values inside container definition - # from the template with concrete values. - # We use single container inside our task definition and - # the convention is to set the same name as the task - # definition itself. - task_definition_config["containerDefinitions"][0]["name"] = ( - task_definition_name - ) - # Set placeholder image URL. - # Get the base registry URL in the base region. - base_registry_url = hrecouti.get_repo_config().get_container_registry_url() - # Build the region-specific ECR registry URL for the target region. 
- # ECR registry URL format: `{account_id}.dkr.ecr.{region}.amazonaws.com`. - account_id = base_registry_url.split(".")[0] - registry_url = f"{account_id}.dkr.ecr.{region}.amazonaws.com" - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - # Make sure that the ECR replication is configured for the target region, - # so images are available in any new regions. - task_definition_config["containerDefinitions"][0]["image"] = ( - _IMAGE_URL_TEMPLATE.format(registry_url, image_name) - ) - # Set log configuration options. - log_config_opts = copy.deepcopy(_TASK_DEFINITION_LOG_OPTIONS_TEMPLATE) - log_config_opts["awslogs-group"] = log_config_opts["awslogs-group"].format( - task_definition_name - ) - log_config_opts["awslogs-region"] = region - task_definition_config["containerDefinitions"][0]["logConfiguration"][ - "options" - ] = log_config_opts - # Index is based on the order of the environment variables in the template. - # Set environment variable `CSFY_ECR_BASE_PATH`. - task_definition_config["containerDefinitions"][0]["environment"][0][ - "value" - ] = registry_url - # Set environment variable `CSFY_AWS_DEFAULT_REGION`. - task_definition_config["containerDefinitions"][0]["environment"][1][ - "value" - ] = region - # Configure access to EFS. - efs_config = _get_efs_mount_config_template(environment) - task_definition_config["volumes"] = efs_config[region]["volumes"] - task_definition_config["containerDefinitions"][0]["mountPoints"] = ( - efs_config[region]["mountPoints"] - ) - return task_definition_config - - -def _register_task_definition( - task_definition_name: str, region: str, environment: str -) -> None: - """ - Register a new ECS task definition. - - :param task_definition_name: name of the new task definition. - :param config_file: path to the JSON file containing the task - definition configuration. 
- :param region: region to create the task definition in - :param environment: environment to create the task definition in - """ - task_definition_config = _get_ecs_task_definition_template(environment) - client = haws.get_ecs_client(haws.AWS_PROFILE[environment], region=region) - # Prevent overwriting existing task definition if it exists. - if haws.is_task_definition_exists(task_definition_name, region=region): - _LOG.info( - "Task definition %s already exists in region %s", - task_definition_name, - region, - ) - return - # - task_definition_config = _set_task_definition_config( - task_definition_config, task_definition_name, region, environment - ) - client.register_task_definition( - family=task_definition_name, - taskRoleArn=task_definition_config.get("taskRoleArn", ""), - executionRoleArn=task_definition_config["executionRoleArn"], - networkMode=task_definition_config["networkMode"], - containerDefinitions=task_definition_config["containerDefinitions"], - volumes=task_definition_config.get("volumes", []), - placementConstraints=task_definition_config.get( - "placementConstraints", [] - ), - requiresCompatibilities=task_definition_config[ - "requiresCompatibilities" - ], - cpu=task_definition_config["cpu"], - memory=task_definition_config["memory"], - ) - _LOG.info( - "Registered new task definition: %s in region %s", - task_definition_name, - region, - ) - - -def aws_update_ecs_task_definition( - *, - task_definition: str, - image_tag: str, - region: str, - environment: str, -) -> None: - """ - Update an existing ECS task definition. - - :param task_definition: the name of the ECS task definition for - which an update to container image URL is made, e.g. cmamp-test - :param image_tag: the hash of the new candidate image, e.g. 
- 13538588e - :param region: region to update the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - old_image_url = haws.get_task_definition_image_url( - task_definition, environment=environment, region=region - ) - # Edit container version, e.g. cmamp:prod-12a45 - > cmamp:prod-12b46`. - new_image_url = re.sub("prod-(.+)$", f"prod-{image_tag}", old_image_url) - haws.update_task_definition( - task_definition, new_image_url, region=region, environment=environment - ) - - -@task -def aws_create_test_task_definition( - ctx, - issue_id: Optional[int] = None, - region: str = hs3.AWS_EUROPE_REGION_1, -) -> None: - """ - Create a new ECS task definition. - - :param issue_id: issue ID to create the task definition for - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - # Check if the `issue_id` provided is valid. - hdbg.dassert_is_not(issue_id, None, "issue_id is required") - is_valid_issue_id = str(issue_id).isdigit() - hdbg.dassert(is_valid_issue_id, f"issue_id '{issue_id}' must be an integer") - # Check if the `region` provided is valid. - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-test-{issue_id}" - # Register task definition. - _register_task_definition( - task_definition_name, region=region, environment="test" - ) - - -@task -def aws_create_preprod_task_definition( - ctx, - region: str = hs3.AWS_EUROPE_REGION_1, -) -> None: - """ - Create a new ECS task definition for preprod environment. - - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-preprod" - # Register task definition. 
- _register_task_definition( - task_definition_name, region=region, environment="preprod" - ) - - -@task -def aws_create_prod_task_definition( - ctx, - region: str = hs3.AWS_US_REGION_1, -) -> None: - """ - Create a new ECS task definition. - - :param region: region to create the task definition in - """ - _ = ctx - hlitauti.report_task() - hdbg.dassert_in(region, hs3.AWS_REGIONS) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-prod" - # Register task definition. - _register_task_definition( - task_definition_name, region=region, environment="prod" - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py deleted file mode 100644 index 111fa2815..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_bash.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_bash as hlitabas -""" - -import logging -import os - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hfile_tree as hfiltree -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): GFI: Unit test. -@task -def bash_print_path(ctx): # type: ignore - """ - Print the bash path. - """ - _ = ctx - cmd = r"echo $PATH | sed 's/:/\n/g'" - _, ret = hsystem.system_to_string(cmd) - paths = ret.split("\n") - paths.sort() - # - all_paths = [] - # Remove empty lines. 
- for path in paths: - if path.strip() == "": - _LOG.error("Empty path: '%s'", path) - continue - if not os.path.exists(path): - _LOG.error("Dir doesn't exist: '%s'", path) - continue - if not os.path.isdir(path): - _LOG.error("Not a dir: '%s'", path) - continue - # TODO(gp): Make it efficient. - if paths.count(path) > 1: - _LOG.error("Duplicate path: '%s'", path) - continue - all_paths.append(path) - # Print the paths. - _LOG.info("Valid paths:") - for path in all_paths: - print(path) - - -@task -def bash_print_tree( # type: ignore - ctx, - path=".", - depth=0, - clean=False, - include_tests=False, - include_python=False, - only_dirs=False, - output="", -): - """ - Print a directory tree, and optionally update or create a markdown file. - - ``` - # To print tree for current directory: - > i bash_print_tree - - # Limit depth to 2 and include test files: - > i bash_print_tree --path="devops" --depth=2 --include-tests - - # Include python files: - > i bash_print_tree --path="devops" --include-python - - # Only show directories: - > i bash_print_tree --path="devops" --only-dirs - - # Write the tree to file, preserving comments: - > i bash_print_tree --path="devops" --output="README.md" - ``` - - :param path: directory path to traverse - :param depth: maximum depth to traverse - :param clean: clean untracked files in directory - :param include_tests: include test files or directories - :param include_python: include python files - :param only_dirs: only show directories - :param output: path of the markdown file to create or update - """ - _ = ctx - hdbg.dassert_lte(0, depth, "Depth must be non-negative: %s", depth) - if clean: - cmd = "git clean -fd" - hlitauti.run(ctx, cmd) - tree = hfiltree.generate_tree( - path, depth, include_tests, include_python, only_dirs, output - ) - print(tree) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py deleted file mode 100644 index f7dcadc54..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker.py +++ /dev/null @@ -1,1590 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_docker as hlitadoc -""" - -import functools -import getpass -import logging -import os -import re -from typing import Any, Dict, List, Optional, Union, cast - -# TODO(gp): We should use `pip install types-PyYAML` to get the mypy stubs. -import yaml -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hdict as hdict -import helpers.hdocker as hdocker -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hsecrets as hsecret -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Basic Docker commands. 
-# ############################################################################# - - -def _get_docker_exec(sudo: bool) -> str: - docker_exec = "docker" - if sudo: - docker_exec = "sudo " + docker_exec - return docker_exec - - -# //////////////////////////////////////////////////////////////////////////// -# Docker login -# //////////////////////////////////////////////////////////////////////////// - - -@functools.lru_cache() -def _get_aws_cli_version() -> int: - # > aws --version - # aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49 - # aws-cli/1.20.1 Python/3.9.5 Darwin/19.6.0 botocore/1.20.106 - cmd = "aws --version" - res = hsystem.system_to_one_line(cmd)[1] - # Parse the output. - m = re.match(r"aws-cli/((\d+)\.\d+\.\d+)\s", res) - hdbg.dassert_is_not(m, None, "Can't parse '%s'", res) - assert m is not None - version = m.group(1) - _LOG.debug("version=%s", version) - major_version = int(m.group(2)) - _LOG.debug("major_version=%s", major_version) - return major_version - - -def _check_docker_login(repo_name: str) -> bool: - """ - Check if we are already logged in to the Docker registry `repo_name`. - """ - file_name = os.path.join(os.environ["HOME"], ".docker/config.json") - json_data = hio.from_json(file_name) - # > more ~/.docker/config.json - # ``` - # { - # "auths": { - # "623860924167.dkr.ecr.eu-north-1.amazonaws.com": {}, - # "665840871993.dkr.ecr.us-east-1.amazonaws.com": {}, - # "https://index.docker.io/v1/": {} - # }, - # ``` - _LOG.debug("json_data=%s", json_data) - is_logged = any(repo_name in val for val in json_data["auths"].keys()) - return is_logged - - -def _docker_login_dockerhub() -> None: - """ - Log into the Docker Hub which is a public Docker image registry. - """ - # Check if we are already logged in to the target registry. 
- # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20 - use_cache = False - if use_cache: - is_logged = _check_docker_login("623860924167.dkr.ecr") - if is_logged: - _LOG.warning("Already logged in to the target registry: skipping") - return - _LOG.info("Logging in to the target registry") - secret_id = "causify_dockerhub" - secret = hsecret.get_secret(secret_id) - username = hdict.typed_get(secret, "username", expected_type=str) - password = hdict.typed_get(secret, "password", expected_type=str) - cmd = f"docker login -u {username} -p {password}" - hsystem.system(cmd, suppress_output=False) - - -def _docker_login_ecr() -> None: - """ - Log in the AM Docker repo_short_name on AWS. - """ - hlitauti.report_task() - if hserver.is_inside_ci(): - _LOG.warning("Running inside GitHub Action: skipping `docker_login`") - return - # TODO(gp): Enable caching https://github.com/causify-ai/helpers/issues/20 - use_cache = False - if use_cache: - # Check if we are already logged in to the target registry. - is_logged = _check_docker_login("623860924167.dkr.ecr") - if is_logged: - _LOG.warning("Already logged in to the target registry: skipping") - return - _LOG.info("Logging in to the target registry") - # Log in the target registry. - major_version = _get_aws_cli_version() - # docker login \ - # -u AWS \ - # -p eyJ... \ - # -e none \ - # https://*****.dkr.ecr.us-east-1.amazonaws.com - # TODO(gp): Move this to var in repo_config.py. - # TODO(gp): Hack - profile = "ck" - region = hs3.AWS_EUROPE_REGION_1 - cmd = "" - if major_version == 1: - cmd = f"eval $(aws ecr get-login --profile {profile} --no-include-email --region {region})" - elif major_version == 2: - if profile == "ck": - env_var = "CSFY_ECR_BASE_PATH" - else: - env_var = f"{profile.upper()}_ECR_BASE_PATH" - ecr_base_path = hlitauti.get_default_param(env_var) - # TODO(Nikola): Remove `_get_aws_cli_version()` and use only `aws ecr get-login-password` - # as it is present in both versions of `awscli`. 
- cmd = ( - "docker login -u AWS -p " - f"$(aws ecr get-login-password --profile {profile}) " - f"https://{ecr_base_path}" - ) - else: - NotImplementedError( - f"Docker login for awscli v{major_version} is not implemented!" - ) - # TODO(Grisha): fix properly. We pass `ctx` despite the fact that we do not - # need it with `use_system=True`, but w/o `ctx` invoke tasks (i.e. ones - # with `@task` decorator) do not work. - hsystem.system(cmd, suppress_output=False) - - -@task -def docker_login(ctx, target_registry="aws_ecr.ck"): # type: ignore - """ - Log in the target registry and skip if we are in kaizenflow. - - :param ctx: invoke context - :param target_registry: target Docker image registry to log in to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - """ - _ = ctx - hlitauti.report_task() - # No login required as the `helpers` and `tutorials` images are accessible - # on the public DockerHub registry. - if not hserver.is_dev_csfy() and hrecouti.get_repo_config().get_name() in [ - "//helpers", - "//tutorials", - ]: - _LOG.warning("Skipping Docker login process for Helpers or Tutorials") - return - # We run everything using `hsystem.system(...)` but `ctx` is needed - # to make the function work as an invoke target. - if target_registry == "aws_ecr.ck": - _docker_login_ecr() - elif target_registry == "dockerhub.causify": - _docker_login_dockerhub() - else: - raise ValueError(f"Invalid Docker image registry='{target_registry}'") - - -@task -def docker_images_ls_repo(ctx, sudo=False): # type: ignore - """ - List images in the logged in repo_short_name. - """ - hlitauti.report_task() - docker_login(ctx) - # TODO(gp): Move this to a var ECR_BASE_PATH="CSFY_ECR_BASE_PATH" in repo_config.py. 
- ecr_base_path = hlitauti.get_default_param("CSFY_ECR_BASE_PATH") - docker_exec = _get_docker_exec(sudo) - hlitauti.run(ctx, f"{docker_exec} image ls {ecr_base_path}") - - -# //////////////////////////////////////////////////////////////////////////////// -# Version. -# //////////////////////////////////////////////////////////////////////////////// - - -_IMAGE_VERSION_RE = r"\d+\.\d+\.\d+" - - -def _dassert_is_version_valid(version: str) -> None: - """ - Check that the version is valid, i.e. looks like `1.0.0`. - """ - hdbg.dassert_isinstance(version, str) - hdbg.dassert_ne(version, "") - regex = rf"^({_IMAGE_VERSION_RE})$" - _LOG.debug("Testing with regex='%s'", regex) - m = re.match(regex, version) - hdbg.dassert(m, "Invalid version: '%s'", version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Image. -# //////////////////////////////////////////////////////////////////////////////// - - -# This pattern aims to match the full image name including -# both registry and image path. -# Examples of valid matches include: -# - '623860924167.dkr.ecr.eu-north-1.amazonaws.com/cmamp' -# - 'ghcr.io/cryptokaizen/cmamp' -# This change is introduced to match the GHCR registry path, -# since it already includes `/` in the registry name itself. -_FULL_IMAGE_NAME_RE = r"([a-z0-9]+(-[a-z0-9]+)*\.)*[a-z]{2,}(\/[a-z0-9_-]+){1,2}" -_IMAGE_USER_RE = r"[a-z0-9_-]+" -# For candidate prod images which have added hash for easy identification. -_IMAGE_HASH_RE = r"[a-z0-9]{9}" -_IMAGE_STAGE_RE = rf"(local(?:-{_IMAGE_USER_RE})?|dev|prod|prod(?:-{_IMAGE_USER_RE})(?:-{_IMAGE_HASH_RE})?|prod(?:-{_IMAGE_HASH_RE})?)" - - -# TODO(Grisha): call `_dassert_is_base_image_name_valid()` and a separate -# function that validates an image tag. -def dassert_is_image_name_valid(image: str) -> None: - """ - Check whether an image name is valid. 
- - Invariants: - - Local images contain a username and a version - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0` - - `dev` and `prod` images have an instance with a version and one without - to indicate the latest - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0` - and `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` - - `prod` candidate image has an optional tag (e.g., a username) and - a 9 character hash identifier corresponding Git commit - - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-4rf74b83a` - - and `*****.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-saggese-4rf74b83a` - - An image should look like: - - *****.dkr.ecr.us-east-1.amazonaws.com/amp:dev - *****.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0 - *****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0 - ghcr.io/cryptokaizen/cmamp:dev - """ - regex = "".join( - [ - # E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/cmamp` - # or `sorrentum/cmamp` or ghcr.io/cryptokaizen/cmamp. - rf"^{_FULL_IMAGE_NAME_RE}", - # E.g., `:local-saggese`. - rf"(:{_IMAGE_STAGE_RE})?", - # E.g., `-1.0.0`. - rf"(-{_IMAGE_VERSION_RE})?$", - ] - ) - _LOG.debug("Testing with regex='%s'", regex) - m = re.match(regex, image) - hdbg.dassert(m, "Invalid image: '%s'", image) - - -def _dassert_is_base_image_name_valid(base_image: str) -> None: - """ - Check that the base image is valid, i.e. looks like below. - - *****.dkr.ecr.us-east-1.amazonaws.com/amp ghcr.io/cryptokaizen/cmamp - """ - regex = rf"^{_FULL_IMAGE_NAME_RE}$" - _LOG.debug("regex=%s", regex) - m = re.match(regex, base_image) - hdbg.dassert(m, "Invalid base_image: '%s'", base_image) - - -# TODO(Grisha): instead of using `base_image` which is Docker registry address -# + image name, use those as separate parameters. See CmTask5074. -def _get_base_image(base_image: str) -> str: - """ - :return: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - """ - if base_image == "": - # TODO(gp): Use os.path.join. 
- base_image = ( - hlitauti.get_default_param("CSFY_ECR_BASE_PATH") - + "/" - + hlitauti.get_default_param("BASE_IMAGE") - ) - _dassert_is_base_image_name_valid(base_image) - return base_image - - -# This code path through Git tag was discontinued with CmTask746. -# def get_git_tag( -# version: str, -# ) -> str: -# """ -# Return the tag to be used in Git that consists of an image name and -# version. -# :param version: e.g., `1.0.0`. If None, the latest version is used -# :return: e.g., `amp-1.0.0` -# """ -# hdbg.dassert_is_not(version, None) -# _dassert_is_version_valid(version) -# base_image = hlibtaskut.get_default_param("BASE_IMAGE") -# tag_name = f"{base_image}-{version}" -# return tag_name - - -# TODO(gp): Consider using a token "latest" in version, so that it's always a -# string and we avoid a special behavior encoded in None. -def get_image( - base_image: str, - stage: str, - version: Optional[str], -) -> str: - """ - Return the fully qualified image name. - - For local stage, it also appends the username to the image name. - - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param stage: e.g., `local`, `dev`, `prod` - :param version: e.g., `1.0.0`, if None empty, the latest version is used - :return: e.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local` or - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:local-1.0.0` - """ - # Docker refers the default image as "latest", although in our stage - # nomenclature we call it "dev". - hdbg.dassert_in(stage, "local dev prod".split()) - # Get the base image. - base_image = _get_base_image(base_image) - _dassert_is_base_image_name_valid(base_image) - # Get the full image name. - image = [base_image] - # Handle the stage. - image.append(f":{stage}") - if stage == "local": - user = hsystem.get_user_name() - image.append(f"-{user}") - # Handle the version. 
- if version is not None and version != "": - _dassert_is_version_valid(version) - image.append(f"-{version}") - # - image = "".join(image) - dassert_is_image_name_valid(image) - return image - - -@task -def docker_remove_image(ctx, base_image="") -> None: # type: ignore - """ - Delete the current dev image to free up disk space. - - :param base_image: base name of the image (e.g., `*****.dkr.ecr.us- - east-1.amazonaws.com/amp`) - """ - # Display disk space before cleanup. - _LOG.info("Disk space before cleanup:") - hsystem.system("df -h", suppress_output=False) - # Handle the image. - stage = "dev" - version = "" - image = get_image(base_image, stage, version) - _LOG.info("Deleting Docker image: %s", image) - # Get Docker executable configuration. - use_sudo = hdocker.get_use_sudo() - docker_exec = hdocker.get_docker_executable(use_sudo) - # Delete the specific image. - cmd = f"{docker_exec} rmi -f {image}" - _LOG.info("Running: %s", cmd) - try: - result = hsystem.system(cmd, abort_on_error=False, suppress_output=False) - if result != 0: - _LOG.warning( - "Docker image deletion failed with exit code %s for image: %s", - result, - image, - ) - else: - _LOG.info("Successfully deleted Docker image: %s", image) - except Exception as e: - _LOG.error("Error during Docker image deletion: %s", e) - # Display disk space after cleanup. - _LOG.info("Disk space after cleanup:") - hsystem.system("df -h", suppress_output=False) - - -@task -def docker_ps(ctx, sudo=False): # type: ignore - # pylint: disable=line-too-long - """ - List all the running containers. 
- - ``` - > docker_ps - CONTAINER ID user IMAGE COMMAND CREATED STATUS PORTS service - 2ece37303ec9 gp *****....:latest "./docker_build/entry.sh" 5 seconds ago Up 4 seconds user_space - ``` - """ - hlitauti.report_task() - # pylint: enable=line-too-long - fmt = ( - r"""table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}""" - + r"\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}" - + r'\t{{.Label "com.docker.compose.service"}}' - ) - docker_exec = _get_docker_exec(sudo) - cmd = f"{docker_exec} ps --format='{fmt}'" - cmd = hlitauti._to_single_line_cmd(cmd) - hlitauti.run(ctx, cmd) - - -def _get_last_container_id(sudo: bool) -> str: - docker_exec = _get_docker_exec(sudo) - # Get the last started container. - cmd = f"{docker_exec} ps -l | grep -v 'CONTAINER ID'" - # CONTAINER ID IMAGE COMMAND CREATED - # 90897241b31a eeb33fe1880a "/bin/sh -c '/bin/bash ... - _, txt = hsystem.system_to_one_line(cmd) - # Parse the output: there should be at least one line. - hdbg.dassert_lte(1, len(txt.split(" ")), "Invalid output='%s'", txt) - container_id: str = txt.split(" ")[0] - return container_id - - -@task -def docker_stats( # type: ignore - ctx, - all=False, # pylint: disable=redefined-builtin - sudo=False, -): - # pylint: disable=line-too-long - """ - Report last started Docker container stats, e.g., CPU, RAM. 
- - ``` - > docker_stats - CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS - 2ece37303ec9 ..._user_space_run_30 0.00% 15.74MiB / 31.07GiB 0.05% 351kB / 6.27kB 34.2MB / 12.3kB 4 - ``` - - :param all: report stats for all the containers - """ - # pylint: enable=line-too-long - hlitauti.report_task(txt=hprint.to_str("all")) - _ = ctx - fmt = ( - r"table {{.ID}}\t{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" - + r"\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" - ) - docker_exec = _get_docker_exec(sudo) - cmd = f"{docker_exec} stats --no-stream --format='{fmt}'" - _, txt = hsystem.system_to_string(cmd) - if all: - output = txt - else: - # Get the id of the last started container. - container_id = _get_last_container_id(sudo) - print(f"Last container id={container_id}") - # Parse the output looking for the given container. - txt = txt.split("\n") - output = [] - # Save the header. - output.append(txt[0]) - for line in txt[1:]: - if line.startswith(container_id): - output.append(line) - # There should be at most two rows: the header and the one corresponding to - # the container. - hdbg.dassert_lte( - len(output), 2, "Invalid output='%s' for '%s'", output, txt - ) - output = "\n".join(output) - print(output) - - -@task -def docker_kill( # type: ignore - ctx, - all=False, # pylint: disable=redefined-builtin - sudo=False, -): - """ - Kill the last Docker container started. - - :param all: kill all the containers (be careful!) - :param sudo: use sudo for the Docker commands - """ - hlitauti.report_task(txt=hprint.to_str("all")) - docker_exec = _get_docker_exec(sudo) - # Last container. - opts = "-l" - if all: - _LOG.warning("Killing all the containers") - # TODO(gp): Ask if we are sure and add a --just-do-it option. - opts = "-a" - # Print the containers that will be terminated. - cmd = f"{docker_exec} ps {opts}" - hlitauti.run(ctx, cmd) - # Kill. 
- cmd = f"{docker_exec} rm -f $({docker_exec} ps {opts} -q)" - hlitauti.run(ctx, cmd) - - -# docker system prune -# docker container ps -f "status=exited" -# docker container rm $(docker container ps -f "status=exited" -q) -# docker rmi $(docker images --filter="dangling=true" -q) - -# pylint: disable=line-too-long -# Remove the images with hash -# > docker image ls -# REPOSITORY TAG IMAGE ID CREATED SIZE -# *****.dkr.ecr.us-east-2.amazonaws.com/im 07aea615a2aa9290f7362e99e1cc908876700821 d0889bf972bf 6 minutes ago 684MB -# *****.dkr.ecr.us-east-2.amazonaws.com/im rc d0889bf972bf 6 minutes ago 684MB -# python 3.7-slim-buster e7d86653f62f 14 hours ago 113MB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp 415376d58001e804e840bf3907293736ad62b232 e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp dev e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp local e6ea837ab97f 18 hours ago 1.65GB -# *****.dkr.ecr.us-east-1.amazonaws.com/amp 9586cc2de70a4075b9fdcdb900476f8a0f324e3e c75d2447da79 18 hours ago 1.65GB -# pylint: enable=line-too-long - - -# ############################################################################# -# Docker development. -# ############################################################################# - -# TODO(gp): We might want to organize the code in a base class using a Command -# pattern, so that it's easier to generalize the code for multiple repos. -# -# class DockerCommand: -# def pull(): -# ... -# def cmd(): -# ... -# -# For now we pass the customizable part through the default params. - - -# //////////////////////////////////////////////////////////////////////////// -# Docker pull. -# //////////////////////////////////////////////////////////////////////////// - - -def _docker_pull( - ctx: Any, base_image: str, stage: str, version: Optional[str] -) -> None: - """ - Pull images from the registry. 
- """ - docker_login(ctx) - # - image = get_image(base_image, stage, version) - _LOG.info("image='%s'", image) - dassert_is_image_name_valid(image) - cmd = f"docker pull {image}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -def docker_pull(ctx, stage="dev", version=None, skip_pull=False): # type: ignore - """ - Pull latest dev image corresponding to the current repo from the registry. - - :param skip_pull: if True skip pulling the docker image - """ - hlitauti.report_task() - if stage == "local": - _LOG.warning("Setting skip_pull to True for local stage") - skip_pull = True - if skip_pull: - _LOG.warning("Skipping pulling docker image as per user request") - return - # - base_image = "" - _docker_pull(ctx, base_image, stage, version) - - -@task -def docker_pull_helpers(ctx, stage="prod", version=None): # type: ignore - """ - Pull latest prod image of `helpers` from the registry. - - :param ctx: invoke context - :param stage: stage of the Docker image - :param version: version of the Docker image - """ - base_image = hlitauti.get_default_param("CSFY_ECR_BASE_PATH") + "/helpers" - _LOG.debug("base_image=%s", base_image) - _docker_pull(ctx, base_image, stage, version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Compose files. -# //////////////////////////////////////////////////////////////////////////////// - -# TODO(gp): All this code can become `DockerComposeFileGenerator`. - -# There are several combinations to consider: -# - whether the Docker host can run with / without privileged mode -# - amp as submodule / as supermodule -# - different supermodules for amp - -# TODO(gp): use_privileged_mode -> use_docker_privileged_mode -# use_sibling_container -> use_docker_containers_containers - -DockerComposeServiceSpec = Dict[str, Union[str, List[str]]] - - -def _get_linter_service(stage: str) -> DockerComposeServiceSpec: - """ - Get the linter service specification for the `tmp.docker-compose.yml` file. 
- - :return: linter service specification - """ - superproject_path, submodule_path = hgit.get_path_from_supermodule() - if superproject_path: - # We are running in a Git submodule. - work_dir = f"/src/{submodule_path}" - repo_root = superproject_path - else: - work_dir = "/src" - repo_root = os.getcwd() - # TODO(gp): To avoid linter getting confused between `Sequence[str]` and - # `List[str]`, we should assign one element at the time. - linter_service_spec = { - "extends": "base_app", - "volumes": [ - f"{repo_root}:/src", - ], - "working_dir": work_dir, - "environment": [ - "MYPYPATH", - ], - } - if stage != "prod": - # When we run a development Linter container, we need to mount the - # Linter repo under `/app`. For prod container instead we copy / freeze - # the repo code in `/app`, so we should not mount it. - volumes = cast(List[str], linter_service_spec["volumes"]) - if superproject_path: - # When running in a Git submodule we need to go one extra level up. - # TODO(*): Clean up the indentation, #2242 (also below). - volumes.append("../../../:/app") - else: - volumes.append("../../:/app") - if stage == "prod": - # Use the `repo_config.py` inside the helpers container instead of - # the one in the calling repo. - environment = cast(List[str], linter_service_spec["environment"]) - environment.append("CSFY_REPO_CONFIG_PATH=/app/repo_config.py") - return linter_service_spec - - -# TODO(gp): Remove mount_as_submodule -def _generate_docker_compose_file( - stage: str, - use_privileged_mode: bool, - use_sibling_container: bool, - shared_data_dirs: Optional[Dict[str, str]], - mount_as_submodule: bool, - use_network_mode_host: bool, - use_main_network: bool, - file_name: Optional[str], -) -> str: - """ - Generate `tmp.docker-compose.yml` file and save it. - - :param shared_data_dirs: data directory in the host filesystem to mount - inside the container. 
`None` means no dir sharing - :param use_main_network: use `main_network` as default network - """ - _LOG.debug( - hprint.to_str( - "use_privileged_mode " - "use_sibling_container " - "shared_data_dirs " - "mount_as_submodule " - "use_network_mode_host " - "use_main_network " - "file_name " - ) - ) - # We could pass the env var directly, like: - # ``` - # - CSFY_ENABLE_DIND=$CSFY_ENABLE_DIND - # ``` - # but we prefer to inline it. - if use_privileged_mode: - CSFY_ENABLE_DIND = 1 - else: - CSFY_ENABLE_DIND = 0 - # ``` - # sysname='Linux' - # nodename='cf-spm-dev4' - # release='3.10.0-1160.53.1.el7.x86_64' - # version='#1 SMP Fri Jan 14 13:59:45 UTC 2022' - # machine='x86_64' - # ``` - csfy_host_os_name = os.uname()[0] - csfy_host_name = os.uname()[1] - csfy_host_os_version = os.uname()[2] - csfy_host_user_name = getpass.getuser() - # We assume that we don't use this code inside a container, since otherwise - # we would need to distinguish the container style (see - # docs/work_tools/docker/all.dockerized_flow.explanation.md) to find the - # outermost Git root. - if not hserver.is_inside_unit_test(): - hdbg.dassert(not hserver.is_inside_docker()) - else: - # We call this function as part of the unit tests, which we run insider - # the container. - pass - git_host_root_path = hgit.find_git_root() - # Find git root path in the container. - # The Git root is always mounted in the container at `/app`. So we need to - # use that as starting point. - # E.g. For CSFY_GIT_ROOT_PATH, we need to use `/app`, rather than - # `/data/dummy/src/cmamp1`. - # E.g. For CSFY_HELPERS_ROOT_PATH, we need to use `/app/helpers_root`. - # rather than `/data/dummy/src/cmamp1/helpers_root`. - git_root_path = "/app" - # Find helpers root path in the container. 
- helper_dir = hgit.find_helpers_root() - helper_relative_path = os.path.relpath(helper_dir, git_host_root_path) - helper_root_path = os.path.normpath( - os.path.join(git_root_path, helper_relative_path) - ) - # A super repo is a repo that contains helpers as a submodule and - # is not a helper itself. - use_helpers_as_nested_module = ( - 0 if hgit.is_in_helpers_as_supermodule() else 1 - ) - # We could do the same also with IMAGE for symmetry. - # Keep the env vars in sync with what we print in `henv.get_env_vars()`. - # Configure `base_app` service. - # TODO(gp): Use henv.get_env_vars() to get the env vars. - environment = [ - f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}", - "CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", - f"CSFY_HOST_NAME={csfy_host_name}", - f"CSFY_HOST_OS_NAME={csfy_host_os_name}", - f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", - f"CSFY_HOST_USER_NAME={csfy_host_user_name}", - "CSFY_REPO_CONFIG_CHECK=True", - # Use inferred path for `repo_config.py`. - "CSFY_REPO_CONFIG_PATH=", - "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION", - "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE", - "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET", - "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY", - "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN", - "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH", - # The path of the outermost Git root on the host. - f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}", - # The path of the outermost Git root in the Docker container. - f"CSFY_GIT_ROOT_PATH={git_root_path}", - # The path of the helpers dir in the Docker container (e.g., - # `/app`, `/app/helpers_root`) - f"CSFY_HELPERS_ROOT_PATH={helper_root_path}", - f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}", - "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN", - # This env var is used by GH Action to signal that we are inside the - # CI. It's set up by default by the GH Action runner. 
See: - # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables - "CSFY_CI=$CSFY_CI", - # TODO(Vlad): consider removing, locally we use our personal tokens - # from files and inside GitHub actions we use the `GH_TOKEN` - # environment variable. - ] - environment.extend( - [ - "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", - # Inside GitHub Actions we use `GH_TOKEN` environment variable, - # see https://cli.github.com/manual/gh_auth_login. - "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", - ] - ) - api_key_env_vars = henv.get_api_key_env_vars() - environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars]) - # - base_app_spec = { - "cap_add": ["SYS_ADMIN"], - "environment": environment, - "image": "${IMAGE}", - "restart": "no", - "volumes": [ - # TODO(gp): We should pass the value of $HOME from dev.Dockerfile to here. - # E.g., we might define $HOME in the env file. - "~/.aws:/home/.aws", - "~/.config/gspread_pandas/:/home/.config/gspread_pandas/", - "~/.config/gh:/home/.config/gh", - "~/.ssh:/home/.ssh", - ], - } - if use_privileged_mode: - # This is needed: - # - for Docker-in-docker (dind) - # - to mount fstabs - base_app_spec["privileged"] = use_privileged_mode - if shared_data_dirs: - # Mount shared dirs. - shared_volumes = [ - f"{host}:{container}" for host, container in shared_data_dirs.items() - ] - # Mount all dirs that are specified. - base_app_spec["volumes"].extend(shared_volumes) - if False: - # No need to mount file systems. - base_app_spec["volumes"].append("../docker_build/fstab:/etc/fstab") - if use_sibling_container: - # Use sibling-container approach. - base_app_spec["volumes"].append( - "/var/run/docker.sock:/var/run/docker.sock" - ) - if False: - base_app_spec["deploy"] = { - "resources": { - "limits": { - # This should be passed from command line depending on how much - # memory is available. 
- "memory": "60G", - }, - }, - } - if use_network_mode_host: - # Default network mode set to host so we can reach e.g. - # a database container pointing to localhost:5432. - # In tests we use dind so we need set back to the default "bridge". - # See CmTask988 and https://stackoverflow.com/questions/24319662 - base_app_spec["network_mode"] = "${NETWORK_MODE:-host}" - # Configure `app` service. - # Mount `amp` when it is used as submodule. In this case we need to - # mount the super project in the container (to make git work with the - # supermodule) and then change dir to `amp`. - app_spec = { - "extends": "base_app", - } - # Use absolute path of the dir to mount the volume and set working dir. - # The `app_dir` dir points to the root of the repo. - # The `working_dir` points to the path of the runnable dir. - # - If the runnable dir is the root of the repo, then `working_dir` is `/app`. - # - If the runnable dir is a subdirectory of the repo, then `working_dir` is `/app/subdir`. - curr_dir = os.getcwd() - rel_dir1 = os.path.relpath(curr_dir, git_host_root_path) - rel_dir2 = os.path.relpath(git_host_root_path, curr_dir) - app_dir = os.path.abspath(os.path.join(curr_dir, rel_dir2)) - working_dir = os.path.normpath(os.path.join("/app", rel_dir1)) - app_spec["volumes"] = [f"{app_dir}:/app"] - app_spec["working_dir"] = working_dir - # Configure `linter` service. - linter_spec = _get_linter_service(stage) - # Configure `jupyter_server` service. - # For Jupyter server we cannot use "host" network_mode because - # it is incompatible with the port bindings. - jupyter_server = { - "command": "devops/docker_run/run_jupyter_server.sh", - "environment": [ - "PORT=${PORT}", - ], - "extends": "app", - "network_mode": "${NETWORK_MODE:-bridge}", - # TODO(gp): Rename `AM_PORT`. - "ports": [ - "${PORT}:${PORT}", - ], - } - # Configure `jupyter_server_test` service. - # TODO(gp): For some reason the following doesn't work. 
- # jupyter_server_test: - # command: jupyter notebook -h 2>&1 >/dev/null - # extends: - # jupyter_server - jupyter_server_test = { - "command": "jupyter notebook -h 2>&1 >/dev/null", - "environment": [ - "PORT=${PORT}", - ], - "extends": "app", - "network_mode": "${NETWORK_MODE:-bridge}", - "ports": [ - "${PORT}:${PORT}", - ], - } - # Specify structure of the docker-compose file. - docker_compose = { - "version": "3", - "services": { - "base_app": base_app_spec, - "app": app_spec, - "linter": linter_spec, - "jupyter_server": jupyter_server, - "jupyter_server_test": jupyter_server_test, - }, - } - # Configure networks. - if use_main_network: - docker_compose["networks"] = {"default": {"name": "main_network"}} - - class _Dumper(yaml.Dumper): - """ - A custom YAML Dumper class that adjusts indentation. - """ - - def increase_indent(self_: Any, flow=False, indentless=False) -> Any: - """ - Override the method to modify YAML indentation behavior. - """ - return super().increase_indent(flow=False, indentless=False) - - # Convert the dictionary to YAML format. - yaml_str = yaml.dump( - docker_compose, - Dumper=_Dumper, - default_flow_style=False, - indent=2, - sort_keys=False, - ) - yaml_str = cast(str, yaml_str) - # Save YAML to file if file_name is specified. - if file_name: - if os.path.exists(file_name) and hserver.is_inside_ci(): - # Permission error is raised if we try to overwrite existing file. - # See CmTask #2321 for detailed info. - compose_directory = os.path.dirname(file_name) - hsystem.system(f"sudo rm -rf {compose_directory}") - hio.to_file(file_name, yaml_str) - return yaml_str - - -def get_base_docker_compose_path() -> str: - """ - Return the absolute path to the Docker compose file. - - E.g., `devops/compose/tmp.docker-compose.yml`. - """ - # Add the default path. - dir_name = "devops/compose" - # TODO(gp): Factor out the piece below. 
- docker_compose_path = "tmp.docker-compose.yml" - docker_compose_path = os.path.join(dir_name, docker_compose_path) - docker_compose_path = os.path.abspath(docker_compose_path) - return docker_compose_path - - -def _get_docker_compose_files( - stage: str, - generate_docker_compose_file: bool, - service_name: str, - extra_docker_compose_files: Optional[List[str]], -) -> List[str]: - """ - Generate the Docker compose file and return the list of Docker compose - paths. - - :return: list of the Docker compose paths - """ - docker_compose_files = [] - # Get the repo short name (e.g., `amp`). - repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - _LOG.debug("repo_short_name=%s", repo_short_name) - # Check submodule status, if needed. - mount_as_submodule = False - if repo_short_name in ("amp", "cmamp"): - # Check if `amp` is a submodule. - path, _ = hgit.get_path_from_supermodule() - if path != "": - _LOG.warning("amp is a submodule") - mount_as_submodule = True - # Write Docker compose file. - file_name = get_base_docker_compose_path() - if service_name == "linter": - # Since we are running the prod `helpers` container we need to use the - # settings from the `repo_config` from that container, and not the settings - # launch the container corresponding to this repo. - enable_privileged_mode = False - use_docker_sibling_containers = False - get_shared_data_dirs = None - use_docker_network_mode_host = False - use_main_network = False - else: - # Use the settings from the `repo_config` corresponding to this container. 
- enable_privileged_mode = hserver.enable_privileged_mode() - use_docker_sibling_containers = hserver.use_docker_sibling_containers() - get_shared_data_dirs = hserver.get_shared_data_dirs() - use_docker_network_mode_host = hserver.use_docker_network_mode_host() - use_main_network = hserver.use_main_network() - # - if generate_docker_compose_file: - _generate_docker_compose_file( - stage, - enable_privileged_mode, - use_docker_sibling_containers, - get_shared_data_dirs, - mount_as_submodule, - use_docker_network_mode_host, - use_main_network, - file_name, - ) - else: - _LOG.warning("Skipping generating Docker compose file '%s'", file_name) - docker_compose_files.append(file_name) - # Add the compose files from command line. - if extra_docker_compose_files: - hdbg.dassert_isinstance(extra_docker_compose_files, list) - docker_compose_files.extend(extra_docker_compose_files) - # Add the compose files from the global params. - key = "DOCKER_COMPOSE_FILES" - if hlitauti.has_default_param(key): - docker_compose_files.append(hlitauti.get_default_param(key)) - # - _LOG.debug(hprint.to_str("docker_compose_files")) - for docker_compose in docker_compose_files: - hdbg.dassert_path_exists(docker_compose) - return docker_compose_files - - -_IMAGE_VERSION_FROM_CHANGELOG = "FROM_CHANGELOG" - - -def resolve_version_value( - version: str, - *, - container_dir_name: str = ".", -) -> str: - """ - Pass a version (e.g., 1.0.0) or a symbolic value (e.g., FROM_CHANGELOG) and - return the resolved value of the version. - - :return: full version with patch for prod (e.g., 1.3.2) - """ - hdbg.dassert_isinstance(version, str) - if version == _IMAGE_VERSION_FROM_CHANGELOG: - version = hversio.get_changelog_version(container_dir_name) - _dassert_is_version_valid(version) - prod_version = version - return prod_version - - -def to_dev_version(prod_version: str) -> str: - """ - Pass a prod version (e.g., 1.1.1) and strip the patch value. 
- - :return: stripped version without patch for dev (e.g., 1.1.0) - """ - hdbg.dassert_isinstance(prod_version, str) - _dassert_is_version_valid(prod_version) - # Strip patch value from the version. - dev_version = prod_version.split(".")[:-1] - dev_version = ".".join(dev_version) + ".0" - return dev_version - - -def dassert_is_subsequent_version( - version: str, - *, - container_dir_name: str = ".", -) -> None: - """ - Check that `version` is bigger than the current one as specified in the - changelog. - """ - if version != _IMAGE_VERSION_FROM_CHANGELOG: - current_version = hversio.get_changelog_version(container_dir_name) - hdbg.dassert_lte(current_version, version) - - -# //////////////////////////////////////////////////////////////////////////////// -# Misc. -# //////////////////////////////////////////////////////////////////////////////// - - -def _run_docker_as_user(as_user_from_cmd_line: bool) -> bool: - as_root = hserver.run_docker_as_root() - as_user = as_user_from_cmd_line - if as_root: - as_user = False - _LOG.debug( - "as_user_from_cmd_line=%s as_root=%s -> as_user=%s", - as_user_from_cmd_line, - as_root, - as_user, - ) - return as_user - - -def _get_container_name(service_name: str) -> str: - """ - Create a container name based on various information. - - E.g., `grisha.cmamp.app.cmamp1.20220317_232120` - - The information used to build a container is: - - Linux username - - Base Docker image name - - Service name - - Project directory that was used to start a container - - Container start timestamp - - :param service_name: `docker-compose` service name, e.g., `app` - :return: container name - """ - hdbg.dassert_ne(service_name, "", "You need to specify a service name") - # Get linux username. - linux_user = hsystem.get_user_name() - # Get dir name. - project_dir = hgit.get_project_dirname() - # Get Docker image base name. - image_name = hlitauti.get_default_param("BASE_IMAGE") - # Get current timestamp. 
- current_timestamp = hlitauti.get_ET_timestamp() - # Build container name. - container_name = f"{linux_user}.{image_name}.{service_name}.{project_dir}.{current_timestamp}" - _LOG.debug( - "get_container_name: container_name=%s", - container_name, - ) - return container_name - - -def _get_docker_base_cmd( - base_image: str, - stage: str, - version: str, - service_name: str, - # Params from `_get_docker_compose_cmd()`. - generate_docker_compose_file: bool, - extra_env_vars: Optional[List[str]], - extra_docker_compose_files: Optional[List[str]], - skip_docker_image_compatibility_check: bool, -) -> List[str]: - r""" - Get base `docker-compose` command encoded as a list of strings. - - It can be used as a base to build more complex commands, e.g., `run`, `up`, - `down`. - - E.g., - ``` - ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', - '\n docker-compose', - '\n --file amp/devops/compose/tmp.docker-compose.yml', - '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', - '\n --env-file devops/env/default.env'] - ``` - :param generate_docker_compose_file: whether to generate or reuse the existing - Docker compose file - :param extra_env_vars: represent vars to add, e.g., `["PORT=9999", "DRY_RUN=1"]` - :param extra_docker_compose_files: `docker-compose` override files - :param skip_docker_image_compatibility_check: if True, skip checking image - architecture compatibility - """ - _LOG.debug(hprint.func_signature_to_str()) - docker_cmd_: List[str] = [] - # - Handle the image. - image = get_image(base_image, stage, version) - _LOG.debug("base_image=%s stage=%s -> image=%s", base_image, stage, image) - dassert_is_image_name_valid(image) - # The check is mainly for developers to avoid using the wrong image (e.g., - # an x86 vs ARM architecture). - # We can skip the image compatibility check during the CI or when - # explicitly skipped. 
- if not (hserver.is_inside_ci() or skip_docker_image_compatibility_check): - hdocker.check_image_compatibility_with_current_arch(image) - else: - _LOG.warning("Skipping docker image compatibility check") - docker_cmd_.append(f"IMAGE={image}") - # - Handle extra env vars. - if extra_env_vars: - hdbg.dassert_isinstance(extra_env_vars, list) - for env_var in extra_env_vars: - docker_cmd_.append(f"{env_var}") - # - docker_cmd_.append(r""" - docker compose""") - docker_compose_files = _get_docker_compose_files( - stage, - generate_docker_compose_file, - service_name, - extra_docker_compose_files, - ) - file_opts = " ".join([f"--file {dcf}" for dcf in docker_compose_files]) - _LOG.debug(hprint.to_str("file_opts")) - # TODO(gp): Use something like `.append(rf"{space}{...}")` - docker_cmd_.append(rf""" - {file_opts}""") - # - Handle the env file. - env_file = "devops/env/default.env" - docker_cmd_.append(rf""" - --env-file {env_file}""") - return docker_cmd_ - - -def _get_docker_compose_cmd( - base_image: str, - stage: str, - version: str, - cmd: str, - *, - # TODO(gp): make these params mandatory. - extra_env_vars: Optional[List[str]] = None, - extra_docker_compose_files: Optional[List[str]] = None, - extra_docker_run_opts: Optional[List[str]] = None, - service_name: str = "app", - use_entrypoint: bool = True, - generate_docker_compose_file: bool = True, - as_user: bool = True, - print_docker_config: bool = False, - use_bash: bool = False, - skip_docker_image_compatibility_check: bool = False, -) -> str: - """ - Get `docker-compose` run command. 
- - E.g., - ``` - IMAGE=*****..dkr.ecr.us-east-1.amazonaws.com/amp:dev \ - docker-compose \ - --file /amp/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name grisha.cmamp.app.cmamp1.20220317_232120 \ - --user $(id -u):$(id -g) \ - app \ - bash - ``` - :param cmd: command to run inside Docker container - :param extra_docker_run_opts: additional `docker-compose` run options - :param service_name: service to use to run a command - :param use_entrypoint: whether to use the `entrypoint.sh` or not - :param generate_docker_compose_file: generate the Docker compose file or not - :param as_user: pass the user / group id or not - :param print_docker_config: print the docker config for debugging purposes - :param use_bash: run command through a shell - :param skip_docker_image_compatibility_check: if True, skip checking image architecture compatibility - """ - _LOG.debug(hprint.func_signature_to_str()) - # - Get the base Docker command. - docker_cmd_ = _get_docker_base_cmd( - base_image, - stage, - version, - service_name, - generate_docker_compose_file, - extra_env_vars, - extra_docker_compose_files, - skip_docker_image_compatibility_check, - ) - # - Add the `config` command for debugging purposes. - docker_config_cmd: List[str] = docker_cmd_[:] - # TODO(gp): Use yaml approach like done for other parts of the code. - docker_config_cmd.append(r""" - config""") - # - Add the `run` command. - docker_cmd_.append(r""" - run \ - --rm""") - # - Add a name to the container. - container_name = _get_container_name(service_name) - docker_cmd_.append(rf""" - --name {container_name}""") - # - Handle the user. - as_user = _run_docker_as_user(as_user) - if as_user: - docker_cmd_.append(r""" - --user $(id -u):$(id -g)""") - # - Handle the extra docker options. 
- if extra_docker_run_opts: - hdbg.dassert_isinstance(extra_docker_run_opts, list) - extra_opts = " ".join(extra_docker_run_opts) - docker_cmd_.append(rf""" - {extra_opts}""") - # - Handle entrypoint. - if use_entrypoint: - docker_cmd_.append(rf""" - {service_name}""") - if cmd: - if use_bash: - cmd = f"bash -c '{cmd}'" - docker_cmd_.append(rf""" - {cmd}""") - else: - # No entrypoint. - docker_cmd_.append(rf""" - --entrypoint bash \ - {service_name}""") - # Print the config for debugging purpose. - if print_docker_config: - docker_config_cmd_as_str = hlitauti.to_multi_line_cmd(docker_config_cmd) - _LOG.debug("docker_config_cmd=\n%s", docker_config_cmd_as_str) - _LOG.debug( - "docker_config=\n%s", - hsystem.system_to_string(docker_config_cmd_as_str)[1], - ) - # Print the config for debugging purpose. - docker_cmd_: str = hlitauti.to_multi_line_cmd(docker_cmd_) - return docker_cmd_ - - -# //////////////////////////////////////////////////////////////////////////////// -# bash and cmd. -# //////////////////////////////////////////////////////////////////////////////// - - -def _docker_cmd( - ctx: Any, - docker_cmd_: str, - *, - skip_pull: bool = False, - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - Print and execute a Docker command. - - :param kwargs: kwargs for `ctx.run()` - """ - if hserver.is_inside_ci(): - import helpers.hs3 as hs3 - - # Generate files with the AWS settings that are missing when running - # inside CI. - hs3.generate_aws_files() - docker_pull(ctx, skip_pull=skip_pull) - _LOG.debug("cmd=%s", docker_cmd_) - rc: Optional[int] = hlitauti.run( - ctx, docker_cmd_, pty=True, **ctx_run_kwargs - ) - return rc - - -@task -def docker_bash( # type: ignore - ctx, - base_image="", - stage="dev", - version="", - use_entrypoint=True, - as_user=True, - generate_docker_compose_file=True, - container_dir_name=".", - skip_pull=False, - skip_docker_image_compatibility_check=False, -): - """ - Start a bash shell inside the container corresponding to a stage. 
- - :param use_entrypoint: whether to use the `entrypoint.sh` or not - :param as_user: pass the user / group id or not - :param generate_docker_compose_file: generate the Docker compose file or not - :param skip_pull: if True skip pulling the docker image - """ - _LOG.debug(hprint.func_signature_to_str("ctx")) - hlitauti.report_task(container_dir_name=container_dir_name) - # - cmd = "bash" - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - generate_docker_compose_file=generate_docker_compose_file, - use_entrypoint=use_entrypoint, - as_user=as_user, - skip_docker_image_compatibility_check=skip_docker_image_compatibility_check, - ) - _LOG.debug("docker_cmd_=%s", docker_cmd_) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -@task -def docker_cmd( # type: ignore - ctx, - base_image="", - stage="dev", - version="", - cmd="", - as_user=True, - generate_docker_compose_file=True, - use_bash=False, - container_dir_name=".", - skip_pull=False, -): - """ - Execute the command `cmd` inside a container corresponding to a stage. - - :param as_user: pass the user / group id or not - :param generate_docker_compose_file: generate or reuse the Docker - compose file - :param use_bash: run command through a shell - """ - hlitauti.report_task(container_dir_name=container_dir_name) - hdbg.dassert_ne(cmd, "") - # TODO(gp): Do we need to overwrite the entrypoint? - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - generate_docker_compose_file=generate_docker_compose_file, - as_user=as_user, - use_bash=use_bash, - ) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -# //////////////////////////////////////////////////////////////////////////////// -# Jupyter. 
-# //////////////////////////////////////////////////////////////////////////////// - - -def _get_docker_jupyter_cmd( - base_image: str, - stage: str, - version: str, - port: int, - self_test: bool, - *, - use_entrypoint: bool = True, - print_docker_config: bool = False, -) -> str: - cmd = "" - extra_env_vars = [f"PORT={port}"] - extra_docker_run_opts = ["--service-ports"] - service_name = "jupyter_server_test" if self_test else "jupyter_server" - # - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - extra_docker_run_opts=extra_docker_run_opts, - service_name=service_name, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - return docker_cmd_ - - -@task -def docker_jupyter( # type: ignore - ctx, - stage="dev", - version="", - base_image="", - auto_assign_port=True, - use_entrypoint=True, - port=None, - self_test=False, - container_dir_name=".", - skip_pull=False, -): - """ - Run Jupyter notebook server. - - :param auto_assign_port: use the UID of the user and the inferred - number of the repo (e.g., 4 for `~/src/amp4`) to get a unique - port - :param skip_pull: if True skip pulling the docker image - """ - hlitauti.report_task(container_dir_name=container_dir_name) - if port is None: - if auto_assign_port: - uid = os.getuid() - _LOG.debug("uid=%s", uid) - git_repo_idx = hgit.get_project_dirname(only_index=True) - git_repo_idx = int(git_repo_idx) - _LOG.debug("git_repo_idx=%s", git_repo_idx) - # We assume that there are no more than `max_idx_per_users` clients. 
- max_idx_per_user = 10 - hdbg.dassert_lte(git_repo_idx, max_idx_per_user) - port = (uid * max_idx_per_user) + git_repo_idx - else: - port = 9999 - _LOG.info("Assigned port is %s", port) - # - print_docker_config = False - docker_cmd_ = _get_docker_jupyter_cmd( - base_image, - stage, - version, - port, - self_test, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - _docker_cmd(ctx, docker_cmd_, skip_pull=skip_pull) - - -def _get_docker_dash_app_cmd( - base_image: str, - stage: str, - version: str, - port: int, - *, - print_docker_config: bool = False, -) -> str: - cmd = "" - extra_env_vars = [f"PORT={port}"] - extra_docker_run_opts = ["--service-ports"] - service_name = "dash_app" - # - docker_cmd_ = _get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - extra_docker_run_opts=extra_docker_run_opts, - service_name=service_name, - print_docker_config=print_docker_config, - ) - return docker_cmd_ - - -@task -def docker_dash_app( # type: ignore - ctx, - stage="dev", - version="", - base_image="", - auto_assign_port=True, - port=None, - container_dir_name=".", -): - """ - Run dash app. - - :param auto_assign_port: use the UID of the user and the inferred - number of the repo (e.g., 4 for `~/src/amp4`) to get a unique - port - """ - hlitauti.report_task(container_dir_name=container_dir_name) - if port is None: - if auto_assign_port: - uid = os.getuid() - _LOG.debug("uid=%s", uid) - git_repo_idx = hgit.get_project_dirname(only_index=True) - git_repo_idx = int(git_repo_idx) - _LOG.debug("git_repo_idx=%s", git_repo_idx) - # We assume that there are no more than `max_idx_per_users` clients. 
- max_idx_per_user = 10 - hdbg.dassert_lte(git_repo_idx, max_idx_per_user) - port = (uid * max_idx_per_user) + git_repo_idx - else: - port = 9999 - # - _LOG.info("Assigned port is %s", port) - print_docker_config = False - docker_cmd_ = _get_docker_dash_app_cmd( - base_image, - stage, - version, - port, - print_docker_config=print_docker_config, - ) - _docker_cmd(ctx, docker_cmd_) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py deleted file mode 100644 index 4c2149f52..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_docker_release.py +++ /dev/null @@ -1,1890 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_docker_release as hltadore -""" - -import datetime -import logging -import os -from operator import attrgetter -from typing import Any, Optional - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio -import helpers.lib_tasks_aws as hlitaaws -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_pytest as hlitapyt -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_DEFAULT_TARGET_REGISTRY = "aws_ecr.ck" -_LOG = logging.getLogger(__name__) -_AUTO_RELEASE_LABEL = "Automated release" - -# pylint: disable=protected-access - - -# ############################################################################# -# Docker image workflows. 
-# ############################################################################# - - -def _to_abs_path(filename: str) -> str: - filename = os.path.abspath(filename) - hdbg.dassert_path_exists(filename) - return filename - - -def _prepare_docker_ignore( - ctx: Any, - docker_ignore: str, - *, - copy_to_git_root: bool = True, -) -> None: - """ - Copy the target `docker_ignore` in the proper position for `docker build`. - - :param ctx: invoke context - :param docker_ignore: path to the `.dockerignore` file - :param copy_to_git_root: if True, copy the `.dockerignore` file to the - git root directory; otherwise, copy it to the current directory - """ - # Currently there is no built-in way to control which `.dockerignore` to - # use (https://stackoverflow.com/questions/40904409). - hdbg.dassert_path_exists(docker_ignore) - # Since all the runnable dirs copy the entire repo content, we use - # the Git root dir as a docker context so we need to copy the `.dockerignore` - # file to the Git root dir. - if copy_to_git_root: - dest_docker_ignore = os.path.join(hgit.find_git_root(), ".dockerignore") - else: - dest_docker_ignore = ".dockerignore" - cmd = f"cp -f {docker_ignore} {dest_docker_ignore}" - hlitauti.run(ctx, cmd) - - -def _get_dev_version(version: str, container_dir_name: str) -> str: - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - dev_version = hlitadoc.to_dev_version(prod_version) - _LOG.debug("prod_version=%s -> dev_version=%s", prod_version, dev_version) - return dev_version - - -def _create_multiarch_builder( - ctx: Any, -) -> None: - """ - Create a multi-arch builder for Docker buildx. - - :param ctx: invoke context - """ - # Create a multi-arch builder. - platform_builder_name = "multiarch_builder" - cmd = rf""" - docker buildx rm {platform_builder_name} - """ - # We do not abort on error since the platform builder might be present - # or not from previous executions. 
- hsystem.system(cmd, abort_on_error=False) - cmd = rf""" - docker buildx create \ - --name {platform_builder_name} \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use {platform_builder_name} - """ - hlitauti.run(ctx, cmd) - - -# ############################################################################# -# Local/Dev image flow -# ############################################################################# -# - A "local" image (which is a release candidate for the DEV image) is built -# with: -# ``` -# > i docker_build_local_image -# ``` -# - This creates a local image like `helpers:local.saggese-1.0.0` -# - A qualification process (e.g., running all unit tests and the QA tests) is -# performed on the local image (e.g., locally or through GitHub actions) -# - If the qualification process is passed, the image is released as `dev` on -# the registries - - -# Use Docker buildkit or not. -# DOCKER_BUILDKIT = 1 -DOCKER_BUILDKIT = 0 - - -def _build_multi_arch_image( - ctx: Any, - opts: str, - multi_arch: str, - build_args: str, - build_image: str, - dockerfile: str, -) -> None: - """ - Build a multi-architecture Docker image in a remote Docker registry. - - :param ctx: invoke context - :param opts: build options (e.g., --no-cache) - :param multi_arch: target architectures to build for (e.g., - `linux/amd64,linux/arm64`) - :param build_args: build arguments for the Docker build command - :param build_image: name of the image to build - :param dockerfile: path to the Dockerfile to use for building - """ - # Build the multi-arch image. - # Compress the current directory (in order to dereference symbolic - # links) into a tar stream and pipes it to the `docker build` command. - # See HelpersTask197. - cmd = rf""" - tar -czh . 
| DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker buildx build \ - {opts} \ - --push \ - --platform {multi_arch} \ - {build_args} \ - --tag {build_image} \ - --file {dockerfile} \ - - - """ - hlitauti.run(ctx, cmd) - - -def _list_image(ctx: Any, image: str) -> None: - """ - List Docker image. - - :param ctx: invoke context - :param image: docker image reference in REPOSITORY[:TAG] format - Examples: - - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0` - - `*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev` - - `sorrentum/cmamp:dev-1.0.0` - - `ghcr.io/cryptokaizen/cmamp:prod` - """ - cmd = f"docker image ls {image}" - hlitauti.run(ctx, cmd) - - -def _run_tests( - ctx: Any, - stage: str, - version: str, - *, - skip_tests: Optional[bool] = False, - fast_tests: Optional[bool] = True, - slow_tests: Optional[bool] = True, - superslow_tests: Optional[bool] = True, - qa_tests: Optional[bool] = True, -) -> None: - """ - Run tests for a given stage and version. - - :param ctx: invoke context - :param stage: image stage (must be one of `local`, `dev`, or `prod`) - :param version: version to test - :param skip_tests: skip all tests if True - :param fast_tests: run fast tests - :param slow_tests: run slow tests - :param superslow_tests: run superslow tests - :param qa_tests: run QA tests - """ - hdbg.dassert_in(stage, ("local", "dev", "prod")) - if skip_tests: - _LOG.warning("Skipping all tests") - return - if fast_tests: - hlitapyt.run_fast_tests(ctx, stage=stage, version=version) - if slow_tests: - hlitapyt.run_slow_tests(ctx, stage=stage, version=version) - if superslow_tests: - hlitapyt.run_superslow_tests(ctx, stage=stage, version=version) - if qa_tests: - hlitapyt.run_qa_tests(ctx, stage=stage, version=version) - - -# TODO(sandeep): Consider promoting this to an invoke target and removing the callers. -# Reason: the caller invoke targets only contain this helper call. 
-def _docker_tag_and_push_multi_arch_image( - ctx: Any, - version: str, - base_image: str, - target_registry: str, - container_dir_name: str, - source_stage: str, - target_stage: str, -) -> None: - """ - Tag and push a multi-arch image to the target registry using `docker buildx - imagetools`. - - :param ctx: invoke context - :param version: version to tag the image with - :param base_image: base name of the image (e.g., - `*****.dkr.ecr.us-east-1.amazonaws.com/amp`) - :param target_registry: target Docker registry to push to (e.g., - `aws_ecr.ck` or `dockerhub.causify`) - :param container_dir_name: directory where Dockerfile is located - :param source_stage: source stage of the image (must be one of `local` or - `prod`) - :param target_stage: target stage to push the image as (must be one - of `dev` or `prod`) - """ - hdbg.dassert_in(source_stage, ("local", "prod")) - hdbg.dassert_in(target_stage, ("dev", "prod")) - # - hlitadoc.docker_login(ctx, target_registry) - # Get source version string. - if source_stage == "local": - source_stage_version = _get_dev_version(version, container_dir_name) - elif source_stage == "prod": - source_stage_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - else: - raise ValueError( - f"Invalid source stage='{source_stage}' for tagging and pushing" - ) - source_image_versioned = hlitadoc.get_image( - base_image, source_stage, source_stage_version - ) - _LOG.info( - "Pushing the %s image %s to the target_registry %s ", - source_stage, - source_image_versioned, - target_registry, - ) - if target_registry == "aws_ecr.ck": - # Use AWS Docker registry. - target_base_image = "" - elif target_registry == "dockerhub.causify": - # Use public GitHub Docker registry. 
- target_base_image_name = ( - hrecouti.get_repo_config().get_docker_base_image_name() - ) - target_base_image = f"causify/{target_base_image_name}" - else: - raise ValueError( - f"Invalid target Docker image registry='{target_registry}'" - ) - # Only create a versioned image for the 'dev' stage or for the - # `dockerhub.causify` registry. - if target_stage == "dev" or target_registry == "dockerhub.causify": - # Tag and push the source image as versioned target image. - target_versioned_image = hlitadoc.get_image( - target_base_image, target_stage, source_stage_version - ) - cmd = f"docker buildx imagetools create -t {target_versioned_image} {source_image_versioned}" - hlitauti.run(ctx, cmd) - # Tag and push the source image as target image. - target_latest_version = None - target_latest_image = hlitadoc.get_image( - target_base_image, target_stage, version=target_latest_version - ) - cmd = f"docker buildx imagetools create -t {target_latest_image} {source_image_versioned}" - hlitauti.run(ctx, cmd) - - -@task -def docker_push_dev_image( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Push the "dev" image to ECR. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # - dev_version = _get_dev_version(version, container_dir_name) - # - hlitadoc.docker_login(ctx) - # Push Docker versioned tag. - image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version) - cmd = f"docker push {image_versioned_dev}" - hlitauti.run(ctx, cmd, pty=True) - # Push Docker tag. 
- latest_version = None - image_dev = hlitadoc.get_image(base_image, "dev", latest_version) - cmd = f"docker push {image_dev}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -def docker_push_prod_image( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Push the "prod" image to ECR. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # - hlitadoc.docker_login(ctx) - # Push versioned tag. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version) - cmd = f"docker push {image_versioned_prod}" - hlitauti.run(ctx, cmd, pty=True) - # - latest_version = None - image_prod = hlitadoc.get_image(base_image, "prod", latest_version) - cmd = f"docker push {image_prod}" - hlitauti.run(ctx, cmd, pty=True) - - -# TODO(gp): We moved away from versioning of the prod image because we release -# continuously and so it's easier to track the hash. -def _docker_rollback_image( - ctx: Any, - base_image: str, - stage: str, - version: str, - push_to_repo: bool, -) -> None: - """ - Rollback the versioned image for a particular stage and optionally push it - to ECR. - - :param ctx: invoke context - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param stage: select a specific stage for the Docker image (must be - one of `dev` or `prod`) - :param version: version to tag the image and code with - :param push_to_repo: whether to push the rolled back image to ECR - """ - hdbg.dassert_in(stage, ("dev", "prod")) - # TODO(sandeep): Consider removing the redundant pull-push step. Instead of - # pulling the versioned image and pushing it back to ECR, directly push - # the local image. 
However, note that this may not work for multi-arch images - # since local images are arch-specific, while remote tags include all architectures. - # 1) Ensure that version of the image exists locally. - hlitadoc._docker_pull( - ctx, base_image=base_image, stage=stage, version=version - ) - # 2) Promote requested image to target stage. - image_versioned = hlitadoc.get_image(base_image, stage, version) - latest_version = None - image_latest = hlitadoc.get_image(base_image, stage, latest_version) - cmd = f"docker tag {image_versioned} {image_latest}" - hlitauti.run(ctx, cmd) - # 3) Push the image to ECR. - if push_to_repo: - if stage == "dev": - docker_push_dev_image(ctx, version=version) - elif stage == "prod": - docker_push_prod_image(ctx, version=version) - else: - raise ValueError(f"Invalid stage='{stage}' for rollback") - else: - _LOG.warning("Skipping pushing %s image to ECR, as requested", stage) - - -@task -def docker_build_local_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - poetry_mode="update", - container_dir_name=".", - just_do_it=False, - multi_arch="", - cleanup_installation=True, -): - """ - Build a local image, i.e., a release candidate "dev" image. - - :param ctx: invoke context - :param version: version to tag the image with - :param cache: use the cache - :param base_image: the name for the base image - E.g., `*****.dkr.ecr.us-east-1.amazonaws.com/amp`. - For base_image, we use "" as default instead None since `invoke` can - only infer a single type. - :param poetry_mode: - - `update`: run `poetry lock` to update the packages - - `no_update`: it uses the current `poetry.lock` file, if it is valid - according to the constraints. 
This is useful when the goal is to - remove / add / update only a single package without updating - everything - :param container_dir_name: directory where the Dockerfile is located - :param just_do_it: execute the action ignoring the checks - :param multi_arch: - - if not specified, build for the current architecture - - if specified, build for the specified multiple architectures. E.g., - `linux/amd64,linux/arm64` - :param cleanup_installation: force clean up Docker installation. This can - be disabled to speed up the build process - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # For poetry_mode="update", the `poetry.lock` file is updated and saved as - # `/install/poetry.lock.out` to the container. - # For poetry_mode="no_update", the `poetry.lock` file from the repo is used, - # and it's passed as `/install/poetry.lock.in` to the container. - hdbg.dassert_in(poetry_mode, ("update", "no_update")) - if just_do_it: - _LOG.warning("Skipping subsequent version check") - else: - hlitadoc.dassert_is_subsequent_version( - version, container_dir_name=container_dir_name - ) - dev_version = _get_dev_version(version, container_dir_name) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.dev" - _prepare_docker_ignore(ctx, docker_ignore) - # Build the local image. - stage = "local" - image_local = hlitadoc.get_image(base_image, stage, dev_version) - # - dockerfile = "devops/docker_build/dev.Dockerfile" - # Keep the relative path instead of an absolute path to ensure it matches - # files inside the tar stream and avoids file not found errors. - # dockerfile = _to_abs_path(dockerfile) - opts = "--no-cache" if not cache else "" - build_args = [ - ("AM_CONTAINER_VERSION", dev_version), - ("INSTALL_DIND", True), - ("POETRY_MODE", poetry_mode), - ("CLEAN_UP_INSTALLATION", cleanup_installation), - ] - build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_args) - # Build for both a single arch or multi-arch. 
- if multi_arch: - # Login to AWS ECR because for multi-arch we need to build the local - # image remotely. - hlitadoc.docker_login(ctx) - _create_multiarch_builder(ctx) - _build_multi_arch_image( - ctx, opts, multi_arch, build_args, image_local, dockerfile - ) - # TODO(sandeep): If possible, switch to using hlitadoc._docker_pull(). - # Pull the image from registry after building. - cmd = f"docker pull {image_local}" - hlitauti.run(ctx, cmd) - else: - # Build for a single architecture using `docker build`. - # Compress the current directory (in order to dereference symbolic - # links) into a tar stream and pipes it to the `docker build` command. - # See HelpersTask197. - cmd = rf""" - tar -czh . | DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker build \ - {opts} \ - {build_args} \ - --tag {image_local} \ - --file {dockerfile} \ - - - """ - hlitauti.run(ctx, cmd) - # Retrieve the package files, if present. - if poetry_mode == "update": - # TODO(gp): Not sure it works properly for multi-arch build, since on - # different platforms the generated poetry.lock might be different. - # TODO(gp): For some reason we can't use more than one bash command in - # docker_cmd. - cmd = "cp -f /install/poetry.lock.out /install/pip_list.txt ." - opts = [ - "--stage local", - f"--version {version}", - f"--cmd '{cmd}'", - ] - opts.append("--skip-pull") - cmd = "invoke docker_cmd " + " ".join(opts) - hlitauti.run(ctx, cmd) - # The destination dir is always in the same relative position. - dst_dir = "./devops/docker_build" - hdbg.dassert_dir_exists(dst_dir) - cmd = f"cp -f poetry.lock.out {dst_dir}/poetry.lock" - hlitauti.run(ctx, cmd) - cmd = f"cp -f pip_list.txt {dst_dir}/pip_list.txt" - hlitauti.run(ctx, cmd) - # Check image and report stats. - _list_image(ctx, image_local) - - -@task -def docker_tag_local_image_as_dev( # type: ignore - ctx, - version, - base_image="", - container_dir_name=".", -): - """ - Mark the "local" image as "dev". 
- - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # Get the version. - dev_version = _get_dev_version(version, container_dir_name) - # Tag local image as versioned dev image (e.g., `dev-1.0.0`). - image_versioned_local = hlitadoc.get_image(base_image, "local", dev_version) - image_versioned_dev = hlitadoc.get_image(base_image, "dev", dev_version) - cmd = f"docker tag {image_versioned_local} {image_versioned_dev}" - hlitauti.run(ctx, cmd) - # Tag local image as dev image. - latest_version = None - image_dev = hlitadoc.get_image(base_image, "dev", latest_version) - cmd = f"docker tag {image_versioned_local} {image_dev}" - hlitauti.run(ctx, cmd) - - -@task -def docker_release_dev_image( # type: ignore - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - push_to_repo=True, - poetry_mode="update", - container_dir_name=".", -): - """ - Build, test, and release to ECR the latest "dev" image. - - This can be used to test the entire flow from scratch by building an image, - running the tests, and pushing if needed. 
- - Phases: - 1) Build local image - 2) Run the unit tests (e.g., fast, slow, superslow) on the local image - 3) Mark local as dev image - 4) Run the QA tests on the dev image - 5) Push dev image to the repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param push_to_repo: push the image to the repo_short_name - :param poetry_mode: same as - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # 1) Build "local" image. - docker_build_local_image( - ctx, - version, - cache=cache, - poetry_mode=poetry_mode, - container_dir_name=container_dir_name, - ) - # Run resolve after `docker_build_local_image` so that a proper check - # for subsequent version can be made in case `FROM_CHANGELOG` token - # is used. - dev_version = _get_dev_version(version, container_dir_name) - # 2) Run tests for the "local" image. - stage = "local" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=False, - ) - # 3) Promote the "local" image to "dev". - docker_tag_local_image_as_dev( - ctx, dev_version, container_dir_name=container_dir_name - ) - # 4) Run QA tests for the (local version) of the dev image. - stage = "dev" - _run_tests( - ctx, - stage, - dev_version, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=qa_tests, - ) - # 5) Push the "dev" image to ECR. 
- if push_to_repo: - docker_push_dev_image( - ctx, dev_version, container_dir_name=container_dir_name - ) - else: - _LOG.warning( - "Skipping pushing dev image to repo_short_name, as requested" - ) - _LOG.info("==> SUCCESS <==") - - -# ///////////////////////////////////////////////////////////////////////////// -# Multi-arch build flow -# ///////////////////////////////////////////////////////////////////////////// - - -# TODO(gp): multi_build -> multi_arch - - -@task -def docker_tag_push_multi_build_local_image_as_dev( # type: ignore - ctx, - version, - local_base_image="", - target_registry=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Mark the multi-arch "local" image as "dev" and push it. - - `base_image` and `target_registry` both contain information about the target - Docker registry. Docker image registry address in `local_base_image` name - is ignored when pushing, instead the `target_registry` param provides a - Docker image registry address to push to. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param local_base_image: base name of a local image, - e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param target_registry: target Docker image registry to push the image to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - source_stage = "local" - target_stage = "dev" - _docker_tag_and_push_multi_arch_image( - ctx, - version, - local_base_image, - target_registry, - container_dir_name, - source_stage, - target_stage, - ) - - -# TODO(gp): This needs to be merged with docker_release_dev_image. 
-@task -def docker_release_multi_build_dev_image( # type: ignore - ctx, - version, - cache=True, - poetry_mode="update", - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - # TODO(Grisha): use iterable values, see - # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values - # target_registries=... - target_registries=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Build, test, and release the latest multi-arch "dev" image. - - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests - skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param poetry_mode: update package dependencies using poetry - :param target_registries: comma separated list of target Docker - image registries to push the image to. E.g., - "aws_ecr.ck,dockerhub.causify". See `docker_login()` for - details. - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - target_registries = target_registries.split(",") - # 1) Build "local" image remotely in the CK AWS ECR registry and pull once - # it is built. - docker_build_local_image( - ctx, - version, - cache=cache, - poetry_mode=poetry_mode, - container_dir_name=container_dir_name, - multi_arch="linux/amd64,linux/arm64", - ) - # Run resolve after `docker_build_local_image` so that a proper check - # for subsequent version can be made in case `FROM_CHANGELOG` token - # is used. - dev_version = _get_dev_version(version, container_dir_name) - # 2) Run tests for the "local" image. - # 3) Run QA tests using the local version of an image. 
- # Use the local image because it is not possible to tag a multi-arch - # image as dev without releasing (pushing) it. - # The difference between a local and a dev image is just a tag. - stage = "local" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=qa_tests, - ) - # 4) Tag the image as dev image and push it to the target registries. - for target_registry in target_registries: - docker_tag_push_multi_build_local_image_as_dev( - ctx, - version=dev_version, - target_registry=target_registry, - container_dir_name=container_dir_name, - ) - _LOG.info("==> SUCCESS <==") - - -# ############################################################################# -# Prod image flow: -# ############################################################################# -# - Prod image has no release candidate -# - Start from a Dev image already built and qualified -# - The prod image is created from the dev image by copying the code inside the -# image -# - The prod image is tagged as "prod" -# The prod flow doesn't support multi-arch because we only run on x86 in prod. - - -@task -def docker_build_prod_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - candidate=False, - user_tag="", - container_dir_name=".", - tag=None, -): - """ - Build a prod image from a dev image. 
- - :param version: version to tag the image and code with - :param cache: note that often the prod image is just a copy of the - dev image so caching makes no difference - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param candidate: build a prod image with a tag format: prod-{hash} - where hash is the output of `hgit.get_head_hash()` - :param user_tag: the name of the user building the candidate image - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.prod" - _prepare_docker_ignore(ctx, docker_ignore) - # TODO(gp): We should do a `i git_clean` to remove artifacts and check that - # the client is clean so that we don't release from a dirty client. - # Build prod image. - if candidate: - # For candidate prod images which need to be tested on the AWS infra add - # a hash identifier. - latest_version = None - image_versioned_prod = hlitadoc.get_image( - base_image, "prod", latest_version - ) - if not tag: - head_hash = hgit.get_head_hash(short_hash=True) - else: - head_hash = tag - # Add username to the prod image name. - if user_tag: - image_versioned_prod += f"-{user_tag}" - # Add head hash to the prod image name. - image_versioned_prod += f"-{head_hash}" - - else: - image_versioned_prod = hlitadoc.get_image( - base_image, "prod", prod_version - ) - # - dockerfile = "devops/docker_build/prod.Dockerfile" - dockerfile = _to_abs_path(dockerfile) - # - # TODO(gp): Use to_multi_line_cmd() - opts = "--no-cache" if not cache else "" - # Use dev version for building prod image. 
- dev_version = hlitadoc.to_dev_version(prod_version) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - hdbg.dassert( - not hgit.is_inside_submodule(), - "The build should be run from a super repo, not a submodule.", - ) - git_root_dir = hgit.find_git_root() - # TODO(heanh): Expose the build context to the interface and use `git_root_dir` by default. - cmd = rf""" - DOCKER_BUILDKIT={DOCKER_BUILDKIT} \ - time \ - docker build \ - {opts} \ - --tag {image_versioned_prod} \ - --file {dockerfile} \ - --build-arg VERSION={dev_version} \ - --build-arg ECR_BASE_PATH={os.environ["CSFY_ECR_BASE_PATH"]} \ - --build-arg IMAGE_NAME={image_name} \ - {git_root_dir} - """ - hlitauti.run(ctx, cmd) - if candidate: - _LOG.info("Head hash: %s", head_hash) - _list_image(ctx, image_versioned_prod) - else: - # Tag versioned image as latest prod image. - latest_version = None - image_prod = hlitadoc.get_image(base_image, "prod", latest_version) - cmd = f"docker tag {image_versioned_prod} {image_prod}" - hlitauti.run(ctx, cmd) - # - _list_image(ctx, image_prod) - - -@task -def docker_build_multi_arch_prod_image( # type: ignore - ctx, - version, - cache=True, - base_image="", - user_tag="", - container_dir_name=".", - tag=None, - multi_arch="linux/amd64,linux/arm64", -): - """ - Build a multi arch. versioned prod image from a dev image. For e.g.: we - have the dev image `helpers:dev-1.0.0` and we want to build a prod image - `helpers:prod-1.0.0`. - - :param version: version to tag the image and code with - :param cache: note that often the prod image is just a copy of the - dev image so caching makes no difference - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param user_tag: the name of the user building the candidate image - :param container_dir_name: directory where the Dockerfile is located - :param multi_arch: comma separated list of target architectures to - build the image for. 
E.g., `linux/amd64,linux/arm64` - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # Prepare `.dockerignore`. - docker_ignore = "devops/docker_build/dockerignore.prod" - _prepare_docker_ignore(ctx, docker_ignore) - # TODO(gp): We should do a `i git_clean` to remove artifacts and check that - # the client is clean so that we don't release from a dirty client. - # Build prod image. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", prod_version) - # Prepare the build. - dockerfile = "devops/docker_build/prod.Dockerfile" - # Keep the relative path instead of an absolute path to ensure it matches - # files inside the tar stream and avoids file not found errors. - # dockerfile = _to_abs_path(dockerfile) - # - opts = "--no-cache" if not cache else "" - # Use dev version for building prod image. - dev_version = hlitadoc.to_dev_version(prod_version) - build_args = [ - ("VERSION", dev_version), - ("ECR_BASE_PATH", os.environ["CSFY_ECR_BASE_PATH"]), - ] - build_args = " ".join(f"--build-arg {k}={v}" for k, v in build_args) - # Login to AWS ECR because for multi-arch we need to build the local - # image remotely. - hlitadoc.docker_login(ctx) - _create_multiarch_builder(ctx) - _build_multi_arch_image( - ctx, opts, multi_arch, build_args, image_versioned_prod, dockerfile - ) - # TODO(sandeep): If possible, switch to hlitadoc._docker_pull(). - # Pull the image from registry after building. - cmd = f"docker pull {image_versioned_prod}" - hlitauti.run(ctx, cmd) - _list_image(ctx, image_versioned_prod) - - -@task -def docker_tag_push_multi_arch_prod_image( # type: ignore - ctx, - version, - base_image="", - target_registry=_DEFAULT_TARGET_REGISTRY, - container_dir_name=".", -): - """ - Mark the multi-arch versioned "prod" image as "prod" and push them to the - target registry. 
- - `base_image` and `target_registry` both contain information about the target - Docker registry. - - :param ctx: invoke context - :param version: version to tag the image and code with - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param target_registry: target Docker image registry to push the image to - - "dockerhub.causify": public Causify Docker image registry - - "aws_ecr.ck": private AWS CK ECR - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - source_stage = "prod" - target_stage = "prod" - _docker_tag_and_push_multi_arch_image( - ctx, - version, - base_image, - target_registry, - container_dir_name, - source_stage, - target_stage, - ) - - -# TODO(gp): Can we merge this with docker_push_prod_image? -@task -def docker_push_prod_candidate_image( # type: ignore - ctx, - candidate, - base_image="", - container_dir_name=".", -): - """ - (ONLY CI/CD) Push the "prod" candidate image to ECR. - - :param ctx: invoke context - :param candidate: hash of the candidate prod image to push - :param base_image: e.g., *****.dkr.ecr.us-east-1.amazonaws.com/amp - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # - hlitadoc.docker_login(ctx) - # Push image with tagged with a hash ID. - image_versioned_prod = hlitadoc.get_image(base_image, "prod", None) - cmd = f"docker push {image_versioned_prod}-{candidate}" - hlitauti.run(ctx, cmd, pty=True) - - -@task -# TODO(Vlad): Add the release flow with the multi-arch support. -# See HelpersTask339. -def docker_release_prod_image( # type: ignore - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - push_to_repo=True, - container_dir_name=".", -): - """ - Build, test, and release to ECR the prod image. 
- - - Build prod image - - Run the tests - - Push the prod image repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests and release the dev image - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param push_to_repo: push the image to the repo_short_name - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - prod_version = hlitadoc.resolve_version_value( - version, container_dir_name=container_dir_name - ) - # 1) Build prod image. - docker_build_prod_image( - ctx, - cache=cache, - version=prod_version, - container_dir_name=container_dir_name, - ) - # 2) Run tests. - if skip_tests: - _LOG.warning("Skipping all tests and releasing") - fast_tests = slow_tests = superslow_tests = False - stage = "prod" - if fast_tests: - hlitapyt.run_fast_tests(ctx, stage=stage, version=prod_version) - if slow_tests: - hlitapyt.run_slow_tests(ctx, stage=stage, version=prod_version) - if superslow_tests: - hlitapyt.run_superslow_tests(ctx, stage=stage, version=prod_version) - # 3) Run QA tests using the local version of the prod image before pushing - # it to ECR. - if qa_tests: - hlitapyt.run_qa_tests(ctx, stage=stage, version=prod_version) - # 4) Push prod image. 
- if push_to_repo: - docker_push_prod_image( - ctx, version=prod_version, container_dir_name=container_dir_name - ) - else: - _LOG.warning("Skipping pushing image to repo_short_name as requested") - _LOG.info("==> SUCCESS <==") - - -@task(iterable=["docker_registry"]) -def docker_release_multi_arch_prod_image( - ctx, - version, - cache=True, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=False, - qa_tests=True, - docker_registry=None, - container_dir_name=".", -): - """ - Build, test, and release to Docker registries the multi-arch prod image. - :param ctx: invoke context - :param version: version to tag the image and code with - :param cache: use the cache - :param skip_tests: skip all the tests - :param fast_tests: run fast tests, unless all tests skipped - :param slow_tests: run slow tests, unless all tests skipped - :param superslow_tests: run superslow tests, unless all tests skipped - :param qa_tests: run QA tests (e.g., end-to-end linter tests) - :param docker_registry: list of Docker image registries to push the image to - :param container_dir_name: directory where the Dockerfile is located - Example usage: - > invoke docker_release_multi_arch_prod_image \ - --version 1.2.0 - --docker-registry dockerhub.causify \ - --docker-registry aws_ecr.ck - """ - hlitauti.report_task() - # The default value for iterative task parameter will be an empty list. - # https://docs.pyinvoke.org/en/stable/concepts/invoking-tasks.html#iterable-flag-values - if len(docker_registry) == 0: - docker_registry = [_DEFAULT_TARGET_REGISTRY] - _LOG.warning( - "No Docker registries provided, using default: %s", docker_registry - ) - # 1) Build prod image. - docker_build_multi_arch_prod_image( - ctx, - version, - cache=cache, - container_dir_name=container_dir_name, - multi_arch="linux/amd64,linux/arm64", - ) - # 2) Run tests. 
- stage = "prod" - _run_tests( - ctx, - stage, - version, - skip_tests=skip_tests, - fast_tests=fast_tests, - slow_tests=slow_tests, - superslow_tests=superslow_tests, - qa_tests=qa_tests, - ) - # 3) Push prod image. - for registry in docker_registry: - docker_tag_push_multi_arch_prod_image( - ctx, - version=version, - target_registry=registry, - container_dir_name=container_dir_name, - ) - _LOG.info("==> SUCCESS <==") - - -# # TODO(gp): Useless IMO. -@task -def docker_release_all(ctx, version, container_dir_name="."): # type: ignore - """ - (ONLY CI/CD) Release both dev and prod image to ECR. - - This includes: - - docker_release_dev_image - - docker_release_prod_image - - :param version: version to tag the image and code with - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task() - docker_release_dev_image(ctx, version, container_dir_name=container_dir_name) - docker_release_prod_image( - ctx, version, container_dir_name=container_dir_name - ) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_rollback_dev_image( # type: ignore - ctx, - version, - push_to_repo=True, -): - """ - Rollback the version of the dev image. - - Phases: - 1) Ensure that version of the image exists locally - 2) Promote versioned image as dev image - 3) Push dev image to the repo - - :param ctx: invoke context - :param version: version to tag the image and code with - :param push_to_repo: push the image to the ECR repo - """ - hlitauti.report_task() - stage = "dev" - _docker_rollback_image( - ctx, - base_image="", - stage=stage, - version=version, - push_to_repo=push_to_repo, - ) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_rollback_prod_image( # type: ignore - ctx, - version, - push_to_repo=True, -): - """ - Rollback the version of the prod image. - - Same as parameters and meaning as `docker_rollback_dev_image`. 
- """ - hlitauti.report_task() - stage = "prod" - _docker_rollback_image( - ctx, - base_image="", - stage=stage, - version=version, - push_to_repo=push_to_repo, - ) - _LOG.info("==> SUCCESS <==") - - -def _check_workspace_dir_sizes() -> None: - """ - Check if user doesn't have large files/directories in their workspace. - - Use-case is running the function before building a candidate image. - Large files significanty slow dwon image creation and subsequent - pulling. Overtime it also increases costs of ECR usage. - """ - # Execute system command and split into a list of tuples [size, dir]. - # Threshold is chosen heuristically according to current repo dir sizes. - git_root = hgit.find_git_root() - with hsystem.cd(git_root): - fs_item_max_threshold = "200M" - directory_size_list = hsystem.system_to_string( - f"du --threshold {fs_item_max_threshold} -hs $(ls -A) | sort -hr" - )[1].split("\n") - # Filter out directories ignored by `dockerignore.prod` + "amp/" - # as submodule. - ignored_dirs = [ - "amp", - "ck.infra", - "amp/ck.infra", - "docs", - ".git", - "amp/.git", - ] - offending_items = [ - it.replace("\t", " ") - for it in directory_size_list - if it.split("\t")[1] not in ignored_dirs - ] - hdbg.dassert( - len(offending_items) == 0, - ( - "Your workspace contains one or more files/directories " - f"larger than {fs_item_max_threshold} move " - f"or delete the items:\n\t {offending_items}" - ), - ) - - -@task -def docker_create_candidate_image(ctx, container_dir_name=".", user_tag=""): # type: ignore - """ - Create new prod candidate image and update the specified ECS task - definition such that the Image URL specified in container definition points - to the new candidate image. - - :param task_definition: the name of the ECS task definition for - which an update to container image URL is made, e.g. cmamp-test - :param container_dir_name: the runnable dir path (e.g. 
- `./ck.infra/`) - :param user_tag: the name of the user creating the image, empty - parameter means the command was run via gh actions - :param region: AWS Region, for Tokyo region specify 'ap-northeast-1' - :return: the tag used for the image - """ - _check_workspace_dir_sizes() - # Get the hash of the image. - tag = hgit.get_head_hash(".", short_hash=True) - if user_tag: - # Add user name to the candidate tag. - tag = f"{user_tag}-{tag}" - # Create new prod image. - docker_build_prod_image( - ctx, - container_dir_name=container_dir_name, - version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, - candidate=True, - tag=tag, - ) - # Push candidate image. - docker_push_prod_candidate_image(ctx, tag) - return tag - - -# ############################################################################# -# ECS task definition workflows. -# ECS task definition is a wrapper around a container definition. -# ############################################################################# - - -@task -def docker_release_test_task_definition( - ctx, - task_definition: Optional[str] = None, - user_tag: Optional[str] = None, - region: str = hs3.AWS_EUROPE_REGION_1, -): # type: ignore - """ - Release candidate image to test ECS task definition. - - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Verify that task definition is provided. - hdbg.dassert_is_not(task_definition, None, "task definition is required") - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir, user_tag) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition, - image_tag=image_tag, - region=region, - environment="test", - ) - - -@task -def docker_release_preprod_task_definition( - ctx, region: str = hs3.AWS_EUROPE_REGION_1 -): # type: ignore - """ - Release candidate image to preprod ECS task definition. 
- - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Preprod release should be done from master branch and the client should be - # clean. - curr_branch = hgit.get_branch_name() - hdbg.dassert_eq( - curr_branch, "master", msg="You should release from master branch" - ) - _ = hgit.is_client_clean(abort_if_not_clean=True) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-preprod" - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition_name, - image_tag=image_tag, - region=region, - environment="preprod", - ) - - -@task -def docker_release_prod_task_definition(ctx, region: str = hs3.AWS_US_REGION_1): # type: ignore - """ - Release candidate image to prod ECS task definition. - - :param region: region to create the task definition in - """ - hdbg.dassert_in(region, hs3.AWS_REGIONS) - # Prod release should be done from master branch and the client should be - # clean. - curr_branch = hgit.get_branch_name() - hdbg.dassert_eq( - curr_branch, "master", msg="You should release from master branch" - ) - _ = hgit.is_client_clean(abort_if_not_clean=True) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - task_definition_name = f"{image_name}-prod" - # Create candidate image. - current_dir = os.getcwd() - image_tag = docker_create_candidate_image(ctx, current_dir) - # Update ECS task definition with new image URL. - hlitaaws.aws_update_ecs_task_definition( - task_definition=task_definition_name, - image_tag=image_tag, - region=region, - environment="prod", - ) - - -@task -def copy_ecs_task_definition_image_url(ctx, src_task_def, dst_task_def): # type: ignore - """ - Copy image URL from one task definition to another. 
- - Currently the implementation assumes the source region is Stockholm - and destination #TODO(Juraj): Because this is the configuration we - need at the moment. - - :param src_task_def: source ECS task definition (located in eu- - north-1) - :param dst_task_def: destination ECS task definition (located in ap- - northeast-1) - """ - # TODO(Vlad): Import locally to avoid redundant dependencies. - # See for detals: https://github.com/cryptokaizen/cmamp/issues/8086. - import helpers.haws as haws - - # - _ = ctx - src_image_url = haws.get_task_definition_image_url( - src_task_def, region=hs3.AWS_EUROPE_REGION_1 - ) - # We have cross-region replication enabled in ECR, all images live in both regions. - dst_image_url = src_image_url.replace( - hs3.AWS_EUROPE_REGION_1, hs3.AWS_TOKYO_REGION_1 - ) - haws.update_task_definition( - dst_task_def, dst_image_url, region=hs3.AWS_TOKYO_REGION_1 - ) - - -# TODO(gp): This might become obsolete. -@task -def docker_update_prod_task_definition( - ctx, version, preprod_tag, airflow_dags_s3_path, task_definition -): # type: ignore - """ - Update image in prod task definition to the desired version. - - :param version: latest version from `changelog.txt` or custom one (e.g., `1.1.1`) - :param preprod_tag: image that will be re-tagged with prod version - e.g., `preprod-d8sf76s` -> `prod-1.1.1` - :param airflow_dags_s3_path: S3 bucket from which airflow will load DAGs - :param task_definition: which ECS task definition to use - currently our prod ECS task definitions match short name of repos. - """ - # TODO(Nikola): Convert `haws` part to script so it can be called via `docker_cmd`. - # https://github.com/cryptokaizen/cmamp/pull/2594/files#r948551787 - import helpers.haws as haws - - # - # TODO(Nikola): Use env var for CK profile. - s3fs_ = hs3.get_s3fs(aws_profile="ck") - super_module = not hgit.is_inside_submodule() - # Prepare params for listing DAGs. 
- root_dir = hgit.get_client_root(super_module) - dags_path = [root_dir, "datapull", "airflow", "dags"] - if super_module and hgit.is_amp_present(): - # Main DAGs location is always in `cmamp`. - dags_path.insert(1, "amp") - dir_name = os.path.join(*dags_path) - pattern = "preprod.*.py" - only_files = True - use_relative_paths = False - # List preprod DAGs. - dag_paths = hs3.listdir(dir_name, pattern, only_files, use_relative_paths) - for dag_path in dag_paths: - # Abort in case one of the preprod DAGs is out of sync. - _, dag_name = os.path.split(dag_path) - hdbg.dassert_eq( - hs3.from_file(dag_path), - s3fs_.cat(airflow_dags_s3_path + dag_name).decode(), - msg=f"Preprod file `{dag_name}` is out of sync with `{airflow_dags_s3_path}`!", - ) - # Prepare params to compose new prod image url. - prod_version = hlitadoc.resolve_version_value(version) - base_image = "" - stage = "prod" - # Compose new prod image url. - new_prod_image_url = hlitadoc.get_image(base_image, stage, prod_version) - version = None - new_prod_image_url_no_version = hlitadoc.get_image( - base_image, stage, version - ) - # Check if preprod tag exist in preprod task definition as precaution. - preprod_task_definition_name = f"{task_definition}-preprod" - preprod_image_url = haws.get_task_definition_image_url( - preprod_task_definition_name - ) - preprod_tag_from_image = preprod_image_url.split(":")[-1] - msg = ( - f"Preprod tag is different in the image url `{preprod_tag_from_image}`!" - ) - hdbg.dassert_eq(preprod_tag_from_image, preprod_tag, msg=msg) - # Pull preprod image for re-tag. - hlitadoc.docker_login(ctx) - cmd = f"docker pull {preprod_image_url}" - hlitauti.run(ctx, cmd) - # Re-tag preprod image to prod. 
- cmd = f"docker tag {preprod_image_url} {new_prod_image_url}" - hlitauti.run(ctx, cmd) - cmd = f"docker tag {preprod_image_url} {new_prod_image_url_no_version}" - hlitauti.run(ctx, cmd) - cmd = f"docker rmi {preprod_image_url}" - hlitauti.run(ctx, cmd) - # Get original prod image for potential rollback. - original_prod_image_url = haws.get_task_definition_image_url(task_definition) - # Track successful uploads for potential rollback. - successful_uploads = [] - try: - # Update prod task definition to the latest prod tag. - haws.update_task_definition( - task_definition, new_prod_image_url, environment="prod" - ) - # Add prod DAGs to airflow s3 bucket after all checks are passed. - for dag_path in dag_paths: - # Update prod DAGs. - _, dag_name = os.path.split(dag_path) - prod_dag_name = dag_name.replace("preprod.", "prod.") - dag_s3_path = airflow_dags_s3_path + prod_dag_name - s3fs_.put(dag_path, dag_s3_path) - _LOG.info("Successfully uploaded `%s`!", dag_s3_path) - successful_uploads.append(dag_s3_path) - # Upload new tag to ECS. - docker_push_prod_image(ctx, prod_version) - except Exception as ex: - _LOG.info("Rollback started!") - # Rollback prod task definition image URL. - haws.update_task_definition( - task_definition, original_prod_image_url, environment="prod" - ) - _LOG.info( - "Reverted prod task definition image url to `%s`!", - original_prod_image_url, - ) - # Notify for potential rollback for airflow S3 bucket, if any. - if successful_uploads: - _LOG.warning("Starting S3 rollback!") - # Prepare bucket resource. - s3 = haws.get_service_resource(aws_profile="ck", service_name="s3") - bucket_name, _ = hs3.split_path(airflow_dags_s3_path) - if hasattr(s3, "Bucket"): - bucket = s3.Bucket(bucket_name) - else: - # We'll need to handle this differently since client doesn't - # have object_versions. 
- raise NotImplementedError( - "S3 resource Bucket attribute not available, fallback implementation needed" - ) - for successful_upload in successful_uploads: - # TODO(Nikola): Maybe even Telegram notification? - # Rollback successful upload. - _, prefix = hs3.split_path(successful_upload) - prefix = prefix.lstrip(os.sep) - versions = sorted( - bucket.object_versions.filter(Prefix=prefix), - key=attrgetter("last_modified"), - reverse=True, - ) - latest_version = versions[0] - latest_version.delete() - _LOG.info("Deleted version `%s`.", latest_version.version_id) - if len(versions) > 1: - rollback_version = versions[1] - _LOG.info( - "Active version is now `%s`!", - rollback_version.version_id, - ) - elif len(versions) == 1: - _LOG.info( - "Deleted version was also the only version. Nothing to rollback." - ) - else: - # TODO(Nikola): Do we need custom exception? - raise NotImplementedError - s3_rollback_message = ( - f"S3 uploads reverted: {successful_uploads}" - if successful_uploads - else "No S3 uploads." - ) - _LOG.info("Rollback completed! %s", s3_rollback_message) - raise ex - - -@task -def docker_build_frontend_feature_image( - ctx, - stage, - dev_image_version=None, - app_version=None, -): - """ - Build frontend image for releasing the features. - - :param stage: stage to release the image - :param dev_image_version: base dev image version to use - :param app_version: app version for feature releases - """ - hdbg.dassert_in(stage, ["test", "preprod", "prod"]) - # Get changelog paths. - current_dir = os.getcwd() - # Get image and app version. - if not dev_image_version: - dev_image_version = hversio.get_changelog_version(current_dir) - if not app_version: - errors = [] - # Here we assume FE has its own runnable dir or the app changelog file - # is inside `app` dir of a parent runnable dir. 
- for file_name in [ - "app_changelog.txt", - os.path.join("app", "app_changelog.txt"), - ]: - try: - app_version = hversio.get_changelog_version( - current_dir, file_name=file_name - ) - break - except AssertionError as e: - errors.append(str(e)) - else: - raise FileNotFoundError( - f"App changelog file not found. Provide app version explicitly. Errors: {errors}" - ) - # Set ECR base path. - if stage in ("test", "preprod"): - ecr_base_path = "623860924167.dkr.ecr.eu-north-1.amazonaws.com" - else: - ecr_base_path = "726416904550.dkr.ecr.us-east-1.amazonaws.com" - # Set prod docker file name. - dockerfile = "devops/docker_build/prod.Dockerfile" - dockerfile = _to_abs_path(dockerfile) - # Set image tag. - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - image_tag = f"{ecr_base_path}/{image_name}:{stage}-{app_version}" - git_root_dir = hgit.find_git_root() - # Docker build command. - cmd = rf""" - docker build --no-cache \ - --file {dockerfile} \ - --build-arg VERSION={dev_image_version} \ - --build-arg ECR_BASE_PATH={ecr_base_path} \ - --build-arg IMAGE_NAME={image_name} \ - --tag {image_tag} \ - {git_root_dir} - """ - hlitauti.run(ctx, cmd) - _list_image(ctx, image_tag) - - -# ############################################################################# -# Test dev image flow -# ############################################################################# - - -@task -def docker_build_test_dev_image( # type: ignore - ctx, - assignee="", - reviewers="", - container_dir_name=".", -): - """ - Automate the complete periodic release workflow for the dev image. 
- - This task performs: - 1) Bump version (e.g., 2.2.0 -> 2.3.0) - 2) Get release team members - 3) Create branch with date-based name - 4) Build image locally with the bumped version number - 5) Run tests (fast, slow, superslow) - 6) Add changelog entry for the release - 7) Stage poetry.lock and pip_list.txt files - 8) Commit changes with versioned message - 9) Push changes - 10) Create PR - 11) Tag and push image to GHCR - - :param ctx: invoke context - :param assignee: GitHub username to assign the PR to - :param reviewers: GitHub username(s) to request PR review. If not - specified, uses the release team members from GitHub team - configured in repo_config.yaml - :param container_dir_name: directory where the Dockerfile is located - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # 1) Bump version. - _LOG.info("Step 1: Bumping version") - current_version = hversio.get_changelog_version(container_dir_name) - hdbg.dassert(current_version, "Could not find current version in changelog") - _LOG.info("Current version: %s", current_version) - version = hversio.bump_version(current_version, bump_type="minor") - _LOG.info("Bumped version: %s -> %s", current_version, version) - # 2) Get release team members. - _LOG.info("Step 2: Getting release team members") - if not reviewers: - release_team_name = hrecouti.get_repo_config().get_release_team() - # Get team members from GitHub team. - team_members = hlitagh.gh_get_team_member_names(release_team_name) - reviewers = ",".join(team_members) - _LOG.info("Release team '%s' members: %s", release_team_name, reviewers) - # 3) Create branch with date-based name. - _LOG.info("Step 3: Creating branch with date-based name") - issue_prefix = hrecouti.get_repo_config().get_issue_prefix() - # Get current date in YYYYMMDD format. 
- today = datetime.date.today().strftime("%Y%m%d") - branch_name = f"{issue_prefix}_Periodic_image_release_{today}" - _LOG.info("Branch name: %s", branch_name) - cmd = f"git checkout -b {branch_name}" - hlitauti.run(ctx, cmd) - # 4) Build image locally. - _LOG.info("Step 4: Building local image with version %s", version) - docker_build_local_image( - ctx, - version=version, - cache=True, - poetry_mode="update", - container_dir_name=container_dir_name, - ) - # 5) Run tests. - _LOG.info("Step 5: Running tests") - dev_version = _get_dev_version(version, container_dir_name) - stage = "dev" - _run_tests( - ctx, - stage, - dev_version, - skip_tests=False, - fast_tests=True, - slow_tests=True, - superslow_tests=True, - qa_tests=False, - ) - # 6) Add changelog entry. - _LOG.info("Step 6: Adding changelog entry") - supermodule = True - root_dir = hversio._get_client_root(supermodule) - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - changelog_file = os.path.join(root_dir, container_dir_name, "changelog.txt") - hdbg.dassert_file_exists(changelog_file) - # Read the current changelog. - changelog_content = hio.from_file(changelog_file) - # Prepare new entry. - today = datetime.date.today().strftime("%Y-%m-%d") - new_entry = f"""# {image_name}-{version} -- {today} -- Periodic release: {today} - -""" - # Prepend new entry to changelog. - updated_changelog = new_entry + changelog_content - # Write back to file. - hio.to_file(changelog_file, updated_changelog) - _LOG.info("Added changelog entry for version %s", version) - # 7) Stage files. - _LOG.info("Step 7: Staging files") - # Fix git permissions in CI to avoid "insufficient permission" errors. 
- if hserver.is_inside_ci(): - _LOG.info("Running in CI, fixing git permissions") - cmd = "sudo chmod -R 777 .git/objects/" - hlitauti.run(ctx, cmd) - files_to_stage = [ - "devops/docker_build/poetry.lock", - "devops/docker_build/pip_list.txt", - "changelog.txt", - ] - for file_path in files_to_stage: - full_path = os.path.join(root_dir, container_dir_name, file_path) - if os.path.exists(full_path): - cmd = f"git add {full_path}" - hlitauti.run(ctx, cmd) - _LOG.info("Staged %s", full_path) - else: - _LOG.warning("File not found, skipping: %s", full_path) - # 8) Commit changes. - _LOG.info("Step 8: Committing changes") - commit_message = f"Poetry output from the v{version} build" - # --no-verify to skip pre-commit checks since the `poetry.lock` file is - # too big and the `check_file_size` is failed. - cmd = f'git commit -m "{commit_message}" --no-verify' - hlitauti.run(ctx, cmd) - # 9) Push changes. - _LOG.info("Step 9: Pushing changes") - cmd = f"git push origin {branch_name}" - hlitauti.run(ctx, cmd) - # 10) Create PR. - _LOG.info("Step 10: Creating pull request") - pr_body = f"- Periodic release of {image_name} dev image version {version}" - label = _AUTO_RELEASE_LABEL - hlitagh.gh_create_pr( - ctx, - body=pr_body, - draft=False, - reviewer=reviewers, - labels=label, - assignee=assignee, - ) - _LOG.info("PR submitted for branch %s", branch_name) - # 11) Tag and push to GHCR. - _LOG.info("Step 11: Tagging and pushing image to GHCR") - # Get GHCR base image path from repo config. - ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") - ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() - ghcr_base_image = f"{ghcr_base}/{ghcr_image_name}" - _LOG.info("GHCR base image: %s", ghcr_base_image) - # Get local image name. - local_stage = "local" - image_local = hlitadoc.get_image("", local_stage, dev_version) - # Tag local image as versioned GHCR dev image (e.g., ghcr.io/causify-ai/csfy:dev-2.3.0). 
- ghcr_image_versioned = f"{ghcr_base_image}:dev-{version}" - cmd = f"docker tag {image_local} {ghcr_image_versioned}" - hlitauti.run(ctx, cmd) - _LOG.info("Tagged as versioned GHCR dev image: %s", ghcr_image_versioned) - # Push versioned GHCR dev image. - cmd = f"docker push {ghcr_image_versioned}" - hlitauti.run(ctx, cmd, pty=True) - _LOG.info("Pushed versioned GHCR dev image: %s", ghcr_image_versioned) - _LOG.info("==> SUCCESS <==") - - -@task -def docker_tag_push_dev_image( - ctx, - version="", - base_image="", - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=False, -): - """ - Pulls a versioned dev image from a base registry, then tags and pushes - it to the specified target registries (both as versioned and latest). - - :param ctx: invoke context - :param version: version to tag the image and code with. If empty, reads - from changelog - :param base_image: base image path to pull from (e.g., - ghcr.io/causify-ai/csfy). If empty, uses GHCR from repo config - :param target_registries: comma separated list of target Docker - image registries to push the image to. E.g., "ghcr,ecr". - See the `helpers.repo_config_utils.RepoConfig.get_container_registry_url()` - for supported registry names - :param container_dir_name: directory where the Dockerfile is located - :param dry_run: if True, only print the commands without executing - them - """ - hlitauti.report_task(container_dir_name=container_dir_name) - # Get version. - if not version: - version = hversio.get_changelog_version(container_dir_name) - # Get base image if not provided. - if not base_image: - ghcr_base = hrecouti.get_repo_config().get_container_registry_url("ghcr") - ghcr_image_name = hrecouti.get_repo_config().get_docker_base_image_name() - base_image = f"{ghcr_base}/{ghcr_image_name}" - # Pull the image. 
- stage = "dev" - source_dev_image_versioned = hlitadoc.get_image(base_image, stage, version) - cmd = f"docker pull {source_dev_image_versioned}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) - # Tag and push to target registries. - for registry in target_registries.split(","): - # Strip whitespace from registry name. - registry = registry.strip() - # Tag and push the image to the target registry as latest dev image. - target_base = hrecouti.get_repo_config().get_container_registry_url( - registry - ) - target_image_name = ( - hrecouti.get_repo_config().get_docker_base_image_name() - ) - target_base_image = f"{target_base}/{target_image_name}" - latest_version = None - target_dev_image_latest = hlitadoc.get_image( - target_base_image, stage, latest_version - ) - cmd = ( - f"docker tag {source_dev_image_versioned} {target_dev_image_latest}" - ) - hlitauti.run(ctx, cmd, dry_run=dry_run) - cmd = f"docker push {target_dev_image_latest}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) - # Tag and push versioned dev image to target registry. 
- target_dev_image_versioned = hlitadoc.get_image( - target_base_image, stage, version - ) - cmd = f"docker tag {source_dev_image_versioned} {target_dev_image_versioned}" - hlitauti.run(ctx, cmd, dry_run=dry_run) - cmd = f"docker push {target_dev_image_versioned}" - hlitauti.run(ctx, cmd, pty=True, dry_run=dry_run) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py deleted file mode 100644 index 7c1c360a6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_find.py +++ /dev/null @@ -1,606 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_find as hlitafin -""" - -import functools -import glob -import logging -import os -import re -from typing import Iterator, List, Optional, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Find test. -# ############################################################################# - - -def _find_test_files( - dir_name: Optional[str] = None, use_absolute_path: bool = False -) -> List[str]: - """ - Find all the files containing test code in `abs_dir`. - """ - dir_name = dir_name or "." - hdbg.dassert_dir_exists(dir_name) - _LOG.debug("abs_dir=%s", dir_name) - # Find all the file names containing test code. 
- _LOG.info("Searching from '%s'", dir_name) - path = os.path.join(dir_name, "**", "test_*.py") - _LOG.debug("path=%s", path) - file_names = glob.glob(path, recursive=True) - _LOG.debug("Found %d files: %s", len(file_names), str(file_names)) - hdbg.dassert_no_duplicates(file_names) - # Test files should always under a dir called `test`. - for file_name in file_names: - if "/old/" in file_name: - continue - if "/compute/" in file_name: - continue - hdbg.dassert_eq( - os.path.basename(os.path.dirname(file_name)), - "test", - "Test file '%s' needs to be under a `test` dir ", - file_name, - ) - hdbg.dassert_not_in( - "notebook/", - file_name, - "Test file '%s' should not be under a `notebook` dir", - file_name, - ) - # Make path relatives, if needed. - if use_absolute_path: - file_names = [os.path.abspath(file_name) for file_name in file_names] - # - file_names = sorted(file_names) - _LOG.debug("file_names=%s", file_names) - hdbg.dassert_no_duplicates(file_names) - return file_names - - -# TODO(gp): -> find_class since it works also for any class. -def _find_test_class( - class_name: str, file_names: List[str], exact_match: bool = False -) -> List[str]: - """ - Find test file containing `class_name` and report it in pytest format. - - E.g., for "TestLibTasksRunTests1" return - "test/test_lib_tasks.py::TestLibTasksRunTests1" - - :param exact_match: find an exact match or an approximate where `class_name` - is included in the class name - """ - # > jackpy TestLibTasksRunTests1 - # test/test_lib_tasks.py:60:class TestLibTasksRunTests1(hut.TestCase): - regex = r"^\s*class\s+(\S+)\s*\(" - _LOG.debug("regex='%s'", regex) - res: List[str] = [] - # Scan all the files. - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt = hio.from_file(file_name) - # Search for the class in each file. 
- for i, line in enumerate(txt.split("\n")): - # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line) - # TODO(gp): We should skip ```, """, ''' - m = re.match(regex, line) - if m: - found_class_name = m.group(1) - _LOG.debug(" %s:%d -> %s", line, i, found_class_name) - if exact_match: - found = found_class_name == class_name - else: - found = class_name in found_class_name - if found: - res_tmp = f"{file_name}::{found_class_name}" - _LOG.debug("-> res_tmp=%s", res_tmp) - res.append(res_tmp) - res = sorted(list(set(res))) - return res - - -# TODO(gp): Extend this to accept only the test method. -# TODO(gp): Have a single `find` command with multiple options to search for different -# things, e.g., class names, test names, pytest_mark, ... -@task -def find_test_class( - ctx, class_name, dir_name=".", pbcopy=True, exact_match=False -): # type: ignore - """ - Report test files containing `class_name` in a format compatible with - pytest. - - :param class_name: the class to search - :param dir_name: the dir from which to search (default: .) - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - hlitauti.report_task(txt="class_name abs_dir pbcopy") - hdbg.dassert_ne(class_name, "", "You need to specify a class name") - _ = ctx - file_names = _find_test_files(dir_name) - res = _find_test_class(class_name, file_names, exact_match) - res = " ".join(res) - # Print or copy to clipboard. - hsystem.to_pbcopy(res, pbcopy) - - -# ////////////////////////////////////////////////////////////////////////////////// - - -@functools.lru_cache() -def _get_python_files(subdir: str) -> List[str]: - pattern = "*.py" - only_files = False - use_relative_paths = False - python_files = hio.listdir(subdir, pattern, only_files, use_relative_paths) - # Remove tmp files. 
- python_files = [f for f in python_files if not f.startswith("tmp")] - return python_files - - -# File, line number, line, info1, info2 -_FindResult = Tuple[str, int, str, str, str] -_FindResults = List[_FindResult] - - -def _scan_files(python_files: List[str]) -> Iterator: - for file_ in python_files: - _LOG.debug("file=%s", file_) - txt = hio.from_file(file_) - for line_num, line in enumerate(txt.split("\n")): - # TODO(gp): Skip commented lines. - # _LOG.debug("%s:%s line='%s'", file_, line_num, line) - yield file_, line_num, line - - -def _find_short_import(iterator: Iterator, short_import: str) -> _FindResults: - """ - Find imports in the Python files with the given short import. - - E.g., for dtfcodarun dataflow/core/test/test_builders.py:9:import - dataflow.core.dag_runner as dtfcodarun returns - """ - # E.g., - # `import dataflow.core.dag_runner as dtfcodarun` - regex = rf"import\s+(\S+)\s+as\s+({short_import})" - regex = re.compile(regex) - # - results: _FindResults = [] - for file_, line_num, line in iterator: - m = regex.search(line) - if m: - # E.g., - # dataflow/core/test/test_builders.py:9:import dataflow.core.dag_runner as dtfcodarun - _LOG.debug(" --> line:%s=%s", line_num, line) - long_import_txt = m.group(1) - short_import_txt = m.group(2) - full_import_txt = f"import {long_import_txt} as {short_import_txt}" - res = (file_, line_num, line, short_import_txt, full_import_txt) - # E.g., - _LOG.debug(" => %s", str(res)) - results.append(res) - return results - - -def _find_func_class_uses(iterator: Iterator, regex: str) -> _FindResults: - regexs = [] - # E.g., - # `dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)` - regexs.append(rf"\s+(\w+)\.(\w*{regex})\(") - # `dag_builder: dtfcodabui.DagBuilder` - regexs.append(rf":\s*(\w+)\.(\w*{regex})") - # - _LOG.debug("regexs=%s", str(regexs)) - regexs = [re.compile(regex_) for regex_ in regexs] - # - results: _FindResults = [] - for file_, line_num, line in iterator: - _LOG.debug("line='%s'", 
line) - m = None - for regex_ in regexs: - m = regex_.search(line) - if m: - # _LOG.debug("--> regex matched") - break - if m: - _LOG.debug(" --> line:%s=%s", line_num, line) - short_import_txt = m.group(1) - obj_txt = m.group(2) - res = (file_, line_num, line, short_import_txt, obj_txt) - # E.g., - # ('./helpers/lib_tasks.py', 10226, 'dtfsys', 'RealTimeDagRunner') - # ('./dataflow/core/test/test_builders.py', 70, 'dtfcodarun', 'FitPredictDagRunner') - # ('./dataflow/core/test/test_builders.py', 157, 'dtfcodarun', 'FitPredictDagRunner') - _LOG.debug(" => %s", str(res)) - results.append(res) - return results - - -def _process_find_results(results: _FindResults, how: str) -> List: - filtered_results: List = [] - if how == "remove_dups": - # Remove duplicates. - for result in results: - (_, _, _, info1, info2) = result - filtered_results.append((info1, info2)) - filtered_results = hlist.remove_duplicates(filtered_results) - filtered_results = sorted(filtered_results) - elif how == "all": - filtered_results = sorted(results) - else: - raise ValueError(f"Invalid how='{how}'") - return filtered_results - - -@task -def find(ctx, regex, mode="all", how="remove_dups", subdir="."): # type: ignore - """ - Find symbols, imports, test classes and so on. 
- - Example: - ``` - > i find DagBuilder - ('dtfcodabui', 'DagBuilder') - ('dtfcore', 'DagBuilder') - ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') - ('dtfcore', 'import dataflow.core as dtfcore') - ``` - - :param regex: function or class use to search for - :param mode: what to look for - - `symbol_import`: look for uses of function or classes - E.g., `DagRunner` - returns - ``` - ('cdataf', 'PredictionDagRunner') - ('cdataf', 'RollingFitPredictDagRunner') - ``` - - `short_import`: look for the short import - E.g., `'dtfcodabui' - returns - ``` - ('dtfcodabui', 'import dataflow.core.dag_builder as dtfcodabui') - ``` - :param how: how to report the results - - `remove_dups`: report only imports and calls that are the same - """ - hlitauti.report_task(txt=hprint.to_str("regex mode how subdir")) - _ = ctx - # Process the `where`. - python_files = _get_python_files(subdir) - iter_ = _scan_files(python_files) - # Process the `what`. - if mode == "all": - for mode_tmp in ("symbol_import", "short_import"): - find(ctx, regex, mode=mode_tmp, how=how, subdir=subdir) - return - if mode == "symbol_import": - results = _find_func_class_uses(iter_, regex) - filtered_results = _process_find_results(results, "remove_dups") - print("\n".join(map(str, filtered_results))) - # E.g., - # ('cdataf', 'PredictionDagRunner') - # ('cdataf', 'RollingFitPredictDagRunner') - # Look for each short import. - results = [] - for short_import, _ in filtered_results: - iter_ = _scan_files(python_files) - results.extend(_find_short_import(iter_, short_import)) - elif mode == "short_import": - results = _find_short_import(iter_, regex) - else: - raise ValueError(f"Invalid mode='{mode}'") - # Process the `how`. - filtered_results = _process_find_results(results, how) - print("\n".join(map(str, filtered_results))) - - -# ############################################################################# -# Find test decorator. 
-# ############################################################################# - - -# TODO(gp): decorator_name -> pytest_mark -def _find_test_decorator( - decorator_name: str, file_names: List[str] -) -> List[str]: - """ - Find test files containing tests with a certain decorator - `@pytest.mark.XYZ`. - """ - hdbg.dassert_isinstance(file_names, list) - # E.g., - # @pytest.mark.slow(...) - # @pytest.mark.qa - string = f"@pytest.mark.{decorator_name}" - regex = rf"^\s*{re.escape(string)}\s*[\(]?" - _LOG.debug("regex='%s'", regex) - res: List[str] = [] - # Scan all the files. - for file_name in file_names: - _LOG.debug("file_name=%s", file_name) - txt = hio.from_file(file_name) - # Search for the class in each file. - for i, line in enumerate(txt.split("\n")): - # _LOG.debug("file_name=%s i=%s: %s", file_name, i, line) - # TODO(gp): We should skip ```, """, '''. We can add a function to - # remove all the comments, although we need to keep track of the - # line original numbers. - m = re.match(regex, line) - if m: - _LOG.debug(" -> found: %d:%s", i, line) - res.append(file_name) - # - res = sorted(list(set(res))) - return res - - -@task -def find_test_decorator(ctx, decorator_name="", dir_name="."): # type: ignore - """ - Report test files containing `class_name` in pytest format. - - :param decorator_name: the decorator to search - :param dir_name: the dir from which to search - """ - hlitauti.report_task() - _ = ctx - hdbg.dassert_ne(decorator_name, "", "You need to specify a decorator name") - file_names = _find_test_files(dir_name) - res = _find_test_decorator(decorator_name, file_names) - res = " ".join(res) - print(res) - - -# ############################################################################# -# Find / replace `check_string`. 
-# ############################################################################# - - -@task -def find_check_string_output( # type: ignore - ctx, class_name, method_name, as_python=True, fuzzy_match=False, pbcopy=True -): - """ - Find output of `check_string()` in the test running - class_name::method_name. - - E.g., for `TestResultBundle::test_from_config1` return the content of the file - `./core/dataflow/test/TestResultBundle.test_from_config1/output/test.txt` - - :param as_python: if True return the snippet of Python code that replaces the - `check_string()` with a `assert_equal` - :param fuzzy_match: if True return Python code with `fuzzy_match=True` - :param pbcopy: save the result into the system clipboard (only on macOS) - """ - hlitauti.report_task() - _ = ctx - hdbg.dassert_ne(class_name, "", "You need to specify a class name") - hdbg.dassert_ne(method_name, "", "You need to specify a method name") - # Look for the directory named `class_name.method_name`. - cmd = f"find . -name '{class_name}.{method_name}' -type d" - # > find . -name "TestResultBundle.test_from_config1" -type d - # ./core/dataflow/test/TestResultBundle.test_from_config1 - _, txt = hsystem.system_to_string(cmd, abort_on_error=False) - file_names = txt.split("\n") - if not txt: - hdbg.dfatal(f"Can't find the requested dir with '{cmd}'") - if len(file_names) > 1: - hdbg.dfatal(f"Found more than one dir with '{cmd}':\n{txt}") - dir_name = file_names[0] - # Find the only file underneath that dir. - hdbg.dassert_dir_exists(dir_name) - cmd = f"find {dir_name} -name 'test.txt' -type f" - _, file_name = hsystem.system_to_one_line(cmd) - hdbg.dassert_file_exists(file_name) - # Read the content of the file. - _LOG.info("Found file '%s' for %s::%s", file_name, class_name, method_name) - txt = hio.from_file(file_name) - if as_python: - # Package the code snippet. - if not fuzzy_match: - # Align the output at the same level as 'expected = r...'. 
- num_spaces = 8 - txt = hprint.indent(txt, num_spaces=num_spaces) - output = f""" - actual = - expected = r\"\"\" -{txt} - \"\"\".lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match={fuzzy_match}) - """ - else: - output = txt - # Print or copy to clipboard. - hsystem.to_pbcopy(output, pbcopy=pbcopy) - return output - - -# ############################################################################# -# Find module dependencies. -# ############################################################################# - - -standard_libs = [ - "abc", - "argparse", - "datetime", - "importlib", - "logging", - "os", - "pandas", - "pytest", - "re", - "unittest", -] - - -@task -def find_dependency( # type: ignore - ctx, - module_name, - mode="print_deps", - only_module="", - ignore_standard_libs=True, - ignore_helpers=True, - remove_dups=True, -): - """ - E.g., ``` - - # Find all the dependency of a module from itself - > i find_dependency --module-name "amp.dataflow.model" --mode "find_lev2_deps" --ignore-helpers --only-module dataflow - amp/dataflow/model/stats_computer.py:16 dataflow.core - amp/dataflow/model/model_plotter.py:4 dataflow.model - ``` - - :param module_name: the module path to analyze (e.g., `amp.dataflow.model`) - :param mode: - - `print_deps`: print the result of grepping for imports - - `find_deps`: find all the dependencies - - `find_lev1_deps`, `find_lev2_deps`: find all the dependencies - :param only_module: keep only imports containing a certain module (e.g., `dataflow`) - :param ignore_standard_libs: ignore the Python standard libs (e.g., `os`, `...`) - :param ignore_helpers: ignore the `helper` lib - :param remove_dups: remove the duplicated imports - """ - _ = ctx - # (cd amp/dataflow/model/; jackpy "import ") | grep -v notebooks | grep -v test | grep -v __init__ | grep "import dataflow" - src_dir = module_name.replace(".", "/") - hdbg.dassert_dir_exists(src_dir) - # Find all the imports. 
- cmd = f'find {src_dir} -name "*.py" | xargs grep -n -r "^import "' - _, txt = hsystem.system_to_string(cmd) - # - if mode == "print_deps": - print(txt) - return - # Parse the output. - _LOG.debug("\n" + hprint.frame("Parse")) - lines = txt.split("\n") - lines_out = [] - for line in lines: - # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas - # import helpers.hunit_test as hunitest # pylint: disable=no-name-in-module' - data = line.split(":") - hdbg.dassert_lte(3, len(data), "Invalid line='%s'", line) - file, line_num, import_code = data[:3] - _LOG.debug(hprint.to_str("file line_num import_code")) - lines_out.append((file, line_num, import_code)) - lines = lines_out - _LOG.debug("Found %d imports", len(lines)) - # Remove irrelevant files and imports. - _LOG.debug("\n" + hprint.frame("Remove irrelevant entries")) - lines_out = [] - for line in lines: - file, line_num, import_code = line - _LOG.debug("# " + hprint.to_str("file line_num import_code")) - if "__init__.py" in file: - _LOG.debug("Remove because init") - continue - if "/test/" in file: - _LOG.debug("Remove because test") - continue - if "notebooks/" in file: - _LOG.debug("Remove because notebook") - continue - if "from typing import" in import_code: - _LOG.debug("Remove because typing") - continue - lines_out.append(line) - lines = lines_out - _LOG.debug("After removal %d imports", len(lines)) - # Process. - _LOG.debug("\n" + hprint.frame("Process entries")) - lines_out = [] - for line in lines: - # ./forecast_evaluator_from_prices.py:16:import helpers.hpandas as hpandas - file, line_num, import_code = line - _LOG.debug("# " + hprint.to_str("file line_num import_code")) - # Parse import code. 
- m = re.match(r"^import\s+(\S+)(\s+as)?", import_code) - hdbg.dassert(m, "Can't parse line='%s'", import_code) - assert m is not None - import_name = m.group(1) - _LOG.debug("import_name='%s'", import_name) - lev1_import = import_name.split(".")[0] - if ignore_standard_libs: - if lev1_import in standard_libs: - _LOG.debug("Ignoring standard lib '%s'", lev1_import) - continue - if ignore_helpers: - if lev1_import.startswith("helpers"): - _LOG.debug("Ignoring helpers '%s'", lev1_import) - continue - if only_module: - if only_module not in import_name: - _LOG.debug( - "Ignoring '%s' since it doesn't contain %s", - import_name, - only_module, - ) - continue - # - if mode == "find_deps": - dep = import_name - elif mode == "find_lev1_deps": - deps = import_name.split(".") - if len(deps) > 1: - dep = deps[0] - else: - dep = import_name - elif mode == "find_lev2_deps": - deps = import_name.split(".") - if len(deps) > 1: - dep = ".".join(deps[:2]) - else: - dep = import_name - else: - raise ValueError(f"Invalid mode='{mode}'") - lines_out.append((file, line_num, dep)) - lines = lines_out - # Remove repeated tuples. - if remove_dups: - _LOG.debug("\n" + hprint.frame("Remove repeated tuples")) - import_names = set() - lines_out = [] - for line in lines: - if line[2] in import_names: - continue - lines_out.append(line) - import_names.add(line[2]) - lines = lines_out - else: - _LOG.warning("Remove dups skipped") - # Sort. - _LOG.debug("\n" + hprint.frame("Sort tuples")) - lines = sorted(lines, key=lambda x: x[2]) - # Print and save. 
- print(hprint.frame("Results")) - _LOG.debug("\n" + hprint.frame("Print")) - txt = "\n".join([":".join(line) for line in lines]) - file_name = "cfile" - hio.to_file(file_name, txt) - _LOG.info("%s saved", file_name) - # - txt = "\n".join(["%s:%s\t\t\t%s" % line for line in lines]) - print(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py deleted file mode 100644 index 53c9600af..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_gh.py +++ /dev/null @@ -1,1252 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_gh as hlitagh -""" - -import datetime -import json -import logging -import os -import re -from typing import Any, Dict, List, Optional, Tuple - -import invoke.exceptions as invexc -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htable as htable -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# GitHub CLI. -# ############################################################################# - - -@task -def gh_login( # type: ignore - ctx, - account="", - print_status=False, -): - hlitauti.report_task() - # - if not account: - # Retrieve the name of the repo, e.g., "alphamatic/amp". 
- full_repo_name = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=False - ) - _LOG.debug(hprint.to_str("full_repo_name")) - account = full_repo_name.split("/")[0] - _LOG.info(hprint.to_str("account")) - # - ssh_filename = os.path.expanduser(f"~/.ssh/id_rsa.{account}.github") - _LOG.debug(hprint.to_str("ssh_filename")) - if os.path.exists(ssh_filename): - cmd = f"export GIT_SSH_COMMAND='ssh -i {ssh_filename}'" - print(cmd) - else: - _LOG.warning("Can't find file '%s'", ssh_filename) - # - if print_status: - cmd = "gh auth status" - hlitauti.run(ctx, cmd) - # - github_pat_filename = os.path.expanduser(f"~/.ssh/github_pat.{account}.txt") - if os.path.exists(github_pat_filename): - cmd = f"gh auth login --with-token <{github_pat_filename}" - hlitauti.run(ctx, cmd) - else: - _LOG.warning("Can't find file '%s'", github_pat_filename) - # - if print_status: - cmd = "gh auth status" - hlitauti.run(ctx, cmd) - - -# ############################################################################# - - -def _get_branch_name(branch_mode: str) -> Optional[str]: - if branch_mode == "current_branch": - branch_name: Optional[str] = hgit.get_branch_name() - elif branch_mode == "master": - branch_name = "master" - elif branch_mode == "all": - branch_name = None - else: - raise ValueError(f"Invalid branch='{branch_mode}'") - return branch_name - - -def _get_org_name(org_name: str) -> str: - """ - Get organization name, inferring from current repo if not provided. - - :param org_name: organization name or empty string - :return: organization name - """ - if not org_name: - # Infer organization from current repo. - full_repo_name = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=False - ) - org_name = full_repo_name.split("/")[0] - return org_name - - -def _get_workflow_table() -> htable.TableType: - """ - Get a table with the status of the GH workflow for the current repo. - """ - # Get the workflow status from GH. 
- cmd = "export NO_COLOR=1; gh run list" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug(hprint.to_str("txt")) - # pylint: disable=line-too-long - # > gh run list - # STATUS TITLE WORKFLOW BRANCH EVENT ID ELAPSED AGE - # * AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 4m49s 4m - # > gh run list | more - # completed success AmpTask1786_Integrate_20230518_2 Fast tests AmpTask1786_Integrate_20230518_2 pull_request 5027911519 7m17s 10m - # in_progress AmpTask1786_Integrate_20230518_2 Slow tests AmpTask1786_Integrate_20230518_2 pull_request 5027911518 10m9s 10m - # pylint: enable=line-too-long - # The output is tab separated, so convert it into CSV. - first_line = txt.split("\n")[0] - _LOG.debug("first_line=%s", first_line.replace("\t", ",")) - num_cols = len(first_line.split("\t")) - _LOG.debug(hprint.to_str("first_line num_cols")) - cols = [ - # E.g., completed, in_progress. - "completed", - # E.g., success, failure. - "status", - # Aka title: parse but don't use. - "name", - "workflow", - "branch", - "event", - "id", - "elapsed", - "age", - ] - hdbg.dassert_eq(num_cols, len(cols)) - # Build the table. - table = htable.Table.from_text(cols, txt, delimiter="\t") - _LOG.debug(hprint.to_str("table")) - # Remove the "name" column as it's redundant with "workflow". - table = table.remove_column("name") - return table - - -def _print_table(table: htable.TableType) -> None: - table_str = str(table) - # Colorize the table. - color_map = {"success": "green", "failure": "red", "in progress": "yellow"} - for status, color in color_map.items(): - table_str = table_str.replace( - status, hprint.color_highlight(status, color) - ) - # Report the full status. - print(table_str) - - -# TODO(Grisha): seems like GH changed the output format, we should update accordingly, -# see CmTask #4672 "Slow tests fail (9835540316)" for details. 
-@task -def gh_workflow_list( # type: ignore - ctx, - filter_by_branch="current_branch", - filter_by_completed="all", - report_only_status=True, - show_stack_trace=False, - print_table=True, -): - """ - Report the status of the GH workflows. - - :param filter_by_branch: name of the branch to check - - `current_branch` for the current Git branch - - `master` for master branch - - `all` for all branches - :param filter_by_completed: filter table by the status of the workflow - - E.g., "failure", "success" - :param report_only_status: if True, report only the status of the workflows - :param show_stack_trace: in case of error run `pytest_repro` reporting also - the stack trace - :param print_table: if True, print the table with the status of the workflows - """ - hlitauti.report_task( - txt=hprint.to_str("filter_by_branch filter_by_completed") - ) - # Login. - gh_login(ctx) - # Get the table. - table = _get_workflow_table() - # Filter table based on the branch. - if filter_by_branch != "all": - field = "branch" - value = _get_branch_name(filter_by_branch) - print(f"Filtering table by {field}={value}") - table = table.filter_rows(field, value) - # Filter table by the workflow status. - if filter_by_completed != "all": - field = "completed" - value = filter_by_completed - print(f"Filtering table by {field}={value}") - table = table.filter_rows(field, value) - if ( - filter_by_branch not in ("current_branch", "master") - or not report_only_status - ): - _print_table(table) - return - # For each workflow find the last success. - branch_name = hgit.get_branch_name() - workflows = table.unique("workflow") - print(f"workflows={workflows}") - for workflow in workflows: - table_tmp = table.filter_rows("workflow", workflow) - if print_table: - print(hprint.frame(workflow)) - _print_table(table_tmp) - # Find the first success. 
- num_rows = table.size()[0] - _LOG.debug("num_rows=%s", num_rows) - for i in range(num_rows): - status_column = table_tmp.get_column("status") - _LOG.debug("status_column=%s", str(status_column)) - hdbg.dassert_lt( - i, len(status_column), "status_column=", status_column - ) - status = status_column[i] - if status == "success": - print(f"Workflow '{workflow}' for '{branch_name}' is ok") - break - if status == "failure": - _LOG.error( - "Workflow '%s' for '%s' is broken", workflow, branch_name - ) - # Get the output of the broken run. - # > gh run view 1477484584 --log-failed - workload_id = table_tmp.get_column("id")[i] - log_file_name = f"tmp.failure.{workflow}.{branch_name}.txt" - log_file_name = log_file_name.replace(" ", "_").lower() - cmd = f"gh run view {workload_id} --log-failed >{log_file_name}" - hsystem.system(cmd) - # Remove non-printable chars. - # TODO(heanh): Consider adding all the helpers util scripts - # to the `PATH` (when inside the container) so we can just use - # them without specifying the full path. 
- helpers_root_dir = hgit.find_helpers_root() - file_path = ( - f"{helpers_root_dir}/dev_scripts_helpers/system_tools" - ) - cmd = f"{file_path}/remove_escape_chars.py -i {log_file_name}" - hsystem.system(cmd) - print(f"# Log is in '{log_file_name}'") - # Run_fast_tests Run fast tests 2021-12-19T00:19:38.3394316Z FAILED data - # cmd = rf"grep 'Z FAILED ' {log_file_name}" - workflow_as_str = workflow.lower().replace(" ", "_") - script_name = f"./tmp.pytest_repro.{workflow_as_str}.sh" - cmd = f"invoke pytest_repro --file-name {log_file_name} --script-name {script_name}" - if show_stack_trace: - cmd += " -s" - hsystem.system(cmd, suppress_output=False, abort_on_error=False) - break - if status in ("startup_failure", "cancelled", "skipped"): - _LOG.debug( - "Workflow '%s' for '%s' has status '%s', skipping", - workflow, - branch_name, - status, - ) - break - if status == "": - if i == (len(status_column) - 1): - # If all the runs in the table are in progress, i.e. there is no - # failed or succesful run, issue a warning and exit. E.g., - # ######################################################### - # Superslow tests - # ######################################################### - # completed | status | workflow | branch | event | id | elapsed | age | - # ----------- | ------ | --------------- | ------ | ----------------- | ---------- | ------- | --- | - # in_progress | | Superslow tests | master | workflow_dispatch | 5421740561 | 13m25s | 13m | - _LOG.warning( - "No failed/successful run found for workflow=%s for branch=%s, all runs are in progress, exiting.", - workflow, - branch_name, - ) - else: - _LOG.debug( - "Workflow=%s for branch %s is in progress, skipping further checks", - workflow, - branch_name, - ) - break - else: - raise ValueError(f"Invalid status='{status}'") - - -@task -def gh_workflow_run(ctx, branch="current_branch", workflows="all"): # type: ignore - """ - Run GH workflows in a branch. 
- """ - hlitauti.report_task(txt=hprint.to_str("branch workflows")) - # Login. - gh_login(ctx) - # Get the branch name. - if branch == "current_branch": - branch_name = hgit.get_branch_name() - elif branch == "master": - branch_name = "master" - else: - raise ValueError(f"Invalid branch='{branch}'") - _LOG.debug(hprint.to_str("branch_name")) - # Get the workflows. - if workflows == "all": - gh_tests = ["fast_tests", "slow_tests"] - else: - gh_tests = [workflows] - _LOG.debug(hprint.to_str("workflows")) - # Run. - for gh_test in gh_tests: - gh_test += ".yml" - # gh workflow run fast_tests.yml --ref AmpTask1251_Update_GH_actions_for_amp - cmd = f"gh workflow run {gh_test} --ref {branch_name}" - hlitauti.run(ctx, cmd) - - -# ############################################################################# - - -# TODO(gp): Remove repo_short_name. -def _get_repo_full_name_from_cmd(repo_short_name: str) -> Tuple[str, str]: - """ - Convert the `repo_short_name` from command line (e.g., "current", "amp", - "lm") to the repo_short_name full name without host name. - """ - repo_full_name_with_host: str - if repo_short_name == "current": - # Get the repo name from the current repo. - repo_full_name_with_host = hgit.get_repo_full_name_from_dirname( - ".", include_host_name=True - ) - hdbg.dassert_eq( - repo_full_name_with_host, - hrecouti.get_repo_config().get_repo_full_name_with_hostname(), - ) - ret_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - else: - hdbg.dfatal("This code path is obsolete") - _LOG.debug( - "repo_short_name=%s -> repo_full_name_with_host=%s ret_repo_short_name=%s", - repo_short_name, - repo_full_name_with_host, - ret_repo_short_name, - ) - return repo_full_name_with_host, ret_repo_short_name - - -def _get_gh_issue_title(issue_id: int, repo_short_name: str) -> Tuple[str, str]: - """ - Get the title of a GitHub issue. 
- - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - """ - # TODO(gp): I don't see applications where we need to pass the repo_short_name. - # One should always operate in the dir corresponding to a repo. - hdbg.dassert_eq(repo_short_name, "current") - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - # > (export NO_COLOR=1; gh issue view 1251 --json title) - # {"title":"Update GH actions for amp"} - hdbg.dassert_lte(1, issue_id) - cmd = f"gh issue view {issue_id} --repo {repo_full_name_with_host} --json title,url" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("txt=\n%s", txt) - # Parse json. - dict_ = json.loads(txt) - _LOG.debug("dict_=\n%s", dict_) - title = dict_["title"] - _LOG.debug("title=%s", title) - url = dict_["url"] - _LOG.debug("url=%s", url) - # Remove some annoying chars. - for char in ": + ( ) / ` *".split(): - title = title.replace(char, "") - # Replace multiple spaces with one. - title = re.sub(r"\s+", " ", title) - title = title.replace(" ", "_") - # Remove some annoying chars. - for char in "- ' ` \"".split(): - title = title.replace(char, "_") - # Add the prefix `AmpTaskXYZ_...` - task_prefix = hrecouti.get_repo_config().get_issue_prefix() - # task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - _LOG.debug("task_prefix=%s", task_prefix) - title = f"{task_prefix}{issue_id}_{title}" - return title, url - - -@task -def gh_issue_title(ctx, issue_id, repo_short_name="current", pbcopy=True): # type: ignore - """ - Print the title that corresponds to the given issue and repo_short_name. - E.g., AmpTask1251_Update_GH_actions_for_amp. - - Before running the invoke, one must check their login status on GH - by running `gh auth status`. - - :param issue_id: id number of the issue to create the branch for - :param repo_short_name: short name of the repo to use for the branch - name building. 
"current" refers to the repo where the call is - implemented - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - hlitauti.report_task(txt=hprint.to_str("issue_id repo_short_name")) - # Login. - gh_login(ctx) - # - issue_id = int(issue_id) - hdbg.dassert_lte(1, issue_id) - title, url = _get_gh_issue_title(issue_id, repo_short_name) - # Print or copy to clipboard. - msg = f"{title}: {url}" - hsystem.to_pbcopy(msg, pbcopy=pbcopy) - - -@task -def gh_issue_create( # type: ignore - ctx, - title="", - body="", - labels="", - assignees="", - project="", - repo_short_name="current", -): - """ - Create a new GitHub issue in the specified repository. - - ``` - # Create a simple issue - > invoke gh_issue_create --title "Fix bug in parser" - - # Create an issue with body and labels - > invoke gh_issue_create --title "Add new feature" --body "Description here" --labels "enhancement,priority-high" - - # Create an issue with assignees - > invoke gh_issue_create --title "Review PR" --assignees "user1,user2" - - # Create an issue and add to a project - > invoke gh_issue_create --title "Implement feature" --project "Development Board" - ``` - - :param title: title of the issue (required) - :param body: body/description of the issue - :param labels: comma-separated list of labels to apply - :param assignees: comma-separated list of GitHub usernames to assign - :param project: GitHub project name or number to add the issue to - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - :return: issue ID (integer) of the created issue - """ - hlitauti.report_task(txt=hprint.to_str("title repo_short_name")) - # Login. 
- gh_login(ctx) - # - hdbg.dassert(title, "Title is required") - hdbg.dassert_eq(repo_short_name, "current") - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - _LOG.info( - "Creating issue with title '%s' in %s", - title, - repo_full_name_with_host, - ) - # Build the command. - cmd = ( - "gh issue create" - + f" --repo {repo_full_name_with_host}" - + f' --title "{title}"' - ) - if body: - cmd += f' --body "{body}"' - if labels: - cmd += f' --label "{labels}"' - if assignees: - cmd += f' --assignee "{assignees}"' - if project: - cmd += f' --project "{project}"' - # Execute the command and capture output. - # gh issue create outputs the URL of the created issue, e.g., - # https://github.com/cryptokaizen/csfy/issues/7572 - _, output = hsystem.system_to_string(cmd) - _LOG.debug("gh issue create output: %s", output) - # Extract the issue ID from the URL. - # The URL format is: https://github.com/org/repo/issues/123 - match = re.search(r"/issues/(\d+)", output) - hdbg.dassert(match, f"Could not extract issue ID from output: {output}") - issue_id = int(match.group(1)) - _LOG.info("Created issue #%s", issue_id) - return issue_id - - -# ############################################################################# - - -def _check_if_pr_exists(title: str) -> bool: - """ - Return whether a PR exists or not. - """ - # > gh pr diff AmpTask1955_Lint_20211219 - # no pull requests found for branch "AmpTask1955_Lint_20211219" - cmd = f"gh pr diff {title}" - rc = hsystem.system(cmd, abort_on_error=False) - pr_exists: bool = rc == 0 - return pr_exists - - -@task -def gh_create_pr( # type: ignore - ctx, - body="", - draft=True, - auto_merge=False, - repo_short_name="current", - title="", - reviewer="", - labels="", - assignee="", -): - """ - Create a draft PR for the current branch in the corresponding - repo_short_name. 
- - ``` - # To open a PR in the web browser - > gh pr view --web - - # To see the status of the checks - > gh pr checks - ``` - - :param body: the body of the PR - :param draft: draft or ready-to-review PR - :param auto_merge: enable auto merging PR - :param repo_short_name: `current` refer to the repo where we are in, - otherwise a `repo_short_name` (e.g., "amp") - :param title: title of the PR or the branch name, if title is empty - :param reviewer: GitHub username to request review from - :param labels: comma-separated list of labels to apply - :param assignee: GitHub username to assign the PR to - """ - hlitauti.report_task() - # Login. - gh_login(ctx) - # - branch_name = hgit.get_branch_name() - if not title: - # Use the branch name as title. - title = branch_name - repo_full_name_with_host, repo_short_name = _get_repo_full_name_from_cmd( - repo_short_name - ) - _LOG.info( - "Creating PR with title '%s' for '%s' in %s", - title, - branch_name, - repo_full_name_with_host, - ) - if auto_merge: - hdbg.dassert( - not draft, "The PR can't be a draft in order to auto merge it" - ) - pr_exists = _check_if_pr_exists(title) - _LOG.debug(hprint.to_str("pr_exists")) - if pr_exists: - _LOG.warning("PR '%s' already exists: skipping creation", title) - else: - # Link the PR automatically to the branch, if possible. 
- issue_id = hgit.extract_gh_issue_number_from_branch(branch_name) - _LOG.debug(hprint.to_str("issue_id")) - if issue_id and str(issue_id) not in body: - body += f"\n\n#{issue_id}" - _LOG.info("Added issue id %s to the PR body", issue_id) - cmd = ( - "gh pr create" - + f" --repo {repo_full_name_with_host}" - + (" --draft" if draft else "") - + f' --title "{title}"' - + f' --body "{body}"' - ) - if reviewer: - cmd += f" --reviewer {reviewer}" - _LOG.info("Added reviewer %s to the PR", reviewer) - if labels: - cmd += f' --label "{labels}"' - _LOG.info("Added labels %s to the PR", labels) - if assignee: - cmd += f" --assignee {assignee}" - # TODO(gp): Use _to_single_line_cmd - hlitauti.run(ctx, cmd) - if auto_merge: - cmd = f"gh pr ready {title}" - hlitauti.run(ctx, cmd) - cmd = f"gh pr merge {title} --auto --delete-branch --squash" - hlitauti.run(ctx, cmd) - - -# TODO(gp): Add gh_open_pr to jump to the PR from this branch. - -# TODO(Grisha): probably the section deserves a separate lib. -# ############################################################################# -# Buildmeister dashboard -# ############################################################################# - - -# TODO(Grisha): consider moving to cmamp as we run the workflow from cmamp. -@task -def gh_publish_buildmeister_dashboard_to_s3(ctx, mark_as_latest=True): # type: ignore - """ - Run the buildmeister dashboard notebook and publish it to S3. - - :param mark_as_latest: if True, mark the dashboard as `latest`, otherwise - just publish a timestamped copy - """ - hlitauti.report_task() - # Login to GH CLI. - if hserver.is_inside_ci(): - _LOG.info("Skipping login since running inside CI") - else: - gh_login(ctx) - # Run and publish the Buildmeister dashboard Jupyter notebook locally. 
- run_notebook_script_path = hgit.find_file_in_git_tree("run_notebook.py") - amp_abs_path = hgit.get_amp_abs_path() - notebook_path = os.path.join( - amp_abs_path, "devops/notebooks/Master_buildmeister_dashboard.ipynb" - ) - dst_local_dir = os.path.join(amp_abs_path, "tmp.notebooks") - cmd_run_txt = [ - run_notebook_script_path, - f"--notebook {notebook_path}", - # The notebook does not require a config, so using a random dummy config. - # TODO(Grisha): consider creating a separate config builder for the notebook. - "--config_builder 'datapull.optima.common.qa.qa_check.build_dummy_data_reconciliation_config()'", - f"--dst_dir '{dst_local_dir}'", - "--publish", - "--num_threads serial", - ] - cmd_run_txt = " ".join(cmd_run_txt) - hsystem.system(cmd_run_txt) - # To avoid the dependency on `helpers.hs3`. - import helpers.hs3 as hs3 - - # Get HTML file name. - tmp_local_dir_name = os.path.join(amp_abs_path, "tmp.notebooks") - pattern = "Master_buildmeister_dashboard.0*.html" - only_files = True - use_relative_paths = False - local_html_files = hio.listdir( - tmp_local_dir_name, - pattern, - only_files=only_files, - use_relative_paths=use_relative_paths, - ) - # Assert if more than 1 file is returned. - hdbg.dassert_eq( - len(local_html_files), - 1, - f"Found more than one file in {tmp_local_dir_name} - {local_html_files}", - ) - local_html_file = local_html_files[0] - s3_build_path = os.path.join( - hrecouti.get_repo_config().get_html_bucket_path(), - "build/buildmeister_dashboard", - ) - aws_profile = "ck" - if mark_as_latest: - # Copy the dashboard notebook to S3 as latest build. - s3_latest_build_path = os.path.join( - s3_build_path, "Master_buildmeister_dashboard.latest.html" - ) - hs3.copy_file_to_s3(local_html_file, s3_latest_build_path, aws_profile) - # Copy the timestamped version of the dashboard notebook to S3. - # Need to add a trailing slash to the path to copy the file into the folder. 
- # https://docs.python.org/3/library/os.path.html#os.path.join - s3_build_path_folder = os.path.join(s3_build_path, "") - hs3.copy_file_to_s3(local_html_file, s3_build_path_folder, aws_profile) - - -def _gh_run_and_get_json(cmd: str) -> List[Dict[str, Any]]: - """ - Run a `gh` command and remove colors when running inside a notebook. - - :param cmd: `gh` command to run - :return: parsed JSON output of a command - """ - _, _txt = hsystem.system_to_string(cmd) - if hsystem.is_running_in_ipynb(): - # Remove the colors from the text. - _txt = re.sub(r"\x1b\[((1;)*[0-9]{2})*m", "", _txt) - _LOG.debug(hprint.to_str("_txt")) - ret: List[Dict[str, Any]] = json.loads(_txt) - return ret - - -def gh_get_open_prs(repo: str) -> List[Dict[str, Any]]: - """ - Return a list of open PRs. - - :param repo: repo name in the format "organization/repo", e.g., - "cryptokaizen/cmamp" - """ - cmd = f"gh pr list --state 'open' --json id --repo {repo}" - pull_requests = _gh_run_and_get_json(cmd) - return pull_requests - - -def _get_best_workflow_run( - workflow_name: str, - workflow_runs: List[Dict[str, Any]], - *, - preferred_event: Optional[str] = None, -) -> Optional[Dict[str, Any]]: - """ - Pick the best available workflow run: - - If `preferred_event` is specified (e.g., "schedule"), try that first. - - Otherwise, return the most recent success/failure run. 
- - :param workflow_name: GitHub Actions workflow name - :param workflow_runs: run metadata, sorted most-recent-first - :param preferred_event: trigger type to prioritize (e.g., "schedule") - :return: best-matching run - e.g., - ``` - { - 'conclusion': 'success', - 'status': 'completed', - 'url': 'https://github.com/cryptokaizen/cmamp/actions/runs/8714881296', - 'workflowName': 'Allure fast tests' - } - """ - run_status = None - if preferred_event: - for run in workflow_runs: - if run.get("event") == preferred_event and run["conclusion"] in [ - "success", - "failure", - ]: - run_status = run - break - if run_status is None: - _LOG.warning( - "No '%s' run found for workflow '%s'", - preferred_event, - workflow_name, - ) - if run_status is None: - for run in workflow_runs: - if run["conclusion"] in ["success", "failure"]: - run_status = run - break - return run_status - - -def gh_get_workflows( - repo_name: str, *, sort: bool = True -) -> List[Dict[str, str]]: - """ - Get a list of workflows for a given repo. - - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param sort: if True, sort the list of workflow names - :return: list of workflows, e.g., [{"id": "12520125", "name": "Fast - tests"}, {"id": "12520124", "name": "Slow tests"}] - """ - hdbg.dassert_isinstance(repo_name, str) - _LOG.debug(hprint.to_str("repo_name")) - # Get the workflow list. - cmd = f"gh workflow list --json id,name --repo {repo_name}" - workflows = _gh_run_and_get_json(cmd) - workflows = [ - {"id": str(workflow["id"]), "name": workflow["name"]} - for workflow in workflows - ] - # sort workflow by name - if sort: - workflows = sorted(workflows, key=lambda workflow: workflow["name"]) - return workflows - - -def gh_get_workflow_details( - repo_name: str, workflow_id: str, fields: List[str], limit: int -) -> List[Dict[str, Any]]: - """ - Return the stats for a given workflow. 
- - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param workflow_id: workflow id, e.g., "12520125" - :param fields: list of fields to return, e.g., ["workflowName", "status"] - :param limit: number of runs to return - :return: workflow stats - Example output: - ``` - [ - { - "conclusion": "success", - "status": "completed", - "url": "https://github.com/cryptokaizen/cmamp/actions/runs/7757345960", - "workflowName": "Slow tests" - } - ] - ``` - """ - hdbg.dassert_isinstance(repo_name, str) - hdbg.dassert_isinstance(workflow_id, str) - hdbg.dassert_container_type(fields, List, str) - _LOG.debug(hprint.to_str("repo_name workflow_id fields")) - # Fetch the latest `limit` runs for status calculation. - cmd = f""" - gh run list \ - --json {",".join(fields)} \ - --repo {repo_name} \ - --branch master \ - --limit {limit} \ - --workflow "{workflow_id}" - """ - workflow_statuses = _gh_run_and_get_json(cmd) - # We still want to return the statuses even there are less runs than requested. E.g., there is a new workflow with a few runs or there is a workflow that was never run. - hdbg.dassert_eq(len(workflow_statuses), limit, only_warning=True) - _LOG.debug("workflow_statuses=\n%s", workflow_statuses) - return workflow_statuses - - -def gh_get_details_for_all_workflows( - repo_list: List[str], -) -> "pd.DataFrame": # noqa: F821 - """ - Get status for all the workflows. - - :param repo_list: list of repos to get the status for e.g., - ["cryptokaizen/cmamp", "cryptokaizen/orange"] - :return: a table with the status of all the workflows, e.g., - ``` - Repo workflowName url status - cryptokaizen/cmamp Allure fast tests https://github.com/cryptokaizen/cmamp/actions/... completed - cryptokaizen/cmamp Allure slow tests https://github.com/cryptokaizen/cmamp/actions/... completed - ``` - """ - import pandas as pd - - # TODO(Grisha): expose cols to the interface, i.e. a caller decides what to do. 
- gh_cols = ["workflowName", "url", "status", "conclusion", "event"] - # Import locally in order not to introduce external dependencies to the lib. - repo_dfs = [] - for repo_name in repo_list: - # Get all workflows for the given repo. - workflows = gh_get_workflows(repo_name) - # For each workflow find the last run. - for workflow in workflows: - # Get at least a few runs to compute the status; this is useful when - # the latest run is not completed, in this case the run before the - # latest one tells the status for a workflow. - limit = 10 - workflow_id = workflow["id"] - workflow_name = workflow["name"] - workflow_statuses = gh_get_workflow_details( - repo_name, workflow_id, gh_cols, limit - ) - if len(workflow_statuses) < limit: - # TODO(Grisha): should we just insert empty rows as placeholders so that - # we know that such workflows exist? - _LOG.warning( - "Not enough runs to compute status for '%s', repo '%s', skipping the workflow", - workflow_name, - repo_name, - ) - continue - # Get the latest successful or failed workflow run (prioritize scheduled run if available). - SCHEDULED_WORKFLOWS = { - "Gitleaks Scan", - } - preferred_event = ( - "schedule" if workflow_name in SCHEDULED_WORKFLOWS else None - ) - workflow_status = _get_best_workflow_run( - workflow_name, workflow_statuses, preferred_event=preferred_event - ) - if workflow_status is None: - _LOG.warning( - "No successful or failed runs found for '%s', repo '%s', skipping the workflow", - workflow_name, - repo_name, - ) - continue - # Access the info of latest workflow run. - workflow_status = pd.DataFrame([workflow_status]) - workflow_status["repo_name"] = repo_name - repo_dfs.append(workflow_status) - # Collect per-repo tables into a single DataFrame. - df = pd.concat(repo_dfs, ignore_index=True) - # Rename the columns. 
- df = df.drop(columns=["status"]) - df = df.rename(columns={"workflowName": "workflow_name"}) - return df - - -def gh_get_overall_build_status_for_repo( - repo_df: "pd.Dataframe", # noqa: F821 - *, - use_colors: bool = True, -) -> str: - """ - Return the overall status of the workflows for a repo. - - :param repo_df: table with the status of the workflows for a repo - :param use_colors: if True, return the status with colors - :return: overall status of the build for a repo - """ - if use_colors: - hdbg.dassert( - hsystem.is_running_in_ipynb(), - msg="The use_colors option is applicable only when running inside a Jupyter notebook", - ) - # See: https://stackoverflow.com/questions/19746350/how-to-change-color-in-markdown-cells-ipython-jupyter-notebook - failed_status = 'Failed' - success_status = 'Success' - else: - failed_status = "Failed" - success_status = "Success" - if "failure" in repo_df["conclusion"].values: - # The build is failed if at least one workflow is failed. - overall_status = failed_status - else: - overall_status = success_status - return overall_status - - -def gh_get_workflow_type_names( - repo_name: str, *, sort: bool = True -) -> List[str]: - """ - Get a list of workflow names for a given repo. - - :param repo_name: git repo name in the format "organization/repo", - e.g., "cryptokaizen/cmamp" - :param sort: if True, sort the list of workflow names - :return: list of workflow names, e.g., ["Fast tests", "Slow tests"] - """ - hdbg.dassert_isinstance(repo_name, str) - _LOG.debug(hprint.to_str("repo_name")) - # Get the workflow list. - cmd = f"gh workflow list --json name --repo {repo_name}" - workflow_types = _gh_run_and_get_json(cmd) - workflow_names = [workflow["name"] for workflow in workflow_types] - if sort: - workflow_names = sorted(workflow_names) - # Check for duplicate workflow names. 
- hdbg.dassert_no_duplicates( - workflow_names, - f"Found duplicate workflow names in repo '{repo_name}'", - ) - return workflow_names - - -def gh_get_org_team_names(org_name: str = "", *, sort: bool = True) -> List[str]: - """ - Get a list of team names for a GitHub organization. - - :param org_name: organization name, e.g., "causify-ai". If empty, - infers from the current repo - :param sort: if True, sort team names alphabetically - :return: list of team names (slugs) - Example output: - ``` - ["dev_system", "dev_frontend", "qa_team"] - ``` - """ - org_name = _get_org_name(org_name) - _LOG.debug(hprint.to_str("org_name")) - # Get the team list using GitHub API. - cmd = f"gh api /orgs/{org_name}/teams --paginate" - teams_data = _gh_run_and_get_json(cmd) - # Extract team slugs from the response. - team_names = [team["slug"] for team in teams_data] - # Sort team names if requested. - if sort: - team_names = sorted(team_names) - _LOG.debug("Found %s teams for org '%s'", len(team_names), org_name) - return team_names - - -def gh_get_team_member_names(team_slug: str, *, org_name: str = "") -> List[str]: - """ - Get a list of member usernames for a specific team in a GitHub - organization. - - :param team_slug: team slug (URL-friendly team name), e.g., "dev_system" - :param org_name: organization name, e.g., "causify-ai". If empty, - infers from the current repo - :return: list of member usernames (login names) - Example output: - ``` - ["username1", "username2", "username3"] - ``` - """ - org_name = _get_org_name(org_name) - hdbg.dassert_isinstance(team_slug, str) - _LOG.debug(hprint.to_str("org_name team_slug")) - # Get the team members using GitHub API. - cmd = f"gh api /orgs/{org_name}/teams/{team_slug}/members --paginate" - members_data = _gh_run_and_get_json(cmd) - # Extract usernames from the response. 
- usernames = [member["login"] for member in members_data] - _LOG.debug( - "Found %s members in team '%s' (org: '%s')", - len(usernames), - team_slug, - org_name, - ) - return usernames - - -def make_clickable(url: str) -> str: - """ - Wrap a URL as an HTML anchor tag. - - :param url: URL to wrap (e.g., "https://github.com/causify-ai/cmamp/actions/...") - :return: HTML anchor string that makes the URL clickable in rendered Markdown - """ - anchor = f'{url}' - return anchor - - -def color_format(val: str, status_color_mapping: Dict[str, str]) -> str: - """ - Return a background-color style for DataFrame.style.map based on status. - - :param val: value to evaluate for status-based styling (e.g., - "success" or "failure") - :param status_color_mapping: map status strings to color values, - e.g.: { "success": "green", "failure": "red" } - :return: CSS string to apply as a style, e.g., "background-color: - green" - """ - color = status_color_mapping.get(val, "grey") - style = f"background-color: {color}" - return style - - -def render_repo_workflow_status_table( - workflow_df: "pd.DataFrame", # noqa: F821 - status_color_mapping: Dict[str, str], - timezone: str = "America/New_York", -) -> None: - """ - Render a dashboard summary of workflow statuses grouped by repo. 
- - :param workflow_df: data with columns ["repo_name", "workflow_name", - "conclusion", "url"] - :param status_color_mapping: color for outcomes {"success": "green", - "failure": "red"} - :param timezone: timezone for timestamp display - """ - import pandas as pd - from IPython.display import Markdown, display - - workflow_df["url"] = workflow_df["url"].apply(make_clickable) - repos = workflow_df["repo_name"].unique() - display(Markdown("## Overall Status")) - current_timestamp = pd.Timestamp.now(tz=timezone) - display(Markdown(f"**Last run: {current_timestamp}**")) - for repo in repos: - repo_df = workflow_df[workflow_df["repo_name"] == repo] - overall_status = gh_get_overall_build_status_for_repo(repo_df) - display(Markdown(f"## {repo}: {overall_status}")) - repo_df = repo_df.drop(columns=["repo_name"]) - display( - repo_df.style.map( - color_format, - status_color_mapping=status_color_mapping, - subset=["conclusion"], - ) - ) - - -def get_workflow_run_ids( - repo_path: str, workflow_id: str, *, older_than_days: Optional[int] = None -) -> List[str]: - """ - Get workflow run IDs, optionally filtering by age. - - :param repo_path: repository path in format "org/repo" - :param workflow_id: GitHub workflow ID - :param older_than_days: if specified, only return runs older than - this many days - :return: list of run IDs - """ - # See GitHub CLI API documentation: https://cli.github.com/manual/gh_api - # We use the -q/--jq option to filter results using jq syntax. - if older_than_days is not None: - # Use jq to filter runs by age directly in the gh api command. 
- # jq date filtering breakdown: - # - `fromdateiso8601` converts ISO 8601 date to Unix timestamp (seconds since epoch) - # - `now` returns current Unix timestamp - # - Days are converted to seconds (days * 86400 seconds/day) - # - Example: if older_than_days=30, cutoff = now - (30 * 86400) - # Only runs where created_at timestamp < cutoff are selected - cutoff_seconds = older_than_days * 86400 - # Log the cutoff date for debugging. - cutoff_date = datetime.datetime.now( - datetime.timezone.utc - ) - datetime.timedelta(days=older_than_days) - _LOG.debug("Filtering runs created before: %s", cutoff_date.isoformat()) - jq_filter = ( - f".workflow_runs[] | " - f"select((.created_at | fromdateiso8601) < (now - {cutoff_seconds})) | " - f".id" - ) - # WARNING: Using --paginate to fetch all workflow runs can be slow - # for workflows with a large number of runs (e.g., 1000+ runs). - # The GitHub API paginates results, and jq filters each page. - cmd = ( - f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " - f"--paginate -q '{jq_filter}'" - ) - else: - # Get all run IDs without date filtering. - # Example API output (one ID per line): - # 11758293857 - # 11758293856 - # 11758293855 - cmd = ( - f"gh api /repos/{repo_path}/actions/workflows/{workflow_id}/runs " - "--paginate -q '.workflow_runs[].id'" - ) - # Execute command and parse output. - _, run_ids_output = hsystem.system_to_string(cmd) - run_ids = [ - run_id.strip() - for run_id in run_ids_output.strip().split("\n") - if run_id.strip() - ] - return run_ids - - -@task -def gh_delete_workflow_runs( # type: ignore - ctx, workflow_name, older_than_days=None, dry_run=False, confirmation=True -): - """ - Delete all workflow runs for a given workflow. - - :param workflow_name: name of the workflow to delete runs for - :param older_than_days: only delete runs older than this many days - (optional). If None, delete all runs. 
Example: - older_than_days=30 deletes runs created more than 30 days ago - :param dry_run: if True, show what would be deleted without actually - deleting - :param confirmation: if True, prompt user for confirmation before - deletion (default: True) - """ - hlitauti.report_task( - txt=hprint.to_str("workflow_name older_than_days dry_run confirmation") - ) - # Convert older_than_days to int if provided (invoke passes strings). - if older_than_days is not None: - older_than_days = int(older_than_days) - hdbg.dassert_lte(1, older_than_days) - # Login. - gh_login(ctx) - # - repo_full_name_with_host, _ = _get_repo_full_name_from_cmd("current") - # Get workflow ID by name. - repo_path = repo_full_name_with_host.replace("github.com/", "") - workflows = gh_get_workflows(repo_path) - workflow_id = None - for workflow in workflows: - if workflow["name"] == workflow_name: - workflow_id = workflow["id"] - break - if not workflow_id: - available_workflows = [w["name"] for w in workflows] - raise ValueError( - f"Workflow '{workflow_name}' not found. " - f"Available workflows: {available_workflows}" - ) - _LOG.info("Found workflow '%s' with ID: %s", workflow_name, workflow_id) - # Get all run IDs for this workflow, optionally filtering by date. - run_ids = get_workflow_run_ids( - repo_path, workflow_id, older_than_days=older_than_days - ) - # Check if any runs were found. - age_filter_msg = ( - f" older than {older_than_days} days" - if older_than_days is not None - else "" - ) - if not run_ids: - _LOG.info( - "No workflow runs%s found for '%s'", age_filter_msg, workflow_name - ) - return - _LOG.info("Found %d workflow runs%s to delete", len(run_ids), age_filter_msg) - # Prompt for confirmation if required. 
- if confirmation and not dry_run: - confirmation_msg = ( - f"\nAre you sure you want to delete {len(run_ids)} workflow run(s)" - f"{age_filter_msg} for '{workflow_name}'?\n" - f"Repository: {repo_full_name_with_host}\n" - f"Type 'yes' or 'y' to confirm: " - ) - user_input = input(confirmation_msg).strip().lower() - if user_input not in ("yes", "y"): - _LOG.info("Deletion cancelled by user") - return - _LOG.info("User confirmed deletion, proceeding...") - # Delete each run. - deleted_count = 0 - failed_count = 0 - for run_id in run_ids: - try: - cmd = f"gh api -X DELETE /repos/{repo_path}/actions/runs/{run_id}" - _LOG.info("Deleting run %s", run_id) - hlitauti.run(ctx, cmd, dry_run=dry_run) - deleted_count += 1 - except (invexc.UnexpectedExit, RuntimeError) as e: - _LOG.error("Failed to delete run %s: %s", run_id, str(e)) - failed_count += 1 - _LOG.info( - "Deletion complete: %d successful, %d failed out of %d total runs", - deleted_count, - failed_count, - len(run_ids), - ) - - -# ############################################################################# - -# def gh_get_pr_title(pr_url: str) -> str: -# > gh pr view https://github.com/causify-ai/helpers/pull/754 --json title -q .title -# HelpersTask705_Extend_coverage_in_pytest_to_cover_when_we_run_through_system diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py deleted file mode 100644 index b7a92d78c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_git.py +++ /dev/null @@ -1,1502 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_git as hlitagit -""" - -import logging -import os -import re -import stat -import subprocess -import time -from typing import Any, List - -from invoke import task - -import helpers.hdbg as hdbg -import 
helpers.hsystem as hsystem - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# Bits matching `chmod a+w` / `chmod a-w` on the symlink inode (not the target). -_SYMLINK_WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH - - -def _collect_symlinks(dir: str) -> List[str]: - """ - Collect symlink paths under a given directory. - - :param dir: directory to walk - :return: symlink paths under `dir` - """ - out: List[str] = [] - for dirpath, dirnames, filenames in os.walk(dir, topdown=True): - # Skips `.git` directories. Does not follow symlinked directories. - if ".git" in dirnames: - dirnames.remove(".git") - for name in filenames: - path = os.path.join(dirpath, name) - if os.path.islink(path): - out.append(path) - for name in dirnames: - path = os.path.join(dirpath, name) - if os.path.islink(path): - out.append(path) - return out - - -def _add_write_perm_to_symlink(dir: str) -> None: - """ - Add write permission for all on each symlink under the given directory. - - :param dir: directory to walk - """ - _LOG.info("Adding write permission for all on each symlink under %s", dir) - for path in _collect_symlinks(dir): - try: - mode = os.lstat(path).st_mode - os.chmod( - path, - mode | _SYMLINK_WRITE_BITS, - ) - except OSError: - hdbg.dassert( - False, - "Failed to add write permissions to symlink; manual intervention may be needed", - ) - - -def _remove_write_perm_from_symlink(dir: str) -> None: - """ - Remove write permission for all on each symlink under a given directory. 
- - :param dir: directory to walk - """ - _LOG.info("Removing write permission for all on each symlink under %s", dir) - for path in _collect_symlinks(dir): - if not os.path.exists(path): - _LOG.warning("Skipping broken symlink: %s", path) - continue - try: - mode = os.lstat(path).st_mode - os.chmod( - path, - mode & ~_SYMLINK_WRITE_BITS, - ) - except OSError: - hdbg.dassert( - False, - "Failed to remove write permissions from symlink; manual intervention may be needed", - ) - - -def run_git_recursively(ctx: Any, cmd_: str) -> None: - """ - Execute a git command in the main repository and all submodules. - - :param ctx: Invoke context - :param cmd_: Git command to execute - """ - cmd = cmd_ - hlitauti.run(ctx, cmd) - # Run the same command on all submodules. - cmd = f"git submodule foreach '{cmd_}'" - hlitauti.run(ctx, cmd) - - -@task -def git_pull(ctx): # type: ignore - """ - Pull latest changes from remote for main repo and all submodules. - - Temporarily enables write permissions on symlinks to allow pull operations. - """ - hlitauti.report_task() - # Temporarily grant write access to symlinks needed for pulling. - root_dir = hgit.get_client_root(super_module=False) - _add_write_perm_to_symlink(root_dir) - try: - # Pull with autostash to preserve local changes during pull. - cmd = "git pull --autostash" - run_git_recursively(ctx, cmd) - finally: - # Restore restricted permissions on symlinks after pull completes. - _remove_write_perm_from_symlink(root_dir) - - -@task -def git_fetch_master(ctx): # type: ignore - """ - Fetch master branch from remote without switching to it. - - Updates the local master branch to track the latest remote master without - affecting the current branch. - """ - hlitauti.report_task() - # Fetch remote master directly into local master ref (colon syntax). 
- cmd = "git fetch origin master:master" - run_git_recursively(ctx, cmd) - - -@task -def git_merge_master( - ctx, - abort_if_not_ff=False, - abort_if_not_clean=True, - skip_fetch=False, - auto_merge=False, # type: ignore -): - """ - Merge `origin/master` into the current branch. - - :param abort_if_not_ff: abort if fast-forward is not possible - :param abort_if_not_clean: abort if the client is not clean - :param skip_fetch: skip fetching master - :param auto_merge: automatically commit and push if merge is - successful - """ - hlitauti.report_task() - # Verify working directory is clean before merging to avoid losing changes. - hgit.is_client_clean(dir_name=".", abort_if_not_clean=abort_if_not_clean) - # Fetch latest master from remote to ensure we merge the latest changes. - if not skip_fetch: - git_fetch_master(ctx) - # Perform merge, optionally restricting to fast-forward only to maintain linear history. - cmd = "git merge master" - if abort_if_not_ff: - cmd += " --ff-only" - hlitauti.run(ctx, cmd) - # Commit and push automatically if merge succeeded and user requested it. - if auto_merge: - _LOG.info("Auto-merge enabled: committing and pushing changes") - cmd = 'git commit -am "Merge master" && git push' - hlitauti.run(ctx, cmd) - - -@task -def git_clean(ctx, fix_perms_=False, dry_run=False): # type: ignore - """ - Clean the repo_short_name and its submodules from artifacts. - - Run `git status --ignored` to see what it's skipped. - """ - hlitauti.report_task(txt=hprint.to_str("dry_run")) - - def _run_all_repos(cmd: str) -> None: - # Use `run(ctx, cmd)` instead of `hsystem.system()` so unit tests can easily mock context. - hlitauti.run(ctx, cmd) - # Also clean submodules to ensure they're included in cleanup. - cmd = f"git submodule foreach '{cmd}'" - hlitauti.run(ctx, cmd) - - # Remove untracked files and directories from main repo and submodules. 
- git_clean_cmd = "git clean -fd" - if dry_run: - git_clean_cmd += " --dry-run" - # Suppress errors since git clean reports non-fatal warnings. - git_clean_cmd += " >/dev/null 2>&1" - _run_all_repos(git_clean_cmd) - # TODO(*): Add "are you sure?" or a `--force switch` to avoid to cancel by - # mistake. - # Fix permissions on symlinks if requested, then clean any temporary files created. - if fix_perms_: - cmd = "invoke fix_perms" - hlitauti.run(ctx, cmd) - # Remove temporary files that may have been created during permission fix. - _run_all_repos(git_clean_cmd) - # Remove common build artifacts and cache directories. - to_delete = [ - r"*\.pyc", - r"*\.pyo", - r".coverage", - r".DS_Store", - r".ipynb_checkpoints", - r".mypy_cache", - r".pytest_cache", - r".ruff_cache", - r".venv", - r"__pycache__", - r"cfile", - r"tmp.*", - r"*.tmp", - r".*_cache", - "htmlcov", - ] - opts = [f"-name '{opt}'" for opt in to_delete] - opts = " -o ".join(opts) - cmd = f"find . {opts} | sort" - if not dry_run: - cmd += " | xargs rm -rf" - hlitauti.run(ctx, cmd) - - -@task -def git_add_all_untracked(ctx): # type: ignore - """ - Add all untracked files to Git. - """ - hlitauti.report_task() - # cmd = "git add $(git ls-files -o --exclude-standard)" - cmd = "git ls-files -o --exclude-standard -z | xargs -0 git add" - hlitauti.run(ctx, cmd) - - -@task -def git_patch_create( # type: ignore - ctx, mode="diff", modified=False, branch=False, last_commit=False, files="" -): - """ - Create a patch file for the entire repo_short_name client from the base - revision. This script accepts a list of files to package, if specified. - - The parameters `modified`, `branch`, `last_commit` have the same meaning as - in `_get_files_to_process()`. 
- - :param mode: what kind of patch to create - - "diff": (default) creates a patch with the diff of the files - - "tar": creates a tar ball with all the files - """ - hlitauti.report_task( - txt=hprint.to_str("mode modified branch last_commit files") - ) - _ = ctx - # TODO(gp): Check that the current branch is up to date with master to avoid - # failures when we try to merge the patch. - hdbg.dassert_in( - mode, - ("tar", "diff"), - "Patch mode must be either 'tar' for archives or 'diff' for patches", - ) - # Currently only handles the current submodule (not parent repos). - # TODO(gp): Extend this to handle also nested repos. - super_module = False - git_client_root = hgit.get_client_root(super_module) - hash_ = hgit.get_head_hash(git_client_root, short_hash=True) - # Use timestamp and hash to ensure unique patch filenames across time. - timestamp = hlitauti.get_ET_timestamp() - tag = os.path.basename(git_client_root) - dst_file = f"patch.{tag}.{hash_}.{timestamp}" - if mode == "tar": - dst_file += ".tgz" - elif mode == "diff": - dst_file += ".patch" - else: - hdbg.dfatal("Invalid code path") - _LOG.debug("dst_file=%s", dst_file) - # Show what changes will be included in the patch. - _LOG.info( - "Difference between HEAD and master:\n%s", - hgit.get_summary_files_in_branch("master", dir_name="."), - ) - # Determine which files to include in the patch. - all_ = False - # Allow optional user-specified file subset (can be combined with other selectors). - mutually_exclusive = False - # Filter out directories; patches only work with files. - remove_dirs = True - files_as_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - _LOG.info("Files to save:\n%s", hprint.indent("\n".join(files_as_list))) - if not files_as_list: - _LOG.warning("Nothing to patch: exiting") - return - files_as_str = " ".join(files_as_list) - # Choose command based on patch format: archive vs diff. 
- cmd = "" - if mode == "tar": - # Create compressed tar archive of the selected files. - cmd = f"tar czvf {dst_file} {files_as_str}" - cmd_inv = "tar xvzf" - elif mode == "diff": - # Generate diff against various targets for different merge strategies. - opts: str - if modified: - # Only uncommitted changes in working tree. - opts = "HEAD" - elif branch: - # All changes since branch point (includes commits on current branch). - opts = "master..." - elif last_commit: - # Only changes in the most recent commit. - opts = "HEAD^" - else: - raise ValueError( - "You need to specify one among -modified, --branch, " - "--last-commit" - ) - cmd = f"git diff {opts} --binary {files_as_str} >{dst_file}" - cmd_inv = "git apply" - else: - raise ValueError(f"Invalid cmd='{cmd}'") - # Execute the patch creation command. - _LOG.info("Creating the patch into %s", dst_file) - hdbg.dassert_ne( - cmd, - "", - "Patch creation command must not be empty", - ) - _LOG.debug("cmd=%s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - if not rc: - _LOG.warning("Command failed with rc=%d", rc) - # Provide instructions for applying the patch on different environments. - remote_file = os.path.basename(dst_file) - abs_path_dst_file = os.path.abspath(dst_file) - msg = f""" - # To apply the patch and execute: - > git checkout {hash_} - > {cmd_inv} {abs_path_dst_file} - - # To apply the patch to a remote client: - > export SERVER="server" - > export CLIENT_PATH="~/src" - > scp {dst_file} $SERVER: - > ssh $SERVER 'cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}'" - """ - msg = hprint.dedent(msg) - print(msg) - - -def _filter_git_files_by_type( - file_paths: List[str], - keep_python: bool, - keep_jupyter: bool, - keep_markdown: bool, -) -> List[str]: - """ - Filter files by type for git_files task. - - Unlike linters2 version, this returns a flat list (not a tuple) - and does not separate paired jupytext files. 
- - :param file_paths: files to filter - :param keep_python: include Python files - :param keep_jupyter: include Jupyter notebooks - :param keep_markdown: include Markdown files - :return: filtered list of files - """ - filtered = [] - for f in file_paths: - is_py = f.endswith(".py") - is_ipynb = f.endswith(".ipynb") - is_md = f.endswith(".md") - if ( - (is_py and keep_python) - or (is_ipynb and keep_jupyter) - or (is_md and keep_markdown) - ): - filtered.append(f) - return filtered - - -@task -def git_files( # type: ignore - ctx, - modified=False, - branch=False, - last_commit=False, - keep_python=True, - keep_jupyter=True, - keep_markdown=True, - pbcopy=False, - only_print_files=False, -): - """ - Report which files are changed in the current branch with respect to master. - - The params have the same meaning as in `_get_files_to_process()`. - - :param keep_python: include Python files (default: True) - :param keep_jupyter: include Jupyter notebooks (default: True) - :param keep_markdown: include Markdown files (default: True) - :param only_print_files: only print files without logging headers/footers (default: False) - """ - if not only_print_files: - hlitauti.report_task() - _ = ctx - all_ = False - files = "" - # Use mutually_exclusive=True to enforce exactly one filter mode. - mutually_exclusive = True - remove_dirs = True - files_as_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - # Filter by file type. - files_as_list = _filter_git_files_by_type( - files_as_list, keep_python, keep_jupyter, keep_markdown - ) - print("\n".join(sorted(files_as_list))) - # Optionally copy the file list to clipboard for easy pasting. - if not only_print_files: - res = " ".join(files_as_list) - hsystem.to_pbcopy(res, pbcopy) - - -@task -def git_last_commit_files(ctx, pbcopy=True): # type: ignore - """ - Print the status of the files in the previous commit. 
- - :param pbcopy: save the result into the system clipboard (only on - macOS) - """ - # Display the raw git log output for the latest commit. - cmd = 'git log -1 --name-status --pretty=""' - hlitauti.run(ctx, cmd) - # Parse the files that were actually committed (filtering out deletions if needed). - files = hgit.get_previous_committed_files(".") - txt = "\n".join(files) - print(f"\n# The files modified are:\n{txt}") - # Optionally copy the file list to clipboard for easy pasting into commands. - res = " ".join(files) - hsystem.to_pbcopy(res, pbcopy) - - -@task -def git_roll_amp_forward(ctx): # type: ignore - """ - Update amp submodule pointer to the latest master commit. - - Checks out master in amp, pulls latest changes, updates the parent repo's - submodule pointer, and commits the change. - """ - hlitauti.report_task() - AMP_DIR = "amp" - if os.path.exists(AMP_DIR): - # Update amp submodule to point to the latest master. - cmds = [ - f"cd {AMP_DIR} && git checkout master", - f"cd {AMP_DIR} && git pull", - # Stage the submodule pointer change in the parent repository. - f"git add {AMP_DIR}", - f"git commit -m 'Roll {AMP_DIR} pointer forward'", - "git push", - ] - for cmd in cmds: - hlitauti.run(ctx, cmd) - else: - _LOG.warning("%s does not exist, aborting", AMP_DIR) - - -# TODO(gp): Add git_co(ctx) -# Reuse hgit.git_stash_push() and hgit.stash_apply() -# git stash save your-file-name -# git checkout master -# # do whatever you had to do with master -# git checkout staging -# git stash pop - - -# ############################################################################# -# Branches workflows -# ############################################################################# - - -# TODO(gp): Consider renaming the commands as `git_branch_*` - - -@task -def git_branch_files(ctx): # type: ignore - """ - Report which files were added, changed, and modified in the current branch - with respect to master. 
- - This is a more detailed version of `invoke git_files --branch`, showing file - statuses (added, modified, deleted) rather than just the file list. - """ - hlitauti.report_task() - _ = ctx - # Display the detailed summary of changes made on this branch. - print( - "Difference between HEAD and master:\n" - + hgit.get_summary_files_in_branch("master", dir_name=".") - ) - - -@task -def git_branch_create( # type: ignore - ctx, - branch_name="", - issue_id=0, - repo_short_name="current", - suffix="", - only_branch_from_master=True, - check_branch_name=True, -): - """ - Create and push upstream branch `branch_name` or the one corresponding to - `issue_id` in repo_short_name `repo_short_name`. - - E.g., - ``` - > git checkout -b LemTask169_Get_GH_actions - > git push --set- upstream origin LemTask169_Get_GH_actions - ``` - - :param branch_name: name of the branch to create (e.g., - `LemTask169_Get_GH_actions`) - :param issue_id: use the canonical name for the branch corresponding to that - issue - :param repo_short_name: name of the GitHub repo_short_name that the `issue_id` - belongs to - - "current" (default): the current repo_short_name - - short name (e.g., "amp", "lm") of the branch - :param suffix: suffix (e.g., "02") to add to the branch name when using issue_id - :param only_branch_from_master: only allow to branch from master - :param check_branch_name: make sure the name of the branch is valid like - `{Amp,...}TaskXYZ_...` - """ - hlitauti.report_task() - if issue_id > 0: - # Convert GitHub issue ID to branch name. - hdbg.dassert_eq( - branch_name, - "", - "Cannot specify both --issue and --branch-name; choose one", - ) - title, _ = hlitagh._get_gh_issue_title(issue_id, repo_short_name) - branch_name = title - _LOG.info( - "Issue %d in %s repo_short_name corresponds to '%s'", - issue_id, - repo_short_name, - branch_name, - ) - if suffix != "": - # Add the suffix. 
- _LOG.debug("Adding suffix '%s' to '%s'", suffix, branch_name) - if suffix[0] in ("-", "_"): - _LOG.warning( - "Suffix '%s' should not start with '%s': removing", - suffix, - suffix[0], - ) - suffix = suffix.rstrip("-_") - branch_name += "_" + suffix - _LOG.info("branch_name='%s'", branch_name) - hdbg.dassert_ne( - branch_name, - "", - "Branch name cannot be empty", - ) - if check_branch_name: - # Reject numeric-only branch names to avoid confusion with commit SHAs. - m = re.match(r"^\d+$", branch_name) - hdbg.dassert( - not m, - "Branch names with only numbers are invalid", - ) - # Enforce naming convention `{RepoPrefix}TaskXYZ_Description` for consistency. - # The valid format of a branch name is `AmpTask1903_Implemented_system_...`. - m = re.match(r"^\S+Task\d+_\S+$", branch_name) - hdbg.dassert( - m, - "Branch name must follow convention: '{RepoPrefix,Amp,...}TaskXYZ_...'", - ) - # Prevent accidental duplicate branches. - hdbg.dassert( - not hgit.does_branch_exist(branch_name, mode="all"), - "Branch '%s' already exists", - branch_name, - ) - # Make sure we are branching from `master`, unless that's what the user wants. - # TODO(Vlad): Remove before merging - temporarily allowing branching from non-master. - curr_branch = hgit.get_branch_name() - if curr_branch != "master": - if only_branch_from_master: - _LOG.warning( - f"Branching from '{curr_branch}' instead of 'master'. " - "This is temporarily allowed but should be reviewed before merging." - ) - # hdbg.dfatal( - # f"You should branch from master and not from '{curr_branch}'" - # ) - # Fetch master. - cmd = "git pull --autostash --rebase" - hlitauti.run(ctx, cmd) - # git checkout -b LmTask169_Get_GH_actions_working_on_lm - cmd = f"git checkout -b {branch_name}" - hlitauti.run(ctx, cmd) - cmd = f"git push --set-upstream origin {branch_name}" - hlitauti.run(ctx, cmd) - - -# TODO(gp): @all Move to hgit. 
-def _delete_branches(ctx: Any, tag: str, confirm_delete: bool) -> None: - """ - Delete branches that have been merged into master. - - :param ctx: Invoke context - :param tag: Either "local" for local branches or "remote" for remote branches - :param confirm_delete: If True, ask user for confirmation before deleting - """ - if tag == "local": - # Delete local branches that are already merged into master. - # > git branch --merged - # * AmpTask1251_Update_GH_actions_for_amp_02 - find_cmd = r"git branch --merged master | grep -v master | grep -v \*" - delete_cmd = "git branch -d" - elif tag == "remote": - # Get the branches to delete. - find_cmd = ( - "git branch -r --merged origin/master" - + r" | grep -v master | sed 's/origin\///'" - ) - delete_cmd = "git push origin --delete" - else: - raise ValueError(f"Invalid tag='{tag}'") - # TODO(gp): Use system_to_lines - _, txt = hsystem.system_to_string(find_cmd, abort_on_error=False) - branches = hsystem.text_to_list(txt) - # Print info. - _LOG.info( - "There are %d %s branches to delete:\n%s", - len(branches), - tag, - "\n".join(branches), - ) - if not branches: - # No branch to delete, then we are done. - return - # Ask whether to continue. - if confirm_delete: - hsystem.query_yes_no( - hdbg.WARNING + f": Delete these {tag} branches?", abort_on_no=True - ) - for branch in branches: - cmd_tmp = f"{delete_cmd} {branch}" - hlitauti.run(ctx, cmd_tmp) - - -@task -def git_branch_delete_merged(ctx, confirm_delete=True): # type: ignore - """ - Remove (both local and remote) branches that have been merged into master. - """ - hlitauti.report_task() - # Ensure user is on master since we're deleting branches merged into master. - hdbg.dassert_eq( - hgit.get_branch_name(), - "master", - "Must be on master branch to safely delete merged branches", - ) - # - cmd = "git fetch --all --prune" - hlitauti.run(ctx, cmd) - # Delete local and remote branches that are already merged into master. 
- _delete_branches(ctx, "local", confirm_delete) - _delete_branches(ctx, "remote", confirm_delete) - # - cmd = "git fetch --all --prune" - hlitauti.run(ctx, cmd) - - -@task -def git_branch_rename(ctx, new_branch_name): # type: ignore - """ - Rename current branch both locally and remotely. - """ - hlitauti.report_task() - old_branch_name = hgit.get_branch_name(".") - # Ensure new branch name is actually different to avoid no-op rename. - hdbg.dassert_ne( - old_branch_name, - new_branch_name, - "New branch name must be different from current branch name", - ) - msg = ( - f"Do you want to rename the current branch '{old_branch_name}' to " - f"'{new_branch_name}'" - ) - hsystem.query_yes_no(msg, abort_on_no=True) - # https://stackoverflow.com/questions/30590083 - # Rename the local branch to the new name. - # > git branch -m - cmd = f"git branch -m {new_branch_name}" - hlitauti.run(ctx, cmd) - # Delete the old branch on remote. - # > git push --delete - cmd = f"git push origin --delete {old_branch_name}" - hlitauti.run(ctx, cmd) - # Prevent Git from using the old name when pushing in the next step. - # Otherwise, Git will use the old upstream name instead of . - # > git branch --unset-upstream - cmd = f"git branch --unset-upstream {new_branch_name}" - hlitauti.run(ctx, cmd) - # Push the new branch to remote. - # > git push - cmd = f"git push origin {new_branch_name}" - hlitauti.run(ctx, cmd) - # Reset the upstream branch for the new_name local branch. - # > git push -u - cmd = f"git push origin u {new_branch_name}" - hlitauti.run(ctx, cmd) - print("Done") - - -@task -def git_branch_next_name(ctx, branch_name=None, method="auto"): # type: ignore - """ - Return a name derived from the current branch so that the branch doesn't - exist. 
- - :param branch_name: if `None` use the current branch name, otherwise specify it - :param method: method to use ('auto', 'github_api', 'linear_scan') - - 'auto' (default): tries GitHub API first, falls back to linear scan - - 'github_api': use only GitHub API method (fast) - - 'linear_scan': use only linear scan method (always works) - - E.g., `AmpTask1903_Implemented_system_Portfolio` -> - `AmpTask1903_Implemented_system_Portfolio_3` - """ - hlitauti.report_task() - _ = ctx - branch_next_name = hgit.get_branch_next_name( - curr_branch_name=branch_name, method=method, log_verb=logging.INFO - ) - print(f"branch_next_name='{branch_next_name}'") - - -@task -def git_branch_copy( # type: ignore - ctx, - new_branch_name="", - skip_git_merge_master=False, - use_patch=False, - check_branch_name=True, -): - """ - Create a new branch with the same content of the current branch. - - :param new_branch_name: name for the new branch - :param skip_git_merge_master: skip merging master into current branch - :param use_patch: apply patching instead of merging - :param check_branch_name: enforce branch naming convention like - `{Amp,...}TaskXYZ_...` - """ - # Patch-based copying is not yet implemented. - hdbg.dassert( - not use_patch, - "Patch-based branch copying is not yet implemented", - ) - # Remove untracked files to ensure clean state when copying branch. - cmd = "git clean -fd" - hlitauti.run(ctx, cmd) - curr_branch_name = hgit.get_branch_name() - # Cannot copy master branch since it would be copying the source to itself. - hdbg.dassert_ne( - curr_branch_name, - "master", - "Cannot copy master branch", - ) - # Sync with master first to ensure new branch includes latest changes (if requested). - if not skip_git_merge_master: - cmd = "invoke git_merge_master --abort-if-not-ff" - hlitauti.run(ctx, cmd) - else: - _LOG.warning("Skipping git_merge_master as requested") - if use_patch: - # TODO(gp): Create a patch or do a `git merge`. 
- pass - # Generate unique branch name if not provided. - if new_branch_name is None or new_branch_name == "": - new_branch_name = hgit.get_branch_next_name() - _LOG.info("new_branch_name='%s'", new_branch_name) - hdbg.dassert_ne( - new_branch_name, - None, - "Branch name must not be None after generation", - ) - # Allow scratch branches to bypass naming convention. - if new_branch_name.startswith("gp_scratch"): - check_branch_name = False - # Create or checkout the target branch. - mode = "all" - new_branch_exists = hgit.does_branch_exist(new_branch_name, mode) - if new_branch_exists: - # Switch to existing branch to copy changes into it. - cmd = f"git checkout {new_branch_name}" - else: - # Create new branch from master as base. - cmd = f"git checkout master && invoke git_branch_create --branch-name '{new_branch_name}'" - if not check_branch_name: - cmd += " --no-check-branch-name" - hlitauti.run(ctx, cmd) - if use_patch: - # TODO(gp): Apply the patch. - pass - # Squash merge copies all commits as a single change without creating a merge commit. - cmd = f"git merge --squash --ff {curr_branch_name} && git reset HEAD" - hlitauti.run(ctx, cmd) - - -# /////////////////////////////////////////////////////////////////////////////// - - -def _git_diff_with_branch( - ctx: Any, - hash_: str, - tag: str, - # - dir_name: str, - subdir: str, - # - diff_type: str, - keep_extensions: str, - skip_extensions: str, - file_name: str, - # - only_print_files: bool, - dry_run: bool, -) -> None: - """ - Diff files from this client against files in a branch using vimdiff. - - Same parameters as `git_branch_diff_with`. - """ - _LOG.debug( - hprint.to_str( - "hash_ tag dir_name diff_type subdir keep_extensions skip_extensions" - " file_name only_print_files dry_run" - ) - ) - # Diff only works on non-master branches to avoid comparing with itself. 
- curr_branch_name = hgit.get_branch_name() - hdbg.dassert_ne( - curr_branch_name, - "master", - "Cannot diff master branch against itself", - ) - # Retrieve the list of changed files between current state and the given hash. - cmd = [] - cmd.append("git diff") - if diff_type: - cmd.append(f"--diff-filter={diff_type}") - cmd.append(f"--name-only HEAD {hash_}") - cmd = " ".join(cmd) - files = hsystem.system_to_files( - cmd, dir_name, remove_files_non_present=False - ) - files = sorted(files) - _LOG.debug("%s", "\n".join(files)) - # Filter to a single specific file if requested. - if file_name: - _LOG.debug("Filter by file_name") - _LOG.info("Before filtering files=%s", len(files)) - files_tmp = [] - for f in files: - if f == file_name: - files_tmp.append(f) - hdbg.dassert_eq( - 1, - len(files_tmp), - "Can't find file_name='%s' in\n%s", - file_name, - "\n".join(files), - ) - files = files_tmp - _LOG.info("After filtering by file_name: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Keep only files with specified extensions (useful for focusing on code vs docs). - if keep_extensions: - _LOG.debug("# Filter by keep_extensions") - _LOG.debug("Before filtering files=%s", len(files)) - extensions_lst = keep_extensions.split(",") - _LOG.warning( - "Keeping files with %d extensions: %s", - len(extensions_lst), - extensions_lst, - ) - files_tmp = [] - for f in files: - if any(f.endswith(ext) for ext in extensions_lst): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by keep_extensions: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Exclude files with specified extensions (useful for skipping config or build files). 
- if skip_extensions: - _LOG.debug("# Filter by skip_extensions") - _LOG.debug("Before filtering files=%s", len(files)) - extensions_lst = skip_extensions.split(",") - _LOG.warning( - "Skipping files with %d extensions: %s", - len(extensions_lst), - extensions_lst, - ) - files_tmp = [] - for f in files: - if not any(f.endswith(ext) for ext in extensions_lst): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by skip_extensions: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Limit diff to files within a specific subdirectory. - if subdir != "": - _LOG.debug("# Filter by subdir") - _LOG.debug("Before filtering files=%s", len(files)) - files_tmp = [] - for f in files: - if f.startswith(subdir): - files_tmp.append(f) - files = files_tmp - _LOG.info("After filtering by subdir: files=%s", len(files)) - _LOG.debug("%s", "\n".join(files)) - # Summary of what will be diffed. - _LOG.info("\n" + hprint.frame(f"# files={len(files)}")) - _LOG.info("\n" + "\n".join(files)) - if len(files) == 0: - _LOG.warning("No files match the filter criteria: exiting") - return - if only_print_files: - _LOG.warning("Exiting as per user request with --only-print-files") - return - # Create temporary directory to store base versions for comparison. - root_dir = hgit.get_repo_full_name_from_client(super_module=True) - # TODO(gp): We should get a temp dir. - dst_dir = f"/tmp/{root_dir}/tmp.{tag}" - hio.create_dir(dst_dir, incremental=False) - # Build vimdiff commands for each file, retrieving base version from source hash. - script_txt = [] - for branch_file in files: - _LOG.debug("\n%s", hprint.frame(f"branch_file={branch_file}")) - # Use current file as right side (what the branch currently has). - if os.path.exists(branch_file): - right_file = branch_file - else: - # For deleted files, use /dev/null as the right side. - right_file = "/dev/null" - # Flatten directory structure to avoid naming conflicts in temp directory. 
- tmp_file = branch_file - tmp_file = tmp_file.replace("/", "_") - tmp_file = os.path.join(dst_dir, tmp_file) - _LOG.debug( - "Extracting base version of %s to %s", - branch_file, - tmp_file, - ) - # Extract the base version from the specified hash/branch. - cmd = f"git show {hash_}:{branch_file} >{tmp_file}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc != 0: - # File is new in the branch (didn't exist in base hash). - _LOG.debug("File '%s' is new (doesn't exist in base)", branch_file) - left_file = "/dev/null" - else: - left_file = tmp_file - # Generate vimdiff command to compare base and current versions. - cmd = f"vimdiff {left_file} {right_file}" - _LOG.debug("-> %s", cmd) - script_txt.append(cmd) - script_txt = "\n".join(script_txt) - # Display the diff commands that will be executed. - _LOG.info("\n%s" % hprint.frame("Diffing script")) - _LOG.info(script_txt) - # Create executable script for easy manual re-running. - script_file_name = f"./tmp.vimdiff_branch_with_{tag}.sh" - msg = f"To diff against {tag} run" - hio.create_executable_script(script_file_name, script_txt, msg=msg) - hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) - # Clean up temporary files. - cmd = f"rm -rf {dst_dir}" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -def _git_diff_with_branch_wrapper( - ctx: Any, - hash_: str, - tag: str, - # - dir_name: str, - subdir: str, - include_submodules: bool, - # - diff_type: str, - keep_extensions: str, - skip_extensions: str, - python: bool, - file_name: str, - # - only_print_files: bool, - dry_run: bool, -) -> None: - """ - Wrapper for _git_diff_with_branch that handles Python-specific filtering and submodules. - - Applies Python-specific extension filter if requested, then delegates to _git_diff_with_branch. - If include_submodules is True, also runs the diff for the amp submodule if present. 
- - Parameters are the same as _git_diff_with_branch with the addition of: - :param include_submodules: if True, also diff the amp submodule - :param python: if True, only diff Python files (overrides extension filters) - """ - hdbg.dassert_eq(dir_name, ".") - # If Python mode is enabled, override all extension filters to only diff Python files. - if python: - hdbg.dassert_eq( - diff_type, - "", - "Cannot specify diff_type with python mode", - ) - hdbg.dassert_eq( - keep_extensions, - "", - "Cannot specify keep_extensions with python mode", - ) - hdbg.dassert_eq( - skip_extensions, - "", - "Cannot specify skip_extensions with python mode", - ) - hdbg.dassert_eq( - file_name, - "", - "Cannot specify file_name with python mode", - ) - keep_extensions = "py" - # Diff files in the main repository. - _git_diff_with_branch( - ctx, - hash_, - tag, - dir_name, - subdir, - diff_type, - keep_extensions, - skip_extensions, - file_name, - only_print_files, - dry_run, - ) - # Also diff the amp submodule if it exists and was requested. - if include_submodules: - if hgit.is_amp_present(): - with hsystem.cd("amp"): - _git_diff_with_branch( - ctx, - hash_, - tag, - dir_name, - subdir, - diff_type, - keep_extensions, - skip_extensions, - file_name, - only_print_files, - dry_run, - ) - - -@task -def git_branch_diff_with( # type: ignore - ctx, - target="base", - hash_value="", - # Where to diff. - subdir="", - include_submodules=False, - # What files to diff. - diff_type="", - keep_extensions="", - skip_extensions="", - python=False, - file_name="", - # What actions. - only_print_files=False, - dry_run=False, -): - """ - Diff files of the current branch with master at the branching point. 
- - :param subdir: subdir to consider for diffing, instead of `.` - :param target: - - `base`: diff with respect to the branching point - - `master`: diff with respect to `origin/master` - - `head`: diff modified files - - `hash`: diff with respect to hash specified in `hash` - :param hash_value: the hash to use with target="hash" - :param include_submodules: run recursively on all submodules - :param diff_type: files to diff using git `--diff-filter` options - :param keep_extensions: a comma-separated list of extensions to check, e.g., - 'csv,py'. An empty string means keep all the extensions - :param skip_extensions: a comma-separated list of extensions to skip, e.g., - 'txt'. An empty string means do not skip any extension - :param only_print_files: print files to diff and exit - :param dry_run: execute diffing script or not - """ - # Determine the comparison target based on user preference. - dir_name = "." - hdbg.dassert_in(target, ("base", "master", "head", "hash"), "Invalid target") - # Resolve target to a specific git hash for consistent diffing. - if target == "base": - # Compare against the point where this branch diverged from master. - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'base'", - ) - hash_value = hgit.get_branch_hash(dir_name=dir_name) - tag = "base" - elif target == "master": - # Compare against the current state of the remote master branch. - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'master'", - ) - hash_value = "origin/master" - tag = "origin_master" - elif target == "head": - # Compare working directory against HEAD (uncommitted changes). - hdbg.dassert_eq( - hash_value, - "", - "Cannot specify hash_value when target is 'head'", - ) - hash_value = "" - tag = "head" - elif target == "hash": - # Compare against a user-specified commit hash. 
- hdbg.dassert_ne( - hash_value, - "", - "Must provide hash_value when target is 'hash'", - ) - tag = f"hash@{hash_value}" - else: - raise ValueError(f"Invalid target='{target}") - _git_diff_with_branch_wrapper( - ctx, - hash_value, - tag, - # - dir_name, - subdir, - include_submodules, - # - diff_type, - keep_extensions, - skip_extensions, - python, - file_name, - # - only_print_files, - dry_run, - ) - - -@task -def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore - """ - Copy the code from the src Git client to the dst Git client. - - :param file_name: the name of the file to copy (which is under - `src_git_dir`) - :param src_git_dir: the directory of the source Git client (e.g., - "/Users/saggese/src/helpers1") - :param dst_git_dir: the directory of the destination Git client (e.g., - "/Users/saggese/src/helpers2") - """ - _ = ctx - src_git_dir = hgit.resolve_git_client_dir(src_git_dir) - dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) - # Map source file path to equivalent path in destination repository. - dst_file_path = hgit.project_file_name_in_git_client( - file_name, - src_git_dir, - dst_git_dir, - check_src_file_exists=True, - check_dst_file_exists=False, - ) - _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) - # Perform the file copy operation. - hsystem.system_to_string(f"cp {file_name} {dst_file_path}") - - -# ############################################################################# - - -def _get_submodule_paths() -> List[str]: - """ - Get list of submodule paths from .gitmodules file. - - :return: List of submodule directory paths, empty if no submodules - found - """ - gitmodules_path = ".gitmodules" - if not os.path.exists(gitmodules_path): - _LOG.info("No .gitmodules file found") - return [] - # Extract submodule paths from git config using the .gitmodules file. 
- cmd = "git config --file .gitmodules --get-regexp path" - _, output = hsystem.system_to_string(cmd) - submodule_paths = [] - for line in output.strip().split("\n"): - if line: - # Parse format: "submodule..path " to extract path. - path = line.split(" ", 1)[1] - submodule_paths.append(path) - return submodule_paths - - -def _get_branch_name(submodule_path: str) -> str: - """ - Get the current branch name for a git repository. - - :param submodule_path: Path to the git repository directory - :return: Branch name or error message - """ - hdbg.dassert_dir_exists(submodule_path) - hdbg.dassert_path_exists(os.path.join(submodule_path, ".git")) - # Query git to get the symbolic name of the current HEAD. - cmd = f"cd {submodule_path} && git rev-parse --abbrev-ref HEAD" - _, branch_name = hsystem.system_to_string(cmd) - return branch_name.strip() - - -@task -def git_branches(ctx): # type: ignore - """ - Print the branch name for the main repository and each git submodule - directory. - - Example usage:: - > dev_scripts_helpers/git/print_git_branches.py - . (main): master - submodule1: feature/new-feature - submodule2: develop - submodule3: main - """ - _ = ctx - # Display main repository branch first for clarity. - main_branch = _get_branch_name(".") - print(f". -> {main_branch}") - # List submodule branches to detect if any are out of sync. - submodule_paths = _get_submodule_paths() - if not submodule_paths: - _LOG.debug("No git submodules found in this repository") - return - # Report branch for each submodule. - for path in submodule_paths: - branch_name = _get_branch_name(path) - print(f"{path} -> {branch_name}") - - -@task -def git_branch_is_merged(ctx): # type: ignore - """ - Check if the current branch was merged into master using GitHub API and git. - - Uses GitHub API to check for open/closed PRs and git to verify branch presence on remote. 
- """ - _ = ctx - hlitauti.report_task() - branch_name = hgit.get_branch_name() - print(f"branch_name='{branch_name}'") - # Check for PRs targeting master from the current branch on GitHub. - cmd = f"gh pr list --base master --head {branch_name}" - ctx.run(cmd, pty=True) - # Verify if the branch still exists on the remote repository. - cmd = f"git ls-remote --heads origin {branch_name}" - ctx.run(cmd, pty=True) - - -@task -def git_backup( - ctx, - file_mode="all", - backup_dir=None, - include_subrepos=True, - dry_run=False, -): # type: ignore - """ - Create a zip file with modified and/or untracked files from the current - repository and optionally its submodules. - - The zip file is created with a timestamp-based name in the specified - backup directory (default: $HOME/src/backups). - Example: `modified_files.helpers_root.20251119_130034.zip` - - :param file_mode: which files to include: "all" (default), "modified", or - "untracked" - :param backup_dir: directory where to save the zip file (default: - $HOME/src/backups) - :param include_subrepos: whether to include submodule files (default: True) - :param dry_run: if True, only print the files that would be included - without creating the zip - """ - hlitauti.report_task( - txt=hprint.to_str("file_mode, backup_dir, include_subrepos, dry_run") - ) - _ = ctx - # Validate backup scope to ensure user intent is clear. - valid_modes = ["all", "modified", "untracked"] - hdbg.dassert_in( - file_mode, - valid_modes, - "Invalid file_mode '%s'; must be one of: %s", - file_mode, - ", ".join(valid_modes), - ) - # Use default backup location if not specified. - if backup_dir is None: - backup_dir = os.path.join(os.path.expanduser("~"), "src", "backups") - hio.create_dir(backup_dir, incremental=True) - # Determine repository name for readable backup file naming. - super_module = False - git_client_root = hgit.get_client_root(super_module) - # Include timestamp to avoid overwriting previous backups. 
- timestamp = hlitauti.get_ET_timestamp() - repo_name = os.path.basename(git_client_root) - zip_file_name = f"modified_files.{repo_name}.{timestamp}.zip" - # Collect files from the main repository. - _LOG.info("Collecting %s files from main repository...", file_mode) - main_repo_files = hgit.get_modified_and_untracked_files(".", mode=file_mode) - _LOG.info("Found %d files in main repository", len(main_repo_files)) - all_files = [] - for file_path in main_repo_files: - all_files.append((".", file_path)) - # Also include submodule files if requested to ensure complete backup. - if include_subrepos: - submodule_paths = _get_submodule_paths() - if submodule_paths: - _LOG.info( - "Found %d submodule(s), collecting files...", - len(submodule_paths), - ) - for submodule_path in submodule_paths: - hdbg.dassert_dir_exists( - submodule_path, - msg=f"Submodule path does not exist: {submodule_path}", - ) - _LOG.info("Checking submodule: %s", submodule_path) - submodule_files = hgit.get_modified_and_untracked_files( - submodule_path, mode=file_mode - ) - _LOG.info( - "Found %d files in submodule %s", - len(submodule_files), - submodule_path, - ) - for file_path in submodule_files: - all_files.append((submodule_path, file_path)) - else: - _LOG.info("No submodules found") - else: - _LOG.info("Skipping submodules (include_subrepos=False)") - # Verify there's content to backup before proceeding. - if not all_files: - _LOG.warning("No %s files found. Nothing to zip.", file_mode) - return - # Display summary of what will be backed up. - _LOG.info( - "\n%s\nFound %d total files to include:\n%s", - hprint.frame("Files to include in zip"), - len(all_files), - hprint.indent( - "\n".join( - [ - ( - os.path.join(repo_path, file_path) - if repo_path != "." - else file_path - ) - for repo_path, file_path in all_files - ] - ) - ), - ) - if dry_run: - _LOG.warning("Dry-run mode: not creating zip file") - return - # Create zip file with all collected files. 
- zip_file_path = os.path.join(backup_dir, zip_file_name) - _LOG.info("Creating zip file: %s", zip_file_path) - import zipfile - - with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf: - for repo_path, file_path in all_files: - full_path = os.path.join(repo_path, file_path) - # Maintain directory hierarchy in archive for easy restoration. - arcname = ( - os.path.join(repo_path, file_path) - if repo_path != "." - else file_path - ) - try: - zipf.write(full_path, arcname=arcname) - _LOG.debug("Added to zip: %s", arcname) - except Exception as e: - _LOG.warning("Failed to add %s to zip: %s", full_path, e) - _LOG.info("Successfully created zip file: %s", zip_file_path) - # Display location for easy access. - abs_zip_path = os.path.abspath(zip_file_path) - print(f"\nZip file created at: {abs_zip_path}") - - -@task -def gh_watch(ctx, *, interval=60): # type: ignore - """ - Watch GitHub workflow status with periodic updates. - - Runs `invoke gh_workflow_list` every N seconds using the `watch` command. - If running in tmux, temporarily renames the window to "*GH_WATCH*" for - visibility and restores it on exit. - - :param interval: Update interval in seconds - """ - hlitauti.report_task() - # Check if running inside tmux and save original window name. - old_pane_title = None - if os.environ.get("TMUX"): - _LOG.info("Running in tmux, saving window name") - _, old_pane_title = hsystem.system_to_one_line( - "tmux display-message -p '#W'" - ) - _LOG.info("Original window name: %s", old_pane_title) - # Rename window to indicate we're watching workflows. - hsystem.system("tmux rename-window '*GH_WATCH*'") - try: - # Watch workflows by repeatedly running gh_workflow_list. - while True: - # Clear screen before displaying updated workflow status. - subprocess.run("clear; invoke gh_workflow_list", shell=True) - _LOG.info("Sleeping for %d seconds before next update", interval) - time.sleep(interval) - finally: - # Restore original tmux window name if it was changed. 
- if old_pane_title is not None: - _LOG.info("Restoring window name: %s", old_pane_title) - hsystem.system(f"tmux rename-window '{old_pane_title}'") - - -# TODO(gp): Add the following scripts: -# dev_scripts/git/gcl -# dev_scripts/git/git_branch.sh -# dev_scripts/git/git_branch_point.sh -# dev_scripts/create_class_diagram.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py deleted file mode 100644 index ff4043f1d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_integrate.py +++ /dev/null @@ -1,837 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_integrate as hlitaint -""" - -import datetime -import logging -import os -from typing import List, Optional, Set, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -DEFAULT_SRC_DIR_BASENAME = "cmamp1" -DEFAULT_DST_DIR_BASENAME = "kaizenflow1" - -# DEFAULT_SRC_DIR_BASENAME="amp1" -# DEFAULT_DST_DIR_BASENAME="cmamp1" - - -def _dassert_current_dir_matches(expected_dir_basename: str) -> None: - """ - Ensure that the name of the current dir is the one expected. - - E.g., `/Users/saggese/src/cmamp1` is a valid dir for an integration - branch for `cmamp1`. - """ - _LOG.debug(hprint.to_str("expected_dir_basename")) - # Get the basename of the current dir. 
- curr_dir_basename = os.path.basename(os.getcwd()) - # Check that it's what is expected. - hdbg.dassert_eq( - curr_dir_basename, - expected_dir_basename, - "The current dir '%s' doesn't match the expected dir '%s'", - curr_dir_basename, - expected_dir_basename, - ) - - -# TODO(gp): -> _dassert_is_integration_dir -def _dassert_is_integration_branch(abs_dir: str) -> None: - """ - Ensure that the branch in `abs_dir` is a valid integration or lint branch. - - E.g., `AmpTask1786_Integrate_20220402` is a valid integration - branch. - """ - _LOG.debug(hprint.to_str("abs_dir")) - branch_name = hgit.get_branch_name(dir_name=abs_dir) - hdbg.dassert_ne(branch_name, "master") - hdbg.dassert( - ("_Integrate_" in branch_name) or ("_Lint_" in branch_name), - "Invalid branch_name='%s' in abs_dir='%s'", - branch_name, - abs_dir, - ) - - -def _clean_both_integration_dirs(abs_dir1: str, abs_dir2: str) -> None: - """ - Run `i git_clean` on the passed dirs. - - :param abs_dir1, abs_dir2: full paths of the dirs to clean - """ - _LOG.debug(hprint.to_str("abs_dir1 abs_dir2")) - # - cmd = f"cd {abs_dir1} && invoke git_clean" - hsystem.system(cmd) - # - cmd = f"cd {abs_dir2} && invoke git_clean" - hsystem.system(cmd) - - -@task -def integrate_create_branch(ctx, dir_basename, dry_run=False): # type: ignore - """ - Create the branch for integration of `dir_basename` (e.g., amp1) in the - current dir. - - :param dir_basename: specify the dir name (e.g., `amp1`) to ensure the set-up is - correct. - """ - hlitauti.report_task() - # Check that the current dir has the name `dir_basename`. - _dassert_current_dir_matches(dir_basename) - # Login in GitHub. - hlitagh.gh_login(ctx) - # Create the integration branch with the current date, e.g., - # `AmpTask1786_Integrate_20211231`. 
- date = datetime.datetime.now().date() - date_as_str = date.strftime("%Y%m%d") - branch_name = f"AmpTask1786_Integrate_{date_as_str}" - # query_yes_no("Are you sure you want to create the branch ") - _LOG.info("Creating branch '%s'", branch_name) - cmd = f"invoke git_branch_create --branch-name '{branch_name}'" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -# ############################################################################# - - -def _resolve_src_dst_names( - src_dir_basename: str, - dst_dir_basename: str, - subdir: str, - *, - check_exists: bool = True, -) -> Tuple[str, str]: - """ - Return the full path of `src_dir_basename` and `dst_dir_basename`. - - :param src_dir_basename: the current dir (e.g., `amp1`) - :param dst_dir_basename: a dir parallel to the current one (`cmamp1`) - :param check_exists: check that the dst dir exists - - :return: absolute paths of both directories - """ - curr_parent_dir = os.path.dirname(os.getcwd()) - # - abs_src_dir = os.path.join(curr_parent_dir, src_dir_basename, subdir) - abs_src_dir = os.path.normpath(abs_src_dir) - hdbg.dassert_dir_exists(abs_src_dir) - # - abs_dst_dir = os.path.join(curr_parent_dir, dst_dir_basename, subdir) - abs_dst_dir = os.path.normpath(abs_dst_dir) - if check_exists: - hdbg.dassert_dir_exists(abs_dst_dir) - return abs_src_dir, abs_dst_dir - - -@task -def integrate_diff_dirs( # type: ignore - ctx, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - reverse=False, - subdir="", - copy=False, - use_linux_diff=False, - check_branches=True, - clean_branches=True, - remove_usual=False, - run_diff_script=True, - dry_run=False, -): - """ - Integrate repos from dirs `src_dir_basename` to `dst_dir_basename` by diffing - or copying all the files with differences. - - ``` - # Use the default values for src / dst dirs to represent the usual set-up. - > i integrate_diff_dirs \ - --src-dir-basename amp1 \ - --dst-dir-basename cmamp1 \ - --subdir . 
- ``` - - :param src_dir_basename: dir with the source branch (e.g., amp1) - :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) - :param reverse: switch the roles of the default source and destination branches - :param subdir: filter to the given subdir for both dirs (e.g., - `src_dir_basename/subdir` and `dst_dir_basename/subdir`) - :param copy: copy the files instead of diffing - :param use_linux_diff: use Linux `diff` instead of `diff_to_vimdiff.py` - :param remove_usual: remove the usual mismatching files (e.g., `.github`) - :param run_diff_script: run the diff script - :param dry_run: do not execute the commands - """ - _ = ctx - hlitauti.report_task() - if reverse: - src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename - _LOG.warning( - "Reversing dirs: %s", - hprint.to_str2(src_dir_basename, dst_dir_basename), - ) - # Check that the integration branches are in the expected state. - # _dassert_current_dir_matches(src_dir_basename) - # When we integrate a dir that doesn't exist in the dst branch, we need to - # skip the check for existence. - check_exists = False - abs_src_dir, abs_dst_dir = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists - ) - hio.create_dir(abs_dst_dir, incremental=True) - if check_branches: - _dassert_is_integration_branch(abs_src_dir) - _dassert_is_integration_branch(abs_dst_dir) - else: - _LOG.warning("Skipping integration branch check") - # Clean branches if needed. - if clean_branches: - # We can clean up only the root dir. - if subdir == "": - _clean_both_integration_dirs(abs_src_dir, abs_dst_dir) - else: - _LOG.warning("Skipping integration branch cleaning") - # Copy or diff dirs. - _LOG.info("abs_src_dir=%s", abs_src_dir) - _LOG.info("abs_dst_dir=%s", abs_dst_dir) - hdbg.dassert_ne(abs_src_dir, abs_dst_dir) - if copy: - # Copy the files. 
- if dry_run: - cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" - else: - rsync_opts = "--delete -a" - cmd = f"rsync {rsync_opts} {abs_src_dir}/ {abs_dst_dir}" - else: - # Diff the files. - if use_linux_diff: - cmd = f"diff -r --brief {abs_src_dir} {abs_dst_dir}" - else: - cmd = "diff_to_vimdiff.py" - if run_diff_script: - cmd += " --run_diff_script" - else: - cmd += " --no_run_diff_script" - _LOG.warning("Skipping running diff script") - cmd += f" --dir1 {abs_src_dir} --dir2 {abs_dst_dir}" - if remove_usual: - vals = [ - r"\/\.github\/", - ] - regex = "|".join(vals) - cmd += f" --ignore_files='{regex}'" - # We need to use `system` to get vimdiff to connect to stdin and stdout. - if not dry_run: - # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) - os.system(cmd) - - -# ############################################################################# - - -# TODO(gp): Allow to pass the hash of the last integration to consider. -# Factor out the logic to find the hash - -# Sometimes we want to see the changes in one dir since an integration point - -# E.g., find all the changes in `datapull` since the last integration -# -# > git log --oneline datapull -# 77f612f75 SorrIssue244 CCXT timestamp representation unit test (#317) -# 6b981b1f6 Sorrtask298 rename get docker cmd to get docker run cmd (#331) -# bd33a5fb9 SorrTask267_Parquet_to_CSV (#267) -# 9819fd117 AmpTask1786_Integrate_20230518_im (#273) <==== -# d530ed561 Update (#272) -# b75eab7ad AmpTask1786_Integrate_20230518_3 (#271) -# -# > git difftool 9819fd117.. datapull -# ... -# -# > git diff --name-only 9819fd117.. 
datapull -# datapull/ccxt/data/extract/test/test_ccxt_extractor.py -# datapull/common/data/transform/convert_pq_to_csv.py -# datapull/im_lib_tasks.py -# datapull/test/test_im_lib_tasks.py -# -# for file in datapull/ccxt/data/extract/test/test_ccxt_extractor.py datapull/common/data/transform/convert_pq_to_csv.py datapull/im_lib_tasks.py datapull/test/test_im_lib_tasks.py; do -# vimdiff ~/src/cmamp1/$file ~/src/kaizenflow1/$file -# done - - -def _find_files_touched_since_last_integration( - abs_dir: str, subdir: str -) -> List[str]: - """ - Return the list of files modified since the last integration for `abs_dir`. - - :param abs_dir: directory to cd before executing this script - :param subdir: consider only the files under `subdir` - """ - _LOG.debug(hprint.to_str2(abs_dir)) - dir_basename = os.path.basename(abs_dir) - # TODO(gp): dir_basename can be computed from abs_dir_name to simplify the - # interface. - # Change the dir to the desired one. - old_dir = os.getcwd() - try: - os.chdir(abs_dir) - # Find the hash of all integration commits. - cmd = "git log --date=local --oneline --date-order | grep AmpTask1786_Integrate" - # Remove integrations like "'... Merge branch 'master' into - # AmpTask1786_Integrate_20220113'" - cmd += " | grep -v \"Merge branch 'master' into \"" - _, txt = hsystem.system_to_string(cmd) - _LOG.debug("integration commits=\n%s", txt) - txt = txt.split("\n") - # > git log --date=local --oneline --date-order | grep AmpTask1786_Integrate - # 72a1a101 AmpTask1786_Integrate_20211218 (#1975) - # 2acfd6d7 AmpTask1786_Integrate_20211214 (#1950) - # 318ab0ff AmpTask1786_Integrate_20211210 (#1933) - hdbg.dassert_lte(2, len(txt)) - print(f"# last_integration: '{txt[0]}'") - last_integration_hash = txt[0].split()[0] - print("* " + hprint.to_str("last_integration_hash")) - # Find the first commit after the commit with the last integration. 
- cmd = f"git log --oneline --reverse --ancestry-path {last_integration_hash}^..master" - _, txt = hsystem.system_to_string(cmd) - print(f"* commits after last integration=\n{txt}") - txt = txt.split("\n") - # > git log --oneline --reverse --ancestry-path 72a1a101^..master - # 72a1a101 AmpTask1786_Integrate_20211218 (#1975) - # 90e90353 AmpTask1955_Lint_20211218 (#1976) - # 4a2b45c6 AmpTask1858_Implement_buildmeister_workflows_in_invoke (#1860) - hdbg.dassert_lte(2, len(txt)) - first_commit_hash = txt[1].split()[0] - _LOG.debug("first_commit: '%s'", txt[1]) - _LOG.debug(hprint.to_str("first_commit_hash")) - # Find all the files touched in each branch. - cmd = f"git diff --name-only {first_commit_hash}..HEAD" - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - finally: - os.chdir(old_dir) - _LOG.debug("Files modified since the integration=\n%s", "\n".join(files)) - # Filter files by subdir, if needed. - if subdir: - filtered_files = [] - for file in files: - if file.startswith(subdir): - filtered_files.append(file) - files = filtered_files - # Reorganize the files. - hdbg.dassert_no_duplicates(files) - files = sorted(files) - # Save to file for debugging. - file_name = os.path.join( - f"tmp.integrate_find_files_touched_since_last_integration.{dir_basename}.txt" - ) - hio.to_file(file_name, "\n".join(files)) - _LOG.debug("Saved file to '%s'", file_name) - return files - - -@task -def integrate_find_files_touched_since_last_integration( # type: ignore - ctx, - subdir="", -): - """ - Print the list of files modified since the last integration for this dir. - """ - hlitauti.report_task() - abs_dir = os.getcwd() - _ = ctx - files = _find_files_touched_since_last_integration(abs_dir, subdir) - # Print the result. 
- tag = "Files modified since the integration" - print(hprint.frame(tag)) - print("\n".join(files)) - - -# ############################################################################# - - -def _integrate_files( - files: Set[str], - abs_left_dir: str, - abs_right_dir: str, - only_different_files: bool, -) -> List[Tuple[str, str, str]]: - """ - Build a list of files to compare based on the pattern. - - :param files: relative path of the files to compare :param - abs_left_dir, abs_right_dir: path of the left / right dir - :param only_different_files: include in the script only the files - that are different - :return: list of files to compare - """ - _LOG.debug(hprint.to_str("abs_left_dir abs_right_dir only_different_files")) - files_to_diff: List[Tuple[str, str, str]] = [] - for file in sorted(list(files)): - _LOG.debug(hprint.to_str("file")) - left_file = os.path.join(abs_left_dir, file) - right_file = os.path.join(abs_right_dir, file) - # Check if both the files exist and are the same. - both_exist = os.path.exists(left_file) and os.path.exists(right_file) - if not both_exist: - # Both files don't exist: nothing to do. - equal: Optional[bool] = False - skip: Optional[bool] = True - else: - # They both exist. - if only_different_files: - # We want to check if they are the same. - try: - equal = hio.from_file(left_file) == hio.from_file(right_file) - except RuntimeError as e: - # RuntimeError: error='utf-8' codec can't decode byte 0xd0 in - # position 10: invalid continuation byte - _LOG.error("Caught error:\n%s", e) - equal = True - skip = equal - else: - # They both exist, and we want to process even if they are the - # same. - equal = None - skip = False - _ = left_file, right_file, both_exist, equal, skip - _LOG.debug(hprint.to_str("left_file right_file both_exist equal skip")) - # Execute the action on the 2 files. 
- if skip: - _LOG.debug(" Skip %s", file) - else: - _LOG.debug(" -> (%s, %s)", left_file, right_file) - files_to_diff.append((file, left_file, right_file)) - return files_to_diff - - -@task -def integrate_files( # type: ignore - ctx, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - reverse=False, - subdir="", - mode="vimdiff", - file_direction="", - only_different_files=True, - check_branches=True, -): - """ - Find and copy the files that are touched only in one branch or in both. - - :param ctx: invoke ctx - :param src_dir_basename: dir with the source branch (e.g., amp1) - :param dst_dir_basename: dir with the destination branch (e.g., cmamp1) - :param reverse: switch the roles of the default source and destination branches - :param subdir: directory to select - :param mode: - - "print_dirs": print the directories - - "vimdiff": diff the files - - "copy": copy the files - :param file_direction: which files to diff / copy: - - "common_files": files touched in both branches - - "union_files": files touched in either branch - - "only_files_in_src": files touched only in the src dir - - "only_files_in_dst": files touched only in the dst dir - :param only_different_files: consider only the files that are different among - the branches - :param check_branches: ensure that the current branches are for integration - and not `master` - """ - hlitauti.report_task() - _ = ctx - if reverse: - src_dir_basename, dst_dir_basename = dst_dir_basename, src_dir_basename - _LOG.warning( - "Reversing dirs: %s", - hprint.to_str2(src_dir_basename, dst_dir_basename), - ) - # Check that the integration branches are in the expected state. - _dassert_current_dir_matches(src_dir_basename) - # We want to stay at the top level dir, since the subdir is handled by - # `integrate_find_files_touched_since_last_integration`. 
- abs_src_dir, abs_dst_dir = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir="" - ) - if check_branches: - _dassert_is_integration_branch(abs_src_dir) - _dassert_is_integration_branch(abs_dst_dir) - else: - _LOG.warning("Skipping integration branch check") - # Find the files touched in each branch since the last integration. - src_files = set( - _find_files_touched_since_last_integration(abs_src_dir, subdir) - ) - dst_files = set( - _find_files_touched_since_last_integration(abs_dst_dir, subdir) - ) - # - if file_direction == "common_files": - files = src_files.intersection(dst_files) - elif file_direction == "only_files_in_src": - files = src_files - dst_files - elif file_direction == "only_files_in_dst": - files = dst_files - src_files - elif file_direction == "union_files": - files = src_files.union(dst_files) - else: - raise ValueError(f"Invalid file_direction='{file_direction}'") - # - files_to_diff = _integrate_files( - files, - abs_src_dir, - abs_dst_dir, - only_different_files, - ) - # Print the files. - print(hprint.frame(file_direction)) - _LOG.debug(hprint.to_str("files_to_diff")) - files_set = list(zip(*files_to_diff)) - if not files_set: - _LOG.warning("No file found: skipping") - return - files_set = sorted(list(files_set[0])) - txt = "\n".join(files_set) - print(hprint.indent(txt)) - # Process the files touched. - if mode == "print_dirs": - files_lst = [] - for file, left_file, right_file in files_to_diff: - dir_name = os.path.dirname(file) - # Skip empty dir, e.g., for `pytest.ini`. - if dir_name != "": - files_lst.append(dir_name) - files_lst = sorted(list(set(files_lst))) - print(hprint.frame("Dirs changed")) - print("\n".join(files_lst)) - else: - # Build the script with the operations to perform. 
- if mode == "copy" and file_direction == "only_files_in_dst": - raise ValueError("Can't copy files from destination") - script_txt = [] - for file, left_file, right_file in files_to_diff: - if mode == "copy": - cmd = f"cp -f {left_file} {right_file}" - elif mode == "vimdiff": - cmd = f"vimdiff {left_file} {right_file}" - else: - raise ValueError(f"Invalid mode='{mode}'") - _LOG.debug(" -> %s", cmd) - script_txt.append(cmd) - script_txt = "\n".join(script_txt) - # Execute / save the script. - if mode == "copy": - for cmd in script_txt.split("\n"): - hsystem.system(cmd) - elif mode == "vimdiff": - # Save the diff script. - script_file_name = f"./tmp.vimdiff.{file_direction}.sh" - hio.create_executable_script(script_file_name, script_txt) - print(f"# To diff run:\n> {script_file_name}") - else: - raise ValueError(f"Invalid mode='{mode}'") - - -@task -def integrate_find_files( # type: ignore - ctx, - subdir="", -): - """ - Find the files that are touched in the current branch since last - integration. - """ - hlitauti.report_task() - _ = ctx - # - abs_src_dir = "." - abs_src_dir = os.path.normpath(abs_src_dir) - hdbg.dassert_dir_exists(abs_src_dir) - # Find the files touched in each branch since the last integration. - src_files = sorted( - _find_files_touched_since_last_integration(abs_src_dir, subdir) - ) - print("* Files touched:\n" + "\n".join(src_files)) - - -@task -def integrate_diff_overlapping_files( # type: ignore - ctx, src_dir_basename, dst_dir_basename, subdir="" -): - """ - Find the files modified in both branches `src_dir_basename` and - `dst_dir_basename` Compare these files from HEAD to master version before - the branch point. - - This is used to check what changes were made to files modified by - both branches. - """ - hlitauti.report_task() - _ = ctx - # Check that the integration branches are in the expected state. 
- _dassert_current_dir_matches(src_dir_basename) - # When we integrate a dir that doesn't exist in the dst branch, we need to - # skip the check for existence. - check_exists = False - src_dir_basename, dst_dir_basename = _resolve_src_dst_names( - src_dir_basename, dst_dir_basename, subdir, check_exists=check_exists - ) - _dassert_is_integration_branch(src_dir_basename) - _dassert_is_integration_branch(dst_dir_basename) - _clean_both_integration_dirs(src_dir_basename, dst_dir_basename) - # Find the files modified in both branches. - src_hash = hgit.get_branch_hash(src_dir_basename) - _LOG.info("src_hash=%s", src_hash) - dst_hash = hgit.get_branch_hash(dst_dir_basename) - _LOG.info("dst_hash=%s", dst_hash) - diff_files1 = os.path.abspath("./tmp.files_modified1.txt") - diff_files2 = os.path.abspath("./tmp.files_modified2.txt") - cmd = f"cd {src_dir_basename} && git diff --name-only {src_hash} HEAD >{diff_files1}" - hsystem.system(cmd) - cmd = f"cd {dst_dir_basename} && git diff --name-only {dst_hash} HEAD >{diff_files2}" - hsystem.system(cmd) - common_files = "./tmp.common_files.txt" - cmd = f"comm -12 {diff_files1} {diff_files2} >{common_files}" - hsystem.system(cmd) - # Get the base files to diff. - files = hio.from_file(common_files).split("\n") - files = [f for f in files if f != ""] - _LOG.info("Found %d files to diff:\n%s", len(files), "\n".join(files)) - # Retrieve the original file and create the diff command. - script_txt = [] - for src_file in files: - hdbg.dassert_file_exists(src_file) - # TODO(gp): Add function to add a suffix to a name, using - # os.path.dirname(), os.path.basename(), os.path.split_extension(). - dst_file = src_file.replace(".py", ".base.py") - # Save the base file. - cmd = f"git show {src_hash}:{src_file} >{dst_file}" - rc = hsystem.system(cmd, abort_on_error=False) - if rc == 0: - # The file was created: nothing to do. - pass - elif rc == 128: - # Note that the file potentially could not exist, i.e., it was added - # in the branch. 
In this case Git returns: - # ``` - # rc=128 fatal: path 'dataflow/pipelines/real_time/test/ - # test_dataflow_pipelines_real_time_pipeline.py' exists on disk, but - # not in 'ce54877016204315766e90df7c45192bec1fbf20' - src_file = "/dev/null" - else: - raise ValueError(f"cmd='{cmd}' returned {rc}") - # Update the script to diff. - script_txt.append(f"vimdiff {dst_file} {src_file}") - # Save the script to compare. - script_file_name = "./tmp.vimdiff_overlapping_files.sh" - script_txt = "\n".join(script_txt) - hio.create_executable_script(script_file_name, script_txt) - print(f"# To diff against the base run:\n> {script_file_name}") - - -# ############################################################################# - - -def _infer_dst_file_path( - src_file_path: str, - *, - default_src_dir_basename: str = DEFAULT_SRC_DIR_BASENAME, - default_dst_dir_basename: str = DEFAULT_DST_DIR_BASENAME, - check_exists: bool = True, -) -> Tuple[str, str]: - """ - Convert a file path across two dirs with the same data structure. - - E.g., - `.../src/cmamp1/.../test_data_snapshots/alpha_numeric_data_snapshots` - is converted into - `.../src/amp1/.../test_data_snapshots/alpha_numeric_data_snapshots` - """ - _LOG.debug(hprint.to_str("src_file_path")) - src_file_path = os.path.normpath(src_file_path) - if check_exists: - hdbg.dassert_path_exists(src_file_path) - # Extract the repo dir name, by looking for one of the default basenames. 
- target_dir = f"/{default_dst_dir_basename}/" - idx = src_file_path.find(target_dir) - if idx >= 0: - src_dir_basename = default_dst_dir_basename - dst_dir_basename = default_src_dir_basename - subdir = src_file_path[idx + len(target_dir) :] - else: - target_dir = f"/{default_src_dir_basename}/" - idx = src_file_path.find(target_dir) - if idx >= 0: - src_dir_basename = default_src_dir_basename - dst_dir_basename = default_dst_dir_basename - subdir = src_file_path[idx + len(target_dir) :] - else: - raise ValueError( - f"Can't find either '{default_src_dir_basename}' or " - f"'{default_dst_dir_basename}' in file_path=" - f"'{src_file_path}'" - ) - # Replace src dir (e.g., `cmamp1`) with dst dir (e.g., `amp1`). - dst_file_path = src_file_path.replace( - f"/{src_dir_basename}/", f"/{dst_dir_basename}/" - ) - _LOG.debug(hprint.to_str("dst_file_path subdir")) - if check_exists: - hdbg.dassert_path_exists(dst_file_path) - return dst_file_path, subdir - - -@task -def integrate_rsync( # type: ignore - ctx, - src_dir, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - dst_dir="", - check_dir=True, - dry_run=False, -): - """ - Use `rsync` to bring two dirs to sync. - - E.g., - ``` - > invoke integrate_diff_dirs - ... - ... Only in .../cmamp1/.../alpha_numeric_data_snapshots: alpha - ... Only in .../amp1/.../alpha_numeric_data_snapshots: latest - - # Accept the `cmamp1` side vs the `amp1` side with: - > invoke integrate_rsync .../cmamp1/.../alpha_numeric_data_snapshots/ - ``` - - :param src_dir: dir to be used. If empty, it is inferred from file_name - :param dst_dir: dir to be used. 
If empty, it is inferred from file_name - :param check_dir: force checking that src_dir and dst_dir are valid - integration dirs - :param dry_run: print the system command instead of executing them - """ - hlitauti.report_task() - _ = ctx - src_dir = os.path.normpath(src_dir) - hdbg.dassert_path_exists(src_dir) - _LOG.info(hprint.to_str("src_dir")) - if check_dir: - _dassert_is_integration_branch(src_dir) - # Resolve the dst dir. - if dst_dir == "": - dst_dir, _ = _infer_dst_file_path( - src_dir, - default_src_dir_basename=src_dir_basename, - default_dst_dir_basename=dst_dir_basename, - ) - if check_dir: - _dassert_is_integration_branch(dst_dir) - dst_dir = os.path.normpath(dst_dir) - hdbg.dassert_path_exists(dst_dir) - _LOG.info(hprint.to_str("dst_dir")) - # - _LOG.info("Syncing:\n'%s'\nto\n'%s'", src_dir, dst_dir) - cmd = f"rsync --delete -a -r {src_dir}/ {dst_dir}/" - hsystem.system(cmd, log_level=logging.INFO, dry_run=dry_run) - - -@task -def integrate_file( # type: ignore - ctx, - file_name, - src_dir_basename=DEFAULT_SRC_DIR_BASENAME, - dst_dir_basename=DEFAULT_DST_DIR_BASENAME, - dry_run=False, -): - """ - Diff corresponding files in two different repos. - - ``` - # The path is assumed referred to current dir. - > i integrate_file --file-name helpers/lib_tasks_integrate.py - - > i integrate_file --file-name /Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py - - > i integrate_file \ - --file-name helpers/lib_tasks_integrate.py \ - --src-dir-name cmamp1 - --dst-dir-name kaizenflow1 - ``` - - :param file_name: it can be a full path (e.g., - `/Users/saggese/src/kaizenflow1/helpers/lib_tasks_integrate.py`) - or a relative path to the root of the Git repo (e.g., - `helpers/lib_tasks_integrate.py) - :param dst_dir: dir to be used. 
If empty, it is inferred from file_name - :param check_dir: force checking that src_dir and dst_dir are valid - integration dirs - :param dry_run: print the system command instead of executing them - """ - hlitauti.report_task() - _ = ctx - file_name = os.path.normpath(file_name) - hdbg.dassert_file_exists(file_name) - # If the file is in the current dir, we need to prepend the dir name. - if not file_name.startswith("/"): - file_name = os.path.join(os.getcwd(), file_name) - _LOG.info(hprint.to_str("file_name")) - # Resolve the src / dst dir, if needed. - dst_file_name, _ = _infer_dst_file_path( - file_name, - default_src_dir_basename=src_dir_basename, - default_dst_dir_basename=dst_dir_basename, - ) - _LOG.info(hprint.to_str("file_name dst_file_name")) - # - _LOG.info("Syncing:\n'%s'\nto\n'%s'", file_name, dst_file_name) - cmd = f"vimdiff {file_name} {dst_file_name}" - # We need to use `system` to get vimdiff to connect to stdin and stdout. - if not dry_run: - # hlitauti.run(ctx, cmd, dry_run=dry_run, print_cmd=True) - os.system(cmd) - - -# Compare the timestamp of last modification of a file. 
-# FILE=helpers/lib_tasks_git.py; (cd ~/src/cmamp1; git log -1 $FILE); (cd ~/src/kaizenflow1; git log -1 $FILE) - -# > git log --pretty=format:"%h - %an, %ad : %s" --date=short | grep _Integrate_ | head -5 -# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) -# 5a05a0c94 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_6 (#365) -# 6c3ad7d87 - GP Saggese, 2023-06-29 : AmpTask1786_Integrate_20230627_5 (#364) -# 36abfd8b3 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_3 (#361) -# 65fe42d38 - GP Saggese, 2023-06-28 : AmpTask1786_Integrate_20230627_2 (#360) - -# In Sorr -# GIT_INTEGR_HASH=fffa1c8b2 -# fffa1c8b2 - GP Saggese, 2023-06-30 : AmpTask1786_Integrate_20230627_7 (#367) - -# In cmamp -# 20526ed09 - GP Saggese, 2023-08-10 : AmpTask1786_Integrate_20230810_2 (#5011) - -# Show files changed since an integration point -# > git diff --name-only $GIT_INTEGR_HASH dataflow_amp -# dataflow_amp/system/mock1/test/test_mock1_forecast_system.py - -# Show the difference since an integration point -# git difftool $GIT_INTEGR_HASH.. dataflow_amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py deleted file mode 100644 index a3599f2da..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_lint.py +++ /dev/null @@ -1,443 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_lint as hlitalin -""" - -import datetime -import filecmp -import logging -import os - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. 
-import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Linter. -# ############################################################################# - - -@task -def lint_check_python_files_in_docker( # type: ignore - ctx, - python_compile=True, - python_execute=True, - modified=False, - branch=False, - last_commit=False, - all_=False, - files="", -): - """ - Compile and execute Python files checking for errors. - - This is supposed to be run inside Docker. - - The params have the same meaning as in `_get_files_to_process()`. - """ - hlitauti.report_task() - _ = ctx - # We allow to filter through the user specified `files`. - mutually_exclusive = False - remove_dirs = True - file_list = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files, - mutually_exclusive, - remove_dirs, - ) - _LOG.debug("Found %d files:\n%s", len(file_list), "\n".join(file_list)) - # Filter keeping only Python files. - _LOG.debug("Filtering for Python files") - exclude_paired_jupytext = True - file_list = hio.keep_python_files(file_list, exclude_paired_jupytext) - _LOG.debug("file_list=%s", "\n".join(file_list)) - _LOG.info("Need to process %d files", len(file_list)) - if not file_list: - _LOG.warning("No files were selected") - # Scan all the files. 
- failed_filenames = [] - for file_name in file_list: - _LOG.info("Processing '%s'", file_name) - if python_compile: - import compileall - - success = compileall.compile_file(file_name, force=True, quiet=1) - _LOG.debug("file_name='%s' -> python_compile=%s", file_name, success) - if not success: - msg = f"'{file_name}' doesn't compile correctly" - _LOG.error(msg) - failed_filenames.append(file_name) - # TODO(gp): Add also `python -c "import ..."`, if not equivalent to `compileall`. - if python_execute: - cmd = f"python {file_name}" - rc = hsystem.system(cmd, abort_on_error=False, suppress_output=False) - _LOG.debug("file_name='%s' -> python_compile=%s", file_name, rc) - if rc != 0: - msg = f"'{file_name}' doesn't execute correctly" - _LOG.error(msg) - failed_filenames.append(file_name) - hprint.log_frame( - _LOG, - f"failed_filenames={len(failed_filenames)}", - verbosity=logging.INFO, - ) - _LOG.info("\n".join(failed_filenames)) - error = len(failed_filenames) > 0 - return error - - -@task -def lint_check_python_files( # type: ignore - ctx, - python_compile=True, - python_execute=True, - modified=False, - branch=False, - last_commit=False, - all_=False, - files="", -): - """ - Compile and execute Python files checking for errors. - - The params have the same meaning as in `_get_files_to_process()`. - """ - _ = ( - python_compile, - python_execute, - modified, - branch, - last_commit, - all_, - files, - ) - # Execute the same command line but inside the container. E.g., - # /Users/saggese/src/venv/amp.client_venv/bin/invoke lint_docker_check_python_files --branch - cmd_line = hdbg.get_command_line() - # Replace the full path of invoke with just `invoke`. 
- cmd_line = cmd_line.split() - cmd_line = ["/venv/bin/invoke lint_check_python_files_in_docker"] + cmd_line[ - 2: - ] - docker_cmd_ = " ".join(cmd_line) - cmd = f'invoke docker_cmd --cmd="{docker_cmd_}"' - hlitauti.run(ctx, cmd) - - -def _get_lint_docker_cmd( - base_image: str, - docker_cmd_: str, - stage: str, - version: str, - *, - use_entrypoint: bool = True, -) -> str: - """ - Create a command to run in Linter service. - - :param docker_cmd_: command to run - :param stage: the image stage to use - :return: the full command to run - """ - if base_image == "": - base_path = os.environ["CSFY_ECR_BASE_PATH"] - # Get an image to run the linter on. - linter_image = f"{base_path}/helpers" - else: - linter_image = base_image - _LOG.debug(hprint.to_str("linter_image")) - # Execute command line. - cmd: str = hlitadoc._get_docker_compose_cmd( - linter_image, - stage, - version, - docker_cmd_, - use_entrypoint=use_entrypoint, - ) - return cmd - - -@task -def lint_detect_cycles( # type: ignore - ctx, - dir_name=".", - stage="prod", - version="", - out_file_name="lint_detect_cycles.output.txt", - debug_tool=False, -): - """ - Detect cyclic imports in the directory files. - - For param descriptions, see `lint()`. - - :param dir_name: the name of the dir to detect cyclic imports in - - By default, the check will be carried out in the dir from where - the task is run - :param debug_tool: print the output of the cycle detector - """ - hlitauti.report_task() - # Remove the log file. - if os.path.exists(out_file_name): - cmd = f"rm {out_file_name}" - hlitauti.run(ctx, cmd) - # Prepare the command line. - docker_cmd_opts = [dir_name] - if debug_tool: - docker_cmd_opts.append("-v DEBUG") - docker_cmd_ = ( - "$(find -wholename '*import_check/detect_import_cycles.py') " - + hlitauti._to_single_line_cmd(docker_cmd_opts) - ) - # Execute command line. 
- base_image = "" - cmd = _get_lint_docker_cmd(base_image, docker_cmd_, stage, version) - # Use `PIPESTATUS` otherwise the exit status of the pipe is always 0 - # because writing to a file succeeds. - cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}; exit $PIPESTATUS" - # Run. - hlitauti.run(ctx, cmd) - - -# pylint: disable=line-too-long -@task -def lint( # type: ignore - ctx, - base_image="", - stage="prod", - version="", - files="", - from_file="", - skip_files="", - dir_name="", - modified=False, - last_commit=False, - branch=False, - # It needs to be a string to allow the user to specify "serial". - num_threads="serial", - only_format=False, - only_check=False, -): - """ - Lint files. - - ``` - # To lint specific files: - > i lint --files="dir1/file1.py dir2/file2.md" - - # To lint the files changed in the last commit, excluding specific files: - > i lint --last-commit --skip-files="dir1/file1.py dir2/file2.md" - - # To lint all the files in the current dir using only formatting actions: - > i lint --dir-name . --only-format - - # To lint the files modified in the current git client: - > i lint --modified - - # To exclude certain paths from linting: - > i lint --files="$(find . -name '*.py' -not -path './compute/*' -not -path './amp/*')" - ``` - - :param stage: the image stage to use (e.g., "prod", "dev", "local") - :param version: the version of the container to use - :param files: specific files to lint (e.g. "dir1/file1.py dir2/file2.md") - :param from_file: specific file storing files to lint - :param skip_files: specific files to skip during linting (e.g. "dir1/file1.py dir2/file2.md") - :param dir_name: name of the dir where all files should be linted - :param modified: lint the files modified in the current git client - :param last_commit: lint the files modified in the previous commit - :param branch: lint the files modified in the current branch w.r.t. master - :param num_threads: number of threads to use ("serial", -1, 0, 1, 2, ...) 
- :param only_format: run only the modifying actions of Linter (e.g., black) - :param only_check: run only the non-modifying actions of Linter (e.g., pylint) - """ - # Check if the user is in a repo root. - hdbg.dassert( - hgit.is_cwd_git_repo(), - msg="Linter should run from repo root", - ) - hlitauti.report_task() - # Prepare the command line. - lint_cmd_opts = [] - # Add the file selection argument. - hdbg.dassert_eq( - int(len(files) > 0) - + int(len(from_file) > 0) - + int(len(dir_name) > 0) - + int(modified) - + int(last_commit) - + int(branch), - 1, - msg="Specify exactly one among --files, --from_file, --dir-name, --modified, --last-commit, --branch", - ) - if len(files) > 0: - lint_cmd_opts.append(f"--files {files}") - elif len(from_file) > 0: - lint_cmd_opts.append(f"--from_file {from_file}") - elif len(dir_name) > 0: - lint_cmd_opts.append(f"--dir_name {dir_name}") - elif modified: - lint_cmd_opts.append("--modified") - elif last_commit: - lint_cmd_opts.append("--last_commit") - elif branch: - lint_cmd_opts.append("--branch") - else: - raise ValueError("No file selection arguments are specified") - if len(skip_files) > 0: - lint_cmd_opts.append(f"--skip_files {skip_files}") - # - lint_cmd_opts.append(f"--num_threads {num_threads}") - # Add the action selection argument, if needed. - hdbg.dassert_lte( - int(only_format) + int(only_check), - 1, - msg="Specify only one among --only-format, --only-check", - ) - if only_format: - lint_cmd_opts.append("--only_format") - elif only_check: - lint_cmd_opts.append("--only_check") - else: - _LOG.info("All Linter actions selected") - # Compose the command line. - if hserver.is_host_mac(): - find_cmd = "$(find . -path '*linters/base.py')" - else: - find_cmd = "$(find -wholename '*linters/base.py')" - lint_cmd_ = find_cmd + " " + hlitauti._to_single_line_cmd(lint_cmd_opts) - docker_cmd_ = _get_lint_docker_cmd( - base_image, lint_cmd_, stage=stage, version=version - ) - # Run. 
- hlitauti.run(ctx, docker_cmd_) - - -@task -def lint_check_if_it_was_run(ctx): # type: ignore - """ - Check if the linter was run in the current branch. - - - abort the task with error if the files were modified - """ - hlitauti.report_task() - # Check if the files were modified. - hgit.is_client_clean(abort_if_not_clean=True) - - -@task -def lint_create_branch(ctx, dry_run=False): # type: ignore - """ - Create the branch for linting in the current dir. - - The dir needs to be specified to ensure the set-up is correct. - """ - hlitauti.report_task() - # - date = datetime.datetime.now().date() - date_as_str = date.strftime("%Y%m%d") - branch_name = f"AmpTask1955_Lint_{date_as_str}" - # query_yes_no("Are you sure you want to create the branch '{branch_name}'") - _LOG.info("Creating branch '%s'", branch_name) - cmd = f"invoke git_branch_create --branch-name '{branch_name}'" - hlitauti.run(ctx, cmd, dry_run=dry_run) - - -@task -def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # type: ignore - """ - Sync code needed to run linter / ai_review from a Git client to the current one. - - :param git_client_name: the name of the Git client to sync from. It can be - something like "helpers1" and it will be used from "$HOME/src" or can - be a full path. - :param revert_to_original: if `True`, revert the changes to the original - """ - _ = ctx - hlitauti.report_task() - # Copy the code from the src Git client to the current one. - src_git_dir = hgit.resolve_git_client_dir(git_client_name) - # - files_to_copy = [ - # "hgit.py", - # "hmarkdown.py", - "llm_prompts.py", - "llm_transform.py", - "inject_todos.py", - "all.coding_style_guidelines.reference.md", - ] - # Revert the files in the current git client to the original code. 
- if revert_to_original: - _LOG.debug("Reverting to original code ...") - for file_name in files_to_copy: - _LOG.debug("Reverting %s to original code", file_name) - src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) - git_root_dir = hgit.find_git_root(src_git_dir) - src_file_path = os.path.relpath(src_file_path, git_root_dir) - cmd = "git checkout -- " + src_file_path - hsystem.system(cmd) - _LOG.info("Done") - return - # Get the path to the helpers repo. - src_helpers_dir = hgit.find_helpers_root(src_git_dir) - hdbg.dassert_ne(src_helpers_dir, "") - hdbg.dassert_dir_exists(src_helpers_dir) - # - dst_helpers_dir = hgit.find_helpers_root() - hdbg.dassert_dir_exists(dst_helpers_dir) - _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir")) - # - _LOG.info( - "Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir - ) - # Find the files to copy. - for file_name in files_to_copy: - _LOG.debug(hprint.to_str("file_name")) - # Get the path to the file in the src Git client. - src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) - src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path)) - _LOG.debug(hprint.to_str("src_file_path")) - hdbg.dassert_file_exists(src_file_path) - # Get the path to the file in the dst Git client. - dst_file_path = hgit.project_file_name_in_git_client( - src_file_path, src_helpers_dir, dst_helpers_dir - ) - _LOG.debug(hprint.to_str("dst_file_path")) - # Copy the file. - _LOG.debug(hprint.to_str("src_file_path dst_file_path")) - dir_name = os.path.dirname(dst_file_path) - # Check that the files are different. - if os.path.exists(src_file_path) and os.path.isdir(dst_file_path): - if filecmp.cmp(src_file_path, dst_file_path, shallow=False): - _LOG.info( - "File '%s' is identical to '%s', skipping", - src_file_path, - dst_file_path, - ) - continue - # Copy the file. 
- hio.create_dir(dir_name, incremental=True) - cmd = f"cp -f {src_file_path} {dst_file_path}" - _LOG.debug(hprint.to_str("cmd")) - _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path) - hsystem.system(cmd) - _LOG.info("Done") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py deleted file mode 100644 index 215820d4d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_perms.py +++ /dev/null @@ -1,380 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_perms as hlitaper -""" - -import grp -import logging -import os -import pwd -import stat -from typing import Dict, List, Tuple - -import tqdm -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. 
-import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Fix permission -# ############################################################################# - - -# The desired invariants are that all files -# 1) are owned by our user or by Docker user -# 2) have the shared group as group -# 3) have the same user and group permissions - -# E.g., -# -rw-rw-r-- 1 sasm sasm-fileshare 21877 Nov 3 18:11 pytest_logger.log - -# The possible problems are: -# -r--r--r-- 1 sasm sasm-fileshare ./.git/objects/02/4df16f66c87bdfb -# -rw-r--r-- 1 265533 sasm-fileshare ./core_lime/dataflow/nodes/test/te -# -rw-rw-r-- 1 265533 sasm-fileshare ./research/real_time/notebooks/Lim - -# drwxr-sr-x 2 gsaggese sasm-fileshare 35 Oct 12 21:51 test -# chmod g=u amp/dev_scripts/git/git_hooks/test - - -def _save_dir_status(dir_name: str, filename: str) -> None: - cmd = f'find {dir_name} -name "*" | sort | xargs ls -ld >{filename}' - hsystem.system(cmd) - _LOG.info("Saved dir status in %s", filename) - - -# From https://stackoverflow.com/questions/1830618 -def _get_user_group(filename: str) -> Tuple[str, str]: - """ - Return the symbolic name of user and group of a file. - """ - uid = os.stat(filename).st_uid - try: - user = pwd.getpwuid(uid).pw_name - except KeyError as e: - # _LOG.warning("Error: ", str(e)) - _ = e - user = str(uid) - # - gid = os.stat(filename).st_gid - try: - group = grp.getgrgid(gid).gr_name - except KeyError as e: - _ = e - group = str(gid) - return user, group - - -def _find_files_for_user(dir_name: str, user: str, is_equal: bool) -> List[str]: - """ - Find all the files under `abs_dir` that are owned or not by `user`. - """ - _LOG.debug("") - mode = "\\!" 
if not is_equal else "" - cmd = f'find {dir_name} -name "*" {mode} -user "{user}"' - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - return files - - -def _find_files_for_group( - dir_name: str, group: str, is_equal: bool -) -> List[str]: - """ - Find all the files under `abs_dir` that are owned by a group `group`. - """ - _LOG.debug("") - mode = "\\!" if not is_equal else "" - cmd = f'find {dir_name} -name "*" {mode} -group "{group}"' - _, txt = hsystem.system_to_string(cmd) - files: List[str] = txt.split("\n") - return files - - -def _compute_stats_by_user_and_group(dir_name: str) -> Tuple[Dict, Dict, Dict]: - """ - Scan all the files reporting statistics in terms of users and groups. - - It also compute a mapping from file to user and group. - """ - _LOG.debug("") - # Find all files. - cmd = f'find {dir_name} -name "*"' - _, txt = hsystem.system_to_string(cmd) - files = txt.split("\n") - # Get the user of each file. - user_to_files: Dict[str, List[str]] = {} - group_to_files: Dict[str, List[str]] = {} - file_to_user_group: Dict[str, Tuple[str, str]] = {} - for file in files: - user, group = _get_user_group(file) - # Update mapping from user to files. - if user not in user_to_files: - user_to_files[user] = [] - user_to_files[user].append(file) - # Update mapping from group to files. - if group not in group_to_files: - group_to_files[group] = [] - group_to_files[group].append(file) - # Update the mapping from file to (user, group). - hdbg.dassert_not_in(file, file_to_user_group) - file_to_user_group[file] = (user, group) - # Print stats. 
- txt1 = "" - for user, files in user_to_files.items(): - txt1 += f"{user}({len(files)}), " - _LOG.info("user=%s", txt1) - # - txt2 = "" - for group, files in group_to_files.items(): - txt2 += f"{group}({len(files)}), " - _LOG.info("group=%s", txt2) - return user_to_files, group_to_files, file_to_user_group - - -def _ls_l(files: List[str], size: int = 100) -> str: - """ - Run `ls -l` on the files using chunks of size `size`. - """ - txt = [] - for pos in range(0, len(files), size): - files_tmp = files[pos : pos + size] - files_tmp = [f"'{f}'" for f in files_tmp] - cmd = f"ls -ld {' '.join(files_tmp)}" - _, txt_tmp = hsystem.system_to_string(cmd) - txt.append(txt_tmp) - return "\n".join(txt) - - -def _exec_cmd_by_chunks( - cmd: str, files: List[str], abort_on_error: bool, size: int = 100 -) -> None: - """ - Execute `cmd` on files using chunks of size `size`. - """ - for pos in range(0, len(files), size): - files_tmp = files[pos : pos + size] - files_tmp = [f"'{f}'" for f in files_tmp] - cmd = f"{cmd} {' '.join(files_tmp)}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -def _print_problems(dir_name: str = ".") -> None: - """ - Do `ls -l` on files that are not owned by the current user and its group. - - This function is used for debugging. 
- """ - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - user = hsystem.get_user_name() - docker_user = hserver.get_docker_user() - # user_group = f"{user}_g" - # shared_group = hserver.get_docker_shared_group() - files_with_problems = [] - for file, (curr_user, curr_group) in file_to_user_group.items(): - _ = curr_user, curr_group - # Files owned by our user and - # if curr_user == user and curr_group == user_group: - # continue - if curr_user in (user, docker_user): - continue - # if curr_group == shared_group: - # continue - files_with_problems.append(file) - # - txt = _ls_l(files_with_problems) - print(txt) - - -def _change_file_ownership(file: str, abort_on_error: bool) -> None: - """ - Change ownership of files with an invalid user (e.g., 265533) by copying - and deleting. - """ - # pylint: disable=line-too-long - # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # -rw-r--r-- 1 265533 sasm-fileshare 14327 Nov 3 14:01 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # - # > mv ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{,.OLD} - # - # > cp ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py{.OLD,} - # - # > ls -l ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # -rw-r--r-- 1 gsaggese sasm-fileshare 14327 Nov 5 17:58 ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py - # - # > rm -rf ./core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py.OLD - # pylint: enable=line-too-long - hdbg.dassert_file_exists(file) - tmp_file = file + ".OLD" - # - cmd = f"mv {file} {tmp_file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - # - cmd = f"cp {tmp_file} {file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - # - cmd = f"rm -rf {tmp_file}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -def _fix_invalid_owner(dir_name: str, fix: bool, abort_on_error: bool) -> None: - """ - Fix files that are owned by a user 
that is not the current user or the - Docker one. - """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - # - _LOG.info("Before fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - # - user = hsystem.get_user_name() - docker_user = hserver.get_docker_user() - for file, (curr_user, _) in tqdm.tqdm(file_to_user_group.items()): - if curr_user not in (user, docker_user): - _LOG.info("Fixing file '%s'", file) - hdbg.dassert_file_exists(file) - cmd = f"ls -l {file}" - hsystem.system( - cmd, abort_on_error=abort_on_error, suppress_output=False - ) - if fix: - _change_file_ownership(file, abort_on_error) - # - _LOG.info("After fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - - -def _fix_group(dir_name: str, fix: bool, abort_on_error: bool) -> None: - """ - Ensure that all files are owned by the shared group. - """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - _LOG.info("Before fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - if fix: - # Get the user and the group. - user = hsystem.get_user_name() - user_group = f"{user}_g" - shared_group = hserver.get_docker_shared_group() - # - for file, (curr_user, curr_group) in file_to_user_group.items(): - # If the group is the shared group there is nothing to do. - if curr_group == shared_group: - continue - cmd = f"chgrp {shared_group} {file}" - if curr_user == user: - # This is a paranoia check. - hdbg.dassert_eq(curr_group, user_group) - else: - # For files not owned by the current user, we need to `sudo`. - cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - _LOG.info("After fix") - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - else: - _LOG.warning("Skipping fix") - - -def _fix_group_permissions(dir_name: str, abort_on_error: bool) -> None: - """ - Ensure that all files are owned by the shared group. 
- """ - _LOG.info("\n%s", hprint.frame(hintros.get_function_name())) - _, _, file_to_user_group = _compute_stats_by_user_and_group(dir_name) - user = hsystem.get_user_name() - # docker_user = get_default_param("DOCKER_USER") - for file, (curr_user, curr_group) in tqdm.tqdm(file_to_user_group.items()): - _ = curr_group - st_mode = os.stat(file).st_mode - perms = oct(st_mode & 0o777) - # perms=0o775 - if perms[2] != perms[3]: - _LOG.debug("%s -> %s, %s", file, oct(st_mode), perms) - cmd = f"chmod g=u {file}" - if curr_user != user: - # For files not owned by the current user, we need to `sudo`. - cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - is_dir = os.path.isdir(file) - if is_dir: - # pylint: disable=line-too-long - # From https://www.gnu.org/software/coreutils/manual/html_node/Directory-Setuid-and-Setgid.html - # If a directory - # inherit the same group as the directory, - # pylint: enable=line-too-long - has_set_group_id = st_mode & stat.S_ISGID - if not has_set_group_id: - cmd = f"chmod g+s {file}" - if curr_user != user: - # For files not owned by the current user, we need to `sudo`. 
- cmd = f"sudo -u {curr_user} {cmd}" - hsystem.system(cmd, abort_on_error=abort_on_error) - - -@task -def fix_perms( # type: ignore - ctx, dir_name=".", action="all", fix=True, abort_on_error=True -): - """ - :param action: - - `all`: run all the fixes - - `print_stats`: print stats about file users and groups - - `print_problems`: - - `fix_invalid_owner`: fix the files with an invalid owner (e.g., mysterious - 265533) - - `fix_group`: ensure that shared group owns all the files - - `fix_group_permissions`: ensure that the group permissions are the same - as the owner ones - """ - _ = ctx - hlitauti.report_task() - # - if hserver.is_dev4(): - if action == "all": - action = ["fix_invalid_owner", "fix_group", "fix_group_permissions"] - else: - action = [action] - # - file_name1 = "./tmp.fix_perms.before.txt" - _save_dir_status(dir_name, file_name1) - # - if "print_stats" in action: - _compute_stats_by_user_and_group(dir_name) - if "print_problems" in action: - _print_problems(dir_name) - if "fix_invalid_owner" in action: - _fix_invalid_owner(dir_name, fix, abort_on_error) - if "fix_group" in action: - _fix_group(dir_name, fix, abort_on_error) - if "fix_group_permissions" in action: - _fix_group_permissions(dir_name, abort_on_error) - # - file_name2 = "./tmp.fix_perms.after.txt" - _save_dir_status(dir_name, file_name2) - # - cmd = f"To compare run:\n> vimdiff {file_name1} {file_name2}" - print(cmd) - elif hserver.is_dev_csfy(): - user = hsystem.get_user_name() - group = user - cmd = f"sudo chown -R {user}:{group} *" - hsystem.system(cmd) - cmd = f"sudo chown -R {user}:{group} .pytest_cache" - hsystem.system(cmd, abort_on_error=False) - elif hserver.is_external_dev(): - # Nothing to do. 
- pass - else: - raise ValueError(f"Invalid machine {os.uname()[1]}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py deleted file mode 100644 index 512c09a60..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_print.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_print as hlitapri -""" - -import logging -import os -import re - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - -# ############################################################################# -# Set-up. -# ############################################################################# - - -@task -def print_setup(ctx): # type: ignore - """ - Print some configuration variables. - """ - hlitauti.report_task() - _ = ctx - var_names = "CSFY_ECR_BASE_PATH BASE_IMAGE".split() - for v in var_names: - print(f"{v}={hlitauti.get_default_param(v)}") - - -@task -def print_tasks(ctx, as_code=False): # type: ignore - """ - Print all the available tasks in `lib_tasks.py`. - - These tasks might be exposed or not by different. 
- - :param as_code: print as python code so that it can be embed in a - `from helpers.lib_tasks import ...` - """ - hlitauti.report_task() - _ = ctx - func_names = [] - lib_tasks_file_name = os.path.join( - hgit.get_amp_abs_path(), "helpers/lib_tasks.py" - ) - hdbg.dassert_file_exists(lib_tasks_file_name) - # TODO(gp): Use __file__ instead of hardwiring the file. - cmd = rf'\grep "^@task" -A 1 {lib_tasks_file_name} | grep def' - # def print_setup(ctx): # type: ignore - # def git_pull(ctx): # type: ignore - # def git_fetch_master(ctx): # type: ignore - _, txt = hsystem.system_to_string(cmd) - for line in txt.split("\n"): - _LOG.debug("line=%s", line) - m = re.match(r"^def\s+(\S+)\(", line) - if m: - func_name = m.group(1) - _LOG.debug(" -> %s", func_name) - func_names.append(func_name) - func_names = sorted(func_names) - if as_code: - print("\n".join([f"{fn}," for fn in func_names])) - else: - print("\n".join(func_names)) - - -@task -def print_env( - ctx, - repo_config=True, - server_config=True, - system_signature=True, - env_vars=True, -): # type: ignore - """ - Print the repo configuration. 
- """ - _ = ctx - print( - henv.env_to_str( - repo_config=repo_config, - server_config=server_config, - system_signature=system_signature, - env_vars=env_vars, - ) - ) - - -# TODO(gp): -# Print a CSV -# cat /share/data/cf_production/20221005/system_log_dir/process_forecasts/target_positions/20221005_153006.csv | column -t -s, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py deleted file mode 100644 index 98a9b203e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_pytest.py +++ /dev/null @@ -1,1743 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_pytest as hlitapyt -""" - -import json -import logging -import os -import re -import sys -from typing import Any, List, Optional, Tuple - -from invoke import task - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hcoverage as hcovera -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hlist as hlist -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.htraceback as htraceb -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_lint as hlitalin -import helpers.lib_tasks_utils as hlitauti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# Run tests. 
-# ############################################################################# - - -_COV_PYTEST_OPTS = [ - # Only compute coverage for current project and not venv libraries. - "--cov=.", - "--cov-branch", - # Report the missing lines. - # Name Stmts Miss Cover Missing - # ------------------------------------------------------------------------- - # myproj/__init__ 2 0 100% - # myproj/myproj 257 13 94% 24-26, 99, 149, 233-236, 297-298 - "--cov-report term-missing", - # Report data in the directory `htmlcov`. - "--cov-report html", - # "--cov-report annotate", -] - - -_TEST_TIMEOUTS_IN_SECS = { - "fast_tests": 5, - "slow_tests": 30, - "superslow_tests": 60 * 60, -} - - -_NUM_TIMEOUT_TEST_RERUNS = { - "fast_tests": 2, - "slow_tests": 1, - "superslow_tests": 1, -} - - -@task -def run_blank_tests(ctx, stage="dev", version=""): # type: ignore - """ - (ONLY CI/CD) Test that pytest in the container works. - """ - hlitauti.report_task() - _ = ctx - base_image = "" - cmd = '"pytest -h >/dev/null"' - docker_cmd_ = hlitadoc._get_docker_compose_cmd( - base_image, stage, version, cmd - ) - hsystem.system(docker_cmd_, abort_on_error=False, suppress_output=False) - - -def _select_tests_to_skip(test_list_name: str) -> str: - """ - Generate text for pytest specifying which tests to deselect. - """ - if test_list_name == "fast_tests": - skipped_tests = "not slow and not superslow" - elif test_list_name == "slow_tests": - skipped_tests = "slow and not superslow" - elif test_list_name == "superslow_tests": - skipped_tests = "not slow and superslow" - else: - raise ValueError(f"Invalid `test_list_name`={test_list_name}") - return skipped_tests - - -def _build_run_command_line( - test_list_name: str, - custom_marker: str, - pytest_opts: str, - skip_submodules: bool, - coverage: bool, - collect_only: bool, - tee_to_file: bool, - n_threads: str, - *, - allure_dir: Optional[str] = None, -) -> str: - """ - Build the pytest run command. 
- - E.g., - - ``` - pytest -m "optimizer and not slow and not superslow" \ - . \ - -o timeout_func_only=true \ - --timeout 5 \ - --reruns 2 \ - --only-rerun "Failed: Timeout" - ``` - - The rest of params are the same as in `run_fast_tests()`. - - The invariant is that we don't want to duplicate pytest options that can be - passed by the user through `-p` (unless really necessary). - - :param test_list_name: "fast_tests", "slow_tests" or - "superslow_tests" - :param custom_marker: specify a space separated list of - `pytest` markers to skip (e.g., `optimizer` for the optimizer - tests, see `pytest.ini`). Empty means no marker to skip - :param allure_dir: directory to save allure results to. If specified, allure - plugin will be installed on-the-fly and results will be generated - and saved to the specified directory - """ - hdbg.dassert_in( - test_list_name, _TEST_TIMEOUTS_IN_SECS, "Invalid test_list_name" - ) - pytest_opts = pytest_opts or "." - pytest_opts_tmp = [] - # Select tests to skip based on the `test_list_name` (e.g., fast tests) - # and on the custom marker, if present. - skipped_tests = _select_tests_to_skip(test_list_name) - timeout_in_sec = _TEST_TIMEOUTS_IN_SECS[test_list_name] - # Detect if we are running on a CK dev server / inside CI - # or a laptop outside the CK infra. - is_outside_ck_infra = ( - not hserver.is_dev_csfy() and not hserver.is_inside_ci() - ) - if is_outside_ck_infra: - timeout_multiplier = 10 - _LOG.warning( - f"Tests are running outside the CK server and CI, timeout increased {timeout_multiplier} times." - ) - # Since we are running outside the CK server we increase the duration - # of the timeout, since the thresholds are set for the CK server. 
- timeout_in_sec *= timeout_multiplier - if custom_marker != "": - pytest_opts_tmp.append(f'-m "{custom_marker} and {skipped_tests}"') - else: - pytest_opts_tmp.append(f'-m "{skipped_tests}"') - if pytest_opts: - pytest_opts_tmp.append(pytest_opts) - # Adding `timeout_func_only` is a workaround for - # https://github.com/pytest-dev/pytest-rerunfailures/issues/99. Because of - # it, we limit only run time, without setup and teardown time. - pytest_opts_tmp.append("-o timeout_func_only=true") - pytest_opts_tmp.append(f"--timeout {timeout_in_sec}") - num_reruns = _NUM_TIMEOUT_TEST_RERUNS[test_list_name] - pytest_opts_tmp.append( - f'--reruns {num_reruns} --only-rerun "Failed: Timeout"' - ) - if hserver.skip_submodules_test(): - # For some repos submodules should be skipped - # regardless of the passed value. - skip_submodules = True - if skip_submodules: - submodule_paths = hgit.get_submodule_paths() - _LOG.warning( - "Skipping %d submodules: %s", len(submodule_paths), submodule_paths - ) - pytest_opts_tmp.append( - " ".join([f"--ignore {path}" for path in submodule_paths]) - ) - if coverage: - pytest_opts_tmp.append(" ".join(_COV_PYTEST_OPTS)) - if collect_only: - _LOG.warning("Only collecting tests as per user request") - pytest_opts_tmp.append("--collect-only") - # Indicate the number of threads for parallelization. - if n_threads != "serial": - pytest_opts_tmp.append(f"-n {str(n_threads)}") - if allure_dir is not None: - pytest_opts_tmp.append(f"--alluredir={allure_dir}") - # Generate test report. - pytest_opts_tmp.append("--junit-xml=tmp.junit.xml") - # Add runnable dir image name to the test report. - image_name = hrecouti.get_repo_config().get_docker_base_image_name() - pytest_opts_tmp.append(f'-o junit_suite_name="{image_name}"') - # Concatenate the options. 
- _LOG.debug("pytest_opts_tmp=\n%s", str(pytest_opts_tmp)) - pytest_opts_tmp = [po for po in pytest_opts_tmp if po != ""] - # TODO(gp): Use to_multi_line_cmd() - pytest_opts = " ".join([po.rstrip().lstrip() for po in pytest_opts_tmp]) - cmd = f"pytest {pytest_opts}" - if allure_dir is not None: - # Install the `allure-pytest` before running the tests. This is needed - # to generate Allure results which serve as an input for generating - # Allure HTML reports. - # Excluding the command `"source /venv/bin/activate"` because post-activation, - # the `PATH` variable lacks necessary values, causing a failure in a test - # associated with `publish_notebook.py`. - cmd = f"sudo /venv/bin/pip install allure-pytest && {cmd}" - if tee_to_file: - cmd += f" 2>&1 | tee tmp.pytest.{test_list_name}.log" - return cmd - - -def _run_test_cmd( - ctx: Any, - stage: str, - version: str, - cmd: str, - coverage: bool, - collect_only: bool, - skip_pull: bool, - start_coverage_script: bool, - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - See params in `run_fast_tests()`. - """ - if collect_only: - # Clean files. - hlitauti.run(ctx, "rm -rf ./.coverage*") - # Run. - base_image = "" - # We need to add some " to pass the string as it is to the container. - cmd = f"'{cmd}'" - # We use "host" for the app container to allow access to the database - # exposing port 5432 on localhost (of the server), when running dind we - # need to switch back to bridge. See CmTask988. - extra_env_vars = ["NETWORK_MODE=bridge"] - docker_cmd_ = hlitadoc._get_docker_compose_cmd( - base_image, stage, version, cmd, extra_env_vars=extra_env_vars - ) - _LOG.info("cmd=%s", docker_cmd_) - # We can't use `hsystem.system()` because of buffering of the output, - # losing formatting and so on, so we stick to executing through `ctx`. - rc: Optional[int] = hlitadoc._docker_cmd( - ctx, docker_cmd_, skip_pull=skip_pull, **ctx_run_kwargs - ) - # Print message about coverage. 
- if coverage: - msg = """ - - The coverage results in textual form are above - - - To browse the files annotate with coverage, start a server (not from the - container): - > (cd ./htmlcov; python -m http.server 33333) - - Then go with your browser to `localhost:33333` to see which code is - covered - """ - msg = hprint.dedent(msg) - print(msg) - if start_coverage_script: - # Create and run a script to show the coverage in the browser. - script_txt = """ - (sleep 2; open http://localhost:33333) & - (cd ./htmlcov; python -m http.server 33333) - """ - script_txt = hprint.dedent(script_txt) - script_name = "./tmp.coverage.sh" - hio.create_executable_script(script_name, script_txt) - coverage_rc = hsystem.system(script_name) - if coverage_rc != 0: - _LOG.warning( - "Setting `rc` to `0` even though the coverage script fails." - ) - rc = 0 - return rc - - -def _run_tests( - ctx: Any, - test_list_name: str, - stage: str, - version: str, - custom_marker: str, - pytest_opts: str, - skip_pull: bool, - skip_submodules: bool, - coverage: bool, - collect_only: bool, - tee_to_file: bool, - n_threads: str, - git_clean_: bool, - *, - start_coverage_script: bool = False, - allure_dir: Optional[str] = None, - # TODO(Grisha): do we need to expose ctx kwargs to the invoke targets? - # E.g., to `run_fast_tests`. See CmTask3602 "All tests fail". - **ctx_run_kwargs: Any, -) -> Optional[int]: - """ - See params in `run_fast_tests()`. - """ - if git_clean_: - cmd = "invoke git_clean --fix-perms" - hlitauti.run(ctx, cmd) - # Build the command line. - cmd = _build_run_command_line( - test_list_name, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - allure_dir=allure_dir, - ) - # Execute the command line. - rc = _run_test_cmd( - ctx, - stage, - version, - cmd, - coverage, - collect_only, - skip_pull, - start_coverage_script, - **ctx_run_kwargs, - ) - return rc - - -# TODO(Grisha): "Unit tests run_*_tests invokes" CmTask #1652. 
-@task -def run_tests( # type: ignore - ctx, - test_lists, - abort_on_first_error=False, - stage="dev", - version="", - custom_marker="", - pytest_opts="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, - **kwargs, -): - """ - :param test_lists: comma separated list with test lists to run (e.g., `fast_test,slow_tests`) - :param abort_on_first_error: stop after the first test list failing - """ - results = [] - for test_list_name in test_lists.split(","): - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - warn=True, - allure_dir=allure_dir, - **kwargs, - ) - if rc != 0: - _LOG.error("'%s' tests failed", test_list_name) - if abort_on_first_error: - sys.exit(-1) - results.append((test_list_name, rc)) - # - rc = any(result[1] for result in results) - # Summarize the results. - _LOG.info("# Tests run summary:") - for test_list_name, rc in results: - if rc != 0: - _LOG.error("'%s' tests failed", test_list_name) - else: - _LOG.info("'%s' tests succeeded", test_list_name) - return rc - - -def _get_custom_marker( - *, - run_only_test_list: str = "", - skip_test_list: str = "", -) -> str: - """ - Get a custom pytest marker from comma-separated string representations of - test lists to run or skip. - - :param run_only_test_list: a string of comma-separated markers to - run, e.g. `run_only_test_list = - "requires_ck_infra,requires_aws"` - :param skip_test_list: a string of comma-separated markers to skip - :return: custom pytest marker - """ - # If we are running outside the CK server / CI, tests requiring CK infra - # should be automatically skipped. - is_outside_ck_infra = ( - not hserver.is_dev_csfy() and not hserver.is_inside_ci() - ) - # Skip tests that requires CK infra. 
- if is_outside_ck_infra: - _LOG.warning( - "Skipping the tests that require CK " - "infra when running outside the CK server / CI." - ) - if skip_test_list: - skip_test_list = "requires_ck_infra," + skip_test_list - else: - skip_test_list = "requires_ck_infra" - # Convert string representations of lists to actual lists. - if run_only_test_list: - # This works as expected when there is a single test in the list. - run_only_test_list_items = run_only_test_list.split(",") - _LOG.warning("Running only tests inside %s.", run_only_test_list_items) - else: - run_only_test_list_items = [] - if skip_test_list: - # This works as expected when there is a single test in the list. - skip_test_list_items = skip_test_list.split(",") - _LOG.warning("Skipping the tests inside %s.", skip_test_list_items) - else: - # The list can be empty when running inside CK infra. - skip_test_list_items = [] - # Convert marker strings for `pytest -m` using `and` and `not`. - run_only_marker_string = " and ".join(run_only_test_list_items) - skip_marker_string = " and ".join( - [("not " + item) for item in skip_test_list_items] - ) - if run_only_marker_string: - if skip_marker_string: - custom_marker = run_only_marker_string + " and " + skip_marker_string - else: - custom_marker = run_only_marker_string - else: - custom_marker = skip_marker_string - return custom_marker - - -# TODO(gp): Pass a test_list in fast, slow, ... instead of duplicating all the code CmTask #1571. -@task -def run_fast_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast tests. check `gh auth status` before invoking to avoid auth - errors. - - :param stage: select a specific stage for the Docker image - :param pytest_opts: additional options for `pytest` invocation. 
It can be empty - :param run_only_test_list: select markers to run. Takes comma-separated tokens, - e.g. `--run_only_test_list = requires_ck_infra,requires_aws` - :param skip_test_list: select markers to skip. Takes comma-separated tokens. - :param skip_submodules: ignore all the dir inside a submodule - :param coverage: enable coverage computation - :param collect_only: do not run tests but show what will be executed - :param tee_to_file: save output of pytest in `tmp.pytest.log` - :param n_threads: the number of threads to run the tests with - - "auto": distribute the tests across all the available CPUs - :param git_clean_: run `invoke git_clean --fix-perms` before running the tests - :param allure_dir: directory to save allure results to. If specified, allure - plugin will be installed on-the-fly and results will be generated - and saved to the specified directory - """ - hlitauti.report_task() - hdbg.dassert( - not (run_only_test_list and skip_test_list), - "You can't specify both --run_only_test_list and --skip_test_list", - ) - test_list_name = "fast_tests" - # Convert cmd line marker lists to a pytest marker list. - custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_slow_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run slow tests. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - test_list_name = "slow_tests" - # Convert cmd line marker lists to a pytest marker list. 
- custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_superslow_tests( # type: ignore - ctx, - stage="dev", - version="", - pytest_opts="", - run_only_test_list="", - skip_test_list="", - skip_pull=False, - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run superslow tests. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - test_list_name = "superslow_tests" - # Convert cmd line marker lists to a pytest marker list. - custom_marker = _get_custom_marker( - run_only_test_list=run_only_test_list, skip_test_list=skip_test_list - ) - rc = _run_tests( - ctx, - test_list_name, - stage, - version, - custom_marker, - pytest_opts, - skip_pull, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir=allure_dir, - ) - return rc - - -@task -def run_fast_slow_tests( # type: ignore - ctx, - abort_on_first_error=False, - stage="dev", - version="", - pytest_opts="", - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast and slow tests back-to-back. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - # Run fast tests but do not fail on error. 
- test_lists = "fast_tests,slow_tests" - custom_marker = "" - rc = run_tests( - ctx, - test_lists, - abort_on_first_error, - stage, - version, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir, - ) - return rc - - -@task -def run_fast_slow_superslow_tests( # type: ignore - ctx, - abort_on_first_error=False, - stage="dev", - version="", - pytest_opts="", - skip_submodules=False, - coverage=False, - collect_only=False, - tee_to_file=False, - n_threads="serial", - git_clean_=False, - allure_dir=None, -): - """ - Run fast, slow, superslow tests back-to-back. - - Same params as `invoke run_fast_tests`. - """ - hlitauti.report_task() - # Run fast tests but do not fail on error. - test_lists = "fast_tests,slow_tests,superslow_tests" - custom_marker = "" - rc = run_tests( - ctx, - test_lists, - abort_on_first_error, - stage, - version, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - git_clean_, - allure_dir, - ) - return rc - - -@task -def run_qa_tests( # type: ignore - ctx, - stage="dev", - version="", -): - """ - Run QA tests independently. - - :param version: version to tag the image and code with - :param stage: select a specific stage for the Docker image - """ - hlitauti.report_task() - # - qa_test_fn = hlitauti.get_default_param("QA_TEST_FUNCTION") - # Run the call back function. - rc = qa_test_fn(ctx, stage, version) - if not rc: - msg = "QA tests failed" - _LOG.error(msg) - raise RuntimeError(msg) - - -# ############################################################################# -# Coverage report -# ############################################################################# - - -def _publish_html_coverage_report_on_s3(aws_profile: str) -> None: - """ - Publish HTML coverage report on S3 so that it can be accessed via browser. - - Target S3 dir is constructed from linux user and Git branch name, e.g. 
- `s3://...-html/html_coverage/grisha_CmTask1047_fix_tests`. - """ - # Build the dir name from user and branch name. - user = hsystem.get_user_name() - branch_name = hgit.get_branch_name() - _LOG.debug("User='%s', branch_name='%s'", user, branch_name) - s3_html_coverage_dir = f"{user}_{branch_name}" - # Get the full path to the dir. - s3_html_base_dir = "html_coverage" - s3_html_bucket_path = hrecouti.get_repo_config().get_html_bucket_path() - s3_html_coverage_path = os.path.join( - s3_html_bucket_path, s3_html_base_dir, s3_html_coverage_dir - ) - # Copy HTML coverage data from the local dir to S3. - local_coverage_path = "./htmlcov" - # TODO(Nikola): Revert to `s3fs_.put` after `s3fs` is updated to latest - # version. See CmTask #2400. - use_aws_copy = True - if use_aws_copy: - sudo_prefix = "" - if hserver.is_inside_ci(): - # There is no AWS config in GH action, thus create default one from - # chosen profile. To bypass permission errors, `sudo` is used. - sudo_prefix = "sudo " - aws_set_param_cmd = "sudo aws configure set" - aws_set_profile_cmd = f"--profile {aws_profile}" - # TODO(Juraj): needed because ENV_VARS are now prefixed with - # `CSFY_` and not `CK_` or `AM_`. Proper fix to come in - # CmTask11095. - # profile_prefix = aws_profile.upper() - profile_prefix = ( - "CSFY" - if aws_profile.upper() in ["AM", "CK"] - else aws_profile.upper() - ) - # Check if AWS session token is set in environment variable. 
- if f"{profile_prefix}_AWS_SESSION_TOKEN" in os.environ: - aws_set_value_pairs = [ - f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - f"aws_session_token ${profile_prefix}_AWS_SESSION_TOKEN", - f"region ${profile_prefix}_AWS_DEFAULT_REGION", - ] - else: - aws_set_value_pairs = [ - f"aws_access_key_id ${profile_prefix}_AWS_ACCESS_KEY_ID", # gitleaks:allow - f"aws_secret_access_key ${profile_prefix}_AWS_SECRET_ACCESS_KEY", # gitleaks:allow - f"region ${profile_prefix}_AWS_DEFAULT_REGION", - ] - aws_config_cmds = [ - f"{aws_set_param_cmd} {aws_set_value_pair} {aws_set_profile_cmd}" - for aws_set_value_pair in aws_set_value_pairs - ] - aws_config_pipe_cmd = " && ".join(aws_config_cmds) - hsystem.system(aws_config_pipe_cmd) - cp_cmd = ( - f"{sudo_prefix}aws s3 cp {local_coverage_path} {s3_html_coverage_path} " - f"--recursive --profile {aws_profile}" - ) - hsystem.system(cp_cmd) - else: - # Use `s3fs` to copy data to AWS S3. - s3fs_ = hs3.get_s3fs(aws_profile) - s3fs_.put(local_coverage_path, s3_html_coverage_path, recursive=True) - _LOG.info( - "HTML coverage report is published on S3: path=`%s`", - s3_html_coverage_path, - ) - - -@task -def run_coverage_report( # type: ignore - ctx, - target_dir, - generate_html_report=False, - publish_html_on_s3=True, - aws_profile="ck", -): - """ - Compute test coverage stats. - - The flow is: - - Run tests and compute coverage stats for each test type - - Combine coverage stats in a single file - - Generate a text report - - Generate a HTML report (optional) - - Post it on S3 (optional) - - :param target_dir: directory to compute coverage stats for. The value '.' 
- uses all the dirs in the current working directory - :param generate_html_report: whether to generate HTML coverage report or not - :param publish_html_on_s3: whether to publish HTML coverage report or not - :param aws_profile: the AWS profile to use for publishing HTML report - """ - # TODO(Grisha): allow user to specify which tests to run. - # Run fast tests for the target dir and collect coverage results. - fast_tests_cmd = f"invoke run_fast_tests --coverage -p {target_dir}" - hlitauti.run(ctx, fast_tests_cmd, use_system=False) - fast_tests_coverage_file = ".coverage_fast_tests" - create_fast_tests_file_cmd = f"mv .coverage {fast_tests_coverage_file}" - hsystem.system(create_fast_tests_file_cmd) - # Run slow tests for the target dir and collect coverage results. - slow_tests_cmd = f"invoke run_slow_tests --coverage -p {target_dir}" - hlitauti.run(ctx, slow_tests_cmd, use_system=False) - slow_tests_coverage_file = ".coverage_slow_tests" - create_slow_tests_file_cmd = f"mv .coverage {slow_tests_coverage_file}" - hsystem.system(create_slow_tests_file_cmd) - # Check that coverage files are present for both fast and slow tests. - hdbg.dassert_file_exists(fast_tests_coverage_file) - hdbg.dassert_file_exists(slow_tests_coverage_file) - # - report_cmd: List[str] = [] - # Clean the previous coverage results. For some docker-specific reasons - # command which combines stats does not work when being run first in - # the chain `bash -c "cmd1 && cmd2 && cmd3"`. So `erase` command which - # does not affect the coverage results was added as a workaround. - report_cmd.append("coverage erase") - # Merge stats for fast and slow tests into single dir. - report_cmd.append( - f"coverage combine --keep {fast_tests_coverage_file} {slow_tests_coverage_file}" - ) - # Specify the dirs to include and exclude in the report. - exclude_from_report = None - if target_dir == ".": - # Include all dirs. - include_in_report = "*" - if hserver.skip_submodules_test(): - # Exclude submodules. 
- submodule_paths = hgit.get_submodule_paths() - exclude_from_report = ",".join( - path + "/*" for path in submodule_paths - ) - else: - # Include only the target dir. - include_in_report = f"*/{target_dir}/*" - # Generate text report with the coverage stats. - report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report is not None: - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - if generate_html_report: - # Generate HTML report with the coverage stats. - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report is not None: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Execute commands above one-by-one inside docker. Coverage tool is not - # installed outside docker. - full_report_cmd = " && ".join(report_cmd) - docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" - hlitauti.run(ctx, docker_cmd_) - if publish_html_on_s3: - # Publish HTML report on S3. - _publish_html_coverage_report_on_s3(aws_profile) - - -def _get_inclusion_settings(target_dir: str) -> Tuple[str, Optional[str]]: - """ - Determine include/omit glob patterns for the coverage report for both text - and HTML coverage reports. - - :param target_dir: directory for coverage stats; use "." to indicate all directories - :return: glob pattern to include and a comma-separated glob pattern to omit - - Examples: - 1. Cover everything (no submodules to omit): - `_get_inclusion_settings(".")` -> `("*", "")` - - 2. Only cover code under a specific directory: - `_get_inclusion_settings("helpers")` -> `("*/helpers/*", None)` - - In `_run_coverage`: - - To cover the entire repo coverage (e.g. 
`helpers` project root): - `_get_inclusion_settings(".")` corresponds to - ``` - > coverage report --include=* --sort=Cover - > coverage html --include=* [--omit=submodule1/*,submodule2/*] - ``` - - - To cover a single-directory: - ` _get_inclusion_settings("helpers")` corresponds to: - ``` - > coverage report --include=*/helpers/* --sort=Cover - > coverage html --include=*/helpers/* [--omit=...] - ``` - """ - if target_dir == ".": - include_in_report = "*" - exclude_from_report = "" - if hserver.skip_submodules_test(): - submodule_paths: List[str] = hgit.get_submodule_paths() - exclude_from_report = ",".join( - f"{path}/*" for path in submodule_paths - ) - else: - include_in_report = f"*/{target_dir}/*" - exclude_from_report = None - return include_in_report, exclude_from_report - - -@task -def run_coverage(ctx, suite, target_dir=".", generate_html_report=False): # type: ignore - """ - Task to run coverage for any test suite. - - :param ctx: invoke context - :param suite: suite to run ("fast", "slow", "superslow") - :param target_dir: directory to measure coverage - """ - hdbg.dassert_in(suite, ("fast", "slow", "superslow")) - # Build the command line. - test_cmd_parts = [ - # Invoke the "_tests" task. - "invoke", - f"run_{suite}_tests", - # Enable coverage computation. - "--coverage", - # Specify which directory to test. - "-p", - target_dir, - ] - test_cmd = hlitauti.to_multi_line_cmd(test_cmd_parts) - # Run the tests under coverage. - hlitauti.run(ctx, test_cmd, use_system=False) - hdbg.dassert_file_exists(".coverage") - # Compute which files/dirs to include and omit in the report. - include_in_report, exclude_from_report = _get_inclusion_settings(target_dir) - report_cmd: List[str] = [ - # Reset any previous coverage data to avoid contamination. - "coverage erase" - ] - # Generate a text report, including only our target paths. 
- report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report: - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - # Produce HTML output for interactive browsing. - if generate_html_report: - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Export XML coverage report to integrate with Codecov. - report_cmd.append("coverage xml -o coverage.xml") - full_report_cmd = " && ".join(report_cmd) - docker_cmd_ = f"invoke docker_cmd --use-bash --cmd '{full_report_cmd}'" - hlitauti.run(ctx, docker_cmd_) - - -@task -def run_coverage_subprocess(ctx, target_dir=".", generate_html_report=False): # type: ignore - """ - Run comprehensive coverage using subprocess mode with hcoverage injection - and direct coverage run. This function runs all tests (fast, slow, - superslow) to generate complete coverage. - - :param ctx: invoke context - :param target_dir: directory to measure coverage - :param generate_html_report: whether to generate HTML coverage - report or not - """ - _LOG.info("Running comprehensive test coverage with subprocess injection...") - # Inject coverage hooks. - hcovera.inject() - try: - # Setup coverage environment for subprocess. - hcovera.coverage_commands_subprocess() - # Clean any existing coverage data. - erase_cmd = "coverage erase" - hsystem.system(erase_cmd, abort_on_error=True) - # Build the coverage command with parallel mode - run all tests. - coverage_cmd = ["coverage", "run", "--parallel-mode", "-m", "pytest"] - # Add target directory. - coverage_cmd.append(target_dir) - test_cmd = hlitauti.to_multi_line_cmd(coverage_cmd) - _LOG.debug("About to run command: {test_cmd}") - # Run tests with coverage tracking directly. - hsystem.system(test_cmd, abort_on_error=True) - # Combine coverage data from subprocesses directly. 
- hcovera.coverage_combine() - hdbg.dassert_file_exists(".coverage") - include_in_report, exclude_from_report = _get_inclusion_settings( - target_dir - ) - include_in_report = include_in_report.replace("/./", "/").replace( - "//", "/" - ) - report_cmd: List[str] = [] - # Generate a text report, including only our target paths. - report_stats_cmd = ( - f"coverage report --include={include_in_report} --sort=Cover" - ) - if exclude_from_report: - exclude_from_report = exclude_from_report.replace( - "/./", "/" - ).replace("//", "/") - report_stats_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_stats_cmd) - if generate_html_report: - # Generate HTML report with the coverage stats. - report_html_cmd = f"coverage html --include={include_in_report}" - if exclude_from_report: - report_html_cmd += f" --omit={exclude_from_report}" - report_cmd.append(report_html_cmd) - # Export XML coverage report to integrate with Codecov. - report_cmd.append("coverage xml -o coverage.xml") - full_report_cmd = " && ".join(report_cmd) - # Run coverage report commands directly (avoid Docker-in-Docker issues). - hsystem.system(full_report_cmd, abort_on_error=True) - except Exception as e: - _LOG.error("Coverage with subprocess failed: %s", e) - raise - finally: - # Always cleanup coverage hooks. - hcovera.remove() - - -# ############################################################################# -# Traceback. -# ############################################################################# - - -# TODO(gp): Consolidate the code from dev_scripts_helpers/testing here. - - -@task -def traceback(ctx, log_name="tmp.pytest_script.txt", purify=True): # type: ignore - """ - Parse the traceback from Pytest and navigate it with vim. - - ``` - # Run a unit test. 
- > pytest helpers/test/test_traceback.py 2>&1 | tee tmp.pytest.log - > pytest.sh helpers/test/test_traceback.py - # Parse the traceback - > invoke traceback -i tmp.pytest.log - ``` - - :param log_name: the file with the traceback - :param purify: purify the filenames from client (e.g., from running inside Docker) - """ - hlitauti.report_task() - # - dst_cfile = "cfile" - hio.delete_file(dst_cfile) - # Convert the traceback into a cfile. - cmd = [] - cmd.append("traceback_to_cfile.py") - if log_name: - cmd.append(f"-i {log_name}") - cmd.append(f"-o {dst_cfile}") - # Purify the file names. - if purify: - cmd.append("--purify_from_client") - else: - cmd.append("--no_purify_from_client") - cmd = " ".join(cmd) - hlitauti.run(ctx, cmd) - # Read and navigate the cfile with vim. - if os.path.exists(dst_cfile): - cmd = 'vim -c "cfile cfile"' - hlitauti.run(ctx, cmd, pty=True) - else: - _LOG.warning("Can't find %s", dst_cfile) - - -# ############################################################################# -# pytest_clean -# ############################################################################# - - -@task -def pytest_clean(ctx): # type: ignore - """ - Clean pytest artifacts. - """ - hlitauti.report_task() - _ = ctx - import helpers.hpytest as hpytest - - hpytest.pytest_clean(".") - - -# ############################################################################# -# pytest_repro -# ############################################################################# - - -def _get_failed_tests_from_file(file_name: str) -> List[str]: - hdbg.dassert_file_exists(file_name) - txt = hio.from_file(file_name) - if file_name.endswith("/cache/lastfailed"): - # Decode the json-style string. 
- # { - # "vendors/test/test_vendors.py::Test_gp::test1": true, - # "vendors/test/test_vendors.py::Test_kibot_utils1::...": true, - # } - vals = json.loads(txt) - hdbg.dassert_isinstance(vals, dict) - tests = [k for k, v in vals.items() if v] - else: - # Extract failed tests from the regular text output. - tests = re.findall(r"FAILED (\S+\.py::\S+::\S+)\b", txt) - return tests - - -@task -def pytest_repro( # type: ignore - ctx, - mode="tests", - file_name="./.pytest_cache/v/cache/lastfailed", - show_stacktrace=False, - create_script=True, - script_name="./tmp.pytest_repro.sh", -): - """ - Generate commands to reproduce the failed tests after a `pytest` run. - - The workflow is: - ``` - # Run a lot of tests, e.g., the entire regression suite. - server> i run_fast_slow_tests 2>&1 | log pytest.txt - docker> pytest ... 2>&1 | log pytest.txt - - # Run the `pytest_repro` to summarize test failures and to generate - # commands to reproduce them. - server> i pytest_repro - ``` - - :param mode: the granularity level for generating the commands - - "tests" (default): failed test methods, e.g., - ``` - pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 - pytest helpers/test/test_cache.py::TestCachingOnS3::test_with_caching2 - ``` - - "classes": classes of the failed tests, e.g., - ``` - pytest helpers/test/test_cache.py::TestCachingOnS3 - pytest helpers/test/test_cache.py::TestCachingOnS3_2 - ``` - - "files": files with the failed tests, e.g., - :param file_name: the name of the file containing the pytest output file to parse - :param show_stacktrace: whether to show the stacktrace of the failed tests - - only if it is available in the pytest output file - :param create_script: create a script to run the tests - :return: commands to reproduce pytest failures at the requested granularity level - """ - hlitauti.report_task() - _ = ctx - # Read file. 
- _LOG.info("Reading file_name='%s'", file_name) - hdbg.dassert_file_exists(file_name) - _LOG.info("Reading failed tests from file '%s'", file_name) - # E.g., vendors/test/test_vendors.py::Test_gp::test1 - tests = _get_failed_tests_from_file(file_name) - if len(tests) == 0: - _LOG.info("Found 0 failed tests") - return "" - _LOG.debug("tests=%s", str(tests)) - # Process the tests. - targets = [] - for test in tests: - data = test.split("::") - hdbg.dassert_lte(len(data), 3, "Can't parse '%s'", test) - # E.g., dev_scripts/testing/test/test_run_tests.py - # E.g., helpers/test/helpers/test/test_list.py::Test_list_1 - # E.g., core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5 - test_file_name = test_class = test_method = "" - if len(data) >= 1: - test_file_name = data[0] - if len(data) >= 2: - test_class = data[1] - if len(data) >= 3: - test_method = data[2] - _LOG.debug( - "test=%s -> (%s, %s, %s)", - test, - test_file_name, - test_class, - test_method, - ) - if mode == "tests": - targets.append(test) - elif mode == "files": - if test_file_name != "": - targets.append(test_file_name) - else: - _LOG.warning( - "Skipping test='%s' since test_file_name='%s'", - test, - test_file_name, - ) - elif mode == "classes": - if test_file_name != "" and test_class != "": - targets.append(f"{test_file_name}::{test_class}") - else: - _LOG.warning( - "Skipping test='%s' since test_file_name='%s', test_class='%s'", - test, - test_file_name, - test_class, - ) - else: - hdbg.dfatal(f"Invalid mode='{mode}'") - # Package the output. - # targets is a list of tests in the format - # `helpers/test/test_env.py::Test_env1::test_get_system_signature1`. 
- hdbg.dassert_isinstance(targets, list) - targets = hlist.remove_duplicates(targets) - targets = sorted(targets) - failed_test_output_str = ( - f"Found {len(targets)} failed pytest '{mode}' target(s); " - "to reproduce run:\n" - ) - res = [f"pytest {t}" for t in targets] - res = "\n".join(res) - failed_test_output_str += res - # - if show_stacktrace: - # Get the stacktrace block from the pytest output. - txt = hio.from_file(file_name) - if ( - "====== FAILURES ======" in txt - and "====== slowest 3 durations ======" in txt - ): - failures_blocks = txt.split("====== FAILURES ======")[1:] - failures_blocks = [ - x.split("====== slowest 3 durations ======")[0] - for x in failures_blocks - ] - txt = "\n".join([x.rstrip("=").lstrip("=") for x in failures_blocks]) - # Get the classes and names of the failed tests, e.g. - # "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5" -> - # -> "TestSmaModel.test5". - failed_test_names = [ - test.split("::")[1] + "." + test.split("::")[2] for test in tests - ] - tracebacks = [] - for name in failed_test_names: - # Get the stacktrace for the individual test failure. - # Its start is marked with the name of the test, e.g. - # "___________________ TestSmaModel.test5 ___________________". - start_block = "__ " + name + " __" - traceback_block = txt.rsplit(start_block, maxsplit=1)[-1] - end_block_options = [ - "__ " + n + " __" for n in failed_test_names if n != name - ] - for end_block in end_block_options: - # The end of the traceback for the current failed test is the - # start of the traceback for the next failed test. - if end_block in traceback_block: - traceback_block = traceback_block.split(end_block)[0] - _, traceback_ = htraceb.parse_traceback( - traceback_block, purify_from_client=False - ) - traceback_text = ( - traceback_.strip() if traceback_ is not None else "" - ) - tracebacks.append("\n".join(["# " + name, traceback_text, ""])) - # Combine the stacktraces for all the failures. 
- full_traceback = "\n\n" + "\n".join(tracebacks) - failed_test_output_str += full_traceback - res += full_traceback - _LOG.info("%s", failed_test_output_str) - if create_script: - # pytest \ - # amp/oms/test/test_portfolio.py::TestDatabasePortfolio2::test1 \ - # ... - # $* - script_txt = [] - # pytest or pytest_log - script_txt.append("pytest_log \\") - script_txt.extend([f" {t} \\" for t in targets]) - script_txt.append(" $*") - script_txt = "\n".join(script_txt) - msg = "To run the tests" - hio.create_executable_script(script_name, script_txt, msg=msg) - return res - - -# ############################################################################# -# pytest_rename_test -# ############################################################################# - - -@task -def pytest_rename_test(ctx, old_test_class_name, new_test_class_name): # type: ignore - """ - Rename the test and move its golden outcome. - - E.g., to rename a test class and all the test methods: - - :param old_test_class_name: old class name - :param new_test_class_name: new class name - """ - hlitauti.report_task() - _ = ctx - root_dir = os.getcwd() - # `lib_tasks` is used from outside the Docker container in the thin dev - # environment and we want to avoid pulling in too many dependencies, unless - # necessary, so we import dynamically. - import helpers.hunit_test_utils as hunteuti - - renamer = hunteuti.UnitTestRenamer( - old_test_class_name, new_test_class_name, root_dir - ) - renamer.run() - - -# ############################################################################# -# pytest_find_ununsed_goldens -# ############################################################################# - - -@task -def pytest_find_unused_goldens( # type: ignore - ctx, - dir_name=".", - stage="prod", - version="", - out_file_name="pytest_find_unused_goldens.output.txt", -): - """ - Detect mismatches between tests and their golden outcome files. 
- - - When goldens are required by the tests but the corresponding files - do not exist - - When the existing golden files are not actually required by the - corresponding tests - - :param dir_name: the head dir to start the check from - """ - hlitauti.report_task() - # Remove the log file. - if os.path.exists(out_file_name): - cmd = f"rm {out_file_name}" - hlitauti.run(ctx, cmd) - # Prepare the command line. - amp_abs_path = hgit.get_amp_abs_path() - amp_path = amp_abs_path.replace( - os.path.commonpath([os.getcwd(), amp_abs_path]), "" - ) - script_path = os.path.join( - amp_path, "dev_scripts/find_unused_golden_files.py" - ).lstrip("/") - docker_cmd_opts = [f"--dir_name {dir_name}"] - docker_cmd_ = f"{script_path} " + hlitauti._to_single_line_cmd( - docker_cmd_opts - ) - # Execute command line. - base_image = "" - cmd = hlitalin._get_lint_docker_cmd(base_image, docker_cmd_, stage, version) - cmd = f"({cmd}) 2>&1 | tee -a {out_file_name}" - # Run. - hlitauti.run(ctx, cmd) - - -# ############################################################################# -# pytest_compare_logs -# ############################################################################# - - -def _purify_log_file( - file_name: str, remove_line_numbers: bool, grep_regex: str -) -> str: - txt = hio.from_file(file_name) - # Remove leading `16:34:27`. - txt = re.sub(r"^\d\d:\d\d:\d\d ", "", txt, flags=re.MULTILINE) - # Remove references like `at 0x7f43493442e0`. - txt = re.sub(r"at 0x\S{12}", "at 0x", txt, flags=re.MULTILINE) - # Remove `done (0.014 s)`. - txt = re.sub(r"(done) \(\d+\.\d+ s\)", "\\1", txt, flags=re.MULTILINE) - # Remove wall_clock_time='2022-06-17 04:36:56.062645-04:00'. - txt = re.sub(r"(wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) - # Remove `real_wall_clock_time = '2022-06-17 04:33:19.946025-04:00'`. - txt = re.sub(r"(real_wall_clock_time=)'.*'", "\\1", txt, flags=re.MULTILINE) - # Remove `tqdm [00:00<00:00, 4.05it/s]`. 
- txt = re.sub(r"(htqdm.py.*)\[.*\]", "\\1", txt, flags=re.MULTILINE) - # Remove `Task-3`. - txt = re.sub(r"(Task-)\d+", "\\1", txt, flags=re.MULTILINE) - # Remove line number, e.g., - # `htqdm.py abstract_market_data.py get_data_for_interval:259` - if remove_line_numbers: - txt = re.sub( - r"(\.py [a-zA-Z_][a-zA-Z0-9_]*):\d+ ", - "\\1:0 ", - txt, - flags=re.MULTILINE, - ) - # - if grep_regex: - lines = [] - for line in txt.split("\n"): - if re.search(grep_regex, line): - lines.append(line) - txt = "\n".join(lines) - return txt - - -@task -def pytest_compare_logs( # type: ignore - ctx, file1, file2, remove_line_numbers=False, grep_regex="", dry_run=False -): - """ - Diff two log files removing the irrelevant parts (e.g., timestamps, object - pointers). - - :param remove_line_numbers: remove line numbers from function calls - (e.g., `abstract_market_data.py get_data_for_interval:259` - :param grep_regex: select lines based on a regex - """ - suffix = "tmp" - # - txt = _purify_log_file(file1, remove_line_numbers, grep_regex) - file1_tmp = hio.add_suffix_to_filename(file1, suffix) - hio.to_file(file1_tmp, txt) - # - txt = _purify_log_file(file2, remove_line_numbers, grep_regex) - file2_tmp = hio.add_suffix_to_filename(file2, suffix) - hio.to_file(file2_tmp, txt) - # Save the script to compare. 
- script_file_name = "./tmp.vimdiff_log.sh" - script_txt = f"vimdiff {file1_tmp} {file2_tmp}" - msg = "To diff run:" - hio.create_executable_script(script_file_name, script_txt, msg=msg) - hlitauti.run(ctx, script_file_name, dry_run=dry_run, pty=True) - - -# ############################################################################# -# pytest_buildmeister -# ############################################################################# - - -def _run( - cmd: str, - *, - abort_on_error: bool = False, - output_file: Optional[str] = None, - tee: bool = False, -) -> int: - rc = hsystem.system( - cmd, - abort_on_error=abort_on_error, - suppress_output=False, - log_level="echo_frame", - output_file=output_file, - tee=tee, - ) - return rc - - -def _get_invoke_cmd_line(target: str, opts: str, pytest_opts: str) -> str: - """ - - :param opts: options to pass to invoke - """ - cmd = ["invoke"] - cmd.append(target) - if opts: - cmd.append(opts) - if pytest_opts: - cmd.append("--pytest-opts " + pytest_opts) - cmd.append("2>&1") - return " ".join(cmd) - - -def _run_cmd_and_tg(cmd: str, *args: Any, **kwargs: Any) -> None: - rc = _run(cmd, *args, **kwargs) - if rc != 0: - # pytest returns 5, if there are no tests to run. - # On error, send Telegram message. - cmd = "tg.py" - _run(cmd, abort_on_error=False) - - -@task -def pytest_buildmeister_check(ctx, print_output=False): # type: ignore - """ - - :param print_output: print content of the file with the output of the - buildmeister run - """ - _ = ctx - # Concat the files generated by `invoke pytest_...` - log_file = "bm.log.txt" - if os.path.exists(log_file): - cmd = f"rm -rf {log_file}" - _run(cmd) - log_file = "bm.log.txt" - cmd = 'cat $(find . -name "bm.log*.txt" | sort) >' + log_file - _run(cmd) - # - if print_output: - print(hprint.frame("Print output")) - cmd = f"cat {log_file}" - _run(cmd) - # Report failures using `invoke pytest_repro`. 
- print(hprint.frame("Failures")) - # "> sudo -u sasm rm ./tmp.pytest_repro.sh; i pytest_repro -f {log_file}" - if os.path.exists("./tmp.pytest_repro.sh"): - cmd = "sudo -u sasm rm ./tmp.pytest_repro.sh" - _run(cmd) - # - cmd = f"invoke pytest_repro -f {log_file}" - _run(cmd) - # Report failures using `grep`. - print(hprint.frame("grep Failures")) - cmd = f"grep '^FAILED' {log_file} | sort" - _run(cmd) - - -@task -def pytest_buildmeister( # type: ignore - ctx, opts="", pytest_opts="", docker_clean=False, test=False -): - """ - Run the regression tests. - - - Run updating all the tests - - :param docker_clean: remove all dead Docker instances - :param opts: options to pass to the invoke (e.g., `--version 1.2.0` to test - a specific version of the Docker container) - :param pytest_opts: options to pass to pytest - :param test: just run a single quick test to verify functionality of this - script - """ - _ = ctx - if test: - # For testing. - pytest_opts = "amp/dataflow/backtest/test/test_dataflow_backtest_utils.py::Test_get_configs_from_command_line_Amp1::test1" - if docker_clean: - cmd = "dev_scripts_lime/docker_clean.sh" - _run(cmd) - # Clean and sync. 
- cmd = "invoke git_clean -f" - _run(cmd) - # - cmd = "invoke git_pull" - _run(cmd) - # - log_file = "bm.log*txt" - if os.path.exists(log_file): - cmd = f"rm -rf {log_file}" - _run(cmd) - # - files_to_merge = [] - # - target = "run_fast_tests" - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - cmd = "invoke fix_perms" - hsystem.system(cmd) - # - target = "run_slow_tests" - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - cmd = "invoke fix_perms" - _run(cmd) - # - target = "run_superslow_tests" - log_file = f"bm.log.{target}.txt" - files_to_merge.append(log_file) - cmd = _get_invoke_cmd_line(target, opts, pytest_opts) - cmd = f"({cmd} | tee {log_file};" + " exit ${PIPESTATUS[0]})" - cmd = f"bash -c '{cmd}'" - _run_cmd_and_tg(cmd) - # - pytest_buildmeister_check(ctx) - - -# ############################################################################# -# pytest_collect_only -# ############################################################################# - - -@task -def pytest_collect_only(ctx): # type: ignore - _ = ctx - cmd = 'invoke docker_cmd --cmd "pytest --collect-only 2>&1"' - hsystem.system(cmd, suppress_output=False) - - -# ############################################################################# -# pytest_add_untracked_golden_outcomes -# ############################################################################# - - -@task -def pytest_add_untracked_golden_outcomes(ctx): # type: ignore - """ - Add the golden outcomes files that are not tracked under git. - """ - _ = ctx - cmd = 'git add $(git ls-files . 
--exclude-standard --others | grep "output" | grep -v tmp)' - hsystem.system(cmd, suppress_output=False) - - -# ############################################################################# -# pytest_failed -# ############################################################################# - - -def _parse_failed_tests( - txt: str, only_file: bool, only_class: bool -) -> Tuple[List[str], int, int]: - """ - Parse the failed tests from the pytest output. - - :param only_file: return only the file name - :param only_class: return only the class name - :return: - - failed_tests: list of failed tests - - num_failed: number of failed tests - - num_passed: number of passed tests - """ - hdbg.dassert_lte(only_file + only_class, 1) - failed_tests = [] - num_failed = num_passed = 0 - for line in txt.split("\n"): - # Remove non printable characters. - line = re.sub(r"[^\x20-\x7E]", "", line) - # FAILED oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - RuntimeError: - m = re.search(r"^(FAILED|ERROR) (\S+) -", line) - if m: - test_name = m.group(2) - _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) - failed_tests.append(test_name) - # helpers_root/helpers/test/test_hserver.py::Test_hserver1::test_gp1 (0.00 s) PASSED [ 36%] - m = re.search(r"(\S+) \(\S+ s\) (FAILED|ERROR)", line) - if m: - test_name = m.group(1) - _LOG.debug("line=%s ->\n\ttest_name='%s'", line, test_name) - failed_tests.append(test_name) - # ============ 11 failed, 917 passed, 113 skipped in 64.57s (0:01:04) ============ - # ======================== 4 failed, 43 passed in 40.48s ========================= - m = re.search(r"=+\s+(\d+)\s+failed,\s+(\d+)\s+passed.*", line) - if m: - num_failed = int(m.group(1)) - num_passed = int(m.group(2)) - failed_tests = sorted(list(set(failed_tests))) - # - if num_failed and num_passed and num_failed != len(failed_tests): - _LOG.warning( - "n_failed=%s len(failed_tests)=%s", num_failed, len(failed_tests) - ) - print(f"Failed 
tests: {num_failed}/{num_passed}") - # Filter, if needed. - if only_file or only_class: - failed_tests_tmp = [] - for test in failed_tests: - # oms/broker/ccxt/test/test_ccxt_execution_quality.py::Test_compute_adj_fill_ecdfs::test3 - m = re.match(r"(\S+)::(\S+)::\S+$", test) - hdbg.dassert(m, f"Can't parse '{test}'") - if only_file: - failed_tests_tmp.append(m.group(1)) - elif only_class: - failed_tests_tmp.append(m.group(1) + "::" + m.group(2)) - else: - raise RuntimeError("Unexpected") - failed_tests = sorted(list(set(failed_tests_tmp))) - return failed_tests, num_failed, num_passed - - -@task -def pytest_failed( - ctx, only_file=False, only_class=False, file_name="tmp.pytest_script.txt" -): # type: ignore - _ = ctx - hlitauti.report_task() - # Read file. - txt = hio.from_file(file_name) - # Extract info. - failed_tests, _, _ = _parse_failed_tests(txt, only_file, only_class) - print("\n".join(failed_tests)) - # Write the repro in a file. - repro_file_name = "tmp.pytest_failed.sh" - repro_txt = "pytest_log " + " ".join(failed_tests) + " $*" - hio.to_file(repro_file_name, repro_txt) - # - hio.create_executable_script(repro_file_name, repro_txt) - _LOG.warning("To run the failed tests run: %s", repro_file_name) - # Save to clipboard. 
- txt = " ".join(failed_tests) - hsystem.to_pbcopy(txt, pbcopy=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py deleted file mode 100644 index 64d60a88b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/lib_tasks_utils.py +++ /dev/null @@ -1,397 +0,0 @@ -""" -Import as: - -import helpers.lib_tasks_utils as hlitauti -""" - -import datetime -import glob -import logging -import os -import pprint -import re -import sys -from typing import Any, Dict, List, Optional, Union - -# We want to minimize the dependencies from non-standard Python packages since -# this code needs to run with minimal dependencies and without Docker. -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hversion as hversio - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Default params. -# ############################################################################# - -# This is used to inject the default params. -# TODO(gp): Using a singleton here is not elegant but simple. -_DEFAULT_PARAMS = {} - - -def set_default_params(params: Dict[str, Any]) -> None: - global _DEFAULT_PARAMS - _DEFAULT_PARAMS = params - _LOG.debug("Assigning:\n%s", pprint.pformat(params)) - - -def has_default_param(key: str) -> bool: - hdbg.dassert_isinstance(key, str) - return key in _DEFAULT_PARAMS - - -def get_default_param(key: str, *, override_value: Any = None) -> Any: - """ - Return the value from the default parameters dictionary, optionally - overriding it. 
- """ - hdbg.dassert_isinstance(key, str) - value = None - if has_default_param(key): - value = _DEFAULT_PARAMS[key] - if override_value: - _LOG.info("Overriding value %s with %s", value, override_value) - value = override_value - hdbg.dassert_is_not( - value, None, "key='%s' not defined from %s", key, _DEFAULT_PARAMS - ) - return value - - -def reset_default_params() -> None: - params: Dict[str, Any] = {} - set_default_params(params) - - -# ############################################################################# -# Utils. -# ############################################################################# - - -def parse_command_line() -> None: - # Since it's not easy to add global command line options to invoke, we - # piggy back the option that already exists. - # If one uses the debug option for `invoke` we turn off the code - # debugging. - # TODO(gp): Check http://docs.pyinvoke.org/en/1.0/concepts/library.html# - # modifying-core-parser-arguments - if ("-d" in sys.argv) or ("--debug" in sys.argv): - verbosity = logging.DEBUG - else: - verbosity = logging.INFO - # Suppress command line logging if only_print_files is requested. - report_command_line = "--only-print-files" not in sys.argv - hdbg.init_logger( - verbosity=verbosity, report_command_line=report_command_line - ) - - -# NOTE: We need to use a `# type: ignore` for all the @task functions because -# pyinvoke infers the argument type from the code and mypy annotations confuse -# it (see https://github.com/pyinvoke/invoke/issues/357). - -# In the following, when using `lru_cache`, we use functions from `hsyste` -# instead of `ctx.run()` since otherwise `lru_cache` would cache `ctx`. - -# We prefer not to cache functions running `git` to avoid stale values if we -# call git (e.g., if we cache Git hash and then we do a `git pull`). 
- -# pyinvoke `ctx.run()` is useful for unit testing, since it allows to: -# - mock the result of a system call -# - register the issued command line (to create the expected outcome of a test) -# On the other side `system_interaction.py` contains many utilities that make -# it easy to interact with the system. -# Once AmpPart1347 is implemented we can replace all the `ctx.run()` with calls -# to `system_interaction.py`. - - -_WAS_FIRST_CALL_DONE = False - - -# TODO(gp): This can be part of the @task -def report_task(txt: str = "", container_dir_name: str = ".") -> None: - """ - Print the task description. - - Each task should call this function at the beginning to print the - task name. - """ - # On the first invocation check the version of the container. - global _WAS_FIRST_CALL_DONE - if not _WAS_FIRST_CALL_DONE: - _WAS_FIRST_CALL_DONE = True - hversio.check_version(container_dir_name) - # Print the name of the function. - msg = hprint.func_signature_to_str( - skip_vars="ctx", assert_on_skip_vars_error=False, frame_level=3 - ) - print(hprint.color_highlight(msg, color="purple")) - - -# TODO(gp): Move this to helpers.system_interaction and allow to add the switch -# globally. -def _to_single_line_cmd(cmd: Union[str, List[str]]) -> str: - """ - Convert a multiline command (as a string or list of strings) into a single - line. - - E.g., convert - ``` - IMAGE=.../amp:dev \ - docker-compose \ - --file devops/compose/tmp.docker-compose.yml \ - --file devops/compose/tmp.docker-compose_as_submodule.yml \ - --env-file devops/env/default.env - ``` - into - ``` - IMAGE=.../amp:dev docker-compose --file ... - ``` - """ - if isinstance(cmd, list): - cmd = " ".join(cmd) - hdbg.dassert_isinstance(cmd, str) - cmd = cmd.rstrip().lstrip() - # Remove `\` at the end of the line. - cmd = re.sub(r" \\\s*$", " ", cmd, flags=re.MULTILINE) - # Use a single space between words in the command. 
- # TODO(gp): This is a bit dangerous if there are multiple spaces in a string - # that for some reason are meaningful. - cmd = " ".join(cmd.split()) - return cmd - - -def to_multi_line_cmd(docker_cmd_: List[str]) -> str: - r""" - Convert a command encoded as a list of strings into a single command - separated by `\`. - - E.g., convert - ``` - ['IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev', - '\n docker-compose', - '\n --file amp/devops/compose/tmp.docker-compose.yml', - '\n --file amp/devops/compose/tmp.docker-compose_as_submodule.yml', - '\n --env-file devops/env/default.env'] - ``` - into - ``` - IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ - docker-compose \ - --file devops/compose/tmp.docker-compose.yml \ - --file devops/compose/tmp.docker-compose_as_submodule.yml \ - --env-file devops/env/default.env - ``` - """ - # Expand all strings into single lines. - _LOG.debug("docker_cmd=%s", docker_cmd_) - docker_cmd_tmp = [] - for dc in docker_cmd_: - # Add a `\` at the end of each string. - hdbg.dassert(not dc.endswith("\\"), "dc='%s'", dc) - dc += " \\" - docker_cmd_tmp.extend(dc.split("\n")) - docker_cmd_ = docker_cmd_tmp - # Remove empty lines. - docker_cmd_ = [cmd for cmd in docker_cmd_ if cmd.rstrip().lstrip() != ""] - # Package the command. - result = "\n".join(docker_cmd_) - # Remove a `\` at the end, since it is not needed. - result = result.rstrip("\\") - _LOG.debug("docker_cmd=%s", result) - return result - - -# TODO(gp): Pass through command line using a global switch or an env var. 
-use_one_line_cmd = False - - -def run( - ctx: Any, - cmd: str, - *args: Any, - dry_run: bool = False, - use_system: bool = False, - print_cmd: bool = False, - **ctx_run_kwargs: Any, -) -> Optional[int]: - cmd = hprint.dedent(cmd) - _LOG.debug(hprint.to_str("cmd dry_run")) - if use_one_line_cmd: - cmd = _to_single_line_cmd(cmd) - _LOG.debug("cmd=%s", cmd) - if dry_run: - print(f"Dry-run: > {cmd}") - _LOG.warning("Skipping execution of '%s'", cmd) - res = None - else: - if print_cmd: - print(f"> {cmd}") - if use_system: - # TODO(gp): Consider using only `hsystem.system()` since it's more - # reliable. - res = hsystem.system(cmd, suppress_output=False) - else: - result = ctx.run(cmd, *args, **ctx_run_kwargs) - res = result.return_code - return res - - -# TODO(ai_gp): Use the one in ./helpers/hsystem.py -def _to_pbcopy(txt: str, pbcopy: bool) -> None: - """ - Save the content of txt in the system clipboard. - """ - txt = txt.rstrip("\n") - if not pbcopy: - print(txt) - return - if not txt: - print("Nothing to copy") - return - if hserver.is_host_mac(): - # -n = no new line - cmd = f"echo -n '{txt}' | pbcopy" - hsystem.system(cmd) - print(f"\n# Copied to system clipboard:\n{txt}") - else: - _LOG.warning("pbcopy works only on macOS") - print(txt) - - -def _filter_existing_paths(paths_from_user: List[str]) -> List[str]: - """ - Filter out the paths to non-existent files. - - :param paths_from_user: paths passed by user - :return: existing paths - """ - paths = [] - for user_path in paths_from_user: - if user_path.endswith("/*"): - # Get the files according to the "*" pattern. - dir_files = glob.glob(user_path) - if dir_files: - # Check whether the pattern matches files. 
- paths.extend(dir_files) - else: - _LOG.error( - ( - "'%s' pattern doesn't match any files: " - "the directory is empty or path does not exist" - ), - user_path, - ) - elif os.path.exists(user_path): - paths.append(user_path) - else: - _LOG.error("'%s' does not exist", user_path) - return paths - - -# TODO(gp): We should factor out the meaning of the params in a string and add it -# to all the tasks' help. -def _get_files_to_process( - modified: bool, - branch: bool, - last_commit: bool, - # TODO(gp): Pass abs_dir, instead of `all_` and remove the calls from the - # outer clients. - all_: bool, - files_from_user: str, - mutually_exclusive: bool, - remove_dirs: bool, -) -> List[str]: - """ - Get a list of files to process. - - The files are selected based on the switches: - - `branch`: changed in the branch - - `modified`: changed in the client (both staged and modified) - - `last_commit`: part of the previous commit - - `all`: all the files in the repo - - `files_from_user`: passed by the user - - :param modified: return files modified in the client (i.e., changed with - respect to HEAD) - :param branch: return files modified with respect to the branch point - :param last_commit: return files part of the previous commit - :param all: return all repo files - :param files_from_user: return files passed to this function - :param mutually_exclusive: ensure that all options are mutually exclusive - :param remove_dirs: whether directories should be processed - :return: paths to process - """ - _LOG.debug( - hprint.to_str( - "modified branch last_commit all_ files_from_user " - "mutually_exclusive remove_dirs" - ) - ) - if mutually_exclusive: - # All the options are mutually exclusive. 
- hdbg.dassert_eq( - int(modified) - + int(branch) - + int(last_commit) - + int(all_) - + int(len(files_from_user) > 0), - 1, - msg="Specify only one among --modified, --branch, --last-commit, " - "--all_files, and --files", - ) - else: - # We filter the files passed from the user through other the options, - # so only the filtering options need to be mutually exclusive. - hdbg.dassert_eq( - int(modified) + int(branch) + int(last_commit) + int(all_), - 1, - msg="Specify only one among --modified, --branch, --last-commit", - ) - dir_name = "." - if modified: - files = hgit.get_modified_files(dir_name) - elif branch: - files = hgit.get_modified_files_in_branch("master", dir_name) - elif last_commit: - files = hgit.get_previous_committed_files(dir_name) - elif all_: - pattern = "*" - only_files = True - use_relative_paths = True - files = hio.listdir(dir_name, pattern, only_files, use_relative_paths) - if files_from_user: - # If files were passed, filter out non-existent paths. - files = _filter_existing_paths(files_from_user.split()) - # Convert into a list. - hdbg.dassert_isinstance(files, list) - files_to_process = [f for f in files if f != ""] - # We need to remove `amp` to avoid copying the entire tree. - files_to_process = [f for f in files_to_process if f != "amp"] - _LOG.debug("files_to_process='%s'", str(files_to_process)) - # Remove dirs, if needed. - if remove_dirs: - files_to_process = hsystem.remove_dirs(files_to_process) - _LOG.debug("files_to_process='%s'", str(files_to_process)) - # Ensure that there are files to process. - if not files_to_process: - _LOG.warning("No files were selected") - return files_to_process - - -# Copied from helpers.datetime_ to avoid dependency from pandas. - - -def get_ET_timestamp() -> str: - # The timezone depends on how the shell is configured. - timestamp = datetime.datetime.now() - return timestamp.strftime("%Y%m%d_%H%M%S") - - -# End copy. 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py deleted file mode 100644 index 631a68e5f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_main.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -""" -Import as: - -import helpers.logging_testing.logging_main as hlteloma -""" - -import logging -import sys -from typing import Union - -import helpers.hlogging as hloggin - -_LOG = logging.getLogger(__name__) -print(f"_LOG={_LOG}") - - -def install_basic_formatter() -> None: - # The output looks like - # ``` - # DEBUG:__main__: message - # ``` - logging.basicConfig() - - -def _install_formatter( - formatter: Union[hloggin.CustomFormatter, logging.Formatter], -) -> None: - root_logger_ = logging.getLogger() - ch = logging.StreamHandler(sys.stdout) - ch.setFormatter(formatter) - root_logger_.addHandler(ch) - - -def install_current_formatter() -> None: - date_fmt = "%m-%d_%H:%M" - log_format = ( - # 04-28_08:08 INFO : - "%(asctime)-5s %(levelname)-5s" - ) - log_format += ( - # lib_tasks _delete_branches - " %(module)-20s: %(funcName)-30s:" - # 142: ... 
- " %(lineno)-4d:" - " %(message)s" - ) - formatter = logging.Formatter(log_format, datefmt=date_fmt) - # - _install_formatter(formatter) - - -def install_custom_formatter() -> None: - formatter = hloggin.CustomFormatter() - _install_formatter(formatter) - - -if __name__ == "__main__": - # - print("\n# Installing formatter") - # install_basic_formatter() - # install_current_formatter() - install_custom_formatter() - # - print("\n# Loggers before setLevel") - root_logger = logging.getLogger() - print(f"root_logger={root_logger}") - # Show the loggers that have registered. - print(f"loggers={hloggin.get_all_loggers()}") - # - verbosity = logging.DEBUG - # verbosity = logging.ERROR - print(f"\n# Loggers after setLevel {verbosity}") - root_logger.setLevel(verbosity) - # Setting the verbosity for the root logger sets the verbosity for all the - # children ones. - print(f"root_logger={root_logger}") - print(f"loggers={hloggin.get_all_loggers()}") - # - hloggin.test_logger() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py deleted file mode 100644 index ad88346fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/logging_testing/logging_module.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Import as: - -import helpers.logging_testing.logging_module as hltelomo -""" - -import logging - -_LOG = logging.getLogger(__name__) -print(f"_LOG={_LOG}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py deleted file mode 100644 index 5b0445a31..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/conftest.py +++ /dev/null @@ -1,17 +0,0 @@ -import pathlib -from typing import Any, Optional - - -def pytest_ignore_collect( # type: ignore - collection_path: pathlib.Path, path: Any, config: Any -) -> Optional[bool]: - """ - Skip all tests in this directory. - - :param collection_path: path to analyze - :param path: path to analyze (deprecated) - :param config: pytest config object - :return: True if the path should be ignored - """ - # Ignore this directory and all its subdirectories. - return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb deleted file mode 100644 index 7df18640d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.ipynb +++ /dev/null @@ -1,638 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Test Cache in Jupyter Notebook" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:23:59.696680Z", - "start_time": "2021-08-16T16:23:58.792511Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING\u001b[0m: Disabling annoying warnings\n", - "\u001b[0m\u001b[36mINFO\u001b[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-89e1d81f-7cff-47ee-9790-af936835f517.json'\n", - "\u001b[33mWARNING\u001b[0m: Running in Jupyter\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import logging\n", - "\n", 
- "import joblib\n", - "\n", - "import helpers.hcache as hcache\n", - "import helpers.hdbg as hdbg\n", - "import helpers.hs3 as hs3\n", - "\n", - "hnotebook.config_notebook()\n", - "\n", - "# hdbg.init_logger(verbosity=logging.DEBUG)\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "# hdbg.test_logger()\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2, - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Define computation function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-15T12:07:59.739169Z", - "start_time": "2021-08-15T12:07:59.714831Z" - } - }, - "outputs": [], - "source": [ - "def func(a, b):\n", - " # hello\n", - " # assert 0\n", - " out = a * b\n", - " print(f\"Multiplication: {a} * {b} = {out}\")\n", - " return out\n", - "\n", - "\n", - "inputs = (1, 2)\n", - "exp_output = 2\n", - "\n", - "func(*inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:32:30.476809Z", - "start_time": "2021-08-14T23:32:30.202040Z" - } - }, - "outputs": [], - "source": [ - "!ls hello/joblib/__main__*/f/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:42:14.671491Z", - "start_time": "2021-08-14T23:42:13.356163Z" - } - }, - "outputs": [], - "source": [ - "!pip install https://github.com/aabadie/joblib-s3.git" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:46:59.749548Z", - "start_time": "2021-08-14T23:46:54.455947Z" - } - }, - "outputs": [], - "source": [ - "#!git clone git://github.com/aabadie/joblib-s3.git\n", - "# !(cd joblib-s3 && pip install -r requirements.txt .)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - 
"end_time": "2021-08-14T23:56:02.954013Z", - "start_time": "2021-08-14T23:56:02.793451Z" - }, - "scrolled": false - }, - "outputs": [], - "source": [ - "# import joblibs3\n", - "\n", - "# joblibs3.register_s3fs_store_backend()\n", - "\n", - "# # dict(compress=False, bucket=None, anon=False,\n", - "# #key=None, secret=None, token=None, use_ssl=True)\n", - "# dict2 = {\n", - "# \"bucket\": \"alphamatic-data\",\n", - "# \"key\": dict_[\"aws_access_key_id\"],\n", - "# \"secret\": dict_[\"aws_secret_access_key\"],\n", - "# }\n", - "# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True,\n", - "# backend_options=dict2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T13:24:11.630748Z", - "start_time": "2021-08-16T13:24:10.983061Z" - } - }, - "outputs": [], - "source": [ - "# hjoblib.register_s3fs_store_backend()\n", - "\n", - "s3fs = hs3.get_s3fs(\"am\")\n", - "\n", - "dict2 = {\n", - " \"bucket\": \"alphamatic-data\",\n", - " # \"key\": dict_[\"aws_access_key_id\"],\n", - " # \"secret\": dict_[\"aws_secret_access_key\"],\n", - " \"s3fs\": s3fs,\n", - "}\n", - "\n", - "mem = joblib.Memory(\n", - " \"joblib_cache\",\n", - " backend=\"s3\",\n", - " verbose=100,\n", - " compress=True,\n", - " backend_options=dict2,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-17T15:51:32.654896Z", - "start_time": "2021-08-17T15:51:32.258447Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "ename": "PermissionError", - "evalue": "Access Denied", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, 
delimiter)\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0mdircache\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0;32masync\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mit\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0mdircache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CommonPrefixes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/paginate.py\u001b[0m in \u001b[0;36m__anext__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0mparsed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extract_parsed_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/aiobotocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 154\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 155\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDenied) when calling the ListObjectsV2 operation: Access Denied", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0ms3fs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m#mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36msync\u001b[0;34m(loop, func, timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/fsspec/asyn.py\u001b[0m in \u001b[0;36m_runner\u001b[0;34m(event, coro, result, timeout)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mcoro\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoro\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mcoro\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_ls\u001b[0;34m(self, path, detail, refresh)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsbuckets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 721\u001b[0;31m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mawait\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lsdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrefresh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrefresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/s3fs/core.py\u001b[0m in \u001b[0;36m_lsdir\u001b[0;34m(self, path, refresh, max_items, delimiter)\u001b[0m\n\u001b[1;32m 553\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Key\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClientError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mtranslate_boto_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdelimiter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mPermissionError\u001b[0m: Access Denied" - ] - } - ], - "source": [ - "# hjoblib.register_s3fs_store_backend()\n", - "\n", - "s3fs = hs3.get_s3fs(\"am\")\n", - "dict_ = {}\n", - "\n", - "dict2 = {\n", - " \"bucket\": \"alphamatic-data\",\n", - " # \"key\": dict_[\"aws_access_key_id\"],\n", - " # \"secret\": dict_[\"aws_secret_access_key\"],\n", - " \"s3fs\": s3fs,\n", - "}\n", - "path = \"/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1\"\n", - "\n", - "\n", - "s3fs.ls(path)\n", - "\n", - "# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:54:50.021139Z", - "start_time": "2021-08-14T23:54:50.017180Z" - } - }, - "outputs": [], - "source": [ - "print(dict_)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:51:25.440705Z", - "start_time": "2021-08-14T23:51:25.419214Z" - } - }, - "outputs": [], - "source": [ - "# dict_[\"bucket\"] = \"alphamatic-data/tmp\"\n", - "\n", - "print(dict_)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def dec(func=None, val=5):\n", - " if func is not None:\n", - " return" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68549a47", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:53:08.985727Z", - "start_time": "2021-08-14T23:53:08.795065Z" - } - }, - "outputs": [], - "source": [ - "dict_ = hs3.get_aws_credentials(\"am\")\n", - "print(dict_)\n", - "# s3fs = hs3.get_s3fs(\"am\")\n", - "# s3fs.ls(\"s3://alphamatic-data/tmp\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T13:25:34.841885Z", - "start_time": "2021-08-16T13:25:34.820510Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "s3fs.clear_instance_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:23:17.621301Z", - "start_time": "2021-08-16T16:23:16.722753Z" - } - }, - "outputs": [], - "source": [ - "# import joblib\n", - "\n", - "# cachedir = \"./hello\"\n", - "# memory = joblib.Memory(cachedir, verbose=0)\n", - "\n", - "\n", - "@mem.cache()\n", - "def f(x):\n", - " # hello\n", - " print(f\"Running f({x})\")\n", - " return x\n", - "\n", - "\n", - "f(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-16T16:26:31.661915Z", - "start_time": "2021-08-16T16:26:31.640938Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'hello'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hcache.cache(set_verbose_mode=True)\n", - "\n", - "\n", - "def hello():\n", - " return \"hello\"\n", - "\n", - "\n", - "hello()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Memory cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T22:19:51.526004Z", - "start_time": "2021-08-14T22:19:51.259763Z" - } - }, - "outputs": [], - "source": [ - "!ls /app/tmp.cache.disk/joblib/" - ] - }, - 
{ - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T22:19:25.253342Z", - "start_time": "2021-08-14T22:19:24.986513Z" - } - }, - "outputs": [], - "source": [ - "!ls /mnt/tmpfs/tmp.cache.mem/joblib/lib" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-08-14T23:21:32.636049Z", - "start_time": "2021-08-14T23:21:32.479710Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "memory_cached_func = hcache._Cached(\n", - " func, use_mem_cache=True, use_disk_cache=False\n", - ")\n", - "\n", - "print(memory_cached_func.get_function_cache_info())\n", - "\n", - "# cache_type = None\n", - "# memory_cached_func.clear_function_cache(cache_type)\n", - "\n", - "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", - "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(memory_cached_func(*inputs), exp_output)\n", - "hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"memory caching checks passed\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:56:58.620662Z", - "start_time": "2020-09-03T19:56:58.610337Z" - } - }, - "outputs": [], - "source": [ - "def computation_function(a, b):\n", - " # hello\n", - " # assert 0\n", - " out = a * b\n", - " print(f\"Multiplication: {a} * {b} = {out}\")\n", - " return out\n", - "\n", - "\n", - "inputs = (1, 2)\n", - "exp_output = 2\n", - "\n", - "# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output)\n", - "# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Disk cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": 
"2020-09-03T19:45:20.999548Z", - "start_time": "2020-09-03T19:45:20.987298Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "disk_cached_computation = hcache._Cached(\n", - " computation_function, use_mem_cache=False, use_disk_cache=True\n", - ")\n", - "\n", - "disk_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), \"disk\")\n", - "\n", - "print(\"disk caching checks passed\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Full cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.851944Z", - "start_time": "2020-09-03T19:34:54.839379Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "fully_cached_computation = hcache._Cached(\n", - " computation_function, use_mem_cache=True, use_disk_cache=True\n", - ")\n", - "\n", - "fully_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"no_cache\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"Clear mem cache\")\n", - "fully_cached_computation.clear_function_cache()\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - 
"hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"disk\")\n", - "\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")\n", - "\n", - "print(\"full caching checks passed\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.856369Z", - "start_time": "2020-09-03T19:34:54.853563Z" - } - }, - "outputs": [], - "source": [ - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-03T19:34:54.995926Z", - "start_time": "2020-09-03T19:34:54.859279Z" - } - }, - "outputs": [], - "source": [ - "# This should fail all the times, because we clear the memory cache.\n", - "fully_cached_computation.clear_function_cache()\n", - "hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output)\n", - "hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), \"mem\")" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py deleted file mode 100644 index 3469f42b7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache.tutorial.py +++ /dev/null @@ -1,274 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] pycharm={"name": "#%% md\n"} -# # Test Cache in Jupyter Notebook - -# %% -# %load_ext autoreload -# %autoreload 2 - -import logging - -import joblib - -import helpers.hcache as hcache -import helpers.hdbg as hdbg -import helpers.hs3 as hs3 - -hnotebook.config_notebook() - -# hdbg.init_logger(verbosity=logging.DEBUG) -hdbg.init_logger(verbosity=logging.INFO) -# hdbg.test_logger() -_LOG = logging.getLogger(__name__) - - -# %% [markdown] pycharm={"name": "#%% md\n"} -# # Define computation function - - -# %% -def func(a, b): - # hello - # assert 0 - out = a * b - print(f"Multiplication: {a} * {b} = {out}") - return out - - -inputs = (1, 2) -exp_output = 2 - -func(*inputs) - -# %% -# !ls hello/joblib/__main__*/f/ - -# %% -# !pip install https://github.com/aabadie/joblib-s3.git - -# %% -# #!git clone git://github.com/aabadie/joblib-s3.git -# !(cd joblib-s3 && pip install -r requirements.txt .) 
- -# %% -# import joblibs3 - -# joblibs3.register_s3fs_store_backend() - -# # dict(compress=False, bucket=None, anon=False, -# #key=None, secret=None, token=None, use_ssl=True) -# dict2 = { -# "bucket": "alphamatic-data", -# "key": dict_["aws_access_key_id"], -# "secret": dict_["aws_secret_access_key"], -# } -# mem = joblib.Memory('joblib_cache', backend='s3', verbose=100, compress=True, -# backend_options=dict2) - -# %% -# hjoblib.register_s3fs_store_backend() - -s3fs = hs3.get_s3fs("am") - -dict2 = { - "bucket": "alphamatic-data", - # "key": dict_["aws_access_key_id"], - # "secret": dict_["aws_secret_access_key"], - "s3fs": s3fs, -} - -mem = joblib.Memory( - "joblib_cache", - backend="s3", - verbose=100, - compress=True, - backend_options=dict2, -) - -# %% -# hjoblib.register_s3fs_store_backend() - -s3fs = hs3.get_s3fs("am") -dict_ = {} - -dict2 = { - "bucket": "alphamatic-data", - # "key": dict_["aws_access_key_id"], - # "secret": dict_["aws_secret_access_key"], - "s3fs": s3fs, -} -path = "/tmp/cache.unit_test/root.98e1cf5b88c3.app.TestCachingOnS3.test_with_caching1" - - -s3fs.ls(path) - -# mem = joblib.Memory(path, backend='s3', verbose=100, compress=True, backend_options=dict2) - - -# %% -print(dict_) - -# %% -# dict_["bucket"] = "alphamatic-data/tmp" - -print(dict_) - - -# %% -def dec(func=None, val=5): - if func is not None: - return - - -# %% - -# %% -dict_ = hs3.get_aws_credentials("am") -print(dict_) -# s3fs = hs3.get_s3fs("am") -# s3fs.ls("s3://alphamatic-data/tmp") - -# %% -s3fs.clear_instance_cache() - - -# %% -# import joblib - -# cachedir = "./hello" -# memory = joblib.Memory(cachedir, verbose=0) - - -@mem.cache() -def f(x): - # hello - print(f"Running f({x})") - return x - - -f(1) - -# %% -hcache.cache(set_verbose_mode=True) - - -def hello(): - return "hello" - - -hello() - -# %% [markdown] pycharm={"name": "#%% md\n"} -# ## Memory cache - -# %% -# !ls /app/tmp.cache.disk/joblib/ - -# %% -# !ls /mnt/tmpfs/tmp.cache.mem/joblib/lib - -# %% 
pycharm={"name": "#%%\n"} -memory_cached_func = hcache._Cached( - func, use_mem_cache=True, use_disk_cache=False -) - -print(memory_cached_func.get_function_cache_info()) - -# cache_type = None -# memory_cached_func.clear_function_cache(cache_type) - -hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) -hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(memory_cached_func(*inputs), exp_output) -hdbg.dassert_eq(memory_cached_func.get_last_cache_accessed(), "mem") - -print("memory caching checks passed") - - -# %% -def computation_function(a, b): - # hello - # assert 0 - out = a * b - print(f"Multiplication: {a} * {b} = {out}") - return out - - -inputs = (1, 2) -exp_output = 2 - -# hdbg.dassert_eq(memory_cached_computation(*inputs), exp_output) -# hdbg.dassert_eq(memory_cached_computation.get_last_cache_accessed(), "mem") - -# %% [markdown] -# ## Disk cache - -# %% pycharm={"name": "#%%\n"} -disk_cached_computation = hcache._Cached( - computation_function, use_mem_cache=False, use_disk_cache=True -) - -disk_cached_computation.clear_function_cache() - -hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(disk_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(disk_cached_computation.get_last_cache_accessed(), "disk") - -print("disk caching checks passed") - -# %% [markdown] -# ## Full cache - -# %% pycharm={"name": "#%%\n"} -fully_cached_computation = hcache._Cached( - computation_function, use_mem_cache=True, use_disk_cache=True -) - -fully_cached_computation.clear_function_cache() - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "no_cache") - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - 
-hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -print("Clear mem cache") -fully_cached_computation.clear_function_cache() - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "disk") - -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -print("full caching checks passed") - -# %% -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") - -# %% -# This should fail all the times, because we clear the memory cache. -fully_cached_computation.clear_function_cache() -hdbg.dassert_eq(fully_cached_computation(*inputs), exp_output) -hdbg.dassert_eq(fully_cached_computation.get_last_cache_accessed(), "mem") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb deleted file mode 100644 index 3050efc31..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.ipynb +++ /dev/null @@ -1,858 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CONTENTS:\n", - "- [hcache_simple Tutorial](#hcache_simple-tutorial)\n", - " - [Imports](#imports)\n", - " - [1. Basic Caching](#1.-basic-caching)\n", - " - [2. Cache Performance Monitoring](#2.-cache-performance-monitoring)\n", - " - [3. Cache Management](#3.-cache-management)\n", - " - [4. Dynamic Runtime Parameters](#4.-dynamic-runtime-parameters)\n", - " - [5. 
Configurable Cache Locations](#5.-configurable-cache-locations)\n", - " - [6. Per-Function Configuration](#6.-per-function-configuration)\n", - " - [7. Excluding Keys from Cache](#7.-excluding-keys-from-cache)\n", - " - [8. Runtime Property Modification](#8.-runtime-property-modification)\n", - " - [9. S3 Integration](#9.-s3-integration)\n", - " - [10. Binary Data with Pickle](#10.-binary-data-with-pickle)\n", - " - [Summary](#summary)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "# hcache_simple Tutorial\n", - "\n", - "This tutorial demonstrates the `hcache_simple` module - a lightweight caching system with memory, disk, and S3 storage.\n", - "\n", - "**Key Features:**\n", - "- Simple decorator-based caching\n", - "- Memory and disk persistence (JSON or pickle)\n", - "- S3 sync for team cache sharing\n", - "- Per-function configuration\n", - "- Performance monitoring\n", - "- Auto-pull from S3 on first cache miss" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n", - "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-7db4a0c4-f8f9-4dd2-9ed5-4a4fdb7cefef.json'\n" - ] - } - ], - "source": [ - "import logging\n", - "import os\n", - "import tempfile\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "import helpers.hcache_simple as hcacsimp\n", - "import helpers.hdbg as hdbg\n", - "\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "\n", - "## 1. 
Basic Caching\n", - "\n", - "The `@simple_cache` decorator caches function results automatically.\n", - "\n", - "- First call: Computes result and stores in cache\n", - "- Subsequent calls: Returns cached result instantly\n", - "- Cache is stored in memory and on disk (JSON format)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(cache_type=\"json\")\n", - "def expensive_computation(x: int) -> int:\n", - " \"\"\"\n", - " Simulate expensive computation.\n", - " \"\"\"\n", - " _LOG.info(\"Computing result for x=%s (this takes 2 seconds)...\", x)\n", - " time.sleep(2)\n", - " return x**2" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "First call with x=5:\n", - "WARNING S3 bucket not configured - use set_s3_bucket()\n", - "INFO Computing result for x=5 (this takes 2 seconds)...\n", - "Result: 25\n", - "Time taken: 2.006 seconds\n", - "\n", - "Second call with x=5 (from cache):\n", - "WARNING Cache hit for expensive_computation\n", - "Result: 25\n", - "Time taken: 0.001207 seconds (much faster!)\n" - ] - } - ], - "source": [ - "# First call - computes and caches.\n", - "print(\"First call with x=5:\")\n", - "start_time = time.time()\n", - "result = expensive_computation(5)\n", - "elapsed_time = time.time() - start_time\n", - "print(f\"Result: {result}\")\n", - "print(f\"Time taken: {elapsed_time:.3f} seconds\\n\")\n", - "# Second call - returns from cache instantly.\n", - "print(\"Second call with x=5 (from cache):\")\n", - "start_time = time.time()\n", - "result = expensive_computation(5)\n", - "elapsed_time = time.time() - start_time\n", - "print(f\"Result: {result}\")\n", - "print(f\"Time taken: {elapsed_time:.6f} seconds (much faster!)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## 2. 
Cache Performance Monitoring\n", - "\n", - "Track cache efficiency with performance metrics:\n", - "- **hits**: Number of times result was retrieved from cache\n", - "- **misses**: Number of times function had to compute result\n", - "- **tot**: Total number of function calls\n", - "- **hit_rate**: Percentage of cache hits" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO Computing result for x=10 (this takes 2 seconds)...\n", - "WARNING Cache hit for expensive_computation\n", - "WARNING Cache hit for expensive_computation\n", - "INFO Computing result for x=20 (this takes 2 seconds)...\n", - "\n", - "Performance Statistics:\n", - "expensive_computation: hits=2 misses=2 tot=4 hit_rate=0.50\n" - ] - } - ], - "source": [ - "# Enable performance monitoring.\n", - "hcacsimp.enable_cache_perf(\"expensive_computation\")\n", - "# Make some calls.\n", - "expensive_computation(10) # Miss - first call with x=10.\n", - "expensive_computation(10) # Hit - cached result.\n", - "expensive_computation(10) # Hit - cached result.\n", - "expensive_computation(20) # Miss - first call with x=20.\n", - "# Check performance stats.\n", - "print(\"\\nPerformance Statistics:\")\n", - "print(hcacsimp.get_cache_perf_stats(\"expensive_computation\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## 3. 
Cache Management\n", - "\n", - "Control cache lifecycle with these operations:\n", - "- `flush_cache_to_disk()`: Write memory cache to disk\n", - "- `reset_mem_cache()`: Clear memory cache (keeps disk cache)\n", - "- `force_cache_from_disk()`: Reload cache from disk\n", - "- `cache_stats_to_str()`: View cache statistics" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cache statistics:\n", - " memory disk\n", - "expensive_computation 3 3\n", - "\n", - "Flushed to disk\n", - "Memory cache cleared\n", - " memory disk\n", - "expensive_computation - 3\n", - "\n", - "Reloaded from disk\n", - " memory disk\n", - "expensive_computation 3 3\n" - ] - } - ], - "source": [ - "# View current cache state.\n", - "print(\"Cache statistics:\")\n", - "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))\n", - "# Flush to disk (ensure persistence).\n", - "hcacsimp.flush_cache_to_disk(\"expensive_computation\")\n", - "print(\"\\nFlushed to disk\")\n", - "# Clear memory cache.\n", - "hcacsimp.reset_mem_cache(\"expensive_computation\")\n", - "print(\"Memory cache cleared\")\n", - "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))\n", - "# Reload from disk.\n", - "hcacsimp.force_cache_from_disk(\"expensive_computation\")\n", - "print(\"\\nReloaded from disk\")\n", - "print(hcacsimp.cache_stats_to_str(\"expensive_computation\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "\n", - "## 4. 
Dynamic Runtime Parameters\n", - "\n", - "Control caching behavior per function call:\n", - "- `force_refresh=True`: Bypass cache and recompute\n", - "- `abort_on_cache_miss=True`: Raise error if not in cache\n", - "- `report_on_cache_miss=True`: Log warning on cache miss" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(cache_type=\"json\")\n", - "def data_processor(data: str) -> str:\n", - " \"\"\"\n", - " Process data string.\n", - " \"\"\"\n", - " _LOG.info(\"Processing: %s\", data)\n", - " time.sleep(1)\n", - " return data.upper()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING S3 bucket not configured - use set_s3_bucket()\n", - "INFO Processing: hello\n", - "First call: HELLO (time: 1.007s)\n", - "WARNING Cache hit for data_processor\n", - "Cached call: HELLO (time: 0.001437s - from cache!)\n", - "INFO Processing: hello\n", - "Force refresh: HELLO\n", - "With report: _cache_miss_\n" - ] - } - ], - "source": [ - "# Normal call - caches result.\n", - "start_time = time.time()\n", - "result = data_processor(\"hello\")\n", - "elapsed_time = time.time() - start_time\n", - "print(f\"First call: {result} (time: {elapsed_time:.3f}s)\")\n", - "# Cached call - returns instantly.\n", - "start_time = time.time()\n", - "result = data_processor(\"hello\")\n", - "elapsed_time = time.time() - start_time\n", - "print(f\"Cached call: {result} (time: {elapsed_time:.6f}s - from cache!)\")\n", - "# Force refresh - recomputes even though cached.\n", - "result = data_processor(\"hello\", force_refresh=True)\n", - "print(f\"Force refresh: {result}\")\n", - "# Report on cache miss.\n", - "result = data_processor(\"world\", report_on_cache_miss=True)\n", - "print(f\"With report: {result}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": 
[ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cache miss error: Cache miss for key='{\"args\": [\"new_value\"], \"kwargs\": {}}'\n" - ] - } - ], - "source": [ - "# Abort on cache miss - raises ValueError if not cached.\n", - "try:\n", - " result = data_processor(\"new_value\", abort_on_cache_miss=True)\n", - "except ValueError as e:\n", - " print(f\"Cache miss error: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## 5. Configurable Cache Locations\n", - "\n", - "Customize where cache files are stored globally:\n", - "- `set_cache_dir()`: Change cache directory\n", - "- `set_cache_file_prefix()`: Change cache file prefix" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cache directory set to: /tmp/tmpxyhjnt9y\n", - "Cache file prefix set to: my_project\n", - "WARNING S3 bucket not configured - use set_s3_bucket()\n", - "\n", - "Cache files created: ['my_project_property.pkl', 'my_project.custom_location_func.json']\n" - ] - } - ], - "source": [ - "# Set custom cache directory.\n", - "cache_dir = tempfile.mkdtemp()\n", - "hcacsimp.set_cache_dir(cache_dir)\n", - "print(f\"Cache directory set to: {cache_dir}\")\n", - "# Set custom prefix.\n", - "hcacsimp.set_cache_file_prefix(\"my_project\")\n", - "print(\"Cache file prefix set to: my_project\")\n", - "\n", - "\n", - "# New cached function will use these settings.\n", - "@hcacsimp.simple_cache(cache_type=\"json\")\n", - "def custom_location_func(x: int) -> int:\n", - " return x * 3\n", - "\n", - "\n", - "# Call function.\n", - "result = custom_location_func(7)\n", - "# Verify cache file location.\n", - "cache_files = [f for f in os.listdir(cache_dir) if \"my_project\" in f]\n", - "print(f\"\\nCache files created: {cache_files}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "\n", - "## 
6. Per-Function Configuration\n", - "\n", - "Override global settings for specific functions:\n", - "- Each function can have its own cache directory\n", - "- Each function can have its own cache prefix\n", - "- Useful for organizing different types of caches" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(\n", - " cache_type=\"json\",\n", - " cache_dir=\"/tmp/function_a_cache\",\n", - " cache_prefix=\"func_a\",\n", - ")\n", - "def function_a(x: int) -> int:\n", - " return x + 100\n", - "\n", - "\n", - "@hcacsimp.simple_cache(\n", - " cache_type=\"json\",\n", - " cache_dir=\"/tmp/function_b_cache\",\n", - " cache_prefix=\"func_b\",\n", - ")\n", - "def function_b(x: int) -> int:\n", - " return x + 200" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING Cache hit for function_a\n", - "WARNING Cache hit for function_b\n", - "function_a(5) = 105\n", - "function_b(5) = 205\n", - "\n", - "function_a cache location:\n", - " Cache file: /tmp/function_a_cache/func_a.function_a.json\n", - "\n", - "function_b cache location:\n", - " Cache file: /tmp/function_b_cache/func_b.function_b.json\n" - ] - } - ], - "source": [ - "# Call both functions - each uses its own cache location.\n", - "result_a = function_a(5)\n", - "result_b = function_b(5)\n", - "print(f\"function_a(5) = {result_a}\")\n", - "print(f\"function_b(5) = {result_b}\")\n", - "# Verify separate cache files.\n", - "print(\"\\nfunction_a cache location:\")\n", - "cache_file_a = hcacsimp._get_cache_file_name(\"function_a\")\n", - "print(f\" Cache file: {cache_file_a}\")\n", - "print(\"\\nfunction_b cache location:\")\n", - "cache_file_b = hcacsimp._get_cache_file_name(\"function_b\")\n", - "print(f\" Cache file: {cache_file_b}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, 
- "source": [ - "\n", - "## 7. Excluding Keys from Cache\n", - "\n", - "Some parameters should not affect cache lookup:\n", - "- Session IDs\n", - "- Logger objects\n", - "- Timestamps\n", - "- Random seeds (when you want same result)\n", - "\n", - "Use `exclude_keys` to ignore these parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(\n", - " cache_type=\"json\",\n", - " exclude_keys=[\"session_id\", \"timestamp\"],\n", - ")\n", - "def api_call(query: str, session_id: str, timestamp: float) -> str:\n", - " \"\"\"\n", - " Simulate API call where session_id and timestamp don't affect result.\n", - " \"\"\"\n", - " _LOG.info(\"Making API call for query: %s\", query)\n", - " time.sleep(1)\n", - " return f\"Response for: {query}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING S3 bucket not configured - use set_s3_bucket()\n", - "INFO Making API call for query: search python\n", - "First call: Response for: search python (time: 1.006s)\n", - "WARNING Cache hit for api_call\n", - "Second call (from cache despite different session/timestamp): Response for: search python (time: 0.001377s)\n", - "INFO Making API call for query: search java\n", - "Third call (different query, cache miss): Response for: search java\n" - ] - } - ], - "source": [ - "# These calls have different session_id and timestamp but return cached result.\n", - "start_time = time.time()\n", - "result1 = api_call(\"search python\", session_id=\"abc123\", timestamp=1.0)\n", - "elapsed_time = time.time() - start_time\n", - "print(f\"First call: {result1} (time: {elapsed_time:.3f}s)\")\n", - "start_time = time.time()\n", - "result2 = api_call(\"search python\", session_id=\"xyz789\", timestamp=2.0)\n", - "elapsed_time = time.time() - start_time\n", - "print(\n", - 
" f\"Second call (from cache despite different session/timestamp): {result2} (time: {elapsed_time:.6f}s)\"\n", - ")\n", - "# Different query triggers cache miss.\n", - "result3 = api_call(\"search java\", session_id=\"abc123\", timestamp=1.0)\n", - "print(f\"Third call (different query, cache miss): {result3}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "\n", - "## 8. Runtime Property Modification\n", - "\n", - "All decorator parameters are stored as properties and can be modified at runtime.\n", - "This allows you to change cache behavior without redecorating functions.\n", - "\n", - "**Common use cases:**\n", - "- Disable write-through temporarily for performance\n", - "- Add/remove keys from exclusion list\n", - "- Enable/disable S3 sync dynamically" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(cache_type=\"json\", exclude_keys=[\"session_id\"])\n", - "def api_call(query: str, session_id: str) -> str:\n", - " \"\"\"\n", - " Simulate API call where session_id doesn't affect result.\n", - " \"\"\"\n", - " _LOG.info(\"Making API call for query=%s\", query)\n", - " time.sleep(1)\n", - " return f\"Result for: {query}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Initial exclude_keys: ['session_id']\n", - "Calling with query='python', session_id='abc'...\n", - "INFO Making API call for query=python\n", - "Result: Result for: python (time: 1.004s)\n", - "\n", - "Calling with query='python', session_id='xyz' (different session_id)...\n", - "WARNING Cache hit for api_call\n", - "Result: Result for: python (time: 0.001291s - cache hit!)\n" - ] - } - ], - "source": [ - "# Demonstrate initial exclude_keys behavior.\n", - "print(\"Initial exclude_keys: ['session_id']\")\n", - "print(\"Calling with 
query='python', session_id='abc'...\")\n", - "start_time = time.time()\n", - "result1 = api_call(\"python\", session_id=\"abc\")\n", - "elapsed1 = time.time() - start_time\n", - "print(f\"Result: {result1} (time: {elapsed1:.3f}s)\")\n", - "# Same query, different session_id - should hit cache.\n", - "print(\"\\nCalling with query='python', session_id='xyz' (different session_id)...\")\n", - "start_time = time.time()\n", - "result2 = api_call(\"python\", session_id=\"xyz\")\n", - "elapsed2 = time.time() - start_time\n", - "print(f\"Result: {result2} (time: {elapsed2:.6f}s - cache hit!)\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Modifying exclude_keys to [] (empty - don't exclude session_id)\n", - "exclude_keys now: []\n", - "\n", - "Calling with query='python', session_id='new123' (after modification)...\n", - "INFO Making API call for query=python\n", - "Result: Result for: python (time: 1.004s - cache miss, computed new!)\n" - ] - } - ], - "source": [ - "# Now modify exclude_keys to REMOVE session_id from exclusion.\n", - "print(\"\\nModifying exclude_keys to [] (empty - don't exclude session_id)\")\n", - "hcacsimp.set_cache_property(\"api_call\", \"exclude_keys\", [])\n", - "# Verify change.\n", - "exclude_keys_after = hcacsimp.get_cache_property(\"api_call\", \"exclude_keys\")\n", - "print(f\"exclude_keys now: {exclude_keys_after}\")\n", - "# Now same query with different session_id creates NEW cache entry.\n", - "print(\n", - " \"\\nCalling with query='python', session_id='new123' (after modification)...\"\n", - ")\n", - "start_time = time.time()\n", - "result3 = api_call(\"python\", session_id=\"new123\")\n", - "elapsed3 = time.time() - start_time\n", - "print(f\"Result: {result3} (time: {elapsed3:.3f}s - cache miss, computed new!)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## 9. 
S3 Integration\n", - "\n", - "**Note:** These examples are commented out because they require AWS credentials.\n", - "Uncomment and configure to use S3 caching.\n", - "\n", - "**S3 as Third Storage Layer:**\n", - "- S3 is integrated into the cache lookup as the third tier: Memory → Disk → S3\n", - "- When `get_cache()` is called, it automatically checks all three layers\n", - "- A cache \"miss\" only occurs if key not found in ANY layer\n", - "\n", - "**S3 Features:**\n", - "- `auto_sync_s3=True`: Automatically upload cache updates to S3\n", - "- Auto-pull: Automatically checks S3 as part of cache lookup (one-time per function)\n", - "- Manual cache operations: Use `push_cache_to_s3()` to manually upload, `pull_cache_from_s3()` to manually download and `sync_cache_with_s3()` to manually cache files between S3 and disk\n", - "\n", - "**Usage:**\n", - "1. Configure S3 globally or per-function\n", - "2. First call on any machine computes and uploads to S3\n", - "3. Other machines automatically check S3 during cache lookup\n", - "4. 
Updates are automatically synced to S3 (if `auto_sync_s3=True`)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# # Global S3 configuration (applies to all cached functions).\n", - "# hcacsimp.set_s3_bucket(\"s3://my-team-bucket\")\n", - "# hcacsimp.set_s3_prefix(\"cache/shared\")\n", - "# hcacsimp.set_aws_profile(\"my-aws-profile\")\n", - "#\n", - "# @hcacsimp.simple_cache(\n", - "# cache_type=\"json\",\n", - "# auto_sync_s3=True, # Auto-upload to S3 after cache updates on disk.\n", - "# )\n", - "# def expensive_llm_call(prompt: str) -> str:\n", - "# \"\"\"\n", - "# Simulate expensive LLM API call.\n", - "# \"\"\"\n", - "# time.sleep(3)\n", - "# return f\"LLM response to: {prompt}\"\n", - "#\n", - "# # First call on any machine - computes and uploads to S3.\n", - "# result = expensive_llm_call(\"Summarize this document\")\n", - "# print(f\"Result: {result}\")\n", - "#\n", - "# # On another machine - S3 is automatically checked during cache lookup.\n", - "# # get_cache() checks: memory → disk → S3.\n", - "# result = expensive_llm_call(\"Summarize this document\")\n", - "# print(f\"Result from cache: {result}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# # Per-function S3 configuration (overrides global settings).\n", - "# @hcacsimp.simple_cache(\n", - "# cache_type=\"json\",\n", - "# s3_bucket=\"s3://project-specific-bucket\",\n", - "# s3_prefix=\"cache/llm\",\n", - "# aws_profile=\"project-profile\",\n", - "# auto_sync_s3=True,\n", - "# )\n", - "# def project_specific_cache(data: str) -> str:\n", - "# return f\"Processed: {data}\"\n", - "#\n", - "# result = project_specific_cache(\"test data\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "\n", - "## 10. 
Binary Data with Pickle\n", - "\n", - "For complex Python objects (DataFrames, models, etc.), use pickle format:\n", - "- `cache_type=\"pickle\"`: Stores any Python object\n", - "- Supports DataFrames, numpy arrays, custom classes, etc.\n", - "- Trade-off: Not human-readable like JSON" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "@hcacsimp.simple_cache(cache_type=\"pickle\")\n", - "def create_dataframe(rows: int) -> pd.DataFrame:\n", - " \"\"\"\n", - " Create a DataFrame (can't be cached as JSON easily).\n", - " \"\"\"\n", - " _LOG.info(\"Creating DataFrame with %s rows...\", rows)\n", - " time.sleep(1)\n", - " return pd.DataFrame(\n", - " {\n", - " \"id\": range(rows),\n", - " \"value\": [x**2 for x in range(rows)],\n", - " }\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING S3 bucket not configured - use set_s3_bucket()\n", - "INFO Creating DataFrame with 5 rows...\n", - "First call:\n", - " id value\n", - "0 0 0\n", - "1 1 1\n", - "2 2 4\n", - "3 3 9\n", - "4 4 16\n", - "Time taken: 1.007 seconds\n", - "WARNING Cache hit for create_dataframe\n", - "\n", - "Second call (from cache):\n", - " id value\n", - "0 0 0\n", - "1 1 1\n", - "2 2 4\n", - "3 3 9\n", - "4 4 16\n", - "Time taken: 0.001184 seconds (from cache!)\n" - ] - } - ], - "source": [ - "# First call - computes and caches DataFrame.\n", - "start_time = time.time()\n", - "df = create_dataframe(5)\n", - "elapsed_time = time.time() - start_time\n", - "print(\"First call:\")\n", - "print(df)\n", - "print(f\"Time taken: {elapsed_time:.3f} seconds\")\n", - "# Second call - returns cached DataFrame instantly.\n", - "start_time = time.time()\n", - "df = create_dataframe(5)\n", - "elapsed_time = time.time() - start_time\n", - "print(\"\\nSecond call (from cache):\")\n", - "print(df)\n", - "print(f\"Time taken: 
{elapsed_time:.6f} seconds (from cache!)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Summary\n", - "\n", - "The `hcache_simple` module provides:\n", - "- **Easy caching**: Just add `@simple_cache` decorator\n", - "- **Multiple storage layers**: Memory (fast) → Disk (persistent) → S3 (shared)\n", - "- **Flexible configuration**: Global and per-function settings\n", - "- **Runtime modification**: Change cache behavior without redecorating functions\n", - "- **Performance monitoring**: Track cache efficiency\n", - "- **Team collaboration**: Share caches via S3 with auto-pull\n", - "- **Format support**: JSON (human-readable) or pickle (binary)\n", - "\n", - "For full documentation, see: `docs/tools/helpers/all.hcache_simple.explanation.md`" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py deleted file mode 100644 index 48c2d2d3c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hcache_simple.tutorial.py +++ /dev/null @@ -1,486 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.17.1 -# kernelspec: -# 
display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# CONTENTS: -# - [hcache_simple Tutorial](#hcache_simple-tutorial) -# - [Imports](#imports) -# - [1. Basic Caching](#1.-basic-caching) -# - [2. Cache Performance Monitoring](#2.-cache-performance-monitoring) -# - [3. Cache Management](#3.-cache-management) -# - [4. Dynamic Runtime Parameters](#4.-dynamic-runtime-parameters) -# - [5. Configurable Cache Locations](#5.-configurable-cache-locations) -# - [6. Per-Function Configuration](#6.-per-function-configuration) -# - [7. Excluding Keys from Cache](#7.-excluding-keys-from-cache) -# - [8. Runtime Property Modification](#8.-runtime-property-modification) -# - [9. S3 Integration](#9.-s3-integration) -# - [10. Binary Data with Pickle](#10.-binary-data-with-pickle) -# - [Summary](#summary) - -# %% [markdown] -# -# # hcache_simple Tutorial -# -# This tutorial demonstrates the `hcache_simple` module - a lightweight caching system with memory, disk, and S3 storage. -# -# **Key Features:** -# - Simple decorator-based caching -# - Memory and disk persistence (JSON or pickle) -# - S3 sync for team cache sharing -# - Per-function configuration -# - Performance monitoring -# - Auto-pull from S3 on first cache miss - -# %% [markdown] -# -# ## Imports - -# %% -import logging -import os -import tempfile -import time - -import pandas as pd - -import helpers.hcache_simple as hcacsimp -import helpers.hdbg as hdbg - -hdbg.init_logger(verbosity=logging.INFO) -_LOG = logging.getLogger(__name__) - -# %% [markdown] -# -# ## 1. Basic Caching -# -# The `@simple_cache` decorator caches function results automatically. -# -# - First call: Computes result and stores in cache -# - Subsequent calls: Returns cached result instantly -# - Cache is stored in memory and on disk (JSON format) - - -# %% -@hcacsimp.simple_cache(cache_type="json") -def expensive_computation(x: int) -> int: - """ - Simulate expensive computation. 
- """ - _LOG.info("Computing result for x=%s (this takes 2 seconds)...", x) - time.sleep(2) - return x**2 - - -# %% -# First call - computes and caches. -print("First call with x=5:") -start_time = time.time() -result = expensive_computation(5) -elapsed_time = time.time() - start_time -print(f"Result: {result}") -print(f"Time taken: {elapsed_time:.3f} seconds\n") -# Second call - returns from cache instantly. -print("Second call with x=5 (from cache):") -start_time = time.time() -result = expensive_computation(5) -elapsed_time = time.time() - start_time -print(f"Result: {result}") -print(f"Time taken: {elapsed_time:.6f} seconds (much faster!)") - -# %% [markdown] -# -# ## 2. Cache Performance Monitoring -# -# Track cache efficiency with performance metrics: -# - **hits**: Number of times result was retrieved from cache -# - **misses**: Number of times function had to compute result -# - **tot**: Total number of function calls -# - **hit_rate**: Percentage of cache hits - -# %% -# Enable performance monitoring. -hcacsimp.enable_cache_perf("expensive_computation") -# Make some calls. -expensive_computation(10) # Miss - first call with x=10. -expensive_computation(10) # Hit - cached result. -expensive_computation(10) # Hit - cached result. -expensive_computation(20) # Miss - first call with x=20. -# Check performance stats. -print("\nPerformance Statistics:") -print(hcacsimp.get_cache_perf_stats("expensive_computation")) - -# %% [markdown] -# -# ## 3. Cache Management -# -# Control cache lifecycle with these operations: -# - `flush_cache_to_disk()`: Write memory cache to disk -# - `reset_mem_cache()`: Clear memory cache (keeps disk cache) -# - `force_cache_from_disk()`: Reload cache from disk -# - `cache_stats_to_str()`: View cache statistics - -# %% -# View current cache state. -print("Cache statistics:") -print(hcacsimp.cache_stats_to_str("expensive_computation")) -# Flush to disk (ensure persistence). 
-hcacsimp.flush_cache_to_disk("expensive_computation") -print("\nFlushed to disk") -# Clear memory cache. -hcacsimp.reset_mem_cache("expensive_computation") -print("Memory cache cleared") -print(hcacsimp.cache_stats_to_str("expensive_computation")) -# Reload from disk. -hcacsimp.force_cache_from_disk("expensive_computation") -print("\nReloaded from disk") -print(hcacsimp.cache_stats_to_str("expensive_computation")) - -# %% [markdown] -# -# ## 4. Dynamic Runtime Parameters -# -# Control caching behavior per function call: -# - `force_refresh=True`: Bypass cache and recompute -# - `abort_on_cache_miss=True`: Raise error if not in cache -# - `report_on_cache_miss=True`: Log warning on cache miss - - -# %% -@hcacsimp.simple_cache(cache_type="json") -def data_processor(data: str) -> str: - """ - Process data string. - """ - _LOG.info("Processing: %s", data) - time.sleep(1) - return data.upper() - - -# %% -# Normal call - caches result. -start_time = time.time() -result = data_processor("hello") -elapsed_time = time.time() - start_time -print(f"First call: {result} (time: {elapsed_time:.3f}s)") -# Cached call - returns instantly. -start_time = time.time() -result = data_processor("hello") -elapsed_time = time.time() - start_time -print(f"Cached call: {result} (time: {elapsed_time:.6f}s - from cache!)") -# Force refresh - recomputes even though cached. -result = data_processor("hello", force_refresh=True) -print(f"Force refresh: {result}") -# Report on cache miss. -result = data_processor("world", report_on_cache_miss=True) -print(f"With report: {result}") - -# %% -# Abort on cache miss - raises ValueError if not cached. -try: - result = data_processor("new_value", abort_on_cache_miss=True) -except ValueError as e: - print(f"Cache miss error: {e}") - -# %% [markdown] -# -# ## 5. 
Configurable Cache Locations -# -# Customize where cache files are stored globally: -# - `set_cache_dir()`: Change cache directory -# - `set_cache_file_prefix()`: Change cache file prefix - -# %% -# Set custom cache directory. -cache_dir = tempfile.mkdtemp() -hcacsimp.set_cache_dir(cache_dir) -print(f"Cache directory set to: {cache_dir}") -# Set custom prefix. -hcacsimp.set_cache_file_prefix("my_project") -print("Cache file prefix set to: my_project") - - -# New cached function will use these settings. -@hcacsimp.simple_cache(cache_type="json") -def custom_location_func(x: int) -> int: - return x * 3 - - -# Call function. -result = custom_location_func(7) -# Verify cache file location. -cache_files = [f for f in os.listdir(cache_dir) if "my_project" in f] -print(f"\nCache files created: {cache_files}") - -# %% [markdown] -# -# ## 6. Per-Function Configuration -# -# Override global settings for specific functions: -# - Each function can have its own cache directory -# - Each function can have its own cache prefix -# - Useful for organizing different types of caches - - -# %% -@hcacsimp.simple_cache( - cache_type="json", - cache_dir="/tmp/function_a_cache", - cache_prefix="func_a", -) -def function_a(x: int) -> int: - return x + 100 - - -@hcacsimp.simple_cache( - cache_type="json", - cache_dir="/tmp/function_b_cache", - cache_prefix="func_b", -) -def function_b(x: int) -> int: - return x + 200 - - -# %% -# Call both functions - each uses its own cache location. -result_a = function_a(5) -result_b = function_b(5) -print(f"function_a(5) = {result_a}") -print(f"function_b(5) = {result_b}") -# Verify separate cache files. -print("\nfunction_a cache location:") -cache_file_a = hcacsimp._get_cache_file_name("function_a") -print(f" Cache file: {cache_file_a}") -print("\nfunction_b cache location:") -cache_file_b = hcacsimp._get_cache_file_name("function_b") -print(f" Cache file: {cache_file_b}") - -# %% [markdown] -# -# ## 7. 
Excluding Keys from Cache -# -# Some parameters should not affect cache lookup: -# - Session IDs -# - Logger objects -# - Timestamps -# - Random seeds (when you want same result) -# -# Use `exclude_keys` to ignore these parameters. - - -# %% -@hcacsimp.simple_cache( - cache_type="json", - exclude_keys=["session_id", "timestamp"], -) -def api_call(query: str, session_id: str, timestamp: float) -> str: - """ - Simulate API call where session_id and timestamp don't affect result. - """ - _LOG.info("Making API call for query: %s", query) - time.sleep(1) - return f"Response for: {query}" - - -# %% -# These calls have different session_id and timestamp but return cached result. -start_time = time.time() -result1 = api_call("search python", session_id="abc123", timestamp=1.0) -elapsed_time = time.time() - start_time -print(f"First call: {result1} (time: {elapsed_time:.3f}s)") -start_time = time.time() -result2 = api_call("search python", session_id="xyz789", timestamp=2.0) -elapsed_time = time.time() - start_time -print( - f"Second call (from cache despite different session/timestamp): {result2} (time: {elapsed_time:.6f}s)" -) -# Different query triggers cache miss. -result3 = api_call("search java", session_id="abc123", timestamp=1.0) -print(f"Third call (different query, cache miss): {result3}") - - -# %% [markdown] -# -# ## 8. Runtime Property Modification -# -# All decorator parameters are stored as properties and can be modified at runtime. -# This allows you to change cache behavior without redecorating functions. -# -# **Common use cases:** -# - Disable write-through temporarily for performance -# - Add/remove keys from exclusion list -# - Enable/disable S3 sync dynamically - - -# %% -@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) -def api_call(query: str, session_id: str) -> str: - """ - Simulate API call where session_id doesn't affect result. 
- """ - _LOG.info("Making API call for query=%s", query) - time.sleep(1) - return f"Result for: {query}" - - -# %% -# Demonstrate initial exclude_keys behavior. -print("Initial exclude_keys: ['session_id']") -print("Calling with query='python', session_id='abc'...") -start_time = time.time() -result1 = api_call("python", session_id="abc") -elapsed1 = time.time() - start_time -print(f"Result: {result1} (time: {elapsed1:.3f}s)") -# Same query, different session_id - should hit cache. -print("\nCalling with query='python', session_id='xyz' (different session_id)...") -start_time = time.time() -result2 = api_call("python", session_id="xyz") -elapsed2 = time.time() - start_time -print(f"Result: {result2} (time: {elapsed2:.6f}s - cache hit!)") - -# %% -# Now modify exclude_keys to REMOVE session_id from exclusion. -print("\nModifying exclude_keys to [] (empty - don't exclude session_id)") -hcacsimp.set_cache_property("api_call", "exclude_keys", []) -# Verify change. -exclude_keys_after = hcacsimp.get_cache_property("api_call", "exclude_keys") -print(f"exclude_keys now: {exclude_keys_after}") -# Now same query with different session_id creates NEW cache entry. -print( - "\nCalling with query='python', session_id='new123' (after modification)..." -) -start_time = time.time() -result3 = api_call("python", session_id="new123") -elapsed3 = time.time() - start_time -print(f"Result: {result3} (time: {elapsed3:.3f}s - cache miss, computed new!)") - -# %% [markdown] -# -# ## 9. S3 Integration -# -# **Note:** These examples are commented out because they require AWS credentials. -# Uncomment and configure to use S3 caching. 
-# -# **S3 as Third Storage Layer:** -# - S3 is integrated into the cache lookup as the third tier: Memory → Disk → S3 -# - When `get_cache()` is called, it automatically checks all three layers -# - A cache "miss" only occurs if key not found in ANY layer -# -# **S3 Features:** -# - `auto_sync_s3=True`: Automatically upload cache updates to S3 -# - Auto-pull: Automatically checks S3 as part of cache lookup (one-time per function) -# - Manual cache operations: Use `push_cache_to_s3()` to manually upload, `pull_cache_from_s3()` to manually download and `sync_cache_with_s3()` to manually cache files between S3 and disk -# -# **Usage:** -# 1. Configure S3 globally or per-function -# 2. First call on any machine computes and uploads to S3 -# 3. Other machines automatically check S3 during cache lookup -# 4. Updates are automatically synced to S3 (if `auto_sync_s3=True`) - -# %% -# # Global S3 configuration (applies to all cached functions). -# hcacsimp.set_s3_bucket("s3://my-team-bucket") -# hcacsimp.set_s3_prefix("cache/shared") -# hcacsimp.set_aws_profile("my-aws-profile") -# -# @hcacsimp.simple_cache( -# cache_type="json", -# auto_sync_s3=True, # Auto-upload to S3 after cache updates on disk. -# ) -# def expensive_llm_call(prompt: str) -> str: -# """ -# Simulate expensive LLM API call. -# """ -# time.sleep(3) -# return f"LLM response to: {prompt}" -# -# # First call on any machine - computes and uploads to S3. -# result = expensive_llm_call("Summarize this document") -# print(f"Result: {result}") -# -# # On another machine - S3 is automatically checked during cache lookup. -# # get_cache() checks: memory → disk → S3. -# result = expensive_llm_call("Summarize this document") -# print(f"Result from cache: {result}") - -# %% -# # Per-function S3 configuration (overrides global settings). 
-# @hcacsimp.simple_cache( -# cache_type="json", -# s3_bucket="s3://project-specific-bucket", -# s3_prefix="cache/llm", -# aws_profile="project-profile", -# auto_sync_s3=True, -# ) -# def project_specific_cache(data: str) -> str: -# return f"Processed: {data}" -# -# result = project_specific_cache("test data") - -# %% [markdown] -# -# ## 10. Binary Data with Pickle -# -# For complex Python objects (DataFrames, models, etc.), use pickle format: -# - `cache_type="pickle"`: Stores any Python object -# - Supports DataFrames, numpy arrays, custom classes, etc. -# - Trade-off: Not human-readable like JSON - - -# %% -@hcacsimp.simple_cache(cache_type="pickle") -def create_dataframe(rows: int) -> pd.DataFrame: - """ - Create a DataFrame (can't be cached as JSON easily). - """ - _LOG.info("Creating DataFrame with %s rows...", rows) - time.sleep(1) - return pd.DataFrame( - { - "id": range(rows), - "value": [x**2 for x in range(rows)], - } - ) - - -# %% -# First call - computes and caches DataFrame. -start_time = time.time() -df = create_dataframe(5) -elapsed_time = time.time() - start_time -print("First call:") -print(df) -print(f"Time taken: {elapsed_time:.3f} seconds") -# Second call - returns cached DataFrame instantly. 
-start_time = time.time() -df = create_dataframe(5) -elapsed_time = time.time() - start_time -print("\nSecond call (from cache):") -print(df) -print(f"Time taken: {elapsed_time:.6f} seconds (from cache!)") - -# %% [markdown] -# -# ## Summary -# -# The `hcache_simple` module provides: -# - **Easy caching**: Just add `@simple_cache` decorator -# - **Multiple storage layers**: Memory (fast) → Disk (persistent) → S3 (shared) -# - **Flexible configuration**: Global and per-function settings -# - **Runtime modification**: Change cache behavior without redecorating functions -# - **Performance monitoring**: Track cache efficiency -# - **Team collaboration**: Share caches via S3 with auto-pull -# - **Format support**: JSON (human-readable) or pickle (binary) -# -# For full documentation, see: `docs/tools/helpers/all.hcache_simple.explanation.md` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb deleted file mode 100644 index 7b505f87e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.ipynb +++ /dev/null @@ -1,424 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "7fb27b941602401d91542211134fc71a", - "metadata": {}, - "source": [ - "CONTENTS:\n", - "- [hgoogle_file_api.py](#hgoogle_file_api.py)\n", - " - [Get Credentials for your drive](#get-credentials-for-your-drive)\n", - " - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet)\n", - " - [Freeze Rows](#freeze-rows)\n", - " - [Change the height of certin rows](#change-the-height-of-certin-rows)\n", - " - [Read some nice data](#read-some-nice-data)\n", - " - [Write this nice data](#write-this-nice-data)" - ] - }, - { - 
"cell_type": "markdown", - "id": "982ab891-de0a-47d5-946a-0f4fd3f16307", - "metadata": {}, - "source": [ - "\n", - "# hgoogle_file_api.py" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6b997caf-4bfc-47bc-b7e1-584f02da328f", - "metadata": {}, - "outputs": [], - "source": [ - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade google-api-python-client)\"\n", - "# !sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade pip install oauth2client)\"\n", - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --upgrade gspread)\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0fdf8a01-00ed-4e40-8b8b-3e4ecfe37d45", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import importlib\n", - "import helpers.hgoogle_drive_api as hgodrapi\n", - "\n", - "importlib.reload(hgodrapi)" - ] - }, - { - "cell_type": "markdown", - "id": "f9733115-f65b-43fb-8b56-32be7588c617", - "metadata": {}, - "source": [ - "\n", - "## Get Credentials for your drive" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0f3eb12a-bd7e-4846-a8f0-331ece997137", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "google_creds = hgodrapi.get_credentials()\n", - "print(google_creds)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "feb74dae-ff52-44ce-b698-4c04cc2bc8f3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "service = hgodrapi.get_sheets_service(google_creds)\n", - "print(service)" - ] - }, - { - "cell_type": "markdown", - "id": "9e1c8840-c759-4bd6-a2c5-f30d94daf72b", - "metadata": {}, - "source": [ - "\n", - "## Get Tab/Sheet id of a particular google sheet" - ] - }, - { - 
"cell_type": "code", - "execution_count": 8, - "id": "67fe7cc1-0f90-4b45-b93d-c6eaecd25028", - "metadata": {}, - "outputs": [], - "source": [ - "tab_name = \"cleaned_profiles_1\"\n", - "url = \"https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260\"\n", - "sheet_id = \"1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA\"\n", - "credentials = google_creds" - ] - }, - { - "cell_type": "markdown", - "id": "f18db947-8170-4cba-8799-dfe792e1c732", - "metadata": {}, - "source": [ - "\n", - "## Freeze Rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "232a1ee0-83d2-4449-a8c0-a8e8eca02fc5", - "metadata": {}, - "outputs": [], - "source": [ - "row_indices = [0, 1, 2]\n", - "hgodrapi.freeze_rows(\n", - " credentials,\n", - " sheet_id=sheet_id,\n", - " row_indices=row_indices,\n", - " tab_name=tab_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "dd9b9b7d-2dc6-416d-bd9c-a8039fadaba2", - "metadata": {}, - "source": [ - "\n", - "## Change the height of certin rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50db6e3d-8d05-47ea-9ace-dc79ce131f37", - "metadata": {}, - "outputs": [], - "source": [ - "hgodrapi.set_row_height(\n", - " google_creds,\n", - " sheet_id=sheet_id,\n", - " height=20,\n", - " start_index=0,\n", - " end_index=2,\n", - " tab_name=tab_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3569d048-d69e-4e4b-ab53-a93b6f4a41d1", - "metadata": {}, - "source": [ - "\n", - "## Read some nice data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e63bede3-2948-4a37-b444-36b4dba81c6d", - "metadata": {}, - "outputs": [], - "source": [ - "nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "59233081-ac03-4ac7-96b1-4de1b07fae75", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameTitleFirmLocationPersonal Investment FocusGeographic FocusTypical Deal SizePreferred StagesNotable Personal AI InvestmentsPotential Fit with KaizenfirstNamelastName
0Michael MoritzManaging PartnerSequoia CapitalMenlo ParkAI/MLGlobal$10MEarly to GrowthGoogleHigh'''MichaelMoritz
1Navid AlipourManaging PartnerAnalytics VenturesSan DiegoAI/MLSan Diego$5MSeedCureMetrixHigh''NavidAlipour
2Aaref HilalyPartnerBain Capital VenturesPalo AltoReal-time AnalyticsBurlingameSeedSeed/Early StageRubrikMediumAarefHilaly
3Aaron FleishmanPrincipalTola CapitalSeattle WAEnterprise AIPNW$5M-$20MSeries ADatabricksHighAaronFleishman
4Aaron JacobsonPartnerNew Enterprise AssociatesMenlo ParkMLOpsNorth AmericaSeries A$10M-$30MDatabricksHighAaronJacobson
\n", - "
" - ], - "text/plain": [ - " Name Title Firm Location \\\n", - "0 Michael Moritz Managing Partner Sequoia Capital Menlo Park \n", - "1 Navid Alipour Managing Partner Analytics Ventures San Diego \n", - "2 Aaref Hilaly Partner Bain Capital Ventures Palo Alto \n", - "3 Aaron Fleishman Principal Tola Capital Seattle WA \n", - "4 Aaron Jacobson Partner New Enterprise Associates Menlo Park \n", - "\n", - " Personal Investment Focus Geographic Focus Typical Deal Size \\\n", - "0 AI/ML Global $10M \n", - "1 AI/ML San Diego $5M \n", - "2 Real-time Analytics Burlingame Seed \n", - "3 Enterprise AI PNW $5M-$20M \n", - "4 MLOps North America Series A \n", - "\n", - " Preferred Stages Notable Personal AI Investments Potential Fit with Kaizen \\\n", - "0 Early to Growth Google High''' \n", - "1 Seed CureMetrix High'' \n", - "2 Seed/Early Stage Rubrik Medium \n", - "3 Series A Databricks High \n", - "4 $10M-$30M Databricks High \n", - "\n", - " firstName lastName \n", - "0 Michael Moritz \n", - "1 Navid Alipour \n", - "2 Aaref Hilaly \n", - "3 Aaron Fleishman \n", - "4 Aaron Jacobson " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nice_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "30dcc791-cbdb-45f1-9298-a74e0a7babab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100, 12)" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nice_data.shape" - ] - }, - { - "cell_type": "markdown", - "id": "5c4cafb4-fe5f-4f6e-b594-759b199acb7e", - "metadata": {}, - "source": [ - "\n", - "## Write this nice data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b1f4a89-cb96-417a-86f4-ebc513c18510", - "metadata": {}, - "outputs": [], - "source": [ - "hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name=\"testing_tab\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": 
"Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py deleted file mode 100644 index a76ac9e94..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hgoodle_drive_api.tutorial.py +++ /dev/null @@ -1,107 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# CONTENTS: -# - [hgoogle_file_api.py](#hgoogle_file_api.py) -# - [Get Credentials for your drive](#get-credentials-for-your-drive) -# - [Get Tab/Sheet id of a particular google sheet](#get-tab/sheet-id-of-a-particular-google-sheet) -# - [Freeze Rows](#freeze-rows) -# - [Change the height of certin rows](#change-the-height-of-certin-rows) -# - [Read some nice data](#read-some-nice-data) -# - [Write this nice data](#write-this-nice-data) - -# %% [markdown] -# -# # hgoogle_file_api.py - -# %% -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade google-api-python-client)" -# # !sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade pip install oauth2client)" -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade gspread)" - -# %% -import importlib -import 
helpers.hgoogle_drive_api as hgodrapi - -importlib.reload(hgodrapi) - -# %% [markdown] -# -# ## Get Credentials for your drive - -# %% -google_creds = hgodrapi.get_credentials() -print(google_creds) - -# %% -service = hgodrapi.get_sheets_service(google_creds) -print(service) - -# %% [markdown] -# -# ## Get Tab/Sheet id of a particular google sheet - -# %% -tab_name = "cleaned_profiles_1" -url = "https://docs.google.com/spreadsheets/d/1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA/edit?gid=1687996260#gid=1687996260" -sheet_id = "1VRJQZ4kSoqAeOr9MkWcYbIcArNRyglTREaMg1WlZHGA" -credentials = google_creds - -# %% [markdown] -# -# ## Freeze Rows - -# %% -row_indices = [0, 1, 2] -hgodrapi.freeze_rows( - credentials, - sheet_id=sheet_id, - row_indices=row_indices, - tab_name=tab_name, -) - -# %% [markdown] -# -# ## Change the height of certin rows - -# %% -hgodrapi.set_row_height( - google_creds, - sheet_id=sheet_id, - height=20, - start_index=0, - end_index=2, - tab_name=tab_name, -) - -# %% [markdown] -# -# ## Read some nice data - -# %% -nice_data = hgodrapi.from_gsheet(google_creds, url, tab_name=tab_name) - -# %% -nice_data.head() - -# %% -nice_data.shape - -# %% [markdown] -# -# ## Write this nice data - -# %% -hgodrapi.to_gsheet(google_creds, nice_data, url, tab_name="testing_tab") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb deleted file mode 100644 index 3bb70bdef..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.ipynb +++ /dev/null @@ -1,13040 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CONTENTS:\n", - "- [Description](#description)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ 
- "\n", - "# Description\n", - "\n", - "This notebook examines ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", - "#!jupyter labextension enable" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:14.828251Z", - "start_time": "2021-04-02T18:11:14.514771Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import logging\n", - "\n", - "import helpers.hdbg as hdbg\n", - "import helpers.henv as henv" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:24.635995Z", - "start_time": "2021-04-02T18:11:18.239237Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# System signature\n", - " # Container version\n", - " container_version='1.2.0'\n", - " changelog_version='2.0.0'\n", - " # Git info\n", - " branch_name='CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI'\n", - " hash='0ca93d8c'\n", - " # Last commits:\n", - " * 0ca93d8c GP Saggese Merge ( 5 minutes ago) Fri May 9 22:09:03 2025 (HEAD -> CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI, origin/CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI)\n", - " |\\ \n", - " * | 99cbbf22 GP Saggese Lint ( 6 minutes ago) Fri May 9 22:08:07 2025 \n", - " | * 27b38c48 GP Saggese CmampTask12067_Read_docs_about_DataPull_4 (#698) ( 8 minutes ago) Fri May 9 22:06:25 2025 (origin/master, origin/HEAD, master)\n", - " # Platform info\n", - " system=Linux\n", - " node name=0f79e8b845ee\n", - " release=6.10.14-linuxkit\n", - " version=#1 SMP Thu Mar 20 16:32:56 UTC 2025\n", - " machine=aarch64\n", - " processor=aarch64\n", - " # psutils info\n", - " cpu count=8\n", - " cpu freq=None\n", - " 
memory=svmem(total=16749285376, available=14575529984, percent=13.0, used=1910644736, free=9673363456, active=2843516928, inactive=3252117504, buffers=490647552, cached=4674629632, shared=1093632, slab=694362112)\n", - " disk usage=sdiskusage(total=270233210880, used=102272610304, free=154199986176, percent=39.9)\n", - " # Docker info\n", - " has_docker=True\n", - " docker_version='28.0.4'\n", - " docker_needs_sudo=False\n", - " has_privileged_mode=True\n", - " is_inside_docker=True\n", - " has_docker_sibling_containers_support=True\n", - " has_docker_children_containers_support=True\n", - " # Packages\n", - " python: 3.12.3\n", - " cvxopt: ?\n", - " cvxpy: ?\n", - " gluonnlp: ?\n", - " gluonts: ?\n", - " joblib: 1.4.2\n", - " mxnet: ?\n", - " numpy: 2.2.3\n", - " pandas: 2.2.3\n", - " pyarrow: 19.0.1\n", - " scipy: 1.15.2\n", - " seaborn: 0.13.2\n", - " sklearn: 1.6.1\n", - " statsmodels: 0.14.4\n" - ] - } - ], - "source": [ - "print(henv.get_system_signature()[0])\n", - "\n", - "hnotebook.config_notebook()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2021-04-02T18:11:24.668793Z", - "start_time": "2021-04-02T18:11:24.638503Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n", - "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-0f2f4a10-7f18-4858-af02-b60808101345.json'\n" - ] - } - ], - "source": [ - "# hdbg.init_logger(verbosity=logging.DEBUG)\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "# hdbg.test_logger()\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet openai requests)\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - 
"source": [ - "import helpers.hllm as hllm\n", - "import helpers.hpandas as hpandas" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "val = hllm.get_model_stats()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'architecture': {'input_modalities': ['text', 'image'],\n", - " 'instruct_type': None,\n", - " 'modality': 'text+image->text',\n", - " 'output_modalities': ['text'],\n", - " 'tokenizer': 'Mistral'},\n", - " 'context_length': 131072,\n", - " 'created': 1746627341,\n", - " 'description': 'Mistral Medium 3 is a high-performance enterprise-grade '\n", - " 'language model designed to deliver frontier-level '\n", - " 'capabilities at significantly reduced operational cost. It '\n", - " 'balances state-of-the-art reasoning and multimodal '\n", - " 'performance with 8× lower cost compared to traditional large '\n", - " 'models, making it suitable for scalable deployments across '\n", - " 'professional and industrial use cases.\\n'\n", - " '\\n'\n", - " 'The model excels in domains such as coding, STEM reasoning, '\n", - " 'and enterprise adaptation. It supports hybrid, on-prem, and '\n", - " 'in-VPC deployments and is optimized for integration into '\n", - " 'custom workflows. 
Mistral Medium 3 offers competitive '\n", - " 'accuracy relative to larger models like Claude Sonnet '\n", - " '3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining '\n", - " 'broad compatibility across cloud environments.',\n", - " 'id': 'mistralai/mistral-medium-3',\n", - " 'name': 'Mistral: Mistral Medium 3',\n", - " 'per_request_limits': None,\n", - " 'pricing': {'completion': '0.000002',\n", - " 'image': '0',\n", - " 'internal_reasoning': '0',\n", - " 'prompt': '0.0000004',\n", - " 'request': '0',\n", - " 'web_search': '0'},\n", - " 'supported_parameters': ['tools',\n", - " 'tool_choice',\n", - " 'max_tokens',\n", - " 'temperature',\n", - " 'top_p',\n", - " 'stop',\n", - " 'frequency_penalty',\n", - " 'presence_penalty',\n", - " 'response_format',\n", - " 'structured_outputs',\n", - " 'seed'],\n", - " 'top_provider': {'context_length': 131072,\n", - " 'is_moderated': False,\n", - " 'max_completion_tokens': None}}\n" - ] - } - ], - "source": [ - "import pprint\n", - "\n", - "pprint.pprint(val[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamecreateddescriptioncontext_lengthper_request_limitssupported_parametersarchitecture_modalityarchitecture_input_modalitiesarchitecture_output_modalitiesarchitecture_tokenizerarchitecture_instruct_typepricing_promptpricing_completionpricing_requestpricing_imagepricing_web_searchpricing_internal_reasoningtop_provider_context_lengthtop_provider_max_completion_tokenstop_provider_is_moderatedpricing_input_cache_readpricing_input_cache_write
0mistralai/mistral-medium-3Mistral: Mistral Medium 31746627341Mistral Medium 3 is a high-performance enterpr...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.00000040.0000020000131072.0NaNFalseNaNNaN
1google/gemini-2.5-pro-previewGoogle: Gemini 2.5 Pro Preview1746578513Gemini 2.5 Pro is Google’s state-of-the-art AI...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0.000001250.0000100.00516001048576.065535.0False0.000000310.000001625
2arcee-ai/caller-largeArcee AI: Caller Large1746487869Caller Large is Arcee's specialist \"function‑c...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.000000550.00000085000032768.0NaNFalseNaNNaN
3arcee-ai/spotlightArcee AI: Spotlight1746481552Spotlight is a 7‑billion‑parameter vision‑lang...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000180.000000180000131072.065537.0FalseNaNNaN
4arcee-ai/maestro-reasoningArcee AI: Maestro Reasoning1746481269Maestro Reasoning is Arcee's flagship analysis...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000090.00000330000131072.032000.0FalseNaNNaN
5arcee-ai/virtuoso-largeArcee AI: Virtuoso Large1746478885Virtuoso‑Large is Arcee's top‑tier general‑pur...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000750.00000120000131072.064000.0FalseNaNNaN
6arcee-ai/coder-largeArcee AI: Coder Large1746478663Coder‑Large is a 32 B‑parameter offspring of Q...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.0000008000032768.0NaNFalseNaNNaN
7arcee-ai/virtuoso-medium-v2Arcee AI: Virtuoso Medium V21746478434Virtuoso‑Medium‑v2 is a 32 B model distilled f...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000050.00000080000131072.032768.0FalseNaNNaN
8arcee-ai/arcee-blitzArcee AI: Arcee Blitz1746470100Arcee Blitz is a 24 B‑parameter dense model di...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000450.00000075000032768.0NaNFalseNaNNaN
9microsoft/phi-4-reasoning-plus:freeMicrosoft: Phi 4 Reasoning Plus (free)1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
10microsoft/phi-4-reasoning-plusMicrosoft: Phi 4 Reasoning Plus1746130961Phi-4-reasoning-plus is an enhanced 14B parame...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.000000070.00000035000032768.0NaNFalseNaNNaN
11microsoft/phi-4-reasoning:freeMicrosoft: Phi 4 Reasoning (free)1746121275Phi-4-reasoning is a 14B parameter dense decod...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
12qwen/qwen3-0.6b-04-28:freeQwen: Qwen3 0.6B (free)1746043526Qwen3-0.6B is a lightweight, 0.6 billion param...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
13inception/mercury-coder-small-betaInception: Mercury Coder Small Beta1746033880Mercury Coder Small is the first diffusion lar...32000None[max_tokens, frequency_penalty, presence_penal...text->text[text][text]OtherNone0.000000250.000001000032000.0NaNFalseNaNNaN
14qwen/qwen3-1.7b:freeQwen: Qwen3 1.7B (free)1746031388Qwen3-1.7B is a compact, 1.7 billion parameter...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000032000.0NaNFalseNaNNaN
15qwen/qwen3-4b:freeQwen: Qwen3 4B (free)1746031104Qwen3-4B is a 4 billion parameter dense langua...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None000000128000.0NaNFalseNaNNaN
16opengvlab/internvl3-14b:freeOpenGVLab: InternVL3 14B (free)1746021355The 14b version of the InternVL3 series. An ad...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
17opengvlab/internvl3-2b:freeOpenGVLab: InternVL3 2B (free)1746019807The 2b version of the InternVL3 series, for an...32000None[max_tokens, temperature, top_p]text+image->text[image, text][text]OtherNone00000032000.0NaNFalseNaNNaN
18deepseek/deepseek-prover-v2:freeDeepSeek: DeepSeek Prover V2 (free)1746013094DeepSeek Prover V2 is a 671B parameter model, ...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
19deepseek/deepseek-prover-v2DeepSeek: DeepSeek Prover V21746013094DeepSeek Prover V2 is a 671B parameter model, ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.00000050.000002180000131072.0NaNFalseNaNNaN
20meta-llama/llama-guard-4-12bMeta: Llama Guard 4 12B1745975193Llama Guard 4 is a Llama 4 Scout-derived multi...163840None[max_tokens, temperature, top_p, stop, frequen...text+image->text[image, text][text]OtherNone0.000000050.000000050000163840.0NaNFalseNaNNaN
21qwen/qwen3-30b-a3b:freeQwen: Qwen3 30B A3B (free)1745878604Qwen3, the latest generation in the Qwen large...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
22qwen/qwen3-30b-a3bQwen: Qwen3 30B A3B1745878604Qwen3, the latest generation in the Qwen large...40960None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwen3None0.00000010.0000003000040960.040960.0FalseNaNNaN
23qwen/qwen3-8b:freeQwen: Qwen3 8B (free)1745876632Qwen3-8B is a dense 8.2B parameter causal lang...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.040960.0FalseNaNNaN
24qwen/qwen3-8bQwen: Qwen3 8B1745876632Qwen3-8B is a dense 8.2B parameter causal lang...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.0000000350.0000001380000128000.0NaNFalseNaNNaN
25qwen/qwen3-14b:freeQwen: Qwen3 14B (free)1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
26qwen/qwen3-14bQwen: Qwen3 14B1745876478Qwen3-14B is a dense 14.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000070.00000024000040960.040960.0FalseNaNNaN
27qwen/qwen3-32b:freeQwen: Qwen3 32B (free)1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
28qwen/qwen3-32bQwen: Qwen3 32B1745875945Qwen3-32B is a dense 32.8B parameter causal la...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.00000010.0000003000040960.0NaNFalseNaNNaN
29qwen/qwen3-235b-a22b:freeQwen: Qwen3 235B A22B (free)1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None00000040960.0NaNFalseNaNNaN
30qwen/qwen3-235b-a22bQwen: Qwen3 235B A22B1745875757Qwen3-235B-A22B is a 235B parameter mixture-of...40960None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwen3None0.000000140.000002000040960.040960.0FalseNaNNaN
31tngtech/deepseek-r1t-chimera:freeTNG: DeepSeek R1T Chimera (free)1745760875DeepSeek-R1T-Chimera is created by merging Dee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
32thudm/glm-z1-rumination-32bTHUDM: GLM Z1 Rumination 32B1745601495THUDM: GLM Z1 Rumination 32B is a 32B-paramete...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
33thudm/glm-z1-9b:freeTHUDM: GLM Z1 9B (free)1745601140GLM-Z1-9B-0414 is a 9B-parameter language mode...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032000.0NaNFalseNaNNaN
34thudm/glm-4-9b:freeTHUDM: GLM 4 9B (free)1745601023GLM-4-9B-0414 is a 9 billion parameter languag...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032000.0NaNFalseNaNNaN
35microsoft/mai-ds-r1:freeMicrosoft: MAI DS R1 (free)1745194100MAI-DS-R1 is a post-trained variant of DeepSee...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
36thudm/glm-z1-32b:freeTHUDM: GLM Z1 32B (free)1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
37thudm/glm-z1-32bTHUDM: GLM Z1 32B1744924148GLM-Z1-32B-0414 is an enhanced reasoning varia...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000240.00000024000032000.0NaNFalseNaNNaN
38thudm/glm-4-32b:freeTHUDM: GLM 4 32B (free)1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
39thudm/glm-4-32bTHUDM: GLM 4 32B1744920915GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000240.00000024000032000.0NaNFalseNaNNaN
40google/gemini-2.5-flash-previewGoogle: Gemini 2.5 Flash Preview1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000000600.0006192001048576.065535.0False0.00000003750.0000002333
41google/gemini-2.5-flash-preview:thinkingGoogle: Gemini 2.5 Flash Preview (thinking)1744914667Gemini 2.5 Flash is Google's state-of-the-art ...1048576None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[image, text, file][text]GeminiNone0.000000150.000003500.0006192001048576.065535.0False0.00000003750.0000002333
42openai/o4-mini-highOpenAI: o4 Mini High1744824212OpenAI o4-mini-high is the same model as [o4-m...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
43openai/o3OpenAI: o31744823457o3 is a well-rounded and powerful model across...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text, file][text]OtherNone0.000010.0000400.0076500200000.0100000.0True0.0000025NaN
44openai/o4-miniOpenAI: o4 Mini1744820942OpenAI o4-mini is a compact reasoning model in...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[image, text][text]OtherNone0.00000110.000004400.000841500200000.0100000.0True0.000000275NaN
45shisa-ai/shisa-v2-llama3.3-70b:freeShisa AI: Shisa V2 Llama 3.3 70B (free)1744754858Shisa V2 Llama 3.3 70B is a bilingual Japanese...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None00000032768.0NaNFalseNaNNaN
46qwen/qwen2.5-coder-7b-instructQwen: Qwen2.5 Coder 7B Instruct1744734887Qwen2.5-Coder-7B-Instruct is a 7B parameter in...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]QwenNone0.000000010.00000003000032768.0NaNFalseNaNNaN
47openai/gpt-4.1OpenAI: GPT-4.11744651385GPT-4.1 is a flagship large language model opt...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.0000020.00000800001047576.032768.0True0.0000005NaN
48openai/gpt-4.1-miniOpenAI: GPT-4.1 Mini1744651381GPT-4.1 Mini is a mid-sized model delivering p...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000040.000001600001047576.032768.0True0.0000001NaN
49openai/gpt-4.1-nanoOpenAI: GPT-4.1 Nano1744651369For tasks that demand low latency, GPT‑4.1 nan...1047576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[image, text][text]GPTNone0.00000010.000000400001047576.032768.0True0.000000025NaN
50eleutherai/llemma_7bEleutherAI: Llemma 7b1744643225Llemma 7B is a language model for mathematics....4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Othercode-llama0.00000080.000001200004096.04096.0FalseNaNNaN
51alfredpros/codellama-7b-instruct-solidityAlfredPros: CodeLLaMa 7B Instruct Solidity1744641874A finetuned 7 billion parameters Code LLaMA - ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otheralpaca0.00000080.000001200004096.04096.0FalseNaNNaN
52arliai/qwq-32b-arliai-rpr-v1:freeArliAI: QwQ 32B RpR v1 (free)1744555982QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
53agentica-org/deepcoder-14b-preview:freeAgentica: Deepcoder 14B Preview (free)1744555395DeepCoder-14B-Preview is a 14B parameter code ...96000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000096000.0NaNFalseNaNNaN
54moonshotai/kimi-vl-a3b-thinking:freeMoonshot AI: Kimi VL A3B Thinking (free)1744304841Kimi-VL is a lightweight Mixture-of-Experts vi...131072None[max_tokens, temperature, top_p, reasoning, in...text+image->text[image, text][text]OtherNone000000131072.0NaNFalseNaNNaN
55x-ai/grok-3-mini-betaxAI: Grok 3 Mini Beta1744240195Grok 3 Mini is a lightweight, smaller thinking...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.00000030.00000050000131072.0NaNFalseNaNNaN
56x-ai/grok-3-betaxAI: Grok 3 Beta1744240068Grok 3 is the latest model from xAI. It's thei...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000030.0000150000131072.0NaNFalseNaNNaN
57nvidia/llama-3.3-nemotron-super-49b-v1:freeNVIDIA: Llama 3.3 Nemotron Super 49B v1 (free)1744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
58nvidia/llama-3.3-nemotron-super-49b-v1NVIDIA: Llama 3.3 Nemotron Super 49B v11744119494Llama-3.3-Nemotron-Super-49B-v1 is a large lan...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000130.00000040000131072.0NaNFalseNaNNaN
59nvidia/llama-3.1-nemotron-ultra-253b-v1:freeNVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)1744115059Llama-3.1-Nemotron-Ultra-253B-v1 is a large la...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3None000000131072.0NaNFalseNaNNaN
60meta-llama/llama-4-maverick:freeMeta: Llama 4 Maverick (free)1743881822Llama 4 Maverick 17B Instruct (128E) is a high...256000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000256000.0NaNFalseNaNNaN
61meta-llama/llama-4-maverickMeta: Llama 4 Maverick1743881822Llama 4 Maverick 17B Instruct (128E) is a high...1048576None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000170.000000600.0006684001048576.016384.0FalseNaNNaN
62meta-llama/llama-4-scout:freeMeta: Llama 4 Scout (free)1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...512000None[max_tokens, temperature, top_p, structured_ou...text+image->text[text, image][text]OtherNone000000512000.0NaNFalseNaNNaN
63meta-llama/llama-4-scoutMeta: Llama 4 Scout1743881519Llama 4 Scout 17B Instruct (16E) is a mixture-...1048576None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]OtherNone0.000000080.000000300001048576.01048576.0FalseNaNNaN
64all-hands/openhands-lm-32b-v0.1OpenHands LM 32B V0.11743613013OpenHands LM v0.1 is a 32B open-source coding ...16384None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000260.0000034000016384.04096.0FalseNaNNaN
65mistral/ministral-8bMistral: Ministral 8B1743430021Ministral 8B is a state-of-the-art language mo...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000131072.0NaNFalseNaNNaN
66deepseek/deepseek-v3-base:freeDeepSeek: DeepSeek V3 Base (free)1743272023Note that this is a base model mostly meant fo...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
67scb10x/llama3.1-typhoon2-8b-instructTyphoon2 8B Instruct1743196511Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000180.0000001800008192.0NaNFalseNaNNaN
68scb10x/llama3.1-typhoon2-70b-instructTyphoon2 70B Instruct1743196170Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000880.0000008800008192.0NaNFalseNaNNaN
69allenai/molmo-7b-d:freeAllenAI: Molmo 7B D (free)1743023247Molmo is a family of open vision-language mode...4096None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0000004096.0NaNFalseNaNNaN
70bytedance-research/ui-tars-72b:freeBytedance: UI-TARS 72B (free)1743020065UI-TARS 72B is an open-source multimodal AI mo...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone00000032768.0NaNFalseNaNNaN
71qwen/qwen2.5-vl-3b-instruct:freeQwen: Qwen2.5 VL 3B Instruct (free)1743014573Qwen2.5 VL 3B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.0NaNFalseNaNNaN
72google/gemini-2.5-pro-exp-03-25Google: Gemini 2.5 Pro Experimental1742922099Gemini 2.5 Pro is Google’s state-of-the-art AI...1000000None[max_tokens, temperature, top_p, tools, tool_c...text+image->text[text, image, file][text]GeminiNone0000001000000.065535.0FalseNaNNaN
73qwen/qwen2.5-vl-32b-instruct:freeQwen: Qwen2.5 VL 32B Instruct (free)1742839838Qwen2.5-VL-32B is a multimodal vision-language...8192None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0000008192.0NaNFalseNaNNaN
74qwen/qwen2.5-vl-32b-instructQwen: Qwen2.5 VL 32B Instruct1742839838Qwen2.5-VL-32B is a multimodal vision-language...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000090.00000090000128000.0NaNFalseNaNNaN
75deepseek/deepseek-chat-v3-0324:freeDeepSeek: DeepSeek V3 0324 (free)1742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
76deepseek/deepseek-chat-v3-0324DeepSeek: DeepSeek V3 03241742824755DeepSeek V3, a 685B-parameter, mixture-of-expe...163840None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]DeepSeekNone0.00000030.000000880000163840.0NaNFalseNaNNaN
77featherless/qwerky-72b:freeQwerky 72B (free)1742481597Qwerky-72B is a linear-attention RWKV variant ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.04096.0FalseNaNNaN
78openai/o1-proOpenAI: o1-pro1742423211The o1 series of models are trained with reinf...200000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]GPTNone0.000150.000600.2167500200000.0100000.0TrueNaNNaN
79mistralai/mistral-small-3.1-24b-instruct:freeMistral: Mistral Small 3.1 24B (free)1742238937Mistral Small 3.1 24B Instruct is an upgraded ...96000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone00000096000.096000.0FalseNaNNaN
80mistralai/mistral-small-3.1-24b-instructMistral: Mistral Small 3.1 24B1742238937Mistral Small 3.1 24B Instruct is an upgraded ...131072None[max_tokens, temperature, top_p, presence_pena...text+image->text[text, image][text]MistralNone0.000000050.000000150000131072.0NaNFalseNaNNaN
81open-r1/olympiccoder-32b:freeOlympicCoder 32B (free)1742077228OlympicCoder-32B is a high-performing open-sou...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
82google/gemma-3-1b-it:freeGoogle: Gemma 3 1B (free)1741963556Gemma 3 1B is the smallest of the new Gemma 3 ...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000032768.08192.0FalseNaNNaN
83google/gemma-3-4b-it:freeGoogle: Gemma 3 4B (free)1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
84google/gemma-3-4b-itGoogle: Gemma 3 4B1741905510Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000020.000000040000131072.0NaNFalseNaNNaN
85ai21/jamba-1.6-largeAI21: Jamba 1.6 Large1741905173AI21 Jamba Large 1.6 is a high-performance hyb...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.0000020.0000080000256000.04096.0FalseNaNNaN
86ai21/jamba-1.6-miniAI21: Jamba Mini 1.61741905171AI21 Jamba Mini 1.6 is a hybrid foundation mod...256000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]OtherNone0.00000020.00000040000256000.04096.0FalseNaNNaN
87google/gemma-3-12b-it:freeGoogle: Gemma 3 12B (free)1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma000000131072.08192.0FalseNaNNaN
88google/gemma-3-12b-itGoogle: Gemma 3 12B1741902625Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.000000050.00000010000131072.0NaNFalseNaNNaN
89cohere/command-aCohere: Command A1741894342Command A is an open-weights 111B parameter mo...256000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.00000250.000010000256000.08192.0FalseNaNNaN
90openai/gpt-4o-mini-search-previewOpenAI: GPT-4o-mini Search Preview1741818122GPT-4o mini Search Preview is a specialized mo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.000000150.00000060.02750.00021700128000.016384.0TrueNaNNaN
91openai/gpt-4o-search-previewOpenAI: GPT-4o Search Preview1741817949GPT-4o Search Previewis a specialized model fo...128000None[web_search_options, max_tokens, response_form...text->text[text][text]GPTNone0.00000250.000010.0350.00361300128000.016384.0TrueNaNNaN
92rekaai/reka-flash-3:freeReka: Flash 3 (free)1741812813Reka Flash 3 is a general-purpose, instruction...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
93google/gemma-3-27b-it:freeGoogle: Gemma 3 27B (free)1741756359Gemma 3 introduces multimodality, supporting v...96000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma00000096000.08192.0FalseNaNNaN
94google/gemma-3-27b-itGoogle: Gemma 3 27B1741756359Gemma 3 introduces multimodality, supporting v...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Geminigemma0.00000010.000000200.000025600131072.016384.0FalseNaNNaN
95thedrummer/anubis-pro-105b-v1TheDrummer: Anubis Pro 105B V11741642290Anubis Pro 105B v1 is an expanded and refined ...131072None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000080.0000010000131072.0131072.0FalseNaNNaN
96thedrummer/skyfall-36b-v2TheDrummer: Skyfall 36B V21741636566Skyfall 36B v2 is an enhanced iteration of Mis...32768None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]OtherNone0.00000050.0000008000032768.032768.0FalseNaNNaN
97microsoft/phi-4-multimodal-instructMicrosoft: Phi 4 Multimodal Instruct1741396284Phi-4 Multimodal Instruct is a versatile 5.6B ...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]OtherNone0.000000050.000000100.0001768500131072.0NaNFalseNaNNaN
98perplexity/sonar-reasoning-proPerplexity: Sonar Reasoning Pro1741313308Note: Sonar Pro pricing includes Perplexity se...128000None[max_tokens, temperature, top_p, reasoning, in...text+image->text[text, image][text]Otherdeepseek-r10.0000020.000008000.0050128000.0NaNFalseNaNNaN
99perplexity/sonar-proPerplexity: Sonar Pro1741312423Note: Sonar Pro pricing includes Perplexity se...200000None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000030.000015000.0050200000.08000.0FalseNaNNaN
100perplexity/sonar-deep-researchPerplexity: Sonar Deep Research1741311246Sonar Deep Research is a research-focused mode...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000020.000008000.0050.000003128000.0NaNFalseNaNNaN
101deepseek/deepseek-r1-zero:freeDeepSeek: DeepSeek R1 Zero (free)1741297434DeepSeek-R1-Zero is a model trained via large-...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r1000000163840.0NaNFalseNaNNaN
102qwen/qwq-32b:freeQwen: QwQ 32B (free)1741208814QwQ is the reasoning model of the Qwen series....40000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq00000040000.040000.0FalseNaNNaN
103qwen/qwq-32bQwen: QwQ 32B1741208814QwQ is the reasoning model of the Qwen series....131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwenqwq0.000000150.00000020000131072.0NaNFalseNaNNaN
104moonshotai/moonlight-16b-a3b-instruct:freeMoonshot AI: Moonlight 16B A3B Instruct (free)1740719801Moonlight-16B-A3B-Instruct is a 16B-parameter ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0000008192.0NaNFalseNaNNaN
105nousresearch/deephermes-3-llama-3-8b-preview:freeNous: DeepHermes 3 Llama 3 8B Preview (free)1740719372DeepHermes 3 Preview is the latest version of ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone000000131072.0NaNFalseNaNNaN
106openai/gpt-4.5-previewOpenAI: GPT-4.5 (Preview)1740687810GPT-4.5 (Preview) is a research preview of Ope...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.0000750.0001500.10837500128000.016384.0True0.0000375NaN
107google/gemini-2.0-flash-lite-001Google: Gemini 2.0 Flash Lite1740506212Gemini 2.0 Flash Lite offers a significantly f...1048576None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.0000000750.000000300001048576.08192.0FalseNaNNaN
108anthropic/claude-3.7-sonnetAnthropic: Claude 3.7 Sonnet1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
109anthropic/claude-3.7-sonnet:thinkingAnthropic: Claude 3.7 Sonnet (thinking)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.064000.0False0.00000030.00000375
110anthropic/claude-3.7-sonnet:betaAnthropic: Claude 3.7 Sonnet (self-moderated)1740422110Claude 3.7 Sonnet is an advanced large languag...200000None[max_tokens, temperature, stop, reasoning, inc...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.0128000.0False0.00000030.00000375
111perplexity/r1-1776Perplexity: R1 17761740004929R1 1776 is a version of DeepSeek-R1 that has b...128000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.0000020.0000080000128000.0NaNFalseNaNNaN
112mistralai/mistral-sabaMistral: Saba1739803239Mistral Saba is a 24B-parameter language model...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
113cognitivecomputations/dolphin3.0-r1-mistral-24...Dolphin3.0 R1 Mistral 24B (free)1739462498Dolphin 3.0 R1 is the next generation of the D...32768None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r100000032768.0NaNFalseNaNNaN
114cognitivecomputations/dolphin3.0-mistral-24b:freeDolphin3.0 Mistral 24B (free)1739462019Dolphin 3.0 is the next generation of the Dolp...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone00000032768.0NaNFalseNaNNaN
115meta-llama/llama-guard-3-8bLlama Guard 3 8B1739401318Llama Guard 3 is a Llama-3.1-8B pretrained mod...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.000000020.000000060000131072.0NaNFalseNaNNaN
116openai/o3-mini-highOpenAI: o3 Mini High1739372611OpenAI o3-mini-high is the same model as [o3-m...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
117deepseek/deepseek-r1-distill-llama-8bDeepSeek: R1 Distill Llama 8B1738937718DeepSeek R1 Distill Llama 8B is a distilled la...32000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.000000040.00000004000032000.032000.0FalseNaNNaN
118google/gemini-2.0-flash-001Google: Gemini 2.0 Flash1738769413Gemini Flash 2.0 offers a significantly faster...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GeminiNone0.00000010.000000400.0000258001000000.08192.0False0.0000000250.0000001833
119qwen/qwen-vl-plusQwen: Qwen VL Plus1738731255Qwen's Enhanced Large Visual Language Model. S...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.000000210.0000006300.0002688007500.01500.0FalseNaNNaN
120aion-labs/aion-1.0AionLabs: Aion-1.01738697557Aion-1.0 is a multi-model system designed for ...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.0000040.0000080000131072.032768.0FalseNaNNaN
121aion-labs/aion-1.0-miniAionLabs: Aion-1.0-Mini1738697107Aion-1.0-Mini 32B parameter model is a distill...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]OtherNone0.00000070.00000140000131072.032768.0FalseNaNNaN
122aion-labs/aion-rp-llama-3.1-8bAionLabs: Aion-RP 1.0 (8B)1738696718Aion-RP-Llama-3.1-8B ranks the highest in the ...32768None[max_tokens, temperature, top_p]text->text[text][text]OtherNone0.00000020.0000002000032768.032768.0FalseNaNNaN
123qwen/qwen-vl-maxQwen: Qwen VL Max1738434304Qwen VL Max is a visual understanding model wi...7500None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone0.00000080.000003200.001024007500.01500.0FalseNaNNaN
124qwen/qwen-turboQwen: Qwen-Turbo1738410974Qwen-Turbo, based on Qwen2.5, is a 1M context ...1000000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.000000050.000000200001000000.08192.0FalseNaNNaN
125qwen/qwen2.5-vl-72b-instruct:freeQwen: Qwen2.5 VL 72B Instruct (free)1738410311Qwen2.5-VL is proficient in recognizing common...131072None[max_tokens, temperature, top_p, seed, respons...text+image->text[text, image][text]QwenNone000000131072.02048.0FalseNaNNaN
126qwen/qwen2.5-vl-72b-instructQwen: Qwen2.5 VL 72B Instruct1738410311Qwen2.5-VL is proficient in recognizing common...32000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.000000250.00000075000032000.0NaNFalseNaNNaN
127qwen/qwen-plusQwen: Qwen-Plus1738409840Qwen-Plus, based on the Qwen2.5 foundation mod...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000040.00000120000131072.08192.0FalseNaNNaN
128qwen/qwen-maxQwen: Qwen-Max1738402289Qwen-Max, based on Qwen2.5, provides the best ...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]QwenNone0.00000160.0000064000032768.08192.0FalseNaNNaN
129openai/o3-miniOpenAI: o3 Mini1738351721OpenAI o3-mini is a cost-efficient language mo...200000None[tools, tool_choice, seed, max_tokens, respons...text->text[text][text]OtherNone0.00000110.00000440000200000.0100000.0True0.00000055NaN
130deepseek/deepseek-r1-distill-qwen-1.5bDeepSeek: R1 Distill Qwen 1.5B1738328067DeepSeek R1 Distill Qwen 1.5B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.000000180.000000180000131072.032768.0FalseNaNNaN
131mistralai/mistral-small-24b-instruct-2501:freeMistral: Mistral Small 3 (free)1738255409Mistral Small 3 is a 24B-parameter language mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone00000032768.0NaNFalseNaNNaN
132mistralai/mistral-small-24b-instruct-2501Mistral: Mistral Small 31738255409Mistral Small 3 is a 24B-parameter language mo...28000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]MistralNone0.000000060.00000012000028000.014000.0FalseNaNNaN
133deepseek/deepseek-r1-distill-qwen-32b:freeDeepSeek: R1 Distill Qwen 32B (free)1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...16000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000016000.016000.0FalseNaNNaN
134deepseek/deepseek-r1-distill-qwen-32bDeepSeek: R1 Distill Qwen 32B1738194830DeepSeek R1 Distill Qwen 32B is a distilled la...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000120.000000180000131072.016384.0FalseNaNNaN
135deepseek/deepseek-r1-distill-qwen-14b:freeDeepSeek: R1 Distill Qwen 14B (free)1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r100000064000.0NaNFalseNaNNaN
136deepseek/deepseek-r1-distill-qwen-14bDeepSeek: R1 Distill Qwen 14B1738193940DeepSeek R1 Distill Qwen 14B is a distilled la...64000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Qwendeepseek-r10.000000150.00000015000064000.064000.0FalseNaNNaN
137perplexity/sonar-reasoningPerplexity: Sonar Reasoning1738131107Sonar Reasoning is a reasoning model provided ...127000None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Otherdeepseek-r10.0000010.0000050.005000127000.0NaNFalseNaNNaN
138perplexity/sonarPerplexity: Sonar1738013808Sonar is lightweight, affordable, fast, and si...127072None[max_tokens, temperature, top_p, web_search_op...text+image->text[text, image][text]OtherNone0.0000010.0000010.005000127072.0NaNFalseNaNNaN
139liquid/lfm-7bLiquid: LFM 7B1737806883LFM-7B, a new best-in-class language model. LF...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000010.00000001000032768.0NaNFalseNaNNaN
140liquid/lfm-3bLiquid: LFM 3B1737806501Liquid's LFM 3B delivers incredible performanc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000020.00000002000032768.0NaNFalseNaNNaN
141deepseek/deepseek-r1-distill-llama-70b:freeDeepSeek: R1 Distill Llama 70B (free)1737663169DeepSeek R1 Distill Llama 70B is a distilled l...8192None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10000008192.04096.0FalseNaNNaN
142deepseek/deepseek-r1-distill-llama-70bDeepSeek: R1 Distill Llama 70B1737663169DeepSeek R1 Distill Llama 70B is a distilled l...131072None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]Llama3deepseek-r10.00000010.00000040000131072.016384.0FalseNaNNaN
143deepseek/deepseek-r1:freeDeepSeek: R1 (free)1737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, reasoning, include_reasoning, tem...text->text[text][text]DeepSeekdeepseek-r1000000163840.0NaNFalseNaNNaN
144deepseek/deepseek-r1DeepSeek: R11737381095DeepSeek R1 is here: Performance on par with [...163840None[max_tokens, temperature, top_p, reasoning, in...text->text[text][text]DeepSeekdeepseek-r10.00000050.000002180000163840.0163840.0FalseNaNNaN
145minimax/minimax-01MiniMax: MiniMax-011736915462MiniMax-01 is a combines MiniMax-Text-01 for t...1000192None[max_tokens, temperature, top_p]text+image->text[text, image][text]OtherNone0.00000020.000001100001000192.01000192.0FalseNaNNaN
146mistralai/codestral-2501Mistral: Codestral 25011736895522[Mistral](/mistralai)'s cutting-edge language ...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000030.00000090000262144.0NaNFalseNaNNaN
147microsoft/phi-4Microsoft: Phi 41736489872[Microsoft Research](/microsoft) Phi-4 is desi...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000070.00000014000016384.016384.0FalseNaNNaN
148deepseek/deepseek-chat:freeDeepSeek: DeepSeek V3 (free)1735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone000000163840.0NaNFalseNaNNaN
149deepseek/deepseek-chatDeepSeek: DeepSeek V31735241320DeepSeek-V3 is the latest model from the DeepS...163840None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]DeepSeekNone0.000000380.000000890000163840.0163840.0FalseNaNNaN
150sao10k/l3.3-euryale-70bSao10K: Llama 3.3 Euryale 70B1734535928Euryale L3.3 70B is a model focused on creativ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
151openai/o1OpenAI: o11734459999The latest and strongest model family from Ope...200000None[tools, tool_choice, seed, max_tokens, respons...text+image->text[text, image][text]GPTNone0.0000150.0000600.02167500200000.0100000.0True0.0000075NaN
152eva-unit-01/eva-llama-3.33-70bEVA Llama 3.33 70B1734377303EVA Llama 3.33 70b is a roleplay and storywrit...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.000006000016384.04096.0FalseNaNNaN
153x-ai/grok-2-vision-1212xAI: Grok 2 Vision 12121734237338Grok 2 Vision 1212 advances image-based AI wit...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000020.0000100.00360032768.0NaNFalseNaNNaN
154x-ai/grok-2-1212xAI: Grok 2 12121734232814Grok 2 1212 introduces significant enhancement...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000020.000010000131072.0NaNFalseNaNNaN
155cohere/command-r7b-12-2024Cohere: Command R7B (12-2024)1734158152Command R7B (12-2024) is a small, fast update ...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.00000003750.000000150000128000.04000.0FalseNaNNaN
156google/gemini-2.0-flash-exp:freeGoogle: Gemini 2.0 Flash Experimental (free)1733937523Gemini Flash 2.0 offers a significantly faster...1048576None[max_tokens, temperature, top_p, stop]text+image->text[text, image][text]GeminiNone0000001048576.08192.0FalseNaNNaN
157meta-llama/llama-3.3-70b-instruct:freeMeta: Llama 3.3 70B Instruct (free)1733506137The Meta Llama 3.3 multilingual large language...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30000008000.08000.0FalseNaNNaN
158meta-llama/llama-3.3-70b-instructMeta: Llama 3.3 70B Instruct1733506137The Meta Llama 3.3 multilingual large language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000090.000000350000131000.0131000.0FalseNaNNaN
159amazon/nova-lite-v1Amazon: Nova Lite 1.01733437363Amazon Nova Lite 1.0 is a very low-cost multim...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.000000060.0000002400.0000900300000.05120.0TrueNaNNaN
160amazon/nova-micro-v1Amazon: Nova Micro 1.01733437237Amazon Nova Micro 1.0 is a text-only model tha...128000None[tools, max_tokens, temperature, top_p, top_k,...text->text[text][text]NovaNone0.0000000350.000000140000128000.05120.0TrueNaNNaN
161amazon/nova-pro-v1Amazon: Nova Pro 1.01733436303Amazon Nova Pro 1.0 is a capable multimodal mo...300000None[tools, max_tokens, temperature, top_p, top_k,...text+image->text[text, image][text]NovaNone0.00000080.000003200.001200300000.05120.0TrueNaNNaN
162qwen/qwq-32b-preview:freeQwen: QwQ 32B Preview (free)1732754541QwQ-32B-Preview is an experimental research mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r100000016384.0NaNFalseNaNNaN
163qwen/qwq-32b-previewQwen: QwQ 32B Preview1732754541QwQ-32B-Preview is an experimental research mo...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwendeepseek-r10.000000090.00000027000032768.0NaNFalseNaNNaN
164google/learnlm-1.5-pro-experimental:freeGoogle: LearnLM 1.5 Pro Experimental (free)1732216551An experimental version of [Gemini 1.5 Pro](/g...40960None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone00000040960.08192.0FalseNaNNaN
165eva-unit-01/eva-qwen-2.5-72bEVA Qwen2.5 72B1732210606EVA Qwen2.5 72B is a roleplay and storywriting...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
166openai/gpt-4o-2024-11-20OpenAI: GPT-4o (2024-11-20)1732127594The 2024-11-20 version of GPT-4o offers a leve...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
167mistralai/mistral-large-2411Mistral Large 24111731978685Mistral Large 2 2411 is an update of [Mistral ...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
168mistralai/mistral-large-2407Mistral Large 24071731978415This is Mistral AI's flagship model, Mistral L...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000131072.0NaNFalseNaNNaN
169mistralai/pixtral-large-2411Mistral: Pixtral Large 24111731977388Pixtral Large is a 124B parameter, open-weight...131072None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]MistralNone0.0000020.00000600.00288800131072.0NaNFalseNaNNaN
170x-ai/grok-vision-betaxAI: Grok Vision Beta1731976624Grok Vision Beta is xAI's experimental languag...8192None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GrokNone0.0000050.00001500.009008192.0NaNFalseNaNNaN
171infermatic/mn-inferor-12bInfermatic: Mistral Nemo Inferor 12B1731464428Inferor 12B is a merge of top roleplay models,...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000080.0000012000016384.04096.0FalseNaNNaN
172qwen/qwen-2.5-coder-32b-instruct:freeQwen2.5 Coder 32B Instruct (free)1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
173qwen/qwen-2.5-coder-32b-instructQwen2.5 Coder 32B Instruct1731368400Qwen2.5-Coder is the latest series of Code-Spe...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000060.00000015000032768.016384.0FalseNaNNaN
174raifle/sorcererlm-8x22bSorcererLM 8x22B1731105083SorcererLM is an advanced RP and storytelling ...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralvicuna0.00000450.0000045000016000.0NaNFalseNaNNaN
175eva-unit-01/eva-qwen-2.5-32bEVA Qwen2.5 32B1731104847EVA Qwen2.5 32B is a roleplaying/storywriting ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000260.0000034000016384.04096.0FalseNaNNaN
176thedrummer/unslopnemo-12bUnslopnemo 12B1731103448UnslopNemo v4.1 is the latest addition from th...32000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000450.00000045000032000.016000.0FalseNaNNaN
177anthropic/claude-3.5-haiku:betaAnthropic: Claude 3.5 Haiku (self-moderated)1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
178anthropic/claude-3.5-haikuAnthropic: Claude 3.5 Haiku1730678400Claude 3.5 Haiku features offers enhanced capa...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
179anthropic/claude-3.5-haiku-20241022:betaAnthropic: Claude 3.5 Haiku (2024-10-22) (self...1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0False0.000000080.000001
180anthropic/claude-3.5-haiku-20241022Anthropic: Claude 3.5 Haiku (2024-10-22)1730678400Claude 3.5 Haiku features enhancements across ...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.00000080.0000040000200000.08192.0True0.000000080.000001
181neversleep/llama-3.1-lumimaid-70bNeverSleep: Lumimaid v0.2 70B1729555200Lumimaid v0.2 70B is a finetune of [Llama 3.1 ...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000150.00000225000016384.02048.0FalseNaNNaN
182anthracite-org/magnum-v4-72bMagnum v4 72B1729555200This is a series of models designed to replica...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000150.00000225000016384.01024.0FalseNaNNaN
183anthropic/claude-3.5-sonnet:betaAnthropic: Claude 3.5 Sonnet (self-moderated)1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
184anthropic/claude-3.5-sonnetAnthropic: Claude 3.5 Sonnet1729555200New Claude 3.5 Sonnet delivers better-than-Opu...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
185x-ai/grok-betaxAI: Grok Beta1729382400Grok Beta is xAI's experimental language model...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GrokNone0.0000050.0000150000131072.0NaNFalseNaNNaN
186mistralai/ministral-8bMistral: Ministral 8B1729123200Ministral 8B is an 8B parameter model featurin...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000010.00000010000128000.0NaNFalseNaNNaN
187mistralai/ministral-3bMistral: Ministral 3B1729123200Ministral 3B is a 3B parameter model optimized...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000040.000000040000131072.0NaNFalseNaNNaN
188qwen/qwen-2.5-7b-instruct:freeQwen2.5 7B Instruct (free)1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.032768.0FalseNaNNaN
189qwen/qwen-2.5-7b-instructQwen2.5 7B Instruct1729036800Qwen2.5 7B is the latest series of Qwen large ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000050.0000001000032768.016384.0FalseNaNNaN
190nvidia/llama-3.1-nemotron-70b-instructNVIDIA: Llama 3.1 Nemotron 70B Instruct1728950400NVIDIA's Llama 3.1 Nemotron 70B is a language ...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000120.00000030000131072.0131072.0FalseNaNNaN
191inflection/inflection-3-productivityInflection: Inflection 3 Productivity1728604800Inflection 3 Productivity is optimized for fol...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
192inflection/inflection-3-piInflection: Inflection 3 Pi1728604800Inflection 3 Pi powers Inflection's [Pi](https...8000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000250.0000100008000.01024.0FalseNaNNaN
193google/gemini-flash-1.5-8bGoogle: Gemini 1.5 Flash 8B1727913600Gemini Flash 1.5 8B is optimized for speed and...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.00000003750.0000001500001000000.08192.0False0.000000010.0000000583
194thedrummer/rocinante-12bRocinante 12B1727654400Rocinante 12B is designed for engaging storyte...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.000000250.0000005000032768.0NaNFalseNaNNaN
195anthracite-org/magnum-v2-72bMagnum v2 72B1727654400From the maker of [Goliath](https://openrouter...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000030.000003000032768.0NaNFalseNaNNaN
196liquid/lfm-40bLiquid: LFM 40B MoE1727654400Liquid's 40.3B Mixture of Experts (MoE) model....32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherchatml0.000000150.00000015000032768.0NaNFalseNaNNaN
197meta-llama/llama-3.2-3b-instruct:freeMeta: Llama 3.2 3B Instruct (free)1727222400Llama 3.2 3B is a 3-billion-parameter multilin...20000None[max_tokens, temperature, top_p]text->text[text][text]Llama3llama300000020000.020000.0FalseNaNNaN
198meta-llama/llama-3.2-3b-instructMeta: Llama 3.2 3B Instruct1727222400Llama 3.2 3B is a 3-billion-parameter multilin...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000010.000000020000131072.016384.0FalseNaNNaN
199meta-llama/llama-3.2-1b-instruct:freeMeta: Llama 3.2 1B Instruct (free)1727222400Llama 3.2 1B is a 1-billion-parameter language...131000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131000.0NaNFalseNaNNaN
200meta-llama/llama-3.2-1b-instructMeta: Llama 3.2 1B Instruct1727222400Llama 3.2 1B is a 1-billion-parameter language...131072None[max_tokens, temperature, top_p, top_k, stop, ...text->text[text][text]Llama3llama30.0000000050.000000010000131072.0NaNFalseNaNNaN
201meta-llama/llama-3.2-90b-vision-instructMeta: Llama 3.2 90B Vision Instruct1727222400The Llama 90B Vision model is a top-tier, 90-b...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.00000120.000001200.00173400131072.02048.0FalseNaNNaN
202meta-llama/llama-3.2-11b-vision-instruct:freeMeta: Llama 3.2 11B Vision Instruct (free)1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama3000000131072.02048.0FalseNaNNaN
203meta-llama/llama-3.2-11b-vision-instructMeta: Llama 3.2 11B Vision Instruct1727222400Llama 3.2 11B Vision is a multimodal model wit...131072None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]Llama3llama30.0000000490.00000004900.0000794800131072.016384.0FalseNaNNaN
204qwen/qwen-2.5-72b-instruct:freeQwen2.5 72B Instruct (free)1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml00000032768.0NaNFalseNaNNaN
205qwen/qwen-2.5-72b-instructQwen2.5 72B Instruct1726704000Qwen2.5 72B is the latest series of Qwen large...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Qwenchatml0.000000120.00000039000032768.016384.0FalseNaNNaN
206qwen/qwen-2.5-vl-72b-instructQwen: Qwen2.5-VL 72B Instruct1726617600Qwen2.5 VL 72B is a multimodal LLM from the Qw...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000060.000000600.0005780032768.0NaNFalseNaNNaN
207neversleep/llama-3.1-lumimaid-8bNeverSleep: Lumimaid v0.2 8B1726358400Lumimaid v0.2 8B is a finetune of [Llama 3.1 8...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000032768.02048.0FalseNaNNaN
208openai/o1-previewOpenAI: o1-preview1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
209openai/o1-preview-2024-09-12OpenAI: o1-preview (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.0000150.000060000128000.032768.0True0.0000075NaN
210openai/o1-miniOpenAI: o1-mini1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
211openai/o1-mini-2024-09-12OpenAI: o1-mini (2024-09-12)1726099200The latest and strongest model family from Ope...128000None[seed, max_tokens]text->text[text][text]GPTNone0.00000110.00000440000128000.065536.0True0.00000055NaN
212mistralai/pixtral-12bMistral: Pixtral 12B1725926400The first multi-modal, text+image-to-text mode...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]MistralNone0.00000010.000000100.00014450032768.0NaNFalseNaNNaN
213cohere/command-r-plus-08-2024Cohere: Command R+ (08-2024)1724976000command-r-plus-08-2024 is an update of the [Co...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000250.000010000128000.04000.0FalseNaNNaN
214cohere/command-r-08-2024Cohere: Command R (08-2024)1724976000command-r-08-2024 is an update of the [Command...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.000000150.00000060000128000.04000.0FalseNaNNaN
215qwen/qwen-2.5-vl-7b-instruct:freeQwen: Qwen2.5-VL 7B Instruct (free)1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...64000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone00000064000.064000.0FalseNaNNaN
216qwen/qwen-2.5-vl-7b-instructQwen: Qwen2.5-VL 7B Instruct1724803200Qwen2.5 VL 7B is a multimodal LLM from the Qwe...32768None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]QwenNone0.00000020.000000200.00014450032768.0NaNFalseNaNNaN
217sao10k/l3.1-euryale-70bSao10K: Llama 3.1 Euryale 70B v2.21724803200Euryale L3.1 70B v2.2 is a model focused on cr...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000070.00000080000131072.016384.0FalseNaNNaN
218google/gemini-flash-1.5-8b-expGoogle: Gemini 1.5 Flash 8B Experimental1724803200Gemini Flash 1.5 8B Experimental is an experim...1000000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GeminiNone0000001000000.08192.0FalseNaNNaN
219microsoft/phi-3.5-mini-128k-instructMicrosoft: Phi-3.5 Mini 128K Instruct1724198400Phi-3.5 models are lightweight, state-of-the-a...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.000000030.000000090000131072.0NaNFalseNaNNaN
220nousresearch/hermes-3-llama-3.1-70bNous: Hermes 3 70B Instruct1723939200Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.000000120.00000030000131072.0131072.0FalseNaNNaN
221nousresearch/hermes-3-llama-3.1-405bNous: Hermes 3 405B Instruct1723766400Hermes 3 is a generalist language model with m...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.00000080.00000080000131072.0131072.0FalseNaNNaN
222openai/chatgpt-4o-latestOpenAI: ChatGPT-4o1723593600OpenAI ChatGPT 4o is continually updated by Op...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GPTNone0.0000050.00001500.00722500128000.016384.0TrueNaNNaN
223sao10k/l3-lunaris-8bSao10K: Llama 3 8B Lunaris1723507200Lunaris 8B is a versatile generalist and rolep...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.0000000500008192.0NaNFalseNaNNaN
224aetherwiing/mn-starcannon-12bAetherwiing: Starcannon 12B1723507200Starcannon 12B v2 is a creative roleplay and s...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
225openai/gpt-4o-2024-08-06OpenAI: GPT-4o (2024-08-06)1722902400The 2024-08-06 version of GPT-4o offers improv...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
226meta-llama/llama-3.1-405b:freeMeta: Llama 3.1 405B (base) (free)1722556800Meta's latest class of model (Llama 3.1) launc...64000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none00000064000.0NaNFalseNaNNaN
227meta-llama/llama-3.1-405bMeta: Llama 3.1 405B (base)1722556800Meta's latest class of model (Llama 3.1) launc...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.0000020.000002000032768.0NaNFalseNaNNaN
228nothingiisreal/mn-celeste-12bMistral Nemo 12B Celeste1722556800A specialized story writing and roleplaying mo...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000080.0000012000016384.04096.0FalseNaNNaN
229perplexity/llama-3.1-sonar-small-128k-onlinePerplexity: Llama 3.1 Sonar 8B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.00000020.00000020.005000127072.0NaNFalseNaNNaN
230perplexity/llama-3.1-sonar-large-128k-onlinePerplexity: Llama 3.1 Sonar 70B Online1722470400Llama 3.1 Sonar is Perplexity's latest model f...127072None[max_tokens, temperature, top_p, top_k, freque...text->text[text][text]Llama3None0.0000010.0000010.005000127072.0NaNFalseNaNNaN
231meta-llama/llama-3.1-8b-instruct:freeMeta: Llama 3.1 8B Instruct (free)1721692800Meta's latest class of model (Llama 3.1) launc...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama3000000131072.04096.0FalseNaNNaN
232meta-llama/llama-3.1-8b-instructMeta: Llama 3.1 8B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000020.00000003000016384.016384.0FalseNaNNaN
233meta-llama/llama-3.1-405b-instructMeta: Llama 3.1 405B Instruct1721692800The highly anticipated 400B class of Llama3 is...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000080.0000008000032768.016384.0FalseNaNNaN
234meta-llama/llama-3.1-70b-instructMeta: Llama 3.1 70B Instruct1721692800Meta's latest class of model (Llama 3.1) launc...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Llama3llama30.00000010.000000280000131072.016384.0FalseNaNNaN
235mistralai/codestral-mambaMistral: Codestral Mamba1721347200A 7.3B parameter Mamba-based model designed fo...262144None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.000000250000262144.0NaNFalseNaNNaN
236mistralai/mistral-nemo:freeMistral: Mistral Nemo (free)1721347200A 12B parameter model with a 128k token contex...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral000000128000.0128000.0FalseNaNNaN
237mistralai/mistral-nemoMistral: Mistral Nemo1721347200A 12B parameter model with a 128k token contex...98304None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.000000030.00000007000098304.049152.0FalseNaNNaN
238openai/gpt-4o-miniOpenAI: GPT-4o-mini1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00021700128000.016384.0True0.000000075NaN
239openai/gpt-4o-mini-2024-07-18OpenAI: GPT-4o-mini (2024-07-18)1721260800GPT-4o mini is OpenAI's newest model after [GP...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.000000150.000000600.00722500128000.016384.0True0.000000075NaN
240google/gemma-2-27b-itGoogle: Gemma 2 27B1720828800Gemma 2 27B by Google is an open model built f...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.00000010.000000300008192.0NaNFalseNaNNaN
241alpindale/magnum-72bMagnum 72B1720656000From the maker of [Goliath](https://openrouter...16384None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.0000040.000006000016384.04096.0FalseNaNNaN
242google/gemma-2-9b-it:freeGoogle: Gemma 2 9B (free)1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0000008192.08192.0FalseNaNNaN
243google/gemma-2-9b-itGoogle: Gemma 2 9B1719532800Gemma 2 9B by Google is an advanced, open-sour...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Geminigemma0.000000020.0000000600008192.0NaNFalseNaNNaN
24401-ai/yi-large01.AI: Yi Large1719273600The Yi Large model was designed by 01.AI with ...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]YiNone0.0000030.000003000032768.04096.0FalseNaNNaN
245ai21/jamba-instructAI21: Jamba Instruct1719273600The Jamba-Instruct model, introduced by AI21 L...256000None[max_tokens, temperature, top_p, stop]text->text[text][text]OtherNone0.00000050.00000070000256000.04096.0FalseNaNNaN
246anthropic/claude-3.5-sonnet-20240620:betaAnthropic: Claude 3.5 Sonnet (2024-06-20) (sel...1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0False0.00000030.00000375
247anthropic/claude-3.5-sonnet-20240620Anthropic: Claude 3.5 Sonnet (2024-06-20)1718841600Claude 3.5 Sonnet delivers better-than-Opus ca...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.08192.0True0.00000030.00000375
248sao10k/l3-euryale-70bSao10k: Llama 3 Euryale 70B v2.11718668800Euryale 70B v2.1 is a model focused on creativ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000001480.0000014800008192.08192.0FalseNaNNaN
249cognitivecomputations/dolphin-mixtral-8x22bDolphin 2.9.2 Mixtral 8x22B 🐬1717804800Dolphin 2.9 is designed for instruction follow...16000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000090.0000009000016000.0NaNFalseNaNNaN
250qwen/qwen-2-72b-instructQwen 2 72B Instruct1717718400Qwen2 72B is a transformer-based model that ex...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Qwenchatml0.00000090.0000009000032768.04096.0FalseNaNNaN
251mistralai/mistral-7b-instruct:freeMistral: Mistral 7B Instruct (free)1716768000A high-performing, industry-standard 7.3B para...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral00000032768.016384.0FalseNaNNaN
252mistralai/mistral-7b-instructMistral: Mistral 7B Instruct1716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
253nousresearch/hermes-2-pro-llama-3-8bNousResearch: Hermes 2 Pro - Llama-3 8B1716768000Hermes 2 Pro is an upgraded, retrained version...131072None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3chatml0.0000000250.000000040000131072.0131072.0FalseNaNNaN
254mistralai/mistral-7b-instruct-v0.3Mistral: Mistral 7B Instruct v0.31716768000A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.0000000280.000000054000032768.016384.0FalseNaNNaN
255microsoft/phi-3-mini-128k-instructMicrosoft: Phi-3 Mini 128K Instruct1716681600Phi-3 Mini is a powerful 3.8B parameter model ...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000010000128000.0NaNFalseNaNNaN
256microsoft/phi-3-medium-128k-instructMicrosoft: Phi-3 Medium 128K Instruct1716508800Phi-3 128K Medium is a powerful 14-billion par...131072None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Otherphi30.00000010.00000030000131072.0NaNFalseNaNNaN
257neversleep/llama-3-lumimaid-70bNeverSleep: Llama 3 Lumimaid 70B1715817600The NeverSleep team is back, with a Llama 3 70...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.0000040.00000600008192.04096.0FalseNaNNaN
258deepseek/deepseek-coderDeepSeek-Coder-V21715644800DeepSeek-Coder-V2, an open-source Mixture-of-E...128000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]OtherNone0.000000040.000000120000128000.0NaNFalseNaNNaN
259google/gemini-flash-1.5Google: Gemini 1.5 Flash1715644800Gemini 1.5 Flash is a foundation model that pe...1000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.0000000750.000000300.00004001000000.08192.0False0.000000018750.0000001583
260openai/gpt-4oOpenAI: GPT-4o1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.00000250.0000100.00361300128000.016384.0True0.00000125NaN
261openai/gpt-4o:extendedOpenAI: GPT-4o (extended)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000060.00001800.00722500128000.064000.0TrueNaNNaN
262meta-llama/llama-guard-2-8bMeta: LlamaGuard 2 8B1715558400This safeguard model has 8B parameters and is ...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3none0.00000020.000000200008192.0NaNFalseNaNNaN
263openai/gpt-4o-2024-05-13OpenAI: GPT-4o (2024-05-13)1715558400GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image, file][text]GPTNone0.0000050.00001500.00722500128000.04096.0TrueNaNNaN
264allenai/olmo-7b-instructOLMo 7B Instruct1715299200OLMo 7B Instruct by the Allen Institute for AI...2048None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Otherzephyr0.000000080.0000002400002048.0NaNFalseNaNNaN
265neversleep/llama-3-lumimaid-8b:extendedNeverSleep: Llama 3 Lumimaid 8B (extended)1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
266neversleep/llama-3-lumimaid-8bNeverSleep: Llama 3 Lumimaid 8B1714780800The NeverSleep team is back, with a Llama 3 8B...24576None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.000000093750.00000075000024576.02048.0FalseNaNNaN
267sao10k/fimbulvetr-11b-v2Fimbulvetr 11B v21713657600Creative writing model, routed with permission...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000080.000001200004096.04096.0FalseNaNNaN
268meta-llama/llama-3-8b-instructMeta: Llama 3 8B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, top_k, seed, ...text->text[text][text]Llama3llama30.000000030.0000000600008192.016384.0FalseNaNNaN
269meta-llama/llama-3-70b-instructMeta: Llama 3 70B Instruct1713398400Meta's latest class of model (Llama 3) launche...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama3llama30.00000030.000000400008192.016384.0FalseNaNNaN
270mistralai/mixtral-8x22b-instructMistral: Mixtral 8x22B Instruct1713312000Mistral's official instruct fine-tuned version...65536None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.00000040.0000012000065536.0NaNFalseNaNNaN
271microsoft/wizardlm-2-8x22bWizardLM-2 8x22B1713225600WizardLM-2 8x22B is Microsoft AI's most advanc...65536None[max_tokens, temperature, top_p, presence_pena...text->text[text][text]Mistralvicuna0.00000050.0000005000065536.016384.0FalseNaNNaN
272google/gemini-pro-1.5Google: Gemini 1.5 Pro1712620800Google's latest multimodal model, supports ima...2000000None[max_tokens, temperature, top_p, stop, frequen...text+image->text[text, image][text]GeminiNone0.000001250.00000500.0006575002000000.08192.0FalseNaNNaN
273openai/gpt-4-turboOpenAI: GPT-4 Turbo1712620800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]GPTNone0.000010.0000300.0144500128000.04096.0TrueNaNNaN
274cohere/command-r-plusCohere: Command R+1712188800Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
275cohere/command-r-plus-04-2024Cohere: Command R+ (04-2024)1712016000Command R+ is a new, 104B-parameter LLM from C...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.0000030.0000150000128000.04000.0FalseNaNNaN
276sophosympatheia/midnight-rose-70bMidnight Rose 70B1711065600A merge with a complex family tree, this model...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000080.000000800004096.0NaNFalseNaNNaN
277cohere/commandCohere: Command1710374400Command is an instruction-following conversati...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]CohereNone0.0000010.00000200004096.04000.0FalseNaNNaN
278cohere/command-rCohere: Command R1710374400Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
279anthropic/claude-3-haiku:betaAnthropic: Claude 3 Haiku (self-moderated)1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0False0.000000030.0000003
280anthropic/claude-3-haikuAnthropic: Claude 3 Haiku1710288000Claude 3 Haiku is Anthropic's fastest and most...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.000000250.0000012500.000400200000.04096.0True0.000000030.0000003
281anthropic/claude-3-opus:betaAnthropic: Claude 3 Opus (self-moderated)1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0False0.00000150.00001875
282anthropic/claude-3-opusAnthropic: Claude 3 Opus1709596800Claude 3 Opus is Anthropic's most powerful mod...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000150.00007500.02400200000.04096.0True0.00000150.00001875
283anthropic/claude-3-sonnet:betaAnthropic: Claude 3 Sonnet (self-moderated)1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0False0.00000030.00000375
284anthropic/claude-3-sonnetAnthropic: Claude 3 Sonnet1709596800Claude 3 Sonnet is an ideal balance of intelli...200000None[tools, tool_choice, max_tokens, temperature, ...text+image->text[text, image][text]ClaudeNone0.0000030.00001500.004800200000.04096.0True0.00000030.00000375
285cohere/command-r-03-2024Cohere: Command R (03-2024)1709341200Command-R is a 35B parameter model that perfor...128000None[tools, max_tokens, temperature, top_p, stop, ...text->text[text][text]CohereNone0.00000050.00000150000128000.04000.0FalseNaNNaN
286mistralai/mistral-largeMistral Large1708905600This is Mistral AI's flagship model, Mistral L...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.0000020.0000060000128000.0NaNFalseNaNNaN
287openai/gpt-3.5-turbo-0613OpenAI: GPT-3.5 Turbo (older v0613)1706140800GPT-3.5 Turbo is OpenAI's fastest model. It ca...4095None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.00000200004095.04096.0FalseNaNNaN
288openai/gpt-4-turbo-previewOpenAI: GPT-4 Turbo Preview1706140800The preview GPT-4 model with improved instruct...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
289nousresearch/nous-hermes-2-mixtral-8x7b-dpoNous: Hermes 2 Mixtral 8x7B DPO1705363200Nous Hermes 2 Mixtral 8x7B DPO is the new flag...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralchatml0.00000060.0000006000032768.02048.0FalseNaNNaN
290mistralai/mistral-mediumMistral Medium1704844800This is Mistral AI's closed-source, medium-sid...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000002750.0000081000032768.0NaNFalseNaNNaN
291mistralai/mistral-smallMistral Small1704844800With 22 billion parameters, Mistral Small v24....32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.00000020.0000006000032768.0NaNFalseNaNNaN
292mistralai/mistral-tinyMistral Tiny1704844800Note: This model is being deprecated. Recommen...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]MistralNone0.000000250.00000025000032768.0NaNFalseNaNNaN
293mistralai/mistral-7b-instruct-v0.2Mistral: Mistral 7B Instruct v0.21703721600A high-performing, industry-standard 7.3B para...32768None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralmistral0.00000020.0000002000032768.0NaNFalseNaNNaN
294mistralai/mixtral-8x7b-instructMistral: Mixtral 8x7B Instruct1702166400Mixtral 8x7B Instruct is a pretrained generati...32768None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000080.00000024000032768.0NaNFalseNaNNaN
295neversleep/noromaid-20bNoromaid 20B1700956800A collab between IkariDev and Undi. This merge...8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.000000750.000001500008192.02048.0FalseNaNNaN
296anthropic/claude-2.1:betaAnthropic: Claude v2.1 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
297anthropic/claude-2.1Anthropic: Claude v2.11700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
298anthropic/claude-2:betaAnthropic: Claude v2 (self-moderated)1700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0FalseNaNNaN
299anthropic/claude-2Anthropic: Claude v21700611200Claude 2 delivers advancements in key capabili...200000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000200000.04096.0TrueNaNNaN
300undi95/toppy-m-7bToppy M 7B1699574400A wild 7B parameter model that merges several ...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Mistralalpaca0.00000080.000001200004096.04096.0FalseNaNNaN
301alpindale/goliath-120bGoliath 120B1699574400A large LLM created by combining two fine-tune...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000656250.00000937500006144.0512.0FalseNaNNaN
302openrouter/autoAuto Router1699401600Your prompt will be processed by a meta-model ...2000000None[]text->text[text][text]RouterNone-1-1NaNNaNNaNNaNNaNNaNFalseNaNNaN
303openai/gpt-3.5-turbo-1106OpenAI: GPT-3.5 Turbo 16k (older v1106)1699228800An older GPT-3.5 Turbo model with improved ins...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000010.000002000016385.04096.0TrueNaNNaN
304openai/gpt-4-1106-previewOpenAI: GPT-4 Turbo (older v1106)1699228800The latest GPT-4 Turbo model with vision capab...128000None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000010.000030000128000.04096.0TrueNaNNaN
305jondurbin/airoboros-l2-70bAiroboros 70B1698537600A Llama 2 70B fine-tune using synthetic data (...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2airoboros0.00000050.000000500004096.0NaNFalseNaNNaN
306openai/gpt-3.5-turbo-instructOpenAI: GPT-3.5 Turbo Instruct1695859200This model is a variant of GPT-3.5 Turbo tuned...4095None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]GPTchatml0.00000150.00000200004095.04096.0TrueNaNNaN
307mistralai/mistral-7b-instruct-v0.1Mistral: Mistral 7B Instruct v0.11695859200A 7.3B parameter model that outperforms Llama ...2824None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]Mistralmistral0.000000110.0000001900002824.0NaNFalseNaNNaN
308pygmalionai/mythalion-13bPygmalion: Mythalion 13B1693612800A blend of the new Pygmalion-13b and MythoMax....8192None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500008192.01024.0FalseNaNNaN
309openai/gpt-3.5-turbo-16kOpenAI: GPT-3.5 Turbo 16k1693180800This model offers four times the context lengt...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.0000030.000004000016385.04096.0TrueNaNNaN
310openai/gpt-4-32kOpenAI: GPT-4 32k1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
311openai/gpt-4-32k-0314OpenAI: GPT-4 32k (older v0314)1693180800GPT-4-32k is an extended version of GPT-4, wit...32767None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000060.00012000032767.04096.0TrueNaNNaN
312mancer/weaverMancer: Weaver (alpha)1690934400An attempt to recreate Claude-style verbosity,...8000None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000011250.00000112500008000.01000.0FalseNaNNaN
313anthropic/claude-2.0:betaAnthropic: Claude v2.0 (self-moderated)1690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0FalseNaNNaN
314anthropic/claude-2.0Anthropic: Claude v2.01690502400Anthropic's flagship model. Superior performan...100000None[max_tokens, temperature, top_p, top_k, stop]text->text[text][text]ClaudeNone0.0000080.0000240000100000.04096.0TrueNaNNaN
315undi95/remm-slerp-l2-13bReMM SLERP 13B1689984000A recreation trial of the original MythoMax-L2...6144None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.00000056250.00000112500006144.01024.0FalseNaNNaN
316gryphe/mythomax-l2-13bMythoMax 13B1688256000One of the highest performing and most popular...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2alpaca0.0000000650.00000006500004096.04096.0FalseNaNNaN
317meta-llama/llama-2-70b-chatMeta: Llama 2 70B Chat1687219200The flagship, 70 billion parameter language mo...4096None[max_tokens, temperature, top_p, stop, frequen...text->text[text][text]Llama2llama20.00000090.000000900004096.0NaNFalseNaNNaN
318openai/gpt-3.5-turboOpenAI: GPT-3.5 Turbo1685232000GPT-3.5 Turbo is OpenAI's fastest model. It ca...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
319openai/gpt-3.5-turbo-0125OpenAI: GPT-3.5 Turbo 16k1685232000The latest GPT-3.5 Turbo model with improved i...16385None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.00000050.0000015000016385.04096.0TrueNaNNaN
320openai/gpt-4OpenAI: GPT-41685232000OpenAI's flagship model, GPT-4 is a large-scal...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
321openai/gpt-4-0314OpenAI: GPT-4 (older v0314)1685232000GPT-4-0314 is the first version of GPT-4 relea...8191None[tools, tool_choice, max_tokens, temperature, ...text->text[text][text]GPTNone0.000030.0000600008191.04096.0TrueNaNNaN
\n", - "
" - ], - "text/plain": [ - " id name created description context_length per_request_limits supported_parameters architecture_modality architecture_input_modalities architecture_output_modalities architecture_tokenizer architecture_instruct_type pricing_prompt pricing_completion pricing_request pricing_image pricing_web_search pricing_internal_reasoning top_provider_context_length top_provider_max_completion_tokens top_provider_is_moderated pricing_input_cache_read pricing_input_cache_write\n", - "0 mistralai/mistral-medium-3 Mistral: Mistral Medium 3 1746627341 Mistral Medium 3 is a high-performance enterpr... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.0000004 0.000002 0 0 0 0 131072.0 NaN False NaN NaN\n", - "1 google/gemini-2.5-pro-preview Google: Gemini 2.5 Pro Preview 1746578513 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0.00000125 0.00001 0 0.00516 0 0 1048576.0 65535.0 False 0.00000031 0.000001625\n", - "2 arcee-ai/caller-large Arcee AI: Caller Large 1746487869 Caller Large is Arcee's specialist \"function‑c... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.00000055 0.00000085 0 0 0 0 32768.0 NaN False NaN NaN\n", - "3 arcee-ai/spotlight Arcee AI: Spotlight 1746481552 Spotlight is a 7‑billion‑parameter vision‑lang... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000018 0.00000018 0 0 0 0 131072.0 65537.0 False NaN NaN\n", - "4 arcee-ai/maestro-reasoning Arcee AI: Maestro Reasoning 1746481269 Maestro Reasoning is Arcee's flagship analysis... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.0000009 0.0000033 0 0 0 0 131072.0 32000.0 False NaN NaN\n", - "5 arcee-ai/virtuoso-large Arcee AI: Virtuoso Large 1746478885 Virtuoso‑Large is Arcee's top‑tier general‑pur... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000075 0.0000012 0 0 0 0 131072.0 64000.0 False NaN NaN\n", - "6 arcee-ai/coder-large Arcee AI: Coder Large 1746478663 Coder‑Large is a 32 B‑parameter offspring of Q... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 NaN False NaN NaN\n", - "7 arcee-ai/virtuoso-medium-v2 Arcee AI: Virtuoso Medium V2 1746478434 Virtuoso‑Medium‑v2 is a 32 B model distilled f... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "8 arcee-ai/arcee-blitz Arcee AI: Arcee Blitz 1746470100 Arcee Blitz is a 24 B‑parameter dense model di... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000045 0.00000075 0 0 0 0 32768.0 NaN False NaN NaN\n", - "9 microsoft/phi-4-reasoning-plus:free Microsoft: Phi 4 Reasoning Plus (free) 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "10 microsoft/phi-4-reasoning-plus Microsoft: Phi 4 Reasoning Plus 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.00000007 0.00000035 0 0 0 0 32768.0 NaN False NaN NaN\n", - "11 microsoft/phi-4-reasoning:free Microsoft: Phi 4 Reasoning (free) 1746121275 Phi-4-reasoning is a 14B parameter dense decod... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "12 qwen/qwen3-0.6b-04-28:free Qwen: Qwen3 0.6B (free) 1746043526 Qwen3-0.6B is a lightweight, 0.6 billion param... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "13 inception/mercury-coder-small-beta Inception: Mercury Coder Small Beta 1746033880 Mercury Coder Small is the first diffusion lar... 32000 None [max_tokens, frequency_penalty, presence_penal... text->text [text] [text] Other None 0.00000025 0.000001 0 0 0 0 32000.0 NaN False NaN NaN\n", - "14 qwen/qwen3-1.7b:free Qwen: Qwen3 1.7B (free) 1746031388 Qwen3-1.7B is a compact, 1.7 billion parameter... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "15 qwen/qwen3-4b:free Qwen: Qwen3 4B (free) 1746031104 Qwen3-4B is a 4 billion parameter dense langua... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 128000.0 NaN False NaN NaN\n", - "16 opengvlab/internvl3-14b:free OpenGVLab: InternVL3 14B (free) 1746021355 The 14b version of the InternVL3 series. An ad... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "17 opengvlab/internvl3-2b:free OpenGVLab: InternVL3 2B (free) 1746019807 The 2b version of the InternVL3 series, for an... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "18 deepseek/deepseek-prover-v2:free DeepSeek: DeepSeek Prover V2 (free) 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "19 deepseek/deepseek-prover-v2 DeepSeek: DeepSeek Prover V2 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.0000005 0.00000218 0 0 0 0 131072.0 NaN False NaN NaN\n", - "20 meta-llama/llama-guard-4-12b Meta: Llama Guard 4 12B 1745975193 Llama Guard 4 is a Llama 4 Scout-derived multi... 163840 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000005 0.00000005 0 0 0 0 163840.0 NaN False NaN NaN\n", - "21 qwen/qwen3-30b-a3b:free Qwen: Qwen3 30B A3B (free) 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "22 qwen/qwen3-30b-a3b Qwen: Qwen3 30B A3B 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "23 qwen/qwen3-8b:free Qwen: Qwen3 8B (free) 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "24 qwen/qwen3-8b Qwen: Qwen3 8B 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.000000035 0.000000138 0 0 0 0 128000.0 NaN False NaN NaN\n", - "25 qwen/qwen3-14b:free Qwen: Qwen3 14B (free) 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "26 qwen/qwen3-14b Qwen: Qwen3 14B 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000007 0.00000024 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "27 qwen/qwen3-32b:free Qwen: Qwen3 32B (free) 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "28 qwen/qwen3-32b Qwen: Qwen3 32B 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 NaN False NaN NaN\n", - "29 qwen/qwen3-235b-a22b:free Qwen: Qwen3 235B A22B (free) 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", - "30 qwen/qwen3-235b-a22b Qwen: Qwen3 235B A22B 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000014 0.000002 0 0 0 0 40960.0 40960.0 False NaN NaN\n", - "31 tngtech/deepseek-r1t-chimera:free TNG: DeepSeek R1T Chimera (free) 1745760875 DeepSeek-R1T-Chimera is created by merging Dee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "32 thudm/glm-z1-rumination-32b THUDM: GLM Z1 Rumination 32B 1745601495 THUDM: GLM Z1 Rumination 32B is a 32B-paramete... 32000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "33 thudm/glm-z1-9b:free THUDM: GLM Z1 9B (free) 1745601140 GLM-Z1-9B-0414 is a 9B-parameter language mode... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "34 thudm/glm-4-9b:free THUDM: GLM 4 9B (free) 1745601023 GLM-4-9B-0414 is a 9 billion parameter languag... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", - "35 microsoft/mai-ds-r1:free Microsoft: MAI DS R1 (free) 1745194100 MAI-DS-R1 is a post-trained variant of DeepSee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "36 thudm/glm-z1-32b:free THUDM: GLM Z1 32B (free) 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "37 thudm/glm-z1-32b THUDM: GLM Z1 32B 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "38 thudm/glm-4-32b:free THUDM: GLM 4 32B (free) 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "39 thudm/glm-4-32b THUDM: GLM 4 32B 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", - "40 google/gemini-2.5-flash-preview Google: Gemini 2.5 Flash Preview 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000006 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", - "41 google/gemini-2.5-flash-preview:thinking Google: Gemini 2.5 Flash Preview (thinking) 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000035 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", - "42 openai/o4-mini-high OpenAI: o4 Mini High 1744824212 OpenAI o4-mini-high is the same model as [o4-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", - "43 openai/o3 OpenAI: o3 1744823457 o3 is a well-rounded and powerful model across... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.00001 0.00004 0 0.00765 0 0 200000.0 100000.0 True 0.0000025 NaN\n", - "44 openai/o4-mini OpenAI: o4 Mini 1744820942 OpenAI o4-mini is a compact reasoning model in... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", - "45 shisa-ai/shisa-v2-llama3.3-70b:free Shisa AI: Shisa V2 Llama 3.3 70B (free) 1744754858 Shisa V2 Llama 3.3 70B is a bilingual Japanese... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "46 qwen/qwen2.5-coder-7b-instruct Qwen: Qwen2.5 Coder 7B Instruct 1744734887 Qwen2.5-Coder-7B-Instruct is a 7B parameter in... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen None 0.00000001 0.00000003 0 0 0 0 32768.0 NaN False NaN NaN\n", - "47 openai/gpt-4.1 OpenAI: GPT-4.1 1744651385 GPT-4.1 is a flagship large language model opt... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.000002 0.000008 0 0 0 0 1047576.0 32768.0 True 0.0000005 NaN\n", - "48 openai/gpt-4.1-mini OpenAI: GPT-4.1 Mini 1744651381 GPT-4.1 Mini is a mid-sized model delivering p... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000004 0.0000016 0 0 0 0 1047576.0 32768.0 True 0.0000001 NaN\n", - "49 openai/gpt-4.1-nano OpenAI: GPT-4.1 Nano 1744651369 For tasks that demand low latency, GPT‑4.1 nan... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000001 0.0000004 0 0 0 0 1047576.0 32768.0 True 0.000000025 NaN\n", - "50 eleutherai/llemma_7b EleutherAI: Llemma 7b 1744643225 Llemma 7B is a language model for mathematics.... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other code-llama 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "51 alfredpros/codellama-7b-instruct-solidity AlfredPros: CodeLLaMa 7B Instruct Solidity 1744641874 A finetuned 7 billion parameters Code LLaMA - ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "52 arliai/qwq-32b-arliai-rpr-v1:free ArliAI: QwQ 32B RpR v1 (free) 1744555982 QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "53 agentica-org/deepcoder-14b-preview:free Agentica: Deepcoder 14B Preview (free) 1744555395 DeepCoder-14B-Preview is a 14B parameter code ... 96000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 96000.0 NaN False NaN NaN\n", - "54 moonshotai/kimi-vl-a3b-thinking:free Moonshot AI: Kimi VL A3B Thinking (free) 1744304841 Kimi-VL is a lightweight Mixture-of-Experts vi... 131072 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [image, text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "55 x-ai/grok-3-mini-beta xAI: Grok 3 Mini Beta 1744240195 Grok 3 Mini is a lightweight, smaller thinking... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.0000003 0.0000005 0 0 0 0 131072.0 NaN False NaN NaN\n", - "56 x-ai/grok-3-beta xAI: Grok 3 Beta 1744240068 Grok 3 is the latest model from xAI. It's thei... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000003 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "57 nvidia/llama-3.3-nemotron-super-49b-v1:free NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free) 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "58 nvidia/llama-3.3-nemotron-super-49b-v1 NVIDIA: Llama 3.3 Nemotron Super 49B v1 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000013 0.0000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free) 1744115059 Llama-3.1-Nemotron-Ultra-253B-v1 is a large la... 
131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "60 meta-llama/llama-4-maverick:free Meta: Llama 4 Maverick (free) 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 256000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 256000.0 NaN False NaN NaN\n", - "61 meta-llama/llama-4-maverick Meta: Llama 4 Maverick 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 1048576 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000017 0.0000006 0 0.0006684 0 0 1048576.0 16384.0 False NaN NaN\n", - "62 meta-llama/llama-4-scout:free Meta: Llama 4 Scout (free) 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 512000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 512000.0 NaN False NaN NaN\n", - "63 meta-llama/llama-4-scout Meta: Llama 4 Scout 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 1048576 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Other None 0.00000008 0.0000003 0 0 0 0 1048576.0 1048576.0 False NaN NaN\n", - "64 all-hands/openhands-lm-32b-v0.1 OpenHands LM 32B V0.1 1743613013 OpenHands LM v0.1 is a 32B open-source coding ... 16384 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "65 mistral/ministral-8b Mistral: Ministral 8B 1743430021 Ministral 8B is a state-of-the-art language mo... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "66 deepseek/deepseek-v3-base:free DeepSeek: DeepSeek V3 Base (free) 1743272023 Note that this is a base model mostly meant fo... 
163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "67 scb10x/llama3.1-typhoon2-8b-instruct Typhoon2 8B Instruct 1743196511 Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000018 0.00000018 0 0 0 0 8192.0 NaN False NaN NaN\n", - "68 scb10x/llama3.1-typhoon2-70b-instruct Typhoon2 70B Instruct 1743196170 Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000088 0.00000088 0 0 0 0 8192.0 NaN False NaN NaN\n", - "69 allenai/molmo-7b-d:free AllenAI: Molmo 7B D (free) 1743023247 Molmo is a family of open vision-language mode... 4096 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 4096.0 NaN False NaN NaN\n", - "70 bytedance-research/ui-tars-72b:free Bytedance: UI-TARS 72B (free) 1743020065 UI-TARS 72B is an open-source multimodal AI mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "71 qwen/qwen2.5-vl-3b-instruct:free Qwen: Qwen2.5 VL 3B Instruct (free) 1743014573 Qwen2.5 VL 3B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "72 google/gemini-2.5-pro-exp-03-25 Google: Gemini 2.5 Pro Experimental 1742922099 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1000000 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0 0 0 0 0 0 1000000.0 65535.0 False NaN NaN\n", - "73 qwen/qwen2.5-vl-32b-instruct:free Qwen: Qwen2.5 VL 32B Instruct (free) 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 
8192 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", - "74 qwen/qwen2.5-vl-32b-instruct Qwen: Qwen2.5 VL 32B Instruct 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000009 0.0000009 0 0 0 0 128000.0 NaN False NaN NaN\n", - "75 deepseek/deepseek-chat-v3-0324:free DeepSeek: DeepSeek V3 0324 (free) 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "76 deepseek/deepseek-chat-v3-0324 DeepSeek: DeepSeek V3 0324 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] DeepSeek None 0.0000003 0.00000088 0 0 0 0 163840.0 NaN False NaN NaN\n", - "77 featherless/qwerky-72b:free Qwerky 72B (free) 1742481597 Qwerky-72B is a linear-attention RWKV variant ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "78 openai/o1-pro OpenAI: o1-pro 1742423211 The o1 series of models are trained with reinf... 200000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] GPT None 0.00015 0.0006 0 0.21675 0 0 200000.0 100000.0 True NaN NaN\n", - "79 mistralai/mistral-small-3.1-24b-instruct:free Mistral: Mistral Small 3.1 24B (free) 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 96000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0 0 0 0 0 0 96000.0 96000.0 False NaN NaN\n", - "80 mistralai/mistral-small-3.1-24b-instruct Mistral: Mistral Small 3.1 24B 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 
131072 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Mistral None 0.00000005 0.00000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "81 open-r1/olympiccoder-32b:free OlympicCoder 32B (free) 1742077228 OlympicCoder-32B is a high-performing open-sou... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "82 google/gemma-3-1b-it:free Google: Gemma 3 1B (free) 1741963556 Gemma 3 1B is the smallest of the new Gemma 3 ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 32768.0 8192.0 False NaN NaN\n", - "83 google/gemma-3-4b-it:free Google: Gemma 3 4B (free) 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "84 google/gemma-3-4b-it Google: Gemma 3 4B 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000002 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "85 ai21/jamba-1.6-large AI21: Jamba 1.6 Large 1741905173 AI21 Jamba Large 1.6 is a high-performance hyb... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.000002 0.000008 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "86 ai21/jamba-1.6-mini AI21: Jamba Mini 1.6 1741905171 AI21 Jamba Mini 1.6 is a hybrid foundation mod... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000002 0.0000004 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "87 google/gemma-3-12b-it:free Google: Gemma 3 12B (free) 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "88 google/gemma-3-12b-it Google: Gemma 3 12B 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000005 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "89 cohere/command-a Cohere: Command A 1741894342 Command A is an open-weights 111B parameter mo... 256000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 256000.0 8192.0 False NaN NaN\n", - "90 openai/gpt-4o-mini-search-preview OpenAI: GPT-4o-mini Search Preview 1741818122 GPT-4o mini Search Preview is a specialized mo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.00000015 0.0000006 0.0275 0.000217 0 0 128000.0 16384.0 True NaN NaN\n", - "91 openai/gpt-4o-search-preview OpenAI: GPT-4o Search Preview 1741817949 GPT-4o Search Previewis a specialized model fo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.0000025 0.00001 0.035 0.003613 0 0 128000.0 16384.0 True NaN NaN\n", - "92 rekaai/reka-flash-3:free Reka: Flash 3 (free) 1741812813 Reka Flash 3 is a general-purpose, instruction... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "93 google/gemma-3-27b-it:free Google: Gemma 3 27B (free) 1741756359 Gemma 3 introduces multimodality, supporting v... 96000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 96000.0 8192.0 False NaN NaN\n", - "94 google/gemma-3-27b-it Google: Gemma 3 27B 1741756359 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0.0000001 0.0000002 0 0.0000256 0 0 131072.0 16384.0 False NaN NaN\n", - "95 thedrummer/anubis-pro-105b-v1 TheDrummer: Anubis Pro 105B V1 1741642290 Anubis Pro 105B v1 is an expanded and refined ... 131072 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000008 0.000001 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "96 thedrummer/skyfall-36b-v2 TheDrummer: Skyfall 36B V2 1741636566 Skyfall 36B v2 is an enhanced iteration of Mis... 32768 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "97 microsoft/phi-4-multimodal-instruct Microsoft: Phi 4 Multimodal Instruct 1741396284 Phi-4 Multimodal Instruct is a versatile 5.6B ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000005 0.0000001 0 0.00017685 0 0 131072.0 NaN False NaN NaN\n", - "98 perplexity/sonar-reasoning-pro Perplexity: Sonar Reasoning Pro 1741313308 Note: Sonar Pro pricing includes Perplexity se... 128000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0 128000.0 NaN False NaN NaN\n", - "99 perplexity/sonar-pro Perplexity: Sonar Pro 1741312423 Note: Sonar Pro pricing includes Perplexity se... 200000 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000003 0.000015 0 0 0.005 0 200000.0 8000.0 False NaN NaN\n", - "100 perplexity/sonar-deep-research Perplexity: Sonar Deep Research 1741311246 Sonar Deep Research is a research-focused mode... 128000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0.000003 128000.0 NaN False NaN NaN\n", - "101 deepseek/deepseek-r1-zero:free DeepSeek: DeepSeek R1 Zero (free) 1741297434 DeepSeek-R1-Zero is a model trained via large-... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "102 qwen/qwq-32b:free Qwen: QwQ 32B (free) 1741208814 QwQ is the reasoning model of the Qwen series.... 40000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0 0 0 0 0 0 40000.0 40000.0 False NaN NaN\n", - "103 qwen/qwq-32b Qwen: QwQ 32B 1741208814 QwQ is the reasoning model of the Qwen series.... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0.00000015 0.0000002 0 0 0 0 131072.0 NaN False NaN NaN\n", - "104 moonshotai/moonlight-16b-a3b-instruct:free Moonshot AI: Moonlight 16B A3B Instruct (free) 1740719801 Moonlight-16B-A3B-Instruct is a 16B-parameter ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", - "105 nousresearch/deephermes-3-llama-3-8b-preview:free Nous: DeepHermes 3 Llama 3 8B Preview (free) 1740719372 DeepHermes 3 Preview is the latest version of ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", - "106 openai/gpt-4.5-preview OpenAI: GPT-4.5 (Preview) 1740687810 GPT-4.5 (Preview) is a research preview of Ope... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.000075 0.00015 0 0.108375 0 0 128000.0 16384.0 True 0.0000375 NaN\n", - "107 google/gemini-2.0-flash-lite-001 Google: Gemini 2.0 Flash Lite 1740506212 Gemini 2.0 Flash Lite offers a significantly f... 1048576 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image, file] [text] Gemini None 0.000000075 0.0000003 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", - "108 anthropic/claude-3.7-sonnet Anthropic: Claude 3.7 Sonnet 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", - "109 anthropic/claude-3.7-sonnet:thinking Anthropic: Claude 3.7 Sonnet (thinking) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", - "110 anthropic/claude-3.7-sonnet:beta Anthropic: Claude 3.7 Sonnet (self-moderated) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [max_tokens, temperature, stop, reasoning, inc... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 128000.0 False 0.0000003 0.00000375\n", - "111 perplexity/r1-1776 Perplexity: R1 1776 1740004929 R1 1776 is a version of DeepSeek-R1 that has b... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.000002 0.000008 0 0 0 0 128000.0 NaN False NaN NaN\n", - "112 mistralai/mistral-saba Mistral: Saba 1739803239 Mistral Saba is a 24B-parameter language model... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", - "113 cognitivecomputations/dolphin3.0-r1-mistral-24... Dolphin3.0 R1 Mistral 24B (free) 1739462498 Dolphin 3.0 R1 is the next generation of the D... 32768 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "114 cognitivecomputations/dolphin3.0-mistral-24b:free Dolphin3.0 Mistral 24B (free) 1739462019 Dolphin 3.0 is the next generation of the Dolp... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "115 meta-llama/llama-guard-3-8b Llama Guard 3 8B 1739401318 Llama Guard 3 is a Llama-3.1-8B pretrained mod... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.00000002 0.00000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "116 openai/o3-mini-high OpenAI: o3 Mini High 1739372611 OpenAI o3-mini-high is the same model as [o3-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", - "117 deepseek/deepseek-r1-distill-llama-8b DeepSeek: R1 Distill Llama 8B 1738937718 DeepSeek R1 Distill Llama 8B is a distilled la... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.00000004 0.00000004 0 0 0 0 32000.0 32000.0 False NaN NaN\n", - "118 google/gemini-2.0-flash-001 Google: Gemini 2.0 Flash 1738769413 Gemini Flash 2.0 offers a significantly faster... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.0000001 0.0000004 0 0.0000258 0 0 1000000.0 8192.0 False 0.000000025 0.0000001833\n", - "119 qwen/qwen-vl-plus Qwen: Qwen VL Plus 1738731255 Qwen's Enhanced Large Visual Language Model. S... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.00000021 0.00000063 0 0.0002688 0 0 7500.0 1500.0 False NaN NaN\n", - "120 aion-labs/aion-1.0 AionLabs: Aion-1.0 1738697557 Aion-1.0 is a multi-model system designed for ... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.000004 0.000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "121 aion-labs/aion-1.0-mini AionLabs: Aion-1.0-Mini 1738697107 Aion-1.0-Mini 32B parameter model is a distill... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.0000007 0.0000014 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "122 aion-labs/aion-rp-llama-3.1-8b AionLabs: Aion-RP 1.0 (8B) 1738696718 Aion-RP-Llama-3.1-8B ranks the highest in the ... 32768 None [max_tokens, temperature, top_p] text->text [text] [text] Other None 0.0000002 0.0000002 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "123 qwen/qwen-vl-max Qwen: Qwen VL Max 1738434304 Qwen VL Max is a visual understanding model wi... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.0000008 0.0000032 0 0.001024 0 0 7500.0 1500.0 False NaN NaN\n", - "124 qwen/qwen-turbo Qwen: Qwen-Turbo 1738410974 Qwen-Turbo, based on Qwen2.5, is a 1M context ... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.00000005 0.0000002 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", - "125 qwen/qwen2.5-vl-72b-instruct:free Qwen: Qwen2.5 VL 72B Instruct (free) 1738410311 Qwen2.5-VL is proficient in recognizing common... 131072 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", - "126 qwen/qwen2.5-vl-72b-instruct Qwen: Qwen2.5 VL 72B Instruct 1738410311 Qwen2.5-VL is proficient in recognizing common... 32000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.00000025 0.00000075 0 0 0 0 32000.0 NaN False NaN NaN\n", - "127 qwen/qwen-plus Qwen: Qwen-Plus 1738409840 Qwen-Plus, based on the Qwen2.5 foundation mod... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen None 0.0000004 0.0000012 0 0 0 0 131072.0 8192.0 False NaN NaN\n", - "128 qwen/qwen-max Qwen: Qwen-Max 1738402289 Qwen-Max, based on Qwen2.5, provides the best ... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000016 0.0000064 0 0 0 0 32768.0 8192.0 False NaN NaN\n", - "129 openai/o3-mini OpenAI: o3 Mini 1738351721 OpenAI o3-mini is a cost-efficient language mo... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", - "130 deepseek/deepseek-r1-distill-qwen-1.5b DeepSeek: R1 Distill Qwen 1.5B 1738328067 DeepSeek R1 Distill Qwen 1.5B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000018 0.00000018 0 0 0 0 131072.0 32768.0 False NaN NaN\n", - "131 mistralai/mistral-small-24b-instruct-2501:free Mistral: Mistral Small 3 (free) 1738255409 Mistral Small 3 is a 24B-parameter language mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "132 mistralai/mistral-small-24b-instruct-2501 Mistral: Mistral Small 3 1738255409 Mistral Small 3 is a 24B-parameter language mo... 28000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0.00000006 0.00000012 0 0 0 0 28000.0 14000.0 False NaN NaN\n", - "133 deepseek/deepseek-r1-distill-qwen-32b:free DeepSeek: R1 Distill Qwen 32B (free) 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 16000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16000.0 16000.0 False NaN NaN\n", - "134 deepseek/deepseek-r1-distill-qwen-32b DeepSeek: R1 Distill Qwen 32B 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 
131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000012 0.00000018 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "135 deepseek/deepseek-r1-distill-qwen-14b:free DeepSeek: R1 Distill Qwen 14B (free) 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "136 deepseek/deepseek-r1-distill-qwen-14b DeepSeek: R1 Distill Qwen 14B 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000015 0.00000015 0 0 0 0 64000.0 64000.0 False NaN NaN\n", - "137 perplexity/sonar-reasoning Perplexity: Sonar Reasoning 1738131107 Sonar Reasoning is a reasoning model provided ... 127000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000001 0.000005 0.005 0 0 0 127000.0 NaN False NaN NaN\n", - "138 perplexity/sonar Perplexity: Sonar 1738013808 Sonar is lightweight, affordable, fast, and si... 127072 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "139 liquid/lfm-7b Liquid: LFM 7B 1737806883 LFM-7B, a new best-in-class language model. LF... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000001 0.00000001 0 0 0 0 32768.0 NaN False NaN NaN\n", - "140 liquid/lfm-3b Liquid: LFM 3B 1737806501 Liquid's LFM 3B delivers incredible performanc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000002 0.00000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "141 deepseek/deepseek-r1-distill-llama-70b:free DeepSeek: R1 Distill Llama 70B (free) 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 
8192 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0 0 0 0 0 0 8192.0 4096.0 False NaN NaN\n", - "142 deepseek/deepseek-r1-distill-llama-70b DeepSeek: R1 Distill Llama 70B 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.0000001 0.0000004 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "143 deepseek/deepseek-r1:free DeepSeek: R1 (free) 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, reasoning, include_reasoning, tem... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "144 deepseek/deepseek-r1 DeepSeek: R1 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.0000005 0.00000218 0 0 0 0 163840.0 163840.0 False NaN NaN\n", - "145 minimax/minimax-01 MiniMax: MiniMax-01 1736915462 MiniMax-01 is a combines MiniMax-Text-01 for t... 1000192 None [max_tokens, temperature, top_p] text+image->text [text, image] [text] Other None 0.0000002 0.0000011 0 0 0 0 1000192.0 1000192.0 False NaN NaN\n", - "146 mistralai/codestral-2501 Mistral: Codestral 2501 1736895522 [Mistral](/mistralai)'s cutting-edge language ... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000003 0.0000009 0 0 0 0 262144.0 NaN False NaN NaN\n", - "147 microsoft/phi-4 Microsoft: Phi 4 1736489872 [Microsoft Research](/microsoft) Phi-4 is desi... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000007 0.00000014 0 0 0 0 16384.0 16384.0 False NaN NaN\n", - "148 deepseek/deepseek-chat:free DeepSeek: DeepSeek V3 (free) 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", - "149 deepseek/deepseek-chat DeepSeek: DeepSeek V3 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.00000038 0.00000089 0 0 0 0 163840.0 163840.0 False NaN NaN\n", - "150 sao10k/l3.3-euryale-70b Sao10K: Llama 3.3 Euryale 70B 1734535928 Euryale L3.3 70B is a model focused on creativ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "151 openai/o1 OpenAI: o1 1734459999 The latest and strongest model family from Ope... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [text, image] [text] GPT None 0.000015 0.00006 0 0.021675 0 0 200000.0 100000.0 True 0.0000075 NaN\n", - "152 eva-unit-01/eva-llama-3.33-70b EVA Llama 3.33 70B 1734377303 EVA Llama 3.33 70b is a roleplay and storywrit... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "153 x-ai/grok-2-vision-1212 xAI: Grok 2 Vision 1212 1734237338 Grok 2 Vision 1212 advances image-based AI wit... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000002 0.00001 0 0.0036 0 0 32768.0 NaN False NaN NaN\n", - "154 x-ai/grok-2-1212 xAI: Grok 2 1212 1734232814 Grok 2 1212 introduces significant enhancement... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000002 0.00001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "155 cohere/command-r7b-12-2024 Cohere: Command R7B (12-2024) 1734158152 Command R7B (12-2024) is a small, fast update ... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Cohere None 0.0000000375 0.00000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "156 google/gemini-2.0-flash-exp:free Google: Gemini 2.0 Flash Experimental (free) 1733937523 Gemini Flash 2.0 offers a significantly faster... 1048576 None [max_tokens, temperature, top_p, stop] text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", - "157 meta-llama/llama-3.3-70b-instruct:free Meta: Llama 3.3 70B Instruct (free) 1733506137 The Meta Llama 3.3 multilingual large language... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 8000.0 8000.0 False NaN NaN\n", - "158 meta-llama/llama-3.3-70b-instruct Meta: Llama 3.3 70B Instruct 1733506137 The Meta Llama 3.3 multilingual large language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009 0.00000035 0 0 0 0 131000.0 131000.0 False NaN NaN\n", - "159 amazon/nova-lite-v1 Amazon: Nova Lite 1.0 1733437363 Amazon Nova Lite 1.0 is a very low-cost multim... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.00000006 0.00000024 0 0.00009 0 0 300000.0 5120.0 True NaN NaN\n", - "160 amazon/nova-micro-v1 Amazon: Nova Micro 1.0 1733437237 Amazon Nova Micro 1.0 is a text-only model tha... 128000 None [tools, max_tokens, temperature, top_p, top_k,... text->text [text] [text] Nova None 0.000000035 0.00000014 0 0 0 0 128000.0 5120.0 True NaN NaN\n", - "161 amazon/nova-pro-v1 Amazon: Nova Pro 1.0 1733436303 Amazon Nova Pro 1.0 is a capable multimodal mo... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.0000008 0.0000032 0 0.0012 0 0 300000.0 5120.0 True NaN NaN\n", - "162 qwen/qwq-32b-preview:free Qwen: QwQ 32B Preview (free) 1732754541 QwQ-32B-Preview is an experimental research mo... 
16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16384.0 NaN False NaN NaN\n", - "163 qwen/qwq-32b-preview Qwen: QwQ 32B Preview 1732754541 QwQ-32B-Preview is an experimental research mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0.00000009 0.00000027 0 0 0 0 32768.0 NaN False NaN NaN\n", - "164 google/learnlm-1.5-pro-experimental:free Google: LearnLM 1.5 Pro Experimental (free) 1732216551 An experimental version of [Gemini 1.5 Pro](/g... 40960 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 40960.0 8192.0 False NaN NaN\n", - "165 eva-unit-01/eva-qwen-2.5-72b EVA Qwen2.5 72B 1732210606 EVA Qwen2.5 72B is a roleplay and storywriting... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "166 openai/gpt-4o-2024-11-20 OpenAI: GPT-4o (2024-11-20) 1732127594 The 2024-11-20 version of GPT-4o offers a leve... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "167 mistralai/mistral-large-2411 Mistral Large 2411 1731978685 Mistral Large 2 2411 is an update of [Mistral ... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "168 mistralai/mistral-large-2407 Mistral Large 2407 1731978415 This is Mistral AI's flagship model, Mistral L... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", - "169 mistralai/pixtral-large-2411 Mistral: Pixtral Large 2411 1731977388 Pixtral Large is a 124B parameter, open-weight... 
131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.000002 0.000006 0 0.002888 0 0 131072.0 NaN False NaN NaN\n", - "170 x-ai/grok-vision-beta xAI: Grok Vision Beta 1731976624 Grok Vision Beta is xAI's experimental languag... 8192 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000005 0.000015 0 0.009 0 0 8192.0 NaN False NaN NaN\n", - "171 infermatic/mn-inferor-12b Infermatic: Mistral Nemo Inferor 12B 1731464428 Inferor 12B is a merge of top roleplay models,... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "172 qwen/qwen-2.5-coder-32b-instruct:free Qwen2.5 Coder 32B Instruct (free) 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "173 qwen/qwen-2.5-coder-32b-instruct Qwen2.5 Coder 32B Instruct 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000006 0.00000015 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "174 raifle/sorcererlm-8x22b SorcererLM 8x22B 1731105083 SorcererLM is an advanced RP and storytelling ... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral vicuna 0.0000045 0.0000045 0 0 0 0 16000.0 NaN False NaN NaN\n", - "175 eva-unit-01/eva-qwen-2.5-32b EVA Qwen2.5 32B 1731104847 EVA Qwen2.5 32B is a roleplaying/storywriting ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "176 thedrummer/unslopnemo-12b Unslopnemo 12B 1731103448 UnslopNemo v4.1 is the latest addition from th... 
32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000045 0.00000045 0 0 0 0 32000.0 16000.0 False NaN NaN\n", - "177 anthropic/claude-3.5-haiku:beta Anthropic: Claude 3.5 Haiku (self-moderated) 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", - "178 anthropic/claude-3.5-haiku Anthropic: Claude 3.5 Haiku 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", - "179 anthropic/claude-3.5-haiku-20241022:beta Anthropic: Claude 3.5 Haiku (2024-10-22) (self... 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", - "180 anthropic/claude-3.5-haiku-20241022 Anthropic: Claude 3.5 Haiku (2024-10-22) 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", - "181 neversleep/llama-3.1-lumimaid-70b NeverSleep: Lumimaid v0.2 70B 1729555200 Lumimaid v0.2 70B is a finetune of [Llama 3.1 ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000015 0.00000225 0 0 0 0 16384.0 2048.0 False NaN NaN\n", - "182 anthracite-org/magnum-v4-72b Magnum v4 72B 1729555200 This is a series of models designed to replica... 16384 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.0000015 0.00000225 0 0 0 0 16384.0 1024.0 False NaN NaN\n", - "183 anthropic/claude-3.5-sonnet:beta Anthropic: Claude 3.5 Sonnet (self-moderated) 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", - "184 anthropic/claude-3.5-sonnet Anthropic: Claude 3.5 Sonnet 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", - "185 x-ai/grok-beta xAI: Grok Beta 1729382400 Grok Beta is xAI's experimental language model... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000005 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", - "186 mistralai/ministral-8b Mistral: Ministral 8B 1729123200 Ministral 8B is an 8B parameter model featurin... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", - "187 mistralai/ministral-3b Mistral: Ministral 3B 1729123200 Ministral 3B is a 3B parameter model optimized... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000004 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", - "188 qwen/qwen-2.5-7b-instruct:free Qwen2.5 7B Instruct (free) 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 32768.0 False NaN NaN\n", - "189 qwen/qwen-2.5-7b-instruct Qwen2.5 7B Instruct 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Qwen chatml 0.00000005 0.0000001 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "190 nvidia/llama-3.1-nemotron-70b-instruct NVIDIA: Llama 3.1 Nemotron 70B Instruct 1728950400 NVIDIA's Llama 3.1 Nemotron 70B is a language ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "191 inflection/inflection-3-productivity Inflection: Inflection 3 Productivity 1728604800 Inflection 3 Productivity is optimized for fol... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", - "192 inflection/inflection-3-pi Inflection: Inflection 3 Pi 1728604800 Inflection 3 Pi powers Inflection's [Pi](https... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", - "193 google/gemini-flash-1.5-8b Google: Gemini 1.5 Flash 8B 1727913600 Gemini Flash 1.5 8B is optimized for speed and... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.0000000375 0.00000015 0 0 0 0 1000000.0 8192.0 False 0.00000001 0.0000000583\n", - "194 thedrummer/rocinante-12b Rocinante 12B 1727654400 Rocinante 12B is designed for engaging storyte... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000025 0.0000005 0 0 0 0 32768.0 NaN False NaN NaN\n", - "195 anthracite-org/magnum-v2-72b Magnum v2 72B 1727654400 From the maker of [Goliath](https://openrouter... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000003 0.000003 0 0 0 0 32768.0 NaN False NaN NaN\n", - "196 liquid/lfm-40b Liquid: LFM 40B MoE 1727654400 Liquid's 40.3B Mixture of Experts (MoE) model.... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other chatml 0.00000015 0.00000015 0 0 0 0 32768.0 NaN False NaN NaN\n", - "197 meta-llama/llama-3.2-3b-instruct:free Meta: Llama 3.2 3B Instruct (free) 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 20000 None [max_tokens, temperature, top_p] text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 20000.0 20000.0 False NaN NaN\n", - "198 meta-llama/llama-3.2-3b-instruct Meta: Llama 3.2 3B Instruct 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000001 0.00000002 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "199 meta-llama/llama-3.2-1b-instruct:free Meta: Llama 3.2 1B Instruct (free) 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131000.0 NaN False NaN NaN\n", - "200 meta-llama/llama-3.2-1b-instruct Meta: Llama 3.2 1B Instruct 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131072 None [max_tokens, temperature, top_p, top_k, stop, ... text->text [text] [text] Llama3 llama3 0.000000005 0.00000001 0 0 0 0 131072.0 NaN False NaN NaN\n", - "201 meta-llama/llama-3.2-90b-vision-instruct Meta: Llama 3.2 90B Vision Instruct 1727222400 The Llama 90B Vision model is a top-tier, 90-b... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.0000012 0.0000012 0 0.001734 0 0 131072.0 2048.0 False NaN NaN\n", - "202 meta-llama/llama-3.2-11b-vision-instruct:free Meta: Llama 3.2 11B Vision Instruct (free) 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", - "203 meta-llama/llama-3.2-11b-vision-instruct Meta: Llama 3.2 11B Vision Instruct 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.000000049 0.000000049 0 0.00007948 0 0 131072.0 16384.0 False NaN NaN\n", - "204 qwen/qwen-2.5-72b-instruct:free Qwen2.5 72B Instruct (free) 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", - "205 qwen/qwen-2.5-72b-instruct Qwen2.5 72B Instruct 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen chatml 0.00000012 0.00000039 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "206 qwen/qwen-2.5-vl-72b-instruct Qwen: Qwen2.5-VL 72B Instruct 1726617600 Qwen2.5 VL 72B is a multimodal LLM from the Qw... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000006 0.0000006 0 0.000578 0 0 32768.0 NaN False NaN NaN\n", - "207 neversleep/llama-3.1-lumimaid-8b NeverSleep: Lumimaid v0.2 8B 1726358400 Lumimaid v0.2 8B is a finetune of [Llama 3.1 8... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 32768.0 2048.0 False NaN NaN\n", - "208 openai/o1-preview OpenAI: o1-preview 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", - "209 openai/o1-preview-2024-09-12 OpenAI: o1-preview (2024-09-12) 1726099200 The latest and strongest model family from Ope... 
128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", - "210 openai/o1-mini OpenAI: o1-mini 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", - "211 openai/o1-mini-2024-09-12 OpenAI: o1-mini (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", - "212 mistralai/pixtral-12b Mistral: Pixtral 12B 1725926400 The first multi-modal, text+image-to-text mode... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Mistral None 0.0000001 0.0000001 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", - "213 cohere/command-r-plus-08-2024 Cohere: Command R+ (08-2024) 1724976000 command-r-plus-08-2024 is an update of the [Co... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000025 0.00001 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "214 cohere/command-r-08-2024 Cohere: Command R (08-2024) 1724976000 command-r-08-2024 is an update of the [Command... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.00000015 0.0000006 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "215 qwen/qwen-2.5-vl-7b-instruct:free Qwen: Qwen2.5-VL 7B Instruct (free) 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 64000.0 False NaN NaN\n", - "216 qwen/qwen-2.5-vl-7b-instruct Qwen: Qwen2.5-VL 7B Instruct 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Qwen None 0.0000002 0.0000002 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", - "217 sao10k/l3.1-euryale-70b Sao10K: Llama 3.1 Euryale 70B v2.2 1724803200 Euryale L3.1 70B v2.2 is a model focused on cr... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "218 google/gemini-flash-1.5-8b-exp Google: Gemini 1.5 Flash 8B Experimental 1724803200 Gemini Flash 1.5 8B Experimental is an experim... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", - "219 microsoft/phi-3.5-mini-128k-instruct Microsoft: Phi-3.5 Mini 128K Instruct 1724198400 Phi-3.5 models are lightweight, state-of-the-a... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.00000003 0.00000009 0 0 0 0 131072.0 NaN False NaN NaN\n", - "220 nousresearch/hermes-3-llama-3.1-70b Nous: Hermes 3 70B Instruct 1723939200 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "221 nousresearch/hermes-3-llama-3.1-405b Nous: Hermes 3 405B Instruct 1723766400 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.0000008 0.0000008 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "222 openai/chatgpt-4o-latest OpenAI: ChatGPT-4o 1723593600 OpenAI ChatGPT 4o is continually updated by Op... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 16384.0 True NaN NaN\n", - "223 sao10k/l3-lunaris-8b Sao10K: Llama 3 8B Lunaris 1723507200 Lunaris 8B is a versatile generalist and rolep... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000005 0 0 0 0 8192.0 NaN False NaN NaN\n", - "224 aetherwiing/mn-starcannon-12b Aetherwiing: Starcannon 12B 1723507200 Starcannon 12B v2 is a creative roleplay and s... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "225 openai/gpt-4o-2024-08-06 OpenAI: GPT-4o (2024-08-06) 1722902400 The 2024-08-06 version of GPT-4o offers improv... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "226 meta-llama/llama-3.1-405b:free Meta: Llama 3.1 405B (base) (free) 1722556800 Meta's latest class of model (Llama 3.1) launc... 64000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", - "227 meta-llama/llama-3.1-405b Meta: Llama 3.1 405B (base) 1722556800 Meta's latest class of model (Llama 3.1) launc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.000002 0.000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "228 nothingiisreal/mn-celeste-12b Mistral Nemo 12B Celeste 1722556800 A specialized story writing and roleplaying mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "229 perplexity/llama-3.1-sonar-small-128k-online Perplexity: Llama 3.1 Sonar 8B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 
127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.0000002 0.0000002 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "230 perplexity/llama-3.1-sonar-large-128k-online Perplexity: Llama 3.1 Sonar 70B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", - "231 meta-llama/llama-3.1-8b-instruct:free Meta: Llama 3.1 8B Instruct (free) 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 4096.0 False NaN NaN\n", - "232 meta-llama/llama-3.1-8b-instruct Meta: Llama 3.1 8B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000003 0 0 0 0 16384.0 16384.0 False NaN NaN\n", - "233 meta-llama/llama-3.1-405b-instruct Meta: Llama 3.1 405B Instruct 1721692800 The highly anticipated 400B class of Llama3 is... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000008 0.0000008 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "234 meta-llama/llama-3.1-70b-instruct Meta: Llama 3.1 70B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000001 0.00000028 0 0 0 0 131072.0 16384.0 False NaN NaN\n", - "235 mistralai/codestral-mamba Mistral: Codestral Mamba 1721347200 A 7.3B parameter Mamba-based model designed fo... 262144 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 262144.0 NaN False NaN NaN\n", - "236 mistralai/mistral-nemo:free Mistral: Mistral Nemo (free) 1721347200 A 12B parameter model with a 128k token contex... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 128000.0 128000.0 False NaN NaN\n", - "237 mistralai/mistral-nemo Mistral: Mistral Nemo 1721347200 A 12B parameter model with a 128k token contex... 98304 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000003 0.00000007 0 0 0 0 98304.0 49152.0 False NaN NaN\n", - "238 openai/gpt-4o-mini OpenAI: GPT-4o-mini 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.000217 0 0 128000.0 16384.0 True 0.000000075 NaN\n", - "239 openai/gpt-4o-mini-2024-07-18 OpenAI: GPT-4o-mini (2024-07-18) 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.007225 0 0 128000.0 16384.0 True 0.000000075 NaN\n", - "240 google/gemma-2-27b-it Google: Gemma 2 27B 1720828800 Gemma 2 27B by Google is an open model built f... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.0000001 0.0000003 0 0 0 0 8192.0 NaN False NaN NaN\n", - "241 alpindale/magnum-72b Magnum 72B 1720656000 From the maker of [Goliath](https://openrouter... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", - "242 google/gemma-2-9b-it:free Google: Gemma 2 9B (free) 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Gemini gemma 0 0 0 0 0 0 8192.0 8192.0 False NaN NaN\n", - "243 google/gemma-2-9b-it Google: Gemma 2 9B 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.00000002 0.00000006 0 0 0 0 8192.0 NaN False NaN NaN\n", - "244 01-ai/yi-large 01.AI: Yi Large 1719273600 The Yi Large model was designed by 01.AI with ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Yi None 0.000003 0.000003 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "245 ai21/jamba-instruct AI21: Jamba Instruct 1719273600 The Jamba-Instruct model, introduced by AI21 L... 256000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000005 0.0000007 0 0 0 0 256000.0 4096.0 False NaN NaN\n", - "246 anthropic/claude-3.5-sonnet-20240620:beta Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel... 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", - "247 anthropic/claude-3.5-sonnet-20240620 Anthropic: Claude 3.5 Sonnet (2024-06-20) 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", - "248 sao10k/l3-euryale-70b Sao10k: Llama 3 Euryale 70B v2.1 1718668800 Euryale 70B v2.1 is a model focused on creativ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000148 0.00000148 0 0 0 0 8192.0 8192.0 False NaN NaN\n", - "249 cognitivecomputations/dolphin-mixtral-8x22b Dolphin 2.9.2 Mixtral 8x22B 🐬 1717804800 Dolphin 2.9 is designed for instruction follow... 
16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000009 0.0000009 0 0 0 0 16000.0 NaN False NaN NaN\n", - "250 qwen/qwen-2-72b-instruct Qwen 2 72B Instruct 1717718400 Qwen2 72B is a transformer-based model that ex... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000009 0.0000009 0 0 0 0 32768.0 4096.0 False NaN NaN\n", - "251 mistralai/mistral-7b-instruct:free Mistral: Mistral 7B Instruct (free) 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "252 mistralai/mistral-7b-instruct Mistral: Mistral 7B Instruct 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "253 nousresearch/hermes-2-pro-llama-3-8b NousResearch: Hermes 2 Pro - Llama-3 8B 1716768000 Hermes 2 Pro is an upgraded, retrained version... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.000000025 0.00000004 0 0 0 0 131072.0 131072.0 False NaN NaN\n", - "254 mistralai/mistral-7b-instruct-v0.3 Mistral: Mistral 7B Instruct v0.3 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", - "255 microsoft/phi-3-mini-128k-instruct Microsoft: Phi-3 Mini 128K Instruct 1716681600 Phi-3 Mini is a powerful 3.8B parameter model ... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Other phi3 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", - "256 microsoft/phi-3-medium-128k-instruct Microsoft: Phi-3 Medium 128K Instruct 1716508800 Phi-3 128K Medium is a powerful 14-billion par... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000003 0 0 0 0 131072.0 NaN False NaN NaN\n", - "257 neversleep/llama-3-lumimaid-70b NeverSleep: Llama 3 Lumimaid 70B 1715817600 The NeverSleep team is back, with a Llama 3 70... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 8192.0 4096.0 False NaN NaN\n", - "258 deepseek/deepseek-coder DeepSeek-Coder-V2 1715644800 DeepSeek-Coder-V2, an open-source Mixture-of-E... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000004 0.00000012 0 0 0 0 128000.0 NaN False NaN NaN\n", - "259 google/gemini-flash-1.5 Google: Gemini 1.5 Flash 1715644800 Gemini 1.5 Flash is a foundation model that pe... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.000000075 0.0000003 0 0.00004 0 0 1000000.0 8192.0 False 0.00000001875 0.0000001583\n", - "260 openai/gpt-4o OpenAI: GPT-4o 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", - "261 openai/gpt-4o:extended OpenAI: GPT-4o (extended) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000006 0.000018 0 0.007225 0 0 128000.0 64000.0 True NaN NaN\n", - "262 meta-llama/llama-guard-2-8b Meta: LlamaGuard 2 8B 1715558400 This safeguard model has 8B parameters and is ... 
8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.0000002 0.0000002 0 0 0 0 8192.0 NaN False NaN NaN\n", - "263 openai/gpt-4o-2024-05-13 OpenAI: GPT-4o (2024-05-13) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 4096.0 True NaN NaN\n", - "264 allenai/olmo-7b-instruct OLMo 7B Instruct 1715299200 OLMo 7B Instruct by the Allen Institute for AI... 2048 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other zephyr 0.00000008 0.00000024 0 0 0 0 2048.0 NaN False NaN NaN\n", - "265 neversleep/llama-3-lumimaid-8b:extended NeverSleep: Llama 3 Lumimaid 8B (extended) 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", - "266 neversleep/llama-3-lumimaid-8b NeverSleep: Llama 3 Lumimaid 8B 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", - "267 sao10k/fimbulvetr-11b-v2 Fimbulvetr 11B v2 1713657600 Creative writing model, routed with permission... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "268 meta-llama/llama-3-8b-instruct Meta: Llama 3 8B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, top_k, seed, ... 
text->text [text] [text] Llama3 llama3 0.00000003 0.00000006 0 0 0 0 8192.0 16384.0 False NaN NaN\n", - "269 meta-llama/llama-3-70b-instruct Meta: Llama 3 70B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000003 0.0000004 0 0 0 0 8192.0 16384.0 False NaN NaN\n", - "270 mistralai/mixtral-8x22b-instruct Mistral: Mixtral 8x22B Instruct 1713312000 Mistral's official instruct fine-tuned version... 65536 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.0000004 0.0000012 0 0 0 0 65536.0 NaN False NaN NaN\n", - "271 microsoft/wizardlm-2-8x22b WizardLM-2 8x22B 1713225600 WizardLM-2 8x22B is Microsoft AI's most advanc... 65536 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Mistral vicuna 0.0000005 0.0000005 0 0 0 0 65536.0 16384.0 False NaN NaN\n", - "272 google/gemini-pro-1.5 Google: Gemini 1.5 Pro 1712620800 Google's latest multimodal model, supports ima... 2000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.00000125 0.000005 0 0.0006575 0 0 2000000.0 8192.0 False NaN NaN\n", - "273 openai/gpt-4-turbo OpenAI: GPT-4 Turbo 1712620800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.00001 0.00003 0 0.01445 0 0 128000.0 4096.0 True NaN NaN\n", - "274 cohere/command-r-plus Cohere: Command R+ 1712188800 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "275 cohere/command-r-plus-04-2024 Cohere: Command R+ (04-2024) 1712016000 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... 
text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "276 sophosympatheia/midnight-rose-70b Midnight Rose 70B 1711065600 A merge with a complex family tree, this model... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000008 0.0000008 0 0 0 0 4096.0 NaN False NaN NaN\n", - "277 cohere/command Cohere: Command 1710374400 Command is an instruction-following conversati... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.000001 0.000002 0 0 0 0 4096.0 4000.0 False NaN NaN\n", - "278 cohere/command-r Cohere: Command R 1710374400 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "279 anthropic/claude-3-haiku:beta Anthropic: Claude 3 Haiku (self-moderated) 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 False 0.00000003 0.0000003\n", - "280 anthropic/claude-3-haiku Anthropic: Claude 3 Haiku 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 True 0.00000003 0.0000003\n", - "281 anthropic/claude-3-opus:beta Anthropic: Claude 3 Opus (self-moderated) 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 False 0.0000015 0.00001875\n", - "282 anthropic/claude-3-opus Anthropic: Claude 3 Opus 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 
200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 True 0.0000015 0.00001875\n", - "283 anthropic/claude-3-sonnet:beta Anthropic: Claude 3 Sonnet (self-moderated) 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 False 0.0000003 0.00000375\n", - "284 anthropic/claude-3-sonnet Anthropic: Claude 3 Sonnet 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 True 0.0000003 0.00000375\n", - "285 cohere/command-r-03-2024 Cohere: Command R (03-2024) 1709341200 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", - "286 mistralai/mistral-large Mistral Large 1708905600 This is Mistral AI's flagship model, Mistral L... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 128000.0 NaN False NaN NaN\n", - "287 openai/gpt-3.5-turbo-0613 OpenAI: GPT-3.5 Turbo (older v0613) 1706140800 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 4095 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 4095.0 4096.0 False NaN NaN\n", - "288 openai/gpt-4-turbo-preview OpenAI: GPT-4 Turbo Preview 1706140800 The preview GPT-4 model with improved instruct... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", - "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo Nous: Hermes 2 Mixtral 8x7B DPO 1705363200 Nous Hermes 2 Mixtral 8x7B DPO is the new flag... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000006 0.0000006 0 0 0 0 32768.0 2048.0 False NaN NaN\n", - "290 mistralai/mistral-medium Mistral Medium 1704844800 This is Mistral AI's closed-source, medium-sid... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000275 0.0000081 0 0 0 0 32768.0 NaN False NaN NaN\n", - "291 mistralai/mistral-small Mistral Small 1704844800 With 22 billion parameters, Mistral Small v24.... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", - "292 mistralai/mistral-tiny Mistral Tiny 1704844800 Note: This model is being deprecated. Recommen... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 32768.0 NaN False NaN NaN\n", - "293 mistralai/mistral-7b-instruct-v0.2 Mistral: Mistral 7B Instruct v0.2 1703721600 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000002 0.0000002 0 0 0 0 32768.0 NaN False NaN NaN\n", - "294 mistralai/mixtral-8x7b-instruct Mistral: Mixtral 8x7B Instruct 1702166400 Mixtral 8x7B Instruct is a pretrained generati... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000008 0.00000024 0 0 0 0 32768.0 NaN False NaN NaN\n", - "295 neversleep/noromaid-20b Noromaid 20B 1700956800 A collab between IkariDev and Undi. This merge... 8192 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.00000075 0.0000015 0 0 0 0 8192.0 2048.0 False NaN NaN\n", - "296 anthropic/claude-2.1:beta Anthropic: Claude v2.1 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", - "297 anthropic/claude-2.1 Anthropic: Claude v2.1 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", - "298 anthropic/claude-2:beta Anthropic: Claude v2 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", - "299 anthropic/claude-2 Anthropic: Claude v2 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", - "300 undi95/toppy-m-7b Toppy M 7B 1699574400 A wild 7B parameter model that merges several ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "301 alpindale/goliath-120b Goliath 120B 1699574400 A large LLM created by combining two fine-tune... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000065625 0.000009375 0 0 0 0 6144.0 512.0 False NaN NaN\n", - "302 openrouter/auto Auto Router 1699401600 Your prompt will be processed by a meta-model ... 
2000000 None [] text->text [text] [text] Router None -1 -1 NaN NaN NaN NaN NaN NaN False NaN NaN\n", - "303 openai/gpt-3.5-turbo-1106 OpenAI: GPT-3.5 Turbo 16k (older v1106) 1699228800 An older GPT-3.5 Turbo model with improved ins... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "304 openai/gpt-4-1106-preview OpenAI: GPT-4 Turbo (older v1106) 1699228800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", - "305 jondurbin/airoboros-l2-70b Airoboros 70B 1698537600 A Llama 2 70B fine-tune using synthetic data (... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000005 0.0000005 0 0 0 0 4096.0 NaN False NaN NaN\n", - "306 openai/gpt-3.5-turbo-instruct OpenAI: GPT-3.5 Turbo Instruct 1695859200 This model is a variant of GPT-3.5 Turbo tuned... 4095 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] GPT chatml 0.0000015 0.000002 0 0 0 0 4095.0 4096.0 True NaN NaN\n", - "307 mistralai/mistral-7b-instruct-v0.1 Mistral: Mistral 7B Instruct v0.1 1695859200 A 7.3B parameter model that outperforms Llama ... 2824 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000011 0.00000019 0 0 0 0 2824.0 NaN False NaN NaN\n", - "308 pygmalionai/mythalion-13b Pygmalion: Mythalion 13B 1693612800 A blend of the new Pygmalion-13b and MythoMax.... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 8192.0 1024.0 False NaN NaN\n", - "309 openai/gpt-3.5-turbo-16k OpenAI: GPT-3.5 Turbo 16k 1693180800 This model offers four times the context lengt... 16385 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.000003 0.000004 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "310 openai/gpt-4-32k OpenAI: GPT-4 32k 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", - "311 openai/gpt-4-32k-0314 OpenAI: GPT-4 32k (older v0314) 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", - "312 mancer/weaver Mancer: Weaver (alpha) 1690934400 An attempt to recreate Claude-style verbosity,... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.000001125 0.000001125 0 0 0 0 8000.0 1000.0 False NaN NaN\n", - "313 anthropic/claude-2.0:beta Anthropic: Claude v2.0 (self-moderated) 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 False NaN NaN\n", - "314 anthropic/claude-2.0 Anthropic: Claude v2.0 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 True NaN NaN\n", - "315 undi95/remm-slerp-l2-13b ReMM SLERP 13B 1689984000 A recreation trial of the original MythoMax-L2... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 6144.0 1024.0 False NaN NaN\n", - "316 gryphe/mythomax-l2-13b MythoMax 13B 1688256000 One of the highest performing and most popular... 4096 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.000000065 0.000000065 0 0 0 0 4096.0 4096.0 False NaN NaN\n", - "317 meta-llama/llama-2-70b-chat Meta: Llama 2 70B Chat 1687219200 The flagship, 70 billion parameter language mo... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 llama2 0.0000009 0.0000009 0 0 0 0 4096.0 NaN False NaN NaN\n", - "318 openai/gpt-3.5-turbo OpenAI: GPT-3.5 Turbo 1685232000 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "319 openai/gpt-3.5-turbo-0125 OpenAI: GPT-3.5 Turbo 16k 1685232000 The latest GPT-3.5 Turbo model with improved i... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", - "320 openai/gpt-4 OpenAI: GPT-4 1685232000 OpenAI's flagship model, GPT-4 is a large-scal... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN\n", - "321 openai/gpt-4-0314 OpenAI: GPT-4 (older v0314) 1685232000 GPT-4-0314 is the first version of GPT-4 relea... 8191 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Normalize the nested JSON\n", - "df = pd.json_normalize(val, sep=\"_\")\n", - "df\n", - "# View the resulting DataFrame\n", - "# print(df.T) # Transpose just for readable vertical inspection" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id mistralai/mistral-medium-3\n", - "name Mistral: Mistral Medium 3\n", - "created 1746627341\n", - "description Mistral Medium 3 is a high-performance enterpr...\n", - "context_length 131072\n", - "per_request_limits None\n", - "supported_parameters [tools, tool_choice, max_tokens, temperature, ...\n", - "architecture_modality text+image->text\n", - "architecture_input_modalities [text, image]\n", - "architecture_output_modalities [text]\n", - "architecture_tokenizer Mistral\n", - "architecture_instruct_type None\n", - "pricing_prompt 0.0000004\n", - "pricing_completion 0.000002\n", - "pricing_request 0\n", - "pricing_image 0\n", - "pricing_web_search 0\n", - "pricing_internal_reasoning 0\n", - "top_provider_context_length 131072.0\n", - "top_provider_max_completion_tokens NaN\n", - "top_provider_is_moderated False\n", - "pricing_input_cache_read NaN\n", - "pricing_input_cache_write NaN\n", - "Name: 0, dtype: object" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0].T" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "col_names = [\"id\", \"context_length\", \"pricing_prompt\", \"pricing_completion\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id object\n", - "name object\n", - "created int64\n", - "description object\n", - 
"context_length int64\n", - "per_request_limits object\n", - "supported_parameters object\n", - "architecture_modality object\n", - "architecture_input_modalities object\n", - "architecture_output_modalities object\n", - "architecture_tokenizer object\n", - "architecture_instruct_type object\n", - "pricing_prompt object\n", - "pricing_completion object\n", - "pricing_request object\n", - "pricing_image object\n", - "pricing_web_search object\n", - "pricing_internal_reasoning object\n", - "top_provider_context_length float64\n", - "top_provider_max_completion_tokens float64\n", - "top_provider_is_moderated bool\n", - "pricing_input_cache_read object\n", - "pricing_input_cache_write object\n", - "dtype: object" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.dtypes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.0, 'is_bool': 
0.0, 'is_string': 0.38819875776397517, 'type': 'is_bool'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.5962732919254659, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", - "{'is_numeric': 1.0, 'is_bool': 1.0, 'is_string': 0.0, 'type': 'is_bool'}\n", - "{'is_numeric': 0.13043478260869565, 'is_bool': 0.0, 'is_string': 0.13043478260869565, 'type': 'is_numeric'}\n", - "{'is_numeric': 0.07142857142857142, 'is_bool': 0.0, 'is_string': 0.07142857142857142, 'type': 'is_numeric'}\n" - ] - } - ], - "source": [ - "for col in df.columns:\n", - " print(hpandas.infer_column_types(df[col]))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_numericis_boolis_string
id0.0000000.01.000000
name0.0000000.01.000000
created1.0000000.00.000000
description0.0000000.01.000000
context_length1.0000000.00.000000
per_request_limits0.0000000.00.000000
supported_parameters0.0000000.00.000000
architecture_modality0.0000000.01.000000
architecture_input_modalities0.0000000.00.000000
architecture_output_modalities0.0000000.00.000000
architecture_tokenizer0.0000000.01.000000
architecture_instruct_type0.0000000.00.388199
pricing_prompt1.0000000.01.000000
pricing_completion1.0000000.01.000000
pricing_request0.9968940.00.996894
pricing_image0.9968940.00.996894
pricing_web_search0.9968940.00.996894
pricing_internal_reasoning0.9968940.00.996894
top_provider_context_length0.9968940.00.000000
top_provider_max_completion_tokens0.5962730.00.000000
top_provider_is_moderated1.0000001.00.000000
pricing_input_cache_read0.1304350.00.130435
pricing_input_cache_write0.0714290.00.071429
\n", - "
" - ], - "text/plain": [ - " is_numeric is_bool is_string\n", - "id 0.000000 0.0 1.000000\n", - "name 0.000000 0.0 1.000000\n", - "created 1.000000 0.0 0.000000\n", - "description 0.000000 0.0 1.000000\n", - "context_length 1.000000 0.0 0.000000\n", - "per_request_limits 0.000000 0.0 0.000000\n", - "supported_parameters 0.000000 0.0 0.000000\n", - "architecture_modality 0.000000 0.0 1.000000\n", - "architecture_input_modalities 0.000000 0.0 0.000000\n", - "architecture_output_modalities 0.000000 0.0 0.000000\n", - "architecture_tokenizer 0.000000 0.0 1.000000\n", - "architecture_instruct_type 0.000000 0.0 0.388199\n", - "pricing_prompt 1.000000 0.0 1.000000\n", - "pricing_completion 1.000000 0.0 1.000000\n", - "pricing_request 0.996894 0.0 0.996894\n", - "pricing_image 0.996894 0.0 0.996894\n", - "pricing_web_search 0.996894 0.0 0.996894\n", - "pricing_internal_reasoning 0.996894 0.0 0.996894\n", - "top_provider_context_length 0.996894 0.0 0.000000\n", - "top_provider_max_completion_tokens 0.596273 0.0 0.000000\n", - "top_provider_is_moderated 1.000000 1.0 0.000000\n", - "pricing_input_cache_read 0.130435 0.0 0.130435\n", - "pricing_input_cache_write 0.071429 0.0 0.071429" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.apply(lambda x: pd.Series(hpandas.infer_column_types(x))).T" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": { - "lines_to_next_cell": 2 - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_numericis_boolis_stringtype
id0.00.01.0is_bool
name0.00.01.0is_bool
created1.00.00.0is_numeric
description0.00.01.0is_bool
context_length1.00.00.0is_numeric
per_request_limits0.00.00.0is_bool
supported_parameters0.00.00.0is_bool
architecture_modality0.00.01.0is_bool
architecture_input_modalities0.00.00.0is_bool
architecture_output_modalities0.00.00.0is_bool
architecture_tokenizer0.00.01.0is_bool
architecture_instruct_type0.00.00.388199is_bool
pricing_prompt1.00.01.0is_numeric
pricing_completion1.00.01.0is_numeric
pricing_request0.9968940.00.996894is_numeric
pricing_image0.9968940.00.996894is_numeric
pricing_web_search0.9968940.00.996894is_numeric
pricing_internal_reasoning0.9968940.00.996894is_numeric
top_provider_context_length0.9968940.00.0is_numeric
top_provider_max_completion_tokens0.5962730.00.0is_numeric
top_provider_is_moderated1.01.00.0is_bool
pricing_input_cache_read0.1304350.00.130435is_numeric
pricing_input_cache_write0.0714290.00.071429is_numeric
\n", - "
" - ], - "text/plain": [ - " is_numeric is_bool is_string type\n", - "id 0.0 0.0 1.0 is_bool\n", - "name 0.0 0.0 1.0 is_bool\n", - "created 1.0 0.0 0.0 is_numeric\n", - "description 0.0 0.0 1.0 is_bool\n", - "context_length 1.0 0.0 0.0 is_numeric\n", - "per_request_limits 0.0 0.0 0.0 is_bool\n", - "supported_parameters 0.0 0.0 0.0 is_bool\n", - "architecture_modality 0.0 0.0 1.0 is_bool\n", - "architecture_input_modalities 0.0 0.0 0.0 is_bool\n", - "architecture_output_modalities 0.0 0.0 0.0 is_bool\n", - "architecture_tokenizer 0.0 0.0 1.0 is_bool\n", - "architecture_instruct_type 0.0 0.0 0.388199 is_bool\n", - "pricing_prompt 1.0 0.0 1.0 is_numeric\n", - "pricing_completion 1.0 0.0 1.0 is_numeric\n", - "pricing_request 0.996894 0.0 0.996894 is_numeric\n", - "pricing_image 0.996894 0.0 0.996894 is_numeric\n", - "pricing_web_search 0.996894 0.0 0.996894 is_numeric\n", - "pricing_internal_reasoning 0.996894 0.0 0.996894 is_numeric\n", - "top_provider_context_length 0.996894 0.0 0.0 is_numeric\n", - "top_provider_max_completion_tokens 0.596273 0.0 0.0 is_numeric\n", - "top_provider_is_moderated 1.0 1.0 0.0 is_bool\n", - "pricing_input_cache_read 0.130435 0.0 0.130435 is_numeric\n", - "pricing_input_cache_write 0.071429 0.0 0.071429 is_numeric" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hpandas.infer_column_types_df(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 True\n", - "1 True\n", - "2 True\n", - "3 True\n", - "4 True\n", - "5 True\n", - "6 True\n", - "7 True\n", - "8 True\n", - "9 True\n", - "10 True\n", - "11 True\n", - "12 True\n", - "13 True\n", - "14 True\n", - "15 True\n", - "16 True\n", - "17 True\n", - "18 True\n", - "19 True\n", - "20 True\n", - "21 True\n", - "22 True\n", - "23 True\n", - "24 True\n", - "25 True\n", - "26 True\n", - "27 True\n", - "28 True\n", - "29 True\n", 
- "30 True\n", - "31 True\n", - "32 True\n", - "33 True\n", - "34 True\n", - "35 True\n", - "36 True\n", - "37 True\n", - "38 True\n", - "39 True\n", - "40 True\n", - "41 True\n", - "42 True\n", - "43 True\n", - "44 True\n", - "45 True\n", - "46 True\n", - "47 True\n", - "48 True\n", - "49 True\n", - "50 True\n", - "51 True\n", - "52 True\n", - "53 True\n", - "54 True\n", - "55 True\n", - "56 True\n", - "57 True\n", - "58 True\n", - "59 True\n", - "60 True\n", - "61 True\n", - "62 True\n", - "63 True\n", - "64 True\n", - "65 True\n", - "66 True\n", - "67 True\n", - "68 True\n", - "69 True\n", - "70 True\n", - "71 True\n", - "72 True\n", - "73 True\n", - "74 True\n", - "75 True\n", - "76 True\n", - "77 True\n", - "78 True\n", - "79 True\n", - "80 True\n", - "81 True\n", - "82 True\n", - "83 True\n", - "84 True\n", - "85 True\n", - "86 True\n", - "87 True\n", - "88 True\n", - "89 True\n", - "90 True\n", - "91 True\n", - "92 True\n", - "93 True\n", - "94 True\n", - "95 True\n", - "96 True\n", - "97 True\n", - "98 True\n", - "99 True\n", - "100 True\n", - "101 True\n", - "102 True\n", - "103 True\n", - "104 True\n", - "105 True\n", - "106 True\n", - "107 True\n", - "108 True\n", - "109 True\n", - "110 True\n", - "111 True\n", - "112 True\n", - "113 True\n", - "114 True\n", - "115 True\n", - "116 True\n", - "117 True\n", - "118 True\n", - "119 True\n", - "120 True\n", - "121 True\n", - "122 True\n", - "123 True\n", - "124 True\n", - "125 True\n", - "126 True\n", - "127 True\n", - "128 True\n", - "129 True\n", - "130 True\n", - "131 True\n", - "132 True\n", - "133 True\n", - "134 True\n", - "135 True\n", - "136 True\n", - "137 True\n", - "138 True\n", - "139 True\n", - "140 True\n", - "141 True\n", - "142 True\n", - "143 True\n", - "144 True\n", - "145 True\n", - "146 True\n", - "147 True\n", - "148 True\n", - "149 True\n", - "150 True\n", - "151 True\n", - "152 True\n", - "153 True\n", - "154 True\n", - "155 True\n", - "156 True\n", - "157 True\n", - "158 True\n", - 
"159 True\n", - "160 True\n", - "161 True\n", - "162 True\n", - "163 True\n", - "164 True\n", - "165 True\n", - "166 True\n", - "167 True\n", - "168 True\n", - "169 True\n", - "170 True\n", - "171 True\n", - "172 True\n", - "173 True\n", - "174 True\n", - "175 True\n", - "176 True\n", - "177 True\n", - "178 True\n", - "179 True\n", - "180 True\n", - "181 True\n", - "182 True\n", - "183 True\n", - "184 True\n", - "185 True\n", - "186 True\n", - "187 True\n", - "188 True\n", - "189 True\n", - "190 True\n", - "191 True\n", - "192 True\n", - "193 True\n", - "194 True\n", - "195 True\n", - "196 True\n", - "197 True\n", - "198 True\n", - "199 True\n", - "200 True\n", - "201 True\n", - "202 True\n", - "203 True\n", - "204 True\n", - "205 True\n", - "206 True\n", - "207 True\n", - "208 True\n", - "209 True\n", - "210 True\n", - "211 True\n", - "212 True\n", - "213 True\n", - "214 True\n", - "215 True\n", - "216 True\n", - "217 True\n", - "218 True\n", - "219 True\n", - "220 True\n", - "221 True\n", - "222 True\n", - "223 True\n", - "224 True\n", - "225 True\n", - "226 True\n", - "227 True\n", - "228 True\n", - "229 True\n", - "230 True\n", - "231 True\n", - "232 True\n", - "233 True\n", - "234 True\n", - "235 True\n", - "236 True\n", - "237 True\n", - "238 True\n", - "239 True\n", - "240 True\n", - "241 True\n", - "242 True\n", - "243 True\n", - "244 True\n", - "245 True\n", - "246 True\n", - "247 True\n", - "248 True\n", - "249 True\n", - "250 True\n", - "251 True\n", - "252 True\n", - "253 True\n", - "254 True\n", - "255 True\n", - "256 True\n", - "257 True\n", - "258 True\n", - "259 True\n", - "260 True\n", - "261 True\n", - "262 True\n", - "263 True\n", - "264 True\n", - "265 True\n", - "266 True\n", - "267 True\n", - "268 True\n", - "269 True\n", - "270 True\n", - "271 True\n", - "272 True\n", - "273 True\n", - "274 True\n", - "275 True\n", - "276 True\n", - "277 True\n", - "278 True\n", - "279 True\n", - "280 True\n", - "281 True\n", - "282 True\n", - "283 True\n", - 
"284 True\n", - "285 True\n", - "286 True\n", - "287 True\n", - "288 True\n", - "289 True\n", - "290 True\n", - "291 True\n", - "292 True\n", - "293 True\n", - "294 True\n", - "295 True\n", - "296 True\n", - "297 True\n", - "298 True\n", - "299 True\n", - "300 True\n", - "301 True\n", - "302 False\n", - "303 True\n", - "304 True\n", - "305 True\n", - "306 True\n", - "307 True\n", - "308 True\n", - "309 True\n", - "310 True\n", - "311 True\n", - "312 True\n", - "313 True\n", - "314 True\n", - "315 True\n", - "316 True\n", - "317 True\n", - "318 True\n", - "319 True\n", - "320 True\n", - "321 True\n", - "Name: pricing_request, dtype: bool" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.to_numeric(df[\"pricing_request\"], errors=\"coerce\").notna()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.000002\n", - "1 0.00001\n", - "2 0.00000085\n", - "3 0.00000018\n", - "4 0.0000033\n", - "5 0.0000012\n", - "6 0.0000008\n", - "7 0.0000008\n", - "8 0.00000075\n", - "9 0\n", - "10 0.00000035\n", - "11 0\n", - "12 0\n", - "13 0.000001\n", - "14 0\n", - "15 0\n", - "16 0\n", - "17 0\n", - "18 0\n", - "19 0.00000218\n", - "20 0.00000005\n", - "21 0\n", - "22 0.0000003\n", - "23 0\n", - "24 0.000000138\n", - "25 0\n", - "26 0.00000024\n", - "27 0\n", - "28 0.0000003\n", - "29 0\n", - "30 0.000002\n", - "31 0\n", - "32 0.00000024\n", - "33 0\n", - "34 0\n", - "35 0\n", - "36 0\n", - "37 0.00000024\n", - "38 0\n", - "39 0.00000024\n", - "40 0.0000006\n", - "41 0.0000035\n", - "42 0.0000044\n", - "43 0.00004\n", - "44 0.0000044\n", - "45 0\n", - "46 0.00000003\n", - "47 0.000008\n", - "48 0.0000016\n", - "49 0.0000004\n", - "50 0.0000012\n", - "51 0.0000012\n", - "52 0\n", - "53 0\n", - "54 0\n", - "55 0.0000005\n", - "56 0.000015\n", - "57 0\n", - "58 0.0000004\n", - "59 0\n", - "60 0\n", - "61 
0.0000006\n", - "62 0\n", - "63 0.0000003\n", - "64 0.0000034\n", - "65 0.0000001\n", - "66 0\n", - "67 0.00000018\n", - "68 0.00000088\n", - "69 0\n", - "70 0\n", - "71 0\n", - "72 0\n", - "73 0\n", - "74 0.0000009\n", - "75 0\n", - "76 0.00000088\n", - "77 0\n", - "78 0.0006\n", - "79 0\n", - "80 0.00000015\n", - "81 0\n", - "82 0\n", - "83 0\n", - "84 0.00000004\n", - "85 0.000008\n", - "86 0.0000004\n", - "87 0\n", - "88 0.0000001\n", - "89 0.00001\n", - "90 0.0000006\n", - "91 0.00001\n", - "92 0\n", - "93 0\n", - "94 0.0000002\n", - "95 0.000001\n", - "96 0.0000008\n", - "97 0.0000001\n", - "98 0.000008\n", - "99 0.000015\n", - "100 0.000008\n", - "101 0\n", - "102 0\n", - "103 0.0000002\n", - "104 0\n", - "105 0\n", - "106 0.00015\n", - "107 0.0000003\n", - "108 0.000015\n", - "109 0.000015\n", - "110 0.000015\n", - "111 0.000008\n", - "112 0.0000006\n", - "113 0\n", - "114 0\n", - "115 0.00000006\n", - "116 0.0000044\n", - "117 0.00000004\n", - "118 0.0000004\n", - "119 0.00000063\n", - "120 0.000008\n", - "121 0.0000014\n", - "122 0.0000002\n", - "123 0.0000032\n", - "124 0.0000002\n", - "125 0\n", - "126 0.00000075\n", - "127 0.0000012\n", - "128 0.0000064\n", - "129 0.0000044\n", - "130 0.00000018\n", - "131 0\n", - "132 0.00000012\n", - "133 0\n", - "134 0.00000018\n", - "135 0\n", - "136 0.00000015\n", - "137 0.000005\n", - "138 0.000001\n", - "139 0.00000001\n", - "140 0.00000002\n", - "141 0\n", - "142 0.0000004\n", - "143 0\n", - "144 0.00000218\n", - "145 0.0000011\n", - "146 0.0000009\n", - "147 0.00000014\n", - "148 0\n", - "149 0.00000089\n", - "150 0.0000008\n", - "151 0.00006\n", - "152 0.000006\n", - "153 0.00001\n", - "154 0.00001\n", - "155 0.00000015\n", - "156 0\n", - "157 0\n", - "158 0.00000035\n", - "159 0.00000024\n", - "160 0.00000014\n", - "161 0.0000032\n", - "162 0\n", - "163 0.00000027\n", - "164 0\n", - "165 0.000006\n", - "166 0.00001\n", - "167 0.000006\n", - "168 0.000006\n", - "169 0.000006\n", - "170 0.000015\n", - "171 
0.0000012\n", - "172 0\n", - "173 0.00000015\n", - "174 0.0000045\n", - "175 0.0000034\n", - "176 0.00000045\n", - "177 0.000004\n", - "178 0.000004\n", - "179 0.000004\n", - "180 0.000004\n", - "181 0.00000225\n", - "182 0.00000225\n", - "183 0.000015\n", - "184 0.000015\n", - "185 0.000015\n", - "186 0.0000001\n", - "187 0.00000004\n", - "188 0\n", - "189 0.0000001\n", - "190 0.0000003\n", - "191 0.00001\n", - "192 0.00001\n", - "193 0.00000015\n", - "194 0.0000005\n", - "195 0.000003\n", - "196 0.00000015\n", - "197 0\n", - "198 0.00000002\n", - "199 0\n", - "200 0.00000001\n", - "201 0.0000012\n", - "202 0\n", - "203 0.000000049\n", - "204 0\n", - "205 0.00000039\n", - "206 0.0000006\n", - "207 0.00000075\n", - "208 0.00006\n", - "209 0.00006\n", - "210 0.0000044\n", - "211 0.0000044\n", - "212 0.0000001\n", - "213 0.00001\n", - "214 0.0000006\n", - "215 0\n", - "216 0.0000002\n", - "217 0.0000008\n", - "218 0\n", - "219 0.00000009\n", - "220 0.0000003\n", - "221 0.0000008\n", - "222 0.000015\n", - "223 0.00000005\n", - "224 0.0000012\n", - "225 0.00001\n", - "226 0\n", - "227 0.000002\n", - "228 0.0000012\n", - "229 0.0000002\n", - "230 0.000001\n", - "231 0\n", - "232 0.00000003\n", - "233 0.0000008\n", - "234 0.00000028\n", - "235 0.00000025\n", - "236 0\n", - "237 0.00000007\n", - "238 0.0000006\n", - "239 0.0000006\n", - "240 0.0000003\n", - "241 0.000006\n", - "242 0\n", - "243 0.00000006\n", - "244 0.000003\n", - "245 0.0000007\n", - "246 0.000015\n", - "247 0.000015\n", - "248 0.00000148\n", - "249 0.0000009\n", - "250 0.0000009\n", - "251 0\n", - "252 0.000000054\n", - "253 0.00000004\n", - "254 0.000000054\n", - "255 0.0000001\n", - "256 0.0000003\n", - "257 0.000006\n", - "258 0.00000012\n", - "259 0.0000003\n", - "260 0.00001\n", - "261 0.000018\n", - "262 0.0000002\n", - "263 0.000015\n", - "264 0.00000024\n", - "265 0.00000075\n", - "266 0.00000075\n", - "267 0.0000012\n", - "268 0.00000006\n", - "269 0.0000004\n", - "270 0.0000012\n", - "271 
0.0000005\n", - "272 0.000005\n", - "273 0.00003\n", - "274 0.000015\n", - "275 0.000015\n", - "276 0.0000008\n", - "277 0.000002\n", - "278 0.0000015\n", - "279 0.00000125\n", - "280 0.00000125\n", - "281 0.000075\n", - "282 0.000075\n", - "283 0.000015\n", - "284 0.000015\n", - "285 0.0000015\n", - "286 0.000006\n", - "287 0.000002\n", - "288 0.00003\n", - "289 0.0000006\n", - "290 0.0000081\n", - "291 0.0000006\n", - "292 0.00000025\n", - "293 0.0000002\n", - "294 0.00000024\n", - "295 0.0000015\n", - "296 0.000024\n", - "297 0.000024\n", - "298 0.000024\n", - "299 0.000024\n", - "300 0.0000012\n", - "301 0.000009375\n", - "302 -1\n", - "303 0.000002\n", - "304 0.00003\n", - "305 0.0000005\n", - "306 0.000002\n", - "307 0.00000019\n", - "308 0.000001125\n", - "309 0.000004\n", - "310 0.00012\n", - "311 0.00012\n", - "312 0.000001125\n", - "313 0.000024\n", - "314 0.000024\n", - "315 0.000001125\n", - "316 0.000000065\n", - "317 0.0000009\n", - "318 0.0000015\n", - "319 0.0000015\n", - "320 0.00006\n", - "321 0.00006\n", - "Name: pricing_completion, dtype: object" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[\"pricing_completion\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcontext_lengthpricing_promptpricing_completion
302openrouter/auto2000000-1-1
133deepseek/deepseek-r1-distill-qwen-32b:free1600000
59nvidia/llama-3.1-nemotron-ultra-253b-v1:free13107200
113cognitivecomputations/dolphin3.0-r1-mistral-24...3276800
57nvidia/llama-3.3-nemotron-super-49b-v1:free13107200
114cognitivecomputations/dolphin3.0-mistral-24b:free3276800
54moonshotai/kimi-vl-a3b-thinking:free13107200
53agentica-org/deepcoder-14b-preview:free9600000
52arliai/qwq-32b-arliai-rpr-v1:free3276800
231meta-llama/llama-3.1-8b-instruct:free13107200
226meta-llama/llama-3.1-405b:free6400000
125qwen/qwen2.5-vl-72b-instruct:free13107200
45shisa-ai/shisa-v2-llama3.3-70b:free3276800
87google/gemma-3-12b-it:free13107200
92rekaai/reka-flash-3:free3276800
131mistralai/mistral-small-24b-instruct-2501:free3276800
81open-r1/olympiccoder-32b:free3276800
60meta-llama/llama-4-maverick:free25600000
236mistralai/mistral-nemo:free12800000
62meta-llama/llama-4-scout:free51200000
83google/gemma-3-4b-it:free13107200
93google/gemma-3-27b-it:free9600000
79mistralai/mistral-small-3.1-24b-instruct:free9600000
251mistralai/mistral-7b-instruct:free3276800
77featherless/qwerky-72b:free3276800
75deepseek/deepseek-chat-v3-0324:free16384000
242google/gemma-2-9b-it:free819200
73qwen/qwen2.5-vl-32b-instruct:free819200
135deepseek/deepseek-r1-distill-qwen-14b:free6400000
72google/gemini-2.5-pro-exp-03-25100000000
70bytedance-research/ui-tars-72b:free3276800
69allenai/molmo-7b-d:free409600
101deepseek/deepseek-r1-zero:free16384000
102qwen/qwq-32b:free4000000
66deepseek/deepseek-v3-base:free16384000
104moonshotai/moonlight-16b-a3b-instruct:free819200
105nousresearch/deephermes-3-llama-3-8b-preview:free13107200
71qwen/qwen2.5-vl-3b-instruct:free6400000
218google/gemini-flash-1.5-8b-exp100000000
82google/gemma-3-1b-it:free3276800
156google/gemini-2.0-flash-exp:free104857600
204qwen/qwen-2.5-72b-instruct:free3276800
21qwen/qwen3-30b-a3b:free4096000
38thudm/glm-4-32b:free3276800
157meta-llama/llama-3.3-70b-instruct:free800000
18deepseek/deepseek-prover-v2:free16384000
17opengvlab/internvl3-2b:free3200000
23qwen/qwen3-8b:free4096000
16opengvlab/internvl3-14b:free3200000
14qwen/qwen3-1.7b:free3200000
202meta-llama/llama-3.2-11b-vision-instruct:free13107200
12qwen/qwen3-0.6b-04-28:free3200000
11microsoft/phi-4-reasoning:free3276800
162qwen/qwq-32b-preview:free1638400
9microsoft/phi-4-reasoning-plus:free3276800
15qwen/qwen3-4b:free12800000
164google/learnlm-1.5-pro-experimental:free4096000
148deepseek/deepseek-chat:free16384000
199meta-llama/llama-3.2-1b-instruct:free13100000
36thudm/glm-z1-32b:free3276800
35microsoft/mai-ds-r1:free16384000
34thudm/glm-4-9b:free3200000
33thudm/glm-z1-9b:free3200000
188qwen/qwen-2.5-7b-instruct:free3276800
172qwen/qwen-2.5-coder-32b-instruct:free3276800
25qwen/qwen3-14b:free4096000
197meta-llama/llama-3.2-3b-instruct:free2000000
141deepseek/deepseek-r1-distill-llama-70b:free819200
29qwen/qwen3-235b-a22b:free4096000
143deepseek/deepseek-r1:free16384000
27qwen/qwen3-32b:free4096000
215qwen/qwen-2.5-vl-7b-instruct:free6400000
31tngtech/deepseek-r1t-chimera:free16384000
200meta-llama/llama-3.2-1b-instruct1310720.0000000050.00000001
198meta-llama/llama-3.2-3b-instruct1310720.000000010.00000002
139liquid/lfm-7b327680.000000010.00000001
46qwen/qwen2.5-coder-7b-instruct327680.000000010.00000003
243google/gemma-2-9b-it81920.000000020.00000006
232meta-llama/llama-3.1-8b-instruct163840.000000020.00000003
84google/gemma-3-4b-it1310720.000000020.00000004
140liquid/lfm-3b327680.000000020.00000002
115meta-llama/llama-guard-3-8b1310720.000000020.00000006
223sao10k/l3-lunaris-8b81920.000000020.00000005
253nousresearch/hermes-2-pro-llama-3-8b1310720.0000000250.00000004
254mistralai/mistral-7b-instruct-v0.3327680.0000000280.000000054
252mistralai/mistral-7b-instruct327680.0000000280.000000054
268meta-llama/llama-3-8b-instruct81920.000000030.00000006
219microsoft/phi-3.5-mini-128k-instruct1310720.000000030.00000009
237mistralai/mistral-nemo983040.000000030.00000007
160amazon/nova-micro-v11280000.0000000350.00000014
24qwen/qwen3-8b1280000.0000000350.000000138
193google/gemini-flash-1.5-8b10000000.00000003750.00000015
155cohere/command-r7b-12-20241280000.00000003750.00000015
187mistralai/ministral-3b1310720.000000040.00000004
117deepseek/deepseek-r1-distill-llama-8b320000.000000040.00000004
258deepseek/deepseek-coder1280000.000000040.00000012
203meta-llama/llama-3.2-11b-vision-instruct1310720.0000000490.000000049
80mistralai/mistral-small-3.1-24b-instruct1310720.000000050.00000015
20meta-llama/llama-guard-4-12b1638400.000000050.00000005
189qwen/qwen-2.5-7b-instruct327680.000000050.0000001
97microsoft/phi-4-multimodal-instruct1310720.000000050.0000001
88google/gemma-3-12b-it1310720.000000050.0000001
124qwen/qwen-turbo10000000.000000050.0000002
132mistralai/mistral-small-24b-instruct-2501280000.000000060.00000012
173qwen/qwen-2.5-coder-32b-instruct327680.000000060.00000015
159amazon/nova-lite-v13000000.000000060.00000024
316gryphe/mythomax-l2-13b40960.0000000650.000000065
10microsoft/phi-4-reasoning-plus327680.000000070.00000035
147microsoft/phi-4163840.000000070.00000014
26qwen/qwen3-14b409600.000000070.00000024
259google/gemini-flash-1.510000000.0000000750.0000003
107google/gemini-2.0-flash-lite-00110485760.0000000750.0000003
63meta-llama/llama-4-scout10485760.000000080.0000003
294mistralai/mixtral-8x7b-instruct327680.000000080.00000024
264allenai/olmo-7b-instruct20480.000000080.00000024
163qwen/qwq-32b-preview327680.000000090.00000027
158meta-llama/llama-3.3-70b-instruct1310000.000000090.00000035
266neversleep/llama-3-lumimaid-8b245760.000000093750.00000075
207neversleep/llama-3.1-lumimaid-8b327680.000000093750.00000075
265neversleep/llama-3-lumimaid-8b:extended245760.000000093750.00000075
212mistralai/pixtral-12b327680.00000010.0000001
28qwen/qwen3-32b409600.00000010.0000003
49openai/gpt-4.1-nano10475760.00000010.0000004
186mistralai/ministral-8b1280000.00000010.0000001
118google/gemini-2.0-flash-00110000000.00000010.0000004
234meta-llama/llama-3.1-70b-instruct1310720.00000010.00000028
65mistral/ministral-8b1310720.00000010.0000001
240google/gemma-2-27b-it81920.00000010.0000003
256microsoft/phi-3-medium-128k-instruct1310720.00000010.0000003
255microsoft/phi-3-mini-128k-instruct1280000.00000010.0000001
94google/gemma-3-27b-it1310720.00000010.0000002
22qwen/qwen3-30b-a3b409600.00000010.0000003
142deepseek/deepseek-r1-distill-llama-70b1310720.00000010.0000004
307mistralai/mistral-7b-instruct-v0.128240.000000110.00000019
205qwen/qwen-2.5-72b-instruct327680.000000120.00000039
190nvidia/llama-3.1-nemotron-70b-instruct1310720.000000120.0000003
134deepseek/deepseek-r1-distill-qwen-32b1310720.000000120.00000018
220nousresearch/hermes-3-llama-3.1-70b1310720.000000120.0000003
58nvidia/llama-3.3-nemotron-super-49b-v11310720.000000130.0000004
30qwen/qwen3-235b-a22b409600.000000140.000002
90openai/gpt-4o-mini-search-preview1280000.000000150.0000006
136deepseek/deepseek-r1-distill-qwen-14b640000.000000150.00000015
214cohere/command-r-08-20241280000.000000150.0000006
238openai/gpt-4o-mini1280000.000000150.0000006
103qwen/qwq-32b1310720.000000150.0000002
196liquid/lfm-40b327680.000000150.00000015
41google/gemini-2.5-flash-preview:thinking10485760.000000150.0000035
40google/gemini-2.5-flash-preview10485760.000000150.0000006
239openai/gpt-4o-mini-2024-07-181280000.000000150.0000006
61meta-llama/llama-4-maverick10485760.000000170.0000006
67scb10x/llama3.1-typhoon2-8b-instruct81920.000000180.00000018
130deepseek/deepseek-r1-distill-qwen-1.5b1310720.000000180.00000018
3arcee-ai/spotlight1310720.000000180.00000018
216qwen/qwen-2.5-vl-7b-instruct327680.00000020.0000002
262meta-llama/llama-guard-2-8b81920.00000020.0000002
293mistralai/mistral-7b-instruct-v0.2327680.00000020.0000002
86ai21/jamba-1.6-mini2560000.00000020.0000004
122aion-labs/aion-rp-llama-3.1-8b327680.00000020.0000002
229perplexity/llama-3.1-sonar-small-128k-online1270720.00000020.0000002
145minimax/minimax-0110001920.00000020.0000011
291mistralai/mistral-small327680.00000020.0000006
112mistralai/mistral-saba327680.00000020.0000006
119qwen/qwen-vl-plus75000.000000210.00000063
39thudm/glm-4-32b320000.000000240.00000024
32thudm/glm-z1-rumination-32b320000.000000240.00000024
37thudm/glm-z1-32b320000.000000240.00000024
13inception/mercury-coder-small-beta320000.000000250.000001
292mistralai/mistral-tiny327680.000000250.00000025
126qwen/qwen2.5-vl-72b-instruct320000.000000250.00000075
194thedrummer/rocinante-12b327680.000000250.0000005
279anthropic/claude-3-haiku:beta2000000.000000250.00000125
235mistralai/codestral-mamba2621440.000000250.00000025
280anthropic/claude-3-haiku2000000.000000250.00000125
55x-ai/grok-3-mini-beta1310720.00000030.0000005
269meta-llama/llama-3-70b-instruct81920.00000030.0000004
146mistralai/codestral-25012621440.00000030.0000009
76deepseek/deepseek-chat-v3-03241638400.00000030.00000088
149deepseek/deepseek-chat1638400.000000380.00000089
270mistralai/mixtral-8x22b-instruct655360.00000040.0000012
0mistralai/mistral-medium-31310720.00000040.000002
48openai/gpt-4.1-mini10475760.00000040.0000016
127qwen/qwen-plus1310720.00000040.0000012
8arcee-ai/arcee-blitz327680.000000450.00000075
176thedrummer/unslopnemo-12b320000.000000450.00000045
278cohere/command-r1280000.00000050.0000015
19deepseek/deepseek-prover-v21310720.00000050.00000218
285cohere/command-r-03-20241280000.00000050.0000015
7arcee-ai/virtuoso-medium-v21310720.00000050.0000008
6arcee-ai/coder-large327680.00000050.0000008
271microsoft/wizardlm-2-8x22b655360.00000050.0000005
96thedrummer/skyfall-36b-v2327680.00000050.0000008
144deepseek/deepseek-r11638400.00000050.00000218
305jondurbin/airoboros-l2-70b40960.00000050.0000005
318openai/gpt-3.5-turbo163850.00000050.0000015
319openai/gpt-3.5-turbo-0125163850.00000050.0000015
245ai21/jamba-instruct2560000.00000050.0000007
2arcee-ai/caller-large327680.000000550.00000085
308pygmalionai/mythalion-13b81920.00000056250.000001125
315undi95/remm-slerp-l2-13b61440.00000056250.000001125
206qwen/qwen-2.5-vl-72b-instruct327680.00000060.0000006
289nousresearch/nous-hermes-2-mixtral-8x7b-dpo327680.00000060.0000006
150sao10k/l3.3-euryale-70b1310720.00000070.0000008
121aion-labs/aion-1.0-mini1310720.00000070.0000014
217sao10k/l3.1-euryale-70b1310720.00000070.0000008
5arcee-ai/virtuoso-large1310720.000000750.0000012
295neversleep/noromaid-20b81920.000000750.0000015
221nousresearch/hermes-3-llama-3.1-405b1310720.00000080.0000008
233meta-llama/llama-3.1-405b-instruct327680.00000080.0000008
224aetherwiing/mn-starcannon-12b163840.00000080.0000012
179anthropic/claude-3.5-haiku-20241022:beta2000000.00000080.000004
95thedrummer/anubis-pro-105b-v11310720.00000080.000001
180anthropic/claude-3.5-haiku-202410222000000.00000080.000004
51alfredpros/codellama-7b-instruct-solidity40960.00000080.0000012
50eleutherai/llemma_7b40960.00000080.0000012
267sao10k/fimbulvetr-11b-v240960.00000080.0000012
276sophosympatheia/midnight-rose-70b40960.00000080.0000008
123qwen/qwen-vl-max75000.00000080.0000032
161amazon/nova-pro-v13000000.00000080.0000032
171infermatic/mn-inferor-12b163840.00000080.0000012
300undi95/toppy-m-7b40960.00000080.0000012
177anthropic/claude-3.5-haiku:beta2000000.00000080.000004
178anthropic/claude-3.5-haiku2000000.00000080.000004
228nothingiisreal/mn-celeste-12b163840.00000080.0000012
68scb10x/llama3.1-typhoon2-70b-instruct81920.000000880.00000088
74qwen/qwen2.5-vl-32b-instruct1280000.00000090.0000009
249cognitivecomputations/dolphin-mixtral-8x22b160000.00000090.0000009
4arcee-ai/maestro-reasoning1310720.00000090.0000033
317meta-llama/llama-2-70b-chat40960.00000090.0000009
250qwen/qwen-2-72b-instruct327680.00000090.0000009
230perplexity/llama-3.1-sonar-large-128k-online1270720.0000010.000001
287openai/gpt-3.5-turbo-061340950.0000010.000002
277cohere/command40960.0000010.000002
138perplexity/sonar1270720.0000010.000001
137perplexity/sonar-reasoning1270000.0000010.000005
303openai/gpt-3.5-turbo-1106163850.0000010.000002
42openai/o4-mini-high2000000.00000110.0000044
210openai/o1-mini1280000.00000110.0000044
211openai/o1-mini-2024-09-121280000.00000110.0000044
44openai/o4-mini2000000.00000110.0000044
129openai/o3-mini2000000.00000110.0000044
116openai/o3-mini-high2000000.00000110.0000044
312mancer/weaver80000.0000011250.000001125
201meta-llama/llama-3.2-90b-vision-instruct1310720.00000120.0000012
272google/gemini-pro-1.520000000.000001250.000005
1google/gemini-2.5-pro-preview10485760.000001250.00001
248sao10k/l3-euryale-70b81920.000001480.00000148
181neversleep/llama-3.1-lumimaid-70b163840.00000150.00000225
306openai/gpt-3.5-turbo-instruct40950.00000150.000002
182anthracite-org/magnum-v4-72b163840.00000150.00000225
128qwen/qwen-max327680.00000160.0000064
169mistralai/pixtral-large-24111310720.0000020.000006
286mistralai/mistral-large1280000.0000020.000006
85ai21/jamba-1.6-large2560000.0000020.000008
154x-ai/grok-2-12121310720.0000020.00001
47openai/gpt-4.110475760.0000020.000008
100perplexity/sonar-deep-research1280000.0000020.000008
227meta-llama/llama-3.1-405b327680.0000020.000002
153x-ai/grok-2-vision-1212327680.0000020.00001
168mistralai/mistral-large-24071310720.0000020.000006
98perplexity/sonar-reasoning-pro1280000.0000020.000008
111perplexity/r1-17761280000.0000020.000008
167mistralai/mistral-large-24111310720.0000020.000006
166openai/gpt-4o-2024-11-201280000.00000250.00001
225openai/gpt-4o-2024-08-061280000.00000250.00001
260openai/gpt-4o1280000.00000250.00001
192inflection/inflection-3-pi80000.00000250.00001
91openai/gpt-4o-search-preview1280000.00000250.00001
213cohere/command-r-plus-08-20241280000.00000250.00001
191inflection/inflection-3-productivity80000.00000250.00001
89cohere/command-a2560000.00000250.00001
64all-hands/openhands-lm-32b-v0.1163840.00000260.0000034
175eva-unit-01/eva-qwen-2.5-32b163840.00000260.0000034
290mistralai/mistral-medium327680.000002750.0000081
195anthracite-org/magnum-v2-72b327680.0000030.000003
284anthropic/claude-3-sonnet2000000.0000030.000015
283anthropic/claude-3-sonnet:beta2000000.0000030.000015
309openai/gpt-3.5-turbo-16k163850.0000030.000004
184anthropic/claude-3.5-sonnet2000000.0000030.000015
183anthropic/claude-3.5-sonnet:beta2000000.0000030.000015
275cohere/command-r-plus-04-20241280000.0000030.000015
274cohere/command-r-plus1280000.0000030.000015
109anthropic/claude-3.7-sonnet:thinking2000000.0000030.000015
110anthropic/claude-3.7-sonnet:beta2000000.0000030.000015
99perplexity/sonar-pro2000000.0000030.000015
24401-ai/yi-large327680.0000030.000003
246anthropic/claude-3.5-sonnet-20240620:beta2000000.0000030.000015
247anthropic/claude-3.5-sonnet-202406202000000.0000030.000015
56x-ai/grok-3-beta1310720.0000030.000015
108anthropic/claude-3.7-sonnet2000000.0000030.000015
152eva-unit-01/eva-llama-3.33-70b163840.0000040.000006
257neversleep/llama-3-lumimaid-70b81920.0000040.000006
241alpindale/magnum-72b163840.0000040.000006
165eva-unit-01/eva-qwen-2.5-72b163840.0000040.000006
120aion-labs/aion-1.01310720.0000040.000008
174raifle/sorcererlm-8x22b160000.00000450.0000045
263openai/gpt-4o-2024-05-131280000.0000050.000015
222openai/chatgpt-4o-latest1280000.0000050.000015
170x-ai/grok-vision-beta81920.0000050.000015
185x-ai/grok-beta1310720.0000050.000015
261openai/gpt-4o:extended1280000.0000060.000018
301alpindale/goliath-120b61440.00000656250.000009375
313anthropic/claude-2.0:beta1000000.0000080.000024
297anthropic/claude-2.12000000.0000080.000024
299anthropic/claude-22000000.0000080.000024
298anthropic/claude-2:beta2000000.0000080.000024
314anthropic/claude-2.01000000.0000080.000024
296anthropic/claude-2.1:beta2000000.0000080.000024
304openai/gpt-4-1106-preview1280000.000010.00003
43openai/o32000000.000010.00004
273openai/gpt-4-turbo1280000.000010.00003
288openai/gpt-4-turbo-preview1280000.000010.00003
151openai/o12000000.0000150.00006
282anthropic/claude-3-opus2000000.0000150.000075
281anthropic/claude-3-opus:beta2000000.0000150.000075
208openai/o1-preview1280000.0000150.00006
209openai/o1-preview-2024-09-121280000.0000150.00006
321openai/gpt-4-031481910.000030.00006
320openai/gpt-481910.000030.00006
311openai/gpt-4-32k-0314327670.000060.00012
310openai/gpt-4-32k327670.000060.00012
106openai/gpt-4.5-preview1280000.0000750.00015
78openai/o1-pro2000000.000150.0006
\n", - "
" - ], - "text/plain": [ - " id context_length pricing_prompt pricing_completion\n", - "302 openrouter/auto 2000000 -1 -1\n", - "133 deepseek/deepseek-r1-distill-qwen-32b:free 16000 0 0\n", - "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free 131072 0 0\n", - "113 cognitivecomputations/dolphin3.0-r1-mistral-24... 32768 0 0\n", - "57 nvidia/llama-3.3-nemotron-super-49b-v1:free 131072 0 0\n", - "114 cognitivecomputations/dolphin3.0-mistral-24b:free 32768 0 0\n", - "54 moonshotai/kimi-vl-a3b-thinking:free 131072 0 0\n", - "53 agentica-org/deepcoder-14b-preview:free 96000 0 0\n", - "52 arliai/qwq-32b-arliai-rpr-v1:free 32768 0 0\n", - "231 meta-llama/llama-3.1-8b-instruct:free 131072 0 0\n", - "226 meta-llama/llama-3.1-405b:free 64000 0 0\n", - "125 qwen/qwen2.5-vl-72b-instruct:free 131072 0 0\n", - "45 shisa-ai/shisa-v2-llama3.3-70b:free 32768 0 0\n", - "87 google/gemma-3-12b-it:free 131072 0 0\n", - "92 rekaai/reka-flash-3:free 32768 0 0\n", - "131 mistralai/mistral-small-24b-instruct-2501:free 32768 0 0\n", - "81 open-r1/olympiccoder-32b:free 32768 0 0\n", - "60 meta-llama/llama-4-maverick:free 256000 0 0\n", - "236 mistralai/mistral-nemo:free 128000 0 0\n", - "62 meta-llama/llama-4-scout:free 512000 0 0\n", - "83 google/gemma-3-4b-it:free 131072 0 0\n", - "93 google/gemma-3-27b-it:free 96000 0 0\n", - "79 mistralai/mistral-small-3.1-24b-instruct:free 96000 0 0\n", - "251 mistralai/mistral-7b-instruct:free 32768 0 0\n", - "77 featherless/qwerky-72b:free 32768 0 0\n", - "75 deepseek/deepseek-chat-v3-0324:free 163840 0 0\n", - "242 google/gemma-2-9b-it:free 8192 0 0\n", - "73 qwen/qwen2.5-vl-32b-instruct:free 8192 0 0\n", - "135 deepseek/deepseek-r1-distill-qwen-14b:free 64000 0 0\n", - "72 google/gemini-2.5-pro-exp-03-25 1000000 0 0\n", - "70 bytedance-research/ui-tars-72b:free 32768 0 0\n", - "69 allenai/molmo-7b-d:free 4096 0 0\n", - "101 deepseek/deepseek-r1-zero:free 163840 0 0\n", - "102 qwen/qwq-32b:free 40000 0 0\n", - "66 deepseek/deepseek-v3-base:free 163840 0 
0\n", - "104 moonshotai/moonlight-16b-a3b-instruct:free 8192 0 0\n", - "105 nousresearch/deephermes-3-llama-3-8b-preview:free 131072 0 0\n", - "71 qwen/qwen2.5-vl-3b-instruct:free 64000 0 0\n", - "218 google/gemini-flash-1.5-8b-exp 1000000 0 0\n", - "82 google/gemma-3-1b-it:free 32768 0 0\n", - "156 google/gemini-2.0-flash-exp:free 1048576 0 0\n", - "204 qwen/qwen-2.5-72b-instruct:free 32768 0 0\n", - "21 qwen/qwen3-30b-a3b:free 40960 0 0\n", - "38 thudm/glm-4-32b:free 32768 0 0\n", - "157 meta-llama/llama-3.3-70b-instruct:free 8000 0 0\n", - "18 deepseek/deepseek-prover-v2:free 163840 0 0\n", - "17 opengvlab/internvl3-2b:free 32000 0 0\n", - "23 qwen/qwen3-8b:free 40960 0 0\n", - "16 opengvlab/internvl3-14b:free 32000 0 0\n", - "14 qwen/qwen3-1.7b:free 32000 0 0\n", - "202 meta-llama/llama-3.2-11b-vision-instruct:free 131072 0 0\n", - "12 qwen/qwen3-0.6b-04-28:free 32000 0 0\n", - "11 microsoft/phi-4-reasoning:free 32768 0 0\n", - "162 qwen/qwq-32b-preview:free 16384 0 0\n", - "9 microsoft/phi-4-reasoning-plus:free 32768 0 0\n", - "15 qwen/qwen3-4b:free 128000 0 0\n", - "164 google/learnlm-1.5-pro-experimental:free 40960 0 0\n", - "148 deepseek/deepseek-chat:free 163840 0 0\n", - "199 meta-llama/llama-3.2-1b-instruct:free 131000 0 0\n", - "36 thudm/glm-z1-32b:free 32768 0 0\n", - "35 microsoft/mai-ds-r1:free 163840 0 0\n", - "34 thudm/glm-4-9b:free 32000 0 0\n", - "33 thudm/glm-z1-9b:free 32000 0 0\n", - "188 qwen/qwen-2.5-7b-instruct:free 32768 0 0\n", - "172 qwen/qwen-2.5-coder-32b-instruct:free 32768 0 0\n", - "25 qwen/qwen3-14b:free 40960 0 0\n", - "197 meta-llama/llama-3.2-3b-instruct:free 20000 0 0\n", - "141 deepseek/deepseek-r1-distill-llama-70b:free 8192 0 0\n", - "29 qwen/qwen3-235b-a22b:free 40960 0 0\n", - "143 deepseek/deepseek-r1:free 163840 0 0\n", - "27 qwen/qwen3-32b:free 40960 0 0\n", - "215 qwen/qwen-2.5-vl-7b-instruct:free 64000 0 0\n", - "31 tngtech/deepseek-r1t-chimera:free 163840 0 0\n", - "200 meta-llama/llama-3.2-1b-instruct 131072 
0.000000005 0.00000001\n", - "198 meta-llama/llama-3.2-3b-instruct 131072 0.00000001 0.00000002\n", - "139 liquid/lfm-7b 32768 0.00000001 0.00000001\n", - "46 qwen/qwen2.5-coder-7b-instruct 32768 0.00000001 0.00000003\n", - "243 google/gemma-2-9b-it 8192 0.00000002 0.00000006\n", - "232 meta-llama/llama-3.1-8b-instruct 16384 0.00000002 0.00000003\n", - "84 google/gemma-3-4b-it 131072 0.00000002 0.00000004\n", - "140 liquid/lfm-3b 32768 0.00000002 0.00000002\n", - "115 meta-llama/llama-guard-3-8b 131072 0.00000002 0.00000006\n", - "223 sao10k/l3-lunaris-8b 8192 0.00000002 0.00000005\n", - "253 nousresearch/hermes-2-pro-llama-3-8b 131072 0.000000025 0.00000004\n", - "254 mistralai/mistral-7b-instruct-v0.3 32768 0.000000028 0.000000054\n", - "252 mistralai/mistral-7b-instruct 32768 0.000000028 0.000000054\n", - "268 meta-llama/llama-3-8b-instruct 8192 0.00000003 0.00000006\n", - "219 microsoft/phi-3.5-mini-128k-instruct 131072 0.00000003 0.00000009\n", - "237 mistralai/mistral-nemo 98304 0.00000003 0.00000007\n", - "160 amazon/nova-micro-v1 128000 0.000000035 0.00000014\n", - "24 qwen/qwen3-8b 128000 0.000000035 0.000000138\n", - "193 google/gemini-flash-1.5-8b 1000000 0.0000000375 0.00000015\n", - "155 cohere/command-r7b-12-2024 128000 0.0000000375 0.00000015\n", - "187 mistralai/ministral-3b 131072 0.00000004 0.00000004\n", - "117 deepseek/deepseek-r1-distill-llama-8b 32000 0.00000004 0.00000004\n", - "258 deepseek/deepseek-coder 128000 0.00000004 0.00000012\n", - "203 meta-llama/llama-3.2-11b-vision-instruct 131072 0.000000049 0.000000049\n", - "80 mistralai/mistral-small-3.1-24b-instruct 131072 0.00000005 0.00000015\n", - "20 meta-llama/llama-guard-4-12b 163840 0.00000005 0.00000005\n", - "189 qwen/qwen-2.5-7b-instruct 32768 0.00000005 0.0000001\n", - "97 microsoft/phi-4-multimodal-instruct 131072 0.00000005 0.0000001\n", - "88 google/gemma-3-12b-it 131072 0.00000005 0.0000001\n", - "124 qwen/qwen-turbo 1000000 0.00000005 0.0000002\n", - "132 
mistralai/mistral-small-24b-instruct-2501 28000 0.00000006 0.00000012\n", - "173 qwen/qwen-2.5-coder-32b-instruct 32768 0.00000006 0.00000015\n", - "159 amazon/nova-lite-v1 300000 0.00000006 0.00000024\n", - "316 gryphe/mythomax-l2-13b 4096 0.000000065 0.000000065\n", - "10 microsoft/phi-4-reasoning-plus 32768 0.00000007 0.00000035\n", - "147 microsoft/phi-4 16384 0.00000007 0.00000014\n", - "26 qwen/qwen3-14b 40960 0.00000007 0.00000024\n", - "259 google/gemini-flash-1.5 1000000 0.000000075 0.0000003\n", - "107 google/gemini-2.0-flash-lite-001 1048576 0.000000075 0.0000003\n", - "63 meta-llama/llama-4-scout 1048576 0.00000008 0.0000003\n", - "294 mistralai/mixtral-8x7b-instruct 32768 0.00000008 0.00000024\n", - "264 allenai/olmo-7b-instruct 2048 0.00000008 0.00000024\n", - "163 qwen/qwq-32b-preview 32768 0.00000009 0.00000027\n", - "158 meta-llama/llama-3.3-70b-instruct 131000 0.00000009 0.00000035\n", - "266 neversleep/llama-3-lumimaid-8b 24576 0.00000009375 0.00000075\n", - "207 neversleep/llama-3.1-lumimaid-8b 32768 0.00000009375 0.00000075\n", - "265 neversleep/llama-3-lumimaid-8b:extended 24576 0.00000009375 0.00000075\n", - "212 mistralai/pixtral-12b 32768 0.0000001 0.0000001\n", - "28 qwen/qwen3-32b 40960 0.0000001 0.0000003\n", - "49 openai/gpt-4.1-nano 1047576 0.0000001 0.0000004\n", - "186 mistralai/ministral-8b 128000 0.0000001 0.0000001\n", - "118 google/gemini-2.0-flash-001 1000000 0.0000001 0.0000004\n", - "234 meta-llama/llama-3.1-70b-instruct 131072 0.0000001 0.00000028\n", - "65 mistral/ministral-8b 131072 0.0000001 0.0000001\n", - "240 google/gemma-2-27b-it 8192 0.0000001 0.0000003\n", - "256 microsoft/phi-3-medium-128k-instruct 131072 0.0000001 0.0000003\n", - "255 microsoft/phi-3-mini-128k-instruct 128000 0.0000001 0.0000001\n", - "94 google/gemma-3-27b-it 131072 0.0000001 0.0000002\n", - "22 qwen/qwen3-30b-a3b 40960 0.0000001 0.0000003\n", - "142 deepseek/deepseek-r1-distill-llama-70b 131072 0.0000001 0.0000004\n", - "307 
mistralai/mistral-7b-instruct-v0.1 2824 0.00000011 0.00000019\n", - "205 qwen/qwen-2.5-72b-instruct 32768 0.00000012 0.00000039\n", - "190 nvidia/llama-3.1-nemotron-70b-instruct 131072 0.00000012 0.0000003\n", - "134 deepseek/deepseek-r1-distill-qwen-32b 131072 0.00000012 0.00000018\n", - "220 nousresearch/hermes-3-llama-3.1-70b 131072 0.00000012 0.0000003\n", - "58 nvidia/llama-3.3-nemotron-super-49b-v1 131072 0.00000013 0.0000004\n", - "30 qwen/qwen3-235b-a22b 40960 0.00000014 0.000002\n", - "90 openai/gpt-4o-mini-search-preview 128000 0.00000015 0.0000006\n", - "136 deepseek/deepseek-r1-distill-qwen-14b 64000 0.00000015 0.00000015\n", - "214 cohere/command-r-08-2024 128000 0.00000015 0.0000006\n", - "238 openai/gpt-4o-mini 128000 0.00000015 0.0000006\n", - "103 qwen/qwq-32b 131072 0.00000015 0.0000002\n", - "196 liquid/lfm-40b 32768 0.00000015 0.00000015\n", - "41 google/gemini-2.5-flash-preview:thinking 1048576 0.00000015 0.0000035\n", - "40 google/gemini-2.5-flash-preview 1048576 0.00000015 0.0000006\n", - "239 openai/gpt-4o-mini-2024-07-18 128000 0.00000015 0.0000006\n", - "61 meta-llama/llama-4-maverick 1048576 0.00000017 0.0000006\n", - "67 scb10x/llama3.1-typhoon2-8b-instruct 8192 0.00000018 0.00000018\n", - "130 deepseek/deepseek-r1-distill-qwen-1.5b 131072 0.00000018 0.00000018\n", - "3 arcee-ai/spotlight 131072 0.00000018 0.00000018\n", - "216 qwen/qwen-2.5-vl-7b-instruct 32768 0.0000002 0.0000002\n", - "262 meta-llama/llama-guard-2-8b 8192 0.0000002 0.0000002\n", - "293 mistralai/mistral-7b-instruct-v0.2 32768 0.0000002 0.0000002\n", - "86 ai21/jamba-1.6-mini 256000 0.0000002 0.0000004\n", - "122 aion-labs/aion-rp-llama-3.1-8b 32768 0.0000002 0.0000002\n", - "229 perplexity/llama-3.1-sonar-small-128k-online 127072 0.0000002 0.0000002\n", - "145 minimax/minimax-01 1000192 0.0000002 0.0000011\n", - "291 mistralai/mistral-small 32768 0.0000002 0.0000006\n", - "112 mistralai/mistral-saba 32768 0.0000002 0.0000006\n", - "119 qwen/qwen-vl-plus 7500 
0.00000021 0.00000063\n", - "39 thudm/glm-4-32b 32000 0.00000024 0.00000024\n", - "32 thudm/glm-z1-rumination-32b 32000 0.00000024 0.00000024\n", - "37 thudm/glm-z1-32b 32000 0.00000024 0.00000024\n", - "13 inception/mercury-coder-small-beta 32000 0.00000025 0.000001\n", - "292 mistralai/mistral-tiny 32768 0.00000025 0.00000025\n", - "126 qwen/qwen2.5-vl-72b-instruct 32000 0.00000025 0.00000075\n", - "194 thedrummer/rocinante-12b 32768 0.00000025 0.0000005\n", - "279 anthropic/claude-3-haiku:beta 200000 0.00000025 0.00000125\n", - "235 mistralai/codestral-mamba 262144 0.00000025 0.00000025\n", - "280 anthropic/claude-3-haiku 200000 0.00000025 0.00000125\n", - "55 x-ai/grok-3-mini-beta 131072 0.0000003 0.0000005\n", - "269 meta-llama/llama-3-70b-instruct 8192 0.0000003 0.0000004\n", - "146 mistralai/codestral-2501 262144 0.0000003 0.0000009\n", - "76 deepseek/deepseek-chat-v3-0324 163840 0.0000003 0.00000088\n", - "149 deepseek/deepseek-chat 163840 0.00000038 0.00000089\n", - "270 mistralai/mixtral-8x22b-instruct 65536 0.0000004 0.0000012\n", - "0 mistralai/mistral-medium-3 131072 0.0000004 0.000002\n", - "48 openai/gpt-4.1-mini 1047576 0.0000004 0.0000016\n", - "127 qwen/qwen-plus 131072 0.0000004 0.0000012\n", - "8 arcee-ai/arcee-blitz 32768 0.00000045 0.00000075\n", - "176 thedrummer/unslopnemo-12b 32000 0.00000045 0.00000045\n", - "278 cohere/command-r 128000 0.0000005 0.0000015\n", - "19 deepseek/deepseek-prover-v2 131072 0.0000005 0.00000218\n", - "285 cohere/command-r-03-2024 128000 0.0000005 0.0000015\n", - "7 arcee-ai/virtuoso-medium-v2 131072 0.0000005 0.0000008\n", - "6 arcee-ai/coder-large 32768 0.0000005 0.0000008\n", - "271 microsoft/wizardlm-2-8x22b 65536 0.0000005 0.0000005\n", - "96 thedrummer/skyfall-36b-v2 32768 0.0000005 0.0000008\n", - "144 deepseek/deepseek-r1 163840 0.0000005 0.00000218\n", - "305 jondurbin/airoboros-l2-70b 4096 0.0000005 0.0000005\n", - "318 openai/gpt-3.5-turbo 16385 0.0000005 0.0000015\n", - "319 openai/gpt-3.5-turbo-0125 
16385 0.0000005 0.0000015\n", - "245 ai21/jamba-instruct 256000 0.0000005 0.0000007\n", - "2 arcee-ai/caller-large 32768 0.00000055 0.00000085\n", - "308 pygmalionai/mythalion-13b 8192 0.0000005625 0.000001125\n", - "315 undi95/remm-slerp-l2-13b 6144 0.0000005625 0.000001125\n", - "206 qwen/qwen-2.5-vl-72b-instruct 32768 0.0000006 0.0000006\n", - "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo 32768 0.0000006 0.0000006\n", - "150 sao10k/l3.3-euryale-70b 131072 0.0000007 0.0000008\n", - "121 aion-labs/aion-1.0-mini 131072 0.0000007 0.0000014\n", - "217 sao10k/l3.1-euryale-70b 131072 0.0000007 0.0000008\n", - "5 arcee-ai/virtuoso-large 131072 0.00000075 0.0000012\n", - "295 neversleep/noromaid-20b 8192 0.00000075 0.0000015\n", - "221 nousresearch/hermes-3-llama-3.1-405b 131072 0.0000008 0.0000008\n", - "233 meta-llama/llama-3.1-405b-instruct 32768 0.0000008 0.0000008\n", - "224 aetherwiing/mn-starcannon-12b 16384 0.0000008 0.0000012\n", - "179 anthropic/claude-3.5-haiku-20241022:beta 200000 0.0000008 0.000004\n", - "95 thedrummer/anubis-pro-105b-v1 131072 0.0000008 0.000001\n", - "180 anthropic/claude-3.5-haiku-20241022 200000 0.0000008 0.000004\n", - "51 alfredpros/codellama-7b-instruct-solidity 4096 0.0000008 0.0000012\n", - "50 eleutherai/llemma_7b 4096 0.0000008 0.0000012\n", - "267 sao10k/fimbulvetr-11b-v2 4096 0.0000008 0.0000012\n", - "276 sophosympatheia/midnight-rose-70b 4096 0.0000008 0.0000008\n", - "123 qwen/qwen-vl-max 7500 0.0000008 0.0000032\n", - "161 amazon/nova-pro-v1 300000 0.0000008 0.0000032\n", - "171 infermatic/mn-inferor-12b 16384 0.0000008 0.0000012\n", - "300 undi95/toppy-m-7b 4096 0.0000008 0.0000012\n", - "177 anthropic/claude-3.5-haiku:beta 200000 0.0000008 0.000004\n", - "178 anthropic/claude-3.5-haiku 200000 0.0000008 0.000004\n", - "228 nothingiisreal/mn-celeste-12b 16384 0.0000008 0.0000012\n", - "68 scb10x/llama3.1-typhoon2-70b-instruct 8192 0.00000088 0.00000088\n", - "74 qwen/qwen2.5-vl-32b-instruct 128000 0.0000009 0.0000009\n", 
- "249 cognitivecomputations/dolphin-mixtral-8x22b 16000 0.0000009 0.0000009\n", - "4 arcee-ai/maestro-reasoning 131072 0.0000009 0.0000033\n", - "317 meta-llama/llama-2-70b-chat 4096 0.0000009 0.0000009\n", - "250 qwen/qwen-2-72b-instruct 32768 0.0000009 0.0000009\n", - "230 perplexity/llama-3.1-sonar-large-128k-online 127072 0.000001 0.000001\n", - "287 openai/gpt-3.5-turbo-0613 4095 0.000001 0.000002\n", - "277 cohere/command 4096 0.000001 0.000002\n", - "138 perplexity/sonar 127072 0.000001 0.000001\n", - "137 perplexity/sonar-reasoning 127000 0.000001 0.000005\n", - "303 openai/gpt-3.5-turbo-1106 16385 0.000001 0.000002\n", - "42 openai/o4-mini-high 200000 0.0000011 0.0000044\n", - "210 openai/o1-mini 128000 0.0000011 0.0000044\n", - "211 openai/o1-mini-2024-09-12 128000 0.0000011 0.0000044\n", - "44 openai/o4-mini 200000 0.0000011 0.0000044\n", - "129 openai/o3-mini 200000 0.0000011 0.0000044\n", - "116 openai/o3-mini-high 200000 0.0000011 0.0000044\n", - "312 mancer/weaver 8000 0.000001125 0.000001125\n", - "201 meta-llama/llama-3.2-90b-vision-instruct 131072 0.0000012 0.0000012\n", - "272 google/gemini-pro-1.5 2000000 0.00000125 0.000005\n", - "1 google/gemini-2.5-pro-preview 1048576 0.00000125 0.00001\n", - "248 sao10k/l3-euryale-70b 8192 0.00000148 0.00000148\n", - "181 neversleep/llama-3.1-lumimaid-70b 16384 0.0000015 0.00000225\n", - "306 openai/gpt-3.5-turbo-instruct 4095 0.0000015 0.000002\n", - "182 anthracite-org/magnum-v4-72b 16384 0.0000015 0.00000225\n", - "128 qwen/qwen-max 32768 0.0000016 0.0000064\n", - "169 mistralai/pixtral-large-2411 131072 0.000002 0.000006\n", - "286 mistralai/mistral-large 128000 0.000002 0.000006\n", - "85 ai21/jamba-1.6-large 256000 0.000002 0.000008\n", - "154 x-ai/grok-2-1212 131072 0.000002 0.00001\n", - "47 openai/gpt-4.1 1047576 0.000002 0.000008\n", - "100 perplexity/sonar-deep-research 128000 0.000002 0.000008\n", - "227 meta-llama/llama-3.1-405b 32768 0.000002 0.000002\n", - "153 x-ai/grok-2-vision-1212 32768 
0.000002 0.00001\n", - "168 mistralai/mistral-large-2407 131072 0.000002 0.000006\n", - "98 perplexity/sonar-reasoning-pro 128000 0.000002 0.000008\n", - "111 perplexity/r1-1776 128000 0.000002 0.000008\n", - "167 mistralai/mistral-large-2411 131072 0.000002 0.000006\n", - "166 openai/gpt-4o-2024-11-20 128000 0.0000025 0.00001\n", - "225 openai/gpt-4o-2024-08-06 128000 0.0000025 0.00001\n", - "260 openai/gpt-4o 128000 0.0000025 0.00001\n", - "192 inflection/inflection-3-pi 8000 0.0000025 0.00001\n", - "91 openai/gpt-4o-search-preview 128000 0.0000025 0.00001\n", - "213 cohere/command-r-plus-08-2024 128000 0.0000025 0.00001\n", - "191 inflection/inflection-3-productivity 8000 0.0000025 0.00001\n", - "89 cohere/command-a 256000 0.0000025 0.00001\n", - "64 all-hands/openhands-lm-32b-v0.1 16384 0.0000026 0.0000034\n", - "175 eva-unit-01/eva-qwen-2.5-32b 16384 0.0000026 0.0000034\n", - "290 mistralai/mistral-medium 32768 0.00000275 0.0000081\n", - "195 anthracite-org/magnum-v2-72b 32768 0.000003 0.000003\n", - "284 anthropic/claude-3-sonnet 200000 0.000003 0.000015\n", - "283 anthropic/claude-3-sonnet:beta 200000 0.000003 0.000015\n", - "309 openai/gpt-3.5-turbo-16k 16385 0.000003 0.000004\n", - "184 anthropic/claude-3.5-sonnet 200000 0.000003 0.000015\n", - "183 anthropic/claude-3.5-sonnet:beta 200000 0.000003 0.000015\n", - "275 cohere/command-r-plus-04-2024 128000 0.000003 0.000015\n", - "274 cohere/command-r-plus 128000 0.000003 0.000015\n", - "109 anthropic/claude-3.7-sonnet:thinking 200000 0.000003 0.000015\n", - "110 anthropic/claude-3.7-sonnet:beta 200000 0.000003 0.000015\n", - "99 perplexity/sonar-pro 200000 0.000003 0.000015\n", - "244 01-ai/yi-large 32768 0.000003 0.000003\n", - "246 anthropic/claude-3.5-sonnet-20240620:beta 200000 0.000003 0.000015\n", - "247 anthropic/claude-3.5-sonnet-20240620 200000 0.000003 0.000015\n", - "56 x-ai/grok-3-beta 131072 0.000003 0.000015\n", - "108 anthropic/claude-3.7-sonnet 200000 0.000003 0.000015\n", - "152 
eva-unit-01/eva-llama-3.33-70b 16384 0.000004 0.000006\n", - "257 neversleep/llama-3-lumimaid-70b 8192 0.000004 0.000006\n", - "241 alpindale/magnum-72b 16384 0.000004 0.000006\n", - "165 eva-unit-01/eva-qwen-2.5-72b 16384 0.000004 0.000006\n", - "120 aion-labs/aion-1.0 131072 0.000004 0.000008\n", - "174 raifle/sorcererlm-8x22b 16000 0.0000045 0.0000045\n", - "263 openai/gpt-4o-2024-05-13 128000 0.000005 0.000015\n", - "222 openai/chatgpt-4o-latest 128000 0.000005 0.000015\n", - "170 x-ai/grok-vision-beta 8192 0.000005 0.000015\n", - "185 x-ai/grok-beta 131072 0.000005 0.000015\n", - "261 openai/gpt-4o:extended 128000 0.000006 0.000018\n", - "301 alpindale/goliath-120b 6144 0.0000065625 0.000009375\n", - "313 anthropic/claude-2.0:beta 100000 0.000008 0.000024\n", - "297 anthropic/claude-2.1 200000 0.000008 0.000024\n", - "299 anthropic/claude-2 200000 0.000008 0.000024\n", - "298 anthropic/claude-2:beta 200000 0.000008 0.000024\n", - "314 anthropic/claude-2.0 100000 0.000008 0.000024\n", - "296 anthropic/claude-2.1:beta 200000 0.000008 0.000024\n", - "304 openai/gpt-4-1106-preview 128000 0.00001 0.00003\n", - "43 openai/o3 200000 0.00001 0.00004\n", - "273 openai/gpt-4-turbo 128000 0.00001 0.00003\n", - "288 openai/gpt-4-turbo-preview 128000 0.00001 0.00003\n", - "151 openai/o1 200000 0.000015 0.00006\n", - "282 anthropic/claude-3-opus 200000 0.000015 0.000075\n", - "281 anthropic/claude-3-opus:beta 200000 0.000015 0.000075\n", - "208 openai/o1-preview 128000 0.000015 0.00006\n", - "209 openai/o1-preview-2024-09-12 128000 0.000015 0.00006\n", - "321 openai/gpt-4-0314 8191 0.00003 0.00006\n", - "320 openai/gpt-4 8191 0.00003 0.00006\n", - "311 openai/gpt-4-32k-0314 32767 0.00006 0.00012\n", - "310 openai/gpt-4-32k 32767 0.00006 0.00012\n", - "106 openai/gpt-4.5-preview 128000 0.000075 0.00015\n", - "78 openai/o1-pro 200000 0.00015 0.0006" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"df.sort_values(\"pricing_prompt\")[col_names]" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAABqwAAAHJCAYAAADwyhjGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl0VFXWt58aMlZSGSAEQoAMShImIQgKwYi2imCLLWqD2gqKiC1qK+DYyiAo6CvtAMqkyCCCoiKDjAoSgwwKUQhhzCBDIASSVCWVqZKq7498uVBGEAJHbjjnWetddm6d+9Rv34vd683mnG1wu91uFAqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLhPFSB1AoFAqFQqFQKBQKhUKhUCgUCoVCoVDIjWpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLimpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLivlSB1BcXrjdblwu96WOoUuMRsNFfTYX2yfCKWNGGWsW4dS7T4RTZZTDJ8KpMsrhE+GUMaOMNYtwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjPn2XA0ajAYPBcE5rVcNKcVFxudwUFDgudQzdYTYbCQmxYLeXUlXl0p1PhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4dS7T4RTZdRnRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzXg5EBpqwWQ6t4aVOhJQoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUV3DavMzEweeughOnbsSFJSEm+++SaVlZV/ep/b7WbGjBn07NmTDh060L9/f3755Zc66/Ly8njyySfp1KkTXbt25b///S8lJSV11q1bt46+ffvSvn17evXqxZdffllnTWVlJW+88QZJSUl07NiRhx56iKysrHrXtGjRInr16kX79u3p27cv69evr7OmuLiYl156ia5du9KpUyeeeuopjh8/Xmedy+Vi9uzZ3HrrrbRr146kpCRGjBjhseaBBx4gLi6uzv9lZmbW8SkUCoVCoVAoFAqFQqFQKBQKhUKhUIjCfKkDnI7NZmPgwIFERUUxefJk8vLymDhxIuXl5YwaNeqs986cOZP33nuPkSNHEhcXx/z583n44YdZsmQJLVq0AMDp
dPLII48AMGnSJMrLy3njjTcYMWIE06dP11w///wzTzzxBHfffTcvvfQSmzdv5r///S8Wi4Vbb71VWzd+/HhWrFjBCy+8QHh4ONOmTWPQoEF88803BAYGnldN33zzDa+88gqPPfYY1157LStWrOCJJ55g/vz5dOzYUVv39NNPc+DAAcaMGYOPjw/vvPMOQ4YM4csvv8RsPvU6R40axfr163n88ce58soryc/PZ9u2bXWeW2JiIs8//7zHtcjIyD97VQqFQqFQKBQKhUKhUCgUCoVCoVA0WI6edJB5rASLt5HGVt9LHUeBzhpWCxcuxOFwMGXKFIKDgwGorq5m7NixDB06lPDw8D+8r6KigunTp/Pwww8zaNAgADp37sytt97KRx99xJgxYwBYvXo1+/fvZ8WKFcTExABgtVoZPHgwO3bsoEOHDgBMnTqVDh068OqrrwJw7bXXcujQId577z2tYXXs2DG++OILRo8ezd133w1A+/btueGGG1i4cCFDhgw5r5ree+89brvtNp5++mntO/ft28f777/PzJkzAUhLSyM1NZWPPvqIHj16ABAdHU2fPn1Ys2YNffr0AWDTpk0sXryYr776iri4OO053XbbbXWendVq9WiIKRQKhUKhUCgUCoVCoVAoFAqFQnG5UlLmZMbSXaRnF2jX2kWHMvSOtlh8vS5hMoWujgRMSUmhW7duWmMHoHfv3rhcLjZu3HjG+7Zv305JSQm9e/fWrnl7e3PzzTeTkpLi4Y+Li9OaVQBJSUkEBwezYcMGoOaYvy1btnjspALo06cPmZmZHD58GIDU1FRcLpfHuuDgYJKSkup855/VdOjQIXJycjzy137npk2btOMDU1JSsFqtJCUlaWtiYmJISEjw+M7PP/+crl27ejSrFAqFQqFQKBQKhUKhUCgUCoVCoZCdGUt3kZFT4HEtI6eA6Ut2XaJEilp0tcMqKyuLu+66y+Oa1WolLCzsD2dDnX4f4NGIAoiNjWXOnDmUl5fj6+tLVlZWnTUGg4Ho6GjNcfDgQZxO5x+6AJ544glycnIA8PX1xc/Pr866L774ok5NM2bM4NNPP6WgoICEhASCg4O176z9p9Vq5cknnyQ1NRUvLy86dOiA0+nk0KFDxMbGkpWVRXR0NOvXr+edd94hOzubiIgIgoKCPJ7Pr7/+SqNGjejZsydHjx4FoGPHjkycOJHo6Ght3cmTJ9m2bZvW2PLz8+P+++9n5MiRGAyGMz7vP8Ns1lUfVBeYTEaPf+rNJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zChjzSKcF8t39KTDY2dVLS43pGcXcMJeTtNQ/0uaUWZ01bCy2+1YrdY614OCgrDZbGe9z9vbGx8fH4/rVqsVt9uNzWbD19cXu92uzZY6k7/2n7/PUdvAqaioYPLkyUybNo3t27czceJEj1lUVqvVI6vdbmf37t1s2bLFY77Wr7/+ypEjRzy+c/To0ZjNZm2+1vjx4z0+t9vtuN3uOvO1pk6dSlhYmPad+fn55Obm4u/vT2JiItu3b+fEiRMMHjyYlStXas+ppKSEtm3bkpycTHV1NV988QUffvghRUVFvPbaa2d83mfDaDQQEmKp170yYLX6/fmiS+gT4ZQxo4w1i3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoxw+EU4ZMmYeKznr545K1wX/flvEc5QFXTWs9MzixYsBGDRoENdddx2rV68mJyeHhQsXnnW+FsDWrVvrzNfq1KkTe/fu9ViXlZXFypUrtd1dBQUFjBs3jszMTBITE4GaHWC/n6+1YsUKcnNzNY/b7cbb25s1a9aQmZnJgw8+yLPPPsszzzzDsmXLtJlbX3/9NaGhodp9jz76KElJSSxevJhx48ZhNJ5/J9jlcmO3l573fZc7JpMR
q9UPu72M6mqX7nwinDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYMsCu7gEMnHLQMs9AmKvTPb/gTLlZGf6+znyxm8TZSWOiol1vEc7wcsFr9znnXma4aVlarleLi4jrXbTYbQUFBZ72vsrKSiooKj11Wdrsdg8Gg3Wu1WikpqdtBtdlsNGvWDEBb+/scP/zwA4C2rnb3Vu0sqn79+mnfeXpWPz8/iouL68zX8vb2Ji8vz+M7o6OjPY4ijIqKAmD37t0ABAQEYLPZ6szXatKkCb/99huHDx8mMjISq9VK06ZNady4MZmZmQA0b96cpk2bcuDAAe2+05tVAP7+/rRp04aff/6Z0tJSAgIC6jyrc6GqSv3LeCaqq10X9flcbJ8Ip4wZZaxZhFPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcF4MX15hKa/N/ZmSsirtWoCfmVcGXk1YcP2O2judC80YFuRHu+hQMnIKcLlPXTcaoE1UKI2tvhf8DES8a1nQ1WGKMTExdWZVFRcXk5+fX2em1O/vA8jOzva4npWVRUREBL6+vmf0u91usrOzNUfLli3x8vKqs652blXtupiYGAoKCmjUqJHH2t/PyQoJCfG4r7amsrIySkpKKC8v1z77fQMpOzsbg8HAyZMntc/dbrfHHCqAwsJC7bsBrrjiijM8qZojDc9Gfn4+RqOx3s0qhUKhUCgUCoVCoVAoFAqFQqFQyMnvm1UAJWVVjJvz8yVKVJehd7Sts+urTVQoQ+9oe4kSKWrR1Q6r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSko6432JiYkEBATw+uuvc/DgQQoKCoiPjyc3N5ebbrrJw7906VIefvhh0tLS8PLyokOHDhQVFXH99dcDNbufrrnmGj777DO+/PJLsrOziYiIoKSkhNDQUCIjIwHo0aMHRqMRh8PB3Llz+eSTT2jfvj07d+5k2LBh2ndGRkZy8OBBHnnkEY+aDAaDNl+rRYsWmEwmdu/eTXJyMnl5eTz33HN8++23hISEaLu9rrzySgDGjRvHf/7zH8rKyvjss8+0XVS1s66Cg4P56aef6NatG6WlNcfzrVy5kmPHjtG27al/6W688UZtjtbpnK05eC6Yzbrqg+oCvQ4ZFOmUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSp5h2ZJ+o0q2opKati98FC2sc0qpf7YtYcFODDc/cnkl9Uhr28GquvibDgC587JeJdy4bB7Xa7/3zZX8OhQ4e45ZZbsFgsPPLII/z222989dVXtG7dmmXLlmnrBg4cSG5uLmvXrtWuDR48mNTUVG666SYSExOZPXs2x48fZ+7cuVxzzTUAlJaWav/5kUceoaSkhE8++YTg4GA2bdqkuebPn8+rr75KbGws99xzD2vXrmXbtm0kJiayYMECbV2fPn3IzMwkKiqKO++8k5kzZ1JaWsq6deu0owMnTpzIxx9/TGBgoEdNzZo14+jRo6SkpBAeHk779u2prKykbdu27Nq1i44dO5Kenk779u3x9/dn1qxZ/PDDDzzyyCN4eXkRGxvLnj17iI2NxWg0sn//ft566y1uv/12kpOTKS4uplGjRlx99dXa/K3GjRvz7bff4ufnx88//8zgwYO54ooruPPOOzl06BDz5s3D5XKxYMECOnXqVK936Ha7MRjOfg6oQqFQKBQKhUKhUCgUCoVCoVAoLi8WrNnDp6v3nvHz+3rFce8t8X9hIkVDQ1c7rFasWIG3tzfx8fFMnToVi8VCUlISmzZtIi8vj/DwcABcLhfV1dXafRUVFaSlpdG1a1fS09NJSUkhPj4el8vFypUrtSbVunXrqKysJCkpidmzZ2M2
m+nevTupqans2LGDDh06aOtiYmIwmUxMmjSJiIgITCaTx9F/x44dIzs7Gy8vL/Ly8pg6dSodOnRgx44dLF++nCFDhgDw22+/AdC6dWuPmjZu3OgxXysuLg6omYEFcOTIEaZMmcL06dO1NbVNsGuuuYaff67ZQtmiRQseffRR7rvvPm3d119/jcPh4LXXXmPFihVAzZyswMBA/PxqOsVhYWG43W4OHDjAhAkTcLlc+Pn58f7779e7WVXzbtzY7aX1vv9yxWTS/yBElVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM2NDqHlXdgGHTjhoGWapc1Te+RARevZdSs0b+VNY6KiX+3hRGcUXcTcUNIx3fTlgtfqd864zXTWsUlJSSEpK4oMPPtCu2e12unbtysaNG+nXrx8A8+bN87hv+/btOBwOXnrpJRISErTrEyZM8NiFVdvImjVrlnbN7XZz7bXXsmHDBjp06EBlZSVbtmxh5MiRDBo0SFt32223ceDAAQ4fPkxkZCSpqam4XC7cbjevvvqqlu2JJ54gJSVFa1gdPnwYgFGjRhEfH6/V1KVLF4KCgjzma+3bt481a9YQFxfHQw89RM+ePXnhhRe04xBr52slJydz22238eKLLzJhwgR++eUXzQE1s65CQ0OZNm0aW7Zs4cEHH6Rnz55s2LBBq6dVq1Y0btyYHj16sH//fo4ePcpnn32mNQUvBDVQ7szocRCiaKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU8aMMtYswiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwXQxnXmFpnZlTAX5mXhl4NWHB/ufta9MqlAA/8x8eCxjgZyahZch55y0pczJj6S7Sswu0a+2ia+ZNWXy9zjvjH9EQ3rUs6KphlZWVxV133eVxzWq1EhYW5rG7CSAzM5Px48eTlpaG0VjTnaudL1VLbGwsc+bMoby8HF9fX7KysoiJiWHGjBl8+umnFBQUkJCQQJMmTTT/wYMHcTqdhIaG8uSTT5KamoqXlxdNmjQBID09ncjISLKysggICKC0tJQPP/yQ0aNHExERQVRUlEfW/Px8vLy8GDVqFEeOHMHhcHDVVVdhNBq1HVNwar7WgAEDAJgyZQo7duz4w/la8+fPp6SkBIB//etfNGrUiNjYWK3+yspK3nnnHX799Vd27Nih5f79bCq3282iRYtwuWr+5bnlllt48MEHGTFixHm9N4VCoVAoFAqFQqFQKBQKhUKhUDQsft+sgppZU+Pm/Mx7/0mul/OVgVczbs4fN8Hqw4ylu8jIKfC4lpFTwPQluxjev2O9nAr9oquGld1ux2q11rkeFBSEzWbTfrbZbAwcOJCoqCgmT57MwoUL+fbbb3n77bcZNWqUts5qteJ2u7HZbPj6+mK32zl8+DBr165l5MiRxMXFMX/+fNatW0dgYKDmBnjvvffw8/Nj0qRJlJeX89prrwHw7rvvEhAQwK+//orDUbN98eqrr2bUqFFs3ryZqVOnemQvLi6mcePG/Prrr9x2223aDi+Xy0WLFi20dd26dcNoNLJnzx4ArrzySlavXk1ERIR2VCFAx44dSU1NJSIiAgCz2czWrVt5+umntTXl5eUsWLCAFi1a0LRpUw4ePEhmZiaDBw9m586dtG/fHqg5WtHlchEXF0fr1q1Zt24dM2bMoKCggNGjR+Pt7X2eb5D/n0kNlfs9eh2EKNIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zKjXmndknvjDnVBQ07TafbCQ9jGNztvbrHEAH4zoSUZOAQfzL+yYwaMnHR47q2pxuSE9u4AT9nKahp7/TrBaGsK7lg1dNazOlYULF+JwOJgyZQrBwcGkp6fz/fffs3DhQoYOHXrGY+3cbjcZGRkM
HjxYO+6vc+fOXH311Rw5csRj7aFDh1i5cqW2K8lgMPD000/jdDoZNmwYbrcbk8lE27ZtefXVVwG49tprmTNnDqWlpR7feezYMW666Sa2bdvG2rVrufLKKyksLOTYsWPaui+++AIvLy+uueYavv/+e/bs2UOnTp345ZdfPOZ3LV++nKuvvlo7atDhcNCyZUu2b9+uuaxWK23btuWnn37yqOmjjz6ioKCAiRMnAnDixAkA9u7dy969p4bhffHFFwwZMoSoqKhzeyGnYTQaCAmxnPd9smC1XpzzVUX5RDhlzChjzSKceveJcKqMcvhEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinynj5+47kl7Bvdx4RjS1EhAXUy5FbcOjs33GylOTOLevlBkgKsZBU77tryDxWctbPHZWui/K7aD2/a9nQVcPKarVSXFxc57rNZiMoKEj7OSUlhW7duhEcHKzdV1VV0w0+fdaV3W7HYDBo95pMJqqqqujdu7fm8vb29tjBVbs2MjLS4wi92vlTHTt25K233mLixIl8/PHH9OnTxyPrddddx+rVq7VZV76+vjgcDl5//XWPGjp06EB+fr5HTbXzu+Li4vjPf/7DPffc4zG/69ChQ+Tk5PDss89it9t58cUXWbRoEcuXL+fNN9+ksrJS2xX1ySefcOTIEf7xj39gt9v54YcftGMNAcrKyqiurmbs2LHaMYQAd911F+np6eTn59erYeVyubHbS/98oWSYTPof4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mvJi+kjInUxfvZGfWqV1H7WNCefzO9lj8zm+eU0To2ZsqzRv5U1joqFdOuDh1+3sZzvq5xdt4yTOK9F0uWK1+57zrTFcNq5iYmDqzqoqLi8nPz/doHv1+1lXtZyEhIR73Z2VlERERga+vL4B27N/pLrfbTUlJCWVlZZSXl9OyZUsMBoO2tpbs7GwtD9Q0yYA6u7lqZ0tlZWURGRlJQEAATqfTo1lVXFxMRUWFR3PuXOZ31f4zOjqaX3/9VVsXGxuL0+nk0KFDxMbGAlBQUMDgwYOxWCzY7XbMZs9XXVlZidvtrnPsX+08sMzMTLp06UJ9UAPlzkxDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzXgzfB1/trDPPaVd2Ae9/tfO85zm1aRVKgJ/5D48FDPAzk9Ay5KLUfyF1hwX50S46lIycAlzuU9eNBmgTFUpjq+8lz/hX+GRCVw2r5ORkpk2b5jHLatWqVRiNRpKSTm0g/P2sq8TERAICarY+1u6UcjqdrFmzhuTkZG1dREQEO3bs4OjRo9ruoU2bNmlH+NlsNsLDw/Hx8fHY/QSwYsUK/P39cTqdAFxxxRUAZGRkaDu2bDYbaWlpHjnCwsI4fvx4nZoMBgPl5eVnrKmW03d/1f7z9+tqf6793OFwMGTIEJxOJ4MGDWLChAl/6PXz82Py5Mm8+eabFBYW8uqrr2oztE6fGXa+qBlWdWkI56GqjPrziXDKmFHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVGfGS+WT8Q8pzEPd2XMrK0eTasAPzNjHu56wb/jvVh1D+vXng9+t6usbXTNrjK9ZBTlkxFdNawGDBjAvHnzGDZsGEOHDiUvL48333yTAQMGeOxkqq6u5qOPPuLRRx8FwMfHh6FDh/K///2Pffv2sWnTJhYsWEBRURGDBw/W7rvyyitZvXo1Tz75JMOHD6esrIw333yTNm3akJGRoa0LCgri+PHjjBkzht69e7NlyxaWL19OQkKCtqZRo5qBc/PnzycqKorw8HCmT5+OxWLxmGEVGxvL7t2769TUsWNH0tPT
tXVut5t9+/axatUqAO0/Oxx1tzSuX79eayytX7+ekydPenz+5JNPsmfPHl577TVycnIASE9Px2q10rFjR6BmFpbb7SY3N5fWrVtTWFjIW2+9pTXkDIazb7c8E2qG1dlpCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjBfqEzHPKSTEwoLxt5G29zh7fisgvlUoneKa/PmN58GF1h0SAq8Pu47c/BJyTzguaG7XmdDbu5YZXTWsgoKCmDNnDuPGjWPYsGFYLBbuvvtunnnmGY91tbOoTmfIkCFMmzaN/fv38+ijj5KQkMBHH31EixYttDUhISG43W5atGjB8OHDMZvN3HzzzcTHx7N7927t2L4mTZrQokULtm3bxhdffEFERATjx4/niy++0NbU/jMpKYlJkybhcDhITEzk9ddfZ8iQIdrnjRs3xmq1YjKZPGoymUwcOnRqsJ2XlxfLli1j2bJlAHz99dd8/fXXGI3GOt/5yiuvaPe99NJLHs8PauZ4ATz//PMezwdg7969QM2Mrnbt2rFjxw727dsH1Bxn+O9//5sPPviAsLCwc3pnv0fNsPpjTCb9n4eqMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZjxeVEZxeTVWXxNhwfVvZIic5xTbLJBOcU2w28suaCbU6Vzs5xjgY+LqhHBdZxTx5/tywGptoDOsoGZH0uzZs8+6pmPHjgQHB3tcKykpobS0lNdff51+/fr94X21s6ueeuopPvjgA+36xIkTPWZdxcTEsG/fPq15BDU7oP7v//5PO5qwZcuWeHl5cfXVVzN58mRt3bp16zy+KyYmhqKiIt59912POVZPPvmkxyytdu3aERwczPvvv69dKy4upkuXLh4ugPfff5+bbrpJWzdv3jzeeOMNrTlX25QC+Oqrr3jxxRfZtGkToaGhHs9w/vz5QM3uq7vuuouJEycSHR3NBx98wFVXXfWHz/BcUOdznpmGcB6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2WUwyfCqTLK4RPhVBnl8IlwyphRxppFOGXIWFLmZMbSXR7H+LWLDmXoHW2x+Hqdt++vmOckw3v5K5wiMspCgzxMMTk5mR9//BG73a5d+6NZV7+ndtbVypUrtWt/NOsqOTmZPXv2aMfpQc2sq6KiIq6//noAvL29ueaaa1i9erXHd6xYsYLY2FgiIyMB6NGjB0ajkTVr1mhrbDYbqampdb7zz2pq0aIFUVFR2rGBp39nt27d8Pb2PvNDOwu1zT8fHx/mz5/P1Vdf7dFMUygUCoVCoVAoFAqFQqFQKBQKxbkzY+kuMnIKPK5l5BQwfcmuejuH3tGWNlGhHtfaRNU0wRSKywHd7bDKzMxk/PjxpKWlYbFYuOOOO3j66ac9mjFnmnXVvn17+vfvT0FBAQkJCVRVVWG321m7di1Q05C57777mDFjBh999BHe3t4EBwfXmXXVq1cvJk2axO23347L5SIkJITKykp69uxJhw4dtHVDhgxh0KBBXHXVVbhcLpo0acKRI0d4++23tTVNmzbllltuYfTo0YwdOxY/Pz/8/f0JCAhgwIAB2rp+/foxbdo0unfvDtQcS1hYWOgxv6u4uJjAwECWLVvGihUr8PPzIyAggPz8fG23FMALL7zA4sWLPZ5rt27dABg0aBAvvvii9p83bdqkrfnPf/4DwJgxY+rx5k5xocPuLkcawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqJ+PRkw6PnVW1uNyQnl3ACXs5TUP9z9sbFODDc/cnkl9Uhv0iHDNYiyzvRbRTREbZ0FXDymazMXDgQKKiopg8eTJ5eXlMnDiR8vJy
Ro0apa37o1lXrVu35pdffuHZZ58lLi6O+fPn891333nMYnI6naxfv57Q0FCqq6spLi7m+PHjtGvXzmPW1a+//sqxY8eIiIjgxIkT2Gw2Kisr6d27t0feFStW4Ovri9Vq5eTJk+Tn5xMYGEiPHj08avrpp59o3LgxpaWllJWV4XA46N27N4GBgdq6999/H5fLRfPmzcnNzSU/P5+qqir69u2rramsrKRVq1bExsayadMmTp48SXV1NWaz2eOIxMcff5zU1FTy8/PrPOPS0lKqq6sxmUwYjUa8vLwwGAxUVlYSGxvL448/ru0iqw9Go+G8h/vJREMY4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwiXCqjHL4RDhVRjl8IpwyZpSxZhHOyz1j5rGSs37uqHRd0O9QRf3+9XJ/L3+VU0RGWdBVw2rhwoU4HA6mTJmiNWCqq6sZO3YsQ4cO1XYageesq4qKCrp3787gwYMZNGgQAJ07d+bWW2/1OHZv9erVHDhwgBUrVmhH3qWmpjJ48GB27Nih7Z6aOnUqV111FQsXLtTuHTFiBDNmzOAf//gHAMeOHeOLL75g9OjR9O/fH4CioiJuuOEGFi5cyJAhQ7SaSktLWb9+vVbTZ599xtixY3nuuecIDw8nLy+Pzz//nBdffJEHHngAqJmZ1bdvX6ZPn87UqVMBaNSoEZMmTfJ4Zg6HQzua8LHHHgNq5mulpqZqa7Zs2cKDDz4IwOeff05OTg7z5s2jSZMmREVFMW3aNP72t7/x1FNPceutt57nW/PE5XJjt5dekONyxGTS/wA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTJeHOfxojKKL3D3kr+X4ayfW7yNFBY66uUGOd+LrBkvB6xWv3PedaarhlVKSgrdunXz2C3Uu3dvRo8ezcaNG+nXr98f3rd9+3ZKSko8dkB5e3tz8803a8cB1vrj4uI85jMlJSURHBzMhg0b6NChA5WVlWzZsoWRI0d6fEefPn1Yvnw5hw8fJjIyktTUVFwul0eDJzg4mKSkJFJSUrSG1bnUtGfPHqqrqz3mbxkMBnr06MEnn3xCZWXlGedT+fv74+Pjg9PpPONzbdKkCQaDgQEDBnDXXXdhsYjdAaUGyp2ZhjDAT2XUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqkyyuET4VQZ5fCJcKqMcvhEOGXMKGPNIpx6zFhS5mTG0l0eR/m1i66ZD2Xx9TovV1iQH+2iQ8nIKcDlPnXdaKiZOdXY6ntR6pfhvYj2iXCKyCgLujpMMSsry6OZBGC1WgkLCyMrK+us9wF17o2NjSU3N5fy8vIz+g0GA82aNWPBggV07NiR6667DqfTScuWLeu4Tv+urKwsGjVqxGeffabNturfvz8BAQEeWbOysggPD+fJJ5+kU6dOdO3alTfeeIPGjRtr6yorKwHYtm0bffv2pX379vTq1YusrCwqKys5fPiw5qusrGTixIl0796dq666ihtuuAFA2/kFsHLlSv7973+TnJxMx44deeCBB3C73QwePJj27dt7PIPffvuN22+/HaiZYdWzZ88zPmeFQqFQKBQKhUKhUCgUCoVCobgcmbF0Fxk5BR7XMnIKmL5kV718Q+9oS5uoUI9rbaJqGmAKheKP0dUOK7vdjtVqrXM9KCgIm8121vu8vb3x8fHxuG61WnG73dhsNnx9fbHb7R5zo6BmxlRmZia+vr5MnjyZrVu3MmPGDL7++mtuvPFGD1ft+trvrK6u5r333mPkyJHa3KxvvvkGl+tU99Rut7NmzRpCQ0OZNGkS5eXlvPHGGzgcDs3VqlUrAF555RX++c9/8tJLL7F582btKMDTax8/fjyLFy/Wmly1c7NO38E1e/ZsmjdvzgsvvEBISAgjRowAYMmSJTzxxBPauuDgYPr27YuPjw/z
58/HaDRy9OhRpkyZ4rHufDGbddUH1QUNYYCfyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnr7zx60uGxs6oWlxvSsws4YS+naaj/eTmDAnx47v5E8ovKsF/gEYO/R5b3ItInwikio2zoqmF1KVi4cCFVVVXEx8dz3XXX4e/vz4wZM1i7di15eXkec7NOp7q6mqKiIh599FGPuVlJSUmUlJwaqudyuThx4gTz5s3TdjZZrVYGDx7MyZMnAWjdujVWq5Xy8nLuvPNOoqKi2LVrFwaDAbfbjcFQc+Zp7dysZ555hmuvvZb8/Hw+/fRTfvjhB959911efvlloGYGV2hoKACZmZmcPHmSTp068fHHH/P4449jNNb8C1NUVMTixYs9sgLMnDmz3g0ro9EgbOjf5UBDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRzovpe3vBNtIzT9Lhisb8Z0BivRyZx0rO+rmj0lXv33mK/F2pnt+LKKesGWVBVw0rq9VKcXFxnes2m42goKCz3ldZWUlFRYXHLiu73Y7BYNDutVqtHs0kqJkxFRgYSOPGjQG0tS6Xy2Nult1u9/i8rKwMt9tdZ25WVFQU6enp2jWz2UxAQECduVkGg4HCwkKg5pi/0tJSGjduzIABAwBo3rw5t912G8uXL6e6uhpAm5v1z3/+U8vRs2dPunbtytKlS7WGVW2zCmDZsmWYzWb+9re/kZaWRmlpKQEBAQBMnDiRxx9/nL59+7Jw4UJmz57NDz/8wIkTJygvL8fX1/eMz/xMuFxu7PbS877vcsdk0v8AP5VRfz4RThkzylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYswiljRhlrFuGUMaOMNYtw6t0nwilbxtQdR5ixdLf287c/HeLbnw7x2B1t6N4+4rxc/l6Gs35u8TZSWOioV07Z3osop6wZLwesVr9z3nWmq4ZVTExMnVlVxcXF5Ofn15k99fv7ALKzs4mPj9euZ2VlERERoTVeYmJi2Ldvn8e9mZmZlJeXa46WLVvi5eWFj49PnVlUp3+X2Vzz6GobXbVUVVXhcrm0ho/ZbMbLy3MoX0lJCW63m6qqKgAOHjxIVVUV48aNIyYmhvLycqKjo/nf//4HgMPh0DI0atTIo3lnNBpp3rw5+/fv/8Nn880339CtWzf27NlDeHi41qyq5bXXXuOOO+7weG4Xihood2YawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPhlDGjjDWLcF4M3+nNqtOZtiSDrglNz8sVFuRHu+hQMnIKcLlPXTcaauZONbb6XnBeWd6LaKesGWVBVw2r5ORkpk2b5jHLatWqVRiNRpKSks54X2JiIgEBAaxcuVJrvDidTlasWAFAx44dsVgstGvXjj179pCTk0NUVBRQs3vL5XJx/fXXAzW7pLp27cqWLVv45JNPmDt3LgkJCQQEBBAbG0tkZCQATZo0AeDRRx8lJycHLy8vrr/+eg4cOKB5fX198fb25sSJE9x2220cPHiQiIgIOnXqpH1X7VqomTG1efNmHA4HHTp04NChQx6f2+12fH19eeihh0hLS8NisXD77bdz4sQJj7lZZWVlfPDBByxevJj8/HxsNht2u53nnntOW1NZWcnw4cP54Ycf8PLyYuHChfTu3Zvi4mKuvPLKeu2uqkXNsKpLQzgPVWXUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRThlzChjzSKceveJcMqUcfqSnWf9/OMVGQzp2+68nMP6teeDxTvZmVWgXWsbHcrjd7a/oN93yvReRDplzSgbumpYDRgwgHnz5jFs2DCGDh1KXl4eb775JgMGDPCYJTVw
4EByc3NZu3YtAD4+PgwdOpTJkycTGhpK69atmTt3LsePH6ddu3a89tpr5OXlMWHCBKxWK08++STDhw+nrKwMl8tFTEwMHTp00PyRkZFs3LiRoKAgHnnkERYtWsQvv/zCK6+8oq3x968Zsrdnzx7uv/9+/P39mTVrFm73aS14wMvLi+rqamw2G0899RTbtm1j8eLFhISEeBxfCLBmzRoeeOABXC4XCxcupLKy0uPzffv2ceTIEby9vXniiSc4ePAgc+fOxeVyaXOpAF599VXWrFlDmzZtKCoqwul04na7KSoq0tbs3buXtWvX0qhRI5o1a0Z6ejpbt26loqKC//znP/V8g2qG1Z/REM5DVRn15xPhlDGjjDWLcMqYUcaaRThlzChjzSKceveJcKqMcvhEOFVGOXwinCqjHD4RThkzylizCOeF+vYftp/1872HbOf9O8qQEHh92HXk5peQe8JBRGMLEWEBf37jOSLDe/krnLJmlAVdNayCgoKYM2cO48aNY9iwYVgsFu6++26eeeYZj3Uul4vauU61DBkyBLfbzaxZsygoKCA0NBRvb28+/PBDgoODAaiurmbMmDFcddVVDB8+HLPZjLe3N8nJyZqnoqKCb775Bm9vb9xuN//73/9o1qwZwcHB2u4pgCNHjgDQt29fVqxYgcPh4MorryQjI8NjblZZWRmBgYFcccUVTJkyBYvFQkxMDEeOHNHWOJ1OoGbH1dy5cwkODua2225j2bJlVFdXa+vKy8txu90UFhbyzjvvEBYWRlxcHBkZGR6zt1auXMlDDz3EZ599ho+PDxEREVx55ZWsWLGC4cOHA7Bu3Tr8/PwwmUzs3r1bu7dZs2Zcc801VFZWajvAzgc1w+qPMZn0fx6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhbAgZjxeVUVxejdXXRFhw/X+pf2WkleOFZWf8PK5FUL1nTgX4mLg6IRy7vazejtNpCO9FZdRvxssBq7WBzrACiI2NZfbs2WddM2/evDrXDAYDQ4cOZejQoQDcf//9tG3bVmtWAfTu3ZvRo0fTu3dvZs6cqa07fPiwtmb79u2UlJRgMBh49tln6devHwATJkzQdnQBHDt2DICHHnqIN954AwC3281VV12Ft7c3vr6+VFZW4nA4aNKkiUdN3377LcOGDdPmX+Xk5AA1xwvW5gc4cOAAv/zyizY3q6KiAoDVq1drDSq73U6XLl20n2tnY/n7+9OqVSuOHj3Khx9+yMyZMz12fx09epSysjLKyk79F3thYSEAXbp0YcyYMdx7771neQtnRp3PeWYawnmoKqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWNGGWu+GM6SMiczlu4iPbtAu9YuOpShd7TF4ut13r7Bt7Vl4868M37+UJ82ups5pcf3ItonwilrRlnQXcPqYpGVlcVdd93lcc1qtRIWFkZWVpZ2LTk5malTp/LAAw+wc+dO7Wi938/Nio2NZc6cOZSXl+Pr64vNZsNsNvP6669z8OBBCgoKiI+Pp7q6mpCQEAAOHjyI2+3m+PHjPPzww6SlpeHl5cWVV14JoM3DOnjwIF5eXixZsoRvvvmG7OxsIiIiKC0txWQyaeuKioowGAw888wz7N27F4fDQfv27QEICwsDwGQyERQUxFtvveVRYy3ffPMNt912G0OGDGH37t3s2bOnzrObOHEi3bt3r+eTVygUCoVCoVAoFAqFQqFQKBSKU8xYuouMnAKPaxk5BUxfsovh/TvWy/lo3wRmLN39h9cVCkXD5LJtWNntdqxWa53rQUFB2Gw27ec+ffrwzjvvsHv3bh577DFWr15NRkYGsbGxHnOz5s6di9vtxmaz4evrS0lJCY0bN2bLli3cdNNNJCYmMnv2bKqqqrTvrf0es9nMTz/9xCOPPEJJSQmf
fPIJAKGhoR5ZMzMziY2NZfjw4axdu5acnByP2VQlJSWEhISwceNG+vXrR6tWrbSdYk2aNNHWJSYmsmbNGo+6mzdvzrFjx+jcuTNQ04CzWq0kJiZyzTXXMHXqVLp168ahQ4fo06dPnfla58OFDCG8XGkIA/xURv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinjBllrFmEU8aMMtYswql3nwinXjMePenw2FlVi8sN6dkFnLCX0zTU/7y9PTo0p0eH5ny0LJ3dB20ktAxi8O3t6p2zFlnei0ifCKesGWXjsm1YnSsrVqzA29ub+Ph4pk6ditFoxGAwcODAAfLy8rSm1enH6dX+nJ+fT9euXUlPTyclJYX4+HgKCws5ceKEx1qn00lSUhKzZ8/GbDbTpUsXtmzZwsGDB7U1paWlxMTEYDKZmDRpEhERETRr1oyjR496fGdBQQHdu3dnw4YNOBwOOnTowNatWz1cP/74Y506a2duVVVVeVy3Wq20bNkSgJCQEE6cOHFBzSqj0XDeAw1loiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRn35tu89zt7NvxHfKpROcU3+/IbzQK81i3TKmFHGmkU49e4T4dRbxsxjJWf93FHpuqDfJ4588Jp633s2Lvf38lf4RDhlzSgLl23Dymq1UlxcXOe6zWbT5j0BpKSkkJSUxAcffADA/PnzefXVV3G73dpOJoCBAwcyatQo7V6TyUR1dTUvvfQSCQmntpled9112s6q2rWRkZHMmjVLW5Odnc2tt95KdnY2AAEBAZSVldG/f38GDRqkrXvqqac4evQohw8fJjIyEl9fXxwOB++8845HDR06dCA/Px+Affv2UVJSwtSpU7nxxhuBmrlc9957L2az2WOm1+955ZVXtF1f9cXlcmO3l16Q43LEZNL/AD+VUX8+EU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8a8glLGfryVkrJTf2E0wM/MmIe70iTk/HcQiMgoyifCKWNGGWsW4dS7T4RTrxn9vQxn/dzibaSw0FEvN+j/Oer1vYj0iXDKmvFywGr1O+ddZ5dtwyomJsZjVhVAcXEx+fn5xMTEaNd+P+uq9rOQkBCP+7OysoiIiMDX1xeAwMBAj/VQswOqpKSEsrIyysvLadmyJQaDQVtbS22jqrahVnuE4OlHEELNEYC13x0ZGUlAQABOp9OjWVVcXExFRYXmOnDgAIBHE2358uX4+PhQUVFBXl4eAQEB2mdbt25l48aNAPz73/9m5MiRdOnSpe4DPQ/UQLkz0xAG+KmM+vOJcMqYUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRjl8Ipwqoz58v29WAZSUVTFm1lbe+0/yGe46P/RW81/hlDGjjDWLcOrdJ8Kpt4xhQX60iw4lI6cA12mHWBkN0CYqlMZW34uSV+/PUW/v5a/wiXDKmlEWGmzDKjMzk/Hjx5OWlobFYuGOO+7g6aefxtvbG4Dk5GSmTZvmMctq1apVGI1GcnJy6NmzJwUFBVRUVGiNIaiZ/xQQEIDL5WLFihXMnz8fs9lMVVUVffr00dZFRESwY8cOFi9ezKeffkp2djYhISGUltbsLrLZbISHh+Pj40N+fj5vvPEGS5cuxeFwYLFY8PX1xel0AnDFFVcANUf5ff7556SlpeHn54fdbtdcAGFhYRw/fpzHHnuMjIwM8vLy6NWrFwaDgfLycqBmVhXACy+8QG5uLnl5eVqTq7KykoiICK0Gt9ut5QX45Zdf+Ne//sXgwYN57rnn6v1u1AyrujSE81BVRv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinLBl3ZJ6o06yqpaSsit0HC2kf06jefj3WLNopY0YZaxbh1LtPhFPPGYf1a88Hi3eyM6tAu9Y2OpTH72x/wb9L1Ptz1PN7
EeUT4ZQ1o2wY3L8fztQAsNls3HbbbURFRTF06FDy8vKYOHEiffv2ZdSoUR5roqOjPdbExMSwa9cuRo4cSVxcnHYE37fffkuLFi0AmDp1Ku+88w6+vr4MHTqU9evXs3PnTrp27crcuXMBmDJlCpMnTwZqjgFs3749c+fO1ZpfKSkphIeHk5ycTF5eHl5eXjz44IMcP36cZcuWYTab6dy5M3PnzmXbtm3cd999GAwGWrVqxZ133snXX39NTk4Obrebt956i9tvv53nnnuOpUuX4ufnR5cuXdiwYQM+Pj60adOG9PR00tPTqa6uJjk5maKiIvr06YOPjw+LFi0Cahpeqamp2nMcOXIkq1at4sYbb2T16tUMHjyYRYsWER0dzeeff16vd+N2uzEYzr7NV6FQKBQKhUKhUCgUisuRBWv28OnqvWf8/L5ecdx7S/xfmEihUCj0RW5+CbknHEQ0thARFvDnNygUCqlokDusFi5ciMPhYMqUKdpMpurqasaOHcvQoUMJDw8nKCiIOXPmMG7cOIYNG4bFYuHOO+/kiy++4OGHH9YaVWazmerqaj766CPGjBkD1MycAjAYDEydOpWEhAReeOEFJkyYwI4dO+jQoQMhISFAzZyqbdu28euvv9KrVy92795NRkaGdmxfSEgIeXl5hIaGMnfuXCIiIvjvf//LhAkTKCoq0hwARqMRm83G1KlTSUxMpEePHsybN4/anmLjxo0JCgoiISGBLVu2ADXzqzp27MihQ4cAMJlMzJ49mzlz5vDjjz9y7NgxjEYjN954I99++y3p6em0a9cOgO+//x6n08nq1asB+Oijj4CaOVj1Rc2w+mNMJv2fh6oy6s8nwtkQMu7KLuDQCQctwyy0iQrVXT4RTpVRnxllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpS8aI0LMPWW/eyP+yntMiwiljRhlrFuHUu0+EsyFkDPAxcXVCOHZ72QX99+Hp6P05NoT3ojLqN+PlwGU/wyolJYVu3bppzSqA3r17M3r0aDZu3Ei/fv0AiI2NZfbs2dqaTZs2MWfOHHr37q1d69ixI8ePHyclJUW7tm7dOgBGjRqludxuN1OnTmXDhg106NCBli1bAnDXXXfx/PPPa/c+/vjjZGRkcOLECSIjI7WZV8uWLdMaU263mzfffFObO1U76yomJobly5drruXLlzNv3jzy8/OBmnlZNpuNd999l6CgIOLi4rjhhhv45ZdfPGZpXXnllYwfP57y8nK6detG3759efDBB/n22285fvy4tm7gwIHMmjWLtLQ07drYsWO1BlZ9UedznpmGcB6qyqg/nwinHjPmFZby2tyf6wynfmXg1YQFX9hw6ouR769wqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCOflnrFNq1AC/Mx/eCxggJ+ZhJYhUsxpEeGUMaOMNYtw6t0nwqkyyuET4VQZ9emTiQZ5mGJWVpZHgwbAarUSFhZGVlbWWe8DPO5NTk7m2LFjHDlyRJsD9euvv2IwGEhKStLWGQwGoqOjNUejRjVnTh87dkxb43Q6SU9P9/iu2qZaYWGhtm7Tpk04nU4cjpq/ReDt7Y3JZNJmVtXy/fffYzKZtHt79OiB0WhkzZo12pry8nJSU1NJTk6uU++6desoLS3l9ttvZ9u2bXVqr73/2muvpU2bNtx666188803tG/f/ozPUKFQKETx+2YV1JzzP27Oz5cokUKhUCgUCoVCcf68MvBqAvw8/35w7V/EUigUCoVCoVCcmQa5w8put2O1WutcDwoKwmazaT9nZmYyfvx40tLSsFgstGrVCm9vb3x8fLQ1AwYM4MMPP6SyspK1a9dSWVnJ0aNHCQ0NpX///hQUFJCQkEBVVRUHDhzA37/mb/mXlZUBsHLlStauXYu3tzfBwcHarqnaHI0aNcJsNnP77bfjcrkICQmhsrKSmJgY7Rg/qNl1lZeXx1VXXYXL5aJJkyYcOXKE8PBwzdW0aVNu
ueUWRo8ezdixYwH4+OOPCQgIYMCAAZprxYoVrFy5kg0bNgDw448/smjRIv72t78RFRWlrZkyZQpwqpmWnZ0NoM3yqi8XOijxcqQhDPBTGfXnE+HUa0aRw6n1WrNInwinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8ZmjQP4YERPMnIKOJh/8Y66Pj2b3moW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyppRNhpkw+pcsNlsDBw4kKioKCZPnkxeXh5jx46lqsrzF6JBQUEMGzaMCRMm8N///peAgAC8vLwoKCjghRdeIC4ujvnz5/Pdd99hNp96XLUeq9WK0WikuLiY48ePEx8fz86dO7V1J0+epKqqisjISE6cOIHNZqOyspKkpCSPhpXL5cJsNhMUFMTJkyfJz88nMDBQa5DV1vTTTz/RuHFjSktLcTqdlJSUcP311xMYGKitW7VqFTk5OTidTgDtmMHaGV1waoeYxWKhvLwcX19fWrduTXV1NV9//TXPP/88Xl5e5/3cjUYDISGW875PFqzWs59nfql9IpwyZpSx5gt15hYcOuvnR06Wkty5Zb39oL+a/wqfCKeMGWWsWYRTxowy1izCqXefCKfKKIdPhFNl1JcvKcRC0p8vqxd6rVmkU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPKarVqO5lOx2azaXOiFi5ciMPhYMqUKdqxfJs2bWL58uUcPHhQm0EF4O/vj8FgYOvWrRgMBjp27EhMTAyDBg0CoHPnztx6661UVFRo/tqm1LPPPss999wDQGpqKoMHDwbQ1u3fvx+z2cx3332nfd+IESNITU3V1hw7dgy32023bt348MMPASgqKuKGG27g+PHjHjWVlpayfv16goODiYuL4+abb2blypU899xzhIeHA/DOO+9gNBpxu93Ex8dz9OhRFi1aRJMmTbQMzZs3B+D111/n1ltv1a6vXLmSp59+moMHDxIbG3t+LwZwudzY7aXnfd/ljsmk/wF+MmY8XlRGcXk1Vl8TYcEX/j8kyzZms+dgEW1aBXNb9+gL9oE870XkcGq91izSJ8IpY0YZaxbhlDGjjDWLcOrdJ8KpMuozo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpyyZrwcsFr9znnXWYNsWMXExNSZVVVcXEx+fr42oyklJYVu3bppzSqAPn36sHz5cpYtW8awYcO061lZWURERODr68umTZtwuVy4XKf+QHl7e3PTTTcxb948zb97924ASktPNWeSkpKwWCw4HA5iYmKorKzk2LFjVFdXezTTanNcddVVQE2jC8BoPPXSgoOD6dq1K99///1Za4qPj2ft2rVs3LiRfv36eXjeeOMNAO68807i4+PP6xlfCGqg3JlpCAP8ZMhYUuZkxtJdpGcXaNfaRYcy9I62WHzPf2dhRk4Bby38Rft5Z+ZJPluXyXP3dSS+5cU5+uNyfy9/xXBqvdX8V/hEOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpa0ZZaJCHKSYnJ/Pjjz9it9u1a6tWrcJoNJKUVLPhPisrS2v01NKjRw8MBgMpKSnaNafTyZo1a0hOTtbuA8jJySEnJ0dbZ7PZcLlczJgxg6SkJDZv3kyjRo1YvXq1tsZgMODt7U1AQACRkZEcPHiQ6upqDAYDN998Mx06dKB///4UFRUBcMUVV2jf6efnR2pqKh07dqRr167897//1Y70O70mk8lE3759ad++PVCzgyssLMyjgVdZWcl9993Hxx9/DMDmzZvrNPi2bNkCwNNPP01cXBwJCQn8/e9/58svv8RqtXrsQFMoLjdmLN1FRk6Bx7WMnAKmL9lVL9/pzarTefPTP76u+GPUcGqFQqFQKBQKhUKhUCgUCoVCXhrkDqsBAwYwb948hg0bxtChQ8nLy+PNN99kwIAB2rF4drud1atXs3r1atau
XQuAj48PoaGh7Nixgzlz5tC6dWsWLFhAUVGRdpSf3W7Hy8uL6OhonnzySYYPH87JkydZsmQJUHOEXkVFBS+//DIRERH88ssvjBkzht69e7NlyxYKCwuJi4sDappctVRUVPCvf/2LtLQ0Xn75ZQA6duwI1Bz/V9ucioyM5MYbb2TBggWUlJRgNBq1mmw2G2vWrKF79+7cfPPNTJkyhZUrVxIcHMyePXu07xoyZAjbtm2jY8eO/PLLL5SWlnLffffxv//9j/j4eEJDQyktLcXHx4dOnToRFRVFRkYGO3bsYP/+/fWeX1WL2dwg+6BCaQgD/GTJePSkw2NnVS0uN6RnF3DCXk7TUP8/uPOPWfJD1lk/X7n5N27vUf/jAWV5LyBuOLWeaxblE+GUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mlLFmEU5ZM8qGwe12uy91iPqQmZnJuHHjSEtLw2KxcMcdd/DMM8/g7e0NQNu2bWnatClut5t169Zp99122234+fmRn59PQUEBCQkJvPjii3Tq1AmAqVOn8sEHH/Dtt98yfvx4UlNTqa6uxul04nK5SElJITw8nG7dulFYWMhrr73G7Nmzyc7O1o4VbNy4MbNmzWLz5s0MHDiQ22+/nbCwMJYsWYLD4cDlclFZWclbb73F7bffzoMPPsiWLVuYPn06s2fPJi0tDW9vb+x2O0ajUTt+MD4+HovFQklJSZ3n4e/vT1paGseOHaNnz56c6bVOmDBBOzrw6aefZseOHZw4cQKDwYDFYuHkyZOMGTOGe++9t17vxe12YzAY6nWvQvFX8PPuPMZ+uPmMn49+5FquTgg/Z99LH6SyM/PkGT9vH9uI1x/vcV4ZFQqFPliz5Td2HsjnqivDuKlrq0sdR6FQKBQKhUKhUCgUCoXisqZB7rACiI2NZfbs2Wf83Gq10qdPH0aMGOFx3W63c8MNNzBy5Mgz3ldZWUlwcDCTJ08G4P7776ekpIS9e/dqc6iaNm1KQUEBBoOBZcuWafcPGDBAW3P48GEAEhMTue+++3j++ecBePHFF/nqq6+0dSdPnsRkMtGzZ0969uwJ1DR+rrrqKkwmE1BzzJ/b7aZ9+/YedX/33Xc8/vjj9O3bFzg1D2vr1q0EBQURFxfHc889R1paGjabTWtWAbzzzjseta9evZqnnnpKa5DVB5fLjd1e+ucLJcNk0v8AP1ky+nudvaFq8TZSWOg4Z1/ryKCzNqziWwSfl+/3yPJeGpJPhFNl1FfG7Fwbr87+iVrF99uPMGXRL4x+qCtRzay6yCjKJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxppFOGXNeDlgtfqd866zBtuw+jNiYmLqzG0qLi4mPz+/zmyr398HkJ2dTXx8PFAzO6p58+baDiqAK6+8kn379nl8h9vtJjs7W5s55XDU/JK6rKzM4zv8/PwAaN68ufZ5dXU1NptNa2IZDAZ8fX21htXBgwf/0NW0aVMALVdWVhaNGjXSPLXExsbyxRdf1Km3urqaqqoqDhw4wPvvv4/JZCIwMPCMz+dcUAPlzkxDGOB3uWcMC/KjXXQoGTkFuE7biGg0QJuoUBpbfc/LfVu3KL7ccOZjAXtf2+qi1H+5v5eG6BPhVBn14Tu9WaU5XTD2463MfO7GC0z3/306q/mvcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCKesGWXhsm1YJScnM23aNOx2O1Zrzd+GXrVqFUajUWso1ZKZmcn48eNJS0vD398fLy8vli9frjWsbDYbVVVV3HbbbR7+JUuWsGHDBpYvX05BQQGRkZEUFRVx/fXXA1BaWorBYGDVqlX88ssvpKam4uXlpc2HCggI0HwGg4F3332Xn3/+mezsbMLDw7Hb7bRs2VLLALBnzx5effVVVq9ejcPh
0OZbtWjRAqjZQebr68uoUaP49ddfAZg1axYPP/ywx0wtqGl+JSUlaY01b29vqquradOmzQU9ezXDqi4N4TxUmTIO69eeDxbvZGdWgXatbXQoj9/Zvl5/fl/8VyITPtn+h9cv9N8Hmd5LQ/GJcKqM+sn4fdrhOs2qWqpd8GP6UZI7Nq+3X481i3bKmFHGmkU49e4T4VQZ9ZlRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRTlkzysZl27AaMGAA8+bNY9iwYQwdOpS8vDzefPNNBgwYoDV5oOa4v+3bt9O5c2cmT55MXl4eY8eO5aOPPiIsLIzWrVvjcrkoLy9n8ODB2n29evXiueeeY9++fdx99900adKEWbNmYTKZCAkJ0daZTCZ27NjBb7/9xtChQ9m1axdr1qzxyGo2m2nWrBnz58+nS5cuPPXUU3z66ae43W6tuVWLy+Vi4cKFPPDAAwDMmzcPAB8fH21NWVkZq1evplWrmnkbFRUV7N27F5fr1G/gJk6cSGpqKk6nk169enHixAm2bdsGQEZGhkdz7nwwGg2EhFjqda8MWK1+uvaJcOoxY0gIvD7sOnLzS8g94SCisYWIsIA/v/EMdA+xsKxTCxZ9t4+0vcfpFNeEe/7W+oIy/h4Z3ktD84lwqoyX3pd1tPisnx/ItXPHDRf+77eeav6rnDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOlVEOnwinjBllrFmEU8aMMtYswilrRlm4bBtWQUFBzJkzh3HjxjFs2DAsFgt33303zzzzjMe6Y8eO4Xa7mTJlCsHBwQBUVVUxZswYZs6cic1mw2g00rt3b20XE9Q0jqqrqwkKCmLFihWYzWZ69+7N5s2b+eijjxgzZgxWq5WqqioAQkJCmDJlChEREXTr1o1NmzZx8OBBwsPDsVqt5Obm0qRJE7Kysti5cyeJiYmUlJRw7NgxrR4Ap9NJdHQ0CxcuxGKx0KdPH5YtW0ZaWhr33HMPVquV0tJSysrKKCoqAmqOQlyyZAlG46nObkxMDLNnz8bLy4vvvvuO8PBwbr/9dpYtW8bixYt59tln6/Xc1QyrP8Zk0v95qDJmDPAxcXVCOHZ72QXNmaqlV5cW3PO31hfNB3K+F737RDhVRv1kjGkWyPdn+fyKCKuaS3eJfSKceveJcMqYUcaaRThlzChjzSKcMmaUsWYRThkzylizCKfefSKcIjIeLyqjuLwaq6+JsOAL/yW8jM9RxppFOGXNeDlgtaoZVkDN3KbZs2efdU3Tpk2Ji4vTmlUAffr0YcyYMQwfPpx+/fpx//33a8fm1ZKamgrAgw8+yBNPPKFdnzBhAmvXrgVOzcOKiopi9erVHms2b97M5s2b6dKlC1FRUezcuZMXX3yRQYMGATXzsDp37ozD4eDw4cO0bNkSk8lEdXU1Cxcu1BpY69atY9myZRw4cED7zvLycrZu3UpQUBAvvPAC6enpREdHaw0sgLvuuotXX32VESNGeHznN998g9PpPLcHfAbU+ZxnpiGch6oy6s8nwiljRhlrFuG83DP2aB/BnJV7/vBYQJMRurdrpubS6cQnwql3nwinjBllrFmEU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwnkxfCVlTmYs3UV6doF2rV10KEPvaIvF1+ssd54bsjxHkT4RTpVRnz6Z0F3D6vR5UhaLhTvuuIOnn34ab2/vs97ndruZOXMmn376KQUFBSQkJPDiiy/SsWNHj3V5eXmMHz9emydVXl7OgAEDPNZYrVasViv/93//x+jRo/H396e0tNRjHtaKFSsAOHr0qDYHqlOnTlx99dXk5uZSXl5OYmIiRqOR6upqHnroIW1GVkVFBaGhoWRlZQEQHx/PsmXL2L17N7169dJ2W9U2ybKysoiMjKRp06bk5eXxxhtv8O233+J0OgkKCiIoKIgjR44A0KNHDwC6du3qUdP+/fu56aabtJ9NJhP9+vXjk08+ITExkc2b
NzNp0iQAbrjhhnN+XwqFQqFQXG68PPBqxs/52aNpZTLWXFcoFAqFQqFQKBQKGZixdBcZOQUe1zJyCpi+ZBfD+3e8NKEUCsVlj64aVjabjYEDBxIVFaXNk5o4cSLl5eWMGjXqrPfOnDmT9957j5EjRxIXF8f8+fN5+OGHWbJkiXaUn9Pp5JFHHgFg0qRJlJeX88wzz5CSksJLL72kuX7++WdsNhsxMTG8/fbbbNiwgVmzZnHvvffy4osvkpeXp+2iWr16NS+88ALh4eE888wzbN68Gbfbjc1mIzw8nMDAQA4dOgTA448/zqpVq9i1axf+/v7YbDYA2rdvD8DXX39N79696dOnj8fOsNp1rVu35siRI6xYsYKHH36Y7OxsVqxYgdVq1XZPNW3alDZt2rBr1y4ee+wxNm7cyN69ewkICOC5557TnE888QRt2rShZcuW3HPPPdp1Ly8vxo8ff97v7nTMZjVU7vc0hAF+KqP+fCKcMmaUsWYRTpkyxjYP5uOXbiJ1Ry77DttoHRlEjw4RFyOibmsW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwXizf0ZMOj51VtbjckJ5dwAl7OU1D/S9pRpFOvftEOFVG/WaUDYPb7XZf6hC1TJ8+nWnTprF+/XrtiL7PPvuMsWPHsn79esLDw//wvoqKCrp3787999/P8OHDAaisrOTWW28lOTmZMWPGALB8+XJGjhzJihUrtOP62rRpQ3V1NYsWLaJDhw4ADB48mJ9//pm+ffsybtw4AB599FG2bt2K2+3GYrEQERHBzp07efXVV+nfvz8A9957L9u3bwcgJSWF8PBwunbtSnFxMY0bN6aoqIiEhAS6dOnChx9+yNVXX838+fPZtm0b9913H40aNaKsrAyz2czNN9/M7t27ycjI4K233uL222/n3//+N+vWrSMyMpK8vDwiIiK48847eeeddzCZTGRkZADwzjvvMHPmTIKCgigqKsLb25svv/yS2NhY7ZnNmDGDjz/+mMLCQgwGA76+vpjNZoqLixk5cqTW2Dtf3G43BoOhXvcqFAqFQqFQKBQKhUKhUCgUsrB973H2/lZAfKtQOsU1udRxNH7encfYDzef8fPRj1zL1Ql//HtahUKhuBB0tcMqJSWFbt26ecyT6t27N6NHj2bjxo3069fvD+/bvn07JSUl9O7dW7vm7e3NzTffrO2EqvXHxcVpzSqAoKAgHA4HGzZsoEOHDlRWVrJlyxZ8fHy0OVEA/fv3Z8OGDXz33XdERkYycuRIdu7c6XF83oIFC/jHP/7B7t27tXudTidhYWGkpKRo6+x2Ox9++CGVlZUAlJWVATBgwACeeuopbd0777xDRkYGFosFgIKCAgwGA99++61HU+iTTz6huLhY+9lkMuHt7c2PP/6ozbA6vVkF0LNnTyZNmsSIESOYNm0aq1atYsSIEeTn5/Puu+8yYMAAAgIC/vB5nw2Xy43dXnre913umEz6H+CnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU49+/IKShn78VZKyqq0awF+ZsY83JUmIfXbuXQxM/p7nf0vo1u8jRQWOurlVn925KhZhFPWjJcDVqvfOe8601XDKisri7vuusvjmtVqJSwsTJv3dKb7AI9GFEBsbCxz5syhvLwcX19fsrKy6qyJiYlh//79muPgwYM4nU6qqqo81tY2fGrnSVVV1fwPSkFBAU2anPobEF5eXhiNRnx9fQGoqqrS1tYSGBiIwWDAbK55/OXl5UBNc+t0vLxqBhie7nK73djtdo9mmtls1ly1lJeXc+2111JUVITZbObzzz/nn//8p/b5gQMHgJpjCB977DGthoCAACorK8nLy6tXw6omp/qX8Uw0hAF+KqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfC
KWPGi+H7fbMKoKSsijGztvLef5IvyA0XnjEsyI920aFk5BTgOu1sLqMB2kSF0tjqe8HPQP3Z0adTZdSnTyZ01bCy2+1YrdY614OCgrQ5TrVkZmYyfvx40tLSMBgMmEymOkfRWa1WbZ6Ur68vdrudwMBAZsyYwaeffkpBQQGhoaEUFxdz8uRJ4NS8KIPBwMqVKxk3bhxeXl4kJyd7fO7n54fBYGDq1KlkZ2eTnZ1Ns2bNOH78uEeGqqoqTp48yUsvvcSGDRtwOBxER0fjdrsJCQkBoLS0ZkfS999/T3p6OmlpaVgsFnx8fIBTDavanwcNGsTJkyfJy8vjqaee4sSJEwQGBmrf2bJlS0aOHEmbNm2YPn06W7du5ZVXXuG7775j+vTpADRv3hyA48ePs3TpUj744AOqqqrw8fHBYDAQEVH/WR1qhlVdGsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhFPGjDLWLMIpY0YZaxbh1KtvR+aJOs2qWkrKqth9sJD2MY3q5b6YNQ/r154PFu9kZ1aBdq1tdCiP39n+gn73p/7sXBxUxotDQ8goG7pqWJ0rNpuNgQMHEhUVxeTJk1m4cCHffvstEydOZNSoUWe9d/fu3Xz11VeMHDmSuLg4Zs+ezdGjR9m1axepqals2bIFAH9/f44dO8akSZMoLy/nueee8/CYTCYCAgJYtWoVnTp14plnnmHRokWUlZVhNJ76A1k7H2rx4sXcf//9+Pv78/HHHwPUac7t27ePwsJChg0bxrZt21i/fr3H5z4+Pvj4+LBnzx46d+5MXl4eixcvxsvLy6Nhdcstt7BhwwZsNhsnT56kdkxZamoqeXl5hIeH06xZMwwGA8XFxXTo0IE777yTmTNnUlhYSKNGjfDz8zvPt1KD0WggJMRSr3tlwGqt33P9q3winDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOGTPKWLMIp4wZZaxZhFPGjDLWLMIpY0YZaxbh1Jsvt+DQWT8/crKU5M4tL+g7LkbNISHw+rDryM0vIfeEg4jGFiLC6nca0x+h/uzo06ky6tMnE7pqWFmtVo9ZTLXYbDaPI/AWLlyIw+FgypQpBAcHc/DgQb799lsWLFjA0KFDCQ+vGfpnt9sxGAzavYGBgWRkZDB48GAGDRoEQOfOnUlMTMTpdDJs2DBtN1NJSQnvvvuudizge++9R3Z2NgUFBVrWsrIymjVrxtGjR3n77bdJSEigefPmHDt2TMsaGBhIUVERbdq04csvv8RsNtOrVy+WLVvGkSNHALR8Xl5eBAUF8e677xIREUG3bt3YtGmTdqSg1WqldevWdOnShSVLlgBgNBq55pprKCws1L7z5MmT/Oc//6nzHKuqqvjxxx+58847mTJliraLKysri59//hmA0NBQTp48yaFDh2jRosV5vT9QM6zOhMmk//NQVUb9+UQ4ZcwoY80inDJmlLFmEU4ZM8pYswin3n0inCqjPjPKWLMIp4wZZaxZhFPGjDLWLMJ5sX3Hi8ooLq/G6msiLLj+v4yOCD37vc0b+etqPlSAj4mrE8Kx28vqnet0ZPyzI2PNIpyyZrwcsFob6AyrmJiYOrOqiouLyc/P95gnlZKSQrdu3QgODtbuA3C5XGzcuJF+/foBNfOmIiIitCaU1WqlqqqK3r17a67amVPe3t78/PPPVFZW0qFDB8LCwjy+89lnn+Xxxx/n8OHDALRq1Yqqqir69+/Pv//9b21d//79OXLkCIcPHyYyMpLg4GCKioqYPXu21pgqLi5m2bJl5Ofne+RPSEhg0aJFmmvmzJls2rSJnJwcunbtSkxMDJs2bWLRokU8//zzxMXF0b9/f7755htat26t3RcZGcnevXuZOnUq3333HYsWLSI+Ph6ADh06AJCdnU1JSQklJSUez7uiogKAbdu21athBWqG1dloCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFO
vftEOFVGOXwinCqjHD4RTpVRDp8IpwwZS8qczFi6i/TsAu1au+hQht7RFouv13n72rQKJcDP/IfHAgb4mUloGaLmQ+nEqXefCKfKqE+fTOjqMMXk5GR+/PFH7Ha7dm3VqlUYjUaSkpK0a1lZWR7NpMTERAICAvD399caXk6nkzVr1mizpwCaNm0K1BznV8umTZuoqKiguLiY8vJyvL29sVgsOJ1Oj2wrV67E19eXEydOADUNK0DbcQU1O8H27t2rZQRo3LgxBoPBY77WqlWrMBgMFBUVAdCiRQuMRqM2y6qWdevW4e3tzcGDB7XnY7PZ2LRpk7amoKCAjIwMjzoBcnNzmTFjBi+//LL23T4+PrRs2RKAl156icceewyTycQTTzzB9OnTiY6Oxs/Pj44dO9KzZ08UCoVCoVAoFAqFQqFQKBQKmZmxdBcZOQUe1zJyCpi+ZFe9na8MvJoAP899BAF+Zl4ZeHW9nQqFQnE5oKsdVgMGDGDevHkMGzaMoUOHkpeXx5tvvsmAAQO0Y/4ACgsLWbRoESNHjgRqGjFDhw7lf//7Hz/99BObNm1iwYIFFBQUsHfvXjp27IjFYqFly5YYDAZGjBjB8OHDKSsr480336RNmzZkZGRgs9nw9fXFYrGQl5dHp06dcDqdhIaGcvz4cRISErDZbEDNziyA+fPns3DhQsxmM/7+/lgsFsrKyrR1zZs3Z/v27SQnJ+N0OgkKCsLhcNCxY0fS09M96j9w4AAdO3akurqakJAQTpw4QbNmzTRXp06d6Ny5M0OHDtXumT9/Pq1bt+aWW27RrvXr148TJ05QXV3N/fffr82jSkpK0nJ/++23TJs2DYApU6Z45IiNjdV2r9WHCxm8eLnSEAb4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRThkzylizCKeMGWWsWYTzYviOnnR47KyqxeWG9OwCTtjLaRrqf97eZo0D+GBETzJyCjiY76BlmIU2UaH1zlmLLO9FtFPvPhFOlVG/GWVDVw2roKAg5syZw7hx4xg2bBgWi4W7776bZ555xmOd2+3G5fLcUjdkyBBmz57N3r17efTRR2ndujU+Pj6YTCYmT55MXl4eY8eOBSAqKorhw4djNpu5+eab6dq1K88//7zmqqiowGAwYLFYKCwsxG634+XlRWBgoLamdq6Uv3/N/yiVl5djt9tp06aNtgsLanZduVwurFYrhYWFlJaWUl5eTmRkpEfDyu12YzKZ8Pf3x2azYbfbCQwMxNvb28OVk5OD1WqltLSUyspKKioqaNOmDWbzqVdZVlZGXl4eJpMJk8mkNakiIiK0Nffccw/e3t5MnTqV22+/nYSEBCZPnqw1+dxut8eusHPFaDQQEmI57/tkoSEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRjl8Ipwqoxw+Ec7LPWPmsZKzfu6odF3Q78GSQiwk/fmy8+Zyfy9/lVPvPhFOlVGfPpnQVcMKanb3zJ49+6xrQkNDufvuuz2uGQwGTCYT//rXvxg5ciTTp09n2rRpTJkyRdsttGnTJpYvX86zzz7L5MmTtXs///xzDAYDQUFBVFRUYLfbiYmJYcWKFQBUVlZy6623kpmZSdeuXQHYuXMnAM8//zz33HMPAKmpqQwePBhAm1e1f/9+zGYzKSkp2veNGDGCH374QVtz7Ngx3G433bt358MPPwSgqKiIG264gePHj2vrFi5cSFlZGevXryc4OJi4uDhuuukmli5dyjPPPKPtQjt48CBXXHEFCxYs0L6zS5cu/PDDD9jtdqxWK+Hh4cyePZsBAwbwwgsvALBjxw7WrFlDeno6GzdupEePHn/6vn6Py+XGbi/984WSYTLpf4CfyqifAayno/eaRTj17hPhVBn1mVHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMI58Xw+Xud/S9zW7yN
FBY66uUGfdYs2iljRhlrFuGUNePlgNXqd867znTXsDoXYmJitBlRtRQXF5Ofn6/NtkpJSaFbt24eR9v16dOH5cuXs2zZMoYNG6Zdz8rKIiIiAl9fXzZt2oTL5fLYweXt7c1NN93EvHnzNP/u3bsBPOZOJSUlYbFYcDgcxMTEUFlZybFjx6iursZms2mNp9ocV111FVDT6AIwGk+9tODgYLp27cr3339/1pri4+NZu3YtGzdupF+/fhw6dIiqqioOHDhAly5dPJ7Rb7/9RpcuXdixYwclJSUUFBQQHx8P1OwqW7t2Lb169eKrr77S5mbVBzVQ7sw0hAF+KuP5c7EHsP4Reqv5r3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMI54X4woL8aBcdSkZOAS73qetGA7SJCqWx1feiZNVTzX+VU8aMMtYswilrRllokA2r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSqrZSJuVlcVdd93lcV+PHj0wGAwsWrSIRYsWaQ2b3NxcbrrpJu0+gJycHB5++GHS0tLw8vIiLCwMl8vFtddeC9Q0fxo3bsxnn33Gl19+SXZ2NhEREbjdbgICAoiMjOTAgQNUV1djNBp55pln2Lt3Lw6HgyuuuAJA+2dWVhb+/v5s3brVoyZfX1+AOjUtWrRI24m1YMECrFarlrv2n2PGjGHdunVs27aNqqoqKioqAJg4cSJeXl6EhoZiMBh4/vnnPY5D/OqrrwC09QqF4s852wDW4f07XppQCoVCoVAoFAqFQqFQKC6YoXe0ZfoSz7+k2iaq5i+pKhQKheLi0iAbVn369OGdd95h9+7dPPbYY/z222989dVXtG7dWjsWz263s3r1alavXs3atWsB8PHxwc/Pj6NHj3LTTTeRmJjI7Nmzyc/Pp3fv3tp9Xl5eGAwGfvrpJx555BFKSkqYN28eAC1bttTWtWzZku3btxMbG8vw4cNZu3YtOTk52qwom80GQLNmzbQdUK1atWLGjBkAJCQkaK7Q0FByc3Pr1AR41LR3715mzpzJLbfcQk5ODhaLhfz8fLZu3erxnStWrODgwYOMHz8eHx8fHn/8cQAiIyO1nVx///vfWbFiBf/4xz9IT0/n8OHDOJ1Oqqur6d+/f73fj9mshsr9noYwwE9lrB+iBrBezIwifSKceveJcKqM+swoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhPNi+YICfHju/kTyi8qwCxgDcPo/9eYT4ZQxo4w1i3DKmlE2GmTDasWKFXh7exMfH8/UqVOxWCwkJSWxadMm8vLytAaPy+XC7T61X7eiooKysjLCwsJIT08nJSWF+Ph4XC4XK1eu5JprrgHA7XbjdDpJSkpi9uzZmM1m4uLi2LNnD7t379b8ubm5xMTEYDKZmDRpEhEREQQGBlJYWOiRNzc3l+7du7NhwwYcDgcJCQmkpaWRlpbGAw88AIDD4ahTU6tWrfjtt988atqyZQsAa9asAWp2gsGpmVq1bN26lY8++qjOHKqtW7dqRwW+/vrrXHHFFSxevJicnBz8/f2prq7m9ttvx9+/fr9gNxoNFzRs8nKnIQzwUxnPD9EDWGvRU81/lVPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmi+m80h+Cft25xHR2EJEWMAF+0T+vkum9yLKJ8Kpd58Ip8qoT59MNMiGVUpKCklJSXzwwQfaNbvdTteuXbWdTFarlT59+jBixAhtzfbt23G73Vx33XVMmDBBuz5hwgRtF5bVaqWqqorWrVsza9Ysbc1nn33GqFGjSEtLo2fPngQGBnLw4EFeeOEFBg0apK3r3bs3WVlZHD58WJtZ5Xa7eeedd7Sfc3Jy6NWrF/v379e+s6SkhOTkZI+aJk6cyMcff6zVFBAQQFFREe+//752hCFA586dKS0tpbKyUvuO
2iZeLbNmzeLhhx8mIyNDu+bt7c1jjz1Go0aNePnllxk6dChvv/32Be2ucrnc2O2lf75QMkwm/Q/wUxnVAFa9OPXuE+FUGfWZUcaaRThlzChjzSKceveJcKqM+swoY80inDJmlLFmEU4ZM8pY88V0lpQ5mbp4JzuzTp2G0j4mlMfvbI/Fr/5zpvVcsyifCKeMGWWsWYRT1oyXA1ar3znvOmuQDas/mk9ltVoJCwvTZjjFxMRo/7mW2mZNp06dPK7HxsYyZ84cysvLiYmJAaBJkyYea7Kzs/H29ubgwYPa5+np6dp6qGlMnThxQst47bXXYjQa8fPz0xpJtZ8B5Ofna1mdTifNmzf3+M4jR47g5eWlrQ8LC6OoqIjo6GhtTXFxMQ6HA7fbzaFDh7Q8YWFhGAynfpGelZWF0WgkLy+vzvNcvnw5MTExpKWl0bx5cxITE+usOR/UQLkz0xAG+KmM54cawCrOqXefCKfKKIdPhFNllMMnwiljRhlrFuGUMaOMNYtwyphRxppFOGXMKGPNF8P5wVc768yZ3pVdwPtf7bwoc6b1WLNonwinjBllrFmEU9aMstAgG1Z2ux2n08lDDz1EWloaFouFO+64A6vVqs1wSk5OZtq0adjtdqxWK1Czwwrg8OHD9OzZk4KCAhISErjhhhtwu93YbDYSExMxGAzk5eXx5JNPkpqaitlspqqqitDQUM3fpk0b1q1bx5YtW3jrrbfIzs4mJCQEu90O1MyS8vb21ppMb7zxBkuXLsXhcGCxWAgODqakpOYosdpj+w4cOKDV5OfnR3FxsUdNV1xxBfv372f16tUsWbKE3NxcQkJCtOdSm9/b25ujR49y8803c/z4ccLDwykvLyc8PJzi4mJt/eTJk5kyZYr2c21jbMGCBdx77731fj9qhlVdGsJ5qCpj/RnWrz0f/O5vb7WNrvnbWxf674Neaxbp1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNF8spcs60XmsW6RPhlDGjjDWLcMqaUTYaZMPK7XazaNEi2rRpw+TJk8nLy2PixIkYjaf+IAwYMIB58+YxbNgwhg4dSl5eHj/88AMGg4FZs2YxcuRI4uLiGD58OG+//bZ2n4+PD1arlf3791NUVMSQIUNYv349O3fuxMvr1Lbhrl27AvDhhx9y3XXX8be//Y25c+fWydqmTRvWr1/PvHnzePDBBzl+/DjLli3D19dXm6/VtGlTDAYDP/74I1FRUTz22GN8/fXXFBYW4nQ6Nde1117LypUreffdd/n73/9+xu/09/enqKiImJgY7r//ftasWcO2bdsICQnBx8fHY21tM+6ee+5h0aJF/N///Z/HUYLni5phdXYawnmoKuP5ExICrw+7jtz8EnJPOC7a+dino7ea/wqn3n0inCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpY0YZaxbhlDGjjDVfqPOvmDOtt5r/Cp8Ip4wZZaxZhFPWjLLQIBtW3t7eVFRUMGXKFIKDgwGorq5m1KhRmM01JQUFBTFnzhzGjRvHsGHDsFgsdOzYkS1btvDggw9qc6diYmIoLCzE7XZrx/YFBgZis9lwu91MnTqVhIQEXnjhBSZMmKA1mRo1agTUzIratm0bv/76K7169SIvL4/U1FTNFR4eDkBoaChz584lIiKC//73v7zxxht4e3trNfn4+FBRUYHNZmPq1KkkJiby97//ncmTJ2s1NWvWDIDg4GC+/fZbLBYLDzzwAN9//z1ZWVnadyYmJrJ//36ys7PZuHEj0dHRPPzww8yaNYvGjRt7PEuXy0WHDh3IysoiLi6Ovn37XtC7UTOs/hiTSf/noaqMF+4L8DFxdUI4dnvZBc2tOh291yzCuSu7gEMnHLQMs9AmKlR3+UQ4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1nyxnCLnTOu1ZpE+EU4ZM8pY
swinrBkvB6zWy3yGlclkIiAgQGtWAVx33XUAVFZWatdiY2OZPXu29vNHH33Eli1baNeunXZt/vz59OvXjz179uDr66td9/HxYePGjdrPLpeLiRMnajuemjZtCkC3bt14//33tXVvv/02qamp+PvXbC+uqKjQvqdFixbaupkzZ1JWVqb9bDabCQwMJDU1VbuWm5vL5MmTtZpq8/3zn/9kxIgR2rrDhw+TlZWlZYqPj2fbtm1s2bJFm2OVmZnJrFmztEZbLb6+vrz77rvceOONDB8+nIuBOp/zzDSE81BVRv35RDj1mDGvsJTX5v5MSVmVdi3Az8wrA68mLLh+RzZczHx/hVNllMMnwqkyyuET4ZQxo4w1i3DKmFHGmkU4ZcwoY80inDJmlLHmC3X+FXOm9VbzX+ET4ZQxo4w1i3DKmlEWGmTDqrq6msLCQh544AF27tyJxWIhPj4ewGPX0u+p3an06aef8uabb1JQUEB8fDwHDx6kurqa8vJyfH19MRgMVFRU8PDDD5OWloaXlxcdOnTA7XZrxwIeO3YMgF27dtG3b1+ys7OJiIjQmkqlpTW7jGqP4Bs9ejR79+7F4XDQvn17CgoKPLKeS03l5eUAbNiwgTVr1pCbm0t0dLQ2N+vYsWPExsYSERGBzWYjOTkZu91OeHi4trPqxhtv9HgmZWVl3Hjjjbjdbt5++22+//57Jk+eXKexpVAoFKL5fbMKoKSsinFzfua9/yRfolQKhUKhUCgUCoVCoWhIDL2jLdOX7PKYZdUmKpShd7S9hKkUCoVCcS40yIZVRUUFbreb3bt389hjj/Hbb7/x1Vdf4e3tTVXVqV92Dhw4kNzcXNauXQvUNJEMBgM//fQTN910E4mJicyePZvi4mIAbDab1nAyGo389NNPPPLII5SUlPDJJ5/g5eWl7Viy2WwAHD16FH9/f4YPH87atWvZtm2bx+fV1dV4eXmxceNG+vXrR6tWrZg5cybV1dXa7qtzranWuXfvXjp37syAAQNYtGgRR48e9fg8KysLq9VKRUUFjzzyCEeOHGHx4sWYTCbuv/9+7Tvz8/Nxu90EBgYSGBhIWFgY27Zt4+6772b9+vX1fj9msxoq93sawgA/lVF/PhFOvWbckXmiTrOqlpKyKnYfLKR9TP0a6XqtWaRPhFPGjDLWLMIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmi+mMyjAh+fuTyS/qAx7eTVWXxNhwRc+T0bPNYvyiXDKmFHGmkU4Zc0oGw2yYQVgMpmIj49n6tSpWCwWkpKS2Lhxo8cxey6Xi+rqau3nqqoq3G43Xbt2JT09nZSUFOLj4ykrK9OaVlCz68jlctGlSxdmz56N2Wyme/fupKameqwDiIiIwGQyMWnSJCIiImjbti27du3SPi8tLcXpdNK9e3c2bNiAw+GgQ4cObN++HZfLc1vgudQEkJCQwIkTJ5g0aRLR0dE0b96cI0eOaJ8PGTKExx9/nAkTJjB37lxKSkowm81UVVWxZ88e7UjEI0eO0L59e3bu3Mnw4cO57777uOuuu0hPT+fQoUMeRxieK0aj4YKHV17ONIQBfiqj/nwinHrLmFtw6KyfHzlZSnLnlvX2w8Wt+Uh+Cft25xHR2EJEWMBF8+rtvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtZ8MZ2ifj+l55pF+UQ4ZcwoY80inLJmlIUG2bAyGo00b96cTz75RLtmt9vp0qWLdjwewLx58zzuq92BNHLkSK666irt+kMPPcSPP/5IUFAQUNPYCgwMZNasWdoat9tN27ZtteZR7Yyq6667jldffVVbt2DBAnbt2qXNuqrN884772h+gFtvvZXDhw+fV021s6zuuecej51SY8aMYcGCBVqm0NBQAF577TWcTiffffcdb7zxBk888QTHjx/X7quqqiIsLIy9e/dq12qbeYcPH65Xw8rlcmO3l573fZc7
JpP+B/ipjPrziXDqNWNE6Nn/h7x5I39dDMYtKXMydfFOdmadOlqifUwoj9/ZHoufV729en0vIn0inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinrBkvB6xWv3PeddYgG1Z/RO1RfRdr7ZnWuN3uP7xenxz1df3+vjOte+ONN1i5ciUzZ87k0KGa3QsxMTHa53fffTcvvvgiq1atokePHhw9epQ1a9YA0KRJk3Ou4/eogXJnpiEM8FMZ9ecT4dRbxjatQgnwM//hsYABfmYSWoboYjDuB1/tJCOnwOParuwC3v9qJ8P7d7wgN+jvvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPK5XJx5MgRHnjgAXbu3InFYiE+Ph4Aq9V6xvtqdzi99dZbHDx4kIKCAuLj48nOzgZOzbAym82cOHGChx9+mLS0NLy8vOjQoQPV1dXaLqbS0ppdRKmpqfTt25fs7GwiIiIICKg5GsrLy8sjzzPPPMPevXtxOBy0b9+ew4cPexwJeC41eXt7A/Dll18yb948cnNziY6OxuFweGQCmDx5Mh9//DF+fn78+9//xuVy0aNHD6KiorQ1t99+Oxs3buQ///mPds3f35/AwEBatmx57i/kd6gZVnVpCOehqoz684lw6jnjmIe7MmbWVo+mVYCfmTEPd72g/165WPmOnnR4DO2txeWG9OwCTtjLaRrqf0kzinTKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKesGWWjQTasAKqrq9m9ezePPfYYv/32G1999RXe3t74+Z06VmrgwIHk5uaydu1aAMxmMwaDgS1btnDTTTeRmJjI7NmzPY4RBPDz88NoNPLTTz/xyCOPUFJSwieffIKXlxeBgYEea48cOUJsbCzDhw9n7dq1bNu2zeNzf39/vLy82LhxI/369aNVq1bMnDmTqqoqjEbPP7jnUhNARkYGnTt3ZsCAASxatMjjaEGAZcuWMWXKFHx9fXnkkUdYsmQJhw8fJicnh+PHj2u7p7p3787JkyeJiYmhY8eOpKamcvz4cRo3bozZXL8/GmqG1dlpCOehqoz684lw6jFjSIiFBeNvI23vcfb8VkB8q1A6xdV/t+fvudB8mcdKzvq5o9J1wf/9p8f3Itonwql3nwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcKqM+vTJRINsWPn4+FBZWUl8fDxTp07FYrGQlJTExo0bPRotLpeL6upq7Wd/f3/cbjdXX3016enppKSkEB8fT2lpKSUlJR4zplwuF126dGH27NmYzWa6d+9Oamqqdhxf7dqmTZtiMpmYNGkSERERxMXFsXfvXu1zk8mE0+mke/fubNiwAYfDQYcOHdi2bRs+Pj7nVVOtMy4ujhMnTjBp0iSio6Np2rQpx44d0z5funQpAOXl5UyePFn7jsOHD/PBBx8wZswY3G43J0+exGQyceTIEXJzc2ndujXh4eHs3LmTDRs20LNnz/N+N2qG1R9jMun/PFSVUX8+Ec6GkDG2WSCd4ppgt5fVe26ViHz+Xmc/8tXibdTFnC1RThkzylizCKeMGWWsWYRT7z4RTpVRnxllrFmEU8aMMtYswiljRhlrFuE8XlRGcXk1Vl8TYcEX/svjhlCzyqjPjDLWLMIpa8bLAav1Mp9hZTKZCA0N5ZNPPtGu5ebmcsMNN1BZWaldmzdvnsd9VVU1x0zdf//99OnTR7ver18/9uzZg6+vL1AzI8rHx4dZs2Zpa1wuF23atMHpdAI1jSqAdu3a8f7772vr3n77bfbu3asdHVhRUQHAq6++SosWLbR11113HWVlZedVU22+66+/nhEjRmjrnnzySY4dO+aR
6ZdffqFfv37Mnz+fmTNn0q1bN+68807Ky8s1N8Add9zBhAkTNNfq1at56qmn2LRpU70aVqBmWJ2NhnAeqsqoP58Ip4wZL9QXFuRHu+hQMnIKcJ02StBogDZRoTS2+upizpZop4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPh1GPGkjInM5bu8jjSvV10KEPvaIvF1+uS5/srnCqjHD4RTpVRnz6ZaJCHKVZXV1NUVORxlN/GjRuBU3Oe/ojanUoZGRnaNafTyZEjR6iurtaaOQaDgYqKCnJycrR1mzdvxu12a7Opjh07BuCxBmDfvn3AqXlStbuoNm/erK2x2WwUFhZy+u6vc6mpNt/+/fs9vjMrK8sjU1ZWFv7+/syZM4eJEyfSrVs3AGJiYrS1FkvNsVV5eXkertpGVm1zT6FQKBSnGHpHW9pEhXpcaxNV8//4KBQKhUKhUCgUCoXi0jNj6S4ycgo8rmXkFDB9ya5LlEihUCgU50qD3GFVUVGBwWDgmmuuISAggKuuuopffvmFoKAgj0bL72dYlZaWYjAYmDlzJrNmzSIqKorGjRtrO51sNhu+vr5UV1djNpu59dZb8fPzo3379uTk5BAaGorBYNDWAhw4cICEhATCwsJo27Yt69ev9/jc6XTi7e3NK6+8wtixY7nyyisxmUz4+PhoDahzranWuX79ehISEmjevDnR0dFkZ2drnx86dIgffvgBh8OB0Whk3LhxfP755wwYMACn00lBQc3/YAcHB2M2m9m4cSMJCQkEBQXRqVMnfvrpJwBCQz1/IXs+mM0Nsg8qlIYwwE9l1J9PhFPGjBfTFxTgw3P3J5JfVIb9Ih8tcfo/LwZ6fo6inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzyljzxXIePenw2FlVi8sN6dkFnLCX0zTU/5LlE+1UGfWZUcaaRThlzSgbDa5hZbPZcLlchIeHExoayoEDB0hNTSU+Pl47rq+W38+w+umnn3C73Vx//fXs3LmT7OxssrOzGThwIB9//DFQ02DKz8/Hz8+P2NhYdu/ezdatWwkPD6dNmzbaDKs9e/YA0KFDBwoKCsjNzWXdunXceeedLF68WPvOtLQ0nE4nSUlJ/Prrr+zZswez2Uz//v1ZuHDhedX0yy+/ANC9e3cyMzM5cuQIhw4d4sEHH2Tu3LkAOBwOrWaXy0VRURFbtmxhy5YtADRq1EhzVVVVYTAYsFqtFBcXs27dOozGmn+ZaneSnS9Go4GQEEu97pWBhjDAT2XUn0+EU8aMF9Mn6r/nZHwvIpx694lwqoxy+EQ4ZcwoY80inDJmlLFmEU4ZM8pYswinjBllrPlCnZnHSs76uaPSdcH//5zeav4rfCKcMmaUsWYRTlkzykKDa1jVNnl69erFf//7XwA+++wzxo4dS0hICEFBQdra02dYVVRU8PPPPwMwefJkfHx8qKys5NZbbyU9PR2DwUBQUBCrV6/G6XTSvXt3ZsyYAUBqaiqDBw8mKCiI2NhYAJYvXw7AU089xXXXXQfAiBEjtO8ICgri2LFjZGdnY7FY+OijjwAoKirihhtuYN++fVrWc62pdqfYK6+8QkxMDAADBgwgLS1N+87Y2Fh69uzJsWPH+Oyzz4CaYwt79epFp06dcLlc2jNISEggNjaWb775RjvusLbZFRYWVq/343K5sdtL63Xv5YzJpP8Bfiqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhPN4URnFF3GHOuj/OTaE96Iy6jOjjDVfLKe/l+Gsn1u8jRQWOurl1mvNIn0inDJmlLFmEU5ZM14OWK1+57zrrME1rFJSUggODtZmLQH07t2bUaNGcfLkSa2R83u2b99ORUUFANnZ2cTHx+Pt7c3NN9/MF198QUREBL6+vqSkpGC1Wjl+/Lh2b1JSEkFBQfz222/cfPPNVFZWsmPHDoxGI1lZWVrD
qk+fPlojKyYmhtTUVNxuNw6HA5vNRlBQEMHBwSQlJfHzzz9z5ZVXnnNNhw4d0jJlZWVpdfbp04cJEybg5eVFixYttO/etGkTbrcbg8FAcHAwUDOv6pprrgFg9+7d3HXXXYwYMYL//ve/5OfnU1FRwT333APAVVddVe93pAbKnZmGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLK4RPh1GPGkjInM5bu8jiiq110zQxQi2/9Tvb4PXp/jnp8L6J9IpwyZpSx5gt1hgX50S46lIycAlzuU9eNhpr5w42tvhecV281/xU+EU4ZM8pYswinrBllQXeHKWZmZvLQQw/RsWNHkpKSePPNN6msrNQ+z8rK4sorr+THH3/EbrcDYLVaCQwMxO128/bbb9OhQwf69++vHaFXex+Av78/Tz/9NJ06daJr1678+uuvlJSUkJSUpK2LiYlhz5493HrrrbRv355bb71Vmzl1/fXXc/DgQaqqqoiLi+Ojjz4iKSmJjh07MnPmTACaNWtGZGQkWVlZhISEYDAYuPfee7WacnNzKSwsJDk52aOmlJQUbrrpJtq3b8+//vUv/Pxq/tZZUlKSlj8iIoKJEyfStWtXOnXqxKpVq3C5XFx11VV4e3sDkJycjM1m44cffiAtLY3u3bsDcPToUe07KyoqcLvdvPLKK/Tu3Zt//vOfjBs3DoCmTZuesfGnUCgUCoVCoVAoFArFuTJj6S4ycgo8rmXkFDB9ya5LlEihUFzuDL2jLW2iQj2utYmqaZQrFAqFQt/oaoeVzWZj4MCBREVFMXnyZPLy8pg4cSLl5eWMGjUKALvdTteuXcnJyWHYsGEMHTqUvLw8SkpqzqgdPHgwcXFxzJ8/n3vvvZemTZuyfv167HY73t7e+Pv7k5OTw913301YWJjWZOrXr5/mj46Oxu12U1BQwLBhw0hPT2ft2rVYLBY6dOjAtm3bgJrm1+7du+natSvJycnaEYTXX3+95rJarZSVlZGTk8MDDzwAwJw5c4Ca4/xq14WEhFBZWUl1dTVPPfUUGzZsoKysjMjISMLDw7UZVH5+fmRmZnLLLbfQpk0bPvzwQwBuu+027Tl26tSJsLAwhgwZAoDBULMd+oorruCWW24BoFWrVixYsACXy8WAAQMwm83Mnz8fgPj4+At6j2az7vqgl5yGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNV8s59GTDo+dVbW43JCeXcAJezlNQ/0vacaG5BPhVBn1mVHGmi+mMyjAh+fuTyS/qAz7RTyKVM81i/KJcMqYUcaaRThlzSgbumpYLVy4EIfDwZQpU7Rj7Kqrqxk7dixDhw4lPDwcAF9fX+bMmcO4ceMYNmwY/v7+uN1u4uLiGDRoEACdO3emc+fO2Gw2ze9yuThx4gSDBg1i1apVFBQUEBISwvHjxz3W7dmzh7Zt29K8eXOmT5+O2WzG19eX2vlOtaSlpXHffffx888/8+6772pzn4qLi7U1drsdg8FA//79WbZsGQ6Hg5CQEAoKCigtLSUwMBCArVu3cv3111NZWcmUKVOwWCyYTCacTqfHd2ZmZjJo0CDWr1/P+vXrady4MSUlJRQWFnqsmzlzJmPGjPHYZfbCCy9gNte88uuuu45du3YRFBTEp59+itFo1Oo7cuTIeb230zEaDRc8vPJypiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDj1ljHzWMlZP3dUui7K//+o9+eot/fyV/hEOGXMeDF9R/JL2Lc7j4jGFiLCAi6aV8/vRdTvp/RcsyifCKeMGWWsWYRT1oyyoKuGVUpKCt26ddOaVVAzy2n06NFs3LiRfv36YbVaKS4uJjY2ltmzZwOwadMmBg0aRNu2p7b2ent7c99997F27Vqg5tjAqqoqWrduzYsvvsiLL74IwGeffcaoUaNIS0ujZ8+eBAYGcvDg
QR566CGt+VWbIysri8OHDxMUFATUNMCefvpp7eecnBx69erF/v37te8sKSkhOTmZ0aNHM3r0aAAmTpzIxx9/rNUUEBBAUVER//znP7npppu07+zcuTP5+flUVlZq32GxWHjhhRe0/Bs3buThhx8mIyPD41leccUVFBcX88orr2A2mxk9ejQrVqygR48eADRp0gQAh8NBVVUVXl5ePPHEE7z//vvk5eXV8w2Cy+XGbi+t9/2XKyaT/gf4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUseaL5fT3Mpz1c4u3kcJCR73coP/nqNf3ItInwiljxovpKylzMnXxTnZmndrt2D4mlMfvbI/Fr/5z5NR7kaNmEU4ZM8pYswinrBkvB6xWv3PedaarhlVWVhZ33XWXxzWr1UpYWJg2wykmJkb7z7XUNms6derkcT02NpY5c+ZQXl6uzWSqbdTUkp2djbe3NwcPHtQ+T09P95jh5Ha7OXHihJbx2muvxWg04ufnpzWSaj8DyM/P17I6nU6aN2/u8Z1HjhzBy8tLWx8WFkZRURHR0dHamuLiYhwOB263m0OHDml5wsLCtCP+ar/TaDTWaTLNnTsXk8nEvffey5IlSwA4fPiw9rnT6cRkMpGamkpeXh4RERFUVVXx9ttvU1lZSXl5Ob6+vtQHNVDuzDSEAX4qo/58IpwyZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVEOnwin3jKGBfnRLjqUjJwCXO5T142Gmnkyja2+FyWv3p+j3t7LX+ET4ZQx48XwffDVzjpz5HZlF/D+VzsZ3r/jBblBvRe9OlVGOXwinCqjPn0yoauGVe3Mp98TFBSkHdmXnJzMtGnTPNZu374dODU7qhar1Yrb7cZms5GYmIjBYKCg4NT/SDudTtasWUNQUBAbN26kY8eO2menHxG4adMm7Ha7dt3b21trMs2YMYNPP/2UgoIC/P39CQoK0uZp1e5mysnJ4cknnyQ1NRWz2UxpaSlWq1X7jiuuuIL9+/ezZcsWnnnmGbKzs7WjAmu/MzExEV9fXxwOB2+88QZLly7F4XBgNptp3LixxzGEeXl5TJ48mZiYGDp37ozRWNO9bNasmbamVatWVFdXc/fdd1NQUIDBYMDf3x+j0YjL5cJut9e7YaVmWNWlIZyHqjLqzyfCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxpovpnNYv/Z88LvdHW2ja3Z3XOj/76j356jn9yLKJ8IpY8aL5RM5R069l4uDynhx0HtGGWsW4ZQ1o2zoqmF1LgwYMIB58+YxbNgwhg4dSl5eHj/88ANGo1GbcQUwcOBADhw4oP3s4+NDUFAQe/bsYc6cObRu3ZoFCxZQVFREZWUlfn5+TJ48mc2bN/Phhx8yZswYAgICKCsr480336R79+78+OOPmq9NmzasX7+et99+mwEDBuBwOFiyZIk2IwqgadOmGAwGfvjhB5o0acLgwYNZsWIF2dnZlJWVaeuuvfZaVq5cydixY7n++uvp0aMH8+bNw+0+7a+gAS1atGD//v188sknPPDAAxw4cICUlBQqKiq05tLkyZNZunQpTqeTqqoqBgwYwOLFiwEoLT11VJ/VasXb25uSkhL69u1LdnY2W7du1b7z9F1c54OaYXV2GsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUQ6fCKceM4aEwOvDriM3v4TcE46LPj8H9P8c9fheRPtEOGXMeKG+v2KOnHov+nSqjHL4RDhVRn36ZEJXDava+VS/x2azaUfvBQUFMWfOHMaNG8ewYcOwWCwkJiayadMmKioq8PHxAWrmS1VVVWEwGLR7IyMjcTqdzJo1i4KCAhISErj99tv5/PPP6dq1K9dddx3NmjXjww8/pKSkhGeeeQYvLy9uvvlm7r//fvr166e5WrVqBUBwcDCLFi0iIiKCV199lTfffJOqqiot
u7+/Pw6Hg4qKCmbOnEliYiIPPfQQL7/8MhUVFcCpnU/+/v5s2bKF9PR0/vWvf7Fz505++ukn7TubN2/O/v37CQgIYO7cuURHR/PWW2/x3HPP4XQ6AfD19dWON8zKyqKkpIRWrVpRVFTEt99+y2+//UarVq3w9/enVatWHDhwgIULFwI1DbGKigry8/M95oidD2qG1R9jMun/PFSVUX8+EU4ZM8pYswinjBllrFmEU0TGXdkFHDrhoGWYhTZRoRfsawg1q4z684lwqoz6zChjzSKcAT4mrk4Ix24vu6C5Vaej9+fYEN6LyqjPjBfLJ3KOnHovctQswiljRhlrFuGUNePlgNXaQGdY/dF8quLiYvLz8z1mSsXGxjJ79mzt502bNrFp0yays7OJj48HYN68eUycOJE1a9Zou49iY2PZt28fGzZs0O697777MJlM2n0tW7bEbDZTVVXFqFGj6NevHwDr1q3TMgLaMXvvvfceXbp00XwfffQRR48e1X729fXFYDCwZcsW7Zrdbufll1/Wjg6MjIwEoHfv3rz++uvaupdffpmffvpJ+67aXU8rV670aCiNGTNGa1g1atRIu+50Ojly5AhHjhwBapp4I0eOZNGiRSQkJLB8+XLcbje//fYbbrebqKgo/va3v+Hv74+XV/0Hb6rzOc9MQzgPVWXUn0+EU8aMMtYswiljRhlrFuG8GL68wlJem/szJWWn/nJQgJ+ZVwZeTVhw/Y6zOR091izaKWNGGWsW4ZQxo4w1i3DKmFHGmkU4Zcx4ob6/Yo6cei/6dKqMcvhEOFVGffpk4qI0rA4cOMChQ4c85j6dzj/+8Y9z8iQn151PtWrVKoxGI0lJSR5rMzMzGT9+PGlpaVqDZfny5VrjqXY+VXJysod/6dKlvPHGG6xcuZKCggKcTicul0ubf+Xt7c211/4/9s48Lqp6///PmYFhGRwWRQx3MAEXRE3NjRat1G56M7tZplbmpSK7V/O2+C3TLDNvZmVqWpq7Vje7LmlmlpFmVmqK+wK4gCDIMjAsAzPz+2N+HJ1QbzLzsYOf83o8eiTnfM7zvF7ngzjDez6f98389NNPzJs3jylTpuDr60twcDAtW7ZUikvV/tavX8+UKVNIT08nIiKCs2fPUlVVRXl5Of7+/vj4+FBYWMirr77Kpk2bsFqtREZGAijb71X//9ixYzz66KPs2bMHk8mkHD99+jQtW7ZUCm9Dhw4lLy8Po9FIq1at3Fal9e7dm8DAQMLCwrDZbBQUFBAQEKA809atWytjZ82axfvvv19jHm666aY/NF+aNGnSpEmTJk3XUr8vVgGUlFUxZfGvvPePxMtcpUmTJk2aNGnSJK+SBrVl3poDbr2s2rQII2lQ2z/RlSZNmjRp0nRpeVSwOnXqFP/617/Yt29fjX5L1dLpdH+4YHWp/lTTp09n6NChbv2phg0bxu7du+ncuTOzZs0iJyeHyZMns2DBAsLDw936U40aNUq57q677uKNN95g4cKFDBkyhIYNGzJnzhx0Oh2hoaHKuL///e9s27aNrKwskpOTOXDgAF9//TVt2rRx86vX61m1ahVdunThmWeeYcWKFcp2gEVFRfj7+ysrlVatWsXw4cMB1+ovvV6P1WpVxgLs27ePyMhIkpOTSUlJ4eeff3Y7b7fbAcjNzeXhhx8GYPHixcCF/lTh4eGUlZXhdDp5+umnadGiBf/5z39Yu3YtNptNWa0F8Ouvv2IwGBQuuLYdfOedd/7QfF1OnjbOvR5VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUxv8fadyKtRrKpWSVkVh04V0D6q/iXP/y+pNbNIpoweZcwsgimjRxkzi2DK6FHGzCKYMnr0Ji84yI/nhnUit7AMS7kds7+B8BDPe6to8+IdaR69I7V7lDGzCKasHmWTRwWriRMncvToUSZMmMBNN92krDqqrS7Vn2rIkCGMHTvWbVx2djZOp5P3339f2RqvqqqKSZMm8eGHH1JU
VERcXBwLFiygadOmynUOh4PS0lJatmzJhg0b8PHxQafTERQUxIIFC5g0aRLgKgiBa7XV+++/T2RkJI888giLFi1i3759xMfHK7yGDRuSlpZGamoqnTp1IiQkhIMHDyr3dDqd2O12oqKiWLVqFSaTiaFDh7JixQqys7Pdcvn4+KDX63n33Xdp2bIl9913H59//rlSsDKZTDRr1owuXbooxbHu3buzfft2ZUtAcBXShg4dygMPPABAZmYma9eupby8nPz8C5+oKS8vx+Fw4OPjQ3BwME2aNOHAgQNs3bqV+++/v1ZzqNfrPG7YeT2rLjTw0zyqjyeCKaNHGTOLYMroUcbMIpie8rLyT1/xfOb5UhI7N/PoHmrLfC2YMnqUMbMIpoweZcwsgimjRxkzi2B6k7f7yDmO/HSS2OZhdIxp6DWumjOL+l2NbN87IngimJpHOXgimJpHdfJkkkcFq927d5OUlKSsHPKGft+f6lJq1KgRMTExbn2cBgwYwKRJkxg3bpzSd+pSfktLS5k5cyZxcXEAdO/encaNG5OSkqKMS0lJwcfHhwcffJDx48cDrsLTf//7X77//nvi4+MxmVz/0I8cOZLHH39cuXby5MkcPHiQkpISt1Vhq1atIjg4WPl6/fr1ylZ+1cfbtWvHJ598oozZv38/n3/+OadPu35BExYWRllZGVOnTnXrddW7d28KCwuVr4ODg922CRw8eDAdOnRgwIABbiusevXqxdGjR9mzZ49ybOrUqUybNo3BgwdjMBgu+RyvJIfDicVSetXXXe8yGNTfwE/zqD6eCKaMHmXMLIIpo0cZM4tgeosXGXblF/yN6weqpmm4TPMikql2ngim5lGdHmXMLIIpo0cZM4tgepOXk1/K5I9/rtEPc9JjXWkYWvt+mGrOLIopo0cZM4tgyuhRxswimLJ6vB5kNgf84VVnHhWsQkNDqVevnieIWiktLY377rvP7ZjZbCY8PJy0tLQrXgcQFRWlHIuKisJqtZKVlaX0nTp27BhVVVVu43Q6HS1btlQY1QWrgAD3X56Ul5cDrlVN0dHRBAQEYDAY3IpVTqeTiooKdDodAM2aNbsk69y5c27MqKgo8vLyKCoqUngWi4W8vDy34lhUVFSN57B9+3YA2rdvX8PvzTffjMVioUWLFiQkJFBSUkJ+fj7h4eGXfZZXktZQ7vKqCw38NI/q44lgyuhRxswimDJ6lDGzCKanvDbNwwgK8LnktoBBAT7ENQtVXdNwGeblWjDVzhPB1DzKwRPB1DzKwRPBlMXj74tV4NpaeNLCn73SD1ONmUUzZfQoY2YRTBk9yphZBFNWj7LIo4LV0KFDWbt2LcOGDavVapzaymKxcKntB4ODg5Xt8y53ndFoxM/PTzmWmJjI+++/j9PppFu3bgQFBVFSUoJOp6Nnz56X5UdGRgLw+eef8+GHH5Kfn09sbCxnzpwBLvSdql+/PsePH+exxx5jz549+Pr6Eh8fT3l5ufLMjEYjOp2Oo0ePMnDgQNLT04mMjCQkJASj0YjD4frm7tWrF3q9nrFjx3LkyBGsVitBQUE4HA4GDhzolmnu3LkMHz6c1NRUAgIClEyDBg1SxjVr1ozx48fTpk0bdu7cydy5czlx4gRGo9Gtp9fVSuthVVN1YT9UzaP6eCKYMnqUMbMIpoweZcwsgulN3qTHujJp4aU/Fe3J6w81ZxbFlNGjjJlFMGX0KGNmEUwZPcqYWQTTWzytH6bmUW08EUzNozo9yphZBFNWj7LJo4JVixYtcDgcDBo0iPvuu49GjRpxqcLVnXfe6clthGrAgAHMnDkTgOHDh3P+/HlWr15NcHCw26qlkSNH8ttvv9G5c2fAVWQCOHDgAH379qVTp04sWrSI8+fPu/GrC1u//PILjz/+OCUlJSxbtozAwEC3vlM6nY7z588TEhLCuHHj2Lx5M7t27XLb9rBRo0a0aNGC7du3M3jwYIqLi9m8eTM6nY4HH3zQLdM777zDoUOH
GD16NCtXrsRmsxEVFaVkOnz4MOvWraNfv37YbDY++eQT/Pz8qKioAFyrwGojrYfVlVUX9kPVPKqPJ4Ipo0cZM4tgyuhRxswimN7ghYaaWPna3ew5co7DJ/Ol6jshiimjRxkzi2DK6FHGzCKYMnqUMbMIpqc8rR+mGKaMHmXMLIIpo0cZM4tgyupRFnlUsBo7dqzy5zfffPOSY3Q6HYcOHfLkNjVkNpvdejRV6+Kt8i53nc1mo6KiQllltWHDBnx8fKisrGTp0qWYTCaCgoIoKioiJydHKfA4HA4cDofCr96+r3Xr1uzfv5+UlBRiY2OprKykoKBAGVdQUABAly5dWLRoET4+PvTo0YNt27a5eTUYDNSrVw+DwcCMGTOIjIwkPj6eAwcOKOOys7NJT0+nR48ebN68meLiYiIjI8nPz2f9+vWMHj1ayWQ0GomJiWH27NnY7XYSEhLYt2+fkqlBgwaYzWbmzJlDdnY2Op2O4OBgAgICKCws5NSpU0RHR1/13Gg9rC4tg0H9+6FqHtXHE8GU0aOMmUUwZfQoY2aAddvTOXyqkDbNQ7i7R0uPeSI8Bhr13Ng0FJNRX+u+VRerLsyL5lF9PBFMzaM6PcqYWQRTRo8yZhbB9BZP64epeVQbTwRT86hOjzJmFsGU1eP1ILP5GvWwWrJkiSeX11qX6tFUXFxMbm6uW9+pS10HkJ6eTmxsLAApKSnccMMN2O12vv32W8BViNuwYYOykglcWW+++WaFkZeXB0Dv3r157rnnlHs89dRTbNmyRRlXWFgIwMyZM5XCk9PppH379kofLJvNRlVVFREREaxbt05hrV+/nmeffVZZZbVt2zacTiejRo3i6aefZsiQIbz++us8/fTTpKSkKAWrlJQUevbsSbNmzdi7dy8LFiygbdu2dO3aVcnUoEED3n77bU6dOsXAgQNZtWoVixYtYseOHVczFZeUtj/n5VUX9kPVPKqPJ4Ipo0cZM4tgyuhRlswHM/J5a9VvytepJ87zybcneO6hBGKbhXno0DseS8oqmb/2APvT85Vj7VqGkTSoLSZ/X08tqnJeRDNl9ChjZhFMGT3KmFkEU0aPMmYWwfSUp/XDFMOU0aOMmUUwZfQoY2YRTFk9yiKPNlPs2rXrH/rP20pMTOTHH3/EYrEox7766iv0en2NvlMXq1OnTgQFBbFx40bl2IkTJ8jPzycxMVE51qdPHwD27NmjHNuxYweFhYXccsstAJw+7VpKvnv3brd75ObmAtCgQQMAysvL0el0fP3118oYi8WC3W6nXr16AJw6dQqn08nJkyfdMlXfo2FD11Y3aWlpBAcH8+yzz3LzzTczefJkAKKjo90KeGlpaVgsFhYtWsS0adPo3r07ZrOZ8PDwGoW+119/nUGDBikFvOLiYsxmM82aNbvsc9SkSZMmTZo0XX+6uFh1saavuPTxP0Pz1x7gYEa+27GDGfnMW3PgT3KkSZMmTZo0aapLennkTQQFuH92OyjAh5dH3vQnOdKkSZMmTZo0XSyPVlhdrOPHj5OZmQlA48aNadWqlbfQNTR06FAWLVpEnz59KC8vx2g0YrPZuP/++2v0ncrKymLz5s0A+Pn58fe//513332XFStWUFZWRmVlJT4+PowaNUq57q677uKFF15g9erVrFmzBr3eVdfr1asX8fHxgKvo5OPjw969e+nWrRslJSWYTCaKiooA1/aE/v7+lJWV0bJlS1599VWmTZuGzWbD398fg8GgrJyqvsbPz69GJnBtqwhw7tw5iouLMZlMHDx4kA4dOhAZGUlUVJSykgtcq7p++eUXBg4cSJMmTRg2bBi//vorwcHB5OTkKOO6dOmiFMhWrVqlHO/Xrx++vrX/lLInTc+vV9WFBn6aR/XxRDBl9ChjZhFMGT3KlHnND2lXPL/xp5Pc06t22wN6y+PZ81a3lVXVcjhhf3o+eZZyGoUF/qkeRfFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9CbvhgZB
zHn2Vg5m5HMq10qzcBNtWni+klzNmUUxZfQoY2YRTBk9yphZBFNWj7LJ44LVN998w7Rp05RiVbWaNGnCCy+8oKxW8racTqfbny/+uloOhwO73X7Jay++5lLX/hFVF5L+yPV/5H5Op9ON+ftx1SuzLBaLUmg6deoUp06dqsEBWLt2LWvXrlWOFxUVcfbsWQAqKiooLXX1mvL19UWn0+Hn54evry8TJ078n3kuJ71eR2ioqdbXX++qCw38NI/q44lgyuhRxswimDJ6lCHz0TNFVzx/+HQhIzz8991TjyeyS6543mpzePwaRG3zci2YMnqUMbMIpoweZcwsgimjRxkzi2B6k9cz1MTl9+epvdScWRRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9//33PPPMM0RGRjJ27Fiio6MB1zZ7n376KWPGjOGDDz5w227PG1q1ahUVFRV89913yiqlTz75hMmTJ/PUU08pq6yWLl3qdl1FRQXz589n9OjRjBs3DoDu3btjs9lYsGABkyZNAmDTpk1UVlZy//3389prrwGu/lGjRo1i3759xMfHYzabqayspEOHDnz66afKPYYMGUJqaqrSryogIIAjR44wefJkHnjgAcC1Aqp79+7KqqjqsTabjZSUFCXTnDlzePfdd5UCVKtWrdi2bRv9+/dnxowZyj0TExPJz7/wiePQ0FCGDBnCmDFj+Mtf/kJSUhITJkwgKCiIhIQEABYvXozJZFLuCTB58mQOHTqEr68vNpsNo9F41XPjcDixWEqv+rrrXQaD+hv4aR7VxwM4V1hGcbkds7+B8BDP/7HT5kWOzCKYMnqUKXPrJsGknjh/2fOxTUP+9Cbkgb66K543GfV/ukdRPBFMGT3KmFkEU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9Xg8ymwP+8KozjwpWc+bMISYmhuXLlxMYeGELlj59+vDwww/z0EMPMXv2bK8XrFJSUujevbtS2AHo378/r7zyCtu3b2fw4MGXvG737t2UlJTQv39/5VhUVBTnzp1TijYAW7ZsAVw9r6rVs2dPQkJC+P7774mPj1d6PHXu3NntHtX9pvLy8mjSpAn+/v44nU769eunjAkODsZgMFBcXAxAs2bN0Ol0NGvWzC1T9T2q+2KFhIRgt9uVPlrVCgkJ4dy5c0qRKSoqirS0NBYsWIDZbGbw4MFMmDCBkpISoqKiAFefq+qtCLt06eLG69KlC5MmTeLBBx+85HP8X9Iayl1edaGBn+ZRHbySskrmrz3gtv1Vu5ZhJA1qi8m/9lt2VkubF3UyNY9y8EQwPeXd3b0Fn39/+W0B+9/c/E9vQh4eHEC7lmEczMjHcdEidL0O2rQIo4HZ/0/3KJongimjRxkzi2DK6FHGzCKYMnqUMbMIpoweZcwsgql2ngim5lEOngim5lGdPJnk0WaKR44c4a9//atbsapagYGB3HvvvRw5csSTW1xSaWlpSuGlWmazmfDwcNLSLvyy5cSJEzz66KMkJCTQs2dP5s2bB+B2bWJiItnZ2WRmZlJeXg7A3r17AZg5cybx8fE88MAD7N27l5YtWyr8+vXrA7BhwwY6duxI165defHFF0lNTVU8AkoB6oEHHqB9+/bcddddTJ8+ncrKSqxW16eAjUYjBoOBzMxMevbsSUJCAo8++ijr16/HYDBQUFAAXCiGvffee0qmKVOmkJGRgdPp5PTp00qm7du3M2/ePO666y5iY2MB1xaGPXu6Fr0PHDjwks/WYDCwZMkSbr/99quYEU2aNHlb89ce4GBGvtuxgxn5zFtz4E9ypEmTputdzz2UcFXH/wwlDWpbo89EmxauYr4mTZo0adKkSZMmTZo0adKkqW7LoxVWfn5+yiqdS6moqAg/Pz9PbnFJWSwWzGZzjePBwcGKn6KiIkaOHEmLFi2YNWsWOTk5TJ48Gb1e7+Zp6NChfPTRR9hsNjZv3ozNZiMrKwuAUaNGKSvIHnzw
QXx9fZXiXPXqqOzsbO6//37Cw8NZsGABVVVVyv3B1UcLID8/n+TkZPbv38/ChQuJiIhw28avqqqKqqoq2rVrR2JiIsuXL+fs2bM0bNhQYVVUVACQlZXF8OHDAde2hz4+Pm73HDp0KLNnz8ZkMvHxxx8TFBRESUkJCQkJynaJ1au3EhMTSUxMpLKyko8//phz586xf/9+unXrVuv58fHRmsr9XnWhgZ/mUT28s+etbiurquVwwv70fPIs5TQKq/lBgWvpUSRT7TwRTM2jOj3KlrldVAOWvNSXL39M5+DJQto0D+HuHi095nrTY3CQH88N60RuYRkWL2+XevH/1cYTwZTRo4yZRTBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT86hOjzJmFsGU1aNs8qhg1a1bN5YsWULv3r3p2LGj27m9e/eydOlSZUXPtdaqVauwWq28//77yiqnb7/9li1btpCTk6MUboKDg0lOTuaNN97g//7v/zCZTDidTmJjY3nkkUcA17Z/nTt3VopRAD///DMAAwYM4IcffiA/P58mTZq4rfAC1yo0o9FIt27dmDdvHj4+PjRt2pSSkguNw7OzswHo0KEDhYWFvPvuu0RERGA0GrHb7cq4HTt2ADBo0CDWrVuH1WqlefPmpKenu90zNTUVnU6Hn58fOTk5+Pq6tg+7/fbbazyn++67T9mu8OjRo2zevJm5c+cyYsQI5bqrkV6v87jh+fWsutDAT/P45/NOZJdc8bzV5vD475k2L+pkah7l4IlgepP38N3tvMa6WN70KOq1hprnRRRTRo8yZhbBlNGjjJlFMGX0KGNmbzIzc0s4eiiHyAYmIsODvMKsltqfo5rnRRRPBFPtPBFMzaMcPBFMzaM6eTLJo4LVv/71L4YOHcpDDz1EfHw8LVu6PoWbnp7Ovn37qF+/PuPHj/eK0YtlNpuVFU4Xq6ioiODgYODSfa46d+7Mli1b2Lp1Kw888IByPDAwEJ1Ox88//8yePXt45JFHCA8PV84bjUYeeughVqxYofAPHHBtyzV48GBmzpwJgNPppEuXLhQXFxMcHIzNZiM3N5eAgABmzZql8LZs2cJTTz1FaGgoANu2bQMgISGBCRMmKOOefvpptm7dqtzz2LFjAIwePZo33ngDcK02q+5BVT3utdde49577+WLL75g6dKlrFixgi+//JKqqqrLrk4DmDZtGrfccgv//Oc/OXXqFNHR0VeYhUvL4XBisZRe9XXXuwwG9Tfw0zyqhxfoq7vieZNRT0GBtVZsbV7kyCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYMnqUMbM3mSVllcz9IpXUtAu7T7SPCuOpe9tjCvCsr6/an6Oa50UUTwRT7TwRTM2jOj3KmFkEU1aP14PM5oA/vOrMo4JV06ZNWbt2LfPmzSMlJYUNGzYAEBkZyYgRI/j73/+u9HrypqKiomqsZCouLiY3N1fpT5WWlsZ9993nNqZNmzYA/Pbbb24Fq7S0NCIjI/H391e4ubm5Ne5ps9mUrfTOnTuHTqcjLS2N3r17A64eUeHh4RQXFxMVFcWpU6dwOByUlpa6FdOqC0HVRbG0tDR8fHzIzMx0u2eTJk2orKxUMlV7uriHl9lsJigoiNLSUpo2bQq4CobVq66GDRum8N59913effdd9u3bpxybNGkSY8eOJSQkhD59+tRYKVcbaQ3lLq+60MBP8/jn88KDA2jXMoyDGfk4nBeO63WuXi0NzP4e+9XmRZ1MzaMcPBFMzaMcPBFMGT3KmFkEU0aPMmYWwZTRo4yZvcGcszq1Rl/fA+n5zF6dyrgHEjx055Lan6Ma50U0TwRT7TwRTM2jHDwRTM2jOnkyyaOCFUD9+vWZMGGC28og0UpMTGTu3LkMHz6c1NRUTCYTsbGx6PV6ZQvCS60k6tSpE3q9nh9++IFbb72V/Px8YmNjycrKom/fvsp1BoOBI0eO8Nhjj7Fnzx58fX2JjIwEXKug
AEpKSoiIiOCTTz7h888/Jz09ncjISM6fP09AQABNmjRh165dgKuQNXbsWI4cOYLVaiU2NhaAG2+8UblnvXr12L59u1umoCDXUvfqTFarldDQUObPn8+///1vsrKyaNmyJTabjYYNG2I0GgF48MEH+fTTTwkNDaWoqAiDwUB5eTlDhgxh4MCB+Pr6YjQaMZlMFBQUAK4eW5999hmfffYZgHK8NtJ6WNVUXdgPVfOoLl7y4PbM+d2nCdu2dH2a0JO/Y9q8eEeaR+9I7R5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzN5iiuzr6y2PdYkngimjRxkzi2DK6FHGzCKYsnqUTR4XrP4MDRgwgHfeeYdDhw7xxBNPcPLkSVavXk3r1q2V3lSAUoDZvHkzAH5+fvj7+5Obm0vfvn3p1KkTixYtIjc3l/79+yvX6fV6DAYDv/zyC48//jglJSUsXboUuLBKC1wryXbv3k10dDTjxo1j8+bNZGRkKMWtat1www1s376dwYMH07x5c+bPnw/gtprJZDJRVFRUIxPglql58+b89ttvdO7cmaFDh/LZZ59hs9lo0qQJABUVFaxfv57IyEhGjx5NixYtmDFjBnv37uXnn3/m9ddfB6Bhw4ZERESQmJhIZGQkNpuNdevWKT23qgtzVyuth9WVVRf2Q9U8qoMXGgpTk3uTlVtCVp7V6/u1a/OiTqbmUQ6eCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyuhRxsyeMq9FX19Q/3NU27xcC54Iptp5IpiaRzl4IpiaR3XyZNJVFaxefPFFdDodU6ZMwWAw8OKLL/7Pa3Q6HVOnTq21wUtpw4YNGI1GYmNjmTt3LiaTiZ49e7Jjxw5ycnKIiIjAbDZTWVmJXn+hmllRUUFpaSk33HAD+/fvJyUlhdjYWBwOBxs3bqRbt27KdeBa2bRo0SJ8fHyIjo7m+PHjnDp1SuGfOnWKqKgoDAYDM2bMIDIyksDAQCwWC3Chp1RWVhY9evTg+++/x2q1cuONN5KamsqhQ4cA17Z+BQUFNTI1adKEM2fOuGVKT08nPj6evLw8ZsyYQcuWLTEYDOTk5ACwePFigoOD+fTTT/HxcU1v06ZN2bt3L6dOnWLPnj1KoSwmJobNmzeTl5eHTqejVatWgGvLwuprr1ZaD6tLy2BQ/36omkf18QCC/AzcFBeBxVJW675VF0ubFzkyi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKaMHutC5nOFZRSX2zH7GwgP8c4v17zhUWRfX1D/XNeF7x0ZPcqYWQRTRo8yZhbBlNXj9SCzWVAPq507d6LT6XA4HBgMBnbu3Pk/r9HprvwiozZKSUmhZ8+ezJkzRzlmsVjo2rWrspIpKiqKkJAQZs+erYzZtm0bAEOGDOHpp59Wjr/xxhvKKqzq3lAtWrRg4cKFbmNOnDjBTz/9RJcuXWjRogWpqamMHj2aRx55BACn00nnzp0pKSnhzJkzNGvWDIPBgN1u55133lEKWN9++y1PPvkkx48fV+5ptVpJTEzkww8/VO75xBNPcObMGSVT48aNSU1NJSkpSdnCsLi4mJtuuomsrCxsNhtpaWmcOXOG7t27X/LZrVmzRilYvfPOO27n1q5dy7/+9S+lcFVbaftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXmUg+cNZklZJfPXHnDbdq9dyzCSBrXF5O/rDYseebwWfX099VgXeSKYMnqUMbMIpoweZcwsgimrR1l0VQWrb7/99opfXyulpaVx3333uR0zm82Eh4eTlpYGuPpcffDBB269rDZs2ADAoEGDalybmZlJhw4dMJlM6HQ6t5VZlZWVbN68mfr16yv82NhY1q1bx65du1i0aBH5+fk0adIEq9WqeGzSpAmNGjUiJyeHl156iW3btuHr60twcDAhISFkZmYC0KtXLwDy8vIYOHAg6enpRERE
kJOTg8lkUu7ZqlUrUlNT2bhxI6+88gpWq5XIyEj0ej12u53Tp08zevRo7r33XrKysli6dCnHjh3D6XSi1+upqKjgnnvucctusVh47733+Oqrr8jNzQVwy65JkyZNmjRp0qRJkyZNmjRpur40f+0BDmbkux07mJHPvDUHGPdAwp9j6ndKGtSWeWvci2ptWriKapo0adKkSZOm61Me9bDKysoiLCwMf3//S54vLy8nPz+/Rk8nT3VxEepiBQcHU1RUBMDQoUNZunQpycnJJCUlkZOTw+bNm9Hr9TRt2lS5ZtiwYezatQuA1157jYqKCv7v//6P9PR0Fi9eTOvWrVm5ciWFhYW0a9dO4bdv3x6Ar7/+miFDhtCwYUMWLlyIXq/H4XAo41q1akVmZiY7d+4kKSmJAwcO8PXXX9OwYUMKCgoAaNSoETqdjoMHD9KlSxeeeeYZVqxYgc1mIyQkRGElJCTwxRdfsGHDBkaMGAHA0qVL8fX1paKigqKiIjp16kSDBg149tlnadGiBXPmzOHdd98lNTWVJk2a0LlzZwCmTZuG3W7nm2++wdfXl4SEBDZv3kxQUBA33nijR/Pj46MVvH6vutDAT/OoPp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimDJ6VGvms+etbkWgajmcsD89nzxLOY3CAv9UjwDBQX48N6wTuYVlWARsW3jx/693ngimjB5lzCyCKaNHGTOLYMrqUTZ5VLDq06cP06dPr7Fqp1rffvstzz77rNKr6VoqODiYxYsXM2XKFJKTkzGZTLRr1459+/a5jcvOzsbpdK0v79q1KxEREbz11lsUFBTw4YcfUlRURFxcHAsWLOCDDz5Q+ltVX3PDDTewYcMGfHx86N+/P9u3b+fcuXMKv7y8HIDQ0FDef/99IiMjeeSRR1i0aBEGg0EZ53Q6ldVUqampdOrUidatW7N9+3ZlTPW9mzRpwqpVqzCZTAwdOpTPPvvMLdOqVauwWq28//77mEwmXnzxRfR6PVlZWUo/rOjoaN555x3Onz+PwWCgsLAQgAULFpCQkFDr567X67zS+PR6VV1o4Kd5VB9PBFNGjzJmFsGU0aOaM3/yzRH2Hs2lY0xD7u/T2ivMasn0HOsKTwRTRo8yZhbBlMmj9rNW86g2nqfME9klVzxvtTm88r7eW7lF/o5B7XOttu+da8ETwVQ7TwRT8ygHTwRT86hOnkzyqGBVXbS5nCorK4VsL2c2mykuLq5xvKioSOkTBRAdHc2iRYuUr5cvX86uXbuoqKjAz88PcK1uCgoK4siRI8q1N9xwAwUFBYwbN47Bgwe78W+44QYAzpw5A8Df//53HnroIWXMiy++yOrVqxVWdUFo06ZNyhin08nKlSuVgpXNZgMgPj7eze+WLVvYunWr8gxPnToFwIwZM4iPj1fGHTp0iN27dyv3TElJoXv37gQHB/Pcc89RWlrK0qVLefjhh5V+WPfffz/vvvsu//jHP3jyySd56KGHKCkp8ahYBeBwOLFYSj1iXI8yGNTfwE/zqD6eCKaMHmXMLIIpo0c1Zz6Qfp43l+9Rvk49cZ4lGw7x4sOdiGsRpgqPongimGrniWDK6FHGzCKYMnnUftZqHtXG8xYz0PfKvcZNRj0FBdZasUGe51iXeCKYMnqUMbMIpoweZcwsgimrx+tBZnPAH151dtUFq5KSEiwWi/J1YWEhWVlZNcZZLBY2bNhAeHj41d7ifyoqKkrp61St4uJicnNziYqKuuJ1AOnp6cTGxgKuXlONGzcmMjJS2drwxhtv5OjRo273cDqdpKen07NnTwClV1VZWZnbPQICXNXTxo0bK+ftdrtbMU2n0+Hv768UrKoLUb9nNWrUCEDxVX3+4hVcF9+zeqvD6h5fb775Jhs3buTDDz/kpptucuvxdebMGXJzcwkNDeWRRx5h165d+Pv789JLL/Hiiy9iMtX+E0xaQ7nLqy408NM8qo8ngimjRxkz
i2DK6FGNmS/+BerFemPZbha+cHutuRdLhudY13gimDJ6lDGzCKYMHrWftWKYMnpUW+bw4ADatQzjYEY+jos+h6zXuXpENTD7e8Xv9f4c6yJPBFNGjzJmFsGU0aOMmUUwZfUoi666YLVo0SJmz54NuAovU6dOZerUqZcc63Q6+ec//+mRwUspMTGRDz74wK2X1VdffYVer1cKSpdSp06dCAoKYuPGjUrBqqioiKqqKu6++243/po1a5RVVACff/45hYWFfPjhh6xatYrmzZuj0+nYsmULo0aNUsYdPnwYgKCgILd733HHHZSXlxMXF8eYMWMoKSmhSZMmigeAffv2kZCQgNFo5I477qBZs2YAbj23DAYDL7zwAhUVFURGRvL3v/+djIwMdDodRqMRuFAszMzMxNfXl/nz5xMREeHW4ysvLw+A119/XVnB5XQ6Wb16NcXFxbz77rt/bDIuIa2HVU3Vhf1QNY/q44lgyuhRxswimDJ6VGvmNT+kXfH8xp9Ock+vlrXmy/Ic6xJPBFNGjzJmFsGUxaP2s1bzqEaeN5nJg9sz54tUUtPylWNtW4bx1L3tPX5PL9NzrCs8EUwZPcqYWQRTRo8yZhbBlNWjbLrqglXPnj0JDAzE6XTy73//m7vvvpu2bdu6jdHpdAQEBNC2bVvat2/vNbPVGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOJSIiQhk3cuRIsrKy2Lx5MwB+fn4kJSUxa9YswsLCaN26NQ6Hg/Lycrei01133cWLL77Ijz/+yHfffcf58+d5+eWXCQ4OZsaMGeTk5DB58mQAfvvtNyZNmkT//v3ZuXMnu3fvdvNavRKroqKChx9+mD179jB69GgMBoOygqqqqkoZ36RJE26//XZWrlypXFu9Mis3Nxe73U5xcTGDBg3C6XQyYcIEdDqd29aLVVVVZGZmkpCQQP/+/fniiy946KGHCAgIUHpqORwOZWz9+vVp1qwZTz/9NMuWLeOrr77i9OnTboWyPyqth9WVVRf2Q9U8qo8ngimjRxkzi2DK6FFtmY+eKbri+cOnCxmhor4TongimGrniWDK6FHGzCKY17tH7WetOKaMHtWYOTQUpib3Jiu3hKw8K5ENTESGB/3vC69CMjzHusYTwZTRo4yZRTBl9ChjZhFMWT3KoqsuWHXs2JGOHTsCri3q7rzzTlq39m7j2f+l4OBgFi9ezJQpU0hOTsZkMjFkyBDGjh3rNs7hcGC3292OjR49GqfTycKFC8nPz0ev19O/f3+34oyvry/16tWjXr16jBs3Drvdjk6nY/Xq1cqqqB07drB+/XomTpzI8uXL+c9//kNkZCSDBg1izZo1BAcHU1FRgcViISoqiltvvZX//ve/WK1WfH19CQgIoH79+gCkpqYC8PTTT/PLL7+wePFijEaj4r26YHXs2DF8fHyYNGkSH374IVlZWdSrVw+Hw6FsC5idna3k+O233/jtt9+UrwsKCpQVVtXbOnbr1o0dO3bw1FNPcdddd9GmTRv69u3LsWPHalWw0npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGtmVs3CSb1xPnLno9tGqL1nbjOeCKYMnqUMbMIpiwetZ+1mkdv8H7Ym8WxzCJaNwmmV3ykxzzwvscgPwM3xUVgsZR59D19sdQ+LyKYaueJYMroUcbMIpgyepQxswhmXfB4ID2f03lWmoWbaONhz9PrSWazwB5WF+vpp592+7q4uJjAwECqezOJVHR0NIsWLbrimKVLl9Y4ptPpSEpKIikpCYBhw4YpK5mqVVxcTH5+PuPHj2fw4MEMGzaM4OBgpVgFMGDAANavX8/58+dZt26dcnzatGlKP6wdO3bgcDhwOp08//zzPP/88wBMnTqVpUuXKj21Dh06BLi2EazO5HQ66dy5M1arlaioKGw2G9nZ2djtdu68807uv/9+ALZs2cJTTz1Fq1atANi2bRsAt9xyC/Pnz1d8JSUlsXXrVvr16wfA
zp07AYiLi7vkc6yoqLjis72StP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPast8d/cWfP795beq6n9zc63vxHXKE8GU0aOMmUUwr3eP2s9acUwZPKaftTB16a9U/95r6+5MFqw/yEsjb6J5hFkVHkXzRDBl9ChjZhFMtfNEMDWPcvBEMDWPtVNOQSmvL/mVkrILO6kFBfjw8sibCA8J9IZFaeTxZoqpqamMGjWKDh060K1bN37++WcA8vPzefLJJ5XCiFqVmJjIjz/+qKw4gpr9sNLS0pTiUrV69eqFTqcjJSVFOVZZWcnXX39NYmKich1ARkYGGRkZyjidTofD4eDmm28G4OTJkzRo0IBNmza5jTEajQQFBdGkSRNOnTqF3W5Hr9fz9ddfK+MaNmwIoBSs0tLSCAwM5JdffnHL5HS6OqlWZ9q/fz/h4eGsW7eO7t27065dO4YOHcqnn34KUGObR02aNGnSpEmTevTcQwlXdVyTJk2aNF29tJ+1mmqri4tV1bI74LXFv/45hjRp0qRJkyZNQvX7YhVASVkVU7R/+69aHq2w2r17NyNHjiQiIoKBAwfy2WefKefCwsIoKSnhk08+oVu3bh4b/b1OnDjBa6+9xp49ezCZTAwaNIh//vOfGI3GK17ndDr58MMPWbFiBfn5+dx44434+fnV6Ic1cOBAXnvtNbZt20ZpaSkrVqxg48aNbNmyBXD1wwoLC2Pv3r307t2b/Px8ZRu/6n5YFosFX19fmjdvztChQ7Hb7ZSVlSnFo2bNminj4uPj2bp1K4mJiRQUFGAwGCgrK1O2W6zeyu/222/ntddeY+bMmRQVFeHj45rChIQEhdWgQQNKSkro27cvdrudqqoqKioq0Ol0So+v3NxciouLKS8vx8/PD7vdTmpqKnv27OHOO+9UvNVGnjZovR5VFxr4aR7VxxPBlNGjjJlFMGX0qObM7aIasOSlvnz5YzoHTxbSpnkId/do6Q2LUj3HusITwZTRo4yZRTBl8qj9rNU81kZb95ypUayqlt0BP+4/S2JC41rz1ZhZNFNGjzJmFsFUO08EU/OoTo8yZhbBVKvHfSfyahSrqlVSVsWhUwW0j6pfa75s8qhgNXPmTKKjo/n0008pKSlxK1iBqz/SF1984ZHBS6moqIiRI0fSokULZs2aRU5ODtOmTaO8vJyJEyde8doPP/yQ9957j/HjxxMTE8Py5cs5ceIElZWVSj+swYMHs337dnQ6HTNmzCA5OZmysjKqqty/8fz9/XE6nRQXF6PT6QgKCuLcuXMcOHBA6f+k0+mIi4tj48aN6PV6fH19MRgMFBcXu21FGBwcTFBQEBaLBYfDQb169SgvL3dbJQXQunVrvvnmG3Q6HXq9Xhl37tw5t3F2ux273U5FRQX+/v74+flRVFTEkSNHiImJwel0Ul5eDri2IiwqKsLX15eqqqoaWyRejfR6HaFeaD58vaouNPDTPKqPJ4Ipo0cZM4tgyuhRzZkfvrudVziXkkzPsa7wRDBl9ChjZhFMmTxqP2s1j1ejtLPFVzx/PMvCoNs87wOupszXiimjRxkzi2CqnSeCqXmUgyeCqXm8emXln77i+czzpSR2rv3iENnkUcEqNTWVcePGYTQa0el0Nc5HRESQl5fnyS0uqVWrVmG1Wnn//fcJCQkBXAWayZMnk5SUpKwi+r0qKiqYN28ejz32GI888ggAnTt3pl+/fsTGxrJq1SoA1q9fz+LFi9mwYQNRUVGEhIRw8803s2HDBvbt20d8fDzgWqXUsGFDfvjhB+Uezz77LO+99x79+vXDbDZjs9n48ssvmTRpEg888AAAixYt4o033uCrr77iqaeewmw2c/DgQaqqqti6dauSqW/fvpw5c4acnByCg4MB+Pzzz/nLX/7CjBkzANd2g3fddRffffcdTz/9NGazmYKCAoqLi9myZYvSd+vf//43H330Ed988w0xMTGYzWb0ej19+/Zl1qxZiv/evXvz888/U1lZ
ia+v71XPjcPhxGIpverrrncZDPI1GZTRo4yZRTDVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEcy64PFcYRnF5XbM/gbCQ2r3i6uoG+qx9QrnW0WaKSio/YdEZZwXGT3KmFkEU+08EUzNozo9yphZBFOtHiPDrvyaoXH9QI/+7b8eZDYH/OFVbB4VrHx8fHA4Lj+ROTk5BAZ6v6lYSkoK3bt3Vwo7AP379+eVV15h+/btDB48+JLX7d69m5KSEvr3768cMxqN3HHHHWzevNmNHxMTo/StioqKwmazERISwvfff098fDznz5/HZrPRpUsXt3sMGDCA9evXc+bMGeV6h8NBv379lDHZ2dn4+/uzY8cOnnrqKaKiovj666/p0aOHksnpdGKxWHA6nWzfvp2//OUv+Pj4kJOT4+a/uk/W4cOHsdlsREVFUVJSAkC9evWUcSdPnkSv1yvbEbZs2ZJ9+/YpPa2qFRwczLlz5/jtt99qZPuj8nbTu+tJMjQZFM0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY9y8EQwNY+1U0lZJfPXHmB/er5yrF3LMJIGtcXkf3Uf5OzVPpLFGw9fcltAgx56tLvBK/llmBfRPBFMtfNEMGX0KGNmEUwZPcqYWQRTbR7bNA8jKMDnktsCBgX4ENcsVPt9+VXIow0fO3TowKZNmy55rrS0lNWrV9e66HElpaWlERYWxqOPPkpCQgI9e/bkgw8+IDw8XCngXO46gO+++45bb72V+Ph4HnjgAYxGI1lZWcoWeWlpaURGRjJmzBg6duxIamoqW7dupUmTJgqjejVWeHg4AwcOpH379tx1110cP35cYXTq1AlfX18CAgL44IMP6NmzJx06dGDFihU0btxYYSUmJlJWVoavr6+SqWvXrhQVFREaGkpaWhpGo1HpZ3X06FHuuusu2rdvz3PPPUdoaChVVVWcPn2aXr16KVsP9u/fnw4dOhATE8N3332Hj48PgwYNAqBHjx4Aygq47OxsEhISOHbsGODqEaZJkyZNmjRp0qRJkyZNmjRd75q/9gAHM/Ldjh3MyGfemgO14r008iZ+/yFig951XJMmTZo0adJ0/enlkTcRFOC+NigowIeXtX/7r1oerbB65plnePjhh/n73//O3XffDcCRI0c4c+YMCxYsID8/n6eeesorRi9WUVERGzZsoE2bNm49rPR6PUVFRZe9zmKxYDAYmDNnjlsPq8WLF+N0OikqKsLf35+ioiJOnTpFREQEM2bMID8/n5dffpnDhw/jcDj4/PPP+eijjwBYsmQJ999/PxMmTGDChAm8/fbbikc/Pz9iY2NJTU1l+fLlDBs2jL1797Jnzx7Onj2LzWYD4K677uLZZ59l06ZNtGrVihEjRrBq1SoMBgMOh0PJdOutt3Lw4EHeffddBg4cSIcOHVizZg16vV65Z3R0NIMGDWL16tUUFRUpfbfsdjvz589XemsNHDiQCRMm8PHHHxMVFcXSpUuprKx0e8a1lY+P9xrfXS+SpcmgSJ4Iptp5IpgyepQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2Cq1ePZ81a3lVXVcjhhf3o+eZZyGoVd3c4x0Y1D+HhCX7bty+LomSJaNwmmV3xkrT1eLFnmRSRPBFPtPBFMGT3KmFkEU0aPMmYWwVSzxxsaBDHn2Vs5mJHPqVwrzcJNtGkR5g2L0smjglWHDh2YP38+kyZN4vnnnwdg2rRpADRr1oz58+cTGxvrucvfyel0YrPZavSwmjhxImVlZZe9rqqqCrvdzuOPP+7Ww+qWW26hoqJCGVdaWkpRURGrVq1y29bv5Zdf5tChQ8yYMYPbbruNL7/8klatWvHqq68C0LhxY86fP6+s1AJo2rQpqamp+Pn5sWzZMuLi4pg/fz5PPvmksp2ir6+v0gMsMzOTlStXcscddxATE8PUqVOVTC1btgQgKCiIjRs3EhkZyeuv
v86SJUs4cuSIcs9nnnmGb7/9FqvVisFgwG6306VLF5599lmWL19OdHQ0er2eESNGsHjxYp5//nlsNhsNGzbk3LlzAJfsSfZHpNfrCA011epaGXS9Nxm8FjwRTLXzRDC9ycvMLeHooRwiG5iIDA/yGlfNmb3NFPUMQa7nWFd4IpiaRzl4IpgyepQxswimjB5lzCyCqTaPJ7JLrnjeanPU+j3uPbfcWKvr/oiu93m5FjwRTLXzRDBl9ChjZhFMGT3KmFkEU80ee4aa6Pm/h2m6gjwqWAF0796dTZs2cejQITIyMnA6nTRt2pR27drVuujxv6TX64mMjKzRw2rixIlYLJbLXle9aqhPnz7KMaPRSGxsLD/++CPBwcGAq7BVr149pVgFcP/99zNp0iSioqJYv349hw4d4ssvv6Rjx47KmKVLl7Jy5UomTZqkrFYqLi4G4JtvvlH4AE2aNOHMmTPK1waDgcaNG/P1118rxywWC1OnTlUyVa/IGjduHMOGDVPG7d+/nyNHjij9wpYsWYLRaOSHH35g/fr1vPjii7z99tsMHz6cOXPmMGPGDAD++c9/cu7cOb788ksACgsLFWZ4ePhln+OV5HA4sVhKa3Xt9SyDQY4mgyJ5Iphq54lgepNXUlbJ3C9SSU278InU9lFhPHVve0wBV7fXvyiPInjeZIp6ht70KIongql2ngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCqVaPgb5X/r2FyaivdaN0tWYWyRPBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOr2DwuWFUrLi6OuLg4b+GuWldTHPsjYy83xul0es1HbVm/v+73444fP05UVBRGo1E5ZjAYiImJ4dSpU8oxf39/2rZty6FDh3j77bfZu3cvr7zyCuBaPVdbaU3kLq/rvcngteCJYKqdJ4LpDd6c1ak19vo/kJ7P7NWpjHsgwSM2qDOzt5minyHI8RzrGk8EU/MoB08EU0aPMmYWwZTRo4yZRTDV5jE8OIB2LcM4mJGP46K32nodtGkRRgOzv8d+1Zb5WvBEMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkqypY/fLLL7W6SZcuXWp13eXkcDjIysrCYrFgNpsB+OqrrwCUrwFOnDjBa6+9xp49ezCZTEr/pi1bthAfHw9AZWUlhw8fBlB6WPn4+JCXl0evXr2wWCzExcVx9913Y7fblVVMpaWuVURffvkl69atw9fXlzvuuIOCggLAtc3fxX7uueceCgoKiIyMZPjw4Zw5c0bZErA60+nTp+nevTtlZWV07NhReW7VjOoC1OzZs3nrrbcwmUwMGjSIXbt2uXmqqqpiz5499O7dW9niz263c/jwYaWoaLPZeP311/n000/x8fHhr3/9Ky+99BIACQkJbqvLrlZaD6uakmnPVlE8EUy180QwvcUTsde/tz2K4nmLKfIZesujSJ4Iptp5IpiaR3V6lDGzCKbaeSKYmkd1epQxswimmj0mD27PnN+tem/b0rXq3ZP3t2rOLIongimjRxkzi2CqnSeCqXlUp0cZM4tgyupRNl1VwWr48OFXvYJIp9Nx6NChqzZ2Jel0OoxGI8nJySQlJZGTk8P06dMJDg4mIMC132RRURGDBg1Cr9cze/ZscnJymDx5MjqdjoULFxIWFkbr1q1ZuXIlVqv78v7q7fwAnnzySbZs2cLUqVMJCQmhXr16gKsoBFBSUkJiYiLt27dn0aJFNVjVX+fn5zNixAhycnKYMmUK/v7+biulqotXwcHBjBw5ki+++IL33nsPs9msZKruZZWfn88999xDw4YNWbJkieLlYlZFRQV6/YW/GBMmTODkyZO89tprAJSXl/Ppp5/i7+9PVFQU+/fv57PPPgPgX//611XPSbW0HlZXlkx7toriiWCqnSeC6SlP5F7/1VJbZm8zr8UzhOv/OdZFngim5lEOngimjB5lzCyCKaNHGTOLYKrRY2goTE3uTVZuCVl5VtX3
ZhXB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiqypYLVmyRJSPq1JwcDB9+vThzJkzJCcnYzKZGDJkCOvWrVP6RK1atQq73U6DBg3o3bs3ADt27GD9+vU8/PDDLFy4kPz8fOLi4hgxYgQfffQRwcHBVFRUUFxcTPPmzYmJiWH+/PkYDAYCAgLQ6/UKPzU1FYBhw4bxyy+/sGPHDkJDQ5UCVfW4Y8eOYTAYGD58OP/973+xWq00aNCAiooKgoJcL4Czs7MB6Ny5M0ajkblz5xIQEIDBYMButyus3bt3A/DEE0+wceNGsrKyCAsLU1ZRVY/7+OOP2blzJ3PmzFHYhYWFzJ8/X1m1deLECXQ6HQ0bNuTIkSOAaxUWQEREBGVlZUqh7Gqk9bC6tAwG9e+HqnlUH08E01u8urTXv1rnReQzBHmeY13iiWBqHtXpUcbMIphq5wGcKyyjuNyO2d9AeIjnb0plnBcRTLXzRDA1j95hBvkZuCkuAoulzKPXYaL8iWBqHtXpUcbMIphq54lgah7V6VHGzCKYsnq8HmQ2C+ph1bVr11oZ8raioqIoKChg0aJFyrHi4mI+/vhjZSu7lJQUbrvtNubMmaOMGTBgAOvXrycoKIjvv/9eOT5t2jQiIyPx9/dnx44dOBwO9Ho9s2bNUsZMnTqVpUuXKvzqVWPNmzdn4sSJgGtFWefOnbFarURFRWGz2cjOzsZut/PEE0/w/PPPA64tCZ966imFtW3bNgCCgoKYP3++cs+kpCS2bt2qjDtx4gQA7du3Z+zYsQBYLBa6dOmCwWBQtjzU6/V0796d7t27ExMTA8C8efMICwtT2Onp6djtdjIyMpRjx48fB6Bv374MGDCAmTNn/tEpcZO2P+flVRf2Q9U8qo8ngukpry7u9a+2ebkWz9BTj9eCJ4Kpdp4IpuZRDp4IpowevcErKatk/toDblu7tmsZRtKgtpj8fT21KOW8iGCqnSeCqXmUgyeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFV1WwupLOnz9PZmYmAI0bN6Z+/freQtdQYmIic+fOZfjw4aSmpmIymYiNjUWv19OzZ08A0tLSuO+++9yu69WrFzqdjs8++4zPPvuM/Px8YmNjycrKom/fvsp1ABkZGTz22GPs2bMHX19fwsPDcTgc3HzzzQCcPHmSBg0a8Mknn/D555+Tnp5OZGQkTqeToKAgmjRpwvHjx7Hb7ej1esaOHcuRI0ewWq20atUKQPl/WloagYGB/Pzzz26Z/P39AZRMmZmZBAcHM3/+fP7973+TlZVFy5YtMRgMREZGKj2uwFXAe+ONN5SvJ0yYwKuvvkrDhg0B6N27N4888gi//vorx48fp7y8nAYNGpCXl8fs2bNp0aKFV+dMkyZN15+SBrVl3hr3X9a1aeH6ZZ2mPybtGWrSpElT3dT8tQc4mJHvduxgRj7z1hxg3AMJf44pTZo0adKkSZMmTZo0aarj8rhgtWPHDv7973/X6FMVFxfH+PHj6dGjh6e3qKEBAwbwzjvvcOjQIZ544glOnjzJ6tWrad26NREREYBr5dGmTZvYtGkTmzdvBsDPz4+AgADOnj1L37596dSpE4sWLSI3N5f+/fsr1/n6+qLT6fjll194/PHHKSkpYenSpQA0a9ZMGdesWTN2795NdHQ048aNY/PmzWRkZBAZGQm4+mgB3HDDDWzfvp3BgwfTvHlzZRVVXFycwgoLCyMrK6tGJsAtU7t27fjtt9/o3LkzQ4cO5bPPPsNutytjqjVixAiysrKUrw8ePMgDDzzA3LlziY2NJTw8nE2bNtGrVy8SEhJYtmwZTZs2JS8vj+zsbKWAVxt50pT2elVdaOCneVQfTwTTm7zgID+eG9aJ3MIyLF7eDuni/6uN502mqGd4sTcZnmNd4Ylgah7V6VHGzCKYauWdPW91+6BBtRxO2J+eT56lnEZhgX+qR5FMGT3KmFkEU0aPMmYWwZTRo4yZRTDVzhPB1Dyq
06OMmUUwZfUomzwqWG3evJl//OMf1K9fn8cff1xZlZOens6aNWsYPXo077zzDnfccYc3vCrasGEDRqOR2NhY5s6di8lkomfPnuzYsYOcnByleONwOHA6L+yzVFFRQVlZGeHh4ezfv5+UlBRiY2NxOBxs3LiRbt26Aa6t/SorK+nZsyeLFi3Cx8eHmJgYDh8+zKFDhxR+VlYWUVFRGAwGZsyYQWRkJPXq1aOgoMDNb1ZWFj169OD777/HarUSFxfHnj172LNnD8OHDwfAarXWyNS8eXNOnjzplikjI4P4+Hjy8vKYMWMGLVu2xNfXlzNnzij327NnDwcPHnTzkJOTA8CcOXN47733AFi9ejVhYWFKYezuu+9mz549rFixgocffrhWc6PX6wgNNdXqWhlUFxr4aR7VxxPB9CZP1N95NWf2NlPkz02ZnmNd4GXmlnD0UI7WzF2lTLXzRDBl8uitv38nskuueN5qc3j8c12meRHJVDtPBFPzKAdPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWL3zzjvceOONLF++nKAg9zd9TzzxBA8++KCQglVKSgo9e/Z0609lsVjo2rWrspLJbDYzYMAAnn32WWXM7t27cTqd9O7d2227vDfeeENZhWU2m6mqqqJ169YsXLhQGfPJJ58wceJE9uzZw6233kq9evU4deoUL7zwAo888ogyrn///qSlpXHmzBmCg4MBVwHsnXfeUb7OyMjgrrvu4tixY8o9S0pKSExMdMs0bdo0Pv74YyVTUFAQhYWFvP76624roDp37sy5c+ew2WwYjUZSUlIwm838/PPPxMbG8txzzzFq1CjuvfdeAgMvfNrz4p5W4CpYZWdn85///OfqJ+X/y+FwYrGU1vr661UGg/ob+Gke1ccTwZTRo4yZRTBl9OhNXklZJXO/SCU17cKqjPZRYTx1b3tMAbXvdyPjvIhgqp0ngimTR2///Qv01V3xvMmop6DAetVckGteRDLVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEUxZPV4PMpsD/vCqM48KVqdPn+bZZ5+tUawCCAoKYsiQIbz99tue3OKSulR/KrPZTHh4uNKDKioqSvlztapXHXXs2NHteHR0NIsXL6a8vJyoqCgApddTtdLT0zEajZw6dUo5v3//fmU8uApTeXl5isebb74ZvV5PQECAUqyqPgeQm5ureK2srKRx48Zu98zMzMTX11cZHx4eTmFhIS1btlTGFBcXY7VacTqdnD59mujoaNLS0mjZsiU6nfub6Us9k99r165dbplqI62h3OVVFxr4aR7VxxPBlNGjjJlFMGX06A3enNWpNfrdHEjPZ/bqVK/0u5FxXkQw1c4TwZTBo7f//oUHB9CuZRgHM/JxXNjMAb3O1Yewgdnf4/wyzMu1YKqdJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUeFayioqLIz8+/7Pnz588r2wR6UxaLhcrKSh599FH27NmDyWRi0KBBmM1mpW9UYmIiH3zwARaLBbPZDLhWWAGcOXOGW2+9lfz8fOLi4rjttttwOp0UFRXRqVMndDodOTk5jBkzhm3btuHj40NVVRVhYWEKv02bNnz77bfs3LmTt956i/T0dEJDQ7FYLICrf5XRaFSKTG+++SZr167FarViMpkICQmhpMS1nUivXr0AOH78uJIpICCA4uJit0ytWrXi2LFjbNq0iTVr1pCVlUVoaKjyXKrHWSwWAgMDmTBhAgAzZ85k7969mEwmZQxAamoqK1as4IcffgBcfa+OHTvG7NmzPZofrYdVTdWF/VA1j+rjiWDK6FHGzCKYMnr0Fk/rdyOfRxkzi2B6gyfq71/y4PbM+d2qrbYtXau2PHktLMu8iGaqnSeCqXlUp0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNHhWs/vWvfzFu3Djat2/vtkUduPpbffLJJ8ycOdMjg5eS
0+nks88+o02bNsyaNYucnBymTZuGXn/hG2Ho0KEsXbqU5ORkkpKSyMnJ4YcffkCn07Fw4ULGjx9PTEwM48aNc/Po5+eH2Wzm2LFjFBYWMnr0aL777jtSU1Px9b2wZUjXrl0B+Oijj+jduzd9+vRhyZIlNby2adOG7777jqVLlzJixAjOnTvHunXr8Pf3V/prNWrUCJ1Ox48//kiLFi144okn+O9//0tBQQGVlZUK6+abb2bjxo28++67/OUvf7nsPUtLSzl69CiHDh0CoFOnTuzbtw+LxUKDBg2UcZ999hkpKSmYTK499o8fP063bt08KjJqPayurLqwH6rmUX08EUwZPcqYWQRTRo+e8rR+N2J4Iphq54lgXu8eRf39Cw2Fqcm9ycotISvPqvWlUylT7TwRTM2jHDwRTM2jHDwRTBk9yphZBFNGjzJmFsGU1aMs8qhgtXTpUkJDQxkzZgwNGzakWbNmAJw6dYpz587RokULlixZ4lZU0el0zJ071yPTRqORiooK3n//fUJCQgCw2+1MnDgRHx9XpODgYBYvXsyUKVNITk7GZDKRkJDAzp07GTFihNJ3KioqioKCApxOp7JtX7169SgqKsLpdDJ37lzi4uJ44YUXeOONN5QiU/369QEwmUzs2rWLvXv3ctddd5GTk8O2bdsUVkREBODqF7VkyRIiIyP5v//7P958802MRqOSyc/Pj4qKCoqKipg7dy6dOnXiL3/5C7NmzVIy3XDDDQCEhITwzTffYDKZGD58OFu3biUtLU25Z25uLqWlpZSWunpJ7dy5U7nPxQWrH374gby8PGUbQ6fTyc6dO9m4cSNjxoyp1dxoPawuLYNB/fuhah7VxxPBlNGjjJlFMGX06C2e1u9GPo8yZhbB9AZP5N8/gCA/AzfFRWCxlHnEqZYs8yKaqXaeCKbmUZ0eZcwsglkXPJ4rLKO43I7Z30B4iOe/pKwLmWX0KGNmEUwZPcqYWQRTVo/Xg8zma9TD6ujRo8CFQkpmZiYABoOBG264gYqKCmVMtX7fV6k2MhgMBAUFKcUqgN69ewNgs9mUY9HR0SxatEj5esGCBezcuZN27dopx5YvX87gwYM5fPgw/v7+ynE/Pz+2b9+ufO1wOJg2bZqy4qlRo0YAdO/e3W0LvZkzZ7Jt2zYCA11bi1RUVCj3adq0qTLuww8/pKysTPnax8eHevXqsW3bNuVYVlYWs2bNUjJV+/vb3/7Gs88+q4w7c+YMaWlpiqcbbriBzMxMDh065LbqrEOHDkrBDeC7774jPz+fhx56iJycHDp16sSCBQvwVNr+nJdXXdgPVfOoPp4IpoweZcwsgimjR0954cEBxDYL4fCpwhrn4pqHaP1uVMRUO08E83r3eC36TXnq8VrwRDBl9ChjZhFMGT3KmFkEU40eS8oqmb/2gNv2s+1ahpE0qC0mf98rXHlt/F0LpoweZcwsgimjRxkzi2DK6lEWeVSw+vbbb73l46pkt9spLCx0609VXVy6eNXS71W9UungwYMMGDAAgMrKSjIzM7Hb7ZSXl+Pv749Op6OiooKMjAxle7yffvoJp9OpbAuYnZ0NQEZGhts9qgt01aub/Pz8lOurC1ZFRUUUFBS4bTH4RzKVl5cDcOzYMbd7pqWlKZ6io6MxGAzKPXv06AFAeno65eXlbgVDq9XK6NGjqayspHXr1srz0aRJkyZNmjR5V5f9uI7zcic0adLkLSUNasu8Ne6/TGzTwvXLRE2aNGnSpMkTzV97gIMZ+W7HDmbkM2/NAcY9kPDnmNKkSZMmTZrqsOpkhcJms+Hv7+/Wn2r69OkEBwdTVVWljBs5ciRZWVls3rwZcBWRDAYDixcvJjw8nNatW7Ny5UqluNStWzeCgoKoqKjAbDYzZswYxo0bR1lZGdOnTycsLEwp+BQVFQFw4sQJOnbsSGVlJWFhYZw7d87tvN1uJygoiEmTJvHqq6/i4+NDYGAgfn5+SgGqOpOvry+JiYlUVlYS
HByM1Wp1y1TN/O6770hISMButxMaGkpubq7beYPBgNlsJikpCXAVzXx8fNxWpAHceuutWCwWwLVKCyAmJoa3336bu+++u9bz40mj6etVdaGBn+ZRfTwRTBk9yphZBFNGj97inT1v5dAlVlcBHDpVSJ6lnEZhgbViyzgvIphq54lgyuQxOMiP54Z1IrewDIsXt2u62JvaMotkyuhRxswimDJ6lDGzCKZaPZ49b3X7MES1HE7Yn56vvcZTAVPtPBFMzaM6PcqYWQRTVo+yySsFq8rKSnJycrBYLG5bzlWrbVvvfnpRp9Nx//33c+TIEaU/1ZAhQ0hJSXEb53A4sNvtbsf0ej1PP/00CxcuJD8/X1lZZLPZeO2116ioqOCll16iefPmtGjRgnHjxuHj48Mdd9xBTk5OjXw6nQ6TyURBQQEWiwVfX1+3bQkdDgfl5eXUq1ePqqoqysvLsVgsNGzYkJycHDdWeXk5ERERFBQUUFpaSnl5udKX6mIZjUYCAwMpKirCYrEQGBhIScmFhtJVVVWUlpYSEhJCaWkpFRUVlJWV0axZMxyOC0sRq4tVv9e4ceNqXbDS63UeN46/nlUXGvhpHtXHE8GU0aOMmUUwZfToKe9EdskVz1ttDo//7ZRxXkQw1c4TwZTJo8jXqGrNLJIpo0cZM4tgyuhRxswimGrzqL3GE8MTwVQ7TwRT8ygHTwRT86hOnkzyqGBlsVh48803WbdundLb6WI5nU50Oh2HDh3y5DY1ZDabMRqNbv2pANavX+9W4Fm6dGmN6yorK3nkkUeU1Ufz5s3j/fffR6fTcccdd+Dv78+cOXM4efIkS5YsISIiQrl+6NChSr+ugADXN12/fv2YOXMm4Fol1bdvX3JychQf58+fp6qqihUrVhAVFQXAtm3bGDVqlJtXvV5PgwYN3Ipuzz77LBs3blTGVRebnnjiCZKTkwEoLCwkMTERQBlXWFiIw+Hgyy+/VFZVffLJJ7zyyivExcUp/K5duxIYGMi8efMYPny48mdP5HA4sVhKPWJcjzIY1N3QFbzv0ds8EUy180QwZfQoY2YRTBk9eosX6Hvl/p0mo56CAmut2DLOiwim2nkimDJ6lDGzCKaMHmXMLIIpwqO33xfVhcyaR/V41F7jyedRxswimDJ6lDGzCKasHq8Hmc0Bf3jVmUcFqxdeeIHvvvuOAQMG0KFDB+rVq+cJ7g8rKipK6dtUreLiYnJzc5Wi0OWuA1c/p9jYWABSUlJo1KgRdrsdf39/ADp06EBmZibbt29n8ODBgKv4lp6eTs+ePQHIy8sDUApY4Fr51K5dO3JycpR7FRYWAlC/fn1lXM+ePfH19cVkcn3SxmazUVVVVWM11W233cb69euVolP1doPVvbAAQkJCaNGiBUePHlWOFxcXo9fr3Xj9+/dn4sSJSkaR0hrKXV5qb+gKdaPJoNo9yphZBFPtPBFMzeP1yQsPDqBdyzAOZuTjuGihtl7n6qPTwOzvsV8Z50UEU+08EUwZPcqYWQRTRo8yZhbB9AZP9PsiNWYWzdQ8Xr2013hieCKYaueJYGoe5eCJYGoe1cmTSR4VrLZv387w4cOZMGGCt/z8ISUmJjJ37lyGDx9OamoqJpOJ2NhY9Hq9UlC6lDp16kRQUBBTp07l1KlT5OfnU1VVhZ+fH4MGDVLG9enThw0bNvD+++8zZcoUfH19iY+Pp7CwkFtuuQWA06dPA65+Utu2bSM9PZ3IyEiqtyBs0KAB4NrmT6fTMXbsWI4cOYLVaqV9+/bY7XalwHfq1CmcTicZGRlumZo3bw5Aw4YNAVfxy2AwsGzZMmbPnk1WVhYtW7YkJycHHx8fjEYjACUlJVRVVTFw4EBOnz5NWVkZn3zyCYBbH6tz585x6tQpYmJiADAYDMydO5cnn3zSswnSJExaQ1dNmjRpqptKGtSWeWvcf7HWpoXrF2uaNGnSpEmTpquT9r5Ik1qkvcbTpEmTJk2avCuPClYhISFKUeVa
asCAAbzzzjscOnSIJ554gpMnT7J69Wpat27ttoXfyJEjycrKYvPmzQD4+fmRkJDAtm3b6Nu3L506dWL69OmUlpbSv39/5brbb78dnU7H2bNneeKJJygpKWHZsmWEhYURHx8PuLZDNBgMpKWlER0dzbhx49i8eTO7du0CoKioCH9/f8rKyqhXr56yWqt58+Z8+OGHOBwOgoKClLHg6gX2+0zg6pNVfU+TycTevXvp3LkzQ4cO5bPPPqOwsBC9/sKSutLSUnQ6HRkZGURFRXH48GEmTJiA0WgkLCxMGZebm0uLFi2Ii4vjxx9/xGKx8M4773Dw4EGmT5+ubHt4tfLx0ZrK/V5qb+jqLY8ieSKYaueJYMroUcbMIpgyevQmLzjIj+eGdSK3sAyLl7cuuvj/3pCan6Moptp5IpgyepQxswimjB5lzCyC6S2eyPdFas0skql59IypvcaTy6OMmUUwZfQoY2YRTFk9yiaPClZ/+9vf+PLLL3nwwQfdCiaitWHDBoxGI7GxscydOxeTyUTPnj3ZsWMHOTk5StHK4XAoK54AKioq2LNnD127dmX//v1Kv6jAwEA2btxIt27dAPj2229xOp00bNiQRYsW4ePjQ48ePdi2bRv79u1TilYOh4OoqCgMBgMzZswgMjKSZs2acerUKeWeVVVVWCwWevTowffff4/VaiU+Pp5ffvlF2VawWj4+Pm6Zunbtys8//6wUtMBVjIqPjycvL48ZM2bQsmVLwsLCKCgoUMbodDqeeuopsrOz+fLLLwHXNoIOh8NtnqZPn05ycnKN7RW//vprnn/+eZo0aXLVc6PX64Q2tK7rUntDV6gbTQbV7lHGzCKYaueJYGoer3+eqH8jZZwXEUy180QwZfQoY2YRTG/yMnNLOHooh8gGJiLDg7zGVXNmUUwZ5uVavC9S87yIYmoePZP2Gs+7UrtHGTOLYMroUcbMIpiyepRFHhWskpOTsdls3HfffQwaNIiIiAgMBkONcXfeeacnt6mhlJQUevbsyZw5c5RjFouFrl27uvWdWrp0qdt1u3fvxmq1MmHCBOLi4gDo3r07jRs3VopX1XwfHx8GDhzI+PHjAVcPq5tvvpnvv/+e+Ph4TCYTTqeT++67j8cff1y5dvLkyaxYsYKSkhK31V7vvPOOW0+prl27UlxcDKAcb9u2LcuWLVPG7N+/n/vuu0/ZflCn01FVVUVSUhJ9+/ZVxj366KP8+OOP2Gw2jEYjZrOZyspKpk6dyk033cSLL77IG2+8waBBg9w89O3blyNHjrg9o7/+9a8cPny4VsUqAIfDicVSWqtrr2cZDOpu6Are8SiSJ4Kpdp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYMrmsaSskrlfpJKadmG1TPuoMJ66tz2mgNr3IVJzZlFMmeZF5PsiNc+LKKbmUZ0eZcwsgql2ngim5lGdHmXMLIIpq8frQWZzwB9edeZRwSonJ4edO3dy6NAhDh06dMkxOp3usudqq7S0NO677z63Y2azmfDw8BqrhX5/HUBUVJRyLCoqCqvVSlZWFuXl5fj7+3Ps2DGqqqrcxul0Olq2bKkwTCbXp2d+v21eeXk5AJmZmURHRxMQEIDBYHArFDmdTioqKpSt/po1a3ZJ1rlz59yY/v7+wIX+WNWqqKgAXH21oqOjiYqKqvEcSkpKyM3Ndct0KeXm5l6y6Hg10hrKXV5qb+jqqcdrwRPBVDtPBFNGjzJmFsGU0aOMmUUwZfQoY2YRTLXzRDBl8ThndWqNPkQH0vOZvTrVK32I1JhZNFOGebkW74vUOC+imZpHOXgimDJ6lDGzCKaMHmXMLIIpq0dZ5FHBasKECRw4cICkpCTi4+OpV6+et3xdURaLBbPZXON4cHCw2/Z5J06c4LXXXmPPnj2YTCaaN2+O0WjEz89PGZOYmMjs2bNxOp1K36ns7GwAZs6cyaRJk4iLi+PFF19040dGRgLw0Ucf8dZbb+Hr60ufPn34
8ccfgQt9qerXr8/x48fp168fmZmZREZGcvvtt1NeXq4UhoxGIzqdjtTUVHr27InVaqVjx474+flhNBpxOFzf3E2bNgXgueee49y5c5hMJvr168eBAwfc7pmYmMgHH3zAxIkT2bBhAwBDhgwBoGfPnpd8pq+//jpLliwBoF27dlcxGzWl9bCqKW/tX5o8uD1zfvdpx7YtXZ929PS514U9W9XuUcbMIphq54lgah7V6VHGzCKYMnqUMbMIptp5IpgyedT6EKnTY12ZF1Hvi9Q6LyKZmkd1epQxswim2nkimJpHdXqUMbMIpqweZZNHBatdu3YxevRonnnmGW/58ZqKiooYOXIkLVq0YNasWeTk5DB58mSqqqrcxg0dOpSPPvqIiooKfv75Z2w2m9IPatSoUcTExLB8+XIefPBBfH19uemmmwCUXlBZWVncf//9hIeHs2DBghr86uJYfn4+ycnJ7N+/n4ULFxIREUF+/oUX106nk5KSEtq0aUNiYiLLly/n7NmzNGzYsAbrzJkzDB8+HHBte+jj4z6NgwYN4r333mPdunV07dqVrVu3Ul5eTkJCgrJN4a+//spHH33EHXfcgcPhYOXKlcr1r7zySi2futbD6n/J0/1LQ0NhanJvsnJLyMqzen0/eagbe7aq3aOMmUUw1c4TwdQ8ysETwdQ8ysETwZTRo4yZRTC1PkTqZMoyL6LfF6ltXq4FU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYNWjQwG2ru2sls9ms9H+6WEVFRYqfVatWYbVaef/99wkJCQFgx44drF+/nlOnTinb8AUHBzNy5EhmzZrFSy+9RGBgIE6nk6ioKB555BEAOnfuTOfOnamsrFT4qampANxxxx388MMP5Ofn06RJE2UrvupxJ06cwGAw0K1bN+bNm4ePjw9NmzbFYrEoY6pXdLVp04bCwkLeffddIiIiMBqNlJaWKuN2794NQP/+/Vm3bh1Wq5XmzZuTnp7uds9Vq1bRoEEDmjVrxvbt2wG47777ePHFF5VnFR4eTmVlJTNnziQvLw+n04ler6dv377Ex8fXem60HlaXlsHg3f1Lg/wM3BQXgcVS5lHfqovlbY/e5olgqp0ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpgyedT6EHmXea6wjOJyO2Z/A+Ehtf8FSV2aF/D++yIZv3c0j+r0KGNmEUy180QwNY/q9ChjZhFMWT1eDzKbr1EPq0cffZRVq1YxZMgQpafTtdClejQVFxe79WhKSUmhe/fuSrEKYMCAAaxfv55169aRnJysHC8pKaFx48Z8++237Nixg0ceeQSn88Jm2EajkQcffJClS5cq/Oq+XF26dOH9998HXKukOnfujNVqJSoqCpvNRnZ2Nna7nddee00pKG3ZsoWnnnqKDh06ALBt2zbAVUSaP3++ct+kpCS2bt2q3PPEiROAq2A1Y8YMwLU9YpcuXTAYDMqWgf/5z38YNmwYTz75JKtXr+bFF19kzJgxGI1Ghd28eXMWLFjAypUrmTJlCq1atcJisRAeHn71E/I7aftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKYNHrQ+Rd5glZZXMX3vAbRu/di3DSBrUFpO/71Xz6uK8iGCqnSeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFHhWsbDYbPj4+3HnnnfTv359GjRpR3ZepWjqdTlmp5C0lJrp6NF3cy+qrr75Cr9crPZrS0tLo06cPjz76qNLD6u6770an05GSkqIUrCorK/n6669JTExUrgNIT0+nV69eWCwW4uLiiIyMxOFwcPPNNwNw8uRJwsLCmDNnDu+88w6+vr7ccccd+Pr6EhQURJMmTTh+/Dh2ux2dTsc999xDQUEBkZGRDBgwAIBWrVop9wwMDGT79u10796dsrIyOnbsiM1mAy70ncrMzCQoKIhXXnmF8ePHYzKZGDRoEL6+vjRq1Aij
0ciZM2fIzc1l+fLlzJo1C7vdDkBycjIfffSRUlhMS0tj/vz5fPHFF4BrdZrFYqG8vNyrc6VJkyZNmjRp0qRJkyZNl1LSoLbMW+NebGnTwlVs0fTHNH/tAQ5m5LsdO5iRz7w1Bxj3QEKtmNq8aNKkSZMmTZo0afqz5FHB6s0331T+vGzZskuOEVGwGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOVXo0FRUV8fnnn+Pr68vs2bPJyclh2rRpGI1G9u3bx+LFi2ndujUrV66ksLCQUaNGAa4VS3q9HofDVQF98skn2bJlCxs2bABQthIsKiqioqICq9VKYmIi7du3Z9GiRVitVmJiYpQx4Fp5lZ+fz4gRI8jJyWHOnDkAJCQkuN3TbrcrWxR+8cUXZGRkoNPp3DL5+PhQUlLCPffcQ8OGDVmyZAmVlZXceOONAOTl5QFQUFBAdHQ0gYGB/Pbbb+zevZvBgwcze/ZsWrVqxY8//si6desA10oug8HABx98wMaNG7n33nvp0qVLrefHkya316vqQgM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKZvH4CA/nhvWidzCMixe2M6uWmrO7E3m2fNWt6JStRxO2J+eT56lnEZhgVfNrSvzIoKpdp4IpuZRnR5lzCyCqXaeCKbmUZ0eZcwsgimrR9nkUcFqy5Yt3vJxVQoODmbx4sVMmTKF5ORkTCYTQ4YMYezYscoYp9OJ0+kkLCyM3r17A2C325k4cSKxsbEsXLiQ/Px84uLiWLBggbKdXlVVFQ6Hg4cffphz584xf/58DAYDRqNRWfEEUFpaitVq5ZVXXmHlypXs2LGD0NBQrFYrfn5+bn5btWpFYmIi//3vf7FarYSFhZGfn09AQIDCKikpYcyYMfz666/MnTuXgIAA9Hq929aETqcTh8PB//3f/7Fs2TKysrIICwsjJydHWTlVXWjT6XQcPXrUzUdGRgarVq3ipZdeIi4ujqqqKgDmzZvnluvhhx/myJEjtZobvV7ncRPe61l1oYGf5lF9PBFMGT3KmFkEU0aPsmXefeQcR346SWzzMDrGNPQaV7bnKIIngimjRxkzi2B6kyfq/YOaM3uDeSK75IrnrTaHR8+2rsyLCKbaeSKYmkc5eCKYMnqUMbMIpoweZcwsgimrR1nkUcGqcePG3vJx1YqOjmbRokWXPa/X62ncuDFff/21cqx///5MnDiRiIgI1qxZc8nrqldFDRw4UOkxBa5+XT/++KPSh6qqqop69erx0EMP8dBDDwGuglLbtm0pKysDIDDQ9Wm2zp078/zzz/P8888DsHLlSiZNmkRlZSXgWmEFMHz4cJ5++mnlnv369ePMmTNumSIjIxkxYgQjRoxQru3SpYvCqPY3fPhw5X4AI0eO5KeffqJHjx4AzJkzh4EDB/Lyyy8rY+655x6ys7MZP348DocDvf7qK8EOhxOLpfSqr7veZTCov4Gf5lF9PBFMGT3KmFkEU0aPsmXOyS9l8sc/U1JWpRwLCvBh0mNdaRh69Z/QF+FRFFPtPBFMGT3KmFkEU0aPas0c6Ku74nmTUU9BgbVWbJDnOdYlngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCKavH60Fmc8AfXnXmUcGqWqWlpfzyyy9kZmYCrkJWly5dlIKNWqTTXfkF/dWOvdyYi1dFeerjallNmzbFaDTidDqpqqqitLSUb7/9ll27dgFQUVEBuHp0bdu2jbVr19ZgvvXWW9x+++1ER0f/YZ8XS2sod3nVhQZ+mkf18UQwZfQoY2YRTBk9ypL598UqgJKyKiYt/Jn3/pHoERvkeY4ieSKYMnqUMbMIpowe1ZY5PDiAdi3DOJiRj+Oit416navnVAOzv1f8Xu/PsS7yRDA1j3LwRDBl9ChjZhFMGT3KmFkEU1aPssjjgtXSpUt55513KC0tdSuumEwmxo4dy8MPP+zpLWolh8NBZmYmw4cPJzU1FZPJRGxsLABm
s/my11WvUHrrrbc4deoU+fn5xMbGkp6eDrhWYPn7++Pj40NeXh6PPfYYe/bswdfXl/j4eOx2u1KoKy11rTTatm0bAwcOJD09ncjISIKCggDw9fV18zN27FiOHDmC1Wqlffv2nDlzRtni749mMhqN3HjjjaxatYqPP/4YcK3Muvvuu1m3bh1t27oa5b799ttK8QpgxowZ7N27F6PRyIcffkhkZGStn73Ww6qm6sJ+qJpH9fFEMGX0KGNmEUwZPcqUed+JvBrFqmqVlFVx6FQB7aPq14ot03MUxRPBlNGjjJlFMGX0qObMyYPbM+eLVFLT8pVjbVuG8dS97T1+XybTc6wrPBFMzaM6PcqYWQRT7TwRTM2jOj3KmFkEU1aPssmjgtV///tfXn/9dRISEhgxYgRRUVEApKWlsXTpUl5//XWCgoL461//6g2vVy273c6hQ4d44oknOHnyJKtXr8ZoNCq9o8C1VV5WVhabN28GwMfHB51Ox86dO+nbty+dOnVi0aJFypZ71aruMfXLL7/w+OOPU1JSwrJly/D19aVevXpuYzMzM4mOjmbcuHFs3rxZWe1UrcDAQHx9fdm+fTuDBw+mefPmfPjhh1RVVdXYlu+PZKpXrx7l5eU0atSI7OxsmjRpwrp16+jQoQPNmjUDICEhQRn//fffc/jwYQCioqK4+eaba/nEtR5W/0t1YT9UzaP6eCKYMnqUMbMIpoweZciclX/6iuczz5eS2LmZR/eQ4TmK5olgyuhRxswimDJ6VGPm0FCYmtybrNwSsvKsRDYwERke5CV3LsnwHOsaTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9/PHHdOnShUWLFmEwGJTjsbGx3HXXXTzyyCN8/PHHf0rBys/PD5vNRmxsLHPnzsVkMtGzZ0+2b9+Oj8+F2A6HA7vdrnwdGBiI0+nkpptuYv/+/aSkpBAbG0tpaSklJSXKCqzqa6vz+/j40KNHD7Zt26asNKse26hRIwwGAzNmzCAyMpKYmBiOHDminDcYDFRWVtKjRw++//57rFYr8fHx7Nq1Cz8/v6vO9PHHH7Nz507eeustsrOzOX/+PG3atOH48ePY7Xa3ubLZbIwfP17pp3Vx4as20npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGmzJFhV/73v3H9wFr3QZHpOYriiWDK6FHGzCKYMnoUkflcYRnF5XbM/gbCQzz/hUaQn4Gb4iKwWMo86lt1serCc1S7Rxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vB5kNl+jHlbp6ek8//zzbgWQahkMBvr168ebb77pyS1qLYPBQFhYGMuWLVOOZWVlcdttt2Gz2ZRjS5cudbuuqsq1Hc6wYcMYMGCAcnzw4MEcPnwYf39/wNVbys/Pj4ULFypjHA4Hbdq0UYo/jRo1AqBdu3bMnj1bGTdz5kyOHDmibB1YvTXfq6++StOmTZVxvXv3pqys7Koz6fV6unfvzueff05MTAzJyck0aNCA5557jvz8fMLDw5WxY8aMwWKxMG3aNF544QXuuOOO//Fk/7e0/Tkvr7qwH6rmUX08EUwZPcqYWQRTRo8yZG7TPIygAJ9LbgsYFOBDXLNQj/3K8BxF80QwZfQoY2YRTBk9eoNXUlbJ/LUH2J9+YQu/di3DSBrUFpO/r6cWpZwXEUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LIo80U69Wrx5kzZy57/syZM0q/pmstu91OYWGh21Z+27dvB1x9ni6n6pVKBw8eVI5VVlaSmZmJ3W6nvLwcAJ1OR0VFBRkZGcq4n376CafTqfSmys7OBnAbA3D06FHgQo+r6lVUP/30kzKmqKiIgoICLl79VdtMALt27SIoKIjQ0FDl2NKlS9m6dSsPPfQQ99577xWv16RJkyZNmjRd33p55E0EBbh/likowIeXR970JznSpEmTputP89ce4GBGvtuxgxn5zFtz4E9y
pEmTJk2aNGnSpEmTeuTRCqtbbrmFZcuW0a5dO+6++263cxs2bGD58uXcc889Hhm8lE6cOMFrr73Gnj17MJlMDBo0iH/+859uRRubzYa/vz/JyckkJSWRk5PD9OnT8fPz44svvuDzzz8nLi6OqqoqLBaL0sOqtLQUg8HAggULWLRoEUajkZCQEGWlU1FREf7+/uh0OgIDA7nnnntwOByEhoZis9kICwtDp9MpY6v9dujQAYfDQcOGDcnMzHQ7b7fbMZlMvPLKK0yePJmAgAACAwPx8/NTCmTVmQwGA7169cJut9OgQQMsFgvBwcHKyrDDhw/z1ltv0bZtW1JSUgB46623cDgcjBgxQinIzZ8/nxkzZqDT6fjkk0+U/BkZGWRnZyurw2ojT5v7Xo+qCw38NI/q44lgyuhRxswimDJ6lC3zDQ2CmPPsrRzMyOdUrpVm4SbatAjzmCvbcxTBE8GU0aOMmUUwZfToLd7Z81a3lVXVcjhhf3o+eZZyGoUF/qkeRTJl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWj7LJo4LV+PHj+e233xg/fjzTpk2jRYsWgKvokZeXR1RUFM8++6w3fCoqKipi5MiRtGjRglmzZpGTk8O0adMoLy9n4sSJyjidTsf999/PkSNHSE5OxmQy0bp1a3799VcSEhL45z//yfLly9myZYvbFnl2u10pBtntdoqLizl37hyNGzd2WylVUVFBaWkpTZo0IS8vj6KiImw2GzExMTU8+/r6EhwczPnz58nNzcVkMlFSUqKct9lslJWVER4eTmlpKWVlZVitVlq3bs3x48eVcU6nE5vNRuPGjcnNzaWgoACbzea2aqpBgwbYbDY++OAD9HrXX4ywsDC6du1K//79lXHffvutwrTb7eTm5gLw6aef0rBhQ8aMGVOr+dHrdYSGmmp1rQyqCw38NI/q44lgyuhRxswimDJ6lC1zz1ATPb1GuyDZnqMIngimjB5lzCyCKaNHT3knskuueN5qc3j8XkrGeRHBVDtPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWIWFhfHFF1+watUqUlJSyMrKAqB169aMHj2aBx54QNnuzltatWoVVquV999/n5CQEMBVZJo8eTJJSUlEREQAYDabMRqNLFq0CHAVmHr06EFgYCBdunShe/fudO7cmX79+pGYmKjwq1c/LViwgNjYWAC2bdvGqFGj0Ol0BAcHA2C1WgkJCWHLli3Ktc8++yzffPMN0dHRAEovqwceeICXXnoJgMLCQm655RYAhZWZmYnD4WDt2rVKpk8++YRXXnnFrRgFEB0dzYYNG5Svhw4dyoEDBxRWSEgIZ86c4fHHH+df//oXMTExPPbYY4waNcqNU1hYyBNPPOF2vEuXLgBuha2rlcPhxGIprfX116sMBvU38NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR+8wzxWWUVxux+xvIDzE818WeMtfoK/uiudNRj0FBdZasevCvMjoUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwerweZzQF/eNWZRwUrcPVfGjlyJCNHjvQU9YeUkpJC9+7dlcIOuAosr7zyCtu3b2fw4MEAREVFkZaWpozZvXs3JSUl6HQ6oqKiAFffpzvuuEPZDg8u9J26WD179sTPzw+j0Yi/vz82mw2r1UrDhg3dxvXv35/169fToEED4ELvqotXcIWEhBAbG8tvv/2m+MjLywNQthKsZk2cOFEpRJ0+fRqHw1GjJ9jtt9/Onj17aNasGQA//vgjmZmZjBgx4orPMT09nQ8++IAPPvigxrm7776bffv21brYqDWUu7zqQr2YNyQAAQAASURBVAM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmsnUrKKpm/9oDbtnvtWoaRNKgtJn/fP91feHAA7VqGcTAjH4fzwnG9Dtq0CKOB2d/jZ6rGeRHNE8FU
O08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYFRYWkp2draxE+r2OHDlCo0aNlKKLN5SWlsZ9991Xo49VQEAAx44dU8YlJibywQcfYLFYMJvNSvFKr9eTkZHBrbfeSn5+Pg0bNiQrK4vy8nL8/f0pKirCx8eH//znP+Tk5LBt2zZ8fHyorKxUCk+nTp3C6XRy7tw5Vq1axYoVK0hPT6devXoANGnSRBnn6+vL1q1bKSwsZO3atVitVsVH9bjCwkJ0Oh3Lli3j119/Zc+ePQQEuD4JWL9+fSU3wNGjR1myZAnLly8nKysLk8m1ZUT1dox79+4lJCSEn3/+mUmTJgEwffp0NmzYwNy5c5Ui25IlSwCoqqpi06ZNfPPNN5w/fx69Xk/Xrl3x9fX8DZ0mTZo0adKkSZMmTZo0XSvNX3uAgxn5bscOZuQzb80Bxj2Q8OeY+p2SBrVl3hr3olqbFq6imiZNmjRp0qRJkyZNssujgtUbb7xBeno6n3766SXPv/LKK0RFRTF16lRPbuMmi8WC0Wis0cfq5ZdfJiUlheeffx5wbZW3dOlSkpOTSUpKYseOHQC0b9+ehQsXMn78eGJiYnj66adxOp0cOnSIjh07UlJSQlxcHMuWLaNBgwaMHj2a7777jn379mGxWABXHy1wrZx65ZVX6N27N3369GHx4sWAazVUtdcGDRqwZ88eUlNTGTFiBOfOnWPdunUAFBcXU69ePUpKSmjTpg2zZs2iefPmPPHEE/z3v/8lPz+f8+fPu93TYDDw+uuv85e//MXtnunp6XTt2pXc3FysVivPPfccRqNR8bl//37++te/kpKSgo+PD926dQMgISGBsrIy5fneddddhIWFKf2vaiMfH62p3O9VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM1j7Zlnz1vdikDVcjhhf3o+eZZyGoUF/mn+qhUc5MdzwzqRW1iGxcvbFl78f29Ilu+dusQTwdQ8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9SibPCpY/fTTTzz44IOXPX/bbbexatUqT25xSe3du7dGH6t3332XEydOkJOTQ0REBMHBwSxevJgpU6aQnJyMXq9Hr9dz7NgxHnvsMR555BEAIiIiKCkpYeXKlXTs2BEAk8mE0+nE6XQyd+5c4uLiiI6O5sSJE+zbt0/xUb9+fSoqKti1axd79+7ltttuY/369WzatEnpWeXr64tOpyMsLIwlS5YQGRlJnz592LJlC6tWrWL06NGAq5+VwWCgqKiIuXPn0qlTJ4qKisjIyCAnJ0e5p8lkwt/fn2+++QaTycSAAQNYu3Ytn3/+OQ888ABOp1PpnVVeXg5Abm4uAOfPn+frr79mwIABAOzYsQObzcb06dOx2Wy89NJLHDhwgK+++qrWc6PX6zxuFHw9qy408NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIprd4u4+c48hPJ4ltHkbHmIb/+4KrkFozi2SqzeOJ7JIrnrfaHB6/T/FmZlHvmdQ2L9eCJ4Kpdp4IpuZRDp4Ippo9ZuaWcPRQDpENTESGB/3vC/6g1JxZFE8EU0aPMmYWwZTVoyzyqGCVn59PaGjoZc+HhIQoK4S8JbPZzPHjx2v0saouMF3cxyo6OppFixYBsHz5cl599VWsViv9+/dXrnvkkUd4+eWX+eWXXxT+6dOniY2NZc2aNcq4oUOH4uvry/fff69cf/ToUZ577jml+JWRkcH69es5d+4cZ86cwWw2k5+fj9PpZN26dcrWiDNnzuSHH34gJSWF0aNHK1sW3nLLLcyZM0e5Z69evZRM1VsD5uTkMHv2bPr27QvA9u3bWbt2LQcOHMBms2E2mwFXYWvXrl1KX6z33nuPDz74gJSUFKVg9emnn9KtWzcGDRoEuFafDRo0iM2bN9OvX79azY/D4cRiKa3VtdezDAb1N/DTPKqPJ4Ipo0cZM4tgyuhRxswimDJ6lDGzCKa3
eDn5pUz++GdKyqqUY0EBPkx6rCsNQ2u34sbbHkXxRDDV6jHQV3fF8yajnoICa63Yas0skieCKaNHGTOLYMroUcbM3mSWlFUy94tUUtMurLxtHxXGU/e2xxRQ+xYYas4siieCKaNHGTOLYMrq8XqQ2Rzwh1edeVSwCg8P5+DBg5c9f+DAAcLCwjy5RQ1FRUWxd+9eoqKilGPFxcXk5eVRr149pdfTpa671J/T0tIICQnh7NmzlJeXExUVxbFjx+jQoYMyxul0kp6eTv369UlLS6NZs2b4+PhQVVVVg3Xxn6OioigpKSEsLMytj1daWprC+qOZunTpopxr2bKlG6vay+nTp7nxxhsBaNy4sVKsulgXe6xeFfb666/zxRdfUFFRgU6n47fffqt1wQrQGspdQXWhgZ/mUX08EUwZPcqYWQRTRo8yZhbBlNGjjJlFMD3l/b5YBVBSVsWkhT/z3j8SPbUHqC/ztWCqzWN4cADtWoZxMCMfh/PCcb3O1SOqgdnfY79qy3wteCKYMnqUMbMIpoweZczsDeac1ak1ehoeSM9n9upUr/Q0VGNm0TwRTBk9yphZBFNWj7LIo4JV3759WbFiBYmJifTp08ft3DfffMPq1asZOnSoRwZ/r8TERH799VelPxPAV199hV6vp0GDBkqvp9+rU6dOGI1Gqqqq8PPzA6CyspKvv/6atm3bsn37doqKikhMTHRbWQWurfMKCwuJi4tj+/btdO3aVTkXEHBhed+GDRto0aIFGRkZFBUV0atXL3Q6HXq9nvnz57NixQrOnz9PVVUVHTp0YP/+/W6ZbDYbY8aMYdu2bTidrndZYWFhFBUV0bRpU8LDw8nNzWXfvn2MHTuW9PR0dDodkZGRnDp1yu2e586d49FHHyU1NZXi4mJat26tsKqVk5PDsmXLajyrFStWMHbsWOU5Xa20HlY1VRf2Q9U8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIpjd4+07k1ShWVaukrIpDpwpoH1W/1nw1ZhbNVLPH5MHtmfO7T9S3ben6RL0n71HUnFkUTwRTRo8yZhbBlNGjjJm9xawrPQ1FMTWP6vQoY2YRTFk9yiads7oyUgsVFxfz0EMPcfz4cWJjY5XVPceOHePw4cNER0ezYsUKZZs6b6ioqIiuXbvSpEkTJk+eTE5ODtOmTeOee+7h559/pmPHjkyZMoWRI0eSlZXF5s2blWsfe+wxtm/fzoQJE2jdujUrV65k27ZtjBs3jilTppCSkkJYWBgdO3YkMDCQN998k7KyMqZPn05UVBS//PIL/v7+vP3226xdu5a1a9fStGlTXn/9dXbu3MmcOXOYMmUKL730Em+99Rb33HMP/fv3Jz09Hb1ez5AhQ/jtt9/IyMjA4XDgdDo5cOCAksnHx4eIiAhuv/12/vOf/ygrpP7yl78wZcoUJk+ezIoVKwBXfzCA77//HofDVa1duXIlnTp14rbbbiMrK4umTZvSsGFDZWvA2267jbS0NDZt2sTTTz/NN998g9PpJCkpidLSUtatW0dgYCBnz57ltddeY8iQIVc9P06n85IruzRp0qRJkyZNmjRputZa+fVhVmw6ctnzD90Vw4N3xl5DR5quhbJyS8jKs3q9Z4kmTZo0aVK/fj2Uw+SPfrrs+Vcev5mb4iKuoSNNmjRp0nQ18miFVb169fjkk0/46KOP2Lx5M5s2bQKgWbNmPPXUU4waNYrAQM/2hf+9goODCQ4Oxul0kpycjMlkYsiQIYwdO5Y+ffooW+85HA7sdrvbtbfffjvbt29nwYIFFBQUEBcXx4IFCzh27Bg6nY7g4GB8fX2Jjo6msLCQcePG4ePjwx133EGjRo3YsWMHXbt2pXfv3txwww2sXbuW06dPM2rUKCIjI3nttde46aabFJ8APXv2JC0tDaPRyJo1a+jUqRPTp09n2LBhVFVVKWMDAwMpLS0lLy+PDRs28OCDD9KtWzeSkpKoqKgA4NZbb2XFihX4+vqy
bds2WrZsyZw5c/j444/ZuXOncs927dpht9ux2Wzs3bsXgOeee44DBw4oY+Lj49myZQtOp5MlS5bQpEkTHnroIUaNGsU999zD8ePHazU/Wg+rS8tgUP9+qJpH9fFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9AYvMuzKzY4b1w+sdV8jUGfm3+tcYRnF5XbM/gbCQzxv/lwXvneC/AzcFBeBxVLm0fyK8ieCqXlUp0cZM4tgyuhRxszeYmo9DTWPavQoY2YRTFk9Xg+6Zj2sAAIDA3nmmWd45pln/ufYyspKfvvtN2JjY6lXr16t73njjTcSEhLC7NmzlWPFxcXk5uYqfaCWLl1a47ro6GgA5s+fT2zshU9Sbtq0icjISPz9/QGIiYnh6NGjfP/998qYhx56CIPBoFx3cR+rV199lcGDBwPw7bffAhf6ZPn6upo5fvjhh259qOrXr8/Zs2eVrwMCAtDr9ezatUs5ZrFYACgpKQGgSZMmAAwcOJCpU6cq47Zs2cLOnTvR612T3qpVK3bu3MnOnTv5+eefGTFiBF26dGH9+vW0bt0agL///e988cUXpKWl8e2339boNVZdJKuNtP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPMmYWwZTRo4yZRTA94bVpHkZQgM8ltwUMCvAhrlmoV7yqKXO1Ssoqmb/2gNu2SO1ahpE0qC0m/9o3nK/W9f69cy14IpiaRzl4IpiaRzl4Iphq86j1NBTDE8GU0aOMmUUwZfUoi67pZopFRUWMGDFC6d1UWyUmJvLjjz8qBR240MeqZ8+el72uU6dOBAUFsXHjRuVYdR+rxMREN/7hw4fJyMhQjh09epTKykpuueUWAIxGIzfffDO+vr6kpaUp4zZs2EB0dLRSXKreDvHYsWPKmKKiIs6ePYvNZqO8vBwAHx8fysrK3DJVr1ir3rWx+v9nzpxxy3Xw4EEATp8+rfgvKipix44dypisrCwOHjzolrO6+DZgwADi4uLo06cPb7zxBtnZ2bRt2/ayz1GTJk2aNGnSpEmTprqil0feRFCA++f0ggJ8eHnkTX+So2uj+WsP1Gg4fzAjn3lrDvxJjjRp0qRJk6Zro6RBbWnTwv2D2W1auD60oUmTJk2a1C2PV1hdrTxomaVo6NChLF26lOTkZJKSksjJyWH69OkMHTqUiIgL+9D+7W9/48iRI+h0OkwmE4MGDWLUqFHMnTuXsLAwpY9VYWEho0aNUq678847qV+/PnfffTcAjRs3pri4mKioKOLj4918bNu2jY8++ogVK1bQrFkzDh8+zMyZM9386nQ6pkyZwmuvvUaDBg0IDAwkICAAm81GUVER/v7+ykqs7t27o9fradq0KTk5OURGRiqrnYqKigDYuXMnbdu2JTAwkMjISKUYVn2+Y8eO3HjjjTz++OPK854yZQoxMTHceeedAOzbt4/z58+j0+koLCxUCmuLFi2iXr16SvbayJOGxter6kIDP82j+ngimDJ6lDGzCKaMHmXMLIIpo0cZM4tgeot3Q4Mg5jx7Kwcz8jmVa6VZuKnGL7FqK7Vm1hrOq5sngql5VKdHGTOLYMroUcbM3mQGB/nx3LBO5BaWYfHytrje8CeSqXlUp0cZM4tgyupRNl3zgpU3FBwczOLFi5kyZUqNPlbVKioq4uDBg+h0OubMmUNOTg7Tpk3jnnvu4emnn2bhwoXk5+crfayaNm2qXLto0SIKCwuJjo7m5MmTZGZmArit3qqsrOS9997Dx8cHo9FIeXk5R48epXXr1vTv318Zl5mZidPppHXr1pw9e5bz58+Tk5PDvffeyxdffKGMKywsRKfT0bx5c06fPs3JkycxGo20aNFCGWO1uvbYveGGG7Db7Zw/f56jR49y88038+OPPyrjvvzyS44dO0ZsbCzp6elUVFSQm5vLq6++io+Pa8o3btxIVVUV//znP9m6dSv79+9XimdWq5Xi4mICAq7+H3O9XkdoqOmqr5NFZrPnL5BE
8kQwZfQoY2YRTLXzRDA1j3LwRDA1j3LwRDBl8tgz1MTl92LwTGrLfCK75IrnrTaHx6/ZZfreEcUTwdQ8ysETwdQ8ysETwVSzR1G/n1JzZlE8EUwZPcqYWQRTVo+yqE4WrMDVj2rRokWXPb9q1Sp8fX357rvvCAkJAcButzN58mS+++47kpKSLnldRUUF8+bNY9SoUYwbNw4Am81Ghw4d+PXXX5VxmzZt4tixY4SFhTF48GDGjx/Ptm3bGDVqFPv27VNWYu3evRuAzz77DD8/PwCeffZZtm3bhk6nIzg4mOzsbEpKSmjfvj3/+c9/AFcB67bbbiMjI4OEhAQAtm3bBsALL7xAv379APjkk0+YNGkS4CrkAbz33nv85S9/YcaMGezcuZMRI0YQExPDypUruf322wEYPXq00rfqiSeeAGDdunWMHz8egEOHDtGwYcP/NQ015HA4sVhKr/q6610Gg/ob+Gke1ccTwZTRo7ebzYtgyjgvoP7nKOu8aB7VxxPBlNGjiMwrNh/h8MlC2rQIYWjfGI953vKoNZxXN08EU/OoTo8yZhbBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOrzupswep/KSUlhe7duyvFKoD+/fvzyiuvsH37dgYPHnzJ63bv3k1JSYnbKimj0Uh4eDjp6elu/FatWnH8+HGioqIA1wqskJAQvv/+e+Lj47HZbEofrPT0dLeeUevXryciIgJ/f3+lEFW99R9ASEgIPXr0YOvWrQo/NTUVnU5HTk6OW6aJEycCEBUVxenTp8nIyOBf//qXW64ePXqwbNkybDYbRqNRKVZdrDZt2lz5of5BaQ3lLq+60MBP86g+ngimDB5FNJvXGth7h1fXnqMs8yKaKaNHGTOLYKqRt+vIOWZ/caEvb0Z2MRt+Os0zQ9qR0OrqP/T1e3nqUWs4Xzd4IpiaRzl4IpiaRzl4IpgyepQxswimjB5lzCyCKatHWVRnN1M8ceIEjz76KAkJCfTs2ZPp06djs9mU82lpaUqhp1pms5nw8HBWr17NrbfeSnx8PA888AC//fab23UAJpOJMWPG0LFjR7p27YrBYKC8vJxz584p4/z9/dHpdHz00Ue0b9+efv36YTabFcapU6ew2+34+/szceJEevbsSUJCAvPnzwcgLi5OYdWrV49jx44xdOhQJVNaWhpVVVXccsstAGRkZBAZGcny5cu56667aN++PQ8//DC+vr6EhITQpEkT5d4RERFMmDCBJ598EoAdO3ZQWVnJ6dOnL/k8s7Oz+etf/wqAXq/3WvFKkyZNmv4siWg2rzWw946056hJk6a6rouLVRfrvf9c+vifIa3hvCZNmjRp0qRJkyZNmuqa6uQKq6KiIkaOHEmLFi2YNWuW0p+qvLxcWW1ksVgwm801rrXb7fz666+88MILxMTEsHz5ch577DHWrFlD06ZNsVgsGI1GkpOTAZgxYwbl5eW88sorAIwZM4YxY8aQlZVFUVERTqeTm266iYkTJ/LTTz8xd+5czp49q/gEaNy4MXv37uXuu+8mNjaWjz76CIBOnTopXkNDQyktLeXw4cM8/vjj5ObmsmrVKgBle0GLxUJMTAw7duygbdu2jBs3jnXr1lFZWUmTJk3c7vnSSy+RnZ1Nr1692LRpE9nZ2QD8+uuvREdHAzBq1Ci6detGTEwMs2fPVgp+999/P+Hh4bWeHx+fOlsHFaa60MBP86g+ngimLB5FNJvXGth7h1eXnqNM8yKSKaNHGTOLYKqVt2zT4Sue//TbYzx0Z+22B/RmZq3hvHp5IpiaR3V6lDGzCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2VQnC1arVq3CarXy/vvv1+hPlZSURERExCWvq6iooKCggJiYGB555BEAOnfuTL9+/ViwYIHSC8rhcHDs2DE2bNigrNI6evQoc+fOpby8nOTkZGw2G/7+/tx44428+uqrANx888189NFHVFVVud03LS2Nvn37
smvXLjZv3kxUVBRFRUUcOXJEGVNcXIyvry/dunVjwYIF+Pj40LhxYzIzM8nJyVEyHTp0iJtuugmLxcKMGTOIjIzEx8eHvLw8t3sePux6I71p0ybA1RMLYOXKlTzwwAMAtGzZks8//5ysrCxsNht6vR6Hw8E//vGPq50SRXq9TlhTy+tBdaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9BYvM7eEo4dyiGxgIjI8qNaco6eLrnj+8KlCVf0cqwsN53cfOceRn04S2zyMjjGeb6lYLbV+L4pkah7l4Ilgah7l4IlgyuhRzZm99XrnUpLpOdYVngim5lGdPJl0TQtW9erV44033uDGG2/0iPNH+lOZzWaKi4vdrtu9ezcOh8Ntuzuj0cgdd9zB5s2bAde2gVVVVbRu3dptS8EbbrgBgFtvvZWxY8dy3333ceDAAQYMGOB2j6ZNm5KWlsaZM2cIDg4GwOl0MnXqVOXrjIwM7rrrLo4dO6bcs6SkhMTERObMmaOwpk2bxscff6xkCgoKorCwkEcffZS+ffsq4zp37sy5c+ew2WzKPUwmE7t27UKnczVc3r59O4899hiNGzdWrnvppZew2Wx0794dgCeeeIL33ntPuaY2cjicWCyltb7+epXBoP4GfppH9fFEMGXxKKLZvNbA3ju8uvQcZZoXkUwZPcqYWQTTW7ySskrmfpFKatqF1Z3to8J46t72mAKuvm9e66bBZGQXX/Z8bLMQ1fwcE8H0Ji8nv5TJH/9MSdmFD/wFBfgw6bGuNAyt3Wpbb3sUwRPB1Dyq06OMmUUwZfQoY2YRTLXzvMn09usdER5F8UQw1c4TwdQ8qtfj9SCzOeAPrzrzqGD1yy+/XPG8TqfDaDTSqFEjGjZsiJ+fH/fee68ntwRcK5buu+8+t2PV/amqezhFRUUpf67WwYMHAejYsaPb8ejoaBYvXkx5eblSpGrY0P2Tfenp6RiNRk6dOqWc379/v1tRy+l0Kiud0tLSuPnmm9Hr9QQEBCiFpOpzALm5uYrXyspKt2ISQGZmJr6+vsr48PBwCgsLadmypTKmuLgYq9WK0+nk9OnTip/w8HC3wlNaWhp6vZ6cnBy3ezz66KNYrVY++ugjpT+Xp9Iayl1edaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9JQ3Z3Vqjb55B9Lzmb06lXEPJFw1b2if1nz9y5nLnv/b7Teq7ueYCKY3eL8vVgGUlFUxaeHPvPePRI/YoM7MopmaRzl4IpiaRzl4IpgyelRjZm+/3rmUZHiOdY0ngql5VCdPJnlUsBo+fPgfXo3TvHlznnnmmRorkmoji8VCZWUljz76KHv27MFkMjFo0CDMZrPSwykxMZEPPvjArZfV7t27AThz5gy33nor+fn5xMXFcdttt+F0OikqKqJTp07odDpycnIYM2YM27Ztw8fHh6qqKsLCwhR+mzZt+Pbbb9m5cydvvfUW6enphIaGYrFYAFcvKaPRqBSZ3nzzTdauXYvVasVkMhESEkJJiWtbpF69egFw/PhxJVNAQADFxcVumVq1asWxY8fYtGkTa9asISsri9DQUOW5VPv39/cnJyeHYcOGceDAAcrKyoiPj6dRo0Zuq84efPBB5ZmMGjVKOb569Woef/zxWs+P1sOqpurCfqiaR/XxRDBl8pg8uD1zfvcJs7YtXZ8wq+3PKRFMkGteoO48R9nmRRRTRo8yZhbB9AZPVN+8f94fzzuf7bvkcTX9HBPB9BZv34m8GsWqapWUVXHoVAHto+rXiq3WzCKZmkd1epQxswimjB5lzCyCqXaet5gi+wR7y6NIngim2nkimJpH9XqUTR4VrD766CPeeustbDYbf/vb32jWrBkAJ0+e5LPPPsPf358nn3ySzMxMPvnkE5599ln0ej39+vXzyLTT6eSzzz6jTZs2zJo1i5ycHKZNm4Zef+EbYejQoSxdupTk5GSSkpLI
ycnhhx9+QKfTsXDhQsaPH09MTAzjxo1j5syZynV+fn6YzWaOHTtGYWEho0eP5rvvviM1NRVf3wtLaLt27ao8g969e9OnTx+WLFlSw2ubNm34f+yde1xUdf7/n3NhuAxyk4siKkIqqJhSWmpSpl3UzI10czVvWV8qat1ca7VatXLT3G2r1aQ0De92WU0tzeymoWZ5SRHvgoKgqAwwMHKdmd8f/Dg6oW7CfOzg57wej31snPM5z/N6nc/AjLz5fN7fffcdS5YsYdSoUZw9e5Z169bh5eWF01nzp+XNmjVDp9Oxbds2IiMjefLJJ/nss88oLCykqqpKYd1+++1s2LCBd955hwceeOCK92zRogXHjx8nPz+f6Oho9u/fT0ZGBn379uXIkSMArFu3TilWvfbaa0BNQW/16tXEx8fXe260HlZXV2PYD1XzqD6eCKYMHgMD4fXk3uSdKyXvvM0te3iLYF4qGeYFGt9zlGVeRDNl9ChjZhHMhvBE9c3re3sb+t7ehgVr9/PL4bN0aR/KuAc71ddmHd3o8wKQZ8m56vncggsk3NKqQfdQW+brwdQ8ysETwdQ8ysETwZTRo9oyX48+wXDjP8fGyBPB1DyqkyeTGlSw+uGHH/D09OTjjz/GZDK5nBs+fDgjR47kl19+4fnnn+dPf/oTDz/8MPPnz29wwcpkMlFRUcGcOXOUPlZ2u50pU6ZgNNZE8vf3Z9GiRbz22mskJydjNpvp0qULO3bsYNSoUYwZMwao2Y6vsLAQp9OpbNvXpEkTiouLcTqdpKSkEBsby6RJk5gxY4ZSZGratOav/Wp7Re3du5f77ruP/Px80tLSFFZYWBgAQUFBLF68mPDwcF566SXeeOMNl2fm6elJRUUFxcXFpKSkEB8fzwMPPMDs2bOVTLV9tAICAvj6668xm82MHDmS77//nszMTOWebdu2paKiAr1ez8GDB4GaflibN29Wxqxdu1a599///neX57t7925iY2Px9r72byyth9Xl5e79S93NE8GU0aOMmUUw3c0rKS0HoLS0nEJj/Xv0XSpfTwO3xoZhtZbVu0/JpZJxXgAycwrJOW+jorwSbzfMTWPIrHmUw6OMmUUw3cET2TcPYOhd0Yx7sJP2flAPhQdd/d8aLZr6qKYXmEzzcqnOFpVRUm7Hz8tASEDDf+ki43MUkTkjy0LOeRutQsx0iAxqME/GeRHBVDtPBFNGj2rNLPrzjizPsTHxRDA1j+r1eCPouvWwWrduHU899VSdYhXUFGAGDRrEe++9x/PPP4+npycPPvggc+fObcgtATAYDPj6+irFKoDevXsDUFlZqRyLjo4mNTVV+XrBggXs2LGDTp0u/vXjsmXLSExM5NChQ3h5ebn437p1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWloaPj41S20rKiqU+7Rs2VIZN3/+fMrKypSvjUYjTZo0IS0tTTmWl5fH7NmzlUy1/v74xz/y17/+VRl36tQpMjMzFU9RUVFs376dHTt2sHr1aiZPnswdd9xBamoq7dq1A6B///5s2bLlss/3n//8JxkZGS4rz65F2v6cV1Zj2A9V86g+ngimDB5Ly6qYtzbDZWuETm2CSBrcEbNXw5rO1kptma8H0x28/MIL/GPxTpetoHy9jfx99K2EBNR/q4paqTGzaKbmUQ6eCOaN7vF69M1rqMfrwRPBbCivQ+sgfL2Nl90W0NfbSGyrQNX1ApNhXkD8ZyhZnqO7eY3t85MIpoweZcwsgql2XkOZ2ucdcUy180QwNY/q5MmkBm2mWFZWxvnz5694/ty5c1y4cHG1TZMmTVy27auv7HY7RUVFSr8oQCkuXa54VqvalUoHDhxQjlVVVZGbm4vdbqe8vOav8HU6HRUVFZw4cUIZ9+OPP+J0OpVtAc+cOQPgMgZgz549AIwdO5ZevXopK5x+/PFHZUxxcTGFhYXY7XaXTAUFBSQkJNC5c2ceeeQRVqxY4ZKp1t/q
1avp2rUr3bt356WXXuLYsWMunhISEiguLqZfv368/PLLACxatIgDBw6QkJAA1BT4/vCHP6DT6TAYDC4Znn32WZKTk6/4HDVp0qSpMWje2ow6TWcPnLDw/pqM38mRplr9+pctUNOv5LVFO38nR5o0abqRlTS4Y51VCB0ia375run31d9H34qvt+vfUNb+Al7T7yftM5Q6pX1+0qRJ09Wkfd7RpEnTjaIGrbC67bbbWLx4MV26dKFPnz4u57799lsWL17M7bffrhw7ePAgLVq0aMgtgZpVVF5eXi79qWbNmoW/vz/V1Rc/wI0ePZq8vDw2bdoEwIULFzAYDCxatIiQkBDatWvHihUrlJVOxcXFeHl5odPp8Pf359lnn2XChAmUlZUxa9YsgoKC0Ol0yliAzMxMpk2bpqxY2rFjh3LvyMhIpk2bhoeHB7NmzUKv1xMWFsb777+Pp6enUoCCmmKU0+nE09OTZ599lvXr1zNv3jyaNGmiZLJYav7RcO7cOQYMGECbNm1YuHChi39AKYRZLBbuvPNOvv32W9577z1atGjBvffeC0BISAjR0dGYTCY6d+7Mbbfdxvr168nMzOT48ePcdNNN9Z6fhjSavlHVGBr4aR7VxxPBlMWj1nRWvR73HT9/2b+mh5pfuhzMLiQuqmm92GrNLJKpeVSnRxkzi2C6i+fv68kLI+I5V1SG1Y3bm13qTW2ZRTLdyWse7Mvcv97FgRMWss+5d4uzS/9fbTwRTHfxRH6Gkuk5upvXmD4/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lE2NahgNWXKFEaNGsXTTz9NWFiYsuVdTk4O+fn5hIeHK/2RKioqOH36NEOHDm2waZ1Ox9ChQzl8+LDSn2rIkCF1trhzOBwuq5gA9Ho9zzzzDAsXLsRisRAbG8tTTz3F22+/7cJPSEigoqKCCRMmYDQaueeee8jPz1d6WNXqr3/9K2vWrOHTTz/FbDbj4eFBVVUV7du3Z9CgQaxatYpdu3aRmJjIm2++ic1mIz4+nkceeYTFixcrz8bpdBIXF4evry9z5szBx8cHX19fpUAGsG/fPgCeeOIJvvrqKzZt2kRYWBinTp1y8ZSSkkLnzp1p27YtX3zxBQChoaEYjUZllRnA999/z913361kb9GiBZMnT2bz5s1UVVUpq8muRXq9zi2NHG9UNYYGfppH9fFEMG90j1rTWXHMhvLyLDlXPZ9bcIGEW1o16B5qy3w9mJpHOXgimGr2mHuulCMH8wkPNhMe4ttgnsjPqDLNiwher0AzvdxGuyg1ZxbFbCjvenyGkuE5upvXGD8/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lEWNahgFR4ezrp161i5ciVpaWnk5uYCNb2jRo8ezSOPPKL0cvL09GT+/PkNdwz4+flhMplc+lMBfP755/j7+ytfL1mypM51VVVVjBkzhqSkJOX4xx9/rKyqqh3ncDiYPXu2y/XDhg2jefPmAMrY9u3bs27dOgBGjBiBh4cH27dvV8536NCBXbt20aVLF1599VWF9dZbbyljdu/eDUC7du14/fXXlTEzZsxg8eLFyrjMzEwAEhMTmThxIgBOp5NbbrkFm82Gv78/lZWV7Nixg4kTJzJmzBhuvfVWJk+ezMSJE3nhhRc4deoUERERHD9+nF27drF8+XLlfomJiXh7e/OXv/yF7OxsoqOj/8dM1JXD4cRqvfC/B0omg0H9Dfw0j+rjiWDK4lFrOqtej+FBV//Q1qKpT73nRq2ZRTI1j+r0KGNmdzJLy6pIWZ1OeubFVR5xUUE8/VAcZu/6989Rc2ZRPBFMGT3KlFnkZyiZnqO7eY3p85MIpoweZcwsgql2ngim5lGdHmXMLIIpq8cbQX5+3r951VmDClYA3t7ejB07lrFjxzYU9ZsVFRWlFG9qVVJSwrlz54iKirrqdQBZWVnExMQoxzMzMwkPD8fLy0sZd+TIEZdrnU4nWVlZ9OpV87d/
rVq1wsPDg8zMTHr37q1wbr31Vpd71d6ntpfVpfesHVObpbCw0GVMixYtcDgcysq12vOXXqvT6QgMDKSsrIyWLVuSnZ1NVVVVnecQGRmpXBsREcHevXuBmm0SH3roIQ4fPkxoaCjdunW74vP7rdIayl1ZjaGBn+ZRfTwRzBvdo9Z0VhyzobwOrYPw9TZedlsbX28jsa0CG+xXbZmvB1PzKAdPBFONHueuSq/TPycjy8K7q9KZ8EiXBrpTZ2bRPBFMGT3KkPl6fIaS4Tm6m9cYPz+JYMroUcbMIphq54lgah7l4Ilgah7VyZNJDS5Y/R5KSEjgvffew2q14ufnB8CXX36JXq9XCkoAx48fZ/r06ezZswez2czAgQPx9fVlw4YNSiGpqqqKr776ioSEBOW63r17s2bNGu644w6sViuxsbEMHDiQoqIi7rzzTgBMJhNdunRh7ty5vP3223h4eGC1Wjlx4gTR0dFEREQAcMcddwDwySefsHLlSsLDwxk5ciRpaWk8/fTTAFitVgwGA1u2bKFHjx6UlZXRtWtXAgICAOjUqRNQ0+fKbDYzdepUJk6ciNlsZvDgwVitVvz9/TGZTEofq507d/KPf/yDnJyarQNqC2a158+fPw/Ak08+CdSsgDObzaxduxYvLy9atWpV7/nReljVVWPYD1XzqD6eCKZMHpMT45j7q7/Q79im5i/0G/pzSq2ZRTLdyZv2WHemLfzJ5Zcuvt5Gpj3WvUFzo+bMopiaR3V6lDGzu5ha/xzNoww8EUx38kR9hpLtObqb11g+P4lgyuhRxswimGrniWBqHtXpUcbMIpiyepRNDS5Y/fDDD3z66afk5ORgtVrr9HjS6XR8/fXXDb2Ni4YNG8aSJUtITk4mKSmJ/Px8Zs2axbBhwwgLCwNqCjODBw9Gr9fz7rvvkp+fz8yZM7nppptYuHAhQUFBtGvXjhUrVlBUVMS4ceMU/qU9oZ566im++eYbXn/9dW677TY6d+4M1BS6zpw5Q1FREQkJCcTFxfHuu+9y5MgRl35YtayKigrGjh1Lfn4+r732Gv7+/gwbNkwZ53DUVFz9/f0ZPXo0q1evZtu2bQAEBwcrYyorK7HZbAwaNIjQ0FAWL15MVVUVcXFxLs/o/fff59577yU6OppvvvmGadOmAXDmzBmgpvgFNYW3xx57DJvNxsqVK5Vj9elfBVoPq/+lxrAfquZRfTwRTBk8BgbC68m9yTtXSt55m9t6oFwqtWW+Hkx38AIDzayYPpA9h89y6KSFmNZBdG0f6gZ3NVJjZtFMzaMcPBFMtXnU+ueI4YlgyuhRlsyiP0PJ8hzdzWtsn59EMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkBhWsPvjgA958802aNm1K586dad++vbt8XVX+/v4sWrSI1157jeTkZMxmM0OGDOG5555TxqxcuRK73U5wcLCyZZ/dbmfatGk8/vjjLFy4EIvFQmxsLAsWLFC23auoqOCDDz7g0Ucf5ezZs8ybNw+DwYC3t7cyBmDjxo2cOnWKqVOnsmLFCrZv345Op8PpdNKiRQtlXEpKCkajkdjYWD777DNsNhvBwcF4e3vTpEkTZZzT6eTJJ59k7969pKSk4O3tjV6vx+FwKD2sKisrcTgcvPTSSyxdupS8vDyCgoLIz88nJCREeTZQU3T66quvXPgAhw8fBuDQoUPAxd5iOp2OiIgIjh07htVqpbS0FF/fa/9HidbD6vIyGNS/H2pj8LhuaxaHsovo0DqAgT3bNJjXGDJrHhvO8/U0cGtsGFZrWYP6Vl0qtWcWwRThMbp5E7q2D3Xb3DSGzDJ6PFtURkm5HT8vAyEB7vnQrvbn2BjmRa0etf45mkc1epQxM7j/M5SMz1HGz08imDJ6lDGzCKbaeSKYmkd1epQxswimrB5vBPn5XaceVosXL+b2229n3rx59V6RU19FR0eTmpp6xfNbtmyhT58+zJ07VznWv39/pk6dSps2bdi8efNl
r9u9ezelpaUMGTKE2NhY5fiMGTPYtGmTC799+/YMHz6c4cOHAzBixAj27t3L5s2b6dy5M5WVlfz4449UV1czfPhwEhMTAfjmm294+umnOXXqFBEREVitVqBm+8BLi2733HMPZ86cUXprVVdX4+3tzahRoxg1ahRQs5Kse/fu6HQ1/7iv/f8HH3yQf/zjHwrr5Zdf5pNPPiE5ORmA0tJSJVefPn2UcV26dKGsrIzKysorPtv/JW1/ziurMeyHqkaPB05Y+NfKX5Sv048X8NG3x3lheBdiWgU10KE6M4tmyuhRxswimDJ6lDGzO5ilZVXMW5vhsr1bpzZBJA3uiNnLPZ8b1f4c1TgvonkNZWr9c8TwRDBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT8ygHTwRT86hOnkxq0GaKVquV++6777oXq36LMjMziYqKcjnm5+dHSEgImZmZV70OqHNtdHQ0eXl5ylZ6l+MnJCRgt9uVVUzZ2dlUV1fX6a0VHR3tci+73Y5OpyMtLU0ZU1VVRWFhIXr9xSkqLy+ntLSUEydOKMcyMjIAlNVQtVsQ1vasqlVtL6varQe9vb0xGo3KtoO1cjgceHh4EBTU8CKAJk3u0qXFqks1a/nlj2vSpEmTpt9f89ZmcOCExeXYgRMW3l+T8Ts50tRYlDS4Ix0iXT+LdoisKXZq0qRJkyZNmjRp0qRJk6YbVw1aYRUXF0dWVpa7vFyTjh8/zvTp09mzZw9ms5nBgwfzl7/8BZPJBNQU0/z8/Opc5+/vz48//shdd92lbAk4efJkunTpolxnMpkoKipi+vTppKWl4eHhQWxsLE6nk+LiYry8vLBarTRp0oRvv/2Wt99+m6ysLMLCwtDpdOzYsYO0tDR27NgBQN++fUlNTWXt2rXYbDaMxprHXlxcDMCFCxcICAhgwYIFfPnll+Tl5QE1K6pqV0xBTcEqODiYUaNG4XQ6KSgoQKfT4enpiaenpwvz4MGD9OnTB6vVSmVlpbJiqvZ8VFQUO3bsYOnSpXzxxReUlJRgMBioqKhocLGqIQ1fb1Q1hgZ+avW45ocrF5gBNvx4kkF31G97QLVmFsmU0aOMmUUwZfQoY2Z3MU8X2FxWVtXK4YT9WRbOW8tpFuTzu3psTDwRTDV79Pf15IUR8ZwrKsPqxu0k1ZxZFE8EU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9yqYGFaymTZvGE088QadOnRg0aJC7PP1PFRcXM3r0aCIjI5k9ezb5+fnMnDmT8vJypkyZctVrLRYLFouFSZMm0b59e5YtW8Zjjz3GmjVrXHpUPf744wC8+eablJeX88orr9RhnTt3jmeeeYYhQ4bw4osv8uOPP5KSkkJ1dTXJyclK8ay6uppPPvmESZMmERYWxvjx4wGU1VoATZo0obKyktzcXJxOJ82bN+fMmTPY7XZljE6n4+abb+abb77Bw8MDT09PwsLCyMrKoqCgwMWbt7c3FouF6upqgoKC8PLyIjs7m+zsbOLj4xk6dCipqak4HA7Ky8ux2+1Knyubrf77YOv1ugY3wr6R1Rga+KnN45FTxVc9fyiniFEqa74uw7xcD6baeSKYmkc5eCKYavN4/EzpVc/bKh1u+byg9ueotnm5Hjx3MkV9plRzZlE8EUwZPcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLD6y1/+QnV1NS+88ALTpk2jWbNmLlvYQU2RZe3atQ0y+WutXLkSm83GnDlzCAgIAGq21XvllVdISkoiLCwMPz8/SkpKXK6rqKjAYrFw8803M2bMGABuueUW7r//fhYsWMC0adPw8/OjsrKSI0eOsGHDBmXbvz179rB48WKys7MV/r59++jcuTOvvvoqALfffjsrV66kqqqKPXv2cOzYMQYOHMjmzZuZNm0aQ4YMASA1NZWhQ4eyZ88ehg4dip+fHwUFBTidTtLS0pRMTzzx
BFu2bCE/P1+5586dO3nggQd48803lVydOnXiyJEjQM0KMqgpOn3zzTdEREQA8N133/Hkk0+Snp7OH/7wB6KiooiOjiYzM1MpUPXt25fs7GwOHTrE6dOnad68+TXPjcPhxGq9cM3X3egyGNTfwE+tHttF+JN+vOCK52NaBqim+bpM8yKSqXaeCKbmUZ0eZczsLqaPh+6q580mfYMaxav9Oap1XkTyAM4WlVHi5hVRas+seZTDo4yZRTBl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWjzeC/Py8f/OqswYVrAICAggICKB169YNwVyztmzZQo8ePZTCDkD//v2ZOnUqW7duJTExkaioqDq9qtLS0nA6nfTu3Vs5ZjKZuOeee9i0aRNwsXdVZGSkS48qvV6PTqfjxx9/pFu3bkRGRpKens4TTzyhjHE6nVRWVmKz2Th16hStWrXCYDBgt9u5//77lXHnz58H4NixY8o9bTYbCQkJLpkMBgOAkqlFixakp6fTv39/ZUxJSQlVVVXk5eVRWVnp4rlJkybKf9f2sLp0m8SmTZvSokUL/vrXv+Lv709YWBh33nmnkqW+0hrKXVmNoYGf2jwO7BHJfzdfeVvA/re3Vl3zdRnm5Xow1c4TwdQ8ysETwVSbxxB/b2JaBXAou6jOudjWAQT7ebnFr9qfo9rmRRSvtKyKeWszXLaB7NSmpueU2avhvW7VmFk0U/MoB08EU/MoB08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLGlSwWrJkibt8XJMyMzN5+OGHXY75+fkREhKiFKkSEhJISUlh5MiRpKenYzablWLN4MGDXa6Njo5m0aJFlJeXEx8fj16vx2q1Kn2uYmJiyMvLo2nTpgo/JiaGdevWsWbNGt555x08PDzo3LmzslopMzOTiIgImjVrxpkzZxg5ciRZWVmEh4cTEBBAYGAgubm5ANxxxx1ATV+uXr16YbPZiIuLU3zX3vOmm24iPT2dlJQUJk6ciNlsJiYmBr1ej91uJycnh+joaFq1akVeXh533303FRUVtGrVioKCAgwGA4mJiQBUVlbi5eXF5s2b2bZtG5WVlbz44oucOXOG2267jfDwcBFTp0lTvfTC8C7MWv7LZY9r0qRJkyZ16oprrOr/NzGaVKp5azM4cMLicuzACQvvr8lgwiNdfh9TmjRp0qRJkyZNmjRp0qSp0alBBavfS1ar1WWlUK38/f0pLq7pdzNgwADefvttDh48yJNPPsnJkydZtWoVgEuvqtGjR3Ps2DGcTifFxcWEhYVhMpmwWCz069eP+Ph4UlNTOXfuHDfffLPCj42NBeDo0aM88cQTlJaWsnTpUgIDAyksLFTGhYSEkJubS3V1NRMmTGDTpk3s2rWLW2+9lb179wLQrFkzAHJzc0lMTKR169bMnz9fKTbVsmJiYgDIyspyydSqVSuys7OVcQkJCSxdupSqqiqqqqo4fvw4ULPFYG32wsJCtm3bRlhYGGfOnAHg9ddfp1WrVrz//vsNmh+jUWsq92s1hgZ+avbYKSqYxS/344ttWRw4WUSH1gEM7NmmwVw1ZxbFlNGjjJlFMGX0KGNmdzFPF9g4eJnVVQAHs4s4by2nWZBPvflqf45qnRcRvNMFNpeVVbVyOGF/lqVBc63WzCKZmkd1epQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6lE2XVPB6ueffwagW7duLl//L9WOv55av349JpOJmJgYUlJSMJvNSmGnticUgMPhwOG4uDyvoqKC8vJyQkJC2L9/P1u2bCEmJgaHw8G5c+fw9fUFYPfu3UBNESk1NRWj0UjPnj1JS0tz8XH69GmMRiMGg4E333yT8PBwOnfuTFZWljKmtmDUqlUrNm/ejM1mo3Pnzuzbt8+lD9cvv/wCQNu2bZVMvXr1Ytu2bS733Lx5M56enuh0OoxGI82aNaOwsJDU1FQeeughoqOj8ff3JyoqikOH
DqHX63E4HDz44IPs2rWLv/zlL7z33nvodFfvP3E56fU6YQ2ybwQ1hgZ+avb46MBObmNdKjVnFsWUyeNXO06SfuwcN7cNoV9392xh+9HXh9l75Bxd24cytG87tzBBrnkRyVQ7TwRTbR6Pnym96nlbpcMtnxfU/hzVNi8ieNdjrtWW+XowNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LomgpWI0eORKfTsXfvXkwmk/L1leR0OtHpdBw8eLDBRi+Vn5+fSyGnVsXFxfj7+wM1fa569erF3LlzlfMLFixg1qxZfP/99zzyyCNAzbaGH3/8MVOmTMHf39+lEPXBBx8o186YMYPly5fTpUsXADIyMgAYP3680hPL6XTSrVs3SkpK8Pf3p7KyknPnzuHt7c26desU1jfffMPTTz9NYGAggFLk6tOnDy+++KIy7plnnuH7779XMh09elTxUturymq1KgVBf39/cnJyyMnJwd/fn7S0NEwmEwCpqanMmDGD2bNn8/bbb+Pp6cnZs2cZO3Ys7dq1Y/LkyUyePJmsrCyGDx/O1q1bla0Kr0UOhxOr9cI1X3ejy2BQfwM/zaP6eCKYMnnMyivm1dSfqUV8vzuXOZ/8wtSx3YlsXneV7m9RRlYBbyzbo3ydfryAxesPMvnReGIjg+rtVaZ5EclUO08EU60efTyu/kcvZpOewkJbvdig/ueo1nkRwRM512rNLJKpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ9dUsFq8eDGAUgSp/fp6KyoqSunrVKuSkhLOnTunFHIu1+eqQ4cOQM1KpdqCVe3Y8PBwvLy8FO65c+fq3LOyspJWrVoBcPbsWXQ6HZmZmUrBSqfTERISQklJCVFRUWRnZ+NwOLhw4YJLMS06Ohqo2S6w9v5Go1HpaVWriIgIqqqqlEy1njIzM5Vjfn5++Pr6cuHCBVq2bMn27dsBiIyMVOYJalZlAcr2gBaLRenPdekKs9pnlJ2dfblH/5ukNZS7shpDAz/No/p4IpgyeLy0WKUwHfDKhz8x/4W768W8tFh1qWYs3c3CSfVjXioZ5uV6MNXOE8FUm8cQf286tQniwAkLjkt6Vul10CEyiGA/L7f4VftzVNu8iOBdj7lWW+brwdQ8ysETwdQ8ysETwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KomsqWHXv3v2qX18vJSQk8N5777n0svryyy/R6/X06tULqFl5VFVVxdixY9mzZw9ms5mBAwei1+tdVnxVVVXx1VdfkZCQoFxnMBg4dOgQd9xxB1arldjYWKVAVLvCqrS0lODgYObOncvbb7+Nh4cH99xzDyUlJXh7exMREcGuXbuU+wwaNIjCwkLCw8NJTEwELhaRrFYrvr6+fP/99/To0YOysjK6du2KXl9TdazNZLPZ8PPzY+rUqUycOBGz2czgwYOprq4mNDQUk8mk9LHav38/Xbp0Qa/X06ZNG/r27QtAUFCQ8v/e3t4cOHBA6Y3Vo0cP7rnnHgBatGhR7/nReljVVWPYD1XzqD6eCKYsHr/fc6pOsapWdgds23+ahC7X9nNuzQ+ZVz2/4ceTDLqjfr3VZJkX0Uy180Qw1ewxOTGOuavTSc+0KMc6tgni6YfiGvxZQe3PUc3zIoInaq7VnFkUU/OoTo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY/q9ChjZhFMWT3KpmsqWP1a1dXVlJeXK32dfq3S0lK8vLwwGht0mzoaNmwYS5YsITk5maSkJPLz85k1axbDhg1TelM5nU4WL16Mh4cH7777Lvn5+cycORMPDw8OHz7MokWLaNeuHStWrKCoqIhx48YpfKfz4p+HPvXUU3zzzTesWrUKuLgCyel0cuHCBWw2GwkJCcTFxZGamorNZqN9+/Yufp1OJxaLhVGjRpGfn8+///1vAG6//XZlTHV1NXa7HX9/f0aP
Hs3q1as5ceIEOp3OJVNFRQVWq5VBgwYRGhrK4sWLqaqqUnxdes82bdpw991388svv/DOO+8AcO+99wI1q8H++Mc/snz5crp27QrUrJzbunUrbdu2pUePHvWaG62H1dXVGPZD1TyqjyeCeaN7zDxdd9vYS3Usz8rgPtfWe+rIqeKrnj+UU8QolfVpEcGU0aOMmd3BDAyE15N7k3eulLzzNsKDzYSHXP4zY32l9ueoxnkRwRM912rMLJqpeZSDJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUNqiRNnz6dnTt38vnnn1/2/J/+9Cduu+02Xn755Ybcpo78/f1ZtGgRr732GsnJyZjNZoYMGcJzzz2njDGZTJSVlREUFKRs2We325kyZQo333wzCxcuxGKxEBsby4IFC2jZsiUAPj4+OBwOhg0bhsViYd68eRgMBkwmE5WVlcq2fk6nE5vNxtSpU1mxYgXbt28nMDAQm81G06ZNFZ9Qs53gXXfdxWeffYbNZiMwMJDCwkKaNWsGgMFgoLS0lGeffZadO3eSkpKCt7c3Op0OLy8vl0xVVVW89NJLLF26lLy8PIKCgsjPz1dYtff8xz/+werVq1m6dCkOhwMvLy/Ky8tdimQTJ04kKCiIOXPmADVFsxYtWvDBBx+4bCd4LdJ6WF1eBoP690PVPKqPJ4Ipi8eo5k34/irnbwr3u+aeKu0i/Ek/XnDF8zEtA1TTp0UEU0aPMmYWwfT1NHBrbBhWa1mD+lZdKrU/x8YwLxlZFnLO22gVYqZDA3rwXSp3z7WM86J5VKdHGTOLYMroUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwebwT5+QnqYfVr/fDDD/zhD3+44vn77ruPtWvXNuQWV1R0dDSpqalXPG8wGAgJCeH7779XjtUWrtq2bcvHH3982euqq6sBuO222xgwYIByPDExkUOHDikFJKfTiaenJ8OHD2f48OEAOBwOOnToQFVVFYBSRIqKiuJvf/sbf/vb3wB46623eO+99/Dx8QGgoqICgMGDB/PMM8+4+C0rK3PJ5Ovry6hRoxg1ahQAeXl59OnTh8rKSuVeUNPbasmSJcq1DzzwAMeOHVMKc1BTAAsPD8fHx4fi4mJCQ0Pp2bOn4ru+0vbnvLIaw36omkf18UQwb3SPd8SFs2jDoctuC2jQQ89Oza+ZPbBHJP/dfOVtAfvf3lp1fVpEMGX0KGNmEUwZPaoxc37hBf6xeCelZdXKMV9vI38ffSshAT7usKj656jGeRHNE8GU0aOMmUUwZfQoY2YRTBk9yphZBFPtPBFMzaMcPBFMzaM6eTKpQQWrs2fPKtvVXU6hoaHk5+c35Bb1lt1up6ioyKXP1datWwHqrB46fvw406dPZ8+ePeh0OgDS09OVglVVVRW5ubnY7XbKy8vx8vJCp9NRUVHBG2+8wYYNG7BYLEREROB0OvHw8ADgzJkzCv/ZZ58lLS0NDw8PpVB14ULNSiRPT08APvzwQ3bu3ElWVhZhYWEUFBQo52ozFRYW8uqrr7Jx40ZsNhvh4eEumVq2bElERARvvPEG//nPfzh69CghISGcPXuWmJgYl+zDhw936bN15swZli1bxogRI4iOjm7wHGjSpEnT76mXR9/K9EU7XYpWBn3N8frqheFdmLX8l8se16RJk6bGoF8XqwBKy6p5bdFO/jM+4XdypUmTJk2aNGnSpEmTJk2aNDWwYBUQEEBWVtYVzx8/fvyK/a1Eq7KyEi8vrzp9rvz9/ZVVVAAjRoxg9+7d3HLLLcyePZuVK1fy9ddfk5qaSrNmzZQ+V7UrnYqLi5WClZeXFwsXLmTIkCGEhoaycOFC4OKKqeLimn4nWVlZWCwWkpKSyMjI4KuvvnI5b7fb8fHxYdmyZXTr1o0///nPLF++XCmQXZpJp9OxcuVKRo4cCcCSJUvQ6XQuK7HuuecePvzwQ+Xr/Px89Ho9U6dOdXlGmZmZeHl5ceed
d7Jx40aaNGlCVFSU4r++amgj9RtRjaGBn+ZRfTwRTJk8RrcI4MMX+5G2L48jp4ppF+HPHZ3DG8TsFBXM4pf78cW2LA6cLKJD6wAG9mzTICbINS8imWrniWBqHtXpUa2Z9x0/X6dYVavSsmoOZhcSF9W03ny1P0e1zotIngimjB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnqUTQ0qWPXu3ZuVK1cyaNAgOnTo4HIuIyODjz/+mPvvv79BBusrnU7H0KFDOXz4sEufqy1btriMO3PmDE6nkzlz5hAQEMD+/fv57rvvcDgczJ8/n+LiYmJjY3nqqad4++23Xa6tqqqiTZs2rF+/HqPRSP/+/fniiy/Izc2t4ycwMJA5c+YQHh7OsGHDWLlyJdnZ2cp5h8NBaGgomZmZpKenEx8fj5eXF5mZrttPVVdXExUVxcqVKzGbzQwbNoxly5Zx/PhxZcwLL7xAREQE8+fPJz8/H51Oh06n49ixY3Tt2hWAo0ePUlhYCMDGjRsBKCkpYe/evXzzzTd15vO3Sq/XERhorte1MqgxNPDTPKqPJ4Ipk8dBd7Z1C+dSPTqwk9uZINe8iGSqnSeCKZvHMa9+SUFxBcEBnnz4d/d91lRzZncw8yw5Vz2fW3CBhFta1ZtfK7U/R7XNy/XgiWDK6FHGzCKYMnqULfNHXx9m75FzdG0fytC+7dzGle05iuCJYMroUcbMIpgyepQxswimrB5lUYMKVuPHj+eHH35g6NCh3H333dx0001ATTHku+++IygoiPHjx7vF6LXKz88Pk8lUp8/V559/jr+/v/J1s2bNaN++PQEBAcp1drsdgAkTJpCYmAjAxx9/jE6nU641GAzY7XbeeustYmNjFd7WrVuVlVO1YyMiIpSiENSsuFq5cqWyOs3X15fy8nKee+45xowZo4z785//TGZmJqdOnSIiIgIvLy9sNhsrV650yfDpp59y7tw55Wu9Xs+jjz7Ko48+yqRJk9i/fz89e/Zk5syZJCYmYjAYmDlzJk2bNiUmJkYpxD344IP07t2b0aNH43A40OuvvRLscDixWi9c83U3ugwG9Tfw0zyqjyeCKaNHGTOLYMroUcbMIpju5L332T627T+rfH2+qIJBf11DQudmPP5g/QvJas7sTmZ40NX/0dSiqQ+FhbZ6sUH9z1Gt8yKSJ4Ipo0cZM4tgyuhRtswZWQW8sWyP8nX68QIWrz/I5EfjiY0MUoVHUUy180QwZfQoY2YRTBk9yphZBFNWjzeC/Py8f/OqswYVrMLCwvjvf//Lm2++yTfffMOmTZuAmgLMoEGDeO65567a40qkoqKi6qxOKikp4dy5c0RFRSnHMjMzefjhh12ug5oVUZden5mZSXh4OF5eXgA0adLEZTyA0+mktLSUsrIyysvLadWqFTqdThlbq9pCVUlJCVBTJAPqPKvS0lLl3hEREfj6+lJVVeVSrCopKaGiokJhXUkdO3Zk0aJFWCwWQkJCyMrKoqCggK1bt9KtWzdl3Mcff8zHH3/M+vXr693HSmsod2U1hgZ+mkf18UQwZfQoY2YRTBk9yphZBNMdvEuLVZdqy74zjBlQv9Xhl0qNmd3J7NA6CF9v42W3BfT1NhLbKtAtftX+HNU2L9eDJ4Ipo0cZM4tgyuhRlsyXFqsu1Yylu1k46e4GsUGe5yiSJ4Ipo0cZM4tgyuhRxswimLJ6lEUNKlgBhIaG8sYbb+B0OrFYLAAEBQWh0+kabK4hSkhIICUlhZEjR5Keno7ZbCYmJga9Xk+vXr2UcVarVSkYAcTHxysrnpYuXcrixYuJiYkhLy+Pfv36KePCw8PZt28fTz31FHv27MHDw4POnTtz4ULN6qLi4mLCwsLw9PQkNzeXBx98kKysLMLDwwkICMDHx4eqqioAZWXawoULmT59Ojabjbi4OPbt26ewAEJCQjh79mydTDqdzqXXFcAnn3zCBx98QHZ2NkajkXXr1uHr60tg
YCAAkyZN4tlnn73ss/v73/9OeHj9+7xoPazqqjHsh6p5VB9PBFNGjzJmFsGU0aOMmUUw3cUb//bmq56f+O4PvD3+znqx1ZpZBHPaY92ZtvAnl6KVr7eRaY91b/BnOLU/RzXPiyieCKaMHmXMLIIpo0eZMq/5IfOq5zf8eJJBd9Sv96tMz1EUTwRTRo8yZhbBlNGjjJlFMGX1KJsaXLCqlU6no2nT+jdpdrcGDBjA22+/zcGDB3nyySc5efIkq1atol27di4rmex2OwsWLOD//u//APD09KRLly6kpaXRokULRowYQWpqKufOnaN///7KdbUrq37++Wcef/xxSktLWbp0Kb6+vsrKKABvb28KCwsJCQlhwoQJbNq0iV27dhEREaGMqX1u+/btIzExkdatWzN//nwqKytdMrVs2ZL9+/fXyRQcHKwUtQ4dOsTf/vY3Dh06xKBBg/D29ubYsWP88MMPDB8+HKOxZspre1RNmDCB2267DYDk5GRsNhubN2/m0Ucfrddz13pYXV2NYT9UzaP6eCKYMnqUMbMIpoweZcwsgtlQXmFp1VXPW0qqGvwZRG2ZRTADA82smD6QPYfPcuikhZjWQXRtH+omdzVS+3NU47yI5olgyuhRxswimDJ6lCHzkVPFVz1/KKeIUSp7nxbBVDtPBFNGjzJmFsGU0aOMmUUwZfUoi66pYDVnzhx0Oh1PPfUUer2eOXPm/M9rdDodycnJ9TZYX61fvx6TyURMTAwpKSmYzWZ69erF9u3byc/PV4pWBoOB6uqLf2FaUVGhrJgqKiri7bffJiYmBofDwYYNG5TizpkzZwC45ZZbSE1NxWg00rNnT9LS0lx6XVVVVWE2mzEYDLz55puEh4fTuXNnjhw5QufOnZUxUFNE2rx5Mzabjc6dO/PLL79QWVmpsGr7VP0609atWwkKCgIgODiYnJwcvL292bhxIzqdDqPRSEREBKdOnarznFq3bk2XLl2AmmJdYGAgGRkZ9X7uWg+ry8tgUP9+qJrHhvMysizknLfRKsRMhwbs0X6p1J5ZBPNsURkl5Xb8vAyEBDT8Db4xZNY8qtOjjJlFMN31PR3o63HVolVQE49691+ScV6imzeha/tQrNayBvWtulRqf46NYV40j+r0KGNmEUwZPcqUuV2EP+nHC654PqZlgGrep0Uw1c4TwZTRo4yZRTBl9ChjZhFMWT3eCPLzE9TDqrZg9cQTT2AymVRdsNqyZQu9evVi7ty5yjGr1Ur37t3ZunUriYmJAHTp0oWAgABlzO7du7HZbOh0Ol599VVl3IwZM5QeXXCxYDVp0iRiYmKAmh5WN998MyaTCS8vLyorK7HZbISGhrJu3Trl2q+//prk5GSCg4MBOHHiBAD3338/SUlJyrhHHnmEX375RVnNdf78eQDeffddpYhltVrp1q2b8nVZWRk2m413332Xfv36MWnSJPbv388f//hHZs2aRWVlJSaT6bLP7Ntvv+WVV15h48aN1/Ko60jbn/PKagz7oWoer135hRf4x+KddbZX+vvoWwkJ8HGHRdVlFsEsLati3toM9mdZlGOd2gSRNLgjZi+P393f9WBqHuXgiWCq0aO7v6fffKY3j8389orn/5Xcu8HPQIZ5Ec0TwVQ7TwRT8ygHTwRT8ygHTwSzobyBPSL57+YrbwvY//bWqnufFsFUO08EU0aPMmYWwZTRo4yZRTBl9SiLrmkzxUOHDnHw4EGl4HHo0KH/+b+DBw8KMf6/lJmZqRR6auXn50dISAiZmRc/RCUkJLBt2zasVqtyHVCn11V0dDR5eXlKr6ji4mKMRiMbNmxQxlRXV+N0OpU+UdnZ2TidTs6ePasUpQDlXrXbAmZnZ+Ph4cHmza69GWw2GwaDQRlXVFSETqfjq6++UsY4nU50Oh0hISEu/tu0cd0bOjo6mqqqKnJyclyOT5s2jdjYWHr06MGkSZP49ttviYuLu8JT1aRJ0+X062IVQGlZNa8t2vk7OWqc
mrc2gwMnLC7HDpyw8P6a+q/61KRJ0+8nEd/Td8SFXdNxTZo0adKkSdP10wvDu1zTcU2aNGnSpEmTpl+r3j2sKisr+eGHH2jRooWywkhNslqtVFVVMXbsWPbs2YPZbGbw4MH4+fkp/Z4Ahg0bxpIlS0hOTiYpKYnt27cDEBcXxyOPPILFYiE2NpaCggKcTifFxcV4eXlRWlpK27ZtmTdvHgsWLMBkMhEQEIDdbleKR7X3adq0KYMGDcLhcBAYGEhFRQWAso2f1WqladOm7N69m5tvvhmHw0FoaCinTp3CYDAoXmvvOXXqVF555RW8vb3x8fFBr9cTHh7ucs/333+fbdu2cf78eQwGg7LCKycnh+joaA4ePEiLFi3o06cPLVu2ZOHChaxevRqAt956q0HPvqENu29ENYYGfprH+mnf8fN1ilW1Ki2r5mB2IXFR9e/vp8bMIpinC2wuqzBq5XDC/iwL563lNAuq32o1tWYWyRPBlNGjjJndxRT1Pf1/g+P4v8FxPPefzRRYq2jq58Fbf76z3j5rJcu8iOSJYKqdJ4KpeVSnRxkzi2DK6FG2zJ2igln8cj++2JbFgZNFdGgdwMCebf73hf9Dsj1HETwRTBk9yphZBFNGjzJmFsGU1aNsqnfBysPDg/Hjx/PSSy+psmDldDr55JNP6NChA7NnzyY/P5+ZM2ei17u+WPz9/Vm0aBGvvfYaycnJ6PV6dDod+/fv5/nnn6d9+/YsW7aMvXv31uHn5uYSFBSE3W6npKSEs2fP4uPjg5eXl8vYgoICWrRowfnz5ykuLqaysrKOX5vNhpeXF35+fhQUFHDu3Dk8PT2x2+0u9zx16hTBwcFcuHBB2f7P29u7zjZ/a9asUf67urpaKUbV9rFq164dbdq0YePGjVgsFhyOi0sUa7c7rI/0el2DG57fyGoMDfw0j9emPEvOVc/nFlwg4ZZW9ebXSk2ZRTCPnym96nlbpaPBP1vUlvl68EQwZfQoY+aGMkV/T6dOHVDva6+mG31ergdPBFPtPBFMzaMcPBFMzaMcPBFMd/IeHdjJbaxLJdtzFMETwZTRo4yZRTBl9ChjZhFMWT3KonoXrHQ6HZGRkRQWFrrTj9tkMpmoqKhgzpw5So8qu93OlClTMBpdY0dHR5OamgrAokWLeP311xk9ejRjxowB4JZbbqF3794UFRUpvaKcTidWq5UNGzYoWw+mpaUxbtw4nE4ngDI2KiqK9evXK/dLSkri+++/V84bDAZKSkp49dVXeeSRR4Ca7f969erlUogymUxUVlaydu1aJdNHH33kkqmWedddd/H+++8r1w4YMIDjx4/To0cPAFq3bs2CBQuorKzkgQceICkpiRdffBEPDw8yMjIYMKB+vwhyOJxYrRfqde2NLINB/Q38NI/144UHXf0NqEVTnwY1s1djZhFMHw/dVc+bTXpVNWn+YW8eR3OLaRfhzx2dwxvMU+u8iOSJYKqdJ4KpVo8iv6fB/bnPFpVRUm7Hz8tASEDD/2Gh1nkRyRPBVDsPtNeOLB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53Vu2AFNYWXmTNncv/999fpF/V7y2Aw4OvrqxR2AHr37g1w2RVOtaqurtnaq1Oni38RZDKZaNGiBSUlJcrqKafTiaenp0vunj17otPpqKqqAqBZs2ZA3X5SMTExfP/99/j41GyFU7tFYM+ePZUxAQEBBAUFUVZWdk2Zav21a9fO5Z7R0dEcP35c8VSrBQsW4OfnR2JiIi+++OIVn8u1SGsod2U1hgZ+msdrU4fWQfh6Gy+7LaCvt5HYVoFu8aqmzCKYIf7edGoTxIETFhzOi8f1OugQGUSwn5cqmjRnnbby+pKd1H7m+H53Lgs+P8DLo2+ldZhfg9ju8iiaKaNHGTM3lBni701MqwAOZRfVORfbKsAt39PQ8NylZVXMW5vhsn1hpzZBJA3uiNnL43f3dz2YMnp0
B0977cjpUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdSggtXevXsJCAhg0KBBdO/enRYtWtTZDg/g5Zdfbsht6iW73U5hYSEjR44kPT0ds9msbF346+3zLlXtSqXly5cza9YsLBYLMTExZGdnY7fbKS8vx8vLC51OR0VFBY899hh79uzBw8ODzp0743Q68fCo+Udq7dZ6GRkZPPjgg2RlZREeHq48owsXalYieXp6AjB16lQOHz6MzWYjLi4Oi8Xi4vW3ZCovLwdg9erVrFmzhvz8fF544QUyMzMVT9HR0QDk5eWRkpJC27ZtiY+PB6CqqkrpraVJk6bfpr+PvpXXFu10KVr5ehv5++hbf0dXjU9Jgzvy/hrXX/51iKz55Z9adGmxqlZ2B0xftJP5L9z9+5jSpEml0l1pkdXVF19dV81bm8GBExaXYwdOWHh/TQYTHuny+5jS1CikvXY0adKkSZMmTZo0adKkyf1qUMFq6dKlyn9v3779smN0Ot3vUrCqqKjA6XRy8OBBnnzySU6ePMmqVaswmUzKKiqA0aNHk5eXx6ZNm4CaIpJOp+Pnn3+mX79+xMfHk5qaSklJCQDFxcVKwUmv1/Pzzz/z+OOPU1paytKlS/Hw8ED3/39DU1xcDMDp06fx8fFhwoQJbNq0iV27drmct9vteHh4sHXrVhITE2ndujXz58/Hbrcrq69+a6Za5rlz5+jYsSP5+fl89dVXZGVluZyfOXMm69evp7KykpCQENq3b89///tfzGYznTt3btCzNxq1pnK/VmNo4Kd5rL+aB/sy9693ceCEhexzNlqFmOkQ6Z7Cr1ozi2D6+3rywoh4zhWVYXXz9kru8Pf9nlN1ilW1sjtg2/7TJHRpUS+2mudFFE8EU+08EUy1ejxdYOPgyaLLnjt4sojz1nKaBfnUm+8uj5cWyGvlcML+LEuDPKp1XkTyRDDVytNeO/J5lDGzCKaMHmXMLIIpo0cZM4tgqp0ngql5VKdHGTOLYMrqUTY1qGB16NAhd/kQIoPBQExMDCkpKZjNZnr16sXWrVtdttlzOBzY7Xbl6+rqapxOJ927d2f//v1s2bKFmJgYysrKlKIVQFlZGQ6Hg27dupGamorRaKRnz56kpaW5jAMIDw/HYDDw5ptvEh4eTseOHcnIyFDOX7hwgaqqKnr27MnmzZux2Wx07tyZ3bt343C4/nb0t2QCmDRpEitWrAAgNzeX559/npkzZyrn7XY7+fn5eHh48MMPPxAWFgbA448/Trdu3er7yNHrdQ1qon6jKvdcKUcO5hMebCY8xNdtXFmbDKrVY69AM73cQqortWYWwRT1M6Sh/jJPl1z1/LE8K4P7tLvqmP8lNc+LKJ4Iptp5Iphq83j8TOlVz9sqHW75Xle7R7XNy/XgiWCqjae9dsTwRDDVzhPB1DzKwRPB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiBhWsanXkyBE2b95Mbm4uABERESQkJNTpo3Q9pdfradGihcsqMKvVSrdu3bBarcqxJUuWuFxXuwJp4sSJ3HzzzcrxsWPHsm3bNvz9/YGawlaTJk1YuHChMsbpdNKxY0eleFTbo6p37968+uqryrgVK1aQkZGh9Lqq9fP2228rfID777+fU6dOXVOm2uvvvPNOxo4dS/v27Rk7dqwyF7Xnv/rqKwA+++wzQkNDAZRCldVqxc+vfr1YHA4nVuuFel17I6q0rIqU1emkZ178K9y4qCCefigOs3f9+xsYDHI2GVS7Rxkzi2CqlRfVvAnfX+X8TeF+FBba6sWWcV5EMNXOE8FUq0cfj6vv+2c26ev9/QLq96jWeRHJE8FUK0977cjnUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUZ0eZcwsgimrxxtBfn7ev3nVWYMKVpWVlUyZMoU1a9bgdDrR62tu6nA4ePPNNxk0aBDTp0+/as+o6yndFZsp1G/slcY4nU63+bhWVlRUFACZ
mZnKf9d+7eHhQcuWLYGL/bUGDhzocv0777zDO++8w759+5TeWtcqraHcRc1dlV6nv0FGloV3V6W7pb+BrE0G1e5RxswimGrj3REXzqINhy67LaBBDz07NW+wXxnnRQRT7TwRTLV5DPH3plObIA6csOC45KOMXlfTmy7Yz8stftXuUW3zcj14Iphq42mvHTE8EUy180QwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoQQWrf/7zn3z22WcMHz6cRx99lFatWqHT6Th58iRLlixhxYoV+Pv789JLL7nL72+Ww+EgLy/PZbXQl19+CVBn9dDx48eZPn06e/bsUY599dVXSi+nqqoqZfvD2h5WRqOR8+fP88Ybb7BhwwYsFgsRERHY7XZlZdWFCzUrjXbu3Mmzzz5LWloaHh4e+PrWbAnn4eHh4uedd95h586dZGVlERYWRl5enkvBqjbTq6++ysaNG7HZbISHh7swWrZsSWRkJJ9++inLli0DYM6cOTRp0oTbbrtNKR726dOHtLQ0AEwmE+Xl5dRujThhwgTFW32k9bCqkdbfQD6PMmYWwVQzb+rY7rzy4U8uRSuDvuZ4Q372yTgvIphq54lgqtljcmIcc3+1yrhjm5pVxg39rKB2j2qeF1E8EUw187TXjlweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6lE0NKlitXbuWwYMHM2XKFJfjUVFRTJ06ldLSUtauXfu7FKx0Oh0mk4nk5GSSkpLIz89n1qxZ+Pv74+19cQ/JESNGsHv3bm655RZmz57NypUr+frrr1mwYAEhISG0a9eOFStWYLO5buvh4+ODl5cXCxcuZMiQIYSGhirbA/662HP8+HHOnz9PUlISGRkZynZ8l7J8fHxYtmwZ3bp1489//jPLly/Hbrcrq9ZqMzmdTlauXMnIkSOBmi0NdTodRuPFqRw3bhx///vflWJW06ZNycnJcdniMCQkhKqqKkwmE3/961+Jjo5m9OjRQM2WhUlJSfV67loPq4vS+huI4Ylgqp0ngimjR3fwAgPNfPbPwXz900n2Hj3HzW1D6Ne9tRvc1UjGeRHBVDtPBFONHgMD4fXk3uSdKyXvvM3tfRxB/R7VOC+ieSKYauRprx05PcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLCqrq52KYL8Wl27duW7775ryC3qLX9/f/r27cupU6dITk7GbDYzZMgQ1q1b59In6syZMzidTubMmUNAQADZ2dl8/fXXOJ1O5s+fT3FxMbGxsYwaNYoPPvhAudbPz4/c3FzatGnD+vXrMRqN9O/fn88//5xz584pHmoVGBjInDlzCA8PJzExkVWrVmGxWBRWZWUloaGhZGZmkp6eTnx8PEajUekLBtCkSROKioqIiopi5cqVmM1mhg0bxrJly1zGFRYWYjKZlGJXeXk5I0aMYOXKleTn5xMWFkaTJk0AePjhhxkxYoRybUhICPn5+fV+7loPq4vS+hvI51HGzCKYaucBdI8JpV/31litZQ3qw1MrGedFBFPtPBHM5ZsOc+hkER0iAxjWr70bHLrfo6+ngVtjw9z2/QLq99gYXjsyehSRWXvtyOHxbFEZJeV2/LwMhAS455cPas8sgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vBHk53edeljdcccdpKWlMXz48Mue/+GHH+jVq1dDblFvRUVFUVhYSGpqqnKspKSEDz/80KW3U7NmzWjfvj0BAQHKdVDTO2rChAkkJiYCMHPmTMLDw/Hy8gJqikx2u5233nqL2NhY5Zr169dTWFgIoGyRGBISwsaNG5V7fvPNN6xatYpTp04B0Lp1a6qrqxk+fDhPPfWUMu6RRx4hOzubU6dOERERQUBAAEVFRaxcuVIphpWUlLBs2TKlSAawZcsWevfuzdy5c2nfvj1jx45l6NChLF++nK1bt5KYmMhNN90E
gNl8cYXP4cOHGTp0KOfPn2/Ak9d6WNVK628ghieCqXaeCKaMHmXMLIIpo0c1Zt51+Czvrt6vfH3iTAnrf8zhz0M60eWmUHdYlOI5NjaeCKaMHmXMLIIpg8fSsirmrc1w2ea7U5sgkgZ3xOxV/y3UL5XaMl8PpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNWgzxfHjx3Pq1CmeeeYZtm/fTm5uLrm5uWzbto3k5GTy8vIYP348RUVFLv9zh44fP87YsWPp0qULvXr1YtasWVRWVirnExIS2LZtG1arVTn25ZdfotfrOXHiBHfddRedO3fml19+UXpKAcTHx+Pr64u3tzfvv/8+Xbt2pVu3bnz00Uf06NFDGdesWTOgpj/Vgw8+SFxcHHfeeScVFRWUlJRQXl6OyWTCbDZTVVXFG2+8Qa9evejSpQtTpkzBZDIphaHWrWu2k8rKylIy9ejRg/37a34RlZmZCUBwcDA6nY41a9Zw3333ERcXx8CBA9HpdC7PNTMzk4iICF588UUA3nrrLV5++WWaNm2qsBISEtDpdHz44YfEx8fTqVMnEhISSE9Pp0WLFm6ZI02QNLgjHSKDXI51iKz5R64mTZo0adJUX11arLpU//n08sc1adKkSdO1a97aDA6csLgcO3DCwvtrMn4nR5o0adKkSZMmTZo03dhq0AqrAQMGAHDkyBG++eYbl3NOZ82SkoEDB9a57uDBgw25LcXFxYwePZrIyEhmz55Nfn4+M2fOpLy8XOmnNWzYMJYsWVKnh1VcXBwLFy5k4sSJtG/fnjFjxrBmzRqeffZZWrZsiaenJ48//jhvv/02Z86cISkpie+++4709HRycnIUD7VFnenTp9O7d2/69u3L4sWLXTx6eXlhNpvJz89nyZIljBo1irNnz7Ju3TqMRiMFBQXAxZ5Xa9eupXXr1jz55JN89tlnykqt4uJi5Z67d+/mH//4Bw888IByT6fT6VKYKy4u5osvvqC6uhqoKcLt27ePoqIihRkcHMytt97Kzp07lf5ctVsBXroCrT5qaCP1G0n+vp68MCKec0VlWN24jYisTQbV7lHGzCKYaueJYGoe1elRrZmXbjx01fMff3uU4ffWf3tAWZ5jY+KJYMroUcbMIpiyeDxdYHNZWVUrhxP2Z1k4by2nWZDP7+pRJE8EU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomxpUsEpOTkanu3qfHhFauXIlNptN6TsFYLfbeeWVV0hKSiIsLAx/f38WLVrEa6+9pvSweuihh/j000957LHHGDNmjMLT6XQsWLCAadOmARAREaGcS0lJITY2lkmTJjFjxgz27dtH586dMRgMQE2fql27drF3717uu+8+jh07xt69e5XrjcaaRxwUFMTixYsJDw/npZdeYsaMGZw5c8Yll16vp7i4mJSUFOLj4+nbty8ffPCBUrAymUzodDqCg4P5+uuvMZvNjBw5kg0bNriwHA6Hy7Z+O3bsUP67dhtCgI4dO5KRkYHRaOTChQsEBgZSUFDAtm3bsNvtSsZrkV6vIzDQ/L8HSiZRz0TWJoNq9yhjZhFMtfNEMDWPNz4v91wpRw7mEx5sJjzE939fcBkdySm+6vlD2UVued9R83MUxVQ7TwRTJo/u+P67ktSaWSTzRvd4/EzpVc/bKh3az1qV8EQw1c4TwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KogYVrJ599ll3+bgmbdmyhR49eijFKoD+/fszdepUpUcTQHR0tEsPq+3bt7No0SL69++vHAsKCqJFixZs2bJFOfbDDz9gNBoZOXIkEydOBGpWjKWkpLB582Y6d+6s9H76v//7Px5//HHl2ldeeYW9e/dSWlpKWFiYcnzdunVK3ymAOXPmUFJSAqAcj4uL46OPPlLG7N+/nw8++EBZ2aXT6ZTCXL9+/ZRxhw4d4vTp01RWVmIymfDy8sJut7Nv3z6XgmLHjh2VVV1H
jhwhNTWVlJQU7r77bmXME088wZYtWygpKXF5vr9VDocTq/XCNV93o8tgUH8DP82j+ngimDJ6lDGzCKaMHt3JKy2rImV1OumZF/9aPy4qiKcfisPsfW19UNq19OfEmZIrno9pFUBhoa3eXtX8HEUx1c4TwZTJozu//0R5FMUTwZTFo4/H1f8w02zSaz9rf2eeCKbaeSKYmkd1epQxswim2nkimJpHdXqUMbMIpqwebwT5+Xn/5lVnDSpY/V7KzMzk4Ycfdjnm5+dHSEiI0qPpSteB65Z3UVFR2Gw28vLyKC8vx8vLi6NHj1JdXe0yTqfT0aZNG4VRW7Dy9natlpaXlwOQm5tLdHQ03t7eGAwGl2KV0+mkoqJCKSa1atXqsqyzZ8+6ML28vICa7fwuVUVFBQA5OTlER0crBatLi1UlJSVUV1crrGPHjgEQGxtLdXU1VVVVZGRkkJ6eDkBBQUG9ClaA1lDuKmoMDfw0j+rjiWDK6FHGzCKYMnp0B2/uqvQ6fVAysiy8uyqdCY90uSbWsL7t+OrnU1c8/8e727olvxqfo2im2nkimDJ4dOf335WktszXg3mjewzx96ZTmyAOnLDgcF48rtfV9KQN9vPSftaqhCeCqXaeCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpXVasXPz6/OcX9/f2X7vCtdZzKZ8PT0VI4lJCTw7rvv4nQ6lb5TZ86cQafT0atXL5frjUYjW7dupUuXLkrfqb179zJixAgAqqqq2L59O3Cx71TTpk05duwYb7zxBhs2bMBisRAREUF5ebmy5V7tVn9ZWVk8++yzpKWl4eHhgb+/Px4eHjgcNS/uli1bAvDRRx8xZcoUsrKyCAsLU7YDrL1nkyZNOHXqFK+++iobN27EZrMRHh6OTqdTeovV9uBKT0+vs1JOp9MRHh5+9Um4irQeVnXVGPZD1TyqjyeCKaNHGTOLYMro0V08EX1Q/jK0M29/su+yxxv6PqzW5yiSqXaeCKYsHrU+RJrHhig5MY65v1qd17FNzeo87Wft788TwVQ7TwRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBSt3atiwYXzwwQdUVFTw008/UVlZSVFREdHR0S5b+o0YMYKdO3fi5+fH7Nmz+emnn5g3bx7r1q2jY8eOtGvXjhUrVtQpmEVERPDzzz+zcOFChgwZQmhoKAsXLkSn07msgNLr9Zw5c4aysjKSkpLIyMjgq6++wmQyKWNqVzytWrWKbt268ec//5nly5dTVVXlcs9mzZpx6tQpVq5cyciRIwFYsmQJer0evb7mm6VTp0506tRJ6fu1d+9edu3aRVVVFWFhYXVWe/1WaT2srq7GsB+q5lF9PBFMGT3KmFkEU0aPDeWJ6IPS9/Y29L29DQvW7ueXw2fp0j6UcQ92aojNOlLbc7weTLXzRDBvdI9aHyJxTBk8BgbC68m9yTtXSt55m9b/TKU8EUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoURas/Pz8lP5Pl6q4uNhl673LXVdZWUlFRYWyysrf35/Ro0cze/ZsXn75ZcxmM0FBQbRt29bl2tpVTN27d6d37940b96cefPm4XQ6mT9/PsXFxcTGxvKPf/yD5557TvHRpEkTHA4Hbdq0Yf369RiNRvr3789XX32F3W5X+N7e3pSWlhIYGMicOXMIDw9nzJgxpKamKlv+1TKDgoLIzMwkPT2d+Ph4goOD2bdvn3Lex8cHh8NBVFQUK1euxGw2M2zYMFasWKEUtwwGA++99x7vvPMOq1ev5uzZszRt2pSCggLOnDlDeno6cXFx1zw3Wg+ry8tgUP9+qJpH9fFEMGX0KGNmEUwZPZ4tKqOk3I6fl4GQgPp/2BTZB2XoXdGMe7ATVmtZg3qpXCq1z4sIptp5IpiyeNT6EGke3cHz9TRwa2yY9rNWZTwRTLXzRDA1j+r0KGNmEUy180QwNY/q9ChjZhFMWT3e
CPLzu8F7WEVFRdXpVVVSUsK5c+dc+k5d7jqArKwsYmJilOOlpaW0aNGCb7/9FoAXXniBI0eOuFwbFhZGfn6+cl2rVq0wGo1UV1czYcIEEhMTARRG7b1qVzS99tprdOvWTeHt3r2b06dPK1/XFtA2btyoHLNaraSmplJaWvOXoREREQD06dOH119/XRn38ssvs2/fPuVetSu3VqxY4dKHau3atS6rsUJCQpg+fTpjx47lgQce4K677mLUqFEAZGdn16tgBVoPq6upMeyHqnlUH08EU0aPMmYWwZTBY2lZFfPWZrhsI9apTRBJgzti9vK4Zt716IMiw7xcD6baeSKYN7pHrQ+ROKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHuXgiWBqHtXJk0mNcjPFhIQEtm3bhtVqVY59+eWX6PX6On2nLlV8fDy+vr5s2LBBOVZVVcVXX31FQkKCC//QoUOcOHFCOXbkyBGqqqq48847gZq+U7fffjseHh4uxbP169cTHR2tFJdqe20dPXpUGVNcXMzp06eprKykvLwcqOmPVVZW5pKptnhV23eq9v9PnXJttH7gwAEAcnJyADCbzS7HoaZIV1JSoqzWuvS5HTlyhOTkZJfjtf2yNGnSpEmTJtk0b20GB05YXI4dOGHh/TUZ9WYmDe5Ih8ggl2MdImuKYJo0aRIr7ftPkyZNmjRp0qRJkyZNmhqHGuUKq2HDhrFkyRKSk5NJSkoiPz+fWbNmMWzYMJe+U3/84x85fPgwOp0Os9nM4MGDGTduHCkpKQQFBSl9p4qKihg3bpxy3b333kvTpk0ZOHAgAC1atKCkpISoqCg6d+7s4iMtLY0PPviA5cuX06pVKw4dOsRbb73l4len0/Haa68xffp0goOD8fHxwdvbm8rKSoqLi/Hy8sLDo+Yvtnv06IFer6dly5bk5+cTHh6uFJlq+2Pt2LGDjh074uPjQ3h4uFIMqz3v7e2Nt7c3L774IgMGDGDBggWYTCZCQ0OxWC7+Au5Pf/oTR48eJTExkXHjxvHLL78A0KVLF5ec16qGNiC+EdUYGvhpHtXHE8GU0aOMmUUwZfF4usDmsrKqVg4n7M+ycN5aTrMgn2vm+vt68sKIeM4VlWF1wzaDtZJlXkQz1c4TwZTJo6jvv0u9qS2zSKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNjbJg5e/vz6JFi3jttddITk7GbDYzZMgQnnvuOWVMcXExBw4cQKfTMXfuXPLz85k5cyaDBg3imWeeYeHChVgsFmJjY1mwYIHLiqLU1FSKioqIjo7m5MmT5ObmAris3qqqquI///kPRqMRk8lEeXk5R44coV27dvTv318Zl5ubi9PppF27dpw+fZqCggLy8/N56KGHWL16tTKuqKgInU5H69atycnJ4eTJk5hMJiIjI5UxNlvNfunNmzfHbrdTUFDAkSNHuP3229m2bZvLMwoNDaVr164sXLgQnU6Hh4cHDz/8MB988IFLhsrKSpfeVgB/+9vf6js16PU6tzSuvlHVGBr4aR7VxxPBlNGjjJlFMG90j8fPlF71vK3S0aD3OVHvkTf6vFwvptp5IpgyeRT5GVWtmUUyZfQoY2YRTBk9yphZBFNGjzJmFsFUO08EU/MoB08EU/OoTp5MapQFK4Do6GhSU1OveH7lypV4eHjw3XffKX2c7HY7r7zyCt999x1JSUmXva6iooL333+fcePGMWHCBAAqKyu5+eab2blzpzJu48aNHD16lKCgIBITE5k4cSJpaWmMGzeOffv2KSuUdu/eDcAnn3yi9Kn661//SlpaGjqdDn9/f86cOUNpaSlxcXF8+umnQE0Bq0+fPpw4cYIuXboAkJaWBsCkSZO4//77Afjoo4+YNm0aUFPIg5ptCC9cuEB4eDi33norERER7N+/H6fTqYzJzc3l0KFDvP3228ycOZMxY8bw2muvATWFLJvNpmwteC1yOJxY
rReu+bobXQaD+hv4aR7VxxPBlNGjjJlFMBuDx/lr93M4p5jYVv6MG9SpXgwfD91Vz5tNegoLbfVig5zzIqNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ422YPW/tGXLFnr06KEUqwD69+/P1KlT2bp1K4mJiZe9bvfu3ZSWlrqskjKZTISEhJCVleXCv+mmmzh27BhRUVFAzQqsgIAANm/eTOfOnamsrFT6YGVlZRETEwPAgAED+PzzzwkLC8PLy0spRF3aXyogIICePXvy/fffK/z09HR0Oh35+fkumaZMmQKgjIuKiuL8+fMsXLiQjz76SCnsZWZmKmNOnTpFVVWV0ruqtlgFMGrUKG6++WY+/vjj3/Ko60hrKHdlNYYGfppH9fFEMGX0KGNmEUw1etyanseCLw4pX58tLGPz3jP834Ox3N6h+TWxQvy96dQmiAMnLDicF4/rdTU9b4L9vNySX4Z5Ec0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jHDwRTM2jOnky6YYtWGVmZtK3b1/Gjh3Lnj17lB5WISEhZGZmXvU6gO+++46nnnpK2TawVatW/Pzzz5w9e5bQ0FAyMzMxGAwAvPLKK8ycOZN77rmH1q1bK4zs7Gzsdjsmk4mxY8dSWlpKeHg4gwcPBiA2Nla5p6+vr7K9X3l5OV27dsXDw4Pq6mruvPNOAE6cOEFoaCizZ8/mrbfeUjJ5enri7e1NREQEAPv27cPpdFJZWcnw4cPx8PDAZDKRlpbG008/7XLvy+mVV14hLi6uIY9fkyZNmjRpum66tFh1qeatPXjNBSuApMEdeX9Nhksvqw6RQSQN7lhvj5o0adKkSZMmTZo0adKkSZMmTZqurhu2YFVcXMz69evp0KEDs2fPVnpY6fV6iouLr3id1WrFYDAwd+5cJk6cSPv27Vm2bBmbN28G4Nlnn+XZZ58lNzcXi8WCv78/M2fOpLy8nDfeeIPz589z+PBhxQPUbLFXVFTEn/70J6xWK++88w4APXr0UO4JoNfrMZlMPProo3zxxRdkZWWh0+mU7QWLi4vx8PCgvLycu+66i+joaJYsWUJlZSXx8fFKhtzcXAwGAyaTiaFDh7J+/XrOnj2Lv78/w4YNA2q2DYSa1VgzZswA4JFHHgGgY8eOdOxY/1/KGY1aU7lfqzE08NM8qo8ngimjRxkzi2Cq1eP7a9Kvev7D9Qd44sFr2x7Q39eTF0bEc66oDGu5HT8vAyEB7tl/WpZ5EckTwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsumGLVjVrjCaM2eOSw+rKVOmUFZWdsXrqqursdvtPP7444wZMwaAW265hTvvvBOLxYLBYCA5OVnZvm/JkiW0b98eqCkCjRs3Dp3Otf/FTTfdxKBBg1i+fLlS5CouLqZp06YAXLhwgdLSUiZOnMi+fftYsGABtau3fp3Jbrfzz3/+k/nz57N161aaNGmCxWIhODgYqNlW8OTJkzz//POcPXuWNWvWUFRUhF6vJyoqiiZNmgAX+2Hdd999So8sd0iv1wltaN3Y1Rga+Gke1ccTwZTRo4yZRTDdycs9V8qRg/mEB5sJD/GtF+PoKetVzx/OKa73+5LI9zM1z4sopoweZcwsgql2ngim5lEOngim5lEOngim5lEOngimjB5lzCyCKaNHGTOLYMrqURbdsAUrvV5PeHh4nR5WU6ZMUVY0XU61q6L69u2rHDOZTMTExLBt2zYWLlyIl5cXt912G3a7XSlWQU0PK4PBQGRkJAA+Pj4AxMfHk5SURFJSEgArVqxg2rRpVFVVARdXWP3xj3/kiSeeUHj3338/p06dqpPpwQcf5MEHH1Su7datm8JYtGgRer2ewYMHYzQaeeqpp3jllVf49ttv0el0VFZWYjKZ
mD59OgAeHh4uz2P8+PG0bt36tzziy8rhcGK1Xqj39TeqDAb1N/DTPKqPJ4Ipo0cZM4tgupNXWlZFyup00jMvbrkXFxXE0w/FYfb2uCZW2wg/zhZe+Q9R2rf0p7DQVi+fss2LKKaMHmXMLIKpdp4IpuZRnR5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53dsAWry+nXK58aOvZKY5xO52WP18fHtbIyMzM5efKkst3gpdq9ezf//e9/+dOf/kRWVhYA//nPf/jPf/6jjHnnnXd455132LdvH56enr/Z56XSGspdWY2hgZ/mUX08EUwZPcqYWQTTHby5q9I5cMLiciwjy8K7q9KZ8EiXa2KNG9iRren5Vzw/dkCHBvuVZV5EM2X0KGNmEUy180QwNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7Lohi1YORwOcnNzGTlyJOnp6ZjNZmJiYoCL/ZsuJ39/fwD+9a9/kZ2djcViISYmRinwFBcX4+XlhdFo5Pz58zz22GPs2bMHDw8POnfujN1uV1ZWXbhQs9IoLS2NBx98kKysLMLDw/H1rdn2yMPDw8XPc889x+HDh7HZbMTFxXHq1Ckcjosv7N+S6YknnuChhx5i2bJlbNy4UbnWbDaTkpKirP5avHgx77//Ps2aNWPXrl3k5ORgt9vR6/VMmjRJ8VYfaT2s6qox7IeqeVQfTwRTRo8yZhbBdBfvdIGN/VmWOscdTtifZeG8tZxmQT7XxHxycAfeW3Pgsscb8p4k07yIZMroUcbMIphq54lgah7V6VHGzCKYMnqUMbMIpoweZcwsgql2ngim5lGdHmXMLIIpq0fZdMMWrKCmZ9XBgwd58sknOXnyJKtWrcJkMuHtfXEPydGjR5OXl8emTZsAMBqN6HQ6duzYQb9+/YiPjyc1NbXONoLe3t7o9Xp+/vlnHn/8cUpLS1m6dCkeHh5Kn6ha5ebmEh0dzYQJE9i0aRO7du1yOe/j44OHhwdbt24lMTGR1q1bM3/+fKqrq9HrXV/c/ytTdHQ0hw4dYuPGjTz88MNER0fz3nvvYbVa2bp1K7fddhsAt912Gw6Hg//7v//j4YcfZsqUKTz22GM4HA6ysrLq3Pe3SuthdXU1hv1QNY/q44lgyuhRxswimA3lHT9TetXztkrHNb+PDExoy8CEtryzcjf7jp2n803BjB8W3xCbLpJhXq4HU0aPMmYWwVQ7TwRT8ygHTwRT8ygHTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KItu2IKVp6cnlZWVxMTEkJKSgtlsplevXmzduhWj8WJsh8OB3W5Xvvbx8cHpdHLrrbeyf/9+tmzZQkxMDBcuXKC0tFRZgVV7bbdu3UhNTcVoNNKzZ0/S0tKUbfxqxzZr1gyDwcCbb75JeHg47du35/Dhw8p5g8FAVVUVPXv2ZPPmzdhsNjp37syuXbtctuX7rZn+85//8MADD/D6668DcPToUdauXcsHH3zA+PHjMRgMVFdX89JLLzFq1Cief/555dqmTZtSVFRU7+eu9bC6vAwG9e+HqnlUH08EU0aPMmYWwXQXz8fj6tvimk36evecGjsgVvFYX8alkmleRDJl9Hi2qIyScjt+XgZCAtzzDxW1ZxbBVDsParYzzTlvo1WImQ6RQQ3myTgvIphq54lgah7V6VHGzCKYMnpsDJnd/R4I6n+OjWFeNI/q9ChjZhFMWT3eCPLz03pYYTAYCAoKYunSpcqxvLw8+vTpQ2VlpXJsyZIlLtdVV1cDMGLECAYMGKAcT0xM5NChQ3h5eQE1vaU8PT1ZuHChMsbhcNChQweqqqqAmkIVQKdOnXj33XeVcW+99RaHDx9Wtg6sqKgA4NVXX6Vly5bKuN69e1NWdrGR/G/JlJOTw4kTJ1yKUDNnzsRut7N27Vry8/MJDw9n27Zt5ObmMmrUKGXc4cOHGThw4NUe62+Stj/n
ldUY9kPVPKqPJ4Ipo0cZM4tgNpQX4u9NRIiZU+fqFpRahpoJ9vNSXc8pGeblejBl8FhaVsW8tRku2152ahNE0uCOmL3qv93ypVJb5uvBVCMvv/AC/1i8k9KyauWYr7eRv4++lZCAa9vW9HKScV5EMNXOE8HUPMrBE8HUPMrBcwdT9HsgqP85qnFeRPNEMGX0KGNmEUxZPcqiRluwOn78ONOnT2fPnj2YzWYGDx7MX/7yF0wmE1CzdV5RURFWq5Xa/k5bt24FalYc3XXXXVgsFmJjY5k8eTJdunQBUFYq/fzzz2zYsIG0tDSMRiPl5eXY7XbKy8vx8vJCp9NRUVHBypUrWb58OVlZWQQGBuJ0OpX+T2fOnAEgKyuLN954g7Vr12Kz2ZSiV22Pq9pVVGvXrmXnzp3s2bMHb29vioqKlLGXZlq8eDHLli0jLy+PwMBAACV3ZmYmAJ999hlvvPEGZ8+eJSwsTOmFVVpasxXT3r17CQgIID09nVGjRik9rADuv/9+902UJk2aNGnS9CtdrlgFkHO24auiNGn6PTVvbQYHTlhcjh04YeH9NRlMeKTL72NKkxD9+hd1AKVl1by2aCf/GZ/wO7nSpEmTJk2axEt7D9SkSZMmTSLVKAtWxcXFjB49msjISGbPnk1+fj4zZ86kvLycKVOmAFBZWYmXlxfJyckkJSWRn5/PrFmz8PT0ZN++fUyaNIn27dszYcIEhg0bxqZNm2jZsiUXLlzAYDCwYsUKgoODeeKJJ/juu+/Yt2+fcu/agpXZbGbq1Kn07t2bvn37snjxYgAKCgqUsVBTRMrOzmbUqFGcPXuWdevWAZCfnw/UFKJ8fX2ZPXs2rVu35sknn+Szzz7DYrG4rLCqrKzEaDTyj3/8gwceeMDlnrWsgwcPKv/fr18/AgIC2LJlC7t37wYuFqx++OEHrFYrEydOZMiQIWzevJns7GwAwsPDGzQ/DWlwf6OqMTTw0zyqjyeCKaNHGTOLYLqLt3Tjoaue//jbowy/t3292GrNLJKpeVSPx9MFNpeVVbVyOGF/loXz1nKaBdX/r47VmFk0U628fcfP1/lFXa1Ky6o5mF1IXFTTerFlnBcRTLXzRDA1j+r0KGNmEUwZPao1s8j3QFD/c1TrvIjkiWDK6FHGzCKYsnqUTY2yYLVy5UpsNhtz5swhICAAqCn6vPLKKyQlJREWFoZOp2Po0KEcPnyY5ORkzGYzDz30EIsXL6Z9+/aMGTMGgKioKPbs2cOCBQuYNm2acg+n04nT6SQlJYXY2FgeeughVq9ezcGDBxW+j48PRqORXbt2sXfvXu677z6Xws+lCgoKYvHixYSHhzNhwgT+/e9/s337doYOHQrUrOwyGAwUFxeTkpJCfHw84eHhbN26lfz8fOWeJpOJJk2a8PXXX2M2mxk5ciTLli0jIyMDgCZNmgA1WxsuX76cJk2aEBUVxQMPPMDnn39OZmYm8fHxNGnSBIfDgdPpZPny5TgcDuLi4khPT+fjjz/m4Ycfrtfc6PU6AgPN9bpWBjWGBn6aR/XxRDDdycs9V8qRg/mEB5sJD/F1G1fNmUUx1ezxqx0nST92jpvbhtCve+t6c47kFF/1/KHsoga/j8g0L6J4Ipg3usfjZ0qvet5W6WjQa3v34bMc/vEkMa2D6No+tN6cX+tGnxcRvDxLzlXP5xZcIOGWVg26h4zzIoKpdp4Ipoyf8UQw1c4TwdQ8ysFrKPN6vAeC+p+j2ublevBEMGX0KGNmEUxZPcqiRlmw2rJlCz169FCKVQD9+/dn6tSpbN26lcTERPz8/DCZTKSmpipjtm/fzqJFi+jQoYNybNmyZcyYMYNNmzYB4Ofnh91up127dspKKICPPvqI1atXs2fPHu666y6aNGlCdnY2kyZNUopftT7Onz/PqVOn8Pf3B2qKX+vWrVO+PnHiBP/+9785evSock+bzcadd97J3LlzFdbMmTPZunWrksnX15eioiJmzpxJv379lHEf
f/wxZ8+epbKykoiICAA+/PBDoqKilDGffPIJn3/+udLrKiYmhrS0NFasWMHEiRMZPnw4Pj4+TJ48WdlWsD5yOJxYrRfqff2NKoNB/Q38NI/q44lgupNXWlZFyup00jMvriiIiwri6YfiMHvXv1eLmjOLYqrZY1ZeMa+m/kwt4vvducz55Bemju1OZHO/a+a1a+nPiTMlVzwf0yqAwsL6bQ0o07yI4olgyuLRx0N31fNmk75er+18ywVe+fCnOn0ipj3WndDAhq3YkmFeRPDCg67+j88WTX1U83NMBFNGj7Jlbiyf8UQw1c4TwdQ8qtOjWjOLfA8E9T9Htc6LSJ4IpoweZcwsgimrxxtBfn7ev3nVWaMsWGVmZtZZAeTn50dISIhSbImKiqpTeDlw4AAAXbt2dTkeHR3NokWLKC8vV4o8oaGuf7malZWFyWRSVk+Fhoayf/9+l6KQ0+nk/Pnzisfbb78dvV6Pt7e3UqyqPQdw7tw5xWtVVRUtWrRwuWdubi4eHh7K+JCQEIqKimjTpo0ypqSkBJvNhtPpJCcnR/GTmZnp4m379u0AdOvWDYC2bdsCNX2zDAYDf/rTn1izZg2AUtSqr7SGcldWY2jgp3lUH08E0x28uavS6/Rqyciy8O6qdLf0alFjZtFMNXq8tFilMB3wyoc/Mf+Fu6+ZN6xvO776+dQVz//x7rYNfgYyzItongjmje4xxN+bTm2COHDCgsN58bheBx0igwj286oX+9fFKqjZcmfawp/c0ifiRp8XEbwOrYPw9TZedkskX28jsa0CVfdzTARTRo+yZG5sn/FEMNXOE8HUPMrBayjzerwHgvqfo9rm5XrwRDBl9ChjZhFMWT3KItUVrI4fP8706dPZs2cPZrOZwYMH85e//AWTyaSMsVqt+Pm5/lW30+nEbrezdOlSFi9eTFBQEPv373cZW9vHadOmTcyYMQMPDw/uueceunXrhtPppLi4mPj4eHQ6HSdPnuTBBx8kKyuL5s2bY7VaCQ4OVvpSdejQgW+//ZZly5YxefJkbDYbbdq0wWq1AjX9q0wmEyEhIRQWFjJ27Fglk7+/P0FBQZSU1PyF+R133AHAzz//zH333UdeXh6tWrUiJycHPz8/5Z5RUVEcPXqUF154gePHj1NWVsbkyZPR6XQu/oODg3nppZd48cUXKSsrIywsjNzcXIKCgpRC1R133IHRaGTRokUEBQURHx+P2VyzTU379vXrHVIrrYdVXTWG/VA1j+rjiWC6iyeyV4taM4tkqtXj93tO1SlW1crugG37T5PQpcXlB1xFfxnambc/2XfZ4w15D5FlXkTyRDBl8picGMfcX61K6NimZlVCfV7bWq8k9fKmPdadaQsvv/JNTT/HRDBl9ChT5sb0GU8EU+08EUzNozo9qjmzqPdAd3psLDwRTM2jOj3KmFkEU1aPsklVBavi4mJGjx5NZGQks2fPJj8/n5kzZ1JeXs6UKVOueu38+fMpKCigS5cu/OUvfyE1NZXNmzczbtw4xo8fT35+Pj/88AMAZ86c4c0336S8vJy//e1vysoiAE9PT8xmMzk5OXTt2pXnnnuOTz75hMLCQpo2vfgLge7duwOwefNmHn30UXx8fPjwww8xmUwuK5Tatm1LWloaR48eJTk5mV27dvHdd98RGRmpFKyaNWsGwMGDB+nTpw9Dhgxh0aJFVFRUuNwzPj6ejRs3cvLkSaKiosjIyGDOnDnce++9fPnll8q4nj17snbtWvr370+nTp1ISUnB4XAQGxurjAkODsbLy4vS0lJuueUW4uLiWL58OVCzkqu+0npYXV2NYT9UzaP6eCKYDeWJ7tUC6st8PZhq85h5+spb9wEcy7MyuE+7a+b2vb0NfW9vw4K1+/nl8Fm6tA9l3IOd6muzjm70ebkePBFMGTwGBsLryb3JO1dK3nlbg/u+aL2S1MsLDDSzYvpA9hw+y6GTFq23mEqZaueJYMr4GU8EU+08EUzNoxw8dzBF
vweC+p+jGudFNE8EU0aPMmYWwZTVoyxSVcFq5cqV2Gw25syZo/SnstvtvPLKKyQlJREWFgbUbP9XW+wBqKio4P3338fHx4du3brRo0cPbrnlFvr27Ut+fj7JycmYzWZatWrF0aNH+ec//0lMTAwA7733HocPH0an07n0nPLy8uL06dO89dZbxMbG0qtXL3bt2kW7du2UMQAtW7bkv//9L0ajkfvuu4+vvvoKQGGVl5crX7/zzjuEh4fzhz/8gTVr1hAYGKhk0Ol0+Pn5sW/fPrZv366slDp+/LjCqt0KMDo6moyMDAAGDRrEXXfdxZdffqmM++c//0n37t2ZP38+GzduxOl00rNnT3bt2oXdbsdgMLBr1y5KS0vp2LEje/bs4dtvvyUoKAiAjIwMysrK8Pa+9m8srYfV5dUY9kPVPKqPJ4LpLp6oXi2g3swimWeLyigpt+PnZSAkwD0fatzhMap5E76/yvmbwv0atEf90LuiGfdgJ6zWsgZxaiXja0fzqE6Pvp4Gbo0Na/BrW+uVpG4eQHTzJnRtH6ran2MimDJ6lClzY/qMJ4Kpdp4IpuZRnR4bQ2Z3vweC+p9jY5gXzaM6PcqYWQRTVo83ghptD6stW7bQo0cPpVgF0L9/f6ZOncrWrVtJTEwE6van2r17N6Wlpeh0OqVvk8lkYsCAAWzatIm9e/cCMHbsWI4ePepyzzVr1nDzzTdjMpnw8vKisrKSCxcuEBoayubNm5VxX3/9NVu3biU4OBiAEydOADBkyBCSkpKUcY888gi//PKL4qO2p9Xy5cuVgpLVauWzzz5Tvs7JycHpdBIZGcnHH3+ssObNm8ebb75Jq1atlNwATzzxBFarlcmTJ/Pss8/yxRdf4OHhQcuWLZVrhw4dyvHjx1m6dCnz58/n0KFD/PTTTzgcDgwGA1lZWQBK4QsgPz8fgNOnT/Piiy/y1ltvXXW+riRtf84rqzHsh6p5VB9PBLOhPFG9WtzpUTTPHczSsirmrc1w2XqnU5sgkgZ3xOxV/6bml6ohHu+IC2fRhkOX3RbQoIeenZpre9SrhKl5vDF5Wq+kxsETwdQ8ysETwZTxM54Iptp5IpiaRzl4IpgyepQxswimjB5lzCyCKatHWaSqzRQzMzOVokyt/Pz8CAkJcSlQJSQksG3bNqVfVO05vV5Pr169lHHR0dHk5eUpq5yKi4sxGo1s2LBBGVNdXY3T6VRWO2VnZ+N0Ojl79qxSlAKUe0VERCjjPDw8XIpaADabDYPBoIwrKipCp9MpK6+gZnWWTqdTtt6r9X/kyBHlPgAFBQUAREZGAjWruSIjI122/wNYv349PXr0cOnzNW/ePFJTU3n11Vfx8PBg0aJF/OlPf8LDo+YXoHfeeSd9+vQhJCSEadOmMW/ePAYMGABAYmIiycnJaNKkSdPVlDS4Ix0ig1yOdYisKbZo+m2atzajTlPzAycsvL8m4wpXXH+9PPpWfv1HMAZ9zXFNmjSJ199H34qvt+vfmPl6G/m79j2oSZMmQdI+42nSpEmTJk2aNGn6vaSqFVZWqxU/P786x/39/SkuLla+HjZsGEuWLCE5OZmkpCS2b9+uHK/dNhDggw8+wOl0ctttt+Hr60tlZSWxsbEsXLiQoKAg2rVrx4oVK7Db7UrxqPY+fn5+DBw4EIAWLVooWxDWbptntVoJDAxk9+7ddOzYEU9PT2XLQYPBoHgoLS2ldevWTJkyhalTpxIcHIyPjw8Gg4Hw8HCXezqdTnr06IFer6dly5bk5eUBYDRenKY//vGPzJo1iy+++AKAUaNGkZWVxdKlS5Ux69at48033wRg8uTJAHTs2JEBAwZgsVgICgqivLwch8NBaWkp06ZNc3nezz//vJKzPmpok80bUY2hgZ/mUX08EUx38vx9PXlhRDznisqwunE7OzVndidTZFNzd3kEiG4RwIcv9iNtXx5HThXTLsKfOzqHN4hZK7XPtVpfOyJ5IpgyenQnr3mwL3P/ehcH
TljIPmejVYi5zi+S6yNtXtwjzaN7pHaPsmVuLJ/xRDDVzhPB1Dyq06OMmUUw1c4TwdQ8qtOjjJlFMGX1KJtUVbD6rfL392fRokW89tprJCcno9fr0ev1TJo0SRlTXFzMqVOnAJg+fToVFRW8/PLLXLhwgWeeeYaFCxdisViIjY2lc+fOeHl5udyjpKSEtm3bcvLkSXJzc5WeVbVyOBwUFhbSvHlzdDodZ86c4ciRIwQHB7sU15xOJydOnKB9+/acPn2agoIC8vPzCQ4OdlkRBTV9rFq3bk1OTg4nT550KVTVZvrwww+JjIykoKCAkpISjh07RkJCAl27dlXGffTRR3WeWUZGBn/605+YMWMGiYmJ2Gw2Tp8+jdFoZNy4cXh6erJ48WJKS0uZP38+f/vb365xVmqk1+sa3IT3RlZjaOCneVQfTwTTnTxR3/NqzuwO5vVoag7uyz3ozrZu4VxOap9rtb12rgdPBFNGj+7k9Qo00+t/D7tmafOiTqbmUQ6eCKaMn/FEMNXOE8HUPMrBE8GU0aOMmUUwZfQoY2YRTFk9yiJVFaz8/PyUlUyXqri4WOn3VKvo6GhSU1MBWLZsGa+++qpLUWnlypUYDAYcDgf33HMPXl5ezJ07l8zMTP7whz+49J0aNmyYwvf2rnkx3X///UoPp8rKSvr160d+fr4yrqCggKqqKhYsWKBsY5iWlsa4ceNcvOr1eoKDg1m7dq1y7K9//SsbNmxQxjkcNftZPvHEE8pWfEVFRSQkJAAo41auXInNZuPzzz/n22+/ZfLkybzwwgv861//Ij8/X1lddu7cOR544AFllVVtnqysLNq2bav4OnLkCCkpKdx9990ADB48mPvuu4/FixeTnJyMr6/vlSfrCnI4nFitF675uhtdBoP6G/hpHtXHE8GU0aOIzOu2ZnEou4gOrQMY2LNNvRgim5pD43iOavcoY2YRTBk9ish8tqiMEjevdNDmpeGS0WNGloWc8+5b7Qfqf46NYV4ag0fttaPOedE8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9XgjyM/P+zevOlNVwSoqKsqlVxXUrHQ6d+5cnd5Wv74OICsri5iYGAC2bNlCs2bNsNvtyuqpm2++mdzcXLZu3UpiYiJQswIqKytL6X11/vx5AJo3b67wTSYTnTp1Ij8/X7lXUVERAE2bNlXG9erVCw8PD8zmmr9Eq6yspLq6uk6xrU+fPnz++ecEBAQAcPbsWaCmR1WtAgICiIyM5MiRI8rxLVu20KNHD+U6gH79+jFr1iwlU05ODidOnOD55593uecDDzzA7NmzyczMJC4ujmPHjgEQGxvrck+o6euVn59fr4JVzfXaN+OV1Bga+Gke1ccTwZTRozt4B05Y+NfKX5Sv048X8NG3x3lheBdiWl3bL16uR1NzUOdzFM1UO08EU/N44/JKy6qYtzbDZQvRTm1qesmYvTwaalGbF5Uy1egxv/AC/1i8k9KyauVYbT+1kID6b2F7qdT+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08maSqglVCQgLvvfeeSy+rL7/8Er1erxSUanX8+HGmT5/Onj178PHxwcPDg88//1wpWB0/fpyqqioGDRqkXNO3b1/Wr1/P8uXL+c9//oPFYiEiIoKioiLuvPNOAHJycgDYsWMHzz77LGlpaXh4eKDX11QAg4ODASgvL0en0/HOO++wc+dOsrKyCAsLo7q6miZNmgCQnZ2N0+nk5MmTvPrqq2zcuBGbzaYwQkNDgZril8FgYP369axevZo9e/ZgNpu5cOECRqNR2TowMzOThx9+mE8++URZ/fXkk0/i5+enFPpq/z8sLIwXX3yRr7/+mqqqKmX1lYdHzS82WrRoAcBdd9112bmo7a+lSZMmTZou6tJi1aWatfwXFk66+5p5SYM78v4a119Ea03NNWnSdKnmrc3gwAmLy7EDJyy8vyaDCY90+X1MaZJSvy44
AJSWVfPaop38Z3zC7+RKU2OQ9trRpEmTJk2aNGnS9FulqoLVsGHDWLJkCcnJySQlJZGfn8+sWbMYNmyYUnABGDFiBLt37+aWW25h9uzZ5Ofn88orr7BgwQJCQkJo164dRUVFeHh4MG7cOOW6++67jxdeeIH09HSGDBlCaGgoCxcuxGAwEBgYCIDVasVoNLJ//35ycnJISkoiIyODr776CqjZntDLy4uysjKaN2/OsmXL6NatG3/+859Zvnw5TqcTu92ujIWaVVwrV65k5MiRACxZsgSAiooK5Z6BgYF89913hIeHk5yczJYtW/jpp5/Q6S5uGWW1Wvnll1+YP38+nTp1oqCggKCgII4fP86+fftc7jlhwgRKSkoYNmwYubm5fPHFFwCEhIQA0KlTJ0JDQ5XVXQC+vr5UVFRw//33K1sj1kdGo9ZU7tdqDA38NI/q44lgyujRXbw1P2Re9fyGH08y6I5r2x5QVFNzUO9zFMlUO08EU/OoTo/u4p0usLkUtGvlcML+LAvnreU0C6rf6gRtXtwjWTzuO36+TsGhVqVl1RzMLiQuqullz/8Wqf05qnVeRPLcxdReO+qcF5E8EUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsklVBSt/f38WLVrEa6+9RnJyMmazmSFDhvDcc8+5jDtz5gxOp5M5c+a4bGM3bdo05s+fT3FxMU6nk4cffthlmz2Hw4HD4cDX15f169djNBrp378/P/74IwsWLGDatGku9wkMDGTOnDmEh4dz11138f3333Pw4EGleGa32wkNDSUzM5P09HTi4+MpLy8nNzfXhVNVVUWbNm1YuXIlZrOZhx56iE8//ZTt27czdOhQhWUymdDr9bzzzju0adOGrl27smfPHpf+VDt37gRg//79APz8888uX9cqOzsbk8nE8uXLiYiIYPDgwXz22Wf89NNPdOvWDYPBwMCBA1m8eDFBQUEUFRVRVVWFp6en0kerPtLrdcKa894IagwN/DSP6uOJYMrkMfdcKUcO5hMebCY8pH5bnQIcOVV81fOHcooYVc+ffyJ/bqp1XkQy1c4TwdQ83pi842dKr3reVulo8M8PmebFXe8Hl5NaM7uLmWfJuer53IILJNzSqt78Wqn9OaptXq4Hr6FM7bUjhieCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFqipYAURHR5OamnrVMc2aNaN9+/YuvZwGDBjAtGnTmDBhAomJifTo0UPZmq9Wu3fvxul0cu+99zJjxgzl+IwZM9i0aRMAfn5+VFdX065dO9atW6eM+eijj/j+++/Zs2cPd911F02aNCE7O5tJkyYxZswYZVz//v2xWCycOnVK6V1Vu8Kq9usTJ07w6aefcvToUeWepaWlJCQkMHfuXIU1c+ZM9uzZo/Sn8vX1paioiHfffZd+/fop42655RYuXLhAZWWlcg+z2cyuXbuUFVpbt27ls88+48CBA8p1ZrMZT09P0tLSACgrK+Pee+9lyZIlTJky5apzcCU5HE6s1gv1uvZGlsGg/gZ+mkf18UQwZfJYWlZFyup00jMvrk6Iiwri6YfiMHtfe9+XdhH+pB8vuOL5mJYBFBba6uVVpnkRyVQ7TwRT86hOj+7i+XjornrebNJrP3d+g9z9fiDCoyieu5jhQVf/B3eLpj71fi2C+p+jWudFJM9dTO21o855EckTwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfV4I8jPz/s3rzpTXcHqt6i2l9Ol8vPzIyQkROnhFBUVpfx3rWqLNV27dnU5Hh0dzaJFiygvLycqKgq42F+qVllZWZhMJrKzs5Xz+/fvV8ZDTWHq/Pnzisfbb78dvV6Pt7e3UkiqPQdw7tw5xWtVVZXSV6pWubm5eHh4KONDQkIoKiqiTZuLW06VlJRgs9lwOp3k5OQofkJCQly2E8zMzESv15Ofn+9yj/Lycm6//XasViuRkZH4+vpy8uRJGiKtodyV1Rga+Gke1ccT
wZTB49xV6XX6vmRkWXh3VXq9+r4M7BHJfzdfeVvA/re3bnB+GeblejDVzhPB1DzemLwQf286tQniwAkLDufF43pdTb+7YD8v7efOb5C73w8uJ7VldjezQ+sgfL2Nl93azdfbSGyrQLf4VftzVNu8XA9eQ5naa0cMTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIsaZcHKarVSVVXF2LFj2bNnD2azmcGDB+Pn56f0cEpISOC9997DarXi5+cH1KywAjh16hR33XUXFouF2NhY+vTpg9PppLi4mPj4eHQ6Hfn5+Tz77LOkpaVhNBqprq4mKChI4Xfo0IFvv/2WHTt28K9//YusrCwCAwOxWq1ATS8pk8mkFJneeOMN1q5di81mw2w2ExAQQGlpzTYvd9xxBwDHjh1TMnl7e1NSUuKSqXXr1hw9epTx48dz5swZTCYTwcHB6HQ6F/9eXl7k5+czYsQIMjIyKCsro3PnzjRr1oySkhLlObZq1Yp7772X/fv3Y7PZOHXqFBUVFfj41K8XQq20HlZ11Rj2Q9U8qo8ngimLR1F9XyY/Gs+Mpbsve7whP/tkmRfRTLXzRDA1j+r06E5ecmIcc3+1Oqhjm5rVQdrPnf8tkX3A3OVRJM+dzGmPdWfawp9cCg++3kamPda9wZ//1f4c1TwvonjuZGqvHXXOiyieCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2dQoC1ZOp5NPPvmEDh06MHv2bPLz85k5cyZ6/cUXwrBhw1iyZAnJyckkJSWRn5/PDz/8gE6nY+HChUycOJH27dszYcIE3nrrLeU6T09P/Pz8OHr0KEVFRTzxxBN89913pKen4+FxccuQ7t27A/DBBx/Qu3dv+vbty+LFi+t47dChA9999x1Llixh1KhRnD17lnXr1uHl5YXTWfOnss2aNUOn07Ft2zYiIyN58skn+eyzzygsLKSqqkphtW/fnq+//hqr1crjjz/O+fPnWb58eZ17tmjRguPHj5Ofn090dDT79+8nIyODvn37cuTIEQBmz57NL7/8QlpaGgMGDCA8PJz169eTl5fHgQMHqKqqcsn7W6X1sLq6GsN+qJpH9fFEMG90j6L6vvQMNLOua0s++eYIew6fpWv7UIb2bVdfm3V0o8/L9WKqnSeCqXm8cXmBgfB6cm/yzpWSd97m9v5LN/q8XI8+YKCuzKKYgYFmVkwfyJ7DZzl00kJM6yC6tg/93xdeg9T+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdQoC1Ymk4mKigrmzJmj9LGy2+1MmTIFo7Emkr+/P4sWLeK1114jOTkZs9lMly5d2LFjB6NGjVL6TkVFRVFYWIjT6VS27WvSpAnFxcU4nU5SUlKIjY1l0qRJzJgxQykyNW3aFLjYK2rv3r3cd9995Ofnk5aWprDCwsIACAoKYvHixYSHh/PSSy/xxhtvYDKZlEyenp5UVFRQXFxMSkoK8fHxPPDAA8yePVvJ1L59ewCaN29OSkoKZrOZ4cOH89///pcLFy4o92zbti0VFRXo9XoOHjwI1PTD2rx5szKmQ4cOLF68GKPRyNdff01YWBi33347VVVVrFu3jl27dnH77bdf89xoPawuL4NB/fuhah4bzsvIspBz3karEDMdIoMazAP1ZxbBdAdPZN8XgPu6tWRo33ZYrWUN4tRKlnkRzVQ7TwRT86hOjyIy+3oauDU2TPu5c40S/X6gxsyimdHNm9C1fajbXoug/ufYGOalMXjUXjvqnBfNozo9yphZBFPtPBFMzaM6PcqYWQRTVo83gm74HlYGgwFfX1+lWAXQu3dvACorK5Vj0dHRpKamKl8vWLCAHTt20KlTJ+XYsmXLSExM5NChQ3h5eSnHPT092bp1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWlqasq1eRUWFcp+WLVsq4+bPn09ZWZnytdFopEmTJqSlpSnH
8vLymD17tpKpQ4cOADzxxBP069dPGffNN99QVlam8KOioti+fTs7duxg9erVTJ48mTvuuIPU1FTatatZCdC3b186dOiAj48PKSkpCuuNN94AUApz9ZG2P+eV1Rj2Q9U8XrvyCy/wj8U762xz8vfRtxIS0LAtNmultszXg9kQ3vXo+9JQj9eDJ4Ipo0cZM4tgyuhRxswimNr7gTqZMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNcqCld1up6ioyKU/VW1x6dJVS79W7UqlAwcOMGDAAACqqqrIycnBbrdz88034+vrS0VFBRUVFZw4cYLIyEgAfvzxR5xOp7JN3pkzZwDYs2ePSz8sg8EAwIULNauMPD09Afjzn//MiRMn8PDw4M4778Risbh4tdvtWCwWBg4cSHZ2NuHh4XTt2tUlU8uWLYmMjOTf//43U6dOxWazERcXx+nTp4mIiFDGJSQkMHfuXBITEzl69CgAr7/+OgcOHODxxx9X7nnTTTexbNkyunfvTnFxMY899hiffPIJRqORW2+99donRpMmSfXrYhVAaVk1ry3ayX/GJ/xOrjQlDe7I+2syXHqXdIgMImlwx9/RlSZNmjRput7S3g80adKkSZMmTZo0adKkqXGoURasKisr8fLyculPNWvWLPz9/amuvvhL49GjR5OXl8emTZuAmiKSwWBg0aJFhISE0K5dOxYvXozVagVg+vTpVFRU8NJLL2EymXj22WeZMGECZWVlzJo1i6CgIHS6mm1FiouLASgoKKBr166MGjWKTz75hMzMTJfzVVVV6PV6Dh06xIgRI/Dx8WHhwoU4nU6X1WAVFRU4HA6Ki4v585//zK5du1i9ejU+Pj4umZo1a8aPP/5Inz59uOWWW5gzZw5Op5Px48crY6KiovDw8ODYsWP06tWL77//ns8//5yAgADuvfdeZdzatWvx9fVV8i9cuBCA559/vl79q2rV0Ma5N6IaQwM/zWP9tO/4+TrFqlqVllVzMLuQuKim9earMbNoprt4/r6evDAinnNFZVjL7fh5GQgJcM8ewmrNLJIpo0cZM4tgyuhRxswimNr7gXukeVQfTwRT86hOjzJmFsGU0aOMmUUw1c4TwdQ8qtOjjJlFMGX1KJsaZcFKp9MxdOhQDh8+rPSnGjJkCFu2bHEZ53A4sNvtLsf0ej3PPPMMCxcuxGKxEBQUhNFopLq6mu7duxMWFsabb76JxWKhWbNmTJgwAaPRyD333EN+fr6yVV7t1oC33XYbhYWF/Pvf/6Z58+b4+fkpBSCAU6dO4XA4SExMZP369dhsNtq2bcuBAweoXY0FNVvwBQcHc9NNNzFnzhzMZjNRUVFkZ2crY86cOcPPP//M4MGD+eWXX9iyZQt2ux2j0ais+AJYuXIlRqORfv36KcW6tm3bcuzYMQoKCpS+WtHR0fzyyy/o9XrsdjsBAQHodDp+/vlnxo0bpxTnrkV6vc4tjatvVDWGBn6ax2tTniXnqudzCy6QcEurevNrpabMopm550o5cjCf8GAz4SG+DeaJ/Jkk07yI4olgqp0ngulOnru/B2sl23MUwRPBlMmj9n6geVQbTwRTNo+7D5/l8I8niWkdRNf2oW7jqjmzKKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpWfnx8mk8mlPxXA559/jr+/v/L1kiVL6lxXVVXFmDFjSEpKAmDEiBEEBARw+PBh5dpmzZphsVjo378/8+fPV64fNmwYzZs3B2oKUQD3338/w4cPV8ZMnjyZVatWKayCggIMBgMzZsxQxjidTm6++WalYFVZWYnT6aRt27Yumb755huefvpp9PqaimxaWhoOh4OXXnqJX375haeffpqnn36ao0ePsmXLFp544gkAtmzZQs+ePfnXv/7FqlWrmDx5Mu+++y733nsvW7duJTExEafTycmTJxkzZgyTJk2iffv2/N///R9dunRh+PDhbN26lTvuuOMa
ZwYcDidW64Vrvu5Gl8Gg/gZ+msf68cKDrv4G1KKpj9bM/TeqtKyKlNXppGde3LIpLiqIpx+Kw+xd/1Wfas4siieCKaNH2TJr34Pq5YlgyuhRxswimDJ6lDGzCKY7efmWC7zy4U91eshOe6w7oYH17yGr5syimJpHdXqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ42yYBUVFaVsvVerkpISzp07R1RU1FWvA8jKyiImJgaAzMxMWrRoQXh4OF5eXkDNaqQjR4643MPpdJKVlUWvXr0AsNlqfgFdVlbmcg9v75pfXrdo0UI5b7fbKS4uVopYOp0OLy8vpWBVu4rq16xmzZoBKL4yMzNp2rQpWVlZjB8/nj/84Q+MHz+et956i08//VS5LjMzk4cfftiF5evrS0hIiJLJYrFgsViU51CrDh06uHiqj7SGcldWY2jgp3m8NnVoHYSvt/Gy2wL6ehuJbRWoNXP/jZq7Kp0DJywuxzKyLLy7Kp0Jj3RpoDt1ZhbNE8GU0aMsmbXvQfXzRDBl9ChjZhFMGT3KmFkE0x28XxeroGY77mkLf3JLD1k1ZhbN1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiRlmwSkhIICUlhZEjR5Keno7ZbCYmJga9Xq8UlC6n+Ph4fH19ef3118nOzsZisVBRUUFFRQUPPvigC3/NmjWsXbuWZcuW4eHhQefOnSkqKuLOO+8Eavph6XQ6/vvf/7JmzRqysrIIDw9Xtgz09b24hY5Op+O5557j8OHD2Gw24uLiKCkpoWXLlsDFflcHDx50yRQdHQ2gjLNarXh6ejJmzBgA1qxZQ3p6Oh07dlQYtbyvv/6atWvXcv78eQBefPFFvL29lXFBQUF4eHjwt7/9jb/97W8AzJo1i1mzZgEXC271kdbDqq4aw36omsf6a9pj3Zm28PJ/4dnQ7we1ZnY383SBjf1ZljrHHU7Yn2XhvLWcZkH1+2tZtWYWyRPBlNGjTJm170F180QwZfQoY2YRTBk9yphZBNNdPJE9ZNWaWSRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBasBAwbw9ttvc/DgQZ588klOnjzJqlWraNeundKfCWD06NHk5eUpfZw8PT3p0qULaWlp9OvXj/j4eGbNmoXNZqN///7KdXfffTc6nY5z587x5JNPUlpaytKlSwkKCqJz587KOL1ez/Hjx4mOjmbChAls2rSJXbt2uXg1Go00adJE2YqvdevWzJ8/H4fDQXBwsMvYysrKOpkAZWVWeXk5p0+fxuFwMGjQILp06cK2bdtYvXp1nX5TDoeDfv36kZ+fz9dff82BAwdcVqDpdDq6d+/O1q1bGTRoEOvWraNHjx4cPXoUX19fevToUa+50XpYXV2NYT9UzeO1KzDQzIrpA9lz+CyHTlrcvoc+qC+zu5nHz5Re9byt0tHgny1qy3w9eCKYMnqUIbP2Pdg4eCKYMnqUMbMIpoweZcwsgtlQ3vXoIau2zNeDqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFjbJgtX79ekwmEzExMaSkpGA2m+nVqxfbt28nPz9fKVo5HA7sdrtyXUVFBXv27KF79+7s37+fLVu2YDAYMJlMbNiwgdtuuw2Ab7/9FqfTSXh4OKmpqRiNRnr27ElaWhr79u2jc+fO+Pn5YbfbiYyMxGAw8Oabb/L/2Hvv8KiK9v//tTVlk00nEFpIgCR0kB5AaUoTfBAU9YvYUdGPCioWQFRQHhRFsSGCFBUriiAWBCEU6UgnlBAgJCSBlE02ZTe7+/sjvzPu0h4pRxfPvK7LC7N79n3ue2bOzJwp98TFxVG7dm1OnDghJpmCgoI4evQonTt3ZvXq1djtdlq0aMHmzZspLa0eHFKuNRqNPj61adOGbdu2iV1bLpcLt7t6K+GSJUtYsmTJOdMnLCyM06dP8+mnn4rPcnNzgepwiAp9+vRh3bp17Ny5E4Ddu3fTs2dPnnji
Ccxm8yXljTzD6twYDP4fD1XaePl6ibVCaZ1UA5ut/LLOrfLG330GyCsqp6TChTXQQEz4pTXIwSbdBb+3mPWXnKZaLItqaGrRRi35LJ9B/9ZTQ1OLNmrRZzU0tWijFn1WQ/NK6al5hqy/+qym5pXoy5+JFtPR3/XU0NSijVr0WQ1NLdqoRZ/V0NSqjf8GrNZ/+RlWaWlppKam8t5774nPbDab2DE0ePBgABYsWODzu23btmG323nuuedISUkB4I477iAvL4+0tDRx3YoVKwB45JFHhJbH46Fjx46sXr2aFi1aUK9e9YqtHj16iJB6AA8//DAnTpzg1KlT1KlTR5w/NX36dDEx5fF4RFhAgHr16qHT6YiPj+eTTz4RWkuXLmXbtm3k5+cDkJKSwrJly3jttdd8QhgOHDiQAwcO4HA4MJvNJCQkEB4ezrvvviuuycrKomfPnrRt21Z8ZjRWZ//nn39Op06deOihh7j33nv/ajacFxmf8/xcDfFQpY3+p6eG5pXQKy138uH3e3zCiDVrEMnIQU2xBJouSismLIhmDSLZm1mA2/Pn53odNImPJNoaeNn2aiVf1NbUoo1a8Fk+g1eHnhqaWrRRiz6roalFG7Xosxqal6v3d5wh628+q6F5Jfvy50ML6Xi16amhqUUbteizGppatFGLPquhqVUbtcJVGUwxIyNDhLZTsFqtxMTEkJGRccHfAT6/7datGydPnuTEiRNUVFQAsGPHDnQ6nc95WDqdjgYNGgiNqKjqmNgnT54U1zidTnbv3u1zr/DwcAAKCwvFdb///jtOpxO7vXrVl9lsxmAwYLPZfOxdtWoVBoNB/LZGjeoQY8eP/xkCobi4mMzMTDwej/i8W7durF+/nqKiIpxOJ1lZWTzxxBMA55yQGjBgAAAzZ85k5syZeO9Kk0gkkvPx4fd72JtZ4PPZ3swCZi7ec0l6Iwc1pUl8pM9nTeKrX5olEon6yGdQIpFIJFcL40e0JSTId/1tSJCR8SPanucXkjO50n15iUQikUgkkivBVbnDymazYbVaz/o8LCyM4uLiC/7ObDYTEBAgPhs2bBgfffQRDoeD5cuX43A4yMnJITEx8azzsPbt20dwcPWB4+Xl5QAsX76cefPm0bhxYxYuXCh2TSl2REVFYTKZePTRRxk9ejTl5eVMnTqVhIQEn4knj8dDXl4eEydOpG/fvmzcuJGlS5cSGxsrtPT66vnFOXPmULNmTWJjY5k5cyYWi4XKykpx3bBhw1iwYAH9+vXj9OnTQPWE24033ugzWed0OunatauYXCspKeGNN95gy5YtzJo16y/lxbkwGq/KeVBVuRoO8JM2+p+eGppXSi/ntN1nNaaC2wO7jxRwylZBzcjgi9IMCwng6TvakF9Uju0KhiXRUr6oqalFG7Xms3wG/VdPDU0t2qhFn9XQ1KKNWvRZDc0rqVcrOoT3xlzH3swCjuXbqRdjOWvRxaXgzz5fSU01+vJX2kY19dTQ9Hc9NTS1aKMWfVZDU4s2atFnNTS1aqPWuConrK4kYWFhjBo1ildffZXnn3+ekJAQQkJCaNWqlc91brdbnCXlzc0338ycOXMoKCggJSWF6dOn88ADD4jv9Xo9NWvWJD4+ntGjR2M0Gunduzd16tTxCWmo0+kYOHAgW7du5euvvyYuLo5JkyYxd+7cs+45YMAApk2bht1up02bNrzwwgs89thjPj7NmzeP559/npKSEgICAggODmbz5s1kZ2cTFxcHwLFjx1izZo2Pj1AdcjEvL0/s6LoY9HrdZR/K/m/majjAT9rof3pqaF6u3uGTpRf83u5wX3JdoFYdooV8+Ts0tWij1nyWz6D/6qmhqUUbteizGppatFGLPquheSX1UiMspP7vyy4af/b5Smiq2Zf35t+ejlejnhqaWrRRiz6roalFG7XosxqaWrVRK1yVE1ZWq1XsZPKmuLhYnBN1vt85HA4qKyt9dlkFBwej0+nYtGkTgYGBDBkyROyg
UliwYAHDhg0T+sq/vXr14sUXXxTXZWZm+nxvtVqpqKhgxowZPnpvvvmmj61Wq5XY2FimTp3qc91bb7111j1HjBjhc89169b5fA+QmJjI559/Lv4uLy/n+uuv56OPPmLChAninsHBwfz2228idOEbb7zBzJkz+f333xk0aNB50/J8uN0ebLayi/7dvx2Dwf8P8JM2+p+eGpp3TvpV/P/8cb0uWSfYpLvg9xazXh52/Q/qqaGpRRu16LMamlq0UYs+q6Hp73pqaEob/dNGLfqshqYWbfRXn9Xsy4N20vFq0lNDU4s2atFnNTS1aKMWfVZDU6s2/huwWoP+8q6zq3LCKiEh4ayzqkpKSsjPzz/rbKszfwdw5MgRkpOTxecZGRnExcURGBgorjtw4IDPbz0eD0eOHBHnWtWrVw+TyURGRgZdu3b10fK+V0JCAqdOnTprMu3Mc7j+ik/Kv2f+NiMjA5PJRN26dc/re1BQEImJiRw9elR8lpaWRqdOncRkFUCnTp2YOXMme/fuvaQJK0AeKHcBroYD/KSN/qd3JTTvmbLyrM+Uyas5z/S4aL2YsCCaNYhkb2YBbq/Np3pd9Zk30dZAedi1H+ipoalFG7XosxqaWrRRiz6roenvempoShu1oaeGprRRG3qXq/l39OUv18a/Q08NTX/XU0NTizZq0Wc1NLVooxZ9VkNTqzZqhatywqpbt268//77DB8+nF27dmGxWEhOTkav14sJpXPRpk0bQkJCeOWVVzh27BgFBQUkJyeTnZ1Nr169fPS///577rnnHrZv347JZKJFixYUFRVx7bXXAmA2m+nQoQNffPEF33zzDUeOHCEuLo7w8HASExOpU6cOAF26dEGv1/PEE0+Qnp6O3W6nefPm7Nq1i1GjRl2UT3Xr1iU+Pp4PP/yQ1157jezsbBo0aEBVVRWdOnXCbDYDMGPGDN55551zpkFiYqL4/z179lBeXk5SUtJZ1zmdzr+aHRKJRKOMHNSUmYv3+MS/bxIfychBTf9BqyQSiUQikUgkEsn/QvblJRKJRCKR+CNX5YRVv379mD59Ovv27ePBBx/k6NGjLFq0iMaNGxMbGyuuGzFiBNnZ2SxfvhyAgIAAWrVqxdq1a+nVqxdt2rRh7ty55Ofn07dvX/G7Hj16YDKZ2Lx5M/fddx+lpaV88sknREZG0qJFC5/rXnrpJRITExk9ejTLly9n69at3HXXXeIa5fyqdevWMXjwYOrXr8+sWbOorKxkwIABF+1Tly5d+OSTT7jmmmsYNmwYX331FYcPH/a5Z2lpKf369SMpKQmr1Up+fj7ffPMNubm53HHHHeK68vJy9Ho9Tz/9NABbtmxhxYoVhIaGXtaEldEoD5U7k6vhAD9po//pXSlN7zCA5+KeKSsvKTxgWEgAT9/RhvyicmwVLqyBBmLCLz9Gr1byRU09NTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0NSijf7ss1p9eW/btJCOV4ueGppatFGLPquhqUUbteizGppatVFr6Dwej+d/X+ZfzJw5k/fee0/sVFJ2I/3++++sWrVKTPAMHz6cEydOsHJldSisyspKOnXqRNOmTc/aYdW7d28mTpwIwNKlSxkzZgypqals374do9FIixYtWLt2LV999ZWYtLr33nvJzs7GaDSKHVZhYWHY7XaWLVsGwMmTJ+nevTsdO3YUO6xatGjBzp07eeSRR7j//vsvyqcbbrgBq9VKcXGxzw6rOnXqMGvWLABWrFjB3LlzOXDgAGVlZcTGxqLT6SgsLGT9+vViJ5aysyowMBC32018fDxDhw7liy++oE2bNrz88ssXnTcejwed7sLxsCUSyV/nRH4pOafsxEVbiIsJuSSNG8cs/p/XLJl2aSFA1eBK+CyRSCQSiUQikUgkkquLXzYeZdehfFo2iqFX+/r/tDkSiUQi+Qe4KndYpaWlkZqaynvvvSc+s9lstG/fXuxkAliwYIHP77Zt24bd
bue5554jJSVFfP7qq6+KXViKfnJyMnPmzBGfeTweOnbsyOrVq2nRogUOh4ONGzfy5JNP+uxuWrFiBQ8//DBZWVnUqVOHtWvX4vF4mD59us8ZVo888ghpaWliwuqv+HT8+HEyMzN59913fUIYzp8/n6lTp+JwODCbzfTs2ZOePXuK7ysrK+ncuTP9+vUTk1VQfa6V0+lkx44dPuk0a9YsH1svBrfbg81Wdkm//TdjMPj/AX7SRv/SKy138v63u9iV8WeIjuYJkTz8n+ZYgkyXa+pZ+MOhymr5rMWyqIamFm3Uos9qaGrRRi36rIamv+upoSlt9E8bteizGppatFGLPquhqUUbtebzkexiXpq7GUVm1bYTvPPVH7xwd3via1n9wkY19NTQlDb6p41a9FkNTa3a+G/Aag36y7vOrsoJq4yMDG6++Wafz6xWKzExMWRkZFzwdwAJCQk+nycmJjJv3jwqKioIDAwkIyPjrGt0Oh0NGjQQGseOHcPpdJ5TS7lXnTp1yMjIICoq6qwJoMTERL7++uuL8kn5t0GDBmdpOZ1Ojh8/7nNGlcJvv/1GaWmpTwhCgIiICLKzs+nYsSM2m434+HiGDRtGfn7+WX5dDPJAufNzNRzgJ230D733Fu1ib2aBz2d7jhTw7qJdjL611UVpzXmmB/dMWXnB7/3hUOUr6fO50GJZVENTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1pozb01NCUNmpDTw3NK6HnPVkldN3w4sebmPV0j8vSBv/0WW1NaaM29NTQlDb6p56WuConrGw2G1br2SsswsLCKC4uFn8fPnyYSZMmsX37diwWC/Xr18dsNhMQEODzO6vVisfjobi4mMDAQGw2G1lZWVx33XUUFBSQkpLCs88+66Ov/Dtr1iwee+wxTCYTvXv35sEHH/T53mazYTAYGDhwoAgb+MADD4iwft4+bdu2jdTUVOx2O61bt2b8+PHnvOeECRPYs2cPFouFQYMG0bt3b5/vAb766is++ugjsrOzMZvNhIeH065dOx+/ExISyMnJETuzCgsLmTx5MjqdjtTU1EvImWrkGVZnczXEQ5U2+o9ezmm7z+HHCm4P7D5SwClbBTUjgy/rHt5c7jN7JfxW02ctlkU1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KKNWvRZDU0t2qgln1dtzzprskrB5Yb1u3Po1qr2JWn7q89qakob/dNGLfqshqZWbdQaV+WE1V+huLiYESNGEB8fz4wZM8jNzeXFF1+kqqrqL/32+PHjjB07lqSkJD799FPuuecemjVrhtFYnWSKTk5ODtOmTaOiooL//ve/ZGdn+2jl5+eTm5vLddddx3PPPceGDRt4/vnnGThwoM91LpeLXbt28cILLxAbG8sHH3zAXXfdhcViEdeUl5eLeys+TZkyhRMnTvho/fDDD4wfP54HH3yQ5s2b88gjj+DxeNi5cyetWrUCoKCggD179mAwGIiLi6N///4sW7aMgoICDAYDkZGRF5fg/z96vY6ICMv/vlCjWK1X5hBbtfTU0NSijZerd/hk6QW/tzvcF/2cKWdUeZ9ndaXPrbocv9Xw+Uy0WBbV0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NKWN2tBTQ/Ny9TJySi74/aFsG4O6N76se/ibz3+HprRRG3pqaEob/VNPS1yVE1ZWq5WSkrMbtOLiYhF67/PPP8dut/POO+8QHh4OwO+//87SpUs5duwY9erVE7+z2WzodDrCwsKorKzEZrPRoEEDcTbVNddcQ58+fTh8+DDt27cHYNeuXQCMHDmSHj16CLvuvfdeAGHHwYMHMRqNvPTSSwB07NiR48ePs3r1anHNyZMn8Xg8dOjQgSFDhgDQvHlzunfvjt1uF9dt27YNgOeee46WLVsC1RNdEydO9Lnn22+/Tf/+/Xn88cf56quvcLvdJCUl8e677zJr1iyRFoWF
hcyZM4dZs2bxwQcfEBwcjE6no6qqimPHjp0zvOD/Qp5hdW4MBv+PhyptvHy9NTuyOXiimMZ1wujSIu6SdYJNugt+bzHrL/nMqU9fuF74fDnnVnlzJdJRTZ+1WBbV0NSijVr0Ga5cXaagxXTUos9qaPq7nhqa0kb/tFENn/ccKeD4KTv1Yiw0ib+0xYLeaDFf1ND0dz01NKWN/mmjlnxOqBXKqgt83zDO6jfvglrKFzU1tWijFn1WQ1OrNv4bsFr/5WdYJSQknHVWVUlJic/ZS2lpaXTq1ElMVgH069ePpUuXsmTJEkaNGiU+z8jIIC4ujsDAQH7//Xfcbjdu958Fymw206tXLxYsWCD09+3bB0BZ2Z+TM6mpqVgsFux2OwkJCTgcDk6ePInL5fKZTFPsUCad1q5dC4Be/2emhYeH0759e1atWiXuefjwYaB615ZC3759mTBhAgaDgbp163L8+HEyMzN56qmnAFi6dCkJCQkMGTKEqVOnivB/TqcTgGbNmjF37lwAPB4PrVq1oqKi4q9nxjmQ8TnPz9UQD1XaePEcybHxyoItPgfEzl66l3Ej2lI/9uzwpf+LmLAgmjWIZG9mAW7Pn5/rddAkPpJoa+Bl++9v+aJFn/8OPTU0tWijVny+0nWZGjaqrenvempoatFGLfqshqYWbbwSermFZUyev4XS8j8jf4QEGRk/oi0x4Zcf8lmL+aKGpr/rqaEpbdSGnhqal6vXpXkc837cf86wgAY9dG5Wy+/eBbWQL3+HphZt1KLPamhq1UatcFUGU+zWrRvr16/HZrOJz3766Sf0er04eykjI4PIyEjuvvtuWrVqRWpqKhs3bkSn05GWliZ+53Q6+eWXX+jWrZv4HcCRI0fo0qULLVq04NZbbyU/Px+3203Hjh0BOHr0KJGRkbz33nu0bt2a9u3bM27cOEwmEyEhIdSpU4djx47hcrnQ6XTceOONNG/enBtuuIHdu3cD0LBhQ3HP4OBg1q1bR6dOnWjVqhV33303paXVIbIUn06cOEFISAgvvPCC8OmDDz7AZDIRFxeH2WwW9n/22WekpqayYcMG4uLiSExMxOl0cvz4cQBq1KhBQEAA3bt3p0WLFvTs2ZNbb70Vp9NJSEiIzw40iURyYbwHeBVcbpg0b8sla44c1PSs1bZN4iMZOajpJWv6O1r0WSLxJ9SoyyQSiUTiy5mTVQCl5VW8LOtaiUSiYcaNaMuZC+8N+urPJRKJRKItrsodVsOGDWPBggWMGjWKkSNHkpuby9SpUxk2bBixsbFAdXjAb775BpPJxLvvvivOezKbzezcuZN58+bRuHFjFi5cSFFRkQjlZ7PZ0Ov1YofVQw89xIoVK1i2bBmAmMgpLi6msrISu91Ot27daN68OXPnzsVut5OUlCSugeqdSwUFBdx5553k5uby3nvvAYjzpJR7ulwuwsLCGDFiBN9++y2ZmZnodDofn4xGI6Wlpdx4443UqFGD+fPn43Q6adSokc898/LyqFOnDqdOnSIlJQWr1erz/Zo1a9DpdHg8HiorK8nKyiIrKwuA//u//8NkMl1y/hiNV+U8qKpcDQf4SRsvDbUOiA0LCeDpO9qQX1SOrcKFNdBATPjlx7/153zRos9q6amhqUUbteTz1XTYtRqa/q6nhqYWbdSiz2poatHGK6W38/CpsyarFErLq9h3rJDmCVGXpK3FfFFD09/11NCUNvqnjVrzObF2OB8/14u1O7M5kHVlQ1N7/+tvempoShv900Yt+qyGplZt1BpX5YRVWFgY8+bN4+WXX2bUqFFYLBaGDBnCE088Ia7xeDx4PB4iIyPp2rUrUH3e04QJE0hOTmbOnDkUFBSQkpLC7NmzqVu3LgBVVVW43W7+3//7f+Tl5fHhhx9iMBgwm804HA6hX1ZWht1u54UXXmDhwoX8/vvvREREYLfbCQgI8LG3YcOGdOvWje+++w673U5kZCQFBQUEBQUJrdLSUh599FG2bNnC+++/
T1BQEHq9Ho/nz9hYHo8Ht9vN888/zyeffEJ2djaRkZHk5uZisVh87vnxxx/z4IMPAhAREXFWGt588818+eWXOJ1OTCYTOp0Og8FAeXm5mCC7FPR6HRERlv99ocbYlp5H+oajJNePpHVSjSumq9VDBq+U5on8Ug7syyUu2kJcTMglaah9QKxaz5M/54sWfVZLTw1NLdqoBZ+vxsOu1dD0dz01NLVooxZ9VkNTizZerl52wfELfn/idBndrrm8SBdazBc1NP1dTw1NaaN/6V2Jd9Xz4a8+A9x4baMrpuWNP/uslqa0URt6amhKG/1TT0tclRNWAImJieLspXOh1+upXbs2v/zyi/hMOe8pNjaWxYsXn/N3yg6kgQMHijOmAO6++27Wr18vzqGqqqoiNDSU22+/ndtvvx2onlBq2rQp5eXlAAQHV8cgv+aaaxg7dixjx44FYOHChUycOFGcI6WENhw+fDiPPPKIuGefPn3ErifFp7i4OO68807uvPNO8dt27doJDcW+kpISvvnmG7Hb68zvv/nmG4KDg1mxYgVmsxmAAwcOcOONN7JgwQL69Olz3rS9EG63B5ut7H9fqBFyC8p48eNNZ8Won3hPe2pEXHqMeoNBm4cMXinN0nIn73+7i10ZBeKz5gmRPPyf5liCLm53oZoHxIK28uVq0VNDU9ronzZqyeer6bBrNTT9XU8NTS3aqEWf1dDUoo1XSi8u8sIDF7WjgmVd+w9r+rueGprSRv+y8Uq+q6plo1p6amj6u54amtJG/7RRiz6roalVG/8NWK1Bf3nX2VU7YXUp6HS6K3rt+a7x3hV1uXZcrFZCQgJQfS6W8v/K3yaTSewkO3ToEAkJCWKyCuCPP/4Aqie7Lgd5oNyfnDlZBdXhPibO2cTbj3W7bH2tHjJ4uZrvLdrF3swCn8/2HCng3UW7GH1rq4vS+jsOiAVt5MvVpqeGprRRG3pqaF6Jw67n/LD/vN/742HXamj6u54amlq0UYs+q6GpRRsvV69J/UhCgoznDAsYEmQkpV6ErGv9RNPf9dTQlDb6h96VfFc9H/7m89+h6e96amhKG7Whp4amtNE/9bTEv3bCyu12c+LECYYPH86uXbuwWCwkJycDiPOczoWyA+n111/n2LFjFBQUkJyczJEjR4DqHViBgYEYjUZOnTrFPffcw/bt2zGZTLRo0QKXyyV2VpWVVe80Wrt2LQMHDuTIkSPExcURElK9nVs5J0qx54knniA9PR273U7z5s3JysoSZ2n9VZ/q1q2L1Wpl7Nix4rfz588nKCiITp06iQmqmJgYli5dSvfu3Tl9+jSBgYGUlZURGBjoM9F1KcgzrKqRMer908ac03Z2Hyk463O3B3YfKeCUrYKakRe3++2Fu9vz4sebfCatDPrqzy/3edBKvlxNempoShv900Yt+Tz9y+0X/P69b3fyf0NbXZK2ltJRLT01NLVooxZ9VkNTizZeSb2J97Rn4pxzR2C4nH6jFvNFDU1/11NDU9roPzaq8a56pW1UU08NTX/XU0NT2uifNmrRZzU0tWqj1vjXTlhB9ZlV+/bt48EHH+To0aMsWrQIs9kszo4CGDFiBNnZ2SxfvhwAo9GITqdj48aN9OrVizZt2jB37lwRUk9BOWNq8+bN3HfffZSWlvLJJ59gMpkIDQ31ufbEiRMkJiYyevRoli9fztatW32+Dw4OxmQysW7dOgYPHkz9+vWZNWsWVVVV6PW+hfuv+NSkSRM2bNjAgAEDWLp0KQ6Hg5MnT9K/f39xTf/+/Vm0aBHBwcEMHjyYb7/9FpvNhtPppEePHpec5vIMqz+RMerV0btczcMnSy/4vd3hvugyHBFh4bvXBvHrpqPsOJhPy0Yx9Gpf/5JtPBf/9ny5GvXU0JQ2akNPDc3L1TucfeHd1QdP2C67fddCOqqtp4amFm3Uos9qaGrRxiuhFxFhYeGk/mxPz2P/0QJ5xq2favq7nhqa0sZ/Xk+Nd9Vz4U8+/12a
/q6nhqa0URt6amhKG/1TT0v8ayesAgICcDgcJCcn8/7772OxWEhNTWXdunUYjX+67Xa7cblc4u/g4GA8Hg9t27Zl9+7dpKWlkZycTFlZGaWlpWIHlvLbdu3aMXfuXIxGI507d2bt2rUijJ9ybc2aNTEYDEybNo24uDiSkpJIT08X3xsMBpxOJ507d2b16tXY7XZatGjB1q1bCQgIuGif5s2bx1dffcWsWbOEfkJCAtu3/7l6OjU1lY8//ph3332XmTNn4nQ6SUpK4tChQ2RnZ19yusszrP5Exqi/8jbmFZVTUuHCGmggJvzSKv5g04VDclrM+kvOl/bJNejVvj42W/llnVvlzdWQL/5uoxZ9VkNTizZqyefEuFC2HTh93u8b1ZZnWP2TempoatFGLfqshqYWbVTD58RaobROqnHF+o1azBc1NP1dTw1NaaP/2Kjmuyr4p89qa/q7nhqa0kb/tFGLPquhqVUb/w3IM6yonqSJjIzkk08+EZ9lZ2fTvXt3HA6H+GzBggU+v6uqqg7NcMcdd9CvXz/x+eDBg9m/fz+BgYFA9dlSAQEBzJkzR1zjdrtp0qQJTqcTqJ6oAmjWrBnvvvuuuO7NN98kPT1dhA6srKwE4KWXXhJnTAF07dqV8vLyi/YJYOjQoQwdOpSkpCTuvvtuNm/ejN3u27Hp1KkTq1ev5o8//mDu3Ll07NiRtm3bCvsvFRmfsxoZo/7K6ZWWO/nw+z0+4RGaNYhk5KCmWAIv7uDZmLAgmjWIZG9mAW6vI+L0OmgSH0m0NVDmi59o+rueGprSRm3oqaF5uXqPDG7JPVNWnvf7h//TQtaNfqCnhqYWbdSiz2poatFGLfqshqYWbdSiz2po/ttt/DveVS/Xxr9DTw1Nf9dTQ1PaqA09NTSljf6ppyX+tcEUXS4XRUVFPqH81q1bByDOcToXyk6lvXv3is+cTicnTpzA5XJRUVEBgE6no7KykszMTHHdhg0b8Hg84myqkydPAvhcA3DgwAHgzzOulF1UGzZsENcUFxdTWFiI9+6vi/HJ4/GIybfdu3ezbt067rjjDp9rPvzwQ+bOncvkyZNJTExkypQp6PV6brrppvOmj+TiGD+iLSFBvvPCIUFGxo9o+w9ZdHXy4fd7zjp4dm9mATMX77kkvZGDmtIkPtLnsybx1RNgEolEolVG9Gl8UZ9LJBKJRCKRSK4s8l1VIpFIJFrnqt1hdfjwYSZNmsT27duxWCwMGjSIxx9/XEzcOBwOAgMDGTVqFCNHjiQ3N5epU6cSFhbGrl27uO666ygoKMBkMmGxWEhLSwOqJ5EMBgPz5s0jLS2NzMxMXC6XCPNXXFxMYGAgOp2OsLAw7rnnHuDPySmLxYJOpxPXAmRkZDBo0CCys7MpKysTE0nK9y6Xi9DQUKZMmcLHH3/M8ePHcbvdYlJMQfFp6NChlJeXc+rUKTweD8HBwUITYN++fUycOJE//vgDgGXLljFo0CCfc66WLFnCtGnTqFGjBk8//TRQPQnXpk0bn3teCpdzWPC/jVrRIbw35jr2ZhZwLN9OvRjLWZ3PS0FLhwyqcfBsWEgAT9/RhvyicmyXGWLQGy3li5qa/q6nhqa00T9t1JrPPdvWo2fbesz4+g8OZNloXMfKo0NaXbau1tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaq9a7qbZu/+aympr/rqaEpbfRPG7XosxqaWrVRa1yVE1bFxcWMGDGC+Ph4ZsyYQW5uLlOmTKGiooIJEyYA1ZMvQ4cOJT09nVGjRmGxWBgyZAjfffcdu3fv5plnniEpKYknnniCvLw8jh8/LsLx6fV6rFYrhw8fBqB27dqcPn2a0tI/D8DU6XQ0a9ZMnB8VEBBAvXr12L9/P6dOnfKxt1mzZuzevRudTkdUVBRms5msrCyxWwsgIiKC/Px8jh49il6vp379+hw7dkxMlCn3vOaaa0hLS8NoNGKxWKhVqxbp6emcPv3nuRM/
/PCDmKxSWLx4MatXr2bjxo3Anzuz8vLyxDUej4etW7dy7733snr16kvKG71ed0UOAf23kRphIVUFXS0cMqjmwbNqlVUt5MvfoenvempoShu1oaeG5pXUm3C/Gi2W9tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaqOa7irz6rqenvempoShu1oaeGprTRP/W0xFU5YfX5559jt9t55513CA8PB6p3Kb344ouMHDmS2NhYrFYrZrOZuXPnit9VVlby8ccf07JlS+666y4A0tLS6NOnD7Nnz2bixIlYrVacTienTp3ixx9/JCEhAYDJkyczf/58jh07JvTT09Np3bo1n3/+ubhHx44dOXHiBABhYWFAdUi+iRMncuuttwKwc+dOhg4dyvbt2xk6dChWq5XTp0+j0+lYt26d8On+++8nLS2N3Nxccc8dO3YwYMAApk2bJu7ZrFkzEWYQ4Mknn2TMmDHodDoWLVrEs88+y6233soPP/yAy+XCYDAwZcoU8vPzcTgc4hwvt9tNz549yc7OJicnh1q1al103rjdHmy2sov+3b8dg8H/D/C70pprdmRz8EQxjeuE0aVF3CXrqHnwrBbzRYs2atFnNTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0MwrKqdEhR0j/pyOV0O+SBv900Yt+qyGpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dlRNWaWlpdOrUSUzsAPTt25cXXniBdevWMXjwYBISEsjIyPD53dq1a/F4PHTt2lV8Zjab6d27N8uXLwcQE1Tx8fHi/6F615VOp2PDhg20a9eO+Ph4du3axf333y+u8Xg8OBwO7HY7WVlZ1KtXD4PBgMvlok+fPuI6ZQfWoUOHxD3tdjvdunXz8clgMAAIn2rXrs2uXbvo27evuKakpASn00l2djYOh0OERFTCEiokJyfzxRdfUFBQQExMDABVVVWEhIT4+Fi/fn2ys7N9dnZdLPJAufNzNRzgd7maR3JsvLJgC0qdvGrbCWYv3cu4EW2pH2u9aL2/4+BZLeSL2npqaPq7nhqa0kZt6KmhKW3Uhp4amlq0UYs+q6GpRRu16POV0Cwtd/Lh93t8wnw3a1B9Jo8l0HQlTPT7dPTHfFFbTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9ron3pa4qqcsMrIyKBnz57cfffdPmdYxcTEiEmqbt268cEHH2Cz2bBaqwfJly1bBlSfU6WcYZWSkkL79u3Jzs6moqKCNm3aoNfr8Xg8PProo6xduxaj0UhVVRWRkZFCPzk5mSVLlnD8+HEGDhzIkSNHiIiIwG63Cxvr1KlDzZo1yc3N5YMPPuD777/HbrdjsViwWq1iJ1aXLl0AsNvtwqegoCBKSkqwWCzing0bNmTXrl3s2rWL1157jezsbCIiItDpdLhcLo4fP05iYiIABw4cYNq0aWzatAmAd955h6CgICIiIkQ6hoaGsnz5cpKSknzSNzg4mLi4S98RI9E23pNVCi43TJq3hVlP97gkzZGDmjJzse9Lrjx4ViKRSCQSiUQi8X8+/H4PezMLfD7bm1nAzMV7GH1rq3/GKIlEIpFIJBKJX3JVTlgVFxezbNkymjRp4nOGlV6vp7i4GIBhw4axYMECRo0axciRI8nNzWX58uXodDrmz5/Pk08+SVJSEqNHjxbnPRUXFxMbG0toaChHjx6lrKyM+++/n99++41du3ZhMpmEfvPmzQH45JNP6Nq1Kz179mT+/Pk+NgI0btyYEydOsGDBAu68807y8vJYsmQJgYGBlJVVh86rWbMmOp2OrVu3Eh8fz4MPPsh3331HYWEhQUFBQqtVq1Z8++23fPDBBwwYMOCc99y/fz+vvvoqu3btombNmrRv355Vq1Zx+vRpDAYDhw8fJikpiS+++IITJ06g11dvxXO7q2cYdDodr7zyymXlj9EoD5U7k6vhAL8roblqe9ZZk1UKLjes351Dt1a1L1pX
rYNntZIvauqpoenvempoShv900Yt+qyGphZt1KLPamj6u54amtJG/7RRiz5fKc2c03afRWcKbg/sPlLAKVsFNSOD/1EbryY9NTSljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJKyX03plnWE2YMIHy8nKg+vyoefPm8fLLLzNq1CgsFgtNmjRh+/bt3HPPPeIMq4SEBIqKisSEDYDRaBT3ef/990lJSeGZZ57h1VdfpaSkxOeakJAQtm7dyo4dO7jhhhsoLCxk5cqVQis4uLrzHRkZyfz584mLi+P5559n6tSpPvcExITb+++/T5s2bRgwYAAzZswQPnlr/frrr1gsFoYPH87atWvFGVbR0dFUVlZit9s5duwYubm5AEyaNImXX36ZX3/9laSkJBo2bEheXh5utxu9Xk9UVBS1atUiLy+P7777jj59+pwVVvCvoNfrVD0c9GrHnw/wO5FfyoF9ucRFW4iLCfnfPzgHGTklF/z+ULaNQd0bX5I2qHfwrD/ni1qaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShu1oaeGpr/ZePhk6QW/tzvcV6SP7+/p6G/58nfoqaGpRRu16LMamv6up4amtFEbempoShv9U09LXJUTVnq9nri4uLPOsJowYQI2m018lpiYyNy5c8XfkyZNYvv27fTs2VN89umnn3L33Xezfv16wsLCgOrJr9DQUNatWyeu83g8TJ069azJo/79+/PSSy+J6xYuXMjKlStxOp0Awp4lS5YIfYDPPvuMrKws8bfBYKB27dr88ssv4jObzcaMGTOEhsPhAOCRRx7hjjvuENeVl5dz4MABgoODiY6OZtiwYWzfvp1169axYsUKnn32WXr27MmUKVPE2VRt2rTBbrdjNBrZs2eP0Nq6dSu3334769atE6EKLwa324PNVnbRv/u3YzD47wF+peVO3v92F7sy/lz52Dwhkof/0xxL0MXFlE+oFcqqC3zfMM5KYaH90gzFv9NRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2+qeNWvT5SmkGmy68CNJi1sv3g39YU9ronzZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KqN/was1qC/vOvsqpywOhcXsxvor1x7vmuUCZ8rYcelap35uzOv6969O9HR0UyZMkWcT/Xee++h0+kYNGgQAAUFBZSXl6PT6ejYsSM2m434+Hhuv/12AI4dO/aX/TgTeaDc+fHHA/zeW7TrrJjye44U8O6iXRcdU75L8zjm/bj/nGEBDXro3KzWFfHfH9NRTT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1/szEmLIhmDSLZm1mA2+s1Vq+rPpM22hoo3w/8RFPaqA09NTS1aKMWfVZDU4s2atFnNTS1aqNWuConrNxuN9nZ2dhsNqxWKwA//fQTgPj7XCg7nFasWEGLFi0AcDqd7N+/H6g+AyowMBCj0cipU6fIzMwkPj4egN9//x2Xy0VOTg6tWrXCbDYDsH37dp97rF+/HgCTyeRjz/PPP8/u3bspKCigUaNGHD9+3CckoOLTgw8+yMaNGzGZTDRs2NBHQ7nnkiVL+PLLLzly5AhxcXFUVFQAiDOxwsLCmDt3LrfffjuLFi0Cqnd0vfbaa9StWxeoDitoMpkICwsT52Tl5eXx8ssvA1C79sWfM6Qgz7A6G3+Nh6pGTPkX7m7Pix9v8pm0MuirP7/csuGv6aiWnhqaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShv900Yt+nwlNUcNbs57Z0R0aNqgOqKDfD/45zWljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJK51Oh9lsZtSoUYwcOZLc3FymTp0qJl8URowYQXZ2NsuXLweqz50yGAzMmTOHyMhIGjduzMKFC7HbfUMQBAcHExYWxqOPPsro0aMpLy9nypQp6HQ6DAYDb731
Fps2beLDDz/k4MGDTJw4kb59+7Jx40afkH6KltlsZvny5fTv35+UlBRmz55NVVUVBoPBxyeXy8WGDRu47777yM/P54svvsBoNPr4BPDHH3/QtGlTRo8ezffff09mZqbP96dPn2bo0KFUVlaSlJREeno6AQEBjB49GqgOY6jT6ejSpQu//fYbiYmJxMXF4XA4cDqdOBwO2rZte0l5I8+wujD+Fg9VjZjyEREWvnttEL9uOsqOg/m0bBRDr/b1L8fMs/C3dFRbTw1NLdqoRZ/V0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FND0x9tjIiAV0Z1JTu/lOxT9ss6M/d8+Hs6+mO+qK2nhqYWbdSiz2po+rueGprSRm3oqaEpbfRPPS1xVU5YhYWF0bNnT7Kyshg1ahQWi4UhQ4acdU6U2+3G5XKJv61WKy6Xi1GjRjFnzhwKCgpISUnhzjvv5KOPPhK/DQsLo1GjRhgMBkaPHo3RaKRu3brk5eXRvn17unbtSq1atfjwww8B2LhxI19//TVxcXE88cQTvPHGG0LLYrHgcDho0aIFGzZsYMWKFbRq1YqdO3f67LAKCgqitLSUpKQkZs2ahcVi4frrr+fnn3+msrJS2AVQq1YtSktLmTZtGg0aNKBx48YcOHBAfP/WW2+Js7bS09OBP3dfTZ48mf79+wMwffp05s6dy+LFi8nJyUGn09G6dWs2b97MyZMnSUxMvOi8kWdYnRuD4crGL80rKqekwoU10EBM+KVXgGrGlG+fXINe7etjs5VfVlx6b650Ovq7nhqaWrRRiz6roalFG68Gn/ccKeD4KTv1Yiw0iY+8AhZqMx39XU8NTS3aeDX4/N9Pt3Ikp4TEuFCeuv2aK2ChNtPR3/XU0LwabAwJMNA2JVa+H/iZprTRP23Uos9qaPq7nhqa0kb/tFGLPquhqVUb/w1Yrf/yM6wSEhIoLCxk7ty54rOSkhI+/vhjEhISxGcLFiw463cAPXv2ZNSoUeLzKVOmEBcXR2BgoLjuwIEDfPfdd+Ka22+/HaPRSHJyMgD16tXDaDRSVVXF/fffz+DBgwFYuXKlz730+uqMePrpp2nXrp3Qu/7668nJyRF/BwQEAPDFF1+Iz2w2Gz///DOlpdW7YOrUqQNA586deeWVV8R148aN48CBA+JeSpjCTZs2+UzgdevWTWgBBAYG8uCDD/Lggw/yzDPPsHv3bu644w42b97M5SDjc56fy41fWlru5MPv9/iE8WvWIJKRg5piCTRdtN7fEVNei3FlteizGpr+rqeGprRRG3pXQjO3sIzJ87dQWl4lPgsJMjJ+RFtiwi8ulOv50EI6Xm16amhq0UZ/9PnHDZl8tSpD/L0ro5A7J/3KsJ6JXN/uyuxU10I6Xm16amhKG7Whp4amtFEbempoatFGLfqshqYWbdSiz2poatVGrXBVBlPs1q0b69evx2azic9++ukn9Ho9qamp5/1dmzZtCAkJ4ccffxSfOZ1OfvnlF7p16+ajv3//fp9QewcOHMDpdHLttdcC1edJdezYEZPJREbGny+Xy5YtIzExUUwuKedPHTx4UFxTXFxMTk4ODodDnD9lNBopLy/38ennn38GwOPx+PyblZXl49fevXsBOH78OFA9SabX68UEHIDL5aKiogKn03ne9FHst1qt1KtX74LXSf4ZPvx+D3szC3w+25tZwMzFey5Zc+Sgpmetym8SXz0JJpFIJBL/5MzJKoDS8ipenrflH7JIIpFcDt6TVd58vuLw32yJRCKRSCQSiUQikfxzXJU7rIYNG8aCBQvOOsNq2LBhxMbGiuvOPMMqICCAkSNHMmPGDJ8zrAoKCkhPT6dVq1ZYLBZuvPFGEhMTfc6wKikpISEhgRYtWgj9hx56iLVr1zJnzhzmzp1LZGQkeXl5vPnmmz726vV6Jk+ezKuvvorRaCQ4OJjAwEAcDgfFxcUEBgZiMpkwGAx069YNp9NJWFgYdruduLg4ERKwuLgY
qA5B2KpVK1wuFxEREZw6dcrn+zp16rB//366detGWVkZgYGBhIeHU1xcLM7NKi0tpU+fPhiNRoqKinA6nXg8Hg4ePMhzzz2HyXTxu3UULvfg3H8jV+LAvZzTdp+dVQpuD+w+UsApWwU1Iy9+VX1YSABP39GG/KJybFcgzKCCFg9C1KLPamj6u54amtJG/7TRX33eefjUWZNVCqXlVew7VkjzhKhL1tdKOl5NempoatFGf/V5yicXnmh+44vtPH3HpYcH1Eo6Xk16amhKG/3TRi36rIamFm3Uos9qaPq7nhqa0kb/tFGLPquhqVUbtcZVOWEVFhbGvHnzePnll33OsHriiSd8rjvzDCuA+++/H4/HI86waty4MQEBARgMBmbMmEFubi5TpkyhV69e2O12cYaVTqejX79+Plrbtm0DwGQyUVVVhc1mw2Qy0axZM3GNy+XC7XYTFhZGVVUVFRUV2Gw26tat67Obyul04nA4iI2NpbCwkLKyMioqKnxC+imYzWaCg4MpLi7GZrNhsVh8tCwWC3q9HpfLhclkwuFwcOLECRITEzl27BgA2dnZ2Gw2zGYzVVVVuN1uPB4PJpOJLl26XGLOgF6vIyLCcsm//7dyIr+UA/tyL+uA4cMnSy/4vd3hvqy0VyvftHgQohZ9VkPT3/XU0LySelei3jkXWktHNfQuVzO74PgFvz9xuoxu11z+Tul/ezpejXpqaGrRRn/zOfN/9PEyckquSD/t356OV6OeGppas/GXjUfZdSiflo1i6NX+yoTPBP/2WS1Nrdl4tfSVtZYvamn6u54amtJGbeipoSlt9E89LXFVTlgBJCYm+pxhdS7OPMMKQKfTMXLkSEaOHAnAzJkz+eCDD3jnnXcIDw8HqieZXnzxRX777TexY6tTp044HA6hU1lZycyZMwkODuaOO+7gySefxOFw0KdPH2bPns3EiRMBOHHiBABz584V51+tXbuWe++9F51OJyak7HY74eHhpKWliXuMGTOGX3/9lQYNGgCIcH633nor48aNA6CoqEiEKVS0Tpw4gdvt5tdffxU+ffHFF7zwwgtEREQA1buwNm7cSFBQ9cPzzDPPsHPnTk6fPs1nn33G+PHjL5i258Pt9mCzlV3Sb/+NlJY7ef/bXezK+HNnVPOESB7+T3MsQRe3iy3YpLvg9xaz/rIOLzYY/P+QQX+3UYs+q6Hp73pqaF5JvStZ76hlo1qa/q53pTTjIi/c8a0dFSzbg3+ZnhqaWrTRX32OrxnC3syi836fUCtUPtP/Mj01NLVm45HsYl6auxlFZtW2E7zz1R+8cHd74mtZ/cJGNfTU0NSajVdLX1lr+aKWpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dtRNWV4q0tDQ6deokJnYA+vbtywsvvMC6desYPHgwAAkJCT5nVW3bto3S0lJ0Oh0JCQlA9c6n3r17ixCEACdPnjzrnqmpqQQEBGA2m0VoQLvdTo0aNXyu69u3L0uXLiU6OhpAnKkVExMjrgkPDyc5OZk//vhD2KGECNTpdD5aEyZMEJNawcFnh47T6/XUq1ePvLy8/5FqF0YeKPcn7y3addaZU3uOFPDuol2MvrXVRWnFhAXRrEEkezMLcHv+/Fyvqz5zKtoaeEXS/mo4ZNDfbdSiz2po+rueGppXQu9K1jvnQivpqKbe5Wo2qR+JQQ/n6vsa9JBSL0K2B/9SPTU0tWijv/n85LA23DNl5Xm/H31ra/lM/0v11NDUio3ek1VC1w0vfryJWU/3uCxt8E+f1dbUio1XW19ZK/mitqa/66mhKW3Uhp4amtJG/9TTEpqfsMrIyODmm2/2+cxqtRITE+MzQdWtWzc++OADbDYbVqtVfKfX60lNTRXXJSYmMm/ePCoqKggMDKS4uBij0ciPP/4odlhVVVXh8XjEbqdjx47h8XjIy8sjMzOT+Ph4ABHmr06dOuI6k8nE6tWrxQ4xqN6dZTAYxHVFRUXodDp++eUX
hg4dCoDH40Gn0/lMdp2Jy+Xi4MGDdO7c+eIT0gt5hlU1apw5NWpwc947YzVY0wbVq8EuN92vhpit/m6jFn1WQ9Pf9dTQvFJ6ap11dyVtVFPT3/WulGbOafs5J6ugerDucvIZtJOOV5OeGppatNGffb69V0M++/XQOT+Xfbx/n54amlqycdX2rAu2g+t359CtVe1L0vZXn9XU1JKNV1NfWUv5oqamv+upoSlt9E8bteizGppatVFraH7CSpmAOpOwsDCKi4vF38OGDWPBggWMGjWKkSNH8vvvv4vPlbCBAPPnz8fj8VBcXExgYCClpaU0adKEOXPmEBkZSePGjVm4cCEul0tMHin3qVOnDo8++iijR4+mvLyc6dOnAxAZGSlsjY6O5o8//mDixIn07duXjRs3cvDgQQwGg7ChtLSU5s2bM3XqVPR6PbGxscycORODwUBcXJy4rry8nNWrVwPVYQRPnjyJy+WiVq1aFBQUiPteDPIMqz9R48ypiAh4ZVRXsvNLyT5lv+LxtuHqiNnq7zZq0Wc1NP1dTw3Ny9VT+6w70EY6qq13uZp/Rz7Dvz8dr0Y9NTS1aKM/+nxb36bc1rcpE2auI/1YEUn1wnlpZOr//uFFoIV0vNr01NDUgo0ZOSUX/P5Qto1B3Rtf1j38zee/Q1MLNl6NfWUt5MvfoenvempoShu1oaeGprTRP/W0hOYnrP4qYWFhzJs3j5dffplRo0ah1+vR6/U888wzPtd5PJ6zfpucnEyvXr2YM2cOBQUFpKSk0KJFCwIDA32ue+655/j2228ZPXo0RqORbt26sXTpUp9rAgMDmTFjBtOnT+frr78mLi6OPn36sGLFCp/runfvTklJCdOmTcNut9OmTRtq166N2WwW15w+fZrHHnvsLHtfeOEFGjRoQIcOHS46neQZVn+i5plTIQEG2qbEYrOVX9aZBt5c6RirV1pPDU1/11NDU4s2aslnNesdLaWjWnpXSlOeaej/NmrRZzU0/V1PDc0nb2sj9GQf79+rp4amlmxMqBXKqgt83zDO6jf9HS3li5qaWuwraylf1NT0dz01NKWN/mmjFn1WQ1OrNv4bkGdYXQRWq5WSkrNXaBUXF4vznhQSExOZO3cuAJ9++ikvvfTSWRNUI0aM8Dkrymq1YrfbGTlypE8Yv2HDholrlH9NJhMzZswQ12RmZrJ06VIfrdLSUnr27EnPnj3FdW+++aaPrVarlfLycsaOHcvYsWPF5127dvW5rk6dOqSnp7N69WoefvhhHnjggXNOYF0sMj5nNX/HmVNajdnq7zZq0Wc1NP1dTw3Ny9WT9c7VoXe5mvJMQ/U0/V1PDU0t2qhFn9XQ1KKNWvRZDc3L1evSPI55P+4/71mOnZvV8rv+jhby5e/Q1GJfWQv58ndo+rueGprSRm3oqaEpbfRPPS2h+WCKCQkJPmdVAZSUlJCfn09CQsIFfwdw5MgRn88zMjKIi4sTu6fOpe/xeDhy5IjQqFevHiaT6azrlL+V6xISEjh16pRPqELlOm9bL8anP/74g8cee4ybbrrpikxWSXwZOagpTeJ9Qys2iY9k5KCm/5BFEonk346sd7SBzGeJRCKRaJlxI9py5iJdg776c4nkQsg+lEQikUgk/o3md1h169aNDz74wOcsq59++gm9Xk9q6vnjxrdp04aQkBB+/PFHkpOTAXA6nfzyyy9069bNR//7778nMzOT+Ph4AH7//XeKioq49tprATCbzXTo0IGff/6ZESNGiN8uW7aMxMRE6tSpA0CXLl3Q6/X88ssvDB06FKjeCbZ27Voefvjhi/bp0KFDjBw5ko4dO/Liiy9echpKzo8l0MToW1txylaB3eHGYtYTbQ383z+USCSSS0TWO9pA5rNEIpFItEz9WCuznu7B+t05HMq20TDOSudmtf5psyRXAbIPJZFIJBKJf6P5Cathw4axYMECRo0axciRI8nNzWXq1KkMGzaM2NhYcd2IESPIzs5m+fLl
AAQEBDBy5EhmzJhBZGQkjRs3ZuHChRQVFXHvvfeK391www3MnDmTRx99lNGjR1NeXs7UqVO57rrraNGihbjuoYce4s4772TixIn07duXjRs3snTpUt58801xTc2aNRkyZAhTp05Fr9cTGxvLzJkzCQ0NZdiwYRfl0+nTp7n33nsJCAhgxIgR7N69W/w+JCSEhg0bXvnE1jA1I4OJiLBQWGiX20ElEsnfgqx3tIHMZ4lEIpFomW6tajOoe2PZDkouGtmHkkgkEonEP9H8hFVYWBjz5s3j5ZdfZtSoUVgsFoYMGcITTzzhc53b7cblcvl8dv/99+PxeJgzZw4FBQWkpKQwe/Zs6tatK64xmUx89NFHTJo0idGjR2M0GunduzfPPfecj1bbtm2ZMWMG06dP5+uvvyYuLo5JkybRt29fn+vGjRuHxWJh2rRp2O122rRpw8cff0xoaOhF+XTo0CFOnjwJwF133eVzj/bt27NgwYKLT0yJRCKRSCQSiUQikUgkEolEIpFIJJJLQPMTVgCJiYnMnTv3gtecawJHp9MxcuRIRo4cecHfxsbGMmPGjP9pR8+ePenZs+cFrzGbzYwdO5axY8de8Lr/5VOHDh1IT0//nzZJJBKJRCKRSCQSiUQikUgkEolEIpGojf5/XyKRSCQSiUQikUgkEolEIpFIJBKJRCKRqIecsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/ipywkkgkEolEIpFIJBKJRCKRSCQSiUQikfyjyAkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KnLCSSCQSiUQikUgkEolEIpFIJBKJRCKR/KPICSuJRCKRSCQSiUQikUgkEolEIpFIJBLJP4qcsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/is7j8Xj+aSMk/x48Hg9utyxS58Jg0ONyuf1WTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaM29NTQlDb6p96/Ab1eh06n+0vXygkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KDAkokUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8UOWElkUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8U4z9tgERyJTl8+DCTJk1i+/btWCwWBg0axOOPP47ZbL7g7zweD7NmzWL+/PmcOnUKnU5HaGgoQ4YM8fl9bm4ukyZNYu3atZhMJnr37s2zzz6LxWJh1qxZfPbZZxQUFBAXF4fL5SInJweDwYDL5SIsLIzevXuTn5/P+vXrMRqN1KxZk1OnTlFeXk7r1q0ZP348CQkJrFixggkTJnDq1CkAAgICAAgKCiI4OJjCwkKCg4OJiYnh8OHDOJ1O9Ho9gYGBOJ1OEhISeOKJJ+jevTuHDx/miSeeID09Xfhbp04datasyZYtW3j66aepVasWP/zwA2vXrqWiogIAnU6HXq8nPDycm266iZ49e/L111/z+++/c/LkSTweDwBWq5Unn3ySW2+9FY/Hw7Bhw/jjjz/OSuPY2Fjmzp1LQkIC
ubm5PP/886xZs+as6/R6PSaTiZSUFHr37s2cOXM4ffr0WdfpdDo8Hg8BAQEkJiZy8uRJbDYbQUFBOBwOKisradiwIceOHSMuLo6bbrqJjRs3snnzZqqqqny0DAYDRqORa665hnbt2vHNN99w4sQJkeZdunShqKiInTt3otfr8Xg8VFZWijTQ6XTUrl2btm3bsn37dk6cOEFAQABOp5OAgACRzzab7ax7KwQHB1OrVi2ys7PR6XSUlZWddY3RaKR+/fpUVFSQn59PgwYNfPJ50qRJbN26lcrKyrN+Gx8fT2FhIcXFxeIzi8VCVVUVgYGBoiyHhIT4/G7IkCHs2rVL+FmnTh1ef/111qxZwzvvvHNOX4xGIz179uSVV17x0Tt8+DCjR49m//79Qs9gMNCsWTOeffZZWrVqBcDChQt54403sNlsProBAQHceuutPPXUU1RWVvLss8/y22+/UVVVhU6nE9coz5vNZsPhcJzTxubNmzN16lQsFgtPPvkkW7Zswe12C7u8y/7jjz/O2rVree655ygsLDxLq0aNGrz00kukpqby5ptv8tVXX1FaWgpUlx+lnNSpU4fi4mKKioowm82Ul5fjcrnOaZ/ZbMZgMIiyoNRJQ4cO5fHHH2fNmjU+dYT37+Li4ggMDOTo0aN4PB4cDgdutxuDwUDNmjUpKSmhqqqKrl27Mm7cOEpKSnzyRUGn0xEbG8stt9zC/fffzzfffHPOfPHm5ptvZv/+/ezbt0/cU/F95MiRDB48WNSV+fn56HQ6nE4nACaTiXbt2jF+/HgsFgtPPfUUW7duFfliMpmoqqoSz2BYWBht2rTh6NGjHDlyBIPBgNPpFGmamJjIU089RWpqKm+88QZffPEF5eXleDwegoKCcLvdVFZW8tBDD/Hdd99x+vRpTCYTZWVlIs/OxGis7jbp9XocDgd6vR6z2SzKXOvWrcnMzCQjIwOj0YjT6cTtdhMYGEibNm0YP348derUEfac+ZxHRESQlpaG2Wzm8OHDTJgwgW3btuHxeDCZTAwYMIDnn3/ep83Jz88HEHWLyWQCOKvNUdJGyZfWrVvz8ssvizbnueeeo6ioyCf/o6KiRDsKMHnyZL755huRZ1D9zHXs2JExY8aQlJQknvP09HQ8Hg8GgwG9Xk9YWBjx8fFkZWVRWFhITEwMBQUFonwnJSXx5ptvYrFYePnll1m1ahVVVVU+eWE0GuncuTMOh4Nt27bhcrlEfptMJgICAqisrBRtoPczWVJSclZeXnPNNdx33328++677Ny5U5Q1JY+Dg4Pp06cPzz77LO+++y5paWlkZWWJZ0pph7zrRqUe++qrr5g1axbHjh3z8cFsNjNs2DAef/xx3n33XXbs2MGuXbuorKwUekq7esstt/D444/z66+/nrPtSE5OFmmm9E2qqqoIDg6muLhYlHW9Xk9VVZXomxw7duyc5Ruq24XatWuLvknz5s0JDw9n48aNlJeXYzQaKS8vF2mk0+kICwujfv367Nu3T3ynPPtKHRgQEEB4eDg2m42SkhIaNGiAyWTi4MGD6PV68TwqZQ+q28WkpCTy8/PJycnBZDKJZ9hkMmGxWLDb7cTExBAaGkpeXp7ow1RVVWGxWIiKiiI3N5fS0lJCQkKw2+0iH9xu91llzLtcX3vttRQVFbFr1y6fPqV3/avT6QgPDycgIICCggLRLqempjJ58mQWLVok2iHFL4PBQPfu3RkxYgSvv/46O3bsEDbo9XqCgoKA6jqsQYMGbNq0ifz8fDwejyjvMTExANhsNho2bIjH4+HAgQO4XC6hFRgYSEREBLm5ubjdbkwmE8HBwZSUlGA0GtHr9VgsFuLi4igpKSE7O5tatWpRt25ddu/e7VMfKCh92vPRokULAgMD2bVrF1VVVT51hTcRERH/M8282yyHwyHyOygoCJ1OR0FBwXntALjvvvv44osvznr2vVm3bh0TJ05k5cqVwi+z2XxJde2hQ4fYvn07brdb5IHB
YOCaa65h4sSJ561rofo5ufXWWxkzZgyATzoofarevXvz5JNPEhUVhdls5vTp0+j1elHmveta77rf7XbjcrlwOp0+da1iz5NPPnnOPidU9wFfeOEFH3t0Oh1ms5nKykruvfde9u3bx/bt2wkMDPTJF4vFQmxsLCdPnhR1ZGBgIHv37vV57hSbXnzxRVF/lpSU8Oyzz/rkC1Q/H02bNmXcuHG0atUKt9vNmDFj+Omnn3zq76CgIPr378+zzz5LVlYW9913n8g/b2rXrs2oUaO4+eab+fzzz/nll1/Ys2ePqD+90el0NGzYkEmTJpGdnc2PP/7I1q1bz3pHMZlM9OvXjwkTJoj3xs2bN4v2VyEqKooxY8Zw8803c/z4cSZNmsTevXvFs+59X+/0WbhwIW+//fZZ5d+73IaEhPj0372JjIzkySef5OabbxaflZSUMHnyZH744Qef+srj8WA0Gunbty8dOnTgq6++4vDhw9jtdtG+63Q6goODuf766xk+fDiDBg06R0mqJjAwEJfLJdqoM9HpdJhMJoKCgqioqCAgIACDwUBRUZFPedHr9URERDBo0CBGjRrFO++8w6JFi86ZbwozZ85k3rx5bN68WdRLer2eNm3aMHPmTJFX27dvJzg4GL1eL8pMYGAgDz30EA8++CBZWVn07NnzvD4mJSXx0ksvibK8bNkyvv76azZt2uRTH3qX5ejo6PNqepfl3Nxcxo4de8589S7LANu2bePFF188q2+vpGHDhg0pKio653OhoJTlwYMHs3jxYrZs2UJ2djZut9unLAOEhIRw66238vjjj1NZWcnkyZNZunSpT1q3bNmSV155hQYNGoh69fTp0xgMBtF3MJvNREdHU1hYSFBQEBaLhezs7LPandq1azN+/Hi6d+/OypUrefPNNzl06NBZdlmtVqZPn067du148803+fbbb88qJ3FxcTRu3Jj9+/dz+vRpzGYzFRUVPu0pVNexERERoh9hMBjE+975UJ6j82E0Gn36d8r1Y8aM4YEHHhDv91u2bKGqqkr0PS0WC5GRkRw7doynn34aq9XKrFmzOH78OIBPOgQHB9OgQQPy8vJEfeXxeER78Z///IfHH3+cgoIChgwZ4lMmlGc8MTFRjDksWrSISZMmiXRQ7DYajVgsFmw2G263W/TtoDr/AwIC0Ov1NGzYkAceeIBevXqxf/9+HnzwQXJycs6bRjt37iQ9PZ2HH374LNsGDBjAhAkTAPj4449ZvXo1hw4dorKbDUD5AACB/0lEQVSyUtzbu66dMWPGeccvoqKi+OSTT0hISDjvdUajkeHDhzN69GgxPlheXs4rr7zCd9995zPuoNfr0ev1PuMcSUlJ57y3TqdDp9P5tG/ffvst8+bN4+DBg6Is6nQ6AgMD6devH3Fxcfz++++i3AcHB2Oz2cSzFBcXx6hRo3juuefOm7bKvc8c91i8eDHvvfeeyBdlDOKxxx5j3rx5fPLJJ+Tl5QGI8m00GkU7n5mZKfoe5+sztmzZkgcffJDp06dz6NAhPB6PyDOj0YjH4yEiIoL27dtTs2ZNFi9eTGFhoU/ZNpvNdOjQgaeeeoqkpCS+/fZbPvzwQzIyMs55z6SkJKKiotiyZQuAeA/wHoNISEgAwOFwMH36dDZs2MDevXvFM6Pk0/PPP0+rVq1E32HHjh2iPtDpdAQFBdGvXz8xvtajRw8xvujNmXW3YtfHH3/MzJkzRVsZHR3NggULhH3Dhw9n06ZNZ+nddNNNjBs3jtDQ0HOmwdWGznOhGlQiuYooLi6mf//+xMfHM3LkSHJzc5kyZQoDBw4UDdn5+PDDD3nrrbcIDAykdu3aBAcHs2fPHkwmEzfddBMTJkzA6XQyePBgAJ544gkqKir473//S3JyMtdccw1vv/02Tz75JACvvvoqBoMBi8WCxWIhJyeHW2+9la+++orQ0FCmTJnC/Pnz2bBhA40bN+app57igw8+4Pjx40yePJn7778fnU7Hf/7z
H5YuXUplZSVGo5GAgAAqKiro1q0bJSUlbNmyhaCgILp168bPP/8sKtCmTZvyzTffMHPmTMaMGUNRURE6nY727duzdetWn47R008/zY4dO9iwYYOoEE0mEwaDgcrKShITE8nNzaVOnTqYzWYOHDhAeXm5mAxyOp14PB7eeustDhw4wLvvvntW+kZERFBYWEhkZCTLli3jzjvvxOl0cuTIETHBYLVaKSoqwmg00rVrV4qLi9myZQuNGjUiIyNDNHahoaGUlpbi8Xi46aab+OGHH3A6nbRt25a9e/diNBopKyujqqqKm266if/85z+sXr2aOXPmEBkZSWFhoWhYTSaT6EhbrVaio6PJyMggIiICi8VCfHw869atE9/37NmTxYsX43K5CAoKory8HIPBgNVqxeVyYbPZROerpKSEkpISmjRpwp49e2jcuDH33Xcfs2fPJi8vj5EjR/Lf//6X5s2bs2PHDjHQ+uCDD7Jnzx7WrFlDREQE8fHx7NmzB4AGDRqQnp6OxWLh9ddfZ9WqVSKfn3nmGeLj44mKiuKnn34SA9I6nY6dO3ficDhE+WncuDEHDhxAr6/eZPvoo4/yxRdfkJyczMyZM0W+ffTRR7z22msAtGrVirp167JkyRICAgIYNGgQX375JSkpKZjNZnbs2IFOpyM5OZnKykoyMzPp3Lkzs2fPFs/n9ddfT1FREQ0aNODo0aPodDpcLhctWrTg8OHDLF68mJ07dzJ69GgxsGaz2cRgZ0BAAHa7nWHDhnH48GE2bdpEzZo1ycnJEb4FBwdTVlZGixYt2LlzJ7Vq1SInJ0d8rvybnJxMQUEBoaGhZGdnU15eTrNmzdi9e7foYCsTTB07dmTFihUYjUYcDodP3ns8HurXr8/x48e57rrrWLduHZWVlXTt2pXNmzeLwfi2bduyZcsWoqKicDqdlJeXiwE5l8tFSEgIbreb8vJyLBYLAwcO5LPPPhMDAddccw1r164lMDCQ1NRUfv31VzHhFxgYKAbkxo4dy1tvvUVFRQUtW7Zk586dADRr1oxDhw5RXl5OUlISjz32GNOnT8fj8ZCfny/yJTMzE6jufCYkJJCVlQVAmzZt2LBhg8gXpa4wm80YjUYqKyu59tpr2bRpE5WVlWLip3Xr1uzfv5/ExET27NlD//79+fnnn+nXrx+LFy/2eVHT6/U0btxY5EtBQQGRkZE0a9aM77//HrPZLMqxTqejQYMG7N27lwYNGoiJxKKiIpGmDRo0EPmSlpaGy+Vi4MCBLF68WHQ6XS4XJpOJUaNGMWfOHFG3NG7cmPT0dAIDA0lJSRETg+Hh4eTk5Ii/lQHhnJwcevbsyYoVK0hISBADdMrL3Jtvvsmnn37K8ePHSU1NZenSpVRVVYlJNu+Jj9tvv53HHnuMfv36YbfbiYyMpHv37nz99dfo9Xrat28v2hwlHZX0U/QAnzYnLi6OQ4cO4XQ6qV27NgAFBQVYrVbR5kD1S215ebl4IR49ejQffvghAwcOxOFw8PXXXwN/dsTtdjt6vZ6oqChKS0uZO3cuI0eOpKioiM6dO/PHH3+ICQblmWzTpg3t27fngw8+AGDo0KHk5uaSlpZGREQEkZGRFBcXU1hYiMFgEM+JwWAgNjaW48ePi3YgMDCQiooKoqKiOHXqFAEBASQnJ5OcnMw333zj80zWqlWLEydOiPJRr1498WKvtNd2u12UR6vVKl4MU1JSiI2NpVatWnz88cdYrVbxW71eT7169cjMzCQgIIAffviBnTt3MmbMGJKTk9m3b5+oU5U2SK/XM3jwYH7++WdSUlJEu9ypUyeaNGnCggULcDgcDBgwgGXLlolBAO8Jc6UuU9LMYDAwcuRInn/+eSoqKsSzpSzIaNq0Kdu2bcNoNPLII4+wbt06Nm/eTLt27cjMzKSgoACXy0WzZs1Em/XUU08xevRoSktLue+++5g1a5ZoO3U6HZWVlWKSPDMzk+DgYBo2bCgGpAFq1apFRUUFnTp1
4scff8RoNDJ27FimTp1KVVUVvXv3Zvny5bjdbuLi4igoKKCyspLrrrsOs9nMzz//TL169aiqqiInJwePx0PXrl1Zs2YNOp1O1LXl5eUivWJjY8nPzxcTXDVr1uTkyZMA9O3bl82bN4vJJmViWRkgtVgsPPzwwwC8/vrrhIaGMm3aNNGn7Nixo6h/b731VtLT09m2bRsGg4EpU6awZcsWvvnmG7p168aqVauIiorCbrdTUVGB2+2mZs2aFBcXYzKZRP3gcrlEu6zw8MMPs2bNGnbt2kXr1q3Zvn27sDc2NpasrCyCg4P573//y7PPPktpaSm1a9fm9OnTVFVVERMTIwYawsLC6NatG0uWLBEDws2aNWP79u3079+fpUuXYjAYmDRpEgsWLGDv3r0kJyezf/9+wsLCcLvdBAQEcOrUKYKCgqhRowYul4uCggLRZiUlJbFt2zaxuGLatGlMnjyZY8eOcc0117B582YAevXqRX5+Pn/88cd50ywmJoahQ4cye/Zs0d/Mz8/H4XCI/mR5eTlxcXG0a9eOH374gaqqKp544gk6duzI66+/zt69e7Hb7RgMBp8JJGUAyWQy0a1bN4qLi9m4cSMmk0m0w8XFxQQFBZ23rj0Tpa41mUzUqVMHj8fDkSNHiImJEXVkcHCwqGuVut77FTwgIAC3280tt9yCw+Hgm2++EemwbNkyMjMzRd+8vLycxx57jFmzZgl9ZQLJ4/H41P1DhgwR/bC6devSoEED0tLSiIyMZOrUqeJ949prr2Xt2rU4nU6MRiOdOnVizZo1dOvWjdjYWGFPSEgIx44dExNOFouFJk2a8P/+3//jmWeeEfnSsmVLfvzxR1GWGzVqxPPPP094eLhYmKVMzttsNkwmE2azmcWLF1O3bl1GjBgh8sV7MDsgIIDQ0FDKy8tZvHgxzz//PBs3bhTtsbKgTGmvmzZtyr59+ygoKPCZbNXpdFitVqqqqigrK2P69OlMmTKF9u3b89tvv6HT6UT/U9FWJpwCAgK45ppryM3N5ejRo1RWVqLX67FardjtdjHw2LZtWw4cOEBoaKjP4JlSdyt2TJ8+nfj4eBYsWMDx48fFQh2lHdbr9RiNRoxGIy+++CJPPfUUAQEBVFVViUHMkJAQsaCnffv2TJ06ldTUVPR6vUgPg8FAZGSk6L9Mnz6dPn36AHDvvfeydetWPB4PDRs2ZPfu3aL/WbduXfbv30/Tpk3p1KkTX331lZiItNlsGAwGOnbsyOHDhwkMDCQrK0uUn6ioKJ8JvejoaE6dOkVoaCglJSUEBgaKPqSy8Ke4uJjg4GAAUUd6PB5iY2PJzc0V7249evRg8+bNwhaA0tJSTCYTLpdL5Pf06dOxWCw888wzREZGkp6eTlBQEC6XS/QpkpKSKCgoEGMIr732Gunp6eh0OiIjIzGbzeTk5DBlyhSSk5O56aabiIuLIy8vT7R1kZGRYiI6KChIlOWHHnqI1atXi7pe6bsbDAZRlj/44AOGDx8u0ikwMBBATEYrZXn//v1iolKn0/lMBgQFBYmynJKSwqBBg8T7aklJibjWuyy73W4iIyMJDg7m5MmToj8aFhbmU5Zr1KhBjRo1SEhI4KuvvjpnPaj4PXjwYI4ePcrWrVvFwi6LxSIWg0VERHDLLbcwc+ZM0fc+cyGcUift27eP3Nxcn4kPk8kkFm/o9XomTJjASy+9RFJSEnv37hUaShkzm83UqlWLDh06sGzZMiwWC06nk4KCAsxmsyhrRUVFXHfddWzfvh2Xy0VpaSnh4eEUFRWJd3Sl7MXFxZGdnY3RaMTtdpOUlER6ejp169aloKBAvEso9ir1alVVlah3IiMjxaIDZRGc0t+tqqqiRYsWfPTRR/Tv35+oqCjS09MxGo24XC7q1q3L0aNHha833ngjS5cuJSUlhSNHjuBwOHzeLR0OB1VVVQQEBBAUFERpaSkul4saNWqId5UBAwawfPlyTp06RXx8PFarVbxDAnTo0IGt
W7fy3HPP8dJLL2EwGMT7mN1uF31oqH43rFGjBllZWaLfoix2uvfeeykpKeHLL79kwoQJTJ06VdRR3pPSSl/ltddeo0WLFvTs2VM818p7OCDaqzFjxnDPPffQuXNnvv/+e7GwUSn7Sl3bqlUrTp48yaxZs1i1ahV6vZ7mzZuzdetWDAYDUVFRLFu2jA8++ICPPvpIvOt7axkMBm655RYxvjhmzBh++OEHjEYjkZGR6HQ6Tp48idVqpW/fvhQUFLB+/XoWL15Mr169GD58OOvXr+f48eN4PB4xjnX48GHR3t1yyy18+eWXDBgwgC+//FIs1OncuTPLly8Xz+gdd9xB586d+fTTT1m3bp0YV6moqODEiRPiuRs+fDjXXXcdTz75pBizUvpTyvPVtm1b0tPTadGiBevWrRP1f2xsrBiDaNy4MXv27KF9+/asW7dOvJvDnwvsKysriY6ORq/Xi0ktpZ4yGAw0b96csWPHkpmZyXPPPce1117Lb7/9JsqA8qyYTCaefvppvvjiC3Jzc2ncuLFYxGk0GomKiiIvL4/Q0FCcTidDhw7l66+/xu12i7ozOjqaoqIiqqqqeOihh/jiiy9ITEzE7Xazd+9eUY6GDh3KkSNHOH78OD/88AOhoaHYbDZ69OhBRUWFaEt79erFunXriI6OpqCggMWLFzN8+HDq1avHxo0bCQ0Nxe12i/ozMjKS5s2bM3PmTKHldDopKSmhRYsWYgFAQUGBT5s8btw4lixZIvJOr9fz7bffotfrhX3Dhw8nNzeXFi1a0KhRI0JCQjh+/DjffvstTZs2Zc6cOeetq68mZEhAyb+Gzz//HLvdzjvvvEPXrl0ZMmQITz31FJ9//jm5ubnn/V1lZSUzZ86kTZs2uN1u5s+fz/z584mJiaFJkybi9z///DMHDx7krbfeokePHvTr14/JkyezatUq3nvvPe655x7uuusu1qxZQ8uWLUXn7LvvvmPAgAH89ttvuN1uSkpKqFGjBps2bWL48OGkp6cTFhbGu+++S0lJCS+99BI6nY7777+f+vXrYzAY6NWrl1h9/NRTT7F69Wq2bt0q7N+zZw/XX389ZrOZ3bt3M2TIEJo3b87LL79McXExer2eBx54gPnz54tVFspkBcAzzzyDzWYTq38BMUB86NAhbrvtNg4cOECHDh2orKykRYsW/PDDD7zyyiuiczFt2jRmzZoldHU6HRMmTBCdveTkZIqKinj55Zc5ePAgL7zwAlDdME2ZMoWCggIGDRrEuHHjWLVqFR6PB7PZLF7iBwwYQOvWrSkpKSEgIIDmzZuTk5MjXj63bdvGjz/+yPLly8VnY8eOpWPHjoSHh6PX6yksLCQ0NBS9Xs/DDz+M0+nEarXSoEEDbDabaFwKCwuZNWsWs2fPpmbNmkD1hMuPP/7INddcIzpdTZo0oWbNmrRv3140/Dt37uTEiRN8/vnnpKSksHv3bpHP8fHxzJs3j4qKChYuXEjLli1JSEgQK46uv/56li9fLgaclV1d48aNY9y4caSnp9OtWzc8Hg+HDx/mpZdeEvlst9uZNm2a2LU2ZMgQtm/fzpQpU0Q6R0ZGMmDAAEaOHAlUr95o1KgR27dvF2VZ6Zw6HA7efvttUSauv/56Xn/9dfr164fL5RIvX1OnTmXnzp3odDoeeeQRDhw4wNtvv43JZGLt2rVC7/PPP8dms9GsWTPy8/O5//77eeGFF9DpdBQVFREeHs7s2bN544030Ol0DBo0iOLiYgICAkhJScHpdHL99dfjcrlYuHChmDypW7curVq1YtiwYUD1Kqfu3buze/duunbtSk5ODl26dKGiooKwsDAcDgfR0dFERERQVFTE4cOHARg5ciTXX3+9KG+hoaFkZWVx++23s3z5cuLi4kSHLDo6mri4OGrUqIHb7ebo0aMkJCTw66+/ioGahx56iLKyMgICAmjUqBFbt27llltu4dSpU2JCVRlcUSZ3lMnfsrIy0tLSSE5Oxmw2Y7fb
efTRR2nZsiW1a9dm+fLloo7YtWsXUVFRhIWFAbB06VL0ej1JSUns2LEDgEceeYQ9e/ZQXl6OyWQiPT2dmJgY3nrrLQ4ePEhxcbHIlwceeIAXX3wRnU4nBj0bNmzIhg0bfPJFr9eLARKlTKxatYrbbrsNk8kknpv33nuPZ599ln379nHdddexbNky7rnnHnbs2EHr1q1FOgA0adLEJ1/ef/99li1bRmJiohiMMxqNVFRUcOedd7J3714xUK4MnCu7HQCOHTsm8kWn03HfffcxZcoU1q9fLwZJAG677TaxSt3tdhMTEyMGzxwOB9u3bycrK4sPP/yQL7/8kuTkZBITE4HqlfZKHb9+/XrxEjl//nwqKiqoU6cOAB07duTdd9+luLiYRYsWicEsk8nEL7/8QmBgICaTicDAQD7//HM++ugjbDYbFRUVfPTRR4wfP57nnnuOiooKnzZnx44dxMTE0LJlS0JCQvB4PDRv3pyYmBifNmffvn24XC7uvPNOTpw4wVNPPUV5eTmFhYU+bc7IkSMJDAwUaZidnc1TTz3FwoUL+frrr8Xk8W+//cbKlSvF86LsTnv11Vex2Wy0aNGCjh07inoYYNeuXXTt2pUdO3aIgd/w8HCMRiOzZs2iefPmIu/Lysq47rrrcDgcYvApLCxM2FVSUkLt2rWpqKjgkUce4dSpU8TGxhIZGcmOHTsYMmQISUlJ4pls3rw5xcXFjBgxQkxqKZNAShtWWlrKTTfdJNovZVdH165dWbVqFUOGDEGv1+N0OqlZsyZ6vZ7Y2FgxMdqjRw9cLhezZ8/m7bffpkePHmJlc2BgIM2bNycpKUnsyPvmm29YsmQJqampos164403ePrppxk3bhwAP/zwA23btsVoNIpdN+Hh4dSqVYuysjKSkpJEmr311lsMGDCAhx56SAxmAtxxxx08//zzbNu2jSZNmohBv/z8fOLi4jh69CinTp3CZDIRERHh02YpA5JKOa1ZsyYBAQE4HA48Hg/XX389YWFhZGZmotPpKC8vZ8+ePUyYMIEBAwYA8Morr1BRUcGvv/7KXXfdhcViERMkNWrUYOXKlQQHB9OlSxdycnKoqKggJiaGmjVrkp6eTocOHTh27BjZ2dmi7d+9ezeJiYl4PB7WrFlDeXk5PXr0YMWKFbRs2VIs2rjvvvvo27ev6AM2adKEZ555hoKCArEbRGlvlQH4srIyBgwYIF6Wi4uLCQsLE31K7/p34sSJFBQUUKtWLXQ6Hdu2bRMDZytXrsTj8dC/f38xOGgymTh58iTDhw+npKRErDxt0aIFS5YsYezYsaIPtXTpUo4cOUKdOnXYtWsXOp2O8ePHExMTQ25uLjExMVRUVPDTTz9RWlpKdHS0GCCYNm2az2plm83GyZMnRZpVVVUxffp0+vfvL9IsNDSUTZs2kZ6eTtOmTcWu/LFjx+J0Ojl16pTYxf/ll1/yzTff+KRZdnY28fHxPmkWExNDYmIimzZtQqfT8cADD/DOO+9QVFR0wTSbO3cuXbp0EW3WoUOHqKioYN68eWRnZ1NRUUHt2rXJzc1lzJgxTJgwAZ1Ox6JFi8RgioLRaKRdu3aibfB4PAQHB9OsWTOWL1/Ohg0bxET/p59+ysKFC/F4PJSXl59V14aFhYlV4lC9w9G7rnU6nUydOpWjR48yfPhw8vLyxOBccXGxqGvbtWsnnunw8HB0Oh0tWrTA5XLx2Wefibp27ty5PProoyxcuBCo7u9XVlaKPq2y0EZpi8eOHYtOp2PFihWi7l+2bBk6nY4777yT48eP8+ijj4q61rvub926NSaTiV69egGQlZVFUFAQa9eu9bFn6dKlrF27Vqwsr6ys5J133hETN0q+bN++nS5duqDT6fjhhx/EBL+C2Wyma9eu2O12kpOTxeT67Nmz2b59u8gXZUfXrbfeClT3TU+dOkVwcDCvvPIKGzduJCIigvr161NZWcl3331H69atiYmJISgoiHXr1lFcXCz6WzVq
1BA2KM9gUlISb7/9NosWLSIxMRGHw0FJSYmYmA8ODhYDycHBwSKqwoABA0T74Xa7WbhwIePHj6eqqorKykrWrVuHzWbD6XSKXaB169alsrKSZs2aERMTg8Vi4e233yY5OZlHH31U7MQymUwicoWyuMZgMPD+++8TFxfHQw89JHab16hRg9LSUu644w7RR5g+fbqYHImJiaF169a88MILFBQU0LRpU6Kjo3n77bcB2L59O2vXrhXvmAcOHBCr251OJ3379iU8PJy9e/eKNvD//b//R0lJCcOGDUOn07FhwwYee+wxMjMzufHGGzl+/DgDBgwgKSlJ9LV0Oh2NGjUS73JQPSHz+OOPizqyc+fOuN1u/u///o+ysjJq1qwp0njQoEEYDAbi4uKwWCysXbuW++67j0OHDtGlSxdKS0u59dZbad68OVA9iO92u1m6dCl79uzxWZCyaNEixo0bR3Z2Nq1btyY9PR2bzcY777xDo0aNxOQAVC9e+e677zAYDEydOpUlS5YA0LBhQzEJoCy0fOihh8TguDJR3LJlS5/J0g4dOoiFsEpZViaBQkNDRVn+9ttvzyrL3jvNGjRoQN26dcXElcPhEGV59uzZmEwm9Hq9z45a70VAykRNZGQkQ4cOFe97Ho/nrLJ8/PhxnnnmGTZv3izKstIfa9iwoZjMDgkJYeHChSJii8fjYfz48SxcuFC8QxUUFPDhhx9yzz33oNfrxSSHshtAmZBavXq1GOx2u90EBQXRoUMHnE4nwcHBREdHY7FYmD59uliUp9ChQwdiYmIIDw/H6XRy9OhRvv76a/r06UNeXh5BQUF0794do9FI9+7dKSoqIiEhgQ0bNuBwOMRizHr16mE2m3E6nTRr1kz0/7Kzs+nSpYuY9HvppZdo2bIlcXFxYvJG4cYbb8RgMIj3NJvNRvfu3cnKyqJHjx6UlZXx5ZdfsmrVKoKCgkQ/dM+ePXz22WfY7XbCwsKoWbOmGMBX6mcliklaWho9evQgPT2d1NRUsZigefPmeDweUWYqKyvp1KmTeE9r27Ytubm59OnTh4ULF3Lq1CkGDRrEzz//jM1mo127dkD1zqPCwkKaN2/OtGnTAMQ7hd1uJykpSeSjMvmWlZVFRESEuParr77CYDDw7bff8vLLL5OamsqMGTOoqKjAZDKJRSNt2rQR0QGMRiO///47Y8eOFc+P2+3mq6++4oUXXhALFFatWkVRURHLly/n9OnThISE0Lp1a3777TesVivx8fGirq1ZsyY1a9Zk5cqVuN1uxo0bR7169QgNDcVkMlFYWMjnn3/O7t27Rb0VEBDATz/9JCaonE6nGB90u9389NNPoq5dvHgxpaWl1KhRA5vNxqhRo3jjjTfEOAdUL+R66KGHxA5qqN4t8/rrr3Pq1CnMZjNz587lzTff5MSJE9SrV4/y8nK+/vpr3nnnHQYMGEBQUBBVVVUMGDCA5ORk1q1bR3x8PAEBAURFRbFgwQI8Ho/Y0VWrVi0xFqBM5A4bNgyz2SzGIrZu3crIkSNZt26deM+dP38+s2bNomXLltSqVYs//viDIUOGsGPHDh544AFq1aoldpsr72t16tTh1KlT3H333aJMKHlXv359tm3bhl6vZ8mSJWJyTafT0b9/f3Q6HbVq1aJfv354PB52797NkSNHeOCBB8Rk1YIFCzCbzQwZMgSz2SzGMBcsWMANN9yAw+EgOTkZgPHjx4s+4sKFC6moqOCdd97h9ddfx+FwMHDgQKB6bEcZi/38889F3X/fffeJPAJ4+eWXefbZZzlx4gShoaHMnj2bRYsWkZubS+vWrSktLeXrr78mNTWVgIAAatWqJcbXKisrOX36tBi7+vLLL5k/fz4FBQW0bt1atMm///672NH+/PPP89RTTzFmzBiWLl3qY5/SDrz++uuMHDmSO+64g2eeeYYxY8awbt26C45/X03ICSvJv4a0tDQ6deokOlBQvZLW7XaLXTLnYtu2bZSWllJaWip+
bzab6d27N1lZWeL3aWlpJCUliW2YAKmpqVgsFsrLy+nbty8Oh4ONGzfSr18/0dCGh4fTr18/8vLyiI+Px+Px8Nlnn+F2uxk1ahTh4eGsXr2a8PBwOnXqxLFjx3C73fTt21f4NHjwYLGi4+abbxarRRMSEvB4PGRlZTFo0CC6du2KwWBg9erV9OvXj8zMTLG9tm/fvgA+nT+F9evXi1V1APXr16d+/fpCT9H4+eefxQAMVA/+KBw7dgyHwyHC1AAMGDCAvn37Cm2Px8OGDRtISkqibt26QPXW3OjoaKD6BbZv3764XC7++OMPWrZsyZEjR/B4PPTt21c0iG3atGHgwIEijJuymmH9+vWiA35m2VDS3ul0EhkZSVRUFAApKSli1Xpubi7h4eEEBweLfFZe2pR8NplMNG7cGKhuJHr37i1CIMbExJCXl0dCQgIJCQnEx8cDcP/995+Vz0ePHqVXr14sX76c0NBQOnfuzE033cThw4fJyMgQL6Jut5s+ffrQokULAOrVq0dqaippaWkA9OvXj6NHj9KhQwcyMjJEh1EJlbJp0yaxLT87O1uUK8W/wMBAfv/9d9q1aydshOrQGd5hD73ztKqqivz8fAwGAzt37hT5e+edd+J2u8XAtFIWATEJ2bx5c0pLS+nbt68oG8eOHaNjx46sXLlS7OhRwv6kpqYyePBgsWLZarWKl8HOnTuzbds2+vTpw+7du8WAvbKi+tprrwX+HIhs0qQJVVVV9O/fny1bthAZGUlAQIB4ftPS0khNTSU8PJy4uDgAkU7Kah69Xs/x48e58cYbycnJEavXlEmXo0eP0qdPH9LS0rBarXTr1g273Y7b7aZHjx4EBwcTHByMx+MRk6dKGQsODhbhs7KysnA6ncIe5ZlWVukqz3RWVhaZmZki9MiBAwfo1KmTeJYTExNFvgQGBnLttdeKfElISBD3u1C+eIcPU/LFarWKgQTvfFm+fDkdOnTwWfmn1MNKvii7UZTJwpYtWwLQu3dvkS/BwcG0bt1aPL+pqalih413vjRr1gyPxyNWunbu3Fms5vXOl8rKSlEHRkRE0KVLFxGyrGfPnmLFeXJysnimFHuVFXcJCQlUVVWxceNGhg4dClR3ZsPDw+nduzfl5eVcf/31YuDK7XaTmpoKQE5ODuHh4cLuyspK0ebUr19f2KOEmvvll1+IjIz0aXOUdFTCSSrhBwsKCnzanIEDB1JQUCDaHLfbLcqY0uYcOnSIJk2aiHAibrebfv36iTYnJiYGj8fDypUrxX09Ho/Y4VezZk3Cw8Pp2rWrWJlntVpFfd2/f3+hpYQ48Hg8oi3LyclBp9OJ5w5g4MCBYqFCWVkZzZo1AxCLDuLi4khPTxf5pryIKWF0UlJSyMnJITQ0lNWrV4v69+jRozRp0oTS0lL+85//iAmijIwMMegaExMjVsQ1atRI6CckJHD06FHxDKalpdGhQwe2bduG2+2mbt26YgdK7dq1qaqqYvny5WRmZornQ0kzpc1SVr0rbZZSN3qjlFWPx0NBQQGtWrUSuxL69u0rQk4oExBBQUGinKxZs4bU1FThW8+ePUUaKW3WypUrxcBiXl6emPxISUkB/myzvvvuO6xWK126dOHo0aMYjUZSU1MJDg4mKiqKm266SYRnUeo1pc3q2rUrAPv376dJkyY4HA4GDRoknv/OnTvTsmVLnE4npaWlPPvss3g8HvGiqtg4fPhwTCaTuHe/fv0oLCxk8ODBWK1WkXbXX3895eXldOrUiYCAANFfaNy4sbgmODiYtWvXivpXKUtQPWin1IdKn8+7/lXSUqlb+vbty/Hjx8nMzGTgwIFUVVWxcuVKAFH2EhMT2b17N3FxcVitVlH/KgNIygCh0qcaMmSIsPXYsWOUlpaSkpIiBlkHDBhA8+bNcTqd4nnZsGEDycnJDBgwgH379tGpUyduuOEGsTNFKUfbtm1j6NChoq5et26dqLdSU1OpV68eBw4cwO1206VLF2GH
xWIRZSs0NFT0a8PDw4mKihJppkyQnZlmSl/rYtIsISFBtKFKO64sBlHud8sttwg/lDbr6NGjfPXVV5SVlYm+kFLXKuGjoXoXgNK/1+l0REdHi7o2ISGBpKQkURd517XFxcX06tVLTFgpK6cv1L9XdrM0btxY1LXKgGbnzp1FXav015X/GjZsKNJd2emjTFyEhoaKeuPGG28Udf/NN9+Mx+MRK+S988X7fWPgwIG43e7zvm9UVVWRmZnJDTfcIOp+xR4lbJDyfCttoGKPki8nT54Uz/TRo0dZvny5qMeUfBk5ciRut1v0V4KCgkhLSyMtLQ2DwUB0dDR169alrKyM2267TeRLUFAQcXFx4t3OZrMRGhoq8rBfv34UFBSIsOxGo5EGDRrgdrtFfijpnJiYiMvlEgsl0tLSxLXKxLyyGywxMVG8Y6xZs0b0GwCx+0RpL5UJ8ZiYGI4fP87p06ex2+0MGTIEt9tNfHy82Ilx+PBhsrKyWLt2rUjvyspKMenfuHFjQkNDCQoKEv3ONWvWEBkZSVhYmCiP7du3FwP7q1atIiQkhIqKCgoKCujTp4+wTbm3ct+0tDQxoVezZk0cDgdt2rShSZMmREVF+Txjv/zyC506dWLr1q0kJSWJcuZ2u8WEgcPhIDMzk169erF582YxaN2wYUO2bNniM/gHiBDSVquV7du3Exoayu7du8U7m8Ivv/xC06ZNOXr0KC1btsThcIj+nrI79bbbbhO7HI4fP05AQAC///67aLszMjLOyqumTZuKPAwPD2fRokUA4t2jrKyM8PBwmjVrRkFBAWvXrhV2n1mWlfpOr9eLvo2SV97vvcq9lbK8fv16oLqPfaGyrGgrz5V3mxEcHCzK8sqVKwkMDPQpy0p4rjPLcnZ29v8sywEBAXz//fdkZmaKsqz0r1u3bi3Kcn5+vk+f0+Px0KdPHxISEmjSpAnR0dEEBgbicDjo27cvq1evxuOpjmqgvOe2bdtWPKPeIa3Ky8sZPnw44eHhxMTEiInlwsJCkpOTxYRYSEgII0aMICMjg65du+LxeEQ/TenjZWdnc91115Gamip2ZcXFxVFRUUFycrIYg9i/fz+dOnUS5QAQYxZKeD6r1Sre0TZv3uxzrcVi4dixYzRt2lSUVaPRKCbwld19CQkJREZGkpqaKt6fXC4Xa9asEX1OZSfRzTffLMJqKn3B4uJiatasKeq4sLAw4uLi2LJlC8nJyT4h2bKysujcuTM33HADf/zxBykpKT4hYe+++27RTiuLvEpKSsjLy+P666/HbreLcp6ZmYnVauX//u//AMTuPGXR4rBhw0Q52LVrl3iGsrKySElJETsSld1lQUFBop1WyntaWhrbtm0jJCQEq9Uq0uvM8rlx40YMBgMbNmzAbrfTp08fLBYL9erVIzw8/Lx1bY8ePVi+fDl9+/ala9euhISEkJaWJt59Q0JC6Ny5M+Hh4aIeBET/w+PxiMUBt9xyC/v376e0tFS8lyu7kHr37i3qBKVeqFu3rtjhA3/2n5TJ5k6dOoldO97vg/369ePkyZPifWft2rW4XC6OHj0qQj+Gh4f7LIgrLi4W7ZvH46Fjx45s3ryZTp06AX++Tyi73Ww2m8+4ar9+/Thy5Ih4FktLS0VYfu++ldFoFHX+3r17adSokUgzZTIzPDyclStXsnHjRnr16sXGjRtxu90UFRWRlJTE8ePH6dSpE1VVVWJBkNL3a9SoEe3btyc1NZXNmzeL8Sbl/spxCD169BDpquRbUVERrVq1Ijw8XIRLfuyxx0RZVtLszHxSogedqZeYmEhaWhp2u13sglfyKSUlBZ1OJyIVrF69WuzSPHDggNhJpVyrHAWQlZXFl19+KdpW5TrgnPadCyXPzheS+2pDTlhJ/jVkZGT4TCZB9YtUTEzMeeOYKr+D6gFF798r5yIpYeLOpa/T6USlnJCQIELkJSQkUFpaSkVFBRUVFaIij4mJISYmhoMHDxIVFUV4eDgNGjQQ
NigdAEVPuafye2UXgbLCLzk5WazYadCgAQ0bNkSn05GRkSF+462XnZ3N/PnzsVqt4nOn0ynOW1EG6hQ7FL0TJ04QExMjzq5S0kHZ5eU9Sei9qqhTp078+uuv4j5KY+GdjgcOHBArMNatWyfCE7hcLpKTk8Wgw7PPPivitBYXF4uXzIiICBo2bEhAQAAZGRlnnRei5LFiozIAuG7dOrG6WglFqOSpd8hEpTOlrGIyGo2iYXS5XCQmJopJQO+wEMr3gDivwzufPR4PJSUllJaWijNXlDwrKioSE4oej4fbbruNb775RpSBxMREoaV0MJRyqjB16lQ8Hg8vvviiOEcHEHYkJyeLlXdOp5OsrCzxXXZ2Nh999BHnQrFRefF56aWXgOrn56effhLPm3dZhOrzq7wH6BISErBarSKtgoODRTqGh4eTlZWFy+US6eLxeES5VlbqKOH1lGclNjYWo9EowmV4/+sdlqRTp044nU4RIkGxR9Fv0KCBWP2plGclvIOSjh9++CFQ3TlSOvNKGVC0GjRoQGJiorDD5XJRVVWFw+EgKipK7LRStq0XFxdTUVEhXgJPnTol7FFs837pUO6j5CtUP2f16tUT1ykDpUoYnTPzRfH/fPmiTOafmS9BQUEilIESvkbZPRAVFeVjp1IPK+ngHb85KipKTA7GxcWdlS+A8F2n04m0UupdJZa0on3nnXf6aHufXXJm/a683NasWVOEHVXKW0lJiVjx5XK5KCoq4ssvvxR1/Jn1q7L7JSQkxKeOV3ZYKaEWlUF08G1zEhMTRXif6OhoTp48KezxTscaNWqIwSkljZXrlDanbt26PufcKIP3Z7Y5CQkJPrHWvdscxZ+8vDyfNsftdvvY1LBhQ7Hj4OTJkyL8n7eW1WoVAyMBAQGizQoPD6dJkyZix4T3rjWoHrzX6XQiJK3yQquU1dLSUhISEkS9oTw3NWrU8Akj633GnTIgrUyyKPW7MvielZVFo0aNiImJITg4GJPJxJEjR0SaKSFjFe0dO3YwYsQIYmJixMuMMiCqhOVT0iwxMVGstFfSLCMjQ0zceuOdZkpYEagO7aacQeR9Pt65nhelzapfv77w88w2SxkYUnZGKPdR2iylXVDaLKX9rqqqoqqqyuc5sNvtYld0Xl4e8+fPx2w2U1BQIMpTdHQ0Bw8epKKigoSEBPE8WK1W8Yzk5OSwZMkSsTJQuU5JR8Wv0NBQEhMTRYib77//HqiuA6dNmyZ2hHrXRdu3b2fFihVERkZSWlqK0+kUC1Dy8vIoLy8XO3W82wOlvvQ+d8i7/lXSUTlDy7v+9a6jvftU3iHhlGdKqSuVsgxw8OBBoLqMhoWFiTRr1aqVT54odVq9evVEKB2lvQoLCxN5r6RZRkaGyBe3283BgwdFiBXvemrixIliQK+wsFD0a5Vy53K5xI7VoqKis9Js+/btQuvll1/mp59++p9pBog2VJnMUJ5hpaw3adJE9De826wlS5aIPqziX05ODkajkX379uHxeMjNzRVnogFn1bWNGjUSYbiV7xVuvvlm4b9yJhycv3+v7DJXQrEq9ijPgGLjqVOniI6Oxmg0ip0oCpMnTxaTyVDdjih9Ku+632w2i3SIiYnxqWu96/5zvR+c+b7h8XgYMGCAqPu97QFEvigo9jRp0oTg4GACAgLIzMwU/bXdu3eL9ljxuUWLFsTExIhBdOUctYMHD4q2V6mTEhISRL4oYbiVOtflcrFnzx4OHDjAvffei8ViEfmiTHqYTCbCw8PFMxEdHU1QUBAmk0nU2Uodr5yP510elJ3Xdrsdu91OdnY2hw8fFvdR0l1pp5W0V8qrshNDKbdnnuuh3Fs5swfgs88+o1atWrRv356Kigrsdjvl5eUcOnSIvXv3ilCmp0+fpkmTJnTt2pUaNWpgtVo5deqUz7vKa6+9xsMPP0x4eLjPvZX7hoaGivP+oHoxUEJCgljcoSxQzMnJIT4+nkOHDhEeHs68efO4/fbbiYmJ
ETuKlUlSZUemEkI6ISEBp9N51plJ33zzjaiz8vPzqVGjhjgDUykzoaGhnDx5UqyaV9pHJby50qYpz3FRURGbN2+msrKSkpIS0tPTRTtWv359kVfKxAf8Obj3xRdfANWLqJTJFUA8g8rCOm8/Fi1aRHR0tAhBXl5eLvo2Sn9EKctRUVGinChl2Xuh1e7du8nIyGDcuHHExsb6lGWXyyXOg6yqqqJJkybUqFFD7BJVynJ+fr4Ij+hdlqG6n6qUZY/Hg91uZ8uWLSKsonK9d1kODg4WE3RKWVbaw9atW/uUZWW3TFBQkE8ECKU8Kc9tQkKCqJO7desmdj/Gx8dTo0YNEbrVOyJMQkICDRo0oLy83OccKmWsQFmAMHnyZKB6saSSdkajkaysLOrXry8WJCQmJor89D7D2+l0ikmwhIQEn3cYpT0sKCggOjpavJMr/Xjvd0WlblXKLVTXCUq7cPjwYfH+VlVVJRaOQPV7zpEjR8S7rtLWl5aWotPpxISIghIe+9ixY7Rq1YqMjAyf93+drvrswRMnToi6Pjs7m/r165OVlSXS2btvk5CQQI0aNXA4HMTHx4v+V61atejSpQvr16+nVq1aot32eDwiEg1ULzCuUaMGQUFBZGRkiGcoIyNDjCF59zdMJhM///wzgIh8k52dLZ7NsrIy0tPTRdhTpZ0LDg4WY0DKRGJCQgI2m42DBw/6pL93XRsVFcXWrVspLS1lwIABJCYmimfWO9T8ypUr6devnxjnUBaYZmRkYDAYxM6isrIyJk2aBFRPsBuNRrFbXUlvqB5DWLx48VnnGSnh7pWQjZMnT8bpdLJ7925OnjwpIqco7bTyvqOMc1VVVYmF08p1Sp3wySefsGXLFmFP7dq1ycjIoHHjxqLfqdfrRZ/TZrOd9d6slCWlrlCee+VdV+mLKCE+09PTadSokWgflYUypaWlfPvttzidTkJCQnzaI6VPrqSjzWYjLCxM9OOUvojy3Cn96rKyMmJjY9mzZw8ej0eEnR83bhwdOnQQfij9j4yMDJ/3dOX59h5jU647czG80nYoi8GVts97jHPr1q1iIaPyLqD0mZ1OJ2+88Qb3338/6enpJCQkiPo0IyNDHBUSEBBAz549adGiBffeey9Hjhw5y75NmzbRqlUrmjVrxu23386XX37Ju+++S48ePYRvVztywkryr8Fms/lMxCgo8bAv9Duz2UxJSYnP75WVuyEhIRQXF4sVfGeihIwICAgQ97FarWLFRHFxsdBVtoSXlJQILW/7lAZeOTxe8cnbruLiYnFdaGioeKFWrnO5XD739NZ79dVX6d27t1gdBPg0LIpN3pqKnhJOTfm8oKCAGTNm0LNnT9EQwZ8TOzExMbz33ntiRZnSQCqrQ5XwXX369OHmm29Gp9ORk5PDXXfd5TO4pfDSSy+Jv/fs2SNi3QYFBREWFobBYKC4uPiceW2z2USaORwOevbsyW+//SZWHcOfk27KpIKCsupIGRhPTEwUK0yUkIJKh/nMw+aVDpaSfmfm89atW4mNjaW8vNwnn51OJ926dRMdopSUFBYsWCDyy2q1+pQ15Z5KWb7tttuYNGkStWvXFg2/gnJegLJSWnlx8Lbx1VdfFYNJ3h1J7/tBdciFDh06iLj448ePx+12i/KnlB1ATHq4XC4xqOmd7vBn7GolbrCSvso9lU6Lcq13uA7FJ6XTAn8OLirnkigDw0qnXOnIK/Yoz1tQUJB4aVcGyZTVicrWeiUsgLKa0Hvljbc9VqtVfKeE/VOeAaW8PPjgg8TFxYkVicrnpaWlWK1WkS/eaa8MeCvp652O3mezKfmnxEg/M1+U67zzpaqqSnTclBeHM/Olfv363HPPPUJbecFTwvadSVhYmMgXpcPepEkTkd5n5ov3M6jki3JmSs+ePUU94z0RDr6Dgt5pr9SB3t8pHeTKykrhl5Jn3vmi7IIbP348n376qfg9/LlT1ftf7zpe8U2pY5Uyfmabo8S0h+o6tLKy
UthzZjoqE7/eKyK92xzvl3uDwSD+PrPNCQsL8ykn3s+A984M7zZHeSbPTEflJd37/CfvNllJeyVMpmKPUn96l2/lmVQGLPLy8ti9e7cYLFFCDTocDkJDQ0VZ9k7b4uJin5Vl3uV7zZo1orx4LwxQyndoaChhYWFissm7blRCOiqMGzeOPn36EBYW5lMHKPZ5p5nin3IGgdJmne/Abu80UygqKhIhRWrWrOkTzk5BSXcl373riTPbrFq1aonfDBgw4Kw2q7y8XLQVii1WqxWn03lWWahduzYOh4OKigoGDBhAWFgYdevWpbi4WNSjSpjZM+2oqKjgueeew2AwUKtWLZ/BceU6ZSWzd+hipe2/7rrrxOr+WbNm8dVXXzFr1izq1q3rUw7uv/9+fvvtN06fPi2el7vuuotHH30Ui8Uizl/avHkzhYWFPvWvki5wdv2rpKNSlr2fK5vNJup87/rXe2GNdzp6T0rp9Xoxkac8v8rvvCdTvOstZWedsno4MTFR/FbxRyl7SvlPS0tDp9NRp04dcRYAVC/umDRpkngBV8qZMriqlAOHw0FcXJyo05Q0a9eunVhEoNNVn3n25ptv/s80A3zaUMVP5Z7evpzZBu7du1fs2jWbzaKubdSoEffdd5+wRdFSJjS869qwsDAxoaj0TRWUEFNK/iicq38fFBREeno6PXv2FIssTCaTCDd6Zl0bEhIibFM0Vq5cyfbt2+nRo4fIL71eL+qNM983lHRQzhfx1jpXX+JC7xvKBJG3hsKZbaC3Pcrg+XPPPSf8huqwsN75oixgUNJTCSFWWFgoBnWU87mUa5Uznc5cOaxEL8jJyRFlTLFPeRaU9lixU6njlUkR5V1PGcxW8vf+++8XOzSUtPZ4PKLfAPj0L5TQkWfuEFLu6+2zgnJvp9MpBt/37NnDxx9/LBZEKAtCJk+eTFlZGadPnxaLW2bNmiXKoJI+3osgR40aJcIAe0+0KPfV6/WiDYTqwXLFTu8y43A4mD17Nnv27GHDhg107NiR5557TpQtJZyud9lQ2kJl0E9p05TFVLVq1RLhoJUBTOW5Vn6rnB+i+KT44H3WpVIvt2vXjl69emEwGMSAXUlJiWj3lfcI5f+VtlvZ8aloW61WMRANf76TlpeX+9QxBoOBX375BZvNRmFhIcHBwaJP5J2vSnlSninvvPJuDyIjI0lJSeHHH3/kjTfeEJ8r6amc4eNdnpRzj7wXaSnPk2KjsvjIbDb7lGUlfZX8OXr0qNgBpNiolBVvlIVGZ75nKnWYckaXd1p79++LiopEWQgLCxM+mUwmkUfeCy8Uf5UxCG+8n8usrCwRJl6JyqCUI5vNRmRkJOHh4ezcuVPUn8riKPiznVPyOy8vj7y8PFF+rrnmGuG/0l88s15V0iEsLAybzSbKrbKLTJkcsNvtHDx4kKqqKpo2bcrs2bN93u+9xy+U/terr74qwi96o/SJbTYbbdq0EWfTKpMcyiJr77re46mOAqDc81zjWUr6e4eNDAwMZMaMGRgMBtLT0/nPf/4j7GjZsqX4jVI+jUYjxcXFIk1/++03tm/fjsfjEee5mc1mevbsyfjx432eUeXZUNqYa6+9lieeeIKffvqJ8vJyn/LpXUatViuvvfYaOp2O2267TXyuPJNK+Vy6dCmxsbG0a9dOvDcWFxdTWVkpztK66aabSElJEeMcyplcyv2UOuLDDz8UfdGuXbvidrtZsWKFsMfj8XDDDTcwceJEYmNjxU40JX8Bn/ZB+W1ERARGo5F77rmH06dP+/T3FH+U56eoqIi77rpL3FOpEwYPHuwTqWTp0qWi3Crtm9vtFhN1Sh/FOz0VSktLRTQM+LN/4D3eAb79OGVXZ2hoKPXq1ROLDJS0UxaPLF++HKhetKig7KT0vpcyFub9/nzy5Elhw0MPPUR0dDROp5O3335b9EU2bdqE0+k8a1xXqUO8x9gUH4CzxjbCwsJEPa9M4JWVlREaGsqSJUvYvn07gwcP
BhDPWFhYmBinuPfeezl27Bi33347BoNBtPvFxcXk5+eLRRuvv/66CPN97733+pwj3q5dO55//nk++ugjgoOD2bp1K+PHjycgIECE7vw3ICesJBKNsHbtWtauXcuYMWMuW6uqqorRo0cD1atvvVEq4sDAQK677jpGjBgBcFb8emVFWMOGDcULRePGjdm3b59PJ1qhbdu2YtWxcgD0peDxeFi1ahVNmjTBaDSKVaJnHvh6PoYOHSrsq6ys9Hnx8x5I+ysoB56fifcZB0ajkWnTpomY7GfuHjsXEydOpFevXgQHB/usKvorFBYWsnbtWp/O3flo2bIlsbGx6HQ6EYaooKDA5+XCX1BWK/0vXC6Xz6G9Z6IM3Clxvb0H9y6V2267jeDgYDGp5D3w8U9gNBrFajll4vJMUlNTRXgsZaWf946BC6GsuPcOH/q/UFZgw9l1DuCzI+FKoeSL8tJ5ww038O23317x+1ztKKt+r7nmmouuAy/E6dOncblchIeH07x5c9FObNy48bJ0u3Tp4jPQDBcXNqF3795A9QDPpEmTLngA+ZVCSdfY2FgxMHDmKvXLQdlNcTnk5OSI3U7//e9/OXr0KDk5OT51gnLI9pnlxOFw8Mgjj4iD4/9K++Nt+7p160R9MnDgQDp06MBDDz101u61uXPn0qRJE3F+ElTvyBk1apTY2VKjRg0RRvB8k4l/F263+7x18IVQJpEAcSbRhdi7dy8TJkw4axXpzTffTK9evcTAmJJmZ4Yjcblc1KtX7yzd//u//+O6664Tv12wYME5F5ZdSVwulwgn602PHj3o1q2bsOViWLp06UXb4XQ6RV/iXG3WX6GyspJXXnmFRx999JzhrtXAu8xfTn3udrt55JFHxMCj9/mSahAQEECdOnV45513xIryy0E5U0mv1/+lZ+hKoeyGNxqNxMTEcPfdd5/1fjJp0iR0Op3Y0e3xeHjggQfEDhEF74n/zp07s2DBAoxGo1h0d6nccccdxMbGijNinn322Qtef76yGxsbi8FgIDAwkLCwsP+vvfOOr7q+/v/r7tzkJiEJGWSQkAAJK8jeCSsqIkNAoiB0WmmlCmKp8KXQAoKigKyCVFtBFBSihBE2DkgAQYYIBLJDhtn7Zt3c/P7I4xw+nyTgaJWf7Xk+Hn1Ybu79jPc47/N+n8VRXMpIVuCOQ54yXRU5sLQ2Rp977jmEh4ezkxZBNWnuRmNjI1asWMERL/di9OjRbDzV6XRYvXo1amtrOVrn38FoNCIsLAyrV69mh4UfG4q+Icgx5tv4d+bGqlWr2CBwNygKhigpKUFKSkqLsa6kffv2fNhN6c6U2QQAYNq0afjoo49U+z2lY4ISLy8vBAQE8D7jwoUL3/Jmd4cMnTRuKFq2uLgYe/bswbRp01g/bU2Opaen4/Tp0yqH3ta4desW3nnnHQBQRc3Sof93JTk5mduF0sASCxYsQHV1NUJDQ7Fp0yb+nKJN7kVMTAwmTZrUou8XLVqE8ePHszO4EpIV3t7eePLJJ/HnP/8ZlZWVKoctJZ988gnrnMrahUrsdjs+++wzjB07tsW+gJwxNRoNgoODsXr1aj7naO06QFPU9eOPPw6dTocLFy7A0dGR0yvSGcSCBQswZswYODg4wMPDgw0hlBZUCRkdqU5eY2MjduzY0eq7kGycPHkyp1RX0rNnT45ABO5kjvkp0Ov16NOnD6f3bt++PTu0kywH7uwnAagM4Q0NDaryBErIEcZgMKgMmBERETh48CB0Oh2uX7/OBquKigo2iv0YlJaWYsmSJZg0aRL69++v+ltwcDA/R2RkJDujNz93ojSa3t7eGD58OKKiorBx40bk5uaqvvvcc89hypQp6Nu3L7Zv34533nkHbdq0QWJiImbNmvWdzr1+DojBSvivQelFoIQs2vf6HXmlKH9PBfwqKyvh6uoKFxeXVg8vyDOqtraW70PpegCwdw0A9qx1dnZW5R1unjqCIhXonZSbFbLoA+B0cnRP
8hhR3pOut3TpUsycORNms1mlsGi1WlbC6JmU+WvpemVlZazMbtiwAV999RX+8Y9/wMvLS7W5UKbBo2soP9fpdCpPTPLIbWxsRPv27eHo6MieGUVFRaywVFRU8KaHCs4D4LQADQ0NcHV1bbWvld4XjY2NCAsLw8CBA2G32/ngipRZyndP0D3JQ9PPz4/z3Z4+fZpDloE70WVKbxHqs9b62WazYdy4cS362cnJCRUVFZx2CbijdNtsNlWkkbJ9aSw3j5pSQu1NSh4ZR+gZMzIyMHPmTFZo6CCXolCU48pms/FmkOofUU0I5dhRto1er1c9o3LsKPuaPBuV7aL0eqF0McrvV1RUoLGxkfuMDkfMZjPKysrYuEaKJaWaoOdxcXHBgQMHUF5ejg4dOnDhT6BpA0mFgIE74fB0L6Wiq2zj8vJybmNKMabX61FZWamSERTFokw5Sam/qB+VbU9RXdS+Sm8gm83GY0+ZjoY8gpX9Qt9r3i+0GVSmISotLb1rv1RWVnIbtKYglZWVcVuR0knjhK5DhmBqI4L6BWjaDCjT7ZBHEkV5KsPklTKLZCpB7QA0bQTovajPyEhFHsNarRZjxozhZyT5Ru+s/K9Sxis9tuidAbRYc5RelFRvhZ6neTuSYVB5CKRcc5QGYzp0o7WMrkHvRv3ffM2hPqR2oHFEfU/QRkGn06Fnz548z5VzErizpjg6OrZYs5ReqNSGdXV12LJlCzQaDUaNGsV1c6i/gKY5Setz87ZVRqkA6vGt1Wr5N3StkpISjmisrKxEWVkZbDYbf5fazNXVlZ/DbrcjOjoar7zyCsrKyloYmunfzeeLzWbjAuyurq4tvGQJZZuR3KHaItSftA42ny8VFRU8DpRygiJ+qK1IjpnNZnTu3LnFmmU2m1UHAiSTDAYDFzomPDw8eE5NnDgRr732GqxWKyorKzki53e/+x0ee+wxfkblgcTAgQM5okV5XfoepbClttBoNCgrK+MUos8++yyAptR7a9euRZs2bfD3v/9dNQ6ioqIwcOBA2Gw2nj9nzpxBfHw8y52ysjKWexkZGap1lP7bXP5SO9JYVspfijgh2ULyV/nuyjlF70TMmDFD9R36HY0P5bVpjCxcuJDnvbKeBq0nNPauXbsGoKn2DfULpZGi/qZ7ajQa/rcyxSn9LTQ0lPv1bm2m0+nYw/xebQZAtYbSe9I9le9C1yddwt/fnyOhKY0oyVq6FtUXIrndXNZSBBy1H6VvAppqndIzKJ2rmuv3CxcuRHl5OQYMGAAvLy+Vfk9ZBprLWuUBUmVlJbZt2watVouxY8eioKCAn9dms3Harub7DWX6T6WspfdqbX+glP0HDx7kvzXfbyhRymsAKtlPvxk4cCA/j5OTEx88U7+QJzu1J/WLm5sbH+ZS/R36LkVbNo/+d3Bw4DSxyqgucgRSphSm5ycZr4xAowhSZZQQ3ZsM6tRfpDcAUOkXFL1Iae6U0Lht/jndm3Q1m82Gv//971zHhpwBgKbsBm5ubpyS2s/PD9evX0dsbCzfW6mv0n1JB1XKG7ovGehJVyktLeXnVI4Zo9EIJycneHl5wdfXFy+//DL27t2LoqIijgCn9qT9AxmzSQ6RHKG2pjlD65hyXptMJo6QBoB//vOfAMDp0CgFNUUNKfevFPFIz00Hzcrxr1y7KUqLHP3y8vL4s/Lyco4IoD0aPVtdXR0GDhzIke9Wq5UdIKh9lZFCND+UfaVcD2gsR0ZGqvQKmmt2u53HslKm6/V6VcSA2WzmiGZ6TqvVyu90t7FsNpt5bVBG4jbfT5Ju0tygSrKzoaGhhVxV6veHDx9mWVhYWKjaC5Dc0Gq1eOaZZ/ga48aNQ2FhITp06KCqQamMniQ9R4ky4qeyshLPPPMMoqKicODAAZ5zZOClNie9haKDadyS0Yn6qTW5SvOPopmaGzmo3piTkxMaGxtx48YN9OjRAy4ud9LbVlVV8ffoN8ePH8fMmTM5LaASWkNcXFxw7Ngx
dtCgfiM92Gw2q/TvmpoaVfYSpW7zwgsvqPRCkikFBQU4fPgw+vTpA7PZrEoR+8knn/BYpfFps9ng6urK9bk6dOiApUuX8hyi8amM0Gke0UKRgjSmKNsCyZnmZ0Bbt27FH/7wBzz22GMtztEo2r2oqAh1dXUYN24cAPCenb6jjNIG7tQSJr3T1dUVt27d4nZ5+OGH0aVLFzQ0NOA3v/kNP+tDDz3Ejt4vvvgigDvRayQPyLFbmdWCHJYoNWnXrl2RnJys0vcoIovG2EMPPcTvqowioncinY0i36iPaK2k9VP53nQtwmKxoK6ujucEfU953kHvqJwjJKfKysp4XSCDKK0F9LlyfivXb2pTiio7fPgwR4i2adOG37eiooJTzycnJ7NM0el0uHbtWotzXWVGFOU4ojWxuWGU+kmj0XCknMFgwKefforw8HAsXbqU24zmGJ3T0fN5eXmhT58+KCgoYJ2Ovufm5qbSMYODg+Hj44OcnJxWzzrDwsIwaNAgPPLII3BwcMC5c+d+VMPcT4kYrIT/GpQ5d4mKigoUFBS0qD3V/HdAkxGkec5SHx8fFBYWct2L5tdXequkpaWhffv2MBgMSE1NhcVigYODAxwcHPh3hYWFKCgoQKdOnVBYWIjS0lKkpaXxMygVGvqcctMCTQtBfX09rFYrGhsbkZiYyAfVyloYrT1rRkYGtmzZgn79+qkMVps2bcK7774Lm83GeWvpOeh6fn5+KCgoYIH8xRdfYNOmTQgLC0NFRYXquUmhp82O0tOGPCyVuZGVtcHatGmDxsZG1NTUQKfTITExkb1iqHYIAFXqK/K2ojpQVL+heR8rDSN2ux2+vr6w2+2or69Xed4oU83RMwF3Fsi0tDT23IqIiMDUqVM5vJoWaGpfZSqV1vrZw8MDXbt2bdHPQUFBuHXrFqxWK2pqalSpBoxGo6rNUlNTodFoUFRUxJ+lpaWpxr5yY0y/TUxM5BRLBoMB/v7+SEtLQ1VVFbZs2YLo6GhVG65btw79+vXj1I4GgwGZmZlczyQpKYn7xd/fXzUWAXANIlLq6BmpLaqrq7kfSktL4e/vD51Ox+1CHk6pqam88SgqKuL5FhwcjLy8PNhsNj4Uof9Seg4fHx/4+voiKyuLD1ypj9LS0mAwGJCcnAxHR0dYLBb4+vrymLdYLNDpdKyUkYcdzT9SNPR6PT9PWloaUlJSePNBBikKe3d0dOQc/wUFBTAYDNDpdLypd3NzQ0pKikoWKBU45Zgij1nqF3qvnJwcVFRUcB5/SlVAv6Pv3atfyHhZXl5+z35paGiAVqtFUVGR6jlpLFJ/0L327duHoqIiNkYtWrSI0/8o5yD1C/2N2ga4My8pDzgV/qXxoZR1Sq/i1NRUvlZeXh6Cg4NV70X1tPLz81uk9dHr9S0iN5SKtFLGk7dtUFAQALXCq1xzqI6gTqdDYWEhfHx8+HmU7Zifn8/KLrURfY/WnKysLG7/wsJCTi/YfM1JS0tTHYYoxxnNZS8vL9Wao3ymy5cvcz0aZf0vjUbDcyA1NRUVFRU8puvq6njOlJaW4saNG/D19VWtk/ReX3/9NSwWC9LT05GWlga9Xg+j0cgOFhaLhQuoK/uA5B5dh1Jt0jumpqbymFamsSgsLIS/vz+SkpJQUFDAKTw7dOjAbRMcHKwa323btuU+p+eia1N6TeW4orlPhwLKmjFKlG3Wrl07ljvUZhRNRO+snC/BwcFISUlhY25mZibPF5JP1GaUWpfWruZrFq0L5DxC6zeNVeX4JNlFaxZ5T1ZWVrIHJUUU09hQzquqqirYbDZYrVakpqayTE1JSeF0SqmpqfxeFRUVLHd8fHxUBgGdTofQ0FBkZmaqZFFQUBCCg4NZ/tL4J5mp0+lQUFDAkeK5ubkqOas8CFd+TuuBl5cX13MCmuSvUkaT/PXz81PVdKR2JFmpPPymcUU6FbXZV199peoTkmknTpxAXFwcv8+tW7dQXFwMnU6HlJQUpKamcupW
yvFPhyLUNtSW9B6U9qT5ONHpdHzY17FjRx5792oz2mjfq83oWZRtRtD3rl+/znNduWY9/PDDfG+SYyRrac1ydnbmAvX0HsqxnJyczPUmAbUTwHPPPcefL1q0iPtQqd8XFBTg4MGDcHR05IN1pZ7crl07lnn0jG3btuW2b2xsRFJSElJTU5GRkYFBgwbhX//6Fz9HWloaH4QoZb/y0KqwsFAla5Wyv/n+QPn5oUOHADRFvyhlP+0PiOZrIMn+69ev8ztVVVXx8wQFBXG70d+//vprFBQU8KGPs7MzfH19uUi7UueisUBpz0iXov6j9QBQ1y2kyJH6+nqUlJSooheqq6tRX1/PMpv2EKmpqSpjDz0npasj/TAkJITlC70nrdPUP83TMtO4VTon0b2Dg4O5mD3QZNSlesoODg5sFOzWrRvrLNQWbm5uSE5ORn5+PioqKtC2bVtuOxrfFRUVLVKX030rKipQWFjI+5mvv/6a113aM9DYVa7v5MlfUlLChh5y9KK6w5Tui4rS0/pFdR1pzlD/VVVVITQ0lA0pQJPuEBAQwMZoaovw8HDY7XZe00jPoxp4+fn5nLaNjIh0aE7yVpleNSMjg3XS5557DrW1tSgtLUW/fv04AiIoKIjPCEgef/3115zW02q1wmw2s26j7CsaKzROqP/o+ZuPZXpmGss6nQ41NTUoKSmBXq/H9evXkZ+fz0Y7Gsuenp5wcnJCfX19C2Mz1W+821imd1KO5erqapUDm1arhYeHB8tV5VimVLHV1dUqOUTjidqbHDBramqwbds2bvfdu3dzunWr1aqKUli8eDEcHBzg5OQEHx8fbhul4092djbLp+ZOlv7+/khNTYWDgwNWr16N4cOHc00ziuKn2lmJiYm85y4oKODzAIrgdXZ25nFL84Geh76r3HMDTbLPYDDwGYQyeoy+r4zwa9u2Le9127Rpg7KyMmzZsgUFBQXYu3ev6rf79u1DYWEhfH19uR6ycv9P/erm5qba62RmZiIgIEC1vtC+lWqlOjs7w8HBgfUv0oN79uypWqf1ej3XZwaaMmpQfVBvb29OM//aa6/BYDCo5hDdm/Rfs9kMi8XC46W5vkTjmmr7kfwmGd+jRw92MFauec1lbWBgIEd30tig7zQ/xyP5WVNTw/ekd6fnI/1DaUR94YUXMG7cOHh6emLZsmX8veaO9o2NjUhLS2PnjU6dOsFgMKhkQm1tLf//goICuLu7s2GCdHxCud+hdyJH+7q6Ojg5OeHWrVsoKChATU0N7HY7Rza5uLi0uBa1Lcl40sdoD066CMnt0NBQ3lN16NABNTU1MJvNSEtLQ7du3QA0yXaSH76+vqwn0BpMDhW0pyJd5OLFi6isrES7du2g1Wr5mZT7UEKpi9B1Sd+lZ1c6virPjWmsKKG1o7GxEb6+vrz2kfFt48aNfD6l1+u5ZnVwcDBycnL4b0RtbS3LjODgYHTs2BEmk0klP4mSkpJ7nmsDYIefu0Wl/dwQg5XwX0NERAQSEhJUyglZ3YcMGXLX3/Xu3ZuVNvp9fX09jh49ioCAAP59REQEEhMTVeH5Z86cQVVVFcxmMw4dOgSj0YgBAwZwXl1SxuLi4uDl5YW0tDRoNBoO+d68eTNKS0sRGRmJsrIyJCQk8GJ76NAhfqePP/4YWq0WDQ0N/P81Gg1SUlK47sC+ffvw+eefw2azITIyEnFxcQgKCmIv0vHjx2P79u2qujMA8MQTT+CNN96ARqPhRSA9PR1ZWVl8PaqDQgp1QEAAR/wcPnwYGo0G7du3h9FoxLVr1zid2KFDh9hbk55j4MCB3I70fnSonJmZyeG8DzzwAC5fvoyRI0ciKCgIhw8fZqUmKSkJWq0Wffv2Zc836qfW0glGREQgPT2dn+HmzZtswLpx4wYvUu7u7pxTm/qZDuGV/Xzy5EkATYtGfHw8L7q5ubnw8vJCSkoK0tPT+Rpvv/22qp/j4+MB3PFEoXaIjY1FSEgIoqKikJCQwM97
9OhRxMXFAWg6eDh9+jR7TsXFxSEwMBBnz55Fx44dYbFYcOjQIR77ykKT7u7uOHz4MCIiIliZr66uxqBBg/Dll19yGPP27duxfft2dO3aVTVOtm/fjiNHjkCv16NLly5ISEjgEPOkpCS89dZbAJrCnJVjEQCGDx8OjUbDh9D0jDR2zpw5oyoQabFYUF1djfj4eB7zwcHB7AXU2NiIhIQE9OnTB0eOHEH37t3Z44z6jNIWKY3KERERiIuLQ9++fVFcXMxecytWrGDP6crKSmRnZyMiIoL7WqvVorq6GhcvXkRAQAD279+Pdu3a8YExKUBBQUE4cuQIIiIiUFZWhlOnTsFisUCr1eLkyZOwWq2cL508kT/++GP2zKTxq9FoYDabcfr0aR47cXFxrKSQjAgICEBQUBAuX74MoCmEPiEhgfs3OTkZO3bs4Of/7LPPuF/S0tL4We7VL8qw/Hv1C9B0EHv27FneAJKMIPml0Wgwfvx4+Pj4cAo5mgfBwcEIDw9HcXExz8GtW7dyv9jtduTk5KC8vJz7pbCwkK/h5eWFtm3bstKn1Wp5bptMJj6Io36hzdnJkycRERGB4uJi3LhxA3FxcYiIiFA9d1FREXbv3g0XFxf0798fe/bsAQCOEDt27BjMZjOOHz8OrVbLMp4OONq1a4eysjLuF5PJxGtOVlYWTp06xQcOWq0WDz74IIqLi1VrDj1PbW0tzGYzLl26hKCgIJ7XtObs27cPHh4evOZQjQSNRsNrTqdOnXDt2jUUFxe3uuYUFBRAo9Fg5MiRfF+NRsN9f/nyZTz99NO8QSP5S/Oc2pDWLxrTNGbatWuHxsZG/h7QtNmmaFdHR0fePNEcyM7ORlhYGKdJ0Wq1SExM5KLaiYmJaNeuHcrLyxEZGcntFhQUhBs3bsBisWDv3r04ffo0H8KRYYDWHYvFwgWvacMTFBTEczAiIgJnz55Fnz59WDabTCZotVpkZ2dDr9cjKioKQUFByMvLU7XZvn370LdvX5w+fZpllLLNlJAhkKINLl26BF9fX24zMrQVFxfzPeh9IyIicPr0aX63kydP8nzJyMjA0aNHeV3dv38/X6e1NWvixIm8ZgUGBqKhoQHx8fGwWq0oLi5GbGwsHz7m5+er1qxt27YBAM6fP8+HxjQHIyIiEB8fj6+++gp6vR5arRbbtm2DRtNULyguLo6jB3bs2MF1L+Lj4xEXFwc3Nzd8/PHHHHFYWlqKI0eOwGw249y5c2hoaEBiYiL8/PyQlJSkOugYOnSoSv4CTYdLVNeKHEA0Gg1u3LjBfU9tSWNPKX/37dsHvV6PkSNHArgTgZScnIwePXogNzcXZWVlLH91Op3KgYPW99jYWP7Mz88PFosFly5d4nXx0KFDuHr1KgwGA2JjY1mnIrnl7u6OzMxMPPXUUxyhHR8fD61Wi969e2PPnj28hr733ntwd3fnNgPupMuk31y8eFElt0hvCgkJQVlZmWoukp6sXLOat9n+/ftx4cIFmM3me7YZ6YfUZkCTIevq1avcR7t37+b70nwBmtKVBQQEcJspZS2ttXR4RhQVFbGsTUtLQ2JiInvVOzg48JrVpk0bdOrUiZ0dAgMD4ebmptLvycA4ZMgQjpJort9bLBZUVVUhPj6eZS1FddP/kpKS8Mgjj2D79u3YvHmzyvDh7OyMxx9/HFqtFgcPHmTZT+/Xtm1bfh46xFHuN/bt2wedTtdC9sfHx+PmzZvQarUtZH9SUhL3UVlZGetXtAaSHKN+0ev1PKcDAwMRFRWF7Oxsfg+TyYQ333wTWq0WycnJ3C8RERGIiIjgdTczMxOOjo7YuXMn94vVakV2dja6d+/ORtuqqiokJibi888/R3l5OSwWC9q1a8c1FtPT06HVavnwjto5JSUFer0eISEhcHNzQ25uLurq6lhmGY1GvPnmm/zdqqoqNDY28nPSIVh+fj7S09O5zajeVEFBAQICAuDh4QGLxYKYmBhotVqkpaXxZyEhIfD3
9+f5RzrNvn37WAbRIa6LiwuuX7/O9y4rK4PVakVJSQkb5KxWK4YPH45Lly4hICAA7u7uOHLkCGJjYwE0Ob55eHjwfSMiInhe5efnw2g04ssvv8S1a9dQVFSEIUOG4ODBg9BoNHjwwQeRkJCAfv36ITExUbVWka5oMpkQFBTE62NSUhLCwsKQlJSEfv36qaIKgSYDHEWnA00H6+Hh4dBoNCoDx5gxY5CcnIygoCCcPXsWnp6ebKggQ9auXbtQVlaG06dPo3379qitrUX//v25nlNISAjy8vJUfUXRRA0NDdi8eTPeeOMNAHccWnx8fLB582bk5ubC3d2d92iku5hMJqxYsQIAVJEq9HfqKzo8vHnzJt+bxjKtye7u7jyWP/jgA446oLEM3DFiBQUFISYmhtuRolJCQkIwcuRI1NTUsA5IuiWtPfcay3V1dQgMDFSN5ZqaGr6/s7MzLBYLGhoakJCQwDonpahU6pwajQZHjx5FWloarl27hsLCQtTU1MBoNGL8+PF4/PHH2RlJmYmDxsc333zDOp7ZbMbRo0dRWlqK9PR0jihzc3PDzZs3WbZUVlZi+/btCA4O5vWDZKfFYmFZX1ZWhi+++IJrUt28eRN+fn64evUqevXqhcuXLyMsLIz34+SwQc4WZrOZx21kZCQOHjzI89dgMHCEemBgIL7++msEBQXh1KlTXAeY+pLalMYtObgBTVFEpHM2NDTAwcEB06dPh1arRZ8+fXi8OTk5sV5KjpYJCQno27cvbty4oTIo+vv7Iz4+HkeOHEGvXr1w/fp1dhACgB07duCvf/0rG1Lr6ur4zOfo0aNwcnLiNPuBgYEoKyvDxo0beQ4BdyKod+7cyeNg165dqKurg5ubG8sKSrFNsvbQoUO8Z6CaSpRBhjLqUP/961//AtAkL2pqahAZGYnMzExef5TvHBcX16qsBe6kTKS9YWVlJc+JqqoqVFZW8vkgpXCj8TRkyBB2cNJoNDh9+jSfKyrThUdFReHixYsYPXo07+MjIiKQmZnJ8i0wMBBnzpxBaWkpp8FPSUnBgAEDWCZcvXoV3bp1Q1xcHO93jh49ys6kdAZB70P7HaWsJ93AarXy+AbAZ4qkw7i4uKjOVePi4tiYSsbES5cuITAwkHXEmpoa2Gw21hW7deuGW7du8XVtNhuMRiNKS0t57BYWFmLAgAHQaptSuGZlZbGM1+v17FDVtm1b1kXi4uLwxRdfICAgAEVFRbDZbBg7dixKS0vRtWtX3oeWlJTg2rVrHMXq5OQEu92OkJAQHgPr168H0LRXpDYj2U39dOPGDZWDJ52TJCcnIyIiAu7u7hzlW1dXx7pxXFwcunTpotpDlpeXo3Pnzjhy5Ajy8vJw/vx5dp6hfhoxYgQKCwt5P0X9k5ubi7y8PNXzKbFarfj0008RGBiI+vp61oN/7mgaf6rklYLwI1NWVoaxY8eiQ4cOeOaZZ5CXl4dXXnkF48aNw+LFi/l7v/jFL5CTk6MKk9y6dSvWr1/PqRUsFguuXr0Ko9GICRMmYPHixaivr8ekSZNw+/ZtWCwWLFy4EKtWrUJoaCj69OmDDRs2cJjvihUroNPp4OTkBIvFgpycHERHR2P37t1wdnbGq6++im3btuHs2bMs9IOCgpCRkYGXX36ZDwInT56Mffv2sTesyWTiRbm8vJzz4w4bNgxHjhxhhYpCZ7ds2YIXX3wRpaWl0Gg0GDBgAC5cuMCp4GprazF//nxERkZi2bJlOHfuHEcYURqBTp06IS8vD927d0dCQgIMBgMr0FQUub6+Hm+88QaSkpKwadMmODk5sReOEnd3d8TFxWHEiBGora3FsGHDuPghhdZrNBoMHz4clZWVOH/+PFxcXNCjRw828tD9gaa0Q3FxcZyOYcCAATh48CAyMzNRV1cHk8mEp59+Grm5uYiJiYGHhweKi4vZs1mv1/O1HB0d4ePjw5FcVquVDyyApoV79OjR7NEUFBSElJQU9pqkBTsqKgo3
btxARUUFKioq0K1bN3z99dfo3Lkz5s+fjzfffJP/rtVqER0djYiICMybNw9WqxWzZs3C3r178c0336BNmzbw8/PDjRs3YLfb4e3tjby8PDg5OWH16tVYvHgx8vPz8dZbb2HBggXo0KEDSkpKkJycDL1ez0YcMkQ6ODigpqYGoaGhfCgBAH/84x/x4Ycforq6mlMJAE1K+VNPPYXGxkb06tUL7du3R2xsLM+T/Px8+Pn5cX5goOlg3tHREWlpaRg8eDBsNhtycnKwZ88ePPjggygtLUWHDh1449bQ0ICePXsiOTkZsbGxuHLlCubNmwej0Qiz2cx5600mE0wmE6xWK6Kjo5GSkoIvvviCw6MJSinZs2dPXLlyhaMjKAWXxWJBZWUlunTpgqKiIjg7O7OXo6+vL3JycrgA7JQpU7B//34MHDgQJ06cUBW7r6mp4QPHtm3borS0FMOHD0dCQgKsVisX1KVx1LdvX1y4cAEeHh7seUiHIvTcyiLIAwYMwLlz51jRNxqNnMph8ODBOH78ODQaDSIjI5GRkcGeOhMnTsTBgwdRX1+Pnj17ciqjHj16cBQcbezpML6goID7hRTZxsZGBAcH4/bt2zAYDAgICMDNmzd5U0GebZTegw78n3/+ebz99tvQarUoLS3lw+jAwEAkJydj7NixOHr0KMaMGYPY2FguKgw0Kf+hoaHcL1arFbm5ufweJNucnZ35ea9fv47g4GA+uC8pKeG2Cg4ORmZmJoYPH45Tp06xN1mbNm14M2Wz2aDX6/Hwww/j008/ZW9yFxcXWK1WaLVadOzYERkZGaisrMSDDz4IrVbLhzTk+VZSUoLRo0fj+PHj6NSpE8aPH48PPviAI6ycnZ3h7e2NkpIS3thS6kZKCUHja+jQoXjkkUfw+uuvw2q1wm63w2g0std4//79ec2hdqT2o2L3AFRrjp+fH5KTk1FfX8/XMhqNcHFxUa05AwYMwPnz5/lejz76KI4fP47x48fDbrdjz549KuMKpfkaPXo0fv3rX6OxsRGzZs1CaWkpTCYTdDodH9zRnOzTpw/69evH0R1eXl7sZevm5gZ3d3eUlZWhpKRElUpWp9PB19cX6enpfOBLMs3DwwOFhYVwcHBAaGgocnNzkZ+fj9GjRyMhIQE1NTVwcXFBaWkpy4KAgABkZWXxmufk5MRRVcCdgycHBwf+/MUXX8TGjRthsVi4b2l8U8TawYMHceXKFbz44ovo0qWLKt84rUFarRZPPPEEIiMjUVRUhCVLlqCurg4GgwHDhg3jQ41HH30UBw4c4MMcZZQGreHUZjk5ObBYLBgzZgzeffddGAwGTv2j0+nQuXNnXLt2DXq9Hq+88gqOHDmCY8eOwdXVlVOmtbZmzZ07F5WVlXj66aexdetWHve0PhqNRrRp0wb5+flwcnJCSEgIrl69yuOZ5sioUaMQExMDi8WC559/HqtWrYLNZsODDz6II0eOcEqe27dvsyPAqFGjcOLECbRv3x42m41rYg0dOhSnT5+GRtNU7/H8+fNch+vAgQPcx0OHDkV8fDw6derEm+Y+ffpwehAay+7u7hwdRu02ePBgnD17FhqNBh4eHpgzZw5eeeUVDBgwgOXvk08+icTERHz55ZfQ6XR49dVXceHCBezZswcRERH49NNP0bZtW/YAttvt8PHx4dRIpCc1NDTAbDarHG7mzJmDvLw87Nq1i9cc6ktvb29kZWXB0dERq1atwksvvcSHj3TYFxgYiKKiIk7lOmLECMTGxvJ6Qp7QU6ZMwe7du2E2m7F48WLs2LED165dg6enJ4qKiuDu7s73Juegjh078mad0gu++uqrKCoqwl/+8heW0z4+PkhPT0e3bt1w8eJF1r1CQ0ORmJh41zbz8vLC1KlT8Y9//APV1dXo2LEjCgoK+LCrpKQE1dXV8PX1ZaNOXV0dHBwc4OXlhWPHjuHAgQOYN28eR4EBTYfIdKBIqbi6du2KK1eusCymiDGz2XxXWas0
NiplLckK8oo3Go1wdHREu3btUFBQgJUrV3JNCqX+SfPEbrdjypQpsNvtiImJgV6vh9lsZgPU4MGD2bFi7ty5ePPNN1FVVcUH0rW1tWhsbFTJ/ilTpnA9GpPJxPsDd3d3rFq1SrXf+Oijjzhlz5w5c7B161aMGzcOdXV1iImJgZeXF0aPHo1jx45x9LGDgwP8/f0xatQobN++nfvF3d2dawb5+fmhpKQEVqsV7dq1Q25uLrch6QAU9d65c2cUFRXB19cX586dYx2DIk6pxlN1dTXef/99/PKXv2SnJJPJhLq6Ou6bNm3aoFu3bkhMTOQ1idYjcgqirA4dO3ZESkoK7HY715BSGrXNZjNsNhvsdjtMJhPWr1+PgoICrFixgtOWUspbGmN9+/bFrVu34OLioopIU6Y+12g06NixI/Lz8zFx4kQkJSXhiy++4LFIawy1w8yZM7F582YMGjQIly9fZkOMMr3zgAEDMG3aNK4h1rzeIcm9tWvXYteuXcjJyUH79u15jnbs2BFXr17lAz5/f38kJibC3d0dM2fOxL/+9S94e3sjJSWFnS8GDx6MlJQUODg4sAHm8OHDrHP7+/sjKyuLIwkp6o2MAEqDCNCkMzU2NqKqqorna5cuXfjwvaGhAc7OztBomuoO5uXlcVpDMihTPZTg4GCkp6ezHpaYmAiz2cyRWbdv30ZoaCiKi4v5DOG1115DYmIiR/OYTCbk5OSwwYsie2/fvq2KSKU+ozS7RUVFPM9qa2tZ79doNJwetrq6GmPGjEFBQQEbWEiPJZRjmfQAahcaI2azGVarlXVW0vccHBxUaYJpLJMT2a9+9SsAwLvvvst7XrPZzJHNGk1TJoUbN26gd+/euHjxIj+XVqtVRaCazWZMmjQJGRkZuHjxImpra9nARtHubm5umDp1KrZu3YrZs2fjrbfe4kgTZcrfyMhI3LhxA/n5+fD19eVICNp3k/PHX/7yFyxbtgxhYWFsfKQxRNclR9tDhw6htraWs3GQkY+iSWfOnInY2FjY7XZUVFTw+CVdk84gPD09OTMG7S1pL0P7l+7du+Orr75inUIp8+rr6zl1ITkhmEwmNtrW1tbCy8sLc+fOxSuvvAJfX18kJiZCr9dzzcj09HTWGx599FEcPHgQXbt2xfXr11XRKRQtTHsOMjhR+3l6esJqtWLs2LE4duwYOwJ27doV165dg0ajwezZs5GYmMg165YtWwaNRsPp6KqqqritSBbT3NJomurtUV9Q1h43NzesWLECzz77LK9dzWUU0OQouWTJErz44os8Ho1GIxvZjUYjBg0ahBUrVmDSpEkcFQkAffr0gaOjIz7//HNoNBqsXbsWY8aMAdDkiHvp0iWYTCZMmzYNCQkJfAYUFxeHp556ivVRkrG1tbVskHz88cexePFizJw5ExcvXuR9XFhYGMrLy/lsYtCgQXBxccHp06c5deSyZctQXFyMdevW8bihiFC9Xs/RgNXV1ZgwYQI2btzIc23UqFE4fvw42rRpwzrE66+/jpiYGHZW69OnDyorK3H79m1UVVVh8ODBePzxx2EymTB//nzuJ9KnSKbSmkXnbqRzOjk5obS0FE5OTujUqROuX7+OwYMH49SpU7wnJxlI54eenp7QaDTslENnTqSH1dfXIygoCLdv38aIESNw4sQJ/juNa4PBAG9vb9bXQ0ND8eWXX/JY8fHxQX5+Pnr06IGXXnoJS5YsYYc6q9XKRiQAvO+12+3o1asXxo8fj3feeYfX5sGDByM/Px+FhYV89nns2DHExcVhyZIlaGhoQFVVFcLDw3k9rKioQGxsLJYsWYIzZ85w5iIHBwf4+vriypUrcHd3R48ePTBu3Dh88sknuHXrFnJzczn9P0VXlZaW8visrKzE+PHjeV8yceJEnDx5kg19cXFxuHnzJt566y3k5eWha9eucHZ2xokTJ5CVlQUXFxd4e3tjz549qtqEP1fEYCX8V5GSkoJly5bh0qVLcHJywoQJ
EzB37lzVZJ0xYways7PZ6xNoEo5bt27F9u3bOWexi4sLJk+erPp9Xl4eJkyYgNLSUjg7OyMqKgoLFy6Ek5MTtm7divfffx/FxcXw9fVFQ0MDcnNzWblwcXFBVFQUCgsLkZCQAJ1OBx8fH6SlpcFms2Hw4MFYtGgRQkJCcPz4cSxZsoQt9CaTiQW4o6MjSkpK4OjoCE9PT06ro9VqebOs1+uxceNGjBgxAikpKZgzZw57jQNN3jWvv/46nnjiCcyfPx9Wq5W9Y5rj7u6OiRMnoqioiDfrzZk4cSJeffVVNDY24qGHHuJ0C0rCwsKwZs0ahISEIDo6GteuXeP6KgR5WdlsNnTp0gVRUVF48803W4RMGwwGVgxMJhOngyspKWm1gKSrqyt+9atf4ezZs2ywU0KHoX379kW/fv2wZ88eVowdHR0xdOhQlJSU4KuvvoJW21RfRVmnym63w9/fnxX57OxsTs9iNBo5tWR1dTV69+6NgoICmM1mzJo1C2+88QbS0tLg6ekJs9mM7Oxs2O12NgIojWtarZYVGEopYjabcebMGR7758+fZ88cer7Ro0fjoYcewpIlS1ShxZQb22QyISoqCunp6fjmm29Uc+P48eNcGwRoUvpfe+01bNu2DV9++SWHRJNySWlcRo0ahZUrV+L3v/89z7eUlBTMnTuX0wrShq1bt25YsGABp1nYuXMn1qxZ0yIHuclkwtSpUzF//nzU1tZiwYIFOHnypMpbk5QRFxcXBAUFIT09nVMX0EEUAJ5vFosF0dHRdy22O2zYMPz973/HqVOn8Kc//alVQ2ynTp0wb948DBkyBGvXrsW2bdt4I+jg4MDt4+fnh7KyMpSWlrKyrRz/QNN8o0M+KvhNBxvkVT137lx8/vnnKhnRHA8PDz7ArK+vZ8OCt7c3KioqUFVVBaPRiKNHj6KyslLVL4RWq0VAQACmTZuGadOmISYmptV+AZrC+F9//XX86U9/4rmjxNXVFfPnz8fkyZNZVpL3EG3KDQYD+vXrx/3yxBNPqIyRzZ+tTZs26N27NzIyMjjVCOXqB5oitubPn48hQ4ZgzZo12LZtW4v2bn5NBwcHnnf0mcFg4JQuN2/eVB0yNm/zXr16ISMjo0X6JGL69Ol46aWXsGbNGnzwwQfct62xcuVK7N69mw8HjEYjxo4di0WLFqnWHBoDyjpTAFqsOdQ21AZ9+/bF0qVLec1ZuHBhi9QDxOHDh+Hn54fly5djz549dy3k2r9/f/z1r3/F5MmTVamHyHMuKCgIWVlZKCkpgaenJx9iAnfWCIvFgmXLluHTTz9VGfKApjWif//+aGhowMWLF1XvQweetbW17N362WefYe3atdi9e3eLdYQ2uTNnzuS6jMrxodU25UR/6KGHkJSUhKSkJLi7u3PaW/Igpucj2UhybPfu3di6dStu376tegej0Yjo6GjMnz8fDz/8cKvzxWQy4f/+7/8wdepUTJs2DZcvX2517CrbjHQTAK2ugzqdDgEBAWhoaMA333wDT09P5OTkwN3dHeXl5Xdds8LDw+Hq6opz586hpqaG1yZqI6BprFGqqHsVQyfIG99gMCApKYkNrTS36DDRZDKxsSI3N5flJh0eODo6oqqqCh4eHqwblZeX8xqq0+lU6fWUODg4sNMNQYd+5EQzcOBAnDlzBhUVFfDw8GCdsrn8dXNzg8lkQlFRETp06IAXXngBQ4YMwfLly/Hxxx+zjKN1UqfTYfjw4Zg5cybWrFmDK1eutNpnzfsPuOPB7OnpyTUy6MD7busBjVODwQCTydRqPVb6XkBAAB9eK+WTwWBgw4Oyvk9YWBj8/f2RkJAAvV6vajM6ZK+vr2ddWJmW6NvaTLlm0eE2RbuTgwIAdpIKDAyEzWZjHWbnzp1YvXp1i7lP0Qnr1q2Dr68vFixYgBMnTqhkyaOPPvq9Ze1nn312V11i7ty5mDVr1veStR9++GELnWrZsmU4deoUDAYD6zY055SyVin7yUud
2l6pjzffb9BhnLOzM493AC3Gcmtjh2qzKQ1I5OCi0+mwYMECHD9+vIWeSvLgb3/7G9asWYPs7GzExsa26BegaY527doVixYtQq9evXD79m08++yzLfQXBwcHjB07FgsXLuS9W2trt7OzM1566SXExsZyCru7odFoEBISguXLl+P06dN33TcZDAY88sgjWLx4MfLy8rBs2TLeeyjnuYeHB1544QXExsYiOTkZHTt2xM2bN9lRS3lfap8HHngAmzdvxrp161q999ChQ7Fu3ToUFRXhpZdewtWrV1u8t6enJ+bMmYMpU6bwnjg2NhbLly9nJ0C6LxnBH374Ybi5uXFUOBk8APBe5MEHH8SCBQvw/vvv46233mJDJBlBGxoa+N908N/82eierq6uHIlDhiiC9HmNRoNf/vKXePbZZ7Fp0ybExMS0aDtHR0f069cPs2fPhpOTE++TaA5rNBr07t2bo/npDMHR0RHOzs68n3VwcMCsWbOQkJCAW7dusRG5NV2oc+fOWLp0KY/lxx577K5jpXv37li0aBGSk5Oxc+dOJCcnq9YsirZTjuU///nPXD9Uia+vL5599lnExsbi8uXLcHFxgYeHR4u5Qdf18/Pj9L93W7dpLDc0NHDKvrvh5OSE6OhozJ07F7W1tVi+fDk70AFNczc8PBwrVqxAcHAwy1VKeUfrDUUAlpSUwGw2o6GhodV9h06nw6ZNm/iwe+3atUhOTm6xloaHh2Pjxo1wc3NT7dGU7zho0CB06NABR44cQVFRERu3lftGAJx+k+oNUipk5Z577969yMjIwNq1a5GSkoLGxsYWz6TVatGhQwe0bdsWly9fvqueAjQ5dYWEhODChQuqurSOjo6Ijo7GP//5T8yfPx8uLi7YunVrq2nAHB0dERgYiIKCAjZ4klx1c3PDY489hrlz56K4uJijo1tjy5YtGDFiBGJiYrB8+XKVjkBnFY6OjjwPmxs0WyM2NhbPPPMMOxIq26h///5YsmQJgoODERMTg2XLlqkce5Sy9tq1a5g5c2ar99Dr9fjb3/7GJR0A4LHHHkNeXh6qqqrYKYfGZ0hICObMmaM65yBMJhOefPJJdq6dMWMGMjMz0bNnT5w8eVI13qlGV5cuXbBgwQL85S9/QXp6uqqNmuPk5IS3334bgYGBWLlyJT755BMulUHjxGQyYfjw4aro8uZQhPWoUaNw9epVLvvg7OwMnU6H4uJi7hsyKLm6umLixImYO3cuYmNjsXTpUtV68Ktf/Qpz5szBO++8g/feew95eXmt3tvDw4PnCNUoa419+/YhKysLb7zxBqdWVD6TyWRCZGQkXnzxRezcuRN79+6961kf0JSuMyAgAMePH2+xv27Tpg0mTJiAcePGYfXq1ap1QImXlxeCgoL47GrkyJGt7tO8vLywfv169OrVix2BW2Py5MlYuHAhkpOTsXr1aty6dYvrcdJ7+vr64g9/+INqfN6+fRvLli1DfHw819vs1asXli1bhpCQEGRkZGDp0qW4dOkSn01ptVr4+flh3Lhx+M1vfnPXGsk/N8RgJQiCIAiCIAiCIAiCIAiCIAiCINxXpIaVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIws+A0NBQbNiw4Xv95ty5cwgNDcW5c+d+pKcSBEEQBEEQBEH4zyAGK0EQBEEQBEEQBEH4AVy8eBEbNmxAeXn5/X4UQRAEQRAEQfjZo2lsbGy83w8hCIIgCIIgCIIg3Jva2lrodDro9frv/Bu73Y76+noYDAZoteKv
+J/m7bffxqpVq3DixAn4+/vf78cRBEEQBEEQhJ81smMRBEEQBEEQBEH4/xS73Y7a2loAgMlk+l7GKgDQarUwmUw/a2NVY2Mjampq7vdjCIIgCIIgCILwI/Pz3bUIgiAIgiAIgiD8TNiwYQNCQ0ORkpKC559/Hr1798aAAQOwfPlyNkgBTXWqli5din379mHs2LHo0aMHTp06xX9rXsMqLy8PCxcuxNChQ9G9e3eMHDkSS5YsQV1dHYDWa1jNmDEDjz76KJKTkzFjxgz07NkTw4YNwz/+8Y8Wz52dnY1Zs2bhgQcewKBBg7BixQqcOnXqe9fF+uijjxAaGorz589j8eLFGDBgAHr37o358+ejrKxM9d2RI0fimWeewalTpzBp0iSEh4dj165dAIDbt2/jueeeQ//+/dGzZ09MnToVn376qer39M5xcXHYuHEjhg0bhl69euG5555DRUUF6urq8PLLL2PQoEHo1asXFixYwO3VWj889NBD6NGjByZNmoTz58/zdzZs2IBVq1YBAEaNGoXQ0FCEhoYiKyvrO7eLIAiCIAiCIAh3+H7ueYIgCIIgCIIgCMIPZs6cOfDz88O8efNw+fJlvPvuuygvL2fDBwCcPXsWhw4dwvTp0+Hm5gY/P79Wr5WXl4cpU6agoqICU6dORXBwMPLy8nDkyBHU1NTAaDTe9TnKysrw29/+FlFRURgzZgyOHDmC119/HZ07d0ZkZCQAwGq14he/+AUKCgowc+ZMtG3bFgcOHPhehqrmLF26FC4uLpg9ezbS0tKwc+dO5OTk4N1334VGo+HvpaWlYd68eYiOjsbUqVPRoUMHFBYW4oknnkB1dTVmzJgBNzc3fPzxx/j973+P9evXIyoqSnWvrVu3wsHBAb/73e+QkZGBHTt2QK/XQ6PRoLy8HLNnz8aVK1fw0Ucfwc/PD7Nnz1b9/vz584iLi8OMGTNgNBqxc+dO/Pa3v8Xu3bvRuXNnREVFIT09HQcOHMCCBQvg5uYGAHB3d//B7SMIgiAIgiAI/8uIwUoQBEEQBEEQBOEnwt/fH5s3bwYATJ8+HRaLBe+//z5+/etfIywsDECTsWb//v3o2LHjPa+1Zs0aFBYW4sMPP0SPHj348+effx7fVqo4Pz8fr776KiZOnAgAmDJlCkaOHImYmBg2WH3wwQe4ffs2Nm3ahNGjRwMAnnjiCf7ND8FgMOCdd96BwWAAAPj6+uK1117DyZMnMWrUKP5eRkYG3nrrLQwbNow/W7FiBQoLC/Hee++hb9++AIDHH38c48ePx8qVKzFq1ChV6sOGhga8++67fK+SkhIcPHhQFU02ffp0ZGZm4qOPPmphsLp16xZiYmLQvXt3AMDYsWPx8MMPY/369di4cSPCwsLQtWtXHDhwAKNHj5YaVoIgCIIgCILwbyIpAQVBEARBEARBEH4ipk+frvr3U089BQD4/PPP+bN+/fp9q7HKbrfj+PHjGDFihMpYRSijlVrD0dEREyZM4H8bjUb06NEDt2/f5s9OnToFb29vlSHJZDJh6tSp97z2vYiOjmYDEgA8+eST0Ov1+Oyzz1Tf8/f3VxmrAOCzzz5DeHg4G6sAwMnJCdHR0cjOzkZycrLq+xMmTFDdKzw8HI2NjZg8ebLqe+Hh4cjNzYXNZlN93qtXLzZWAU3GtVGjRuH06dNoaGj4nm8uCIIgCIIgCMK3IQYrQRAEQRAEQRCEn4jAwEDVv9u3bw+tVquqe/RdInWKi4tRWVmJTp06/aDn8PHxaWHUcnV1VdWTys7ORvv27Vt8r3379j/onkDL93dycoKnpyeys7NVn7fWBjk5OejQoUOLz4ODg/nvSnx9fVX/dnZ2BgC0a9euxed2ux0VFRX3fFYACAoKQnV1NYqLi1v8TRAEQRAEQRCEfw8xWAmCIAiCIAiCINwnWouEcnBw+NHvq9PpfvR7/Dv8J9pAmR7wu3z+bWkUBUEQBEEQBEH4cRGDlSAIgiAIgiAIwk9ERkZG
i3/b7fbvXf/I3d0dFosFSUlJ/8nHU+Hn54fMzMwWhpzMzMwffM3m719VVYWCggL4+fl96299fX2RlpbW4vPU1FT++3+S5s8KAOnp6TCbzXB3dwfw7akXBUEQBEEQBEH47ojBShAEQRAEQRAE4SfivffeU/17x44dAICIiIjvdR2tVovRo0fjk08+wdWrV1v8/T8RLTR06FDk5eXhxIkT/FltbS0+/PDDH3zNDz74APX19fzvnTt3wmazfaf3j4yMxFdffYVLly7xZ1arFR9++CH8/Py+te7X9+XSpUu4du0a/zs3NxcnTpzAkCFDOELNbDYDQIt0goIgCIIgCIIgfH/09/sBBEEQBEEQBEEQ/lfIysrCrFmzMGzYMFy+fBn79u3Do48+irCwsO99rRdeeAHx8fGYMWMGpk6dipCQEBQUFODw4cN4//334eLi8m89a3R0NHbs2IF58+Zh5syZ8PT0xP79+2EymQD8sOii+vp6/PKXv8SYMWOQlpaG999/H3369MGoUaO+9be/+93vcPDgQTz99NOYMWMGXF1dsXfvXmRlZWHDhg13TfX3Q+ncuTN+85vfYMaMGTAajdi5cycA4I9//CN/p1u3bgCAtWvX4pFHHoHBYMCIESPg6Oj4H30WQRAEQRAEQfhfQAxWgiAIgiAIgiAIPxFvvPEG1q1bh9WrV0Ov1+Opp57C/Pnzf9C1vL298eGHH2LdunXYv38/Kisr4e3tjYiIiP9IDSgnJyds27YNy5cvx/bt2+Ho6IiJEyeiV69e+OMf/8iGq+/D4sWLsX//fqxfvx719fUYO3YsFi1a9J2MX23btsWuXbvw2muvYceOHaitrUVoaCi2bNmC4cOH/4A3vDf9+vXDAw88gE2bNiEnJwcdO3bEypUrVcbF8PBwPP/889i1axdOnToFu92OEydOiMFKEARBEARBEH4AmkapLCsIgiAIgiAIgvCjsmHDBmzcuBFnzpzh+kc/V9555x2sXLkSn3/+Oby9vb/Tbz766CMsWLAAe/bsQY8ePX7kJ/z3CQ0NxfTp07F48eL7/SiCIAiCIAiC8D+D1LASBEEQBEEQBEEQWqWmpkb179raWnzwwQcICgr6zsYqQRAEQRAEQRCE74KkBBQEQRAEQRAEQRBaZfbs2fD19UVYWBgqKyuxb98+pKam4vXXXwfQZNCqqKi45zVcXV1/ikcVBEEQBEEQBOFnjhisBEEQBEEQBEEQhFYZOnQo9uzZg/3796OhoQEdO3bE2rVr8cgjjwAA4uLisGDBgnteY/v27T/FowqCIAiCIAiC8DNHalgJgiAIgiAIgiAIP4j8/HwkJyff8zvdunWTKCtBEARBEARBEL4VMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xXt/X4AQRAEQRAEQRAEQRAEQRAEQRAE4X8bMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI95X/B/V7Nk7OqfW/AAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df[[\"pricing_prompt\", \"pricing_completion\"]].plot.scatter(\n", - " x=\"pricing_prompt\", y=\"pricing_completion\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "unsupported operand type(s) for /: 'str' and 'str'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_ratio\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_prompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/arraylike.py:210\u001b[0m, in \u001b[0;36mOpsMixin.__truediv__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__truediv__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__truediv__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruediv\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/base.py:1382\u001b[0m, in 
\u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: 
ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[0;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", - "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[0;34m(x, y, op)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'" - ] - } - ], - "source": [ - "df[\"price_ratio\"] = df[\"pricing_completion\"] / df[\"pricing_prompt\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# df[\"total_price\"] =" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py deleted file mode 100644 index c94786208..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hllm.tutorial.py +++ /dev/null @@ -1,118 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# CONTENTS: -# - [Description](#description) - -# %% [markdown] -# -# # Description -# -# This notebook examines ... - -# %% -# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" -# #!jupyter labextension enable - -# %% -# %load_ext autoreload -# %autoreload 2 - -import logging - -import helpers.hdbg as hdbg -import helpers.henv as henv - -# %% -print(henv.get_system_signature()[0]) - -hnotebook.config_notebook() - -# %% -# hdbg.init_logger(verbosity=logging.DEBUG) -hdbg.init_logger(verbosity=logging.INFO) -# hdbg.test_logger() -_LOG = logging.getLogger(__name__) - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet openai requests)" - -# %% -import helpers.hllm as hllm -import helpers.hpandas as hpandas - -# %% -val = hllm.get_model_stats() - -# %% -import pprint - -pprint.pprint(val[0]) - -# %% -import pandas as pd - -# %% -# Normalize the nested JSON -df = pd.json_normalize(val, sep="_") -df -# View the resulting DataFrame -# print(df.T) # Transpose just for readable vertical inspection - -# %% -df.iloc[0].T - -# %% -col_names = ["id", "context_length", "pricing_prompt", "pricing_completion"] - -# %% -df.dtypes - -# %% [markdown] -# # - -# %% -for col in df.columns: - print(hpandas.infer_column_types(df[col])) - -# %% -df.apply(lambda x: 
pd.Series(hpandas.infer_column_types(x))).T - -# %% -hpandas.infer_column_types_df(df) - - -# %% -pd.to_numeric(df["pricing_request"], errors="coerce").notna() - -# %% -df["pricing_completion"] - -# %% -df.sort_values("pricing_prompt")[col_names] - -# %% -df[["pricing_prompt", "pricing_completion"]].plot.scatter( - x="pricing_prompt", y="pricing_completion" -) - -# %% -df["price_ratio"] = df["pricing_completion"] / df["pricing_prompt"] - -# %% - -# %% -# df["total_price"] = diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb deleted file mode 100644 index 60491a1c6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.ipynb +++ /dev/null @@ -1,993 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-07T22:25:23.663978Z", - "start_time": "2020-06-07T22:25:23.661756Z" - } - }, - "source": [ - "# Description" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:40.920362Z", - "start_time": "2020-06-09T19:30:40.864535Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "%matplotlib inline\n", - "\n", - "import json\n", - "import logging\n", - "\n", - "import jsonpickle\n", - "import jsonpickle.ext.pandas as jsonpickle_pandas\n", - "\n", - "jsonpickle_pandas.register_handlers()\n", - "\n", - "import pandas as pd # noqa: E402\n", - "\n", - "import helpers.hdbg as hdbg # noqa: E402\n", - "import helpers.henv as henv # noqa: E402\n", - "import helpers.hplayback as hplayba # noqa: E402" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:43.871255Z", - "start_time": "2020-06-09T19:30:43.739350Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Logger already initialized: skipping\n", - "# Packages\n", - " python: 3.7.6\n", - " gluonnlp: 0.9.1\n", - " gluonts: 0.5.0\n", - " joblib: 0.15.1\n", - " mxnet: 1.6.0\n", - " numpy: 1.18.4\n", - " pandas: 1.0.3\n", - " pyarrow: 0.17.1\n", - " scipy: 1.4.1\n", - " seaborn: 0.10.1\n", - " sklearn: 0.23.1\n", - " statsmodels: 0.11.1\n", - "# Last commits:\n", - " * 268f2f1 saggese PTask2231: Checkpoint ( 2 days ago) Sun Jun 7 20:58:52 2020 (HEAD -> PTask2231_Playback_approach_for_unit_testing, origin/PTask2231_Playback_approach_for_unit_testing)\n", - " * 7025106 pavel-... PTask2291: Add args, kwargs. 
New tests ( 6 days ago) Wed Jun 3 11:38:56 2020 \n", - " * 60e0b11 saggese PTask2291: Add leftover files ( 10 days ago) Sat May 30 10:06:29 2020 \n" - ] - } - ], - "source": [ - "hdbg.init_logger(verbosity=logging.INFO)\n", - "\n", - "_LOG = logging.getLogger(__name__)\n", - "\n", - "_LOG.info(\"%s\", henv.get_system_signature()[0])\n", - "\n", - "hnotebook.config_notebook()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:53:12.564104Z", - "start_time": "2020-06-09T19:53:12.513350Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n" - ] - } - ], - "source": [ - "data = {\n", - " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", - " \"Price\": [700, 250, 800, 1200],\n", - "}\n", - "\n", - "df = pd.DataFrame(data, columns=[\"Product\", \"Price\"])\n", - "df.index.name = \"hello\"\n", - "print(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:56:24.324137Z", - "start_time": "2020-06-09T19:56:24.279767Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Product': hello\n", - " 0 Desktop Computer\n", - " 1 Tablet\n", - " 2 iPhone\n", - " 3 Laptop\n", - " Name: Product, dtype: object,\n", - " 'Price': hello\n", - " 0 700\n", - " 1 250\n", - " 2 800\n", - " 3 1200\n", - " Name: Price, dtype: int64}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# df.to_json(orient=\"\")\n", - "df.to_dict(orient=\"series\")" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:58:05.848188Z", - "start_time": "2020-06-09T19:58:05.747808Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - 
"\"pd.DataFrame({'Product': ['Desktop Computer', 'Tablet', 'iPhone', 'Laptop'], 'Price': [700, 250, 800, 1200]})\"" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.to_python_code(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:58:31.870465Z", - "start_time": "2020-06-09T19:58:31.822189Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ProductPrice
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", - "
" - ], - "text/plain": [ - " Product Price\n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(\n", - " {\n", - " \"Product\": [\"Desktop Computer\", \"Tablet\", \"iPhone\", \"Laptop\"],\n", - " \"Price\": [700, 250, 800, 1200],\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T20:14:52.983985Z", - "start_time": "2020-06-09T20:14:52.861966Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Initialize values for unit test.\n", - "dummy_0 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "dummy_0 = jsonpickle.decode(dummy_0)\n", - "dummy_1 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "dummy_1 = jsonpickle.decode(dummy_1)\n", - "# Call function.\n", - "act = F(dummy_0, dummy_1)\n", - "# Create expected value of function output.\n", - "exp = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Product,Price\\nDesktop ComputerDesktop 
Computer,1400\\nTabletTablet,500\\niPhoneiPhone,1600\\nLaptopLaptop,2400\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Product\": \"object\", \"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"}}'\n", - "exp = jsonpickle.decode(exp)\n", - "# Check.\n", - "assert act.equals(exp)\n" - ] - } - ], - "source": [ - "use_playback = True\n", - "\n", - "\n", - "def F(a, b):\n", - " if use_playback:\n", - " playback = Playback(\"assert_equal\", \"F\", a, b)\n", - " playback.start()\n", - " c = a + b\n", - " if use_playback:\n", - " output = playback.end(c)\n", - " res = output\n", - " else:\n", - " res = c\n", - " return res\n", - "\n", - "\n", - "a = df\n", - "b = df\n", - "print(F(a, b))" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T20:20:24.981307Z", - "start_time": "2020-06-09T20:20:24.839197Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'[3, 3, ]'" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.to_python_code([\"3\", 3])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:30:54.111194Z", - "start_time": "2020-06-09T19:30:54.046499Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# obj1=\n", - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n", - "class=\n", - "# frozen=\n", - "{\n", - " \"meta\": {\n", - " \"dtypes\": {\n", - " \"Price\": \"int64\",\n", - " \"Product\": \"object\"\n", - " },\n", - " \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, 
\\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": \\\"hello\\\"}}\"\n", - " },\n", - " \"py/object\": \"pandas.core.frame.DataFrame\",\n", - " \"txt\": true,\n", - " \"values\": \"Product,Price\\nDesktop Computer,700\\nTablet,250\\niPhone,800\\nLaptop,1200\\n\"\n", - "}\n", - "# obj2=\n", - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200\n", - "class=\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ProductPrice
hello
0Desktop Computer700
1Tablet250
2iPhone800
3Laptop1200
\n", - "
" - ], - "text/plain": [ - " Product Price\n", - "hello \n", - "0 Desktop Computer 700\n", - "1 Tablet 250\n", - "2 iPhone 800\n", - "3 Laptop 1200" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hplayba.round_trip_convert(df, logging.INFO)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-07T22:32:12.623139Z", - "start_time": "2020-06-07T22:32:12.577435Z" - } - }, - "outputs": [], - "source": [ - "hplayba.round_trip_convert(\"hello\", logging.INFO)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:45:27.442281Z", - "start_time": "2020-06-09T19:45:27.380299Z" - } - }, - "outputs": [], - "source": [ - "def F(a, b):\n", - " return a + b" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-06-09T19:45:36.907940Z", - "start_time": "2020-06-09T19:45:36.861549Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "dummy_0 = r\"3\"\n", - "dummy_0 = jsonpickle.decode(dummy_0)\n", - "dummy_1 = r\"2\"\n", - "dummy_1 = jsonpickle.decode(dummy_1)\n", - "# Call function.\n", - "act = F(dummy_0, dummy_1)\n", - "# Create expected value of function output.\n", - "exp = r\"5\"\n", - "exp = jsonpickle.decode(exp)\n", - "# Check.\n", - "assert act == exp\n", - "\n", - "\n", - "# #############################################################################\n", - "# Playback\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:31:00.704146Z", - "start_time": "2020-05-29T18:31:00.695276Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "class Playback:\n", - " # def __init__(self, 
file_name, mode, *args, **kwargs):\n", - " # self.args = args\n", - " # self.kwargs = kwargs\n", - " def __init__(self, file_name, mode, func_name, a, b):\n", - " self.a = a\n", - " self.b = b\n", - "\n", - " def start(self):\n", - " self.a_json = jsonpickle.encode(self.a)\n", - " self.b_json = jsonpickle.encode(self.b)\n", - "\n", - " def end(self, ret):\n", - " self.ret_json = jsonpickle.encode(ret)\n", - " output = []\n", - " output.append(\"# Initialize values for unit test.\")\n", - " output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", - " output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", - " output.append(\"# Apply values.\")\n", - " output.append(\"act = F(a, b)\")\n", - " output.append(\"exp = %s\" % jsonpickle.decode(self.ret_json))\n", - " # output.append(\"self.assertEqual(act, exp)\")\n", - " # output.append(\"assert act == exp\")\n", - " output = \"\\n\".join(output)\n", - " print(\"output=\", output)\n", - "\n", - "\n", - "# def F(a: int, b: int):\n", - "# c = {}\n", - "# c[\"pavel\"] = a + b\n", - "# return c\n", - "\n", - "\n", - "def F(a: int, b: int):\n", - " playback = Playback(\"\", \"\", \"F\", a, b)\n", - " playback.start()\n", - " c = {}\n", - " c[\"pavel\"] = a + b\n", - " playback.end(c)\n", - " return c\n", - "\n", - "\n", - "res = F(3, 4)\n", - "print(res)\n", - "\n", - "\n", - "# #############################################################################\n", - "# Playback\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:55:14.350318Z", - "start_time": "2020-05-29T18:55:14.319820Z" - } - }, - "outputs": [], - "source": [ - "class Playback: # noqa: F811\n", - " # def __init__(self, file_name, mode, *args, **kwargs):\n", - " # self.args = args\n", - " # self.kwargs = kwargs\n", - " def __init__(self, file_name, mode, func_name, a, b):\n", - " self.a = 
a\n", - " self.b = b\n", - "\n", - " def start(self):\n", - " self.a_json = jsonpickle.encode(self.a)\n", - " self.b_json = jsonpickle.encode(self.b)\n", - "\n", - " def end(self, ret):\n", - " self.ret_json = jsonpickle.encode(ret)\n", - " output = []\n", - " output.append(\"# Initialize values for unit test.\")\n", - " # output.append(\"a = %s\" % jsonpickle.decode(self.a_json))\n", - " # output.append(\"b = %s\" % jsonpickle.decode(self.b_json))\n", - " output.append(f\"a = r'{self.a_json}'\")\n", - " output.append(\"a = jsonpickle.decode(a)\")\n", - " output.append(f\"b = r'{self.b_json}'\")\n", - " output.append(\"b = jsonpickle.decode(b)\")\n", - " output.append(\"# Apply values.\")\n", - " # output.append(\"act = F(a, b)[1]\")\n", - " output.append(\"act = F(a, b)\")\n", - " output.append(f\"exp = r'{self.ret_json}'\")\n", - " output.append(\"exp = jsonpickle.decode(exp)\")\n", - " # output.append(\"self.assertEqual(act, exp)\")\n", - " output.append(\"assert act.equals(exp)\")\n", - " # output.append(\"assert act == exp\")\n", - " output = \"\\n\".join(output)\n", - " return output\n", - "\n", - "\n", - "# def F(a: int, b: int):\n", - "# c = {}\n", - "# c[\"pavel\"] = a + b\n", - "# return c\n", - "\n", - "use_playback = True\n", - "\n", - "\n", - "def F(a: pd.DataFrame, b: pd.DataFrame):\n", - " if use_playback:\n", - " playback = Playback(\"\", \"\", \"F\", a, b)\n", - " playback.start()\n", - " # c = {}\n", - " # c[\"pavel\"] = a + b\n", - " c = a + b\n", - " if use_playback:\n", - " output = playback.end(c)\n", - " res = output, c\n", - " else:\n", - " res = c\n", - " return res\n", - "\n", - "\n", - "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", - "b = pd.DataFrame({\"Price\": [1, 1, 1, 1]})\n", - "\n", - "res = F(a, b)\n", - "output = res[0]\n", - "print(output)\n", - "exec(output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:51:02.968918Z", - "start_time": 
"2020-05-29T18:51:02.964513Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", - "a = jsonpickle.decode(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:44:10.790106Z", - "start_time": "2020-05-29T18:44:10.779459Z" - } - }, - "outputs": [], - "source": [ - "a = pd.DataFrame({\"Price\": [700, 250, 800, 1200]})\n", - "\n", - "# round_trip(a)\n", - "frozen = jsonpickle.encode(a)\n", - "print(frozen)\n", - "print(f\"frozen2 = '{frozen}'\")\n", - "# print(\"frozen = '%s'\" % frozen)\n", - "assert 0\n", - "#\n", - "print(\"frozen=\")\n", - "print(json_pretty_print(frozen)) # noqa: F821\n", - "#\n", - "obj2 = jsonpickle.decode(frozen)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:49:44.390404Z", - "start_time": "2020-05-29T18:49:44.384524Z" - } - }, - "outputs": [], - "source": [ - "frozen2 = r'{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}'\n", - "print(frozen2)\n", - "# print(\"\\n\")\n", - "# print(frozen)\n", - "if False and isinstance(frozen2, str):\n", - " # print(frozen2[61])\n", - " # assert 0\n", - " frozen2 = json.loads(frozen2)\n", - " print(frozen2)\n", - "frozen2 = 
jsonpickle.decode(frozen2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:40:34.682031Z", - "start_time": "2020-05-29T18:40:34.668987Z" - } - }, - "outputs": [], - "source": [ - "a = \"\"\"{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\\\"py/object\\\": \\\"pandas.core.indexes.range.RangeIndex\\\", \\\"values\\\": \\\"[0, 1, 2, 3]\\\", \\\"txt\\\": true, \\\"meta\\\": {\\\"dtype\\\": \\\"int64\\\", \\\"name\\\": null}}\"}}\"\"\"\n", - "a = jsonpickle.decode(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:39:18.728676Z", - "start_time": "2020-05-29T18:39:18.711958Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n700\\n250\\n800\\n1200\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "a = jsonpickle.decode(a)\n", - "b = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n1\\n1\\n1\\n1\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "b = jsonpickle.decode(b)\n", - "# Apply values.\n", - "act = F(a, b)\n", - "exp = '{\"py/object\": \"pandas.core.frame.DataFrame\", \"values\": \"Price\\n701\\n251\\n801\\n1201\\n\", \"txt\": true, \"meta\": {\"dtypes\": {\"Price\": \"int64\"}, \"index\": \"{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 
1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}\"}}'\n", - "exp = jsonpickle.decode(exp)\n", - "assert act == exp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:27:30.781670Z", - "start_time": "2020-05-29T18:27:30.777539Z" - } - }, - "outputs": [], - "source": [ - "# Initialize values for unit test.\n", - "a = 3\n", - "b = 4\n", - "# Apply values.\n", - "act = F(a, b)\n", - "exp = {\"pavel\": 7}\n", - "assert act == exp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:10:03.802405Z", - "start_time": "2020-05-29T18:10:03.790642Z" - }, - "lines_to_next_cell": 2 - }, - "outputs": [], - "source": [ - "df2 = round_trip(df) # noqa: F821\n", - "\n", - "\n", - "# #############################################################################\n", - "# Thing\n", - "# #############################################################################" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T21:48:14.394447Z", - "start_time": "2020-05-11T21:48:14.384307Z" - } - }, - "outputs": [], - "source": [ - "class Thing:\n", - " def __init__(self, name):\n", - " self.name = name\n", - "\n", - "\n", - "obj = Thing(\"Awesome\")\n", - "\n", - "round_trip(obj) # noqa: F821" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T19:58:38.314059Z", - "start_time": "2020-05-11T19:58:38.309331Z" - } - }, - "outputs": [], - "source": [ - "def test(a: int, b: int):\n", - " print(round_trip(a)) # noqa: F821\n", - "\n", - "\n", - "test(\"strunz\", 6)\n", - "test(4, 6)\n", - "test([\"hello\"], 6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T14:03:41.315868Z", - "start_time": 
"2020-05-11T14:03:41.311264Z" - } - }, - "outputs": [], - "source": [ - "df.index.dtype #" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T14:03:00.632566Z", - "start_time": "2020-05-11T14:03:00.623714Z" - } - }, - "outputs": [], - "source": [ - "df.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-29T18:16:34.748252Z", - "start_time": "2020-05-29T18:16:34.736249Z" - } - }, - "outputs": [], - "source": [ - "# import io\n", - "# import io.StringIO\n", - "# from io import StringIO\n", - "\n", - "# output = StringIO.StringIO()\n", - "\n", - "orient = \"columns\"\n", - "# orient = \"split\"\n", - "# orient = \"records\"\n", - "# orient = \"table\"\n", - "df_as_str = df.to_json(orient=orient)\n", - "\n", - "# split\n", - "# records\n", - "# index\n", - "# values\n", - "# table\n", - "# columns (the default format)\n", - "\n", - "python_code = []\n", - "target_var = \"df_as_str\"\n", - "python_code.append(f\"{target_var} = {df_as_str}\")\n", - "python_code.append(f\"{target_var}.index.name = '{df.index.name}'\")\n", - "python_code = \"\\n\".join(python_code)\n", - "print(python_code)\n", - "\n", - "exec(python_code)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-11T13:42:52.575973Z", - "start_time": "2020-05-11T13:42:52.568178Z" - } - }, - "outputs": [], - "source": [ - "arr = eval(df_as_str)\n", - "df2 = pd.DataFrame.from_dict(arr, orient=\"columns\")\n", - "df2.index.name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:.conda-develop] *", - "language": "python", - "name": "conda-env-.conda-develop-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "165px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py deleted file mode 100644 index 22176ce52..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/hplayback.tutorial.py +++ /dev/null @@ -1,374 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python [conda env:.conda-develop] * -# language: python -# name: conda-env-.conda-develop-py -# --- - -# %% [markdown] -# # Description - -# %% [markdown] -# # Imports - -# %% -# %load_ext autoreload -# %autoreload 2 -# %matplotlib inline - -import json -import logging - -import jsonpickle -import jsonpickle.ext.pandas as jsonpickle_pandas - -jsonpickle_pandas.register_handlers() - -import pandas as pd # noqa: E402 - -import helpers.hdbg as hdbg # noqa: E402 -import helpers.henv as henv # noqa: E402 -import helpers.hplayback as hplayba # noqa: E402 - -# %% -hdbg.init_logger(verbosity=logging.INFO) - -_LOG = logging.getLogger(__name__) - -_LOG.info("%s", henv.get_system_signature()[0]) - 
-hnotebook.config_notebook() - -# %% -data = { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], -} - -df = pd.DataFrame(data, columns=["Product", "Price"]) -df.index.name = "hello" -print(df) - -# %% -# df.to_json(orient="") -df.to_dict(orient="series") - -# %% -hplayba.to_python_code(df) - -# %% -pd.DataFrame.from_dict( - { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], - } -) - -# %% -use_playback = True - - -def F(a, b): - if use_playback: - playback = Playback("assert_equal", "F", a, b) - playback.start() - c = a + b - if use_playback: - output = playback.end(c) - res = output - else: - res = c - return res - - -a = df -b = df -print(F(a, b)) - -# %% -hplayba.to_python_code(["3", 3]) - -# %% -hplayba.round_trip_convert(df, logging.INFO) - -# %% -hplayba.round_trip_convert("hello", logging.INFO) - - -# %% -def F(a, b): - return a + b - - -# %% -# Initialize values for unit test. -dummy_0 = r"3" -dummy_0 = jsonpickle.decode(dummy_0) -dummy_1 = r"2" -dummy_1 = jsonpickle.decode(dummy_1) -# Call function. -act = F(dummy_0, dummy_1) -# Create expected value of function output. -exp = r"5" -exp = jsonpickle.decode(exp) -# Check. 
-assert act == exp - - -# ############################################################################# -# Playback -# ############################################################################# - - -# %% -class Playback: - # def __init__(self, file_name, mode, *args, **kwargs): - # self.args = args - # self.kwargs = kwargs - def __init__(self, file_name, mode, func_name, a, b): - self.a = a - self.b = b - - def start(self): - self.a_json = jsonpickle.encode(self.a) - self.b_json = jsonpickle.encode(self.b) - - def end(self, ret): - self.ret_json = jsonpickle.encode(ret) - output = [] - output.append("# Initialize values for unit test.") - output.append("a = %s" % jsonpickle.decode(self.a_json)) - output.append("b = %s" % jsonpickle.decode(self.b_json)) - output.append("# Apply values.") - output.append("act = F(a, b)") - output.append("exp = %s" % jsonpickle.decode(self.ret_json)) - # output.append("self.assertEqual(act, exp)") - # output.append("assert act == exp") - output = "\n".join(output) - print("output=", output) - - -# def F(a: int, b: int): -# c = {} -# c["pavel"] = a + b -# return c - - -def F(a: int, b: int): - playback = Playback("", "", "F", a, b) - playback.start() - c = {} - c["pavel"] = a + b - playback.end(c) - return c - - -res = F(3, 4) -print(res) - - -# ############################################################################# -# Playback -# ############################################################################# - - -# %% -class Playback: # noqa: F811 - # def __init__(self, file_name, mode, *args, **kwargs): - # self.args = args - # self.kwargs = kwargs - def __init__(self, file_name, mode, func_name, a, b): - self.a = a - self.b = b - - def start(self): - self.a_json = jsonpickle.encode(self.a) - self.b_json = jsonpickle.encode(self.b) - - def end(self, ret): - self.ret_json = jsonpickle.encode(ret) - output = [] - output.append("# Initialize values for unit test.") - # output.append("a = %s" % jsonpickle.decode(self.a_json)) - # 
output.append("b = %s" % jsonpickle.decode(self.b_json)) - output.append(f"a = r'{self.a_json}'") - output.append("a = jsonpickle.decode(a)") - output.append(f"b = r'{self.b_json}'") - output.append("b = jsonpickle.decode(b)") - output.append("# Apply values.") - # output.append("act = F(a, b)[1]") - output.append("act = F(a, b)") - output.append(f"exp = r'{self.ret_json}'") - output.append("exp = jsonpickle.decode(exp)") - # output.append("self.assertEqual(act, exp)") - output.append("assert act.equals(exp)") - # output.append("assert act == exp") - output = "\n".join(output) - return output - - -# def F(a: int, b: int): -# c = {} -# c["pavel"] = a + b -# return c - -use_playback = True - - -def F(a: pd.DataFrame, b: pd.DataFrame): - if use_playback: - playback = Playback("", "", "F", a, b) - playback.start() - # c = {} - # c["pavel"] = a + b - c = a + b - if use_playback: - output = playback.end(c) - res = output, c - else: - res = c - return res - - -a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) -b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - -res = F(a, b) -output = res[0] -print(output) -exec(output) - -# %% -# Initialize values for unit test. 
-a = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' -a = jsonpickle.decode(a) - -# %% -a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - -# round_trip(a) -frozen = jsonpickle.encode(a) -print(frozen) -print(f"frozen2 = '{frozen}'") -# print("frozen = '%s'" % frozen) -assert 0 -# -print("frozen=") -print(json_pretty_print(frozen)) # noqa: F821 -# -obj2 = jsonpickle.decode(frozen) - -# %% -frozen2 = r'{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}' -print(frozen2) -# print("\n") -# print(frozen) -if False and isinstance(frozen2, str): - # print(frozen2[61]) - # assert 0 - frozen2 = json.loads(frozen2) - print(frozen2) -frozen2 = jsonpickle.decode(frozen2) - -# %% -a = """{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{\"py/object\": \"pandas.core.indexes.range.RangeIndex\", \"values\": \"[0, 1, 2, 3]\", \"txt\": true, \"meta\": {\"dtype\": \"int64\", \"name\": null}}"}}""" -a = jsonpickle.decode(a) - -# %% -# Initialize values for unit test. 
-a = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n700\n250\n800\n1200\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -a = jsonpickle.decode(a) -b = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n1\n1\n1\n1\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -b = jsonpickle.decode(b) -# Apply values. -act = F(a, b) -exp = '{"py/object": "pandas.core.frame.DataFrame", "values": "Price\n701\n251\n801\n1201\n", "txt": true, "meta": {"dtypes": {"Price": "int64"}, "index": "{"py/object": "pandas.core.indexes.range.RangeIndex", "values": "[0, 1, 2, 3]", "txt": true, "meta": {"dtype": "int64", "name": null}}"}}' -exp = jsonpickle.decode(exp) -assert act == exp - -# %% -# Initialize values for unit test. -a = 3 -b = 4 -# Apply values. 
-act = F(a, b) -exp = {"pavel": 7} -assert act == exp - -# %% -df2 = round_trip(df) # noqa: F821 - - -# ############################################################################# -# Thing -# ############################################################################# - - -# %% -class Thing: - def __init__(self, name): - self.name = name - - -obj = Thing("Awesome") - -round_trip(obj) # noqa: F821 - - -# %% -def test(a: int, b: int): - print(round_trip(a)) # noqa: F821 - - -test("strunz", 6) -test(4, 6) -test(["hello"], 6) - -# %% -df.index.dtype # - -# %% -df.dtypes - -# %% -# import io -# import io.StringIO -# from io import StringIO - -# output = StringIO.StringIO() - -orient = "columns" -# orient = "split" -# orient = "records" -# orient = "table" -df_as_str = df.to_json(orient=orient) - -# split -# records -# index -# values -# table -# columns (the default format) - -python_code = [] -target_var = "df_as_str" -python_code.append(f"{target_var} = {df_as_str}") -python_code.append(f"{target_var}.index.name = '{df.index.name}'") -python_code = "\n".join(python_code) -print(python_code) - -exec(python_code) - -# %% -arr = eval(df_as_str) -df2 = pd.DataFrame.from_dict(arr, orient="columns") -df2.index.name - -# %% diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb deleted file mode 100644 index 4516033f2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.ipynb +++ /dev/null @@ -1,1774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "895cb286", - "metadata": {}, - "source": [ - "Show Parquet / Pyarrow API." 
- ] - }, - { - "cell_type": "markdown", - "id": "b068d525", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "8f46ec68", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:53:22.684558Z", - "start_time": "2021-06-16T20:53:22.645267Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0m\u001b[33mWARNING\u001b[0m: Logger already initialized: skipping\n" - ] - } - ], - "source": [ - "import logging\n", - "import os\n", - "import random\n", - "\n", - "import pandas as pd\n", - "import pyarrow as pa\n", - "import pyarrow.dataset as ds\n", - "import pyarrow.parquet as pq\n", - "from pyarrow.dataset import DirectoryPartitioning\n", - "\n", - "import helpers.hdbg as hdbg\n", - "import helpers.hio as hio\n", - "\n", - "hdbg.init_logger(verbosity=logging.INFO)\n", - "_LOG = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "215ff89e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:19:03.323062Z", - "start_time": "2021-06-15T11:19:03.303632Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "2000-01-04 0 A 97 62\n", - "2000-01-05 0 A 12 25\n" - ] - } - ], - "source": [ - "def get_df() -> pd.DataFrame:\n", - " \"\"\"\n", - " Create pandas random data, like:\n", - "\n", - " ```\n", - " idx instr val1 val2\n", - " 2000-01-01 0 A 99 30\n", - " 2000-01-02 0 A 54 46\n", - " 2000-01-03 0 A 85 86\n", - " ```\n", - " \"\"\"\n", - " instruments = \"A B C D E\".split()\n", - " \"id stock val1 val2\".split()\n", - " df_idx = pd.date_range(\n", - " pd.Timestamp(\"2000-01-01\"), pd.Timestamp(\"2000-01-15\"), freq=\"1D\"\n", - " )\n", - " # print(df_idx)\n", - " random.seed(1000)\n", - "\n", - " df = []\n", - " for idx, inst in 
enumerate(instruments):\n", - " df_tmp = pd.DataFrame(\n", - " {\n", - " \"idx\": idx,\n", - " \"instr\": inst,\n", - " \"val1\": [random.randint(0, 100) for k in range(len(df_idx))],\n", - " \"val2\": [random.randint(0, 100) for k in range(len(df_idx))],\n", - " },\n", - " index=df_idx,\n", - " )\n", - " # print(df_tmp)\n", - " df.append(df_tmp)\n", - " df = pd.concat(df)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "8e8235d0", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:35:16.903580Z", - "start_time": "2021-06-15T11:35:16.895316Z" - } - }, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "EOL while scanning string literal (, line 4)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m4\u001b[0m\n\u001b[0;31m txt += \"# df=\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m EOL while scanning string literal\n" - ] - } - ], - "source": [ - "def df_to_str(df: pd.DataFrame) -> str:\n", - " txt = \"\"\n", - " txt += \"# df=\\n%s\" % df.head(3)\n", - " txt += \"\\n# df.shape=\\n%s\" % str(df.shape)\n", - " txt += \"\\n# df.dtypes=\\n%s\" % str(df.dtypes)\n", - " return txt" - ] - }, - { - "cell_type": "markdown", - "id": "17cc474b", - "metadata": {}, - "source": [ - "# Save and load all data in one file" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "cb399156", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:27.514505Z", - "start_time": "2021-06-15T11:25:27.496811Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = 
get_df()\n", - "# print(df.head())\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "940dc7d2", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:34.893472Z", - "start_time": "2021-06-15T11:25:34.886977Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "table=\n", - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "__index_level_0__: timestamp[ns]\n" - ] - } - ], - "source": [ - "table = pa.Table.from_pandas(df)\n", - "\n", - "print(\"table=\\n%s\" % table)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "93df67fc", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:38.560269Z", - "start_time": "2021-06-15T11:25:38.533905Z" - } - }, - "outputs": [], - "source": [ - "# Save.\n", - "file_name = \"df_in_one_file.pq\"\n", - "pq.write_table(table, file_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "155e36c0", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:25:51.016044Z", - "start_time": "2021-06-15T11:25:51.001034Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "__index_level_0__: timestamp[us]\n", - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Load.\n", - "df2 = pq.read_table(file_name)\n", - "print(df2)\n", - "\n", - "df2 = df2.to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "1098757c", - "metadata": {}, - "source": [ - "## Read a subset of columns" - ] - }, - { - "cell_type": "code", - 
"execution_count": 27, - "id": "6f4a652f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:27:11.924350Z", - "start_time": "2021-06-15T11:27:11.910680Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pyarrow.Table\n", - "idx: int64\n", - "val1: int64\n", - "# df=\n", - " idx val1\n", - "0 0 99\n", - "1 0 54\n", - "2 0 85\n", - "# df.shape=\n", - "(75, 2)\n", - "# df.dtypes=\n", - "idx int64\n", - "val1 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df2 = pq.read_table(file_name, columns=[\"idx\", \"val1\"])\n", - "print(df2)\n", - "\n", - "df2 = df2.to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "012cebdb", - "metadata": {}, - "source": [ - "## Partitioned dataset\n", - "\n", - "from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data\n", - "\n", - "- A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir\n", - "- E.g., \"Hive\" patitioning scheme \"key=vale\" dir names" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ca26642e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:11.964993Z", - "start_time": "2021-06-15T11:30:11.947282Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2\n", - "2000-01-01 0 A 99 30\n", - "2000-01-02 0 A 54 46\n", - "2000-01-03 0 A 85 86\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = get_df()\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "7cae349f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:27.617064Z", - "start_time": "2021-06-15T11:30:27.541418Z" - } - }, - "outputs": [], - "source": [ - "base = 
\".\"\n", - "dir_name = os.path.join(base, \"parquet_dataset_partitioned\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "fd57116d", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:30:30.672054Z", - "start_time": "2021-06-15T11:30:30.389512Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls parquet_dataset_partitioned" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "ac82b5ad", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:31:29.322947Z", - "start_time": "2021-06-15T11:31:29.298883Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "./parquet_dataset_partitioned/idx=0/cab9de6eff0c47bcb688a1ce437c7f89.parquet\n", - "./parquet_dataset_partitioned/idx=1/56813e569097420cae892720d3bb0789.parquet\n", - "./parquet_dataset_partitioned/idx=2/5c9a17d2e1294dd58c7d8695868c2cb5.parquet\n", - "./parquet_dataset_partitioned/idx=3/b28576eb22d54999980a313a24511497.parquet\n", - "./parquet_dataset_partitioned/idx=4/8ee3f0d7585b48959a560c954562add8.parquet\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "64394b7f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:32:01.839074Z", - "start_time": "2021-06-15T11:32:01.822727Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 A 99 30 0\n", - "2000-01-02 A 54 46 0\n", - "2000-01-03 A 85 86 0\n", - "# df.shape=\n", - "(75, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 
int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read everything.\n", - "df2 = dataset.to_table().to_pandas()\n", - "\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "df96e1db", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:33:05.171630Z", - "start_time": "2021-06-15T11:33:05.147040Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 B 18 22 1\n", - "2000-01-02 B 59 89 1\n", - "2000-01-03 B 91 90 1\n", - "# df.shape=\n", - "(15, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n", - "# df=\n", - " instr val1 val2 idx\n", - "2000-01-01 A 99 30 0\n", - "2000-01-02 A 54 46 0\n", - "2000-01-03 A 85 86 0\n", - "# df.shape=\n", - "(45, 4)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Load part of the data.\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") == 1).to_pandas()\n", - "print(df_to_str(df2))\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") < 3).to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "markdown", - "id": "b3c27848", - "metadata": {}, - "source": [ - "## Add year-month partitions" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "69d2ea15", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:36:11.106142Z", - "start_time": "2021-06-15T11:36:11.087701Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " idx instr val1 val2 year month\n", - "2000-01-01 0 A 99 30 2000 1\n", - "2000-01-02 0 A 54 46 2000 1\n", - "2000-01-03 0 A 85 86 2000 1\n", - "# df.shape=\n", - "(75, 6)\n", - "# df.dtypes=\n", - "idx int64\n", - "instr object\n", - "val1 int64\n", - 
"val2 int64\n", - "year int64\n", - "month int64\n", - "dtype: object\n" - ] - } - ], - "source": [ - "df = get_df()\n", - "df[\"year\"] = df.index.year\n", - "df[\"month\"] = df.index.month\n", - "\n", - "print(df_to_str(df))" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "1a2f8c3a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:01.738085Z", - "start_time": "2021-06-15T11:37:01.730748Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "table=\n", - "pyarrow.Table\n", - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "year: int64\n", - "month: int64\n", - "__index_level_0__: timestamp[ns]\n" - ] - } - ], - "source": [ - "table = pa.Table.from_pandas(df)\n", - "\n", - "print(\"table=\\n%s\" % table)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "9112ed65", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:04.832037Z", - "start_time": "2021-06-15T11:37:04.702121Z" - } - }, - "outputs": [], - "source": [ - "base = \".\"\n", - "dir_name = os.path.join(base, \"pq_partitioned2\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "pq.write_to_dataset(table, dir_name, partition_cols=[\"idx\", \"year\", \"month\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "844913cc", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:17.553902Z", - "start_time": "2021-06-15T11:37:17.276875Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls $dir_name" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "e5ba8be3", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:07.695235Z", - "start_time": "2021-06-15T11:37:07.433612Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"bc6b2314c7f640a38c62029280f6f65e.parquet\r\n" - ] - } - ], - "source": [ - "!ls $dir_name/idx=0/year=2000/month=1" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "2d93f116", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:37:26.153218Z", - "start_time": "2021-06-15T11:37:26.109040Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "./pq_partitioned2/idx=0/year=2000/month=1/bc6b2314c7f640a38c62029280f6f65e.parquet\n", - "./pq_partitioned2/idx=1/year=2000/month=1/bb178ff0bdd344ca8328f9d67398b322.parquet\n", - "./pq_partitioned2/idx=2/year=2000/month=1/16081eea25fd4da6bd802037b541766c.parquet\n", - "./pq_partitioned2/idx=3/year=2000/month=1/1557b3c461054eadba16e3072fbd3a8a.parquet\n", - "./pq_partitioned2/idx=4/year=2000/month=1/07a0c7fcf054450296b35452b57236ef.parquet\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "21148afd", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T11:39:19.396955Z", - "start_time": "2021-06-15T11:39:19.374534Z" - }, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx year month\n", - "2000-01-01 C 99 37 2 2000 1\n", - "2000-01-02 C 98 48 2 2000 1\n", - "2000-01-03 C 70 58 2 2000 1\n", - "# df.shape=\n", - "(15, 6)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "year int32\n", - "month int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "df2 = dataset.to_table(filter=ds.field(\"idx\") == 2).to_pandas()\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - 
"id": "d9e4e596", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:54:34.478646Z", - "start_time": "2021-06-16T20:54:34.250254Z" - }, - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "idx: int64\n", - "instr: string\n", - "val1: int64\n", - "val2: int64\n", - "year: int64\n", - "month: int64\n", - "__index_level_0__: timestamp[ns]\n", - "-- schema metadata --\n", - "pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975\n" - ] - } - ], - "source": [ - "# We could scan manually and create the dirs manually if we don't want to add\n", - "# add a new dir.\n", - "base = \".\"\n", - "dir_name = os.path.join(base, \"parquet_dataset_partitioned2\")\n", - "os.system(\"rm -rf %s\" % dir_name)\n", - "\n", - "schemas = []\n", - "\n", - "schema = pa.Table.from_pandas(df).schema\n", - "print(schema)\n", - "# assert 0\n", - "# idx: int64\n", - "# instr: string\n", - "# val1: int64\n", - "# val2: int64\n", - "# year: int64\n", - "# month: int64\n", - "\n", - "# grouped = df.groupby(lambda x: x.day)\n", - "group_by_idx = df.groupby(\"idx\")\n", - "for idx, df_tmp in group_by_idx:\n", - " _LOG.debug(\"idx=%s -> df.shape=%s\", idx, str(df_tmp.shape))\n", - " #\n", - " group_by_year = df_tmp.groupby(lambda x: x.year)\n", - " for year, df_tmp2 in group_by_year:\n", - " _LOG.debug(\"year=%s -> df.shape=%s\", year, str(df_tmp2.shape))\n", - " #\n", - " group_by_month = df_tmp2.groupby(lambda x: x.month)\n", - " for month, df_tmp3 in group_by_month:\n", - " _LOG.debug(\"month=%s -> df.shape=%s\", month, str(df_tmp3.shape))\n", - " # file_name = \"df_in_one_file.pq\"\n", - " # pq.write_table(table, file_name)\n", - " # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", - " subdir_name = os.path.join(\n", - " dir_name, f\"idx={idx}\", f\"year={year}\", f\"month={month}\"\n", - " )\n", - " table = pa.Table.from_pandas(df_tmp3, schema=schema)\n", - " 
schemas.append(table.schema)\n", - " # print(df_tmp3)\n", - " # print(table.schema)\n", - " # pq.write_to_dataset(table,\n", - " # subdir_name, schema=schema)\n", - " file_name = os.path.join(subdir_name, \"df_out.pq\")\n", - " hio.create_enclosing_dir(file_name)\n", - " pq.write_table(table, file_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "id": "8309de4a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:41:14.320037Z", - "start_time": "2021-06-16T20:41:14.314354Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schemas[0] == schemas[4]" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "f0e49f46", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:42:26.864001Z", - "start_time": "2021-06-16T20:42:26.856395Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " 
__index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975,\n", - " idx: int64\n", - " instr: string\n", - " val1: int64\n", - " val2: int64\n", - " year: int64\n", - " month: int64\n", - " __index_level_0__: timestamp[ns]\n", - " -- schema metadata --\n", - " pandas: '{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na' + 975]" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schemas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1130cbc2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "e5bdcdd8", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:53:50.373825Z", - "start_time": "2021-06-16T20:53:50.099251Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df_out.pq\r\n" - ] - } - ], - "source": [ - "!ls $dir_name/idx=0/year=2000/month=1" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "id": "aaf67ae6", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T20:55:02.764098Z", - "start_time": "2021-06-16T20:55:02.717192Z" - } - }, - "outputs": [ - { - "ename": "ArrowInvalid", - "evalue": "Unable to merge: Field month has incompatible types: int64 vs int32", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mArrowInvalid\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msrc_dir\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34mf\"{dir_name}/idx=0/year=2000\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m dataset = ds.dataset(src_dir,\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"parquet\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m partitioning=\"hive\")\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36mdataset\u001b[0;34m(source, schema, format, filesystem, partitioning, partition_base_dir, exclude_invalid_files, ignore_prefixes)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_filesystem_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_is_path_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/dataset.py\u001b[0m in \u001b[0;36m_filesystem_dataset\u001b[0;34m(source, schema, filesystem, partitioning, format, partition_base_dir, exclude_invalid_files, selector_ignore_prefixes)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mfactory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFileSystemDatasetFactory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpaths_or_selector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfactory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DatasetFactory.finish\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.pyarrow_internal_check_status\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mArrowInvalid\u001b[0m: Unable to merge: Field month has incompatible types: int64 vs int32" - ] - } - ], - "source": [ - "# Read data back.\n", - "# https://github.com/dask/dask/issues/4194\n", - "# src_dir = f\"{dir_name}/idx=0/year=2000/month=1\"\n", - "src_dir = f\"{dir_name}/idx=0/year=2000\"\n", - "dataset = ds.dataset(src_dir, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "df2 = dataset.to_table().to_pandas()\n", - "# 
print(df_to_str(df2))\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "markdown", - "id": "98f4111d", - "metadata": {}, - "source": [ - "## Partition manually" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "id": "f0b33d85", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-15T00:57:11.260871Z", - "start_time": "2021-06-15T00:57:11.235982Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(((year == 2009) and (month == 11)) and (day == 3))\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Neither field_names nor schema was passed; cannot infer field_names", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/2009/11/3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mpartitioning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiscover\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/venv/lib/python3.8/site-packages/pyarrow/_dataset.pyx\u001b[0m in \u001b[0;36mpyarrow._dataset.DirectoryPartitioning.discover\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Neither field_names nor schema was passed; cannot infer field_names" - ] - } - ], - "source": [ - "partitioning = DirectoryPartitioning(\n", - " pa.schema([(\"year\", pa.int16()), (\"month\", pa.int8()), (\"day\", pa.int8())])\n", - ")\n", - 
"print(partitioning.parse(\"/2009/11/3\"))\n", - "\n", - "# partitioning.discover()" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "ad70cbee", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:03:31.809969Z", - "start_time": "2021-06-16T11:03:31.526597Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'idx=0' 'idx=1' 'idx=2' 'idx=3' 'idx=4'\r\n" - ] - } - ], - "source": [ - "!ls /app/data" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "b19d1189", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:18:31.838549Z", - "start_time": "2021-06-16T11:18:31.821223Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet\n", - "/app/data/idx=0/year=2000/month=1/0435eeb615b14155bdc26266b91a4b1b.parquet\n", - "/app/data/idx=0/year=2000/month=1/05cc8a039ec746acb5668fde0a372028.parquet\n", - "/app/data/idx=0/year=2000/month=1/06039c8c6e9e4d54be7dcbef2bcdfa78.parquet\n", - "/app/data/idx=0/year=2000/month=1/0bb8e349594445a08fca4e337a7922d4.parquet\n", - "/app/data/idx=0/year=2000/month=1/115df7cedef540469cec56ee40ac19bd.parquet\n", - "/app/data/idx=0/year=2000/month=1/1174a70ffe614f4a9875b680e255902b.parquet\n", - "/app/data/idx=0/year=2000/month=1/122bdd75109c4fc7918d6db00f7bed41.parquet\n", - "/app/data/idx=0/year=2000/month=1/1543e41a14234c279fdfaa8656e8a71d.parquet\n", - "/app/data/idx=0/year=2000/month=1/17bd91618d5240fe83309608e91cb1ef.parquet\n", - "/app/data/idx=0/year=2000/month=1/18461c0ee57845768a503cfc865e323b.parquet\n", - "/app/data/idx=0/year=2000/month=1/1994694468184272a388fb8b40f03d5b.parquet\n", - "/app/data/idx=0/year=2000/month=1/1eb5f1adbe57418fa5d866d35902c39a.parquet\n", - "/app/data/idx=0/year=2000/month=1/2bd3c3ae435b489bb194ef7b2a715d9a.parquet\n", - "/app/data/idx=0/year=2000/month=1/2ded0d292def4e1186653d90852295f6.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/2ff4a2fa54664e67bab85a76324738ed.parquet\n", - "/app/data/idx=0/year=2000/month=1/378e55b8faf24033abf1c275741a88e8.parquet\n", - "/app/data/idx=0/year=2000/month=1/37a96e8834af4a87bc64ec3d1199ad54.parquet\n", - "/app/data/idx=0/year=2000/month=1/3a24331d6e51402d9a86c974f8a3bd05.parquet\n", - "/app/data/idx=0/year=2000/month=1/3ae93c6a21cc4a88bbaf90219f275563.parquet\n", - "/app/data/idx=0/year=2000/month=1/3b5f35d2add64a738cec5061659e35a2.parquet\n", - "/app/data/idx=0/year=2000/month=1/3d16749690f840c49facce0e37461a7e.parquet\n", - "/app/data/idx=0/year=2000/month=1/44bf70580b9a43829addb2a9e8f89dc6.parquet\n", - "/app/data/idx=0/year=2000/month=1/46237f338cee47c69f33b15fcb83817a.parquet\n", - "/app/data/idx=0/year=2000/month=1/46f19733b2d642c29adb58bf9499b6ca.parquet\n", - "/app/data/idx=0/year=2000/month=1/485f7d3d06b3486ca4bc8b35420f997a.parquet\n", - "/app/data/idx=0/year=2000/month=1/4cd8551c6c8f4daab7313732b9c1cea8.parquet\n", - "/app/data/idx=0/year=2000/month=1/4eca6951932d47d8a5678422da4a3d70.parquet\n", - "/app/data/idx=0/year=2000/month=1/512a55d6f91c412f951ba66728bfe118.parquet\n", - "/app/data/idx=0/year=2000/month=1/521193be72e1465ca33034cfd8e93ac9.parquet\n", - "/app/data/idx=0/year=2000/month=1/59aa37cc4792493899e812215b3bb822.parquet\n", - "/app/data/idx=0/year=2000/month=1/5aaa32a61b614d65a91747336c8028f0.parquet\n", - "/app/data/idx=0/year=2000/month=1/5cf9306e97ae42fdae53369710a5d0b6.parquet\n", - "/app/data/idx=0/year=2000/month=1/5fb97e575a9c4ff282293e9810040594.parquet\n", - "/app/data/idx=0/year=2000/month=1/613e6048f8434fccafed8c9d457fddc1.parquet\n", - "/app/data/idx=0/year=2000/month=1/61d608f23a69494eaef248d79a776ede.parquet\n", - "/app/data/idx=0/year=2000/month=1/62bf226aebb641229b33f7e3bf9f5cb1.parquet\n", - "/app/data/idx=0/year=2000/month=1/62c09d56d67d4c738568fed318152ca9.parquet\n", - "/app/data/idx=0/year=2000/month=1/652129318d7a4d5b83e256a94803ecdc.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/67607b3708e44233927974a861972a99.parquet\n", - "/app/data/idx=0/year=2000/month=1/69915fb955c24302a905e4520a76f547.parquet\n", - "/app/data/idx=0/year=2000/month=1/6cc812431ef44bd195e5baf9715095a6.parquet\n", - "/app/data/idx=0/year=2000/month=1/6ce2493e58b34b53ae42da84ee0ef165.parquet\n", - "/app/data/idx=0/year=2000/month=1/6e88cf1910bc4a71bcea865ed1605363.parquet\n", - "/app/data/idx=0/year=2000/month=1/70c1ba22a85f4b489096f80eacd5855c.parquet\n", - "/app/data/idx=0/year=2000/month=1/7705f37eac7e40ceb2fba4c9fd2cb81d.parquet\n", - "/app/data/idx=0/year=2000/month=1/7bdb4feb6a874697b8c2f9a6cb03a6e6.parquet\n", - "/app/data/idx=0/year=2000/month=1/7fd82496e8274e999d217df302fd46b0.parquet\n", - "/app/data/idx=0/year=2000/month=1/8130570ceae44ca69ce7b2cd9865c3ec.parquet\n", - "/app/data/idx=0/year=2000/month=1/83f8e04fd5ac49ec80ac7b98e8221278.parquet\n", - "/app/data/idx=0/year=2000/month=1/8469e01698bf47f28fda41a3935eeb64.parquet\n", - "/app/data/idx=0/year=2000/month=1/874aa31290804dd0abca1a8f40dc4875.parquet\n", - "/app/data/idx=0/year=2000/month=1/888ef49654f241df8cae8454a5cd3f07.parquet\n", - "/app/data/idx=0/year=2000/month=1/8aa4e41d00fc438c9de0906ecc66bbb9.parquet\n", - "/app/data/idx=0/year=2000/month=1/90e662712235472ebae79fd64eaae094.parquet\n", - "/app/data/idx=0/year=2000/month=1/91e7dcbfc57a495a943bad2400690bc1.parquet\n", - "/app/data/idx=0/year=2000/month=1/9394c04aef64432fb94219d0e8b50286.parquet\n", - "/app/data/idx=0/year=2000/month=1/9740961302bc40b192d20715c52d6ef6.parquet\n", - "/app/data/idx=0/year=2000/month=1/99e92f5585514ed4bd43b5bf50bdaaa8.parquet\n", - "/app/data/idx=0/year=2000/month=1/9bf5c3598f69411fb1acdc30779b25bd.parquet\n", - "/app/data/idx=0/year=2000/month=1/9d81c342203c4396ac2d9efcbb0cae7a.parquet\n", - "/app/data/idx=0/year=2000/month=1/9d8b2486e80f40468cf4ae50a41fda41.parquet\n", - "/app/data/idx=0/year=2000/month=1/a170565f336f4b3b99994c8d83012a4d.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/a50138dae90f478781bf032908703ef4.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5ab58aa310e47669e9d3604bf94f155.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5bd118e999e4df6ab3306e52671228e.parquet\n", - "/app/data/idx=0/year=2000/month=1/a5c0a7da693147b98f68811b4af7c79e.parquet\n", - "/app/data/idx=0/year=2000/month=1/a84afce396eb4afa91de3b08129e2ab7.parquet\n", - "/app/data/idx=0/year=2000/month=1/a8c1f364a7c944bb89d59d354059e596.parquet\n", - "/app/data/idx=0/year=2000/month=1/aa3bb180eda948c4aab93428ece443a8.parquet\n", - "/app/data/idx=0/year=2000/month=1/aa868fa8e11a4a838c19a1a260dcf6f6.parquet\n", - "/app/data/idx=0/year=2000/month=1/b01aa53c572d492f9667f157455742fc.parquet\n", - "/app/data/idx=0/year=2000/month=1/b6a7fc9dd14a4af6a3635cd138abdfe2.parquet\n", - "/app/data/idx=0/year=2000/month=1/b740e474de9f4b5497877c14f688faed.parquet\n", - "/app/data/idx=0/year=2000/month=1/b81d3d9c4045498c9deb3968b935e422.parquet\n", - "/app/data/idx=0/year=2000/month=1/b8c5a9f58500424785e4c83520931127.parquet\n", - "/app/data/idx=0/year=2000/month=1/b9176233e3934efebb0b12e1a780a3b1.parquet\n", - "/app/data/idx=0/year=2000/month=1/ba3d62351b7745f5a4e18f27159d5820.parquet\n", - "/app/data/idx=0/year=2000/month=1/bb9f583ed63840b39ada7bb0f45b9d57.parquet\n", - "/app/data/idx=0/year=2000/month=1/c55358bb09194e7aad9828678b5eaa61.parquet\n", - "/app/data/idx=0/year=2000/month=1/c5e31c9f04a6491dbf068fa889095e27.parquet\n", - "/app/data/idx=0/year=2000/month=1/c70308ef1a954ccea429f0de60c41fb3.parquet\n", - "/app/data/idx=0/year=2000/month=1/cf1e928b55ba4dd09bfa2765dadffb76.parquet\n", - "/app/data/idx=0/year=2000/month=1/d08715970c714455b7b9fbf18a86e8c0.parquet\n", - "/app/data/idx=0/year=2000/month=1/d27b68dc839f47e2a25814d805b9d759.parquet\n", - "/app/data/idx=0/year=2000/month=1/d46043c1511647a5b3b96450580ce6e1.parquet\n", - "/app/data/idx=0/year=2000/month=1/d592794fbc7f4ed0877d5a350fabf8d4.parquet\n", - 
"/app/data/idx=0/year=2000/month=1/d8b05ee145d046a1ac321708b68e91de.parquet\n", - "/app/data/idx=0/year=2000/month=1/d9163626e55f40bb88142c43eb4b9fab.parquet\n", - "/app/data/idx=0/year=2000/month=1/dcb0cd8bc9084246955a6090f643a43d.parquet\n", - "/app/data/idx=0/year=2000/month=1/dd0db6d0e040442bb0b950efa6ac6e6a.parquet\n", - "/app/data/idx=0/year=2000/month=1/dd306d9fd65a459fbbf1e32fc9260ae3.parquet\n", - "/app/data/idx=0/year=2000/month=1/e05e535b8969470680658f6c2924bb68.parquet\n", - "/app/data/idx=0/year=2000/month=1/e3aff8e0f7094609b4de8bacac5faa4c.parquet\n", - "/app/data/idx=0/year=2000/month=1/e439d12c5539461da2b12a54d7dbb1c3.parquet\n", - "/app/data/idx=0/year=2000/month=1/e51258868c044644a708c74ff4c2ca46.parquet\n", - "/app/data/idx=0/year=2000/month=1/ea632843bd34467496837fea693443ff.parquet\n", - "/app/data/idx=0/year=2000/month=1/ecf1306aadb04ecdabb50803116eb0fa.parquet\n", - "/app/data/idx=0/year=2000/month=1/ef2355b80a7346afbabd33743d7e69a2.parquet\n", - "/app/data/idx=0/year=2000/month=1/ef7d760f2a2245e08f8c038bdf554edd.parquet\n", - "/app/data/idx=0/year=2000/month=1/f4ca5d31138248eca2beb467548461ed.parquet\n", - "/app/data/idx=0/year=2000/month=1/fba715c8fda84ad88d370f71b2408c12.parquet\n", - "/app/data/idx=0/year=2000/month=1/fe435999dba9476baec1b3009d529d32.parquet\n", - "/app/data/idx=0/year=2000/month=1/fe53414bfef84cb39ca04b48c8e8332c.parquet\n", - "/app/data/idx=0/year=2000/month=1/ff75b3e1006f42c9ba9deb689324ee3e.parquet\n", - "/app/data/idx=1/year=2000/month=1/056b4d30021044298d7fde4cdd296561.parquet\n", - "/app/data/idx=1/year=2000/month=1/0c138f0939f347928f5c2d1c92207d57.parquet\n", - "/app/data/idx=1/year=2000/month=1/0cb27647424c4302b7a1cd47369b4e6d.parquet\n", - "/app/data/idx=1/year=2000/month=1/1064ed9fc62a450890a19bd906d7953a.parquet\n", - "/app/data/idx=1/year=2000/month=1/14f3b6e2235c4a2eabf23840c82059ec.parquet\n", - "/app/data/idx=1/year=2000/month=1/1541e4cf70a048b88c7f8296456b8437.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/19e5b00a91f64342be20a2faee8ef69c.parquet\n", - "/app/data/idx=1/year=2000/month=1/2512f9bc30c04375bd71f270e1901050.parquet\n", - "/app/data/idx=1/year=2000/month=1/2641066820c74d5fadd5d1a42b40d23f.parquet\n", - "/app/data/idx=1/year=2000/month=1/2b1c634e1ded48a2887abbb539f1ea41.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bc577092b964473943428b8c04f6414.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bc84c76804345c581c00b8e0ad59752.parquet\n", - "/app/data/idx=1/year=2000/month=1/2bd2238465b1416a8870494b579fae42.parquet\n", - "/app/data/idx=1/year=2000/month=1/2d5c13231ffc48aeb76bdb071663ceff.parquet\n", - "/app/data/idx=1/year=2000/month=1/2e48508ad08c4154813996117b6a833a.parquet\n", - "/app/data/idx=1/year=2000/month=1/3ca7d082ede544aab9f1f564acbffc14.parquet\n", - "/app/data/idx=1/year=2000/month=1/3d1f61cf39764307bbf39762d9c38af7.parquet\n", - "/app/data/idx=1/year=2000/month=1/40a2f2b0bd8c49be95aafc319ffd4a69.parquet\n", - "/app/data/idx=1/year=2000/month=1/4201c94937bc44f3809d9bf883b49cd7.parquet\n", - "/app/data/idx=1/year=2000/month=1/422474d1c6934fd298944ef7c9f21bfe.parquet\n", - "/app/data/idx=1/year=2000/month=1/444a6621429443c8b6550c6c04b27a24.parquet\n", - "/app/data/idx=1/year=2000/month=1/4940c21244274606bd6b543df4738ccf.parquet\n", - "/app/data/idx=1/year=2000/month=1/4b87781720884af7ae79d3f59fd69cd3.parquet\n", - "/app/data/idx=1/year=2000/month=1/4dd866c257864005a62854991f666b25.parquet\n", - "/app/data/idx=1/year=2000/month=1/4f06000c93bb45f18edfa84eeb89a1b9.parquet\n", - "/app/data/idx=1/year=2000/month=1/50716e5b2e004ba38d414a101ae09427.parquet\n", - "/app/data/idx=1/year=2000/month=1/50fc4338cf41483091d11a2616eb6221.parquet\n", - "/app/data/idx=1/year=2000/month=1/563109ba1ed647ef9518393a9d1ddb2e.parquet\n", - "/app/data/idx=1/year=2000/month=1/586e3969f1084af2bf28cee6f721cdc6.parquet\n", - "/app/data/idx=1/year=2000/month=1/5a1ba9682db3414ea33666e64d055535.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/5e2241ecbf364a0784626be86e38d6eb.parquet\n", - "/app/data/idx=1/year=2000/month=1/6300ef1b3beb44f0937dc8f890e845ce.parquet\n", - "/app/data/idx=1/year=2000/month=1/64aeabc396ba42ada56c695a32ed12a7.parquet\n", - "/app/data/idx=1/year=2000/month=1/664ca39e99134dabbe6d4c7402f626aa.parquet\n", - "/app/data/idx=1/year=2000/month=1/68efc5543f394005bb82c0dc63a3b01f.parquet\n", - "/app/data/idx=1/year=2000/month=1/6c51260b47964705a3dcfa1cf25ca106.parquet\n", - "/app/data/idx=1/year=2000/month=1/6f9ad552153244679f73a058dfc5b42e.parquet\n", - "/app/data/idx=1/year=2000/month=1/718ffd8c75a14cde953e8e3275341d31.parquet\n", - "/app/data/idx=1/year=2000/month=1/728984a554734a25a69f0eb1f32f842f.parquet\n", - "/app/data/idx=1/year=2000/month=1/75296fd97a724c74bc09e9d64b528f50.parquet\n", - "/app/data/idx=1/year=2000/month=1/76ca85d0dfd849829f105ee6fddb6439.parquet\n", - "/app/data/idx=1/year=2000/month=1/77ac6bd92e7f4a46bbc7634de174bbf3.parquet\n", - "/app/data/idx=1/year=2000/month=1/79a48d3eb0c144ccb13fa4baf944c92b.parquet\n", - "/app/data/idx=1/year=2000/month=1/7a1ae42ab80b4cbf9c00a5b7f213a12c.parquet\n", - "/app/data/idx=1/year=2000/month=1/7af9fe9698494063a751f9a8f5a317dc.parquet\n", - "/app/data/idx=1/year=2000/month=1/7cd226f5679b4cae9af7b881fa1787b7.parquet\n", - "/app/data/idx=1/year=2000/month=1/7fed9a3f251c44209ce0933cfe60ec98.parquet\n", - "/app/data/idx=1/year=2000/month=1/842f90063cbb44b4ae1e7d6b9b4aa59e.parquet\n", - "/app/data/idx=1/year=2000/month=1/84dceabacd264c82981347142463feb9.parquet\n", - "/app/data/idx=1/year=2000/month=1/85d7b8fa841e42b097e34dcd8f13beca.parquet\n", - "/app/data/idx=1/year=2000/month=1/878a1b363a0a48c3b0af294e9f885d72.parquet\n", - "/app/data/idx=1/year=2000/month=1/887e26b6f1004e4fb2a5e373b4d9c5f3.parquet\n", - "/app/data/idx=1/year=2000/month=1/88bc144aa2ed4334b077b19f702a9a99.parquet\n", - "/app/data/idx=1/year=2000/month=1/88fe979886ee453789ca1b1083300618.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/8b75d58338e64ae1bc694bb0d7044597.parquet\n", - "/app/data/idx=1/year=2000/month=1/8cf24285a4a5450ca5c56c731f5c87a0.parquet\n", - "/app/data/idx=1/year=2000/month=1/8d873dde8103478ba44283b5c90e5060.parquet\n", - "/app/data/idx=1/year=2000/month=1/8e25293517d8490b9f12892f63f35b3a.parquet\n", - "/app/data/idx=1/year=2000/month=1/92bbf16c4b7f4888ae4f93efcec6d40a.parquet\n", - "/app/data/idx=1/year=2000/month=1/9443d531d13f41b491771f22caa9d5a4.parquet\n", - "/app/data/idx=1/year=2000/month=1/94b871d36d384a24a6f42f34d56f822c.parquet\n", - "/app/data/idx=1/year=2000/month=1/9543cef54d3340ba9c8a2dca154947b8.parquet\n", - "/app/data/idx=1/year=2000/month=1/985415e78a0c4abcb42a96c44bdef44b.parquet\n", - "/app/data/idx=1/year=2000/month=1/9b501f9c98c3455ab37f13dc32d4836e.parquet\n", - "/app/data/idx=1/year=2000/month=1/9f0ac6f2e23242b1afb424389a8a1f08.parquet\n", - "/app/data/idx=1/year=2000/month=1/a20bfc0770454e1185f3d1b91efed93c.parquet\n", - "/app/data/idx=1/year=2000/month=1/a31f4026dbab4ef9807081ad9be5e5cc.parquet\n", - "/app/data/idx=1/year=2000/month=1/a472f43a45da4357b63cb0b5535e3237.parquet\n", - "/app/data/idx=1/year=2000/month=1/a74453d72e364b0f819ecf238d9b53fd.parquet\n", - "/app/data/idx=1/year=2000/month=1/a94d3fce611243d29a21b612f01e5a18.parquet\n", - "/app/data/idx=1/year=2000/month=1/a990f67b865f4e599ffa926341915ae2.parquet\n", - "/app/data/idx=1/year=2000/month=1/aa28c2d20ed140b18ddead5b11b96a0b.parquet\n", - "/app/data/idx=1/year=2000/month=1/aa724649481e4f7aa95b78cfe333c72d.parquet\n", - "/app/data/idx=1/year=2000/month=1/ac4487b08071423481580622be8d9914.parquet\n", - "/app/data/idx=1/year=2000/month=1/ad2a3795a1ad46f0b7b509a6ebdc85f4.parquet\n", - "/app/data/idx=1/year=2000/month=1/afa56f8175ed41a8b34bac4ac6786cf3.parquet\n", - "/app/data/idx=1/year=2000/month=1/b6c7cee2c50642bbaacf29e16dbbece5.parquet\n", - "/app/data/idx=1/year=2000/month=1/b9c0158311a04c3fa9c594d6db280053.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/bbbd7a1b72b645ed8afdada3a0fd9fac.parquet\n", - "/app/data/idx=1/year=2000/month=1/bbce481ce9fc404684db9578007edd4b.parquet\n", - "/app/data/idx=1/year=2000/month=1/bbf2ea53874d4bb49b7ebf959c24b060.parquet\n", - "/app/data/idx=1/year=2000/month=1/bd054b89ad8a46f29968468a4fd6d34d.parquet\n", - "/app/data/idx=1/year=2000/month=1/c1a395d1127240c1b9d7ebcb0d63842f.parquet\n", - "/app/data/idx=1/year=2000/month=1/c27376832ccd439685bdc3b11cdcec0f.parquet\n", - "/app/data/idx=1/year=2000/month=1/c5c55b01bbe1494e9297385e99e9f0d3.parquet\n", - "/app/data/idx=1/year=2000/month=1/c872faa9a863454cadc603827abd3f6c.parquet\n", - "/app/data/idx=1/year=2000/month=1/c9528d72e8574a279c0995c3de171de3.parquet\n", - "/app/data/idx=1/year=2000/month=1/cb7475b11c924a689515ade22ec7b134.parquet\n", - "/app/data/idx=1/year=2000/month=1/cb9a2e526b7845daaaf8f3ced61d8597.parquet\n", - "/app/data/idx=1/year=2000/month=1/cd356e54f63c483ea4792e842667c1ac.parquet\n", - "/app/data/idx=1/year=2000/month=1/cdd3925db9ae44a0ba2760031b229219.parquet\n", - "/app/data/idx=1/year=2000/month=1/d118c630c6194befaae2217985c9073c.parquet\n", - "/app/data/idx=1/year=2000/month=1/d68ddf28bd144430a5dc2c4437f37472.parquet\n", - "/app/data/idx=1/year=2000/month=1/d7adfebd0e9249f989f41e10ca61bf59.parquet\n", - "/app/data/idx=1/year=2000/month=1/d9b7947e9c6b400080d2226093fcc571.parquet\n", - "/app/data/idx=1/year=2000/month=1/d9f610ef03c748619ee5ef2ddcde2634.parquet\n", - "/app/data/idx=1/year=2000/month=1/dcbf892a4231404c90139ee3adfc6815.parquet\n", - "/app/data/idx=1/year=2000/month=1/e083fc488a7446bbbdad82c37f8fca29.parquet\n", - "/app/data/idx=1/year=2000/month=1/e5f84abccb0d407898e892f78dcb9ce1.parquet\n", - "/app/data/idx=1/year=2000/month=1/e74ca84dac2e4d53977a54d9daeb7adc.parquet\n", - "/app/data/idx=1/year=2000/month=1/e85272be7a1c411a886bc856c6012396.parquet\n", - "/app/data/idx=1/year=2000/month=1/ec83d2e5ff534be1b28b4cf511b67e0d.parquet\n", - 
"/app/data/idx=1/year=2000/month=1/ef6709a1008c43cc994cf01278474c94.parquet\n", - "/app/data/idx=1/year=2000/month=1/f7249440aa6f403f934e5018d34a583c.parquet\n", - "/app/data/idx=1/year=2000/month=1/fc96559adfd2419a9a1cf883b4d521fb.parquet\n", - "/app/data/idx=2/year=2000/month=1/0210672cfa44441bbcf4c07a2bd3c467.parquet\n", - "/app/data/idx=2/year=2000/month=1/0259160641d446518dffe477c5265240.parquet\n", - "/app/data/idx=2/year=2000/month=1/04cce58d49ba4c3982dd0823f43f29a9.parquet\n", - "/app/data/idx=2/year=2000/month=1/058031e2ce2d4bd99cbe7297756dd547.parquet\n", - "/app/data/idx=2/year=2000/month=1/06918413b55f43a19fb7f4e13712c396.parquet\n", - "/app/data/idx=2/year=2000/month=1/07a8bd5cb80140a48f709d86fe3e00aa.parquet\n", - "/app/data/idx=2/year=2000/month=1/0bca80d1ee444038871e5fbb1ccc4d21.parquet\n", - "/app/data/idx=2/year=2000/month=1/0bd86024c6234346b739be5af1a49ed2.parquet\n", - "/app/data/idx=2/year=2000/month=1/0c2d3de1afda4b8f82f43cf658a09fb8.parquet\n", - "/app/data/idx=2/year=2000/month=1/0d4d954eab7043a0a8d7bd751897deb5.parquet\n", - "/app/data/idx=2/year=2000/month=1/0d976fec817b4dd88d3082fe39e6f2b6.parquet\n", - "/app/data/idx=2/year=2000/month=1/12255adedd3948d4b8ced88001a61e04.parquet\n", - "/app/data/idx=2/year=2000/month=1/1406843e1322465e8384ba8685a9eb9d.parquet\n", - "/app/data/idx=2/year=2000/month=1/15ab3cbd13ce4fc7ab69d5c2b1672ca2.parquet\n", - "/app/data/idx=2/year=2000/month=1/18b68b427e2947bbaee4122bc2b0fbf8.parquet\n", - "/app/data/idx=2/year=2000/month=1/1a883ab5889441578fbf5f0a2c822c07.parquet\n", - "/app/data/idx=2/year=2000/month=1/1cc7612ae5e34455a716fc38b84427bb.parquet\n", - "/app/data/idx=2/year=2000/month=1/1e188269ac30443fa796a8bdbea70e46.parquet\n", - "/app/data/idx=2/year=2000/month=1/1e803b9281ef4d4289f8a207de2fd2a2.parquet\n", - "/app/data/idx=2/year=2000/month=1/2099fd988d544989b1117a45cd92e2c5.parquet\n", - "/app/data/idx=2/year=2000/month=1/245b2e48c3d442f990dfd9f3f18f5544.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/2cc8cd4af02e48728683551df1d9b517.parquet\n", - "/app/data/idx=2/year=2000/month=1/2e389e9f7c4f43ea8ff96d1fa13f0347.parquet\n", - "/app/data/idx=2/year=2000/month=1/2edcd33c70704b64b80987aba03d724e.parquet\n", - "/app/data/idx=2/year=2000/month=1/3170732421924aeaa451ca82a4b77131.parquet\n", - "/app/data/idx=2/year=2000/month=1/3227923c1dab4e7fbe07511111e76f67.parquet\n", - "/app/data/idx=2/year=2000/month=1/3607d6e90ab64fff84b4f2c9477540ce.parquet\n", - "/app/data/idx=2/year=2000/month=1/3b733f19c98f44ebb6ab31e93d18f09b.parquet\n", - "/app/data/idx=2/year=2000/month=1/3d79e3dd93d44a208aadd899a9632005.parquet\n", - "/app/data/idx=2/year=2000/month=1/3e2d5106997b4d2a8a4aaaada70b5c34.parquet\n", - "/app/data/idx=2/year=2000/month=1/3ea888ba5f0c4c46aaa55795799c8614.parquet\n", - "/app/data/idx=2/year=2000/month=1/4065fbfffe364f5b8f661dd0caff5c00.parquet\n", - "/app/data/idx=2/year=2000/month=1/4438f729a59e4bee856e9766a7866777.parquet\n", - "/app/data/idx=2/year=2000/month=1/489418f859104268b59905195289b433.parquet\n", - "/app/data/idx=2/year=2000/month=1/4a07e13d3bab4ee4bed09868f4d0ae6a.parquet\n", - "/app/data/idx=2/year=2000/month=1/4ade79216a6f42ffbfa7ee5c2949d904.parquet\n", - "/app/data/idx=2/year=2000/month=1/508e221eeacc4624977761af65fdf95f.parquet\n", - "/app/data/idx=2/year=2000/month=1/520f6ff1dee6468099730664d5bea3de.parquet\n", - "/app/data/idx=2/year=2000/month=1/537a5c5b6d2949eca8c35db48dcc123f.parquet\n", - "/app/data/idx=2/year=2000/month=1/552fbc5a37494e7bb792e3c225cd4021.parquet\n", - "/app/data/idx=2/year=2000/month=1/589b0598f3eb4f178125912219919413.parquet\n", - "/app/data/idx=2/year=2000/month=1/62c0c2448a5d49889e2d2b8421264798.parquet\n", - "/app/data/idx=2/year=2000/month=1/6312935db784424a957645de2de4a4c2.parquet\n", - "/app/data/idx=2/year=2000/month=1/64f32e163bed483b860f21c6666b0a7d.parquet\n", - "/app/data/idx=2/year=2000/month=1/66414c74b1ab4c3cb155b440359b1705.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/6f0e1508809f47efba9fe398311b711c.parquet\n", - "/app/data/idx=2/year=2000/month=1/724d5c288c834e34846ad8871a94ee10.parquet\n", - "/app/data/idx=2/year=2000/month=1/740e15b45d2745a997e81672fc58481e.parquet\n", - "/app/data/idx=2/year=2000/month=1/75d5db2fb8404493bd6f6ebbeee50e91.parquet\n", - "/app/data/idx=2/year=2000/month=1/765f6b9e1260430680f79e9c4b8de8a1.parquet\n", - "/app/data/idx=2/year=2000/month=1/7893a366f6fd4770ac34af71a74af552.parquet\n", - "/app/data/idx=2/year=2000/month=1/7d6b206a0cdc4c7baefb675350602e10.parquet\n", - "/app/data/idx=2/year=2000/month=1/7d7c7ec0eaf04cf386ce6d93c5107246.parquet\n", - "/app/data/idx=2/year=2000/month=1/7e5eb92603774185bce487436db2af8f.parquet\n", - "/app/data/idx=2/year=2000/month=1/7f393857790e43da9549ed4c69797d18.parquet\n", - "/app/data/idx=2/year=2000/month=1/7f72ff606a804972a50960d0efcebcae.parquet\n", - "/app/data/idx=2/year=2000/month=1/8415983fe0a549c89ea28b25db102138.parquet\n", - "/app/data/idx=2/year=2000/month=1/86cf478f40914946b5b86106be97f7d8.parquet\n", - "/app/data/idx=2/year=2000/month=1/86f1de6e862141be8bd612465486fd16.parquet\n", - "/app/data/idx=2/year=2000/month=1/895fb45b8f554034a79ebd9c8eff9cad.parquet\n", - "/app/data/idx=2/year=2000/month=1/896bad5a081440b582d71fbb5baa4998.parquet\n", - "/app/data/idx=2/year=2000/month=1/8c2163530eef4b7b9e22fc1d4d99d6d5.parquet\n", - "/app/data/idx=2/year=2000/month=1/8c3b5f112ddf48e1a165bcad69f7e548.parquet\n", - "/app/data/idx=2/year=2000/month=1/8f54037c274c424fa2e13e83afe6a983.parquet\n", - "/app/data/idx=2/year=2000/month=1/9267bc6aecba4d66952bc7778a97bbb0.parquet\n", - "/app/data/idx=2/year=2000/month=1/978623e40a264ecbb8e3e7afee4a9221.parquet\n", - "/app/data/idx=2/year=2000/month=1/9b501c10edd94539b8147571202e7dfe.parquet\n", - "/app/data/idx=2/year=2000/month=1/a2cbd94909a7409cb233cc388fcd53be.parquet\n", - "/app/data/idx=2/year=2000/month=1/a570b6d3b72d4c8090c4efcb2eeb2d70.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/a88b8e956c104202a8f2d279c7e58741.parquet\n", - "/app/data/idx=2/year=2000/month=1/a97573410ce04706ac3d5c88f9cd285e.parquet\n", - "/app/data/idx=2/year=2000/month=1/a9c31f330c2d454a8911627eaafe7e31.parquet\n", - "/app/data/idx=2/year=2000/month=1/aa941bd2b9574ce294967019aa4cd515.parquet\n", - "/app/data/idx=2/year=2000/month=1/af86ac06c6f7484c8bbb8215a408ce73.parquet\n", - "/app/data/idx=2/year=2000/month=1/b35d48ff673541559bf27f4c3e1feab6.parquet\n", - "/app/data/idx=2/year=2000/month=1/b5b85036b2c540f9add4b86012873462.parquet\n", - "/app/data/idx=2/year=2000/month=1/b8b4abc89c824a17a263d898f4bca476.parquet\n", - "/app/data/idx=2/year=2000/month=1/bb6a1df466d84085bc0900641233cbc3.parquet\n", - "/app/data/idx=2/year=2000/month=1/bb95334225ce41768c1175ccabad174b.parquet\n", - "/app/data/idx=2/year=2000/month=1/bca9c21e480249eebb26aeed167b1293.parquet\n", - "/app/data/idx=2/year=2000/month=1/bf49382a8e024ffe9c17e4849ce4127f.parquet\n", - "/app/data/idx=2/year=2000/month=1/c06c38062a2b4e13b4e1ee1eaf03bfa2.parquet\n", - "/app/data/idx=2/year=2000/month=1/c1f40b6256444001af06dc2fb98f5e5c.parquet\n", - "/app/data/idx=2/year=2000/month=1/c4968d0cbcd54c83a0dd3e57039f0578.parquet\n", - "/app/data/idx=2/year=2000/month=1/c6afa57132184a71becf083d1b553473.parquet\n", - "/app/data/idx=2/year=2000/month=1/c87a24c747984bf58745b666dac98323.parquet\n", - "/app/data/idx=2/year=2000/month=1/cc34429087f54f7aaf1e84bc12517c26.parquet\n", - "/app/data/idx=2/year=2000/month=1/cc839cdd3fbe465abc78861a4cc11acf.parquet\n", - "/app/data/idx=2/year=2000/month=1/db6c45d7e8234bc1949ddd8973010d7f.parquet\n", - "/app/data/idx=2/year=2000/month=1/dbb0a2e2bdbc4319a07d04af0d9356fc.parquet\n", - "/app/data/idx=2/year=2000/month=1/dbde0aee2a4647939d6f027a99e37cc4.parquet\n", - "/app/data/idx=2/year=2000/month=1/ddd0738116b5496391991ad6d3e781b9.parquet\n", - "/app/data/idx=2/year=2000/month=1/e52fd781bd78475789d4160624a6e34a.parquet\n", - 
"/app/data/idx=2/year=2000/month=1/e9c5c04f931f4fd4b6afb51db34cda54.parquet\n", - "/app/data/idx=2/year=2000/month=1/eee841a6139a4fe19620045f04c2f908.parquet\n", - "/app/data/idx=2/year=2000/month=1/ef42e36ceb794730ac25dad68f73294d.parquet\n", - "/app/data/idx=2/year=2000/month=1/efe26f73b0494f828fcf2686b6874c71.parquet\n", - "/app/data/idx=2/year=2000/month=1/f15094f2f10748e59573fecb5435ecc4.parquet\n", - "/app/data/idx=2/year=2000/month=1/f1e37026291c41c5ae698956baa6bf39.parquet\n", - "/app/data/idx=2/year=2000/month=1/f1f56b07a73646e4a5219a2623b04489.parquet\n", - "/app/data/idx=2/year=2000/month=1/f25704c4b00a418c9fa2385f9018adc7.parquet\n", - "/app/data/idx=2/year=2000/month=1/f60540924a1641de9d64f66c1af980dd.parquet\n", - "/app/data/idx=2/year=2000/month=1/f62eada23e1d430dacb69eeff0d5ba59.parquet\n", - "/app/data/idx=2/year=2000/month=1/f9b43fe646ec4607baa500b1360a6e1c.parquet\n", - "/app/data/idx=2/year=2000/month=1/fc3a31bc82ba4f17a93a18138887d9d5.parquet\n", - "/app/data/idx=3/year=2000/month=1/00b291e6d0d2494a8652e6ffcf1746c5.parquet\n", - "/app/data/idx=3/year=2000/month=1/01b6882837054cc4801c6929a630abd7.parquet\n", - "/app/data/idx=3/year=2000/month=1/09ebeae420f348c28a365f607978aeda.parquet\n", - "/app/data/idx=3/year=2000/month=1/0c41010bec604c93b974e72fa35cc2c7.parquet\n", - "/app/data/idx=3/year=2000/month=1/0cb995ed168f4829a38db4f75d4ed14b.parquet\n", - "/app/data/idx=3/year=2000/month=1/0cf1a660ee984efcaabe1d1bb9263a9a.parquet\n", - "/app/data/idx=3/year=2000/month=1/0d0bbc2ee628424f8204240680f44389.parquet\n", - "/app/data/idx=3/year=2000/month=1/0f72553d38cb47f095fdf35e03507dd3.parquet\n", - "/app/data/idx=3/year=2000/month=1/0ff3e55ae9464e369302d1fb2abaec40.parquet\n", - "/app/data/idx=3/year=2000/month=1/1165cf18728c41edb7bb8a765ae7854d.parquet\n", - "/app/data/idx=3/year=2000/month=1/12a3b4dadd4f43389c269f4b736278c2.parquet\n", - "/app/data/idx=3/year=2000/month=1/1a204362f488461da026ee347c817e2e.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/1c306421662241b48b85f24d033898fc.parquet\n", - "/app/data/idx=3/year=2000/month=1/22155eaaf5ce4e36bbb36b162dadae9e.parquet\n", - "/app/data/idx=3/year=2000/month=1/229cb1d3321f4660866b414f3a647fff.parquet\n", - "/app/data/idx=3/year=2000/month=1/280b6ca59e1f4312b872fd23d96ed6df.parquet\n", - "/app/data/idx=3/year=2000/month=1/2859c7dccfe54951a955941fa23a33b1.parquet\n", - "/app/data/idx=3/year=2000/month=1/2a17999c98294f38ac3e60af45779214.parquet\n", - "/app/data/idx=3/year=2000/month=1/2e3b411a5a3a48aba5e52053e54dbe9f.parquet\n", - "/app/data/idx=3/year=2000/month=1/2eb295d22ddd4ca9801d7b0a6a950261.parquet\n", - "/app/data/idx=3/year=2000/month=1/313a5fc7ea2c49009cd68f31ce030eb3.parquet\n", - "/app/data/idx=3/year=2000/month=1/319b8c873aba46d9a39aaed1d7ade697.parquet\n", - "/app/data/idx=3/year=2000/month=1/36c17affd08e450ba034d29818f6c94f.parquet\n", - "/app/data/idx=3/year=2000/month=1/37170fb9855d47f0871cbf1b3c4a5763.parquet\n", - "/app/data/idx=3/year=2000/month=1/3772fba9cef64744a8aa5ad999a1d48d.parquet\n", - "/app/data/idx=3/year=2000/month=1/3d68d10aee3b46e9ab4c2341f395e9f8.parquet\n", - "/app/data/idx=3/year=2000/month=1/3da7295cc0ee4953aad41cddb746c0ec.parquet\n", - "/app/data/idx=3/year=2000/month=1/401a2d5e38ee4581ac5950131e7739ed.parquet\n", - "/app/data/idx=3/year=2000/month=1/40bb809ba5824fa48218e2543e1317d8.parquet\n", - "/app/data/idx=3/year=2000/month=1/42c11bbbec28471d818c4eda7ffa0316.parquet\n", - "/app/data/idx=3/year=2000/month=1/430d92d720ef40aca2043cdd9a4216a7.parquet\n", - "/app/data/idx=3/year=2000/month=1/4344d9475f474d4289c16c14e3d76205.parquet\n", - "/app/data/idx=3/year=2000/month=1/4965043c1c58485fb9a81ca502c9704c.parquet\n", - "/app/data/idx=3/year=2000/month=1/4c954d56c1f040f8adcb92a116fc3e4a.parquet\n", - "/app/data/idx=3/year=2000/month=1/4cb7c012e50c4e45988d6c73f931babf.parquet\n", - "/app/data/idx=3/year=2000/month=1/4d11aa2de91047638fd1fbb49180b828.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/59de1ba8fd7b41d7819849137f7b9817.parquet\n", - "/app/data/idx=3/year=2000/month=1/5a31ef5acc2340b7a575b1d77e9e9917.parquet\n", - "/app/data/idx=3/year=2000/month=1/5b14185275384ee5ae5839b6d69c714e.parquet\n", - "/app/data/idx=3/year=2000/month=1/5b35b2943a7c476aa5dc3a2af08f13fe.parquet\n", - "/app/data/idx=3/year=2000/month=1/5e6bb9eceb2d4a4ebddd39e06db86d67.parquet\n", - "/app/data/idx=3/year=2000/month=1/5f8372dbc36a4681bdebfaa9f3328eec.parquet\n", - "/app/data/idx=3/year=2000/month=1/6317cb7958d2459595a28bdca41f42d5.parquet\n", - "/app/data/idx=3/year=2000/month=1/67ba93ec02b44b0593c0ff37aa3db5b7.parquet\n", - "/app/data/idx=3/year=2000/month=1/69be17b95a9046c2a4553f5c077f5fff.parquet\n", - "/app/data/idx=3/year=2000/month=1/6ac05cada45b48b89ec15b0f76df21ac.parquet\n", - "/app/data/idx=3/year=2000/month=1/6ce38fe0d6a54853a757745eb148960a.parquet\n", - "/app/data/idx=3/year=2000/month=1/7000686e11b34200ae44dfe294dc8c8e.parquet\n", - "/app/data/idx=3/year=2000/month=1/70f44eb7513c4100aa2cd5779e3c5d67.parquet\n", - "/app/data/idx=3/year=2000/month=1/7421bdc2222640b38ada8d94e10e5865.parquet\n", - "/app/data/idx=3/year=2000/month=1/78f4a6251bb7423e800ada3444bb54c1.parquet\n", - "/app/data/idx=3/year=2000/month=1/874eb82772844f269bc5360ef1971245.parquet\n", - "/app/data/idx=3/year=2000/month=1/87baf01b30ce467ca976e26ad5bec1e2.parquet\n", - "/app/data/idx=3/year=2000/month=1/8a31ab99c92a4a8b829f37561cc99956.parquet\n", - "/app/data/idx=3/year=2000/month=1/8aa9003415c649288a13560a1352805b.parquet\n", - "/app/data/idx=3/year=2000/month=1/8ae3a6e6214f4816b469f09b01c2e955.parquet\n", - "/app/data/idx=3/year=2000/month=1/8ff02b303fca4f86a129197874e8e6fe.parquet\n", - "/app/data/idx=3/year=2000/month=1/94c27fe8b6084f7b8606cef710bab753.parquet\n", - "/app/data/idx=3/year=2000/month=1/94c4de33006f424e8cb424accfad8a2c.parquet\n", - "/app/data/idx=3/year=2000/month=1/9c9b600151fb47e5a073e51a735e1537.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/9e59161660e140209e94cab5f7ea5098.parquet\n", - "/app/data/idx=3/year=2000/month=1/9fadcdc1ab7a4b9783128af7b744d705.parquet\n", - "/app/data/idx=3/year=2000/month=1/9fd3848ab9c54869b34c3a5d8e79be9a.parquet\n", - "/app/data/idx=3/year=2000/month=1/a2c45c983d5b469997c55c4e2ad72427.parquet\n", - "/app/data/idx=3/year=2000/month=1/a3f1f0a5cca84c4eaa7f2a1bef1f88b0.parquet\n", - "/app/data/idx=3/year=2000/month=1/a43049d78c9341668d77a63fc3b4d57f.parquet\n", - "/app/data/idx=3/year=2000/month=1/aa89184d32ca40c28f44109c97cee774.parquet\n", - "/app/data/idx=3/year=2000/month=1/ab3cf71e9caa44ec90adc43a56867162.parquet\n", - "/app/data/idx=3/year=2000/month=1/acab0d093d9a4bca854719e790512a25.parquet\n", - "/app/data/idx=3/year=2000/month=1/acf77747edbf4df5b457cfc8a77e0dc0.parquet\n", - "/app/data/idx=3/year=2000/month=1/b5672b45b393472986217241b378742f.parquet\n", - "/app/data/idx=3/year=2000/month=1/b7fd4df9bc9440ff94d713a7e43959d2.parquet\n", - "/app/data/idx=3/year=2000/month=1/b81af51b094e457faa6c786d1fffc470.parquet\n", - "/app/data/idx=3/year=2000/month=1/bbedc33b622c46b7af6af9c62e139163.parquet\n", - "/app/data/idx=3/year=2000/month=1/befaac43d5fa49f0a118ffaac6b5c4d3.parquet\n", - "/app/data/idx=3/year=2000/month=1/c0a4a83a65d94f2281b2039cac0e2c9e.parquet\n", - "/app/data/idx=3/year=2000/month=1/c4f44bc2181f45a3866cc232d80f2e46.parquet\n", - "/app/data/idx=3/year=2000/month=1/c63bff60ba67488d8ce536aa47774b53.parquet\n", - "/app/data/idx=3/year=2000/month=1/c74c114cc7e34985aeb20e14c2b26f3c.parquet\n", - "/app/data/idx=3/year=2000/month=1/c7eb09b4b0cf44eab86d88f11d00c222.parquet\n", - "/app/data/idx=3/year=2000/month=1/ce3160350479478da1a327405dc4cbe8.parquet\n", - "/app/data/idx=3/year=2000/month=1/cfb6a5a4bdbb4bb0a6afa699aa2e100a.parquet\n", - "/app/data/idx=3/year=2000/month=1/d137ffa9eeeb418491e792c7871334c6.parquet\n", - "/app/data/idx=3/year=2000/month=1/d43ffbf42b694713ae6e4b1e408529f9.parquet\n", - 
"/app/data/idx=3/year=2000/month=1/d7f91f13f3444032995bc7c6c0bcd1cd.parquet\n", - "/app/data/idx=3/year=2000/month=1/d9aa67eaa7f144fc8613ce81bd072167.parquet\n", - "/app/data/idx=3/year=2000/month=1/ddeb24d5cdb043f380654ff98d83adc9.parquet\n", - "/app/data/idx=3/year=2000/month=1/e1e0b2ae05154f459914dad148a7779f.parquet\n", - "/app/data/idx=3/year=2000/month=1/e74ecdc304164cd8b953c808a1353bfd.parquet\n", - "/app/data/idx=3/year=2000/month=1/e7eb8d26146c423eaa1a77343d16920b.parquet\n", - "/app/data/idx=3/year=2000/month=1/e937a5e6dd0241c1a50b24a1c9b4ea7a.parquet\n", - "/app/data/idx=3/year=2000/month=1/e94bfddc06704799a2699d3a90d9843b.parquet\n", - "/app/data/idx=3/year=2000/month=1/f08328e844ab486ca07eda98bf1ca9ba.parquet\n", - "/app/data/idx=3/year=2000/month=1/f154e97e55b0428185553c4acb9ce227.parquet\n", - "/app/data/idx=3/year=2000/month=1/f38d8f30947f4bd08fb1c10bc81d8ee7.parquet\n", - "/app/data/idx=3/year=2000/month=1/f3fb7ade438a4929aba0109858f4abe4.parquet\n", - "/app/data/idx=3/year=2000/month=1/f458cf905d5845f1ac64183bba7a4826.parquet\n", - "/app/data/idx=3/year=2000/month=1/f6ce7accff3e4eb8b601078583655865.parquet\n", - "/app/data/idx=3/year=2000/month=1/f7d729c528904fd182207989fef04050.parquet\n", - "/app/data/idx=3/year=2000/month=1/f9d5734d70c542a3bf5ba9e004cb2e95.parquet\n", - "/app/data/idx=3/year=2000/month=1/fbeb2f31e5784074a90d737fb8c4e047.parquet\n", - "/app/data/idx=3/year=2000/month=1/fc1fb4ad31c448eeb8724a3069e760f0.parquet\n", - "/app/data/idx=3/year=2000/month=1/fde846fa6d8649c9b1770638786fb18c.parquet\n", - "/app/data/idx=3/year=2000/month=1/fe9c940d68fd4759a90408a1245022a6.parquet\n", - "/app/data/idx=3/year=2000/month=1/ffebea86d7fe4a64a973415ab3b6eccf.parquet\n", - "/app/data/idx=4/year=2000/month=1/01a585864dc644b6a4a7b13ae97c1f85.parquet\n", - "/app/data/idx=4/year=2000/month=1/0251c252cf544dc49285c7e4fcbf9784.parquet\n", - "/app/data/idx=4/year=2000/month=1/026b7ed2f32a4a4d9b1fe4bf2e2c45ce.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/03343bb5f29d42f19ce58caddb755df7.parquet\n", - "/app/data/idx=4/year=2000/month=1/04f9e581b08c424595f85fa85f87cb2c.parquet\n", - "/app/data/idx=4/year=2000/month=1/05751ecfd2734eedb17546ca81f8344a.parquet\n", - "/app/data/idx=4/year=2000/month=1/05f08cd7531f42a792e243c617b344f1.parquet\n", - "/app/data/idx=4/year=2000/month=1/061bd006ae35412eb8e5b758c50102c4.parquet\n", - "/app/data/idx=4/year=2000/month=1/06ba2d68586e4088921c99eddd5a5d86.parquet\n", - "/app/data/idx=4/year=2000/month=1/06df2daa4186437791d71a6b8e23519d.parquet\n", - "/app/data/idx=4/year=2000/month=1/07369c0250b5496bbac305aa1909eaa1.parquet\n", - "/app/data/idx=4/year=2000/month=1/0beb1321d8304074994a90b3a7eb94c5.parquet\n", - "/app/data/idx=4/year=2000/month=1/0f0e0602ffe5408a82d5265b2dc5ec18.parquet\n", - "/app/data/idx=4/year=2000/month=1/0fc5d753f2184cb0868ae28fc84c227e.parquet\n", - "/app/data/idx=4/year=2000/month=1/135fcc4c1e5a4823ae050c1e89fa413c.parquet\n", - "/app/data/idx=4/year=2000/month=1/156b561654924ad1b111bd5c965a46c2.parquet\n", - "/app/data/idx=4/year=2000/month=1/168d6922b1824cedb14d5654d75ba284.parquet\n", - "/app/data/idx=4/year=2000/month=1/1827f11f108341ccb48a0bb6ab694a64.parquet\n", - "/app/data/idx=4/year=2000/month=1/18e1c91f8c724d30a77bdd47e665c571.parquet\n", - "/app/data/idx=4/year=2000/month=1/19016c157bce43e394b117e8e0ed2557.parquet\n", - "/app/data/idx=4/year=2000/month=1/1a2c4e9d435f4c5faf83efbbb559118b.parquet\n", - "/app/data/idx=4/year=2000/month=1/1d3c8ecb9804470c87bfd7c25a3dab28.parquet\n", - "/app/data/idx=4/year=2000/month=1/1ee5f78eb54548278ae0a857c616e84c.parquet\n", - "/app/data/idx=4/year=2000/month=1/1ff311b87ba74e998ff7a5267ba52832.parquet\n", - "/app/data/idx=4/year=2000/month=1/285e2e6ef8c34d45b73916b4bfe1a2bf.parquet\n", - "/app/data/idx=4/year=2000/month=1/288d2d389b1e4a7695454e12fc442592.parquet\n", - "/app/data/idx=4/year=2000/month=1/2bfb7829ce324e1bb182159d8a6e7966.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/2cb8084772654371bc4aab66bab3d5fc.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d0d1ce706fe41feadf69279c0290101.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d4a13244f154d278d237535e957d174.parquet\n", - "/app/data/idx=4/year=2000/month=1/2d73ffb2b7314b48b25c924dad691fa1.parquet\n", - "/app/data/idx=4/year=2000/month=1/30c6048fdac04824831e0a984445c238.parquet\n", - "/app/data/idx=4/year=2000/month=1/3b3610138fd84568b3f6b20ccce2b296.parquet\n", - "/app/data/idx=4/year=2000/month=1/3d33b2adeb0c406aafda7296398833d2.parquet\n", - "/app/data/idx=4/year=2000/month=1/3fb3450af6ed4ddc996b10c7316018af.parquet\n", - "/app/data/idx=4/year=2000/month=1/4384e6f19b984984a0e583891fab8200.parquet\n", - "/app/data/idx=4/year=2000/month=1/4499b3a4074d42ad87a6a74f031bad48.parquet\n", - "/app/data/idx=4/year=2000/month=1/578cd70733f54818812b7fee342f7922.parquet\n", - "/app/data/idx=4/year=2000/month=1/5a42dc9b52a845b394f570bc7e233637.parquet\n", - "/app/data/idx=4/year=2000/month=1/5e3f996936cd466c8f182e4925b457b9.parquet\n", - "/app/data/idx=4/year=2000/month=1/6171f6c076d442ce9ee9b2223a1c9e29.parquet\n", - "/app/data/idx=4/year=2000/month=1/637fabc040bd4139901780de2f98df24.parquet\n", - "/app/data/idx=4/year=2000/month=1/649b57f24c1c49e7aa025d1a111f31a6.parquet\n", - "/app/data/idx=4/year=2000/month=1/65dac4a30aba4d3e9a18e731bef42800.parquet\n", - "/app/data/idx=4/year=2000/month=1/6759ad29fa9a416498d408a97082da2d.parquet\n", - "/app/data/idx=4/year=2000/month=1/682c068895b54404aa02c22ec59d98d7.parquet\n", - "/app/data/idx=4/year=2000/month=1/6a2c44eebd7c447ab0eac8b5596612ce.parquet\n", - "/app/data/idx=4/year=2000/month=1/6c36185edd4a41bc8869406a3bc9b533.parquet\n", - "/app/data/idx=4/year=2000/month=1/6ce5ab2e0fce43c9be58cd6ca0ab1b0c.parquet\n", - "/app/data/idx=4/year=2000/month=1/6e7dfa62c7ab4743bd5b47c2d65fcd3f.parquet\n", - "/app/data/idx=4/year=2000/month=1/7137092484b641e3a41226810acbe2b7.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/727c5b50be444555bb0c8cb3493f136c.parquet\n", - "/app/data/idx=4/year=2000/month=1/761ed2925727400586b3f95bebe32b12.parquet\n", - "/app/data/idx=4/year=2000/month=1/779bc731cfdc4eb582b7d45275f45f7d.parquet\n", - "/app/data/idx=4/year=2000/month=1/79ee8145c2814549a38530b2c506544e.parquet\n", - "/app/data/idx=4/year=2000/month=1/7b59b765fa454ce0a9fbd88628d6f604.parquet\n", - "/app/data/idx=4/year=2000/month=1/8292f989424444f6aa18bbcfc68f1734.parquet\n", - "/app/data/idx=4/year=2000/month=1/83b3730a855b494487dd6728a517ee3b.parquet\n", - "/app/data/idx=4/year=2000/month=1/84ebbce76a7a4107b939b685da66b5f4.parquet\n", - "/app/data/idx=4/year=2000/month=1/855921d6f64644a38bd2be5d9669fe0a.parquet\n", - "/app/data/idx=4/year=2000/month=1/85ab57ea6d0e48efac390b6047a6f435.parquet\n", - "/app/data/idx=4/year=2000/month=1/8b974e69e33e41cdb5bde25a6a422fd6.parquet\n", - "/app/data/idx=4/year=2000/month=1/8dd5278b54e9413ebd42286dea00c4a3.parquet\n", - "/app/data/idx=4/year=2000/month=1/93128598152643a297db72dec38a07b5.parquet\n", - "/app/data/idx=4/year=2000/month=1/95448aeaacdc40fe97d207b2c80ca784.parquet\n", - "/app/data/idx=4/year=2000/month=1/9571568631184e1386c3528b8ce9ed26.parquet\n", - "/app/data/idx=4/year=2000/month=1/95f8e19b3af344db98dcc5c5f9546c3a.parquet\n", - "/app/data/idx=4/year=2000/month=1/9921911b40d041f6ac72c4d44578c5cf.parquet\n", - "/app/data/idx=4/year=2000/month=1/9b4d80c840c14d3b9c67da4c9877b628.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e259ea36fbb4c0ba9b6535a3f34544e.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e3706d0ded44106bf8e0dee8900cd28.parquet\n", - "/app/data/idx=4/year=2000/month=1/9e67c71850a54fe1aa354c43d2cd9c38.parquet\n", - "/app/data/idx=4/year=2000/month=1/9ef278bcdc3b41e89059c309bcbb005e.parquet\n", - "/app/data/idx=4/year=2000/month=1/a33e6304bb1b47daa86853f19b009366.parquet\n", - "/app/data/idx=4/year=2000/month=1/a9bc812dc596492eafcc73f01d0e53a3.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/abf0e405806744df9ea3e9908eb0451f.parquet\n", - "/app/data/idx=4/year=2000/month=1/acd468d1addc4d75944766e48c3eb324.parquet\n", - "/app/data/idx=4/year=2000/month=1/adbdc6fec62c463aa94e0ce707ae1768.parquet\n", - "/app/data/idx=4/year=2000/month=1/b24807369dfc461e92eb8a56a7931070.parquet\n", - "/app/data/idx=4/year=2000/month=1/b2f3d43c99f44131969e0fcf27cfbf3c.parquet\n", - "/app/data/idx=4/year=2000/month=1/b461c7cfd0f4483f8309f670f4f4265d.parquet\n", - "/app/data/idx=4/year=2000/month=1/b584791f45f74432a067632281285b9a.parquet\n", - "/app/data/idx=4/year=2000/month=1/b7ca6973a34c4f92831f16216beb33f2.parquet\n", - "/app/data/idx=4/year=2000/month=1/b7f16808e8e4491e8f86d3ae9766f2b9.parquet\n", - "/app/data/idx=4/year=2000/month=1/b9a2d05a74a84d71a1b65a0f05895011.parquet\n", - "/app/data/idx=4/year=2000/month=1/bc3036cc653e4584893f8b36e33c8f85.parquet\n", - "/app/data/idx=4/year=2000/month=1/be1318c7564d48be8435c11344627932.parquet\n", - "/app/data/idx=4/year=2000/month=1/bf90009dc7b14cfaab939f435d975a0b.parquet\n", - "/app/data/idx=4/year=2000/month=1/c0105d7e54fc42dc93d5140782960815.parquet\n", - "/app/data/idx=4/year=2000/month=1/c450cbe2674e488d8e30953252bc7a4b.parquet\n", - "/app/data/idx=4/year=2000/month=1/c82d37b18d65434ca1fe1b9cf4d29ccb.parquet\n", - "/app/data/idx=4/year=2000/month=1/cb53085f9145493b9a171d31b682e75f.parquet\n", - "/app/data/idx=4/year=2000/month=1/cc14bf7a74c9498889bc52e29f83edff.parquet\n", - "/app/data/idx=4/year=2000/month=1/ce3c90dd7e7a4f5a862580c14aa22c28.parquet\n", - "/app/data/idx=4/year=2000/month=1/d47149e3e1e34123a48f623ca121e8a8.parquet\n", - "/app/data/idx=4/year=2000/month=1/d9e0e3e786a942f5892c6ce17b37eb4a.parquet\n", - "/app/data/idx=4/year=2000/month=1/dbb82450694e4e76ab34f3e650d36594.parquet\n", - "/app/data/idx=4/year=2000/month=1/dc67b56f0c814648b9ebf8e1c483b923.parquet\n", - "/app/data/idx=4/year=2000/month=1/de2e16496bcd405b8d48aec4da4d5ae4.parquet\n", - 
"/app/data/idx=4/year=2000/month=1/de37409ac14b49c38c9c0da26d6c721f.parquet\n", - "/app/data/idx=4/year=2000/month=1/e8072594944141a5b078b74e739307d3.parquet\n", - "/app/data/idx=4/year=2000/month=1/e9b36b985eb44b44a5436af438f7ceb0.parquet\n", - "/app/data/idx=4/year=2000/month=1/eefdd2cdde1d4085964d1469a11f462c.parquet\n", - "/app/data/idx=4/year=2000/month=1/f89525bf20e540f29b021ce5f4d9eb3c.parquet\n", - "/app/data/idx=4/year=2000/month=1/fab7e098a4c8489785225a74b71ec2ef.parquet\n" - ] - } - ], - "source": [ - "dir_name = \"/app/data\"\n", - "\n", - "# Read data back.\n", - "dataset = ds.dataset(dir_name, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "print(\"\\n\".join(dataset.files))" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "ba4d7dc4", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:17:50.865185Z", - "start_time": "2021-06-16T11:17:50.378460Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# df=\n", - " instr val1 val2 idx year month\n", - "2000-01-09 00:00:00-05:00 A 99 54 0 2000 1\n", - "2000-01-13 09:30:00-05:00 A 99 62 0 2000 1\n", - "2000-01-13 09:35:00-05:00 A 54 76 0 2000 1\n", - "# df.shape=\n", - "(18075, 6)\n", - "# df.dtypes=\n", - "instr object\n", - "val1 int64\n", - "val2 int64\n", - "idx int32\n", - "year int32\n", - "month int32\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Read everything.\n", - "df2 = dataset.to_table().to_pandas()\n", - "\n", - "print(df_to_str(df2))" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "68e84388", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:05:43.018220Z", - "start_time": "2021-06-16T11:05:43.007510Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['A' 'B' 'C' 'D' 'E']\n", - "DatetimeIndex(['2000-01-06 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", - " '2000-01-01 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " 
'2000-01-08 00:00:00-05:00', '2000-01-12 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-15 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-14 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-13 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-08 00:00:00-05:00', '2000-01-10 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-10 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-07 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-08 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-01 00:00:00-05:00',\n", - " '2000-01-03 00:00:00-05:00', '2000-01-02 00:00:00-05:00',\n", - " '2000-01-04 00:00:00-05:00', '2000-01-15 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-10 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", - " '2000-01-05 00:00:00-05:00', '2000-01-11 00:00:00-05:00',\n", - " '2000-01-09 00:00:00-05:00', '2000-01-06 00:00:00-05:00',\n", - " '2000-01-11 00:00:00-05:00', '2000-01-13 00:00:00-05:00',\n", - " '2000-01-14 00:00:00-05:00', '2000-01-04 00:00:00-05:00',\n", - " '2000-01-10 
00:00:00-05:00', '2000-01-09 00:00:00-05:00',\n", - " '2000-01-12 00:00:00-05:00', '2000-01-07 00:00:00-05:00',\n", - " '2000-01-06 00:00:00-05:00', '2000-01-03 00:00:00-05:00',\n", - " '2000-01-01 00:00:00-05:00', '2000-01-08 00:00:00-05:00',\n", - " '2000-01-02 00:00:00-05:00', '2000-01-05 00:00:00-05:00',\n", - " '2000-01-15 00:00:00-05:00'],\n", - " dtype='datetime64[ns, America/New_York]', freq=None)\n" - ] - } - ], - "source": [ - "print(df2[\"instr\"].unique())\n", - "print(df2.index)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "205.6px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py deleted file mode 100644 index d7d5f9e56..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/parquet.tutorial.py +++ /dev/null @@ -1,304 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# 
display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# Show Parquet / Pyarrow API. - -# %% [markdown] -# ## Imports - -# %% -import logging -import os -import random - -import pandas as pd -import pyarrow as pa -import pyarrow.dataset as ds -import pyarrow.parquet as pq -from pyarrow.dataset import DirectoryPartitioning - -import helpers.hdbg as hdbg -import helpers.hio as hio - -hdbg.init_logger(verbosity=logging.INFO) -_LOG = logging.getLogger(__name__) - - -# %% -def get_df() -> pd.DataFrame: - """ - Create pandas random data, like: - - ``` - idx instr val1 val2 - 2000-01-01 0 A 99 30 - 2000-01-02 0 A 54 46 - 2000-01-03 0 A 85 86 - ``` - """ - instruments = "A B C D E".split() - "id stock val1 val2".split() - df_idx = pd.date_range( - pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-15"), freq="1D" - ) - # print(df_idx) - random.seed(1000) - - df = [] - for idx, inst in enumerate(instruments): - df_tmp = pd.DataFrame( - { - "idx": idx, - "instr": inst, - "val1": [random.randint(0, 100) for k in range(len(df_idx))], - "val2": [random.randint(0, 100) for k in range(len(df_idx))], - }, - index=df_idx, - ) - # print(df_tmp) - df.append(df_tmp) - df = pd.concat(df) - return df - - -# %% -def df_to_str(df: pd.DataFrame) -> str: - txt = "" - txt += "# df=\n%s" % df.head(3) - txt += "\n# df.shape=\n%s" % str(df.shape) - txt += "\n# df.dtypes=\n%s" % str(df.dtypes) - return txt - - -# %% [markdown] -# # Save and load all data in one file - -# %% -df = get_df() -# print(df.head()) -print(df_to_str(df)) - -# %% -table = pa.Table.from_pandas(df) - -print("table=\n%s" % table) - -# %% -# Save. -file_name = "df_in_one_file.pq" -pq.write_table(table, file_name) - -# %% -# Load. 
-df2 = pq.read_table(file_name) -print(df2) - -df2 = df2.to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Read a subset of columns - -# %% -df2 = pq.read_table(file_name, columns=["idx", "val1"]) -print(df2) - -df2 = df2.to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Partitioned dataset -# -# from https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data -# -# - A dataset can exploit a nested structure, where the sub-dir names hold information about which subset of the data is stored in that dir -# - E.g., "Hive" patitioning scheme "key=vale" dir names - -# %% -df = get_df() -print(df_to_str(df)) - -# %% -base = "." -dir_name = os.path.join(base, "parquet_dataset_partitioned") -os.system("rm -rf %s" % dir_name) - -pq.write_to_dataset(table, dir_name, partition_cols=["idx"]) - -# %% -# !ls parquet_dataset_partitioned - -# %% -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read everything. -df2 = dataset.to_table().to_pandas() - -print(df_to_str(df2)) - -# %% -# Load part of the data. - -df2 = dataset.to_table(filter=ds.field("idx") == 1).to_pandas() -print(df_to_str(df2)) - -df2 = dataset.to_table(filter=ds.field("idx") < 3).to_pandas() -print(df_to_str(df2)) - -# %% [markdown] -# ## Add year-month partitions - -# %% -df = get_df() -df["year"] = df.index.year -df["month"] = df.index.month - -print(df_to_str(df)) - -# %% -table = pa.Table.from_pandas(df) - -print("table=\n%s" % table) - -# %% -base = "." -dir_name = os.path.join(base, "pq_partitioned2") -os.system("rm -rf %s" % dir_name) - -pq.write_to_dataset(table, dir_name, partition_cols=["idx", "year", "month"]) - -# %% -# !ls $dir_name - -# %% -# !ls $dir_name/idx=0/year=2000/month=1 - -# %% -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read data back. 
-dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -df2 = dataset.to_table(filter=ds.field("idx") == 2).to_pandas() -print(df_to_str(df2)) - -# %% -# We could scan manually and create the dirs manually if we don't want to add -# add a new dir. -base = "." -dir_name = os.path.join(base, "parquet_dataset_partitioned2") -os.system("rm -rf %s" % dir_name) - -schemas = [] - -schema = pa.Table.from_pandas(df).schema -print(schema) -# assert 0 -# idx: int64 -# instr: string -# val1: int64 -# val2: int64 -# year: int64 -# month: int64 - -# grouped = df.groupby(lambda x: x.day) -group_by_idx = df.groupby("idx") -for idx, df_tmp in group_by_idx: - _LOG.debug("idx=%s -> df.shape=%s", idx, str(df_tmp.shape)) - # - group_by_year = df_tmp.groupby(lambda x: x.year) - for year, df_tmp2 in group_by_year: - _LOG.debug("year=%s -> df.shape=%s", year, str(df_tmp2.shape)) - # - group_by_month = df_tmp2.groupby(lambda x: x.month) - for month, df_tmp3 in group_by_month: - _LOG.debug("month=%s -> df.shape=%s", month, str(df_tmp3.shape)) - # file_name = "df_in_one_file.pq" - # pq.write_table(table, file_name) - # /app/data/idx=0/year=2000/month=1/02e3265d515e4fb88ebe1a72a405fc05.parquet - subdir_name = os.path.join( - dir_name, f"idx={idx}", f"year={year}", f"month={month}" - ) - table = pa.Table.from_pandas(df_tmp3, schema=schema) - schemas.append(table.schema) - # print(df_tmp3) - # print(table.schema) - # pq.write_to_dataset(table, - # subdir_name, schema=schema) - file_name = os.path.join(subdir_name, "df_out.pq") - hio.create_enclosing_dir(file_name) - pq.write_table(table, file_name) - -# %% -schemas[0] == schemas[4] - -# %% -schemas - -# %% - -# %% -# !ls $dir_name/idx=0/year=2000/month=1 - -# %% -# Read data back. 
-# https://github.com/dask/dask/issues/4194 -# src_dir = f"{dir_name}/idx=0/year=2000/month=1" -src_dir = f"{dir_name}/idx=0/year=2000" -dataset = ds.dataset(src_dir, format="parquet", partitioning="hive") - -df2 = dataset.to_table().to_pandas() -# print(df_to_str(df2)) -print("\n".join(dataset.files)) - -# %% [markdown] -# ## Partition manually - -# %% -partitioning = DirectoryPartitioning( - pa.schema([("year", pa.int16()), ("month", pa.int8()), ("day", pa.int8())]) -) -print(partitioning.parse("/2009/11/3")) - -# partitioning.discover() - -# %% -# !ls /app/data - -# %% -dir_name = "/app/data" - -# Read data back. -dataset = ds.dataset(dir_name, format="parquet", partitioning="hive") - -print("\n".join(dataset.files)) - -# %% -# Read everything. -df2 = dataset.to_table().to_pandas() - -print(df_to_str(df2)) - -# %% -print(df2["instr"].unique()) -print(df2.index) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb deleted file mode 100644 index 6dcf8078c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.ipynb +++ /dev/null @@ -1,210 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "81a273af", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:42.864614Z", - "start_time": "2021-06-16T11:41:42.860710Z" - } - }, - "outputs": [], - "source": [ - "# https://s3fs.readthedocs.io/en/latest/" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8fef0639", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:42.888158Z", - "start_time": "2021-06-16T11:41:42.869135Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 3" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "id": "37fe11a3", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.140014Z", - "start_time": "2021-06-16T11:41:42.890655Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import helpers.hs3 as hs3" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a4130a2c", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.145271Z", - "start_time": "2021-06-16T11:41:43.141535Z" - } - }, - "outputs": [], - "source": [ - "aws_profile = \"am\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a49a28ff", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.158474Z", - "start_time": "2021-06-16T11:41:43.148428Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "aws_region=%s us-east-1\n" - ] - } - ], - "source": [ - "# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key)\n", - "\n", - "s3 = hs3.get_s3fs(aws_profile)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1795133f", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.819759Z", - "start_time": "2021-06-16T11:41:43.160432Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bucket=alphamatic-data\n" - ] - }, - { - "data": { - "text/plain": [ - "['alphamatic-data/README.md', 'alphamatic-data/data']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False)\n", - "print(\"bucket=\" + bucket)\n", - "s3.ls(bucket)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9bc9623e", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.828493Z", - "start_time": "2021-06-16T11:41:43.822315Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['alphamatic-data/README.md', 
'alphamatic-data/data']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s3.ls(bucket)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "65f95a8a", - "metadata": { - "ExecuteTime": { - "end_time": "2021-06-16T11:41:43.839153Z", - "start_time": "2021-06-16T11:41:43.832520Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(os.environ[\"AWS_DEFAULT_REGION\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py deleted file mode 100644 index 65aa9d9f8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/s3.tutorial.py +++ /dev/null @@ -1,44 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 -# language: python -# name: python3 -# --- - -# %% -# 
https://s3fs.readthedocs.io/en/latest/ - -# %% -# %load_ext autoreload -# %autoreload 3 - -# %% -import os - -import helpers.hs3 as hs3 - -# %% -aws_profile = "am" - -# %% -# s3 = s3fs.S3FileSystem(anon=False, key=aws_access_key_id, secret=aws_secret_access_key) - -s3 = hs3.get_s3fs(aws_profile) - -# %% -bucket = hs3.get_s3_bucket_path(aws_profile, add_s3_prefix=False) -print("bucket=" + bucket) -s3.ls(bucket) - -# %% -s3.ls(bucket) - -# %% -print(os.environ["AWS_DEFAULT_REGION"]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb deleted file mode 100644 index 9f3df144d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ae351077", - "metadata": {}, - "source": [ - "# Maple\n", - "\n", - "https://www.sagemath.org/" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "67b105e6", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:38.281663Z", - "start_time": "2022-11-24T08:59:32.166395Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sagemath\n", - " Downloading sagemath-1.3.0.tar.gz (9.4 kB)\n", - "Collecting cython>=0.26\n", - " Downloading Cython-0.29.32-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)\n", - "\u001b[K |████████████████████████████████| 1.9 MB 3.2 MB/s eta 0:00:01\n", - "\u001b[?25hBuilding wheels for collected packages: sagemath\n", - " Building wheel for sagemath (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for sagemath: filename=sagemath-1.3.0-py3-none-any.whl size=9330 sha256=eb8efd936116026e66a021d4bdd88dc4d9ce207fd633706229625d26878de267\n", - " Stored in directory: /root/.cache/pip/wheels/da/63/1f/6dc0b464e0fec31a0d318d11748e11be903fe893fd6fb713fe\n", - "Successfully built sagemath\n", - "Installing collected packages: cython, sagemath\n", - "Successfully installed cython-0.29.32 sagemath-1.3.0\n" - ] - } - ], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sagemath)\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70f1c613", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "8dd49c0c", - "metadata": {}, - "source": [ - "# Sympy" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bab397f4", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:31.082906Z", - "start_time": "2022-11-24T08:59:08.303577Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sympy\n", - " Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\n", - "\u001b[K |████████████████████████████████| 6.5 MB 4.4 MB/s eta 0:00:01\n", - "\u001b[?25hCollecting mpmath>=0.19\n", - " Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)\n", - "\u001b[K |████████████████████████████████| 532 kB 6.2 MB/s eta 0:00:01\n", - "\u001b[?25hInstalling collected packages: mpmath, sympy\n", - "Successfully installed mpmath-1.2.1 sympy-1.11.1\n" - ] - } - ], - "source": [ - "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install sympy)\"" - ] - }, - { - "cell_type": "markdown", - "id": "c32a78b2", - "metadata": {}, - "source": [ - "## Features\n", - "\n", - "https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html#" - ] - }, - { - "cell_type": "markdown", - "id": "547104ae", - "metadata": {}, - "source": [ - "## Logic\n", - "\n", - 
"https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "016ffec6", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T08:59:48.097485Z", - "start_time": "2022-11-24T08:59:47.660109Z" - } - }, - "outputs": [], - "source": [ - "import sympy\n", - "from sympy import * # noqa: F403" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "15a65c7c", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:01:58.628860Z", - "start_time": "2022-11-24T09:01:58.614742Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle y \\vee \\left(x \\wedge y\\right)$" - ], - "text/plain": [ - "y | (x & y)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x, y = sympy.symbols(\"x,y\")\n", - "y | (x & y)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c016e526", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:02:16.425181Z", - "start_time": "2022-11-24T09:02:16.418742Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle x \\Rightarrow y$" - ], - "text/plain": [ - "Implies(x, y)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x >> y" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "961ab5b7", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:02:36.687945Z", - "start_time": "2022-11-24T09:02:36.681518Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\text{True}$" - ], - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Evaluate an expression.\n", - "(y & x).subs({x: True, y: True})" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d36a6df4", - "metadata": { - 
"ExecuteTime": { - "end_time": "2022-11-24T09:03:53.122377Z", - "start_time": "2022-11-24T09:03:53.108926Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\left(x \\wedge \\neg w\\right) \\vee \\left(y \\wedge z \\wedge \\neg x\\right)$" - ], - "text/plain": [ - "(x & ~w) | (y & z & ~x)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "w, x, y, z = sympy.symbols(\"w x y z\")\n", - "minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}]\n", - "sympy.SOPform([w, x, y, z], minterms)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "351f8a29", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:04:52.260031Z", - "start_time": "2022-11-24T09:04:52.244286Z" - } - }, - "outputs": [ - { - "data": { - "text/latex": [ - "$\\displaystyle \\neg x \\wedge \\neg y$" - ], - "text/plain": [ - "~x & ~y" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "b = (~x & ~y & ~z) | (~x & ~y & z)\n", - "sympy.simplify_logic(b)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "6997a50b", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:07:01.070407Z", - "start_time": "2022-11-24T09:07:01.063092Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 0] -> True\n", - "[0, 1] -> True\n", - "[1, 0] -> False\n", - "[1, 1] -> True\n" - ] - } - ], - "source": [ - "# Compute truth table.\n", - "from sympy.logic.boolalg import truth_table # noqa: E402\n", - "\n", - "table = truth_table(x >> y, [x, y])\n", - "for t in table:\n", - " print(f\"{t[0]} -> {t[1]}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "c70e51cf", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:08:01.433951Z", - "start_time": "2022-11-24T09:08:01.298800Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - 
"execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sympy.satisfiable(x & ~x)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "f9d0eda7", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:08:14.379803Z", - "start_time": "2022-11-24T09:08:14.364702Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{y: True, x: True}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sympy.satisfiable((x | y) & (x | ~y) & (~x | y))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "782bd93c", - "metadata": { - "ExecuteTime": { - "end_time": "2022-11-24T09:28:42.188931Z", - "start_time": "2022-11-24T09:28:42.124276Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{L: True, Q: True, B: False, N: False}" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# - (not L => Q and B and N)\n", - "# - (N => not L)\n", - "# - not Q => B\n", - "# - not B\n", - "\n", - "L, N, Q, B = sympy.symbols(\"L N Q B\")\n", - "\n", - "C = (\n", - " sympy.Implies(~L, Q & B & N)\n", - " & sympy.Implies(N, ~L)\n", - " & sympy.Implies(~Q, B)\n", - " & ~B\n", - ")\n", - "sympy.satisfiable(C)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1298f34b", - "metadata": {}, - "outputs": [], - "source": [ - "## Stats\n", - "\n", - "# https://docs.sympy.org/latest/modules/stats.html#" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": 
true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py deleted file mode 100644 index bd5b8a5aa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/notebooks/sage.tutorial.py +++ /dev/null @@ -1,98 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Maple -# -# https://www.sagemath.org/ - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sagemath)" - -# %% - -# %% [markdown] -# # Sympy - -# %% -# !sudo /bin/bash -c "(source /venv/bin/activate; pip install sympy)" - -# %% [markdown] -# ## Features -# -# https://docs.sympy.org/latest/tutorials/intro-tutorial/features.html# - -# %% [markdown] -# ## Logic -# -# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html#what-is-symbolic-computation - -# %% -import sympy -from sympy import * # noqa: F403 - -# %% -x, y = sympy.symbols("x,y") -y | (x & y) - -# %% -x >> y - -# %% -# Evaluate an expression. -(y & x).subs({x: True, y: True}) - -# %% -w, x, y, z = sympy.symbols("w x y z") -minterms = [{w: 0, x: 1}, {y: 1, z: 1, x: 0}] -sympy.SOPform([w, x, y, z], minterms) - -# %% -b = (~x & ~y & ~z) | (~x & ~y & z) -sympy.simplify_logic(b) - -# %% -# Compute truth table. 
-from sympy.logic.boolalg import truth_table # noqa: E402 - -table = truth_table(x >> y, [x, y]) -for t in table: - print(f"{t[0]} -> {t[1]}") - -# %% -sympy.satisfiable(x & ~x) - -# %% -sympy.satisfiable((x | y) & (x | ~y) & (~x | y)) - -# %% -# - (not L => Q and B and N) -# - (N => not L) -# - not Q => B -# - not B - -L, N, Q, B = sympy.symbols("L N Q B") - -C = ( - sympy.Implies(~L, Q & B & N) - & sympy.Implies(N, ~L) - & sympy.Implies(~Q, B) - & ~B -) -sympy.satisfiable(C) - -# %% -## Stats - -# https://docs.sympy.org/latest/modules/stats.html# diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py deleted file mode 100644 index 7550952ca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conda.py +++ /dev/null @@ -1,192 +0,0 @@ -""" -Import as: - -import helpers.old.conda as holdcond -""" - -import json -import logging -import os -from typing import Any, Dict, List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hsystem as hsystem -import helpers.old.user_credentials as holuscre - -_LOG = logging.getLogger(__name__) - - -def conda_system(cmd: str, *args: Any, **kwargs: Any) -> int: - """ - When running a conda command we need to execute a script to configure - conda. This script is typically executed in .bashrc but here we create a - new bash shell every time to execute a command, so we need to re-initialize - the shell before any conda command. 
- - :param cmd: - :param args: - :param kwargs: - :return: - """ - # TODO(gp): Pass conda_env_name as done in get_conda_list() - path = holuscre.get_credentials()["conda_sh_path"] - hdbg.dassert_path_exists(path) - hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) - cmd = f"source {path} && {cmd}" - output: int = hsystem.system(cmd, *args, **kwargs) - return output - - -def conda_system_to_string( - cmd: str, *args: Any, **kwargs: Any -) -> Tuple[int, str]: - path = holuscre.get_credentials()["conda_sh_path"] - hdbg.dassert_path_exists(path) - hdbg.dassert(os.path.isfile(path), "'%s' is not a file", path) - cmd = f"source {path} && {cmd}" - output: Tuple[int, str] = hsystem.system_to_string(cmd, *args, **kwargs) - return output - - -def get_conda_envs_dirs() -> List[str]: - """ - :return: list of the env dirs from conda - """ - _, ret = conda_system_to_string(r"conda config --show envs_dirs --json") - _LOG.debug("ret=%s", ret) - envs = json.loads(ret) - hdbg.dassert_in("envs_dirs", envs) - envs = envs["envs_dirs"] - hdbg.dassert_isinstance(envs, list) - return list(envs) - - -def set_conda_env_root(conda_env_path: str) -> None: - """ - Set conda env dirs so that it matches what specified in. - - > conda config --show envs_dirs --json - { - "envs_dirs": [ - "/Users/gp/.conda/envs", - ] - } - - > conda config --prepend envs_dirs /data/gp_wd/anaconda2/envs2 - """ - envs = get_conda_envs_dirs() - # - if not envs or envs[0] != conda_env_path: - _LOG.warning( - "%s is not the first env dir in %s", conda_env_path, str(envs) - ) - # Reset the list of conda envs. - _LOG.debug("Resetting envs_dir %s", str(envs)) - for env in envs: - _LOG.debug("Deleting %s", env) - cmd = f"conda config --remove envs_dirs {env}" - # We don't abort because of a bug in conda not deleting the key - # when asked for. 
- # CondaKeyError: 'envs_dirs': u'/data/shared/anaconda2/envs' is not - # in the u'envs_dirs' key of the config file - conda_system(cmd, abort_on_error=False) - envs = get_conda_envs_dirs() - _LOG.debug("Current envs: %s", str(envs)) - # Add the conda env. - cmd = f"conda config --prepend envs_dirs {conda_env_path}" - conda_system(cmd) - # Check. - envs = get_conda_envs_dirs() - hdbg.dassert( - envs or envs[0] != conda_env_path, - msg=f"{conda_env_path} is not first env dir in {envs}", - ) - else: - _LOG.debug( - "Nothing to do, since %s is already in %s", conda_env_path, envs - ) - - -def get_conda_info_envs() -> Tuple[dict, None]: - """ - :return: (env_dict, active_env) - - env_dict: map 'conda env name -> conda env path' - - active_env: name of the active conda env - """ - # > conda info --envs - # # conda environments: - # # - # aws /Users/gp/.conda/envs/aws - # bbg /Users/gp/.conda/envs/bbg - # deeplearning /Users/gp/.conda/envs/deeplearning - # jupyter /Users/gp/.conda/envs/jupyter - # test_conda /Users/gp/.conda/envs/test_conda - # TODO(gp): Use --json but we need to parse the json without any module. 
- ret = conda_system_to_string(r"conda info --envs")[1] - _LOG.debug("Parsing conda info\n%s", ret) - ret = ret.split("\n") - env_dict = {} - active_env = None - for line in ret: - line = line.rstrip().lstrip() - if line == "": - continue - if line.startswith("#"): - continue - vals = line.split() - if len(vals) == 2: - env_name, env_path = vals - env_dict[env_name] = env_path - elif len(vals) == 3: - env_name, star, env_path = vals - hdbg.dassert_eq(star, "*") - env_dict[env_name] = env_path - else: - _LOG.debug("Can't parse line='%s'", line) - return env_dict, active_env - - -def get_conda_list(conda_env_name: str) -> Dict[str, Dict[str, str]]: - """ - :return: env_dict mapping package name to their info - - env_dict: map 'conda env name -> conda env path' - - active_env: name of the active conda env - """ - # > conda list - # # packages in environment at /Users/gp/.conda/envs/: - # # - # # Name Version Build Channel - # absl-py 0.5.0 py_0 conda-forge - # agate 1.6.0 py_3 conda-forge - # agate-dbf 0.2.0 py27_0 conda-forge - # agate-excel 0.2.2 py_0 conda-forge - # TODO(gp): Use --json but we need to parse the json without any module. 
- cmd = rf"(conda activate {conda_env_name} 2>&1) >/dev/null && conda list" - ret = conda_system_to_string(cmd)[1] - ret = ret.split("\n") - env_dict = {} - labels = {1: "version", 2: "build", 3: "channel"} - for line in ret: - line = line.rstrip().lstrip() - _LOG.debug("line='%s'", line) - if line == "": - continue - if line.startswith("#"): - continue - vals = line.split() - env_dict[vals[0]] = {labels[k]: vals[k] for k in range(1, len(vals[:4]))} - return env_dict - - -_CONDA_PATH = None - - -def get_conda_path() -> Optional[str]: - global _CONDA_PATH - if not _CONDA_PATH: - rc, txt = conda_system_to_string("which conda", abort_on_error=False) - if rc == 0: - _CONDA_PATH = str(txt) - else: - _CONDA_PATH = "n/a" - return _CONDA_PATH diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py deleted file mode 100644 index 5b0445a31..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/conftest.py +++ /dev/null @@ -1,17 +0,0 @@ -import pathlib -from typing import Any, Optional - - -def pytest_ignore_collect( # type: ignore - collection_path: pathlib.Path, path: Any, config: Any -) -> Optional[bool]: - """ - Skip all tests in this directory. - - :param collection_path: path to analyze - :param path: path to analyze (deprecated) - :param config: pytest config object - :return: True if the path should be ignored - """ - # Ignore this directory and all its subdirectories. 
- return True diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py deleted file mode 100644 index f51cb5d8d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/env2.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Import as: - -import helpers.old.env2 as holdenv2 -""" - -import logging -import os -from typing import Tuple - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.old.conda as holdcond - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -def get_system_info(add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("System info") + "\n" - msg += f"user name={hsystem.get_user_name()}\n" - msg += f"server name={hsystem.get_server_name()}\n" - msg += f"os name={hsystem.get_os_name()}\n" - msg += f"conda path={holdcond.get_conda_path()}\n" - msg += f"conda env root={str(holdcond.get_conda_envs_dirs())}\n" - return msg - - -def get_package_summary(conda_env_name: str, add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("Package summary") + "\n" - conda_list = holdcond.get_conda_list(conda_env_name) - msg = "" - for package in ["pandas", "numpy", "scipy", "arrow-cpp"]: - ver = conda_list[package]["version"] if package in conda_list else "None" - line = f"{package}: {ver}" - msg += line + "\n" - return msg - - -def get_conda_export_list(conda_env_name: str, add_frame: bool) -> str: - msg = "" - if add_frame: - msg += hprint.frame("Package summary") + "\n" - cmd = rf"(conda activate {conda_env_name} 2>&1 >/dev/null) && conda list --export" - _, msg_tmp = holdcond.conda_system_to_string(cmd) - msg += msg_tmp - return msg - - -def 
save_env_file(conda_env_name: str, dir_name: str) -> Tuple[str, str]: - msg = "" - msg += get_system_info(add_frame=True) - msg += get_package_summary(conda_env_name, add_frame=True) - msg += get_conda_export_list(conda_env_name, add_frame=True) - # Save results. - if dir_name is not None: - file_name = ( - f"{conda_env_name}.{hsystem.get_user_name()}.{hsystem.get_os_name()}." - f"{hsystem.get_server_name()}.txt" - ) - dst_file = os.path.join(dir_name, file_name) - dst_file = os.path.abspath(dst_file) - hio.create_enclosing_dir(dst_file, incremental=True) - _LOG.info("Saving conda env signature to '%s'", dst_file) - hio.to_file(dst_file, msg) - else: - dst_file = None - return msg, dst_file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py deleted file mode 100644 index a9d6b4f46..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/tunnels.py +++ /dev/null @@ -1,267 +0,0 @@ -""" -Import as: - -import helpers.old.tunnels as holdtunn -""" - -import logging -import os -from typing import Any, Dict, List, Tuple, Union, cast - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.old.user_credentials as holuscre - -_LOG = logging.getLogger(__name__) - - -def _get_services_info() -> list: - # Server ports. - services = [ - # service name, server public IP, local port, remote port. - ("MongoDb", hsystem.get_env_var("OLD_DEV_SERVER"), 27017, 27017), - ("Jenkins", hsystem.get_env_var("JENKINS_SERVER"), 8080, 8080), - # ("Reviewboard", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8000, 8000), - # ("Doc server", hsystem.get_env_var("REVIEWBOARD_SERVER"), 8001, 80), - # Netdata to Jenkins and Dev server. 
- # ("Dev system performance", DEV_SERVER, 19999), - # ("Jenkins system performance", DEV_SERVER, 19999), - ] - return services - - -# ############################################################################# - - -def get_tunnel_info() -> Tuple[list, str]: - credentials = holuscre.get_credentials() - # - tunnel_info = credentials["tunnel_info"] - hdbg.dassert_is_not(tunnel_info, None) - # Add tunnels for standard services. - services = _get_services_info() - tunnel_info.extend(services) - # - ssh_key_path = credentials["ssh_key_path"] - hdbg.dassert_is_not(ssh_key_path, None) - # TODO(gp): Add check to make sure that the source ports are all different. - return tunnel_info, ssh_key_path - - -def tunnel_info_to_string(tunnel_info: list) -> str: - ret = "\n".join(map(str, tunnel_info)) - ret = hprint.indent(ret) - return ret - - -def parse_service( - service: Tuple[str, str, int, int], -) -> Dict[str, Union[str, int]]: - hdbg.dassert_eq(len(service), 4, "service=%s", service) - service_name, server, local_port, remote_port = service - return { - "service_name": service_name, - "server": server, - "local_port": local_port, - "remote_port": remote_port, - } - - -def find_service( - service_name: str, tunnel_info: list -) -> Tuple[str, str, int, int]: - found_service = False - for service in tunnel_info: - if service_name == parse_service(service)["service_name"]: - hdbg.dassert(not found_service) - found_service = True - ret: Tuple[str, str, int, int] = service - hdbg.dassert(found_service) - return ret - - -def get_server_ip(service_name: str) -> str: # pylint: disable=unused-argument - tunnel_info, _ = get_tunnel_info() - _LOG.debug("tunnels=\n%s", tunnel_info_to_string(tunnel_info)) - service = find_service("Doc server", tunnel_info) - server = parse_service(service)["server"] - server = cast(str, server) - return server - - -def _get_tunnel_info() -> Tuple[Any, str]: - credentials = holuscre.get_credentials() - # - tunnel_info = credentials["tunnel_info"] - 
hdbg.dassert_is_not(tunnel_info, None) - # Add tunnels for standard services. - services = _get_services_info() - tunnel_info.extend(services) - # - ssh_key_path = credentials["ssh_key_path"] - hdbg.dassert_is_not(ssh_key_path, None) - # TODO(gp): Add check to make sure that the source ports are all different. - return tunnel_info, ssh_key_path - - -def _tunnel_info_to_string(tunnel_info: list) -> str: - ret = "\n".join(map(str, tunnel_info)) - ret = hprint.indent(ret) - return ret - - -def _service_to_string(service: Tuple[str, str, str, str]) -> str: - service_name, server, local_port, remote_port = service - ret = ( - f"tunnel for service '{service_name}'" - + f" server='{server}'" - + f" port='{local_port}->{remote_port}'" - ) - return ret - - -# ############################################################################# - - -def _get_ssh_tunnel_process( - local_port: int, remote_port: int, fuzzy_match: bool -) -> Tuple[List[int], str]: - """ - Return the pids of the processes attached to a given port. - """ - - def _keep_line(line: str) -> bool: - keep = "ssh -i" in line - if keep: - if fuzzy_match: - keep = (f" {local_port}:localhost " in line) or ( - f" localhost:{remote_port} " in line - ) - else: - keep = f" {local_port}:localhost:{remote_port} " in line - return keep - - _LOG.debug("local_port=%d -> remote_port=%d", local_port, remote_port) - pids, txt = hsystem.get_process_pids(_keep_line) - _LOG.debug("pids=%s", pids) - _LOG.debug("txt=\n%s", txt) - return pids, txt - - -def _create_tunnel( - server_name: str, - local_port: int, - remote_port: int, - user_name: str, - ssh_key_path: str, -) -> None: - """ - Create tunnel from localhost to 'server' for the ports `local_port -> - remote_port` and `user_name`. 
- """ - ssh_key_path = os.path.expanduser(ssh_key_path) - _LOG.debug("ssh_key_path=%s", ssh_key_path) - hdbg.dassert_path_exists(ssh_key_path) - # - cmd = ( - "ssh -i {ssh_key_path} -f -nNT -L {local_port}:localhost:{remote_port}" - + " {user_name}@{server}" - ) - cmd = cmd.format( - user_name=user_name, - ssh_key_path=ssh_key_path, - local_port=local_port, - remote_port=remote_port, - server=server_name, - ) - hsystem.system(cmd, blocking=False) - # Check that the tunnel is up and running. - pids = _get_ssh_tunnel_process(local_port, remote_port, fuzzy_match=True) - hdbg.dassert_lte(1, len(pids)) - - -def _kill_ssh_tunnel_process(local_port: int, remote_port: int) -> None: - """ - Kill all the processes attached to either local or remote port. - """ - get_pids = lambda: _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=True - ) - hsystem.kill_process(get_pids) - - -# ############################################################################# - - -def start_tunnels(user_name: str) -> None: - """ - Start all the tunnels for the given user. - """ - _LOG.debug("user_name=%s", user_name) - # Get tunnel info. - tunnel_info, ssh_key_path = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, server, local_port, remote_port = service - pids, _ = _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=False - ) - if not pids: - _LOG.info("Starting %s", _service_to_string(service)) - _create_tunnel( - server, local_port, remote_port, user_name, ssh_key_path - ) - else: - _LOG.warning( - "%s already exists: skipping", _service_to_string(service) - ) - - -def stop_tunnels() -> None: - """ - Stop all the tunnels for the given user. - """ - # Get the tunnel info. 
- tunnel_info, _ = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, _, local_port, remote_port = service - _LOG.info("Stopping %s", _service_to_string(service)) - _kill_ssh_tunnel_process(local_port, remote_port) - - -def check_tunnels() -> None: - """ - Check the status of the tunnels for the given user. - """ - # Get the tunnel info. - tunnel_info, _ = _get_tunnel_info() - _LOG.info("\n%s", _tunnel_info_to_string(tunnel_info)) - # - for service in tunnel_info: - _, _, local_port, remote_port = service - pids, _ = _get_ssh_tunnel_process( - local_port, remote_port, fuzzy_match=False - ) - if pids: - msg = f"exists with pid={pids}" - else: - msg = "doesn't exist" - _LOG.info("%s -> %s", _service_to_string(service), msg) - - -def kill_all_tunnel_processes() -> None: - """ - Kill all the processes that have `ssh -i ...:localhost:...". - """ - - # cmd = "ps ax | grep 'ssh -i' | grep localhost: | grep -v grep" - def _keep_line(line: str) -> bool: - keep = ("ssh -i" in line) and (":localhost:" in line) - return keep - - get_pids = lambda: hsystem.get_process_pids(_keep_line) - hsystem.kill_process(get_pids) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py deleted file mode 100755 index 5faded15d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/old/user_credentials.py +++ /dev/null @@ -1,208 +0,0 @@ -#!/usr/bin/env python -""" -Import as: - -import helpers.old.user_credentials as holuscre -""" - -import argparse -import logging -import os -import pprint -from typing import Any, Dict, List, Tuple - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hparser as hparser -import 
helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -def get_dev_server_ip() -> str: - """ - Get the dev server name from the user environment. - """ - env_var_name = "" - if env_var_name not in os.environ: - _LOG.error( - "Can't find '%s': re-run dev_scripts/setenv.sh?", env_var_name - ) - raise RuntimeError - dev_server = os.environ[env_var_name] - return dev_server - - -# pylint: disable=too-many-statements -def get_credentials() -> Dict[str, Any]: - """ - Report information about a user set-up as a function of: 1) user name 2) - server name 3) git repository name. - - The mandatory information are: - 1) git_user_name - 2) git_user_email - 3) conda_sh_path: the path of the script bootstrapping conda - - To find "conda_sh_path": - > which conda - /data/root/anaconda3/bin/conda - > find /data/root/anaconda3 -name "conda.sh" - - In one instruction: - > CONDA_DIR=$(dirname $(which conda))"/.."; find $CONDA_DIR -name "conda.sh" - - If there are multiple ones you want to pick the one under - `profile.d`, e.g., `/anaconda3/etc/profile.d/conda.sh` - 4) conda_env_path: the path of the dir storing the conda environments - - To find "conda_env_path" - > conda info - ... 
- envs directories : /data/saggese/.conda/envs - - The optional information are: - 5) ssh_key_path: the path of the ssh key to use - 6) tunnel_info: list of "personal" ports to forward - - This is an advanced behavior that allows to specify in your user - config a set of ports to forward from one computer (typically your - laptop) to a set of services that are specific of your set-up (e.g., - started through `run_jupyter_server.py`) - - E.g., - ```python - if server_name in ("gpmac.local", "gpmac.lan"): - if git_repo_name == "": - service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) - ``` - when GP runs `ssh_tunnels.py` from his laptop in a - `` client, a tunnel is open to the dev - server where `run_jupyter_server.py` will have started a notebook server - 7) jupyter_port: on which port to start a jupyter server on a specific server - - It's a good idea for everybody to have a different port to avoid port - collisions - 8) notebook_html_path: the path where to save html of notebooks - 9) notebook_backup_path: the path where to backup the source .ipynb code of - notebooks - """ - # - user_name = hsystem.get_user_name() - server_name = hsystem.get_server_name() - _LOG.debug("user_name='%s'", user_name) - _LOG.debug("server_name='%s'", server_name) - git_repo_name = hgit.get_repo_full_name_from_client(super_module=True) - # Values to assign. - git_user_name = "" - git_user_email = "" - conda_sh_path = "" - ssh_key_path = "~/.ssh/id_rsa" - tunnel_info: List[Tuple[str, str, str, str]] = [] - jupyter_port = -1 - notebook_html_path = "" - notebook_backup_path = "" - # - conda_env_path = "~/.conda/envs" - conda_env_path = os.path.expanduser(conda_env_path) - if server_name in (): - conda_sh_path = "/anaconda3/etc/profile.d/conda.sh" - if user_name == "saggese": - # GP. - git_user_name = "saggese" - git_user_email = "abc@xyz.com" - if server_name.startswith("gpmac") or server_name.startswith( - "giacintos-mbp" - ): - # Laptop. 
- conda_sh_path = "/Users/saggese/opt/anaconda3/etc/profile.d/conda.sh" - conda_env_path = "/Users/saggese/.conda/envs" - if git_repo_name == "": - # Forward port 10003 to the notebook server that is started by - # `run_jupyter_server.py` when executed on the dev server. - # service = ("Jupyter1", get_dev_server_ip(), 10003, 10003) - # tunnel_info.append(service) - # jupyter_port = 10001 - pass - elif server_name == "": - if git_repo_name == "": - jupyter_port = 10003 - else: - hdbg.dassert_ne(conda_sh_path, "") - elif user_name == "paul": - # Paul. - git_user_name = "paul" - git_user_email = "abc@xyz.com" - if server_name in ("Pauls-MacBook-Pro.local", "Pauls-MBP"): - conda_sh_path = "/Users/paul/anaconda3/etc/profile.d/conda.sh" - conda_env_path = "/Users/paul/.conda/envs" - # Check. - for var_name, val_name in [ - ("git_user_name", git_user_name), - ("git_user_email", git_user_email), - ("conda_sh_path", conda_sh_path), - ("conda_env_path", conda_env_path), - # We allow the rest of the variables (e.g., ssh_key_path, tunnel_info) to - # be empty since in some configurations they can be undefined. - ]: - hdbg.dassert_is_not( - val_name, - None, - "Undefined '%s': add your credentials for user_name='%s' and " - "server_name='%s' to '%s'", - var_name, - user_name, - server_name, - __file__, - ) - conda_sh_path = os.path.expanduser(conda_sh_path) - conda_sh_path = os.path.abspath(conda_sh_path) - hdbg.dassert_path_exists(conda_sh_path) - # - conda_env_path = os.path.abspath(os.path.expanduser(conda_env_path)) - # Not necessarily the conda_env_path exists. - if not os.path.exists(conda_env_path): - _LOG.warning("The dir '%s' doesn't exist: creating it", conda_env_path) - hio.create_dir(conda_env_path, incremental=True) - hdbg.dassert_path_exists(os.path.dirname(conda_env_path)) - # - for service in tunnel_info: - # TODO(gp): We should call in ssh_tunnels.py to keep this encapsulated. 
- hdbg.dassert_eq(len(service), 4) - service_name, server, local_port, remote_port = service - _ = service_name, server, local_port, remote_port - ret = { - "git_user_name": git_user_name, - "git_user_email": git_user_email, - "conda_sh_path": conda_sh_path, - "conda_env_path": conda_env_path, - "ssh_key_path": ssh_key_path, - "tunnel_info": tunnel_info, - "jupyter_port": jupyter_port, - "notebook_html_path": notebook_html_path, - "notebook_backup_path": notebook_backup_path, - } - _LOG.debug("Credentials: %s", ret) - return ret - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--user", action="store", default=None, help="Impersonate a user" - ) - hparser.add_verbosity_arg(parser) - return parser - - -def _main(parser: argparse.ArgumentParser) -> None: - args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) - if args.user: - hsystem.set_user_name(args.user) - usc = get_credentials() - pprint.pprint(usc) - - -if __name__ == "__main__": - _main(_parse()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh deleted file mode 100644 index 45acd8194..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/install-texlive.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/sh - -# NOTE TO MAINTAINERS: this must be updated each time a new texlive is -# released! 
-default_version=2024 -tlversion=${1:-"$default_version"} -installer_archive=install-tl-unx.tar.gz - -usage () -{ - printf 'Install TeXLive\n' - printf 'Usage: %s [OPTIONS]\n\n' "$0" - printf 'Options:\n' - printf ' -t: TeXLive version (default %s)\n' "$default_version" - printf ' -m: mirror URL\n' -} - -if ! args=$(getopt 't:m:' "$@"); then - usage && exit 1 -fi -# The variable is intentionally left unquoted. -# shellcheck disable=SC2086 -set -- $args - -tlversion= -mirror_url= - -while true; do - case "$1" in - (-t) - tlversion="${2}" - shift 2 - ;; - (-m) - mirror_url="${2}" - shift 2 - ;; - (--) - shift - break - ;; - (*) - printf 'Unknown option: %s\n' "$1" - usage - exit 1 - ;; - esac -done - -[ -n "$tlversion" ] || tlversion="$default_version" - -if [ -z "$mirror_url" ] && [ "$tlversion" != "$default_version" ]; then - # Default mirror for historic releases - mirror_url="ftp://tug.org/historic/" -fi - -if [ -z "$mirror_url" ]; then - # Get the mirror URL from the redirect. Otherwise, if we were to - # always use the mirror URL, we'd run into problems whenever we get - # installer and signatures from different mirrors that are not 100% - # in sync. - mirror_url=$(wget -4 --quiet --output-document=/dev/null \ - --server-response \ - http://mirror.ctan.org/ \ - 2>&1 | \ - sed -ne 's/.*Location: \(.*\)$/\1/p' | head -n 1) -fi - -# Trim trailing slash(es) -mirror_url=$(echo "$mirror_url" | sed -e 's/\/*$//') - -if [ "$tlversion" = "$default_version" ]; then - installer_url="$mirror_url/systems/texlive/tlnet/" - repository= -else - installer_url="$mirror_url/systems/texlive/$tlversion/tlnet-final/" - repository=$installer_url -fi - -# Log the installer and repository url -printf 'installer URL: %s\n' "${installer_url}" -printf 'repository: %s\n' "${repository}" - -# Download the install-tl perl script. The archive integrity and signature is -# verified later, so it's ok if we use an insecure connection. 
-wget -4 --no-verbose --no-check-certificate \ - "$installer_url/$installer_archive" \ - "$installer_url/$installer_archive".sha512 \ - "$installer_url/$installer_archive".sha512.asc \ - || exit 1 - -## Verifiy installer integrity -# get current signing key -gpg --keyserver keyserver.ubuntu.com \ - --receive-key 0xC78B82D8C79512F79CC0D7C80D5E5D9106BAB6BC || exit 5 -gpg --verify "$installer_archive".sha512.asc || exit 5 -sha512sum "$installer_archive".sha512 || exit 5 - -## Proceed with installation -# Extract installer -mkdir -p ./install-tl -tar --strip-components 1 -zvxf "$installer_archive" -C "$PWD/install-tl" \ - || exit 1 - -# Run the default installation with the specified profile. -./install-tl/install-tl ${repository:+-repository "$repository"} \ - --profile=/root/texlive.profile - -# Cleanup installation artifacts. -rm -rf ./install-tl \ - "$installer_archive" \ - "$installer_archive.sha512" \ - "$installer_archive.sha512.asc" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt deleted file mode 100644 index 9e4ccf64f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/packages.txt +++ /dev/null @@ -1,115 +0,0 @@ -# Packages listed in https://pandoc.org/MANUAL.html#creating-a-pdf - -######################################################################### -### Packages required by pandoc -amsfonts # math fonts -amsmath # math commands -babel # required when pandoc is used with lang -beamer # for presentations -bidi # used by xelatex if the `dir` variable is set -bookmark # bookmarks with hyperref -booktabs # nicer-looking tables -caption # customize captions in floating envs; required for beamer -csquotes # used for typography if the `csquotes` 
variable is set to true -euler # use AMS Euler fonts for math -eurosym # Metafont and macros for Euro sign -fancyvrb # Verbatim environments for code blocks -framed # Needed with certain `--highlight-style` options -geometry # required if the `geometry` variable set -graphics # required if the document contains images -hyperref # hyperlinks -listings # if the `--listing` option is used -lm # Latin modern fonts -lm-math # Latin modern fonts for math -memoir # frequently used document class -multirow # Tabular cells spanning multiple rows -pgf # for TikZ and beamer -setspace # required if the `linestretch` variable is used -soul # required for underlined text -subfig # Figures broken into subfigures -tools # the LaTeX standard tools bundle; e.g., calc, longtable -xcolor # colors - -# Deprecated! Only used by older pandoc versions before 3.0. -ulem - -######################################################################### -### Semi-optional packages -# -# The following packages will be used to improve output quality if -# present, but pandoc does not require them to be present: -footnotehyper # to allow footnotes in tables -microtype # for better spacing adjustments -parskip # for better inter-paragraph spaces -upquote # for straight quotes in verbatim environments -xurl # for better line breaks in URLs - -######################################################################### -### Intentionally **NOT** installed due to size constraints. -# -#xeCJR # If CJKmainfont is set, xeCJK is needed. - -######################################################################### -### Required when using pandoc-crossref -cleveref # Intelligent cross-referencing -float # Improved interface for floating objects - -######################################################################### -### Extra engines and packages for XeLaTeX and LuaLaTeX. 
-fontspec # required with xelatex or lualatex -ifmtarg # if-then-else commands used in the default template -iftex # Checks for the specific LaTeX engine being used -latexmk -lua-ul # LuaLaTeX replacement of soul -luacode -luacolor -lualatex-math # LuaTeX specific math patches -luatexbase -mathspec # used by xelatex if the `mathspec` variable is set -selnolig # Used with LuaLaTeX to disable illegal typographic ligatures -unicode-math # Unicode math support for XeTeX and LuaTeX -xetex - -######################################################################### -### Reference management tools -biber -biblatex -bibtex -natbib - -######################################################################### -### I18n and languages -# -# The choice of selected languages is historic, those were the ones -# installed by TeXLive by default for a long time. -bidi -babel-basque -babel-czech -babel-danish -babel-dutch -babel-english -babel-finnish -babel-french -babel-german -babel-hungarian -babel-italian -babel-norsk -babel-polish -babel-portuges -babel-spanish -babel-swedish -hyphen-basque -hyphen-czech -hyphen-danish -hyphen-dutch -hyphen-english -hyphen-finnish -hyphen-french -hyphen-german -hyphen-hungarian -hyphen-italian -hyphen-norwegian -hyphen-polish -hyphen-portuguese -hyphen-spanish -hyphen-swedish diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile deleted file mode 100644 index dd5364e87..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/pandoc_docker_files/texlive.profile +++ /dev/null @@ -1,32 +0,0 @@ -# texlive.profile written on Tue Feb 5 09:43:07 2019 UTC -# It will NOT be updated and reflects only the -# installation profile at installation time. 
-# -# NOTE: see also alpine/latex.Dockerfile which appends -# `binary_x86_64-linuxmusl 1` to this file, use for non-glibc distributions. -selected_scheme scheme-basic -TEXDIR /opt/texlive/texdir -TEXMFLOCAL /opt/texlive/texmf-local -TEXMFSYSVAR /opt/texlive/texdir/texmf-var -TEXMFSYSCONFIG /opt/texlive/texdir/texmf-config -TEXMFVAR ~/.texlive/texmf-var -TEXMFCONFIG ~/.texlive/texmf-config -TEXMFHOME ~/texmf -instopt_adjustpath 0 -instopt_adjustrepo 1 -instopt_letter 0 -instopt_portable 0 -instopt_write18_restricted 1 -tlpdbopt_autobackup 1 -tlpdbopt_backupdir tlpkg/backups -tlpdbopt_create_formats 1 -tlpdbopt_desktop_integration 1 -tlpdbopt_file_assocs 1 -tlpdbopt_generate_updmap 0 -tlpdbopt_install_docfiles 0 -tlpdbopt_install_srcfiles 0 -tlpdbopt_post_code 1 -tlpdbopt_sys_bin /usr/local/bin -tlpdbopt_sys_info /usr/local/share/info -tlpdbopt_sys_man /usr/local/share/man -tlpdbopt_w32_multi_user 1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py deleted file mode 100644 index d8807f46b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/repo_config_utils.py +++ /dev/null @@ -1,411 +0,0 @@ -""" -Import as: - -import helpers.repo_config_utils as hrecouti -""" - -import logging -import os -from typing import Any, Dict, List, Optional, Union - -import yaml - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - -# Copied from hprint to avoid import cycles. - - -# TODO(gp): It should use *. -def indent(txt: str, num_spaces: int = 2) -> str: - """ - Add `num_spaces` spaces before each line of the passed string. 
- """ - spaces = " " * num_spaces - txt_out = [] - for curr_line in txt.split("\n"): - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - res = "\n".join(txt_out) - return res - - -# End copy. - - -# ############################################################################# - - -def _find_config_file(file_name: str) -> str: - """ - Find recursively the dir of config file. - - This function traverses the directory hierarchy upward from a - specified starting path to find the directory that contains the - config file. - - :param file_name: name of the file to find - :return: path to the file - """ - curr_dir = os.getcwd() - while True: - path = os.path.join(curr_dir, file_name) - if os.path.exists(path): - break - parent = os.path.dirname(curr_dir) - if parent == curr_dir: - # We cannot use helpers since it creates circular import. - raise FileNotFoundError( - f"Could not find '{file_name}' in current directory or any parent directories" - ) - curr_dir = parent - return path - - -def _get_env_var( - env_name: str, - as_bool: bool = False, - default_value: Any = None, - abort_on_missing: bool = True, -) -> Union[str, bool]: - """ - Get an environment variable by name. - - :param env_name: name of the env var - :param as_bool: convert the value into a Boolean - :param default_value: the default value to use in case it's not - defined - :param abort_on_missing: if the env var is not defined aborts, - otherwise use the default value - :return: value of env var - """ - if env_name not in os.environ: - if abort_on_missing: - assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" - else: - return default_value - value = os.environ[env_name] - if as_bool: - # Convert the value into a boolean. 
- if value in ("0", "", "None", "False"): - value = False - else: - value = True - return value - - -# ############################################################################# -# RepoConfig -# ############################################################################# - - -class RepoConfig: - def __init__(self, data: Dict) -> None: - """ - Set the data to be used by the module. - """ - self._data = data - - def set_repo_config_data(self, data: Dict) -> None: - self._data = data - - @classmethod - def from_file(cls, file_name: Optional[str] = None) -> "RepoConfig": - """ - Return the text of the code stored in `repo_config.yaml`. - """ - if file_name is None: - file_name = RepoConfig._get_repo_config_file() - assert os.path.exists(file_name), f"File '{file_name}' doesn't exist" - _LOG.debug("Reading file_name='%s'", file_name) - try: - with open(file_name, "r") as file: - # Use `safe_load()` to avoid executing arbitrary code. - data = yaml.safe_load(file) - assert isinstance(data, dict), ( - "data=\n%s\nis not a dict but %s", - str(data), - type(data), - ) - except Exception as e: - raise ValueError(f"Error reading YAML file {file_name}: {e}") - return cls(data) - - # TODO(gp): -> __str__? - def config_func_to_str(self) -> str: - """ - Return the string representation of the config function. - """ - ret: List[str] = [] - ret.append(f"get_host_name='{self.get_host_name()}'") - ret.append( - f"get_html_dir_to_url_mapping='{self.get_html_dir_to_url_mapping()}'" - ) - ret.append(f"get_invalid_words='{self.get_invalid_words()}'") - ret.append( - f"get_docker_base_image_name='{self.get_docker_base_image_name()}'" - ) - ret.append(f"get_release_team='{self.get_release_team()}'") - txt = "\n".join(ret) - return txt - - # repo_info - - # TODO(gp): -> get_repo_name - def get_name(self) -> str: - """ - Return the name of the repo, e.g., in `//amp`. 
- """ - value = self._data["repo_info"]["repo_name"] - return f"//{value}" - - def get_github_repo_account(self) -> str: - """ - Return the account name of the repo on GitHub, e.g., `causify-ai`, - `gpsaggese`. - """ - value = self._data["repo_info"]["github_repo_account"] - return value - - def get_repo_short_name(self) -> str: - """ - Return the short name of the repo, e.g., `amp`. - """ - value = self._data["repo_info"]["repo_name"] - return value - - def get_repo_full_name(self) -> str: - """ - Return the full name of the repo, e.g., `causify-ai/amp`, - `gpsaggese/notes`. - """ - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_name = self._data["repo_info"]["repo_name"] - value = f"{github_repo_account}/{repo_name}" - return value - - def get_repo_full_name_with_hostname(self) -> str: - """ - Return the full name of the repo, e.g., `github.com/causify-ai/amp`. - """ - repo_full_name = self.get_repo_full_name() - host_name = self.get_host_name() - value = f"{host_name}/{repo_full_name}" - return value - - # TODO(gp): We should replace this with `get_full_repo_name()`, since - # the mapping is not needed. - def get_repo_map(self) -> Dict[str, str]: - """ - Return a mapping of short repo name -> long repo name. - - E.g., - ``` - {"amp": "causify-ai/amp"} - {"helpers": "causify-ai/helpers"} - ``` - """ - repo_name = self._data["repo_info"]["repo_name"] - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_map = {repo_name: f"{github_repo_account}/{repo_name}"} - return repo_map - - # TODO(gp): Is this needed? 
- def get_extra_amp_repo_sym_name(self) -> str: - github_repo_account = self._data["repo_info"]["github_repo_account"] - repo_name = self._data["repo_info"]["repo_name"] - if repo_name in ["orange", "lemonade"]: - # TODO(Grisha): it should return cmamp name, not the current - return f"{github_repo_account}/cmamp" - else: - return f"{github_repo_account}/{repo_name}" - - # TODO(gp): -> get_github_host_name - def get_host_name(self) -> str: - """ - Return the host name of the repo, e.g., `github.com`. - """ - value = self._data["repo_info"]["github_host_name"] - return value - - def get_invalid_words(self) -> List[str]: - """ - Return a list of words that are considered invalid in the repo. - """ - values = self._data["repo_info"]["invalid_words"] - if values is None: - invalid_words = [] - else: - invalid_words = values.split(",") - return invalid_words - - def get_issue_prefix(self) -> str: - """ - Return the prefix for the issue, e.g., `CmampTask`, `HelpersTask`. - """ - value = self._data["repo_info"]["issue_prefix"] - return value - - # docker_info - - def get_docker_base_image_name(self) -> str: - """ - Return a base name for docker image. - - E.g., `helpers`. - """ - value = self._data["docker_info"]["docker_image_name"] - return value - - def get_release_team(self) -> str: - """ - Return the release team name for docker image. - - E.g., `dev_system`. - """ - value = self._data["docker_info"].get("release_team") - return value - - # s3_bucket_info - - def get_unit_test_bucket_path(self) -> str: - """ - Return the path to the unit test bucket. - """ - value = self._data["s3_bucket_info"]["unit_test_bucket_name"] - return value - - def get_html_bucket_path(self) -> str: - """ - Return the path to the bucket where published HTMLs are stored. - """ - value = self._data["s3_bucket_info"]["html_bucket_name"] - return value - - def get_html_bucket_path_v2(self) -> str: - """ - Return the path to the bucket with published HTMLs. 
- - "v2" version allows for the published HTMLs to be browsed. - """ - html_bucket = self.get_html_bucket_path() - html_bucket_path = os.path.join(html_bucket, "v2") - return html_bucket_path - - def get_html_ip(self) -> str: - """ - Return the IP of the bucket where published HTMLs are stored. - """ - value = self._data["s3_bucket_info"]["html_ip"] - return value - - def get_html_ip_v2(self) -> str: - """ - Return the IP of the bucket with published HTMLs. - - "v2" version allows for the published HTMLs to be browsed. - """ - ip = self.get_html_ip() - ip_v2 = f"{ip}/v2" - return ip_v2 - - def get_html_dir_to_url_mapping(self) -> Dict[str, str]: - """ - Return a mapping between directories mapped on URLs. - - This is used when we have web servers serving files from - specific directories. - """ - dir_to_url = { - self.get_html_bucket_path(): self.get_html_ip(), - self.get_html_bucket_path_v2(): self.get_html_ip_v2(), - } - return dir_to_url - - def get_shared_configs_bucket_name(self, environment: str) -> str: - """ - Return the name of the shared configs bucket. - """ - if "shared_configs_bucket_name" not in self._data["s3_bucket_info"]: - return None - value: Dict[str, str] = self._data["s3_bucket_info"][ - "shared_configs_bucket_name" - ] - bucket_name = value.get(environment, None) - return bucket_name - - def get_dir_suffix(self) -> str: - """ - Return the suffix of the dev_scripts_{dir_suffix} dir for the repo. - - E.g., `helpers` for `dev_scripts_helpers` in //helpers repo. - """ - value = self._data["runnable_dir_info"]["dir_suffix"] - return value - - def use_helpers_as_nested_module(self) -> bool: - """ - Return whether the helpers repo is used as a nested module. - """ - value = bool( - self._data["runnable_dir_info"]["use_helpers_as_nested_module"] - ) - return value - - # TODO(gp): Add functions for container_registry_info. - - def get_container_registry_url(self, registry: str = "ecr") -> str: - """ - Return the URL of the container registry. 
- - :param registry: the name of the container registry (e.g., `ecr`, `ghcr`) - :return: the URL of the container registry - """ - return self._data["container_registry_info"][registry] - - # Utils. - - @staticmethod - def _get_repo_config_file() -> str: - """ - Return the absolute path to `repo_config.yml` that should be used. - - The `repo_config.yml` is determined based on an overriding env var or - based on the root of the Git path. - """ - env_var = "CSFY_REPO_CONFIG_PATH" - file_path = _get_env_var(env_var, abort_on_missing=False) - if file_path: - _LOG.warning( - "Using value '%s' for %s from env var", file_path, env_var - ) - else: - # client_root = _find_git_root() - # We cannot use git root here because the config file doesn't always - # reside in the root of the repo (e.g., it can be in subdir such as - # //cmamp/ck.infra for runnable dir). - file_path = _find_config_file("repo_config.yaml") - file_path = os.path.abspath(file_path) - _LOG.debug("Reading file_name='%s'", file_path) - # Check if path exists. - # We can't use helpers since it creates circular import. - if not os.path.exists(file_path): - raise FileNotFoundError(f"File '{file_path}' doesn't exist") - return file_path - - -_REPO_CONFIG = None - - -def get_repo_config() -> RepoConfig: - """ - Return the repo config object. 
- """ - global _REPO_CONFIG - if _REPO_CONFIG is None: - _REPO_CONFIG = RepoConfig.from_file() - return _REPO_CONFIG diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py deleted file mode 100644 index cd24fecf1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/stage_linked_file.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Import as: - -import helpers.stage_linked_file as hstlifil -""" - -import argparse -import logging -import os -import shutil -from typing import List - -_LOG = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - - -def find_symlinks(dst_dir: str) -> List[str]: - """ - Find all symbolic links in the destination directory. - - :param dst_dir: Directory to search for symbolic links. - :return: List of paths to symbolic links. - """ - symlinks = [] - for root, _, files in os.walk(dst_dir): - for file in files: - file_path = os.path.join(root, file) - if os.path.islink(file_path): - symlinks.append(file_path) - return symlinks - - -def stage_links(symlinks: List[str]) -> None: - """ - Replace symbolic links with writable copies of the linked files. - - :param symlinks: List of symbolic links to replace. - """ - for link in symlinks: - # Resolve the original file the symlink points to. - target_file = os.readlink(link) - if not os.path.exists(target_file): - _LOG.warning( - f"Warning: Target file does not exist for link {link} -> {target_file}" - ) - continue - # Replace the symlink with a writable copy of the target file. - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable. 
- os.chmod(link, 0o644) - _LOG.info("Staged: %s -> %s", link, target_file) - except Exception as e: - _LOG.error("Error staging link %s: %s", link, e) - - -def main(): - parser = argparse.ArgumentParser( - description="Stage symbolic links for modification." - ) - parser.add_argument( - "--dst_dir", required=True, help="Destination directory." - ) - args = parser.parse_args() - symlinks = find_symlinks(args.dst_dir) - if not symlinks: - _LOG.info("No symbolic links found to stage.") - return - stage_links(symlinks) - _LOG.info("Staged %s files for modification.", len(symlinks)) - - -if __name__ == "__main__": - main() - -""" -Usage - - - python3 stage_linked_file.py --dst_dir /path/to/dst - -""" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py deleted file mode 100644 index 27344070d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/config.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Import as: - -import helpers.telegram_notify.config as htenocon -""" - -import getpass -import os -from typing import Tuple - -import helpers.hdbg as hdbg - -NOTIFY_JUPYTER_TOKEN = os.environ["CSFY_TELEGRAM_TOKEN"] - - -def get_info() -> Tuple[str, str]: - user = getpass.getuser() - # telegram_token is the token of your bot - # - You can use @NotifyJupyterBot, its token is - # '***REMOVED***' - # chat_id: To get it, start messaging with the bot. 
Then go to - # https://api.telegram.org/bot/getUpdates and get your chat id. - # (If you are using @NotifyJupyterBot, go to - # https://api.telegram.org/bot***REMOVED***/getUpdates ) - if user in ("saggese", "gsaggese", "root"): - telegram_token = NOTIFY_JUPYTER_TOKEN - chat_id = "967103049" - else: - hdbg.dfatal(f"User `{user}` is not in the config.py") - return telegram_token, chat_id diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py deleted file mode 100644 index e90c3968d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/get_chat_id.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python - -""" -Import as: - -import helpers.telegram_notify.get_chat_id as htngchid -""" - -import argparse -import json -import logging -from typing import Dict, cast - -import requests - -import helpers.telegram_notify.config as htenocon -import helpers.telegram_notify.telegram_notify as htnoteno - -_LOG = logging.getLogger(__name__) -_LOG.setLevel(logging.INFO) - - -def _get_updates_dict(token: str) -> dict: - updates_cont = requests.post( - f"https://api.telegram.org/bot{token}/getUpdates" - ).content - updates_dict = json.loads(updates_cont) - assert updates_dict["ok"], updates_dict - return cast(dict, updates_dict) - - -def _get_username_id(updates_dict: dict) -> Dict[str, str]: - return { - result["message"]["from"]["username"]: result["message"]["from"]["id"] - for result in updates_dict["result"] - } - - -def _get_chat_id_updates_dict(username: str, updates_dict: dict) -> str: - username_id = _get_username_id(updates_dict) - assert username in username_id.keys(), ( - "Either the username is wrong or you" - " have not sent a message to the bot yet" - ) - return 
username_id[username] - - -def send_chat_id(token: str, username: str) -> str: - updates_dict = _get_updates_dict(token) - chat_id = _get_chat_id_updates_dict(username, updates_dict) - htnoteno.TelegramNotify.send( - text=f"Your chat id is: {chat_id}", token=token, chat_id=chat_id - ) - return chat_id - - -def _main() -> None: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--username", required=True, action="store", type=str) - parser.add_argument("--token", required=False, action="store", type=str) - args = parser.parse_args() - username = args.username - if args.token: - token_ = args.token - else: - _LOG.info("Using default token for NotifyJupyterBot.") - token_ = htenocon.NOTIFY_JUPYTER_TOKEN - chat_id_ = send_chat_id(token_, username) - print(f"Your chat id is: {chat_id_}") - - -if __name__ == "__main__": - _main() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py deleted file mode 100644 index 6e0e3eb16..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/telegram_notify/telegram_notify.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Import as: - -import helpers.telegram_notify.telegram_notify as htnoteno -""" - -import json -import logging -import os -import os.path -import re -import sys -from typing import Optional - -import requests - -# Alternative that works for both Python 2 and 3: -import requests.compat as rcompa - -import helpers.telegram_notify.config as htenocon - -_LOG = logging.getLogger(__name__) - - -def _get_launcher_name() -> str: - """ - Return the name of jupyter notebook or path to python file you are running. 
- """ - import ipykernel - - try: # Python 3 (see Edit2 below for why this may not work in Python 2) - import notebook.notebookapp as ihnb - except ImportError: # Python 2 - import warnings - - import IPython.utils.shimmodule as iush - - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=iush.ShimWarning) - import IPython.html.notebookapp as ihnb - launcher = sys.argv[0] - if os.path.basename(launcher) == "ipykernel_launcher.py": - match = re.search( - "kernel-(.*).json", ipykernel.connect.get_connection_file() - ) - if match is None: - return launcher - kernel_id = match.group(1) - servers = ihnb.list_running_servers() - for ss in servers: - response = requests.get( - rcompa.urljoin(ss["url"], "api/sessions"), # type: ignore - params={"token": ss.get("token", "")}, - ) - for nn in json.loads(response.text): - if nn["kernel"]["id"] == kernel_id: - relative_path = nn["notebook"]["path"] - return str(os.path.basename(relative_path)) - return launcher - - -# ############################################################################# -# TelegramNotebookNotify -# ############################################################################# - - -class TelegramNotebookNotify: - """ - Sends notifications. - """ - - def __init__(self) -> None: - self.launcher_name = _get_launcher_name() - self.token, self.chat_id = htenocon.get_info() - - @staticmethod - def send( - text: str, token: Optional[str], chat_id: Optional[str] - ) -> Optional[bytes]: - if chat_id is None or token is None: - _LOG.warning( - "Not sending notifications. To send notifications, both " - "`chat_id` and `token` need to be specified. Go to README.md" - "for more information." - ) - return None - payload = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{token}/sendMessage", - data=payload, - ).content - - def notify(self, message: str) -> None: - msg = f"
{self.launcher_name}
: {message}" - self.send(msg, self.token, self.chat_id) - - -# ############################################################################# -# _RequestsHandler -# ############################################################################# - - -class _RequestsHandler(logging.Handler): - def emit(self, record: logging.LogRecord) -> bytes: # type: ignore - token, chat_id = htenocon.get_info() - log_entry = self.format(record) - payload = {"chat_id": chat_id, "text": log_entry, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{token}/sendMessage", - data=payload, - ).content - - -# ############################################################################# -# _LogFormatter -# ############################################################################# - - -class _LogFormatter(logging.Formatter): - def format(self, record: logging.LogRecord) -> str: - launcher_name = _get_launcher_name() - return f"
{launcher_name}
: {record.msg}" - - -def init_tglogger(log_level: int = logging.DEBUG) -> None: - """ - Send notifications using logging. - """ - _tg_log = logging.getLogger("telegram_notify") - _tg_log.setLevel(log_level) - handler = _RequestsHandler() - formatter = _LogFormatter() - handler.setFormatter(formatter) - _tg_log.handlers = [handler] - - -# ############################################################################# -# TelegramNotify -# ############################################################################# - - -class TelegramNotify: - """ - Send notifications. - """ - - def __init__(self) -> None: - self.token, self.chat_id = htenocon.get_info() - - def send(self, text: str) -> Optional[bytes]: - payload = {"chat_id": self.chat_id, "text": text, "parse_mode": "HTML"} - return requests.post( - f"https://api.telegram.org/bot{self.token}/sendMessage", - data=payload, - ).content diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt deleted file mode 100644 index 3135b8c8e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableCheckOnlyIfPresent1.test_mem_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(1, 2) kwargs={}) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): 
trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt deleted file mode 100644 index 2f396a270..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCacheEnableReadOnly1.test_mem_disk_cache1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -func(args=(4, 4) kwargs={}): trying to execute diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal1/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 
-1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_equal3/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal1/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt deleted file mode 100644 index 00529190c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ - a b c -0 0 2 2 -1 3 4 5 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckDataFrame1.test_check_df_not_equal4/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 
-1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt deleted file mode 100644 index 95d09f2b1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt deleted file mode 100644 index b68450ebb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestCheckString1.test_check_string_not_equal3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world2 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt deleted file mode 100644 index efbdde823..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test1/output/test.txt +++ /dev/null @@ -1,31 +0,0 @@ -original shape=(7, 2) -Head: -{ - "0":{ - "col_1":1.0, - "col_2":1 - }, - "1":{ - "col_1":2.0, - "col_2":2 - }, - "2":{ - "col_1":3.0, - "col_2":3 - } -} -Tail: -{ - "4":{ - "col_1":5.0, - "col_2":5 - }, - "5":{ - "col_1":6.0, - "col_2":6 - }, - "6":{ - "col_1":7.0, - "col_2":7 - } -} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt deleted file mode 100644 index cab20a014..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test2/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", - "col_2":1 - }, - "1":{ - "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", - "col_2":2 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test3/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test4/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt deleted file mode 100644 index 3c50fde31..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json/output/test.txt +++ /dev/null @@ -1,31 +0,0 @@ -original shape=(7, 2) -Head: -{ - "0":{ - "col_1":1.0, - "col_2":1 - }, - "1":{ - "col_1":2.0, - "col_2":2 - }, - "2":{ - "col_1":3.0, - "col_2":3 - } -} -Tail: -{ - "4":{ - "col_1":5.0, - "col_2":5 - }, - "5":{ - "col_1":6.0, - "col_2":6 - }, - "6":{ - "col_1":7.0, - "col_2":7 - } -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_datetime/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt deleted file mode 100644 index 4a6c9e821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_timestamp/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"2020-01-01T00:00:00", - "col_2":1.0 - }, - "1":{ - "col_1":"2020-05-12T00:00:00", - "col_2":2.0 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt deleted file mode 100644 index cab20a014..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDataframeToJson.test_dataframe_to_json_uuid/output/test.txt +++ /dev/null @@ -1,13 +0,0 @@ -original shape=(2, 2) -Head: -{ - "0":{ - "col_1":"421470c7-7797-4a94-b584-eb83ff2de88a", - "col_2":1 - }, - "1":{ - "col_1":"22cde381-1782-43dc-8c7a-8712cbdf5ee1", - "col_2":2 - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt deleted file mode 100644 index 9c8c2a07e..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -## docker_images_ls_repo: -## docker_login: -eval $(aws ecr get-login --profile am --no-include-email --region us-east-1) -docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt deleted file mode 100644 index e2df28b1f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_all/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## docker_kill: all=True -docker ps -a -docker rm -f $(docker ps -a -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt deleted file mode 100644 index 44a4748dc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_kill_last/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## docker_kill: all=False -docker ps -l -docker rm -f $(docker ps -l -q) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt deleted file mode 100644 index 613a41c2d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_docker_ps/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -docker ps --format='table {{.ID}}\t{{.Label "user"}}\t{{.Image}}\t{{.Command}}\t{{.RunningFor}}\t{{.Status}}\t{{.Ports}}\t{{.Label "com.docker.compose.service"}}' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt deleted file mode 100644 index 0c262d7ea..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_clean/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_clean: dry_run=False -find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt deleted file mode 100644 index e8a2a8473..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_fetch_master/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_fetch_master: -git fetch origin master:master diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt deleted file mode 100644 index 36f22574b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_git_pull/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -report_memory_usage=False report_cpu_usage=False -## git_pull: -git pull --autostash -git submodule foreach 'git pull --autostash' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt deleted file mode 100644 index 06d15ab26..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks1.test_print_setup/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -## print_setup: -ECR_BASE_PATH=665840871993.dkr.ecr.us-east-1.amazonaws.com -BASE_IMAGE=amp diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt deleted file mode 100644 index 265ef5fcf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('eval $(aws ecr get-login --no-include-email --region us-east-1)') -call('docker image ls 665840871993.dkr.ecr.us-east-1.amazonaws.com') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt deleted file mode 100644 index 202366437..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_all/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('docker ps -a') -call('docker rm -f $(docker ps -a -q)') diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt deleted file mode 100644 index 4ee19d730..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_kill_last/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('docker ps -l') -call('docker rm -f $(docker ps -l -q)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt deleted file mode 100644 index c8b46747d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_login/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('eval $(aws ecr get-login --profile am --no-include-email --region us-east-1)') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt deleted file mode 100644 index 614c9318f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_ps/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('docker ps --format=\'table {{.ID}}\\t{{.Label 
"user"}}\\t{{.Image}}\\t{{.Command}}\\t{{.RunningFor}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Label "com.docker.compose.service"}}\'') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt deleted file mode 100644 index 029e8a64f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_pull/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('eval $(aws ecr get-login --no-include-email --region us-east-1)') -call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev', pty=True) -call('docker pull 665840871993.dkr.ecr.us-east-1.amazonaws.com/helpers:prod', pty=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt deleted file mode 100644 index 7d238de7e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_docker_stats/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call("docker stats --no-stream --format='table {{.ID}}\\t{{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}\\t{{.PIDs}}'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_find_test_class1/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt deleted file mode 100644 index dc7c8a671..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo alphamatic/amp --draft --title "AmpTask1310_Implement_RH1E" --body ""') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt deleted file mode 100644 index 1aa1034a0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt deleted file mode 100644 index 
d93250129..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --draft --title "test" --body "hello_world\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt deleted file mode 100644 index a7010f356..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_create_pr3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('gh pr create --repo github.com/alphamatic/amp --title "test" --body "\n\n#1"') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_issue_title/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_list/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt deleted file mode 100644 index 7e38db5a7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_gh_workflow_run/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('gh workflow run fast_tests.yml --ref AmpTask1310_Implement_RH1E') -call('gh workflow run slow_tests.yml --ref AmpTask1310_Implement_RH1E') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt deleted file mode 100644 index e79742c64..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash') -call('git checkout -b test') -call('git push --set-upstream origin test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt deleted file mode 100644 index 25c178bb7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create1/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash --rebase') -call('git checkout -b AmpTask123_test') -call('git 
push --set-upstream origin AmpTask123_test') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt deleted file mode 100644 index 72eb80ddc..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_create2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git pull --autostash --rebase') -call('git checkout -b CmampTask1_fix_amp_tmux_session_script') -call('git push --set-upstream origin CmampTask1_fix_amp_tmux_session_script') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_branch_files/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt deleted file mode 100644 index b7c58a3d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git clean -fd >/dev/null 2>&1') -call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") -call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt deleted file mode 100644 index b7c58a3d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_clean2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git clean -fd >/dev/null 2>&1') -call("git submodule foreach 'git clean -fd >/dev/null 2>&1'") -call("find . 
-name '*\\.pyc' -o -name '*\\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' -o -name '.*_cache' -o -name 'htmlcov' | sort | xargs rm -rf") \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt deleted file mode 100644 index 0241acc2e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_fetch_master/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('git fetch origin master:master') -call("git submodule foreach 'git fetch origin master:master'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt deleted file mode 100644 index d9d3fc510..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_merge_master/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -call('git fetch origin master:master') -call("git submodule foreach 'git fetch origin master:master'") -call('git merge master') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt deleted file mode 100644 index 78883f1ba..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_git_pull/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -call('git pull --autostash') -call("git submodule foreach 'git pull --autostash'") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt deleted file mode 100644 index 70a06c388..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files ./helpers/lib_tasks.py ./helpers/test/TestDryRunTasks2.test_git_branch_create/output/test.txt ./helpers/test/TestDryRunTasks2.test_git_merge_master/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint1/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint2/output/test.txt ./helpers/test/TestDryRunTasks2.test_lint3/output/test.txt ./helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt deleted file mode 100644 index 28b088e72..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files core/dataflow/builders.py core/dataflow/core.py core/dataflow/dataflow_design.md core/dataflow/runners.py core/dataflow/visualization.py core/test/test_core.py dev_scripts/client_setup/build.sh devops/docker_build/install_packages.sh devops/docker_build/install_requirements.sh devops/docker_build/poetry.lock devops/docker_build/pyproject.toml documentation/general/workflows.txt helpers/datetime_.py helpers/git.py helpers/lib_tasks.py helpers/test/TestDryRunTasks1.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks1.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks1.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks1.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks1.test_git_clean/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull/output/test.txt helpers/test/TestDryRunTasks1.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks1.test_print_setup/output/test.txt helpers/test/TestDryRunTasks2.test_docker_images_ls_repo/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_all/output/test.txt helpers/test/TestDryRunTasks2.test_docker_kill_last/output/test.txt helpers/test/TestDryRunTasks2.test_docker_login/output/test.txt helpers/test/TestDryRunTasks2.test_docker_ps/output/test.txt helpers/test/TestDryRunTasks2.test_docker_pull/output/test.txt helpers/test/TestDryRunTasks2.test_docker_stats/output/test.txt helpers/test/TestDryRunTasks2.test_gh_create_pr/output/test.txt helpers/test/TestDryRunTasks2.test_gh_issue_title/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_list/output/test.txt helpers/test/TestDryRunTasks2.test_gh_workflow_run/output/test.txt 
helpers/test/TestDryRunTasks2.test_git_branch_files/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean/output/test.txt helpers/test/TestDryRunTasks2.test_git_clean2/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull/output/test.txt helpers/test/TestDryRunTasks2.test_git_pull_master/output/test.txt helpers/test/TestDryRunTasks2.test_print_setup/output/test.txt helpers/test/test_cache.py helpers/test/test_lib_tasks.py im/kibot/data/load/kibot_s3_data_loader.py im/kibot/data/load/test/test_s3_data_loader.py tasks.py test/test_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt deleted file mode 100644 index 9fac068a3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_lint3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -call('./linters/base.py --files /app/amp/helpers/test/test_lib_tasks.py') diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestDryRunTasks2.test_print_setup/output/test.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt deleted file mode 100644 index ac6627a2e..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/input/test.txt +++ /dev/null @@ -1,101 +0,0 @@ -,Name,Frequency,Country,Unit,Start Date,End Date,Commodity,Contracts,Business Category,is_alive,source_code,dataset_code,series_code,original_name,extracted_frequency,is_downloaded,WIND Commodity,Update,id_is_broken -0,Coal and coke CO2 emissions – Aruba – million metric tonnes carbon dioxide,Annual,Aruba,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ABW-MMTCD.A,"Coal and coke CO2 emissions, Aruba, Annual — million metric tonnes carbon dioxide",Annual,success,,, -1,Coal and coke CO2 emissions – Albania – million metric tonnes carbon dioxide,Annual,Albania,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ALB-MMTCD.A,"Coal and coke CO2 emissions, Albania, Annual — million metric tonnes carbon dioxide",Annual,success,,, -2,Coal and coke CO2 emissions – United Arab Emirates – 
million metric tonnes carbon dioxide,Annual,United Arab Emirates,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARE-MMTCD.A,"Coal and coke CO2 emissions, United Arab Emirates, Annual — million metric tonnes carbon dioxide",Annual,success,,, -3,Coal and coke CO2 emissions – Argentina – million metric tonnes carbon dioxide,Annual,Argentina,"kw, in millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARG-MMTCD.A,"Coal and coke CO2 emissions, Argentina, Annual — million metric tonnes carbon dioxide",Annual,success,,, -4,Coal and coke CO2 emissions – Armenia – million metric tonnes carbon dioxide,Annual,Armenia,"kw, in 
millions",,,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",,True,EIA,INTL,1-8-ARM-MMTCD.A,"Coal and coke CO2 emissions, Armenia, Annual — million metric tonnes carbon dioxide",Annual,success,,, -5,Germany: Term Structure of Interest Rate on Listed Federal Securities: 1,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,G0008063,,,success,Gold,2019-12-20,False -6,Germany: Term Structure of Interest Rate on Listed Federal Securities: 10,Daily,Germany,%,1997-08-07,2019-12-19,,,Upstream,True,WIND,Deutsche Bundesbank,SG000S6E,,,not_attempted,Gold,2019-12-20,True -7,France: Treasury Bills Reference Rate: 1Y,Daily,France,%,1989-01-03,2019-12-19,,,Upstream,True,WIND,Banque de France,G0008146,,,success,Gold,2019-12-20,False -8,France: Treasury Bills Reference Rate: 10Y,Daily,France,%,1987-01-02,2019-12-19,,,Upstream,True,WIND,Banque de France,G1400003,,,success,Gold,2019-12-20,False -9,Spain: Government Securities Yields: 12M,Daily,Spain,%,1987-07-01,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700068,,,success,Gold,2019-12-20,False -10,Spain: Government Securities Yields: 10Y,Daily,Spain,%,1989-07-18,2019-12-19,,,Upstream,True,WIND,Bank of Spain,G2700075,,,success,Gold,2019-12-20,False -11,Italy: Government Securities Yields: 3Y_,Daily,Italy,%,1989-07-24,2019-12-19,,,Upstream,True,WIND,Bank of Italy,G1700018,,,success,Gold,2019-12-20,False -12,Italy: Government Securities Yields: 10,Daily,Italy,%,1991-03-05,2019-12-19,,,Upstream,True,WIND,Bank of 
Italy,G1700020,,,success,Gold,2019-12-20,False -13,Futures Closing Price (Active Contract): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0066358,,,success,Gold,2019-12-20,False -14,Futures Settlement Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0068142,,,success,Gold,2019-12-20,False -15,Futures Closing Price (Continuous): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0147027,,,success,Gold,2019-12-20,False -16,Futures Closing Price (Continuous 3M): Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0031868,,,success,Gold,2019-12-20,False -17,Futures Settlement Price (Active Contract): 
Gold,Daily,China,yuan/g,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0181376,,,success,Gold,2019-12-20,False -18,Futures Trading Volume: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6409,,,not_attempted,Gold,2019-12-20,True -19,Futures Turnover: Gold,Daily,China,"CNY, in 10,000s",2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6410,,,not_attempted,Gold,2019-12-20,True -20,Futures Position: Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00G6411,,,not_attempted,Gold,2019-12-20,True -21,Futures Trading Volume (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M0096581,,,success,Gold,2019-12-20,False -22,Futures Position (Active Contract): Gold,Daily,China,lots,2008-01-09,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,M00096614,,,not_attempted,Gold,2019-12-20,True -23,Closing Stock on Warrant: Gold,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049497,,,success,Gold,2019-12-20,False -24,Duplicate) Closing Stock on Warrant: Gold: Total,Daily,China,kg,2008-01-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Futures Exchange,S0049505,,,success,Gold,2019-12-20,False -25,Futures Closing Price (Continuous): COMEX Gold,Daily,United States,USD/ounce,1975-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0069669,,,success,Gold,2019-12-20,False -26,Futures Closing Price (Active Contract}: COMEX 
Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0180903,,,success,Gold,2019-12-20,False -27,Futures Closing Price (Continuous): COMEX Mini Gold,Daily,United States,USD/ounce,2010-12-06,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S0147014,,,success,Gold,2019-12-19,False -28,Futures Settlement Price (Active Contract}: COMEX Gold),Daily,United States,USD/ounce,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0945,,,not_attempted,Gold,2019-12-20,True -29,Futures Closing Price (Active Contract: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G0906,,,not_attempted,Gold,2019-12-20,True -30,Futures Settlement Price (Active Contract}: COMEX Mini Gold),Daily,United States,USD/ounce,2013-01-03,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,S01G094E,,,not_attempted,Gold,2019-12-20,True -31,Futures Trading Volume (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M00096642,,,not_attempted,Gold,2019-12-20,True -32,Futures Position (Active Contract): COMEX Gold,Daily,United States,lots,2010-04-13,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,COMEX,M0096645,,,success,Gold,2019-12-20,False -33,COMEX: Silver: Inventory,Daily,United States,ozt,1992-09-01,2019-12-19,Silver,"COMEX:6Q,COMEX:QI,COMEX:SI,COMEX:SIL,COMEX:SIT,COMEX:SSP,COMEX:SV,COMEX:XY,COMEX:YV,DGCX:DS,ICEUS:YI,ICEUS:ZI,IFUS:HIO,IFUS:YI,IFUS:ZI,LME:AG,MCX:SILVER,SHFE:AG,TCE:12",Midstream,True,WIND,CME,S0114145,,,success,Gold,2019-12-20,False -34,SGE Gold: Closing Price: Au9995,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035818,,,success,Gold,2019-12-20,False -35,SGE Gold: Closing Price: 
Au9999,Daily,China,yuan/g,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035819,,,success,Gold,2019-12-20,False -36,SGE Gold: Closing Price: Au100G,Daily,China,yuan/g,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035820,,,success,Gold,2019-12-20,False -37,SGE Gold: Closing Price: AuT+D,Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035821,,,success,Gold,2019-12-20,False -38,SGE Gold: Settlement Price: Au (T+D),Daily,China,yuan/g,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0202645,,,success,Gold,2019-12-20,False -39,SGE Gold: Volume: Au9995,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035824,,,success,Gold,2019-12-20,False -40,SGE Gold: Volume: Au9999,Daily,China,kg,2002-10-30,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035825,,,success,Gold,2019-12-20,False -41,SGE Gold: Volume: Au100g,Daily,China,kg,2006-12-25,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035826,,,success,Gold,2019-12-20,False -42,SGE Gold: Volume: AuT+D,Daily,China,kg,2004-09-27,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0035827,,,success,Gold,2019-12-20,False -43,SGE Gold: Position: Au (T+D),Daily,China,kg,2008-08-11,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S5806075,,,success,Gold,2019-12-20,False -44,SGE Gold: Deferred Payment of Direction: Au 
(T+D),Daily,China,,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0182163,,,success,Gold,2019-12-20,False -45,SGE Gold: Delivery Volume: Au (T+D),Daily,China,kg,2008-08-22,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Shanghai Gold Exchange,S0181748,,,success,Gold,2019-12-20,False -46,Loco London Gold: In USD,Daily,United Kingdom,USD/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031645,,,success,Gold,2019-12-20,False -47,Loco Londen Gold: In EUR,Daily,United Kingdom,EUR/ounce,1999-01-04,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031646,,,success,Gold,2019-12-20,False -48,Loco Londen Gold: In GBP,Daily,United Kingdom,GBP/ounce,1968-01-02,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S0031647,,,success,Gold,2019-12-20,False -49,Closing Price: Paper Gold: Bank of China,Daily,China,yuan/g,2011-01-20,2014-10-31,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Company Website,S5806366,,,not_attempted,Gold,2014-11-03,False -50,Closing Price: Paper Gold: China Construction Bank,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806367,,,success,Gold,2019-12-20,False -51,Closing Price: Paper Gold: Industrial and Commercial Bank of China,Daily,China,yuan/g,2011-01-20,2019-12-18,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Company Website,S5806365,,,success,Gold,2019-12-20,False -52,Price: Gold: 99.95,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801701,,,success,Gold,2019-12-20,False -53,Price: Gold: 
99.99,Daily,China,yuan/g,2007-01-04,2019-12-20,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,According to the Press Finishing,S5801702,,,success,Gold,2019-12-20,False -54,SPDR Gold Shares: Total Net Asset Value Qunces in the Trust,Daily,United States,ozt,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105520,,,success,Gold,2019-12-20,False -55,SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105521,,,success,Gold,2019-12-20,False -56,SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United States,USD,2004-11-18,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,spdrgoldshares.com,S0105522,,,success,Gold,2019-12-20,False -57,iShares: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United 
States,USD,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807688,,,success,Gold,2019-12-20,False -58,iShares: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United States,ozt,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807690,,,success,Gold,2019-12-20,False -59,iShares: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United States,tons,2005-01-28,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807691,,,success,Gold,2019-12-20,False -60,GBS: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2004-04-01,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807692,,,success,Gold,2019-12-20,False -61,GBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807693,,,success,Gold,2019-12-20,False -62,GBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807694,,,success,Gold,2019-12-20,False -63,PHAU: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,USD,2007-04-25,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807695,,,success,Gold,2019-12-20,False -64,PHAU: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807696,,,success,Gold,2019-12-20,False -65,PHAU: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-04-24,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807697,,,success,Gold,2019-12-20,False -66,SGBS: SPDR Gold Shares: Total 
Net Asset Value in the Trust,Daily,Switzerland,USD,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808213,,,success,Gold,2019-12-20,False -67,SGBS: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808214,,,success,Gold,2019-12-20,False -68,SGBS: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-12-17,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5808215,,,success,Gold,2019-12-20,False -69,GOLD: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,United Kingdom,AUD,2004-01-09,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807698,,,success,Gold,2019-12-20,False -70,GOLD: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,United Kingdom,ozt,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz 
troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807699,,,success,Gold,2019-12-20,False -71,GOLD: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,United Kingdom,tons,2007-12-31,2019-12-19,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,True,WIND,Wind,S5807700,,,success,Gold,2019-12-20,False -72,SGOL: SPDR Gold Shares: Total Net Asset Value in the Trust,Daily,Switzerland,USD,2009-09-04,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807701,,,success,Gold,2019-10-31,False -73,SGOL: SPDR Gold Shares: Total Net Asset Value oz.t in the Trust,Daily,Switzerland,ozt,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807702,,,success,Gold,2019-10-31,False -74,SGOL: SPDR Gold Shares: Total Net Asset Value Tonnes in the Trust,Daily,Switzerland,tons,2009-09-02,2019-10-30,Gold,"CBT:ZG,COMEX:8Q,COMEX:GC,COMEX:GCD,COMEX:GCK,COMEX:GCT,COMEX:GSP,COMEX:GSR,COMEX:MGC,COMEX:MGT,COMEX:QO,COMEX:QS,COMEX:XK,COMEX:YG,COMEX:oz troyUSD,DGCX:DG,ICEUS:YG,IFUS:AUD,IFUS:GDF,IFUS:YG,IFUS:ZG,LME:AU,MCX:GOLD,RTS:GOLD,SHFE:AU,TAIFEX:TGF,TCE:11,TCE:16",Midstream,False,WIND,Wind,S5807703,,,success,Gold,2019-10-31,False -75,Price: Chinese Major Ports FOB: 
Silicon: 98.5,Daily,United Kingdom,USD/ton,2006-06-02,2014-05-30,,,Upstream,False,WIND,According to the Press Finishing,S0149035,,,not_attempted,,2014-06-03,False -76,"Price: Silicon Powder: -200 Mesh,-300 Mesh: Shanghai-made",Daily,China,yuan/kg,2005-01-04,2019-12-24,,,Upstream,True,WIND,According to the Press Finishing,S5801759,,,success,,2019-12-24,False -77,Market Price: Secondary Metallurgical Coke: National,Daily,China,yuan/ton,2013-12-31,2019-12-20,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,National Bureau of Statistics of China,S5914487,,,success,,2019-12-24,False -78,"Ex-factory Price (Tax-inclusive): Metallurgical Coke Grade 3 (A15%,0.6%): Yunng",Daily,China,yuan/ton,2004-10-22,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S0033511,,,success,,2019-12-24,False -79,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Rizhao,Daily,China,yuan/ton,2012-03-08,2015-02-11,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,False,WIND,Wind,S5118432,,,success,,2015-02-11,False -80,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Zibo,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118277,,,success,,2019-12-24,False -81,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Yinchuan,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118276,,,success,,2019-12-24,False -82,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Xinjiang County,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118275,,,success,,2019-12-24,False -83,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Xuzhou,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118273,,,success,,2019-12-24,False -84,Exit Price (Tax-inclusive): Secondary Metallurgical Coke: Tianjin,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118270,,,success,,2019-12-24,False -85,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Shuangyashan,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118267,,,success,,2019-12-24,False -86,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Shijiazhuang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118266,,,success,,2019-12-24,False -87,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Shanghai,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118264,,,success,,2019-12-24,False -88,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Qigihar,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118263,,,success,,2019-12-24,False -89,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Panzhihua,Daily,China,yuan/ton,2010-04-13,2019-12-06,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118260,,,success,,2019-12-06,False -90,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Mudanjlang,Daily,China,yuan/ton,2011-09-01,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118259,,,success,,2019-12-24,False -91,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Lvliang,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118258,,,success,,2019-12-24,False -92,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Linyt,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118255,,,success,,2019-12-24,False -93,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Linfen,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118254,,,success,,2019-12-24,False -94,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Jinzhong,Daily,China,yuan/ton,2011-09-16,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118252,,,success,,2019-12-24,False -95,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Hancheng,Daily,China,yuan/ton,2012-03-06,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118247,,,success,,2019-12-24,False -96,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Fukang,Daily,China,yuan/ton,2012-03-05,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118246,,,success,,2019-12-24,False -97,Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: 
Fushun,Daily,China,yuan/ton,2011-09-09,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118245,,,success,,2019-12-24,False -98,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos,Daily,China,yuan/ton,2010-04-20,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118243,,,success,,2019-12-24,False -99,Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: 
Anyang,Daily,China,yuan/ton,2010-04-13,2019-12-24,Coal,"CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF",Upstream,True,WIND,Wind,S5118241,,,success,,2019-12-24,False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt deleted file mode 100644 index 2de8022c8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestFilterDataByMethod.test1/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - Name Frequency Country Unit Start Date End Date Commodity Contracts Business Category is_alive source_code dataset_code series_code original_name extracted_frequency is_downloaded WIND Commodity Update id_is_broken -5 Germany: Term Structure of Interest Rate on Listed Federal Securities: 1 Daily Germany % 1997-08-07 2019-12-19 NaN NaN Upstream True WIND Deutsche Bundesbank G0008063 NaN NaN success Gold 2019-12-20 False -7 France: Treasury Bills Reference Rate: 1Y Daily France % 1989-01-03 2019-12-19 NaN NaN Upstream True WIND Banque de France G0008146 NaN NaN success Gold 2019-12-20 False -8 France: Treasury Bills Reference Rate: 10Y Daily France % 1987-01-02 2019-12-19 NaN NaN Upstream True WIND Banque de France G1400003 NaN NaN success Gold 2019-12-20 False -... 
-97 Price to Factory (Tax-inclusive): Secondary Metallurgical Coke: Fushun Daily China yuan/ton 2011-09-09 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118245 NaN NaN success NaN 2019-12-24 False -98 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Ordos Daily China yuan/ton 2010-04-20 2019-12-24 Coal CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118243 NaN NaN success NaN 2019-12-24 False -99 Ex-factory Price (Tax-inclusive): Secondary Metallurgical Coke: Anyang Daily China yuan/ton 2010-04-13 2019-12-24 Coal 
CLEAR:ALW,CLEAR:MTF,CLEAR:QP,CLEAR:QX,DCE:JM,ICEEUROPE:AFR,ICEEUROPE:ATW,ICEEUROPE:ILB,ICEEUROPE:NCF,ICEEUROPE:UCA,ICEEUROPE:UCP,ICEEUROPE:UCX,IFEU:AFO,IFEU:AFR,IFEU:ATD,IFEU:ATH,IFEU:ATW,IFEU:CRF,IFEU:GCF,IFEU:GNA,IFEU:GNO,IFEU:I42,IFEU:M42,IFEU:NCC,IFEU:NCF,IFEU:NWC,IFEU:NWD,IFEU:NWE,IFEU:NWF,IFEU:NWG,IFEU:RBC,IFEU:RBD,IFEU:RBE,IFEU:RBF,IFEU:RBG,IFEU:RCA,IFEU:RCO,IFEU:RDE,IFEU:RDF,IFEU:RDG,NYMEX:ACM,NYMEX:ACT,NYMEX:ALW,NYMEX:ICI,NYMEX:M5F,NYMEX:MFF,NYMEX:MTF,NYMEX:QLD,NYMEX:QP,NYMEX:QXB,NYMEX:SSI,SGX:ACF Upstream True WIND Wind S5118241 NaN NaN success NaN 2019-12-24 False diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt deleted file mode 100644 index 8c6bdf3cf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt +++ /dev/null @@ -1,18 +0,0 @@ -def func1(): - """ - First function. - - ``` - foo - ``` - """ - - -def func2(): - """ - Second function. - - ``` - foo - ``` - """ \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt deleted file mode 100644 index 3f4d616bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test1/output/test.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Test created for __main__.plbck_sum. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. - a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test3(self) -> None: - # Define input variables. - a = 2 - b = 3 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test4(self) -> None: - # Define input variables. - a = 3 - b = 4 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt deleted file mode 100644 index 3f4d616bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test2/output/test.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Test created for __main__.plbck_sum. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. 
- a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. - a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test3(self) -> None: - # Define input variables. - a = 2 - b = 3 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test4(self) -> None: - # Define input variables. - a = 3 - b = 4 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt deleted file mode 100644 index 1a2ceab1a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackFileMode1.test3/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -# Test created for __main__.plbck_sum. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestPlbckSum(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 0 - b = 1 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) - -class TestPlbckSum(hunitest.TestCase): - def test2(self) -> None: - # Define input variables. 
- a = 1 - b = 2 - # Call function to test. - actual = plbck_sum(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt deleted file mode 100644 index b5439e39d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 3 - b = 2 - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = 5 - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt deleted file mode 100644 index 6631e9e27..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test10/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": 2} - b = {"3": 4} - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt deleted file mode 100644 index 80e85048a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test11/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) - b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = hpandas.df_to_str(actual, num_rows=None) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt deleted file mode 100644 index 1d91a4a88..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test12/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": ["a", 2]} - b = {"3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = {"1": ["a", 2], "3": pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}), "4": {"5": 6}} - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt deleted file mode 100644 index badcab6f7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test13/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) - b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = hpandas.df_to_str(actual, num_rows=None) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt deleted file mode 100644 index 6b92491e1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test14/output/test.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.Series(data=[10, 20, 15], index=RangeIndex(start=0, stop=3, step=1), name="N Numbers", dtype=int64) - b = pd.Series(data=[10.0, 0.0, 5.5], index=RangeIndex(start=0, stop=3, step=1), name="Z Numbers", dtype=float64) - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = pd.Series(data=[20.0, 20.0, 20.5], index=RangeIndex(start=0, stop=3, step=1), name="None", dtype=float64) - expected = jsonpickle.decode(expected) - actual = hpandas.df_to_str(actual, num_rows=None) - expected = hpandas.df_to_str(expected, num_rows=None) - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt deleted file mode 100644 index 403295821..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test15/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") - b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt deleted file mode 100644 index 5a0f6c938..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test16/output/test.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = cconfig.Config.from_python("Config({'meta': 'meta value 1', 'list': [1, 2]})") - b = cconfig.Config.from_python("Config({'meta': 'meta value 2'})") - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = cconfig.Config.from_python("Config({'meta': 'meta value 2', 'list': [1, 2]})") - expected = jsonpickle.decode(expected) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt deleted file mode 100644 index 1884fe5bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test17/output/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string_none. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckStringNone(hunitest.TestCase): - def test1(self) -> None: - # Call function to test. - actual = get_result_check_string_none() - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt deleted file mode 100644 index 710587bb8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test18/output/test.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal_none. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqualNone(hunitest.TestCase): - def test1(self) -> None: - # Call function to test. - actual = get_result_assert_equal_none() - # Define expected output. - expected = "Some string." - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt deleted file mode 100644 index 40dc558c5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test2/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = "test" - b = "case" - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = "testcase" - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt deleted file mode 100644 index 68b93d84d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test3/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = [1, 2, 3] - b = [4, 5, 6] - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = [1, 2, 3, 4, 5, 6] - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt deleted file mode 100644 index faa6861c2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test4/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = {"1": 2} - b = {"3": 4} - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = {"1": 2, "3": 4} - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt deleted file mode 100644 index abfa197bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test5/output/test.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = pd.DataFrame.from_dict({'Price': [700, 250, 800, 1200]}) - b = pd.DataFrame.from_dict({'Price': [1, 1, 1, 1]}) - # Call function to test. - actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = pd.DataFrame.from_dict({'Price': [701, 251, 801, 1201]}) - actual = hpandas.df_to_str(actual, num_rows=None) - expected = hpandas.df_to_str(expected, num_rows=None) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt deleted file mode 100644 index f7fa7c8c9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test6/output/test.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_assert_equal. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultAssertEqual(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}' - a = jsonpickle.decode(a) - b = r'{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B9wBAQ=="]]}' - b = jsonpickle.decode(b) - # Call function to test. 
- actual = get_result_assert_equal(a=a, b=b) - # Define expected output. - expected = r'{"py/reduce": [{"py/type": "datetime.timedelta"}, {"py/tuple": [1096, 0, 0]}]}' - expected = jsonpickle.decode(expected) - # Compare actual and expected output. - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt deleted file mode 100644 index 25588d901..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test7/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = 3 - b = 2 - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt deleted file mode 100644 index cd51f2ced..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test8/output/test.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = "test" - b = "case" - # Call function to test. - actual = get_result_check_string(a=a, b=b) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt deleted file mode 100644 index c42805818..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestPlaybackInputOutput1.test9/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Test created for helpers.test.test_hplayback.get_result_check_string. 
- -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest -import jsonpickle -import pandas as pd -import config_root.config as cconfi - - -class TestGetResultCheckString(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - a = [1, 2, 3] - b = [4, 5, 6] - # Call function to test. - actual = get_result_check_string(a=a, b=b) - actual = str(actual) - # Check output. - self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_copy_rows_with_copy_from1/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - "column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt deleted file mode 100644 index 4f0f96902..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_create_insert_query/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -INSERT INTO test_table(id,column_1,column_2) VALUES %s diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt deleted file mode 100644 index c5faf0358..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -original shape=(3, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":4, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "2":{ - "id":5, - "column_1":1001.0, - "column_2":"test_string_2" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_duplicate_removal2/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - 
"column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt deleted file mode 100644 index 8547d2955..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestSql1.test_execute_insert_query1/output/test.txt +++ /dev/null @@ -1,30 +0,0 @@ -original shape=(5, 3) -Head: -{ - "0":{ - "id":1, - "column_1":1000.0, - "column_2":"test_string_1" - }, - "1":{ - "id":2, - "column_1":1001.0, - "column_2":"test_string_2" - }, - "2":{ - "id":3, - "column_1":1002.0, - "column_2":"test_string_3" - }, - "3":{ - "id":4, - "column_1":1003.0, - "column_2":"test_string_4" - }, - "4":{ - "id":5, - "column_1":1004.0, - "column_2":"test_string_5" - } -} -Tail: diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt deleted file mode 100644 index cd2308af6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/TestToPartitionedDataset.test_to_partitioned_dataset/output/test.txt +++ /dev/null @@ -1,65 +0,0 @@ -# Dir structure -. 
-dummy_value_1=1 -dummy_value_1=1/dummy_value_2=A -dummy_value_1=1/dummy_value_2=A/data.parquet -dummy_value_1=2 -dummy_value_1=2/dummy_value_2=B -dummy_value_1=2/dummy_value_2=B/data.parquet -dummy_value_1=3 -dummy_value_1=3/dummy_value_2=C -dummy_value_1=3/dummy_value_2=C/data.parquet -# File signatures -len(file_names)=3 -file_names=dummy_value_1=1/dummy_value_2=A/data.parquet, dummy_value_1=2/dummy_value_2=B/data.parquet, dummy_value_1=3/dummy_value_2=C/data.parquet -# dummy_value_1=1/dummy_value_2=A/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' -# dummy_value_1=2/dummy_value_2=B/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' -# dummy_value_1=3/dummy_value_2=C/data.parquet -num_lines=13 -''' -original shape=(1, 1) -Head: -{ - "0":{ - "dummy_value_3":0 - } -} -Tail: -{ - "0":{ - "dummy_value_3":0 - } -} -''' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt deleted file mode 100644 index ca3ab848c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test1/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ - description comment is_ok -0 hello Number of not submitted OMS child orders=0 / 7... 
True -1 hello2 ok True -is_ok=True \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt deleted file mode 100644 index b0e7738bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_CheckSummary.test2/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ - description comment is_ok -0 hello Number of not submitted OMS child orders=0 / 7... True -1 hello2 not_ok False -is_ok=False \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt deleted file mode 100644 index 393449cf4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_multiple_div_blocks/output/test.txt +++ /dev/null @@ -1,19 +0,0 @@ -Some text before - - - -:::: -::::{.column width=40%} - - - -Middle text - - - -:::columns -::::{.column width=60%} - - - -Some text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt deleted file mode 100644 index f3bdbccbf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_add_prettier_ignore_to_div_blocks.test_simple_div_block/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - - -:::: -::: - diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt deleted file mode 100644 index d5e54b365..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_column_mode.test3/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -df1.columns.difference(df2.columns)= -Index(['B'], dtype='object') -df2.columns.difference(df1.columns)= -Index(['C'], dtype='object') -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt deleted file mode 100644 index 464343e55..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_index_mode.test3/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - 
-################################################################################ -* Failed assertion * -cond=False -df1.index.difference(df2.index)= -Index([1, 4], dtype='int64') -df2.index.difference(df1.index)= -Index([5, 6], dtype='int64') -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json deleted file mode 100644 index 1e4b47491..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_llm_prompt_to_df2.test2/input/tmp.cache_simple._llm.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "{\"args\": [\"You are a calculator. Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"10 + 15\", \"gpt-5-nano\"], \"kwargs\": {}}": [ - "25", - 3.195e-05 - ], - "{\"args\": [\"You are a calculator. 
Given input in the format \\\"a + b\\\", return only\\nthe sum as a number.\\n\\nReturn ONLY the numeric result, nothing else.\", \"2 + 3\", \"gpt-5-nano\"], \"kwargs\": {}}": [ - "5", - 3.195e-05 - ] -} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt deleted file mode 100644 index dbd21a9a0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test1/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 NaN -2010-02-28 NaN -2010-03-31 NaN -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 NaN -2010-07-31 NaN -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 NaN -2013-03-31 NaN -2013-04-30 NaN diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt deleted file mode 100644 index 6e33e1427..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test2/output/test.txt +++ /dev/null @@ -1,33 +0,0 @@ - 0 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt deleted file mode 100644 index 3a043159d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test3/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 NaN -2010-02-28 NaN -2010-03-31 NaN -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.146756 -2010-07-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 
-2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.686501 -2013-03-31 0.686501 -2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt deleted file mode 100644 index 200d35c7a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test4/output/test.txt +++ /dev/null @@ -1,38 +0,0 @@ - 0 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.146756 -2010-07-31 0.146756 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.686501 -2013-03-31 0.686501 -2013-04-30 0.686501 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt deleted file mode 100644 index 
590e9e5f7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_apply_nan_mode.test5/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ - 0 -2010-01-31 0.000000 -2010-02-28 0.000000 -2010-03-31 0.000000 -2010-04-30 0.302333 -2010-05-31 0.146756 -2010-06-30 0.000000 -2010-07-31 0.000000 -2010-08-31 0.345561 -2010-09-30 0.396767 -2010-10-31 0.538817 -2010-11-30 0.419195 -2010-12-31 0.685220 -2011-01-31 0.204452 -2011-02-28 0.878117 -2011-03-31 0.027388 -2011-04-30 0.670468 -2011-05-31 0.417305 -2011-06-30 0.558690 -2011-07-31 0.140387 -2011-08-31 0.198101 -2011-09-30 0.800745 -2011-10-31 0.968262 -2011-11-30 0.313424 -2011-12-31 0.692323 -2012-01-31 0.876389 -2012-02-29 0.894607 -2012-03-31 0.085044 -2012-04-30 0.039055 -2012-05-31 0.169830 -2012-06-30 0.878143 -2012-07-31 0.098347 -2012-08-31 0.421108 -2012-09-30 0.957890 -2012-10-31 0.533165 -2012-11-30 0.691877 -2012-12-31 0.315516 -2013-01-31 0.686501 -2013-02-28 0.000000 -2013-03-31 0.000000 -2013-04-30 0.000000 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt deleted file mode 100644 index 9f8585df5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_header_list1.test2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -Consecutive headers increase by more than one level: - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt deleted file mode 100644 index ce0136250..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt deleted file mode 100644 index 0c31b6c66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_check_string_debug1.test2/output/test_df.txt +++ /dev/null @@ -1,3 +0,0 @@ -,a,b,c -0,0,1,2 -1,3,4,5 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv deleted file mode 100644 index 0ddcc75ab..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_convert_csv_to_dict.test1/input/test.csv +++ /dev/null @@ -1,5 +0,0 @@ -col1,col2,col3 -a,a,a -b,b,b -c,,c -d,, diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt deleted file mode 
100644 index 4f8eb6107..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt deleted file mode 100644 index b31ec5ee2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test3/output/test.txt +++ /dev/null @@ -1,6 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -hello -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt deleted file mode 100644 index 134e5b23c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test4/output/test.txt +++ /dev/null @@ -1,6 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -hello world -################################################################################ diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt deleted file mode 100644 index f99e55fe0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test5/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -Caught assertion while formatting message: -'not all arguments converted during string formatting' -hello %s world too_many -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt deleted file mode 100644 index 5ebc30e5f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test6/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -cond=False -Caught assertion while formatting message: -'not enough arguments for format string' -hello %s -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt deleted file mode 100644 index c941ca91b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert1.test7/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -You passed '['hello']' or type '' instead of str diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt deleted file mode 100644 index 41b8447e3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test3/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -hello world -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt deleted file mode 100644 index 41b8447e3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test4/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -hello world -################################################################################ diff 
--git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt deleted file mode 100644 index 3bdf77365..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_eq1.test5/output/test.txt +++ /dev/null @@ -1,10 +0,0 @@ - -################################################################################ -* Failed assertion * -'1' -== -'2' -Caught assertion while formatting message: -'not enough arguments for format string' -hello %s -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt deleted file mode 100644 index 5e9f4aa95..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_fail1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -issubclass() arg 2 must be a class, a tuple of classes, or a union \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt deleted file mode 100644 index 3eeaf0ce1..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail1/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance '' of class '_Man' is not a subclass of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt deleted file mode 100644 index e5b23c85f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_issubclass1.test_man_fail2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance '' of class '_Man' is not a subclass of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt deleted file mode 100644 index 69b3f64e9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_eq_all2/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* Failed assertion * -val1=3 
-[1, 2, 3] -val2=3 -[1, 2, 4] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt deleted file mode 100644 index 11a472589..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_in2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -'a' in '['xyz']' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt deleted file mode 100644 index bb58d202b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -'a' is 'None' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt deleted file mode 100644 index fca016604..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance2/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance of 'a' is '' instead of '' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt deleted file mode 100644 index b377f94fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_instance5/output/test.txt +++ /dev/null @@ -1,5 +0,0 @@ - -################################################################################ -* Failed assertion * -Instance of 'a' is '' instead of '(, )' -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt deleted file mode 100644 index 1c61bf06a..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted2/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -val1= -[1, 2, 4, 3] -is not sorted -sorted(val1)= -[1, 2, 3, 4] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt deleted file mode 100644 index a13f9d582..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_is_sorted4/output/test.txt +++ /dev/null @@ -1,9 +0,0 @@ - -################################################################################ -* Failed assertion * -val1= -[1, 2, 4, 3] -is not sorted -sorted(val1)= -[4, 3, 2, 1] -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt deleted file mode 100644 index 9fe19e631..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_misc1.test_no_duplicates2/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ - -################################################################################ -* 
Failed assertion * -val1= -[1, 3, 3] -has duplicates -3 -################################################################################ diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt deleted file mode 100644 index a1f1fdce9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_dassert_str_is_date.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -date='2022-11-01' doesn't have the right format: time data '2022-11-01' does not match format '%Y%m%d' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt deleted file mode 100644 index 48cd44539..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_conjunction1/output/test.txt +++ /dev/null @@ -1,28 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -2 7 8 9 -3 10 11 12 -################################################################################ -filters -################################################################################ -{'col_0': (('gt', 1), ('lt', 7)), 'col_1': ('eq', 5)} 
-################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -1 4 5 6 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 4), - ('n_col_0_gt_1', np.int64(3)), - ('perc_col_0_gt_1', '3 / 4 = 75.00%'), - ('n_col_0_lt_7', np.int64(2)), - ('perc_col_0_lt_7', '2 / 4 = 50.00%'), - ('n_col_1_eq_5', np.int64(1)), - ('perc_col_1_eq_5', '1 / 4 = 25.00%'), - ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt deleted file mode 100644 index c935f88e6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_comparison.test_disjunction1/output/test.txt +++ /dev/null @@ -1,28 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -2 7 8 9 -3 10 11 12 -################################################################################ -filters -################################################################################ -{'col_0': ('gt', 2), 'col_1': ('eq', 5)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -1 4 5 6 -2 7 8 9 -3 
10 11 12 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 4), - ('n_col_0_gt_2', np.int64(3)), - ('perc_col_0_gt_2', '3 / 4 = 75.00%'), - ('n_col_1_eq_5', np.int64(1)), - ('perc_col_1_eq_5', '1 / 4 = 25.00%'), - ('nrows_remaining', np.int64(3))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt deleted file mode 100644 index 456d06923..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_conjunction1/output/test.txt +++ /dev/null @@ -1,26 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -filters -################################################################################ -{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 2), - ('n_col_0', np.int64(1)), - ('perc_col_0', '1 / 2 = 50.00%'), - ('n_col_1', np.int64(1)), - 
('perc_col_1', '1 / 2 = 50.00%'), - ('n_col_2', np.int64(2)), - ('perc_col_2', '2 / 2 = 100.00%'), - ('nrows_remaining', np.int64(1))]) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt deleted file mode 100644 index ae70053b9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_filter_data_by_values1.test_disjunction1/output/test.txt +++ /dev/null @@ -1,27 +0,0 @@ -################################################################################ -data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -filters -################################################################################ -{'col_0': (1, 12), 'col_1': (2, 11), 'col_2': (3, 6)} -################################################################################ -filtered_data -################################################################################ - col_0 col_1 col_2 -0 1 2 3 -1 4 5 6 -################################################################################ -info -################################################################################ -OrderedDict([('nrows', 2), - ('n_col_0', np.int64(1)), - ('perc_col_0', '1 / 2 = 50.00%'), - ('n_col_1', np.int64(1)), - ('perc_col_1', '1 / 2 = 50.00%'), - ('n_col_2', np.int64(2)), - ('perc_col_2', '2 / 2 = 100.00%'), - ('nrows_remaining', np.int64(2))]) \ No newline at end of file diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt deleted file mode 100644 index a947c3402..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -A fake check_string output to use for test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt deleted file mode 100644 index 62b216ee4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_check_string_output1.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -A fake check_string output to use for test2 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test2/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt deleted file mode 100644 index 3b18e512d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv deleted file mode 100644 index abc3dac80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C,D,E -1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types deleted file mode 100644 index 81816c1d2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_from_typed_csv.test1/input/test.csv.types +++ /dev/null @@ -1 +0,0 @@ -{'A': 'int64', 'B': 'float64', 'C': 'object', 'D': 'object', 'E': 'int64'} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt deleted file mode 100644 index 4a3a582fe..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test1/output/test.txt +++ /dev/null @@ -1,58 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - 
CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt deleted file mode 100644 index 47371468a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test2/output/test.txt +++ /dev/null @@ -1,58 +0,0 @@ -stage='prod', 
use_privileged_mode=False, use_sibling_container=False, shared_data_dirs={'/data/shared': '/shared_data'}, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - - /data/shared:/shared_data - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt deleted file mode 100644 index 5ebe91b26..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test3/output/test.txt +++ /dev/null @@ -1,60 +0,0 @@ -stage='prod', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: 
${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} -networks: - default: - name: main_network diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt deleted file mode 100644 index eb8d4824a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test4/output/test.txt +++ /dev/null @@ -1,57 +0,0 @@ -stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: base_app - volumes: - - /app:/src - - ../../:/app - environment: - - MYPYPATH - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - 
PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt deleted file mode 100644 index 2c9d5ecf0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file1.test5/output/test.txt +++ /dev/null @@ -1,56 +0,0 @@ -stage='dev', use_privileged_mode=False, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=0 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - - CSFY_CI=$CSFY_CI - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /app:/app - linter: - extends: 
base_app - volumes: - - /app:/src - - ../../../:/app - environment: - - MYPYPATH - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt deleted file mode 100644 index 9ba5c60c9..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test1/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - 
- GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt deleted file mode 100644 index 91e37ffc3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test2/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - 
CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app - - CSFY_USE_HELPERS_AS_NESTED_MODULE=0 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/helpers1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/helpers1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt deleted file mode 100644 index a16d2f133..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test3/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, 
shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/cmamp1:/app - working_dir: /app/ck.infra - linter: - extends: base_app - volumes: - - /data/dummy/src/cmamp1/ck.infra:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt deleted file mode 100644 index b4afb6c80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_generate_compose_file2.test4/output/test.txt +++ /dev/null @@ -1,63 +0,0 @@ -stage='prod', use_privileged_mode=True, use_sibling_container=False, shared_data_dirs=None, mount_as_submodule=False, use_network_mode_host=True -version: '3' -services: - base_app: - cap_add: - - SYS_ADMIN - environment: - - CSFY_ENABLE_DIND=1 - - CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL - - CSFY_REPO_CONFIG_CHECK=True - - CSFY_REPO_CONFIG_PATH= - - CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID - - CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION - - CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE - - CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET - - CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY - - CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN - - CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH - - CSFY_GIT_ROOT_PATH=/app - - CSFY_HELPERS_ROOT_PATH=/app/amp/helpers_root - - CSFY_USE_HELPERS_AS_NESTED_MODULE=1 - - CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN - - CSFY_CI=$CSFY_CI - - GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN - - GH_TOKEN=$GH_ACTION_ACCESS_TOKEN - image: ${IMAGE} - restart: 'no' - volumes: - - ~/.aws:/home/.aws - - ~/.config/gspread_pandas/:/home/.config/gspread_pandas/ - - ~/.config/gh:/home/.config/gh - - ~/.ssh:/home/.ssh - privileged: true - network_mode: ${NETWORK_MODE:-host} - app: - extends: base_app - volumes: - - /data/dummy/src/orange1:/app - working_dir: /app - linter: - extends: base_app - volumes: - - /data/dummy/src/orange1:/src - working_dir: /src - environment: - - MYPYPATH - - CSFY_REPO_CONFIG_PATH=/app/repo_config.py - jupyter_server: - command: devops/docker_run/run_jupyter_server.sh - environment: - - PORT=${PORT} - extends: app - 
network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} - jupyter_server_test: - command: jupyter notebook -h 2>&1 >/dev/null - environment: - - PORT=${PORT} - extends: app - network_mode: ${NETWORK_MODE:-bridge} - ports: - - ${PORT}:${PORT} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl deleted file mode 100644 index 25ffea79afb3dad6014da937fd8ff7c64cfbb55f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 405 zcmbu(u}TCn5C-5=POmty^Br!R#KqQ5tptU`K4g&`x>)b*xwLl zAPnC>A16QGM~haxHX(s9!SC>iPp{kDcc1UPQuAlsa1IP%@R`>S$wC5qgNF9qcEih^ zM)$?XXzv!h9D($~l)kwaw@T#&&7%2;aOs%O@yz+ry2;oJv-TCg^Z#rJt zOv+;7YpG+DdW`ZPBmWrrRNJ=rhZ*Uc+e;dwsBxOiGhf16goGn_QVDW#6MA: - a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt deleted file mode 100644 index 7aad26473..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test2/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14) -################################################################################ -repr: -################################################################################ -: - 
a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt deleted file mode 100644 index d491215bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test3/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False , b=hello , c=3.14 ) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt deleted file mode 100644 index b5e297083..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test4/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14, hello=. 
at 0x>) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - hello='. at 0x>' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt deleted file mode 100644 index b69634f84..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test5/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 0x=(a=False, b=hello, c=3.14, _hello=under) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - _hello='under' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt deleted file mode 100644 index 332cd0a1d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str1.test6/output/test.txt +++ /dev/null @@ -1,12 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object1 at 
0x=(a=False, b=hello, c=3.14, _Object1__hello=double_dunder) -################################################################################ -repr: -################################################################################ -: - a='False' - b='hello' - c='3.14' - _Object1__hello='double_dunder' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test1/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt deleted file mode 100644 index fccd31195..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test2/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ 
-################################################################################ -str: -################################################################################ -_Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) -################################################################################ -repr: -################################################################################ -: - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - p='p' - q='q' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt deleted file mode 100644 index 08aebee19..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test3/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test4/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test5/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt deleted file mode 100644 index 28193b95d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_obj_to_str2.test6/output/test.txt +++ /dev/null @@ -1,11 +0,0 @@ -################################################################################ -str: -################################################################################ -_Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) -################################################################################ -repr: -################################################################################ -: - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt deleted file mode 100644 index 14a9380bb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_linux1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -xdg-open a.html diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_mac1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_html.test_windows1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt deleted file mode 100644 index b0047fa49..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_open_pdf.test_mac1/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -None diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., 
to hide utility functions in the scope of the function that uses them - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt deleted file mode 100644 index 38f3146a7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - - - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` - - -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - ```python - def 
print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt deleted file mode 100644 index dacb761b7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt +++ /dev/null @@ -1,20 +0,0 @@ -0:- Functions can be declared in the body of another function -1:- E.g., to hide utility functions in the scope of the function that uses them -2: - -3: ```python -4: def print_integers(values): -5: -6: def _is_integer(value): -7: try: -8: return value == int(value) -9: except: -10: return False -11: -12: for v in values: -13: if _is_integer(v): -14: print(v) -15: ``` -16: - -17:- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt deleted file mode 100644 index 52f34afc3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_purify_from_env_vars.test_end_to_end/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -$AM_AWS_S3_BUCKET = $AM_AWS_S3_BUCKET -$CSFY_AWS_S3_BUCKET = $CSFY_AWS_S3_BUCKET diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed deleted file mode 100644 index 0850990c3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dev_scripts/testing/test/test_run_tests.py": true, - "dev_scripts/testing/test/test_run_tests2.py": true, - "helpers/test/test_printing.py::Test_dedent1::test1": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true, - "helpers/test/test_printing.py::Test_dedent2::test1": true, - "documentation/scripts/test/test_all.py": true, - "documentation/scripts/test/test_render_md.py": true, - "helpers/test/helpers/test/test_list.py::Test_list_1": true, - "helpers/test/helpers/test/test_list.py::Test_list_2": true, - "helpers/test/test_cache.py::TestAmpTask1407": true -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt deleted file mode 100644 index 61323668a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/output/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' -HH:MM:SS - INFO 
lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/input/cache/lastfailed' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 10 failed pytest 'tests' target(s); to reproduce run: -pytest dev_scripts/testing/test/test_run_tests.py -pytest dev_scripts/testing/test/test_run_tests2.py -pytest documentation/scripts/test/test_all.py -pytest documentation/scripts/test/test_render_md.py -pytest helpers/test/helpers/test/test_list.py::Test_list_1 -pytest helpers/test/helpers/test/test_list.py::Test_list_2 -pytest helpers/test/test_cache.py::TestAmpTask1407 -pytest helpers/test/test_printing.py::Test_dedent1::test1 -pytest helpers/test/test_printing.py::Test_dedent1::test2 -pytest helpers/test/test_printing.py::Test_dedent2::test1 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test1/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt deleted file mode 100644 index 9e66e81bd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt +++ /dev/null @@ -1,325 +0,0 @@ -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, timeout-2.0.2, rerunfailures-10.2, cov-3.0.0, instafail-0.4.2, xdist-2.5.0, forked-1.4.0 -collecting ... 
>>ENV<<: is_inside_container=True: code_version=1.0.6, container_version=1.0.6, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=False AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True -# Git - branch_name='CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests' - hash='ca2dbf510' - # Last commits: - * ca2dbf510 Sonya Nikiforova Merge branch 'master' into CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests ( 2 hours ago) Mon Feb 14 16:25:29 2022 (HEAD -> CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests, origin/CmTask695_Unify_pytest_outputs_for_run_fast_slow_tests) - |\ - * | 63a471cca sonniki CmTask695: Update for reproducibility ( 2 hours ago) Mon Feb 14 16:15:14 2022 - | * 0d236ad57 Nikola Jašek CMTask1103: Add tests for HistoricalPqByTileClient (#1176) ( 2 hours ago) Mon Feb 14 16:01:56 2022 (origin/master, origin/HEAD) -# Machine info - system=Linux - node name=61ceebd0998a - release=5.11.0-1028-aws - version=#31~20.04.1-Ubuntu SMP Fri Jan 14 14:37:50 UTC 2022 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=33295769600, available=23499386880, percent=29.4, used=9048117248, free=17212899328, active=2693218304, inactive=12081451008, buffers=651313152, cached=6383439872, shared=286130176, slab=934486016) - disk usage=sdiskusage(total=104021790720, used=40223850496, free=63781163008, percent=38.7) -# Packages - python: 3.8.10 - gluonnlp: ? 
- gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.9.0 - numpy: 1.22.0 - pandas: 1.3.5 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.2 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest datapull/common/data/client/test/test_historical_pq_clients.py' -INFO: Saving log to file 'tmp.pytest.log' -collected 9 items - -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 (1.14 s) PASSED [ 11%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 (1.05 s) PASSED [ 22%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 (0.00 s) PASSED [ 33%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 (1.26 s) FAILED [ 44%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 (1.44 s) FAILED [ 55%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 (1.09 s) FAILED [ 66%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 (0.95 s) FAILED [ 77%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 (0.86 s) FAILED [ 88%] -datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 (1.05 s) PASSED [100%] - -=================================== FAILURES =================================== -________________ TestHistoricalPqByTileClient1.test_read_data1 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 89, in test_read_data1 - self._test_read_data1( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 44, in 
_test_read_data1 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data1 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(4320, 4) | df.shape=(4320, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -2021-12-30 00:02:00+00:00 1467591036 2 2021 12 ( -... 
( -2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 ( -2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data1 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(4320, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -2021-12-30 00:02:00+00:00 1467591036 2 2021 12 -... -2022-01-01 23:57:00+00:00 1467591036 4317 2022 1 -2022-01-01 23:58:00+00:00 1467591036 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data2 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 132, in test_read_data2 - self._test_read_data2( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 61, in _test_read_data2 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data2 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(8640, 4) | df.shape=(8640, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -... ( -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data2/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data2 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(8640, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -... 
-2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data3 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 176, in test_read_data3 - self._test_read_data3( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 79, in _test_read_data3 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data3 --------------------------------------------------------------------------------- - -# df= ( -index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] | df.index in [2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(2640, 4) | df.shape=(2640, 4) -full_symbol close year month ( -timestamp ( -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( -2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 ( -... 
( -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 ( -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 ( -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data3/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data3 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2022-01-01 02:00:00+00:00, 2022-01-01 23:59:00+00:00] -columns=full_symbol,close,year,month -shape=(2640, 4) - full_symbol close year month -timestamp -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 -2022-01-01 02:01:00+00:00 1467591036 3001 2022 1 -... -2022-01-01 23:58:00+00:00 1508924190 4318 2022 1 -2022-01-01 23:59:00+00:00 1467591036 4319 2022 1 -2022-01-01 23:59:00+00:00 1508924190 4319 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data4 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 221, in test_read_data4 - self._test_read_data4( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 97, in _test_read_data4 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: 
TestHistoricalPqByTileClient1.test_read_data4 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] | df.index in [2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(6002, 4) | df.shape=(6002, 4) -full_symbol close year month ( -timestamp ( -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 ( -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 ( -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 ( -... ( -2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 ( -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 ( -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data4/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data4 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-30 00:00:00+00:00, 2022-01-01 02:00:00+00:00] -columns=full_symbol,close,year,month -shape=(6002, 4) - full_symbol close year month -timestamp -2021-12-30 00:00:00+00:00 1467591036 0 2021 12 -2021-12-30 00:00:00+00:00 1508924190 0 2021 12 -2021-12-30 00:01:00+00:00 1467591036 1 2021 12 -... 
-2022-01-01 01:59:00+00:00 1508924190 2999 2022 1 -2022-01-01 02:00:00+00:00 1467591036 3000 2022 1 -2022-01-01 02:00:00+00:00 1508924190 3000 2022 1""" -________________ TestHistoricalPqByTileClient1.test_read_data5 _________________ -Traceback (most recent call last): - File "/app/datapull/common/data/client/test/test_historical_pq_clients.py", line 267, in test_read_data5 - self._test_read_data5( - File "/app/datapull/common/data/client/test/im_client_test_case.py", line 114, in _test_read_data5 - self.check_df_output(actual_df, *args, **kwargs) - File "/app/helpers/hunit_test.py", line 1516, in check_df_output - self.assert_equal( - File "/app/helpers/hunit_test.py", line 1230, in assert_equal - is_equal = assert_equal( - File "/app/helpers/hunit_test.py", line 957, in assert_equal - diff_files( - File "/app/helpers/hunit_test.py", line 666, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: TestHistoricalPqByTileClient1.test_read_data5 --------------------------------------------------------------------------------- - -# df= ( -index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] | df.index in [2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] -columns=full_symbol,close,year,month | df.columns=full_symbol,close,year,month -shape=(242, 4) | df.shape=(242, 4) -full_symbol close year month ( -timestamp ( -2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 ( -2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 ( -2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 ( -... 
( -2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 ( -2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 ( -2022-01-01 01:00:00+00:00 1508924190 2940 2022 1 ( -Diff with: -> vimdiff datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.actual.txt datapull/common/data/client/test/TestHistoricalPqByTileClient1.test_read_data5/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestHistoricalPqByTileClient1.test_read_data5 --------------------------------------------------------------------------------- -exp = r"""# df= -index=[2021-12-31 23:00:00+00:00, 2022-01-01 01:00:00+00:00] -columns=full_symbol,close,year,month -shape=(242, 4) - full_symbol close year month -timestamp -2021-12-31 23:00:00+00:00 1467591036 2820 2021 12 -2021-12-31 23:00:00+00:00 1508924190 2820 2021 12 -2021-12-31 23:01:00+00:00 1467591036 2821 2021 12 -... -2022-01-01 00:59:00+00:00 1508924190 2939 2022 1 -2022-01-01 01:00:00+00:00 1467591036 2940 2022 1 -2022-01-01 01:00:00+00:00 1508924190 2940 2022 1""" -============================= slowest 3 durations ============================== -1.44s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -1.26s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -1.14s call datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 -=========================== short test summary info ============================ -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -FAILED 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 -FAILED datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 -========================= 5 failed, 4 passed in 10.94s ========================= diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt deleted file mode 100644 index c297aad27..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/output/test.txt +++ /dev/null @@ -1,10 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 5 failed pytest 'tests' target(s); to reproduce run: -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 -pytest datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 -pytest 
datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test2/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt deleted file mode 100644 index 8c9d7793d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175206Z SKIPPED [1] core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1175722Z SKIPPED [1] config_root/config/test/test_config.py:325: See AmpTask1573 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176275Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1176859Z XFAIL core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1177550Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1178650Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1179474Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 - Na... 
-Run_fast_tests Run fast tests 2022-02-19T16:53:07.1180384Z ^[[31m= ^[[31m^[[1m3 failed^[[0m, ^[[32m1511 passed^[[0m, ^[[33m155 skipped^[[0m, ^[[33m60 deselected^[[0m, ^[[33m2 xfailed^[[0m, ^[[33m1 rerun^[[0m^[[31m in 211.15s (0:03:31)^[[0m^[[31m =^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1367972Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[36mINFO ^[[0m hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=28.0 KB -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1381857Z 11:53:07 @ 2022-02-19 06:51:34 - ^[[33mWARN ^[[0m hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt deleted file mode 100644 index e16188c74..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/output/test.txt +++ /dev/null @@ -1,8 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 -pytest 
dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test3/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt deleted file mode 100644 index 58f583b0e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt +++ /dev/null @@ -1,61 +0,0 @@ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0521158Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [ 99%]^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0932903Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) ^[[32mPASSED^[[0m^[[31m [100%]^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933619Z -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0933865Z =================================== FAILURES =================================== -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0934800Z ^[[31m^[[1m_____________________ TestRealTimeMvnReturnsWithOms1.test1 _____________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0935555Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0936347Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0937188Z market_data = self.get_market_data(event_loop) -Run_fast_tests Run fast tests 
2022-02-19T16:53:07.0938027Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939155Z df = self.get_market_data_df() -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0939988Z File "/app/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0940754Z df = node.fit()["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941392Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0941905Z self._lazy_load(fit=True) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0942562Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943252Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0943957Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0944732Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0945561Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0959832Z ^[[31m^[[1m____________________ TestMultivariateNormalDataSource.test1 ____________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0961700Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0968475Z File "/app/dataflow/core/nodes/test/test_sources.py", line 175, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0970838Z df = node.fit()["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0972952Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0973577Z self._lazy_load(fit=True) -Run_fast_tests Run fast 
tests 2022-02-19T16:53:07.0974176Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0976810Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0977529Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0978880Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0981739Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0982702Z ^[[31m^[[1m_________________________ TestMvnReturnsBuilder.test1 __________________________^[[0m -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985191Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0985837Z File "/app/dataflow/core/test/test_builders.py", line 74, in test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0986469Z result_bundle = dag_runner.fit() -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987113Z File "/app/dataflow/core/dag_runner.py", line 170, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0987711Z return self._run_dag(method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988321Z File "/app/dataflow/core/dag_runner.py", line 181, in _run_dag -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0988936Z df_out, info = self._run_dag_helper(method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0989566Z File "/app/dataflow/core/dag_runner.py", line 110, in _run_dag_helper -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0990221Z df_out = self.dag.run_leq_node(nid, method)["df_out"] -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0991397Z File "/app/dataflow/core/dag.py", line 428, in run_leq_node -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0992521Z self._run_node(id_, pred_nid, method) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993298Z File 
"/app/dataflow/core/dag.py", line 593, in _run_node -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0993800Z output = getattr(node, method)(**kwargs) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994361Z File "/app/dataflow/core/nodes/sources.py", line 334, in fit -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0994834Z self._lazy_load(fit=True) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995336Z File "/app/dataflow/core/nodes/sources.py", line 361, in _lazy_load -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0995859Z rets = self._generate_returns(fit) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0996779Z File "/app/dataflow/core/nodes/sources.py", line 354, in _generate_returns -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0997405Z vol = cofinanc.compute_annualized_volatility(avg_rets) -Run_fast_tests Run fast tests 2022-02-19T16:53:07.0998205Z NameError: name 'cofinanc' is not defined -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048290Z ============================= slowest 3 durations ============================== -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1048893Z 26.48s setup oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1049478Z 8.44s call helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1050189Z 5.32s setup dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1116212Z =========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z SKIPPED [1] test/test_tasks.py:68: Test needs to be run outside Docker -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119804Z SKIPPED [1] test/test_tasks.py:60: Test needs to be run outside Docker -Run_fast_tests Run fast tests 
2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt deleted file mode 100644 index bc2ab8612..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/output/test.txt +++ /dev/null @@ -1,61 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 -pytest dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 -pytest dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 - -# TestRealTimeMvnReturnsWithOms1.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 388, in test1 - market_data = 
self.get_market_data(event_loop) - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 325, in get_market_data - df = self.get_market_data_df() - File "$GIT_ROOT/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py", line 310, in get_market_data_df - df = node.fit()["df_out"] - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined -^[[31m^[[1m__________________ - -# TestMultivariateNormalDataSource.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/core/nodes/test/test_sources.py", line 175, in test1 - df = node.fit()["df_out"] - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined -^[[31m^[[1m_______________________ - -# TestMvnReturnsBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/core/test/test_builders.py", line 74, in test1 - result_bundle = dag_runner.fit() - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 170, in fit - return self._run_dag(method) - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 181, in _run_dag - df_out, info = self._run_dag_helper(method) - File "$GIT_ROOT/dataflow/core/dag_runner.py", line 110, in _run_dag_helper - df_out = self.dag.run_leq_node(nid, method)["df_out"] - File "$GIT_ROOT/dataflow/core/dag.py", line 428, in run_leq_node - 
self._run_node(id_, pred_nid, method) - File "$GIT_ROOT/dataflow/core/dag.py", line 593, in _run_node - output = getattr(node, method)(**kwargs) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 334, in fit - self._lazy_load(fit=True) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 361, in _lazy_load - rets = self._generate_returns(fit) - File "$GIT_ROOT/dataflow/core/nodes/sources.py", line 354, in _generate_returns - vol = cofinanc.compute_annualized_volatility(avg_rets) -NameError: name 'cofinanc' is not defined - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test4/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt deleted file mode 100644 index b0f4950ce..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt +++ /dev/null @@ -1,36 +0,0 @@ -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] - -=================================== FAILURES =================================== -__________________________ TestE8c_ModelBuilder.test1 __________________________ -Traceback (most recent call last): - File "/app/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 - self.check_string(actual) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't 
exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -__________________________ TestE8a_ModelBuilder.test1 __________________________ -Traceback (most recent call last): - File "/app/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 - self.check_string(actual) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '/app/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -============================= slowest 3 durations ============================== -10.36s call dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 -7.77s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -7.31s call dataflow/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -=========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 -Run_fast_tests Run fast tests 2022-02-19T16:53:07.1119288Z FAILED dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt deleted 
file mode 100644 index 063e0af62..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/output/test.txt +++ /dev/null @@ -1,36 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_ModelBuilder::test1 -pytest dataflow/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_ModelBuilder::test1 - -# TestE8a_ModelBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8a_pipeline.py", line 72, in test1 - self.check_string(actual) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8a_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -# TestE8c_ModelBuilder.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/pipelines/E8/test/test_E8c_pipeline.py", line 79, in test1 - self.check_string(actual) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: 
-################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow/pipelines/E8/test/TestE8c_ModelBuilder.test1/output/test.txt.tmp' -################################################################################ - -________________________ - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test5/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt deleted file mode 100644 index a2ee5ad54..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt +++ /dev/null @@ -1,2533 +0,0 @@ -INFO: > cmd='/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke run_fast_slow_superslow_tests' ->>ENV<<: is_inside_container=False: code_version=1.0.3, container_version=None, is_inside_docker=False, is_inside_ci=False, CI_defined=False, CSFY_CI='nan' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=False -## run_fast_slow_superslow_tests:  -## run_fast_tests:  -15:12:49 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and not superslow" . 
-o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION 
-CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "not slow and not superslow" . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 5.0s -timeout method: signal -timeout func_only: True -collecting ...  
-collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 622 items  -collecting 801 items  -collecting 1084 items  -collecting 1419 items  -collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint ( 3 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint ( 3 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=5f6da4732626 - release=3.10.0-1160.36.2.el7.x86_64 - version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51710918656, percent=22.3, used=11804581888, free=14433091584, active=30353010688, inactive=18354896896, buffers=0, cached=40310579200, shared=2491396096, slab=2053443584) - disk usage=sdiskusage(total=107362627584, used=32545419264, free=74817208320, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? 
- gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m not slow and not superslow . -o timeout_func_only=true --timeout 5 --reruns 2 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 81 deselected / 1793 selected  - -amp/dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.02 s) PASSED [ 0%] -amp/core/finance/test/test_prediction_processing.py::TestStackPredictionDf::test1 (0.03 s) PASSED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call1 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call2 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call3 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_function_call4 SKIPPED [ 0%] -amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser SKIPPED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_get_df1 (0.01 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_read_with_filter1 (0.03 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_everything1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_one_column1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestParquet1::test_write_and_read_two_columns1 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_merge1 (0.08 s) PASSED [ 0%] 
-amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read1 (0.05 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read2 (0.06 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read3 (0.03 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestPartitionedParquet1::test_write_and_read4 (0.02 s) PASSED [ 0%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_full1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_half2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_invalid2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_one_year2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_over_two_years1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestGetParquetFiltersFromTimestampInterval1::test_by_month_two_years1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns2 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns3 (0.00 s) PASSED [ 1%] 
-amp/helpers/test/test_hparquet.py::TestAddDatePartitionColumns::test_add_date_partition_columns4 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_get_test_data1 (0.00 s) PASSED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset SKIPPED [ 1%] -amp/helpers/test/test_hparquet.py::TestToPartitionedDataset::test_to_partitioned_dataset_wrong_column (0.00 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test1 (0.03 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test2 (0.02 s) PASSED [ 1%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test3 (0.02 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test4 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test5 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test6 (0.02 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test7 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestContinuousSkLearnModel::test8 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test1 (0.07 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexSkLearnModel::test2 (0.10 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test1 (0.04 s) PASSED [ 2%] -amp/dataflow/core/nodes/test/test_sklearn_models.py::TestMultiindexPooledSkLearnModel::test2 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.21 s) PASSED [ 2%] 
-amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 2%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_csv_unadjusted_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotEquitiesCsvParquetByAssetClient::test_read_parquet_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_metadata1 (0.11 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 3%] 
-amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_get_universe1 (0.00 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data1 (0.05 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data2 (0.07 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data3 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data4 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_data5 (0.08 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_csv_expiry_data5 (0.09 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data2 (0.06 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_data5 (0.07 s) PASSED [ 3%] -amp/datapull/kibot/data/client/test/test_kibot_clients.py::TestKibotFuturesCsvParquetByAssetClient::test_read_parquet_expiry_data5 (0.07 s) PASSED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_mixed_constraints SKIPPED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_only_gmv_constraint SKIPPED [ 3%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_restrictions SKIPPED [ 4%] -amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer1::test_short_ban SKIPPED [ 4%] 
-amp/optimizer/test/test_single_period_optimization.py::Test_SinglePeriodOptimizer2::test1 SKIPPED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse1 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse2 (0.38 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse3 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse4 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_traceback.py::Test_Traceback1::test_parse_empty_traceback1 (0.00 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestGhLogin1::test_gh_login (0.23 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_images_ls_repo (0.56 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_all SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_kill_last SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_ps (0.21 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_docker_stats SKIPPED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean (0.22 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_fetch_master (0.22 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_pull (0.21 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_images_ls_repo (0.36 s) PASSED [ 4%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_all SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_kill_last SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_login (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_ps (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_pull (0.00 s) PASSED [ 5%] 
-amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_docker_stats SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_find_test_class1 (0.14 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr1 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr2 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_create_pr3 SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_issue_title (0.42 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_gh_workflow_list SKIPPED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_files (0.15 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_clean2 (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create3 (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_fetch_master (0.00 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_merge_master (0.08 s) PASSED [ 5%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_pull (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint2 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_lint3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_print_setup (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title1 (0.47 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasks1::test_get_gh_issue_title4 (0.44 s) PASSED [ 6%] 
-amp/helpers/test/test_lib_tasks.py::TestLibTasksRemoveSpaces1::test1 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash2 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash3 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash4 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_bash5 (0.02 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGetDockerCmd1::test_docker_jupyter1 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests1 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests2 (0.00 s) PASSED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests4 SKIPPED [ 6%] -amp/helpers/test/test_lib_tasks.py::Test_build_run_command_line1::test_run_fast_tests5 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class1 (0.14 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class2 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_class3 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_decorator2 SKIPPED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files1 (0.09 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksRunTests1::test_find_test_files2 (0.09 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_diff_files_abort1 (0.16 s) PASSED [ 7%] 
-amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_branch1 (0.33 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_files1 (0.23 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_last_commit1 (0.37 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::TestLibTasksGitCreatePatch1::test_tar_modified1 (0.44 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_parse_linter_output1::test2 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test1 (0.15 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2 (0.14 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert1 (0.00 s) PASSED [ 7%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_assert3 (0.07 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_branch1 SKIPPED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_files3 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_last_commit1 (0.03 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_get_files_to_process1::test_modified1 (0.07 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_classes2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files1 (0.00 s) PASSED [ 8%] 
-amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_files2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests1 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro1::test_tests2 (0.00 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test1 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test2 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test3 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test4 (0.22 s) PASSED [ 8%] -amp/helpers/test/test_lib_tasks.py::Test_pytest_repro_end_to_end::test5 (0.22 s) PASSED [ 9%] -amp/helpers/test/test_lib_tasks.py::TestFailing::test_failing (0.00 s) PASSED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data2 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input1 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input2 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input3 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data_invalid_input4 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_order_book_invalid_input1 SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_get_exchange_currency_pairs SKIPPED [ 9%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_initialize_class SKIPPED [ 9%] 
-amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_1 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_2 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_1tile_3 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_1 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_2tiles_2 (0.00 s) PASSED [ 9%] -amp/dataflow/model/test/test_backtest_config.py::Test_build_configs_varying_tiled_periods1::test_3tiles_1 (0.00 s) PASSED [ 9%] -amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique1 (0.00 s) PASSED [ 9%] -amp/helpers/test/test_hpandas.py::Test_dassert_is_unique1::test_dassert_is_unique2 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test1 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test2 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_to_series1::test3 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df1 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df2 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df3 (0.02 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_trim_df4 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types1 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types2 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::Test_trim_df1::test_types3 (0.01 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str1 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str2 (0.00 s) PASSED [ 
10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str3 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str4 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDfToStr::test_df_to_str5 (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_datetime (0.00 s) PASSED [ 10%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_timestamp (0.00 s) PASSED [ 11%] -amp/helpers/test/test_hpandas.py::TestDataframeToJson::test_dataframe_to_json_uuid (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_get_universe1 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data2 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data3 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data4 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data5 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCsvClient1::test_read_data6 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_end_ts_for_symbol1 (0.04 s) PASSED [ 11%] 
-amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_start_ts_for_symbol1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_get_universe1 (0.00 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data1 (0.04 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data2 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data3 (0.07 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data4 (0.06 s) PASSED [ 11%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data5 (0.07 s) PASSED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtPqByAssetClient1::test_read_data6 (0.00 s) PASSED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_end_ts_for_symbol1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_start_ts_for_symbol1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_get_universe1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data1 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data2 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data3 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data4 SKIPPED [ 12%] -amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data5 SKIPPED [ 12%] 
-amp/datapull/ccxt/data/client/test/test_ccxt_clients.py::TestCcxtCddDbClient1::test_read_data6 SKIPPED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_infs (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_false (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_get_symmetric_equisized_bins::test_zero_in_bin_interior_true (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test1 (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_digitize1::test_heaviside1 (0.00 s) PASSED [ 12%] -amp/core/signal_processing/test/test_misc_transformations.py::Test_compute_weighted_sum1::test1 (0.00 s) PASSED [ 12%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal1 (0.00 s) PASSED [ 12%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal5 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_equal_fuzzy_match1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal1 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_assert_not_equal2 (0.04 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir3 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_input_dir4 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_output_dir1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir1 (0.00 s) PASSED [ 13%] 
-amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_s3_scratch_dir2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::TestTestCase1::test_get_scratch_space3 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal1 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_equal2 (0.00 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal1 (0.03 s) PASSED [ 13%] -amp/helpers/test/test_unit_test.py::Test_AssertEqual1::test_not_equal_debug SKIPPED [ 13%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string1 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing1 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing2 (0.00 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_missing3 (0.15 s) (WARNING: Test was updated) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal1 (0.04 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal2 (0.03 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckString1::test_check_string_not_equal3 (0.04 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal1 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal2 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_equal3 (0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing1 -WARNING: Update golden outcome file 
'/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing1/output/test_df.txt'(0.02 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing2 (0.01 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_missing3 -WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_missing3/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal1 (0.06 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal2 (0.05 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal3 -WARNING: Update golden outcome file '/app/amp/helpers/test/TestCheckDataFrame1.test_check_df_not_equal3/output/test_df.txt'(0.03 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::TestCheckDataFrame1::test_check_df_not_equal4 (0.05 s) PASSED [ 14%] -amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test1 (0.16 s) (WARNING: Test was updated) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_check_string_debug1::test2 -WARNING: Update golden outcome file '/app/amp/helpers/test/Test_check_string_debug1.test2/output/test_df.txt'(0.15 s) (WARNING: Test was updated) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_unit_test1::test_purify_txt_from_client2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::TestSubsetDf1::test1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test1 (0.02 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_get_dir_signature1::test2 (0.02 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test1 (0.00 s) PASSED [ 15%] 
-amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_txt_from_client1::test3 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test1 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test2 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_object_reference1::test3 (0.00 s) PASSED [ 15%] -amp/helpers/test/test_unit_test.py::Test_purify_amp_reference1::test1 (0.00 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeReturnPipeline1::test1 (0.47 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimePipelineWithOms1::test1 (0.98 s) PASSED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms1::test1 SKIPPED [ 15%] -amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py::TestRealTimeMvnReturnsWithOms2::test1 SKIPPED [ 15%] -amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py::TestCsvToPq::test_csv_to_pq_script SKIPPED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 (0.00 s) FAILED [ 16%] -research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (6.31 s) RERUN [ 16%] -research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit (1.61 s) PASSED [ 16%] -research/RH2E/test/test_RH2E_pipeline.py::TestRH2E_DagBuilder::test1 (4.68 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ec_pipeline.py::TestRH2Ec_DagBuilder::test1 (0.18 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ed_pipeline.py::TestRH2Ed_DagBuilder::test1 (0.19 s) PASSED [ 16%] -research/RH2E/test/test_RH2Ee_pipeline.py::TestRH2Ee_DagBuilder::test1 (0.39 s) PASSED [ 16%] 
-research/RH2E/test/test_RH2Ef_pipeline.py::TestRH2Ef_DagBuilder::test1 (4.61 s) PASSED [ 16%] -research/RH2E/test/test_RH2Eg_pipeline.py::TestRH2Eg_DagBuilder::test1 (3.68 s) PASSED [ 16%] -research/RH1E/test/test_RH1E_pipeline.py::TestRH1E_DagBuilder::test1 (2.02 s) PASSED [ 16%] -research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test1 (0.17 s) PASSED [ 16%] -research/RH1E/test/test_RH1Eb_pipeline.py::TestRH1Eb_DagBuilder::test2 (1.97 s) PASSED [ 16%] -oms_lime/test/test_eg_broker.py::TestEgBroker1::test_place_order1 (0.94 s) PASSED [ 16%] -oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example1 (1.74 s) PASSED [ 16%] -oms_lime/test/test_eg_portfolio_example.py::TestEgPortfolioExample1::test_get_eg_portfolio_example2 (0.06 s) PASSED [ 16%] -oms_lime/test/test_eg_restrictions.py::TestEgRestrictions1::test_get_trading_restrictions (0.02 s) PASSED [ 16%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test_save_data SKIPPED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8a_pipeline.py::TestE8a_DagBuilder::test1 (0.47 s) PASSED [ 16%] -dataflow_lime/pipelines/E8/test/test_E8c_pipeline.py::TestE8c_DagBuilder::test1 (4.82 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (5.13 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (6.41 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit (2.90 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_predict (0.79 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (6.48 s) RERUN [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution (4.11 s) PASSED [ 17%] 
-dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag1 (0.01 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder3::test_get_dag2 (0.01 s) PASSED [ 17%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder4::test_fit (2.90 s) PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove1 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove2 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove3 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove4 PASSED [ 17%] -amp/core/plotting/test/test_correlation.py::Test_select_series_to_remove::test_select_series_to_remove5 PASSED [ 17%] -amp/oms/test/test_oms_db.py::TestOmsDbRemoveAllTables1::test1 SKIPPED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio1::test_state (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics1 (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics2 (0.09 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_get_historical_statistics3 (0.01 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_cash1 (0.02 s) PASSED [ 17%] -amp/oms/test/test_portfolio.py::TestDataFramePortfolio2::test_initialization_with_holdings1 (0.08 s) PASSED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test1 SKIPPED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio1::test2 SKIPPED [ 18%] -amp/oms/test/test_portfolio.py::TestMockedPortfolio2::test1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestSimulatedProcessForecasts1::test_initialization1 (0.63 s) 
PASSED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts1::test_mocked_system1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system1 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system2 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system3 SKIPPED [ 18%] -amp/oms/test/test_process_forecasts.py::TestMockedProcessForecasts2::test_mocked_system4 SKIPPED [ 18%] -amp/oms/test/test_restrictions.py::TestRestrictions1::test1 SKIPPED [ 18%] -amp/oms/test/test_restrictions.py::TestRestrictions1::test2 SKIPPED [ 18%] -amp/dataflow/system/test/test_real_time_dag_adapter.py::TestRealtimeDagAdapter1::testMvnReturnsBuilder1 (0.05 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes1 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes2 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes3 (0.01 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes4 (0.02 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG1::test_add_nodes5 (0.13 s) PASSED [ 18%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes1 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes10 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes2 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes3 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes4 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes5 (0.00 s) PASSED [ 19%] 
-amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes6 (0.02 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes7 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes8 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG2::test_connect_nodes9 (0.01 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks2 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag.py::Test_dataflow_core_DAG3::test_sources_sinks3 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_dag_adapter.py::TestDagAdapter1::test2 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_from_config1 (0.01 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_columns_for_tag1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_get_tags_for_column1 (0.00 s) PASSED [ 19%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_pickle1 (0.05 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestResultBundle::test_to_dict_and_back (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_feature_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags2 (0.00 s) PASSED [ 20%] 
-amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_target_and_prediction_col_names_for_tags3 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_get_targets_and_predictions_for_tags1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_prediction_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_target_col_names1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_result_bundle.py::TestPredictionResultBundle::test_to_config1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_runners.py::TestRollingFitPredictDagRunner1::test1 (0.43 s) PASSED [ 20%] -amp/dataflow/core/test/test_runners.py::TestIncrementalDagRunner1::test1 (0.47 s) PASSED [ 20%] -amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test1 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_utils.py::Test_get_df_info_as_string::test2 (0.00 s) PASSED [ 20%] -amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw1 (0.01 s) PASSED [ 20%] -amp/dataflow/core/test/test_visualization.py::Test_dataflow_core_visualization1::test_draw_to_file1 (0.01 s) PASSED [ 20%] -amp/core/plotting/test/test_portfolio_stats.py::Test_plot_portfolio_stats1::test1 PASSED [ 20%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_binning.py::TestGetSymmetricNormalQuantiles1::test3 (0.00 s) PASSED [ 21%] -amp/config_root/config/test/test_config_builders.py::TestGetConfigsFromBuilder1::test1 (0.00 s) PASSED [ 21%] -amp/config_root/config/test/test_config_builders.py::TestGetConfigFromEnv::test_no_env_variables (0.00 s) PASSED [ 21%] 
-amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test1 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test2 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_turnover::test4 (0.01 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test1 (0.02 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_average_holding_period::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test2 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test3 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::Test_compute_avg_turnover_and_holding_period::test4 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestComputeTurn1::test1 (0.00 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestMaximizeWeightEntropy1::test1 (0.13 s) PASSED [ 21%] -amp/core/statistics/test/test_turnover.py::TestFindNearestAffinePoint1::test1 (0.01 s) PASSED [ 22%] -research/returns/test/test_dataflow_lime_returns_pipeline.py::TestReturnsPipeline::test1 (0.11 s) PASSED [ 22%] -im_lime/eg/test/test_eg_transform_pq_by_date_to_by_asset.py::TestEgTransformByDateToByTile1::test_transform1 (4.30 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test1 (3.12 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache1 SKIPPED [ 22%] 
-core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache2 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache3 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache4 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache5 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader1::test_cache6 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_historical1 (0.70 s) PASSED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_real_time1 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_replayed_time1 SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgSingleInstrumentDataReader2::test_save_data SKIPPED [ 22%] -core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py::TestEgMultipleInstrumentDataReader1::test_historical1 (0.75 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_incorrect_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_integer_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestConvertTimestampColumn::test_string_datetime (0.00 s) PASSED [ 22%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_index_already_present (0.00 s) PASSED [ 23%] 
-amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_milliseconds (0.00 s) PASSED [ 23%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_seconds (0.00 s) PASSED [ 23%] -amp/datapull/common/data/transform/test/test_transform_utils.py::TestReindexOnDatetime::test_reindex_on_datetime_wrong_column (0.00 s) PASSED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_end_ts_for_symbol1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_start_ts_for_symbol1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_get_universe1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data1 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data2 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data3 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data4 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data5 SKIPPED [ 23%] -amp/datapull/common/data/client/test/test_historical_pq_clients.py::TestHistoricalPqByTileClient1::test_read_data6 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_command_line SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call1 SKIPPED [ 23%] 
-amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_function_call2 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py::TestPqByDateToByAsset1::test_process_chunk SKIPPED [ 23%] -amp/helpers/test/test_lib_tasks_find.py::Test_find_short_import1::test1 (0.00 s) PASSED [ 23%] -amp/helpers/test/test_lib_tasks_find.py::Test_find_func_class_uses1::test1 (0.00 s) PASSED [ 24%] -amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_replayed_time1 SKIPPED [ 24%] -amp/dataflow/system/test/test_real_time_runner.py::TestRealTimeDagRunner1::test_simulated_replayed_time1 (0.46 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test1 (0.16 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestArmaReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test1 (0.15 s) PASSED [ 24%] -amp/dataflow/core/test/test_builders.py::TestMvnReturnsBuilder::test_str1 (0.00 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit1 (1.13 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestDeepARGlobalModel::test_fit_dag1 (1.07 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_csv1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_col_parquet1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_csv1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_datetime_index_parquet1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates1 (0.01 s) PASSED [ 24%] 
-amp/dataflow/core/nodes/test/test_sources.py::TestDiskDataSource::test_filter_dates_open_boundary1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestArmaDataSource::test1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_sources.py::TestMultivariateNormalDataSource::test1 (0.01 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test1 (0.07 s) PASSED [ 24%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test2 (0.02 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test3 (0.07 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test4 (0.08 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSmaModel::test5 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3 (0.16 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test01 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test02 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test03 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test04 (0.13 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test05 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test06 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test07 (0.19 s) PASSED [ 25%] 
-amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test08 SKIPPED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test09 (0.37 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test10 (0.10 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test11 (0.09 s) PASSED [ 25%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test12 (0.13 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModel::test13 (0.16 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1 (0.20 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2 (0.25 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3 (0.34 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1 (0.01 s) PASSED [ 26%] -amp/dataflow/core/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test2 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test3 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test4 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_bet_starts::test5 (0.00 s) PASSED [ 26%] 
-amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test1 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test2 (0.01 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test3 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test4 (0.00 s) PASSED [ 26%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_run_ends::test5 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test1 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test10 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test11 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test12 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test2 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test3 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test4 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test5 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test6 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test7 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test8 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_signed_runs.py::Test_compute_signed_run_lengths::test9 (0.01 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test1 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test2 (0.00 s) PASSED [ 27%] 
-amp/core/statistics/test/test_t_test.py::TestTTest1samp::test3 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_t_test.py::TestTTest1samp::test4 (0.00 s) PASSED [ 27%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_high_sample_count (0.02 s) PASSED [ 27%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator1::test_moderate_sample_count (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_high_sample_count (0.41 s) PASSED [ 28%] -amp/core/statistics/test/test_covariance_shrinkage.py::TestAnalyticalNonlinearShrinkageEstimator2::test_moderate_sample_count (0.20 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test1 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test2 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test3 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test4 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test5 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test6 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeMoments::test7 (0.01 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test1 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test2 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test3 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test4 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test5 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracZero::test6 
(0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test1 (0.00 s) PASSED [ 28%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test2 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test4 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test5 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeFracNan::test6 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeNumFiniteSamples::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeNumUniqueValues::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeDenominatorAndPackage::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test1 (0.01 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test2 (0.01 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::TestComputeSpecialValueStats::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test1 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test2 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test3 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test4 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test5 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_jensen_ratio::test6 (0.00 s) PASSED [ 29%] -amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_2dof (0.00 s) PASSED [ 29%] 
-amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_4dof (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_compute_t_distribution_j_2::test_almost_normal (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test1 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test10 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test11 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test12 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test2 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test3 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test5 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test6 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test7 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test8 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::TestComputeZeroDiffProportion::test9 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test1 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test2 (0.01 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test3 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test4 (0.00 s) PASSED [ 30%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test5 (0.00 s) PASSED [ 30%] 
-amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test6 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_descriptive.py::Test_summarize_time_index_info::test7 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeDrawdownCdf::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeNormalizedDrawdownCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdownApproximateCdf::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::TestComputeMaxDrawdown::test3 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_drawdown::test1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test1 (0.01 s) PASSED [ 31%] -amp/core/statistics/test/test_drawdown.py::Test_compute_time_under_water::test2 (0.01 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed1 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed2 (0.00 s) PASSED [ 31%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_equally_distributed3 (0.00 s) PASSED [ 
32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_exponentially_distributed3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_entropy.py::Test_compute_hill_number::test_scale_invariance1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test4 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_forecastability.py::Test_compute_forecastability::test5 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestGetInterarrivalTime::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_interarrival_time.py::TestComputeInterarrivalTimeStats::test3 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test1 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test2 (0.00 s) PASSED [ 32%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test3 (0.00 s) PASSED [ 
33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test4 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test5 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_normality.py::TestApplyNormalityTest::test6 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_df (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_small_series (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_q_values.py::Test_estimate_q_values::test_user_supplied_pi0 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test0 SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test1 (0.38 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients1::test_generate_input_data SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test0 SKIPPED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test1 (0.02 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test2 (0.05 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test3 (0.04 s) PASSED [ 33%] -amp/core/statistics/test/test_regression.py::TestComputeRegressionCoefficients2::test_generate_input_data SKIPPED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test1 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestComputeKratio::test2 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test1 (0.00 s) PASSED [ 33%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 (0.00 s) XFAIL [ 34%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test3 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test1 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test2 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test3 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test4 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test5 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test6 (0.02 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 (0.01 s) XFAIL [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test1 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test2 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test3 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test4 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test5 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test6 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test7 (0.01 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyAdfTest::test8 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test1 (0.00 s) PASSED [ 34%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test3 (0.00 s) PASSED [ 35%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test4 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test5 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test6 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test7 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyKpssTest::test8 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test1 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test3 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test4 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test5 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test6 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test7 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test8 SKIPPED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestApplyLjungBoxTest::test9 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test1 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test2 (0.00 s) PASSED [ 35%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test4 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test5 (0.00 s) PASSED [ 36%] 
-amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test6 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test7 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test8 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_nan (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::TestCalculateHitRate::test_smoke (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test1 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test2 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_requires_statsmodels.py::Test_compute_bet_stats::test3 (0.03 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test1 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test2 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_annualized_return_and_volatility::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test1 (0.01 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test2 (0.01 s) PASSED [ 36%] -amp/core/statistics/test/test_returns_and_volatility.py::Test_compute_returns_per_bet::test3 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatio::test1 (0.00 s) PASSED [ 36%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeSharpeRatioStandardError::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test1 (0.02 s) PASSED [ 37%] 
-amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test2 (0.09 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatio::test3 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test1 (0.02 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestComputeAnnualizedSharpeRatioStandardError::test2 (0.09 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_summarize_sharpe_ratio::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test2 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test3 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test4 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_nans1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_oos_not_from_interval1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::Test_zscore_oos_sharpe_ratio::test_zeros1 (0.01 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test1 (0.00 s) PASSED [ 37%] -amp/core/statistics/test/test_sharpe_ratio.py::TestSharpeRatioCorrelationConversion::test2 (0.00 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_derivative1::test1 (0.03 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_smooth_moving_average1::test1 (0.01 s) PASSED [ 37%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test1 (0.00 s) PASSED [ 38%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test2 (0.00 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test3 (0.00 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test4 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test5 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test6 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_extract_smooth_moving_average_weights::test7 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_moment1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_norm1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_var1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_std1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_demean1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_skew1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_kurtosis1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_sharpe_ratio1::test1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_corr1::test1 (0.02 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zcorr1::test1 (0.03 s) PASSED [ 38%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_atol1 (0.01 s) PASSED [ 38%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_clean1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_inf2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_nan2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_arma_zero2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_default_values2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_atol1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_clean1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_inf2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan1 (0.01 s) PASSED [ 39%] 
-amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_nan2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay1_arma_zero2 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_atol1 (0.01 s) PASSED [ 39%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_clean1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_inf2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_nan2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_zscore1::test_delay2_arma_zero2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_ema_smoothing.py::Test_compute_rolling_annualized_sharpe_ratio::test1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test1 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test2 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test3 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test4 (0.17 s) PASSED [ 
40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test5 (0.16 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test_compute_ipca::test6 (0.17 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test1 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test2 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test3 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test4 (0.01 s) PASSED [ 40%] -amp/core/signal_processing/test/test_incremental_pca.py::Test__compute_ipca_step::test5 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan1 (0.14 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_nan2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero1 (0.10 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_set_to_zero2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize1 (0.11 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::Test_process_outliers1::test_winsorize2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test1 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test2 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_outliers.py::TestProcessNonfinite1::test3 (0.00 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_clean1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_depth (0.01 s) PASSED [ 41%] 
-amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_output_mode3 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode1 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode2 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_get_swt::test_timing_mode3 (0.01 s) PASSED [ 41%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test1 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test2 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_swt.py::Test_compute_swt_var::test3 (0.03 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test__compute_lagged_cumsum::test_lag_1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test1 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_correlate_with_lagged_cumsum::test2 (0.01 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_inverse::test1 (0.00 s) PASSED [ 42%] -amp/core/signal_processing/test/test_cross_correlation.py::Test_calculate_presudoinverse::test1 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_non_ath_to_nan1::test1 (0.01 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_bypass (0.00 s) PASSED [ 42%] 
-amp/core/finance/test/test_ablation.py::Test_remove_times_outside_window::test_remove (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test1 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_set_weekends_to_nan::test2 (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_bypass (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_ablation.py::Test_remove_weekends::test_remove (0.00 s) PASSED [ 42%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_ask_value (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_bid_value (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_centered_order_book_imbalance (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_geometric_mid (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_log_relative_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_mid_value (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_order_book_imbalance (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_quoted_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_relative_spread (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_bid_ask.py::Test_process_bid_ask::test_weighted_mid (0.00 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test1 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_time_bars1::test2 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test1 (0.02 s) PASSED [ 43%] 
-amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test2 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_resample_ohlcv_bars1::test3 (0.02 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans1 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_nans2 (0.01 s) PASSED [ 43%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_no_nans2 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_compute_twap_vwap1::test_with_offset (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_daily (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_endpoints_intraday (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::TestResamplePortfolioBarMetrics1::test_resampling_invariance (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_month1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_week1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_day_to_year1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_only_minute1 (0.01 s) PASSED [ 44%] 
-amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_srs::test_upsample_month_to_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_business_day1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_month1 (0.01 s) PASSED [ 44%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_week1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_day_to_year1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_no_freq_day_to_business_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_business_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_only_minute1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_business_day_to_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_resampling.py::Test_resample_df::test_upsample_month_to_day1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::TestComputeOvernightReturns::test1 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test1 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test2 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test3 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test4 (0.00 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test5 (0.01 s) PASSED [ 45%] -amp/core/finance/test/test_returns.py::Test_compute_prices_from_rets::test6 (0.00 s) PASSED 
[ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_function (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_config_with_object (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_roundtrip_transform1 (0.00 s) PASSED [ 45%] -amp/config_root/config/test/test_config.py::TestFlatConfigSet1::test_set1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigGet1::test_non_existing_key4 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_in1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestFlatConfigIn1::test_not_in1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key2 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_existing_key4 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key2 (0.00 s) PASSED [ 46%] 
-amp/config_root/config/test/test_config.py::TestNestedConfigGet1::test_non_existing_key3 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key1 (0.00 s) PASSED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key2 SKIPPED [ 46%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key3 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_existing_key4 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigSet1::test_not_existing_key1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_print1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_config_to_python1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigMisc1::test_roundtrip_transform1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_in2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in3 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigIn1::test_not_in4 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigUpdate1::test_update3 
(0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten1 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestNestedConfigFlatten1::test_flatten2 (0.00 s) PASSED [ 47%] -amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestSubtractConfig1::test_test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestDassertIsSerializable1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config.py::TestFromEnvVar1::test1 (0.44 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_validate_configs1::test_check_same_configs_error (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_flattened_dict1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_get_config_from_nested_dict1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_intersect_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test1 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_subtract_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test1 (0.00 s) PASSED [ 48%] 
-amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test2 (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_diff_configs1::test_same_config (0.00 s) PASSED [ 48%] -amp/config_root/config/test/test_config_utils.py::Test_convert_to_dataframe1::test1 (0.01 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test1 (0.00 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test2 (0.00 s) PASSED [ 49%] -amp/config_root/config/test/test_config_utils.py::Test_build_config_diff_dataframe1::test3 (0.00 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test1 (0.47 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test2 (0.26 s) PASSED [ 49%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource1::test3 (0.27 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_log_portfolio_read_portfolio (0.10 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_multiday_overnight_returns_injected (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_floating_gmv (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_floating_gmv (0.03 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_3_assets_targeted_gmv (0.03 s) PASSED [ 49%] 
-amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_demean (0.04 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_intraday_4_assets_dollar_neutrality_side_preserving (0.04 s) PASSED [ 49%] -amp/dataflow/model/test/test_forecast_evaluator.py::TestForecastEvaluator1::test_to_str_multiday_1_asset_targeted_gmv (0.02 s) PASSED [ 49%] -im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data1 (1.75 s) PASSED [ 49%] -im_lime/eg/test/test_eg_historical_pq_by_date_taq_bar_client.py::TestEgHistoricalPqByDateTaqBarClient1::test_read_data2 (1.82 s) PASSED [ 49%] -market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_get_data_at_timestamp1 (2.07 s) PASSED [ 50%] -market_data_lime/test/test_eg_historical_market_data.py::TestEgHistoricalMarketData1::test_should_be_online1 (0.00 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_at_timestamp1 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval1 SKIPPED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval2 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval3 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval4 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_interval5 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period1 (0.16 s) PASSED [ 50%] 
-amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period2 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period3 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period4 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period5 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period6 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_data_for_last_period7 (0.16 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_end_time1 (0.06 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_last_price1 (0.23 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_get_twap_price1 (0.15 s) PASSED [ 50%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_is_online1 (0.06 s) PASSED [ 51%] -amp/market_data/test/test_market_data_im_client.py::TestImClientMarketData::test_should_be_online1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test3 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test4 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test5 (0.00 s) PASSED [ 51%] 
-amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test6 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestDassertIsFullSymbolValid::test7 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestParseFullSymbol::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/data/client/test/test_full_symbol.py::TestConstructFullSymbol::test2 (0.00 s) PASSED [ 51%] -amp/datapull/common/universe/test/test_universe_utils.py::TestStringToNumericalId::test1 (0.00 s) PASSED [ 51%] -amp/datapull/common/universe/test/test_universe_utils.py::TestBuildNumericalToStringIdMapping::test1 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates1 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates2 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates3 (0.00 s) PASSED [ 51%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_filter_dates4 (0.00 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils1::test_get_available_dates1 (0.00 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test1 (1.39 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test2 (2.44 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestGetBarData1::test3 (2.81 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla1 (1.61 s) PASSED [ 52%] 
-vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval1::test_tsla2 (1.05 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_convert_string_to_timestamp1 (0.02 s) PASSED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_generate_raw_eg_data SKIPPED [ 52%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::TestTaqBarsUtils2::test_process_bar_data1 (0.02 s) PASSED [ 52%] -vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data1 (0.82 s) PASSED [ 52%] -vendors_lime/datastream_liquidity/test/test_datastream_liquidity_utils.py::TestDatastreamLiquidityUtils1::test_get_liquidity_data2 (0.74 s) PASSED [ 52%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_replayed_time1 SKIPPED [ 52%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline1::test_save_data SKIPPED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_tiny1 (0.00 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v1 (0.00 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_all (0.20 s) PASSED [ 52%] -research/test/test_dataflow_lime_universe.py::TestEgUniverse1::test_v2_top100 (0.08 s) PASSED [ 52%] -oms_lime/test/test_eg_portfolio.py::TestEgPortfolio1::test_send_orders1 SKIPPED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData1::test_should_be_online1 (0.02 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data1 (0.04 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_data3 (0.05 s) PASSED [ 53%] 
-market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time1 (0.01 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_is_online1 (0.02 s) PASSED [ 53%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_sql_get_query1 (0.01 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData1::test_save_market_data1 SKIPPED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data1 (0.19 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp1 (0.19 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_at_timestamp2 (0.17 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_get_data_for_interval1 (0.18 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_print_info_for_serialized_data1 SKIPPED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData2::test_round_trip1 (0.16 s) PASSED [ 53%] -market_data_lime/test/test_eg_replayed_market_data.py::TestEgReplayedMarketData3::test_get_data1 (0.50 s) PASSED [ 53%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period1 (0.00 s) SKIPPED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data1 (0.08 s) PASSED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data2 (0.08 s) PASSED [ 53%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data3 (0.12 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data4 (0.12 s) 
PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data5 (0.12 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_0 (0.02 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_1 (0.07 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_3 (0.08 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_6 (0.08 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData1::test_get_data_for_minute_63 (0.11 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp1 (0.04 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_at_timestamp2 (0.02 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval1 (0.03 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData2::test_get_data_for_interval2 (0.04 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_get_last_end_time1 (0.03 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available1 (0.07 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available2 (0.05 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData3::test_is_last_bar_available3 (0.84 s) PASSED [ 54%] -amp/market_data/test/test_replayed_market_data.py::TestReplayedMarketData4::test_is_last_bar_available1 (0.08 s) PASSED [ 54%] 
-amp/dataflow/model/test/test_stats_computer.py::TestStatsComputer1::test_compute_portfolio_stats1 (0.04 s) PASSED [ 54%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_bash SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_cmd1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_images_ls_repo1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_jupyter1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_login1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_ps SKIPPED (T...) [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_docker_stats SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_list SKIPPED (Test n...) [ 55%] -amp/test/test_tasks.py::TestExecuteTasks1::test_print_setup1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only2 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_local_image SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_build_prod_image SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_jupyter1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_docker_pull1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_lint1 SKIPPED (Test ...) 
[ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_blank_tests1 SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests SKIPPED [ 55%] -amp/test/test_tasks.py::TestExecuteTasks2::test_run_fast_tests_failed SKIPPED [ 55%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order1 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order2 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout1 SKIPPED [ 56%] -amp/oms/test/test_order_processor.py::TestOrderProcessor1::test_submit_order_and_timeout2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerCmd::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerDown::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test1 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetImDockerUp::test2 (0.00 s) PASSED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test1 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test3 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetCreateDbCmd::test4 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test1 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test2 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestGetRemoveDbCmd::test3 SKIPPED [ 56%] -amp/datapull/test/test_im_lib_tasks.py::TestImDockerCmd::test1 SKIPPED [ 56%] -amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe1 (0.00 s) PASSED [ 57%] 
-amp/datapull/ccxt/universe/test/test_universe.py::TestGetUniverse::test_get_universe2 (0.00 s) PASSED [ 57%] -amp/datapull/ccxt/universe/test/test_universe.py::TestGetVendorUniverse::test1 (0.00 s) PASSED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_exchange_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_symbol_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_get_trade_symbol_id2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data1 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data2 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data3 SKIPPED [ 57%] -amp/im/kibot/data/load/test/test_sql_data_loader.py::TestSqlDataLoader1::test_read_data4 SKIPPED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test1 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test2 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test3 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestJsonRoundtrip1::test4 (0.00 s) PASSED [ 57%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test1 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test10 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test11 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test12 (0.00 s) 
PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test13 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test14 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test15 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test16 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test17 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test18 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test2 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test3 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test4 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test5 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test6 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test7 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test8 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestPlaybackInputOutput1::test9 (0.00 s) PASSED [ 58%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_config1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_dataseries1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_df1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_dict1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_float3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int1 (0.00 s) 
PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_int3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_list1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str2 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestToPythonCode1::test_str3 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFilePath1::test1 (0.00 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test1 (0.50 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test2 (0.50 s) PASSED [ 59%] -amp/helpers/test/test_playback.py::TestPlaybackFileMode1::test3 (0.49 s) PASSED [ 59%] -amp/helpers/test/test_printing.py::Test_printing1::test_color_highlight1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test3 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test4 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test5 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_to_str1::test6 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test3 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_log::test4 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_sort_dictionary::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_indent1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test2 (0.00 
s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_dedent1::test_roundtrip1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_align_on_left1::test1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame1 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame2 (0.00 s) PASSED [ 60%] -amp/helpers/test/test_printing.py::Test_logging1::test_log_frame3 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test1 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test2 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test3 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test4 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test5 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test6 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system1::test7 (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_os_name (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_server_name (0.05 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_system2::test_get_user_name (0.10 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test1 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test2 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_compute_file_signature1::test3 (0.00 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test1 (0.17 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test2 (0.18 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test3 (0.18 s) 
PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test4 (0.18 s) PASSED [ 61%] -amp/helpers/test/test_system_interaction.py::Test_find_file_with_dir1::test5 (0.18 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_Linux_commands1::test_du1 (0.19 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_not_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp3 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp4 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_has_timestamp1::test_has_timestamp5 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_system_interaction.py::Test_append_timestamp_tag1::test_no_timestamp2 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestCacheFunctions::test_get_cache_name1 (0.00 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_changed_function (0.12 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_redefined_function (0.12 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching1 (0.30 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching2 (0.30 s) PASSED [ 62%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching3 (0.30 s) PASSED [ 62%] 
-amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching4 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching5 (0.29 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_disk_reset (0.39 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset (0.40 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_with_caching_mem_reset2 (0.43 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestGlobalCache1::test_without_caching1 (0.00 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching1 (0.70 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestFunctionSpecificCache1::test_with_caching2 (0.64 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_dataframe (0.17 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachePerformance::test_performance_series (0.16 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheDecorator::test_decorated_function_no_mem (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestAmpTask1407::test1 (0.10 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestAmpTask1407::test2 (0.11 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCachingOnS3::test_with_caching1 SKIPPED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_disk_cache1 (0.33 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_cache1 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheEnableReadOnly1::test_mem_disk_cache1 (0.32 s) PASSED [ 63%] -amp/helpers/test/test_cache.py::TestCacheUpdateFunction1::test1 (0.01 s) PASSED [ 64%] -amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_disk_cache1 (0.32 s) PASSED [ 64%] 
-amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_cache1 (0.32 s) PASSED [ 64%] -amp/helpers/test/test_cache.py::TestCacheEnableCheckOnlyIfPresent1::test_mem_disk_cache1 (0.33 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test3 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test4 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test5 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test6 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert1::test7 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test3 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test4 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_eq1::test5 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all1 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_eq_all2 (0.00 s) PASSED [ 64%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_in2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance3 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance4 (0.00 s) 
PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_instance5 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted3 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_sorted4 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_is_subset2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_no_duplicates2 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection1 (0.00 s) PASSED [ 65%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_not_intersection2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_misc1::test_set_eq2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_lgt1::test3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert3 (0.00 s) PASSED [ 66%] 
-amp/helpers/test/test_dbg.py::Test_dassert_is_proportion1::test_assert4 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert2 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_container_type1::test_assert3 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test1 (0.00 s) PASSED [ 66%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_issubclass1::test_man_fail2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_callable1::test1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_dbg.py::Test_dassert_callable1::test_fail1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_branch_name1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_client_root2 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_path_from_supermodule1 (0.11 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_project_dirname1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_get_submodule_paths1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_is_amp (0.11 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule1::test_is_inside_submodule1 (0.00 s) PASSED [ 67%] 
-amp/helpers/test/test_git.py::Test_git_submodule2::test_get_head_hash1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_get_remote_head_hash1 (0.05 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes1 (0.00 s) PASSED [ 67%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_submodule2::test_group_hashes3 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names1 (0.10 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_all_repo_names2 (0.11 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client1 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_client2 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname1 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_full_name_from_dirname2 (0.05 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name4 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_repo_name_rountrip1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_get_task_prefix_from_repo_short_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name1 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name2 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name3 (0.00 s) PASSED [ 68%] 
-amp/helpers/test/test_git.py::Test_git_repo_name1::test_parse_github_repo_name4 (0.00 s) PASSED [ 68%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root1 SKIPPED [ 68%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root2 SKIPPED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root3 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root4 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_path1::test_get_path_from_git_root5 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files1 (0.11 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_modified_files_in_branch1 (0.05 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_previous_committed_files1 (0.07 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_get_summary_files_in_branch1 (0.47 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_git_modified_files1::test_git_log1 (0.07 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test1 (0.13 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test2 (0.13 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test3 (0.17 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test4 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_find_docker_file1::test5 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_docker_base_image_name1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_host_name1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_get_repo_map1 (0.00 s) PASSED [ 69%] -amp/helpers/test/test_git.py::Test_execute_repo_config_code1::test_has_didn_support1 (0.00 s) PASSED [ 69%] 
-amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_real_time1 (1.00 s) PASSED [ 70%] -amp/helpers/test/test_hasyncio.py::Test_hasyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_logging1::test_logging_levels1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_real_time1 (1.00 s) PASSED [ 70%] -amp/helpers/test/test_hlogging.py::Test_hlogging_asyncio1::test_simulated_time1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_find_all_files1::test1 (0.20 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_change_filename_extension1::test1 (0.00 s) PASSED [ 70%] -amp/helpers/test/test_io_.py::Test_load_df_from_json::test1 (0.01 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_get_file_name2 (0.00 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_url_py1::test_run1 (0.36 s) PASSED [ 70%] -amp/dev_scripts/test/test_amp_dev_scripts.py::Test_env1::test_get_system_signature1 (0.16 s) PASSED [ 70%] -amp/dev_scripts/infra/test/test_all.py::Test_ssh_tunnel::test1 SKIPPED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_caesar1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_author1 SKIPPED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_file_size1 (0.28 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_master1 (0.05 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_check_words_in_text1 (0.00 s) PASSED [ 70%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex1 (0.00 s) PASSED [ 71%] 
-amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex2 (0.00 s) PASSED [ 71%] -amp/dev_scripts/git/git_hooks/test/test_install_hooks.py::Test_git_hooks_utils1::test_regex3 (0.00 s) PASSED [ 71%] -amp/dataflow/model/test/test_forecast_mixer.py::TestForecastMixer1::test_generate_portfolio_bar_metrics_df (0.05 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_model_selection1 (1.77 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_model_return_correlation1 (0.28 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_multiple_tests_adjustment1 (0.18 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_positions1 (0.43 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_and_vol1 (0.71 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_rets_signal_analysis1 (0.58 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_return_correlation1 (0.32 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_returns_and_predictions1 (1.13 s) PASSED [ 71%] -amp/dataflow/model/test/test_model_plotter.py::TestModelPlotter1::test_plot_sharpe_ratio_panel1 (0.40 s) PASSED [ 71%] -amp/dataflow/model/test/test_regression_analyzer.py::TestRegressionAnalyzer1::test_compute_moments (0.06 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_replayed_time1 (0.00 s) PASSED [ 71%] -amp/core/test/test_real_time.py::TestReplayedTime1::test1 (0.00 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_real_time1 (3.03 s) PASSED [ 71%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_replayed_time1 (4.01 s) PASSED [ 72%] 
-amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_replayed_time1 (0.00 s) PASSED [ 72%] -amp/core/test/test_real_time.py::Test_execute_with_real_time_loop1::test_simulated_time1 (0.00 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test1 (0.40 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test2 (0.39 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test3 (0.39 s) PASSED [ 72%] -amp/research_amp/cc/test/test_detect_outliers.py::TestDetectOutliers::test4 (0.39 s) PASSED [ 72%] -amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_precision_equivalency (0.00 s) PASSED [ 72%] -amp/optimizer/test/test_utils.py::Test_compute_tangency_portfolio::test_toy_case (0.00 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_data1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_get_twap_price1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread2 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread3 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_full_spread4 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint1 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint2 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_midpoint3 (0.01 s) PASSED [ 72%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread1 (0.01 s) PASSED [ 73%] 
-amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread2 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread3 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread4 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread5 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_partial_spread6 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price1 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price2 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulatorFunctions1::test_order_price3 (0.01 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test1 (0.06 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random1 (0.06 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random2 (0.08 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator1::test_random3 (0.10 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test1 (0.03 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test2 (0.03 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test3 (0.07 s) PASSED [ 73%] -amp/oms/test/test_pnl_simulator.py::TestPnlSimulator2::test_perf1 SKIPPED [ 73%] -amp/oms/test/test_api.py::Test_Contract1::test1 (0.00 s) PASSED [ 73%] -amp/oms/test/test_api.py::Test_Contract1::test_cmp1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Contract1::test_cmp2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Order1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_OrderStatus1::test1 (0.00 s) PASSED [ 74%] 
-amp/oms/test/test_api.py::Test_Trade1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_cmp1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_cmp2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff2 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_Position1::test_diff3 (0.00 s) PASSED [ 74%] -amp/oms/test/test_api.py::Test_OMS1::test1 SKIPPED (unconditional skip) [ 74%] -amp/oms/test/test_api.py::Test_OMS1::test2 SKIPPED (unconditional skip) [ 74%] -amp/oms/test/test_broker.py::TestSimulatedBroker1::test_submit_and_fill1 (0.05 s) PASSED [ 74%] -amp/oms/test/test_broker.py::TestMockedBroker1::test1 SKIPPED (Need ...) [ 74%] -amp/oms/test/test_order.py::TestOrder1::test1 (0.00 s) PASSED [ 74%] -amp/oms/test/test_order.py::TestOrders1::test1 (0.00 s) PASSED [ 74%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 74%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 75%] 
-amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 75%] -amp/im/kibot/test/test_kibot_sql_writer_backend.py::TestSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract2 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_contract3 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol2 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol3 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_contract_symbol_mapper.py::TestContractSymbolMapper::test_get_kibot_symbol4 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract1 (0.00 s) PASSED [ 75%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract4 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract5 (0.00 s) PASSED [ 76%] 
-amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract6 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_compare_expiry_contract7 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract1 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_parse_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract1 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract2 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract3 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_expiry_contract_mapper.py::TestExpiryContractMapper::test_sort_expiry_contract4 (0.00 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contract_slow1 (0.66 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts1 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts2 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_expiry_contracts3 (0.06 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures1 (0.05 s) PASSED [ 76%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures3 (0.05 s) PASSED [ 77%] 
-amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures4 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures5 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures6 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow1 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_futures_slow2 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata1 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata2 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata3 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata4 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata5 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow1 (0.38 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow2 (0.40 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_metadata_slow3 (0.39 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element1 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_get_zero_element2 (0.05 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer1 (0.00 s) PASSED [ 77%] -amp/im/kibot/metadata/test/test_kibot_metadata.py::TestKibotMetadata::test_kibot_hardcoded_contract_lifetime_computer2 (0.00 s) 
PASSED [ 77%] -amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_parsing_logic (0.00 s) PASSED [ 78%] -amp/im/kibot/metadata/test/test_load.py::TestTickerListLoader::test_real_call SKIPPED [ 78%] -amp/im/kibot/metadata/test/test_load.py::TestAdjustmentsLoader::test_real_call SKIPPED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_etfs (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_forex (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_futures (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_all_stocks (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractAssetClass::test_sp500 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_daily (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_minutely (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractFrequency::test_tick (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_continuous (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_dataset_name_parser.py::TestDatasetNameParserExtractContractType::test_expiry (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test1 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test10 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test11 (0.00 s) PASSED [ 78%] 
-amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test12 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test13 (0.00 s) PASSED [ 78%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test14 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test2 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test3 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test4 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test5 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test6 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test7 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test8 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_file_path_generator.py::TestFilePathGenerator::test9 (0.00 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test1 (0.17 s) PASSED [ 79%] -amp/im/kibot/data/load/test/test_s3_data_loader.py::TestKibotS3DataLoader::test_read_data_with_start_end_ts SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_1 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_daily_data_from_s3_2 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_1 SKIPPED [ 79%] -amp/im/ib/data/transform/test/test_transform.py::TestReadFromS3WriteToSql::test_insert_minutely_data_from_s3_2 SKIPPED [ 79%] 
-amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol1 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol2 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol3 (0.00 s) PASSED [ 79%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_convert_df_to_row_to_symbol4 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name1 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name2 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name3 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name4 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_extract_exchange_code_from_full_name5 (0.00 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file1 (0.00 s) PASSED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_exchange_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_symbol_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_ensure_trade_symbol_exist1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_get_remaining_data_to_load SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_daily_data_with_holes SKIPPED [ 80%] 
-amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_bulk_minute_data_with_holes SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_daily_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_minute_data1 SKIPPED [ 80%] -amp/im/ib/test/test_ib_sql_writer_backend.py::TestIbSqlWriterBackend1::test_insert_tick_data1 SKIPPED [ 80%] -amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_dataset_links (0.03 s) PASSED [ 81%] -amp/im/kibot/data/extract/test/test_kibot_data_download.py::TestKibotDownload::test_extract_payload_links (1.53 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path1 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path2 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_generate_file_path3 (0.00 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_file_path_generator.py::TestIbFilePathGenerator::test_get_latest_symbols_file1 (0.03 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_dtypes1 (0.04 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data1 (0.10 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data2 (0.04 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data3 (0.10 s) PASSED [ 81%] -amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_check_date_type (0.05 s) PASSED [ 81%] 
-amp/im/ib/data/load/test/test_s3_data_loader.py::TestS3IbDataLoader1::test_read_data_with_start_end_ts (1.51 s) PASSED [ 81%] -amp/im/eoddata/test/test_read_symbol_list.py::Test_read_symbols_from_file::test1 (0.00 s) PASSED [ 81%] -amp/im/ib/connect/test/test_im_tasks.py::TestImTwsStartIbInterface::test1 SKIPPED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table1 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table2 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_filter_table3 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_from_text1 (0.00 s) PASSED [ 81%] -amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_from_text_invalid2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_repr1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_str1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_unique1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_table.py::TestTable1::test_unique2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_timer.py::TestTimedScope::test_1 (1.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version1 SKIPPED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test__check_version2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_check_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_get_changelog_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_versioning.py::TestVersioning1::test_get_container_version1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_find_duplicates1::test1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_find_duplicates1::test2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test1 
(0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test2 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_remove_duplicates1::test3 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_extract1::test1 (0.00 s) PASSED [ 82%] -amp/helpers/test/test_list.py::Test_list_extract1::test2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test4 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test5 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test6 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_extract1::test7 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test4 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list_chunk1::test5 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_find_duplicates1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_find_duplicates2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates2 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_list.py::Test_list1::test_remove_duplicates3 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_extension1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_unknown::test_unknown_os1 (0.00 s) PASSED [ 83%] -amp/helpers/test/test_open.py::Test_open_html::test_linux1 SKIPPED (...) [ 84%] -amp/helpers/test/test_open.py::Test_open_html::test_mac1 SKIPPED (Se...) 
[ 84%] -amp/helpers/test/test_open.py::Test_open_html::test_windows1 SKIPPED [ 84%] -amp/helpers/test/test_open.py::Test_open_pdf::test_mac1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_get_credentials1::test1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_functions1::test_extract_bucket_from_path1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists1 (0.01 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists2 (0.05 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_exists3 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_glob1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_s3.py::Test_s3_1::test_ls1 (0.01 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_dry_run1 (0.00 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_asyncio_threading2 (0.03 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_parallel_loky2 (1.97 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute1::test_serial1 (0.06 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading1 (0.02 s) PASSED [ 84%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_asyncio_threading2 (0.02 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky1 (1.34 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_parallel_loky2 (1.19 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial1 (0.02 s) PASSED [ 85%] 
-amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute2::test_serial2 (0.02 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading1 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading2 (0.04 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading3 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_asyncio_threading4 (0.04 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky1 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky2 (1.38 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky3 PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial1 (0.07 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_serial2 (0.08 s) PASSED [ 85%] -amp/helpers/test/test_joblib_helpers.py::Test_joblib_example1::test1 SKIPPED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime1 (0.00 s) PASSED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_datetime_fail1 (0.00 s) PASSED [ 85%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_is_datetime1::test_is_strict_datetime_fail1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_dassert_is_datetime_assert1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_datetime_conversions (0.00 s) PASSED [ 86%] 
-amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz1::test_to_datetime3 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_dassert_tz_compatible1::test_dassert_compatible_timestamp_assert2 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_ET (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_UTC (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_ET (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_get_current_time1::test_get_current_time_naive_UTC (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_annual1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_bimonthly1 (0.00 s) PASSED [ 86%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_daily1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_index1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly3 (0.00 s) PASSED [ 87%] 
-amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly4 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_monthly5 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_quarterly3 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_semiannual2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_srs1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_to_generalized_datetime::test_weekly1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test2 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_unix_epoch_to_timestamp::test3 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test1 (0.00 s) PASSED [ 87%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_datetime_.py::Test_convert_timestamp_to_unix_epoch::test3 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test3 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test4 (0.00 s) PASSED [ 88%] 
-amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test5 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test6 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test7 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test8 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dict.py::Test_get_nested_dict_iterator::test9 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_env.py::Test_env1::test_get_system_signature1 (0.17 s) PASSED [ 88%] -amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_hnumpy.py::TestRandomSeedContext::test_example2 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_convert_csv_to_dict::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_from_typed_csv::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_csv_helpers.py::Test_to_typed_csv::test1 (0.00 s) PASSED [ 88%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_conjunction1 (0.01 s) PASSED [ 88%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_values1::test_disjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_conjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_filter_data_by_comparison::test_disjunction1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::TestFilterDataByMethod::test1 (0.02 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test1 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test2 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test3 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test4 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test5 (0.00 s) PASSED [ 89%] 
-amp/helpers/test/test_dataframe.py::Test_apply_nan_mode::test6 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test1 (0.01 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test2 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test3 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test4 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test5 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test6 (0.00 s) PASSED [ 89%] -amp/helpers/test/test_dataframe.py::Test_compute_points_per_year_for_given_freq::test7 (0.00 s) PASSED [ 89%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md1::test_uml_file_names1 (0.00 s) PASSED [ 89%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command1 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md2::test_render_command2 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml1 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml2 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml3 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml4 (0.00 s) PASSED [ 90%] -amp/documentation/scripts/test/test_render_md.py::Test_render_md3::test_render_plantuml_playback1 (0.01 s) PASSED [ 90%] -amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test1 (0.00 s) PASSED [ 90%] -amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test2 (0.00 s) PASSED [ 90%] 
-amp/dev_scripts/test/test_toml_merge.py::TestMergeToml::test3 (0.00 s) PASSED [ 90%] -amp/dataflow/pipelines/features/test/test_feature_pipeline.py::TestFeaturePipeline::test1 (0.23 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_with_oos (0.02 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_fit_without_oos (0.03 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_with_oos (0.04 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_apply_sklearn_model_predict_without_oos (0.00 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2 (0.01 s) PASSED [ 90%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json3 (0.00 s) PASSED [ 91%] -amp/dataflow/model/test/test_dataframe_modeler.py::TestDataFrameModeler::test_merge (0.01 s) PASSED [ 91%] -amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_aggregate_models1 (0.26 s) PASSED [ 91%] -amp/dataflow/model/test/test_model_evaluator.py::TestModelEvaluator1::test_calculate_stats1 (1.41 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer1::test_column_arithmetic (0.03 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer2::test_resampling (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing1 (0.02 s) PASSED [ 91%] 
-amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer3::test_multicolumn_processing2 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestGroupedColDfToDfTransformer4::test_drop_nans_without_reindexing (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer1::test1 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_then_join (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToDfTransformer2::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer1::test1 (0.02 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test1 (0.04 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer2::test2 (0.51 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_then_join (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestSeriesToSeriesTransformer3::test_drop_nans_without_reindexing_then_attempt_join (0.01 s) PASSED [ 92%] 
-amp/dataflow/core/nodes/test/test_transformers.py::TestFunctionWrapper::test1 (0.01 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test1 (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestTwapVwapComputer::test2 (0.03 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test1 (0.08 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_transformers.py::TestMultiindexTwapVwapComputer::test2 (0.13 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test1 (0.02 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test2 (0.03 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestUnsupervisedSkLearnModel::test3 (0.05 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test1 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test2 (0.06 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestMultiindexUnsupervisedSkLearnModel::test3 (0.08 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test1 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test2 (0.04 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_unsupervised_sklearn_models.py::TestResidualizer::test3 (0.06 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_local_level_model.py::TestLocalLevelModel::test1 (0.01 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test0 SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test1 (0.06 s) PASSED [ 93%] 
-amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test2 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test3 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test4 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test5 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test6 (0.05 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test7 (0.09 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_regression_models.py::TestLinearRegression::test_generate_input_data SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1 (1.04 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2 (1.07 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_step_one1 SKIPPED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_with_constant1 (1.25 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict2 (1.07 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals1 (1.46 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_different_intervals_no_x1 (1.28 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_with_nan (1.08 s) PASSED [ 93%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test1 (0.02 s) PASSED [ 94%] 
-amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_0_zscoring1 (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestMultihorizonReturnsPredictionProcessor::test_invert_zret_3_zscoring1 (0.02 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_pass_through_no_writing (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteDf::test_write (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_pass_through_no_writing (0.01 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sinks.py::TestWriteCols::test_write (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_linearize_eigval_eigvec (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval1 (0.00 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_sort_eigval2 (0.00 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec1 (0.01 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer1::test_stabilize_eigenvec2 (0.02 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test1 (0.23 s) PASSED [ 94%] -amp/core/test/test_residualizer.py::TestPcaFactorComputer2::test2 (0.35 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestTimeSeriesDailyStudy::test_usual_case (0.29 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestTimeSeriesMinutelyStudy::test_usual_case (0.58 s) PASSED [ 94%] -amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test1 (0.13 s) PASSED [ 95%] 
-amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test2 (0.13 s) PASSED [ 95%] -amp/core/test/test_timeseries_study.py::TestMapDictToDataframeTest1::test3 (0.13 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_shape1 (0.00 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestCreateIterSingleIndex::test_truncate1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_local_ts (0.28 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_none_x_vars (0.00 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToGluon::test_transform_series_target (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_correctness_local_ts SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_artificial_ts SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluon::test_transform_none_x_vars SKIPPED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromGluonForecasts::test_transform1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformToSklean::test_transform_none_x_vars1 (0.01 s) PASSED [ 95%] -amp/core/test/test_data_adapters.py::TestTransformFromSklean::test_transform1 (0.01 s) PASSED [ 96%] -amp/core/test/test_explore.py::Test_explore1::test_ols_regress_series (0.20 s) PASSED [ 96%] 
-amp/core/test/test_explore.py::Test_explore1::test_rolling_pca_over_time1 SKIPPED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column1 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column2 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column3 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_column4 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index1 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index2 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index3 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_filter_by_index4 (0.00 s) PASSED [ 96%] -amp/core/test/test_explore.py::TestFilterByTime::test_no_intersection (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pairs::test1 (0.01 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference1 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_difference2 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_compressed_mean (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference1 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference2 (0.00 s) PASSED [ 96%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_difference_of_logs (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_mean (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_mean_of_logs (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference1 (0.00 s) PASSED [ 97%] 
-amp/core/test/test_features.py::Test_cross_feature_pair::test_normalized_difference2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_identity_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_statistical_leverage_scores::test_upper_triangular_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_identity_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_2 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_normalized_principal_loadings::test_upper_triangular_3 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compare_subspaces::test1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_effective_rank::test1 (0.00 s) PASSED [ 97%] -amp/core/test/test_features.py::Test_compute_effective_rank::test2 (0.00 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test1 (0.02 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test2 SKIPPED [ 98%] 
-amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test3 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_grassmann::test4 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test2 SKIPPED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test3 (0.01 s) PASSED [ 98%] -amp/core/test/test_features.py::Test_select_cols_by_greedy_volume::test4 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestResampleIndex1::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test1 (0.01 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test2 (0.09 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test3 (0.08 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test4 (0.13 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestDfRollingApply::test5 (0.02 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_csv1 (0.10 s) PASSED [ 98%] -amp/core/test/test_pandas_helpers.py::TestReadDataFromS3::test_read_parquet1 (1.08 s) PASSED [ 98%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test1 (0.00 s) PASSED [ 98%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test2 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestArmaProcess::test3 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test1 (0.01 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::TestMultivariateNormalProcess::test2 (0.00 s) PASSED [ 99%] -amp/core/test/test_artificial_signal_generators.py::Test_generate_arima_signal_and_response::test1 (0.00 s) PASSED [ 99%] 
-amp/core/test/test_artificial_signal_generators.py::TestGenerateRecipeDataset::test1 (0.01 s) PASSED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test1 SKIPPED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test2 SKIPPED [ 99%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test3 SKIPPED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_dollar_bars (0.07 s) PASSED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_tick_bars (0.02 s) PASSED [ 99%] -amp/core/information_bars/test/test_bars.py::TestBars::test_get_volume_bars (0.07 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_daily_shift_freq1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_minutely1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestBuildLocalTimeseries::test_multiple_responses_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_daily1 (0.03 s) PASSED [ 99%] -amp/core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] - -=================================== FAILURES =================================== -__________________ Test_get_configs_from_command_line1.test1 ___________________ -Traceback (most recent call last): - File "/app/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in 
import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked -ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' -============================= slowest 3 durations ============================== -6.49s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_execution -6.41s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder::test_fit -6.31s call research/RH4E/test/test_RH4Ea_pipeline.py::Test_RH4Ea_DagBuilder::test_fit -=========================== short test summary info ============================ -SKIPPED [5] amp/datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py: Need dind support -SKIPPED [1] amp/helpers/test/test_hparquet.py:741: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file -SKIPPED [5] amp/optimizer/test/test_single_period_optimization.py: Requires special docker container. 
-SKIPPED [1] amp/helpers/test/test_lib_tasks.py:200: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:192: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:184: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:263: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:271: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:287: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:298: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:307: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:316: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:332: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:390: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:399: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:408: AmpTask1347: Add support for mocking `system*()` functions to unit test -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:481: CmampTask #683. 
-SKIPPED [1] amp/helpers/test/test_lib_tasks.py:536: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:571: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:600: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:635: Only run in amp as supermodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:698: Only run in amp as submodule -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:792: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1003: Only run in amp -SKIPPED [1] amp/helpers/test/test_lib_tasks.py:1343: This test makes sense for a branch -SKIPPED [9] amp/datapull/ccxt/data/extract/test/test_exchange_class.py: Enable after CMTask1292 is resolved. -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:789: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:769: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:809: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:530: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:573: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:620: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:666: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:711: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/client/test/test_ccxt_clients.py:757: Need dind support -SKIPPED [1] amp/helpers/test/test_unit_test.py:335: This is only used to debug the debugging the infrastructure -SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:380: Need dind support -SKIPPED [1] amp/dataflow/pipelines/real_time/test/test_dataflow_pipelines_real_time_pipeline.py:534: Need dind support -SKIPPED [1] 
amp/datapull/common/data/transform/test/test_convert_csv_to_pq.py:60: CmTask1305: after removing circular dependencies in `hio.from_file`, this test fails reading a parquet file -SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:130: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:291: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:320: Need dind support -SKIPPED [1] amp/oms/test/test_portfolio.py:412: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:119: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:238: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:243: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:248: Need dind support -SKIPPED [1] amp/oms/test/test_process_forecasts.py:253: Need dind support -SKIPPED [1] amp/oms/test/test_restrictions.py:18: Need dind support -SKIPPED [1] amp/oms/test/test_restrictions.py:45: Need dind support -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:57: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:75: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:93: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:124: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:150: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:198: This is for manual testing -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:274: Next PR will rewrite this -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:325: LimeTask296: Break 2022-01-06 -SKIPPED [1] core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:311: Run 
manually -SKIPPED [9] amp/datapull/common/data/client/test/test_historical_pq_clients.py: Some tests are returning an empty df -SKIPPED [4] amp/datapull/common/data/transform/test/test_transform_pq_by_date_to_by_asset.py: TODO(gp): Need to update this tests after transform v1.3 -SKIPPED [1] amp/dataflow/system/test/test_real_time_runner.py:39: Too slow for real time -SKIPPED [1] amp/dataflow/core/nodes/test/test_volatility_models.py:423: unconditional skip -SKIPPED [1] amp/core/statistics/test/test_regression.py:46: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/core/statistics/test/test_regression.py:17: This test generates the input data -SKIPPED [1] amp/core/statistics/test/test_regression.py:137: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/core/statistics/test/test_regression.py:108: This test generates the input data -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:270: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:283: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:296: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:303: cmamp #654. -SKIPPED [1] amp/core/statistics/test/test_requires_statsmodels.py:315: cmamp #654. -SKIPPED [1] amp/config_root/config/test/test_config.py:325: See AmpTask1573 -SKIPPED [1] amp/market_data/test/test_market_data_im_client.py:134: CmTask882. 
-SKIPPED [1] vendors_lime/taq_bars/test/test_taq_bars_utils.py:304: This is used to generate the frozen input -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:52: LimeTask222 Use volume for volume everywhere -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:39: Run manually -SKIPPED [1] oms_lime/test/test_eg_portfolio.py:14: Finish this -SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:26: Run manually -SKIPPED [1] market_data_lime/test/test_eg_replayed_market_data.py:110: Run manually -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:36: Skip on Mondays -SKIPPED [1] amp/test/test_tasks.py:68: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:60: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:44: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:64: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:56: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:48: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:52: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:36: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:40: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:122: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:95: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:102: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:85: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:89: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:142: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:112: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:129: Test needs to be run outside Docker -SKIPPED [1] amp/test/test_tasks.py:134: 
Test needs to be run outside Docker -SKIPPED [1] amp/oms/test/test_order_processor.py:70: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:78: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:86: Need dind support -SKIPPED [1] amp/oms/test/test_order_processor.py:96: Need dind support -SKIPPED [7] amp/datapull/test/test_im_lib_tasks.py: CMTask #789. -SKIPPED [1] amp/datapull/test/test_im_lib_tasks.py:240: amp #1189 -SKIPPED [10] amp/im/kibot/data/load/test/test_sql_data_loader.py: CmTask666 -SKIPPED [1] amp/helpers/test/test_cache.py:731: See CMTask #952. -SKIPPED [1] amp/helpers/test/test_git.py:217: Run only in amp as super-module -SKIPPED [1] amp/helpers/test/test_git.py:229: Run only in amp as sub-module -SKIPPED [1] amp/dev_scripts/infra/test/test_all.py: unconditional skip -SKIPPED [1] amp/dev_scripts/git/git_hooks/test/test_install_hooks.py:21: There are no Git credentials inside Docker -SKIPPED [1] amp/oms/test/test_pnl_simulator.py:432: For performance measurement -SKIPPED [1] amp/oms/test/test_api.py:162: unconditional skip -SKIPPED [1] amp/oms/test/test_api.py:191: unconditional skip -SKIPPED [1] amp/oms/test/test_broker.py:55: Need dind support -SKIPPED [11] amp/im/kibot/test/test_kibot_sql_writer_backend.py: CmTask666 -SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:47: Disabled waiting for PTask4139 -SKIPPED [1] amp/im/kibot/metadata/test/test_load.py:66: Disabled waiting for PTask4139 -SKIPPED [1] amp/im/kibot/data/load/test/test_s3_data_loader.py:23: Not implemented yet -SKIPPED [4] amp/im/ib/data/transform/test/test_transform.py: CmTask666 -SKIPPED [11] amp/im/ib/test/test_ib_sql_writer_backend.py: CmTask666 -SKIPPED [1] amp/im/ib/connect/test/test_im_tasks.py: unconditional skip -SKIPPED [1] amp/helpers/test/test_versioning.py:23: CmampTask570 -SKIPPED [3] amp/helpers/test/test_open.py: See cryptomtc/cmamp#321 -SKIPPED [1] amp/helpers/test/test_joblib_helpers.py: Just for experimenting with joblib -SKIPPED 
[1] amp/dataflow/core/nodes/test/test_regression_models.py:35: This test fails on some computers due to AmpTask1649 -SKIPPED [1] amp/dataflow/core/nodes/test/test_regression_models.py:18: This test generates the input data -SKIPPED [1] amp/dataflow/core/nodes/test/test_sarimax_models.py:39: cmamp #654. -SKIPPED [1] amp/core/test/test_data_adapters.py:146: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:161: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:118: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:177: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_data_adapters.py:132: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_explore.py:25: https://github.com/.../.../issues/3676 -SKIPPED [1] amp/core/test/test_features.py:510: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:517: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:524: Apparent instability -SKIPPED [1] amp/core/test/test_features.py:556: Apparent instability -SKIPPED [1] amp/core/test/test_backtest.py:27: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_backtest.py:69: Disabled because of PTask2440 -SKIPPED [1] amp/core/test/test_backtest.py:111: Disabled because of PTask2440 -XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultipleTests::test2 -XFAIL amp/core/statistics/test/test_requires_statsmodels.py::TestMultiTTest::test7 -FAILED dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 -= 1 failed, 1581 passed, 209 skipped, 81 deselected, 2 xfailed, 4 rerun in 200.01s (0:03:20) = -15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=120.0 KB -15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' 
-15:16:12 @ 2022-03-07 10:15:22 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... -15:16:12 @ 2022-03-07 10:15:22 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -ERROR: 1 -15:16:15 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3720 Fast tests failed -## run_slow_tests:  -15:16:15 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "slow and not superslow" . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... 
done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION -CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "slow and not superslow" . 
-o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 30.0s -timeout method: signal -timeout func_only: True -collecting ...  -collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 562 items  -collecting 794 items  -collecting 1037 items  -collecting 1375 items  -collecting 1424 items  -collecting 1775 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint ( 6 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint ( 7 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=d232c57e32e2 - 
release=3.10.0-1160.36.2.el7.x86_64 - version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51706417152, percent=22.3, used=11809091584, free=14425956352, active=30357913600, inactive=18355712000, buffers=0, cached=40313204736, shared=2491396096, slab=2054676480) - disk usage=sdiskusage(total=107362627584, used=32545501184, free=74817126400, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? - gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m slow and not superslow . -o timeout_func_only=true --timeout 30 --reruns 1 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 1803 deselected / 71 selected  - -amp/helpers/test/test_sql.py::TestSql1::test_copy_rows_with_copy_from1 SKIPPED [ 1%] -amp/helpers/test/test_sql.py::TestSql1::test_create_database SKIPPED [ 2%] -amp/helpers/test/test_sql.py::TestSql1::test_create_insert_query SKIPPED [ 4%] -amp/helpers/test/test_sql.py::TestSql1::test_db_connection_to_tuple SKIPPED [ 5%] -amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal1 SKIPPED [ 7%] -amp/helpers/test/test_sql.py::TestSql1::test_duplicate_removal2 SKIPPED [ 8%] -amp/helpers/test/test_sql.py::TestSql1::test_execute_insert_query1 SKIPPED [ 9%] -amp/helpers/test/test_sql.py::TestSql1::test_remove_database1 SKIPPED [ 11%] -amp/helpers/test/test_sql.py::TestSql1::test_remove_database_invalid SKIPPED [ 12%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create1 (0.84 s) PASSED [ 14%] -amp/helpers/test/test_lib_tasks.py::TestDryRunTasks2::test_git_branch_create2 (0.47 s) PASSED [ 15%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_portfolio 
SKIPPED [ 16%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data1_database_vs_dataframe_portfolio SKIPPED [ 18%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_portfolio SKIPPED [ 19%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data2_database_vs_dataframe_portfolio SKIPPED [ 21%] -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_portfolio SKIPPED [ 22%] -amp/datapull/ccxt/data/extract/test/test_exchange_class.py::TestCcxtExchange1::test_download_ohlcv_data1 SKIPPED [ 23%] -amp/datapull/common/data/transform/test/test_extract_data_from_db.py::TestExtractDataFromDb1::test_extract_data_from_db SKIPPED [ 25%] -dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 (1.29 s) FAILED [ 26%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::TestReplayedE8dWithMockedOms1::test1 SKIPPED [ 28%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 (19.17 s) PASSED [ 29%] -dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 (19.22 s) PASSED [ 30%] -research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_slow1 SKIPPED [ 32%] -amp/oms/test/test_oms_db.py::TestOmsDbSubmittedOrdersTable1::test_create_table1 SKIPPED [ 33%] -amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_create_table1 SKIPPED [ 35%] -amp/oms/test/test_oms_db.py::TestOmsDbAcceptedOrdersTable1::test_insert1 SKIPPED [ 36%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table1 SKIPPED [ 38%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table2 SKIPPED [ 39%] -amp/oms/test/test_oms_db.py::TestOmsDbTableInteraction1::test_wait_for_table3 SKIPPED [ 40%] 
-amp/oms/test/test_oms_db.py::TestOmsDbCurrentPositionsTable1::test_create_table1 SKIPPED [ 42%] -amp/oms/test/test_oms_db.py::TestOmsDbRestrictionsTable1::test_create_table1 SKIPPED [ 43%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_parallel1 (8.08 s) PASSED [ 45%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentSuccess1::test_serial1 (11.31 s) PASSED [ 46%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel1 (11.48 s) PASSED [ 47%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_parallel2 (11.42 s) PASSED [ 49%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial1 (14.94 s) PASSED [ 50%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentFail2::test_serial2 (15.19 s) PASSED [ 52%] -amp/dataflow/model/test/test_run_experiment.py::TestRunExperimentArchiveOnS3::test_serial1 (8.62 s) PASSED [ 53%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_parallel1 (11.59 s) PASSED [ 54%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test_serial1 (11.41 s) PASSED [ 56%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel1 (15.41 s) PASSED [ 57%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_parallel2 (15.34 s) PASSED [ 59%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial1 (11.61 s) PASSED [ 60%] -amp/dev_scripts/test/test_run_notebook.py::TestRunNotebook2::test_serial2 (12.15 s) PASSED [ 61%] -im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data_for_multiple_symbols1 (1.92 s) PASSED [ 63%] -amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_equities1 (1.31 s) PASSED [ 64%] -amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 (16.72 s) PASSED [ 66%] 
-amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_fit_dag1 (6.20 s) PASSED [ 67%] -amp/dataflow/core/nodes/test/test_gluonts_models.py::TestContinuousDeepArModel::test_predict_dag1 (4.90 s) PASSED [ 69%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test1 (13.29 s) PASSED [ 70%] -vendors_lime/taq_bars/test/test_taq_bars_utils.py::Test_get_cached_bar_data_for_date_interval_perf1::test2 (7.24 s) PASSED [ 71%] -research/real_time/test/test_dataflow_lime_real_time_pipeline.py::TestEgRealTimeReturnPipeline2::test_real_time1 SKIPPED [ 73%] -market_data_lime/test/test_eg_real_time_market_data.py::TestEgRealTimeMarketData2::test_get_last_end_time2 (0.03 s) PASSED [ 74%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period2 (0.00 s) SKIPPED [ 76%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period3 (0.00 s) SKIPPED [ 77%] -market_data_lime/test/test_eg_stitched_market_data_interface.py::TestEgStitchedMarketData1::test_get_data_for_last_period_compare1 (0.00 s) SKIPPED [ 78%] -amp/test/test_tasks.py::TestExecuteTasks2::test_collect_only1 SKIPPED [ 80%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_all_tables1 SKIPPED [ 81%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_create_im_database SKIPPED [ 83%] -amp/datapull/common/db/test/test_create_db.py::TestCreateDb1::test_up1 SKIPPED [ 84%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time1 (3.62 s) PASSED [ 85%] -amp/core/test/test_real_time.py::Test_align_on_time_grid1::test_real_time2 (3.99 s) PASSED [ 87%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_parse_symbols_file2 (11.92 s) PASSED [ 88%] -amp/helpers/test/test_joblib_helpers.py::Test_parallel_execute3::test_parallel_loky4 (1.33 s) PASSED 
[ 90%] -amp/dataflow/system/test/test_source_nodes.py::TestKibotEquityReader::test1 (7.63 s) PASSED [ 91%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1 (1.90 s) PASSED [ 92%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1 (1.48 s) PASSED [ 94%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict1 (1.49 s) PASSED [ 95%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_predict_no_x1 (1.57 s) PASSED [ 97%] -amp/dataflow/core/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_summary (2.32 s) PASSED [ 98%] -amp/core/test/test_backtest.py::TestGeneratePredictions::test4 SKIPPED [100%] - -=================================== FAILURES =================================== -_________________ Test_TiledBacktest_E8d.test_end_to_end_slow1 _________________ -Traceback (most recent call last): - File "/app/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 - self._test(config_builder, experiment_builder, run_model_extra_opts) - File "/app/amp/dataflow/model/run_prod_model_flow.py", line 175, in _test - self.check_string(configs_signature, fuzzy_match=True, tag=tag) - File "/app/amp/helpers/hunit_test.py", line 1360, in check_string - hdbg.dfatal(msg) - File "/app/amp/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '/app/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' -################################################################################ - -============================= slowest 3 durations ============================== -19.22s call 
dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance2 -19.17s call dataflow_lime/pipelines/E8/test/test_E8d_pipeline.py::Test_E8d_DagBuilder2::test_invariance1 -16.72s call amp/dataflow/pipelines/returns/test/test_returns_pipeline.py::TestReturnsBuilder::test_futures1 -=========================== short test summary info ============================ -SKIPPED [1] amp/helpers/test/test_sql.py:95: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:36: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:46: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:21: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:111: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:131: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:79: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:58: Need dind support -SKIPPED [1] amp/helpers/test/test_sql.py:71: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:126: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:210: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:162: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:226: Need dind support -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:200: Need dind support -SKIPPED [1] amp/datapull/ccxt/data/extract/test/test_exchange_class.py:35: Enable after CMTask1292 is resolved. 
-SKIPPED [1] amp/datapull/common/data/transform/test/test_extract_data_from_db.py:38: Need dind support -SKIPPED [1] dataflow_lime/system/test/test_E8d_replayed_system_runner.py:250: Need dind support -SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:61: Disabled since cache was invalidated -SKIPPED [1] amp/oms/test/test_oms_db.py:46: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:127: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:136: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:192: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:203: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:223: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:292: Need dind support -SKIPPED [1] amp/oms/test/test_oms_db.py:310: Need dind support -SKIPPED [1] research/real_time/test/test_dataflow_lime_real_time_pipeline.py:131: LimeTask222 Use volume for volume everywhere -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:59: Skip on Mondays -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:81: Skip on Mondays -SKIPPED [1] market_data_lime/test/test_eg_stitched_market_data_interface.py:105: Skip on Mondays -SKIPPED [1] amp/test/test_tasks.py:116: Test needs to be run outside Docker -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:20: Need dind support -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:49: Need dind support -SKIPPED [1] amp/datapull/common/db/test/test_create_db.py:12: Need dind support -SKIPPED [1] amp/core/test/test_backtest.py:153: Disabled because of PTask2440 -FAILED dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 -==== 1 failed, 35 passed, 35 skipped, 1803 deselected in 297.23s (0:04:57) ===== -15:21:15 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=35.7 MB -15:21:15 - WARN  hcache.py 
clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' -15:21:15 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... -15:21:15 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -ERROR: 1 -15:21:18 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3737 Slow tests failed -## run_superslow_tests:  -15:21:18 - INFO  lib_tasks.py _run_test_cmd:3442 cmd=IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' -IMAGE=665840871993.dkr.ecr.us-east-1.amazonaws.com/amp:dev \ -NETWORK_MODE=bridge \ - docker-compose \ - --file /local/home/gsaggese/src/sasm-lime4/devops/compose/docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - app \ - 'pytest -m "not slow and superslow" . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"'  -WARNING: The AM_PUBLISH_NOTEBOOK_LOCAL_PATH variable is not set. Defaulting to a blank string. -WARNING: The AWS_ACCESS_KEY_ID variable is not set. Defaulting to a blank string. -WARNING: The AWS_SECRET_ACCESS_KEY variable is not set. Defaulting to a blank string. -WARNING: Found orphan containers (compose_oms_postgres_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up. -Creating compose_app_run ... - - -Creating compose_app_run ... 
done -##> devops/docker_run/entrypoint.sh -UID=0 -GID=0 -# Activate environment -##> devops/docker_build/entrypoint/patch_environment_variables.sh -# Set PATH -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# Set PYTHONPATH -PYTHONPATH=/app/amp:/app: -# Configure env -Testing sudo -/app -Setting up Docker -{ "storage-driver": "vfs" } - * Starting Docker: docker  -[ OK ] - * Docker is running -# Check AWS authentication setup -AWS_DEFAULT_REGION='us-east-1' - Name Value Type Location - ---- ----- ---- -------- - profile am manual --profile -access_key ****************3J32 shared-credentials-file -secret_key ****************QpHW shared-credentials-file - region us-east-1 env AWS_DEFAULT_REGION -CONTAINER_VERSION='' -BUILD_TAG='' -which python: /venv/bin/python -python -V: Python 3.8.10 -docker -v: Docker version 20.10.12, build e91ed57 -docker-compose -v: docker-compose version 1.25.0, build unknown -PATH=/app/amp/documentation/scripts:/app/amp/dev_scripts/testing:/app/amp/dev_scripts/notebooks:/app/amp/dev_scripts/install:/app/amp/dev_scripts/infra:/app/amp/dev_scripts/git:/app/amp/dev_scripts/aws:/app/amp/dev_scripts:/app/amp:/app/dev_script_p1:/app:.:/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -PYTHONPATH=/app/amp:/app: -entrypoint.sh: 'pytest -m "not slow and superslow" . 
-o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun "Failed: Timeout"' -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /venv/bin/python3 -cachedir: .pytest_cache -rootdir: /app, configfile: pytest.ini -plugins: flaky-3.7.0, cov-3.0.0, forked-1.4.0, instafail-0.4.2, rerunfailures-10.2, timeout-2.0.1, xdist-2.5.0 -timeout: 3600.0s -timeout method: signal -timeout func_only: True -collecting ...  -collecting 0 items  -collecting 0 items  -collecting 67 items  -collecting 70 items  -collecting 230 items  -collecting 548 items  -collecting 641 items  -collecting 801 items  -collecting 1084 items  -collecting 1391 items  -collecting 1671 items >>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CSFY_CI='' ->>ENV<<: AM_S3_BUCKET=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='1.0.3' != container_version='amp-1.0.3' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -# Git - branch_name='AmpTask2163_Implement_tiled_backtesting_5' - hash='29bdaf1' - # Last commits: - * 29bdaf1 saggese Lint (11 minutes ago) Mon Mar 7 20:10:16 2022 (HEAD -> AmpTask2163_Implement_tiled_backtesting_5, origin/AmpTask2163_Implement_tiled_backtesting_5) - * c26c937 saggese Checkpoint (12 minutes ago) Mon Mar 7 20:09:34 2022 - * 9846b44 saggese Fix break ( 27 hours ago) Sun Mar 6 17:24:31 2022 (origin/master, origin/HEAD, master) -# Machine info - system=Linux - node name=61bb36f6d969 - release=3.10.0-1160.36.2.el7.x86_64 - 
version=#1 SMP Wed Jul 21 11:57:15 UTC 2021 - machine=x86_64 - processor=x86_64 - cpu count=8 - cpu freq=scpufreq(current=2499.998, min=0.0, max=0.0) - memory=svmem(total=66548252672, available=51712106496, percent=22.3, used=11803402240, free=14392971264, active=30350835712, inactive=18393743360, buffers=0, cached=40351879168, shared=2491396096, slab=2055942144) - disk usage=sdiskusage(total=107362627584, used=32546025472, free=74816602112, percent=30.3) -# Packages - python: 3.8.10 - gluonnlp: ? - gluonts: 0.6.7 - joblib: 1.1.0 - mxnet: 1.8.0 - numpy: 1.21.1 - pandas: 1.3.4 - pyarrow: 6.0.1 - scipy: 1.6.1 - seaborn: 0.11.2 - sklearn: 1.0.1 - statsmodels: 0.13.1 -INFO: > cmd='/venv/bin/pytest -m not slow and superslow . -o timeout_func_only=true --timeout 3600 --reruns 1 --only-rerun Failed: Timeout' -INFO: Saving log to file 'tmp.pytest.log' - -collected 1874 items / 1864 deselected / 10 selected  - -amp/dataflow/system/test/test_example_pipeline1.py::TestExamplePipeline1::test_market_data3_database_vs_dataframe_portfolio SKIPPED [ 10%] -dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 - - -(462.17 s) PASSED [ 20%] -research/RH2E/test/test_RH2E_prod_models.py::Test_RH2Eg_ProdModels::test_end_to_end_superslow1 SKIPPED [ 30%] -im_lime/eg/test/test_eg_historical_pq_by_asset_taq_bar_client.py::TestEgHistoricalPqByTileTaqBarClient1::test_read_data1 (30.67 s) PASSED [ 40%] -amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 (47.16 s) PASSED [ 50%] -dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 (47.27 s) PASSED [ 60%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_1 (14.17 s) PASSED [ 70%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_2 (0.21 s) PASSED [ 80%] -amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_3 (0.26 s) PASSED [ 90%] 
-amp/im/ib/metadata/test/test_ib_symbols.py::TestIbSymbolUniverse::test_get_4 (0.21 s) PASSED [100%] - -============================= slowest 3 durations ============================== -462.17s call dataflow_lime/system/test/test_E8d_replayed_system_runner.py::Test_E8d_Replayed_SystemRunner::test1 -47.27s call dataflow_lime/system/test/test_eg_historical_data_source.py::TestEgHistoricalDataSource2::test1 -47.16s call amp/core/test/test_gallery_signal_processing.py::Test_gallery_signal_processing1::test_notebook1 -=========================== short test summary info ============================ -SKIPPED [1] amp/dataflow/system/test/test_example_pipeline1.py:239: Need dind support -SKIPPED [1] research/RH2E/test/test_RH2E_prod_models.py:88: Disabled since cache was invalidated -========== 8 passed, 2 skipped, 1864 deselected in 610.66s (0:10:10) =========== -15:31:32 - INFO  hcache.py clear_global_cache:293 Before clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=0.0 b -15:31:32 - WARN  hcache.py clear_global_cache:294 Resetting 'global mem' cache '/mnt/tmpfs/tmp.cache.mem' -15:31:32 - WARN  hcache.py clear_global_cache:304 Destroying '/mnt/tmpfs/tmp.cache.mem' ... 
-15:31:32 - INFO  hcache.py clear_global_cache:311 After clear_global_cache: 'global mem' cache: path='/mnt/tmpfs/tmp.cache.mem', size=nan -15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3758 Fast tests failed -15:31:34 - ERROR lib_tasks.py run_fast_slow_superslow_tests:3763 Slow tests failed -15:31:34 - INFO  lib_tasks.py run_fast_slow_superslow_tests:3770 Superslow tests passed -Traceback (most recent call last): - File "/local/home/gsaggese/src/venv/amp.client_venv/bin/invoke", line 8, in - sys.exit(program.run()) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 384, in run - self.execute() - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/program.py", line 566, in execute - executor.execute(*self.tasks) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/executor.py", line 129, in execute - result = call.task(*args, **call.kwargs) - File "/local/home/gsaggese/src/venv/amp.client_venv/lib64/python3.6/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "/local/home/gsaggese/src/sasm-lime4/amp/helpers/lib_tasks.py", line 3772, in run_fast_slow_superslow_tests - raise RuntimeError("Some tests failed") -RuntimeError: Some tests failed diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt deleted file mode 100644 index 4168d0576..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/output/test.txt +++ /dev/null @@ -1,41 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py 
pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file '$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 2 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow_lime/pipelines/E8/test/test_E8d_configs.py::Test_get_configs_from_command_line1::test1 -pytest dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py::Test_TiledBacktest_E8d::test_end_to_end_slow1 - -# Test_get_configs_from_command_line1.test1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked -ModuleNotFoundError: No module named 'dataflow_lime.pipelines.E8.8Ed_configs' - - -_______________ - -# Test_TiledBacktest_E8d.test_end_to_end_slow1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lime/pipelines/E8/test/test_E8d_prod_models.py", line 35, in test_end_to_end_slow1 - self._test(config_builder, experiment_builder, run_model_extra_opts) - File "$GIT_ROOT/dataflow/model/run_prod_model_flow.py", line 175, in _test - self.check_string(configs_signature, fuzzy_match=True, tag=tag) - File "$GIT_ROOT/helpers/hunit_test.py", line 1360, in check_string - 
hdbg.dfatal(msg) - File "$GIT_ROOT/helpers/hdbg.py", line 63, in dfatal - raise assertion_type(ret) -AssertionError: -################################################################################ -The golden outcome doesn't exist: saved the actual output in '$GIT_ROOT/dataflow_lime/pipelines/E8/test/Test_TiledBacktest_E8d.test_end_to_end_slow1/output/configs_signature.txt.tmp' -################################################################################ - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test6/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt deleted file mode 100644 index 955be2326..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt +++ /dev/null @@ -1,396 +0,0 @@ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532280Z core/event_study/test/test_core.py::TestUnwrapLocalTimeseries::test_minutely1 (0.03 s) PASSED [100%] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4532780Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533026Z =================================== FAILURES =================================== -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4533724Z _______________________ TestDryRunTasks1.test_git_clean ________________________ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4534485Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535021Z File "/app/helpers/test/test_lib_tasks.py", line 189, in test_git_clean -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4535516Z self.dry_run(target) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4535998Z File "/app/helpers/test/test_lib_tasks.py", line 170, in dry_run -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536460Z self.check_string(act) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4536939Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537409Z is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4537889Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538319Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4538746Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539220Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4539617Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540266Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4540856Z ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541568Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542194Z report_memory_usage=False report_cpu_usage=False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4542753Z ## git_clean: dry_run=False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4543426Z > git clean -fd >/dev/null 2>&1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544154Z > git submodule foreach 'git clean -fd >/dev/null 2>&1' -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4544817Z > git clean -fd -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4545492Z > git submodule foreach 'git clean -fd' -Run_fast_tests 
Run fast tests 2022-07-10T12:40:40.4546194Z find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4546664Z Diff with: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547307Z > vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4547931Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548273Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4548887Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4549484Z EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550166Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4550742Z exp = r"""report_memory_usage=False report_cpu_usage=False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4551201Z ## git_clean: dry_run=False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4552300Z find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4553032Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4637180Z ____ Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 ____ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4638716Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4641551Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642218Z self._test_fit_over_backtest_period1(system, output_col_name) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4642836Z File "/app/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4643452Z self.check_string(actual, fuzzy_match=True, purify_text=True) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644017Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644483Z is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4644949Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645544Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4645997Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646473Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4646868Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4647564Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4648290Z FUZZY ACTUAL vs 
EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649091Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649668Z system_config ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650153Z dag_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4650634Z filter_ath: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651136Z col_mode: replace_all ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4651652Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652148Z start_time: 09:30:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4652605Z end_time: 16:00:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653072Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4653690Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654208Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4654696Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655175Z rule: 5T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4655784Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656444Z vwap_groups: [('close', 'volume', 'vwap')] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4656971Z reindex_like_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657465Z join_output_with_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4657953Z compute_ret_0: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4658550Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4659057Z out_col_group: () ( -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4659540Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660015Z mode: log_rets ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660489Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4660968Z close: close.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661457Z vwap: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4661945Z twap: twap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4662415Z compute_vol: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663124Z in_col_group: ('vwap.ret_0',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4663731Z out_col_group: ('vwap.ret_0.vol',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664238Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4664863Z permitted_exceptions: (,) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665372Z adjust_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4665979Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666481Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4666971Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4667587Z term1_col: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668076Z term2_col: vwap.ret_0.vol ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4668576Z out_col: vwap.ret_0.vol_adj ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669067Z term2_delay: 2 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4669552Z operation: div ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670017Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4670500Z compress_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671111Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4671611Z out_col_group: () ( -Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4672094Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4672591Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673126Z dag_builder_object: nid_prefix= ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4673669Z dag_builder_class: Example1_DagBuilder < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674415Z system_class: Example1_ForecastSystem < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4674950Z dag_config_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675420Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4675903Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676387Z rule: 1T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4676933Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4695757Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696094Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4696806Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4697507Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698303Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4698867Z exp = r"""################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699274Z system_config -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4699701Z 
################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700106Z dag_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700449Z filter_ath: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4700810Z col_mode: replace_all -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701206Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701589Z start_time: 09:30:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4701975Z end_time: 16:00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702320Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4702851Z in_col_groups: [('close',), ('volume',), ('feature1',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703292Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4703670Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704018Z rule: 5T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4704700Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705360Z vwap_groups: [('close', 'volume', 'vwap')] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4705811Z reindex_like_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4706221Z join_output_with_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707219Z compute_ret_0: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4707770Z in_col_groups: [('close',), ('vwap',), ('twap',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708195Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708558Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4708947Z mode: log_rets -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709311Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4709685Z close: close.ret_0 -Run_fast_tests Run 
fast tests 2022-07-10T12:40:40.4710049Z vwap: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710416Z twap: twap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4710775Z compute_vol: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711241Z in_col_group: ('vwap.ret_0',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4711723Z out_col_group: ('vwap.ret_0.vol',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712125Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4712643Z permitted_exceptions: (,) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713077Z adjust_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4713595Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714124Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714543Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4714941Z term1_col: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715350Z term2_col: vwap.ret_0.vol -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4715752Z out_col: vwap.ret_0.vol_adj -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716142Z term2_delay: 2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4716723Z operation: div -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717096Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717445Z compress_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4717947Z in_col_groups: [('vwap.ret_0.vol_adj',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718433Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4718799Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719194Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4719637Z dag_builder_object: nid_prefix= -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4720674Z dag_builder_class: Example1_DagBuilder 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721179Z system_class: Example1_ForecastSystem -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721575Z dag_config_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4721934Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722300Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4722662Z rule: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4723350Z dag_runner_object: > -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724172Z market_data_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724575Z asset_id_col_name: asset_id -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4724989Z asset_ids: [1467591036, 3303714233] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725381Z backtest_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4725884Z universe_str: example1_v1-top2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726310Z trading_period_str: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4726712Z time_interval_str: Jan2000 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727275Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4727822Z end_timestamp: 2000-01-31 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728377Z market_object: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4728897Z dag_object: name=None -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4729261Z mode=strict -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4732047Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4734910Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 
'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4735787Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736220Z vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4736654Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737097Z 1467591036 3303714233 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737459Z end_ts -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4737924Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738406Z 2000-01-01 10:05:00-05:00 0.98 0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4738892Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739261Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4739878Z ________ Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 _________ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4740430Z Traceback (most recent call last): -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741028Z File "/app/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4741597Z self._test_fit_over_period1( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742128Z File "/app/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4742734Z self.check_string(actual, fuzzy_match=True, purify_text=True) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743389Z File "/app/helpers/hunit_test.py", line 1266, in check_string -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4743841Z 
is_equal = assert_equal( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744328Z File "/app/helpers/hunit_test.py", line 881, in assert_equal -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4744767Z diff_files( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745216Z File "/app/helpers/hunit_test.py", line 586, in diff_files -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4745672Z raise RuntimeError(msg_as_str) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746068Z RuntimeError: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4746708Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4747409Z FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748182Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748848Z system_config ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749340Z dag_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4749833Z filter_ath: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750343Z col_mode: replace_all ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4750846Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751336Z start_time: 09:30:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4751805Z end_time: 16:00:00 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752323Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4752956Z in_col_groups: [('close',), ('volume',), ('feature1',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4753459Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754106Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4754581Z rule: 5T ( -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4755213Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4755868Z vwap_groups: [('close', 'volume', 'vwap')] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756379Z reindex_like_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4756884Z join_output_with_input: False ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757379Z compute_ret_0: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4757975Z in_col_groups: [('close',), ('vwap',), ('twap',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758480Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4758952Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759435Z mode: log_rets ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4759916Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4760412Z close: close.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4786435Z vwap: vwap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787169Z twap: twap.ret_0 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4787693Z compute_vol: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4788439Z in_col_group: ('vwap.ret_0',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789095Z out_col_group: ('vwap.ret_0.vol',) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4789615Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790296Z permitted_exceptions: (,) ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4790847Z adjust_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4791745Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792292Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4792798Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4793324Z term1_col: vwap.ret_0 ( 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794102Z term2_col: vwap.ret_0.vol ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4794647Z out_col: vwap.ret_0.vol_adj ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795177Z term2_delay: 2 ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4795669Z operation: div ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796310Z drop_nans: True ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4796825Z compress_rets: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4797500Z in_col_groups: [('vwap.ret_0.vol_adj',)] ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798031Z out_col_group: () ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4798529Z col_mapping: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799069Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4799632Z dag_builder_object: nid_prefix= ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800220Z dag_builder_class: Example1_DagBuilder < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4800794Z system_class: Example1_ForecastSystem < -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801362Z dag_config_config: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4801873Z resample: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802385Z transformer_kwargs: ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4802893Z rule: 1T ( -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4803458Z dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4817745Z or running: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818089Z > ./tmp_diff.sh -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4818751Z 
-------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4819543Z EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820358Z -------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4820978Z exp = r"""################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821394Z system_config -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4821833Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822241Z dag_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822588Z filter_ath: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4822959Z col_mode: replace_all -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823370Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4823780Z start_time: 09:30:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824182Z end_time: 16:00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4824541Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825093Z in_col_groups: [('close',), ('volume',), ('feature1',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825551Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4825951Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4826312Z rule: 5T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827017Z resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4827800Z vwap_groups: [('close', 'volume', 'vwap')] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4828262Z reindex_like_input: False -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4828701Z join_output_with_input: False -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829101Z compute_ret_0: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4829632Z in_col_groups: [('close',), ('vwap',), ('twap',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830079Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830488Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4830867Z mode: log_rets -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831247Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4831634Z close: close.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832038Z vwap: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832422Z twap: twap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4832798Z compute_vol: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833272Z in_col_group: ('vwap.ret_0',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4833972Z out_col_group: ('vwap.ret_0.vol',) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834393Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4834954Z permitted_exceptions: (,) -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835410Z adjust_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4835964Z in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836403Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4836795Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837206Z term1_col: vwap.ret_0 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4837629Z term2_col: vwap.ret_0.vol -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838047Z out_col: vwap.ret_0.vol_adj -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838451Z term2_delay: 2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4838829Z operation: div -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4839211Z drop_nans: True -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4839571Z compress_rets: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840090Z in_col_groups: [('vwap.ret_0.vol_adj',)] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840527Z out_col_group: () -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4840906Z col_mapping: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841320Z vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4841792Z dag_builder_object: nid_prefix= -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842257Z dag_builder_class: Example1_DagBuilder -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4842739Z system_class: Example1_ForecastSystem -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843303Z dag_config_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4843654Z resample: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844032Z transformer_kwargs: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4844412Z rule: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845150Z dag_runner_object: > -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4845882Z market_data_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846282Z asset_id_col_name: asset_id -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4846700Z asset_ids: [1467591036, 3303714233] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847096Z backtest_config: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847559Z universe_str: example1_v1-top2 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4847978Z trading_period_str: 1T -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848377Z time_interval_str: Jan2000 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4848941Z start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4849581Z end_timestamp: 2000-01-31 00:00:00+00:00 -Run_fast_tests Run fast tests 
2022-07-10T12:40:40.4850153Z market_object: -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4850683Z dag_object: name=None -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4851051Z mode=strict -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4853864Z nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4856657Z edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857531Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4857948Z vwap.ret_0.vol_adj.c -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858372Z ################################################################################ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4858854Z 1467591036 3303714233 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859226Z end_ts -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4859697Z 2000-01-01 10:00:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860188Z 2000-01-01 10:05:00-05:00 0.98 0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4860695Z 2000-01-01 10:10:00-05:00 -0.98 -0.98 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4861053Z """ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4862316Z ============================= slowest 3 durations ============================== -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4866438Z 26.87s setup oms/test/test_broker.py::TestDatabaseBroker1::test1 
-Run_fast_tests Run fast tests 2022-07-10T12:40:40.4867269Z 6.46s setup datapull/ccxt/data/extract/test/test_compare_realtime_and_historical_data.py::TestCompareRealtimeAndHistoricalData1::test_parser -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868189Z 6.24s setup datapull/talos/data/client/test/test_talos_clients.py::TestTalosSqlRealTimeImClient1::test_build_numerical_to_string_id_mapping -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4868892Z =========================== short test summary info ============================ -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4956618Z FAILED helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - Run... -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4957400Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4958274Z FAILED dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt deleted file mode 100644 index 94e600076..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/output/test.txt +++ /dev/null @@ -1,399 +0,0 @@ -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading file_name='$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Reading failed tests from file 
'$GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/input/log.txt' -HH:MM:SS - INFO lib_tasks_pytest.py pytest_repro:{LINE_NUM} Found 3 failed pytest 'tests' target(s); to reproduce run: -pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_backtest_period1 -pytest dataflow/system/example1/test/test_example1_forecast_system.py::Test_Example1_ForecastSystem_FitPredict::test_fit_over_period1 -pytest helpers/test/test_lib_tasks.py::TestDryRunTasks1::test_git_clean - -# TestDryRunTasks1.test_git_clean -Traceback (most recent call last): - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 189, in test_git_clean - self.dry_run(target) - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 170, in dry_run - self.check_string(act) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -ACTUAL vs EXPECTED: TestDryRunTasks1.test_git_clean --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4541928Z -report_memory_usage=False report_cpu_usage=False ( -## git_clean: dry_run=False ( - > git clean -fd >/dev/null 2>&1 - > git submodule foreach 'git clean -fd >/dev/null 2>&1' - > git clean -fd - > git submodule foreach 'git clean -fd' -find . 
-name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.i ( -Diff with: -> vimdiff helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.actual.txt helpers/test/outcomes/TestDryRunTasks1.test_git_clean/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: TestDryRunTasks1.test_git_clean --------------------------------------------------------------------------------- -exp = r"""report_memory_usage=False report_cpu_usage=False -## git_clean: dry_run=False -find . -name '*\.pyc' -o -name '*\.pyo' -o -name '.coverage' -o -name '.ipynb_checkpoints' -o -name '.mypy_cache' -o -name '.pytest_cache' -o -name '__pycache__' -o -name 'cfile' -o -name 'tmp.*' -o -name '*.tmp' | sort | xargs rm -rf -""" -__ - -# Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 50, in test_fit_over_backtest_period1 - self._test_fit_over_backtest_period1(system, output_col_name) - File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 114, in _test_fit_over_backtest_period1 - self.check_string(actual, fuzzy_match=True, purify_text=True) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4649436Z -system_config ( -dag_config: ( -filter_ath: ( -col_mode: replace_all ( -transformer_kwargs: ( -start_time: 
09:30:00 ( -end_time: 16:00:00 ( -resample: ( -in_col_groups: [('close',), ('volume',), ('feature1',)] ( -out_col_group: () ( -transformer_kwargs: ( -rule: 5T ( -resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -vwap_groups: [('close', 'volume', 'vwap')] ( -reindex_like_input: False ( -join_output_with_input: False ( -compute_ret_0: ( -in_col_groups: [('close',), ('vwap',), ('twap',)] ( -out_col_group: () ( -transformer_kwargs: ( -mode: log_rets ( -col_mapping: ( -close: close.ret_0 ( -vwap: vwap.ret_0 ( -twap: twap.ret_0 ( -compute_vol: ( -in_col_group: ('vwap.ret_0',) ( -out_col_group: ('vwap.ret_0.vol',) ( -drop_nans: True ( -permitted_exceptions: (,) ( -adjust_rets: ( -in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -out_col_group: () ( -transformer_kwargs: ( -term1_col: vwap.ret_0 ( -term2_col: vwap.ret_0.vol ( -out_col: vwap.ret_0.vol_adj ( -term2_delay: 2 ( -operation: div ( -drop_nans: True ( -compress_rets: ( -in_col_groups: [('vwap.ret_0.vol_adj',)] ( -out_col_group: () ( -col_mapping: ( -vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -dag_builder_object: nid_prefix= ( -dag_builder_class: Example1_DagBuilder < -system_class: Example1_ForecastSystem < -dag_config_config: ( -resample: ( -transformer_kwargs: ( -rule: 1T ( -dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_backtest_period1 --------------------------------------------------------------------------------- -exp = r"""################################################################################ -system_config 
-################################################################################ -dag_config: - filter_ath: - col_mode: replace_all - transformer_kwargs: - start_time: 09:30:00 - end_time: 16:00:00 - resample: - in_col_groups: [('close',), ('volume',), ('feature1',)] - out_col_group: () - transformer_kwargs: - rule: 5T - resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] - vwap_groups: [('close', 'volume', 'vwap')] - reindex_like_input: False - join_output_with_input: False - compute_ret_0: - in_col_groups: [('close',), ('vwap',), ('twap',)] - out_col_group: () - transformer_kwargs: - mode: log_rets - col_mapping: - close: close.ret_0 - vwap: vwap.ret_0 - twap: twap.ret_0 - compute_vol: - in_col_group: ('vwap.ret_0',) - out_col_group: ('vwap.ret_0.vol',) - drop_nans: True - permitted_exceptions: (,) - adjust_rets: - in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] - out_col_group: () - transformer_kwargs: - term1_col: vwap.ret_0 - term2_col: vwap.ret_0.vol - out_col: vwap.ret_0.vol_adj - term2_delay: 2 - operation: div - drop_nans: True - compress_rets: - in_col_groups: [('vwap.ret_0.vol_adj',)] - out_col_group: () - col_mapping: - vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -dag_builder_object: nid_prefix= -dag_builder_class: Example1_DagBuilder -system_class: Example1_ForecastSystem -dag_config_config: - resample: - transformer_kwargs: - rule: 1T -dag_runner_object: > -market_data_config: - asset_id_col_name: asset_id - asset_ids: [1467591036, 3303714233] -backtest_config: - universe_str: example1_v1-top2 - trading_period_str: 1T - time_interval_str: Jan2000 - start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 - end_timestamp: 2000-01-31 00:00:00+00:00 -market_object: -dag_object: name=None -mode=strict -nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), 
('read_data', {'stage': })] -edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -################################################################################ -vwap.ret_0.vol_adj.c -################################################################################ - 1467591036 3303714233 -end_ts -2000-01-01 10:00:00-05:00 -0.98 -0.98 -2000-01-01 10:05:00-05:00 0.98 0.98 -2000-01-01 10:10:00-05:00 -0.98 -0.98 -""" -______ - -# Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 -Traceback (most recent call last): - File "$GIT_ROOT/dataflow/system/example1/test/test_example1_forecast_system.py", line 57, in test_fit_over_period1 - self._test_fit_over_period1( - File "$GIT_ROOT/dataflow/system/dtfamsys.py", line 137, in _test_fit_over_period1 - self.check_string(actual, fuzzy_match=True, purify_text=True) - File "$GIT_ROOT/helpers/hunit_test.py", line 1266, in check_string - is_equal = assert_equal( - File "$GIT_ROOT/helpers/hunit_test.py", line 881, in assert_equal - diff_files( - File "$GIT_ROOT/helpers/hunit_test.py", line 586, in diff_files - raise RuntimeError(msg_as_str) -RuntimeError: --------------------------------------------------------------------------------- -FUZZY ACTUAL vs EXPECTED: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 --------------------------------------------------------------------------------- -Run_fast_tests Run fast tests 2022-07-10T12:40:40.4748547Z -system_config ( -dag_config: ( -filter_ath: ( -col_mode: replace_all ( -transformer_kwargs: ( -start_time: 09:30:00 ( -end_time: 16:00:00 ( -resample: ( -in_col_groups: [('close',), ('volume',), ('feature1',)] ( -out_col_group: () ( -transformer_kwargs: ( -rule: 5T ( -resampling_groups: 
[({'close': 'close'}, 'last', {}), ({'close': 'twap', ( -vwap_groups: [('close', 'volume', 'vwap')] ( -reindex_like_input: False ( -join_output_with_input: False ( -compute_ret_0: ( -in_col_groups: [('close',), ('vwap',), ('twap',)] ( -out_col_group: () ( -transformer_kwargs: ( -mode: log_rets ( -col_mapping: ( -close: close.ret_0 ( -vwap: vwap.ret_0 ( -twap: twap.ret_0 ( -compute_vol: ( -in_col_group: ('vwap.ret_0',) ( -out_col_group: ('vwap.ret_0.vol',) ( -drop_nans: True ( -permitted_exceptions: (,) ( -adjust_rets: ( -in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] ( -out_col_group: () ( -transformer_kwargs: ( -term1_col: vwap.ret_0 ( -term2_col: vwap.ret_0.vol ( -out_col: vwap.ret_0.vol_adj ( -term2_delay: 2 ( -operation: div ( -drop_nans: True ( -compress_rets: ( -in_col_groups: [('vwap.ret_0.vol_adj',)] ( -out_col_group: () ( -col_mapping: ( -vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c ( -dag_builder_object: nid_prefix= ( -dag_builder_class: Example1_DagBuilder < -system_class: Example1_ForecastSystem < -dag_config_config: ( -resample: ( -transformer_kwargs: ( -rule: 1T ( -dag_runner_object: vimdiff dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.actual.txt dataflow/system/example1/test/outcomes/Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1/tmp.expected.txt -or running: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -EXPECTED VARIABLE: Test_Example1_ForecastSystem_FitPredict.test_fit_over_period1 --------------------------------------------------------------------------------- -exp = r"""################################################################################ -system_config -################################################################################ -dag_config: - filter_ath: - col_mode: replace_all - transformer_kwargs: - start_time: 09:30:00 - end_time: 16:00:00 - resample: - in_col_groups: [('close',), ('volume',), 
('feature1',)] - out_col_group: () - transformer_kwargs: - rule: 5T - resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})] - vwap_groups: [('close', 'volume', 'vwap')] - reindex_like_input: False - join_output_with_input: False - compute_ret_0: - in_col_groups: [('close',), ('vwap',), ('twap',)] - out_col_group: () - transformer_kwargs: - mode: log_rets - col_mapping: - close: close.ret_0 - vwap: vwap.ret_0 - twap: twap.ret_0 - compute_vol: - in_col_group: ('vwap.ret_0',) - out_col_group: ('vwap.ret_0.vol',) - drop_nans: True - permitted_exceptions: (,) - adjust_rets: - in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)] - out_col_group: () - transformer_kwargs: - term1_col: vwap.ret_0 - term2_col: vwap.ret_0.vol - out_col: vwap.ret_0.vol_adj - term2_delay: 2 - operation: div - drop_nans: True - compress_rets: - in_col_groups: [('vwap.ret_0.vol_adj',)] - out_col_group: () - col_mapping: - vwap.ret_0.vol_adj: vwap.ret_0.vol_adj.c -dag_builder_object: nid_prefix= -dag_builder_class: Example1_DagBuilder -system_class: Example1_ForecastSystem -dag_config_config: - resample: - transformer_kwargs: - rule: 1T -dag_runner_object: > -market_data_config: - asset_id_col_name: asset_id - asset_ids: [1467591036, 3303714233] -backtest_config: - universe_str: example1_v1-top2 - trading_period_str: 1T - time_interval_str: Jan2000 - start_timestamp_with_lookback: 2000-01-01 00:00:00+00:00 - end_timestamp: 2000-01-31 00:00:00+00:00 -market_object: -dag_object: name=None -mode=strict -nodes=[('filter_ath', {'stage': }), ('resample', {'stage': }), ('compute_ret_0', {'stage': }), ('compute_vol', {'stage': }), ('adjust_rets', {'stage': }), ('compress_rets', {'stage': }), ('read_data', {'stage': })] -edges=[('filter_ath', 'resample', {'df_in': 'df_out'}), ('resample', 'compute_ret_0', {'df_in': 'df_out'}), ('compute_ret_0', 'compute_vol', {'df_in': 'df_out'}), ('compute_vol', 'adjust_rets', {'df_in': 'df_out'}), ('adjust_rets', 
'compress_rets', {'df_in': 'df_out'}), ('read_data', 'filter_ath', {'df_in': 'df_out'})] -################################################################################ -vwap.ret_0.vol_adj.c -################################################################################ - 1467591036 3303714233 -end_ts -2000-01-01 10:00:00-05:00 -0.98 -0.98 -2000-01-01 10:05:00-05:00 0.98 0.98 -2000-01-01 10:10:00-05:00 -0.98 -0.98 -""" - -# To run the tests: -> $GIT_ROOT/helpers/test/outcomes/Test_pytest_repro_end_to_end.test7/tmp.scratch/tmp.pytest_repro.sh \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt deleted file mode 100644 index d0b931699..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test2/input/test.txt +++ /dev/null @@ -1,7 +0,0 @@ - -```python - -def check_empty_lines(): - print("Check empty lines are present!") - -``` diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt deleted file mode 100644 index 34d8d7aa1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/input/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses 
them - ```python - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - ``` -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt deleted file mode 100644 index de229ba17..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt +++ /dev/null @@ -1,16 +0,0 @@ -- Functions can be declared in the body of another function -- E.g., to hide utility functions in the scope of the function that uses them - - def print_integers(values): - - def _is_integer(value): - try: - return value == int(value) - except: - return False - - for v in values: - if _is_integer(v): - print(v) - -- Hello \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt deleted file mode 100644 index fb18a0a9c..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_code_delimiters1.test6/input/test.txt +++ /dev/null @@ -1,9 +0,0 @@ -```python -def no_start_python(): - print("No mention of python at the start")``` -``` - -``` - A markdown paragraph contains - delimiters that needs to be removed. 
-``` \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt deleted file mode 100644 index 6c1304cfb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_multiple_blocks/output/test.txt +++ /dev/null @@ -1,7 +0,0 @@ -Text before -:::: -::::{.column width=40%} -Middle text -:::columns -::::{.column width=60%} -Text after \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt deleted file mode 100644 index 0ac895652..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_remove_prettier_ignore_from_div_blocks.test_remove_simple_block/output/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -:::: -::: \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt deleted file mode 100644 index 
9f8585df5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -Consecutive headers increase by more than one level: - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt deleted file mode 100644 index ab5bbf048..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str1.test2/output/test.txt +++ /dev/null @@ -1,71 +0,0 @@ -################################################################################ -level=1, description='Chapter 1' -################################################################################ -- **Chapter 1** - - Section 1.1 - - Section 1.2 -- Chapter 2 -################################################################################ -level=2, description='Section 1.1' -################################################################################ -- Chapter 1 - - **Section 1.1** - - Subsection 1.1.1 - - Subsection 1.1.2 - - Section 1.2 -- Chapter 2 -################################################################################ -level=3, description='Subsection 1.1.1' -################################################################################ -- Chapter 1 - - Section 1.1 - - **Subsection 1.1.1** - - Subsection 1.1.2 - - Section 1.2 -- Chapter 2 -################################################################################ -level=3, description='Subsection 1.1.2' 
-################################################################################ -- Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - **Subsection 1.1.2** - - Section 1.2 -- Chapter 2 -################################################################################ -level=2, description='Section 1.2' -################################################################################ -- Chapter 1 - - Section 1.1 - - **Section 1.2** -- Chapter 2 -################################################################################ -level=1, description='Chapter 2' -################################################################################ -- Chapter 1 -- **Chapter 2** - - Section 2.1 - - Section 2.2 -################################################################################ -level=2, description='Section 2.1' -################################################################################ -- Chapter 1 -- Chapter 2 - - **Section 2.1** - - Subsection 2.1.1 - - Section 2.2 -################################################################################ -level=3, description='Subsection 2.1.1' -################################################################################ -- Chapter 1 -- Chapter 2 - - Section 2.1 - - **Subsection 2.1.1** - - Section 2.2 -################################################################################ -level=2, description='Section 2.2' -################################################################################ -- Chapter 1 -- Chapter 2 - - Section 2.1 - - **Section 2.2** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt deleted file mode 100644 index df89fcd63..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_selected_navigation_to_str2.test2/output/test.txt +++ /dev/null @@ -1,40 +0,0 @@ -################################################################################ -level=1, description='Models' -################################################################################ -- **Models** - - Naive Bayes - - Decision trees - - Random forests - - Linear models -################################################################################ -level=2, description='Naive Bayes' -################################################################################ -- Models - - **Naive Bayes** - - Decision trees - - Random forests - - Linear models -################################################################################ -level=2, description='Decision trees' -################################################################################ -- Models - - Naive Bayes - - **Decision trees** - - Random forests - - Linear models -################################################################################ -level=2, description='Random forests' -################################################################################ -- Models - - Naive Bayes - - Decision trees - - **Random forests** - - Linear models -################################################################################ -level=2, description='Linear models' -################################################################################ -- Models - - Naive Bayes - - Decision trees - - Random forests - - **Linear models** \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt deleted 
file mode 100644 index 1c6176761..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_sort_dictionary.test1/output/test.txt +++ /dev/null @@ -1,40 +0,0 @@ -OrderedDict([('build-system', - OrderedDict([('build-backend', 'poetry.masonry.api'), - ('requires', ['poetry>=0.12'])])), - ('tool', - OrderedDict([('poetry', - OrderedDict([('authors', ['']), - ('dependencies', - OrderedDict([('awscli', '*'), - ('boto3', '*'), - ('bs4', '*'), - ('flaky', '*'), - ('fsspec', '*'), - ('gluonts', '*'), - ('invoke', '*'), - ('jsonpickle', '*'), - ('jupyter', '*'), - ('lxml', '*'), - ('matplotlib', '*'), - ('mxnet', '*'), - ('networkx', '*'), - ('pandas', '^1.1.0'), - ('psycopg2', '*'), - ('pyarrow', '*'), - ('pytest', '^6.0.0'), - ('pytest-cov', '*'), - ('pytest-instafail', - '*'), - ('pytest-xdist', '*'), - ('python', '^3.7'), - ('pywavelets', '*'), - ('requests', '*'), - ('s3fs', '*'), - ('seaborn', '*'), - ('sklearn', '*'), - ('statsmodels', '*'), - ('tqdm', '*')])), - ('description', ''), - ('dev-dependencies', OrderedDict()), - ('name', 'lm'), - ('version', '0.1.0')]))]))]) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt deleted file mode 100644 index 66475c930..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test3/output/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -time data "28-07-2023 15:05:13" doesn't match format "%Y%m%d_%H%M%S", at position 0. 
You might want to try: - - passing `format` if your strings have a consistent format; - - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format; - - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this. \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt deleted file mode 100644 index 41895df11..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_str_to_timestamp1.test4/output/test.txt +++ /dev/null @@ -1 +0,0 @@ -Unknown datetime string format, unable to parse: qwe28abc07-201234, at position 0 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt deleted file mode 100644 index 0498168e2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_system1.test7/output/test.txt +++ /dev/null @@ -1,16 +0,0 @@ - -################################################################################ -################################################################################ -_system() failed -################################################################################ -################################################################################ -# _system: cmd='(ls this_file_doesnt_exist) 
2>&1', abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -cmd='(ls this_file_doesnt_exist) 2>&1' -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- rc='' -- output=' -ls: cannot access 'this_file_doesnt_exist': No such file or directory -' -- Output saved in 'tmp.system_output.txt' -- Command saved in 'tmp.system_cmd.sh' \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv deleted file mode 100644 index abc3dac80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/outcomes/Test_to_typed_csv.test1/input/test.csv +++ /dev/null @@ -1,2 +0,0 @@ -A,B,C,D,E -1,2.3456,c,d,78 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py deleted file mode 100644 index 7b0473b8a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_create_link.py +++ /dev/null @@ -1,136 +0,0 @@ -import filecmp -import os -import pathlib -import shutil -from typing import List, Tuple - -import dev_scripts_helpers.system_tools.create_links as dshstcrli -import helpers.hio as hio -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Test_create_links -# 
############################################################################# - - -class Test_create_links(hunitest.TestCase): - """ - Unit tests for the `create_links.py` script. - """ - - def create_file( - self, dir_path: pathlib.Path, file_name: str, content: str - ) -> pathlib.Path: - """ - Create a file with the given content in the specified directory. - - This helper function ensures the directory exists before - creating the file and writing the specified content into it. - - :param dir_path: path to the directory where the file will be - created - :param file_name: name of the file to create - :param content: content to write into the file - :return: full path to the created file - """ - dir_path = pathlib.Path(dir_path) - file_path = dir_path / file_name - hio.to_file(file_name=str(file_path), txt=content) - return file_path - - def test__find_common_files(self) -> None: - """ - Test identifying common files between two directories. - - Create two directories, each containing identical files, - and checks that the `_find_common_files` function identifies these files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - src_dir.mkdir(parents=True, exist_ok=True) - dst_dir.mkdir(parents=True, exist_ok=True) - file1_src: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - file1_dst: pathlib.Path = shutil.copy(file1_src, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - str(src_dir), str(dst_dir) - ) - self.assertEqual(len(common_files), 1) - self.assertEqual(common_files[0], (str(file1_src), str(file1_dst))) - - def test__replace_with_links_absolute(self) -> None: - """ - Test replacing common files with absolute symbolic links. 
- - Create identical files in two directories and replace the files - in the destination directory with absolute symbolic links - pointing to the source files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - shutil.copy(file1, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - str(src_dir), str(dst_dir) - ) - dshstcrli._replace_with_links(common_files, use_relative_paths=False) - for _, dst_file in common_files: - self.assertTrue(os.path.islink(dst_file)) - self.assert_equal(os.readlink(dst_file), str(file1)) - - def test__replace_with_links_relative(self) -> None: - """ - Test replacing common files with relative symbolic links. - - Create identical files in two directories and replace the files - in the destination directory with relative symbolic links - pointing to the source files. - """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - shutil.copy(file1, dst_dir) - common_files: List[Tuple[str, str]] = dshstcrli._find_common_files( - src_dir, dst_dir - ) - dshstcrli._replace_with_links(common_files, use_relative_paths=True) - for src_file, dst_file in common_files: - self.assertTrue(os.path.islink(dst_file)) - expected_link: str = os.path.relpath( - src_file, os.path.dirname(dst_file) - ) - self.assert_equal(os.readlink(dst_file), expected_link) - - def test__stage_links(self) -> None: - """ - Test replacing symbolic links with writable file copies. - - Create symbolic links in a directory and then stage them by - replacing each link with a copy of the original file it points - to. 
- """ - base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) - src_dir: pathlib.Path = base_dir / "test_src_dir" - dst_dir: pathlib.Path = base_dir / "test_dst_dir" - src_dir.mkdir(parents=True, exist_ok=True) - dst_dir.mkdir(parents=True, exist_ok=True) - file1: pathlib.Path = self.create_file( - src_dir, "file1.txt", "Hello, World!" - ) - link1: pathlib.Path = dst_dir / "file1.txt" - os.symlink(file1, link1) - symlinks: List[str] = dshstcrli._find_symlinks(dst_dir) - dshstcrli._stage_links(symlinks) - for link in symlinks: - self.assertFalse(os.path.islink(link)) - self.assertTrue(os.path.isfile(link)) - self.assertTrue(filecmp.cmp(link, file1, shallow=False)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py deleted file mode 100644 index 98994cb5a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hasyncio.py +++ /dev/null @@ -1,96 +0,0 @@ -import asyncio -import logging -from typing import Optional - -import helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_hasyncio1 -# ############################################################################# - - -class Test_hasyncio1(hunitest.TestCase): - """ - Execute a workload using different time semantics: - - - real time - - simulated time - """ - - @staticmethod - async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: - """ - Coroutine simulating a workload waiting for 1s. 
- """ - - def _print_time() -> None: - true_wall_clock_time = hdateti.get_current_time("ET") - _LOG.debug("wall_clock_time=%s", true_wall_clock_time) - event_loop_time = get_wall_clock_time() - _LOG.debug("event_loop_time=%s", event_loop_time) - - _print_time() - # The execution here is just waiting. - _LOG.debug(" -> execute") - await asyncio.sleep(1.0) - # - _print_time() - - def run_test( - self, - event_loop: Optional[asyncio.AbstractEventLoop], - get_wall_clock_time: hdateti.GetWallClockTime, - ) -> None: - coroutine = self.workload(get_wall_clock_time) - hasynci.run(coroutine, event_loop=event_loop) - - def test_real_time1(self) -> None: - """ - Use real-time semantic. - - In this case: - ``` - wall_clock_time=2021-09-27 20:40:43.775683-04:00 - event_loop_time=2021-09-27 20:40:43.799074-04:00 - -> execute - wall_clock_time=2021-09-27 20:40:44.808990-04:00 - event_loop_time=2021-09-27 20:40:44.812472-04:00 - ``` - - - the wall clock time and the event loop time both advance - """ - # Use the wall clock time with no special event loop. - get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") - event_loop = None - # Run. - self.run_test(event_loop, get_wall_clock_time) - - def test_simulated_time1(self) -> None: - """ - Use simulated time semantic. - - In this case: - ``` - wall_clock_time=2021-09-27 20:38:47.843501-04:00 - event_loop_time=2021-09-27 20:38:47.841555-04:00 - -> execute - wall_clock_time=2021-09-27 20:38:47.868272-04:00 - event_loop_time=2021-09-27 20:38:48.841555-04:00 - ``` - - - the wall_clock time doesn't advance since the execution is instantaneous - - the event loop time moves forward 1 sec - """ - # Use the solipsistic event loop to simulate the real-time faster. - with hasynci.solipsism_context() as event_loop: - # Use the simulated wall clock time. - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - # Run. 
- self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py deleted file mode 100644 index 5469e009e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_haws.py +++ /dev/null @@ -1,276 +0,0 @@ -import os -import unittest.mock as umock -from typing import Optional - -import boto3 -import pytest -from botocore.client import BaseClient -from moto import mock_aws - -import helpers.haws as haws -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Haws_test_case -# ############################################################################# - - -class Haws_test_case(hunitest.TestCase): - @pytest.fixture(autouse=True, scope="class") - def aws_credentials(self) -> None: - """ - Mocked AWS credentials for moto. - """ - os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "testing" - os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["MOCK_AWS_SECURITY_TOKEN"] = "testing" - os.environ["MOCK_AWS_SESSION_TOKEN"] = "testing" - os.environ["MOCK_AWS_DEFAULT_REGION"] = "us-east-1" - - -# ############################################################################# -# Test_get_session -# ############################################################################# - - -class Test_get_session(Haws_test_case): - @pytest.fixture(autouse=True) - def set_up_test(self) -> None: - os.environ["MOCK_AWS_S3_BUCKET"] = "mock_aws_bucket" - - @mock_aws - @umock.patch("boto3.Session") - def test_get_session1(self, mock_boto3_session: umock.Mock) -> None: - """ - Test that `haws.get_session` correctly return a session without region - parameter. - """ - aws_profile = "__mock__" - # Create a mock session. 
- mock_session = umock.MagicMock() - mock_boto3_session.return_value = mock_session - # Test that get_session returns a session object. - session = haws.get_session(aws_profile) - self.assertEqual(session, mock_session) - # Verify that `boto3.Session` was called with the correct profile. - mock_boto3_session.assert_called_once_with(profile_name=aws_profile) - - @mock_aws - @umock.patch("boto3.Session") - def test_get_session2(self, mock_boto3_session: umock.Mock) -> None: - """ - Test that `haws.get_session` correctly return a session with region - parameter. - """ - aws_profile = "__mock__" - region = "us-east-1" - # Create a mock session - mock_session = umock.MagicMock() - mock_boto3_session.return_value = mock_session - # Test that `get_session` returns a session object with the specified region. - session = haws.get_session(aws_profile, region=region) - self.assertEqual(session, mock_session) - # Verify that `boto3.Session` was called with the correct profile and region. - mock_boto3_session.assert_called_once_with( - profile_name=aws_profile, region_name=region - ) - - -# ############################################################################# -# Test_get_service_client -# ############################################################################# - - -class Test_get_service_client(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test `haws.get_service_client()` returns a client for S3. - """ - aws_profile = "__mock__" - service_name = "s3" - region = "us-east-1" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name=region, - ) - mock_get_session.return_value = mock_session - # Create mock client for S3. 
- client = haws.get_service_client( - aws_profile=aws_profile, service_name=service_name, region=region - ) - # Check that the returned client is for the S3 service. - self.assert_equal(client.meta.service_model.service_name, "s3") - # Check for region. - self.assert_equal(client.meta.region_name, region) - - -# ############################################################################# -# Test_get_service_resource -# ############################################################################# - - -class Test_get_service_resource(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_service_resource()` correctly retrieves a S3 - resource. - """ - aws_profile = "__mock__" - service_name = "s3" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name="us-east-1", - ) - mock_get_session.return_value = mock_session - # Create mock S3 bucket. - s3 = boto3.resource("s3") - s3.create_bucket(Bucket="my-test-bucket") - s3_resource = haws.get_service_resource( - aws_profile=aws_profile, service_name=service_name - ) - # Get all `S3` buckets. - buckets = list(s3_resource.buckets.all()) - bucket_names = [bucket.name for bucket in buckets] - # Check. - self.assertIn("my-test-bucket", bucket_names) - - -# ############################################################################# -# Test_get_task_definition_image_url -# ############################################################################# - - -class Test_get_task_definition_image_url(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_service_client") - def test1(self, mock_get_service_client: umock.Mock) -> None: - """ - Test that `get_task_definition_image_url` retrieves correct image URL. - """ - # Mock data. 
- task_definition_name = "my-task-definition" - mock_image_url = "old_image_url" - region = "us-east-1" - # Mock the return value of `get_service_client`. - mock_client = boto3.client("ecs", region_name=region) - mock_get_service_client.return_value = mock_client - # Create a mock task definition. - mock_client.register_task_definition( - family=task_definition_name, - # The following are required parameters. - containerDefinitions=[ - {"name": "my-container", "image": mock_image_url, "memory": 512} - ], - ) - image_url = haws.get_task_definition_image_url( - task_definition_name, environment="test" - ) - self.assertEqual(image_url, mock_image_url) - - -# ############################################################################# -# Test_update_task_definition -# ############################################################################# - - -class Test_update_task_definition(Haws_test_case): - @mock_aws - @umock.patch("helpers.haws.get_ecs_client") - def test1(self, mock_get_ecs_client: BaseClient) -> None: - """ - Test updating a task definition with a new image URL. - """ - # Mock data. - task_definition_name = "my-task-definition" - old_image_url = "old_image_url" - new_image_url = "new_image_url" - region = "us-east-1" - # Mock the return value of `get_ecs_client`. - mock_client = boto3.client("ecs", region_name=region) - mock_get_ecs_client.return_value = mock_client - # Create a mock task definition. - mock_client.register_task_definition( - family=task_definition_name, - containerDefinitions=[ - {"name": "my-container", "image": old_image_url} - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - # Update task definition. - haws.update_task_definition( - task_definition_name, - new_image_url, - region=region, - environment="test", - ) - # Check if the task definition is updated. 
- task_description = mock_client.describe_task_definition( - taskDefinition=task_definition_name - ) - updated_image_url = task_description["taskDefinition"][ - "containerDefinitions" - ][0]["image"] - self.assertEqual(updated_image_url, new_image_url) - - -# ############################################################################# -# Test_get_ecs_client -# ############################################################################# - - -class Test_get_ecs_client(Haws_test_case): - def mock_aws_client( - self, mock_get_session: umock.Mock, *, region: Optional[str] = None - ) -> None: - aws_profile = "__mock__" - test_cluster_name = "test-cluster" - # Create a mock session with the expected credentials. - mock_session = boto3.session.Session( - aws_access_key_id="testing", - aws_secret_access_key="testing", - region_name=region or "us-east-1", - ) - mock_get_session.return_value = mock_session - # Create mock ECS client. - ecs_client = boto3.client("ecs", region_name="us-east-1") - ecs_client.create_cluster(clusterName=test_cluster_name) - # Get ECS client. - if region: - test_client = haws.get_ecs_client(aws_profile, region=region) - else: - test_client = haws.get_ecs_client(aws_profile) - # Get the created cluster. - cluster_name = test_client.list_clusters()["clusterArns"][0] - # Check cluster name. - self.assertIn(test_cluster_name, cluster_name) - - @mock_aws - @umock.patch("helpers.haws.get_session") - def test1(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_ecs_client()` correctly return a client to work - with ECS within a specified region. - """ - self.mock_aws_client(mock_get_session, region="us-east-1") - - @mock_aws - @umock.patch("helpers.haws.get_session") - def test2(self, mock_get_session: umock.Mock) -> None: - """ - Test that `haws.get_ecs_client()` correctly return a client to work - with ECS without a specified region. 
- """ - self.mock_aws_client(mock_get_session) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py deleted file mode 100644 index 1699e7bcd..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache.py +++ /dev/null @@ -1,1002 +0,0 @@ -import logging -import tempfile -import time -from typing import Any, Callable, Generator, Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hcache as hcache -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Do not commit this. -# _LOG.debug = _LOG.info - - -# TODO(gp): Use hprint.log_frame -def _LOG_frame(txt: str) -> None: - _LOG.debug("\n%s", hprint.frame(txt)) - - -# ############################################################################# - - -def _get_add_function() -> Callable: - """ - Return a function with the ability to track state, used for testing. - """ - - def func(x: int, y: int) -> int: - func.executed = True # type: ignore[attr-defined] - return x + y - - func.executed = False # type: ignore[attr-defined] - return func - - -def _reset_add_function(func: Callable) -> None: - """ - Reset the function before another execution, so we can verify if it was - executed or not. - - We should do this every time we run the cached version of the - function. 
- """ - func.executed = False # type: ignore[attr-defined] - hdbg.dassert(not func.executed) # type: ignore[attr-defined] - - -# ############################################################################# - - -# ############################################################################# -# _ResetGlobalCacheHelper -# ############################################################################# - - -class _ResetGlobalCacheHelper(hunitest.TestCase): - """ - Create a global cache for each test method and resets it at every test - method invocation. - """ - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def _remove_all_caches(self) -> None: - """ - Clean and remove all the caches for this test. - """ - cache_type = "all" - hcache.clear_global_cache(cache_type, tag=self.cache_tag, destroy=True) - - def set_up_test(self) -> None: - # Create a tag like "TestCacheFeatures::test_without_caching1". - self.cache_tag = f"{self.__class__.__name__}::{self._testMethodName}" - # Clean all the caches before this test method is run. - self._remove_all_caches() - - def tear_down_test(self) -> None: - # Clean and remove all the caches after the test method is run. - self._remove_all_caches() - - def _get_f_cf_functions( - self, **cached_kwargs: Any - ) -> Tuple[Callable, hcache._Cached]: - """ - Create the intrinsic function `f` and its cached version `cf`. - """ - # Make sure that we are using the unit test cache. - # disk_cache_name = hcache._get_global_cache_name("disk", self.cache_tag) - # _LOG.debug("disk_cache_name=%s", disk_cache_name) - # _LOG.debug( - # "disk_cache_path=%s", hcache._get_global_cache_path("disk", self.cache_tag) - # ) - # TODO(gp): Add an assertion. - # Create the intrinsic function. - f = _get_add_function() - # Create the cached function. 
- cf = hcache._Cached(f, tag=self.cache_tag, **cached_kwargs) - # Reset all the caches. - hcache.clear_global_cache("all", self.cache_tag) - cf._reset_cache_tracing() - return f, cf - - def _execute_and_check_state( - self, - f: Callable, - cf: hcache._Cached, - val1: int, - val2: int, - exp_cf_state: str, - ) -> None: - """ - Call the function `f(val1, val2) and its cached function `cf(val1, - val2)` and check whether the intrinsic function was executed and what - caches were used, according to `exp_f_state` and `exp_cf_state`. - """ - # If there was no caching then we must have executed the function. - exp_f_state = exp_cf_state == "no_cache" - _LOG.debug( - "\n%s", - hprint.frame( - f"val1={val1}, val2={val2}, exp_f_state={exp_f_state}, " - f"exp_cf_state={exp_cf_state}", - char1="<", - ), - ) - # Reset the intrinsic function since we want to verify if it was called - # or not when we call the cached function. - _reset_add_function(f) - # Call the cached function. - actual = cf(val1, val2) - expected = val1 + val2 - # Check the result. - self.assertEqual(actual, expected) - # Check which function was executed and what caches were used. - _LOG.debug( - "f.executed=%s vs %s", - f.executed, # type: ignore[attr-defined] - exp_f_state, - ) - _LOG.debug( - "cf.get_last_cache_accessed=%s vs %s", - cf.get_last_cache_accessed(), - exp_cf_state, - ) - self.assertEqual(f.executed, exp_f_state) # type: ignore[attr-defined] - self.assertEqual(cf.get_last_cache_accessed(), exp_cf_state) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheFunctions -# ############################################################################# - - -class TestCacheFunctions(hunitest.TestCase): - def test_get_cache_name1(self) -> None: - """ - Make sure we are using the unit test cache and not the development - cache, by checking the name of the disk cache. 
- """ - cache_tag = "unittest" - disk_cache_name = hcache._get_global_cache_name("disk", cache_tag) - _LOG.debug("disk_cache_name=%s", disk_cache_name) - self.assertIn(cache_tag, disk_cache_name) - - -# ############################################################################# - - -# ############################################################################# -# TestGlobalCache1 -# ############################################################################# - - -class TestGlobalCache1(_ResetGlobalCacheHelper): - def test_without_caching1(self) -> None: - """ - If we execute two times without caching, we get two executions of the - intrinsic function. - """ - f = _get_add_function() - self.assertFalse(f.executed) # type: ignore[attr-defined] - # Execute. - actual = f(3, 4) - self.assertEqual(actual, 7) - # The function was executed. - self.assertTrue(f.executed) # type: ignore[attr-defined] - # Reset. - _reset_add_function(f) - self.assertFalse(f.executed) # type: ignore[attr-defined] - # Execute again. - actual = f(3, 4) - self.assertEqual(actual, 7) - # Check that the function is executed again, since there is no caching. - self.assertTrue(f.executed) # type: ignore[attr-defined] - - def test_with_caching1(self) -> None: - """ - - Leave the caches enabled - - Show that the memory cache is used - """ - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions() - # 1) Execute and verify that it is executed, since it was not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Execute and verify that it is not executed, since it's cached. 
- _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - - def test_with_caching2(self) -> None: - """ - - Leave the caches enabled - - Cache different values - """ - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions() - # 1) Execute and verify that it is executed, since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Use a different workload. - _LOG.debug("\n%s", hprint.frame("Execute")) - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 3) Execute the second time: verify that it is not executed, since cached. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 4) Use a different workload: not executed since cached. - _LOG.debug("\n%s", hprint.frame("Execute")) - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") - - def test_with_caching3(self) -> None: - """ - - Disable both mem and disk cache - - Cache a single value - """ - # Disable both memory and disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=False - ) - # 1) Execute the first time: executed since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Execute the second time: executed since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - - def test_with_caching4(self) -> None: - """ - - Disable only the disk cache - - Cache different values - """ - # Use only memory cache. 
- f, cf = self._get_f_cf_functions( - use_mem_cache=True, use_disk_cache=False - ) - # 1) Execute and verify that it is executed since not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Execute the second time: verify that it was cached from memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="mem") - - def test_with_caching5(self) -> None: - """ - - Disable only the memory cache - - Cache different values - """ - # Use only disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=True - ) - # 1) Verify that it is executed since there is no cache. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - # 2) Verify that it is executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="disk") - - # //////////////////////////////////////////////////////////////////////////// - - def test_with_caching_mem_reset(self) -> None: - """ - - Use only the memory cache - - Execute and cache - - Reset the mem cache - - Execute again - - Check that the cached function is recomputed - """ - # Use only memory cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=True, use_disk_cache=False - ) - # 1) Verify that it is executed, since it's not cached. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Reset memory cache. - _LOG.debug("\n%s", hprint.frame("Reset memory cache")) - hcache.clear_global_cache("mem", self.cache_tag) - # 4) Verify that it is executed, since the cache was emptied. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - - def test_with_caching_disk_reset(self) -> None: - """ - Same as `test_with_caching_mem_reset()` but using the disk cache. - """ - # Use only disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, use_disk_cache=True - ) - # 1) Verify that it is executed, since it's not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since cached in disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Reset disk cache. - _LOG.debug("\n%s", hprint.frame("Reset memory cache")) - hcache.clear_global_cache("disk", self.cache_tag) - # 4) Verify that it is executed, since the cache was emptied. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - - def test_with_caching_mem_reset2(self) -> None: - """ - - Use both caches - - Execute and cache - - Reset the mem cache - - Execute again - - Check that the cached value is found in the disk cache - """ - # Use both memory and disk cache - f, cf = self._get_f_cf_functions(use_mem_cache=True, use_disk_cache=True) - # 1) Verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Verify that it is not executed, since it's cached in memory. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 3) Reset memory cache. - hcache.clear_global_cache("mem", self.cache_tag) - # 4) Verify that it is not executed, since it's in the disk cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - # //////////////////////////////////////////////////////////////////////////// - - def test_redefined_function(self) -> None: - """ - If the cached function is redefined, but it's still the same, then the - intrinsic function should not be recomputed. - """ - # Define the function inline imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - add = _get_add_function() - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 3) Redefine the function inline. - _LOG.debug("\n%s", hprint.frame("Redefine function")) - add = _get_add_function() - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 4) Execute the third time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 5) Execute the fourth time. Should still use memory cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 6) Check that call with other arguments miss the cache. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state( - add, cached_add, 3, 4, exp_cf_state="no_cache" - ) - - def test_changed_function(self) -> None: - """ - If the function is redefined, but the code is not the same, then the - intrinsic function should be recomputed. - """ - # Define the function imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - - def add(x: int, y: int) -> int: - add.executed = True # type: ignore[attr-defined] - return x + y - - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 3) Redefine the function with different code. - _LOG.debug("\n%s", hprint.frame("Redefine function")) - - # pylint: disable=function-redefined - def add(x: int, y: int) -> int: # type: ignore[no-redef] - add.executed = True # type: ignore[attr-defined] - z = x + y - return z - - cached_add = hcache._Cached(add, tag=self.cache_tag) - # 4) Execute the third time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 5) Execute the fourth time. Should still use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="mem") - # 6) Check that call with other arguments miss the cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state( - add, cached_add, 3, 4, exp_cf_state="no_cache" - ) - - -# ############################################################################# - - -# ############################################################################# -# _ResetFunctionSpecificCacheHelper -# ############################################################################# - - -class _ResetFunctionSpecificCacheHelper(_ResetGlobalCacheHelper): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test2() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test2(self) -> None: - self.set_up_test() - # Create temp directories to store the cache. - self.disk_cache_dir = tempfile.mkdtemp() - # Clear global cache. - hcache.clear_global_cache("all", tag=self.cache_tag) - - -# ############################################################################# -# TestFunctionSpecificCache1 -# ############################################################################# - - -class TestFunctionSpecificCache1(_ResetFunctionSpecificCacheHelper): - def test_with_caching1(self) -> None: - """ - - Test using the function-specific disk cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - # Use a global cache and - _LOG.debug("\n%s", hprint.frame("Starting")) - _LOG.debug( - "# get_global_cache_info()=\n%s", - hcache.get_global_cache_info(tag=self.cache_tag), - ) - f, cf = self._get_f_cf_functions( - use_mem_cache=False, - use_disk_cache=True, - disk_cache_path=self.disk_cache_dir, - ) - _LOG.debug( - "# cf.get_function_cache_info()=\n%s", cf.get_function_cache_info() - ) - # 1) Execute and verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("clear_global_cache")) - hcache.clear_global_cache("all") - # 4) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - def test_with_caching2(self) -> None: - """ - - Test using the function-specific disk cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - # Use only per-function disk cache. - f, cf = self._get_f_cf_functions( - use_mem_cache=False, disk_cache_path=self.disk_cache_dir - ) - # 1) Execute and verify that it is executed. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("clear_global_cache")) - hcache.clear_global_cache("all") - # 3) Execute and verify that it is not executed. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 4) Use the global cache. - _LOG.debug( - "\n%s", hprint.frame("Disable function cache and use global cache") - ) - cf.set_function_cache_path(None) - # 5) Execute and verify that function is executed with global cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 6) Execute. Now we get the value from the memory cache since disabling - # the function cache means enabling the memory cache. 
- _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="mem") - # 7) Restore back specific cache. - _LOG.debug("\n%s", hprint.frame("Restore function cache")) - cf.set_function_cache_path(self.disk_cache_dir) - # Verify that it is *NOT* executed with specific cache. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCachePerformance -# ############################################################################# - - -class TestCachePerformance(_ResetGlobalCacheHelper): - @staticmethod - # pylint: disable=unused-argument - def _computation(*args: Any) -> None: - """ - Simulate work. - - :param args: throw away arguments - """ - # Emulate small quantity of work. - time.sleep(0.01) - - @staticmethod - def _timeit(func: Callable, *args: Any) -> float: - """ - Get performance measure of the call to fn with args. - - :param fn: callable function - :param args: any arguments to pass to the function fn - :return: precise time in seconds - """ - perf_start = time.perf_counter() - func(*args) - perf_diff = time.perf_counter() - perf_start - return perf_diff - - def _test_performance(self, val: Any) -> None: - """ - Test performance of the cache over some argument val. - - :param val: any hashable argument - """ - # Create cached versions of the computation function. - _mem_cached_computation = hcache._Cached( - self._computation, - tag=self.cache_tag, - use_mem_cache=True, - use_disk_cache=False, - ) - _disk_cached_computation = hcache._Cached( - self._computation, - tag=self.cache_tag, - use_mem_cache=False, - use_disk_cache=True, - ) - # First step: no cache. 
- no_cache_ct = self._timeit(lambda: self._computation(val)) - print(f"no cache run time={no_cache_ct}") - # Second step: memory cache. - memory_no_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) - print(f"empty memory cache run time={memory_no_cache_ct}") - print(f"empty memory cache overhead={memory_no_cache_ct - no_cache_ct}") - memory_cache_ct = self._timeit(lambda: _mem_cached_computation(val)) - print(f"hot memory cache run time={memory_cache_ct}") - print(f"hot memory cache benefit={no_cache_ct - memory_cache_ct}") - # Third step: disk cache. - disk_no_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) - print(f"empty disk cache run time={disk_no_cache_ct}") - print(f"empty disk cache overhead={disk_no_cache_ct - no_cache_ct}") - disk_cache_ct = self._timeit(lambda: _disk_cached_computation(val)) - print(f"hot disk cache run time={disk_cache_ct}") - print(f"hot disk cache benefit={no_cache_ct - disk_cache_ct}") - - def test_performance_dataframe(self) -> None: - """ - Test performance of the cache over pandas DataFrame. - """ - # Create a somewhat big DataFrame with random data. - df = pd.DataFrame( - np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD") - ) - print("testing pandas dataframe, with sample size", df.shape) - self._test_performance(df) - - def test_performance_series(self) -> None: - """ - Test performance of the cache over pandas Series. - """ - # Create a somewhat big DataFrame with random data. 
- s = pd.Series(np.random.randint(0, 100, size=100)) - print("testing pandas series, with sample size", s.shape) - self._test_performance(s) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheDecorator -# ############################################################################# - - -class TestCacheDecorator(_ResetGlobalCacheHelper): - def test_decorated_function(self) -> None: - """ - Test decorator with both caches enabled. - """ - - # Define the function inline imitating working in a notebook. - @hcache.cache(tag=self.cache_tag) - def add(x: int, y: int) -> int: - add.__wrapped__.executed = True - return x + y - - # Execute the first time. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" - ) - # Execute the second time. Must use memory cache. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="mem" - ) - - def test_decorated_function_no_mem(self) -> None: - """ - Test decorator with only disk cache. - """ - - # Define the function inline imitating working in a notebook. - @hcache.cache(tag=self.cache_tag, use_mem_cache=False) - def add(x: int, y: int) -> int: - add.__wrapped__.executed = True - return x + y - - # Execute the first time. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="no_cache" - ) - # Execute the second time. Must use disk cache. - self._execute_and_check_state( - add.__wrapped__, add, 1, 2, exp_cf_state="disk" - ) - - -# ############################################################################# - - -# ############################################################################# -# TestAmpTask1407 -# ############################################################################# - - -class TestAmpTask1407(_ResetGlobalCacheHelper): - def test1(self) -> None: - """ - A class method can't be cached. 
- """ - - class _AmpTask1407Class: - def __init__(self, string: str) -> None: - self._string = string - - @hcache.cache(tag=self.cache_tag) - def print(self, n: int) -> str: - string = "" - for _ in range(n): - string += "hello" + ("o" * len(self._string)) + " " - return string - - obj = _AmpTask1407Class("test") - with self.assertRaises(ValueError): - obj.print(5) - - def test2(self) -> None: - """ - A static method can be cached. - """ - - class _AmpTask1407Class: - def __init__(self, string: str) -> None: - self._string = string - - @staticmethod - @hcache.cache(tag=self.cache_tag) - def static_print(n: int) -> str: - print("--> hello: ", n) - string = "" - for _ in range(n): - string += "hello" + ("o" * len("world")) + " " - return string - - @hcache.cache(tag=self.cache_tag) - def print(self, n: int) -> str: - string = "" - for _ in range(n): - string += "hello" + ("o" * len(self._string)) + " " - return string - - obj = _AmpTask1407Class("test") - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") - # - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - obj.static_print(5) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - # - obj.static_print(6) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "no_cache") - obj.static_print(6) - self.assertEqual(obj.static_print.get_last_cache_accessed(), "mem") - - -# ############################################################################# - - -# ############################################################################# -# TestCachingOnS3 -# ############################################################################# - - -class TestCachingOnS3(_ResetFunctionSpecificCacheHelper): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test3() - yield - # Run after each test. 
- self.tear_down_test() - - def set_up_test3(self) -> None: - self.set_up_test2() - # Get a directory to store the cache on S3. - self.disk_cache_dir = self.get_s3_scratch_dir() - self.aws_profile = "am" - # Clear global cache. - hcache.clear_global_cache("all", tag=self.cache_tag) - - @pytest.mark.skip(reason="See CMTask #952.") - def test_with_caching1(self) -> None: - """ - - Test using the function-specific cache - - Disable function-specific cache and switching to global cache - - Test using the global cache - """ - _LOG.debug("\n%s", hprint.frame("Starting")) - _LOG.debug( - "\n%s", - hcache.get_global_cache_info(tag=self.cache_tag, add_banner=True), - ) - f, cf = self._get_f_cf_functions( - use_mem_cache=False, - disk_cache_path=self.disk_cache_dir, - aws_profile=self.aws_profile, - ) - _LOG.debug("\n%s", cf.get_function_cache_info(add_banner=True)) - cf.clear_function_cache(destroy=False) - # 1) Execute and verify that it is executed, since the value is not cached. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 2) Execute and verify that it is not executed, since it's cached on disk. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 3) Clear the global cache. - _LOG.debug("\n%s", hprint.frame("Clear global cache")) - hcache.clear_global_cache("all") - # 4) Verify that it is *NOT* executed, since the S3 cache is used. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - # 5) Clear the function cache. - _LOG.debug("\n%s", hprint.frame("Clear function cache")) - cf.clear_function_cache() - # 6) Clear the function cache. - _LOG.debug("\n%s", hprint.frame("Execute the 4th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # 7) Verify that it is executed. 
- _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCacheEnableReadOnly1 -# ############################################################################# - - -class TestCacheEnableReadOnly1(_ResetGlobalCacheHelper): - def _helper(self, cache_from: str, **kwargs: Any) -> None: - """ - Test that when enabling read-only mode we get an assertion only if the - function invocation was not cached. - """ - # Both memory and disk cache enabled, although we use only memory. - f, cf = self._get_f_cf_functions(**kwargs) - # Execute and verify that it is executed, since it was not cached. - _LOG_frame("Execute the 1st time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state="no_cache") - # Execute and verify that it is not executed, since it's cached in memory. - _LOG_frame("Execute the 2nd time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - _LOG_frame("Execute the 3rd time") - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - # - # Enable the read-only mode. - # - _LOG_frame("Enable read-only mode") - cf.enable_read_only(True) - # This is cached so it doesn't raise. - self._execute_and_check_state(f, cf, 3, 4, exp_cf_state=cache_from) - # This is not cached so it should raise. - with self.assertRaises(hcache.NotCachedValueException) as cm: - self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - actual = str(cm.exception) - self.check_string(actual) - # - # Disable the read-only mode. - # - _LOG_frame("Disable read-only mode") - cf.enable_read_only(False) - # Now this doesn't assert even if it's not in the cache. 
- self._execute_and_check_state(f, cf, 4, 4, exp_cf_state="no_cache") - - def test_mem_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) - - def test_disk_cache1(self) -> None: - self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) - - def test_mem_disk_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) - - -# ############################################################################# - - -# ############################################################################# -# TestCacheUpdateFunction1 -# ############################################################################# - - -class TestCacheUpdateFunction1(_ResetGlobalCacheHelper): - def test1(self) -> None: - # Define the function imitating working in a notebook. - _LOG.debug("\n%s", hprint.frame("Define function")) - - def add(x: int, y: int) -> int: - add.executed = True # type: ignore[attr-defined] - return x + y - - disk_cache_dir = self.get_scratch_space() - _LOG.debug("disk_cache_dir=%s", disk_cache_dir) - cached_add = hcache._Cached( - add, - use_mem_cache=False, - use_disk_cache=True, - disk_cache_path=disk_cache_dir, - ) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state( - add, cached_add, 1, 2, exp_cf_state="no_cache" - ) - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - func_path = cached_add._get_function_specific_code_path() - code_before = hio.from_file(func_path) - _LOG.debug("code_before=\n%s", code_before) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") - # 3) Redefine the function with different code while running. - _LOG.debug("\n%s", hprint.frame("Update function")) - - # This function is redefined on purpose to test the code. 
- def add(x: int, y: int) -> int: # type: ignore[no-redef] - add.executed = True # type: ignore[attr-defined] - return x * y - - cached_add._func = add - cached_add._disk_cached_func.func = add - cached_add.update_func_code_without_invalidating_cache() - # - code_after = hio.from_file(func_path) - _LOG.debug("code_after=\n%s", code_after) - self.assertNotEqual(code_before, code_after) - # 4) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(add, cached_add, 1, 2, exp_cf_state="disk") - - -# ############################################################################# - - -# ############################################################################# -# TestCacheEnableCheckOnlyIfPresent1 -# ############################################################################# - - -class TestCacheEnableCheckOnlyIfPresent1(_ResetGlobalCacheHelper): - def _helper(self, cache_from: str, **kwargs: Any) -> None: - # Both memory and disk cache enabled. - f, cf = self._get_f_cf_functions(**kwargs) - # 1) Execute the first time. - _LOG.debug("\n%s", hprint.frame("Execute the 1st time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state="no_cache") - # 2) Execute the second time. Must use memory cache. - _LOG.debug("\n%s", hprint.frame("Execute the 2nd time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - # 3) Enable the `check_only_if_present` mode. - _LOG.debug("\n%s", hprint.frame("Enable check_only_if_present")) - cf.enable_check_only_if_present(True) - # Since the value was cached, we should get an assertion. - with self.assertRaises(hcache.CachedValueException) as cm: - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - actual = str(cm.exception) - self.check_string(actual) - # 4) Try with a new value. 
- _LOG.debug("\n%s", hprint.frame("Execute the 3rd time")) - self._execute_and_check_state(f, cf, 2, 2, exp_cf_state="no_cache") - # 5) Disable the `check_only_if_present` mode. - _LOG.debug("\n%s", hprint.frame("Disable check_only_if_present")) - cf.enable_check_only_if_present(False) - # 6) Execute a value: we should get a cache hit. - _LOG.debug("\n%s", hprint.frame("Execute the 4rd time")) - self._execute_and_check_state(f, cf, 1, 2, exp_cf_state=cache_from) - # 7) Execute a value: we should get a cache hit. - _LOG.debug("\n%s", hprint.frame("Execute the 5th time")) - self._execute_and_check_state(f, cf, 2, 2, exp_cf_state=cache_from) - - # TODO(gp): Add a test for verbose mode in __call__ - # TODO(gp): get_function_cache_info - def test_mem_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=False) - - def test_disk_cache1(self) -> None: - self._helper(cache_from="disk", use_mem_cache=False, use_disk_cache=True) - - def test_mem_disk_cache1(self) -> None: - self._helper(cache_from="mem", use_mem_cache=True, use_disk_cache=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py deleted file mode 100644 index 3555e378f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcache_simple.py +++ /dev/null @@ -1,2606 +0,0 @@ -import copy -import logging -import os -from typing import Any, Dict - -import pandas as pd -import pytest - -import helpers.hcache_simple as hcacsimp -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_json_double(x: int) -> int: - """ - Return double the input and cache it using JSON. 
- - :param x: input integer to be doubled - :return: doubled value (x * 2) - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache(cache_type="pickle") -def _cached_pickle_square(x: int) -> int: - """ - Return the square of the input and cache it using pickle. - - :param x: input integer to be squared - :return: squared value (x**2) - """ - res = x**2 - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_multi_arg_sum(a: int, b: int) -> int: - """ - Return the sum of two numbers. - - :param a: first number - :param b: second number - :return: sum of a and b. - """ - res = a + b - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_refreshable_func(x: int) -> int: - """ - Return x multiplied by 10 and update the call count. - - :param x: The input integer - :return: The result of multiplying x by 10 - """ - _cached_refreshable_func.call_count += 1 - res = x * 10 - return res - - -# Initialize the call counter for the refreshable function. -_cached_refreshable_func.call_count = 0 - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_kwarg_diff(a: int, b: int = 0) -> int: - """ - Return the difference between a and b. - - :param a: The minuend - :param b: The subtrahend (defaults to 0) - :return: The difference (a - b) - """ - res = a - b - return res - - -@hcacsimp.simple_cache(cache_type="json") -def _cached_add_100(x: int) -> int: - """ - Return x plus 100. Used primarily for testing cache statistics. - - :param x: The input integer - :return: value (x + 100) - """ - res = x + 100 - return res - - -# ############################################################################# -# _BaseCacheTest -# ############################################################################# - - -class _BaseCacheTest(hunitest.TestCase): - """ - Base test class to provide common setup and teardown functionality. 
- - Instead of using setUp/tearDown, we use set_up_test/tear_down_test - along with a pytest fixture that ensures these methods run before - and after each test. - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self, monkeypatch): - # Store monkeypatch for use in tests. - self.monkeypatch = monkeypatch - # Run common setup before each test. - self.set_up_test() - yield - # Run common teardown after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - """ - Setup operations to run before each test: - - - Isolate all global variables to prevent race conditions. - - Set cache directory to test scratch space. - """ - _LOG.debug("set_up_test") - super().setUp() - # Isolate configuration globals. - scratch_space = self.get_scratch_space() - self.monkeypatch.setattr(hcacsimp, "_CACHE_DIR", scratch_space) - self.monkeypatch.setattr( - hcacsimp, "_CACHE_FILE_PREFIX", hcacsimp._CACHE_FILE_PREFIX - ) - self.monkeypatch.setattr(hcacsimp, "_S3_BUCKET", hcacsimp._S3_BUCKET) - self.monkeypatch.setattr(hcacsimp, "_S3_PREFIX", hcacsimp._S3_PREFIX) - self.monkeypatch.setattr(hcacsimp, "_AWS_PROFILE", hcacsimp._AWS_PROFILE) - # Isolate data structure globals. - self.monkeypatch.setattr(hcacsimp, "_CACHE", {}) - # Use deepcopy for _CACHE_PROPERTY to preserve decorator-set properties. - self.monkeypatch.setattr( - hcacsimp, "_CACHE_PROPERTY", copy.deepcopy(hcacsimp._CACHE_PROPERTY) - ) - self.monkeypatch.setattr(hcacsimp, "_CACHE_PERF", {}) - self.monkeypatch.setattr(hcacsimp, "_S3_AUTO_PULL_ATTEMPTED", set()) - - def tear_down_test(self) -> None: - """ - Teardown operations to run after each test. - - All global variables are isolated via monkeypatch, so they are - automatically restored after each test. 
- """ - _LOG.debug("tear_down_test") - - - -# ############################################################################# -# Test_get_cache -# ############################################################################# - - -class Test_get_cache(_BaseCacheTest): - """ - Test get_cache functionality for retrieving cached values. - """ - - def test1(self) -> None: - """ - Verify that get_cache returns a cache with the expected key and value. - """ - # Populate the cache by calling _cached_json_double. - _cached_json_double(2) - # Retrieve the in-memory cache for _cached_json_double. - cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") - # Assert that the key '{"args": [2], "kwargs": {}}' is in the cache and - # its value is 4. - self.assertIn('{"args": [2], "kwargs": {}}', cache) - self.assertEqual(cache['{"args": [2], "kwargs": {}}'], 4) - - -# ############################################################################# -# Test_flush_cache_to_disk -# ############################################################################# - - -class Test_flush_cache_to_disk(_BaseCacheTest): - """ - Test flush_cache_to_disk functionality for persisting cache to disk. - """ - - def test1(self) -> None: - """ - Verify that flushing creates a cache file on disk. - """ - # Call _cached_json_double to populate the cache. - _cached_json_double(3) - # Flush the cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Define expected cache file name. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - # Assert that the cache file now exists on disk. - self.assertTrue( - os.path.exists(cache_file), - f"Cache file {cache_file} should exist on disk.", - ) - - def test2(self) -> None: - """ - Verify that the disk cache file contains the expected key and value. - """ - # Populate cache and flush to disk. - _cached_json_double(3) - # Flush the cache to disk. 
- hcacsimp.flush_cache_to_disk("_cached_json_double") - # Define the expected cache file name. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - # # Open and load the disk cache file. - disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") - # Assert that the disk cache contains the key '{"args": [3], "kwargs": - # {}}' with the correct value. - self.assertIn('{"args": [3], "kwargs": {}}', disk_cache) - # Assert that the value for key '{"args": [3], "kwargs": {}}' is 6. - self.assertEqual(disk_cache['{"args": [3], "kwargs": {}}'], 6) - - -# ############################################################################# -# Test_reset_mem_cache -# ############################################################################# - - -class Test_reset_mem_cache(_BaseCacheTest): - """ - Test reset_mem_cache functionality for clearing in-memory cache. - """ - - def test1(self) -> None: - """ - Verify that the cache is empty after `reset_mem_cache` is called. - """ - # Populate the in-memory cache. - _cached_json_double(5) - # Reset the in-memory cache. - hcacsimp.reset_mem_cache("_cached_json_double") - # Retrieve the memory cache after reset. - cache_after: Dict[str, Any] = hcacsimp.get_mem_cache( - "_cached_json_double" - ) - # Verify that the key '{"args": [5], "kwargs": {}}' is no longer in the cache. - self.assertNotIn('{"args": [5], "kwargs": {}}', cache_after) - - -# ############################################################################# -# Test_force_cache_from_disk -# ############################################################################# - - -class Test_force_cache_from_disk(_BaseCacheTest): - """ - Test force_cache_from_disk functionality for loading cache from disk. - """ - - def test1(self) -> None: - """ - Verify that the memory cache is empty after a reset. - """ - # Populate cache and flush to disk. - _cached_json_double(7) - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Reset in-memory cache. 
- hcacsimp.reset_mem_cache("_cached_json_double") - mem_cache: Dict[str, Any] = hcacsimp.get_mem_cache("_cached_json_double") - # Ensure that the in-memory cache is empty. - self.assertNotIn( - '{"args": [7], "kwargs": {}}', - mem_cache, - "Memory cache should be empty after reset.", - ) - - def test2(self) -> None: - """ - Populate disk cache, reset memory, force reload, and verify that the - key appears. - """ - # Populate cache, flush to disk, and then reset in-memory cache. - _cached_json_double(7) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.reset_mem_cache("_cached_json_double") - _LOG.debug("Force reload disk cache for '_cached_json_double'") - # Force reload cache from disk. - hcacsimp.force_cache_from_disk("_cached_json_double") - full_cache: Dict[str, Any] = hcacsimp.get_cache("_cached_json_double") - # Assert that the key is restored in the in-memory cache. - self.assertIn( - '{"args": [7], "kwargs": {}}', - full_cache, - "After forcing, disk key should appear in memory.", - ) - - -# ############################################################################# -# Test_get_cache_perf -# ############################################################################# - - -class Test_get_cache_perf(_BaseCacheTest): - """ - Test cache performance tracking functionality. - """ - - def test1(self) -> None: - """ - Verify that performance tracking records hits and misses correctly. - """ - # Enable performance tracking. - hcacsimp.enable_cache_perf("_cached_json_double") - _LOG.debug("Call _cached_json_double(8) twice") - # First call should be a miss. - _LOG.debug("# First call should be a miss") - _cached_json_double(8) - # Second call should be a hit. - _LOG.debug("# Second call should be a hit") - _cached_json_double(8) - # Retrieve performance statistics. - stats: str = hcacsimp.get_cache_perf_stats("_cached_json_double") - # Verify that one hit and one miss are recorded. 
- self.assertIn("hits=1", stats) - self.assertIn("misses=1", stats) - - def test2(self) -> None: - """ - Verify that disabling performance tracking returns None. - """ - # Disable performance tracking. - hcacsimp.disable_cache_perf("_cached_json_double") - # Assert that performance data is no longer available. - self.assertIsNone(hcacsimp.get_cache_perf("_cached_json_double")) - - -# ############################################################################# -# Test_set_cache_property -# ############################################################################# - - -class Test_set_cache_property(_BaseCacheTest): - """ - Test set_cache_property and get_cache_property functionality. - """ - - def test1(self) -> None: - """ - Verify that setting a valid cache property works and can be retrieved. - """ - # Set a valid cache property. - hcacsimp.set_cache_property( - "_cached_json_double", "report_on_cache_miss", True - ) - # Retrieve and verify the property. - val: bool = hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - self.assertTrue(val) - - def test2(self) -> None: - """ - Verify that resetting cache properties clears previously set - properties. - """ - # Set and verify the cache property. - hcacsimp.set_cache_property( - "_cached_json_double", "report_on_cache_miss", True - ) - self.assertTrue( - hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - ) - # Reset all cache properties. - hcacsimp.reset_cache_property() - # Verify that the property is no longer True. - self.assertFalse( - hcacsimp.get_cache_property( - "_cached_json_double", "report_on_cache_miss" - ) - ) - - def test3(self) -> None: - """ - Verify that setting an invalid cache property raises an error. - """ - # Verify that setting an invalid property raises an error. 
- with self.assertRaises(AssertionError): - hcacsimp.set_cache_property( - "_cached_json_double", "invalid_prop", True - ) - - def test4(self) -> None: - """ - Verify return of a string containing the property value. - """ - # Set force_refresh property and verify that it appears in the properties string. - hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) - prop_str: str = hcacsimp.cache_property_to_str("_cached_json_double") - # Check output. - self.assertIn("force_refresh: True", prop_str) - - -# ############################################################################# -# Test_get_cached_func_names -# ############################################################################# - - -class Test_get_cached_func_names(_BaseCacheTest): - """ - Test get_cached_func_names functionality for retrieving cached function - names. - """ - - def test1(self) -> None: - """ - Verify that memory cache function names include `_cached_json_double`. - """ - # Populate in-memory cache. - _cached_json_double(9) - # Retrieve function names from the memory cache. - mem_funcs = hcacsimp.get_cached_func_names("mem") - # Check output. - self.assertIn("_cached_json_double", mem_funcs) - - def test2(self) -> None: - """ - Verify that all cache function names include both JSON and pickle - functions. - """ - # Populate and flush caches for JSON and pickle functions. - _cached_json_double(2) - # Flush _cached_json_double cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Call _cached_pickle_square with input 2. - _cached_pickle_square(2) - # Flush _cached_pickle_square cache to disk. - hcacsimp.flush_cache_to_disk("_cached_pickle_square") - # Retrieve all local cached function names (both memory and disk). - all_funcs = hcacsimp.get_cached_func_names("local") - # Check output. 
- self.assertIn("_cached_json_double", all_funcs) - self.assertIn("_cached_pickle_square", all_funcs) - - def test3(self) -> None: - """ - Verify that disk cache function names include `_cached_json_double` - after flushing. - """ - # Flush JSON cache to disk and verify disk cache function names. - _cached_json_double(2) - # Flush _cached_json_double cache to disk. - hcacsimp.flush_cache_to_disk("_cached_json_double") - # Retrieve function names from the disk cache. - disk_funcs = hcacsimp.get_cached_func_names("disk") - # Check output. - self.assertIn("_cached_json_double", disk_funcs) - - def test4(self) -> None: - """ - Verify that disk-cached function names include functions with custom - cache_dir and cache_prefix. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - custom_cache_dir = os.path.join(scratch_dir, "custom_cache") - - # Create a cached function with custom cache location. - @hcacsimp.simple_cache( - cache_type="json", - cache_dir=custom_cache_dir, - cache_prefix="custom_prefix", - ) - def _custom_location_func(x: int) -> int: - return x * 3 - - # Run. - _custom_location_func(5) - hcacsimp.flush_cache_to_disk("_custom_location_func") - disk_funcs = hcacsimp.get_cached_func_names("disk") - # Check. - self.assertIn("_custom_location_func", disk_funcs) - - -# ############################################################################# -# Test_cache_stats_to_str -# ############################################################################# - - -class Test_cache_stats_to_str(_BaseCacheTest): - """ - Test cache_stats_to_str functionality for generating cache statistics. - """ - - def test1(self) -> None: - """ - Verify that cache_stats_to_str returns a DataFrame with 'memory' and - 'disk' columns. - """ - # Populate cache. - _cached_add_100(1) - stats_df: pd.DataFrame = hcacsimp.cache_stats_to_str("_cached_add_100") - # Assert that the returned object is a DataFrame. 
- self.assertIsInstance(stats_df, pd.DataFrame) - # Verify that it contains the 'memory' and 'disk' columns. - self.assertIn("memory", stats_df.columns) - self.assertIn("disk", stats_df.columns) - - -# ############################################################################# -# Test__cached_kwarg_diff -# ############################################################################# - - -class Test__cached_kwarg_diff(_BaseCacheTest): - """ - Test caching behavior with keyword arguments. - """ - - def test1(self) -> None: - """ - Test that verifies keyword arguments are handled correctly by the - cache. - """ - # Call with different keyword argument values. - res1: int = _cached_kwarg_diff(5, b=3) - res2: int = _cached_kwarg_diff(5, b=10) - # Both calls should return the different result as both args, kwargs are used for caching. - self.assertNotEqual(res1, res2) - - -# ############################################################################# -# Test__cached_multi_arg_sum -# ############################################################################# - - -class Test__cached_multi_arg_sum(_BaseCacheTest): - """ - Test caching behavior with multiple positional arguments. - """ - - def test1(self) -> None: - """ - Verify that the cache for _cached_multi_arg_sum contains the correct - key. - """ - # Populate the cache. - _cached_multi_arg_sum(1, 2) - cache: Dict[str, Any] = hcacsimp.get_cache("_cached_multi_arg_sum") - _LOG.debug("cache=%s", cache) - # Verify that the cache key is formatted as '{"args": [1, 2], "kwargs": {}}'. - self.assertIn('{"args": [1, 2], "kwargs": {}}', cache) - - -# ############################################################################# -# Test__cached_pickle_square -# ############################################################################# - - -class Test__cached_pickle_square(_BaseCacheTest): - """ - Test caching with pickle serialization. 
- """ - - def test1(self) -> None: - """ - Ensure that _cached_pickle_square returns the correct value and disk - file. - """ - # Call the function to square the input. - res: int = _cached_pickle_square(4) - # Flush the cache to disk. - hcacsimp.flush_cache_to_disk("_cached_pickle_square") - cache_file = hcacsimp._get_cache_file_name("_cached_pickle_square") - # Open and load the pickle cache file. - func_cache_data = hcacsimp._load_func_cache_data_from_file( - cache_file, "pickle" - ) - _LOG.debug("func_cache_data=%s", func_cache_data) - # Verify the result and cache contents. - self.assertEqual(res, 16) - self.assertIn('{"args": [4], "kwargs": {}}', func_cache_data) - self.assertEqual(func_cache_data['{"args": [4], "kwargs": {}}'], 16) - - -# ############################################################################# -# Test__cached_refreshable_func -# ############################################################################# - - -class Test__cached_refreshable_func(_BaseCacheTest): - """ - Test force_refresh cache property functionality. - """ - - def test1(self) -> None: - """ - Verify that `_cached_refreshable_func` is called only once initially. - """ - # Reset call counter. - _cached_refreshable_func.call_count = 0 - # Call the function twice with the same input. - _cached_refreshable_func(3) - _cached_refreshable_func(3) - # Verify that the function was only called once (cache hit on the second - # call). - self.assertEqual( - _cached_refreshable_func.call_count, - 1, - "Function should be called only once initially.", - ) - - def test2(self) -> None: - """ - Verify that enabling `force_refresh` causes `_cached_refreshable_func` - to be re-called. - """ - # Call the function normally. - res: int = _cached_refreshable_func(3) - # Enable force_refresh so that the function will be re-called. - hcacsimp.set_cache_property( - "_cached_refreshable_func", "force_refresh", True - ) - # Verify that the function returns the correct value (3 * 10 = 30). 
- self.assertEqual(res, 30) - # Verify that the function's call count has incremented, indicating it - # was re-called. - self.assertEqual( - _cached_refreshable_func.call_count, - 2, - "Function should be re-called when force_refresh is enabled.", - ) - - -# ############################################################################# -# Test_reset_cache_perf -# ############################################################################# - - -class Test_reset_cache_perf(_BaseCacheTest): - """ - Test reset_cache_perf functionality for resetting performance statistics. - """ - - def test1(self) -> None: - """ - Verify that reset_cache_perf resets stats for a single function. - """ - # Prepare inputs. - hcacsimp.enable_cache_perf("_cached_json_double") - _cached_json_double(5) - _cached_json_double(5) - # Run test. - hcacsimp.reset_cache_perf("_cached_json_double") - # Check outputs. - perf = hcacsimp.get_cache_perf("_cached_json_double") - self.assertEqual(perf["tot"], 0) - self.assertEqual(perf["hits"], 0) - self.assertEqual(perf["misses"], 0) - - def test2(self) -> None: - """ - Verify that reset_cache_perf with empty func_name resets all functions. - """ - # Prepare inputs. - hcacsimp.enable_cache_perf("_cached_json_double") - hcacsimp.enable_cache_perf("_cached_multi_arg_sum") - _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - # Run test. - hcacsimp.reset_cache_perf("") - # Check outputs. - perf1 = hcacsimp.get_cache_perf("_cached_json_double") - perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") - self.assertEqual(perf1["tot"], 0) - self.assertEqual(perf2["tot"], 0) - - -# ############################################################################# -# Test_disable_cache_perf -# ############################################################################# - - -class Test_disable_cache_perf(_BaseCacheTest): - """ - Test disable_cache_perf functionality for disabling performance tracking. 
- """ - - def test1(self) -> None: - """ - Verify that disable_cache_perf with empty func_name disables all - functions. - """ - # Prepare inputs. - hcacsimp.enable_cache_perf("_cached_json_double") - hcacsimp.enable_cache_perf("_cached_multi_arg_sum") - _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - # Run test. - hcacsimp.disable_cache_perf("") - # Check outputs. - perf1 = hcacsimp.get_cache_perf("_cached_json_double") - perf2 = hcacsimp.get_cache_perf("_cached_multi_arg_sum") - # After disabling, perf should be None. - self.assertIsNone(perf1) - self.assertIsNone(perf2) - - -# ############################################################################# -# Test_get_cache_perf_stats -# ############################################################################# - - -class Test_get_cache_perf_stats(_BaseCacheTest): - """ - Test get_cache_perf_stats for retrieving performance statistics. - """ - - def test1(self) -> None: - """ - Verify that get_cache_perf_stats returns empty string when no stats - exist. - """ - # Prepare inputs. - # Ensure no perf stats exist for a non-tracked function. - hcacsimp.disable_cache_perf("_cached_json_double") - # Run test. - stats = hcacsimp.get_cache_perf_stats("_cached_json_double") - # Check outputs. - self.assertEqual(stats, "") - - -# ############################################################################# -# Test_cache_property_to_str -# ############################################################################# - - -class Test_cache_property_to_str(_BaseCacheTest): - """ - Test cache_property_to_str for converting properties to string. - """ - - def test1(self) -> None: - """ - Verify that cache_property_to_str with empty func_name returns all - functions. - """ - # Prepare inputs. - # Call functions to ensure they are cached. 
- _cached_json_double(1) - _cached_multi_arg_sum(1, 2) - hcacsimp.set_cache_property("_cached_json_double", "force_refresh", True) - hcacsimp.set_cache_property( - "_cached_multi_arg_sum", "write_through", True - ) - # Run test. - result = hcacsimp.cache_property_to_str("") - # Check outputs. - self.assertIn("_cached_json_double", result) - self.assertIn("_cached_multi_arg_sum", result) - self.assertIn("force_refresh: True", result) - self.assertIn("write_through: True", result) - - - -# ############################################################################# -# Test_reset_mem_cache_all -# ############################################################################# - - -class Test_reset_mem_cache_all(_BaseCacheTest): - """ - Test reset_mem_cache with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that reset_mem_cache with empty func_name resets all caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - hcacsimp.reset_mem_cache("") - # Check outputs. - cache1 = hcacsimp.get_mem_cache("_cached_json_double") - cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") - self.assertEqual(len(cache1), 0) - self.assertEqual(len(cache2), 0) - - -# ############################################################################# -# Test_reset_disk_cache_all -# ############################################################################# - - -class Test_reset_disk_cache_all(_BaseCacheTest): - """ - Test reset_disk_cache with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that reset_disk_cache with empty func_name removes all cache - files. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") - # Run test. - hcacsimp.reset_disk_cache("", interactive=False) - # Check outputs. 
- cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertFalse(os.path.exists(cache_file1)) - cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") - self.assertFalse(os.path.exists(cache_file2)) - - -# ############################################################################# -# Test_force_cache_from_disk_all -# ############################################################################# - - -class Test_force_cache_from_disk_all(_BaseCacheTest): - """ - Test force_cache_from_disk with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that force_cache_from_disk with empty func_name loads all - caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - hcacsimp.flush_cache_to_disk("_cached_json_double") - hcacsimp.flush_cache_to_disk("_cached_multi_arg_sum") - hcacsimp.reset_mem_cache("") - # Run test. - hcacsimp.force_cache_from_disk("") - # Check outputs. - cache1 = hcacsimp.get_mem_cache("_cached_json_double") - cache2 = hcacsimp.get_mem_cache("_cached_multi_arg_sum") - self.assertGreater(len(cache1), 0) - self.assertGreater(len(cache2), 0) - - -# ############################################################################# -# Test_flush_cache_to_disk_all -# ############################################################################# - - -class Test_flush_cache_to_disk_all(_BaseCacheTest): - """ - Test flush_cache_to_disk with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that flush_cache_to_disk with empty func_name flushes all - caches. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - hcacsimp.flush_cache_to_disk("") - # Check outputs. 
- cache_file1 = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertTrue(os.path.exists(cache_file1)) - # - cache_file2 = hcacsimp._get_cache_file_name("_cached_multi_arg_sum") - self.assertTrue(os.path.exists(cache_file2)) - - -# ############################################################################# -# Test_cache_stats_to_str_all -# ############################################################################# - - -class Test_cache_stats_to_str_all(_BaseCacheTest): - """ - Test cache_stats_to_str with empty func_name parameter. - """ - - def test1(self) -> None: - """ - Verify that cache_stats_to_str with empty func_name returns stats for - all functions. - """ - # Prepare inputs. - _cached_json_double(1) - _cached_multi_arg_sum(2, 3) - # Run test. - result = hcacsimp.cache_stats_to_str("") - # Check outputs. - self.assertIsNotNone(result) - self.assertIn("_cached_json_double", result.index) - self.assertIn("_cached_multi_arg_sum", result.index) - - -# ############################################################################# -# Test_get_cached_func_names_invalid -# ############################################################################# - - -class Test_get_cached_func_names_invalid(_BaseCacheTest): - """ - Test get_cached_func_names with invalid type parameter. - """ - - def test1(self) -> None: - """ - Verify that get_cached_func_names raises ValueError for invalid type. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp.get_cached_func_names("invalid_type") - self.assertIn("Invalid type", str(cm.exception)) - - -# ############################################################################# -# Test__get_cache_file_name -# ############################################################################# - - -class Test__get_cache_file_name(_BaseCacheTest): - """ - Test _get_cache_file_name for various configurations. 
- """ - - def test1(self) -> None: - """ - Verify that _get_cache_file_name raises ValueError for invalid cache - type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp._get_cache_file_name("_cached_json_double") - self.assertIn("Invalid cache type", str(cm.exception)) - - def test2(self) -> None: - """ - Test global cache_dir + global cache_prefix (default fallback). - - Verifies that when no per-function properties are set, the - function falls back to global cache_dir and cache_prefix. - """ - # Prepare inputs. - func_name = "_cached_json_double" - # Run. - actual = hcacsimp._get_cache_file_name(func_name) - # Check. - global_cache_dir = hcacsimp.get_cache_dir() - global_cache_prefix = hcacsimp.get_cache_file_prefix() - expected = os.path.join( - global_cache_dir, f"{global_cache_prefix}.{func_name}.json" - ) - self.assertEqual(actual, expected) - - def test3(self) -> None: - """ - Test per-function cache_dir + global cache_prefix. - - Verifies that per-function cache_dir is used while falling back - to global cache_prefix. - """ - # Prepare inputs. - custom_dir = "/tmp/custom_test_dir" - func_name = "_cached_json_double" - hcacsimp.set_cache_property(func_name, "cache_dir", custom_dir) - # Run. - actual = hcacsimp._get_cache_file_name(func_name) - # Check. - global_cache_prefix = hcacsimp.get_cache_file_prefix() - expected = os.path.join( - custom_dir, f"{global_cache_prefix}.{func_name}.json" - ) - self.assertEqual(actual, expected) - - def test4(self) -> None: - """ - Test global cache_dir + per-function cache_prefix. - - Verifies that per-function cache_prefix is used while falling - back to global cache_dir. - """ - # Prepare inputs. - custom_prefix = "custom_prefix" - func_name = "_cached_json_double" - hcacsimp.set_cache_property(func_name, "cache_prefix", custom_prefix) - # Run. 
- actual = hcacsimp._get_cache_file_name(func_name) - # Check. - global_cache_dir = hcacsimp.get_cache_dir() - expected = os.path.join( - global_cache_dir, f"{custom_prefix}.{func_name}.json" - ) - self.assertEqual(actual, expected) - - def test5(self) -> None: - """ - Test per-function cache_dir + per-function cache_prefix. - - Verifies that both per-function cache_dir and cache_prefix are - used when both are set (no fallback to global values). - """ - # Prepare inputs. - custom_dir = "/tmp/custom_test_dir_both" - custom_prefix = "custom_prefix_both" - func_name = "_cached_json_double" - hcacsimp.set_cache_property(func_name, "cache_dir", custom_dir) - hcacsimp.set_cache_property(func_name, "cache_prefix", custom_prefix) - # Run. - actual = hcacsimp._get_cache_file_name(func_name) - # Check. - expected = os.path.join(custom_dir, f"{custom_prefix}.{func_name}.json") - self.assertEqual(actual, expected) - - def test6(self) -> None: - """ - Test file path format for pickle cache type. - - Verifies that _get_cache_file_name returns correct file - extension for pickle (.pkl) cache type. - """ - # Prepare inputs. - func_name = "_cached_pickle_square" - # Run. - actual = hcacsimp._get_cache_file_name(func_name) - # Check. - self.assertTrue(actual.endswith(".pkl")) - self.assertIn(func_name, actual) - - def test7(self) -> None: - """ - Test file path format for json cache type. - - Verifies that _get_cache_file_name returns correct file - extensions for json (.json) cache type. - """ - # Prepare inputs. - func_name = "_cached_json_double" - # Run. - actual = hcacsimp._get_cache_file_name(func_name) - # Check. 
- self.assertTrue(actual.endswith(".json")) - self.assertIn(func_name, actual) - - - -# ############################################################################# -# Test__save_cache_dict_to_disk -# ############################################################################# - - -class Test__save_cache_dict_to_disk(_BaseCacheTest): - """ - Test _save_cache_dict_to_disk for invalid cache type. - """ - - def test1(self) -> None: - """ - Verify that _save_cache_dict_to_disk raises ValueError for invalid - cache type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - data = {"key": "value"} - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp._save_cache_dict_to_disk("_cached_json_double", data) - self.assertIn("Invalid cache type", str(cm.exception)) - - - -# ############################################################################# -# Test_get_disk_cache_invalid -# ############################################################################# - - -class Test_get_disk_cache_invalid(_BaseCacheTest): - """ - Test get_disk_cache for invalid cache type. - """ - - def test1(self) -> None: - """ - Verify that get_disk_cache raises ValueError for invalid cache type. - """ - # Prepare inputs. - hcacsimp.set_cache_property("_cached_json_double", "type", "invalid") - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hcacsimp.get_disk_cache("_cached_json_double") - self.assertIn("Invalid cache type", str(cm.exception)) - -@hcacsimp.simple_cache(cache_type="json") -def _cache_mode_function(x: int) -> int: - """ - Test function to verify cache_mode parameter. 
- - :param x: input integer - :return: x * 5 - """ - _cache_mode_function.call_count += 1 - res = x * 5 - return res - - -_cache_mode_function.call_count = 0 - - -# ############################################################################# -# Test_cache_mode -# ############################################################################# - - -class Test_cache_mode(_BaseCacheTest): - """ - Test cache_mode parameter functionality. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_cache_mode_function", "type", "json") - _cache_mode_function.call_count = 0 - - def test1(self) -> None: - """ - Verify that setting force_refresh property forces cache refresh. - """ - # Prepare inputs. - _cache_mode_function(10) - initial_count = _cache_mode_function.call_count - # Set force_refresh property. - hcacsimp.set_cache_property("_cache_mode_function", "force_refresh", True) - # Run test. - result = _cache_mode_function(10) - # Check outputs. - self.assertEqual(result, 50) - self.assertEqual(_cache_mode_function.call_count, initial_count + 1) - - def test2(self) -> None: - """ - Verify that setting abort_on_cache_miss property aborts on cache miss. - """ - # Prepare inputs. - hcacsimp.set_cache_property( - "_cache_mode_function", "abort_on_cache_miss", True - ) - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _cache_mode_function(99) - self.assertIn("Cache miss", str(cm.exception)) - - def test3(self) -> None: - """ - Verify that calling with different arguments bypasses cache. - """ - # Prepare inputs. - _cache_mode_function(15) - initial_count = _cache_mode_function.call_count - # Run test. - result1 = _cache_mode_function(16) - result2 = _cache_mode_function(17) - # Check outputs. 
- self.assertEqual(result1, 80) - self.assertEqual(result2, 85) - self.assertEqual(_cache_mode_function.call_count, initial_count + 2) - - -@hcacsimp.simple_cache(cache_type="json") -def _abort_test_function(x: int) -> int: - """ - Test function to verify abort_on_cache_miss parameter. - - :param x: input integer - :return: x * 7 - """ - res = x * 7 - return res - - -# ############################################################################# -# Test_abort_on_cache_miss -# ############################################################################# - - -class Test_abort_on_cache_miss(_BaseCacheTest): - """ - Test abort_on_cache_miss functionality. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_abort_test_function", "type", "json") - - def test1(self) -> None: - """ - Verify that abort_on_cache_miss=True raises error on cache miss. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _abort_test_function(100, abort_on_cache_miss=True) - self.assertIn("Cache miss", str(cm.exception)) - - -@hcacsimp.simple_cache(cache_type="json") -def _report_test_function(x: int) -> int: - """ - Test function to verify report_on_cache_miss parameter. - - :param x: input integer - :return: x * 8 - """ - res = x * 8 - return res - - -# ############################################################################# -# Test_report_on_cache_miss -# ############################################################################# - - -class Test_report_on_cache_miss(_BaseCacheTest): - """ - Test report_on_cache_miss functionality. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_report_test_function", "type", "json") - - def test1(self) -> None: - """ - Verify that report_on_cache_miss=True returns '_cache_miss_' on miss. - """ - # Run test. 
- result = _report_test_function(200, report_on_cache_miss=True) - # Check outputs. - self.assertEqual(result, "_cache_miss_") - - -@hcacsimp.simple_cache(cache_type="json", write_through=True) -def _write_through_function(x: int) -> int: - """ - Test function to verify write_through parameter. - - :param x: input integer - :return: x * 9 - """ - res = x * 9 - return res - - -# ############################################################################# -# Test_write_through -# ############################################################################# - - -class Test_write_through(_BaseCacheTest): - """ - Test write_through functionality for automatic disk caching. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_write_through_function", "type", "json") - - def test1(self) -> None: - """ - Verify that write_through=True automatically writes to disk. - """ - # Run test. - _write_through_function(11) - # Check outputs. - cache_file = hcacsimp._get_cache_file_name("_write_through_function") - self.assertTrue(os.path.exists(cache_file)) - # - disk_cache = hcacsimp._load_func_cache_data_from_file(cache_file, "json") - self.assertIn('{"args": [11], "kwargs": {}}', disk_cache) - self.assertEqual(disk_cache['{"args": [11], "kwargs": {}}'], 99) - - -@hcacsimp.simple_cache(cache_type="json") -def _test_cache_mode_kwarg(x: int, **kwargs) -> int: - """ - Test function that accepts kwargs to test cache_mode parameter. 
- - :param x: input integer - :param kwargs: additional keyword arguments - :return: x * 3 - """ - _test_cache_mode_kwarg.call_count += 1 - res = x * 3 - return res - - -_test_cache_mode_kwarg.call_count = 0 - - -# ############################################################################# -# Test_cache_mode_parameter -# ############################################################################# - - -class Test_cache_mode_parameter(_BaseCacheTest): - """ - Test cache_mode parameter as a keyword argument. - """ - - def set_up_test(self) -> None: - """ - Setup operations to run before each test. - """ - super().set_up_test() - hcacsimp.set_cache_property("_test_cache_mode_kwarg", "type", "json") - _test_cache_mode_kwarg.call_count = 0 - - def test1(self) -> None: - """ - Verify that cache_mode='REFRESH_CACHE' keyword forces refresh. - """ - # Prepare inputs. - _test_cache_mode_kwarg(20) - initial_count = _test_cache_mode_kwarg.call_count - # Run test. - result = _test_cache_mode_kwarg(20, cache_mode="REFRESH_CACHE") - # Check outputs. - self.assertEqual(result, 60) - self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 1) - - def test2(self) -> None: - """ - Verify that cache_mode='HIT_CACHE_OR_ABORT' raises error on miss. - """ - # Run test and check output. - with self.assertRaises(ValueError) as cm: - _test_cache_mode_kwarg(88, cache_mode="HIT_CACHE_OR_ABORT") - self.assertIn("Cache miss", str(cm.exception)) - - def test3(self) -> None: - """ - Verify that cache_mode='DISABLE_CACHE' bypasses cache. - """ - # Prepare inputs. - _test_cache_mode_kwarg(30) - initial_count = _test_cache_mode_kwarg.call_count - # Run test. - result1 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") - result2 = _test_cache_mode_kwarg(30, cache_mode="DISABLE_CACHE") - # Check outputs. 
- self.assertEqual(result1, 90) - self.assertEqual(result2, 90) - self.assertEqual(_test_cache_mode_kwarg.call_count, initial_count + 2) - - -# ############################################################################# -# Module-level helpers for new tests. -# ############################################################################# - - -@hcacsimp.simple_cache(cache_type="json") -def _test_intrinsic_func_intrinsic(x: int) -> int: - """ - Return x times 3. Named with `_intrinsic` suffix to test suffix stripping. - - :param x: input integer - :return: x * 3 - """ - res = x * 3 - return res - - -@hcacsimp.simple_cache(cache_type="json", exclude_keys=["session_id"]) -def _test_exclude_keys_func(x: int, *, session_id: str = "") -> int: - """ - Return x times 2, ignoring session_id for caching purposes. - - :param x: input integer - :param session_id: session identifier (excluded from cache key) - :return: x * 2 - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache(cache_type="json", write_through=False) -def _test_no_write_through(x: int) -> int: - """ - Return x plus 1, with write_through disabled. - - :param x: input integer - :return: x + 1 - """ - res = x + 1 - return res - - -# ############################################################################# -# Test_sanity_check_function_cache -# ############################################################################# - - -class Test_sanity_check_function_cache(_BaseCacheTest): - """ - Test sanity_check_function_cache for validating function cache dicts. - """ - - def test1(self) -> None: - """ - Verify that sanity_check_function_cache passes for valid cache data. - """ - # Prepare inputs. - func_cache_data = {'{"args": [1], "kwargs": {}}': 2} - # Run test. - hcacsimp.sanity_check_function_cache(func_cache_data) - # Check outputs (no exception raised). - - def test2(self) -> None: - """ - Verify that sanity_check_function_cache passes for empty dict when - assert_on_empty=False. 
- """ - # Prepare inputs. - func_cache_data: dict = {} - # Run test. - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=False - ) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_sanity_check_cache -# ############################################################################# - - -class Test_sanity_check_cache(_BaseCacheTest): - """ - Test sanity_check_cache for validating nested cache dicts. - """ - - def test1(self) -> None: - """ - Verify that sanity_check_cache passes for valid nested cache data. - """ - # Prepare inputs. - cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} - # Run test. - hcacsimp.sanity_check_cache(cache_data) - # Check outputs (no exception raised). - - def test2(self) -> None: - """ - Verify that sanity_check_cache passes for empty dict when - assert_on_empty=False. - """ - # Prepare inputs. - cache_data: dict = {} - # Run test. - hcacsimp.sanity_check_cache(cache_data, assert_on_empty=False) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_cache_data_to_str -# ############################################################################# - - -class Test_cache_data_to_str(_BaseCacheTest): - """ - Test cache_data_to_str for converting cache data to a string. - """ - - def test1(self) -> None: - """ - Verify that cache_data_to_str returns a string with the function name - and cache key. - """ - # Prepare inputs. - cache_data = {"my_func": {'{"args": [1], "kwargs": {}}': 42}} - # Run test. - result = hcacsimp.cache_data_to_str(cache_data) - # Check outputs. 
- self.assertIn("my_func", result) - self.assertIn('{"args": [1], "kwargs": {}}', result) - self.assertIn("42", result) - - -# ############################################################################# -# Test_get_cache_property_system -# ############################################################################# - - -class Test_get_cache_property_system(_BaseCacheTest): - """ - Test get_cache_property for system properties on unknown functions. - """ - - def test1(self) -> None: - """ - Verify that get_cache_property returns None for a system property when - the function is not in the cache property dict. - """ - # Run test. - val = hcacsimp.get_cache_property("_nonexistent_func_xyz", "type") - # Check outputs. - self.assertIsNone(val) - - -# ############################################################################# -# Test_set_cache_property_new_func -# ############################################################################# - - -class Test_set_cache_property_new_func(_BaseCacheTest): - """ - Test set_cache_property for a brand new function not yet in cache property. - """ - - def test1(self) -> None: - """ - Verify that set_cache_property creates a new entry for a function that - was not previously registered. - """ - # Run test. - hcacsimp.set_cache_property("_brand_new_func_xyz", "force_refresh", True) - # Check outputs. - val = hcacsimp.get_cache_property("_brand_new_func_xyz", "force_refresh") - self.assertTrue(val) - - -# ############################################################################# -# Test_cache_property_to_str_no_props -# ############################################################################# - - -class Test_cache_property_to_str_no_props(_BaseCacheTest): - """ - Test cache_property_to_str for a function with no properties in the cache. - """ - - def test1(self) -> None: - """ - Verify that cache_property_to_str returns the function name header even - when the function has no registered cache properties. 
- """ - # Run test with a function name not in _CACHE_PROPERTY. - result = hcacsimp.cache_property_to_str("_nonexistent_func_xyz") - # Check outputs. - self.assertIn("_nonexistent_func_xyz", result) - - -# ############################################################################# -# Test__get_cache_file_name_auto_detect -# ############################################################################# - - -class Test__get_cache_file_name_auto_detect(_BaseCacheTest): - """ - Test _get_cache_file_name when cache type is None (auto-detect from disk). - """ - - def test1(self) -> None: - """ - Verify that _get_cache_file_name infers .pkl extension when a .pkl file - exists on disk. - """ - # Prepare inputs: create a valid .pkl file in the cache dir. - cache_dir = hcacsimp.get_cache_dir() - func_name = "_auto_detect_pkl_func" - pkl_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.pkl") - hcacsimp._save_func_cache_data_to_file(pkl_path, "pickle", {}) - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. - self.assertTrue(file_name.endswith(".pkl")) - - def test2(self) -> None: - """ - Verify that _get_cache_file_name infers .json extension when a .json - file exists on disk. - """ - # Prepare inputs: create a valid .json file in the cache dir. - cache_dir = hcacsimp.get_cache_dir() - func_name = "_auto_detect_json_func" - json_path = os.path.join(cache_dir, f"tmp.cache_simple.{func_name}.json") - hcacsimp._save_func_cache_data_to_file(json_path, "json", {}) - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. - self.assertTrue(file_name.endswith(".json")) - - def test3(self) -> None: - """ - Verify that _get_cache_file_name defaults to .json when no file exists. - """ - # Prepare inputs: use a brand new function name with no disk file. - func_name = "_no_file_func_xyz" - # Run test. - file_name = hcacsimp._get_cache_file_name(func_name) - # Check outputs. 
- self.assertTrue(file_name.endswith(".json")) - - -# ############################################################################# -# Test__save_func_cache_data_to_file_infer -# ############################################################################# - - -class Test__save_func_cache_data_to_file_infer(_BaseCacheTest): - """ - Test _save_func_cache_data_to_file when cache_type is None (inferred from - file extension). - """ - - def test1(self) -> None: - """ - Verify that _save_func_cache_data_to_file infers pickle format from - .pkl extension when cache_type is None. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - file_name = os.path.join(scratch_dir, "tmp_test_infer.pkl") - data = {'{"args": [1], "kwargs": {}}': 42} - # Run test. - hcacsimp._save_func_cache_data_to_file(file_name, None, data) - # Check outputs. - self.assertTrue(os.path.exists(file_name)) - loaded = hcacsimp._load_func_cache_data_from_file(file_name, "pickle") - self.assertEqual(loaded, data) - - -# ############################################################################# -# Test__load_func_cache_data_from_file_infer -# ############################################################################# - - -class Test__load_func_cache_data_from_file_infer(_BaseCacheTest): - """ - Test _load_func_cache_data_from_file when cache_type is None (inferred from - file extension). - """ - - def test1(self) -> None: - """ - Verify that _load_func_cache_data_from_file infers pickle format from - .pkl extension when cache_type is None. - """ - # Prepare inputs: save a pickle file. - scratch_dir = self.get_scratch_space() - file_name = os.path.join(scratch_dir, "tmp_test_load_infer.pkl") - data = {'{"args": [5], "kwargs": {}}': 25} - hcacsimp._save_func_cache_data_to_file(file_name, "pickle", data) - # Run test with None cache_type (should infer from .pkl). - result = hcacsimp._load_func_cache_data_from_file(file_name, None) - # Check outputs. 
- self.assertEqual(result, data) - - -# ############################################################################# -# Test_reset_disk_cache_no_file -# ############################################################################# - - -class Test_reset_disk_cache_no_file(_BaseCacheTest): - """ - Test reset_disk_cache when the target function has no disk cache file. - """ - - def test1(self) -> None: - """ - Verify that reset_disk_cache does not raise when the function has no - cache file on disk. - """ - # Prepare inputs: use a function that has never been cached to disk. - func_name = "_cached_json_double" - # Ensure no disk file exists. - hcacsimp.reset_disk_cache(func_name, interactive=False) - cache_file = hcacsimp._get_cache_file_name(func_name) - self.assertFalse(os.path.exists(cache_file)) - # Run test: reset again when no file exists (should not raise). - hcacsimp.reset_disk_cache(func_name, interactive=False) - # Check outputs (no exception raised). - - -# ############################################################################# -# Test_mock_cache -# ############################################################################# - - -class Test_mock_cache(_BaseCacheTest): - """ - Test mock_cache for inserting values directly into the cache. - """ - - def test1(self) -> None: - """ - Verify that mock_cache inserts a value into the function cache that can - be retrieved as a cache hit. - """ - # Prepare inputs. - func_name = "_cached_json_double" - cache_key = '{"args": [99], "kwargs": {}}' - value = 198 - # Run test. - hcacsimp.mock_cache(func_name, cache_key, value) - # Check outputs. - cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache[cache_key], value) - - def test2(self) -> None: - """ - Verify that a mocked cache value causes a cache hit when the decorated - function is called. - """ - # Prepare inputs. - func_name = "_cached_json_double" - cache_key = '{"args": [77], "kwargs": {}}' - value = 154 - # Run test. 
- hcacsimp.mock_cache(func_name, cache_key, value) - result = _cached_json_double(77, abort_on_cache_miss=True) - # Check outputs. - self.assertEqual(result, value) - - -# ############################################################################# -# Test_mock_cache_from_args_kwargs -# ############################################################################# - - -class Test_mock_cache_from_args_kwargs(_BaseCacheTest): - """ - Test mock_cache_from_args_kwargs for inserting values via args/kwargs. - """ - - def test1(self) -> None: - """ - Verify that mock_cache_from_args_kwargs inserts the correct value into - the cache for the given args and kwargs. - """ - # Prepare inputs. - func_name = "_cached_json_double" - args = (55,) - kwargs: dict = {} - value = 110 - # Run test. - hcacsimp.mock_cache_from_args_kwargs(func_name, args, kwargs, value) - # Check outputs. - expected_key = '{"args": [55], "kwargs": {}}' - cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache[expected_key], value) - - -# ############################################################################# -# Test_mock_cache_from_disk -# ############################################################################# - - -class Test_mock_cache_from_disk(_BaseCacheTest): - """ - Test mock_cache_from_disk for bulk-inserting cache data from a dict. - """ - - def test1(self) -> None: - """ - Verify that mock_cache_from_disk populates the cache from a dict of - pre-computed values. - """ - # Prepare inputs. - func_name = "_cached_json_double" - func_cache_data = { - '{"args": [33], "kwargs": {}}': 66, - '{"args": [44], "kwargs": {}}': 88, - } - # Run test. - hcacsimp.mock_cache_from_disk(func_name, func_cache_data) - # Check outputs. 
- cache = hcacsimp.get_cache(func_name) - self.assertEqual(cache['{"args": [33], "kwargs": {}}'], 66) - self.assertEqual(cache['{"args": [44], "kwargs": {}}'], 88) - - -# ############################################################################# -# Test_simple_cache_intrinsic -# ############################################################################# - - -class Test_simple_cache_intrinsic(_BaseCacheTest): - """ - Test simple_cache decorator with a function whose name ends in _intrinsic. - """ - - def test1(self) -> None: - """ - Verify that the _intrinsic suffix is stripped and the cache key uses - the base function name. - """ - # Run test. - result = _test_intrinsic_func_intrinsic(5) - # Check outputs. - self.assertEqual(result, 15) - # Cache should be stored under the base name (without _intrinsic). - cache = hcacsimp.get_cache("_test_intrinsic_func") - self.assertIn('{"args": [5], "kwargs": {}}', cache) - - -# ############################################################################# -# Test_simple_cache_existing_type -# ############################################################################# - - -class Test_simple_cache_existing_type(_BaseCacheTest): - """ - Test that simple_cache preserves a pre-existing cache type setting. - """ - - def test1(self) -> None: - """ - Verify that applying simple_cache with cache_type='json' does not - override an existing 'pickle' type already set for the function. - """ - # Prepare inputs: set the type before decoration. - hcacsimp.set_cache_property("_inline_type_func", "type", "pickle") - - def _inline_type_func(x: int) -> int: - return x - - # Apply decorator with a different cache_type. - hcacsimp.simple_cache(cache_type="json")(_inline_type_func) - # Check outputs: type should remain 'pickle'. 
- val = hcacsimp.get_cache_property("_inline_type_func", "type") - self.assertEqual(val, "pickle") - - -# ############################################################################# -# Test_simple_cache_exclude_keys -# ############################################################################# - - -class Test_simple_cache_exclude_keys(_BaseCacheTest): - """ - Test simple_cache decorator with exclude_keys parameter. - """ - - def test1(self) -> None: - """ - Verify that calls with the same primary arg but different excluded - kwargs produce a single cache entry (the excluded key is ignored). - """ - # Run test: two calls with same x but different session_id. - result1 = _test_exclude_keys_func(5, session_id="abc") - result2 = _test_exclude_keys_func(5, session_id="xyz") - # Check outputs. - self.assertEqual(result1, 10) - self.assertEqual(result2, 10) - # Only one cache entry should exist. - cache = hcacsimp.get_cache("_test_exclude_keys_func") - self.assertEqual(len(cache), 1) - - -# ############################################################################# -# Test_simple_cache_no_write_through -# ############################################################################# - - -class Test_simple_cache_no_write_through(_BaseCacheTest): - """ - Test simple_cache decorator with write_through=False. - """ - - def test1(self) -> None: - """ - Verify that with write_through=False the computed value is not - automatically persisted to disk after a function call. - """ - # Run test. - result = _test_no_write_through(7) - self.assertEqual(result, 8) - # Reset memory cache so that reading goes to disk. - hcacsimp.reset_mem_cache("_test_no_write_through") - # Check outputs: disk cache should not contain the computed value. 
- disk_cache = hcacsimp.get_disk_cache("_test_no_write_through") - self.assertNotIn('{"args": [7], "kwargs": {}}', disk_cache) - - -# ############################################################################# -# Test_global_cache_file_prefix -# ############################################################################# - - -class Test_global_cache_file_prefix(_BaseCacheTest): - """ - Test global cache file prefix configuration. - """ - - def test1(self) -> None: - """ - Verify that set_cache_file_prefix changes the cache file prefix. - """ - # Prepare inputs. - custom_prefix = "my_test_cache" - # Run. - hcacsimp.set_cache_file_prefix(custom_prefix) - _ = _cached_json_double(5) - # Check. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertIn(custom_prefix, cache_file) - - def test2(self) -> None: - """ - Verify that get_cache_file_prefix returns the configured prefix. - """ - # Prepare inputs. - custom_prefix = "test_prefix" - hcacsimp._CACHE_FILE_PREFIX = custom_prefix - # Run. - actual = hcacsimp.get_cache_file_prefix() - # Check. - self.assertEqual(actual, custom_prefix) - - -# ############################################################################# -# Test helper functions for per-function configuration -# ############################################################################# - - -@hcacsimp.simple_cache( - cache_type="json", - cache_dir="/tmp/custom_cache", - cache_prefix="project_cache", -) -def _test_per_function_cache_dir_and_prefix(x: int) -> int: - """ - Test function with custom cache directory and prefix. - - :param x: input integer - :return: x * 2 - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache( - cache_type="json", - cache_dir="/tmp/custom_cache", -) -def _test_per_function_cache_dir(x: int) -> int: - """ - Test function with custom cache directory and default prefix. 
- - :param x: input integer - :return: x * 2 - """ - res = x * 2 - return res - - -@hcacsimp.simple_cache( - cache_type="json", - cache_prefix="project_cache", -) -def _test_per_function_prefix(x: int) -> int: - """ - Test function with custom prefix and default directory. - - :param x: input integer - :return: x * 3 - """ - res = x * 3 - return res - - -@hcacsimp.simple_cache( - cache_type="json", - s3_bucket="s3://decorator-bucket", - s3_prefix="decorator/prefix", - aws_profile="decorator-profile", -) -def _test_per_function_s3_configs(x: int) -> int: - """ - Test function with all S3 parameters set via decorator. - - :param x: input integer - :return: x * 6 - """ - return x * 6 - - -# ############################################################################# -# Test_per_function_cache_dir -# ############################################################################# - - -class Test_per_function_cache_dir(_BaseCacheTest): - """ - Test per-function cache directory configuration. - """ - - def test1(self) -> None: - """ - Test cache_dir configured via decorator parameter. - - Verifies that when cache_dir is set in the @simple_cache - decorator, the cache file is created in the specified custom - directory. - """ - # Run. - _ = _test_per_function_cache_dir(10) - # Check. - # Verify cache file is in decorator-specified directory. - cache_file = hcacsimp._get_cache_file_name("_test_per_function_cache_dir") - self.assertIn("/tmp/custom_cache", cache_file) - # Flush to disk to verify file creation. - hcacsimp.flush_cache_to_disk("_test_per_function_cache_dir") - self.assertTrue(os.path.exists(cache_file)) - - def test2(self) -> None: - """ - Test that cache_dir can be retrieved. - - Verifies that cache_dir property set via decorator can be - retrieved using get_cache_property. - """ - # Run. - cache_dir = hcacsimp.get_cache_property( - "_test_per_function_cache_dir", "cache_dir" - ) - # Check. 
- self.assertEqual(cache_dir, "/tmp/custom_cache") - - def test3(self) -> None: - """ - Test cache_dir configured via set_cache_property() function call. - - Verifies that cache_dir can be set manually via - set_cache_property() for functions without cache_dir in their - decorator. - """ - # Prepare inputs. - custom_dir = self.get_scratch_space() + "/manual_cache" - # Set cache_dir manually. - hcacsimp.set_cache_property( - "_cached_json_double", "cache_dir", custom_dir - ) - # Run. - _ = _cached_json_double(10) - # Check. - # Verify cache file is in manually-set directory. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertIn(custom_dir, cache_file) - # Flush to disk to verify file creation. - hcacsimp.flush_cache_to_disk("_cached_json_double") - self.assertTrue(os.path.exists(cache_file)) - - def test4(self) -> None: - """ - Test get/set cache_dir property API. - - Verifies that cache_dir can be stored and retrieved via - get/set_cache_property functions. - """ - # Prepare inputs. - custom_dir = "/tmp/test_cache_dir" - # Run. - hcacsimp.set_cache_property( - "_cached_json_double", "cache_dir", custom_dir - ) - actual = hcacsimp.get_cache_property("_cached_json_double", "cache_dir") - # Check. - self.assertEqual(actual, custom_dir) - - -# ############################################################################# -# Test_per_function_cache_prefix -# ############################################################################# - - -class Test_per_function_cache_prefix(_BaseCacheTest): - """ - Test per-function cache prefix configuration. - """ - - def test1(self) -> None: - """ - Test cache_prefix configured via decorator parameter. - - Verifies that when cache_prefix is set in the @simple_cache - decorator, the cache file name uses the specified custom prefix. - """ - # Run. - _ = _test_per_function_prefix(7) - # Check. 
- cache_file = hcacsimp._get_cache_file_name("_test_per_function_prefix") - self.assertIn("project_cache", cache_file) - - def test2(self) -> None: - """ - Test that cache_prefix can be retrieved. - - Verifies that cache_prefix property set via decorator can be - retrieved using get_cache_property. - """ - # Run. - cache_prefix = hcacsimp.get_cache_property( - "_test_per_function_prefix", "cache_prefix" - ) - # Check. - self.assertEqual(cache_prefix, "project_cache") - - def test3(self) -> None: - """ - Test cache_prefix configured via set_cache_property() function call. - - Verifies that cache_prefix can be set manually via - set_cache_property() for functions without cache_prefix in their - decorator. - """ - # Prepare inputs. - custom_prefix = "test_prefix" - # Set cache_prefix manually. - hcacsimp.set_cache_property( - "_cached_json_double", "cache_prefix", custom_prefix - ) - # Run. - _ = _cached_json_double(7) - # Check. - cache_file = hcacsimp._get_cache_file_name("_cached_json_double") - self.assertIn(custom_prefix, cache_file) - - def test4(self) -> None: - """ - Test get/set cache_prefix property API. - - Verifies that cache_prefix can be stored and retrieved via - get/set_cache_property functions. - """ - # Prepare inputs. - custom_prefix = "my_project_cache" - # Run. - hcacsimp.set_cache_property( - "_cached_json_double", "cache_prefix", custom_prefix - ) - actual = hcacsimp.get_cache_property( - "_cached_json_double", "cache_prefix" - ) - # Check. - self.assertEqual(actual, custom_prefix) - - -# ############################################################################# -# Test_per_function_cache_dir_and_prefix -# ############################################################################# - - -class Test_per_function_cache_dir_and_prefix(_BaseCacheTest): - """ - Test per-function cache directory and prefix configured together. - """ - - def test1(self) -> None: - """ - Test both cache_dir and cache_prefix configured via decorator. 
- - Verifies that when both cache_dir and cache_prefix are set in - the @simple_cache decorator, both are applied correctly to the - cache file path. - """ - # Run. - _ = _test_per_function_cache_dir_and_prefix(10) - # Check. - cache_file = hcacsimp._get_cache_file_name( - "_test_per_function_cache_dir_and_prefix" - ) - # Verify custom directory is used. - self.assertIn("/tmp/custom_cache", cache_file) - # Verify custom prefix is used. - self.assertIn("project_cache", cache_file) - # Flush to disk to verify file creation. - hcacsimp.flush_cache_to_disk("_test_per_function_cache_dir_and_prefix") - self.assertTrue(os.path.exists(cache_file)) - - def test2(self) -> None: - """ - Test that cache_dir and cache_prefix can be retrieved. - - Verifies that both cache_dir and cache_prefix properties set via - decorator can be retrieved using get_cache_property. - """ - # Run. - cache_dir = hcacsimp.get_cache_property( - "_test_per_function_cache_dir_and_prefix", "cache_dir" - ) - cache_prefix = hcacsimp.get_cache_property( - "_test_per_function_cache_dir_and_prefix", "cache_prefix" - ) - # Check. - self.assertEqual(cache_dir, "/tmp/custom_cache") - self.assertEqual(cache_prefix, "project_cache") - - -# ############################################################################# -# Test_s3_configuration -# ############################################################################# - - -class Test_s3_configuration(_BaseCacheTest): - """ - Test S3 configuration (global and per-function). - """ - - def test1(self) -> None: - """ - Verify that set_s3_bucket stores bucket with s3:// prefix. - """ - # Prepare inputs. - bucket = "my-test-bucket" - # Run. - hcacsimp.set_s3_bucket(bucket) - actual = hcacsimp.get_s3_bucket() - # Check. - self.assertEqual(actual, "s3://my-test-bucket") - - def test2(self) -> None: - """ - Verify that set_s3_bucket preserves existing s3:// prefix. - """ - # Prepare inputs. - bucket = "s3://my-test-bucket" - # Run. 
- hcacsimp.set_s3_bucket(bucket) - actual = hcacsimp.get_s3_bucket() - # Check. - self.assertEqual(actual, "s3://my-test-bucket") - - def test3(self) -> None: - """ - Verify that set_s3_prefix and get_s3_prefix work correctly. - """ - # Prepare inputs. - prefix = "cache/project1" - # Run. - hcacsimp.set_s3_prefix(prefix) - actual = hcacsimp.get_s3_prefix() - # Check. - self.assertEqual(actual, prefix) - - def test4(self) -> None: - """ - Verify that set_aws_profile and get_aws_profile work correctly. - """ - # Prepare inputs. - profile = "my-aws-profile" - # Run. - hcacsimp.set_aws_profile(profile) - actual = hcacsimp.get_aws_profile() - # Check. - self.assertEqual(actual, profile) - - def test5(self) -> None: - """ - Verify that per-function s3_bucket can be set and retrieved. - """ - # Prepare inputs. - func_name = "_cached_json_double" - s3_bucket = "s3://function-specific-bucket" - # Run. - hcacsimp.set_cache_property(func_name, "s3_bucket", s3_bucket) - actual = hcacsimp.get_cache_property(func_name, "s3_bucket") - # Check. - self.assertEqual(actual, s3_bucket) - - def test6(self) -> None: - """ - Verify that per-function s3_prefix can be set and retrieved. - """ - # Prepare inputs. - func_name = "_cached_json_double" - s3_prefix = "custom/prefix" - # Run. - hcacsimp.set_cache_property(func_name, "s3_prefix", s3_prefix) - actual = hcacsimp.get_cache_property(func_name, "s3_prefix") - # Check. - self.assertEqual(actual, s3_prefix) - - def test7(self) -> None: - """ - Verify that per-function aws_profile can be set and retrieved. - """ - # Prepare inputs. - func_name = "_cached_json_double" - aws_profile = "function-aws-profile" - # Run. - hcacsimp.set_cache_property(func_name, "aws_profile", aws_profile) - actual = hcacsimp.get_cache_property(func_name, "aws_profile") - # Check. - self.assertEqual(actual, aws_profile) - - def test8(self) -> None: - """ - Verify that auto_sync_s3 property can be set and retrieved. - """ - # Prepare inputs. 
- func_name = "_cached_json_double" - auto_sync = True - # Run. - hcacsimp.set_cache_property(func_name, "auto_sync_s3", auto_sync) - actual = hcacsimp.get_cache_property(func_name, "auto_sync_s3") - # Check. - self.assertEqual(actual, auto_sync) - - -# ############################################################################# -# Test_per_function_s3_decorator -# ############################################################################# - - -class Test_per_function_s3_decorator(_BaseCacheTest): - """ - Test S3 configuration set via decorator parameters. - """ - - def test1(self) -> None: - """ - Test that all S3 decorator parameters are stored correctly. - """ - # Run. - s3_bucket = hcacsimp.get_cache_property( - "_test_per_function_s3_configs", "s3_bucket" - ) - s3_prefix = hcacsimp.get_cache_property( - "_test_per_function_s3_configs", "s3_prefix" - ) - aws_profile = hcacsimp.get_cache_property( - "_test_per_function_s3_configs", "aws_profile" - ) - # Check. - self.assertEqual(s3_bucket, "s3://decorator-bucket") - self.assertEqual(s3_prefix, "decorator/prefix") - self.assertEqual(aws_profile, "decorator-profile") - - -# ############################################################################# -# Test__get_s3_cache_path -# ############################################################################# - - -class Test__get_s3_cache_path(_BaseCacheTest): - """ - Test _get_s3_cache_path function. - """ - - def test1(self) -> None: - """ - Test S3 path with global bucket and no prefix. - """ - # Prepare inputs. - hcacsimp.set_s3_bucket("s3://my-bucket") - # Run. - actual = hcacsimp._get_s3_cache_path("_cached_json_double") - # Check. - self.assertIn("s3://my-bucket", actual) - self.assertIn("_cached_json_double", actual) - - def test2(self) -> None: - """ - Test S3 path with global bucket and prefix. - """ - # Prepare inputs. - hcacsimp.set_s3_bucket("s3://my-bucket") - hcacsimp.set_s3_prefix("cache/data") - # Run. 
- actual = hcacsimp._get_s3_cache_path("_cached_json_double") - # Check. - self.assertIn("s3://my-bucket/cache/data", actual) - self.assertIn("_cached_json_double", actual) - - def test3(self) -> None: - """ - Test S3 path with per-function bucket overriding global. - """ - # Prepare inputs. - hcacsimp.set_s3_bucket("s3://global-bucket") - hcacsimp.set_cache_property( - "_cached_json_double", "s3_bucket", "s3://function-bucket" - ) - # Run. - actual = hcacsimp._get_s3_cache_path("_cached_json_double") - # Check. - self.assertIn("s3://function-bucket", actual) - self.assertNotIn("global-bucket", actual) - - def test4(self) -> None: - """ - Test S3 path with per-function prefix overriding global. - """ - # Prepare inputs. - hcacsimp.set_s3_bucket("s3://my-bucket") - hcacsimp.set_s3_prefix("global/prefix") - hcacsimp.set_cache_property( - "_cached_json_double", "s3_prefix", "function/prefix" - ) - # Run. - actual = hcacsimp._get_s3_cache_path("_cached_json_double") - # Check. - self.assertIn("s3://my-bucket/function/prefix", actual) - self.assertNotIn("global/prefix", actual) - - def test5(self) -> None: - """ - Test S3 path with decorator-configured bucket and prefix. - """ - # Run. - actual = hcacsimp._get_s3_cache_path("_test_per_function_s3_configs") - # Check. - self.assertIn("s3://decorator-bucket/decorator/prefix", actual) - self.assertIn("_test_per_function_s3_configs", actual) - - def test6(self) -> None: - """ - Test that ValueError is raised when S3 bucket is not configured. - """ - # Run and check. 
- with self.assertRaises(ValueError) as cm: - hcacsimp._get_s3_cache_path("_cached_json_double") - self.assertEqual(str(cm.exception), "S3 bucket not configured") - - -# ############################################################################# -# Test__extract_func_name_from_cache_file -# ############################################################################# - - -class Test__extract_func_name_from_cache_file(_BaseCacheTest): - """ - Test _extract_func_name_from_cache_file function. - """ - - def test1(self) -> None: - """ - Test extraction from JSON cache file with standard prefix. - """ - # Prepare inputs. - cache_file_name = "tmp.cache_simple._cached_json_double.json" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertEqual(actual, "_cached_json_double") - - def test2(self) -> None: - """ - Test extraction from pickle cache file. - """ - # Prepare inputs. - cache_file_name = "tmp.cache_simple._cached_pickle_square.pkl" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertEqual(actual, "_cached_pickle_square") - - def test3(self) -> None: - """ - Test extraction with custom prefix. - """ - # Prepare inputs. - cache_file_name = "my_project_cache._my_function.json" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertEqual(actual, "_my_function") - - def test4(self) -> None: - """ - Test extraction returns None for invalid file name. - """ - # Prepare inputs. - cache_file_name = "invalid_filename" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertIsNone(actual) - - def test5(self) -> None: - """ - Test extraction returns None for file without extension. - """ - # Prepare inputs. - cache_file_name = "cache.function_name" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. 
- self.assertIsNone(actual) - - def test6(self) -> None: - """ - Test extraction with custom prefix and dir. - """ - # Prepare inputs. - cache_file_name = "my_dir/my_project_cache._my_function.json" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertEqual(actual, "_my_function") - - def test7(self) -> None: - """ - Test extraction when custom prefix has dots in it. - """ - # Prepare inputs. - cache_file_name = "dir1/dir2/my.project.cache._my_function.json" - # Run. - actual = hcacsimp._extract_func_name_from_cache_file(cache_file_name) - # Check. - self.assertEqual(actual, "_my_function") - - -# ############################################################################# -# Test__check_s3_configured -# ############################################################################# - - -class Test__check_s3_configured(_BaseCacheTest): - """ - Test _check_s3_configured function. - """ - - def test1(self) -> None: - """ - Test returns False when S3 bucket is not explicitly configured. - """ - # Run. - actual = hcacsimp._check_s3_configured() - # Check. - self.assertFalse(actual) - - def test2(self) -> None: - """ - Test returns True when global S3 bucket is configured. - """ - # Prepare inputs. - hcacsimp.set_s3_bucket("s3://my-bucket") - # Run. - actual = hcacsimp._check_s3_configured() - # Check. - self.assertTrue(actual) - - def test3(self) -> None: - """ - Test returns True when per-function S3 bucket is configured. - """ - # Prepare inputs. - func_name = "_cached_json_double" - hcacsimp.set_cache_property( - func_name, "s3_bucket", "s3://function-bucket" - ) - # Run. - actual = hcacsimp._check_s3_configured(func_name) - # Check. - self.assertTrue(actual) - - def test4(self) -> None: - """ - Test per-function bucket overrides missing global bucket. - """ - # Prepare inputs. - func_name = "_cached_json_double" - hcacsimp.set_cache_property( - func_name, "s3_bucket", "s3://function-bucket" - ) - # Run. 
- actual_with_func = hcacsimp._check_s3_configured(func_name) - actual_without_func = hcacsimp._check_s3_configured() - # Check. - self.assertTrue(actual_with_func) - self.assertFalse(actual_without_func) - - def test5(self) -> None: - """ - Test with decorator-configured S3 bucket. - """ - # Run. - actual = hcacsimp._check_s3_configured("_test_per_function_s3_configs") - # Check. - self.assertTrue(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py deleted file mode 100644 index 4ab1219a4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcfile.py +++ /dev/null @@ -1,335 +0,0 @@ -import logging -import os -from typing import Any, List - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hcfile as hcfile - -_LOG = logging.getLogger(__name__) - - -def _create_test_file(self_: Any, filename: str, content: str) -> str: - """ - Create a test file with given content in the scratch directory. - - :param scratch_dir: Directory to create file in - :param filename: Name of file to create - :param content: Content to write to file - :return: Full path to created file - """ - scratch_dir = self_.get_scratch_space() - file_path = os.path.join(scratch_dir, filename) - content = hprint.dedent(content) - hio.to_file(file_path, content) - return file_path - - -def _create_cfile(self_: Any, cfile_content: List[str]) -> str: - """ - Create a cfile with TODOs in the scratch directory. 
- - :param scratch_dir: Directory to create file in - :param cfile_content: List of TODO lines to write - :return: Full path to created cfile - """ - content = "\n".join(cfile_content) - return _create_test_file(self_, "cfile.txt", content) - - -# ############################################################################# -# Test_parse_cfile1 -# ############################################################################# - - -class Test_parse_cfile1(hunitest.TestCase): - def helper(self, cfile_content: str, expected: str) -> None: - """ - Helper function to test parsing a cfile. - - :param cfile_content: Content to write to the test cfile - :param expected: Expected output from parse_cfile - """ - # Prepare inputs. - cfile_path = _create_test_file(self, "cfile.txt", cfile_content) - # Run function under test. - actual = hcfile.parse_cfile(cfile_path) - actual = "\n".join(map(str, actual)) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test parsing a cfile with valid entries. - """ - cfile_content = r""" - file1.py:10: Add docstring - file2.py:20: Add type hints - file3.py:30: Fix formatting - """ - expected = r""" - ('file1.py', '10', ' Add docstring') - ('file2.py', '20', ' Add type hints') - ('file3.py', '30', ' Fix formatting') - """ - self.helper(cfile_content, expected) - - def test2(self) -> None: - """ - Test parsing a cfile with valid entries. 
- """ - cfile_content = r""" - dev_scripts_helpers/llms/llm_transform.py:63:33: F821 undefined name '_extract_bullet_points' [flake8] - dev_scripts_helpers/llms/llm_cli.py:23: [C0301(line-too-long), ] Line too long (109/100) [pylint] - helpers/hio.py: 'pandas' is imported multiple times [normalize_imports] - helpers/hmarkdown.py:770:38: W605 invalid escape sequence '\S' [flake8] - """ - expected = r""" - ('dev_scripts_helpers/llms/llm_transform.py', '63', "33: F821 undefined name '_extract_bullet_points' [flake8]") - ('dev_scripts_helpers/llms/llm_cli.py', '23', ' [C0301(line-too-long), ] Line too long (109/100) [pylint]') - ('helpers/hmarkdown.py', '770', "38: W605 invalid escape sequence '\\S' [flake8]") - """ - self.helper(cfile_content, expected) - - def test_empty_file(self) -> None: - """ - Test parsing an empty cfile. - """ - self.helper("", "") - - def test_invalid_entries(self) -> None: - """ - Test parsing a cfile with invalid entries that should be skipped. - """ - cfile_content = r""" - file1.py:10: Valid entry - Invalid line without proper format - file2.py:20: Another valid entry - :30: Missing filename - file3.py:: Missing line number - """ - expected = r""" - ('file1.py', '10', ' Valid entry') - ('file2.py', '20', ' Another valid entry') - (' ', '30', ' Missing filename') - """ - self.helper(cfile_content, expected) - - -# ############################################################################# -# Test_inject_todos_from_cfile1 -# ############################################################################# - - -class Test_inject_todos_from_cfile1(hunitest.TestCase): - def _inject_todos(self, cfile_content: str) -> None: - """ - Helper to inject TODOs with standard parameters. - """ - todo_user = "user" - comment_prefix = "#" - hcfile.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) - - def test1(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file. - """ - # Create a test file. 
- test_file_content = """ - def hello(msg): - print(msg) - - def world(): - print("world") - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with TODOs. - cfile_content = [ - f"{file_path}:1: Add type hints.", - f"{file_path}:4: Add docstring.", - ] - _create_cfile(self, cfile_content) - # Run the function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add type hints. - def hello(msg): - print(msg) - - # TODO(user): Add docstring. - def world(): - print("world") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_one_line_file(self) -> None: - """ - Test injecting TODOs into an empty file. - """ - # Create an empty test file - test_file_content = """ - print("hello") - """ - file_path = _create_test_file(self, "empty.py", test_file_content) - # Create cfile with TODOs - cfile_content = [f"{file_path}:1: Add content to empty file."] - _create_cfile(self, cfile_content) - # Run the function under test - self._inject_todos("\n".join(cfile_content)) - # Check output - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add content to empty file. - print("hello") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_invalid_line_numbers(self) -> None: - """ - Test handling of TODOs with invalid line numbers. - """ - # Create a test file - test_file_content = """ - line1 - line2 - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with invalid line numbers - cfile_content = [ - f"{file_path}:999: This line number doesn't exist.", - ] - _create_cfile(self, cfile_content) - # This should raise an assertion error due to invalid line numbers - with self.assertRaises(AssertionError) as err: - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- expected = """ - ################################################################################ - * Failed assertion * - 998 < 2 - ################################################################################ - """ - self.assert_equal( - str(err.exception), expected, dedent=True, fuzzy_match=True - ) - - def test2(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file with a complex - class. - """ - # Create a test file. - test_file_content = """ - import logging - from typing import List, Optional - - class DataProcessor: - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - def process_batch(self, items): - for item in items: - self.data.append(self._transform(item)) - - def _transform(self, item): - return item.upper() - - def get_results(self): - return self.data - - def clear(self): - self.data = [] - """ - file_path = _create_test_file(self, "test.py", test_file_content) - # Create cfile with TODOs. - cfile_content = [ - f"{file_path}:4: Add class docstring explaining purpose and usage", - f"{file_path}:5: Add type hints for instance variables", - f"{file_path}:9: Add type hints for items parameter", - f"{file_path}:10: Consider adding batch size validation", - f"{file_path}:13: Add error handling for non-string inputs", - f"{file_path}:16: Add return type hint and docstring", - f"{file_path}:19: Add docstring explaining clear behavior", - ] - _create_cfile(self, cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- actual = hio.from_file(file_path) - expected = """ - import logging - from typing import List, Optional - - # TODO(user): Add class docstring explaining purpose and usage - class DataProcessor: - # TODO(user): Add type hints for instance variables - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - # TODO(user): Add type hints for items parameter - def process_batch(self, items): - # TODO(user): Consider adding batch size validation - for item in items: - self.data.append(self._transform(item)) - - # TODO(user): Add error handling for non-string inputs - def _transform(self, item): - return item.upper() - - # TODO(user): Add return type hint and docstring - def get_results(self): - return self.data - - # TODO(user): Add docstring explaining clear behavior - def clear(self): - self.data = [] - """ - self.assert_equal(actual, expected, dedent=True) - - def test3(self) -> None: - """ - Test injecting TODOs from a cfile into multiple Python files. - """ - # Create first test file. - test_file1_content = """ - def foo(): - pass - """ - file_path1 = _create_test_file(self, "test1.py", test_file1_content) - # Create second test file. - test_file2_content = """ - def bar(): - return None - """ - file_path2 = _create_test_file(self, "test2.py", test_file2_content) - # Create cfile. - cfile_content = [ - f"{file_path1}:1: Add docstring for foo.", - f"{file_path2}:1: Add docstring for bar.", - f"{file_path2}:2: Add type hint for return.", - ] - _create_cfile(self, cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual1 = hio.from_file(file_path1) - expected1 = """ - # TODO(user): Add docstring for foo. - def foo(): - pass - """ - self.assert_equal(actual1, expected1, dedent=True) - # - actual2 = hio.from_file(file_path2) - expected2 = """ - # TODO(user): Add docstring for bar. - def bar(): - # TODO(user): Add type hint for return. 
- return None - """ - self.assert_equal(actual2, expected2, dedent=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py deleted file mode 100644 index d8f2c19e2..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hcsv.py +++ /dev/null @@ -1,81 +0,0 @@ -import logging -import os - -import pandas as pd - -import helpers.hcsv as hcsv -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_convert_csv_to_dict -# ############################################################################# - - -class Test_convert_csv_to_dict(hunitest.TestCase): - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - actual_result = hcsv.convert_csv_to_dict(test_csv_path, remove_nans=True) - expected_result = { - "col1": ["a", "b", "c", "d"], - "col2": ["a", "b"], - "col3": ["a", "b", "c"], - } - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_from_typed_csv -# ############################################################################# - - -class Test_from_typed_csv(hunitest.TestCase): - """ - Check the opportunity to load correctly. - - .csv file with dtype param, which exist in .types prefix file. And - finally it checks that dtypes of loaded dataframe didn't change - compared with the original one. 
- """ - - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - os.path.join(dir_name, "test.csv.types") - actual_result = ( - hcsv.from_typed_csv(test_csv_path) - .dtypes.apply(lambda x: x.name) - .to_dict() - ) - expected_result = { - "A": "int64", - "B": "float64", - "C": "object", - "D": "object", - "E": "int64", - } - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_to_typed_csv -# ############################################################################# - - -class Test_to_typed_csv(hunitest.TestCase): - """ - Check whether the function 'to_typed_csv' create file with '.types' prefix - or not. - """ - - def test1(self) -> None: - dir_name = self.get_input_dir() - test_csv_path = os.path.join(dir_name, "test.csv") - test_csv_types_path = os.path.join(dir_name, "test.csv.types") - df = pd.read_csv(test_csv_path) - hcsv.to_typed_csv(df, test_csv_path) - self.assertTrue(os.path.exists(test_csv_types_path)) - os.remove(test_csv_types_path) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py deleted file mode 100644 index aaa5c0c9e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdataframe.py +++ /dev/null @@ -1,299 +0,0 @@ -""" -Import as: - -import helpers.test.test_dataframe as httdat -""" - -import collections -import logging -import os - -import numpy as np -import pandas as pd - -import helpers.hdataframe as hdatafr -import helpers.hpandas as hpandas -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# 
############################################################################# -# Test_filter_data_by_values1 -# ############################################################################# - - -class Test_filter_data_by_values1(hunitest.TestCase): - def test_conjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - data = data.add_prefix("col_") - filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_values(data, filters, "and", info) - # TODO(gp): Factor out the common code. - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}\n" - f"{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - def test_disjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) - data = data.add_prefix("col_") - filters = {"col_0": (1, 12), "col_1": (2, 11), "col_2": (3, 6)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_values(data, filters, "or", info) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}" - f"\n{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - -# ############################################################################# -# Test_filter_data_by_comparison -# ############################################################################# - - -class Test_filter_data_by_comparison(hunitest.TestCase): - def test_conjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - data = data.add_prefix("col_") - filters = {"col_0": (("gt", 1), ("lt", 7)), "col_1": ("eq", 5)} - info: 
collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_comparison( - data, filters, "and", info - ) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}\n" - f"{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - def test_disjunction1(self) -> None: - data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - data = data.add_prefix("col_") - filters = {"col_0": ("gt", 2), "col_1": ("eq", 5)} - info: collections.OrderedDict = collections.OrderedDict() - filtered_data = hdatafr.filter_data_by_comparison( - data, filters, "or", info - ) - str_output = ( - f"{hprint.frame('data')}\n" - f"{hpandas.df_to_str(data)}\n" - f"{hprint.frame('filters')}\n{filters}\n" - f"{hprint.frame('filtered_data')}" - f"\n{hpandas.df_to_str(filtered_data)}\n" - f"{hunitest.convert_info_to_string(info)}" - ) - self.check_string(str_output) - - -# ############################################################################# -# TestFilterDataByMethod -# ############################################################################# - - -class TestFilterDataByMethod(hunitest.TestCase): - """ - Test was generated automatically with Playback. - """ - - def test1(self) -> None: - # Define input variables. - input_path = os.path.join(self.get_input_dir(), "test.txt") - data = pd.read_csv(input_path, index_col=0) - filters = { - "Frequency": {"isin": {"values": ["Monthly", "Weekly", "Daily"]}}, - "source_code": {"isin": {"values": ["WIND"]}}, - "is_downloaded": {"isin": {"values": ["success"]}}, - } - mode = "and" - info: collections.OrderedDict = collections.OrderedDict() - # Call function to test. - actual = hdatafr.filter_data_by_method( - df=data, filters=filters, mode=mode, info=info - ) - actual = hpandas.df_to_str(actual, precision=3) - # Check output. 
- self.check_string(actual, fuzzy_match=True) - - -# ############################################################################# -# Test_apply_nan_mode -# ############################################################################# - - -class Test_apply_nan_mode(hunitest.TestCase): - @staticmethod - def _get_series_with_nans(seed: int) -> pd.Series: - date_range = {"start": "1/1/2010", "periods": 40, "freq": "M"} - series = hpandas.get_random_df( - num_cols=1, - seed=seed, - date_range_kwargs=date_range, - )[0] - series[:3] = np.nan - series[-3:] = np.nan - series[5:7] = np.nan - return series - - def test1(self) -> None: - """ - Test for `mode=leave_unchanged`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series) - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test2(self) -> None: - """ - Test for `mode="drop"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="drop") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test3(self) -> None: - """ - Test for `mode="ffill"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="ffill") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test4(self) -> None: - """ - Test for `mode="ffill_and_drop_leading"`. - """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="ffill_and_drop_leading") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - def test5(self) -> None: - """ - Test for `mode="fill_with_zero"`. 
- """ - series = self._get_series_with_nans(seed=1) - actual = hdatafr.apply_nan_mode(series, mode="fill_with_zero") - actual_string = hpandas.df_to_str(actual, num_rows=None) - self.check_string(actual_string) - - # Smoke test for empty input. - def test6(self) -> None: - series = pd.Series(dtype="float64") - hdatafr.apply_nan_mode(series) - - -# ############################################################################# -# Test_compute_points_per_year_for_given_freq -# ############################################################################# - - -class Test_compute_points_per_year_for_given_freq(hunitest.TestCase): - def test1(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("T") - np.testing.assert_equal(actual, 525780.125) - - def test2(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("B") - np.testing.assert_equal(actual, 260.875) - - def test3(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("D") - np.testing.assert_equal(actual, 365.25) - - def test4(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("W") - np.testing.assert_equal(actual, 52.25) - - def test5(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("M") - np.testing.assert_equal(actual, 12.0) - - def test6(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("Y") - np.testing.assert_equal(actual, 1.0) - - def test7(self) -> None: - actual = hdatafr.compute_points_per_year_for_given_freq("0D") - np.testing.assert_equal(actual, 0.0) - - -# ############################################################################# -# TestRemoveDuplicates -# ############################################################################# - - -class TestRemoveDuplicates(hunitest.TestCase): - def test_remove_duplicates1(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": 
[3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = ["dummy_value_1", "dummy_value_2"] - control_column = None - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 0 1 A 3 3 - 1 2 A 2 2""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_remove_duplicates2(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = None - control_column = "knowledge_timestamp" - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 0 1 A 3 3 - 1 2 A 2 2 - 2 1 A 1 1""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_remove_duplicates3(self) -> None: - test_data = { - "dummy_value_1": [1, 2, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [3, 2, 1], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - duplicate_columns = ["dummy_value_1", "dummy_value_2"] - control_column = "knowledge_timestamp" - actual = hdatafr.remove_duplicates(df, duplicate_columns, control_column) - actual = hpandas.df_to_str(actual) - expected = r""" - dummy_value_1 dummy_value_2 knowledge_timestamp end_download_timestamp - 1 2 A 2 2 - 2 1 A 1 1""" - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py deleted file mode 100644 index fac073570..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdatetime.py +++ /dev/null @@ -1,932 +0,0 @@ -import datetime -import logging - -import pandas as pd -import pytz - -import helpers.hdatetime as hdateti -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - -_STR_TS_NAIVE = "2021-01-04 09:30:00" -_STR_TS_UTC = "2021-01-04 09:30:00-00:00" -_STR_TS_ET = "2021-01-04 09:30:00-05:00" - -_PD_TS_NAIVE = pd.Timestamp("2021-01-04 09:30:00") -_PD_TS_UTC = pd.Timestamp("2021-01-04 09:30:00-00:00", tz="UTC") -_PD_TS_ET = pd.Timestamp("2021-01-04 09:30:00-05:00", tz="America/New_York") - -_DT_DT_NAIVE = datetime.datetime(2021, 1, 4, 9, 30, 0) -_DT_DT_UTC = pytz.timezone("UTC").localize(_DT_DT_NAIVE) -_DT_DT_ET = pytz.timezone("America/New_York").localize(_DT_DT_NAIVE) - - -# ############################################################################# -# Test_dassert_is_datetime1 -# ############################################################################# - - -class Test_dassert_is_datetime1(hunitest.TestCase): - def test_is_datetime1(self) -> None: - """ - Test valid datetime objects. - """ - objs = [ - _STR_TS_NAIVE, - _STR_TS_UTC, - _STR_TS_ET, - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - hdateti.dassert_is_datetime(obj) - - def test_is_datetime_fail1(self) -> None: - """ - Test invalid datetime objects. - """ - objs = [0, 0.0] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - with self.assertRaises(AssertionError): - hdateti.dassert_is_datetime(obj) - - def test_is_strict_datetime1(self) -> None: - """ - Test valid datetime objects. 
- """ - objs = [ - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - hdateti.dassert_is_strict_datetime(obj) - - def test_is_strict_datetime_fail1(self) -> None: - """ - Test invalid datetime objects. - """ - objs = [0, _STR_TS_NAIVE, _STR_TS_UTC, _STR_TS_ET, "hello"] - for obj in objs: - _LOG.debug("obj='%s', type='%s'", str(obj), str(type(obj))) - with self.assertRaises(AssertionError): - hdateti.dassert_is_strict_datetime(obj) - - -# ############################################################################# -# Test_dassert_tz1 -# ############################################################################# - - -class Test_dassert_tz1(hunitest.TestCase): - def test_datetime_conversions(self) -> None: - # Get a tz-naive datetime. - dt = datetime.datetime(2020, 1, 5, 9, 30, 0) - hdateti.dassert_is_tz_naive(dt) - # Localize it to UTC. - dt_utc = pytz.timezone("UTC").localize(dt) - hdateti.dassert_has_tz(dt_utc) - hdateti.dassert_has_UTC_tz(dt_utc) - # Convert to ET. - dt_et = dt_utc.astimezone(pytz.timezone("US/Eastern")) - hdateti.dassert_has_tz(dt_et) - hdateti.dassert_has_ET_tz(dt_et) - # Convert it back to UTC. - dt_utc2 = dt_et.astimezone(pytz.timezone("UTC")) - hdateti.dassert_has_tz(dt_utc2) - hdateti.dassert_has_UTC_tz(dt_utc2) - self.assertEqual(dt_utc, dt_utc2) - # Make it naive. 
- dt2 = dt_utc2.replace(tzinfo=None) - hdateti.dassert_is_tz_naive(dt2) - self.assertEqual(dt, dt2) - - def test_dassert_is_datetime1(self) -> None: - for obj in [ - _STR_TS_NAIVE, - _STR_TS_UTC, - _STR_TS_ET, - _PD_TS_NAIVE, - _PD_TS_UTC, - _PD_TS_ET, - _DT_DT_NAIVE, - _DT_DT_UTC, - _DT_DT_ET, - ]: - hdateti.dassert_is_datetime(obj) - - def test_dassert_is_datetime_assert1(self) -> None: - datetime_ = 5 - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_datetime(datetime_) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = r""" - * Failed assertion * - Instance of '5' is '' instead of '(, , )' - datetime_='5' of type '' is not a DateTimeType - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_to_datetime1(self) -> None: - """ - Apply `to_datetime` to a naive datetime. - """ - for obj in [ - _STR_TS_NAIVE, - _PD_TS_NAIVE, - _DT_DT_NAIVE, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_NAIVE - self.assertEqual(actual, expected) - # Check the tz info. - hdateti.dassert_is_tz_naive(actual) - with self.assertRaises(AssertionError): - hdateti.dassert_has_tz(actual) - hdateti.dassert_has_UTC_tz(actual) - hdateti.dassert_has_ET_tz(actual) - - def test_to_datetime2(self) -> None: - """ - Apply `to_datetime` to a UTC datetime. - """ - for obj in [ - _STR_TS_UTC, - _PD_TS_UTC, - _DT_DT_UTC, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_UTC - self.assertEqual(actual, expected) - # Check the tz info. - hdateti.dassert_has_tz(actual) - hdateti.dassert_has_UTC_tz(actual) - with self.assertRaises(AssertionError): - hdateti.dassert_is_tz_naive(actual) - hdateti.dassert_has_ET_tz(actual) - - def test_to_datetime3(self) -> None: - """ - Apply `to_datetime` to an ET datetime. 
- """ - for obj in [ - _STR_TS_ET, - _PD_TS_ET, - _DT_DT_ET, - ]: - _LOG.debug("obj='%s' type='%s'", obj, type(obj)) - actual = hdateti.to_datetime(obj) - expected = _DT_DT_ET - self.assertEqual(str(actual), str(expected)) - - -# ############################################################################# -# Test_dassert_tz_compatible1 -# ############################################################################# - - -class Test_dassert_tz_compatible1(hunitest.TestCase): - def test_dassert_compatible_timestamp1(self) -> None: - """ - Both datetimes are naive. - """ - for datetime1 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: - for datetime2 in [_PD_TS_NAIVE, _DT_DT_NAIVE]: - hdateti.dassert_tz_compatible(datetime1, datetime2) - - def test_dassert_compatible_timestamp2(self) -> None: - """ - Both datetimes have tz info. - """ - for datetime1 in [_PD_TS_UTC, _PD_TS_ET]: - for datetime2 in [_DT_DT_UTC, _DT_DT_ET]: - hdateti.dassert_tz_compatible(datetime1, datetime2) - - def test_dassert_compatible_timestamp_assert1(self) -> None: - """ - Test a single not compatible pair of datetimes and check the raised - exception. - """ - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_tz_compatible(_PD_TS_NAIVE, _DT_DT_UTC) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = """ - * Failed assertion * - 'False' - == - 'True' - datetime1='2021-01-04 09:30:00' and datetime2='2021-01-04 09:30:00+00:00' are not compatible - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_dassert_compatible_timestamp_assert2(self) -> None: - """ - Test a pairs of non-compatible datetimes making sure the assertion is - raised. 
- """ - for datetime1 in [ - _PD_TS_NAIVE, - _DT_DT_NAIVE, - _PD_TS_NAIVE, - _DT_DT_NAIVE, - ]: - for datetime2 in [_PD_TS_UTC, _PD_TS_ET, _DT_DT_UTC, _DT_DT_ET]: - with self.assertRaises(AssertionError): - hdateti.dassert_tz_compatible(datetime1, datetime2) - - -# ############################################################################# -# Test_dassert_have_same_tz1 -# ############################################################################# - - -class Test_dassert_have_same_tz1(hunitest.TestCase): - """ - Test an assertion that checks that timezones are equal for input - timestamps. - """ - - def test1(self) -> None: - """ - Timezones are equal. - """ - hdateti.dassert_have_same_tz(_DT_DT_ET, _PD_TS_ET) - - def test2(self) -> None: - """ - Both timestamps are tz-naive. - """ - hdateti.dassert_have_same_tz(_PD_TS_NAIVE, _DT_DT_NAIVE) - - def test3(self) -> None: - """ - Different timezones. - """ - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_have_same_tz(_DT_DT_ET, _DT_DT_UTC) - actual = str(cm.exception) - # pylint: disable=line-too-long - expected = """ - * Failed assertion * - 'America/New_York' - == - 'UTC' - datetime1=2021-01-04 09:30:00-05:00 (datetime1.tzinfo=America/New_York) datetime2=2021-01-04 09:30:00+00:00 (datetime2.tzinfo=UTC) - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Same timezone but different DST mode (i.e. EST vs EDT). 
- """ - ts_est = pd.Timestamp("2023-03-12 01:55:00-05:00", tz="America/New_York") - ts_edt = pd.Timestamp("2023-03-12 03:00:00-04:00", tz="America/New_York") - hdateti.dassert_have_same_tz(ts_est, ts_edt) - - -# ############################################################################# -# Test_get_current_time1 -# ############################################################################# - - -class Test_get_current_time1(hunitest.TestCase): - def test_get_current_time_UTC(self) -> None: - tz = "UTC" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_has_UTC_tz(dt) - - def test_get_current_time_ET(self) -> None: - tz = "ET" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_has_ET_tz(dt) - - def test_get_current_time_naive_UTC(self) -> None: - tz = "naive_UTC" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_is_tz_naive(dt) - - def test_get_current_time_naive_ET(self) -> None: - tz = "naive_ET" - dt = hdateti.get_current_time(tz) - _LOG.debug("tz=%s -> dt=%s", tz, dt) - hdateti.dassert_is_tz_naive(dt) - - -# ############################################################################# -# Test_to_generalized_datetime -# ############################################################################# - - -class Test_to_generalized_datetime(hunitest.TestCase): - def test_srs1(self) -> None: - srs = pd.Series(["2010-01-01", "2010-01-02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_index1(self) -> None: - idx = pd.Index(["2010-01-01", "2010-01-02"]) - actual = hdateti.to_generalized_datetime(idx) - expected = pd.Index( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_index_equal(actual, expected) - - def test_daily1(self) -> None: - srs = pd.Series(["1 Jan 
2010", "2 Jan 2010"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_weekly1(self) -> None: - srs = pd.Series(["2021-W14", "2021-W15"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-04-10"), pd.Timestamp("2021-04-17")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_semiannual1(self) -> None: - srs = pd.Series(["2021-S1", "2021-S2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_semiannual2(self) -> None: - srs = pd.Series(["2021/S1", "2021/S2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-06-30"), pd.Timestamp("2021-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_bimonthly1(self) -> None: - srs = pd.Series(["2021-B1", "2021-B2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-01-01"), pd.Timestamp("2021-03-01")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly1(self) -> None: - srs = pd.Series(["2020-M1", "2020-M2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly2(self) -> None: - srs = pd.Series(["2020M01", "2020M02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly3(self) -> None: - srs = pd.Series(["2020-01", "2020-02"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), 
pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly4(self) -> None: - srs = pd.Series(["2020 Jan", "2020 Feb"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_monthly5(self) -> None: - srs = pd.Series(["January 2020", "February 2020"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-01-31"), pd.Timestamp("2020-02-29")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly1(self) -> None: - srs = pd.Series(["2020-Q1", "2020-Q2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly2(self) -> None: - srs = pd.Series(["2020Q1", "2020Q2"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_quarterly3(self) -> None: - srs = pd.Series(["Q1 2020", "Q2 2020"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2020-03-31"), pd.Timestamp("2020-06-30")] - ) - pd.testing.assert_series_equal(actual, expected) - - def test_annual1(self) -> None: - srs = pd.Series(["2021", "2022"]) - actual = hdateti.to_generalized_datetime(srs) - expected = pd.Series( - [pd.Timestamp("2021-12-31"), pd.Timestamp("2022-12-31")] - ) - pd.testing.assert_series_equal(actual, expected) - - -# ############################################################################# -# Test_find_bar_timestamp1 -# ############################################################################# - - -class Test_find_bar_timestamp1(hunitest.TestCase): - """ - Use mode="round". 
- """ - - def helper1(self, current_timestamp: pd.Timestamp) -> None: - bar_duration_in_secs = 15 * 60 - max_distance_in_secs = 10 - actual = hdateti.find_bar_timestamp( - current_timestamp, - bar_duration_in_secs, - max_distance_in_secs=max_distance_in_secs, - ) - expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test1(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.helper1(current_timestamp) - - def test2(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:00:05", tz="UTC") - self.helper1(current_timestamp) - - def test3(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") - self.helper1(current_timestamp) - - def test4(self) -> None: - current_timestamp = pd.Timestamp( - "2021-09-09 08:01:59.500000+0000", tz="UTC" - ) - bar_duration_in_secs = 1 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="round" - ) - expected = pd.Timestamp("2021-09-09T08:02:00+0000", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - # /////////////////////////////////////////////////////////////////////////// - - def test5(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:20", tz="UTC") - with self.assertRaises(AssertionError) as cm: - self.helper1(current_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 40 <= 10 - current_timestamp=2021-09-09 07:59:20+00:00 is too distant from bar_timestamp=2021-09-09 08:00:00+00:00 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test6(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:10:20", tz="UTC") - with self.assertRaises(AssertionError) as cm: - self.helper1(current_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 280 <= 10 - current_timestamp=2021-09-09 08:10:20+00:00 is too distant from bar_timestamp=2021-09-09 08:15:00+00:00 - 
""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_find_bar_timestamp2 -# ############################################################################# - - -class Test_find_bar_timestamp2(hunitest.TestCase): - """ - Use mode="floor". - """ - - def test1(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T07:59:55", tz="UTC") - bar_duration_in_secs = 15 * 60 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T07:45:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - current_timestamp = pd.Timestamp("2021-09-09T08:01:55", tz="UTC") - bar_duration_in_secs = 15 * 60 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T08:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - current_timestamp = pd.Timestamp( - "2021-09-09 08:01:59.500000+0000", tz="UTC" - ) - bar_duration_in_secs = 1 - # - actual = hdateti.find_bar_timestamp( - current_timestamp, bar_duration_in_secs, mode="floor" - ) - expected = pd.Timestamp("2021-09-09T08:01:59+0000", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_convert_seconds_to_minutes -# ############################################################################# - - -class Test_convert_seconds_to_minutes(hunitest.TestCase): - def test1(self) -> None: - """ - Check that conversion is implemented correcty. - """ - num_secs = 300 - actual = hdateti.convert_seconds_to_minutes(num_secs) - expected = int(num_secs / 60) - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Check that an error is raised when input is not an integer number of - minutes. 
- """ - num_secs = 10 - with self.assertRaises(AssertionError) as cm: - hdateti.convert_seconds_to_minutes(num_secs) - actual = str(cm.exception) - expected = """ - * Failed assertion * - '10' - == - '0' - num_secs=10 is not an integer number of minutes - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_convert_unix_epoch_to_timestamp -# ############################################################################# - - -class Test_convert_unix_epoch_to_timestamp(hunitest.TestCase): - def test1(self) -> None: - """ - Test with default parameter values. - """ - epoch = 1631145600000 - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch) - expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Test with specified unit. - """ - epoch = 1631145600 - unit = "s" - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, unit=unit) - expected = pd.Timestamp("2021-09-09T00:00:00", tz="UTC") - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Test with specified timezone. - """ - epoch = 1631145600000 - tz = "US/Pacific" - actual = hdateti.convert_unix_epoch_to_timestamp(epoch=epoch, tz=tz) - expected = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_convert_timestamp_to_unix_epoch -# ############################################################################# - - -class Test_convert_timestamp_to_unix_epoch(hunitest.TestCase): - def test1(self) -> None: - """ - Test with default parameter values. 
- """ - timestamp = pd.Timestamp("2021-09-09") - actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) - expected = 1631145600000 - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Test with specified unit. - """ - timestamp = pd.Timestamp("2021-09-09") - unit = "s" - actual = hdateti.convert_timestamp_to_unix_epoch( - timestamp=timestamp, unit=unit - ) - expected = 1631145600 - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Test for a timestamp with specified timezone. - """ - timestamp = pd.Timestamp("2021-09-08T17:00:00", tz="US/Pacific") - actual = hdateti.convert_timestamp_to_unix_epoch(timestamp=timestamp) - expected = 1631145600000 - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_str_to_timestamp1 -# ############################################################################# - - -class Test_str_to_timestamp1(hunitest.TestCase): - """ - Test if string representation of datetime is converted correctly. 
- """ - - def test1(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has a valid pattern for `datetime_str` - """ - datetime_str = "20230728_150513" - timezone_info = "US/Eastern" - datetime_format = "%Y%m%d_%H%M%S" - actual = hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=datetime_format - ) - expected = pd.Timestamp("2023-07-28 15:05:13-0400", tz="US/Eastern") - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has an valid pattern for `datetime_str` - - `timezone_info` is UTC - """ - datetime_str = "20230728_150513" - timezone_info = "UTC" - format = "%Y%m%d_%H%M%S" - actual = hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=format - ) - expected = pd.Timestamp("2023-07-28 15:05:13+0000", tz="UTC") - self.assertEqual(actual, expected) - - def test3(self) -> None: - """ - - `datetime_str` has a valid format - - `datetime_format` has an invalid pattern for `datetime_str` - """ - datetime_str = "28-07-2023 15:05:13" - timezone_info = "US/Eastern" - datetime_format = "%Y%m%d_%H%M%S" - # The datetime format does not match the string representation of datetime. - with self.assertRaises(ValueError) as err: - hdateti.str_to_timestamp( - datetime_str, timezone_info, datetime_format=datetime_format - ) - actual = str(err.exception) - self.check_string(actual) - - def test4(self) -> None: - """ - - `datetime_str` has an invalid format - - `datetime_format` is not defined - """ - datetime_str = "qwe28abc07-201234" - timezone_info = "US/Eastern" - # Invalid datetime, should raise a ValueError. 
- with self.assertRaises(ValueError) as err: - hdateti.str_to_timestamp(datetime_str, timezone_info) - actual = str(err.exception) - self.check_string(actual) - - -# ############################################################################# -# Test_dassert_str_is_date -# ############################################################################# - - -class Test_dassert_str_is_date(hunitest.TestCase): - """ - Test that the function checks a string representation of date correctly. - """ - - def test1(self) -> None: - """ - - date has a valid format - """ - date_str = "20221101" - hdateti.dassert_str_is_date(date_str) - - def test2(self) -> None: - """ - - date has an invalid format - """ - date = "2022-11-01" - with self.assertRaises(ValueError) as err: - hdateti.dassert_str_is_date(date) - actual = str(err.exception) - self.check_string(actual) - - -# ############################################################################# -# Test_dassert_is_valid_timestamp -# ############################################################################# - - -class Test_dassert_is_valid_timestamp(hunitest.TestCase): - def test1(self) -> None: - """ - Test should not raise an exception when timestamp has a timezone. - """ - timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - hdateti.dassert_is_valid_timestamp(timestamp) - - def test2(self) -> None: - """ - Test should raise an exception when timestamp is without timezone info. - """ - # Set inputs. - timestamp = pd.Timestamp("2021-01-04 09:30:00") - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_valid_timestamp(timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 'None' is not 'None' - datetime_='2021-01-04 09:30:00' doesn't have timezone info - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test should not raise an exception when timestamp is none. 
- """ - timestamp = None - hdateti.dassert_is_valid_timestamp(timestamp) - - def test4(self) -> None: - """ - Test should raise an exception when timestamp is of type string. - """ - # Set input. - timestamp = "2021-01-04 09:30:00" - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_is_valid_timestamp(timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '2021-01-04 09:30:00' is '' instead of '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_timestamp_lt -# ############################################################################# - - -class Test_dassert_timestamp_lt(hunitest.TestCase): - def test1(self) -> None: - """ - Test with valid timestamps where start is less than end. - """ - start_timestamp = pd.Timestamp("2021-01-02 09:30:00-00:00", tz="UTC") - end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test2(self) -> None: - """ - Test with equal timestamps, this is should raise an exception. - """ - # Set inputs. - start_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - end_timestamp = pd.Timestamp("2021-02-02 09:30:00-00:00", tz="UTC") - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2021-02-02 09:30:00+00:00 < 2021-02-02 09:30:00+00:00 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test with start timestamp greater than end timestamp, this is should - raise an exception. - """ - # Set inputs. 
- start_timestamp = pd.Timestamp( - "2021-02-04 09:30:00-05:00", tz="America/New_York" - ) - end_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - # Run. - with self.assertRaises(AssertionError) as cm: - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2021-02-04 09:30:00-05:00 < 2021-01-04 09:30:00-05:00 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test with start timestamp as None. - """ - start_timestamp = None - end_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test5(self) -> None: - """ - Test with end timestamp as None. - """ - start_timestamp = pd.Timestamp( - "2021-01-04 09:30:00-05:00", tz="America/New_York" - ) - end_timestamp = None - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) - - def test6(self) -> None: - """ - Test with both timestamps as None. - """ - start_timestamp = None - end_timestamp = None - hdateti.dassert_timestamp_lt(start_timestamp, end_timestamp) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py deleted file mode 100644 index 9dd38d00e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdbg.py +++ /dev/null @@ -1,934 +0,0 @@ -import collections -import logging -from typing import List, Tuple - -import helpers.hdbg as hdbg -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# TODO(gp): Make sure the coverage is 100%. 
- -# ############################################################################# - - -# ############################################################################# -# Test_dassert1 -# ############################################################################# - - -# TODO(gp): Use a self.assert_equal() instead of a check_string() since this -# code needs to be stable. -class Test_dassert1(hunitest.TestCase): - """ - Test `dassert()`. - """ - - def test1(self) -> None: - """ - An assertion that is verified. - """ - hdbg.dassert(True) - - def test2(self) -> None: - """ - An assertion that is not verified. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False) - self.check_string(str(cm.exception)) - - def test3(self) -> None: - """ - An assertion with a message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, msg="hello") - self.check_string(str(cm.exception)) - - def test4(self) -> None: - """ - An assertion with a message to format. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s", "world") - self.check_string(str(cm.exception)) - - def test5(self) -> None: - """ - Too many parameters. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s", "world", "too_many") - self.check_string(str(cm.exception)) - - def test6(self) -> None: - """ - Not enough parameters. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert(False, "hello %s") - self.check_string(str(cm.exception)) - - def test7(self) -> None: - """ - Common error of calling `dassert()` instead of `dassert_eq()`. - - According to the user's intention the assertion should trigger, - but, because of using `dassert()` instead of `dassert_eq()`, the - assertion will not trigger. We notice that the user passed a - list instead of a string as `msg` and raise. 
- """ - with self.assertRaises(AssertionError) as cm: - y = ["world"] - hdbg.dassert(y, ["hello"]) - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_eq1 -# ############################################################################# - - -class Test_dassert_eq1(hunitest.TestCase): - def test1(self) -> None: - hdbg.dassert_eq(1, 1) - - def test2(self) -> None: - hdbg.dassert_eq(1, 1, msg="hello world") - - def test3(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, msg="hello world") - self.check_string(str(cm.exception)) - - def test4(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, "hello %s", "world") - self.check_string(str(cm.exception)) - - def test5(self) -> None: - """ - Raise assertion with incorrect message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_eq(1, 2, "hello %s") - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_misc1 -# ############################################################################# - - -# TODO(gp): Break it in piece. 
-class Test_dassert_misc1(hunitest.TestCase): - # dassert_in - - def test_in1(self) -> None: - hdbg.dassert_in("a", "abc") - - def test_in2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_in("a", "xyz".split()) - self.check_string(str(cm.exception)) - - # dassert_is - - def test_is1(self) -> None: - a = None - hdbg.dassert_is(a, None) - - def test_is2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is("a", None) - self.check_string(str(cm.exception)) - - # dassert_isinstance - - def test_is_instance1(self) -> None: - hdbg.dassert_isinstance("a", str) - - def test_is_instance2(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_isinstance("a", int) - self.check_string(str(cm.exception)) - - def test_is_instance3(self) -> None: - hdbg.dassert_isinstance("a", (str, int)) - - def test_is_instance4(self) -> None: - hdbg.dassert_isinstance(5.0, (float, int)) - - def test_is_instance5(self) -> None: - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_isinstance("a", (float, int)) - # TODO(gp): Replace all check_string with assert_equal - self.check_string(str(cm.exception)) - - # dassert_set_eq - - def test_set_eq1(self) -> None: - a = [1, 2, 3] - b = [2, 3, 1] - hdbg.dassert_set_eq(a, b) - - def test_set_eq2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [2, 2, 1] - hdbg.dassert_set_eq(a, b) - # Check. - actual = str(cm.exception) - expected = """ - * Failed assertion * - val1 - val2=[3] - val2 - val1=[] - val1=[1, 2, 3] - set eq - val2=[1, 2] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_is_subset - - def test_is_subset1(self) -> None: - a = [1, 2] - b = [2, 1, 3] - hdbg.dassert_is_subset(a, b) - - def test_is_subset2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [4, 2, 1] - hdbg.dassert_is_subset(a, b) - # Check. 
- actual = str(cm.exception) - expected = """ - * Failed assertion * - val1=[1, 2, 3] - issubset - val2=[1, 2, 4] - val1 - val2=[3] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_not_intersection - - def test_not_intersection1(self) -> None: - a = [1, 2, 3] - b = [4, 5] - hdbg.dassert_not_intersection(a, b) - - def test_not_intersection2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [4, 2, 1] - hdbg.dassert_not_intersection(a, b) - actual = str(cm.exception) - expected = """ - * Failed assertion * - val1=[1, 2, 3] - has no intersection - val2=[1, 2, 4] - val1.intersection(val2)=[1, 2] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # dassert_no_duplicates - - def test_no_duplicates1(self) -> None: - a = [1, 2, 3] - hdbg.dassert_no_duplicates(a) - - def test_no_duplicates2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 3, 3] - hdbg.dassert_no_duplicates(a) - self.check_string(str(cm.exception)) - - # dassert_is_sorted - - def test_is_sorted1(self) -> None: - a = [1, 2, 3] - hdbg.dassert_is_sorted(a) - - def test_is_sorted2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 4, 3] - hdbg.dassert_is_sorted(a) - self.check_string(str(cm.exception)) - - def test_is_sorted3(self) -> None: - """ - Test an array that is sorted descending. - """ - a = [3, 2, 2] - hdbg.dassert_is_sorted(a, sort_kwargs={"reverse": True}) - - def test_is_sorted4(self) -> None: - """ - Test an array that is not sorted descending. 
- """ - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 4, 3] - sort_kwargs = {"reverse": True} - hdbg.dassert_is_sorted(a, sort_kwargs=sort_kwargs) - self.check_string(str(cm.exception)) - - # dassert_eq_all - - def test_eq_all1(self) -> None: - a = [1, 2, 3] - b = [1, 2, 3] - hdbg.dassert_eq_all(a, b) - - def test_eq_all2(self) -> None: - with self.assertRaises(AssertionError) as cm: - a = [1, 2, 3] - b = [1, 2, 4] - hdbg.dassert_eq_all(a, b) - self.check_string(str(cm.exception)) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_lgt1 -# ############################################################################# - - -class Test_dassert_lgt1(hunitest.TestCase): - def test1(self) -> None: - """ - No assertion raised since `0 <= 0 <= 3`. - """ - hdbg.dassert_lgt( - 0, 0, 3, lower_bound_closed=True, upper_bound_closed=True - ) - - def test2(self) -> None: - """ - Raise assertion since it is not true that `0 < 0 <= 3`. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lgt( - 0, 0, 3, lower_bound_closed=False, upper_bound_closed=True - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 0 < 0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Raise assertion since it is not true that `0 < 100 <= 3`. - - The formatting of the assertion is correct. 
- """ - with self.assertRaises(AssertionError) as cm: - lower_bound_closed = False - upper_bound_closed = True - hdbg.dassert_lgt( - 0, - 100, - 3, - lower_bound_closed, - upper_bound_closed, - "hello %s", - "world", - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 100 <= 3 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_proportion1 -# ############################################################################# - - -class Test_dassert_is_proportion1(hunitest.TestCase): - def test1(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(0.1, "hello %s", "world") - - def test2(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(0.0, "hello %s", "world") - - def test3(self) -> None: - """ - Passing assertion with correct message and format. - """ - hdbg.dassert_is_proportion(1.0, "hello %s", "world") - - def test_assert1(self) -> None: - """ - Failing assertion with correct message and format. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello %s", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Failing assertion with correct message. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Failing assertion with incorrect message formatting. 
- """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - Caught assertion while formatting message: - 'not all arguments converted during string formatting' - hello world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert4(self) -> None: - """ - Failing assertion with incorrect message formatting. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_proportion(1.01, "hello %s %s", "world") - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 1.01 <= 1 - Caught assertion while formatting message: - 'not enough arguments for format string' - hello %s %s world - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_container_type1 -# ############################################################################# - - -class Test_dassert_container_type1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c".split() - hdbg.dassert_container_type(list_, List, str) - - def test_assert1(self) -> None: - """ - Check that assertion fails since a list is not a tuple. - """ - list_ = "a b c".split() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type(list_, Tuple, str) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '['a', 'b', 'c']' is '' instead of 'typing.Tuple' - obj='['a', 'b', 'c']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Check that assertion fails since a list contains strings and ints. 
- """ - list_ = ["a", 2, "c", "d"] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type(list_, list, str) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '2' is '' instead of '' - obj='['a', 2, 'c', 'd']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Like `test_assert3()` but with a message. - """ - list_ = ["a", 2, "c", "d"] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_container_type( - list_, list, str, "list_ is %s homogeneous", "not" - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '2' is '' instead of '' - list_ is not homogeneous - obj='['a', 2, 'c', 'd']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# _Animal -# ############################################################################# - - -class _Animal: - pass - - -# ############################################################################# -# _Man -# ############################################################################# - - -class _Man(_Animal): - pass - - -# ############################################################################# -# _Vegetable -# ############################################################################# - - -class _Vegetable: - pass - - -# ############################################################################# -# Test_dassert_issubclass1 -# ############################################################################# - - -class Test_dassert_issubclass1(hunitest.TestCase): - def test_man1(self) -> None: - """ - An instance of `_Man` descends from `_Animal`. - """ - man = _Man() - hdbg.dassert_issubclass(man, _Man) - - def test_man2(self) -> None: - """ - An instance of `_Man` descends from object. 
- """ - man = _Man() - hdbg.dassert_issubclass(man, object) - - def test_man_fail1(self) -> None: - """ - An instance of `_Man` doesn't descends from `_Vegetable`. - """ - man = _Man() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_issubclass(man, _Vegetable) - # We need to purify from object references. - self.check_string(str(cm.exception), purify_text=True) - - def test_man_fail2(self) -> None: - """ - An instance of `_Man` doesn't descends from `int`. - """ - man = _Man() - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_issubclass(man, int) - self.check_string(str(cm.exception), purify_text=True) - - def test1(self) -> None: - """ - In Python everything is an object. - """ - hdbg.dassert_issubclass(5, object) - hdbg.dassert_issubclass(int, object) - hdbg.dassert_issubclass(int, (object, int)) - - def test_fail1(self) -> None: - """ - `issubclass` only accepts classes and not instances as second argument. - """ - with self.assertRaises(Exception) as cm: - hdbg.dassert_issubclass(int, 5.0) - self.check_string(str(cm.exception), purify_text=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_callable1 -# ############################################################################# - - -class Test_dassert_callable1(hunitest.TestCase): - def test1(self) -> None: - func = lambda x: x - hdbg.dassert_callable(func) - - def test_fail1(self) -> None: - func = 4 - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_callable(func) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Obj '4' of type '' is not callable - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# 
Test_dassert_all_defined_or_all_None -# ############################################################################# - - -class Test_dassert_all_defined_or_all_None(hunitest.TestCase): - def test1(self) -> None: - """ - Verify that test passes when all the values are defined. - """ - vals = [1, 2, 3] - hdbg.dassert_all_defined_or_all_None(vals) - - def test2(self) -> None: - """ - Verify that assertion is raised when at least one of the values is not - defined. - """ - vals = [1, 2, None, None] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_defined_or_all_None(vals) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Some values in list are defined and some are None: '[1, 2, None, None]' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Verify that test passes when all the values are not defined. - """ - vals = [None, None, None] - hdbg.dassert_all_defined_or_all_None(vals) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_related_params1 -# ############################################################################# - - -class Test_dassert_related_params1(hunitest.TestCase): - def test1(self) -> None: - obj = {"val1": 1, "val2": 1, "val3": "hello"} - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test2(self) -> None: - obj = {"val1": 0, "val2": None, "val3": ""} - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test3(self) -> None: - obj = {"val1": 1, "val2": 0, "val3": "hello"} - with self.assertRaises(Exception) as cm: - mode = "all_or_none_non_null" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - actual = str(cm.exception) - expected = """ - * Failed assertion * - All or none parameter 
should be non-null: - val2=0 - params={'val1': 1, 'val2': 0, 'val3': 'hello'} - message 'hello world' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_related_params2 -# ############################################################################# - - -class Test_dassert_related_params2(hunitest.TestCase): - def test1(self) -> None: - obj = {"val1": 1, "val2": 1, "val3": "hello"} - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test2(self) -> None: - obj = { - "val1": None, - "val2": None, - "val3": None, - } - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - - def test3(self) -> None: - obj = {"val1": None, "val2": None, "val3": "hello"} - with self.assertRaises(Exception) as cm: - mode = "all_or_none_non_None" - hdbg.dassert_related_params(obj, mode, "message %s", "'hello world'") - actual = str(cm.exception) - expected = """ - * Failed assertion * - All or none parameter should be non-None: - val1=None - params={'val1': None, 'val2': None, 'val3': 'hello'} - message 'hello world' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_all_attributes_are_same1 -# ############################################################################# - - -class Test_dassert_all_attributes_are_same1(hunitest.TestCase): - def test1(self) -> None: - """ - Wrong type of object. 
- """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same(5, "a") - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '5' is '' instead of '' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test2(self) -> None: - """ - Wrong type of attribute. - """ - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same([1, 2, 3], 1) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Instance of '1' is '' instead of '' - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test3(self) -> None: - """ - Attribute with different values. - """ - Obj = collections.namedtuple("Obj", ["a", "b"]) - list_ = [Obj(1, 2), Obj(1, 3)] - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_all_attributes_are_same(list_, "b") - actual = str(cm.exception) - expected = """ - * Failed assertion * - Elements in the list have different values for - attribute b: - {2, 3} - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test4(self) -> None: - """ - Attribute with same values. - """ - Obj = collections.namedtuple("Obj", ["a", "b"]) - list_ = [Obj(1, 2), Obj(1, 2)] - hdbg.dassert_all_attributes_are_same(list_, "b") - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_lt -# ############################################################################# - - -class Test_dassert_lt(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the function doesn't raise an exception if first value is - less than second value. - """ - val1 = 1 - val2 = 2 - hdbg.dassert_lt(val1, val2) - - def test2(self) -> None: - """ - Test that the function raises an exception if first value is equal to - second value. - """ - # Set inputs. 
- val1 = 2 - val2 = 2 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2 < 2 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test that the function raises an exception if first value is greater - than second value. - """ - # Set inputs. - val1 = 3 - val2 = 2 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 3 < 2 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test that the function doesn't raise an exception when we pass string - inputs. - """ - val1 = "a" - val2 = "b" - hdbg.dassert_lt(val1, val2) - - def test5(self) -> None: - """ - Test that the function raises an exception where first value is greater - than second value with floats. - """ - # Set inputs. - val1 = 2.0 - val2 = 1.0 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_lt(val1, val2) - actual = str(cm.exception) - expected = """ - * Failed assertion * - 2.0 < 1.0 - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_is_integer -# ############################################################################# - - -class Test_dassert_is_integer(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the function do not raise the exception with integer values. - """ - val = 5 - hdbg.dassert_is_integer(val) - - def test2(self) -> None: - """ - Test that the function do not raise the exception with float values - that represent an integer. - """ - val = 5.0 - hdbg.dassert_is_integer(val) - - def test3(self) -> None: - """ - Test that the function raises an exception for float values that do not - represent an integer. 
- """ - # Set inputs. - val = 5.5 - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_integer(val) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Invalid val='5.5' of type '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Test that the function raises an exception for non-integer and non- - float types. - """ - # Set inputs. - val = "5" - # Run. - with self.assertRaises(AssertionError) as cm: - hdbg.dassert_is_integer(val) - actual = str(cm.exception) - expected = """ - * Failed assertion * - Invalid val='5' of type '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py deleted file mode 100644 index b3f6d7f04..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdict.py +++ /dev/null @@ -1,107 +0,0 @@ -import logging - -import config_root.config as cconfig -import helpers.hdict as hdict -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_get_nested_dict_iterator -# ############################################################################# - - -class Test_get_nested_dict_iterator(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic case with no nesting. - """ - dict_ = {"key0": "value0", "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), "value0"), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test2(self) -> None: - """ - Test simple nested case. 
- """ - dict_ = { - "key0": {"key00": "value00", "key01": "value01"}, - "key1": "value1", - } - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [ - (("key0", "key00"), "value00"), - (("key0", "key01"), "value01"), - (("key1",), "value1"), - ] - self.assertListEqual(actual_result, expected_result) - - def test3(self) -> None: - """ - Test multilevel nested case. - """ - dict_ = {"key0": {"key00": {"key000": "value000"}}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [ - (("key0", "key00", "key000"), "value000"), - (("key1",), "value1"), - ] - self.assertListEqual(actual_result, expected_result) - - def test4(self) -> None: - """ - Test flat case with `None` value. - """ - dict_ = {"key0": "value0", "key1": None} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), "value0"), (("key1",), None)] - self.assertListEqual(actual_result, expected_result) - - def test5(self) -> None: - """ - Test nested case with `None` value. - """ - dict_ = {"key0": {"key00": None}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), None), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test6(self) -> None: - """ - Test flat case with empty dict value. - """ - dict_ = {"key0": {}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), {}), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test7(self) -> None: - """ - Test nested case with empty dict value. 
- """ - dict_ = {"key0": {"key00": {}}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), {}), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test8(self) -> None: - """ - Test flat case with empty Config value. - """ - config = cconfig.Config() - dict_ = {"key0": config, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0",), config), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) - - def test9(self) -> None: - """ - Test nexted case with empty Config value. - """ - config = cconfig.Config() - dict_ = {"key0": {"key00": config}, "key1": "value1"} - actual_result = list(hdict.get_nested_dict_iterator(dict_)) - expected_result = [(("key0", "key00"), config), (("key1",), "value1")] - self.assertListEqual(actual_result, expected_result) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py deleted file mode 100644 index 7220d1474..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker.py +++ /dev/null @@ -1,624 +0,0 @@ -import logging -import os -import unittest.mock as umock -from typing import List, Optional, Tuple - -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_shared_root_path1 -# 
############################################################################# - - -class Test_replace_shared_root_path1(hunitest.TestCase): - def test1(self) -> None: - """ - Test replacing shared root path. - """ - # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. - mock_mapping = { - "/data/shared1": "/shared_folder1", - "/data/shared2": "/shared_folder2", - } - with umock.patch.object( - hserver, "get_shared_data_dirs", return_value=mock_mapping - ): - # Test replacing shared root path. - path1 = "/data/shared1/asset1" - act1 = hdocker.replace_shared_root_path(path1) - exp1 = "/shared_folder1/asset1" - self.assertEqual(act1, exp1) - # - path2 = "/data/shared2/asset2" - act2 = hdocker.replace_shared_root_path(path2) - exp2 = "/shared_folder2/asset2" - self.assertEqual(act2, exp2) - # - path3 = 'object("/data/shared2/asset2/item")' - act3 = hdocker.replace_shared_root_path(path3) - exp3 = 'object("/shared_folder2/asset2/item")' - self.assertEqual(act3, exp3) - - def test2(self) -> None: - """ - Test replacing shared root path with the `replace_ecs_tokyo` parameter. - """ - # Mock `hserver.get_shared_data_dirs()` to return a dummy mapping. - mock_mapping = { - "/data/shared": "/shared_folder", - } - with umock.patch.object( - hserver, "get_shared_data_dirs", return_value=mock_mapping - ): - # Test if `ecs_tokyo` is replaced if `replace_ecs_tokyo = True`. - path1 = 'object("/data/shared/ecs_tokyo/asset2/item")' - replace_ecs_tokyo = True - act1 = hdocker.replace_shared_root_path( - path1, replace_ecs_tokyo=replace_ecs_tokyo - ) - exp1 = 'object("/shared_folder/ecs/asset2/item")' - self.assertEqual(act1, exp1) - # Test if `ecs_tokyo` is not replaced if `replace_ecs_tokyo` is not - # defined. 
- path2 = 'object("/data/shared/ecs_tokyo/asset2/item")' - act2 = hdocker.replace_shared_root_path(path2) - exp2 = 'object("/shared_folder/ecs_tokyo/asset2/item")' - self.assertEqual(act2, exp2) - - -# ############################################################################# -# Test_convert_to_docker_path1 -# ############################################################################# - - -class Test_convert_to_docker_path1(hunitest.TestCase): - @staticmethod - def convert_caller_to_callee_docker_path( - in_file_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, - check_if_exists: bool, - ) -> Tuple[str, str]: - """ - Prepare inputs and call the function to convert a file name to Docker - paths. - - :return: A tuple containing - - docker_file_path: the Docker file path - - mount: the Docker mount string - """ - ( - source_host_path, - callee_mount_path, - mount, - ) = hdocker.get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - docker_file_path = hdocker.convert_caller_to_callee_docker_path( - in_file_path, - source_host_path, - callee_mount_path, - check_if_exists=check_if_exists, - is_input=True, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - return docker_file_path, mount - - def helper( - self, - in_file_path: str, - is_caller_host: bool, - use_sibling_container_for_callee: bool, - check_if_exists: bool, - exp_docker_file_path: str, - exp_mount: str, - ) -> None: - """ - Test converting a file name to Docker paths. - """ - # Run test. - docker_file_path, mount = self.convert_caller_to_callee_docker_path( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - ) - # Check output. - self.assert_equal(docker_file_path, exp_docker_file_path) - self.assert_equal(mount, exp_mount) - - def test1(self) -> None: - """ - Test converting a file name to Docker paths. - """ - # - Prepare inputs. 
- dir_name = self.get_input_dir() - in_file_path = os.path.join(dir_name, "tmp.llm_transform.in.txt") - is_caller_host = True - use_sibling_container_for_callee = True - check_if_exists = False - # - Prepare outputs. - helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = os.path.join( - helpers_root_path, - "helpers/test/outcomes", - "Test_convert_to_docker_path1.test1/input", - "tmp.llm_transform.in.txt", - ) - exp_mount = "type=bind,source=/app,target=/app" - self.helper( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - exp_docker_file_path, - exp_mount, - ) - - def test2(self) -> None: - """ - Test converting a file name of an existing file to a Docker path. - """ - # - Prepare inputs. - dir_name = self.get_input_dir() - # Create a file. - # E.g., in_file_path='/app/helpers/test/outcomes/Test_convert_to_docker_path1.test2/input/input.md' - in_file_path = os.path.join(dir_name, "tmp.input.md") - hio.to_file(in_file_path, "empty") - _LOG.debug(hprint.to_str("in_file_path")) - is_caller_host = True - use_sibling_container_for_callee = True - check_if_exists = True - # - Prepare outputs. - helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = os.path.join( - helpers_root_path, - "helpers/test/outcomes", - "Test_convert_to_docker_path1.test2/input", - "tmp.input.md", - ) - exp_mount = "type=bind,source=/app,target=/app" - self.helper( - in_file_path, - is_caller_host, - use_sibling_container_for_callee, - check_if_exists, - exp_docker_file_path, - exp_mount, - ) - - -# ############################################################################# -# Test_is_path1 -# ############################################################################# - - -class Test_is_path1(hunitest.TestCase): - def helper(self, path: str, expected: bool) -> None: - """ - Test helper for `is_path()` function. - """ - # Run test. - actual = hdocker.is_path(path) - # Check outputs. 
- _LOG.debug(hprint.to_str("path actual expected")) - self.assertEqual(actual, expected) - - def test_file_with_extension(self) -> None: - """ - Test paths with file extensions. - """ - # Prepare inputs. - test_cases = [ - ("file.txt", True), - ("document.pdf", True), - ("script.py", True), - ("data.csv", True), - ("image.jpg", True), - ("config.json", True), - ("readme.md", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_absolute_paths(self) -> None: - """ - Test absolute paths. - """ - # Prepare inputs. - test_cases = [ - ("/path/to/file.py", True), - ("/usr/bin/python", True), - ("/etc/config", True), - ("/home/user", True), - ("/", True), - ("/data/shared", True), - ] - # Check outputs. - for path, expected in test_cases: - self.helper(path, expected) - - def test_relative_paths(self) -> None: - """ - Test relative paths starting with ./ or ../. - """ - # Prepare inputs and run tests. - test_cases = [ - ("./file.txt", True), - ("../data.csv", True), - ("./folder/subfolder", True), - ("../parent/file", True), - ("./", True), - ("../", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_trailing_slash_paths(self) -> None: - """ - Test paths ending with slash (indicating directories). - """ - # Prepare inputs and run tests. - test_cases = [ - ("folder/", True), - ("data/", True), - ("my_directory/", True), - ("nested/folder/", True), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - def test_non_path_strings(self) -> None: - """ - Test strings that should not be considered paths. - """ - # Prepare inputs and run tests. - test_cases = [ - ("readme", False), - ("hello", False), - ("command", False), - ("data", False), - ("test", False), - ("python", False), - ("docker", False), - ("", False), - ] - # Run tests. 
- for path, expected in test_cases: - self.helper(path, expected) - - def test_edge_cases(self) -> None: - """ - Test edge cases and complex scenarios. - """ - # Prepare inputs and run tests. - test_cases = [ - # - Files with multiple extensions. - ("file.tar.gz", True), - ("backup.sql.bz2", True), - # - Hidden files. - (".hidden", True), - (".gitignore", True), - # - Complex paths. - ("./nested/folder/file.txt", True), - ("../parent/folder/", True), - ("/absolute/path/file.py", True), - # - Files without extension in paths. - # True because it contains a slash. - ("folder/README", True), - # True because starts with "./". - ("./config", True), - # True because starts with "/". - ("/usr/bin/python", True), - # - Strings that might be confused with paths. - # True because has extension. - ("folder.name", True), - # False because no extension, slash, or path prefix. - ("file-name", False), - # False because no extension, slash, or path prefix. - ("under_score", False), - ] - # Run tests. - for path, expected in test_cases: - self.helper(path, expected) - - -# ############################################################################# -# Test_convert_all_paths_from_caller_to_callee_docker_path1 -# ############################################################################# - - -class Test_convert_all_paths_from_caller_to_callee_docker_path1( - hunitest.TestCase -): - def helper( - self, - cmd_opts: List[str], - expected_str: str, - *, - is_caller_host: bool = True, - use_sibling_container_for_callee: bool = True, - create_files: Optional[List[str]] = None, - ) -> None: - """ - Helper for `convert_all_paths_from_caller_to_callee_docker_path()`. - """ - hdbg.dassert_isinstance(cmd_opts, list) - hdbg.dassert_isinstance(expected_str, str) - # Prepare inputs. - if create_files: - # Create temporary files for testing existing file paths. 
- for file_path in create_files: - dir_name = os.path.dirname(file_path) - if dir_name: - hio.create_dir(dir_name, incremental=True) - hio.to_file(file_path, "test content") - # Get docker mount info for the test. - ( - caller_mount_path, - callee_mount_path, - _, - ) = hdocker.get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - # Run test. - actual = hdocker.convert_all_paths_from_caller_to_callee_docker_path( - cmd_opts, - caller_mount_path, - callee_mount_path, - is_caller_host, - use_sibling_container_for_callee, - ) - _LOG.debug("actual=\n%s", str(actual)) - # Check outputs. - actual_str = "\n".join(actual) - actual_str = huntepur.purify_text(actual_str) - expected_str = huntepur.purify_text(expected_str) - self.assert_equal(actual_str, expected_str, dedent=True) - - # ///////////////////////////////////////////////////////////////////////////// - - def test_mixed_options_with_paths_and_non_paths(self) -> None: - """ - Test converting mixed command options with paths and non-paths. - """ - # Prepare inputs. - cmd_opts = [ - "--verbose", - "file.txt", # Path-like (has extension) - "--output", - "./output.log", # Path-like (relative path) - "command", # Not a path - # "/absolute/path", # Path-like (absolute) - "--flag", - "folder/", # Path-like (trailing slash) - ] - expected_output = [ - "--verbose", - "/app/file.txt", # Converted - "--output", - "/app/output.log", # Converted - "command", # Not converted - # "/app/absolute/path", # Converted - "--flag", - "/app/folder", # Converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_existing_files_get_converted(self) -> None: - """ - Test that existing files are converted even without path-like - appearance. - """ - # Prepare inputs. 
- temp_dir = self.get_scratch_space() - existing_file = os.path.join(temp_dir, "testfile") - cmd_opts = [ - "--input", - existing_file, # Will exist, should be converted - "nonexistent", # Doesn't exist and not path-like, won't be converted - ] - expected_output = [ - "--input", - f"/app/{os.path.relpath(existing_file, hgit.find_git_root())}", # Converted - "nonexistent", # Not converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output, create_files=[existing_file]) - - def test_path_like_strings_without_existing_files(self) -> None: - """ - Test that path-like strings are converted even if files don't exist. - """ - # Prepare inputs. - cmd_opts = [ - "script.py", # Path-like (extension) but doesn't exist - "./config.json", # Path-like (relative) but doesn't exist - # "/usr/bin/tool", # Path-like (absolute) but doesn't exist - "plain_word", # Not path-like and doesn't exist - ] - expected_output = [ - "/app/script.py", # Converted (has extension) - "/app/config.json", # Converted (relative path) - # "/app/usr/bin/tool", # Converted (absolute path) - "plain_word", # Not converted - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_empty_command_options(self) -> None: - """ - Test handling of empty command options list. - """ - # Prepare inputs. - cmd_opts = [] - expected_output = [] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_only_non_path_options(self) -> None: - """ - Test command options with no paths. - """ - # Prepare inputs. - cmd_opts = [ - "--verbose", - "--debug", - "command", - "argument", - "--flag", - ] - expected_output = [ - "--verbose", - "--debug", - "command", - "argument", - "--flag", - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. 
- self.helper(cmd_opts, expected_output) - - def test_only_path_options(self) -> None: - """ - Test command options with only paths. - """ - # Prepare inputs. - cmd_opts = [ - "input.txt", - "./config.yaml", - # "/var/log/app.log", - "data/", - "./output.json", - ] - expected_output = [ - "/app/input.txt", - "/app/config.yaml", - # "/app/var/log/app.log", - "/app/data", - "/app/output.json", - ] - expected_output = "\n".join(expected_output) - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_complex_paths_with_extensions(self) -> None: - """ - Test complex paths with multiple extensions and special cases. - """ - # Prepare inputs. - cmd_opts = [ - "archive.tar.gz", # Multiple extensions - ".hidden", # Hidden file - "backup.sql.bz2", # Multiple extensions - ".gitignore", # Hidden config file - ] - expected_output = """ - $GIT_ROOT/archive.tar.gz - $GIT_ROOT/.hidden - $GIT_ROOT/backup.sql.bz2 - $GIT_ROOT/.gitignore - """ - # Run test and check outputs. - self.helper(cmd_opts, expected_output) - - def test_sibling_vs_child_container_modes(self) -> None: - """ - Test different container modes (sibling vs child). - """ - # Prepare inputs. - cmd_opts = ["input.txt", "output/"] - # Test sibling container mode. - expected_output = ["/app/input.txt", "/app/output"] - expected_output = "\n".join(expected_output) - self.helper( - cmd_opts, - expected_output, - is_caller_host=True, - use_sibling_container_for_callee=True, - ) - # Test child container mode. 
- expected_output = ["/app/input.txt", "/app/output"] - expected_output = "\n".join(expected_output) - self.helper( - cmd_opts, - expected_output, - is_caller_host=True, - use_sibling_container_for_callee=False, - ) - - -# ############################################################################# -# Test_get_docker_mount_info1 -# ############################################################################# - - -class Test_get_docker_mount_info1(hunitest.TestCase): - def test1(self) -> None: - """ - With CSFY_ENABLE_DIND, sibling-style docker.sock must still bind the - repo root inside this container, not CSFY_HOST_GIT_ROOT_PATH. - """ - # - Prepare inputs. - git_root = hgit.find_git_root() - env = { - "CSFY_ENABLE_DIND": "1", - "CSFY_HOST_GIT_ROOT_PATH": "/path/only/on/outer/host", - } - # - Prepare outputs. - exp_target = "/app" - exp_mount = f"type=bind,source={git_root},target=/app" - # Run test. - with umock.patch.dict(os.environ, env, clear=False): - source, target, mount = hdocker.get_docker_mount_info( - is_caller_host=False, - use_sibling_container_for_callee=True, - ) - # Check outputs. - self.assert_equal(source, git_root) - self.assert_equal(target, exp_target) - self.assert_equal(mount, exp_mount) - - def test2(self) -> None: - """ - Without DinD, sibling mode uses CSFY_HOST_GIT_ROOT_PATH for bind - source. - """ - # - Prepare inputs. - host_root = "/tmp/explicit_host_git_root_for_test" - env = { - "CSFY_ENABLE_DIND": "0", - "CSFY_HOST_GIT_ROOT_PATH": host_root, - } - # - Prepare outputs. - exp_target = "/app" - exp_mount = f"type=bind,source={host_root},target=/app" - # Run test. - with umock.patch.dict(os.environ, env, clear=False): - source, target, mount = hdocker.get_docker_mount_info( - is_caller_host=False, - use_sibling_container_for_callee=True, - ) - # Check outputs. 
- self.assert_equal(source, host_root) - self.assert_equal(target, exp_target) - self.assert_equal(mount, exp_mount) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py deleted file mode 100644 index 203ae012e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hdocker_tests.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -Unit tests for hdocker_tests.py -""" - -import logging -import os - -import helpers.hdocker_tests as hdoctest -import helpers.hio as hio -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_get_docker_test_files -# ############################################################################# - - -class Test_get_docker_test_files(hunitest.TestCase): - """ - Test the get_docker_test_files function. - """ - - def test1(self) -> None: - """ - Test finding docker test files in a directory. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_2.py"), "") - hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 2) - self.assertTrue(any("docker_test_1.py" in f for f in actual)) - self.assertTrue(any("docker_test_2.py" in f for f in actual)) - - def test2(self) -> None: - """ - Test with no matching files. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create non-matching files. 
- hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") - hio.to_file(os.path.join(scratch_dir, "other_file.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 0) - - def test3(self) -> None: - """ - Test with single docker test file. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_single.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 1) - self.assertTrue("docker_test_single.py" in actual[0]) - - def test4(self) -> None: - """ - Test that files are returned in sorted order. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_z.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_a.py"), "") - hio.to_file(os.path.join(scratch_dir, "docker_test_m.py"), "") - # Run test. - actual = hdoctest.get_docker_test_files(scratch_dir) - # Check outputs. - self.assertEqual(len(actual), 3) - basenames = [os.path.basename(f) for f in actual] - self.assertEqual( - basenames, - ["docker_test_a.py", "docker_test_m.py", "docker_test_z.py"], - ) - - -# ############################################################################# -# Test_run_docker_cmd -# ############################################################################# - - -class Test_run_docker_cmd(hunitest.TestCase): - """ - Test the run_docker_cmd function. - """ - - def test1(self) -> None: - """ - Test that error is raised when docker_cmd.sh does not exist in - script_dir. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Run test and check output. - with self.assertRaises(AssertionError): - hdoctest.run_docker_cmd(scratch_dir) - - def test2(self) -> None: - """ - Test that error is raised when script_dir does not exist. - """ - # Prepare inputs. 
- nonexistent_dir = "/nonexistent_dir_that_does_not_exist" - # Run test and check output. - with self.assertRaises(AssertionError): - hdoctest.run_docker_cmd(nonexistent_dir) - - -# ############################################################################# -# Test_run_all_tests -# ############################################################################# - - -class Test_run_all_tests(hunitest.TestCase): - """ - Test the run_all_tests function. - """ - - def test1(self) -> None: - """ - Test with no docker test files returns 0. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create non-matching files. - hio.to_file(os.path.join(scratch_dir, "test_file.py"), "") - # Run test. - actual = hdoctest.run_all_tests(scratch_dir) - # Check outputs. - self.assertEqual(actual, 0) - - def test2(self) -> None: - """ - Test with docker test files when docker_cmd_script doesn't exist. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - hio.to_file(os.path.join(scratch_dir, "docker_test_1.py"), "") - nonexistent_docker_cmd = os.path.join( - scratch_dir, "nonexistent_docker_cmd.sh" - ) - # Run test and check output. 
- with self.assertRaises(AssertionError): - hdoctest.run_all_tests( - scratch_dir, docker_cmd_script=nonexistent_docker_cmd - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py deleted file mode 100644 index d1f229435..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_henv.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -import helpers.henv as henv -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_env1 -# ############################################################################# - - -class Test_env1(hunitest.TestCase): - def test_get_system_signature1(self) -> None: - txt = henv.get_system_signature() - _LOG.debug(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py deleted file mode 100644 index f50f79994..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hfile_tree.py +++ /dev/null @@ -1,347 +0,0 @@ -import logging -import pathlib - -import helpers.hfile_tree as hfiltree -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_generate_tree -# ############################################################################# - - -class Test_generate_tree(hunitest.TestCase): - def test1(self) -> None: - """ - Test generating default tree. - """ - # Prepare inputs. 
- path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - " - create_users.sh", - " - pip_list.txt", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Test generating default tree with depth. - """ - # Prepare inputs. - path = self.devops_dir - depth = 1 - include_tests = False - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test3(self) -> None: - """ - Test generating tree including test files and dirs. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = True - include_python = False - only_dirs = False - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - "- test", - " - test_docker.py", - ] - ) - self.assertEqual(actual, expected) - - def test4(self) -> None: - """ - Test generating tree including python files. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = False - include_python = True - only_dirs = False - output = "" - # Call tested function. 
- actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- __init__.py", - "- compose", - "- docker_build", - "- docker_run", - " - execute.py", - "- user_credentials.py", - ] - ) - self.assertEqual(actual, expected) - - def test5(self) -> None: - """ - Test generating tree with only directories. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = True - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- compose", - "- docker_build", - "- docker_run", - ] - ) - self.assertEqual(actual, expected) - - def test6(self) -> None: - """ - Test generating tree including tests, python files, and only - directories. - """ - # Prepare inputs. - path = self.devops_dir - depth = 0 - include_tests = True - include_python = True - only_dirs = True - output = "" - # Call tested function. - actual = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - # Check output. - expected = "\n".join( - [ - "devops", - "- __init__.py", - "- compose", - "- docker_build", - "- docker_run", - " - execute.py", - "- test", - " - test_docker.py", - "- user_credentials.py", - ] - ) - self.assertEqual(actual, expected) - - def test7(self) -> None: - """ - Test writing tree to file. - """ - # Prepare inputs. - scratch = pathlib.Path(self.get_scratch_space()) - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = scratch / "TREE.md" - # Call tested function. 
- _ = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - actual = output.read_text(encoding="utf-8") - # Check output. - expected = ( - "\n".join( - [ - "", - "devops", - "- compose", - "- docker_build", - " - create_users.sh", - " - pip_list.txt", - "- docker_run", - "", - ] - ) - + "\n" - ) - self.assertEqual(actual, expected) - - def test8(self) -> None: - """ - Test updating tree on existing file, preserving comments. - """ - # Prepare inputs. - scratch = pathlib.Path(self.get_scratch_space()) - path = self.devops_dir - depth = 0 - include_tests = False - include_python = False - only_dirs = False - output = scratch / "TREE.md" - # Create existing file. - content = ( - "\n".join( - [ - "", - "devops", - "- compose # compose-comment", - "- docker_build", - " - pip_list.txt # pip-comment", - "", - ] - ) - + "\n" - ) - output.write_text(content, encoding="utf-8") - # Call tested function. - _ = hfiltree.generate_tree( - path=path, - depth=depth, - include_tests=include_tests, - include_python=include_python, - only_dirs=only_dirs, - output=output, - ) - actual = output.read_text(encoding="utf-8") - # Check output. - expected = ( - "\n".join( - [ - "", - "devops", - "- compose # compose-comment", - "- docker_build", - " - create_users.sh", - " - pip_list.txt # pip-comment", - "- docker_run", - "", - ] - ) - + "\n" - ) - self.assertEqual(actual, expected) - - def setUp(self) -> None: - """ - Create a `devops` directory in scratch space. 
- - Scratch directory layout: - ``` - devops - - __init__.py - - user_credentials.py - - compose - - docker_run - - execute.py - - docker_build - - create_users.sh - - pip_list.txt - - test - - TestDocker - - test_docker.py - ``` - """ - super().setUp() - scratch = self.get_scratch_space() - self.devops_dir = pathlib.Path(scratch) / "devops" - self.devops_dir.mkdir() - structure = { - "": ["__init__.py", "user_credentials.py"], - "compose": [], - "docker_run": ["execute.py"], - "docker_build": ["create_users.sh", "pip_list.txt"], - "test": ["TestDocker", "test_docker.py"], - } - # Create empty dirs and files. - for subdir, files in structure.items(): - folder = self.devops_dir / subdir if subdir else self.devops_dir - if subdir: - folder.mkdir() - for name in files: - (folder / name).touch() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py deleted file mode 100644 index 8a7135578..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hgit.py +++ /dev/null @@ -1,822 +0,0 @@ -import logging -import os -import tempfile -from typing import Generator, List, Optional - -import pytest - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# Unfortunately we can't check the outcome of some of these functions since we -# don't know in which dir we are running. Thus we just test that the function -# completes and visually inspect the outcome, if possible. 
- - -# ############################################################################# -# Test_git_submodule1 -# ############################################################################# - - -class Test_git_submodule1(hunitest.TestCase): - def test_get_client_root1(self) -> None: - actual = hgit.get_client_root(super_module=True) - _LOG.debug("actual=%s", actual) - - def test_get_client_root2(self) -> None: - actual = hgit.get_client_root(super_module=False) - _LOG.debug("actual=%s", actual) - - def test_get_project_dirname1(self) -> None: - actual = hgit.get_project_dirname() - _LOG.debug("actual=%s", actual) - - def test_get_branch_name1(self) -> None: - actual = hgit.get_branch_name() - _LOG.debug("actual=%s", actual) - - def test_is_inside_submodule1(self) -> None: - actual = hgit.is_inside_submodule() - _LOG.debug("actual=%s", actual) - - # Outside CK infra, the following call hangs, so we skip it. - # TODO(gp): I don't see why it requires our infra. - @pytest.mark.requires_ck_infra - def test_is_amp(self) -> None: - actual = hgit.is_amp() - _LOG.debug("actual=%s", actual) - - def test_get_path_from_supermodule1(self) -> None: - actual = hgit.get_path_from_supermodule() - _LOG.debug("actual=%s", actual) - - def test_get_submodule_paths1(self) -> None: - actual = hgit.get_submodule_paths() - _LOG.debug("actual=%s", actual) - - -# ############################################################################# -# Test_git_submodule2 -# ############################################################################# - - -class Test_git_submodule2(hunitest.TestCase): - # def test_get_submodule_hash1(self) -> None: - # dir_name = "amp" - # _ = hgit._get_submodule_hash(dir_name) - - def test_get_remote_head_hash1(self) -> None: - dir_name = "." 
- actual = hgit.get_head_hash(dir_name) - _LOG.debug("actual=%s", actual) - - # def test_report_submodule_status1(self) -> None: - # dir_names = ["."] - # short_hash = True - # _ = hgit.report_submodule_status(dir_names, short_hash) - - def test_get_head_hash1(self) -> None: - dir_name = "." - actual = hgit.get_head_hash(dir_name) - _LOG.debug("actual=%s", actual) - - def _helper_group_hashes( - self, - head_hash: str, - remh_hash: str, - subm_hash: Optional[str], - expected: str, - ) -> None: - actual = hgit._group_hashes(head_hash, remh_hash, subm_hash) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_group_hashes1(self) -> None: - head_hash = "a2bfc704" - remh_hash = "a2bfc704" - subm_hash = None - expected = "head_hash = remh_hash = a2bfc704" - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - def test_group_hashes2(self) -> None: - head_hash = "22996772" - remh_hash = "92167662" - subm_hash = "92167662" - expected = """ - head_hash = 22996772 - remh_hash = subm_hash = 92167662 - """ - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - def test_group_hashes3(self) -> None: - head_hash = "7ea03eb6" - remh_hash = "7ea03eb6" - subm_hash = "7ea03eb6" - expected = "head_hash = remh_hash = subm_hash = 7ea03eb6" - # - self._helper_group_hashes(head_hash, remh_hash, subm_hash, expected) - - -# ############################################################################# -# Test_git_repo_name1 -# ############################################################################# - - -class Test_git_repo_name1(hunitest.TestCase): - def test_parse_github_repo_name1(self) -> None: - repo_name = "git@github.com:alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name2(self) -> None: - repo_name = "https://github.com/alphamatic/amp" - 
hgit._parse_github_repo_name(repo_name) - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name3(self) -> None: - repo_name = "git@github.fake.com:alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.fake.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_parse_github_repo_name4(self) -> None: - repo_name = "https://github.fake.com/alphamatic/amp" - host_name, repo_name = hgit._parse_github_repo_name(repo_name) - self.assert_equal(host_name, "github.fake.com") - self.assert_equal(repo_name, "alphamatic/amp") - - def test_get_repo_full_name_from_dirname1(self) -> None: - actual = hgit.get_repo_full_name_from_dirname( - dir_name=".", include_host_name=False - ) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_dirname2(self) -> None: - actual = hgit.get_repo_full_name_from_dirname( - dir_name=".", include_host_name=True - ) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_client1(self) -> None: - actual = hgit.get_repo_full_name_from_client(super_module=True) - _LOG.debug("actual=%s", actual) - - def test_get_repo_full_name_from_client2(self) -> None: - actual = hgit.get_repo_full_name_from_client(super_module=False) - _LOG.debug("actual=%s", actual) - - -# ############################################################################# -# Test_git_path1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. 
-@pytest.mark.requires_ck_infra -class Test_git_path1(hunitest.TestCase): - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - def test_get_path_from_git_root1(self) -> None: - file_name = "/app/helpers/test/test_hgit.py" - actual = hgit.get_path_from_git_root(file_name, super_module=True) - _LOG.debug("get_path_from_git_root()=%s", actual) - # Check. - expected = "helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), - reason="Run only in amp as sub-module", - ) - def test_get_path_from_git_root2(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - actual = hgit.get_path_from_git_root(file_name, super_module=True) - _LOG.debug("get_path_from_git_root()=%s", actual) - # Check. - expected = "amp/helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root3(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - git_root = "/app" - actual = hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - # Check. - expected = "amp/helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root4(self) -> None: - file_name = "/app/amp/helpers/test/test_hgit.py" - git_root = "/app/amp" - actual = hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - # Check. 
- expected = "helpers/test/test_hgit.py" - self.assert_equal(actual, expected) - - def test_get_path_from_git_root5(self) -> None: - file_name = "helpers/test/test_hgit.py" - git_root = "/app/amp" - with self.assertRaises(ValueError): - hgit.get_path_from_git_root( - file_name, super_module=False, git_root=git_root - ) - - -# ############################################################################# -# Test_git_modified_files1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. -@pytest.mark.requires_ck_infra -@pytest.mark.slow(reason="Around 7s") -@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class Test_git_modified_files1(hunitest.TestCase): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - - def set_up_test(self) -> None: - """ - All these tests need a reference to Git master branch. 
- """ - hgit.fetch_origin_master_if_needed() - - def test_get_modified_files1(self) -> None: - actual = hgit.get_modified_files() - _LOG.debug("actual=%s", actual) - - def test_get_previous_committed_files1(self) -> None: - actual = hgit.get_previous_committed_files() - _LOG.debug("actual=%s", actual) - - def test_get_modified_files_in_branch1(self) -> None: - actual = hgit.get_modified_files_in_branch("master") - _LOG.debug("actual=%s", actual) - - def test_get_summary_files_in_branch1(self) -> None: - actual = hgit.get_summary_files_in_branch("master") - _LOG.debug("actual=%s", actual) - - def test_git_log1(self) -> None: - actual = hgit.git_log() - _LOG.debug("actual=%s", actual) - - -# ############################################################################# - - -# ############################################################################# -# Test_find_docker_file1 -# ############################################################################# - - -# Outside CK infra, the following class hangs, so we skip it. -@pytest.mark.requires_ck_infra -class Test_find_docker_file1(hunitest.TestCase): - def test1(self) -> None: - """ - Test for a file `amp/helpers/test/test_hgit.py` that is not from Docker - (i.e., it doesn't start with `/app`) and exists in the repo. - """ - amp_dir = hgit.get_amp_abs_path() - # Use this file since `find_docker_file()` needs to do a `find` in the - # repo, and we need to have a fixed file structure. - file_name = hgit.find_file_in_git_tree("test_hgit.py") - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - ) - expected = ["helpers/test/test_hgit.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test2(self) -> None: - """ - Test for a file `/app/amp/helpers/test/test_hgit.py` that is from - Docker (i.e., it starts with `/app`) and exists in the repo. 
- """ - amp_dir = hgit.get_amp_abs_path() - # Use this file since `find_docker_file()` needs to do a `find` in the - # repo, and we need to have a fixed file structure. - file_name = hgit.find_file_in_git_tree("test_hgit.py") - expected = ["helpers/test/test_hgit.py"] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - ) - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test3(self) -> None: - """ - Test for a file `/venv/lib/python3.8/site-packages/invoke/tasks.py` - that is from Docker (e.g., it starts with `/app`), but doesn't exist in - the repo. - """ - file_name = "/venv/lib/python3.8/site-packages/invoke/tasks.py" - actual = hgit.find_docker_file(file_name) - expected: List[str] = [] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test4(self) -> None: - """ - Test for a file `./core/dataflow/utils.py` that is from Docker (i.e., - it starts with `/app`), but has multiple copies in the repo. - """ - amp_dir = hgit.get_amp_abs_path() - file_name = "/app/amp/core/dataflow/utils.py" - dir_depth = 1 - candidate_files = [ - "core/dataflow/utils.py", - "core/foo/utils.py", - "core/bar/utils.py", - ] - candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - dir_depth=dir_depth, - candidate_files=candidate_files, - ) - # Only one candidate file matches basename and one dirname. 
- expected = ["core/dataflow/utils.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test5(self) -> None: - amp_dir = hgit.get_amp_abs_path() - file_name = "/app/amp/core/dataflow/utils.py" - dir_depth = -1 - candidate_files = [ - "core/dataflow/utils.py", - "bar/dataflow/utils.py", - "core/foo/utils.py", - ] - candidate_files = [os.path.join(amp_dir, f) for f in candidate_files] - actual = hgit.find_docker_file( - file_name, - root_dir=amp_dir, - dir_depth=dir_depth, - candidate_files=candidate_files, - ) - # Only one file matches `utils.py` using all the 3 dir levels. - expected = ["core/dataflow/utils.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_extract_gh_issue_number_from_branch -# ############################################################################# - - -class Test_extract_gh_issue_number_from_branch(hunitest.TestCase): - def test_extract_gh_issue_number_from_branch1(self) -> None: - """ - Tests extraction from a branch name with a specific format. - """ - branch_name = "CmampTask10725_Add_more_tabs_to_orange_tmux" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "10725" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch2(self) -> None: - """ - Tests extraction from another branch name format. - """ - branch_name = "HelpersTask23_Add_more_tabs_to_orange_tmux" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "23" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch3(self) -> None: - """ - Tests extraction from a short branch name format. 
- """ - branch_name = "CmTask3434" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "3434" - self.assert_equal(str(actual), expected) - - def test_extract_gh_issue_number_from_branch4(self) -> None: - """ - Tests behavior when no issue number is present in the branch name. - """ - branch_name = "NoTaskNumberHere" - actual = hgit.extract_gh_issue_number_from_branch(branch_name) - expected = "None" - self.assert_equal(str(actual), expected) - - -# ############################################################################# -# Test_find_git_root1 -# ############################################################################# - - -class Test_find_git_root1(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a super repo (e.g. //orange) - - the repo contains another super repo (e.g. //amp) as submodule (first level) - - the first level submodule contains another submodule (e.g. //helpers) (second level) - - Directory structure: - orange/ - |-- .git/ - `-- amp/ - |-- .git (points to ../.git/modules/amp) - |-- ck.infra/ - `-- helpers_root/ - `-- .git (points to ../../.git/modules/amp/modules/helpers_root) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `orange` repo. - self.repo_dir = os.path.join(temp_dir, "orange") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create `amp` submodule under `orange`. - self.submodule_dir = os.path.join(self.repo_dir, "amp") - hio.create_dir(self.submodule_dir, incremental=False) - submodule_git_file = os.path.join(self.submodule_dir, ".git") - txt = "gitdir: ../.git/modules/amp" - hio.to_file(submodule_git_file, txt) - submodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "amp" - ) - hio.create_dir(submodule_git_file_dir, incremental=False) - # Create `helpers_root` submodule under `amp`. 
- self.subsubmodule_dir = os.path.join(self.submodule_dir, "helpers_root") - hio.create_dir(self.subsubmodule_dir, incremental=False) - subsubmodule_git_file = os.path.join(self.subsubmodule_dir, ".git") - txt = "gitdir: ../../.git/modules/amp/modules/helpers_root" - hio.to_file(subsubmodule_git_file, txt) - subsubmodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "amp", "modules", "helpers_root" - ) - hio.create_dir(subsubmodule_git_file_dir, incremental=False) - # Create `ck.infra` runnable dir under `amp`. - self.runnable_dir = os.path.join(self.submodule_dir, "ck.infra") - hio.create_dir(self.runnable_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in the super repo (e.g. //orange) - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in first level submodule (e.g. //amp) - """ - self.set_up_test() - with hsystem.cd(self.submodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test3(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in second level submodule (e.g. //helpers) - """ - self.set_up_test() - with hsystem.cd(self.subsubmodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test4(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in a runnable dir (e.g. ck.infra) under the - first level submodule (e.g. 
//amp) - """ - self.set_up_test() - with hsystem.cd(self.runnable_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root2 -# ############################################################################# - - -class Test_find_git_root2(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a super repo (e.g. //cmamp) - - the repo contains //helpers as submodule - - Directory structure: - cmamp/ - |-- .git/ - |-- ck.infra/ - `-- helpers_root/ - `-- .git (points to ../.git/modules/helpers_root) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `cmamp` repo. - self.repo_dir = os.path.join(temp_dir, "cmamp") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create `helpers_root` submodule under `cmamp`. - self.submodule_dir = os.path.join(self.repo_dir, "helpers_root") - hio.create_dir(self.submodule_dir, incremental=False) - submodule_git_file = os.path.join(self.submodule_dir, ".git") - txt = "gitdir: ../.git/modules/helpers_root" - hio.to_file(submodule_git_file, txt) - submodule_git_file_dir = os.path.join( - self.repo_dir, ".git", "modules", "helpers_root" - ) - hio.create_dir(submodule_git_file_dir, incremental=False) - # Create `ck.infra` runnable dir under `cmamp`. - self.runnable_dir = os.path.join(self.repo_dir, "ck.infra") - hio.create_dir(self.runnable_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in the super repo (e.g. 
//cmamp) - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the submodule (e.g. //helpers) - """ - self.set_up_test() - with hsystem.cd(self.submodule_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test3(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in a runnable dir (e.g. ck.infra) - """ - self.set_up_test() - with hsystem.cd(self.runnable_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root3 -# ############################################################################# - - -class Test_find_git_root3(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is //helpers - - Directory structure: - helpers/ - |-- .git/ - `-- arbitrary1/ - `-- arbitrary1a/ - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create `helpers` repo. - self.repo_dir = os.path.join(temp_dir, "helpers") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create arbitrary directory under `helpers`. 
- self.arbitrary_dir = os.path.join( - self.repo_dir, "arbitrary1", "arbitrary1a" - ) - hio.create_dir(self.arbitrary_dir, incremental=False) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the root of repo - """ - self.set_up_test() - with hsystem.cd(self.repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - def test2(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is in an arbitrary directory under the repo - """ - self.set_up_test() - with hsystem.cd(self.arbitrary_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root4 -# ############################################################################# - - -class Test_find_git_root4(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a linked repo - - Directory structure: - repo/ - `-- .git/ - linked_repo/ - `-- .git (points to /repo/.git) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create repo. - self.repo_dir = os.path.join(temp_dir, "repo") - hio.create_dir(self.repo_dir, incremental=False) - self.git_dir = os.path.join(self.repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create linked repo. - self.linked_repo_dir = os.path.join(temp_dir, "linked_repo") - hio.create_dir(self.linked_repo_dir, incremental=False) - # Create pointer from linked repo to the actual repo. 
- linked_git_file = os.path.join(self.linked_repo_dir, ".git") - txt = f"gitdir: {self.git_dir}\n" - hio.to_file(linked_git_file, txt) - - def test1(self) -> None: - """ - Check that the function returns the correct git root if - - the caller is the linked repo - """ - self.set_up_test() - with hsystem.cd(self.linked_repo_dir): - git_root = hgit.find_git_root(".") - self.assert_equal(git_root, self.repo_dir) - - -# ############################################################################# -# Test_find_git_root5 -# ############################################################################# - - -class Test_find_git_root5(hunitest.TestCase): - """ - Check that the error is raised when no .git directory is found. - - Directory structure: - arbitrary_dir/ - broken_repo/ - `-- .git (points to /nonexistent/path/to/gitdir) - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self): - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - # `self.get_scratch_space()` does not work in the case as it creates - # a temp directory within the repo where `.git` exists by default - # (e.g. /app/helpers/test/outcomes/Test_find_git_root5.test1/tmp.scratch) - # This preventing the exception from being raised. - # We need a structure without `.git` for this test. - self.temp_dir = tempfile.TemporaryDirectory() - # Create arbitrary directory that is not a git repo. - self.arbitrary_dir = os.path.join(self.temp_dir.name, "arbitrary_dir") - hio.create_dir(self.arbitrary_dir, incremental=False) - # Create arbitrary directory that is a submodule or linked repo that - # point to non existing super repo. - self.repo_dir = os.path.join(self.temp_dir.name, "broken_repo") - hio.create_dir(self.repo_dir, incremental=False) - # Create an invalid `.git` file with a non-existent `gitdir`. 
- invalid_git_file = os.path.join(self.repo_dir, ".git") - txt = "gitdir: /nonexistent/path/to/gitdir" - hio.to_file(invalid_git_file, txt) - - def tear_down_test(self) -> None: - self.temp_dir.cleanup() - - def test1(self) -> None: - """ - Check that the error is raised when the caller is in a directory that - is not either a git repo or a submodule. - """ - with ( - hsystem.cd(self.arbitrary_dir), - self.assertRaises(AssertionError) as cm, - ): - _ = hgit.find_git_root(".") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '/' - != - '/' - No .git directory or file found in any parent directory. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test2(self) -> None: - """ - Check that the error is raised when the caller is in a submodule or - linked repo that points to non existing super repo. - """ - with hsystem.cd(self.repo_dir), self.assertRaises(AssertionError) as cm: - _ = hgit.find_git_root(".") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '/' - != - '/' - Top-level .git directory not found. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - -# ############################################################################# -# Test_find_git_root6 -# ############################################################################# - - -class Test_find_git_root6(hunitest.TestCase): - """ - Check that the function returns the correct git root if: - - the repo is a worktree - - Directory structure: - main_repo/ - `-- .git/ - |-- config - `-- worktrees/ - `-- csfy2/ - |-- HEAD - `-- config - csfy2/ (worktree) - `-- .git (points to /main_repo/.git/worktrees/csfy2) - """ - - def set_up_test(self) -> None: - temp_dir = self.get_scratch_space() - # Create main repo with a .git directory. 
- self.main_repo_dir = os.path.join(temp_dir, "main_repo") - hio.create_dir(self.main_repo_dir, incremental=False) - self.git_dir = os.path.join(self.main_repo_dir, ".git") - hio.create_dir(self.git_dir, incremental=False) - # Create worktree git metadata directory. - self.worktree_git_dir = os.path.join(self.git_dir, "worktrees", "csfy2") - hio.create_dir(self.worktree_git_dir, incremental=False) - # Create worktree directory. - self.worktree_dir = os.path.join(temp_dir, "csfy2") - hio.create_dir(self.worktree_dir, incremental=False) - # Create pointer from worktree to the git directory. - worktree_git_file = os.path.join(self.worktree_dir, ".git") - txt = f"gitdir: {self.worktree_git_dir}\n" - hio.to_file(worktree_git_file, txt) - - def test1(self) -> None: - """ - Check that the function returns the worktree root when called from a worktree. - """ - self.set_up_test() - with hsystem.cd(self.worktree_dir): - git_root = hgit.find_git_root(".") - # For worktrees, the function should return the worktree root, - # not the main repository root. 
- self.assert_equal(git_root, self.worktree_dir) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py deleted file mode 100644 index 2e4a97ca4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hintrospection.py +++ /dev/null @@ -1,406 +0,0 @@ -import logging -import os -import re -from typing import Any, Callable - -import helpers.hdbg as hdbg -import helpers.hintrospection as hintros -import helpers.hpickle as hpickle -import helpers.hstring as hstring -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_is_pickleable -# ############################################################################# - - -def hello() -> bool: - return False - - -# ############################################################################# -# _ClassPickleable -# ############################################################################# - - -class _ClassPickleable: - """ - Class with pickleable param values. - """ - - def __init__(self) -> None: - self._arg1 = 1 - self._arg2 = ["2", 3] - - @staticmethod - def say2(self) -> None: - print("Hello") - - def say(self) -> None: - print("Hello") - - -# ############################################################################# -# _ClassNonPickleable -# ############################################################################# - - -class _ClassNonPickleable: - """ - Class with non-pickleable param values. 
- """ - - def __init__(self) -> None: - self._arg1 = lambda x: x - self._arg2 = 2 - - -# ############################################################################# -# Test_is_pickleable1 -# ############################################################################# - - -class Test_is_pickleable1(hunitest.TestCase): - def helper( - self, - obj: Any, - exp_str: str, - exp_bound: bool, - exp_lambda: bool, - exp_pickled: bool, - ) -> None: - _LOG.debug("obj=%s", obj) - # - act_str = str(obj) - _LOG.debug("act_str=%s", act_str) - _LOG.debug("exp_str=%s", exp_str) - self.assert_equal(act_str, exp_str, purify_text=True) - # - act_bound = hintros.is_bound_to_object(obj) - _LOG.debug("act_bound=%s", act_bound) - _LOG.debug("exp_bound=%s", exp_bound) - self.assertEqual(act_bound, exp_bound) - # - act_lambda = hintros.is_lambda_function(obj) - _LOG.debug("act_lambda=%s", act_lambda) - _LOG.debug("exp_lambda=%s", exp_lambda) - self.assertEqual(act_lambda, exp_lambda) - # Try to pickle. - try: - file_name = os.path.join(self.get_scratch_space(), "obj.pkl") - hpickle.to_pickle(obj, file_name) - act_pickled = True - except AttributeError as e: - _LOG.error("e=%s", e) - act_pickled = False - _LOG.debug("act_pickled=%s", act_pickled) - _LOG.debug("exp_pickled=%s", exp_pickled) - self.assertEqual(act_pickled, exp_pickled) - - def test_lambda1(self) -> None: - # Local lambda. - lambda_ = lambda: 0 - func = lambda_ - exp_str = r". at 0x>" - # A lambda is not bound to an object. - exp_bound = False - exp_lambda = True - # A lambda is not pickleable. - exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_lambda2(self) -> None: - lambda_ = lambda x: x - func = lambda_ - exp_str = r". at 0x>" - # A lambda is not bound to an object. - exp_bound = False - exp_lambda = True - # A lambda is not pickleable. 
- exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_func1(self) -> None: - def _hello() -> bool: - return False - - # - func = _hello - exp_str = ( - r"._hello at 0x>" - ) - exp_bound = False - exp_lambda = False - # A local object is not pickleable. - exp_pickled = False - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_func2(self) -> None: - # Global function. - func = hello - exp_str = r"" - exp_bound = False - exp_lambda = False - # A global function is pickleable since it's not bound locally or - # to an object. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method1(self) -> None: - # A class method but unbound to an object. - func = _ClassPickleable.say - exp_str = r"" - exp_bound = False - exp_lambda = False - # A unbound class method is actually pickleable. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method2(self) -> None: - # A static class method. - func = _ClassPickleable.say2 - exp_str = r"" - exp_bound = False - exp_lambda = False - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method3(self) -> None: - # A bound method. - class_instance = _ClassPickleable() - func = class_instance.say - exp_str = r">" - exp_bound = True - exp_lambda = False - # A method bound to an object is just a function, so it's pickleable. - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - def test_method4(self) -> None: - # A static class method. 
- class_instance = _ClassPickleable() - func = class_instance.say2 - exp_str = r"" - exp_bound = False - exp_lambda = False - exp_pickled = True - self.helper(func, exp_str, exp_bound, exp_lambda, exp_pickled) - - -# ############################################################################# -# Test_is_pickleable2 -# ############################################################################# - - -class Test_is_pickleable2(hunitest.TestCase): - def helper( - self, - obj: Any, - mode: str, - expected: bool, - ) -> None: - """ - Check that picklebility is detected correctly for specified mode. - """ - _LOG.debug("obj=%s", obj) - actual = hintros.is_pickleable(obj, mode=mode) - _LOG.debug("actual=%s", actual) - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_non_callable1(self) -> None: - obj = [1, "2", 0.3] - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_non_callable2(self) -> None: - obj = [1, "2", 0.3] - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_lambda1(self) -> None: - obj = lambda x: x - mode = "type_search" - expected = False - self.helper(obj, mode, expected) - - def test_lambda2(self) -> None: - obj = lambda x: x - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - def test_local_object1(self) -> None: - def _hello() -> bool: - return False - - obj = _hello - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_local_object2(self) -> None: - def _hello() -> bool: - return False - - obj = _hello - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - def test_global_object1(self) -> None: - obj = hello - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_global_object2(self) -> None: - obj = hello - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def 
test_unbound_class_method1(self) -> None: - obj = _ClassPickleable.say - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_unbound_class_method2(self) -> None: - obj = _ClassPickleable.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_static_class_method1(self) -> None: - obj = _ClassPickleable.say - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_static_class_method2(self) -> None: - obj = _ClassPickleable.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_bound_to_object_method1(self) -> None: - class_instance = _ClassPickleable() - obj = class_instance.say - mode = "type_search" - expected = False - self.helper(obj, mode, expected) - - def test_bound_to_object_method2(self) -> None: - class_instance = _ClassPickleable() - obj = class_instance.say - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_pickleable_class1(self) -> None: - obj = _ClassPickleable() - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_pickleable_class2(self) -> None: - obj = _ClassPickleable() - mode = "try_and_catch" - expected = True - self.helper(obj, mode, expected) - - def test_nonpickleable_class1(self) -> None: - obj = _ClassNonPickleable() - mode = "type_search" - expected = True - self.helper(obj, mode, expected) - - def test_nonpickleable_class2(self) -> None: - obj = _ClassNonPickleable() - mode = "try_and_catch" - expected = False - self.helper(obj, mode, expected) - - -# ############################################################################# -# Test_get_function_name1 -# ############################################################################# - - -def test_function() -> None: - pass - - -# ############################################################################# -# Test_get_function_name1 -# 
############################################################################# - - -class Test_get_function_name1(hunitest.TestCase): - def test1(self) -> None: - actual = hintros.get_function_name() - expected = "test1" - self.assert_equal(actual, expected, purify_text=True) - - -# ############################################################################# -# Test_get_name_from_function1 -# ############################################################################# - - -class Test_get_name_from_function1(hunitest.TestCase): - def test1(self) -> None: - actual = hintros.get_name_from_function(test_function) - actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) - expected = "helpers.test.test_hintrospection.test_function" - self.assert_equal(actual, expected, purify_text=True) - - -# ############################################################################# -# Test_get_function_from_string1 -# ############################################################################# - - -def dummy_function() -> None: - pass - - -# ############################################################################# -# Test_get_function_from_string1 -# ############################################################################# - - -class Test_get_function_from_string1(hunitest.TestCase): - def test1(self) -> None: - """ - Test that function is correctly extracted from a string. - """ - func_str = "helpers.test.test_hintrospection.dummy_function" - # Compute the actual value. - act_func = hintros.get_function_from_string(func_str) - actual = hintros.get_name_from_function(act_func) - actual = hstring.remove_prefix(actual, "amp.", assert_on_error=False) - # Compute the expected value. - exp_func = dummy_function - expected = hintros.get_name_from_function(exp_func) - expected = hstring.remove_prefix(expected, "amp.", assert_on_error=False) - # Run. 
- hdbg.dassert_isinstance(act_func, Callable) - # The function can have different names depending on whether `helpers` - # is a sub-repo or a super-repo: - # helpers.test.test_hintrospection.dummy_function - # helpers_root.helpers.test.test_hintrospection.dummy_function - # - actual = re.sub( - r"helpers_root\.helpers\.", "helpers.", actual, flags=re.MULTILINE - ) - expected = re.sub( - r"helpers_root\.helpers\.", "helpers.", expected, flags=re.MULTILINE - ) - self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py deleted file mode 100644 index cbf1f16f3..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hio.py +++ /dev/null @@ -1,225 +0,0 @@ -import logging -import os - -import numpy as np -import pandas as pd - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_rename_file_if_exists -# ############################################################################# - - -class Test_rename_file_if_exists(hunitest.TestCase): - """ - Test that the function renames existing files correctly. - """ - - def check_file( - self, - file_to_rename: str, - before_extension: bool, - expected_file_name: str, - ) -> None: - """ - Check that file is renamed correctly. - """ - # Create a target file to rename. - scratch_dir = self.get_scratch_space() - file_name = "test_file.txt" - file_path = os.path.join(scratch_dir, file_name) - lines = "" - hio.to_file(file_path, lines) - # Rename the file. 
- file_to_rename = os.path.join(scratch_dir, file_to_rename) - suffix = "suffix" - hio.rename_file_if_exists( - file_to_rename, suffix, before_extension=before_extension - ) - # Check that file is renamed. - expected_file_path = os.path.join(scratch_dir, expected_file_name) - self.assertTrue(os.path.exists(expected_file_path)) - - def test1(self) -> None: - """ - Test that suffix is added before an extension. - """ - file_to_rename = "test_file.txt" - before_extension = True - expected_file_name = "test_file.suffix.txt" - self.check_file(file_to_rename, before_extension, expected_file_name) - - def test2(self) -> None: - """ - Test that suffix is added after an extension. - """ - file_to_rename = "test_file.txt" - before_extension = False - expected_file_name = "test_file.txt.suffix" - self.check_file(file_to_rename, before_extension, expected_file_name) - - def test3(self) -> None: - """ - Test that non-existing file is not renamed. - """ - file_to_rename = "not_exist.txt" - before_extension = False - expected_file_name = "not_exist.txt" - with self.assertRaises(AssertionError): - self.check_file(file_to_rename, before_extension, expected_file_name) - - -# ############################################################################# -# Test_find_all_files1 -# ############################################################################# - - -class Test_find_all_files1(hunitest.TestCase): - def test1(self) -> None: - dir_name = hgit.get_client_root(super_module=False) - # Check that there are files. - pattern = "*" - only_files = True - use_relative_paths = True - all_files = hio.listdir( - dir_name, pattern, only_files, use_relative_paths - ) - self.assertGreater(len(all_files), 0) - # Check that there are more files than Python files. 
- exclude_paired_jupytext = False - py_files = hio.keep_python_files(all_files, exclude_paired_jupytext) - self.assertGreater(len(py_files), 0) - self.assertGreater(len(all_files), len(py_files)) - # Check that there are more Python files than not paired Python files. - exclude_paired_jupytext = True - not_paired_py_files = hio.keep_python_files( - all_files, exclude_paired_jupytext - ) - self.assertGreater(len(not_paired_py_files), 0) - self.assertGreater(len(py_files), len(not_paired_py_files)) - - -# ############################################################################# -# Test_change_filename_extension1 -# ############################################################################# - - -class Test_change_filename_extension1(hunitest.TestCase): - def test1(self) -> None: - file_name = "./core/dataflow_model/notebooks/Master_experiment_runner.py" - actual = hio.change_filename_extension(file_name, "py", "ipynb") - expected = ( - "./core/dataflow_model/notebooks/Master_experiment_runner.ipynb" - ) - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_load_df_from_json -# ############################################################################# - - -class Test_load_df_from_json(hunitest.TestCase): - def test1(self) -> None: - test_json_path = os.path.join(self.get_input_dir(), "test.json") - actual_result = hio.load_df_from_json(test_json_path) - expected_result = pd.DataFrame( - { - "col1": ["a", "b", "c", "d"], - "col2": ["a", "b", np.nan, np.nan], - "col3": ["a", "b", "c", np.nan], - } - ) - actual_result = hpandas.df_to_str(actual_result) - expected_result = hpandas.df_to_str(expected_result) - self.assertEqual(actual_result, expected_result) - - -# ############################################################################# -# Test_safe_rm_file -# ############################################################################# - - -class Test_safe_rm_file(hunitest.TestCase): 
- def test_successful_removal_within_git_client(self) -> None: - """ - Test successful removal of directory within Git client. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - test_dir = os.path.join(scratch_dir, "test_dir_to_remove") - os.makedirs(test_dir) - # Create a test file in the directory to ensure it has content - test_file = os.path.join(test_dir, "test_file.txt") - hio.to_file(test_file, "test content") - # Verify directory exists before removal - self.assertTrue(os.path.exists(test_dir)) - # Run test. - hio.safe_rm_file(test_dir) - # Check output. - self.assertFalse(os.path.exists(test_dir)) - - def test_removal_of_nested_directory(self) -> None: - """ - Test removal of deeply nested directory structure. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - nested_dir = os.path.join(scratch_dir, "parent", "child", "grandchild") - os.makedirs(nested_dir) - # Create files at different levels - hio.to_file(os.path.join(nested_dir, "file1.txt"), "content1") - hio.to_file( - os.path.join(os.path.dirname(nested_dir), "file2.txt"), "content2" - ) - parent_dir = os.path.join(scratch_dir, "parent") - # Verify directory exists - self.assertTrue(os.path.exists(parent_dir)) - # Run test. - hio.safe_rm_file(parent_dir) - # Check output. - self.assertFalse(os.path.exists(parent_dir)) - - def test_directory_does_not_exist(self) -> None: - """ - Test that function raises assertion error for non-existent directory. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - non_existent_dir = os.path.join(scratch_dir, "non_existent_directory") - # Ensure directory doesn't exist - self.assertFalse(os.path.exists(non_existent_dir)) - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(non_existent_dir) - self.assertIn("does not exist", str(cm.exception)) - - def test_cannot_delete_git_root(self) -> None: - """ - Test that function prevents deletion of Git client root directory. 
- """ - # Prepare inputs. - git_root = hgit.find_git_root() - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(git_root) - self.assertIn("Cannot delete Git client root", str(cm.exception)) - - def test_directory_outside_git_client_rejected(self) -> None: - """ - Test that function rejects directories outside Git client. - """ - # Prepare inputs. - # Use /tmp which should be outside any Git client - outside_dir = "/tmp" - # Run test and check output. - with self.assertRaises(AssertionError) as cm: - hio.safe_rm_file(outside_dir) - self.assertIn("is not within Git client root", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py deleted file mode 100644 index 70450e943..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlatex.py +++ /dev/null @@ -1,665 +0,0 @@ -""" -Unit tests for hlatex module. 
- -This module tests LaTeX text processing utilities including: -- Removing LaTeX formatting commands -- Detecting LaTeX line separators -- Framing sections with separator lines -- Detecting LaTeX comments -- Extracting section headers and their hierarchy -""" - -import logging - -import helpers.hlatex as hlatex -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# ############################################################################# -# Test_remove_latex_formatting1 -# ############################################################################# - - -class Test_remove_latex_formatting1(hunitest.TestCase): - """ - Test the remove_latex_formatting function. - """ - - def test1(self) -> None: - """ - Test removal of textcolor commands from LaTeX text. - """ - # Prepare inputs. - txt = r""" - - If there is \textcolor{red}{no pattern}, we can try learning: - - Measure if \textcolor{blue}{learning works}. - - In the \textcolor{orange}{worst case}, conclude that it - \textcolor{green}{does not work}. - - If we can find the \textcolor{purple}{solution in one step} or - \textcolor{cyan}{program the solution}: - - \textcolor{brown}{Machine learning} is not the \textcolor{teal}{recommended - technique}, but it still works. - - Without \textcolor{magenta}{data}, we cannot do anything: - \textcolor{violet}{data is all that matters}. - """ - txt = hprint.dedent(txt) - # Prepare outputs. - expected = r""" - - If there is no pattern, we can try learning: - - Measure if learning works. - - In the worst case, conclude that it - does not work. - - If we can find the solution in one step or - program the solution: - - Machine learning is not the recommended - technique, but it still works. 
- - Without data, we cannot do anything: - data is all that matters.""" - expected = hprint.dedent(expected) - # Run test. - actual = hlatex.remove_latex_formatting(txt) - # Check outputs. - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_is_latex_line_separator1 -# ############################################################################# - - -class Test_is_latex_line_separator1(hunitest.TestCase): - """ - Test the _is_latex_line_separator function. - """ - - def test1(self) -> None: - """ - Test that a line with repeated # characters is recognized as separator. - """ - # Prepare inputs. - line = "% ##########" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test2(self) -> None: - """ - Test that a line with repeated = characters is recognized as separator. - """ - # Prepare inputs. - line = "% ==========" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test3(self) -> None: - """ - Test that a line with repeated - characters is recognized as separator. - """ - # Prepare inputs. - line = "% ----------" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertTrue(actual) - - def test4(self) -> None: - """ - Test that a line with too few repeated characters is not a separator. - """ - # Prepare inputs. - line = "% ####" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. - self.assertFalse(actual) - - def test5(self) -> None: - """ - Test that a regular comment is not recognized as separator. - """ - # Prepare inputs. - line = "% This is a regular comment" - # Run test. - actual = hlatex._is_latex_line_separator(line) - # Check outputs. 
- self.assertFalse(actual) - - -# ############################################################################# -# Test_frame_sections1 -# ############################################################################# - - -class Test_frame_sections1(hunitest.TestCase): - """ - Test the frame_sections function. - """ - - def helper(self, input_txt: str, expected: str) -> None: - """ - Helper method to test frame_sections function. - - :param input_txt: Input LaTeX text - :param expected: Expected output after processing - """ - # Prepare inputs. - lines = hprint.dedent(input_txt) - lines = lines.split("\n") - # Run test. - actual = hlatex.frame_sections(lines) - actual = "\n".join(actual) - # Prepare outputs. - expected = hprint.dedent(expected) - # Check outputs. - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test adding separator before a single section command. - """ - # Prepare inputs. - input_txt = r""" - \section{Introduction} - This is the introduction. - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - This is the introduction. - """ - # Run test. - self.helper(input_txt, expected) - - def test2(self) -> None: - """ - Test adding separators before section, subsection, and subsubsection. - """ - # Prepare inputs. - input_txt = r""" - \section{Proposed framework} - - \subsection{Combining Physics-Informed and Data-Driven Approaches} - - \subsubsection{Detailed Analysis} - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Proposed framework} - - % ============================================================================== - \subsection{Combining Physics-Informed and Data-Driven Approaches} - - % ------------------------------------------------------------------------------ - \subsubsection{Detailed Analysis} - """ - # Run test. 
- self.helper(input_txt, expected) - - def test3(self) -> None: - """ - Test that existing separators are removed and replaced with correct ones. - """ - # Prepare inputs. - input_txt = r""" - % ============== - \section{Introduction} - - % ############## - \subsection{Background} - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - - % ============================================================================== - \subsection{Background} - """ - # Run test. - self.helper(input_txt, expected) - - def test4(self) -> None: - """ - Test that multiple consecutive empty lines are reduced to one. - """ - # Prepare inputs. - input_txt = r""" - \section{Introduction} - - - - This is text after multiple empty lines. - """ - # Prepare outputs. - expected = r""" - % ############################################################################## - \section{Introduction} - - This is text after multiple empty lines. - """ - # Run test. - self.helper(input_txt, expected) - - def test5(self) -> None: - """ - Test with mixed content including text, sections, and empty lines. - """ - # Prepare inputs. - input_txt = r""" - This is some introductory text. - - \section{Methods} - - We describe the methods here. - - - \subsection{Data Collection} - - Details about data collection. - - \subsubsection{Sampling Strategy} - - Sampling details here. - """ - # Prepare outputs. - expected = r""" - This is some introductory text. - - % ############################################################################## - \section{Methods} - - We describe the methods here. - - % ============================================================================== - \subsection{Data Collection} - - Details about data collection. - - % ------------------------------------------------------------------------------ - \subsubsection{Sampling Strategy} - - Sampling details here. - """ - # Run test. 
- self.helper(input_txt, expected) - - def test6(self) -> None: - """ - Test that lines without section commands are left unchanged. - """ - # Prepare inputs. - input_txt = r""" - This is regular text. - No sections here. - Just content. - """ - # Prepare outputs. - expected = r""" - This is regular text. - No sections here. - Just content. - """ - # Run test. - self.helper(input_txt, expected) - - -# ############################################################################# -# Test_is_latex_comment -# ############################################################################# - - -class Test_is_latex_comment(hunitest.TestCase): - """ - Test the _is_latex_comment function. - """ - - def test1(self) -> None: - """ - Test that a line starting with % is recognized as a comment. - """ - # Prepare inputs. - line = "% This is a comment" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - def test2(self) -> None: - """ - Test that a line with leading whitespace and % is a comment. - """ - # Prepare inputs. - line = " % This is a comment" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - def test3(self) -> None: - """ - Test that a regular line is not recognized as a comment. - """ - # Prepare inputs. - line = "This is regular text" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertFalse(actual) - - def test4(self) -> None: - """ - Test that a line with escaped % character is not a comment. - """ - # Prepare inputs. - line = r"The value is \% of the total" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertFalse(actual) - - def test5(self) -> None: - """ - Test that a line with % in the middle is not a comment. - """ - # Prepare inputs. - line = r"Text before \% and after" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. 
- self.assertFalse(actual) - - def test6(self) -> None: - """ - Test that a line with only % is a comment. - """ - # Prepare inputs. - line = "%" - # Run test. - actual = hlatex._is_latex_comment(line) - # Check outputs. - self.assertTrue(actual) - - -# ############################################################################# -# Test_extract_latex_section -# ############################################################################# - - -class Test_extract_latex_section(hunitest.TestCase): - """ - Test the _extract_latex_section function. - """ - - def helper( - self, line: str, expected_level: int, expected_title: str - ) -> None: - """ - Helper method to test extraction of LaTeX section commands. - - :param line: LaTeX line to parse - :param expected_level: Expected section level (0 if no section) - :param expected_title: Expected title (empty string if no section) - """ - # Prepare inputs - line_number is arbitrary for testing. - line_number = 1 - # Run test. - header_info = hlatex._extract_latex_section(line, line_number) - # Check outputs. - if expected_level == 0: - # No section expected. - self.assertIsNone(header_info) - else: - # Section expected. - self.assertIsNotNone(header_info) - self.assert_equal(str(header_info.level), str(expected_level)) - self.assert_equal(header_info.description, expected_title) - - def test1(self) -> None: - """ - Test extraction of basic section command. - """ - line = r"\section{Introduction}" - self.helper(line, 1, "Introduction") - - def test2(self) -> None: - """ - Test extraction of basic subsection command. - """ - line = r"\subsection{Background}" - self.helper(line, 2, "Background") - - def test3(self) -> None: - """ - Test extraction of basic subsubsection command. - """ - line = r"\subsubsection{Details}" - self.helper(line, 3, "Details") - - def test4(self) -> None: - """ - Test extraction of section with nested LaTeX commands. 
- """ - line = r"\section{Introduction to \textbf{Machine Learning}}" - self.helper(line, 1, r"Introduction to \textbf{Machine Learning}") - - def test5(self) -> None: - """ - Test extraction of section with optional short title. - """ - line = r"\section[Short Title]{Long Title for Table of Contents}" - # Should extract the long title (in curly braces). - self.helper(line, 1, "Long Title for Table of Contents") - - def test6(self) -> None: - """ - Test extraction of section with escaped special characters. - """ - line = r"\section{Cost Analysis: \$100 \& More}" - self.helper(line, 1, r"Cost Analysis: \$100 \& More") - - def test7(self) -> None: - """ - Test extraction of section with leading whitespace. - """ - line = r" \section{Methods}" - self.helper(line, 1, "Methods") - - def test8(self) -> None: - """ - Test that a regular line is not recognized as a section. - """ - line = "This is regular text" - self.helper(line, 0, "") - - def test9(self) -> None: - """ - Test that section with empty title is not extracted. - """ - line = r"\section{}" - # Sections with empty titles should not be extracted. - self.helper(line, 0, "") - - -# ############################################################################# -# Test_extract_headers_from_latex -# ############################################################################# - - -class Test_extract_headers_from_latex(hunitest.TestCase): - """ - Test the extract_headers_from_latex function. - """ - - def helper(self, lines: str, expected: str, *, max_level: int = 3) -> None: - """ - Helper method to test header extraction from LaTeX documents. - - :param lines: LaTeX document content as a string - :param expected: Expected string representation of header list - :param max_level: Maximum header level to extract (default: 3) - """ - # Prepare inputs. - lines_list = hprint.dedent(lines).split("\n") - # Run test. 
- actual = hlatex.extract_headers_from_latex( - lines_list, max_level, sanity_check=False - ) - actual_str = hmarhead.header_list_to_str(actual) - # Prepare outputs. - expected = hprint.dedent(expected) - # Check outputs. - self.assert_equal(actual_str, expected) - - def test1(self) -> None: - """ - Test extraction from a basic LaTeX document with multiple section levels. - """ - # Prepare inputs. - lines = r""" - \section{Introduction} - This is the introduction. - - \subsection{Background} - Background information here. - - \section{Methods} - Methods description. - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Introduction', 1) - HeaderInfo(2, 'Background', 4) - HeaderInfo(1, 'Methods', 7)""" - # Run test. - self.helper(lines, expected) - - def test2(self) -> None: - """ - Test that commented-out sections are skipped. - """ - # Prepare inputs. - lines = r""" - \section{Introduction} - % \section{Old Section} - \subsection{Current Subsection} - % \subsection{Old Subsection} - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Introduction', 1) - HeaderInfo(2, 'Current Subsection', 3)""" - # Run test. - self.helper(lines, expected) - - def test3(self) -> None: - """ - Test that only headers up to max_level are extracted. - """ - # Prepare inputs. - lines = r""" - \section{Chapter 1} - \subsection{Section 1.1} - \subsubsection{Section 1.1.1} - """ - # Prepare outputs. - # Should only get section and subsection, not subsubsection. - expected = """ - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(2, 'Section 1.1', 2)""" - # Run test. - self.helper(lines, expected, max_level=2) - - def test4(self) -> None: - """ - Test extraction with nested LaTeX commands in titles. - """ - # Prepare inputs. - lines = r""" - \section{Introduction to \textbf{ML}} - \subsection{Using \emph{Neural Networks}} - """ - # Prepare outputs. - expected = r""" - HeaderInfo(1, 'Introduction to \textbf{ML}', 1) - HeaderInfo(2, 'Using \emph{Neural Networks}', 2)""" - # Run test. 
- self.helper(lines, expected) - - def test5(self) -> None: - """ - Test that line numbers are correctly recorded. - """ - # Prepare inputs. - lines = r""" - Some text here. - - \section{First Section} - More text. - - \subsection{First Subsection} - Even more text. - """ - # Prepare outputs. - # Line numbers should be 3 and 6 (1-indexed). - expected = """ - HeaderInfo(1, 'First Section', 3) - HeaderInfo(2, 'First Subsection', 6)""" - # Run test. - self.helper(lines, expected) - - def test6(self) -> None: - """ - Test extraction from document with no sections. - """ - # Prepare inputs. - lines = """ - This is just regular text. - No sections here. - """ - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, expected) - - def test7(self) -> None: - """ - Test extraction with all three section levels. - """ - # Prepare inputs. - lines = r""" - \section{Chapter 1} - Introduction to chapter. - - \subsection{Section 1.1} - Section content. - - \subsubsection{Subsection 1.1.1} - Detailed content. - - \subsection{Section 1.2} - More content. - - \section{Chapter 2} - Second chapter. - """ - # Prepare outputs. - expected = """ - HeaderInfo(1, 'Chapter 1', 1) - HeaderInfo(2, 'Section 1.1', 4) - HeaderInfo(3, 'Subsection 1.1.1', 7) - HeaderInfo(2, 'Section 1.2', 10) - HeaderInfo(1, 'Chapter 2', 13)""" - # Run test. 
- self.helper(lines, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py deleted file mode 100644 index f8d9b237d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlist.py +++ /dev/null @@ -1,176 +0,0 @@ -import logging -from typing import List, Optional - -import helpers.hlist as hlist -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_list_find_duplicates1 -# ############################################################################# - - -class Test_list_find_duplicates1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(list_out, []) - - def test2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(set(list_out), set("a f".split())) - - -# ############################################################################# -# Test_list_remove_duplicates1 -# ############################################################################# - - -class Test_list_remove_duplicates1(hunitest.TestCase): - def test1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d".split()) - - def test2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d e f".split()) - - def test3(self) -> None: - list_ = "a b c a d e f f".split() - list_ = list(reversed(list_)) - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "f e d a c b".split()) - - -# 
############################################################################# -# Test_list_extract1 -# ############################################################################# - - -class Test_list_extract1(hunitest.TestCase): - def _helper( - self, - start_idx: Optional[int], - end_idx: Optional[int], - expected_list: List[str], - ) -> None: - list_ = "a b c d".split() - actual_list = hlist.extract(list_, start_idx, end_idx) - self.assertEqual(actual_list, expected_list) - - def test1(self) -> None: - start_idx = 0 - end_idx = 1 - expected_list = "a".split() - self._helper(start_idx, end_idx, expected_list) - - def test2(self) -> None: - start_idx = 1 - end_idx = None - expected_list = "b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test3(self) -> None: - start_idx = None - end_idx = None - expected_list = "a b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test4(self) -> None: - start_idx = None - end_idx = 2 - expected_list = "a b".split() - self._helper(start_idx, end_idx, expected_list) - - def test5(self) -> None: - start_idx = None - end_idx = 2 - expected_list = "a b".split() - self._helper(start_idx, end_idx, expected_list) - - def test6(self) -> None: - start_idx = 0 - end_idx = 4 - expected_list = "a b c d".split() - self._helper(start_idx, end_idx, expected_list) - - def test7(self) -> None: - start_idx = 0 - end_idx = 3 - expected_list = "a b c".split() - self._helper(start_idx, end_idx, expected_list) - - -# ############################################################################# -# Test_list_chunk1 -# ############################################################################# - - -class Test_list_chunk1(hunitest.TestCase): - def _helper(self, n: int, expected_list: List[List[str]]) -> None: - list_ = "a b c d e f".split() - actual_list = hlist.chunk(list_, n) - self.assertEqual(actual_list, expected_list) - - def test1(self) -> None: - n = 1 - expected_list = ["a b c d e f".split()] - 
self._helper(n, expected_list) - - def test2(self) -> None: - n = 2 - expected_list = [["a", "b", "c"], ["d", "e", "f"]] - self._helper(n, expected_list) - - def test3(self) -> None: - n = 3 - expected_list = [["a", "b"], ["c", "d"], ["e", "f"]] - self._helper(n, expected_list) - - def test4(self) -> None: - n = 4 - expected_list = [["a", "b"], ["c", "d"], ["e"], ["f"]] - self._helper(n, expected_list) - - def test5(self) -> None: - n = 6 - expected_list = [["a"], ["b"], ["c"], ["d"], ["e"], ["f"]] - self._helper(n, expected_list) - - -# ############################################################################# -# Test_list1 -# ############################################################################# - - -class Test_list1(hunitest.TestCase): - def test_find_duplicates1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(list_out, []) - - def test_find_duplicates2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.find_duplicates(list_) - self.assertEqual(set(list_out), set("a f".split())) - - def test_remove_duplicates1(self) -> None: - list_ = "a b c d".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d".split()) - - def test_remove_duplicates2(self) -> None: - list_ = "a b c a d e f f".split() - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "a b c d e f".split()) - - def test_remove_duplicates3(self) -> None: - list_ = "a b c a d e f f".split() - list_ = list(reversed(list_)) - list_out = hlist.remove_duplicates(list_) - self.assertEqual(list_out, "f e d a c b".split()) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py deleted file mode 100644 index 820d21519..000000000 --- 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm.py +++ /dev/null @@ -1,361 +0,0 @@ -import os -import types -import unittest.mock as umock -from typing import Any, Dict - -import pandas as pd -import pytest - -pytest.importorskip("openai") # noqa: E402 # pylint: disable=wrong-import-position -import helpers.hdbg as hdbg # noqa: E402 -import helpers.hllm as hllm # noqa: E402 -import helpers.hunit_test as hunitest # noqa: E402 - -_USER_PROMPT1 = "what is machine learning?" -_USER_PROMPT2 = _USER_PROMPT1.upper() - -_SYSTEM_PROMPT1 = "You are a helpful AI assistant." -_SYSTEM_PROMPT2 = ( - "You are a helpful AI assistant and excellent in explaining things." -) - -_TEMPERATURE1 = 0.1 -_TEMPERATURE2 = 0.2 - -_TOP_P1 = 0.5 - -_MODEL1 = "gpt-4o-mini" -_MODEL2 = "gpt-3.5-turbo" -_MODEL3 = "deepseek/deepseek-r1-0528-qwen3-8b:free" -_MODEL4 = "openai/gpt-4o-mini" - - -# Test functions for the unit tests. -def _get_completion_parameters1() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT1, - "system_prompt": _SYSTEM_PROMPT1, - "temperature": _TEMPERATURE1, - "model": _MODEL1, - } - return data - - -def _get_completion_parameters2() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT2, - "system_prompt": _SYSTEM_PROMPT2, - "temperature": _TEMPERATURE2, - "model": _MODEL2, - "top_p": _TOP_P1, - } - return data - - -def _get_completion_parameters3() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT2, - "system_prompt": _SYSTEM_PROMPT2, - "temperature": _TEMPERATURE2, - "model": _MODEL3, - "top_p": _TOP_P1, - } - return data - - -def _get_completion_parameters4() -> Dict[str, Any]: - data = { - "user_prompt": _USER_PROMPT1, - "system_prompt": _SYSTEM_PROMPT1, - "temperature": _TEMPERATURE1, - "model": _MODEL4, - } - return data - - -# ############################################################################# -# Test_get_completion -# 
############################################################################# - - -class Test_get_completion(hunitest.TestCase): - def test1(self) -> None: - """ - Verify that get_completion() returns response from cache with the - expected response. - """ - parameters1 = _get_completion_parameters1() - actual_response = hllm.get_completion( - **parameters1, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test2(self) -> None: - """ - Verify with different openai models. - """ - parameters2 = _get_completion_parameters2() - actual_response = hllm.get_completion( - **parameters2, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test3(self) -> None: - """ - Verify if hllm.get_completion() support openrouter models. - """ - parameters3 = _get_completion_parameters3() - actual_response = hllm.get_completion( - **parameters3, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - def test4(self) -> None: - """ - Verify with OpenAI-prefixed models. - """ - parameters4 = _get_completion_parameters4() - actual_response = hllm.get_completion( - **parameters4, cache_mode="HIT_CACHE_OR_ABORT" - ) - self.assertIsInstance(actual_response, str) - self.check_string(actual_response) - - -# ############################################################################# -# Test_response_to_txt -# ############################################################################# - - -class Test_response_to_txt(hunitest.TestCase): - # Dummy classes to satisfy `isinstance` checks. 
- - class DummyChatCompletion: - def __init__(self, text: str = "") -> None: - msg = types.SimpleNamespace(content=text) - choice = types.SimpleNamespace(message=msg) - self.choices = [choice] - - class DummyThreadMessage: - def __init__(self, text: str = "") -> None: - # mimic .content[0].text.value - value_obj = types.SimpleNamespace(value=text) - text_obj = types.SimpleNamespace(text=value_obj) - self.content = [text_obj] - - @umock.patch( - "openai.types.chat.chat_completion.ChatCompletion", - new=DummyChatCompletion, - ) - def test_chat_completion_branch(self) -> None: - resp = Test_response_to_txt.DummyChatCompletion("hello chat") - actual = hllm.response_to_txt(resp) - expected = "hello chat" - self.assert_equal(actual, expected) - - @umock.patch( - "openai.types.beta.threads.message.Message", - new=DummyThreadMessage, - ) - def test_thread_message_branch(self) -> None: - resp = Test_response_to_txt.DummyThreadMessage("thread reply") - actual = hllm.response_to_txt(resp) - expected = "thread reply" - self.assert_equal(actual, expected) - - def test_str_pass_through(self) -> None: - actual = hllm.response_to_txt("just a string") - expected = "just a string" - self.assert_equal(actual, expected) - - def test_unknown_type_raises(self) -> None: - with self.assertRaises(ValueError) as cm: - hllm.response_to_txt(12345) - self.assertIn("Unknown response type", str(cm.exception)) - - -# ############################################################################# -# Test_retrieve_openrouter_model_info -# ############################################################################# - - -class Test_retrieve_openrouter_model_info(hunitest.TestCase): - @umock.patch("requests.get") - def test_retrieve_success(self, mock_get) -> None: - # Prepare dummy JSON data. 
- data = [ - {"id": "model1", "name": "Model One"}, - {"id": "model2", "name": "Model Two"}, - ] - mock_response = umock.Mock() - mock_response.json.return_value = {"data": data} - mock_get.return_value = mock_response - # Call the function under test. - df = hllm._retrieve_openrouter_model_info() - # Build expected DataFrame. - expected_df = pd.DataFrame(data) - # Verify DataFrame content. - self.assertEqual( - df.to_dict(orient="records"), expected_df.to_dict(orient="records") - ) - # Ensure the correct URL was requested. - mock_get.assert_called_once_with("https://openrouter.ai/api/v1/models") - - @umock.patch("requests.get") - def test_missing_data_key_raises(self, mock_get) -> None: - # JSON missing the 'data' key. - mock_response = umock.Mock() - mock_response.json.return_value = {"wrong": []} - mock_get.return_value = mock_response - # Expect an assertion from hdbg.dassert_eq. - with self.assertRaises(AssertionError): - hllm._retrieve_openrouter_model_info() - - -# ############################################################################# -# Test_save_models_info_to_csv -# ############################################################################# - - -class Test_save_models_info_to_csv(hunitest.TestCase): - def get_temp_path(self) -> str: - """ - Helper function for creating temporary directory. - """ - self.tmp_dir = self.get_scratch_space() - tmp_file_name = "tmp.models_info.csv" - self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) - return self.tmp_path - - def test_save_models_info(self) -> None: - """ - Save Dataframe as a CSV and check. - """ - # Prepare a DataFrame with extra columns. 
- data = [ - { - "id": "m1", - "name": "Model1", - "description": "desc1", - "pricing": {"prompt": 0.1, "completion": 0.2}, - "supported_parameters": ["a", "b"], - "extra_col": 123, - }, - { - "id": "m2", - "name": "Model2", - "description": "desc2", - "pricing": {"prompt": 0.3, "completion": 0.4}, - "supported_parameters": ["c"], - "extra_col": 456, - }, - ] - df = pd.DataFrame(data) - output_file: str = self.get_temp_path() - # Call the function under test. - returned_df = hllm._save_models_info_to_csv(df, output_file) - # The returned DataFrame should have only the selected columns. - expected_columns = [ - "id", - "name", - "description", - "prompt_pricing", - "completion_pricing", - "supported_parameters", - ] - hdbg.dassert_eq(list(returned_df.columns), expected_columns) - # Verify pricing values are extracted correctly. - self.assert_equal( - str(returned_df["prompt_pricing"]), - str(pd.Series([0.1, 0.3], name="prompt_pricing", dtype=float)), - ) - self.assert_equal( - str(returned_df["completion_pricing"]), - str(pd.Series([0.2, 0.4], name="completion_pricing", dtype=float)), - ) - # File should be created and readable. - hdbg.dassert_file_exists(output_file) - saved_df = pd.read_csv(output_file) - self.assert_equal( - str(returned_df["completion_pricing"]), - str(saved_df["completion_pricing"]), - ) - self.assert_equal( - str(returned_df["prompt_pricing"]), str(saved_df["prompt_pricing"]) - ) - - -# ############################################################################# -# Test_calculate_cost -# ############################################################################# - - -class Test_calculate_cost(hunitest.TestCase): - def get_tmp_path(self) -> str: - """ - Return temporary file path. 
- """ - self.tmp_dir = self.get_scratch_space() - tmp_file_name: str = "tmp.models_info.csv" - self.tmp_path = os.path.join(self.tmp_dir, tmp_file_name) - return self.tmp_path - - def test_openai_cost(self) -> None: - """ - Known OpenAI model and token counts produce expected cost. - """ - comp = types.SimpleNamespace( - usage=types.SimpleNamespace( - prompt_tokens=1000000, completion_tokens=2000000 - ) - ) - llm_cost_tracker = hllm.LLMCostTracker() - cost = llm_cost_tracker.calculate_cost( - comp, model="gpt-3.5-turbo", models_info_file="" - ) - # 1000000*(0.5/1000000) + 20000000*(1.5/1000000) = 3.5 - self.assertAlmostEqual(cost, 3.5) - - def test_openai_unknown_model(self) -> None: - """ - Passing an unknown OpenAI model should raise an assertion or - ValueError. - """ - comp = types.SimpleNamespace( - usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) - ) - llm_cost_tracker = hllm.LLMCostTracker() - with pytest.raises(AssertionError): - llm_cost_tracker.calculate_cost( - comp, model="nonexistent-model", models_info_file="" - ) - - def test_openrouter_load_existing_csv(self) -> None: - """ - Assume that the CSV file exists for OpenRouter. - - Then we should load CSV and calculate cost without fetching. 
- """ - # Write a tiny CSV: id,prompt_pricing,completion_pricing - temp_csv_file = self.get_tmp_path() - pd.DataFrame( - { - "id": ["deepseek/m1"], - "prompt_pricing": [0.1], - "completion_pricing": [0.2], - } - ).to_csv(temp_csv_file, index=False) - comp = types.SimpleNamespace( - usage=types.SimpleNamespace(prompt_tokens=1, completion_tokens=1) - ) - llm_cost_tracker = hllm.LLMCostTracker() - cost = llm_cost_tracker.calculate_cost( - comp, - model="deepseek/m1", - models_info_file=temp_csv_file, - ) - # 1*0.1 + 1*0.2 = 0.1 + 0.2 = 0.3 - self.assertAlmostEqual(cost, 0.3) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py deleted file mode 100644 index fc684420b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hllm_cli.py +++ /dev/null @@ -1,1403 +0,0 @@ -import logging -import os -import time -from typing import Callable, Dict, Optional - -import pandas as pd -import pytest - -import helpers.hcache_simple as hcacsimp -import helpers.hio as hio -import helpers.hllm_cli as hllmcli -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -from helpers.test.test_hcache_simple import _BaseCacheTest - -_LOG = logging.getLogger(__name__) - -# Disable calling LLM when testing. -_RUN_REAL_LLM = False -# _RUN_REAL_LLM = True - -# ############################################################################# -# Test_apply_llm_with_files -# ############################################################################# - -# Test cases shared across both library and executable tests. -# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. 
-_TEST_CASES = [ - # llm_cli.py --input_file input.txt --output_file output.txt - ( - "Basic usage with input file", - {}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful math assistant. Solve the problem step by step." - ( - "With custom system prompt", - { - "system_prompt": "You are a helpful math assistant. Solve the problem step by step." - }, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --model gpt-4 - ( - "With specific model selection", - {"model": "gpt-4"}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --expected_num_chars 500 - ( - "With progress bar (expected character count)", - {"expected_num_chars": 500}, - ), - # llm_cli.py --input_file input.txt --output_file output.txt --system_prompt "You are a helpful assistant that provides concise answers" --model gpt-4o-mini --expected_num_chars 1000 - ( - "Complete example with all options", - { - "system_prompt": "You are a helpful assistant that provides concise answers", - "model": "gpt-4o-mini", - "expected_num_chars": 1000, - }, - ), -] - -# Test cases for input_text functionality. -# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. -_TEST_CASES_INPUT_TEXT = [ - # llm_cli.py --input_text "2+2=" --output_file output.txt - ( - "Basic usage with input text", - { - "input_text": "2+2=", - }, - ), - # llm_cli.py --input_text "What is Python?" --output_file output.txt --system_prompt "You are a helpful assistant" - ( - "With input text and system prompt", - { - "input_text": "What is Python?", - "system_prompt": "You are a helpful assistant", - }, - ), - # llm_cli.py --input_text "Explain recursion" --output_file output.txt --model gpt-4o-mini - ( - "With input text and specific model", - { - "input_text": "Explain recursion", - "model": "gpt-4o-mini", - }, - ), -] - -# Test cases for print_only functionality. 
-# Each tuple contains (description, kwargs) and corresponding llm_cli.py command. -_TEST_CASES_PRINT_ONLY = [ - # llm_cli.py --input_text "2+2=" --output_file - - ( - "Print to screen with input text", - { - "input_text": "2+2=", - "print_only": True, - }, - ), -] - - -# ############################################################################# -# TestApplyLlmBase -# ############################################################################# - - -class TestApplyLlmBase(_BaseCacheTest): - """ - Base class with helper methods for testing apply_llm functions. - - Provides common helper methods used across different test classes to - reduce code duplication and maintain consistency. - """ - - def _run_test_cases(self, use_llm_executable: bool) -> None: - """ - Helper method to run test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Get scratch space for test files. - scratch_dir = self.get_scratch_space() - # Create input file. - input_file = os.path.join(scratch_dir, "input.txt") - hio.to_file(input_file, "2+2=") - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES, 1): - _LOG.info("Running test case %d: %s", idx, description) - output_file = os.path.join(scratch_dir, f"output_{idx}.txt") - # Run test. - hllmcli.apply_llm_with_files( - input_file=input_file, - output_file=output_file, - use_llm_executable=use_llm_executable, - **kwargs, - ) - # Check that output file was created. - self.assertTrue(os.path.exists(output_file)) - # Check that output file is not empty. - output_content = hio.from_file(output_file) - self.assertGreater(len(output_content), 0) - - def _run_test_cases_input_text(self, use_llm_executable: bool) -> None: - """ - Helper method to run input_text test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Get scratch space for test files. 
- scratch_dir = self.get_scratch_space() - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES_INPUT_TEXT, 1): - _LOG.info("Running test case %d: %s", idx, description) - output_file = os.path.join(scratch_dir, f"output_text_{idx}.txt") - # Extract input_text from kwargs. - kwargs_copy = kwargs.copy() - input_text = kwargs_copy.pop("input_text") - # Run test using apply_llm directly. - response = hllmcli.apply_llm( - input_text, - use_llm_executable=use_llm_executable, - **kwargs_copy, - ) - # Write output to file. - hio.to_file(output_file, response) - # Check that output file was created. - self.assertTrue(os.path.exists(output_file)) - # Check that output file is not empty. - output_content = hio.from_file(output_file) - self.assertGreater(len(output_content), 0) - - -# ############################################################################# -# Test_apply_llm_with_files1 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_with_files1(TestApplyLlmBase): - """ - Test apply_llm_with_files using both library and executable interfaces. - - Tests run various command-line configurations to ensure they execute - without errors. Does not verify output correctness. - """ - - def test_library(self) -> None: - """ - Test multiple command-line configurations using library interface. - - Tests various command-line argument combinations to ensure they - execute without errors. Does not verify output correctness. - """ - self._run_test_cases(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test_executable(self) -> None: - """ - Test multiple command-line configurations using executable interface. - - Tests various command-line argument combinations to ensure they - execute without errors. Does not verify output correctness. 
- """ - self._run_test_cases(use_llm_executable=True) - - -# ############################################################################# -# Test_apply_llm_with_files2 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_with_files2(TestApplyLlmBase): - def test1_library(self) -> None: - """ - Test input_text parameter using library interface. - - Tests that input_text parameter works correctly when text is provided - directly instead of from a file. Does not verify output correctness. - """ - self._run_test_cases_input_text(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test1_executable(self) -> None: - """ - Test input_text parameter using executable interface. - - Tests that input_text parameter works correctly when text is provided - directly instead of from a file. Does not verify output correctness. - """ - self._run_test_cases_input_text(use_llm_executable=True) - - # ////////////////////////////////////////////////////////////////////////// - - def _run_test_cases_print_only(self, use_llm_executable: bool) -> None: - """ - Helper method to run print_only test cases with specified interface. - - :param use_llm_executable: if True, use CLI executable; if False, use library - """ - # Run each test case. - for idx, (description, kwargs) in enumerate(_TEST_CASES_PRINT_ONLY, 1): - _LOG.info("Running test case %d: %s", idx, description) - # Extract parameters from kwargs. - kwargs_copy = kwargs.copy() - input_text = kwargs_copy.pop("input_text") - kwargs_copy.pop("print_only") # Not needed for apply_llm - # Run test using apply_llm directly - this should print to stdout. - response = hllmcli.apply_llm( - input_text, - use_llm_executable=use_llm_executable, - **kwargs_copy, - ) - # Print response to stdout (simulating print_only behavior). 
- print(response) - - def test2_library(self) -> None: - """ - Test print_only parameter using library interface. - - Tests that print_only parameter works correctly when output should be - printed to screen instead of written to file. Does not verify output - correctness. - """ - self._run_test_cases_print_only(use_llm_executable=False) - - @pytest.mark.skipif( - not hllmcli._check_llm_executable(), reason="llm executable not found" - ) - def test2_executable(self) -> None: - """ - Test print_only parameter using executable interface. - - Tests that print_only parameter works correctly when output should be - printed to screen instead of written to file. Does not verify output - correctness. - """ - self._run_test_cases_print_only(use_llm_executable=True) - - -# ############################################################################# -# Test_llm1 -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_llm1(hunitest.TestCase): - """ - Test _llm() function with different models and prompt lengths. - - Tests verify that _llm() correctly processes prompts of varying lengths - across different models, and tracks timing and cost information. - """ - - @staticmethod - def get_short_prompt() -> str: - """ - Get a short test prompt. - - :return: short system prompt string - """ - prompt = "You are a helpful assistant. Answer concisely." - return prompt - - @staticmethod - def get_medium_prompt() -> str: - """ - Get a medium-length test prompt. - - :return: medium-length system prompt string - """ - prompt = """ - You are a helpful assistant. Your task is to provide clear and - accurate answers to questions. Always be concise but thorough in - your explanations. If you don't know something, acknowledge it. - Use simple language that anyone can understand. 
- """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def get_long_prompt() -> str: - """ - Get a long test prompt. - - :return: long system prompt string - """ - prompt = """ - You are a highly knowledgeable AI assistant with expertise across - multiple domains including technology, science, mathematics, and - general knowledge. Your primary objectives are: - - 1. Provide accurate and well-researched information - 2. Explain concepts clearly and thoroughly - 3. Use examples when they help clarify complex topics - 4. Cite sources or acknowledge uncertainty when appropriate - 5. Adapt your language to the user's level of understanding - 6. Break down complex problems into manageable steps - 7. Verify calculations and logical reasoning before responding - 8. Consider multiple perspectives when discussing controversial topics - - When answering questions: - - Start with a direct answer to the question - - Follow with supporting details and context - - Use bullet points or numbered lists for clarity - - Provide examples when helpful - - Suggest follow-up resources if relevant - - Always maintain a professional, helpful, and respectful tone. - """ - prompt = hprint.dedent(prompt) - return prompt - - def test1(self) -> None: - """ - Test _llm() with multiple models and prompt lengths. - - Tests short, medium, and long prompts across different models to - verify proper handling and cost calculation. Reports results in a - comprehensive table with time, cost, and cost-per-character metrics. - """ - hcacsimp.set_cache_property("_test_llm", "mode", "DISABLE_CACHE") - # Define test configurations with model-specific inputs. - # Questions are designed to elicit longer responses for more accurate cost - # comparisons. 
- test_configs = [ - ( - "gpt-5-nano", - "Explain the concept of machine learning and provide examples of its applications in real-world scenarios.", - ), - ( - "gpt-4o-mini", - "Describe the history and culture of Paris, France, including its major landmarks and contributions to art and literature.", - ), - ( - "gpt-4o", - "Explain what recursion is in computer science, provide multiple examples with code, and discuss when to use recursion versus iteration.", - ), - ] - # Store results for tabular reporting. - results = [] - # Run tests for each model and prompt type combination. - for model, input_str in test_configs: - for prompt_type, prompt_getter in [ - ("short", self.get_short_prompt), - ("medium", self.get_medium_prompt), - ("long", self.get_long_prompt), - ]: - _LOG.info("Testing model=%s with %s prompt", model, prompt_type) - system_prompt = prompt_getter() - # Run test. - start_time = time.time() - response, cost = hllmcli._llm(system_prompt, input_str, model) - elapsed_time = time.time() - start_time - # Check outputs. - self.assertIsInstance(response, str) - self.assertGreater(len(response), 0) - self.assertIsInstance(cost, float) - self.assertGreaterEqual(cost, 0.0) - # Calculate cost per character and cost per 1M characters. - response_len = len(response) - cost_per_char = cost / response_len if response_len > 0 else 0.0 - cost_per_1m_chars = ( - cost_per_char * 1_000_000 if response_len > 0 else 0.0 - ) - # Store results. - results.append( - { - "Model": model, - "Prompt Type": prompt_type, - "Time (s)": elapsed_time, - "Cost ($)": cost, - "Response Length": response_len, - "Cost/Char ($)": cost_per_char, - "Cost/1M Chars ($)": cost_per_1m_chars, - } - ) - # Create DataFrame for tabular display. - results_df = pd.DataFrame(results) - # Format numeric columns. 
- results_df["Time (s)"] = results_df["Time (s)"].round(2) - results_df["Cost ($)"] = results_df["Cost ($)"].round(6) - results_df["Cost/Char ($)"] = results_df["Cost/Char ($)"].round(8) - results_df["Cost/1M Chars ($)"] = results_df["Cost/1M Chars ($)"].round( - 2 - ) - # Log results table. - _LOG.info("\n%s", hprint.frame("LLM Test Results")) - with pd.option_context( - "display.max_columns", - None, - "display.max_rows", - None, - "display.width", - None, - "display.max_colwidth", - None, - ): - _LOG.info("\n%s", results_df.to_string(index=False)) - - -# ############################################################################# -# Test_apply_llm_batch1 -# ############################################################################# - - -def _eval_functor(input_str: str, *, delay: float = 0.0) -> str: - """ - Evaluate the input string using eval and return the result as a string. - - :param input_str: mathematical expression to evaluate - :return: result of evaluation as a string - """ - _LOG.debug("input_str='%s'", input_str) - if delay > 0.0: - time.sleep(delay) - result = eval(input_str) - result_str = str(result) - _LOG.debug("-> result_str='%s'", result_str) - return result_str - - -# ############################################################################# -# Test_apply_llm_batch1 -# ############################################################################# - - -class Test_apply_llm_batch1(hunitest.TestCase): - """ - Test and compare three batch processing approaches. - - Tests: - - apply_llm_batch_individual() - - apply_llm_batch_with_shared_prompt() - - apply_llm_batch_combined() - to verify they return consistent results using a testing functor that uses - eval. - """ - - @staticmethod - def get_test_prompt() -> str: - """ - Get a simple test prompt for batch processing. - - :return: system prompt string - """ - prompt = "You are a calculator. Return only the numeric result." 
- return prompt - - def helper( - self, - model: str, - func: Callable, - testing_functor: Optional[Callable[[str], str]], - ) -> None: - """ - Helper function to run a batch processing function with test inputs. - - :param func: batch processing function to test - :param testing_functor: optional testing functor for mocking - """ - _LOG.trace(hprint.to_str("model func testing_functor")) - # Create test inputs. - prompt = self.get_test_prompt() - input_list = ["2 + 2", "3 * 3", "10 - 5", "20 / 4"] - expected_responses = ["4", "9", "5", "5"] - # Run the function. - responses, cost = func( - prompt=prompt, - input_list=input_list, - model=model, - testing_functor=testing_functor, - ) - # Check basic properties. - responses = [str(int(float(r))) for r in responses] - self.assertEqual(responses, expected_responses) - if testing_functor is None: - self.assertGreater(cost, 0.0) - else: - self.assertEqual(cost, 0.0) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_individual1(self) -> None: - """ - Test apply_llm_batch_individual without testing_functor. - - This test uses the real LLM API. - """ - model = "gpt-5-nano" - func = hllmcli.apply_llm_batch_individual - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_individual2(self) -> None: - """ - Test apply_llm_batch_individual with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_individual - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_shared1(self) -> None: - """ - Test apply_llm_batch_with_shared_prompt without testing_functor. - - This test uses the real LLM API. 
- """ - model = "gpt-5-nano" - func = hllmcli.apply_llm_batch_with_shared_prompt - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_shared2(self) -> None: - """ - Test apply_llm_batch_with_shared_prompt with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_with_shared_prompt - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test_combined1(self) -> None: - """ - Test apply_llm_batch_combined without testing_functor. - - This test uses the real LLM API. - """ - model = "gpt-5-nano" - # model = "gpt-4o-mini" - func = hllmcli.apply_llm_batch_combined - testing_functor = None - self.helper( - model, - func, - testing_functor, - ) - - def test_combined2(self) -> None: - """ - Test apply_llm_batch_combined with testing_functor. - - This test uses a mock calculator instead of the real LLM API. - """ - model = "" - func = hllmcli.apply_llm_batch_combined - testing_functor = _eval_functor - self.helper( - model, - func, - testing_functor, - ) - - -# ############################################################################# -# Test_apply_llm_prompt_to_df1 -# ############################################################################# - - -class Test_apply_llm_prompt_to_df1(hunitest.TestCase): - """ - Test apply_llm_prompt_to_df with testing_functor. - - This is used to test the logic around `apply_llm_batch_*()` functions. - """ - - @staticmethod - def _extract_expression(obj) -> str: - """ - Extract mathematical expression from a DataFrame row or string. - - :param obj: either a string or a pandas Series - :return: extracted string for evaluation - """ - if isinstance(obj, pd.Series): - # Extract from DataFrame row. - if "expression" in obj.index: - expr = obj["expression"] - # Handle None, NaN, or empty string. 
- if pd.isna(expr) or expr == "": - return "" - return str(expr) - return "" - else: - # Already a string. - if pd.isna(obj) or obj == "": - return "" - return str(obj) - - def helper( - self, - df: pd.DataFrame, - batch_size: int, - expected_df: pd.DataFrame, - expected_stats: Dict[str, int], - ) -> None: - """ - Test apply_llm_prompt_to_df with testing_functor that uses eval. - """ - # Prepare inputs. - prompt = "Dummy" - extractor = self._extract_expression - # To test the progress bar. - # delay = 0.5 - delay = 0.0 - testing_functor = lambda input_str: _eval_functor(input_str, delay=delay) - # Run test. - result_df, stats = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df, - extractor=extractor, - target_col="result", - batch_mode="individual", - batch_size=batch_size, - model="gpt-5-nano", - testing_functor=testing_functor, - use_sys_stderr=True, - ) - # Check outputs. - self.assert_equal(str(result_df), str(expected_df)) - elapsed_time = stats.pop("elapsed_time_in_seconds") - self.assertGreater(elapsed_time, 0.0) - self.assertEqual(stats, expected_stats) - - def helper_test1(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with testing_functor that uses eval. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": ["2 + 3", "10 * 5", "100 - 25", "15 / 3"], - "result": ["5", "50", "75", "5.0"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test2(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with larger dataframe and batch_size > 1. - """ - # Prepare inputs. 
- df = pd.DataFrame( - { - "expression": [ - "1 + 1", - "2 * 3", - "10 - 5", - "20 / 4", - "3 ** 2", - "100 // 3", - "15 % 4", - ], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": [ - "1 + 1", - "2 * 3", - "10 - 5", - "20 / 4", - "3 ** 2", - "100 // 3", - "15 % 4", - ], - "result": ["2", "6", "5", "5.0", "9", "33", "3"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test3(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with pre-filled target column values. - - This test verifies that all rows are processed and pre-filled values - are overwritten with computed results from the testing_functor. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": [ - "5 + 5", - "3 * 4", - "20 - 8", - "16 / 2", - "2 ** 3", - ], - } - ) - # Pre-fill some values in the target column. - df["result"] = [None, "12", None, None, "8"] - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": [ - "5 + 5", - "3 * 4", - "20 - 8", - "16 / 2", - "2 ** 3", - ], - "result": ["10", "12", "12", "8.0", "8"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 0, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test4(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with rows that have empty extraction results. - - This test verifies that rows with empty or None expressions are skipped - and marked with empty string in the result column. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"], - } - ) - # Prepare outputs. 
- expected_df = pd.DataFrame( - { - "expression": ["5 + 5", "", "10 + 10", None, "15 + 15"], - "result": ["10", "", "20", "", "30"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 2, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. - self.helper(df, batch_size, expected_df, expected_stats) - - def helper_test5(self, batch_size: int) -> None: - """ - Test apply_llm_prompt_to_df with batch where all items have missing data. - - This test verifies that batches with all empty/None items are skipped - entirely and the else branch is executed. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "expression": ["1 + 1", "", None, "", "5 + 5"], - } - ) - # Prepare outputs. - expected_df = pd.DataFrame( - { - "expression": ["1 + 1", "", None, "", "5 + 5"], - "result": ["2", "", "", "", "10"], - } - ) - num_items = len(df) - expected_stats = { - "num_items": num_items, - "num_skipped": 3, - "num_batches": (num_items + batch_size - 1) // batch_size, - "total_cost_in_dollars": 0.0, - } - # Run test. 
- self.helper(df, batch_size, expected_df, expected_stats) - - # batch_size=1 - - def test1_num_batch1(self) -> None: - self.helper_test1(batch_size=1) - - def test2_num_batch1(self) -> None: - self.helper_test2(batch_size=1) - - def test3_num_batch1(self) -> None: - self.helper_test3(batch_size=1) - - def test4_num_batch1(self) -> None: - self.helper_test4(batch_size=1) - - def test5_num_batch1(self) -> None: - self.helper_test5(batch_size=1) - - # batch_size=2 - - def test1_num_batch2(self) -> None: - self.helper_test1(batch_size=2) - - def test2_num_batch2(self) -> None: - self.helper_test2(batch_size=2) - - def test3_num_batch2(self) -> None: - self.helper_test3(batch_size=2) - - def test4_num_batch2(self) -> None: - self.helper_test4(batch_size=2) - - def test5_num_batch2(self) -> None: - self.helper_test5(batch_size=2) - - # batch_size=3 - - def test1_num_batch3(self) -> None: - self.helper_test1(batch_size=3) - - def test2_num_batch3(self) -> None: - self.helper_test2(batch_size=3) - - def test3_num_batch3(self) -> None: - self.helper_test3(batch_size=3) - - def test4_num_batch3(self) -> None: - self.helper_test4(batch_size=3) - - def test5_num_batch3(self) -> None: - self.helper_test5(batch_size=3) - - # batch_size=10 - - def test1_num_batch10(self) -> None: - self.helper_test1(batch_size=10) - - def test2_num_batch10(self) -> None: - self.helper_test2(batch_size=10) - - def test3_num_batch10(self) -> None: - self.helper_test3(batch_size=10) - - def test4_num_batch10(self) -> None: - self.helper_test4(batch_size=10) - - def test5_num_batch10(self) -> None: - self.helper_test5(batch_size=10) - - -# ############################################################################# -# Test_apply_llm_prompt_to_df2 -# ############################################################################# - - -# TODO(gp): Convert this into a unit test for apply_llm_prompt. 
-class Test_apply_llm_prompt_to_df2(_BaseCacheTest): - """ - Test apply_llm_prompt_to_df with mocked cache. - """ - - @staticmethod - def get_test_prompt() -> str: - """ - Get a simple test prompt for LLM. - - This prompt asks the LLM to sum two numbers, providing a simple - and predictable test case. - - :return: system prompt string - """ - prompt = """ - You are a calculator. Given input in the format "a + b", return only - the sum as a number. - - Return ONLY the numeric result, nothing else. - """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def extract_test_fields(obj) -> str: - """ - Extract test fields from a DataFrame row or string. - - :param obj: either a string or a pandas Series - :return: extracted string for LLM processing - """ - if isinstance(obj, pd.Series): - # Extract from DataFrame row. - if "num1" in obj.index and "num2" in obj.index: - num1 = obj["num1"] - num2 = obj["num2"] - return f"{num1} + {num2}" - return "" - else: - # Already a string. - return obj - - def create_test_df(self) -> pd.DataFrame: - """ - Create a minimal DataFrame with test data (2 rows). - """ - df = pd.DataFrame( - { - "num1": [2, 10], - "num2": [3, 15], - } - ) - return df - - def run_cached_apply_llm_prompt_to_df(self) -> None: - prompt = self.get_test_prompt() - df = self.create_test_df() - prompt = self.get_test_prompt() - extractor = self.extract_test_fields - result_df, _ = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df, - extractor=extractor, - target_col="sum", - batch_mode="individual", - model="gpt-5-nano", - batch_size=10, - use_sys_stderr=True, - ) - _LOG.debug("result_df=%s", result_df) - # Check outputs. 
- expected_df = pd.DataFrame( - { - "num1": [2, 10], - "num2": [3, 15], - "sum": ["5", "25"], - } - ) - self.assert_equal(str(result_df), str(expected_df)) - - @pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", - ) - def test1(self) -> None: - """ - Warm up cache by calling apply_llm and save cache to file. - - This test creates a cache by calling apply_llm with test data, - then saves the cache to a file for use in subsequent tests. - """ - # Create a file with the cache content for test2 in the input directory. - input_dir = self.get_input_dir( - test_class_name=self.__class__.__name__, - test_method_name="test2", - ) - hcacsimp.set_cache_dir(input_dir) - # Call apply_llm to warm up the cache for both inputs. - self.run_cached_apply_llm_prompt_to_df() - # Flush the cache to disk to ensure it's saved. - hcacsimp.flush_cache_to_disk("_llm") - func_cache_data = hcacsimp.get_disk_cache("_llm") - # Check that the cache file exists and is not empty. - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=True - ) - - def test2(self) -> None: - """ - Test apply_llm_prompt_to_df with mocked cache. - - This test - - loads the cache file created in test1 - - mocks the cache with the data from the cache file - - verifies that apply_llm_prompt_to_df uses the cached values without - hitting the LLM API. - """ - # Prepare inputs. - # # Set up temporary cache directory. - scratch_dir = self.get_scratch_space() - hcacsimp.set_cache_dir(scratch_dir) - # Load the saved cache file from test2's input directory. - input_dir = self.get_input_dir() - # Load the cache data from the cache file. 
- cache_file = os.path.join(input_dir, "tmp.cache_simple._llm.json") - _LOG.debug("cache_file=%s", cache_file) - func_cache_data = hcacsimp._load_func_cache_data_from_file( - cache_file, "json" - ) - _LOG.debug("func_cache_data=%s", func_cache_data) - hcacsimp.sanity_check_function_cache( - func_cache_data, assert_on_empty=True - ) - _LOG.debug("Loaded func_cache_data=\n%s", func_cache_data) - hcacsimp.mock_cache_from_disk("_llm", func_cache_data) - try: - # Set abort_on_cache_miss to ensure we don't hit the LLM API. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True) - # Run apply_llm_prompt_to_df with mocked cache. - self.run_cached_apply_llm_prompt_to_df() - finally: - # Reset the cache property. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False) - - def test3(self) -> None: - """ - Test apply_llm_prompt_to_df without mocked cache. - - This test verifies that apply_llm_prompt_to_df raises an error when the - cache is missed and abort_on_cache_miss=True. - """ - # Set up temporary cache directory. - scratch_dir = self.get_scratch_space() - hcacsimp.set_cache_dir(scratch_dir) - try: - # Set abort_on_cache_miss to ensure we don't hit the LLM API. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", True) - with self.assertRaises(ValueError) as fail: - # Run apply_llm_prompt_to_df without mocked cache. - self.run_cached_apply_llm_prompt_to_df() - self.assertIn("Cache miss", str(fail.exception)) - finally: - # Reset the cache property. - hcacsimp.set_cache_property("_llm", "abort_on_cache_miss", False) - - -# ############################################################################# -# Test_apply_llm_batch_cost_comparison -# ############################################################################# - - -@pytest.mark.skipif( - not _RUN_REAL_LLM, - reason="Real LLM not enabled", -) -class Test_apply_llm_batch_cost_comparison(hunitest.TestCase): - """ - Test and compare costs of different batch processing approaches. 
- - Tests both direct batch function calls and apply_llm_prompt_to_df with - different batch modes. - """ - - @staticmethod - def get_person_industry_prompt() -> str: - """ - Get the industry classification prompt for testing. - - :return: system prompt string - """ - prompt = """ - Given the following list of industries with examples, classify the text into the - corresponding industry: - - Industrial & Built Environment - - Transportation & Logistics - - Consumer & Retail - - Technology & Digital Services - - Health & Life Sciences - - Finance & Professional Services - - Public & Social Sector - - Media, Marketing & Experiences - - You MUST report the industry exactly as one of the options above. Do not - include any other text. - If you are not sure about the industry, return "unknown". - """ - prompt = hprint.dedent(prompt) - return prompt - - @staticmethod - def get_test_industries() -> list: - """ - Get a list of test company descriptions for industry classification. - - :return: list of company descriptions - """ - industries = [ - "A company that sells fresh produce and operates farms", - "A car manufacturer that produces electric vehicles", - "A construction company specializing in residential buildings", - "A company that manufactures consumer electronics and appliances", - "An online learning platform providing courses for students", - "An electric utility company providing power generation services", - "A civil engineering firm providing infrastructure design", - "A company organizing corporate events and conferences", - "A bank providing retail banking and investment services", - "A nonprofit organization focused on environmental conservation", - "A hospital providing emergency and surgical medical services", - "A staffing agency providing recruitment and temp worker services", - "A data center company providing server hardware and infrastructure", - "A software development company creating enterprise resource planning systems", - "A cybersecurity firm 
providing threat detection and penetration testing", - "A cloud infrastructure provider offering scalable computing resources", - "An IT company providing network management and server maintenance", - "A consulting firm helping businesses integrate SAP and Oracle systems", - "A help desk company providing 24/7 technical support services", - "A data analytics company building business intelligence dashboards", - "A DevOps company providing CI/CD pipeline automation tools", - "A law firm specializing in corporate mergers and acquisitions", - "A shipping company providing international freight and logistics", - "A factory manufacturing industrial machinery and equipment", - "An advertising agency creating brand campaigns for consumer products", - "A streaming service providing movies and TV shows online", - "A pharmaceutical company developing new drugs and vaccines", - "A commercial real estate firm managing office building portfolios", - "An online retailer selling clothing and accessories through eCommerce", - "A sports equipment manufacturer producing gear for athletes", - "A telecommunications company providing mobile and internet services", - "A hotel chain operating luxury resorts and vacation properties", - ] - return industries - - def helper(self, model: str, batch_size: int) -> None: - """ - Compare costs and time of different batch modes in apply_llm_prompt_to_df. - - This test compares the performance of three batch modes: - 1. individual: processes each query separately - 2. shared_prompt: uses shared prompt context - 3. combined: combines all queries into single API call - """ - # Reset cache before each batch mode to ensure fair comparison. - hcacsimp.set_cache_dir(self.get_scratch_space()) - _LOG.info("Cache directory: %s", hcacsimp.get_cache_dir()) - hcacsimp.reset_cache("", interactive=False) - # Prepare inputs. 
- prompt = self.get_person_industry_prompt() - industries = self.get_test_industries() - testing_functor = None - # Create DataFrame from test data. - df = pd.DataFrame({"description": industries}) - - # Extractor function to get text from DataFrame row. - def extractor(obj): - if isinstance(obj, pd.Series): - return obj["description"] - return str(obj) - - # Test each batch mode. - batch_modes = ["individual", "shared_prompt", "combined"] - results = [] - # Store result DataFrames to compare across batch modes. - result_dfs = {} - for batch_mode in batch_modes: - _LOG.info( - "\n%s", hprint.frame("Testing batch mode: %s" % batch_mode) - ) - # Create a copy of the DataFrame for this batch mode. - df_copy = df.copy() - # Call apply_llm_prompt_to_df with the current batch mode. - result_df, stats = hllmcli.apply_llm_prompt_to_df( - prompt=prompt, - df=df_copy, - extractor=extractor, - target_col="industry", - batch_mode=batch_mode, - model=model, - batch_size=batch_size, - testing_functor=testing_functor, - use_sys_stderr=True, - ) - # Get elapsed time from stats. - elapsed_time = stats["elapsed_time_in_seconds"] - # Print time and cost for this batch mode. - _LOG.info( - "Batch mode '%s': Time=%.2fs, Cost=$%.6f", - batch_mode, - elapsed_time, - stats["total_cost_in_dollars"], - ) - # Store results. - results.append( - { - "Batch Mode": batch_mode, - "Time (s)": elapsed_time, - "Num Items": stats["num_items"], - "Num Skipped": stats["num_skipped"], - "Num Batches": stats["num_batches"], - "Total Cost ($)": stats["total_cost_in_dollars"], - } - ) - # Store result DataFrame for comparison. - result_dfs[batch_mode] = result_df - # Verify results. - self.assertEqual(len(result_df), len(industries)) - self.assertIn("industry", result_df.columns) - # Check that all batch modes produce the same results. - # Compare each batch mode's results with the first batch mode. 
- first_batch_mode = batch_modes[0] - first_result_df = result_dfs[first_batch_mode]["industry"].reset_index( - drop=True - ) - for batch_mode in batch_modes[1:]: - compare_result_df = result_dfs[batch_mode]["industry"].reset_index( - drop=True - ) - # Create a comparison DataFrame between the two batch modes. - match_df = pd.DataFrame( - { - first_batch_mode: first_result_df, - batch_mode: compare_result_df, - } - ) - # Add a column with whether they match or not. - match_df["Match"] = ( - match_df[first_batch_mode] == match_df[batch_mode] - ) - all_match = match_df["Match"].all() - if not all_match: - _LOG.error( - "Results mismatch between '%s' and '%s':\n%s", - first_batch_mode, - batch_mode, - match_df, - ) - _LOG.info( - "Results match between '%s' and '%s'", - first_batch_mode, - batch_mode, - ) - # Create comparison DataFrame. - comparison_df = pd.DataFrame(results) - # Add relative metrics compared to individual mode. - individual_time = comparison_df.loc[ - comparison_df["Batch Mode"] == "individual", "Time (s)" - ].iloc[0] - individual_cost = comparison_df.loc[ - comparison_df["Batch Mode"] == "individual", "Total Cost ($)" - ].iloc[0] - comparison_df["Time Ratio"] = comparison_df["Time (s)"] / individual_time - comparison_df["Cost Ratio"] = ( - comparison_df["Total Cost ($)"] / individual_cost - ) - # Format the DataFrame for better readability. - comparison_df["Time (s)"] = comparison_df["Time (s)"].round(2) - comparison_df["Total Cost ($)"] = comparison_df["Total Cost ($)"].round( - 6 - ) - comparison_df["Time Ratio"] = comparison_df["Time Ratio"].round(2) - comparison_df["Cost Ratio"] = comparison_df["Cost Ratio"].round(2) - # Print comparison_df without truncation. 
- with pd.option_context( - "display.max_columns", - None, - "display.max_rows", - None, - "display.width", - None, - "display.max_colwidth", - None, - ): - _LOG.info("Batch mode comparison:\n%s", comparison_df) - - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 17.98 32 4 0.000653 1.00 1.00 - # shared_prompt 17.60 32 4 0.000998 0.98 1.53 - # combined 8.42 32 4 0.000330 0.47 0.51 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 19.27 32 2 0.000651 1.00 1.00 - # shared_prompt 19.34 32 2 0.001385 1.00 2.13 - # combined 7.45 32 2 0.000277 0.39 0.43 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 16.38 32 1 0.000651 1.00 1.00 - # shared_prompt 17.51 32 1 0.002148 1.07 3.30 - # combined 6.15 32 1 0.000251 0.38 0.39 - def test1(self) -> None: - model = "gpt-4o-mini" - batch_size = 8 - self.helper(model, batch_size) - # - batch_size = 16 - self.helper(model, batch_size) - # - batch_size = 32 - self.helper(model, batch_size) - - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 68.57 32 4 0.002711 1.00 1.00 - # shared_prompt 53.07 32 4 0.002638 0.77 0.97 - # combined 29.30 32 4 0.001654 0.43 0.61 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 68.40 32 2 0.002788 1.00 1.00 - # shared_prompt 53.88 32 2 0.002809 0.79 1.01 - # combined 25.99 32 2 0.001643 0.38 0.59 - # - # Batch Mode Time (s) Num Items Num Batches Total Cost ($) Time Ratio Cost Ratio - # individual 59.38 32 1 0.002610 1.00 1.00 - # shared_prompt 52.61 32 1 0.002482 0.89 0.95 - # combined 15.79 32 1 0.001118 0.27 0.43 - def test2(self) -> None: - model = "gpt-5-nano" - batch_size = 8 - self.helper(model, batch_size) - # - batch_size = 16 - self.helper(model, batch_size) - # - batch_size = 32 - self.helper(model, batch_size) diff --git 
a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py deleted file mode 100644 index a7e567679..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hlogging.py +++ /dev/null @@ -1,103 +0,0 @@ -import asyncio -import logging -from typing import Optional - -import helpers.hasyncio as hasynci -import helpers.hdatetime as hdateti -import helpers.hlogging as hloggin -import helpers.hunit_test as hunitest -import helpers.hwall_clock_time as hwacltim - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -# ############################################################################# -# Test_logging1 -# ############################################################################# - - -class Test_logging1(hunitest.TestCase): - def test_logging_levels1(self) -> None: - hloggin.test_logger() - - -# ############################################################################# - - -# ############################################################################# -# Test_hlogging_asyncio1 -# ############################################################################# - - -class Test_hlogging_asyncio1(hunitest.TestCase): - @staticmethod - async def workload(get_wall_clock_time: hdateti.GetWallClockTime) -> None: - """ - Coroutine simulating a workload waiting for 1s. - """ - # Set the coroutine name. 
- task = asyncio.current_task() - task.set_name("workload") - - def _print_time() -> None: - true_wall_clock_time = hdateti.get_current_time("ET") - _LOG.debug("wall_clock_time=%s", true_wall_clock_time) - event_loop_time = get_wall_clock_time() - _LOG.debug("event_loop_time=%s", event_loop_time) - - _print_time() - _LOG.debug(" -> wait") - await asyncio.sleep(1.0) - _print_time() - - def run_test( - self, - event_loop: Optional[asyncio.AbstractEventLoop], - get_wall_clock_time: hdateti.GetWallClockTime, - ) -> None: - coroutine = self.workload(get_wall_clock_time) - hasynci.run(coroutine, event_loop=event_loop) - - # pylint: disable=line-too-long - def test_real_time1(self) -> None: - """ - Use the logger. - - The output is like: - - ``` - 07:55:54 hunit_test.py setUp:932 Resetting random.seed to 20000101 - 07:55:54 hunit_test.py setUp:935 Resetting np.random.seed to 20000101 - 07:55:54 hunit_test.py setUp:944 base_dir_name=/app/amp/helpers/test - ``` - """ - # Use the wall clock time with no special event loop. - get_wall_clock_time = lambda: hdateti.get_current_time(tz="ET") - event_loop = None - # Run. - self.run_test(event_loop, get_wall_clock_time) - - # pylint: disable=line-too-long - def test_simulated_time1(self) -> None: - """ - Use the logger with event_loop and asyncio. - - The output is like: - - ``` - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:28 wall_clock_time=2022-01-18 07:52:55.337574-05:00 - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py _print_time:30 event_loop_time=2022-01-18 07:52:55.310587-05:00 - 07:52:55 @ 2022-01-18 02:52:55 workload test_hlogging.py workload:33 -> wait - ``` - """ - with hasynci.solipsism_context() as event_loop: - # Use the simulate wall clock time. - get_wall_clock_time = lambda: hdateti.get_current_time( - tz="ET", event_loop=event_loop - ) - hwacltim.set_wall_clock_time(get_wall_clock_time) - # Run. 
- self.run_test(event_loop, get_wall_clock_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py deleted file mode 100644 index 2f1653c79..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_bullets.py +++ /dev/null @@ -1,716 +0,0 @@ -import logging -import os -from typing import List - -import helpers.hio as hio -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_remove_bullets -# ############################################################################# - - -class Test_remove_bullets(hunitest.TestCase): - """ - Test the remove_bullets function. - """ - - def helper(self, text: str, expected: str) -> None: - """ - Helper to test remove_bullets function. - - :param text: Input text with bullets - :param expected: Expected output with bullets removed - """ - # Run test. - text = hprint.dedent(text) - actual = hmarkdo.remove_bullets(text) - # Check outputs. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic bullet removal. - """ - # Prepare inputs. - text = """ - - First item - - Second item - - Third item - """ - # Prepare outputs. - expected = """ - First item - Second item - Third item - """ - # Run test. - self.helper(text, expected) - - def test2(self) -> None: - """ - Test nested bullets removal. - """ - # Prepare inputs. - text = """ - - First item - - Nested item - - Another nested - - Second item - """ - # Prepare outputs. - expected = """ - First item - Nested item - Another nested - Second item - """ - # Run test. 
- self.helper(text, expected) - - def test3(self) -> None: - """ - Test mixed content with bullets and non-bullets. - """ - # Prepare inputs. - text = """ - - Bullet item - Regular text line - - Another bullet - More regular text - """ - # Prepare outputs. - expected = """ - Bullet item - Regular text line - Another bullet - More regular text - """ - # Run test. - self.helper(text, expected) - - def test4(self) -> None: - """ - Test empty lines preservation. - """ - # Prepare inputs. - text = """ - - First item - - - Second item - - - Third item - """ - # Prepare outputs. - expected = """ - First item - - Second item - - Third item - """ - # Run test. - self.helper(text, expected) - - -# ############################################################################# -# Test_bold_first_level_bullets1 -# ############################################################################# - - -class Test_bold_first_level_bullets1(hunitest.TestCase): - def helper(self, text: str, expected: str) -> None: - """ - Helper to test bold_first_level_bullets function. - """ - text = hprint.dedent(text) - lines = text.split("\n") - actual_lines = hmarkdo.bold_first_level_bullets(lines) - actual = "\n".join(actual_lines) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic first-level bullet bolding. - """ - text = r""" - - First item - - Sub item - - Second item - """ - expected = r""" - - **First item** - - Sub item - - **Second item** - """ - self.helper(text, expected) - - def test2(self) -> None: - """ - Test with mixed content including non-bullet text. - """ - text = r""" - Some text here - - First bullet - More text - - Second bullet - - Nested bullet - Final text - """ - expected = r""" - Some text here - - **First bullet** - More text - - **Second bullet** - - Nested bullet - Final text - """ - self.helper(text, expected) - - def test3(self) -> None: - """ - Test with multiple levels of nesting. 
- """ - text = r""" - - Top level - - Second level - - Third level - - Back to second - - Another top - """ - expected = r""" - - **Top level** - - Second level - - Third level - - Back to second - - **Another top** - """ - self.helper(text, expected) - - def test4(self) -> None: - """ - Test with empty lines between bullets. - """ - text = r""" - - First item - - - Second item - - Sub item - - - Third item - """ - expected = r""" - - **First item** - - - **Second item** - - Sub item - - - **Third item** - """ - self.helper(text, expected) - - def test5(self) -> None: - """ - Test with text that already contains some bold markers. - """ - text = r""" - - First **important** point - - Sub point - - Second point with emphasis - """ - expected = r""" - - First **important** point - - Sub point - - **Second point with emphasis** - """ - self.helper(text, expected) - - -# ############################################################################# -# Test_colorize_bold_text1 -# ############################################################################# - - -class Test_colorize_bold_text1(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic case with single bold text. - """ - text = "This is **bold** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"This is **\red{bold}** text" - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Test multiple bold sections get different colors. - """ - text = "**First** normal **Second** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** normal **\teal{Second}** text" - self.assert_equal(actual, expected) - - def test3(self) -> None: - """ - Test underscore style bold text. 
- """ - text = "This is __bold__ text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"This is **\red{bold}** text" - self.assert_equal(actual, expected) - - def test4(self) -> None: - """ - Test text with no bold sections returns unchanged. - """ - text = "This is plain text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = "This is plain text" - self.assert_equal(actual, expected) - - def test5(self) -> None: - """ - Test mixed bold styles in same text. - """ - text = "**First** and __Second__ bold" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** and **\teal{Second}** bold" - self.assert_equal(actual, expected) - - def test6(self) -> None: - """ - Test with abbreviations=False uses full \textcolor syntax. - """ - text = "This is **bold** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=False - ) - expected = r"This is **\textcolor{red}{bold}** text" - self.assert_equal(actual, expected) - - def test7(self) -> None: - """ - Test with multiple bullet lists and different colors. 
- """ - text = """ - **List 1:** - - First item - - Second item - - **List 2:** - - Another item - - Final item - """ - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r""" - **\red{List 1:}** - - First item - - Second item - - **\teal{List 2:}** - - Another item - - Final item - """ - self.assert_equal(actual, expected) - - def test8(self) -> None: - text = hprint.dedent( - r""" - - **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **\orange{Key Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **\blue{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **\violet{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - - - **\pink{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - - - **\olive{Example}** - - A thermostat estimates room temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - - - **\darkgray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = hprint.dedent( - r""" - - **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **\orange{Key 
Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **\olive{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **\green{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - - - **\cyan{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - - - **\blue{Example}** - - A thermostat estimates room temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - - - **\darkgray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - self.assert_equal(actual, expected) - - def test9(self) -> None: - """ - Test basic case with single bold text. 
- """ - text = "**First** normal **Second** text" - actual = hmarkdo.colorize_bold_text( - text, color_sequence="equidistant", use_abbreviations=True - ) - expected = r"**\red{First}** normal **\teal{Second}** text" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_format_first_level_bullets1 -# ############################################################################# - - -class Test_format_first_level_bullets1(hunitest.TestCase): - # TODO(ai): Rename -> helper - def format_and_compare_markdown(self, text: str, expected: str) -> None: - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # - lines = text.split("\n") - actual_lines = hmarkdo.format_first_level_bullets(lines) - actual = "\n".join(actual_lines) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test basic case with single first level bullet. - """ - text = """ - Some text - - First bullet - More text""" - expected = """ - Some text - - - First bullet - More text""" - self.format_and_compare_markdown(text, expected) - - def test2(self) -> None: - """ - Test multiple first level bullets. - """ - text = """ - - First bullet - - Second bullet - - Third bullet""" - expected = """ - - First bullet - - - Second bullet - - - Third bullet""" - self.format_and_compare_markdown(text, expected) - - def test3(self) -> None: - """ - Test mixed first level and indented bullets. - """ - text = """ - - First level - - - Second level - - Another second - - Back to first""" - expected = """ - - First level - - Second level - - Another second - - - Back to first""" - self.format_and_compare_markdown(text, expected) - - def test4(self) -> None: - """ - Test mixed content with text and bullets. 
- """ - text = """ - Some initial text - - First bullet - Some text in between - - Second bullet - Final text""" - expected = """ - Some initial text - - - First bullet - Some text in between - - - Second bullet - Final text""" - self.format_and_compare_markdown(text, expected) - - def test5(self) -> None: - """ - Test nested bullets with multiple levels. - """ - text = """ - - Level 1 - - Level 2 - - Level 3 - - Another level 1 - - Level 2 again""" - expected = """ - - Level 1 - - Level 2 - - Level 3 - - - Another level 1 - - Level 2 again""" - self.format_and_compare_markdown(text, expected) - - def test6(self) -> None: - """ - Test empty lines handling. - """ - text = """ - - First bullet - - - Second bullet - - - Third bullet""" - expected = """ - - First bullet - - - Second bullet - - - Third bullet""" - self.format_and_compare_markdown(text, expected) - - def test7(self) -> None: - """ - Test mixed content with bullets and text. - """ - text = """ - Some text here - - First bullet - More text - - Second bullet - - Nested bullet - Final paragraph - - Last bullet""" - expected = """ - Some text here - - - First bullet - More text - - - Second bullet - - Nested bullet - Final paragraph - - - Last bullet""" - self.format_and_compare_markdown(text, expected) - - def test8(self) -> None: - """ - Test bullets with inline formatting. - """ - text = """ - - **Bold bullet** point - - *Italic nested* bullet - - `Code bullet` here - - **_Mixed_** formatting""" - expected = """ - - **Bold bullet** point - - *Italic nested* bullet - - - `Code bullet` here - - **_Mixed_** formatting""" - self.format_and_compare_markdown(text, expected) - - def test9(self) -> None: - """ - Test bullets with special characters. 
- """ - text = """ - - Bullet with (parentheses) - - Bullet with [brackets] - - Bullet with {braces} - - Bullet with $math$""" - expected = """ - - Bullet with (parentheses) - - Bullet with [brackets] - - - Bullet with {braces} - - Bullet with $math$""" - self.format_and_compare_markdown(text, expected) - - def test10(self) -> None: - text = hprint.dedent( - r""" - - **Objective** - - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **Key Components** - - - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - **Utility update**: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **Learning Process** - - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **Use Case** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ - ) - expected = hprint.dedent( - r""" - - **Objective** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - - - **Key Components** - - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - **Utility update**: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - - - **Learning Process** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - - - **Use Case** - - Suitable when environment dynamics are stationary and can be learned from - interaction - 
""" - ) - self.format_and_compare_markdown(text, expected) - - -# ############################################################################# -# Test_process_lines1 -# ############################################################################# - - -class Test_process_lines1(hunitest.TestCase): - # TODO(gp): This doesn't seem correct. - def test1(self) -> None: - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - txt_in = hio.from_file(input_file_path) - txt_in = hprint.dedent(txt_in) - lines = txt_in.split("\n") - out = [] - for i, line in hmarkdo.process_lines(lines): - _LOG.debug(hprint.to_str("line")) - out.append(f"{i}:{line}") - actual = "\n".join(out) - self.check_string( - actual, dedent=True, remove_lead_trail_empty_lines=True - ) - - -# ############################################################################# -# Test_process_code_block1 -# ############################################################################# - - -class Test_process_code_block1(hunitest.TestCase): - def helper(self, txt: str) -> str: - out: List[str] = [] - in_code_block = False - lines = txt.split("\n") - for i, line in enumerate(lines): - _LOG.debug("%s:line=%s", i, line) - # Process the code block. - do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( - line, in_code_block, i, lines - ) - out.extend(out_tmp) - if do_continue: - continue - # - out.append(line) - return "\n".join(out) - - def test1(self) -> None: - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - txt_in = hio.from_file(input_file_path) - txt_in = hprint.dedent(txt_in, remove_lead_trail_empty_lines_=True) - # Run function. - actual = self.helper(txt_in) - # Check output. 
- self.check_string( - actual, dedent=True, remove_lead_trail_empty_lines=True - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py deleted file mode 100644 index e33c04dc8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_coloring.py +++ /dev/null @@ -1,205 +0,0 @@ -import helpers.hmarkdown as hmarkdo -import helpers.hunit_test as hunitest - - -# ############################################################################# -# Test_process_color_commands1 -# ############################################################################# - - -class Test_process_color_commands1(hunitest.TestCase): - def test_text_content1(self) -> None: - """ - Test with plain text content. - """ - txt_in = r"\red{Hello world}" - expected = r"\textcolor{red}{\text{Hello world}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_math_content1(self) -> None: - """ - Test color command with mathematical content. - """ - txt_in = r"\blue{x + y = z}" - expected = r"\textcolor{blue}{x + y = z}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_multiple_colors1(self) -> None: - """ - Test multiple color commands in the same line. - """ - txt_in = r"The \red{quick} \blue{fox} \green{jumps}" - expected = r"The \textcolor{red}{\text{quick}} \textcolor{blue}{\text{fox}} \textcolor{darkgreen}{\text{jumps}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_mixed_content1(self) -> None: - """ - Test color commands with both text and math content. 
- """ - txt_in = r"\red{Result: x^2 + y^2}" - expected = r"\textcolor{red}{Result: x^2 + y^2}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - def test_nested_braces1(self) -> None: - """ - Test color command with nested braces. - """ - txt_in = r"\blue{f(x) = {x + 1}}" - expected = r"\textcolor{blue}{f(x) = {x + 1}}" - actual = hmarkdo.process_color_commands(txt_in) - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_colorize_bullet_points_in_slide1 -# ############################################################################# - - -class Test_colorize_bullet_points_in_slide1(hunitest.TestCase): - def test1(self) -> None: - # Prepare inputs. - text = r""" - - **VC Theory** - - Measures model - - - **Bias-Variance Decomposition** - - Prediction error - - **Bias** - - **Variance** - - - **Computation Complexity** - - Balances model - - Related to - - E.g., Minimum - - - **Bayesian Approach** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **Problem in ML Theory:** - - Assumptions may not align with practical problems - """ - # Run function. - all_md_colors = [ - "red", - "orange", - "yellow", - "lime", - "green", - "teal", - "cyan", - "blue", - "purple", - "violet", - "magenta", - "pink", - "brown", - "olive", - "gray", - "darkgray", - "lightgray", - "black", - "white", - ] - - actual = hmarkdo.colorize_bullet_points_in_slide( - text, all_md_colors=all_md_colors - ) - # Check output. 
- expected = r""" - - **\red{VC Theory}** - - Measures model - - - **\orange{Bias-Variance Decomposition}** - - Prediction error - - **\yellow{Bias}** - - **\lime{Variance}** - - - **\green{Computation Complexity}** - - Balances model - - Related to - - E.g., Minimum - - - **\teal{Bayesian Approach}** - - Treats ML as probability - - Combines prior knowledge with observed data to update belief about a model - - - **\cyan{Problem in ML Theory:}** - - Assumptions may not align with practical problems - """ - self.assert_equal(actual, expected) - - def test2(self) -> None: - # Prepare inputs. - text = r""" - * Machine Learning Flow - - ::: columns - :::: {.column width=90%} - - Question - - E.g., "How can we predict house prices?" - - Input data - - E.g., historical data of house sales - - - _"If I were given one hour to save the planet, I would spend 59 minutes - defining the problem and one minute resolving it"_ (Albert Einstein) - - - **Not all phases are equally important!** - - Question $>$ Data $>$ Features $>$ Algorithm - - Clarity of the question impacts project success - - Quality and relevance of data are crucial for performance - - Proper feature selection simplifies the model and improves accuracy - - Algorithm is often less important (contrary to popular belief!) - :::: - :::: {.column width=5%} - - ```graphviz[height=90%] - digraph BayesianFlow { - rankdir=TD; - splines=true; - ... - } - ``` - :::: - ::: - """ - # Run function. - actual = hmarkdo.colorize_bullet_points_in_slide(text) - # Check output. - expected = r""" - * Machine Learning Flow - - ::: columns - :::: {.column width=90%} - - Question - - E.g., "How can we predict house prices?" 
- - Input data - - E.g., historical data of house sales - - - _"If I were given one hour to save the planet, I would spend 59 minutes - defining the problem and one minute resolving it"_ (Albert Einstein) - - - **\red{Not all phases are equally important!}** - - Question $>$ Data $>$ Features $>$ Algorithm - - Clarity of the question impacts project success - - Quality and relevance of data are crucial for performance - - Proper feature selection simplifies the model and improves accuracy - - Algorithm is often less important (contrary to popular belief!) - :::: - :::: {.column width=5%} - - ```graphviz[height=90%] - digraph BayesianFlow { - rankdir=TD; - splines=true; - ... - } - ``` - :::: - ::: - """ - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py deleted file mode 100644 index 8d47a3966..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_div_blocks.py +++ /dev/null @@ -1,355 +0,0 @@ -import logging -from typing import List, Tuple - -import helpers.hprint as hprint -import helpers.hmarkdown_div_blocks as hmadiblo -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _prepare_div_block_inputs(txt: str, expected: str) -> Tuple[List[str], str]: - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) - if expected.startswith("\n"): - expected = expected[1:] - if expected.endswith("\n"): - expected = expected[:-1] - lines = txt.split("\n") - return lines, expected - - -# ############################################################################# -# Test_add_prettier_ignore_to_div_blocks -# 
############################################################################# - - -class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): - """ - Test the function to add prettier-ignore comments around div blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. - lines, expected = _prepare_div_block_inputs(txt, expected) - # Run test. - actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assert_equal(actual, expected) - - def test_simple_div_block(self) -> None: - """ - Test a simple div block with two colons. - """ - txt = """ - :::: - ::: - """ - # Add a leading empty line in expected since function adds it. - expected = """ - - - :::: - ::: - - - """ - self.helper(txt, expected) - - def test_div_block_with_attributes(self) -> None: - """ - Test a div block with column attributes. - """ - txt = """ - :::: - ::::{.column width=40%} - """ - expected = """ - - - :::: - ::::{.column width=40%} - - - """ - self.helper(txt, expected) - - def test_multiple_div_blocks(self) -> None: - """ - Test multiple div blocks in the same content. - """ - txt = """ - Some text before - - :::: - ::::{.column width=40%} - - Middle text - - :::columns - ::::{.column width=60%} - - Some text after - """ - expected = """ - Some text before - - - - :::: - ::::{.column width=40%} - - - - Middle text - - - - :::columns - ::::{.column width=60%} - - - - Some text after - """ - self.helper(txt, expected) - - def test_no_div_blocks(self) -> None: - """ - Test content with no div blocks. - """ - txt = """ - Some normal text - with no div blocks - at all - """ - expected = """ - Some normal text - with no div blocks - at all - """ - self.helper(txt, expected) - - def test_unclosed_div_block(self) -> None: - """ - Test a div block that is not closed. 
- """ - txt = """ - Some text - - :::: - - More text - """ - expected = """ - Some text - - :::: - - More text - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_remove_prettier_ignore_from_div_blocks -# ############################################################################# - - -class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): - """ - Test the function to remove prettier-ignore comments from div blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. - lines, expected = _prepare_div_block_inputs(txt, expected) - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assert_equal(actual, expected) - - def test_remove_simple_block(self) -> None: - """ - Test removing prettier-ignore from a simple div block. - """ - txt = """ - - - :::: - ::: - - - """ - expected = """ - :::: - ::: - """ - self.helper(txt, expected) - - def test_remove_block_with_content(self) -> None: - """ - Test removing prettier-ignore from a div block with content. - """ - txt = """ - Some text before - - - :::: - ::::{.column width=40%} - - - Some text after - """ - expected = """ - Some text before - :::: - ::::{.column width=40%} - Some text after - """ - self.helper(txt, expected) - - def test_remove_multiple_blocks(self) -> None: - """ - Test removing prettier-ignore from multiple div blocks. - """ - txt = """ - Text before - - - :::: - ::::{.column width=40%} - - - Middle text - - - :::columns - ::::{.column width=60%} - - - Text after - """ - expected = """ - Text before - :::: - ::::{.column width=40%} - Middle text - :::columns - ::::{.column width=60%} - Text after - """ - self.helper(txt, expected) - - def test_no_prettier_ignore_comments(self) -> None: - """ - Test content with no prettier-ignore comments. 
- """ - txt = """ - Some normal text - with no prettier-ignore comments - at all - """ - expected = """ - Some normal text - with no prettier-ignore comments - at all - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_add_remove_prettier_ignore_roundtrip -# ############################################################################# - - -class Test_add_remove_prettier_ignore_roundtrip(hunitest.TestCase): - """ - Test that adding and removing prettier-ignore comments is a roundtrip. - """ - - def helper(self, txt: str) -> None: - # Prepare inputs. - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - # Add prettier-ignore comments. - lines_with_comments = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - # Remove prettier-ignore comments. - lines_restored = hmadiblo.remove_prettier_ignore_from_div_blocks( - lines_with_comments - ) - actual = "\n".join(lines_restored) - expected = txt - # Check outputs. - self.assert_equal(actual, expected) - - def test_roundtrip_simple(self) -> None: - """ - Test that add and remove operations are inverses for simple div block. - """ - txt = """ - :::: - ::: - """ - self.helper(txt) - - def test_roundtrip_complex1(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. - """ - txt = """ - Text1 - - :::: - ::::{.column width=40%} - - Text2 - - :::columns - ::::{.column width=60%} - - Text3 - """ - self.helper(txt) - - def test_roundtrip_complex2(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. - """ - txt = """ - Text1 - ::: - ::::{.column width=40%} - Text2 - :::: - ::::{.column width=40%} - Text3 - :::columns - ::::{.column width=60%} - Text4 - """ - self.helper(txt) - - def test_roundtrip_complex3(self) -> None: - """ - Test roundtrip for content with multiple div blocks and text. 
- """ - txt = """ - Text1 - - ::: - ::::{.column width=40%} - - Text2 - :::: - ::::{.column width=40%} - - Text3 - :::columns - ::::{.column width=60%} - Text4 - """ - self.helper(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py deleted file mode 100644 index c8ccc96b8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_fenced_blocks.py +++ /dev/null @@ -1,218 +0,0 @@ -import logging -import pprint -from typing import Dict, List - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_fenced_blocks_with_tags1 -# ############################################################################# - - -class Test_replace_fenced_blocks_with_tags1(hunitest.TestCase): - def helper( - self, text: str, expected_lines: List[str], expected_map: Dict[str, str] - ) -> None: - """ - Test replacing fenced code blocks with tags. - """ - lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) - lines = lines.split("\n") - # Call function. - actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) - # Check output. - fence_map_as_str = pprint.pformat(fence_map) - expected_map_as_str = pprint.pformat(expected_map) - self.assert_equal(fence_map_as_str, expected_map_as_str) - # - actual_lines = "\n".join(actual_lines) - expected_lines = hprint.dedent( - expected_lines, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual_lines, expected_lines) - - def helper_round_trip(self, text: str) -> None: - """ - Test the round trip. 
- """ - # Do the round trip. - lines = text.split("\n") - actual_lines, fence_map = hmarkdo.replace_fenced_blocks_with_tags(lines) - act_text = hmarkdo.replace_tags_with_fenced_blocks( - actual_lines, fence_map - ) - # Check output. - act_text = "\n".join(act_text) - self.assert_equal(act_text, text) - - def test1(self) -> None: - """ - Test replacing fenced code blocks with tags. - """ - # Prepare inputs. - text = """ - Some text before - ```python - def foo(): - return 42 - ``` - Text between blocks - ```` - Plain code block - ```` - Some text after - """ - # Prepare outputs. - expected_lines = """ - Some text before - - Text between blocks - - Some text after - """ - # Check fence map. - expected_map = { - "1": "```python\ndef foo():\n return 42\n```", - "2": "````\nPlain code block\n````", - } - self.helper(text, expected_lines, expected_map) - - def test2(self) -> None: - """ - Test nested fenced blocks. - """ - text = """ - ```` - Outer block - ```python - def nested(): - pass - ``` - Still outer - ```` - """ - expected_lines = """ - - """ - expected_map = { - "1": "````\nOuter block\n```python\ndef nested():\n pass\n```\nStill outer\n````" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test3(self) -> None: - """ - Test empty fenced blocks. - """ - text = """ - Before - ``` - ``` - After - ```python - ``` - End - """ - expected_lines = """ - Before - - After - - End - """ - expected_map = {"1": "```\n```", "2": "```python\n```"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test4(self) -> None: - """ - Test blocks with different fence lengths. 
- """ - text = """ - Start - ``` - Three - ``` - Middle - ````` - Five - ````` - End - """ - expected_lines = """ - Start - - Middle - - End - """ - expected_map = {"1": "```\nThree\n```", "2": "`````\nFive\n`````"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test5(self) -> None: - """ - Test blocks with language specifiers. - """ - text = """ - ```python - def foo(): pass - ``` - ```bash - echo hello - ``` - ```javascript - console.log('hi'); - ``` - """ - expected_lines = """ - - - - """ - expected_map = { - "1": "```python\ndef foo(): pass\n```", - "2": "```bash\necho hello\n```", - "3": "```javascript\nconsole.log('hi');\n```", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test6(self) -> None: - """ - Test blocks with indentation. - """ - text = """ - Outside - ``` - Indented block - More indent - ``` - ```python - def foo(): - pass - ``` - End - """ - expected_lines = """ - Outside - - - End - """ - expected_map = { - "1": " ```\n Indented block\n More indent\n ```", - "2": " ```python\n def foo():\n pass\n ```", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py deleted file mode 100644 index 91efef1f4..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_filtering.py +++ /dev/null @@ -1,449 +0,0 @@ -import logging - -import helpers.hmarkdown_filtering as hmarfilt -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# 
Test_filter_by_header1 -# ############################################################################# - - -class Test_filter_by_header1(hunitest.TestCase): - def test_basic_header_extraction(self) -> None: - """ - Test basic header extraction functionality. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction section. - Some content here. - - ## Section 1 - Content for section 1. - - # Conclusion - Final thoughts here. - """ - test_content = hprint.dedent( - test_content, remove_lead_trail_empty_lines_=False - ) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_header(lines, "Introduction") - result_content = "\n".join(result_lines) - # Check outputs. - expected = """ - # Introduction - This is the introduction section. - Some content here. - - ## Section 1 - Content for section 1. - """ - self.assert_equal(result_content, expected, dedent=True) - - def test_header_not_found(self) -> None: - """ - Test behavior when header is not found. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction section. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(ValueError): - hmarfilt.filter_by_header(lines, "NonExistent") - - -# ############################################################################# -# Test_parse_range1 -# ############################################################################# - - -class Test_parse_range1(hunitest.TestCase): - def test_numeric_range(self) -> None: - """ - Test parsing numeric range (0-indexed). - """ - # Run test. - start, end = hmarfilt._parse_range("0:10", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 10) - - def test_none_start(self) -> None: - """ - Test range with None start (defaults to 0). - """ - # Run test. - start, end = hmarfilt._parse_range("None:10", 20) - # Check outputs. 
- self.assertEqual(start, 0) - self.assertEqual(end, 10) - - def test_none_end(self) -> None: - """ - Test range with None end (defaults to max_value). - """ - # Run test. - start, end = hmarfilt._parse_range("0:None", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - def test_both_none(self) -> None: - """ - Test range with both None (0:max_value). - """ - # Run test. - start, end = hmarfilt._parse_range("None:None", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - def test_invalid_range(self) -> None: - """ - Test invalid range format. - """ - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("invalid", 20) - - def test_case_insensitive_none(self) -> None: - """ - Test case insensitive None parsing. - """ - # Run test. - start, end = hmarfilt._parse_range("NONE:none", 20) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 20) - - -# ############################################################################# -# Test_filter_by_lines1 -# ############################################################################# - - -class Test_filter_by_lines1(hunitest.TestCase): - def test_basic_line_filtering(self) -> None: - """ - Test basic line filtering functionality (0-indexed). - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - Line 4 - Line 5 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (indices 1:3 = Line 2 and Line 3). - result_lines = hmarfilt.filter_by_lines(lines, "1:3") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 2\nLine 3" - self.assertEqual(result_content, expected) - - def test_line_filtering_with_none(self) -> None: - """ - Test line filtering with None start (defaults to 0). - """ - # Prepare inputs. 
- test_content = """ - Line 1 - Line 2 - Line 3 - Line 4 - Line 5 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (None:2 = indices 0:2 = Line 1 and Line 2). - result_lines = hmarfilt.filter_by_lines(lines, "None:2") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 1\nLine 2" - self.assertEqual(result_content, expected) - - def test_line_filtering_to_end(self) -> None: - """ - Test line filtering from start to end. - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (1:None = indices 1:3 = Line 2 and Line 3). - result_lines = hmarfilt.filter_by_lines(lines, "1:None") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 2\nLine 3" - self.assertEqual(result_content, expected) - - def test_invalid_range_order(self) -> None: - """ - Test that start line <= end line is enforced. - """ - # Prepare inputs. - test_content = "Line 1\nLine 2\nLine 3" - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_lines(lines, "2:1") - - -# ############################################################################# -# Test_filter_by_slides1 -# ############################################################################# - - -class Test_filter_by_slides1(hunitest.TestCase): - def test_basic_slide_filtering(self) -> None: - """ - Test basic slide filtering functionality. - """ - # Prepare inputs. - test_content = """ - # Header 1 - - - - - * Slide 1 - Content for slide 1. - - * Slide 2 - Content for slide 2. - - * Slide 3 - Content for slide 3. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_slides(lines, "0:1") - result_content = "\n".join(result_lines) - # Check outputs. 
- self.assertIn("Slide 1", result_content) - self.assertNotIn("Slide 2", result_content) - - def test_slide_filtering_with_none_end(self) -> None: - """ - Test slide filtering to the end. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - - * Slide 2 - Content 2. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_slides(lines, "0:None") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Slide 1", result_content) - self.assertIn("Slide 2", result_content) - - def test_slide_filtering_invalid_range(self) -> None: - """ - Test that invalid slide ranges raise errors. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "1:0") - - def test_slide_filtering_beyond_slides(self) -> None: - """ - Test filtering with end beyond available slides. - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs. - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "0:5") - - def test_no_slides_content(self) -> None: - """ - Test behavior with content that has no slides. - """ - # Prepare inputs. - test_content = """ - # Header 1 - Just regular content without slides. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - # Check outputs (should fail validation since there are no slides). - with self.assertRaises(AssertionError): - hmarfilt.filter_by_slides(lines, "0:1") - - def test_slide_filtering_single_slide(self) -> None: - """ - Test filtering a single slide when there's only one slide (0-indexed). - """ - # Prepare inputs. 
- test_content = """ - * Only Slide - This is the only content. - Additional content after the slide. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:1 = only slide at index 0). - result_lines = hmarfilt.filter_by_slides(lines, "0:1") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Only Slide", result_content) - self.assertIn("This is the only content.", result_content) - - def test_slide_end_boundary(self) -> None: - """ - Test filtering to the end of slides (0-indexed). - """ - # Prepare inputs. - test_content = """ - * Slide 1 - Content 1. - - * Slide 2 - Content 2. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:2 = slides 0 and 1). - result_lines = hmarfilt.filter_by_slides(lines, "0:2") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertIn("Slide 1", result_content) - self.assertIn("Slide 2", result_content) - - -# ############################################################################# -# Test_additional_edge_cases1 -# ############################################################################# - - -class Test_additional_edge_cases1(hunitest.TestCase): - def test_filter_by_header_with_subsection(self) -> None: - """ - Test extracting a subsection header. - """ - # Prepare inputs. - test_content = """ - # Introduction - This is the introduction. - - ## Subsection 1 - Content for subsection 1. - - ## Subsection 2 - Content for subsection 2. - - # Conclusion - Final thoughts. - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test. - result_lines = hmarfilt.filter_by_header(lines, "Subsection 1") - result_content = "\n".join(result_lines) - # Check outputs. 
- self.assertIn("## Subsection 1", result_content) - self.assertIn("Content for subsection 1.", result_content) - - def test_parse_range_edge_cases(self) -> None: - """ - Test edge cases for range parsing (0-indexed). - """ - # Run test. - start, end = hmarfilt._parse_range("0:0", 1) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 0) - # Run test. - start, end = hmarfilt._parse_range("None:None", 1000) - # Check outputs. - self.assertEqual(start, 0) - self.assertEqual(end, 1000) - - def test_filter_lines_single_line(self) -> None: - """ - Test filtering with empty range (0:0). - """ - # Prepare inputs. - test_content = "Single line content" - lines = test_content.split("\n") - # Run test (0:0 = empty range). - result_lines = hmarfilt.filter_by_lines(lines, "0:0") - result_content = "\n".join(result_lines) - # Check outputs. - self.assertEqual(result_content, "") - - def test_filter_lines_exact_range(self) -> None: - """ - Test filtering with exact boundaries (0-indexed). - """ - # Prepare inputs. - test_content = """ - Line 1 - Line 2 - Line 3 - """ - test_content = hprint.dedent(test_content) - lines = test_content.split("\n") - # Run test (0:2 = indices 0 and 1 = Line 1 and Line 2). - result_lines = hmarfilt.filter_by_lines(lines, "0:2") - result_content = "\n".join(result_lines) - # Check outputs. - expected = "Line 1\nLine 2" - self.assertEqual(result_content, expected) - - def test_parse_range_invalid_formats(self) -> None: - """ - Test various invalid range formats. - """ - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("5", 10) - # Run test. - with self.assertRaises(AssertionError): - hmarfilt._parse_range("", 10) - # Run test. 
- with self.assertRaises(ValueError): - hmarfilt._parse_range("1:2:3", 10) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py deleted file mode 100644 index abf2faf66..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_formatting.py +++ /dev/null @@ -1,1403 +0,0 @@ -import logging -import os - -import helpers.hio as hio -import helpers.hmarkdown_div_blocks as hmadiblo -import helpers.hmarkdown_formatting as hmarform -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_remove_end_of_line_periods1 -# ############################################################################# - - -class Test_remove_end_of_line_periods1(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - input_text = hprint.dedent(input_text).strip() - expected_text = hprint.dedent(expected_text).strip() - lines = input_text.split("\n") - # Run test. - actual_lines = hmarform.remove_end_of_line_periods(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.assertEqual(actual, expected_text) - - def test_standard_case(self) -> None: - input_text = """ - Hello. - World. - This is a test. - """ - expected_text = """ - Hello - World - This is a test - """ - self.helper(input_text, expected_text) - - def test_no_periods(self) -> None: - input_text = """ - Hello - World - This is a test - """ - expected_text = """ - Hello - World - This is a test - """ - self.helper(input_text, expected_text) - - def test_multiple_periods(self) -> None: - input_text = """ - Line 1..... 
- Line 2..... - End. - """ - expected_text = """ - Line 1 - Line 2 - End - """ - self.helper(input_text, expected_text) - - def test_empty_string(self) -> None: - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test_leading_and_trailing_periods(self) -> None: - input_text = """ - .Line 1. - .Line 2. - ..End.. - """ - expected_text = """ - .Line 1 - .Line 2 - ..End - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_md_clean_up1 -# ############################################################################# - - -class Test_md_clean_up1(hunitest.TestCase): - def test1(self) -> None: - # Prepare inputs. - txt = r""" - **States**: - - \( S = \{\text{Sunny}, \text{Rainy}\} \) - **Observations**: - - \( O = \{\text{Yes}, \text{No}\} \) (umbrella) - - ### Initial Probabilities: - \[ - P(\text{Sunny}) = 0.6, \quad P(\text{Rainy}) = 0.4 - \] - - ### Transition Probabilities: - \[ - \begin{aligned} - P(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad P(\text{Sunny} \to \text{Rainy}) = 0.3 \\ - P(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad P(\text{Rainy} \to \text{Rainy}) = 0.6 - \end{aligned} - \] - - ### Observation (Emission) Probabilities: - \[ - \begin{aligned} - P(\text{Yes} \mid \text{Sunny}) &= 0.1, \quad P(\text{No} \mid \text{Sunny}) = 0.9 \\ - P(\text{Yes} \mid \text{Rainy}) &= 0.8, \quad P(\text{No} \mid \text{Rainy}) = 0.2 - \end{aligned} - \] - """ - txt = hprint.dedent(txt) - actual = hmarform.md_clean_up(txt) - actual = hprint.dedent(actual) - expected = r""" - **States**: - - $S = \{\text{Sunny}, \text{Rainy}\}$ - **Observations**: - - $O = \{\text{Yes}, \text{No}\}$ (umbrella) - - ### Initial Probabilities: - $$ - \Pr(\text{Sunny}) = 0.6, \quad \Pr(\text{Rainy}) = 0.4 - $$ - - ### Transition Probabilities: - $$ - \begin{aligned} - \Pr(\text{Sunny} \to \text{Sunny}) &= 0.7, \quad \Pr(\text{Sunny} \to \text{Rainy}) = 0.3 \\ - 
\Pr(\text{Rainy} \to \text{Sunny}) &= 0.4, \quad \Pr(\text{Rainy} \to \text{Rainy}) = 0.6 - \end{aligned} - $$ - - ### Observation (Emission) Probabilities: - $$ - \begin{aligned} - \Pr(\text{Yes} | \text{Sunny}) &= 0.1, \quad \Pr(\text{No} | \text{Sunny}) = 0.9 \\ - \Pr(\text{Yes} | \text{Rainy}) &= 0.8, \quad \Pr(\text{No} | \text{Rainy}) = 0.2 - \end{aligned} - $$""" - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_remove_code_delimiters1 -# ############################################################################# - - -class Test_remove_code_delimiters1(hunitest.TestCase): - def test1(self) -> None: - """ - Test a basic example. - """ - # Prepare inputs. - content = r""" - ```python - def hello_world(): - print("Hello, World!") - ``` - """ - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def hello_world(): - print("Hello, World!") - """ - self.assert_equal(actual, expected, dedent=True) - - def test2(self) -> None: - """ - Test an example with empty lines at the start and end. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def check_empty_lines(): - print("Check empty lines are present!") - """ - self.assert_equal(actual, expected, dedent=True) - - def test3(self) -> None: - """ - Test a markdown with headings, Python and yaml blocks. - """ - # Prepare inputs. - content = r""" - # Section 1 - - This section contains comment and python code. 
- - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - ```python - def greet(name): - return f"Hello, {name}!" - print(greet("World")) - ``` - - # Section 2 - - Key points below. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - ```yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - ``` - """ - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - # Section 1 - - This section contains comment and python code. - - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - - def greet(name): - return f"Hello, {name}!" - print(greet("World")) - - - # Section 2 - - Key points below. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - - """ - self.assert_equal(actual, expected, dedent=True) - - def test4(self) -> None: - """ - Test another markdown with headings and multiple indent Python blocks. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - content = hprint.dedent(content) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - self.check_string(actual, dedent=True) - - def test5(self) -> None: - """ - Test an empty string. - """ - # Prepare inputs. - content = "" - lines = content.split("\n") if content else [] - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. 
- expected = "" - self.assert_equal(actual, expected, dedent=True) - - def test6(self) -> None: - """ - Test a Python and immediate markdown code block. - """ - # Prepare inputs. - in_dir_name = self.get_input_dir() - input_file_path = os.path.join(in_dir_name, "test.txt") - content = hio.from_file(input_file_path) - lines = content.split("\n") - # Call function. - actual_lines = hmarform.remove_code_delimiters(lines) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - def no_start_python(): - print("No mention of python at the start") - - - - A markdown paragraph contains - delimiters that needs to be removed. - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_format_markdown_slide -# ############################################################################# - - -class Test_format_markdown_slide(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarform.format_markdown_slide(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - _LOG.debug("actual=\n%s", actual) - _LOG.debug("expected=\n%s", expected) - self.assert_equal(str(actual), str(expected)) - - def test1(self) -> None: - """ - Test formatting a simple slide with bullets. - """ - input_text = """ - * Slide title - - First bullet - - Second bullet - """ - expected_text = """ - * Slide Title - - - First bullet - - - Second bullet - """ - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test formatting multiple slides. 
- """ - input_text = """ - * First slide - - Point A - - Point B - * Second slide - - Point X - - Point Y - """ - expected_text = """ - * First Slide - - - Point A - - - Point B - * Second Slide - - - Point X - - - Point Y - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test formatting slides with nested bullets. - """ - input_text = """ - * Main slide - - First level - - Nested point - - Another nested - - Second level - """ - expected_text = """ - * Main Slide - - - First level - - Nested point - - Another nested - - - Second level - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test formatting empty input. - """ - # Prepare inputs. - input_text = """ - """ - # Check outputs. - expected_text = """ - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test formatting slide title capitalization. - """ - input_text = """ - * mixed case slide title - - Point one - """ - expected_text = """ - * Mixed Case Slide Title - - - Point one - """ - self.helper(input_text, expected_text) - - def test6(self) -> None: - """ - Test formatting slide with only title, no bullet points. - """ - input_text = """ - * Solo slide title - """ - expected_text = """ - * Solo Slide Title - """ - self.helper(input_text, expected_text) - - def test7(self) -> None: - """ - Test formatting slide with deeply nested bullets. - """ - input_text = """ - * Main slide - - Level 1 - - Level 2 - - Level 3 - - Level 4 - - Back to level 1 - """ - expected_text = """ - * Main Slide - - - Level 1 - - Level 2 - - Level 3 - - Level 4 - - - Back to level 1 - """ - self.helper(input_text, expected_text) - - def test8(self) -> None: - """ - Test formatting slide with nested bullets and special formatting. - """ - input_text = r""" - * What Are Data Analytics? 
- - **Collections of data** - - - Aggregated, organized data sets for analysis - - - E.g., customer purchase histories in a CRM system - - **Dashboards** - - - Visual displays of key metrics for insights - - E.g., dashboard showing quarterly revenue, expenses - - - **Descriptive statistics** - - Summary metrics: mean, median, mode, standard deviation - - E.g., average sales per quarter to understand trends - - **Historical reports** - - - Examination of past performance - - E.g., monthly sales reports for past fiscal year - - **Models** - - Statistical representations to forecast, explain phenomena - - - E.g., predictive model to anticipate customer churn based on behavioral data - """ - expected_text = r""" - * What Are Data Analytics? - - - **Collections of data** - - Aggregated, organized data sets for analysis - - E.g., customer purchase histories in a CRM system - - - **Dashboards** - - Visual displays of key metrics for insights - - E.g., dashboard showing quarterly revenue, expenses - - - **Descriptive statistics** - - Summary metrics: mean, median, mode, standard deviation - - E.g., average sales per quarter to understand trends - - - **Historical reports** - - Examination of past performance - - E.g., monthly sales reports for past fiscal year - - - **Models** - - Statistical representations to forecast, explain phenomena - - E.g., predictive model to anticipate customer churn based on behavioral data - """ - self.helper(input_text, expected_text) - - def test9(self) -> None: - """ - This reproduces a broken behavior of prettier with fenced divs. 
- """ - input_text = r""" - * Incremental vs Iterative - ::: columns - :::: {.column width=55%} - - - **Incremental Development** - - Each increment adds functional components - - Require upfront planning to divide features meaningfully - - Integration of increments can be complex - - - **Iterative Development** - - Each increment delivers usable system - - Refine and improve product through repeated cycles - - Get feedback - - Uncover and adjust for unknown requirements - - - **Incremental $\gg$ Iterative** - - :::: - :::: {.column width=40%} - - ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} - - \small _Incremental - - \vspace{0.5cm} - - ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} - - \small _Iterative_ - - \vspace{0.5cm} - - ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} - - \small _Incremental vs Iterative_ - :::: - ::: - """ - expected_text = r""" - * Incremental vs Iterative - ::: columns - :::: {.column width=55%} - - - **Incremental Development** - - Each increment adds functional components - - Require upfront planning to divide features meaningfully - - Integration of increments can be complex - - - **Iterative Development** - - Each increment delivers usable system - - Refine and improve product through repeated cycles - - Get feedback - - Uncover and adjust for unknown requirements - - - **Incremental $\gg$ Iterative** - :::: - :::: {.column width=40%} - ![](msml610/lectures_source/figures/Lesson02_Monalisa_incremental.png){width=90%} - \small \_Incremental - \vspace{0.5cm} - ![](msml610/lectures_source/figures/Lesson02_Monalisa_iterative.png){width=90%} - \small _Iterative_ - \vspace{0.5cm} - ![](msml610/lectures_source/figures/Lesson02_Skateboard.png){width=90%} - \small _Incremental vs Iterative_ - :::: - ::: - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# 
Test_format_figures -# ############################################################################# - - -class Test_format_figures(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual_lines = hmarform.format_figures(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test_basic_text_with_figures(self) -> None: - """ - Test converting basic text with figures to column format. - """ - input_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Read / write large amounts of data infrequently - - Analytics requires a few columns - - Better data compression - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Read / write large amounts of data infrequently - - Analytics requires a few columns - - Better data compression - :::: - :::: {.column width=40%} - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_no_figures_no_change(self) -> None: - """ - Test that text without figures remains unchanged. 
- """ - input_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Better data compression - """ - expected_text = """ - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - **Columnar DBs** - - E.g., Amazon Redshift, Snowflake - - Better data compression - """ - self.helper(input_text, expected_text) - - def test_already_in_columns_format_no_change(self) -> None: - """ - Test that text already in columns format remains unchanged. - """ - input_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - :::: - :::: {.column width=40%} - ![](some_image.png) - :::: - ::: - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - :::: - :::: {.column width=40%} - ![](some_image.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_single_figure(self) -> None: - """ - Test converting text with a single figure. - """ - input_text = """ - - **Important concept** - - This is the main point - - Supporting detail - - ![](path/to/image.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - - **Important concept** - - This is the main point - - Supporting detail - :::: - :::: {.column width=40%} - - ![](path/to/image.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_mixed_content_with_figures(self) -> None: - """ - Test converting mixed content including text and figures. - """ - input_text = """ - ## Section header - - Some introductory text here. - - - **Point one** - - Detail A - - Detail B - - **Point two** - - Detail X - - Detail Y - - ![](image1.png) - - Additional text between figures. - - ![](image2.png) - """ - expected_text = """ - ::: columns - :::: {.column width=65%} - ## Section header - - Some introductory text here. 
- - - **Point one** - - Detail A - - Detail B - - **Point two** - - Detail X - - Detail Y - :::: - :::: {.column width=40%} - - ![](image1.png) - - Additional text between figures. - - ![](image2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - def test_empty_input(self) -> None: - """ - Test that empty input returns empty output. - """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test_with_slide_title(self) -> None: - """ - Test that slide title is left unchanged. - """ - input_text = """ - * VCS: How to Track Data - - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - """ - expected_text = """ - * VCS: How to Track Data - ::: columns - :::: {.column width=65%} - - **Row-based DBs** - - E.g., MySQL, Postgres - - Optimized for reading / writing rows - - Read / write small amounts of data frequently - :::: - :::: {.column width=40%} - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_1.png) - - ![](data605/lectures_source/images/lecture_2/lec_2_slide_47_image_2.png) - :::: - ::: - """ - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_format_md_links_to_latex_format -# ############################################################################# - - -class Test_format_md_links_to_latex_format(hunitest.TestCase): - def helper(self, input_text: str, expected_text: str) -> None: - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual_lines = hmarform.format_md_links_to_latex_format(lines) - actual = "\n".join(actual_lines) - # Check outputs. 
- expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - # ========================================================================= - # Edge cases. - # ========================================================================= - - def test_empty_input(self) -> None: - """ - Test empty input. - """ - # Prepare inputs. - input_text = "" - expected_text = "" - # Run test. - self.helper(input_text, expected_text) - - def test_no_links(self) -> None: - """ - Test content without any links. - """ - # Prepare inputs. - input_text = """ - # Important Notes - - - This is regular text - - No links here - - Just plain content - """ - expected_text = """ - # Important Notes - - - This is regular text - - No links here - - Just plain content - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Plain URL conversion: http://... or https://... - # ========================================================================= - - def test_plain_http_url(self) -> None: - """ - Test converting single plain HTTP URL. - """ - # Prepare inputs. - input_text = """ - Visit http://example.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{http://example.com}}](http://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_https_url(self) -> None: - """ - Test converting single plain HTTPS URL. - """ - # Prepare inputs. - input_text = """ - Visit https://example.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_path(self) -> None: - """ - Test converting plain URLs with paths. - """ - # Prepare inputs. 
- input_text = """ - Check out https://ubuntu.com/tutorials/command-line-for-beginners - """ - expected_text = r""" - Check out [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_query_parameters(self) -> None: - """ - Test converting plain URL with query parameters. - """ - # Prepare inputs. - input_text = """ - Search: https://example.com/search?q=python&page=1 - """ - expected_text = r""" - Search: [\textcolor{blue}{\underline{https://example.com/search?q=python&page=1}}](https://example.com/search?q=python&page=1) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_with_fragment(self) -> None: - """ - Test converting plain URL with fragment. - """ - # Prepare inputs. - input_text = """ - Docs: https://docs.python.org/3/tutorial/index.html#tutorial-index - """ - expected_text = r""" - Docs: [\textcolor{blue}{\underline{https://docs.python.org/3/tutorial/index.html#tutorial-index}}](https://docs.python.org/3/tutorial/index.html#tutorial-index) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_at_line_start(self) -> None: - """ - Test plain URL at beginning of line. - """ - # Prepare inputs. - input_text = """ - https://example.com is a good site - """ - expected_text = r""" - [\textcolor{blue}{\underline{https://example.com}}](https://example.com) is a good site - """ - # Run test. - self.helper(input_text, expected_text) - - def test_plain_url_at_line_end(self) -> None: - """ - Test plain URL at end of line. - """ - # Prepare inputs. - input_text = """ - Check this link https://example.com - """ - expected_text = r""" - Check this link [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. 
- self.helper(input_text, expected_text) - - # ========================================================================= - # URL in backticks conversion: `http://...` or `https://...` - # ========================================================================= - - def test_backtick_url(self) -> None: - """ - Test converting single URL in backticks. - """ - # Prepare inputs. - input_text = """ - Visit `https://example.com` for details - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) for details - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Markdown link conversion: [Text](URL) - # ========================================================================= - - def test_markdown_link_simple(self) -> None: - """ - Test converting simple markdown link [Text](URL). - """ - # Prepare inputs. - input_text = """ - Check out [this tutorial](https://example.com/tutorial) - """ - expected_text = r""" - Check out [\textcolor{blue}{\underline{this tutorial}}](https://example.com/tutorial) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_markdown_link_preserves_text(self) -> None: - """ - Test that markdown link preserves the display text. - """ - # Prepare inputs. - input_text = """ - See [documentation](https://docs.example.com) here - """ - expected_text = r""" - See [\textcolor{blue}{\underline{documentation}}](https://docs.example.com) here - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Email link conversion: [email@domain.com](email@domain.com) - # ========================================================================= - - def test_email_link_simple1(self) -> None: - """ - Test converting simple email link. - """ - # Prepare inputs. 
- input_text = """ - Contact: [support@example.com](support@example.com) - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_email_link_simple2(self) -> None: - """ - Test converting simple email link. - """ - # Prepare inputs. - input_text = """ - Contact: [](support@example.com) - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Multiple URLs. - # ========================================================================= - - def test_multiple_urls_same_line(self) -> None: - """ - Test converting multiple URLs on same line. - """ - # Prepare inputs. - input_text = """ - Visit https://example.com and https://another.com - """ - expected_text = r""" - Visit [\textcolor{blue}{\underline{https://example.com}}](https://example.com) and [\textcolor{blue}{\underline{https://another.com}}](https://another.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_multiple_urls_different_lines(self) -> None: - """ - Test converting multiple URLs on different lines. - """ - # Prepare inputs. - input_text = """ - Tutorial: https://ubuntu.com/tutorials/command-line-for-beginners - - Documentation: https://docs.python.org/3/ - """ - expected_text = r""" - Tutorial: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - - Documentation: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Mixed link types. 
- # ========================================================================= - - def test_mixed_plain_and_backtick_urls(self) -> None: - """ - Test handling mixed plain and backtick URLs. - """ - # Prepare inputs. - input_text = """ - Plain: https://example.com - Backtick: `https://docs.example.com` - """ - expected_text = r""" - Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - Backtick: [\textcolor{blue}{\underline{https://docs.example.com}}](https://docs.example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_mixed_plain_and_markdown_links(self) -> None: - """ - Test handling mixed plain URLs and markdown links. - """ - # Prepare inputs. - input_text = """ - Plain: https://example.com - Markdown: [Click here](https://docs.example.com) - """ - expected_text = r""" - Plain: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - Markdown: [\textcolor{blue}{\underline{Click here}}](https://docs.example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_mixed_all_types(self) -> None: - """ - Test handling all link types in same content. - """ - # Prepare inputs. 
- input_text = r""" - ## Resources - - - Plain URL: https://ubuntu.com/tutorials/command-line-for-beginners - - Backtick URL: `https://docs.python.org/3/` - - Markdown link: [Click here](https://github.com) - - Email: [support@example.com](support@example.com) - - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) - """ - expected_text = r""" - ## Resources - - - Plain URL: [\textcolor{blue}{\underline{https://ubuntu.com/tutorials/command-line-for-beginners}}](https://ubuntu.com/tutorials/command-line-for-beginners) - - Backtick URL: [\textcolor{blue}{\underline{https://docs.python.org/3/}}](https://docs.python.org/3/) - - Markdown link: [\textcolor{blue}{\underline{Click here}}](https://github.com) - - Email: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - - Already formatted: [\textcolor{blue}{\underline{https://stackoverflow.com}}](https://stackoverflow.com) - """ - # Run test. - self.helper(input_text, expected_text) - - # ========================================================================= - # Complex scenarios. - # ========================================================================= - - def test_url_with_file_extension(self) -> None: - """ - Test URL pointing to file with extension. - """ - # Prepare inputs. - input_text = """ - Download: https://cdn.example.com/files/document.pdf - """ - expected_text = r""" - Download: [\textcolor{blue}{\underline{https://cdn.example.com/files/document.pdf}}](https://cdn.example.com/files/document.pdf) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_already_formatted_link_preserved(self) -> None: - """ - Test that already formatted links are preserved. - """ - # Prepare inputs. - input_text = r""" - Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) - """ - expected_text = r""" - Link: [\textcolor{blue}{\underline{Example Site}}](https://example.com) - """ - # Run test. 
- self.helper(input_text, expected_text) - - # ========================================================================= - # Image/picture links should be left untouched. - # ========================================================================= - - def test_filter_image_simple(self) -> None: - """ - Test that simple image links are left untouched. - """ - # Prepare inputs. - input_text = """ - Check this image: ![](path/to/image.png) - """ - expected_text = """ - Check this image: ![](path/to/image.png) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_jpg_images(self) -> None: - """ - Test that JPG image links are left untouched. - """ - # Prepare inputs. - input_text = """ - ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) - """ - expected_text = """ - ![](lectures_source/images/lec_4_1_slide_5_image_1.jpg) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_mixed_images_and_emails(self) -> None: - """ - Test that image links are not processed while email links are. - """ - # Prepare inputs. - input_text = """ - Contact: [](support@example.com) - Image: ![](path/to/image.png) - Link: https://example.com - """ - expected_text = r""" - Contact: [\textcolor{blue}{\underline{support@example.com}}](support@example.com) - Image: ![](path/to/image.png) - Link: [\textcolor{blue}{\underline{https://example.com}}](https://example.com) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_image_with_alt_text(self) -> None: - """ - Test that image links with alt text are left untouched. - """ - # Prepare inputs. - input_text = """ - ![Alt text](path/to/image.png) - """ - expected_text = """ - ![Alt text](path/to/image.png) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_filter_multiple_images(self) -> None: - """ - Test that multiple image links are left untouched. - """ - # Prepare inputs. 
- input_text = """ - ![](image1.png) - ![](image2.jpg) - ![](image3.gif) - """ - expected_text = """ - ![](image1.png) - ![](image2.jpg) - ![](image3.gif) - """ - # Run test. - self.helper(input_text, expected_text) - - def test_markdown_link_with_escaped_underscores(self) -> None: - """ - Test markdown link with escaped underscores in the text. - """ - # Prepare inputs. - input_text = r""" - [tutorial\_docker\_compose](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) - """ - expected_text = r""" - [\textcolor{blue}{\underline{tutorial\_docker\_compose}}](https://github.com/gpsaggese/umd_classes/tree/main/data605/tutorials/tutorial_docker_compose) - """ - # Run test. - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_add_prettier_ignore_to_div_blocks -# ############################################################################# - - -class Test_add_prettier_ignore_to_div_blocks(hunitest.TestCase): - """ - Test the function to add prettier-ignore comments around div blocks. - """ - - def test_simple_div_block(self) -> None: - """ - Test a simple div block with two colons. - """ - # Prepare inputs. - txt = """ - :::: - ::: - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - def test_multiple_div_blocks(self) -> None: - """ - Test multiple div blocks in the same content. - """ - # Prepare inputs. - txt = """ - Some text before - - :::: - ::::{.column width=40%} - - Middle text - - :::columns - ::::{.column width=60%} - - Some text after - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. 
- actual_lines = hmadiblo.add_prettier_ignore_to_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - -# ############################################################################# -# Test_remove_prettier_ignore_from_div_blocks -# ############################################################################# - - -class Test_remove_prettier_ignore_from_div_blocks(hunitest.TestCase): - """ - Test the function to remove prettier-ignore comments from div blocks. - """ - - def test_remove_simple_block(self) -> None: - """ - Test removing prettier-ignore from a simple div block. - """ - # Prepare inputs. - txt = """ - - - :::: - ::: - - - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. - self.check_string(actual) - - def test_remove_multiple_blocks(self) -> None: - """ - Test removing prettier-ignore from multiple div blocks. - """ - # Prepare inputs. - txt = """ - Text before - - - :::: - ::::{.column width=40%} - - - Middle text - - - :::columns - ::::{.column width=60%} - - - Text after - """ - txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) - lines = txt.split("\n") - # Run test. - actual_lines = hmadiblo.remove_prettier_ignore_from_div_blocks(lines) - actual = "\n".join(actual_lines) - # Check outputs. 
- self.check_string(actual) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py deleted file mode 100644 index 34ea20964..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_headers.py +++ /dev/null @@ -1,2002 +0,0 @@ -import logging -import os -import pprint -from typing import Any, List, Tuple, cast - -import helpers.hio as hio -import helpers.hmarkdown as hmarkdo -import helpers.hmarkdown_headers as hmarhead -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: - res = [ - hmarkdo.HeaderInfo(level, text, 5 * i + 1) - for i, (level, text) in enumerate(data) - ] - return res - - -def get_header_list1() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (2, "Section 1.1"), - (3, "Subsection 1.1.1"), - (3, "Subsection 1.1.2"), - (2, "Section 1.2"), - (1, "Chapter 2"), - (2, "Section 2.1"), - (3, "Subsection 2.1.1"), - (2, "Section 2.2"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list2() -> hmarkdo.HeaderList: - data = [ - (1, "Module Alpha"), - (2, "Lesson Alpha-1"), - (3, "Topic Alpha-1.a"), - (3, "Topic Alpha-1.b"), - (2, "Lesson Alpha-2"), - (3, "Topic Alpha-2.a"), - (1, "Module Beta"), - (2, "Lesson Beta-1"), - (3, "Topic Beta-1.a"), - (2, "Lesson Beta-2"), - (1, "Module Gamma"), - (2, "Lesson Gamma-1"), - (3, "Topic Gamma-1.a"), - (3, "Topic Gamma-1.b"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list3() -> hmarkdo.HeaderList: - data = [ - (1, "Topic A"), - (2, "Subtopic A.1"), - (3, "Detail A.1.i"), - (3, "Detail A.1.ii"), - (2, "Subtopic 
A.2"), - (1, "Topic B"), - (2, "Subtopic B.1"), - (3, "Detail B.1.i"), - (2, "Subtopic B.2"), - (3, "Detail B.2.i"), - (3, "Detail B.2.ii"), - (2, "Subtopic B.3"), - (1, "Topic C"), - (2, "Subtopic C.1"), - (3, "Detail C.1.i"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list4() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (3, "Subsection 1.1.1"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_header_list5() -> hmarkdo.HeaderList: - data = [ - (1, "Chapter 1"), - (2, "Section 1.1"), - (3, "Subsection 1.1.1"), - (1, "Chapter 2"), - ] - header_list = _to_header_list(data) - return header_list - - -def _get_markdown_example1() -> str: - content = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - # Header3 - Content under header 3. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example2() -> str: - content = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_no_header_example1() -> str: - content = r""" - This is some content without any headers. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example4() -> str: - content = r""" - # Chapter 1 - - Welcome to the first chapter. This chapter introduces fundamental concepts and - lays the groundwork for further exploration. - - ## Section 1.1 - - This section discusses the initial principles and key ideas that are crucial for - understanding the topic. - - ### Subsection 1.1.1 - - The first subsection dives deeper into the details, providing examples and - insights that help clarify the concepts. - - Example: - ```python - def greet(name): - return f"Hello, {name}!" 
- print(greet("World")) - ``` - - ### Subsection 1.1.2 - - Here, we examine alternative perspectives and additional considerations that - were not covered in the previous subsection. - - - Key Point 1: Understanding different viewpoints enhances comprehension. - - Key Point 2: Practical application reinforces learning. - - ## Section 1.2 - - This section introduces new frameworks and methodologies that build upon the - foundation established earlier. - - > "Knowledge is like a tree, growing stronger with each branch of understanding." - - # Chapter 2 - - Moving forward, this chapter explores advanced topics and real-world - applications. - - ## Section 2.1 - - This section provides an in-depth analysis of core mechanisms that drive the - subject matter. - - ### Subsection 2.1.1 - - A deep dive into specific case studies and empirical evidence that support - theoretical claims. - - - Case Study 1: Implementation in modern industry - - Case Study 2: Comparative analysis of traditional vs. modern methods - - ## Section 2.2 - - The final section of this chapter presents summary conclusions, key takeaways, - and potential future developments. - - ```yaml - future: - - AI integration - - Process optimization - - Sustainable solutions - ``` - - Stay curious and keep exploring! - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_example5() -> hmarkdo.HeaderList: - content = r""" - # Models - test - ## Naive Bayes - test2 - ## Decision trees - test3 - ## Random forests - ## Linear models - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_slides_example1() -> str: - content = r""" - # Header1 - - * Slide 1 - Content 1. - - ## Header2 - - * Slide 2 - Content 2. - - * Slide 3 - Content 3. 
- """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _get_markdown_slides_example2() -> str: - content = r""" - # Header1 - - * Slide1 - Content 1. - """ - content = hprint.dedent(content) - content = cast(str, content) - return content - - -def _test_navigation_flow( - self_: Any, - txt: str, - header_list_exp: str, - header_tree_exp: str, - level: int, - description: str, - nav_str_exp: str, -) -> None: - # 1) Extract headers. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - actual = pprint.pformat(header_list) - self_.assert_equal( - actual, header_list_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - # 2) Build header tree. - tree = hmarkdo.build_header_tree(header_list) - actual = hmarkdo.header_tree_to_str(tree, ancestry=None) - self_.assert_equal( - actual, header_tree_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - # 3) Compute the navigation bar for a specific header. - actual = hmarkdo.selected_navigation_to_str(tree, level, description) - self_.assert_equal( - actual, nav_str_exp, dedent=True, remove_lead_trail_empty_lines=True - ) - - -def _test_full_navigation_flow(self_: Any, txt: str) -> None: - res: List[str] = [] - # Extract headers. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - # Build header tree. - tree = hmarkdo.build_header_tree(header_list) - # Create a navigation map for any header. - for node in header_list: - level, description, _ = node.as_tuple() - res_tmp = hprint.frame(hprint.to_str("level description")) - res.append(res_tmp) - # - res_tmp = hmarkdo.selected_navigation_to_str(tree, level, description) - res.append(res_tmp) - # Check. 
- actual = "\n".join(res) - self_.check_string(actual) - - -# ############################################################################# -# Test_header_list_to_vim_cfile1 -# ############################################################################# - - -class Test_header_list_to_vim_cfile1(hunitest.TestCase): - def test1(self) -> None: - """ - Test conversion of header list to vim cfile format with multiple - levels. - """ - # Prepare inputs. - markdown_file = "test.py" - headers = get_header_list1() - # Call function. - actual_lines = hmarkdo.header_list_to_vim_cfile(markdown_file, headers) - actual = "\n".join(actual_lines) - # Check output. - expected = r""" - test.py:1:Chapter 1 - test.py:6:Section 1.1 - test.py:11:Subsection 1.1.1 - test.py:16:Subsection 1.1.2 - test.py:21:Section 1.2 - test.py:26:Chapter 2 - test.py:31:Section 2.1 - test.py:36:Subsection 2.1.1 - test.py:41:Section 2.2 - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_header_list_to_markdown1 -# ############################################################################# - - -class Test_header_list_to_markdown1(hunitest.TestCase): - def helper( - self, headers: hmarkdo.HeaderList, mode: str, expected: str - ) -> None: - """ - Helper method to test header_list_to_markdown function. - - :param headers: list of HeaderInfo objects - :param mode: conversion mode ("list" or "headers") - :param expected: expected output string - """ - # Call function. - actual_lines = hmarkdo.header_list_to_markdown(headers, mode) - actual = "\n".join(actual_lines) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test conversion of header list to markdown list format with - indentation. - """ - # Prepare inputs. - headers = get_header_list1() - mode = "list" - # Prepare outputs. 
- expected = r""" - - Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - Subsection 1.1.2 - - Section 1.2 - - Chapter 2 - - Section 2.1 - - Subsection 2.1.1 - - Section 2.2 - """ - # Run test. - self.helper(headers, mode, expected) - - def test2(self) -> None: - """ - Test conversion of header list to markdown headers format with - proper heading levels. - """ - # Prepare inputs. - headers = get_header_list1() - mode = "headers" - # Prepare outputs. - expected = r""" - # Chapter 1 - ## Section 1.1 - ### Subsection 1.1.1 - ### Subsection 1.1.2 - ## Section 1.2 - # Chapter 2 - ## Section 2.1 - ### Subsection 2.1.1 - ## Section 2.2 - """ - # Run test. - self.helper(headers, mode, expected) - - -# ############################################################################# -# Test_is_markdown_line_separator1 -# ############################################################################# - - -class Test_is_markdown_line_separator1(hunitest.TestCase): - def helper(self, line: str, expected: bool) -> None: - """ - Helper method to test is_markdown_line_separator function. - - :param line: input line to test - :param expected: expected boolean result - """ - # Call function. - actual = hmarkdo.is_markdown_line_separator(line) - # Check output. - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test that a line with only dashes is recognized as a separator. - """ - # Prepare inputs. - line = "-----------------------" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test2(self) -> None: - """ - Test that a line with hash prefix and dashes is a valid separator. - """ - # Prepare inputs. - line = "# ------" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test3(self) -> None: - """ - Test that a line with hash prefix and hash characters is a valid - separator. - """ - # Prepare inputs. - line = "# #########" - # Prepare outputs. - expected = True - # Run test. 
- self.helper(line, expected) - - def test4(self) -> None: - """ - Test that a line with triple hash prefix and equals is a valid - separator. - """ - # Prepare inputs. - line = "### =====" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test5(self) -> None: - """ - Test that a line with hash and slashes is a valid separator. - """ - # Prepare inputs. - line = "#//////" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test6(self) -> None: - """ - Test that a line with hash, spaces, and slashes is a valid - separator. - """ - # Prepare inputs. - line = "# //////" - # Prepare outputs. - expected = True - # Run test. - self.helper(line, expected) - - def test7(self) -> None: - """ - Test that plain text is not recognized as a separator. - """ - # Prepare inputs. - line = "Not a separator" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test8(self) -> None: - """ - Test that a short dash line is not a valid separator. - """ - # Prepare inputs. - line = "# --" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test9(self) -> None: - """ - Test that mixed separator characters are not valid. - """ - # Prepare inputs. - line = "# ###---" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test10(self) -> None: - """ - Test that two equals signs alone are not a valid separator. - """ - # Prepare inputs. - line = "==" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test11(self) -> None: - """ - Test that dash prefix with slashes is not a valid separator. - """ - # Prepare inputs. - line = "- //////" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test12(self) -> None: - """ - Test that separators with trailing text are not valid. - """ - # Prepare inputs. 
- line = "=== Not a seperator" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - def test13(self) -> None: - """ - Test that separators with surrounding text are not valid. - """ - # Prepare inputs. - line = "--- Not a seperator ---" - # Prepare outputs. - expected = False - # Run test. - self.helper(line, expected) - - -# ############################################################################# -# Test_extract_section_from_markdown1 -# ############################################################################# - - -class Test_extract_section_from_markdown1(hunitest.TestCase): - def helper(self, content: str, header_name: str, expected: str) -> None: - """ - Helper method to test extract_section_from_markdown function. - - :param content: markdown content to extract from - :param header_name: name of header to extract - :param expected: expected output string - """ - # Call function. - lines = content.split("\n") - actual_lines = hmarkdo.extract_section_from_markdown(lines, header_name) - actual = "\n".join(actual_lines) - # Check output. - self.assert_equal(actual, expected, dedent=True) - - # TODO(gp): This doesn't seem correct. - def test1(self) -> None: - """ - Test extracting a section that includes a subheader. - """ - # Prepare inputs. - content = _get_markdown_example1() - # Prepare outputs. - expected = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header1", expected) - - def test2(self) -> None: - """ - Test extracting a subheader section only. - """ - # Prepare inputs. - content = _get_markdown_example1() - content = hprint.dedent(content) - # Prepare outputs. - expected = r""" - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header2", expected) - - def test3(self) -> None: - """ - Test extracting the last header section in the document. - """ - # Prepare inputs. 
- content = _get_markdown_example1() - content = hprint.dedent(content) - # Prepare outputs. - expected = r""" - # Header3 - Content under header 3. - """ - # Run test. - self.helper(content, "Header3", expected) - - def test4(self) -> None: - """ - Test extracting a header that spans to the end of document. - """ - # Prepare inputs. - content = _get_markdown_example2() - # Prepare outputs. - expected = r""" - # Header1 - Content under header 1. - ## Header2 - Content under subheader 2. - """ - # Run test. - self.helper(content, "Header1", expected) - - def test5(self) -> None: - # Prepare inputs. - content = _get_markdown_no_header_example1() - # Call tested function. - with self.assertRaises(ValueError) as fail: - lines = content.split("\n") - hmarkdo.extract_section_from_markdown(lines, "Header4") - # Check output. - actual = str(fail.exception) - expected = r"Header 'Header4' not found" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_extract_headers_from_markdown1 -# ############################################################################# - - -class Test_extract_headers_from_markdown1(hunitest.TestCase): - def helper(self, content: str, max_level: int, expected: str) -> None: - """ - Helper method to test extract_headers_from_markdown function. - - :param content: markdown content to extract headers from - :param max_level: maximum header level to extract - :param expected: expected output string representation - """ - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_headers_from_markdown( - lines, max_level=max_level - ) - # Check output. - self.assert_equal(str(actual), expected) - - def test1(self) -> None: - """ - Test extracting multiple headers with different levels from markdown - content. - """ - # Prepare inputs. - content = _get_markdown_example1() - max_level = 3 - # Prepare outputs. 
- expected = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3), HeaderInfo(1, 'Header3', 5)]""" - # Run test. - self.helper(content, max_level, expected) - - def test2(self) -> None: - """ - Test extracting headers from a simple two-level structure. - """ - # Prepare inputs. - content = _get_markdown_example2() - max_level = 3 - # Prepare outputs. - expected = ( - r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" - ) - # Run test. - self.helper(content, max_level, expected) - - def test3(self) -> None: - # Prepare inputs. - content = r""" - This is some content without any headers. - """ - content = hprint.dedent(content) - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_headers_from_markdown(lines, max_level=3) - # Check output. - expected: List[str] = [] - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# -# Test_extract_slides_from_markdown1 -# ############################################################################# - - -class Test_extract_slides_from_markdown1(hunitest.TestCase): - def helper(self, content: str, expected: str) -> None: - """ - Helper method to test extract_slides_from_markdown function. - - :param content: markdown content to extract slides from - :param expected: expected output string representation - """ - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_slides_from_markdown(lines) - # Check output. - self.assert_equal(str(actual), expected) - - def test1(self) -> None: - """ - Test extracting multiple slides from markdown presentation format. - """ - # Prepare inputs. - content = _get_markdown_slides_example1() - # Prepare outputs. - expected = r"""([HeaderInfo(1, 'Slide 1', 3), HeaderInfo(1, 'Slide 2', 8), HeaderInfo(1, 'Slide 3', 11)], 12)""" - # Run test. 
- self.helper(content, expected) - - def test2(self) -> None: - """ - Test extracting a single slide from markdown presentation format. - """ - # Prepare inputs. - content = _get_markdown_slides_example2() - # Prepare outputs. - expected = r"""([HeaderInfo(1, 'Slide1', 3)], 4)""" - # Run test. - self.helper(content, expected) - - def test3(self) -> None: - # Prepare inputs. - content = _get_markdown_no_header_example1() - # Call function. - lines = content.split("\n") - actual = hmarkdo.extract_slides_from_markdown(lines) - # Check output. - expected = r"""([], 1)""" - self.assert_equal(str(actual), expected) - - -# ############################################################################# -# Test_selected_navigation_to_str1 -# ############################################################################# - - -class Test_selected_navigation_to_str1(hunitest.TestCase): - def test1(self) -> None: - """ - Create navigation bar from Markdown text `_get_markdown_example4()`. - """ - txt = _get_markdown_example4() - header_list_exp = """ - [HeaderInfo(1, 'Chapter 1', 1), - HeaderInfo(2, 'Section 1.1', 6), - HeaderInfo(3, 'Subsection 1.1.1', 11), - HeaderInfo(3, 'Subsection 1.1.2', 23), - HeaderInfo(2, 'Section 1.2', 31), - HeaderInfo(1, 'Chapter 2', 38), - HeaderInfo(2, 'Section 2.1', 43), - HeaderInfo(3, 'Subsection 2.1.1', 48), - HeaderInfo(2, 'Section 2.2', 56)] - """ - header_tree_exp = """ - - Chapter 1 - - Chapter 2 - """ - level = 3 - description = "Subsection 1.1.2" - nav_str_exp = """ - - Chapter 1 - - Section 1.1 - - Subsection 1.1.1 - - **Subsection 1.1.2** - - Section 1.2 - - Chapter 2 - """ - _test_navigation_flow( - self, - txt, - header_list_exp, - header_tree_exp, - level, - description, - nav_str_exp, - ) - - def test2(self) -> None: - txt = _get_markdown_example4() - _test_full_navigation_flow(self, txt) - - -# ############################################################################# -# Test_selected_navigation_to_str2 -# 
############################################################################# - - -class Test_selected_navigation_to_str2(hunitest.TestCase): - def test1(self) -> None: - """ - Create navigation bar from Markdown text `_get_markdown_example5()`. - """ - txt = _get_markdown_example5() - header_list_exp = r""" - [HeaderInfo(1, 'Models', 1), - HeaderInfo(2, 'Naive Bayes', 3), - HeaderInfo(2, 'Decision trees', 5), - HeaderInfo(2, 'Random forests', 7), - HeaderInfo(2, 'Linear models', 8)] - """ - header_tree_exp = """ - - Models - """ - level = 2 - description = "Decision trees" - nav_str_exp = """ - - Models - - Naive Bayes - - **Decision trees** - - Random forests - - Linear models - """ - _test_navigation_flow( - self, - txt, - header_list_exp, - header_tree_exp, - level, - description, - nav_str_exp, - ) - - def test2(self) -> None: - txt = _get_markdown_example5() - _test_full_navigation_flow(self, txt) - - -# ############################################################################# -# Test_modify_header_level1 -# ############################################################################# - - -class Test_modify_header_level1(hunitest.TestCase): - def helper( - self, input_lines: List[str], level: int, expected_lines: List[str] - ) -> None: - """ - Helper method to test `modify_header_level` function. - - :param input_lines: list of input text lines - :param level: level adjustment to apply - :param expected_lines: list of expected output lines - """ - # Prepare inputs. - input_text = "\n".join(input_lines) - # Call tested function. - actual_lines = hmarkdo.modify_header_level(input_lines, level) - actual = "\n".join(actual_lines) - # Check output. - expected = "\n".join(expected_lines) - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test the inputs to increase headings. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "# Chapter 1", - "## Section 1.1", - "### Subsection 1.1.1", - "#### Sub-subsection 1.1.1.1", - ] - level = 1 - expected_lines = [ - "## Chapter 1", - "### Section 1.1", - "#### Subsection 1.1.1", - "##### Sub-subsection 1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test2(self) -> None: - """ - Test inputs to increase headings with level 5 becoming level 6. - """ - # Prepare inputs and outputs. - input_lines = ["# Chapter 1", "##### Sub-sub-subsection 1.1.1.1.1"] - level = 1 - expected_lines = ["## Chapter 1", "###### Sub-sub-subsection 1.1.1.1.1"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test3(self) -> None: - """ - Test inputs to increase headings including a paragraph which remains - unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["# Chapter 1", "Paragraph 1"] - level = 1 - expected_lines = ["## Chapter 1", "Paragraph 1"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test4(self) -> None: - """ - Test inputs of paragraphs which remain unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["Paragraph 1", "Paragraph 2"] - level = 1 - expected_lines = ["Paragraph 1", "Paragraph 2"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test5(self) -> None: - """ - Test to increase headings with mixed levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "# Chapter 1", - "##### Sub-sub-subsection 1.1.1.1.1", - "# Chapter 2", - "### Subsection 2.1", - "# Chapter 3", - ] - level = 1 - expected_lines = [ - "## Chapter 1", - "###### Sub-sub-subsection 1.1.1.1.1", - "## Chapter 2", - "#### Subsection 2.1", - "## Chapter 3", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test6(self) -> None: - """ - Test the inputs to decrease headings. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "## Section 1.1", - "### Subsection 1.1.1", - "#### Sub-subsection 1.1.1.1", - "##### Sub-sub-subsection 1.1.1.1.1", - ] - level = -1 - expected_lines = [ - "# Section 1.1", - "## Subsection 1.1.1", - "### Sub-subsection 1.1.1.1", - "#### Sub-sub-subsection 1.1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test7(self) -> None: - """ - Test inputs to decrease headings by one level. - """ - # Prepare inputs and outputs. - input_lines = [ - "## Chapter 1", - "##### Sub-subsection 1.1.1.1", - ] - level = -1 - expected_lines = [ - "# Chapter 1", - "#### Sub-subsection 1.1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test8(self) -> None: - """ - Test inputs of paragraphs which remain unchanged. - """ - # Prepare inputs and outputs. - input_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] - level = -1 - expected_lines = ["Paragraph 1", "Paragraph 2", "Paragraph 3"] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test9(self) -> None: - """ - Test increasing headers by 2 levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "# Chapter 1", - "## Section 1.1", - "### Subsection 1.1.1", - ] - level = 2 - expected_lines = [ - "### Chapter 1", - "#### Section 1.1", - "##### Subsection 1.1.1", - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test10(self) -> None: - """ - Test decreasing headers by 2 levels. - """ - # Prepare inputs and outputs. - input_lines = [ - "### Chapter 1", - "#### Section 1.1", - "##### Subsection 1.1.1", - ] - level = -2 - expected_lines = [ - "# Chapter 1", # 3-2=1 - "## Section 1.1", # 4-2=2 - "### Subsection 1.1.1", # 5-2=3 - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - def test11(self) -> None: - """ - Test increasing headers by 2 levels. - """ - # Prepare inputs and outputs. 
- input_lines = [ - "### Level 3", - "#### Level 4", - ] - level = 2 - expected_lines = [ - "##### Level 3", # 3+2=5 - "###### Level 4", # 4+2=6 - ] - # Call the helper. - self.helper(input_lines, level, expected_lines) - - -# ############################################################################# -# Test_format_headers1 -# ############################################################################# - - -class Test_format_headers1(hunitest.TestCase): - def helper( - self, input_text: List[str], expected: List[str], max_lev: int - ) -> None: - """ - Process the given text with a specified maximum level and compare the - result with the expected output. - - :param input_text: the text to be processed - :param expected: the expected output after processing the text - :param max_lev: the maximum heading level to be formatted - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - write_file = os.path.join(scratch_dir, "write_file.txt") - # Call tested function. - hmarkdo.format_headers(input_text, write_file, max_lev=max_lev) - # Check output. - actual = hio.from_file(write_file) - self.assertEqual(actual, "\n".join(expected)) - - def test1(self) -> None: - """ - Test the inputs to check the basic formatting of headings. - """ - input_text = [ - "# Chapter 1", - "section text", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "section text", - ] - self.helper(input_text, expected, max_lev=1) - - def test2(self) -> None: - """ - Test inputs with headings beyond the maximum level to ensure they are - ignored during formatting. 
- """ - input_text = [ - "# Chapter 1", - "## Section 1.1", - "### Section 1.1.1", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "## ############################################################################", - "## Section 1.1", - "## ############################################################################", - "### Section 1.1.1", - ] - self.helper(input_text, expected, max_lev=2) - - def test3(self) -> None: - """ - Test the inputs to check that markdown line separators are removed. - """ - input_text = [ - "# Chapter 1", - "-----------------", - "Text", - "############", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "Text", - ] - self.helper(input_text, expected, max_lev=1) - - def test4(self) -> None: - """ - Test inputs where max_level is inferred from the file content. - """ - input_text = [ - "# Chapter 1", - "max_level=1", - "## Section 1.1", - ] - expected = [ - "# #############################################################################", - "# Chapter 1", - "# #############################################################################", - "max_level=1", - "## Section 1.1", - ] - self.helper(input_text, expected, max_lev=2) - - def test5(self) -> None: - """ - Test inputs with no headers to ensure they remain unchanged. 
- """ - input_text = [ - "Only text", - "No headings", - ] - expected = [ - "Only text", - "No headings", - ] - self.helper(input_text, expected, max_lev=3) - - -# ############################################################################# -# Test_sanity_check_header_list1 -# ############################################################################# - - -class Test_sanity_check_header_list1(hunitest.TestCase): - def test1(self) -> None: - """ - Test that the header list with valid level increase is accepted. - """ - # Prepare inputs. - header_list = get_header_list1() - # Call function. - hmarkdo.sanity_check_header_list(header_list) - - def test2(self) -> None: - """ - Test that the header list with an increase of more than one level - raises an error. - """ - # Prepare inputs. - header_list = get_header_list4() - # Call function. - with self.assertRaises(ValueError) as err: - hmarkdo.sanity_check_header_list(header_list) - # Check output. - actual = str(err.exception) - self.check_string(actual) - - def test3(self) -> None: - """ - Test that the header list is accepted when heading levels decrease by - more than one. - """ - # Prepare inputs. - header_list = get_header_list5() - # Call function. - hmarkdo.sanity_check_header_list(header_list) - - -# ############################################################################# -# Test__has_internal_capitals1 -# ############################################################################# - - -class Test__has_internal_capitals1(hunitest.TestCase): - """ - Test `_has_internal_capitals` function. - """ - - def helper(self, word: str, expected: bool) -> None: - """ - Test helper for `_has_internal_capitals`. - - :param word: word to test - :param expected: expected result - """ - # Run test. - actual = hmarhead._has_internal_capitals(word) - # Check outputs. - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test word with internal capital letters. - """ - # Prepare inputs. 
- word = "SimpleFeedForward" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test2(self) -> None: - """ - Test word with multiple internal capital letters. - """ - # Prepare inputs. - word = "DeepNPTS" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test3(self) -> None: - """ - Test word with capital only at the start. - """ - # Prepare inputs. - word = "Machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test4(self) -> None: - """ - Test all lowercase word. - """ - # Prepare inputs. - word = "learning" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test5(self) -> None: - """ - Test all uppercase word. - """ - # Prepare inputs. - word = "ML" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test6(self) -> None: - """ - Test single lowercase character. - """ - # Prepare inputs. - word = "a" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test7(self) -> None: - """ - Test single uppercase character. - """ - # Prepare inputs. - word = "A" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test8(self) -> None: - """ - Test empty string. - """ - # Prepare inputs. - word = "" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test9(self) -> None: - """ - Test camelCase word. - """ - # Prepare inputs. - word = "camelCase" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - -# ############################################################################# -# Test_capitalize_header1 -# ############################################################################# - - -class Test_capitalize_header1(hunitest.TestCase): - def helper(self, txt: str, expected: str) -> None: - # Prepare inputs. 
- txt = hprint.dedent(txt) - # Run function. - lines = txt.split("\n") - actual_lines = hmarkdo.capitalize_header(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test capitalizing a short two-word title. - """ - txt = r""" - * ML theory - """ - expected = r""" - * ML Theory - """ - self.helper(txt, expected) - - def test2(self) -> None: - """ - Test capitalizing a longer multi-word title. - """ - txt = r""" - * A map of machine learning - """ - expected = r""" - * A Map of Machine Learning - """ - self.helper(txt, expected) - - def test3(self) -> None: - """ - Test that strings inside backticks are preserved. - """ - txt = r""" - # Using `python` for Machine Learning - """ - expected = r""" - # Using `python` for Machine Learning - """ - self.helper(txt, expected) - - def test4(self) -> None: - """ - Test that strings inside single quotes are preserved. - """ - txt = r""" - * Working with 'machine learning' algorithms - """ - expected = r""" - * Working with 'machine learning' Algorithms - """ - self.helper(txt, expected) - - def test5(self) -> None: - """ - Test that strings inside double quotes are preserved. - """ - txt = r""" - # Understanding "deep learning" concepts - """ - expected = r""" - # Understanding "deep learning" Concepts - """ - self.helper(txt, expected) - - def test6(self) -> None: - """ - Test mixed usage of quotes and backticks. - """ - txt = r""" - * Using `python` and "machine learning" for 'data science' - """ - expected = r""" - * Using `python` and "machine learning" for 'data science' - """ - self.helper(txt, expected) - - def test7(self) -> None: - """ - Test complex title with various quote types. 
- """ - txt = r""" - # Introduction to `sklearn` and "data preprocessing" in 'python' - """ - expected = r""" - # Introduction to `sklearn` and "data preprocessing" in 'python' - """ - self.helper(txt, expected) - - def test8(self) -> None: - """ - Test that words with internal capitals are preserved. - """ - txt = r""" - # SimpleFeedForward model - """ - expected = r""" - # SimpleFeedForward Model - """ - self.helper(txt, expected) - - def test9(self) -> None: - """ - Test multiple words with internal capitals. - """ - txt = r""" - * DeepNPTS and SimpleFeedForward models - """ - expected = r""" - * DeepNPTS and SimpleFeedForward Models - """ - self.helper(txt, expected) - - def test10(self) -> None: - """ - Test mixed normal words and words with internal capitals. - """ - txt = r""" - # Using SimpleFeedForward for machine learning - """ - expected = r""" - # Using SimpleFeedForward for Machine Learning - """ - self.helper(txt, expected) - - def test11(self) -> None: - """ - Test that headers inside fenced code blocks are not processed. - """ - txt = r""" - # Main header - - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - - ## Another header - """ - expected = r""" - # Main Header - - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - - ## Another Header - """ - self.helper(txt, expected) - - def test12(self) -> None: - """ - Test headers inside multiple fenced code blocks are not processed. - """ - txt = r""" - # First header - - ```python - # comment in code - x = 1 - ``` - - ## Second header - - ```bash - # shell comment - echo "hello" - ``` - """ - expected = r""" - # First Header - - ```python - # comment in code - x = 1 - ``` - - ## Second Header - - ```bash - # shell comment - echo "hello" - ``` - """ - self.helper(txt, expected) - - def test13(self) -> None: - """ - Test that the first word after a numeric prefix is capitalized. 
- """ - txt = r""" - ## 4.4 the Victim Triangle - """ - expected = r""" - ## 4.4 The Victim Triangle - """ - self.helper(txt, expected) - - def test14(self) -> None: - """ - Test that "of", "a", "an" after a numeric prefix are capitalized. - """ - txt = r""" - ## 1.1 of mice and men - """ - expected = r""" - ## 1.1 Of Mice and Men - """ - self.helper(txt, expected) - - def test15(self) -> None: - """ - Test that "of", "a", "an" are capitalized. - """ - txt = r""" - ## of mice and men - """ - expected = r""" - ## Of Mice and Men - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_capitalize_header2 -# ############################################################################# - - -class Test_capitalize_header2(hunitest.TestCase): - """ - Test enhanced capitalize_header functionality for mixed case words and - fenced blocks. - """ - - def helper(self, txt: str, expected: str) -> None: - """ - Helper method to test capitalize_header function. - - :param txt: input text to process - :param expected: expected output after processing - """ - # Prepare inputs. - txt = hprint.dedent(txt) - # Run function. - lines = txt.split("\n") - actual_lines = hmarkdo.capitalize_header(lines) - actual = "\n".join(actual_lines) - # Check outputs. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test that SimpleFeedForward is preserved as-is. - """ - txt = r""" - # using SimpleFeedForward for predictions - """ - expected = r""" - # Using SimpleFeedForward for Predictions - """ - self.helper(txt, expected) - - def test2(self) -> None: - """ - Test that DeepNPTS is preserved as-is. - """ - txt = r""" - # training with DeepNPTS model - """ - expected = r""" - # Training with DeepNPTS Model - """ - self.helper(txt, expected) - - def test3(self) -> None: - """ - Test multiple mixed case words in the same header. 
- """ - txt = r""" - # comparing SimpleFeedForward and DeepNPTS models - """ - expected = r""" - # Comparing SimpleFeedForward and DeepNPTS Models - """ - self.helper(txt, expected) - - def test4(self) -> None: - """ - Test mixed case words combined with all caps words. - """ - txt = r""" - # using API with SimpleFeedForward for ML tasks - """ - expected = r""" - # Using API with SimpleFeedForward for ML Tasks - """ - self.helper(txt, expected) - - def test5(self) -> None: - """ - Test mixed case word as the first word in header. - """ - txt = r""" - # SimpleFeedForward network architecture - """ - expected = r""" - # SimpleFeedForward Network Architecture - """ - self.helper(txt, expected) - - def test6(self) -> None: - """ - Test that headers inside fenced blocks are not capitalized. - """ - txt = r""" - # Main header - Some text - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - """ - expected = r""" - # Main Header - Some text - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - ``` - """ - self.helper(txt, expected) - - def test7(self) -> None: - """ - Test that multiple headers inside fenced blocks are not capitalized. 
- """ - txt = r""" - # introduction to forecasting - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - q75 = forecast.quantile(0.75) - - # 90% confidence interval - q05 = forecast.quantile(0.05) - q95 = forecast.quantile(0.95) - - # mean and median - mean = forecast.mean - median = forecast.quantile(0.5) - ``` - # conclusion - """ - expected = r""" - # Introduction to Forecasting - ```python - # 50% confidence interval (interquartile range) - q25 = forecast.quantile(0.25) - q75 = forecast.quantile(0.75) - - # 90% confidence interval - q05 = forecast.quantile(0.05) - q95 = forecast.quantile(0.95) - - # mean and median - mean = forecast.mean - median = forecast.quantile(0.5) - ``` - # Conclusion - """ - self.helper(txt, expected) - - def test8(self) -> None: - """ - Test that headers in fenced blocks with language specifier are not - capitalized. - """ - txt = r""" - # data processing - ```bash - # run the script - python script.py - ``` - """ - expected = r""" - # Data Processing - ```bash - # run the script - python script.py - ``` - """ - self.helper(txt, expected) - - def test9(self) -> None: - """ - Test mixed case words inside fenced blocks are preserved. - """ - txt = r""" - # using SimpleFeedForward model - ```python - # SimpleFeedForward implementation - class SimpleFeedForward: - pass - ``` - """ - expected = r""" - # Using SimpleFeedForward Model - ```python - # SimpleFeedForward implementation - class SimpleFeedForward: - pass - ``` - """ - self.helper(txt, expected) - - def test10(self) -> None: - """ - Test multiple fenced blocks in the same document. 
- """ - txt = r""" - # first section - ```python - # code block 1 - x = 1 - ``` - # second section - ```python - # code block 2 - y = 2 - ``` - """ - expected = r""" - # First Section - ```python - # code block 1 - x = 1 - ``` - # Second Section - ```python - # code block 2 - y = 2 - ``` - """ - self.helper(txt, expected) - - def test11(self) -> None: - """ - Test that slide titles (starting with *) also preserve mixed case. - """ - txt = r""" - * using SimpleFeedForward for predictions - """ - expected = r""" - * Using SimpleFeedForward for Predictions - """ - self.helper(txt, expected) - - def test12(self) -> None: - """ - Test mixed case words with punctuation. - """ - txt = r""" - # SimpleFeedForward: a neural network approach - """ - expected = r""" - # SimpleFeedForward: a Neural Network Approach - """ - self.helper(txt, expected) - - def test13(self) -> None: - """ - Test that normal words without mixed case are still capitalized - properly. - """ - txt = r""" - # introduction to machine learning - """ - expected = r""" - # Introduction to Machine Learning - """ - self.helper(txt, expected) - - def test14(self) -> None: - """ - Test empty fenced blocks don't cause issues. - """ - txt = r""" - # header before - ``` - ``` - # header after - """ - expected = r""" - # Header Before - ``` - ``` - # Header After - """ - self.helper(txt, expected) - - -# ############################################################################# -# Test_has_mixed_case1 -# ############################################################################# - - -class Test_has_mixed_case1(hunitest.TestCase): - """ - Test the _has_mixed_case helper function. - """ - - def helper(self, word: str, expected: bool) -> None: - """ - Test helper for has_mixed_case. - - :param word: word to test - :param expected: expected result - """ - # Call function. - actual = hmarkdo.has_mixed_case(word) - # Check output. 
- self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test SimpleFeedForward has mixed case. - """ - # Prepare inputs. - word = "SimpleFeedForward" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test2(self) -> None: - """ - Test DeepNPTS has mixed case (all caps after first). - """ - # Prepare inputs. - word = "DeepNPTS" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test3(self) -> None: - """ - Test Machine does not have mixed case (only first char capital). - """ - # Prepare inputs. - word = "Machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test4(self) -> None: - """ - Test lowercase word has no mixed case. - """ - # Prepare inputs. - word = "machine" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test5(self) -> None: - """ - Test all caps word has mixed case (caps after first position). - """ - # Prepare inputs. - word = "API" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test6(self) -> None: - """ - Test single character has no mixed case. - """ - # Prepare inputs. - word = "A" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test7(self) -> None: - """ - Test two character word with first capital has no mixed case. - """ - # Prepare inputs. - word = "At" - # Prepare outputs. - expected = False - # Run test. - self.helper(word, expected) - - def test8(self) -> None: - """ - Test two character word with both caps has mixed case. - """ - # Prepare inputs. - word = "ML" - # Prepare outputs. - expected = True - # Run test. - self.helper(word, expected) - - def test9(self) -> None: - """ - Test camelCase word has mixed case. - """ - # Prepare inputs. - word = "camelCase" - # Prepare outputs. - expected = True - # Run test. 
- self.helper(word, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py deleted file mode 100644 index f12ae2d5a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_rules.py +++ /dev/null @@ -1,377 +0,0 @@ -import logging -from typing import List, Tuple, cast - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _to_header_list(data: List[Tuple[int, str]]) -> hmarkdo.HeaderList: - res = [ - hmarkdo.HeaderInfo(level, text, 5 * i + 1) - for i, (level, text) in enumerate(data) - ] - return res - - -def get_header_list6() -> hmarkdo.HeaderList: - """ - - Spelling - - All - - LLM - - Linter - - Python - - Naming - - LLM - - Linter - - Docstrings - - LLM - - Linter - - Unit_tests - - All - - LLM - - Linter - """ - data = [ - (1, "Spelling"), - (2, "All"), - (3, "LLM"), - (3, "Linter"), - (1, "Python"), - (2, "Naming"), - (3, "LLM"), - (3, "Linter"), - (2, "Docstrings"), - (3, "LLM"), - (3, "Linter"), - (1, "Unit_tests"), - (2, "All"), - (3, "LLM"), - (3, "Linter"), - ] - header_list = _to_header_list(data) - return header_list - - -def get_guidelines_txt1() -> str: - txt = r""" - # General - - ## Spelling - - ### LLM - - ### Linter - - - Spell commands in lower case and programs with the first letter in upper case - - E.g., `git` as a command, `Git` as a program - - E.g., capitalize the first letter of `Python` - - Capitalize `JSON`, `CSV`, `DB` and other abbreviations - - # Python - - ## Naming - - ### LLM - - - Name functions using verbs and verbs/actions - - Good: `download_data()`, `process_input()`, `calculate_sum()` - - Good: Python internal functions 
as `__repr__`, `__init__` are valid - - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid - - Name classes using nouns - - Good: `Downloader()`, `DataProcessor()`, `User()` - - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` - - ### Linter - - - Name executable Python scripts using verbs and actions - - E.g., `download.py` and not `downloader.py` - - # Unit_tests - - ## Rules - - ### LLM - - - A test class should test only one function or class to help understanding - test failures - - A test method should only test a single case to ensures clarity and - precision in testing - - E.g., "for these inputs the function responds with this output" - """ - txt = hprint.dedent(txt) - txt = cast(str, txt) - return txt - - -# ############################################################################# -# Test_convert_header_list_into_guidelines1 -# ############################################################################# - - -class Test_convert_header_list_into_guidelines1(hunitest.TestCase): - def test1(self) -> None: - """ - Test converting a header list into guidelines. - """ - # Prepare inputs. - header_list = get_header_list6() - # Call function. - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Check output. 
- actual = "\n".join(map(str, guidelines)) - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - HeaderInfo(1, 'Python:Naming:LLM', 31) - HeaderInfo(1, 'Python:Naming:Linter', 36) - HeaderInfo(1, 'Python:Docstrings:LLM', 46) - HeaderInfo(1, 'Python:Docstrings:Linter', 51) - HeaderInfo(1, 'Unit_tests:All:LLM', 66) - HeaderInfo(1, 'Unit_tests:All:Linter', 71) - """ - self.assert_equal(actual, expected, dedent=True) - - -# ############################################################################# -# Test_extract_rules1 -# ############################################################################# - - -class Test_extract_rules1(hunitest.TestCase): - def helper(self, selection_rules: List[str], expected: str) -> None: - """ - Test extracting rules from a markdown file. - """ - # Prepare inputs. - guidelines = get_header_list6() - guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) - # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) - # Check output. - actual = "\n".join(map(str, selected_guidelines)) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:*:LLM"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - """ - self.helper(selection_rules, expected) - - def test2(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:NONE:LLM"] - expected = """ - """ - self.helper(selection_rules, expected) - - def test3(self) -> None: - """ - Test extracting rules from a markdown file. - """ - selection_rules = ["Spelling:All:*"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - """ - self.helper(selection_rules, expected) - - def test4(self) -> None: - """ - Test extracting rules from a markdown file. 
- """ - selection_rules = ["Spelling:All:*", "Python:*:*"] - expected = """ - HeaderInfo(1, 'Spelling:All:LLM', 11) - HeaderInfo(1, 'Spelling:All:Linter', 16) - HeaderInfo(1, 'Python:Naming:LLM', 31) - HeaderInfo(1, 'Python:Naming:Linter', 36) - HeaderInfo(1, 'Python:Docstrings:LLM', 46) - HeaderInfo(1, 'Python:Docstrings:Linter', 51) - """ - self.helper(selection_rules, expected) - - -# ############################################################################# -# Test_parse_rules_from_txt1 -# ############################################################################# - - -class Test_parse_rules_from_txt1(hunitest.TestCase): - def helper(self, text: str, expected: List[str]) -> None: - # Prepare inputs. - text = hprint.dedent(text) - lines = text.split("\n") - # Call function. - actual = hmarkdo.parse_rules_from_txt(lines) - # Check output. - actual = str(actual) - expected = str(expected) - self.assert_equal(actual, expected, dedent=True) - - def test_basic_list1(self) -> None: - """ - Test extracting simple first-level bullet points. - """ - text = """ - - Item 1 - - Item 2 - - Item 3 - """ - expected = ["- Item 1", "- Item 2", "- Item 3"] - self.helper(text, expected) - - def test_nested_list1(self) -> None: - """ - Test extracting bullet points with nested sub-items. - """ - text = """ - - Item 1 - - Item 2 - - Sub-item 2.1 - - Sub-item 2.2 - - Item 3 - """ - expected = [ - "- Item 1", - "- Item 2\n - Sub-item 2.1\n - Sub-item 2.2", - "- Item 3", - ] - self.helper(text, expected) - - def test_empty_list1(self) -> None: - """ - Test handling empty input. - """ - text = "" - expected = [] - self.helper(text, expected) - - -# ############################################################################# -# Test_end_to_end_rules1 -# ############################################################################# - - -class Test_end_to_end_rules1(hunitest.TestCase): - def test_get_header_list1(self) -> None: - """ - Test extracting headers from a markdown file. 
- """ - # Prepare inputs. - txt = get_guidelines_txt1() - max_level = 4 - # Run function. - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) - # Check output. - actual = "\n".join(map(str, header_list)) - expected = """ - HeaderInfo(1, 'General', 1) - HeaderInfo(2, 'Spelling', 3) - HeaderInfo(3, 'LLM', 5) - HeaderInfo(3, 'Linter', 7) - HeaderInfo(1, 'Python', 14) - HeaderInfo(2, 'Naming', 16) - HeaderInfo(3, 'LLM', 18) - HeaderInfo(3, 'Linter', 28) - HeaderInfo(1, 'Unit_tests', 33) - HeaderInfo(2, 'Rules', 35) - HeaderInfo(3, 'LLM', 37) - """ - self.assert_equal(actual, expected, dedent=True) - # Run function. - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Check output. - actual = "\n".join(map(str, guidelines)) - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.assert_equal(actual, expected, dedent=True) - - def helper_extract_rules( - self, selection_rules: List[str], expected: str - ) -> None: - """ - Helper function to test extracting rules from a markdown file. - """ - # Prepare inputs. - txt = get_guidelines_txt1() - max_level = 4 - lines = txt.split("\n") - header_list = hmarkdo.extract_headers_from_markdown(lines, max_level) - guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) - # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) - # Check output. - actual = "\n".join(map(str, selected_guidelines)) - self.assert_equal(actual, expected, dedent=True) - - def test_extract_rules1(self) -> None: - """ - Test extracting rules from a markdown file. 
- """ - selection_rules = ["General:*:LLM"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules2(self) -> None: - selection_rules = ["General:NONE:LLM"] - expected = """ - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules3(self) -> None: - selection_rules = ["*:*:LLM"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules4(self) -> None: - selection_rules = ["*:*:LLM", "General:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules5(self) -> None: - selection_rules = ["*:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) - - def test_extract_rules6(self) -> None: - selection_rules = ["*:*:*", "General:*:*"] - expected = """ - HeaderInfo(1, 'General:Spelling:LLM', 5) - HeaderInfo(1, 'General:Spelling:Linter', 7) - HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) - """ - self.helper_extract_rules(selection_rules, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py deleted file mode 
100644 index 39137551e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_slides.py +++ /dev/null @@ -1,399 +0,0 @@ -import logging -from typing import List - -import helpers.hmarkdown as hmarkdo -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_process_slides -# ############################################################################# - - -class Test_process_slides(hunitest.TestCase): - @staticmethod - def transform( - slide_text: List[str], - *, - slide_title: str = "", - slide_line_number: int = 0, - ) -> str: - """ - Example adding a `@` to the beginning of each line of the slide. - - :param slide_text: List of lines in the slide - :param slide_title: Title of the slide - :param slide_line_number: Line number of the slide - :return: Transformed text - """ - _LOG.debug("input=\n%s", "\n".join(slide_text)) - # Transform. - text_out = [f"@{line}" for line in slide_text] - _LOG.debug("output=\n%s", "\n".join(text_out)) - return text_out - - def helper(self, text: str, expected: str) -> None: - """ - Test helper for process_slides. - - :param text: Input text with slides - :param expected: Expected output after transformation - """ - # Prepare inputs. - text = hprint.dedent(text, remove_lead_trail_empty_lines_=False) - # Process. - actual = hmarkdo.process_slides(text, self.transform) - # Check output. - expected = hprint.dedent(expected, remove_lead_trail_empty_lines_=False) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test multiple slides. 
- """ - text = """ - * Slide 1 - - Point 1 - - Point 2 - - * Slide 2 - - Point A - - Point B - """ - expected = """ - @* Slide 1 - @ - Point 1 - @ - Point 2 - @ - @* Slide 2 - @ - Point A - @ - Point B - """ - self.helper(text, expected) - - def test2(self) -> None: - """ - Test single line slide. - """ - text = """ - * Single line slide - """ - expected = """ - @* Single line slide - """ - self.helper(text, expected) - - def test3(self) -> None: - """ - Test slide with inline comment. - """ - text = """ - * Slide with comment - # This is a comment - - Point 1 - """ - expected = """ - @* Slide with comment - @ # This is a comment - @ - Point 1 - """ - self.helper(text, expected) - - def test4(self) -> None: - """ - Test slide with comment block. - """ - text = """ - * Slide with block - - - Point 1 - """ - expected = """ - @* Slide with block - @ - @ - Point 1 - """ - self.helper(text, expected) - - def test5(self) -> None: - text = """ - * Slide 1 - * Slide 2 - """ - expected = """ - @* Slide 1 - @* Slide 2 - """ - self.helper(text, expected) - - def test6(self) -> None: - text = """ - - * Slide 1 - * Slide 2 - """ - expected = """ - - @* Slide 1 - @* Slide 2 - """ - self.helper(text, expected) - - def test7(self) -> None: - text = """ - - * Slide 1 - * Slide 2 - - """ - expected = """ - - @* Slide 1 - @* Slide 2 - @ - """ - self.helper(text, expected) - - def test8(self) -> None: - text = """ - //* Slide 1 - * Slide 2 - - """ - expected = """ - //* Slide 1 - @* Slide 2 - @ - """ - self.helper(text, expected) - - -# ############################################################################# -# Test_convert_slide_to_markdown -# ############################################################################# - - -class Test_convert_slide_to_markdown(hunitest.TestCase): - """ - Test converting slide bullets to markdown headers. - """ - - def helper(self, input_text, expected_text) -> None: - """ - Test helper for convert_slide_to_markdown. 
- - :param input_text: Input text with slide bullets - :param expected_text: Expected output with markdown headers - """ - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarkdo.convert_slide_to_markdown(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test converting a simple slide bullet to markdown header. - """ - input_text = """* This is a slide title""" - expected_text = """##### This is a slide title""" - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test converting multiple slide bullets. - """ - input_text = """ - * First slide - - Some content - * Second slide - - More content - """ - expected_text = """ - ##### First slide - - Some content - ##### Second slide - - More content - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test converting slides mixed with other content. - """ - input_text = """ - Some intro text - * Slide title - - Point 1 - - Point 2 - Regular markdown text - * Another slide - """ - expected_text = """ - Some intro text - ##### Slide title - - Point 1 - - Point 2 - Regular markdown text - ##### Another slide - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test converting text with no slide bullets. - """ - input_text = """ - Regular text - More text - - Regular bullet point - """ - expected_text = """ - Regular text - More text - - Regular bullet point - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test converting empty input. 
- """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - -# ############################################################################# -# Test_convert_markdown_to_slide -# ############################################################################# - - -class Test_convert_markdown_to_slide(hunitest.TestCase): - """ - Test converting markdown headers to slide bullets. - """ - - def helper(self, input_text: str, expected_text: str) -> None: - """ - Test helper for convert_markdown_to_slide. - - :param input_text: Input text with markdown headers - :param expected_text: Expected output with slide bullets - """ - # Prepare inputs. - lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - actual = hmarkdo.convert_markdown_to_slide(lines) - actual = "\n".join(actual) - # Check outputs. - expected = hprint.dedent(expected_text).strip() - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test converting a simple h5 header to slide bullet. - """ - input_text = """ - ##### This is a slide title - """ - expected_text = """ - * This is a slide title - """ - self.helper(input_text, expected_text) - - def test2(self) -> None: - """ - Test converting multiple h5 headers. - """ - input_text = """ - ##### First slide - - Some content - ##### Second slide - - More content - """ - expected_text = """ - * First slide - - Some content - * Second slide - - More content - """ - self.helper(input_text, expected_text) - - def test3(self) -> None: - """ - Test converting headers mixed with other content. - """ - input_text = """ - Some intro text - ##### Slide title - - Point 1 - - Point 2 - Regular markdown text - ##### Another slide - """ - expected_text = """ - Some intro text - * Slide title - - Point 1 - - Point 2 - Regular markdown text - * Another slide - """ - self.helper(input_text, expected_text) - - def test4(self) -> None: - """ - Test converting text with no h5 headers. 
- """ - input_text = """ - Regular text - # H1 header - ## H2 header - #### H4 header - """ - expected_text = """ - Regular text - # H1 header - ## H2 header - #### H4 header - """ - self.helper(input_text, expected_text) - - def test5(self) -> None: - """ - Test converting empty input. - """ - input_text = "" - expected_text = "" - self.helper(input_text, expected_text) - - def test6(self) -> None: - """ - Test that converting slide to markdown and back gives original result. - """ - # Prepare inputs. - input_text = """ - * First slide - - Some content - * Second slide - Regular text - """ - original_lines = hprint.dedent(input_text).strip().split("\n") - # Run test. - markdown_lines = hmarkdo.convert_slide_to_markdown(original_lines) - roundtrip_lines = hmarkdo.convert_markdown_to_slide(markdown_lines) - # Check outputs. - self.assert_equal(str(roundtrip_lines), str(original_lines)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py deleted file mode 100644 index f651aa3bf..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_tables.py +++ /dev/null @@ -1,196 +0,0 @@ -import logging -import pprint -from typing import Dict, List - -import helpers.hmarkdown_tables as hmartabl -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_replace_tables_with_tags1 -# ############################################################################# - - -class Test_replace_tables_with_tags1(hunitest.TestCase): - def helper( - self, text: str, expected_lines: List[str], expected_map: Dict[str, str] - ) -> None: - """ - Test replacing 
markdown tables with tags. - """ - lines = hprint.dedent(text, remove_lead_trail_empty_lines_=True) - lines = lines.split("\n") - # Call function. - actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) - # Check output. - table_map_as_str = pprint.pformat(table_map) - expected_map_as_str = pprint.pformat(expected_map) - self.assert_equal(table_map_as_str, expected_map_as_str) - # - actual_lines = "\n".join(actual_lines) - expected_lines = hprint.dedent( - expected_lines, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual_lines, expected_lines) - - def helper_round_trip(self, text: str) -> None: - """ - Test the round trip. - """ - # Do the round trip. - lines = text.split("\n") - actual_lines, table_map = hmartabl.replace_tables_with_tags(lines) - act_text = hmartabl.replace_tags_with_tables(actual_lines, table_map) - # Check output. - act_text = "\n".join(act_text) - self.assert_equal(act_text, text) - - def test1(self) -> None: - """ - Test replacing simple markdown table with tags. - """ - # Prepare inputs. - text = """ - Some text before - | Column 1 | Column 2 | - |----------|----------| - | Value 1 | Value 2 | - | Value 3 | Value 4 | - Text between tables - | Name | Age | City | - |------|-----|------| - | John | 25 | NYC | - Some text after - """ - # Prepare outputs. - expected_lines = """ - Some text before - - Text between tables - - Some text after - """ - # Check table map. - expected_map = { - "1": "| Column 1 | Column 2 |\n|----------|----------|\n| Value 1 | Value 2 |\n| Value 3 | Value 4 |", - "2": "| Name | Age | City |\n|------|-----|------|\n| John | 25 | NYC |", - } - self.helper(text, expected_lines, expected_map) - - def test2(self) -> None: - """ - Test table with alignment indicators. 
- """ - text = """ - | Left | Center | Right | - |:-----|:------:|------:| - | L1 | C1 | R1 | - | L2 | C2 | R2 | - """ - expected_lines = """ - - """ - expected_map = { - "1": "| Left | Center | Right |\n|:-----|:------:|------:|\n| L1 | C1 | R1 |\n| L2 | C2 | R2 |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test3(self) -> None: - """ - Test table with minimal structure. - """ - text = """ - Before - | A | B | - |---|---| - | 1 | 2 | - After - """ - expected_lines = """ - Before - - After - """ - expected_map = {"1": "| A | B |\n|---|---|\n| 1 | 2 |"} - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test4(self) -> None: - """ - Test table with empty cells. - """ - text = """ - | Col1 | Col2 | Col3 | - |------|------|------| - | A | | C | - | | B | | - """ - expected_lines = """ - - """ - expected_map = { - "1": "| Col1 | Col2 | Col3 |\n|------|------|------|\n| A | | C |\n| | B | |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test5(self) -> None: - """ - Test multiple tables with different column counts. - """ - text = """ - First table: - | A | B | - |---|---| - | 1 | 2 | - - Second table: - | X | Y | Z | W | - |---|---|---|---| - | a | b | c | d | - | e | f | g | h | - """ - expected_lines = """ - First table: - - - Second table: - - """ - expected_map = { - "1": "| A | B |\n|---|---|\n| 1 | 2 |", - "2": "| X | Y | Z | W |\n|---|---|---|---|\n| a | b | c | d |\n| e | f | g | h |", - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) - - def test6(self) -> None: - """ - Test table with indentation. 
- """ - text = """ - Outside - | Col1 | Col2 | - |------|------| - | Val1 | Val2 | - End - """ - expected_lines = """ - Outside - - End - """ - expected_map = { - "1": " | Col1 | Col2 |\n |------|------|\n | Val1 | Val2 |" - } - self.helper(text, expected_lines, expected_map) - # - self.helper_round_trip(text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py deleted file mode 100644 index fc88b62a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmarkdown_toc.py +++ /dev/null @@ -1,228 +0,0 @@ -import logging - -import helpers.hmarkdown as hmarkdo -import helpers.hmarkdown_toc as hmartoc -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_extract_yaml_frontmatter1 -# ############################################################################# - - -class Test_extract_yaml_frontmatter1(hunitest.TestCase): - """ - Test the extract_yaml_frontmatter function. - """ - - def helper( - self, - txt: str, - expected_frontmatter: list, - expected_remaining: list, - ) -> None: - """ - Test helper for extract_yaml_frontmatter. - - :param txt: Input text to process - :param expected_frontmatter: Expected front matter lines - :param expected_remaining: Expected remaining lines - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - frontmatter, remaining = hmartoc.extract_yaml_frontmatter(lines) - # Check outputs. 
- self.assertEqual(frontmatter, expected_frontmatter) - self.assertEqual(remaining, expected_remaining) - - def test1(self) -> None: - """ - Test extracting YAML front matter from a file. - """ - # Prepare inputs. - txt = """ - --- - title: My Document - date: 2024-01-01 - --- - # Content - This is the main content. - """ - # Prepare outputs. - expected_frontmatter = [ - "---", - "title: My Document", - "date: 2024-01-01", - "---", - ] - expected_remaining = ["# Content", "This is the main content."] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test2(self) -> None: - """ - Test processing a file without YAML front matter. - """ - # Prepare inputs. - txt = """ - # Content - This is the main content. - """ - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = ["# Content", "This is the main content."] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test3(self) -> None: - """ - Test handling incomplete YAML front matter (missing closing delimiter). - """ - # Prepare inputs. - txt = """ - --- - title: My Document - # Content without closing delimiter - """ - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = lines - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test4(self) -> None: - """ - Test extracting empty YAML front matter. - """ - # Prepare inputs. - txt = """ - --- - --- - # Content - """ - # Prepare outputs. - expected_frontmatter = ["---", "---"] - expected_remaining = ["# Content"] - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - def test5(self) -> None: - """ - Test that separators not at the beginning are not treated as front matter. - """ - # Prepare inputs. 
- txt = """ - # Content - --- - More content - """ - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Prepare outputs. - expected_frontmatter = [] - expected_remaining = lines - # Run test. - self.helper(txt, expected_frontmatter, expected_remaining) - - -# ############################################################################# -# Test_remove_table_of_contents1 -# ############################################################################# - - -class Test_remove_table_of_contents1(hunitest.TestCase): - def test1(self) -> None: - """ - Test removing table of contents from markdown text. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - - - [Section 1](#section-1) - - [Section 2](#section-2) - - - ## Section 1 - - Content of section 1. - """ - expected = """ - # Introduction - - This is an introduction. - - - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. - actual = hmarkdo.remove_table_of_contents(text) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Test text without table of contents remains unchanged. - """ - # Prepare inputs. - text = """ - # Introduction - - This is an introduction. - - ## Section 1 - - Content of section 1. - """ - text = hprint.dedent(text) - # Run test. - actual = hmarkdo.remove_table_of_contents(text) - # Check output. - self.assert_equal(actual, text) - - def test3(self) -> None: - """ - Test removing multi-line table of contents. - """ - # Prepare inputs. - text = """ - # Introduction - - - - [Section 1](#section-1) - - [Subsection 1.1](#subsection-11) - - [Section 2](#section-2) - - [Subsection 2.1](#subsection-21) - - [Subsection 2.2](#subsection-22) - - - ## Section 1 - """ - expected = """ - # Introduction - - - - ## Section 1 - """ - text = hprint.dedent(text) - # Run test. 
- actual = hmarkdo.remove_table_of_contents(text) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py deleted file mode 100644 index 16f0f097a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmkdocs.py +++ /dev/null @@ -1,394 +0,0 @@ -import logging - -import helpers.hmkdocs as hmkdocs -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_dedent_python_code_blocks1 -# ############################################################################# - - -class Test_dedent_python_code_blocks1(hunitest.TestCase): - def test_simple_code_block(self) -> None: - """ - Test dedenting a simple Python code block. - """ - # Prepare inputs. - text = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - expected = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, expected) - - def test_multiple_code_blocks(self) -> None: - """ - Test dedenting multiple Python code blocks. - """ - # Prepare inputs. 
- text = """ - # Example 1 - - ```python - def hello(): - print("Hello") - ``` - - # Example 2 - - ```python - def goodbye(): - print("Goodbye") - ``` - """ - expected = """ - # Example 1 - - ```python - def hello(): - print("Hello") - ``` - - # Example 2 - - ```python - def goodbye(): - print("Goodbye") - ``` - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, expected) - - def test_no_python_blocks(self) -> None: - """ - Test text without Python code blocks remains unchanged. - """ - # Prepare inputs. - text = """ - # Example - - This is just text. - - ```javascript - console.log("Hello"); - ``` - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, text) - - def test_already_aligned_code(self) -> None: - """ - Test code that is already aligned. - """ - # Prepare inputs. - text = """ - # Example - - ```python - def hello(): - print("Hello") - ``` - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.dedent_python_code_blocks(text) - # Check output. - self.assert_equal(actual, text) - - -# ############################################################################# -# Test_replace_indentation1 -# ############################################################################# - - -class Test_replace_indentation1(hunitest.TestCase): - def test_two_to_four_spaces(self) -> None: - """ - Test replacing 2-space indentation with 4-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. 
- expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_four_to_two_spaces(self) -> None: - """ - Test replacing 4-space indentation with 2-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - - Sub item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=4, output_spaces=2 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_two_to_eight_spaces(self) -> None: - """ - Test replacing 2-space indentation with 8-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=8 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_three_to_six_spaces(self) -> None: - """ - Test replacing 3-space indentation with 6-space indentation. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - expected = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=3, output_spaces=6 - ) - # Check output. - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test_no_indentation(self) -> None: - """ - Test text without indentation remains unchanged. - """ - # Prepare inputs. - text = """ - - Item 1 - - Item 2 - - Item 3 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. 
- self.assert_equal(actual, text) - - def test_same_input_output_spaces(self) -> None: - """ - Test that using same input and output spaces leaves text unchanged. - """ - # Prepare inputs. - text = """ - - Item 1 - - Sub item 1 - - Sub sub item 1 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=2 - ) - # Check output. - self.assert_equal(actual, text) - - def test_empty_text(self) -> None: - """ - Test empty text handling. - """ - # Prepare inputs. - text = "" - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=2, output_spaces=4 - ) - # Check output. - self.assert_equal(actual, text) - - def test_zero_to_four_spaces(self) -> None: - """ - Test converting zero indentation to 4 spaces (edge case). - """ - # Prepare inputs. - text = """ - Item 1 - Item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.replace_indentation( - text, input_spaces=1, output_spaces=4 - ) - # Check output. - self.assert_equal(actual, text) - - -# ############################################################################# -# Test_preprocess_mkdocs_markdown1 -# ############################################################################# - - -class Test_preprocess_mkdocs_markdown1(hunitest.TestCase): - def test_full_preprocessing(self) -> None: - """ - Test the complete preprocessing pipeline. - """ - # Prepare inputs. 
- text = """ - # Introduction - - - - [Section 1](#section-1) - - [Section 2](#section-2) - - - ## Section 1 - - Here is some Python code: - - ```python - def example(): - print("Hello") - if True: - print("World") - ``` - - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - """ - expected = """ - # Introduction - - - - ## Section 1 - - Here is some Python code: - - ```python - def example(): - print("Hello") - if True: - print("World") - ``` - - - Item 1 - - Sub item 1 - - Sub sub item 1 - - Item 2 - """ - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. - self.assert_equal(actual, expected) - - def test_empty_text(self) -> None: - """ - Test preprocessing empty text. - """ - # Prepare inputs. - text = "" - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. - self.assert_equal(actual, text) - - def test_text_without_preprocessing_needs(self) -> None: - """ - Test text that doesn't need any preprocessing. - """ - # Prepare inputs. - text = """ - # Simple Markdown - - This is just simple text. - - - Item 1 - - Item 2 - """ - text = hprint.dedent(text) - # Run test. - actual = hmkdocs.preprocess_mkdocs_markdown(text) - # Check output. 
- self.assert_equal(actual, text) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py deleted file mode 100644 index abb48a154..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hmodule.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging - -import helpers.hmodule as hmodule -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_hmodule1 -# ############################################################################# - - -class Test_hmodule1(hunitest.TestCase): - def test_has_module1(self) -> None: - """ - Check that the function returns true for the existing package. - """ - self.assertTrue(hmodule.has_module("numpy")) - - def test_has_not_module1(self) -> None: - """ - Check that the function returns false for the non-existing package. 
- """ - self.assertFalse(hmodule.has_module("no_such_module")) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py deleted file mode 100644 index 4d6b7bceb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hnumpy.py +++ /dev/null @@ -1,215 +0,0 @@ -import logging - -import numpy as np -import collections - -import helpers.hnumpy as hnumpy -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestRandomSeedContext -# ############################################################################# - - -class TestRandomSeedContext(hunitest.TestCase): - def test_example1(self) -> None: - """ - Getting more random numbers without context manager changes the - sequence of random numbers. - """ - n = 3 - # First batch. - np.random.seed(0) - vals1a = np.random.randn(n) - vals2a = np.random.randn(n) - # Second batch. - np.random.seed(0) - vals1b = np.random.randn(n) - vals = np.random.randn(n) - _ = vals - vals2b = np.random.randn(n) - # Check. - self.assertEqual(str(vals1a), str(vals1b)) - # Of course this might fail with a vanishingly small probability. - self.assertNotEqual(str(vals2a), str(vals2b)) - - def test_example2(self) -> None: - """ - Getting more random numbers with context manager doesn't change the - sequence of random numbers. - """ - n = 3 - # First batch. - np.random.seed(0) - vals1a = np.random.randn(n) - vals2a = np.random.randn(n) - # Second batch. - np.random.seed(0) - vals1b = np.random.randn(n) - with hnumpy.random_seed_context(42): - vals = np.random.randn(n) - _ = vals - vals2b = np.random.randn(n) - # Check. 
- self.assertEqual(str(vals1a), str(vals1b)) - self.assertEqual(str(vals2a), str(vals2b)) - - -# ############################################################################# -# TestFloorWithPrecision -# ############################################################################# - - -class TestFloorWithPrecision(hunitest.TestCase): - def _test_floor_with_precision( - self, - value: float, - precision: int, - expected: str, - ) -> None: - """ """ - actual = hnumpy.floor_with_precision(value, precision) - self.assert_equal(str(actual), expected) - - def test_floor_with_precision1(self) -> None: - """ - Test for negative float values as input. - """ - expected_as_str = "-4.63" - self._test_floor_with_precision(-4.6385, 2, expected_as_str) - - def test_floor_with_precision2(self) -> None: - """ - Test for Zero precision. - """ - expected_as_str = "-4.0" - self._test_floor_with_precision(-4.6385, 0, expected_as_str) - - def test_floor_with_precision3(self) -> None: - """ - Test for negative precision. - """ - value = 4.6385 - amount_precision = -2 - with self.assertRaises(AssertionError) as cm: - hnumpy.floor_with_precision(value, amount_precision) - # Check. - actual = str(cm.exception) - expected = """ - * Failed assertion * - 0 <= -2 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_floor_with_precision4(self) -> None: - """ - Test for positive float values as input. - """ - expected_as_str = "4.63" - self._test_floor_with_precision(4.6385, 2, expected_as_str) - - def test_floor_with_precision5(self) -> None: - """ - Test for integer values as input. - """ - expected_as_str = "4.0" - self._test_floor_with_precision(4, 0, expected_as_str) - - def test_floor_with_precision6(self) -> None: - """ - Test for very small value as input. - """ - expected = 0.0000532 - self._test_floor_with_precision(0.0000532999, 7, str(expected)) - - def test_floor_with_precision7(self) -> None: - """ - Test for very large value as input. 
- """ - expected_as_str = "4289734.12345" - self._test_floor_with_precision(4289734.1234599999, 5, expected_as_str) - - -# ############################################################################# -# Test_OrderedDict_repr_str -# ############################################################################# - - -class Test_OrderedDict_repr_str(hunitest.TestCase): - """ - The tests are used to gatekeep the expected behavior of - dunder method __str__ and __repr__ for the OrderedDict class. - - The tests stem from changes in Python 3.12. Observe below: - - Python 3.9.5: - >>> from collections import OrderedDict - >>> import numpy - >>> dct = OrderedDict({ "test": numpy.int64(42)}) - >>> dct["test"] - 42 - >>> print(dct) - OrderedDict([('test', 42)]) - >>> str(dct) - "OrderedDict([('test', 42)])" - >>> repr(dct) - "OrderedDict([('test', 42)])" - >>> str(dct["test"]) - '42' - >>> repr(dct["test"]) - '42' - - Python 3.12.3: - >>> from collections import OrderedDict - >>> import numpy - >>> dct = OrderedDict({"test": numpy.int64(42)}) - >>> dct = OrderedDict({"test": numpy.int64(42)}) - KeyboardInterrupt - >>> str(dct) - "OrderedDict({'test': np.int64(42)})" - >>> repr(dct) - "OrderedDict({'test': np.int64(42)})" - >>> str(dct["test"]) - '42' - >>> repr(dct["test"]) - 'np.int64(42)' - """ - - def test_str_single1(self) -> None: - """ - Test that the __str__ method on a single item in OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = str(d["test"]) - expected = "42" - self.assert_equal(actual, expected) - - def test_repr_single1(self) -> None: - """ - Test that the __repr__ method on a single item in OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = repr(d["test"]) - expected = "np.int64(42)" - self.assert_equal(actual, expected) - - def test_str_full1(self) -> None: - """ - Test that the __str__ method of OrderedDict returns the expected string. 
- """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = str(d) - expected = "OrderedDict({'test': np.int64(42)})" - self.assert_equal(actual, expected) - - def test_repr_full1(self) -> None: - """ - Test that the __repr__ method of OrderedDict returns the expected string. - """ - d = collections.OrderedDict({"test": np.int64(42)}) - actual = repr(d) - expected = "OrderedDict({'test': np.int64(42)})" - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py deleted file mode 100644 index 6106dd551..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hobject.py +++ /dev/null @@ -1,392 +0,0 @@ -import abc -import logging -from typing import Any, Callable, List, Optional - -import pandas as pd - -import helpers.hdbg as hdbg -import helpers.hobject as hobject -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _Obj_to_str_TestCase -# ############################################################################# - - -# Note that we can't derive this class from `hunitest.TestCase` otherwise the -# unit test framework will try to run the tests in this class. -class _Obj_to_str_TestCase(abc.ABC): - """ - Test case for testing `obj_to_str()` and `obj_to_repr()`. - """ - - @abc.abstractmethod - def get_object(self) -> Any: - """ - Build object to test. - """ - ... - - def helper(self, *, expected: Optional[str] = None, **kwargs: Any) -> None: - obj = self.get_object() - hdbg.dassert_is_not(obj, None) - # - txt: List[str] = [] - # Get `str()`. 
- txt.append(hprint.frame("str:")) - txt.append(hobject.obj_to_str(obj, **kwargs)) - # Get `repr()`. - txt.append(hprint.frame("repr:")) - txt.append(hobject.obj_to_repr(obj, **kwargs)) - # Concat. - txt = "\n".join(txt) - # Check. - if expected is None: - self.check_string(txt, purify_text=True) - else: - hdbg.dassert_isinstance(expected, str) - self.assert_equal(txt, expected, purify_text=True, fuzzy_match=True) - - def test1(self, expected: str) -> None: - """ - Use `__dict__` to extract the attributes. - """ - self.helper(expected=expected, attr_mode="__dict__") - - def test2(self, expected: str) -> None: - """ - Use `dir` to extract the attributes. - """ - self.helper(expected=expected, attr_mode="dir") - - def test3(self, expected: str) -> None: - """ - Use `__dict__` and print the type of the attributes. - """ - self.helper(expected=expected, print_type=True) - - def test4(self) -> None: - """ - Print only callable attributes. - """ - self.helper(callable_mode="all") - - def test5(self) -> None: - """ - Print only private attributes. - """ - self.helper(private_mode="all") - - def test6(self) -> None: - """ - Print only dunder attributes. - """ - self.helper(dunder_mode="all") - - -# ############################################################################# -# _Object1 -# ############################################################################# - - -class _Object1: - """ - Object storing only scalar members and not other nested objects. 
- """ - - def __init__(self) -> None: - self.a = False - self.b = "hello" - self.c = 3.14 - self._hello = "under" - self.__hello = "double_dunder" - self.hello = lambda x: x + 1 - - -# ############################################################################# -# Test_obj_to_str1 -# ############################################################################# - - -class Test_obj_to_str1(hunitest.TestCase, _Obj_to_str_TestCase): - def get_object(self) -> Any: - obj = _Object1() - return obj - - def test1(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False, b=hello, c=3.14) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - """ - super().test1(expected) - - def test2(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False, b=hello, c=3.14) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - """ - super().test2(expected) - - def test3(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object1 at 0x=(a=False , b=hello , c=3.14 ) - ################################################################################ - repr: - ################################################################################ - : - a='False' - b='hello' - c='3.14' - 
""" - super().test3(expected) - - -# ############################################################################# -# _Object2 -# ############################################################################# - - -class _Object2: - """ - Object using a `obj_to_str()` as repr. - """ - - def __init__(self) -> None: - self.x = True - self.y = "world" - self.z = 6.28 - self._hello = "under" - self.__hello = "double_dunder" - self.hello = lambda x: x + 1 - - def __repr__(self) -> str: - return hobject.obj_to_str(self) - - -# ############################################################################# -# _Object3 -# ############################################################################# - - -class _Object3: - """ - Object storing another object. - """ - - def __init__(self) -> None: - self.p = "p" - self.q = "q" - self.object2 = _Object2() - - -# ############################################################################# -# Test_obj_to_str2 -# ############################################################################# - - -class Test_obj_to_str2(hunitest.TestCase, _Obj_to_str_TestCase): - def get_object(self) -> Any: - obj = _Object3() - return obj - - def test1(self) -> None: - # TODO(gp): object2 in repr should be printed recursively as repr, but - # it's not. 
- expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(p=p, q=q, object2=_Object2 at 0x=(x=True, y=world, z=6.28)) - ################################################################################ - repr: - ################################################################################ - : - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - """ - super().test1(expected) - - def test2(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(object2=_Object2 at 0x=(x=True, y=world, z=6.28), p=p, q=q) - ################################################################################ - repr: - ################################################################################ - : - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - p='p' - q='q' - """ - super().test2(expected) - - def test3(self) -> None: - expected = r""" - ################################################################################ - str: - ################################################################################ - _Object3 at 0x=(p=p , q=q , object2=_Object2 at 0x=(x=True, y=world, z=6.28) ) - ################################################################################ - repr: - ################################################################################ - : - p='p' - q='q' - object2='_Object2 at 0x=(x=True, y=world, z=6.28)' - """ - super().test3(expected) - - -# ############################################################################# -# _Abstract_ClassA -# ############################################################################# - - -class _Abstract_ClassA(abc.ABC, hobject.PrintableMixin): - """ - Abstract class descending from 
`PrintableMixin`. - """ - - def __init__(self) -> None: - self._arg0 = 0 - self._arg1 = "one" - self._arg2 = 2 - - @staticmethod - def get_config_attributes() -> List[str]: - return ["_arg1", "_arg2"] - - -# ############################################################################# -# _ClassB -# ############################################################################# - - -class _ClassB(hobject.PrintableMixin): - """ - Class descending from `PrintableMixin`. - """ - - def __init__(self, get_wall_clock_time: Callable) -> None: - self._arg5 = {"key1": "five", "key2": 5} - self._arg6 = "abc" - self._get_wall_clock_time = get_wall_clock_time - - @staticmethod - def get_config_attributes() -> List[str]: - return ["_arg5", "_get_wall_clock_time"] - - def get_wall_clock_time(self) -> pd.Timestamp: - """ - Return wall clock time in the timezone specified in the ctor. - - Initially wall clock time can be in any timezone, but cannot be - timezone-naive. - """ - wall_clock_time = self._get_wall_clock_time() - return wall_clock_time - - -# ############################################################################# -# _ClassA -# ############################################################################# - - -class _ClassA(_Abstract_ClassA): - """ - Class descending from `_AbstractClassA` and embedding `_ClassB`. 
- """ - - def __init__(self) -> None: - super().__init__() - self._arg3 = [3, 3, 3] - get_wall_clock_time = lambda: pd.Timestamp( - "2022-04-23", tz="America/New_York" - ) - helper_class = _ClassB(get_wall_clock_time) - self._arg4 = helper_class - self._arg10 = { - "key": 1, - "get_wall_clock_time": helper_class.get_wall_clock_time, - } - - def get_config_attributes(self) -> List[str]: - config_attributes = super().get_config_attributes() - child_class_attributes = ["_arg3", "_arg4", "_arg10"] - config_attributes.extend(child_class_attributes) - return config_attributes - - -# ############################################################################# -# Test_PrintableMixin_to_config_str -# ############################################################################# - - -class Test_PrintableMixin_to_config_str(hunitest.TestCase): - def check_test_class_str(self, test_class: Any, expected: str) -> None: - actual = test_class.to_config_str() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Print `_Abstract_ClassA`. - """ - test_class = _Abstract_ClassA() - expected = r""" - : - _arg1='one' - _arg2='2' - """ - self.check_test_class_str(test_class, expected) - - def test2(self) -> None: - """ - Print `_ClassA`. - """ - test_class = _ClassA() - expected = r""" - : - _arg1='one' - _arg2='2' - _arg3='[3, 3, 3]' - _arg4=: - _arg5='{'key1': 'five', 'key2': 5}' - _get_wall_clock_time='. 
at 0x>' - _arg10= - {'get_wall_clock_time': : - _arg5='{'key1': 'five', 'key2': 5}' - _arg6='abc' >, - 'key': 1} - """ - self.check_test_class_str(test_class, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py deleted file mode 100644 index 9e9887915..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hopen.py +++ /dev/null @@ -1,92 +0,0 @@ -import logging - -import pytest - -import helpers.hopen as hopen -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# TODO(gp): Some of these tests should be executed outside of the container to -# test other systems. - - -# ############################################################################# -# Test_open_unknown -# ############################################################################# - - -class Test_open_unknown(hunitest.TestCase): - """ - Test unknown extension and unknown systems. - """ - - def test_unknown_extension1(self) -> None: - """ - Test unknown extension raises an error. - """ - with self.assertRaises(AssertionError) as cm: - hopen.open_file("a.unknown_ext") - # Check error text. - self.assertIn("unknown_ext", str(cm.exception)) - - def test_unknown_os1(self) -> None: - """ - Test unknown OS raises an error. - """ - with self.assertRaises(AssertionError) as cm: - hopen._cmd_open_html("b.html", "UnknownOS") - # Check error text. 
- self.assertIn("UnknownOS", str(cm.exception)) - - -# ############################################################################# -# Test_open_html -# ############################################################################# - - -@pytest.mark.skip(reason="See cryptomtc/cmamp#321") -class Test_open_html(hunitest.TestCase): - """ - Test different command correctness for opening html file. - """ - - def test_linux1(self) -> None: - """ - Test Linux. - """ - cmd = hopen._cmd_open_html("a.html", "Linux") - self.check_string(str(cmd)) - - def test_windows1(self) -> None: - """ - Test Windows. - """ - cmd = hopen._cmd_open_html("b.html", "Windows") - self.check_string(str(cmd)) - - def test_mac1(self) -> None: - """ - Test Darwin. - """ - cmd = hopen._cmd_open_html("c.html", "Darwin") - self.check_string(str(cmd)) - - -# ############################################################################# -# Test_open_pdf -# ############################################################################# - - -class Test_open_pdf(hunitest.TestCase): - """ - Test different command correctness for opening pdf file. - """ - - def test_mac1(self) -> None: - """ - Test Darwin. 
- """ - cmd = hopen._cmd_open_html("a.pdf", "Darwin") - self.check_string(str(cmd)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py deleted file mode 100644 index be5200d47..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_analysis.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas_analysis as hpananal -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_explore1 -# ############################################################################# - - -class Test_explore1(hunitest.TestCase): - def test_ols_regress_series(self) -> None: - x = 5 * np.random.randn(100) - y = x + np.random.randn(*x.shape) - df = pd.DataFrame() - df["x"] = x - df["y"] = y - hpananal.ols_regress_series( - df["x"], df["y"], intercept=True, print_model_stats=False - ) - - @pytest.mark.skip(reason="https://github.com/.../.../issues/3676") - def test_rolling_pca_over_time1(self) -> None: - np.random.seed(42) - df = pd.DataFrame(np.random.randn(10, 5)) - df.index = pd.date_range("2017-01-01", periods=10) - corr_df, eigval_df, eigvec_df = hpananal.rolling_pca_over_time( - df, 0.5, "fill_with_zero" - ) - txt = ( - "corr_df=\n%s\n" % corr_df.to_string() - + "eigval_df=\n%s\n" % eigval_df.to_string() - + "eigvec_df=\n%s\n" % eigvec_df.to_string() - ) - self.check_string(txt) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py deleted file mode 100644 index 595877a97..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_check_summary.py +++ /dev/null @@ -1,67 +0,0 @@ -import logging - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_CheckSummary -# ############################################################################# - - -class Test_CheckSummary(hunitest.TestCase): - def test1(self) -> None: - """ - All the tests have passed. - """ - # Prepare inputs. - obj = hpandas.CheckSummary() - obj.add( - "hello", - "Number of not submitted OMS child orders=0 / 73 = 0.00%", - True, - ) - obj.add("hello2", "ok", True) - # Check. - is_ok = obj.is_ok() - self.assertTrue(is_ok) - # - actual = obj.report_outcome(notebook_output=False, assert_on_error=False) - self.check_string(actual) - # No assertion expected. - obj.report_outcome() - - def test2(self) -> None: - """ - Not all the tests have passed. - """ - # Prepare inputs. - obj = hpandas.CheckSummary() - obj.add( - "hello", - "Number of not submitted OMS child orders=0 / 73 = 0.00%", - True, - ) - obj.add("hello2", "not_ok", False) - # Check. - is_ok = obj.is_ok() - self.assertFalse(is_ok) - # - actual = obj.report_outcome(notebook_output=False, assert_on_error=False) - self.check_string(actual) - # - with self.assertRaises(ValueError) as e: - actual = obj.report_outcome() - actual_exception = str(e.exception) - expected_exception = r""" - The checks have failed: - description comment is_ok - 0 hello Number of not submitted OMS child orders=0 / 7... 
True - 1 hello2 not_ok False - is_ok=False - """ - self.assert_equal(actual_exception, expected_exception, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py deleted file mode 100644 index a65340957..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_clean.py +++ /dev/null @@ -1,364 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestDropNa -# ############################################################################# - - -class TestDropNa(hunitest.TestCase): - def test_dropna1(self) -> None: - """ - Test if all types of NaNs are dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "B", None, "D"], - "dummy_value_3": [0, 0, pd.NA, 0, 0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.dropna(test_df, drop_infs=False) - # Prepare expected result. - expected = { - "dummy_value_1": [1, 0], - "dummy_value_2": ["A", "D"], - "dummy_value_3": [0, 0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64", "dummy_value_3": "object"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 4])) - # Check. - hunitest.compare_df(actual, expected) - - def test_dropna2(self) -> None: - """ - Test if infs are dropped. - """ - # Prepare actual result. 
- test_data = { - "dummy_value_1": [-np.inf, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "B", "C", "D"], - "dummy_value_3": [0, 0, np.inf, 0, 0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.dropna(test_df, drop_infs=True) - # Prepare expected result. - expected = { - "dummy_value_1": [1, 2, 0], - "dummy_value_2": ["A", "C", "D"], - "dummy_value_3": [0, 0, 0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64", "dummy_value_3": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 3, 4])) - # Check. - hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestDropAxisWithAllNans -# ############################################################################# - - -class TestDropAxisWithAllNans(hunitest.TestCase): - def test_drop_rows1(self) -> None: - """ - Test if row full of nans is dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": [pd.NA, "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) - # Prepare expected result. - expected = { - "dummy_value_1": [2, 3], - "dummy_value_2": ["B", "C"], - "dummy_value_3": [1.0, 1.0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([1, 2])) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_rows2(self) -> None: - """ - Test if non fully nan row is not dropped. - """ - # Prepare actual result. 
- test_data = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": ["A", "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_rows=True) - # Prepare expected result. - expected = { - "dummy_value_1": [np.nan, 2, 3], - "dummy_value_2": ["A", "B", "C"], # type: ignore - "dummy_value_3": [None, 1.0, 1.0], - } - # Set the dtype of numeral columns to float to match the dataframe after NA dropping. - expected = pd.DataFrame(data=expected).astype( - {"dummy_value_1": "float64"} - ) - # Set the index of the rows that remained. - expected = expected.set_index(pd.Index([0, 1, 2])) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_columns1(self) -> None: - """ - Test if column full of nans is dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, pd.NA, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) - # Prepare expected result. - expected = { - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - expected = pd.DataFrame(data=expected) - # Check. - hunitest.compare_df(actual, expected) - - def test_drop_columns2(self) -> None: - """ - Test if column that is not full of nans is not dropped. - """ - # Prepare actual result. - test_data = { - "dummy_value_1": [np.nan, 2, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - test_df = pd.DataFrame(data=test_data) - # Drop NA. - actual = hpandas.drop_axis_with_all_nans(test_df, drop_columns=True) - # Prepare expected result. - expected = { - "dummy_value_1": [np.nan, 2, None], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [1.0, 1.0, 1.0], - } - expected = pd.DataFrame(data=expected) - # Check. 
- hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestDropDuplicates -# ############################################################################# - - -class TestDropDuplicates(hunitest.TestCase): - """ - Test that duplicates are dropped correctly. - """ - - @staticmethod - def get_test_data() -> pd.DataFrame: - test_data = [ - (1, "A", 3.2), - (1, "A", 3.2), - (10, "B", 3.2), - (8, "A", 3.2), - (4, "B", 8.2), - (10, "B", 3.2), - ] - index = [ - "dummy_value1", - "dummy_value3", - "dummy_value2", - "dummy_value1", - "dummy_value1", - "dummy_value2", - ] - columns = ["int", "letter", "float"] - df = pd.DataFrame(data=test_data, index=index, columns=columns) - return df - - def test_drop_duplicates1(self) -> None: - """ - - use_index = True - - column_subset is not None - """ - # Prepare test data. - df = self.get_test_data() - use_index = True - column_subset = ["float"] - no_duplicates_df = hpandas.drop_duplicates( - df, use_index, column_subset=column_subset - ) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value3 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates2(self) -> None: - """ - - use_index = True - - column_subset = None - """ - # Prepare test data. - df = self.get_test_data() - use_index = True - no_duplicates_df = hpandas.drop_duplicates(df, use_index) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value3 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 8 A 3.2 - dummy_value1 4 B 8.2 - """ - # Check. 
- self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates3(self) -> None: - """ - - use_index = False - - column_subset = None - """ - # Prepare test data. - df = self.get_test_data() - use_index = False - no_duplicates_df = hpandas.drop_duplicates(df, use_index) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 8 A 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - def test_drop_duplicates4(self) -> None: - """ - - use_index = False - - column_subset is not None - """ - # Prepare test data. - df = self.get_test_data() - use_index = False - column_subset = ["letter", "float"] - no_duplicates_df = hpandas.drop_duplicates( - df, use_index, column_subset=column_subset - ) - no_duplicates_df = hpandas.df_to_str(no_duplicates_df) - # Prepare expected result. - expected_signature = r""" - int letter float - dummy_value1 1 A 3.2 - dummy_value2 10 B 3.2 - dummy_value1 4 B 8.2 - """ - # Check. - self.assert_equal(no_duplicates_df, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# Test_impute_nans -# ############################################################################# - - -class Test_impute_nans(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic imputation of "nan" strings with empty string. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "nan", "value3"], - "col2": ["a", "b", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col1", "") - # Check output. - self.assertEqual(result_df["col1"].tolist(), ["value1", "", "value3"]) - self.assertEqual(result_df["col2"].tolist(), ["a", "b", "c"]) - - def test2(self) -> None: - """ - Test imputation with a custom value. 
- """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "nan", "value3"], - "col2": ["a", "nan", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col2", "MISSING") - # Check output. - self.assertEqual(result_df["col1"].tolist(), ["value1", "nan", "value3"]) - self.assertEqual(result_df["col2"].tolist(), ["a", "MISSING", "c"]) - - def test3(self) -> None: - """ - Test with no "nan" values present. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": ["value1", "value2", "value3"], - "col2": ["a", "b", "c"], - } - ) - # Call function to test. - result_df = hpandas.impute_nans(df, "col1", "") - # Check output - should be unchanged. - self.assertEqual( - result_df["col1"].tolist(), ["value1", "value2", "value3"] - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py deleted file mode 100644 index 9567c91e5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_compare.py +++ /dev/null @@ -1,650 +0,0 @@ -import logging -from typing import Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestCompareDataframeRows -# ############################################################################# - - -class TestCompareDataframeRows(hunitest.TestCase): - def get_test_data(self) -> pd.DataFrame: - test_data = { - "dummy_value_1": [0, 1, 3, 2, 0], - "dummy_value_2": ["0", "A", "C", "B", "D"], - "dummy_value_3": [0, 0, 0, 0, 0], - } - df = pd.DataFrame(data=test_data) - df.index.name = "test" - 
return df - - def test_compare_dataframe_rows1(self) -> None: - """ - Verify that differences are caught and displayed properly. - """ - # Prepare inputs. - test_data = self.get_test_data() - edited_test_data = test_data.copy()[1:-1] - edited_test_data.loc[1, "dummy_value_2"] = "W" - edited_test_data.loc[2, "dummy_value_2"] = "Q" - edited_test_data.loc[2, "dummy_value_3"] = "1" - # Run. - data_difference = hpandas.compare_dataframe_rows( - test_data, edited_test_data - ) - # Check output. - actual = hpandas.df_to_str(data_difference) - expected = r""" dummy_value_2 dummy_value_3 test - self other self other - 0 W A 1 - 1 Q C 1 0 2""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_compare_dataframe_rows2(self) -> None: - """ - Verify that differences are caught and displayed properly without - original index. - """ - # Prepare inputs. - test_data = self.get_test_data() - test_data.index.name = None - edited_test_data = test_data.copy()[1:-1] - edited_test_data.loc[1, "dummy_value_2"] = "W" - edited_test_data.loc[2, "dummy_value_2"] = "Q" - edited_test_data.loc[2, "dummy_value_3"] = "1" - # Run. - data_difference = hpandas.compare_dataframe_rows( - test_data, edited_test_data - ) - # Check output. 
- actual = hpandas.df_to_str(data_difference) - expected = r""" dummy_value_2 dummy_value_3 - self other self other - 0 W A NaN NaN - 1 Q C 1 0.0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_compare_dfs -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -class Test_compare_dfs(hunitest.TestCase): - """ - - Define two DataFrames that can be either equal or different in terms of columns or rows - - Compare its values by calculating the difference - """ - - @staticmethod - def get_test_dfs_equal() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Both DataFrames have only equal rows and columns names. - """ - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - values1 = { - "tsA": pd.Series([1, 2, 3]), - "tsB": pd.Series([4, 5, 6]), - "tsC": pd.Series([7, 8, 9]), - "timestamp": timestamp_index1, - } - df1 = pd.DataFrame(data=values1) - df1 = df1.set_index("timestamp") - # - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - values2 = { - "tsA": pd.Series([1.1, 1.9, 3.15]), - "tsB": pd.Series([0, 5, 5.8]), - "tsC": pd.Series([6.5, 8.6, 9.07]), - "timestamp": timestamp_index2, - } - df2 = pd.DataFrame(data=values2) - df2 = df2.set_index("timestamp") - return df1, df2 - - @staticmethod - def get_test_dfs_close_to_zero() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - DataFrames with values that are close to 0. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - ] - values1 = { - "tsA": [3e-9, -3e-9], - "tsB": [6e-3, 4e-9], - "timestamp": timestamp_index, - } - df1 = pd.DataFrame(data=values1) - df1 = df1.set_index("timestamp") - # - values2 = { - "tsA": [15e-3, -5e-9], - "tsB": [5e-9, 3e-9], - "timestamp": timestamp_index, - } - df2 = pd.DataFrame(data=values2) - df2 = df2.set_index("timestamp") - return df1, df2 - - def get_test_dfs_different(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - DataFrames have both unique and equal rows and columns. - """ - df1, df2 = self.get_test_dfs_equal() - df2 = df2.rename( - columns={"tsC": "extra_col"}, - index={ - pd.Timestamp("2022-01-01 21:03:00+00:00"): pd.Timestamp( - "2022-01-01 21:04:00+00:00" - ) - }, - ) - return df1, df2 - - def test1(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "diff" - """ - df1, df2 = self.get_test_dfs_equal() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff tsC.diff - timestamp - 2022-01-01 21:01:00+00:00 -0.10 4.0 0.50 - 2022-01-01 21:02:00+00:00 0.10 0.0 -0.60 - 2022-01-01 21:03:00+00:00 -0.15 0.2 -0.07 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - zero_vs_zero_is_zero = False - - remove_inf = False - """ - df1, df2 = self.get_test_dfs_equal() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=False, - remove_inf=False, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 
21:01:00+00:00 -9.090909 inf 7.692308 - 2022-01-01 21:02:00+00:00 5.263158 0.000000 -6.976744 - 2022-01-01 21:03:00+00:00 -4.761905 3.448276 -0.771775 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - - DataFrames are not equal - - Column and row modes are `inner` - - diff_mode = "diff" - """ - df1, df2 = self.get_test_dfs_different() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="inner", - column_mode="inner", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff - timestamp - 2022-01-01 21:01:00+00:00 -0.1 4.0 - 2022-01-01 21:02:00+00:00 0.1 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - - DataFrames are not equal - - Column and row modes are `inner` - - diff_mode = "pct_change" - """ - df1, df2 = self.get_test_dfs_different() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="inner", - column_mode="inner", - diff_mode="pct_change", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -9.090909 NaN - 2022-01-01 21:02:00+00:00 5.263158 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test5(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "diff" - - All values of the second DataFrame are zeros - - Check that if the second DataFrame consists of zeros, - the function will perform comparison to the initial DataFrame. - """ - df1, df2 = self.get_test_dfs_different() - # Create DataFrame with zeros. - df2 = df1 * 0 - # Compare. 
- df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="diff", - assert_diff_threshold=None, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.diff tsB.diff tsC.diff - timestamp - 2022-01-01 21:01:00+00:00 1 4 7 - 2022-01-01 21:02:00+00:00 2 5 8 - 2022-01-01 21:03:00+00:00 3 6 9 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test6(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - close_to_zero_threshold = 1e-6 - - zero_vs_zero_is_zero = True - - remove_inf = True - - The second DataFrame has numbers below the close_to_zero_threshold. - """ - df1, df2 = self.get_test_dfs_close_to_zero() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=True, - remove_inf=True, - ) - # - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -100.0 NaN - 2022-01-01 21:02:00+00:00 0.0 0.0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test7(self) -> None: - """ - - DataFrames are equal - - Column and row modes are `equal` - - diff_mode = "pct_change" - - close_to_zero_threshold = 1e-6 - - zero_vs_zero_is_zero = False - - remove_inf = False - - The second DataFrame has numbers below the close_to_zero_threshold. 
- """ - df1, df2 = self.get_test_dfs_close_to_zero() - df_diff = hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - assert_diff_threshold=None, - zero_vs_zero_is_zero=False, - remove_inf=False, - ) - # - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change - timestamp - 2022-01-01 21:01:00+00:00 -100.0 inf - 2022-01-01 21:02:00+00:00 NaN NaN - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test8(self) -> None: - """ - Test NaN comparison with NaNs present at different location in two - dataframes. - """ - # Build test dataframes. - df1 = pd.DataFrame( - data={ - "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df2 = pd.DataFrame( - data={ - "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - # Check. - with self.assertRaises(AssertionError) as cm: - compare_nans = True - hpandas.compare_dfs( - df1, df2, compare_nans=compare_nans, only_warning=False - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - DataFrame.iloc[:, 0] (column name="A") are different - - DataFrame.iloc[:, 0] (column name="A") values are different (66.66667 %) - [index]: [0, 1, 2, 3, 4, 5] - [left]: [1.1, nan, 3.1, nan, inf, inf] - [right]: [3.0, 2.2, nan, nan, nan, inf] - At positional index 0, first diff: 1.1 != 3.0 - df1= - A B - 0 1.1 0 - 1 NaN 0 - 2 3.1 0 - 3 NaN 0 - 4 inf 0 - 5 inf 0 - and df2= - A B - 0 3.0 0 - 1 2.2 0 - 2 NaN 0 - 3 NaN 0 - 4 NaN 0 - 5 inf 0 - are not equal. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test9(self) -> None: - """ - Test to verify the error when df1 and df2 have different index types. - """ - df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - # Create df2 with a DatetimeIndex. 
- dates = pd.date_range("2021-01-01", periods=3) - df2 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "timestamp": dates}) - df2 = df2.set_index("timestamp") - with self.assertRaises(AssertionError) as cm: - hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - df1.index.difference(df2.index)= - RangeIndex(start=0, stop=3, step=1) - df2.index.difference(df1.index)= - DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None) - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test10(self) -> None: - """ - Check `assert_diff_threshold` functionality in presence of NaN values - in df_diff. - """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.000001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.nan - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.0001 0.0001 0.0001 - 2022-01-01 21:02:00+00:00 0.0001 0.0001 NaN - 2022-01-01 21:03:00+00:00 0.0001 0.0001 0.0001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test11(self) -> None: - """ - Check functionality for `remove_inf = False` in presence of `diff_mode - = 'pct_change'`. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.inf - with self.assertRaises(AssertionError) as cm: - hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - remove_inf=False, - only_warning=False, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - DataFrame.iloc[:, 0] (column name="tsA") are different - - DataFrame.iloc[:, 0] (column name="tsA") values are different (100.0 %) - [index]: [2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00, 2022-01-01 21:03:00+00:00] - [left]: [False, False, False] - [right]: [True, True, True] - df1= - tsA tsB tsC - timestamp - 2022-01-01 21:01:00+00:00 100.001 400.004 700.007 - 2022-01-01 21:02:00+00:00 200.002 500.005 inf - 2022-01-01 21:03:00+00:00 300.003 600.006 900.009 - and df2= - tsA tsB tsC - timestamp - 2022-01-01 21:01:00+00:00 100 400 700 - 2022-01-01 21:02:00+00:00 200 500 800 - 2022-01-01 21:03:00+00:00 300 600 900 - have pct_change more than `assert_diff_threshold`. - """ - self.assert_equal(actual, expected, purify_text=True, fuzzy_match=True) - - def test12(self) -> None: - """ - Check functionality for `remove_inf = True` in presence of `diff_mode = - 'pct_change'`. 
- """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, 300], - "tsB": [400, 500, 600], - "tsC": [700, 800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df1.iloc[1, 2] = np.inf - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 - 2022-01-01 21:02:00+00:00 0.001 0.001 NaN - 2022-01-01 21:03:00+00:00 0.001 0.001 0.001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test13(self) -> None: - """ - Check test case when negative values in df2. - """ - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - ] - df2 = pd.DataFrame( - { - "tsA": [100, 200, -300], - "tsB": [400, -500, 600], - "tsC": [700, -800, 900], - "timestamp": timestamp_index, - } - ) - df2 = df2.set_index("timestamp") - adjustment_factor = 1.00001 - df1 = df2 * adjustment_factor - df_diff = hpandas.compare_dfs( - df1, - df2, - diff_mode="pct_change", - only_warning=True, - ) - actual = hpandas.df_to_str(df_diff) - expected = r""" tsA.pct_change tsB.pct_change tsC.pct_change - timestamp - 2022-01-01 21:01:00+00:00 0.001 0.001 0.001 - 2022-01-01 21:02:00+00:00 0.001 -0.001 -0.001 - 2022-01-01 21:03:00+00:00 -0.001 0.001 0.001 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_invalid_input(self) -> None: - """ - Put two different DataFrames with `equal` mode. 
- """ - df1, df2 = self.get_test_dfs_different() - with self.assertRaises(AssertionError): - hpandas.compare_dfs( - df1, - df2, - row_mode="equal", - column_mode="equal", - diff_mode="pct_change", - ) - - -# ############################################################################# -# Test_compare_nans_in_dataframes -# ############################################################################# - - -class Test_compare_nans_in_dataframes(hunitest.TestCase): - def test1(self) -> None: - """ - Check that NaN differences are identified correctly. - """ - # Build test dataframes. - df1 = pd.DataFrame( - data={ - "A": [1.1, np.nan, 3.1, np.nan, np.inf, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df2 = pd.DataFrame( - data={ - "A": [3.0, 2.2, np.nan, np.nan, np.nan, np.inf], - "B": [0, 0, 0, 0, 0, 0], - } - ) - df = hpandas.compare_nans_in_dataframes(df1, df2) - actual = hpandas.df_to_str(df) - expected = r""" - A - df1 df2 - 1 NaN 2.2 - 2 3.1 NaN - 4 inf NaN - """ - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py deleted file mode 100644 index 0bd4eaeee..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_conversion.py +++ /dev/null @@ -1,276 +0,0 @@ -import logging - -import numpy as np -import pandas as pd -import pytest - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_to_series1 -# ############################################################################# - - -class Test_to_series1(hunitest.TestCase): - def helper(self, 
n: int, expected: str) -> None: - vals = list(range(n)) - df = pd.DataFrame([vals], columns=[f"a{i}" for i in vals]) - df = df.T - _LOG.debug("df=\n%s", df) - srs = hpandas.to_series(df) - _LOG.debug("srs=\n%s", srs) - actual = str(srs) - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test1(self) -> None: - n = 0 - expected = r""" - Series([], dtype: float64) - """ - self.helper(n, expected) - - def test2(self) -> None: - n = 1 - expected = r""" - a0 0 - dtype: int64""" - self.helper(n, expected) - - def test3(self) -> None: - n = 5 - expected = r""" - a0 0 - a1 1 - a2 2 - a3 3 - a4 4 - Name: 0, dtype: int64""" - self.helper(n, expected) - - -# ############################################################################# -# Test_cast_series_to_type -# ############################################################################# - - -class Test_cast_series_to_type(hunitest.TestCase): - """ - Test converting a series into a given type. - """ - - def test1(self) -> None: - series = pd.Series(["1", "2", "3"]) - series_type = int - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.int64) - - def test2(self) -> None: - series = pd.Series(["0.1", "0.2", "0.3"]) - series_type = float - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.float64) - - def test3(self) -> None: - series = pd.Series(["None", "None", "None"]) - series_type = None - actual = hpandas.cast_series_to_type(series, series_type) - for i in range(len(actual)): - self.assertIsNone(actual.iloc[i]) - - def test4(self) -> None: - series = pd.Series(["2020-01-01", "2020-02-02", "2020-03-03"]) - series_type = pd.Timestamp - actual = hpandas.cast_series_to_type(series, series_type) - self.assertEqual(actual.dtype.type, np.datetime64) - - def test5(self) -> None: - series = pd.Series(["{}", "{1: 2, 3: 4}", "{'a': 'b'}"]) - series_type = dict - actual = hpandas.cast_series_to_type(series, 
series_type) - for i in range(len(actual)): - self.assertEqual(type(actual.iloc[i]), dict) - - -# ############################################################################# -# Test_convert_to_type -# ############################################################################# - - -class Test_convert_to_type(hunitest.TestCase): - def test_convert_to_type_bool(self) -> None: - """ - Check converting to bool column. - """ - # Mix of booleans, truthy/falsy strings, numerics, and invalid values - data = [True, False, "True", "false", 1, 0, "1", "0", "yes", None] - series = pd.Series(data) - result = hpandas.convert_to_type(series, "is_bool") - expected = pd.Series( - [True, False, True, False, True, False, True, False, None, None] - ) - pd.testing.assert_series_equal(result, expected) - - def test_convert_to_type_int_and_numeric(self) -> None: - """ - Check converting to numeric and int column. - """ - # Strings that parse to numbers, floats, invalid strings, and ints - series = pd.Series(["1", "2", "3.5", "abc", 4], dtype=object) - # is_int should coerce numeric strings to numbers, invalid -> NaN - result_int = hpandas.convert_to_type(series, "is_int") - expected_int = pd.to_numeric(series, errors="coerce") - pd.testing.assert_series_equal(result_int, expected_int) - # is_numeric is the same as to_numeric - result_numeric = hpandas.convert_to_type(series, "is_numeric") - pd.testing.assert_series_equal(result_numeric, expected_int) - - def test_convert_to_type_string(self) -> None: - """ - Check converting to string column. - """ - # Strings vs non-strings - data = ["a", 1, None, "hello", True, 3.14] - series = pd.Series(data, dtype=object) - result = hpandas.convert_to_type(series, "is_string") - expected = pd.Series(["a", "1", "None", "hello", "True", "3.14"]) - pd.testing.assert_series_equal(result, expected) - - def test_convert_to_type_unknown(self) -> None: - "Check converting to invalid datatype column." 
- series = pd.Series([1, 2, 3], dtype=object) - with pytest.raises(ValueError) as exc: - hpandas.convert_to_type(series, "invalid_type") - self.assertIn("Unknown column type: invalid_type", str(exc.value)) - - -# ############################################################################# -# Test_infer_column_types -# ############################################################################# - - -class Test_infer_column_types(hunitest.TestCase): - def test_numeric_dominance(self) -> None: - """ - Check with numeric dominant column. - """ - # 5 elements: '1','2',3 (numeric), 'a', None - col = pd.Series(["1", "2", 3, "a", None], dtype=object) - vals = hpandas.infer_column_types(col) - # is_numeric: True for "1","2",3 → 3/5 = 0.6 - assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.6 - # is_bool: none are bool → 0.0 - assert vals["is_bool"] == 0.0 - # is_string: "1","2","a" are str → 3/5 = 0.6 - assert pytest.approx(vals["is_string"], rel=1e-6) == 0.6 - # numeric ≥ string, and bool < numeric ⇒ type is numeric - self.assert_equal(vals["type"], "is_numeric") - - def test_bool_dominance(self) -> None: - """ - Check with bool dominant column. - """ - # 4 elements: True, False, True (bool), "x" - col = pd.Series([True, False, True, "x"], dtype=object) - vals = hpandas.infer_column_types(col) - # is_bool: 3/4 = 0.75 - assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.75 - # is_numeric: True→1, False→0, True→1, "x"→NaN → notna → 3/4 = 0.75 - assert pytest.approx(vals["is_numeric"], rel=1e-6) == 0.75 - # is_string: only "x" → 1/4 = 0.25 - assert pytest.approx(vals["is_string"], rel=1e-6) == 0.25 - # bool ≥ numeric ⇒ type is bool - self.assert_equal(vals["type"], "is_bool") - - def test_string_dominance(self) -> None: - """ - Check with string dominant column. 
- """ - # 3 elements: 1.5 (numeric), "a","b" (strings) - col = pd.Series([1.5, "a", "b"], dtype=object) - vals = hpandas.infer_column_types(col) - # is_bool: none are bool → 0/3 = 0.0 - assert pytest.approx(vals["is_bool"], rel=1e-6) == 0.0 - # is_numeric: 1/3 ≈ 0.333... - assert pytest.approx(vals["is_numeric"], rel=1e-6) == pytest.approx( - 1 / 3, rel=1e-6 - ) - # is_string: 2/3 ≈ 0.666... - assert pytest.approx(vals["is_string"], rel=1e-6) == pytest.approx( - 2 / 3, rel=1e-6 - ) - # bool < numeric < string ⇒ type is string - self.assert_equal(vals["type"], "is_string") - - -# ############################################################################# -# Test_convert_df -# ############################################################################# - - -class Test_convert_df(hunitest.TestCase): - def test_convert_df_all_bool(self) -> None: - """ - A column of pure booleans should stay booleans. - """ - df = pd.DataFrame({"flag": [True, False, True, False]}) - df_out = hpandas.convert_df(df) - # Expect a DataFrame back - assert isinstance(df_out, pd.DataFrame) - # Column dtype must be bool - self.assert_equal(df_out["flag"].dtype.name, "bool") - # Values preserved - self.assert_equal( - str(df_out["flag"].tolist()), str([True, False, True, False]) - ) - - def test_convert_df_all_numeric(self) -> None: - """ - A column of numeric strings and ints should become floats. - """ - df = pd.DataFrame({"score": ["1", 2, "3.5", 4]}, dtype=object) - df_out = hpandas.convert_df(df) - assert isinstance(df_out, pd.DataFrame) - # dtype should be float64 - assert df_out["score"].dtype == float - # Values converted correctly - assert df_out["score"].tolist() == [1.0, 2.0, 3.5, 4.0] - - def test_convert_df_all_string(self) -> None: - """ - A column of strings (and mixed non-numeric non-bool) stays as-is. 
- """ - df = pd.DataFrame( - {"name": ["alice", "bob", "", "charlie"]}, dtype=object - ) - df_out = hpandas.convert_df(df) - print(df_out.head(5)) - assert isinstance(df_out, pd.DataFrame) - # dtype remains object (strings) - self.assert_equal(df_out["name"].dtype.name, "object") - self.assert_equal( - str(df_out["name"].tolist()), str(["alice", "bob", "", "charlie"]) - ) - - def test_convert_df_mixed_columns(self) -> None: - """ - Different datatype columns should convert accordingly. - """ - df = pd.DataFrame( - { - "flag": [True, False, False], - "value": [10, 20, "xyz"], - "text": ["one", "hello", 2], - }, - dtype=object, - ) - df_out = hpandas.convert_df(df) - # flag → bool - self.assert_equal(df_out["flag"].dtype.name, "bool") - self.assertIn("float", df_out["value"].dtype.name) - self.assert_equal(df_out["text"].dtype.name, "object") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py deleted file mode 100644 index 44b7c7b18..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_dassert.py +++ /dev/null @@ -1,448 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_dassert_is_unique1 -# ############################################################################# - - -class Test_dassert_is_unique1(hunitest.TestCase): - def get_df1(self) -> pd.DataFrame: - """ - Return a df without duplicated index. 
- """ - num_rows = 5 - idx = [ - pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) - for i in range(num_rows) - ] - values = [[i] for i in range(len(idx))] - df = pd.DataFrame(values, index=idx) - _LOG.debug("df=\n%s", df) - # - actual = hpandas.df_to_str(df) - expected = r""" - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:01:00 1 - 2000-01-01 09:02:00 2 - 2000-01-01 09:03:00 3 - 2000-01-01 09:04:00 4""" - self.assert_equal(actual, expected, fuzzy_match=True) - return df - - def test_dassert_is_unique1(self) -> None: - df = self.get_df1() - hpandas.dassert_unique_index(df) - - def get_df2(self) -> pd.DataFrame: - """ - Return a df with duplicated index. - """ - num_rows = 4 - idx = [ - pd.Timestamp("2000-01-01 9:00") + pd.Timedelta(minutes=i) - for i in range(num_rows) - ] - idx.append(idx[0]) - values = [[i] for i in range(len(idx))] - df = pd.DataFrame(values, index=idx) - _LOG.debug("df=\n%s", df) - # - actual = hpandas.df_to_str(df) - expected = r""" - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:01:00 1 - 2000-01-01 09:02:00 2 - 2000-01-01 09:03:00 3 - 2000-01-01 09:00:00 4""" - self.assert_equal(actual, expected, fuzzy_match=True) - return df - - def test_dassert_is_unique2(self) -> None: - df = self.get_df2() - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_unique_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Duplicated rows are: - 0 - 2000-01-01 09:00:00 0 - 2000-01-01 09:00:00 4 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_valid_remap -# ############################################################################# - - -class Test_dassert_valid_remap(hunitest.TestCase): - def test1(self) -> None: - """ - Check that the function works with correct inputs. - """ - # Set inputs. 
- to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] - remap_dict = { - "dummy_value_1": "1, 2, 3", - "dummy_value_2": "A, B, C", - } - # Check. - hpandas.dassert_valid_remap(to_remap, remap_dict) - - def test2(self) -> None: - """ - Check that an assertion is raised if dictionary keys are not a subset. - """ - # Set inputs. - to_remap = ["dummy_value_1", "dummy_value_2"] - remap_dict = { - "dummy_value_1": "1, 2, 3", - "dummy_value_2": "A, B, C", - "dummy_value_3": "A1, A2, A3", - } - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] - issubset - val2=['dummy_value_1', 'dummy_value_2'] - val1 - val2=['dummy_value_3'] - Keys to remap should be a subset of existing columns""" - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that an assertion is raised if the duplicate values are present - in the dict. - """ - # Set inputs. - to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] - remap_dict = { - "dummy_value_1": 1, - "dummy_value_2": "A, B, C", - "dummy_value_3": "A, B, C", - } - # Run. - with self.assertRaises(AttributeError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - 'dict_values' object has no attribute 'count'""" - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Check that an assertion is raised if the input is not a list. - """ - # Set inputs. - to_remap = {"dummy_value_1"} - remap_dict = { - "dummy_value_1": "1, 2, 3", - } - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '{'dummy_value_1'}' is '' instead of '' - """ - # Check. 
- self.assert_equal(actual, expected, fuzzy_match=True) - - def test5(self) -> None: - """ - Check that an assertion is raised if the input is not a dictionary. - """ - # Set inputs. - to_remap = ["dummy_value_1"] - remap_dict = [ - "dummy_value_1 : 1, 2, 3", - ] - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_valid_remap(to_remap, remap_dict) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of '['dummy_value_1 : 1, 2, 3']' is '' instead of '' - """ - # Check. - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_increasing_index -# ############################################################################# - - -class Test_dassert_increasing_index(hunitest.TestCase): - def test1(self) -> None: - """ - Check that a monotonically increasing index passes the assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_increasing_index(df) - - def test2(self) -> None: - """ - Check that an assert is raised when index is not monotonically - increasing. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:04"), - pd.Timestamp("2000-01-01 9:03"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. 
- with self.assertRaises(AssertionError) as cm: - hpandas.dassert_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Not increasing indices are: - 0 - 2000-01-01 09:04:00 0 - 2000-01-01 09:03:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that a monotonically increasing index with duplicates passes the - assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:00"), - pd.Timestamp("2000-01-01 9:00"), - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:01"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_increasing_index(df) - - -# ############################################################################# -# Test_dassert_strictly_increasing_index -# ############################################################################# - - -class Test_dassert_strictly_increasing_index(hunitest.TestCase): - def test1(self) -> None: - """ - Check that unique and monotonically increasing index passes the assert. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - hpandas.dassert_strictly_increasing_index(df) - - def test2(self) -> None: - """ - Check that an assert is raised for an increasing index with duplicates. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:03"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. 
- with self.assertRaises(AssertionError) as cm: - hpandas.dassert_strictly_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Duplicated rows are: - 0 - 2000-01-01 09:01:00 0 - 2000-01-01 09:01:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that an assert is raised for a not monotonically increasing - index. - """ - # Build test dataframe. - idx = [ - pd.Timestamp("2000-01-01 9:01"), - pd.Timestamp("2000-01-01 9:03"), - pd.Timestamp("2000-01-01 9:02"), - pd.Timestamp("2000-01-01 9:04"), - ] - values = [0, 0, 0, 0] - df = pd.DataFrame(values, index=idx) - # Run. - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_strictly_increasing_index(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - Not increasing indices are: - 0 - 2000-01-01 09:03:00 0 - 2000-01-01 09:02:00 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_dassert_index_is_datetime -# ############################################################################# - - -class Test_dassert_index_is_datetime(hunitest.TestCase): - @staticmethod - def get_multiindex_df( - index_is_datetime: bool, - ) -> pd.DataFrame: - """ - Helper function to get test multi-index dataframe. 
Example of dataframe - returned when `index_is_datetime = True`: - - ``` - column1 column2 - index timestamp - index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 - 2022-01-01 21:10:00+00:00 1.303778 -0.288235 - index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 - 2022-01-01 21:10:00+00:00 1.333692 1.708455 - ``` - - Example of dataframe returned when `index_is_datetime = False`: - - ``` - column1 column2 - index timestamp - index1 string1 -0.122140 -1.949431 - string2 1.303778 -0.288235 - index2 string1 1.237079 1.168012 - string2 1.333692 1.708455 - ``` - """ - if index_is_datetime: - index_inner = [ - pd.Timestamp("2022-01-01 21:00:00", tz="UTC"), - pd.Timestamp("2022-01-01 21:10:00", tz="UTC"), - ] - else: - index_inner = ["string1", "string2"] - index_outer = ["index1", "index2"] - iterables = [index_outer, index_inner] - index = pd.MultiIndex.from_product( - iterables, names=["index", "timestamp"] - ) - columns = ["column1", "column2"] - nums = np.random.uniform(-2, 2, size=(4, 2)) - df = pd.DataFrame(nums, index=index, columns=columns) - return df - - def test1(self) -> None: - """ - Check that multi-index dataframe index is datetime type. - """ - index_is_datetime = True - df = self.get_multiindex_df(index_is_datetime) - hpandas.dassert_index_is_datetime(df) - - def test2(self) -> None: - """ - Check that multi-index dataframe index is not datetime type. - """ - index_is_datetime = False - df = self.get_multiindex_df(index_is_datetime) - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_index_is_datetime(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - cond=False - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check for empty dataframe. 
- """ - df = pd.DataFrame() - with self.assertRaises(AssertionError) as cm: - hpandas.dassert_index_is_datetime(df) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - Instance of 'RangeIndex(start=0, stop=0, step=1)' is '' instead of '' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test4(self) -> None: - """ - Check that single-indexed dataframe index is datetime type. - """ - index_is_datetime = True - df = self.get_multiindex_df(index_is_datetime) - df = df.loc["index1"] - hpandas.dassert_index_is_datetime(df) - - -# ############################################################################# -# Test_dassert_approx_eq1 -# ############################################################################# - - -class Test_dassert_approx_eq1(hunitest.TestCase): - def test1(self) -> None: - hpandas.dassert_approx_eq(1, 1.0000001) - - def test2(self) -> None: - srs1 = pd.Series([1, 2.0000001]) - srs2 = pd.Series([0.999999, 2.0]) - hpandas.dassert_approx_eq(srs1, srs2, msg="hello world") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py deleted file mode 100644 index 2c69e4fe7..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_display.py +++ /dev/null @@ -1,685 +0,0 @@ -import datetime -import logging -import unittest.mock -import uuid -from typing import Optional, Union - -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hpandas_display as hpandisp -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestDataframeToJson -# ############################################################################# 
- - -class TestDataframeToJson(hunitest.TestCase): - """ - Test dataframe to JSON conversion. - """ - - def test1(self) -> None: - """ - Verify correctness of dataframe to JSON transformation. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], - "col_2": [1, 2, 3, 4, 5, 6, 7], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=3, n_tail=3 - ) - # Check output. - self.check_string(output_str) - - def test2(self) -> None: - """ - Verify correctness of UUID-containing dataframe transformation. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - uuid.UUID("421470c7-7797-4a94-b584-eb83ff2de88a"), - uuid.UUID("22cde381-1782-43dc-8c7a-8712cbdf5ee1"), - ], - "col_2": [1, 2], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. - self.check_string(output_str) - - def test3(self) -> None: - """ - Verify correctness of transformation of a dataframe with Timestamps. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - pd.Timestamp("2020-01-01"), - pd.Timestamp("2020-05-12"), - ], - "col_2": [1.0, 2.0], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. - self.check_string(output_str) - - def test4(self) -> None: - """ - Verify correctness of transformation of a dataframe with datetime. - """ - # Prepare inputs. - test_dataframe = pd.DataFrame( - { - "col_1": [ - datetime.datetime(2020, 1, 1), - datetime.datetime(2020, 5, 12), - ], - "col_2": [1.0, 2.0], - } - ) - # Run test. - output_str = hpandas.convert_df_to_json_string( - test_dataframe, n_head=None, n_tail=None - ) - # Check output. 
- self.check_string(output_str) - - -# ############################################################################# -# Test_list_to_str -# ############################################################################# - - -class Test_list_to_str(hunitest.TestCase): - """ - Test list to string conversion. - """ - - def test1(self) -> None: - """ - Check that a list is converted to string correctly. - """ - # Prepare inputs. - items = [1, "two", 3, 4, "five"] - # Run test. - actual = hprint.list_to_str2(items, enclose_str_char="|", sep_char=" ; ") - # Check output. - expected = "5 [|1| ; |two| ; |3| ; |4| ; |five|]" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Check that a list is converted to string and truncated correctly. - """ - # Prepare inputs. - items = list(range(15)) - # Run test. - actual = hprint.list_to_str2(items, enclose_str_char="", sep_char=" - ") - # Check output. - expected = "15 [0 - 1 - 2 - 3 - 4 - ... - 10 - 11 - 12 - 13 - 14]" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Check that a list is converted to string correctly, without additional - parameters. - """ - # Prepare inputs. - items = [1, 2, 3, 4, "five"] - # Run test. - actual = hprint.list_to_str2(items) - # Check output. - expected = "5 ['1', '2', '3', '4', 'five']" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_display_df -# ############################################################################# - - -class Test_display_df(hunitest.TestCase): - """ - Test the display_df function. - """ - - def helper_test_display_df( - self, - df: Union[pd.DataFrame, pd.Series], - expected: Optional[str], - **kwargs, - ) -> None: - """ - Test helper for display_df. 
- - :param df: Input dataframe or series - :param expected: Expected output to compare with actual output - :param kwargs: Keyword arguments to pass to display_df - """ - # Capture the output from print_or_display and logging. - outputs = [] - tag = kwargs.get("tag") - - def mock_print_or_display( - mock_df: pd.DataFrame, - *, - index: bool = True, - as_txt: bool = False, - log_level: int = logging.INFO, - ) -> None: - """ - Capture the dataframe string representation. - """ - if as_txt or not index: - output = mock_df.to_string(index=index) - else: - output = mock_df.to_html(index=index) - outputs.append(output) - - # Run test. - with unittest.mock.patch( - "helpers.hpandas_display.print_or_display", - side_effect=mock_print_or_display, - ): - with unittest.mock.patch( - "helpers.hpandas_display._LOG.log" - ) as mock_log: - hpandisp.display_df( - df, - log_level=logging.DEBUG, - **kwargs, - ) - # Capture tag logging if present. - if tag is not None and mock_log.called: - for call in mock_log.call_args_list: - if "tag=" in str(call): - outputs.append(f"tag={tag}") - # Check output if expected is provided. - if expected is not None: - expected = hprint.dedent(expected) - actual = "\n".join(outputs) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test display_df with small dataframe. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
01a
12b
23c
- """ - # Run test. - self.helper_test_display_df(df, expected=expected) - - def test2(self) -> None: - """ - Test display_df with large dataframe and max_lines. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(100)), - "col_2": [f"val_{i}" for i in range(100)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
9898val_98
9999val_99
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, max_lines=5) - - def test3(self) -> None: - """ - Test display_df with inline_index=True. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - . col_1 col_2 - 0 1 a - 1 2 b - 2 3 c - """ - # Run test. - self.helper_test_display_df( - df, expected=expected, inline_index=True, index=True - ) - - def test4(self) -> None: - """ - Test display_df with index=False. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - # Prepare outputs. - expected = """ - col_1 col_2 - 1 a - 2 b - 3 c - """ - # Run test. - self.helper_test_display_df(df, expected=expected, index=False) - - def test5(self) -> None: - """ - Test display_df with named index and inline_index=True. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - } - ) - df.index.name = "my_index" - # Prepare outputs. - expected = """ - my_index col_1 col_2 - 0 1 a - 1 2 b - 2 3 c - """ - # Run test. - self.helper_test_display_df( - df, expected=expected, inline_index=True, index=False - ) - - def test6(self) -> None: - """ - Test display_df with Pandas Series (should convert to DataFrame). - """ - # Prepare inputs. - series = pd.Series([1, 2, 3, 4, 5], name="my_series") - # Prepare outputs. - expected = """ - . my_series - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - - """ - # Run test. - self.helper_test_display_df( - series, expected=expected, inline_index=True, index=False - ) - - def test7(self) -> None: - """ - Test display_df with tag parameter. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1, 2, 3]}) - # Prepare outputs. - expected = """ - . col_1 - 0 1 - 1 2 - 2 3 - tag=my_tag - """ - # Run test. 
- self.helper_test_display_df( - df, expected=expected, tag="my_tag", inline_index=True, index=False - ) - - def test8(self) -> None: - """ - Test display_df with mode='all_rows'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(50)), - "col_2": [f"val_{i}" for i in range(50)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all_rows") - - def test9(self) -> None: - """ - Test display_df with mode='all_cols'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": [1, 2, 3], - "col_2": ["a", "b", "c"], - "col_3": [10.5, 20.5, 30.5], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2col_3
01a10.5
12b20.5
23c30.5
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all_cols") - - def test10(self) -> None: - """ - Test display_df with mode='all'. - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(50)), - "col_2": [f"val_{i}" for i in range(50)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
4848val_48
4949val_49
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all") - - def test11(self) -> None: - """ - Test display_df with invalid mode raises error. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1, 2, 3]}) - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hpandisp.display_df( - df, - mode="invalid_mode", - log_level=logging.DEBUG, - ) - self.assertIn("Invalid mode", str(cm.exception)) - - def test12(self) -> None: - """ - Test display_df with duplicate columns raises assertion. - """ - # Prepare inputs. - df = pd.DataFrame([[1, 2], [3, 4]]) - df.columns = ["col", "col"] - # Run test and check output. - with self.assertRaises(AssertionError): - hpandisp.display_df(df, log_level=logging.DEBUG) - - def test13(self) -> None: - """ - Test display_df with single row dataframe. - """ - # Prepare inputs. - df = pd.DataFrame({"col_1": [1], "col_2": ["a"]}) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - -
col_1col_2
01a
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, max_lines=5) - - def test14(self) -> None: - """ - Test display_df with max_lines=1 (edge case). - """ - # Prepare inputs. - df = pd.DataFrame( - { - "col_1": list(range(10)), - "col_2": [f"val_{i}" for i in range(10)], - } - ) - # Prepare outputs. - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
col_1col_2
00val_0
11val_1
.........
88val_8
99val_9
- """ - # Run test. - self.helper_test_display_df(df, expected=expected, mode="all") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py deleted file mode 100644 index c1f66b0d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_io.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hpandas as hpandas -import helpers.hs3 as hs3 -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# TestReadDataFromS3 -# ############################################################################# - - -class TestReadDataFromS3(hunitest.TestCase): - def test_read_csv1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_name = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - # TODO(sonaal): Reorganize all s3 input data, CmampTask5650. 
- "alphamatic-data", - "data/kibot/all_stocks_1min/RIMG.csv.gz", - ) - hs3.dassert_path_exists(file_name, s3fs) - stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) - hpandas.read_csv_to_df(stream, **kwargs) - - @pytest.mark.slow("~15 sec.") - def test_read_parquet1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_name = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/kibot/pq/sp_500_1min/AAPL.pq", - ) - hs3.dassert_path_exists(file_name, s3fs) - stream, kwargs = hs3.get_local_or_s3_stream(file_name, s3fs=s3fs) - hpandas.read_parquet_to_df(stream, **kwargs) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py deleted file mode 100644 index 0e1b813fa..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_multiindex.py +++ /dev/null @@ -1,680 +0,0 @@ -import logging - -import numpy as np -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_subset_multiindex_df -# ############################################################################# - - -class Test_subset_multiindex_df(hunitest.TestCase): - """ - Filter Multiindex DataFrame with 2 column levels. 
- """ - - @staticmethod - def get_multiindex_df() -> pd.DataFrame: - timestamp_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables = [["asset1", "asset2"], ["open", "high", "low", "close"]] - index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) - nums = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df = pd.DataFrame(nums, index=timestamp_index, columns=index) - return df - - def test1(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 1 columns - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:03:00+00:00"), - columns_level0=["asset1"], - columns_level1=["high", "low"], - ) - expected_length = 3 - expected_column_names = [("asset1", "high"), ("asset1", "low")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:03:00+00:00] - columns=('asset1', 'high'),('asset1', 'low') - shape=(3, 2) - asset1 - timestamp high low - 2022-01-01 21:01:00+00:00 0.124922 -0.359292 - 2022-01-01 21:02:00+00:00 0.791050 0.766129 - 
2022-01-01 21:03:00+00:00 0.452377 1.610513 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 1 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), - columns_level1=["close"], - ) - expected_length = 2 - expected_column_names = [("asset1", "close"), ("asset2", "close")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] - columns=('asset1', 'close'),('asset2', 'close') - shape=(2, 2) - asset1 asset2 - timestamp close close - 2022-01-01 21:01:00+00:00 1.041378 0.100234 - 2022-01-01 21:02:00+00:00 -1.499353 2.045787 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test3(self) -> None: - """ - Filter by: - - - Timestamp index range - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - start_timestamp=pd.Timestamp("2022-01-01 21:01:00+00:00"), - end_timestamp=pd.Timestamp("2022-01-01 21:02:00+00:00"), - columns_level0=["asset2"], - ) - expected_length = 2 - expected_column_names = [ - ("asset2", "close"), - ("asset2", "high"), - ("asset2", "low"), - ("asset2", "open"), - ] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:02:00+00:00] - columns=('asset2', 'close'),('asset2', 'high'),('asset2', 'low'),('asset2', 'open') - shape=(2, 4) - asset2 - timestamp close high low open - 2022-01-01 21:01:00+00:00 0.100234 1.407860 -0.131710 0.200999 - 2022-01-01 21:02:00+00:00 2.045787 0.060399 -0.776521 -1.059238 - """ - 
self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test4(self) -> None: - """ - Filter by: - - - Level 1 columns - - Level 2 columns - """ - df = self.get_multiindex_df() - df_filtered = hpandas.subset_multiindex_df( - df, - columns_level0=["asset2"], - columns_level1=["low"], - ) - expected_length = 5 - expected_column_names = [("asset2", "low")] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:01:00+00:00, 2022-01-01 21:05:00+00:00] - columns=('asset2', 'low') - shape=(5, 1) - asset2 - timestamp low - 2022-01-01 21:01:00+00:00 -0.131710 - 2022-01-01 21:02:00+00:00 -0.776521 - 2022-01-01 21:03:00+00:00 1.030158 - 2022-01-01 21:04:00+00:00 1.508267 - 2022-01-01 21:05:00+00:00 1.140696 - """ - self.check_df_output( - df_filtered, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test_columns_level0_invalid_input(self) -> None: - df = self.get_multiindex_df() - with self.assertRaises(AssertionError): - hpandas.subset_multiindex_df( - df, - columns_level0=["invalid_input"], - ) - - def test_columns_level1_invalid_input(self) -> None: - df = self.get_multiindex_df() - with self.assertRaises(AssertionError): - hpandas.subset_multiindex_df( - df, - columns_level1=["invalid_input"], - ) - - -# ############################################################################# -# Test_compare_multiindex_dfs -# ############################################################################# - - -class Test_compare_multiindex_dfs(hunitest.TestCase): - """ - Subset Multiindex DataFrames with 2 column levels and compare its values. 
- """ - - @staticmethod - def get_multiindex_dfs() -> pd.DataFrame: - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables1 = [["asset1", "asset2"], ["open", "high", "low", "close"]] - index1 = pd.MultiIndex.from_product( - iterables1, names=[None, "timestamp"] - ) - nums1 = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df1 = pd.DataFrame(nums1, index=timestamp_index1, columns=index1) - # - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:00:00+00:00"), - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - iterables2 = [ - ["asset1", "asset2", "asset3"], - ["open", "high", "low", "close", "volume"], - ] - index2 = pd.MultiIndex.from_product( - iterables2, names=[None, "timestamp"] - ) - nums2 = [ - [ - 0.79095104, - -0.10304008, - -0.69848962, - 0.50078409, - 0.41756371, - -1.33487885, - 1.04546138, - 0.191062, - 0.08841533, - 0.61717725, - -2.15558483, - 1.21036169, - 2.60355386, - 0.07508052, - 1.00702849, - ], 
- [ - 0.56223723, - 0.97433151, - -1.40471182, - 0.53292355, - 0.24381913, - 0.64343069, - -0.46733655, - -1.20471491, - -0.08347491, - 0.33365524, - 0.04370572, - -0.53547653, - -1.07622168, - 0.7318155, - -0.47146482, - ], - [ - -0.48272741, - 1.17859032, - -0.40816664, - 0.46684297, - 0.42518077, - -1.52913855, - 1.09925095, - 0.48817537, - 1.2662552, - -0.59757824, - 0.23724902, - -0.00660826, - 0.09780482, - -0.17166633, - -0.54515917, - ], - [ - -0.37618442, - -0.3086281, - 1.09168123, - -1.1751162, - 0.38291194, - 1.80830268, - 1.28318855, - 0.75696503, - -1.04042572, - 0.06493231, - -0.10392893, - 1.89053412, - -0.21200498, - 1.61212857, - -2.00765278, - ], - [ - -0.19674075, - -1.02532132, - -0.22486018, - 0.37664998, - 0.35619408, - -0.77304675, - 0.59053699, - -1.53249898, - 0.57548424, - -0.32093537, - -0.52109972, - 1.70938034, - -0.55419632, - 0.45531674, - 0.66878119, - ], - [ - 0.05903553, - 1.2040308, - 0.62323671, - -0.23639535, - 0.87270792, - 2.60253287, - -0.77788842, - 0.80645833, - 1.85438743, - -1.77561587, - 0.41469478, - -0.29791883, - 0.75140743, - 0.50389702, - 0.55311024, - ], - [ - -0.97820763, - -1.32155197, - -0.6143911, - 0.01473404, - 0.87798665, - 0.1701048, - -0.75376376, - 0.72503616, - 0.5791076, - 0.43942739, - 0.62505817, - 0.44998739, - 0.37350664, - -0.73485633, - -0.70406184, - ], - [ - -1.35719477, - -1.82401288, - 0.77263763, - 2.36399552, - -0.45353019, - 0.33983713, - -0.62895329, - 1.34256611, - 0.2207564, - 0.24146184, - 0.90769186, - 0.57426869, - -0.04587782, - -1.6319128, - 0.38094798, - ], - ] - df2 = pd.DataFrame(nums2, index=timestamp_index2, columns=index2) - return df1, df2 - - def test1(self) -> None: - """ - - Subset by both columns and index - - Make inner intersection and compute pct_change - """ - df1, df2 = self.get_multiindex_dfs() - subset_multiindex_df_kwargs = { - "start_timestamp": pd.Timestamp("2022-01-01 21:02:00+00:00"), - "end_timestamp": pd.Timestamp("2022-01-01 21:04:00+00:00"), - 
"columns_level0": ["asset1", "asset2"], - "columns_level1": ["low", "high"], - } - compare_dfs_kwargs = { - "column_mode": "inner", - "row_mode": "inner", - "diff_mode": "pct_change", - "assert_diff_threshold": None, - } - df_diff = hpandas.compare_multiindex_dfs( - df1, - df2, - subset_multiindex_df_kwargs=subset_multiindex_df_kwargs, - compare_dfs_kwargs=compare_dfs_kwargs, - ) - expected_length = 3 - expected_column_names = [ - ("asset1.pct_change", "high.pct_change"), - ("asset1.pct_change", "low.pct_change"), - ("asset2.pct_change", "high.pct_change"), - ("asset2.pct_change", "low.pct_change"), - ] - expected_column_unique_values = None - expected_signature = r"""# df= - index=[2022-01-01 21:02:00+00:00, 2022-01-01 21:04:00+00:00] - columns=('asset1.pct_change', 'high.pct_change'),('asset1.pct_change', 'low.pct_change'),('asset2.pct_change', 'high.pct_change'),('asset2.pct_change', 'low.pct_change') - shape=(3, 4) - asset1.pct_change asset2.pct_change - timestamp high.pct_change low.pct_change high.pct_change low.pct_change - 2022-01-01 21:02:00+00:00 -32.881643 287.700041 -94.505475 -259.066028 - 2022-01-01 21:03:00+00:00 246.576815 47.525948 -137.632125 36.090517 - 2022-01-01 21:04:00+00:00 185.862978 -765.280229 -153.498432 198.418808 - """ - self.check_df_output( - df_diff, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - -# ############################################################################# -# Test_multiindex_df_info1 -# ############################################################################# - - -class Test_multiindex_df_info1(hunitest.TestCase): - @staticmethod - def get_multiindex_df_with_datetime_index() -> pd.DataFrame: - datetime_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - iterables = 
[["asset1", "asset2"], ["open", "high", "low", "close"]] - index = pd.MultiIndex.from_product(iterables, names=[None, "timestamp"]) - nums = np.array( - [ - [ - 0.77650806, - 0.12492164, - -0.35929232, - 1.04137784, - 0.20099949, - 1.4078602, - -0.1317103, - 0.10023361, - ], - [ - -0.56299812, - 0.79105046, - 0.76612895, - -1.49935339, - -1.05923797, - 0.06039862, - -0.77652117, - 2.04578691, - ], - [ - 0.77348467, - 0.45237724, - 1.61051308, - 0.41800008, - 0.20838053, - -0.48289112, - 1.03015762, - 0.17123323, - ], - [ - 0.40486053, - 0.88037142, - -1.94567068, - -1.51714645, - -0.52759748, - -0.31592803, - 1.50826723, - -0.50215196, - ], - [ - 0.17409714, - -2.13997243, - -0.18530403, - -0.48807381, - 0.5621593, - 0.25899393, - 1.14069646, - 2.07721856, - ], - ] - ) - df = pd.DataFrame(nums, index=datetime_index, columns=index) - return df - - @staticmethod - def get_multiindex_df_with_non_datetime_index() -> pd.DataFrame: - non_datetime_index = ["M", "N"] - index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"]]) - data = [[1, 2, 3, 4], [5, 6, 7, 8]] - df = pd.DataFrame(data, index=non_datetime_index, columns=index) - return df - - def test1(self) -> None: - """ - Test DataFrame with a datetime index. - """ - df = self.get_multiindex_df_with_datetime_index() - actual = hpandas.multiindex_df_info(df) - # This is required by `pandas` >= 2.2. - expected = """ - shape=2 x 4 x 5 - columns_level0=2 ['asset1', 'asset2'] - columns_level1=4 ['close', 'high', 'low', 'open'] - rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:03:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:05:00+00:00'] - start_timestamp=2022-01-01 21:01:00+00:00 - end_timestamp=2022-01-01 21:05:00+00:00 - frequency=min - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test DataFrame with a non-frequency datetime index. 
- """ - df = self.get_multiindex_df_with_datetime_index() - non_frequency_datetime_index = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:04:30+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - df.index = non_frequency_datetime_index - actual = hpandas.multiindex_df_info(df) - expected = """ - shape=2 x 4 x 5 - columns_level0=2 ['asset1', 'asset2'] - columns_level1=4 ['close', 'high', 'low', 'open'] - rows=5 ['2022-01-01 21:01:00+00:00', '2022-01-01 21:02:00+00:00', '2022-01-01 21:04:00+00:00', '2022-01-01 21:04:30+00:00', '2022-01-01 21:06:00+00:00'] - start_timestamp=2022-01-01 21:01:00+00:00 - end_timestamp=2022-01-01 21:06:00+00:00 - frequency=None - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test3(self) -> None: - """ - Test DataFrame with a non-datetime index. - """ - df = self.get_multiindex_df_with_non_datetime_index() - actual = hpandas.multiindex_df_info(df) - expected = """ - shape=2 x 2 x 2 - columns_level0=2 ['A', 'B'] - columns_level1=2 ['X', 'Y'] - rows=2 ['M', 'N'] - """ - self.assert_equal(actual, expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py deleted file mode 100644 index f0295958f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_stats.py +++ /dev/null @@ -1,426 +0,0 @@ -import logging -from typing import Dict, List - -import pandas as pd - -import helpers.hprint as hprint -import helpers.hpandas as hpandas -import helpers.hpandas_stats as hpanstat -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# 
############################################################################# -# Test_compute_duration_df -# ############################################################################# - - -class Test_compute_duration_df(hunitest.TestCase): - """ - Compute timestamp stats from dfs and check the intersection. - """ - - @staticmethod - def get_dict_with_dfs() -> Dict[str, pd.DataFrame]: - timestamp_index1 = [ - pd.Timestamp("2022-01-01 21:00:00+00:00"), - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - pd.Timestamp("2022-01-01 21:06:00+00:00"), - ] - timestamp_index2 = [ - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - pd.Timestamp("2022-01-01 21:05:00+00:00"), - ] - timestamp_index3 = [ - pd.Timestamp("2022-01-01 21:01:00+00:00"), - pd.Timestamp("2022-01-01 21:02:00+00:00"), - pd.Timestamp("2022-01-01 21:03:00+00:00"), - pd.Timestamp("2022-01-01 21:04:00+00:00"), - ] - # - value1 = {"value1": [None, None, 1, 2, 3, 4, 5, None]} - value2 = {"value2": [1, 2, 3, None]} - value3 = {"value3": [None, None, 1, 2]} - # - df1 = pd.DataFrame(value1, index=timestamp_index1) - df2 = pd.DataFrame(value2, index=timestamp_index2) - df3 = pd.DataFrame(value3, index=timestamp_index3) - # - tag_to_df = { - "tag1": df1, - "tag2": df2, - "tag3": df3, - } - return tag_to_df - - def helper( - self, - valid_intersect: bool, - expected_start_timestamp: pd.Timestamp, - expected_end_timestamp: pd.Timestamp, - ) -> None: - """ - Checks if the intersection is valid and the same amongst all dfs. - """ - tag_to_df = self.get_dict_with_dfs() - _, tag_dfs = hpandas.compute_duration_df( - tag_to_df, valid_intersect=valid_intersect, intersect_dfs=True - ) - # Collect all start timestamps. 
- start_timestamps = [tag_dfs[tag].index.min() for tag in tag_dfs] - # Check that all start timestamps are equal. - start_equal = all( - element == start_timestamps[0] for element in start_timestamps - ) - self.assertTrue(start_equal) - # Check that start intersection is correct. - required_start_intersection = expected_start_timestamp - self.assertEqual(start_timestamps[0], required_start_intersection) - # Collect all end timestamps. - end_timestamps = [tag_dfs[tag].index.max() for tag in tag_dfs] - # Check that all end timestamps are equal. - end_equal = all( - element == end_timestamps[0] for element in end_timestamps - ) - self.assertTrue(end_equal) - # Check that end intersection is correct. - required_end_intersection = expected_end_timestamp - self.assertEqual(end_timestamps[0], required_end_intersection) - - def test1(self) -> None: - """ - Check only timestamp stats. - """ - tag_to_df = self.get_dict_with_dfs() - df_stats, _ = hpandas.compute_duration_df(tag_to_df) - expected_length = 3 - expected_column_names = [ - "max_index", - "max_valid_index", - "min_index", - "min_valid_index", - ] - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[tag1, tag3] - columns=min_index,max_index,min_valid_index,max_valid_index - shape=(3, 4) - min_index max_index min_valid_index max_valid_index - tag1 2022-01-01 21:00:00+00:00 2022-01-01 21:06:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:06:00+00:00 - tag2 2022-01-01 21:02:00+00:00 2022-01-01 21:05:00+00:00 2022-01-01 21:02:00+00:00 2022-01-01 21:04:00+00:00 - tag3 2022-01-01 21:01:00+00:00 2022-01-01 21:04:00+00:00 2022-01-01 21:03:00+00:00 2022-01-01 21:04:00+00:00 - """ - expected_signature = hprint.dedent(expected_signature) - self.check_df_output( - df_stats, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Modify initial DataFrames in dictionary with non-valid intersection - (incl NaNs). 
- """ - valid_intersect = False - expected_start_timestamp = pd.Timestamp("2022-01-01 21:02:00+00:00") - expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") - self.helper( - valid_intersect, expected_start_timestamp, expected_end_timestamp - ) - - def test3(self) -> None: - """ - Modify initial DataFrames in dictionary with valid intersection - (excluding NaNs). - """ - valid_intersect = True - expected_start_timestamp = pd.Timestamp("2022-01-01 21:03:00+00:00") - expected_end_timestamp = pd.Timestamp("2022-01-01 21:04:00+00:00") - self.helper( - valid_intersect, expected_start_timestamp, expected_end_timestamp - ) - - -# ############################################################################# -# Test_compute_weighted_sum -# ############################################################################# - - -class Test_compute_weighted_sum(hunitest.TestCase): - def helper( - self, - index1: List[int], - index2: List[int], - weights_data: Dict[str, List[float]], - index_mode: str, - expected_signature: str, - ) -> None: - """ - Build inputs and check that function output is correct. - """ - # Create test data. - data1 = {"A": [1, 2], "B": [3, 4]} - df1 = pd.DataFrame(data1, index=index1) - data2 = {"A": [5, 6], "B": [7, 8]} - df2 = pd.DataFrame(data2, index=index2) - dfs = {"df1": df1, "df2": df2} - # Create weights DataFrame. - weights = pd.DataFrame(weights_data, index=dfs.keys()) - # Run the function. - weighted_sums = hpandas.compute_weighted_sum( - dfs=dfs, weights=weights, index_mode=index_mode - ) - actual_signature = str(weighted_sums) - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - def test1(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "assert_equal". 
- """ - index1 = [0, 1] - index2 = [0, 1] - weights_data = {"w1": [0.2, 0.8]} - index_mode = "assert_equal" - expected_signature = r""" - {'w1': A B - 0 4.2 6.2 - 1 5.2 7.2} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test2(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "intersect". - """ - index1 = [0, 1] - index2 = [0, 2] - weights_data = {"w1": [0.2, 0.8], "w2": [0.5, 0.5]} - index_mode = "intersect" - expected_signature = r""" - {'w1': A B - 0 4.2 6.2 - 1 NaN NaN - 2 NaN NaN, 'w2': A B - 0 3.0 5.0 - 1 NaN NaN - 2 NaN NaN} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test3(self) -> None: - """ - Check that weighted sums are computed correctly. - - index_mode = "leave_unchanged". - """ - index1 = [0, 1] - index2 = [2, 3] - weights_data = {"w1": [0.2, 0.8]} - index_mode = "leave_unchanged" - expected_signature = r""" - {'w1': A B - 0 NaN NaN - 1 NaN NaN - 2 NaN NaN - 3 NaN NaN} - """ - expected_signature = hprint.dedent(expected_signature) - self.helper(index1, index2, weights_data, index_mode, expected_signature) - - def test4(self) -> None: - """ - Check that an assertion is raised if input is an empty dict. 
- """ - dfs: Dict[str, pd.DataFrame] = {} - weights_data = {"w1": [0.2, 0.8]} - index_mode = "assert_equal" - with self.assertRaises(AssertionError) as cm: - hpandas.compute_weighted_sum( - dfs=dfs, - weights=pd.DataFrame(weights_data), - index_mode=index_mode, - ) - actual_signature = str(cm.exception) - expected_signature = r""" - * Failed assertion * - cond={} - dictionary of dfs must be nonempty - """ - expected_signature = hprint.dedent(expected_signature) - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# Test_get_value_counts_stats_df -# ############################################################################# - - -class Test_get_value_counts_stats_df(hunitest.TestCase): - """ - Test value counts statistics computation. - """ - - def helper( - self, - category_data: List[str], - num_rows: int, - expected: str, - ) -> None: - """ - Test value counts with given parameters. - """ - # Prepare inputs. - df = pd.DataFrame({"category": category_data}) - # Run test. - result_df = hpandas.get_value_counts_stats_df( - df, "category", num_rows=num_rows - ) - # Check outputs. - actual = str(result_df) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic value counts with default parameters. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] - num_rows = 10 - # Prepare outputs. - expected = """ - count pct [%] - category - A 5 50.0 - B 2 20.0 - C 2 20.0 - D 1 10.0 - """ - # Run test. - self.helper(category_data, num_rows, expected) - - def test2(self) -> None: - """ - Test limiting the number of rows returned. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B", "D", "A", "C", "A"] - num_rows = 2 - # Prepare outputs. - expected = """ - count pct [%] - category - A 5 50.0 - B 2 20.0 - """ - # Run test. 
- self.helper(category_data, num_rows, expected) - - def test3(self) -> None: - """ - Test with num_rows=0 to return all rows. - """ - # Prepare inputs. - category_data = ["A", "B", "A", "C", "A", "B"] - num_rows = 0 - # Prepare outputs. - expected = """ - count pct [%] - category - A 3 50.000000 - B 2 33.333333 - C 1 16.666667 - """ - # Run test. - self.helper(category_data, num_rows, expected) - - -# ############################################################################# -# Test__get_unique_values_stats -# ############################################################################# - - -class Test__get_unique_values_stats(hunitest.TestCase): - """ - Test unique values count and percentage computation. - """ - - def helper(self, df_data: Dict, expected: str) -> None: - """ - Test unique values stats computation. - """ - # Prepare inputs. - df = pd.DataFrame(df_data) - # Run test. - result_df = hpanstat._get_unique_values_stats(df) - # Check outputs. - actual = str(result_df) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected, dedent=True) - - def test1(self) -> None: - """ - Test basic unique values computation. - """ - df_data = { - "col1": [1, 2, 1, 3, 1], - "col2": ["a", "b", "a", "c", "d"], - "col3": [1.0, 1.0, 1.0, 1.0, 1.0], - } - expected = """ - num_unique unique [%] - col1 3 60.0 - col2 4 80.0 - col3 1 20.0 - """ - self.helper(df_data, expected) - - def test2(self) -> None: - """ - Test with NaN values. - """ - df_data = { - "col1": [1, 2, 1, None, 1], - "col2": ["a", "b", "a", None, "c"], - } - expected = """ - num_unique unique [%] - col1 2 40.0 - col2 3 60.0 - """ - self.helper(df_data, expected) - - def test3(self) -> None: - """ - Test with single unique value. 
- """ - df_data = { - "col1": [5, 5, 5, 5], - "col2": ["x", "x", "x", "x"], - } - expected = """ - num_unique unique [%] - col1 1 25.0 - col2 1 25.0 - """ - self.helper(df_data, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py deleted file mode 100644 index f11d6988a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_transform.py +++ /dev/null @@ -1,1888 +0,0 @@ -import csv -import io -import logging -import re -import time -from typing import Any, Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd -import pytest - -import helpers.hdatetime as hdateti -import helpers.hpandas as hpandas -import helpers.hpandas_transform as hpantran -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_trim_df1 -# ############################################################################# - - -class Test_trim_df1(hunitest.TestCase): - def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """ - Return a df where the CSV txt is read verbatim without inferring dates. - - The `start_time` column is thus a str. 
- """ - txt = """ - ,start_time,egid,close - 4,2022-01-04 21:38:00.000000,13684,1146.48 - 8,2022-01-04 21:38:00.000000,17085,179.45 - 14,2022-01-04 21:37:00.000000,13684,1146.26 - 18,2022-01-04 21:37:00.000000,17085,179.42 - 24,2022-01-04 21:36:00.000000,13684,1146.0 - 27,2022-01-04 21:36:00.000000,17085,179.46 - 34,2022-01-04 21:35:00.000000,13684,1146.0 - 38,2022-01-04 21:35:00.000000,17085,179.42 - 40,2022-01-04 21:34:00.000000,17085,179.42 - 44,2022-01-04 21:34:00.000000,13684,1146.0 - """ - txt = hprint.dedent(txt) - df = pd.read_csv(io.StringIO(txt), *args, index_col=0, **kwargs) - df["start_time"] = pd.to_datetime(df["start_time"]) - return df - - def test_types1(self) -> None: - """ - Check the types of a df coming from `read_csv()`. - - The timestamps in `start_time` are left as strings. - """ - df = self.get_df() - # - actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def get_df_with_parse_dates(self) -> pd.DataFrame: - """ - Read the CSV parsing `start_time` as timestamps. - - The inferred type is a nasty `datetime64` which is not as well- - behaved as our beloved `pd.Timestamp`. 
- """ - df = self.get_df(parse_dates=["start_time"]) - return df - - def test_types2(self) -> None: - """ - Check the types of a df coming from `read_csv()` forcing parsing some - values as dates. - """ - df = self.get_df_with_parse_dates() - # Check. - actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def get_df_with_tz_timestamp(self) -> pd.DataFrame: - """ - Force the column parsed as `datetime64` into a tz-aware object. - - The resulting object is a `datetime64[ns, tz]`. - """ - df = self.get_df_with_parse_dates() - # Apply the tz. - col_name = "start_time" - df[col_name] = ( - df[col_name].dt.tz_localize("UTC").dt.tz_convert("America/New_York") - ) - df[col_name] = pd.to_datetime(df[col_name]) - return df - - def test_types3(self) -> None: - """ - Check the types of a df coming from `read_csv()` after conversion to - tz-aware objects. - """ - df = self.get_df_with_tz_timestamp() - # Check. 
- actual = hpandas.df_to_str( - df, print_dtypes=True, print_shape_info=True, tag="df" - ) - expected = r"""# df= - index=[4, 44] - columns=start_time,egid,close - shape=(10, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 10 / 10 = 100.00% 0 / 10 = 0.00% 4 - 1 start_time datetime64[ns, America/New_York] 5 / 10 = 50.00% 0 / 10 = 0.00% 2022-01-04T21:38:00.000000000 - 2 egid int64 2 / 10 = 20.00% 0 / 10 = 0.00% 13684 - 3 close float64 6 / 10 = 60.00% 0 / 10 = 0.00% 1146.48 - start_time egid close - 4 2022-01-04 16:38:00-05:00 13684 1146.48 - 8 2022-01-04 16:38:00-05:00 17085 179.45 - 14 2022-01-04 16:37:00-05:00 13684 1146.26 - ... - 38 2022-01-04 16:35:00-05:00 17085 179.42 - 40 2022-01-04 16:34:00-05:00 17085 179.42 - 44 2022-01-04 16:34:00-05:00 13684 1146.00""" - self.assert_equal(actual, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def helper( - self, - df: pd.DataFrame, - ts_col_name: Optional[str], - start_ts: Optional[pd.Timestamp], - end_ts: Optional[pd.Timestamp], - left_close: bool, - right_close: bool, - expected: str, - ) -> None: - """ - Run trimming and check the outcome. - - See param description in `hpandas.trim_df`. - - :param expected: the expected oucome of the trimming - """ - df_trim = hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - actual = hpandas.df_to_str(df_trim, print_shape_info=True, tag="df_trim") - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_trim_df1(self) -> None: - """ - Test trimming: baseline case. - """ - df = self.get_df() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df2(self) -> None: - """ - Trim a df with a column that is `datetime64` without tz using a - `pd.Timestamp` without tz. - - This operation is valid. - """ - df = self.get_df_with_parse_dates() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - ... - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df3(self) -> None: - """ - Trim a df with a column that is `datetime64` with tz vs a `pd.Timestamp - with tz. - - This operation is valid. - """ - df = self.get_df_with_tz_timestamp() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00", tz="UTC") - end_ts = pd.Timestamp("2022-01-04 21:38:00", tz="UTC") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 4 2022-01-04 16:38:00-05:00 13684 1146.48 - 8 2022-01-04 16:38:00-05:00 17085 179.45 - 14 2022-01-04 16:37:00-05:00 13684 1146.26 - ... - 27 2022-01-04 16:36:00-05:00 17085 179.46 - 34 2022-01-04 16:35:00-05:00 13684 1146.00 - 38 2022-01-04 16:35:00-05:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - # pylint: disable=line-too-long - def test_trim_df4(self) -> None: - """ - Trim a df with a column that is `datetime64` with tz vs a - `pd.Timestamp` without tz. - - This operation is invalid and we expect an assertion. - """ - df = self.get_df_with_tz_timestamp() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - with self.assertRaises(TypeError) as cm: - hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - # Check. - actual = str(cm.exception) - expected = r""" - Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_trim_df5(self) -> None: - """ - Test filtering on the index. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45 - 2022-01-04 21:37:00 13684 1146.26 - ... 
- 2022-01-04 21:36:00 17085 179.46 - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df6(self) -> None: - """ - Test excluding the lower boundary. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = False - right_close = True - expected = r"""# df_trim= - index=[4, 27] - columns=start_time,egid,close - shape=(6, 3) - start_time egid close - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45 - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - 27 2022-01-04 21:36:00 17085 179.46""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df7(self) -> None: - """ - Test excluding the upper boundary. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = False - expected = r"""# df_trim= - index=[14, 38] - columns=start_time,egid,close - shape=(6, 3) - start_time egid close - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - 27 2022-01-04 21:36:00 17085 179.46 - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df8(self) -> None: - """ - Test filtering on a sorted column. - """ - df = self.get_df() - # Run. 
- ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - df = df.sort_values(ts_col_name) - expected = r"""# df_trim= - index=[4, 38] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 34 2022-01-04 21:35:00 13684 1146.00 - 38 2022-01-04 21:35:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - ... - 18 2022-01-04 21:37:00 17085 179.42 - 4 2022-01-04 21:38:00 13684 1146.48 - 8 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df9(self) -> None: - """ - Test filtering on a sorted index. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = True - right_close = True - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42 - 2022-01-04 21:36:00 13684 1146.00 - ... - 2022-01-04 21:37:00 17085 179.42 - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df10(self) -> None: - """ - Test filtering on a sorted index, excluding lower and upper boundaries. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. 
- ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - left_close = False - right_close = False - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:36:00, 2022-01-04 21:37:00] - columns=egid,close - shape=(4, 2) - egid close - start_time - 2022-01-04 21:36:00 13684 1146.00 - 2022-01-04 21:36:00 17085 179.46 - 2022-01-04 21:37:00 13684 1146.26 - 2022-01-04 21:37:00 17085 179.42""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df11(self) -> None: - """ - Test filtering on a non-sorted column, with `start_ts` being None. - """ - df = self.get_df() - # Run. - ts_col_name = "start_time" - start_ts = None - end_ts = pd.Timestamp("2022-01-04 21:37:00") - left_close = True - right_close = True - expected = r"""# df_trim= - index=[14, 44] - columns=start_time,egid,close - shape=(8, 3) - start_time egid close - 14 2022-01-04 21:37:00 13684 1146.26 - 18 2022-01-04 21:37:00 17085 179.42 - 24 2022-01-04 21:36:00 13684 1146.00 - ... - 38 2022-01-04 21:35:00 17085 179.42 - 40 2022-01-04 21:34:00 17085 179.42 - 44 2022-01-04 21:34:00 13684 1146.00""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - def test_trim_df12(self) -> None: - """ - Test filtering on a sorted index, with `end_ts` being None. - """ - df = self.get_df() - df = df.set_index("start_time") - # Run. - ts_col_name = None - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = None - left_close = True - right_close = True - df = df.sort_index() - expected = r"""# df_trim= - index=[2022-01-04 21:35:00, 2022-01-04 21:38:00] - columns=egid,close - shape=(8, 2) - egid close - start_time - 2022-01-04 21:35:00 13684 1146.00 - 2022-01-04 21:35:00 17085 179.42 - 2022-01-04 21:36:00 13684 1146.00 - ... 
- 2022-01-04 21:37:00 17085 179.42 - 2022-01-04 21:38:00 13684 1146.48 - 2022-01-04 21:38:00 17085 179.45""" - self.helper( - df, ts_col_name, start_ts, end_ts, left_close, right_close, expected - ) - - -# ############################################################################# -# Test_trim_df2 -# ############################################################################# - - -@pytest.mark.skip( - "Used for comparing speed of different trimming methods (CmTask1404)." -) -class Test_trim_df2(Test_trim_df1): - """ - Test the speed of different approaches to df trimming. - """ - - def get_data( - self, set_as_index: bool, sort: bool - ) -> Tuple[pd.DataFrame, str, pd.Timestamp, pd.Timestamp]: - """ - Get the data for experiments. - - :param set_as_index: whether to set the filtering values as - index - :param sort: whether to sort the filtering values - :return: the df to trim, the parameters for trimming - """ - # Get a large df. - df = self.get_df() - df = df.loc[df.index.repeat(100000)].reset_index(drop=True) - # Define the params. - ts_col_name = "start_time" - start_ts = pd.Timestamp("2022-01-04 21:35:00") - end_ts = pd.Timestamp("2022-01-04 21:38:00") - # Prepare the data. - if set_as_index: - df = df.set_index(ts_col_name, append=True, drop=False) - if sort: - df = df.sort_index(level=ts_col_name) - elif sort: - df = df.sort_values(ts_col_name) - return df, ts_col_name, start_ts, end_ts - - def check_trimmed_df( - self, - df: pd.DataFrame, - ts_col_name: str, - start_ts: pd.Timestamp, - end_ts: pd.Timestamp, - ) -> None: - """ - Confirm that the trimmed df matches what is expected. - - The trimmed df is compared to the one produced by - `hpandas.trim_df()` with lower and upper boundaries included. - Thus, it is ensured that all the trimming methods produce the - same output. - - See param descriptions in `hpandas.trim_df()`. - - :param df: the df trimmed in a test, to compare with the - `hpandas.trim_df()` one - """ - # Clean up the df from the test. 
- if df.index.nlevels > 1: - df = df.droplevel(ts_col_name) - df = df.reset_index(drop=True) - df = df.sort_values(by=[ts_col_name, "egid"], ascending=[False, True]) - # Get the reference trimmed df. - left_close = True - right_close = True - df_trim_for_comparison = hpandas.trim_df( - df, ts_col_name, start_ts, end_ts, left_close, right_close - ) - assert df.equals(df_trim_for_comparison) - - def test_simple_mask_col(self) -> None: - """ - Trim with a simple mask; filtering on a column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - mask = df[ts_col_name] >= start_ts - df = df[mask] - if not df.empty: - mask = df[ts_col_name] <= end_ts - df = df[mask] - end_time = time.time() - _LOG.info( - "Simple mask trim (column): %.2f seconds", (end_time - start_time) - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_simple_mask_idx(self) -> None: - """ - Trim with a simple mask; filtering on an index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - mask = df.index.get_level_values(ts_col_name) >= start_ts - df = df[mask] - if not df.empty: - mask = df.index.get_level_values(ts_col_name) <= end_ts - df = df[mask] - end_time = time.time() - _LOG.info( - "Simple mask trim (index): %.2f seconds", (end_time - start_time) - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_between_col(self) -> None: - """ - Trim using `pd.Series.between`; filtering on a column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. 
- start_time = time.time() - df = df[df[ts_col_name].between(start_ts, end_ts, inclusive="both")] - end_time = time.time() - _LOG.info( - "`pd.Series.between` trim (column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_between_idx(self) -> None: - """ - Trim using `pd.Series.between`; filtering on an index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - filter_values = pd.Series( - df.index.get_level_values(ts_col_name) - ).between(start_ts, end_ts, inclusive="both") - df = df.droplevel(ts_col_name) - df = df[filter_values] - end_time = time.time() - _LOG.info( - "`pd.Series.between` trim (index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_non_sorted_col(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a non-sorted column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.set_index(df[ts_col_name], append=True).sort_index( - level=ts_col_name - ) - df = df.swaplevel() - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (non-sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_non_sorted_idx(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a non-sorted index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - df = df.swaplevel() - # Run. 
- start_time = time.time() - df = df.sort_index(level=ts_col_name) - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (non-sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_sorted_col(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a sorted column. - """ - set_as_index = False - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.set_index(ts_col_name, drop=False) - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_truncate_sorted_idx(self) -> None: - """ - Trim using `pd.DataFrame.truncate`; filtering on a sorted index. - """ - set_as_index = True - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - df = df.swaplevel() - # Run. - start_time = time.time() - df = df.truncate(before=start_ts, after=end_ts) - end_time = time.time() - _LOG.info( - "`pd.DataFrame.truncate` trim (sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_non_sorted_col(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a non-sorted column. - """ - set_as_index = False - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. 
- start_time = time.time() - df = df.sort_values(ts_col_name, ascending=True) - left_idx = df[ts_col_name].searchsorted(start_ts, side="left") - right_idx = df[ts_col_name].searchsorted(end_ts, side="right") - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (non-sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_non_sorted_idx(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a non-sorted index. - """ - set_as_index = True - sort = False - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - df = df.sort_index(level=ts_col_name) - left_idx = df.index.get_level_values(ts_col_name).searchsorted( - start_ts, side="left" - ) - right_idx = df.index.get_level_values(ts_col_name).searchsorted( - end_ts, side="right" - ) - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (non-sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_sorted_col(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a sorted column. - """ - set_as_index = False - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - left_idx = df[ts_col_name].searchsorted(start_ts, side="left") - right_idx = df[ts_col_name].searchsorted(end_ts, side="right") - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (sorted column): %.2f seconds", - (end_time - start_time), - ) - # Check. 
- self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - def test_searchsorted_sorted_idx(self) -> None: - """ - Trim using `pd.Series.searchsorted`; filtering on a sorted index. - """ - set_as_index = True - sort = True - df, ts_col_name, start_ts, end_ts = self.get_data( - set_as_index=set_as_index, sort=sort - ) - # Run. - start_time = time.time() - left_idx = df.index.get_level_values(ts_col_name).searchsorted( - start_ts, side="left" - ) - right_idx = df.index.get_level_values(ts_col_name).searchsorted( - end_ts, side="right" - ) - df = df.iloc[left_idx:right_idx] - end_time = time.time() - _LOG.info( - "`pd.Series.searchsorted` trim (sorted index): %.2f seconds", - (end_time - start_time), - ) - # Check. - self.check_trimmed_df(df, ts_col_name, start_ts, end_ts) - - -# ############################################################################# -# Test_assemble_df_rows -# ############################################################################# - - -class Test_assemble_df_rows(hunitest.TestCase): - """ - Test assembing df values into a column-row structure. - """ - - @staticmethod - def get_rows_values_example(df_as_str: str) -> hpantran.RowsValues: - """ - Prepare the input. - """ - # Separate the rows. - rows = df_as_str.split("\n") - # Clean up extra spaces. - rows_merged_space = [re.sub(" +", " ", row) for row in rows if len(row)] - # Identify individual values in the rows. - rows_values = list(csv.reader(rows_merged_space, delimiter=" ")) - return rows_values - - def test1(self) -> None: - """ - Test unnamed index, compact df. - """ - # Get the input. - df_as_str = """ - col1 col2 col3 col4 - 0 0.1 0.1 0.1 0.1 - 1 0.2 0.2 0.2 0.2""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. 
- expected = [ - ["", "col1", "col2", "col3", "col4"], - ["0", "0.1", "0.1", "0.1", "0.1"], - ["1", "0.2", "0.2", "0.2", "0.2"], - ] - self.assertListEqual(actual, expected) - - def test2(self) -> None: - """ - Test unnamed index, large df. - """ - # Get the input. - df_as_str = """ - column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 - 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 - 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. - expected = [ - [ - "", - "column_with_a_very_long_name_1", - "column_with_a_very_long_name_2", - "column_with_a_very_long_name_3", - "column_with_a_very_long_name_4", - "column_with_a_very_long_name_5", - ], - [ - "0", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - [ - "1", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - ] - self.assertListEqual(actual, expected) - - def test3(self) -> None: - """ - Test named index, compact df. - """ - # Get the input. - df_as_str = """ - col1 col2 col3 col4 - idx - 0 0.1 0.1 0.1 0.1 - 1 0.2 0.2 0.2 0.2""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. 
- expected = [ - ["idx", "col1", "col2", "col3", "col4"], - ["0", "0.1", "0.1", "0.1", "0.1"], - ["1", "0.2", "0.2", "0.2", "0.2"], - ] - self.assertListEqual(actual, expected) - - def test4(self) -> None: - """ - Test named index, large df. - """ - # Get the input. - df_as_str = """ - column_with_a_very_long_name_1 column_with_a_very_long_name_2 column_with_a_very_long_name_3 column_with_a_very_long_name_4 column_with_a_very_long_name_5 - idx - 0 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 - 1 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789 0.123456789123456789123456789""" - rows_values = self.get_rows_values_example(df_as_str) - # Run. - actual = hpantran._assemble_df_rows(rows_values) - # Check. - expected = [ - [ - "idx", - "column_with_a_very_long_name_1", - "column_with_a_very_long_name_2", - "column_with_a_very_long_name_3", - "column_with_a_very_long_name_4", - "column_with_a_very_long_name_5", - ], - [ - "0", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - [ - "1", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - "0.123456789123456789123456789", - ], - ] - self.assertListEqual(actual, expected) - - -# ############################################################################# -# Test_str_to_df -# ############################################################################# - - -class Test_str_to_df(hunitest.TestCase): - """ - Test converting a string representation of a dataframe into a Pandas df. - """ - - def test1(self) -> None: - # Prepare input. 
- df_as_str = """ - col1 col2 col3 col4 - 0 0.1 a None 2020-01-01 - 1 0.2 "b c" None 2021-05-05""" - col_to_type = { - "__index__": int, - "col1": float, - "col2": str, - "col3": None, - "col4": pd.Timestamp, - } - col_to_name_type: Dict[str, type] = {} - # Run. - actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) - # Check. - expected = pd.DataFrame( - { - "col1": [0.1, 0.2], - "col2": ["a", "b c"], - "col3": [None, None], - "col4": [ - pd.Timestamp("2020-01-01"), - pd.Timestamp("2021-05-05"), - ], - }, - index=[0, 1], - ) - hunitest.compare_df(actual, expected) - - def test2(self) -> None: - """ - Run a full circle check. - - The df used for testing: - - 1 2 - end_timestamp - 2023-08-15 0.21 1.7 - 2023-08-16 0.22 1.8 - 2023-08-17 0.23 1.9 - """ - # Create a df from the data. - data = { - 1: [0.21, 0.22, 0.23], - 2: [1.7, 1.8, 1.9], - } - timestamps = [ - pd.Timestamp("2023-08-15"), - pd.Timestamp("2023-08-16"), - pd.Timestamp("2023-08-17"), - ] - expected = pd.DataFrame(data, index=timestamps) - expected.index.name = "end_timestamp" - # Convert the df into a string. - df_as_str = hpandas.df_to_str(expected) - # Convert the resulting string back into a df. - col_to_type = { - "__index__": pd.Timestamp, - "1": float, - "2": float, - } - col_to_name_type = { - "1": int, - "2": int, - } - actual = hpandas.str_to_df(df_as_str, col_to_type, col_to_name_type) - # Check that the initial df and the final df are the same. - hunitest.compare_df(actual, expected) - - -# ############################################################################# -# TestFindGapsInDataframes -# ############################################################################# - - -class TestFindGapsInDataframes(hunitest.TestCase): - def test_find_gaps_in_dataframes(self) -> None: - """ - Verify that gaps are caught. - """ - # Prepare inputs. 
- test_data = pd.DataFrame( - data={ - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - ) - # Run. - missing_data = hpandas.find_gaps_in_dataframes( - test_data.head(2), test_data.tail(2) - ) - # Check output. - actual = pd.concat(missing_data) - actual = hpandas.df_to_str(actual) - expected = r""" dummy_value_1 dummy_value_2 dummy_value_3 - 2 3 C 0 - 0 1 A 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestSubsetDf1 -# ############################################################################# - - -class TestSubsetDf1(hunitest.TestCase): - def test1(self) -> None: - # Generate some random data. - np.random.seed(42) - df = pd.DataFrame( - np.random.randint(0, 100, size=(20, 4)), columns=list("ABCD") - ) - # Subset. - df2 = hpandas.subset_df(df, nrows=5, seed=43) - # Check. - actual = hpandas.df_to_str(df2) - expected = r""" - A B C D - 0 51 92 14 71 - 1 60 20 82 86 - 3 23 2 21 52 - ... - 17 80 35 49 3 - 18 1 5 53 3 - 19 53 92 62 17 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestCheckAndFilterMatchingColumns -# ############################################################################# - - -class TestCheckAndFilterMatchingColumns(hunitest.TestCase): - """ - Test that matching columns are filtered correctly. 
- """ - - @staticmethod - def get_test_data() -> pd.DataFrame: - df = pd.DataFrame( - data=[[3, 4, 5]] * 3, - columns=["col1", "col2", "col3"], - ) - return df - - def test_check_and_filter_matching_columns1(self) -> None: - """ - - required columns = received columns - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col2", "col3"] - filter_data_mode = "assert" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - self.assert_equal(str(actual_columns), str(columns)) - - def test_check_and_filter_matching_columns2(self) -> None: - """ - - received columns contain some columns apart from required ones - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col3"] - filter_data_mode = "assert" - with self.assertRaises(AssertionError): - hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - - def test_check_and_filter_matching_columns3(self) -> None: - """ - - received columns do not contain some of required columns - - `filter_data_mode` = "assert" - """ - df = self.get_test_data() - columns = ["col1", "col4"] - filter_data_mode = "assert" - with self.assertRaises(AssertionError): - hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - - def test_check_and_filter_matching_columns4(self) -> None: - """ - - received columns contain some columns apart from required ones - - `filter_data_mode` = "warn_and_trim" - """ - df = self.get_test_data() - columns = ["col1", "col3"] - filter_data_mode = "warn_and_trim" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - self.assert_equal(str(actual_columns), str(columns)) - - def test_check_and_filter_matching_columns5(self) -> None: - """ - - received columns do not contain some of required columns - - `filter_data_mode` = "warn_and_trim" - """ - df = 
self.get_test_data() - columns = ["col1", "col2", "col4"] - filter_data_mode = "warn_and_trim" - df = hpandas.check_and_filter_matching_columns( - df, columns, filter_data_mode - ) - actual_columns = df.columns.to_list() - expected_columns = ["col1", "col2"] - self.assert_equal(str(actual_columns), str(expected_columns)) - - -# ############################################################################# - - -# ############################################################################# -# Test_merge_dfs1 -# ############################################################################# - - -class Test_merge_dfs1(hunitest.TestCase): - """ - Test that 2 dataframes are merged correctly. - """ - - @staticmethod - def get_dataframe(data: Dict, index: List[int]) -> pd.DataFrame: - df = pd.DataFrame.from_dict(data) - index = pd.Index(index) - df = df.set_index(index, drop=True) - return df - - def test1(self) -> None: - """ - Overlap of `threshold_col` values is 100%. - """ - # Create test data. - data1 = { - "col1": [1, 10, 100], - "col2": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 70, 700], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - merged_df = hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - # Set expected values. 
- expected_length = 3 - expected_column_names = [ - "col1", - "col2", - "col3", - "col4", - "col5", - "threshold_col", - ] - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[0, 2] - columns=col1,col2,col3,threshold_col,col4,col5 - shape=(3, 6) - col1 col2 col3 threshold_col col4 col5 - 0 1 2.0 3 7 4 5.0 - 1 10 NaN 30 70 40 NaN - 2 100 200.0 300 700 400 500.0 - """ - # Check. - self.check_df_output( - merged_df, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test2(self) -> None: - """ - Overlap of `threshold_col` values is below the threshold. - """ - # Create test data. - data1 = { - "col1": [1, 10, 100], - "col2": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 60, 600], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - # Check. - with self.assertRaises(AssertionError): - hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - - def test3(self) -> None: - """ - Overlap of `threshold_col` values is above the threshold. - """ - # Create test data. 
- data1 = { - "col1": [1, 3, 5, 7, 10, 100, 100, 100, 100, 10, 10], - "col2": [2, 4, 6, 8, np.nan, 200, 200, np.nan, 10, 10, 100], - "col3": [1, 2, 3, 4, 30, 300, 300, np.nan, 300, 300, 30], - "threshold_col": [0, 1, 3, 5, 7, 9, 11, 13, 15, 70, 700], - } - index1 = range(0, 11) - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300, 1, 2, 3, 4, 30, 300, 300, np.nan], - "col4": [4, 40, 400, 2, 4, 6, 8, 11, 13, 15, 70], - "col5": [5, np.nan, 500, 5, 7, 10, 1, 2, 3, 4, 30], - "threshold_col": [1, 2, 3, 5, 7, 9, 11, 13, 15, 70, 700], - } - index2 = range(9, 20) - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - merged_df = hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - # Set expected values. - expected_length = 20 - expected_column_names = [ - "col1", - "col2", - "col3", - "col4", - "col5", - "threshold_col", - ] - expected_column_unique_values = None - # This is required by `pandas` >= 2.2. - expected_signature = r""" - # df= - index=[0, 19] - columns=col1,col2,col3,threshold_col,col4,col5 - shape=(20, 6) - col1 col2 col3 threshold_col col4 col5 - 0 1.0 2.0 1.0 0 NaN NaN - 1 NaN NaN 1.0 5 2.0 5.0 - 2 3.0 4.0 2.0 1 NaN NaN - ... - 17 10.0 10.0 300.0 70 15.0 4.0 - 18 100.0 NaN NaN 13 NaN NaN - 19 NaN NaN NaN 700 70.0 30.0 - """ - # Check. - self.check_df_output( - merged_df, - expected_length, - expected_column_names, - expected_column_unique_values, - expected_signature, - ) - - def test4(self) -> None: - """ - There are common columns (besides columns to merge on) in dataframes. - """ - # Create test data. 
- data1 = { - "col1": [1, 10, 100], - "col5": [2, np.nan, 200], - "col3": [3, 30, 300], - "threshold_col": [7, 70, 700], - } - index1 = [1, 2, 3] - df1 = self.get_dataframe(data1, index1) - # - data2 = { - "col3": [3, 30, 300], - "col4": [4, 40, 400], - "col5": [5, np.nan, 500], - "threshold_col": [7, 70, 700], - } - index2 = [3, 4, 5] - df2 = self.get_dataframe(data2, index2) - # - threshold_col_name = "threshold_col" - cols_to_merge_on = ["col3", "threshold_col"] - # Check. - with self.assertRaises(AssertionError): - hpandas.merge_dfs( - df1, - df2, - threshold_col_name, - how="outer", - on=cols_to_merge_on, - ) - - -# ############################################################################# -# Test_apply_index_mode -# ############################################################################# - - -class Test_apply_index_mode(hunitest.TestCase): - @staticmethod - def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Generate toy dataframes for the test. - """ - # Define common columns. - columns = ["A", "B"] - # Build dataframes with intersecting indices. - idx1 = [0, 1, 2, 3, 4] - data1 = [ - [0.21, 0.44], - [0.11, 0.42], - [1.99, 0.8], - [3.1, 0.91], - [3.5, 1.4], - ] - df1 = pd.DataFrame(data1, columns=columns, index=idx1) - # - idx2 = [0, 6, 2, 3, 5] - data1 = [ - [0.1, 0.4], - [0.11, 0.2], - [1.29, 0.38], - [0.1, 0.9], - [3.3, 2.4], - ] - df2 = pd.DataFrame(data1, columns=columns, index=idx2) - return df1, df2 - - def test1(self) -> None: - """ - Check that returned dataframes have indices that are equal to the - common index. - - - `mode="intersect"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - # Use an index intersection to transform dataframes. - mode = "intersect" - df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) - # Check that indices are common. 
- common_index = df1_in.index.intersection(df2_in.index) - common_index = hpandas.df_to_str(common_index) - idx1 = hpandas.df_to_str(df1_out.index) - idx2 = hpandas.df_to_str(df2_out.index) - self.assert_equal(idx1, common_index) - self.assert_equal(idx2, common_index) - - def test2(self) -> None: - """ - Check that dataframe indices did not change after applying an index - mode. - - - `mode="leave_unchanged"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "leave_unchanged" - df1_out, df2_out = hpandas.apply_index_mode(df1_in, df2_in, mode) - # Check that indices are as-is. - df1_in_idx = hpandas.df_to_str(df1_in.index) - df1_out_idx = hpandas.df_to_str(df1_out.index) - self.assert_equal(df1_in_idx, df1_out_idx) - # - df2_in_idx = hpandas.df_to_str(df2_in.index) - df2_out_idx = hpandas.df_to_str(df2_out.index) - self.assert_equal(df2_in_idx, df2_out_idx) - - def test3(self) -> None: - """ - Check that an assertion is raised when indices are not equal. - - - `mode="assert_equal"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "assert_equal" - # Check that both indices are equal, assert otherwise. - with self.assertRaises(AssertionError) as cm: - hpandas.apply_index_mode(df1_in, df2_in, mode) - actual = str(cm.exception) - # Check the error exception message. - self.check_string(actual) - - -# ############################################################################# -# Test_apply_column_mode -# ############################################################################# - - -class Test_apply_column_mode(hunitest.TestCase): - """ - Test that function applies column modes correctly. - """ - - @staticmethod - def get_test_data() -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Generate toy dataframes for the test. - """ - # Build dataframes with intersecting columns. 
- columns_1 = ["A", "B"] - data1 = [ - [0.21, 0.44], - [0.11, 0.42], - [1.99, 0.8], - [3.1, 0.91], - [3.5, 1.4], - ] - df1 = pd.DataFrame(data1, columns=columns_1) - # - columns_2 = ["A", "C"] - data2 = [ - [0.1, 0.4], - [0.11, 0.2], - [1.29, 0.38], - [0.1, 0.9], - [3.3, 2.4], - ] - df2 = pd.DataFrame(data2, columns=columns_2) - return df1, df2 - - def test1(self) -> None: - """ - Check that returned dataframes have columns that are equal to the - common ones. - - - `mode="intersect"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - # Use a column intersection mode to transform dataframes. - mode = "intersect" - df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) - # Check that dfs have equal column names. - common_columns = df1_in.columns.intersection(df2_in.columns) - common_columns = hpandas.df_to_str(common_columns) - columns1 = hpandas.df_to_str(df1_out.columns) - self.assert_equal(columns1, common_columns) - # - columns2 = hpandas.df_to_str(df2_out.columns) - self.assert_equal(columns2, common_columns) - - def test2(self) -> None: - """ - Check that dataframes' columns did not change after applying a column - mode. - - - `mode="leave_unchanged"` - """ - # Get test data. - df1_in, df2_in = self.get_test_data() - mode = "leave_unchanged" - df1_out, df2_out = hpandas.apply_columns_mode(df1_in, df2_in, mode) - # Check that columns are as-is. - df1_in_columns = hpandas.df_to_str(df1_in.columns) - df1_out_columns = hpandas.df_to_str(df1_out.columns) - self.assert_equal(df1_in_columns, df1_out_columns) - # - df2_in_columns = hpandas.df_to_str(df2_in.columns) - df2_out_columns = hpandas.df_to_str(df2_out.columns) - self.assert_equal(df2_in_columns, df2_out_columns) - - def test3(self) -> None: - """ - Check that an assertion is raised when columns are not equal. - - - `mode="assert_equal"` - """ - # Get test data. 
- df1_in, df2_in = self.get_test_data() - mode = "assert_equal" - # Check that both dataframes columns are equal, assert otherwise. - with self.assertRaises(AssertionError) as cm: - hpandas.apply_columns_mode(df1_in, df2_in, mode) - actual = str(cm.exception) - # Compare the actual outcome with an expected one. - self.check_string(actual) - - -# ############################################################################# - - -# ############################################################################# -# Test_get_df_from_iterator -# ############################################################################# - - -class Test_get_df_from_iterator(hunitest.TestCase): - def test1(self) -> None: - """ - Check that a dataframe is correctly built from an iterator of - dataframes. - """ - # Build iterator of dataframes for the test. - data1 = { - "num_col": [1, 2], - "str_col": ["A", "B"], - } - df1 = pd.DataFrame(data=data1) - data2 = { - "num_col": [3, 4], - "str_col": ["C", "D"], - } - df2 = pd.DataFrame(data=data2) - data3 = { - "num_col": [5, 6], - "str_col": ["E", "F"], - } - df3 = pd.DataFrame(data=data3) - # Run. - iter_ = iter([df1, df2, df3]) - df = hpandas.get_df_from_iterator(iter_) - actual_signature = hpandas.df_to_str(df) - expected_signature = """ num_col str_col - 0 1 A - 0 3 C - 0 5 E - 1 2 B - 1 4 D - 1 6 F - """ - self.assert_equal(actual_signature, expected_signature, fuzzy_match=True) - - -# ############################################################################# -# TestFilterByTime -# ############################################################################# - - -class TestFilterByTime(hunitest.TestCase): - @staticmethod - def _get_test_data() -> pd.DataFrame: - """ - Get data for testing. 
- - :return: data for testing - """ - df = pd.DataFrame( - { - "col1": [1, 2, 3, 4], - "col2": [ - hdateti.to_datetime("2018-04-05"), - hdateti.to_datetime("2018-04-06"), - hdateti.to_datetime("2018-04-07"), - hdateti.to_datetime("2018-04-08"), - ], - } - ) - df.index = pd.date_range("2017-01-01", periods=4) - return df - - def test_filter_by_index1(self) -> None: - """ - Verify that `[lower_bound, upper_bound)` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="left", - ts_col_name=None, - ) - expected = df[1:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index2(self) -> None: - """ - Verify that `(lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="right", - ts_col_name=None, - ) - expected = df[2:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index3(self) -> None: - """ - Verify that `[lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name=None, - ) - expected = df[1:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_index4(self) -> None: - """ - Verify that `(lower_bound, upper_bound)` works. 
- """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2017-01-02") - upper_bound = hdateti.to_datetime("2017-01-04") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="neither", - ts_col_name=None, - ) - expected = df[2:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column1(self) -> None: - """ - Verify that `[lower_bound, upper_bound)` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="left", - ts_col_name="col2", - ) - expected = df[1:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column2(self) -> None: - """ - Verify that `(lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="right", - ts_col_name="col2", - ) - expected = df[2:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column3(self) -> None: - """ - Verify that `[lower_bound, upper_bound]` works. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name="col2", - ) - expected = df[1:4] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_filter_by_column4(self) -> None: - """ - Verify that `(lower_bound, upper_bound)` works. 
- """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2018-04-06") - upper_bound = hdateti.to_datetime("2018-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="neither", - ts_col_name="col2", - ) - expected = df[2:3] - self.assert_equal(actual.to_string(), expected.to_string()) - - def test_no_intersection(self) -> None: - """ - Verify that if time interval is not covered by data then empty - DataFrame is returned. - """ - df = self._get_test_data() - lower_bound = hdateti.to_datetime("2021-04-06") - upper_bound = hdateti.to_datetime("2021-04-08") - actual = hpantran.filter_by_time( - df=df, - lower_bound=lower_bound, - upper_bound=upper_bound, - inclusive="both", - ts_col_name=None, - ) - self.assertEqual(actual.shape[0], 0) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py deleted file mode 100644 index 67eddb250..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpandas_utils.py +++ /dev/null @@ -1,251 +0,0 @@ -import logging - -import pandas as pd - -import helpers.hpandas as hpandas -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_df_to_str -# ############################################################################# - - -class Test_df_to_str(hunitest.TestCase): - @staticmethod - def get_test_data() -> pd.DataFrame: - test_data = { - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - df = pd.DataFrame(data=test_data) - return df - - def test_df_to_str1(self) -> None: - """ - Test common call 
to `df_to_str` with basic df. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str2(self) -> None: - """ - Test common call to `df_to_str` with tag. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, tag="df") - expected = r"""# df= - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str3(self) -> None: - """ - Test common call to `df_to_str` with print_shape_info. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_shape_info=True) - expected = r""" - index=[0, 2] - columns=dummy_value_1,dummy_value_2,dummy_value_3 - shape=(3, 3) - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str4(self) -> None: - """ - Test common call to `df_to_str` with print_dtypes. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_dtypes=True) - expected = r""" - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 - 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 - 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A - 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str5(self) -> None: - """ - Test common call to `df_to_str` with multiple args. 
- """ - df = self.get_test_data() - actual = hpandas.df_to_str( - df, print_shape_info=True, print_dtypes=True, tag="df" - ) - expected = r""" - # df= - index=[0, 2] - columns=dummy_value_1,dummy_value_2,dummy_value_3 - shape=(3, 3) - * type= - col_name dtype num_unique num_nans first_elem type(first_elem) - 0 index int64 3 / 3 = 100.00% 0 / 3 = 0.00% 0 - 1 dummy_value_1 int64 3 / 3 = 100.00% 0 / 3 = 0.00% 1 - 2 dummy_value_2 object 3 / 3 = 100.00% 0 / 3 = 0.00% A - 3 dummy_value_3 int64 1 / 3 = 33.33% 0 / 3 = 0.00% 0 - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str6(self) -> None: - """ - Test common call to `df_to_str` with `pd.Series`. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df["dummy_value_2"]) - expected = r""" - dummy_value_2 - 0 A - 1 B - 2 C - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str7(self) -> None: - """ - Test common call to `df_to_str` with `pd.Index`. - """ - df = self.get_test_data() - index = df.index - index.name = "index_name" - actual = hpandas.df_to_str(index) - expected = r""" - index_name - 0 0 - 1 1 - 2 2 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str8(self) -> None: - """ - Test that `-0.0` is replaced with `0.0`. - """ - test_data = { - "dummy_value_1": [1, 2, 3, 4], - "dummy_value_2": ["A", "B", "C", "D"], - "dummy_value_3": [0, 0, 0, 0], - "dummy_value_4": [+0.0, -0.0, +0.0, -0.0], - } - df = pd.DataFrame(data=test_data) - actual = hpandas.df_to_str(df, handle_signed_zeros=True) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 dummy_value_4 - 0 1 A 0 0.0 - 1 2 B 0 0.0 - 2 3 C 0 0.0 - 3 4 D 0 0.0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str9(self) -> None: - """ - Test that `-0.0` is replaced with `0.0` in a multi-index dataframe. 
- """ - test_data = { - ("A", "X"): [-0.0, 5.0, -0.0], - ("A", "Y"): [2, 6, 0], - ("B", "X"): [0, 7, 3], - ("B", "Y"): [4.4, -0.0, 5.1], - } - df = pd.DataFrame(data=test_data) - actual = hpandas.df_to_str(df, handle_signed_zeros=True) - expected = r""" - A B - X Y X Y - 0 0.0 2 0 4.4 - 1 5.0 6 7 0.0 - 2 0.0 0 3 5.1""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_df_to_str10(self) -> None: - """ - Test common call to `df_to_str` with `print_memory_usage = True`. - """ - df = self.get_test_data() - actual = hpandas.df_to_str(df, print_memory_usage=True) - # This is required by `numpy` >= 2.1.0 - expected = r""" - * memory= - shallow deep - Index 132.0 b 132.0 b - dummy_value_1 24.0 b 24.0 b - dummy_value_2 24.0 b 150.0 b - dummy_value_3 24.0 b 24.0 b - total 204.0 b 330.0 b - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_head -# ############################################################################# - - -class Test_head(hunitest.TestCase): - def test1(self) -> None: - """ - Test basic head functionality without seed. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": [1, 2, 3, 4, 5], - "col2": ["a", "b", "c", "d", "e"], - } - ) - hpandas.head(df, num_rows=2) - - def test2(self) -> None: - """ - Test head with a seed for reproducible sampling. - """ - # Prepare input. - df = pd.DataFrame( - { - "col1": list(range(10)), - "col2": list("abcdefghij"), - } - ) - hpandas.head(df, seed=42, num_rows=3) - - def test3(self) -> None: - """ - Test head with different num_rows parameter. - """ - # Prepare input. 
- df = pd.DataFrame( - { - "col1": list(range(5)), - "col2": list("abcde"), - } - ) - hpandas.head(df, num_rows=4) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py deleted file mode 100644 index a1be56d40..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparquet.py +++ /dev/null @@ -1,1468 +0,0 @@ -import datetime -import logging -import os -import random -from typing import Any, List, Optional, Tuple - -import pandas as pd -import pyarrow -import pyarrow.parquet as parquet -import pytest - -import helpers.hdbg as hdbg -import helpers.hmoto as hmoto -import helpers.hpandas as hpandas -import helpers.hparquet as hparque -import helpers.hprint as hprint -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# Most of these unit tests are taken from -# `amp/helpers/notebooks/gallery_parquet.ipynb` - - -def _get_df(date: datetime.date, seed: int = 42) -> pd.DataFrame: - """ - Create pandas random data, like: - - ``` - idx instr val1 val2 - 2000-01-01 0 A 99 30 - 2000-01-02 0 A 54 46 - 2000-01-03 0 A 85 86 - ``` - """ - instruments = "A B C D E".split() - date = pd.Timestamp(date, tz="America/New_York") - start_date = date.replace(hour=9, minute=30) - end_date = date.replace(hour=16, minute=0) - df_idx = pd.date_range(start_date, end_date, freq="5T") - _LOG.debug("df_idx=[%s, %s]", min(df_idx), max(df_idx)) - _LOG.debug("len(df_idx)=%s", len(df_idx)) - random.seed(seed) - # For each instruments generate random data. 
- df = [] - for idx, inst in enumerate(instruments): - df_tmp = pd.DataFrame( - { - "idx": idx, - "instr": inst, - "val1": [random.randint(0, 100) for _ in range(len(df_idx))], - "val2": [random.randint(0, 100) for _ in range(len(df_idx))], - }, - index=df_idx, - ) - df.append(df_tmp) - # Create a single df for all the instruments. - df = pd.concat(df) - return df - - -def _get_test_df_with_timestamps() -> pd.DataFrame: - """ - Create a DataFrame with timestamps. - """ - timestamp = pd.Timestamp("2022-01-01 00:00:00.123456", tz="America/New_York") - index = [timestamp for _ in range(6)] - df = pd.DataFrame( - { - "n_legs": [2, 2, 4, 4, 5, 100], - "animal": [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede", - ], - "year": [2001, 2002, 2001, 2003, 2003, 2001], - }, - index=index, - ) - knowledge_timestamp = pd.Timestamp.now(tz="UTC") - df["knowledge_timestamp"] = knowledge_timestamp - return df - - -def _get_df_example1() -> pd.DataFrame: - date = datetime.date(2020, 1, 1) - df = _get_df(date) - _LOG.debug("df=\n%s", df.head(3)) - return df - - -def _compare_dfs(self: Any, df1: pd.DataFrame, df2: pd.DataFrame) -> str: - df1_as_str: str = hpandas.df_to_str(df1, print_shape_info=True, tag="") - df2_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="") - self.assert_equal(df1_as_str, df2_as_str, fuzzy_match=True) - # When Parquet reads partitioned dataset can convert partitioning columns into - # categorical variables that can create false positives. 
- pd.testing.assert_frame_equal( - df1, df2, check_dtype=False, check_categorical=False - ) - return df1_as_str - - -# ############################################################################# - - -# ############################################################################# -# TestParquet1 -# ############################################################################# - - -class TestParquet1(hunitest.TestCase): - def test_get_df1(self) -> None: - """ - Check the output of `_get_df()`. - """ - # Prepare data. - df = _get_df_example1() - # Check. - actual = hpandas.df_to_str(df, print_shape_info=True, tag="df") - expected = r"""# df= - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(395, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... - 2020-01-01 15:50:00-05:00 4 E 57 3 - 2020-01-01 15:55:00-05:00 4 E 33 50 - 2020-01-01 16:00:00-05:00 4 E 96 75""" - self.assert_equal(actual, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def get_file_name(self) -> str: - dir_name = self.get_scratch_space() - file_name = os.path.join(dir_name, "df.parquet") - return file_name - - def write_data_as_parquet(self) -> Tuple[pd.DataFrame, str]: - # Prepare data. - df = _get_df_example1() - # Save data. - file_name = self.get_file_name() - hparque.to_parquet(df, file_name, log_level=logging.INFO) - return df, file_name - - def write_and_read_helper(self, columns: List[str]) -> None: - """ - - Save a dataframe as Parquet - - Read back certain columns of the data from the file - - Check that the df is what expected - """ - df, file_name = self.write_data_as_parquet() - # Read back one column of the data. - df2 = hparque.from_parquet( - file_name, columns=columns, log_level=logging.INFO - ) - _LOG.debug("df2=\n%s", df2.head(3)) - # Check. 
- df = df[columns] - _compare_dfs(self, df, df2) - - def test_write_and_read_everything1(self) -> None: - """ - Read all the columns from the file. - """ - df, file_name = self.write_data_as_parquet() - # Read data back. - df2 = hparque.from_parquet(file_name, log_level=logging.INFO) - _LOG.debug("df2=\n%s", df2.head(3)) - # Check. - _compare_dfs(self, df, df2) - - def test_write_and_read_one_column1(self) -> None: - """ - - Read back one column of the data from the file. - """ - # Read back one column of the data. - columns = ["val1"] - self.write_and_read_helper(columns) - - def test_write_and_read_two_columns1(self) -> None: - """ - Read back one column of the data from the file. - """ - # Read back two columns of the data. - columns = ["idx", "val1"] - self.write_and_read_helper(columns) - - # ////////////////////////////////////////////////////////////////////////////// - - def read_filtered_parquet( - self, file_name: str, filters: Any - ) -> pd.DataFrame: - filesystem = None - dataset = parquet.ParquetDataset( - file_name, - filesystem=filesystem, - filters=filters, - ) - columns = None - table = dataset.read(columns=columns) - df = table.to_pandas() - _LOG.debug("df=\n%s", df.head(3)) - return df - - def test_read_with_filter1(self) -> None: - """ - Read only a subset of the rows. - """ - _, file_name = self.write_data_as_parquet() - # Read. - filters = [] - filters.append([("idx", "=", 0)]) - df2 = self.read_filtered_parquet(file_name, filters) - # Check. - actual = hpandas.df_to_str(df2, print_shape_info=True, tag="df") - expected = r"""# df= - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(79, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... 
- 2020-01-01 15:50:00-05:00 0 A 29 76 - 2020-01-01 15:55:00-05:00 0 A 12 8 - 2020-01-01 16:00:00-05:00 0 A 48 49""" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_write_and_read_partition_parquet_files_with_unit(self) -> None: - """ - Write the Pandas DataFrame to partitioned Parquet files and read it - back, verifying the retention of time unit information in the index. - """ - # Prepare test data. - dst_dir = os.path.join(self.get_scratch_space(), "tmp.partition_parquet") - initial_df = _get_test_df_with_timestamps() - initial_df.index = initial_df.index.as_unit("us") - partition_columns = initial_df.columns.tolist() - # The `to_partitioned_parquet` saves the given dataframe as Parquet - # files partitioned along the given columns. - hparque.to_partitioned_parquet(initial_df, partition_columns, dst_dir) - df_from_parquet_files = hparque.from_parquet(dst_dir) - # Check that the time unit is ns. - self.assert_equal("ns", df_from_parquet_files.index.unit) - # TODO(Vlad): Refactor after CmampTask7331 is resolved. - # self.assert_equal(initial_df.index.unit, df.index.unit) - - def test_write_and_read_parquet_file_with_unit(self) -> None: - """ - Write the provided DataFrame to Parquet file and read it back, - verifying the retention of time unit information in the index. - """ - test_parquet_file = os.path.join( - self.get_scratch_space(), "tmp_dummy.parquet" - ) - initial_df = _get_test_df_with_timestamps() - initial_df.index = initial_df.index.as_unit("us") - # The `to_parquet` function writes a DF to a single parquet file without - # any partition. - hparque.to_parquet(initial_df, test_parquet_file) - df = hparque.from_parquet(test_parquet_file) - self.assert_equal("ns", df.index.unit) - # TODO(Vlad): Refactor after CmampTask7331 is resolved. 
- # self.assert_equal(initial_df.index.unit, df.index.unit) - - @pytest.mark.skip(reason="TODO(Juraj): HelpersTask21.") - def test_save_read_concat_data(self) -> None: - """ - Verify that data produced by different version of Pandas preserves - types when reading/writing to/from Parquet. - """ - # Copy sample data that saved with the Pandas v.1.5.1 from S3 to the - # scratch dir. - s3_path = self.get_s3_input_dir() - local_path = self.get_scratch_space() - aws_profile = "ck" - hs3.copy_data_from_s3_to_local_dir(s3_path, local_path, aws_profile) - # Read sample data from the scratch dir. - sample_data = hparque.from_parquet(local_path) - # Generate artificial test data. - data = { - "timestamp": [1696896000000], - "open": [27578.4], - "high": [27584.3], - "low": [27571.2], - "close": [27571.3], - "volume": [154.933], - "exchange_id": ["binance"], - "knowledge_timestamp": [ - pd.Timestamp("2023-11-06 14:15:11.241716+0000", tz="UTC") - ], - } - index = pd.Series( - [pd.Timestamp("2023-10-10T00:00:00+00:00")], name="timestamp" - ) - test_data = pd.DataFrame(data, index=index) - # Concatenate sample and test data and save it to the scratch dir. - combined_test_data = pd.concat([sample_data, test_data]) - local_combined_file_path = os.path.join( - local_path, "combined_dummy.parquet" - ) - hparque.to_parquet(combined_test_data, local_combined_file_path) - # Read the data back from the scratch dir. - actual_df = hparque.from_parquet(local_combined_file_path) - # Check that the data types the same as in the sample data. 
- dtypes_sample = str(sample_data.dtypes) - dtypes_actual = str(actual_df.dtypes) - self.assert_equal(dtypes_sample, dtypes_actual, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# TestPartitionedParquet1 -# ############################################################################# - - -class TestPartitionedParquet1(hunitest.TestCase): - # From https://arrow.apache.org/docs/python/dataset.html#reading-partitioned-data - # A dataset can exploit a nested structure, where the sub-dir names hold - # information about which subset of the data is stored in that dir - # E.g., "Hive" partitioning scheme "key=vale" dir names - - def write_partitioned_dataset_and_check( - self, - df: pd.DataFrame, - partition_cols: List[str], - exp_dir_signature: Optional[str], - ) -> str: - """ - - Write df as a partitioned dataset - - (Optional) Check the signature of the directory - - :param partition_cols: columns used for - :param exp_dir_signature: expected signature of the written directory - :return path to the saved Parquet data - """ - _LOG.debug(hprint.to_str("partition_cols")) - # Prepare data. - dir_name = os.path.join(self.get_scratch_space(), "data.parquet") - table = pyarrow.Table.from_pandas(df) - # Write partitioned dataset. - parquet.write_to_dataset( - table, - dir_name, - partition_cols, - ) - # Check dir signature. 
- if exp_dir_signature is not None: - include_file_content = False - remove_dir_name = True - dir_signature = hunitest.get_dir_signature( - dir_name, include_file_content, remove_dir_name=remove_dir_name - ) - self.assert_equal( - dir_signature, - exp_dir_signature, - fuzzy_match=True, - purify_text=True, - ) - return dir_name - - def write_and_read_helper( - self, - df: pd.DataFrame, - partition_cols: List[str], - exp_dir_signature: Optional[str], - columns_to_read: Optional[List[str]], - ) -> str: - """ - - Write df as a partitioned dataset using `partitioned_cols` - - Read certain column back - - :param partition_cols: columns used for - :param exp_dir_signature: expected signature of the written directory - :return: read df as string - """ - _LOG.debug(hprint.to_str("partition_cols columns_to_read")) - # Write and check. - dir_name = self.write_partitioned_dataset_and_check( - df, partition_cols, exp_dir_signature - ) - # Read back certain columns. - df2 = hparque.from_parquet( - dir_name, columns=columns_to_read, log_level=logging.INFO - ) - # Compare. - if columns_to_read is not None: - df = df[columns_to_read] - # - hdbg.dassert_set_eq(df.columns, df2.columns) - df2 = df2[df.columns] - df_as_str = _compare_dfs(self, df, df2) - return df_as_str - - # ////////////////////////////////////////////////////////////////////////////// - - def test_write_and_read1(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read everything back - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = r""" - # Dir structure - . 
- idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet - idx=2 - idx=2/data.parquet - idx=3 - idx=3/data.parquet - idx=4 - idx=4/data.parquet""" - columns_to_read = None - self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - - def test_write_and_read2(self) -> None: - """ - - Write a partitioned dataset with two partitioning columns - - Read everything back - """ - df = _get_df_example1() - partition_cols = ["idx", "instr"] - exp_dir_signature = r"""# Dir structure - . - idx=0 - idx=0/instr=A - idx=0/instr=A/data.parquet - idx=1 - idx=1/instr=B - idx=1/instr=B/data.parquet - idx=2 - idx=2/instr=C - idx=2/instr=C/data.parquet - idx=3 - idx=3/instr=D - idx=3/instr=D/data.parquet - idx=4 - idx=4/instr=E - idx=4/instr=E/data.parquet""" - # Read back everything. - columns_to_read = None - self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - - def test_write_and_read3(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read two columns back - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = None - columns_to_read = ["idx", "instr"] - df_as_str = self.write_and_read_helper( - df, partition_cols, exp_dir_signature, columns_to_read - ) - expected = r"""# = - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr - shape=(395, 2) - idx instr - 2020-01-01 09:30:00-05:00 0 A - 2020-01-01 09:35:00-05:00 0 A - 2020-01-01 09:40:00-05:00 0 A - ... - 2020-01-01 15:50:00-05:00 4 E - 2020-01-01 15:55:00-05:00 4 E - 2020-01-01 16:00:00-05:00 4 E""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - def test_write_and_read4(self) -> None: - """ - - Write a partitioned dataset with one partitioning column - - Read two columns back filtering by the one of the partitioned column - """ - df = _get_df_example1() - partition_cols = ["idx"] - exp_dir_signature = None - # Write and check. 
- dir_name = self.write_partitioned_dataset_and_check( - df, partition_cols, exp_dir_signature - ) - # Read back everything. - columns_to_read = ["idx", "instr"] - filters = [] - filters.append(("idx", "=", 0)) - # Note that `from_parquet` doesn't work with filters. - # df2 = hparque.from_parquet( - # dir_name, - # columns=columns_to_read, - # filters=filters, - # log_level=logging.INFO, - # ) - filesystem = None - dataset = parquet.ParquetDataset( - dir_name, - filesystem=filesystem, - filters=filters, - ) - table = dataset.read(columns=columns_to_read) - df2 = table.to_pandas() - # Compare. - df_as_str = hpandas.df_to_str(df2, print_shape_info=True, tag="df") - expected = r"""# df= - index=[0, 78] - columns=idx,instr - shape=(79, 2) - idx instr - 0 0 A - 1 0 A - 2 0 A - ... - 76 0 A - 77 0 A - 78 0 A""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - # ////////////////////////////////////////////////////////////////////////////// - - def test_merge1(self) -> None: - """ - - Write a partitioned dataset in multiple chunks using the same partitioning - column - - Make sure that reading it back we get the original data. - """ - df = _get_df_example1() - # - partition_cols = ["idx"] - # Write the first chunk. - df_chunk1 = df[df["idx"].isin([0, 1])] - exp_dir_signature = """ - # Dir structure - . - idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet""" - # Write and check. - _ = self.write_partitioned_dataset_and_check( - df_chunk1, partition_cols, exp_dir_signature - ) - # Write the second chunk. - df_chunk2 = df[df["idx"].isin([2, 3, 4])] - exp_dir_signature = """ - # Dir structure - . - idx=0 - idx=0/data.parquet - idx=1 - idx=1/data.parquet - idx=2 - idx=2/data.parquet - idx=3 - idx=3/data.parquet - idx=4 - idx=4/data.parquet""" - # Write and check. - dir_name = self.write_partitioned_dataset_and_check( - df_chunk2, partition_cols, exp_dir_signature - ) - # Read everything. 
- columns_to_read = None - df2 = hparque.from_parquet( - dir_name, columns=columns_to_read, log_level=logging.INFO - ) - # Compare. - hdbg.dassert_set_eq(df.columns, df2.columns) - df2 = df2[df.columns] - df_as_str = _compare_dfs(self, df, df2) - expected = r""" - # = - index=[2020-01-01 09:30:00-05:00, 2020-01-01 16:00:00-05:00] - columns=idx,instr,val1,val2 - shape=(395, 4) - idx instr val1 val2 - 2020-01-01 09:30:00-05:00 0 A 81 35 - 2020-01-01 09:35:00-05:00 0 A 14 58 - 2020-01-01 09:40:00-05:00 0 A 3 81 - ... - 2020-01-01 15:50:00-05:00 4 E 57 3 - 2020-01-01 15:55:00-05:00 4 E 33 50 - 2020-01-01 16:00:00-05:00 4 E 96 75""" - self.assert_equal(df_as_str, expected, fuzzy_match=True) - self.assert_equal(df_as_str, expected, fuzzy_match=True) - - def _run_write_and_read_mixed_units_partitioned_dataset( - self, first_unit: str, second_unit: str - ) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - :param first_unit: time unit of the first DataFrame - :param second_unit: time unit of the second DataFrame - """ - initial_df = _get_test_df_with_timestamps() - partition_columns = ["n_legs", "animal", "year"] - dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") - # Write first DF as partitioned parquet. - first_df = initial_df.copy() - first_df.index = first_df.index.as_unit(first_unit) - first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( - f"datetime64[{first_unit}, UTC]" - ) - hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) - # Write second DF as partitioned parquet. - second_df = initial_df.copy() - second_df.index = second_df.index.as_unit(second_unit) - second_df["knowledge_timestamp"] = second_df[ - "knowledge_timestamp" - ].astype(f"datetime64[{second_unit}, UTC]") - hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) - # Read it back. 
- _ = hparque.from_parquet(dst_dir) - - def test_write_and_read_mixed_units_partition_dataset_1(self) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - The combination `ns` and `us` should not raise an error. - See CmampTask7331 for details. - """ - self._run_write_and_read_mixed_units_partitioned_dataset("ns", "us") - - @pytest.mark.skip( - reason="Since names and order the files is not guaranteed, the test is " - "flaky, decided to skip it for now.", - ) - def test_write_and_read_mixed_units_partition_dataset_2(self) -> None: - """ - Write two DataFrames with different time units to a partitioned Parquet - dataset and read it back. - - The combination `ms` and `us` should raise an error. - """ - with self.assertRaises(pyarrow.lib.ArrowInvalid): - self._run_write_and_read_mixed_units_partitioned_dataset("ms", "us") - - -# ############################################################################# - - -# ############################################################################# -# TestGetParquetFiltersFromTimestampInterval1 -# ############################################################################# - - -class TestGetParquetFiltersFromTimestampInterval1(hunitest.TestCase): - def test_no_interval(self) -> None: - """ - No timestamps provided. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = None - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - self.assertIsNone(filters) - - def test_by_month_half1(self) -> None: - """ - Test a left-bound interval [..., None]. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = None - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = r"[[('year', '==', 2020), ('month', '>=', 1)], [('year', '>', 2020)]]" - self.assert_equal(actual, expected) - - def test_by_month_half2(self) -> None: - """ - Test a right-bound interval [None, ...]. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = r"[[('year', '==', 2020), ('month', '<=', 1)], [('year', '<', 2020)]]" - self.assert_equal(actual, expected) - - def test_by_month_one_year1(self) -> None: - """ - Test an interval contained in a whole year. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_one_year2(self) -> None: - """ - Test an interval contained in a whole year. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 1), ('month', '<=', 1)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_invalid1(self) -> None: - """ - Test an invalid interval. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:30:00+00:00") - with self.assertRaises(AssertionError) as fail: - hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(fail.exception) - expected = r""" - * Failed assertion * - 2020-01-02 09:31:00+00:00 <= 2020-01-02 09:30:00+00:00 - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_by_month_invalid2(self) -> None: - """ - Test an invalid partition mode. - """ - partition_mode = "new_mode" - start_ts = pd.Timestamp("2020-01-02 09:31:00+00:00") - end_ts = pd.Timestamp("2020-01-02 09:32:00+00:00") - with self.assertRaises(ValueError) as fail: - hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(fail.exception) - expected = r"Unknown partition mode `new_mode`!" - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_by_month_two_years1(self) -> None: - """ - Test an interval spanning two years. - """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2021-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 6)], " - r"[('year', '==', 2021), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_by_month_over_two_years1(self) -> None: - """ - Test an interval longer than two years. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, start_ts, end_ts - ) - actual = str(filters) - expected = ( - r"[[('year', '==', 2020), ('month', '>=', 6)], " - r"[('year', '>', 2020), ('year', '<', 2022)], " - r"[('year', '==', 2022), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - def test_additional_filters1(self) -> None: - """ - No timestamps provided while a single additional filter is provided. - """ - partition_mode = "by_year_month" - start_ts = None - end_ts = None - additional_filters = [ - ( - "currency_pair", - "in", - ("BTC_USDT",), - ) - ] - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, - start_ts, - end_ts, - additional_filters=additional_filters, - ) - actual = str(filters) - expected = r"[('currency_pair', 'in', ('BTC_USDT',))]" - self.assert_equal(actual, expected) - - def test_additional_filters2(self) -> None: - """ - Test an interval with multiple additional filters. 
- """ - partition_mode = "by_year_month" - start_ts = pd.Timestamp("2020-06-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - additional_filters = [ - ("exchange_id", "in", ("binance")), - ("currency_pairs", "in", ("ADA_USDT", "BTC_USDT")), - ] - filters = hparque.get_parquet_filters_from_timestamp_interval( - partition_mode, - start_ts, - end_ts, - additional_filters=additional_filters, - ) - actual = str(filters) - expected = ( - r"[[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '==', 2020), ('month', '>=', 6)], " - r"[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '>', 2020), ('year', '<', 2022)], " - r"[('exchange_id', 'in', 'binance'), " - r"('currency_pairs', 'in', ('ADA_USDT', 'BTC_USDT')), " - r"('year', '==', 2022), ('month', '<=', 12)]]" - ) - self.assert_equal(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestAddDatePartitionColumns -# ############################################################################# - - -class TestAddDatePartitionColumns(hunitest.TestCase): - def add_date_partition_columns_helper( - self, partition_mode: str, expected: str - ) -> None: - # Prepare inputs. - test_data = { - "dummy_value": [1, 2, 3], - "dummy_timestamp": [1638646800000, 1638646860000, 1638646960000], - } - start_timestamp = "2021-12-04 19:40:00+00:00" - end_timestamp = "2021-12-04 19:42:00+00:00" - index = pd.date_range(start_timestamp, end_timestamp, freq="1T") - df = pd.DataFrame(index=index, data=test_data) - # Run. - hparque.add_date_partition_columns(df, partition_mode) - # Check output. 
- actual = hpandas.df_to_str(df) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_add_date_partition_columns1(self) -> None: - partition_mode = "by_date" - expected = r""" dummy_value dummy_timestamp date - 2021-12-04 19:40:00+00:00 1 1638646800000 20211204 - 2021-12-04 19:41:00+00:00 2 1638646860000 20211204 - 2021-12-04 19:42:00+00:00 3 1638646960000 20211204""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns2(self) -> None: - partition_mode = "by_year" - expected = r""" dummy_value dummy_timestamp year - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns3(self) -> None: - partition_mode = "by_year_month_day" - # pylint: disable=line-too-long - expected = r""" dummy_value dummy_timestamp year month day - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 12 4 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 12 4 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021 12 4""" - self.add_date_partition_columns_helper(partition_mode, expected) - - def test_add_date_partition_columns4(self) -> None: - partition_mode = "by_year_week" - expected = r""" dummy_value dummy_timestamp year weekofyear - 2021-12-04 19:40:00+00:00 1 1638646800000 2021 48 - 2021-12-04 19:41:00+00:00 2 1638646860000 2021 48 - 2021-12-04 19:42:00+00:00 3 1638646960000 2021 48""" - self.add_date_partition_columns_helper(partition_mode, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestToPartitionedDataset -# ############################################################################# - - -class TestToPartitionedDataset(hunitest.TestCase): - @staticmethod - def get_test_data1() -> pd.DataFrame: - 
test_data = { - "dummy_value_1": [1, 2, 3], - "dummy_value_2": ["A", "B", "C"], - "dummy_value_3": [0, 0, 0], - } - df = pd.DataFrame(data=test_data) - return df - - def test_get_test_data1(self) -> None: - test_data = self.get_test_data1() - actual = hpandas.df_to_str(test_data) - expected = r""" - dummy_value_1 dummy_value_2 dummy_value_3 - 0 1 A 0 - 1 2 B 0 - 2 3 C 0""" - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.skip( - reason="CmTask1305: after removing circular dependencies in " - "`hio.from_file`, this test fails reading a parquet file" - ) - def test_to_partitioned_dataset(self) -> None: - """ - Test partitioned Parquet datasets with existing columns. - """ - # Prepare inputs. - test_dir = self.get_scratch_space() - df = self.get_test_data1() - # Run. - partition_cols = ["dummy_value_1", "dummy_value_2"] - hparque.to_partitioned_parquet(df, partition_cols, test_dir) - # Check output. - include_file_content = False - remove_dir_name = True - dir_signature = hunitest.get_dir_signature( - test_dir, include_file_content, remove_dir_name=remove_dir_name - ) - expected = r""" - # Dir structure - . - dummy_value_1=1 - dummy_value_1=1/dummy_value_2=A - dummy_value_1=1/dummy_value_2=A/data.parquet - dummy_value_1=2 - dummy_value_1=2/dummy_value_2=B - dummy_value_1=2/dummy_value_2=B/data.parquet - dummy_value_1=3 - dummy_value_1=3/dummy_value_2=C - dummy_value_1=3/dummy_value_2=C/data.parquet""" - self.assert_equal( - dir_signature, expected, purify_text=True, fuzzy_match=True - ) - # - include_file_content = True - dir_signature = hunitest.get_dir_signature( - test_dir, include_file_content, remove_dir_name=remove_dir_name - ) - self.check_string(dir_signature, purify_text=True, fuzzy_match=True) - - def test_to_partitioned_dataset_wrong_column(self) -> None: - """ - Assert that wrong columns are detected before partitioning. - """ - # Prepare inputs. - test_dir = self.get_scratch_space() - df = self.get_test_data1() - # Run. 
- partition_cols = ["void_column", "dummy_value_2"] - # Check output. - with self.assertRaises(AssertionError) as cm: - hparque.to_partitioned_parquet(df, partition_cols, test_dir) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - val1=['dummy_value_2', 'void_column'] - issubset - val2=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] - val1 - val2=['void_column'] - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# TestListAndMergePqFiles -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestListAndMergePqFiles(hmoto.S3Mock_TestCase): - def generate_test_data(self) -> hs3.AwsProfile: - """ - Upload test daily Parquet files for 3 days to the mocked S3 bucket. - """ - start_date = "2022-02-02" - end_date = "2022-02-04" - assets = ["A", "B", "C", "D", "E", "F"] - asset_col_name = "asset" - test_dir = self.get_scratch_space() - partition_mode = "by_year_month" - custom_partition_cols = "asset,year,month" - hparque.generate_parquet_files( - start_date, - end_date, - assets, - asset_col_name, - test_dir, - partition_mode=partition_mode, - custom_partition_cols=custom_partition_cols, - ) - s3fs_ = hs3.get_s3fs(self.mock_aws_profile) - s3_bucket = f"s3://{self.bucket_name}" - s3fs_.put(test_dir, s3_bucket, recursive=True) - return s3fs_ - - @pytest.mark.slow("~7 seconds.") - def test_list_and_merge_pq_files(self) -> None: - """ - Check if predefined generated Parquet files are properly merged. - """ - s3fs_ = self.generate_test_data() - # Prepare common `hs3.listdir` params. 
- s3_bucket = f"s3://{self.bucket_name}" - pattern = "*.parquet" - only_files = True - use_relative_paths = True - # Check bucket content before merge. - parquet_path_list_before = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - self.assertEqual(len(parquet_path_list_before), 6) - # Add extra parquet files and rename existing one. - # e.g., `dummy.parquet`, `dummy_new.parquet`. - # Every second file is left intact to replicate ready out-of-the-box folder. - # e.g., `asset=A/year=2022/month=2/77a2534aaf9649fab6511cea53a6bf7f-0.parquet`. - for path in parquet_path_list_before[::2]: - original_path = f"{s3_bucket}/{path}" - original_file_name = os.path.basename(original_path) - renamed_path = original_path.replace( - original_file_name, "dummy.parquet" - ) - additional_path = original_path.replace( - original_file_name, "dummy_new.parquet" - ) - s3fs_.rename(original_path, renamed_path) - s3fs_.copy(renamed_path, additional_path) - # Check if edits are in place. - updated_parquet_path_list = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - data_parquet_path_list = [ - path for path in updated_parquet_path_list if "dummy" not in path - ] - self.assertEqual(len(updated_parquet_path_list), 9) - self.assertEqual(len(data_parquet_path_list), 3) - # Check bucket content after merge. 
- hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) - parquet_path_list_after = hs3.listdir( - s3_bucket, - pattern, - only_files, - use_relative_paths, - aws_profile=s3fs_, - ) - parquet_path_list_after.sort() - expected_list = [ - "tmp.scratch/asset=A/year=2022/month=2/data.parquet", - "tmp.scratch/asset=B/year=2022/month=2/data.parquet", - "tmp.scratch/asset=C/year=2022/month=2/data.parquet", - "tmp.scratch/asset=D/year=2022/month=2/data.parquet", - "tmp.scratch/asset=E/year=2022/month=2/data.parquet", - "tmp.scratch/asset=F/year=2022/month=2/data.parquet", - ] - self.assertListEqual(parquet_path_list_after, expected_list) - - def test_list_and_merge_pq_files_duplicate_drop(self) -> None: - # Prepare test data. - test_data = { - "dummy_value_1": [1, 1, 1], - "dummy_value_2": ["A", "A", "A"], - "knowledge_timestamp": [1, 2, 3], - "end_download_timestamp": [3, 2, 1], - } - df = pd.DataFrame(data=test_data) - # Save test data to s3 bucket. - s3fs_ = hs3.get_s3fs(self.mock_aws_profile) - s3_bucket = f"s3://{self.bucket_name}" - original_sample_path = f"{s3_bucket}/dummy/data.parquet" - dummy_sample_path = original_sample_path.replace( - "data.parquet", "dummy.parquet" - ) - hparque.to_parquet(df, dummy_sample_path, aws_profile=s3fs_) - # Check if new columns are in place. - df = hparque.from_parquet(dummy_sample_path, aws_profile=s3fs_) - self.assertIn("knowledge_timestamp", df.columns) - self.assertIn("end_download_timestamp", df.columns) - self.assertEqual(len(df), 3) - # Check if duplicates are dropped after merge. 
- hparque.list_and_merge_pq_files(self.bucket_name, aws_profile=s3fs_) - df = hparque.from_parquet(original_sample_path, aws_profile=s3fs_) - self.assertEqual(len(df), 1) - - -# ############################################################################# - - -# ############################################################################# -# TestListAndMergePqFilesMixedUnits -# ############################################################################# - - -class TestListAndMergePqFilesMixedUnits(hunitest.TestCase): - def _list_and_merge_mixed_units_pq_files( - self, first_unit: str, second_unit: str - ) -> None: - """ - Run `list_and_merge_pq_files` with different time units in the same - column and index. - - :param first_unit: first time unit. - :param second_unit: second time unit. - """ - # Prepare test data. - dst_dir = os.path.join(self.get_scratch_space(), "tmp.list_and_merge") - first_file_name = os.path.join(dst_dir, "tmp.1first.parquet") - second_file_name = os.path.join(dst_dir, "tmp.2second.parquet") - merged_file_name = os.path.join(dst_dir, "tmp.merged.parquet") - # Write first DF with the `first_unit`. - initial_df = _get_test_df_with_timestamps() - first_df = initial_df.copy() - first_df.index = first_df.index.as_unit(first_unit) - first_df["knowledge_timestamp"] = first_df["knowledge_timestamp"].astype( - f"datetime64[{first_unit}, UTC]" - ) - hparque.to_parquet(first_df, first_file_name) - # Write second DF with the `second_unit`. - second_df = initial_df.copy() - second_df.index = second_df.index.as_unit(second_unit) - second_df["knowledge_timestamp"] = second_df[ - "knowledge_timestamp" - ].astype(f"datetime64[{second_unit}, UTC]") - hparque.to_parquet(second_df, second_file_name) - # List and merge. - hparque.list_and_merge_pq_files(dst_dir, file_name="tmp.merged.parquet") - # Read it back. 
- _ = hparque.from_parquet(merged_file_name) - - def test_parquet_files_with_mixed_time_units_1(self) -> None: - """ - Test merging Parquet files with the `ns` and `us`. - """ - first_unit = "ns" - second_unit = "us" - self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) - - # TODO(Nina): @Samarth fix the test. - @pytest.mark.skip(reason="Broken.") - def test_parquet_files_with_mixed_time_units_2(self) -> None: - """ - Test merging Parquet files with the `ms` and `ns`. - - It should raise an error. See CmampTask7331 for details. - - The test will not raise an asserion when the time units is `ms` and - `us`. The reason is that we do not lose data when converting from - the first time unit, which is `ms`, to the second time unit, which - is `us`, transitioning from low resolution to high resolution. - """ - first_unit = "us" - second_unit = "ms" - with self.assertRaises(pyarrow.lib.ArrowInvalid): - self._list_and_merge_mixed_units_pq_files(first_unit, second_unit) - - -# ############################################################################# - - -# ############################################################################# -# TestYieldParquetTiles -# ############################################################################# - - -class TestYieldParquetTiles(hunitest.TestCase): - def generate_test_data(self) -> None: - """ - Generate test data and write it to a scratch dir. - - Data has the following structure: - - ``` - asset_id ... year month - end_ts - 2021-11-01 100 2021 11 - 2021-11-01 200 2021 11 - 2021-11-01 300 2021 11 - ... - 2022-02-01 200 2022 2 - 2022-02-01 300 2022 2 - 2022-02-01 400 2022 2 - ``` - """ - # Generate synthetic data. 
- asset_ids = [100, 200, 300, 400] - prices = list(range(1, 17)) - volatility = list(range(17, 33)) - dates = ["2021-11-01", "2021-12-01", "2022-01-01", "2022-02-01"] - dates = map(pd.Timestamp, dates) - index_ = [dates, asset_ids] - multi_index = pd.MultiIndex.from_product( - index_, names=["end_ts", "asset_id"] - ) - df = pd.DataFrame( - {"price": prices, "volatility": volatility}, index=multi_index - ) - df["year"] = df.index.get_level_values(0).year - df["month"] = df.index.get_level_values(0).month - df = df.reset_index(level=1) - _LOG.debug("Test data: df=\n%s", hpandas.df_to_str(df)) - # Write the data to a scratch dir. - partition_columns = ["asset_id", "year", "month"] - dst_dir = self.get_scratch_space() - hparque.to_partitioned_parquet(df, partition_columns, dst_dir) - - def test_yield_tiles_by_asset(self) -> None: - """ - Test reading only certain asset ids. - """ - self.generate_test_data() - # Read data. - file_name = self.get_scratch_space() - asset_ids = [100, 200] - asset_id_col = "asset_id" - asset_batch_size = 1 - columns = [asset_id_col, "price"] - generator_ = hparque.yield_parquet_tiles_by_assets( - file_name, asset_ids, asset_id_col, asset_batch_size, columns - ) - df = pd.concat(generator_) - _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) - # Check asset ids filtering. - actual = str(asset_ids) - expected = str(df[asset_id_col].unique().tolist()) - self.assert_equal(actual, expected) - - def test_yield_tiles_by_year(self) -> None: - """ - Test reading only certain asset ids and dates. - """ - self.generate_test_data() - # Read data. 
- file_name = self.get_scratch_space() - start_year = 2021 - start_month = 12 - start_date = datetime.date(start_year, start_month, 1) - end_year = 2022 - end_month = 1 - end_date = datetime.date(end_year, end_month, 2) - asset_ids = [300, 400] - asset_id_col = "asset_id" - columns = [asset_id_col, "price"] - generator_ = hparque.yield_parquet_tiles_by_year( - file_name, - start_date, - end_date, - columns, - asset_ids=asset_ids, - asset_id_col=asset_id_col, - ) - df = pd.concat(generator_) - _LOG.debug("Filtered data: df=\n%s", hpandas.df_to_str(df)) - # Check asset ids filtering. - actual = str(asset_ids) - expected = str(df[asset_id_col].unique().tolist()) - self.assert_equal(actual, expected) - # Check start date filtering. - min_date = df.index.min() - self.assertEqual(min_date.month, start_month) - self.assertEqual(min_date.year, start_year) - # Check end date filtering. - max_date = df.index.max() - self.assertEqual(max_date.month, end_month) - self.assertEqual(max_date.year, end_year) - - -# ############################################################################# - - -# ############################################################################# -# TestBuildFilterWithOnlyEqualities -# ############################################################################# - - -class TestBuildFilterWithOnlyEqualities(hunitest.TestCase): - def test_year_month_day_equality(self) -> None: - """ - Test interval with same year, month and day. - """ - start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = ( - r"[('year', '==', 2022), ('month', '==', 12), ('day', '==', 2)]" - ) - self.assert_equal(actual, expected) - - def test_year_month_equality(self) -> None: - """ - Test interval with same year and month. 
- """ - start_ts = pd.Timestamp("2022-12-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-28 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[('year', '==', 2022), ('month', '==', 12)]" - self.assert_equal(actual, expected) - - def test_year_equality(self) -> None: - """ - Test interval with same year. - """ - start_ts = pd.Timestamp("2022-10-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-12-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[('year', '==', 2022)]" - self.assert_equal(actual, expected) - - def test_no_equality(self) -> None: - """ - Test interval with different start and end years. - """ - start_ts = pd.Timestamp("2021-10-02 09:31:00+00:00") - end_ts = pd.Timestamp("2022-10-02 21:31:00+00:00") - filters = hparque.build_filter_with_only_equalities(start_ts, end_ts) - actual = str(filters) - expected = r"[]" - self.assert_equal(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# TestPartitionedParquet2 -# ############################################################################# - - -class TestPartitionedParquet2(hunitest.TestCase): - """ - Test case for writing and reading partitioned Parquet datasets with mixed - timestamp formats. - """ - - def _get_test_df(self) -> pd.DataFrame: - """ - Create a DataFrame with timestamps. - """ - # Mock the get_current_time method. 
- timestamp = pd.Timestamp("2024-05-20 00:00:00", tz="UTC") - index = [timestamp for _ in range(4)] - df = pd.DataFrame( - { - "bids": [200, 123, 263, 167], - "asks": [150, 120, 240, 150], - "symbol": ["BTC_USDT" for _ in range(4)], - }, - index=index, - ) - end_download_timestamp = "2024-06-04 20:38:43.467599+00:00" - df["end_download_timestamp"] = end_download_timestamp - return df - - def _run_write_and_read_mixed_timestamp_partitioned_dataset(self) -> None: - """ - Write two DataFrames with different timestamp formats to a partitioned - Parquet dataset and read it back. - """ - initial_df = self._get_test_df() - partition_columns = ["bids", "asks", "symbol"] - dst_dir = os.path.join(self.get_scratch_space(), "tmp.pp_mixed_units") - # Write first DF as partitioned parquet. - first_df = initial_df.copy() - hparque.to_partitioned_parquet(first_df, partition_columns, dst_dir) - # Write second DF as partitioned parquet. - second_df = initial_df.copy() - second_df["end_download_timestamp"] = pd.to_datetime( - second_df["end_download_timestamp"] - ) - hparque.to_partitioned_parquet(second_df, partition_columns, dst_dir) - # Read it back and verify the output. - combined_df = hparque.from_parquet(dst_dir) - combined_df["end_download_timestamp"] = pd.to_datetime( - combined_df["end_download_timestamp"] - ).dt.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") - actual = hpandas.df_to_str(combined_df) - expected = r""" - end_download_timestamp bids asks symbol - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 123 120 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 167 150 BTC_USDT - ... 
- 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 200 150 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT - 2024-05-20 00:00:00+00:00 2024-06-04 20:38:43.467599+00:00 263 240 BTC_USDT - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test writing and reading a partitioned Parquet dataset with mixed - timestamp formats. - """ - self._run_write_and_read_mixed_timestamp_partitioned_dataset() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py deleted file mode 100644 index 8e65eeb2e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hparser.py +++ /dev/null @@ -1,398 +0,0 @@ -import argparse -import os - -import helpers.hio as hio -import helpers.hparser as hparser -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestParseLimitRange -# ############################################################################# - - -class TestParseLimitRange(hunitest.TestCase): - def test_parse_limit_range_valid1(self) -> None: - """ - Test parsing valid range format. - """ - limit_str = "1:5" - expected = (1, 5) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_valid2(self) -> None: - """ - Test parsing valid range format with same start and end. - """ - limit_str = "3:3" - expected = (3, 3) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_valid3(self) -> None: - """ - Test parsing valid range format with larger numbers. 
- """ - limit_str = "10:100" - expected = (10, 100) - actual = hparser.parse_limit_range(limit_str) - self.assertEqual(actual, expected) - - def test_parse_limit_range_no_colon(self) -> None: - """ - Test that missing colon raises assertion error. - """ - limit_str = "15" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_multiple_colons(self) -> None: - """ - Test that multiple colons raise assertion error. - """ - limit_str = "1:2:3" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_invalid_start(self) -> None: - """ - Test that non-integer start raises fatal error. - """ - limit_str = "abc:5" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_invalid_end(self) -> None: - """ - Test that non-integer end raises fatal error. - """ - limit_str = "1:xyz" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_start_zero(self) -> None: - """ - Test that start index of 0 raises assertion error. - """ - limit_str = "0:5" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_end_zero(self) -> None: - """ - Test that end index of 0 raises assertion error. - """ - limit_str = "1:0" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - def test_parse_limit_range_start_greater_than_end(self) -> None: - """ - Test that start greater than end raises assertion error. 
- """ - limit_str = "5:3" - with self.assertRaises(AssertionError): - hparser.parse_limit_range(limit_str) - - -# ############################################################################# -# TestApplyLimitRange -# ############################################################################# - - -class TestApplyLimitRange(hunitest.TestCase): - def test_apply_limit_range_no_limit(self) -> None: - """ - Test that None limit range returns original items. - """ - items = ["a", "b", "c", "d", "e"] - actual = hparser.apply_limit_range(items, None) - self.assertEqual(actual, items) - - def test_apply_limit_range_valid_range(self) -> None: - """ - Test applying valid range to items. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (1, 3) - expected = ["b", "c", "d"] # 0-indexed, inclusive - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_single_item(self) -> None: - """ - Test applying range that selects single item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (2, 2) - expected = ["c"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_first_item(self) -> None: - """ - Test applying range starting from first item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (0, 1) - expected = ["a", "b"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_last_item(self) -> None: - """ - Test applying range ending at last item. - """ - items = ["a", "b", "c", "d", "e"] - limit_range = (3, 4) - expected = ["d", "e"] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - def test_apply_limit_range_start_exceeds_length(self) -> None: - """ - Test that start index exceeding items length raises assertion error. 
- """ - items = ["a", "b", "c"] - limit_range = (5, 6) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_end_exceeds_length(self) -> None: - """ - Test that end index exceeding items length raises assertion error. - """ - items = ["a", "b", "c"] - limit_range = (1, 5) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_custom_item_name(self) -> None: - """ - Test that custom item name doesn't affect functionality. - """ - items = [1, 2, 3, 4, 5] - limit_range = (0, 2) - expected = [1, 2, 3] - actual = hparser.apply_limit_range( - items, limit_range, item_name="numbers" - ) - self.assertEqual(actual, expected) - - def test_apply_limit_range_empty_list(self) -> None: - """ - Test applying limit range to empty list. - """ - items = [] - limit_range = (0, 1) - with self.assertRaises(AssertionError): - hparser.apply_limit_range(items, limit_range) - - def test_apply_limit_range_complex_objects(self) -> None: - """ - Test applying limit range to complex objects. - """ - items = [{"id": i, "value": f"item{i}"} for i in range(10)] - limit_range = (2, 4) - expected = [ - {"id": 2, "value": "item2"}, - {"id": 3, "value": "item3"}, - {"id": 4, "value": "item4"}, - ] - actual = hparser.apply_limit_range(items, limit_range) - self.assertEqual(actual, expected) - - -# ############################################################################# -# Test_add_multi_file_args -# ############################################################################# - - -class Test_add_multi_file_args(hunitest.TestCase): - def test_adds_correct_arguments(self) -> None: - """ - Test that add_multi_file_args adds the correct arguments to parser. - """ - # Prepare inputs. - parser = argparse.ArgumentParser() - # Run function. - hparser.add_multi_file_args(parser) - # Check that the arguments were added. 
- namespace = parser.parse_args([]) - self.assertTrue(hasattr(namespace, "files")) - self.assertTrue(hasattr(namespace, "from_files")) - self.assertTrue(hasattr(namespace, "input")) - - -# ############################################################################# -# Test_parse_multi_file_args -# ############################################################################# - - -class Test_parse_multi_file_args(hunitest.TestCase): - # Helper method. - def _create_test_file(self, file_path: str, content: str = "test") -> None: - """ - Create a test file with given content. - """ - hio.create_dir(os.path.dirname(file_path), incremental=True) - hio.to_file(file_path, content) - - def test_files_comma_separated(self) -> None: - """ - Test parsing comma-separated file list. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - file3 = f"{scratch_dir}/file3.txt" - self._create_test_file(file1) - self._create_test_file(file2) - self._create_test_file(file3) - # Create namespace with files argument. - args = argparse.Namespace() - args.files = f"{file1},{file2},{file3}" - args.from_files = None - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2, file3] - self.assert_equal(str(actual), str(expected)) - - def test_from_files(self) -> None: - """ - Test parsing file containing list of files. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - file3 = f"{scratch_dir}/file3.txt" - self._create_test_file(file1) - self._create_test_file(file2) - self._create_test_file(file3) - # Create file list. - list_file = f"{scratch_dir}/list.txt" - content = f"{file1}\n{file2}\n{file3}\n" - self._create_test_file(list_file, content) - # Create namespace with from_files argument. 
- args = argparse.Namespace() - args.files = None - args.from_files = list_file - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2, file3] - self.assert_equal(str(actual), str(expected)) - - def test_from_files_with_empty_lines(self) -> None: - """ - Test parsing file with empty lines and comments. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - self._create_test_file(file1) - self._create_test_file(file2) - # Create file list with empty lines and comments. - list_file = f"{scratch_dir}/list.txt" - content = f""" - # This is a comment - {file1} - - # Another comment - {file2} - - """ - self._create_test_file(list_file, content) - # Create namespace with from_files argument. - args = argparse.Namespace() - args.files = None - args.from_files = list_file - args.input = None - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2] - self.assert_equal(str(actual), str(expected)) - - def test_input_multiple(self) -> None: - """ - Test parsing repeated --input arguments. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test files. - file1 = f"{scratch_dir}/file1.txt" - file2 = f"{scratch_dir}/file2.txt" - self._create_test_file(file1) - self._create_test_file(file2) - # Create namespace with input argument. - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = [file1, file2] - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1, file2] - self.assert_equal(str(actual), str(expected)) - - def test_backward_compatibility_single_file(self) -> None: - """ - Test that single -i/--input still works. - """ - # Prepare inputs. - scratch_dir = self.get_scratch_space() - # Create test file. 
- file1 = f"{scratch_dir}/file1.txt" - self._create_test_file(file1) - # Create namespace with input argument (single file, not list). - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = file1 # Single file as string, not list - # Run function. - actual = hparser.parse_multi_file_args(args) - # Check outputs. - expected = [file1] - self.assert_equal(str(actual), str(expected)) - - def test_file_validation(self) -> None: - """ - Test that non-existent files raise error. - """ - # Create namespace with non-existent file. - args = argparse.Namespace() - args.files = "/nonexistent/file1.txt,/nonexistent/file2.txt" - args.from_files = None - args.input = None - # Run function and check that it raises error. - with self.assertRaises(AssertionError): - hparser.parse_multi_file_args(args) - - def test_empty_file_list(self) -> None: - """ - Test empty file list handling. - """ - # Prepare inputs. - - # Create namespace with no files. - args = argparse.Namespace() - args.files = None - args.from_files = None - args.input = None - # Run function and check that it raises error. - with self.assertRaises(AssertionError) as cm: - hparser.parse_multi_file_args(args) - # Check the error message. 
- act = str(cm.exception) - self.assertIn("No input files specified", act) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py deleted file mode 100644 index 8064ddbe1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpickle.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging - -import helpers.hpickle as hpickle -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestToPickleable -# ############################################################################# - - -class TestToPickleable(hunitest.TestCase): - def test_list1(self) -> None: - """ - Test that a list is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = [1, "2", [3, 0.4], (5, None)] - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = [1, "2", [3, 0.4], (5, None)] - self.assertEqual(actual, expected) - - def test_list2(self) -> None: - """ - Test that a list is converted to a pickleable correctly. - - force_values_to_string = True - """ - _obj = [1, "2", [3, 0.4], (5, None)] - force_values_to_string = True - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ["1", "2", ["3", "0.4"], ("5", "None")] - self.assertEqual(actual, expected) - - def test_tuple1(self) -> None: - """ - Test that a tuple is converted to a pickleable correctly. 
- - force_values_to_string = False - """ - _obj = (1, "2", [3, 0.4], (5, None)) - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = (1, "2", [3, 0.4], (5, None)) - self.assertEqual(actual, expected) - - def test_dict1(self) -> None: - """ - Test that a dict is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = {"a": 1, 2: ["b", 3], "c": {0.4: None}} - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = {"a": 1, 2: ["b", 3], "c": {0.4: None}} - self.assertEqual(actual, expected) - - def test_iterable1(self) -> None: - """ - Test that an iterable is converted to a pickleable correctly. - - force_values_to_string = False - """ - _obj = {1, 2, 3} - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = [1, 2, 3] - self.assertEqual(actual, expected) - - def test_unpickleable1(self) -> None: - """ - Test that an unpickleable object is converted to a string. - - force_values_to_string = False - """ - _obj = lambda x: x - force_values_to_string = False - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ". at 0x>" - self.assert_equal(actual, expected, purify_text=True) - - def test_unpickleable2(self) -> None: - """ - Test that an unpickleable object is converted to a string. - - force_values_to_string = True - """ - _obj = lambda x: x - force_values_to_string = True - actual = hpickle.to_pickleable(_obj, force_values_to_string) - expected = ". 
at 0x>" - self.assert_equal(actual, expected, purify_text=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py deleted file mode 100644 index a829ea82f..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hplayback.py +++ /dev/null @@ -1,506 +0,0 @@ -import datetime -import logging -import os -from typing import Any, Optional - -import pandas as pd -import pytest - -import config_root.config as cconfig -import helpers.hio as hio -import helpers.hplayback as hplayba -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestJsonRoundtrip1 -# ############################################################################# - - -class TestJsonRoundtrip1(hunitest.TestCase): - """ - Test roundtrip conversion through jsonpickle for different types. 
- """ - - def test1(self) -> None: - obj = 3 - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test2(self) -> None: - obj = "hello" - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test3(self) -> None: - data = { - "Product": ["Desktop Computer", "Tablet", "iPhone", "Laptop"], - "Price": [700, 250, 800, 1200], - } - df = pd.DataFrame(data, columns=["Product", "Price"]) - df.index.name = "hello" - # - obj = df - hplayba.round_trip_convert(obj, logging.DEBUG) - - def test4(self) -> None: - obj = datetime.date(2015, 1, 1) - # - hplayba.round_trip_convert(obj, logging.DEBUG) - - -# ############################################################################# -# TestPlaybackInputOutput1 -# ############################################################################# - - -class TestPlaybackInputOutput1(hunitest.TestCase): - """ - Freeze the output of Playback. - """ - - def helper(self, mode: str, *args: Any, **kwargs: Any) -> None: - # TODO(gp): Factor out the common code. - # Define a function to generate a unit test for. 
- def get_result_assert_equal(a: Any, b: Any) -> Any: - p = hplayba.Playback("assert_equal") - if isinstance(a, datetime.date) and isinstance(b, datetime.date): - return p.run(abs(a - b)) - if isinstance(a, dict) and isinstance(b, dict): - c = {} - c.update(a) - c.update(b) - return p.run(c) - if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): - c = cconfig.Config(update_mode="overwrite") - c.update(a) - c.update(b) - return p.run(c) - return p.run(a + b) - - def get_result_check_string(a: Any, b: Any) -> Any: - p = hplayba.Playback("check_string") - if isinstance(a, datetime.date) and isinstance(b, datetime.date): - return p.run(abs(a - b)) - if isinstance(a, dict) and isinstance(b, dict): - c = {} - c.update(a) - c.update(b) - return p.run(c) - if isinstance(a, cconfig.Config) and isinstance(b, cconfig.Config): - c = cconfig.Config(update_mode="overwrite") - c.update(a) - c.update(b) - return p.run(c) - return p.run(a + b) - - def get_result_assert_equal_none() -> Any: - p = hplayba.Playback("assert_equal") - return p.run("Some string.") - - def get_result_check_string_none() -> Any: - p = hplayba.Playback("check_string") - return p.run("Some string") - - if mode == "assert_equal": - if not args and not kwargs: - code = get_result_assert_equal_none() - else: - code = get_result_assert_equal(*args, **kwargs) - elif mode == "check_string": - if not args and not kwargs: - code = get_result_check_string_none() - else: - code = get_result_check_string(*args, **kwargs) - else: - raise ValueError("Invalid mode ") - self.check_string(code, purify_text=True) - _LOG.debug("Testing code:\n%s", code) - exec(code, locals()) # pylint: disable=exec-used - - def test1(self) -> None: - """ - Test for int inputs. - """ - # Create inputs. - a = 3 - b = 2 - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test2(self) -> None: - """ - Test for string inputs. - """ - # Create inputs. 
- a = "test" - b = "case" - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test3(self) -> None: - """ - Test for list inputs. - """ - # Create inputs. - a = [1, 2, 3] - b = [4, 5, 6] - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test4(self) -> None: - """ - Test for dict inputs. - """ - # Create inputs. - a = {"1": 2} - b = {"3": 4} - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test5(self) -> None: - """ - Test for pd.DataFrame inputs. - """ - # Create inputs. - a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test6(self) -> None: - """ - Test for datetime.date inputs (using `jsonpickle`). - """ - # Create inputs. - a = datetime.date(2015, 1, 1) - b = datetime.date(2012, 1, 1) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test7(self) -> None: - """ - Test for int inputs with check_string. - """ - # Create inputs. - a = 3 - b = 2 - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test8(self) -> None: - """ - Test for string inputs with check_string. - """ - # Create inputs. - a = "test" - b = "case" - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test9(self) -> None: - """ - Test for list inputs with check_string. - """ - # Create inputs. - a = [1, 2, 3] - b = [4, 5, 6] - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test10(self) -> None: - """ - Test for dict inputs with check_string. - """ - # Create inputs. - a = {"1": 2} - b = {"3": 4} - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test11(self) -> None: - """ - Test for pd.DataFrame inputs with check_string. 
- """ - # Create inputs. - a = pd.DataFrame({"Price": [700, 250, 800, 1200]}) - b = pd.DataFrame({"Price": [1, 1, 1, 1]}) - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test12(self) -> None: - """ - Test for dict inputs with data structures recursion. - """ - # Create inputs. - a = {"1": ["a", 2]} - b = {"3": pd.DataFrame({"Price": [700, 250, 800, 1200]}), "4": {"5": 6}} - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test13(self) -> None: - """ - Test for pd.Series inputs with check_string. - """ - # Create inputs. - a = pd.Series([10, 20, 15], name="N Numbers") - b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test14(self) -> None: - """ - Test for pd.Series inputs with assert_equal. - """ - # Create inputs. - a = pd.Series([10, 20, 15], name="N Numbers") - b = pd.Series([10.0, 0.0, 5.5], name="Z Numbers") - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test15(self) -> None: - """ - Test for cconfig.Config inputs with check_string. - """ - # Create inputs. - a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) - b = cconfig.Config([("meta", "meta value 2")]) - # Generate, freeze and execute a unit test. - self.helper("check_string", a=a, b=b) - - def test16(self) -> None: - """ - Test for cconfig.Config inputs with assert_equal. - """ - # Create inputs. - a = cconfig.Config([("meta", "meta value 1"), ("list", [1, 2])]) - b = cconfig.Config([("meta", "meta value 2")]) - # Generate, freeze and execute a unit test. - self.helper("assert_equal", a=a, b=b) - - def test17(self) -> None: - """ - Test if testing function has no args with check_string. - """ - self.helper("check_string") - - def test18(self) -> None: - """ - Test if testing function has no args with assert_equal. 
- """ - self.helper("assert_equal") - - -# ############################################################################# -# TestToPythonCode1 -# ############################################################################# - - -class TestToPythonCode1(hunitest.TestCase): - """ - Test to_python_code() for different types. - """ - - def _check(self, input_obj: Any, expected: str) -> None: - res = hplayba.to_python_code(input_obj) - self.assert_equal(res, expected) - - def test_float1(self) -> None: - """ - Test float without first zero. - """ - self._check(0.1, "0.1") - - def test_float2(self) -> None: - """ - Test positive float. - """ - self._check(1.0, "1.0") - - def test_float3(self) -> None: - """ - Test negative float. - """ - self._check(-1.1, "-1.1") - - def test_int1(self) -> None: - """ - Test zero. - """ - self._check(0, "0") - - def test_int2(self) -> None: - """ - Test positive int. - """ - self._check(10, "10") - - def test_int3(self) -> None: - """ - Test negative int. - """ - self._check(-10, "-10") - - def test_str1(self) -> None: - """ - Test str simple. - """ - self._check("a", '"a"') - - def test_str2(self) -> None: - """ - Test str with double quotes. - """ - self._check('"b"', '"\\"b\\""') - - def test_str3(self) -> None: - """ - Test str with single quotes. - """ - self._check("'c'", "\"'c'\"") - - def test_list1(self) -> None: - """ - Test List. - """ - self._check([1, 0.2, "3"], '[1, 0.2, "3"]') - - def test_dict1(self) -> None: - """ - Test Dist. - """ - self._check({"a": 0.2, 3: "b"}, '{"a": 0.2, 3: "b"}') - - def test_df1(self) -> None: - """ - Test pd.DataFrame (single quotes expected in field names) - """ - self._check( - pd.DataFrame.from_dict({"a": [0.2, 0.1]}), - "pd.DataFrame.from_dict({'a': [0.2, 0.1]})", - ) - - def test_dataseries1(self) -> None: - """ - Test pd.Series. 
- """ - self._check( - pd.Series([0.2, 0.1], name="a"), - "pd.Series(data=[0.2, 0.1], index=RangeIndex(start=0, stop=2, step=1), " - 'name="a", dtype=float64)', - ) - - def test_config1(self) -> None: - """ - Test cconfig.Config. - """ - config = cconfig.Config() - config["var1"] = "val1" - config["var2"] = cconfig.Config([("var3", 10), ("var4", "val4")]) - self._check( - config, - "cconfig.Config.from_python(\"Config({'var1': 'val1', " - "'var2': Config({'var3': 10, 'var4': 'val4'})})\")", - ) - - -# ############################################################################# -# TestPlaybackFilePath1 -# ############################################################################# - - -class TestPlaybackFilePath1(hunitest.TestCase): - """ - Test file mode correctness. - """ - - def test1(self) -> None: - """ - Test writing to file when number of tests is more than generated (10). - """ - test_file = hplayba.Playback._get_test_file_name( - "./path/to/somewhere.py" - ) - self.assert_equal( - test_file, "./path/to/test/test_by_playback_somewhere.py" - ) - - -# ############################################################################# -# TestPlaybackFileMode1 -# ############################################################################# - - -class TestPlaybackFileMode1(hunitest.TestCase): - """ - Test file mode correctness. - """ - - def get_code(self, max_tests: Optional[int] = None) -> str: - """ - Return a code for executable file to run. - """ - max_tests_str = "" if max_tests is None else f", max_tests={max_tests}" - code = ( - "\n".join( - [ - "import helpers.hplayback as hplayba", - "def plbck_sum(a: int, b: int) -> int:", - ' hplayba.Playback("check_string", to_file=True%s).run(None)', - " return a + b", - "", - "[plbck_sum(i, i + 1) for i in range(4)]", - ] - ) - % max_tests_str - ) - return code - - def helper(self, max_tests: Optional[int] = None) -> Any: - """ - Return generated by playback code. - """ - # Get file paths. 
- tmp_dir = self.get_scratch_space() - # File with code. - code_basename = "code_.py" - tmp_py_file = os.path.join(tmp_dir, code_basename) - # File with test. - tmp_test_file = os.path.join( - tmp_dir, "test", "test_by_playback_" + code_basename - ) - # Save the code to the file. - hio.to_file(tmp_py_file, self.get_code(max_tests)) - # Executes the code. - hsystem.system(f"python {tmp_py_file}") - playback_code = hio.from_file(tmp_test_file) - return playback_code - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test1(self) -> None: - """ - Test writing to file when number of tests is more than generated. - """ - max_tests = 100 - self.check_string(self.helper(max_tests)) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test2(self) -> None: - """ - Test writing to file when number of tests is default. - """ - self.check_string(self.helper()) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~10 seconds.") - def test3(self) -> None: - """ - Test writing to file when number of tests is lower than generated. 
- """ - max_tests = 2 - self.check_string(self.helper(max_tests)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py deleted file mode 100644 index 395138e7a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hprint.py +++ /dev/null @@ -1,844 +0,0 @@ -import logging -import pprint -from typing import List - -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_printing1 -# ############################################################################# - - -class Test_printing1(hunitest.TestCase): - def test_color_highlight1(self) -> None: - for c in hprint._COLOR_MAP: - _LOG.debug(hprint.color_highlight(c, c)) - - -# ############################################################################# -# Test_to_str1 -# ############################################################################# - - -class Test_to_str1(hunitest.TestCase): - def test1(self) -> None: - x = 1 - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x=1" - self.assertEqual(actual, expected) - - def test2(self) -> None: - x = "hello world" - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x='hello world'" - self.assertEqual(actual, expected) - - def test3(self) -> None: - x = 2 - # To disable linter complaints. - _ = x - actual = hprint.to_str("x*2") - expected = "x*2=4" - self.assertEqual(actual, expected) - - def test4(self) -> None: - """ - Test printing multiple values separated by space. - """ - x = 1 - y = "hello" - # To disable linter complaints. 
- _ = x, y - actual = hprint.to_str("x y") - expected = "x=1, y='hello'" - self.assertEqual(actual, expected) - - def test5(self) -> None: - """ - Test printing multiple strings separated by space. - """ - x = "1" - y = "hello" - # To disable linter complaints. - _ = x, y - actual = hprint.to_str("x y") - expected = "x='1', y='hello'" - self.assertEqual(actual, expected) - - def test6(self) -> None: - """ - Test printing a list. - """ - x = [1, "hello", "world"] - # To disable linter complaints. - _ = x - actual = hprint.to_str("x") - expected = "x=[1, 'hello', 'world']" - self.assertEqual(actual, expected) - - -# ############################################################################# - - -def example_func1(x: int, y: str) -> str: - _ = x, y - ret = hprint.func_signature_to_str() - return ret # type: ignore[no-any-return] - - -def example_func2() -> str: - ret = hprint.func_signature_to_str() - return ret # type: ignore[no-any-return] - - -def example_func3(x: int, y: str) -> str: - _ = x, y - ret = hprint.func_signature_to_str("y") - return ret # type: ignore[no-any-return] - - -def example_func4(x: int, y: str, z: float) -> str: - _ = x, y, z - ret = hprint.func_signature_to_str("x z") - return ret # type: ignore[no-any-return] - - -def example_func5(x: int, y: str, z: float) -> str: - _ = x, y, z - ret = hprint.func_signature_to_str(["y", "z"]) - return ret # type: ignore[no-any-return] - - -# ############################################################################# -# Test_func_signature_to_str1 -# ############################################################################# - - -class Test_func_signature_to_str1(hunitest.TestCase): - def test1(self) -> None: - actual = example_func1(1, "hello") - expected = "# example_func1: x=1, y='hello'" - self.assert_equal(actual, expected) - - def test2(self) -> None: - actual = example_func2() - expected = "# example_func2:" - self.assert_equal(actual, expected) - - def test3(self) -> None: - actual = 
example_func3(1, "hello") - expected = "# example_func3: x=1" - self.assert_equal(actual, expected) - - def test4(self) -> None: - actual = example_func4(1, "hello", 3.14) - expected = "# example_func4: y='hello'" - self.assert_equal(actual, expected) - - def test5(self) -> None: - actual = example_func5(1, "hello", 3.14) - expected = "# example_func5: x=1" - self.assert_equal(actual, expected) - - -# ############################################################################# -# Test_log -# ############################################################################# - - -class Test_log(hunitest.TestCase): - def test2(self) -> None: - x = 1 - # To disable linter complaints. - _ = x - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x") - - def test3(self) -> None: - x = 1 - y = "hello" - # To disable linter complaints. - _ = x, y - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x y") - - def test4(self) -> None: - """ - The command: - - > pytest -k Test_log::test4 -o log_cli=true --dbg_verbosity DEBUG - - should print something like: - - DEBUG test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] - INFO test_printing:printing.py:315 x=1, y='hello', z=['cruel', 'world'] - """ - x = 1 - y = "hello" - z = ["cruel", "world"] - # To disable linter complaints. 
- _ = x, y, z - for verb in [logging.DEBUG, logging.INFO]: - hprint.log(_LOG, verb, "x y z") - - -# ############################################################################# -# Test_sort_dictionary -# ############################################################################# - - -class Test_sort_dictionary(hunitest.TestCase): - def test1(self) -> None: - dict_ = { - "tool": { - "poetry": { - "name": "lm", - "version": "0.1.0", - "description": "", - "authors": [""], - "dependencies": { - "awscli": "*", - "boto3": "*", - "flaky": "*", - "fsspec": "*", - "gluonts": "*", - "invoke": "*", - "jupyter": "*", - "matplotlib": "*", - "mxnet": "*", - "networkx": "*", - "pandas": "^1.1.0", - "psycopg2": "*", - "pyarrow": "*", - "pytest": "^6.0.0", - "pytest-cov": "*", - "pytest-instafail": "*", - "pytest-xdist": "*", - "python": "^3.7", - "pywavelets": "*", - "s3fs": "*", - "seaborn": "*", - "sklearn": "*", - "statsmodels": "*", - "bs4": "*", - "jsonpickle": "*", - "lxml": "*", - "tqdm": "*", - "requests": "*", - }, - "dev-dependencies": {}, - } - }, - "build-system": { - "requires": ["poetry>=0.12"], - "build-backend": "poetry.masonry.api", - }, - } - actual = hprint.sort_dictionary(dict_) - self.check_string(pprint.pformat(actual)) - - -# ############################################################################# -# Test_indent1 -# ############################################################################# - - -class Test_indent1(hunitest.TestCase): - def test1(self) -> None: - txt = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar -""" - num_spaces = 2 - actual = hprint.indent(txt, num_spaces=num_spaces) - expected = """ foo - - klass TestHelloWorld(hunitest.TestCase): - bar -""" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_dedent1 -# ############################################################################# - - -class 
Test_dedent1(hunitest.TestCase): - def test1(self) -> None: - txt = """ - foo - - klass TestHelloWorld(hunitest.TestCase): - bar -""" - actual = hprint.dedent(txt) - expected = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar""" - self.assert_equal(actual, expected, fuzzy_match=False) - - def test2(self) -> None: - txt = r""" - read_data: - file_name: foo_bar.txt - nrows: 999 - single_val: hello - zscore: - style: gaz - com: 28""" - actual = hprint.dedent(txt) - expected = """read_data: - file_name: foo_bar.txt - nrows: 999 -single_val: hello -zscore: - style: gaz - com: 28""" - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_roundtrip1(self) -> None: - """ - Verify that `indent` and `dedent` are inverse of each other. - """ - txt1 = """foo - - -# ############################################################################# -# TestHelloWorld -# ############################################################################# - - -class TestHelloWorld(hunitest.TestCase): - bar""" - num_spaces = 3 - txt2 = hprint.indent(txt1, num_spaces=num_spaces) - txt3 = hprint.dedent(txt2) - self.assert_equal(txt1, txt3, fuzzy_match=False) - - -# ############################################################################# -# Test_align_on_left1 -# ############################################################################# - - -class Test_align_on_left1(hunitest.TestCase): - def test1(self) -> None: - txt = """foo - -klass TestHelloWorld(hunitest.TestCase): - bar -""" - actual = hprint.align_on_left(txt) - expected = """foo - -klass TestHelloWorld(hunitest.TestCase): -bar -""" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_logging1 -# ############################################################################# - - -class Test_logging1(hunitest.TestCase): - def test_log_frame1(self) -> None: - hprint.log_frame(_LOG, "%s %s", "hello", "world") - - def 
test_log_frame2(self) -> None: - hprint.log_frame(_LOG, "%s", "hello", level=1) - - def test_log_frame3(self) -> None: - hprint.log_frame(_LOG, "%s", "hello", level=2, verbosity=logging.INFO) - - -# ############################################################################# -# Test_remove_lead_trail_empty_lines1 -# ############################################################################# - - -class Test_remove_lead_trail_empty_lines1(hunitest.TestCase): - def helper(self, input_str: str, expected_output: List[str]) -> None: - """ - Test the `remove_lead_trail_empty_lines` function. - - :param input_str: The input string to be processed. - :param expected_output: The expected output list of strings. - - Example: - input_str = "line1\n\n\nline2" - expected_output = ["line1", "", "", "line2"] - """ - # Test as string. - actual = hprint.remove_lead_trail_empty_lines(input_str) - expected = "\n".join(expected_output) - self.assertEqual(actual, expected) - # Test as list of strings. - input_str = input_str.splitlines() - actual = hprint.remove_lead_trail_empty_lines(input_str) - self.assertEqual(actual, expected_output) - - def test_empty_string_returns_empty_list(self) -> None: - input_str: str = "" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_single_line_string_returns_single_line_list(self) -> None: - input_str: str = "line" - expected_output = ["line"] - self.helper(input_str, expected_output) - - def test_multiple_lines_with_no_empty_lines_returns_same_lines( - self, - ) -> None: - input_str: str = "line1\nline2\nline3" - expected_output = ["line1", "line2", "line3"] - self.helper(input_str, expected_output) - - def test_leading_empty_lines_are_removed(self) -> None: - input_str: str = "\n\nline1\nline2" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_trailing_empty_lines_are_removed(self) -> None: - input_str: str = "line1\nline2\n\n" - expected_output = ["line1", 
"line2"] - self.helper(input_str, expected_output) - - def test_leading_and_trailing_empty_lines_are_removed(self) -> None: - input_str: str = "\n\nline1\nline2\n\n" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_consecutive_empty_lines_in_middle_are_not_removed(self) -> None: - input_str: str = "line1\n\n\nline2" - expected_output = ["line1", "", "", "line2"] - self.helper(input_str, expected_output) - - def test_only_empty_lines_returns_empty_list(self) -> None: - input_str: str = "\n\n\n" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_mixed_content_with_leading_trailing_and_middle_empty_lines( - self, - ) -> None: - input_str: str = "\n\nline1\n\nline2\n\n" - expected_output = ["line1", "", "line2"] - self.helper(input_str, expected_output) - - def test_single_empty_line_returns_empty_list(self) -> None: - input_str: str = "\n" - expected_output: List[str] = [] - self.helper(input_str, expected_output) - - def test_multiple_consecutive_empty_lines_at_beginning_and_end( - self, - ) -> None: - input_str: str = "\n\n\nline1\nline2\n\n\n" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_input_with_only_spaces_and_tabs_as_empty_lines(self) -> None: - input_str: str = " \n\t\nline1\nline2\n \n\t" - expected_output = ["line1", "line2"] - self.helper(input_str, expected_output) - - def test_input_with_mixed_line_endings_unix_and_windows(self) -> None: - input_str: str = "line1\n\nline2\r\n\r\nline3" - expected_output = ["line1", "", "line2", "", "line3"] - self.helper(input_str, expected_output) - - def test_input_with_special_characters(self) -> None: - input_str: str = "line1\n\n!@#$%^&*()\n\nline2" - expected_output = ["line1", "", "!@#$%^&*()", "", "line2"] - self.helper(input_str, expected_output) - - -# ############################################################################# -# Test_remove_empty_lines -# 
############################################################################# - - -class Test_remove_empty_lines(hunitest.TestCase): - """ - Test remove_empty_lines function with different modes. - """ - - def helper(self, lines: str, mode: str, expected: str) -> None: - """ - Test helper for remove_empty_lines. - - :param lines: Input text as string (will be split into list) - :param mode: Mode parameter for remove_empty_lines - :param expected: Expected output as string (will be split into list) - """ - # Prepare inputs. - lines_str = hprint.dedent(lines) - if lines_str: - lines_list = lines_str.split("\n") - else: - lines_list = [] - # Prepare outputs. - expected_str = hprint.dedent(expected) - if expected_str: - expected_list = expected_str.split("\n") - else: - expected_list = [] - # Run test. - actual = hprint.remove_empty_lines(lines_list, mode=mode) - # Check outputs. - self.assert_equal(str(actual), str(expected_list)) - - def test1(self) -> None: - """ - Test no_empty_lines mode with an empty list. - """ - # Prepare inputs. - lines = "" - mode = "no_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test2(self) -> None: - """ - Test no_empty_lines mode with no empty lines in the input. - """ - # Prepare inputs. - lines = """ - line1 - line2 - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test3(self) -> None: - """ - Test no_empty_lines mode with all lines being empty. - """ - # Prepare inputs. - lines = """ - - - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test4(self) -> None: - """ - Test no_empty_lines mode removes leading empty lines. - """ - # Prepare inputs. - lines = """ - - line1 - line2 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - """ - # Run test. 
- self.helper(lines, mode, expected) - - def test5(self) -> None: - """ - Test no_empty_lines mode removes trailing empty lines. - """ - # Prepare inputs. - lines = """ - line1 - line2 - - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test6(self) -> None: - """ - Test no_empty_lines mode removes empty lines in the middle. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test7(self) -> None: - """ - Test no_empty_lines mode removes lines with only whitespace. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - \t - line3 - """ - mode = "no_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test8(self) -> None: - """ - Test no_consecutive_empty_lines mode with empty list. - """ - # Prepare inputs. - lines = "" - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = "" - # Run test. - self.helper(lines, mode, expected) - - def test9(self) -> None: - """ - Test no_consecutive_empty_lines mode with no empty lines. - """ - # Prepare inputs. - lines = """ - line1 - line2 - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - line2 - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test10(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps single empty line. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test11(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps one of two consecutive empty lines. 
- """ - # Prepare inputs. - lines = """ - line1 - - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test12(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps one of multiple consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - - - - line2 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - # Run test. - self.helper(lines, mode, expected) - - def test13(self) -> None: - """ - Test no_consecutive_empty_lines mode with multiple groups of consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - - line2 - - - - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test14(self) -> None: - """ - Test no_consecutive_empty_lines mode keeps all non-consecutive empty lines. - """ - # Prepare inputs. - lines = """ - line1 - - line2 - - line3 - """ - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - - line3 - """ - # Run test. - self.helper(lines, mode, expected) - - def test15(self) -> None: - """ - Test that invalid mode raises ValueError. - """ - # Prepare inputs. - lines = ["line1", "line2"] - mode = "invalid_mode" - # Run test and check output. - with self.assertRaises(ValueError) as cm: - hprint.remove_empty_lines(lines, mode=mode) - actual = str(cm.exception) - expected = "Invalid mode='invalid_mode'" - self.assert_equal(actual, expected) - - def test16(self) -> None: - """ - Test remove_empty_lines with string input (decorator functionality). - """ - # Prepare inputs. - text = """ - line1 - - line2 - - line3 - """ - text = hprint.dedent(text) - mode = "no_empty_lines" - # Prepare outputs. 
- expected = """ - line1 - line2 - line3 - """ - expected = hprint.dedent(expected) - # Run test. - actual = hprint.remove_empty_lines(text, mode=mode) - # Check outputs. - self.assert_equal(actual, expected) - - def test17(self) -> None: - """ - Test no_consecutive_empty_lines with string input (decorator functionality). - """ - # Prepare inputs. - text = """ - line1 - - - line2 - """ - text = hprint.dedent(text) - mode = "no_consecutive_empty_lines" - # Prepare outputs. - expected = """ - line1 - - line2 - """ - expected = hprint.dedent(expected) - # Run test. - actual = hprint.remove_empty_lines(text, mode=mode) - # Check outputs. - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py deleted file mode 100644 index 652fdf47a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hpytest.py +++ /dev/null @@ -1,228 +0,0 @@ -import io -import os -import pprint -import re -from contextlib import redirect_stdout - -import pytest - -# TODO(heanh): add `junitparser` in `//helpers` image. -pytest.importorskip("junitparser") - -import helpers.hio as hio # noqa: E402 -import helpers.hpytest as hpytest # noqa: E402 -import helpers.hunit_test as hunitest # noqa: E402 - - -def _strip_color_codes(text: str) -> str: - """ - Remove ANSI color escape codes from text. - - :param text: text to strip the color codes from - :return: text with the color codes removed - """ - # Remove ANSI escape codes. 
- txt = re.sub(r"\033\[[0-9;]*m", "", text) - return txt - - -# ############################################################################# -# Test_JUnitReporter -# ############################################################################# - - -class Test_JUnitReporter(hunitest.TestCase): - """ - Test scenario where there are passed, skipped tests with leads to `PASSED` - result. - """ - - def helper(self) -> hpytest.JUnitReporter: - """ - Helper function to create a `JUnitReporter` object. - - :return: `JUnitReporter` object - """ - xml_str = """ - - - - - /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. - - - - - """ - input_dir = self.get_scratch_space() - input_file_path = os.path.join(input_dir, "test.xml") - hio.to_file(input_file_path, xml_str) - reporter = hpytest.JUnitReporter(input_file_path) - return reporter - - def test_parse(self) -> None: - """ - Test parsing the JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - actual = pprint.pformat(reporter.overall_stats) - expected = r""" - {'error': 0, - 'failed': 0, - 'passed': 1, - 'skipped': 1, - 'total_tests': 2, - 'total_time': 3.0} - """ - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test_print_summary(self) -> None: - """ - Test printing the summary of the results from JUnit XML file. 
- """ - reporter = self.helper() - reporter.parse() - captured_output = io.StringIO() - with redirect_stdout(captured_output): - reporter.print_summary() - actual = captured_output.getvalue() - actual = _strip_color_codes(actual) - expected = r""" - ====================================================================== - collected 2 items - - ====================================================================== - Test: dummy-test-suite-1 - Timestamp: 2025-01-01T12:00:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) - Summary: 1 passed, 1 skipped in 2.000s - - ====================================================================== - Test: dummy-test-suite-2 - Timestamp: 2025-01-01T12:01:00.000000+00:00 - ---------------------------------------------------------------------- - Summary: no tests in 1.000s - - ====================================================================== - Summary: 1 passed, 1 skipped in 3.00s - Result: PASSED - """ - self.assert_equal( - actual, - expected, - dedent=True, - fuzzy_match=True, - ) - - -# ############################################################################# -# Test_JUnitReporter2 -# ############################################################################# - - -class Test_JUnitReporter2(hunitest.TestCase): - """ - Test scenario where there are passed, error, failed, and skipped tests with - leads to `FAILED` result. - """ - - def helper(self) -> hpytest.JUnitReporter: - """ - Helper function to create a `JUnitReporter` object. - - :return: `JUnitReporter` object - """ - xml_str = """ - - - - - /app/dummy/test/test_module.py:25: Dummy skip message for testing purposes. - - - - - - /app/dummy/test/test_module.py:30: Dummy failure message for testing purposes. 
- - - /app/dummy/test/test_module.py:35: Dummy error message for testing purposes. - - - - - """ - input_dir = self.get_scratch_space() - input_file_path = os.path.join(input_dir, "test.xml") - hio.to_file(input_file_path, xml_str) - reporter = hpytest.JUnitReporter(input_file_path) - return reporter - - def test_parse(self) -> None: - """ - Test parsing the JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - actual = pprint.pformat(reporter.overall_stats) - expected = r""" - {'error': 1, - 'failed': 1, - 'passed': 2, - 'skipped': 1, - 'total_tests': 5, - 'total_time': 6.0} - """ - self.assert_equal(actual, expected, dedent=True, fuzzy_match=True) - - def test_print_summary(self) -> None: - """ - Test printing the summary of the results from JUnit XML file. - """ - reporter = self.helper() - reporter.parse() - captured_output = io.StringIO() - with redirect_stdout(captured_output): - reporter.print_summary() - actual = captured_output.getvalue() - actual = _strip_color_codes(actual) - expected = r""" - ====================================================================== - collected 5 items - - ====================================================================== - Test: dummy-test-suite-1 - Timestamp: 2025-01-01T12:00:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_dummy_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_another_function SKIPPED (1.000s) - Summary: 1 passed, 1 skipped in 2.000s - - ====================================================================== - Test: dummy-test-suite-2 - Timestamp: 2025-01-01T12:01:00.000000+00:00 - ---------------------------------------------------------------------- - dummy.test.test_module.DummyTestCase::test_passed_function PASSED (1.000s) - dummy.test.test_module.DummyTestCase::test_failed_function FAILED (1.000s) - dummy.test.test_module.DummyTestCase::test_error_function ERROR (1.000s) - 
Summary: 1 passed, 1 failed, 1 error in 3.000s - - ====================================================================== - Test: dummy-test-suite-3 - Timestamp: 2025-01-01T12:02:00.000000+00:00 - ---------------------------------------------------------------------- - Summary: no tests in 1.000s - - ====================================================================== - Summary: 2 passed, 1 failed, 1 error, 1 skipped in 6.00s - Result: FAILED - """ - self.assert_equal( - actual, - expected, - dedent=True, - fuzzy_match=True, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py deleted file mode 100644 index d64310202..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hretry.py +++ /dev/null @@ -1,154 +0,0 @@ -import asyncio -import logging - -import pytest - -import helpers.hretry as hretry -import helpers.htimer as htimer -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -EXCEPTIONS = (AttributeError, ValueError) - - -# ############################################################################# -# Test_retry -# ############################################################################# - - -class Test_retry(hunitest.TestCase): - def test_retry1(self) -> None: - """ - Test normal case. 
- """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> bool: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - self.assertTrue(func()) - self.assertEqual(self.exception_count, num_attempts - 1) - - def test_retry2(self) -> None: - """ - Test when the number of exceptions is greater than the number of - retries. - """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> bool: - if self.exception_count < num_attempts: - self.exception_count += 1 - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with self.assertRaises(ValueError): - func() - - def test_retry3(self) -> None: - """ - Test when the raised exception is not in the list of expected - exceptions. - """ - self.exception_count = 0 - num_attempts = 3 - - @hretry.sync_retry(num_attempts, EXCEPTIONS) - def func() -> None: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - raise IndexError("Simulated non expected error") - _LOG.debug("All good") - - with self.assertRaises(IndexError): - func() - - -# ############################################################################# -# Test_retry2 -# ############################################################################# - - -class Test_retry2(hunitest.TestCase): - def test_async_retry1(self) -> None: - """ - Test normal case. 
- """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> bool: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: - result = asyncio.run(func()) - self.assertEqual(round(ts.elapsed_time, 1), 2.2) - self.assertTrue(result) - self.assertEqual(self.exception_count, num_attempts - 1) - - @pytest.mark.skip(reason="See CmTask11013") - def test_async_retry2(self) -> None: - """ - Test when the number of exceptions is greater than the number of - retries. - """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> bool: - if self.exception_count < num_attempts: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise ValueError("Simulated expected error") - _LOG.debug("All good") - return True - - with self.assertRaises(ValueError) as fail: - with htimer.TimedScope(logging.INFO, "async_retry_loop") as ts: - asyncio.run(func()) - self.assertEqual(round(ts.elapsed_time, 1), 3.3) - actual = str(fail.exception) - expected = "Simulated expected error" - self.assert_equal(actual, expected) - - def test_async_retry3(self) -> None: - """ - Test when the raised exception is not in the list of expected - exceptions. 
- """ - self.exception_count = 0 - num_attempts = 3 - retry_delay_in_sec = 1 - - @hretry.async_retry(num_attempts, EXCEPTIONS, retry_delay_in_sec) - async def func() -> None: - if self.exception_count < num_attempts - 1: - self.exception_count += 1 - await asyncio.sleep(0.1) - raise IndexError("Simulated non expected error") - _LOG.debug("All good") - - with self.assertRaises(IndexError) as fail: - asyncio.run(func()) - actual = str(fail.exception) - expected = "Simulated non expected error" - self.assert_equal(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py deleted file mode 100644 index 8f9dd84df..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hs3.py +++ /dev/null @@ -1,597 +0,0 @@ -import logging -import os -from typing import Generator, Tuple - -import pytest - -import helpers.hio as hio -import helpers.hmoto as hmoto -import helpers.hs3 as hs3 -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestReplaceStarWithDoubleStar -# ############################################################################# - - -class TestReplaceStarWithDoubleStar(hunitest.TestCase): - def test1(self) -> None: - """ - Test non replacement of a single asterisk at the end of the path. - """ - pattern_to_modify = "s3://bucket/path/*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/*") - - def test2(self) -> None: - """ - Test replacement of a single asterisk within the path. 
- """ - pattern_to_modify = "s3://bucket/path/*/file" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/**/*/file") - - def test3(self) -> None: - """ - Test no replacement when there are no asterisks in the path. - """ - pattern_to_modify = "s3://bucket/path/file" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/path/file") - - def test4(self) -> None: - """ - Test replacement when multiple asterisk are in the path. - """ - pattern_to_modify = "s3://bucket/*/path/*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/**/*/path/*") - - def test5(self) -> None: - """ - Test non-replacement of asterisk at the end of the path in a special - case. - """ - pattern_to_modify = "s3://bucket/*/path/csv*" - new_pattern = hs3._replace_star_with_double_star(pattern_to_modify) - self.assert_equal(new_pattern, "s3://bucket/**/*/path/csv*") - - -# ############################################################################# -# TestToFileAndFromFile1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestToFileAndFromFile1(hmoto.S3Mock_TestCase): - def write_read_helper(self, file_name: str, force_flush: bool) -> None: - # Prepare inputs. - file_content = "line_mock1\nline_mock2\nline_mock3" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{file_name}" - # Save file. - # TODO(Nikola): Is it possible to verify `force_flush`? - hs3.to_file( - file_content, - s3_path, - aws_profile=moto_s3fs, - force_flush=force_flush, - ) - # Read file. - saved_content = hs3.from_file(s3_path, aws_profile=moto_s3fs) - # Check output. 
- expected = r"""line_mock1 - line_mock2 - line_mock3""" - self.assert_equal(saved_content, expected, fuzzy_match=True) - - # ######################################################################### - - def test_to_file_and_from_file1(self) -> None: - """ - Verify that regular `.txt` file is saved/read on S3. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - force_flush = False - self.write_read_helper(regular_file_name, force_flush) - - def test_to_file_and_from_file2(self) -> None: - """ - Verify that compressed (e.g,`.gz`,`gzip`) file is saved/read on S3. - """ - # Prepare inputs. - gzip_file_name = "mock.gzip" - force_flush = True - self.write_read_helper(gzip_file_name, force_flush) - - def test_to_file_invalid1(self) -> None: - """ - Verify that only binary mode is allowed. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - regular_file_content = "line_mock1\nline_mock2\nline_mock3" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{regular_file_name}" - # Save file with `t` mode. - with self.assertRaises(ValueError) as fail: - hs3.to_file( - regular_file_content, s3_path, mode="wt", aws_profile=moto_s3fs - ) - # Check output. - actual = str(fail.exception) - expected = r"S3 only allows binary mode!" - self.assert_equal(actual, expected) - - def test_from_file_invalid1(self) -> None: - """ - Verify that encoding is not allowed. - """ - # Prepare inputs. - regular_file_name = "mock.txt" - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - s3_path = f"s3://{self.bucket_name}/{regular_file_name}" - # Read with encoding. - with self.assertRaises(ValueError) as fail: - hs3.from_file(s3_path, encoding=True, aws_profile=moto_s3fs) - # Check output. - actual = str(fail.exception) - expected = r"Encoding is not supported when reading from S3!" 
- self.assert_equal(actual, expected) - - -# ############################################################################# -# TestListdir1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestListdir1(hmoto.S3Mock_TestCase): - def prepare_test_data(self) -> Tuple[str, hs3.AwsProfile]: - bucket_s3_path = f"s3://{self.bucket_name}" - depth_one_s3_path = f"{bucket_s3_path}/depth_one" - # Prepare test files. - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - first_s3_path = f"{depth_one_s3_path}/mock1.txt" - lines = [b"line_mock1"] - with moto_s3fs.open(first_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - second_s3_path = f"{depth_one_s3_path}/mock2.gzip" - with moto_s3fs.open(second_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - # Prepare test directories. - # `moto_s3fs.mkdir` is useless as empty directory is not visible. - # There must be at least one file in the directory to be visible. - regular_dir_s3_path = f"{depth_one_s3_path}/mock" - additional_file_s3_path = f"{regular_dir_s3_path}/regular_mock3.txt" - with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - git_dir_s3_path = f"s3://{bucket_s3_path}/.git" - additional_file_s3_path = f"{git_dir_s3_path}/git_mock3.txt" - with moto_s3fs.open(additional_file_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - return bucket_s3_path, moto_s3fs - - # ######################################################################### - - def test_listdir1(self) -> None: - """ - Verify that all paths are found. 
- """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = False - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock_bucket/.git", - "mock_bucket/.git/git_mock3.txt", - "mock_bucket/depth_one", - "mock_bucket/depth_one/mock", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir2(self) -> None: - """ - Verify that all relative paths are found. - """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - # Exclude `.git` by going level below. - bucket_s3_path = os.path.join(bucket_s3_path, "depth_one") - pattern = "*" - only_files = False - use_relative_paths = True - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock", - "mock/regular_mock3.txt", - "mock1.txt", - "mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir3(self) -> None: - """ - Verify that all paths are found, except `.git` ones. - """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = False - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - ) - paths.sort() - expected_paths = [ - "mock_bucket/depth_one", - "mock_bucket/depth_one/mock", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - def test_listdir4(self) -> None: - """ - Verify that all file paths are found. 
- """ - bucket_s3_path, moto_s3fs = self.prepare_test_data() - pattern = "*" - only_files = True - use_relative_paths = False - paths = hs3.listdir( - bucket_s3_path, - pattern, - only_files, - use_relative_paths, - aws_profile=moto_s3fs, - exclude_git_dirs=False, - ) - paths.sort() - expected_paths = [ - "mock_bucket/.git/git_mock3.txt", - "mock_bucket/depth_one/mock/regular_mock3.txt", - "mock_bucket/depth_one/mock1.txt", - "mock_bucket/depth_one/mock2.gzip", - ] - self.assertListEqual(paths, expected_paths) - - -# ############################################################################# -# TestDu1 -# ############################################################################# - - -@pytest.mark.requires_ck_infra -@pytest.mark.requires_aws -@pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", -) -class TestDu1(hmoto.S3Mock_TestCase): - def test_du1(self) -> None: - """ - Verify that total file size is returned. - """ - bucket_s3_path = f"s3://{self.bucket_name}" - depth_one_s3_path = f"{bucket_s3_path}/depth_one" - # Prepare test files. - moto_s3fs = hs3.get_s3fs(self.mock_aws_profile) - first_s3_path = f"{bucket_s3_path}/mock1.txt" - lines = [b"line_mock\n"] * 150 - with moto_s3fs.open(first_s3_path, "wb") as s3_file: - s3_file.writelines(lines) - second_s3_path = f"{depth_one_s3_path}/mock2.txt" - with moto_s3fs.open(second_s3_path, "wb") as s3_file: - # One level deeper to test recursive `du`. - s3_file.writelines(lines) - # Get multiple files. - size = hs3.du(bucket_s3_path, aws_profile=moto_s3fs) - expected_size = 3000 - self.assertEqual(size, expected_size) - size = hs3.du(depth_one_s3_path, aws_profile=moto_s3fs) - expected_size = 1500 - self.assertEqual(size, expected_size) - # Get exactly one file. - size = hs3.du(second_s3_path, aws_profile=moto_s3fs) - self.assertEqual(size, expected_size) - # Verify size in human-readable form. 
- size = hs3.du(bucket_s3_path, human_format=True, aws_profile=moto_s3fs) - expected_size = r"2.9 KB" - self.assert_equal(size, expected_size) - - -# ############################################################################# -# TestGenerateAwsFiles -# ############################################################################# - - -class TestGenerateAwsFiles(hunitest.TestCase): - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - self.setUp() - os.environ["MOCK_AWS_ACCESS_KEY_ID"] = "mock_access_key" - os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] = "mock_secret_access_key" - os.environ["MOCK_AWS_SESSION_TOKEN"] = "mock_session_token" - os.environ["MOCK_AWS_S3_BUCKET"] = "mock_s3_bucket" - os.environ["MOCK_AWS_DEFAULT_REGION"] = "mock_default_region" - # - os.environ["TEST_AWS_ACCESS_KEY_ID"] = "test_access_key" - os.environ["TEST_AWS_SECRET_ACCESS_KEY"] = "test_secret_access_key" - os.environ["TEST_AWS_SESSION_TOKEN"] = "test_session_token" - os.environ["TEST_AWS_S3_BUCKET"] = "test_s3_bucket" - os.environ["TEST_AWS_DEFAULT_REGION"] = "test_default_region" - # Generate AWS files with mock AWS profiles. 
- self._scratch_test_dir = self.get_scratch_space() - aws_profiles = ["mock", "test"] - hs3.generate_aws_files( - home_dir=self._scratch_test_dir, aws_profiles=aws_profiles - ) - - def tear_down_test(self) -> None: - del os.environ["MOCK_AWS_ACCESS_KEY_ID"] - del os.environ["MOCK_AWS_SECRET_ACCESS_KEY"] - del os.environ["MOCK_AWS_SESSION_TOKEN"] - del os.environ["MOCK_AWS_S3_BUCKET"] - del os.environ["MOCK_AWS_DEFAULT_REGION"] - # - del os.environ["TEST_AWS_ACCESS_KEY_ID"] - del os.environ["TEST_AWS_SECRET_ACCESS_KEY"] - del os.environ["TEST_AWS_SESSION_TOKEN"] - del os.environ["TEST_AWS_S3_BUCKET"] - del os.environ["TEST_AWS_DEFAULT_REGION"] - - def helper(self, file_name: str, expected: str) -> None: - # Check. - target_dir = os.path.join(self._scratch_test_dir, ".aws") - actual = hio.from_file(os.path.join(target_dir, file_name)) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Check that AWS credentials file is generated correctly. - """ - file_name = "credentials" - expected = r""" - [mock] - aws_access_key_id=mock_access_key - aws_secret_access_key=mock_secret_access_key - aws_session_token=mock_session_token - aws_s3_bucket=mock_s3_bucket - - [test] - aws_access_key_id=test_access_key - aws_secret_access_key=test_secret_access_key - aws_session_token=test_session_token - aws_s3_bucket=test_s3_bucket - """ - self.helper(file_name, expected) - - def test2(self) -> None: - """ - Check that AWS config file is generated correctly. 
- """ - file_name = "config" - expected = """ - [profile mock] - region=mock_default_region - - [profile test] - region=test_default_region - """ - self.helper(file_name, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_get_s3_bucket_from_stage -# ############################################################################# - - -class Test_get_s3_bucket_from_stage(hunitest.TestCase): - def test1(self) -> None: - """ - Check for a valid stage. - """ - # Define arguments. - stage = "test" - # Run. - actual = hs3.get_s3_bucket_from_stage(stage) - expected = "cryptokaizen-data-test" - self.assert_equal(actual, expected) - - def test2(self) -> None: - """ - Check for a valid stage and optional suffix. - """ - # Define arguments. - stage = "preprod" - suffix = "suffix_test" - # Run. - actual = hs3.get_s3_bucket_from_stage(stage, add_suffix=suffix) - expected = "cryptokaizen-data.preprod/suffix_test" - self.assert_equal(actual, expected) - - def test3(self) -> None: - """ - Check Invalid stage. - """ - # Define arguments. - stage = "Invalid" - # Run. 
- with self.assertRaises(AssertionError) as cm: - hs3.get_s3_bucket_from_stage(stage) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - 'Invalid' in '{'test': 'cryptokaizen-data-test', 'preprod': 'cryptokaizen-data.preprod', 'prod': 'cryptokaizen-data'}' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - -_AWS_PROFILE = "ck" - - -# ############################################################################# -# Test_s3_get_credentials1 -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class Test_s3_get_credentials1(hunitest.TestCase): - def test1(self) -> None: - res = hs3.get_aws_credentials(_AWS_PROFILE) - _LOG.debug("res=%s", str(res)) - - -# ############################################################################# -# Test_s3_functions1 -# ############################################################################# - - -class Test_s3_functions1(hunitest.TestCase): - def test_extract_bucket_from_path1(self) -> None: - path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "tmp/TestCachingOnS3.test_with_caching1/joblib", - ) - bucket, path = hs3.split_path(path) - self.assert_equal(bucket, "cryptokaizen-unit-test") - self.assert_equal(path, "/tmp/TestCachingOnS3.test_with_caching1/joblib") - - -# ############################################################################# -# Test_s3_1 -# ############################################################################# - - -@pytest.mark.requires_aws -@pytest.mark.requires_ck_infra -class Test_s3_1(hunitest.TestCase): - def test_ls1(self) -> None: - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README.md", - ) - _LOG.debug("file_path=%s", file_path) - # > aws s3 ls s3://***** - # PRE data/ - # 2021-04-06 1:17:44 48 README.md - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_names = s3fs.ls(file_path) - 
_LOG.debug("file_names=%s", file_names) - self.assertGreater(len(file_names), 0) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_glob1(self) -> None: - # > aws s3 ls s3://alphamatic-data/data/ib/metadata/ - # 2021-04-26 08:39:00 18791 exchanges-2021-04-01-134738089177.csv - # 2021-04-26 08:39:00 18815 exchanges-2021-04-01-143112738505.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-134738089177.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/ib/metadata", - ) - glob_pattern = file_path + "/exchanges-*" - _LOG.debug("glob_pattern=%s", glob_pattern) - file_names = s3fs.glob(glob_pattern) - _LOG.debug("file_names=%s", file_names) - self.assertGreater(len(file_names), 0) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists1(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README.md", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = True - self.assertEqual(actual, expected) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists2(self) -> None: - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "README_does_not_exist.md", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = False - self.assertEqual(actual, expected) - - @pytest.mark.requires_aws - @pytest.mark.requires_ck_infra - def test_exists3(self) -> None: - # > aws s3 ls alphamatic-data/data/ib/metadata/symbols-2021-04-01-143112738505.csv - # 2021-04-26 08:39:00 61677776 symbols-2021-04-01-143112738505.csv - s3fs = hs3.get_s3fs(_AWS_PROFILE) - file_path = os.path.join( - 
hs3.get_s3_bucket_path_unit_test(_AWS_PROFILE), - "alphamatic-data", - "data/ib/metadata/symbols-2021-04-01-143112738505.csv", - ) - _LOG.debug("file_path=%s", file_path) - actual = s3fs.exists(file_path) - expected = True - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py deleted file mode 100644 index cc046ddac..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsecrets.py +++ /dev/null @@ -1,209 +0,0 @@ -# TODO(gp): Use pytest.import_skip instead of all this machinery. -_HAS_MOTO = True -try: - import moto -except ImportError: - # `moto` may not be installed in a non-cmamp repo, so we skip it (see "DevTools376: - # Break 2022-02-22"). - import helpers.hgit as hgit - - assert not hgit.is_cmamp(), ( - "`cmamp` should have moto, while other repos are allowed to not have it)" - ) - _HAS_MOTO = False - -if _HAS_MOTO: - import json - import logging - import unittest.mock as umock - - import boto3 - import botocore - import pytest - - import helpers.hgit as hgit - import helpers.hs3 as hs3 - import helpers.hsecrets as hsecret - import helpers.hserver as hserver - import helpers.hunit_test as hunitest - - _LOG = logging.getLogger(__name__) - - # The `mock_aws` decorator ensures the calls to the AWS API are - # mocked. - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestCreateClient(hunitest.TestCase): - def test_create_client1(self) -> None: - """ - Simple smoke test to verify connection to AWS. 
- """ - client = hsecret.get_secrets_client(aws_profile="ck") - self.assertIsInstance(client, botocore.client.BaseClient) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestGetSecret(hunitest.TestCase): - @moto.mock_aws - def test_get_secret(self) -> None: - """ - Verify that the secret can be retrieved correctly. - """ - # Make sure the region name matches the one used in `hsecret` profile. - client = boto3.client( - "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 - ) - secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - client.create_secret( - Name=secret_name, SecretString=json.dumps(secret) - ) - self.assertDictEqual(hsecret.get_secret(secret_name), secret) - - @moto.mock_aws - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - def test_trading_key(self) -> None: - """ - Verify locking mechanism for trading key is processed correctly. - """ - # Define test params. - secret_value = {"test.trading.key": "test.trading.value"} - secret_name = "test.trading.sandbox.1" - usedBy = "pytest" - hsecret.store_secret(secret_name, secret_value) - # Define expected values. - usedBy = hsecret._get_flag_value(usedBy) - expected = f"Secret key is already in use by {usedBy}" - # Call get secret to lock the key. - _ = hsecret.get_secret(secret_name) - # Recall get secret for same key to verify the lock. - try: - hsecret.get_secret(secret_name) - except RuntimeError as rte: - actual = str(rte) - self.assert_equal(actual, expected, fuzzy_match=True) - - @moto.mock_aws - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - def test_lock_for_different_script(self) -> None: - """ - Verify locking mechanism for access to trading key is passed if - scripts are different. - """ - # Define test params. 
- secret_value = {"test.trading.key": "test.trading.value"} - secret_name = "test.trading.sandbox.1" - script1 = "pytest" - script2 = "run_system_observer.py" - hsecret.store_secret(secret_name, secret_value) - # Call get secret to lock the key with testing script. - _ = hsecret.get_secret(secret_name) - usedBy1 = hsecret._get_flag_value(script1) - # Define expected values. - usedBy2 = hsecret._get_flag_value(script2) - # Update secret value with expected usedBy script names. - secret_value["usedBy"] = [usedBy1, usedBy2] - # Call get secret for same key to verify the lock for mocked script. - with umock.patch("sys.argv", [script2]): - actual = hsecret.get_secret(secret_name) - self.assert_equal( - str(actual), expected=str(secret_value), fuzzy_match=True - ) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - class TestStoreSecret(hunitest.TestCase): - @moto.mock_aws - def test_store_secret1(self) -> None: - """ - Verify that a secret can be stored correctly. - """ - secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - hsecret.store_secret(secret_name, secret) - # Make sure the region name matches the one used in `hsecret`. - client = boto3.client( - "secretsmanager", region_name=hs3.AWS_EUROPE_REGION_1 - ) - test_secret_value = json.loads( - client.get_secret_value(SecretId=secret_name)["SecretString"] - ) - self.assertDictEqual(test_secret_value, secret) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - class TestLockSecret(hunitest.TestCase): - @moto.mock_aws - def test_lock_secret(self) -> None: - """ - Verify that the lock secret function locks the key. - """ - # Define test params. 
- secret = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - hsecret.store_secret(secret_name, secret) - usedBy = "pytest" - # Lock the stored secret. - hsecret.lock_secret(secret_name, secret) - # Retry locking the same secret. - try: - hsecret.lock_secret(secret_name, secret) - except RuntimeError as rte: - usedBy = hsecret._get_flag_value(usedBy) - expected = f"Secret key is already in use by {usedBy}" - actual = str(rte) - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.requires_ck_infra - @pytest.mark.requires_aws - @pytest.mark.skipif( - not hserver.is_CK_S3_available(), - reason="Run only if CK S3 is available", - ) - @pytest.mark.skip( - reason="TODO(Juraj): Temporarily disabled in #Cmtask10068." - ) - class TestUpdateUsedby(hunitest.TestCase): - @moto.mock_aws - def test1(self) -> None: - """ - Verify that update_usedby updates value in secrets manager. - """ - # Define test params. - secret_value = {"testkey": "testvalue"} - secret_name = "test.local.sandbox.1" - usedBy = "pytest" - hsecret.store_secret(secret_name, secret_value) - # Define expected value. - expected = r""" - {'testkey': 'testvalue', 'usedBy': ['pytest']} - """ - # Run. - hsecret.update_usedby(secret_name, secret_value, usedBy) - actual = hsecret.get_secret(secret_name) - # Verify. 
- self.assert_equal(str(actual), expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py deleted file mode 100644 index 3e6a1ba7d..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hserver.py +++ /dev/null @@ -1,321 +0,0 @@ -import logging - -import pytest - -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# _TestCase1 -# ############################################################################# - - -class _TestCase1: - # def test_config_func_to_str1(self) -> None: - # val = hserver.config_func_to_str() - # _LOG.info("val=\n%s", val) - # if self.exp_config_func_to_str is not None: - # self.assert_equal(val, self.exp_config_func_to_str) - - def test_consistency1(self) -> None: - hserver._dassert_setup_consistency() - - def test_is_host_csfy_server1(self) -> None: - val = hserver.is_host_csfy_server() - _LOG.info("val=\n%s", val) - if self.exp_is_host_csfy_server is not None: - self.assertEqual(val, self.exp_is_host_csfy_server) - - def test_is_host_mac1(self) -> None: - val = hserver.is_host_mac() - _LOG.info("val=\n%s", val) - if self.exp_is_host_mac is not None: - self.assertEqual(val, self.exp_is_host_mac) - - def test_get_docker_info1(self) -> None: - val = hserver.get_docker_info() - _LOG.info("val=\n%s", val) - # Remove the docker version since it is not stable. 
- val = hprint.filter_text("docker_version=", val) - if self.exp_get_docker_info is not None: - self.assert_equal(val, self.exp_get_docker_info) - - def test_get_setup_settings1(self) -> None: - setups = hserver._get_setup_settings() - val = hserver._setup_to_str(setups) - _LOG.info("val=\n%s", val) - if self.exp_get_setup_settings is not None: - self.assert_equal(val, self.exp_get_setup_settings) - - # def test_get_setup_signature1(self) -> None: - # val = hserver._get_setup_signature() - # _LOG.info("val=\n%s", val) - # if self.exp_get_setup_signature is not None: - # self.assert_equal(val, self.exp_get_setup_signature) - - def test_is_inside_ci1(self) -> None: - val = hserver.is_inside_ci() - _LOG.info("val=\n%s", val) - if self.exp_is_inside_ci is not None: - self.assertEqual(val, self.exp_is_inside_ci) - - -# ############################################################################# -# Test_hserver1 -# ############################################################################# - - -class Test_hserver1(_TestCase1, hunitest.TestCase): - """ - Smoke test without checking anything. - """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = None - self.exp_get_docker_info = None - self.exp_get_setup_settings = None - self.exp_get_setup_signature = None - self.exp_is_host_csfy_server = None - self.exp_is_host_mac = None - self.exp_is_inside_ci = None - - -# ############################################################################# -# Test_hserver_inside_ci1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="Config not matching", -) -class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): - """ - Run tests inside CI. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = None - self.exp_get_docker_info = hprint.dedent(r""" - Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci True - """) - self.exp_get_setup_signature = None - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = False - self.exp_is_inside_ci = True - - -# ############################################################################# -# Test_hserver_inside_docker_container_on_csfy_server1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_inside_docker_container_on_csfy_server(), - reason="Config not matching", -) -class Test_hserver_inside_docker_container_on_csfy_server1( - _TestCase1, hunitest.TestCase -): - """ - Run tests inside Docker container on a Causify dev server. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server True - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = True - self.exp_is_host_mac = False - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_outside_docker_container_on_csfy_server1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_outside_docker_container_on_csfy_server(), - reason="Config not matching", -) -class Test_hserver_outside_docker_container_on_csfy_server1( - _TestCase1, hunitest.TestCase -): - """ - Run tests outside Docker container on a Causify dev server. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=False - has_docker_sibling_containers_support=*undef* - has_docker_children_containers_support=*undef* - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server True - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = True - self.exp_is_host_mac = False - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_inside_docker_container_on_gp_mac1 -# ############################################################################# - - -@pytest.mark.skipif( - not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), - reason="Config not matching", -) -class Test_hserver_inside_docker_container_on_gp_mac1( - _TestCase1, hunitest.TestCase -): - """ - Run tests inside Docker container on GP's Mac. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=True - has_docker_sibling_containers_support=True - has_docker_children_containers_support=True - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac True - is_outside_docker_container_on_host_mac False - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = True - self.exp_is_inside_ci = False - - -# ############################################################################# -# Test_hserver_outside_docker_container_on_gp_mac1 -# ############################################################################# - - -@pytest.mark.skipif( - not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), - reason="Config not matching", -) -class Test_hserver_outside_docker_container_on_gp_mac1( - _TestCase1, hunitest.TestCase -): - """ - Run tests outside Docker container on GP's Mac. 
- """ - - def setUp(self) -> None: - super().setUp() - self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" - # Docker info - has_docker=True - docker_needs_sudo=False - has_privileged_mode=True - is_inside_docker=False - has_docker_sibling_containers_support=*undef* - has_docker_children_containers_support=*undef* - """) - self.exp_get_setup_settings = hprint.dedent(r""" - is_inside_docker_container_on_csfy_server False - is_outside_docker_container_on_csfy_server False - is_inside_docker_container_on_host_mac False - is_outside_docker_container_on_host_mac True - is_inside_docker_container_on_external_linux False - is_outside_docker_container_on_external_linux False - is_dev4 False - is_ig_prod False - is_prod_csfy False - is_inside_ci False - """) - self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = False - self.exp_is_host_mac = True - self.exp_is_inside_ci = False - - -# ############################################################################# - - -# TODO(gp): Add test mocking the environment variables in _get_setup_signature. -# We should have one class for each set up (e.g., outside Mac, outside Linux, -# inside Docker, inside CI, etc.) 
diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py deleted file mode 100644 index 998b65c86..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hslack.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -import unittest.mock as umock - -import helpers.hslack as hslack -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestSlackNotifier -# ############################################################################# - - -class TestSlackNotifier(hunitest.TestCase): - def test1(self) -> None: - """ - Check that `SlackNotifier` initializes with provided bot token. - """ - # Create notifier with explicit token. - notifier = hslack.SlackNotifier(bot_token="xoxb-test1-token") - self.assertEqual(notifier.bot_token, "xoxb-test1-token") - - def test2(self) -> None: - """ - Check that `SlackNotifier` initializes with environment variable token. - """ - # Mock environment variable and create notifier. - with umock.patch.dict( - os.environ, {"SLACK_BOT_TOKEN": "xoxb-test2-token"} - ): - notifier = hslack.SlackNotifier() - self.assertEqual(notifier.bot_token, "xoxb-test2-token") - - def test3(self) -> None: - """ - Check that `SlackNotifier` raises `ValueError` when no token is - provided. - """ - # Clear environment and verify initialization fails. - with umock.patch.dict(os.environ, {}, clear=True): - with self.assertRaises(ValueError) as cm: - hslack.SlackNotifier() - self.assertIn("No bot token provided", str(cm.exception)) - - def test4(self) -> None: - """ - Check that `send_message()` successfully sends message to Slack - channel. - """ - # Mock successful Slack API response. 
- with umock.patch("helpers.hslack.requests.post") as mock_post: - mock_response = umock.MagicMock() - mock_response.json.return_value = {"ok": True} - mock_response.raise_for_status.return_value = None - mock_post.return_value = mock_response - # Send message and verify API call. - notifier = hslack.SlackNotifier(bot_token="xoxb-test4-token") - notifier.send_message("#test4", "test4 message content") - # Verify request parameters. - mock_post.assert_called_once() - _, kwargs = mock_post.call_args - self.assertEqual(kwargs["json"]["channel"], "#test4") - self.assertEqual(kwargs["json"]["text"], "test4 message content") - - def test5(self) -> None: - """ - Check that `send_message()` raises `ValueError` on Slack API error. - """ - # Mock Slack API error response. - with umock.patch("helpers.hslack.requests.post") as mock_post: - mock_response = umock.MagicMock() - mock_response.json.return_value = { - "ok": False, - "error": "channel_not_found", - } - mock_response.raise_for_status.return_value = None - mock_post.return_value = mock_response - # Verify error is raised with correct message. 
- notifier = hslack.SlackNotifier(bot_token="xoxb-test5-token") - with self.assertRaises(ValueError) as cm: - notifier.send_message("#test5", "test5 message content") - self.assertIn("channel_not_found", str(cm.exception)) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py deleted file mode 100644 index f6adba2f6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsql.py +++ /dev/null @@ -1,29 +0,0 @@ -import helpers.hsql as hsql -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestCreateInOperator -# ############################################################################# - - -class TestCreateInOperator(hunitest.TestCase): - def test_create_in_operator1(self) -> None: - """ - Test creating IN operator for more than one value. - """ - values = ["binance", "ftx"] - column = "exchange_id" - actual = hsql.create_in_operator(values, column) - expected = "exchange_id IN ('binance','ftx')" - self.assertEqual(actual, expected) - - def test_create_in_operator2(self) -> None: - """ - Test creating IN operator for one value. 
- """ - values = ["ftx"] - column = "exchange_id" - actual = hsql.create_in_operator(values, column) - expected = "exchange_id IN ('ftx')" - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py deleted file mode 100644 index 1e5b4ff01..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hstring.py +++ /dev/null @@ -1,270 +0,0 @@ -import os -from typing import List, Tuple - -import helpers.hio as hio -import helpers.hstring as hstring -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestExtractVersionFromFileName -# ############################################################################# - - -class TestExtractVersionFromFileName(hunitest.TestCase): - def _test_extract_version_from_file_name( - self, version: str, expected: Tuple[int, int] - ) -> None: - """ - Verify function provides expected output on valid inputs. - - :param version: version in string format to input, e.g. 1.0 - :param expected: expected output version in (major, minor) - format - """ - fn = f"/app/datapull/ccxt/universe/download/universe_v{version}.json" - self.assertEqual(hstring.extract_version_from_file_name(fn), expected) - - def test_extract_version_from_file_name1(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("1.1", (1, 1)) - - def test_extract_version_from_file_name2(self) -> None: - """ - Verify function provides expected output on valid input. 
- """ - self._test_extract_version_from_file_name("4", (4, 0)) - - def test_extract_version_from_file_name3(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("1.0", (1, 0)) - - def test_extract_version_from_file_name4(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("3.11", (3, 11)) - - def test_extract_version_from_file_name5(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("16.2", (16, 2)) - - def test_extract_version_from_file_name6(self) -> None: - """ - Verify function provides expected output on valid input. - """ - self._test_extract_version_from_file_name("25.11", (25, 11)) - - def _test_extract_version_from_file_name_incorrect_format( - self, file_name: str - ) -> None: - """ - Helper function to verify function raises AssertionError on incorrect - input format. - - :param file_name: incorrect file_name to test - """ - expected_fail = "Can't parse file" - with self.assertRaises(AssertionError) as fail: - _ = hstring.extract_version_from_file_name(file_name) - self.assertIn(expected_fail, str(fail.exception)) - - def test_extract_version_from_file_name_incorrect_format1(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format("incorrect") - - def test_extract_version_from_file_name_incorrect_format2(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_vxx.json" - ) - - def test_extract_version_from_file_name_incorrect_format3(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. 
- """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_v.1.json" - ) - - def test_extract_version_from_file_name_incorrect_format4(self) -> None: - """ - Verify function raises AssertionError on incorrect input format. - """ - self._test_extract_version_from_file_name_incorrect_format( - "universe_11.json" - ) - - -# ############################################################################# -# TestGetDocstringLineIndices -# ############################################################################# - - -class TestGetDocstringLineIndices(hunitest.TestCase): - """ - Test determining which code lines are inside (doc)strings. - """ - - def helper(self, code: str, expected: List[str]) -> None: - lines = code.split("\n") - actual_idxs = hstring.get_docstring_line_indices(lines) - actual = [lines[i].strip() for i in actual_idxs] - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test one type of quotes. - """ - code = """ - def test_assert_equal1(self) -> None: - ''' - Test one. - ''' - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = ''' - Inside a string. - ''' - d = '''Does not count''' - self.check_string(actual) - - """ - expected = ["'''", "Test one.", "s = '''", "Inside a string."] - self.helper(code, expected) - - def test2(self) -> None: - """ - Test the second type of quotes. - """ - code = ''' - def test_assert_equal1(self) -> None: - """ - Test one. - """ - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = """ - Inside a string. - """ - d = """Does not count""" - self.check_string(actual) - - ''' - expected = ['"""', "Test one.", 's = """', "Inside a string."] - self.helper(code, expected) - - def test3(self) -> None: - """ - Test quotes within quotes. 
- """ - code = """ - def test_assert_equal1(self) -> None: - ''' - Test one. - """ - code += '''\ -""" - String within "Test one". - """ - ''' - code += """\ -''' - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - s = ''' - Inside a string. - ''' - d = '''Does not count''' - self.check_string(actual) - - """ - expected = [ - "'''", - "Test one.", - '"""', - 'String within "Test one".', - '"""', - "s = '''", - "Inside a string.", - ] - self.helper(code, expected) - - -# ############################################################################# -# TestGetCodeBlockLineIndices -# ############################################################################# - - -class TestGetCodeBlockLineIndices(hunitest.TestCase): - def helper(self, code: str, expected: List[str]) -> None: - lines = code.split("\n") - actual_idxs = hstring.get_code_block_line_indices(lines) - actual = [lines[i].strip() for i in actual_idxs] - self.assertEqual(actual, expected) - - def test1(self) -> None: - """ - Test getting code block line indices. - """ - code = """ - def test_assert_equal1(self) -> None: - ``` - Test one. - ``` - d = ```Does not count``` - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - """ - expected = ["```", "Test one."] - self.helper(code, expected) - - -# ############################################################################# -# TestGetDocstrings -# ############################################################################# - - -class TestGetDocstrings(hunitest.TestCase): - def test1(self) -> None: - """ - Test that grouped lines within docstrings are correctly returned. - """ - # Prepare inputs. - test_get_docstring_lines_input_dir = self.get_input_dir() - text_file_path = os.path.join( - test_get_docstring_lines_input_dir, "test.txt" - ) - text = hio.from_file(text_file_path) - lines = text.splitlines() - # Run. 
- actual = hstring.get_docstrings(lines) - # Check. - expected = [ - [1, 2, 3, 4, 5, 6], - [11, 12, 13, 14, 15, 16], - ] - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py deleted file mode 100644 index 4d2431bca..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hsystem.py +++ /dev/null @@ -1,494 +0,0 @@ -import logging -import os -import platform -import re -import tempfile -from typing import List - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -def _get_ls_error_message(filename: str = "this_file_doesnt_exist") -> str: - """ - Get the expected error message for ls command for the current OS. - - :param filename: The filename that doesn't exist - """ - if platform.system() == "Darwin": - return f"ls: {filename}: No such file or directory" - elif platform.system() == "Linux": - return f"ls: cannot access '{filename}': No such file or directory" - raise RuntimeError(f"Unsupported OS: {platform.system()}") - -# ############################################################################# - - -# ############################################################################# -# Test_system1 -# ############################################################################# - - -class Test_system1(hunitest.TestCase): - def test1(self) -> None: - hsystem.system("ls") - - def test2(self) -> None: - hsystem.system("ls /dev/null", suppress_output=False) - - def test3(self) -> None: - """ - Output to a file. 
- """ - with tempfile.NamedTemporaryFile() as fp: - temp_file_name = fp.name - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name) - hdbg.dassert_path_exists(temp_file_name) - - def test4(self) -> None: - """ - Tee to a file. - """ - with tempfile.NamedTemporaryFile() as fp: - temp_file_name = fp.name - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name, tee=True) - hdbg.dassert_path_exists(temp_file_name) - - def test5(self) -> None: - """ - Test dry_run. - """ - temp_file_name = tempfile._get_default_tempdir() # type: ignore - candidate_name = tempfile._get_candidate_names() # type: ignore - temp_file_name += "/" + next(candidate_name) - _LOG.debug("temp_file_name=%s", temp_file_name) - hsystem.system("ls", output_file=temp_file_name, dry_run=True) - hdbg.dassert_path_not_exists(temp_file_name) - - def test6(self) -> None: - """ - Test abort_on_error=True. - """ - hsystem.system("ls this_file_doesnt_exist", abort_on_error=False) - - def test7(self) -> None: - """ - Test abort_on_error=True (default). - """ - with self.assertRaises(RuntimeError) as cm: - hsystem.system("ls this_file_doesnt_exist") - actual = str(cm.exception) - # Different systems return different rc. - actual = re.sub(r"rc='\d+'", "rc=''", actual) - # Use OS-specific expected error message. 
- error_msg = _get_ls_error_message() - expected = f""" - ################################################################################ - ################################################################################ - _system() failed - ################################################################################ - ################################################################################ - # _system: cmd='(ls this_file_doesnt_exist) 2>&1', print_command=False, abort_on_error=True, suppress_error=None, suppress_output=True, blocking=True, wrapper=None, output_file=None, num_error_lines=30, tee=False, dry_run=False, log_level=10 - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - cmd='(ls this_file_doesnt_exist) 2>&1' - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - rc='' - - output=' - {error_msg} - ' - - Output saved in 'tmp.system_output.txt' - - Command saved in 'tmp.system_cmd.sh' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test8(self) -> None: - """ - Check that an assert error is raised when `tee` is passed without a log - file. - """ - with self.assertRaises(AssertionError) as cm: - _ = hsystem.system("ls this_should_fail", tee=True) - actual = str(cm.exception) - expected = r""" - ################################################################################ - * Failed assertion * - 'True' implies 'False' - ################################################################################ - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test9(self) -> None: - """ - Check that the failing command fails and logs are stored in the log - file. 
- - - `allow_errors = False` - - `tee = True` - - Log file path is passed - """ - log_dir = self.get_scratch_space() - log_file_path = os.path.join(log_dir, "tee_log") - with self.assertRaises(RuntimeError) as cm: - _ = hsystem.system( - "ls this_should_fail", tee=True, output_file=log_file_path - ) - actual = str(cm.exception) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # Normalize rc value (differs across systems). - actual = re.sub(r"rc='\d+'", "rc=''", actual) - # Check log output contains the OS-specific error message. - actual = hio.from_file(log_file_path) - error_msg = _get_ls_error_message("this_should_fail") - expected = error_msg + "\n" - self.assert_equal(actual, expected) - - def test10(self) -> None: - """ - Check that the failing command passes and logs are stored in the log - file. - - - `allow_errors = True` - - `tee = True` - - Log file path is passed - """ - log_dir = self.get_scratch_space() - log_file_path = os.path.join(log_dir, "tee_log") - rc = hsystem.system( - "ls this_should_fail", - tee=True, - abort_on_error=False, - output_file=log_file_path, - ) - self.assertNotEqual(rc, 0) - # Check log output. - actual = hio.from_file(log_file_path) - # Use OS-specific expected error message. 
- error_msg = _get_ls_error_message("this_should_fail") - expected = error_msg + "\n" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# - - -# ############################################################################# -# Test_system2 -# ############################################################################# - - -class Test_system2(hunitest.TestCase): - def test_get_user_name(self) -> None: - actual = hsystem.get_user_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("whoami")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - # - expected = hsystem.system_to_one_line("whoami")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_get_server_name(self) -> None: - actual = hsystem.get_server_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("uname -n")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - def test_get_os_name(self) -> None: - actual = hsystem.get_os_name() - _LOG.debug("actual=%s", actual) - # - expected = hsystem.system_to_string("uname -s")[1] - _LOG.debug("expected=%s", expected) - self.assertEqual(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_compute_file_signature1 -# ############################################################################# - - -class Test_compute_file_signature1(hunitest.TestCase): - def test1(self) -> None: - """ - Compute the signature of a file using 1 enclosing dir. - """ - file_name = ( - "/app/amp/core/test/TestCheckSameConfigs." 
- + "test_check_same_configs_error/output/test.txt" - ) - dir_depth = 1 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = ["output", "test.txt"] - self.assert_equal(str(actual), str(expected)) - - def test2(self) -> None: - """ - Compute the signature of a file using 2 enclosing dirs. - """ - file_name = ( - "/app/amp/core/test/TestCheckSameConfigs." - + "test_check_same_configs_error/output/test.txt" - ) - dir_depth = 2 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = [ - "TestCheckSameConfigs.test_check_same_configs_error", - "output", - "test.txt", - ] - self.assert_equal(str(actual), str(expected)) - - def test3(self) -> None: - """ - Compute the signature of a file using 4 enclosing dirs. - """ - file_name = "/app/amp/core/test/TestApplyAdfTest.test1/output/test.txt" - dir_depth = 4 - actual = hsystem._compute_file_signature(file_name, dir_depth=dir_depth) - expected = [ - "core", - "test", - "TestApplyAdfTest.test1", - "output", - "test.txt", - ] - self.assert_equal(str(actual), str(expected)) - - -# ############################################################################# - - -# ############################################################################# -# Test_find_file_with_dir1 -# ############################################################################# - - -class Test_find_file_with_dir1(hunitest.TestCase): - def test1(self) -> None: - """ - Check whether we can find this file using one enclosing dir. - """ - # Use this file. - file_name = "helpers/test/test_hsystem.py" - dir_depth = 1 - actual = hsystem.find_file_with_dir(file_name, dir_depth=dir_depth) - expected = r"""['helpers/test/test_hsystem.py']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - - def _helper(self, dir_depth: int, mode: str) -> List[str]: - """ - Test helper for find_file_with_dir. 
- - :param dir_depth: Number of directory levels to use for matching - :param mode: Search mode for matching - :return: List of matching files - """ - # Create a fake golden outcome to be used in this test. - golden_content = "hello world" - self.check_string(golden_content) - # E.g., helpers/test/test_hsystem.py::Test_find_file_with_dir1::test2/test.txt - file_name = os.path.join(self.get_output_dir(), "test.txt") - _LOG.debug("file_name=%s", file_name) - actual = hsystem.find_file_with_dir( - file_name, dir_depth=dir_depth, mode=mode - ) - _LOG.debug("Found %d matching files", len(actual)) - return actual - - def test2(self) -> None: - """ - Check whether we can find a test golden output using different number - of enclosing dirs. - - With only 1 enclosing dir, we can't find it. - """ - # Use only one dir which is not enough to identify the file. - # E.g., .../test/TestSqlWriterBackend1.test_insert_tick_data1/output/test.txt - dir_depth = 1 - mode = "return_all_results" - actual = self._helper(dir_depth, mode) - # For sure there are more than 100 tests. - self.assertGreater(len(actual), 100) - - def test3(self) -> None: - """ - Like `test2`, but using 2 levels for sure we are going to identify the - file. - """ - dir_depth = 2 - mode = "return_all_results" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - # There should be a single match. - expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test3/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - def test4(self) -> None: - """ - Like `test2`, but using 2 levels for sure we are going to identify the - file and asserting in case we don't find a single result. - """ - dir_depth = 2 - mode = "assert_unless_one_result" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - # There should be a single match. 
- expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test4/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - def test5(self) -> None: - """ - Like `test2`, using more level than 2, again, we should have a single - result. - """ - dir_depth = 3 - mode = "assert_unless_one_result" - actual = self._helper(dir_depth, mode) - _LOG.debug("Found %d matching files", len(actual)) - expected = r"""['helpers/test/outcomes/Test_find_file_with_dir1.test5/output/test.txt']""" - self.assert_equal(str(actual), str(expected), purify_text=True) - self.assertEqual(len(actual), 1) - - -# ############################################################################# - - -# ############################################################################# -# Test_Linux_commands1 -# ############################################################################# - - -class Test_Linux_commands1(hunitest.TestCase): - def test_du1(self) -> None: - hsystem.du(".") - - -# ############################################################################# - - -# ############################################################################# -# Test_has_timestamp1 -# ############################################################################# - - -class Test_has_timestamp1(hunitest.TestCase): - def test_has_not_timestamp1(self) -> None: - """ - No timestamp. - """ - file_name = "patch.amp.8c5a2da9.tgz" - actual = hsystem.has_timestamp(file_name) - expected = False - self.assertEqual(actual, expected) - - def test_has_timestamp1(self) -> None: - """ - Valid timestamp. - """ - file_name = "patch.amp.8c5a2da9.20210725_225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp2(self) -> None: - """ - Valid timestamp. 
- """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725-22_58_57.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp3(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp4(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_22_58_57.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - def test_has_timestamp5(self) -> None: - """ - Valid timestamp. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725225857.tgz" - actual = hsystem.has_timestamp(file_name) - expected = True - self.assertEqual(actual, expected) - - -# ############################################################################# -# Test_append_timestamp_tag1 -# ############################################################################# - - -class Test_append_timestamp_tag1(hunitest.TestCase): - def test_no_timestamp1(self) -> None: - """ - Invalid timestamp, with no tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" - tag = "" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.tgz - expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.tgz" - self.assertRegex(actual, expected) - - def test_no_timestamp2(self) -> None: - """ - Invalid timestamp, with no tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.tgz" - tag = "hello" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210726-15_11_25.hello.tgz - expected = r"/foo/bar/patch.amp.8c5a2da9.\S+.hello.tgz" - self.assertRegex(actual, expected) - - def test1(self) -> None: - """ - Valid timestamp, with no tag. 
- """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - tag = "" - actual = hsystem.append_timestamp_tag(file_name, tag) - # /foo/bar/patch.amp.8c5a2da9.20210725_225857.20210726-15_11_25.tgz - expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Valid timestamp, with a tag. - """ - file_name = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.tgz" - tag = "hello" - actual = hsystem.append_timestamp_tag(file_name, tag) - expected = "/foo/bar/patch.amp.8c5a2da9.20210725_225857.hello.tgz" - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py deleted file mode 100644 index 385de303a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htable.py +++ /dev/null @@ -1,159 +0,0 @@ -import logging - -import helpers.hprint as hprint -import helpers.htable as htable -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestTable1 -# ############################################################################# - - -class TestTable1(hunitest.TestCase): - # ######################################################################### - - @staticmethod - def _get_table() -> htable.Table: - txt = """completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow"] - # table = [line for line in csv.reader(txt.split("\n"), delimiter=' ')] - # _LOG.debug(hprint.to_str("table")) - # _LOG.debug("size=%s", str(htable.size(table))) - table = htable.Table.from_text(cols, txt, delimiter=" ") - 
return table - - def test_from_text1(self) -> None: - table = self._get_table() - self.assertIsInstance(table, htable.Table) - _LOG.debug(hprint.to_str("table")) - - def test_from_text_invalid1(self) -> None: - txt = """completed failure Lint Run_linter -completed success Lint -completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow"] - with self.assertRaises(AssertionError) as cm: - htable.Table.from_text(cols, txt, delimiter=" ") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '3' - == - '4' - Invalid row='['completed', 'success', 'Lint']' for cols='['status', 'outcome', 'descr', 'workflow']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_from_text_invalid2(self) -> None: - txt = """completed failure Lint Run_linter - completed success Lint Fast_tess - completed success Lint Slow_tests""" - cols = ["status", "outcome", "descr", "workflow", "EXTRA"] - with self.assertRaises(AssertionError) as cm: - htable.Table.from_text(cols, txt, delimiter=" ") - actual = str(cm.exception) - expected = """ - * Failed assertion * - '4' - == - '5' - Invalid row='['completed', 'failure', 'Lint', 'Run_linter']' for cols='['status', 'outcome', 'descr', 'workflow', 'EXTRA']' - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - # ######################################################################### - - def test_repr1(self) -> None: - table = self._get_table() - actual = repr(table) - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= -['completed', 'failure', 'Lint', 'Run_linter'] -['completed', 'success', 'Lint', 'Fast_tests'] -['completed', 'success', 'Lint', 'Slow_tests'] -size=(3, 4) -""" - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_str1(self) -> None: - table = self._get_table() - actual = str(table) - expected = r""" -status | outcome | descr | workflow | ---------- | ------- | ----- | ---------- 
| -completed | failure | Lint | Run_linter | -completed | success | Lint | Fast_tests | -completed | success | Lint | Slow_tests | -""" - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - # ######################################################################### - - def test_filter_table1(self) -> None: - """ - Filter resulting in a single matching row. - """ - table = self._get_table() - # - table_filter = table.filter_rows("outcome", "failure") - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= -['completed', 'failure', 'Lint', 'Run_linter'] -size=(1, 4) -""" - actual = repr(table_filter) - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_filter_table2(self) -> None: - """ - Filter resulting in no matches. - """ - table = self._get_table() - # - table_filter = table.filter_rows("status", "in progress") - expected = r""" -cols=['status', 'outcome', 'descr', 'workflow'] -table= - -size=(0, 4) -""" - actual = repr(table_filter) - expected = expected.rstrip().lstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - - def test_filter_table3(self) -> None: - """ - Filter with a column constant using the constant value. 
- """ - table = self._get_table() - # - table_filter = table.filter_rows("descr", "Lint") - actual = repr(table_filter) - expected = repr(table) - self.assert_equal(actual, expected, fuzzy_match=False) - - # ######################################################################### - - def test_unique1(self) -> None: - table = self._get_table() - # - actual = table.unique("descr") - expected = ["Lint"] - self.assert_equal(str(actual), str(expected), fuzzy_match=False) - - def test_unique2(self) -> None: - table = self._get_table() - # - actual = table.unique("workflow") - expected = ["Fast_tests", "Run_linter", "Slow_tests"] - self.assert_equal(str(actual), str(expected), fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py deleted file mode 100644 index fa2059b0b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htext_protect.py +++ /dev/null @@ -1,578 +0,0 @@ -import logging - -import helpers.hprint as hprint -import helpers.htext_protect as htexprot -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test__extract_protected_content -# ############################################################################# - - -class Test__extract_protected_content(hunitest.TestCase): - """ - Test the extract_protected_content function. - """ - - def helper( - self, - txt: str, - file_type: str, - expected_txt: str, - expected_map_size: int, - ) -> None: - """ - Test helper for extract_protected_content. 
- - :param txt: Input text to process - :param file_type: File type ('md', 'txt', or 'tex') - :param expected_txt: Expected output text with placeholders - :param expected_map_size: Expected number of protected items - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - actual_lines, protected_map = htexprot.extract_protected_content( - lines, file_type - ) - # Check outputs. - actual = "\n".join(actual_lines) - expected = hprint.dedent( - expected_txt, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual, expected) - self.assertEqual(len(protected_map), expected_map_size) - - def test1(self) -> None: - """ - Test extracting single fenced block with content. - """ - # Prepare inputs. - txt = """ - Some text here. - ```python - def foo(): - return 42 - ``` - More text. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Some text here. - ```python - <<>> - ``` - More text. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test2(self) -> None: - """ - Test extracting multiple fenced blocks. - """ - # Prepare inputs. - txt = """ - Text. - ```python - code1 - ``` - Middle. - ```javascript - code2 - ``` - End. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text. - ```python - <<>> - ``` - Middle. - ```javascript - <<>> - ``` - End. - """ - expected_map_size = 2 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test3(self) -> None: - """ - Test extracting empty fenced block. - """ - # Prepare inputs. - txt = """ - Text before. - ```python - ``` - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - ```python - <<>> - ``` - Text after. - """ - expected_map_size = 1 - # Run test. 
- self.helper(txt, file_type, expected, expected_map_size) - - def test4(self) -> None: - """ - Test extracting fenced blocks with different languages. - """ - # Prepare inputs. - txt = """ - ```python - python_code - ``` - ```javascript - js_code - ``` - ```bash - bash_code - ``` - """ - file_type = "md" - # Prepare outputs. - expected = """ - ```python - <<>> - ``` - ```javascript - <<>> - ``` - ```bash - <<>> - ``` - """ - expected_map_size = 3 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test5(self) -> None: - """ - Test extracting HTML single-line comment. - """ - # Prepare inputs. - txt = """ - Text before. - - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - <<>> - Text after. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test6(self) -> None: - """ - Test extracting HTML multi-line comment. - """ - # Prepare inputs. - txt = """ - Text before. - - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - <<>> - Text after. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test7(self) -> None: - """ - Test extracting LaTeX comment. - """ - # Prepare inputs. - txt = """ - Some LaTeX text. - % This is a LaTeX comment - More text. - """ - file_type = "tex" - # Prepare outputs. - expected = """ - Some LaTeX text. - <<>> - More text. - """ - expected_map_size = 1 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test8(self) -> None: - """ - Test extracting math block. - """ - # Prepare inputs. - txt = """ - Text before. - $$ - E = mc^2 - $$ - Text after. - """ - file_type = "md" - # Prepare outputs. - expected = """ - Text before. - $$ - <<>> - $$ - Text after. - """ - expected_map_size = 1 - # Run test. 
- self.helper(txt, file_type, expected, expected_map_size) - - def test9(self) -> None: - """ - Test fenced block not extracted for tex files. - """ - # Prepare inputs. - txt = """ - LaTeX text. - ``` - This should not be extracted for tex files - ``` - More text. - """ - file_type = "tex" - # Prepare outputs. - expected = """ - LaTeX text. - ``` - This should not be extracted for tex files - ``` - More text. - """ - expected_map_size = 0 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - def test10(self) -> None: - """ - Test mixed content (fenced blocks + comments + normal text). - """ - # Prepare inputs. - txt = """ - # Title - Some text. - ```python - code here - ``` - - $$ - math here - $$ - End. - """ - file_type = "md" - # Prepare outputs. - expected = """ - # Title - Some text. - ```python - <<>> - ``` - <<>> - $$ - <<>> - $$ - End. - """ - expected_map_size = 3 - # Run test. - self.helper(txt, file_type, expected, expected_map_size) - - -# ############################################################################# -# Test__restore_protected_content -# ############################################################################# - - -class Test__restore_protected_content(hunitest.TestCase): - """ - Test the restore_protected_content function. - """ - - def helper( - self, - txt: str, - protected_map: dict, - expected_txt: str, - ) -> None: - """ - Test helper for restore_protected_content. - - :param txt: Input text with placeholders - :param protected_map: Mapping of placeholders to original content - :param expected_txt: Expected output with restored content - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - # Run test. - actual_lines = htexprot.restore_protected_content(lines, protected_map) - # Check outputs. 
- actual = "\n".join(actual_lines) - expected = hprint.dedent( - expected_txt, remove_lead_trail_empty_lines_=True - ) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test restoring single placeholder. - """ - # Prepare inputs. - txt = """ - Text before. - ```python - <<>> - ``` - Text after. - """ - protected_map = { - "<<>>": "def foo():\n return 42" - } - # Prepare outputs. - expected = """ - Text before. - ```python - def foo(): - return 42 - ``` - Text after. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test2(self) -> None: - """ - Test restoring multiple placeholders. - """ - # Prepare inputs. - txt = """ - ```python - <<>> - ``` - <<>> - ``` - <<>> - ``` - """ - protected_map = { - "<<>>": "code1", - "<<>>": "", - "<<>>": "code2", - } - # Prepare outputs. - expected = """ - ```python - code1 - ``` - - ``` - code2 - ``` - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test3(self) -> None: - """ - Test restoring multi-line content from single placeholder. - """ - # Prepare inputs. - txt = """ - Text. - <<>> - More text. - """ - protected_map = { - "<<>>": "" - } - # Prepare outputs. - expected = """ - Text. - - More text. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test4(self) -> None: - """ - Test with empty map (no-op). - """ - # Prepare inputs. - txt = """ - Text line 1. - Text line 2. - Text line 3. - """ - protected_map = {} - # Prepare outputs. - expected = """ - Text line 1. - Text line 2. - Text line 3. - """ - # Run test. - self.helper(txt, protected_map, expected) - - def test5(self) -> None: - """ - Test restoring empty content. - """ - # Prepare inputs. - txt = """ - Before. - ``` - <<>> - ``` - After. - """ - protected_map = {"<<>>": ""} - # Prepare outputs. - expected = """ - Before. - ``` - - ``` - After. - """ - # Run test. 
- self.helper(txt, protected_map, expected) - - -# ############################################################################# -# Test_extract_restore_roundtrip -# ############################################################################# - - -class Test_extract_restore_roundtrip(hunitest.TestCase): - """ - Test that extract followed by restore is identity operation. - """ - - def helper(self, txt: str, file_type: str) -> None: - """ - Test helper for roundtrip (extract then restore). - - :param txt: Input text - :param file_type: File type ('md', 'txt', or 'tex') - """ - # Prepare inputs. - lines = txt.split("\n") - lines = hprint.dedent(lines, remove_lead_trail_empty_lines_=True) - original = "\n".join(lines) - # Run test. - extracted_lines, protected_map = htexprot.extract_protected_content( - lines, file_type - ) - restored_lines = htexprot.restore_protected_content( - extracted_lines, protected_map - ) - # Check outputs. - actual = "\n".join(restored_lines) - self.assert_equal(actual, original) - - def test1(self) -> None: - """ - Test roundtrip with fenced blocks. - """ - # Prepare inputs. - txt = """ - # Title - Some text. - ```python - def foo(): - return 42 - ``` - More text. - """ - file_type = "md" - # Run test. - self.helper(txt, file_type) - - def test2(self) -> None: - """ - Test roundtrip with mixed content. - """ - # Prepare inputs. - txt = """ - Text. - ```python - code - ``` - - $$ - E = mc^2 - $$ - End. - """ - file_type = "md" - # Run test. - self.helper(txt, file_type) - - def test3(self) -> None: - """ - Test roundtrip with LaTeX comments. - """ - # Prepare inputs. - txt = """ - LaTeX text. - % Comment 1 - More text. - % Comment 2 - End. - """ - file_type = "tex" - # Run test. - self.helper(txt, file_type) - - def test4(self) -> None: - """ - Test roundtrip with no protected content. - """ - # Prepare inputs. - txt = """ - Just regular text. - No special content here. - Just plain paragraphs. - """ - file_type = "md" - # Run test. 
- self.helper(txt, file_type) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py deleted file mode 100644 index ff57a87c0..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htimer.py +++ /dev/null @@ -1,24 +0,0 @@ -import logging -import time - -import helpers.htimer as htimer -import helpers.hunit_test as hunitest - - -# ############################################################################# -# TestTimedScope -# ############################################################################# - - -class TestTimedScope(hunitest.TestCase): - def test_1(self) -> None: - """ - Test that elapsed time is correctly computed. - """ - # Run the function to test. - with htimer.TimedScope(logging.INFO, "Test") as ts: - time.sleep(1) - # Round actual time up to 1 decimal and compare it with expected. 
- actual_rounded_time = round(ts.elapsed_time, 1) - expected_rounded_time = 1.0 - self.assertEqual(actual_rounded_time, expected_rounded_time) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py deleted file mode 100644 index 808a2221e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_htraceback.py +++ /dev/null @@ -1,474 +0,0 @@ -import logging -from typing import List - -import helpers.hdbg as hdbg -import helpers.hprint as hprint -import helpers.htraceback as htraceb -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_Traceback1 -# ############################################################################# - - -class Test_Traceback1(hunitest.TestCase): - def test_parse0(self) -> None: - txt = """ - - TEST - Traceback - TEST - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - txt = hprint.dedent(txt) - _LOG.debug("txt=\n%s", txt) - purify_from_client = False - # Run the function under test. - act_cfile, act_traceback = htraceb.parse_traceback( - txt, purify_from_client=purify_from_client - ) - # Check. 
- exp_traceback = """Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": -NameError: name 'repo_short_name' is not defined - TEST TEST TEST""" - self.assertEqual(act_traceback, exp_traceback) - - # pylint: disable=line-too-long - # TODO(gp): Add test and fix for the following traceback: - - # Bug1: - # Traceback (most recent call last): - # File "/Users/saggese/src/venv/amp.client_venv/bin/invoke", line 8, in - # sys.exit(program.run()) - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 373, in run - # self.parse_collection() - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 465, in parse_collection - # self.load_collection() - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/program.py", line 696, in load_collection - # module, parent = loader.load(coll_name) - # File "/Users/saggese/src/venv/amp.client_venv/lib/python3.9/site-packages/invoke/loader.py", line 76, in load - # module = imp.load_module(name, fd, path, desc) - # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 234, in load_module - # return load_source(name, filename, file) - # File "/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/imp.py", line 171, in load_source - # module = _load(spec) - # File "", line 711, in _load - # File "", line 680, in _load_unlocked - # File "", line 855, in exec_module - # File "", line 228, in _call_with_frames_removed - # File 
"/Users/saggese/src/lem1/amp/tasks.py", line 8, in - # from helpers.lib_tasks import set_default_params # This is not an invoke target. - # File "/Users/saggese/src/lem1/amp/helpers/lib_tasks.py", line 23, in - # import helpers.hgit as hgit - # File "/Users/saggese/src/lem1/amp/helpers/git.py", line 16, in - # import helpers.hsystem as hsystem - # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 - # signature2 = _compute_file_signature(file_name, dir_depth) - # ^ - # SyntaxError: invalid syntax - # Traceback (most recent call last): - # File "/Users/saggese/src/lem1/amp/dev_scripts/tg.py", line 21, in - # import helpers.hsystem as hsystem - # File "/Users/saggese/src/lem1/amp/helpers/system_interaction.py", line 529 - # signature2 = _compute_file_signature(file_name, dir_depth) - # ^ - # SyntaxError: invalid syntax - - # Bug2: - # Traceback (most recent call last): - # File "/app/amp/dataflow/pipelines/real_time/test/test_dataflow_amp_real_time_pipeline.py", line 46, in test1 - # ) = mdmdinex.get_ReplayedTimeMarketData_example2( - # TypeError: get_ReplayedTimeMarketData_example2() got an unexpected keyword argument 'df' - # - # 13:34:45 INFO traceback_to_cfile : _main : 76 : in_file_name=log.txt - # 13:34:45 INFO parser : read_file : 304 : Reading from 'log.txt' - # 13:34:45 ERROR traceback_to_cfile : _main : 87 : Can't find traceback in the file - - # Bug3: - # =================================== FAILURES =================================== - # _________________________ TestGetDataForInterval.test1 _________________________ - # Traceback (most recent call last): - # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3361, in get_loc - # return self._engine.get_loc(casted_key) - # File "pandas/_libs/index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc - # File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc - # File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in 
pandas._libs.hashtable.PyObjectHashTable.get_item - # File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item - # KeyError: 'end_ts' - # - # The above exception was the direct cause of the following exception: - # - # Traceback (most recent call last): - # File "/app/amp/market_data/test/test_market_data_client.py", line 46, in test1 - # data = market_data_client.get_data_for_interval( - # File "/app/amp/market_data/market_data.py", line 212, in get_data_for_interval - # df = self._get_data( - # File "/app/amp/market_data/market_data_client.py", line 93, in _get_data - # market_data["start_ts"] = market_data["end_ts"] - pd.Timedelta( - # File "/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 3458, in __getitem__ - # indexer = self.columns.get_loc(key) - # File "/venv/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc - # raise KeyError(key) from err - # KeyError: 'end_ts' - - # Bug4: - # dataflow/model/test/test_experiment_utils.py::Test_get_configs_from_command_line1::test1 (0.01 s) FAILED [100%] - # - # =================================== FAILURES =================================== - # __________________ Test_get_configs_from_command_line1.test1 ___________________ - # Traceback (most recent call last): - # File "/app/dataflow/model/test/test_experiment_utils.py", line 35, in test1 - # configs = dtfmoexuti.get_configs_from_command_line(args) - # File "/app/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - # configs = cconfig.get_configs_from_builder(config_builder) - # File "/app/config_root/config/builder.py", line 48, in get_configs_from_builder - # imp = importlib.import_module(import_) - # File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - # return _bootstrap._gcd_import(name[level:], package, level) - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 961, in 
_find_and_load_unlocked - # File "", line 219, in _call_with_frames_removed - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 961, in _find_and_load_unlocked - # File "", line 219, in _call_with_frames_removed - # File "", line 1014, in _gcd_import - # File "", line 991, in _find_and_load - # File "", line 973, in _find_and_load_unlocked - # ModuleNotFoundError: No module named 'research' - # ============================= slowest 3 durations ============================== - - # pylint: enable=line-too-long - - def _parse_traceback_helper( - self, - txt: str, - purify_from_client: bool, - exp_cfile: str, - exp_traceback: str, - ) -> None: - hdbg.dassert_isinstance(txt, str) - hdbg.dassert_isinstance(exp_cfile, str) - hdbg.dassert_isinstance(exp_traceback, str) - txt = hprint.dedent(txt) - _LOG.debug("txt=\n%s", txt) - # Run the function under test. - act_cfile, act_traceback = htraceb.parse_traceback( - txt, purify_from_client=purify_from_client - ) - _LOG.debug("act_cfile=\n%s", act_cfile) - _LOG.debug("act_traceback=\n%s", act_traceback) - # Compare cfile. - act_cfile = htraceb.cfile_to_str(act_cfile) - exp_cfile = hprint.dedent(exp_cfile) - _LOG.debug(hprint.to_str("exp_cfile act_cfile")) - self.assert_equal( - act_cfile, exp_cfile, fuzzy_match=True, purify_text=True - ) - # Compare traceback. - # Handle `None`. - act_traceback = str(act_traceback) - exp_traceback = hprint.dedent(exp_traceback) - _LOG.debug(hprint.to_str("exp_traceback act_traceback")) - self.assert_equal( - act_traceback, exp_traceback, fuzzy_match=True, purify_text=True - ) - - def test_parse1(self) -> None: - """ - Parse traceback with all files from Docker that actually exist in the - current repo. 
- """ - txt = """ - - TEST - Traceback - TEST - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "/app/amp/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "/app/amp/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - purify_from_client = False - # pylint: disable=line-too-long - exp_cfile = [ - ( - "$GIT_ROOT/helpers/test/test_lib_tasks.py", - 27, - "test_get_gh_issue_title2:actual = ltasks._get_gh_issue_title(issue_id, repo)", - ), - ( - "$GIT_ROOT/helpers/lib_tasks.py", - 1265, - "_get_gh_issue_title:task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name)", - ), - ( - "$GIT_ROOT/helpers/git.py", - 397, - 'get_task_prefix_from_repo_short_name:if repo_short_name == "amp":', - ), - ] - exp_cfile = htraceb.cfile_to_str(exp_cfile) - # pylint: enable=line-too-long - exp_traceback = """ - Traceback (most recent call last): - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 27, in test_get_gh_issue_title2 - actual = ltasks._get_gh_issue_title(issue_id, repo) - File "$GIT_ROOT/helpers/lib_tasks.py", line 1265, in _get_gh_issue_title - task_prefix = hgit.get_task_prefix_from_repo_short_name(repo_short_name) - File "$GIT_ROOT/helpers/git.py", line 397, in get_task_prefix_from_repo_short_name - if repo_short_name == "amp": - NameError: name 'repo_short_name' is not defined - TEST TEST TEST - """ - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse_empty_traceback1(self) -> None: - """ - Parse an empty traceback file. 
- """ - txt = """ - - TEST - Traceback - TEST TEST TEST - """ - purify_from_client = True - exp_cfile: List[htraceb.CfileRow] = [] - exp_cfile = htraceb.cfile_to_str(exp_cfile) - exp_traceback = "None" - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse2(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # Use references to this file so that we are independent of the file - # layout. - # pylint: disable=line-too-long - txt = """ - Traceback (most recent call last): - File "./helpers/test/test_htraceback.py", line 146, in - _main(_parse()) - File "./helpers/test/test_htraceback.py", line 105, in _main - configs = cdtfut.get_configs_from_command_line(args) - File "/app/amp/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line - "config_builder": args.config_builder, - """ - purify_from_client = True - exp_cfile = """ - helpers/test/test_htraceback.py:146::_main(_parse()) - helpers/test/test_htraceback.py:105:_main:configs = cdtfut.get_configs_from_command_line(args) - helpers/test/test_htraceback.py:228:get_configs_from_command_line:"config_builder": args.config_builder, - """ - exp_traceback = """ - Traceback (most recent call last): - File "./helpers/test/test_htraceback.py", line 146, in - _main(_parse()) - File "./helpers/test/test_htraceback.py", line 105, in _main - configs = cdtfut.get_configs_from_command_line(args) - File "$GIT_ROOT/./helpers/test/test_htraceback.py", line 228, in get_configs_from_command_line - "config_builder": args.config_builder, - """ - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse3(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # Use references to this file so that we are independent from the file - # layout. 
- # pylint: disable=line-too-long - txt = """ - collected 6 items - - helpers/test/test_lib_tasks.py::Test_pytest_failed1::test_classes1 (0.02 s) FAILED [ 16%] - - =================================== FAILURES =================================== - ______________________ Test_pytest_failed1.test_classes1 _______________________ - Traceback (most recent call last): - File "/app/amp/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 - self._helper(file_name, target_type, expected) - File "/app/amp/helpers/test/test_lib_tasks.py", line 1440, in _helper - actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "/app/amp/helpers/lib_tasks.py", line 2140, in pytest_failed - hdbg.dassert(m, "Invalid test='%s'", test) - File "/app/amp/helpers/dbg.py", line 129, in dassert - _dfatal(txt, msg, *args) - File "/app/amp/helpers/dbg.py", line 117, in _dfatal - dfatal(dfatal_txt) - File "/app/amp/helpers/dbg.py", line 63, in dfatal - raise assertion_type(ret) - AssertionError: - * Failed assertion * - cond=None - Invalid test='dev_scripts/testing/test/test_run_tests.py' - """ - # pylint: enable=line-too-long - purify_from_client = False - exp_cfile = """ - $GIT_ROOT/helpers/test/test_lib_tasks.py:1460:test_classes1:self._helper(file_name, target_type, expected) - $GIT_ROOT/helpers/test/test_lib_tasks.py:1440:_helper:actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - /venv/lib/python3.8/site-packages/invoke/tasks.py:127:__call__:result = self.body(*args, **kwargs) - $GIT_ROOT/helpers/lib_tasks.py:2140:pytest_failed:hdbg.dassert(m, "Invalid test='%s'", test) - $GIT_ROOT/helpers/dbg.py:129:dassert:_dfatal(txt, msg, *args) - $GIT_ROOT/helpers/dbg.py:117:_dfatal:dfatal(dfatal_txt) - $GIT_ROOT/helpers/dbg.py:63:dfatal:raise assertion_type(ret)""" - exp_traceback = r""" - Traceback (most recent call last): - File 
"$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1460, in test_classes1 - self._helper(file_name, target_type, expected) - File "$GIT_ROOT/helpers/test/test_lib_tasks.py", line 1440, in _helper - actual = ltasks.pytest_failed(ctx, use_frozen_list=use_frozen_list, - File "/venv/lib/python3.8/site-packages/invoke/tasks.py", line 127, in __call__ - result = self.body(*args, **kwargs) - File "$GIT_ROOT/helpers/lib_tasks.py", line 2140, in pytest_failed - hdbg.dassert(m, "Invalid test='%s'", test) - File "$GIT_ROOT/helpers/dbg.py", line 129, in dassert - _dfatal(txt, msg, *args) - File "$GIT_ROOT/helpers/dbg.py", line 117, in _dfatal - dfatal(dfatal_txt) - File "$GIT_ROOT/helpers/dbg.py", line 63, in dfatal - raise assertion_type(ret) - AssertionError: - * Failed assertion * - cond=None - Invalid test='dev_scripts/testing/test/test_run_tests.py' - """ - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse4(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. 
- """ - # pylint: disable=line-too-long - txt = """ - =================================== FAILURES =================================== - ____________ TestEgSingleInstrumentDataReader2.test_true_real_time1 ____________ - Traceback (most recent call last): - File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, in test_true_real_time1 - self._execute_node(node) - File "/app/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node - dict_ = node.fit() - File "/app/amp/core/dataflow/nodes/sources.py", line 385, in fit - self.df = self._get_data_until_current_time() - File "/app/amp/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time - df = self._get_data() - File "/app/amp/core/dataflow/nodes/sources.py", line 574, in _get_data - hdbg.dassert_lte(df.index.max(), current_time) - File "/app/amp/helpers/dbg.py", line 172, in dassert_lte - cond = val1 <= val2 - TypeError: '<=' not supported between instances of 'float' and 'Timestamp' - ============================= slowest 3 durations ============================== - """ - purify_from_client = False - exp_cfile = r""" - $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:182:test_true_real_time1:self._execute_node(node) - $GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py:238:_execute_node:dict_ = node.fit() - $GIT_ROOT/core/dataflow/nodes/sources.py:385:fit:self.df = self._get_data_until_current_time() - $GIT_ROOT/core/dataflow/nodes/sources.py:429:_get_data_until_current_time:df = self._get_data() - $GIT_ROOT/core/dataflow/nodes/sources.py:574:_get_data:hdbg.dassert_lte(df.index.max(), current_time) - $GIT_ROOT/helpers/dbg.py:172:dassert_lte:cond = val1 <= val2/TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" - exp_traceback = r""" - Traceback (most recent call last): - File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 182, 
in test_true_real_time1 - self._execute_node(node) - File "$GIT_ROOT/core_lime/dataflow/nodes/test/test_core_lime_dataflow_nodes.py", line 238, in _execute_node - dict_ = node.fit() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 385, in fit - self.df = self._get_data_until_current_time() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 429, in _get_data_until_current_time - df = self._get_data() - File "$GIT_ROOT/core/dataflow/nodes/sources.py", line 574, in _get_data - hdbg.dassert_lte(df.index.max(), current_time) - File "$GIT_ROOT/helpers/dbg.py", line 172, in dassert_lte - cond = val1 <= val2 - TypeError: '<=' not supported between instances of 'float' and 'Timestamp'""" - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) - - def test_parse5(self) -> None: - """ - Parse a traceback file with both files from Docker and local files. - """ - # pylint: disable=line-too-long - txt = """ - Traceback (most recent call last): - File "/app/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "/app/amp/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "/app/amp/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked - ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' - """ - purify_from_client = False - exp_cfile = """ - $GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py:37:test1:configs = dtfmoexuti.get_configs_from_command_line(args) - 
$GIT_ROOT/dataflow/model/experiment_utils.py:195:get_configs_from_command_line:configs = cconfig.get_configs_from_builder(config_builder) - $GIT_ROOT/config_root/config/builder.py:46:get_configs_from_builder:imp = importlib.import_module(import_) - /usr/lib/python3.8/importlib/__init__.py:127:import_module:return _bootstrap._gcd_import(name[level:], package, level) - :1014:_gcd_import: - :991:_find_and_load: - :973:_find_and_load_unlocked: - """ - exp_traceback = """ - Traceback (most recent call last): - File "$GIT_ROOT/dataflow_lm/pipelines/E8/test/test_E8d_configs.py", line 37, in test1 - configs = dtfmoexuti.get_configs_from_command_line(args) - File "$GIT_ROOT/dataflow/model/experiment_utils.py", line 195, in get_configs_from_command_line - configs = cconfig.get_configs_from_builder(config_builder) - File "$GIT_ROOT/config_root/config/builder.py", line 46, in get_configs_from_builder - imp = importlib.import_module(import_) - File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name[level:], package, level) - File "", line 1014, in _gcd_import - File "", line 991, in _find_and_load - File "", line 973, in _find_and_load_unlocked - ModuleNotFoundError: No module named 'dataflow_lm.pipelines.E8.8Ed_configs' - """ - # pylint: enable=line-too-long - self._parse_traceback_helper( - txt, purify_from_client, exp_cfile, exp_traceback - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py deleted file mode 100644 index a6e1e2ef6..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test.py +++ /dev/null @@ -1,954 +0,0 @@ -""" -Import as: - -import helpers.test.test_unit_test as ttutes -""" - -import logging -import tempfile -from 
typing import Optional, Tuple - -import pandas as pd -import pytest - -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur - -_LOG = logging.getLogger(__name__) - - -def _git_add(file_name: str) -> None: - # pylint: disable=unreachable - cmd = f"git add -u {file_name}" - _LOG.debug("> %s", cmd) - rc = hsystem.system(cmd, abort_on_error=False) - if rc: - _LOG.warning( - "Can't run '%s': you need to add the file manually", - cmd, - ) - - -def _to_skip_on_update_outcomes() -> bool: - """ - Determine whether to skip on `--update_outcomes`. - - Some tests can't pass with `--update_outcomes`, since they exercise - the logic in `--update_outcomes` itself. - - We can't always use `@pytest.mark.skipif(hunitest.get_update_tests)` - since pytest decides which tests need to be run before the variable - is actually set. - """ - to_skip = False - if hunitest.get_update_tests(): - _LOG.warning( - "Skip this test since it exercises the logic for --update_outcomes" - ) - to_skip = True - return to_skip - - -# ############################################################################# -# TestTestCase1 -# ############################################################################# - - -class TestTestCase1(hunitest.TestCase): - """ - Test free-standing functions in unit_test.py. - """ - - def test_get_input_dir1(self) -> None: - """ - Test hunitest.get_input_dir(). 
- """ - actual = self.get_input_dir() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir1/input" - self.assertEqual(actual, expected) - - def test_get_input_dir2(self) -> None: - use_only_test_class = False - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/input" - self.assertEqual(actual, expected) - - def test_get_input_dir3(self) -> None: - use_only_test_class = False - test_class_name = None - test_method_name = None - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_input_dir3/input" - self.assertEqual(actual, expected) - - def test_get_input_dir4(self) -> None: - use_only_test_class = True - test_class_name = None - test_method_name = None - actual = self.get_input_dir( - use_only_test_class=use_only_test_class, - test_class_name=test_class_name, - test_method_name=test_method_name, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1/input" - self.assertEqual(actual, expected) - - def test_get_output_dir1(self) -> None: - """ - Test hunitest.get_output_dir(). 
- """ - actual = self.get_output_dir() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_output_dir1/output" - self.assertEqual(actual, expected) - - def test_get_scratch_space1(self) -> None: - """ - Test hunitest.get_scratch_space(). - """ - actual = self.get_scratch_space() - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ( - "$GIT_ROOT/helpers/test/outcomes/TestTestCase1.test_get_scratch_space1" - "/tmp.scratch" - ) - self.assertEqual(actual, expected) - - def test_get_scratch_space2(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_scratch_space( - test_class_name=test_class_name, test_method_name=test_method_name - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ( - "$GIT_ROOT/helpers/test/outcomes/test_class.test_method/tmp.scratch" - ) - self.assertEqual(actual, expected) - - def test_get_scratch_space3(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - use_absolute_path = False - actual = self.get_scratch_space( - test_class_name=test_class_name, - test_method_name=test_method_name, - use_absolute_path=use_absolute_path, - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = "outcomes/test_class.test_method/tmp.scratch" - self.assertEqual(actual, expected) - - def test_get_s3_scratch_dir1(self) -> None: - actual = self.get_s3_scratch_dir() - _LOG.debug("actual=%s", actual) - # It is difficult to test, so we just execute. 
- - def test_get_s3_scratch_dir2(self) -> None: - test_class_name = "test_class" - test_method_name = "test_method" - actual = self.get_s3_scratch_dir( - test_class_name=test_class_name, test_method_name=test_method_name - ) - _LOG.debug("actual=%s", actual) - # It is difficult to test, so we just execute. - - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_assert_not_equal1(self) -> None: - actual = "hello world" - expected = "hello world w" - tmp_dir = tempfile.mkdtemp() - with self.assertRaises(RuntimeError): - self.assert_equal(actual, expected, dst_dir=tmp_dir) - - def test_assert_not_equal2(self) -> None: - actual = "hello world" - expected = "hello world w" - # Create a dir like `/var/tmp/tmph_kun9xq`. - tmp_dir = tempfile.mkdtemp() - self.assert_equal( - actual, expected, abort_on_error=False, dst_dir=tmp_dir - ) - # Compute the signature from the dir. - actual = hunitest.get_dir_signature( - tmp_dir, include_file_content=True, num_lines=None - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - actual = actual.replace(tmp_dir, "$TMP_DIR") - # pylint: disable=line-too-long - expected = """ - # Dir structure - $TMP_DIR - $TMP_DIR/tmp_diff.sh - # File signatures - len(file_names)=1 - file_names=$TMP_DIR/tmp_diff.sh - # $TMP_DIR/tmp_diff.sh - num_lines=8 - ''' - #!/bin/bash - if [[ $1 == "wrap" ]]; then - cmd='vimdiff -c "windo set wrap"' - else - cmd='vimdiff' - fi; - cmd="$cmd helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.actual.txt helpers/test/outcomes/TestTestCase1.test_assert_not_equal2/tmp.final.expected.txt" - eval $cmd - - ''' - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert_equal_fuzzy_match1(self) -> None: - actual = "hello world" - expected = "hello world " - is_equal = self.assert_equal(actual, expected, fuzzy_match=True) - 
self.assertTrue(is_equal) - - def test_assert_equal5(self) -> None: - actual = "hello world" - expected = "hello world2" - with self.assertRaises(RuntimeError): - self.assert_equal(actual, expected, fuzzy_match=True) - - def _remove_lines1(self) -> None: - txt = r""" - # ##################################################################### - * Failed assertion * - 'in1' not in '{'in1': 'out1'}' - ## - `in1` already receiving input from node n1 - # ##################################################################### - # ##################################################################### - """ - actual = hunitest._remove_spaces(txt) - expected = r""" - * Failed assertion * - 'in1' not in '{'in1': 'out1'}' - ## - `in1` already receiving input from node n1 - # ##################################################################### - """ - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# Test_AssertEqual1 -# ############################################################################# - - -class Test_AssertEqual1(hunitest.TestCase): - def test_equal1(self) -> None: - """ - Matching actual and expected without fuzzy matching. - """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - is_equal = hunitest.assert_equal(actual, expected, test_name, test_dir) - _LOG.debug(hprint.to_str("is_equal")) - self.assertTrue(is_equal) - - def test_equal2(self) -> None: - """ - Matching actual and expected with fuzzy matching. 
- """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - fuzzy_match = True - is_equal = hunitest.assert_equal( - actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match - ) - _LOG.debug(hprint.to_str("is_equal")) - self.assertTrue(is_equal) - - def test_not_equal1(self) -> None: - """ - Mismatching actual and expected. - """ - actual = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -""" - test_name = self._get_test_name() - test_dir = self.get_scratch_space() - fuzzy_match = False - with self.assertRaises(RuntimeError) as cm: - hunitest.assert_equal( - actual, expected, test_name, test_dir, fuzzy_match=fuzzy_match - ) - # Check that the assertion is what expected. 
- actual = str(cm.exception) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - expected = ''' --------------------------------------------------------------------------------- -ACTUAL vs EXPECTED: Test_AssertEqual1.test_not_equal1 --------------------------------------------------------------------------------- - - ( -completed failure Lint Run_linter | completed failure Lint Run_linter -completed success Lint Fast_tests ( -completed success Lint Slow_tests ( -Diff with: -> ./tmp_diff.sh --------------------------------------------------------------------------------- -ACTUAL VARIABLE: Test_AssertEqual1.test_not_equal1 --------------------------------------------------------------------------------- -expected = r""" -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests -"""''' - if actual != expected: - hio.to_file("actual.txt", actual) - hio.to_file("expected.txt", expected) - self.assert_equal(actual, expected, fuzzy_match=False) - # We don't use self.assert_equal() since this is exactly we are testing, - # so we use a trusted function. - self.assertEqual(actual, expected) - - # For debugging: don't commit code with this test enabled. - @pytest.mark.skip( - reason="This is only used to debug the debugging the infrastructure" - ) - def test_not_equal_debug(self) -> None: - """ - Create a mismatch on purpose to see how the suggested updated to - expected variable looks like. 
- """ - actual = r"""empty -start - -completed failure Lint Run_linter -completed success Lint Fast_tests -completed success Lint Slow_tests - -end - -""" - expected = "hello" - self.assert_equal(actual, expected, fuzzy_match=False) - - -# ############################################################################# -# TestCheckString1 -# ############################################################################# - - -class TestCheckString1(hunitest.TestCase): - def test_check_string1(self) -> None: - """ - Compare the actual value to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Overwrite the golden file, so that --update_golden doesn't matter. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string(actual) - # Actual match the golden outcome and it wasn't updated. - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_string_not_equal1(self) -> None: - """ - Compare the actual value to a mismatching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - # Actual doesn't match the golden outcome. 
- self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - - def test_check_string_not_equal2(self) -> None: - """ - Compare the actual value to a mismatching golden outcome and udpate it. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # Force updating the golden outcomes. - self.mock_update_tests() - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - new_golden = hio.from_file(file_name) - _git_add(file_name) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - # The golden outcome was updated. - self.assertEqual(new_golden, "hello world") - - def test_check_string_not_equal3(self) -> None: - """ - Like test_check_string_not_equal1() but raising the exception. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - golden_outcome = "hello world2" - # - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.to_file(file_name, golden_outcome) - try: - # Check. - with self.assertRaises(RuntimeError): - self.check_string(actual) - finally: - # Clean up. - hio.to_file(file_name, golden_outcome) - _git_add(file_name) - - def test_check_string_missing1(self) -> None: - """ - When running with --update_outcomes, the golden outcome was missing and - so it was added. - - This tests the code path when action_on_missing_golden="update". - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - # Force updating the golden outcomes. 
- self.mock_update_tests() - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False - ) - hdbg.dassert_file_exists(file_name) - new_golden = hio.from_file(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - def test_check_string_missing2(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="assert", and the unit test framework - asserted. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False, action_on_missing_golden="assert" - ) - hdbg.dassert_file_exists(file_name + ".tmp") - new_golden = hio.from_file(file_name + ".tmp") - finally: - # Clean up. - hio.delete_file(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertFalse(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - def test_check_string_missing3(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="update", and the unit test framework updates - the golden. - """ - if _to_skip_on_update_outcomes(): - return - actual = "hello world" - tag = "test" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. 
- outcome_updated, file_exists, is_equal = self.check_string( - actual, abort_on_error=False, action_on_missing_golden="update" - ) - hdbg.dassert_file_exists(file_name) - new_golden = hio.from_file(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # - self.assertEqual(new_golden, "hello world") - - -# ############################################################################# -# TestCheckDataFrame1 -# ############################################################################# - - -class TestCheckDataFrame1(hunitest.TestCase): - """ - Some of these tests can't pass with `--update_outcomes`, since they - exercise the logic in `--update_outcomes` itself. - """ - - def _check_df_helper( - self, actual: pd.DataFrame, abort_on_error: bool, err_threshold: float - ) -> Tuple[bool, bool, Optional[bool]]: - golden_outcomes = pd.DataFrame( - [[0, 1, 2], [3, 4, 5]], columns="a b c".split() - ) - # - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - # Overwrite the golden file, so that --update_golden doesn't matter. - hio.create_enclosing_dir(file_name, incremental=True) - golden_outcomes.to_csv(file_name) - try: - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, - abort_on_error=abort_on_error, - err_threshold=err_threshold, - ) - finally: - # Clean up. - golden_outcomes.to_csv(file_name) - _git_add(file_name) - return outcome_updated, file_exists, is_equal - - def test_check_df_equal1(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. 
- """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.0001 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_equal2(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.01, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_equal3(self) -> None: - """ - Compare the actual value of a df to a matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.05, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome matches the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertTrue(is_equal) - - def test_check_df_not_equal1(self) -> None: - """ - Compare the actual value of a df to a non-matching golden outcome. 
- """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = False - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome doesn't match the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - exp_error_msg = """ - actual= - a b c - 0 0 1.06 2 - 1 3 4.00 5 - expected= - a b c - 0 0 1 2 - 1 3 4 5 - actual_masked= - [[ nan 1.06 nan] - [ nan nan nan]] - expected_masked= - [[nan 1. nan] - [nan nan nan]] - err= - [[ nan 0.06 nan] - [ nan nan nan]] - max_err=0.060 - """ - self.assert_equal(self._error_msg, exp_error_msg, fuzzy_match=True) - - def test_check_df_not_equal2(self) -> None: - """ - Compare the actual value of a df to a not matching golden outcome. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a d c".split()) - abort_on_error = False - err_threshold = 0.05 - outcome_updated, file_exists, is_equal = self._check_df_helper( - actual, abort_on_error, err_threshold - ) - # Actual outcome doesn't match the golden outcome and it wasn't updated. - self.assertFalse(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - - def test_check_df_not_equal3(self) -> None: - """ - Compare the actual value to a mismatching golden outcome and update it. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - golden_outcome = pd.DataFrame( - [[0, 2, 2], [3, 4, 5]], columns="a b c".split() - ) - # Force updating the golden outcomes. - self.mock_update_tests() - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - # Modify the golden. - hio.create_enclosing_dir(file_name, incremental=True) - golden_outcome.to_csv(file_name) - try: - # Check. 
- outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False - ) - # - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.to_file(file_name, str(golden_outcome)) - _git_add(file_name) - # Actual doesn't match the golden outcome and it was updated. - self.assertTrue(outcome_updated) - self.assertTrue(file_exists) - self.assertFalse(is_equal) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_not_equal4(self) -> None: - """ - Like `test_check_df_not_equal1()` but raising the exception. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1.06, 2], [3, 4, 5]], columns="a b c".split()) - abort_on_error = True - err_threshold = 0.05 - with self.assertRaises(RuntimeError): - self._check_df_helper(actual, abort_on_error, err_threshold) - - def test_check_df_missing1(self) -> None: - """ - When running with --update_outcomes, the golden outcome was missing and - so it was added. - - This tests the code path when action_on_missing_golden="update". - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - # Force updating the golden outcomes. - self.mock_update_tests() - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - _LOG.debug(hprint.to_str("file_name")) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False - ) - hdbg.dassert_file_exists(file_name) - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.delete_file(file_name) - _git_add(file_name) - # Expected outcome doesn't exists and it was updated. - self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertFalse(is_equal) - # Check golden. 
- self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_missing2(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="assert", and the unit test framework - asserted. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False, action_on_missing_golden="assert" - ) - hdbg.dassert_file_exists(file_name + ".tmp") - new_golden = pd.read_csv(file_name + ".tmp", index_col=0) - hdbg.dassert_path_not_exists(file_name) - finally: - # Clean up. - hio.delete_file(file_name) - # Expected outcome doesn't exists and it was not updated. - self.assertFalse(outcome_updated) - self.assertFalse(file_exists) - self.assertIs(is_equal, None) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - def test_check_df_missing3(self) -> None: - """ - Without running with --update_outcomes, the golden outcome was missing, - action_on_missing_golden="update", and the unit test framework updates - the golden. - """ - if _to_skip_on_update_outcomes(): - return - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - tag = "test_df" - _, file_name = self._get_golden_outcome_file_name(tag) - try: - # Remove the golden. - hio.delete_file(file_name) - # Check. - outcome_updated, file_exists, is_equal = self.check_dataframe( - actual, abort_on_error=False, action_on_missing_golden="update" - ) - hdbg.dassert_file_exists(file_name) - new_golden = pd.read_csv(file_name, index_col=0) - finally: - # Clean up. - hio.delete_file(file_name) - # Expected outcome doesn't exists and it was not updated. 
- self.assertTrue(outcome_updated) - self.assertFalse(file_exists) - self.assertIs(is_equal, None) - # Check golden. - self.assert_equal(str(new_golden), str(actual)) - - -# ############################################################################# -# Test_check_string_debug1 -# ############################################################################# - - -class Test_check_string_debug1(hunitest.TestCase): - def test1(self) -> None: - actual = "hello" - # action_on_missing_golden = "assert" - action_on_missing_golden = "update" - self.check_string( - actual, action_on_missing_golden=action_on_missing_golden - ) - - def test2(self) -> None: - actual = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns="a b c".split()) - # action_on_missing_golden = "assert" - action_on_missing_golden = "update" - self.check_dataframe( - actual, action_on_missing_golden=action_on_missing_golden - ) - - -# ############################################################################# -# Test_get_dir_signature1 -# ############################################################################# - - -class Test_get_dir_signature1(hunitest.TestCase): - def helper(self, include_file_content: bool) -> str: - in_dir = self.get_input_dir() - actual = hunitest.get_dir_signature( - in_dir, include_file_content, num_lines=None - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - return actual # type: ignore[no-any-return] - - def test1(self) -> None: - """ - Test dir signature excluding the file content. 
- """ - include_file_content = False - actual = self.helper(include_file_content) - # pylint: disable=line-too-long - expected = r""" - # Dir structure - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0 - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.pkl - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/config.txt - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_0/run_notebook.0.log - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1 - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.pkl - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/config.txt - $GIT_ROOT/helpers/test/outcomes/Test_get_dir_signature1.test1/input/result_1/run_notebook.1.log - """ - # pylint: enable=line-too-long - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test dir signature including the file content. - """ - include_file_content = True - actual = self.helper(include_file_content) - # The golden outcome is long and uninteresting so we use check_string. 
- self.check_string(actual, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py deleted file mode 100644 index 14910d1f5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_mock.py +++ /dev/null @@ -1,288 +0,0 @@ -import logging -import unittest.mock as umock -from typing import Any - -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - - -def _check(self: Any, str_to_eval: str, exp_val: str) -> None: - """ - Evaluate `str_to_eval` and compare it to expected value `exp_val`. - """ - # The variable lives 3 levels in the stack trace from here. - act_val = hprint.to_str(str_to_eval, frame_level=3) - _LOG.debug("%s", act_val) - self.assert_equal(act_val, exp_val, purify_text=True) - - -# ############################################################################# -# _Class -# ############################################################################# - - -class _Class: - def __init__(self) -> None: - self.a = 3 - self.b = 14 - - def get_a(self) -> int: - return self.a - - def get_b(self) -> int: - return self.b - - -# ############################################################################# -# _TestCase -# ############################################################################# - - -class _TestCase(hunitest.TestCase): - def check(self, *args, **kwargs) -> None: - _check(self, *args, **kwargs) - - -# ############################################################################# -# Test_Mock1 -# ############################################################################# - - -# References -# - https://docs.python.org/3/library/unittest.mock.html -# - 
https://realpython.com/python-mock-library/ -# -# - Mocks are used to imitate objects in the code base and need to have the same -# interface of objects they are replacing -# - `Mock` and `MagicMock` objects -# - avoid to create stubs by creating attributes and methods as they are -# accessed -# - accessing the same attribute returns the same mock -# - can be configured to specify return values -# - store details of how they have been used -# - After execution, one can make assertions about how mocks have been used - -# umock.Mockspec -# :param spec: specification for the mock object, e.g., using a class to create -# the proper interface - - -# ############################################################################# -# Test_Mock1 -# ############################################################################# - - -class Test_Mock1(_TestCase): - """ - - A `Mock` creates attributes / methods as you access them - - The return value of a mocked attribute / method is also a `Mock` - """ - - def test_lazy_attributes1(self) -> None: - """ - Assigning a class attribute on a Mock creates a Mock. - """ - obj = umock.Mock() - # obj is a Mock object. - self.check("obj", "obj=") - # Calling an attribute creates a Mock. - self.check("obj.a", "obj.a=") - # Assigning an attribute in the mock creates an attribute. - obj.a = 3 - self.check("obj.a", "obj.a=3") - - def test_lazy_methods1(self) -> None: - """ - Calling a method on a Mock creates a Mock. - """ - # Mock json module `import json`. - json = umock.Mock() - self.check("json", "json=") - # Create a function on the fly that returns a mock. - v = json.dumps() - self.assertTrue(isinstance(v, umock.Mock)) - self.check("json.dumps", "json.dumps=") - # The mocked function and the returned value from a mock function are - # different mocks. 
- self.check("v", "v=") - self.check("type(v)", "type(v)=") - self.check( - "json.dumps()", "json.dumps()=" - ) - self.assertTrue(isinstance(json.dumps, umock.Mock)) - self.assertNotEqual(id(v), id(json.dumps)) - - def test_assert1(self) -> None: - """ - Check what function was called. - """ - json = umock.Mock() - json.loads("hello") - # Check that the mocked function was called as expected. - json.loads.assert_called() - json.loads.assert_called_once() - json.loads.assert_called_with("hello") - self.assertEqual(json.loads.call_count, 1) - - def test_str1(self) -> None: - mock = umock.Mock() - # Calling `str()` on a mock creates a mock on the fly. - self.check("str(mock)", "str(mock)=\"\"") - # Assign a mocked function returning "hello" to mock.__str__. - mock.__str__ = umock.Mock(return_value="hello") - self.assertEqual(str(mock), "hello") - # One can't assign the return value, like one would do with a MagicMock. - # mock.__str__.return_value = "hello" - - def test_spec1(self) -> None: - # Create a Mock based on the class `_Class`. - mock = umock.Mock(spec=_Class) - # - self.assertTrue(isinstance(mock, _Class)) - mock.get_a = umock.Mock(return_value=3) - self.assertEqual(mock.get_a(), 3) - - -# ############################################################################# -# Test_MagicMock1 -# ############################################################################# - - -class Test_MagicMock1(_TestCase): - """ - A `MagicMock` is a subclass of `Mock` with some magic methods already - created. - """ - - def test_get1(self) -> None: - """ - Assign a MagicMock using array notation. - """ - mock = umock.MagicMock() - # MagicMock automatically infer `__get_item__()`. - mock[3] = "fish" - # Check. - mock.__setitem__.assert_called_with(3, "fish") - - def test_get2(self) -> None: - mock = umock.MagicMock() - mock.__getitem__.return_value = "result" - - def test_str1(self) -> None: - """ - Mock `str()` method. - """ - mock = umock.MagicMock() - # Mock `str()`. 
- mock.__str__.return_value = "foobar" - # Check. - self.assertEqual(str(mock), "foobar") - mock.__str__.assert_called_with() - - -# ############################################################################# -# Test_Mock_Class1 -# ############################################################################# - - -class Test_Mock_Class1(_TestCase): - def test_without_mock1(self) -> None: - obj = _Class() - self.assertEqual(obj.get_a(), 3) - self.assertEqual(obj.get_b(), 14) - - def test_with_mock1(self) -> None: - obj = _Class() - # Mock method `get_a()`. - obj.get_a = umock.MagicMock(return_value=4) - # Check. - self.assertEqual(obj.get_a(), 4) - obj.get_a.assert_called() - - def test_with_mock2(self) -> None: - obj = _Class() - # Mock method `get_a()`. - obj.get_a = umock.MagicMock(side_effect=KeyError("foo")) - # Check. - with self.assertRaises(KeyError) as cm: - obj.get_a() - # - actual = str(cm.exception) - expected = "'foo'" - self.assert_equal(actual, expected) - obj.get_a.assert_called() - - -# ############################################################################# -# Test_Mock_Class_with_decorator1 -# ############################################################################# - -# `umock.patch()` -# - replaces classes in a particular module with a Mock object -# - by default creates a MagicMock - -# `umock.patch.object(target, attribute)` patches the named member "attribute" -# on the object "target" with a mock object. - - -# ############################################################################# -# Test_Mock_Class_with_decorator1 -# ############################################################################# - - -class Test_Mock_Class_with_decorator1(_TestCase): - @umock.patch.object(_Class, "get_a", return_value=4) - def test1(self, mock_method: umock.MagicMock) -> None: - """ - Patch method of an object using a decorator. - """ - obj = _Class() - # Check. 
- # self.assertIs(mock_method, umock.MagicMock) - self.check( - "mock_method", "mock_method=" - ) - self.assertEqual(obj.get_a(), 4) - mock_method.assert_called() - obj.get_a.assert_called() - - -# ############################################################################# -# Test_Mock_Class_with_context_manager1 -# ############################################################################# - - -class Test_Mock_Class_with_context_manager1(_TestCase): - def test1(self) -> None: - """ - Patch an object method using a context manager. - """ - # Inside the context manager, the method is mocked. - with umock.patch.object(_Class, "get_a", return_value=4): - obj = _Class() - # Check. - self.check( - "obj.get_a", "obj.get_a=" - ) - self.assertEqual(obj.get_a(), 4) - obj.get_a.assert_called() - # Outside the context manager everything is normal. - obj = _Class() - # Check. - self.check( - "obj.get_a", - "obj.get_a=>", - ) - self.assertEqual(obj.get_a(), 3) - - def test_dict1(self) -> None: - """ - Patch a dictionary. - """ - foo = {"key": "value"} - with umock.patch.dict(foo, {"key": "new_value"}, clear=True): - self.assertEqual(foo["key"], "new_value") - # Outside the context manager everything is normal. 
- self.assertEqual(foo["key"], "value") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py deleted file mode 100644 index 6488621a1..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_purification.py +++ /dev/null @@ -1,1065 +0,0 @@ -""" -Import as: - -import helpers.test.test_hunit_test_purification as thuntepur -""" - -import datetime -import logging -import os -import unittest.mock as umock -from typing import Any, List - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_purify_text1 -# ############################################################################# - - -class Test_purify_text1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str, **kwargs: Any) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(txt) - self.assert_equal(actual, expected, **kwargs) - - def test1(self) -> None: - txt = "amp/helpers/test/test_system_interaction.py" - expected = "helpers/test/test_system_interaction.py" - self.check_helper(txt, expected) - - def test2(self) -> None: - txt = "amp/helpers/test/test_system_interaction.py" - expected = "helpers/test/test_system_interaction.py" - self.check_helper(txt, expected) - - def test3(self) -> None: - txt = "['amp/helpers/test/test_system_interaction.py']" - expected = "['helpers/test/test_system_interaction.py']" - 
self.check_helper(txt, expected) - - def test4(self) -> None: - txt = "app.helpers.test.test_system_interaction.py" - expected = "helpers.test.test_system_interaction.py" - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test that longer paths are processed before shorter ones. - """ - txt = "/home/user/project/src/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user" - expected = "$GIT_ROOT/src/file.py" - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test that paths with multiple occurrences of the same pattern are - processed correctly. - """ - txt = "/home/user/project/src/project/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user" - expected = "$GIT_ROOT/src/project/file.py" - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test that paths with multiple patterns are processed in the correct - order. - """ - txt = "/home/user/project/src/project/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user/project/src" - expected = "$GIT_ROOT/src/project/file.py" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test that paths with no matching patterns are left unchanged. 
- """ - txt = "/home/user/other/file.py" - with ( - umock.patch("helpers.hgit.get_client_root") as mock_git_root, - umock.patch("os.getcwd") as mock_pwd, - ): - mock_git_root.return_value = "/home/user/project" - mock_pwd.return_value = "/home/user/project/src" - expected = "/home/user/other/file.py" - self.check_helper(txt, expected) - - def test9(self) -> None: - super_module_path = hgit.get_client_root(super_module=True) - # TODO(gp): We should remove the current path. - # pylint: disable=line-too-long - txt = r""" - ************* Module input [pylint] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] - $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] - cmd line='$SUPER_MODULE/dev_scripts/linter.py -f $SUPER_MODULE/amp/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $SUPER_MODULE/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. 
[pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] - """ - txt = hprint.dedent(txt) - txt = txt.replace("$SUPER_MODULE", super_module_path) - expected = r""" - ************* Module input [pylint] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py: Your code has been rated at -10.00/10 (previous run: -10.00/10, +0.00) [pylint] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:20: W605 invalid escape sequence '\s' [flake8] - $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3:9: F821 undefined name 're' [flake8] - cmd line='$GIT_ROOT/dev_scripts/linter.py -f $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py --linter_log $GIT_ROOT/dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/linter.log' - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [E0602(undefined-variable), ] Undefined variable 're' [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: [W1401(anomalous-backslash-in-string), ] Anomalous backslash in string: '\s'. String constant might be missing an r prefix. [pylint] - dev_scripts/test/Test_linter_py1.test_linter1/tmp.scratch/input.py:3: error: Name 're' is not defined [mypy] - """ - # pylint: enable=line-too-long - self.check_helper(txt, expected, dedent=True) - - def test10(self) -> None: - """ - Test case when client root path is equal to `/` - """ - # pylint: disable=redefined-outer-name - hgit = umock.Mock() - hgit.get_client_root.return_value = "/" - txt = "/tmp/subdir1" - expected = txt - self.check_helper(txt, expected) - - def test11(self) -> None: - """ - Test the correct order of `app` -> `amp` purification with multiple - import statements. 
- """ - txt = """ - import app.amp.helpers_root.helpers.test.test_file - from app.amp.helpers_root.helpers.hprint import dedent - import app.amp.helpers.config - from amp.app.helpers.config import get_config - import amp.app.helpers_root.config - """ - expected = """ - import helpers.test.test_file - from helpers.hprint import dedent - import helpers.config - from helpers.config import get_config - import helpers.config - """ - self.check_helper(txt, expected) - - def test12(self) -> None: - """ - Test amp and app purification in file path strings. - """ - txt = """ - app/amp/helpers_root/helpers/test/test_file.py - amp/app/helpers_root/helpers/test/test_file.py - """ - expected = """ - helpers/test/test_file.py - helpers/test/test_file.py - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_directory_paths1 -# ############################################################################# - - -class Test_purify_directory_paths1(hunitest.TestCase): - def check_helper(self, input_: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_directory_paths(input_) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test1(self) -> None: - """ - Test the replacement of `GIT_ROOT`. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/gitroot/src/subdir/file.py" - expected = "$GIT_ROOT/src/subdir/file.py" - self.check_helper(input_, expected) - - def test2(self) -> None: - """ - Test the replacement of `CSFY_HOST_GIT_ROOT_PATH`. 
- """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/csfy_host_git_root/other/file.py" - expected = "$CSFY_HOST_GIT_ROOT_PATH/other/file.py" - self.check_helper(input_, expected) - - def test3(self) -> None: - """ - Test the replacement of `PWD`. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user/csfy_host_git_root"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/documents/file.py" - expected = "$PWD/documents/file.py" - self.check_helper(input_, expected) - - def test4(self) -> None: - """ - Test the replacement when `GIT_ROOT`, `CSFY_HOST_GIT_ROOT_PATH` and - current working directory are the same. - """ - with ( - umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user" - ), - umock.patch.dict( - "os.environ", - {"CSFY_HOST_GIT_ROOT_PATH": "/home/user"}, - clear=True, - ), - umock.patch("os.getcwd", return_value="/home/user"), - ): - input_ = "/home/user/file.py" - expected = "$GIT_ROOT/file.py" - self.check_helper(input_, expected) - - -# ############################################################################# -# Test_purify_from_environment1 -# ############################################################################# - - -class Test_purify_from_environment1(hunitest.TestCase): - def check_helper(self, input_: str, expected: str) -> None: - try: - # Manually set a user name to test the behaviour. - hsystem.set_user_name("root") - # Run. 
- text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_from_environment(input_) - self.assert_equal(actual, expected, fuzzy_match=True) - finally: - # Reset the global user name variable regardless of a test results. - hsystem.set_user_name(None) - - def test1(self) -> None: - input_ = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-root-1.0.0" - expected = "IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0" - self.check_helper(input_, expected) - - def test2(self) -> None: - input_ = "--name root.amp_test.app.app" - expected = "--name $USER_NAME.amp_test.app.app" - self.check_helper(input_, expected) - - def test3(self) -> None: - input_ = "run --rm -l user=root" - expected = "run --rm -l user=$USER_NAME" - self.check_helper(input_, expected) - - def test4(self) -> None: - input_ = "run_docker_as_root='True'" - expected = "run_docker_as_root='True'" - self.check_helper(input_, expected) - - def test5(self) -> None: - input_ = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" - expected = "out_col_groups: [('root_q_mv',), ('root_q_mv_adj',), ('root_q_mv_os',)]" - self.check_helper(input_, expected) - - -# ############################################################################# -# Test_purify_amp_reference1 -# ############################################################################# - - -class Test_purify_amp_reference1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - txt = hprint.dedent(txt) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_amp_references(txt) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Remove the reference to `amp.`. 
- """ - txt = """ - * Failed assertion * - Instance '' - of class '_Man' is not a subclass of '' - """ - expected = r""" - * Failed assertion * - Instance '' - of class '_Man' is not a subclass of '' - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test removing multiple amp references in a single string. - """ - txt = """ - ImportError: No module named 'amp.helpers.test.test_file' - """ - expected = r""" - ImportError: No module named 'helpers.test.test_file' - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test removing amp references in file paths. - """ - txt = """ - File "/home/user/amp/helpers/test/test_dbg.py", line 10 - File "/home/user/amp/helpers/test/test_file.py", line 20 - """ - expected = r""" - File "/home/user/helpers/test/test_dbg.py", line 10 - File "/home/user/helpers/test/test_file.py", line 20 - """ - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test removing amp references in import statements. - """ - txt = """ - from amp.helpers.test import test_dbg - import amp.helpers.test.test_file - from amp.helpers.test.test_dbg import _Man - """ - expected = r""" - from helpers.test import test_dbg - import helpers.test.test_file - from helpers.test.test_dbg import _Man - """ - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test removing amp references in docstrings and comments. - """ - txt = """ - # This is a test for amp.helpers.test.test_dbg - """ - expected = r""" - # This is a test for helpers.test.test_dbg - """ - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test removing amp references in error messages with multiple - occurrences. 
- """ - txt = """ - Error in amp.helpers.test.test_dbg: Invalid input - Error in amp.helpers.test.test_file: File not found - Error in amp.helpers.test.test_dbg: Permission denied - """ - expected = r""" - Error in helpers.test.test_dbg: Invalid input - Error in helpers.test.test_file: File not found - Error in helpers.test.test_dbg: Permission denied - """ - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test that longer amp paths are processed before shorter ones. - """ - txt = "amp/helpers/amp/test/test_file.py" - expected = "helpers/test/test_file.py" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test that nested amp references are processed correctly. - """ - txt = "amp.helpers.test.amp.TestClass" - expected = "helpers.test.amp.TestClass" - self.check_helper(txt, expected) - - def test9(self) -> None: - """ - Test removing amp references from test creation comments with various - module paths. - """ - txt = """ - # Test created for amp.helpers.test.test_file - # Test created for amp.core.dataflow.model - # Test created for amp.helpers.test.test_dbg._Man - """ - expected = r""" - # Test created for helpers.test.test_file - # Test created for core.dataflow.model - # Test created for helpers.test.test_dbg._Man - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_app_references1 -# ############################################################################# - - -class Test_purify_app_references1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_app_references(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test app.helpers reference removal. 
- """ - txt = "app.helpers.test.test_file" - expected = "helpers.test.test_file" - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test app.amp.helpers reference removal. - """ - txt = "app.amp.helpers.test.test_file" - expected = "amp.helpers.test.test_file" - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test app.amp.helpers_root.helpers reference removal. - """ - txt = "app.amp.helpers_root.helpers.test.test_file" - expected = "amp.helpers.test.test_file" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test multiple app references in the same string. - """ - txt = """ - app.helpers.test.test_file - app.amp.helpers.test.test_file - app.amp.helpers_root.helpers.test.test_file - """ - expected = """ - helpers.test.test_file - amp.helpers.test.test_file - amp.helpers.test.test_file - """ - self.check_helper(txt, expected) - - def test5(self) -> None: - """ - Test that longer app paths are processed before shorter ones. - """ - txt = "app/helpers/app/test/test_file.py" - expected = "helpers/test/test_file.py" - self.check_helper(txt, expected) - - def test6(self) -> None: - """ - Test that app.amp.helpers_root references are processed before app.amp. - """ - txt = "app.amp.helpers_root.helpers.test.TestClass" - expected = "amp.helpers.test.TestClass" - self.check_helper(txt, expected) - - def test7(self) -> None: - """ - Test string with no app references. - """ - txt = "path/to/file.txt" - expected = "path/to/file.txt" - self.check_helper(txt, expected) - - def test8(self) -> None: - """ - Test removing app references from test creation comments with various - module paths. 
- """ - txt = """ - # Test created for app.helpers.test.test_file - # Test created for app.core.dataflow.model - # Test created for app.helpers.test.test_dbg._Man - """ - expected = r""" - # Test created for helpers.test.test_file - # Test created for core.dataflow.model - # Test created for helpers.test.test_dbg._Man - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_from_env_vars -# ############################################################################# - - -# TODO(ShaopengZ): numerical issue. (arm vs x86) -@pytest.mark.requires_ck_infra -class Test_purify_from_env_vars(hunitest.TestCase): - """ - Test purification from env vars. - """ - - def check_helper(self, env_var: str) -> None: - env_var_value = os.environ[env_var] - input_ = f"s3://{env_var_value}/" - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_from_env_vars(input_) - expected = f"s3://${env_var}/" - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == "//cmamp", - reason="Run only in //cmamp", - ) - def test1(self) -> None: - """ - - $CSFY_AWS_S3_BUCKET - """ - env_var = "CSFY_AWS_S3_BUCKET" - self.check_helper(env_var) - - -# TODO(gp): HelpersTask1 -# @pytest.mark.skipif( -# not hrecouti.get_repo_config().get_name() == "//cmamp", -# reason="Run only in //cmamp", -# ) -# def test_end_to_end(self) -> None: -# """ -# - Multiple env vars. 
-# """ -# #am_aws_s3_bucket = os.environ["AM_AWS_S3_BUCKET"] -# csfy_aws_s3_bucket = os.environ["CSFY_AWS_S3_BUCKET"] -# # -# text = f""" -# $AM_AWS_S3_BUCKET = {am_aws_s3_bucket} -# $CSFY_AWS_S3_BUCKET = {csfy_aws_s3_bucket} -# """ -# # -# text_purifier = huntepur.TextPurifier() -# actual = text_purifier.purify_from_env_vars(text) -# self.check_string(actual, fuzzy_match=True) - - -# ############################################################################# -# Test_purify_object_representation1 -# ############################################################################# - - -class Test_purify_object_representation1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - txt = hprint.dedent(txt) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_object_representation(txt) - expected = hprint.dedent(expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - txt = """ - load_prices: {'source_node_name': 'RealTimeDataSource object - at 0x7f571c329b50 - """ - expected = r""" - load_prices: {'source_node_name': 'RealTimeDataSource object - at 0x""" - self.check_helper(txt, expected) - - def test2(self) -> None: - txt = """ - load_prices: {'source_node_name at 0x7f571c329b51': - 'RealTimeDataSource object at 0x7f571c329b50 - """ - expected = r""" - load_prices: {'source_node_name at 0x': - 'RealTimeDataSource object at 0x""" - self.check_helper(txt, expected) - - def test3(self) -> None: - txt = """ - load_prices: {'source_node_name': 'RealTimeDataSource', - 'source_node_kwargs': {'market_data': - , 'period': 'last_5mins', 'asset_id_col': 'asset_id', - 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', - 'execution_mode': 'real_time', 'process_forecasts_config': - {'market_data': - ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': - datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), - 'ath_end_time': datetime.time(16, 40), 'trading_end_time': - 
datetime.time(16, 4 0)}} - """ - expected = r""" - load_prices: {'source_node_name': 'RealTimeDataSource', - 'source_node_kwargs': {'market_data': - , 'period': 'last_5mins', 'asset_id_col': 'asset_id', - 'multiindex_output': True}} process_forecasts: {'prediction_col': 'close', - 'execution_mode': 'real_time', 'process_forecasts_config': - {'market_data': - ,'portfolio ': , 'order_type': 'price@twap', 'ath_start_time': - datetime.time(9, 30), 'trading_start_time': datetime.time(9, 30), - 'ath_end_time': datetime.time(16, 40), 'trading_end_time': - datetime.time(16, 4 0)}}""" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test replacing wall_clock_time=Timestamp('..., tz='America/New_York')) - """ - txt = """ - _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' - >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , - _dst_dir=None , _fit_at_beginning=False , - _wake_up_timestamp=None , _bar_duration_in_secs=300 , - _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 - 10:05:00-0500', tz='America/New_York'), - wall_clock_time=Timestamp('2022-08-04 09:29:13.441715-0400', - tz='America/New_York')), Event(num_it=2, - current_time=Timestamp('2000-01-01 10:10:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 - 09:29:13.892793-0400', tz='America/New_York')), Event(num_it=3, - current_time=Timestamp('2000-01-01 10:15:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('2022-08-04 - 09:29:14.131619-0400', tz='America/New_York'))] ) - """ - expected = """ - _knowledge_datetime_col_name='timestamp_db' _delay_in_secs='0' - >, 'bar_duration_in_secs': 300, 'rt_timeout_in_secs_or_time': 900} , - _dst_dir=None , _fit_at_beginning=False , - _wake_up_timestamp=None , _bar_duration_in_secs=300 , - _events=[Event(num_it=1, current_time=Timestamp('2000-01-01 - 10:05:00-0500', tz='America/New_York'), - wall_clock_time=Timestamp('xxx', tz='America/New_York')), - Event(num_it=2, 
current_time=Timestamp('2000-01-01 10:10:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('xxx', - tz='America/New_York')), Event(num_it=3, - current_time=Timestamp('2000-01-01 10:15:00-0500', - tz='America/New_York'), wall_clock_time=Timestamp('xxx', - tz='America/New_York'))] ) - """ - txt = " ".join(hprint.dedent(txt).split("\n")) - expected = " ".join(hprint.dedent(expected).split("\n")) - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_today_date1 -# ############################################################################# - - -class Test_purify_today_date1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_today_date(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test replacing today's date and time with placeholders. - """ - today = datetime.date.today() - today_str = today.strftime("%Y%m%d") - txt = f""" - Report generated on {today_str}_103045. - Next run scheduled at {today_str}_235959. - """ - expected = """ - Report generated on YYYYMMDD_HHMMSS. - Next run scheduled at YYYYMMDD_HHMMSS. - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test replacing today's date only with placeholder. - """ - today = datetime.date.today() - today_str = today.strftime("%Y%m%d") - txt = f""" - Backup completed: {today_str}. - Last modified: {today_str}. - """ - expected = """ - Backup completed: YYYYMMDD. - Last modified: YYYYMMDD. - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test to check that non-date-like numbers are not replaced. 
- """ - txt = """ - ID: 20000319_123456 - Code: 20000321 - Reference: 20000320_999999 - """ - expected = """ - ID: 20000319_123456 - Code: 20000321 - Reference: 20000320_999999 - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_white_spaces1 -# ############################################################################# - - -class Test_purify_white_spaces1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_white_spaces(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test removing trailing spaces and tabs. - """ - txt = "Line 1 \nLine 2\t\nLine 3 \t \n" - expected = "Line 1\nLine 2\nLine 3\n" - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test removing trailing spaces and preserving empty lines. - """ - txt = "Line 1\n\n\nLine 2\n\n\n\nLine 3 " - expected = "Line 1\n\n\nLine 2\n\n\n\nLine 3" - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test removing trailing whitespace and preserving leading whitespace. - """ - txt = " \n Line 1\nLine 2\n Line 3 \n " - expected = " \n Line 1\nLine 2\n Line 3\n" - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test preserving intentional whitespace within lines. 
- """ - txt = "Line 1 with spaces\nLine 2\twith\ttabs" - expected = "Line 1 with spaces\nLine 2\twith\ttabs\n" - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_parquet_file_names1 -# ############################################################################# - - -class Test_purify_parquet_file_names1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_parquet_file_names(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test purification of Parquet file names with the path. - - The Parquet file names with the - GUID have to be replaced with the `data.parquet` string. - """ - txt = """ - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/ea5e3faed73941a2901a2128abeac4ca-0.parquet - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/f7a39fefb69b40e0987cec39569df8ed-0.parquet - """ - expected = """ - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=1/data.parquet - s3://some_bucket/root/currency_pair=BTC_USDT/year=2024/month=2/data.parquet - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test purification of Parquet file name without the path. 
- """ - txt = """ - ffa39fffb69b40e0987cec39569df8ed-0.parquet - """ - expected = """ - data.parquet - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_helpers1 -# ############################################################################# - - -class Test_purify_helpers1(hunitest.TestCase): - def check_helper(self, txt: str, expected: str) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_helpers(txt) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test replacing helpers references in import statements. - """ - txt = """ - import helpers_root.helpers.hdbg as hdbg - from helpers_root.helpers.hprint import dedent - import helpers_root.config_root.config as config - """ - expected = """ - import helpers.hdbg as hdbg - from helpers.hprint import dedent - import config_root.config as config - """ - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test replacing helpers references in file paths. - """ - txt = """ - /path/to/helpers/hdbg.py - /path/to/helpers/hprint.py - /path/to/config_root/config.py - """ - expected = """ - /path/to/helpers/hdbg.py - /path/to/helpers/hprint.py - /path/to/config_root/config.py - """ - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test replacing helpers references in docstrings and comments. - """ - txt = """ - import helpers_root.helpers.hdbg - from /path/to/helpers_root/helpers/hprint import dedent - import helpers_root.config_root.config - from /path/to/helpers_root/config_root/config import settings - """ - expected = """ - import helpers.hdbg - from /path/to/helpers/hprint import dedent - import config_root.config - from /path/to/config_root/config import settings - """ - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test that non-matching patterns are not replaced. 
- """ - txt = """ - import other_module - from other_package import helpers - import helpers_utils - path/to/other/helpers/file.py - """ - expected = """ - import other_module - from other_package import helpers - import helpers_utils - path/to/other/helpers/file.py - """ - self.check_helper(txt, expected) - - -# ############################################################################# -# Test_purify_docker_image_name1 -# ############################################################################# - - -class Test_purify_docker_image_name1(hunitest.TestCase): - def test1(self) -> None: - txt = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.edb567be pdflatex -output-directory - """ - expected = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.xxxxxxxx pdflatex -output-directory - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_docker_image_name(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - def test2(self) -> None: - """ - Test patterns like `tmp.latex.aarch64.2f590c86.2f590c86`. 
- """ - txt = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.2f590c86.2f590c86 pdflatex -output-directory - """ - expected = r""" - docker run --rm --user $(id -u):$(id -g) --workdir $GIT_ROOT --mount type=bind,source=/Users/saggese/src/helpers1,target=$GIT_ROOT tmp.latex.aarch64.xxxxxxxx pdflatex -output-directory - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_docker_image_name(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_purify_line_number1 -# ############################################################################# - - -class Test_purify_line_number1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that the text is purified from line numbers correctly. - """ - txt = """ - dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): - in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=list): [('close',), ('volume',)] - out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::286::apply_history_lookback, val_type=tuple): () - """ - expected = r""" - dag_config (marked_as_used=False, writer=None, val_type=config_root.config.config_.Config): - in_col_groups (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=list): [('close',), ('volume',)] - out_col_group (marked_as_used=True, writer=$GIT_ROOT/dataflow/system/system_builder_utils.py::$LINE_NUMBER::apply_history_lookback, val_type=tuple): () - """ - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_line_number(txt) - self.assert_equal(actual, expected, fuzzy_match=True) - - -# 
############################################################################# -# Test_purify_file_names1 -# ############################################################################# - - -class Test_purify_file_names1(hunitest.TestCase): - def check_helper(self, file_names: List[str], expected: List[str]) -> None: - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - actual = "\n".join(str(path) for path in actual) - expected = "\n".join(str(path) for path in expected) - self.assert_equal(actual, expected) - - def test1(self) -> None: - """ - Test basic file name purification with relative paths. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/helpers/test/test_file.py", - "/home/user/gitroot/amp/helpers/test/test_dbg.py", - ] - expected = [ - "helpers/test/test_file.py", - "helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test2(self) -> None: - """ - Test file name purification with nested amp references. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/amp/helpers/amp/test/test_file.py", - "/home/user/gitroot/amp/helpers/test/amp/test_dbg.py", - ] - expected = [ - "helpers/test/test_file.py", - "helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test3(self) -> None: - """ - Test file name purification with app references to ensure that they are - not replaced. - """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [ - "/home/user/gitroot/app/helpers/test/test_file.py", - "/home/user/gitroot/app/amp/helpers/test/test_dbg.py", - ] - expected = [ - "app/helpers/test/test_file.py", - "app/helpers/test/test_dbg.py", - ] - self.check_helper(txt, expected) - - def test4(self) -> None: - """ - Test file name purification with empty list. 
- """ - with umock.patch( - "helpers.hgit.get_client_root", return_value="/home/user/gitroot" - ): - txt = [] - expected = [] - self.check_helper(txt, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py deleted file mode 100644 index 35421d368..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hunit_test_utils.py +++ /dev/null @@ -1,553 +0,0 @@ -import os - -import helpers.hio as hio -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.hunit_test_utils as hunteuti - - -# ############################################################################# -# TestUnitTestRenamer -# ############################################################################# - - -class TestUnitTestRenamer(hunitest.TestCase): - """ - Test class renaming functionality. - """ - - -# ############################################################################# -# TestCases -# ############################################################################# - - - @staticmethod - def helper() -> str: - """ - Create file content. - """ - content = """ -class TestCases(hunitest.TestCase): - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - return content - - -# ############################################################################# -# TestNewCase -# ############################################################################# - - - def test_rename_class1(self) -> None: - """ - Test renaming of existing class. 
- """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer("TestCases", "TestNewCase", root_dir) - actual, _ = renamer._rename_class(content) - expected = """ -class TestNewCase(hunitest.TestCase): - def test_assert_equal1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test_check_string1(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - self.assert_equal(actual, expected) - - def test_rename_class2(self) -> None: - """ - Test renaming of non existing class. - """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer("TestCase", "TestNewCase", root_dir) - actual, _ = renamer._rename_class(content) - # Check if the content of the file was not changed. - self.assert_equal(actual, content) - - -# ############################################################################# -# TestPytestRenameMethod -# ############################################################################# - - -class TestPytestRenameMethod(hunitest.TestCase): - """ - Test method renaming functionality. - """ - - -# ############################################################################# -# TestCases -# ############################################################################# - - - @staticmethod - def helper() -> str: - """ - Create file content. 
- """ - content = """ -class TestCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - - -# ############################################################################# -# TestOtherCases -# ############################################################################# - - -class TestOtherCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - return content - - -# ############################################################################# -# TestCases -# ############################################################################# - - - def test_rename_method1(self) -> None: - """ - Test renaming of existing method. - """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCases.test1", "TestCases.test_new", root_dir - ) - actual, _ = renamer._rename_method(content) - expected = """ -class TestCases(hunitest.TestCase): - def test_new(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - - -# ############################################################################# -# TestOtherCases -# ############################################################################# - - -class TestOtherCases(hunitest.TestCase): - def test1(self) -> None: - actual = "hello world" - expected = actual - self.assert_equal(actual, expected) - - def test10(self) -> None: - actual = "hello world" - self.check_string(actual) - """ - self.assert_equal(actual, expected) - - def test_rename_method2(self) -> None: - """ - Test renaming of non existing method. 
- """ - content = self.helper() - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestOtherCases.test5", "TestOtherCases.test6", root_dir - ) - actual, _ = renamer._rename_method(content) - # Check if the content of the file was not changed. - self.assert_equal(actual, content) - - def test_rename_method3(self) -> None: - """ - Test renaming of invalid method names. - """ - self.helper() - root_dir = os.getcwd() - with self.assertRaises(AssertionError): - hunteuti.UnitTestRenamer( - "TestCases.test10", "TestOtherCases.test6", root_dir - ) - - -# ############################################################################# -# TestPytestRenameOutcomes -# ############################################################################# - - -class TestPytestRenameOutcomes(hunitest.TestCase): - """ - Test golden outcomes directory renaming. - """ - - @staticmethod - def helper(toy_test: str) -> None: - """ - Create the temporary outcome to rename. - - :param toy_test: the name of the toy directory - """ - outcomes_paths = [ - "TestCase.test_check_string1", - "TestCase.test_rename", - "TestCase.test_rename3", - "TestCases.test_rename2", - "TestRename.test_rename1", - ] - for path in outcomes_paths: - outcomes_dir = os.path.join(toy_test, "test/outcomes", path) - hio.create_dir(outcomes_dir, incremental=False) - hio.to_file(f"{outcomes_dir}/test.txt", "Test files.") - cmd = f"git add {toy_test}/" - hsystem.system(cmd, abort_on_error=False, suppress_output=False) - - def _clean_up(self, toy_test: str) -> None: - """ - Remove temporary test directory. - - :param toy_test: the name of the toy directory - """ - cmd = f"git reset {toy_test}/ && rm -rf {toy_test}/" - hsystem.system(cmd, abort_on_error=False, suppress_output=False) - - def test_rename_class_outcomes(self) -> None: - """ - Rename outcome directory. - """ - toy_test = "toyCmTask1279." + self._testMethodName - # Create outcomes directory. 
- test_path = os.path.join(toy_test, "test") - # Create the toy outcomes. - self.helper(toy_test) - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCase", "TestRenamedCase", root_dir - ) - renamer.rename_outcomes( - test_path, - ) - # Check if the dirs were renamed. - outcomes_path = os.path.join(test_path, "outcomes") - outcomes_dirs = os.listdir(outcomes_path) - actual = sorted( - [ - ent - for ent in outcomes_dirs - if os.path.isdir(os.path.join(outcomes_path, ent)) - ] - ) - expected = [ - "TestCases.test_rename2", - "TestRename.test_rename1", - "TestRenamedCase.test_check_string1", - "TestRenamedCase.test_rename", - "TestRenamedCase.test_rename3", - ] - self.assertEqual(actual, expected) - self._clean_up(toy_test) - - def test_rename_method_outcomes(self) -> None: - """ - Rename outcome directory. - """ - toy_test = "toyCmTask1279." + self._testMethodName - # Create outcomes directory. - test_path = os.path.join(toy_test, "test") - # Create the toy outcomes. - self.helper(toy_test) - root_dir = os.getcwd() - renamer = hunteuti.UnitTestRenamer( - "TestCase.test_rename", - "TestCase.test_method_renamed", - root_dir, - ) - renamer.rename_outcomes( - test_path, - ) - # Check if the dirs were renamed. - outcomes_path = os.path.join(test_path, "outcomes") - outcomes_dirs = os.listdir(outcomes_path) - actual = sorted( - [ - ent - for ent in outcomes_dirs - if os.path.isdir(os.path.join(outcomes_path, ent)) - ] - ) - expected = [ - "TestCase.test_check_string1", - "TestCase.test_method_renamed", - "TestCase.test_rename3", - "TestCases.test_rename2", - "TestRename.test_rename1", - ] - self.assertEqual(actual, expected) - self._clean_up(toy_test) - - -# ############################################################################# -# Test_get_test_file_for_source -# ############################################################################# - - -class Test_get_test_file_for_source(hunitest.TestCase): - """ - Test mapping source files to test files. 
- """ - - def test1(self) -> None: - """ - Source file with existing test file returns the test path. - """ - actual = hunteuti.get_test_file_for_source("helpers/hdbg.py") - expected = "helpers/test/test_hdbg.py" - self.assertEqual(actual, expected) - - def test2(self) -> None: - """ - Source file without test file returns None. - """ - actual = hunteuti.get_test_file_for_source("tasks.py") - self.assertIsNone(actual) - - def test3(self) -> None: - """ - Test file as input returns None. - """ - actual = hunteuti.get_test_file_for_source("helpers/test/test_hdbg.py") - self.assertIsNone(actual) - - -# ############################################################################# -# TestIsTestFile -# ############################################################################# - - -class TestIsTestFile(hunitest.TestCase): - """ - Test test file detection. - """ - - def test_path_with_test_dir(self) -> None: - """ - Path containing /test/ is detected as test file. - """ - actual = hunteuti.is_test_file("helpers/test/test_hdbg.py") - self.assertTrue(actual) - - def test_path_with_test_prefix(self) -> None: - """ - Basename starting with test_ is detected as test file. - """ - actual = hunteuti.is_test_file("helpers/test_hdbg.py") - self.assertTrue(actual) - - def test_path_with_test_suffix(self) -> None: - """ - Basename ending with _test.py is detected as test file. - """ - actual = hunteuti.is_test_file("helpers/hdbg_test.py") - self.assertTrue(actual) - - def test_source_file(self) -> None: - """ - Source file path is not detected as test file. - """ - actual = hunteuti.is_test_file("helpers/hdbg.py") - self.assertFalse(actual) - - def test_nested_path_with_test(self) -> None: - """ - Path with /test/ anywhere is detected as test file. 
- """ - actual = hunteuti.is_test_file( - "dev_scripts_helpers/scraping/test/__init__.py" - ) - self.assertTrue(actual) - - -# ############################################################################# -# TestGetTestFilesForSources -# ############################################################################# - - -class TestGetTestFilesForSources(hunitest.TestCase): - """ - Test mapping lists of source files to test files. - """ - - def test_mixed_files(self) -> None: - """ - Mixed source and test files returns only matched test files. - """ - files = [ - "helpers/hdbg.py", - "helpers/test/test_hdbg.py", - "helpers/hio.py", - ] - actual = hunteuti.get_test_files_for_sources(files) - expected = [ - "helpers/test/test_hdbg.py", - "helpers/test/test_hio.py", - ] - self.assertEqual(sorted(actual), sorted(expected)) - - def test_only_test_files(self) -> None: - """ - Only test files as input returns empty list. - """ - files = [ - "helpers/test/test_hdbg.py", - "helpers/test/test_hio.py", - ] - actual = hunteuti.get_test_files_for_sources(files) - expected = [] - self.assertEqual(actual, expected) - - def test_only_source_files_with_tests(self) -> None: - """ - Source files with existing tests return matching test files. - """ - files = [ - "helpers/hdbg.py", - "helpers/hio.py", - ] - actual = hunteuti.get_test_files_for_sources(files) - expected = [ - "helpers/test/test_hdbg.py", - "helpers/test/test_hio.py", - ] - self.assertEqual(sorted(actual), sorted(expected)) - - def test_source_without_test(self) -> None: - """ - Source file without test file is skipped. - """ - files = ["tasks.py"] - actual = hunteuti.get_test_files_for_sources(files) - expected = [] - self.assertEqual(actual, expected) - - def test_empty_list(self) -> None: - """ - Empty input returns empty list. 
- """ - files = [] - actual = hunteuti.get_test_files_for_sources(files) - expected = [] - self.assertEqual(actual, expected) - - -# ############################################################################# -# TestGetParentDirs -# ############################################################################# - - -class TestGetParentDirs(hunitest.TestCase): - """ - Test extracting minimal parent directories from file list. - """ - - def test_single_file(self) -> None: - """ - Single file returns its parent directory. - """ - files = ["helpers/hdbg.py"] - actual = hunteuti.get_parent_dirs(files) - expected = ["helpers"] - self.assertEqual(actual, expected) - - def test_files_in_same_dir(self) -> None: - """ - Multiple files in same directory return that directory once. - """ - files = [ - "helpers/hdbg.py", - "helpers/hio.py", - ] - actual = hunteuti.get_parent_dirs(files) - expected = ["helpers"] - self.assertEqual(actual, expected) - - def test_files_in_different_dirs(self) -> None: - """ - Files in different directories return all distinct dirs. - """ - files = [ - "dev_scripts_helpers/scraping/process_hn_article.py", - "helpers/hgit.py", - "helpers/lib_tasks_utils.py", - ] - actual = hunteuti.get_parent_dirs(files) - expected = [ - "dev_scripts_helpers/scraping", - "helpers", - ] - self.assertEqual(sorted(actual), sorted(expected)) - - def test_nested_dirs_dedup(self) -> None: - """ - Nested directories are deduplicated to keep only parent. - """ - files = [ - "dev_scripts_helpers/scraping/process_hn_article.py", - "dev_scripts_helpers/scraping/test/__init__.py", - "helpers/hgit.py", - "helpers/lib_tasks_utils.py", - ] - actual = hunteuti.get_parent_dirs(files) - expected = [ - "dev_scripts_helpers/scraping", - "helpers", - ] - self.assertEqual(sorted(actual), sorted(expected)) - - def test_empty_list(self) -> None: - """ - Empty file list returns empty directory list. 
- """ - files = [] - actual = hunteuti.get_parent_dirs(files) - expected = [] - self.assertEqual(actual, expected) - - def test_root_level_files(self) -> None: - """ - Files at root level are handled correctly. - """ - files = [ - "tasks.py", - "pyproject.toml", - ] - actual = hunteuti.get_parent_dirs(files) - expected = ["."] - self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py deleted file mode 100644 index 79aa3ab80..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_hversion.py +++ /dev/null @@ -1,74 +0,0 @@ -import logging - -import helpers.hunit_test as hunitest -import helpers.hversion as hversio - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestVersioning1 -# ############################################################################# - - -class TestVersioning1(hunitest.TestCase): - def test_get_changelog_version1(self) -> None: - """ - Test `cmamp` version. - """ - container_dir_name = "." - code_version = hversio.get_changelog_version(container_dir_name) - _LOG.debug("code_version=%s", code_version) - - def test_get_container_version1(self) -> None: - container_version = hversio.get_container_version() - _LOG.debug("container_version=%s", container_version) - - def test_check_version1(self) -> None: - container_dir_name = "." 
- hversio.check_version(container_dir_name) - - def test__check_version1(self) -> None: - code_version = "1.0.0" - container_version = "1.0.2" - is_ok = hversio._check_version(code_version, container_version) - self.assertFalse(is_ok) - - def test__check_version2(self) -> None: - code_version = "1.0.0" - container_version = "1.0.0" - is_ok = hversio._check_version(code_version, container_version) - self.assertTrue(is_ok) - - def test__check_version3(self) -> None: - code_version = "1.0.0" - container_version = "amp-1.0.0" - is_ok = hversio._check_version(code_version, container_version) - self.assertTrue(is_ok) - - def test_bump_version1(self) -> None: - """ - Test major version bump. - """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="major") - expected = "3.0.0" - self.assertEqual(result, expected) - - def test_bump_version2(self) -> None: - """ - Test minor version bump. - """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="minor") - expected = "2.3.0" - self.assertEqual(result, expected) - - def test_bump_version3(self) -> None: - """ - Test patch version bump. 
- """ - version = "2.2.0" - result = hversio.bump_version(version, bump_type="patch") - expected = "2.2.1" - self.assertEqual(result, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py deleted file mode 100644 index 987b30476..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_joblib_helpers.py +++ /dev/null @@ -1,569 +0,0 @@ -import logging -import os -import time -from typing import Any, List, Optional, Union - -import pytest - -import helpers.hjoblib as hjoblib -import helpers.hprint as hprint -import helpers.hunit_test as hunitest - -_LOG = logging.getLogger(__name__) - -# ############################################################################# - - -def workload_function( - val1: int, - val2: str, - # - **kwargs: Any, -) -> str: - """ - Execute the test workload. - """ - _LOG.info("Starting workload %s", val1) - incremental = kwargs.pop("incremental") - num_attempts = kwargs.pop("num_attempts") - _ = val1, val2, incremental, num_attempts - res: str = hprint.to_str("val1 val2 incremental num_attempts kwargs") - _LOG.debug("res=%s", res) - sleep = 0.01 - # sleep = 2 - time.sleep(sleep) - _LOG.info("Ending workload %s", val1) - if val1 == -1: - raise ValueError(f"Error: {res}") - return res - - -# ############################################################################# -# Test_parallel_execute1 -# ############################################################################# - - -def get_workload1( - randomize: bool, *, seed: Optional[int] = None -) -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 5 tasks that succeeds. 
- """ - tasks = [] - for i in range(5): - # val1, val2 - task = ((i, 2 * i), {f"hello{i}": f"world{2 * i}", "good": "bye"}) - tasks.append(task) - workload: hjoblib.Workload = (workload_function, "workload_function", tasks) - if randomize: - # Randomize workload. - workload = hjoblib.randomize_workload(workload, seed=seed) - return workload - - -# ############################################################################# - - -def _outcome_to_string(outcome: List[str]) -> str: - outcome = "\n".join(sorted(map(str, outcome))) - return outcome - - -def _helper_success( - self_: Any, - workload: hjoblib.Workload, - num_threads: Union[str, int], - abort_on_error: bool, - expected_return: str, - backend: str, -) -> None: - """ - Run a workload that is supposed to succeed and check its result. - """ - dry_run = False - incremental = True - num_attempts = 1 - log_file = os.path.join(self_.get_scratch_space(), "log.txt") - # - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - backend=backend, - ) - # Check. - _LOG.debug("res=%s", str(res)) - actual = _outcome_to_string(res) - self_.assert_equal(actual, expected_return) - - -# ############################################################################# -# Test_parallel_execute1 -# ############################################################################# - - -class Test_parallel_execute1(hunitest.TestCase): - """ - Execute a workload of 5 tasks that all succeed. 
- """ - - # pylint: disable=line-too-long - EXPECTED_RETURN = r"""val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} -val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} -val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} -val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} -val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" - - def test_dry_run1(self) -> None: - """ - Dry-run a workload. - """ - workload = get_workload1(randomize=True) - dry_run = True - num_threads = "serial" - incremental = True - num_attempts = 1 - abort_on_error = True - log_file = os.path.join(self.get_scratch_space(), "log.txt") - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - ) - _LOG.debug("res=%s", str(res)) - self.assertIs(res, None) - - def _run_test(self, num_threads: Union[str, int], backend: str) -> None: - workload = get_workload1(randomize=True) - abort_on_error = True - # - expected_return = self.EXPECTED_RETURN - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - - # pylint: enable=line-too-long - - def test_serial1(self) -> None: - num_threads = "serial" - backend = "" - self._run_test(num_threads, backend) - - def test_parallel_loky1(self) -> None: - num_threads = "1" - backend = "loky" - self._run_test(num_threads, backend) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~6 seconds, see CmTask4951.") - def test_parallel_loky2(self) -> None: - num_threads = "3" - backend = "loky" - self._run_test(num_threads, backend) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = "1" - backend = "asyncio_threading" - self._run_test(num_threads, backend) - - def test_parallel_asyncio_threading2(self) -> None: - 
num_threads = "3" - backend = "asyncio_threading" - self._run_test(num_threads, backend) - - -# ############################################################################# -# Test_parallel_execute2 -# ############################################################################# - - -def get_workload2() -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 1 task that fails. - """ - task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) - tasks = [task] - workload: hjoblib.Workload = (workload_function, "workload_function", tasks) - return workload - - -def _helper_fail( - self_: Any, - workload: hjoblib.Workload, - num_threads: Union[str, int], - abort_on_error: bool, - expected_assertion: str, - backend: str, -) -> None: - dry_run = False - incremental = True - num_attempts = 1 - log_file = os.path.join(self_.get_scratch_space(), "log.txt") - # - with self_.assertRaises(ValueError) as cm: - res = hjoblib.parallel_execute( - workload, - dry_run, - num_threads, - incremental, - abort_on_error, - num_attempts, - log_file, - backend=backend, - ) - # Print result if it succeeds. - _LOG.debug("res=%s", str(res)) - # Check. - actual = str(cm.exception) - self_.assert_equal(actual, expected_assertion) - - -# # To observe the output in real-time. 
-# if __name__ == "__main__": -# hdbg.init_logger(verbosity=logging.INFO) -# workload = get_workload1(randomize=True) -# # num_threads = "serial" -# num_threads = "1" -# # num_threads = "5" -# # backend = "loky" -# backend = "asyncio_threading" -# # backend = "asyncio_multiprocessing" -# abort_on_error = True -# # -# dry_run = False -# incremental = True -# num_attempts = 1 -# log_file = "./log.txt" -# # -# _LOG.info("\n" + hprint.frame("Start workload")) -# with htimer.TimedScope(logging.INFO, "Execute workload"): -# res = hjoblib.parallel_execute( -# workload, -# dry_run, -# num_threads, -# incremental, -# abort_on_error, -# num_attempts, -# log_file, -# backend=backend, -# ) -# _LOG.info("\n" + hprint.frame("Results")) -# import pprint -# -# print(pprint.pformat(res)) - - -# ############################################################################# -# Test_parallel_execute2 -# ############################################################################# - - -class Test_parallel_execute2(hunitest.TestCase): - """ - Execute a workload of 1 task that fails. 
- """ - - # pylint: disable=line-too-long - EXPECTED_STRING = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" - - # pylint: enable=line-too-long - - def _run_test( - self, - abort_on_error: bool, - num_threads: Union[str, int], - backend: str, - should_succeed: bool, - ) -> None: - workload = get_workload2() - # - expected_return = self.EXPECTED_STRING - if should_succeed: - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - else: - _helper_fail( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - - def test_serial1(self) -> None: - num_threads = "serial" - abort_on_error = True - backend = "" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_serial2(self) -> None: - num_threads = "serial" - abort_on_error = False - backend = "" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky1(self) -> None: - num_threads = 2 - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky2(self) -> None: - num_threads = 2 - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = 2 - abort_on_error = True - backend = "asyncio_threading" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading2(self) -> None: - num_threads = 2 - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - 
self._run_test(abort_on_error, num_threads, backend, should_succeed) - - -# ############################################################################# -# Test_parallel_execute3 -# ############################################################################# - - -def get_workload3( - randomize: bool, seed: Optional[int] = None -) -> hjoblib.Workload: - """ - Return a workload for `workload_function()` with 5 tasks succeeding and one - task failing. - """ - workload: hjoblib.Workload = get_workload1(randomize=True) - # Modify the workflow in place. - (workload_func, func_name, tasks) = workload - _ = workload_func, func_name - task = ((-1, 7), {"hello2": "world2", "good2": "bye2"}) - tasks.append(task) - if randomize: - # Randomize workload. - workload = hjoblib.randomize_workload(workload, seed=seed) - return workload - - -# ############################################################################# -# Test_parallel_execute3 -# ############################################################################# - - -class Test_parallel_execute3(hunitest.TestCase): - """ - Execute a workload with 5 tasks that succeed and 1 task that fails. 
- """ - - # pylint: disable=line-too-long - EXPECTED_STRING1 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'}""" - - EXPECTED_STRING2 = r"""Error: val1=-1, val2=7, incremental=True, num_attempts=1, kwargs={'hello2': 'world2', 'good2': 'bye2'} -val1=0, val2=0, incremental=True, num_attempts=1, kwargs={'hello0': 'world0', 'good': 'bye'} -val1=1, val2=2, incremental=True, num_attempts=1, kwargs={'hello1': 'world2', 'good': 'bye'} -val1=2, val2=4, incremental=True, num_attempts=1, kwargs={'hello2': 'world4', 'good': 'bye'} -val1=3, val2=6, incremental=True, num_attempts=1, kwargs={'hello3': 'world6', 'good': 'bye'} -val1=4, val2=8, incremental=True, num_attempts=1, kwargs={'hello4': 'world8', 'good': 'bye'}""" - - # pylint: enable=line-too-long - - def _run_test( - self, - abort_on_error: bool, - num_threads: Union[str, int], - backend: str, - should_succeed: bool, - ) -> None: - workload = get_workload3(randomize=False) - # Since there is an error and `abort_on_error=True` we only get information - # about the failed task. - if should_succeed: - expected_return = self.EXPECTED_STRING2 - _helper_success( - self, - workload, - num_threads, - abort_on_error, - expected_return, - backend, - ) - else: - # Since there is an error and `abort_on_error=True` we only get information - # about the failed task. 
- expected_exception = self.EXPECTED_STRING1 - _helper_fail( - self, - workload, - num_threads, - abort_on_error, - expected_exception, - backend, - ) - - def test_serial1(self) -> None: - num_threads = "serial" - abort_on_error = True - backend = "" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_serial2(self) -> None: - """ - Execute: - - a workload with 5 tasks that succeed and 1 task that fails - - serially - - don't abort because abort_on_error=False - """ - num_threads = "serial" - abort_on_error = False - backend = "" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_loky1(self) -> None: - num_threads = "1" - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.requires_ck_infra - @pytest.mark.slow("~7 seconds.") - def test_parallel_loky2(self) -> None: - num_threads = "3" - abort_on_error = True - backend = "loky" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_loky3(self) -> None: - num_threads = "1" - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - @pytest.mark.slow("~5 seconds.") - def test_parallel_loky4(self) -> None: - num_threads = "3" - abort_on_error = False - backend = "loky" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading1(self) -> None: - num_threads = "1" - abort_on_error = True - backend = "asyncio_threading" - # - should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading2(self) -> None: - num_threads = "3" - abort_on_error = True - backend = "asyncio_threading" - # - 
should_succeed = False - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading3(self) -> None: - num_threads = "1" - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - def test_parallel_asyncio_threading4(self) -> None: - num_threads = "3" - abort_on_error = False - backend = "asyncio_threading" - # - should_succeed = True - self._run_test(abort_on_error, num_threads, backend, should_succeed) - - -# ############################################################################# - - -# ############################################################################# -# Test_joblib_example1 -# ############################################################################# - - -@pytest.mark.skip(reason="Just for experimenting with joblib") -class Test_joblib_example1(hunitest.TestCase): - @staticmethod - def func(val: int) -> int: - print(f"val={val}") - if val == -1: - raise ValueError(f"val={val}") - print(f" out={val}") - return val - - def test1(self) -> None: - """ - Show that when a job fails the entire `joblib.Parallel` fails without - returning anything, but just propagating the exception. 
- """ - # num_threads = 5 - num_threads = 1 - vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - # vals[1] = -1 - vals[5] = -1 - import joblib - - backend = "loky" - res = joblib.Parallel(n_jobs=num_threads, backend=backend, verbose=200)( - joblib.delayed(Test_joblib_example1.func)(val) for val in vals - ) - print(f"res={str(res)}") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py deleted file mode 100644 index 12f04c506..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks.py +++ /dev/null @@ -1,540 +0,0 @@ -# This should only test helper functions from `lib_tasks.py`. -# `test_tasks.py` associated to `tasks.py` should test specific task targets. - -import logging -import os -import re -import unittest.mock as umock -from typing import Dict, Generator - -import invoke -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks as hlibtask -import helpers.lib_tasks_gh as hlitagh -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -def _get_default_params() -> Dict[str, str]: - """ - Get fake params pointing to a different image so we can test the code - without affecting the official images. 
- """ - ecr_base_path = os.environ["CSFY_ECR_BASE_PATH"] - default_params = { - "CSFY_ECR_BASE_PATH": ecr_base_path, - "BASE_IMAGE": "amp_test", - "HELPERS_IMAGE_PROD": f"{ecr_base_path}/helpers:prod", - } - return default_params - - -# ############################################################################# -# _LibTasksTestCase -# ############################################################################# - - -class _LibTasksTestCase(hunitest.TestCase): - """ - Test class injecting default parameters in the `lib_tasks` singleton in - `set_up_test()` and cleaning up the singleton in `tear_down_test()`. - """ - - # This will be run before and after each test. - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - # Run before each test. - self.set_up_test() - yield - # Run after each test. - self.tear_down_test() - - def set_up_test(self) -> None: - params = _get_default_params() - hlitauti.set_default_params(params) - - def tear_down_test(self) -> None: - hlitauti.reset_default_params() - - -# ############################################################################# - - -# TODO(gp): Make it public. -def _build_mock_context_returning_ok() -> invoke.MockContext: - """ - Build a MockContext catching any command and returning rc=0. - """ - ctx = invoke.MockContext( - repeat=True, run={re.compile(".*"): invoke.Result(exited=0)} - ) - return ctx - - -# ############################################################################# -# _CheckDryRunTestCase -# ############################################################################# - - -class _CheckDryRunTestCase(hunitest.TestCase): - """ - Test class running an invoke target with/without dry-run and checking that - the issued commands are what is expected. - """ - - def _check_calls(self, ctx: invoke.MockContext) -> None: - """ - `check_string()` the sequence of commands issued in the context. 
- """ - actual = "\n".join(map(str, ctx.run.mock_calls)) - actual = hprint.remove_non_printable_chars(actual) - self.check_string(actual) - - def _check_output(self, target: str, check: bool = True) -> None: - """ - Dry run target checking that the sequence of commands issued is the - expected one. - """ - ctx = _build_mock_context_returning_ok() - # pylint: disable=exec-used - exec(f"hlibtask.{target}") - # pylint: enable=exec-used - # Check the outcome. - if check: - self._check_calls(ctx) - - -# TODO(gp): We should group the tests by what is tested and not how it's -# tested. E.g. TestDryRunTasks1::test_print_setup and -# TestDryRunTasks2::test_print_setup should go together in a class. - - -# ############################################################################# -# TestDryRunTasks1 -# ############################################################################# - - -class TestDryRunTasks1(hunitest.TestCase): - """ - - Run invoke in dry-run mode from command line - - Compare the output to the golden outcomes - """ - - # TODO(gp): -> TestGitCommands1 - - def dry_run( - self, target: str, dry_run: bool = True, check_string: bool = True - ) -> None: - """ - Invoke the given target with dry run. - - This is used to test the commands that we can't actually - execute. - """ - opts = "--dry" if dry_run else "" - # - # TODO(vitalii): While deploying the container versioning - # we disable the check in the unit tests. Remove `SKIP_VERSION_CHECK=1` - # after CmampTask570 is fixed. - cmd = f"SKIP_VERSION_CHECK=1 invoke {opts} {target} | grep -v INFO | grep -v '>>ENV<<:'" - _, actual = hsystem.system_to_string(cmd) - # - actual = hprint.remove_non_printable_chars(actual) - # docker_ps: sudo=False - regex = r"# \S+:" - actual = hunitest.filter_text(regex, actual) - # - regex = r"(WARN|INFO)\s+hcache.py" - actual = hunitest.filter_text(regex, actual) - # Filter out `no module` warnings. 
- # TODO(Grisha): add the "no module warning" filtering - # to `purify_text()` in `check_string()`. - regex = "WARN.*No module" - actual = hunitest.filter_text(regex, actual) - if check_string: - self.check_string(actual) - - # ######################################################################### - - # TODO(gp): We can't test this since amp and cmamp have now different base image. - # def test_print_setup(self) -> None: - # target = "print_setup" - # self.dry_run(target) - - # The problem is that we use system and not ctx to execute the command, so that - # --dry-run doesn't work. - @pytest.mark.skip(reason="This is actually run") - def test_git_pull(self) -> None: - target = "git_pull" - self.dry_run(target) - - @pytest.mark.skip(reason="This is actually run") - def test_git_fetch_master(self) -> None: - target = "git_fetch_master" - self.dry_run(target) - - @pytest.mark.skip(reason="This is actually run deleting files") - def test_git_clean(self) -> None: - target = "git_clean" - self.dry_run(target) - - # ######################################################################### - # TODO(gp): -> TestDockerCommands1 - - @pytest.mark.slow("~6 sec.") - @pytest.mark.skipif( - hserver.is_inside_ci(), reason="In CI the output is different" - ) - def test_docker_images_ls_repo(self) -> None: - target = "docker_images_ls_repo" - # TODO(gp): amp and cmamp have different version of aws cli and so the - # output is different. - check_string = False - self.dry_run(target, check_string=check_string) - - @pytest.mark.slow("~6 sec.") - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. 
Different golden outcomes in helpers and other repos.", - ) - def test_docker_ps(self) -> None: - target = "docker_ps" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_stats(self) -> None: - target = "docker_stats" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_last(self) -> None: - target = "docker_kill" - self.dry_run(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_all(self) -> None: - target = "docker_kill --all" - self.dry_run(target) - - -# ############################################################################# - - -# ############################################################################# -# TestDryRunTasks2 -# ############################################################################# - - -# Outside CK infra, the class hangs, so we skip it. -@pytest.mark.requires_ck_infra -@pytest.mark.slow(reason="Around 7s") -@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class TestDryRunTasks2(_LibTasksTestCase, _CheckDryRunTestCase): - """ - - Call the invoke task directly from Python - - `check_string()` that the sequence of commands issued by the target is the - expected one using mocks to return ok for every system call. - """ - - def test_print_setup(self) -> None: - target = "print_setup(ctx)" - self._check_output(target) - - def test_git_pull(self) -> None: - target = "git_pull(ctx)" - self._check_output(target) - - def test_git_fetch_master(self) -> None: - target = "git_fetch_master(ctx)" - self._check_output(target) - - def test_git_clean(self) -> None: - target = "git_clean(ctx)" - self._check_output(target) - - # TODO(Grisha): is not it the same as `test_git_clean()`? 
- def test_git_clean2(self) -> None: - target = "git_clean(ctx, dry_run=False)" - self._check_output(target) - - # ######################################################################### - - def test_docker_images_ls_repo(self) -> None: - target = "docker_images_ls_repo(ctx)" - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_all(self) -> None: - target = "docker_kill(ctx, all=True)" - self._check_output(target) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_kill_last(self) -> None: - target = "docker_kill(ctx)" - self._check_output(target) - - def test_docker_ps(self) -> None: - target = "docker_ps(ctx)" - self._check_output(target) - - def test_docker_pull(self) -> None: - target = "docker_pull(ctx)" - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_docker_stats(self) -> None: - target = "docker_stats(ctx)" - self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestGhCommands1 - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr1(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, title='test')" - self._check_output(target) - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. 
- @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr2(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, body='hello_world', title='test')" - self._check_output(target) - - # TODO(ShaopengZ): Outside CK infra, the test hangs, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_gh_create_pr3(self) -> None: - with ( - umock.patch.object( - hgit, "get_branch_name", return_value="AmpTask1_test_branch" - ), - umock.patch.object( - hlitagh, - "_get_repo_full_name_from_cmd", - return_value=("github.com/alphamatic/amp", "amp"), - ), - ): - target = "gh_create_pr(ctx, draft=False, title='test')" - self._check_output(target) - - # TODO(*): Remove skip after migration to `csfy`.` - @pytest.mark.skip( - reason="migration to new repo " - "ref: https://github.com/causify-ai/cmamp/issues/13063" - ) - def test_gh_issue_title(self) -> None: - target = "gh_issue_title(ctx, 1)" - self._check_output(target) - - # TODO(Shaopengz): Outside CK infra, the test hangs, so skip. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_gh_workflow_list(self) -> None: - target = "gh_workflow_list(ctx, filter_by_branch='master')" - self._check_output(target) - - # This is an action with side effects so we can't test it. 
- # def test_gh_workflow_run(self) -> None: - # target = "gh_workflow_run(ctx)" - # self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestGitCommands1 - def test_git_branch_files(self) -> None: - # This test needs a reference to Git master branch. - hgit.fetch_origin_master_if_needed() - # - target = "git_branch_files(ctx)" - self._check_output(target) - - @pytest.mark.skip( - reason="HelpersTask638: Skip Failing test to merge the PR in cmamp" - ) - def test_git_branch_create1(self) -> None: - target = ( - "git_branch_create(ctx, branch_name='AmpTask123_test', " - "only_branch_from_master=False)" - ) - self._check_output(target) - - # TODO(*): Remove skip after migration to `csfy`.` - @pytest.mark.skip( - reason="migration to new repo " - "ref: https://github.com/causify-ai/cmamp/issues/13063" - ) - def test_git_branch_create2(self) -> None: - # Difference between `cmamp` and `kaizenflow`. - target = ( - "git_branch_create(ctx, issue_id=1, only_branch_from_master=False)" - ) - self._check_output(target) - - def test_git_branch_create3(self) -> None: - with self.assertRaises(AssertionError): - target = ( - "git_branch_create(ctx, branch_name='test', issue_id=1, " - "only_branch_from_master=False)" - ) - self._check_output(target, check=False) - - # This is an action with side effects so we can't test it. 
- # def test_git_branch_delete_merged(self) -> None: - # target = "git_branch_delete_merged(ctx)" - # self._check_output(target) - - def test_git_merge_master(self) -> None: - target = "git_merge_master(ctx, abort_if_not_clean=False)" - self._check_output(target) - - # ######################################################################### - # TODO(gp): -> TestLintCommands1 - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint1(self) -> None: - target = "lint(ctx, modified=True)" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint2(self) -> None: - target = "lint(ctx, branch=True)" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - @pytest.mark.skip( - reason="AmpTask1347: Add support for mocking `system*()` " - "functions to unit test" - ) - def test_lint3(self) -> None: - file = __file__ - target = f"lint(ctx, files='{file}')" - # The output depends on the client, so don't check it. - self._check_output(target, check=False) - - def test_find_test_class1(self) -> None: - class_name = self.__class__.__name__ - target = f"find_test_class(ctx, class_name='{class_name}')" - self._check_output(target) - - # ######################################################################### - - @pytest.mark.skipif( - hserver.is_inside_ci(), reason="In CI the output is different" - ) - def test_docker_login(self) -> None: - """ - Instead of using _build_mock_context_returning_ok(), set the return - values more explicitly. 
- """ - stdout = "aws-cli/1.19.49 Python/3.7.6 Darwin/19.6.0 botocore/1.20.49\n" - ctx = invoke.MockContext( - run={ - "aws --version": invoke.Result(stdout), - re.compile("^docker login"): invoke.Result(exited=0), - re.compile("^eval"): invoke.Result(exited=0), - } - ) - hlibtask.docker_login(ctx) - # Check the outcome. - # self._check_calls(ctx) - - -# ############################################################################# - -# TODO(gp): Run test coverage with -# > i run_fast_slow_tests \ -# --pytest-opts="helpers/test/test_lib_tasks.py test/test_tasks.py" \ -# --coverage - -# TODO(gp): Add tests for: -# - print_tasks -# - git_files -# - git_last_commit_files -# - check_python_files -# - docker_stats -# - traceback (with checked in file) -# - lint - - -# ############################################################################# - - -# ############################################################################# -# TestFailing -# ############################################################################# - - -class TestFailing(hunitest.TestCase): - """ - Run a test that fails based on CSFY_FORCE_TEST_FAIL environment variable. 
- """ - - def test_failing(self) -> None: - if os.environ.get("CSFY_FORCE_TEST_FAIL", "") == "1": - self.fail("test failed succesfully") diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py deleted file mode 100644 index 80ea28ffb..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker.py +++ /dev/null @@ -1,494 +0,0 @@ -import logging -import os -import re -import unittest.mock as umock -from typing import Dict, Optional - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.lib_tasks_docker as hlitadoc -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# pylint: disable=protected-access - - -# ############################################################################# -# Test_generate_compose_file1 -# ############################################################################# - - -class Test_generate_compose_file1(hunitest.TestCase): - def helper( - self, - stage: str, - *, - use_privileged_mode: bool = False, - use_sibling_container: bool = False, - shared_data_dirs: Optional[Dict[str, str]] = None, - mount_as_submodule: bool = False, - use_network_mode_host: bool = True, - use_main_network: bool = False, - ) -> None: - txt = [] - # - params = [ - "stage", - "use_privileged_mode", - "use_sibling_container", - "shared_data_dirs", - "mount_as_submodule", - "use_network_mode_host", - ] - txt_tmp = hprint.to_str(" ".join(params)) - txt.append(txt_tmp) - # - file_name = None - txt_tmp = hlitadoc._generate_docker_compose_file( - stage, - use_privileged_mode, - use_sibling_container, - 
shared_data_dirs, - mount_as_submodule, - use_network_mode_host, - use_main_network, - file_name, - ) - # Remove all the env variables that are function of the host. - txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) - txt_tmp = hunitest.filter_text("CSFY_GIT_ROOT_PATH", txt_tmp) - txt_tmp = hunitest.filter_text("CSFY_HELPERS_ROOT_PATH", txt_tmp) - txt_tmp = hunitest.filter_text( - "CSFY_USE_HELPERS_AS_NESTED_MODULE", txt_tmp - ) - txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) - txt.append(txt_tmp) - # - txt = "\n".join(txt) - txt = hunitest.filter_text(r"working_dir", txt) - self.check_string(txt) - - def test1(self) -> None: - self.helper(stage="prod", use_privileged_mode=True) - - def test2(self) -> None: - self.helper( - stage="prod", shared_data_dirs={"/data/shared": "/shared_data"} - ) - - def test3(self) -> None: - self.helper(stage="prod", use_main_network=True) - - # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - hgit.is_in_amp_as_submodule(), reason="Only run in amp directly" - ) - def test4(self) -> None: - self.helper(stage="dev") - - # TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
- @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test5(self) -> None: - self.helper(stage="dev") - - -# ############################################################################# -# Test_generate_compose_file2 -# ############################################################################# - - -class Test_generate_compose_file2(hunitest.TestCase): - def helper( - self, - mock_getcwd: str, - mock_find_git_root: str, - mock_find_helpers_root: str, - mock_is_in_helpers_as_supermodule: bool, - *, - stage: str = "prod", - use_privileged_mode: bool = True, - use_sibling_container: bool = False, - shared_data_dirs: Optional[Dict[str, str]] = None, - mount_as_submodule: bool = False, - use_network_mode_host: bool = True, - use_main_network: bool = False, - ) -> None: - txt = [] - # - params = [ - "stage", - "use_privileged_mode", - "use_sibling_container", - "shared_data_dirs", - "mount_as_submodule", - "use_network_mode_host", - ] - txt_tmp = hprint.to_str(" ".join(params)) - txt.append(txt_tmp) - # - file_name = None - with ( - umock.patch.object(os, "getcwd", return_value=mock_getcwd), - umock.patch.object( - hgit, "find_git_root", return_value=mock_find_git_root - ), - umock.patch.object( - hgit, "find_helpers_root", return_value=mock_find_helpers_root - ), - umock.patch.object( - hgit, - "is_in_helpers_as_supermodule", - return_value=mock_is_in_helpers_as_supermodule, - ), - ): - txt_tmp = hlitadoc._generate_docker_compose_file( - stage, - use_privileged_mode, - use_sibling_container, - shared_data_dirs, - mount_as_submodule, - use_network_mode_host, - use_main_network, - file_name, - ) - # Remove all the env variables that are function of the host. 
- txt_tmp = hunitest.filter_text("CSFY_HOST_", txt_tmp) - txt_tmp = hunitest.filter_text("OPENAI_API_KEY", txt_tmp) - txt.append(txt_tmp) - # - txt = "\n".join(txt) - self.check_string(txt) - - def test1(self) -> None: - """ - Check that file is generated correctly when the repo is `//cmamp`. - """ - self.helper( - mock_getcwd="/data/dummy/src/cmamp1", - mock_find_git_root="/data/dummy/src/cmamp1", - mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - def test2(self) -> None: - """ - Check that file is generated correctly when the repo is `//helpers`. - """ - self.helper( - mock_getcwd="/data/dummy/src/helpers1", - mock_find_git_root="/data/dummy/src/helpers1", - mock_find_helpers_root="/data/dummy/src/helpers1", - mock_is_in_helpers_as_supermodule=True, - ) - - def test3(self) -> None: - """ - Check that file is generated correctly when the repo is `//cmamp` and - `//cmamp/ck.infra` is a runnable dir. - """ - self.helper( - mock_getcwd="/data/dummy/src/cmamp1/ck.infra", - mock_find_git_root="/data/dummy/src/cmamp1", - mock_find_helpers_root="/data/dummy/src/cmamp1/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - def test4(self) -> None: - """ - Check that file is generated correctly when the repo is `//orange`. - """ - self.helper( - mock_getcwd="/data/dummy/src/orange1", - mock_find_git_root="/data/dummy/src/orange1", - mock_find_helpers_root="/data/dummy/src/orange1/amp/helpers_root", - mock_is_in_helpers_as_supermodule=False, - ) - - -# ############################################################################# - - -# ############################################################################# -# TestLibTasksGetDockerCmd1 -# ############################################################################# - - -# TODO(ShaopengZ): This hangs outside CK infra, so we skip it. 
-@pytest.mark.requires_ck_infra -class TestLibTasksGetDockerCmd1(httestlib._LibTasksTestCase): - """ - Test `_get_docker_compose_cmd()`. - """ - - def check(self, actual: str, expected: str) -> None: - # Remove current timestamp (e.g., `20220317_232120``) from the `--name` - # so that the tests pass. - timestamp_regex = r"\.\d{8}_\d{6}" - actual = re.sub(timestamp_regex, "", actual) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_txt_from_client(actual) - # This is required when different repos run Docker with user vs root / remap. - actual = hunitest.filter_text("--user", actual) - self.assert_equal(actual, expected, fuzzy_match=True) - - @pytest.mark.requires_ck_infra - # TODO(gp): After using a single docker file as part of AmpTask2308 - # "Update_amp_container" we can probably run these tests in any repo, so - # we should be able to remove this `skipif`. - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash1(self) -> None: - """ - Command for docker_bash target. - """ - base_image = "" - stage = "dev" - version = "1.0.0" - cmd = "bash" - service_name = "app" - use_entrypoint = False - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - service_name=service_name, - use_entrypoint=use_entrypoint, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - --entrypoint bash \ - app - """ - self.check(actual, expected) - - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash2(self) -> None: - """ - Command for docker_bash with entrypoint. 
- """ - base_image = "" - stage = "local" - version = "1.0.0" - cmd = "bash" - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - print_docker_config=print_docker_config, - ) - expected = r"""IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - app \ - bash """ - self.check(actual, expected) - - @pytest.mark.requires_ck_infra - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_bash3(self) -> None: - """ - Command for docker_bash with some env vars. - """ - base_image = "" - stage = "local" - version = "1.0.0" - cmd = "bash" - extra_env_vars = ["PORT=9999", "SKIP_RUN=1"] - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - extra_env_vars=extra_env_vars, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:local-$USER_NAME-1.0.0 \ - PORT=9999 \ - SKIP_RUN=1 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - app \ - bash - """ - self.check(actual, expected) - - if False: - - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_docker_bash4(self) -> None: - base_image = "" - stage = "dev" - version = "1.0.0" - cmd = "bash" - entrypoint = False - print_docker_config = False - actual = hlitadoc._get_docker_compose_cmd( - base_image, - stage, - version, - cmd, - entrypoint=entrypoint, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - docker compose \ - --file 
$GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.app.app \ - --entrypoint bash \ - app - """ - self.check(actual, expected) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_docker_jupyter1(self) -> None: - base_image = "" - stage = "dev" - version = "1.0.0" - port = 9999 - self_test = True - print_docker_config = False - actual = hlitadoc._get_docker_jupyter_cmd( - base_image, - stage, - version, - port, - self_test, - print_docker_config=print_docker_config, - ) - expected = r""" - IMAGE=$CSFY_ECR_BASE_PATH/amp_test:dev-1.0.0 \ - PORT=9999 \ - docker compose \ - --file $GIT_ROOT/devops/compose/tmp.docker-compose.yml \ - --env-file devops/env/default.env \ - run \ - --rm \ - --name $USER_NAME.amp_test.jupyter_server_test.app \ - --service-ports \ - jupyter_server_test - """ - self.check(actual, expected) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_image_name_valid1 -# ############################################################################# - - -class Test_dassert_is_image_name_valid1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that valid images pass the assertion. - """ - valid_images = [ - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev", - "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:local-saggese-1.0.0", - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:dev-1.0.0", - "sorrentum/cmamp", - ] - for image in valid_images: - hlitadoc.dassert_is_image_name_valid(image) - - def test2(self) -> None: - """ - Check that invalid images do not pass the assertion. - """ - invalid_images = [ - # Missing required parts. - "invalid-image-name", - # Missing stage/version. - "12345.dkr.ecr.us-east-1.amazonaws.com/amp:", - # Invalid version. 
- "12345.dkr.ecr.us-east-1.amazonaws.com/amp:prod-1.0.0-invalid", - ] - # TODO(gp): Add a check for the output. - for image in invalid_images: - with self.assertRaises(AssertionError): - hlitadoc.dassert_is_image_name_valid(image) - - -# ############################################################################# - - -# ############################################################################# -# Test_dassert_is_base_image_name_valid1 -# ############################################################################# - - -class Test_dassert_is_base_image_name_valid1(hunitest.TestCase): - def test1(self) -> None: - """ - Check that valid base images pass the assertion. - """ - valid_base_images = [ - "12345.dkr.ecr.us-east-1.amazonaws.com/amp", - "sorrentum/cmamp", - "ghcr.io/cryptokaizen/cmamp", - ] - for base_image in valid_base_images: - hlitadoc._dassert_is_base_image_name_valid(base_image) - - def test2(self) -> None: - """ - Check that invalid base images do not pass the assertion. - """ - invalid_base_images = [ - # Missing required parts. - "invalid-base-image", - # Extra character at the end. - "abcde.dkr.ecr.us-east-1.amazonaws.com/amp:", - # Extra part in the name. 
- "ghcr.io/cryptokaizen/cmamp/invalid", - ] - for base_image in invalid_base_images: - with self.assertRaises(AssertionError): - hlitadoc._dassert_is_base_image_name_valid(base_image) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py deleted file mode 100644 index ff430ed24..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_docker_release.py +++ /dev/null @@ -1,1530 +0,0 @@ -import logging -import os -import unittest.mock as umock -from typing import Generator, List - -import boto3 -import moto -import pytest - -import helpers.hgit as hgit -import helpers.hunit_test as hunitest -import helpers.lib_tasks_docker as hlitadoc -import helpers.lib_tasks_docker_release as hltadore -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -def _extract_commands_from_call(calls: List[umock._Call]) -> List[str]: - """ - Extract command strings from a list of mock call arguments. - - Example: - calls = [ - ( - # args tuple: (context, command) - (mock_ctx, "docker build --no-cache image1"), - # kwargs dictionary - {"pty": True} - ) - ] - After extraction: - ["docker build --no-cache image1"] - - :param calls: list of mock call objects containing (args, kwargs) - :return: list of command strings - """ - # Each mock call is a (args, kwargs) tuple, extract the command string - # from args[1] in each call. 
- call_list = [args_[1] for args_, kwargs_ in calls] - return call_list - - -# ############################################################################# -# _DockerFlowTestHelper -# ############################################################################# - - -class _DockerFlowTestHelper(hunitest.TestCase): - """ - Helper test class to perform common setup, teardown logic and assertion - checks for Docker flow tests. - """ - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - self.set_up_test() - yield - self.tear_down_test() - - def set_up_test(self) -> None: - # Mock system calls. - self.system_patcher = umock.patch("helpers.hsystem.system") - self.mock_system = self.system_patcher.start() - # Mock run. - self.run_patcher = umock.patch("helpers.lib_tasks_utils.run") - self.mock_run = self.run_patcher.start() - # Mock version validation. - self.version_patcher = umock.patch( - "helpers.lib_tasks_docker.dassert_is_subsequent_version" - ) - self.mock_version = self.version_patcher.start() - # Mock docker login. - self.docker_login_patcher = umock.patch( - "helpers.lib_tasks_docker.docker_login" - ) - self.mock_docker_login = self.docker_login_patcher.start() - # Mock environment variable. 
- self.env_patcher = umock.patch.dict( - "os.environ", {"CSFY_ECR_BASE_PATH": "test.ecr.path"} - ) - self.get_default_param_patcher = umock.patch( - "helpers.lib_tasks_utils.get_default_param", - side_effect=lambda param: { - "CSFY_ECR_BASE_PATH": "test.ecr.path", - "BASE_IMAGE": "test-image", - }.get(param, ""), - ) - self.mock_get_default_param = self.get_default_param_patcher.start() - self.env_patcher.start() - self.get_docker_base_image_name_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_docker_base_image_name" - ) - self.mock_get_docker_base_image_name = ( - self.get_docker_base_image_name_patcher.start() - ) - # - self.patchers = { - "system": self.system_patcher, - "run": self.run_patcher, - "version": self.version_patcher, - "docker_login": self.docker_login_patcher, - "env": self.env_patcher, - "docker_base_image_name": self.get_docker_base_image_name_patcher, - "default_param": self.get_default_param_patcher, - } - # Test inputs. - self.mock_ctx = httestlib._build_mock_context_returning_ok() - self.test_version = "1.0.0" - self.test_base_image = "test-registry.com/test-image" - self.test_multi_arch = "linux/amd64,linux/arm64" - self.mock_get_docker_base_image_name.return_value = "test-image" - - def tear_down_test(self) -> None: - """ - Clean up test environment by stopping all mocks after each test case. - """ - for patcher in self.patchers.values(): - patcher.stop() - - def _check_docker_command_output( - self, expected: str, call_args_list: List[umock._Call] - ) -> None: - """ - Verify that the sequence of Docker commands from mock calls matches the - expected string. 
- - :param expected: expected command string - :param call_args_list: list of mock call objects - """ - actual_cmds = _extract_commands_from_call(call_args_list) - actual_cmds = "\n".join(actual_cmds) - _LOG.debug("Actual Docker commands:\n%s", actual_cmds) - self.assert_equal( - actual_cmds, - expected, - purify_text=True, - purify_expected_text=True, - fuzzy_match=True, - remove_lead_trail_empty_lines=True, - dedent=True, - ) - - -# ############################################################################# -# Test_docker_build_local_image1 -# ############################################################################# - - -class Test_docker_build_local_image1(_DockerFlowTestHelper): - """ - Test building a local Docker image. - """ - - def test_single_arch1(self) -> None: - """ - Test building with single architecture. - - This test checks: - - Single architecture build - - No-cache build options - - Custom build arguments - - Local user-specific tagging - """ - # Call tested function. - hltadore.docker_build_local_image( - self.mock_ctx, - self.test_version, - cache=False, - base_image=self.test_base_image, - poetry_mode="update", - ) - # The output is a list of strings, each representing a command. - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multi_arch1(self) -> None: - """ - Test building with multiple architectures. - - This test checks: - - Multi-architecture build (amd64, arm64) - - Buildx driver setup - - Platform-specific build options - - Image pushing to registry - """ - # Call tested function. - hltadore.docker_build_local_image( - self.mock_ctx, - self.test_version, - cache=False, - base_image=self.test_base_image, - poetry_mode="update", - multi_arch=self.test_multi_arch, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test-registry.com/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test-registry.com/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test-registry.com/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_build_prod_image1 -# ############################################################################# - - -class Test_docker_build_prod_image1(_DockerFlowTestHelper): - """ - Test building a prod Docker image. - """ - - def test_single_arch_prod_image1(self) -> None: - """ - Test building with single architecture. - - This test checks: - - Production build workflow - - Single architecture build - - Build arguments for prod environment - - Prod image versioning - - Default and versioned tagging - """ - # Call tested function. - hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-1.0.0 \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker tag test-registry.com/test-image:prod-1.0.0 test-registry.com/test-image:prod - docker image ls test-registry.com/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multi_arch_prod_image1(self) -> None: - """ - Test building with multiple architectures. - - This test checks: - - Multi-architecture production build - - Buildx setup for multi-platform builds - - Push to registry during build - - Production build arguments - - Multi-arch specific options - """ - # Call tested function. 
- hltadore.docker_build_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - multi_arch=self.test_multi_arch, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ - --tag test-registry.com/test-image:prod-1.0.0 \ - --file devops/docker_build/prod.Dockerfile \ - - - docker pull test-registry.com/test-image:prod-1.0.0 - docker image ls test-registry.com/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - # TODO(gp): Is the assertion too strict? - reason="Needs to run inside a super module", - ) - def test_candidate_tag1(self) -> None: - """ - Test building with candidate mode using tag. - - This test checks: - - Production build using candidate mode - - Custom tag specification - - Build arguments - - Non-default image tagging - """ - test_tag = "test_tag" - # Call tested function. 
- hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - candidate=True, - tag=test_tag, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-test_tag \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker image ls test-registry.com/test-image:prod-test_tag - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_candidate_user_tag1(self) -> None: - """ - Test building with candidate mode using user tag. - - This test checks: - - Production build using candidate mode - - Combined user and custom tag parameters - - Custom tag format (prod-user-tag) - - Build arguments - """ - test_user_tag = "test_user" - test_tag = "test_tag" - # Call tested function. 
- hltadore.docker_build_prod_image( - self.mock_ctx, - self.test_version, - base_image=self.test_base_image, - cache=False, - candidate=True, - user_tag=test_user_tag, - tag=test_tag, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test-registry.com/test-image:prod-test_user-test_tag \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker image ls test-registry.com/test-image:prod-test_user-test_tag - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_tag_push_multi_arch_prod_image1 -# ############################################################################# - - -class Test_docker_tag_push_multi_arch_prod_image1(_DockerFlowTestHelper): - """ - Test tagging and pushing a multi-architecture Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Multi-arch image tagging - - AWS ECR target registry - - Production image versioning - """ - # Call tested function. - target_registry = "aws_ecr.ck" - hltadore.docker_tag_push_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dockerhub1(self) -> None: - """ - Test pushing to DockerHub from AWS ECR. - - This test checks: - - Multi-arch image tagging - - DockerHub registry (differs from AWS ECR test) - - Version and latest tagging - - Cross-registry image copying - """ - # Call tested function. 
- target_registry = "dockerhub.causify" - hltadore.docker_tag_push_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_tag_push_multi_build_local_image_as_dev1 -# ############################################################################# - - -class Test_docker_tag_push_multi_build_local_image_as_dev1( - _DockerFlowTestHelper -): - """ - Test tagging and pushing a multi-arch local Docker image as dev. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Multi-arch image tagging - - AWS ECR target registry - - Dev image versioning - - Default and versioned tagging - """ - # Call tested function. - target_registry = "aws_ecr.ck" - hltadore.docker_tag_push_multi_build_local_image_as_dev( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dockerhub1(self) -> None: - """ - Test pushing to DockerHub from AWS ECR. - - This test checks: - - Multi-arch image tagging - - DockerHub registry (differs from AWS ECR test) - - Version and latest tagging - - Cross-registry image copying - """ - # Call tested function. 
- target_registry = "dockerhub.causify" - hltadore.docker_tag_push_multi_build_local_image_as_dev( - self.mock_ctx, - self.test_version, - target_registry=target_registry, - ) - expected = r""" - docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_dev_image1 -# ############################################################################# - - -class Test_docker_release_dev_image1(_DockerFlowTestHelper): - """ - Test releasing a dev Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test releasing the dev image to AWS ECR. - - This test checks: - - Build workflow - - No-cache build options - - Dev image versioning - - Default and versioned tagging - - Registry target selection - - Architecture support - - Tagging and versioning - """ - # Call tested function. - hltadore.docker_release_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - push_to_repo=True, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_prod_image1 -# ############################################################################# - - -class Test_docker_release_prod_image1(_DockerFlowTestHelper): - """ - Test releasing a prod Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test releasing the prod image to AWS ECR. - - This test checks: - - Build workflow - - No-cache build options - - Prod image versioning - - Default and versioned tagging - - Registry target selection - - Architecture support - - Tagging and versioning - """ - # Call tested function. 
- hltadore.docker_release_prod_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - push_to_repo=True, - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - DOCKER_BUILDKIT=0 \ - time \ - docker build \ - --no-cache \ - --tag test.ecr.path/test-image:prod-1.0.0 \ - --file /app/devops/docker_build/prod.Dockerfile \ - --build-arg VERSION=1.0.0 \ - --build-arg ECR_BASE_PATH=test.ecr.path \ - --build-arg IMAGE_NAME=test-image \ - /app - docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod - docker image ls test.ecr.path/test-image:prod - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_multi_build_dev_image1 -# ############################################################################# - - -class Test_docker_release_multi_build_dev_image1(_DockerFlowTestHelper): - """ - Test releasing a multi-arch dev Docker image. - """ - - def test_single_registry1(self) -> None: - """ - Test releasing to a single registry. - - This test checks: - - Multi-arch build setup - - Build and push workflow - - Dev image tagging - - Test skipping options - - Single registry target - """ - # Call tested function. - hltadore.docker_release_multi_build_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - target_registries="aws_ecr.ck", - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_multiple_registries1(self) -> None: - """ - Test releasing to multiple registries. - - This test checks: - - Multi-arch build workflow - - Multiple registry targets (AWS ECR and DockerHub) - - Parallel image tagging - - Image retagging for different registries - """ - # Call tested function. - hltadore.docker_release_multi_build_dev_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - target_registries="aws_ecr.ck,dockerhub.causify", - ) - expected = r""" - cp -f devops/docker_build/dockerignore.dev $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg AM_CONTAINER_VERSION=1.0.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-$USER_NAME-1.0.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - docker pull test.ecr.path/test-image:local-$USER_NAME-1.0.0 - invoke docker_cmd --stage local --version 1.0.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' --skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev-1.0.0 test.ecr.path/test-image:local-$USER_NAME-1.0.0 - docker buildx imagetools create -t causify/test-image:dev test.ecr.path/test-image:local-$USER_NAME-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_rollback_dev_image1 -# ############################################################################# - - -class Test_docker_rollback_dev_image1(_DockerFlowTestHelper): - """ - Test rolling back a dev Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test rolling back and pushing to AWS ECR. - - This test checks: - - Dev image rollback workflow - - Version-specific image pull - - Retagging as latest - - Repository pushing - """ - # Call tested function. 
- hltadore.docker_rollback_dev_image( - self.mock_ctx, - self.test_version, - push_to_repo=True, - ) - expected = r""" - docker pull test.ecr.path/test-image:dev-1.0.0 - docker tag test.ecr.path/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_rollback_prod_image1 -# ############################################################################# - - -class Test_docker_rollback_prod_image1(_DockerFlowTestHelper): - """ - Test rolling back a prod Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test rolling back and pushing to AWS ECR. - - This test checks: - - Production image rollback workflow - - Version-specific image pull - - Retagging as latest production - - Repository pushing - """ - # Call tested function. - hltadore.docker_rollback_prod_image( - self.mock_ctx, - self.test_version, - push_to_repo=True, - ) - expected = r""" - docker pull test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:prod-1.0.0 test.ecr.path/test-image:prod - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_push_prod_candidate_image1 -# ############################################################################# - - -class Test_docker_push_prod_candidate_image1(_DockerFlowTestHelper): - """ - Test pushing a prod candidate Docker image. - """ - - def test_aws_ecr1(self) -> None: - """ - Test pushing to AWS ECR. - - This test checks: - - Candidate image pushing - - AWS ECR target registry - - Hash-based image tagging - """ - # Call tested function. 
- candidate = "4759b3685f903e6c669096e960b248ec31c63b69" - hltadore.docker_push_prod_candidate_image( - self.mock_ctx, - candidate=candidate, - ) - expected = r""" - docker push test.ecr.path/test-image:prod-4759b3685f903e6c669096e960b248ec31c63b69 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_release_multi_arch_prod_image1 -# ############################################################################# - - -class Test_docker_release_multi_arch_prod_image1(_DockerFlowTestHelper): - """ - Test releasing a multi-arch prod Docker image. - """ - - def test_multiple_registries1(self) -> None: - """ - Test releasing to AWS ECR and DockerHub. - - This test checks: - - Multi-arch build workflow - - AWS ECR and DockerHub target registries - - Test skipping options - - Image tagging and pushing - """ - # Call tested function. - hltadore.docker_release_multi_arch_prod_image( - self.mock_ctx, - self.test_version, - cache=False, - skip_tests=True, - fast_tests=False, - slow_tests=False, - superslow_tests=False, - qa_tests=False, - docker_registry=["aws_ecr.ck", "dockerhub.causify"], - ) - expected = r""" - cp -f devops/docker_build/dockerignore.prod $GIT_ROOT/.dockerignore - docker buildx create \ - --name multiarch_builder \ - --driver docker-container \ - --bootstrap \ - && \ - docker buildx use multiarch_builder - tar -czh . 
| DOCKER_BUILDKIT=0 \ - time \ - docker buildx build \ - --no-cache \ - --push \ - --platform linux/amd64,linux/arm64 \ - --build-arg VERSION=1.0.0 --build-arg ECR_BASE_PATH=test.ecr.path \ - --tag test.ecr.path/test-image:prod-1.0.0 \ - --file devops/docker_build/prod.Dockerfile \ - - - docker pull test.ecr.path/test-image:prod-1.0.0 - docker image ls test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t test.ecr.path/test-image:prod test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod-1.0.0 test.ecr.path/test-image:prod-1.0.0 - docker buildx imagetools create -t causify/test-image:prod test.ecr.path/test-image:prod-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_create_candidate_image1 -# ############################################################################# - - -class Test_docker_create_candidate_image1(_DockerFlowTestHelper): - """ - Test creating a candidate Docker image. - """ - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks specific to this test - class. - """ - self.set_up_test() - # Mock git hash. - self.git_hash_patcher = umock.patch( - "helpers.hgit.get_head_hash", - return_value="4759b3685f903e6c669096e960b248ec31c63b69", - ) - self.mock_git_hash = self.git_hash_patcher.start() - self.patchers["git_hash"] = self.git_hash_patcher - # Mock workspace size check. - self.workspace_check_patcher = umock.patch( - "helpers.lib_tasks_docker_release._check_workspace_dir_sizes" - ) - self.mock_workspace_check = self.workspace_check_patcher.start() - self.patchers["workspace_check"] = self.workspace_check_patcher - # Mock file existence check to handle both paths. 
- self.file_exists_patcher = umock.patch( - "helpers.hdbg.dassert_file_exists" - ) - self.mock_file_exists = self.file_exists_patcher.start() - self.patchers["file_exists"] = self.file_exists_patcher - # Mock `docker_build_prod_image()`. - self.build_prod_patcher = umock.patch( - "helpers.lib_tasks_docker_release.docker_build_prod_image" - ) - self.mock_build_prod = self.build_prod_patcher.start() - self.patchers["build_prod"] = self.build_prod_patcher - # Mock `docker_push_prod_candidate_image()`. - self.push_prod_patcher = umock.patch( - "helpers.lib_tasks_docker_release.docker_push_prod_candidate_image" - ) - self.mock_push_prod = self.push_prod_patcher.start() - self.patchers["push_prod"] = self.push_prod_patcher - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - def test_aws_ecr1(self) -> None: - """ - Test creating and pushing to AWS ECR. - - This test checks: - - Task definition update with correct parameters - - Proper command construction for aws_update_task_definition.py - """ - # Call tested function. - hltadore.docker_create_candidate_image( - self.mock_ctx, - user_tag="test_user", - ) - # Verify the mocks were called with correct parameters. 
- self.mock_build_prod.assert_called_once_with( - self.mock_ctx, - container_dir_name=".", - version=hlitadoc._IMAGE_VERSION_FROM_CHANGELOG, - candidate=True, - tag="test_user-4759b3685f903e6c669096e960b248ec31c63b69", - ) - self.mock_push_prod.assert_called_once_with( - self.mock_ctx, - "test_user-4759b3685f903e6c669096e960b248ec31c63b69", - ) - - -# ############################################################################# -# Test_docker_update_prod_task_definition1 -# ############################################################################# - - -class Test_docker_update_prod_task_definition1(_DockerFlowTestHelper): - """ - Test updating a prod task definition to the desired version. - """ - - @pytest.fixture(autouse=True) - def aws_credentials(self) -> None: - """ - Mocked AWS credentials for moto. - """ - os.environ["DOCKER_MOCK_AWS_ACCESS_KEY_ID"] = "testing" - os.environ["DOCKER_MOCK_AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["DOCKER_MOCK_AWS_SECURITY_TOKEN"] = "testing" - os.environ["DOCKER_MOCK_AWS_SESSION_TOKEN"] = "testing" - os.environ["DOCKER_MOCK_AWS_DEFAULT_REGION"] = "us-east-1" - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks specific to this test - class. - """ - self.set_up_test() - # Mock AWS and S3 functionality. - self.aws_patcher = umock.patch( - "helpers.haws.get_task_definition_image_url" - ) - self.mock_aws = self.aws_patcher.start() - self.mock_aws.return_value = ( - "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69" - ) - self.patchers["aws"] = self.aws_patcher - self.s3_patcher = umock.patch("helpers.hs3.get_s3fs") - self.mock_s3 = self.s3_patcher.start() - self.mock_s3.return_value.cat.return_value = b"test_content" - self.patchers["s3"] = self.s3_patcher - # Mock file operations. 
- self.file_patcher = umock.patch( - "helpers.hs3.from_file", return_value="test_content" - ) - self.mock_file = self.file_patcher.start() - self.patchers["file"] = self.file_patcher - # Mock listdir to return test DAG files. - self.listdir_patcher = umock.patch( - "helpers.hs3.listdir", - return_value=["/app/im_v2/airflow/dags/test_dag.py"], - ) - self.mock_listdir = self.listdir_patcher.start() - self.patchers["listdir"] = self.listdir_patcher - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - # Clean up environment variables. - for key in [ - "DOCKER_MOCK_AWS_ACCESS_KEY_ID", - "DOCKER_MOCK_AWS_SECRET_ACCESS_KEY", - "DOCKER_MOCK_AWS_SECURITY_TOKEN", - "DOCKER_MOCK_AWS_SESSION_TOKEN", - "DOCKER_MOCK_AWS_DEFAULT_REGION", - ]: - if key in os.environ: - del os.environ[key] - # Call parent teardown. - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - @moto.mock_aws - @umock.patch("helpers.haws.update_task_definition") - @umock.patch("helpers.haws.get_ecs_client") - def test_promotion_to_prod( - self, - mock_get_ecs_client: umock.Mock, - mock_update_task_definition: umock.Mock, - ) -> None: - """ - Test the promotion of a preprod Docker image and DAGs to production. - - This test checks: - - Task definition update workflow - - Preprod to prod image conversion. - - DAG file synchronization - - Image tagging and pushing - """ - # Mock AWS ECS client using moto and register a task definition. 
- region = "us-east-1" - mock_ecs_client = boto3.client("ecs", region_name=region) - mock_ecs_client.register_task_definition( - family="test_task", - containerDefinitions=[ - { - "name": "test-container", - "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", - } - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - mock_get_ecs_client.return_value = mock_ecs_client - # Add mock client to patchers for cleanup. - self.ecs_client_patcher = umock.patch( - "boto3.client", return_value=mock_ecs_client - ) - self.mock_ecs_client = self.ecs_client_patcher.start() - self.patchers["ecs_client_test1"] = self.ecs_client_patcher - # Call tested function. - hltadore.docker_update_prod_task_definition( - self.mock_ctx, - version=self.test_version, - preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", - airflow_dags_s3_path="s3://test-bucket/dags/", - task_definition="test_task", - ) - expected = r""" - docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod - docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker push test.ecr.path/test-image:prod-1.0.0 - docker push test.ecr.path/test-image:prod - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - # Check whether `update_task_definition` was called with the expected arguments. 
- expected_image_url = "test.ecr.path/test-image:prod-1.0.0" - mock_update_task_definition.assert_called_once_with( - "test_task", expected_image_url, environment="prod" - ) - - @moto.mock_aws - @umock.patch("helpers.haws.get_ecs_client") - def test_promotion_to_prod_exception_handling( - self, mock_get_ecs_client: umock.Mock - ) -> None: - """ - Test exception handling and rollback behavior when updating prod task - definition. - - This test checks: - - Exception handling during task definition update - - Rollback of task definition to original image - - Rollback of S3 DAG files - - Proper error propagation - """ - # Mock AWS ECS client using moto and register a task definition. - region = "us-east-1" - mock_ecs_client = boto3.client("ecs", region_name=region) - mock_ecs_client.register_task_definition( - family="test_task", - containerDefinitions=[ - { - "name": "test-container", - "image": "test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69", - } - ], - executionRoleArn="__mock__", - networkMode="bridge", - requiresCompatibilities=["EC2"], - cpu="256", - memory="512", - ) - mock_get_ecs_client.return_value = mock_ecs_client - # Add mock client to patchers for cleanup. - self.ecs_client_patcher = umock.patch( - "boto3.client", return_value=mock_ecs_client - ) - self.mock_ecs_client = self.ecs_client_patcher.start() - self.patchers["ecs_client_test2"] = self.ecs_client_patcher - # Mock S3 bucket operations to simulate a failure. - self.mock_s3.return_value.put.side_effect = Exception("S3 upload failed") - # Call tested function and verify exception is raised. - with self.assertRaises(Exception) as cm: - hltadore.docker_update_prod_task_definition( - self.mock_ctx, - version=self.test_version, - preprod_tag="4759b3685f903e6c669096e960b248ec31c63b69", - airflow_dags_s3_path="s3://test-bucket/dags/", - task_definition="test_task", - ) - # Check the error message. 
- self.assertIn("S3 upload failed", str(cm.exception)) - # Check whether rollback commands were executed. - expected = r""" - docker pull test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod-1.0.0 - docker tag test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 test.ecr.path/test-image:prod - docker rmi test.ecr.path/test-image:4759b3685f903e6c669096e960b248ec31c63b69 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - # Check whether task definition was rolled back. - self.mock_aws.assert_called_with("test_task") - - -# ############################################################################# -# Test_docker_tag_push_dev_image1 -# ############################################################################# - - -class Test_docker_tag_push_dev_image1(_DockerFlowTestHelper): - """ - Test tagging and pushing dev image from a base registry to multiple registries. - """ - - def set_up_test2(self) -> None: - """ - Set up test environment with additional mocks for GHCR workflow. - """ - super().set_up_test() - # Mock version retrieval from changelog. - self.changelog_version_patcher = umock.patch( - "helpers.hversion.get_changelog_version" - ) - self.mock_changelog_version = self.changelog_version_patcher.start() - self.mock_changelog_version.return_value = self.test_version - # Mock repo config for GHCR registry URL and image name. - self.get_container_registry_url_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_container_registry_url" - ) - self.mock_get_container_registry_url = ( - self.get_container_registry_url_patcher.start() - ) - # Use side_effect to return different values based on registry. 
- self.mock_get_container_registry_url.side_effect = lambda registry: { - "ghcr": "ghcr.io/causify-ai", - "ecr": "test.ecr.path", - }.get(registry, "ghcr.io/causify-ai") - # Add new patchers to cleanup list. - self.patchers.update( - { - "changelog_version": self.changelog_version_patcher, - "container_registry_url": self.get_container_registry_url_patcher, - } - ) - - def tear_down_test2(self) -> None: - """ - Clean up test environment. - """ - self.tear_down_test() - - @pytest.fixture(autouse=True) - def setup_teardown_test(self) -> Generator: - """ - Set up and tear down test environment for each test. - """ - self.set_up_test2() - yield - self.tear_down_test2() - - def test_normal_execution1(self) -> None: - """ - Test normal execution without dry_run. - - This test checks: - - GHCR image pulling - - Tagging for GHCR and AWS ECR - - Pushing to both registries - - Versioned and latest image handling - """ - # Call tested function. - hltadore.docker_tag_push_dev_image( - self.mock_ctx, - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=False, - ) - # Verify expected Docker commands were executed. - expected = r""" - docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev - docker push ghcr.io/causify-ai/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 - docker push ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_dry_run1(self) -> None: - """ - Test dry_run mode execution. 
- - This test checks: - - No actual Docker commands are executed when dry_run=True - - All operations are simulated - - Function completes without errors - - Mock calls should include dry_run parameter - """ - # Call tested function with dry_run enabled. - hltadore.docker_tag_push_dev_image( - self.mock_ctx, - target_registries="ghcr,ecr", - container_dir_name=".", - dry_run=True, - ) - # Verify expected Docker commands were executed. - expected = r""" - docker pull ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev - docker push ghcr.io/causify-ai/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 ghcr.io/causify-ai/test-image:dev-1.0.0 - docker push ghcr.io/causify-ai/test-image:dev-1.0.0 - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev - docker push test.ecr.path/test-image:dev - docker tag ghcr.io/causify-ai/test-image:dev-1.0.0 test.ecr.path/test-image:dev-1.0.0 - docker push test.ecr.path/test-image:dev-1.0.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - -# ############################################################################# -# Test_docker_build_test_dev_image1 -# ############################################################################# - - -class Test_docker_build_test_dev_image1(_DockerFlowTestHelper): - """ - Test the complete periodic dev image release workflow. - """ - - def set_up_test(self) -> None: - """ - Set up test environment with additional mocks for the dev image - workflow. - """ - super().set_up_test() - # Mock version operations. 
- self.get_changelog_version_patcher = umock.patch( - "helpers.hversion.get_changelog_version" - ) - self.mock_get_changelog_version = ( - self.get_changelog_version_patcher.start() - ) - self.mock_get_changelog_version.return_value = "2.3.0" - self.bump_version_patcher = umock.patch("helpers.hversion.bump_version") - self.mock_bump_version = self.bump_version_patcher.start() - self.mock_bump_version.return_value = "2.4.0" - # Mock repo config methods. - self.get_release_team_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_release_team" - ) - self.mock_get_release_team = self.get_release_team_patcher.start() - self.mock_get_release_team.return_value = "dev_system" - self.get_issue_prefix_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_issue_prefix" - ) - self.mock_get_issue_prefix = self.get_issue_prefix_patcher.start() - self.mock_get_issue_prefix.return_value = "TestTask" - self.get_container_registry_url_patcher = umock.patch( - "helpers.repo_config_utils.RepoConfig.get_container_registry_url" - ) - self.mock_get_container_registry_url = ( - self.get_container_registry_url_patcher.start() - ) - self.mock_get_container_registry_url.return_value = "ghcr.io/causify-ai" - # Mock GitHub operations. - self.gh_get_team_member_names_patcher = umock.patch( - "helpers.lib_tasks_gh.gh_get_team_member_names" - ) - self.mock_gh_get_team_member_names = ( - self.gh_get_team_member_names_patcher.start() - ) - self.mock_gh_get_team_member_names.return_value = ["user1", "user2"] - self.gh_create_pr_patcher = umock.patch( - "helpers.lib_tasks_gh.gh_create_pr" - ) - self.mock_gh_create_pr = self.gh_create_pr_patcher.start() - # Mock file operations. 
- self.get_client_root_patcher = umock.patch( - "helpers.hversion._get_client_root" - ) - self.mock_get_client_root = self.get_client_root_patcher.start() - self.mock_get_client_root.return_value = "/test/root" - self.from_file_patcher = umock.patch("helpers.hio.from_file") - self.mock_from_file = self.from_file_patcher.start() - self.mock_from_file.return_value = "# Existing changelog content\n" - self.to_file_patcher = umock.patch("helpers.hio.to_file") - self.mock_to_file = self.to_file_patcher.start() - # Mock file existence check for dassert_file_exists (changelog validation). - self.file_exists_patcher = umock.patch( - "helpers.hdbg.dassert_file_exists" - ) - self.mock_file_exists = self.file_exists_patcher.start() - # Mock os.path.exists selectively for file staging logic. - # Store the original function before patching - original_exists = os.path.exists - # Define which files should exist for staging - staged_files = { - "/test/root/./devops/docker_build/poetry.lock", - "/test/root/./devops/docker_build/pip_list.txt", - "/test/root/./changelog.txt", - } - - def selective_exists(path): - # Return True for staged files, use original function for everything else - if path in staged_files: - return True - return original_exists(path) - - self.path_exists_patcher = umock.patch( - "os.path.exists", side_effect=selective_exists - ) - self.mock_path_exists = self.path_exists_patcher.start() - # Mock date operations. - self.date_patcher = umock.patch("datetime.date") - self.mock_date = self.date_patcher.start() - # Set up strftime to return different formats based on the format string. - # Branch name uses %Y%m%d, changelog uses %Y-%m-%d - self.mock_date.today.return_value.strftime.side_effect = lambda fmt: { - "%Y%m%d": "20251023", - "%Y-%m-%d": "2025-10-23", - }.get(fmt, "2025-10-23") - # Mock Docker image operations. 
- self.get_image_patcher = umock.patch( - "helpers.lib_tasks_docker.get_image" - ) - self.mock_get_image = self.get_image_patcher.start() - self.mock_get_image.return_value = ( - "test.ecr.path/test-image:local-testuser-2.4.0" - ) - # Mock _run_tests to prevent actual test execution. - self.run_tests_patcher = umock.patch( - "helpers.lib_tasks_docker_release._run_tests" - ) - self.mock_run_tests = self.run_tests_patcher.start() - # Mock is_inside_ci to control CI-specific behavior. - self.is_inside_ci_patcher = umock.patch("helpers.hserver.is_inside_ci") - self.mock_is_inside_ci = self.is_inside_ci_patcher.start() - # Default to True to simulate CI environment. - self.mock_is_inside_ci.return_value = True - # Add all new patchers to cleanup list. - self.patchers.update( - { - "get_changelog_version": self.get_changelog_version_patcher, - "bump_version": self.bump_version_patcher, - "get_release_team": self.get_release_team_patcher, - "get_issue_prefix": self.get_issue_prefix_patcher, - "container_registry_url": self.get_container_registry_url_patcher, - "gh_get_team_member_names": self.gh_get_team_member_names_patcher, - "gh_create_pr": self.gh_create_pr_patcher, - "get_client_root": self.get_client_root_patcher, - "from_file": self.from_file_patcher, - "to_file": self.to_file_patcher, - "file_exists": self.file_exists_patcher, - "path_exists": self.path_exists_patcher, - "date": self.date_patcher, - "get_image": self.get_image_patcher, - "run_tests": self.run_tests_patcher, - "is_inside_ci": self.is_inside_ci_patcher, - } - ) - - def test_complete_workflow1(self) -> None: - """ - Test the complete periodic dev image release workflow. - """ - # Call the tested function. - hltadore.docker_build_test_dev_image( - self.mock_ctx, - reviewers="", # Empty to trigger team lookup - container_dir_name=".", - ) - # Verify version operations were called. 
- self.mock_bump_version.assert_called_once_with( - "2.3.0", bump_type="minor" - ) - # Verify GitHub team lookup was performed. - self.mock_get_release_team.assert_called_once() - self.mock_gh_get_team_member_names.assert_called_once_with("dev_system") - # Verify issue prefix was fetched for branch creation. - self.mock_get_issue_prefix.assert_called() - # Verify PR was created with team members as reviewers. - self.mock_gh_create_pr.assert_called_once() - pr_call_args = self.mock_gh_create_pr.call_args - self.assertIn("reviewer", pr_call_args.kwargs) - self.assertEqual(pr_call_args.kwargs["reviewer"], "user1,user2") - # Verify expected Docker and Git commands were executed. - expected = r""" - git checkout -b TestTask_Periodic_image_release_20251023 - cp -f devops/docker_build/dockerignore.dev /app/.dockerignore - tar -czh . | DOCKER_BUILDKIT=0 \ - time \ - docker build \ - \ - --build-arg AM_CONTAINER_VERSION=2.4.0 --build-arg INSTALL_DIND=True --build-arg POETRY_MODE=update --build-arg CLEAN_UP_INSTALLATION=True \ - --tag test.ecr.path/test-image:local-testuser-2.4.0 \ - --file devops/docker_build/dev.Dockerfile \ - - - invoke docker_cmd --stage local --version 2.4.0 --cmd 'cp -f /install/poetry.lock.out /install/pip_list.txt .' 
--skip-pull - cp -f poetry.lock.out ./devops/docker_build/poetry.lock - cp -f pip_list.txt ./devops/docker_build/pip_list.txt - docker image ls test.ecr.path/test-image:local-testuser-2.4.0 - sudo chmod -R 777 .git/objects/ - git add /test/root/./devops/docker_build/poetry.lock - git add /test/root/./devops/docker_build/pip_list.txt - git add /test/root/./changelog.txt - git commit -m "Poetry output from the v2.4.0 build" --no-verify - git push origin TestTask_Periodic_image_release_20251023 - docker tag test.ecr.path/test-image:local-testuser-2.4.0 ghcr.io/causify-ai/test-image:dev-2.4.0 - docker push ghcr.io/causify-ai/test-image:dev-2.4.0 - """ - self._check_docker_command_output(expected, self.mock_run.call_args_list) - - def test_with_existing_reviewers1(self) -> None: - """ - Test the workflow when reviewers is already provided. - """ - # Call the tested function with a specific reviewer. - hltadore.docker_build_test_dev_image( - self.mock_ctx, - reviewers="specific_user", - container_dir_name=".", - ) - # Verify PR was created with the provided reviewer. - self.mock_gh_create_pr.assert_called_once() - pr_call_args = self.mock_gh_create_pr.call_args - self.assertIn("reviewer", pr_call_args.kwargs) - self.assertEqual(pr_call_args.kwargs["reviewer"], "specific_user") - # Verify team lookup was NOT performed since reviewers was provided. 
- self.mock_gh_get_team_member_names.assert_not_called() diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py deleted file mode 100644 index 886e1dc36..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_find.py +++ /dev/null @@ -1,267 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hgit as hgit -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.hunit_test_purification as huntepur -import helpers.lib_tasks_find as hlitafin -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_find_short_import1 -# ############################################################################# - - -class Test_find_short_import1(hunitest.TestCase): - def test1(self) -> None: - iterator = [ - ("file1.py", 10, "import dataflow.core.dag_runner as dtfcodarun"), - ("file1.py", 11, "import helpers.hpandas as hpandas"), - ] - results = hlitafin._find_short_import(iterator, "dtfcodarun") - actual = "\n".join(map(str, results)) - # pylint: disable=line-too-long - expected = r"""('file1.py', 10, 'import dataflow.core.dag_runner as dtfcodarun', 'dtfcodarun', 'import dataflow.core.dag_runner as dtfcodarun')""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# Test_find_func_class_uses1 -# ############################################################################# - - -class Test_find_func_class_uses1(hunitest.TestCase): - def test1(self) -> None: - iterator = [ - ( - "file1.py", - 10, - "dag_runner = 
dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)", - ), - ( - "file1.py", - 11, - "This test is similar to `TestRealTimeDagRunner1`. It uses:", - ), - ("file1.py", 12, "dag_builder: dtfcodabui.DagRunner,"), - ("file1.py", 13, ":param dag_builder: `DagRunner` instance"), - ] - results = hlitafin._find_func_class_uses(iterator, "DagRunner") - actual = "\n".join(map(str, results)) - expected = r""" - ('file1.py', 10, 'dag_runner = dtfamsys.RealTimeDagRunner(**dag_runner_kwargs)', 'dtfamsys', 'RealTimeDagRunner') - ('file1.py', 12, 'dag_builder: dtfcodabui.DagRunner,', 'dtfcodabui', 'DagRunner')""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestLibTasksRunTests1 -# ############################################################################# - - -class TestLibTasksRunTests1(hunitest.TestCase): - """ - Test `_find_test_files()`, `_find_test_decorator()`. - """ - - def test_find_test_files1(self) -> None: - """ - Find all the test files in the current dir. - """ - files = hlitafin._find_test_files() - # For sure there are more than 1 test files: at least this one. - self.assertGreater(len(files), 1) - - def test_find_test_files2(self) -> None: - """ - Find all the test files from the top of the super module root. - """ - git_root = hgit.get_client_root(super_module=True) - files = hlitafin._find_test_files(git_root) - # For sure there are more than 1 test files: at least this one. - self.assertGreater(len(files), 1) - - def test_find_test_class1(self) -> None: - """ - Find the current test class. 
- """ - git_root = hgit.get_client_root(super_module=True) - file_names = hlitafin._find_test_files(git_root) - # - file_names = hlitafin._find_test_class( - "TestLibTasksRunTests1", file_names - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test_find_test_class2(self) -> None: - """ - Find the current test class. - """ - file_names = [__file__] - # - file_names = hlitafin._find_test_class( - "TestLibTasksRunTests1", file_names - ) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(file_names) - expected = ["helpers/test/test_lib_tasks_find.py::TestLibTasksRunTests1"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - def test_find_test_class3(self) -> None: - """ - Create synthetic code and look for a class. - """ - scratch_space = self.get_scratch_space() - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - file_names = hlitafin._find_test_files(dir_name) - act_file_names = [os.path.relpath(d, scratch_space) for d in file_names] - exp_file_names = ["test/test_that.py", "test/test_this.py"] - self.assert_equal(str(act_file_names), str(exp_file_names)) - # - actual = hlitafin._find_test_class("TestHelloWorld", file_names) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(actual) - expected = [ - "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_class3/tmp.scratch/" - "test/test_this.py::TestHelloWorld" - ] - self.assert_equal(str(actual), str(expected), 
purify_text=True) - - def test_find_test_decorator1(self) -> None: - """ - Find test functions in the "no_container" in synthetic code. - """ - scratch_space = self.get_scratch_space() - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - @pytest.mark.no_container - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - file_names = hlitafin._find_test_files(dir_name) - actual = hlitafin._find_test_decorator("no_container", file_names) - text_purifier = huntepur.TextPurifier() - actual = text_purifier.purify_file_names(actual) - expected = [ - "helpers/test/outcomes/TestLibTasksRunTests1.test_find_test_decorator1/" - "tmp.scratch/test/test_that.py" - ] - self.assert_equal(str(actual), str(expected), purify_text=True) - - # TODO(gp): This test can run in amp. - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_find_test_decorator2(self) -> None: - """ - Find test functions in the "no_container" test list. - """ - file_name = hgit.find_file_in_git_tree("hunit_test.py") - file_names = [file_name] - actual = hlitafin._find_test_decorator("qa", file_names) - expected = ["$GIT_ROOT/helpers/hunit_test.py"] - self.assert_equal(str(actual), str(expected), purify_text=True) - - -# ############################################################################# -# Test_find_check_string_output1 -# ############################################################################# - - -class Test_find_check_string_output1(hunitest.TestCase): - def helper(self, expected: str, fuzzy_match: bool) -> None: - # Look for the `check_string()` corresponding to this test. 
- ctx = httestlib._build_mock_context_returning_ok() - class_name = self.__class__.__name__ - method_name = self._testMethodName - as_python = True - # We don't want to copy but just print. - pbcopy = False - actual = hlitafin.find_check_string_output( - ctx, class_name, method_name, as_python, fuzzy_match, pbcopy - ) - # Check that it matches exactly. - self.assert_equal(actual, expected, fuzzy_match=False) - - def test1(self) -> None: - """ - Test `find_check_string_output()` by searching the `check_string` of - this test. - """ - # Force to generate a `check_string` file so we can search for it. - actual = "A fake check_string output to use for test1" - self.check_string(actual) - # Check. - expected = ''' - actual = - expected = r""" - A fake check_string output to use for test1 - """.lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match=False) - ''' - self.helper(expected, fuzzy_match=False) - - def test2(self) -> None: - """ - Like test1 but using `fuzzy_match=True`. - """ - # Force to generate a `check_string` file so we can search for it. - actual = "A fake check_string output to use for test2" - self.check_string(actual) - # Check. 
- expected = ''' - actual = - expected = r""" -A fake check_string output to use for test2 - - """.lstrip().rstrip() - self.assert_equal(actual, expected, fuzzy_match=True) - ''' - self.helper(expected, fuzzy_match=True) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py deleted file mode 100644 index a5ee64c9e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_gh.py +++ /dev/null @@ -1,133 +0,0 @@ -import logging -import unittest.mock as umock - -import pytest - -import helpers.hgit as hgit -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_gh as hlitagh - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -# ############################################################################# -# TestLibTasks1 -# ############################################################################# - - -class TestLibTasks1(hunitest.TestCase): - """ - Test some auxiliary functions, e.g., `_get_gh_issue_title()`. - """ - - @pytest.mark.skip("CmTask #2362.") - def test_get_gh_issue_title1(self) -> None: - issue_id = 1 - repo = "amp" - actual = hlitagh._get_gh_issue_title(issue_id, repo) - expected = ( - "AmpTask1_Bridge_Python_and_R", - "https://github.com/alphamatic/amp/issues/1", - ) - self.assert_equal(str(actual), str(expected)) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="""Skip unless helpers is the supermodule. Fails when updating submodules; - passes in fast tests super-repo run. 
See CmTask10845.""", - ) - def test_get_gh_issue_title4(self) -> None: - cmd = "invoke gh_login" - hsystem.system(cmd) - # - issue_id = 1 - repo = "current" - _ = hlitagh._get_gh_issue_title(issue_id, repo) - - def test_get_org_name1(self) -> None: - """ - Test _get_org_name when org_name is provided. - """ - org_name = "test-org" - result = hlitagh._get_org_name(org_name) - expected = "test-org" - self.assertEqual(result, expected) - - @umock.patch.object(hgit, "get_repo_full_name_from_dirname") - def test_get_org_name2(self, mock_get_repo: umock.Mock) -> None: - """ - Test _get_org_name when org_name is empty (infers from repo). - """ - mock_get_repo.return_value = "causify-ai/helpers" - result = hlitagh._get_org_name("") - expected = "causify-ai" - self.assertEqual(result, expected) - mock_get_repo.assert_called_once_with(".", include_host_name=False) - - -# ############################################################################# -# TestGhOrgTeamFunctions -# ############################################################################# - - -class TestGhOrgTeamFunctions(hunitest.TestCase): - """ - Test gh_get_org_team_names and gh_get_team_member_names with mocked data. - """ - - @umock.patch.object(hlitagh, "_gh_run_and_get_json") - @umock.patch.object(hlitagh, "_get_org_name") - def test_gh_get_org_team_names1( - self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock - ) -> None: - """ - Test gh_get_org_team_names with sorted team names. - """ - # Setup mocks. - mock_get_org_name.return_value = "test-org" - mock_gh_run.return_value = [ - {"slug": "dev_backend", "id": 1}, - {"slug": "dev_frontend", "id": 2}, - {"slug": "qa_team", "id": 3}, - ] - # Call function. - result = hlitagh.gh_get_org_team_names("test-org", sort=True) - # Verify result. - expected = ["dev_backend", "dev_frontend", "qa_team"] - self.assertEqual(result, expected) - # Verify mocks were called correctly. 
- mock_get_org_name.assert_called_once_with("test-org") - mock_gh_run.assert_called_once_with( - "gh api /orgs/test-org/teams --paginate" - ) - - @umock.patch.object(hlitagh, "_gh_run_and_get_json") - @umock.patch.object(hlitagh, "_get_org_name") - def test_gh_get_team_member_names1( - self, mock_get_org_name: umock.Mock, mock_gh_run: umock.Mock - ) -> None: - """ - Test gh_get_team_member_names with member list. - """ - # Setup mocks. - mock_get_org_name.return_value = "test-org" - mock_gh_run.return_value = [ - {"login": "user1", "id": 101}, - {"login": "user2", "id": 102}, - {"login": "user3", "id": 103}, - ] - # Call function. - result = hlitagh.gh_get_team_member_names( - "dev_team", org_name="test-org" - ) - # Verify result. - expected = ["user1", "user2", "user3"] - self.assertEqual(result, expected) - # Verify mocks were called correctly. - mock_get_org_name.assert_called_once_with("test-org") - mock_gh_run.assert_called_once_with( - "gh api /orgs/test-org/teams/dev_team/members --paginate" - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py deleted file mode 100644 index e60ea8f36..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_git.py +++ /dev/null @@ -1,267 +0,0 @@ -from typing import List - -import pytest - -import helpers.hgit as hgit -import helpers.hunit_test as hunitest -import helpers.lib_tasks_git as hlitagit -import helpers.test.test_lib_tasks as httestlib - -# pylint: disable=protected-access - - -# ############################################################################# -# TestLibTasksGitCreatePatch1 -# ############################################################################# - - -@pytest.mark.slow(reason="Around 7s") 
-@pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", -) -class TestLibTasksGitCreatePatch1(hunitest.TestCase): - """ - Test `git_patch_create()`. - """ - - @staticmethod - def helper( - modified: bool, branch: bool, last_commit: bool, files: str - ) -> None: - ctx = httestlib._build_mock_context_returning_ok() - # - mode = "tar" - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - # - mode = "diff" - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - - def test1(self) -> None: - """ - Test modified files mode. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - modified = True - branch = False - last_commit = False - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test2(self) -> None: - """ - Test branch mode. - """ - # Prepare inputs. - modified = False - branch = True - last_commit = False - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test3(self) -> None: - """ - Test last commit mode. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - modified = False - branch = False - last_commit = True - files = "" - # Run test. - self.helper(modified, branch, last_commit, files) - - def test4(self) -> None: - """ - Test with specific files. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - modified = True - branch = False - last_commit = False - files = __file__ - # Run test. - self.helper(modified, branch, last_commit, files) - - def test5(self) -> None: - """ - Test with all flags False raises AssertionError. - """ - hgit.fetch_origin_master_if_needed() - # Prepare inputs. - ctx = httestlib._build_mock_context_returning_ok() - mode = "diff" - modified = False - branch = False - last_commit = False - files = __file__ - # Run test and check output. 
- with self.assertRaises(AssertionError) as cm: - hlitagit.git_patch_create( - ctx, mode, modified, branch, last_commit, files - ) - actual = str(cm.exception) - expected = """ -* Failed assertion * -'0' -== -'1' -Specify only one among --modified, --branch, --last-commit -""" - self.assert_equal(actual, expected, fuzzy_match=True) - - -# ############################################################################# -# TestFilterGitFilesByType -# ############################################################################# - - -class TestFilterGitFilesByType(hunitest.TestCase): - """ - Test _filter_git_files_by_type() function. - """ - - def helper( - self, - files: List[str], - keep_python: bool, - keep_jupyter: bool, - keep_markdown: bool, - expected: List[str], - ) -> None: - """ - Test helper for _filter_git_files_by_type. - - :param files: List of files to filter - :param keep_python: include Python files - :param keep_jupyter: include Jupyter notebooks - :param keep_markdown: include Markdown files - :param expected: Expected filtered result - """ - # Run test. - result = hlitagit._filter_git_files_by_type( - files, keep_python, keep_jupyter, keep_markdown - ) - # Check outputs. - self.assertEqual(result, expected) - - def test1(self) -> None: - """ - Test filtering to include only Python files. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - keep_python = True - keep_jupyter = False - keep_markdown = False - # Prepare outputs. - expected = ["foo.py"] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test2(self) -> None: - """ - Test filtering to include only Jupyter notebooks. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - keep_python = False - keep_jupyter = True - keep_markdown = False - # Prepare outputs. - expected = ["bar.ipynb"] - # Run test. 
- self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test3(self) -> None: - """ - Test filtering to include only Markdown files. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - keep_python = False - keep_jupyter = False - keep_markdown = True - # Prepare outputs. - expected = ["baz.md"] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test4(self) -> None: - """ - Test filtering with multiple file types. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md", "qux.txt"] - keep_python = True - keep_jupyter = False - keep_markdown = True - # Prepare outputs. - expected = ["foo.py", "baz.md"] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test5(self) -> None: - """ - Test filtering with all file types. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - keep_python = True - keep_jupyter = True - keep_markdown = True - # Prepare outputs. - expected = files - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test6(self) -> None: - """ - Test filtering with empty file list. - """ - # Prepare inputs. - files: List[str] = [] - keep_python = True - keep_jupyter = True - keep_markdown = False - # Prepare outputs. - expected: List[str] = [] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test7(self) -> None: - """ - Test filtering when no files match. - """ - # Prepare inputs. - files = ["foo.py", "bar.ipynb", "baz.md"] - keep_python = False - keep_jupyter = False - keep_markdown = False - # Prepare outputs. - expected: List[str] = [] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) - - def test8(self) -> None: - """ - Test that filtering preserves file order. - """ - # Prepare inputs. 
- files = ["c.py", "a.ipynb", "b.md", "d.py"] - keep_python = True - keep_jupyter = False - keep_markdown = True - # Prepare outputs. - expected = ["c.py", "b.md", "d.py"] - # Run test. - self.helper(files, keep_python, keep_jupyter, keep_markdown, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py deleted file mode 100644 index 47a41e0d8..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_integrate.py +++ /dev/null @@ -1,27 +0,0 @@ -import helpers.hunit_test as hunitest -import helpers.lib_tasks_integrate as hlitaint - - -# ############################################################################# -# Test_infer_dst_dir1 -# ############################################################################# - - -class Test_infer_dst_dir1(hunitest.TestCase): - def test1(self) -> None: - # Define input variables. - src_dir = "/src/cmamp1/oms/broker/broker.py" - # Call function to test. - actual = hlitaint._infer_dst_file_path( - src_dir, - default_src_dir_basename="cmamp1", - default_dst_dir_basename="amp1", - check_exists=False, - ) - # Define expected output. - expected = ( - "/src/amp1/oms/broker/broker.py", - "oms/broker/broker.py", - ) - # Compare actual and expected output. 
- self.assertEqual(actual, expected) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py deleted file mode 100644 index cb40f72a5..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_lint.py +++ /dev/null @@ -1,32 +0,0 @@ -import logging - -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_lint as hlitalin -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_lint_check_if_it_was_run -# ############################################################################# - - -class Test_lint_check_if_it_was_run(hunitest.TestCase): - """ - Test `lint_check_if_it_was_run()`. - """ - - def test1(self) -> None: - # Build a mock context. - ctx = httestlib._build_mock_context_returning_ok() - # Stash the leftover changes from the previous tests. - cmd = "git stash --include-untracked" - hsystem.system(cmd) - # Simple check that the function does not fail. - _ = hlitalin.lint_check_if_it_was_run(ctx) - # Pop the stashed changes to restore the original state. - cmd = "git stash pop" - # Do not abort on error because the stash may be empty. 
- hsystem.system(cmd, abort_on_error=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py deleted file mode 100644 index 321f7f515..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_pytest.py +++ /dev/null @@ -1,1163 +0,0 @@ -import logging -import os -import re -import unittest.mock as umock -from typing import List - -import pytest - -import helpers.hdbg as hdbg -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import helpers.hunit_test as hunitest -import helpers.lib_tasks_pytest as hlitapyt -import helpers.test.test_lib_tasks as httestlib - -_LOG = logging.getLogger(__name__) - -# pylint: disable=protected-access - - -def _remove_junit_suite_name(text: str) -> str: - """ - Remove the junit suite name from the input text. - - E.g. '-o junit_suite_name="helpers"' -> '-o junit_suite_name=""' - - :param text: input text to process - :return: text with the junit suite name removed - """ - txt = re.sub(r'(-o\s*junit_suite_name=)"[^"]*"', r'\1""', text) - return txt - - -def _purify_pytest_command(text: str) -> str: - """ - Purify the pytest command by removing environment-specific values. 
- - :param text: input text to process - :return: text with environment-specific values removed - """ - txt = _remove_junit_suite_name(text) - return txt - - -# ############################################################################# -# Test_build_run_command_line1 -# ############################################################################# - - -class Test_build_run_command_line1(hunitest.TestCase): - def run_fast_tests1_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Basic run fast tests. - - :param is_dev_csfy_return_value: mocking the return_value of - `hserver.is_dev_csfy()` - :param is_inside_ci_return_value: mocking the return_value of - `hserver.is_inside_ci()` - :param expected: expected output string - """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests1_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . 
' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests1_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests1_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_inside_ci_return_value = False - is_dev_csfy_return_value = False - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests2_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Coverage and collect-only. - - See `run_fast_tests1_helper()` for params description. 
- """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = True - collect_only = True - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests2_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - r'pytest -m "not slow and not superslow" . ' - r"-o timeout_func_only=true --timeout 5 --reruns 2 " - r'--only-rerun "Failed: Timeout" --cov=.' - r" --cov-branch --cov-report term-missing --cov-report html " - r"--collect-only -n 1 " - r"--junit-xml=tmp.junit.xml " - r'-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests2_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests2_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests2_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - r'pytest -m "not slow and not superslow" . 
' - r"-o timeout_func_only=true --timeout 50 --reruns 2 " - r'--only-rerun "Failed: Timeout" --cov=.' - r" --cov-branch --cov-report term-missing --cov-report html " - r"--collect-only -n 1 " - r"--junit-xml=tmp.junit.xml " - r'-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests2_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - @pytest.mark.skip(reason="Fix support for pytest_mark") - @pytest.mark.skipif(not hgit.is_amp(), reason="Only run in amp") - def test_run_fast_tests4(self) -> None: - """ - Select pytest_mark. - """ - scratch_space = self.get_scratch_space(use_absolute_path=False) - dir_name = os.path.join(scratch_space, "test") - file_dict = { - "test_this.py": hprint.dedent( - """ - foo - - class TestHelloWorld(hunitest.TestCase): - bar - """ - ), - "test_that.py": hprint.dedent( - """ - foo - baz - - @pytest.mark.no_container - class TestHello_World(hunitest.): - bar - """ - ), - } - incremental = True - hunitest.create_test_dir(dir_name, incremental, file_dict) - # - test_list_name = "fast_tests" - custom_marker = "" - pytest_opts = "" - skip_submodules = True - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - actual = hlitapyt._build_run_command_line( - test_list_name, - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - expected = ( - "pytest Test_build_run_command_line1.test_run_fast_tests4/tmp.scratch/" - "test/test_that.py" - ) - self.assert_equal(actual, expected) - - def run_fast_tests5_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Basic run fast tests tee-ing to a file. Mock depending on - `is_dev_csfy_return_value`. - - See `run_fast_tests1_helper()` for params description. 
- """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = True - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests5_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - " 2>&1" - " | tee tmp.pytest.fast_tests.log" - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests5_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests5_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests5_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . 
' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - " 2>&1" - " | tee tmp.pytest.fast_tests.log" - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests5_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests6_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Run fast tests with a custom test marker. - - See `run_fast_tests1_helper()` for params description. - """ - custom_marker = "optimizer" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests6_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. - """ - expected = ( - 'pytest -m "optimizer and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests6_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests6_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. 
- """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests6_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "optimizer and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests6_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def run_fast_tests7_helper( - self, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Run fast tests with parallelization. - - See `run_fast_tests1_helper()` for params description. - """ - custom_marker = "" - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "auto" - # - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_run_fast_tests7_inside_ck_infra(self) -> None: - """ - Mock test for running fast tests inside the CK infra. 
- """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n auto ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - self.run_fast_tests7_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests7_inside_ci(self) -> None: - """ - Mock test for running fast tests inside CI flow only. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = True - self.run_fast_tests1_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def test_run_fast_tests7_outside_ck_infra(self) -> None: - """ - Mock test for running fast tests outside the CK infra. - """ - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n auto ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - self.run_fast_tests7_helper( - is_dev_csfy_return_value, is_inside_ci_return_value, expected - ) - - def get_custom_marker_helper( - self, - run_only_test_list: str, - skip_test_list: str, - is_dev_csfy_return_value: bool, - is_inside_ci_return_value: bool, - expected: str, - ) -> None: - """ - Check that a correct cmd line is generated with custom marker string. 
- - :param run_only_test_list: a string of comma-separated markers - to run - :param skip_test_list: a string of comma-separated markers to - skip - :param is_dev_csfy_return_value: see `run_fast_tests1_helper()` - :param is_inside_ci_return_value: see `run_fast_tests1_helper()` - :param expected: expected output string - """ - # Mock settings. - pytest_opts = "" - skip_submodules = False - coverage = False - collect_only = False - tee_to_file = False - n_threads = "1" - # Mock test. - with ( - umock.patch.object( - hserver, "is_dev_csfy", return_value=is_dev_csfy_return_value - ), - umock.patch.object( - hserver, "is_inside_ci", return_value=is_inside_ci_return_value - ), - ): - custom_marker = hlitapyt._get_custom_marker( - run_only_test_list=run_only_test_list, - skip_test_list=skip_test_list, - ) - actual = hlitapyt._build_run_command_line( - "fast_tests", - custom_marker, - pytest_opts, - skip_submodules, - coverage, - collect_only, - tee_to_file, - n_threads, - ) - actual = _purify_pytest_command(actual) - expected = _purify_pytest_command(expected) - self.assert_equal(actual, expected) - - def test_get_custom_marker1_full(self) -> None: - # Input params. - run_only_test_list = "run_marker_1,run_marker_2" - skip_test_list = "skip_marker_1,skip_marker_2" - is_dev_csfy_return_value = False - is_inside_ci_return_value = False - # Expected output. - expected = ( - 'pytest -m "' - "run_marker_1 and run_marker_2 " - "and not requires_ck_infra " - "and not skip_marker_1 and not skip_marker_2 " - 'and not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 50 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1 ' - "--junit-xml=tmp.junit.xml " - '-o junit_suite_name="helpers"' - ) - # Mock check. - self.get_custom_marker_helper( - run_only_test_list, - skip_test_list, - is_dev_csfy_return_value, - is_inside_ci_return_value, - expected, - ) - - def get_custom_marker2_empty(self) -> None: - # Input params. 
- run_only_test_list = "" - skip_test_list = "" - is_dev_csfy_return_value = True - is_inside_ci_return_value = True - # Expected output. - expected = ( - 'pytest -m "not slow and not superslow" . ' - "-o timeout_func_only=true --timeout 5 --reruns 2 " - '--only-rerun "Failed: Timeout" -n 1' - ) - # Mock check. - self.get_custom_marker_helper( - run_only_test_list, - skip_test_list, - is_dev_csfy_return_value, - is_inside_ci_return_value, - expected, - ) - - -# ############################################################################# -# Test_pytest_repro1 -# ############################################################################# - - -class Test_pytest_repro1(hunitest.TestCase): - def helper(self, file_name: str, mode: str, expected: List[str]) -> None: - script_name = os.path.join( - self.get_scratch_space(), "tmp.pytest_repro.sh" - ) - ctx = httestlib._build_mock_context_returning_ok() - actual = hlitapyt.pytest_repro( - ctx, mode=mode, file_name=file_name, script_name=script_name - ) - hdbg.dassert_isinstance(actual, str) - expected = "\n".join(["pytest " + x for x in expected]) - self.assert_equal(actual, expected) - - # //////////////////////////////////////////////////////////////////////////// - - def _build_pytest_filehelper(self, txt: str) -> str: - txt = hprint.dedent(txt) - file_name = os.path.join(self.get_scratch_space(), "cache/lastfailed") - hio.to_file(file_name, txt) - return file_name - - def _build_pytest_file1(self) -> str: - txt = """ - { - "dev_scripts/testing/test/test_run_tests.py": true, - "dev_scripts/testing/test/test_run_tests2.py": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true, - "documentation/scripts/test/test_all.py": true, - "documentation/scripts/test/test_render_md.py": true, - "helpers/test/helpers/test/test_list.py::Test_list_1": true, - "helpers/test/test_cache.py::TestAmpTask1407": true - } - """ - return self._build_pytest_filehelper(txt) - - def test_tests1(self) -> None: - file_name = 
self._build_pytest_file1() - mode = "tests" - expected = [ - "dev_scripts/testing/test/test_run_tests.py", - "dev_scripts/testing/test/test_run_tests2.py", - "documentation/scripts/test/test_all.py", - "documentation/scripts/test/test_render_md.py", - "helpers/test/helpers/test/test_list.py::Test_list_1", - "helpers/test/test_cache.py::TestAmpTask1407", - "helpers/test/test_printing.py::Test_dedent1::test2", - ] - self.helper(file_name, mode, expected) - - def test_files1(self) -> None: - file_name = self._build_pytest_file1() - mode = "files" - expected = [ - "dev_scripts/testing/test/test_run_tests.py", - "dev_scripts/testing/test/test_run_tests2.py", - "documentation/scripts/test/test_all.py", - "documentation/scripts/test/test_render_md.py", - "helpers/test/helpers/test/test_list.py", - "helpers/test/test_cache.py", - "helpers/test/test_printing.py", - ] - self.helper(file_name, mode, expected) - - def test_classes1(self) -> None: - file_name = self._build_pytest_file1() - mode = "classes" - expected = [ - "helpers/test/helpers/test/test_list.py::Test_list_1", - "helpers/test/test_cache.py::TestAmpTask1407", - "helpers/test/test_printing.py::Test_dedent1", - ] - self.helper(file_name, mode, expected) - - def _build_pytest_file2(self) -> str: - # pylint: disable=line-too-long - txt = """ - { - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1": true, - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2": true, - 
"core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13": true, - 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1": true, - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1": true, - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1": true, - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1": true, - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1": true, - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2": true, - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3": true, - "core/test/test_config.py::Test_subtract_config1::test_test1": true, - "core/test/test_config.py::Test_subtract_config1::test_test2": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1": true, - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2": true, - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3": true, - "helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2": true, - "helpers/test/test_printing.py::Test_dedent1::test2": true - } - """ - # pylint: enable=line-too-long - return self._build_pytest_filehelper(txt) - - def test_tests2(self) -> None: - file_name = self._build_pytest_file2() - mode = "tests" - # pylint: disable=line-too-long 
- expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression1", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_compare_to_linear_regression2", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit1", - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel::test_fit_no_x1", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test1", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test2", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test3", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test4", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel::test5", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test01", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test02", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test03", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test04", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test05", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test06", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test07", - 
"core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test09", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test10", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test11", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test12", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel::test13", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode1", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_col_mode2", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_demodulate1", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator::test_modulate1", - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder::test1", - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner::test1", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_dump_json1", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator::test_load_json1", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test1", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test2", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1::test3", - "core/test/test_config.py::Test_subtract_config1::test_test1", - "core/test/test_config.py::Test_subtract_config1::test_test2", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_dump_json1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler::test_load_json2", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test1", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test2", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1::test3", - 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1::test2", - "helpers/test/test_printing.py::Test_dedent1::test2", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - def test_files2(self) -> None: - file_name = self._build_pytest_file2() - mode = "files" - # pylint: disable=line-too-long - expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py", - "core/dataflow/nodes/test/test_volatility_models.py", - "core/dataflow/test/test_builders.py", - "core/dataflow/test/test_runners.py", - "core/dataflow_model/test/test_model_evaluator.py", - "core/dataflow_model/test/test_run_experiment.py", - "core/test/test_config.py", - "core/test/test_dataframe_modeler.py", - "dev_scripts/test/test_run_notebook.py", - "helpers/test/test_lib_tasks.py", - "helpers/test/test_printing.py", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - def test_classes2(self) -> None: - file_name = self._build_pytest_file2() - mode = "classes" - # pylint: disable=line-too-long - expected = [ - "core/dataflow/nodes/test/test_sarimax_models.py::TestContinuousSarimaxModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestMultiindexVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestSingleColumnVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestSmaModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModel", - "core/dataflow/nodes/test/test_volatility_models.py::TestVolatilityModulator", - "core/dataflow/test/test_builders.py::TestArmaReturnsBuilder", - "core/dataflow/test/test_runners.py::TestIncrementalDagRunner", - "core/dataflow_model/test/test_model_evaluator.py::TestModelEvaluator", - "core/dataflow_model/test/test_run_experiment.py::TestRunExperiment1", - "core/test/test_config.py::Test_subtract_config1", - "core/test/test_dataframe_modeler.py::TestDataFrameModeler", - "dev_scripts/test/test_run_notebook.py::TestRunNotebook1", - 
"helpers/test/test_lib_tasks.py::Test_find_check_string_output1", - "helpers/test/test_printing.py::Test_dedent1", - ] - # pylint: enable=line-too-long - self.helper(file_name, mode, expected) - - -# ############################################################################# -# Test_pytest_repro_end_to_end -# ############################################################################# - - -@pytest.mark.slow("~6 sec.") -class Test_pytest_repro_end_to_end(hunitest.TestCase): - """ - - Run the `pytest_repro` invoke from command line - - A fixed file imitating the pytest output file is used - - Compare the output to the golden outcome - """ - - def helper(self, cmd: str) -> None: - # Save output in tmp dir. - script_name = os.path.join( - self.get_scratch_space(), "tmp.pytest_repro.sh" - ) - cmd += f" --script-name {script_name}" - # Run the command. - _, actual = hsystem.system_to_string(cmd) - # Filter out the "No module named ..." warnings. - # TODO(Grisha): add the "no module warning" filtering to - # `purify_text()` in `check_string()`. - regex = "WARN.*No module" - actual = hunitest.filter_text(regex, actual) - # Remove "Encountered unexpected exception importing solver GLPK" - # generated on Mac. - regex = "Encountered unexpected exception importing solver GLPK" - actual = hunitest.filter_text(regex, actual) - # ImportError("cannot import name 'glpk' from 'cvxopt' (/venv/lib/python3.9/site-packages/cvxopt/__init__.py)") - regex = r"""ImportError\("cannot import name""" - actual = hunitest.filter_text(regex, actual) - # Modify the outcome for reproducibility. - actual = hprint.remove_non_printable_chars(actual) - actual = re.sub(r"[0-9]{2}:[0-9]{2}:[0-9]{2} - ", r"HH:MM:SS - ", actual) - actual = actual.replace("/app/amp/", "/app/") - actual = re.sub( - r"lib_tasks_pytest.py pytest_repro:[0-9]+", - r"lib_tasks_pytest.py pytest_repro:{LINE_NUM}", - actual, - ) - # Remove unstable content. 
- lines = actual.split("\n") - line_cmd = lines[0] - _LOG.debug("%s", "\n".join(lines)) - for i, line in enumerate(lines): - m = re.search("# pytest_repro: ", line) - if m: - test_output_start = i + 1 - break - lines_test_output = lines[test_output_start:] - # - actual = "\n".join([line_cmd] + lines_test_output) - regex = "init_logger" - actual = hunitest.filter_text(regex, actual) - regex = r"(WARN|INFO)\s+hcache.py" - actual = hunitest.filter_text(regex, actual) - # Check the outcome. - self.check_string(actual, purify_text=True, fuzzy_match=True) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test1(self) -> None: - file_name = f"{self.get_input_dir()}/cache/lastfailed" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test2(self) -> None: - """ - The tests are different since the input depends on the test and it's - different for different tests. - """ - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test3(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}'" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test4(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. 
See CmTask10739", - ) - def test5(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test6(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - @pytest.mark.skipif( - not hgit.is_in_helpers_as_supermodule(), - reason="Run only in helpers as super module. See CmTask10739", - ) - def test7(self) -> None: - file_name = f"{self.get_input_dir()}/log.txt" - cmd = f"invoke pytest_repro --file-name='{file_name}' --show-stacktrace" - self.helper(cmd) - - -# ############################################################################# -# Test_pytest_failed1 -# ############################################################################# - - -class Test_pytest_failed1(hunitest.TestCase): - def get_pytest_text1(self) -> str: - txt = """ - 20:48:15 - ^[[36mINFO ^[[0m hdbg.py init_logger:1018 > cmd='/venv/bin/pytest helpers_root/dev_scripts_helpers/documentation/' - collected 47 items - - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 (2.07 s) FAILED [ 2%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question1 (0.00 s) PASSED [ 4%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question2 (0.00 s) PASSED [ 6%] - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_process_question1::test_process_question3 (0.00 s) PASSED [ 8%] - - - =================================== FAILURES =================================== - _________________________ Test_preprocess_notes1.test1 _________________________ - - 
FAILED helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - AttributeError: 'list' object has no attribute 'split' - FAILED helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - RuntimeError: cmd='(/app/helpers_root/dev_scripts_helpers/documentation/notes_to_pdf.py --input /app/helpers_root/dev_scripts_helpers/documentation/test/outcomes/Test_notes - - ======================== 4 failed, 43 passed in 40.48s ========================= - """ - txt = hprint.dedent(txt) - return txt - - def helper( - self, - txt: str, - only_file: bool, - only_class: bool, - exp_failed_tests: str, - exp_num_failed: int, - exp_num_passed: int, - ) -> None: - act_failed_tests, act_num_failed, act_num_passed = ( - hlitapyt._parse_failed_tests(txt, only_file, only_class) - ) - act_failed_tests = "\n".join(act_failed_tests) - self.assert_equal( - act_failed_tests, - exp_failed_tests, - dedent=True, - remove_lead_trail_empty_lines=True, - ) - self.assertEqual(act_num_failed, exp_num_failed) - self.assertEqual(act_num_passed, exp_num_passed) - - def test1(self) -> None: - # Prepare inputs and outputs. - txt = self.get_pytest_text1() - only_file = False - only_class = False - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1::test2 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1::test1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3::test_run_all1 - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) - - def test2(self) -> None: - # Prepare inputs and outputs. 
- txt = self.get_pytest_text1() - only_file = True - only_class = False - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) - - def test3(self) -> None: - # Prepare inputs and outputs. - txt = self.get_pytest_text1() - only_file = False - only_class = True - exp_failed_tests = """ - helpers_root/dev_scripts_helpers/documentation/test/test_notes_to_pdf.py::Test_notes_to_pdf1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes1 - helpers_root/dev_scripts_helpers/documentation/test/test_preprocess_notes.py::Test_preprocess_notes3 - """ - exp_num_failed = 4 - exp_num_passed = 43 - # Check. - self.helper( - txt, - only_file, - only_class, - exp_failed_tests, - exp_num_failed, - exp_num_passed, - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py deleted file mode 100644 index ac2b17b42..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_lib_tasks_utils.py +++ /dev/null @@ -1,301 +0,0 @@ -import logging -import os - -import pytest - -import helpers.hgit as hgit -import helpers.hio as hio -import helpers.hunit_test as hunitest -import helpers.lib_tasks_utils as hlitauti - -_LOG = logging.getLogger(__name__) - - -# pylint: disable=protected-access - - -# ############################################################################# -# Test_get_files_to_process1 -# ############################################################################# 
- - -class Test_get_files_to_process1(hunitest.TestCase): - """ - We can't check the outcome so we just execute the code. - """ - - def test_modified1(self) -> None: - """ - Retrieve files modified in this client. - """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - @pytest.mark.skipif( - hgit.get_branch_name() != "master", - reason="This test makes sense for a branch", - ) - def test_branch1(self) -> None: - """ - Retrieved files modified in this client. - """ - # This test needs a reference to Git master branch. - hgit.fetch_origin_master_if_needed() - # - modified = False - branch = True - last_commit = False - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - def test_last_commit1(self) -> None: - """ - Retrieved files modified in the last commit. - """ - modified = False - branch = False - last_commit = True - all_ = False - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - _ = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - - def test_files1(self) -> None: - """ - Pass through files from user. - """ - modified = False - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = True - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, [__file__]) - - def test_files2(self) -> None: - """ - Pass through files from user. 
- - Use two types of paths we don't want to process: - - non-existent python file - - pattern "/*" that matches no files - """ - modified = False - branch = False - last_commit = False - all_ = False - files_from_user = "testfile1.py testfiles1/*" - mutually_exclusive = True - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, []) - - def test_files3(self) -> None: - """ - Pass through files from user. - - Use the sequence of paths separated by newlines. - """ - modified = False - branch = False - last_commit = False - all_ = False - # Specify the number of toy files. - n_toy_files = 4 - files_from_user = [] - # Get root directory. - root_dir = hgit.get_client_root(super_module=False) - # Generate toy files and store their paths. - for file_num in range(n_toy_files): - # Build the name of the test file. - file_name = f"test_toy{str(file_num)}.tmp.py" - # Build the path to the test file. - test_path = os.path.join(root_dir, file_name) - # Create the empty toy file. - hio.to_file(test_path, "") - files_from_user.append(test_path) - mutually_exclusive = True - remove_dirs = True - # Join the names with `\n` separator. - joined_files_from_user = "\n".join(files_from_user) - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - joined_files_from_user, - mutually_exclusive, - remove_dirs, - ) - # Remove the toy files. - for path in files_from_user: - hio.delete_file(path) - self.assertEqual(files, files_from_user) - - def test_assert1(self) -> None: - """ - Test that --modified and --branch together cause an assertion. 
- """ - modified = True - branch = True - last_commit = False - all_ = True - files_from_user = "" - mutually_exclusive = True - remove_dirs = True - with self.assertRaises(AssertionError) as cm: - hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - '3' - == - '1' - Specify only one among --modified, --branch, --last-commit, --all_files, and --files - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert2(self) -> None: - """ - Test that --modified and --files together cause an assertion if - `mutually_exclusive=True`. - """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = True - remove_dirs = True - with self.assertRaises(AssertionError) as cm: - hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - actual = str(cm.exception) - expected = r""" - * Failed assertion * - '2' - == - '1' - Specify only one among --modified, --branch, --last-commit, --all_files, and --files - """ - self.assert_equal(actual, expected, fuzzy_match=True) - - def test_assert3(self) -> None: - """ - Test that --modified and --files together don't cause an assertion if - `mutually_exclusive=False`. 
- """ - modified = True - branch = False - last_commit = False - all_ = False - files_from_user = __file__ - mutually_exclusive = False - remove_dirs = True - files = hlitauti._get_files_to_process( - modified, - branch, - last_commit, - all_, - files_from_user, - mutually_exclusive, - remove_dirs, - ) - self.assertEqual(files, [__file__]) - - -# ############################################################################# - - -# ############################################################################# -# TestLibTasksRemoveSpaces1 -# ############################################################################# - - -class TestLibTasksRemoveSpaces1(hunitest.TestCase): - def test1(self) -> None: - txt = r""" - IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev \ - docker-compose \ - --file $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml \ - run \ - --rm \ - -l user=$USER_NAME \ - --entrypoint bash \ - user_space - """ - actual = hlitauti._to_single_line_cmd(txt) - expected = ( - "IMAGE=*****.dkr.ecr.us-east-1.amazonaws.com/amp_test:dev" - " docker-compose --file" - " $GIT_ROOT/devops/compose/docker-compose_as_submodule.yml" - " run --rm -l user=$USER_NAME --entrypoint bash user_space" - ) - self.assert_equal(actual, expected, fuzzy_match=False) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py deleted file mode 100644 index ac46b6c17..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_master_buildmeister_dashboard.py +++ /dev/null @@ -1,74 +0,0 @@ -import os - -import pytest - -import config_root.config as cconfig -import dev_scripts_helpers.notebooks.run_notebook_test_case as dshnrntca -import helpers.hgit as hgit -import 
helpers.hserver as hserver -import helpers.lib_tasks_gh as hlitagh - - -def build_config() -> cconfig.ConfigList: - """ - Get an empty config for the test. - """ - config = {} - config = cconfig.Config() - config_list = cconfig.ConfigList([config]) - return config_list - - -# ############################################################################# -# Test_Master_buildmeister_dashboard_notebook -# ############################################################################# - - -class Test_Master_buildmeister_dashboard_notebook( - dshnrntca.Test_Run_Notebook_TestCase -): - @pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="No access to data from `lemonade` repo locally", - ) - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - @pytest.mark.superslow("~42 sec.") - def test1(self) -> None: - amp_dir = hgit.get_amp_abs_path() - notebook_path = os.path.join( - amp_dir, - "devops", - "notebooks", - "Master_buildmeister_dashboard.ipynb", - ) - config_builder = ( - "helpers.test.test_master_buildmeister_dashboard.build_config()" - ) - self._test_run_notebook(notebook_path, config_builder) - - @pytest.mark.skipif( - not hserver.is_inside_ci(), - reason="No access to data from `lemonade` repo locally", - ) - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Run only in amp as super-module", - ) - @pytest.mark.superslow("~30 sec.") - def test2(self) -> None: - """ - Check that we can get status for all the workflows. 
- """ - repo_list = [ - "causify-ai/cmamp", - "causify-ai/orange", - "causify-ai/lemonade", - "causify-ai/kaizenflow", - "causify-ai/helpers", - "causify-ai/quant_dashboard", - ] - for repo_name in repo_list: - hlitagh.gh_get_workflow_type_names(repo_name) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py deleted file mode 100644 index ced80844b..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_amp.py +++ /dev/null @@ -1,284 +0,0 @@ -import logging - -import pytest - -import helpers.hgit as hgit -import helpers.hserver as hserver -import helpers.hunit_test as hunitest -import helpers.hunit_test_utils as hunteuti -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# TestRepoConfig_Amp -# ############################################################################# - - -class TestRepoConfig_Amp(hunitest.TestCase): - # Difference between `cmamp` and `kaizenflow`. - expected_repo_name = "//cmamp" - - def test_repo_name1(self) -> None: - """ - Show that when importing repo_config, one doesn't get necessarily the - outermost repo_config (e.g., for lime one gets amp.repo_config). - """ - - actual = hrecouti.get_repo_config().get_name() - _LOG.info( - "actual=%s expected_repo_name=%s", actual, self.expected_repo_name - ) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_supermodule(), - reason="Only run in amp as supermodule", - ) - def test_repo_name2(self) -> None: - """ - If //amp is a supermodule, then repo_config should report //amp. 
- """ - actual = hrecouti.get_repo_config().get_name() - self.assertEqual(actual, self.expected_repo_name) - - @pytest.mark.skipif( - not hgit.is_in_amp_as_submodule(), reason="Only run in amp as submodule" - ) - def test_repo_name3(self) -> None: - """ - If //amp is a supermodule, then repo_config should report something - different than //amp. - """ - actual = hrecouti.get_repo_config().get_name() - self.assertNotEqual(actual, self.expected_repo_name) - - def test_config_func_to_str(self) -> None: - _LOG.info(hserver.config_func_to_str()) - - def test_is_dev4(self) -> None: - """ - Amp could run on dev4 or not. - """ - _ = hserver.is_dev4() - - def test_is_CK_S3_available(self) -> None: - """ - When running Amp on dev_csfy, the CSFY bucket should be available. - """ - if hserver.is_dev_csfy(): - actual = hserver.is_CK_S3_available() - expected = True - self.assertEqual(actual, expected) - - -# ############################################################################# -# TestRepoConfig_Amp_signature -# ############################################################################# - - -# > pytest ./amp/helpers/test/test_repo_config_amp.py - - -# ############################################################################# -# TestRepoConfig_Amp_signature1 -# ############################################################################# - - -class TestRepoConfig_Amp_signature1(hunitest.TestCase): - def test_dev_csfy_server(self) -> None: - target_name = "amp" - hunteuti.execute_only_in_target_repo(target_name) - # - hunteuti.execute_only_on_dev_csfy() - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='True' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='{'/data/shared': '/shared_data'}' - has_dind_support='True' - has_docker_sudo='True' - is_CK_S3_available='True' - run_docker_as_root='False' - 
skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - # Server config: - # hserver.config - is_AM_S3_available()='True' - is_dev4()='False' - is_dev_csfy()='True' - is_inside_ci()='False' - is_inside_docker()='True' - is_mac(version='Catalina')='False' - is_mac(version='Monterey')='False' - is_mac(version='Sequoia')='False' - is_mac(version='Ventura')='False' - # Env vars: - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - CSFY_CI='' - GH_ACTION_ACCESS_TOKEN=empty - """ - hunteuti.check_env_to_str(self, expected) - - def test_mac(self) -> None: - target_name = "amp" - hunteuti.execute_only_in_target_repo(target_name) - # - hunteuti.execute_only_on_mac(version="Catalina") - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='False' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='None' - has_dind_support='False' - has_docker_sudo='True' - is_CK_S3_available='False' - run_docker_as_root='False' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='True' - use_docker_network_mode_host='False' - use_docker_sibling_containers='True' - # Server config: - # hserver.config - is_AM_S3_available='True' - is_dev4='False' - is_dev_csfy='False' - is_inside_ci='False' - is_inside_docker='True' - is_mac='True' - # Env vars: - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='False' - CSFY_REPO_CONFIG_PATH='' - CSFY_CI='' - GH_ACTION_ACCESS_TOKEN=empty - """ - hunteuti.check_env_to_str(self, expected) - # - exp_enable_privileged_mode = True - exp_has_dind_support = True - hrecouti.assert_setup( - self, exp_enable_privileged_mode, exp_has_dind_support - ) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == 
"//amp", - reason="Run only in //amp", - ) - def test_amp_ci(self) -> None: - hunteuti.execute_only_on_ci() - # - expected = r""" - # Repo config: - # repo_config.config - enable_privileged_mode='True' - get_docker_base_image_name='amp' - get_docker_shared_group='' - get_docker_user='' - get_host_name='github.com' - get_invalid_words='[]' - get_shared_data_dirs='None' - has_dind_support='True' - has_docker_sudo='False' - is_CK_S3_available='False' - run_docker_as_root='True' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - # Server config: - # hserver.config - is_AM_S3_available()='True' - is_dev4()='False' - is_dev_csfy()='False' - is_inside_ci()='True' - is_inside_docker()='True' - is_mac(version='Catalina')='False' - is_mac(version='Monterey')='False' - is_mac(version='Ventura')='False' - is_mac(version='Sequoia')='False' - # Env vars: - CSFY_CI='true' - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - """ - # We ignore the AWS vars, since GH Actions does some replacement to mask - # the env vars coming from secrets. 
- skip_secrets_vars = True - hunteuti.check_env_to_str( - self, expected, skip_secrets_vars=skip_secrets_vars - ) - - @pytest.mark.skipif( - not hrecouti.get_repo_config().get_name() == "//cmamp", - reason="Run only in //cmamp", - ) - def test_cmamp_ci(self) -> None: - hunteuti.execute_only_on_ci() - # - expected = r""" - # Repo config - get_host_name='github.com' - get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' - get_invalid_words='[]' - get_docker_base_image_name='cmamp' - # Server config - enable_privileged_mode='True' - get_docker_shared_group='' - get_docker_user='' - get_host_user_name='runner' - get_shared_data_dirs='None' - has_dind_support='True' - has_docker_sudo='False' - is_AM_S3_available='True' - is_CK_S3_available='True' - is_dev4='False' - is_dev_csfy='False' - is_external_linux='False' - is_host_mac='False' - is_ig_prod='False' - is_inside_ci='True' - is_inside_docker='True' - is_inside_ecs_container='False' - is_inside_unit_test='True' - is_prod_csfy='False' - run_docker_as_root='True' - skip_submodules_test='False' - use_docker_db_container_name_to_connect='False' - use_docker_network_mode_host='False' - use_docker_sibling_containers='False' - use_main_network='False' - # Env vars - CSFY_CI='true' - CSFY_ECR_BASE_PATH='$CSFY_ECR_BASE_PATH' - CSFY_ENABLE_DIND='1' - CSFY_FORCE_TEST_FAIL='' - CSFY_REPO_CONFIG_CHECK='True' - CSFY_REPO_CONFIG_PATH='' - """ - # We ignore the AWS vars, since GH Actions does some replacement to mask - # the env vars coming from secrets. 
- skip_secrets_vars = True - hunteuti.check_env_to_str( - self, expected, skip_secrets_vars=skip_secrets_vars - ) diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py deleted file mode 100644 index f5b284c58..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/helpers/test/test_repo_config_utils.py +++ /dev/null @@ -1,65 +0,0 @@ -import logging -import os - -import helpers.hio as hio -import helpers.hprint as hprint -import helpers.hunit_test as hunitest -import helpers.repo_config_utils as hrecouti - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# -# Test_repo_config1 -# ############################################################################# - - -class Test_repo_config1(hunitest.TestCase): - def create_test_file(self) -> str: - yaml_txt = """ - repo_info: - repo_name: helpers - github_repo_account: causify-ai - github_host_name: github.com - invalid_words: - issue_prefix: HelpersTask - - docker_info: - docker_image_name: helpers - - s3_bucket_info: - unit_test_bucket_name: s3://cryptokaizen-unit-test - html_bucket_name: s3://cryptokaizen-html - html_ip: http://172.30.2.44 - - container_registry_info: - ecr: 623860924167.dkr.ecr.eu-north-1.amazonaws.com - ghcr: ghcr.io/cryptokaizen - - runnable_dir_info: - use_helpers_as_nested_module: False - venv_tag: helpers - dir_suffix: helpers - """ - yaml_txt = hprint.dedent(yaml_txt) - file_name = os.path.join(self.get_scratch_space(), "yaml.txt") - hio.to_file(file_name, yaml_txt) - return file_name - - def test1(self) -> None: - file_name = self.create_test_file() - repo_config = hrecouti.RepoConfig.from_file(file_name) - actual = repo_config.get_name() - expected = 
"//helpers" - self.assert_equal(actual, expected) - - def test2(self) -> None: - file_name = self.create_test_file() - repo_config = hrecouti.RepoConfig.from_file(file_name) - actual = repo_config.get_repo_map() - expected = { - "helpers": "causify-ai/helpers", - } - self.assert_equal(str(actual), str(expected)) - - # TODO(gp): Test all the methods of the RepoConfig class. diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png index 45c1135ed7dad317826c7a8ec9efdc67ba53c71d..4cc2a85c7ab98f436dce590ed10cd451d76ab61d 100644 GIT binary patch literal 27259 zcmd442UL}JmoAPyD(IKQ1{O460R;;vN;M(Uq$s^3BGQY}K|o^!V?&P$HbA8uI?|Ox zGXxM(P^y#zDgwgcfJ#&9efG&WbMMT(cm3D;-&r&Bt(8e8!a48zD|Q-E!pAr3?RWF>ZydYk{*J%x^E_bespodw)5rRxEuWUPr@OP8 zr?bP+_1?B8JsjLzr6gq}cW+y7@9F98A-jFM%fJ4Bq}xfm?VqEfOz)U z9$KcMzhy(>dFMp6@P~&?F6cJ@aPDgVF}r@wi(C7Ol_t|ToRbz=JEc}ucLuSZuwP6R z)a*+iyX_%Sv+r)%4t`~PKOdj`hc-px; z-!=b>eqi8E+0nx<@9gHbge(=7yjb?oD(G9;#@%rd$sBHN+{JAxE-qOs`++gi;oAM; zcBb*RJNrE1lMhyUR*$k-hqI1rnQU7Tks`Ns>Pyvj{Hc)QsmI9&)w5XN*lM+Q(Se)r zflAX~MSe_rzNWg|zj(U(haY~Bc7C&C`F2ZlOG``qifMswMsijD;96NvwG|f+Smb-2 z@DekLcc_bBBp_gL>Qq7U!Kcb0eEe-acoufCb91JcTYEv0L2`I^btI?n%dWDiHUCIC z^KCR(QBhI+@WbE7hkIjURKxP~|60Hpsmd=q`npa>iJKiZviOL;zWxfM1zC?zc3lYJ z7ks!$>(0kNHb=R8?OPf*)*hhaR`e}?(TZ&eXXY$$eDla!+$8f@cuK2g@P~j^1>d^w z%KCn~uQ>Th*(l<-&`Yf3#e7%yl!gdg_vEDa%uG+Dhi@}^*pO`GG}Qgle`adT!kj(R z+nSdi<<~8^Tuk?`hPY!%A3xfgsfAs>+z_TToltpwGbcH{sCKY;ChgU$SJD!8!yjJ@ z$@@N9v|Rkg9}SAeE4CTm?+Bi;Fi6s`%=YSU$vIe5RCGJ{l>9Avib#1s-LngpdV7hD z_2VU8rCG3U$hcV=MI7y!7%Vm#?QhOG*`?v<@1LK)rmf<6xJ2^!2#3SX>o6Q_FZ8QX z3E99mXX$x49h-?>X0$$2d+lXmCClvvpMLx0mtV|vnb1>CDGkc!?{rzV&lfFFtQAj9hYGTakZOIu^mbXiM>gMDt zd*401cI#F`q8>Bp*FS!^aOsk#*S@hAO-*A(Z8c%SN@?snOkApt5=8o{QMH_UqAHc<}@u^xw0oa zSfiAl7oGg@Y(sExa6_7f+3G#cY7Rd#FNTJO 
z`VW8HpzhtY#8jt8(eF!azRzIVFXtACndLg?>V4A9pZ^u7;LzPYF8T?FHqbNfKQlYY zEHABSyl-o0Q7?O4qDj7I;!h$XT|GTa^Cu@04U!F8SU%|%(YY?I&yQw&Y_baW%D;ps zHO0LNoEX&_@=w-Je0coj9S3&4*Z4Rmz0P2$z;9^Tk3YtoKF#MB5WvZ(SXcM#nn>h8 za%JT4;P-zm^y0P{{QUFJUp{|6+?;8PtxnNTXX1C_RW1wJveGi#-~H*|b<;?LBkqnRPGv7%a$vIaNB4RmqX=pi+3iaR{NA*P|Bk;%E+lJi@l#rE+E z;oyYBF)Gg^C0#(D}3kGu&}V-e*3LH!={4OGH8@$o+zSvMKoYhkyrFSuf7}k zUYz(--@a7hAY8k4%{1%yWzpWghGZdW`>Vg5J==v%{`ARR!uZkpX0Ld=Xstox>#cS( zlS4WqoF*M?#N(4a)e>%QZWNg2%;$ggZK!8$xW|M;X|hqeA>yOUV9aHq-L<%7jI|xy zltHfrOV`!ED-CrSSXXr7#0ep(Ru_X)CGf21GAjxbVc+z_N-8PU%u5{cm5o zXbr!OkBf_n{)(sa7?+(H>j-X$JGgSWq`7faRMhi`?a?hcPE0n-N6x}7>-ftLTyCn4 z;y2N*%d=zWsGdHv;Fk3&#)nU~qlIJbbvFxwEKE5xz$cSCMQZm6X)wCtk z_98wg-Ibj$CMKq?H2LW)eJ1X+}l{%|fNB+0F#=mPE}Gdd`V+nUEr zZj3t^k>tf`ym0ZN-qiR=L#~UN#bfuMTXS9N-elN}w0YDdBGwD-alWN;Y1M!pM!bwzeDFt%{aKjC`&T z<~-?%(#v&jrY)fp!PJRv?0$K7x8IjztnIyYH<1tAvQK?GnPdO@7hIeA##FPBktQpr zk-i4g;-Es>XZ2c-8U-c}k`q$Wg)3LQjjOXA>O|EG{YNy0@;5s4zsWGg4WGYsT}r&x z-RGsHA`O0Yifa@nG{fY-oHxyNPQ<;Xck-VaX&ONqbLe`0^Q4c?H<7{N-nt~yoTMef ziYY?79k=Y-rMp=tN*_yMkmt%?*5lHY7TK6$5;I>w=vhaHRaLkIt*Y1fumjU>&ON1I zOWct~-+gxo2i5D{pYsAo8jKt}iVCl+m5WD|Zyp)MO-FuB#`-1)j15qp&bGhiKQ@qx zb#<)1w=dB(CL1~OEXCi1ge}-et&EHeZ`Hm%ZtW?!qDffkzV<>E9$qOBPw)Swa&wLs z$FQ|%1KYni(J=MS^tT>iq&9UV4I^jg6bgF{sb)rq!G{rG2OaJSpZavh(!Y1(yt#7| z^qAUs+*eDLChtFcrq=c1wv>fUUXY1SuaF1l7Q*iX#KgX~Cmwf{f{SqLnF6c#B-qtN z$@&c36OwUF`Th5~FYn6exU^&+e)s3Owc5QpgNk<#+f04!48sQgbU;-#*_+#fz*QR^ zJe`P}VpSEfy{Y{2>YE~}OG3XD%}fVKm}LGcAS8V#-Li1FMrp<#;j$&$;X2+TTEAw} zp}pVilI97>r1rQBI5hPN zmstlRB=5+$Mg8>CPxXmj#*{hpsS!mi-6_ICFl$Vz$uAjgIEGxHLYL0P)+@JHgTU|f4Hy|<1{7LO2<82{j zM|T2E9$OV+&A)oji>t@Jj*klkPmM&B%HwtShzbrC3|+C9Kt#Tbz$UojV%Zx6nAAWV zw-C`7WS4UMkqa05x*M;3Ty59Gm>6!jZK^e1H8)qvu6ot<^mGU^@8_!R#e3_$K0NzA zbZ5sC^JE0M)$7)+Q?a*CI5GV3Fv|g%Wfv~|CEV%O76yZHNmV$o{_3%9mIZIpX0QS- ztz)D9dPmoGJhCqBZ~5_uA1(m`2r@1q&^2E^z<8Zuqbjmt!&7U&#*WITXBW7*$oULN zx3Srmk*5Xay!+j6^^9_FwsWsBaG75i4*GNi+R41kPsOFMZNihd`uh0^(tY7hj19^F 
z*nTf4cst$;XRGDvv23@F_8t7CAM2Q58!B(@u>LKz2Hzo|u9D-}aIca5@WA3u?2LC^ zN^$z<&WhF?xz-AV4J?Oc&^KGwXsTJB5CXh*@Zjs$ud!}OisgVtE?lI{oxdn63&_aG zbpG|nk4re=$Fg17Eoz#Y**U^{-AkU`+#2!j-8<|4j^d6+MSwA7#^E&ceVu@2-kgL( z4}QbbNZDN}^~Pb#dUq+OyVWGGSk$JkK5@xVo8nYfjbhf_JzjnK%8bv5-px5_1K5!H>>R19%rxvy4;GVj;o#Y6^QqVF1(vz8f@n?kQKUQ?Jn2$ zg5DiPf>Os=QJKmwPMN5%YRv+gV-1Qd%P~0^Mvprbt_ZPRfjGGqk9U1KCu-=DbrGnIT2~Q?#QG>jM z&$<8eS!WkSZrm}YnaM2WV9v@vvgf|V>)=qYwDjHZ(ned7&rIpNVnjM--CY?s*JD-T zFCTmMGOSmH;Fy}?EX1e?gcixTwM*j=4T|%rs;agIZ3gb#ePQKxSG8S$D^4pdMFny8 z%7OUrN!B#4OasCZVq8K%Rci3BWhv+$or-K1JSk8bWM(q+_UNjRoes64%8Z?^ZF^bM zjoZtCLo~b!L*A`Ajfmy)bz)+*-D5zty&^$B zkzp|rIbm#We(zmpr_F~Kx9fUyk#_8Pt<2!(VznEKG*3G|I6zE$`pI=LhBz z2^j774qo+$Y6*Vz^7G$|4EWyz43!oYbSwBAqCA6Sw%XOzwRcg;yhY1)$ji$!B3-9C z-$zbVApZ#X`1mL@*iIclWq*CQaAAhi!9m3U85=E$7j3{#mAD;wt>qz~g9jHkHg$8C ztl4`+Z5L1gA_KktUXhL}-PH$=&wVR5Q{sqi@%?-hYv=xUKa-O+ZM{?CDbD4RB}=@8 z_FP;hcJu?*Crx78wik%J<-o0L#?5BbUf~evSJ->?3=ROug@}sm+EqIcW?pSvA&Z@0 z)}FeJZ9qfo6kPVs|F>1|8Z{*Y?E5vP34oN^& z;vJ~`%P-E%(zpZuxkmnZs;05IsW|lyal6%Znc9)kzMqb0MJrkW%U|_$vLf077wcyK z$c@B>{QUKApPt=)?AbpGr$$?aK7;M4PEBcv$SPDBmC7Tu*P<{3>JclZxhMWskYIa(jz~Rbc;{oy?&)A7P#JzhmFA`F81RIkx~H`-eT6{H@Meu)O2G zNq_;Rb-0 z28I4!c*^E?S6()@U?ICb&H9_ORBKaR4kzi0u^bx5uD*HmhLyJb@!DCdeD|-KnHjIm zrjV-KisZDBz35Ldwr?lujNBQk0^h>`=idGT@mXG?Jy;3GjSXSr8CN>lJ{?*a(OzpB zInmcy-Fp-|AG4958HI`m0eh_ zhZuN6DL4pKqB^5IOgOq<;BhT0jM>C4h+uP#cPVQ`v&7inX=!v!cxa?QZ7rQ)7t9|l zkFW|#^lA4~_2{Y2QAm5;+G>_+np6AuM5psaikI1`$PWp`e)&!LJ8%}q?r<`KjtI+nYa%yl51p!P z5M__MkByD#c+3Ei?~9CzI?*O!R~&Yut*B5p;F9`3FXF)D!x=yT2S5W9X?KnuJ?bn| zoY}s7!WATh6G}ru5L~rSktsPhwi;D?-{^NW&a*gaqmwq}8fUkfIV?*Q#8Dd?ZqOK_ zX|6+c?C?36!U@NdDI=T5#$`{u|J_1~GgBv3-D=ee$g+f^Hs~a`Ws4e06|em1OP(?l z&MKF^fT;;EJ2s`I;uN>Ck8MSiZI+VA{KdbcYM0G6u|jX%#TUc^2PA>a>B{&hjJsMNxdY~M-qgwv+(G`9DJ5?()+NV5Gojddmc6W#O ze?O0tSu4AQvvy;oLegfOQN8Hh0eQ7qg-DgPKjpaKLhWNY*gn5;81W$+g$l0uVPIEX zP!Yp#Y{J+c5^LqW^`D&Vk_PGJ(q7PP?GG+y`MPy!0MuAy51+y);Q!|p6{^8AQ+bkB zMg2PzV#cn6uiW0OnyNP1`WT3n>K=fq?pHH;9YK0Wk3M8a>ZLM2uiJO>uLm>Od1t23 
z@pvF)eejRC+jr~UKJp)NRL@M7E9}(l%?U(eWp?WPF>=uFWSC9m7N4w|x8G}`VrwW044C>Fv+0HH~(gP3gjz+<--j-*_6y%-!awfPsBaSS(IzUVANqZRd`uzj6Nxf?1_f8&g}wQIB~~4My(9MYlo$X8qRmf}=jq-=i`Zc^4}D9Fb?BWg7K~I)gqC!Te=o5VvJFkgqXgxIe3=rNskf zG7f_y$U*N39c(kcGj7|$eE@7!`{Qt=AS<(7Td;1t&nzuf#_`I|Vfx$yWfK9szDfI@ z{aD5Cp#D&^ZfWkBzq!Jnn=v9Z`Qk=<;GURr$G{LyWN?Xm;J9aIgy6B8!$lG7m&RW{ zREqRA3g5q%VObH-@+r{Ed zDtGYXTb>O_Dec)~=vFuqo*HHVHn?kWFr7BR!qt6h+^g`@pNp4NDS32Xs#E>Rve4fE zsP7suB9{>Y%p<;j{eBUZORt23C*SSR(bd(>3^oQg-$9Z4z^}i?y1d;lzIUF`j%(`I zH#vYj@$bAWBONg6eBi(VF{`3evO$xN!3)G8HlV=w%e_04?b2!pg(R+MYSauLkYJK^ zTtiLm6>^k8qHdgYXaxe7<3QUJ;+YWk>hJ$N=cbsRN`$0EEjYYXK*cxn1~*5a(nWSQ zy1q$Ezvz^opRaEM=+>meF(De^+Y*W4!WQZI`abS!N>Be`qsEpE8-B)9M-!n_QnJ6* z>XO8Yi;CYq{MeAFrz&BVn;o#^6MiceNW}>mM6ftx@Mn4&1aMTw2LXF1spuE@o`_Tk zFa+bwz|rTXkAcT>c#WE#I4oqJM=r?-lUscT+V05vW!Cwb=DDUK+8wUAvQ`goKT@x! zWBV-ksw+Ds&bl!i@)5G=jN6-2?ndT*)G}uL9;&J z%ZYLh)`9k^GfXMXr8U>~*0q5kKn*39eMiOZP|evv15h(uI0Kpjs_rX9Vfqj zmJJw9;pVe@qJu5>d30wNHdTYWQVm;YfSnw{S2jnjooq0&igVHgpd=UBlYsYg!X5Gl9%y)Et&A|CwRpw0SAgLo zV2@haA8K*wLC4mSc0w{3N{-}%Q; z^6fnO*LN|Hc0!m;=J#vXI8$AWpsI35`0TM9n~EhdcvzLEzs(asKo{Av&@yN~?iiHfx!jxEtrTN$z{+1eP+q%k4x??b3&flX-s}>BkPsklIgY#Q zi+lU(j2|62v!U|q+p|mi$K?WIuNs^TGtWqy03)n2G0MG+m{M9*Rkg(^?H=31)h*)I zE&Ef$uT@pfEn3bG*jkFrwZ*EafNFJ?U2VH-`6WRW#24NwAxeS*R=5OkzfAb-jkci3 z_KFzeNTuL*-)~(JmmfXaQCe45_YC*uFjPTim~4LQiRwtXCi=2>U0t>xUfyl8NA@`Z zHhfvIOeGPi^zZM<`BYn*YNU64D0f>WHYkD%Eg%wTqcz~;J=$;534UG*WRAf0?b~CZ zpp+vbIGPvwH}vk+4e&hVIC#QG!@*2YWxTzrR&GV zGvq6UPv$hGuLds2{hnC8fPmF>IKV0#e}R|mJxSamPS9$EWht-$mvCKHgDg6kuT>MR zwXt zo*$2ZkAL=HhF)837ivsK-=5jE&yTL^eVnt6kFOGWDL!a|E5mIsG(&jw@!BWo`0>>P zkiIvF*hL-vB!A5@2I$mel%OP6kAOsiQe09Grb9o)c&EbX8yo!5&E%@5%D=vTB4l66qhcXeg! 
z^EpU?7#CQzV?VLI>w+gw7)6{7{jLethk+Jw44x=XNl^x0z+&2cZeNPcNWgj{!#;za z2UOpH7<2Q+jab+Mz7Ntm()ReoIKG3>9W!jr?5jl84MQ!4n*46>Y&@+KVz;dSa01ce zQ0bBp=+Y#6qp;w=pdM}nf(Hz@C1nA$vd~`l`&`MVe0*=89^fMz3g4~SHM9Qt53^R$ zzc{jJ?h*R?X|aEto<;uT#>lIQm@cSXu`Y)olZkT}bCN_Z=RTdY~}N4)e(Bi;{~Krjn* z#BGoeW-p%oDe#4Vi-)oCtv^dkEzAYzYaVN>n6G%Q^@#qNFMj{OJox|ZdH>&k;O#)) z?5p{*gEQhaCCY!lMu~FYt6M5*k{b@X&;7rw)BpST{>Od7pUT3gA#o>ys%Z&Q^!#`g z%=_``d%c-=c#CKIp8m!ptqOos3^Zd)VL)DYO>{bbW~-uNyX^??*xh5L&;CT1|Akb| zmc4t82~sUuw&lp(My8H1K)MbD9WU#1x&WNnwpA-6%pV^jfOvNlHIV}%fLL|#NT0#C zZ{KW>|G|6G%>&7L_i=HPw;H8=yso2|vAEa}@dn56 z>IMk5tYok4#*g;LY20K$_JMdrwW(Kc?E&1nTt%!{XDA93={loK#|ej_h-sI>?FczS9IDrCfVEA5;?M8K}@ z?sx?U8W>1ZUFRp-5L6g2)`<1!pN8hR_a%pS z>|*~Hkzy&}DzPnF zK1TKNo=0@BXy5E@>cIVt*!&z+56dRzEm#0LMjf#|$4LqP4I=q{*shK#fsqz&0d{7< z<)Z3VAn5KyETyG-tkh;S>uXnpRd{rCG;I$NP?{aZ!Nr?2Z{0@?p+Rh!o6Ho-y9iOB z;VmyY+l(LG$LXZ9nM2g#s8#Tk0jkeDjv3H4T{@}bFWmk8Pr@hwbbPDZi}$#h)qx(1 zvPI!q;FWTyE7k?{@AT@cSO5O|@5#h~6qo}jM<*UwnJBZ}sCcO;faEp~)Fq8$OFkqE zpVQ&7Kx>Jp6R8k@2%Ch;+6{lfY4j~16P=-HA{?D_-zk-kb$`ODBy9VCq1!PBQA z_n!E3z@&s0;Pgak;PTTO6OjVP!KRN4um&9gC@Y%;+6%E5d`TG9q=aJfpg}}$qAbB zr>c%h-H;_~lfHiS#eOB@s}UHiLYx=S1MM%*gp&I>|NMjj`1<9mRwcl6l7DOpcvb(` zUw>cZIiVJH#-TV(kbbzf^VO|vE0sG9DNpPXZcq&rGi}*MO?)g3u#J%J9GXU<+S=p( zkx#?v!fg7jV+j)j%j}Kz;u;f+2NEX%MbkL2+inoMT{Y2GFjJiL@}r+U&A*>79CV^2 z6_420S9tjka3!I%c6f1X=W_^c$yL1lJ;pctQS7~a9)0*iPcPo%o!0zocL{yUf2|?2 z&BB6DH}6ZF{khOkRzA-$>U;mhe`0DKNR#ACOdgs}|H3c-p>G#0i2UzME<1ea`yrEG z^`Fq^KQ1B9{*OZ|W43%z5P6W%aY}ADc3i#s58TgyH;ns2b}i5|>VhaUWL zj!b{3XOqA&wmk8$3)!XwqtuFv8ykYb@xHkFcd}i#X22xpA#7VJ;0z4F>QG8cLcIw9 zewFwaO0PuZARP@jtVNt|!VPd%+HRJs3GTUWf$*vOD8*V@L`0zMD9~b_UZ4{`_bpVy zWR#{rxwi-;`^Pj;|Po4V7qBa2Yc zxBGJ>5aFIzRUIUrW7o0sK>p4=9+Il}5bHNBQNh!G@b?COeHWUWi*Q?cG{YV>KO+`l z;Q%~`R)Hfo+k+-t6uvwY`7%1%NLdgI8pkrE=i6Ui$>5`eE{3JWoB7BV;wmm$xbQV{ z<8_O22(u5sH(HSSt{rc&e5;`*IG+amXq~}9fa`w0Bs z2a5-=x5Q>wwo_GAEP*-)N z88B0(Sq(B_Hq20P=V~y(vcv^VjXPlDkrUiaTY>nz@Fc7~SA-~BVI7A>N*edy1$nq^ 
z!h~s+?^$Wxf)aC?u&|q)Kyy;?%ye&(EkZEEHS+x_)~q@6)fn9GJi$GHPIOqzg4>6@ zY|{3x)EY2f*9dT!-YJ#e78K~x=QC+D1%dag(Zs3Nu8T)2|A{`AwI109h~(!5|f zU>PqwD|fz`ovOE6gzh25Q(qs6Ci;GU@6A1P<_wam(H*CK_;+yAEL*YScvBRlE0k4l z8b#QPQX5MO@Y-Zx#OCVb0_D&uR7L0@%ImSbxoaBrV%5H9MW^n@uPHXSkiQIuh1MM4 z!mxkv!k?gM-5hu5aR&GI&!W^WHadY#MTp-3|D&C^`Vv*=$UiPzsD;loWzK>n_ApqH zIE^sy@ZrO3eBJVG#)n9NNq4Fam$+MJ+a9EsDhEUdc(R3imW~Bq?7{H1xydj?ki)w% z&4S}O4lw8W@zsXiu5E96he=b;jP!TLuH0T0mLzMYt*gr@J*gcj8wZjLuR7J?PzC&4 z4<6B3sVU*GGK!4X_y|frun)xgP4T{`2^T^hy8xRN&zuIDkDU$5DG|5z%g9IrTpIVP ztw5825w-{M<*j%~EG%CiJ&O<(tQhqPhegcHkke@i0zW-n09x}95V9zHQ8`MN zCRCyZVBDUt>?&h!@UYw1mV>osC^mUe8UhIfoknwqcjzh zmagxe#I{mFOvXMO?PP#8lC;COTcwdG_v8vz+8pvpBOHii9fWs|gDdh59pY9f6c^ghsQn^-{i8m5Jr!~@L|B0 z&*<+wlcI8EjTp?Y2+ElVlN!B;;1&j_wBNm zrtdf%svf^)))g>8u+r%E|G0d491a#~dEQ(^R>f)ftvc5h zNTdd+2T);`HhQDpC+F$*!eIu&H4}=$vHm0UgnUcSX)Y{L4>Hs`i5F8Q5D8MG6dM zT-#ouP>N$l`D=i;%(_c?kqh_Up6CVy4dOv^T-l~%Q72&ou7**|EM0eyk zjSiK$^zg^FjJ+`Zgn&Cj)o7$o`)hDY_eRMHP4?oAqN2|ry-=>E;ErquQq>XugWRIV z`=d-rQ_c46G7lN0+M6ILY+y3XNX`QC@CAc9NC>a z52GWBSkGlx+tMBN$wu1|sO{lIz`13_Mkxe{%gdXm7mnWHmYah@Y5+g3MCD}eNYUh% zszlfHe?wk)DbgG5Z#mXtbCt?WTo_0AXULaPrw9<_-87ztLy5g$xSs38uaZYU*8%s( z0BVfO5V&tE_&R8@Qd5DgO^Z%F0eds}s0h+%?TVm{i2y~6*l-E677HV17bW+%r$x!Z zPwg)_+srJx8gXdCa60Duk0eE%`l1dWms>7NXTjXL_BdK%mNWe?;D1~G{rA>m>Bwxo zraZo9z2EKeHE)|N1GHdc)EqT^+nPRS55F=fd?QMsRNj9X8*2is;|ZfDb@wb`Dg}Qo zTj~G)kIkOY9f)BA6{82o1<+SI*pR&`>1x-0@>U}23`761yET*B_zxF>cgT>2Kw#`* zWUDQblGWZa9GDv%0rJE_^+x%=Unl_o=!#gqS`bX!HyNb^9S9A2kvy*B>Ym%)i?TgA zTinUQ1Zsi&tJXv;q^;R^k{B@br#&DDEH}F!#18H?(Te|Un6uz($2uXnh^kXiuZxM|p?UL^T?{CP52iaJgW zUB+d2>uIUJXwf3wLx&D!*8PnvHtEfIgs1#Oo*4XOg*D8I%_R7N_0)#H?Iv_)bgQ`B zT_-4*xV?B%7tA7~KYb<=He#qDq_l_f;f|P6i7}2wxHVVC+b^SKOPv&IRQ!>SGqdL{ zO2BRGW3%$KqZL!3ol+B<_Rhc4+b*N{sM!+iMPaDxUZK0f6QxpUFOHxr907_G{Zg>d zn7F#m80j867E8AT^6?g(D0%<>j0)#E;JFxd3nU|UkYLX)hH|XVK+P4g!G;Xa5S@Vl z_gXwTS!SI-(ro~TW3JwVRCFQfKRWVrSy`E_ngrd1)2!>gvyAh-R6in|Lvv}hzB7_A zfH>jYG-C>502ND+oLFe)a`CZhM@QKxx0MOBC~IMbVw6k(FKU}CsUL!^xQd$ 
zwc1Sj{(*tK>c4U=XPn49f$~KC3RCHzE+{sJg&qDc5qJcy$MjhS0MH=l<`ZZqX&iAc z9Yl(F@bsiVF2Y82cz|dxa$VU;fQG$gE3N7W*evRdDIBgnD5%S$hTaYs^K&Aj+&h|| zcrd}h>ECZv{ld@h?X{1dZ{4oYa_N&^3llT{4cPi8z2Lt4zgdecJ@UUIhQG1SR;vq! z(1w$%=+90s5c*GKM*fc~%!j{Kn5zFws_6eWB>qio_9vvGV;RQM9^EhWC%pd_4sKiW ziuCnPw*LRZHgUZdHOVO~x8+cS|cd-NbMVj;7Tq5P_OWQ#Dtx+YfDc3AovZN6`6!0$FbL9!LJIfb3i3UtsJm}{$)U8!G(rAY8d=pX-wF$v`jOY=Uzl8ex&;teJ(}=R)g#v?0*r4MausPS%65Le_lQy9>gt70DwIjKiq`2eK*Hj6Dr4 zy!d>3Yt?Lbpb6N7nEGL}5k)P?rA{4tOBKKF2Ukt92^y4dOQ%){dC!?WTep7fFnoHS>96sw?t7B6-5Ogc#YH_H)|)Lf?ZMtuOTxD-KPLj4j`5N2Mfoj z`b8|=ygH5dG$tZdX|uv(vGaWhDot{O1u+YM+_YD-1CBz{UJyk%`0{YkkpZ?DdQzc< z-7`$LOucn$mD$W+fBjlkUapbvbz>AP(`$Te0}5fl%vj?pIN=_GCUQUqB!7;mn5?WR zN=pN@V5Cw~rEo^g&7kV9izY#`yOeR496Y)wS@ldeU~IhPxGRG_hI*>=k|>z0@>#lBa0 zXix@Ck>gaHRSr@O_PoOBaZc_b*C`ov^s8sGB)@6@8}aC0pX8vCz~AjEwB?M zLgKARbHm2nxO(*nODw{ZV?PLA?Xjlq@Gc|9q209Zo@K|W9)UF8E1FboR6QMM=JPx5 ze=eaH6!_}31Ost;sH-&J{oQXcjEn%T*@Mp{y8wIb{p)%t!FD^pIYcT5$|vfx-^-PO zc1k8OfFtUB$0so13?tl+N6Q$2eE5V1OX1QWhzn_H1VD>S#EgEOrfF10%Wy+L>2TW*4I&795nsSu5i3B z8I77jcOn$PhGFFm5Jog}b#OsRR@=B~Qy&pJG99O4s23JDE}Ij9RwvTa3GIMtlJ@*~ z2+ZCq@3~(`SG%G_e?Gd3)Gq3Dyd&$Cgp)oD`Z)GU5G~~X@~pp=ss*nR4^YG|PPJee zqT;T);nfdO0C86Q6<5S>e*CE^Df)%jiU&Irt&aLDJ>nshH`VKk%p~PzPp*Fg|2(?} zIPlKi6Ave+rVMaqR~qn8m)g<@i#e0){m>EWg%a5y)zkp;FRxJ&Y8p6Y4GgGAll&Xt z&!78NIswIFd%&PVAfkBwiO!He|NK+>#QR@IxUEeMalDXOd!Fy!e1tXPO8@3n_};E_ z7x=FuRQ`LSpZ7fU^K>IQfAGKmb~9G!q5c&>zB{v|q(>Oc!K1WHg*>jR11Y5EL! 
zwAHTq9(6|ptn|mDMW$uIZu|E1?3KL3?|Y=pCsY<_<3nesh|ic=^CVai!W(!jNcic0 zkL>70C-w>1V7NTV6a%%^5#fOm3+ZetOnDn%gn;(KuBlF&)eh)^@@*p^At_PdJ&tA0 z0wCxjAppel4fy6L@IjZ>?BPjMe8;3Cgl84`etw7^6ZHE~VToAShdLUR_*si$3Ok^| z4#C+-N+Dt(hcZbZQEZ6l4%Ar!bDX$AlBhaCL6myg@S6g*#U>Y_j{rKG3KoRg%xEwM zpj{PAln(~s9c%!2gIuITISUCV9)!*o_=3@Lby!VJE&rbzzgKc;30n1XCz_$pMF)*1 zg7UIy3PSTTQ4pZ$b913~yqON}dcGVbsxjWJ9*wdo=-z6H7eLmeo^sHNBj`m*fZdPU z@0b7hqsY>wVaqrHJ&``1;Pj}QgnGzD69M6fDzGRf7tpztmSkflOM_Ve90BB2#?+3( z-drD9uu#+-)szc&+A=2|IUXBXHRltuSS+0CY77!ZC?(_I7@=)u7^d;&@f_#oTFJ?F zYB{6{CunTEPZA=mvEFr@`otLc5(uM@gLxx!FllgjMKUo^`;c=WAeJ5$+xeXl1AvN} zL4Z`68*?$>hD;Zv&SHW%HOLJz~hDlQb<-~iu@9fNRY28DZkj4>U)8oKx0O;y! z_s9WJ!4TR1igIJiQ~`j8x-Pn|?nzd3bSO2CZ%hQ@CVtMM6>YE(M~0U5{JmnAv0{+~ z#xR)$7}y@{7(5@>MVU=)rhoyO)Vhb#fxIEiud77hMyBV%3h4gv!$$JdV~)vwWL_3C zKUJpe4#2^eo|?fOY=6D<4{+Y>YrDFb-udc@0T$mjw*QN3o}7GBCq8X;oLkI?upr|K z8suKVdRd#jj&rRC`B$`cGu%two@3rDpTXRTebUnUwW;d^dc0oJNAWeW2G^YhY`G7{ z`!G62h(M{8w}MfY_)8d!pHa0`S{-n3BV1y1(O z2t=JIFOOoR{-5!>hvhFo?sPW-7&+`TPd*5>~CLlR*!{*8;ZYt#1@IUW4(2%pd;%*Qd4&e^`n0N{8?N#_B<(_#6zN& zwo)=3Sdq+S6ml=4PlBW{NNB_{DNOWddGq=ni0Fom#ead6VdnKqwk!I#}Y(t>W-#CKr z_J7gjF?#NQ&5HlqGyIo3Vg5b6!Q)*D(2}5Di`#5&zL1{(amenaCPS|a*YkB$AS6nv z69RWy%sa>{Hxm!q>rpLvXLZ^jr*8`avwuA;K#1Bo?tC$ zZTEUJnpT?YBavIodBn8{b%|C4jFiP^0q76m?c_96CqI#ONkiY>4pYvROuu%4+&f3JYN$^ihIvTlh<9_6-E2hloAR zEy8dVU0$cqlI8|UCo8skfdx`<`>7!<47>Z4^wOYjT`QSWH0Fv16af%YiJWHsW8QxBN$$ zp;ZuxE+HDQafMzP`Szh&IaExN)N(j*1M~zB+MWZ_C_TplO;L2sEAH%9aiF zQ@ymxT5T7)x*VD$D_vVf{2bV0b>rWj7e#wJut%%qk#=ZW4-ni@CpoxcN_}+qMl=rI zEa>SF-aBtxG0XkkFJK9^FpBF@4m|K2JX`}%Few)+4aYv6hXy;b^{(i049^10L+*`G zCK3gO*8fQMoy|;6(WmM*%VS zYQ`Mj(>P24{{HAdtbI@jt&8zt*keBf$bMjWnZsms_P~T;Vqj6`Ak)G>uh0N1Rx~~C zL@t+Ws_Ra)^k7$E0WNO(LDnV{ujtVo}udv=<-f#vo7t zBwZM@xh?4ytWCke!5Ceo2e#HQS~2juxq|m0l0k8dKpBTpEZcABcxyHQJ}nK^SZsM9 zEn<6H3F&jgehCz*pp?eaL#=8!>od}{1S?NjQCawnxRMWEYe~|MXWexXH^ufIL zA-9g2Jm==0@S{YHEtyKGJ29voB`>DFBzg{mN<(x6R6?%?6IqL5Ckw|{+f>o2c}%JU z6@@2kK4`6O?;Qq_vjx2gF#8$eM~88PkDoZVG+vH(V`kn8y`6Z0Itn8pXN7t@wPHF^ 
zR=#K*4Gls(jR&7o*Jy}8D}Oxi0wc5_*nkZ+l9VoXuqT{J-;iH zPJ_a*%i1{5w;nFxKpIfwP+Ng^x*uw8>m6^})g_7RcQ2u)wZ zpwrOgt0I~7^{IQ$m`3~1pzx`H;u`bfAPWSLC0rz!+}>xt1bo5m#URoo;rQo{%DM3k zqu(O|eI{f_#5+CH`flJCD4OiVP-vIn0#bSYZ@>%FjHBmVlynAiN;vB~(Sv4$7EvOP zxh+aCtg@n!5)+^Wf5g@`V(LQzO4wtzx0lA%k9s%>BV8!2&MZNjWVU02IPDAYsdpVL zd3IwSm@B9M;Fo#C(Bc+f3k%a(VL0xt#-Y&KO}>b5v|}Iy$o7v>4#%jrBTbDMl!bsz z?HlX)m?h1Bzx2J?fziOhhjF)*StEBu6pu-@!*@uGKCVZdw}7_>P3Cy={-5vOQfuGyCGYRx^m?RIGtOi=TyjSlz7ky8`NrU9CS7Yb9& zw1KBL+Wvlc;4e*#J#}WPwNU2t#DSm#LzU=O!xXi;(Gk2G<~d@-(-VBYJfJ}e z4D{eR8@w;*oJ^l*xj@hchCDO_nBFP=x(ovu8E?j<)Ep=L@Eb0wd}!#o4-(<|4*t-f zAEE`uj>7#$-5*Gxv7mC|{S+;i3f4697XA({*U9b|iR41{TQ@rOA#S*kF*Z4|04-;5 zeVUMO=iQa_=*h{}QY}YwYzPf*LC*RJ8>JDtIBTUl02!9U0a#K}qM7!}9M$=H$Wk;H z5xnS0AD=wd&TvucTmo7sShDLGPCIqI>_#K4))W}}_3Hf-g0XN_FXHF-d{eWr7uo+& zYYk~=2ygq^B5&1%1Z1HtmkKkGx%FtDxtTU0`^o>AZ#6H-)F5bNW26M}1iihvmDU|T zk~zF&qx4(p!DBUtX&|6F7^o8N*KL}`mK!8{n)R-fw4hIDX0Kbw2~1wj(I08 zjT}Bj2Qvzhoj157>-0VD1Pml~G(&Tj?eo2bx38PTbp^!zg(URb-6L!}Le;peJUb*f zr{ybFkYl+Q^x6`cxToyZ=qRF>M~C~bs1xg8+pv}?gh@aa<7ttEnPBmgj#V-kuI!yY z$Y|uyf{V5LLanebcz0a+FgO@6rZ5&lhAOrrB0W7FmTMc%7;%0`O299DuVX?9^wB_N zsBm3jO9kJe&!QfxVPw$fZ%9#aDwu&(p*FL8(COsi#1AH-!JU%AV)39)n!vGWt$f5z z9fAoZ%R*`lSCu$LvZ>QpWN3oqm#6DbE)|FpbkuTfiT@*&f&Ap470_L+LGz_)Z~|C| zSpUJzbVW#E*$?GoWWM*7IhdQef?uBAeZI)oaLP4C2N!KLz2&gnYQYHZt-F%H1|{2Gj%4f0-*k0Sa|UL_wtX?1a(CI^It!zAcWAIF+!Uvs4p%71<4Lu z0ApgrpaV51gIQAn`nUvAVBpkX^hH@K?W_yW%`gmtG!3Jb-H)z zlx>F&E4yfL5;rYGv=be28r~+zjv$Z{tmn?3FElVPpaP(E8OLcAj;GypYz{%Pv{kDj=0vL1a7$c!0NlQ{3sFrXP{;e?5nPkvVUi}%kn zg!i5N6&W5=H6rVJOGB47p}vgNM)5;ks0iN??`5eso4_yrWXx2OZ~fWt(Q&i15V{uT zJ-?a$UwTbZK6dqesU_Dx4JbP3^f=HgAIBQqQD9gH0+)ARAHrY=nw5>dGt>Cg7!moC+V5Y$I!98#Y|l~W?7B@p3FvC63kXx{E3m$G&7L{V;97323hebT9ufRucG=SO{%nB>jVNEwTZ&OH5hF(p#03&8j z)l|*qJGKTEpAu=9%I;H37djHCz8xPMOvglXYHF*j_JHiGnOFuL9BX#cBVdQpg@ZS? 
zJfI&(=M2qh$p-r?81euhbF&;ZRKO4WAv1&AddOKcTcy9Hv$J#ekrF=Sii0sKmvvFT zLUM?~``Cl+^~p&?e?0 zYvWi#tcK#5X+SK!Qf+)9m6yrba$+$oCII7xA?73__mGv&()ZmwV%DKT zg2wl}P$kW;Op}x`JA-VzW&zMsz+qm0FKooeXHo+N31$}Y#+U%mkr_Ftz5Z~jtr3JdK>!@;M@QY{!p=89l+ASru~uW?S|>qqu_tT@&5Au~a=4RAPXV73t_5AB zzfDN`fNg8XF&xYUJ3l7vtxmn&l-y7`R6i~a4~oHyg4x(;gF2KSzLf9T8Vri$+i@BDxOK;lLzp1g zNUm#yG$)8GbcijA<-&fD#9JdB>w~93=ZVvV5muGpCNzO?Q3xm+C=Q6s=W1&WpusmO z2cpaY94~#`TJG%oqAc5uo3}a^uwfQ{h}k7%6vv^bz-oypTA^JuTea-d);O1bLOCRh z0Wq1vW0iZE6gZkwM1lh1$VS!`1TtO^R@V-i$VeYTQ}@RzZj|kAPXlD5aRSH+G}syu2F3ZlVGr-W zMzkFSCqO>a(6HbsFLK{#PuzpL`Yue+Cx7aErU4ELv6`^H9mao%;1w|4NVVJ`H5(C8 zrhHl$mM)^;fcTR^izcRRl?X0@H(h`ikBPXTxd&y6RE|E_i}q~zu&BbQ^Mmdtl~T^S z*|1v?7%K@GB56ujZX7NhkxaUQFtywpeKIuO*=mwLR5TQF#E^JN96d6x5~!w*MDl#0 zW6B=IjCUXhW4cid*kkYub^HQtL`v4>^O}t0_hN_!{`(YsS1+KPrxSrX@Zio@=s`Z1 z?x-H}cwQ63=78DE6GjB2aKZ0J@1!qtgbS-Tu$t7m}?RTellSVnNpO z$TGxY7{OPdD@(qi?g+cBP1^SPGAap>;SDgmfY)gUl!d-n-mEM2ymhG}>hxa*LG{hV z2;N>aVOBDPqz;-^Oam{ALA|D)GHMwha~v42T4DmBRfAfUC>udDye5ha{Kp3(J`4`J z{g2wt=as<#jN|WIT;w2!*~*Vx9JL#pi}?}TtZ>nL@cXb7rYsIak~EXrjuKiakur?z zuw2Ye>qgF_xn80=lMS0jjZ~7om6*OMAA9T3b#Qez8qB(_(N;BiM*=SSj=(50}Of9+5=z>tG^1 z--sILG31+5)}^(8T|aY|o~hu-5#t!{ov}pvX+fo#&zh@CL$EqQZnx9t-FsA7--Wds zvxkr%i3LXOOkm3~fwWX7lavTDt!i)2oa^<&r_rbpDT<8ro{n!xC|ihuT5_BvWhn&t zxgZ{P1&pA^kanOi2da0YUqBCIz|RDcOtQNikUpG_rTti5kpul=Fjd6`k!3-$dLaR= z+$Fl}z1Ye*!xhs>#??Um=wevSoflT5wFyNjTkhJ_DRV9o+U+RXHJrPfFVNl-$MPlS zy3|gJEP}$k<@qZndwgn=K+0X4P8llrHE&N9l}Nw+*T(7JT{;iny8hhSl3_gW$W_{! 
LJDT;A~=V&T`zu;k>n-gSEx!%}(a_ z7cJ~;_X>*(iwSN%=iqSuqLhe;&A)zuu$}!`k#C`)C-EUaomV`0k%eWI1N}3v`3#Q> z3yXy>^D7=YEX~=r<-u3>&?cU@wjBIFeVB4&`|9uB|KR(( zcOUwVq~D+3;IjokpM_oTB*kNWL-gy-KP(`1u{>wC)@CK4VTIX7M?EW z@&55`LyVhDu#nZ!rZX2VT;P!N$hfvfU@JR2`$@rdLMM~?a^98<-r|HKA|2^0 zU=hIx*tU#~P21Yq+AZ+6*$wkW{a@jWv%DYdF<1W(EUESMDC_%&;vzY3-39m9A9%j_ zi~Fprt84VR@+aMuk$x({61SWAb=U6HR2;CM32647^~@Qq(Jy%@p17E8Q&d}lZ>(<8X^TB}d#B%w8xPj#+jYIU z%{$ro)UP6I&5xpt2Srbf+nD_`Zn@3_MnZ;}npQP&eKJr z6+Aq$BaT0ecyi?WP8k`QoVTv$@D>|~9KE`$1UlZ_zC3t+O;=TPfOeejW7pv?(eC5@ zU50Ekyb_BzTn<*pozy72wl=_ILqoFpb-t;dlV9IIjJ&pX*UpPy-Z`~Re0l$nWAB+8 zcsYI^9v<@#!6)zT(y96JeK_R9_Wk>{?eeD6e!G0Rqpj`n!7IzUMr)0Ajvc#EIm0m2 z$ijv>_BCkz^2;wngM%jy9Jmyt9Ul{|5veqpS6Ru|mEPj@Li+v03=hkGyXLriLi#(M z`#;({JL~*9e^J$ov#KFdjwKd`-ZS4Lad{#IPbR;qt<}MTSG>N-8K74f{prh>2yR(7 zMP^g#<(kTurNNR<``=Q1$+G1h{3`0-zO$^=%-m*)+aw>+az(SHuCtx{kUtBed#U1D(w&La&H%& zR%^-i)R1YxN<>>!M(E{v<>Y?<@=&u{#(T!&$dMz(rKPH&p`mV*U6HhVk%`rRsl1T) zoJ`4d=z4PL5(`}&&1m%?S$C)8T8-wH7Zsi=_@x=v>OML!FK4X5ieZ%Hg#B{tEPJ&- zG1>g>Hn)?94j%lCHg>4v?Jn%b(6?`D--kNH9J@97`S|KakN4}n`kb(Sn%?ob$szO7 z(55b{i&JBLmUYh`|M>Cab6;Qln>UI!b8h5$>tp$9nbKnc!suLjmBfO<14h>C_iv@Y24x zs7Rr$txYe>DW0zjo58W~od5g<3+N1#m7NeW$Pg`ejrbz=u)+Lzuh`VrFz*=SJTJE| z56<4fySuu%#VFq1${~8{lCWXsXVDOMRZgQ9XKu#mB-*<3;8`D!_0|V;4mG`#x$E$yB4{~Ve5vJXc<}A(XPm_Z8_?f zG+UR74ldlg;t+FF?z`OCnJK+g%ex%nwsBEWJMfK1m=52E%&Vi+4&ehd5Rj`n2S4~Pvn(Td1nsV{$2hHkC>-s&oFiN|3 z@5b_mHl$cQeH$oZRdkCBKPlMArESsOomxr-zU;LKyK6UWI5)oNQ}gqo`m>INcGapUeU0g{h<`C^ zPmjjj;}y7S<9B_%NR0Y3rIKLD7onbGCtqe{?9fRtV5sy2sLGWu7s-Bf?7^OBWTBnv z;eSw4iq}uK#NBl=uF%MmV-Dl;St2x9HfO)Y<0s(rdvFuWgT$;-@Y7{St2CJ5>y2D~ z!Qm~tv2pL3jT^f}L%J#>m6>>qyPsT@D>5#$r(g|Oxn*>Z2kpE4@S%Qky8}W}nD@-f zp|7Qh>xA^);MzXiZ&Sf(m;D&OeG-SB8EaL+Tn1xq7)CFL4zjvF?(xoUIkll$Wuy2Zn`((e1~g_w*#@X=ge0 zIShSfuyX8sli-a&zR#{%X6=p>2b!{+DT{w9yk4E1)!N(kT;(GQMK2q$=yA4MU zAC9uBO}KXJR+_YWx?_*_O-^wY#9{66{-)7^yxDH7i`x4ju~g3~r$(zonNwaG&y?0q zBkSxm&dr`2A4o((^qd~8jnRm_6`>Lm6DsR2S7q-t(dO&caaZT_*RPScxTH(46!J3@ 
zPyZ|^xGQE=qbZa%`qoXo_P&0gfbu;-%gTtG!baIqixWh420lA;61Pqe|4H}&=Tj?M{h4*;%XFV5)lZHrr_TTy(Rp#^{hFVD-pl}u z%F~Qg;WEs2QFH96seVx^7mMWqNIJZ7sUU9GT!kD=`NpOt*AU^|E2^XaBILFSM@di$hvR6 zuz~2Z^}^PQd$4VN5$5qGI9q9H!(U4|anyYtK79BMYbaIa_+#RNuX(O_rZ$q!-j5$Y z-ar&;|Lf{%rvmLnBb$tjj1Bn47pd6w4XU!EceWJkCYxO~oc(F_wz`^uoY8HCnI4Ef z=~jN%ukW?2iZW5$gA`+#F37yW#wE1@IF_yQg>3+@!mYXnr-5dhjLb|9WU0MCQXQfn za(Z^9_V5bOU5;&lL(JV5J{{H%!_U~jys=X&Mj}2780j^DZ=cV}^xEHlTHV_alJlCL zOL32p=crcJ7y=Pze}BJ9y1SWK_?F@w8WA@xwEekAe+sAUCRU)Ju&_`9iN7G5e`CB} z>Yh*S?KXYB>^%FiduK9UzTAkbpPX9yP`uRN?2fQuLxxjdP?Jo_Js~T_JzqYA^gIv6 z5iyf6jsI>`Gb&Pif4@yofy6i)?m=mv503YHf3xhtKbJf8$-0e*@@=I5U$ttLb>GzZ zfVhXJC#$owb5`y2Wx+^n)NLXC{aIaq6%}!o%1v2683?%=t(){@OL1GOwRwmU0Bv7d z<9cHctUNv3KFg{%{W(Jj^WuDxy_cgs-^;y8)=RISPOQgi<^VpspqmTO%E7#mWLhK@ zFQyT3>^0C?pASN8UDsJ9b>M)T`S}NY$CkK?xR;r~oE5X~=$)?HIhUZ7?(8rLmiKP9 z)ip2(Ub$(LwTp|3gvVHjjBCXk)C9Msoo(B(x+*;{10D13ncdj1`%l}tlDoTD8Aqol zNBbI(lx(c>#)rQGs2@Fb>JiRUo|k9x+jEYyX;~iMlTDIzZ{Rhgn%KtsmDh_H?E?t9 zu}RW?1E6SHPx{5LDg{MFMLP^LQ!2lY4YaiQn+fWtz3V!^Idy6NfyHO1^EpvA?e>^B zZ`xV*_~`X@LN}10HZpGn8hh2()J_&s*kP-@dh6DHWPh_eyL3Kud8FC3*edQ>%*tVA z6{{6{Z%gq+N1*YhN&P&Kyi_(yyrU7v9^62z zBUa>uOGO0B_t+(oqryu%jV%oUt{eK1{6#%S~FIKoR5pD|o-L7{q7_6jWDNXE<~*D>E;Ceq(Y+ z#JD+OdZtzvpuVl6!`$qBP<2;kcXN(A)7VQVUJut0si$kCXra}836F{A1rjy2wK>M; z0|QO>(?9!2?&Jd?$+7VY1h;50{2c#Elf_RHZ<>T4MhkRG8UmvKss8z;wXoIb-?Nzq) znGBcNo^}8TpV-*gEycDi>C2Wa+wC~~$&4*T})+f}2M)$7-9XE@bfL^3zFM}%}!I%UbNkz!G~jiHW9q9LPl zIRS zzJVpK@8wF&ZvyI!bXBIFdNrRA7IN{mrqOC_uY;74rEz=o=;i@)_Dn&)%P%~OGLHdl zUz~htm6P}Y8dlHKbZexePu+jrVor%(PiFEa6?l8*EXcFylS9EX~Sx~#| z<8_u5GFxuUdV5+Zef|*A__*)SE6bVvi`&9&voB0fIyTNGq7*~~mCjjQSueBdA_@(L z8=v1~l5mmxKu(~Ue2=|uqPI$KP-B%~QH~EgPiW`OgWOKs(I<_g4S57^f6Poy&|na3 zYe=_CL@8{6B!BJNwF>7J&q!1WQHYX5BO}MYc?^sXy%n)S!A+D-6|!xlCu%;v0#S#V zAFhplhuOEjtYz5OT1#j7l(|ix@$m4t7WvhhOMm>(S?BkTj*ePjp&Wrt_R~rDGz{3}gizYp z!afw_dFO2dK$(zHwl3{G0MuQi8dnz=Mdr3+_x(FrGt{RzROA0nS+1LylOKJ^|V> z`}?2fKLzbLbGbk7r+^V}X{A}jcUnDObnf`3b~)|W8W#(BN074hnIrngp^=1g^g 
zy;84gDJX-}L8EsYMsKewj*rE2HL~tHnH(OY z8U3X2`g$u=Bzie+sWDoy5s_hDlaH;s${t@<3KWk2KK#`Rd77;NM?4NZM_+S}J}`zy z>-ROgw2uMTPfiWwf$tLYZS$LS90DNSHW{_H*o!Mr*iaA4GC4io!ZnFXf8d&*UwYQX zn9+UB?L~glBYAG4F@2f^MRU9bav)y0ZJ$Kk$rneNz1=E(_S;hJc;r0lx`wtFPCk=K zDoT>sNoQK~t=nMhFAQ~*=$rbl&43uFyOr|H7xj?rn`B*8E|uP4EPqk`l&U><(bPpH z^J3-^aG+AIL*WIyy?kG%PN~23#wTR$)d&_i@#qrbP$YO{HIyHE>9!$4*>dkP>p*Dm z)i=H5blI@*R6gJQ`SYt145e+FGPQ{Js}x}&Hu5&Fg0;7P*aHdcI?*hR1tLb-=f*R_ zKRa|+F;Fc8Njb(;illE>34T5$t#E78RjOf7Df;pq^Gy?aB^CW&*S9gPKXS3 zQ_7p2$wD1yS?fIXxtQHydZ2ym&*dU-s;lRIA{ZMU-?s6D#Hs|pJ99uQRKUVeM@>yF zt;c&HQ4)kC(z(CMhG?%5wbO#@uD|x<5h)PeXZGgjNKYfzj1J~4fT_o3~)%^T6IXR=moUx!Lf$Mnaaxgx8IyyR8R#T|qcQQEk znEhefoNZy1=QR$EairOjfd>Q9{gC~I-!|(pR-=E*yK2*`f^C&pUsK|ZRB2(JA zzY2AjM#`pfOAt?BXh**l@mPU9RGG5=m{Hz8I(?Cgh4pfrdKqGA|? z-&0h&=VpF*9&+lWKR`pQD(7qMvC2?pOgb;b@++67d2 zVwB?+jdh@9hT`&M`Fi1VZ|?~W0cuXWg;A72BG*P#sm^x2*qEu8boxs2+0q07jR*;@ zNIaC=_l`hEkU~I48Yt0vc~7pdXnL5s?_AmDauI*SET?J|xY79X9U%WfS7_n29KU@! z!X;^cm@W(POIRX)$uLx8-KbQ`iS%+EK6Ay_w+Ap~5_CuPqhA()-uTPuT5G{h#5&^J zQ76Tygh-`*WdXc7+5Y;b#`*Kl(iB7uJyb^3C>^4}nZ=qFGbyNQ7}()JRp)toPfxV_q4=2M zHC#Wg+-EhWCPYV?V4f!qFBmtS!-s$W5GorBZi$#~b(|BP9{cV5seok3f?!7h#;8GJ?iIg(f4s$ODjpX+)`yi7 z0*k>jrMpag%q<@I1RUGW!L|a*mj%b4a$os%!4je@^)elzLtVdY6gyXTfLI|QFyI); zoXPH3C-IjK7cXk$c}^t0@<@uC&Ai%-C3aG8-U6ic)TVa7wi=^J$?EH zqC3ZxWo!3*Y-*yyo+{(lAV4&JY~YK<>Dq)UtJg=ctM$F6ZE(LpPz~Y>=P*@uDC*-L}cHDqg?# z4Jc!(1)rj@c#UOOPx%lmWURy^^vRm5X;*ZStu;gq7ZA^^-rn8?@87?F17$}o-PX_) zY#o~?=;Pp*<14q|O6RCWtMeB$Ha5Nq5{vx?YMcYbfwH|vkE!4)_s@r%dbJO4d=YpoDCVD;GMWIMPDYvqYU~#)pHQcBI{Ev8pa-{8c94iJ?Mi8m* zh#do&+bJp-?J1emA$}v=m^a$>J;%}Rdtym8PGGazudFu`1)IYP3Hj;ywm)C{Z#i^> zQ`}a<@YZK;t~(#yKTb3CbbM}Sjwe?MUpLwC8hL|M!i{ora%rWt?pXTiapU%vUqkm2 zv*hCD#sM~cFG*%*JeTj>d2zOsgA(jY4h~BfS6BMBql$`ulq1-hVsTZ%R(j_xam{{! 
zPbRw3x!JX7DJ!cOB$S))9_;1Bk>3p@{?$PW(CRf*-5WUWoZthF(iukTlr)S4@Y-|q z)+UR?M~-X+rzb{7_~XlqoTwXLXS)su%W;t0d^AkX^9C&>*;jgzj_FLaS>c<^V z#4e~|Xsp};tepF!uqunci-$cAB=!paa$t%DAzC%#(7?xh@ zeCs8WLVy2#;qfrJkU^{Y$fLDq!#pEEj7oTZ{`pc0&hKMY`RO}@07Sg`ESHw7TeBv7 z^>!6{6?hVUZvd0r%rAQf0mBl+iIm$&RDxmFHn8#(o=`WWH1WfrA@g!+KDjYiG+HJ9 z@h_WM7SL}T`;}z{{VKX>z9{{gzs6;S`eG-Pr_+d(I{{I_8sM$y&lE3$Fp~26kpY6b zI!tJdeDled~sHqHva*vpIew~{i%wZx${sg{I{;uV<#gdS1-ZNsY3CU8xA&H4vgg#i}cK^H#STK+dgZl<>hXs%U zg5?E00<{C$uX)Zg0>_?n(WtgD~yYiqkb^l9#`RBx5^WEfd*Zgjj%by#&#FzjfVah7>9Tq(rVi6ZsY` zTv#{V*UQlgc`)|qO^%_VA$34xyj;}b1rrFjnyHprWLzL|4!Sv#SV&1Ky#|X*RmJqu zLBydoTwFq+Td@4lKN+=&#yZG;eUoG@$@H8`11Q*n_zsTD5FBt?&&8e^Au*cn$pM;0k$vfA=B0E1T!ncMqN-W<_J= z+{T-n9f!Xhr`Q8ojiLh>S#>cxdcORp0a$2=UryfcE-sxAzcc`}`x;b(B_3a1!d}wS z(o*9zF=zs=$~Z3p?`{F1a}vb5CK&3*=RjF8v`bnscfbukLaiQyTa3MCLg8hO=1lcx zNqNs?6U>EeN0EslBYf+TYjvYps9eZ@<2cZ4xcAJv=c&#pQc0OY*tV(l%EL}}0SG$T zEMez9mb{-H^+3rpcNP>kep)CK7!@GUNAVmn@X!zgMzXek9%SUYD=qG%+dFnwmBZIG z3Vm!RR47{)yjSXl_MK{ZL{8C{Q5| zykFOV^?61x7?KY$HK}Jy?l1sss}SOt2JGN)%aOhUdgp?t%lR(uOS?3qPC2iRgP|!3 zxUw5%cGL0aSb;m8`$z)@J7$GM#@1FFCub~*-gDnR6;2KouvaIWE0b~w z9>iIy8TAAuBi}L9TzCRU&1ReuhP~VScr74#Voq;zNoyN>d^^JQ+f9}qLh(3kYlZYQ0c^^V z%f&n<27O~1-8AAPFMinp<%z8zUVK#P7y?BB>r47MEPg6J%kwLmiZ(Hv6Jt{sw}z8M zj15qOwsDtVc^!_P*pcBRZdWJ^cKi~ zs!P~5m9@UQ+|kn$3pqW)MbC3;tQsId42%jz8c+mx`En*cUC#vF5F~DwfG{Zlu7h*G zb(xN_=QE11&>hMxa#3Ie?LB*o3{$uu_1u?1*hisEY^*5-r3_9`3yh6s#IcRCC})Sp z$7}Itu?V%%z$ih$77G&i=_&IIl*UM07q4zg4C`6F^k)Eavmf*ZK1BBB~9qX*s92-{8Dj>6>xl!iOvc1EZn z-zQMbc^Cgde>RXa9(wxpX~<4;TJd^f#r~}HI4(Z~eO_F=e0xhTI5!g$lak6&)0b0A zBAg}ZPiLOqx@B{}nV+vD=sJ|)yPX=|>N^Vz7tC#Je*1s3`;(sVKX#zb<@@FsovmvD zI{t<&`KxaNUeW(F{zG8&FA(bg{tM0@ADHi--N5r>oAd0`E<-ZPUYd=1YRVbm{FB9R z|HKFR&%cVs!dBEpC8v|lA@vuJ^}%R?48!eyoYwAA%4pq!L6E4RFNmH1SoB42bO_MC19?&k95s_@9fXDDM4zGJfYIXf{>F;1KNT+%I`s=TxV}aD<{s*#Ay|+(#Q6}`x$9jEYUuX)MHn=gN#`Q-4EEZkBKQTQ`4Fe-dgO|T698@nYg zJxOf9!Nng6Dl0W$ajXTwNV#6${d6sh2>dL#D^OxIb#?DkG2@U)QWpZi=boNjz~u;) 
z3{sM$-XoF|;3^4~%|d~th0?$i>ASdwjh4eDyF!p8^NoJq_i3EtJ?_NZca3 zP_)K&T0trBCrHwtMc)(Ff#TKh;;bJ@jHs}TQJxUEd(y^1PvgJ8qLdmfZKxs~PI>;I4&YL8$9*fhu{_&(BZC!679)OThvUs6j+F3bZ?c zfjZ38YqJZkQx<5Q^U_)L2!C#`25`5?_$0rH{RUw~;~~ z6hv}c>rVshxb>yhf|nUS+#?AA_CuJwU9Hn#>npy%Ie6%AZ!x(c96>V{aXo0asgFvS zT%Ra1atAY9$H=uX!IMPQaO_8Y>HxCD%*+fTwGI)N&F%x_mn)X*?*z5ikn5R6PLbt# zOwBAOo3$cF9}@=f3LT)4XQv0e)Ap}lzaGYd7V@>;*bCq#>4Vtm~s9W zazFu31vf8hB=6m_<;!18)#Cm60_SiA9(HC^E5&j$f4B(^0u_Az^yvu-=BzOk({BND zA#uL2Vqdnb0zhm8B`keTNuWp=UR*>~wUp25xk@A~pKJx8ZruXDxvoQ>Z-YKFoSGKn zmT@`ZW72t3y!rVR`<^@iCuL@nWAS)TQ&u`cx%s#qDoKX$95&zFQ8BXSTvxwZD=Z>b zuWxMRka2llDYx!A|B*^^#=>c1?X457e#E@wPC2CQQIPhsOgD{^$=)^sjKHKSQy+PP z2Ix>+x^$^Xos-VfX0G|q9maPIf;o+bfLWU$D=K;doB~zEpjwnL2tS*KK zfIKD969ktPYyq_Im0mSnn8VX@oW%E%Ypx8Gt^?F1;MHR-p5q}P zn|2h>IP?J~>Eeb!?fiXaJa0A{!GoM3oClERYt;C`IH7jZyfpR%4KqYS1S{|N6GCVihpFRl3O8IP!u8ZQhL6}0BicS^&+3Fd3vJO z@)w{7!<|>EKH*os7X?a*gfbxaA>A)YFyun55Kyi4>*x;~87-x67<_c=*6|tp%8}FW zkAOE%4w3!wsG@J_V8Cbmd?#;sk*KJJRG;uh#{`_Fukf8o6T)9>1LdvPn$Yw_u%H$VYoL)idH z$`OZ=!NiG9I2-Ev`f({#dN49`7HM$casl9BK<1g%LQ=Cw(@)3yhdKy)e&^;649hIP zU&qq(e*>V0W?{JMC}Wkug(O)a-Q__1OErJH4$(()1>Go84{GJ0NTRQ$i!>)x$lUJ_ z(%EB~(co5Q5k{p>Ng3q-&T={2e=sR!2z9*;pd=E%hPtcZs8=AYS%_OVWjc~Zm26gg z+A8ZpyAQd7krt0Yu&qJD;pzMH@6TP0&;18J3Xoa0Y30f-UAC>M#A?h;_K3(%^}g6? 
zknz0fIb5q&voq6a-{OFQ$gvPjsv8K9QCN4okxSTyU+4-_vAj;$APVdB7PbUXCp92^ z_!*Uvsyet(go?1JWJ@sNcqneC4c+h#gd_9_?Ao)ZBH<_l2GJlwb%13smh9NP`2g~4 z4K$>?Kd#MXQkngJ{QnB0hISzEU7@N6XWHP+d$R@ z@9!P!@zTdhBZXQe<~DMUB0dEUe2+;_E%;{QX`r2#L5A3OzV!fru?0MB(fU_uwJ#}@ zj9xhIQuo|s^3!#Zk^Za!%TIq@Rha_5JwncH*jY%shW|D8#N&h1Wdjen*y+EP>Kj{~ z$Q5S^lXg~t)v=~3T7#bf@0=#0ni$>=Vo`~S7rC|IC|tW{4Icw8*oR~*2PbAhyQUG_ z4*UxXJeFP)_U}C_4q@rlH8t;`n+fQ0D+5(&Bp9<%=$wtV?^0p+2lSb(L`1ZEI1rnd z8`L{VHn@L9VP-{h+Y$cG)P05eKfK*kx<;yoIW=tGy7kX~y%xO4F6>1`(|HfTX>>p2 z?~kJwgGtrq2cG{8$PFyk^Hh+DM|Ww*mEmx1`T~dfas7$A;h^S}m;k>f&W1^;(^zn3 zIQ1?-{I+aNix)P?cm(W)I-&;~6m#-LgxBn}b7NEF^p79VZNKm(dOMx@w@6I$_}@q@ zqiFJ9kr-n{uhl(q!TuG8zyX{<7Y84Mu>R<1H4OBQQ)4#Mfa7eD^4R&nm=lkGqaO-l zts5-EPAH4$lu13`cF_0xT0z~i{((8TyKK#l)|4iWC-Gad!4m*+rca4OhJsj8*MCOV z_M-%a0C4Chpa$-vt@T$?0>s8C7tZnq1<7!dp`&u3L?%t+(X%XtUqLq|l0^{6m#rWS zYA88S;GogK*>r${-MI1;@F@wd^qas$c&fhngTQJjxDE{U+gp!pijc)A8SN}DNDuW*Q{E#wRmQ- z1vGLAG-32(U0C`YPhLsgKoGlX+oA?8F9o;DN8btVu6V2z7lEh{9PS>3uAOiyfjkc3 zTQH|@w0?7o0~&NLFvSHR$OdBdwTTNU*kSUC=#nr1c)D}_?d;t(Fvp;9wCJjMR&txy zmtbk*b38|kWp#{YQ)ViRXcpskFmf=%;nI+2nxiu*4xSG&uOBXkd0tH>8KYmcU>{A> zcpQ%yMRG`_fT4a`b2e_X9NE zh*2LN*!>!~prcgfzLTS%26(Ir1e=2Ta(bH9SK$m6wx51d!=i++#exx+ZX311a5K2*2`()$-5uxbcF^a=zr}Ps_nSNDPyZDi?5r_J z!P&>F%2lOQd|o=Yrz~AJ|7Q{FKb_G3oecJ0_X&TqJz9x}!A#91x&AVHJo$D@O8}_d8)EAm*D`-dzOn%gy)e~#1+esd!7fFD}YfaYq!oLjE< zia&Vp;NMIMt~)thcEP@c4qQ0%D^Q%Hq!~UiY=rJ>KvZG8Kj5kbREX}sNjz&$(r$>C z23})p(_upS-G^Y-*n&^fM&C6$RJV}1=47&&8Y(O9C^=gE{DR^W3YS1>yRQ{C&_%!k zD+oI1OlfF5meCTl3i-G}9XyqFA0yEtI%ojq1Zvp2yF=|D_`Px@a`;cg5vNKMTEGWv zXLRFnuSI&oOAJT84zMc+nkf)SG_>aUgVyplqVpyYc*s}@u{p@P<$upaDSgZKU9s^A zI3Ejm_NhOTFB%FKohec&rcs*mGUgn?kz6SLu>ThC?(b%d!PfUbfXtfGIbOs+WyQs1 zBiMX?23CWSHUPbppOKh7bj@%IML3^;R_#w=QjSXaUQLwOHdswCTU@`{T3Tk;g2wW< z4kLG&py0cHV+xjHJhYStKJ7WiGB!GTmk+w1P<<}GGvNC5>)6%a1}Gdtpbznyf`!AW zV>#Y>9~#s(p58_G1)3_utn-lXV6QWDn!P7@lKJU7eI5(*o{7q@_&_a?r@#yeYy3=; zbh%BB+#V+n-+uM2sKOE13WIx)9@!eH)ppmNy>qS(XnQcMOc? 
zRXOg?Auw2*OrQy3A6mfZG%@#?{JAizjmKl~SH5U?egs0#;%V2eTNeSb_#h5C8AJ3`kBejZ7R1A zj6+A=*56hG?otBJi6&^R%y-^kn88D8geLI3ZIbV(6{DziX|F<2#1H%O8-LEJJkQF? z%6x;geKHqI7h;u@4F+q(l3|?WFI6@A5tkHp7KF?X;l>n&Xnc4yLM(y!vHqr5 zm?o*R!q@7!192lB=a6guLy6WJh@pi_~ zoHx$>q@q8p{&{)y--%kt6)ep3?z@V>BN2Z;(ct0szd=d=%_Z#MnD+^`#Zv96NpMS!G{r~hJ+ zzQ|}Uv9}iAis{luD}DyqcT37+B+d_BY_=Cn9^JQViKF^2g81!D1M zMvymSKqr#C0+}d+24#T9o5X;U)N|!qg>;`EK(mr}l3(vW}G*Hprjyf`%zI#2$853t%=d-)a)!^m)AryB_)0^!Ob<1_#EoOG0exz zjd%kCgA@=dS^KS~$2p{(=4c`mp_Xv|VPZl6RGz>XsksE=S7R!_XcB=R;r0&Ydb2$E zbGx$!a!FL#kW6Vi^>JR5NA<1e7LTfj{&29paUgaNxKYg z-ZIB;bKfrFz64wo19w9w43jF!Ep#~my^3pV$1enaKL1zE0}z~^7*rsI4%V6k!&vB8 zkd=3A-Fgr}g#gQ4fB)8z%r8(pB}2aCh*xKlUk#PCdhXA2CsmUX@bDB-aMUY;D;~@S zn|nMu@nhg!lH}s1uE{hFzjze(VV~8s&Q>xM70=)Qfc+$8snH{RR@%lD-3c^;r*8nK z(*o~F^Hq?3G@&f6DV+ml{$3S)EbPGjFk2Dr1y<*jcrPeRY-S)XCos(B$o5){<$w{E zbmb%#ZMTS#yY{5Tn)e?*3!gXuzMUV|r;q6?tpa0B<;AjL#gz1ws?ZN)*hj@~bFlUO*Gk#v(q%I%B9?gf~ zD9N6CILvCUOnu`bcvRi_%)!it9QSlwv^$YKMfT(D$GjHO1Npr#Pl}eF@KGrEx9}v$ z;BObo0XN?N7c=0T_@O_l6b}CMAH}%KlM4&~`*fi>df);q4a{(`l@S;D=k{7I&h<@@ zANS&U5(^puz4@XccXa75r9TUQ0RvJj?j^ez>-_VJwdXu~P^;r{DIzb5x%^CA6>dLu zu)ysc?kwqYz`AHVPvc;5(f%KHr=;b+*ZHXCZok0!`met_C%&~`yK%##(_-bNy`t!W zVyGk9k~8N+b*stmzyJQ6b#_7BEmgU+wg5kyiI#jPkD(;iFb~y+{T_9^dD18S&>_VII~6+r#S99~X+Q;LW*CFUQh) z;Xi}mHlBcBW8~f^Mz91Cg4hMzkh{(1udX7@MqyAIT6*rn1A+dFC=y*TFe1ID7BvVc zK3{M2ttdy9n2>c*s%byw)L|@6a!(dU?VwldO=F`z6#>d3IpN3$ks2KtI;W@ZkG7x? 
z;b#CtJ4lwM->t2IgU0j+$RC>5`=_M$322JBR!%F zEeJhJWaXt%Ok~HA_nt8|T6Iym*y$MlkObLq#%h|B11|_FgGyLti9_+V*p|lb$0fc0 z=DiXF^-Ph7FYYU*PB$oZ?LXGNHyH+kG%p0G<0&n_Em~f2zL=Yb4*V^s0dZ)kjz-#} z!9$2S&k!EZkw@f@KY;Mh`=RdFp`-_Mqb5pfGQ7e+q>V5{efB8f>L=Y?T+X3HaB4q` z?~7MHx|M9DcWGLJ5VF|VEC~Ntd@_}NG^wr=&3MEczzRuoZ^9j4W~f45fhZ%CwZA~L z3IYtYGXe0aX=CukSfSc?@lI3Qj!WE<)ukn%9zcYQeElZ?K!L(FfeJnI|a$1q>GWCwOgrRB4BU7PJ_w(MW#}&( zpiY!~NxtvLoWsD{~ENn;96+m|nA@GV6(_x$}&X##}40S>+Z%DE_AblW{< z--gwzP2-I7noT}ZqZIQ7cC(E+Y4jgN;wTJIU@HJ4UeMLk z^#-gWZna8U3#My41y+d$oJvchN*UgLr>Q2iN3UJKJ`#jZV*`LZjSQoM2X(+BWjPB& zpz#(UDEjW4&*2!o z$kJQ-}d*;qNfM@0gi2+(CeS)J$qPg(8V8vIqw| z-N$+{gOc4if3H8zE)536=sg&~D`2d!DF6bF2Hh2fscuf~Xf%V0ppJvDuNQdTjK~5+ zIBge~44M$))7O$m4l^=y6A%z@b~yy+O_#=wRn(OUy77ZaUO>Osw&tuBtlV*YDcxzV z$&R&gFg#L;XfYSKxy!>NW3j|#Nx0y^!?u^%+DM~H(hK}uGf3Jwfw+tGX1BLB_qMBY zQwt$UE^sTRURTw`lWiDB=!!$k zI)Q2Kg4~JE_wjyU{HkC)&vJqqNc9`GSqHWR2l)Rz;fccVz+CDDCNp%%PA`Xvnvho? zGaZ%jGuZmD>uGIoGKkNC$vX-jA3~P#(=(j?GH^KW)Jut?0u1Nsg2o>X(9vw2-Hf1J z(GR4q;X^(v%JB|1Q--IfwoRG8-iwKB@N0F!C-=f?76p3WOh5YnaW@~6Tm{!rGIbr? 
zGvE4Cai4RI93H~MNzQ}_I^0Jq++(U;IMckQ-Lmo!z+#9>f?=9?ZG@PL=jyu8C?7fD_IhQ1GE_Cr~tQX85l_9lRu zivl@GEhBw;{o&a^hTdWjOgTQUGU{NsCfo`rO=D2%?~s#gPHW*$vr9HUBS_RdZnG9# zV;Bj_TR*d4@d|&wvBDMGUQYKn%i%2~;U3?W1M>u0UG=Zs>deH&0cOa=V5J!-1^c3Y z7U<~x``J-gmm|ZimYx3YMMWt(?MOL9X!x{SXM6(i37w2Xbt+StfvGnY9L=blRRhya z)lR&@%RNULi(#^$4ptZwD<2&Ur+F-E(2RhT70CtdmYkwfW=5wB zItvwJ&s>6ME4&n0KAa*~9aR(`P3&-Aaf)<)kCopjt{>kpR(N9o7mW<` z5MkOz97g~85jb`TV7C%yxDXaH&>CrJ6kKuH3G?;Ut~Xl&wU?A(^3_3wlJYz-f&C%} ziSHQZuIl@kV6d>8h1$+l7hq!cO~cA{g2vE5x+Qlf!Wk*plMqe&(mK#^?*L2(v&K## z<)f=eoyK90MLxmw%QQ}>(QYkYjo}pqWq@UT4C)JiA$5BXnMv5?f1Da$nh#^oNy3_q z12~yG(TtChtsJghjfPxtIV5u#Kj?2&gcrxRSk z*oDAl+tr5&bhPV(+@^5@vdSg(UM`5$o@fUq=TIf*YB`IHpR^aF*Z&9}T8S<>vK$2b zCpfG$`jbuE?w3hUj3Av0ESR}RE=D*&EyT0I5YxG$i67KQgYuHu-~1R|h#l1YPoxj` zv-VjltS#;{3G)wFgDI(NFE{uoR+ zJ5j!(){P?55`s3V2z13U!6$MRzfzQPpm3T+2Tbc^mZaara6!ORBN#Qx!4$!K^U-Xe z_|lTVk4_F3E~vqDMI9uhDS}*k!6`2xQ8(6(u%Hq$Hw*}cig}%7E|=u9K{@KDUo^j$a5$qUcEty(S~V3XnHO>2S{|GHGiIOEK$BS~=2%}qo#GJ1aTUXPnJa((`4o&AMJ9EyBHzRf*a)`N2dTQPug~U=kbVu# z;=7IE9BkD=`ln8v!aO}NK1rjE;NIY+<)S|G!QtV(SmCp{HLp$Fa1USOz)|Zwb!k%z zGv?d8-Dj}%qLQsNx1>E^acMDH=ISFxF>Ycbz}$Yo5F6^INAt{4+{YWZJ(APw$`&#k zeWEFA-XBw+^vc9=WgzL2U5R{B@wkaJJZ>*)Ojb-O;XV5y_$Xr9O#t}4toQhAbXTS3sb`!QCN-$i)azFTFHAf+=_KPK^gdgN?)`!6rV7W+ijOKdJofBqL8p<(p*tu zsbaJZxT1Ha50}7ZLLIIV-!L!O0*rSM8t=)n2H2&QU~m!+?qkDxGy{nIf&_;-s7a3l z8Ot1}K?kW~S_DR4@ZmBB<);T)lezy2i0%?+%s5LY@qg z^UOp;^jKNtd=?q0g{N39SwJ8+Mp)K?sz5^9=s=4x5r|0BtGT`+6Zt(`hj&7{DJkqI z{;@+CXolS9ApSewi?>wa`Ou3tH^EARjK2_PGlixQ#sqK(FiEs7K^A!ld7j3b?SrO) zldep}0EAY2#>jzT%kn2z&~B9i89_l!ilud>>mGT>c`Pmq(dV~?A%!Z1kG7NqYJ5XD z%H+9bQ;g5#6F?Wo>TSnPz)Fr?MHwbcjt2mN6NMj zwu>cb3YQ;In<%c~iYV7^L7)*Vp4n#>t6*qqc?SR#%bOAp5KZC;f@CHoP<)sn%zwY$ zYqEs#jOM>!k_wpzsDy;c-0+&E0Xn{E&yj%}QxSUgNXwOS>Wu;e*^N0iG!ghS28H3J z_$7~{@P4Ymj`{69Fo~z8E?KxR3hV=82CaIE?xd)(uyhr=If!6W($c7)K#-!jdEkjU z`uc>r`UT=iF~>_|!lMWc6QFa1A4N{cw=jt@N6;vPwN#@*n4I)bD3z&05wR+HlpqU+ zIICmAJ6R<>-e1zt`Xq*|+84!h@VKpRfgsl_X_627H6JMBfIy}ab*x=!* 
z6XDv`tGBbVrWOcg@zGo=5V&f9&@}0uq+qId@Sr0&G~`;pT7y#+Nux_p`JUf^!?CA2 z!B8Lm`zp9et;H9(V=PL5!Iy~%7jpPhx+3c=fOG?^2%Kf4>VMvczv=;#=2%2;VoJsi zDJeaiz#7!>G=ZDI9bPPX^y<~CYSbZWdnyHv6%6;&z`|?6Q)1Hxs+JNer7zkWkRCLX zni>m0(l*l~r1;(NKL#|Q`+Z2ZLpi#6yl#hX3*GBD3u!QC$+Wtt1`esD; z03AL6BdEt|m?3>Psvov#^d9XL5lQsbb7G)I2_p^+OF$JcvkIOta9o|KwMNQ&SZ0xA zshbOpv!6u;VLOb@W%~hMg$irJ3)C7#bCeH-fr{9LHwPP_ap1s#m zkf4uw-m_+8!+D7Aizl!LH7+{9aH>=~rdQ>X2u52IvN?PbC&1oOXw4Za*~M0gY5y9` zHXWtr8*i-I`hxvtxPTG@73_Zsh#*aWqJBK)^QZSG+jFw~-`|tDc7k-d!t8(HWEz&PDGojZQxOGQI zZnFLBb9Z+Pwze^qCWzy=86!JFN2SWaw7{IcM&PvgM`U4(!OXx>hu-JUpWhKV!BOjy zNL~k29#CSbEC)H=0TXapY5mIDsb^;O?A$Ut3Pfp;7Bvo?Z}^Ig%_e{5<3KZSi8kmF zP2>i69QbTLX3*iQ0IrP%wzMREf1VeZHJV>Ve-{EOrtQ*+cYYudD00pPWB`X>z*Z?I z`@s-#XorfTBK@@mKr=lKo4mbe6|~zgEA?=IwM;}#;BORp`7}?!Tz0E84AU?N)mns2 zCCcXpj!F$o*)2VTjJ%8BCyls6Ja9yV3RzOAp27>BsRhrIsrS%`=Q1x`9y#X|uSSk^qg3$P79)!I2)MG4+9;yyuTBJLkyo0stq$`8Rh98re+ zSI|9vC^=&6K1nr)ejJ$3l5w4E4T`TXmdZvVAmm^c#~gh`ljM~%-ESsg=q79(X3jA- z>ZPw!iJQR138xXy5oa9-QsN>vwzO;)vh)Dg*b7e&d7QwmCBRrOeXJd4gYg0LdyZq( zD}jEweE8S>fDd)m^zO%aBr>pI-c~e{k3`Oy33y`3IUW$VoW(zlnlldU0W_?}-xWda z^(eq}sD6PF9lh%3;9emv5;e8}DlgVe5+~sAf`8~{6?(3S%As^Uwoezaki75cNYut? z&Z=k1>pO}A6wu#a4OxE_9Z^b|Q|GuCZWx0GYR-^c!gvmkpsIx(p)*L6l;w%i_*pp95~n? 
zC4W{9ByFu6EV9OOk%NP@h-nuZiHqK5DMFcp({3(Pq-4$Kd21)-;<9$wY47{{{l3rl z`97at`!Ll^gj2y9jiRG|+IFxlXv&=REb9TS>@$(vF>!y6sfhB{LQRqcw04zef63VURCE(hIJk>k_ zJ7NXvDSSr%tBnyb5$n8kYBI#j>A+2jQW@6cxohYt%qRoT&dkO*U1TTp_LwEVboQ3$ z+vGdf$&62Kff^K{K61X$nmgruQoV{Z5TQ|2FiOKxoGc1}Sxb;?GC@Q)f2nc8M&xf0 zW%NPGZ+qY>qnJL?N$2`r0S3LIZ{BC^%Ib@aH3+DkWWZD+P^mN5XrY$xu!N4_OcO@m ydvB@I6_heM< Date: Wed, 6 May 2026 22:22:18 -0400 Subject: [PATCH 56/58] final reshaping --- .../Untitled.ipynb | 6 - .../ml/house_price_model.pkl | Bin 0 -> 480103 bytes .../ml/train.csv | 1461 +++++++++++++++++ .../ml_model | 1 - .../playbook.yaml | 2 +- .../results/price_by_neighborhood.png | Bin 27259 -> 27162 bytes .../template.API.ipynb | 4 +- .../template.example.ipynb | 14 +- .../template.example.py | 2 +- .../template_utils.py | 2 +- 10 files changed, 1473 insertions(+), 19 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/house_price_model.pkl create mode 100644 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/train.csv delete mode 160000 class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb deleted file mode 100644 index 363fcab7e..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/Untitled.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/house_price_model.pkl 
b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/house_price_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3860b0af5fadeb6533f4c36775c2faef1248ecbe GIT binary patch literal 480103 zcmeEvcUTim*LUb0K~b@S*c*bqXO^m>h>D6ev_L?b6u};`02&oF7E}})mfM1;D2lR3 zR}iF$AQ(CdQlzM$^35hpSi=*Ohxfg%@4K$=-am47X6MW~XX};6QCyOW&6yjp3 zd2Mo`+jzR^dN_H|U7XzLY?izQx;?=jyNA7#J%%OYy;+*Mtjyu6&;9BEr@Tzu$W zY}4J}xrS3%DmM1^G$%KECp#N&Xh{&O&n7zEgGP7t@ZL&upxb!+c*68SEO{@epf}xd zE1MimnO6g78z6zYVGRoxfR~YLA`8YC9#8e4BV1k?CyK$P)q8~rd}8Xk*l7zd2YC}8 z7Z(E?Zw8yCY(Tg7ba!{fY`2&~lNC*EJZ&85hBlsF`~a7S9>8Wv!5a9pz8QTO{W`0E z0Kdi<1A`cYf*6BYDz5Hc-ZTeK`eq-x8{`~nPAUSHth*bX;STlG5>m``H#>KGSV6GS zEOnzVaQ8OmMX>oLtq2vOJGy&1!3MyRw(;?HhiSb$pz?H@yN@?ftU)Xpd+)6tu;t0X z)(WeiCFkJcZsR?DD$ItZ%79hqLZ|t-ZE|z>h3!Dr$?(VSq6c@Prc+ppQZ*_qxC@gn(cR=^Xey~?{*0`CIw;hA-#tp%Gp|cb`>8>`g zi$JN2<#4Frm4G(`b_|BQi#^+vnmvVkv1Hw7&hEBe(2hR*21xg$`4U=q!)T}+Yd~k~ zL!FCjmlsi>usv(Sp~8+1+pIT@FO(Qch~{Ds2OK(4G|&=ta-n#^^fWfBFSiDC?O*}x zy85_yJK1=8+Q1r7hFQU~g8kRW&Kp(>V+l)+=vl48P%I@8}$D`2V(uts~TFuQiZLtbIjuw zs#!0VeCJTk9h4c97*=%F5c052^LFQrG4L0TqOh;*WRGF=f@NYxf#oS_CS@jVCSxXR zCTAvZra))RA)Y9jDVZsoshFwK8MF;-AC_WQb+Z{X#_|oYK3J-M^femnY$~KypsEV}t|4iLr^Oc!n#gc10NQAu5uwm9dSnow0+lld+4jo55u4VffQw z(J+F{6rq|9PTn+Lk7szYBpv=}8*>HP28Ae5MgH7EgWW>O%iG4=$;;cx4yym3wYU~! 
zXhF9`RL2G~ z#&PEj!nR&+G%p)s^7Nv4(dn>5F&uewm5nFSX`r=EBx@PrfA#~yRzvxRGCU2~2Viev z$qU;VFfvs7`lD?T&a5AKT9;b7(7dONa@@H?5Ic zbS^H?On*KY^#0P`62^A$&g1!1M`tFOLZf&bxmZFSe)i|lV9n3e`-QXDR##O~M+B`} z>Gat7k1C}L4E>Br0c^&c|Dt1H!aIfxzhmrW>|^X_9AE@=b_W^8Wrh#qhB@PA_lC^p zV#%0euEhK)tG@!`D>xj>nA+G9^N*w@#{1!BhZ?6L!=$ruE@UiXm<2HwGnO(e7|R&T z%^54q8J6aZm5f!aK5(o-6F-gaMr3FY(M*t;3`?nN^5hK=-Mab!WD!fwz{SW~6fu|9~XGE0erWH>1#GN65jSbV+#-!sIJ#e76#lzZ~yoHF4(-v9%YG92T$}-2gt7 zwRCcYiomES z#GR{$^oqq>T!iE<%g&hiS%@qCXGN}G$(sMnvDa*Jv2v8cOMUkz2N|@z<&e- zYBbVx2muOr3N#x4=P_A0r%BV%klL;PSrM?Q7Y`$%X_3a87g_SQJ`g`Uxxoe_0BE}qN1GprE^W!XDn8piJ%qZNj4-!d$GDir&xT`PzVuXv&*W;2KrI%g2_N8`SDkr&h`YUPh3G zs6QeMks@4tm`Stx64??fM5yCd7;OhvI8ZZEo_2N~G#eKWh7BoF+Q|*7YR*!I>r{Jp zR~j*A)7fU@STda{skLdumCn#Lqwo+rp(@P{t|tiWs|d#fE*BgM;Cd1+Ii28|j<5)s z1`UYrG(Zh1NVGvWcbXksF2E7MoF(P$?gEnxoL*Wp*l;2o)@HH#5ld-0oSQvhT7rR> zAyIZJ(*g+&bhp1{phU5@!Z%f~?+#TmMta z<*=_p2f-#bOE7nCr1C<0*x+DkFD8QAoLQkdwiVj#LuWgC5jx2)?2P8+yc46TbPjy(Q@EYL~-!Bwm@cO}n~MIi2E>r^6~A*fS8pIGY8nl)tx+xaS&nMSjP_2Ec|3ua<~L+J=~O0*r7Kc14<2~QW%=eZ4R6MbuU zsJ_HQ63$8x6#Go=S|0un}xazFWc+H99QRXYCHq*lA_yX||pOk?lT7wLB=&rrNl@-d5Uxbeqp)%E5M)@4dn z)pb8kZj=13=IZGz!}#(_Bj&*n3A3p#xx*L={XO5k+vKqS*sbqU>4n40n?9rtnP{=4 zuhY)n*F08^DtHldGi8wJh&!@Bjyb<*k9;`$(t9h{W^<*sq#$4OoZXW-c{8>RH`W>b zdC$-j!{)^VdmlVy5;rE<;f&UG!&U6q(W-C$88muxO{{aqc#SV%FQ%-m)sC;)5}I9{ z-EeSRc^lSS;=;9Io3CH{nk!vAc75UDI-RmZM=Uev>4eSJ8R_)tf=8>{t6#o%?9{7X zn}0P@n)WO=mX+M-*l0R=ecY7NGa2fObBYqG`Gk))ZxrfYytYu)U1ogCw#s_YX@_LVwAM1ysuPQ^+df^Wz^WM3 zXteOTeRQ>+hSIc?{T$4a!z-CXpKknW)$-LP@ub=P>=?Dtx$#eozDzu|{!%}7aB)ZI z66HBh?~gfIt>3Xj>YCi3Ic=Mky&3SqsE_-b*nt_#&+bb$&E1}BAXn;Dc-r^r_SbJm z?2c~@X~XVW|8NZPX(^S#IRkdic)p-)SQz!?`9t>o#&(QY&Kjw5KKi9!KX;!OhS9H+ z=33>2+*Ud+(;W2D&99fuoPWwk&WM$Z+Y#Jnv%>Fv_a$6|?1yBZDt_Hs@$M2;I>T>JL=7FOGpBpMNzmGpzy0Lg$_L|OGUC!+UTvXRtg1K! 
z)T}f%nXmLaCV#hKPUIMml(;)<%ZwilOg=GmYseFjHGTB6FY1jtxy)(J{aYlGgSSZN zue7 zGU|Rl^4mP2xp8OcW9Pimz$teW!?sl&j8kR z(L%j&uT1ZPpFal;F7UFtWfgUbqO$pJ*|yga|GXKya;WuuorIN9H9F@9wF)4RiFG`0co!;g$J7*&y|?bHb-^#)yH$~s0j`>k^_%SQj$5UGIE_w?7$I^d)IQ> zjp-9cKA@>M4gb2uPgj4JL(o+1)vIQ$+n}AkZX7o4zI^M9f*K>O98Se#rE6~uewemg zu-MSc=Fal7i{#ahS+}%Ze|pjX>8&drdeZLOlEUw=FWp&tFQ9Ip)WKEzme!w&DT-h< z-S_I#aNkWnXoh-?Ym(a(?ffSF=q+Qv^n3o%R=IS>gOr6OpW>)9H><2_G(Ql>T6!sR z?UHl9%Vwrm{5+QME~Y@Xkh4D^tw?!dw|2$ADd3(6)>@7ciCGwR(S({;ZsU)e>tcFUAZ$pTJfUPWW%=wY_Bm3ohoU^CoJ*wy}wN|WP#e? zYnn03CIk;q=&(A_Zyz&a$idxx$I5-Qd>T@?^TJ8*5PP@H1=>edCeEn)+@NhojrJWg z>7Fw+B=_1umi54sp}jvHSYP=*W*6(n!~JbE8ntiB(Y{LZpS*I6 zmSoK<$sd(4Ltk?8m&r1eW0&8*Tf}Vj4e=cH;P~4kZ@2HcZ9Y5Rd)y+47b8~e-8QpN z&6mfODh-~lvwc554^5iwtodfn(9r4Y6-t?5Yd>7OylA6pQSzSBjp>|{*)i)rwlD5A z+A=%u5u+~PVK3)$Rnye!oragF&u3S7g|^3Bi(5Nol|oA_)vBfJ_Im#TUtdo!n=q&GY4L*@?9uCj-(Qm%zNcS7%Fokx+H=SL_Kg{_{f^Q>E4RvU*v_X0DcKB#iOqDAXMIPUuR9#Ru;l0>saMJM z(Gw0x&2%p-Jo^o9VcuBOK$&*sw<$VTcf=}9KRSMM)9}@s-U0U4!@Jfc zEJ&+yN+^{&c-X>k&bKoaDu=Zg&2l%?q~BX8zA2h9Bzo#2L(i2S<0!{WJxg+Lq@uim$#pEqrP5QP(xx()8YW&${4$*4&%UF>QX_=;9;4 zxpdb^#i?gE4SJItRp+4N?$|5mrIwUwd&tH)myT*&zE>M~&w5JQ&yB%)Ma30aS?gtu zZ&mnOcv1@k%#znQSh>=q7KR<#y>9co(%vD<_eG}doH{e;dEcidSm`)ctXZ?N#OEUt zpN_8mW#Sfe|9)IZRNu=ZACJ{}cwz2lzc*J~HeI9dU%J=T>w3+2{Y4KVXZsn?iO=10 zoA%mqmdDoYJJIY6`MMDmss4{Eilf%8h|SlTACQLUSyj7T)h}7`Bz)FkpRsMew>p|; z4ZF1Mw^jdNx;M<7Hl?_FuGxHel;qRYMych)$IdaDWVCK3n3G|Gr%jDE7@txwZ~h!c z+ywP&ZXv#t`!`EP=f_?Q;Ouw&5d8GPo%YME*H7$TwdI`xhv9YpfZM>UADfICYMtD( zCcQD}V_rL0qa-^p(rv|z&>s!2hNj*&nTpk#=5;g-G4k|WBRfiN#3WM<<$}W2ikCs{ zald95Ok76^ZE_CWEcck!Uot2+|Fxp7xq-35;Ad*9C~NI^L_d*8lu{osQK$FVf#Ys( z57XH8R9*!K8DVoZCzktu*VrnL-DA%)I`sM0w!T^i+Iq`PuIP}>0n6(~&n7@LOA796 zvdsd0SPBdqFPfu`4_pt!1)vP?jt^peY@OFb^vMsil)31+b9u`)ci!;FQ@D-g<_QB(LEw6h0O4?1!C!IlKsoUX!8L)zA^0_$)!hvz0WssILG_ko*eW?FC~i@S{No|zUG@Jg+kPDZ?2%*U`4UFZ(p zaP{g0_kvhTp2S`c3dwaY&7q&5YrlvkkCx?vE5l?0Y@cEHih!2HfD%^5vO6*K^!~3gLUtw=Z zicvT_`255c8u|L&86d>K3%Blf@%jbz1Wbp_uk8uAPvH-lm$(^5Q` 
zD3cCQ5LN;Ak{6&b7`(AGFLrtfTUK*&4mN-+KX^0fOfFJ>tJ3kC^0QL0<%ofxOiO@* zqV&(ZQ>>%4J_}1m3pL2<|BcETC{*pV+o?YFLik0pG?xtt!I$BL*&=!RJ@+jMwa>q-Lh)pVCf!`el^Ed59$K*hNjKFwHUMWIgeQL`TKOnYjjdHju64nfK z0VpWS{sFq1v@UNd!Nwp4E;SOHUt>dE`r>&UZ0;ZY=6wEso$;uAN%m(dw@L!F89MD3 zeTuQcfAG)c+b>Fg%3ZA=PxmKdsC;Q^wTfr{cq>Cf~WEEn+m6NctV9v^>eox(+gvYoG!{RglF zCwmCA2v6*H{n6D<6U#$!E&pxj4u-tL7ccQ{ZXQ~W-#U6{Qq83re4u*oAunb38Cu#+X*J89%A5dT2qqXzx} zILRA5E1mZlSS^jbNC~b3DcY|FU#_SEKTVz-{k5S5+;p4Wek1ue7$<#sh)HuRu!#Kh z;z6$_Fg;f-uvDcS3@F|G>|oy?z=7TJ_^GQT_UN3meB@dRHc;}dn?gk8%u;km@2i&DeVpoH#T1UQD#Z>n&W{hSI=F;(Q>X?4bMiYjnHrMm))A`$@ogQ_*kJ$M#jC8(2{Pe&#?EDYG_y^Q>j)hSI0NWWS;s{6jpQze7j$luKKRrdx z&IGnG({j#$YCs}MnE>ZkjJ~IldKH-=ySw5!=E(*duQZjt~i^AB9hM$$s6|=Fv zqz4oW@!_8BlC%B%v(qqFp1QnWunwT0DE$+R19INX%fxOV1}>%Z`Tqj<)LK$F)WXyW z!{4kd#5!MV?aW0ixTn`BS4WNC5IQ<)O(u4UEMME6U;m=?f8bhN*l|1=3n24ztoizj z;+OvP*=kgM5{9NVUN*D&{G#CAzPmbS&W9CZtI6{5m167fd}lh}r*+1i>->DkoOl%Zl z;8H`e`PaX73XM=I!u0;&U%=-VgnOi9VH=lzrjAGt%-b4JkIf)GpfJt&_KVWLw&wHt z%?4H2R5Cwh5nsM2e#-7c(Gw>KhYJYaA-G2xKO4N#Yp8SGN6Y`3B5R;eme1_6AN30a z_ePV`Q8?5y9y$h>{f8Nl@`Wkn#kQZwZr?j!j6D3qC_uy$mq`1$sicYdmvnW(EkU@4 z<`I}UxL~U~V66zbd_koR(<=B6JPglZGMfHPd$ENgE&mZqVY3-H!ixP0a&eZcT zs5uDkUf@Zs)64%_?QUe)Mq5Ql+o<;b=TT75nVdq(c-5!=P?YHAgJ8&+E;V1k|val5Kj(btk~!7-RBSw6>uFJBPq zkyhYb&z|(e1=?BdS=b;le}wo@51-nhyxK4YTZR-SD6r(q7d6y;xi_voCM*+s^#{Kb zpZ_mVkL%gH8Ll_|Y;pl6LDpZ*gFjUM#k0qSdWJjqei^wY1B)fg=X600QTi*)UR8a# zAsGuJ^Hc2k`itU!IPJ5(ZfYW?^#?zd&;J*w#~3pxi<&>LAyH5LBW6J6XNvFHlcfAU zxuo?~k3Ca)#*5fX_g4sos)A6D)PL|^i9e}l2Tu10k?6pp$@*(|mA@d=Bema|+wJSA zwRMS#70O?*Q)GVJ7NFo?pkBo8y*1SNS19`jtt-VAAOaTKGY$=#ugpDWj zGmQZXisCnFa;|-^oR1+$z~h_8=NE)}B&ZO!+E4=62j7mdU-JnwM*<1TTp_mpnFCo` z8hzelreuD689+f%`fD@N%{k$pF*MHb_y+U&1)&~Va>I07&zAT5%{Mt$&{6y)$nE8BQSY0hfDp#n01|ZZRE>Zk`)*r3}j{hpuGl=RH zErm#OJRUsg4YWNyjOCWf7?8>ZO4a1sFG@a6*}~{uP=k^B73BXfJcHVQhq_`yz3UDc zImyRe!7ab`;VxHlaHl;HHy4(a;Qlr(Ub=6q@d;C!8y%w?@bhO=j@m3N!#^skMQ!Sv zhI90F;~w`Z!|hH4mC5hAk8e6rzjm5N9vC0qxVpkPF&>retM#u}@4xWs-M_r-SJAnz 
zJ%)PcZ^*B+pY{=ZPlkFaVnIDHrZF`(|Mh&gCk0tpJ{juaVnIC*qmj>J?HjY*yg11i zu}jXsf_IADRl!`#n+eMC*mN?#wHH8niR$X-bGLc<{EEe{AO?b3cR>v_1e1nHlu~WO zqCBd$+T)=zeuNWgl+-~LOoJ5=X&*I zBe(2Xe(ops7&RDzavb>mM-b|f>{sg_Q%&_aJpK6EkSZ*Y%uf-^t7oz7Of^;a-jyiH zZ zWep}FfdplCd9?(g9;yB{)gdH{ky`pm*5hY4sEyj0DE+Nw_x^n9M@zR&V5E*He#-Zu z+3Bf3s8>u7>X9HF7wVojd$h6Xy~h8wO0T`|BizXaveN1wy6@IBELzMQ(J@I{V~_4=4S!^eA$gV_hplC+5~xAW%yfs z>8fFC$0_B2=S_5@ek%FE>{{Gwg_=aLuYT5?YgccA_+7W0_W6GT|D;YjYMoyT^qRHD zRM4xyp^4+Gr1rFc^*WQ&HlO$bj1GtT`=osZ*N(n^ZFlAqsIhxsIrU-=(A==5y5V>u zxa~Z5tY2{(h_XnjajI(ui>Lg$@_CUYrU(jte>yFTMI5$XQ?pwdlQ5IIxZsftHrw?= z^<>Qw{O?8$>4?O__CM(7#aczkqs*vp!bMpVc#{M{GcHhVAv5 z>r&JW>Y=WR5FqMeXb6DMEf)EIjrFkIXrNk|g54%Ppy1m8;zvU(8e&iwKA$-@;!bRS zn}p3Lja1AVF>vVyv4`3xr+3X!8yeT+_l1Io&CRpyNXO9okUTZ?08-~KP>&LKGlsg@ zCHnVdwU^jTvi(divGotL7}Bw__w#O03&|HXR3^Hulb;g#vfBm};{OZO10XG*D*qxo zBYZ&tHjxa~8GHEp3wrfP?`x@H^9y4~-CZ`{X+;{gn{0oBFTeam>A!y9b=|i^acm7z zn4n;l*!=EicAC96Ou|NyQv2gvkqM1eFir{$J6Vj~CCkUV-ggtFzv~6J zq@-!t7=z4DSu3{wPvZB51m66BnUMLJU0xwkP%mns&&!aN?LC2d-1qgusr8yJZPnO* zvivaN@9UAu&turbCTjVCS*j2Em0`|ge&$+!`w>-sGqHXi^EVe@#$3 zo(w3#Mw0oN1^@*SgzDcJfyN;i#GQ`Gf#3ky!>XR;7~0_EC7UC*{=vP9DO2`j3Vq*F zl>T^w>qgJ|N}*mQ1ntlUE(-JX+zs0Us?YoR9eO)H=6 z{-ehmplXK?JnEZ2O9LMYS}Reuz`qhs%1Hn7F?>K*#E2@E-U8raNCyxc$PEv_b4L3wba67PsiojGKC z174eYQp2jO5&wK-rpu!j#ki#=Wwl05DgHEwasQW81+G}8*cve7Eq~sj%nR3tOGZ^`y|}b zZ3CkM>cRDqa}BcFaX`VJ;JKOg>LJq3$j`|;|FUq3;L zFfHi_b5rt9{?_IR3g@ zT-S~Tp+ChNa0&bo#XtYmgn+h@`Aj|RXgF<`Pkv9{)x)t9rPP|E zS8ui&XY|;s_d_bMV261kwwtU!P6H??%KnEH7S{$`&+O)vLHvJ#dfcNKNnbQu$`cAO zDqPJ@)D-v)l}yv=nuN8b}>Ms7LBQnY-q6P}x3p%6*Uhz#!UW5-Dj=1G>%TrYO{9o#zBzH&ew zW=Q78EdUCNs{iVyVu@VukC>oW3-Jp=J$>|mOF7hsgTh|Eva0G5Q!pKQ6D zC$#=V@dNqm$NVqUVI&9xHPZoH_23VJb_nXBB{zJ;^=bw7KIAKHC~ttC^Z2@smIy*U zd6Y3+Y7-my>$R7}XO5s4pmO3p?(!;%(tli|f7Hk9@*YDi?R#0%WBRmUy}R^+cas4M z3VQX>JOZC{H*obVHk>^>sRJXG3w$L025#6oog>Rzg+pyo_ER|jD^M@VJET3}+*bUF zeqywO|6AP8YKqG*<6`_Pi@CdRLOC8CJi34JhEI5q(w=j#@4dr)D~`K7QF)2q^k_5c 
zvoQ~MFOu-t|1JVlex4S#*T%bi_9>&(h`X@$|3YzB|kQ>dZROC-UKu_*oSoe)!|| zXZu^gth>FuKkoVs7Dqqv=L~KGJ53ZvxMj&;O_VXc8DHeFH#aoS+p91JOGE+*8pZ%9C~BxVzSI1t&x0433u55Xb$tH6Ks~Nk@3;Dq zTlX&IVKd249T)D^L+t^I$@EhGKsCO5&fv|96zm|`{xCnjd{O#)O#ImU*N|i^fvb1t z4^w>mgUUD7Dp)?plF0mc7yn?(gz z_N!^>9E+9xfB`Z;r3>l^LOoJ3b2o4&I<3id$@zvwk@<14pdNPvH?ua0mcPA6UOnyx zZu@7eFFTWpF|$A9FXx*h%Ko=~rk~bnPwjRJ7G)=jKSx(PE>)pOY^X-ZPr=NL$;Bnad)*Mwz zw__Pde?h26;t!s3Pzu_PD*^h4JhxkS5Zyh}8a$;Ux&o^*S9=^Z%VM8~lhU#As z>X9IE@CR!JfZzQ+?ZwJtJ@?9W3PSesf?dZ#MA?t@6vi*6SI^M&;p>X=`+$0`>zU(~ z8Tik^_18|6mE&qlr8bP8UWX@}S5ej&H{dc0^`}Xn$j2w^4IR3{y##mGc3d6oU5>AD zcevR0{vEz;N`3ApuNy#1{zK5%_?I|SbE;kG*mwAl8(|kMmG6U+KBZLp%#UE}8P?wM zW|^RE_p5no9<`t@Zk6()&{j~?lymn3s{wp%SuKgrsRUShT!*=N29TjGJl~R82ZH9i zu04F>8+iLRw0v7j3&@rlQQ;n{h5(wNQWrcGmJNn)cUPklOi zqc>(?*%9dJSA+jNa|~O0xgHPf<(hfYyz1X$8UKY>Pdz5&_b|_z9zi|ggDcb5dR-_V zmXGC<=5Wdl;a)w`s*rmuVS``dojv*)*n2WmNI3vd7eHPJG{m4Ve6E(gpeFW>a=V^Z zK_W(cSCfBv_yBY-s;eo7{$z7f<9d9oVM}_-F!wugm@HYowFh6mAk-rz)u2)Zgkm{l0Uoo*a|sLHV@_m#ApQoV(>Bwzy1WhdZhl( z^$LZL^K?A8@I#MZz}SCgcMdi-TPV~ORsWhE#om>9?}ffAiM&;41D8~O+WGi5s!_mg zW8>Ffx@`cX?kr0Fhv&uXX(1Keyn?7WMe*~}jS_kSRS@c-B{$^bdi8WIT<%>OCuyLA z1afKDCWj!@BS9s6&xl@N=Jw)@VT-|$V+}nXeUGz(k+y@T7>o})RNMo^2wJS$8NnmkS+v!VJNMY8e=Br3FCg9Jd z1J8O5*TD25cDmbqRmE)em+g#7*Zg+_S3Ic4{WjhWlQR=0CVUbb>LJJlyNZR>(z4j& zebe_I*_ht#SOpq65CTL)3~8uj?no<%HHh<;e8j{t42=y0qnbBB{3tz=OG*avv!(Gf zj5(lzieqaL13{VMzpKT(;xSPV2=Pi?v*=VLy9g zBvwV%pW^~hP}EQntRjCdLMBS+297BH1q+)ihrPdp6(fNJ#Ww*I6oh)D^5;U;EhfDu z$ZSf-PLm!`Jht-r1>e;}^$GL$km^@PjkKtnyY+H1_7n*uDCjx?DoX$1177=TeSF#D zV;L*IRWQq1)5Si4OM)ugrAoqq>)I*QS6C5Q|FEv|6QzISplYh|=EQDZ5tN-M{?E%& zlc{P+-Mos3Ul8h%^yfk?sRfOXspa+96LS4AxAUQzAk-t*-)mM`?4!LEZBfz%m>!v* z;sua=L8ynu1Ni*=-t}d%!Fo5g-a9M&T~Sf?<7JZ$$Sry#)T=Ivzh6KMGxWtXp~sR0 zp&lBCp#EwOw@axmn?EjC9+8jzKn4?3Tl`~HcfagZ%Cyeu29;5EqV%8X?lEFq);r7^ zF>t9BKtTj;&;~83nz&xw%O%@q6n<(IdaOne>XGc{0_3qH^*YqBnLgr^K1Slwqzx+z`z+6L)(E1Yv^*)?-PA^rU7>q{(2?|eeqjC}@ zU&9M$Jqv8bM*qQY2vATIKWJ8TWymq3z2{_WX&&KGG0zCat%0vC) 
za{TPA^5urB>hb+;74r@o{=g%KRz@37tHZ}N>wlGe#lbz=uZCyMX~dh`BBmVd&B1S7 zTU32)MLOPcBxIb)p+Y>x?Y4RGhmZK#*xLMQ>5uU?!w=TF?7O(lp4I@mM*%)mH^#)+ z>@sN3k4PG-Rtc;W3k*)i<$@XBDq42~Q*pMF@#UY3OYxIuOd6VxJi=F>w$qHUDFPVf z*rAv1SwQ2wQ+0$>6{wyQiGjdx;6l^ptPj^3Kx%ZjLvcH&x|P?J^bY3V; z%9u+z)K|@I29!5(-9Gk$vJ<8MY~LrRrW)dyEjeFi*HABtf1WZsQ1Ze9p&MwTpkB+e zBM&DVy%Xw{6oh&tHPt+3PPTVrt2FiFv;c8gu~Sh*duQ0&Yv*h8@Q-H5-EFW ze4}p1?)+l-mSb1R{LHTJA__u1l6>y5j`N(lY54N@*p@%|#XnY6H)+;|fq{7#jm(d) z1}G@1`~pksGqzn$6nZS;FHn!Ef24uRn%g&O#L!PYK9*oKDOzUrG)_0Kpt$;5Y)JI- z(P|L7)Dwhys6FuV=TPTH8=Goemg{z`0X1hqs7D&+t#`^-Q3tQknk;cz!T`-LynOY5 zxcV~-rX-FV_C@H&Vno#+2rsRcE^QQQzbJm5o)Pnry}WSO2JBy;){w66+Eu^G*se<7 z&c?4T)HrT@Egi3KtuwiiRD^5S_BCBMs0@$n9dfU*vKC+crfJW7I}R?Hv*A1=Jr1`R z|2*T{v}`Akt*2vhNG0!cTN9pn2P5`<3cNk__~t`k`-b@E94D>O_14-`XV-2uhl$Y z2mB5lsn1a&#DZ3!*Q6)0u|ADg^}-V|88WmQ!{kp*XedWRGAS9?3)SowyJpd=mzX`d zZ-6a;*F{7hW6;a!A29#(lO7LkwMBUm~z;LNGEaII-^ zJ^p0xCc_osYo|WPE|M~)aO?mIiW*v&@&iVxj6E;Z;V%kW=`G9CeSaVedrP)oZ6{y8 zAhaSS<3i(?D`I9&H+|LP6RY3thAG~;cVFmttVG$bW3_d+8xFn#&$SCsrIt!tDo4#u(1NFYIhD?mX}{5QNdm!997gB?K( z1jTIt3Zki06Ge1Uj~A^8)S2U^&sIkFxksz?B+q<=>If&9fN4?dgZQ^^OUU*Ub-&Y%0ooFt=di|l7E?<8^ zXobd8m^`!nWixfYW=)uBc9YN(!-CL?)c!pxPf&pL_4@w1ZWeX(LW^rZ)9vSiG1w~% zZOHS?n8w#%l>K4T{D=OU{|-ar6pw!dpI^|6Mw-948^#`Ht|5!mzhR?zjGe!RV)x&D zljlsd-0=m|{(~O?6cnXDWBtYvvTudI+xHh}r_}}RzW-R7l{IJQ4%~3b`vLZg(r~Z2 zfo+#{KH_d$KhAukT!#0X-V_oV*?{*MHu=VT+Y3T|2X6a5DJE#iCc;3~wMa zQg@t{Mln#1YFZiby%AWhYnVGRv>KSrz8Jl7!4u%-n7CLjqy*2MZ96!_@ExfCH0PA* zf?7}-`gY`#(rQ3YYbh>|{|0W-{pF6FXa~37j6CVn{1wzlJWq`+r~#bDVV0}*{sMus zIaEb+39POC_~Ow~zaUh+J0yRD>i;KRwEvxdr%f&%fq zNWtHlEjd>o_xS7LYvpF?q)v##rjg|{7XcI$gnA_XnH1HMSWEL$J71oP#jc~Vkzn-L zCiYOXJ|`wS{m27MiOe72!Iv*;s7X;7)0%ib0XvTrCMeU6&yR*sbj*XKzqMi37b^2Y zL&eVcEG&sE-+BjMe?h26Y9`>IE+^Jz(BR<6m9H>2GCxOrsGc_MoNoKFSfS-F%6`QK zZ65U>60uoi`N3UKT@dO`A(s>UI{ygQ^y^kL3Nfoc;>Gs@s?unSC1c)WlmFmf$S;3U z`oF2zZ$5ak@PB7O5K0)-WAC>MI$7I*y(G&A41WFn1?pK0v#N?MUp}Jz4eJBuOXkOg zLp@UctJRxI;laASzn<6bIaEVp$<_DY{23Hwf7GI}%TEg3uo8rNr1q~pIN&9wi 
z+Ufb-PV6GRMAhHZlmf?dM{~QqTP805mY*k$q;BS5mWY8%R{|6ignEJ-=5u$JpL%5S zU8omY5bB|PA=RVp$)onN&2!CbD(x{;r}&>bBoi3dV=r2adUVp(>`bwt9=ZPRt(O41 zEM+cia{JcfV=R_l6B7^Juj}?5KU7Yl>VHJT8|zV2ih(ewE+*8Qa-ho9KY0zPl2o4W zz3&x1RDYDGy<-vnh%>0*Kw~xDZ{Vo`Oy7Fk$a9CjPCyy1W$)kK_%I)Twc9?*q@Wmg zYx4erS*PLZ86PEd>mLGl&yRMF;ivG+FGj6Ozs$iOPdH^iFy|iFOUphQv6%y+X4Uml zwao;jR#!*+FR1}HLh3(hylw!eGd_Q3nN@;xNq2pZizT?SOpNQYjAp!f&wCY9>O0)6 zuhDK?sTv%u2%s*#nhTnp`(>0T)PgCsb$yrMKL8jrpm9k}1<04Cm2XIB1=jI))1;3i zf&(@FD|Gw)2A7O&Z$=*f0m`2^IxpBO^DopB&#T9MH?E^L)?dY-RBWh68Y;N&2Fa#L zosWE(iWQQfTK)lmnmQWV(P|1oH~3s_Yws_y;lEh5(dBWN#UEp}51${UM_rUG#ysRR zRd30ybJ~oU9)DbdF<*L4exJx5pSW5uxo@6LLbT9Nh6+MGlKxz;zdVCrQ1 z)i(3(7d2FTAA(;o34hk(sp938{8UH3Xn4AubXmLc<^KihsVx}Hi9OTUVp{2tj2$BD zAK}O6{|nU94oEDCwP!WVum1U@=NpCx;-W6bJr??IjHvRvt)X4gHsFrX6TgB`Zzidj z;J1CL!G}&UU!kJNv;@5cEX z3=cS}U5Py+*Pn&(cjHh$gy!lc(AlxFUy6@DjLg9%k@*`u`SstYvs^SfDeBKeP}{hO zy1V+@S`hLIbMJi4CHOi%)D%^Iy|u;~%orAd{aG)I0m@Dkf2eCO$rIz>3O#Wu2=z$% zbH8KwtGC&KZ3}ZS90}x720%ek@*^@1URNuBjcr5>1ZA!UC@6|QQ~K(v!l~IpUziYt zdgS^uIw1j86>VGABw2?YKmrMhcYTLY5bBZa&m7kw0i5ZcmZkGbyFq=_97O4Fzpi7> zOt|4KSpP^JQT*(_zkH-Ze+d2GT|uuN$$s9yQ$((FJ6P1v%_}Od{@;(SzhJey0z=~j zPwxppT>W|JbpF^BA}>|MgnIL)mDaw&*W#1<-x_+%BNq?s>k{w9EWtM_j$7$Bz6Q5V z+dRNwW<9Px(^OIFVi|sVwPN_GgT;7lpU3xqPOQbvV#+s#t}Vom)F{Wqr^n+9tX=g7 zx98!z@KcN>+spCDgW2BEKQi$Cm5)3|Exmxxs4;er9hC%_iG!N&y^X`8Bl}-ZdsPBP z7bh6FA9@SsmhbdUt?HiwPH2moOonfh<&~o$|Pq`1EXv`8l&f64Fd8c`p z^_~jQq|#@vqGl5q6#nSt!t8obeo@kW$lY3??I<}Vp-2+D>>+EoWt=Rgv-C;z%LHjG zM<+Z)r%2;psP|t$JuUq^cfSwmIn+x^#Dm__Q?OzZhM_bZ;5YTZK|SKc>cC|qr%xUG zQs{|QG_;VqDC$KX7Z+ZSZO};=H0f>(W`Sb4)KcuJ0&kz1ilR6E)O)H`EW$ zWkv~I)e1sAk{#M%Enj1s$A?!GnZ#mpXlx+_dx-Cq)1<7t+viA3w+#q!^=HdSdT#T7 zhLs=&f@-?}6#NU+)ApM5F*ZTjF+Ocl5*F}>{$fEru2=7Ck>2}Pxlg;jTZQC{Du2Is zj8EbA(LMHx#Y{b9@GJJdP_GOc!qKYxPe{R`YJc@=S=*F^l_7-*YTZ?SqVx|wK1WaW zOgy%N%#T|F6cok(xhYg$%On8{MhskP3s6uH>iyw;hETOx?@D6c-6|}STz}ePLA}gf zx2vfN(g*vj4$sGI|KQ)k?|*-Rde&dYex}9*o%Wo%^)1$j6eg&)8=qg4{cATxZ1)`= 
zFBIyE;+Hx%RIyAV7h6u2&+ICHL8ynuGnkyI!~PN5@U-TB;>v<r@gnFnvcy_@4yMJl0%bsuD zb_K-MA3WVL=g_LJJsyA5)V$XCUT+fmaT+n9UR2hJIa#b-xaaG6lGoqo;KwJWeR>ke z!CjZGR`47B2`~Nib=#@>8oc%15UeDk2*0#U_e*_NA#Rs#((l~3Dtx1BFG^%&KCbVl zyy73j1iX^c;-xq+4yF)CmkXv8P7I&(F&^M`t1UUBLhzO6VrMC6bHLrAal^hBR)W}H zQtl%|v+=FN+s{-)R0H$zjk;&NTfy@1LyE85%fa%vNZ&7$;jxPTu1Xm;pK#BQuL5>Q z`~+n-;aVQs+Q4JiX-|QA5R(=9|-+8f*{mG_f00l+aKjy>Ou$m6M+s7pk|6ib>rY$!?-1JMnP=1~CU^T}Fj7a9 z{R{FE_5?cRVob!qrPcri1)-ikdcdU|YU|h+p5tX}x}BIs=KN&?mwD?z8};+m4-PIv ziZK>BU#1Pe{R%=o(o%BHU{Sj-M|+m9~B$2mly6jMj?uy85F4b z`S#=gDbzdB+wJ~+t!=pW+HLcPZcM^&&d5G;((wb{_k>^H>n4@>Q~l>Brlfqv2Yc>3 zywRlupKQ@SczeHhcyVmf^##}meEG@Pi)9O6;~KkP?|(VrGDz32G1h#Rg3Ausdrjjo z2S1oRZqUNdvG_BGjLFU6X`nLho6*VjcfjFy^S^!@0Z*`Q95H9Wi8`R`k{Vr~RRzwE zT^;7yryT6H@n-hf^bsg+URW7SZv@MhEi1g=+5$BFVn!COZwA|r8CqX|rG#zo-4qot z=@}^LI{VQncvpX_)l?1k^!~5&|Fjb85@?qL)N*~N_%$ujj$1DE} z^~8gE+znhe=kfNEUyH?tdZex%(XzZU7Nm@KP@a~A<&vlR8^XPMq^`z=Y9F5U=~FTi zdS%Fc1x}Ty3!$kM$t5N8_@(hXwpur4t48wU9>3NPW?ao3to~1r9qL2o* z01661JyQ8|q5j_Z#1FUOckr%|9#HhQ@%aUz9;yDBxvN@YT~+VM?pYb%?Oib>UzGme zv9m`Fmb~aORG)L}o7UW&2p|*@2`fPsxq;6hOR2-u8 ze=@PstbXDPEFK9Ys5JwipeX+1x65zZ{Lbk%)gpdDs3+*vS*KFB!qTh^dxjJy7$byx z^-zC=sXR_;f2O8AT12zPvb*(Pl-*yTUYKcNHT7dkLq$>cmwQ^w zOV&uhZjkvYbbx}Q_?x%tO*fsN-tAp2#4iZ-NcGQoH9R@icjM?(Rr_^FWh=HKmT~J-nt4Ffm!#k*k zI`y2=fJlw}ZXcII_K33I(r{von`Lf~y<*k!qnZP-H`o}m{uCpCf`U+wq(AqQ&2LgA zir%}nbo;oFxcYBvmaAN5+}Q2JGmOrdLnDy7xazzwW@t$mFjHx|)su z{&X?PAnPM;V34oTAX$yiJrUh7^L!(&H!r)&tfm4#(IK~L+{E{I;L~5@i*J6y>uE#6 z4K?54CO`Y98SGBPt$c5_Jd!ae&9*ol_g}nB4L^4ow^D2!;qWjT zR|<)*9kwDLOlVki-Ke=3*bIGWbgU)=Jb$I|P|7zB+%pX}8*qIuI5x6l)IBW@xUqcX zVuoHR=#!Jb`O7;QjDzogG~J>E7=7<+;XSwk*gyWYl6JHb%5Zq zxlH^Jvy<~W_+uvRyow-_&cugr%_N+yATdRi<-8V#%$;qxzcl#k{15T79{zNtA*#yU zdBY!ZI1$GPGMnXm6yCvX9SJhaISvx%1nxbPrN_?n?^n%K9p`x1m41?z%(lHKc4yRC#@`q{^ zjl6oKsfPQm-^PuFJCe61V0mPy7|{h4(bR&vs6uDCK+<4+w>5S_m`Pzih{cGyO;gFwC1K?=4DgwW8w-9=6u;Xs{gXAVPce-@k_^1{d`c2~U6 zkAsMkA3tO-Sl#zUH?Is~1#yYu&z;xenw0-is8>)B>Y;joS$mX*wo+Rae9LBzhp$DG 
z45Ng3@ata?>XGV?Gi+QQwQc;p&!-=yV$ED_JAatsZ{SG=8T|T|n%Lv-fc;c-IdZZ& zNvKyy5bB}!3^mo>R-8|jwvOEwbNX#JuMRRtRQcCxf`$V49Ya#NKvo?fF8_illoM&@ zZ-suxO%Up#{si;qezN#hvHZE;CAC-t63C^(KW;**f9@ZV0lC&ArPE8W<79r&^~q^L zs3!TUaB-EpV(Zc7EE|6ib9*e`t*U~yKPI?u78+b4$+zbO056R}?w0i_tylc(1#z@wta zFUs{13%z`@gnGpUp&rT?Qmd6=YCzk2YO-!zy3j8Wh|(YBhwSBr#fR#=^bs$F@rw!d zlx_@Nc1+&{ci2rkEe$td_b$wF)V-O7`vi?;1V8(L2M_$@6kSn?ho;WE@gyl9Pcs@f zPlp|kThk(@9<$5Fw}e%B{|dfVE9Z7)C9yKQycS`+-Ve6sAibx0M5)ydSge zK06<57;w3Ld&g%mnLc*vQr%|o8V_T@e)(MHDfD1v}U%Hc+Ii zbP$9Lz4wlk&=ioWQbjpQaKb*Dvu@tJ-?_eXUGI}W7-n~WbKm#O&dhJ`X}dXR(zZ># zalNk;*3dOXYgM0CDD(1y%h->#qILLQ%YW43{G@ ztj0SIdvz0&utHoPNMn<)z{NcVljW;f1Hww?9NZMfIdKG48cryG1)vz}not*mdyKcN zLB`c6)p}7Nb{7G-p~GH)qEuBiEi|=U`=vLAx}C<(=J>pz7!>@T`QoYl#0KEQ;9UF2e``hUJE}Fi5{7m(owJ9Rc)jHCASf$A) zPUO=$q#s?JRyaR}bh8N})x+uUaBSf$aH7?5-YcgBfrH>sc2MPi_LjO%rNS7I4^r#a zhfO+H33ct#e-uZ0aiY8}Yt_c88K`ajiuvITqcO>;9_}%?`=mISm2yVu!RjK+0(XPP z=$Y1!>ec_tvJAtZrL#H)ZOegu-R}0eFI@p6wjG+Lf z4{f>L8VaU}^*Z@nr4%erPt|u)42O~HU+tfo8UcN_9GvWSzBlG$r|lXVS`Flu$0Vvg zD+5oIUdViTT?%AVi{I`y2jbPpEFijT^uVIS>iO*Ndl8^e>OS1std#~ zx4+cqa0}RD8uMbKS{tZ{QV)vr{s9i0Uod^>nIYIsyXL!g#mz9v>&sW4g}>l4@qP^2 z$hv>2o^Vu;=anmdr`DSmkSjFRBYNddc-)!Hu8hJea2$*#a|58315rEV)kD`gAzXSO zm*o|(`25&eVb~5_A4Ie2@yem51yxZL=iu2}-c_*j++Rj(MR^IVKtsto7`g62 z@S-r0r!_CTvU>L&hYzHuD-fw3PED{tFPiSfxUpmF<|u&`xG4XLR1dum+;kQhr?IqE zw!SUew{n(Ro|bl z3J#@_dIhQEf4y|py(TLHn}HPOhRjs}MXB&BRo@!X7Nn5Q6(v$Voc=tooQn90xwF0& z32ga|%I`08^}qxF2DDV%C`s83t76EwSN`RtNc#UG+fGHyiu(b%l4Vjw(iV~?FPJf!L&nU2S zxWdV`$EpQBbx?5FA!T_vxbbZAI@y)EAUX9G{qpoL;JxpR%$B=N;Ecz?~5=1s%TrOZEN(syE*BX4g+u(W&0x>q{0s92tQX z;y4VAeI1~hkGgh5^>9>;CfioeVqaaqr&v1}(?V?v7brtc^>B~z=IWW2?N+Ov-Uh*y4lzL;!BcI0;acNcEPW8@!NBFRSb-tNp^6>xS3g z;XJ>oCsI9JS99KSJ`IdHyXCR>8i+~Z@w8KXJeB^9Hqrgso_rJ-Kt^g(;qUCSpM1JF zh_tKt3)PbvzA}`a5}6RKzMLiC)kWq|$=_kSEUK2X*^eetm=|gT6s5u|z17tnT^&if z*$k2D;Xc)Po2`tJ+v7Ifw^(3V6w?1MRL_um5Sj1fSEu~9q+@dsfUCIIX}ZO`|G{T$rrOW1ApO**DnFWX&`S^b2GT9Sg`|2F 
z713vzP3F+dV7=t$%v5-gHYV-TrhM4P$hE&6s}kDZuwF2%xeJM1~x8ikB89RGJe7SBseT5!(#fX3b23F z%LU6=o}lBimc;`TcPMW6eOYQ~8c>Z+W6|h<>2Bj zr7>I8a=;*a^T}O}b>Q^4VM^?jAK-C9bYZ?z6R^nW?|5T;-+z~72uJmJ1GsOW^|u6p zc|v>jh+TbS!;8Z!IHwI1@1!tx?1qNdepQa zih!yh2hUr<+cB+R(Ei9EObvH~W@QAB9IC43G+d?dVi;>?mHoRgYKaFH{fYmzJ@XyWglCI1*y(@puQ}%kSC) z@tYa*U9pvT#h9A>{HL;iw7Wuz{&p6|pB&3ghuKqpM5>1q!~UIFN!Qd5lYcWdMqsWN zGKWh3rb(ktFYpN$c?BZa{QlS0qaLEn)svA+Xr(JV!n)9uDuETiNPi;LL-ml85A2)u zKk1gYb|faA%oP}bMR+3B!+BNMi`#?g5;dKR$L&hT_Tn=3KiLAkG;PMfwGF8vPrxv! z-rZWGm_$0)?=Mu3anHVvuJZ~`ozj$p$>8;;0e<^MRsLC7Zx@y^QbeAs2bKE!ZJHTR zI#-QI^-z1nQCDYAVHLe|rU%VCTKEB6^y$!TE}NcPBzOL7Lz4j2Lgi0oziNJ9?%WSw zvFZQFzf}N5sqn7z$}gSeoY{=;J~&uD{x4oVdE@jZI>RM%(2Mi+BF~kjiJRo@Uf@ic z|5W-j{4eQRDX>YYx{y?_Pt3E~)1xne?`2bFD%eB#s(HO~%Icv3$eFo+@o#TCbwy%rvtO}en8(sP? zyA;^!x_rMDkPoKZb=NT&_#EDe8MFD(VmA1FZSc>1=Ndt9_Rcwb)apQLjFm%54Ni;%cLxq<;gWXV0!!^Zo~nxH$RM(H-L0sfzk%Ewfv}HD*g9 zBShj~swW)P;|<{23>y#`ay>_As)uXpHRs-`Vs&1Ue5v9dA@ml?ct)7j`!mZq-(g>6 zfIl`2wGG@lF4N=HqM~|nCUXY8zwJjl07F&Pg8bIL2m5?P9zc=m>b3Q9^E=WNWTtN@uhATi+2Zj`;pZx@ zl203Xb6qgD5syz3o+`T5YXR-pc+wTrM5>1?Ki6f-pXsNcKOPln6-qkSiK_g-$VRdK z`97pxVJdugaYT{M^+8=T*<3yKB~^J}w-k%Ag)(mi?M7?)JwwehvFrH!XY_b=iBu1l zf4mjEgRi_-opvS#>yO7<^(^b5vVY>`4R^OChKsx`NwMkKEQO!so3;Ff>Ph?_BLUt{ z{&}jSEfc$huNPpC&{R+AcW(Qe$~5dE9uM^Zic;Cn)EPEbJ~@Dt>JX_OF8|YB#|#5z z_YHGK-{=>dd3kPy$P*YCo@$GK z$(E5)B_h?ssmWWGp|kMsRFNwGQF{2-7rzCvaQOdvuAbRJkN8TfOECA=oGFD@lA*EE z;~LAVe7Iy~YxLK&a+qE7`e(gi72J1w#N!jDY#83Es=UuJ6IP#_YjdYmfIIs;4=j7w1jGkM4qWv~97`~| zchKDVJM`N9{MGM;9WZW8m!{;Rntumy{{yNwe*UCJZG$XPs2;f0RK>bJ)2VW-dAQI6 zxHziE+iXSKTPuEnsUPV89BP_S6NCGDW4dnhW&!nA9U7Yhu#KqBaYL{LpeR*UV~?~y z;uPvFumVk3{LIli`+E=Y5ojtAo=El3bxzoru&#_0M zL*XNlH;XBrsy_Ji)i>CCynHq})gyX!x8^sEK6onv%fPpFGMD&G{a?I#I(oZkz%p0r z_>{E}<1T^c{}^Zac&hR{epcebHvLd+GafH>7NE4K@KT9e->*>fAss*=Qa$t@a?*!Q zXA0>yZ;oH+Jt;z9T0c^UO8%nY5m}Lw0x?~rFgFCkpOz5uR=j=8MnSJ4N{))^4IU9g z2U`k-9>B%f&)XO8)y8$pHx13iERexm!SX^;y+()26|6-XOIPLfPs6y|9Pob*!k6E@ 
zuE13Zfx!Y)1Eock|C18;EKE{L#pdDVr}cQn|3dXRl~x=Sv?Sd+dLUQev>lWjmHev- ztc{xlbe%1ZvumZnv3o7osp#ZM^%1EPh@X*yl1Z6 ze~agO^&YGJ*b|-c% zu?viaV^u1r8k)w#uG1%UPqV*(;N7!6kB;{P)njYzivk|P1jof^_Fl{b_dTW@8ladB zmT!Ol`OuCOAiYm|-GNat;L_qZOCpD-z(kK((+kUzLED*ByDQ?=;G%lu(|Ayn?t$oR4>Gg0K0lcOTi&SP72I(c10-R2R@?G-(gT zIrrHL0c@7R_YZ2d)&ba4e5LU+;p2&%4>uCzAg=$7rDO9oE~nlPb3g)l;Q>CLs?M3} z%^o$U(?_7qL&;I$S9T5cJhI=L^n{3Tdxw2S26IDb08o_Z-Xn6p)clTy-lnnGY8)A-$(Zvw8%U@IYy@(gu4zLK8i4d6dhxDh)Ke1N>wC~P@q?2xloDaQ^obbK|Bl z1ER4Zc>NoDMuMs2yPvqy@cBguwhk%G4dG^hqEvXdS7n6{tRt~F1mJ~7`FJAd!|BiN zbGMVe;e&qq^WznmHy$rTz7!Y785-r^(|}5AV#YMzLd^Ofc=BCWaP?Q-R_qnsU$(tq z)Lp@mV3Zt{{l!N2lRNyQMBZT1OniN)>hx^v6cWe{!ES(}M9zn6zZ{PLtfxyTjM}qU zzDz(qdL5|rmw)qRtebBPhU!VT-06JzRCxAt$Ih*cOww-_qI(au!yGv}muLN?pO%v@ zG5A%F$#5?8{CY;BiJT9$XPjr#`b8A74E8$8h0wkT9GijCqSBvnJyGo9$x4y!|1BOh z%3;PQf&54EiJTAj{_;*p(;05^vNEY!Ape9dKiUo5D_cVoL>|fIbb}Tvyt@Ft`Q+#h zkp4oBnAN)4Bc}ov9W3uaIjqP{NzLV za72!(o@qn^tj#?xI|Q=fUE5i*6(eKeu>GBFPcPU2m5slj7?*}Y)y=ctj7sx?D<{0n zko{Z)>nHxI`dt!T`w_o`ufql$`w+RT#OXO88A zPqZ=*^@FMKN^rPh$%+!NOf>%U7s*&>EL61!()R`L_v4QM{3b9CVU$Vl!4{Z!8T zC`k4vTzm9~>H$u@M6WFBvQV7!D)B~gt4)+7%~oboy7y2O$KiRC=E^r)H9gIOLhq%F zUSUosPJ1K{M!e`Z;(;3Zu@|UoLRA#k)be(1kysO)J;f$~^n@U)rlxkg&&?Cw{+I$z zF&a}1pePl7z~K)ong)q}7-zSf-=w=HpwCg%72uj`<|uj?tKaPXhJ8)Lu%r04LbYeo zBkEcaWkdPNkt_eTEs~`&Tf;ou$`8AWD_9!nanDfMFIQ-nX5Hq6)u0UHhTu3rQ7U}G z9Nkg0J^q+I0&qjHAE2n@pMM7Z3DJANiLz<6UG$;rB!<8<-OXid_7gMk$`0_J6!`FZ4A3U0Thtli5%%+F`YDn61GeV%NM#)jxe=a#? 
ziMmUm$Qulr9Vn9gBIzN})&5bNhSpt-{Bwb*|7neisapppVdIg)+~=V1>w8Wossz&~ z#)-UZk>uKyfp=DxkZy1{`j6g-GU4i9TC=@4P`W&MzH@#hcJy}-u3aSHp|^F?+q50-Q?i8ao~PX9xG57nhAe^9;gwzX^~hT50zWMlzK4t?5< z|8oJSKhM2bm1e%cs}I2G=IKD`!`be9u>Y@tvu59_b*gy0(U>$ z8;_2>xw)$9=&zP3wV(IEmRsg+@jkJzqQ8n^c32S{rWGWo|GpUR*pM=Afl)cMe3%}* ze_<}1Y1#TF+qVf^x%#oS`b0SFRRVGyo+iSaEAe3bk_@=nK6H~-iv?pGckREiItn~->(-xhDnaJGw<{<0DF-!eCmswoVuN9u zEoWp;Z3AY@tD2whqhXONllE_YCXTU-T}CDC>-g7;_7^W2?$w6hUgy$RONF_vZcL}7 zR;Y`{Hw{NOCDI=^&5A4<3bBQ_HlJp7lV3H^Wkd;3I-FP9N>!(T^<}|QWqmt;0WZv- zn#kc_-9Mx^jzM_c&Hu&Ho{|;i5$!9`v?4rJ)sm@g{wy{2naCrL%1t+K%*pe{2IHG{ zc8?bYQBu@&;2z^qQ)s+o@Tny}BClugeBZLAYMC1*gAC?{0H7$9{Rv8O{pwG7k*)_o zO(vo?N+@Ao$fi$pIZ}VXBMPg>Ux&x}<&P*KqDZ*%XU4XM(&tSMp*3z6{qWFdbB|x~ z%5xHV>EuB5=46$jv82=-eUc!GgyP-ivgsZ|hts9nmk!CCRci!eQ;FxN14&&V~RWQf|Jt`gZV-)?uI z&)YQYd-sLzzl;<7@<)^qQ6%E~<2TkveDtAYfn9cxI#l_0yuwrVjI=XCT$ZswMk97~taE zlruXL=pFOC0G2sIlhDFR#~!ADwg)AOgB3y{`>fmUhhh;hZ)U?>ZN)k;sIEjY@OUoh zs8)4t_KpE_Lx;9Mo0Eeka1Zl(+7Km0Njwd4{!tSk4W{puk!vjZaMvhRbbP& zH{Z!0yNKFW?kz84zZGd8;yx)DhPy!n z!f)t>+1|^jeG-Q)$Kz=l07a?vSM8I%@WWI{>V+awJzV`sGh5b0PpXX7ywXvQ1tEdF z@G!rtBT_va)sg>N-9e|-nk{#mk|%IX2r54!)x(uP&Gc+5%go+8Vv$D%>EXvz_UH9& z)!UKtk#y-c72e!gw)wF^8fN$p`+K}d$oq-BVW=JAJkDDWAYOm<{BX`B!)bUtb0t6j zsN|1ZId=;ERR$)5$8(m>q9_%fQC4W-l%7Lc{zR&W(&dEmLp%C_sa1&=o;_?KUAp}j zst1i7n(4~T>mvO0ngo{aqVlKGe^=V>sl|gTuz%LC`9j-2^OG)pj)w4554jvT_VWge zkCYcn_xeG;be2kgl%L4n?)d-NNpeEWjk}z4)4Wz~Gqg?JKD3u{Fx+5n<}!L^5^So9 z=zBFh7uqQv-a6%WED59P zp%+R)Q{$V!b#p#}wPRQLpPHKqo*o?ckg=f(ZnaBu@3hGVHbWWq3ntb8#oezQ&TX#* z`po8Q+1ooobxXjN*6rVbUT=M$;%`5|M#DGitTol(F@5CQqJm#wirjH1bK*PHS?c}5 z`Drzj4!^SC#NdCudT78Da(D)M1<^nQigR8`-hk=h6Bl)Ej1bVr_?RItc5P5FgYPzZ1+S)e)C-NZs|3dY6htJMzU2MPNnlqMx z0NfDh0~DpwU-RO?BQKS$G1R4Z;}!^we{Z&az3BrdEF1~shB8O^cp}xqGKpyQ#MSuBBV>QQo3_Aei~yX1*QsK_T88SD>j zE5GJPx^(z2R8L0LAcvKF%I89ZQy6w1Da^IM@jTytD*YKNQUa`QxM6Bs9Qpw*fc~Yz zZ~s_4ao8IcwiYj+VZ_Iy_X2tKP<`U0pQe@cgMRDicDdif^07cX-mvGG5hB$?^^k+t zG{4_OSAEklegES`jC+Q*DF6q3wXs*a#Z#ol4i2y 
zf+MjmT*5T=Nr0kM_yZSTOc<*eOG-6}R1fDBWgATW$x>f9v_2{-P2}=JdLnzfRTJ((r?UyI=FRJVQ|$Ks$2V?)H5 zG+$VEKG}cx?_8+A`o?g-@)8(efgQdsnE-3DWmY99y1>~-PwH68rom00zqJ<1Yl6Nz zC#==zp9!MO*s0rPVnCfm_U$F|fuOjtF8zMpJMh)^;hYhg<-puzZQ8C|#h_;U=D4ux z@6hj7|GeS-*}xXG77dF00>-pf8E(8$3J%Jh@r>P60RrvpCTd4`fjfh(AIwa2$^2a37#o9bHCLk4P0%+Q0An%GtZu370SpT;StT*MzDlkt+De=6H@){D3JV zf!vU(E_79`nlwB$HO&v3i^tQ}0u-gHs@Ij%^YT{t2@C)r{9mXZk19wNwEe0d?S~!1 z+wXNwX!&7v{c?6yxnf(8KyC=Ng~rn)3d`IYd@*Ufd}a^TCQ?209(9Xj(=%qwp0>F; z8q>rTERAW-&wnD-L+=9z55hN=uoj(vFuaY%Sq6c}yIv7m|4HUqV`^Jj*zRtH{`?hw z>8RB+=L?~C!m)h3{Z^Ow@`+Rrr$6Ib!#Dc+#VJ8W@|hyfExVJQN3-}EFY+-X-3C{Y z4&V`~9;!zioXq>(y}&l_Gt24@m0{2R z&Oh)y=hPFao)Z2!8k?@MbF^7OMmlDUzYcZz@~P_If}L}=Hx5b_7=T2{QQ@VpoM9V| zNy8)&fEUgLC`zPys6KP<^OgaqkK1byZdN1m0Ialo^X>F?Wu$ZKsPs3#I_miq|6J1c z=Py)`M|C2OZT@Pps!3oO0jdr}s)s9oxOZG1Q0P~ub8$zh098hKD*csuOP2L*$|lvH z3Qwc8{dE}uHJ)42QryGialam_{THf*pBpE>uwiIO{%PQvc4MV&fiKLxaBAL|Wyw(b z!{rUC?#1v?YRHF5r&9QP(6vbE{wZ+U-PEU0J`O5u^|(G@Y7QLh8$2;^c?8tW9KPt? zv=_kgnVgnW>P~R7u~qt`TN=D#ZG1ne&K=y=SKM&*emO9n`=op=hZ4!o-?PYYOm9rN zEi~$*LMiBc7~yg}>l0WqYgWQnyKHc>_vw`rp5+67*-@|TRg<7`+wcjU_Zxs&$d|-l zdY#~4d#w4Ap=@yeoGiogXA1~4T&;9rYzOG2kW?kt)dK2iM_<%W>xZpY2vK{T)%rg| ztwb9z#Wk($ak{1Sxx)${_ZgBbG_}HYEi%mcG4y-;v=aA^h1hl64VwHNe$$99BWi`K znue>Vg|eKOUrk5YapsobyH+nIACI0#dgC7B?JB|!9@YfHd`M3aqpDi2+xMT|eb^7X zj;mNS$N(rxg-<)2=N|Ob1It7JZpi5If}y4uM`_WMnnu>Y=n59Yrj*(S3hXL^sydbY zvP^ZOofmwu0F){>gogl%QsK{+8bz0X3niVlOQcq)JUHpgH}$Kh52k59GTsU?L!>ZI zura^P?)P<`g;m-f8nC+pY`TXvRM zn!pNGq(6~bq3fJbzNo)AkgoChblf;jz>A60p|YPn^6By&{vjfJk-PQD3PbX`<2_v~ zky_!}9~#$-KXr2CglMG%>?O`H8oP&@Qt7{trKLYzKU!ef07{Muzpv<=pRRf|HWdN5 zArk{AN~BikIw!Om+x&^XvfO@MMFkt%fxAHi+xYdDNUczN#(A7aEnnYM`fzqkzQ|t0 zJ(zQ*lVc<425?mN+rQQ_mF!3pIDrhAL-eBI>Mz@4+i&`SeHlSf2TMt*C6)X|7Bhw% zb4(;%c0q+_->^}56PG}0|6iyber}wkmfOh*J{E9o*yS;OZA0PAfXu}+_N2hEcKQAn zEi&QIA^qz5flu(1jjMNkj3xa+nr z2PkCUaNJ@Y1Ac!#xMD{^5gei(FYCOb6n56dsaEP&fjd6(b$+F(p!v|8LYhYe=#}$n z$W703&^NPLR(IKFpsZA+U9+JS+;rZuTX9Saz>+2nJ04R690w>|s@>B9t`8i)p#PP6 
z!012qlJTTo|063*|7)rzf1+11eOcJdgxFZ&sUEJX@Xn?GRXizu@|r-b9Nz{?-{3bD zsB1t}59bAIWV~x(Wt6nZdK!9THTbsH!3?0PisT`xhq{jL`<%UM4V0F&U2_*$A&Qct zs%nKbYLh+UUy1BRGm_UnbzLohlq#X75p}V+Y~l?#TIkLiJ}>9JzzN&NP z4+$V0u%*H?WoC{DyXQ%|nG2EXp>p8p=XFoahlQ>1S5dt80c%GJb3>V)!#asn50xhu zf3m5be&A5z@_y?AFfTlw*0Y%fmHztc6%SO%`H8$&tk)#@%EI?Pq|=azR1cLWM_q?+ ziv3`+Om+G(;}DU%IvBWTe6Z^+Qu$Q&8|z7~9M|sy>1HHEs<+@D`OK!jKIAvAz&jUv ziz*EFe%b|o{UuU8q8IG#Vz=uH>eDexJf7JzHxkuXvW@P z(X&F7N`KW)qci4ii6fnxM}?33YFM>z(nr!`F^E(TSO0+ZYH^?ulHlZ_D0qSu%1$EH z!?oXxFS~!zXTHd3lx)f+om)qxf0P(QAyO$#8BPIe$u>Kxl7$ENbA3lR4;ZJ)A#m^)4;@HwE3sN z2sp0k+0%0{3EmtGACH(&45xpxRy?2l3C_7HcCLK$XLxSL>In?#c(^y~&|-;(LYO(s z5%m^_s=fQpAF*r+^tGC^w#Ij@Yv8xUxyQ{Lv>f5yN#uW{$AeSJ)FP#9^5~> zIe&Vd;W}J3^mYGn4RWs?xw%1EpF44elPve_Yfdbj6X-|r{ukT|q}R1aM}lsy;vY!F zwS~K6S&L`EA8k{(V9}@^2Z?^Xed9i5JoGh~mn5`T4_8%So<=4stX1h@pG6Q$N1TqE zy5&89grLia>Jh2VE0<4wYV-XCrqLrjYMM|Nfy47wyh@+wvt)ipfXI7=O0)-`{=GGT zbj1`^RgH?ceQQ#Yk3dt0^heYHQ9X2>6FRsp&tbJq-up3mo-f9Iz4$*V;Rj6bq!oIE zKv#^x2Xp_?0VqnP|4%R0ZJL@d=?P|kp?X$3o_5d&$mAT${spmBxPqZc3pHTM<}QOs zj5pqNGA|UHhsPhe!lxQk`Y#`M_qc?L=qHHv8Z^z!Jlq#6M;XQq8RT9)^geT*0vBFZ z(Wm5`FK~!T7U-%_aZ>4@;!>zHrXYs2tER%kb#%pwhaXAzts+uAq!&kjdGolR^!ajf zV&+RqFc#Nt^kb7RgFy9=^QdOd6dLgK7;tB(S~hkYVYwlLe83ddXU+I4l<>M`iz_3t0~3KcIkDP+0toyaBg{Jj}$7AOJ7i2~dyvS5x+YAq8u76;*}^j|OUI3!3ROJuLS(aUSuzy0b-KedTe57!R!yxLW>kDOZa zHjb2PQR$EJ8QI$%7ry+c@Iq3(YtwGMd7`uvntWB1=y$>&y5GO;{8T>)7K+ygTb>lR>iDlQ8Sx{|2c+mW*@;&lG=OY|Ec=c4!M z#?q^hR_CG)5(A;k3-cYv4iY)*; zUst=YZLv#!RRwx2#{3g!eg!5gTm}U`tOOI@tb7#s;WtRSKi%?*@^`2-G9_ffp(@xp z;?Tk$^#7G*5TebE!!^~7>;09?~0!Pm{E4VToN@H(`vQ55{Hi z{GaAofc~efs#(_!bX#076$Idgn}o&>iQ8lqf6*HojhAnDijOB!J(M3DF}xMO+NT#B z4u0*8UBctjgkSM`amT02N=AX$W}e=E{s3*hd@B2WXd0I;D|!kXHj9!YQayB?6B_ou z(m9keOgR*RW#jP<`g}Z<{#|CVp06i_W3TXd zZ~&kv75+o=;dpBd3V4MP{x4oV-T)RfpEzKRiMN1P0Tm~eeCCCwTgsK!NmndV;Xmp+ zo!w~SMCw%_Qax0kIQD7U4ymOZO@7t7!7dMDBZGNi&%S#^uO88>RV%xD`N*6k%;6vS z1N`zMdiC(toI3!TQDIdW<`aiK#N!$J_;@P&_cpbT7-|(O@`~A@-@rVB9ZCABMx=VE 
z9pb1f)ojs0r+Z1xcv}}OFt2bLH3zk-z4ZTHmB*h|8w~6)z7Ch zT*E>0$|uUtAB6(jhgC+?;l?Dp2@dHAq>Wb6SJj+@CdP^FX=Mt^=u$8bRa33-Lc1{ylx+ zZ!1i3O)F!yLJeK6K(}UwyYSQsU$xk>Z2HTmF`M6C4a59#H)yWp)CyG@P6&U5y3$?a zW|w)%eZ(%~+uXLEEoG6us7b&*#+w_bu@~fZ-S;J(HcnMFRku#5*^%rcaE>_AlM1i0 z_(fruuNO80wWVC3;c0;IL~2FU-_^!?pGHO$=8OQ`5DHHXYDc`WIeeN$I$%bnf5c^F z_t@_dq;s=~)C!dYM@xr_Aw~4jjX%V$L5VWrza4p70yg{Q&uI5re?Ly;IS7L ziMv5#904dwrN8IK_bbH>ML!%@c7dTX5ZQ`pEik(3K zZU}b^O$`n|ePT1m@Po)(qIQe@LE1GCsTEFt!xF1zy7=AOCfD5au}^sY89mgPNUd;q zcEZ$HR-KF2G`(RNq?@@Asnrs^9L}->cC7C-YpJ6D-+D2K)C#A*SLc9ix~H#}{NsIT zm_P0Y%|ZA9|1}z`T>9z9VYl#juos{x)%*X=G+=vDXD+E1ib$<+&2(cP04c(Qx*siG_TpD?XXO@|FZ!fZ7SGKXaajO>j*fhBlk&&YmRzWfPVCf1aZwdEN0I zFWg_K9e!@y`^ZAt?k{J-(7S0LEB5%pOVCN-szelYn>t8x`KTng`I?n@@|kRSbfwMD zozo&=XLx`5xXm^o^ZSJNxo@N3`9gPAs{e(DlA?|1@y zI;!B^z#vzkdo8FKj7$V%JRRv$-8iaZ@wCoR$N8#;D)C(U=Sj-c791 z*8d44QjN|HdesQ3)>h2b7$b&tp0Eo^%S!}I&&0YLyTZYMC3T8B!;8VR?e8a>>okC` zEvB!GWb;9Jf4!$Omy`nVt9OP({2O?=?7MiT?ho+Qtem}dE@vM(eMLu!s($~@jr+?M z!H7C>Ua7R@#%1(X#YevAtcw%ci-v3JHTNH5(Iv{}C$l3th&637d;@#Pb#9Ode)selYr0F_ZYw#Mx&YY z@l^UpSoxhw9q)y0<0Q!cF}DKrKNUVD;rOxxSstWw6Nyxh=mmSbNxo{mL=5&33FL;% zo&mSNP(82f`fR#+=E;Lb`=G!yaMbis=^rstA^K^4FzG=_RQT1>R^pmV;z>6nAW}V| z7peKH^V+cB7!2U;r}cOtsO0yyPIn9`>Ev1!yOl&T+0$zp^U^(;k%zpBVA!kqB@~4uI(nICl9q*xP!r{G0%TGwEw?4b6>2cWs=&nrUZaU%{~)3fPvtSAR9r>n;P-W3Lm=RD{;mEj0)hdXD}hnB#> zGctE7^vwYA&eQha%LxZ_-?$Gw9P0y4etF$+(j@?l*L$+w$Dt8gb&Bm0KU4zj2MlOB zFZT(|l9C*nE882}^(#6ed3q^Wa@9dWnqCI(H(wty**FjM&psc^c-sK(uhZP*+};GG z`<}_2>)Zw=0aM&7$(tK@_;scYJ{37++)0D1lZ4z6>XM&m@e)H zt;!rAIaF0mB72_f)I&i6b325^CoId_)fMW4&B4ppxx$xEqQCgi)Pg`jh#sK5cTLzpH1)5c|AZ!*AH_=0(s#Yl71>s*?(0$+VjTyC{nKo z%5Q0$;(z{?>Axb9{$y^`?X`;wFhBf#W{+2jNcB*C9={=3z+(D@(dd({FDW>3|=V{YVb}_4`lemVu2Ox#drflX``SR1c?rjuhsFTZFE^O6v2P*N@E+c~s)u_Qac0eVRz?FIE0^tKA_A z)xr;$HrH%iaBtc=AJpJPp;BZ&R@aW0#L{ zmegg#%M&awb>^gjNg8@h?^6=NXK9r<`Iyh(?7Aq6MLxOU_l}XD9BJ`D@_=&Q4zEIJ zf2X>6z_UhZYownyd|Vn#(1Beaw3EPrwoz{vk4XS0%Ez8PI=l%mF30!!ytNz*S;ROW 
zx2OUz)kAjceklbzKMWbW>P!c)9d^OaUH{+BM1&kLZ5%W^k>302#cOU4qJ{RN;cAeT zlw>wbX?Ba^{IVdd6;CaKZUFQgVqD0JhHIJ_K_%Z=r^k)jm}2B1Fc%5oQB}rw>3{Au zG8Btg0}^HK+_&-)@PeVHma1x{DGrKy9QRhhVUFZe;mabMvdT@JNT>Z0sTIx+sb4W! z^qcH2VzD94*jpr!8#)M2Ek4?{g*!UE!d~Ferr^t8s)t$K=F9S zDIl!=kJf!KsMs4vx=$F9TH)Wn6Ye?mkdY6qv_GU`c6j+#CVc&g)CyOAyiXeyd2>eb zgD~tjQkWaUlK@4D)Cw=3xjc@wG0vK^g?|L87n7>|nELH`rf#kxQ?t_Huh#7FeL*^H zok*>aU7XPCuxup#;kMvNpUM!@gT|=zZ;wniT;UjuL0rO|lc@oUQsH0itgvDC3nZOO zNu*ZDp6>M7^rhpZ8jBS3u%}2MH)Ql|hC-xPxbmaP`iugHnoa%hq~?$=JE78lN%(ZV zgtjc~CSJbGUVx%hc-81K>!V|$NU0^!i-voDd7Hs_OI zUAKe{O^#ojcWrb!9QB%JA#*7SR)k(?ST3Ic-%BLC9JspxZdbc%+HUO%JF1Uue)k|6 z_U-$jZ2Gqp_5?D*s*?`*eoxA8Cz)@1zUqqr~2a~W~PA3 z%~6g^{qw=S(^3!UwM8KJ*0AyUY9(O&byl5wY6Gkp{^~X}JsRH4Y8kpiB?C&sr!I<1 zD*jzz`j-Jyd{uqp`jx(CSm>AFg)u@?Jw)9&RS{+~Khu>)nN<4ge#CNdH)ugZdC|Zj z?B~zsDd}+^!B5_ys!Vzu z?(JiJlaA??xnSz3ZQ!<1(1>ram8ikr02aB6ZS(yfDQzUwa zV$<>Y$LN_`NM-->J@h2WXTI1!=3Fs10AcO_tmhYSD*q$afB@W(eH@@DDlfF_Jdx@# zw0f&&8pa8n{(#J((tqsayvMf6K_Y)@bnE$p)C)kQdc+lWitW9ZUU->{eLx0tLz$id zZz9#hmA}m8;%1g|d-U0dLsPI5csz3#Kv5#qL!W${`@DmWHnlG`dSM@lq0L6Raf|r% zpDO>}bxOL{af|A9M} z8)?l=5ZR05`DEb+x2#~&70N`ahuTw)mPg)BZlaIge&)GCOcAyLuRqk{=l@@*p5_F* zUSO-~v?+Ty2gS+b@ibLFp3461mHQ59M??yIs-xtn@Y+M-_SsJhC*9JRNcC{`^H$hi zJ@#VW=22Aw1J=UU-?nFIky<;-{>v@32XJ?&!2Nm#T!o~1$4-aZ4R<&Ia-9nXu08o4 zZoB!wTqgy>>@MZzNwU#!(iPjA>kCrhv}9XWS>^|5w!(MV*A);9qj{h8l=lY%hTpY+ zu)hlY6r1@h|As%f>M};_)-^W}t?0UR-39*BfY<`lY?|c2s5q=|oFSi<2>+yeZvQnUM&O>&318V44Nv8YQZCl* zVH()9W!1+{tr&1W^0NUwqYNb8UhzdEwhTwg4|*ik#}A#BY6R2zwgzZ8Gb3@oOw~`8f zRCY%R%{@kdYNF(bRBti5!3)`RGsQx;nwV?>uR7A7NcC`V!-s4M(C63IiBkt9k)GZ_ zqS+jB&ou%fPh??x(UB>hw+Qa$3Q zhUAKa-(w9SsaK6k|L*)2eKQy;ypU9{ChD;G_*I+X^kKV0_IP-~-D1{JUA_=5nty9W z^wLzgb)2W_JJu(t7=KL6ZC@-jzUrTM;h_giA2DH^#w&k#vAkr%-q;+l{S^)GAYFbbeh2ot14t0@rtisFpj^B@ek1axN3l|8^ z0B*qjQB~ET@;*maPjttYbwA$ym$?z3C>0)TR>78i_Y~M77vYIi4_)VkyslzUVUSX# zUKq9%ufIbNRicu=t@yplu5}+VWjx+Wc>8(!0GWR{2gBGk=udvSDVZ?X(hyAxwsoNw(y&wJSe;P?cq4(eK7w0ODx 
zn+yT3GI|||R1Zg$;T72-K-HY~{J^^ufln=jr?TI@XwI?8y%I&<%mSH>Xju@JsBjB7qaQCFZ^YWbyW)Bkv>%Ny{Ze9zdm99e?}FBqWW)G4k4Saqz~Dn@81Ra^U`1vTF}diiG1gZW|JP*BOqnD*FwU z-JsWUyHN?p+dHKD&+ z6ZARt^!IeR2JkNN+5LAMYWHbiazLs|3urpq^1apd2Us=ss@#Iby)c8)!-YGKiev2D zvgYKeH0%iBzvH#eBMVJWCX+$38hZn*MG5%Hfxf24eg1R8N|Ga}!)s z>oBjPoTWW-@FnJRA50vNmtg`_qp7ND>tfHY+pm4GUV+J0~DpgZ&q4qoiH;L zn~eaxa19^-7pf;;X!4c5Ci}y!n>!+~Vx%w^&nBmOyyUz8q=~0hvX0Ho@o8}k#fF|6`m5=zk*B!*!Kb@_JqL zzB>#0c=SmSSO$QSqmpl;89l-B4nR>V{E<@Hk3+Rlq+Sg~^$=yjQ8kB* zwjcD}BP%WqzV=CIs)w_mZDKhHTz@KCs-l&QDI$e=;RZg{{~Oiw4EV;1*qXq+U=}a( z`%BA8tA04}yg*lt)S=41n_9chA2N#(*{jqohx|<2(SH;{lnIqb_hmLcu2?gE$ga-< zURm`1pyexwvLU-TVWZUTY`W3$s^^YlN(CkWAUu`*uaDWjzId&G)T>E_cO7i?;LPh> zkq^G=wkM9%EBP0yGj2mK?&kSN-rQW3m$R%bG}uB8jNfUpZ~Q1ncsY{&u3|h3Zd_il z?dFw87@?dqrtgAOXn6BWdVbCan0GYh}?htI|n=r|*KXQ(Ha2qm{EYG|bXKq}sQf&pJuqn(;Zu`i^>VO+9|W zI*%e?S^l+L%s&(ChVO5k-<%D+#S#yy%N0O{3u~Jv%*g}di(Osgjy1wn-qX}-ZK{Fi zV!fw2M{2=y`o>KMww8gihuYPKSTiu(Jv?AS`@dc*K9%9POwcQhs0^+tqjBe=X{@gg zC>S6-mBCR7-dr^87Mgb9lMpNqcY_9nnv2F@lqAug%8efAq$m7bG~8FWOk_YkOJSy1 z)p-}uH@_%*@+IM-t-HV(eyGWys>=C;m+ezs^$z=jGMWoCGy@1v^je{E=)TXU*VJt3 zb^CIRz=}SU9F_ddeU*;{O5r zg#g^pp~q`Rq%yeIoHrM5XQqoeJfA??l~d{Ob0h!tlZptDyV9BB`T;WrCX;#%P}hmN zL|pwb)IXO^|6Rg7@vAfw3&Pt^BcF?gd(2_GM>gxcQfKD(J~7w<+zlFyd_^ZN{}|I< z`ay@2nr|ka@euHupya6X4?TzKOLzP~@Oqpc-{78oiF8FXqJW5!;POwZw@fiBNTVUB zjPog?ixlC8U^_rjD*X#~K!w_xk0Q@y+F7OO-jo?hy7>j7K;!=?gs*>GuVF>r_`Qvs zefqSK!n|-Rzy2Z$Htx@Toc(Fd_TpgRrWrP_skxXI9?#U~&Y2)@e<4RsZ%6B*p5nnl$^-#A1?u>C{zx=Pn*QDUfZJ$m>^&$ zW5pduR~UZkWYV&!AK}CyX}9GjM!-*Nmpz!dIRiHOY!A;d4uz81HxC{x4gwx~F1*v! 
ze-7F|{&sP?Py!NsmrK=K_<=EjYqnMmasvM98~2~p%Y@4IUw^d>dim9lMqZ%-+Sa}sk&YGqj|@?ka>}UAQ)!)^;DDDu6Dd0ZJ6--sjFGV=Da){V>K;U`tnoM@0~;Qp5DuTaq|ltH*e}YtOP1D*e^# zFYX&s8;o7W<7KW2ZT~homosWAnWWR!iBu20haCGgWxr?8`!=2U8SBUyAV(R_3)c%> ze!4PZhYmBGNaq4l*&pQbJ7SmXdjYQpQU_5zL|O3qFB{WH-{aZ;^y5)km^A`$Lz$j^ z(uf0;s2+2k;w>AfEIU=6nUX+C70~-W;!pahK69c{(T=^q(d9!7oHF9DEr0Iw0w_FH zFdEu>leI#u0M$UrQRQFvbH7yntF9UhC!IEqC?cXvs64vmvgv!~jf`-9myfML3UfoI z2LJs*6dh4ET>gRl-{0x0hmIJREuAAU!2;PsWxsXM1j*n9K1n!4`~ z{AOs7A#))jLa5ASxceY779tHuNTMW}m1_uz3ZYD;lr-Mvxx1U^G*6PNC}n6;67f6t z-e=c&?)@I;etLf2*Z24OJ)i!#yECl4)*jZ{@3Z#aYokjl`VC@=HD^b^COudT*$V_U z3H?v%R{U5{M9X^6WR&s0Ho-?FKd+q5y@6W9;2m35*$&#^y$$G`ACPKg{|wGvZzmFX zJOz$wi4?n8Sp*%VR%DhvE{AXP%+?xeWWpIwp2{aB#KB2j{fdh+V_;0eXqhi=wea`g zpYwA!Ie;zZpGK%(@c^sO4H#tY`2et|ojklG_YD{yDB%Gg=0oxE-fX>Lsc>$jy4jb| zGWavDzqaYi02Cvs9yD!A$w_g>Z{9k>O(HZAh_48w1K)~c&$g7vze z0=Dg`0@9AI6~52d;C|cCj6>1IV6Cqe`)2NEeC=0m|6~3iA;WI{HksjcVN=b+oZuOs8o|V zZ`R#BJMtZ2{Ll)SEId^)zFfv?8Tsnd+#r9fj+*(R)0)ncYJp75}m|vi`Q&(WD`4spx-8>%<1{CXb7S%7un# zLi=$~J2=yC*Ul@A`Pf9%V4MO-^#mW`hgO90a}Uz8ziO|ebtM~X;uZX_Uo(I%`Joko zzlxbi4xO=fz*VDLMZJbrUlxAIxo(#x0BsR}s`~RTzB0W=PF zqGw6iW8!_cp5PZ${3jhseWmaqfpkYK75(_mo0*ewFEZJuA%bKG+Mxcx6PHaNt}(oF zn9nP0Hc@^?&yGw436b6s>@T!)>!Q#2@$94j+EM}js5?=~uj*v1{b+3tra2ilL@1sJ%qXL-&_UF7nn9^OHHtTReyX96HSW*J7e^KpV}nxqQTx_a)!_c3sQW2r zD!?lbnxQXj1#I2RYO~fiz-!4@`wjfq3Jr8x`r0tc{|oj0u*{#Js*H#4qUmSfXYLz( zNO-8nKjF7x!DP&~CKOBh8&v7>q9D~mU*G7O)1^6Hv`>k-mXVdgSRj%9R1efa@}ePv z@EUjRl;*5Q&D)YfFy>#X%u9>_x}-7{27Tn-=E#Q%On3=PzwgDAu4T)Zq{myMp%Dd5 zK;^*mcTB1ArswZ}7d_zwQvhlSYbxh2U#}N2$cr?12vzyht-3~#R8Q8^w3908|uK{twJA#f>X(_j*UFbB0TD-LiPn2|qNn zlR>=)RtKMW`ekA{NMKGm^UifcReq@VS2b`FsC25Be)N{}_gy1XvFk*7x1Jrz{7{de z|J>6O&U(|?iYt;ZH6p!c&xAWw|F!!tEJuA#e6Qo&8H`FC(>%LC0IHyJBJU&ehN1g- zYA{}_KlqeUe7&$B8&e}Zpn=WZ^+zTD&(Pf`Ee~f2c+pUHRP;^#l?$_tB1pFx@IyU< z{;4FL?F&X%92;;dRq(U{)ExX!k6?efarzW4KL4iTo!RR%m!lV2&Xd7|*AV&zUUYFF zZ28aWM&n0{rwMow5j{WDL*?N;#}DQjeK1|n~&x#A6aB0;gre`Qj?NGD$+Ak8mIJNfJZ;>4M@xAHO2ES5xJ?Hw7 zOEt+bsA`E)<8i 
zN|vNp&MF7TlCCVXaVvw%4sN=Y81@Q0SJ9j$bK@J(8Pu?KgH;Q7wEy6-4U>NVOP2Zf z&Z|dIDfX3>)%4aU*X`ZbCI~%YN>Fv~gk{9lQ8jO#MPXIM2TkNq52+xYI!?=#0R4=Q z&Yf{29BW6q#d*Qte7C7YrU?zP=o){Exo&C$OLI)M#!#zJteHqJ)#H_-QdP75o6m$@ z4DR*XmKG~N*>#Q~m=lpdI1M<(;QaAJJ;H>w=Geq+`nyBN(!VDPni?K8^^R2Z9+CZj zvLmW#?tX_D6Y^;D{7?_I1I`y3YFE+QY8dyg9*e?0AVIik&xG$Epk7CnA)Ef-ic++p zb3m`pfXr?`b9(k@9|5l*5{Ih&C*ApV{a1AesaJp>>Y;q`)N%Qd40>6ZZt^VEB!Q5q zs6D9o>uAI;*<+O;a4-_$Lq)H7uVl5YbR_8+ihqE5ZfjnBptD1KKfcUP!Ge*%9L92I zyY=r+P>(56%c{yI{lpzq2prv-+3M9&ZPN)kc2xAR&wHCYTc46bEqi!{Fnt9ZArCm+FT~Mggq0p5Pj-aMRh8AZa)*0>N1;@2hZIPg?wPD1JU=Rp2t`ih<58L;W+`FCL}9xCqihym2N0a0F0+6;uG4+;6JMhsKf7Vji z0B5S#nkAOC!j5Ivir}YL|Mlt#=hb6MNHghP6ULi}r9h#f9=}&FY-wTr1a`2%1T50oV`3kfS?H;i2BX zl*sGeW0+Vu(pgR_cL<WfVx@|Uydrss!xC|^9)T=yWKrSMu|$#p|! zuTM|tRGK_8+QOgID?ugyu(STyq)>$)>JgwW zO?5nrRk`lVyGxZ_e20%s6E4CZy+L%8Bm3E z3qUPlpyFR0erL(dyl~QOI8^lDylBaMi8G{L zA%3Wb>@nWTT(9b{f_~1j6Z5e$PP=#i9qNW^e}H;mz^}pJy8YFI_IER}m~LA9AA_E4 zG*t3WHF?eS&P^BChJ&)BqStg}c3dBe_di6yO?$k8{7{e3|5P$34hGd0zr|+b$JL|v zc-buLEV&fy(C6K+^UT(yJF(psXdjdngz)>Y%fObr@VS=hnHG zZILh_swGlaHUS!po$*~gA_In>xl(#SFATa_mzuOqyaQy*#;;73gHUZuhobSxP+*@p zwLaGvKMg@@R+XK928>NJIyNpX2!iH*j|}G5!OY1g)<|zG25V-V?#g{v08YHmo>uPI z0)8`?6Wokg0L%8iKiJU`u)jienEe)Np3bd{w#o)gQ@Wm<8QTCG%AM2M2`<3D{Gp_? 
zixa5&sDEqI?Dqikv@LY)p9i+bguR`amk!dNuafXcPX)N=_^`r@7T~_*epHoD4am@+ zH#Q)+7S#5g^T~1f`+o`4>)kd8f=YlfGpgv?xmCrg^Lyph<3g=xIYkvAwh>qsVGN+{ zxB`&oA$|zzq5GWlhSqp6qVCpVZMSf2CbEs32e4>wiGq(B#-ODcM$dh59?S^r+2zF>P!tZvrk zM1kn}s5n&oqxJJn+;NA!p1@mr#PCo-V<>6xgg-z%P0lum6Jrw!ca^4MDM(;W#c7gH z;L4$j<)&=B`E#R3!X zC_5_th90XoT)^X05+DdJ`lN3DRP>Yc!WR0Ovq|m8AD~`d-E?PA9n(OtheiVksXD-UTX>E#Uzh9r(>!pS8loypi8RqYd7RrYPVF9ZUdsC=kX!` zRQdzdB5s2a6ZHuCx^fE~dA`V3%svEanaF>-B$@{63g+FIC7KS)zOAk3T3P~KCgfO+ z-=72-kIy^g9Bl;f&M1qx{A%E~)w1M^cssbxuyOhELI?KotCU)9`y9@?=y|+EJq1cW z-8#x}(K+z@&~OdK?OC8w|7y|J{>fmS=b+^4HNaeQ;PMeJG>lUn#|@3;j3*v9G~w9%=@*==oTiy* z;s2U!G}f50ey{@q5uGp^UjHn|`hF$Vs1BOl&!^U`-dg z>yL`RMxNTV{XPNMA|gG!Ahi6y=3c&P9LyqJTg4BpkT(+--%b8n1AXR=_K$bRLxB*q zsQ>-}S~bPW{-k%N)z!A?MPb^+dVxK`45-Q<(IhDgsm}H|5~#k$Y$%EKGOUO+}N_Kb-Z{a7^5eJeK~Rq81+jEFzRAJogLMxOZDG^ zoo!>Y#nu&rz3W7_e8q!;xI1{Iytr2bzAUa5-)f!!%Cf4XUP%PQTqF4P?ZO=RB4giT z&m(t%$y_nRFX?Y!@_|#;Eq+d5(GW-D;yJoNyl#o(vA&;xeSXvn+ z$^J^8{dCb-ojBp49>G)?e5p!f{Y><{GWtn4HiM8c&Cju0)zQ%MKR`ViXB)&9oekoq z0lt_l5vqJ(0Gap`w;1Cf=AAvMn6`24{rk|0u$5?{d*tQd37I{}HnbBhGL4 z!k!TMvrl*P=ZAWzJ#i^jzMB1Dt(n}gl@bdb>efB{=dEB z_3pm@yyh_Zil>6^AtL6B+ngv(Z=MTPM(woJkA4F8<&H`%z83*w6R&0#sb|3Wz>WJ3 zkIaVFgG%$CF_M6`T6>62dmz+X@}ljzQ6-F>9#CM=92|FO_A`SiO>kY^ zo1qiqs)6lR;~=}p_aO1Oh2%Q*BH(zxcHD%CjbN3k?l)t`2WZ}4aPzi)3H1IgqP42N zY3sF=R{%434bPuM^0&Edy%tv?)9=TCK*Ok5`wzmr3 zdWZ

4NV1r^0;cA7>pNMhg>&@`3nJ$=`3?Kp)2+UcJV=0IXj*V%$xk-Fv7%;>B$8 zQvF5GHuoL!)>v@N7FK>3t#bQS%!6Lb&sY~XspF0d>C71#*ji%$0X^rYQ?Cd$Fp3E{T7WigG^6+Cm0=d*0iz0fS{xi)G$Csqj z161-WEVWxw^f9IP_4hPo_K8b&r20$6pIe^Ey>hP+XHJJL+A3Z%Ya86hlJtF6=M3Gq z`5k{jn^)0C1p_QdA_YP#c{2aGtDjNj& z=&ab-rv)S_uDcR-q!3J(d$93HS1$O{H}66!9%bZ7#%n2 zj(1Q_O+$3Y@?z*V^qYg~&@XVm>5auJiobyTu={5RhcyHJkBiQ(RAz&kZFeh@FSP?V zR^hV23DMxcGp8lEX4tNP__&T#1i7II&_pHdWG+2g?bb-o9W0?|P6EFNhk2voR{9tGVe3$Lc@dS*e)a}hmhT~C<*aHbjVcm@@Lak z2fO-qc6bVeF-O@^sg{)4hVZsJ=U#7BV~dFNi}P|MH4XgGN)c5IH)YeE5++R^nCgkG zLIQK{1L318gci9k?HuSy8rztv`~fu=Qy!MzC!KlXhgO7U=DGt{PaW*!|1bpOEDq@Y zYdQtc|NPJj-N#e&x1pJ=$f>&hn;a7bOgnmyN`5gZLw3KHS4r!iie4q|oMb`k3xWL) zh(EtOjZl8>R_mq9>s!v<3&ySy%g^YEszJs7;kCOPWyC@-^}o?u0>qz+o`D@WwReyY z)=j`A)a<_GhgK+GJXML=RZJI^whh(Q%EUO!NV@-2&U8a#erSdC8Go0%e%<3ug)t*E zML>R(9n#kke`SvJ7{BDs+!a(Vyi^PmNEfA2m0#N+Vxg@>0w#+Xa`@yO1)PNQM@8S^ zy}eo{G=|h2%MY!PJ;JlreCpZ{KuhbPALo?{?7u^jm z*HKlZoobFntETn(@YlwMkDr-$$)oC^Pfj#*N@zdFTdqTZ^Y^NhdEsTGpN3TJr~h@y zvO zcI8AlH21uHg6K;Z*%vOLwNCf*y5>`0lKNerg??qA^l*5n=+O$GW;Z~zsj&oP=gd!k zu<{pZhjUd^l=P7*8UD0J0ySqL|Pq4 zc8t~CH~QZ)bNZio(a?}ZFttsqi*xDwo!UGe;WIyu1RbD(+uf>;hBgFQ&^6vgxdDXU znsjXouJ^}25FXIL)$Xp2h6V)nW+5Hmrg#9K8|qCPqJjiwK9K1`Vj;Pa95^{QR_UZ` zVUJBWGX)l@AU0I0)_yHwi(|c;fT{~iKW9zhrpnu)|8V^)o77aJuXY5PP<`O?xoz<+ zV9mSqbAj_RKP-}1|Ih{?`H^WxkPY3(Q((Wpm^Ik)v22*cb8I83aLx;P!b6Sgai`at zS-WFagp6rmH$azE_1~$r%5mL7Z!8ZHa8r|R`hkDlAN059+>54gm_mOv?2b=4vLkPYc0&Yug_7O}Q$ zjhvq-aQH2vrz-#N$)`$h&w#!D)JPl?VP;&EC$LBfRVRXoXedQ?0Do>??H?t2(l6SPtzqvC*8ZU z_*wD$d;u>ssz0jsL+_#X=3VzdMJoD9HOBYs3^N2~j#0j-zmml&9+smeTer19Ab=;5 zhl>A~JpHqa7w7dlRVa2#B7dY+Ja)DKE5xiRKy zS}t^tv1Au6&4KB87ISX~6+)%MA9nB9lng_sE)073vL3qGOd7qZ!4nSgA8pCBmb1@u9ZOzA)?>dlzfLX6S$VLcj&43tU^${B&h*Jg}?W+kPlI7J!_R?^TC` zKp#bUzoi=@K}uVMX5+(P&@fN^v+DRt5HtO37MAf60(m3F{R#z;9no*5txYWm9{S|s zk&7kZ&|~$Nk{dq&O--Yti{VeOsFQhhxB7daedhAYq2YPpb1PkTi&P=F+?aTHg=+1; zWPMVTd6o1)4$+t@}jgnW`>!>-AS0 zaPOaC8vck>HIXSr5DAqFmy$cdr@F#@cwB;$zyv3HkIJ-|uWxek8fnw(2?FzL$=fUZ 
z%t;;m2$CUagG?o_y4iH`ln-XMiA?Mj5!#jY1nfhQ5J4o=jyS#ByCJXXN%xpm@iI)z zkx0+ziQ+<4em@J{s5y@SJr0J{3Jr9`Yz82wI^bnjk+z{Co<1_jFaOm{aMP3F>g1 z7uNQR&dLI1b`+ngFMen>g<5<89x%t=8{ zz%>L3k=`SFjK3x4wnrR%c)DHAA~=V1{}Gk^Y6@lXO9L`VqX<#aYhEZxyoJZcCdh@0 zCI#>x?jIU2kQa^6{~Rj~B)~=RXJ|#mAMru$&AS#Jn)2R1McRIUfOf>SXl*fmnd5G( zgtu4U&2;kgg%xoA*6YdHuuNv@=Hs98U}LpON^0Xv81=zuh*bj5=(xnn`$a(I>PL&<<3B*mQn$^=4&;G|&D*>#9?l0d)|9RqSdt4) zpNl?cuGRu3ciD_#$-aj>q|Odcc4~$OM%s(Y)@1)npx!_BqOlG1p0i?9S#?dXV}$mi z5md#kpGgbr^!SAgsm4I;Fws^TTmnc1(9nRO9!l}&-1VP~3)**FrvwO`yNpx^sW8Dr zp{&i7l-Tsk*9r^sR{98hg(7+?RXwfUF=*%~F9E2Cm{8I4-YX#Wf(b`oLQ0PyB7#hW zYUX+|mER2AIm$W!n@)H@Q@;R^{HVPUWJ5zQ?>U zY0MlQEa1gJ*-@3By}}@E^^*If6P5@fBB+V_11JAEC55b+T@7LGX?~bEvHXAmgyo;O zykJMdJ};~T5pdHZLemE@zbj6?!NjCcg>mV7ChQPIM38~NMzcgVi9S+tq0X2!N!Vnf z{Bk{8jS;j!?S}LT&xafE@XZCcmiY_gG4sFiKiMrmf_ezD5!rCIYCD+3%A5m{m^VQ% z8cp~U&(2>7cgoBR#!mf>epfets`h_2aD{~S1oHhS2qGe=Nw5Q&vMwTE%Y$>58XiF` z{crpa3f+HGW z5W9j1I4Lv$=n_Fh1T_izo9B@_3_Knamua6}hD}BU+;l~E`KjbrJmeo=@T3@%C(<)8 zfG(-%dGCE9wZDIWT0}3J{@dTqI%{&_nu=LxJ3ptvhmU?c=pARn zuBjKAljeEB)6=$P{BjM064u+&hE6>N%f*aLkHiIm=U!KKpV`3x*4SbVo2g9r`t~>Q zSuX)fpS~HWfyIN+!{aShX$1j;(T@UmjSc`at!qQ#4u1iczCUl=+mH?tP2{qV*d+o+ z*pQ^Rj+yZ3Ddl~jJ_W28Q?q=`+;U*Mr$BUg(JLTVAF37|()3?w^;axhfV*&z#YHBX zV47)D9qZ_xEv=pziPX>v4bcSE=B~y0<}*rPEF&2EL3~hlwOch1KLVseySv zmGma#aY5K1BE2!An;xkmGO2{uxND(Wm#*rPUK5Ck5FXIP8A7X8Q%B*8q`iK7}nkV`MqeU9Qa$Bmz6%#HM$D-T77`M{t92QqI1lp<1cz`8ewZ* z`iK$Uq(cLOWC+@ze#9vzKF3Dy``J*>{(Ug^g?EGZ3wyjE2ofTQgxV8-5$tp-W36#c3)BEKi!J0C+DU<*MjG(;2npUSE`B0#CH zNX`J$Wb7^ym{V}go&a|Uk|Ag_8&wQ9#gDBhbieRwTq1UbNN>=y77sx}1d#~ju&Vhj ztJPGcbcjm4fEN$FM^%51CK?AW92C&&6P5<-QSwl0WeKc>L;MjWMi2?vLHxP7M(Y4z zB=@M+#j`}<(*jiomHg|Z*4`7TC=ggni}+K~>rUS_;)pnTfJFq!{s(BoU5mA0@XK}n zJ4*%D;vxAFBt#I2P=1w@`OS1K=T}Q@+tW$cvQo(p)Ng969d?j3mMs-MdzgLGFZ?i4 zWM_EwrT~PkKi+$8z42lZp;g3|m6f-zo&x5@*7tG~pF`G!F!!^oBH*CXPz$s4D0pqx zu(vr^i{Q-i#_104!{L%Z4TTXyi@|M?a`s#A3=o_3MrM*+BY2xvZBcUC2W)36iAHPO z26i1=SE^h3!1(5Y8d-;;K~QV)mrssmpq*_q$8K{DX#JcjJ<+@d6tT=UMdw6=#_{^F 
ztvC#1d{}*Zj5rgR4bu4ObT$l1FO9kVIz0l!+4#@v)m5BmJ+m#L4dfN4Uejf#J{jnv9{uJ*k)E$ z2sV>QZ(s|MoK*BBTdREUv7x|MGot5*R;XR@=iCWq)1%-BV@^V^4-f5Bl9&Uoh4dO4 zgIiXm-&(^+w@Lf~T4^qe{78Q+n>FT?btaaC3d~6jjJxZfA6m%~-@~Ex=A7pin~PGg zN5uP#o^#Uqp%tN-xD%Xuqn6$_3rocA6Y1H8-TbNA@9Kzx{YCu~u!}@`U;xl16@8!W zFIQ}vltQ{pn;%*c>>n+_>Ub-x7X;i&2|+pmxzw(g`|# zXoWuYa3^c=3j}rmqw-Ldzv+cT z^t^Y1$BZEURP?RB-r+~(5=reB8ZZ8b2};_d-;%(tU0x;JHjOlD2o--P_hSPxyzp%% zNMLR%{0{ba>M>!To{{e${e#zxI6>*w(l2R&tPbe&_(kZepa*cvJDnry32D&d)W^ZD zK3R}G?bycax|z`Gn7ie@zO`^uNPn~XAx_`~?ex!4FT%mci@M+Ul;%KdGqG%=xu>Br ztxvv9q#a!0AgU2~*#%nsvYT2x=_Qc6;4CROs}$Jp_-a*YngTRZrQfPqy#r~biqcqN z0_eYZMEl^NU=a4?q}y$Y7oaook?uv^tKf@z$P?3_`Jij0X{NVFJ#c#b=0JVzZxDP# zdhK`_HaJwhJ$3k-a+R$vf%x-$A@DJYjZk)>6Y57?cVH4pa{ zgXPYC19YCS{|oj0aH15!)H3EC%ct8Z9ei`aBZ_i@5`8@pomHF(jvW*Cznf+og1sO< zP`Bwe)rcR0dIXJ7>1Wp$Op;WRN{$V}GLUWHyx`aq<{X(;^!1NY{4F`RwiedvISOke z@G#>^M0%BTfRhd9k4jamR=rfBHF}c9vZbQG5-?l#7aq&<29ZBJ1-Q)dA7on5L?bE> z{>VUE?-zaSBrnOl+sOiJ$xwDw{ADb34Vo?m{jUu*{7{b|zvK7U)pVT!%7z;Qf-yEK zFgHEi-TwSgkJx@|R(_-}^4;huKPnI_BGNNVy6LIPzxqdOf;#Ov7D1$E8v%4lMep_b z`n1mev81M&AL^n0jOVX1F7YSpOP>J4iA_-gUQl8APu81$+PsK-f{RN2nai!-OGyNi zLd8EoJ+}kOZS?chU;W&buTX)w#Khz_XpBtwB`(rre#>r^Rhe+?%vrT~? 
z>Y;k!q-z;NSq z@>L;SYjT&_^iesa$6WA3Jwp3~Gp--#N-nh?Ee6j8K6QkZpQb)#g~5x#q+Wcg`cv*; z&-yjwnLzy`{s@90s7Gi&?pi#?DKo{~arpwlR#11K;@@%3_e@i14)%8!h8jTpsp!pY zz68I%->Yo~M5t%$9;SS^@GyLK`Oxps@&0hb8;!Tti(_H)d9$oLpW@)bJ?6h<6!PF_ zm7~c8W20bw9b<#-+cMyosI(w->r1d|(mTzrxMpyKRkW$y-3MT0O%JMGJ_2zrV~@DL zw1lU}rPMq&%LN}_zWNloy$XPio$uO}s!1z(}bAZ|84-}~@0ym@@ZBj>N0o9$0_JnD?28v>*?wzvF z1BUm%h9}zG1DX|Kss~;bf*A_MkEBW*VZ+2{{fmuDK-`z%Vxy(X!HwdTH!`39TcF-p zlb3jZ6pV~F*2W1zJse-5E{ldNf~uSE-0+s(`exlwO_^v)sD~g6f_!*a&Kr2Gowdke zsP>p?Ow5d618C-U-CZ5sMoI6ZeQn*9C^F7@vOj~zgABY6obinNHk7On8}0Ih?| zUkI!vL-bUtMi+ItvvaN&)-5nMcg^lgDtewU)uax0q39V+q-!A%L`0B@$B+A)mvpg= z<)k}BV)B?kQm+;dSKihkwO0)_ikp`;Ur$Qy_zYzbc!+_g9(iw@XKS;WNd5FXH+FAL58so2A1FK+~4 zlaRpNw5K~z$=?z=QY^YN06R+Lf9+B?fBv;-)BeVXO;=y^=ux?ProiE|sQ)4_D1uB# z5AkfcUYYExg|`EzrDH#c`5N@Br9)5;K{lk1`0HG+n)}pi^Hs77FbyI-+}aKGsLKEL zq5kH}vU$DUHeu1JoY=ainDkQxO|+s3TY{L5@W*Tb*f-;be1VuB;Q@^y{8~Cu8@XzS zxe)Uu(i`jt=#r}Zy!V<(^&d^NBFKc=gGa}vhu%HqE2Wbw;8jEIk0!biWFzRW`QgI@ zz)jch;*XAHUmX{M9-VTziN+;z}&jY|S>quU99lnb2dG=-=i0gz1^^*h=7vP_>`!)~?^LFZ&6cpo92R(O-U= z61#S2AnDp6G<2dNkuVf;565gQ*m>lxLIM_p1m-Yip6#xG1c4BgAgDQ4|8qZ$8x`?0 z2HQ`h*X-GWi69t)B>eSPIYrTEPFEDhIYO`d&)|4Bf2#6VUfe!VR4WqON~Bjg4p3fH z^aEDLZ1kGOCe=TFuaz9in47ZctJEYd>s-DRu0w5& zDoOPhK_ImD3)y2_e6H7VySjRMTzv+%omehL&k1S>f+4+?Bff`EP&*&c-y0N=hHWF> z2R-yu^3T&){piQ%R7{gdPtyYEl8SzV|KSpGdVb%GyEshZOeW|#+&>|KloVFXaF{BvoF_TQ3x3C7} zmA7M4`Zj^f_va;Ubu9tUn)QDJLZUt8P&I9%4|eZ2Gv*{VS3d3O^yp4-F0;8GQ1D zS~#`}xpO%AO5N%CZ$L2FJL&5`^)TM5Elr2&nlpoH8JE6IzC1U1T z$52cS*)k4)P2oeN-WC;`#-0<3^&`>)bHK@l^GBuH?gz4qA5}61yi`2uKTZhB?a!&A zAK$;{V*i_70wF4qI8^*?>=)SeWqV+vh=9Y`KzRAD3>P!qu-z9!U$Z>97Xn^Nqx}I| z<+T?!(68^p7hvxt<9nerSc-6VKl*O7cA`KfTuSO=UDz@HcwlPY;kkDRch1 zrAM#56qjQ{4{x3pClI0)m7gD45xObYOJ#pVZI!4iBt5Z(s{IfjV)6eIXv~~y#Omwl zFEF&DeEFdj(nI_?*Guau8#1bLT^6Trlc=gNuuMau`2nk83_%)e^^gj><Fym)n3Jl@Y!6T3xHz9w;jkjCm2p z9GZ$vx1~W)2$^)Gvbb_`y-uS%w+ctkWXQU?UR~ zBhs4-4=v=*$f%Dv?IvJaxW)tjp;GOyi*F>JJ{pMqMg*Lc-7^8g@3kVlrow6)4Gvq@ 
z9*7!~ByjFDDh?I@sB--P?G3Q^YaO@(`H@Z_@IxzpuY;R@*1ctFf!IwXFeksfo*ira z(27ugnxxi8y7D-qR}u!km>H3t-Q)G3D!;;}rxzxeyuc0+>CI07#D|K0#F4>rYfODe zhX#IVh4RHyO`TP2`s#SaGTYS=ySz8g^YgkCzm8w{*oJj9z)vdq$(25|Z zW>iHdz0u)%P2b;90*9fZ?5Nr=Lr!^Y?TT>B4iRt!W1a$>g!4y54}WzW9Pz!F^e|9< zuN47W(#pI#>0>vBOq5Va?e$@rO*Q^8&-_@}9G;A_>0^+Kaf5?X$#v~7)*Yo7KR zTFDkAeEn77MykL3Ypn=!GO?5|^nphgJ`Xer>9ze&zkR=@INX60np3qO?G9|Q-EoH$ z8vbDdlDO8&(%j{QVeCa%{W&vO#>y93uaXT(5>J39FQoo58y^ovTfV;DT>J_SR{!>G z#_bq*ug*MmgSnYC`fa;Iy-k~P_I%)L*p1=~`K!O$C!Syp82w(R4z>Hx!#ou;H|9$8J?e?-NW6PRjku zlW+J+pT50*QB?^Ody4LIQn`KILj}qf4N(L{$()jQoW zSL;e|Y&x+594&;Vw~4YS6{!rzwh}rP&G}+CJwMbVyvB$O_(8uNQ(d=le`2qLQ=G1x z>0|C6C9s25*zzx$Q5~k9kU%c8Jp>(i?AY|>8tQt##li)ab))8_(%-%lu?yd$LVF!O z-7wd(xp-qJDO5rFioDG8c!K}+%chsGl_wAKixL>Bg_WNs>7%@()vDJ}fmyZb?#vAG zgWC9^9zp+T(aV3)RSpkxRq9(#`l*513wb{Y{RhO%=?}h4^N*{YohC5BDs1`9CRv%N zElMQa7DCnjZ2eQew;8-3tv`OKN048o>jw=)ElJttcdWeE`gixBmcpX`IRe|FP&c8H z|Dem{aCy~2QvDSY>ZMPd;*|ehA0Eh?GktE!Ets)G#Zblo!eMH5ufF8Q!y9JFQG0gf zz>TRnDO=M+;kBPyb6obHr@Ol-q6pud^XJhWF#GLew<(d_jjFY z)Weu?l#glONv1Af*V39xleF@|tq4(L&q0Y`Uw>a^vsZ6G!^ELyavM{@vps7a&ih7# zO~X1men^LcksH=slz-|CVsAf(vsUGSJ8{gS>){okN$qQ;mEwCq!#>nXuJHjww8oB} zGp`g(KN7b*uKhjm)Nqm=|Kcq;f_ao2nA!qHCuTi-7Z34ta|w` z@#^(%!jw3oa^jGEZ5lM_>u%wp9$~2C#vvR3_368NRYARuL&kh2>JW232pdey*ql5L z89`O$u-9Gm=g`je0h5W*i1c`jQejQa9u@Ilt3C%}orr*=qi*Mcu&OE@Fe+Bj~3U zjql%XzvGLYMhrPA*agrfKhz`W4?OPC$@;di&%)JX;(Gn5(6K)$;>^kjf%ZrIsmd?% z$l=~h?FfN5Scsk<>dB!8+>}i(z3-N#m&6h}4jG~S8D|SW&^Jx<*!tZ$PUtvfg#OR< z>OJr8IC|&RP;3h7@Eo}`gMEn-Vg%hwl?@~5J=lXx0E^jxaIS}-IIKhz_bN$#hz z^O-~s=$|Vv;f|VvAL^le@sx4*WiXu;adXhn^S%F6$~C^U703`lgWA`p79Ob&h8xF+p7fCpgCZA3 zpUX=Kf%#Sv3p5hgaL7mrHD#+@pxw3JQuBTq*iqp9ZTEmLz(hyeY>&D-*lqrNMamO> zaN1=9IN}fjk0?%eP2c$fq$*sE{0P%Q!T6EN_ag2k%eAc!uyMQ3jap~<< zg$e$E;a75JLE(Miy;7%q;%*Nxq2K*X`5pM#n5l`5&sQgduU}T5Q4FjC&D%}K3~Y@B zPs9vETSLo$PKsrl|I`9-XzHw`%iSM>i%X&}7N4#HmG#>tg2s0J*NcXBEaG+msR)|r zLaM^+QuuM*YgZW!GY}qH5lk!hxZs1HXU@A8D3zj_qg*#10|? 
zP6~7Zx;>)~bfiqxH`|*3xkRIdg9M`fc=mm$*-E^@|#Af}C|7w6Psmkw_ zGh9MJg(=|06qcTOU+128aWd&zOnzvE%7y3e_Q1a{C>vC$U~lFAf9wVCvGcrg_ju9y zp%tP1RCF5pfdRHQ8E5O$1WTd);$%_KScirFIt|b(qKSyGTG?-nel(ze$3uLJK}_?-pJ7w zmyRBW2}bo&zD~DbeuPz|ZL%}eoD(;2^PPAozWC#)&YM|qrrXkiGxS5?{p`6Hf3-4T z`0v?=ytX95c{9eQzB1SgL^t-kKl)=X7?rWaMXI$3jCyt@uk>dc=+NrCko%aWNM9p~_Jw7=Gv^@5Yt#3^LNxJXh*&`k> zeAe*m@2d;Jsa6NZheI`x*`d+4JfH$PH=Mi@#HfdTXQ~&N|EvIxsrA23B^QH9QWI|` zPptrlvaagPUKR}36(3}YT6_ho&S{jG)V2XlrOV##T`m8GdVhd=1XC+_O!*DV!OCo3 zx^K9^+Bl>t2wEZU2TJkh+=G(x8}$ao1bPa5g(G@2w4e!7WIB1z*>tB5dV0g3`wJ}F zN2UUqCL}k?2dC$*?TAsl7ID+;g}}01L{DWZMCI07hf8<~OxPg#spxs{MUxJ-{7_FG zWz0?4^l6VyN=_Q#BVek8m4BX7N540UC;!(>6+hHN(1%wH{7lkqhtulf+%YXAFy{g2 z*^$Q&^-zDp-{LOYm-cb}EV=P1wwXvTCH#)OuQD@r=11HiUE4&}ewlg_6_3U~#r|e% z*~b8<7@R*;UbLfg2JiM?zic}D+yv)LMumRfLA6HT9f{$`AGa4jI_=Y+JR| zwd_bNj!6HZCypUi{gtXZmp9Ld!mbnPH7^5HuT=Ebzk{y)zU70>MFgCbu@<09WUtVK zDWUw_I9ARJBPXoCn~Y6F1f29gIA}~McgnufArO<}&~^U-fc~YDpZ8uKsr~r_)N8sm zQxrtTXDNA2%q0C(=8<9KK^s1y&9 zU@v(O*ehbEe0}l^C{-IOEmC#`QuJ}xzTCY7Y6tig0(&>mneTS-u}l&yHY__Qu(rWqVkW{Xr`XGa)>nd6Rqf2(K9!8#S|DNAJ~-Da9wO5F<|C zPrn2>3Fi;Jk5p8Vcl)nj+$*VjuzE9oI>C10eZuB*DpiFR+p(%4PqDQ`dawncODg(2 ziTMZDZ+%EQVaX5m_@Qdk@YRK;oBc3(V)>X}z=scw3HsE6tWPgM?W9spj%4?iXMAr5Os9ftFQ)cJ0&&>x^4cf!Wy(37*4 zhH=rnxN5Q74KN zYEYH`m!nh}$feAZd+duZMY^Lc)R5_k1D5`vY5uvV3TI)lTFvCCZuYRmnw7Cg?ghNON#}WI zdLmS=SaRIWGZkw1w$EER|2~*|Y_I$Ll~3WEw8gLDMtDH=W7G1aLn=Z3A+bSa%8xo#%1*Z#9t(!ibw{nsOa#udx1M1MpW)}Iti-W%tHE&P9oFg5^+4*L z5$lCR0(2aayD}}O{l8wKKX{2ym&8-{k%=kv_tT`Wf5-_F@Dib}k47Y<@@NRh>GM2{ znpiGF*XP`;3&NffFbPfkCO~}9Z8T(}YtHkDpn>(xAz-odre~NFBH*M9L%>Nmf5=oL zzUUf%q;kFbJrQVeY9u1Yd)QGM_>ma z%8nm8OhOO1DemPPYGxFTpZPe5*s#Xey5;AG4utm8Z1!qmIh5zjjA8_1;zW9I4InmD zybV)_8{4Otk{rC_piP$jGSGwu>p~FIyF*jw?Pbsx9?eQI~ zsDJVvkLWHx6@NKzk#S$T9`xGF3ilqs+}5xzv)VWq`ElQzi;bBu zDnP13UZnt5ZYeZb7aR*K<+N#SMxjtYsh+mSCJxSd^dk4b%`@PUPuUs&j~8G^@uI93 z_pZW=nra^P>*C;`Y4)lErtg5!+v-4ak<0y==h=b@^p(53VG zJS#8uzXa<2zq73V$r{gzOgrJB9$~29LX|zTV-1cPFtHZm14p5b_vVIptzCCk!7w)v 
z6D88C^el@<&;XgJNk|8{DVu(Fr_4?9Hvw2*BE57EJ(a1@-c@eAquO6!Sw3PzMgP%G zyl#|k3h7XRObc53rGzs6OMZsj=UiuVrT{%MT~z#OjAo?l7G~Rlb7?aF)=wrz)b-_m;7ELIrM;=vgvR9ootHc$B_oB;D>qyM9%dx+dX=* zKWthI=0NN~O+CR<_@N%5{^SDn@#Dou-aOV-5`i5i(nH~crIc1KFxq1oj_o7T1L1?E zEI;>c_539-NueS?)Z_2JO0+eeN~yJdQuwHKqX+1h?@q3Kf zYcHAH=`Y`wTRtFdKjG@HXEudbpFQ^P4^WG^tbW++V?h#IRA4|eqv~q5JA5QDO3N-h z2)Z1b*7EgwFnrzcWUwPW1k%jrm5w`E4x|n*0SA=c0EX%4!H(@TjgYe?QfiQd>_OC`^7PhMf?3g@sSmW*x6Y? zO=te?JxxsTfPw_L7ZIcbLpk8UrQT~yPOjq(OdaK&V(r}QBU&FVn$7Pt>W;*+ zP)MImH3xqBxoH~LiO~unPOWJ2D_>p7K`C-0Hz-ioTVJI0c^n-xsk`RvMd8n@h6R(S z==gfm#`0s6|DMfz-t8Hb6$~;gbjLK}#i^AzHQbh~KDeE8y2Lp^(C+?XMdNhs`Gt6z zGs!hOHB6#w1w5Pl&z~B*wp!#MVa0;%u;E4FoEV2D>F}6wtqZKtf+3n#O+qVH-Ns)d zZmhuMR}fg>${Qf?!cYzr1t>c8UiH|v4}{V8Po|1@+$7eTLHPl-0*VCjQm*p$;wmD3 z$?Z9{gtLSd0q|`3kE#nONh`a>*nZgX;hGn{Z?Cszv_ka!EpV^jE^TPHc3j*6-6NHL zEi>Qs|5+;*tkD8%6s`RIpBvT_YYa|pSUnGurL-3u++@XqTMvLD(SlV6cl<)!a}K>A zqm7$=XX=;7XI_Q-^2jVoZrSh3!#@42wqoU#02Z5^E6KZKqRb^3n4Bm52fTKh`O zLP*>GmRR5~I%nLQf5bBX;DDujOc~{ul3KYv+1|PX9*%M%tYmaGZSn9Ais;dYD>&&f z%5Yd0gl&luy`mkD@rU`!tCs2eVPuw3JiDI>I-#2Ofn4{NCpT+&VgB$e#MOGd+5Sx- z9{Y0t!hK;`IJ9sf=Y6{*>@@G|h9NiK;X#W|PIEh1hd*el^_P7Sgj8w{9y?HwiuLv` zPsF#M;3JO=l=}}!!D<7p8Sh<}k7YNWtyWWxM4SYdDU+iU5ZY#5*6)!QnxHx|t4GZP zG_s$2wntbn+IP0MzUS#s^fszXl;-Q#*wo!Wd-}^3FmaftL+G@pz23_O5V=2^MMbqHraQLpuzmKvCo&Vxh|?daU@y**1mw+$|W2 zfFjX`c7LnqKX|$^{Y*pl1(B9eQG$5aHc?x){P+3R>Og<0{$U6di8mRET=sLj4(#;f z6buX^P3Y-|TQ=f5{f-EzAFcjfRZ}8(`J_syu@d^Z9w*0T#n%}KdCXjI%F&kbpuJlq)msrLfi_ors zT0y=j`SPlgIVj8I!^oXWq9v@p(#j9vXz@~yq_6*r{$mE)on+odAWp6Rq*#1nef0HR zJMAUcHi0~Up;oec-P<5;mrb3urkO~5S^%D{{5e=<%K1UV8L2HRURZ#hjGq?L;Vl^P zf1x(?I~EsM`Swkpt&JmMcP_i%(;B~sOgOg9F#?}Wsoh~28i|9xS7hEQ4Z{VpalM|^ zRO_X^&&_HdUE>7?Mat`5U{sqnj?oZ1v3< z$jSZGmS=DCa7NFsL*q3nF-pDo>erwI99^N2|7b=DTJdn&-VeNT^r`Ied`?XTN}ltk z<-PAENF(P^JM5N$1=;`q|3yH}JHnJU6j-O-|3cIU7C$T5%}!;Y3j}IGwP2^J_$pp{ z*Do5M#srb-v=^LuYb0#p)DMhM(1nkI7ebd$C@fA?*Ya~E5C8FeQ=^pxq6N8O?pzd{ zPkhr{$v>gxOzx!P@w!Gln<_2pxwY%wGk3`CbUa#zz{`eL9#M7Z#=KjMUl~9Ffs#Nz 
zsQ8u74GH5ti|RAOzo_|N>4l$lZmj%vnep%dHu-HwSM5{`a9}iG0|f#q0bhNh^5GNu zYBmRBi-X7{sA$0p)$o^%o*!|>g=|bv7+(5|U^aa| zl~`ov)a)ZTbF2gG9!A|}JQoS53=D~&PXhU;&f)VEichLdnUh3n3g!O&!(D~I3)Ur| z65{pmd3n%7E9xRHXeT1v@W%QF>lRQF=m)59LMoTBeEb7Nmn#xfRNA5V+m6%T;hPRh z4CNp{Tm9G8-uhV8{S2f2f^`ol2~B^|L$?M(XZ9qSYo=xrR7zU;xtfP#WF5CNrq3q7 zD4$$LDgf3cu+q@lukc*1ihk7<-(Dn=3!uP+t|kI6P_IAhUHBFi9C}7@9;JRhdhOUh zl)q^+gzT5ArQzl6UPJ1DrgwisoFUyh!9;O4hz+*>AS;pP$XSWY2h z^@AU=cvI&SUrRTZ;I$LvF7DVGjMmMx?-Kbs8Lgc)?|tqjFFYoln>fCB4L<($(Lv3F zr_r^UfsvkP_haSl0~)4wjYMC@y&Rr1Ar6-vU11w*8i5D!i&q~>%f_u9Ok0=}5|5@7 z@9y({WFlU8Rb|3Zc^x!K)hcoOL4S;loqL@&PDFWK?uM5py~5?S1taFgB(L>h)pRk=LhjM;@m7CFpWs$fAvi!nG1Qs?O<(+oY#@^3Vo=bub~&GlaEgUBs(yhW4M3m!uk3qyxU4C*@OhA4k^2QP^ULC80o zs&?++6m37sgZxa-m(xB3UN-#qRSw%bs)tIfRRBD6tw8nQIpsQ4R<=j$XHTZ`5KL~O zRSc*4LgP@$mVUI=9kcYfU~&~5?{on{I&65^_EsJHZ*q|sssRr}I8Z%WGQxA3X5Aqt z4J$~O$Ul_7IMsuCq~g~)sTj+PA8t5|uaBF(R>L)EXOG1b>>0aCw){Jdiqmnc@cxHZ z$0j`!m0QzL1r!k|16ZL%;rPU-?KXwSCOwqc)&S)P;i28bCl`emE^AII@a?lmEnea} zaKHoA1Ih;aMB#;-@3%c7r)kZMk=W4&c((FWiQMk*Fe13wRx6I|%T-DxhZ(5?P(+}b zwDzMsE36v|=$U>iziS*Lo=tvHJuugwbod>NR6{yE^AW;-8S48q?T8C+Xt7p5WRodQ zHW_Skx9t^dtdu+S^;}!5tQaTPW=1f^y6%IYj10gbT4Ad!oHMaU_Qjodrs(57o6|^Z z#}oK!UoycUITt-1-q$lRF$=|ZnMd4ior)CQ*LCH{KE>yb_YVJX^(j*JcWHa|MLjw_ zs*S_^=^1FNvXZY#`$yMG& zQ-QgvIuQ+f?&CcPN9|vE*8WQ-nEtz=8dM%!=qluwdp&ci43>Ha9I$l2TWz{|D{@9K zxr5dRaO|0fYFbw>JQ8!j2lLgF{Q^lPIzGI~ssoQfMPbN*^yl|It6VzMm%Ir&Cvd5= zy`=4G*G~e(og}gO8}iTAe)tz!tK8fW!)O&_!z(N`P33fmWL$d&Lp4wZS~Jl( z`{5%o-F4;XSwABgcaXvm4@0F2WK8JtiEkrrQVSMN6OY9KiT00Ngf=Lo!P>5IZK zuicUo&)EK?9ID?xBKl2I*^Di-S4UmHV~Q6BU#VI?=Ndl$Qe|BCng~3)@Ox_8PO*5i ztlrg-!kfr@y5mZ(zWcFW%RTy9_%v3Vo~+&D;S;2{H=@Hn?L>6x+*0{-E~Y5po1%5_ zWH+Q7=dR~D_AT!5@VMRe&N=9C%-evt9%a-8A3A-NR1=WF?Ip22^ zYVIjF?*ue;Imf$r*j;?FMfn@WR;j3Zf$ojN-QFRNv+-L4r%(Tqp}KkJ+`y1WH8J6l zWIA_N26v+F@Un(6QPB3#)kal=wFA(_(o|hzJXF(y$r_gZ zBtFF|c|T9_CzELrb5N539>Rm%&~=JbggU3ALx}wf9?fkS)ZhIhc`YYg}Ylp9D$Pe{i=V!|nsPxf+9*Q;*&Hk$7EHn;n zXSJ`{UzIoFhn#4u&-_&>K2+1>_a9&JnONxUW~u)IGp@xEAF81}QTZ3{pomRQakfr- 
zAh81phEBHni>TIq7AYG_nn8gJT(0y~t3I=v@2D=%7|RdZ8w}N;2UIG;wHRB9JNE2f zmqIGg)8{mu6T>F|z#F4%4R$4w!~ej~Lf~b?bDw%E`WW43{HlbZ8YWOckEr;CNAeAb zI%M|AG?9@ii4WCNK`^1qCtlWNI`=ncK7Cw#sHT;lbqo0xXe{r_SlK(6(JI51|I)4> z0@i(vY`*>o^TE6W2q+FrpwP45*D`~s^LCu%{?M!0RvGTRQpc$++?zcCA`vB;LWR3p0!2_dDP%bF8PS z_H)F6`%hK68l|9iwojhd-ps=VawDTi$4dO%z96v&nSmD`s#?>2K_DuQG`soy$#wKh zul0@%6HcRHPM61SnDrVtx%K{3eC!G8F+yieFS9IE9}&`Zk=!dBQ2DdQpgna|-h)#G z+j6qex|ppWre_zTU1rxN->-j;v?BuA^Cr}wu%eF2&L{@CCG>lzwI&PsjM;kqTHtl8 z@%2J+SW4x;#30|iWu&wblFQxyhS(G$d&eOnNNVapQys2Pudl?7pR)5)uK7v5`B{AE zNYKmp7@K;ZvoT<+{fyf}3bx3W|&-NaYD=S6zc(|0;5wfxh% zEsKcj;6YxbW=B8i?gRUSOf??Rc-l5kY|43u~LwUgv41ST^e4;j}GRdU;KKWYoK=jLP+Tj7z0Vn`1n042# z`9xRaicW1m_>d*E7aW}?JX`sVGPGSAbI+T6PRGmdM&M<`%NFh#xhmG5aYBdr^z`MoAn>x`J>|SBj+MADT8Uuo1z2Zk)J;fzvsBuTCeAniH0N96p6912 zW5)i2P5#Z?do59~a7OC>7b^#S8EKaZPbW@lwH;p!eEa(TIY+$sb8h~g9xm9ZU+TH4 zx)9vbamIIpo&H$;0{h+gHNG@-)3Aka_#Y$yVf|&ZDG4kA08skz4gn~%F@x* zaidkfeo8@spH(`Bc1%Zxt6UUw&OO9yTYIUy%q~YgW)0R)YvG0785zIr-t{<^X&dqM zwSxtI+-tVguQfaHw{MnHgI1>D0TX|IKk4!jb!it``1)HkYLhd4M%Szml-3_R9JCBT z`10~EyL0l8%*e>iMV~Uzs_X@axXFQGEcTSf}hfKnBmcNeO8ijPly zKHgQ*3IeJG3k$R#6Pg193^rYI zmN;?_x&n|Fh5+c&0YC3!^{@u3I})e$0-jBkQuWnsrn=ZSdl`^g(c^0!YJ#9k%@U*67jg#ceCbuAB`&N@o3QL#yEM_ zjrt1|2q=L#mHMf**3_q85LpX~E{N9Yaw8t}6{sBaBUC6%;*Y#b)2A5E+Z9Nr(eZ09 zAwe{PKeqbw+_$IA$4j1!*MqX*6BWK_P6`Sn57E>2w?x9ksULAFLklLH@Huxy>w)X) zD8>mJw)Ahx>HC$B4r}&=jY!U7#`*^;1ItM1)qL2Y4Z3f9%slyL961XzEO5C^%b=mX zz%o)={RvNNcHrJ0)44?~iRgGv6P_*q8_ur&qHf0{2|Av;yfHd9ya-mK4*DkYH%&Og zcmvBwY5kArVuwvP&UQ20$Eg3(rO(`dN{44&2KyH(M_)#&b)aL-x7b5C|BYe6jVTx> z&%2}&vC*{S&+eOuT4Lps|`u?r;Z0{Lj_0>i*I+>f}WfzPV%4S95_*uFR^|4E^@(ny- z?$HEfv?H{8j8QD&^iIpI46Z=|3mhMs_+=79#BV0C500ny}T6nV{>ovmPkiMzuc{jc<7oyMQL4)MpDI7 zBEqO$?9^ZPB(7Tp)n`++<%Yf^hYt2?c2_&ARLzxnQ^&t1q^UvAPn143;PuFYhzcSjH9Qp^Ilp5db$VSVq>K+oCXCXRqst3<0SEJk5 z*SuS!rsWep9J!p?qD5ptJzVS|6-E%EeHHq}2Lk*YUN78jjr#XDsu!XDli0g9_f>D%cyiw#_zjJCHu(>ha*b;* z#gIGxz^_B#Wy1&i%`Y*PFOVpIz>8Bocuu)av!jZLTft3BWaRL_>zMkJ1YE#>O%(%Ze5bJj+0?>QSyn^DwS 
zVmjyZICJ}X_nKX(&Bw5^n-?zN`K?yBp8YNq7r489$-A70%j-iF$M=rKD`dRC_N>pv z8FcTR=$tJ19GqI)p9$B?QZ*PT{B;e%$L5%vFRUy_w*by zXpYhL!()#ZqqT`@JpVTD5%*QQ1V{Ny6!PG9eo@DVDBPsK>zaB#a=Cx=)NvGqR+9E3 z_6@X0{r0-&+U@j2iQ6ZxdZL(x{q9{=>nisU$+qt>s?Z=4S*(2Le{zG(zr?)PyknYy zLs4x?co~;{c-Hoy1JZLo8eS)5+E?Dx^}U}>Ht{4c(Z?)9v&J?LQ~)?1=n|Da){ZIQ zof~s3&i>~F|N>E-RU(@HUba*b~ z=dn2F6F2YZ?wWwtuk|E%(K5zaVA|N{i*r7joU%_;stL5htzuG{JGqpO*E`dQXOn-# z@emceBkrUc9iMT!vHaQac2>tu``)}OaTXucpE&2EaUTASa3|i)_X@?wCWSV8>rLkL zo}~&&e$BpOz93+U*A8tTM$RD4`9L43{0X-X9#Nm=#;b@VThhxwAyRrTeWC_6})5TwI~&x`ZwUtz&~7M(cfqa`CeaOc*Cmp|q5gBZUV*wV+L_m8w&a=H2XFP{~* zYSCy$yg26**I%vTxRc`}q8JysiE}<${aahS`a;aU?00@$eyGIOF(@at{5O32R(QzN zz1fS@GSlowmbpbSo|3?pzNnorn@9g*UZWq=yvgTtC2719xeZPXf8O^tPHa0X++uDL z?%i?X5>NAZJbLnru;8V!_~OSL-*J5lvB`m_GKXzm;d&D*U*q|u_}vS2To?Bo7wDdG zSTouNZ%R`*xuWz2-qO{YXC7;VHTxYs8n+`A8@)dCOnG|@TAF{cx}s+)>JjRYQ5KSn zm5x`3%*=B`m*44jGm3OVp9>Z|=}NkwpwAJdVO{Q^=%Tqwo>`fwMR?CuJ`Jz%C{wP7 zk!&{Jr?hL;!2NHr=d_~=ai)*Z%cZB?OpTI}e8!sHmgT`{#n2z0-fYQ17IW)&nkHBO zOU!A_TR%?gTB~J-eu7-UACR>)J53%(y|tpX@=?InI)%nHNY8s1;Bo zs!8iCscwh1+^)>hcJU_%K|n!UuWq_53!()aEtved#8*6f-?jDf%KoGr?F9#0AVD;O zKQ`5RSy9ujoH{0y$(+E3FHqT^*LkwP1ho*Q&f$ntEAjQWb*&bkKJYYvacE>q-^n+9 zWYo2D66OdoLY?m!GtcK zh%vH$wYcmq8AE@Ln-<})$^UJpe_H5jz%?yv) zQOAo_cQ|PFFogW0Ub%~8K3X1uNA zpaI={y7V?lAWzUb) zIPxeR&uuz@i>>}9w(VAbZbO{J0bURt8@^{*|IqbR`=$456czzqoLbS^zi8%djMm9D zhj%mL+0ut}Kwm`u|Mt`9pU&J>Fn(J7g<8Uly)dvWMAx)l*ZY2@)`h#axz=|6V`TDC z)4*b?53+0T&hhAV8I5rssF}L?FuJ?lwo8YMM`)P&;gO0C50UM}g5keXY>= zklYJnyrAs|GjBZuRiRwrSJ~99)i;9X1(NgW_yJ9LcnosWy6%itk)Md(n}&S8DIZ8q zhJbLj38E4FvEdU`*A?!m^sn2SOuUfVN3t8&()1~Yn>U>XTuNwR1kY21=f$l5CrXk!U@m*%QF}opM5r7!m0=*3Hk;@Fs=Q_x5{jZW|?|yIrk`t z@$5#R4nP4QUdpZZ*3BdGhRX8ZxpF0_qqOCZuiuclYpe0khEB|UAPOwj0&6T-H!1mr zH(We2C8jS>k)+>j%HQN3A5F#VdG&gs-U|>DuoXC}tj{9i2BtHzROiVsfxhfuMj6dZ#d_)@J;7p?wm)nRR@qEPiOKI4@{EDudbp)<= z`O&tvQz2fXtm1EXI~$d6m{V4GIveXe^IPihjE~W9*GB_1LvVhgx$byB>4xvX+0z`9@>xsAq|z3_(WUv z68-g)UCHV2RN!)#Hm1+kwd`n>l=v{iiJbNaehC6E8-8ndgYi+(%x85$erZDjM4j={ 
zw20?*{Mz_RYbQo(!#-qTpjJSUX!YQ4v9FHD-+pLQ zw-=k6Jv2c&O!9a79RLnqg7zqQEU*f4s{kw;zy`ssCV;f2-!B6+XRf82c|N zsnwM`UR&fB?82PPR~7k2arnHg@~Pd*33!RAr8(i9ggs_YttECQVzVWOJfAOrffFZ8 zdGf}|356fcZ8Xe&!Mf>O-vnJ$V*ztNBJ;XO~k1cQj>kt4gkvlJYbh>6 zk5TJ)4FDi;@dgB5HhjFP@{(hHJjt2#^fL}fjTfcUfk%f^2BF5CPvX=Hrn2=2ai#$fdN1IjCu9g`pU%R)4m0J~3!W>f^Vj zxY?Iy#i7&uI1f3$>x2dY}>*FtptOLp&+Wv)D+Vh`25N6Ywe_}A1WoecOP*gk00-0$xmYdLI$7n)8&(j$>H;|!|#-mnm($Fcnr^~eUi;;hak)}^yB_qArg>K7D z%TeV1TWDV}wMB1p&a9nPCAeLI18y*Sh4V_fBxdd`#oRfs7GI5X{hyWSFB5R$RHi&+ zbGuX|J(ZzRiNmYgaL{*q1?L~T0!V_UW1NPj2{528&;^eH-4VJ}SDL)K{r0{A?#TK}cf7B~?k<{{wQ7*6s~soIn((GEk2cUfyfiN8<6r$v#~TnGb+s>q-}O-#k-cwSJ!0y=D)EqVi*&!1xQ5sg||OCCUm02B_|gCs#m30vD}n{K^-nGBo`gWl6R` z$6iOSpLRW#(Mk&X0#t^UjP<;)MZ8zez0d7<5yN=QHe2~?tnKb@7j~@KU-_NYdzYki z!sH)YLZ#>vaOq<|^&k9d zFN#_lT{1c1C_ZL+OnyqwvpD(uM6KKbSFlUGd{nREGdSYM^NK89G@f_7w5&!o3N1D% zN_Ls_0C6=eQ)gc&L$?}g)jG{hMGeKgXI+&N(7_FlzLyl;L${*uB@F8P49)0!eaE_n zYBZ_q!pl#pV^Pl&-$tG|9D>q0ea5!4i$b!M+UqlT7@Kt8eau@c7WFqWAJns;0CV4r zUVW%G7R~8?;njM+)@M$@@y_NoCe>#2-GqnP$B2dlEh2=!g(S{WzkNwHE zbo_uOJez8T)uw)UFx#Kh20;bV@h>2OBlu&(zd91{y2v1q^brLY{pxH-;01YM0uIWR zdN15@r!%NCLdBlVzM{N`Pg$?*{&$+aHs{9k*!EAW?lMvnacV^?CgIwqL2VW4IwXZl zoYDaK6sK0uj;Q?GN5^8G-;G5#KNbf|oGl7?w(>vibFSs{K0zd`{ScM=G$eA_>+j+$ z!*hdc{hGbwMU+l5BQ+JLR^S)m_{1gu1%5J^;Ar|=uimLKAymn+@MDP8?Be_DuBD{(8O z?Bu+`@_p`0P(zUGpQg`_=eIkiZHHi-`<*U=^rfU$8(%u^)J)xm7p6zf$xb!H_8hle z-_i>2fT1{2!>5 z#@y5OghhTEj{`^Vli%sO?SHDVYlp4~tR3K|n!b~5-VY+kEj7(K&6j*n$MdD%47_f; zhV47kdlH+GLABUabJRtz-s3+-GhS{aPOa$jf59!0T=~Zz%JTvw)^|#D*8;)0%N@f;hE;dJvTV{DN}AaobDlI>kux76>ee5H}s0El#ay`BzTc@RitM z|5|b3{t$BPA9$n2`e&2h?snp1zXJi}AUdAE9|^<|{ITJ4tPhP_bv2ujTEUt?phQrg zRMf)D#kL=A{Y-yG0%<5r4Suu{{KuBQ#p`~i)#DNw4?tqWpEEsnm&#rsJJW#k^w@mb(K*qGi8aVSY8yZrCZs$;rnKE#ID4aY=3~s!n-VplW*sjsDg9b0EsbyW;ut_4FfA3%?8(PIey3QC%7} z_j(X|?9jg=B+mz>J=Z&FoOB(%+|)HvXSE}WKQ=$B>y`j)p{F_J=Qw|~#eIB^ds-Im zwJ2_oxok03{;EDQF*6Fes%(=FK9Y!DSYsmqy01VVZ)*} zhZP>7v29b21#T!t2W|T~&v1PBFR`LEkLuA>U7o9yM6@<&UpCTHdMlbZRV+B=yVYl( 
z56Pv!;FvXb^^hJ=J=#zs@AUa2ugi%9=iMiKNGIqUgl+sNf~F4C0C1qXqW64akrJOf zp6^Od27u6A)R;b-s)g{XTxDV%87D~C@S% z`a9HP;2oE90gPR>IMoB{B#NI;oJ}m+)8d7DvxmA>mR5W2T(WQWP$&PPFJ(?BTUy{F7vrfP_C3K-+pF1VYz->;RyLK^5-Azujx}woa%u- zQ+VrTy5EQic3Yl@s)djj=y<2YQrExRK$#$m+5qw}9nUpEASYY>8^^7w?D8R#@o)!m zst5W^#V@4#9s8{9VKO+1G=>C)?nVS&w)FS(-x;yW;UVJ$B^!Q+ef_b))ZrFP`YTTL zXz~kZA&kCnoR-irmNEb0R1f-dDu2RrzvR@0tWo2{G@GhIInk@{zp~;@R$e;N;RK@< z48{wfdNlckHw?W!!*l#u5-2gj3G%R&zxO8f;O@EiBtF$4Uu<~p>2qlfNnA#%Dkas+ zZhQJ#gW3+1q0{Et?u^?w&T&-#(lht*EY7}FJC`Ql)d%t;wj7Pac?pw`<*a^(?SG}d zPn+X`ZAYh#7<9%Sue-GVLL1{q>eTEE{B!s%gq%;_pVRsZ;;7|LD*b7S>gM+k<}^G* z#wMk$&}%(XlhzHENtMqO4IR zvKPPJKnYo1nYy!|;8mr!laq`dpwVS_{Nt`CHUy49WB+=e)GbM#&4OmG@Xz{y(bsd%_e`dJRn$ zCSE{wMNN@UY%p%P-DQCERFBqGYt*<`^L8$Odg=*vkML633(jP!jr{%t)dH#~P8IGx zPqWC}eV5b$fY4pr*wsU{K#@RoMZx)mgS+PRnrDv9-mxQl$)IG=t zjS&cU%vtF^O8mGifUJUw78IOb(~dcDsz(ckPpbv-9_xD9?-(h0L?yIaHu)RkaxPp@ z^N?7}B5nQc(Wx^T{q+*#P$f?Fz%PoQIwE$hW_>@!0P-0vW1R4&d$if)e_*#&>)>mD zGLDXyy@aHdzh`IrXEan>wp~oWu=OQ2ydpJ_2jwO5|F_H9 znU9qhr+W1Io9ECHby@VO{#Qw|#69XD4~!RJ)uZ9%$ESWF#;LSz(RXK@#DWjNv*q7D z_xOy3IiV7tN>DCr_(wyh2D;P+GJYz-cmY(8*8Z$Nw5TJf-N8>{_DN3IN-KYO#q2%m zgZ}SrYmk!aExNJwVR^SjxYMU8t-sIZ;@YPnnn$$5vESTXoA|3SK2ZK@yxPzhd`_2t zd--BMj=^($r=PipR2Rfeyq+D8XM8o6)qWO(_7lbK2d7U%uLpeei(a0DS7?^!rtC|{ zyH!S1PD@Qf88J7^H`wh*1CJyg+HrR-igW$uZ&;X!dTQ-;-NlbadrOs6=O(zK@`?36 zE505^)z%8jKTXa+InxF!Ui;#Vk0-Q$_{F#!p;f%>y>gW(eDbP@)0uD3XsH zHxU~peSC_ZY2-gBo*awvN1Rl(oB0|Y)N~1#L3!AEkkY*Lv1wTE-HZJ$DW(6C3Ecng z$WR!fK=nnc#3xo;x7_OXOnRzE8}w;e1bI+ z#m{aPvn}Hse%CKlVyFYWIMoBnN4*!GTNFR{@r{)-fz4jqh7@LYN}lG?Y^tZRP`7H; zWk*K5IMsuCpyZc3oAHV0k)d!h>V&t%1SoWs;#7}@=e`SlOXT+tHXELETY_o=o~`~~ z`ku4TZ-4b4PGH+GT4mYN7uB;;XHx1P=@e7oJrawCU-c=D5=l$oa6h=zyz=imr zy#r;UQ8oUeX${1vE(tdahletrrVUgNC>wloQF02GeajE*8~$o>ki@x5kUm@f1L~%Z z9i`weG1QC7fn9%)UdSaYMN`K12NS=4Qdwc_TkqWS=AqHVApa=I2~cU}=WHJrQeCzA z{~A?;iC?g)szAXAT|S{>>u%C}hV%cm^x5i9B+g;R_Wu{EK|eCIpR7N*cibWDdv@#u zG~p~xKe2E688b_q`EB_CPiu4h@n`DMycBEvwy3s~A}qecw8C`)^#M^2W z_D@D@&$Uh(@gNuu^z=mrgbYxQ|3F>V 
z?nsI!?)7l&6g}Ie+0-}xUhcVl6HhQ&>DbC&yZgfYcQP)F?FT3a4BbN^!$Ox&%==}l zY1=xCJW4PBjHZi-fbsza0DYq1%Ek-65Lrv-uaVCUB@O9#ZWEqO{-6U3Ui<6|Bzx2G z{#*oNvf(p}HmHT2dq}PafWYNyA@IVdB3OIiIpyjkReBJku0C5h?`{C&xm;}N+y9z) ze%v(QX6wIY4gNIE@d~5<{Dr#E4_{EdeC^hov@J+&d+Jsnw>!9l+0TOtD_rs5hbtR4 zEONtdm-!4%Sd@w{bjqv@4|c{nHpzV-?|z4T)=%?4f+CQuZ-sK3lfl^Yy;Aw<<4HLE zowDNf&U^9Z=U+5E-X@?no!dp&^@~Blw@br|hn; zR&Vhd^_oGe=ceK}{eqltVSn6VNa>ER+$`LGtCRcZTqopkSnizRvD+weYvreN4wupS zP1~lP^o~To+N!VA-=BqQO?o`+=gdb3&d&8s91{C4neh7Wj_8Cg@y{-wPk7kR8annr z9MK6~8)z7)F!(7|5T4$fb9RndE6R$ZN*iN1{3a^`P#2&w&?Smgicci9>qhjg@L=3L z4$;zw;NR-VEqNc$JEigdT$`ot5{DQ34#P@iCbjuodcEkT#84P?3r@&{D3K!Eb5!!Lk>0VRR*ph5}PR+L^XzNSE}tvW-;%QkI;V3R-a z;I-5BYbYxg9dB_S3B(ZmvEj7_3||Rw{C;?@T19N z^ygUmi$CN-2G6>u5G`?W2t>ywzc3uPj`1rNC=d+gH2oDGaXQq^J#V3^8@Uz|6uMfC zR1l~PPy$*pqdj`Rc=MiUUU>h`ja2#rKOcb?^c|=it^Fu3z4?X6+cbX2ulxvS7#cW>1)KrVc)MZTvw-DkV;3X!&;<@#70`S)AsiVQi;N83d5`l7bIczA!AOw z@0yLHIu<-GOMHTz+Dy8eAN&&Qtd5MZ4~fBv`T;%LpV^4yw#sGsv`)q9-WIzyUU$Xj zw9bm+4byDepCx@5hekGP_3CxZy|p)-B!(8q4_p50#&VX;a<~77UF%gwYQmPjsJxli zasVX*Y60~{xjG&Jxx`wfCo?#gqDd>-3yza60xx_L0Yy=T7ebd$$k`8aEH#gopvF*s zK&^lxO%TI7**v~uUVN32T1c1wrZtPA=%sIe*y7A5xtat2Y=Q-wGfwaUB?M|oYrh#S zQ)JMiH?B$J=SPy~>E$A?+ei(>sTK5tRB-Ex(n2D(W|-xEB7!_i$2;jX;@QeC{q2k= zx9^5a95D>hvEkPfwqIx0Js@WQKb-D+Tc-itc`zYRqWu7#Eqw?F z?M>vDzW%ux_gik+vxTw!{e{}ltwgo^s+NR~-;djFSk2|?SmIA5y&r3)TVu}XdGi#i z%<=oVfemMFU%+cW-Eq40E*GhfeR4;^APpyQ9LK*+`-wYteXwPkZyY{tb!Wrm1F4Ao z5jJpo=S1|bxS@4bax%h6XCCzDJ;ciOJ>{p4Do5WBoKWxA)*pSixAM&-{StJb~*(y=7?yc6R^Fl?&0@ng!MjEEbIT>%@ux9pw)o7C?~O=X}Q zDEYa)i5Q-@2B$96#g9>y+0ut%gYp*nrSD2M7sbCn?Z1U_DESMO(Wvo@A@rUPQyKbI z@_Js7N1V!lyp-E}PTx2l*E6-1(y`{>aRu=~djDD+OP0a zpzi{|FpJUvav?NyK?K8_)_RFk8K5Sjboj)9d76Vplmw8|==j%7pArxrs0=Nba9aa< zjD;N&f3IQG{AW#yz{J#ud z66WXH+&{6>pPU5%fy>os>_6D#uN$DT?a*>>2`e>3$A;$?_t&c470P(n2~3m$m4JSX zil2{e))F~8uPrf@ze_?ri_%qYlpiPk`d%ap;li0x}t^6zBPoB5O z{yd}nZ231Fx*~P9&#`7tc#7lkZ7 zdMX1}8bK2jock2M_;T)uPPfS|pgRJW+q6RqdMFseY3Wyo1lRF0gGMHwjBz3xZ<-L| 
zmm`5A_=BzsaPW01dP`m0QITPCHOYa|D!`_SISP@Ny;IGa{Z-FTOH^K!#C#;3xK&0Z zx9~UPz~rYq7n{UOUj3cOKfac^3lM~3lfS-|^}<~q){F*kHvEM+>N~&9U8yflp-~2G za50N$-6pGM$%sqjFc4Vaa>>T7Se(j0d!Rze_FearmtGK4(0k)~@=t3Ui3HII{@COf zg<_@>Y3y&vK5))VJFQcl7hf9+`h0wsVPo2zq?j%crHFZIm5Gj`Vq8zQdG)a z+g+%>U(w)-=_&~{A7hd7$5}7Bj*3M60$WYG`#cnxKb_exxat)u zC>=58{*F}ie*MoAy-s43ar41x^(paK->c~4hZoO~YDt%NwfxWj5-QWYwL9>2AnFqM zgyZ~_B3WS4$r;q30mo81)^hAuRl6@QnpD-yTL z$V0&hT|RMtZjgt=4-YaF#ty-Aonwqth6d;RRps#Ns#|t_OkK|$Ovj^R2*RF1C@cVd(cNJUE#SbDZ8{Do$`()5eOo5o2(jO&G}DO5tD#c=);$z z?n7o^@{cu5+Hu}1H z1o6nw2DNXsGr9c}YqXjZko)DkFJ3XA`;t4kZpfNfF~(H40&%i(*6rPrg=8&_bIOuy z(JfsU)g!~AP*Co;UDNd3QO7-ri!%-ep&M?k-&Is<;gPBRI&MRm=wSOFTdU;q@#~=* zlhj)kV>G+NqRC&v@%zA;Sp{7)5vtFBVzMs><@&bMIbrbx9qeizmdbyF^sei*xRmw= z3yS057Cm{rAqThUJ5rTA`n1gnn^F3!sDJfW3x4hjeq01U8>pYft@X78PD}kY0;h%k zTFRLZm7~9&@-5TX75;9fe)W9>Kibq!Qy9Sxon`tP|9FN4&-BhOo~D19`j3isr@)z} zA4@s<3Bv#NcIoY2%sFeLxA&Zlrp>6uEf%++JmKeOx1(MQ{{(qi+(PhHkb~dSJ{EwR zcjTiAWJ~B$%aW63#5z*TQmtuy21R%pyGCFfG?j!HG6VI=LHbo6A# z-u_B(x6L1_o#RD%Kth7pRnIo258;85fS(HGKfdBK(Yftf`L4S>$h&mBY!jYMwYpt5 z&S*urlQ-%3Jn5-b2i0dXoWxj0Y9mgqz)#69JTh)~@%$^FFBC9Nn6b$(C@#Stv=@<2 z`U$V4TDN1|sMC#U1+5G& zuQLcH&(rbN(jWO3xVUds9uY(yqT_W;8s%pzzXL|&@``W9F{i;AO*~EFazCfl~-$`xmEHwE7oLSSqyqd2aoqct$G^TlyyLdmq0Y9K}fO z*zgP1WVl^9?2dn$vn^0U|D~1x-OuQ^sM539=eR9f z(P@aog3FBg{|mLFpYDC{=RBmV!$oG}SKP45mcx5Ce*L05`Wlu=l4-Tj{SuCIjn|#n z>l)sE!>^mto?P^8?3cuEaeREw@K~8nWCOPNRoG?t+Em=8PCMtYvm**Vd?Eev3VXD% zs&s47L~C?#=AmUTT0caSzaIDYFHJ+&!}r&o?~;TLJ0*Ec+EW2QtUInOUx67CAm+#WRx;)XmaRBB$sK!knEJx-^GlrEz02J_?VlnJcV~yAj?a;N{~WIl zukpWx>iu`;0KpC)#zB}no$K!2{ct{U$AO{uPa9l$v(c`X{>rH1D z@&+A`wl?C~x*Ao#-iF&}-z9D7c&9B0yli+*sB7Dnr|&XSHK2B|JQB*2ieES(;6$YP z2Wk09boJ79HS)L2{S(KZZ}w*HRxTPk`5!$cx@yQjP(7H~6CbJ#TlQ95m3fV^t70v` z-w8o}MPBKtswliQBh~5rJASa@$p7|>96m8Ce|OFM&;YU=8miz0YMKxOst1&bR)4|? 
z@gqZ4^e|EjAYan))lEk}0!4(0Jz6l~>H7RH2j?8!8b}VNz2MYaAd!6R6QZ3wPJ~^b z=|vu=7a*3N>Q;Ul(epDIMhd=r8~>D#BKRMu9#A&WL+ZWL_MUHuo`&RcPdN{g2LOS~ z-zl~J@g9YfGgjUquha2(3j!}&`w5Kk_e?k&DzO6#@V$Qj16D*@{R#E&@T#r-CQwIS zLOT})AKoaxIMsvxhzgf{`A9W!bWirxw1oT3)<2MwUi|-!74_Em*o7lgt}t4`*~(9p zzP~zC`hTGs^mBl=90vUDaC3~_2m+spdiPI&r~%ZL5WT}MVHEgwW{-$!qH z_&#Z4?TAcOkkOo~1XO0VIsc2vQ$(&Ib(54bkafwz&p*38#W!QRcPgsOL545;&-ab{ z^e>r!Yo3+pPqxn|?!HkP+Hr34C*Vp>d{SBO6G+aa=@h57X)Ocvh(H~{Pc>P-`Rroi zQ1_W5-Zg}fPiQ*LIc$zVr65}9vcONhmnUERAPmFCZJ!+yK=!4#ee3;=s?Vl68f*8= z?vYO&fkwxp{f+6f;io9%J{=kuEH!nYB}d--frw+ z(d?mHrlS9n#bIHL6JXG#!VnDgK*@*W@*WU{pEoV;Yv4idqn97Q>0%S096()Z^@lGm z`ABql8hq;bTxaqpWLV&01F7pTGX1#PQF|xy2OaNZfWXUEeh`i>KE3M(>Hrh~D1->d zC#p8{j&(~4khlX5;$us{X5ebGADse70}w>u;tdGAY3<^m;wlYYltX1#%J{&s|WD2g(7|jTWs|Z~P;^;hRXO?XKR9 z{)StsQ|95r*{2!39BlRHX#cMC`Xz716ONM9L7i3rN`(=p!aA%ToN6mYJs=ue{tyrP zi}+b~>Wp7}Z8m70IU{wE^b*nAf4txe@i9d(&NeGt>VzrC^BXS__4%FK#QP{)H*G~9 zse9A_&sP4)W~z^3Y(1qG@StAU@Z6KnfA+Q5#Hjxyy+kzqV=zWVz*ku2nm6AkiSZsH zw){DLy1bfubRJ{=Z1^*q%BS~O8pi15`wexYdx^$c{4}w&*^RqB{LGzlVgsr!aI@2~ zb;3W#?24GyEDF#0w)H~Z*a&=U_p_GvzNI)|+UMiF&buHt3+<_shCD?*lY5Lz%sh!a z9KGM=PW3~#Cv&sAxE)8qhAj)fh95-_4<7IQ^wv`}_DAiZzF#k+A3J6n>z|B89%0+_ z^7cMMuDj+foU}V0@2z-QP`u|7?#es*qfPtEcir$9FXV%qZV!;=|hm9|C@Pc=C0mI~PV&VkX)V-|$b4t6- zHG8=g9XKp^_3MNp%xjWt=Xm{3`379)?RTj0B)z2A`H0(J+r|;quVKi6$%Y|Bg<2mu zF9@5~f|8jk(q1CK0Ry9XXCMhd)0I_(R!*3WK6RLXBW(ITRfDrHG*;5EZb0bZ3I}pjJs@d?4 z#WBWx--IxJ?vo8Ku7`V!vwYd`S&UTiH>ReuaQ}jxn?az%O#7>@)n1RgZNXQ^-kb7v z>p{F;UvbMWbO4XLKo+f=c^pTL$=<;)I*Yv~m^m-)djauPB8e596VS;6!xrnArl64N zr6YBH9^w4#1YA^Fjm?bw2NOg1sN4R@(aEo`pjMY1UX-m%M`8Q2wd6Zp#|xKrITjI@ zg%V1OtuJdn!=@vH4s;9u--iO~RX;_^VFB^Y+I zol8H0M{+?&i&AdbO9C1dW(TyPuX^;$TfW1r<-N}CBP&IWJ! 
z{GcUqa;s;ZW1OY1b^S$-EAQ0&@ReGNhWy#^eMeSy-u%jrap+2_{_&8oC=?LNa`8Ww zDK-9^LI>xyu}~fnMe=DC!})vyK{jmi@9DXBQLp=cQWwPo9!wReQ23n+RZpAd5uUTw zx0!Q2h+IK?!QpDx&+Fw4UZE(-6jOl9IE07{?S|;Ig`cIw%-< zf85;!M3YNcBqaPt`1fDepX4kb#xo_FXfGEwzA&JplDpJBL?Yp3kl*Ow&#&6oT^l{i zUE!%S_<^sIR^j$zNCi!q>f7TKS5LZv4ee`ltySW1YOk9KWL6AT z^%>*sc{&AiZnci8O!q-qad*^w50v8KGbfHjm_ER|)vaI8qxLk^wvF=Rm*%34o>O~y zx4MZwAL#fg{wj6BSpE?6N1oXj4gP56@**2AnBw{6%H+G~+_buNUe_~GlV^&P22U+S z8poFQiwk@Ehb#~@?t~6d2yxdI5a;Ervp2t&47X?8jgMb4+=N1Q?s4}J!f1JBfZ9ccA0cr&V!zc*&EW|e0M;|U2# z>M&TM{6Vu0zSD>dYxLfVccIVo_g?p9bda*uU-8qC8ngL6jC(62sRON=MYpSBqm>Tn zX{Q_Q;MB1m*ZTSa`$n$~t18G{-(uv<=%5Db049)TqPYnXUx`gq!%x`FPau`)?ZTZ| z-~S}3gBm>^DI~dxf>t*uSJtssx9`pq)5YVWjaWW@_*yj+P++t9aRQ~}g#nyhi z2P(&Ovrg#^uoy=aI}LSqj0qg?tCUxay(9i1;Oy6W7_S-Gb&%ujotRq5N#}bDwRRTR!k#dc8Pv81?sWye9M$I!o5x(v2nLQF8y_M8{6M z@qo4JquYj0$5qZ6chAj^#J8(@bmGVJaBJmt;p>Pp-$J=E0G z=#zFA1-srHeXiX#>^fl4sf~vBux`g0Hk01mz?+gTHMzf~41ZGi>KM!m!^4c0E-M;B z#e4p8f#Ez?ls2wr(k_hArpTDb?z5xO-DTqh3d2LttZ$R+n!HR!n(~heOs%eB3r*9s z+yhmpThzX9{z^rt%jnjwO>6{6t7^mIpIW!kso;BCN18rE$7j^3;fN%ZaAIkA((@~b zt2FOQ=zX6*gj)Tx+jW6b4UkZc`D$ zAg0(x8=ZL(M*i9_8}5N&D}Rn_rvY5QAjY+GHhhIAr+G-P5JqbD8)_vwp|Lh%;WO7M z(PTS%`A0PDJz~p$WAELm%VtHB-)I%f5u8L&O>Fqy!?x87zvav56_KP?kS-N+{m4+F zc5+?By&jQ_<(H&ZG`yb6_49=MOsm(%LDJvgf&ME=t>AYm9ju%7fY04OzSE_JS60M91B{oE$je!K3Y1v|djj-HF7FE|Pk+8f}d zpUODq+hXuU!OFe?C<)6Yn&kGm9FH}h1br>-Q;Kg!UUjNYh(hb%#x}WRlZuLW?LC!Y zbrY|uw0W#PX*-&y>b@x>GZ~NQ*zWUZ#Y}weW!M0fCei53*W~d_1|2}%EESv!r|(AN z_go!*y)px-o+o|;EV+kNKgq9BQ!GUVH^(MMcP~V_MIIA8dI^wmv3lvdR(Fx&^qpRX z3f0K`pn2u_Dp4(j9wCE3^GXLxZ3Q!rK z1XLG7ZGUt`=8Cfw22mIF%X-*Y5&E7e6cEc}*E9=1!XuCWqRN>Um}o;$(7pC2|Ay^4 z%LQsYaz7nk!|WvjZwr;ZVCTTE{E%fv!)s0iOPN9tlWi#T2Bd|zvvihHC9>it^xTLK zv0T!dE_d3`8PLs+1TXY@xgkO-{crFjg~N{_6U+$ex=Ua zwwVW#=Ro%ffz%h)053^p;CCuioqO#8KY4rY>_OIPQVSFiPm;=vgBPMuKxmZg3-+2G z$v8`9EB_L?3%&i5yc+GG=gjW*GO9OXq;ir}hTcpOMz4sViDp0FAn92?6q8MUh)1vf ze`1Oi=X^_=YIcNifdQxtP!ihs5nUAD@$RAMAya%B-T&F-2Py+zDw_T=j*01l?t4fSq1-{PATKWBdJI*FVU~)P6u^fRc;?fG89YoP;}hRkx@A 
zzgGU=Pzm}86y;_^E*y2;gx5}4-0H%sU3g_s$)kvGmUw=T(6jSlkW=6ObqskY)x!u+a5Hj2R`sM1!i066gVzgY@ zA3`1e*#Z-&KyY-5dobl7{xHd_ay~nC02BD@Y*j#%kujZ<9!D^mKA;M;**^>(XZ%$S z_1-r0bBbpK;}vqCs%a_*@S?-F$E%CN=Nd(jiy@-$?bHV9!KOOeq;*+^YywFhq$&#a z5j?Wt^R~D5`Ti`DyZ{9i0zW&-j2GpD`Z8|o162A8su+Z zH110hadlda-bf;f(e$vjAJin&m-t!sS+soS#QPg#JQ?MOJuP55AwMc~)=IugSbjH3 z)5=OEe`#0l{CY1QP<}AMv~eI<)F8VofUNO4!!OAeHy;abjZ0)brB6-Xn zXCFrD%ZAUj4)JpB874)2p`L*{fZnCaueb5z52E;#x$7Dy%yC5fe=Aql4mnBxMO+{xWn@-&I(-9{;2}tmc zx`z<|TR}SK5!!Mg|NZ)a>nQ6(Qu53XG1#Wrh>ME@GH}?4gC={TlF)^{#(t-c7=#+xesN3Gz*2gQe= zi-L-VP(1Vn1hKtr-K9ygFED}1phD5!n@vM+MWhUkBDd42O-KW^0mcE;il$m>+wUwS z;@o^9>@)(U?&ScvAz2y@M8J-3OZgR_mglCZ`H_zBt1#rszIUglYT>xGqkYN4zu@%{ zJhG|Q@|Q)b-C}$hRVz?Zph!?IDt*;MSzihBuD#FoSr#wl;D>l@`E#1OzEvLjukQt5 zT)PHJ2GnLSRE#L3t`I!XBG2e&6j=ZbE&M&i35i}){{STfiuCIq4*{{rAm4WYb>gNH z3M>S2t?Q`;Tlu4YPVO?yJCYRMidg?AIDp{)Z1^IjpnzRbJSo!#c$ks^MS}LE-iyBM zJ^X!2g04vn`I4T0uI%#9whai_mJ?0hrQ_wSWR_oHfLZXO;CK@DMu_E}T8{^61=Nn# zOwQL89`aX@QyVa|RU|n?4E*a~=Dj;K=bvcr#pu@?GVLG^8(thQgmL^x zQY+AB6s~q}8^e@ft{s4zX|ovDu1Hw z&W%<7c7bg>WBDbi6>a>8woRkZ4^w?s{ny_-Yp|2z`fZq6$w;l1SZOJ6o2*5h3eV&h zn{L45^Alu>MkFrF-~QS9R0#fgW0tp|gFo)>_vy`or`cG+swBGO=Ul9x+RtqYby(GT zM!@het^g%1*Nn0Gupg0qetx(X=Zmf{pWC5!<~g*_=H30DPMN4n#$xnF5)9$yCYTccT!x{Mq_wRXDCn5}~4wL_}T=g09(cGsU zxV#eKHvFLG?aTiVYW2_d?tqDa*^{^n2?&cA3p-N*yVnY48gqPgT+V2>{^Uq<4o&B9dNnNgz|Ww1AQY;W9*cS+V)WX@r+P<{Qz4=- zgW7+vcVg+8WBcZYO;h8svwm5Kt5vUeD`R9>L4Kd)e|lpPWv6!Qm{fH_bq;V8N@Q z{W!HJhyNg^P2`o1N>3!m{DPlXk7q0YD#wo9{EXwIK0E-)vEj9fBQw`sy)4y!fCowj z3tsf`vrY2@;iH`P{8hJWjo!Of>z;7d*XUfMy&mc_4KJm1Ol6cGs1;Bo(1%p{wAOmP zA(Fc5dCY5*%;@!Dt3RkG=r7{u25JD_F3|g<03ZqlM7JK@D~vW-HQMV4$uZ?GoBU`x z6aF{UhQ4>_w2npUBKLLpKnJrscde}Ov6kwU6dV);tw=aNGeJ~DUala zPiT1ys|`BVfBoD-#LYWAslV+}oE@;*^~4ha+HvE&$%FyLm{YteC}-CTjKT)DZfAFm zx{lWQK+w9|n2Sx7AFqqT!s7q$LjevmFl9%fi-1a`QGr~4{aU_NVaBMtyQHZMbWt!J zpaS8UdM`31YD!Hf8S49xz5o!0iVe5F!qEL2D#IQ0>LUNqqbF8x4)~DHbUbHcy($IO z1#&}rRCLic*x6O44>okVK<=mG@j3*LvJIt%dJ^LrQ~?`4zv4#Dv)J>bC1fZJxefVC 
zQW+=*6)X2l(>Hwi`x}Ywdwd)1K%YH9caTZ`*+x6ixh0QRzdN^vQI!If0V)CgfRZ1p zJ$}L0>}N=pTR2Hwlnwn3C=gH*TK^Gy8IIftA2Rfa6cqwITm4TT)N{Dp;+>7&yQ9Z{ zG2C(CEXMM~6aXm6Fk$=F{|N|GG|BeJ19K@;Cae5ht$sjz*a30A0wUbappDug=G%{f z0)e*&wC|EwTB^thzf%QRHCAP#AJ&)Y~7n{I{4r*r2>R zk{m?Gb7mlTWW$T&F`p0=a+%Y0&BWbf`+F*sz6p$e7o5hVAw@s!_oIUH$`dWlhu}$V zcoS_hBk`_`SxQb_LvZdDvw=q{OL0`q{hEnH5T4VuO+mrhaW*lU!;$2CP>e^nAUy}1F!V6I-AVM5elV@EFCT-~TC)eIUn>#D*B3;hxdt^ zgG+zz-jMzuPyd3QjeG9eB z8>u-13@29k=Y8r`m#o`p&Z^b#N|Jb#ziz z`Muu{RrOXsEj4F=eAvp*U8zwr?X(`_`2CIhjK1kAuWN_UWusT4z48xsSsQba-r~ax z5}zExMQhDH1bTsZl1E+n#DWWyTk4yUiF+0rupF$ZTSU;+CjLo)9_6NLmwiIBs29(!^`lF z7$?=eE*V(k?yVyg-}3Obr`umX?OBW`?tbk%s8tT;ZrQFC`Y91Fn4$Jz!mx{Yx5Y_* z*Xa-c=T7@;)79U()5bxc6NLg|+Pl0Prw)Y6d=!qxfJD@4@}xKcRthB%&E{}84J&&< zrT&ImamVhtOBjbI6b^HVAWzd0=7bzZASNUWCK*ENJ$hn_iLqWAd%8}crsFUcg}_|n zdezQ0v@ERdB~`QxAX6bjVdx{fY1T1$HSb$$IH^U)bCx4`1XTztgOZ;4-n#2ItW=I7 z?dkG!8>k7J{AGSvKC3y8TtLTT)B60`@E%)V%&gcLOHKuWMd6})yd<@P_N1cK?#Zbp zqGrxp)Om3rDf|{~{U6V=zW#qht&ERytNC4=EBo|*?@tEM@yHTET5R%Xgf@*{(c=tx z005$Jz07$1ed={<+XXXHYe{NF?|+()E)r+DyBvz{k|<@`KpZytAs)T>|HPegXqZ=f z-Ovk+?nIzgV4}e-D9%?vczy2^{c1^yRR0BeB&ijQM+#s2tlMW|mBHm+0h)1Aw_*TZ zl3Ia0kpGog^E>ycijgD^OCPNVM|j z_L=g9@Tsbpe|ATFqc?qp5qq~ETEP4d? z>6Kpx^R>?CRd@HodF4k?rwo*8u98$|lYz4iaAJq2K1ArTht7r__mE15*jD@Po}w`x-W8p> zrI>>s&CM~tfwGTvULS4xziYMs?pkdySp%SNibCpI=V8t>J@jPvqR~vf=mMLswT`n! zP)8pnFK~jP3-N*KNv=FS_kP)Nz)~J*M$oqivUf_J|}+RhcRwKlcajIYI2@3qd6MUwYh#F zWvYHP;cARjkLE@2=^6csD4S%v+Hmz*siR}if7r_3dE1q`@7qr`dK1ve;wn3>d8huR zs`p{^q5!pml|x$nDYki9No;L?IP{~c^aV;#e(3K&nP41H{YSB2^eZAMqC`Ps)+s4d z4R~l5plr1IH$JEuNvypTGqy@#F15cK`YBueLp;b&{4Bevo@98c?%A@{|FZs>y@;^l z3sjd@{}WYPzb8}&j9>S8zps?3h4M&JJt!{~%Jtpk$48khBSz?&{l`>?c@yHlZNJ}8 z4f?GI{fuo^NA&o#e`62dLyR90O&@s)yXK-2alPtqSyu12KLf^Zt=| zw_pf>3Ze}iXT19z;rcnhg*`V?W-k$>CmcF=#?=yI$Jc)9XdOn%(-P(wGcUl=RG*&y z*ssKd&r8neYh5B^fnx~ArccAQ(vU3RB#(BT+`4z%&|Md$ZXEzQ*@n*HDbpgHn_na? 
z==tO0NGOKzj}7lT&}eSp$}pL!1C)bG+WCds55jVn_3?4r;u#n4*z)H(&HnU7|4^e< z@0m_1=GR?fq)fl4@YFw&)B%2{LgV}CMZ})&uazf-#E}NH7aY!NM(RL|rd8PQ8ZoPL zj=6h&9OK=eY~{Dr)KGly5KGRbCCtIHFTfOcJmazdP8g1YdaT@zuPr;- zcfv_7{t|3zn&F&`Ox`w6<#o$K3%2jvJ*-t3QoL0$r^YxLomP2u_1(c(TpQ%#azyzO zcF%aN!I_wV)vM3X^Kwc-lUiJS{iwrTEO<9X%Xc321(IIO#`kUvM!Ym{vw+7_R4NXwFka_sX)m$$luviRS2C}upu7VY!$wfB@`AN^&cPW#`( zxU_&jCMzGyRT-2`X>|>AJkYaPi+#wry2<7KhI!ayRr}EqAr*hf0$k(vHqlfYkJI%7 zf5Tiy{Z}`oy)w{cVO7q;J6zW);G2LNG+nD?{OOp}HEC=Sx>mHKvX zL=btDj>mfSsu@%n;J~3Oj!vDLAHDX=z@t3{f&D~#eg72 zJWwg1R2KVmsTSg93IB+#Qm+_F(+1ey0^$fcdP z$g$#t#Q$>VA$Vk~e~1U=6+bsnBUreD1whb;R6Nn%EOph^=^c7zH2P@hZM`E3{BG%_ z8VDi`xf%!_Ws|@7-%_iW!}qNtj<3e!?yS{6dOrlaYmK?6c03Bdm~6RO)h-r0tjyXa zH#!F2KC^H~e0B*gk@FZ<@;n~xyc4f_RkaXtu4fc2<(@{`$5oqcRtP}_TU|cp4T!^B zpEoO7wFtn=ziS$eOf14@_LY6>t8^W4-yVCR(8>`t8F1qJ6I6)8mfV>+N6P~7tln9@ zA9)mI-kUV?#O34YG>=m4n-8P?KUWkdW@Mtxr{{HcoSly2u7p=js7u3>A3fb#eY*mO zmLGk3rS>vb@7(3E{Hm|Gs=1NIJ~9-aCJvR}aP`MNHI-SMiB*3HwNhaCgt$WeC&d45 zZsUjCU_6LZgcy>0}lNz>UJm4*c?(0DLg0;(n)03j=) z_&;`=H{Ti`O8$U|!ZA_XpxPl>Pzmr%#X>&sI-z@3OGo8?j3l*a33IrI>P-XN&@xsS zuJ9}*l3YZ`8y`Yc`l3HJJV(oHfli?jqiF_J0($~z#Z(+4ctbQ%$}3e+4S-ozec!DmBw>C&pD_Z*_rMs<>riRg(pR4$cUyPBY zR?v=AzM@+zc89#fv90)0UMDCHTlu@r%5W^TN|oBS{Ey1dxy@*renYKtQ?BL{3OSz= zSM(13&lZf14V@NmHp`XKYxx^$#qGBE15uOX@Z`Ye%To0R^)E@SXzka_p+havG*k}_ z-V;qCsBlrJRX_fKih$P&(xsw_yxtES-Vb#65J66(^gLsb37`JZb-vxd|Ej;*~X$R5neZK`UW8~+-mS0?kUviI@zMHb{z6} zGU9W!3mLDkY!Fh^OHpl9< zR}r8Uvu0^26;~jNbf6~#}A$O6mdj7wED@5bGXv3gyi9!MK zMXx&C-c5F@M^hDe+T13H7oyU4(WOXIm!<`ZYhUd&lsp6}3yBpavDT16x#|xYh zhzUOfj;0btUYPugleYSdK1aR-fH2fElG#-3w8Wf#M?J~cbiBY2!6Tch;e1@i=j+a3 zRCSV6PtptN?&qjACOL|nP2+$Zm18o?ztYJtcYj_KsY1si2btyfbf0`7ASjUWT5n0J z2j!vaM|8rh?o?>IE_;*6K~Q0$a0Y@$Nva3GQ=#$X!S{)Zr_)F8n0cwur}{T6y5_$! 
z#GbMK+1k&;Vqb@;gF+aonk3Z&(=RTEfatrnt_OBblG?@${gf?#`P6=Lw_e0E-d)Ou z7t4{$NL7LAf!wtA6W!ftpzbi~?2Zspoz^fMjwXUfNva3qk$OLIOHL_~t#W5ai>dC6 zw_-?AJxG@db$Z^8;OCBS*~c->iqQ+mR(|eYtHf?obQ?{zxM~3^rrOIJ-G9X8VP4>Z z?e9RDXyvaZ0`Kr`i3wj@l>{*EO$6^JP&Q~!aXJFRxWYoAPqD95{mI(@&S>@ScD0Br zMfE^Vw)%&7ke~RufvN&U1TPv*O^J>%oxgLadU35!qiK=WIS-d=P-! zcI$a~ai`KZ0AOMtT6IuWZ28aJSIIl8Cw=%@R{6#8n75t&hFXc_pMJcP zamJJgvKus%FhQ+`y?>I_s$mw(F&6F}!f$)>O`1lKt?78h20UB&rzPseucI#CXhFw! zYoO+Ac%2sCpO|@iFq&pbYDMdRxMk`)!mruUx$W&E8ofX&ckGntP*R{swDRkjCsgtA z)^3+0%$cwCXOo{7&>?laUN8ycL|i>X5L6Qze!kW67e1~R+N+OqBAQ?0T{cwR1-~`Z zYV~7mJf1yuXrIcM8z{ej)Whz{*U;B(8V9V*@1b3fb6yf=VMs5>>(k1%f65nK8h7nD zs1k9P5fCxKe$7gir1v(#(1mp=Q28{~i41OZL_5B;)p$9TM^2}!?#~UXABGxG2U;}a zWlFF39%~}{Jy+o|Ui%I4Azx5MAb+S^o0o%Y&XL<;tO!GH!?tC%q1R>noPEaoJjl&- zJZBYxM>f32?78c%*ho9D0S{CGr~~{?g?i(fj7B!mZRC>MMUi)*qJ$Cbo$9F*TmC;x z+t$wb%p-H@c&-x?CL{b~!;2!R4wHESP847KkKj(1?*z`UK7Vh+4k^_O{RdPc+|@aP zmH@Q|M{vAhylq6KVlngO2Acm3jQ8^RtQ7E2N|CBZnquoPL*kHbjbgLVK{~uR26Od7l}x{$s<71cV;+uQptGqh~;<9K1>bQL_p1yi? 
zvyQtQ(E6M4R_3GZ5l3!kXpHq!g#6a%gcRII=yQJHm}QTUk8-@-lekhO$a;J2&}-^s z+DC&_*ZT!%P5TuuyiUa6mzv~=T{fjy=jp&ig{LK0_viE7^9=+zAuM6`_~`=7S=!S_ z&qaVs^+pC(Esey=c28cnJrsa3*}d#(Xe@3W*Zy_2o#FeT8%sZjIL9g1Skou{G5(lD@Hd}IoEQJenbAQFR0b#s^al#h zUH`C>pf2??i0p3vA5(z_Jt1!Q1}Y&*WoYt?78t|G4qwfh*)uLMv9V^r%b-c3_jq0Ib zzIMt3#QoWPxzh47q&rf5j>YXf6s~LVbW*n|sE>DXix+dWk>N(c*^0Dn_JoV6EX326ZSZ!^>w(dU1VT7HX?-!?)++Yv|#9Z8OfHeB`X+ zH|~vnK2kZ+Cce}De3bH5(L3o{G77rS>9G2K2og-|@%j73nObHElJ$BBYd`SPDyQ3Zt@`7m|PLD^;5<89#PjG$iPkPgT$D8W$Y(p!*)x75O zdij&*>3E+_2p-w+qIg^Zqk|1p37AHhHHgy}5c|?PSm!PGVf;80TmC&V?hsGT`ZW3k z!rf0w-*q*erAz~qA50^d7-&!Gz202wZ~T&oq!M4d^Nh0;FsVSzpdBf^&c}oA34wkq zxA?nz85daC%FpSgF(mQff<~KGJl~i5&|oPe9!w*cxPeeHqEJAXcKNofb*j76-US#A zZ1VTvtDJXrbdx&40okzOaf_~H&zib1$`2+TX1S0q6)N@$sNl~at7`+xc;o?E!*T@2 z5yS)22(x8c`_-Nv7Ek2fe`VR$>>>%XNil9x{rCg#08kSeo;UkS4B>4$c5OLzf(zn_ z@nh@pY~`=Kvjbf^dW?$4?LnmRYv*tyUwS(HpY2^> zlONsjup6MglaX3LyTIND$zv92b4;qbmhi|;^!D#{yuSW{%EKNCpm^eX5)c_TH*KwO z=8@y+c%DN&o=yHe3PZVqcZA=Uo?&%Di+gI{d@Oe|PC zrgeaRjMNpB&>z_HR}EbB@k=p}8~_=LLK49v8{X?%r>nkeV;J@CZ>Sl4o9*-5^lJ~! 
z8zb#;8*fKV^}(h0AD^$f?2IqXd+xq3_5f0P)YHJs!4PRYytL0^Ng2`$f2yIgB?dc{ zMMN1+O2pfSdh33-@kX6Z?3-LW8;=!rp76}%ORzz&&W_bf9^qp5n1UwH^KqM_KYAbK zMd1n6_|%HPa6H0(zf$6gLhRbZRVVO^6Eca8@p?H81e(dO>6HX83N#JsaXCWA9Ee&)YAUVcgmmK%3&_QD8PRJOA0CiI!%_lN8S>CE5->D=H2p?DH>*bNE_7g zL)R5iS>fa=I(~D*HeL7`a1c_t7^nAZjZQZWeCxP7oLmGEh51ixz_Sgt;A7`jHCY@^ zE}-L8W%nX*G!!;;G+WPTY9*;2P-aR>y_pK%iL4eCt!_R}kQ(aHZfx>5jkGLPQ1OwP zg$k=d_{WA{QE?zUaDNozEK-u{L3ya8McXvL9KK|wu`xjE3Pe%n)IX9`k5_O|T~PChq&x>Ho~sN>V+bPU7+jh&P8eC#|00+vwYl@4X?PpO*8IGBr?t zZ0+Z#^hC{}y$@smW5bK`$?eIwp!XY7)49P5VE?M`vJ-LDWAMt*Bin4sHL#NRhcCI8 zO>p8{_2$d;EU@O?uV;*!?!}6?VpeR{dW2VU6WiR^N=Jvsov{6~B@YevdUw{~Q#Q7q z`=R+!uV^&;e*4bTt?r_I1-`9zY2HF9QzrAG=ai!hGq3iHco~SCI%#jI+Yo`|PpcQ{ zwJE@gJcIS$T+9BDS$^ZT0shKH1VjhUvkg`}*;PMHbtzsctRu3cj4iKn!$}9)nC1*( zR{hYWsgN7m{R^Q`#+m)KX{gj%KBzuOmZtI{30`02C&VC|9j`sVUm{09MB!Ur!(IZm zu0JAu@8(Cg7s)!XQ9^v~NhAz}e{A@b>76T00;NB}EJjOzgJ~4J5QPGwssEzftBPl( zw(&su+4BFKS))>&#ym@B!zZj;7dh>xE#oYkP4$m4JbJkAp)_(4y{{(m; z3I)Xc#n0OmTuE;9HZc2zZf8e1gf@B`7}xE9PE6oAM$;#$`f2^&`Q*i~#1tgw)WkZ4 zoIx)aR|~--Tm5UE9eK}RC8g2#R4kjn>0AEFBq=Wgd_iPuXn1}}wd@^cuy zl)o|k^lgAi8BXcGZ?#dV&Jx}DcSCUt!^|V@&&;qqQ4<$z#l-_7eBRovGr;%K-d#U& zGzI(r{9;-jmw{rPzwAidGl6|HpoSXQmReHPAx*2T{dt2 z^z$zA+3Z}a_c9+DPfk0ych^;vpw=S)RF7wf8{xTa?UWuYmBYvTtZlJujSUb%)Vir{CMHPU7s zqP4fB(d!bUT1S$e5K$;tE_HW{Fr@xKRYAVs@Djfj5b1Bz9qxP%C)?BUdiM2rwxN?> z;IP(WR58N>T?&>SIyXbKnSNf5us>By|9} zsL9$ME`x54-=d5U(2IY4Y6LU*y78XK=Ku>VK~^ZzW+&52T8AyYo~+f z&khP81#~=TXFXn$I!I1k97p@E@+{{aGPu`*91=|ojvf*{6 z56oMB;wWSP2g(Ht1T-~+Y{sYaZ?#vQ`Sh7*qp4qrRTJL_j)$eFo2>F*`ub>=)kI-yrQ`?wE4e@b`izn<|NhTfevjGxHaqo@J^>=D{OCYV z`!Yi(#`Ek1<1hLj&PA8X9rA8l!VW5n3fbEuu(tg66OF36{+){2Rf zPoViGMjOLs+(nW3YO@wE&p<{&)l0_@$wM7?s~x*;?2G1nY;Km}ehbAb%5PS0aSw4a z-Zz^)?h&fW3trp$bT;B;k23Gr>pI4-2fxvY%*A-mz+Q!ZiGRofK~IJSf}ZsY1eoGS z(6wQK04NjH)t%P`bVMZ+Hfp8zlimvgIAB0PQD~|{OH-|mKkm)89VyKsNJrWWj;afS zp$@7MOe8#0?*%z;iuwA_+qT;`;v6{;0K$+<)~k9*7UYI!DwfvH!f$+o-XHqdUOXYS z76|gQsfszdeL~mh*^;2j#bQiA;*g!{aT43MbaK&TR285af$Bjyg!0!aR}w}~mLD4W 
z`JB`?J6Yw=joLLNZj&P;)nY6E^cM40tx=uDD8D4tqqQG8y1FTjD?b-}eSHMuVd39U zJ=N2udBk~nUXM?9rx{HRoBZ-0`q(et?I`7ClC}M~Z>pzu_S(o;eo3ka?Lwt5+6#4R z{Hh!2CK2QsTF2ziPU-8qot!$1=X{`m+J2EsqK@=vQ=+;Vc;c+!g|7zgh| zA~5w2P(AR{(8QE`GORh`1zNqaRWoIzia_;%veCpWj|If(Jd|3N8roLyx0!@NN(S2Ko1sm|Y zwn~}9c-wJQSP%QDOA|2}vu35~w*b6)Vb;}+gI)0ft%KQTTIJy}>zpPijk$vtkN9bJ zJR}cu5_%0>eeN)dIMxFHz=+2+5qDhi=BNG+&&L&@;+G2B_1@$npYA@1)>(IvT;_#7Gy3GB+G6F4 zMwTVW=jpO!QiD16AH0cCQR=sEWKgHLWfm zPcjcC2htMe^lI3)13!bQrkN^L?V<|eN4dd_ie~Wn2!wx5X$dh!uU;~0j zHvALwEz$dXUSiz#Cuyo_rq1V`PCmb9+w=<>u6~S%E7|gY`|inJ+n18KYMKOgvC|TK}Dz6Mlu*z20WYJ%i{*Q)Q47$}4_upc-uQ&vHn~ zOicHY*(*a+v!dInZ7oaE^wna?<#Yki-g^Bb>6IA>0*gWcvA6fb^qoa93r-;<-)`iyr$ZC_lh z%k5``H7qwDj6im1+eVi`I*0f;t$nAe&*2d$`SJ(D+AwW=uWkoYnv#Od?6u@Cd-x)+ z1M&%;doz%>ny=IL&ap^4Bgnba@T(|eLQTionp>DFcs9_cv=FJzn!D#=R2F`$XJEPW z*#qR1I;O=+m9yyLw<#0GEq6odMI(MLym1B@E4|J3wir z1i29+3JYf6fM*+O^KUAL9s3bZYS8hDr;xaQ*iEg1*QhgFUdAxqZYfFipd3{C#xXHn zkmBs^?W=AZG0uwFW z!6Te30?I~fX5M8_RKfj{XdrAxj`p-mn}2G zvAr#4uf3a$$}gIH+$hLHA*DvdsP4H)aj$8(duS1g^=`d6+W0A65*eSi);9?)D?NR+ z>&kc(vDBpH$Mw-TbkqKJx*R_Caj0lFAu1a=j!Jnuv=>I3cg*{-y!I&?oY8sqm2oLZ z(3Tr;{`%8D#1u7dF9@(manBJD)2hsQC!#`SzD-i{u+a+V!>^`Hx2@@7HeA9(( zD4mLrxwvq02w6zS^PG?{8Q~ure#+!-NBUBGIfjU%|Ml-o1do!Yh-OMT_jPX)RZ7F` zB45QxEl_}JV$0uNC&T5TQWWEtWZCc$rH(Z@MbT0V6d*rP7*Of7`sc1mzehZ6;`r8! zA4SdtfH1^b^`-<&5ttNOG_8HH$^7`TgJlkV!lb^bA#3@MeV?nQ<{m;0q9x4X%s}wS zR{ov3(O)J%Wd7V7^j9#2wDQ-cXOs~A7fv|e`F%7AvwCs96J^dH&)e4Q&EqqTrXo&f z_I}XXL*3>H1r~+s_4$Jd1NopoQqe>wKpsE$bT^-wNKXC*zpx$;6yk5Z4m|6Np9q7` zKI@~5BXS%3fZRYS5}nD;VC zdL1OUL37M}t|i6?N#zgaXOn+oldjD^yg$=;DigA)a!lx0Mk*<}7lhV-^~}!~6W?YZ z=8cSxV?68#_5u!zf#*C| z&HRD$Hnd$^p}NIWJuE+UpY@tA`|*`@-+)f%Y_Z!d)8@&Cj^R#wKKKORk3nDLdfIo? 
zPenD{&hujl8+>-e!m?7&yU6hL%oEo8qLITRmu+DW%$j18k3Fp|r!ByjkZ1o#1!tV1k#pJCyk+ubf5={te|GyP%wE8BiMyD9 z@PAl6{?^~z3j#y0sW>7Bh6qhnaQ0er(6H%w-{f~jkd`zZz^P=WdNe#Q&g%yet~lXL z{-_9Y86Cf%0S`X|4nj&S;{iA8i0z|3UiP{d&iMI1wxKraxo)%M-r?jV$Vgb;a%UtA zgnw-K;tD^D(d$#C9OPoO^ogNy@In*{i2mYputgYT?#y~sDM|Hc)$IH_f`j-ji}!XK8zHs8 zB5V0Kj+xzgqE;BGD9pG1Pxe`_bB{WuvISm@4q_NDP;R>XvrV26JoVMf*M{0Mnp)ZF zzwxtvC^FQS__@JU0@VY`LZf;L3l*xw@tY#~7thwdkMfpUAcg+GR{w>iFET>5oon<|(O!F}$F`dmjN^wbe<%;g zD}HY1zrUe6^xHpk@;4`IbkxFL_c=8++jd}!qoa#g##!QRG2gAZk-PAV-5YClt@h)C z<)!ZZf-;evoyqE90}F85F{&G_%W`o-@97<9dsO3_>-*UboDhdb$|o9^zU3i-dEbgL zWeY)g$y-dz*@T&nHN4?}OD({ELu-rmmd+B0hTR)im36XerY9 z)M@;s)FPDs$!dOjqyX()QQRziKpC2PcA)EnsRCrbWUOwxu~F#TnVP;@6+y`RRY?^Y z=!oS8Yd0;NaRWK;Qi!)1Rr-f`t^V2PR4*IIWC0kOvlfXoyrV3IeE(cc0N*61jGc>0oz)^G>G2| z2qV`^H!OlN<6Z+c`R%H-wj@tamGT-u{%m-0yjYp{%0PQkIMvl--w`Ekc7C>-8Y5*2 zp`9hE%&!X_)D?B}E^s>!jU>TTiRBrG#34JCY~|q?)hf%Y(Nr#T+sU+(CoeWym5b$M zJ^=+32&lqXD3~Y|5UsM_Texpel=_k~M1v^^Pzl=j6SSN9fbdcpXS!)eG~)suP#NfV zV<6$b^3Tj#l}463HTvF_Uj5xln+!a_==GGX{~0qB!U5rK`SV&A(g@f>X!&A6=^_SgWGp~-J->QGJG=PnpNgP$a|*9pdtB$c7z zIft(7BGLmo?dhK!`!C0T}^8-2KfOTz8dU86FvURXan-mK3cel4BKITx|!@;%<| zIaiR+l@8V2!fnxe<9GA-%U2<#i4Tvx3iZGdf$M#n+1^F11*1BAu}DOI^IfkyuPgpT zs8!<@1SJ>rHht}s(tci}wjqnQu{EFlN~5+sj9l^yHDRV!G-{`1vGNPyyUnzBfoT*O z0PZN^*i$@G?`42wL8XAop<;;+_ch<>W}#1gd3XyQuVPz|XH%{HkM;{We>041@e7`d zgklK)*znu04cM1p9Y^YkV#7Z=2>vHYtt4l;OYR-*lhO*41OS8~cQJxTw*1MXeU3Oe z@ueKvvf^Kl9_7l7OeK{7APi}H8zj9%G-`#$&8sEekKMavR|kJGoYpX$T+90Um!wwE zo>VSK$@MN@HLNxF^B_Mml#b^$;MvM=YUy~Zw9c1&LziFC2*D#8UTf>-IKzFuWD)>` zA=--I5vC+Skp{yHQ79m|F$HDva*_YBmx9^L2h<8oG=2QNwfRH@$GHXcDd$P`KWGl1 zWI%ms?N>Yh%3I=Km4ZRQWS-Q$Hh>382o#A{eZ~%|_xQ^nWpue}bFtA^vKhWe9S}X! 
zi~Lou5eV{OYk$uL8nXfgF^mh6K#74O(fU7UwD~)t`1$8s=Qc@u8KL}a`Ezdg>$K4| zZ}ft&xSY%X`bhK?yQm|p)^W34TwU9$k@BjIrr7sI?uMdyPx0tyr0*vKhA3H zy}oN*Yr3xQ8unTa&I(=<)EdeUC?QZJcuu+K>S{NFGvoWj5hXkSrTi_YcATN3yS6^3 z%t(!!E(kWArC+8rZnMqai!I)pcFDk3L*`#>Lv}(Ldm}dQX4D_Qp?36b$o{)FJRRp{ zh@(}sqrP|KV!QYq{kj~yh!0#!Z+~T{3;vu+O6z%C!2YgB?mC)B;qN|kH}v}$h=yfn zJyLyi1%JPh@cPPPjMg`-?mcwvS#<7r=e9rG%+a}}k6IrMv%!n@uTuIT>y30qdk&dv zSb^MbKI{8pP!X!#mlxh~RW<5Yr<6UL7mAi1S1ofqACK%3&!{R-4MA@&YMTCdc?|h0 zN27^4=}7m{sn-c#pCH1?{aOM=CHY;up7U(TK-qKG^Ow~n$8oFL{`_E8 zf_sI0>|FKv0p{@Mnh}n==v>S0)2*s!;*ocfZRe+bq4XA6R`_Xa2v)m7CdXMWK47{6V53&ziXR2GR^{lYJ^!dh?jD4OctM)J9FU$%X}+| zQ5dL7FfBB>Aqusj^))kb3N01C#`LDFyrM<-5pK%rfs2=otdNO;q*AW(VJ6-yUgEo}Oo+u1fM8N0~$ z8yIya77v(FryX+X2IeUBaF{%NG1_rQdToHtO?>*!L+|BPSFlXmR_YUQFgDCRIH&DO z9!j+C(N|sJA=Z7j<;Ca9GQ4$XM1o1zySO0v0`JIT6_gb*%jS;8HuU^*Rag0mTXEjr zHFu1o4DsZBH}r27WT8)0N8%egN1^NKi8D?Vx*$Wf_Nz`yWuc-8Noymj(s9Gn_VK4m zb8wce+ZTNXA6R$N@h2%r-0N=hzo-J!Of^~Ik%R@A@;h%wH-B#v$RTR-eBx2ZV^@2JhLHDw^9WuT zif{G>)d_M#*93g&&+O8_HiwWQzu?73DzbIOuvudl*1ZlU-Rby_=a49V_F1b1r}xdg zu_l&rLK(^n76fUkFz0l{M_#6>#oQ9hP*MgG6uJn3mrZ^M*HTq-tW8Io#79Alqkf<= zKnb8esC1QM*S#ZV)yz*`ZXHYpfgl2xyNz+yik5%8s^AClsjxizrey$mg^rhL{+(5` zZ!YN7d6z$VnU3e{BJi@6U)2(?cO6r_7*#1y8K5LI`H?CA17VldrPn3DNXES>Z0XNF zn6`G_wIIe>6E=M90f)r7GcPiZ+JSeATyiYvXQ`=QKfkX3PK2O{INqF5cf_Dj>=Z z^O16F=|gdVyduBYl*4QnPzlfnR5+pjluvQ}x z#`gPfIo2jhY+%yz-78BNV{tu)^enlIB&2Q9zS;-OrZ>eUadN(ROyXJeL-mW>K<9jI$J=N`YE|pTY^h z(CU4EXYsZ?SBX(7WQSEX{2h=QAOe&SOnXymaejuay~&4Q@)a~xLB`d@Uzh^50*V5DQTgzR)gyOKZ&({b zroeQn;6<%?K^rI;3@K>T%rHW}fw)$gYGYa(Oor0&CSp8W{q@dk8{bVnh;*UjtIUx| zKK7x3@(G`bS+8$OY&!-#P%EHF5(_HrYwvV03zyjI1nrSc{&!=~rNo@}V5Amoc)!pO zvwmoMGPXaUgg{Yf)vQ_9pMwu2rF1>FEQlNf0t+&ZjS%D?C?`-P&_~pJwf1Uncn56O z_81}UEs=k~v&nC2w6&Fs)#(;5sC83QT27pGVeG#!6a{KWYkzJ#y?+o~j(^RPw}Iqf zdj50vH4cr~Uu)P{er$M!sm|5McZV~cEA<;{L!Tw;-*dw0ghK>A zT2r}q>=bY8I^}@Pa9cmzYs0(QlBxGFXR-X% zPY1KnqcCai?2KGIy$3$H#Q!NWsavbP=x{mWPRUnluTz3H`t7vv&pC{)R+zoaHa5d2 
z4o#7E(?5+LDnGfFzupWhM#g;9nwo~E9=<=`>iP||(#mit=W;eW8s#~6o@O5Q8tysh zOQDBlXO8u%3pp+o9C~1ey0~As|N2LX?yDr zp)y?=&fV$Sc2Ot)L-dm;HHrhW^e zoh4Ku)H{?HP!gb&RHDKOZ<)3eZyPN=#yCo9p8Vo=Cd>{;JW6K8bB(++dKMzOC8pv) zqIlR%u_zqDSbnVO3nDO^*OKc3yNb_!(JM`=^+*~qq$r|hGyHb@r>V)GlM+JSs33>W! z1wS5wA5X#0dg^CcTYYtb(^_9c;Iz`$q@2Z2xcXX@PgQ@j@OKmSt1sAMroVyu5r^Rk z-){Wn85TU#JF{$tz8&=+73~gzGebX>a`e-M|Lg74Gg!v4aMasv>A1>KK~PqdC+^Jr zj?`=6pCB*GS_$3?a`3mb_XOa7b{!zFOHdJ1Jc39p6KYXKUJ#^>bqfTnHea(((1SNFavbj}5O<^-*hyVl<;_0FwYJYMdzA zzkm6J+uJW)6IWX>vP`!0Av$O;BL6>7Eq_PAga?%fqa9#-A{d{TSX8#6i)(eK4Je;|?*_A6xpO{J1fy zRv4whXb0Lch0|$$@C^~u^VFA>HL;`ztzbB4D*`W!?qHNi!`F6Hctdc)F7BE*O78!& z?FUA8FxmlEls`VP;NdXEV`HR{wefbkpoKt~0hD7~R1r5#ps>o$GGp#JmiPspGpRNsLmV z|FEUM%GJ;KIXAHrtSUO7wg_G9n&?w6l*F2Lrm*AKaDTZz={zHhO6@B+mxODw*= zv<&^IxVDtHnoZ~6^9X{0MF#2sES-8UoM71V(EObnb(866IxSXX zK6svn=T;m1Aiht1?M&JSkbUTQKW?*CN857^9aPGxBk1UOCvKzsY+X-NLDw(7b0nkc zZb}_!$q2ugIWo4s^}Arnk4d54*wPD0hK}Ge; zC&~<>0ye0Ik&9><<0zeMEdQp|0m_5I3)c>{ic5U8$Apn{>G)c4{j+6} z!Kk{~@;^Dbb^D0x(@m>C;R1zB=-%^s zoxCLWUO@W=eFGGLrY86vd!F-V*=$RmYU9p$t`1xN4=NzP4~uanu6)V9U#PgLz0jMTT8 z)L{@eWr}p^24u5&hwDz;{V3V=-e>vtm(h>G{leV6&m-r^Cr3RWT|nlOEz4eJmEk!C zh106v-avBu^mh+Qzm3)!&hKR}m4Y=q78WS-tZ??&%ntoj?C`!9r!_K{nB(n!9|pN( z@o=~9W50aMyMs!e&6_qUumqtC9z$$w9MIOp_SOg9Bw*!}x!o=1$KvAX8@)6Z-^BKI z``79?HXd2#xnYGKxa7OclE>SMD--T2j)p9V?nfCkmemj2;`yMk2q z__vrkaK5PLoj}VNsZUetKr4UFu1;5Zn(m2nlDlEX@93LS2T`pEe~nt-4ddy2>Eo;9 zbcAu##+HAG56VmA{|B=!9aCdU(J;pRLwg140R4bUM@ReTPvR*$Fml8GB#H6|dD!Hy zT*2LwDxJ{cZFgIP`}5=d{21kLN*!qJkE>|*jaaats^6@EH_53`kV1DE0&i350R519 zk5Y5^g!yaD>goL-N|-*#e^cr(2_hD{d?Hc%$?ccH?h@1o@NDHbaDUo5!|m4p$J$R* z>HzIPlnp+?(Vo*UxBG3z@@JDDiUaa1^8bVOD+GiwntoUthCMZ)k0@A}VS5hRY-0U+ z>BnSJ2?PZf`_8mgaY zJ7w^JT%41pR+184jHL_LWjF=g!8MxEcIg3k&|LG0st>Q{AbYvYg1{P#yKn4Kdz2T2 zEj_cxjBNE7yG{09(s@=GUi)-zq06;vi2F0AY`R_n?wP-}-ZcIL4)BXkd3pCMuKhW3 z>!77Cap_pix8cF=xYy79cU$>i#8po-W?X%2fVS!;X3QWSV7)Pm6dp}~_J_>M{Ii>T z!9+rnqg=I5oL9v1+phv-r%GJJ33L>}CbT-FcOpU-4 
z!>kN|DOc&(yK>^SLyXLl{6KOG9d9GX!(*CCZ;WCI_XSU5YN`G0xq+k#1Qfhg63+^; zbR3Mf)j)6OrPU>pDLREx{c&C8*5 z!j{b~HmzLWeH-P^sx#tYHWet+ujRxic2%xD(j{v#Bc84OKshtj-@nB-rB;w0kN8Rfc<4j@O+UT`?`5qSS?`Y@XWlmPsq`1wS3tF;Grjq+&mX~EM!U%We9#g&u= zfdww@92++Im$Z$X)MnsC#`gCc(@wv+_aR47Z8k~ZN47Q5RZ@HKiop1cZF}_alo_Ya zc5)_hpt5w5!u1GzNwLlQGcx+fJTiS)Ktu{&@Okkk<3*)-acN|)5s?8{MxnKG@{=mu zur%-5n)E32^x_WJliXZ%tHUORy|W&o?XvBZzn{!Q3UA^%mVbDHxb`;dOxx6s^_ijZ6qSZ@#Sa%WwWDj#yDTV$Tb_(X@kRie3a7#o3XXvpgP2FY?K-nSJ9A zG1dQUhdijNrfZj$go5j`W6i%qXEeP@a`=Q%+QgSWy#mQ)bW=4(Y-*dDYVcFh2rp?L z!pUo2xHgcKrsH+QrVbv1sRloV50iSB&a>|v7r1(@gTw?Zs2;Yi;)mo7-=DLS*gF8( zWW%4_k?x^!_AKMaPcZ6$SrDK;g7{UhydvIyBl_<=9!PeeWsEbZ`QOpc?Qy>09T`jx zrxh$m;S6f7{O6Aw9(;1ai!29#z(us=?$! zJql8Zwyoq{{@T9F=R2V7&Vq2xlyu)swt8%LpFs=?&Znu($6{r7}v{B=!v>KuqEbUb%y zBOXjVP$gRWN_wMt#2|CeqrFpnB}Tyz9b5SoX)9%&+UDQlS)9UtA+a5kJV_ZyQ0O8A zURZbm6FUK32wgr=yFtqD{_`sm2YW;NW0PO!eueMb(dLY%k_|5u14l59qG90$W)o=b zU;0efT4J{BN$q5dK#B4L`C;J;W;1B@Cp_}%Rl><{3w$rPc!xeUUb{5+i?IaNg<@vQ zzv9i;2Fkj&E#CVfO2?0}`~+0x;@P&M;?Vy{xsv-8>09NeI9xQJ;BJ7ej9pKu+aJfP z+UKV#Sz6-vMjQQ!tJBbw);+$Q9-x8qm)M;P)viF#Ukn@|b1w`{ z3g+#4G9(F^c6*xD`(PyM@mKbHyJ%ayMBQ|}$siu;HpQ%_Rn8;SOXuzqyS99!(^J}Q zNYPCca?wLWJva(KB&GdM%)_|X6@{E3Njdo9=^@eEhkIiCJAEwUum36c0=8^rhi3Y4 z9``xXv&5@xH*Ws)1N1ejE#kd44~W4NzTY~t+n0PxH`XSWjXD+5gXsrwDZFs2_P6Jq zSITbjB2Uoq9nBl@pu<5IfNd4^1wP?4h@4|s?@5}{@x`Ya@oar^%z~wleJ^{GE9iKW zlSmXl`^wOu1*d&`uJmCXJ~!nwkRK|3X`PxX;-vn1uXmLZ5;_ISnN9wV3+Mglf2rm7 z%E^??YZX#Hi*fkcl+!@DP;OY##P>w1Q+4N?TO!Flv=g` zAO0_Ox_I-JDBt4g8@BvQxpIHrEA(L86abtCY$Hv6Vfh~~$vGtdIf8VD1}6}phxiy) z;6lJDrbERDT|SXy{r=L!7ZIcb9WQgZvHpRJ0XL!9WL)c2!qebQ)XYB;Me6>7KZqb1 zw*2cnMXp|DMlI(gK6$B%L=>q_PoHnxm_Be(;3lAlDEZVLys0LRD(Z)an_ZINgb*E@ z{K&fV`re7>C00h7n}1QbBt}jETnJWnWFTUp%O|Eg1)f5qf+RQ*$PZi$R(7Bth@#;W zb0-v(`FwYl*c1bJw){UH9;nhGL-K@w5R(nh-KFi&Wxv(GT-jqDll*4-hrY5i^XPu= z>a6vsZ`Zl=OP8L;8~cs$8g*ul$k)>XjKhPLADi>(J|2EWV-@a-h~X18y+VTG`2YW>I{$3C2!@@*pw0f>wVbzc+l<-Yd};+$ ziGtfNeeso-(pPt!U+}w)EtCo~PkCqN;~;i>E6;ojNtqGT~&457bassq-G=>>)8- 
z0{I7O1r!0EQ?5=%wJ$N!TcJ(Q8O-~In^G(2hZNrF?zjwMcDrSfb+SYF z?}jCh)938jD^BMB(loZ5nsT{QI|i0*|Cjxb`Gk9*WI$19<)?Hg>ow6kd}rHRC(bdJ zA5dbTNHjgl?MtNa-cJpQElS)dQGQVVZ0$d!{+yeJv+TdjztUVtPt?Btss7Pn)IYzW zHuPDdLu+zXx}Vs9h6L~0ZIUws{g|i`efzmT&dF39*L|8d-nmNobL-i`*zJyq`mAHg zDB{Mc;66k6xOi^t@>P08xc-IW(Bxqnn3HwJVk!M<=>K{ zwXq?&BHp!+%*Fy-9|Q5MVlNt6(WP)T8703 z?<1VE+$(Kl3YuOJ-!Lja4`q_O3|b$pL~cGR8Ml=E5vN+}N?M&g=H5Ov=*P^n$Zgo$ z89f(AVw@7mtM$77hftY+_SG@0JkT_@+R_PMdEQo=@_R8+Smq8X1pFV&%YBRI5i{y60{?}BDIsZU?VU_~gBPD)L zo5T;q&WH0JUg{J~>eCt)$4|U+3seRu0j->bmlefr$&9-wA53b}@j7BWjH+QM0VYXQ z&U|9Vg36E1X?E~Bw*O*_sjO3%M^lwTY+Fpl?We!)-}kR+QtK(f+9Tp5fua4!i_UB&pi^fI)ZR1k@3dfPFEvBt#6@ZUs zjLS6I;emTzR&*LBmxbkn`#~=%e~z{T!u{7B?&g6Gqn~GO$kBHV5nh)m!x?f5N#`h= zKI-a;d-WZpBopU=6%${5df5FIe%7Jqja@}4$jwW-JlQ%9Eoyt?FV(74gm9l@3JQL> z_QSrZsk6%d5bE&H7MMT<0u`W|OxU@HKX_|2LSIxwuu}&x8MLmWwx!D}Ufin*2|D+E z7&k3|Mg!_V>$*;ALB&LyGan6=`?*MLS^*Ue;ek3pe3bn51?qLY;i^5)2in*(&O)$t zoqg?Al(bXW)$*=axzl{Y?bD2-P`31;i$i`zezEC>%{E}VY5C6?F#a*`$5^K=8rG8c zL_vH|UqAt%9;kF=bl?5taZ`fRlmk~WZu?Hu=M<7FoExy2eNy!PJ5YqSv2*x^RYP=(~Fd zk*3hl1p?HFsRvLFFx|9ZT*2ijvmUnEKQ%Cj+)I1GNfP6M@&E;(1yk#>ybYRjAbaU{ zYSYa&+6#`O7|)jfzt)Z*)#e0A>`(u@SXpPGWU9H29m1K9B?Za{Lyo4@OYuTQjql!A zM*YDizfR%lP8)g#FfP!s;W-6mdn|S{pO_7l17<;>KT_o*Gm`g|2&_w2NT5!vhITG0 zHw6Uc*K`(S8oUs?e4=pTww@_{PqbM7a07nKm*2$v=^R`BZLYr<((lMN#`X^jQ?S4V zdYlrUkFI?rRGt()KQrUJg#Lm4$d-OsaowYI<715agAFe#&K6N8`T~=s%=Gcw6m-y# zqnsJGDi}Xjx-)Bb{Uto@PTH-#j-mMbU9GioUA&QZ+l}Xw4bpJ$5gP^;D;A&|15;HU zPh{i1UY2_`_M~Ea&*Ith4&TNn4zF0Q)DPn;h13ewyCTe4v8C0UQb%MvFDJFjI6m%o z({f-y#y&Jo%K3VQ`Zi=&*5}h?gMH}q@J(H!igzLVH<$d}&y}DD@}(B{w%)>bo=I8V ze(?ay%<|5amCHrmwxe{{*xf`NH;3fUX5Pr{?aW&J>1l{F_xq$kiz|5HTrV?oyBxH( zVYsh!!W~pwVu43*;{PGks%2OB4i~khzkm5e)rsnbrMH^DADO0s)vS)aC2oIf^}Z~^ zk6cRQY#gBH=*t4>3=bm^lC5jbAigZOqw;SB0HM3N5fAbLMWTr* z-1|}7yUngoC;TM7>V@do(uZ(#@u?%D8*c<=!&e^iY3Lute4qwUGN3k)9_8vx9GXXH z`DK5;tP~+}uOEa1N(iQ7GQ1GF)B;=ewhh)+S6gfv)m+ky`}(x}K#QNZL-Q4H>oU%Y z!H@(f60~PZeEev~d*X`6d4;spFvh(PY~}y`m2B_3_pdUVRyO=&!@hHt{)k{SZ9uKS 
zL`&=^A1`BE9uUHKAP-P7pf=DSsr*mxdAJ8!mbFaw?8;D5iPo?=Tyg&eN(dB*h8G@l zJ8S$SjWy#!$+2|2o*2)T|53i17hV1kLMqbnnkSI3c~L)Xc(f#LlyTZ6#`*_J3=|3U z5EZqi;4D8S&-qwuwGPM6 zFdTz||LOdT%8$#qzy+%a(C%sZ*ZE+POUx$Dy*AW4 zDRC+W)C-&ZejnbH2edM2@z7qa@7Z~7B$shk^f$9&^ecS%J0R8FA-9k#UT^Ku_bG@r7{QS}1qw(ob{X!gCkbwmVYMo7Rh!T8+s^FQT^;46i6cGNo#J6XrxC&Xf7ePd~OtAErGu>b)QV zU1*(fq~vEl?v@kIJJ9PK<`jF5)aYoAw^~Qna}_MHrqsHpZN{5pnM;vRv&~EJMMv55 zS|@W6*UsqRkZONaHD%fR0k*`Zr5O_ZEPgK(fh zp^Jb^bpdYagu3g22e5JQlab>23GSbl|Ns-SL#J7(DCRD5=0~TW9zDS_Sd`}segrhDhe+8aGKhCeW`Th1D9=6m~^Qazxo zqWGy}o7+B;Kj)fA#zTSvS7t|}{7tDIt^XNb9D9eDVH|V5U}vJlsVnCck+{Egg^6BJi@wf8sU+Ket)BKZzyX6<@y2kC8Yf0{S256`*=F{Z;!Tpq_A8^SbBW*a(TCD&X1VAG9v= z*w~%^5DOkYc$MK$MR)LIKJXkBQ^7ZGaCJPJMYGq z3~Uzt(R9i3Kvb>YuaoB8BCPPWTj8<`CD>3!!N`z&fcpe3f4fdK45?_ZQ17i3fvjea zD9L_Yf&=b0REF7jW1{M55;5lrjvJ~Na?v^kw{cvf8LN5&?;7%*@Ckj4g12gP_%Xx? zz4=Q?)4abwDo7hPQRPcCj{A|deph8CPRPCDGS4F&CuhEyqjl#VUfZg>`6R`ANUvSa zn?l`}|Cd_9vB;DjfF=$m0-7ApY5~Rr)Cxv(qTqZYcwtwr2I)$M(eWHH9!wdi1aPU~ZeyS46Y@`o ztyaI}LWa=sxcPf4x`RIG({wx|BzA3e8It#|FPnl(UYmbC#19_TKD|-Fq1;-Ho9cmT8Rzkm6}rJ9ffZ${jZI6w>9A6xoyW-)svUrB25 zsSWreOE7w#3VSLun2uZ)%=e%dlNp$4-Ap56V%+Mv{J)JKMC_-}mmhap@ zCIV02p>bzVcVB$ZIdEIm@`rfIYW?2d!tNlebc1<`g*R{}H&Xk?hkVRQH%&j0a~}1) z{#nadCJ(hfitIBx+(*hQ9@d++uKGh3DElzn!`DZA4WHu`hTbhc+&|e$b1&Dr%#mU$g~*7t*7t6nHh{Lf3Sn*2?<9k`F$_8E{$3d(@cX6{#!&=p>FsnnCNSR;o2!WSPH3!|u z%R0B|yu`<~&BY7D;T*pp@bF~n7+44WMYUcHOGXh$?_hheGz1Ob$o)TX`HaC4yxEGA7 z9TrxBT8xE=g)X1Cxk=if)1)Bs6Esx8^DyycXD}oIib8|i<8KpC*U^jCWm0D|ex&2w z#CV`qK#`z53sQN$rZo;YalEHRn;`N79q(j^ARV^ye>PD*(usSSgasH8Zf>*jZbmm8 zy6+4k3+U)BQdoHN(dC`*Vz?5aj>GW(*;xJSvR))3)825n9^?^ z-jK2X{btspkN6%&zpLdZ_pX_4gr<3vRjx4g#=W+$3;5AG0h3ntcfbBf#1%m!I`2Q8 zigmF`yR^@~*eY~Ofa}5>G%|GU^Kw~xywmc+Sd|QOoG{k6#B^&Gy5{ozr>oXQtTQU& zG~cQM+mv0A8EtKeyvOSrz8`fOWoj&SF`8z9cCR5f+&UkQSMC^eyL!}Fq_i=qW5)F- z=%cA|#g%h~NS@d-WJPcpLLZcSE1tOVhnU8eE$9II6*Vb7QNI18Wmci2X@sT*ED=;J z_$j=QHOkerZ6j?QK(3+nd5)TRTP5^3Fpc0B;E%{w@qWn6dHLd0DCtg%nA6b$iR9yO 
zAX-rA;8O3y__Yss4I|$qPt5Zp=K(o9T?J^%G55d>qCzep#1M5`l#WIw1d0nl(?v+>&V zwSUe`4kwlAchrr8*ADlI5MX26I(hUGYcPj!fm_{&hP@mL$ z;kN8_gNjH`YVX%u00>nIFe!(wm#IxmJ+sH&#eod6bS(XqT8{Y9vJE^1GaK`e7 z*&>)dfOsi4Cn+tT$ob~6!{n6Z|9IAr*|fuK1&sP9(K0|aRN~CeeH{;fj%1uQ1bYFq z2NRm$ySytIkTA&bf1H2jSr4}QhwRYR7oFPuh8ocqbY>0=UAH9A5QPmG{g+vfc}O|I zzi-?bEBs>plFF$D5%}H;*?kWla#4lh`p)?#H}K#G3YI&x58@SAVe%xy^C*hkdD?B< z75pu4U-=FLM|98ZwN*-2TU=-G$*{+qMEtGm&kO5!o1kZwy|vDy_@TBz{nd(QK14Eq zS*kut@J0m#?pzKH3P3AVR=>_J@QHe#r5>^qUC1BwzL9gh*{XTW_O9>R zce_r8(($!N5Xi~aH8pg6KUq$_NU8!r;Br?Z@Pa7@YDFu5HNi2(W1KQ_^=0E){IROj zLiKv@*;^P*1DpJ+D@V7xIV_HGTi@@f6))!=uYUWbpSP}rl7}F}0vB&=EdSq8tBua@ zh?J)%o;|qiMXJ#8oYjqZHu=AG+(cHcbS0I4!LLH#Wy3o!QSqKp9>Z9EO{vuwh*;?I z3FZ0ruD-)sex#zi;jDeHI;2Z{aRbS)$v-*REq2_QKt^iWlv>fKQP_Z4FNqcV>y*|` zh$Oqy`=5!p|23sn(4Hyzgw%3~;*2)4zlD$+>HR0i7=ipvsZ~?co_=J$%e6&rEuOWi znKI+ffF}o9Y})xL1}0K#xDwO?>K_Hos38vPnT5T%6;DP0K;WY72)s?H73g!p z`|;Fed#c~cBLZ&ww0N&fSZTD2TjgbmW2zy2w*2c=Y!7&I)`GGAVU`wVii;6VQHpDSs`Fn(?;N_4!43Y}=ako?UL@Hy=zl za2}UpLd8bqQ9&YBEH1KD-fxfk7(Y1Qr@tpUvbRijylo+}-?8XSXN`1pt)`Ps-Tf!1 zN0-?T60jTEG9l32w9pA1SzXrCyu)pDL1lrl{rIc+WVM`jcy1<^X+67ap6h*Vc-uLl zh=v&*w!f*VFdq8865a^?4OKOb#A z)dT89;e`|A?X+#X`%MlaXVG48YV46Hehvpz15iEadXWC4!0wycKfX-DC{Of!cB86< zXn`WZXddwDVe_Qc47Hco7TnyfI=OUXoAIir$zNv~rXx}O?0XM5dP|n-FQ3AwYQQuC z)q{8`*Q>NJjHlG``bD|Dc8qv7RRi%seno!q1pmLqH>G-zA1WQ;mDZidndtRA8$=F; ziV_HLR9yc}sUAJp_Q9Wt0q1QScv1n>#Tp{Xg}-=TWBs$qpFHvYS!s74vK<|-W{4m* zHhjrI6aMRU@r>KTn^HZR{Dz7{IOt>5#)@LqV6qJ~bV0(>R?U`Qe=<5W`A87?hEBB% zEt@U>m}8rBwFbm69_|5D4=4*wOndIWXTyBMGkLn!m$|oVAVlZ%XxO?MJw+cze73dET3@ldI`?yrvP)R(}(?3P)C7bSCH0 z@thS1ylnV-E4fZe16>%)j}0$Ohnvc{K-DZfr$zT-(l5{2a5}VMz>bwDX~w)cUB4fo z&YbAjN@i>nz8vUhdi`4<4sUI9>%=2BoGs<{`1<&KY*H3U3l7f}}A6><^?Yr-u_O=XQI`5)8&&C0?18t{82F;9@B8p3(?ywU&8S9 zj}<+)`jc}Zpx|x2_%vR&t`c@SQGF=sL-wcXFixfwf<)Nxi=Xg|yGMtS!$G$RfNDmK zcu-AXsudw(p-X*15_SKOQLk81mj0YtaD(XB($8OK8B_2xoRp#CIS7H54KE6p&bXJN zsj2P`5er@Fp1`m=#rB^55+8el{7p?YtsZqa_8g>hyUX!oKinj~pnzo9RZ)2WZef2FW<5Cj8?4JMD) 
zOl^*jjwM!}-#fL}LwAX7-cbHvs=?$!yp%hALkByeYu%X8({}bPEL`f1$#(xBu$}C+581h+Qa|T1WBXyl8?}A6LF29DIdo9WznK-GFL*s(-}7ign?3mP(}%m-=pV-UgX8B^ zDeS_%-}r1?Z+rwh>L)5yblHi>k12iMnYm%hMbkGqI>w=@gK>9SE#;xOxc(#LZoI&C zYNHmMbWX>bC2=va&KSoWcsVKV@I5?v*=`N)kO1sw$qySgq6&}RdULzRwK#-_Bw7}q zkHV;+=YR*K1$GPWG5GweJTxs~_VVO{Ae>j!jOXMv=0RpunZ=rD{s8X!0vzw`aullIQ_OkPCyY6!Vy@6HjO@6pDPPhXl18N8DnZgSfyv%z9^{}1n!nk0_mVc0krdOy(Vrtfu zS~aB>Dh6JQ%)T=(SpD1d+3?(BtHVrO*E3R^rqqhwe*_mzoT%%alFgAkq7}-cDYcpc z1txU)gnmrm)G0cK66FtgHu=MD%^k3OycXkv9~&OTf%HUvasB;U{BNiY{i2C>mYV`> z`x@a2g=0MDz(aVp=jw<9^A@AtS6w^xlG=&6qs`7mcx$!4LV_qopeeX(fm zk|SyaO2v7Dim#ae!07muxOa*jys^|@wNV=DqVaNR(=hFhVR%^@&Nc{mgrm?NDTgUj z&^EWL+VA>D;9>cOha&s4$e5%At`20JWrAoy1;D67BnO{J&s=}??XVEi4iXZ)FlqkBq}9rW zmz-|gRG?E zxw{(6k5v_1JeGV}+l37J1;4E^eKx#d8)x|??SjeabUa6;*?1wWwl5FrTNI60l>wCj zlL+-mh2y^)SH<&g)pN@%3m=IEJyARyPM5#`0V)B{DOb&L*aKdgUd~bbzCIF1f`L4+ zof{}g)Al#+=2Om&w9_r7g4}tRM|60%y~U>Bwsn1==27NzI$)LvC<(3nbV}<#@T?Z@ zsTk18iIIx2)qicss1ILm{fj%J+Qg}k3TBoc^bJrc=m%8&*=v^<^7t9nJ=;**(jkAM z`km0&{+m)6S~tbUo?m&2gOcr(ZS7n9#onXrshLN;Pqw)H3a<>3yF1Z}kqQEp0ZKxn z^4ynQoCvkoTQ|;f+tT9p1N{NoyT~u@e{A(HLrhp`^$9UjvENV$`b86*^^@C&nrfqt zkCYwWcHM`zEL(Xo=FT39Dhylrq(BeP?s~qUr-cSOo|n7tSwJMV{JC+Dk%ucfd;C@2 z=GV69bLJ3Dt<_P8ufg%>@*X4A1sY%UEn_jUsJ>m$gL_!#WZf7YzgVp4YqRCa`&*bZ z_qO*rS6#IJVX#Bqy%Ky?sj&C_zJZCynX&d@=Qa#~}A;5%BL{>Ku%UGoRXy@h8KR9_}Baera^tH+BhG&&Zs(pa)Jr|RmJlO?oQK93Z+4edvSm|00p3_{@TKq zl|;yE**0%4__la2&M+H`^}~l;k)RGx6l~?^+2`)*4&|PV+pd9f0Cl0&pUlqeuRQK| zxoN%L?O{~CZ0UoX&`(AF=Bxhy3ZCDZk^23{bo3N|Y-{B($!(I~^?3*n9(sMudo^Ug zf8Ei()obz4@oQ=n$$fZV-yO4`4dvoR``UarJ`s*K^}e+-!zCJJT?;>dEvt;Pd^cHy%hOr z^=-XqZaFfaU{Ta|OW7Zy`v2M90#La$4JWhV*(c&@LuLMrfz4O_G}b3PLgql{h@Q5W ze8`!!h&ff_wSDO7U@~Z3M|irb=46X^a$CH~VhAW`%b0lroYqB@PG){122D4*UN_yF zG@#=(t(&bn$DMEW(e9%csYk~nYXn`2t?N!VU*D^rlLsT~YdUIdsyZ*VHk-e^pEtQ1 z5)`;-eY557=Nxq~XR)WmITH{aoBZg3>d56QPB5yjrm7!IFcrUH!x=}yJw2ma!+|hT z7ZMb>TulVtrmBA&yb!v4!oQvG*ik3FBo2aydS;Wqq|pBQ@~@69R{bJz>>1Zaf1~Jhz4L{FsjdlM5$s>>e5A6*` 
zEwpZ`X2bhN46C}MdCfXeLVm!*Xdl-0Y2Czr|JiI}$QEnu2bxI|)CKS`YJ|}rJg3~Y z(l$Z7%oppH))mLLcyGfBRX?|H$5I*lA6xkiNGv(LXygS(>I0))82teS5aolKwK{ff z#3J7~iM=7sZNGQDz57<`Mzxsw>@)JST+=;{QGQr3gtdLpN0j_whV40M?Bmt8n_k8; zp3V$=B4D;|0<8k6(E~AGe^NoOt1+a~FVCBQy0g*Y@s4Fz!drY!${j<;Ibll^B)&KS zaoFl#6fU1}Z`E(84}EXJ%AK_au?o}Bv~C=37&gRdyH{RmYpQ{&UOjbm8gGnCCfh&c z%-V~G|LA?=lKVp}wa$Hb`|?1vYU2X^8)L#K>hd|eTDJs;dixpI9u2{z@`bX|*~zG| z!^T;A&qg3~71`97$rb2f(!gsbmeJTv`}wQ>o{4y5rGFtxNI@Rm_uR|48-zTRG6PpF zyMaoEeqLm=DjF+9s`?s=8vjZTrGUp_D%qKl#Z`G zj|9;O{@A)EcXS1w*T;yFnt&<;N-8Ic_U~UlVPP@2&feCEaoZhR`g-Ky5uN0$TWp%P zpG?(z_;WL({A}q9Ga@~Saa0D>3QRPu{&YefekQuC)tTM3Pn5*o3~0A78V8d`t0p(g z?3cuvNXxr9Zaxwd>VSvY0H8?loRV931l-BWlk%fWbtTqjp_rR zy_51rb?eWFhanMABrx?N`S^rVz==-NTQPr3$R|3@iDz&3_@HcmD7+5=Wg^F|Iw#Uy8Adk~@1n0Zh6hL|~%BMAXyw~Od;n!~D zGaIb{ax)!oF2=)SP+{N-`pdJn)r8-|!OMNOdyvWWMZYSGW{;B3Io(PxF>oi((DBOV zNR&TzQxsvUes9m$z-F%v(`Gr8oZ5I0emO^EE`>&tUbKSYpd-yre|O-Ui7PU^T6`L# z)S8jgcP@`133~dq`y0~-3Ii$~>VqmD;o5GGiB2|~jXfDzI+!9bDYRl19=Vo2^ZA^_ zJWq*HI*7xjO1bEj8NQ)pp$l z#vR~n^7mgb)^Xa4vy5wtZ1^9iG^}>sJ}W_GK+dKnj8=d4v*xu$^1;y$mwmp>xHbzE z0!H~jt*H2gAL|v_+8r;ev6uKRALDQX-KPKWrPj*@M!b;)}48XJHzuV(u z8*}q5T1>@srQOtyDsUyJ5R@NV`l4_qjI%_|!?(y3(?9mB-9F~Pa<^^x$peKS@%j7k z^Hj8k%^=eRyesL8`jEXlaP_-+TRC?l zQA}|5IhCoA*za;cyLIp4jFrQbeAfhsj4yKyB@=W*MV^X75M`(@n)A zqzb#U{1x^n1(2s{eV8N7JZct141b!_s-=>)q(MWko<8aTQw=JgR?G^?E1nTikHT$7kCdEHhV~C8{y&(?9Cha$!a{Xh zvTV4ugsBERPz*4+G^%8HTq%?|RTF+kBZJ%GQ6rQyq$lz>f7Es6mYHdf%8qH zCe%S5F!BF^DhSVoKr3~eR{qrbALswymS2l5ou`}XhvC z>^5a4KJ=!)e}wlY{IJ)1{m_~Fac$hvc?~EOT|JWGf3`dkt?2T>RAy8RLPtjz*6uAu zhFd3fTQ6OT*6U7`iM*ADxaiTj;JIFiAG`Fz{PGK!Z#rbtC5;56c(?Y`tlSW+Ib>S* z3nSAJ-+6a$YwLJ~_DplsIdcc;-N;ud8(4&T@0~DU|CwCs$e)S}#r(Udw%qX5;bHlx zV@%hF+A?K-h^cPb$78S-BI;s%Vus;M-_owlH`UOED7^6FG5gU|60X+yF>YG|(+s2P zU$z%frgn2)v{#=4-efKW6bNR;ymk(2I3T}tR{yoW-@M2)I=+V(&!(!nl>U`CQCD*9 zT@*k2sQQt;bKS8Y*GUZk2wc1tfw!rthH{|Z3$MSqS5zKO+@(&d1Ax#S*X&t=;7uoG z`-K@ioR#1yDRNc}=Nak?jJXR59_okI%-nMN 
zRS{kGR_2cIy)I#DAUZbrAsnq=QIDE$>L;hYNSb?s`5<5zRm0jjjj9L_lD2d<#oK2@ zlZ_b`3Z&N9{(xeEsRVf`SEjmc9N`<)zkl}5D2XF%AU|NL!Q|4Ynp?)#H$>|;Ruc;f zL&;y;71@|RTlsCbQ+=1+!2Df18(s#N^URLhvkPKpRLQ&MbAUu#VoezgX4!6 z70GQsfLymMy|Zdi0G@kgxa;6l>O1p5>2-%fz3`&VIqM6C6eC;D`OBVDM-J7le`w-P zeKh9yMfH`{j4b@@rDIsP#oibVzi?QEM^OBDnZz64zXerx~#c19RwIHQ& z(J0I$x?)su0hVcFx_f2jBg`#*8re4I{vWbH@ZX^_t&^1;)f<{mWoW9Xc1pn)LZ$Ef zU2;PMNLAVk&UEppA5=e>aGEL=ZUa19)GuM_UO#di9Umv&MhK6=1U8*@u;L7h)vxg( z4?seKzTwvVZGd>gLFLFIA5xi)M`w|!i?LIgeF>H`EM2_GF#r&{iyHBu`e4=pTq>Aw z8)fAwgRZ0NL&yX=9*HlhW>Y1(W46>?O7bMP(D7KaF?}|C`z6a~?Td(Fq+(5}Ow)4= zPRJYNd`iDT%FxrNjyM9<)|AQsB@hMY6Gx>USXga{kyu~=Je&NA`ICoMB}7PU!vb;G z@S<>-QI$5OGEkosPH4(Dj9QbDwc^qLc+~62C_fvOxnSYjeRoZy#GN!y{=cCzVINx+ z5TmSZ4=hxPBoP$2&=pgmrc{O&3@K(lAU4E#&)KaRCb7T<>9f_}yeXS?w%iVoxULS; zVZ&SdNNs#(m&CY0)RfAAKBD56**dO{Q0rY+A9W#KVjF97)8}gTt^CmSM2ly=QdHf( zEH?;a9Llg!86}&&X|WIf*GClTFt*=jQkhlJ9-nmg>tf3c^z9sR2zy`T$c+ur#{q$y zRbQ@{<4F;wZd!Uac-Bl+J(sRwDE7qElBG9Yka>pJ)Qyf2=xl@b;L>VG{8cr2ca}m7 zKD#vUNMDW{mfO6)=cc#EFz%q~{ASt<+}mnF=JA!Uv5A~<{;@${u%W}8`yKLg@Kh`R zEAs~jq54lI?h5_FQK!L+US=q{Blq;ZYKrfJ(N~4R(~kt-#xftQOC3LCAx?Cwx9u%b zaQ&Dir_?$WBmTCU3AQ*NbLL-5joey-xjju6FF*i804hUM1-LrDnrEJ+8$Wu7H3=$1B-hkN zQwZT9U2v&zVKYNM^Imq|w&1hfQHfD`b5&vN({@G{t&Lm!9j##-5A7Xu<}sQwFnu%% zBg!K68~3D`SF5sXck&(^#svU2RUCFNDQIoh4n|eXh8GFLO@ZUmo)zuEs{Q#v zLleY|&Y`~(VBI#U=)ZBogB^jSCIE!4dLzCmm7yhLcv-%ISH5ma2YYK@Qjw15E^NfJ z$qz85@@K=p{TTjW$|P^{SBfuAANu>R{Y#jt+PZzuc_9P84xakchg<=I2wc1xfww7@ zp;Z%C@Bd-%O~7jE-v04p&YU@O<`6QKA@AmgyZ{N_ z9PojH2o)US5!^7@JHzWSm8%%)A49w z1D;L(NmJTny~jSJ79G!7h~Sb9FOC<-NJYSl4sTRIx>RVelqZj#c4*%0jve#1S+@KO z2kpE&b`bMkB{sZRp3{u-|A9);-)L|(>6Q36TMG@W?zJ=I$!;{fdweh5E=f32PpkbJ z+h}akr?yY>>^RJ`otJvg_A-htyZcGCS0*-odF9Pa>a@ZE-};_uv!)1_@p@ieIG2mk z(ONSOX@zQr-PM_DbqtO9;@ZE1b|5YuylQCGnyW}5^k?R^yZLxqhqyGe}7!-Y;{PQt(NG)Ebbto5eeG2i|n|at_WWV)ywV&aWgS%>}Scc+) zgBdN{p}&K7i*6c!DrXTxQ<2M^5sTD0+ zrM&%P;+Ds8o4x+gq(1Edhtr6cq*kT0R(-{ft?L)Xqbxcr_iv()Xn7T-^{`$Wy6gttVee_xOC=pG%80dk 
zax)Z_g+F-qXhJqphlTg=y=X51ea_>Pd-;=LBTA?CZEo?!=@%H@Xm!M)zeMN9uDyAP-yq7WTSz1sgo2-oOyc#lFC}tVd4S z_cTW-uLR(MasYLQ`&4M80f( zYPQe5fUoq9?7PV&3NPD#>PX5R7j$*+Ku@2J0yMhCr@W34w{Y+kogvHmrXv)(>FRvP zT!eJx_Gs#dqrSUGw7DIWii6Ky8aP%z16Mk_wANKTkG2ljUuT+i1UXFaKhXYg3F@3W zOY7E6E;?eyKdf)P54oF~eEqa;6I$(BnBZC;jcnH&zAl=28=bGZ(=D%E0#aBuy=&I! z*Z-1Pe$zezP8$&hUwU(}>*t3huGiU3HK=^rh!z~x`M`fS^VOPn13Vaygn|6PRKpOY zLIHMr%D=L4>#PzvH>sfl@gP5%iH1kGWXp5@k}zKGzPnCRvn1&HY^s9O-fHcx_?G`M zQw63GOfJ+TRS52PlTgC&5NTGZ&SiYrk4=813a|a=pQ$!^0j0vQOulp35k@?icrcZ; zb`W@2bCA2@qNCThpO-q)2I?Ou2AEu0`8nKG3WUDJgz;W0&oFuk*vijYqkl%>*ipu9 ziLCg~b1!$TUdLE|ptxXiq4QB;`S975gi^?=$2>G3I3JCs^t`BPc!pN^Q4#Y_~Av0BbxNTj^ z{0(8GfR5*!Kp-Yt`8^j^^qCV9$v9Pk&$z)yz$Lw0x);4?I(7*s2SA4vW-Qntv;4P? z^q;;-CyeY*$7k)3S$=UjKQgx8AE+ArBjDc;k9=u&cQIZa(Ys=7+XUR*@>*7hIfs$p zr|o&}-FrCw>BnI{U2ov`j*E0E{qymJ(7ks@c*P*oDTYdWQ_i5U*43&4j8U0p(lwt> z$yl&bWBn`5G~9l{E9G(BZ{cyblv~)m$;BKrWKG-a=Md-1gi8-=e5pM$<2n_OxP!i_ z_doNXbTt~Iyn2hHUJzcUKIgrng$K?GO?$KZ=uy0^K40_Bi!?mBE9xKe@(~_(#%qR; zOD>jQo?pXzo`T;!Eb8sKZ4c^WSnav*kSU(-7yK z)#$3CuyIx?N%d%|Uh!659Z~!&Gt}r%5ZReN)UCM9+ZcYUvzrlb-l@*a#n|vO7!+a zM;hv1lIo3xiWP;_UYpCV*nb zT&!}4`?W8=bT@Tp{0KQ){rTVS6kfl;P3j}!kPjQ)sN<%U=LE)#^#|<@RF9^=Q(vwloKDS9iYc4LFzswG!Z{A8(Tij!j z{EE)iXDlBhm&bC?r;a*;_s)-Qx1rt|8_YX{kFRyXoSYwrv<`ToRp&mNf3?rWN`vZe zd|gn473*`xRh&!1gl+A|k~3lW&bcG{WvU@~%7zN9@7Kfe+eHcO2RepeG|P0*vsStI z_=q!FUwOBYT-CWZ6+7-D&Vl67xXnc@AKX{r^w^?*33X_imk3NT)#c=S7yl&Wl2eU? 
zxzb)D==z}1VaR|g6?a(yVdv|S+#)uFw4f!-!OU+A(Nvd8^|pF`z}h#fCY=r;*U<6S zj!2w7b-M6h{{iX%_o)zbw|*h&UwKY8ThEi4C4&5HL&vn{`rh7|p5!cA!W`~G1ea`h zgP6)s`5he@hd!8E@Df3N3ey)ZNa#&JL6X$V=RKjI!UqP8voyB+vv?+)&fUVvxwMMq z%sGXG$q0XJ`1GZtZ$JOUBkzl&i~sUAA-I&J4zzaApZ12@=4WDH(%L3W>fm0ee@W^< z^A?I;{x0b2J9bWmPm}k8JW?`@T{ysrahA_k{!TM3mB~{sjCknp;3a}`QS~F*CR(Un zwKgp-g0cPC@`rk+rAysvbm;$6yd-s?b+gL(FF44w*^iOC3KK|05JVIr1ecQ3frgJd z)RluOQjA8_FVT2f0zxknp01f#`Y=RlFBO!Bt^5whdue#h^km$N!-hB4 zOL)`P!&j>RfS8hABAWgZoz9##{Aot#@|Y$cX_(MqV$iFup-q-QU+|^t%8_x5{YOSG z(V+_(?zr0=$e5qr!~19PyN(anSawC)!z0nNLh-Ju^L%@V`MDS8NKawGq-z)_wY()=Nkd| z;-P7Qd&WMe-f(iY{%ZRWDF$>;zc%kRy1%ZjcuGwI-q?EAr;D+<`0bwWpJx=N;%+hi zwi;~|K8sP+jc1GAJQckuQ#{!m|T7p{cK)tFl_v7RBqot+E>I6@D2TS*?lZ8~J-kwOFA;61DDE8hoWJM2(Vi_=g2@MTs$tQ%_71nf#MAH& zTMjBL%M9)!`>`;RWxVZs287 zg^uUS?j=(6Y#tlG`m&UlNhBWihpqf4uZ%tFtIoXlLXzsy2DMPUXtq3>dzuUrH0V)3Qt*hGEizk zAIiy=ze25!P5<`pj8hFZ{Gb*S)Z2d=z(^Ha>)jYKy2FMHOnzsHM|+B*o@u;DkRUcWJYdITe0MyfYx!nyU=!q($&dO73uwJnfm zM68v1f&)JLrPA1F=V`2aBgv}!{u9_H$7z&L?n7)jqeqPYoGi@Q_hV|@BM)@nZQ(U% zojknytM27ngcIu7PqWp~wXR6hzt{EKRvsv6n9luz4Jr6+{H*Slt{AUrlUN_S^)62J z+|{l2-FOow3XefN24m0yu&Z~ zJBbvRjG3Y2OxgoZtxOAo4o4La5);yl?%&2dvJA$CFvN`?QHNwf#SEjzqo~HItjSCD z$9s`J^!tjkzah1;wr!uC?>xwTbUb$rf=jlc8oH+Ks=GQKV);d2 zUR2J#6i!a2=O28s0T1IHs2+?bD*dQp_vb{m<&g6MrUB$4$WRz^8{g29q>W(oNsiX%b8|t%%rADvbH}mvF*P2&42DA?2=TE*8UD* zlV20R7GC#xznw?UrWK5XWq*17y4nQmpTS;D9{-gg`n&Q{kI78+XzkAxq`V{aD&DPZ z^CpyW{76zgS~2BAy|41owAh_{PX2=h?Y|O)_7-2tuD?JYCcGro13NZDXI$Tv5ns%jj#_D{cWZCFUZA)7sqqB3Hgby8_O>v)vI={8~-_SInq5pJ^jkZ zJ;-cxSN~a#ev61q<37Eg3an2NAFu+j9XRiwj8n60c}n5)Jhn1 z75^-~YW?Z#dGz|Ge9H?xBCyKfMYmhq`CyA$vo$w!b8!EThG%Y`e}v5vU9xYuKEXLR zHz(PSJddkBKdkCaE&N&+P564V`BOaPxcRvFzB$;Scv15K@-MJmky7`AeP3byo8JTn z+ZN-zqNnrK+|#l3?L6}_%Wh&tv$Q9xMnC_TP`$tRMjmXlrge4E=K(dgcD_xLz7noosmKbDm(YAb7QDrkXdSV-c~eT-hV1%pm!EO(4^TbWW-sa0IC`Rm+oo>&8L2ics&6q8?FCV9`4r;DPKPsGIH%C zMm(GRP)_>r`#THpipL34xv|xVu70RA5u__L^)411aq{NXR4Dy%BKoU&7mE^oxAic^ 
zJ=^1>I@T6gzhaEbsk+VhSC6ufbB5dFx{|RPZO0tLrMkC!q{XG;SIv)n8T|7!GJkg4 zXjSi%sCvMOa~g65==Ap)^UH(taQW&|ID zL4uoI-#UgqLX~J_{^`PJh@1VO;!RxwnqS`2so-idGAc168O>^a=uy!Wb zoxHXp&Uy66?}{hlT&~MFeeuY@#AY|`lfsfZXG=)zffl=Dx7pB_sL%n|k4M?lxD)G? zL&%MEo9oHE>1r@70pUu{#8c5XuMG$xP3icHjd-{XV~wUWI1g<;5l->DXMX7(Lav2~ z!t@U`;@QSr8Tn{DzdV3^Od}JV;Kr3iHhhb>J-Qt{98aF2mkX^!a4BiCp&Zn6hkc33 z{H_Crc<*oJ$2dJ=%ily{T??z*8=I_iaPE)ONz+dKv+0GPzwqN|v|ppm1zQigpH|IA z$3K21Ry;eQu`w%>97#iSxLOUI1-K^IOj@+4w>=*7?KmncTz*jd#c2;X94!QwV6(yY z{vJ<27_9Pk-0vE~XtUVrzchq@XyE&%e@8M{xwCP`L0d+f1>+TLFO-W4Ej$P2@^98o zo#fdnlvJcWpj1ABOE&o-9#i|V;YWSiamcMtFys3QY@^bNv!%9ZQw>K-Ba>8lnaC4&nDvN$8CA|%ihibuPfqFdV*2&dkN2xPr}=lS3R#H2dABKt(OasQ2hV@f2Xi% zuMW{vf#^GdujE2By7!gc6wzspaQ|xCliB% zS$ySEt1!|5A`1H^_cRiBQR)4^pwh(8sgEb!8k1k5<1O{^8W>t^Lt#|ESqs+Jdy$(V zLt%&w5nQt2#qsVi4kbxb1nE*?mX-P&{%Eb>+fM7KS7+$uS8Vk3vgQ9;YklRXq_d zNr85uVu|+8?i;RNzun1)ET`i+TO06f<=0p`;BEr-N#UDxJl=@lk_~@wp!Nyv(LRie z!eGL{6hc3y(iiPtu2GtEDC2;8llT8|Y%F#gD~)6{MQrl#Eq&MKW3Lm8R0d2KtQ643 zpXfW8%VK#!-)kJC-m-^wg7F5^u-{*i6A+h_6@-^X^TM?0;Zr5b_ zn^h{_^ciui$NW1jIYLmm#4!xlpY|rtyiG*{-F8d6UMxVp0-xmX ztGR}rFE2=R-HlQ47CHM9Z?B`djtWlO5<`&wrF}m(B_*JxU+o=@6HZ|JIY-Oh?N3E` zT@Rm5@_bb26Bn5?_7URrOm}ik$^Vy7hrj1}1~x1?i|iKW+{ti^?9>5U zNbk52A>>J#P2mh>rVca<6+GcX4DnOBF5^%;U$U6aNh)m{Oa~+jDvusbNL?PxS@wN^ zmM__#{-C_^tz|aV^Ta%3sm*Xdax<-9oT%MMJfzq?&v8SO)GI0K5(5BXh)fY&N>Ya* z@IVv_h>o`n`z#COk++&<#He~CsRNiI zadrYiO_lSS?-WA{Kl|PAi`v)Fev;II*8bf4k#C6IrF)uNy2Oye?~OP7RQ4hGKU?{e zKC33_C`6D)0U!*K5rRuL{Fi|#?f3V&N)DpuFK^j^m!u9fFN#B zJ4HpVn!xfmbvCzlWISL+k~+XRrtnIyeIM~p2bL9jqctI+#9q7Q#6ZAASO z{n*-HuzQpLvD>Q{sRJ8coD1_N6`Al{OU9|kA51sh;cV2pcu2<)OR+$J} z#r__>4x`Z>;`iSocHr*!mTeo50MPhcaYhOHDi{1&Okr-yGM0!Nku9HjdFt~#v_->^}lS@GZ9Lw>c8ei z5#r<=Fws1D`CsC2ZrUuLHq`X571R-W-Byg%Ob8|&Xb(83(bU2afFVwcra122S7N0* z@urtYAZb8*z%goc_=0MN*%581>gNsSAZv9cl_~WBQXk_1RRPHY4nk^(M8$djBrK;p z{TfyhKn{k8!m(o~`&+WR2QEF_2YR^xE zZqck9_knRz2b6&PlBOE!gTjlxt2bi7h)EyM$1s_0yYFFEps=eEPpvv~5i#cQr^1JwiX0C*Y3L%~F$ 
zfSCO=q00*SIH?ogq5rYFi5d?Ka#udMQO9dv&sbsEe_wb|qs%j3W}{{KJ~=(BuYgvATn(w%tJ zvA3h1eO`>e-@LRae)e|k6+CKv%bCymS| zru0ZeMyiU-`$QDrA4(hA>{I2V>2KsZmkwWruQxlCRyHpmss1?hIjrI)HeALVFtFDn z9?~}sdprxsP&LH%b z-_N4Vc;r^;Sw3`eN$LRisSwRhsUa#2%{6;k`H=5u4>(4R4qnI)r~^2f#nA;sr1#{K zyaFEihK^Tk#Dl5>l?@>k-Duyu*Zg@gN1A=#>`f-q@kV9{F4=}oyzRoHZWqpz(*Yn1 zx$_ZRvf=gCYMvOLbDq%KdeNCV!UsmZ$vddhRC|QC^1O01$df4O#aX{6I zuN&9Op}&C%qQw(=sp6`KKh;_iMg~Af6(*o`rlI~}mJcSF7EO^m_73sTO};1hxewV5 z28s}WvC+!}rV_ktFb=3#28T!16Z#waWqkPROYWoz#!=bRkUv}bZQnmsxEtY1TGH`~ zdyr5J;g1b3io|`%=p_Kk0TcknBZZS6yW$6bx$@>Y37rMJRE`QfcmcqIVt zQ(@G$F??cxTl{j9XNgTd#@W5I(zMUEfla3VQ63*x%zt=W%F6_D{(-vm)orHR?1Zyf zz)8)g#^Yt14o=G;!tyOq8TW%VBvJhim|HSddD)f2%O7{+g8K3;Zox;f$%Is^lP+24#i%{kIk%GWM%{H2 zoNxKz+Zp}1lYO83OQ=lKJ}FF7#Uh7s`i~vw3wO)z1p$>T?rPM>FXpR<|oHk-brX)*m>TQ!Peq=Lg7pk!1m zhcz$q`1)o>>SY|t3q{9sNCcNafq+UwdkUXxtlH17ytby}=vX(Y10bNEu$BL4>48Z@ zJkK;)m2OxWgO?}RNzFn*exNcyNkA^@xv2l9#Mpcob}T^ZRSAg4CV#u2!d^qEv(}(~ z#rcjxkUtw<(SO#NgdUd|sQ^$gFv;M}6v|)eU%+o~?$A-qBAVO^0t++FYFw}bDg%^& zHcTC&!aftmav5fw>Ml$5e@G4}5Ku{4{}F5%QqSMAzY{sC_7bBPk*)ktPmrJZx{=DV z;jz(xis+Z7jP=Kc7w2cqNCklc0hOTDU**pG?+Bw^RRt~EN0X-X0vI*cKTt2ABsBe} zzec@^sGe+C?rj$&b$|jS$0mQDp!FUd<9Jg27wVM_FHq?|bSUM8qm_$-$|1nR+rDrB zhax->g#zMA=96H(cG2WQ5LkF$ewWPpJGU%)wRUzCsYS;lD)@L-#Bf0Hd^HM-E*C16t3DF`^K*HGJ3x!``ML3clp zeLG!jk%#?%-gMBZDZ|DeGBAdgDXa zZX%wt&ZPrwi_q#Cr^v)J=g~r+k@>^t+aV(zL96^DcmE}OL7H}qEUgEM-r{>~x_N=h zT-m7=ZD>1WM1Ccvx6Ld3INqNuppUJrvkgN7ZUeO%4Sik|3JCqrUG^%b`;pfmqVRrt z<18IaG2~05CZe;^lEarBPfqkBFVpeXjd-@9<*K!8P%mXaQk{+$oJ1n=s6TA@M@eH| z-xdTiQWKCDOv)%oSQH8fN4Y7^W6yXU2=L z{UIAx`9%?Vj0*%nt-wSBMHHtmAP!#IQXgIzN5(;h!VsG_)W0OPf^jH}_3FX{VwKPB zu`l^?QU|_3a+1`FHiHp;mv7jQnQL!Wg^)u4APl)=L;h^#*V#?9vE9ZaCHo&F$A*uy z&}^=HpKX?{cCbWkSpYfWH@qf- zOE&ojIcq1*9dLtO4gg_@mm;_XN(R&h?o%Q6V?iw6ckZF<&uXKks2RiqN(dAQ?o(l< zibEow=QHt5?3=TU^~WYZqzC#!d@VaQX2VM!I6>22l`k@Ch^Xe%+kX#@kXj%FdD!GX z?aIl%cG#bB8#Nog)hJ%c)vnA_qd!nP`p1Sc?jIR^WY7w9cIM8E4U@R2Ty1lYl6Je$ zqMG2zw|8wtaheI2AL}1QyXRb-kBfcqi!%e)9@I!i>6_I$4b-`Y-tTr@+ov!O>GD#~ 
z#+`~o6UVi#pXhQMolI>r`|YSZNGYoAkGGn)@a63Te?>i_J^>v&gJab0D$WX;w6JH$ zbELTQ!o?-!JiNPq-&x~p&){>t$w4vSlW?4+ZQ+1}xAAG?r*p5w<)hf$r+#)DcnXD9 zytmizj>8&5kJcL8h(JFBTCBLIeFs~bOr21t`~n9LK6kO*!9wgXb+p3vz7#c_+x=2* z4aQ{|ddF6L%fO5GA5Uwac>P~O^_u2Iqjhxw_v+i~k~h1ecOjZx}of zh17}T7M12ZoqPSSRh5!dPqO|$b!s2}t981Rs)PO`N%d&$&+C2m7qO+&B+n5$W2HW! z3wXBrpMu-P@BSDmMYW)s+3@0c%zG&$sUEGHL`8n8CI+A(XC{)*{@1o2P(7e5wEB;7 zTFF6uOmqgcGxn1@F&^qqlIp>IDvTPvIfa$Zu$Wm6ZB}Jpv~-lv(V4roh?RKZ9;iJM;C9dT8GqTy+RcR2DoBc zfTQ}0%jjy1Q!jll0m7lPW?fn!K*5Q{<;~s+(7>efRr_1r#jmCkA6g&3hTScGnO)FK z#=IH)y@4Bw(7mFds7C?i*vRanyxG8;SZn#EafQh>*wpWFl!r|`dRO~v{kU^akmAtv zY=;FIh?D9$>7jly;@#S-HZmw1>6iA6_!;>MCAE%EyT%VgyPwV;f7CV>?RK8f*+(}9 zDK1xDUDP)RIn>+NwmW+jS6+=BaimW+<`v$IsF?q6p;k@XYem;=zfwwwOYa5_$<>tI zi$+roS??app+2~mYRr}(Qk`aVIF*gtq`^4_UNo4MiRBOwkGr@W__Q{NoJPl&H{#(o zP%BUw6kfDIl>BAoz1f=r$$H30nDEncGS9*vs`b25)z^m%2M#I38y!c&K=@-*O?yX2 zC4Ju(MJ}KT#;LSIA~5wwl3KxiDvYXb_LwLeX==dT5#u1eS@66gxEo1-NXL%Vhs&-iTg9S1u z7ZvK)aNZIt7Oom%|00T9LVLjBG3hyF$wD<1!tT{)cjWn@@b1&RXsQK8_>tq(-E zc{lc~yc8z2%^TtYCGGXs4VwNFlv)c2`Ibh?8HYTi>JQ4pCcnTuaL=?hR!y$IVV7oT zIGQn^g$9)DcfBhrjNrzn3c_zikmJqNP{tGb+2n_Mg7S*5W%q&|uMVnuR~|_I?wy{E zAl*Mu8~R?Wi=*^fSoAW(R(s_?4ojJbwl{ki>JhjcjSe+TIDcb3p7|j$tHX#RXx{$Q zTDmRW@V3s)?hRfXg*F`fI<|P-1$0Gc!Q2@&50G5zR}U>uU~yzpC?3*mO?$6z$@t;o_Kx=^-oRS@d_GOH4##W03_klsBOmu&Y}~R_eF;{% z+iq1)?o+(#p2PNYPhVo?YXdftZ%?4qUTudSzH|?5*)UvCV)z)L9sTa#Dm;(Osqa*} z9%r=wIg&qL{}LJc z8{R}_`Bi$R>d)=(MF!LH91{eWZ1QvTbKg8Vcam|I1XKnn0kj8|J_;z|;3pS)FE*;a zB=vGT{E^9G*60ga##8J+jdc0lFWt5+_{44(aay(EOpd>W; zqn7tgC$y#&cThR&Mc$x2;P5ue+g7NDuI>O*7{v=|z*FbUd%|Z~&k{KqYDY z2hU$p&aZL)dcj&Fn)IjR(LMz7vz32<+B3QElv9j*{n+qb!%iedYsHX9>G>;KH{>r# zWoY_G-tlH%WO>>--Sw38UNK0HEr0n>=hAD>9hRa(vX);Q&y11E1C;?U4ULMSBVTg~ ztC~Y6s=vwpT}jaA!hBDK3y3u(`(AoIaBs4gDs1Ca^MqgrslAr6mj9wW(Jg%OVaD+f zQ-lG3B}gm3V9=;MLc0wyscRkcwsp4l1396+#n-Y^x%XYl4DTOzVccu@2P#Kj5L`Rv z_{ISR?+LYo+7Uh$$5H?O6YBlaW@Ehn*gNZm_BbmwqI(V2!Ji*4O>G+%jqSZ}>2w<5 zj<(FZS{Ze93U`RG*!##GPxD9 
zt>34u5f;>x!v(kpb6cSIh3Xr$e1pgn^o0909!>zN0VvMzvnK)Zb5{3PsxCn?pFj^o ziwZ@j>249UIpy^}kkp~)uhh7p$Tn2(=$;rBFKjgFN!xsiSaa7Nv{=6 z)u2Jya!7f&nZDeNXmT24C<^B{m|C{{?TmUQXU9Y}`Q>o=1qY|}d||<;DkZ5NO{J+E zO|K_7$CZN4f3%jGbwfN!szaOjxpnF@Lu5L$yJE;%nJe?f)A*^AT)7 z5y5K(`B7o!k~%;Bz&2Y~cDeNrjv)LiL8kHpH3O;z{gHx+4tJQMmg;BP@g(EkL7;j- z*=SUU=dh)Y;24FUCw2P&m)no|93G&kK-p;J7wsLj@^{nQ6TFkr)Ux$I!Tf2*md{%% z)&HTL+3;B{R+PO9HD<*B!PL{w0XlCvic; zeJf4wWAw>!kzZ*#HrlOWIiNZVTkw{*%d~%r4-H#Cr937TDaJY57qm#jpA}y?;JFzH zKb)w&_ye^d)jrtksQXi7X(mV$uRK^@n0j^y)p-vAIMRC7%TM& zUPz8j{wWnv`yW@uFfLGlsRolf84?zS0s<`@%~^M5*?(D8TQKgO0B;hQs^1;i0>YyI zrn|HExl0{}0R0pwE|@%;^H8+Sc)5;WgqNu!}QtB!WQe3Y^OvEgsU7Crd&#*z^)X{u@CPbG2iYhqbQ%Kjj;15Gw{m1+Ze+mtbX zf}BnMgsb`1+C_^PO&yqO@J7+<->8rL6QWpu2LBEBBI9YzVB%qc39iM%fZBWap5JSR z;kf^D|7WI(;FbA(iX@U=zV7ueBJq2`d4s{{q~2hJVa`_n5D&(S_*!N|bG zse-v_=w1E3a`}*8^fkI7snxACq;x!Z*6~*P2)ic54wQR}94wumylq~9bZ_+is5BEJ z0bk*az1b6F@v`9Ia^1Lp36<$${*dZF(qBR9a@_f^PaGpk??4v=RSJ}V>T-gfdcBaf zZ~tv0Ljq;?IzZQ@LdCVk&-ek}yNN{CK=K?-$8pr15X1w90A2@LH2GVfi-~!ot-f6v z=uIX7KomAQ=pjDH4IvdRO1>zE`2K8B$G(d%klu8>V7<(;)% zcXeXiYa&TyXsT4PWl|=eJFWeZ$w#6Yw}G;$lBAMV+?&S(nygCIE5iBOuOg*PIaDW5 z8K4Akp9(9_TrVM}Y$|zMl^aApfrb`FNNA*zK!N^;1ty0+U!D>A%M)@(Gz%h6((!Xn z%iMm0aPg=QS%KsZI^ND63B?fp*y_(D;y5x12w|jhKxKfEh*K5)jmo#EB@Tq&=w&-R ziqxY$;BXhqEPwXaaeY7VqMLj!mhG5o!IIvWq~4eTaoEZ)iYFLOfSwTJ8coq3Oi7o< zb7AaGPnqCVUbG7tR#%ShxP1@OiOerMmh6m;wEOtJYv+%z;k#;!b1&ieo~QbD9G8r9 zK6E!vJ8=zFx^n_q0Pn-(A&K zr6-~mdS&Mlf_<>TKHEOOa-(t2X&vN;DqcYYQ0MUh(}%=50=rsvl;vLivT~ zSa9FceKKytJ=y7(_w8^I;aT45GT&t`3EPqeq}%GBXos&54|Qcsxuf_yanB|68+A!YE;2V?9Ry&!Dm=iCnNwDZZtrt7Z< z-`RK0a7npLp`y{AHjFI=5)uG@f;X7ypyGK-})E_ zDfv-JBt0FI1D+1*_s9GDp+zVf|HqjF3Vf@TI zdHRgDen|18r@{c6l!3c-u|^;|~yrTyP5 z|9_a}!w{vZDiy;4$(qnM4K=yl%JGg} zf=LBB-pB!o(`Ppo`MoBZn_69Be4~Obe^EZc%^7D|GT|o;V%*yR6dk-g5Q@tuAeAEBSrPkEv!^-BKcw4$Mx=-hj<6t z=ni^P{4Y`cO`GKd>xQn|HnZED$(8J?A5;jK017YKmcm)FE3T+@FlkBC37m9CBu<~h zkyQPV4)r|BI=`M+-1e|@`>uhcI*fH85btbokU_GbGDbndqEJBG>r(w_EYF{uK*uXK 
z;@P^s<~1j$cEkP2k#xL+C4y|&@Y9>MOHpg<%edAB^#!JvW&%ZDHWd`S8rc3sl+=-8 zvgSX2bnA)dx&Oe+F+){8DdY&_EK^eT)2c}{OBg@(*k;|xU{VztS}1r_<7vH;vwUbz zDwfgWcb|#DRoAzVw(}z^X$f;Uha1|Tt^QYh*IzuR#Fu z4ipFU4b1YvYbZ=d_y$qeTD6v;J8T(eIc(*(4(a@G!AA2Y&oX11T}|>i#Eh4m<#7bKW*ZpS>Lq9SH2urX7+lRCIVeQQ z;)oODITJv2fO6A@xr5H@I=+6l$|Airs~PiWD?jun=r7`H*{KgUPbpf~MUAokBxm`c zhpBu-UWzj3g%1xuKg;N)WRt&Rls@^1Sk z-Jy>UicEYpsr4}GoC~8W%lpf?xb}u&X4u(-Xvf2JmwPcMQSP4I!Rw0S@pGMoS0|o$ zW20X^Qig2}!bc00{Z|?KpdL?$6*%p@ijBLj_!8RV4xXYD{xjY>7~ff6@kDLyEgW2D zWW3$pA9Xz0vO`YR6YO+s-mS$U_pn34#T?BC#aM1b>dfe4N3owq)P$3(Q*l(E_wQYg zW@67}PLG>q+`<+5s`Da@-4M@naa!u47=*b#4=-Gd|CcQ2{JqZ;!dfa&CaSAP&1+Fd zOmQj9?DA1|sz<|Hw7T_;Seep#U?1%uaz0H5awav-vOzTf#rb_rSU^ncI%DPju|cFR z9k1Mohue@Zq$eDzKSFBwE#!*G{Eb26K!_;R6$fOWr5C)b;cPATCe4L7_;C#p{FiO0 z?^?EHyGQq6GL|M7C-_VQUXtq3#N?S9hVj!{<|Q8MhN5tm>{LO2LU)yD7gFgr zyjDa0Z1PuL_}XjTr;CiL2B;oTw%_HXwq?!f+soj?Tt-#PmOqHyR8=p=GtbhYzX4@| zenjD-67%bbvlkNAj~W<7YJwob5bGnj1gZy=Nev!|LIKfX@LU()un?*GgYv*00PwER z^3O_}QAh9+k+09tBmXs1$+>y%hV^8Ff7X5~u8iA4f$9Nef_kBHDd0ILJr?3t9KTr+&=Ly%I>F{fCcU8Pd`(6LVN!yX##@U9`mCey<$CV?hRuX7V z-jhDnn?mreSK14flv<-M3(wy#Js6Fy_t@o&Wy(Co9xrJgg(Wo1LnFXP88s%xw|J%sM4#vCedSBG zQd$r>l5RTZG{ON^?r0yZ)Mrd&11B=~A7zlqr zIlu(d>QBG>*c|?(l{-p8R=Sb1#nHuo@xq4j1C$4-6YL2PM-&j<)Qi3JW%wO<#f;sox){ z6a6&O@i(`rznnS=wNf3aZb*4M3~GkHCgsm!|AF(?Tp=BC!SLB%KPo%njHxr%y{V`6 zV01|z^z>CM$}AXr(0fiQDyy8oWXAb;%-vPg&VOGt`d(IEliTqsDsI24prc+Cs`m`s zee82CN*GYOee;H5+-BH{Ju^#P5VF{+89&qq%LVdF2VAqoYD?@A#xHWincE+a_b*99 zjsm&2A6lnlht3~DuFlIuE|*TI8#_M0`lkk1&M5XlzMC=}#wTW=FI|%tdv!=g+88WRc-pV*DlcZLXFN^9gcvtI~ zdO(Vrfu@tBR&bvxzi0su54ioKFK1mTZwmM(Qmb_ageq^TzT-md$H^}m*J?*dRO zpa>u@6=p4*{+dr3dcA(tCx|qpbqtP~BZ7E;pjN1>c`32z^5t!Es(w=PLw!h6D_S;a zzBT4^(6Ta%#8gJp$kzWsPRLJuEqnco;~Ozj(|?*jcyoZF3$JQOTYedk#a2Acc_SYA z;qo*&+k2_O7=L_Gq!J&EGP}2|^EwiV-^ukg-`6n-8SxGu|M@r%aXezzjJ|mq5AQzT zZRNo=2e+*XDC3Ub&LHuBv|~=6WU`@b=(gw})?Pc3-wZ=@V}}>XkyB zDq0W})RA)!;o1#v^fT@veEqG-y-D}bX~FFBeu1Uvd(e8zxrIkj=#4&lo(tU2golbz 
zys6Hp`PVV77e@2Z!S2=J8@uen&8~Wkxt_caX(zw1jQh;{KQ9q%Bc<#p^e|9~v=JwI zZ{INc95G5D`vMbfC|B|hzvPb@a9F!}ayXezOPGUf8@jS2m4Vr=xT^|?KHGG3%I3I} zcF=c4;kt&Q1o;7#fsl&Eoq0W*SZ%7~-A31yJVM7S8a3eAhN9vp|Bmk0&Xb$zcx;H^ zk_}&fy{^UT1)hv=Oi5B1NS6u)oq`|pZ9ko_*2@ZL@+|GNs$Ke^@tdVg8RWwze=uqN zqOkA+BbAV(GW7aaS@VU+YiaOy?%*i$cU#z~3MHuw4PTj%UPb)s>773O*=4dGI;v3c zDB1TC`2{$d$?IJvYv_0**-b&7#goU*OGBin7}Ot30f3UwuDX$-w{+g`~{*t?Id&h2x_c7Z`xbfGLOZNM$4X@~~EEk9|{G z^T>31{c)L3BZdA<;d!4`+hDDwM}ArsQK!+;@$v^6RUDQTQ7Op@*(nw4%#}P?;Rw^aaz9eD0T4s zshHZsuc`An4xFnf8Tjg7;w5U@Hf`E0A#2&oUxZw!UDh!x*{Kx`uRrPCPa^xe?TY=2 zgUH>$frMl3Y2)4-Q01`j1gai}=H=HVulg?zB)8M?NsV~84JM72KW|gO4}xgb=iEE9 zKvEGR3KQlwe)EQHXcl+JN<-DDSsop4;DE&Pu{&^AzQ}l)bRd**n;w{=|H(^aDk$d< z$oA`G)ZU+502vBHycoeHoBXF-pC5SJ&%McSsOVf2m6lObc#wy!{Zx)cZyWo4>3_^Lz<2{{MQcC#RG(((bgv4>%%>qz zUIvgK7Vf~?0^^XXXA7&7?}+1@3WuzsOe;_)as5wfpcazU3eu(G;Z--E@vZnDH0KPo zW<1A+t^Tr}j+<{~uE|J^+3*UJ-^X~LHkRtY;&N~}lGIAF|87{mYuSp`{)`K7Z23bz zw0fa#$=-igDVS?bx#qzrza+JS{HRdD;d2RLvcl3PmZ` z_fD`r+2sC@iwhP-zA*WZ^?%82+7qE*qEJ9M@H(|^u5Zy~`Hi}cz+Zl;{zLhh4^#L9 zwWDv-Uf8C~@)uiI<87uDkt_CW#r!n>`0Pt7vA$|S*R08_kl(C=6(RY9&=BXTs;WDw zQ(Fxz4Capu#!Bl)R6U&Ujg;h{1`P^`$DhMotExNa;>_-b`|LGKap+(ro$Bf@xco}u zl(7nVnB#Uy&~kYQF^Q)zYdk6OCz^jy7nOhWn{R_e{w4cYe)JC zQeNj_eeGs7kEcJv3aHvJd!PVghv=O#9d98cJ73iyt>ckkq~*}+!I}R@_5SjrVM;H7 z3W3=lTvI(=v>=m=mTNcLCp*;x>PF!m%s%t^Bj0>{)HRkz=Fz=qQOw`;p_xjH$L?kP z;iby%i<kr!8U~1S@?ftCq z61f>}WF{TY-Gtzh4X>ZHhz<_vP_aec#`=(x8!2X;3OnQfQRqo<&JBLWQCt zGG#~-<(lVGWXhQ7ny+~%ITwXg6p0cS6-}m6ns^RA`_uin&;49q&mYh8yuLqwoMqp$ z*IsMyv-e)_z4ine%>1Oo7z%RIKSHLZ&9Eed?S%AjsE5)es6vfE5xMfmskgbSQz4JJ z{e}PW0u6_HO#CJl(R6Z=s?^G*zBJg%% zfA~4&KdIG9PtP@R)OT|2Yqs?k`+XPw=^a{!p?nU9dgyn8GD4;ll1&_2w7$9Lv8;7M z@i^&^tK@8wQB4~)RGuOpM(?ErpA5+0lkdyw_hArkh@5aMQ#4tc9vKR`E5b8zyYPMHe zOJVs^q4$c*FhLOBC~k_+h4Fe(p56?2P%Rga_uklwhyJx0-Ig7leNj zS{P9-NyT$SkB8}724WE>I?qgPy@f?wZTRW_(h<9NIcR&fp9ki8TJo7z>kIsPiMrD{ zaT<1M@tFsV_1T#7xc5e@1Pw5rwJIGdGq2-D%@5#ZR0OVAE_~_vx5 zUOmJ-K%;u9$tlVckH35x1+$s?l}GN?%f7iU{998bJVlBQB>a 
zJ1J~;bfK;VK|N$ThEivc{e(jD&-0{!*w7DuJ^!qI<0OCT##4M0QUco};yCet{PvK) z^nEx>SB=ye4)vIdNpL-*i~LUCKV)4=Fxw-_hC{t|DB(d$oSSqtX`AR&AC}W$QT^eh zfAZr6w^R#lN9`4Au3zt0c_NUlsT>aVnCkK15rxNf9D^=CO<`+lIO&gy1Lb!pJaVrN zC;s734_#w7#K#~j^_~7;oE^$`?dEW($28R93AHFLSni$sJ*y%JHX%g@C}q#zrgAvc zV~WO;XkT1V9#3uYYW9I8OfLq`Z9+j#e!EC*noa9GMJ@nFWpD#=OM{+pv55@ClJK;Mr{!+I5{{l6b zcjQ$}%i1vhkuZLG67Pjt6Mg*DyOoWWq2{!4#;fGAZ{(+~f=Bll`=Xbf)Rw6I9Eunw!%b>vl)ZTcTC(Z6_! zn7lOMgA*EuRi%|3SeK2Xt_*oK5FesMmBnNNd&p8ff=BAl#lSgc(_Q(;^dnP)x?CoH zo}hg_llZZb*Z_AaGM+y9?6 zJ;R{`)AyrUpZrSh%I|!rx+en^P=*Jo^56L%4jq{C?@deSCM(#SjoE7&$}%B{>K~`_ z$MnVdgACnAKgt}`m;G9s;m`qnPl9c5LK83%^{R(yux~IO^ceYsu7+Q6fqrxnTQ4K0 z{CiKk@3w?Ew`ur}I;6>P=z!YMz)1rT=Ib`+;kbxVf4?6`PRsBecN=y28D34fR3z%p7WxnO z5~1=YsELy=ab)OS)8~>$GXF34zj|!F)c*qAm?s3?&S^AxX{zALlP7AA<2A%}4(mj& zq)20ZFRdqSl#sxA`uG=msT$*BcZ_KW*Qa6po#r~n_7SHaKjxBu()j|T@V3~$i+zRV z%TN{@tW3nOoP6c4Y+a9&J7Uewyky{%S5q|8ua)7RuC(?X!V9!CXU@t?y#?5lYC>Flo{ssD^$vK?7~MbL4mUoK}5($TTw&e3_R2fr$Yf}HrP52S3*f0w}4 z%Q775q5Kf(4?5Iqc~<##d8D&!OF-XwIMn;+#0!J`6WhNdz$cx>p^o@D>2IG`wmSe% z19_C;L8^qIASeD;8EQT`ZAmPq4)G6%dZ-=}@3qb!u7bGJ78!wV49L;5k*KblcSKl!A)k<`zN*=(2PBfZet zui-;2eLDX0 zM33VRlT2)b>SuF>h;4?m5(Wr94bY1zd!0ELxMfcGO3)TrZRJDhD(YBx$gOowO)_RI+su9W- z8X$QHS}}F4Ce=HB7yQ&w;6Ah^7!EV1i>qKLh@cc|dMF*@{oq=k^08wjqkAJ*OcUyw zI5o{HCnopoio3yL8jyTW{6`mR{ytw7$hK=oPy|6MRGtIr53IFnO5fwYR62lV$JzpKwF2Ve4gbck9($Zvy4I2AzanY;%+YZ~h++t(fXBb`UQr z;G@0jTCd9fjv`L-H!9_~dwgRLjX3dBUf0D&^jfp^LLg{`O!V;j7u|8SLGNoch@gfx zP$0BFL!05yYSmDI{q|9p5H66|O?GJhqVnA} z7L=G3^Zv8rl~ew2TQ23EvmzGA%>2D}7*dlH|DGiZGt#}|*-n5VNQxj5N|&ItaIflu zU&n8T$-R$(d8on;MDRAn2FSo4C;1msVE@xIZd8^dQIY=t0_~W+Xp8jLHkr$5VpDEb%~84>iQ{(jt7qTw!5#1Rn0c7z z;6f)-{omG<;yC}F=J_hY*hI4sr;JTIu;=qB?Gv~|aJpe@0GT@mI^29=Kc?Od|!N&0mx?PATt>eOUByGWsW zS8*iPdFo4`wD2vgwI-=-a;`6C^NS~S@5T^p#o1-wxB5QBr%q82k?_pM=GdGuNNswC zGSo%cL{JYwHq?(2 z@4b89x06XfEsx)x$9kPM;^!nE#Y6cW3XlBzQR?lEL}#(TJ>g%V4)Zx%i8gU@vcDWL zA4d)S{z_|%v0$7=#%5cLXWn|Hj!Ujs|2~~nw*)U?)!g?}&pb%P6a^bJ$0$bPWn3Qf 
zM3h7E-nt)shdp25V!p>GNt@Q=lzZ(RDY?&a6LDRh)4mMi`qT$O5xq&+jt@dpoc0!C zLiDy~A-__rzfOneQqn_=BJwLTIX@HE5xhKQ{(539nDo*U)q>U7w^NpyPP&!2B>8>v zV^wSX-upvcyN(9pnSD9>$-xoW`X4)Mt5b4ugE(V5-usU+(zHJJ=Uc)s>h#;%kLTC? z50pV`zX%r(s$%~eD5F)j^?N~)NPNZhCf2oIsEQ7UGH4Vq^qxU}Lb0g7xAr>A4kaWP zAp|CG6G|HIzLilBs_vB`AMVL=5eec)T?cA{C`=@xU|933AS6rv^@(QZQD67DU8^ZN zC%|shra-2k*I?xZC$^&&Wcm<`?*AJovmVvHLCPRYFMl8>q8JOO znJN}3;184@4rP#O7)pjg{_tf^yN+@!9AoA`_lF-r7X)RP%1>;0Lo<2l-E?`~>9HWi z%-{LPYsBgM$v!Q0To@PyYD^W2ROybPY;xi^-d6qd{d5{MBZdKr8Dc0n9Lg|>(VGA9 z6Pb%tT&O|ZIk*Hd3{Ya(_ekrn+S@Uw4=s*mX}>5rPWnr&|MJ9XvmaY1HXOP` z_Skn9FfQ1HE6)#RJ0Uq7%Ao#`;17T6$tbv=9jMiMo5IrmkQ`3>tGz1_dtRl>w*7J9 zr?knMREcr1ZGWh~A}F&Q73?5okZtx!({JSlvy|UR^)H>YXpzWIUAFQ$=|7|nh3$mw z2%*fy(Ea?~Dt1`cEsNqqaS9DbH}%u1%*p5-V{2+o!hHyS~K5$yytK z3OQqD29KYQU+s$ZU7;SG9N~zqSbm}-ZizYe_G9(Z423Yv*veSE!!Z?8UvOtB-unltJkd{DY(V@7}M(T2I)cYD?AczA6*clm0ShmkcZn%~-lJ#E+m1@@}Bt z2}-lvTtr@C{)t}Nmj;)aUXUn%HVY$f4uaAm%<+heX&fIvX=vD*$ua?nqH)q+{d-&D zGMiMEwH+hn|J-{aRC;PA%jx@wA3+%eNzm^ERcKiDfP6(-=IXo~iLjOF1&Oi`LqSgZ zkDWQaM7aMN3zQuxzh(<>>d(R?5Je0Fl(Gv$LF7$BUJ2BWi1*m~v%kpHmh(c1r{X~f zF$_`}3vF#8oB$=IIH zLBC=mM!gM#ByvEd?&d!aUng|KMp}Ljr|0t-H{N8sHVX~C(auli`fs~v8YXBrOTDZ5 z|K9BjB#dS#nS2*xHa{fNq`&k3FHnwo0#2p-kifP;MNE+QyUNBAMf|xOElur{E>yL)3<0dYOyu42`zFd#Lh+1$txCu zmH0*h%bWV@#TX^vV8oiWH}JV$9oGt?({Weo=FSS;XZV^~Z|}zKkH7^@V=`hc+{Y-z zI-Z|`Dliol-omX$MSpp%ZZI|v)fYWMy(JbW^}~qTyV}n8lJ6>gW#V6bm4U~tfk))P zL!Ef+<MpEv`j1Ru~p_K)B2 zf#3Ap_HNa`O8iG8t2;op>JOZ#tY19%zup-=gS{jtUp*riUnSpFd&le@Lj-c%WG4}? 
z2mc1X`Q9-DZwGSlm$r`wnCI$O5QH^_xlX4e>F{ zS6Pi3Gd!FA%W2E12pG#OpXZPKf5Uv&J*vp-{ca?S3MIjgA$9+HDq<+eNq<3J&(BGf z*GFyh_Z4f7IrKUXWRSoCN>RX25Me%qxzO(f9X#NQOP2S8fpjw4W6n`~Mc9prfAD}S z3%-uFDA5GA2V8N=|8|iV<1YT7!g{8PNfI>2kQ`3@b55^1`Xe)+?bbDfRS|Y$s^*p4 zlUoW7u4*_T+?dMJe~eWATdw)z}fChj@rK>^Z&q=-l*Tws0B+F5|XNL4(zdjh5 zYGiU(4Ke)nWROXW+qS{$=Z)IbpyW_~hr<8C@OqGey{8(vm5|9{s=tFB z2}_V71C;529Av7I$s4})>Y~}=B$w-Smf<1F4|3}usKg|HaO+hB&evuAJ&vWTNBo?= z|Ad--kKPJ|jM}|3l>7Y}-FtY=hc8 zQ#UiXOMhRHOThTdP?q*TQu#`!H@%k=a%L-^)AuX%>x#+QbdBvy*S|mo<^zgIp@EC@ z`%dGlvnx9!t#{(Kk?Z>DA&2pu@Im;2gf?C{AurH4M+4`1$}{)*z9+bPbEvfT*JA8h z_vyy6wdELv!t4B71;lLPTDbF|J=}TYbs+ap`zjyuR3^ zrH}Ehxmj7;>unJfAowx*{>Fk@QQCak_h`^#Y7?aJKc*MON27eEXq}1$%>^4YPrbQj z6Al8Xtq&+xX^IVyfj<-l;M8=?#Y(WHouQ*f`GZPQ z+Srb=heHRH4}y=!&gUEXS?OY~=QF~=Z!n?19{5QN6$dB%FSS`mioFSD$v;Yt6aO)* zNhgoH$FTKGBj|wY4O97J(M=?LX2y)KTkgcdUi6^{1Rrt7kbDF=5OhVJ)}i+dvVGvR zh3nt@ur2E(7`tebT?6~eq!4sK5CDBoLjS?_wsE`j-@Gp+w)QaR zzt?TV-|r3ZV2Z`Duz{IB!0qqyu*-!TN|7E`Z8uNUr#Za;cBxw$)2iCQ5p<%oi+<75d>k?GEU_<_|aE@NZmadEy0w%tAExXF0`T^7+z*o(Wt6UR)8#=k`x zdL4U`fL~g9v}WGkKI(%bG+Si7eBw)XrH7W4ZFW7=2Xq>0Njbcr#Y_uBA%!8f;7e~5C3pX z-OGPM1XlgPUoMPTbn|qvvd)Xw53v4{ieB63dsyW7bl-JHiZDSlJ{9V%TI|T$BeM=s z8ZZqzodfYRTCm>3Z`>4{{#(}nj&9dF)2NU(f7?5<+VW$s_8u6ySB9x+D>wyrlMl+= z4ZU|V3hpqQ28P$)rUm`{zkybRTd558CZ!!~jsyydIN0|79O#3(A|#io2@fv1iJEYD ztN*b`&}ZgX__J%4Q_~9XP*W3I5(#?D{Fn!Zx&Ti6qzjS@d_~0BdL@u4MqZh9DB(fM zAg8F-E-9%@VL4C+MdKvj{;OCo?bAPvFF4~ ztvHZkdO^$^VkkHqTCGJd1}TF)C+vE>R^APk2`KcPIptqs`zO9RX7((m35nsvkK!SD zL*YNBVK}rBK@5YGK|VRVaPRVf1eUEZC>kgE$GD8BpEkvhTK_Chub?#X7?!P2NIrrL z$V4->e^P~G7r9I0uyQ-JlY~GJ%B3b2zkO zYG&T%?hna#%|F*Y?}=sEKY-+OD!=kn=Y=^5A)|%{->-a)?S0l_g+kG=hDSk@npH2)ou2UFUD!`VVZ1j!Jz8SYh_P$zV<>NX9$ zk-$OvXK05YA%aL7{t=Hs=5;?WY9y4*vS-alTF{$tvZDEZl9B;qT85NU? 
z*Y>@>bK^oCcD}Rl3whNY%x&kqNnB?xc<-dO78RwJlf53 zdO&m}&Y$I?%}Xo7A0z<$sAYe+Vb9eR17Ct{i4wiL{?MtrP_HI*#x1Co&h7cz>#+&21+piU9w`*3$G z{6q&j-{ZUAc?zNHaJKu|dO9>IMov>L$KiI;^!wrmaM%&KOh_;Xhjo`lS9Sa4!?{Bd z&tzNjVc}to@!Xs5g62f?3HIK1p?Kbd)>WyuVca=$UYqvY5I1&oREzW`N%q42yjItrW!l1HD zF&iR^120v4&Vp7hotgDNv*2+Ax649>EO_xMps?3J3l5%ne6Qav6C&-LFRb2>2{Vv_HI~_31!+rZNf`4f#TG6nxC2h>MciA=jvsEM}yVfo#yG_ za6sfKtsxyuUUbZE#L~fgw|m=Nf)?HAkKOk(4a`CU)XA3V5G$t|U8;}7i9AiAgbcaRzllq}zLOCKTP=|KDLWl5mr5!xVmhz6wc z6^f2qQs9CAq5z*;G;o{wbRu3K_ z!@ArOxfchL!9Si%$+JoViSn!mA$CbXO-OBx-0& zyUEbQt^fR#Q39AmoSW;Ynhb(xOX<815`a$DO{tob1Q*&~IXwNI0J4qCCOiux){vB6 zCa%a#gyJ!g_urW%!KP+imz`^qLF$z3@sy?{kPDckVJ?*f`%Sa$BX|?RM_=)$t#SgS zXDqZfbxDLqHF6CGs}pgT&YpU4cnfXug3+JrYA&Z*pgUMU$5ni*f`zp9Ue5bO6( zHR$o6<~l=Ze{mdKTqZp0xllZ`hc)`sg5!bDFuFpXpg+0B;uoajptEA!=3R&4;fvX( zgX34m!4i`>R-!F&uyX6y)i)2vK@Fb~UVJhR9Dwbp9-RKK{uoh#KAVRE$#ISs32S0LcVl{3UwRA_ErYR zg65N6YR~0Zcp4|#VjU0z>s7Cw{UA$)JH6vd`n_Vo{@bpAp0Zf5T=U_m?&DaHe`|m8 zf)EuBx`aAxF^PfL+gmLA3SwayH}5fbop1ca+v>un2WL_wsBEq?B!z*h%`-+OW@pdq80i?F<6v_X9*Qx>^4|gc!J-9G zUt?fn1I0_=$rGT6NldDl?*tk*c07>SR}GZv332mMO6a5~()jK%1t8d~&=K|{1Wb;s z*Epq}2_{vQkL>oA&?{rVoGhuzhS!Es2OeF-!Q^mK@6IjB^m?DHwdG~SK>L}r(M>U( z&Qr^qGF3SZD&?vx_9W+mmP1V4Gk$N#F^O|5-+YfA{=TYDVPghd?_GVtZ9xf8CLKzi z{4SL4eM>LyaeNe=w$oC`Pc0t!Gin7MN<@I*JDwwv*WBn@k}XYhHS-|gb3@d(6Tx&W z;(^-2d@Be}%h{H?I-X9sV3j<1O&VaX8(w}XF`|n+R&0oqj;EV6c<*p%NCeD2oII{h zhpxv}wN0cr6a@KdxV2AE>1|>ab9j2F5Uzgwn1{i6nB-`(EZWr{wA$A!c)9-)Fz$-J z-C|-3)CQAx`p0k51KRrMM4k);sz<@hXBzGxA+zqi3EveuNx+x9`sNK%(FL1d@AiSG)0>t!x<}C4mZtxDNWKCjR|RwP znL9y3@0>%2Bn9xZZ)X@AngahqiC*o1EA%_Z73bA-eVv}+xpcd2Uribbw; z%I}!!7p1$P)+hDXys;D@O+B}M+`&0?lDXshpp|Cez4W<{(;-v97(UO}RGgtx0++mW zQ{Mwx!tDWTw>i=A1)o!MOqFj;S2 z(e=O-Ixp>4^wqZ}^1IRU! 
zI~Q7)zQ(hhJ|mOvr2uq}yopOy0DBXfMj?bE7Kl=|{r0XSIB}4)UVETY^om(y1$dZ|=To3QB3o@ur*_9<)E!qr;Q>8lhh+fWzGsh0lxzSPo9*jPzH*(Ovr;j} z$;cJ34@(sfh*0Qbw>XNt3G{=)AJ-?(D)pw5Bv(c(wy}pcxrblxi(CT1R-xwISxR)0 z$feV1`G$0!H)SUrjVN@jiCOO3OwH)49Ako|!<>O+=RYpCa|azadigr}{&65xeN$f( zbQIdi#>-u|U8H;0jNx|WHG^=q7yG5|n$Rht_j%+R0VeIYz8aiK)FUIEd$Yq%0I4vj zfANN+bO{ZzM$#fHdieN9d^#&F>1x)?OSr#ZhsOqUECT0Uqf_6j2|x2OqyNyE@qzEm zb&zO%vN)yWG94FCH?&@%4tSPR&CM`XI#-l_K6AmDu8e>MDV@nqlp1R3lxeZiCk}kFS8q-N> zHsNYXdqJVv=C+f>2|DT14T*n*9EwuuehN+;+1!edER7dRwOu^1pV435~S}!7lv|k#D`|*yGjP zsLJNRpXV}noV*=q&A|CIE3D{L(QmwatSvxcL)4D30XlR_+|vfWUy^iee8_BupYf2* znKEE=21hvN9L&wP^1}Q#5j_y`>|LbCWrMJJ@Mwem{e+X*>5m6>}?-$Ly@ zUP0btA0g)hcU!AX8+O7tP9?EkTakb~P!u1dfnq*K5 zaGR5OQZBuLnUAmUklM=tSFcqDMw!jvtR(mBgFXX}`1pU_q5lGwuK691tyc#HUDTxM zW1FF-EqDRdiVa&6Sz_lW211r3{pa7DVT6S zfRIVmr@Z!6LTH`CSF00c5UXxKt|6xk=Kq{%bA)jZmQpTVPVFs+w)CDRvyR#`l8f~kBYf)zj9)~+EPN#P}73BOS9p?Hfc)2%4}HlW%eua@mbJg zSL^V-BLlt(-e@+vn+e6Q8Xn&!hDJP*zwDNX=Rg2?QHbT8OwfBe+0sXr@M5&6TXMiUs-;QPZno@-Tv)1 zF;rsc&}z{=UOXKp)^%DfbIyRL0{28e-lsti*TKxX-D!|#x}r@?G!=G$z=w@@X&@5T zT%Ywm4Y~s^e%kmr70PuSDxFek5WhWOwy0hj6!Trt>X%4^EBS}GX6%oL;YEJ)O{(k`8s46o;J$m~6x3JXu~x@vnU4!#{( zrv2HU2H&UZ+O_>ohOP#o2Ua&yfu{b{*}F0x%HgT@R}~tB7tQfgHKu`quIrDyhE#Z; zWau{AD-}#*pR3-AAckHa7HiskN`a!_cI~klsZei|mixjl0mi28c9edX4zjJEUd7WA zU^|cdnlUzM;8|n)DZD-zlI34xFU(WnrG@hcdJu8S#ODf=^1c+fBjoH_q?rJXHxuK8 z^J0O&D8IM69VI@Va!q7Y(tjlK~-RO}B`DstFY}rIuxVt5J+_ZR* z@Ag!h`I!nwO5Kza?ht2frt|hu7{pLYIU(HWUJAVR`T%891qo#(ywR}L#Hau0IKksSSb4LA9Wx#9wIHY zZQFvVu=Ztz)RRLoz_sUja5VwxRdvs5eBe%iDN-@%ZiLtT>4cvVihS{K#qQ{~h^sMB z?y)+(;{p}N1;)xtPNstVY0aoYfjD4f1zqg+rUIAzR`=##aq#AQWZci4F|aaa()nJm zIQX*UJzpU;4#uolZ7nJg1vwM8Y`(RN3Xdk3<$PC;fkMN#yKwb5h*o-WQ_?#QLhVG? 
zXuU~u5ohuI7$_TGw6ml;8W>61{S&4q!=Z!Aqa{435d37-rIg80@Y-wKg(Hp$ zuuwVFq-b6o`03npQru%yXKtrjGf!Ge#%Qoi!tj)85jd XZEViII(i}p^wu8GTjw_P%ewysX?K&7 literal 0 HcmV?d00001 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/train.csv b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/train.csv new file mode 100644 index 000000000..d68e0d76c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml/train.csv @@ -0,0 +1,1461 @@ +Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice +1,60,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,2003,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,208500 
+2,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,181500 +3,60,RL,68,11250,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,223500 +4,70,RL,60,9550,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,NA,NA,NA,0,2,2006,WD,Abnorml,140000 +5,60,RL,84,14260,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,250000 +6,50,RL,85,14115,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1.5Fin,5,5,1993,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,Wood,Gd,TA,No,GLQ,732,Unf,0,64,796,GasA,Ex,Y,SBrkr,796,566,0,1362,1,0,1,1,1,1,TA,5,Typ,0,NA,Attchd,1993,Unf,2,480,TA,TA,Y,40,30,0,320,0,0,NA,MnPrv,Shed,700,10,2009,WD,Normal,143000 +7,20,RL,75,10084,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,Stone,186,Gd,TA,PConc,Ex,TA,Av,GLQ,1369,Unf,0,317,1686,GasA,Ex,Y,SBrkr,1694,0,0,1694,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2004,RFn,2,636,TA,TA,Y,255,57,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,307000 
+8,60,RL,NA,10382,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,PosN,Norm,1Fam,2Story,7,6,1973,1973,Gable,CompShg,HdBoard,HdBoard,Stone,240,TA,TA,CBlock,Gd,TA,Mn,ALQ,859,BLQ,32,216,1107,GasA,Ex,Y,SBrkr,1107,983,0,2090,1,0,2,1,3,1,TA,7,Typ,2,TA,Attchd,1973,RFn,2,484,TA,TA,Y,235,204,228,0,0,0,NA,NA,Shed,350,11,2009,WD,Normal,200000 +9,50,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,7,5,1931,1950,Gable,CompShg,BrkFace,Wd Shng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,952,952,GasA,Gd,Y,FuseF,1022,752,0,1774,0,0,2,0,2,2,TA,8,Min1,2,TA,Detchd,1931,Unf,2,468,Fa,TA,Y,90,0,205,0,0,0,NA,NA,NA,0,4,2008,WD,Abnorml,129900 +10,190,RL,50,7420,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Artery,Artery,2fmCon,1.5Unf,5,6,1939,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,GLQ,851,Unf,0,140,991,GasA,Ex,Y,SBrkr,1077,0,0,1077,1,0,1,0,2,2,TA,5,Typ,2,TA,Attchd,1939,RFn,1,205,Gd,TA,Y,0,4,0,0,0,0,NA,NA,NA,0,1,2008,WD,Normal,118000 +11,20,RL,70,11200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1965,1965,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Rec,906,Unf,0,134,1040,GasA,Ex,Y,SBrkr,1040,0,0,1040,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1965,Unf,1,384,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,129500 +12,60,RL,85,11924,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2005,2006,Hip,CompShg,WdShing,Wd Shng,Stone,286,Ex,TA,PConc,Ex,TA,No,GLQ,998,Unf,0,177,1175,GasA,Ex,Y,SBrkr,1182,1142,0,2324,1,0,3,0,4,1,Ex,11,Typ,2,Gd,BuiltIn,2005,Fin,3,736,TA,TA,Y,147,21,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,345000 +13,20,RL,NA,12968,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1962,1962,Hip,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,737,Unf,0,175,912,GasA,TA,Y,SBrkr,912,0,0,912,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1962,Unf,1,352,TA,TA,Y,140,0,0,0,176,0,NA,NA,NA,0,9,2008,WD,Normal,144000 
+14,20,RL,91,10652,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,Stone,306,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1494,1494,GasA,Ex,Y,SBrkr,1494,0,0,1494,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,840,TA,TA,Y,160,33,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,279500 +15,20,RL,NA,10920,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,MetalSd,MetalSd,BrkFace,212,TA,TA,CBlock,TA,TA,No,BLQ,733,Unf,0,520,1253,GasA,TA,Y,SBrkr,1253,0,0,1253,1,0,1,1,2,1,TA,5,Typ,1,Fa,Attchd,1960,RFn,1,352,TA,TA,Y,0,213,176,0,0,0,NA,GdWo,NA,0,5,2008,WD,Normal,157000 +16,45,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1.5Unf,7,8,1929,2001,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,832,832,GasA,Ex,Y,FuseA,854,0,0,854,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1991,Unf,2,576,TA,TA,Y,48,112,0,0,0,0,NA,GdPrv,NA,0,7,2007,WD,Normal,132000 +17,20,RL,NA,11241,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,7,1970,1970,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,180,TA,TA,CBlock,TA,TA,No,ALQ,578,Unf,0,426,1004,GasA,Ex,Y,SBrkr,1004,0,0,1004,1,0,1,0,2,1,TA,5,Typ,1,TA,Attchd,1970,Fin,2,480,TA,TA,Y,0,0,0,0,0,0,NA,NA,Shed,700,3,2010,WD,Normal,149000 +18,90,RL,72,10791,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,4,5,1967,1967,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1296,0,0,1296,0,0,2,0,2,2,TA,6,Typ,0,NA,CarPort,1967,Unf,2,516,TA,TA,Y,0,0,0,0,0,0,NA,NA,Shed,500,10,2006,WD,Normal,90000 +19,20,RL,66,13695,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,RRAe,Norm,1Fam,1Story,5,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,No,GLQ,646,Unf,0,468,1114,GasA,Ex,Y,SBrkr,1114,0,0,1114,1,0,1,1,3,1,Gd,6,Typ,0,NA,Detchd,2004,Unf,2,576,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,159000 
+20,20,RL,70,7560,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1958,1965,Hip,CompShg,BrkFace,Plywood,None,0,TA,TA,CBlock,TA,TA,No,LwQ,504,Unf,0,525,1029,GasA,TA,Y,SBrkr,1339,0,0,1339,0,0,1,0,3,1,TA,6,Min1,0,NA,Attchd,1958,Unf,1,294,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2009,COD,Abnorml,139000 +21,60,RL,101,14215,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,380,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1158,1158,GasA,Ex,Y,SBrkr,1158,1218,0,2376,0,0,3,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2005,RFn,3,853,TA,TA,Y,240,154,0,0,0,0,NA,NA,NA,0,11,2006,New,Partial,325300 +22,45,RM,57,7449,Pave,Grvl,Reg,Bnk,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Unf,7,7,1930,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,637,637,GasA,Ex,Y,FuseF,1108,0,0,1108,0,0,1,0,3,1,Gd,6,Typ,1,Gd,Attchd,1930,Unf,1,280,TA,TA,N,0,0,205,0,0,0,NA,GdPrv,NA,0,6,2007,WD,Normal,139400 +23,20,RL,75,9742,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2002,2002,Hip,CompShg,VinylSd,VinylSd,BrkFace,281,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1777,1777,GasA,Ex,Y,SBrkr,1795,0,0,1795,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2002,RFn,2,534,TA,TA,Y,171,159,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,230000 +24,120,RM,44,4224,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,1Story,5,7,1976,1976,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,840,Unf,0,200,1040,GasA,TA,Y,SBrkr,1060,0,0,1060,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1976,Unf,2,572,TA,TA,Y,100,110,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,129900 +25,20,RL,NA,8246,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,8,1968,2001,Gable,CompShg,Plywood,Plywood,None,0,TA,Gd,CBlock,TA,TA,Mn,Rec,188,ALQ,668,204,1060,GasA,Ex,Y,SBrkr,1060,0,0,1060,1,0,1,0,3,1,Gd,6,Typ,1,TA,Attchd,1968,Unf,1,270,TA,TA,Y,406,90,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD,Normal,154000 
+26,20,RL,110,14230,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,640,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1566,1566,GasA,Ex,Y,SBrkr,1600,0,0,1600,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2007,RFn,3,890,TA,TA,Y,0,56,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,256300 +27,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1951,2000,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Mn,BLQ,234,Rec,486,180,900,GasA,TA,Y,SBrkr,900,0,0,900,0,1,1,0,3,1,Gd,5,Typ,0,NA,Detchd,2005,Unf,2,576,TA,TA,Y,222,32,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,134800 +28,20,RL,98,11478,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,200,Gd,TA,PConc,Ex,TA,No,GLQ,1218,Unf,0,486,1704,GasA,Ex,Y,SBrkr,1704,0,0,1704,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2008,RFn,3,772,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,306000 +29,20,RL,47,16321,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1957,1997,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Gd,BLQ,1277,Unf,0,207,1484,GasA,TA,Y,SBrkr,1600,0,0,1600,1,0,1,0,2,1,TA,6,Typ,2,Gd,Attchd,1957,RFn,1,319,TA,TA,Y,288,258,0,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,207500 +30,30,RM,60,6324,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Feedr,RRNn,1Fam,1Story,4,6,1927,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,520,520,GasA,Fa,N,SBrkr,520,0,0,520,0,0,1,0,1,1,Fa,4,Typ,0,NA,Detchd,1920,Unf,1,240,Fa,TA,Y,49,0,87,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,68500 +31,70,C (all),50,8500,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Feedr,Norm,1Fam,2Story,4,4,1920,1950,Gambrel,CompShg,BrkFace,BrkFace,None,0,TA,Fa,BrkTil,TA,TA,No,Unf,0,Unf,0,649,649,GasA,TA,N,SBrkr,649,668,0,1317,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1920,Unf,1,250,TA,Fa,N,0,54,172,0,0,0,NA,MnPrv,NA,0,7,2008,WD,Normal,40000 
+32,20,RL,NA,8544,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1966,2006,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1228,1228,GasA,Gd,Y,SBrkr,1228,0,0,1228,0,0,1,1,3,1,Gd,6,Typ,0,NA,Attchd,1966,Unf,1,271,TA,TA,Y,0,65,0,0,0,0,NA,MnPrv,NA,0,6,2008,WD,Normal,149350 +33,20,RL,85,11049,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1234,1234,GasA,Ex,Y,SBrkr,1234,0,0,1234,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,RFn,2,484,TA,TA,Y,0,30,0,0,0,0,NA,NA,NA,0,1,2008,WD,Normal,179900 +34,20,RL,70,10552,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Rec,1018,Unf,0,380,1398,GasA,Gd,Y,SBrkr,1700,0,0,1700,0,1,1,1,4,1,Gd,6,Typ,1,Gd,Attchd,1959,RFn,2,447,TA,TA,Y,0,38,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,165500 +35,120,RL,60,7313,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,9,5,2005,2005,Hip,CompShg,MetalSd,MetalSd,BrkFace,246,Ex,TA,PConc,Ex,TA,No,GLQ,1153,Unf,0,408,1561,GasA,Ex,Y,SBrkr,1561,0,0,1561,1,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2005,Fin,2,556,TA,TA,Y,203,47,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,277500 +36,60,RL,108,13418,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,Stone,132,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1117,1117,GasA,Ex,Y,SBrkr,1132,1320,0,2452,0,0,3,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2004,Fin,3,691,TA,TA,Y,113,32,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,309000 +37,20,RL,112,10859,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1994,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1097,1097,GasA,Ex,Y,SBrkr,1097,0,0,1097,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1995,Unf,2,672,TA,TA,Y,392,64,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,145000 +38,20,RL,74,8532,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1954,1990,Hip,CompShg,Wd 
Sdng,Wd Sdng,BrkFace,650,TA,TA,CBlock,TA,TA,No,Rec,1213,Unf,0,84,1297,GasA,Gd,Y,SBrkr,1297,0,0,1297,0,1,1,0,3,1,TA,5,Typ,1,TA,Attchd,1954,Fin,2,498,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,153000 +39,20,RL,68,7922,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,GLQ,731,Unf,0,326,1057,GasA,TA,Y,SBrkr,1057,0,0,1057,1,0,1,0,3,1,Gd,5,Typ,0,NA,Detchd,1953,Unf,1,246,TA,TA,Y,0,52,0,0,0,0,NA,NA,NA,0,1,2010,WD,Abnorml,109000 +40,90,RL,65,6040,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,1Story,4,5,1955,1955,Gable,CompShg,AsbShng,Plywood,None,0,TA,TA,PConc,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,N,FuseP,1152,0,0,1152,0,0,2,0,2,2,Fa,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,AdjLand,82000 +41,20,RL,84,8658,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1965,1965,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,101,TA,TA,CBlock,TA,TA,No,Rec,643,Unf,0,445,1088,GasA,Ex,Y,SBrkr,1324,0,0,1324,0,0,2,0,3,1,TA,6,Typ,1,TA,Attchd,1965,RFn,2,440,TA,TA,Y,0,138,0,0,0,0,NA,GdWo,NA,0,12,2006,WD,Abnorml,160000 +42,20,RL,115,16905,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,5,6,1959,1959,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,Gd,BLQ,967,Unf,0,383,1350,GasA,Gd,Y,SBrkr,1328,0,0,1328,0,1,1,1,2,1,TA,5,Typ,2,Gd,Attchd,1959,RFn,1,308,TA,TA,P,0,104,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,170000 +43,85,RL,NA,9180,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,SFoyer,5,7,1983,1983,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Av,ALQ,747,LwQ,93,0,840,GasA,Gd,Y,SBrkr,884,0,0,884,1,0,1,0,2,1,Gd,5,Typ,0,NA,Attchd,1983,RFn,2,504,TA,Gd,Y,240,0,0,0,0,0,NA,MnPrv,NA,0,12,2007,WD,Normal,144000 
+44,20,RL,NA,9200,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1975,1980,Hip,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,Av,LwQ,280,BLQ,491,167,938,GasA,TA,Y,SBrkr,938,0,0,938,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1977,Unf,1,308,TA,TA,Y,145,0,0,0,0,0,NA,MnPrv,NA,0,7,2008,WD,Normal,130250 +45,20,RL,70,7945,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1959,1959,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,ALQ,179,BLQ,506,465,1150,GasA,Ex,Y,FuseA,1150,0,0,1150,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,300,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,141000 +46,120,RL,61,7658,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,9,5,2005,2005,Hip,CompShg,MetalSd,MetalSd,BrkFace,412,Ex,TA,PConc,Ex,TA,No,GLQ,456,Unf,0,1296,1752,GasA,Ex,Y,SBrkr,1752,0,0,1752,1,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2005,RFn,2,576,TA,TA,Y,196,82,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,319900 +47,50,RL,48,12822,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,1.5Fin,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,GLQ,1351,Unf,0,83,1434,GasA,Ex,Y,SBrkr,1518,631,0,2149,1,0,1,1,1,1,Gd,6,Typ,1,Ex,Attchd,2003,RFn,2,670,TA,TA,Y,168,43,0,0,198,0,NA,NA,NA,0,8,2009,WD,Abnorml,239686 +48,20,FV,84,11096,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1632,1656,GasA,Ex,Y,SBrkr,1656,0,0,1656,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2006,RFn,3,826,TA,TA,Y,0,146,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,249700 +49,190,RM,33,4456,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,4,5,1920,2008,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,736,736,GasA,Gd,Y,SBrkr,736,716,0,1452,0,0,2,0,2,3,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,102,0,0,0,NA,NA,NA,0,6,2009,New,Partial,113000 
+50,20,RL,66,7742,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,7,1966,1966,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,763,Unf,0,192,955,GasA,Ex,Y,SBrkr,955,0,0,955,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1966,Unf,1,386,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,1,2007,WD,Normal,127000 +51,60,RL,NA,13869,Pave,NA,IR2,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,GLQ,182,Unf,0,612,794,GasA,Gd,Y,SBrkr,794,676,0,1470,0,1,2,0,3,1,TA,6,Typ,0,NA,Attchd,1997,Fin,2,388,TA,TA,Y,0,75,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,177000 +52,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,6,1934,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,816,816,GasA,TA,Y,SBrkr,816,0,360,1176,0,0,1,0,3,1,TA,6,Typ,1,Gd,Detchd,1985,Unf,2,528,TA,TA,Y,112,0,0,0,0,0,NA,MnPrv,Shed,400,9,2006,WD,Normal,114500 +53,90,RM,110,8472,Grvl,NA,IR2,Bnk,AllPub,Corner,Mod,IDOTRR,RRNn,Norm,Duplex,1Story,5,5,1963,1963,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Fa,TA,CBlock,Gd,TA,Gd,LwQ,104,GLQ,712,0,816,GasA,TA,N,SBrkr,816,0,0,816,1,0,1,0,2,1,TA,5,Typ,0,NA,CarPort,1963,Unf,2,516,TA,TA,Y,106,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,110000 +54,20,RL,68,50271,Pave,NA,IR1,Low,AllPub,Inside,Gtl,Veenker,Norm,Norm,1Fam,1Story,9,5,1981,1987,Gable,WdShngl,WdShing,Wd Shng,None,0,Gd,TA,CBlock,Ex,TA,Gd,GLQ,1810,Unf,0,32,1842,GasA,Gd,Y,SBrkr,1842,0,0,1842,2,0,0,1,0,1,Gd,5,Typ,1,Gd,Attchd,1981,Fin,3,894,TA,TA,Y,857,72,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,385000 +55,80,RL,60,7134,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,NAmes,Norm,Norm,1Fam,SLvl,5,5,1955,1955,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,384,Unf,0,0,384,GasA,TA,Y,SBrkr,1360,0,0,1360,0,0,1,0,3,1,TA,6,Min1,1,TA,Detchd,1962,Unf,2,572,TA,TA,Y,0,50,0,0,0,0,NA,MnPrv,NA,0,2,2007,WD,Normal,130000 
+56,20,RL,100,10175,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1964,1964,Gable,CompShg,HdBoard,Plywood,BrkFace,272,TA,TA,CBlock,TA,TA,No,BLQ,490,Unf,0,935,1425,GasA,Gd,Y,SBrkr,1425,0,0,1425,0,0,2,0,3,1,TA,7,Typ,1,Gd,Attchd,1964,RFn,2,576,TA,TA,Y,0,0,0,407,0,0,NA,NA,NA,0,7,2008,WD,Normal,180500 +57,160,FV,24,2645,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,8,5,1999,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,456,Gd,TA,PConc,Gd,TA,No,GLQ,649,Unf,0,321,970,GasA,Ex,Y,SBrkr,983,756,0,1739,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1999,Fin,2,480,TA,TA,Y,115,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Abnorml,172500 +58,60,RL,89,11645,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,860,860,GasA,Ex,Y,SBrkr,860,860,0,1720,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,565,TA,TA,Y,0,70,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,196500 +59,60,RL,66,13682,Pave,NA,IR2,HLS,AllPub,CulDSac,Gtl,StoneBr,Norm,Norm,1Fam,2Story,10,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,1031,Ex,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,1410,1410,GasA,Ex,Y,SBrkr,1426,1519,0,2945,0,0,3,1,3,1,Gd,10,Typ,1,Gd,BuiltIn,2006,Fin,3,641,TA,TA,Y,192,0,37,0,0,0,NA,NA,NA,0,10,2006,New,Partial,438780 +60,20,RL,60,7200,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,7,1972,1972,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,632,Unf,0,148,780,GasA,Ex,Y,SBrkr,780,0,0,780,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1973,Unf,1,352,TA,TA,Y,196,0,0,0,0,0,NA,MnPrv,NA,0,1,2008,WD,Normal,124900 +61,20,RL,63,13072,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,RRAe,Norm,1Fam,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,ALQ,941,Unf,0,217,1158,GasA,Ex,Y,SBrkr,1158,0,0,1158,1,0,1,1,3,1,Gd,5,Typ,0,NA,Detchd,2006,Unf,2,576,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,5,2006,New,Partial,158000 
+62,75,RM,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,2.5Unf,5,7,1920,1996,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,530,530,GasA,TA,N,SBrkr,581,530,0,1111,0,0,1,0,3,1,Fa,6,Typ,0,NA,Detchd,1935,Unf,1,288,TA,TA,N,0,0,144,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,101000 +63,120,RL,44,6442,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,178,Gd,TA,PConc,Gd,Gd,Mn,GLQ,24,Unf,0,1346,1370,GasA,Ex,Y,SBrkr,1370,0,0,1370,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2006,RFn,2,484,TA,TA,Y,120,49,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,202500 +64,70,RM,50,10300,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,OldTown,RRAn,Feedr,1Fam,2Story,7,6,1921,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,576,576,GasA,Gd,Y,SBrkr,902,808,0,1710,0,0,2,0,3,1,TA,9,Typ,0,NA,Detchd,1990,Unf,2,480,TA,TA,Y,12,11,64,0,0,0,NA,GdPrv,NA,0,4,2010,WD,Normal,140000 +65,60,RL,NA,9375,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,573,TA,TA,PConc,Gd,TA,No,GLQ,739,Unf,0,318,1057,GasA,Ex,Y,SBrkr,1057,977,0,2034,1,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,1998,RFn,2,645,TA,TA,Y,576,36,0,0,0,0,NA,GdPrv,NA,0,2,2009,WD,Normal,219500 +66,60,RL,76,9591,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,344,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1143,1143,GasA,Ex,Y,SBrkr,1143,1330,0,2473,0,0,2,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2004,RFn,3,852,TA,TA,Y,192,151,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,317000 +67,20,RL,NA,19900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,PosA,Norm,1Fam,1Story,7,5,1970,1989,Gable,CompShg,Plywood,Plywood,BrkFace,287,TA,TA,CBlock,Gd,TA,Gd,GLQ,912,Unf,0,1035,1947,GasA,TA,Y,SBrkr,2207,0,0,2207,1,0,2,0,3,1,TA,7,Min1,1,Gd,Attchd,1970,RFn,2,576,TA,TA,Y,301,0,0,0,0,0,NA,NA,NA,0,7,2010,WD,Normal,180000 
+68,20,RL,72,10665,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,167,Gd,TA,PConc,Gd,TA,Av,GLQ,1013,Unf,0,440,1453,GasA,Ex,Y,SBrkr,1479,0,0,1479,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2003,RFn,2,558,TA,TA,Y,144,29,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,226000 +69,30,RM,47,4608,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,1Story,4,6,1945,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,747,747,GasA,TA,Y,SBrkr,747,0,0,747,0,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1945,Unf,1,220,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,80000 +70,50,RL,81,15593,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,ClearCr,Norm,Norm,1Fam,1.5Fin,7,4,1953,1953,Gable,CompShg,BrkFace,AsbShng,None,0,Gd,TA,CBlock,TA,TA,No,BLQ,603,Unf,0,701,1304,GasW,TA,Y,SBrkr,1304,983,0,2287,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1953,Fin,2,667,TA,TA,Y,0,21,114,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,225000 +71,20,RL,95,13651,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,7,6,1973,1973,Gable,CompShg,Plywood,Plywood,BrkFace,1115,TA,Gd,CBlock,Gd,TA,Gd,ALQ,1880,Unf,0,343,2223,GasA,Ex,Y,SBrkr,2223,0,0,2223,1,0,2,0,3,1,TA,8,Typ,2,Gd,Attchd,1973,Fin,2,516,TA,TA,Y,300,0,0,0,0,0,NA,NA,NA,0,2,2007,WD,Normal,244000 +72,20,RL,69,7599,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,1Story,4,6,1982,2006,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,565,Unf,0,280,845,GasA,TA,Y,SBrkr,845,0,0,845,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1987,Unf,2,360,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,129500 +73,60,RL,74,10141,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,40,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,832,832,GasA,Gd,Y,SBrkr,885,833,0,1718,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1998,Fin,2,427,TA,TA,Y,0,94,0,0,291,0,NA,NA,NA,0,12,2009,WD,Normal,185000 +74,20,RL,85,10200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1954,2003,Gable,CompShg,Wd 
Sdng,Wd Sdng,BrkFace,104,TA,TA,CBlock,TA,TA,No,ALQ,320,BLQ,362,404,1086,GasA,Gd,Y,SBrkr,1086,0,0,1086,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1989,Unf,2,490,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,5,2010,WD,Normal,144900 +75,50,RM,60,5790,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,3,6,1915,1950,Gambrel,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,CBlock,Fa,TA,No,Unf,0,Unf,0,840,840,GasA,Gd,N,SBrkr,840,765,0,1605,0,0,2,0,3,2,TA,8,Typ,0,NA,Detchd,1915,Unf,1,379,TA,TA,Y,0,0,202,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,107400 +76,180,RM,21,1596,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,SLvl,4,5,1973,1973,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,Gd,GLQ,462,Unf,0,0,462,GasA,TA,Y,SBrkr,526,462,0,988,1,0,1,0,2,1,TA,5,Typ,0,NA,BuiltIn,1973,Unf,1,297,TA,TA,Y,120,101,0,0,0,0,NA,GdWo,NA,0,11,2009,WD,Normal,91000 +77,20,RL,NA,8475,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,7,1956,1956,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,228,Unf,0,724,952,GasA,Ex,Y,FuseA,952,0,0,952,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1956,Unf,1,283,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,135750 +78,50,RM,50,8635,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,5,1948,2001,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,336,GLQ,41,295,672,GasA,TA,Y,SBrkr,1072,213,0,1285,1,0,1,0,2,1,TA,6,Min1,0,NA,Detchd,1948,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,1,2008,WD,Normal,127000 +79,90,RL,72,10778,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,4,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1768,1768,GasA,TA,N,SBrkr,1768,0,0,1768,0,0,2,0,4,2,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,136500 +80,50,RM,60,10440,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,5,6,1910,1981,Gable,CompShg,Wd Sdng,Wd 
Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,440,440,GasA,Gd,Y,SBrkr,682,548,0,1230,0,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1966,Unf,2,440,TA,TA,Y,74,0,128,0,0,0,NA,MnPrv,NA,0,5,2009,WD,Normal,110000 +81,60,RL,100,13000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,6,1968,1968,Gable,CompShg,VinylSd,VinylSd,BrkFace,576,TA,Gd,CBlock,Gd,TA,No,Rec,448,Unf,0,448,896,GasA,TA,Y,SBrkr,1182,960,0,2142,0,0,2,1,4,1,Gd,8,Typ,1,Gd,Attchd,1968,Fin,1,509,TA,TA,Y,0,72,0,0,252,0,NA,NA,NA,0,6,2009,WD,Normal,193500 +82,120,RM,32,4500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Mitchel,Norm,Norm,TwnhsE,1Story,6,5,1998,1998,Hip,CompShg,VinylSd,VinylSd,BrkFace,443,TA,Gd,PConc,Ex,Gd,No,GLQ,1201,Unf,0,36,1237,GasA,Ex,Y,SBrkr,1337,0,0,1337,1,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,1998,Fin,2,405,TA,TA,Y,0,199,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,153500 +83,20,RL,78,10206,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,468,TA,TA,PConc,Gd,TA,No,GLQ,33,Unf,0,1530,1563,GasA,Ex,Y,SBrkr,1563,0,0,1563,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2007,RFn,3,758,TA,TA,Y,144,99,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,245000 +84,20,RL,80,8892,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1960,1960,Gable,CompShg,MetalSd,MetalSd,BrkCmn,66,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1065,1065,GasA,Gd,Y,SBrkr,1065,0,0,1065,0,0,1,1,3,1,TA,6,Typ,0,NA,Detchd,1974,Unf,2,461,TA,TA,Y,74,0,0,0,0,0,NA,NA,NA,0,7,2007,COD,Normal,126500 +85,80,RL,NA,8530,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,1995,1996,Gable,CompShg,HdBoard,HdBoard,BrkFace,22,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,384,384,GasA,Gd,Y,SBrkr,804,670,0,1474,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,1995,Fin,2,400,TA,TA,Y,120,72,0,0,0,0,NA,NA,Shed,700,5,2009,WD,Normal,168500 
+86,60,RL,121,16059,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1991,1992,Hip,CompShg,HdBoard,HdBoard,BrkFace,284,Gd,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1288,1288,GasA,Ex,Y,SBrkr,1301,1116,0,2417,0,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1991,Unf,2,462,TA,TA,Y,127,82,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,260000 +87,60,RL,122,11911,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,684,684,GasA,Ex,Y,SBrkr,684,876,0,1560,0,0,2,1,3,1,Gd,6,Typ,1,Gd,BuiltIn,2005,Fin,2,400,TA,TA,Y,100,38,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,174000 +88,160,FV,40,3951,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,6,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,Stone,76,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,612,612,GasA,Ex,Y,SBrkr,612,612,0,1224,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2009,RFn,2,528,TA,TA,Y,0,234,0,0,0,0,NA,NA,NA,0,6,2009,New,Partial,164500 +89,50,C (all),105,8470,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,IDOTRR,Feedr,Feedr,1Fam,1.5Fin,3,2,1915,1982,Hip,CompShg,Plywood,Plywood,None,0,Fa,Fa,CBlock,TA,Fa,No,Unf,0,Unf,0,1013,1013,GasA,TA,N,SBrkr,1013,0,513,1526,0,0,1,0,2,1,Fa,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,156,0,0,0,NA,MnPrv,NA,0,10,2009,ConLD,Abnorml,85000 +90,20,RL,60,8070,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,4,5,1994,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,588,Unf,0,402,990,GasA,Ex,Y,SBrkr,990,0,0,990,1,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,123600 +91,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1950,1950,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,FuseA,1040,0,0,1040,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1950,Unf,2,420,TA,TA,Y,0,29,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,109900 
+92,20,RL,85,8500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,3,1961,1961,Hip,CompShg,HdBoard,HdBoard,BrkCmn,203,TA,TA,CBlock,TA,TA,No,Rec,600,Unf,0,635,1235,GasA,TA,Y,SBrkr,1235,0,0,1235,0,0,1,0,2,1,TA,6,Typ,0,NA,Attchd,1961,Unf,2,480,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,12,2006,WD,Abnorml,98600 +93,30,RL,80,13360,Pave,Grvl,IR1,HLS,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,5,7,1921,2006,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,Gd,TA,No,ALQ,713,Unf,0,163,876,GasA,Ex,Y,SBrkr,964,0,0,964,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1921,Unf,2,432,TA,TA,Y,0,0,44,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,163500 +94,190,C (all),60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,2fmCon,2.5Unf,6,6,1910,1998,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,Fa,Mn,Rec,1046,Unf,0,168,1214,GasW,Ex,N,SBrkr,1260,1031,0,2291,0,1,2,0,4,2,TA,9,Typ,1,Gd,Detchd,1900,Unf,2,506,TA,TA,Y,0,0,0,0,99,0,NA,NA,NA,0,11,2007,WD,Normal,133900 +95,60,RL,69,9337,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,Gd,TA,No,GLQ,648,Unf,0,176,824,GasA,Ex,Y,SBrkr,905,881,0,1786,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1997,RFn,2,684,TA,TA,Y,0,162,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,204750 +96,60,RL,NA,9765,Pave,NA,IR2,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,8,1993,1993,Gable,CompShg,VinylSd,VinylSd,BrkFace,68,Ex,Gd,PConc,Gd,Gd,No,ALQ,310,Unf,0,370,680,GasA,Gd,Y,SBrkr,680,790,0,1470,0,0,2,1,3,1,TA,6,Typ,1,TA,BuiltIn,1993,Fin,2,420,TA,TA,Y,232,63,0,0,0,0,NA,NA,Shed,480,4,2009,WD,Normal,185000 +97,20,RL,78,10264,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,183,Gd,TA,PConc,Gd,TA,Av,ALQ,1162,Unf,0,426,1588,GasA,Ex,Y,SBrkr,1588,0,0,1588,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1999,RFn,2,472,TA,TA,Y,158,29,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,214000 
+98,20,RL,73,10921,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1965,1965,Hip,CompShg,HdBoard,HdBoard,BrkFace,48,TA,TA,CBlock,TA,TA,No,Rec,520,Unf,0,440,960,GasA,TA,Y,FuseF,960,0,0,960,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1965,Fin,1,432,TA,TA,P,120,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,94750 +99,30,RL,85,10625,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,5,1920,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,ALQ,108,Unf,0,350,458,GasA,Fa,N,SBrkr,835,0,0,835,0,0,1,0,2,1,TA,5,Typ,0,NA,Basment,1920,Unf,1,366,Fa,TA,Y,0,0,77,0,0,0,NA,NA,Shed,400,5,2010,COD,Abnorml,83000 +100,20,RL,77,9320,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1959,1959,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,569,Unf,0,381,950,GasA,Fa,Y,SBrkr,1225,0,0,1225,1,0,1,1,3,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,352,0,0,0,0,0,NA,NA,Shed,400,1,2010,WD,Normal,128950 +101,20,RL,NA,10603,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,7,1977,2001,Gable,CompShg,Plywood,Plywood,BrkFace,28,TA,TA,PConc,TA,TA,Mn,ALQ,1200,Unf,0,410,1610,GasA,Gd,Y,SBrkr,1610,0,0,1610,1,0,2,0,3,1,Gd,6,Typ,2,TA,Attchd,1977,RFn,2,480,TA,TA,Y,168,68,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,205000 +102,60,RL,77,9206,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,1985,1985,Gable,CompShg,HdBoard,HdBoard,BrkFace,336,Gd,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,741,741,GasA,TA,Y,SBrkr,977,755,0,1732,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1985,Fin,2,476,TA,TA,Y,192,46,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,178000 +103,90,RL,64,7018,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SawyerW,Norm,Norm,Duplex,1Story,5,5,1979,1979,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Fa,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1535,0,0,1535,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1979,Unf,2,410,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Alloca,118964 
+104,20,RL,94,10402,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1226,1226,GasA,Ex,Y,SBrkr,1226,0,0,1226,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2009,RFn,3,740,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,198900 +105,50,RM,NA,7758,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,7,4,1931,1950,Gable,CompShg,Stucco,Stucco,BrkFace,600,TA,Fa,PConc,TA,TA,No,LwQ,224,Unf,0,816,1040,GasA,Ex,Y,FuseF,1226,592,0,1818,0,0,1,1,4,1,TA,7,Typ,2,TA,Detchd,1951,Unf,1,240,TA,TA,Y,0,0,0,0,184,0,NA,NA,NA,0,6,2007,WD,Normal,169500 +106,60,FV,75,9375,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2003,2004,Hip,CompShg,VinylSd,VinylSd,BrkFace,768,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1053,1053,GasA,Ex,Y,SBrkr,1053,939,0,1992,0,0,2,1,3,1,Gd,9,Typ,1,Gd,Attchd,2003,RFn,2,648,TA,TA,Y,140,45,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,250000 +107,30,RM,60,10800,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,4,7,1885,1995,Mansard,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,641,641,GasA,Gd,Y,SBrkr,1047,0,0,1047,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1954,Unf,1,273,Fa,Fa,N,0,0,0,0,0,0,NA,NA,Shed,450,8,2007,WD,Normal,100000 +108,20,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,5,1948,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,104,BLQ,169,516,789,GasA,Ex,Y,SBrkr,789,0,0,789,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1948,Unf,1,250,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Partial,115000 +109,50,RM,85,8500,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Artery,Norm,1Fam,1.5Fin,5,7,1919,2005,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,793,793,GasW,TA,N,FuseF,997,520,0,1517,0,0,2,0,3,1,Fa,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,144,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,115000 
+110,20,RL,105,11751,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1977,1977,Hip,CompShg,Plywood,Plywood,BrkFace,480,TA,TA,CBlock,Gd,TA,No,BLQ,705,Unf,0,1139,1844,GasA,Ex,Y,SBrkr,1844,0,0,1844,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1977,RFn,2,546,TA,TA,Y,0,122,0,0,0,0,NA,MnPrv,NA,0,1,2010,COD,Normal,190000 +111,50,RL,75,9525,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,6,4,1954,1972,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,Fa,No,Rec,444,Unf,0,550,994,GasA,Gd,Y,SBrkr,1216,639,0,1855,0,0,2,0,4,1,TA,7,Typ,0,NA,Attchd,1954,Unf,1,325,TA,TA,Y,182,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,136900 +112,80,RL,NA,7750,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,250,Unf,0,134,384,GasA,Ex,Y,SBrkr,774,656,0,1430,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,2000,Fin,2,400,TA,TA,Y,180,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,180000 +113,60,RL,77,9965,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,220,Gd,TA,PConc,Ex,TA,Av,GLQ,984,Unf,0,280,1264,GasA,Ex,Y,SBrkr,1282,1414,0,2696,1,0,2,1,4,1,Ex,10,Typ,1,Gd,BuiltIn,2007,Fin,3,792,TA,TA,Y,120,184,0,0,168,0,NA,NA,NA,0,10,2007,New,Partial,383970 +114,20,RL,NA,21000,Pave,NA,Reg,Bnk,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,5,1953,1953,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,184,TA,Gd,CBlock,Gd,TA,Mn,ALQ,35,Rec,869,905,1809,GasA,TA,Y,SBrkr,2259,0,0,2259,1,0,2,0,3,1,Gd,7,Typ,2,Gd,Basment,1953,Unf,2,450,TA,TA,Y,166,120,192,0,0,0,NA,MnPrv,NA,0,10,2007,COD,Abnorml,217000 +115,70,RL,61,7259,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2Story,6,8,1945,2002,Gambrel,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,ALQ,774,LwQ,150,104,1028,GasA,Ex,Y,SBrkr,1436,884,0,2320,1,0,2,1,3,1,Gd,9,Typ,1,TA,Detchd,1945,Unf,1,180,TA,TA,Y,224,0,0,0,0,0,NA,MnPrv,NA,0,7,2007,WD,Normal,259500 
+116,160,FV,34,3230,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,6,5,1999,1999,Gable,CompShg,MetalSd,MetalSd,BrkFace,1129,TA,TA,PConc,Gd,TA,No,GLQ,419,Unf,0,310,729,GasA,Gd,Y,SBrkr,729,729,0,1458,0,0,2,1,2,1,TA,5,Typ,1,Fa,Detchd,1999,Unf,2,440,TA,TA,Y,0,32,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,176000 +117,20,RL,NA,11616,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1962,1962,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,116,TA,TA,CBlock,TA,TA,No,LwQ,170,BLQ,670,252,1092,GasA,TA,Y,SBrkr,1092,0,0,1092,0,1,1,0,3,1,TA,6,Typ,1,Po,Attchd,1962,Unf,1,288,TA,TA,Y,0,20,144,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,139000 +118,20,RL,74,8536,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1125,1125,GasA,Gd,Y,SBrkr,1125,0,0,1125,0,0,1,1,2,1,TA,5,Typ,0,NA,Attchd,2007,Unf,2,430,TA,TA,Y,80,64,0,0,0,0,NA,NA,NA,0,4,2007,New,Partial,155000 +119,60,RL,90,12376,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1990,1990,Hip,CompShg,Plywood,Plywood,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,1470,Unf,0,203,1673,GasA,Gd,Y,SBrkr,1699,1523,0,3222,1,0,3,0,5,1,Gd,11,Typ,2,TA,Attchd,1990,Unf,3,594,TA,TA,Y,367,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,320000 +120,60,RL,65,8461,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,728,728,0,1456,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2005,Fin,2,390,TA,TA,Y,0,24,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,163990 +121,80,RL,NA,21453,Pave,NA,IR1,Low,AllPub,CulDSac,Sev,ClearCr,Norm,Norm,1Fam,SLvl,6,5,1969,1969,Flat,Metal,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,Gd,ALQ,938,Unf,0,0,938,GasA,Ex,Y,SBrkr,988,0,0,988,1,0,1,0,1,1,TA,4,Typ,2,TA,Attchd,1969,Unf,2,540,TA,TA,Y,0,130,0,130,0,0,NA,NA,NA,0,10,2006,WD,Normal,180000 
+122,50,RM,50,6060,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,5,1939,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,732,732,GasA,Gd,Y,SBrkr,772,351,0,1123,0,0,1,0,3,1,TA,4,Typ,0,NA,Detchd,1979,Unf,1,264,TA,TA,P,0,0,140,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,100000 +123,20,RL,75,9464,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,7,1958,1958,Hip,CompShg,MetalSd,MetalSd,BrkFace,135,TA,Gd,CBlock,TA,TA,No,BLQ,570,Unf,0,510,1080,GasA,Gd,Y,SBrkr,1080,0,0,1080,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1958,Unf,1,288,TA,TA,Y,0,0,0,0,130,0,NA,NA,NA,0,6,2008,WD,Normal,136000 +124,120,RL,55,7892,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1993,1993,Gable,CompShg,Plywood,Plywood,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,300,Unf,0,899,1199,GasA,Ex,Y,SBrkr,1199,0,0,1199,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,1993,RFn,2,530,TA,TA,Y,0,63,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,153900 +125,20,RL,48,17043,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,5,1979,1998,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,Gd,Fa,No,Unf,0,Unf,0,1362,1362,GasA,TA,Y,SBrkr,1586,0,0,1586,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1979,Unf,2,435,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,181000 +126,190,RM,60,6780,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,2fmCon,1.5Fin,6,8,1935,1982,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Fa,CBlock,TA,TA,Av,GLQ,490,Unf,0,30,520,GasA,Gd,N,SBrkr,520,0,234,754,1,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,53,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,84500 +127,120,RL,NA,4928,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,TwnhsE,1Story,6,5,1976,1976,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,120,Unf,0,958,1078,GasA,TA,Y,SBrkr,958,0,0,958,0,0,2,0,2,1,TA,5,Typ,1,TA,Attchd,1977,RFn,2,440,TA,TA,Y,0,205,0,0,0,0,NA,NA,NA,0,2,2007,WD,Normal,128000 +128,45,RM,55,4388,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,OldTown,Feedr,Norm,1Fam,1.5Unf,5,7,1930,1950,Gable,CompShg,WdShing,Wd 
Sdng,None,0,TA,Gd,BrkTil,TA,TA,No,LwQ,116,Unf,0,556,672,GasA,Ex,Y,SBrkr,840,0,0,840,0,0,1,0,3,1,TA,5,Typ,1,TA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,87000 +129,60,RL,69,7590,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,PosN,Norm,1Fam,2Story,6,5,1966,1966,Gable,CompShg,VinylSd,VinylSd,BrkFace,266,TA,TA,CBlock,TA,TA,No,BLQ,512,Unf,0,148,660,GasA,TA,Y,SBrkr,660,688,0,1348,0,0,1,1,3,1,TA,6,Typ,1,Fa,Attchd,1966,RFn,2,453,TA,TA,Y,188,108,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,155000 +130,20,RL,69,8973,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1958,1991,Gable,CompShg,Plywood,Plywood,BrkFace,85,TA,TA,CBlock,TA,TA,No,Rec,567,BLQ,28,413,1008,GasA,TA,Y,FuseA,1053,0,0,1053,0,1,1,1,3,1,Ex,6,Typ,0,NA,2Types,1998,RFn,2,750,TA,TA,Y,0,80,0,180,0,0,NA,MnWw,NA,0,7,2006,WD,Abnorml,150000 +131,60,RL,88,14200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,2Story,7,6,1966,1966,Gable,CompShg,MetalSd,MetalSd,BrkFace,309,TA,TA,CBlock,TA,TA,No,Rec,445,Unf,0,479,924,GasA,Ex,Y,SBrkr,1216,941,0,2157,0,0,2,1,4,1,Gd,8,Typ,2,Gd,Attchd,1966,Fin,2,487,TA,TA,Y,105,66,0,0,0,0,NA,GdPrv,NA,0,5,2006,WD,Normal,226000 +132,60,RL,NA,12224,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,40,Gd,TA,PConc,Gd,TA,No,GLQ,695,Unf,0,297,992,GasA,Ex,Y,SBrkr,1022,1032,0,2054,1,0,2,1,3,1,Gd,7,Typ,1,TA,BuiltIn,2000,RFn,2,390,TA,TA,Y,24,48,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,244000 +133,20,RL,75,7388,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1959,2002,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,405,Unf,0,658,1063,GasA,Gd,Y,SBrkr,1327,0,0,1327,1,0,1,0,3,1,Gd,7,Typ,0,NA,Detchd,1974,Unf,2,624,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,150750 
+134,20,RL,NA,6853,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,136,Gd,TA,PConc,Ex,TA,No,GLQ,1005,Unf,0,262,1267,GasA,Ex,Y,SBrkr,1296,0,0,1296,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2001,Fin,2,471,TA,TA,Y,192,25,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,220000 +135,20,RL,78,10335,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1968,1993,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,Rec,570,Unf,0,891,1461,GasA,Gd,Y,SBrkr,1721,0,0,1721,0,0,2,1,3,1,TA,7,Min1,1,TA,Attchd,1968,RFn,2,440,TA,TA,Y,0,96,180,0,0,0,NA,MnPrv,NA,0,7,2006,WD,Normal,180000 +136,20,RL,80,10400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,6,1970,1970,Hip,CompShg,Plywood,Plywood,BrkFace,288,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,1304,1304,GasA,Gd,Y,SBrkr,1682,0,0,1682,0,0,2,0,3,1,TA,7,Typ,1,Gd,Attchd,1970,Unf,2,530,TA,TA,Y,98,0,0,0,0,0,NA,MnPrv,NA,0,5,2008,WD,Normal,174000 +137,20,RL,NA,10355,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1967,1967,Gable,CompShg,MetalSd,MetalSd,BrkFace,196,TA,TA,CBlock,TA,TA,No,BLQ,695,Unf,0,519,1214,GasA,TA,Y,SBrkr,1214,0,0,1214,0,0,2,0,3,1,TA,5,Typ,1,Fa,Attchd,1967,RFn,1,318,TA,TA,Y,0,111,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,143000 +138,90,RL,82,11070,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,1Story,7,5,1988,1989,Gable,CompShg,VinylSd,VinylSd,BrkFace,70,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1907,1907,GasA,Gd,Y,SBrkr,1959,0,0,1959,0,0,3,0,5,2,TA,9,Typ,0,NA,2Types,1989,Unf,3,766,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Family,171000 +139,60,RL,73,9066,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,1999,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,320,Gd,TA,PConc,Gd,TA,Mn,GLQ,668,Unf,0,336,1004,GasA,Ex,Y,SBrkr,1004,848,0,1852,0,0,2,1,3,1,Gd,7,Typ,2,TA,Attchd,1999,Fin,3,660,TA,TA,Y,224,106,0,0,0,0,NA,GdPrv,NA,0,12,2008,WD,Normal,230000 
+140,60,RL,65,15426,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,821,Unf,0,107,928,GasA,Ex,Y,SBrkr,928,836,0,1764,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1997,RFn,2,470,TA,TA,Y,276,99,0,0,0,0,NA,MnPrv,NA,0,8,2009,WD,Normal,231500 +141,20,RL,70,10500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1971,1971,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,432,Unf,0,432,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,1,Po,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,ConLI,Normal,115000 +142,20,RL,78,11645,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,1300,Unf,0,434,1734,GasA,Ex,Y,SBrkr,1734,0,0,1734,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2005,Fin,2,660,TA,TA,Y,160,24,0,0,0,0,NA,NA,NA,0,1,2006,WD,Normal,260000 +143,50,RL,71,8520,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,5,4,1952,1952,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,Fa,CBlock,TA,TA,No,Rec,507,Unf,0,403,910,GasA,Fa,Y,SBrkr,910,475,0,1385,0,0,2,0,4,1,TA,6,Typ,0,NA,Detchd,2000,Unf,2,720,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,6,2010,WD,Normal,166000 +144,20,RL,78,10335,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,183,Gd,TA,PConc,Gd,TA,Gd,GLQ,679,Unf,0,811,1490,GasA,Ex,Y,SBrkr,1501,0,0,1501,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1999,RFn,2,577,TA,TA,Y,144,29,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,204000 +145,90,RM,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,RRAe,Norm,Duplex,1Story,5,5,1963,1963,Gable,CompShg,HdBoard,HdBoard,BrkFace,336,TA,TA,CBlock,TA,TA,No,Rec,1332,Unf,0,396,1728,GasA,TA,Y,SBrkr,1728,0,0,1728,1,0,2,0,6,2,TA,10,Typ,0,NA,Detchd,1963,Unf,2,504,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2006,ConLI,Abnorml,125000 
+146,160,RM,24,2522,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Twnhs,2Story,6,5,2004,2006,Gable,CompShg,VinylSd,VinylSd,Stone,50,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,970,970,GasA,Ex,Y,SBrkr,970,739,0,1709,0,0,2,0,3,1,Gd,7,Maj1,0,NA,Detchd,2004,Unf,2,380,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,130000 +147,30,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1Story,5,7,1931,1993,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,209,Unf,0,506,715,GasA,TA,Y,FuseA,875,0,0,875,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1931,Unf,1,180,Fa,TA,Y,48,0,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,105000 +148,60,RL,NA,9505,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,180,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,884,884,GasA,Ex,Y,SBrkr,884,1151,0,2035,0,0,2,1,3,1,Gd,8,Typ,1,Gd,BuiltIn,2001,Fin,2,434,TA,TA,Y,144,48,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,222500 +149,20,RL,63,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,120,TA,TA,PConc,Gd,TA,No,GLQ,680,Unf,0,400,1080,GasA,Ex,Y,SBrkr,1080,0,0,1080,1,0,1,0,3,1,Gd,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,141000 +150,50,RM,NA,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,4,1936,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,Gd,TA,No,Unf,0,Unf,0,896,896,GasA,Gd,Y,FuseA,896,448,0,1344,0,0,1,0,3,1,TA,7,Typ,0,NA,Detchd,1936,Unf,1,240,Fa,TA,Y,200,114,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,115000 +151,20,RL,120,10356,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1975,1975,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,716,Unf,0,253,969,GasA,TA,Y,SBrkr,969,0,0,969,0,0,1,1,3,1,TA,5,Typ,0,NA,Attchd,1975,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,1,2007,WD,Normal,122000 
+152,20,RL,107,13891,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2008,Hip,CompShg,VinylSd,VinylSd,Stone,436,Gd,TA,PConc,Ex,TA,Gd,GLQ,1400,Unf,0,310,1710,GasA,Ex,Y,SBrkr,1710,0,0,1710,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2007,RFn,3,866,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,1,2008,New,Partial,372402 +153,60,RL,NA,14803,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,5,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,252,TA,TA,CBlock,TA,TA,No,Rec,416,Unf,0,409,825,GasA,Gd,Y,SBrkr,1097,896,0,1993,0,0,2,1,4,1,TA,8,Typ,1,Gd,Attchd,1971,RFn,2,495,TA,TA,Y,0,66,0,0,0,0,NA,GdWo,NA,0,6,2006,WD,Normal,190000 +154,20,RL,NA,13500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1Story,6,7,1960,1975,Flat,CompShg,BrkFace,Plywood,None,0,TA,TA,CBlock,Gd,TA,Gd,BLQ,429,ALQ,1080,93,1602,GasA,Gd,Y,SBrkr,1252,0,0,1252,1,0,1,0,1,1,TA,4,Typ,1,Gd,Attchd,1960,RFn,2,564,TA,TA,Y,409,0,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,235000 +155,30,RM,84,11340,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1Story,6,5,1923,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1200,1200,GasA,TA,Y,FuseA,1200,0,0,1200,0,0,1,0,4,1,TA,7,Typ,0,NA,Detchd,1923,Unf,1,312,Fa,Fa,Y,0,0,228,0,0,0,NA,NA,NA,0,3,2006,WD,Family,125000 +156,50,RL,60,9600,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Artery,Norm,1Fam,1.5Fin,6,5,1924,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,572,572,Grav,Fa,N,FuseF,572,524,0,1096,0,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,8,128,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,79000 +157,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1950,1950,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,FuseF,1040,0,0,1040,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1950,Unf,2,625,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,109500 
+158,60,RL,92,12003,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,2Story,8,5,2009,2010,Gable,CompShg,VinylSd,VinylSd,BrkFace,84,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,774,774,GasA,Ex,Y,SBrkr,774,1194,0,1968,0,0,2,1,4,1,Ex,8,Typ,1,Gd,BuiltIn,2009,Fin,3,680,TA,TA,Y,0,75,0,0,0,0,NA,NA,NA,0,5,2010,New,Partial,269500 +159,60,FV,100,12552,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,222,Unf,0,769,991,GasA,Ex,Y,SBrkr,991,956,0,1947,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2004,RFn,2,678,TA,TA,Y,0,136,0,0,0,0,NA,GdWo,NA,0,5,2010,WD,Normal,254900 +160,60,RL,134,19378,Pave,NA,IR1,HLS,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,456,Gd,TA,PConc,Gd,TA,Mn,GLQ,57,Unf,0,1335,1392,GasA,Ex,Y,SBrkr,1392,1070,0,2462,1,0,2,1,4,1,Gd,9,Typ,1,Gd,Attchd,2006,RFn,2,576,TA,TA,Y,239,132,0,168,0,0,NA,NA,NA,0,3,2006,New,Partial,320000 +161,20,RL,NA,11120,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Veenker,Norm,Norm,1Fam,1Story,6,6,1984,1984,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,PConc,Gd,TA,No,BLQ,660,Unf,0,572,1232,GasA,TA,Y,SBrkr,1232,0,0,1232,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1984,Unf,2,516,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,162500 +162,60,RL,110,13688,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,664,Gd,TA,PConc,Ex,TA,Av,GLQ,1016,Unf,0,556,1572,GasA,Ex,Y,SBrkr,1572,1096,0,2668,1,0,2,1,3,1,Ex,10,Typ,2,Gd,BuiltIn,2003,Fin,3,726,TA,TA,Y,400,0,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,412500 +163,20,RL,95,12182,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,226,Gd,TA,PConc,Gd,TA,Mn,BLQ,1201,Unf,0,340,1541,GasA,Ex,Y,SBrkr,1541,0,0,1541,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2005,RFn,2,532,TA,TA,Y,0,70,0,0,0,0,NA,NA,NA,0,5,2010,New,Partial,220000 
+164,45,RL,55,5500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Unf,4,6,1956,1956,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,882,882,GasA,Ex,Y,SBrkr,882,0,0,882,0,0,1,0,1,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,4,2007,WD,Normal,103200 +165,40,RM,40,5400,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1Story,6,7,1926,2004,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,BrkTil,TA,TA,Mn,LwQ,370,Unf,0,779,1149,GasA,Gd,Y,FuseA,1149,467,0,1616,0,0,2,0,3,1,Gd,5,Typ,0,NA,Detchd,1926,Unf,1,216,TA,TA,Y,0,0,183,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,152000 +166,190,RL,62,10106,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,2fmCon,1.5Fin,5,7,1940,1999,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,TA,TA,No,ALQ,351,Rec,181,112,644,GasA,Gd,Y,SBrkr,808,547,0,1355,1,0,2,0,4,2,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,140,0,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,127500 +167,20,RL,NA,10708,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1Story,5,5,1955,1993,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,CBlock,TA,TA,No,LwQ,379,BLQ,768,470,1617,GasA,Ex,Y,FuseA,1867,0,0,1867,1,0,1,0,2,1,TA,7,Typ,3,Gd,Attchd,1955,Fin,1,303,TA,TA,Y,476,0,0,0,142,0,NA,GdWo,NA,0,11,2009,COD,Normal,190000 +168,60,RL,86,10562,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,300,Gd,TA,PConc,Ex,TA,No,GLQ,1288,Unf,0,294,1582,GasA,Ex,Y,SBrkr,1610,551,0,2161,1,0,1,1,3,1,Ex,8,Typ,1,Gd,Attchd,2007,Fin,3,789,TA,TA,Y,178,120,0,0,0,0,NA,NA,NA,0,11,2007,New,Partial,325624 +169,60,RL,62,8244,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,840,840,GasA,Ex,Y,SBrkr,840,880,0,1720,0,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2004,Fin,2,440,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,183500 
+170,20,RL,NA,16669,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,1Story,8,6,1981,1981,Hip,WdShake,Plywood,Plywood,BrkFace,653,Gd,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1686,1686,GasA,TA,Y,SBrkr,1707,0,0,1707,0,0,2,1,2,1,TA,6,Typ,1,TA,Attchd,1981,RFn,2,511,TA,TA,Y,574,64,0,0,0,0,NA,NA,NA,0,1,2006,WD,Normal,228000 +171,50,RM,NA,12358,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,OldTown,Feedr,Norm,1Fam,1.5Fin,5,6,1941,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,360,Unf,0,360,720,GasA,TA,Y,SBrkr,854,0,528,1382,0,0,1,1,2,1,TA,7,Typ,0,NA,Detchd,1991,Unf,2,660,TA,TA,Y,237,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,128500 +172,20,RL,141,31770,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,BrkFace,Plywood,Stone,112,TA,TA,CBlock,TA,Gd,Gd,BLQ,639,Unf,0,441,1080,GasA,Fa,Y,SBrkr,1656,0,0,1656,1,0,1,0,3,1,TA,7,Typ,2,Gd,Attchd,1960,Fin,2,528,TA,TA,P,210,62,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,215000 +173,160,RL,44,5306,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,2Story,7,7,1987,1987,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,Gd,PConc,Gd,Gd,No,GLQ,495,Rec,215,354,1064,GasA,Gd,Y,SBrkr,1064,703,0,1767,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1987,RFn,2,504,Gd,TA,Y,441,35,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,239000 +174,20,RL,80,10197,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1961,1961,Gable,CompShg,WdShing,Wd Shng,BrkCmn,491,TA,TA,CBlock,TA,TA,No,ALQ,288,Rec,374,700,1362,GasA,TA,Y,SBrkr,1362,0,0,1362,1,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1961,Unf,2,504,TA,TA,Y,0,20,0,0,0,0,NA,NA,NA,0,6,2008,COD,Normal,163000 +175,20,RL,47,12416,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,6,5,1986,1986,Gable,CompShg,VinylSd,Plywood,Stone,132,TA,TA,CBlock,Gd,Fa,No,ALQ,1398,LwQ,208,0,1606,GasA,TA,Y,SBrkr,1651,0,0,1651,1,0,2,0,3,1,TA,7,Min2,1,TA,Attchd,1986,Fin,2,616,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,184000 
+176,20,RL,84,12615,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,6,7,1950,2001,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,Gd,Av,ALQ,477,Unf,0,725,1202,GasA,TA,Y,SBrkr,2158,0,0,2158,1,0,2,0,4,1,Gd,7,Typ,1,Gd,Attchd,1950,Unf,2,576,TA,TA,Y,0,29,39,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,243000 +177,60,RL,97,10029,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,ClearCr,Norm,Norm,1Fam,2Story,6,5,1988,1989,Gable,CompShg,Plywood,Plywood,BrkFace,268,Gd,TA,PConc,Gd,TA,No,GLQ,831,Unf,0,320,1151,GasA,TA,Y,SBrkr,1164,896,0,2060,0,1,2,1,4,1,TA,8,Typ,1,TA,Attchd,1988,Unf,2,521,TA,TA,Y,0,228,0,0,192,0,NA,NA,NA,0,9,2007,WD,Normal,211000 +178,50,RL,NA,13650,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1.5Fin,5,5,1958,1958,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,Gd,CBlock,TA,TA,No,ALQ,57,BLQ,441,554,1052,GasA,Ex,Y,SBrkr,1252,668,0,1920,1,0,2,0,4,1,Gd,8,Typ,1,Gd,Attchd,1958,Unf,2,451,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,172500 +179,20,RL,63,17423,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,StoneBr,Norm,Norm,1Fam,1Story,9,5,2008,2009,Hip,CompShg,VinylSd,VinylSd,Stone,748,Ex,TA,PConc,Ex,TA,No,GLQ,1904,Unf,0,312,2216,GasA,Ex,Y,SBrkr,2234,0,0,2234,1,0,2,0,1,1,Ex,9,Typ,1,Gd,Attchd,2009,Fin,3,1166,TA,TA,Y,0,60,0,0,0,0,NA,NA,NA,0,7,2009,New,Partial,501837 +180,30,RM,60,8520,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,968,968,GasA,TA,Y,SBrkr,968,0,0,968,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1935,Unf,2,480,Fa,TA,N,0,0,184,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,100000 +181,160,FV,NA,2117,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,6,5,2000,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,456,Gd,TA,PConc,Gd,TA,No,GLQ,436,Unf,0,320,756,GasA,Ex,Y,SBrkr,769,756,0,1525,0,0,2,1,3,1,Gd,5,Typ,1,TA,Detchd,2000,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,177000 
+182,70,RL,54,7588,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,6,1920,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,Fa,TA,No,LwQ,352,Unf,0,441,793,GasA,Gd,Y,SBrkr,901,901,0,1802,0,0,1,1,4,1,TA,9,Typ,1,Gd,Detchd,1920,Unf,1,216,Fa,TA,Y,0,0,40,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,200100 +183,20,RL,60,9060,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Artery,Norm,1Fam,1Story,5,6,1957,2006,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,98,TA,TA,PConc,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,SBrkr,1340,0,0,1340,0,0,1,0,3,1,TA,7,Typ,1,Gd,Attchd,1957,RFn,1,252,TA,TA,Y,116,0,0,180,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,120000 +184,50,RM,63,11426,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1362,1362,GasA,Ex,Y,SBrkr,1362,720,0,2082,0,0,2,1,3,1,Gd,6,Mod,0,NA,Detchd,2003,Unf,2,484,TA,TA,N,280,238,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,200000 +185,50,RL,92,7438,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Feedr,1Fam,1.5Fin,5,8,1908,1991,Gable,CompShg,AsbShng,Plywood,None,0,TA,TA,PConc,Fa,TA,No,Unf,0,Unf,0,504,504,GasA,Gd,Y,SBrkr,936,316,0,1252,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1986,Unf,2,576,TA,TA,Y,104,0,0,0,0,0,NA,MnPrv,NA,0,6,2006,WD,Normal,127000 +186,75,RM,90,22950,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,2.5Fin,10,9,1892,1993,Gable,WdShngl,Wd Sdng,Wd Sdng,None,0,Gd,Gd,BrkTil,TA,TA,Mn,Unf,0,Unf,0,1107,1107,GasA,Ex,Y,SBrkr,1518,1518,572,3608,0,0,2,1,4,1,Ex,12,Typ,2,TA,Detchd,1993,Unf,3,840,Ex,TA,Y,0,260,0,0,410,0,NA,GdPrv,NA,0,6,2006,WD,Normal,475000 +187,80,RL,NA,9947,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,7,5,1990,1991,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,Av,GLQ,611,Unf,0,577,1188,GasA,Ex,Y,SBrkr,1217,0,0,1217,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1990,Unf,2,497,TA,TA,Y,168,27,0,0,0,0,NA,GdPrv,NA,0,6,2009,WD,Normal,173000 
+188,50,RL,60,10410,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,7,1916,1987,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Fa,TA,No,Unf,0,Unf,0,660,660,GasA,Ex,Y,SBrkr,808,704,144,1656,0,0,2,1,3,1,TA,8,Min2,0,NA,Detchd,1916,Unf,1,180,Fa,Fa,N,0,0,0,140,0,0,NA,MnPrv,NA,0,8,2009,WD,Normal,135000 +189,90,RL,64,7018,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,Duplex,SFoyer,5,5,1979,1979,Gable,CompShg,Plywood,Plywood,Stone,275,TA,TA,CBlock,Gd,TA,Av,GLQ,1086,Unf,0,0,1086,GasA,TA,Y,SBrkr,1224,0,0,1224,2,0,0,2,2,2,TA,6,Typ,2,TA,Detchd,1979,Unf,2,528,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Alloca,153337 +190,120,RL,41,4923,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,2001,2002,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Ex,TA,Av,GLQ,1153,Unf,0,440,1593,GasA,Ex,Y,SBrkr,1593,0,0,1593,1,0,1,1,0,1,Ex,5,Typ,1,Gd,Attchd,2001,Fin,2,682,TA,TA,Y,0,120,0,0,224,0,NA,NA,NA,0,8,2008,WD,Normal,286000 +191,70,RL,70,10570,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2Story,8,8,1932,1994,Hip,CompShg,BrkFace,BrkFace,None,0,Gd,TA,CBlock,Gd,Gd,No,Rec,297,Unf,0,556,853,GasA,TA,Y,SBrkr,1549,1178,0,2727,0,0,2,1,3,1,Gd,10,Maj1,2,TA,Detchd,1932,Unf,2,440,TA,TA,Y,0,74,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,315000 +192,60,RL,NA,7472,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,2Story,7,9,1972,2004,Gable,CompShg,HdBoard,HdBoard,BrkFace,138,TA,TA,CBlock,TA,TA,No,ALQ,626,Unf,0,99,725,GasA,Gd,Y,SBrkr,725,754,0,1479,1,0,1,1,4,1,Gd,7,Typ,0,NA,Attchd,1972,Fin,2,484,TA,TA,Y,0,32,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,184000 +193,20,RL,68,9017,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,560,Unf,0,871,1431,GasA,Ex,Y,SBrkr,1431,0,0,1431,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1999,Fin,2,666,TA,TA,Y,0,35,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,192000 
+194,160,RM,24,2522,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Twnhs,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,Stone,50,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,970,970,GasA,Ex,Y,SBrkr,970,739,0,1709,0,0,2,0,3,1,Gd,7,Maj1,0,NA,Detchd,2004,Unf,2,380,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,130000 +195,20,RL,60,7180,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,7,1972,1972,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,390,Unf,0,474,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1989,Unf,1,352,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,127000 +196,160,RL,24,2280,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NPkVill,Norm,Norm,Twnhs,2Story,6,6,1976,1976,Gable,CompShg,Plywood,Brk Cmn,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,566,Unf,0,289,855,GasA,TA,Y,SBrkr,855,601,0,1456,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1976,Unf,2,440,TA,TA,Y,87,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,148500 +197,20,RL,79,9416,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2007,2007,Hip,CompShg,CemntBd,CmentBd,Stone,205,Ex,TA,PConc,Ex,TA,No,GLQ,1126,Unf,0,600,1726,GasA,Ex,Y,SBrkr,1726,0,0,1726,1,0,2,0,3,1,Ex,8,Typ,1,Gd,Attchd,2007,Fin,3,786,TA,TA,Y,171,138,0,0,266,0,NA,NA,NA,0,9,2007,New,Partial,311872 +198,75,RL,174,25419,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,1Fam,2Story,8,4,1918,1990,Gable,CompShg,Stucco,Stucco,None,0,Gd,Gd,PConc,TA,TA,No,GLQ,1036,LwQ,184,140,1360,GasA,Gd,Y,SBrkr,1360,1360,392,3112,1,1,2,0,4,1,Gd,8,Typ,1,Ex,Detchd,1918,Unf,2,795,TA,TA,Y,0,16,552,0,0,512,Ex,GdPrv,NA,0,3,2006,WD,Abnorml,235000 +199,75,RM,92,5520,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2.5Fin,6,6,1912,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,755,755,GasA,Ex,Y,SBrkr,929,929,371,2229,0,0,1,0,5,1,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,198,30,0,0,0,NA,MnPrv,NA,0,7,2009,WD,Abnorml,104000 
+200,20,RL,76,9591,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,BrkFace,262,Gd,TA,PConc,Ex,TA,Av,GLQ,1088,Unf,0,625,1713,GasA,Ex,Y,SBrkr,1713,0,0,1713,1,0,2,0,3,1,Ex,7,Typ,1,Gd,Attchd,2004,Fin,3,856,TA,TA,Y,0,26,0,0,170,0,NA,NA,NA,0,1,2009,WD,Normal,274900 +201,20,RM,80,8546,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1121,1121,GasA,Ex,Y,SBrkr,1121,0,0,1121,0,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,2003,RFn,2,440,TA,TA,Y,132,64,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,140000 +202,20,RL,75,10125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,6,6,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,641,LwQ,279,276,1196,GasA,TA,Y,SBrkr,1279,0,0,1279,0,1,2,0,3,1,TA,6,Typ,2,Fa,Detchd,1980,Unf,2,473,TA,TA,Y,238,83,0,0,0,0,NA,MnPrv,NA,0,2,2008,WD,Normal,171500 +203,50,RL,50,7000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,6,6,1924,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,BrkTil,Fa,TA,No,LwQ,617,Unf,0,0,617,GasA,Gd,Y,SBrkr,865,445,0,1310,0,0,2,0,2,1,TA,6,Min1,0,NA,Attchd,1924,Unf,1,398,TA,TA,Y,0,0,126,0,0,0,NA,NA,NA,0,5,2006,COD,Normal,112000 +204,120,RM,NA,4438,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,205,Gd,TA,PConc,Gd,TA,Av,GLQ,662,Unf,0,186,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,1,Gd,Attchd,2004,RFn,2,420,TA,TA,Y,149,0,0,0,0,0,NA,NA,NA,0,1,2008,WD,Normal,149000 +205,50,RM,50,3500,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,7,1947,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,312,Unf,0,408,720,GasA,TA,Y,SBrkr,720,564,0,1284,0,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1948,Unf,1,240,TA,TA,Y,0,35,0,0,0,0,NA,MnWw,NA,0,4,2009,WD,Normal,110000 
+206,20,RL,99,11851,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,1990,1990,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1424,1424,GasA,Ex,Y,SBrkr,1442,0,0,1442,0,0,2,0,3,1,TA,5,Typ,0,NA,Attchd,1990,RFn,2,500,TA,TA,Y,0,34,0,508,0,0,NA,NA,NA,0,5,2009,WD,Normal,180500 +207,20,RL,40,13673,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,1Story,5,5,1962,1962,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,1140,1140,GasA,TA,Y,SBrkr,1696,0,0,1696,0,0,1,1,3,1,TA,8,Min2,1,TA,Attchd,1962,RFn,1,349,TA,TA,Y,0,30,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,143900 +208,20,RL,NA,12493,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1960,1960,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,ALQ,419,Rec,306,375,1100,GasA,TA,Y,SBrkr,1100,0,0,1100,1,0,1,0,3,1,TA,6,Typ,1,Po,Attchd,1960,RFn,1,312,TA,TA,Y,355,0,0,0,0,0,NA,GdWo,NA,0,4,2008,WD,Normal,141000 +209,60,RL,NA,14364,Pave,NA,IR1,Low,AllPub,Inside,Mod,SawyerW,Norm,Norm,1Fam,2Story,7,5,1988,1989,Gable,CompShg,Plywood,Plywood,BrkFace,128,Gd,TA,CBlock,Gd,TA,Gd,GLQ,1065,Unf,0,92,1157,GasA,Ex,Y,SBrkr,1180,882,0,2062,1,0,2,1,3,1,TA,7,Typ,1,Gd,Attchd,1988,Fin,2,454,TA,TA,Y,60,55,0,0,154,0,NA,NA,NA,0,4,2007,WD,Normal,277000 +210,20,RL,75,8250,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,7,1964,1964,Hip,CompShg,HdBoard,HdBoard,Stone,260,TA,TA,CBlock,Gd,TA,No,Rec,787,Unf,0,305,1092,GasA,Ex,Y,SBrkr,1092,0,0,1092,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1964,RFn,2,504,TA,Gd,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,7,2008,WD,Normal,145000 +211,30,RL,67,5604,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,6,1925,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,CBlock,TA,TA,No,Rec,468,Unf,0,396,864,GasA,TA,N,FuseA,864,0,0,864,1,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,96,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,98000 
+212,20,RL,83,10420,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,6,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,36,Unf,0,1176,1212,GasA,Ex,Y,SBrkr,1212,0,0,1212,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2009,RFn,2,460,TA,TA,Y,100,22,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,186000 +213,60,FV,72,8640,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,822,Unf,0,78,900,GasA,Ex,Y,SBrkr,932,920,0,1852,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2009,RFn,2,644,TA,TA,Y,168,108,0,0,0,0,NA,NA,NA,0,7,2009,New,Partial,252678 +214,20,RL,43,13568,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1995,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,ALQ,716,Unf,0,274,990,GasA,Ex,Y,SBrkr,990,0,0,990,0,1,1,0,3,1,TA,5,Typ,0,NA,Attchd,1996,Unf,2,576,TA,TA,Y,224,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,156000 +215,60,RL,NA,10900,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,7,1977,1977,Gable,CompShg,HdBoard,HdBoard,BrkFace,153,TA,TA,CBlock,Gd,TA,No,GLQ,378,Unf,0,311,689,GasA,Ex,Y,SBrkr,689,703,0,1392,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1977,Fin,1,299,TA,TA,Y,0,36,0,0,0,0,NA,MnPrv,Shed,450,3,2010,WD,Normal,161750 +216,20,RL,72,10011,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1957,1996,Gable,CompShg,HdBoard,HdBoard,BrkFace,64,TA,TA,CBlock,TA,TA,No,BLQ,360,Unf,0,710,1070,GasA,TA,Y,SBrkr,1236,0,0,1236,0,1,1,0,2,1,Gd,6,Min1,1,Fa,Attchd,1957,Unf,1,447,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2006,WD,Normal,134450 +217,20,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,266,Gd,TA,PConc,Gd,TA,Mn,GLQ,946,Unf,0,490,1436,GasA,Ex,Y,SBrkr,1436,0,0,1436,1,0,2,0,3,1,Gd,8,Typ,0,NA,Attchd,2004,Unf,2,484,TA,TA,Y,139,98,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,210000 
+218,70,RM,57,9906,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,4,4,1925,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,686,686,GasA,Fa,N,SBrkr,810,518,0,1328,0,0,1,0,3,1,TA,8,Typ,0,NA,Detchd,1940,Unf,1,210,TA,TA,Y,0,172,60,0,0,0,NA,NA,NA,0,9,2006,WD,Family,107000 +219,50,RL,NA,15660,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,7,9,1939,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,312,Gd,Gd,CBlock,TA,TA,No,BLQ,341,Unf,0,457,798,GasA,Ex,Y,SBrkr,1137,817,0,1954,0,1,1,1,3,1,Gd,8,Typ,2,TA,Attchd,1939,Unf,2,431,TA,TA,Y,0,119,150,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,311500 +220,120,RL,43,3010,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,Gd,TA,PConc,Gd,TA,Av,GLQ,16,Unf,0,1232,1248,GasA,Ex,Y,SBrkr,1248,0,0,1248,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2005,Fin,2,438,TA,TA,Y,108,0,0,0,0,0,NA,NA,NA,0,3,2006,New,Partial,167240 +221,20,RL,73,8990,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1498,1498,GasA,Ex,Y,SBrkr,1498,0,0,1498,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2006,RFn,2,675,TA,TA,Y,351,33,0,0,0,0,NA,NA,NA,0,4,2006,New,Partial,204900 +222,60,RL,NA,8068,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1010,1010,GasA,Ex,Y,SBrkr,1010,1257,0,2267,0,0,2,1,4,1,Gd,8,Typ,1,TA,BuiltIn,2002,RFn,2,390,TA,TA,Y,120,46,0,0,0,0,NA,NA,NA,0,12,2009,ConLI,Normal,200000 +223,60,RL,85,11475,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,2Story,6,6,1975,1975,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,550,Unf,0,163,713,GasA,TA,Y,SBrkr,811,741,0,1552,1,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1975,RFn,2,434,TA,TA,Y,209,208,0,0,0,0,NA,MnPrv,NA,0,2,2006,WD,Normal,179900 
+224,20,RL,70,10500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,524,LwQ,180,160,864,GasA,Gd,Y,SBrkr,864,0,0,864,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1989,Unf,2,576,TA,TA,Y,216,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Abnorml,97000 +225,20,RL,103,13472,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2003,2003,Hip,CompShg,VinylSd,VinylSd,BrkFace,922,Ex,TA,PConc,Ex,TA,Gd,GLQ,56,Unf,0,2336,2392,GasA,Ex,Y,SBrkr,2392,0,0,2392,0,0,2,0,3,1,Ex,8,Typ,1,Ex,Attchd,2003,Fin,3,968,TA,TA,Y,248,105,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,386250 +226,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,5,5,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,142,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,630,630,GasA,TA,Y,SBrkr,630,672,0,1302,0,0,2,1,3,1,TA,6,Typ,0,NA,Detchd,1991,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,COD,Abnorml,112000 +227,60,RL,82,9950,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1995,1995,Gable,CompShg,VinylSd,VinylSd,BrkFace,290,Gd,TA,PConc,Gd,TA,No,GLQ,565,Unf,0,638,1203,GasA,Ex,Y,SBrkr,1214,1306,0,2520,0,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1995,RFn,3,721,TA,TA,Y,224,114,0,0,0,0,NA,NA,NA,0,6,2007,WD,Abnorml,290000 +228,160,RM,21,1869,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,6,1970,1970,Gable,CompShg,HdBoard,HdBoard,BrkFace,127,TA,TA,CBlock,TA,TA,No,Rec,321,Unf,0,162,483,GasA,TA,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1987,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,106000 +229,20,RL,70,8521,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1967,1967,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,842,Unf,0,70,912,GasA,TA,Y,SBrkr,912,0,0,912,0,0,1,0,3,1,TA,5,Typ,1,Fa,Detchd,1974,Unf,1,336,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD,Normal,125000 
+230,120,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,Gd,TA,PConc,Gd,TA,Av,GLQ,16,Unf,0,1357,1373,GasA,Ex,Y,SBrkr,1555,0,0,1555,0,0,2,0,2,1,Gd,7,Typ,1,TA,Attchd,2005,Fin,2,430,TA,TA,Y,143,20,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,192500 +231,20,RL,73,8760,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1959,1959,Hip,CompShg,MetalSd,MetalSd,BrkFace,220,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1194,1194,GasA,TA,Y,SBrkr,1194,0,0,1194,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,312,TA,TA,Y,0,0,120,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,148000 +232,60,RL,174,15138,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1995,1996,Gable,CompShg,VinylSd,VinylSd,BrkFace,506,Gd,TA,PConc,Gd,TA,No,GLQ,689,Unf,0,773,1462,GasA,Ex,Y,SBrkr,1490,1304,0,2794,1,0,2,1,4,1,Ex,9,Typ,1,TA,Attchd,1995,Fin,3,810,TA,TA,Y,0,146,202,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,403000 +233,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1972,1972,Gable,CompShg,HdBoard,HdBoard,BrkFace,297,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,483,483,GasA,TA,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,TA,5,Typ,1,Po,Attchd,1972,Unf,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,94500 +234,20,RL,75,10650,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1976,1976,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,Gd,Av,LwQ,182,ALQ,712,0,894,GasA,TA,Y,SBrkr,894,0,0,894,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1976,Unf,1,308,TA,TA,Y,365,0,0,0,0,0,NA,MnPrv,NA,0,2,2010,WD,Normal,128200 +235,60,RL,NA,7851,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,NA,NA,Gd,TA,PConc,Gd,TA,No,GLQ,625,Unf,0,235,860,GasA,Ex,Y,SBrkr,860,1100,0,1960,1,0,2,1,4,1,Gd,8,Typ,2,TA,BuiltIn,2002,Fin,2,440,TA,TA,Y,288,48,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,216500 
+236,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,TwnhsE,2Story,6,3,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,604,TA,TA,CBlock,TA,TA,No,ALQ,358,Unf,0,125,483,GasA,TA,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1971,Unf,1,264,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,89500 +237,20,RL,65,8773,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,98,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1390,1414,GasA,Ex,Y,SBrkr,1414,0,0,1414,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,494,TA,TA,Y,132,105,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,185500 +238,60,RL,NA,9453,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,RRNe,Norm,1Fam,2Story,7,7,1993,2003,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,No,BLQ,402,Unf,0,594,996,GasA,Ex,Y,SBrkr,1014,730,0,1744,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1993,RFn,2,457,TA,TA,Y,370,70,0,238,0,0,NA,NA,NA,0,2,2010,WD,Normal,194500 +239,20,RL,93,12030,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2007,Hip,CompShg,VinylSd,VinylSd,BrkFace,254,Ex,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1694,1694,GasA,Ex,Y,SBrkr,1694,0,0,1694,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,Fin,3,818,TA,TA,Y,168,228,0,0,0,0,NA,NA,NA,0,12,2007,New,Partial,318000 +240,50,RL,52,8741,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,6,4,1945,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,Fa,No,LwQ,94,Unf,0,641,735,GasA,TA,Y,FuseA,798,689,0,1487,0,0,1,1,3,1,TA,7,Typ,1,Gd,Detchd,1949,Unf,1,220,TA,TA,Y,0,140,0,0,0,0,NA,MnPrv,NA,0,4,2010,WD,Normal,113000 +241,20,FV,75,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,36,Gd,TA,PConc,Gd,TA,Av,GLQ,1078,Unf,0,488,1566,GasA,Ex,Y,SBrkr,1566,0,0,1566,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2008,RFn,2,750,TA,TA,Y,144,168,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,262500 
+242,30,RM,40,3880,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,9,1945,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,329,Unf,0,357,686,GasA,Gd,Y,SBrkr,866,0,0,866,0,0,1,0,2,1,Gd,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,58,42,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,110500 +243,50,RM,63,5000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,4,1900,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,540,540,GasA,Gd,N,FuseA,889,551,0,1440,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1940,Unf,1,352,Fa,TA,Y,0,0,77,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,79000 +244,160,RL,75,10762,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,TwnhsE,2Story,6,6,1980,1980,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,626,626,GasA,TA,Y,SBrkr,626,591,0,1217,0,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1980,RFn,1,288,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,120000 +245,60,RL,NA,8880,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1994,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,695,Unf,0,253,948,GasA,Ex,Y,SBrkr,1222,888,0,2110,1,0,2,1,3,1,Gd,8,Typ,2,Fa,Attchd,1994,RFn,2,463,TA,TA,Y,0,130,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,205000 +246,20,RL,80,10400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,5,1988,1988,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,102,TA,TA,CBlock,Gd,TA,Av,GLQ,929,Unf,0,916,1845,GasA,Gd,Y,SBrkr,1872,0,0,1872,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1988,Fin,2,604,TA,TA,Y,197,39,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,241500 +247,190,RM,69,9142,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,GasA,Gd,N,FuseP,908,1020,0,1928,0,0,2,0,4,2,Fa,9,Typ,0,NA,Detchd,1910,Unf,1,440,Po,Po,Y,0,60,112,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,137000 +248,20,RL,75,11310,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1954,1954,Hip,CompShg,Wd 
Sdng,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1367,1367,GasA,Ex,Y,SBrkr,1375,0,0,1375,0,0,1,0,2,1,TA,5,Typ,1,TA,Attchd,1954,Unf,2,451,TA,TA,Y,0,30,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,140000 +249,60,RL,72,11317,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,101,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,840,840,GasA,Ex,Y,SBrkr,840,828,0,1668,0,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,2003,RFn,2,500,TA,TA,Y,144,68,0,0,0,0,NA,NA,NA,0,9,2007,WD,Normal,180000 +250,50,RL,NA,159000,Pave,NA,IR2,Low,AllPub,CulDSac,Sev,ClearCr,Norm,Norm,1Fam,1.5Fin,6,7,1958,2006,Gable,CompShg,Wd Sdng,HdBoard,BrkCmn,472,Gd,TA,CBlock,Gd,TA,Gd,Rec,697,Unf,0,747,1444,GasA,Gd,Y,SBrkr,1444,700,0,2144,0,1,2,0,4,1,Gd,7,Typ,2,TA,Attchd,1958,Fin,2,389,TA,TA,Y,0,98,0,0,0,0,NA,NA,Shed,500,6,2007,WD,Normal,277000 +251,30,RL,55,5350,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,3,2,1940,1966,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,Po,CBlock,TA,TA,No,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,1306,0,0,1306,0,0,1,0,3,1,Fa,6,Mod,0,NA,NA,NA,NA,0,0,NA,NA,Y,263,0,0,0,0,0,NA,GdWo,Shed,450,5,2010,WD,Normal,76500 +252,120,RM,44,4750,Pave,NA,IR1,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,TwnhsE,1Story,8,5,2006,2007,Hip,CompShg,VinylSd,VinylSd,Stone,481,Gd,TA,PConc,Gd,TA,Gd,GLQ,1573,Unf,0,0,1573,GasA,Ex,Y,SBrkr,1625,0,0,1625,1,1,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2006,Fin,2,538,TA,TA,Y,123,0,0,0,153,0,NA,NA,NA,0,12,2007,WD,Family,235000 +253,60,RL,65,8366,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,798,798,GasA,Ex,Y,SBrkr,798,842,0,1640,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,520,TA,TA,Y,138,45,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,173000 
+254,80,RL,85,9350,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,7,1964,1991,Hip,CompShg,HdBoard,HdBoard,BrkFace,108,TA,TA,CBlock,Gd,TA,Gd,LwQ,270,ALQ,580,452,1302,GasA,Ex,Y,SBrkr,1302,0,0,1302,0,1,2,0,3,1,Gd,7,Min1,0,NA,Attchd,1964,RFn,1,309,TA,TA,Y,333,0,0,0,0,0,NA,MnPrv,NA,0,10,2007,CWD,Normal,158000 +255,20,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1957,1957,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Rec,922,Unf,0,392,1314,GasA,TA,Y,SBrkr,1314,0,0,1314,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1957,RFn,1,294,TA,TA,Y,250,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,145000 +256,60,RL,66,8738,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,302,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,975,975,GasA,Ex,Y,SBrkr,1005,1286,0,2291,0,0,2,1,4,1,Gd,8,Typ,1,TA,BuiltIn,1999,Fin,2,429,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,2,2006,WD,Normal,230000 +257,60,FV,64,8791,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,6,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Rec,503,Unf,0,361,864,GasA,Ex,Y,SBrkr,864,864,0,1728,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2003,RFn,2,673,TA,TA,Y,216,56,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,207500 +258,20,RL,68,8814,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,180,Gd,TA,PConc,Gd,TA,No,GLQ,1334,Unf,0,270,1604,GasA,Ex,Y,SBrkr,1604,0,0,1604,1,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,2,660,TA,TA,Y,123,110,0,0,0,0,NA,NA,NA,0,3,2009,WD,Abnorml,220000 +259,60,RL,80,12435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,172,Gd,TA,PConc,Gd,TA,No,GLQ,361,Unf,0,602,963,GasA,Ex,Y,SBrkr,963,829,0,1792,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2001,RFn,2,564,TA,TA,Y,0,96,0,245,0,0,NA,NA,NA,0,5,2008,WD,Normal,231500 
+260,20,RM,70,12702,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,5,1956,1956,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,PConc,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,Y,FuseA,882,0,0,882,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1956,Unf,1,308,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,97000 +261,80,RL,120,19296,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,1Fam,SLvl,6,5,1962,1962,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,399,TA,TA,CBlock,TA,TA,Gd,Rec,672,ALQ,690,0,1362,GasA,TA,Y,SBrkr,1382,0,0,1382,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1991,Unf,2,884,TA,TA,Y,0,0,252,0,0,0,NA,GdWo,NA,0,5,2009,WD,Normal,176000 +262,60,RL,69,9588,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2007,2007,Gable,CompShg,CemntBd,CmentBd,Stone,270,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1482,1482,GasA,Ex,Y,SBrkr,1482,1092,0,2574,0,0,2,1,3,1,Ex,10,Typ,1,Gd,BuiltIn,2007,Fin,3,868,TA,TA,Y,0,148,0,0,0,0,NA,NA,NA,0,11,2007,New,Partial,276000 +263,80,RL,88,8471,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,SLvl,6,7,1977,1995,Gable,CompShg,HdBoard,Plywood,BrkFace,46,TA,TA,CBlock,Gd,Gd,Av,ALQ,506,Unf,0,0,506,GasA,TA,Y,SBrkr,1212,0,0,1212,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1978,Unf,2,492,TA,TA,Y,292,12,0,0,0,0,NA,GdWo,NA,0,7,2006,WD,Normal,151000 +264,50,RM,50,5500,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,7,1929,2001,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,LwQ,234,ALQ,692,0,926,GasA,TA,Y,SBrkr,926,0,390,1316,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1974,Unf,2,484,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,130000 +265,30,RM,30,5232,Pave,Grvl,IR3,Bnk,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,5,5,1925,2004,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,680,680,GasA,Gd,N,FuseP,764,0,0,764,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1965,Unf,2,504,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,73000 
+266,20,RL,78,12090,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1981,1981,Gable,CompShg,MetalSd,MetalSd,BrkFace,210,TA,Gd,CBlock,Gd,TA,No,GLQ,588,LwQ,228,606,1422,GasA,TA,Y,SBrkr,1422,0,0,1422,0,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1981,Fin,2,576,TA,TA,Y,276,0,0,0,0,0,NA,GdPrv,NA,0,6,2008,WD,Normal,175500 +267,60,RL,70,11207,Pave,NA,IR1,HLS,AllPub,FR2,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,GLQ,714,Unf,0,88,802,GasA,Gd,Y,SBrkr,802,709,0,1511,1,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,1997,Fin,2,413,TA,TA,Y,95,75,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,185000 +268,75,RL,60,8400,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,SWISU,Norm,Norm,1Fam,2.5Fin,5,8,1939,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,LwQ,378,Unf,0,342,720,GasA,Ex,Y,SBrkr,1052,720,420,2192,0,0,2,1,4,1,Gd,8,Typ,1,Gd,Detchd,1939,Unf,1,240,TA,TA,Y,262,24,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,179500 +269,30,RM,71,6900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,5,6,1940,1955,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,403,Rec,125,212,740,GasA,Ex,Y,SBrkr,778,0,0,778,0,0,1,0,2,1,TA,4,Typ,1,Gd,Detchd,1966,Fin,1,924,Ex,Ex,Y,0,25,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,120500 +270,20,RL,NA,7917,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,6,7,1976,1976,Hip,CompShg,HdBoard,HdBoard,BrkFace,174,TA,Gd,CBlock,TA,Gd,No,BLQ,751,Unf,0,392,1143,GasA,TA,Y,SBrkr,1113,0,0,1113,1,0,1,1,3,1,TA,6,Typ,1,Fa,Attchd,1987,RFn,1,504,TA,Gd,Y,370,30,0,0,0,0,NA,GdPrv,NA,0,5,2007,WD,Normal,148000 +271,60,FV,84,10728,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1095,1095,GasA,Gd,Y,SBrkr,1095,844,0,1939,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,3,1053,TA,TA,Y,192,51,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,266000 
+272,20,RL,73,39104,Pave,NA,IR1,Low,AllPub,CulDSac,Sev,ClearCr,Norm,Norm,1Fam,1Story,7,7,1954,2005,Flat,Membran,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Gd,LwQ,226,GLQ,1063,96,1385,GasA,Ex,Y,SBrkr,1363,0,0,1363,1,0,1,0,2,1,TA,5,Mod,2,TA,Attchd,1954,Unf,2,439,TA,TA,Y,81,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,241500 +273,60,RL,92,11764,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,7,1999,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,348,Gd,TA,PConc,Gd,TA,No,GLQ,524,Unf,0,628,1152,GasA,Ex,Y,SBrkr,1164,1106,0,2270,0,0,2,1,4,1,Gd,9,Typ,1,Gd,Attchd,1999,Fin,3,671,TA,TA,Y,132,57,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,290000 +274,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,6,6,1958,1988,Hip,CompShg,Wd Sdng,Wd Sdng,BrkCmn,183,TA,TA,CBlock,TA,TA,No,Rec,620,LwQ,620,0,1240,GasA,Gd,Y,SBrkr,1632,0,0,1632,1,0,2,0,3,1,TA,6,Min1,1,Gd,Attchd,1958,RFn,1,338,TA,TA,Y,289,0,0,0,0,0,NA,MnPrv,NA,0,4,2009,WD,Normal,139000 +275,20,RL,76,8314,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,7,1982,1982,Gable,CompShg,HdBoard,ImStucc,None,0,TA,TA,CBlock,TA,TA,Gd,ALQ,546,Unf,0,270,816,GasA,TA,Y,SBrkr,816,0,0,816,0,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1982,Unf,1,264,TA,TA,Y,168,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,124500 +276,50,RL,55,7264,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,7,7,1925,2007,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,Gd,BrkTil,TA,TA,No,Unf,0,Unf,0,952,952,GasW,Gd,N,SBrkr,952,596,0,1548,0,0,2,1,3,1,Ex,5,Typ,0,NA,Detchd,1978,Unf,2,672,TA,TA,Y,74,0,0,0,144,0,NA,NA,NA,0,10,2009,WD,Normal,205000 +277,20,RL,129,9196,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1560,1560,GasA,Ex,Y,SBrkr,1560,0,0,1560,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2003,Fin,2,573,TA,TA,Y,100,150,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,201000 
+278,20,RL,140,19138,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,1Story,4,5,1951,1951,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,LwQ,120,Unf,0,744,864,GasA,Ex,Y,SBrkr,864,0,0,864,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1951,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,141000 +279,20,RL,107,14450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2006,2007,Gable,CompShg,CemntBd,CmentBd,BrkFace,315,Ex,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,2121,2121,GasA,Ex,Y,SBrkr,2121,0,0,2121,0,0,2,1,3,1,Ex,8,Typ,1,Ex,Attchd,2007,Fin,3,732,TA,TA,Y,124,98,0,0,142,0,NA,NA,NA,0,5,2007,New,Partial,415298 +280,60,RL,83,10005,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,2Story,7,5,1977,1977,Hip,CompShg,Plywood,Plywood,BrkFace,299,TA,TA,CBlock,Gd,TA,No,BLQ,392,Unf,0,768,1160,GasA,Ex,Y,SBrkr,1156,866,0,2022,0,0,2,1,4,1,TA,8,Typ,1,TA,Attchd,1977,Fin,2,505,TA,TA,Y,288,117,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,192000 +281,60,RL,82,11287,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,6,1989,1989,Gable,CompShg,Plywood,Plywood,BrkFace,340,Gd,TA,CBlock,Gd,TA,Av,GLQ,421,Unf,0,386,807,GasA,Gd,Y,SBrkr,1175,807,0,1982,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1989,Fin,2,575,TA,TA,Y,0,84,0,196,0,0,NA,NA,NA,0,1,2007,WD,Normal,228500 +282,20,FV,60,7200,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,6,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,68,Gd,TA,PConc,Gd,TA,No,GLQ,905,Unf,0,357,1262,GasA,Gd,Y,SBrkr,1262,0,0,1262,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2006,Fin,2,572,TA,TA,Y,0,120,0,0,0,0,NA,NA,NA,0,5,2006,New,Partial,185000 +283,120,RL,34,5063,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,1Story,7,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,166,Gd,TA,PConc,Gd,TA,No,GLQ,904,Unf,0,410,1314,GasA,Ex,Y,SBrkr,1314,0,0,1314,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2008,RFn,2,626,TA,TA,Y,172,62,0,0,0,0,NA,NA,NA,0,4,2009,ConLw,Normal,207500 
+284,20,RL,74,9612,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Feedr,Norm,1Fam,1Story,8,5,2008,2009,Gable,CompShg,VinylSd,VinylSd,Stone,72,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1468,1468,GasA,Ex,Y,SBrkr,1468,0,0,1468,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2008,Fin,3,898,TA,TA,Y,210,150,0,0,0,0,NA,NA,NA,0,12,2009,New,Partial,244600 +285,120,RL,50,8012,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1992,1992,Gable,CompShg,Plywood,ImStucc,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,430,Unf,0,1145,1575,GasA,Gd,Y,SBrkr,1575,0,0,1575,1,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,1992,RFn,2,529,TA,TA,Y,0,0,52,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,179200 +286,160,FV,35,4251,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2006,2007,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,625,625,GasA,Ex,Y,SBrkr,625,625,0,1250,0,0,2,1,2,1,Gd,5,Typ,0,NA,Detchd,2006,RFn,2,528,TA,TA,Y,0,54,0,0,0,0,NA,NA,NA,0,6,2007,New,Partial,164700 +287,50,RL,77,9786,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,6,7,1962,1981,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,600,Unf,0,312,912,GasA,TA,Y,SBrkr,1085,649,0,1734,0,0,1,1,3,1,Gd,7,Typ,1,Gd,Attchd,1962,RFn,2,440,TA,TA,Y,0,0,0,0,128,0,NA,GdPrv,NA,0,6,2006,WD,Normal,159000 +288,20,RL,NA,8125,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,4,1971,1971,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,614,Unf,0,244,858,GasA,TA,Y,SBrkr,858,0,0,858,0,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,88000 +289,20,RL,NA,9819,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1967,1967,Gable,CompShg,MetalSd,MetalSd,BrkFace,31,TA,Gd,CBlock,TA,TA,No,BLQ,450,Unf,0,432,882,GasA,TA,Y,SBrkr,900,0,0,900,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1970,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,2,2010,WD,Normal,122000 +290,70,RL,60,8730,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Norm,1Fam,2Story,6,7,1915,2003,Gable,CompShg,Wd 
Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,698,698,GasA,Ex,Y,FuseA,698,698,0,1396,0,0,1,0,3,1,TA,7,Typ,0,NA,Detchd,2003,Unf,1,384,TA,TA,Y,0,0,0,0,259,0,NA,NA,NA,0,7,2007,WD,Normal,153575 +291,60,RL,120,15611,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1079,1079,GasA,Ex,Y,SBrkr,1079,840,0,1919,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,2,685,Gd,TA,Y,0,51,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,233230 +292,190,RL,55,5687,Pave,Grvl,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,2fmCon,2Story,5,6,1912,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Fa,PConc,TA,Fa,No,Rec,210,Unf,0,570,780,GasA,Ex,N,SBrkr,936,780,0,1716,1,0,2,0,6,1,Fa,9,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,184,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,135900 +293,50,RL,60,11409,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,4,1949,2008,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,292,Unf,0,476,768,GasA,Gd,Y,SBrkr,1148,568,0,1716,0,0,1,1,3,1,TA,8,Min2,1,Gd,Attchd,1949,Unf,1,281,TA,TA,Y,0,0,0,0,160,0,NA,NA,NA,0,1,2009,WD,Normal,131000 +294,60,RL,NA,16659,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,PosA,Norm,1Fam,2Story,7,7,1977,1994,Gable,CompShg,Plywood,Plywood,BrkFace,34,TA,TA,CBlock,TA,TA,No,ALQ,795,Unf,0,0,795,GasA,Fa,Y,SBrkr,1468,795,0,2263,1,0,2,1,3,1,Gd,9,Typ,1,TA,Attchd,1977,Fin,2,539,TA,TA,Y,0,250,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,235000 +295,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1953,1953,Hip,CompShg,HdBoard,HdBoard,Stone,238,TA,TA,CBlock,TA,TA,No,GLQ,1285,Unf,0,131,1416,GasA,TA,Y,SBrkr,1644,0,0,1644,1,0,1,0,3,1,TA,7,Typ,2,Gd,Attchd,1953,Fin,2,418,TA,TA,Y,110,0,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,167000 
+296,80,RL,37,7937,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,6,1984,1984,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,GLQ,819,Unf,0,184,1003,GasA,TA,Y,SBrkr,1003,0,0,1003,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1984,Unf,2,588,TA,TA,Y,120,0,0,0,0,0,NA,GdPrv,NA,0,3,2006,WD,Normal,142500 +297,50,RM,75,13710,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,5,5,1950,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,420,Unf,0,490,910,GasA,TA,Y,FuseA,910,648,0,1558,0,0,1,1,4,1,TA,6,Typ,0,NA,Attchd,1950,Unf,1,282,TA,TA,Y,289,0,0,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,152000 +298,60,FV,66,7399,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,1997,1998,Hip,CompShg,VinylSd,VinylSd,BrkFace,1600,Gd,TA,PConc,Gd,TA,No,BLQ,649,Unf,0,326,975,GasA,Ex,Y,SBrkr,975,975,0,1950,0,0,2,1,3,1,Gd,7,Typ,1,TA,Detchd,1997,RFn,2,576,TA,TA,Y,0,10,0,0,198,0,NA,NA,NA,0,6,2007,WD,Normal,239000 +299,60,RL,90,11700,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,6,1968,1968,Mansard,CompShg,HdBoard,AsphShn,BrkFace,365,Gd,TA,CBlock,TA,TA,No,ALQ,384,Rec,175,143,702,GasA,Gd,Y,SBrkr,1041,702,0,1743,0,1,1,2,3,1,TA,7,Typ,1,Gd,Attchd,1968,Unf,2,539,TA,TA,Y,224,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,175000 +300,20,RL,80,14000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,8,1950,2004,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,1092,1092,GasA,Ex,Y,SBrkr,1152,0,0,1152,0,1,1,0,3,1,Gd,6,Typ,1,Gd,Attchd,1950,Unf,1,300,TA,TA,Y,0,36,0,0,0,0,NA,GdPrv,NA,0,8,2009,WD,Family,158500 +301,190,RL,90,15750,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,2fmCon,1Story,5,5,1953,1953,Hip,CompShg,MetalSd,MetalSd,BrkFace,56,TA,TA,CBlock,TA,TA,Mn,BLQ,841,Unf,0,324,1165,GasA,TA,Y,SBrkr,1336,0,0,1336,1,0,1,0,2,1,TA,5,Typ,2,Gd,Attchd,1953,Unf,1,375,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,157000 
+302,60,RL,66,16226,Pave,NA,IR3,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,1998,1999,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,281,Unf,0,747,1028,GasA,Ex,Y,SBrkr,1210,1242,0,2452,0,0,2,1,4,1,Gd,9,Typ,1,TA,BuiltIn,1998,Fin,2,683,TA,TA,Y,208,50,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,267000 +303,20,RL,118,13704,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,150,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1541,1541,GasA,Ex,Y,SBrkr,1541,0,0,1541,0,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,3,843,TA,TA,Y,468,81,0,0,0,0,NA,NA,NA,0,1,2006,WD,Normal,205000 +304,20,RL,70,9800,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,7,1972,1972,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,No,ALQ,894,Unf,0,0,894,GasA,TA,Y,SBrkr,894,0,0,894,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1975,Unf,2,552,TA,TA,Y,256,0,0,0,0,0,NA,GdWo,NA,0,7,2006,WD,Abnorml,149900 +305,75,RM,87,18386,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2.5Fin,7,9,1880,2002,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1470,1470,GasA,Ex,Y,SBrkr,1675,1818,0,3493,0,0,3,0,3,1,Gd,10,Typ,1,Ex,Attchd,2003,Unf,3,870,TA,TA,Y,302,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,295000 +306,20,RL,80,10386,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2004,2005,Gable,CompShg,CemntBd,CmentBd,Stone,246,Gd,TA,PConc,Gd,TA,No,GLQ,1464,Unf,0,536,2000,GasA,Ex,Y,SBrkr,2000,0,0,2000,1,0,2,0,3,1,Gd,8,Typ,0,NA,Attchd,2004,Fin,3,888,TA,TA,Y,168,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,305900 +307,60,RL,116,13474,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,1Fam,2Story,7,5,1990,1991,Gable,CompShg,HdBoard,Plywood,BrkFace,246,Gd,TA,CBlock,Gd,TA,No,ALQ,700,Unf,0,0,700,GasA,Gd,Y,SBrkr,1122,1121,0,2243,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1990,RFn,3,746,TA,TA,Y,127,44,224,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,225000 
+308,50,RM,NA,7920,Pave,Grvl,IR1,Lvl,AllPub,Inside,Gtl,IDOTRR,Artery,Norm,1Fam,1.5Fin,6,7,1920,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Fa,CBlock,TA,TA,No,Unf,0,Unf,0,319,319,GasA,TA,Y,FuseA,1035,371,0,1406,0,0,1,0,3,1,Fa,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,144,0,0,0,0,NA,MnPrv,NA,0,3,2008,WD,Normal,89500 +309,30,RL,NA,12342,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1940,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,262,Unf,0,599,861,GasA,Ex,Y,SBrkr,861,0,0,861,0,0,1,0,1,1,TA,4,Typ,0,NA,Detchd,1961,Unf,2,539,TA,TA,Y,158,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,82500 +310,20,RL,90,12378,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,Gd,GLQ,1274,Unf,0,622,1896,GasA,Ex,Y,SBrkr,1944,0,0,1944,1,0,2,0,3,1,Ex,8,Typ,3,Ex,Attchd,2003,Fin,3,708,TA,TA,Y,208,175,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,360000 +311,60,RL,NA,7685,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1993,1994,Gable,CompShg,HdBoard,HdBoard,BrkFace,112,TA,TA,PConc,Gd,TA,No,ALQ,518,Unf,0,179,697,GasA,Gd,Y,SBrkr,697,804,0,1501,0,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,1993,Fin,2,420,TA,TA,Y,190,63,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,165600 +312,20,RL,50,8000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1948,2002,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,680,Unf,0,292,972,GasA,Ex,Y,SBrkr,972,0,0,972,1,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1948,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,132000 +313,190,RM,65,7800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,2fmCon,1.5Fin,5,7,1939,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Gd,TA,Mn,Rec,507,Unf,0,286,793,GasA,TA,Y,SBrkr,793,325,0,1118,1,0,1,0,3,1,TA,5,Typ,1,Gd,Detchd,1939,Unf,2,410,TA,TA,Y,0,0,0,0,271,0,NA,MnPrv,NA,0,5,2006,WD,Normal,119900 
+314,20,RL,150,215245,Pave,NA,IR3,Low,AllPub,Inside,Sev,Timber,Norm,Norm,1Fam,1Story,7,5,1965,1965,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,1236,Rec,820,80,2136,GasW,TA,Y,SBrkr,2036,0,0,2036,2,0,2,0,3,1,TA,8,Typ,2,Gd,Attchd,1965,RFn,2,513,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,375000 +315,70,RM,60,9600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,7,1925,1990,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,Gd,No,LwQ,16,Unf,0,712,728,GasA,Ex,Y,SBrkr,832,809,0,1641,0,1,1,1,3,1,Ex,6,Typ,1,Gd,Detchd,1925,Unf,2,546,Fa,TA,Y,0,0,234,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,178000 +316,60,RL,71,7795,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,425,Unf,0,291,716,GasA,Ex,Y,SBrkr,716,716,0,1432,1,0,2,1,3,1,Gd,6,Typ,1,Gd,Attchd,2004,Fin,2,432,TA,TA,Y,100,51,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,188500 +317,60,RL,94,13005,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,7,1980,1980,Gable,CompShg,CemntBd,CmentBd,BrkFace,278,Gd,TA,CBlock,Gd,TA,No,GLQ,692,Unf,0,153,845,GasA,TA,Y,SBrkr,1153,1200,0,2353,1,0,2,1,4,1,Ex,10,Typ,1,TA,Attchd,1983,RFn,2,484,TA,TA,Y,288,195,0,0,0,0,NA,GdPrv,NA,0,8,2009,WD,Normal,260000 +318,60,FV,75,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1088,1088,GasA,Ex,Y,SBrkr,1088,871,0,1959,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,3,1025,TA,TA,Y,208,46,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,270000 +319,60,RL,90,9900,Pave,NA,Reg,Low,AllPub,Inside,Mod,NoRidge,Norm,Norm,1Fam,2Story,7,5,1993,1993,Gable,CompShg,HdBoard,HdBoard,BrkFace,256,Gd,TA,PConc,Gd,TA,Gd,GLQ,987,Unf,0,360,1347,GasA,Ex,Y,SBrkr,1372,1274,0,2646,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1993,RFn,3,656,TA,TA,Y,340,60,144,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,260000 
+320,80,RL,NA,14115,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,7,5,1980,1980,Gable,CompShg,Plywood,Plywood,BrkFace,225,TA,TA,CBlock,Gd,TA,Av,GLQ,1036,Unf,0,336,1372,GasA,TA,Y,SBrkr,1472,0,0,1472,1,0,2,0,3,1,TA,6,Typ,2,TA,Attchd,1980,Unf,2,588,TA,TA,Y,233,48,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,187500 +321,60,RL,111,16259,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,370,TA,TA,PConc,Ex,Gd,Av,Unf,0,Unf,0,1249,1249,GasA,Ex,Y,SBrkr,1249,1347,0,2596,0,0,3,1,4,1,Gd,9,Typ,0,NA,Attchd,2006,RFn,3,840,TA,TA,Y,240,154,0,0,0,0,NA,NA,NA,0,9,2006,New,Partial,342643 +322,60,RL,99,12099,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,388,Gd,TA,PConc,Ex,TA,Av,GLQ,970,Unf,0,166,1136,GasA,Ex,Y,SBrkr,1136,1332,0,2468,1,0,2,1,4,1,Gd,10,Typ,1,Gd,BuiltIn,2004,Fin,3,872,TA,TA,Y,184,154,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,354000 +323,60,RL,86,10380,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1986,1987,Gable,CompShg,Plywood,Plywood,BrkFace,172,Gd,TA,CBlock,TA,TA,Gd,LwQ,28,ALQ,1474,0,1502,GasA,Ex,Y,SBrkr,1553,1177,0,2730,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1987,Fin,2,576,TA,TA,Y,201,96,0,0,0,0,NA,MnPrv,NA,0,8,2007,WD,Normal,301000 +324,20,RM,49,5820,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,3,8,1955,2005,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,256,Unf,0,906,1162,GasA,Ex,Y,SBrkr,1163,0,0,1163,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1955,Unf,1,220,Fa,TA,Y,142,98,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,126175 +325,80,RL,96,11275,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,PosN,Norm,1Fam,SLvl,7,7,1967,2007,Mansard,WdShake,Wd Sdng,Wd Sdng,BrkFace,300,Gd,Gd,CBlock,Gd,TA,No,Unf,0,Unf,0,710,710,GasA,Ex,Y,SBrkr,1898,1080,0,2978,0,0,2,1,5,1,Gd,11,Typ,1,Gd,BuiltIn,1961,Fin,2,564,TA,TA,Y,240,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,242000 
+326,45,RM,50,5000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,RRAe,Norm,1Fam,1.5Unf,5,6,1941,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,116,Unf,0,604,720,GasA,Po,N,FuseF,803,0,0,803,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1941,Unf,2,360,TA,TA,Y,0,0,244,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,87000 +327,120,RL,32,10846,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Veenker,Norm,Norm,TwnhsE,1Story,8,5,1993,1993,Gable,CompShg,BrkFace,BrkFace,None,0,Gd,TA,PConc,Gd,TA,Gd,GLQ,1619,Unf,0,100,1719,GasA,Ex,Y,SBrkr,1719,0,0,1719,2,0,1,1,1,1,Gd,6,Typ,2,Gd,Attchd,1993,Fin,2,473,TA,TA,Y,122,30,0,0,0,0,NA,NA,NA,0,5,2008,Con,Normal,324000 +328,20,RL,80,11600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,175,TA,TA,CBlock,TA,TA,No,Rec,565,Unf,0,818,1383,GasA,TA,Y,SBrkr,1383,0,0,1383,0,0,1,1,3,1,TA,7,Typ,0,NA,Attchd,1960,RFn,1,292,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,145250 +329,75,RL,NA,11888,Pave,Pave,IR1,Bnk,AllPub,Inside,Gtl,BrkSide,PosN,Norm,1Fam,2.5Unf,6,6,1916,1994,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,844,844,GasA,Gd,N,FuseA,1445,689,0,2134,0,0,2,0,5,1,Gd,10,Typ,0,NA,Detchd,1930,Unf,2,441,TA,TA,Y,0,60,268,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,214500 +330,70,RM,60,6402,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Norm,Norm,1Fam,2Story,5,5,1920,1950,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,PConc,TA,TA,Mn,Unf,0,Unf,0,596,596,GasA,TA,N,SBrkr,596,596,0,1192,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1920,Unf,1,189,Fa,Fa,N,0,0,137,0,0,0,NA,GdWo,NA,0,7,2009,WD,Normal,78000 +331,90,RL,NA,10624,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,5,4,1964,1964,Gable,CompShg,HdBoard,HdBoard,BrkFace,84,TA,TA,CBlock,TA,TA,No,GLQ,40,Rec,264,1424,1728,GasA,TA,Y,SBrkr,1728,0,0,1728,0,1,2,0,6,2,TA,10,Typ,0,NA,Detchd,2002,Unf,1,352,TA,TA,Y,155,0,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,119000 
+332,20,RL,70,8176,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1958,1992,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,846,Unf,0,210,1056,GasA,Fa,Y,SBrkr,1056,0,0,1056,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1958,RFn,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,139000 +333,20,RL,85,10655,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,296,Gd,TA,PConc,Gd,TA,No,GLQ,1124,NA,479,1603,3206,GasA,Ex,Y,SBrkr,1629,0,0,1629,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2003,RFn,3,880,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,284000 +334,120,RM,59,8198,Pave,NA,Reg,Lvl,AllPub,FR3,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,Stone,146,Gd,TA,PConc,Gd,TA,Av,GLQ,720,Unf,0,638,1358,GasA,Ex,Y,SBrkr,1358,0,0,1358,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2004,RFn,2,484,TA,TA,Y,192,30,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,207000 +335,60,RL,59,9042,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Gd,GLQ,828,Unf,0,115,943,GasA,Gd,Y,SBrkr,943,695,0,1638,1,0,2,1,3,1,TA,7,Typ,2,TA,Attchd,1998,Fin,2,472,TA,TA,Y,100,38,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,192000 +336,190,RL,NA,164660,Grvl,NA,IR1,HLS,AllPub,Corner,Sev,Timber,Norm,Norm,2fmCon,1.5Fin,5,6,1965,1965,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,Gd,ALQ,1249,BLQ,147,103,1499,GasA,Ex,Y,SBrkr,1619,167,0,1786,2,0,2,0,3,1,TA,7,Typ,2,Gd,Attchd,1965,Fin,2,529,TA,TA,Y,670,0,0,0,0,0,NA,NA,Shed,700,8,2008,WD,Normal,228950 +337,20,RL,86,14157,Pave,NA,IR1,HLS,AllPub,Corner,Gtl,StoneBr,Norm,Norm,1Fam,1Story,9,5,2005,2006,Hip,CompShg,VinylSd,VinylSd,Stone,200,Gd,TA,PConc,Ex,TA,Gd,GLQ,1249,Unf,0,673,1922,GasA,Ex,Y,SBrkr,1922,0,0,1922,1,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2005,Fin,3,676,TA,TA,Y,178,51,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,377426 
+338,20,RL,70,9135,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2002,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,113,Gd,TA,PConc,Gd,TA,Av,GLQ,810,Unf,0,726,1536,GasA,Ex,Y,SBrkr,1536,0,0,1536,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2002,RFn,2,532,TA,TA,Y,192,74,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,214000 +339,20,RL,91,14145,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,7,1984,1998,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,CBlock,Gd,TA,Mn,ALQ,213,Unf,0,995,1208,GasA,Ex,Y,SBrkr,1621,0,0,1621,1,0,2,0,3,1,Gd,8,Typ,0,NA,Attchd,1984,RFn,2,440,TA,TA,Y,108,45,0,0,0,0,NA,NA,Shed,400,5,2006,WD,Normal,202500 +340,20,RL,66,12400,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,6,7,1958,1998,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,176,TA,TA,CBlock,TA,Fa,No,Rec,585,Unf,0,630,1215,GasA,TA,Y,FuseA,1215,0,0,1215,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1958,Unf,1,297,TA,TA,Y,0,0,0,0,234,0,NA,NA,NA,0,6,2009,WD,Normal,155000 +341,60,RL,85,14191,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,967,967,GasA,Ex,Y,SBrkr,993,915,0,1908,0,0,2,1,4,1,Gd,9,Typ,0,NA,Attchd,2002,Fin,2,431,TA,TA,Y,135,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,202900 +342,20,RH,60,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,1Fam,1Story,4,4,1950,1950,Gable,CompShg,Wd Sdng,AsbShng,None,0,Fa,Fa,CBlock,TA,Fa,No,Unf,0,Unf,0,721,721,GasA,Gd,Y,SBrkr,841,0,0,841,0,0,1,0,2,1,TA,4,Typ,0,NA,CarPort,1950,Unf,1,294,TA,TA,N,250,0,24,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,82000 +343,90,RL,NA,8544,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,3,4,1949,1950,Gable,CompShg,Stucco,Stucco,BrkFace,340,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,Wall,Fa,N,FuseA,1040,0,0,1040,0,0,2,0,2,2,TA,6,Typ,0,NA,Detchd,1949,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,87500 
+344,120,RL,63,8849,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,9,5,2005,2005,Hip,CompShg,MetalSd,MetalSd,BrkFace,616,Ex,TA,PConc,Ex,TA,No,GLQ,28,Unf,0,1656,1684,GasA,Ex,Y,SBrkr,1684,0,0,1684,0,0,2,0,2,1,Ex,6,Typ,1,Ex,Attchd,2005,RFn,2,564,TA,TA,Y,495,72,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,266000 +345,160,RM,36,2592,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,5,3,1976,1976,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,No,Rec,129,BLQ,232,175,536,GasA,TA,Y,SBrkr,536,576,0,1112,0,0,1,1,3,1,TA,4,Typ,0,NA,Attchd,1976,Unf,1,336,TA,TA,Y,182,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,85000 +346,50,RL,65,6435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Norm,1Fam,1.5Fin,6,5,1939,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,972,972,GasA,Gd,Y,SBrkr,972,605,0,1577,0,0,1,0,3,1,Fa,6,Typ,1,Gd,Detchd,1939,Unf,1,312,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,140200 +347,20,RL,NA,12772,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,8,1960,1998,Hip,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,Mn,BLQ,498,Unf,0,460,958,GasA,TA,Y,SBrkr,958,0,0,958,0,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1960,RFn,1,301,TA,TA,Y,0,0,0,0,0,0,NA,NA,Gar2,15500,4,2007,WD,Normal,151500 +348,20,RL,NA,17600,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,30,TA,TA,CBlock,TA,TA,No,BLQ,1270,Unf,0,208,1478,GasA,Ex,Y,FuseA,1478,0,0,1478,1,0,2,0,3,1,TA,6,Typ,2,Gd,Attchd,1960,Unf,2,498,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,157500 +349,160,RL,36,2448,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,Wd Shng,Stone,106,Gd,TA,PConc,Gd,TA,No,GLQ,573,Unf,0,191,764,GasA,Ex,Y,SBrkr,764,862,0,1626,1,0,2,1,2,1,Gd,6,Typ,0,NA,BuiltIn,2003,RFn,2,474,TA,TA,Y,0,27,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,154000 
+350,60,RL,56,20431,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2005,2006,Hip,CompShg,CemntBd,CmentBd,BrkFace,870,Ex,TA,PConc,Ex,TA,No,GLQ,1410,Unf,0,438,1848,GasA,Ex,Y,SBrkr,1848,880,0,2728,1,0,2,1,4,1,Ex,10,Typ,2,Ex,Attchd,2006,Fin,3,706,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2006,New,Partial,437154 +351,120,RL,68,7820,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,9,5,2007,2007,Hip,CompShg,MetalSd,MetalSd,BrkFace,362,Ex,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1869,1869,GasA,Ex,Y,SBrkr,1869,0,0,1869,0,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2007,RFn,2,617,TA,TA,Y,210,54,0,0,0,0,NA,NA,NA,0,12,2007,New,Partial,318061 +352,120,RL,NA,5271,Pave,NA,IR1,Low,AllPub,Inside,Mod,ClearCr,Norm,Norm,1Fam,1Story,7,5,1986,1986,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Gd,TA,Gd,GLQ,1082,Unf,0,371,1453,GasA,Gd,Y,SBrkr,1453,0,0,1453,1,0,1,1,2,1,Gd,6,Typ,1,TA,Attchd,1986,RFn,2,445,TA,TA,Y,0,80,0,0,184,0,NA,NA,NA,0,12,2006,WD,Abnorml,190000 +353,50,RL,60,9084,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Artery,Norm,1Fam,1.5Fin,5,6,1941,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,Fa,Mn,LwQ,236,Rec,380,0,616,GasA,TA,N,SBrkr,616,495,0,1111,0,1,1,0,3,1,TA,5,Typ,0,NA,Detchd,1941,Unf,1,200,TA,Fa,Y,48,0,0,0,0,0,NA,NA,NA,0,3,2008,ConLw,Normal,95000 +354,30,RM,60,8520,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,6,8,1928,2003,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,BrkTil,TA,TA,No,Unf,0,Unf,0,624,624,GasA,Gd,Y,SBrkr,720,0,0,720,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,2005,Unf,2,484,TA,TA,Y,106,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,105900 +355,50,RL,60,8400,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,6,5,1940,2000,Gable,CompShg,Wd Sdng,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,LwQ,388,Unf,0,552,940,GasA,Ex,Y,SBrkr,1192,403,0,1595,0,0,1,0,2,1,TA,6,Typ,2,Gd,Attchd,1940,Unf,1,240,TA,TA,Y,0,0,108,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,140000 
+356,20,RL,105,11249,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,6,5,1995,1995,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,PConc,Gd,Gd,No,ALQ,334,BLQ,544,322,1200,GasA,Ex,Y,SBrkr,1200,0,0,1200,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1995,RFn,2,521,TA,TA,Y,0,26,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,177500 +357,20,RL,NA,9248,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,1Story,6,6,1992,1992,Gable,CompShg,HdBoard,HdBoard,BrkFace,106,TA,TA,PConc,Gd,TA,No,GLQ,560,Unf,0,598,1158,GasA,Gd,Y,SBrkr,1167,0,0,1167,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1992,RFn,2,400,TA,TA,Y,120,26,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,173000 +358,120,RM,44,4224,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,1Story,5,5,1976,1976,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,PConc,Gd,TA,No,ALQ,874,Unf,0,268,1142,GasA,TA,Y,SBrkr,1142,0,0,1142,1,0,1,1,3,1,TA,6,Typ,1,Po,Attchd,1976,Fin,2,528,TA,TA,Y,536,90,0,0,0,0,NA,MnPrv,NA,0,8,2007,WD,Normal,134000 +359,80,RL,92,6930,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,SLvl,5,4,1958,1958,Hip,CompShg,Wd Sdng,ImStucc,BrkFace,120,TA,TA,CBlock,TA,TA,Av,BLQ,300,Rec,294,468,1062,GasA,Ex,Y,FuseF,1352,0,0,1352,0,1,1,0,3,1,Gd,6,Min2,0,NA,BuiltIn,1958,Unf,1,288,TA,TA,Y,168,0,294,0,0,0,NA,NA,NA,0,7,2006,WD,Abnorml,130000 +360,60,RL,78,12011,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,530,Gd,TA,PConc,Gd,TA,Av,GLQ,956,Unf,0,130,1086,GasA,Ex,Y,SBrkr,1086,838,0,1924,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1998,RFn,2,592,TA,TA,Y,208,75,0,0,374,0,NA,NA,NA,0,6,2006,WD,Normal,280000 +361,85,RL,NA,7540,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,6,6,1978,1978,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,773,Unf,0,115,888,GasA,Ex,Y,SBrkr,912,0,0,912,1,0,1,0,2,1,TA,5,Typ,1,TA,Attchd,1978,RFn,2,470,TA,TA,Y,0,0,0,0,192,0,NA,MnPrv,NA,0,6,2007,WD,Normal,156000 
+362,50,RL,NA,9144,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,5,1940,1982,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,399,Unf,0,484,883,GasA,Gd,Y,SBrkr,988,517,0,1505,1,0,1,0,3,1,TA,8,Typ,0,NA,Detchd,1940,Unf,1,240,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,145000 +363,85,RL,64,7301,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,SFoyer,7,5,2003,2003,Gable,CompShg,HdBoard,HdBoard,BrkFace,500,Gd,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,SBrkr,495,1427,0,1922,0,0,3,0,4,1,Gd,7,Typ,1,Ex,BuiltIn,2003,RFn,2,672,TA,TA,Y,0,0,177,0,0,0,NA,NA,NA,0,7,2009,ConLD,Normal,198500 +364,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,8,1972,2007,Gable,CompShg,HdBoard,HdBoard,BrkFace,510,TA,TA,CBlock,TA,TA,No,ALQ,162,Unf,0,321,483,GasA,Gd,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,Gd,5,Typ,0,NA,Detchd,1972,Unf,1,264,TA,TA,Y,250,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,118000 +365,60,RL,NA,18800,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,BrkFace,120,TA,TA,PConc,Gd,TA,Mn,GLQ,712,Unf,0,84,796,GasA,TA,Y,SBrkr,790,784,0,1574,1,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1976,Fin,2,566,TA,TA,Y,306,111,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,190000 +366,70,RM,59,10690,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,2Story,5,7,1920,1997,Hip,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,Fa,No,Rec,456,Unf,0,216,672,GasA,Gd,Y,FuseA,672,672,0,1344,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1964,Unf,1,468,TA,Fa,Y,0,128,218,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,147000 +367,20,RL,NA,9500,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247,TA,TA,CBlock,Gd,TA,No,BLQ,609,Unf,0,785,1394,GasA,Gd,Y,SBrkr,1394,0,0,1394,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1963,RFn,2,514,TA,TA,Y,0,76,0,0,185,0,NA,NA,NA,0,7,2009,WD,Normal,159000 
+368,80,RL,101,9150,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,5,1962,1962,Gable,Tar&Grv,Plywood,Plywood,BrkFace,305,TA,TA,CBlock,Gd,TA,Gd,GLQ,371,Unf,0,728,1099,GasA,Gd,Y,SBrkr,1431,0,0,1431,0,1,1,0,3,1,TA,6,Typ,1,Gd,Basment,1962,RFn,1,296,TA,TA,Y,64,110,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,165000 +369,20,RL,78,7800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1954,1954,Gable,CompShg,HdBoard,HdBoard,BrkFace,200,TA,TA,PConc,TA,TA,No,LwQ,540,Unf,0,728,1268,GasA,Gd,Y,SBrkr,1268,0,0,1268,0,0,1,0,2,1,TA,7,Typ,1,Gd,Attchd,1954,Fin,1,244,TA,TA,Y,0,98,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,132000 +370,20,RL,NA,9830,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1959,2006,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,72,Rec,258,733,1063,GasA,Ex,Y,SBrkr,1287,0,0,1287,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1997,Fin,2,576,TA,TA,Y,364,17,0,0,182,0,NA,NA,NA,0,3,2010,WD,Normal,162000 +371,60,RL,NA,8121,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,953,953,GasA,Ex,Y,SBrkr,953,711,0,1664,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,2000,RFn,2,460,TA,TA,Y,100,40,0,0,0,0,NA,NA,NA,0,1,2006,WD,Normal,172400 +372,50,RL,80,17120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Feedr,Norm,1Fam,1.5Fin,4,4,1959,1959,Gable,CompShg,WdShing,Plywood,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1120,468,0,1588,0,0,2,0,4,1,TA,7,Min2,1,Gd,Detchd,1991,Fin,2,680,TA,TA,N,0,59,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,134432 +373,120,RL,50,7175,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1984,1984,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,623,LwQ,121,0,744,GasA,TA,Y,SBrkr,752,0,0,752,1,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1984,Unf,1,264,TA,TA,Y,353,0,0,0,90,0,NA,MnPrv,NA,0,2,2010,WD,Normal,125000 
+374,20,RL,79,10634,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1953,1953,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,BLQ,428,LwQ,180,0,608,GasA,TA,Y,SBrkr,1319,0,0,1319,1,0,1,0,3,1,TA,5,Min2,0,NA,Attchd,1953,Unf,1,270,TA,TA,Y,66,0,0,0,0,0,NA,GdWo,NA,0,11,2009,WD,Normal,123000 +375,60,RL,65,8200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,847,847,GasA,Ex,Y,SBrkr,847,1081,0,1928,0,0,2,1,4,1,Gd,8,Typ,1,Gd,BuiltIn,2003,Fin,2,434,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,219500 +376,30,RL,NA,10020,Pave,NA,IR1,Low,AllPub,Inside,Sev,Edwards,Norm,Norm,1Fam,1Story,1,1,1922,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Fa,Fa,BrkTil,Fa,Po,Gd,BLQ,350,Unf,0,333,683,GasA,Gd,N,FuseA,904,0,0,904,1,0,0,1,1,1,Fa,4,Maj1,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,61000 +377,85,RL,57,8846,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,SFoyer,5,5,1996,1996,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,298,Unf,0,572,870,GasA,Ex,Y,SBrkr,914,0,0,914,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1998,Unf,2,576,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,148000 +378,60,FV,102,11143,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2004,2005,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1580,1580,GasA,Ex,Y,SBrkr,1580,886,0,2466,0,0,3,0,4,1,Gd,8,Typ,1,Gd,Attchd,2004,RFn,2,610,TA,TA,Y,159,214,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,340000 +379,20,RL,88,11394,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,StoneBr,Norm,Norm,1Fam,1Story,9,2,2010,2010,Hip,CompShg,VinylSd,VinylSd,Stone,350,Gd,TA,PConc,Ex,TA,Av,GLQ,1445,Unf,0,411,1856,GasA,Ex,Y,SBrkr,1856,0,0,1856,1,0,1,1,1,1,Ex,8,Typ,1,Ex,Attchd,2010,Fin,3,834,TA,TA,Y,113,0,0,0,0,0,NA,NA,NA,0,6,2010,New,Partial,394432 
+380,60,RL,60,8123,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,982,982,GasA,Ex,Y,SBrkr,1007,793,0,1800,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,2000,Fin,2,463,TA,TA,Y,100,63,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,179000 +381,50,RL,50,5000,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,5,6,1924,1950,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,LwQ,218,Unf,0,808,1026,GasA,TA,Y,SBrkr,1026,665,0,1691,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Detchd,1924,Unf,1,308,TA,TA,Y,0,0,242,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,127000 +382,20,FV,60,7200,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,No,Unf,0,Unf,0,1293,1293,GasA,Ex,Y,SBrkr,1301,0,0,1301,1,0,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2006,RFn,2,572,TA,TA,Y,216,121,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,187750 +383,60,RL,79,9245,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,939,939,GasA,Ex,Y,SBrkr,939,858,0,1797,0,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,2006,RFn,2,639,TA,TA,Y,144,53,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,213500 +384,45,RH,60,9000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,1.5Unf,6,3,1928,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,Fa,No,Unf,0,Unf,0,784,784,GasA,TA,N,FuseA,784,0,0,784,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1950,Unf,2,360,Fa,Fa,N,0,0,91,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,76000 +385,60,RL,NA,53107,Pave,NA,IR2,Low,AllPub,Corner,Mod,ClearCr,Feedr,Norm,1Fam,2Story,6,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,985,Unf,0,595,1580,GasA,Ex,Y,SBrkr,1079,874,0,1953,1,0,2,1,3,1,Gd,9,Typ,2,Fa,Attchd,1992,Fin,2,501,TA,TA,Y,216,231,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,240000 
+386,120,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,8,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,Gd,TA,PConc,Gd,TA,No,GLQ,24,Unf,0,1232,1256,GasA,Ex,Y,SBrkr,1269,0,0,1269,0,0,2,0,2,1,Gd,6,Typ,1,TA,Attchd,2004,Fin,2,430,TA,TA,Y,146,20,0,0,144,0,NA,NA,NA,0,4,2010,WD,Normal,192000 +387,50,RL,58,8410,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Edwards,Feedr,Norm,1Fam,1.5Fin,5,3,1910,1996,Gambrel,CompShg,Wd Sdng,VinylSd,None,0,TA,Fa,PConc,TA,TA,No,Unf,0,Unf,0,658,658,GasA,TA,Y,SBrkr,658,526,0,1184,0,0,1,0,5,1,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,151,0,0,0,0,NA,NA,NA,0,5,2006,WD,AdjLand,81000 +388,80,RL,72,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,SLvl,6,6,1976,1976,Hip,CompShg,MetalSd,MetalSd,BrkFace,255,TA,TA,CBlock,TA,TA,Av,ALQ,631,Unf,0,410,1041,GasA,Ex,Y,SBrkr,1125,0,0,1125,1,0,1,0,3,1,TA,6,Typ,1,Fa,Detchd,1977,Unf,1,352,TA,TA,Y,296,0,0,0,0,0,NA,GdWo,NA,0,10,2009,WD,Abnorml,125000 +389,20,RL,93,9382,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1999,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,125,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1468,1468,GasA,Ex,Y,SBrkr,1479,0,0,1479,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1999,RFn,2,577,TA,TA,Y,120,25,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,191000 +390,60,RL,96,12474,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,10,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,272,Ex,TA,PConc,Ex,TA,Av,GLQ,1280,Unf,0,402,1682,GasA,Ex,Y,SBrkr,1742,590,0,2332,1,0,2,1,3,1,Ex,9,Typ,1,Ex,BuiltIn,2008,Fin,3,846,TA,TA,Y,196,134,0,0,0,0,NA,NA,NA,0,8,2008,New,Partial,426000 +391,50,RL,50,8405,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,8,1900,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,Gd,No,Rec,241,BLQ,391,229,861,GasA,Ex,Y,SBrkr,961,406,0,1367,1,0,1,0,4,1,TA,7,Typ,0,NA,Detchd,1978,Unf,1,384,TA,TA,Y,0,130,112,0,0,0,NA,MnPrv,NA,0,4,2008,WD,Normal,119000 
+392,60,RL,71,12209,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,2Story,6,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Ex,TA,No,ALQ,690,Unf,0,114,804,GasA,Ex,Y,SBrkr,804,1157,0,1961,1,0,2,1,3,1,Gd,7,Typ,1,TA,BuiltIn,2001,Fin,2,560,TA,TA,Y,125,192,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,215000 +393,20,RL,NA,8339,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1959,1959,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,882,0,0,882,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1959,RFn,1,294,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,Shed,1200,7,2007,WD,Normal,106500 +394,30,RL,NA,7446,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Feedr,Norm,1Fam,1Story,4,5,1941,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,Rec,266,Unf,0,522,788,GasA,TA,Y,FuseA,788,0,0,788,0,0,1,0,2,1,TA,4,Typ,2,TA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,4,2006,WD,Abnorml,100000 +395,50,RL,60,10134,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,6,1940,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,735,735,GasA,Gd,Y,FuseA,735,299,0,1034,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1940,Unf,1,240,TA,TA,Y,0,39,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,109000 +396,20,RL,68,9571,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,6,1956,1956,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,739,Unf,0,405,1144,GasA,TA,Y,SBrkr,1144,0,0,1144,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1956,Unf,1,596,TA,TA,Y,44,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,129000 +397,20,RL,60,7200,Pave,NA,Reg,Low,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1972,1972,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Av,Rec,777,Unf,0,117,894,GasA,TA,Y,SBrkr,894,0,0,894,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1985,RFn,2,600,TA,TA,Y,215,0,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,123000 
+398,60,RL,69,7590,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,PosN,Norm,1Fam,2Story,5,5,1962,1962,Gable,CompShg,VinylSd,VinylSd,BrkFace,288,TA,TA,CBlock,TA,TA,No,ALQ,540,Unf,0,324,864,GasA,TA,Y,SBrkr,876,936,0,1812,0,0,2,0,4,1,TA,8,Typ,1,TA,Attchd,1962,RFn,1,264,TA,TA,Y,0,168,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,169500 +399,30,RM,60,8967,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,5,2,1920,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Fa,BrkTil,Fa,Po,No,Unf,0,Unf,0,961,961,GasA,Gd,Y,Mix,1077,0,0,1077,0,0,1,0,2,1,TA,6,Maj2,0,NA,Detchd,1920,Unf,1,338,Po,Po,N,0,0,0,0,0,0,NA,NA,NA,0,11,2007,WD,Abnorml,67000 +400,60,FV,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2006,2007,Gable,CompShg,CemntBd,CmentBd,Stone,100,Gd,TA,PConc,Gd,TA,No,GLQ,812,Unf,0,280,1092,GasA,Ex,Y,SBrkr,1112,438,0,1550,1,0,2,0,2,1,Gd,7,Typ,0,NA,Attchd,2007,Fin,2,438,TA,TA,Y,0,168,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,241000 +401,120,RL,38,14963,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,Veenker,Norm,Norm,TwnhsE,1Story,8,5,1996,1996,Gable,CompShg,BrkFace,BrkFace,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,786,Unf,0,474,1260,GasA,Ex,Y,SBrkr,1288,0,0,1288,1,0,1,1,1,1,Ex,4,Typ,2,Gd,Attchd,1996,Fin,2,500,TA,TA,Y,120,30,0,0,224,0,NA,NA,NA,0,12,2008,WD,Normal,245500 +402,20,RL,65,8767,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1286,1310,GasA,Ex,Y,SBrkr,1310,0,0,1310,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2005,Fin,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,164990 +403,30,RL,60,10200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,8,1940,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,672,672,GasA,Ex,Y,SBrkr,672,0,0,672,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1940,Unf,1,240,TA,TA,N,168,0,0,0,0,0,NA,GdPrv,NA,0,8,2008,WD,Normal,108000 
+404,60,RL,93,12090,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1998,1998,Hip,CompShg,VinylSd,VinylSd,BrkFace,650,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1141,1141,GasA,Gd,Y,SBrkr,1165,1098,0,2263,0,0,2,1,4,1,Gd,10,Typ,1,TA,BuiltIn,1998,Fin,2,420,TA,TA,Y,144,123,0,0,0,0,NA,NA,NA,0,7,2006,WD,Abnorml,258000 +405,60,RL,NA,10364,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1995,1996,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,806,806,GasA,Gd,Y,SBrkr,806,766,0,1572,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,1995,Fin,2,373,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,168000 +406,20,RL,NA,9991,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,4,4,1976,1993,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,BLQ,1116,Unf,0,165,1281,GasA,Ex,Y,SBrkr,1620,0,0,1620,1,0,2,0,3,1,TA,8,Min1,1,TA,Attchd,1993,Unf,2,490,TA,TA,Y,120,78,0,0,0,0,NA,GdWo,NA,0,6,2009,WD,Normal,150000 +407,50,RL,51,10480,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,6,5,1936,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1064,1064,GasA,Ex,Y,FuseA,1166,0,473,1639,0,0,1,0,3,1,TA,6,Maj2,0,NA,Detchd,1936,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,115000 +408,70,RL,63,15576,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,6,7,1915,1976,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,TA,BrkTil,Gd,TA,No,Unf,0,Unf,0,840,840,GasA,Ex,Y,SBrkr,840,840,0,1680,0,0,2,0,4,1,TA,8,Typ,0,NA,Attchd,1960,Unf,1,308,TA,TA,Y,0,0,160,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,177000 +409,60,RL,109,14154,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,350,Gd,TA,PConc,Ex,Gd,No,Unf,0,Unf,0,1063,1063,GasA,Ex,Y,SBrkr,1071,1101,0,2172,0,0,2,1,3,1,Gd,9,Typ,1,Gd,Attchd,2006,RFn,3,947,TA,TA,Y,192,62,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,280000 
+410,60,FV,85,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,100,Gd,TA,PConc,Ex,TA,No,GLQ,789,Unf,0,245,1034,GasA,Ex,Y,SBrkr,1050,1028,0,2078,1,0,2,1,3,1,Ex,8,Typ,1,Gd,Attchd,2008,Fin,3,836,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,4,2008,New,Partial,339750 +411,20,RL,68,9571,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,3,1958,1958,Gable,CompShg,BrkComm,Brk Cmn,None,0,TA,Fa,CBlock,TA,Fa,No,Unf,0,Unf,0,1276,1276,GasA,TA,Y,FuseA,1276,0,0,1276,0,0,1,0,3,1,TA,5,Mod,0,NA,Attchd,1958,Unf,1,350,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,COD,Abnorml,60000 +412,190,RL,100,34650,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Gilbert,Norm,Norm,2fmCon,1Story,5,5,1955,1955,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Mn,Rec,1056,Unf,0,0,1056,GasA,TA,N,SBrkr,1056,0,0,1056,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1955,Fin,2,572,TA,TA,Y,264,0,0,0,0,0,NA,NA,NA,0,1,2006,WD,Normal,145000 +413,20,FV,NA,4403,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2009,2009,Gable,CompShg,MetalSd,MetalSd,Stone,432,Ex,TA,PConc,Ex,TA,Av,GLQ,578,Unf,0,892,1470,GasA,Ex,Y,SBrkr,1478,0,0,1478,1,0,2,1,2,1,Gd,7,Typ,1,Gd,Attchd,2009,Fin,2,484,TA,TA,Y,0,144,0,0,0,0,NA,NA,NA,0,6,2010,New,Partial,222000 +414,30,RM,56,8960,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,5,6,1927,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1008,1008,GasA,Gd,Y,FuseA,1028,0,0,1028,0,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1927,Unf,2,360,TA,TA,Y,0,0,130,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,115000 +415,60,RL,59,11228,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1993,1993,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,BLQ,50,GLQ,531,499,1080,GasA,Ex,Y,SBrkr,1080,1017,0,2097,0,1,2,1,3,1,Gd,9,Typ,1,TA,Attchd,1993,Unf,3,678,TA,TA,Y,196,187,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,228000 
+416,20,RL,73,8899,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1316,1340,GasA,Ex,Y,SBrkr,1340,0,0,1340,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2007,Fin,2,396,TA,TA,Y,100,30,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,181134 +417,60,RL,74,7844,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,2Story,6,7,1978,1978,Hip,CompShg,HdBoard,HdBoard,BrkFace,203,TA,TA,CBlock,TA,TA,No,ALQ,209,Unf,0,463,672,GasA,TA,Y,SBrkr,672,728,0,1400,0,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1978,Fin,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,149500 +418,70,RL,86,22420,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Feedr,Norm,1Fam,2Story,6,6,1918,1950,Hip,CompShg,Wd Sdng,Stucco,None,0,TA,TA,BrkTil,Gd,TA,No,BLQ,1128,Unf,0,242,1370,GasW,TA,N,FuseA,1370,1254,0,2624,1,0,2,1,4,1,TA,10,Typ,1,Gd,Detchd,1918,Unf,3,864,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,239000 +419,50,RL,60,8160,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,6,1940,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,ALQ,312,Unf,0,444,756,GasA,Fa,N,FuseF,756,378,0,1134,1,0,1,1,3,1,TA,7,Typ,0,NA,Detchd,1940,Unf,1,240,TA,TA,P,0,0,0,0,0,0,NA,NA,NA,0,4,2007,WD,AdjLand,126000 +420,20,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1968,1968,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,775,Unf,0,281,1056,GasA,Ex,Y,SBrkr,1056,0,0,1056,1,0,1,0,3,1,TA,6,Typ,1,Fa,Attchd,1968,Unf,1,304,TA,TA,Y,0,85,184,0,0,0,NA,NA,NA,0,7,2010,WD,Normal,142000 +421,90,RM,78,7060,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,SFoyer,7,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,200,TA,Gd,PConc,Gd,Gd,Gd,GLQ,1309,Unf,0,35,1344,GasA,Ex,Y,SBrkr,1344,0,0,1344,2,0,2,0,2,2,TA,8,Typ,0,NA,Attchd,1997,Fin,4,784,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Alloca,206300 
+422,20,RL,NA,16635,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,7,1977,2000,Gable,CompShg,CemntBd,CmentBd,Stone,126,Gd,TA,CBlock,Gd,TA,No,ALQ,1246,Unf,0,356,1602,GasA,Gd,Y,SBrkr,1602,0,0,1602,0,1,2,0,3,1,Gd,8,Typ,1,TA,Attchd,1977,Fin,2,529,TA,TA,Y,240,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,215000 +423,20,RL,100,21750,Pave,NA,Reg,HLS,AllPub,Inside,Mod,Mitchel,Artery,Norm,1Fam,1Story,5,5,1954,1954,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,988,988,GasA,Ex,Y,FuseA,988,0,0,988,0,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1954,RFn,2,520,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,113000 +424,60,RL,80,9200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,473,Gd,TA,PConc,Gd,TA,No,GLQ,986,Unf,0,484,1470,GasA,Gd,Y,SBrkr,1470,1160,0,2630,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1998,Fin,3,696,TA,TA,Y,0,66,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,315000 +425,20,RL,72,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1956,1956,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,74,TA,TA,CBlock,Gd,TA,No,LwQ,616,Unf,0,580,1196,GasA,Gd,Y,FuseA,1196,0,0,1196,1,0,1,0,2,1,TA,6,Typ,1,Gd,Attchd,1956,RFn,1,297,TA,TA,Y,0,44,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,139000 +426,60,RM,60,3378,Pave,Grvl,Reg,HLS,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,8,1946,1992,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,651,651,GasA,Gd,Y,SBrkr,707,682,0,1389,0,0,1,1,3,1,TA,6,Typ,2,Gd,Detchd,1947,Unf,1,240,TA,TA,P,0,0,126,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,135000 +427,80,RL,NA,12800,Pave,NA,Reg,Low,AllPub,Inside,Mod,SawyerW,Norm,Norm,1Fam,SLvl,7,5,1989,1989,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,145,Gd,TA,PConc,Gd,TA,Gd,GLQ,1518,Unf,0,0,1518,GasA,Gd,Y,SBrkr,1644,0,0,1644,1,1,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1989,Fin,2,569,TA,TA,Y,80,0,0,0,396,0,NA,NA,NA,0,8,2009,WD,Normal,275000 
+428,20,RL,77,8593,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1957,1957,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,288,Unf,0,619,907,GasA,Ex,Y,SBrkr,907,0,0,907,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1964,Unf,1,352,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,109008 +429,20,RL,64,6762,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,108,Gd,TA,PConc,Gd,TA,No,GLQ,664,Unf,0,544,1208,GasA,Ex,Y,SBrkr,1208,0,0,1208,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2007,RFn,2,628,TA,TA,Y,105,54,0,0,0,0,NA,NA,NA,0,9,2007,New,Partial,195400 +430,20,RL,130,11457,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,1Story,6,5,1988,1988,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Mn,GLQ,1005,Unf,0,387,1392,GasA,TA,Y,SBrkr,1412,0,0,1412,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,1988,Unf,2,576,TA,TA,Y,0,0,169,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,175000 +431,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,232,TA,TA,CBlock,TA,TA,No,ALQ,387,Unf,0,96,483,GasA,TA,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,TA,4,Typ,0,NA,Detchd,1971,Unf,1,264,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2008,COD,Abnorml,85400 +432,50,RM,60,5586,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,OldTown,Feedr,Norm,1Fam,1.5Fin,6,7,1920,1998,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,901,901,GasA,Gd,Y,SBrkr,1088,110,0,1198,0,0,1,0,4,1,TA,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,98,0,0,0,0,NA,MnPrv,NA,0,9,2008,ConLD,Abnorml,79900 +433,160,RM,24,1920,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,TwnhsE,2Story,5,5,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,376,TA,TA,CBlock,TA,TA,No,ALQ,471,Unf,0,294,765,GasA,Ex,Y,SBrkr,765,600,0,1365,1,0,1,1,2,1,TA,6,Min1,0,NA,Detchd,1971,Unf,2,440,TA,TA,Y,240,36,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,122500 
+434,60,RL,100,10839,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,926,926,GasA,Ex,Y,SBrkr,926,678,0,1604,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1997,Fin,2,470,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,181000 +435,180,RM,21,1890,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,SFoyer,4,7,1972,1972,Gable,CompShg,CemntBd,CmentBd,None,0,TA,Gd,CBlock,Gd,TA,Av,ALQ,495,Unf,0,135,630,GasA,Gd,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,TA,3,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,88,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,81000 +436,60,RL,43,10667,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,CollgCr,PosN,Norm,1Fam,2Story,7,6,1996,1996,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,385,ALQ,344,70,799,GasA,Ex,Y,SBrkr,827,834,0,1661,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,1996,RFn,2,550,TA,TA,Y,158,61,0,0,0,0,NA,NA,NA,0,4,2009,ConLw,Normal,212000 +437,50,RM,40,4400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,6,8,1920,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,648,648,GasA,TA,Y,FuseA,734,384,0,1118,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1990,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,116000 +438,45,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Unf,6,7,1926,2004,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,PConc,TA,TA,No,Unf,0,Unf,0,884,884,GasA,Gd,Y,SBrkr,904,0,0,904,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1926,Unf,1,180,TA,TA,Y,0,0,105,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,119000 +439,30,RL,40,4280,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,5,6,1913,2002,Gable,CompShg,WdShing,Stucco,None,0,TA,TA,PConc,TA,TA,No,LwQ,365,Unf,0,75,440,GasA,TA,N,SBrkr,694,0,0,694,0,0,1,0,2,1,Gd,4,Typ,1,Gd,Detchd,1990,Unf,1,352,Gd,TA,P,0,0,34,0,0,0,NA,MnPrv,NA,0,3,2007,WD,Normal,90350 +440,50,RL,67,12354,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,6,8,1920,2000,Gable,CompShg,Wd Sdng,Wd 
Sdng,None,0,TA,TA,BrkTil,TA,Fa,Mn,Unf,0,Unf,0,684,684,GasA,Gd,Y,SBrkr,684,512,0,1196,0,0,1,0,3,1,Gd,7,Typ,0,NA,Detchd,2005,Unf,2,528,TA,TA,Y,0,46,0,0,0,0,NA,GdPrv,Shed,800,8,2009,ConLI,Normal,110000 +441,20,RL,105,15431,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2008,2008,Hip,CompShg,VinylSd,VinylSd,Stone,200,Ex,TA,PConc,Ex,TA,Gd,GLQ,1767,ALQ,539,788,3094,GasA,Ex,Y,SBrkr,2402,0,0,2402,1,0,2,0,2,1,Ex,10,Typ,2,Gd,Attchd,2008,Fin,3,672,TA,TA,Y,0,72,0,0,170,0,NA,NA,NA,0,4,2009,WD,Normal,555000 +442,90,RL,92,12108,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,1Story,4,4,1955,1955,Gable,CompShg,VinylSd,VinylSd,BrkFace,270,TA,TA,CBlock,TA,TA,No,ALQ,133,Unf,0,1307,1440,GasA,TA,N,FuseF,1440,0,0,1440,0,0,2,0,4,2,Fa,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,118000 +443,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,7,1930,1992,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,1078,1078,GasA,TA,Y,SBrkr,1128,445,0,1573,0,0,2,0,3,1,TA,8,Typ,1,Gd,Detchd,1930,Unf,2,360,TA,TA,P,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,162900 +444,120,RL,53,3922,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2006,2007,Gable,CompShg,WdShing,Wd Shng,BrkFace,72,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1258,1258,GasA,Ex,Y,SBrkr,1258,0,0,1258,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2007,Fin,3,648,TA,TA,Y,144,16,0,0,0,0,NA,NA,NA,0,6,2007,New,Partial,172500 +445,60,RL,70,8750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1994,1995,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,PConc,Gd,TA,No,GLQ,642,Unf,0,273,915,GasA,Ex,Y,SBrkr,933,975,0,1908,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1994,Unf,2,493,TA,TA,Y,144,133,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,210000 +446,20,RL,73,9855,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,6,5,1956,1956,Hip,CompShg,Wd Sdng,Wd 
Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1436,1436,GasA,Fa,Y,SBrkr,1689,0,0,1689,0,0,1,0,3,1,TA,7,Typ,1,Gd,Attchd,1956,Unf,2,480,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,11,2009,COD,Normal,127500 +447,20,RL,137,16492,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,PosA,Norm,1Fam,1Story,6,6,1966,2002,Gable,CompShg,BrkFace,Plywood,None,0,Gd,TA,CBlock,TA,TA,No,ALQ,247,Rec,713,557,1517,GasA,Ex,Y,SBrkr,1888,0,0,1888,0,0,2,1,2,1,Gd,6,Mod,1,Gd,Attchd,1966,Fin,2,578,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,190000 +448,60,RL,NA,11214,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1998,1999,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,930,930,GasA,Gd,Y,SBrkr,956,930,0,1886,0,0,2,1,4,1,Gd,10,Typ,1,TA,Attchd,1998,Fin,2,431,TA,TA,Y,89,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,199900 +449,50,RM,50,8600,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,6,6,1937,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,780,780,GasA,TA,Y,SBrkr,780,596,0,1376,0,0,2,0,3,1,TA,7,Typ,1,Gd,Detchd,1937,Unf,1,198,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,119500 +450,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,3,7,1948,2002,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,331,Unf,0,318,649,GasA,Ex,Y,SBrkr,679,504,0,1183,0,0,1,1,2,1,TA,6,Typ,0,NA,Detchd,1981,Unf,1,308,TA,TA,Y,0,176,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,120000 +451,30,RM,70,5684,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,6,8,1930,2005,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,813,813,GasA,Ex,Y,FuseA,813,0,0,813,0,0,1,0,2,1,Gd,5,Typ,0,NA,Detchd,1932,Unf,1,270,Fa,Fa,N,0,113,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,110000 
+452,20,RL,62,70761,Pave,NA,IR1,Low,AllPub,Inside,Mod,ClearCr,Norm,Norm,1Fam,1Story,7,5,1975,1975,Gable,WdShngl,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,655,Unf,0,878,1533,GasA,TA,Y,SBrkr,1533,0,0,1533,1,0,2,0,2,1,Gd,5,Typ,2,TA,Attchd,1975,Unf,2,576,TA,TA,Y,200,54,0,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,280000 +453,60,RL,NA,9303,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,2Story,6,5,1996,1997,Hip,CompShg,VinylSd,VinylSd,BrkFace,42,Gd,TA,PConc,Ex,TA,No,ALQ,742,Unf,0,130,872,GasA,Ex,Y,SBrkr,888,868,0,1756,1,0,2,1,3,1,TA,7,Typ,0,NA,Attchd,1996,Fin,2,422,TA,TA,Y,144,122,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,204000 +454,60,FV,75,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,768,768,GasA,Ex,Y,SBrkr,786,804,0,1590,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2008,RFn,2,676,TA,TA,Y,0,30,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,210000 +455,90,RL,63,9297,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,1Story,5,5,1976,1976,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,1606,Unf,0,122,1728,GasA,TA,Y,SBrkr,1728,0,0,1728,2,0,2,0,4,2,TA,8,Typ,0,NA,Detchd,1976,Unf,2,560,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Family,188000 +456,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,6,1973,1973,Hip,CompShg,HdBoard,HdBoard,BrkFace,320,TA,TA,CBlock,TA,TA,No,ALQ,916,Unf,0,326,1242,GasA,Fa,Y,SBrkr,1242,0,0,1242,0,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1973,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,9,2007,WD,Normal,175500 +457,70,RM,34,4571,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,5,5,1916,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,624,624,GasA,Fa,N,SBrkr,624,720,0,1344,0,0,1,0,4,1,TA,7,Typ,0,NA,Detchd,1916,Unf,3,513,Fa,Fa,Y,0,0,96,0,0,0,NA,NA,NA,0,5,2008,COD,Abnorml,98000 
+458,20,RL,NA,53227,Pave,NA,IR1,Low,AllPub,CulDSac,Mod,ClearCr,Norm,Norm,1Fam,1Story,4,6,1954,1994,Flat,Tar&Grv,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Gd,BLQ,1116,Unf,0,248,1364,GasA,Ex,Y,SBrkr,1663,0,0,1663,1,0,1,0,2,1,Gd,6,Min1,2,Gd,Attchd,1954,Fin,2,529,TA,TA,Y,224,137,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,256000 +459,70,RM,NA,5100,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,8,7,1925,1996,Hip,CompShg,Stucco,Wd Shng,None,0,TA,Gd,PConc,TA,TA,No,Unf,0,Unf,0,588,588,GasA,Fa,Y,SBrkr,833,833,0,1666,0,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1925,Unf,1,228,TA,TA,Y,192,63,0,0,0,0,NA,MnPrv,NA,0,6,2008,WD,Normal,161000 +460,50,RL,NA,7015,Pave,NA,IR1,Bnk,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,4,1950,1950,Gable,CompShg,MetalSd,MetalSd,BrkCmn,161,TA,TA,CBlock,TA,TA,No,LwQ,185,Unf,0,524,709,GasA,TA,Y,SBrkr,979,224,0,1203,1,0,1,0,3,1,Gd,5,Typ,1,TA,Detchd,1950,Unf,1,352,TA,TA,Y,0,0,248,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,110000 +461,60,FV,75,8004,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Somerst,RRAn,Norm,1Fam,2Story,8,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,Stone,110,Gd,TA,PConc,Gd,TA,No,GLQ,544,Unf,0,288,832,GasA,Ex,Y,SBrkr,832,1103,0,1935,1,0,2,1,3,1,TA,8,Typ,0,NA,BuiltIn,2009,Fin,2,552,TA,TA,Y,0,150,0,0,0,0,NA,NA,NA,0,12,2009,New,Partial,263435 +462,70,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Feedr,Norm,1Fam,2Story,7,9,1936,2007,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,Gd,PConc,Gd,Gd,No,ALQ,350,BLQ,210,0,560,GasA,Ex,Y,SBrkr,575,560,0,1135,1,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,1971,RFn,2,576,TA,TA,Y,256,0,0,0,0,0,NA,MnPrv,NA,0,4,2009,WD,Normal,155000 +463,20,RL,60,8281,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1965,1965,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,553,BLQ,311,0,864,GasA,Gd,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,1,Po,Detchd,1965,Unf,1,360,TA,TA,Y,0,0,236,0,0,0,NA,GdWo,NA,0,12,2009,WD,Normal,62383 
+464,70,RL,74,11988,Pave,NA,IR1,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2Story,6,7,1934,1995,Hip,CompShg,Stucco,Stucco,None,0,TA,TA,CBlock,TA,TA,No,LwQ,326,Unf,0,389,715,GasA,Fa,Y,FuseA,849,811,0,1660,0,0,1,1,3,1,TA,6,Typ,1,Gd,Detchd,1939,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,188700 +465,20,RL,60,8430,Pave,NA,Reg,HLS,AllPub,Inside,Mod,CollgCr,Norm,Norm,1Fam,1Story,5,5,1978,1978,Gable,CompShg,HdBoard,HdBoard,BrkFace,136,TA,TA,CBlock,Gd,TA,No,Rec,616,Unf,0,424,1040,GasA,TA,Y,SBrkr,1040,0,0,1040,0,0,2,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,124000 +466,120,RM,NA,3072,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2004,2004,Hip,CompShg,VinylSd,VinylSd,BrkFace,18,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1375,1375,GasA,Ex,Y,SBrkr,1414,0,0,1414,0,0,2,0,2,1,Gd,6,Typ,1,TA,Attchd,2004,Fin,2,398,TA,TA,Y,144,20,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,178740 +467,20,RL,85,10628,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,7,5,1970,1970,Flat,Tar&Grv,Plywood,Plywood,None,0,TA,Gd,CBlock,TA,Gd,Gd,GLQ,778,Unf,0,499,1277,GasA,TA,Y,SBrkr,1277,0,0,1277,1,0,1,0,2,1,TA,5,Typ,1,Po,Attchd,1970,Unf,2,526,TA,TA,Y,0,0,0,0,176,0,NA,GdWo,NA,0,4,2007,WD,Normal,167000 +468,70,RL,79,9480,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,2Story,5,7,1942,1995,Gable,CompShg,MetalSd,MetalSd,Stone,224,TA,TA,CBlock,TA,TA,No,LwQ,386,Unf,0,342,728,GasA,Ex,Y,SBrkr,888,756,0,1644,0,0,1,1,3,1,Gd,7,Typ,2,Gd,Attchd,1942,Unf,1,312,TA,TA,Y,168,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,146500 +469,20,RL,98,11428,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,248,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1626,1626,GasA,Ex,Y,SBrkr,1634,0,0,1634,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,866,TA,TA,Y,0,44,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,250000 
+470,60,RL,76,9291,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,SawyerW,RRNe,Norm,1Fam,2Story,6,5,1993,1993,Gable,CompShg,HdBoard,HdBoard,BrkFace,120,Gd,TA,PConc,Gd,TA,No,GLQ,426,Unf,0,406,832,GasA,Ex,Y,SBrkr,832,878,0,1710,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1993,RFn,2,506,TA,TA,Y,144,70,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,187000 +471,120,RL,NA,6820,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1985,1985,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,368,BLQ,1120,0,1488,GasA,TA,Y,SBrkr,1502,0,0,1502,1,0,1,1,1,1,Gd,4,Typ,0,NA,Attchd,1985,RFn,2,528,TA,TA,Y,0,54,0,0,140,0,NA,NA,NA,0,6,2010,WD,Normal,212000 +472,60,RL,92,11952,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,PosA,Norm,1Fam,2Story,7,6,1977,1977,Mansard,WdShake,WdShing,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,808,808,GasA,TA,Y,SBrkr,1161,808,0,1969,0,0,2,1,3,1,TA,8,Typ,1,Gd,Attchd,1977,RFn,2,534,TA,TA,Y,0,0,0,0,276,0,NA,NA,NA,0,11,2007,WD,Normal,190000 +473,180,RM,35,3675,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,TwnhsE,SLvl,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,80,TA,TA,PConc,Gd,TA,Gd,GLQ,459,Unf,0,88,547,GasA,Ex,Y,SBrkr,1072,0,0,1072,1,0,1,0,2,1,TA,5,Typ,0,NA,Basment,2005,RFn,2,525,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,148000 +474,20,RL,110,14977,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,304,Gd,TA,PConc,Ex,TA,Gd,GLQ,1350,Unf,0,626,1976,GasA,Ex,Y,SBrkr,1976,0,0,1976,1,0,2,0,2,1,Gd,7,Typ,1,Ex,Attchd,2006,RFn,3,908,TA,TA,Y,250,63,0,0,0,0,NA,NA,NA,0,7,2007,New,Partial,440000 +475,120,RL,41,5330,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,2000,2000,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,1196,Unf,0,298,1494,GasA,Ex,Y,SBrkr,1652,0,0,1652,1,0,2,0,2,1,Ex,6,Typ,0,NA,Attchd,2000,RFn,2,499,TA,TA,Y,96,48,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,251000 
+476,20,RL,80,8480,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1963,1963,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,GLQ,630,Unf,0,340,970,GasA,TA,Y,SBrkr,970,0,0,970,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1996,Unf,2,624,TA,TA,Y,0,24,0,0,192,0,NA,NA,NA,0,7,2007,WD,Normal,132500 +477,20,RL,75,13125,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,CollgCr,Norm,Norm,1Fam,1Story,6,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,215,TA,TA,PConc,Gd,TA,Gd,GLQ,994,Unf,0,484,1478,GasA,Ex,Y,SBrkr,1493,0,0,1493,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1997,Fin,2,508,TA,TA,Y,140,39,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,208900 +478,60,RL,105,13693,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,772,Ex,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,2153,2153,GasA,Ex,Y,SBrkr,2069,574,0,2643,0,0,2,1,3,1,Ex,9,Typ,1,Gd,BuiltIn,2006,Fin,3,694,TA,TA,Y,414,84,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,380000 +479,20,RL,79,10637,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2007,2008,Hip,CompShg,VinylSd,VinylSd,Stone,336,Gd,TA,PConc,Ex,TA,Gd,GLQ,1288,Unf,0,417,1705,GasA,Ex,Y,SBrkr,1718,0,0,1718,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2007,RFn,3,826,TA,TA,Y,208,44,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,297000 +480,30,RM,50,5925,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,4,7,1937,2000,Hip,CompShg,Stucco,Stucco,BrkCmn,435,TA,TA,BrkTil,Fa,TA,No,Rec,168,Unf,0,739,907,GasA,TA,Y,SBrkr,1131,0,0,1131,0,0,1,0,2,1,TA,7,Typ,0,NA,Detchd,1995,Unf,2,672,TA,TA,Y,0,72,0,0,0,0,NA,MnPrv,NA,0,3,2007,WD,Alloca,89471 +481,20,RL,98,16033,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,BrkFace,378,Gd,TA,PConc,Ex,TA,Gd,GLQ,1261,Unf,0,572,1833,GasA,Ex,Y,SBrkr,1850,0,0,1850,1,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2004,Fin,3,772,TA,TA,Y,519,112,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,326000 
+482,20,RL,72,11846,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2003,2004,Hip,CompShg,VinylSd,VinylSd,BrkFace,562,Gd,TA,PConc,Ex,TA,Gd,GLQ,1567,Unf,0,225,1792,GasA,Ex,Y,SBrkr,1792,0,0,1792,1,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2003,Fin,3,874,TA,TA,Y,206,49,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,374000 +483,70,RM,50,2500,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,8,1915,2005,Gable,CompShg,Stucco,Stucco,None,0,Gd,TA,PConc,TA,TA,No,ALQ,299,Unf,0,611,910,GasA,Ex,Y,SBrkr,916,910,0,1826,1,0,1,1,4,1,Ex,7,Min2,1,Gd,Attchd,1915,Unf,1,164,Fa,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,155000 +484,120,RM,32,4500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Mitchel,Norm,Norm,Twnhs,1Story,6,5,1998,1998,Hip,CompShg,VinylSd,VinylSd,BrkFace,116,TA,TA,PConc,Ex,TA,No,GLQ,897,Unf,0,319,1216,GasA,Ex,Y,SBrkr,1216,0,0,1216,1,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,1998,Unf,2,402,TA,TA,Y,0,125,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,164000 +485,20,RL,NA,7758,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,7,1962,2001,Gable,CompShg,HdBoard,Plywood,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,588,Unf,0,411,999,GasA,Gd,Y,SBrkr,999,0,0,999,1,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,1963,Unf,1,264,TA,TA,Y,0,132,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,132500 +486,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1950,2007,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,CBlock,TA,TA,No,ALQ,607,Unf,0,506,1113,GasA,Gd,Y,SBrkr,1113,0,0,1113,0,0,1,0,3,1,Gd,5,Typ,1,Gd,Attchd,1950,Unf,1,264,TA,TA,Y,0,80,120,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,147000 +487,20,RL,79,10289,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1965,1965,Hip,CompShg,MetalSd,MetalSd,BrkFace,168,TA,TA,CBlock,TA,TA,No,ALQ,836,Unf,0,237,1073,GasA,TA,Y,SBrkr,1073,0,0,1073,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1965,RFn,2,515,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,156000 
+488,20,RL,70,12243,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,5,6,1971,1971,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Av,ALQ,998,Unf,0,486,1484,GasA,Gd,Y,SBrkr,1484,0,0,1484,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1971,Unf,2,487,TA,TA,Y,224,0,0,0,180,0,NA,NA,NA,0,2,2007,WD,Normal,175000 +489,190,RL,60,10800,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,2fmCon,1.5Fin,5,4,1900,1970,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Fa,CBlock,TA,Fa,No,BLQ,664,Unf,0,290,954,GasA,TA,N,FuseA,1766,648,0,2414,0,0,2,0,3,2,TA,10,Mod,1,Gd,Attchd,1970,Unf,2,520,TA,Fa,N,142,0,0,0,0,0,NA,NA,NA,0,5,2006,ConLD,Normal,160000 +490,180,RM,21,1526,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,SFoyer,4,8,1970,2002,Gable,CompShg,CemntBd,CmentBd,None,0,TA,Gd,CBlock,Gd,TA,Av,GLQ,515,Unf,0,115,630,GasA,TA,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,Gd,3,Typ,0,NA,Attchd,1970,Unf,1,286,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,86000 +491,160,RM,NA,2665,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,5,6,1976,1976,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,264,264,GasA,TA,Y,SBrkr,616,688,0,1304,0,0,1,1,3,1,TA,4,Typ,1,Gd,BuiltIn,1976,Fin,1,336,TA,TA,Y,141,24,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,115000 +492,50,RL,79,9490,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,6,7,1941,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,403,Rec,165,238,806,GasA,TA,Y,FuseA,958,620,0,1578,1,0,1,0,3,1,Fa,5,Typ,2,TA,Attchd,1941,Unf,1,240,TA,TA,Y,0,0,32,0,0,0,NA,MnPrv,NA,0,8,2006,WD,Normal,133000 +493,60,RL,105,15578,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,728,728,GasA,Gd,Y,SBrkr,728,728,0,1456,0,0,2,1,3,1,TA,8,Typ,0,NA,Attchd,2006,RFn,2,429,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2006,New,Partial,172785 
+494,20,RL,70,7931,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1960,1960,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,374,LwQ,532,363,1269,GasA,TA,Y,FuseA,1269,0,0,1269,0,0,1,1,3,1,TA,6,Typ,1,Fa,Detchd,1964,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,155000 +495,30,RM,50,5784,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,5,8,1938,1996,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,190,190,GasA,Gd,Y,FuseA,886,0,0,886,0,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1938,Unf,1,273,TA,TA,Y,144,20,80,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,91300 +496,30,C (all),60,7879,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,4,5,1920,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,495,Unf,0,225,720,GasA,TA,N,FuseA,720,0,0,720,0,0,1,0,2,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,523,115,0,0,0,NA,GdWo,NA,0,11,2009,WD,Abnorml,34900 +497,20,RL,NA,12692,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,1Story,8,5,1992,1993,Hip,CompShg,BrkFace,BrkFace,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,1231,Unf,0,1969,3200,GasA,Ex,Y,SBrkr,3228,0,0,3228,1,0,3,0,4,1,Gd,10,Typ,1,Gd,Attchd,1992,RFn,2,546,TA,TA,Y,264,75,291,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,430000 +498,50,RL,60,9120,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,7,6,1925,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,PConc,TA,TA,No,Rec,329,Unf,0,697,1026,GasA,Ex,Y,SBrkr,1133,687,0,1820,1,0,2,0,4,1,TA,8,Typ,0,NA,Detchd,1925,Unf,1,240,TA,TA,N,0,100,0,0,0,0,NA,GdPrv,NA,0,6,2008,WD,Normal,184000 +499,20,RL,65,7800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,7,1967,2004,Hip,CompShg,HdBoard,HdBoard,BrkFace,89,TA,TA,PConc,TA,TA,No,ALQ,450,Unf,0,414,864,GasA,Ex,Y,SBrkr,899,0,0,899,0,0,1,0,3,1,Gd,5,Typ,0,NA,Attchd,1967,Fin,1,288,TA,TA,Y,64,0,0,0,0,0,NA,MnPrv,NA,0,6,2009,WD,Normal,130000 
+500,20,RL,70,7535,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1958,1985,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,111,LwQ,279,522,912,GasA,Fa,Y,SBrkr,912,0,0,912,0,1,1,0,2,1,TA,5,Typ,0,NA,Attchd,1958,Fin,1,297,TA,TA,Y,12,285,0,0,0,0,NA,MnWw,Shed,480,6,2007,WD,Normal,120000 +501,160,RM,21,1890,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1973,1973,Gable,CompShg,HdBoard,HdBoard,BrkFace,285,TA,TA,CBlock,TA,TA,No,BLQ,356,Unf,0,316,672,GasA,TA,Y,SBrkr,672,546,0,1218,0,0,1,1,3,1,TA,7,Typ,0,NA,Detchd,1973,Unf,1,264,TA,TA,Y,144,28,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,113000 +502,60,FV,75,9803,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,400,Unf,0,466,866,GasA,Gd,Y,SBrkr,866,902,0,1768,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2005,RFn,2,603,TA,TA,Y,0,108,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,226700 +503,20,RL,70,9170,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Feedr,Norm,1Fam,1Story,5,7,1965,1965,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,698,GLQ,96,420,1214,GasA,Ex,Y,SBrkr,1214,0,0,1214,1,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1965,Unf,2,461,Fa,Fa,Y,0,0,184,0,0,0,NA,GdPrv,Shed,400,4,2007,WD,Normal,140000 +504,20,RL,100,15602,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,7,8,1959,1997,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,ALQ,1247,Unf,0,254,1501,GasA,TA,Y,SBrkr,1801,0,0,1801,1,0,2,0,1,1,TA,6,Typ,2,TA,Attchd,1959,Fin,2,484,TA,TA,Y,0,54,0,0,161,0,NA,GdWo,NA,0,3,2010,WD,Normal,289000 +505,160,RL,24,2308,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,TwnhsE,2Story,6,5,1974,1974,Gable,CompShg,Plywood,Brk Cmn,None,0,TA,TA,CBlock,TA,TA,No,ALQ,257,Rec,495,103,855,GasA,TA,Y,SBrkr,855,467,0,1322,0,1,2,1,3,1,TA,6,Typ,1,Fa,Attchd,1974,Unf,2,440,TA,TA,Y,260,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,147000 
+506,90,RM,60,7596,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,Duplex,2Story,5,5,1952,1952,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,360,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,960,960,GasA,Gd,Y,SBrkr,960,1000,0,1960,0,0,2,0,4,2,TA,10,Typ,0,NA,Detchd,1952,Unf,2,400,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,7,2009,COD,Normal,124500 +507,60,RL,80,9554,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,8,5,1993,1994,Gable,CompShg,VinylSd,VinylSd,BrkFace,125,Gd,TA,PConc,Gd,TA,No,GLQ,380,Unf,0,397,777,GasA,Ex,Y,SBrkr,1065,846,0,1911,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1993,RFn,2,471,TA,TA,Y,182,81,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,215000 +508,20,FV,75,7862,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,6,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,27,Unf,0,1191,1218,GasA,Ex,Y,SBrkr,1218,0,0,1218,0,0,2,0,2,1,Gd,4,Typ,0,NA,Attchd,2009,Fin,2,676,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,9,2009,New,Partial,208300 +509,70,RM,60,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,9,1928,2005,Gambrel,CompShg,MetalSd,MetalSd,None,0,TA,Ex,BrkTil,TA,TA,No,Rec,141,Unf,0,548,689,GasA,Ex,Y,SBrkr,689,689,0,1378,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Detchd,1928,Unf,2,360,TA,TA,N,0,0,116,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,161000 +510,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1959,1959,Gable,CompShg,MetalSd,MetalSd,BrkFace,132,TA,TA,CBlock,TA,TA,No,ALQ,991,Unf,0,50,1041,GasA,Ex,Y,SBrkr,1041,0,0,1041,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,270,TA,TA,Y,224,88,0,0,0,0,NA,MnPrv,NA,0,7,2009,WD,Normal,124500 +511,20,RL,75,14559,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1951,2000,Hip,CompShg,Wd Sdng,Wd Sdng,BrkCmn,70,Gd,TA,CBlock,TA,TA,No,BLQ,650,Rec,180,178,1008,GasA,Ex,Y,SBrkr,1363,0,0,1363,1,0,1,0,2,1,TA,6,Min1,2,TA,CarPort,1951,Unf,1,288,TA,TA,Y,324,42,0,0,168,0,NA,NA,Shed,2000,6,2009,WD,Normal,164900 
+512,120,RL,40,6792,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,Stone,94,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1368,1368,GasA,Ex,Y,SBrkr,1368,0,0,1368,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,RFn,2,474,TA,TA,Y,132,35,0,0,0,0,NA,NA,NA,0,3,2006,New,Partial,202665 +513,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,5,1958,1958,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,521,LwQ,174,169,864,GasA,TA,Y,SBrkr,864,0,0,864,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1964,Unf,2,624,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,129900 +514,20,RL,71,9187,Pave,NA,Reg,Bnk,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,1Story,6,5,1983,1983,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,TA,TA,No,ALQ,336,Unf,0,748,1084,GasA,TA,Y,SBrkr,1080,0,0,1080,0,0,1,1,3,1,TA,5,Typ,0,NA,Attchd,1983,Unf,2,484,TA,TA,Y,120,0,158,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,134000 +515,45,RL,55,10594,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Unf,5,5,1926,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,768,768,Grav,Fa,N,SBrkr,789,0,0,789,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1926,Unf,1,200,Po,Po,Y,0,0,112,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,96500 +516,20,RL,94,12220,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2009,2009,Hip,CompShg,CemntBd,CmentBd,BrkFace,305,Ex,TA,CBlock,Ex,TA,No,GLQ,1436,Unf,0,570,2006,GasA,Ex,Y,SBrkr,2020,0,0,2020,1,0,2,1,3,1,Ex,9,Typ,1,Gd,Attchd,2009,Fin,3,900,TA,TA,Y,156,54,0,0,0,0,NA,NA,NA,0,9,2009,New,Partial,402861 +517,80,RL,NA,10448,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,6,6,1972,1972,Gable,CompShg,HdBoard,HdBoard,BrkFace,333,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,689,689,GasA,TA,Y,SBrkr,1378,741,0,2119,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1972,RFn,2,583,TA,TA,Y,0,104,0,0,0,0,NA,GdPrv,NA,0,8,2009,COD,Abnorml,158000 
+518,60,RL,79,10208,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,921,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1264,1264,GasA,Ex,Y,SBrkr,1277,1067,0,2344,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1996,RFn,3,889,TA,TA,Y,220,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,265000 +519,60,RL,NA,9531,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,706,Unf,0,88,794,GasA,Ex,Y,SBrkr,882,914,0,1796,1,0,2,1,3,1,TA,7,Typ,0,NA,Attchd,1998,RFn,2,546,TA,TA,Y,0,36,0,0,0,0,NA,MnPrv,NA,0,5,2007,WD,Normal,211000 +520,70,RL,53,10918,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,9,1926,2004,Gambrel,CompShg,MetalSd,MetalSd,None,0,Gd,TA,BrkTil,Gd,TA,No,Unf,0,Unf,0,1276,1276,GasA,Ex,Y,SBrkr,1276,804,0,2080,0,0,1,1,3,1,Gd,9,Typ,2,Gd,Detchd,1926,Unf,1,282,TA,TA,Y,0,0,0,0,145,0,NA,MnPrv,NA,0,6,2009,WD,Normal,234000 +521,190,RL,60,10800,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,4,7,1900,2000,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,N,FuseA,694,600,0,1294,0,0,2,0,3,2,TA,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,220,114,210,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,106250 +522,20,RL,90,11988,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,1Fam,1Story,6,6,1957,1957,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,777,Unf,0,467,1244,GasA,Ex,Y,FuseA,1244,0,0,1244,0,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1957,Unf,1,336,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,150000 +523,50,RM,50,5000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Feedr,Norm,1Fam,1.5Fin,6,7,1947,1950,Gable,CompShg,CemntBd,CmentBd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,399,Unf,0,605,1004,GasA,Ex,Y,SBrkr,1004,660,0,1664,0,0,2,0,3,1,TA,7,Typ,2,Gd,Detchd,1950,Unf,2,420,TA,TA,Y,0,24,36,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,159000 
+524,60,RL,130,40094,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,Edwards,PosN,PosN,1Fam,2Story,10,5,2007,2008,Hip,CompShg,CemntBd,CmentBd,Stone,762,Ex,TA,PConc,Ex,TA,Gd,GLQ,2260,Unf,0,878,3138,GasA,Ex,Y,SBrkr,3138,1538,0,4676,1,0,3,1,3,1,Ex,11,Typ,1,Gd,BuiltIn,2007,Fin,3,884,TA,TA,Y,208,406,0,0,0,0,NA,NA,NA,0,10,2007,New,Partial,184750 +525,60,RL,95,11787,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,594,Gd,TA,PConc,Gd,TA,No,GLQ,719,Unf,0,660,1379,GasA,Ex,Y,SBrkr,1383,1015,0,2398,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1996,Fin,3,834,TA,TA,Y,239,60,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,315750 +526,20,FV,62,7500,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1257,1257,GasA,Ex,Y,SBrkr,1266,0,0,1266,0,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2005,Unf,2,453,TA,TA,Y,38,144,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,176000 +527,20,RL,70,13300,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1956,2000,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,Gd,TA,No,Rec,377,Unf,0,551,928,GasA,TA,Y,SBrkr,928,0,0,928,0,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1956,Unf,1,252,TA,TA,Y,261,0,156,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,132000 +528,60,RL,67,14948,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2008,2008,Hip,CompShg,VinylSd,VinylSd,Stone,268,Ex,TA,PConc,Ex,TA,Av,GLQ,1330,Unf,0,122,1452,GasA,Ex,Y,SBrkr,1476,1237,0,2713,1,0,2,1,3,1,Ex,11,Typ,1,Gd,Attchd,2008,Fin,3,858,TA,TA,Y,126,66,0,0,0,0,NA,NA,NA,0,11,2008,New,Partial,446261 +529,30,RL,58,9098,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,7,1920,2002,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,Mn,ALQ,348,Unf,0,180,528,GasA,Ex,Y,SBrkr,605,0,0,605,1,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,144,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,86000 
+530,20,RL,NA,32668,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,3,1957,1975,Hip,CompShg,Wd Sdng,Stone,NA,NA,Gd,TA,PConc,TA,TA,No,Rec,1219,Unf,0,816,2035,GasA,TA,Y,SBrkr,2515,0,0,2515,1,0,3,0,4,2,TA,9,Maj1,2,TA,Attchd,1975,RFn,2,484,TA,TA,Y,0,0,200,0,0,0,NA,NA,NA,0,3,2007,WD,Alloca,200624 +531,80,RL,85,10200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,SLvl,6,5,1988,1989,Gable,CompShg,HdBoard,HdBoard,BrkFace,219,Gd,TA,CBlock,Gd,TA,Av,GLQ,783,Unf,0,678,1461,GasA,Ex,Y,SBrkr,1509,0,0,1509,1,0,2,0,3,1,Gd,5,Typ,1,Fa,Attchd,1988,RFn,2,600,TA,TA,Y,224,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Abnorml,175000 +532,70,RM,60,6155,Pave,NA,IR1,Lvl,AllPub,FR3,Gtl,BrkSide,RRNn,Feedr,1Fam,2Story,6,8,1920,1999,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,Fa,Mn,Unf,0,Unf,0,611,611,GasA,Ex,Y,SBrkr,751,611,0,1362,0,0,2,0,3,1,TA,6,Typ,0,NA,Detchd,1920,Fin,2,502,TA,Fa,Y,0,0,84,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,128000 +533,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1955,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,SBrkr,827,0,0,827,0,0,1,0,2,1,TA,5,Mod,1,Po,Detchd,1967,Unf,1,392,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,107500 +534,20,RL,50,5000,Pave,NA,Reg,Low,AllPub,Inside,Mod,BrkSide,Norm,Norm,1Fam,1Story,1,3,1946,1950,Gable,CompShg,VinylSd,VinylSd,None,0,Fa,Fa,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Fa,N,FuseF,334,0,0,334,0,0,1,0,1,1,Fa,2,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,1,2007,WD,Normal,39300 +535,60,RL,74,9056,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,8,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,Gd,Av,Unf,0,Unf,0,707,707,GasA,Ex,Y,SBrkr,707,707,0,1414,0,0,2,1,3,1,Gd,6,Typ,1,Gd,Attchd,2004,Fin,2,403,TA,TA,Y,100,35,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,178000 
+536,190,RL,70,7000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,2fmCon,2Story,5,7,1910,1991,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Gd,TA,Gd,GLQ,969,Unf,0,148,1117,GasA,TA,Y,SBrkr,820,527,0,1347,1,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,85,0,148,0,0,0,NA,NA,NA,0,1,2008,WD,Normal,107500 +537,60,RL,57,8924,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1998,1999,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,880,880,GasA,Ex,Y,SBrkr,880,844,0,1724,0,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,1998,Fin,2,527,TA,TA,Y,120,155,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,188000 +538,20,RL,NA,12735,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1972,1972,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,600,Unf,0,264,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1980,Unf,2,576,TA,TA,Y,216,0,0,0,0,0,NA,MnWw,NA,0,4,2008,COD,Normal,111250 +539,20,RL,NA,11553,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,Plywood,Plywood,BrkFace,188,TA,TA,CBlock,TA,TA,No,BLQ,673,Unf,0,378,1051,GasA,TA,Y,SBrkr,1159,0,0,1159,0,0,1,1,3,1,TA,7,Typ,1,Fa,Attchd,1968,Unf,1,336,TA,TA,Y,466,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,158000 +540,20,RL,NA,11423,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,479,Gd,TA,PConc,Gd,TA,Av,GLQ,1358,Unf,0,223,1581,GasA,Ex,Y,SBrkr,1601,0,0,1601,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,2,670,TA,TA,Y,180,0,0,0,0,0,NA,MnPrv,Shed,2000,5,2010,WD,Normal,272000 +541,20,RL,85,14601,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,9,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,584,Ex,TA,PConc,Ex,TA,Av,GLQ,1260,Unf,0,578,1838,GasA,Ex,Y,SBrkr,1838,0,0,1838,1,0,2,0,2,1,Ex,8,Typ,1,Gd,Attchd,2006,Fin,3,765,TA,TA,Y,270,68,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,315000 
+542,60,RL,NA,11000,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,72,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,969,969,GasA,Ex,Y,SBrkr,997,1288,0,2285,0,0,2,1,4,1,Gd,8,Typ,1,TA,BuiltIn,2000,Fin,3,648,TA,TA,Y,0,56,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,248000 +543,20,RL,78,10140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,1Story,7,5,1998,1999,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,LwQ,144,GLQ,1127,379,1650,GasA,Ex,Y,SBrkr,1680,0,0,1680,1,0,2,0,3,1,Gd,7,Maj1,1,TA,Attchd,1998,Fin,2,583,TA,TA,Y,78,73,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,213250 +544,120,RH,34,4058,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,TwnhsE,SFoyer,7,5,1998,1998,Gable,CompShg,MetalSd,MetalSd,BrkFace,182,TA,TA,PConc,Gd,TA,Av,GLQ,584,LwQ,139,0,723,GasA,Ex,Y,SBrkr,767,0,0,767,1,0,1,0,1,1,TA,4,Typ,0,NA,Attchd,1998,Fin,1,367,TA,TA,Y,120,40,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,133000 +545,60,RL,58,17104,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,GasA,Ex,Y,SBrkr,664,832,0,1496,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,2,426,TA,TA,Y,100,24,0,0,0,0,NA,NA,NA,0,9,2006,New,Partial,179665 +546,50,RL,NA,13837,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,1.5Fin,7,5,1988,1988,Gable,CompShg,HdBoard,HdBoard,BrkFace,178,Gd,Gd,PConc,Gd,Gd,No,GLQ,1002,LwQ,202,0,1204,GasA,Gd,Y,SBrkr,1377,806,0,2183,0,0,2,1,4,1,Gd,9,Typ,0,NA,Attchd,1988,Unf,3,786,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,2,2006,WD,Normal,229000 +547,50,RL,70,8737,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,7,1923,1950,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,No,Rec,300,Unf,0,765,1065,GasA,Ex,Y,FuseA,915,720,0,1635,0,0,1,1,3,1,TA,6,Typ,1,Gd,Detchd,1950,Unf,2,440,TA,TA,Y,0,38,0,144,0,0,NA,NA,NA,0,5,2007,WD,Normal,210000 
+548,85,RL,54,7244,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,7,1970,1970,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,Gd,TA,Av,ALQ,619,Unf,0,149,768,GasA,Ex,Y,SBrkr,768,0,0,768,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1987,Unf,2,624,TA,TA,Y,104,0,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,129500 +549,20,RM,49,8235,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,OldTown,Feedr,RRNn,1Fam,1Story,5,7,1955,1995,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,LwQ,180,Rec,645,0,825,GasA,TA,Y,SBrkr,825,0,0,825,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1963,RFn,2,720,TA,TA,Y,140,50,0,0,0,0,NA,MnPrv,NA,0,6,2008,WD,Normal,125000 +550,60,FV,75,9375,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,912,912,GasA,Ex,Y,SBrkr,912,1182,0,2094,0,0,2,1,4,1,Gd,8,Typ,1,Gd,BuiltIn,2003,Fin,2,615,TA,TA,Y,182,182,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,263000 +551,120,RL,53,4043,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,TwnhsE,1Story,6,6,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,559,Unf,0,510,1069,GasA,TA,Y,SBrkr,1069,0,0,1069,0,0,2,0,2,1,TA,4,Typ,0,NA,Attchd,1977,RFn,2,440,TA,TA,Y,0,55,0,0,200,0,NA,NA,NA,0,10,2008,COD,Abnorml,140000 +552,20,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1957,1957,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Rec,308,Unf,0,620,928,GasA,Gd,Y,FuseA,928,0,0,928,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1957,Fin,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,112500 +553,20,RL,87,11146,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,250,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1709,1709,GasA,Ex,Y,SBrkr,1717,0,0,1717,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,908,TA,TA,Y,169,39,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,255500 
+554,20,RL,67,8777,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Feedr,Norm,1Fam,1Story,4,5,1949,2003,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,SBrkr,1126,0,0,1126,0,0,2,0,2,1,Gd,5,Typ,0,NA,Detchd,2002,Fin,2,520,TA,TA,N,0,96,0,0,0,0,NA,MnPrv,NA,0,5,2009,WD,Normal,108000 +555,60,RL,85,10625,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,292,Gd,TA,PConc,Gd,TA,No,GLQ,866,Unf,0,132,998,GasA,Ex,Y,SBrkr,1006,1040,0,2046,1,0,2,1,3,1,Gd,8,Typ,1,Gd,BuiltIn,2003,RFn,3,871,TA,TA,Y,320,62,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,284000 +556,45,RM,58,6380,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Unf,5,6,1922,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,993,993,GasA,TA,Y,FuseA,1048,0,0,1048,0,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1922,Unf,1,280,TA,TA,Y,0,0,116,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,113000 +557,20,RL,69,14850,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1957,1957,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,895,Unf,0,197,1092,GasA,TA,Y,FuseA,1092,0,0,1092,1,0,1,0,2,1,TA,6,Typ,1,TA,Attchd,1957,Fin,1,299,TA,TA,Y,268,0,0,0,122,0,NA,MnWw,NA,0,5,2006,WD,Normal,141000 +558,50,C (all),60,11040,Pave,NA,Reg,Low,AllPub,Inside,Mod,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,6,1920,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,637,Unf,0,0,637,GasA,Gd,Y,SBrkr,897,439,0,1336,0,0,1,1,3,1,TA,7,Typ,0,NA,CarPort,1994,Unf,1,570,TA,TA,Y,0,47,120,0,0,0,NA,NA,NA,0,9,2006,COD,Normal,108000 +559,60,RL,57,21872,Pave,NA,IR2,HLS,AllPub,FR2,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,Gd,GLQ,604,Unf,0,125,729,GasA,Ex,Y,SBrkr,729,717,0,1446,0,1,2,1,3,1,TA,6,Typ,1,TA,Attchd,1996,Unf,2,406,TA,TA,Y,264,22,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,175000 
+560,120,RL,NA,3196,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,18,Gd,TA,PConc,Gd,TA,Gd,Unf,0,Unf,0,1374,1374,GasA,Ex,Y,SBrkr,1557,0,0,1557,0,0,2,0,2,1,Gd,7,Typ,1,TA,Attchd,2003,Fin,2,420,TA,TA,Y,143,20,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,234000 +561,20,RL,NA,11341,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1957,1996,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,180,TA,TA,CBlock,Gd,TA,No,ALQ,1302,Unf,0,90,1392,GasA,TA,Y,SBrkr,1392,0,0,1392,1,0,1,1,3,1,TA,5,Mod,1,Gd,Detchd,1957,Unf,2,528,TA,TA,Y,0,0,0,0,95,0,NA,NA,NA,0,5,2010,WD,Normal,121500 +562,20,RL,77,10010,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1974,1975,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Av,ALQ,1071,LwQ,123,195,1389,GasA,Gd,Y,SBrkr,1389,0,0,1389,1,0,1,0,2,1,TA,6,Typ,1,TA,Attchd,1975,RFn,2,418,TA,TA,Y,240,38,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,170000 +563,30,RL,63,13907,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,6,1940,1969,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,290,Unf,0,706,996,GasA,Ex,Y,SBrkr,996,0,0,996,1,0,1,0,3,1,TA,6,Typ,1,Gd,NA,NA,NA,0,0,NA,NA,Y,144,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,108000 +564,50,RL,66,21780,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,6,7,1918,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,Mn,Unf,0,Unf,0,1163,1163,GasA,Ex,Y,SBrkr,1163,511,0,1674,0,0,2,0,4,1,TA,8,Typ,1,Gd,Detchd,1955,Fin,2,396,TA,TA,N,72,36,0,0,144,0,NA,NA,NA,0,7,2008,WD,Normal,185000 +565,60,RL,NA,13346,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1992,2000,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,728,Unf,0,367,1095,GasA,Ex,Y,SBrkr,1166,1129,0,2295,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1992,RFn,2,590,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,268000 
+566,70,RL,66,6858,Pave,NA,Reg,Bnk,AllPub,Corner,Gtl,SWISU,Norm,Norm,1Fam,2Story,6,4,1915,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,806,806,GasA,TA,N,FuseF,841,806,0,1647,1,0,1,1,4,1,Fa,6,Typ,0,NA,Detchd,1920,Unf,1,216,TA,TA,Y,0,66,136,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,128000 +567,60,RL,77,11198,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,2Story,9,5,2005,2007,Hip,CompShg,VinylSd,VinylSd,BrkFace,245,Gd,TA,PConc,Gd,Gd,No,Unf,0,Unf,0,1122,1122,GasA,Ex,Y,SBrkr,1134,1370,0,2504,0,0,2,1,4,1,Ex,11,Typ,1,Gd,BuiltIn,2005,Fin,3,656,TA,TA,Y,144,39,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,325000 +568,20,RL,70,10171,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,168,Gd,TA,PConc,Gd,TA,No,GLQ,2,Unf,0,1515,1517,GasA,Ex,Y,SBrkr,1535,0,0,1535,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,532,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,214000 +569,50,RL,79,12327,Pave,NA,IR1,Low,AllPub,Inside,Mod,SawyerW,Norm,Norm,1Fam,1.5Fin,8,8,1983,2009,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,CBlock,Gd,TA,Gd,GLQ,1441,Unf,0,55,1496,GasA,Ex,Y,SBrkr,1496,636,0,2132,1,0,1,1,1,1,Gd,5,Min2,1,Gd,BuiltIn,1983,Fin,2,612,Gd,TA,Y,349,40,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,316600 +570,90,RL,NA,7032,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,Duplex,SFoyer,5,5,1979,1979,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Gd,TA,Gd,GLQ,943,Unf,0,0,943,GasA,TA,Y,SBrkr,943,0,0,943,1,0,1,0,2,1,TA,4,Typ,2,TA,Detchd,1979,Unf,2,600,TA,TA,Y,42,0,0,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,135960 +571,90,RL,74,13101,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,5,5,1965,1965,Gable,CompShg,HdBoard,HdBoard,BrkFace,108,TA,TA,CBlock,TA,TA,No,LwQ,231,Unf,0,1497,1728,GasA,TA,Y,SBrkr,1728,0,0,1728,0,0,2,0,6,2,TA,10,Typ,0,NA,Detchd,1987,Unf,2,576,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,142600 
+572,20,RL,60,7332,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1959,1959,Gable,CompShg,WdShing,Wd Shng,BrkFace,207,TA,TA,CBlock,TA,TA,No,BLQ,414,Unf,0,450,864,GasA,Ex,Y,SBrkr,864,0,0,864,1,0,1,0,2,1,Gd,4,Typ,0,NA,Attchd,1959,Unf,1,288,TA,TA,Y,168,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Abnorml,120000 +573,60,RL,83,13159,Pave,NA,IR1,HLS,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,2Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,846,846,GasA,Gd,Y,SBrkr,846,846,0,1692,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2009,RFn,2,650,TA,TA,Y,208,114,0,0,0,0,NA,NA,NA,0,7,2009,New,Partial,224500 +574,80,RL,76,9967,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,384,384,GasA,Ex,Y,SBrkr,774,656,0,1430,0,0,2,1,3,1,TA,8,Typ,1,TA,BuiltIn,2000,RFn,2,400,TA,TA,Y,100,0,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,170000 +575,80,RL,70,10500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,7,1971,2005,Gambrel,CompShg,MetalSd,AsphShn,BrkFace,82,TA,TA,CBlock,TA,TA,Av,ALQ,349,Unf,0,23,372,GasA,TA,Y,SBrkr,576,533,0,1109,0,1,1,0,3,1,TA,5,Typ,0,NA,BuiltIn,1971,Unf,1,288,TA,TA,Y,35,0,0,0,0,0,NA,GdWo,NA,0,12,2007,WD,Normal,139000 +576,50,RL,80,8480,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,5,5,1947,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,442,Unf,0,390,832,GasA,TA,Y,SBrkr,832,384,0,1216,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1947,Unf,1,336,TA,TA,Y,158,0,102,0,0,0,NA,NA,NA,0,10,2008,COD,Abnorml,118500 +577,50,RL,52,6292,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,7,7,1928,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,861,861,GasA,Gd,Y,SBrkr,877,600,0,1477,0,1,2,0,3,1,TA,6,Typ,1,Gd,Detchd,1928,Unf,1,216,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,145000 
+578,80,RL,96,11777,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SLvl,5,6,1966,1966,Gable,CompShg,VinylSd,VinylSd,BrkFace,97,TA,TA,CBlock,TA,TA,Av,LwQ,328,ALQ,551,285,1164,GasA,Ex,Y,SBrkr,1320,0,0,1320,1,0,1,0,3,1,TA,6,Typ,2,Fa,Attchd,1966,RFn,2,564,TA,TA,Y,160,68,240,0,0,0,NA,NA,NA,0,5,2006,WD,Abnorml,164500 +579,160,FV,34,3604,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,689,689,GasA,Ex,Y,SBrkr,703,689,0,1392,0,0,2,0,2,1,Gd,5,Typ,0,NA,Detchd,2007,Unf,2,540,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,2,2008,WD,Abnorml,146000 +580,50,RM,81,12150,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,5,1954,1954,Gable,CompShg,MetalSd,MetalSd,BrkFace,335,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1050,1050,GasA,Ex,N,FuseF,1050,745,0,1795,0,0,2,0,4,1,TA,7,Typ,0,NA,Attchd,1954,Unf,1,352,Fa,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,131500 +581,20,RL,NA,14585,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1960,1987,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,85,TA,TA,CBlock,TA,TA,No,BLQ,594,Rec,219,331,1144,GasA,Ex,Y,SBrkr,1429,0,0,1429,0,1,1,0,3,1,Gd,7,Typ,2,Gd,Attchd,1960,Unf,2,572,TA,TA,Y,216,110,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,181900 +582,20,RL,98,12704,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2008,2009,Hip,CompShg,VinylSd,VinylSd,BrkFace,306,Ex,TA,PConc,Ex,TA,No,Unf,0,Unf,0,2042,2042,GasA,Ex,Y,SBrkr,2042,0,0,2042,0,0,2,1,3,1,Ex,8,Typ,1,Gd,Attchd,2009,RFn,3,1390,TA,TA,Y,0,90,0,0,0,0,NA,NA,NA,0,8,2009,New,Partial,253293 +583,90,RL,81,11841,Grvl,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,SFoyer,6,5,1990,1990,Gable,CompShg,HdBoard,HdBoard,BrkFace,104,TA,Gd,CBlock,Gd,TA,Av,GLQ,816,Unf,0,0,816,GasA,TA,Y,SBrkr,816,0,0,816,1,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,32,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,118500 
+584,75,RM,75,13500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,PosA,1Fam,2.5Unf,10,9,1893,2000,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Ex,Ex,BrkTil,TA,TA,No,Unf,0,Unf,0,1237,1237,GasA,Gd,Y,SBrkr,1521,1254,0,2775,0,0,3,1,3,1,Gd,9,Typ,1,Gd,Detchd,1988,Unf,2,880,Gd,TA,Y,105,502,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,325000 +585,50,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,4,7,1935,1995,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,884,884,GasA,Ex,Y,SBrkr,989,584,0,1573,0,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,1935,Unf,1,240,TA,TA,Y,0,0,54,0,120,0,NA,NA,NA,0,7,2009,WD,Normal,133000 +586,20,RL,88,11443,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2005,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,208,Gd,TA,PConc,Ex,TA,Gd,GLQ,1460,Unf,0,408,1868,GasA,Ex,Y,SBrkr,2028,0,0,2028,1,0,2,0,2,1,Gd,7,Typ,2,Gd,Attchd,2005,RFn,3,880,TA,TA,Y,326,66,0,0,0,0,NA,NA,NA,0,3,2006,New,Partial,369900 +587,30,RL,55,10267,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Norm,1Fam,1Story,6,7,1918,2000,Gable,CompShg,Stucco,Wd Shng,None,0,TA,Gd,BrkTil,TA,Gd,Mn,Rec,210,ALQ,606,0,816,GasA,Ex,Y,SBrkr,838,0,0,838,1,0,1,0,2,1,Fa,5,Typ,0,NA,Detchd,1961,Fin,1,275,TA,TA,N,0,0,112,0,0,0,NA,MnWw,NA,0,5,2008,WD,Normal,130000 +588,85,RL,74,8740,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SFoyer,5,6,1982,1982,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,672,Unf,0,168,840,GasA,TA,Y,SBrkr,860,0,0,860,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1996,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,137000 +589,20,RL,65,25095,Pave,NA,IR1,Low,AllPub,Inside,Sev,ClearCr,Norm,Norm,1Fam,1Story,5,8,1968,2003,Flat,Tar&Grv,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Gd,GLQ,1324,Unf,0,113,1437,GasA,Ex,Y,SBrkr,1473,0,0,1473,2,0,1,0,1,1,Ex,5,Typ,2,Gd,Attchd,1968,Unf,1,452,TA,TA,Y,0,48,0,0,60,0,NA,NA,NA,0,6,2009,WD,Partial,143000 
+590,40,RM,50,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Feedr,1Fam,1Story,5,6,1930,1960,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,742,742,GasA,TA,Y,FuseA,779,0,156,935,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1988,Unf,1,308,TA,TA,P,0,0,0,0,0,0,NA,NA,Shed,600,8,2008,WD,Normal,79500 +591,60,RL,64,8320,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,490,Unf,0,280,770,GasA,Ex,Y,SBrkr,770,812,0,1582,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,520,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,185900 +592,60,RL,97,13478,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,2Story,10,5,2008,2008,Gable,CompShg,CemntBd,CmentBd,Stone,420,Ex,TA,PConc,Ex,TA,Gd,GLQ,1338,Unf,0,384,1722,GasA,Ex,Y,SBrkr,1728,568,0,2296,1,0,2,1,3,1,Ex,10,Typ,1,Gd,BuiltIn,2008,RFn,3,842,TA,TA,Y,382,274,0,0,0,0,NA,NA,NA,0,6,2009,ConLI,Normal,451950 +593,20,RL,60,6600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,8,1982,2003,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,PConc,TA,Gd,No,GLQ,816,Unf,0,0,816,GasA,Ex,Y,SBrkr,816,0,0,816,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1985,Fin,2,816,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,138000 +594,120,RM,NA,4435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,170,Gd,TA,PConc,Gd,TA,Av,GLQ,685,Unf,0,163,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,4,Typ,0,NA,Attchd,2003,Fin,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,140000 +595,20,RL,88,7990,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1975,1975,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,924,924,GasA,TA,Y,SBrkr,924,0,0,924,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1981,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,4,2008,WD,Normal,110000 
+596,20,RL,69,11302,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,1Story,8,5,2005,2006,Gable,CompShg,VinylSd,Other,BrkFace,238,Gd,TA,PConc,Gd,TA,Gd,GLQ,1422,Unf,0,392,1814,GasA,Ex,Y,SBrkr,1826,0,0,1826,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,2005,Fin,3,758,TA,TA,Y,180,75,0,0,120,0,NA,NA,NA,0,8,2006,New,Partial,319000 +597,70,RM,60,3600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,6,7,1910,1993,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,684,684,GasA,Ex,N,FuseA,684,684,0,1368,0,0,1,0,3,1,TA,7,Typ,0,NA,Detchd,1930,Unf,1,216,TA,Fa,N,0,158,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,114504 +598,120,RL,53,3922,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,72,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1258,1258,GasA,Ex,Y,SBrkr,1402,0,0,1402,0,2,0,2,2,1,Gd,7,Typ,1,Gd,Attchd,2006,Fin,3,648,TA,TA,Y,120,16,0,0,0,0,NA,NA,NA,0,2,2007,New,Partial,194201 +599,20,RL,80,12984,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,5,6,1977,1977,Gable,CompShg,Plywood,Plywood,BrkFace,459,TA,TA,CBlock,Gd,TA,Mn,ALQ,1283,LwQ,147,0,1430,GasA,Ex,Y,SBrkr,1647,0,0,1647,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1977,Fin,2,621,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,217500 +600,160,RM,24,1950,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blueste,Norm,Norm,Twnhs,2Story,6,6,1980,1980,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,Gd,TA,No,LwQ,81,GLQ,612,23,716,GasA,TA,Y,SBrkr,716,840,0,1556,1,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1980,Fin,2,452,TA,TA,Y,161,0,0,0,0,0,NA,GdPrv,NA,0,7,2008,COD,Normal,151000 +601,60,RL,74,10927,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,280,Gd,TA,PConc,Gd,TA,Av,GLQ,546,Unf,0,512,1058,GasA,Ex,Y,SBrkr,1058,846,0,1904,1,0,2,1,3,1,Ex,8,Typ,1,Gd,BuiltIn,2003,Fin,2,736,TA,TA,Y,179,60,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,275000 
+602,50,RM,50,9000,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,6,6,1937,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,PConc,TA,TA,No,Unf,0,Unf,0,780,780,GasA,TA,Y,SBrkr,780,595,0,1375,0,0,1,1,3,1,Gd,6,Typ,1,Gd,Detchd,1979,Unf,1,544,TA,TA,P,0,162,0,0,126,0,NA,NA,NA,0,12,2007,WD,Normal,141000 +603,60,RL,80,10041,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,789,Unf,0,119,908,GasA,Ex,Y,SBrkr,927,988,0,1915,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1992,Fin,2,506,TA,TA,Y,120,150,0,0,0,0,NA,NA,NA,0,2,2006,WD,Abnorml,220000 +604,160,FV,30,3182,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2004,2005,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,600,600,GasA,Ex,Y,SBrkr,600,600,0,1200,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2004,RFn,2,480,TA,TA,Y,0,172,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,151000 +605,20,RL,88,12803,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,99,Gd,TA,PConc,Gd,TA,Mn,GLQ,922,Unf,0,572,1494,GasA,Ex,Y,SBrkr,1494,0,0,1494,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2002,RFn,2,530,TA,TA,Y,192,36,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,221000 +606,60,RL,85,13600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,7,6,1965,1990,Gable,CompShg,HdBoard,HdBoard,BrkFace,176,TA,TA,CBlock,TA,TA,No,BLQ,454,Unf,0,314,768,GasA,TA,Y,SBrkr,1186,800,0,1986,0,0,2,1,3,1,TA,7,Typ,3,Fa,Attchd,1965,Unf,2,486,TA,TA,Y,0,42,0,0,189,0,NA,NA,NA,0,10,2009,WD,Normal,205000 +607,20,RL,82,12464,Pave,NA,IR2,Low,AllPub,Corner,Mod,CollgCr,Norm,Norm,1Fam,1Story,5,5,1996,1996,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,Gd,TA,No,GLQ,732,Unf,0,308,1040,GasA,Gd,Y,SBrkr,1040,0,0,1040,1,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,2000,Unf,2,576,TA,TA,Y,168,0,0,0,0,0,NA,GdPrv,NA,0,11,2009,WD,Normal,152000 
+608,20,RL,78,7800,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,2Story,5,8,1948,2002,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,Gd,No,GLQ,603,Unf,0,293,896,GasA,Ex,Y,SBrkr,1112,896,0,2008,1,0,3,0,3,1,Ex,8,Typ,0,NA,Attchd,1948,Unf,1,230,TA,TA,Y,103,0,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,225000 +609,70,RL,78,12168,Pave,NA,Reg,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2Story,8,6,1934,1998,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,PConc,Gd,TA,Mn,BLQ,428,Unf,0,537,965,GasA,TA,Y,SBrkr,1940,1254,0,3194,0,0,2,1,4,1,TA,10,Typ,2,Gd,Basment,1934,Unf,2,380,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,9,2007,WD,Alloca,359100 +610,20,RL,61,7943,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,4,5,1961,1961,Gable,CompShg,VinylSd,VinylSd,BrkCmn,192,TA,Fa,CBlock,TA,TA,Mn,Rec,903,Unf,0,126,1029,GasA,Gd,Y,SBrkr,1029,0,0,1029,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1961,Unf,1,261,TA,TA,Y,64,0,39,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,118500 +611,60,RL,NA,11050,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,PosN,Norm,1Fam,2Story,9,5,2000,2000,Hip,CompShg,VinylSd,VinylSd,BrkFace,204,Gd,TA,PConc,Ex,TA,Mn,GLQ,904,Unf,0,536,1440,GasA,Ex,Y,SBrkr,1476,677,0,2153,1,0,2,1,3,1,Ex,8,Typ,2,Ex,Attchd,2000,Fin,3,736,TA,TA,Y,253,142,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,313000 +612,80,RL,NA,10395,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,6,6,1978,1978,Gable,CompShg,HdBoard,HdBoard,BrkFace,233,TA,TA,CBlock,Gd,TA,Av,ALQ,605,Unf,0,427,1032,GasA,TA,Y,SBrkr,1032,0,0,1032,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1978,Unf,2,564,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,Shed,500,7,2007,WD,Normal,148000 +613,60,RL,NA,11885,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,108,Gd,TA,PConc,Gd,TA,Av,GLQ,990,Unf,0,309,1299,GasA,Ex,Y,SBrkr,1299,573,0,1872,1,0,2,1,3,1,Ex,7,Typ,1,TA,BuiltIn,2001,RFn,2,531,TA,TA,Y,160,122,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,261500 
+614,20,RL,70,8402,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Feedr,Norm,1Fam,1Story,5,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,ALQ,206,Unf,0,914,1120,GasA,Ex,Y,SBrkr,1120,0,0,1120,0,0,1,0,3,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,30,0,0,0,0,NA,NA,NA,0,12,2007,New,Partial,147000 +615,180,RM,21,1491,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,SFoyer,4,6,1972,1972,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,Av,LwQ,150,GLQ,480,0,630,GasA,Ex,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,TA,3,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,96,24,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,75500 +616,85,RL,80,8800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,SFoyer,6,7,1963,1963,Gable,CompShg,MetalSd,MetalSd,BrkFace,156,TA,Gd,PConc,TA,TA,Gd,GLQ,763,Unf,0,173,936,GasA,Ex,Y,SBrkr,1054,0,0,1054,1,0,1,0,3,1,Gd,6,Typ,0,NA,Attchd,1963,RFn,2,480,TA,TA,Y,120,0,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD,Abnorml,137500 +617,60,RL,NA,7861,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2002,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,457,Unf,0,326,783,GasA,Ex,Y,SBrkr,807,702,0,1509,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2002,Fin,2,393,TA,TA,Y,100,75,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,183200 +618,45,RL,59,7227,Pave,NA,Reg,HLS,AllPub,Corner,Mod,NAmes,Artery,Norm,1Fam,1.5Unf,6,6,1954,1954,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,832,832,GasA,Gd,Y,SBrkr,832,0,0,832,0,0,1,0,2,1,Gd,4,Typ,0,NA,Detchd,1962,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,105500 +619,20,RL,90,11694,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2007,2007,Hip,CompShg,CemntBd,CmentBd,BrkFace,452,Ex,TA,PConc,Ex,TA,Av,GLQ,48,Unf,0,1774,1822,GasA,Ex,Y,SBrkr,1828,0,0,1828,0,0,2,0,3,1,Gd,9,Typ,1,Gd,Attchd,2007,Unf,3,774,TA,TA,Y,0,108,0,0,260,0,NA,NA,NA,0,7,2007,New,Partial,314813 
+620,60,RL,85,12244,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,8,5,2003,2003,Hip,CompShg,VinylSd,VinylSd,Stone,226,Gd,TA,PConc,Gd,TA,Gd,GLQ,871,Unf,0,611,1482,GasA,Ex,Y,SBrkr,1482,780,0,2262,1,0,2,1,4,1,Gd,10,Typ,2,Gd,Attchd,2003,Fin,3,749,TA,TA,Y,168,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,305000 +621,30,RL,45,8248,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,3,3,1914,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,41,Unf,0,823,864,GasA,TA,N,FuseF,864,0,0,864,1,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,100,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,67000 +622,60,RL,90,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,7,1974,1997,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,956,Rec,182,384,1522,GasA,TA,Y,SBrkr,1548,1066,0,2614,0,0,2,1,4,1,TA,9,Typ,1,TA,Attchd,1974,RFn,2,624,TA,TA,Y,38,243,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,240000 +623,20,RL,71,7064,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1977,1977,Gable,CompShg,Plywood,Plywood,BrkFace,153,TA,TA,CBlock,TA,TA,No,BLQ,560,Unf,0,420,980,GasA,TA,Y,SBrkr,980,0,0,980,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1986,Unf,2,484,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,135000 +624,160,FV,NA,2117,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,6,5,2000,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,513,Gd,TA,PConc,Gd,TA,No,GLQ,420,Unf,0,336,756,GasA,Ex,Y,SBrkr,756,756,0,1512,0,0,2,1,2,1,Gd,4,Typ,1,TA,Detchd,2000,Unf,2,440,TA,TA,Y,0,32,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,168500 +625,60,RL,80,10400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,5,1972,1972,Gable,CompShg,VinylSd,VinylSd,None,288,TA,TA,CBlock,TA,TA,No,Rec,247,Unf,0,485,732,GasA,Gd,Y,SBrkr,1012,778,0,1790,1,0,1,2,4,1,TA,8,Min2,1,TA,Attchd,1972,RFn,2,484,TA,TA,Y,148,0,0,0,147,0,NA,NA,NA,0,11,2006,WD,Normal,165150 
+626,20,RL,87,10000,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1962,1962,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,261,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1116,1116,GasA,TA,Y,SBrkr,1116,0,0,1116,0,0,1,1,3,1,TA,5,Typ,0,NA,Attchd,1962,Unf,2,440,TA,TA,Y,0,0,0,0,385,0,NA,NA,NA,0,2,2010,WD,Normal,160000 +627,20,RL,NA,12342,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1960,1978,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,978,978,GasA,TA,Y,SBrkr,1422,0,0,1422,0,0,1,0,3,1,TA,6,Min1,1,TA,Attchd,1960,RFn,1,286,TA,TA,Y,0,0,36,0,0,0,NA,GdWo,Shed,600,8,2007,WD,Normal,139900 +628,80,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,6,1955,1972,Gable,CompShg,AsbShng,AsbShng,BrkFace,164,TA,TA,CBlock,TA,TA,Av,BLQ,674,LwQ,132,350,1156,GasA,Ex,Y,SBrkr,1520,0,0,1520,1,0,1,0,3,1,TA,7,Typ,2,Gd,Basment,1955,RFn,1,364,TA,TA,Y,0,0,189,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,153000 +629,60,RL,70,11606,Pave,NA,IR1,HLS,AllPub,Inside,Sev,NAmes,Norm,Norm,1Fam,2Story,5,5,1969,1969,Gable,CompShg,Plywood,Plywood,BrkFace,192,TA,TA,PConc,Gd,TA,Av,Rec,650,Unf,0,390,1040,GasA,TA,Y,SBrkr,1040,1040,0,2080,0,1,1,2,5,1,Fa,9,Typ,2,TA,Attchd,1969,Unf,2,504,TA,TA,Y,335,0,0,0,0,0,NA,NA,NA,0,9,2007,WD,Family,135000 +630,80,RL,82,9020,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,1Fam,SLvl,6,5,1964,1964,Gable,WdShngl,Plywood,Wd Sdng,BrkFace,259,TA,TA,CBlock,TA,TA,Gd,GLQ,624,Rec,336,288,1248,GasA,TA,Y,SBrkr,1350,0,0,1350,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1964,RFn,2,520,TA,TA,Y,176,0,0,0,0,0,NA,GdPrv,NA,0,6,2008,WD,Normal,168500 +631,70,RM,50,9000,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,2Story,5,6,1880,1991,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,Fa,Fa,No,Unf,0,Unf,0,636,636,GasA,TA,Y,FuseA,1089,661,0,1750,0,0,1,0,3,1,Ex,8,Typ,0,NA,Detchd,1937,Unf,1,240,Fa,Po,N,0,0,293,0,0,0,NA,MnPrv,NA,0,6,2006,WD,Abnorml,124000 
+632,120,RL,34,4590,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,108,Gd,TA,PConc,Gd,Gd,Mn,GLQ,24,Unf,0,1530,1554,GasA,Ex,Y,SBrkr,1554,0,0,1554,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2006,RFn,2,627,TA,TA,Y,156,73,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,209500 +633,20,RL,85,11900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,5,1977,1977,Hip,CompShg,Plywood,Plywood,BrkFace,209,TA,Gd,CBlock,TA,TA,No,ALQ,822,Unf,0,564,1386,GasA,TA,Y,SBrkr,1411,0,0,1411,0,0,2,0,3,1,TA,6,Typ,1,TA,Attchd,1977,Fin,2,544,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,4,2009,WD,Family,82500 +634,20,RL,80,9250,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1954,2005,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,480,LwQ,468,108,1056,GasA,TA,Y,SBrkr,1056,0,0,1056,0,1,1,0,3,1,TA,6,Typ,0,NA,Attchd,1954,Unf,1,260,TA,TA,Y,390,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,139400 +635,90,RL,64,6979,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,Duplex,SFoyer,6,5,1980,1980,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,GLQ,1056,Unf,0,0,1056,GasA,Gd,Y,SBrkr,1056,0,0,1056,2,0,0,0,0,2,TA,4,Typ,0,NA,Detchd,1980,Unf,2,576,TA,TA,Y,264,56,0,0,0,0,NA,GdPrv,Shed,600,6,2010,WD,Normal,144000 +636,190,RH,60,10896,Pave,Pave,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Feedr,Norm,2fmCon,2.5Fin,6,7,1914,1995,Hip,CompShg,VinylSd,VinylSd,None,0,Fa,TA,CBlock,TA,Fa,No,LwQ,256,Unf,0,1184,1440,GasA,Ex,Y,FuseA,1440,1440,515,3395,0,0,2,0,8,2,Fa,14,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,110,0,0,0,0,NA,NA,NA,0,3,2007,WD,Abnorml,200000 +637,30,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,2,3,1936,1950,Gable,CompShg,AsbShng,AsbShng,None,0,Fa,Fa,BrkTil,TA,Fa,No,Unf,0,Unf,0,264,264,Grav,Fa,N,FuseA,800,0,0,800,0,0,1,0,1,1,Fa,4,Maj1,1,Po,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,1,2009,ConLw,Normal,60000 
+638,190,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,1.5Fin,5,4,1954,1954,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,811,811,GasA,TA,Y,FuseA,811,576,0,1387,0,0,2,0,3,2,Gd,7,Typ,0,NA,BuiltIn,1954,Unf,1,256,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,93000 +639,30,RL,67,8777,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Feedr,Norm,1Fam,1Story,5,7,1910,1950,Gable,CompShg,MetalSd,Wd Sdng,None,0,TA,TA,CBlock,Fa,TA,No,Unf,0,Unf,0,796,796,GasA,Gd,Y,FuseA,796,0,0,796,0,0,1,0,2,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,P,328,0,164,0,0,0,NA,MnPrv,NA,0,5,2008,WD,Normal,85000 +640,120,RL,53,3982,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,Av,GLQ,1154,Unf,0,366,1520,GasA,Ex,Y,SBrkr,1567,0,0,1567,1,0,2,0,1,1,Ex,7,Typ,1,Gd,Attchd,2006,Fin,3,648,TA,TA,Y,312,0,0,0,0,0,NA,NA,NA,0,10,2006,New,Partial,264561 +641,120,RL,62,12677,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,8,5,2003,2004,Hip,CompShg,MetalSd,MetalSd,BrkFace,472,Ex,TA,PConc,Ex,TA,Gd,GLQ,1218,Unf,0,300,1518,GasA,Ex,Y,SBrkr,1518,0,0,1518,0,0,1,1,1,1,Ex,6,Typ,1,Gd,Attchd,2003,RFn,2,588,TA,TA,Y,185,140,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,274000 +642,60,FV,NA,7050,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,738,Unf,0,319,1057,GasA,Ex,Y,SBrkr,1057,872,0,1929,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2001,Fin,2,650,TA,TA,Y,0,235,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,226000 +643,80,RL,75,13860,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,8,7,1972,1995,Gable,CompShg,Plywood,Wd Sdng,None,0,Gd,TA,CBlock,Gd,TA,Gd,GLQ,1410,Unf,0,542,1952,GasA,Gd,Y,SBrkr,2000,704,0,2704,1,0,2,1,4,1,Ex,9,Typ,3,TA,Attchd,1972,Fin,2,538,TA,TA,Y,269,111,0,0,0,0,NA,MnPrv,NA,0,7,2009,WD,Normal,345000 
+644,60,RL,80,10793,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,2Story,5,5,1969,1969,Mansard,CompShg,WdShing,HdBoard,BrkFace,263,TA,TA,CBlock,TA,TA,No,Rec,493,BLQ,287,0,780,GasA,Ex,Y,SBrkr,780,840,0,1620,0,0,2,1,4,1,TA,7,Min1,0,NA,Attchd,1969,Fin,2,462,TA,TA,Y,208,0,0,0,0,0,NA,GdWo,NA,0,4,2007,WD,Normal,152000 +645,20,FV,85,9187,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,9,5,2009,2009,Gable,CompShg,CemntBd,CmentBd,Stone,162,Ex,TA,PConc,Ex,TA,Mn,GLQ,1121,Unf,0,645,1766,GasA,Ex,Y,SBrkr,1766,0,0,1766,1,0,2,1,2,1,Ex,7,Typ,1,Gd,Attchd,2009,Fin,3,478,TA,TA,Y,195,130,0,0,0,0,NA,NA,NA,0,10,2009,New,Partial,370878 +646,20,RL,NA,10530,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1971,1971,Hip,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,282,LwQ,35,664,981,GasA,TA,Y,SBrkr,981,0,0,981,1,0,1,1,3,1,TA,5,Typ,0,NA,Detchd,1979,Unf,2,576,TA,TA,Y,0,312,40,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,143250 +647,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1950,1950,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,Y,SBrkr,1048,0,0,1048,0,0,1,0,3,1,TA,7,Min1,0,NA,Detchd,1950,Unf,2,420,TA,TA,Y,0,27,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,98300 +648,20,RL,85,10452,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,6,5,1953,1953,Hip,CompShg,Wd Sdng,Wd Sdng,Stone,216,TA,TA,CBlock,TA,TA,Mn,Rec,500,Unf,0,594,1094,GasA,Ex,Y,SBrkr,1094,0,0,1094,0,0,1,0,3,1,TA,5,Typ,2,Gd,Attchd,1953,RFn,2,495,TA,TA,Y,0,0,0,0,287,0,NA,NA,NA,0,6,2008,WD,Normal,155000 +649,60,RL,70,7700,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,PosN,Norm,1Fam,2Story,6,5,1966,1966,Gable,CompShg,MetalSd,MetalSd,BrkFace,351,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,756,756,GasA,TA,Y,SBrkr,1051,788,0,1839,0,0,1,1,4,1,TA,7,Typ,1,TA,Attchd,1966,Unf,2,442,TA,TA,Y,0,124,216,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,155000 
+650,180,RM,21,1936,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,SFoyer,4,6,1970,1970,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,Av,BLQ,131,GLQ,499,0,630,GasA,Gd,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,TA,3,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,12,2007,WD,Normal,84500 +651,60,FV,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,6,2007,2007,Gable,CompShg,CemntBd,CmentBd,NA,NA,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,813,813,GasA,Ex,Y,SBrkr,822,843,0,1665,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2007,RFn,2,562,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,205950 +652,70,RL,60,9084,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Artery,Norm,1Fam,2Story,4,5,1940,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Mn,Unf,0,Unf,0,755,755,GasA,TA,Y,SBrkr,755,755,0,1510,1,0,1,0,4,1,TA,7,Typ,1,Gd,Detchd,1940,Unf,1,296,Fa,Po,P,120,0,0,0,0,0,NA,MnPrv,NA,0,10,2009,WD,Normal,108000 +653,60,RL,70,8750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1996,1996,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,880,880,GasA,Ex,Y,SBrkr,909,807,0,1716,0,0,2,1,2,1,Gd,7,Typ,1,TA,Attchd,1996,RFn,2,512,TA,TA,Y,0,120,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,191000 +654,50,RM,60,10320,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,6,7,1906,1995,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,756,756,GasA,Ex,Y,SBrkr,756,713,0,1469,0,0,1,0,3,1,TA,7,Typ,0,NA,Detchd,1906,Unf,1,216,TA,TA,Y,57,0,239,0,0,0,NA,MnPrv,NA,0,6,2008,WD,Normal,135000 +655,20,RL,91,10437,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,1Story,8,6,1995,1995,Hip,CompShg,MetalSd,MetalSd,BrkFace,660,Gd,Gd,PConc,Gd,TA,Gd,GLQ,1696,Unf,0,413,2109,GasA,Ex,Y,SBrkr,2113,0,0,2113,1,0,2,1,2,1,Gd,7,Typ,1,TA,Attchd,1995,Fin,3,839,TA,TA,Y,236,46,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,350000 
+656,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1971,1971,Gable,CompShg,HdBoard,ImStucc,BrkFace,381,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,525,525,GasA,TA,Y,SBrkr,525,567,0,1092,0,0,1,1,3,1,TA,6,Typ,0,NA,Detchd,1971,Unf,1,264,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2010,WD,Family,88000 +657,20,RL,72,10007,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1959,2006,Gable,CompShg,HdBoard,HdBoard,BrkFace,54,Gd,TA,CBlock,TA,TA,No,ALQ,806,Unf,0,247,1053,GasA,Ex,Y,SBrkr,1053,0,0,1053,1,0,1,1,3,1,Gd,5,Typ,0,NA,Attchd,1959,RFn,1,312,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,8,2008,WD,Normal,145500 +658,70,RL,60,7200,Pave,NA,Reg,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2Story,7,6,1931,2000,Gable,CompShg,Stucco,Wd Shng,None,0,TA,Fa,BrkTil,Gd,TA,No,Unf,0,Unf,0,776,776,GasA,TA,Y,SBrkr,851,651,0,1502,0,0,1,1,3,1,TA,6,Typ,1,Gd,Attchd,1931,RFn,1,270,TA,TA,P,0,0,112,0,0,0,NA,MnPrv,NA,0,2,2008,WD,Normal,149000 +659,50,RL,78,17503,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,6,5,1948,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,912,912,GasA,TA,Y,SBrkr,912,546,0,1458,0,1,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1948,Unf,1,330,TA,TA,Y,192,0,0,0,0,0,NA,NA,NA,0,1,2010,WD,Abnorml,97500 +660,20,RL,75,9937,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,7,1964,1999,Hip,CompShg,MetalSd,MetalSd,None,0,TA,Gd,PConc,TA,TA,No,BLQ,637,Unf,0,849,1486,GasA,Ex,Y,SBrkr,1486,0,0,1486,1,0,1,0,3,1,TA,7,Typ,0,NA,Detchd,1968,Fin,2,480,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,3,2009,WD,Normal,167000 +661,60,RL,NA,12384,Pave,NA,Reg,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,7,1976,1976,Gable,CompShg,Plywood,Plywood,BrkFace,233,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,793,793,GasA,TA,Y,SBrkr,1142,793,0,1935,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1976,RFn,2,550,TA,TA,Y,0,113,252,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,197900 
+662,60,RL,52,46589,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,7,1994,2005,Hip,CompShg,VinylSd,VinylSd,BrkFace,528,Gd,TA,PConc,Gd,Gd,No,GLQ,1361,Rec,180,88,1629,GasA,Ex,Y,SBrkr,1686,762,0,2448,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1994,RFn,3,711,TA,TA,Y,517,76,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,402000 +663,20,RL,120,13560,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,3,1968,1968,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,216,TA,TA,CBlock,Fa,Fa,No,Unf,0,Unf,0,1392,1392,GasA,Gd,Y,SBrkr,1392,0,0,1392,1,0,1,0,2,1,TA,5,Maj2,2,TA,Attchd,1968,RFn,2,576,TA,TA,Y,0,0,240,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,110000 +664,85,RL,90,10012,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,SFoyer,4,5,1972,1972,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Av,BLQ,920,Rec,180,38,1138,GasA,TA,Y,SBrkr,1181,0,0,1181,1,0,2,0,3,1,TA,6,Typ,0,NA,Detchd,1974,RFn,2,588,TA,TA,Y,0,0,180,0,0,0,NA,MnPrv,NA,0,4,2008,WD,Normal,137500 +665,20,RL,49,20896,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,Somerst,RRAn,Norm,1Fam,1Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,Mn,GLQ,1721,Unf,0,356,2077,GasA,Ex,Y,SBrkr,2097,0,0,2097,1,0,1,1,1,1,Ex,8,Typ,1,Ex,Attchd,2005,Fin,3,1134,TA,TA,Y,192,267,0,0,0,0,NA,NA,NA,0,1,2006,New,Partial,423000 +666,60,RL,106,11194,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,40,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1406,1406,GasA,Ex,Y,SBrkr,1454,482,0,1936,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2000,RFn,2,504,TA,TA,Y,188,124,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,230500 +667,60,RL,NA,18450,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,5,1965,1979,Flat,Tar&Grv,Plywood,Plywood,BrkCmn,113,TA,Gd,CBlock,Gd,TA,No,LwQ,187,Rec,723,111,1021,GasA,TA,Y,SBrkr,1465,915,0,2380,0,0,2,1,3,1,TA,7,Sev,1,Po,CarPort,1965,Unf,2,596,TA,TA,Y,0,265,0,0,0,0,NA,NA,NA,0,8,2007,WD,Abnorml,129000 
+668,20,RL,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,6,5,1994,1998,Gable,CompShg,HdBoard,HdBoard,BrkFace,258,TA,TA,PConc,Gd,TA,No,GLQ,1138,Unf,0,270,1408,GasA,Ex,Y,SBrkr,1679,0,0,1679,1,0,2,0,3,1,Gd,7,Typ,1,Fa,Attchd,1994,RFn,2,575,TA,TA,Y,224,42,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,193500 +669,20,RL,NA,14175,Pave,NA,Reg,Bnk,AllPub,Corner,Mod,Sawyer,Norm,Norm,1Fam,1Story,5,6,1956,1987,Gable,CompShg,CemntBd,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,988,Unf,0,200,1188,GasA,Gd,Y,SBrkr,1437,0,0,1437,1,0,1,1,3,1,TA,6,Min2,1,TA,Detchd,1999,Unf,2,576,TA,TA,Y,304,0,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,168000 +670,30,RL,80,11600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,4,5,1922,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,700,700,GasA,Ex,Y,SBrkr,1180,0,0,1180,0,0,1,0,2,1,Fa,5,Typ,1,Gd,Detchd,1922,Unf,1,252,TA,Fa,Y,0,0,67,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,137500 +671,60,RL,64,8633,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,193,Unf,0,545,738,GasA,Ex,Y,SBrkr,738,738,0,1476,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2005,Fin,2,540,TA,TA,Y,100,35,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,173500 +672,70,RH,54,6629,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Artery,Norm,1Fam,2Story,6,6,1925,1950,Gambrel,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,TA,TA,No,BLQ,551,Unf,0,121,672,GasA,TA,N,SBrkr,697,672,0,1369,1,0,2,0,3,1,TA,6,Typ,0,NA,Detchd,1930,Unf,1,300,TA,TA,Y,147,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,103600 +673,20,RL,NA,11250,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Veenker,Norm,Norm,1Fam,1Story,6,6,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,Gd,TA,CBlock,Gd,TA,No,ALQ,767,Unf,0,441,1208,GasA,TA,Y,SBrkr,1208,0,0,1208,1,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1977,RFn,2,546,TA,TA,Y,198,42,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,165000 
+674,20,RL,110,14442,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,7,1957,2004,Hip,CompShg,CemntBd,CmentBd,BrkFace,106,TA,TA,PConc,TA,TA,No,GLQ,1186,Unf,0,291,1477,GasA,Ex,Y,SBrkr,1839,0,0,1839,1,0,2,0,3,1,Gd,7,Typ,2,TA,Attchd,1957,Fin,2,416,TA,TA,Y,0,87,0,0,200,0,NA,NA,NA,0,6,2007,WD,Normal,257500 +675,20,RL,80,9200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1965,1965,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Rec,892,Unf,0,244,1136,GasA,TA,Y,SBrkr,1136,0,0,1136,1,0,1,0,3,1,TA,5,Typ,1,Gd,Attchd,1965,RFn,1,384,TA,TA,Y,426,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,140000 +676,160,RL,24,2289,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,Twnhs,2Story,6,6,1978,1978,Gable,CompShg,Plywood,Brk Cmn,None,0,TA,TA,CBlock,TA,TA,No,ALQ,311,Unf,0,544,855,GasA,TA,Y,SBrkr,855,586,0,1441,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1978,Unf,2,440,TA,TA,Y,28,0,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,148500 +677,70,RM,60,9600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,4,2,1900,1950,Gable,CompShg,AsbShng,Stucco,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,1095,1095,GasW,Fa,N,SBrkr,1095,679,0,1774,1,0,2,0,4,2,TA,8,Min2,0,NA,2Types,1920,Unf,3,779,Fa,Fa,N,0,0,90,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,87000 +678,30,RL,52,9022,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,8,1924,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,768,768,GasA,Ex,Y,SBrkr,792,0,0,792,0,0,1,0,2,1,Gd,5,Typ,0,NA,Detchd,1924,Unf,1,240,Fa,Fa,N,316,0,120,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,109500 +679,20,RL,80,11844,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,1Story,8,5,2008,2008,Hip,CompShg,VinylSd,VinylSd,Stone,464,Gd,TA,PConc,Ex,TA,Mn,Unf,0,Unf,0,2046,2046,GasA,Ex,Y,SBrkr,2046,0,0,2046,0,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2008,Fin,3,834,TA,TA,Y,322,82,0,0,0,0,NA,NA,NA,0,7,2009,New,Partial,372500 
+680,20,RL,NA,9945,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1961,1961,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,57,TA,TA,CBlock,TA,TA,No,Rec,827,Unf,0,161,988,GasA,TA,Y,SBrkr,988,0,0,988,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1963,Unf,2,572,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,128500 +681,120,RL,50,8012,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1980,1980,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,BLQ,543,BLQ,119,261,923,GasA,TA,Y,SBrkr,923,0,0,923,0,0,2,0,2,1,TA,5,Typ,1,TA,Attchd,1980,RFn,1,264,TA,TA,Y,80,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,143000 +682,50,RH,55,4500,Pave,Pave,IR2,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,5,5,1932,2000,Gable,CompShg,VinylSd,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,Rec,182,Unf,0,611,793,GasA,Ex,Y,SBrkr,848,672,0,1520,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1968,Unf,1,281,TA,TA,Y,0,0,56,0,0,0,NA,NA,NA,0,7,2009,WD,Abnorml,159434 +683,120,RL,NA,2887,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1Story,6,5,1996,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,1003,Unf,0,288,1291,GasA,Ex,Y,SBrkr,1291,0,0,1291,1,0,1,0,2,1,Gd,6,Typ,1,Gd,Attchd,1996,Unf,2,431,TA,TA,Y,307,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,173000 +684,20,RL,90,11248,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,9,5,2002,2002,Hip,CompShg,VinylSd,VinylSd,Stone,215,Gd,TA,PConc,Gd,TA,Av,GLQ,1059,Unf,0,567,1626,GasA,Ex,Y,SBrkr,1668,0,0,1668,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,2002,Fin,3,702,TA,TA,Y,257,45,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,285000 +685,60,RL,58,16770,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,30,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1195,1195,GasA,Gd,Y,SBrkr,1195,644,0,1839,0,0,2,1,4,1,TA,7,Typ,0,NA,Attchd,1998,Fin,2,486,TA,TA,Y,0,81,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,221000 
+686,160,RL,NA,5062,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,StoneBr,Norm,Norm,TwnhsE,2Story,7,5,1984,1984,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,CBlock,Gd,TA,Mn,GLQ,828,LwQ,182,180,1190,GasA,Gd,Y,SBrkr,1190,900,0,2090,1,0,2,0,3,1,Gd,6,Min1,1,TA,Attchd,1984,Fin,2,577,TA,TA,Y,219,0,0,0,0,0,NA,NA,NA,0,9,2007,WD,Normal,207500 +687,60,FV,84,10207,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,6,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,874,874,GasA,Ex,Y,SBrkr,874,887,0,1761,0,0,3,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,Fin,2,578,TA,TA,Y,144,105,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,227875 +688,160,FV,NA,5105,Pave,NA,IR2,Lvl,AllPub,FR2,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2004,2004,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,239,Unf,0,312,551,GasA,Ex,Y,SBrkr,551,551,0,1102,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2004,Unf,2,480,TA,TA,Y,0,60,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,148800 +689,20,RL,60,8089,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,1Story,8,6,2007,2007,Gable,CompShg,MetalSd,MetalSd,BrkFace,0,Gd,TA,PConc,Gd,TA,Av,GLQ,945,Unf,0,474,1419,GasA,Ex,Y,SBrkr,1419,0,0,1419,1,0,2,0,2,1,Gd,7,Typ,1,Gd,Attchd,2007,RFn,2,567,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,10,2007,New,Partial,392000 +690,120,RL,61,7577,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,6,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,Stone,256,Gd,TA,PConc,Gd,TA,Av,ALQ,20,Unf,0,1342,1362,GasA,Ex,Y,SBrkr,1362,0,0,1362,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,RFn,2,460,TA,TA,Y,192,28,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,194700 +691,120,RM,NA,4426,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,147,Gd,TA,PConc,Gd,TA,Gd,GLQ,697,Unf,0,151,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,1,TA,Attchd,2004,RFn,2,420,TA,TA,Y,149,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,141000 
+692,60,RL,104,21535,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,10,6,1994,1995,Gable,WdShngl,HdBoard,HdBoard,BrkFace,1170,Ex,TA,PConc,Ex,TA,Gd,GLQ,1455,Unf,0,989,2444,GasA,Ex,Y,SBrkr,2444,1872,0,4316,0,1,3,1,4,1,Ex,10,Typ,2,Ex,Attchd,1994,Fin,3,832,TA,TA,Y,382,50,0,0,0,0,NA,NA,NA,0,1,2007,WD,Normal,755000 +693,60,RL,42,26178,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,Timber,Norm,Norm,1Fam,2Story,7,5,1989,1990,Hip,CompShg,MetalSd,MetalSd,BrkFace,293,Gd,TA,PConc,Gd,TA,Gd,GLQ,965,Unf,0,245,1210,GasA,Ex,Y,SBrkr,1238,1281,0,2519,1,0,2,1,4,1,Gd,9,Typ,2,Gd,Attchd,1989,RFn,2,628,TA,TA,Y,320,27,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,335000 +694,30,RL,60,5400,Pave,NA,Reg,Lvl,AllPub,Corner,Sev,OldTown,Norm,Norm,1Fam,1Story,5,6,1921,1968,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1073,1073,GasA,Ex,Y,SBrkr,1073,0,0,1073,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1968,Unf,1,326,TA,TA,Y,0,0,112,0,0,0,NA,NA,NA,0,12,2006,WD,Abnorml,108480 +695,50,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,6,1936,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Fa,BrkTil,TA,TA,No,Unf,0,Unf,0,927,927,GasA,TA,Y,SBrkr,1067,472,0,1539,0,0,1,1,3,1,TA,5,Typ,0,NA,Detchd,1995,Unf,2,576,TA,TA,Y,112,0,0,0,0,0,NA,MnPrv,NA,0,4,2009,WD,Normal,141500 +696,20,RL,54,13811,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,6,6,1987,1987,Gable,CompShg,HdBoard,HdBoard,BrkFace,72,TA,TA,CBlock,Gd,Gd,No,GLQ,980,LwQ,40,92,1112,GasA,Gd,Y,SBrkr,1137,0,0,1137,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1987,Unf,2,551,TA,TA,Y,125,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,176000 +697,30,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,5,7,1921,1950,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,616,Unf,0,0,616,GasA,Gd,Y,SBrkr,616,0,0,616,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1921,Unf,1,205,TA,TA,Y,0,0,129,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,89000 
+698,20,RL,57,6420,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,7,1952,1952,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Ex,Gd,Mn,LwQ,210,ALQ,551,219,980,GasA,Fa,Y,FuseA,1148,0,0,1148,0,1,1,0,2,1,TA,6,Typ,0,NA,Detchd,1952,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,123500 +699,20,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,RRAe,Norm,1Fam,1Story,5,8,1965,2009,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,GLQ,553,BLQ,117,224,894,GasA,Ex,Y,SBrkr,894,0,0,894,1,0,1,0,3,1,TA,5,Typ,1,Gd,Detchd,1973,Unf,1,336,TA,TA,Y,416,144,0,0,0,0,NA,MnPrv,NA,0,4,2010,WD,Normal,138500 +700,120,FV,59,4282,Pave,Pave,IR2,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,7,5,2004,2004,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,16,Unf,0,1375,1391,GasA,Ex,Y,SBrkr,1391,0,0,1391,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2004,RFn,2,530,TA,TA,Y,156,158,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,196000 +701,20,RL,85,14331,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2002,2002,Hip,CompShg,VinylSd,VinylSd,BrkFace,630,Gd,TA,PConc,Ex,TA,Gd,GLQ,1274,Unf,0,526,1800,GasA,Ex,Y,SBrkr,1800,0,0,1800,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2002,Fin,3,765,TA,TA,Y,270,78,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,312500 +702,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,5,1969,1969,Hip,CompShg,HdBoard,HdBoard,BrkFace,168,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1164,1164,GasA,TA,Y,SBrkr,1164,0,0,1164,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1969,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,COD,Normal,140000 +703,60,RL,82,12438,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,2Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,466,Ex,TA,PConc,Ex,Gd,No,Unf,0,Unf,0,1234,1234,GasA,Ex,Y,SBrkr,1264,1312,0,2576,0,0,2,1,4,1,Ex,10,Typ,1,Gd,BuiltIn,2006,Fin,3,666,TA,TA,Y,324,100,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,361919 
+704,190,RM,76,7630,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Feedr,Norm,2fmCon,2Story,5,9,1900,1996,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,Gd,TA,No,Unf,0,Unf,0,360,360,GasA,Gd,Y,SBrkr,1032,780,0,1812,0,0,2,0,4,2,Gd,8,Typ,1,Po,Detchd,1999,Unf,2,672,TA,TA,N,344,0,40,0,0,0,NA,MnPrv,NA,0,5,2010,WD,Normal,140000 +705,20,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,109,Gd,TA,PConc,Gd,TA,Av,GLQ,712,Unf,0,761,1473,GasA,Ex,Y,SBrkr,1484,0,0,1484,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,606,TA,TA,Y,0,35,0,144,0,0,NA,NA,NA,0,5,2010,WD,Normal,213000 +706,190,RM,70,5600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,2fmCon,2Story,4,5,1930,1950,Hip,CompShg,VinylSd,Wd Shng,None,0,Fa,Fa,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Fa,N,SBrkr,372,720,0,1092,0,0,2,0,3,2,Fa,7,Mod,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,Othr,3500,7,2010,WD,Normal,55000 +707,20,RL,NA,115149,Pave,NA,IR2,Low,AllPub,CulDSac,Sev,ClearCr,Norm,Norm,1Fam,1Story,7,5,1971,2002,Gable,CompShg,Plywood,Plywood,Stone,351,TA,TA,CBlock,Gd,TA,Gd,GLQ,1219,Unf,0,424,1643,GasA,TA,Y,SBrkr,1824,0,0,1824,1,0,2,0,2,1,Gd,5,Typ,2,TA,Attchd,1971,Unf,2,739,TA,TA,Y,380,48,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,302000 +708,120,RL,48,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,8,5,2006,2006,Hip,CompShg,MetalSd,MetalSd,BrkFace,176,Gd,TA,PConc,Gd,TA,No,GLQ,863,Unf,0,461,1324,GasA,Ex,Y,SBrkr,1324,0,0,1324,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2006,Fin,2,550,TA,TA,Y,192,38,0,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,254000 +709,60,RL,65,9018,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2007,2007,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,728,728,0,1456,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2007,Fin,2,400,TA,TA,Y,100,24,0,0,0,0,NA,NA,NA,0,7,2007,New,Partial,179540 
+710,20,RL,NA,7162,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,7,1966,1966,Gable,CompShg,HdBoard,HdBoard,BrkCmn,41,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,876,876,GasA,TA,Y,SBrkr,904,0,0,904,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1966,Unf,1,408,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,12,2008,WD,Abnorml,109900 +711,30,RL,56,4130,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,3,6,1935,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,270,270,GasA,Gd,Y,SBrkr,729,0,0,729,0,0,1,0,2,1,TA,5,Maj2,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,52000 +712,50,C (all),66,8712,Pave,Pave,Reg,HLS,AllPub,Inside,Mod,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,7,1900,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,Stone,TA,TA,Mn,Unf,0,Unf,0,859,859,GasA,Gd,Y,SBrkr,859,319,0,1178,0,0,1,0,2,1,TA,7,Typ,0,NA,Detchd,1964,RFn,1,384,TA,TA,N,68,0,98,0,0,0,NA,NA,NA,0,1,2010,WD,Abnorml,102776 +713,120,RL,40,4671,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1988,1989,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,767,Unf,0,461,1228,GasA,Gd,Y,SBrkr,1228,0,0,1228,1,0,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,1988,Fin,2,472,TA,TA,Y,168,120,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,189000 +714,190,RL,60,9873,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,RRAn,Norm,2fmCon,1Story,4,5,1970,1970,Gable,CompShg,HdBoard,HdBoard,BrkFace,160,TA,TA,CBlock,TA,TA,Av,ALQ,789,Unf,0,171,960,GasW,TA,N,SBrkr,960,0,0,960,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1970,Unf,2,576,TA,TA,Y,0,288,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,129000 +715,60,RL,NA,13517,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289,Gd,TA,CBlock,TA,TA,No,GLQ,533,Unf,0,192,725,GasA,Ex,Y,SBrkr,725,754,0,1479,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,1976,RFn,2,475,TA,TA,Y,0,44,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,130500 
+716,20,RL,78,10140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,5,1974,1974,Hip,CompShg,HdBoard,HdBoard,BrkFace,174,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1064,1064,GasA,TA,Y,SBrkr,1350,0,0,1350,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1974,RFn,2,478,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,8,2009,WD,Normal,165000 +717,70,RM,60,10800,Pave,Grvl,Reg,Bnk,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,8,1890,1998,Gable,CompShg,Wd Sdng,VinylSd,None,0,TA,Gd,BrkTil,TA,TA,No,Unf,0,Unf,0,718,718,GasA,Ex,Y,SBrkr,1576,978,0,2554,0,0,1,1,3,1,TA,8,Typ,0,NA,Detchd,1996,Unf,2,704,TA,TA,P,0,48,143,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,159500 +718,20,RL,80,10000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,5,6,1973,2000,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,No,BLQ,1084,Unf,0,92,1176,GasA,Gd,Y,SBrkr,1178,0,0,1178,0,1,1,1,3,1,Gd,5,Typ,1,Fa,Attchd,1973,Unf,2,439,TA,TA,Y,224,0,0,0,0,0,NA,MnPrv,NA,0,11,2008,WD,Normal,157000 +719,60,RL,96,10542,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1993,1994,Hip,CompShg,Wd Sdng,ImStucc,BrkFace,651,Gd,TA,PConc,Gd,TA,Gd,GLQ,1173,Unf,0,138,1311,GasA,Ex,Y,SBrkr,1325,1093,0,2418,1,0,2,1,3,1,Gd,9,Typ,1,TA,Attchd,1993,RFn,3,983,TA,TA,Y,250,154,216,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,341000 +720,20,RL,69,9920,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1969,1969,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,523,Unf,0,448,971,GasA,TA,Y,SBrkr,971,0,0,971,0,0,1,1,3,1,TA,5,Typ,1,Po,Attchd,1969,Unf,1,300,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,128500 +721,120,RL,NA,6563,Pave,NA,IR1,Low,AllPub,CulDSac,Mod,StoneBr,Norm,Norm,1Fam,1Story,8,5,1985,1985,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Gd,GLQ,1148,Unf,0,594,1742,GasA,TA,Y,SBrkr,1742,0,0,1742,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1985,RFn,2,564,TA,TA,Y,114,28,234,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,275000 
+722,120,RM,NA,4426,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,169,Gd,TA,PConc,Gd,TA,Av,GLQ,662,Unf,0,186,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,0,NA,Attchd,2004,RFn,2,420,TA,TA,Y,160,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,143000 +723,20,RL,70,8120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,7,1970,1970,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,191,Unf,0,673,864,GasA,Ex,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1994,Unf,2,463,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,124500 +724,50,RL,60,8172,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,4,6,1954,1972,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,941,941,GasA,Ex,Y,SBrkr,997,473,0,1470,0,0,2,0,4,1,TA,7,Typ,0,NA,Detchd,1958,Unf,1,548,TA,TA,Y,0,0,0,0,156,0,NA,NA,NA,0,5,2008,WD,Normal,135000 +725,20,RL,86,13286,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,9,5,2007,2008,Hip,CompShg,CemntBd,CmentBd,Stone,340,Ex,TA,PConc,Ex,TA,No,GLQ,1234,Unf,0,464,1698,GasA,Ex,Y,SBrkr,1698,0,0,1698,1,0,2,0,3,1,Ex,8,Typ,1,Gd,Attchd,2007,Fin,3,768,TA,TA,Y,327,64,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,320000 +726,20,RL,60,6960,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1970,1970,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,375,BLQ,239,250,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,Gd,5,Typ,0,NA,Detchd,1989,Unf,2,660,TA,TA,Y,96,0,0,0,0,0,NA,NA,Shed,500,11,2009,WD,Normal,120500 +727,20,RL,NA,21695,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,9,1988,2007,Hip,CompShg,Wd Sdng,Plywood,BrkFace,260,Gd,Gd,CBlock,Gd,TA,No,GLQ,808,Unf,0,72,880,GasA,Ex,Y,SBrkr,1680,0,0,1680,1,0,2,0,3,1,Gd,5,Typ,1,Gd,Attchd,1988,Fin,2,540,TA,TA,Y,292,44,0,182,0,0,NA,NA,NA,0,12,2009,WD,Normal,222000 
+728,20,RL,64,7314,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,82,Gd,TA,PConc,Gd,TA,Av,GLQ,724,Unf,0,508,1232,GasA,Ex,Y,SBrkr,1232,0,0,1232,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2007,RFn,2,632,TA,TA,Y,132,0,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,194500 +729,90,RL,85,11475,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,Duplex,1Story,5,5,1958,1958,Gable,CompShg,VinylSd,VinylSd,BrkFace,95,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1584,1584,GasA,TA,Y,SBrkr,1776,0,0,1776,1,0,2,0,4,2,TA,9,Typ,0,NA,Detchd,1968,Unf,3,888,TA,TA,Y,0,25,0,0,0,0,NA,NA,NA,0,7,2009,COD,Abnorml,110000 +730,30,RM,52,6240,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,5,1925,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,152,Unf,0,628,780,GasA,TA,Y,FuseA,848,0,360,1208,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1962,Unf,2,539,TA,TA,Y,0,23,112,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,103000 +731,120,RL,39,5389,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1995,1996,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,1180,Unf,0,415,1595,GasA,Ex,Y,SBrkr,1616,0,0,1616,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1995,RFn,2,608,TA,TA,Y,237,152,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,236500 +732,80,RL,73,9590,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,SLvl,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,442,Gd,TA,PConc,Ex,TA,Av,GLQ,786,Unf,0,82,868,GasA,Ex,Y,SBrkr,1146,0,0,1146,1,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2003,Fin,2,438,TA,TA,Y,160,22,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,187500 +733,60,RL,75,11404,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1998,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,202,Gd,TA,PConc,Gd,TA,Av,ALQ,252,Unf,0,901,1153,GasA,Ex,Y,SBrkr,1153,878,0,2031,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1998,Fin,2,541,TA,TA,Y,192,84,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,222500 
+734,20,RL,80,10000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,6,1961,1983,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,594,Unf,0,270,864,GasA,Ex,Y,SBrkr,1144,0,0,1144,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1961,RFn,1,264,TA,TA,Y,165,0,0,0,0,0,NA,GdWo,Shed,400,3,2009,WD,Normal,131400 +735,20,RL,NA,8978,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1968,1968,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,948,948,GasA,TA,Y,SBrkr,948,0,0,948,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1968,Unf,1,300,TA,TA,Y,147,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Family,108000 +736,75,RM,60,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2.5Unf,7,7,1914,1970,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,Mn,Rec,390,Unf,0,490,880,GasW,Fa,N,SBrkr,880,888,0,1768,0,0,1,1,2,1,TA,6,Typ,2,TA,Detchd,1914,Unf,2,320,TA,TA,N,0,341,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,163000 +737,90,RL,60,8544,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,3,4,1950,1950,Gable,CompShg,Stucco,Stone,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,N,FuseF,1040,0,0,1040,0,0,2,0,2,2,TA,6,Typ,0,NA,Detchd,1949,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,93500 +738,60,RL,72,10463,Pave,NA,IR1,HLS,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,2Story,8,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,893,893,GasA,Ex,Y,SBrkr,901,900,0,1801,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2005,Fin,3,800,TA,TA,Y,0,116,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,239900 +739,90,RL,60,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,1Story,5,5,1987,1988,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,Gd,Gd,GLQ,1200,Unf,0,0,1200,GasA,TA,Y,SBrkr,1200,0,0,1200,3,0,3,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,120,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Alloca,179000 
+740,60,RL,65,9313,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,864,864,GasA,Ex,Y,SBrkr,864,864,0,1728,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,572,TA,TA,Y,187,56,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,190000 +741,70,RM,60,9600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,5,7,1910,2002,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,Gd,BrkTil,Fa,Fa,No,Unf,0,Unf,0,264,264,GasA,Ex,Y,SBrkr,768,664,0,1432,0,0,2,0,2,1,TA,7,Typ,0,NA,Detchd,1910,Unf,2,360,TA,Gd,Y,270,0,112,0,0,0,NA,GdPrv,NA,0,5,2007,WD,Abnorml,132000 +742,20,RL,65,6768,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,6,8,1961,1996,Hip,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,TA,TA,Mn,GLQ,832,Unf,0,80,912,GasA,Gd,Y,SBrkr,912,0,0,912,1,1,1,0,3,1,Gd,5,Typ,0,NA,Detchd,1962,Unf,1,288,TA,TA,Y,168,0,0,0,0,0,NA,GdPrv,NA,0,5,2008,WD,Normal,142000 +743,20,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,2000,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,108,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1349,1349,GasA,Ex,Y,SBrkr,1349,0,0,1349,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,2000,Unf,2,539,TA,TA,Y,120,55,0,0,0,0,NA,GdPrv,NA,0,12,2007,WD,Normal,179000 +744,80,RL,70,12886,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SLvl,5,6,1963,1999,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Av,ALQ,444,Unf,0,76,520,GasA,Ex,Y,SBrkr,1464,0,0,1464,0,1,2,0,3,1,TA,6,Min2,1,TA,Attchd,1997,RFn,2,480,TA,TA,Y,302,0,0,0,100,0,NA,NA,NA,0,10,2009,WD,Normal,175000 +745,120,RL,41,5395,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1993,1993,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,733,Unf,0,604,1337,GasA,Gd,Y,SBrkr,1337,0,0,1337,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1993,RFn,2,462,TA,TA,Y,96,0,70,168,0,0,NA,NA,NA,0,10,2008,WD,Normal,180000 
+746,60,RL,NA,8963,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,8,9,1976,1996,Hip,CompShg,VinylSd,VinylSd,BrkFace,289,Ex,Gd,CBlock,TA,Gd,No,GLQ,575,ALQ,80,487,1142,GasA,Ex,Y,SBrkr,1175,1540,0,2715,0,1,3,1,4,1,Gd,11,Typ,2,TA,BuiltIn,1994,Fin,2,831,TA,TA,Y,0,204,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,299800 +747,60,RL,NA,8795,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,300,Unf,0,652,952,GasA,Ex,Y,SBrkr,980,1276,0,2256,0,0,2,1,4,1,Gd,8,Typ,1,TA,BuiltIn,2000,Fin,2,554,TA,TA,Y,224,54,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,236000 +748,70,RM,65,11700,Pave,Pave,IR1,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,7,1880,2003,Mansard,CompShg,Stucco,Stucco,None,0,Gd,TA,Stone,TA,Fa,No,Unf,0,Unf,0,1240,1240,GasW,TA,N,SBrkr,1320,1320,0,2640,0,0,1,1,4,1,Gd,8,Typ,1,Gd,Detchd,1950,Unf,4,864,TA,TA,N,181,0,386,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,265979 +749,20,RL,59,10593,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,1Story,7,5,1996,1996,Hip,CompShg,VinylSd,VinylSd,BrkFace,338,Gd,TA,PConc,Gd,TA,No,GLQ,919,Unf,0,801,1720,GasA,Ex,Y,SBrkr,1720,0,0,1720,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1996,Fin,2,527,TA,TA,Y,240,56,154,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,260400 +750,50,RL,50,8405,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,4,3,1945,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,Wall,TA,N,FuseF,1088,441,0,1529,0,0,2,0,4,1,TA,9,Mod,0,NA,Detchd,1945,Unf,1,240,TA,TA,N,92,0,185,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,98000 +751,50,RM,55,8800,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,4,7,1910,2004,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,576,576,GasA,Gd,Y,SBrkr,792,348,0,1140,0,0,1,0,3,1,TA,7,Min2,0,NA,NA,NA,NA,0,0,NA,NA,N,0,160,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,96500 
+752,60,RL,NA,7750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,660,660,GasA,Ex,Y,SBrkr,660,660,0,1320,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2003,Fin,2,400,TA,TA,Y,0,48,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,162000 +753,20,RL,79,9236,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,Gd,TA,Gd,GLQ,1200,Unf,0,279,1479,GasA,Ex,Y,SBrkr,1494,0,0,1494,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1997,RFn,2,576,TA,TA,Y,168,27,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,217000 +754,60,RL,80,10240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,178,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1030,1030,GasA,Gd,Y,SBrkr,1038,1060,0,2098,0,0,2,1,3,1,Ex,8,Typ,1,Gd,Attchd,2005,RFn,3,878,TA,TA,Y,192,52,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,275500 +755,20,RL,61,7930,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,8,1969,2005,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,GLQ,439,LwQ,472,115,1026,GasA,Gd,Y,SBrkr,1026,0,0,1026,1,0,1,0,3,1,Gd,5,Typ,0,NA,Detchd,1969,RFn,2,440,TA,TA,Y,171,48,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,156000 +756,160,FV,34,3230,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,6,5,1999,1999,Gable,CompShg,MetalSd,MetalSd,BrkFace,894,TA,TA,PConc,Gd,TA,No,GLQ,381,Unf,0,348,729,GasA,Gd,Y,SBrkr,742,729,0,1471,0,0,2,1,3,1,TA,6,Typ,0,NA,Detchd,1999,Unf,2,440,TA,TA,Y,0,24,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,172500 +757,60,RL,68,10769,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,20,Unf,0,846,866,GasA,Ex,Y,SBrkr,866,902,0,1768,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2007,RFn,2,578,TA,TA,Y,144,105,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,212000 
+758,60,RL,NA,11616,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,Norm,Norm,1Fam,2Story,6,5,1978,1978,Hip,CompShg,HdBoard,HdBoard,BrkCmn,328,TA,TA,CBlock,TA,TA,Mn,Rec,438,Unf,0,234,672,GasA,TA,Y,SBrkr,672,714,0,1386,0,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1978,Fin,2,440,TA,TA,Y,335,0,0,0,0,0,NA,GdPrv,NA,0,4,2010,WD,Abnorml,158900 +759,160,FV,24,2280,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,7,5,1999,1999,Gable,CompShg,MetalSd,MetalSd,BrkFace,360,TA,TA,PConc,Gd,TA,No,ALQ,549,Unf,0,195,744,GasA,Gd,Y,SBrkr,757,744,0,1501,0,0,2,1,3,1,TA,6,Typ,0,NA,Detchd,1999,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,179400 +760,60,RL,65,12257,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1995,1995,Gable,CompShg,VinylSd,VinylSd,BrkFace,513,Gd,TA,PConc,Gd,TA,Av,LwQ,56,ALQ,64,1198,1318,GasA,Ex,Y,SBrkr,1328,1203,0,2531,0,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1995,RFn,3,752,TA,TA,Y,222,98,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,290000 +761,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1959,1959,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,612,Unf,0,252,864,GasA,Ex,Y,SBrkr,864,0,0,864,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,2008,Unf,1,300,Ex,Ex,Y,0,0,0,0,0,0,NA,NA,Shed,450,10,2009,WD,Normal,127500 +762,30,RM,60,6911,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,BrkSide,Feedr,Norm,1Fam,1Story,5,5,1924,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,Mn,LwQ,405,Unf,0,740,1145,GasA,TA,Y,SBrkr,1301,0,0,1301,0,0,1,0,2,1,Fa,5,Min1,0,NA,Detchd,1965,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,100000 +763,60,FV,72,8640,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,24,Unf,0,732,756,GasA,Ex,Y,SBrkr,764,783,0,1547,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2009,Unf,2,614,TA,TA,Y,169,45,0,0,0,0,NA,NA,NA,0,6,2010,Con,Normal,215200 
+764,60,RL,82,9430,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,673,Gd,TA,PConc,Gd,TA,Mn,GLQ,1163,Unf,0,89,1252,GasA,Ex,Y,SBrkr,1268,1097,0,2365,1,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,1999,RFn,3,856,TA,TA,Y,0,128,0,0,180,0,NA,NA,NA,0,7,2009,WD,Normal,337000 +765,120,RL,30,9549,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Veenker,Norm,Norm,TwnhsE,1Story,8,5,1995,1996,Hip,CompShg,BrkFace,BrkFace,None,0,Gd,Gd,PConc,Gd,Gd,Av,LwQ,437,GLQ,1057,0,1494,GasA,Ex,Y,SBrkr,1494,0,0,1494,1,0,1,1,2,1,Ex,6,Typ,1,Gd,Attchd,1995,Fin,2,481,TA,TA,Y,0,30,0,0,216,0,NA,NA,NA,0,4,2006,WD,Normal,270000 +766,20,RL,75,14587,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,9,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,284,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1498,1498,GasA,Ex,Y,SBrkr,1506,0,0,1506,0,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2008,Fin,2,592,TA,TA,Y,0,174,0,0,0,0,NA,NA,NA,0,8,2008,New,Partial,264132 +767,60,RL,80,10421,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,5,1988,1988,Gable,CompShg,HdBoard,HdBoard,BrkFace,42,TA,TA,CBlock,Gd,TA,No,GLQ,394,Unf,0,586,980,GasA,TA,Y,SBrkr,980,734,0,1714,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1988,Unf,2,496,TA,TA,Y,228,66,156,0,0,0,NA,MnPrv,Shed,500,3,2010,WD,Normal,196500 +768,50,RL,75,12508,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1.5Fin,6,7,1940,1985,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,Mn,ALQ,660,Unf,0,323,983,GasA,Ex,Y,SBrkr,983,767,0,1750,1,0,2,0,4,1,TA,7,Mod,0,NA,Attchd,1989,Unf,1,423,TA,TA,Y,245,0,156,0,0,0,NA,NA,Shed,1300,7,2008,WD,Normal,160000 +769,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,24,Unf,0,1836,1860,GasA,Ex,Y,SBrkr,1836,0,0,1836,0,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2004,Fin,2,484,TA,TA,Y,120,33,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,216837 
+770,60,RL,47,53504,Pave,NA,IR2,HLS,AllPub,CulDSac,Mod,StoneBr,Norm,Norm,1Fam,2Story,8,5,2003,2003,Hip,CompShg,CemntBd,Wd Shng,BrkFace,603,Ex,TA,PConc,Gd,TA,Gd,ALQ,1416,Unf,0,234,1650,GasA,Ex,Y,SBrkr,1690,1589,0,3279,1,0,3,1,4,1,Ex,12,Mod,1,Gd,BuiltIn,2003,Fin,3,841,TA,TA,Y,503,36,0,0,210,0,NA,NA,NA,0,6,2010,WD,Normal,538000 +771,85,RL,NA,7252,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,Norm,Norm,1Fam,SFoyer,5,5,1982,1982,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,685,Unf,0,173,858,GasA,TA,Y,SBrkr,858,0,0,858,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1983,Unf,2,576,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,134900 +772,20,RL,67,8877,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1951,1951,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,Fa,Fa,No,LwQ,836,Unf,0,0,836,GasA,TA,Y,FuseF,1220,0,0,1220,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1951,Unf,2,396,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2006,COD,Normal,102000 +773,80,RL,94,7819,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,SLvl,6,5,1976,1976,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,422,BLQ,127,480,1029,GasA,TA,Y,SBrkr,1117,0,0,1117,1,0,1,0,3,1,TA,6,Typ,1,TA,Detchd,1976,Unf,2,672,TA,TA,Y,144,0,0,0,0,0,NA,MnPrv,NA,0,3,2010,WD,Abnorml,107000 +774,20,RL,70,10150,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,5,1958,1958,Gable,CompShg,Wd Sdng,Wd Sdng,None,1,TA,TA,CBlock,TA,TA,No,Rec,456,Unf,0,456,912,GasA,Ex,Y,FuseA,912,0,0,912,0,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1958,RFn,1,275,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,COD,Normal,114500 +775,20,RL,110,14226,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,375,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1935,1935,GasA,Gd,Y,SBrkr,1973,0,0,1973,0,0,2,0,3,1,Gd,9,Typ,1,Gd,Attchd,2006,Fin,3,895,TA,TA,Y,315,45,0,0,0,0,NA,NA,NA,0,7,2007,New,Partial,395000 
+776,120,RM,32,4500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Mitchel,Norm,Norm,TwnhsE,1Story,6,5,1998,1998,Hip,CompShg,VinylSd,VinylSd,BrkFace,320,TA,TA,PConc,Ex,TA,No,GLQ,866,Unf,0,338,1204,GasA,Ex,Y,SBrkr,1204,0,0,1204,1,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,1998,Fin,2,412,TA,TA,Y,0,247,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,162000 +777,20,RL,86,11210,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,240,Gd,TA,PConc,Gd,TA,Av,GLQ,20,Unf,0,1594,1614,GasA,Ex,Y,SBrkr,1614,0,0,1614,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2005,RFn,3,865,TA,TA,Y,144,59,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,221500 +778,20,RL,100,13350,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1974,1974,Hip,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,762,Unf,0,102,864,GasA,TA,Y,SBrkr,894,0,0,894,1,0,1,0,3,1,TA,5,Typ,1,Fa,Attchd,1974,Unf,2,440,TA,TA,Y,241,0,0,0,0,0,NA,MnPrv,NA,0,6,2006,WD,Normal,142500 +779,90,RH,60,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,Duplex,1Story,5,5,1977,1977,Gable,CompShg,Plywood,Plywood,BrkFace,320,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,2020,0,0,2020,0,0,2,0,4,2,TA,10,Typ,2,TA,Detchd,1977,Unf,2,630,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,144000 +780,90,RL,78,10530,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,SFoyer,6,5,1977,1977,Gable,CompShg,Plywood,ImStucc,BrkFace,90,TA,TA,CBlock,Gd,TA,Gd,GLQ,975,Unf,0,0,975,GasA,TA,Y,SBrkr,1004,0,0,1004,1,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1977,Unf,2,504,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,135000 +781,20,RL,63,7875,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,1995,1996,Gable,CompShg,HdBoard,HdBoard,BrkFace,38,TA,TA,PConc,Gd,Gd,No,Unf,0,Unf,0,1237,1237,GasA,Gd,Y,SBrkr,1253,0,0,1253,0,0,2,0,3,1,TA,6,Typ,1,TA,Attchd,1995,Fin,2,402,TA,TA,Y,220,21,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,176000 
+782,60,RL,65,7153,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,1992,1992,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,PConc,Gd,TA,No,ALQ,387,Unf,0,374,761,GasA,Ex,Y,SBrkr,810,793,0,1603,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1992,RFn,2,484,TA,TA,Y,0,124,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,175900 +783,20,RL,67,16285,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1413,1413,GasA,Ex,Y,SBrkr,1430,0,0,1430,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2001,RFn,2,605,TA,TA,Y,0,33,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,187100 +784,85,RL,NA,9101,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,6,1978,1978,Gable,CompShg,Plywood,Plywood,BrkFace,104,TA,Gd,PConc,Gd,TA,Av,GLQ,1097,Unf,0,0,1097,GasA,Ex,Y,SBrkr,1110,0,0,1110,1,0,1,0,1,1,Gd,4,Typ,1,TA,Attchd,1978,Fin,2,602,TA,TA,Y,303,30,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,165500 +785,75,RM,35,6300,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2.5Unf,6,6,1914,2001,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,742,742,GasA,Ex,Y,SBrkr,742,742,0,1484,0,0,2,0,3,1,TA,9,Typ,1,Gd,NA,NA,NA,0,0,NA,NA,Y,0,291,134,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,128000 +786,20,RL,NA,9790,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Feedr,Norm,1Fam,1Story,6,5,1967,1967,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,251,LwQ,630,491,1372,GasA,TA,Y,SBrkr,1342,0,0,1342,0,0,2,0,3,1,TA,7,Typ,1,Gd,Attchd,1967,Unf,2,457,TA,TA,Y,0,0,0,0,197,0,NA,NA,NA,0,9,2009,WD,Normal,161500 +787,50,RM,60,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,5,6,1915,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,PConc,Fa,TA,No,LwQ,686,Unf,0,0,686,GasA,TA,Y,SBrkr,966,686,0,1652,1,0,2,0,4,1,TA,7,Typ,0,NA,Detchd,1961,Unf,1,416,TA,TA,Y,0,0,196,0,0,0,NA,NA,Shed,1200,6,2010,WD,Normal,139000 
+788,60,RL,76,10142,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,656,Unf,0,300,956,GasA,Ex,Y,SBrkr,956,1128,0,2084,1,0,2,1,4,1,Gd,8,Typ,0,NA,BuiltIn,2004,RFn,2,618,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,1,2010,WD,Normal,233000 +789,20,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1Story,4,7,1954,2000,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,901,901,GasA,Ex,Y,SBrkr,901,0,0,901,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1954,Unf,1,281,Fa,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,107900 +790,60,RL,NA,12205,Pave,NA,IR1,Low,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,2Story,6,8,1966,2007,Gable,CompShg,HdBoard,HdBoard,BrkFace,157,TA,TA,CBlock,TA,Fa,Gd,LwQ,568,Unf,0,264,832,GasA,Gd,Y,SBrkr,976,1111,0,2087,0,0,2,1,5,1,Gd,9,Typ,0,NA,Attchd,1966,Fin,2,444,TA,TA,Y,133,168,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,187500 +791,120,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,11,Gd,TA,PConc,Gd,TA,No,GLQ,16,Unf,0,1129,1145,GasA,Ex,Y,SBrkr,1145,0,0,1145,0,0,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2005,Fin,2,397,TA,TA,Y,100,16,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,160200 +792,80,RL,NA,11333,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,Av,ALQ,539,Unf,0,490,1029,GasA,TA,Y,SBrkr,1062,0,0,1062,1,0,1,0,3,1,TA,5,Typ,2,TA,Attchd,1976,RFn,2,539,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,146800 +793,60,RL,92,9920,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,862,Unf,0,255,1117,GasA,Ex,Y,SBrkr,1127,886,0,2013,1,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,1997,Unf,2,455,TA,TA,Y,180,130,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,269790 
+794,20,RL,76,9158,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,CemntBd,CmentBd,Stone,140,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1496,1496,GasA,Ex,Y,SBrkr,1496,0,0,1496,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,Fin,2,474,TA,TA,Y,168,130,0,0,0,0,NA,NA,NA,0,6,2007,New,Partial,225000 +795,60,RL,NA,10832,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1994,1996,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,712,712,GasA,Ex,Y,SBrkr,1086,809,0,1895,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1994,Fin,2,409,TA,TA,Y,143,46,0,0,0,0,NA,NA,Shed,500,10,2008,WD,Normal,194500 +796,60,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,6,1980,1981,Gable,CompShg,HdBoard,HdBoard,BrkFace,130,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,650,650,GasA,TA,Y,SBrkr,888,676,0,1564,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1980,Unf,2,476,TA,TA,Y,0,50,0,0,204,0,NA,MnPrv,NA,0,4,2010,WD,Normal,171000 +797,20,RL,71,8197,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,6,5,1977,1977,Gable,CompShg,Plywood,Plywood,BrkFace,148,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,660,660,GasA,Ex,Y,SBrkr,1285,0,0,1285,0,0,1,1,3,1,TA,7,Typ,1,TA,Attchd,1977,RFn,2,528,TA,TA,Y,138,0,0,0,0,0,NA,MnPrv,NA,0,4,2007,WD,Normal,143500 +798,20,RL,57,7677,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1953,1953,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,570,Unf,0,203,773,GasA,Gd,Y,SBrkr,773,0,0,773,0,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1953,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Abnorml,110000 +799,60,RL,104,13518,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2008,2009,Hip,CompShg,VinylSd,VinylSd,Stone,860,Ex,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1926,1926,GasA,Ex,Y,SBrkr,1966,1174,0,3140,0,0,3,1,4,1,Ex,11,Typ,2,Gd,BuiltIn,2009,Fin,3,820,TA,TA,Y,144,78,0,0,0,0,NA,NA,NA,0,7,2009,New,Partial,485000 
+800,50,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SWISU,Feedr,Norm,1Fam,1.5Fin,5,7,1937,1950,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,252,TA,TA,BrkTil,Gd,TA,No,ALQ,569,Unf,0,162,731,GasA,Ex,Y,SBrkr,981,787,0,1768,1,0,1,1,3,1,Gd,7,Typ,2,TA,Detchd,1939,Unf,1,240,TA,TA,Y,0,0,264,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,175000 +801,60,RL,79,12798,Pave,NA,IR1,HLS,AllPub,Inside,Mod,ClearCr,Feedr,Norm,1Fam,2Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Gd,GLQ,462,Unf,0,154,616,GasA,Gd,Y,SBrkr,616,1072,0,1688,1,0,2,1,4,1,Gd,8,Typ,0,NA,Attchd,1997,RFn,2,603,TA,TA,Y,403,114,185,0,0,0,NA,NA,Shed,400,5,2008,WD,Normal,200000 +802,30,RM,40,4800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,4,7,1916,1990,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,LwQ,197,Unf,0,999,1196,GasA,Ex,Y,FuseA,1196,0,0,1196,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1957,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,109900 +803,60,RL,63,8199,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2005,2005,Gable,CompShg,WdShing,Wd Shng,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,648,Unf,0,80,728,GasA,Ex,Y,SBrkr,728,728,0,1456,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2005,Fin,2,410,TA,TA,Y,36,18,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,189000 +804,60,RL,107,13891,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2008,2009,Hip,CompShg,VinylSd,VinylSd,Stone,424,Ex,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,1734,1734,GasA,Ex,Y,SBrkr,1734,1088,0,2822,0,0,3,1,4,1,Ex,12,Typ,1,Gd,BuiltIn,2009,RFn,3,1020,TA,TA,Y,52,170,0,0,192,0,NA,NA,NA,0,1,2009,New,Partial,582933 +805,20,RL,75,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1954,1954,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,812,Unf,0,124,936,GasA,TA,Y,SBrkr,1128,0,0,1128,0,0,1,0,2,1,TA,5,Min1,0,NA,Attchd,1954,Unf,1,286,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,6,2006,WD,Family,118000 
+806,20,RL,91,12274,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,256,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1417,1417,GasA,Ex,Y,SBrkr,1428,0,0,1428,0,0,2,0,3,1,Ex,6,Typ,0,NA,Attchd,2008,RFn,2,554,TA,TA,Y,0,60,0,0,0,0,NA,NA,NA,0,7,2008,New,Partial,227680 +807,80,RL,75,9750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,5,1967,1967,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,Av,ALQ,400,Rec,480,100,980,GasA,Gd,Y,SBrkr,980,0,0,980,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1967,Fin,1,384,TA,TA,Y,68,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,135500 +808,70,RL,144,21384,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,2Story,5,6,1923,2004,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Gd,GLQ,1309,Unf,0,15,1324,GasA,Ex,Y,SBrkr,1072,504,0,1576,2,0,1,1,3,1,Gd,6,Typ,1,TA,Attchd,1923,RFn,2,528,TA,TA,Y,0,312,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,223500 +809,80,RL,85,13400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,5,1966,1966,Gable,CompShg,VinylSd,VinylSd,BrkFace,1047,TA,TA,CBlock,TA,TA,Av,ALQ,516,BLQ,128,380,1024,GasA,TA,Y,SBrkr,1086,0,0,1086,1,0,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1966,RFn,2,484,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,6,2006,WD,Normal,159950 +810,75,RM,90,8100,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2.5Unf,5,5,1898,1965,Hip,CompShg,AsbShng,AsbShng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,849,849,GasA,TA,N,FuseA,1075,1063,0,2138,0,0,2,0,2,3,TA,11,Typ,0,NA,Detchd,1910,Unf,2,360,Fa,Po,N,40,156,0,0,0,0,NA,MnPrv,NA,0,11,2009,WD,Normal,106000 +811,20,RL,78,10140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1974,1999,Hip,CompShg,HdBoard,HdBoard,BrkFace,99,TA,TA,CBlock,TA,TA,No,ALQ,663,LwQ,377,0,1040,GasA,Fa,Y,SBrkr,1309,0,0,1309,1,0,1,1,3,1,Gd,5,Typ,1,Fa,Attchd,1974,RFn,2,484,TA,TA,Y,265,0,0,0,0,648,Fa,GdPrv,NA,0,1,2006,WD,Normal,181000 
+812,120,RM,NA,4438,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,169,Gd,TA,PConc,Gd,TA,Gd,GLQ,662,Unf,0,186,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,4,Typ,1,Gd,Attchd,2004,Fin,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,6,2008,ConLD,Normal,144500 +813,20,C (all),66,8712,Grvl,NA,Reg,Bnk,AllPub,Inside,Mod,IDOTRR,Norm,Norm,1Fam,1Story,5,5,1952,1952,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,Fa,TA,CBlock,TA,TA,Av,Unf,0,Unf,0,540,540,GasA,TA,N,FuseA,1044,0,0,1044,0,0,1,0,2,1,Fa,4,Typ,0,NA,Basment,1952,Unf,2,504,TA,TA,N,0,0,0,0,0,0,NA,NA,Shed,54,6,2010,WD,Alloca,55993 +814,20,RL,75,9750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Gable,CompShg,MetalSd,MetalSd,BrkFace,243,TA,TA,CBlock,TA,TA,No,Rec,608,Unf,0,834,1442,GasA,Gd,Y,SBrkr,1442,0,0,1442,0,0,1,1,4,1,TA,7,Typ,0,NA,Attchd,1958,RFn,1,301,TA,TA,Y,0,0,275,0,0,0,NA,NA,Shed,500,4,2007,COD,Normal,157900 +815,50,RL,45,8248,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,7,1918,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,686,686,GasW,Gd,Y,SBrkr,686,564,0,1250,0,1,1,1,3,1,Fa,7,Typ,0,NA,Detchd,1955,Unf,1,280,TA,TA,P,207,0,96,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,116000 +816,20,RL,48,12137,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,442,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1649,1649,GasA,Ex,Y,SBrkr,1661,0,0,1661,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1998,RFn,2,598,TA,TA,Y,0,34,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,224900 +817,20,RL,NA,11425,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1954,1954,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,BLQ,486,Unf,0,522,1008,GasA,Gd,Y,SBrkr,1008,0,0,1008,0,0,1,0,2,1,TA,4,Typ,1,Gd,Attchd,1954,RFn,1,275,TA,TA,Y,0,0,120,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,137000 
+818,20,RL,NA,13265,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,1Story,8,5,2002,2002,Hip,CompShg,CemntBd,CmentBd,BrkFace,148,Gd,TA,PConc,Gd,TA,No,GLQ,1218,Unf,0,350,1568,GasA,Ex,Y,SBrkr,1689,0,0,1689,1,0,2,0,3,1,Gd,7,Typ,2,Gd,Attchd,2002,RFn,3,857,TA,TA,Y,150,59,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,271000 +819,80,RL,80,8816,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,SLvl,6,7,1971,1971,Gable,CompShg,HdBoard,HdBoard,BrkFace,80,TA,TA,CBlock,TA,TA,Av,GLQ,504,Unf,0,506,1010,GasA,Gd,Y,SBrkr,1052,0,0,1052,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1971,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,6,2010,WD,Normal,155000 +820,120,RL,44,6371,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2009,2010,Gable,CompShg,VinylSd,VinylSd,Stone,128,Gd,TA,PConc,Gd,TA,Mn,GLQ,733,Unf,0,625,1358,GasA,Ex,Y,SBrkr,1358,0,0,1358,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2010,RFn,2,484,TA,TA,Y,192,35,0,0,0,0,NA,NA,NA,0,6,2010,New,Partial,224000 +821,60,RL,72,7226,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,798,798,GasA,Ex,Y,SBrkr,798,842,0,1640,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2003,RFn,2,595,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,183000 +822,20,RM,60,6000,Pave,Pave,Reg,Bnk,AllPub,Inside,Mod,OldTown,Norm,Norm,2fmCon,1Story,4,4,1953,1953,Gable,CompShg,MetalSd,MetalSd,None,0,Fa,TA,CBlock,Fa,TA,No,Unf,0,Unf,0,936,936,GasA,TA,N,SBrkr,936,0,0,936,0,0,1,0,2,1,TA,4,Min2,0,NA,Detchd,1974,Unf,2,576,TA,TA,Y,0,32,112,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,93000 +823,60,RL,NA,12394,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Gd,Unf,0,Unf,0,847,847,GasA,Ex,Y,SBrkr,847,886,0,1733,0,0,2,1,3,1,Gd,7,Typ,1,Gd,BuiltIn,2003,Fin,2,433,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,10,2007,WD,Family,225000 
+824,50,RL,60,9900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,6,7,1940,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,778,778,GasA,TA,Y,SBrkr,944,545,0,1489,0,0,2,0,3,1,TA,7,Typ,1,Gd,Detchd,1940,Unf,1,240,TA,TA,Y,335,0,0,0,0,0,NA,GdWo,NA,0,7,2009,WD,Normal,139500 +825,20,FV,81,11216,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,No,Unf,0,Unf,0,1489,1489,GasA,Ex,Y,SBrkr,1489,0,0,1489,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,2,776,TA,TA,Y,0,140,0,0,0,0,NA,NA,NA,0,6,2006,New,Partial,232600 +826,20,RL,114,14803,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,PosN,PosN,1Fam,1Story,10,5,2007,2008,Hip,CompShg,CemntBd,CmentBd,BrkFace,816,Ex,TA,PConc,Ex,TA,Av,GLQ,1636,Unf,0,442,2078,GasA,Ex,Y,SBrkr,2084,0,0,2084,1,0,2,0,2,1,Ex,7,Typ,1,Gd,Attchd,2007,Fin,3,1220,TA,TA,Y,188,45,0,0,0,0,NA,NA,NA,0,6,2008,New,Partial,385000 +827,45,RM,50,6130,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Unf,5,6,1924,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,ALQ,784,Unf,0,0,784,GasA,Gd,Y,SBrkr,784,0,0,784,1,0,1,0,2,1,Gd,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,116,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,109500 +828,20,RL,65,8529,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,20,Unf,0,1434,1454,GasA,Ex,Y,SBrkr,1434,0,0,1434,0,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,2,527,TA,TA,Y,290,39,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,189000 +829,60,RL,NA,28698,Pave,NA,IR2,Low,AllPub,CulDSac,Sev,ClearCr,Norm,Norm,1Fam,2Story,5,5,1967,1967,Flat,Tar&Grv,Plywood,Plywood,None,0,TA,TA,PConc,TA,Gd,Gd,LwQ,249,ALQ,764,0,1013,GasA,TA,Y,SBrkr,1160,966,0,2126,0,1,2,1,3,1,TA,7,Min2,0,NA,Attchd,1967,Fin,2,538,TA,TA,Y,486,0,0,0,225,0,NA,NA,NA,0,6,2009,WD,Abnorml,185000 
+830,160,FV,24,2544,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,7,5,2005,2005,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,600,600,GasA,Ex,Y,SBrkr,520,623,80,1223,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2005,RFn,2,480,TA,TA,Y,0,166,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,147400 +831,20,RL,80,11900,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1957,1957,Gable,CompShg,HdBoard,HdBoard,BrkFace,387,TA,TA,CBlock,TA,TA,No,Rec,1040,Unf,0,352,1392,GasA,TA,Y,FuseA,1392,0,0,1392,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1957,RFn,2,458,TA,TA,Y,0,0,0,0,192,0,NA,NA,NA,0,6,2008,WD,Normal,166000 +832,160,FV,30,3180,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2005,2005,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,600,600,GasA,Ex,Y,SBrkr,520,600,80,1200,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2005,RFn,2,480,TA,TA,Y,0,166,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,151000 +833,60,RL,44,9548,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,6,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,223,Gd,TA,PConc,Gd,TA,No,GLQ,483,Unf,0,458,941,GasA,Ex,Y,SBrkr,941,888,0,1829,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2003,RFn,2,613,TA,TA,Y,192,39,0,0,0,0,NA,NA,NA,0,1,2010,WD,Normal,237000 +834,20,RL,100,10004,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1964,1964,Gable,CompShg,HdBoard,Plywood,BrkFace,180,TA,TA,CBlock,TA,TA,No,Rec,196,BLQ,345,975,1516,GasA,TA,Y,SBrkr,1516,0,0,1516,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1964,RFn,2,472,TA,TA,Y,0,0,0,0,152,0,NA,NA,NA,0,2,2009,WD,Normal,167000 +835,20,RL,75,7875,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,BrkFace,136,TA,TA,CBlock,TA,TA,No,Rec,572,Unf,0,572,1144,GasA,Gd,Y,SBrkr,1144,0,0,1144,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1961,Unf,2,456,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,9,2008,WD,Normal,139950 
+836,20,RL,60,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,4,7,1950,1995,Gable,CompShg,VinylSd,HdBoard,None,0,TA,TA,CBlock,Gd,TA,No,BLQ,442,Unf,0,625,1067,GasA,TA,Y,SBrkr,1067,0,0,1067,0,0,2,0,2,1,Gd,4,Min2,0,NA,Attchd,1996,Unf,2,436,TA,TA,Y,290,0,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,128000 +837,30,RM,90,8100,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1948,1973,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,TA,TA,No,Rec,338,Unf,0,1221,1559,GasA,Gd,Y,SBrkr,1559,0,0,1559,1,0,1,0,2,1,TA,5,Min2,0,NA,Detchd,1948,Unf,2,812,TA,TA,Y,0,116,230,0,0,0,NA,GdWo,NA,0,6,2007,COD,Normal,153500 +838,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1973,1973,Gable,CompShg,HdBoard,HdBoard,BrkFace,158,TA,TA,CBlock,TA,TA,No,BLQ,330,Unf,0,153,483,GasA,TA,Y,SBrkr,483,504,0,987,1,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1973,Unf,1,264,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,100000 +839,20,RL,75,9525,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1995,2006,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1099,1099,GasA,Ex,Y,SBrkr,1099,0,0,1099,0,0,1,1,3,1,Gd,6,Typ,0,NA,Attchd,1999,Unf,1,352,TA,TA,Y,278,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,144000 +840,50,RL,70,11767,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,6,1946,1995,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,352,Unf,0,416,768,GasA,Ex,Y,SBrkr,768,432,0,1200,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1946,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,130500 +841,70,RH,NA,12155,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,2Story,6,8,1925,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,156,Unf,0,516,672,GasA,TA,N,SBrkr,810,672,0,1482,0,0,2,0,4,1,Fa,7,Typ,0,NA,Detchd,1934,Unf,1,400,TA,TA,P,0,0,254,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,140000 
+842,70,RM,60,10440,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,5,8,1904,2002,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,650,650,GasA,Gd,Y,SBrkr,958,581,0,1539,0,0,2,0,3,1,Gd,8,Typ,1,Po,Detchd,1983,Unf,2,686,Gd,TA,P,70,78,68,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,157500 +843,80,RL,82,9020,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,7,1966,1966,Gable,CompShg,HdBoard,HdBoard,BrkFace,183,TA,TA,CBlock,TA,TA,Gd,Rec,312,ALQ,539,276,1127,GasA,TA,Y,SBrkr,1165,0,0,1165,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1966,RFn,2,490,Gd,Gd,Y,0,129,0,0,0,0,NA,GdPrv,NA,0,5,2008,WD,Normal,174900 +844,90,RL,80,8000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,Duplex,1Story,5,4,1961,1961,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1800,1800,GasA,Ex,N,SBrkr,1800,0,0,1800,0,0,2,0,6,2,TA,10,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,141000 +845,50,RM,100,12665,Pave,Grvl,IR1,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,5,8,1915,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,Mn,Unf,0,Unf,0,876,876,GasA,Gd,Y,SBrkr,876,540,0,1416,0,0,1,1,4,1,TA,7,Typ,1,Gd,Detchd,1949,Unf,3,720,TA,TA,Y,418,0,194,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,153900 +846,85,RL,NA,16647,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,SFoyer,5,5,1975,1981,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Gd,ALQ,1390,Unf,0,0,1390,GasA,TA,Y,SBrkr,1701,0,0,1701,1,0,2,0,3,1,TA,6,Min2,2,TA,Basment,1975,Fin,2,611,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,1,2007,WD,Normal,171000 +847,60,RL,75,9317,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1993,1993,Gable,CompShg,HdBoard,HdBoard,BrkFace,137,Gd,TA,PConc,Gd,TA,No,ALQ,513,Unf,0,227,740,GasA,Ex,Y,SBrkr,1006,769,0,1775,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1993,Unf,2,425,TA,TA,Y,234,72,192,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,213000 
+848,20,RL,36,15523,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,6,1972,1972,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,460,Unf,0,404,864,GasA,Ex,Y,SBrkr,864,0,0,864,1,0,1,0,3,1,TA,5,Typ,1,Fa,Attchd,1972,Unf,1,338,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,133500 +849,50,RL,75,45600,Pave,NA,IR2,Bnk,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1.5Fin,6,8,1908,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,907,907,GasA,TA,Y,SBrkr,1307,1051,0,2358,0,0,3,0,5,1,TA,10,Typ,1,Gd,Detchd,1908,Unf,2,360,Fa,TA,Y,486,40,0,0,175,0,NA,NA,NA,0,9,2008,WD,Normal,240000 +850,80,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,SLvl,6,7,1976,1994,Hip,CompShg,Plywood,Plywood,BrkFace,360,Gd,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,528,528,GasA,Ex,Y,SBrkr,1094,761,0,1855,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1976,RFn,2,512,TA,TA,Y,113,100,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,187000 +851,120,RM,36,4435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,170,Gd,TA,PConc,Gd,TA,Av,GLQ,659,Unf,0,189,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,0,NA,Attchd,2003,Fin,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,131500 +852,120,RL,NA,3196,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,8,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,40,Gd,TA,PConc,Gd,TA,Gd,Unf,0,Unf,0,1273,1273,GasA,Ex,Y,SBrkr,1456,0,0,1456,0,0,2,0,2,1,Gd,7,Typ,1,TA,Attchd,2003,Fin,2,400,TA,TA,Y,143,20,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,215000 +853,75,RL,53,7128,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2.5Unf,7,5,1941,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Rec,364,Unf,0,554,918,GasA,Gd,Y,SBrkr,918,728,0,1646,0,0,2,0,4,1,TA,7,Typ,2,Gd,Detchd,1941,Unf,1,240,TA,TA,Y,0,0,0,0,126,0,NA,MnPrv,NA,0,8,2007,WD,Normal,164000 
+854,80,RL,NA,12095,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,6,1964,1964,Gable,CompShg,MetalSd,HdBoard,BrkFace,115,TA,Gd,CBlock,TA,TA,Gd,Rec,564,Unf,0,563,1127,GasA,TA,Y,SBrkr,1445,0,0,1445,0,0,1,1,3,1,TA,7,Typ,1,Fa,Attchd,1964,RFn,2,645,TA,TA,Y,180,0,0,0,0,0,NA,MnPrv,NA,0,8,2009,WD,Normal,158000 +855,20,RL,102,17920,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,4,1955,1974,Hip,CompShg,Wd Sdng,Plywood,None,0,TA,TA,CBlock,TA,TA,Mn,ALQ,306,Rec,1085,372,1763,GasA,TA,Y,SBrkr,1779,0,0,1779,1,0,1,1,3,1,TA,6,Typ,1,Gd,Attchd,1955,Unf,2,454,TA,TA,Y,0,418,0,0,312,0,NA,NA,NA,0,7,2006,WD,Abnorml,170000 +856,20,RL,NA,6897,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,8,1962,2010,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,CBlock,TA,TA,No,ALQ,659,Unf,0,381,1040,GasA,Ex,Y,SBrkr,1040,0,0,1040,1,0,1,1,3,1,TA,6,Typ,0,NA,Detchd,1962,Unf,1,260,TA,TA,Y,0,104,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,127000 +857,80,RL,NA,10970,Pave,NA,IR1,Low,AllPub,Inside,Mod,CollgCr,Norm,Norm,1Fam,SLvl,6,6,1978,1978,Gable,CompShg,Plywood,HdBoard,None,0,TA,TA,CBlock,Gd,Gd,Gd,GLQ,505,LwQ,435,0,940,GasA,TA,Y,SBrkr,1026,0,0,1026,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1981,Unf,2,576,TA,Fa,Y,0,0,34,0,0,0,NA,MnPrv,NA,0,10,2008,WD,Normal,147000 +858,60,RL,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1994,1995,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,702,702,GasA,Gd,Y,SBrkr,702,779,0,1481,0,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1994,Fin,2,343,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,174000 +859,20,RL,80,10400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,7,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,BrkFace,189,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1090,1090,GasA,TA,Y,SBrkr,1370,0,0,1370,0,0,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976,RFn,2,479,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,6,2009,WD,Family,152000 
+860,60,RL,NA,11029,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,PosA,Norm,1Fam,2Story,6,7,1968,1984,Gable,CompShg,HdBoard,HdBoard,BrkFace,220,TA,TA,CBlock,TA,TA,Mn,BLQ,619,Unf,0,435,1054,GasA,TA,Y,SBrkr,1512,1142,0,2654,1,0,2,1,4,1,Gd,9,Typ,1,Gd,Attchd,1968,Unf,2,619,TA,TA,Y,0,65,0,0,222,0,NA,NA,NA,0,8,2006,WD,Normal,250000 +861,50,RL,55,7642,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,7,8,1918,1998,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,912,912,GasA,Gd,Y,SBrkr,912,514,0,1426,0,0,1,1,3,1,Gd,7,Typ,1,Gd,Detchd,1925,Unf,1,216,TA,TA,Y,0,240,0,0,0,0,NA,GdPrv,NA,0,6,2007,WD,Normal,189950 +862,190,RL,75,11625,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,2fmCon,1Story,5,4,1965,1965,Hip,CompShg,Plywood,HdBoard,None,0,TA,TA,PConc,TA,TA,Mn,BLQ,841,Unf,0,198,1039,GasA,Ex,Y,SBrkr,1039,0,0,1039,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1965,Unf,2,504,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,131500 +863,20,RL,81,9672,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,1Story,6,5,1984,1985,Hip,CompShg,HdBoard,Plywood,None,0,TA,TA,PConc,Gd,TA,No,GLQ,338,Unf,0,702,1040,GasA,TA,Y,SBrkr,1097,0,0,1097,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1986,Unf,2,480,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,5,2010,WD,Normal,152000 +864,20,RL,70,7931,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Hip,CompShg,BrkFace,Plywood,None,0,TA,TA,CBlock,TA,TA,No,BLQ,1148,Unf,0,0,1148,GasA,TA,Y,SBrkr,1148,0,0,1148,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,Unf,1,672,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,7,2009,WD,Normal,132500 +865,20,FV,72,8640,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1372,1372,GasA,Ex,Y,SBrkr,1372,0,0,1372,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2008,Fin,2,529,TA,TA,Y,0,140,0,0,0,0,NA,NA,NA,0,5,2008,New,Partial,250580 
+866,20,RL,NA,8750,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1970,1970,Gable,CompShg,MetalSd,MetalSd,BrkFace,76,TA,TA,CBlock,TA,TA,No,BLQ,828,Unf,0,174,1002,GasA,TA,Y,SBrkr,1002,0,0,1002,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1973,Unf,2,902,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,8,2009,WD,Normal,148500 +867,20,RL,67,10656,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,Stone,274,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1638,1638,GasA,Ex,Y,SBrkr,1646,0,0,1646,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2007,RFn,3,870,TA,TA,Y,192,80,0,0,0,0,NA,NA,NA,0,11,2007,New,Partial,248900 +868,20,RL,85,6970,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,4,5,1961,1961,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,932,Unf,0,108,1040,GasA,TA,Y,SBrkr,1120,0,0,1120,1,0,1,1,3,1,Fa,5,Typ,0,NA,Attchd,1961,RFn,2,544,TA,TA,Y,168,0,0,0,0,0,NA,NA,Shed,400,5,2007,WD,Normal,129000 +869,60,RL,NA,14762,Pave,NA,IR2,Lvl,AllPub,Corner,Gtl,Gilbert,Feedr,Norm,1Fam,2Story,5,6,1948,1950,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,Y,SBrkr,1547,720,53,2320,0,0,2,0,2,1,TA,7,Typ,1,TA,Attchd,1979,Unf,2,672,TA,TA,P,120,144,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,169000 +870,60,RL,80,9938,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1993,1994,Gable,CompShg,MetalSd,MetalSd,BrkFace,246,Gd,TA,PConc,Gd,TA,No,GLQ,750,Unf,0,300,1050,GasA,Ex,Y,SBrkr,1062,887,0,1949,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1993,Fin,2,574,TA,TA,Y,156,90,0,0,0,0,NA,GdPrv,NA,0,6,2010,WD,Normal,236000 +871,20,RL,60,6600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,PosN,Norm,1Fam,1Story,5,5,1962,1962,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,894,894,GasA,Gd,N,SBrkr,894,0,0,894,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1962,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,109500 
+872,60,RL,70,8750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,116,TA,TA,PConc,Gd,TA,No,GLQ,505,Unf,0,299,804,GasA,Ex,Y,SBrkr,804,878,0,1682,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1998,RFn,2,523,TA,TA,Y,0,77,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,200500 +873,20,RL,74,8892,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,1996,Gable,CompShg,WdShing,Wd Shng,None,0,Gd,TA,Stone,TA,TA,Av,Unf,0,Unf,0,105,105,GasA,Gd,Y,SBrkr,910,0,0,910,0,0,1,0,3,1,Gd,5,Typ,0,NA,Attchd,1953,Unf,2,414,TA,TA,Y,196,0,150,0,0,0,NA,GdWo,NA,0,10,2008,WD,Normal,116000 +874,40,RL,60,12144,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1949,1950,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,CBlock,TA,TA,No,Rec,375,Unf,0,457,832,GasA,Gd,Y,SBrkr,1036,0,232,1268,0,0,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1949,Unf,1,288,TA,TA,Y,0,28,0,0,0,0,NA,NA,Othr,0,9,2009,WD,Normal,133000 +875,50,RM,52,5720,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,5,6,1941,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,676,676,GasA,Ex,Y,SBrkr,676,455,0,1131,0,0,1,1,3,1,TA,5,Typ,0,NA,Detchd,1941,Unf,1,200,TA,TA,Y,26,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Abnorml,66500 +876,60,FV,75,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2007,2007,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,64,Unf,0,1120,1184,GasA,Ex,Y,SBrkr,1184,1426,0,2610,0,0,2,1,4,1,Ex,11,Typ,1,Gd,BuiltIn,2007,Fin,2,550,TA,TA,Y,208,364,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,303477 +877,20,RL,94,25286,Pave,NA,Reg,HLS,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,4,5,1963,1963,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,PConc,TA,TA,Gd,ALQ,633,Unf,0,431,1064,GasA,Gd,Y,SBrkr,1040,0,0,1040,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1963,Unf,2,648,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,1,2007,WD,Normal,132250 
+878,60,RL,74,8834,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,Stone,216,Gd,TA,PConc,Ex,TA,No,GLQ,1170,Unf,0,292,1462,GasA,Ex,Y,SBrkr,1462,762,0,2224,1,0,2,1,4,1,Ex,10,Typ,1,Gd,Attchd,2004,Fin,3,738,TA,TA,Y,184,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,350000 +879,85,RL,88,11782,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SFoyer,5,7,1961,1995,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,899,Unf,0,210,1109,GasA,TA,Y,SBrkr,1155,0,0,1155,1,0,1,0,3,1,Gd,6,Min2,0,NA,Detchd,1987,Unf,2,576,TA,TA,Y,192,0,0,0,0,0,NA,MnPrv,Shed,400,6,2010,WD,Normal,148000 +880,20,RL,NA,7000,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,8,1978,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,90,Gd,Gd,CBlock,TA,TA,No,ALQ,646,Unf,0,218,864,GasA,Ex,Y,SBrkr,864,0,0,864,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1978,Unf,1,336,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,7,2009,WD,Normal,136500 +881,20,RL,60,7024,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Ex,Gd,No,ALQ,980,Unf,0,110,1090,GasA,Gd,Y,SBrkr,1090,0,0,1090,1,0,1,1,2,1,TA,5,Typ,0,NA,Attchd,2005,Fin,2,450,TA,TA,Y,0,49,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,157000 +882,50,RL,44,13758,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Timber,Norm,Norm,1Fam,1.5Fin,7,5,1990,1991,Gable,CompShg,HdBoard,HdBoard,BrkFace,117,Gd,Gd,CBlock,Gd,TA,Mn,LwQ,902,Unf,0,254,1156,GasA,Ex,Y,SBrkr,1187,530,0,1717,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1990,RFn,2,400,TA,TA,Y,168,36,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,187500 +883,60,RL,NA,9636,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1992,1993,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,808,808,GasA,Gd,Y,SBrkr,808,785,0,1593,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,1993,RFn,2,389,TA,TA,Y,342,40,0,0,0,0,NA,MnPrv,NA,0,12,2009,WD,Normal,178000 
+884,75,RL,60,6204,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,2.5Fin,4,5,1912,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,PConc,TA,Fa,No,Unf,0,Unf,0,795,795,GasA,TA,N,SBrkr,954,795,481,2230,1,0,1,0,5,1,TA,10,Typ,0,NA,Detchd,1997,Unf,1,440,TA,Gd,Y,0,188,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,118500 +885,20,RL,65,7150,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1967,1967,Gable,CompShg,HdBoard,HdBoard,BrkFace,60,TA,TA,CBlock,TA,TA,No,BLQ,432,Unf,0,460,892,GasA,TA,Y,SBrkr,892,0,0,892,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1967,RFn,1,288,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,7,2009,WD,Normal,100000 +886,120,FV,50,5119,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,9,5,1999,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,60,Gd,TA,PConc,Ex,TA,Av,GLQ,1238,Unf,0,460,1698,GasA,Ex,Y,SBrkr,1709,0,0,1709,1,0,2,0,2,1,Gd,5,Typ,1,TA,Attchd,1999,Fin,2,506,TA,TA,Y,97,65,0,0,0,0,NA,NA,NA,0,1,2008,CWD,Abnorml,328900 +887,90,RL,70,8393,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,Duplex,1Story,5,5,1959,2005,Gable,CompShg,MetalSd,MetalSd,BrkFace,122,TA,TA,CBlock,TA,TA,No,LwQ,528,Unf,0,1098,1626,GasA,Ex,Y,SBrkr,1712,0,0,1712,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,2005,Fin,2,588,TA,TA,Y,272,54,0,0,0,0,NA,NA,NA,0,6,2006,WD,Family,145000 +888,50,RL,59,16466,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,7,1955,1955,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,PConc,TA,TA,No,Unf,0,Unf,0,816,816,GasA,TA,Y,SBrkr,872,521,0,1393,0,0,1,1,3,1,TA,8,Typ,0,NA,Attchd,1955,Unf,1,300,TA,TA,Y,121,0,0,0,265,0,NA,NA,NA,0,4,2008,WD,Normal,135500 +889,20,RL,95,15865,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,NAmes,Norm,Norm,1Fam,1Story,8,6,1970,1970,Flat,Tar&Grv,Wd Sdng,Wd Sdng,None,0,Gd,Gd,PConc,TA,Gd,Gd,ALQ,351,Rec,823,1043,2217,GasA,Ex,Y,SBrkr,2217,0,0,2217,1,0,2,0,4,1,Gd,8,Typ,1,TA,Attchd,1970,Unf,2,621,TA,TA,Y,81,207,0,0,224,0,NA,NA,NA,0,10,2007,WD,Normal,268000 
+890,20,RL,128,12160,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,6,4,1953,1953,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,90,TA,TA,CBlock,TA,TA,No,BLQ,1024,Unf,0,481,1505,GasA,Ex,Y,SBrkr,1505,0,0,1505,1,0,1,0,2,1,TA,6,Typ,1,TA,Attchd,1953,RFn,2,505,TA,TA,Y,0,0,0,162,0,0,NA,NA,NA,0,2,2009,WD,Normal,149500 +891,50,RL,60,8064,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,5,7,1949,2006,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,Mn,Unf,0,Unf,0,672,672,GasA,Ex,Y,SBrkr,672,252,0,924,0,0,1,0,3,1,TA,6,Typ,1,Po,Detchd,2003,Unf,2,576,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,Shed,2000,7,2007,WD,Normal,122900 +892,60,RL,70,11184,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,2Story,6,5,1978,1978,Hip,CompShg,HdBoard,HdBoard,BrkFace,92,TA,TA,CBlock,TA,TA,No,LwQ,226,Rec,500,192,918,GasA,Gd,Y,SBrkr,918,765,0,1683,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1978,RFn,2,440,TA,TA,Y,243,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,172500 +893,20,RL,70,8414,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,6,8,1963,2003,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,GLQ,663,Unf,0,396,1059,GasA,TA,Y,SBrkr,1068,0,0,1068,0,1,1,0,3,1,TA,6,Typ,0,NA,Attchd,1963,RFn,1,264,TA,TA,Y,192,0,0,0,0,0,NA,MnPrv,NA,0,2,2006,WD,Normal,154500 +894,20,RL,NA,13284,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,PosN,Norm,1Fam,1Story,5,5,1954,1954,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,TA,PConc,Gd,TA,Mn,BLQ,1064,Unf,0,319,1383,GasA,TA,Y,SBrkr,1383,0,0,1383,1,0,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1954,Unf,1,354,TA,TA,Y,511,116,0,0,0,0,NA,GdPrv,NA,0,6,2008,WD,Normal,165000 +895,90,RL,64,7018,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SawyerW,Norm,Norm,Duplex,1Story,5,5,1979,1979,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1535,0,0,1535,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1979,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Alloca,118858 
+896,60,RL,71,7056,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,5,1963,1963,Hip,CompShg,HdBoard,HdBoard,BrkFace,415,TA,TA,CBlock,TA,TA,No,BLQ,400,Unf,0,380,780,GasA,TA,Y,SBrkr,983,813,0,1796,1,0,1,1,4,1,TA,8,Typ,1,TA,Attchd,1963,RFn,2,483,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,140000 +897,30,RM,50,8765,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,4,6,1936,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,ALQ,285,Unf,0,666,951,GasA,Ex,N,SBrkr,951,0,0,951,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1936,Unf,1,327,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,4,2006,WD,Abnorml,106500 +898,90,RL,64,7018,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Feedr,Norm,Duplex,2Story,5,5,1979,1979,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1120,1120,0,2240,0,0,2,0,6,2,TA,12,Typ,0,NA,Detchd,1979,Unf,2,528,TA,TA,Y,154,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Alloca,142953 +899,20,RL,100,12919,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2009,2010,Hip,CompShg,VinylSd,VinylSd,Stone,760,Ex,TA,PConc,Ex,TA,Gd,GLQ,2188,Unf,0,142,2330,GasA,Ex,Y,SBrkr,2364,0,0,2364,1,0,2,1,2,1,Ex,11,Typ,2,Gd,Attchd,2009,Fin,3,820,TA,TA,Y,0,67,0,0,0,0,NA,NA,NA,0,3,2010,New,Partial,611657 +900,20,RL,65,6993,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,7,1961,1994,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,BLQ,465,Unf,0,447,912,GasA,TA,Y,SBrkr,1236,0,0,1236,0,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1961,Unf,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,135000 +901,20,RL,NA,7340,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,322,Unf,0,536,858,GasA,TA,Y,SBrkr,858,0,0,858,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1979,Unf,1,684,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,110000 
+902,20,RL,64,8712,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1957,2000,Hip,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,Mn,BLQ,860,Unf,0,132,992,GasA,TA,Y,SBrkr,1306,0,0,1306,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1968,Unf,1,756,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,153000 +903,60,RL,63,7875,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,783,783,GasA,Ex,Y,SBrkr,807,702,0,1509,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2003,Fin,2,393,TA,TA,Y,0,75,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,180000 +904,20,RL,50,14859,Pave,NA,IR1,HLS,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,27,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1670,1670,GasA,Ex,Y,SBrkr,1670,0,0,1670,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,690,TA,TA,Y,144,60,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,240000 +905,20,RL,NA,6173,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1967,1967,Gable,CompShg,HdBoard,Wd Sdng,BrkFace,75,TA,TA,CBlock,TA,TA,No,GLQ,599,Unf,0,277,876,GasA,TA,Y,SBrkr,902,0,0,902,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1967,Unf,1,288,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,8,2007,WD,Normal,125500 +906,20,RL,80,9920,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1954,1954,Gable,CompShg,HdBoard,HdBoard,Stone,110,TA,TA,CBlock,TA,TA,No,Rec,354,LwQ,290,412,1056,GasA,TA,Y,SBrkr,1063,0,0,1063,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1954,RFn,1,280,TA,TA,Y,0,0,164,0,0,0,NA,MnPrv,NA,0,2,2010,WD,Normal,128000 +907,20,RL,116,13501,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,208,Gd,TA,PConc,Gd,TA,No,GLQ,63,Unf,0,1560,1623,GasA,Ex,Y,SBrkr,1636,0,0,1636,1,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,3,865,TA,TA,Y,0,60,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,255000 
+908,50,RL,86,11500,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,7,7,1936,1987,Gable,CompShg,BrkFace,BrkFace,None,0,Gd,TA,CBlock,Gd,TA,No,Rec,223,Unf,0,794,1017,GasA,Gd,Y,SBrkr,1020,1037,0,2057,0,0,1,1,3,1,Gd,6,Typ,1,Gd,Attchd,1936,Fin,1,180,Fa,TA,Y,0,0,0,0,322,0,NA,NA,NA,0,6,2006,WD,Normal,250000 +909,20,RL,NA,8885,Pave,NA,IR1,Low,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1983,1983,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Av,BLQ,301,ALQ,324,239,864,GasA,TA,Y,SBrkr,902,0,0,902,1,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1983,Unf,2,484,TA,TA,Y,164,0,0,0,0,0,NA,MnPrv,NA,0,6,2006,WD,Normal,131000 +910,60,RL,149,12589,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,742,742,GasA,Ex,Y,SBrkr,742,742,0,1484,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2005,Fin,2,390,TA,TA,Y,36,24,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,174000 +911,90,RL,80,11600,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,Duplex,2Story,5,5,1960,1960,Gable,CompShg,MetalSd,MetalSd,BrkFace,361,TA,TA,CBlock,TA,TA,No,Rec,443,Unf,0,662,1105,GasA,TA,Y,FuseA,1105,1169,0,2274,0,0,2,0,5,2,TA,12,Typ,0,NA,Detchd,1960,Unf,2,480,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,1,2010,WD,Normal,154300 +912,20,RL,NA,9286,Pave,NA,IR1,Lvl,AllPub,CulDSac,Mod,CollgCr,Norm,Norm,1Fam,1Story,5,7,1977,1989,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,Gd,Gd,Av,ALQ,196,Unf,0,1072,1268,GasA,TA,Y,SBrkr,1268,0,0,1268,0,0,1,1,3,1,Gd,5,Typ,0,NA,Detchd,1978,Unf,1,252,TA,TA,Y,173,0,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,143500 +913,30,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,5,7,1925,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Rec,489,Unf,0,279,768,GasA,TA,N,SBrkr,1015,0,0,1015,0,0,1,0,3,1,TA,6,Min1,0,NA,Detchd,1925,Unf,1,450,TA,TA,Y,0,0,112,0,120,0,NA,MnPrv,Shed,620,7,2006,WD,Abnorml,88000 
+914,90,RH,82,6270,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,Crawfor,Norm,Norm,Duplex,2Story,5,6,1949,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,284,Unf,0,717,1001,GasA,TA,N,FuseA,1001,1001,0,2002,0,0,2,0,4,2,TA,8,Typ,0,NA,2Types,1949,Unf,3,871,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,145000 +915,160,FV,30,3000,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,6,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,Stone,76,Gd,TA,PConc,Gd,TA,Av,GLQ,294,Unf,0,318,612,GasA,Ex,Y,SBrkr,612,612,0,1224,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2009,RFn,2,528,TA,TA,Y,0,234,0,0,0,0,NA,NA,NA,0,6,2009,New,Partial,173733 +916,160,RM,21,2001,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,2Story,4,5,1970,1970,Gable,CompShg,CemntBd,CmentBd,BrkFace,80,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,546,546,GasA,Fa,Y,SBrkr,546,546,0,1092,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1970,Unf,1,286,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,1,2007,WD,Normal,75000 +917,20,C (all),50,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,2,3,1949,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,50,Unf,0,430,480,GasA,TA,N,FuseA,480,0,0,480,1,0,0,0,1,1,TA,4,Typ,0,NA,Detchd,1958,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,Abnorml,35311 +918,20,RL,NA,17140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,6,1956,1956,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,1059,Unf,0,75,1134,GasA,Ex,Y,FuseA,1229,0,0,1229,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1956,RFn,1,284,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,135000 +919,60,RL,103,13125,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1991,1991,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Ex,TA,Mn,BLQ,48,GLQ,634,422,1104,GasA,Ex,Y,SBrkr,912,1215,0,2127,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1991,RFn,3,833,TA,TA,Y,72,192,224,0,0,0,NA,GdPrv,NA,0,11,2007,WD,Normal,238000 
+920,20,RL,87,11029,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,8,1958,2002,Hip,CompShg,MetalSd,MetalSd,None,0,Ex,TA,CBlock,Gd,TA,No,ALQ,528,BLQ,411,245,1184,GasA,Ex,Y,SBrkr,1414,0,0,1414,1,0,1,0,3,1,TA,6,Min1,1,TA,Attchd,1990,Unf,2,601,TA,TA,Y,0,51,0,0,190,0,NA,NA,NA,0,5,2008,WD,Normal,176500 +921,60,RL,70,8462,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,1994,1994,Gable,CompShg,HdBoard,HdBoard,BrkFace,105,Gd,Gd,PConc,Gd,Gd,No,GLQ,814,Unf,0,114,928,GasA,Ex,Y,SBrkr,936,785,0,1721,0,1,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1994,RFn,2,471,TA,TA,Y,300,87,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,201000 +922,90,RL,67,8777,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Feedr,Norm,Duplex,1.5Fin,5,7,1900,2003,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,1084,Unf,0,188,1272,GasA,Gd,Y,SBrkr,1272,928,0,2200,2,0,2,2,4,2,TA,9,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,70,0,0,0,0,NA,GdPrv,NA,0,9,2008,WD,Normal,145900 +923,20,RL,65,10237,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,1Story,6,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,28,Unf,0,1288,1316,GasA,Ex,Y,SBrkr,1316,0,0,1316,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2005,Fin,2,397,TA,TA,Y,100,0,0,23,0,0,NA,NA,NA,0,10,2006,New,Partial,169990 +924,120,RL,50,8012,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1993,1994,Gable,CompShg,Plywood,Plywood,None,0,Gd,TA,PConc,Gd,TA,No,LwQ,165,GLQ,841,598,1604,GasA,Ex,Y,SBrkr,1617,0,0,1617,1,0,2,0,2,1,Gd,5,Typ,1,Fa,Attchd,1993,RFn,2,533,TA,TA,Y,0,69,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,193000 +925,20,RL,79,10240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1980,1980,Gable,CompShg,Plywood,Plywood,BrkFace,157,TA,Gd,CBlock,Gd,TA,No,BLQ,625,LwQ,1061,0,1686,GasA,TA,Y,SBrkr,1686,0,0,1686,1,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1980,Unf,2,612,TA,TA,Y,384,131,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,207500 
+926,20,RL,NA,15611,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,1Story,5,6,1977,1977,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,ALQ,767,LwQ,93,266,1126,GasA,TA,Y,SBrkr,1126,0,0,1126,0,1,2,0,3,1,Ex,6,Typ,0,NA,Attchd,1977,RFn,2,540,TA,TA,Y,180,0,0,0,0,0,NA,NA,NA,0,3,2008,WD,Abnorml,175000 +927,60,RL,93,11999,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2003,2004,Hip,CompShg,VinylSd,VinylSd,BrkFace,340,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1181,1181,GasA,Ex,Y,SBrkr,1234,1140,0,2374,0,0,2,1,4,1,Ex,10,Typ,1,Gd,BuiltIn,2003,Fin,3,656,TA,TA,Y,104,100,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,285000 +928,60,RL,NA,9900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Feedr,Norm,1Fam,2Story,7,5,1968,1968,Gable,CompShg,MetalSd,MetalSd,BrkFace,342,TA,TA,CBlock,TA,TA,No,BLQ,552,Unf,0,280,832,GasA,Gd,Y,SBrkr,1098,880,0,1978,0,0,2,1,4,1,TA,9,Typ,1,Gd,Attchd,1968,RFn,2,486,TA,TA,Y,0,43,0,0,0,0,NA,GdPrv,NA,0,4,2008,WD,Normal,176000 +929,20,RL,NA,11838,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2001,2001,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1753,1753,GasA,Ex,Y,SBrkr,1788,0,0,1788,0,0,2,0,3,1,Ex,7,Typ,1,TA,Attchd,2001,RFn,2,522,TA,TA,Y,202,151,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,236500 +930,60,RL,NA,13006,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1997,1997,Gable,CompShg,HdBoard,HdBoard,BrkFace,285,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,964,964,GasA,Gd,Y,SBrkr,993,1243,0,2236,0,0,2,1,4,1,Gd,8,Typ,1,TA,BuiltIn,1997,Fin,2,642,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,222000 +931,20,RL,73,8925,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,16,Unf,0,1450,1466,GasA,Ex,Y,SBrkr,1466,0,0,1466,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,Fin,3,610,TA,TA,Y,100,18,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,201000 
+932,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1965,1965,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,338,Rec,466,121,925,GasA,Ex,Y,SBrkr,925,0,0,925,0,1,1,0,2,1,TA,5,Typ,0,NA,Detchd,1965,Unf,1,429,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,7,2009,WD,Normal,117500 +933,20,RL,84,11670,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Somerst,RRNn,Norm,1Fam,1Story,9,5,2006,2006,Hip,CompShg,VinylSd,ImStucc,Stone,302,Ex,TA,PConc,Ex,Gd,No,Unf,0,Unf,0,1905,1905,GasA,Ex,Y,SBrkr,1905,0,0,1905,0,0,2,0,3,1,Ex,8,Typ,1,Gd,Attchd,2006,Fin,3,788,TA,TA,Y,0,191,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,320000 +934,20,RL,63,8487,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,210,Gd,TA,PConc,Gd,TA,Av,GLQ,20,Unf,0,1480,1500,GasA,Ex,Y,SBrkr,1500,0,0,1500,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,570,TA,TA,Y,192,36,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,190000 +935,20,RL,313,27650,Pave,NA,IR2,HLS,AllPub,Inside,Mod,NAmes,PosA,Norm,1Fam,1Story,7,7,1960,2007,Flat,Tar&Grv,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,Gd,TA,Gd,GLQ,425,Unf,0,160,585,GasA,Ex,Y,SBrkr,2069,0,0,2069,1,0,2,0,4,1,Gd,9,Typ,1,Gd,Attchd,1960,RFn,2,505,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,242000 +936,30,RL,52,5825,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,4,5,1926,1953,Gable,CompShg,MetalSd,MetalSd,BrkFace,108,TA,Gd,PConc,Fa,TA,Mn,Unf,0,Unf,0,600,600,GasA,Gd,Y,SBrkr,747,0,0,747,0,0,1,0,1,1,TA,5,Typ,0,NA,Detchd,1953,Unf,2,528,TA,TA,Y,0,0,32,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,79900 +937,20,RL,67,10083,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,NA,NA,Gd,TA,PConc,Gd,TA,No,GLQ,833,Unf,0,343,1176,GasA,Ex,Y,SBrkr,1200,0,0,1200,1,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2003,RFn,2,555,TA,TA,Y,0,41,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,184900 
+938,60,RL,75,9675,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,341,Unf,0,772,1113,GasA,Ex,Y,SBrkr,1113,858,0,1971,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2005,RFn,2,689,TA,TA,Y,0,48,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,253000 +939,60,RL,73,8760,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,Mn,GLQ,464,Unf,0,927,1391,GasA,Ex,Y,SBrkr,1391,571,0,1962,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2006,RFn,3,868,TA,TA,Y,0,90,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,239799 +940,70,RL,NA,24090,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,2Story,7,7,1940,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,Mn,Unf,0,Unf,0,1032,1032,GasA,Ex,Y,SBrkr,1207,1196,0,2403,0,0,2,0,4,1,TA,10,Typ,2,TA,Attchd,1940,Unf,1,349,TA,TA,Y,56,0,318,0,0,0,NA,NA,NA,0,6,2010,COD,Normal,244400 +941,90,RL,55,12640,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,1Story,6,5,1976,1976,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,Gd,Rec,936,LwQ,396,396,1728,GasA,TA,Y,SBrkr,1728,0,0,1728,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1976,Unf,2,574,TA,TA,Y,40,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,150900 +942,60,RL,NA,8755,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Gilbert,RRNn,Norm,1Fam,2Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,298,Gd,TA,PConc,Gd,TA,No,ALQ,772,Unf,0,220,992,GasA,Ex,Y,SBrkr,1022,1038,0,2060,1,0,2,1,3,1,Gd,8,Typ,1,TA,BuiltIn,1999,RFn,2,390,TA,TA,Y,0,0,0,168,0,0,NA,GdPrv,NA,0,6,2009,WD,Normal,214000 +943,90,RL,42,7711,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,1Story,4,3,1977,1977,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,Gd,TA,Gd,GLQ,1440,Unf,0,0,1440,GasA,TA,Y,SBrkr,1440,0,0,1440,2,0,2,0,4,2,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,321,0,0,0,0,0,NA,NA,NA,0,8,2007,Oth,Abnorml,150000 
+944,90,RL,100,25000,Pave,NA,Reg,Low,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,1Story,5,4,1967,1967,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,Av,Unf,0,Unf,0,1632,1632,GasA,TA,Y,SBrkr,1632,0,0,1632,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1967,Unf,2,576,TA,TA,P,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,143000 +945,20,RL,NA,14375,Pave,NA,IR1,Lvl,NoSeWa,CulDSac,Gtl,Timber,Norm,Norm,1Fam,SLvl,6,6,1958,1958,Gable,CompShg,HdBoard,HdBoard,BrkFace,541,TA,TA,CBlock,TA,TA,No,GLQ,111,Rec,354,354,819,GasA,Gd,Y,FuseA,1344,0,0,1344,0,1,1,0,3,1,Gd,7,Typ,1,Gd,Basment,1958,RFn,2,525,TA,TA,Y,0,118,0,0,233,0,NA,NA,NA,0,1,2009,COD,Abnorml,137500 +946,50,RM,98,8820,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,6,1890,1996,Hip,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,TA,TA,No,LwQ,1088,Unf,0,0,1088,GasA,TA,Y,SBrkr,1188,561,120,1869,0,0,1,0,2,1,TA,7,Typ,0,NA,Detchd,1963,Unf,2,456,TA,TA,Y,48,0,244,0,0,0,NA,MnWw,NA,0,9,2009,WD,Normal,124900 +947,80,RL,70,8163,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,6,1959,1959,Gable,CompShg,HdBoard,HdBoard,BrkFace,128,TA,Gd,CBlock,TA,TA,Av,ALQ,748,BLQ,294,102,1144,GasA,TA,Y,SBrkr,1144,0,0,1144,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1959,RFn,1,796,TA,TA,Y,86,0,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,143000 +948,20,RL,85,14536,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2002,2003,Hip,CompShg,VinylSd,VinylSd,BrkFace,236,Gd,TA,PConc,Gd,TA,Av,GLQ,1300,Unf,0,316,1616,GasA,Ex,Y,SBrkr,1629,0,0,1629,1,0,2,0,3,1,Gd,9,Typ,1,Gd,Attchd,2002,Fin,3,808,TA,TA,Y,0,252,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,270000 +949,60,RL,65,14006,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,144,Gd,TA,PConc,Gd,TA,NA,Unf,0,Unf,0,936,936,GasA,Ex,Y,SBrkr,936,840,0,1776,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2002,RFn,2,474,TA,TA,Y,144,96,0,0,0,0,NA,NA,NA,0,2,2006,WD,Normal,192500 
+950,20,RL,78,9360,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,7,1972,2006,Hip,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,982,Unf,0,179,1161,GasA,TA,Y,SBrkr,1381,0,0,1381,1,0,1,1,3,1,Gd,5,Typ,1,TA,Attchd,1972,RFn,2,676,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,197500 +951,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,8,1950,2002,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,398,BLQ,149,317,864,GasA,Gd,Y,SBrkr,864,0,0,864,1,0,1,0,3,1,Gd,5,Typ,0,NA,Detchd,1980,RFn,2,720,TA,TA,Y,194,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,129000 +952,20,RH,60,7800,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,1Story,5,5,1965,1965,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,641,Unf,0,187,828,GasA,Gd,Y,SBrkr,965,0,0,965,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1979,Unf,1,300,TA,TA,Y,421,0,0,0,0,0,NA,MnPrv,NA,0,7,2006,WD,Abnorml,119900 +953,85,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,SFoyer,5,8,1972,2003,Gable,CompShg,WdShing,HdBoard,None,0,TA,Gd,CBlock,Gd,TA,Av,GLQ,660,Unf,0,108,768,GasA,Gd,Y,SBrkr,768,0,0,768,0,1,1,0,2,1,TA,5,Typ,0,NA,Detchd,1974,Fin,1,396,TA,TA,Y,192,0,0,0,0,0,NA,MnPrv,NA,0,4,2009,WD,Normal,133900 +954,60,RL,NA,11075,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,2Story,5,4,1969,1969,Gable,CompShg,HdBoard,HdBoard,BrkFace,232,TA,TA,CBlock,TA,TA,Av,ALQ,562,LwQ,193,29,784,GasA,Ex,Y,SBrkr,1168,800,0,1968,0,1,2,1,4,1,TA,7,Min2,1,Po,Attchd,1969,RFn,2,530,TA,TA,Y,305,189,0,0,0,0,NA,MnPrv,Shed,400,9,2008,WD,Normal,172000 +955,90,RL,35,9400,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Edwards,Norm,Norm,Duplex,SFoyer,6,5,1975,1975,Flat,Tar&Grv,WdShing,Plywood,BrkFace,250,TA,TA,CBlock,Gd,Gd,Gd,GLQ,945,Unf,0,0,945,GasA,TA,Y,SBrkr,980,0,0,980,0,2,2,0,4,0,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2006,WD,AdjLand,127500 
+956,90,RH,82,7136,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Crawfor,Norm,Norm,Duplex,2Story,6,6,1946,1950,Gable,CompShg,MetalSd,MetalSd,BrkFace,423,TA,TA,CBlock,Gd,TA,No,Rec,484,Unf,0,495,979,GasA,TA,N,FuseF,979,979,0,1958,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1946,Unf,2,492,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,145000 +957,160,RM,24,1300,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blueste,Norm,Norm,TwnhsE,2Story,6,6,1980,1980,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,285,Unf,0,276,561,GasA,TA,Y,SBrkr,561,668,0,1229,0,0,1,1,2,1,TA,5,Typ,1,TA,Attchd,1980,Fin,2,462,TA,TA,Y,150,0,0,0,0,0,NA,GdPrv,NA,0,5,2009,WD,Normal,124000 +958,20,RL,70,7420,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1962,1962,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,417,Unf,0,640,1057,GasA,TA,Y,SBrkr,1057,0,0,1057,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1977,Fin,2,576,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,132000 +959,20,RL,65,8450,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,699,Unf,0,638,1337,GasA,Ex,Y,SBrkr,1337,0,0,1337,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2003,RFn,2,531,TA,TA,Y,0,39,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,185000 +960,160,FV,24,2572,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Somerst,Norm,Norm,Twnhs,2Story,7,5,1999,1999,Hip,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,ALQ,604,Unf,0,92,696,GasA,Ex,Y,SBrkr,696,720,0,1416,1,0,2,1,3,1,Gd,6,Typ,0,NA,Detchd,1999,Unf,2,484,TA,TA,Y,0,44,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,155000 +961,20,RL,50,7207,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,5,7,1958,2008,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,Gd,CBlock,TA,TA,Gd,BLQ,696,Unf,0,162,858,GasA,Gd,Y,SBrkr,858,0,0,858,1,0,1,0,2,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,117,0,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,116500 
+962,60,RL,NA,12227,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,PosN,Norm,1Fam,2Story,6,7,1977,1995,Gable,CompShg,HdBoard,HdBoard,BrkFace,424,TA,Gd,CBlock,Gd,Gd,No,ALQ,896,Unf,0,434,1330,GasA,TA,Y,SBrkr,1542,1330,0,2872,1,0,2,1,4,1,TA,11,Typ,1,TA,Attchd,1977,Fin,2,619,TA,TA,Y,550,282,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,272000 +963,160,RL,24,2308,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NPkVill,Norm,Norm,TwnhsE,2Story,6,6,1976,1976,Gable,CompShg,Plywood,Brk Cmn,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,556,Unf,0,248,804,GasA,TA,Y,SBrkr,804,744,0,1548,1,0,2,1,3,1,Gd,7,Typ,1,TA,Detchd,1976,Unf,2,440,TA,TA,Y,48,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,155000 +964,20,RL,122,11923,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,9,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,Unf,0,Unf,0,1800,1800,GasA,Ex,Y,SBrkr,1800,0,0,1800,0,0,2,0,2,1,Ex,7,Typ,0,NA,Attchd,2007,Fin,2,702,TA,TA,Y,288,136,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,239000 +965,60,RL,80,11316,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,2Story,7,5,2002,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,44,Gd,TA,PConc,Gd,TA,No,GLQ,624,Unf,0,193,817,GasA,Ex,Y,SBrkr,824,1070,0,1894,1,0,2,1,4,1,Gd,8,Typ,1,Gd,BuiltIn,2002,Fin,2,510,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,214900 +966,60,RL,65,10237,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,6,5,2005,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,783,783,GasA,Ex,Y,SBrkr,783,701,0,1484,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2005,Fin,2,393,TA,TA,Y,0,72,0,0,0,0,NA,NA,NA,0,7,2007,New,Partial,178900 +967,50,RL,130,9600,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,5,7,1940,1950,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,Gd,BrkTil,TA,Fa,No,Rec,428,Unf,0,300,728,GasA,Ex,Y,SBrkr,976,332,0,1308,1,0,1,1,2,1,TA,7,Min2,2,TA,Detchd,1940,Unf,1,256,TA,TA,Y,0,70,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,160000 
+968,20,RL,NA,7390,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1955,1955,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,151,TA,TA,CBlock,TA,TA,No,ALQ,902,Unf,0,196,1098,GasA,TA,Y,SBrkr,1098,0,0,1098,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1955,Unf,1,260,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,135000 +969,50,RM,50,5925,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,3,6,1910,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,600,600,Grav,Fa,N,SBrkr,600,368,0,968,0,0,1,0,2,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,5,2009,WD,Abnorml,37900 +970,190,RL,75,10382,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,2fmCon,SLvl,6,5,1958,1958,Hip,CompShg,HdBoard,HdBoard,BrkFace,105,TA,Fa,CBlock,TA,TA,Gd,ALQ,513,Unf,0,75,588,GasA,TA,Y,SBrkr,1095,0,0,1095,1,0,1,0,2,1,TA,6,Typ,0,NA,Attchd,1958,RFn,1,264,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2006,ConLD,Normal,140000 +971,50,RL,60,10800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,4,4,1949,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,720,720,GasA,TA,N,FuseA,720,472,0,1192,0,0,1,1,4,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,12,2006,WD,Abnorml,135000 +972,160,RL,36,2268,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,Wd Shng,Stone,106,Gd,TA,PConc,Gd,TA,No,GLQ,567,Unf,0,197,764,GasA,Ex,Y,SBrkr,764,862,0,1626,0,0,2,0,2,1,Gd,6,Typ,0,NA,BuiltIn,2003,RFn,2,474,TA,TA,Y,0,27,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,173000 +973,120,RL,55,7892,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,TwnhsE,1Story,6,5,1979,1979,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,918,918,GasA,TA,Y,SBrkr,918,0,0,918,0,0,2,0,2,1,TA,5,Typ,1,TA,Attchd,1979,Unf,1,264,TA,TA,Y,28,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,99500 
+974,20,FV,95,11639,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2007,2008,Gable,CompShg,CemntBd,CmentBd,NA,NA,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1428,1428,GasA,Ex,Y,SBrkr,1428,0,0,1428,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2007,Fin,2,480,TA,TA,Y,0,120,0,0,0,0,NA,NA,NA,0,12,2008,New,Partial,182000 +975,70,RL,60,11414,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,BrkSide,RRAn,Feedr,1Fam,2Story,7,8,1910,1993,Gable,CompShg,HdBoard,HdBoard,None,0,TA,Gd,BrkTil,Gd,TA,No,Unf,0,Unf,0,728,728,GasA,TA,N,SBrkr,1136,883,0,2019,0,0,1,0,3,1,Gd,8,Typ,0,NA,Detchd,1997,Unf,2,532,TA,TA,Y,509,135,0,0,0,0,NA,GdPrv,NA,0,10,2009,WD,Normal,167500 +976,160,FV,NA,2651,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Somerst,Norm,Norm,Twnhs,2Story,7,5,2000,2000,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,641,Unf,0,32,673,GasA,Ex,Y,SBrkr,673,709,0,1382,1,0,2,1,3,1,Gd,6,Typ,0,NA,Detchd,2000,Unf,2,490,TA,TA,Y,153,50,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,165000 +977,30,RL,51,5900,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,4,7,1923,1958,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,440,440,GasA,TA,Y,FuseA,869,0,0,869,0,0,1,0,2,1,Fa,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,85500 +978,120,FV,35,4274,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,7,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,NA,NA,Gd,TA,PConc,Gd,TA,No,GLQ,1106,Unf,0,135,1241,GasA,Ex,Y,SBrkr,1241,0,0,1241,1,0,1,1,1,1,Gd,4,Typ,0,NA,Attchd,2007,Fin,2,569,TA,TA,Y,0,116,0,0,0,0,NA,NA,NA,0,11,2007,New,Partial,199900 +979,20,RL,68,9450,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,1Story,4,5,1954,1954,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,LwQ,552,Unf,0,342,894,GasA,Ex,Y,SBrkr,894,0,0,894,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1999,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Abnorml,110000 
+980,20,RL,80,8816,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,6,1963,1963,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,No,Rec,651,Unf,0,470,1121,GasA,TA,Y,SBrkr,1121,0,0,1121,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1963,Unf,2,480,TA,TA,Y,0,80,0,0,0,0,NA,MnPrv,NA,0,6,2009,WD,Normal,139000 +981,85,RL,NA,12122,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,SFoyer,7,9,1961,2007,Gable,CompShg,CemntBd,CmentBd,Stone,210,Ex,TA,CBlock,TA,TA,Av,ALQ,867,Unf,0,77,944,GasA,Gd,Y,SBrkr,999,0,0,999,1,0,1,0,3,1,Ex,6,Typ,0,NA,Attchd,1961,RFn,2,588,TA,TA,Y,144,76,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,178400 +982,60,RL,98,12203,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1998,1999,Hip,CompShg,VinylSd,VinylSd,BrkFace,975,Gd,TA,PConc,Gd,TA,No,GLQ,854,Unf,0,371,1225,GasA,Ex,Y,SBrkr,1276,1336,0,2612,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1998,Fin,3,676,TA,TA,Y,250,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,336000 +983,20,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1266,1266,GasA,Ex,Y,SBrkr,1266,0,0,1266,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2007,Fin,2,388,TA,TA,Y,100,16,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,159895 +984,60,RL,NA,11250,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1128,1128,GasA,Ex,Y,SBrkr,1149,1141,0,2290,0,0,2,1,4,1,Gd,9,Typ,1,Gd,Attchd,2002,Unf,2,779,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,255900 +985,90,RL,75,10125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,Duplex,1.5Fin,5,5,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1302,432,0,1734,0,0,2,0,4,2,Gd,8,Typ,0,NA,Attchd,1977,Unf,2,539,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,COD,Normal,126000 
+986,190,RL,68,10880,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,2fmCon,1Story,5,5,1950,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,1040,Unf,0,124,1164,GasW,TA,N,SBrkr,1164,0,0,1164,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1950,Unf,1,240,TA,TA,Y,0,48,0,0,0,0,NA,NA,NA,0,8,2008,ConLD,Normal,125000 +987,50,RM,59,5310,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Feedr,Norm,1Fam,1.5Fin,6,8,1910,2003,Hip,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,Fa,No,Unf,0,Unf,0,485,485,GasA,Gd,Y,SBrkr,1001,634,0,1635,0,0,1,0,2,1,Gd,5,Typ,0,NA,Attchd,1950,Unf,1,255,Fa,TA,Y,394,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,117000 +988,20,RL,83,10159,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2009,2010,Hip,CompShg,VinylSd,VinylSd,Stone,450,Ex,TA,PConc,Ex,TA,Av,GLQ,1646,Unf,0,284,1930,GasA,Ex,Y,SBrkr,1940,0,0,1940,1,0,2,1,3,1,Ex,8,Typ,1,Gd,Attchd,2010,Fin,3,606,TA,TA,Y,168,95,0,0,0,0,NA,NA,NA,0,4,2010,New,Partial,395192 +989,60,RL,NA,12046,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,6,1976,1976,Gable,CompShg,Plywood,Plywood,BrkFace,298,TA,TA,CBlock,TA,TA,No,LwQ,156,Unf,0,692,848,GasA,TA,Y,SBrkr,1118,912,0,2030,0,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1976,Fin,2,551,TA,TA,Y,0,224,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,195000 +990,60,FV,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,No,Unf,0,Unf,0,770,770,GasA,Ex,Y,SBrkr,778,798,0,1576,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2006,RFn,2,614,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,197000 +991,60,RL,82,9452,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,423,Gd,TA,PConc,Gd,TA,No,GLQ,1074,Unf,0,322,1396,GasA,Ex,Y,SBrkr,1407,985,0,2392,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1997,Fin,3,870,TA,TA,Y,0,70,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,348000 
+992,70,RM,121,17671,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,2Story,8,9,1882,1986,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,Gd,BrkTil,TA,TA,No,BLQ,216,Unf,0,700,916,GasA,Gd,Y,SBrkr,916,826,0,1742,0,0,1,1,4,1,Gd,8,Typ,1,Gd,Attchd,1925,Unf,2,424,TA,TA,P,0,169,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,168000 +993,60,RL,80,9760,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,NAmes,Norm,Norm,1Fam,2Story,6,8,1964,1993,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,340,TA,TA,CBlock,TA,TA,Gd,BLQ,536,Rec,117,169,822,GasA,Gd,Y,SBrkr,1020,831,0,1851,0,0,2,1,3,1,Gd,7,Typ,1,Fa,Attchd,1964,RFn,2,440,TA,TA,Y,239,42,0,0,0,0,NA,MnWw,NA,0,7,2007,WD,Normal,187000 +994,60,RL,68,8846,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,6,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,750,750,GasA,Ex,Y,SBrkr,750,750,0,1500,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2005,RFn,2,564,TA,TA,Y,0,35,0,0,0,0,NA,NA,NA,0,8,2006,New,Partial,173900 +995,20,RL,96,12456,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2006,2007,Hip,CompShg,CemntBd,CmentBd,Stone,230,Ex,TA,PConc,Ex,TA,Gd,GLQ,1172,Unf,0,528,1700,GasA,Ex,Y,SBrkr,1718,0,0,1718,1,0,2,0,3,1,Ex,7,Typ,1,Gd,Attchd,2008,Fin,3,786,TA,TA,Y,216,48,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,337500 +996,50,RL,51,4712,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,BrkSide,Feedr,Norm,1Fam,1.5Fin,4,7,1946,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,384,Unf,0,363,747,GasA,TA,Y,SBrkr,774,456,0,1230,1,0,1,1,3,1,TA,5,Typ,0,NA,Detchd,1946,Unf,1,305,TA,TA,Y,0,57,0,0,63,0,NA,MnPrv,NA,0,8,2006,WD,Abnorml,121600 +997,20,RL,NA,10659,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1961,1961,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,915,Unf,0,135,1050,GasA,TA,Y,SBrkr,1050,0,0,1050,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1961,Unf,1,368,TA,TA,Y,0,319,0,0,0,0,NA,NA,NA,0,1,2006,COD,Normal,136500 
+998,20,RL,NA,11717,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,PosA,Norm,1Fam,1Story,6,6,1970,1970,Hip,CompShg,HdBoard,HdBoard,BrkFace,571,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1442,1442,GasA,TA,Y,SBrkr,1442,0,0,1442,0,0,2,0,2,1,TA,6,Typ,1,TA,Attchd,1970,RFn,2,615,TA,TA,Y,371,0,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,185000 +999,30,RM,60,9786,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,3,4,1922,1950,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,1007,1007,GasA,Fa,N,SBrkr,1077,0,0,1077,0,0,1,0,3,1,TA,6,Typ,1,Gd,Detchd,1922,Unf,1,210,TA,Fa,P,0,100,48,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,91000 +1000,20,RL,64,6762,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,24,Gd,TA,PConc,Gd,TA,Av,GLQ,686,Unf,0,501,1187,GasA,Ex,Y,SBrkr,1208,0,0,1208,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2006,RFn,2,632,TA,TA,Y,105,61,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,206000 +1001,20,RL,74,10206,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,3,3,1952,1952,Flat,Tar&Grv,BrkComm,Brk Cmn,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasW,Fa,N,FuseF,944,0,0,944,0,0,1,0,2,1,Fa,4,Min1,0,NA,Detchd,1956,Unf,2,528,TA,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,82000 +1002,30,RL,60,5400,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1920,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,691,691,GasA,Ex,Y,FuseA,691,0,0,691,0,0,1,0,2,1,Ex,4,Typ,0,NA,Detchd,1920,Unf,1,216,Fa,TA,N,0,20,94,0,0,0,NA,NA,NA,0,1,2007,WD,Abnorml,86000 +1003,20,RL,75,11957,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Somerst,RRAn,Norm,1Fam,1Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,53,Gd,TA,PConc,Gd,TA,No,GLQ,24,Unf,0,1550,1574,GasA,Ex,Y,SBrkr,1574,0,0,1574,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,824,TA,TA,Y,144,104,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,232000 
+1004,90,RL,NA,11500,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Feedr,RRAn,Duplex,1Story,5,6,1976,1976,Gable,CompShg,VinylSd,VinylSd,BrkFace,164,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1680,1680,GasA,Fa,Y,SBrkr,1680,0,0,1680,0,0,2,0,4,2,TA,8,Typ,0,NA,Detchd,1976,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,136905 +1005,120,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,16,Gd,TA,PConc,Gd,TA,No,GLQ,16,Unf,0,1330,1346,GasA,Ex,Y,SBrkr,1504,0,0,1504,0,0,2,0,1,1,Gd,7,Typ,1,Gd,Attchd,2005,Fin,2,457,TA,TA,Y,156,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,181000 +1006,80,RL,65,8385,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,SLvl,5,8,1977,1977,Gable,CompShg,HdBoard,HdBoard,BrkFace,220,Gd,TA,CBlock,Gd,Gd,Av,GLQ,595,Unf,0,390,985,GasA,TA,Y,SBrkr,985,0,0,985,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1977,Unf,1,328,TA,TA,Y,210,0,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,149900 +1007,20,RL,NA,12155,Pave,NA,IR3,Lvl,AllPub,Inside,Gtl,NAmes,PosN,Norm,1Fam,1Story,6,3,1970,1970,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,LwQ,1237,Unf,0,420,1657,GasA,Gd,Y,SBrkr,1657,0,0,1657,0,1,2,0,3,1,TA,7,Typ,1,TA,Attchd,1970,Unf,2,484,TA,TA,Y,0,0,0,0,147,0,NA,NA,NA,0,3,2007,WD,Normal,163500 +1008,160,RM,21,2217,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,4,4,1970,1970,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,273,LwQ,273,0,546,GasA,TA,Y,SBrkr,546,546,0,1092,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1970,RFn,1,286,TA,TA,Y,238,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,88000 +1009,20,RL,43,12118,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,1Story,7,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,Stone,108,Gd,TA,PConc,Ex,TA,Mn,Unf,0,Unf,0,1710,1710,GasA,Ex,Y,SBrkr,1710,0,0,1710,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2004,Fin,2,550,TA,TA,Y,100,48,0,0,180,0,NA,NA,NA,0,4,2009,WD,Normal,240000 
+1010,50,RL,60,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,5,5,1926,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Fa,BrkTil,TA,TA,No,Unf,0,Unf,0,1008,1008,GasA,Ex,Y,SBrkr,1008,0,514,1522,0,0,2,0,4,1,TA,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,P,0,0,138,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,102000 +1011,50,RL,115,21286,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1.5Fin,5,5,1948,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,720,720,GasA,TA,Y,SBrkr,720,551,0,1271,0,0,2,0,4,1,TA,7,Typ,1,Gd,Attchd,1948,Unf,1,312,TA,TA,Y,0,0,108,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,135000 +1012,90,RL,75,9825,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Duplex,1Story,5,5,1965,1965,Hip,CompShg,AsphShn,AsphShn,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,N,SBrkr,1664,0,0,1664,0,0,2,0,4,2,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,100000 +1013,70,RL,55,10592,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,6,7,1923,1996,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,PConc,TA,Fa,No,Unf,0,Unf,0,602,602,GasA,TA,Y,SBrkr,900,602,0,1502,0,0,1,1,3,1,Gd,7,Typ,2,TA,Detchd,1923,Unf,1,180,TA,TA,Y,96,0,112,0,53,0,NA,NA,NA,0,8,2007,WD,Normal,165000 +1014,30,RM,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,4,1910,2006,Hip,CompShg,MetalSd,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,ALQ,247,Rec,465,310,1022,GasW,TA,N,SBrkr,1022,0,0,1022,1,0,1,0,2,1,TA,4,Maj2,0,NA,Detchd,1956,Unf,1,280,TA,TA,Y,0,30,226,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,85000 +1015,20,RL,60,11664,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1Story,6,5,1948,1950,Gable,CompShg,MetalSd,MetalSd,BrkFace,206,TA,TA,CBlock,TA,Fa,No,BLQ,336,Unf,0,746,1082,GasA,TA,Y,SBrkr,1082,0,0,1082,0,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1948,Unf,1,240,TA,TA,Y,0,130,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,119200 
+1016,60,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,8,6,2001,2001,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,643,Unf,0,167,810,GasA,Ex,Y,SBrkr,810,855,0,1665,1,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2001,Fin,2,528,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,227000 +1017,20,RL,73,11883,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1996,1996,Hip,CompShg,VinylSd,VinylSd,BrkFace,196,Gd,TA,PConc,Gd,TA,Gd,GLQ,690,Unf,0,814,1504,GasA,Ex,Y,SBrkr,1504,0,0,1504,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,1996,Fin,2,478,TA,TA,Y,115,66,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,203000 +1018,120,RL,NA,5814,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1984,1984,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,CBlock,Gd,TA,Av,GLQ,1036,Unf,0,184,1220,GasA,Gd,Y,SBrkr,1360,0,0,1360,1,0,1,0,1,1,Gd,4,Typ,1,Ex,Attchd,1984,RFn,2,565,TA,TA,Y,63,0,0,0,0,0,NA,NA,NA,0,8,2009,COD,Abnorml,187500 +1019,80,RL,NA,10784,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,1991,1992,Gable,CompShg,HdBoard,HdBoard,BrkFace,76,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,384,384,GasA,Gd,Y,SBrkr,802,670,0,1472,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1991,RFn,2,402,TA,TA,Y,164,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,160000 +1020,120,RL,43,3013,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,145,Gd,TA,PConc,Gd,TA,Gd,GLQ,16,Unf,0,1346,1362,GasA,Ex,Y,SBrkr,1506,0,0,1506,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,Fin,2,440,TA,TA,Y,142,20,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,213490 +1021,20,RL,60,7024,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,1024,Unf,0,108,1132,GasA,Ex,Y,SBrkr,1132,0,0,1132,1,0,1,1,2,1,Gd,5,Typ,0,NA,Attchd,2005,Fin,2,451,TA,TA,Y,252,64,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,176000 
+1022,20,RL,64,7406,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,84,Gd,TA,PConc,Gd,TA,Av,GLQ,684,Unf,0,515,1199,GasA,Ex,Y,SBrkr,1220,0,0,1220,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2006,RFn,2,632,TA,TA,Y,105,54,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,194000 +1023,50,RM,52,9439,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,5,1930,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,324,Unf,0,588,912,GasA,Gd,Y,FuseA,912,336,0,1248,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1957,Unf,1,160,Fa,Fa,Y,0,0,192,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,87000 +1024,120,RL,43,3182,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,14,Gd,TA,PConc,Gd,Gd,No,GLQ,16,Unf,0,1330,1346,GasA,Ex,Y,SBrkr,1504,0,0,1504,0,0,2,0,2,1,Gd,7,Typ,1,Gd,Attchd,2005,Fin,2,437,TA,TA,Y,156,20,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,191000 +1025,20,RL,NA,15498,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,1Story,8,6,1976,1976,Hip,WdShake,Stone,HdBoard,None,0,Gd,TA,CBlock,Gd,TA,Av,ALQ,1165,LwQ,400,0,1565,GasA,TA,Y,SBrkr,2898,0,0,2898,1,0,2,0,2,1,Gd,10,Typ,1,Gd,Attchd,1976,Fin,2,665,TA,TA,Y,0,72,174,0,0,0,NA,NA,NA,0,5,2008,COD,Abnorml,287000 +1026,20,RL,70,7700,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1972,1972,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,LwQ,138,Rec,468,276,882,GasA,TA,Y,SBrkr,882,0,0,882,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1980,Unf,2,461,TA,TA,Y,96,0,0,0,0,0,NA,MnPrv,NA,0,3,2007,WD,Normal,112500 +1027,20,RL,73,9300,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,5,1960,1960,Gable,CompShg,MetalSd,HdBoard,BrkFace,324,TA,TA,CBlock,TA,TA,No,Rec,697,Unf,0,571,1268,GasA,TA,Y,SBrkr,1264,0,0,1264,1,0,1,0,3,1,TA,6,Typ,2,Gd,Attchd,1960,Unf,2,461,TA,TA,Y,0,0,0,0,143,0,NA,NA,NA,0,4,2010,WD,Normal,167500 
+1028,20,RL,71,9520,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,338,Gd,TA,PConc,Gd,TA,Gd,GLQ,1513,Unf,0,125,1638,GasA,Ex,Y,SBrkr,1646,0,0,1646,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2008,RFn,3,800,TA,TA,Y,192,44,0,0,0,0,NA,NA,NA,0,4,2008,New,Partial,293077 +1029,50,RL,79,9492,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,5,5,1941,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,Rec,368,BLQ,41,359,768,GasA,TA,Y,SBrkr,968,408,0,1376,1,0,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1941,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,105000 +1030,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,7,1972,1972,Gable,CompShg,HdBoard,HdBoard,BrkFace,281,TA,TA,CBlock,TA,TA,No,BLQ,317,Unf,0,355,672,GasA,Gd,Y,SBrkr,672,546,0,1218,0,1,1,1,3,1,TA,7,Typ,0,NA,Detchd,1972,Unf,1,264,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,118000 +1031,190,RH,NA,7082,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,2fmCon,2Story,5,8,1916,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,Mn,Unf,0,Unf,0,686,686,GasA,Gd,Y,SBrkr,948,980,0,1928,0,0,2,0,5,2,TA,10,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,228,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,160000 +1032,75,RL,102,15863,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SWISU,Norm,Norm,1Fam,2.5Fin,7,3,1920,1970,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,TA,BrkTil,TA,TA,No,GLQ,523,Unf,0,301,824,GasA,Ex,Y,SBrkr,1687,998,397,3082,1,0,2,1,5,1,TA,12,Typ,2,TA,Basment,1970,Fin,2,672,TA,TA,Y,136,63,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,197000 +1033,60,RL,NA,14541,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,7,1993,1993,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,Gd,PConc,Gd,Gd,No,GLQ,1012,Unf,0,326,1338,GasA,Ex,Y,SBrkr,1352,1168,0,2520,1,0,2,1,5,1,Gd,10,Typ,1,TA,Attchd,1993,RFn,3,796,TA,TA,Y,209,55,0,0,0,0,NA,NA,NA,0,11,2006,WD,Abnorml,310000 
+1034,20,RL,NA,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,Stone,295,Gd,TA,PConc,Gd,TA,No,GLQ,986,Unf,0,668,1654,GasA,Ex,Y,SBrkr,1654,0,0,1654,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2002,Unf,3,900,TA,TA,Y,0,136,0,0,0,0,NA,NA,NA,0,2,2006,WD,Normal,230000 +1035,30,RL,50,6305,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,5,7,1938,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,PConc,Fa,Fa,No,Unf,0,Unf,0,920,920,GasA,Ex,Y,SBrkr,954,0,0,954,0,0,1,0,2,1,Fa,5,Typ,1,Gd,Basment,1938,Unf,1,240,Fa,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,6,2007,WD,Normal,119750 +1036,20,RL,NA,11500,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,3,1957,1957,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,N,SBrkr,845,0,0,845,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1957,Unf,1,290,TA,TA,N,186,0,0,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,84000 +1037,20,RL,89,12898,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,9,5,2007,2008,Hip,CompShg,VinylSd,VinylSd,Stone,70,Gd,TA,PConc,Ex,TA,Gd,GLQ,1022,Unf,0,598,1620,GasA,Ex,Y,SBrkr,1620,0,0,1620,1,0,2,0,2,1,Ex,6,Typ,1,Ex,Attchd,2008,Fin,3,912,TA,TA,Y,228,0,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,315500 +1038,60,RL,NA,9240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,396,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1055,1055,GasA,Ex,Y,SBrkr,1055,1208,0,2263,0,0,2,1,3,1,Gd,7,Typ,1,TA,BuiltIn,2001,Fin,2,905,TA,TA,Y,0,45,0,0,189,0,NA,NA,NA,0,9,2008,WD,Normal,287000 +1039,160,RM,21,1533,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,2Story,4,6,1970,2008,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,546,546,GasA,TA,Y,SBrkr,798,546,0,1344,0,0,1,1,3,1,TA,6,Typ,1,TA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,97000 
+1040,180,RM,21,1477,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,SFoyer,4,4,1970,1970,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,509,Unf,0,121,630,GasA,TA,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,TA,3,Typ,0,NA,Attchd,1970,Unf,1,286,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,80000 +1041,20,RL,88,13125,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,4,1957,2000,Gable,CompShg,Wd Sdng,Wd Sdng,BrkCmn,67,TA,TA,CBlock,TA,TA,No,Rec,168,BLQ,682,284,1134,GasA,Ex,Y,SBrkr,1803,0,0,1803,1,0,2,0,3,1,TA,8,Maj1,1,TA,Attchd,1957,RFn,2,484,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,1,2006,WD,Normal,155000 +1042,60,RL,NA,9130,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Feedr,Norm,1Fam,2Story,6,8,1966,2000,Hip,CompShg,HdBoard,HdBoard,BrkFace,252,TA,TA,CBlock,TA,TA,No,GLQ,400,Rec,64,336,800,GasA,Gd,Y,SBrkr,800,832,0,1632,0,1,1,1,4,1,Gd,7,Typ,0,NA,Attchd,1966,Unf,2,484,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,173000 +1043,120,RL,34,5381,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,1Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,Stone,135,Gd,TA,PConc,Gd,TA,Av,ALQ,900,Unf,0,406,1306,GasA,Ex,Y,SBrkr,1306,0,0,1306,1,0,2,0,1,1,Gd,5,Typ,1,Gd,Attchd,2005,RFn,2,624,TA,TA,Y,170,63,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,196000 +1044,60,RL,86,11839,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1990,1990,Hip,CompShg,HdBoard,HdBoard,BrkFace,99,TA,TA,PConc,Gd,TA,No,GLQ,1085,Unf,0,390,1475,GasA,Ex,Y,SBrkr,1532,797,0,2329,1,0,2,1,4,1,Gd,10,Typ,1,Ex,Attchd,1990,Unf,2,514,TA,TA,Y,192,121,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,262280 +1045,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,PosN,Norm,1Fam,1Story,8,5,1981,1981,Hip,WdShngl,BrkFace,BrkFace,None,0,Gd,TA,PConc,Gd,TA,No,ALQ,1104,Unf,0,1420,2524,GasA,TA,Y,SBrkr,2524,0,0,2524,1,0,2,1,4,1,Gd,9,Typ,1,Gd,Attchd,1981,Fin,2,542,TA,TA,Y,474,120,0,0,0,0,NA,MnPrv,NA,0,7,2009,WD,Normal,278000 
+1046,20,RL,NA,13680,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Edwards,Norm,Norm,1Fam,1Story,3,5,1955,1955,Hip,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,FuseA,1733,0,0,1733,0,0,2,0,4,1,TA,8,Min2,1,Gd,Attchd,1955,Unf,2,452,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,139600 +1047,60,RL,85,16056,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,1Fam,2Story,9,5,2005,2006,Hip,CompShg,CemntBd,CmentBd,Stone,208,Gd,TA,PConc,Ex,TA,Av,GLQ,240,Unf,0,1752,1992,GasA,Ex,Y,SBrkr,1992,876,0,2868,0,0,3,1,4,1,Ex,11,Typ,1,Gd,BuiltIn,2005,Fin,3,716,TA,TA,Y,214,108,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,556581 +1048,20,RL,57,9245,Pave,NA,IR2,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1994,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,686,Unf,0,304,990,GasA,Ex,Y,SBrkr,990,0,0,990,0,1,1,0,3,1,TA,5,Typ,0,NA,Detchd,1996,Unf,2,672,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,145000 +1049,20,RL,100,21750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,4,1960,2006,Hip,CompShg,HdBoard,HdBoard,BrkFace,75,TA,Fa,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1771,0,0,1771,0,0,1,0,3,1,TA,9,Min1,1,TA,Attchd,1960,Unf,2,336,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,11,2009,WD,Normal,115000 +1050,20,RL,60,11100,Pave,NA,Reg,Low,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,7,1946,2006,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,GasA,Ex,Y,SBrkr,930,0,0,930,0,0,1,0,2,1,Gd,6,Typ,0,NA,Detchd,1946,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Abnorml,84900 +1051,20,RL,73,8993,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1302,1302,GasA,Ex,Y,SBrkr,1302,0,0,1302,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2007,Fin,2,436,TA,TA,Y,0,22,0,0,0,0,NA,NA,NA,0,8,2007,New,Partial,176485 
+1052,20,RL,103,11175,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1316,1316,GasA,Ex,Y,SBrkr,1316,0,0,1316,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2007,Fin,2,440,TA,TA,Y,0,20,0,0,0,0,NA,NA,NA,0,10,2007,New,Partial,200141 +1053,60,RL,100,9500,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Artery,Norm,1Fam,2Story,6,6,1964,1978,Gable,CompShg,VinylSd,VinylSd,BrkCmn,272,TA,TA,CBlock,TA,TA,No,Rec,442,Unf,0,374,816,GasA,TA,Y,SBrkr,1127,850,0,1977,0,1,1,1,4,1,TA,9,Typ,1,TA,Attchd,1964,RFn,2,540,TA,TA,Y,0,52,0,0,0,0,NA,GdPrv,NA,0,6,2007,WD,Normal,165000 +1054,20,RL,68,8562,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,1Story,5,6,1957,2002,Hip,CompShg,HdBoard,HdBoard,Stone,145,TA,TA,CBlock,TA,TA,Av,Rec,383,Unf,0,833,1216,GasA,Ex,Y,FuseA,1526,0,0,1526,0,0,1,0,4,1,TA,7,Min2,1,Gd,Basment,1957,Unf,1,364,TA,TA,Y,116,78,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,144500 +1055,60,RL,90,11367,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,210,Gd,TA,PConc,Gd,TA,Mn,GLQ,932,Unf,0,133,1065,GasA,Ex,Y,SBrkr,1091,898,0,1989,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2002,Fin,2,586,TA,TA,Y,199,60,0,0,0,0,NA,NA,NA,0,11,2006,WD,Normal,255000 +1056,20,RL,104,11361,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,5,1976,1976,Gable,CompShg,Plywood,Plywood,BrkFace,160,TA,TA,CBlock,Gd,TA,No,ALQ,644,Unf,0,549,1193,GasA,TA,Y,SBrkr,1523,0,0,1523,0,1,2,0,3,1,TA,7,Typ,1,TA,Attchd,1976,Fin,2,478,TA,TA,Y,0,0,0,0,189,0,NA,MnPrv,NA,0,5,2008,COD,Abnorml,180000 +1057,120,RL,43,7052,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,Stone,240,Gd,TA,PConc,Gd,TA,Av,GLQ,659,Unf,0,705,1364,GasA,Ex,Y,SBrkr,1364,0,0,1364,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,RFn,2,484,TA,TA,Y,192,36,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,185850 
+1058,60,RL,NA,29959,Pave,NA,IR2,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,6,1994,1994,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,595,Unf,0,378,973,GasA,Ex,Y,SBrkr,979,871,0,1850,0,0,2,1,3,1,Gd,7,Typ,1,Gd,BuiltIn,1994,Fin,2,467,TA,TA,Y,168,98,0,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,248000 +1059,60,RL,96,11308,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,9,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,154,Ex,TA,PConc,Ex,TA,Av,GLQ,936,Unf,0,168,1104,GasA,Ex,Y,SBrkr,1130,1054,0,2184,1,0,2,1,3,1,Ex,10,Typ,1,Gd,Attchd,2008,Fin,3,836,TA,TA,Y,0,102,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,335000 +1060,50,RL,NA,11275,Pave,NA,IR1,HLS,AllPub,Corner,Mod,Crawfor,Norm,Norm,1Fam,1.5Fin,6,7,1932,1950,Gable,CompShg,MetalSd,MetalSd,BrkFace,480,TA,TA,CBlock,TA,TA,Mn,Rec,297,LwQ,557,0,854,GasA,TA,Y,SBrkr,1096,895,0,1991,0,0,1,1,3,1,TA,7,Typ,1,Gd,Detchd,1977,Unf,2,432,TA,Fa,Y,0,0,19,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,220000 +1061,120,RL,41,4920,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,2001,2001,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,616,Unf,0,722,1338,GasA,Ex,Y,SBrkr,1338,0,0,1338,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2001,Fin,2,582,TA,TA,Y,0,0,170,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,213500 +1062,30,C (all),120,18000,Grvl,NA,Reg,Low,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,3,4,1935,1950,Gable,CompShg,MetalSd,MetalSd,None,0,Fa,TA,CBlock,TA,TA,No,Unf,0,Unf,0,894,894,GasA,TA,Y,SBrkr,894,0,0,894,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1994,RFn,3,1248,TA,TA,Y,0,20,0,0,0,0,NA,NA,Shed,560,8,2008,ConLD,Normal,81000 +1063,190,RM,85,13600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,5,5,1900,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,662,662,GasA,TA,N,SBrkr,1422,915,0,2337,0,0,2,0,5,2,TA,10,Min2,0,NA,Detchd,1945,Unf,2,560,TA,TA,Y,0,57,0,0,0,0,NA,NA,NA,0,9,2007,WD,Normal,90000 
+1064,30,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,6,6,1925,1980,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,BrkTil,TA,TA,No,BLQ,397,Unf,0,706,1103,GasA,Gd,Y,SBrkr,1103,0,0,1103,0,0,1,0,2,1,Gd,5,Typ,1,Gd,Detchd,1976,Unf,2,440,TA,TA,Y,166,120,0,0,0,0,NA,MnPrv,NA,0,7,2006,WD,Normal,110500 +1065,20,RL,NA,11000,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1966,1966,Gable,CompShg,Plywood,Plywood,BrkFace,200,TA,TA,CBlock,TA,TA,Mn,BLQ,740,Rec,230,184,1154,GasA,Ex,Y,SBrkr,1154,0,0,1154,0,0,1,1,3,1,TA,6,Typ,1,Po,Attchd,1966,RFn,2,480,TA,TA,Y,0,58,0,0,0,0,NA,MnPrv,NA,0,11,2009,WD,Normal,154000 +1066,60,RL,80,14000,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,ClearCr,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,PConc,Ex,TA,Gd,GLQ,1201,Unf,0,105,1306,GasA,Ex,Y,SBrkr,1306,954,0,2260,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1996,RFn,2,533,TA,TA,Y,296,44,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,328000 +1067,60,RL,59,7837,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,7,1993,1994,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,799,799,GasA,Gd,Y,SBrkr,799,772,0,1571,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1993,RFn,2,380,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,178000 +1068,60,RL,80,9760,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,NAmes,Norm,Norm,1Fam,2Story,6,6,1964,1964,Gable,CompShg,HdBoard,HdBoard,BrkFace,360,TA,TA,CBlock,TA,TA,Gd,GLQ,674,LwQ,106,0,780,GasA,TA,Y,SBrkr,798,813,0,1611,1,0,1,1,4,1,TA,7,Typ,0,NA,Attchd,1964,RFn,2,442,TA,TA,Y,328,128,0,0,189,0,NA,NA,NA,0,6,2008,WD,Normal,167900 +1069,160,RM,42,3964,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,6,4,1973,1973,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,837,Unf,0,105,942,GasA,Gd,Y,SBrkr,1291,1230,0,2521,1,0,2,1,5,1,TA,10,Maj1,1,Gd,Attchd,1973,Fin,2,576,TA,TA,Y,728,20,0,0,0,0,NA,GdPrv,NA,0,6,2006,WD,Normal,151400 
+1070,45,RL,60,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Unf,5,7,1949,2003,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,220,Unf,0,625,845,GasA,TA,Y,SBrkr,893,0,0,893,0,1,1,0,2,1,Gd,4,Typ,0,NA,Detchd,1985,Unf,2,576,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,135000 +1071,20,RL,72,10152,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1956,1956,Hip,CompShg,MetalSd,MetalSd,BrkFace,120,TA,TA,CBlock,TA,TA,No,BLQ,586,Unf,0,462,1048,GasA,TA,Y,SBrkr,1048,0,0,1048,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1956,Unf,1,286,TA,TA,Y,0,20,0,0,192,0,NA,NA,NA,0,6,2007,WD,Normal,135000 +1072,60,RL,78,11700,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,2Story,6,6,1968,1968,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,298,Unf,0,429,727,GasA,Ex,Y,SBrkr,829,727,0,1556,0,0,1,1,4,1,TA,8,Typ,0,NA,Attchd,1968,Unf,2,441,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,154000 +1073,50,RL,50,7585,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Artery,Norm,1Fam,1.5Fin,5,3,1948,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,Fa,Fa,Mn,Unf,0,Unf,0,810,810,GasA,Fa,Y,FuseA,1002,454,0,1456,1,1,1,0,4,1,TA,7,Typ,1,TA,Detchd,1954,Unf,1,280,TA,TA,P,0,0,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,91500 +1074,60,RL,75,7950,Pave,NA,IR1,Bnk,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,2Story,6,6,1977,1977,Hip,CompShg,HdBoard,Plywood,BrkFace,140,TA,TA,CBlock,TA,TA,No,BLQ,535,Unf,0,155,690,GasA,TA,Y,SBrkr,698,728,0,1426,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1977,Fin,2,440,TA,TA,Y,252,0,0,0,0,0,NA,MnPrv,NA,0,7,2009,WD,Normal,159500 +1075,20,RL,74,8556,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1240,1240,GasA,Ex,Y,SBrkr,1240,0,0,1240,0,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2006,RFn,3,826,TA,TA,Y,140,93,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,194000 
+1076,70,RL,75,13125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,6,1940,1984,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,410,Unf,0,390,800,GasA,TA,Y,SBrkr,960,780,0,1740,0,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1940,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,CWD,Normal,219500 +1077,50,RL,60,10800,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,8,1936,1989,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,Fa,TA,No,ALQ,626,Unf,0,170,796,GasA,Gd,Y,SBrkr,1096,370,0,1466,0,1,2,0,3,1,Gd,7,Min1,1,TA,Attchd,1950,Unf,2,566,TA,TA,Y,436,21,0,0,0,0,NA,NA,Shed,500,4,2006,WD,Normal,170000 +1078,20,RL,NA,15870,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1969,1969,Gable,CompShg,VinylSd,Plywood,None,0,TA,TA,CBlock,TA,TA,Mn,BLQ,75,Rec,791,230,1096,GasA,Ex,Y,SBrkr,1096,0,0,1096,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1969,Fin,1,299,TA,TA,Y,240,32,0,0,0,0,NA,NA,NA,0,3,2006,WD,Abnorml,138800 +1079,120,RM,37,4435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,169,Gd,TA,PConc,Gd,TA,Mn,GLQ,662,Unf,0,186,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,1,Gd,Attchd,2004,RFn,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,155900 +1080,20,RL,65,8775,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1994,1994,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,495,Unf,0,495,990,GasA,Gd,Y,SBrkr,990,0,0,990,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1996,Unf,1,299,TA,TA,Y,0,64,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,126000 +1081,20,RL,80,11040,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,7,1971,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,144,Gd,Gd,CBlock,TA,TA,No,ALQ,656,Unf,0,602,1258,GasA,Ex,Y,SBrkr,1258,0,0,1258,0,1,2,0,3,1,Gd,5,Typ,0,NA,Attchd,1971,RFn,2,528,TA,TA,Y,55,0,0,216,0,0,NA,NA,NA,0,10,2008,COD,Abnorml,145000 
+1082,20,RL,75,7500,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1963,1963,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,824,Unf,0,216,1040,GasA,Fa,Y,SBrkr,1040,0,0,1040,1,0,1,1,3,1,TA,5,Typ,0,NA,Attchd,1963,Fin,1,308,TA,TA,Y,0,0,220,0,0,0,NA,MnPrv,NA,0,6,2010,WD,Normal,133000 +1083,20,RL,70,8749,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,100,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1459,1459,GasA,Ex,Y,SBrkr,1459,0,0,1459,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2002,RFn,2,527,TA,TA,Y,192,39,0,0,0,0,NA,NA,NA,0,9,2007,WD,Normal,192000 +1084,20,RL,80,8800,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1964,1964,Hip,CompShg,HdBoard,HdBoard,BrkFace,425,TA,TA,CBlock,TA,TA,No,BLQ,553,Unf,0,698,1251,GasA,TA,Y,SBrkr,1251,0,0,1251,1,0,1,0,3,1,TA,6,Typ,2,Gd,Attchd,1964,RFn,1,461,TA,TA,Y,0,116,0,0,0,0,NA,MnPrv,Shed,700,3,2006,WD,Normal,160000 +1085,60,RL,NA,13031,Pave,NA,IR2,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1995,1996,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,No,ALQ,592,Unf,0,99,691,GasA,Gd,Y,SBrkr,691,807,0,1498,0,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1995,Fin,2,409,TA,TA,Y,315,44,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,187500 +1086,85,RL,73,9069,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,SFoyer,6,6,1992,1992,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,Av,GLQ,747,Unf,0,189,936,GasA,Ex,Y,SBrkr,996,0,0,996,1,0,1,0,2,1,Gd,5,Typ,0,NA,Attchd,1992,Unf,2,564,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,147000 +1087,160,RM,NA,1974,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,4,5,1973,1973,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,334,Unf,0,212,546,GasA,TA,Y,SBrkr,546,546,0,1092,0,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1973,RFn,1,286,TA,TA,Y,120,96,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,83500 
+1088,60,FV,85,10574,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1082,1082,GasA,Ex,Y,SBrkr,1082,871,0,1953,0,0,2,1,3,1,Gd,9,Typ,1,Gd,Attchd,2005,RFn,3,1043,TA,TA,Y,160,50,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,252000 +1089,160,RM,24,2522,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,Twnhs,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,Stone,50,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,970,970,GasA,Ex,Y,SBrkr,970,739,0,1709,0,0,2,0,3,1,Gd,7,Maj1,0,NA,Detchd,2004,Unf,2,380,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,137500 +1090,120,FV,37,3316,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,8,5,2005,2005,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,1039,Unf,0,208,1247,GasA,Ex,Y,SBrkr,1247,0,0,1247,1,0,1,1,1,1,Gd,4,Typ,1,Gd,Attchd,2005,Fin,2,550,TA,TA,Y,0,84,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,197000 +1091,90,RL,60,8544,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,Duplex,1Story,3,4,1950,1950,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,Wall,Fa,N,FuseA,1040,0,0,1040,0,0,2,0,2,2,TA,6,Typ,0,NA,Detchd,1987,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,92900 +1092,160,FV,24,2160,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,7,5,1999,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,212,Gd,TA,PConc,Gd,TA,No,BLQ,510,Unf,0,90,600,GasA,Ex,Y,SBrkr,624,628,0,1252,1,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,1999,Unf,2,462,TA,TA,Y,0,48,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,160000 +1093,50,RL,60,8400,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,6,5,1925,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,Rec,423,Unf,0,758,1181,GasA,Fa,Y,SBrkr,1390,304,0,1694,0,0,2,0,4,1,TA,7,Typ,1,Gd,Detchd,1925,Unf,2,576,TA,TA,Y,342,0,128,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,136500 
+1094,20,RL,71,9230,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,8,1965,1998,Hip,CompShg,MetalSd,MetalSd,BrkFace,166,TA,TA,CBlock,TA,TA,Mn,GLQ,661,Unf,0,203,864,GasA,Gd,Y,SBrkr,1200,0,0,1200,1,0,1,1,1,1,Gd,6,Typ,0,NA,Detchd,1977,Unf,2,884,TA,TA,Y,0,64,0,0,0,0,NA,MnPrv,NA,0,10,2006,WD,Normal,146000 +1095,20,RL,74,5868,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1956,2000,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,248,Rec,240,448,936,GasA,Ex,Y,SBrkr,936,0,0,936,1,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1956,Fin,1,308,TA,TA,Y,0,0,80,0,160,0,NA,NA,NA,0,5,2010,WD,Normal,129000 +1096,20,RL,78,9317,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,6,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,24,Unf,0,1290,1314,GasA,Gd,Y,SBrkr,1314,0,0,1314,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,2006,RFn,2,440,TA,TA,Y,0,22,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,176432 +1097,70,RM,60,6882,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,2Story,6,7,1914,2006,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,684,684,GasA,TA,Y,SBrkr,773,582,0,1355,0,0,1,1,3,1,Gd,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,136,0,115,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,127000 +1098,120,RL,NA,3696,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1986,1986,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1074,1074,GasA,Ex,Y,SBrkr,1088,0,0,1088,0,0,1,1,2,1,Gd,5,Typ,0,NA,Attchd,1987,RFn,2,461,TA,TA,Y,0,74,137,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,170000 +1099,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,4,6,1936,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,672,Unf,0,0,672,GasA,TA,Y,SBrkr,757,567,0,1324,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1936,Unf,1,240,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,128000 
+1100,20,RL,82,11880,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,1Story,7,5,1978,1978,Gable,CompShg,Plywood,Plywood,BrkFace,206,TA,TA,CBlock,Gd,TA,No,ALQ,704,Unf,0,567,1271,GasA,TA,Y,SBrkr,1601,0,0,1601,0,0,2,0,3,1,TA,7,Typ,1,TA,Attchd,1978,RFn,2,478,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2009,COD,Abnorml,157000 +1101,30,RL,60,8400,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1Story,2,5,1920,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,Fa,No,Rec,290,Unf,0,0,290,GasA,TA,N,FuseF,438,0,0,438,0,0,1,0,1,1,Fa,3,Typ,0,NA,Detchd,1930,Unf,1,246,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,60000 +1102,20,RL,61,9758,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1971,1971,Gable,CompShg,HdBoard,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,412,LwQ,287,251,950,GasA,TA,Y,SBrkr,950,0,0,950,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1981,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,119500 +1103,20,RL,70,7000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1960,2002,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,45,TA,TA,CBlock,TA,TA,No,Rec,588,Unf,0,422,1010,GasA,Ex,Y,SBrkr,1134,0,0,1134,0,0,1,0,2,1,TA,6,Typ,0,NA,Attchd,1960,RFn,1,254,TA,TA,Y,0,16,0,0,0,0,NA,MnWw,NA,0,4,2007,WD,Family,135000 +1104,20,RL,79,8910,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1959,1959,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,Mn,ALQ,655,Unf,0,0,655,GasA,Ex,Y,SBrkr,1194,0,0,1194,0,1,1,0,3,1,TA,6,Typ,1,Fa,BuiltIn,1954,Fin,2,539,TA,TA,Y,0,0,192,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,159500 +1105,160,RM,24,2016,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,TwnhsE,2Story,5,5,1970,1970,Gable,CompShg,HdBoard,HdBoard,BrkFace,304,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,630,630,GasA,TA,Y,SBrkr,630,672,0,1302,0,0,2,1,3,1,TA,6,Typ,0,NA,Detchd,1970,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,106000 
+1106,60,RL,98,12256,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1994,1995,Gable,CompShg,HdBoard,HdBoard,BrkFace,362,Gd,TA,PConc,Ex,TA,Av,GLQ,1032,Unf,0,431,1463,GasA,Ex,Y,SBrkr,1500,1122,0,2622,1,0,2,1,3,1,Gd,9,Typ,2,TA,Attchd,1994,RFn,2,712,TA,TA,Y,186,32,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,325000 +1107,20,RL,114,10357,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,SawyerW,Feedr,Norm,1Fam,1Story,7,5,1990,1991,Hip,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,Mn,GLQ,738,Unf,0,172,910,GasA,Gd,Y,SBrkr,1442,0,0,1442,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,1990,Fin,2,719,TA,TA,Y,0,244,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,179900 +1108,60,RL,168,23257,Pave,NA,IR3,HLS,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,Gd,No,Unf,0,Unf,0,868,868,GasA,Ex,Y,SBrkr,887,1134,0,2021,0,0,2,1,3,1,Gd,9,Typ,1,Gd,BuiltIn,2006,RFn,2,422,TA,TA,Y,0,100,0,0,0,0,NA,NA,NA,0,9,2006,New,Partial,274725 +1109,60,RL,NA,8063,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,924,924,GasA,Ex,Y,SBrkr,948,742,0,1690,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,2000,RFn,2,463,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,11,2007,WD,Abnorml,181000 +1110,20,RL,107,11362,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2004,2005,Gable,CompShg,MetalSd,MetalSd,Stone,42,Gd,TA,PConc,Ex,TA,Mn,GLQ,1039,Unf,0,797,1836,GasA,Ex,Y,SBrkr,1836,0,0,1836,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2004,Fin,3,862,TA,TA,Y,125,185,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,280000 +1111,60,RL,NA,8000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1995,1996,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,No,GLQ,219,Unf,0,554,773,GasA,Gd,Y,SBrkr,773,885,0,1658,1,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,1995,Fin,2,431,TA,TA,Y,224,84,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,188000 
+1112,60,RL,80,10480,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,6,1976,1976,Hip,CompShg,Plywood,Plywood,BrkFace,660,TA,TA,CBlock,TA,TA,No,ALQ,403,Unf,0,400,803,GasA,TA,Y,SBrkr,1098,866,0,1964,0,0,2,1,4,1,TA,8,Typ,1,Gd,Attchd,1976,RFn,2,483,TA,TA,Y,0,69,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,205000 +1113,20,RL,73,7100,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1957,1957,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,GLQ,708,Unf,0,108,816,GasA,TA,Y,FuseA,816,0,0,816,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1957,Unf,1,308,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,129900 +1114,20,RL,66,8923,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2006,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,643,Unf,0,365,1008,GasA,Gd,Y,SBrkr,1008,0,0,1008,1,0,1,0,2,1,Gd,6,Typ,0,NA,Attchd,1953,Unf,1,240,TA,TA,Y,0,18,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,134500 +1115,20,RL,90,5400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,7,1954,2000,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Rec,415,Unf,0,418,833,GasA,Ex,Y,SBrkr,833,0,0,833,0,0,1,0,2,1,Gd,4,Typ,0,NA,Detchd,1955,Unf,1,326,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,8,2006,WD,Normal,117000 +1116,20,RL,93,12085,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2007,Hip,CompShg,VinylSd,VinylSd,Stone,328,Gd,TA,PConc,Ex,TA,No,GLQ,1004,Unf,0,730,1734,GasA,Ex,Y,SBrkr,1734,0,0,1734,1,0,2,0,3,1,Ex,7,Typ,1,Gd,Attchd,2007,RFn,3,928,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2007,New,Partial,318000 +1117,80,RL,NA,7750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,8,5,2002,2002,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,353,Unf,0,55,408,GasA,Ex,Y,SBrkr,779,640,0,1419,1,0,2,1,3,1,Gd,7,Typ,1,TA,BuiltIn,2002,Fin,2,527,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,184100 
+1118,20,RL,57,9764,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,7,1967,2003,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,702,Unf,0,192,894,GasA,Ex,Y,SBrkr,894,0,0,894,1,0,1,0,3,1,Gd,5,Typ,0,NA,Attchd,1967,RFn,2,450,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,130000 +1119,80,RL,85,13825,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SLvl,5,6,1958,1987,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,533,533,GasA,TA,Y,SBrkr,1021,580,0,1601,0,1,1,0,3,1,TA,6,Min2,0,NA,BuiltIn,1958,RFn,1,300,TA,TA,Y,280,34,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,140000 +1120,20,RL,70,7560,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,369,Unf,0,671,1040,GasA,TA,Y,FuseA,1040,0,0,1040,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,286,TA,TA,Y,140,0,252,0,0,0,NA,GdWo,NA,0,7,2006,WD,Normal,133700 +1121,30,RM,59,8263,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,IDOTRR,Norm,Norm,1Fam,1Story,6,5,1920,1950,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1012,1012,GasA,TA,Y,FuseA,1012,0,0,1012,0,0,1,0,2,1,TA,6,Typ,1,Gd,Detchd,1920,Unf,1,308,TA,TA,Y,0,22,112,0,0,0,NA,MnPrv,NA,0,5,2007,WD,Normal,118400 +1122,20,RL,84,10084,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,196,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1528,1552,GasA,Ex,Y,SBrkr,1552,0,0,1552,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2005,RFn,3,782,TA,TA,Y,144,20,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,212900 +1123,20,RL,NA,8926,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,3,1956,1956,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,672,672,GasA,Ex,Y,FuseA,960,0,0,960,0,0,1,0,3,1,TA,5,Typ,0,NA,Basment,1956,Unf,1,288,TA,TA,Y,64,0,0,0,160,0,NA,MnPrv,NA,0,10,2009,COD,Abnorml,112000 
+1124,20,RL,50,9405,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,9,1947,2008,Hip,CompShg,VinylSd,VinylSd,None,0,TA,Ex,CBlock,TA,TA,No,Unf,0,Unf,0,698,698,GasA,Ex,Y,SBrkr,698,0,0,698,0,1,1,0,2,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,200,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,118000 +1125,80,RL,NA,9125,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,BrkFace,170,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,384,384,GasA,Gd,Y,SBrkr,812,670,0,1482,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1992,Fin,2,392,TA,TA,Y,100,25,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,163900 +1126,20,RL,60,10434,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1955,1955,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1005,1005,GasA,TA,Y,SBrkr,1005,0,0,1005,0,0,1,0,2,1,Fa,5,Typ,1,TA,Detchd,1977,Unf,2,672,Fa,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,115000 +1127,120,RL,53,3684,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2007,2007,Hip,CompShg,VinylSd,VinylSd,BrkFace,130,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1373,1373,GasA,Ex,Y,SBrkr,1555,0,0,1555,0,0,2,0,2,1,Gd,7,Typ,1,TA,Attchd,2007,Fin,3,660,TA,TA,Y,143,20,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,174000 +1128,20,RL,182,14572,Pave,NA,IR3,Lvl,AllPub,Corner,Gtl,Gilbert,Norm,Norm,1Fam,1Story,7,5,2004,2004,Hip,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,1300,Unf,0,230,1530,GasA,Ex,Y,SBrkr,1530,0,0,1530,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2004,Fin,3,630,TA,TA,Y,144,36,0,0,0,0,NA,NA,NA,0,11,2007,WD,Family,259000 +1129,60,RL,59,11796,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,847,847,GasA,Ex,Y,SBrkr,847,1112,0,1959,0,0,2,1,4,1,Gd,8,Typ,1,Gd,BuiltIn,2004,Fin,2,434,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,215000 
+1130,90,RM,60,7200,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,Duplex,SFoyer,5,5,1980,1980,Gable,CompShg,MetalSd,MetalSd,BrkFace,180,TA,TA,CBlock,Gd,TA,Gd,GLQ,936,Unf,0,0,936,GasA,TA,Y,SBrkr,936,0,0,936,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1980,Unf,2,672,TA,TA,Y,49,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,140000 +1131,50,RL,65,7804,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,4,3,1928,1950,Gable,CompShg,WdShing,Plywood,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,622,Unf,0,500,1122,GasA,TA,Y,SBrkr,1328,653,0,1981,1,0,2,0,4,1,Gd,7,Min2,2,TA,Detchd,1981,Unf,2,576,TA,TA,Y,431,44,0,0,0,0,NA,MnPrv,NA,0,12,2009,WD,Normal,135000 +1132,20,RL,63,10712,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,5,1991,1992,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,Mn,BLQ,212,Unf,0,762,974,GasA,TA,Y,SBrkr,974,0,0,974,0,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,28,0,0,0,0,NA,MnPrv,NA,0,9,2007,Oth,Abnorml,93500 +1133,70,RM,90,9900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,6,4,1880,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,Mn,Unf,0,Unf,0,1008,1008,GasW,TA,Y,SBrkr,1178,1032,0,2210,0,0,2,0,5,1,Fa,8,Typ,0,NA,Detchd,1930,Unf,1,205,Fa,TA,N,0,48,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,117500 +1134,60,RL,80,9828,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,8,5,1995,1995,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,584,Unf,0,544,1128,GasA,Ex,Y,SBrkr,1142,878,0,2020,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1995,RFn,2,466,TA,TA,Y,0,155,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,239500 +1135,60,RL,57,8773,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1997,1997,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,916,916,GasA,Gd,Y,SBrkr,916,684,0,1600,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1997,Fin,2,460,TA,TA,Y,100,38,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,169000 
+1136,30,RM,60,6180,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1Story,6,5,1926,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,960,960,GasA,TA,N,SBrkr,986,0,0,986,0,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1926,Unf,1,180,TA,TA,Y,0,128,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,102000 +1137,50,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,6,5,1950,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,280,Unf,0,752,1032,GasA,TA,Y,FuseA,1032,220,0,1252,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1950,Unf,1,288,TA,TA,Y,0,0,96,0,0,0,NA,NA,NA,0,4,2008,WD,Abnorml,119000 +1138,50,RL,54,6342,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1.5Fin,5,8,1875,1996,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,780,780,GasA,Gd,N,SBrkr,780,240,0,1020,0,0,1,0,2,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,176,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,94000 +1139,20,RL,NA,9819,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,6,5,1977,1977,Gable,CompShg,Plywood,ImStucc,None,0,TA,TA,PConc,TA,TA,Gd,ALQ,1567,Unf,0,0,1567,GasA,TA,Y,SBrkr,1567,0,0,1567,1,0,2,0,2,1,Gd,5,Typ,2,TA,Attchd,1977,RFn,2,714,TA,TA,Y,264,32,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,196000 +1140,30,RL,98,8731,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,5,5,1920,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,Fa,BrkTil,TA,TA,No,BLQ,645,Unf,0,270,915,GasA,TA,Y,SBrkr,1167,0,0,1167,0,0,1,0,3,1,TA,6,Maj1,1,Gd,Detchd,1972,Unf,2,495,TA,TA,Y,0,0,216,0,126,0,NA,NA,NA,0,5,2007,WD,Normal,144000 +1141,20,RL,60,7350,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1951,1951,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Mn,ALQ,852,Unf,0,100,952,GasA,TA,Y,SBrkr,952,0,0,952,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1988,Unf,2,840,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2008,COD,Abnorml,139000 
+1142,60,RL,NA,10304,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,PosN,Norm,1Fam,2Story,5,7,1976,1976,Gable,CompShg,Plywood,Plywood,BrkFace,44,TA,Gd,CBlock,TA,TA,No,ALQ,381,Unf,0,399,780,GasA,Ex,Y,SBrkr,1088,780,0,1868,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1976,Unf,2,484,TA,TA,Y,448,96,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,197500 +1143,60,RL,77,9965,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2006,2007,Hip,CompShg,VinylSd,VinylSd,Stone,340,Gd,TA,PConc,Ex,TA,Gd,GLQ,1150,Unf,0,316,1466,GasA,Ex,Y,SBrkr,1466,1362,0,2828,1,0,3,0,4,1,Gd,11,Typ,1,TA,BuiltIn,2006,RFn,3,1052,TA,TA,Y,125,144,0,0,0,0,NA,NA,NA,0,4,2007,New,Partial,424870 +1144,20,RL,NA,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,3,1959,1959,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,TA,CBlock,TA,TA,No,GLQ,288,Unf,0,718,1006,GasA,TA,Y,SBrkr,1006,0,0,1006,0,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,24,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,80000 +1145,190,RL,60,12180,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,2fmCon,1.5Fin,4,4,1941,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Fa,BrkTil,Gd,TA,No,BLQ,348,Unf,0,324,672,Grav,Fa,N,FuseA,672,252,0,924,1,0,1,0,2,1,Fa,5,Typ,0,NA,Detchd,1941,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,7,2010,WD,Normal,80000 +1146,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,6,1928,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,1042,1042,GasA,Ex,Y,SBrkr,1042,534,0,1576,0,0,1,0,3,1,TA,8,Typ,1,Gd,Detchd,1928,Unf,1,225,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2006,WD,Family,149000 +1147,20,RL,NA,11200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,6,5,1985,1985,Gable,CompShg,Wd Sdng,Wd Shng,BrkFace,85,Gd,TA,CBlock,Gd,TA,No,GLQ,1258,Unf,0,40,1298,GasA,TA,Y,SBrkr,1298,0,0,1298,1,0,2,0,3,1,Gd,5,Typ,1,TA,Attchd,1985,Unf,2,403,TA,TA,Y,165,26,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,180000 
+1148,70,RL,75,12000,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,7,1941,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,275,Unf,0,429,704,GasA,Ex,Y,SBrkr,860,704,0,1564,0,0,1,1,3,1,Fa,7,Typ,1,Gd,Attchd,1941,Unf,1,234,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,174500 +1149,50,RM,NA,5700,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,7,7,1926,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,572,572,GasA,TA,Y,SBrkr,572,539,0,1111,0,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1982,Unf,1,288,TA,TA,Y,0,0,176,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,116900 +1150,70,RM,50,9000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,2Story,7,9,1920,1988,Hip,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,TA,TA,No,ALQ,624,Unf,0,26,650,GasA,Ex,Y,SBrkr,832,650,0,1482,0,1,1,0,3,1,TA,7,Typ,0,NA,Detchd,1930,Unf,2,324,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,143000 +1151,20,RL,57,8280,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1950,1950,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,932,932,GasA,Ex,Y,FuseA,932,0,0,932,0,0,1,0,2,1,Gd,4,Typ,1,Gd,Attchd,1950,Unf,1,306,TA,TA,Y,0,0,214,0,0,0,NA,GdPrv,NA,0,11,2007,WD,Normal,124000 +1152,20,RL,134,17755,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,4,1959,1959,Gable,CompShg,HdBoard,Plywood,BrkFace,132,TA,TA,CBlock,TA,TA,No,BLQ,176,Unf,0,1290,1466,GasA,TA,Y,SBrkr,1466,0,0,1466,0,0,1,1,3,1,Fa,6,Typ,2,Gd,Attchd,1959,Fin,2,528,TA,TA,Y,0,140,0,0,100,0,NA,NA,NA,0,11,2006,WD,Normal,149900 +1153,20,RL,90,14115,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,7,1956,2004,Gable,CompShg,Stone,Stone,None,0,TA,TA,PConc,TA,TA,No,ALQ,296,GLQ,547,230,1073,GasA,Ex,Y,SBrkr,1811,0,0,1811,0,0,1,0,2,1,Ex,6,Typ,1,Gd,Attchd,1956,Fin,2,470,TA,TA,Y,0,0,280,0,0,0,NA,NA,NA,0,7,2006,WD,Abnorml,230000 
+1154,30,RM,NA,5890,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,6,8,1930,2007,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,Gd,BrkTil,TA,TA,Av,ALQ,538,Unf,0,278,816,GasA,Ex,Y,SBrkr,816,0,0,816,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,2002,Unf,1,432,TA,TA,Y,0,0,96,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,120500 +1155,60,RL,NA,13700,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,7,6,1965,1988,Gable,CompShg,VinylSd,VinylSd,Stone,288,TA,TA,CBlock,TA,TA,Gd,ALQ,454,Unf,0,410,864,GasA,TA,Y,SBrkr,902,918,0,1820,0,0,1,2,4,1,Gd,8,Typ,2,Gd,Attchd,1965,Unf,2,492,TA,TA,Y,60,84,0,0,273,0,NA,GdPrv,NA,0,5,2008,WD,Normal,201800 +1156,20,RL,90,10768,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Veenker,Norm,Norm,1Fam,1Story,5,8,1976,2004,Gable,CompShg,Plywood,Plywood,None,0,Gd,Gd,CBlock,Gd,TA,Gd,ALQ,1157,Unf,0,280,1437,GasA,TA,Y,SBrkr,1437,0,0,1437,1,0,2,0,3,1,Gd,6,Typ,1,Fa,Attchd,1976,RFn,2,528,TA,TA,Y,0,21,0,0,180,0,NA,NA,NA,0,7,2007,WD,Normal,218000 +1157,80,RL,85,9350,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,8,1965,1999,Gable,CompShg,BrkFace,BrkFace,None,0,TA,Gd,PConc,TA,TA,Gd,ALQ,633,Unf,0,586,1219,GasA,Gd,Y,SBrkr,1265,0,0,1265,0,1,2,0,3,1,Gd,6,Typ,1,Gd,Attchd,1965,RFn,2,502,TA,TA,Y,0,92,0,96,0,0,NA,MnPrv,NA,0,10,2008,WD,Normal,179900 +1158,120,RL,34,5001,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,1Story,7,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,166,Gd,TA,PConc,Gd,TA,No,GLQ,904,Unf,0,410,1314,GasA,Ex,Y,SBrkr,1314,0,0,1314,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2008,RFn,2,626,TA,TA,Y,172,62,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,230000 +1159,20,RL,92,11932,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Somerst,Feedr,Norm,1Fam,1Story,8,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,186,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1580,1580,GasA,Ex,Y,SBrkr,1580,0,0,1580,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2008,RFn,3,830,TA,TA,Y,0,24,0,0,0,0,NA,NA,NA,0,6,2008,ConLD,Partial,235128 
+1160,60,RL,76,9120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,6,1974,1974,Hip,CompShg,HdBoard,HdBoard,BrkFace,270,Gd,TA,CBlock,TA,TA,No,ALQ,442,Unf,0,459,901,GasA,TA,Y,SBrkr,943,933,0,1876,0,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1974,RFn,2,540,Gd,TA,Y,0,69,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,185000 +1161,160,RL,24,2280,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,Twnhs,2Story,6,5,1978,1978,Gable,CompShg,Plywood,Brk Cmn,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,311,Unf,0,544,855,GasA,Fa,Y,SBrkr,855,601,0,1456,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1978,Unf,2,440,TA,TA,Y,26,0,0,0,0,0,NA,NA,NA,0,7,2010,WD,Normal,146000 +1162,20,RL,NA,14778,Pave,NA,IR1,Low,AllPub,CulDSac,Gtl,Crawfor,PosN,Norm,1Fam,1Story,6,7,1954,2006,Hip,CompShg,HdBoard,HdBoard,BrkFace,72,Gd,TA,CBlock,TA,TA,No,BLQ,728,Unf,0,568,1296,GasA,Ex,Y,SBrkr,1640,0,0,1640,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1993,Unf,2,924,TA,TA,Y,108,0,0,216,0,0,NA,NA,NA,0,11,2008,WD,Normal,224000 +1163,20,RL,109,8724,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1968,1968,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,No,BLQ,492,Unf,0,402,894,GasA,Gd,Y,SBrkr,894,0,0,894,0,0,1,0,3,1,TA,5,Typ,1,Po,Attchd,1968,Fin,2,450,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,129000 +1164,90,RL,60,12900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,Duplex,SFoyer,4,4,1969,1969,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,1198,Unf,0,0,1198,GasA,TA,Y,SBrkr,1258,0,0,1258,2,0,0,2,0,2,TA,6,Typ,0,NA,CarPort,1969,Unf,2,400,Fa,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,1,2008,WD,Alloca,108959 +1165,80,RL,NA,16157,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,SLvl,5,7,1978,1978,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,PConc,Gd,TA,Gd,ALQ,680,Rec,391,289,1360,GasA,Ex,Y,SBrkr,1432,0,0,1432,1,0,1,1,2,1,Gd,5,Typ,1,TA,Attchd,1978,Unf,2,588,TA,TA,Y,168,180,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,194000 
+1166,20,RL,79,9541,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,Stone,268,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1502,1502,GasA,Ex,Y,SBrkr,1502,0,0,1502,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2009,RFn,2,644,TA,TA,Y,0,114,0,0,0,0,NA,NA,NA,0,9,2009,New,Partial,233170 +1167,20,RL,64,10475,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,72,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1694,1694,GasA,Ex,Y,SBrkr,1694,0,0,1694,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2008,RFn,3,776,TA,TA,Y,160,33,0,0,0,0,NA,NA,NA,0,2,2010,WD,Normal,245350 +1168,60,RL,58,10852,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,786,Unf,0,173,959,GasA,Ex,Y,SBrkr,959,712,0,1671,1,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,2000,Fin,2,472,TA,TA,Y,0,38,0,0,0,0,NA,NA,NA,0,2,2006,WD,Normal,173000 +1169,70,RL,120,13728,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,2Story,6,7,1935,1986,Hip,CompShg,Stucco,Stucco,None,0,TA,TA,CBlock,TA,TA,No,Rec,626,Unf,0,501,1127,GasA,Ex,Y,SBrkr,1236,872,0,2108,0,0,2,0,4,1,Gd,7,Typ,2,TA,Basment,1935,Unf,2,540,TA,TA,Y,0,0,0,0,90,0,NA,NA,NA,0,7,2008,WD,Normal,235000 +1170,60,RL,118,35760,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,2Story,10,5,1995,1996,Hip,CompShg,HdBoard,HdBoard,BrkFace,1378,Gd,Gd,PConc,Ex,TA,Gd,GLQ,1387,Unf,0,543,1930,GasA,Ex,Y,SBrkr,1831,1796,0,3627,1,0,3,1,4,1,Gd,10,Typ,1,TA,Attchd,1995,Fin,3,807,TA,TA,Y,361,76,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,625000 +1171,80,RL,76,9880,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,6,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,522,Unf,0,574,1096,GasA,TA,Y,SBrkr,1118,0,0,1118,1,0,1,0,3,1,TA,6,Typ,1,Po,Attchd,1977,Fin,1,358,TA,TA,Y,203,0,0,0,0,576,Gd,GdPrv,NA,0,7,2008,WD,Normal,171000 
+1172,20,RL,76,9120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,ALQ,662,Unf,0,599,1261,GasA,Ex,Y,SBrkr,1261,0,0,1261,1,0,1,0,3,1,TA,6,Typ,1,TA,Attchd,1958,RFn,2,433,TA,TA,Y,0,0,0,0,288,0,NA,NA,Shed,1400,11,2008,WD,Normal,163000 +1173,160,FV,35,4017,Pave,Pave,IR1,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2006,2007,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,625,625,GasA,Ex,Y,SBrkr,625,625,0,1250,0,0,2,1,2,1,Gd,5,Typ,0,NA,Detchd,2006,Fin,2,625,TA,TA,Y,0,54,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,171900 +1174,50,RL,138,18030,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1.5Fin,5,6,1946,1994,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,152,BLQ,469,977,1598,GasA,TA,Y,SBrkr,1636,971,479,3086,0,0,3,0,3,1,Ex,12,Maj1,1,Gd,NA,NA,NA,0,0,NA,NA,Y,122,0,0,0,0,0,NA,MnPrv,NA,0,3,2007,WD,Normal,200500 +1175,70,RL,80,16560,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,6,8,1932,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,No,Rec,503,Unf,0,449,952,GasA,TA,Y,SBrkr,1170,1175,0,2345,0,0,2,1,4,1,TA,9,Typ,1,Gd,Detchd,1932,Unf,2,360,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,239000 +1176,50,RL,85,10678,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,1.5Fin,8,5,1992,2000,Hip,CompShg,HdBoard,HdBoard,BrkFace,337,Gd,TA,PConc,Gd,TA,No,GLQ,700,Unf,0,983,1683,GasA,Ex,Y,SBrkr,2129,743,0,2872,0,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,1992,Fin,2,541,TA,TA,Y,0,33,0,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,285000 +1177,20,RL,37,6951,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,5,1984,1985,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,658,Unf,0,218,876,GasA,TA,Y,SBrkr,923,0,0,923,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1984,Unf,1,264,TA,TA,Y,362,0,0,0,0,0,NA,MnPrv,NA,0,10,2008,WD,Normal,119500 
+1178,50,RM,NA,3950,Pave,Grvl,Reg,Bnk,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,6,8,1926,2004,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,468,Unf,0,350,818,GasA,TA,Y,SBrkr,818,406,0,1224,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1926,Unf,1,210,TA,TA,N,0,0,116,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,115000 +1179,50,RL,54,7681,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,5,6,1921,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,731,731,GasA,Ex,Y,SBrkr,820,523,0,1343,0,0,1,1,3,1,TA,7,Typ,1,Gd,Detchd,1921,Unf,1,186,Fa,TA,Y,192,0,102,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,154900 +1180,20,RL,77,8335,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,5,1954,1954,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,Y,SBrkr,1124,0,0,1124,0,0,1,0,3,1,TA,5,Min2,1,Gd,NA,NA,NA,0,0,NA,NA,N,0,36,190,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,93000 +1181,60,RL,NA,11170,Pave,NA,IR2,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,2Story,7,5,1990,1991,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,Wood,Gd,TA,No,LwQ,1216,Unf,0,0,1216,GasA,Ex,Y,SBrkr,1298,1216,0,2514,0,0,2,1,4,1,TA,8,Typ,0,NA,Attchd,1990,Fin,2,693,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,4,2006,WD,Normal,250000 +1182,120,RM,64,5587,Pave,NA,IR1,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,TwnhsE,1Story,8,5,2008,2008,Hip,CompShg,CemntBd,CmentBd,Stone,186,Ex,TA,PConc,Ex,TA,Gd,GLQ,1480,Unf,0,120,1600,GasA,Ex,Y,SBrkr,1652,0,0,1652,1,1,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2008,Fin,2,482,TA,TA,Y,162,53,0,153,0,0,NA,NA,NA,0,11,2008,New,Partial,392500 +1183,60,RL,160,15623,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,10,5,1996,1996,Hip,CompShg,Wd Sdng,ImStucc,None,0,Gd,TA,PConc,Ex,TA,Av,GLQ,2096,Unf,0,300,2396,GasA,Ex,Y,SBrkr,2411,2065,0,4476,1,0,3,1,4,1,Ex,10,Typ,2,TA,Attchd,1996,Fin,3,813,TA,TA,Y,171,78,0,0,0,555,Ex,MnPrv,NA,0,7,2007,WD,Abnorml,745000 
+1184,30,RL,60,10800,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1920,1950,Hip,CompShg,Stucco,Stucco,None,0,TA,TA,BrkTil,TA,TA,No,Rec,821,Unf,0,299,1120,GasA,Ex,Y,SBrkr,1130,0,0,1130,1,0,1,0,2,1,TA,5,Typ,1,Gd,Detchd,1970,Unf,2,720,TA,TA,Y,229,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,120000 +1185,20,RL,50,35133,Grvl,NA,Reg,Lvl,AllPub,Inside,Mod,Timber,Norm,Norm,1Fam,1Story,5,4,1963,1963,Hip,CompShg,MetalSd,MetalSd,BrkFace,226,TA,TA,CBlock,TA,TA,Gd,Rec,1159,Unf,0,413,1572,GasA,Gd,Y,SBrkr,1572,0,0,1572,1,0,1,1,3,1,TA,5,Typ,2,TA,2Types,1963,RFn,3,995,TA,TA,Y,0,263,0,0,263,0,NA,NA,NA,0,5,2007,WD,Normal,186700 +1186,50,RL,60,9738,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,7,1924,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,Gd,BrkTil,TA,TA,No,BLQ,392,Unf,0,392,784,GasA,Gd,Y,SBrkr,949,272,0,1221,1,0,1,0,4,1,TA,7,Typ,0,NA,Attchd,1965,Unf,1,392,TA,TA,Y,0,0,236,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,104900 +1187,190,RL,107,10615,Pave,NA,IR1,Bnk,AllPub,Corner,Mod,OldTown,Artery,Artery,2fmCon,2Story,3,5,1900,1970,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Fa,TA,Mn,BLQ,440,Unf,0,538,978,GasA,TA,Y,SBrkr,1014,685,0,1699,1,0,2,0,3,2,TA,7,Typ,0,NA,CarPort,1920,Unf,2,420,Fa,Fa,Y,0,74,0,0,0,0,NA,NA,NA,0,8,2009,WD,Abnorml,95000 +1188,20,RL,89,12461,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,1Story,8,5,1994,1995,Gable,CompShg,ImStucc,ImStucc,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,1456,Unf,0,168,1624,GasA,Ex,Y,SBrkr,1624,0,0,1624,1,0,2,0,2,1,Gd,5,Typ,1,Fa,Attchd,1994,RFn,3,757,TA,TA,Y,0,114,192,0,0,0,NA,GdPrv,NA,0,7,2006,WD,Normal,262000 +1189,60,RL,68,8935,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,95,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,831,831,GasA,Ex,Y,SBrkr,831,829,0,1660,0,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2002,RFn,2,493,TA,TA,Y,144,68,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,195000 
+1190,60,RL,60,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,994,994,GasA,Gd,Y,SBrkr,1028,776,0,1804,0,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1999,Fin,2,442,TA,TA,Y,140,60,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,189000 +1191,190,RL,NA,32463,Pave,NA,Reg,Low,AllPub,Inside,Mod,Mitchel,Norm,Norm,2fmCon,1Story,4,4,1961,1975,Gable,CompShg,MetalSd,MetalSd,Stone,149,TA,Gd,CBlock,TA,TA,Av,BLQ,1159,Unf,0,90,1249,GasA,Ex,Y,SBrkr,1622,0,0,1622,1,0,1,0,3,1,TA,7,Typ,1,TA,2Types,1975,Fin,4,1356,TA,TA,Y,439,0,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,168000 +1192,160,FV,24,2645,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,8,5,1999,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,456,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,776,776,GasA,Ex,Y,SBrkr,764,677,0,1441,0,0,2,1,2,1,Gd,5,Typ,0,NA,Detchd,1999,Unf,2,492,TA,TA,Y,206,0,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,174000 +1193,50,RM,60,9600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,5,8,1925,1994,Gambrel,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,Mn,Unf,0,Unf,0,702,702,GasA,Gd,Y,SBrkr,842,630,0,1472,0,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,1925,Unf,1,250,TA,Fa,P,0,0,84,0,0,0,NA,GdWo,NA,0,7,2007,WD,Normal,125000 +1194,120,RM,NA,4500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Mitchel,Norm,Norm,TwnhsE,1Story,6,5,1999,1999,Hip,CompShg,VinylSd,VinylSd,BrkFace,425,TA,TA,PConc,Ex,TA,No,GLQ,883,Unf,0,341,1224,GasA,Ex,Y,SBrkr,1224,0,0,1224,1,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,1999,Fin,2,402,TA,TA,Y,0,304,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,165000 +1195,60,RL,80,9364,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Sawyer,Norm,Norm,1Fam,2Story,6,7,1969,1969,Gable,CompShg,HdBoard,HdBoard,Stone,143,TA,TA,CBlock,TA,TA,No,ALQ,371,Unf,0,292,663,GasA,TA,Y,SBrkr,663,689,0,1352,0,0,1,1,4,1,TA,7,Typ,0,NA,Attchd,1969,Fin,1,299,TA,TA,Y,379,36,0,0,0,0,NA,MnPrv,NA,0,3,2010,WD,Normal,158000 
+1196,60,RL,51,8029,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,728,728,0,1456,0,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,2005,Fin,2,400,TA,TA,Y,100,24,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,176000 +1197,60,RL,58,14054,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,879,879,GasA,Ex,Y,SBrkr,879,984,0,1863,0,0,2,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2006,Fin,3,660,TA,TA,Y,100,17,0,0,0,0,NA,NA,NA,0,11,2006,New,Partial,219210 +1198,75,RM,65,8850,Pave,NA,IR1,Bnk,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2.5Unf,7,6,1916,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,815,815,GasA,Ex,Y,SBrkr,815,875,0,1690,0,0,1,0,3,1,TA,7,Typ,1,Gd,Detchd,1916,Unf,1,225,TA,TA,Y,0,0,330,0,0,0,NA,NA,NA,0,7,2006,ConLw,Normal,144000 +1199,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1212,1212,GasA,Ex,Y,SBrkr,1212,0,0,1212,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2001,RFn,2,573,TA,TA,Y,356,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,178000 +1200,20,RL,75,11235,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,4,5,1963,1979,Gable,CompShg,HdBoard,HdBoard,BrkFace,51,TA,TA,CBlock,TA,TA,No,Rec,547,Unf,0,504,1051,GasA,Gd,Y,SBrkr,1382,0,0,1382,0,0,1,1,3,1,TA,6,Typ,1,Po,Attchd,1974,Unf,2,459,TA,TA,Y,0,82,0,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,148000 +1201,20,RL,71,9353,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1970,1970,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,864,864,GasA,Gd,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1972,Unf,1,280,TA,TA,Y,0,0,0,0,0,0,NA,NA,Shed,0,7,2006,Oth,Abnorml,116050 
+1202,60,RL,80,10400,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,866,866,GasA,Ex,Y,SBrkr,866,913,0,1779,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,1998,RFn,2,546,TA,TA,Y,198,36,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,197900 +1203,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,8,1925,1997,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,884,884,GasA,Ex,Y,SBrkr,884,464,0,1348,1,0,1,0,3,1,TA,5,Typ,1,Fa,Detchd,1960,Unf,1,216,TA,TA,N,0,0,208,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,117000 +1204,20,RL,75,9750,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2000,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,171,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1630,1630,GasA,Ex,Y,SBrkr,1630,0,0,1630,0,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2000,Unf,2,451,TA,TA,Y,74,234,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,213000 +1205,20,RL,78,10140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,5,6,1975,1975,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,788,Unf,0,268,1056,GasA,Ex,Y,SBrkr,1074,0,0,1074,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1975,RFn,2,495,TA,TA,Y,0,88,0,0,0,0,NA,MnPrv,NA,0,7,2006,WD,Normal,153500 +1206,20,RL,90,14684,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,7,1990,1991,Hip,CompShg,HdBoard,HdBoard,BrkFace,234,Gd,TA,CBlock,Gd,TA,Mn,ALQ,485,BLQ,177,1496,2158,GasA,Gd,Y,SBrkr,2196,0,0,2196,0,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1990,RFn,3,701,TA,TA,Y,84,70,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,271900 +1207,20,RH,NA,8900,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,4,4,1966,1966,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,Rec,1056,Unf,0,0,1056,GasA,TA,Y,SBrkr,1056,0,0,1056,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1966,Unf,1,384,TA,TA,Y,0,42,0,0,0,0,NA,MnPrv,NA,0,11,2006,WD,Normal,107000 
+1208,20,RL,70,9135,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,6,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,120,Gd,TA,PConc,Gd,TA,Av,GLQ,340,Unf,0,1342,1682,GasA,Ex,Y,SBrkr,1700,0,0,1700,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2003,RFn,2,544,TA,TA,Y,192,23,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,200000 +1209,20,RL,70,7763,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1962,1980,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Rec,504,BLQ,108,319,931,GasA,TA,Y,SBrkr,1283,0,0,1283,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1980,Unf,2,506,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,140000 +1210,20,RL,85,10182,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Somerst,RRNn,Norm,1Fam,1Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,Stone,420,Gd,TA,PConc,Ex,TA,Mn,GLQ,1220,Unf,0,440,1660,GasA,Ex,Y,SBrkr,1660,0,0,1660,1,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2006,RFn,2,500,TA,TA,Y,322,50,0,0,0,0,NA,NA,NA,0,5,2006,New,Partial,290000 +1211,60,RL,70,11218,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,1992,1992,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1055,1055,GasA,Ex,Y,SBrkr,1055,790,0,1845,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1992,RFn,2,462,TA,TA,Y,635,104,0,0,0,0,NA,GdPrv,Shed,400,5,2010,WD,Normal,189000 +1212,50,RL,152,12134,Pave,NA,IR1,Bnk,AllPub,Inside,Mod,Gilbert,Norm,Norm,1Fam,1.5Fin,8,7,1988,2005,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,Wood,Gd,TA,Av,GLQ,427,Unf,0,132,559,GasA,Gd,Y,SBrkr,1080,672,0,1752,0,0,2,0,4,1,TA,8,Typ,0,NA,Basment,1988,RFn,2,492,TA,TA,Y,325,12,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,164000 +1213,30,RL,50,9340,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,6,1941,1950,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,344,Unf,0,328,672,GasA,TA,Y,SBrkr,672,0,0,672,1,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1941,Unf,1,234,TA,TA,N,0,113,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,113000 
+1214,80,RL,NA,10246,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,Norm,Norm,1Fam,SLvl,4,9,1965,2001,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,Gd,Av,GLQ,648,Unf,0,0,648,GasA,Ex,Y,SBrkr,960,0,0,960,1,1,0,0,0,1,TA,3,Typ,0,NA,Attchd,1965,Unf,1,364,TA,TA,Y,88,0,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,145000 +1215,85,RL,69,10205,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SFoyer,5,5,1962,1962,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,784,Unf,0,141,925,GasA,TA,Y,SBrkr,999,0,0,999,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1962,Unf,1,300,TA,TA,Y,150,72,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,134500 +1216,20,RL,99,7094,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1966,1966,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,180,LwQ,374,340,894,GasA,TA,Y,SBrkr,894,0,0,894,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1966,RFn,1,384,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2007,WD,Normal,125000 +1217,90,RM,68,8930,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,RRAe,Norm,Duplex,1.5Fin,6,5,1978,1978,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1318,584,0,1902,0,0,2,0,4,2,TA,8,Typ,0,NA,Attchd,1978,Unf,2,539,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,112000 +1218,20,FV,72,8640,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2009,2009,Gable,CompShg,CemntBd,CmentBd,Stone,72,Gd,TA,PConc,Gd,TA,Mn,GLQ,936,Unf,0,364,1300,GasA,Ex,Y,SBrkr,1314,0,0,1314,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2009,RFn,2,552,TA,TA,Y,135,112,0,0,0,0,NA,NA,NA,0,9,2009,New,Partial,229456 +1219,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,4,5,1947,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,Gd,N,SBrkr,672,240,0,912,0,0,1,0,2,1,TA,3,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,80500 
+1220,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1971,1971,Gable,CompShg,CemntBd,CmentBd,BrkFace,236,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,672,672,GasA,TA,Y,SBrkr,672,546,0,1218,0,0,1,1,3,1,TA,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,201,0,0,0,0,0,NA,NA,NA,0,4,2006,WD,Abnorml,91500 +1221,20,RL,66,7800,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1964,1964,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,312,LwQ,600,0,912,GasA,TA,Y,SBrkr,912,0,0,912,0,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1964,Unf,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,11,2006,WD,Abnorml,115000 +1222,20,RL,55,8250,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,250,LwQ,492,210,952,GasA,Ex,Y,SBrkr,1211,0,0,1211,0,0,1,0,3,1,TA,5,Typ,1,TA,Attchd,1968,Unf,1,322,TA,TA,Y,0,63,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,134000 +1223,50,RL,78,10496,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,6,6,1949,1950,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,320,TA,TA,CBlock,TA,TA,Mn,Rec,196,Unf,0,844,1040,GasA,Ex,Y,SBrkr,1168,678,0,1846,0,0,2,0,3,1,TA,7,Typ,1,Gd,Attchd,1949,Unf,1,315,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,1,2007,WD,Normal,143000 +1224,20,RL,89,10680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,3,1951,1951,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,44,TA,TA,CBlock,TA,Fa,No,LwQ,756,Unf,0,1380,2136,GasA,TA,N,FuseA,2136,0,0,2136,0,0,2,0,4,1,TA,7,Mod,0,NA,Detchd,1951,Unf,2,528,TA,TA,Y,0,30,0,0,0,0,NA,MnPrv,NA,0,10,2006,WD,Normal,137900 +1225,60,RL,60,15384,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Av,GLQ,724,Unf,0,64,788,GasA,Ex,Y,SBrkr,788,702,0,1490,1,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2004,Fin,2,388,TA,TA,Y,100,75,0,0,0,0,NA,NA,NA,0,2,2008,WD,Normal,184000 
+1226,80,RL,65,10482,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,8,1958,1958,Hip,CompShg,VinylSd,VinylSd,BrkFace,63,TA,Gd,CBlock,TA,TA,Av,GLQ,507,Unf,0,81,588,GasA,Ex,Y,SBrkr,1138,0,0,1138,0,1,1,0,3,1,TA,6,Typ,0,NA,Attchd,1958,RFn,1,264,TA,TA,Y,224,0,0,0,0,0,NA,MnWw,NA,0,6,2007,WD,Normal,145000 +1227,60,RL,86,14598,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Somerst,Feedr,Norm,1Fam,2Story,6,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,74,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,894,894,GasA,Ex,Y,SBrkr,894,1039,0,1933,0,0,2,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2007,Fin,3,668,TA,TA,Y,100,18,0,0,0,0,NA,NA,NA,0,1,2008,WD,Normal,214000 +1228,20,RL,72,8872,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,8,1965,2008,Gable,CompShg,VinylSd,VinylSd,BrkFace,300,TA,TA,CBlock,TA,TA,No,ALQ,595,Unf,0,317,912,GasA,Ex,Y,SBrkr,912,0,0,912,1,0,1,0,2,1,Gd,5,Typ,0,NA,Detchd,1992,Unf,2,576,TA,TA,Y,0,240,0,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,147000 +1229,120,RL,65,8769,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,9,5,2008,2008,Hip,CompShg,MetalSd,MetalSd,BrkFace,766,Ex,TA,PConc,Ex,TA,No,GLQ,1540,Unf,0,162,1702,GasA,Ex,Y,SBrkr,1702,0,0,1702,1,0,1,1,1,1,Ex,7,Typ,1,Gd,Attchd,2008,Fin,3,1052,TA,TA,Y,0,72,0,0,224,0,NA,NA,NA,0,10,2008,New,Partial,367294 +1230,80,RL,70,7910,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,5,1960,1960,Hip,CompShg,BrkFace,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,666,Unf,0,409,1075,GasA,Gd,Y,SBrkr,1507,0,0,1507,0,0,2,0,4,1,TA,7,Maj1,0,NA,Basment,1960,Unf,1,404,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,8,2008,WD,Normal,127000 +1231,90,RL,NA,18890,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,RRAe,Duplex,1.5Fin,5,5,1977,1977,Shed,CompShg,Plywood,Plywood,None,1,TA,TA,CBlock,Gd,TA,No,GLQ,498,Rec,211,652,1361,GasA,Ex,Y,SBrkr,1361,1259,0,2620,0,0,2,2,4,2,TA,12,Typ,1,TA,BuiltIn,1977,RFn,2,600,TA,TA,N,155,24,145,0,0,0,NA,NA,Gar2,8300,8,2007,WD,Normal,190000 
+1232,90,RL,70,7728,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,SLvl,5,6,1962,1962,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,120,TA,TA,CBlock,TA,TA,Av,ALQ,803,Unf,0,303,1106,GasA,TA,Y,SBrkr,1190,0,0,1190,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1962,Unf,2,540,TA,TA,Y,0,18,0,0,0,0,NA,GdWo,NA,0,5,2006,WD,Normal,132500 +1233,90,RL,70,9842,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,Duplex,1Story,4,5,1962,1962,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,GasA,TA,Y,SBrkr,1224,0,0,1224,0,0,2,0,2,2,TA,6,Typ,0,NA,CarPort,1962,Unf,2,462,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,101800 +1234,20,RL,NA,12160,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Hip,CompShg,Plywood,Plywood,BrkFace,180,TA,TA,CBlock,TA,TA,No,Rec,1000,Unf,0,188,1188,GasA,Fa,Y,SBrkr,1188,0,0,1188,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,2,531,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2010,COD,Abnorml,142000 +1235,70,RH,55,8525,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,2Story,5,6,1911,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,Av,Unf,0,Unf,0,940,940,GasA,TA,N,FuseA,1024,940,0,1964,0,0,1,1,4,1,TA,7,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,192,0,0,0,0,NA,NA,NA,0,11,2008,WD,Abnorml,130000 +1236,70,RL,96,13132,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,5,5,1914,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,Mn,Unf,0,Unf,0,747,747,GasA,Gd,Y,FuseF,892,892,0,1784,0,0,1,1,4,1,TA,9,Typ,0,NA,Detchd,1914,Unf,1,180,Fa,Fa,N,203,40,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,138887 +1237,160,RL,36,2628,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,Twnhs,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,Wd Shng,Stone,106,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,764,764,GasA,Ex,Y,SBrkr,764,862,0,1626,0,0,2,1,2,1,Gd,6,Typ,0,NA,BuiltIn,2003,RFn,2,474,TA,TA,Y,0,27,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,175500 
+1238,60,RL,41,12393,Pave,NA,IR2,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,847,847,GasA,Ex,Y,SBrkr,847,1101,0,1948,0,0,2,1,4,1,Gd,8,Typ,1,Gd,BuiltIn,2004,Fin,2,434,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,195000 +1239,20,RL,63,13072,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,RRAe,Norm,1Fam,1Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1141,1141,GasA,Ex,Y,SBrkr,1141,0,0,1141,0,0,1,1,3,1,TA,6,Typ,0,NA,Detchd,2005,Unf,2,484,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2006,WD,Abnorml,142500 +1240,20,RL,64,9037,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,32,Gd,TA,PConc,Gd,TA,Av,GLQ,428,Unf,0,1048,1476,GasA,Ex,Y,SBrkr,1484,0,0,1484,0,0,2,0,2,1,Ex,6,Typ,1,Gd,Attchd,2006,RFn,2,472,TA,TA,Y,120,33,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,265900 +1241,60,RL,65,8158,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,252,Gd,TA,PConc,Gd,TA,No,GLQ,550,Unf,0,334,884,GasA,Ex,Y,SBrkr,884,884,0,1768,1,0,2,1,3,1,Gd,8,Typ,0,NA,Attchd,2003,RFn,2,543,TA,TA,Y,0,63,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,224900 +1242,20,RL,83,9849,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,6,2007,2007,Hip,CompShg,VinylSd,VinylSd,Stone,0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1689,1689,GasA,Ex,Y,SBrkr,1689,0,0,1689,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,RFn,3,954,TA,TA,Y,0,56,0,0,0,0,NA,NA,NA,0,6,2007,New,Partial,248328 +1243,85,RL,85,10625,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,SFoyer,7,6,1974,1974,Gable,CompShg,Plywood,Plywood,BrkFace,81,TA,TA,CBlock,Gd,TA,Gd,GLQ,885,LwQ,168,0,1053,GasA,TA,Y,SBrkr,1173,0,0,1173,1,0,2,0,3,1,Gd,6,Typ,2,TA,Attchd,1974,RFn,2,528,TA,TA,Y,0,120,0,0,0,0,NA,MnPrv,NA,0,1,2010,WD,Family,170000 
+1244,20,RL,107,13891,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,10,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,NA,NA,Ex,TA,PConc,Ex,Gd,Gd,GLQ,1386,Unf,0,690,2076,GasA,Ex,Y,SBrkr,2076,0,0,2076,1,0,2,1,2,1,Ex,7,Typ,1,Gd,Attchd,2006,Fin,3,850,TA,TA,Y,216,229,0,0,0,0,NA,NA,NA,0,9,2006,New,Partial,465000 +1245,70,RL,NA,11435,Pave,NA,IR1,HLS,AllPub,Corner,Mod,Crawfor,Norm,Norm,1Fam,2Story,8,7,1929,1950,Gable,CompShg,BrkFace,Stucco,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,792,792,GasA,Fa,Y,SBrkr,792,725,0,1517,0,0,1,0,3,1,Gd,7,Typ,2,Gd,Detchd,1931,Unf,2,400,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,230000 +1246,80,RL,78,12090,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,6,7,1984,2003,Hip,CompShg,VinylSd,VinylSd,BrkFace,74,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,585,585,GasA,Ex,Y,SBrkr,1140,728,0,1868,0,0,3,1,3,1,TA,7,Typ,1,TA,BuiltIn,1984,Fin,2,477,TA,TA,Y,268,112,0,0,147,0,NA,NA,NA,0,1,2007,WD,Abnorml,178000 +1247,60,FV,65,8125,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,756,756,GasA,Ex,Y,SBrkr,756,797,0,1553,0,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2005,RFn,2,615,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,3,2006,New,Partial,186500 +1248,80,RL,NA,12328,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,BrkFace,335,TA,TA,CBlock,TA,TA,Av,GLQ,539,Unf,0,473,1012,GasA,TA,Y,SBrkr,1034,0,0,1034,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1976,Unf,3,888,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,169900 +1249,75,RM,60,9600,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2.5Unf,6,5,1917,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,BrkTil,Gd,TA,No,Rec,319,Unf,0,416,735,OthW,Fa,N,SBrkr,1134,924,0,2058,0,0,1,1,3,1,TA,8,Typ,1,Gd,Detchd,1950,Unf,2,396,Fa,Fa,P,0,0,259,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,129500 
+1250,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1950,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,534,Rec,96,246,876,GasA,TA,Y,SBrkr,988,0,0,988,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1950,Unf,1,276,TA,TA,Y,0,80,0,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,119000 +1251,20,RL,93,11160,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,7,5,1968,1968,Hip,CompShg,BrkFace,BrkFace,None,0,Gd,TA,CBlock,TA,TA,No,ALQ,1065,Unf,0,1045,2110,GasA,Ex,Y,SBrkr,2110,0,0,2110,1,0,2,1,3,1,Ex,8,Typ,2,TA,Attchd,1968,Fin,2,522,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,244000 +1252,120,RL,NA,3136,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2003,2003,Gable,CompShg,VinylSd,Wd Shng,Stone,163,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1405,1405,GasA,Ex,Y,SBrkr,1405,0,0,1405,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2003,RFn,2,478,TA,TA,Y,148,36,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,171750 +1253,20,RL,62,9858,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,6,1968,1968,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,510,Unf,0,354,864,GasA,TA,Y,SBrkr,874,0,0,874,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1968,RFn,1,288,TA,TA,Y,33,0,0,0,0,0,NA,GdWo,Shed,600,11,2009,WD,Normal,130000 +1254,60,RL,NA,17542,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Veenker,Norm,Norm,1Fam,2Story,7,7,1974,2003,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,CBlock,TA,TA,Gd,LwQ,125,ALQ,1031,36,1192,GasA,TA,Y,SBrkr,1516,651,0,2167,1,0,2,1,3,1,Gd,9,Typ,2,Gd,Attchd,1974,RFn,2,518,TA,TA,Y,220,47,0,0,0,0,NA,MnPrv,NA,0,7,2007,WD,Normal,294000 +1255,60,RL,60,6931,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,2Story,7,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,Stone,92,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,746,746,GasA,Ex,Y,SBrkr,760,896,0,1656,0,0,2,1,3,1,Gd,7,Typ,1,Gd,BuiltIn,2003,Fin,2,397,TA,TA,Y,178,128,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,165400 
+1256,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,6,1931,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,Fa,No,LwQ,425,Unf,0,459,884,GasA,TA,Y,FuseA,959,408,0,1367,0,0,1,0,3,1,TA,6,Typ,1,Gd,Detchd,1978,Unf,1,560,TA,TA,Y,0,0,0,0,120,0,NA,NA,NA,0,11,2007,WD,Normal,127500 +1257,20,RL,91,14303,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,1Story,8,5,1994,1994,Hip,CompShg,HdBoard,HdBoard,BrkFace,554,Gd,TA,PConc,Gd,TA,Gd,GLQ,1314,Unf,0,672,1986,GasA,Ex,Y,SBrkr,1987,0,0,1987,1,0,2,0,2,1,Gd,7,Typ,1,TA,Attchd,1994,Fin,2,691,TA,TA,Y,262,36,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,301500 +1258,30,RL,56,4060,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Edwards,Feedr,Norm,1Fam,1Story,5,8,1922,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,Fa,TA,No,Unf,0,Unf,0,864,864,GasA,Ex,Y,SBrkr,864,0,0,864,0,0,1,0,2,1,TA,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,96,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,99900 +1259,80,RL,59,9587,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,SLvl,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,Stone,182,Gd,TA,PConc,Gd,TA,Gd,GLQ,655,Unf,0,201,856,GasA,Ex,Y,SBrkr,1166,0,0,1166,1,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,2005,Fin,2,400,TA,TA,Y,212,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,190000 +1260,20,RL,65,9750,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,8,1969,1969,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,602,LwQ,438,14,1054,GasA,Gd,Y,SBrkr,1054,0,0,1054,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1969,Unf,2,460,TA,TA,Y,180,0,0,0,80,0,NA,NA,NA,0,7,2008,WD,Normal,151000 +1261,60,RL,NA,24682,Pave,NA,IR3,Lvl,AllPub,CulDSac,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,6,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,841,841,GasA,Ex,Y,SBrkr,892,783,0,1675,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,1999,Fin,2,502,TA,TA,Y,0,103,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,181000 
+1262,20,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1956,1956,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Rec,504,Unf,0,546,1050,GasA,Gd,Y,SBrkr,1050,0,0,1050,0,0,1,0,2,1,TA,5,Typ,0,NA,Attchd,1956,Unf,1,338,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,128900 +1263,50,RL,NA,11250,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1.5Fin,4,5,1957,1989,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Av,Unf,0,Unf,0,1104,1104,GasA,Ex,Y,FuseA,1104,684,0,1788,1,0,1,0,5,1,TA,8,Min2,2,TA,Attchd,1957,Unf,1,304,TA,TA,Y,120,0,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,161500 +1264,70,RL,60,13515,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,2Story,6,6,1919,1950,Gambrel,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,764,764,GasA,Ex,Y,FuseA,1060,764,0,1824,0,0,1,0,3,1,TA,8,Typ,1,Gd,Detchd,1940,Unf,2,520,TA,TA,N,0,0,126,0,0,0,NA,GdPrv,NA,0,7,2007,WD,Normal,180500 +1265,120,RH,34,4060,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,TwnhsE,1Story,6,5,1998,1999,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,266,Unf,0,1139,1405,GasA,Ex,Y,SBrkr,1337,0,0,1337,1,0,2,0,2,1,Gd,5,Typ,0,NA,Attchd,1998,Fin,2,511,TA,TA,Y,144,68,0,0,0,0,NA,NA,NA,0,8,2008,COD,Abnorml,181000 +1266,160,FV,35,3735,Pave,NA,Reg,Lvl,AllPub,FR3,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,1999,1999,Hip,CompShg,MetalSd,MetalSd,BrkFace,218,Gd,TA,PConc,Gd,TA,No,GLQ,450,Unf,0,241,691,GasA,Ex,Y,SBrkr,713,739,0,1452,1,0,2,1,3,1,Gd,6,Typ,0,NA,Detchd,1999,Unf,2,506,TA,TA,Y,0,34,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,183900 +1267,190,RM,60,10120,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,OldTown,Feedr,Norm,2fmCon,2.5Unf,7,4,1910,1950,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,Fa,TA,CBlock,TA,TA,No,Unf,0,Unf,0,925,925,GasA,TA,N,FuseF,964,925,0,1889,0,0,1,1,4,2,TA,9,Typ,1,Gd,Detchd,1960,Unf,1,308,TA,TA,N,0,0,264,0,0,0,NA,MnPrv,NA,0,1,2007,WD,Normal,122000 
+1268,20,RL,89,13214,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,9,5,2008,2009,Hip,CompShg,Stucco,CmentBd,None,0,Ex,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,2002,2002,GasA,Ex,Y,SBrkr,2018,0,0,2018,0,0,2,0,3,1,Ex,10,Typ,1,Gd,Attchd,2009,Fin,3,746,TA,TA,Y,144,76,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,378500 +1269,50,RL,NA,14100,Pave,NA,IR1,Lvl,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,1.5Fin,8,9,1935,1997,Gable,CompShg,Stucco,Stucco,BrkFace,632,TA,Gd,CBlock,TA,TA,Mn,Rec,192,Unf,0,536,728,GasA,Ex,Y,SBrkr,1968,1479,0,3447,0,0,3,1,4,1,Gd,11,Typ,2,Gd,BuiltIn,1982,Unf,3,1014,TA,TA,Y,314,12,0,0,0,0,NA,GdWo,NA,0,5,2008,WD,Normal,381000 +1270,50,RL,78,11344,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1.5Fin,5,5,1958,1958,Gable,CompShg,MetalSd,MetalSd,BrkFace,180,TA,TA,CBlock,TA,TA,No,BLQ,460,Unf,0,414,874,GasW,TA,Y,FuseA,874,650,0,1524,0,0,1,1,3,1,TA,7,Typ,0,NA,Attchd,1958,Unf,1,315,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,7,2007,WD,Normal,144000 +1271,40,RL,NA,23595,Pave,NA,Reg,Low,AllPub,Inside,Sev,ClearCr,Norm,Norm,1Fam,1Story,7,6,1979,1979,Shed,WdShake,Plywood,Plywood,None,0,Gd,TA,PConc,Gd,TA,Gd,GLQ,1258,Unf,0,74,1332,GasA,TA,Y,SBrkr,1332,192,0,1524,2,0,0,1,0,1,Gd,4,Typ,1,TA,Attchd,1979,Fin,2,586,TA,TA,Y,268,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,260000 +1272,20,RL,NA,9156,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,PosN,Norm,1Fam,1Story,6,7,1968,1968,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1489,1489,GasA,Gd,Y,SBrkr,1489,0,0,1489,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,1968,RFn,2,462,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,185750 +1273,20,RL,NA,13526,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1965,1965,Hip,CompShg,HdBoard,Plywood,BrkFace,114,TA,TA,CBlock,TA,TA,No,BLQ,560,LwQ,375,0,935,GasA,TA,Y,SBrkr,935,0,0,935,1,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1965,Unf,1,288,TA,TA,Y,180,0,0,0,0,0,NA,MnPrv,NA,0,11,2006,WD,Normal,137000 
+1274,80,RL,124,11512,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Edwards,Norm,Norm,1Fam,SLvl,6,7,1959,2006,Gable,CompShg,Plywood,Plywood,BrkFace,84,TA,TA,CBlock,TA,TA,Av,ALQ,719,Unf,0,300,1019,GasA,Gd,Y,SBrkr,1357,0,0,1357,1,0,1,0,2,1,Ex,5,Typ,1,Gd,Basment,1959,RFn,1,312,TA,TA,Y,0,0,0,0,163,0,NA,GdPrv,NA,0,5,2008,WD,Normal,177000 +1275,50,RL,53,5362,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,5,6,1910,2003,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,661,661,GasA,Ex,Y,SBrkr,661,589,0,1250,0,0,2,0,3,1,TA,8,Typ,1,Gd,Detchd,1985,Unf,2,552,TA,TA,Y,242,0,81,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,139000 +1276,90,RL,95,11345,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Feedr,Norm,Duplex,2Story,5,5,1948,1950,Gable,Roll,AsbShng,AsbShng,Stone,567,TA,TA,CBlock,TA,TA,No,Rec,220,Unf,0,708,928,GasA,Gd,Y,FuseA,928,992,0,1920,0,0,2,0,4,2,TA,10,Typ,0,NA,Detchd,1948,Unf,2,400,TA,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,137000 +1277,60,RL,NA,12936,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,6,1972,1972,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,Gd,No,BLQ,593,Unf,0,130,723,GasA,TA,Y,SBrkr,735,660,0,1395,0,1,1,1,3,1,TA,6,Typ,1,TA,Attchd,1972,Unf,2,497,TA,TA,Y,294,116,0,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,162000 +1278,80,RL,NA,17871,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,6,5,1967,1976,Gable,CompShg,HdBoard,HdBoard,BrkFace,359,TA,TA,CBlock,Gd,TA,Av,ALQ,528,Unf,0,1152,1680,GasA,Fa,Y,SBrkr,1724,0,0,1724,1,0,1,1,3,1,TA,7,Typ,1,Gd,Attchd,1967,RFn,2,480,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,197900 +1279,60,RL,75,9473,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,NA,NA,Gd,TA,PConc,Gd,TA,No,GLQ,804,Unf,0,324,1128,GasA,Ex,Y,SBrkr,1128,903,0,2031,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2002,RFn,2,577,TA,TA,Y,0,211,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,237000 +1280,50,C 
(all),60,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,4,1920,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,698,698,GasA,TA,Y,FuseA,698,430,0,1128,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1980,RFn,2,528,TA,TA,Y,30,0,164,0,0,0,NA,NA,NA,0,4,2010,COD,Abnorml,68400 +1281,20,RL,67,9808,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,110,Gd,TA,PConc,Gd,TA,No,GLQ,788,Unf,0,785,1573,GasA,Ex,Y,SBrkr,1573,0,0,1573,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2002,RFn,2,544,TA,TA,Y,0,72,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,227000 +1282,20,RL,50,8049,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Timber,Norm,Norm,1Fam,1Story,7,5,1990,1990,Hip,CompShg,HdBoard,HdBoard,BrkFace,54,TA,TA,CBlock,Gd,TA,No,ALQ,1053,Unf,0,256,1309,GasA,TA,Y,SBrkr,1339,0,0,1339,1,0,2,0,2,1,TA,6,Typ,1,TA,Attchd,1990,Fin,2,484,Gd,Gd,Y,0,58,0,0,90,0,NA,NA,NA,0,7,2006,WD,Normal,180000 +1283,20,RL,61,8800,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,7,1977,2008,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Gd,TA,Mn,LwQ,532,Rec,144,364,1040,GasA,TA,Y,SBrkr,1040,0,0,1040,0,0,2,0,3,1,Gd,5,Typ,0,NA,Detchd,1977,Unf,2,484,TA,TA,Y,0,0,0,0,288,0,NA,NA,NA,0,9,2009,WD,Normal,150500 +1284,90,RL,94,9400,Pave,NA,Reg,Low,AllPub,Corner,Gtl,Mitchel,Norm,Norm,Duplex,2Story,6,5,1971,1971,Mansard,CompShg,MetalSd,Wd Shng,None,0,TA,TA,CBlock,TA,TA,Av,Unf,0,Unf,0,912,912,GasA,TA,Y,SBrkr,912,912,0,1824,0,0,2,2,4,2,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,128,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,139000 +1285,50,RL,50,9638,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Feedr,Norm,1Fam,1.5Fin,6,7,1919,1990,Gable,CompShg,Wd Sdng,Wd Shng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,804,804,GasA,Ex,Y,SBrkr,1699,748,0,2447,0,0,2,0,4,1,Gd,10,Min2,1,Gd,Detchd,1969,Unf,1,336,TA,TA,Y,272,0,42,0,116,0,NA,NA,NA,0,3,2010,WD,Normal,169000 
+1286,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,6,1939,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Fa,CBlock,TA,TA,No,Unf,0,Unf,0,780,780,GasA,Ex,Y,FuseF,825,587,0,1412,0,0,1,0,4,1,TA,6,Typ,1,Gd,Detchd,1939,Unf,1,280,TA,TA,Y,45,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,132500 +1287,20,RL,NA,9790,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Feedr,Norm,1Fam,1Story,6,5,1963,1963,Hip,CompShg,HdBoard,HdBoard,BrkFace,451,TA,TA,CBlock,TA,TA,No,ALQ,569,Rec,81,678,1328,GasA,TA,Y,SBrkr,1328,0,0,1328,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1963,Unf,2,528,TA,TA,Y,0,26,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,143000 +1288,20,RL,NA,36500,Pave,NA,IR1,Low,AllPub,Inside,Mod,ClearCr,Norm,Norm,1Fam,1Story,5,5,1964,1964,Gable,CompShg,Wd Sdng,Wd Sdng,BrkCmn,621,TA,Gd,CBlock,TA,TA,Av,Rec,812,Unf,0,812,1624,GasA,Fa,Y,SBrkr,1582,0,0,1582,0,1,2,0,4,1,TA,7,Typ,0,NA,Attchd,1964,Unf,2,390,TA,TA,N,168,198,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,190000 +1289,120,RL,40,5664,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,2000,2000,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,1158,Unf,0,343,1501,GasA,Ex,Y,SBrkr,1659,0,0,1659,1,0,2,0,2,1,Ex,5,Typ,1,Ex,Attchd,2000,Fin,2,499,TA,TA,Y,212,59,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,278000 +1290,60,RL,86,11065,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,2Story,8,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,Stone,788,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,1085,1085,GasA,Ex,Y,SBrkr,1120,850,0,1970,0,0,2,1,3,1,Ex,8,Typ,1,Gd,BuiltIn,2006,Fin,3,753,TA,TA,Y,177,74,0,0,0,0,NA,NA,NA,0,10,2006,New,Partial,281000 +1291,80,RL,NA,14112,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,SLvl,5,7,1964,1964,Hip,CompShg,Wd Sdng,HdBoard,BrkFace,86,TA,TA,PConc,TA,TA,Av,GLQ,1014,Unf,0,138,1152,GasA,TA,Y,SBrkr,1152,0,0,1152,1,0,1,0,3,1,TA,6,Typ,1,Gd,Attchd,1964,RFn,2,484,TA,TA,Y,227,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,180500 
+1292,160,RM,21,1680,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,5,7,1972,1972,Gable,CompShg,CemntBd,CmentBd,BrkFace,268,TA,TA,CBlock,TA,TA,No,ALQ,231,Unf,0,399,630,GasA,TA,Y,SBrkr,630,672,0,1302,0,0,2,1,3,1,TA,6,Typ,0,NA,Detchd,1972,Unf,1,264,TA,TA,Y,185,0,0,0,0,0,NA,NA,NA,0,2,2009,WD,Normal,119500 +1293,70,RM,60,6600,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,5,4,1892,1965,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,Stone,TA,TA,No,Unf,0,Unf,0,994,994,GasA,TA,N,SBrkr,1378,994,0,2372,0,0,2,0,4,2,TA,11,Min2,0,NA,Attchd,1985,RFn,1,432,TA,TA,Y,0,287,0,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,107500 +1294,60,RL,78,10140,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,Gd,TA,No,GLQ,194,Unf,0,638,832,GasA,TA,Y,SBrkr,832,832,0,1664,0,0,2,1,4,1,TA,8,Typ,1,TA,Attchd,1976,RFn,2,528,TA,TA,Y,0,28,0,0,259,0,NA,GdWo,NA,0,3,2006,WD,Normal,162900 +1295,20,RL,60,8172,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,7,1955,1990,Hip,CompShg,WdShing,Plywood,None,0,TA,TA,CBlock,TA,TA,No,Rec,167,Unf,0,697,864,GasA,TA,Y,SBrkr,864,0,0,864,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1957,Unf,2,572,TA,TA,N,0,0,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,115000 +1296,20,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,BrkFace,168,TA,TA,CBlock,TA,TA,Av,BLQ,1016,Unf,0,36,1052,GasA,Gd,Y,SBrkr,1052,0,0,1052,1,0,1,1,3,1,TA,5,Typ,0,NA,Attchd,1968,RFn,1,288,TA,TA,Y,356,0,0,0,0,0,NA,GdWo,NA,0,11,2006,WD,Normal,138500 +1297,20,RL,80,8700,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1963,1963,Hip,CompShg,MetalSd,MetalSd,BrkFace,148,TA,Gd,CBlock,TA,TA,Mn,ALQ,776,Unf,0,344,1120,GasA,Gd,Y,SBrkr,1128,0,0,1128,1,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1963,RFn,2,525,TA,TA,Y,192,20,123,0,0,0,NA,MnPrv,NA,0,12,2008,WD,Normal,155000 
+1298,180,RM,35,3675,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,TwnhsE,SFoyer,6,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,82,TA,TA,PConc,Gd,TA,Gd,GLQ,547,Unf,0,0,547,GasA,Gd,Y,SBrkr,1072,0,0,1072,1,0,2,0,2,1,TA,5,Typ,0,NA,Basment,2005,Fin,2,525,TA,TA,Y,0,44,0,0,0,0,NA,NA,NA,0,6,2006,New,Partial,140000 +1299,60,RL,313,63887,Pave,NA,IR3,Bnk,AllPub,Corner,Gtl,Edwards,Feedr,Norm,1Fam,2Story,10,5,2008,2008,Hip,ClyTile,Stucco,Stucco,Stone,796,Ex,TA,PConc,Ex,TA,Gd,GLQ,5644,Unf,0,466,6110,GasA,Ex,Y,SBrkr,4692,950,0,5642,2,0,2,1,3,1,Ex,12,Typ,3,Gd,Attchd,2008,Fin,2,1418,TA,TA,Y,214,292,0,0,0,480,Gd,NA,NA,0,1,2008,New,Partial,160000 +1300,20,RL,75,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1959,1994,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,LwQ,340,Rec,906,0,1246,GasA,Ex,Y,SBrkr,1246,0,0,1246,1,0,1,1,3,1,Gd,6,Typ,0,NA,Attchd,1959,RFn,1,305,TA,TA,Y,218,0,0,0,0,0,NA,GdPrv,NA,0,5,2010,WD,Normal,154000 +1301,60,RL,NA,10762,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,None,344,Gd,TA,PConc,Gd,TA,No,GLQ,694,Unf,0,284,978,GasA,Ex,Y,SBrkr,1005,978,0,1983,0,0,2,1,3,1,Gd,9,Typ,1,TA,Attchd,1999,Fin,2,490,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,225000 +1302,70,RL,NA,7500,Pave,NA,IR1,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,6,7,1942,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,547,Unf,0,224,771,GasA,Fa,Y,SBrkr,753,741,0,1494,0,0,1,0,3,1,Gd,7,Typ,2,Gd,Attchd,1942,Unf,1,213,TA,TA,P,0,0,0,0,224,0,NA,NA,NA,0,11,2009,WD,Normal,177500 +1303,60,RL,92,10120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1994,1994,Hip,CompShg,VinylSd,VinylSd,BrkFace,391,Gd,TA,PConc,Gd,TA,No,GLQ,740,Unf,0,425,1165,GasA,Ex,Y,SBrkr,1203,1323,0,2526,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1994,RFn,3,844,TA,TA,Y,309,78,0,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,290000 
+1304,20,RL,73,8688,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,228,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1616,1616,GasA,Ex,Y,SBrkr,1616,0,0,1616,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2005,RFn,3,834,TA,TA,Y,208,59,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,232000 +1305,160,RM,32,3363,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,TwnhsE,2Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,Stone,117,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,976,976,GasA,Ex,Y,SBrkr,976,732,0,1708,0,0,2,0,3,1,Gd,7,Maj1,0,NA,Detchd,2004,Unf,2,380,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,4,2006,WD,Normal,130000 +1306,20,RL,108,13173,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2006,2007,Hip,CompShg,VinylSd,VinylSd,Stone,300,Gd,TA,PConc,Ex,TA,No,GLQ,1572,Unf,0,80,1652,GasA,Ex,Y,SBrkr,1652,0,0,1652,1,0,2,0,2,1,Ex,6,Typ,2,Ex,Attchd,2006,Fin,2,840,TA,TA,Y,404,102,0,0,0,0,NA,NA,NA,0,11,2009,WD,Normal,325000 +1307,120,RL,48,6955,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,TwnhsE,1Story,7,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,Stone,94,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1368,1368,GasA,Ex,Y,SBrkr,1368,0,0,1368,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,RFn,2,474,TA,TA,Y,132,35,0,0,0,0,NA,NA,NA,0,9,2006,New,Partial,202500 +1308,20,RL,60,8072,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,5,1994,1995,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,PConc,Gd,Gd,No,ALQ,746,Unf,0,244,990,GasA,Ex,Y,SBrkr,990,0,0,990,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,2000,Unf,2,480,TA,TA,Y,0,64,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,138000 +1309,20,RM,100,12000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,7,1948,2005,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,GLQ,144,ALQ,608,172,924,GasA,Ex,Y,SBrkr,1122,0,0,1122,1,0,1,0,2,1,Gd,6,Typ,0,NA,Attchd,1948,Unf,2,528,TA,TA,Y,0,36,0,0,0,0,NA,GdWo,NA,0,5,2008,WD,Normal,147000 
+1310,20,RL,NA,7153,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,6,5,1991,1991,Gable,CompShg,HdBoard,HdBoard,BrkFace,88,TA,TA,CBlock,Gd,TA,No,GLQ,1200,Unf,0,78,1278,GasA,Gd,Y,SBrkr,1294,0,0,1294,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1991,RFn,2,496,TA,TA,Y,112,51,0,0,0,0,NA,GdWo,NA,0,6,2008,WD,Normal,179200 +1311,20,RL,100,17500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,PosA,Norm,1Fam,1Story,7,8,1959,2002,Gable,CompShg,BrkFace,HdBoard,None,0,Gd,Gd,PConc,Gd,TA,Av,GLQ,1406,Unf,0,496,1902,GasA,TA,Y,SBrkr,1902,0,0,1902,1,0,2,0,3,1,Ex,7,Typ,2,TA,Attchd,1959,Fin,2,567,TA,TA,Y,0,207,162,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,335000 +1312,20,RL,68,8814,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2005,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,80,Gd,TA,PConc,Gd,TA,No,GLQ,925,Unf,0,349,1274,GasA,Ex,Y,SBrkr,1274,0,0,1274,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2005,RFn,2,508,TA,TA,Y,264,98,0,0,0,0,NA,NA,NA,0,1,2007,New,Partial,203000 +1313,60,RL,NA,9572,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1990,1990,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,336,Gd,TA,PConc,Ex,TA,No,GLQ,482,Unf,0,971,1453,GasA,Ex,Y,SBrkr,1453,1357,0,2810,0,0,2,1,4,1,Gd,9,Typ,1,Ex,Attchd,1990,RFn,2,750,Gd,Gd,Y,500,0,0,0,0,0,NA,NA,NA,0,6,2007,WD,Normal,302000 +1314,60,RL,108,14774,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,9,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,165,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1393,1393,GasA,Ex,Y,SBrkr,1422,1177,0,2599,0,0,2,1,4,1,Gd,10,Typ,1,TA,BuiltIn,1999,Fin,3,779,TA,TA,Y,668,30,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,333168 +1315,20,RL,60,8190,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,6,1954,1954,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,732,Unf,0,216,948,GasA,Ex,Y,SBrkr,948,0,0,948,1,0,1,0,3,1,TA,5,Typ,1,TA,Detchd,1956,Unf,1,280,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,10,2007,WD,Normal,119000 
+1316,60,RL,85,11075,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,5,1969,1969,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,Fa,TA,Mn,ALQ,500,LwQ,276,176,952,GasA,TA,Y,SBrkr,1092,1020,0,2112,0,0,2,1,4,1,TA,9,Typ,2,Gd,Attchd,1969,Unf,2,576,TA,TA,Y,280,0,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,206900 +1317,20,RL,61,10226,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,270,Gd,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,1622,1622,GasA,Ex,Y,SBrkr,1630,0,0,1630,1,0,2,0,3,1,Ex,8,Typ,1,Gd,Attchd,2008,RFn,3,860,TA,TA,Y,172,42,0,0,0,0,NA,NA,NA,0,1,2009,WD,Normal,295493 +1318,120,FV,47,4230,Pave,Pave,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,TwnhsE,1Story,7,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,Gd,No,Unf,0,Unf,0,1352,1352,GasA,Ex,Y,SBrkr,1352,0,0,1352,0,0,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2006,RFn,2,466,TA,TA,Y,0,241,0,0,0,0,NA,NA,NA,0,4,2007,New,Partial,208900 +1319,20,RL,NA,14781,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2001,2002,Hip,CompShg,VinylSd,VinylSd,BrkFace,178,Gd,TA,PConc,Gd,TA,Gd,Unf,0,Unf,0,1753,1753,GasA,Ex,Y,SBrkr,1787,0,0,1787,0,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,2001,RFn,3,748,TA,TA,Y,198,150,0,0,0,0,NA,NA,NA,0,8,2006,WD,Normal,275000 +1320,20,RL,75,10215,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1954,1954,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,132,TA,TA,PConc,TA,TA,No,ALQ,492,Unf,0,372,864,GasA,Ex,Y,SBrkr,948,0,0,948,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1954,Unf,1,248,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,2,2007,WD,Normal,111000 +1321,20,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,3,1957,1957,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,ALQ,189,Rec,661,628,1478,GasA,Gd,Y,SBrkr,1478,0,0,1478,1,0,1,1,3,1,TA,6,Typ,2,Gd,Attchd,1957,RFn,2,442,TA,TA,Y,114,0,0,0,216,0,NA,NA,NA,0,6,2009,WD,Normal,156500 
+1322,20,RL,NA,6627,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,BrkSide,Feedr,Norm,1Fam,1Story,3,6,1949,1950,Hip,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,NA,NA,NA,NA,0,NA,0,0,0,Floor,TA,N,SBrkr,720,0,0,720,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1955,Unf,1,287,TA,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,72500 +1323,60,RL,107,10186,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,674,Unf,0,76,750,GasA,Ex,Y,SBrkr,1061,862,0,1923,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1992,RFn,2,564,TA,TA,Y,240,39,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,190000 +1324,30,RL,50,5330,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,4,7,1940,1950,Hip,CompShg,VinylSd,VinylSd,None,0,Fa,TA,CBlock,TA,TA,No,LwQ,280,Unf,0,140,420,GasA,Gd,Y,SBrkr,708,0,0,708,0,0,1,0,2,1,Fa,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,164,0,0,0,0,0,NA,NA,NA,0,12,2009,WD,Normal,82500 +1325,20,RL,75,9986,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,428,Gd,TA,PConc,Ex,TA,Av,Unf,0,Unf,0,1795,1795,GasA,Ex,Y,SBrkr,1795,0,0,1795,0,0,2,0,2,1,Gd,7,Typ,1,Gd,Attchd,2007,RFn,3,895,TA,TA,Y,0,49,0,0,0,0,NA,NA,NA,0,2,2007,New,Partial,147000 +1326,30,RM,40,3636,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1Story,4,4,1922,1950,Gable,CompShg,AsbShng,AsbShng,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,796,796,GasA,Fa,N,SBrkr,796,0,0,796,0,0,1,0,2,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,0,100,0,0,0,NA,MnPrv,NA,0,1,2008,WD,Normal,55000 +1327,30,RH,70,4270,Pave,NA,Reg,Bnk,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,1Story,3,6,1931,2006,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,No,Rec,544,Unf,0,0,544,GasA,Ex,Y,SBrkr,774,0,0,774,0,0,1,0,3,1,Gd,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,286,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,79000 
+1328,20,RL,60,6600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,9,1982,2008,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,CBlock,TA,TA,No,ALQ,641,Unf,0,175,816,GasA,Ex,Y,SBrkr,816,0,0,816,0,1,1,0,3,1,Gd,5,Typ,1,Ex,Attchd,1982,Unf,1,264,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,10,2008,WD,Normal,130500 +1329,50,RM,60,10440,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1.5Fin,6,7,1920,1950,Gable,CompShg,BrkFace,Wd Sdng,None,0,Gd,Gd,BrkTil,Gd,TA,No,LwQ,493,Unf,0,1017,1510,GasW,Ex,Y,SBrkr,1584,1208,0,2792,0,0,2,0,5,1,TA,8,Mod,2,TA,Detchd,1920,Unf,2,520,Fa,TA,Y,0,547,0,0,480,0,NA,MnPrv,Shed,1150,6,2008,WD,Normal,256000 +1330,60,RL,63,9084,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,1998,1998,Hip,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,935,935,GasA,Gd,Y,SBrkr,955,677,0,1632,0,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,1998,Fin,2,462,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,176500 +1331,20,RL,85,10000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,Stone,410,Gd,TA,PConc,Gd,Gd,Av,Unf,0,Unf,0,1588,1588,GasA,Ex,Y,SBrkr,1588,0,0,1588,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,RFn,3,825,TA,TA,Y,144,45,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,227000 +1332,80,RL,55,10780,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,SLvl,5,5,1976,1976,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,483,Unf,0,428,911,GasA,Gd,Y,SBrkr,954,0,0,954,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1976,Unf,2,576,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,132500 +1333,20,RL,67,8877,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,1Story,4,6,1938,1958,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Mn,ALQ,690,Unf,0,126,816,GasA,Ex,Y,SBrkr,816,0,0,816,1,0,1,0,2,1,TA,3,Typ,1,Gd,Detchd,1958,Unf,1,288,Fa,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,100000 
+1334,50,RM,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,5,6,1938,1995,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,803,803,GasA,Ex,Y,SBrkr,803,557,0,1360,0,0,1,1,2,1,Gd,6,Typ,0,NA,Detchd,1951,Unf,1,297,TA,TA,Y,0,65,190,0,0,0,NA,MnPrv,NA,0,7,2006,WD,Normal,125500 +1335,160,RM,24,2368,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,TwnhsE,2Story,5,6,1970,1970,Gable,CompShg,HdBoard,HdBoard,None,312,TA,TA,CBlock,TA,TA,No,LwQ,765,Unf,0,0,765,GasA,TA,Y,SBrkr,765,600,0,1365,0,0,1,1,3,1,TA,7,Min1,0,NA,Attchd,1970,Unf,2,440,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,125000 +1336,20,RL,80,9650,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,5,1977,1977,Gable,CompShg,Plywood,Plywood,BrkFace,360,TA,TA,CBlock,Gd,TA,No,ALQ,686,Unf,0,664,1350,GasA,TA,Y,SBrkr,1334,0,0,1334,0,1,2,0,2,1,TA,6,Typ,1,TA,Attchd,1977,RFn,2,630,TA,TA,Y,0,16,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,167900 +1337,90,RL,87,9246,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Feedr,Norm,Duplex,1Story,5,5,1973,1973,Gable,CompShg,Plywood,Plywood,BrkFace,564,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1656,1656,GasA,TA,Y,SBrkr,1656,0,0,1656,0,0,2,0,4,2,TA,8,Typ,0,NA,Detchd,1973,Unf,2,506,TA,TA,Y,0,211,0,0,0,0,NA,NA,NA,0,11,2008,WD,Normal,135000 +1338,30,RM,153,4118,Pave,Grvl,IR1,Bnk,AllPub,Corner,Mod,OldTown,Feedr,Norm,1Fam,1Story,4,4,1941,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,693,693,Grav,Fa,N,FuseA,693,0,0,693,0,0,1,0,2,1,Fa,4,Typ,0,NA,NA,NA,NA,0,0,NA,NA,N,0,20,0,0,0,0,NA,NA,NA,0,3,2006,WD,Normal,52500 +1339,60,RL,95,13450,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,700,Unf,0,216,916,GasA,Ex,Y,SBrkr,920,941,0,1861,1,0,2,1,3,1,Gd,8,Typ,0,NA,BuiltIn,2002,RFn,2,492,TA,TA,Y,146,91,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,200000 
+1340,20,RL,120,9560,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,5,7,1972,1972,Hip,CompShg,MetalSd,MetalSd,None,0,TA,Gd,CBlock,TA,TA,Mn,Rec,360,Unf,0,504,864,GasA,Ex,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1972,RFn,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,128500 +1341,20,RL,70,8294,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1971,1971,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,858,858,GasA,TA,Y,SBrkr,872,0,0,872,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1974,Unf,4,480,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,6,2007,WD,Normal,123000 +1342,20,RL,66,13695,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,RRAe,Norm,1Fam,1Story,6,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,GLQ,814,Unf,0,300,1114,GasA,Ex,Y,SBrkr,1114,0,0,1114,1,0,1,0,3,1,Gd,6,Typ,0,NA,Detchd,2004,Unf,2,576,TA,TA,Y,0,78,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,155000 +1343,60,RL,NA,9375,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,149,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1284,1284,GasA,Ex,Y,SBrkr,1284,885,0,2169,0,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2002,RFn,2,647,TA,TA,Y,192,87,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,228500 +1344,50,RL,57,7558,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,6,6,1928,1950,Gable,CompShg,BrkFace,Stone,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,896,896,GasA,Gd,Y,SBrkr,1172,741,0,1913,0,0,1,1,3,1,TA,9,Typ,1,TA,Detchd,1929,Unf,2,342,Fa,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2009,WD,Normal,177000 +1345,60,RL,85,11103,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,728,728,0,1456,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,2006,Fin,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,7,2007,New,Partial,155835 
+1346,30,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,4,4,1920,1950,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,PConc,TA,TA,No,ALQ,250,Unf,0,710,960,GasA,Gd,Y,FuseA,960,0,0,960,0,0,1,0,2,1,Fa,5,Typ,0,NA,Detchd,1997,Unf,1,308,TA,TA,Y,0,0,168,0,0,0,NA,NA,NA,0,7,2007,WD,Normal,108500 +1347,20,RL,NA,20781,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,NWAmes,PosN,Norm,1Fam,1Story,7,7,1968,2003,Hip,CompShg,BrkFace,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,297,Rec,68,1203,1568,GasA,TA,Y,SBrkr,2156,0,0,2156,0,0,2,0,3,1,TA,9,Typ,1,Gd,Attchd,1968,RFn,2,508,Gd,TA,Y,0,80,0,290,0,0,NA,NA,NA,0,6,2006,WD,Normal,262500 +1348,20,RL,93,15306,Pave,NA,IR1,HLS,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,Stone,100,Gd,TA,PConc,Ex,TA,Gd,GLQ,80,Unf,0,1652,1732,GasA,Ex,Y,SBrkr,1776,0,0,1776,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2006,Fin,3,712,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2007,New,Partial,283463 +1349,20,RL,NA,16196,Pave,NA,IR3,Low,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,Gd,GLQ,1443,Unf,0,39,1482,GasA,Ex,Y,SBrkr,1494,0,0,1494,1,0,2,0,3,1,Gd,5,Typ,1,Fa,Attchd,1998,RFn,2,514,TA,TA,Y,402,25,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,215000 +1350,70,RM,50,5250,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,8,5,1872,1987,Gable,CompShg,MetalSd,MetalSd,None,0,TA,Gd,BrkTil,TA,Fa,No,LwQ,259,Unf,0,425,684,OthW,Fa,N,SBrkr,938,1215,205,2358,0,0,2,0,4,1,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,54,20,0,0,0,NA,NA,NA,0,12,2008,WD,Normal,122000 +1351,90,RL,91,11643,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,Duplex,2Story,5,5,1969,1969,Gable,CompShg,MetalSd,MetalSd,BrkFace,368,TA,TA,CBlock,TA,TA,No,LwQ,500,Unf,0,748,1248,GasA,TA,Y,SBrkr,1338,1296,0,2634,1,1,2,2,6,2,TA,12,Typ,0,NA,Detchd,1969,Unf,4,968,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2009,WD,Normal,200000 
+1352,60,RL,70,9247,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,6,1962,1962,Gable,CompShg,HdBoard,HdBoard,BrkFace,318,TA,TA,CBlock,TA,TA,No,Rec,319,Unf,0,539,858,GasA,Ex,Y,SBrkr,858,858,0,1716,0,0,1,1,4,1,TA,8,Typ,1,Gd,Attchd,1962,Fin,2,490,TA,TA,Y,0,84,0,0,120,0,NA,NA,NA,0,3,2008,WD,Normal,171000 +1353,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,9,1937,2000,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,698,698,GasA,TA,Y,SBrkr,786,390,0,1176,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1999,Unf,2,624,TA,TA,N,210,0,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,134900 +1354,50,RL,56,14720,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NoRidge,Norm,Norm,1Fam,1.5Fin,8,5,1995,1996,Hip,CompShg,VinylSd,VinylSd,BrkFace,579,Gd,TA,PConc,Gd,TA,Av,GLQ,816,Unf,0,1217,2033,GasA,Ex,Y,SBrkr,2053,1185,0,3238,1,0,2,1,4,1,Gd,9,Typ,1,Ex,Attchd,1996,Fin,3,666,TA,TA,Y,283,86,0,0,0,0,NA,NA,NA,0,3,2010,WD,Normal,410000 +1355,60,RL,NA,10316,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,735,Unf,0,257,992,GasA,Ex,Y,SBrkr,992,873,0,1865,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,2000,RFn,3,839,TA,TA,Y,0,184,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,235000 +1356,80,RL,102,10192,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,7,6,1968,1992,Gable,CompShg,MetalSd,MetalSd,BrkFace,143,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,570,570,GasA,Gd,Y,SBrkr,1222,698,0,1920,0,0,3,0,4,1,Gd,8,Typ,1,TA,Attchd,1968,RFn,2,487,TA,TA,Y,0,98,0,0,0,0,NA,GdPrv,NA,0,9,2006,WD,Normal,170000 +1357,20,RL,NA,9477,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1966,1966,Gable,CompShg,HdBoard,HdBoard,BrkFace,65,TA,TA,CBlock,TA,TA,No,Rec,340,Unf,0,524,864,GasA,TA,Y,SBrkr,892,0,0,892,0,0,1,0,3,1,TA,5,Typ,0,NA,Attchd,1966,RFn,1,264,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,10,2008,WD,Normal,110000 
+1358,20,RL,NA,12537,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1971,2008,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,No,GLQ,734,Unf,0,344,1078,GasA,Ex,Y,SBrkr,1078,0,0,1078,1,0,1,1,3,1,TA,6,Typ,1,Fa,Attchd,1971,Fin,2,500,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,149900 +1359,160,FV,NA,2117,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,Twnhs,2Story,6,5,2000,2000,Gable,CompShg,MetalSd,MetalSd,BrkFace,216,Gd,TA,PConc,Gd,TA,No,GLQ,378,Unf,0,378,756,GasA,Ex,Y,SBrkr,769,804,0,1573,0,0,2,1,3,1,Gd,5,Typ,0,NA,Detchd,2000,Unf,2,440,TA,TA,Y,0,32,0,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,177500 +1360,20,RL,129,16737,Pave,NA,Reg,Lvl,AllPub,FR3,Gtl,NridgHt,Norm,Norm,1Fam,1Story,9,5,2004,2005,Hip,CompShg,VinylSd,VinylSd,BrkFace,66,Gd,TA,PConc,Ex,TA,Av,GLQ,1447,Unf,0,533,1980,GasA,Ex,Y,SBrkr,1980,0,0,1980,1,0,2,0,3,1,Ex,8,Typ,1,Gd,Attchd,2004,Fin,3,770,TA,TA,Y,194,45,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,315000 +1361,70,RL,51,9842,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Feedr,Norm,1Fam,2Story,5,6,1921,1998,Gable,CompShg,MetalSd,Wd Sdng,None,0,TA,TA,BrkTil,TA,Fa,No,Unf,0,Unf,0,612,612,GasA,Ex,Y,SBrkr,990,1611,0,2601,0,0,3,1,4,1,TA,8,Typ,0,NA,BuiltIn,1998,RFn,2,621,TA,TA,Y,183,0,301,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,189000 +1362,20,RL,124,16158,Pave,NA,IR1,Low,AllPub,Inside,Mod,StoneBr,Norm,Norm,1Fam,1Story,7,5,2005,2005,Hip,CompShg,VinylSd,VinylSd,Stone,16,Gd,TA,PConc,Ex,TA,Av,ALQ,1274,Unf,0,256,1530,GasA,Ex,Y,SBrkr,1530,0,0,1530,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2005,Fin,2,430,TA,TA,Y,168,36,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,260000 +1363,50,RL,NA,12513,Pave,NA,IR1,Lvl,AllPub,FR2,Gtl,NAmes,Feedr,Norm,1Fam,1.5Fin,4,4,1920,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,BrkTil,TA,Fa,No,Unf,0,Unf,0,715,715,GasA,Gd,Y,SBrkr,1281,457,0,1738,0,0,2,0,4,1,TA,7,Typ,1,Gd,Attchd,1920,Unf,1,368,TA,TA,Y,55,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,104900 
+1364,60,RL,73,8499,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,616,616,GasA,Ex,Y,SBrkr,616,796,0,1412,0,0,2,1,3,1,Gd,6,Typ,1,Gd,BuiltIn,2007,Fin,2,432,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,3,2007,New,Partial,156932 +1365,160,FV,30,3180,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,TwnhsE,2Story,7,5,2005,2005,Gable,CompShg,MetalSd,MetalSd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,600,600,GasA,Ex,Y,SBrkr,520,600,80,1200,0,0,2,1,2,1,Gd,4,Typ,0,NA,Detchd,2005,RFn,2,480,TA,TA,Y,0,166,0,0,0,0,NA,NA,NA,0,4,2006,WD,Abnorml,144152 +1366,60,FV,NA,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,533,Unf,0,281,814,GasA,Ex,Y,SBrkr,814,860,0,1674,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,2000,RFn,2,663,TA,TA,Y,0,96,0,0,0,0,NA,NA,NA,0,1,2010,WD,Normal,216000 +1367,60,RL,68,9179,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,158,Gd,TA,PConc,Gd,TA,No,GLQ,633,Unf,0,240,873,GasA,Ex,Y,SBrkr,882,908,0,1790,1,0,2,1,3,1,Gd,7,Typ,0,NA,Attchd,1999,RFn,2,588,TA,TA,Y,0,88,0,0,0,0,NA,NA,NA,0,6,2008,WD,Abnorml,193000 +1368,160,RM,41,2665,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,5,6,1977,1977,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,PConc,TA,TA,No,ALQ,548,Rec,173,36,757,GasA,Ex,Y,SBrkr,925,550,0,1475,0,0,2,0,4,1,TA,6,Typ,1,TA,Attchd,1977,RFn,1,336,TA,TA,Y,104,26,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,127000 +1369,120,RM,NA,4435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2003,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,170,Gd,TA,PConc,Gd,TA,Av,GLQ,685,Unf,0,163,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,4,Typ,0,NA,Attchd,2003,Fin,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,144000 
+1370,20,RL,48,10635,Pave,NA,IR2,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2003,2003,Hip,CompShg,VinylSd,VinylSd,BrkFace,171,Gd,TA,PConc,Gd,TA,Av,BLQ,370,GLQ,972,315,1657,GasA,Ex,Y,SBrkr,1668,0,0,1668,1,0,2,0,3,1,Gd,8,Typ,1,TA,Attchd,2003,Fin,2,502,TA,TA,Y,0,262,0,0,0,0,NA,NA,NA,0,5,2010,WD,Normal,232000 +1371,50,RL,90,5400,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,4,6,1920,1950,Gable,CompShg,CBlock,CBlock,None,0,Fa,TA,PConc,TA,TA,No,ALQ,315,Rec,105,420,840,GasA,Ex,Y,SBrkr,840,534,0,1374,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1967,Fin,1,338,TA,TA,Y,0,0,198,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,105000 +1372,80,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SLvl,6,6,1955,1996,Hip,CompShg,AsbShng,AsbShng,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,831,Unf,0,161,992,GasA,Gd,Y,SBrkr,1661,0,0,1661,1,0,1,0,3,1,Gd,8,Typ,1,TA,BuiltIn,1955,RFn,1,377,TA,TA,Y,0,28,0,0,178,0,NA,MnPrv,NA,0,10,2008,WD,Normal,165500 +1373,60,RL,75,9750,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,Av,GLQ,975,Unf,0,133,1108,GasA,Ex,Y,SBrkr,1108,989,0,2097,1,0,2,1,3,1,Gd,8,Typ,1,TA,Detchd,1998,RFn,2,583,TA,TA,Y,253,170,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,274300 +1374,20,RL,NA,11400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,1Story,10,5,2001,2002,Hip,CompShg,VinylSd,VinylSd,BrkFace,705,Ex,TA,PConc,Ex,TA,Gd,GLQ,1282,Unf,0,1351,2633,GasA,Ex,Y,SBrkr,2633,0,0,2633,1,0,2,1,2,1,Ex,8,Typ,2,Gd,Attchd,2001,RFn,3,804,TA,TA,Y,314,140,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,466500 +1375,60,FV,85,10625,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2005,2005,Gable,CompShg,CemntBd,CmentBd,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1026,1026,GasA,Ex,Y,SBrkr,1026,932,0,1958,0,0,2,1,3,1,Gd,9,Typ,1,Gd,Attchd,2005,Fin,3,936,TA,TA,Y,154,210,0,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,250000 
+1376,20,RL,89,10991,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,BrkFace,80,Gd,TA,PConc,Gd,TA,Gd,Unf,0,Unf,0,1571,1571,GasA,Ex,Y,SBrkr,1571,0,0,1571,0,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2007,Fin,3,722,TA,TA,Y,100,36,0,0,0,0,NA,NA,NA,0,12,2007,New,Partial,239000 +1377,30,RL,52,6292,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1Story,6,5,1930,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,Mn,Rec,384,Unf,0,384,768,GasA,TA,N,SBrkr,790,0,0,790,0,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1925,Unf,1,160,Fa,TA,Y,0,141,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,91000 +1378,50,RL,60,10998,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,5,5,1941,1960,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,LwQ,408,BLQ,420,156,984,GasA,Ex,Y,SBrkr,984,620,0,1604,0,0,2,0,3,1,TA,6,Min2,0,NA,Detchd,1977,Unf,2,660,TA,TA,Y,0,68,0,0,0,0,NA,NA,NA,0,7,2009,WD,Normal,117000 +1379,160,RM,21,1953,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrDale,Norm,Norm,Twnhs,2Story,6,5,1973,1973,Gable,CompShg,HdBoard,HdBoard,BrkFace,408,TA,TA,CBlock,TA,Fa,No,BLQ,309,Unf,0,174,483,GasA,TA,Y,SBrkr,483,504,0,987,0,0,1,1,2,1,TA,5,Typ,0,NA,Detchd,1973,Unf,1,264,TA,TA,Y,72,0,0,0,0,0,NA,NA,NA,0,6,2006,WD,Normal,83000 +1380,80,RL,73,9735,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,SLvl,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,384,384,GasA,Gd,Y,NA,754,640,0,1394,0,0,2,1,3,1,Gd,7,Typ,0,NA,BuiltIn,2007,Fin,2,400,TA,TA,Y,100,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,167500 +1381,30,RL,45,8212,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,3,3,1914,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,Fa,BrkTil,TA,Fa,No,Rec,203,Unf,0,661,864,GasA,TA,N,FuseF,864,0,0,864,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1938,Unf,1,200,TA,Fa,Y,0,0,96,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,58500 
+1382,20,RL,NA,12925,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,7,1970,1970,Gable,CompShg,BrkFace,Plywood,None,0,TA,TA,CBlock,TA,TA,Mn,BLQ,865,Unf,0,340,1205,GasA,Ex,Y,SBrkr,2117,0,0,2117,0,0,2,1,4,1,TA,7,Typ,2,Gd,Attchd,1970,Fin,2,550,TA,TA,Y,0,42,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,237500 +1383,70,RM,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,7,7,1920,1950,Hip,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,596,596,GasA,Ex,Y,SBrkr,998,764,0,1762,1,0,1,1,4,1,Gd,8,Typ,0,NA,Detchd,1989,Unf,2,576,TA,TA,N,36,0,221,0,0,0,NA,NA,NA,0,10,2006,WD,Normal,157000 +1384,30,RL,NA,25339,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,7,1918,2007,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,TA,TA,No,Unf,0,Unf,0,816,816,GasA,Ex,Y,SBrkr,1416,0,0,1416,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2007,Unf,2,576,TA,TA,N,0,0,112,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,112000 +1385,50,RL,60,9060,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1.5Fin,6,5,1939,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,BrkTil,TA,TA,Mn,Rec,204,Unf,0,356,560,GasA,TA,Y,SBrkr,698,560,0,1258,0,0,1,0,2,1,TA,6,Typ,0,NA,Detchd,1939,Unf,1,280,TA,TA,P,0,0,0,0,0,0,NA,MnPrv,NA,0,10,2009,WD,Normal,105000 +1386,50,RM,40,5436,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,8,1922,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,BrkTil,TA,TA,No,BLQ,735,Unf,0,61,796,GasA,Gd,Y,SBrkr,796,358,0,1154,1,0,1,0,3,1,Gd,7,Typ,0,NA,Detchd,1922,Unf,1,240,TA,TA,N,0,96,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD,Normal,125500 +1387,60,RL,80,16692,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,RRAn,Norm,1Fam,2Story,7,5,1978,1978,Gable,CompShg,Plywood,Plywood,BrkFace,184,TA,TA,CBlock,Gd,TA,No,BLQ,790,LwQ,469,133,1392,GasA,TA,Y,SBrkr,1392,1392,0,2784,1,0,3,1,5,1,Gd,12,Typ,2,TA,Attchd,1978,RFn,2,564,TA,TA,Y,0,112,0,0,440,519,Fa,MnPrv,TenC,2000,7,2006,WD,Normal,250000 
+1388,50,RM,60,8520,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,6,7,1916,1950,Gable,CompShg,Stucco,Stucco,None,0,TA,Gd,BrkTil,TA,TA,No,Rec,168,LwQ,546,0,714,GasW,TA,N,SBrkr,1664,862,0,2526,0,0,2,0,5,1,Gd,10,Typ,1,Gd,Detchd,1916,Unf,1,216,TA,TA,Y,88,15,0,0,0,0,NA,GdWo,NA,0,8,2007,CWD,Family,136000 +1389,20,RL,42,14892,Pave,NA,IR1,HLS,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,1Story,9,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,Stone,160,Ex,TA,PConc,Ex,TA,Gd,GLQ,1320,Unf,0,426,1746,GasA,Ex,Y,SBrkr,1746,0,0,1746,1,0,2,0,3,1,Ex,7,Typ,2,Gd,Attchd,2006,Fin,3,758,TA,TA,Y,201,39,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,377500 +1390,50,RM,60,6000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,6,1941,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,Gd,BrkTil,TA,Gd,No,ALQ,375,Unf,0,360,735,GasA,Ex,Y,SBrkr,869,349,0,1218,0,1,1,0,3,1,TA,6,Typ,1,Gd,Detchd,2003,Unf,2,440,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,3,2007,WD,Normal,131000 +1391,20,RL,70,9100,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,244,Gd,TA,PConc,Gd,TA,Av,GLQ,1400,Unf,0,125,1525,GasA,Ex,Y,SBrkr,1525,0,0,1525,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2000,RFn,2,541,TA,TA,Y,219,36,0,0,0,0,NA,NA,NA,0,9,2006,WD,Normal,235000 +1392,90,RL,65,8944,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,5,5,1967,1967,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1584,1584,GasA,TA,Y,SBrkr,1584,0,0,1584,0,0,2,0,4,2,TA,8,Mod,0,NA,Detchd,1967,Unf,3,792,TA,TA,Y,0,152,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,124000 +1393,85,RL,68,7838,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,SFoyer,5,5,1967,1967,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,Av,ALQ,769,Unf,0,95,864,GasA,TA,Y,SBrkr,900,0,0,900,1,0,1,0,3,1,TA,6,Typ,1,Po,Attchd,1967,RFn,1,288,TA,TA,Y,175,144,0,0,0,0,NA,MnWw,NA,0,12,2006,WD,Normal,123000 
+1394,190,RM,60,10800,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,1.5Fin,6,7,1905,2000,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Fa,TA,No,Unf,0,Unf,0,482,482,GasA,Ex,N,SBrkr,1221,691,0,1912,0,0,2,0,3,2,TA,7,Typ,1,TA,Detchd,2003,Unf,2,672,Gd,TA,Y,0,25,212,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,163000 +1395,120,RL,53,4045,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2006,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,45,Gd,TA,PConc,Gd,TA,Av,GLQ,1070,Unf,0,286,1356,GasA,Ex,Y,SBrkr,1500,0,0,1500,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2006,Fin,3,648,TA,TA,Y,161,20,0,0,0,0,NA,NA,NA,0,10,2006,New,Partial,246578 +1396,60,RL,88,12665,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,8,5,2005,2006,Hip,CompShg,VinylSd,VinylSd,BrkFace,245,Gd,TA,PConc,Gd,Gd,Gd,Unf,0,Unf,0,1094,1094,GasA,Ex,Y,SBrkr,1133,1349,0,2482,0,0,2,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2005,Fin,3,642,TA,TA,Y,144,39,0,0,0,0,NA,NA,NA,0,2,2007,WD,Normal,281213 +1397,20,RL,NA,57200,Pave,NA,IR1,Bnk,AllPub,Inside,Sev,Timber,Norm,Norm,1Fam,1Story,5,5,1948,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Av,BLQ,353,Rec,334,60,747,GasA,TA,Y,SBrkr,1687,0,0,1687,1,0,1,0,3,1,TA,7,Min1,2,TA,Detchd,1966,Unf,2,572,TA,TA,N,0,0,50,0,0,0,NA,NA,NA,0,6,2010,WD,Normal,160000 +1398,70,RM,51,6120,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,2Story,5,8,1920,2004,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,TA,TA,Mn,Unf,0,Unf,0,939,939,GasA,Ex,Y,SBrkr,939,574,0,1513,0,0,1,1,4,1,TA,8,Typ,0,NA,Detchd,1933,Unf,1,180,Fa,Fa,N,24,0,150,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,137500 +1399,50,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,5,4,1950,1982,Gable,CompShg,VinylSd,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,No,Rec,180,BLQ,352,676,1208,GasA,Gd,Y,FuseA,1136,768,0,1904,1,0,1,1,3,1,TA,7,Min1,0,NA,Attchd,1950,Unf,1,240,TA,TA,Y,0,0,168,0,0,0,NA,GdPrv,NA,0,5,2009,WD,Normal,138000 
+1400,50,RL,51,6171,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,6,6,1925,1990,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,BrkTil,TA,TA,No,BLQ,264,Unf,0,712,976,GasA,Ex,Y,SBrkr,1160,448,0,1608,0,0,2,1,3,1,Gd,7,Typ,1,Gd,Detchd,1925,Unf,1,216,Fa,TA,Y,147,16,0,0,0,0,NA,MnPrv,NA,0,10,2009,WD,Normal,137450 +1401,50,RM,50,6000,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,7,1929,1950,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,862,862,GasA,TA,Y,SBrkr,950,208,0,1158,0,0,1,0,3,1,TA,5,Typ,1,Gd,BuiltIn,1929,RFn,1,208,TA,TA,Y,0,0,112,0,0,0,NA,NA,NA,0,7,2008,WD,Normal,120000 +1402,60,RL,62,7415,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,TA,No,GLQ,759,Unf,0,80,839,GasA,Ex,Y,SBrkr,864,729,0,1593,1,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,2004,Fin,2,398,TA,TA,Y,100,75,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,193000 +1403,20,RL,64,6762,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,Av,Unf,0,Unf,0,1286,1286,GasA,Ex,Y,SBrkr,1294,0,0,1294,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2006,RFn,2,662,TA,TA,Y,168,55,0,0,0,0,NA,NA,NA,0,7,2006,New,Partial,193879 +1404,20,RL,49,15256,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Somerst,RRAn,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,84,Gd,TA,PConc,Gd,TA,Gd,GLQ,929,Unf,0,556,1485,GasA,Ex,Y,SBrkr,1464,0,0,1464,1,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2007,Unf,3,754,TA,TA,Y,168,160,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,282922 +1405,50,RL,60,10410,Pave,Grvl,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Artery,Norm,1Fam,1.5Fin,3,4,1915,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,672,672,GasA,TA,Y,SBrkr,694,520,0,1214,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1998,Unf,3,936,TA,TA,Y,216,0,160,0,0,0,NA,MnPrv,NA,0,1,2006,WD,Family,105000 
+1406,120,RM,44,3842,Pave,NA,IR1,HLS,AllPub,Inside,Mod,Crawfor,Norm,Norm,TwnhsE,1Story,8,5,2004,2005,Hip,CompShg,CemntBd,CmentBd,Stone,174,Gd,TA,PConc,Ex,TA,Gd,GLQ,1373,Unf,0,221,1594,GasA,Ex,Y,SBrkr,1646,0,0,1646,1,1,2,0,2,1,Gd,5,Typ,1,Gd,Attchd,2004,Fin,2,482,TA,TA,Y,128,53,0,0,155,0,NA,NA,NA,0,1,2008,WD,Normal,275000 +1407,85,RL,70,8445,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,SFoyer,5,7,1972,2007,Gable,CompShg,HdBoard,Wd Shng,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,656,Unf,0,112,768,GasA,TA,Y,SBrkr,768,0,0,768,1,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1988,Unf,2,396,TA,TA,Y,58,0,0,0,0,0,NA,MnPrv,NA,0,3,2009,WD,Normal,133000 +1408,20,RL,NA,8780,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,5,1985,1985,Gable,CompShg,HdBoard,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,625,Unf,0,208,833,GasA,Ex,Y,SBrkr,833,0,0,833,1,0,1,0,3,1,TA,5,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,3,2009,WD,Normal,112000 +1409,70,RM,60,7740,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,4,7,1910,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Fa,TA,No,Unf,0,Unf,0,622,622,GasA,Gd,Y,SBrkr,741,622,0,1363,0,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1966,Unf,2,528,TA,TA,Y,0,0,0,0,168,0,NA,NA,NA,0,6,2010,WD,Normal,125500 +1410,60,RL,46,20544,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,NWAmes,Norm,Norm,1Fam,2Story,7,6,1986,1991,Gable,CompShg,Plywood,Plywood,BrkFace,123,TA,Gd,CBlock,Gd,TA,No,Unf,0,Unf,0,791,791,GasA,Gd,Y,SBrkr,1236,857,0,2093,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1986,Fin,2,542,TA,TA,Y,364,63,0,0,0,0,NA,MnPrv,NA,0,11,2008,WD,Normal,215000 +1411,60,RL,79,12420,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,666,Unf,0,278,944,GasA,Ex,Y,SBrkr,944,896,0,1840,1,0,2,1,3,1,Gd,6,Typ,0,NA,Attchd,2001,RFn,2,622,TA,TA,Y,0,45,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,230000 
+1412,50,RL,80,9600,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1.5Fin,6,8,1950,2005,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,BLQ,120,Unf,0,736,856,GasA,Ex,Y,SBrkr,1112,556,0,1668,0,0,1,1,3,1,TA,6,Min2,0,NA,Attchd,1950,Unf,1,271,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,9,2009,WD,Normal,140000 +1413,90,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,Duplex,1Story,4,5,1949,1950,Gable,CompShg,BrkFace,Stone,None,0,TA,TA,Slab,NA,NA,NA,NA,0,NA,0,0,0,Wall,Fa,N,FuseF,1040,0,0,1040,0,0,2,0,2,2,TA,6,Typ,0,NA,Detchd,1956,Unf,2,420,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,6,2009,WD,Normal,90000 +1414,20,RL,88,10994,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,1Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,Stone,366,Gd,TA,PConc,Gd,Gd,No,GLQ,976,Unf,0,868,1844,GasA,Ex,Y,SBrkr,1844,0,0,1844,1,0,2,0,2,1,Gd,7,Typ,1,Gd,Attchd,2005,Fin,2,620,TA,TA,Y,165,44,0,0,0,0,NA,NA,NA,0,9,2009,COD,Abnorml,257000 +1415,50,RL,64,13053,Pave,Pave,Reg,Bnk,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,6,7,1923,2000,Gambrel,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,833,833,GasA,Gd,Y,SBrkr,1053,795,0,1848,0,0,1,1,4,1,Gd,8,Typ,1,Gd,Detchd,1922,Unf,2,370,TA,TA,N,0,0,0,0,220,0,NA,NA,NA,0,6,2008,WD,Normal,207000 +1416,120,RL,51,3635,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Blmngtn,Norm,Norm,TwnhsE,1Story,7,5,2007,2007,Hip,CompShg,VinylSd,VinylSd,BrkFace,130,Gd,TA,PConc,Gd,TA,No,ALQ,988,Unf,0,398,1386,GasA,Ex,Y,SBrkr,1569,0,0,1569,0,1,2,0,1,1,Gd,7,Typ,1,TA,Attchd,2007,RFn,3,660,TA,TA,Y,143,20,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,175900 +1417,190,RM,60,11340,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,4,6,1885,1950,Gable,CompShg,VinylSd,AsbShng,None,0,TA,TA,PConc,TA,TA,No,Unf,0,Unf,0,777,777,GasA,Gd,Y,SBrkr,1246,1044,0,2290,0,0,2,0,4,2,TA,11,Typ,0,NA,Detchd,1971,Unf,2,560,TA,TA,N,0,0,114,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,122500 
+1418,60,RL,NA,16545,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,731,Gd,TA,PConc,Gd,TA,Mn,GLQ,781,Unf,0,503,1284,GasA,Ex,Y,SBrkr,1310,1140,0,2450,1,0,2,1,3,1,Gd,7,Typ,1,TA,Attchd,1998,Fin,3,1069,TA,TA,Y,0,126,0,0,0,0,NA,NA,NA,0,5,2009,WD,Normal,340000 +1419,20,RL,71,9204,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1963,1963,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,BLQ,25,Rec,872,247,1144,GasA,TA,Y,SBrkr,1144,0,0,1144,1,0,1,1,3,1,TA,6,Typ,0,NA,Detchd,1962,Unf,1,336,TA,TA,Y,0,88,0,0,0,0,NA,NA,NA,0,8,2008,COD,Normal,124000 +1420,20,RL,NA,16381,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1Story,6,5,1969,1969,Gable,CompShg,Plywood,Plywood,BrkFace,312,Gd,Gd,CBlock,TA,TA,Av,Rec,1110,Unf,0,734,1844,GasA,Gd,Y,SBrkr,1844,0,0,1844,1,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1969,RFn,2,540,TA,TA,Y,0,73,216,0,0,0,NA,NA,NA,0,12,2006,WD,Normal,223000 +1421,60,RL,90,11700,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,2Story,6,6,1968,1968,Gable,CompShg,HdBoard,HdBoard,BrkFace,420,TA,TA,CBlock,TA,TA,No,ALQ,404,Unf,0,304,708,GasA,Gd,Y,SBrkr,708,708,0,1416,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1968,RFn,2,776,TA,TA,Y,0,169,0,0,119,0,NA,NA,NA,0,5,2006,WD,Normal,179900 +1422,120,RL,53,4043,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,TwnhsE,1Story,6,5,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,ALQ,360,Unf,0,709,1069,GasA,TA,Y,SBrkr,1069,0,0,1069,0,0,2,0,2,1,TA,4,Typ,1,Fa,Attchd,1977,RFn,2,440,TA,TA,Y,0,55,0,0,165,0,NA,NA,NA,0,7,2010,WD,Normal,127500 +1423,120,RM,37,4435,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,170,Gd,TA,PConc,Gd,TA,Av,GLQ,686,Unf,0,162,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,0,NA,Attchd,2003,Fin,2,420,TA,TA,Y,140,0,0,0,0,0,NA,NA,NA,0,3,2008,WD,Normal,136500 
+1424,80,RL,NA,19690,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Edwards,Norm,Norm,1Fam,SLvl,6,7,1966,1966,Flat,Tar&Grv,Plywood,Plywood,None,0,Gd,Gd,CBlock,Gd,TA,Av,Unf,0,Unf,0,697,697,GasA,TA,Y,SBrkr,1575,626,0,2201,0,0,2,0,4,1,Gd,8,Typ,1,Gd,Attchd,1966,Unf,2,432,Gd,Gd,Y,586,236,0,0,0,738,Gd,GdPrv,NA,0,8,2006,WD,Alloca,274970 +1425,20,RL,NA,9503,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1958,1983,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,457,Rec,374,193,1024,GasA,TA,Y,SBrkr,1344,0,0,1344,1,0,1,0,2,1,TA,6,Min1,1,TA,Detchd,1970,Unf,1,484,TA,TA,Y,316,28,0,0,0,0,NA,GdWo,NA,0,6,2007,WD,Normal,144000 +1426,20,RL,80,10721,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1959,1959,Hip,CompShg,HdBoard,HdBoard,Stone,243,Gd,TA,CBlock,TA,TA,No,Unf,0,Unf,0,1252,1252,GasA,Ex,Y,SBrkr,1252,0,0,1252,0,0,1,0,3,1,Gd,7,Typ,0,NA,Detchd,1960,Unf,2,528,TA,TA,Y,0,39,0,0,0,0,NA,NA,NA,0,10,2008,WD,Normal,142000 +1427,60,RL,81,10944,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NoRidge,Norm,Norm,1Fam,2Story,7,5,1994,1994,Gable,CompShg,VinylSd,VinylSd,BrkFace,448,Gd,TA,PConc,Gd,TA,No,GLQ,1000,Unf,0,223,1223,GasA,Ex,Y,SBrkr,1223,904,0,2127,1,0,2,1,3,1,Gd,5,Typ,2,TA,Attchd,1994,RFn,2,525,TA,TA,Y,171,132,0,0,0,0,NA,NA,NA,0,8,2008,WD,Normal,271000 +1428,50,RL,60,10930,Pave,Grvl,Reg,Bnk,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,5,6,1945,1950,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,580,Unf,0,333,913,GasA,TA,Y,FuseA,1048,510,0,1558,1,0,1,1,3,1,TA,6,Typ,1,TA,Attchd,1962,Unf,1,288,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,4,2008,WD,Normal,140000 +1429,30,RM,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,7,1940,1992,Gable,CompShg,MetalSd,MetalSd,Stone,294,TA,Gd,CBlock,TA,TA,No,BLQ,510,Unf,0,278,788,GasA,TA,Y,SBrkr,804,0,0,804,1,0,1,0,2,1,Gd,4,Typ,2,Gd,Attchd,1940,Unf,1,240,TA,TA,Y,0,0,154,0,0,0,NA,MnPrv,NA,0,2,2010,WD,Abnorml,119000 
+1430,20,RL,NA,12546,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,7,1981,1981,Gable,CompShg,MetalSd,MetalSd,BrkFace,310,Gd,Gd,CBlock,Gd,TA,No,BLQ,678,Unf,0,762,1440,GasA,Ex,Y,SBrkr,1440,0,0,1440,0,0,2,0,3,1,Gd,7,Typ,1,TA,Attchd,1981,Fin,2,467,TA,TA,Y,0,0,99,0,0,0,NA,NA,NA,0,4,2007,WD,Normal,182900 +1431,60,RL,60,21930,Pave,NA,IR3,Lvl,AllPub,Inside,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,5,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,Gd,Av,Unf,0,Unf,0,732,732,GasA,Ex,Y,SBrkr,734,1104,0,1838,0,0,2,1,4,1,TA,7,Typ,1,Gd,BuiltIn,2005,Fin,2,372,TA,TA,Y,100,40,0,0,0,0,NA,NA,NA,0,7,2006,WD,Normal,192140 +1432,120,RL,NA,4928,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NPkVill,Norm,Norm,TwnhsE,1Story,6,6,1976,1976,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,Gd,TA,No,LwQ,958,Unf,0,0,958,GasA,TA,Y,SBrkr,958,0,0,958,0,0,2,0,2,1,TA,5,Typ,0,NA,Attchd,1976,RFn,2,440,TA,TA,Y,0,60,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,143750 +1433,30,RL,60,10800,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,4,6,1927,2007,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,656,656,GasA,TA,Y,SBrkr,968,0,0,968,0,0,2,0,4,1,TA,5,Typ,0,NA,Detchd,1928,Unf,1,216,Fa,Fa,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,64500 +1434,60,RL,93,10261,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,318,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,936,936,GasA,Ex,Y,SBrkr,962,830,0,1792,1,0,2,1,3,1,TA,8,Typ,1,TA,Attchd,2000,Fin,2,451,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,186500 +1435,20,RL,80,17400,Pave,NA,Reg,Low,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1977,1977,Gable,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,ALQ,936,Unf,0,190,1126,GasA,Fa,Y,SBrkr,1126,0,0,1126,1,0,2,0,3,1,TA,5,Typ,1,Gd,Attchd,1977,RFn,2,484,TA,TA,P,295,41,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,160000 
+1436,20,RL,80,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,9,1962,2005,Gable,CompShg,Wd Sdng,Wd Sdng,BrkFace,237,Gd,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,1319,1319,GasA,TA,Y,SBrkr,1537,0,0,1537,1,0,1,1,3,1,Gd,7,Typ,1,Gd,Attchd,1962,RFn,2,462,TA,TA,Y,0,36,0,0,0,0,NA,GdPrv,NA,0,7,2008,COD,Abnorml,174000 +1437,20,RL,60,9000,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,PConc,TA,TA,No,ALQ,616,Unf,0,248,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1974,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,5,2007,WD,Normal,120500 +1438,20,RL,96,12444,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2008,2008,Hip,CompShg,VinylSd,VinylSd,Stone,426,Ex,TA,PConc,Ex,TA,Av,GLQ,1336,Unf,0,596,1932,GasA,Ex,Y,SBrkr,1932,0,0,1932,1,0,2,0,2,1,Ex,7,Typ,1,Gd,Attchd,2008,Fin,3,774,TA,TA,Y,0,66,0,304,0,0,NA,NA,NA,0,11,2008,New,Partial,394617 +1439,20,RM,90,7407,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,6,7,1957,1996,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,GLQ,600,Unf,0,312,912,GasA,TA,Y,FuseA,1236,0,0,1236,1,0,1,0,2,1,TA,6,Typ,0,NA,Attchd,1957,Unf,2,923,TA,TA,Y,0,158,158,0,0,0,NA,MnPrv,NA,0,4,2010,WD,Normal,149700 +1440,60,RL,80,11584,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,SLvl,7,6,1979,1979,Hip,CompShg,HdBoard,HdBoard,BrkFace,96,TA,TA,CBlock,TA,TA,No,GLQ,315,Rec,110,114,539,GasA,TA,Y,SBrkr,1040,685,0,1725,0,0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1979,RFn,2,550,TA,TA,Y,0,88,216,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,197000 +1441,70,RL,79,11526,Pave,NA,IR1,Bnk,AllPub,Inside,Mod,Crawfor,Norm,Norm,1Fam,2.5Fin,6,7,1922,1994,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,BrkTil,Ex,TA,No,Unf,0,Unf,0,588,588,GasA,Fa,Y,SBrkr,1423,748,384,2555,0,0,2,0,3,1,TA,11,Min1,1,Gd,Detchd,1993,Fin,2,672,TA,TA,Y,431,0,0,0,0,0,NA,NA,NA,0,9,2008,WD,Normal,191000 
+1442,120,RM,NA,4426,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,TwnhsE,1Story,6,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,147,Gd,TA,PConc,Gd,TA,Av,GLQ,697,Unf,0,151,848,GasA,Ex,Y,SBrkr,848,0,0,848,1,0,1,0,1,1,Gd,3,Typ,1,TA,Attchd,2004,RFn,2,420,TA,TA,Y,149,0,0,0,0,0,NA,NA,NA,0,5,2008,WD,Normal,149300 +1443,60,FV,85,11003,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,2Story,10,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,160,Ex,TA,PConc,Ex,TA,Av,GLQ,765,Unf,0,252,1017,GasA,Ex,Y,SBrkr,1026,981,0,2007,1,0,2,1,3,1,Ex,10,Typ,1,Ex,Attchd,2008,Fin,3,812,TA,TA,Y,168,52,0,0,0,0,NA,NA,NA,0,4,2009,WD,Normal,310000 +1444,30,RL,NA,8854,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Unf,6,6,1916,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,Unf,0,Unf,0,952,952,Grav,Fa,N,FuseF,952,0,0,952,0,0,1,0,2,1,Fa,4,Typ,1,Gd,Detchd,1916,Unf,1,192,Fa,Po,P,0,98,0,0,40,0,NA,NA,NA,0,5,2009,WD,Normal,121000 +1445,20,RL,63,8500,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,106,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1422,1422,GasA,Ex,Y,SBrkr,1422,0,0,1422,0,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,626,TA,TA,Y,192,60,0,0,0,0,NA,NA,NA,0,11,2007,WD,Normal,179600 +1446,85,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,SFoyer,6,5,1966,1966,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,TA,Gd,LwQ,187,Rec,627,0,814,GasA,Gd,Y,SBrkr,913,0,0,913,1,0,1,0,3,1,TA,6,Typ,0,NA,Detchd,1990,Unf,1,240,TA,TA,Y,0,0,252,0,0,0,NA,NA,NA,0,5,2007,WD,Normal,129000 +1447,20,RL,NA,26142,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,7,1962,1962,Gable,CompShg,HdBoard,HdBoard,BrkFace,189,TA,TA,CBlock,TA,TA,No,Rec,593,Unf,0,595,1188,GasA,TA,Y,SBrkr,1188,0,0,1188,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1962,Unf,1,312,TA,TA,P,261,39,0,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,157900 
+1448,60,RL,80,10000,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,8,5,1995,1996,Gable,CompShg,VinylSd,VinylSd,BrkFace,438,Gd,TA,PConc,Gd,TA,No,GLQ,1079,Unf,0,141,1220,GasA,Ex,Y,SBrkr,1220,870,0,2090,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1995,RFn,2,556,TA,TA,Y,0,65,0,0,0,0,NA,NA,NA,0,12,2007,WD,Normal,240000 +1449,50,RL,70,11767,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,2Story,4,7,1910,2000,Gable,CompShg,MetalSd,HdBoard,None,0,TA,TA,CBlock,Fa,TA,No,Unf,0,Unf,0,560,560,GasA,Gd,N,SBrkr,796,550,0,1346,0,0,1,1,2,1,TA,6,Min2,0,NA,Detchd,1950,Unf,1,384,Fa,TA,Y,168,24,0,0,0,0,NA,GdWo,NA,0,5,2007,WD,Normal,112000 +1450,180,RM,21,1533,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,Twnhs,SFoyer,5,7,1970,1970,Gable,CompShg,CemntBd,CmentBd,None,0,TA,TA,CBlock,Gd,TA,Av,GLQ,553,Unf,0,77,630,GasA,Ex,Y,SBrkr,630,0,0,630,1,0,1,0,1,1,Ex,3,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,NA,NA,0,8,2006,WD,Abnorml,92000 +1451,90,RL,60,9000,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,Duplex,2Story,5,5,1974,1974,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,896,896,GasA,TA,Y,SBrkr,896,896,0,1792,0,0,2,2,4,2,TA,8,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,32,45,0,0,0,0,NA,NA,NA,0,9,2009,WD,Normal,136000 +1452,20,RL,78,9262,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2008,2009,Gable,CompShg,CemntBd,CmentBd,Stone,194,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1573,1573,GasA,Ex,Y,SBrkr,1578,0,0,1578,0,0,2,0,3,1,Ex,7,Typ,1,Gd,Attchd,2008,Fin,3,840,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,5,2009,New,Partial,287090 +1453,180,RM,35,3675,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,TwnhsE,SLvl,5,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,80,TA,TA,PConc,Gd,TA,Gd,GLQ,547,Unf,0,0,547,GasA,Gd,Y,SBrkr,1072,0,0,1072,1,0,1,0,2,1,TA,5,Typ,0,NA,Basment,2005,Fin,2,525,TA,TA,Y,0,28,0,0,0,0,NA,NA,NA,0,5,2006,WD,Normal,145000 
+1454,20,RL,90,17217,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,5,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1140,1140,GasA,Ex,Y,SBrkr,1140,0,0,1140,0,0,1,0,3,1,TA,6,Typ,0,NA,NA,NA,NA,0,0,NA,NA,Y,36,56,0,0,0,0,NA,NA,NA,0,7,2006,WD,Abnorml,84500 +1455,20,FV,62,7500,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,410,Unf,0,811,1221,GasA,Ex,Y,SBrkr,1221,0,0,1221,1,0,2,0,2,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,400,TA,TA,Y,0,113,0,0,0,0,NA,NA,NA,0,10,2009,WD,Normal,185000 +1456,60,RL,62,7917,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1999,2000,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,953,953,GasA,Ex,Y,SBrkr,953,694,0,1647,0,0,2,1,3,1,TA,7,Typ,1,TA,Attchd,1999,RFn,2,460,TA,TA,Y,0,40,0,0,0,0,NA,NA,NA,0,8,2007,WD,Normal,175000 +1457,20,RL,85,13175,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1978,1988,Gable,CompShg,Plywood,Plywood,Stone,119,TA,TA,CBlock,Gd,TA,No,ALQ,790,Rec,163,589,1542,GasA,TA,Y,SBrkr,2073,0,0,2073,1,0,2,0,3,1,TA,7,Min1,2,TA,Attchd,1978,Unf,2,500,TA,TA,Y,349,0,0,0,0,0,NA,MnPrv,NA,0,2,2010,WD,Normal,210000 +1458,70,RL,66,9042,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,9,1941,2006,Gable,CompShg,CemntBd,CmentBd,None,0,Ex,Gd,Stone,TA,Gd,No,GLQ,275,Unf,0,877,1152,GasA,Ex,Y,SBrkr,1188,1152,0,2340,0,0,2,0,4,1,Gd,9,Typ,2,Gd,Attchd,1941,RFn,1,252,TA,TA,Y,0,60,0,0,0,0,NA,GdPrv,Shed,2500,5,2010,WD,Normal,266500 +1459,20,RL,68,9717,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1950,1996,Hip,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,Mn,GLQ,49,Rec,1029,0,1078,GasA,Gd,Y,FuseA,1078,0,0,1078,1,0,1,0,2,1,Gd,5,Typ,0,NA,Attchd,1950,Unf,1,240,TA,TA,Y,366,0,112,0,0,0,NA,NA,NA,0,4,2010,WD,Normal,142125 
+1460,20,RL,75,9937,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,5,6,1965,1965,Gable,CompShg,HdBoard,HdBoard,None,0,Gd,TA,CBlock,TA,TA,No,BLQ,830,LwQ,290,136,1256,GasA,Gd,Y,SBrkr,1256,0,0,1256,1,0,1,1,3,1,TA,6,Typ,0,NA,Attchd,1965,Fin,1,276,TA,TA,Y,736,68,0,0,0,0,NA,NA,NA,0,6,2008,WD,Normal,147500 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model deleted file mode 160000 index 141f2778a..000000000 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/ml_model +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 141f2778a863a41d77628688cade4a05d5ca48a1 diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml index 24f1adaef..dabce6c88 100644 --- a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml +++ b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/playbook.yaml @@ -24,7 +24,7 @@ # ── Paths ───────────────────────────────────────────────── project_root: "{{ playbook_dir }}" - model_pkl: "{{ project_root }}/ml_model/house_price_model.pkl" + model_pkl: "{{ project_root }}/ml/house_price_model.pkl" # ── Health check ────────────────────────────────────────── api_base: "http://localhost:{{ host_port }}" diff --git a/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png b/class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment/results/price_by_neighborhood.png index 4cc2a85c7ab98f436dce590ed10cd451d76ab61d..45c1135ed7dad317826c7a8ec9efdc67ba53c71d 100644 GIT binary patch literal 
27162 zcmd442UM2nmL-hYQh_on2&jOHk_<=&%LE@NNJgRvsANTQw8|372!5z25)4QVf+Wcn zg$M?6mL#HNkSqxN`%t%gdQDHS`Fp0;A~=V&T`zu;k>n-gSEx!%}(a_ z7cJ~;_X>*(iwSN%=iqSuqLhe;&A)zuu$}!`k#C`)C-EUaomV`0k%eWI1N}3v`3#Q> z3yXy>^D7=YEX~=r<-u3>&?cU@wjBIFeVB4&`|9uB|KR(( zcOUwVq~D+3;IjokpM_oTB*kNWL-gy-KP(`1u{>wC)@CK4VTIX7M?EW z@&55`LyVhDu#nZ!rZX2VT;P!N$hfvfU@JR2`$@rdLMM~?a^98<-r|HKA|2^0 zU=hIx*tU#~P21Yq+AZ+6*$wkW{a@jWv%DYdF<1W(EUESMDC_%&;vzY3-39m9A9%j_ zi~Fprt84VR@+aMuk$x({61SWAb=U6HR2;CM32647^~@Qq(Jy%@p17E8Q&d}lZ>(<8X^TB}d#B%w8xPj#+jYIU z%{$ro)UP6I&5xpt2Srbf+nD_`Zn@3_MnZ;}npQP&eKJr z6+Aq$BaT0ecyi?WP8k`QoVTv$@D>|~9KE`$1UlZ_zC3t+O;=TPfOeejW7pv?(eC5@ zU50Ekyb_BzTn<*pozy72wl=_ILqoFpb-t;dlV9IIjJ&pX*UpPy-Z`~Re0l$nWAB+8 zcsYI^9v<@#!6)zT(y96JeK_R9_Wk>{?eeD6e!G0Rqpj`n!7IzUMr)0Ajvc#EIm0m2 z$ijv>_BCkz^2;wngM%jy9Jmyt9Ul{|5veqpS6Ru|mEPj@Li+v03=hkGyXLriLi#(M z`#;({JL~*9e^J$ov#KFdjwKd`-ZS4Lad{#IPbR;qt<}MTSG>N-8K74f{prh>2yR(7 zMP^g#<(kTurNNR<``=Q1$+G1h{3`0-zO$^=%-m*)+aw>+az(SHuCtx{kUtBed#U1D(w&La&H%& zR%^-i)R1YxN<>>!M(E{v<>Y?<@=&u{#(T!&$dMz(rKPH&p`mV*U6HhVk%`rRsl1T) zoJ`4d=z4PL5(`}&&1m%?S$C)8T8-wH7Zsi=_@x=v>OML!FK4X5ieZ%Hg#B{tEPJ&- zG1>g>Hn)?94j%lCHg>4v?Jn%b(6?`D--kNH9J@97`S|KakN4}n`kb(Sn%?ob$szO7 z(55b{i&JBLmUYh`|M>Cab6;Qln>UI!b8h5$>tp$9nbKnc!suLjmBfO<14h>C_iv@Y24x zs7Rr$txYe>DW0zjo58W~od5g<3+N1#m7NeW$Pg`ejrbz=u)+Lzuh`VrFz*=SJTJE| z56<4fySuu%#VFq1${~8{lCWXsXVDOMRZgQ9XKu#mB-*<3;8`D!_0|V;4mG`#x$E$yB4{~Ve5vJXc<}A(XPm_Z8_?f zG+UR74ldlg;t+FF?z`OCnJK+g%ex%nwsBEWJMfK1m=52E%&Vi+4&ehd5Rj`n2S4~Pvn(Td1nsV{$2hHkC>-s&oFiN|3 z@5b_mHl$cQeH$oZRdkCBKPlMArESsOomxr-zU;LKyK6UWI5)oNQ}gqo`m>INcGapUeU0g{h<`C^ zPmjjj;}y7S<9B_%NR0Y3rIKLD7onbGCtqe{?9fRtV5sy2sLGWu7s-Bf?7^OBWTBnv z;eSw4iq}uK#NBl=uF%MmV-Dl;St2x9HfO)Y<0s(rdvFuWgT$;-@Y7{St2CJ5>y2D~ z!Qm~tv2pL3jT^f}L%J#>m6>>qyPsT@D>5#$r(g|Oxn*>Z2kpE4@S%Qky8}W}nD@-f zp|7Qh>xA^);MzXiZ&Sf(m;D&OeG-SB8EaL+Tn1xq7)CFL4zjvF?(xoUIkll$Wuy2Zn`((e1~g_w*#@X=ge0 zIShSfuyX8sli-a&zR#{%X6=p>2b!{+DT{w9yk4E1)!N(kT;(GQMK2q$=yA4MU zAC9uBO}KXJR+_YWx?_*_O-^wY#9{66{-)7^yxDH7i`x4ju~g3~r$(zonNwaG&y?0q 
zBkSxm&dr`2A4o((^qd~8jnRm_6`>Lm6DsR2S7q-t(dO&caaZT_*RPScxTH(46!J3@ zPyZ|^xGQE=qbZa%`qoXo_P&0gfbu;-%gTtG!baIqixWh420lA;61Pqe|4H}&=Tj?M{h4*;%XFV5)lZHrr_TTy(Rp#^{hFVD-pl}u z%F~Qg;WEs2QFH96seVx^7mMWqNIJZ7sUU9GT!kD=`NpOt*AU^|E2^XaBILFSM@di$hvR6 zuz~2Z^}^PQd$4VN5$5qGI9q9H!(U4|anyYtK79BMYbaIa_+#RNuX(O_rZ$q!-j5$Y z-ar&;|Lf{%rvmLnBb$tjj1Bn47pd6w4XU!EceWJkCYxO~oc(F_wz`^uoY8HCnI4Ef z=~jN%ukW?2iZW5$gA`+#F37yW#wE1@IF_yQg>3+@!mYXnr-5dhjLb|9WU0MCQXQfn za(Z^9_V5bOU5;&lL(JV5J{{H%!_U~jys=X&Mj}2780j^DZ=cV}^xEHlTHV_alJlCL zOL32p=crcJ7y=Pze}BJ9y1SWK_?F@w8WA@xwEekAe+sAUCRU)Ju&_`9iN7G5e`CB} z>Yh*S?KXYB>^%FiduK9UzTAkbpPX9yP`uRN?2fQuLxxjdP?Jo_Js~T_JzqYA^gIv6 z5iyf6jsI>`Gb&Pif4@yofy6i)?m=mv503YHf3xhtKbJf8$-0e*@@=I5U$ttLb>GzZ zfVhXJC#$owb5`y2Wx+^n)NLXC{aIaq6%}!o%1v2683?%=t(){@OL1GOwRwmU0Bv7d z<9cHctUNv3KFg{%{W(Jj^WuDxy_cgs-^;y8)=RISPOQgi<^VpspqmTO%E7#mWLhK@ zFQyT3>^0C?pASN8UDsJ9b>M)T`S}NY$CkK?xR;r~oE5X~=$)?HIhUZ7?(8rLmiKP9 z)ip2(Ub$(LwTp|3gvVHjjBCXk)C9Msoo(B(x+*;{10D13ncdj1`%l}tlDoTD8Aqol zNBbI(lx(c>#)rQGs2@Fb>JiRUo|k9x+jEYyX;~iMlTDIzZ{Rhgn%KtsmDh_H?E?t9 zu}RW?1E6SHPx{5LDg{MFMLP^LQ!2lY4YaiQn+fWtz3V!^Idy6NfyHO1^EpvA?e>^B zZ`xV*_~`X@LN}10HZpGn8hh2()J_&s*kP-@dh6DHWPh_eyL3Kud8FC3*edQ>%*tVA z6{{6{Z%gq+N1*YhN&P&Kyi_(yyrU7v9^62z zBUa>uOGO0B_t+(oqryu%jV%oUt{eK1{6#%S~FIKoR5pD|o-L7{q7_6jWDNXE<~*D>E;Ceq(Y+ z#JD+OdZtzvpuVl6!`$qBP<2;kcXN(A)7VQVUJut0si$kCXra}836F{A1rjy2wK>M; z0|QO>(?9!2?&Jd?$+7VY1h;50{2c#Elf_RHZ<>T4MhkRG8UmvKss8z;wXoIb-?Nzq) znGBcNo^}8TpV-*gEycDi>C2Wa+wC~~$&4*T})+f}2M)$7-9XE@bfL^3zFM}%}!I%UbNkz!G~jiHW9q9LPl zIRS zzJVpK@8wF&ZvyI!bXBIFdNrRA7IN{mrqOC_uY;74rEz=o=;i@)_Dn&)%P%~OGLHdl zUz~htm6P}Y8dlHKbZexePu+jrVor%(PiFEa6?l8*EXcFylS9EX~Sx~#| z<8_u5GFxuUdV5+Zef|*A__*)SE6bVvi`&9&voB0fIyTNGq7*~~mCjjQSueBdA_@(L z8=v1~l5mmxKu(~Ue2=|uqPI$KP-B%~QH~EgPiW`OgWOKs(I<_g4S57^f6Poy&|na3 zYe=_CL@8{6B!BJNwF>7J&q!1WQHYX5BO}MYc?^sXy%n)S!A+D-6|!xlCu%;v0#S#V zAFhplhuOEjtYz5OT1#j7l(|ix@$m4t7WvhhOMm>(S?BkTj*ePjp&Wrt_R~rDGz{3}gizYp z!afw_dFO2dK$(zHwl3{G0MuQi8dnz=Mdr3+_x(FrGt{RzROA0nS+1LylOKJ^|V> 
z`}?2fKLzbLbGbk7r+^V}X{A}jcUnDObnf`3b~)|W8W#(BN074hnIrngp^=1g^g zy;84gDJX-}L8EsYMsKewj*rE2HL~tHnH(OY z8U3X2`g$u=Bzie+sWDoy5s_hDlaH;s${t@<3KWk2KK#`Rd77;NM?4NZM_+S}J}`zy z>-ROgw2uMTPfiWwf$tLYZS$LS90DNSHW{_H*o!Mr*iaA4GC4io!ZnFXf8d&*UwYQX zn9+UB?L~glBYAG4F@2f^MRU9bav)y0ZJ$Kk$rneNz1=E(_S;hJc;r0lx`wtFPCk=K zDoT>sNoQK~t=nMhFAQ~*=$rbl&43uFyOr|H7xj?rn`B*8E|uP4EPqk`l&U><(bPpH z^J3-^aG+AIL*WIyy?kG%PN~23#wTR$)d&_i@#qrbP$YO{HIyHE>9!$4*>dkP>p*Dm z)i=H5blI@*R6gJQ`SYt145e+FGPQ{Js}x}&Hu5&Fg0;7P*aHdcI?*hR1tLb-=f*R_ zKRa|+F;Fc8Njb(;illE>34T5$t#E78RjOf7Df;pq^Gy?aB^CW&*S9gPKXS3 zQ_7p2$wD1yS?fIXxtQHydZ2ym&*dU-s;lRIA{ZMU-?s6D#Hs|pJ99uQRKUVeM@>yF zt;c&HQ4)kC(z(CMhG?%5wbO#@uD|x<5h)PeXZGgjNKYfzj1J~4fT_o3~)%^T6IXR=moUx!Lf$Mnaaxgx8IyyR8R#T|qcQQEk znEhefoNZy1=QR$EairOjfd>Q9{gC~I-!|(pR-=E*yK2*`f^C&pUsK|ZRB2(JA zzY2AjM#`pfOAt?BXh**l@mPU9RGG5=m{Hz8I(?Cgh4pfrdKqGA|? z-&0h&=VpF*9&+lWKR`pQD(7qMvC2?pOgb;b@++67d2 zVwB?+jdh@9hT`&M`Fi1VZ|?~W0cuXWg;A72BG*P#sm^x2*qEu8boxs2+0q07jR*;@ zNIaC=_l`hEkU~I48Yt0vc~7pdXnL5s?_AmDauI*SET?J|xY79X9U%WfS7_n29KU@! z!X;^cm@W(POIRX)$uLx8-KbQ`iS%+EK6Ay_w+Ap~5_CuPqhA()-uTPuT5G{h#5&^J zQ76Tygh-`*WdXc7+5Y;b#`*Kl(iB7uJyb^3C>^4}nZ=qFGbyNQ7}()JRp)toPfxV_q4=2M zHC#Wg+-EhWCPYV?V4f!qFBmtS!-s$W5GorBZi$#~b(|BP9{cV5seok3f?!7h#;8GJ?iIg(f4s$ODjpX+)`yi7 z0*k>jrMpag%q<@I1RUGW!L|a*mj%b4a$os%!4je@^)elzLtVdY6gyXTfLI|QFyI); zoXPH3C-IjK7cXk$c}^t0@<@uC&Ai%-C3aG8-U6ic)TVa7wi=^J$?EH zqC3ZxWo!3*Y-*yyo+{(lAV4&JY~YK<>Dq)UtJg=ctM$F6ZE(LpPz~Y>=P*@uDC*-L}cHDqg?# z4Jc!(1)rj@c#UOOPx%lmWURy^^vRm5X;*ZStu;gq7ZA^^-rn8?@87?F17$}o-PX_) zY#o~?=;Pp*<14q|O6RCWtMeB$Ha5Nq5{vx?YMcYbfwH|vkE!4)_s@r%dbJO4d=YpoDCVD;GMWIMPDYvqYU~#)pHQcBI{Ev8pa-{8c94iJ?Mi8m* zh#do&+bJp-?J1emA$}v=m^a$>J;%}Rdtym8PGGazudFu`1)IYP3Hj;ywm)C{Z#i^> zQ`}a<@YZK;t~(#yKTb3CbbM}Sjwe?MUpLwC8hL|M!i{ora%rWt?pXTiapU%vUqkm2 zv*hCD#sM~cFG*%*JeTj>d2zOsgA(jY4h~BfS6BMBql$`ulq1-hVsTZ%R(j_xam{{! 
zPbRw3x!JX7DJ!cOB$S))9_;1Bk>3p@{?$PW(CRf*-5WUWoZthF(iukTlr)S4@Y-|q z)+UR?M~-X+rzb{7_~XlqoTwXLXS)su%W;t0d^AkX^9C&>*;jgzj_FLaS>c<^V z#4e~|Xsp};tepF!uqunci-$cAB=!paa$t%DAzC%#(7?xh@ zeCs8WLVy2#;qfrJkU^{Y$fLDq!#pEEj7oTZ{`pc0&hKMY`RO}@07Sg`ESHw7TeBv7 z^>!6{6?hVUZvd0r%rAQf0mBl+iIm$&RDxmFHn8#(o=`WWH1WfrA@g!+KDjYiG+HJ9 z@h_WM7SL}T`;}z{{VKX>z9{{gzs6;S`eG-Pr_+d(I{{I_8sM$y&lE3$Fp~26kpY6b zI!tJdeDled~sHqHva*vpIew~{i%wZx${sg{I{;uV<#gdS1-ZNsY3CU8xA&H4vgg#i}cK^H#STK+dgZl<>hXs%U zg5?E00<{C$uX)Zg0>_?n(WtgD~yYiqkb^l9#`RBx5^WEfd*Zgjj%by#&#FzjfVah7>9Tq(rVi6ZsY` zTv#{V*UQlgc`)|qO^%_VA$34xyj;}b1rrFjnyHprWLzL|4!Sv#SV&1Ky#|X*RmJqu zLBydoTwFq+Td@4lKN+=&#yZG;eUoG@$@H8`11Q*n_zsTD5FBt?&&8e^Au*cn$pM;0k$vfA=B0E1T!ncMqN-W<_J= z+{T-n9f!Xhr`Q8ojiLh>S#>cxdcORp0a$2=UryfcE-sxAzcc`}`x;b(B_3a1!d}wS z(o*9zF=zs=$~Z3p?`{F1a}vb5CK&3*=RjF8v`bnscfbukLaiQyTa3MCLg8hO=1lcx zNqNs?6U>EeN0EslBYf+TYjvYps9eZ@<2cZ4xcAJv=c&#pQc0OY*tV(l%EL}}0SG$T zEMez9mb{-H^+3rpcNP>kep)CK7!@GUNAVmn@X!zgMzXek9%SUYD=qG%+dFnwmBZIG z3Vm!RR47{)yjSXl_MK{ZL{8C{Q5| zykFOV^?61x7?KY$HK}Jy?l1sss}SOt2JGN)%aOhUdgp?t%lR(uOS?3qPC2iRgP|!3 zxUw5%cGL0aSb;m8`$z)@J7$GM#@1FFCub~*-gDnR6;2KouvaIWE0b~w z9>iIy8TAAuBi}L9TzCRU&1ReuhP~VScr74#Voq;zNoyN>d^^JQ+f9}qLh(3kYlZYQ0c^^V z%f&n<27O~1-8AAPFMinp<%z8zUVK#P7y?BB>r47MEPg6J%kwLmiZ(Hv6Jt{sw}z8M zj15qOwsDtVc^!_P*pcBRZdWJ^cKi~ zs!P~5m9@UQ+|kn$3pqW)MbC3;tQsId42%jz8c+mx`En*cUC#vF5F~DwfG{Zlu7h*G zb(xN_=QE11&>hMxa#3Ie?LB*o3{$uu_1u?1*hisEY^*5-r3_9`3yh6s#IcRCC})Sp z$7}Itu?V%%z$ih$77G&i=_&IIl*UM07q4zg4C`6F^k)Eavmf*ZK1BBB~9qX*s92-{8Dj>6>xl!iOvc1EZn z-zQMbc^Cgde>RXa9(wxpX~<4;TJd^f#r~}HI4(Z~eO_F=e0xhTI5!g$lak6&)0b0A zBAg}ZPiLOqx@B{}nV+vD=sJ|)yPX=|>N^Vz7tC#Je*1s3`;(sVKX#zb<@@FsovmvD zI{t<&`KxaNUeW(F{zG8&FA(bg{tM0@ADHi--N5r>oAd0`E<-ZPUYd=1YRVbm{FB9R z|HKFR&%cVs!dBEpC8v|lA@vuJ^}%R?48!eyoYwAA%4pq!L6E4RFNmH1SoB42bO_MC19?&k95s_@9fXDDM4zGJfYIXf{>F;1KNT+%I`s=TxV}aD<{s*#Ay|+(#Q6}`x$9jEYUuX)MHn=gN#`Q-4EEZkBKQTQ`4Fe-dgO|T698@nYg zJxOf9!Nng6Dl0W$ajXTwNV#6${d6sh2>dL#D^OxIb#?DkG2@U)QWpZi=boNjz~u;) 
z3{sM$-XoF|;3^4~%|d~th0?$i>ASdwjh4eDyF!p8^NoJq_i3EtJ?_NZca3 zP_)K&T0trBCrHwtMc)(Ff#TKh;;bJ@jHs}TQJxUEd(y^1PvgJ8qLdmfZKxs~PI>;I4&YL8$9*fhu{_&(BZC!679)OThvUs6j+F3bZ?c zfjZ38YqJZkQx<5Q^U_)L2!C#`25`5?_$0rH{RUw~;~~ z6hv}c>rVshxb>yhf|nUS+#?AA_CuJwU9Hn#>npy%Ie6%AZ!x(c96>V{aXo0asgFvS zT%Ra1atAY9$H=uX!IMPQaO_8Y>HxCD%*+fTwGI)N&F%x_mn)X*?*z5ikn5R6PLbt# zOwBAOo3$cF9}@=f3LT)4XQv0e)Ap}lzaGYd7V@>;*bCq#>4Vtm~s9W zazFu31vf8hB=6m_<;!18)#Cm60_SiA9(HC^E5&j$f4B(^0u_Az^yvu-=BzOk({BND zA#uL2Vqdnb0zhm8B`keTNuWp=UR*>~wUp25xk@A~pKJx8ZruXDxvoQ>Z-YKFoSGKn zmT@`ZW72t3y!rVR`<^@iCuL@nWAS)TQ&u`cx%s#qDoKX$95&zFQ8BXSTvxwZD=Z>b zuWxMRka2llDYx!A|B*^^#=>c1?X457e#E@wPC2CQQIPhsOgD{^$=)^sjKHKSQy+PP z2Ix>+x^$^Xos-VfX0G|q9maPIf;o+bfLWU$D=K;doB~zEpjwnL2tS*KK zfIKD969ktPYyq_Im0mSnn8VX@oW%E%Ypx8Gt^?F1;MHR-p5q}P zn|2h>IP?J~>Eeb!?fiXaJa0A{!GoM3oClERYt;C`IH7jZyfpR%4KqYS1S{|N6GCVihpFRl3O8IP!u8ZQhL6}0BicS^&+3Fd3vJO z@)w{7!<|>EKH*os7X?a*gfbxaA>A)YFyun55Kyi4>*x;~87-x67<_c=*6|tp%8}FW zkAOE%4w3!wsG@J_V8Cbmd?#;sk*KJJRG;uh#{`_Fukf8o6T)9>1LdvPn$Yw_u%H$VYoL)idH z$`OZ=!NiG9I2-Ev`f({#dN49`7HM$casl9BK<1g%LQ=Cw(@)3yhdKy)e&^;649hIP zU&qq(e*>V0W?{JMC}Wkug(O)a-Q__1OErJH4$(()1>Go84{GJ0NTRQ$i!>)x$lUJ_ z(%EB~(co5Q5k{p>Ng3q-&T={2e=sR!2z9*;pd=E%hPtcZs8=AYS%_OVWjc~Zm26gg z+A8ZpyAQd7krt0Yu&qJD;pzMH@6TP0&;18J3Xoa0Y30f-UAC>M#A?h;_K3(%^}g6? 
zknz0fIb5q&voq6a-{OFQ$gvPjsv8K9QCN4okxSTyU+4-_vAj;$APVdB7PbUXCp92^ z_!*Uvsyet(go?1JWJ@sNcqneC4c+h#gd_9_?Ao)ZBH<_l2GJlwb%13smh9NP`2g~4 z4K$>?Kd#MXQkngJ{QnB0hISzEU7@N6XWHP+d$R@ z@9!P!@zTdhBZXQe<~DMUB0dEUe2+;_E%;{QX`r2#L5A3OzV!fru?0MB(fU_uwJ#}@ zj9xhIQuo|s^3!#Zk^Za!%TIq@Rha_5JwncH*jY%shW|D8#N&h1Wdjen*y+EP>Kj{~ z$Q5S^lXg~t)v=~3T7#bf@0=#0ni$>=Vo`~S7rC|IC|tW{4Icw8*oR~*2PbAhyQUG_ z4*UxXJeFP)_U}C_4q@rlH8t;`n+fQ0D+5(&Bp9<%=$wtV?^0p+2lSb(L`1ZEI1rnd z8`L{VHn@L9VP-{h+Y$cG)P05eKfK*kx<;yoIW=tGy7kX~y%xO4F6>1`(|HfTX>>p2 z?~kJwgGtrq2cG{8$PFyk^Hh+DM|Ww*mEmx1`T~dfas7$A;h^S}m;k>f&W1^;(^zn3 zIQ1?-{I+aNix)P?cm(W)I-&;~6m#-LgxBn}b7NEF^p79VZNKm(dOMx@w@6I$_}@q@ zqiFJ9kr-n{uhl(q!TuG8zyX{<7Y84Mu>R<1H4OBQQ)4#Mfa7eD^4R&nm=lkGqaO-l zts5-EPAH4$lu13`cF_0xT0z~i{((8TyKK#l)|4iWC-Gad!4m*+rca4OhJsj8*MCOV z_M-%a0C4Chpa$-vt@T$?0>s8C7tZnq1<7!dp`&u3L?%t+(X%XtUqLq|l0^{6m#rWS zYA88S;GogK*>r${-MI1;@F@wd^qas$c&fhngTQJjxDE{U+gp!pijc)A8SN}DNDuW*Q{E#wRmQ- z1vGLAG-32(U0C`YPhLsgKoGlX+oA?8F9o;DN8btVu6V2z7lEh{9PS>3uAOiyfjkc3 zTQH|@w0?7o0~&NLFvSHR$OdBdwTTNU*kSUC=#nr1c)D}_?d;t(Fvp;9wCJjMR&txy zmtbk*b38|kWp#{YQ)ViRXcpskFmf=%;nI+2nxiu*4xSG&uOBXkd0tH>8KYmcU>{A> zcpQ%yMRG`_fT4a`b2e_X9NE zh*2LN*!>!~prcgfzLTS%26(Ir1e=2Ta(bH9SK$m6wx51d!=i++#exx+ZX311a5K2*2`()$-5uxbcF^a=zr}Ps_nSNDPyZDi?5r_J z!P&>F%2lOQd|o=Yrz~AJ|7Q{FKb_G3oecJ0_X&TqJz9x}!A#91x&AVHJo$D@O8}_d8)EAm*D`-dzOn%gy)e~#1+esd!7fFD}YfaYq!oLjE< zia&Vp;NMIMt~)thcEP@c4qQ0%D^Q%Hq!~UiY=rJ>KvZG8Kj5kbREX}sNjz&$(r$>C z23})p(_upS-G^Y-*n&^fM&C6$RJV}1=47&&8Y(O9C^=gE{DR^W3YS1>yRQ{C&_%!k zD+oI1OlfF5meCTl3i-G}9XyqFA0yEtI%ojq1Zvp2yF=|D_`Px@a`;cg5vNKMTEGWv zXLRFnuSI&oOAJT84zMc+nkf)SG_>aUgVyplqVpyYc*s}@u{p@P<$upaDSgZKU9s^A zI3Ejm_NhOTFB%FKohec&rcs*mGUgn?kz6SLu>ThC?(b%d!PfUbfXtfGIbOs+WyQs1 zBiMX?23CWSHUPbppOKh7bj@%IML3^;R_#w=QjSXaUQLwOHdswCTU@`{T3Tk;g2wW< z4kLG&py0cHV+xjHJhYStKJ7WiGB!GTmk+w1P<<}GGvNC5>)6%a1}Gdtpbznyf`!AW zV>#Y>9~#s(p58_G1)3_utn-lXV6QWDn!P7@lKJU7eI5(*o{7q@_&_a?r@#yeYy3=; zbh%BB+#V+n-+uM2sKOE13WIx)9@!eH)ppmNy>qS(XnQcMOc? 
zRXOg?Auw2*OrQy3A6mfZG%@#?{JAizjmKl~SH5U?egs0#;%V2eTNeSb_#h5C8AJ3`kBejZ7R1A zj6+A=*56hG?otBJi6&^R%y-^kn88D8geLI3ZIbV(6{DziX|F<2#1H%O8-LEJJkQF? z%6x;geKHqI7h;u@4F+q(l3|?WFI6@A5tkHp7KF?X;l>n&Xnc4yLM(y!vHqr5 zm?o*R!q@7!192lB=a6guLy6WJh@pi_~ zoHx$>q@q8p{&{)y--%kt6)ep3?z@V>BN2Z;(ct0szd=d=%_Z#MnD+^`#Zv96NpMS!G{r~hJ+ zzQ|}Uv9}iAis{luD}DyqcT37+B+d_BY_=Cn9^JQViKF^2g81!D1M zMvymSKqr#C0+}d+24#T9o5X;U)N|!qg>;`EK(mr}l3(vW}G*Hprjyf`%zI#2$853t%=d-)a)!^m)AryB_)0^!Ob<1_#EoOG0exz zjd%kCgA@=dS^KS~$2p{(=4c`mp_Xv|VPZl6RGz>XsksE=S7R!_XcB=R;r0&Ydb2$E zbGx$!a!FL#kW6Vi^>JR5NA<1e7LTfj{&29paUgaNxKYg z-ZIB;bKfrFz64wo19w9w43jF!Ep#~my^3pV$1enaKL1zE0}z~^7*rsI4%V6k!&vB8 zkd=3A-Fgr}g#gQ4fB)8z%r8(pB}2aCh*xKlUk#PCdhXA2CsmUX@bDB-aMUY;D;~@S zn|nMu@nhg!lH}s1uE{hFzjze(VV~8s&Q>xM70=)Qfc+$8snH{RR@%lD-3c^;r*8nK z(*o~F^Hq?3G@&f6DV+ml{$3S)EbPGjFk2Dr1y<*jcrPeRY-S)XCos(B$o5){<$w{E zbmb%#ZMTS#yY{5Tn)e?*3!gXuzMUV|r;q6?tpa0B<;AjL#gz1ws?ZN)*hj@~bFlUO*Gk#v(q%I%B9?gf~ zD9N6CILvCUOnu`bcvRi_%)!it9QSlwv^$YKMfT(D$GjHO1Npr#Pl}eF@KGrEx9}v$ z;BObo0XN?N7c=0T_@O_l6b}CMAH}%KlM4&~`*fi>df);q4a{(`l@S;D=k{7I&h<@@ zANS&U5(^puz4@XccXa75r9TUQ0RvJj?j^ez>-_VJwdXu~P^;r{DIzb5x%^CA6>dLu zu)ysc?kwqYz`AHVPvc;5(f%KHr=;b+*ZHXCZok0!`met_C%&~`yK%##(_-bNy`t!W zVyGk9k~8N+b*stmzyJQ6b#_7BEmgU+wg5kyiI#jPkD(;iFb~y+{T_9^dD18S&>_VII~6+r#S99~X+Q;LW*CFUQh) z;Xi}mHlBcBW8~f^Mz91Cg4hMzkh{(1udX7@MqyAIT6*rn1A+dFC=y*TFe1ID7BvVc zK3{M2ttdy9n2>c*s%byw)L|@6a!(dU?VwldO=F`z6#>d3IpN3$ks2KtI;W@ZkG7x? 
z;b#CtJ4lwM->t2IgU0j+$RC>5`=_M$322JBR!%F zEeJhJWaXt%Ok~HA_nt8|T6Iym*y$MlkObLq#%h|B11|_FgGyLti9_+V*p|lb$0fc0 z=DiXF^-Ph7FYYU*PB$oZ?LXGNHyH+kG%p0G<0&n_Em~f2zL=Yb4*V^s0dZ)kjz-#} z!9$2S&k!EZkw@f@KY;Mh`=RdFp`-_Mqb5pfGQ7e+q>V5{efB8f>L=Y?T+X3HaB4q` z?~7MHx|M9DcWGLJ5VF|VEC~Ntd@_}NG^wr=&3MEczzRuoZ^9j4W~f45fhZ%CwZA~L z3IYtYGXe0aX=CukSfSc?@lI3Qj!WE<)ukn%9zcYQeElZ?K!L(FfeJnI|a$1q>GWCwOgrRB4BU7PJ_w(MW#}&( zpiY!~NxtvLoWsD{~ENn;96+m|nA@GV6(_x$}&X##}40S>+Z%DE_AblW{< z--gwzP2-I7noT}ZqZIQ7cC(E+Y4jgN;wTJIU@HJ4UeMLk z^#-gWZna8U3#My41y+d$oJvchN*UgLr>Q2iN3UJKJ`#jZV*`LZjSQoM2X(+BWjPB& zpz#(UDEjW4&*2!o z$kJQ-}d*;qNfM@0gi2+(CeS)J$qPg(8V8vIqw| z-N$+{gOc4if3H8zE)536=sg&~D`2d!DF6bF2Hh2fscuf~Xf%V0ppJvDuNQdTjK~5+ zIBge~44M$))7O$m4l^=y6A%z@b~yy+O_#=wRn(OUy77ZaUO>Osw&tuBtlV*YDcxzV z$&R&gFg#L;XfYSKxy!>NW3j|#Nx0y^!?u^%+DM~H(hK}uGf3Jwfw+tGX1BLB_qMBY zQwt$UE^sTRURTw`lWiDB=!!$k zI)Q2Kg4~JE_wjyU{HkC)&vJqqNc9`GSqHWR2l)Rz;fccVz+CDDCNp%%PA`Xvnvho? zGaZ%jGuZmD>uGIoGKkNC$vX-jA3~P#(=(j?GH^KW)Jut?0u1Nsg2o>X(9vw2-Hf1J z(GR4q;X^(v%JB|1Q--IfwoRG8-iwKB@N0F!C-=f?76p3WOh5YnaW@~6Tm{!rGIbr? 
zGvE4Cai4RI93H~MNzQ}_I^0Jq++(U;IMckQ-Lmo!z+#9>f?=9?ZG@PL=jyu8C?7fD_IhQ1GE_Cr~tQX85l_9lRu zivl@GEhBw;{o&a^hTdWjOgTQUGU{NsCfo`rO=D2%?~s#gPHW*$vr9HUBS_RdZnG9# zV;Bj_TR*d4@d|&wvBDMGUQYKn%i%2~;U3?W1M>u0UG=Zs>deH&0cOa=V5J!-1^c3Y z7U<~x``J-gmm|ZimYx3YMMWt(?MOL9X!x{SXM6(i37w2Xbt+StfvGnY9L=blRRhya z)lR&@%RNULi(#^$4ptZwD<2&Ur+F-E(2RhT70CtdmYkwfW=5wB zItvwJ&s>6ME4&n0KAa*~9aR(`P3&-Aaf)<)kCopjt{>kpR(N9o7mW<` z5MkOz97g~85jb`TV7C%yxDXaH&>CrJ6kKuH3G?;Ut~Xl&wU?A(^3_3wlJYz-f&C%} ziSHQZuIl@kV6d>8h1$+l7hq!cO~cA{g2vE5x+Qlf!Wk*plMqe&(mK#^?*L2(v&K## z<)f=eoyK90MLxmw%QQ}>(QYkYjo}pqWq@UT4C)JiA$5BXnMv5?f1Da$nh#^oNy3_q z12~yG(TtChtsJghjfPxtIV5u#Kj?2&gcrxRSk z*oDAl+tr5&bhPV(+@^5@vdSg(UM`5$o@fUq=TIf*YB`IHpR^aF*Z&9}T8S<>vK$2b zCpfG$`jbuE?w3hUj3Av0ESR}RE=D*&EyT0I5YxG$i67KQgYuHu-~1R|h#l1YPoxj` zv-VjltS#;{3G)wFgDI(NFE{uoR+ zJ5j!(){P?55`s3V2z13U!6$MRzfzQPpm3T+2Tbc^mZaara6!ORBN#Qx!4$!K^U-Xe z_|lTVk4_F3E~vqDMI9uhDS}*k!6`2xQ8(6(u%Hq$Hw*}cig}%7E|=u9K{@KDUo^j$a5$qUcEty(S~V3XnHO>2S{|GHGiIOEK$BS~=2%}qo#GJ1aTUXPnJa((`4o&AMJ9EyBHzRf*a)`N2dTQPug~U=kbVu# z;=7IE9BkD=`ln8v!aO}NK1rjE;NIY+<)S|G!QtV(SmCp{HLp$Fa1USOz)|Zwb!k%z zGv?d8-Dj}%qLQsNx1>E^acMDH=ISFxF>Ycbz}$Yo5F6^INAt{4+{YWZJ(APw$`&#k zeWEFA-XBw+^vc9=WgzL2U5R{B@wkaJJZ>*)Ojb-O;XV5y_$Xr9O#t}4toQhAbXTS3sb`!QCN-$i)azFTFHAf+=_KPK^gdgN?)`!6rV7W+ijOKdJofBqL8p<(p*tu zsbaJZxT1Ha50}7ZLLIIV-!L!O0*rSM8t=)n2H2&QU~m!+?qkDxGy{nIf&_;-s7a3l z8Ot1}K?kW~S_DR4@ZmBB<);T)lezy2i0%?+%s5LY@qg z^UOp;^jKNtd=?q0g{N39SwJ8+Mp)K?sz5^9=s=4x5r|0BtGT`+6Zt(`hj&7{DJkqI z{;@+CXolS9ApSewi?>wa`Ou3tH^EARjK2_PGlixQ#sqK(FiEs7K^A!ld7j3b?SrO) zldep}0EAY2#>jzT%kn2z&~B9i89_l!ilud>>mGT>c`Pmq(dV~?A%!Z1kG7NqYJ5XD z%H+9bQ;g5#6F?Wo>TSnPz)Fr?MHwbcjt2mN6NMj zwu>cb3YQ;In<%c~iYV7^L7)*Vp4n#>t6*qqc?SR#%bOAp5KZC;f@CHoP<)sn%zwY$ zYqEs#jOM>!k_wpzsDy;c-0+&E0Xn{E&yj%}QxSUgNXwOS>Wu;e*^N0iG!ghS28H3J z_$7~{@P4Ymj`{69Fo~z8E?KxR3hV=82CaIE?xd)(uyhr=If!6W($c7)K#-!jdEkjU z`uc>r`UT=iF~>_|!lMWc6QFa1A4N{cw=jt@N6;vPwN#@*n4I)bD3z&05wR+HlpqU+ zIICmAJ6R<>-e1zt`Xq*|+84!h@VKpRfgsl_X_627H6JMBfIy}ab*x=!* 
z6XDv`tGBbVrWOcg@zGo=5V&f9&@}0uq+qId@Sr0&G~`;pT7y#+Nux_p`JUf^!?CA2 z!B8Lm`zp9et;H9(V=PL5!Iy~%7jpPhx+3c=fOG?^2%Kf4>VMvczv=;#=2%2;VoJsi zDJeaiz#7!>G=ZDI9bPPX^y<~CYSbZWdnyHv6%6;&z`|?6Q)1Hxs+JNer7zkWkRCLX zni>m0(l*l~r1;(NKL#|Q`+Z2ZLpi#6yl#hX3*GBD3u!QC$+Wtt1`esD; z03AL6BdEt|m?3>Psvov#^d9XL5lQsbb7G)I2_p^+OF$JcvkIOta9o|KwMNQ&SZ0xA zshbOpv!6u;VLOb@W%~hMg$irJ3)C7#bCeH-fr{9LHwPP_ap1s#m zkf4uw-m_+8!+D7Aizl!LH7+{9aH>=~rdQ>X2u52IvN?PbC&1oOXw4Za*~M0gY5y9` zHXWtr8*i-I`hxvtxPTG@73_Zsh#*aWqJBK)^QZSG+jFw~-`|tDc7k-d!t8(HWEz&PDGojZQxOGQI zZnFLBb9Z+Pwze^qCWzy=86!JFN2SWaw7{IcM&PvgM`U4(!OXx>hu-JUpWhKV!BOjy zNL~k29#CSbEC)H=0TXapY5mIDsb^;O?A$Ut3Pfp;7Bvo?Z}^Ig%_e{5<3KZSi8kmF zP2>i69QbTLX3*iQ0IrP%wzMREf1VeZHJV>Ve-{EOrtQ*+cYYudD00pPWB`X>z*Z?I z`@s-#XorfTBK@@mKr=lKo4mbe6|~zgEA?=IwM;}#;BORp`7}?!Tz0E84AU?N)mns2 zCCcXpj!F$o*)2VTjJ%8BCyls6Ja9yV3RzOAp27>BsRhrIsrS%`=Q1x`9y#X|uSSk^qg3$P79)!I2)MG4+9;yyuTBJLkyo0stq$`8Rh98re+ zSI|9vC^=&6K1nr)ejJ$3l5w4E4T`TXmdZvVAmm^c#~gh`ljM~%-ESsg=q79(X3jA- z>ZPw!iJQR138xXy5oa9-QsN>vwzO;)vh)Dg*b7e&d7QwmCBRrOeXJd4gYg0LdyZq( zD}jEweE8S>fDd)m^zO%aBr>pI-c~e{k3`Oy33y`3IUW$VoW(zlnlldU0W_?}-xWda z^(eq}sD6PF9lh%3;9emv5;e8}DlgVe5+~sAf`8~{6?(3S%As^Uwoezaki75cNYut? z&Z=k1>pO}A6wu#a4OxE_9Z^b|Q|GuCZWx0GYR-^c!gvmkpsIx(p)*L6l;w%i_*pp95~n? 
zC4W{9ByFu6EV9OOk%NP@h-nuZiHqK5DMFcp({3(Pq-4$Kd21)-;<9$wY47{{{l3rl z`97at`!Ll^gj2y9jiRG|+IFxlXv&=REb9TS>@$(vF>!y6sfhB{LQRqcw04zef63VURCE(hIJk>k_ zJ7NXvDSSr%tBnyb5$n8kYBI#j>A+2jQW@6cxohYt%qRoT&dkO*U1TTp_LwEVboQ3$ z+vGdf$&62Kff^K{K61X$nmgruQoV{Z5TQ|2FiOKxoGc1}Sxb;?GC@Q)f2nc8M&xf0 zW%NPGZ+qY>qnJL?N$2`r0S3LIZ{BC^%Ib@aH3+DkWWZD+P^mN5XrY$xu!N4_OcO@m ydvB@I6_heM<Q-E!pAr3?RWF>ZydYk{*J%x^E_bespodw)5rRxEuWUPr@OP8 zr?bP+_1?B8JsjLzr6gq}cW+y7@9F98A-jFM%fJ4Bq}xfm?VqEfOz)U z9$KcMzhy(>dFMp6@P~&?F6cJ@aPDgVF}r@wi(C7Ol_t|ToRbz=JEc}ucLuSZuwP6R z)a*+iyX_%Sv+r)%4t`~PKOdj`hc-px; z-!=b>eqi8E+0nx<@9gHbge(=7yjb?oD(G9;#@%rd$sBHN+{JAxE-qOs`++gi;oAM; zcBb*RJNrE1lMhyUR*$k-hqI1rnQU7Tks`Ns>Pyvj{Hc)QsmI9&)w5XN*lM+Q(Se)r zflAX~MSe_rzNWg|zj(U(haY~Bc7C&C`F2ZlOG``qifMswMsijD;96NvwG|f+Smb-2 z@DekLcc_bBBp_gL>Qq7U!Kcb0eEe-acoufCb91JcTYEv0L2`I^btI?n%dWDiHUCIC z^KCR(QBhI+@WbE7hkIjURKxP~|60Hpsmd=q`npa>iJKiZviOL;zWxfM1zC?zc3lYJ z7ks!$>(0kNHb=R8?OPf*)*hhaR`e}?(TZ&eXXY$$eDla!+$8f@cuK2g@P~j^1>d^w z%KCn~uQ>Th*(l<-&`Yf3#e7%yl!gdg_vEDa%uG+Dhi@}^*pO`GG}Qgle`adT!kj(R z+nSdi<<~8^Tuk?`hPY!%A3xfgsfAs>+z_TToltpwGbcH{sCKY;ChgU$SJD!8!yjJ@ z$@@N9v|Rkg9}SAeE4CTm?+Bi;Fi6s`%=YSU$vIe5RCGJ{l>9Avib#1s-LngpdV7hD z_2VU8rCG3U$hcV=MI7y!7%Vm#?QhOG*`?v<@1LK)rmf<6xJ2^!2#3SX>o6Q_FZ8QX z3E99mXX$x49h-?>X0$$2d+lXmCClvvpMLx0mtV|vnb1>CDGkc!?{rzV&lfFFtQAj9hYGTakZOIu^mbXiM>gMDt zd*401cI#F`q8>Bp*FS!^aOsk#*S@hAO-*A(Z8c%SN@?snOkApt5=8o{QMH_UqAHc<}@u^xw0oa zSfiAl7oGg@Y(sExa6_7f+3G#cY7Rd#FNTJO z`VW8HpzhtY#8jt8(eF!azRzIVFXtACndLg?>V4A9pZ^u7;LzPYF8T?FHqbNfKQlYY zEHABSyl-o0Q7?O4qDj7I;!h$XT|GTa^Cu@04U!F8SU%|%(YY?I&yQw&Y_baW%D;ps zHO0LNoEX&_@=w-Je0coj9S3&4*Z4Rmz0P2$z;9^Tk3YtoKF#MB5WvZ(SXcM#nn>h8 za%JT4;P-zm^y0P{{QUFJUp{|6+?;8PtxnNTXX1C_RW1wJveGi#-~H*|b<;?LBkqnRPGv7%a$vIaNB4RmqX=pi+3iaR{NA*P|Bk;%E+lJi@l#rE+E z;oyYBF)Gg^C0#(D}3kGu&}V-e*3LH!={4OGH8@$o+zSvMKoYhkyrFSuf7}k zUYz(--@a7hAY8k4%{1%yWzpWghGZdW`>Vg5J==v%{`ARR!uZkpX0Ld=Xstox>#cS( zlS4WqoF*M?#N(4a)e>%QZWNg2%;$ggZK!8$xW|M;X|hqeA>yOUV9aHq-L<%7jI|xy 
zltHfrOV`!ED-CrSSXXr7#0ep(Ru_X)CGf21GAjxbVc+z_N-8PU%u5{cm5o zXbr!OkBf_n{)(sa7?+(H>j-X$JGgSWq`7faRMhi`?a?hcPE0n-N6x}7>-ftLTyCn4 z;y2N*%d=zWsGdHv;Fk3&#)nU~qlIJbbvFxwEKE5xz$cSCMQZm6X)wCtk z_98wg-Ibj$CMKq?H2LW)eJ1X+}l{%|fNB+0F#=mPE}Gdd`V+nUEr zZj3t^k>tf`ym0ZN-qiR=L#~UN#bfuMTXS9N-elN}w0YDdBGwD-alWN;Y1M!pM!bwzeDFt%{aKjC`&T z<~-?%(#v&jrY)fp!PJRv?0$K7x8IjztnIyYH<1tAvQK?GnPdO@7hIeA##FPBktQpr zk-i4g;-Es>XZ2c-8U-c}k`q$Wg)3LQjjOXA>O|EG{YNy0@;5s4zsWGg4WGYsT}r&x z-RGsHA`O0Yifa@nG{fY-oHxyNPQ<;Xck-VaX&ONqbLe`0^Q4c?H<7{N-nt~yoTMef ziYY?79k=Y-rMp=tN*_yMkmt%?*5lHY7TK6$5;I>w=vhaHRaLkIt*Y1fumjU>&ON1I zOWct~-+gxo2i5D{pYsAo8jKt}iVCl+m5WD|Zyp)MO-FuB#`-1)j15qp&bGhiKQ@qx zb#<)1w=dB(CL1~OEXCi1ge}-et&EHeZ`Hm%ZtW?!qDffkzV<>E9$qOBPw)Swa&wLs z$FQ|%1KYni(J=MS^tT>iq&9UV4I^jg6bgF{sb)rq!G{rG2OaJSpZavh(!Y1(yt#7| z^qAUs+*eDLChtFcrq=c1wv>fUUXY1SuaF1l7Q*iX#KgX~Cmwf{f{SqLnF6c#B-qtN z$@&c36OwUF`Th5~FYn6exU^&+e)s3Owc5QpgNk<#+f04!48sQgbU;-#*_+#fz*QR^ zJe`P}VpSEfy{Y{2>YE~}OG3XD%}fVKm}LGcAS8V#-Li1FMrp<#;j$&$;X2+TTEAw} zp}pVilI97>r1rQBI5hPN zmstlRB=5+$Mg8>CPxXmj#*{hpsS!mi-6_ICFl$Vz$uAjgIEGxHLYL0P)+@JHgTU|f4Hy|<1{7LO2<82{j zM|T2E9$OV+&A)oji>t@Jj*klkPmM&B%HwtShzbrC3|+C9Kt#Tbz$UojV%Zx6nAAWV zw-C`7WS4UMkqa05x*M;3Ty59Gm>6!jZK^e1H8)qvu6ot<^mGU^@8_!R#e3_$K0NzA zbZ5sC^JE0M)$7)+Q?a*CI5GV3Fv|g%Wfv~|CEV%O76yZHNmV$o{_3%9mIZIpX0QS- ztz)D9dPmoGJhCqBZ~5_uA1(m`2r@1q&^2E^z<8Zuqbjmt!&7U&#*WITXBW7*$oULN zx3Srmk*5Xay!+j6^^9_FwsWsBaG75i4*GNi+R41kPsOFMZNihd`uh0^(tY7hj19^F z*nTf4cst$;XRGDvv23@F_8t7CAM2Q58!B(@u>LKz2Hzo|u9D-}aIca5@WA3u?2LC^ zN^$z<&WhF?xz-AV4J?Oc&^KGwXsTJB5CXh*@Zjs$ud!}OisgVtE?lI{oxdn63&_aG zbpG|nk4re=$Fg17Eoz#Y**U^{-AkU`+#2!j-8<|4j^d6+MSwA7#^E&ceVu@2-kgL( z4}QbbNZDN}^~Pb#dUq+OyVWGGSk$JkK5@xVo8nYfjbhf_JzjnK%8bv5-px5_1K5!H>>R19%rxvy4;GVj;o#Y6^QqVF1(vz8f@n?kQKUQ?Jn2$ zg5DiPf>Os=QJKmwPMN5%YRv+gV-1Qd%P~0^Mvprbt_ZPRfjGGqk9U1KCu-=DbrGnIT2~Q?#QG>jM z&$<8eS!WkSZrm}YnaM2WV9v@vvgf|V>)=qYwDjHZ(ned7&rIpNVnjM--CY?s*JD-T zFCTmMGOSmH;Fy}?EX1e?gcixTwM*j=4T|%rs;agIZ3gb#ePQKxSG8S$D^4pdMFny8 
z%7OUrN!B#4OasCZVq8K%Rci3BWhv+$or-K1JSk8bWM(q+_UNjRoes64%8Z?^ZF^bM zjoZtCLo~b!L*A`Ajfmy)bz)+*-D5zty&^$B zkzp|rIbm#We(zmpr_F~Kx9fUyk#_8Pt<2!(VznEKG*3G|I6zE$`pI=LhBz z2^j774qo+$Y6*Vz^7G$|4EWyz43!oYbSwBAqCA6Sw%XOzwRcg;yhY1)$ji$!B3-9C z-$zbVApZ#X`1mL@*iIclWq*CQaAAhi!9m3U85=E$7j3{#mAD;wt>qz~g9jHkHg$8C ztl4`+Z5L1gA_KktUXhL}-PH$=&wVR5Q{sqi@%?-hYv=xUKa-O+ZM{?CDbD4RB}=@8 z_FP;hcJu?*Crx78wik%J<-o0L#?5BbUf~evSJ->?3=ROug@}sm+EqIcW?pSvA&Z@0 z)}FeJZ9qfo6kPVs|F>1|8Z{*Y?E5vP34oN^& z;vJ~`%P-E%(zpZuxkmnZs;05IsW|lyal6%Znc9)kzMqb0MJrkW%U|_$vLf077wcyK z$c@B>{QUKApPt=)?AbpGr$$?aK7;M4PEBcv$SPDBmC7Tu*P<{3>JclZxhMWskYIa(jz~Rbc;{oy?&)A7P#JzhmFA`F81RIkx~H`-eT6{H@Meu)O2G zNq_;Rb-0 z28I4!c*^E?S6()@U?ICb&H9_ORBKaR4kzi0u^bx5uD*HmhLyJb@!DCdeD|-KnHjIm zrjV-KisZDBz35Ldwr?lujNBQk0^h>`=idGT@mXG?Jy;3GjSXSr8CN>lJ{?*a(OzpB zInmcy-Fp-|AG4958HI`m0eh_ zhZuN6DL4pKqB^5IOgOq<;BhT0jM>C4h+uP#cPVQ`v&7inX=!v!cxa?QZ7rQ)7t9|l zkFW|#^lA4~_2{Y2QAm5;+G>_+np6AuM5psaikI1`$PWp`e)&!LJ8%}q?r<`KjtI+nYa%yl51p!P z5M__MkByD#c+3Ei?~9CzI?*O!R~&Yut*B5p;F9`3FXF)D!x=yT2S5W9X?KnuJ?bn| zoY}s7!WATh6G}ru5L~rSktsPhwi;D?-{^NW&a*gaqmwq}8fUkfIV?*Q#8Dd?ZqOK_ zX|6+c?C?36!U@NdDI=T5#$`{u|J_1~GgBv3-D=ee$g+f^Hs~a`Ws4e06|em1OP(?l z&MKF^fT;;EJ2s`I;uN>Ck8MSiZI+VA{KdbcYM0G6u|jX%#TUc^2PA>a>B{&hjJsMNxdY~M-qgwv+(G`9DJ5?()+NV5Gojddmc6W#O ze?O0tSu4AQvvy;oLegfOQN8Hh0eQ7qg-DgPKjpaKLhWNY*gn5;81W$+g$l0uVPIEX zP!Yp#Y{J+c5^LqW^`D&Vk_PGJ(q7PP?GG+y`MPy!0MuAy51+y);Q!|p6{^8AQ+bkB zMg2PzV#cn6uiW0OnyNP1`WT3n>K=fq?pHH;9YK0Wk3M8a>ZLM2uiJO>uLm>Od1t23 z@pvF)eejRC+jr~UKJp)NRL@M7E9}(l%?U(eWp?WPF>=uFWSC9m7N4w|x8G}`VrwW044C>Fv+0HH~(gP3gjz+<--j-*_6y%-!awfPsBaSS(IzUVANqZRd`uzj6Nxf?1_f8&g}wQIB~~4My(9MYlo$X8qRmf}=jq-=i`Zc^4}D9Fb?BWg7K~I)gqC!Te=o5VvJFkgqXgxIe3=rNskf zG7f_y$U*N39c(kcGj7|$eE@7!`{Qt=AS<(7Td;1t&nzuf#_`I|Vfx$yWfK9szDfI@ z{aD5Cp#D&^ZfWkBzq!Jnn=v9Z`Qk=<;GURr$G{LyWN?Xm;J9aIgy6B8!$lG7m&RW{ zREqRA3g5q%VObH-@+r{Ed zDtGYXTb>O_Dec)~=vFuqo*HHVHn?kWFr7BR!qt6h+^g`@pNp4NDS32Xs#E>Rve4fE zsP7suB9{>Y%p<;j{eBUZORt23C*SSR(bd(>3^oQg-$9Z4z^}i?y1d;lzIUF`j%(`I 
zH#vYj@$bAWBONg6eBi(VF{`3evO$xN!3)G8HlV=w%e_04?b2!pg(R+MYSauLkYJK^ zTtiLm6>^k8qHdgYXaxe7<3QUJ;+YWk>hJ$N=cbsRN`$0EEjYYXK*cxn1~*5a(nWSQ zy1q$Ezvz^opRaEM=+>meF(De^+Y*W4!WQZI`abS!N>Be`qsEpE8-B)9M-!n_QnJ6* z>XO8Yi;CYq{MeAFrz&BVn;o#^6MiceNW}>mM6ftx@Mn4&1aMTw2LXF1spuE@o`_Tk zFa+bwz|rTXkAcT>c#WE#I4oqJM=r?-lUscT+V05vW!Cwb=DDUK+8wUAvQ`goKT@x! zWBV-ksw+Ds&bl!i@)5G=jN6-2?ndT*)G}uL9;&J z%ZYLh)`9k^GfXMXr8U>~*0q5kKn*39eMiOZP|evv15h(uI0Kpjs_rX9Vfqj zmJJw9;pVe@qJu5>d30wNHdTYWQVm;YfSnw{S2jnjooq0&igVHgpd=UBlYsYg!X5Gl9%y)Et&A|CwRpw0SAgLo zV2@haA8K*wLC4mSc0w{3N{-}%Q; z^6fnO*LN|Hc0!m;=J#vXI8$AWpsI35`0TM9n~EhdcvzLEzs(asKo{Av&@yN~?iiHfx!jxEtrTN$z{+1eP+q%k4x??b3&flX-s}>BkPsklIgY#Q zi+lU(j2|62v!U|q+p|mi$K?WIuNs^TGtWqy03)n2G0MG+m{M9*Rkg(^?H=31)h*)I zE&Ef$uT@pfEn3bG*jkFrwZ*EafNFJ?U2VH-`6WRW#24NwAxeS*R=5OkzfAb-jkci3 z_KFzeNTuL*-)~(JmmfXaQCe45_YC*uFjPTim~4LQiRwtXCi=2>U0t>xUfyl8NA@`Z zHhfvIOeGPi^zZM<`BYn*YNU64D0f>WHYkD%Eg%wTqcz~;J=$;534UG*WRAf0?b~CZ zpp+vbIGPvwH}vk+4e&hVIC#QG!@*2YWxTzrR&GV zGvq6UPv$hGuLds2{hnC8fPmF>IKV0#e}R|mJxSamPS9$EWht-$mvCKHgDg6kuT>MR zwXt zo*$2ZkAL=HhF)837ivsK-=5jE&yTL^eVnt6kFOGWDL!a|E5mIsG(&jw@!BWo`0>>P zkiIvF*hL-vB!A5@2I$mel%OP6kAOsiQe09Grb9o)c&EbX8yo!5&E%@5%D=vTB4l66qhcXeg! 
z^EpU?7#CQzV?VLI>w+gw7)6{7{jLethk+Jw44x=XNl^x0z+&2cZeNPcNWgj{!#;za z2UOpH7<2Q+jab+Mz7Ntm()ReoIKG3>9W!jr?5jl84MQ!4n*46>Y&@+KVz;dSa01ce zQ0bBp=+Y#6qp;w=pdM}nf(Hz@C1nA$vd~`l`&`MVe0*=89^fMz3g4~SHM9Qt53^R$ zzc{jJ?h*R?X|aEto<;uT#>lIQm@cSXu`Y)olZkT}bCN_Z=RTdY~}N4)e(Bi;{~Krjn* z#BGoeW-p%oDe#4Vi-)oCtv^dkEzAYzYaVN>n6G%Q^@#qNFMj{OJox|ZdH>&k;O#)) z?5p{*gEQhaCCY!lMu~FYt6M5*k{b@X&;7rw)BpST{>Od7pUT3gA#o>ys%Z&Q^!#`g z%=_``d%c-=c#CKIp8m!ptqOos3^Zd)VL)DYO>{bbW~-uNyX^??*xh5L&;CT1|Akb| zmc4t82~sUuw&lp(My8H1K)MbD9WU#1x&WNnwpA-6%pV^jfOvNlHIV}%fLL|#NT0#C zZ{KW>|G|6G%>&7L_i=HPw;H8=yso2|vAEa}@dn56 z>IMk5tYok4#*g;LY20K$_JMdrwW(Kc?E&1nTt%!{XDA93={loK#|ej_h-sI>?FczS9IDrCfVEA5;?M8K}@ z?sx?U8W>1ZUFRp-5L6g2)`<1!pN8hR_a%pS z>|*~Hkzy&}DzPnF zK1TKNo=0@BXy5E@>cIVt*!&z+56dRzEm#0LMjf#|$4LqP4I=q{*shK#fsqz&0d{7< z<)Z3VAn5KyETyG-tkh;S>uXnpRd{rCG;I$NP?{aZ!Nr?2Z{0@?p+Rh!o6Ho-y9iOB z;VmyY+l(LG$LXZ9nM2g#s8#Tk0jkeDjv3H4T{@}bFWmk8Pr@hwbbPDZi}$#h)qx(1 zvPI!q;FWTyE7k?{@AT@cSO5O|@5#h~6qo}jM<*UwnJBZ}sCcO;faEp~)Fq8$OFkqE zpVQ&7Kx>Jp6R8k@2%Ch;+6{lfY4j~16P=-HA{?D_-zk-kb$`ODBy9VCq1!PBQA z_n!E3z@&s0;Pgak;PTTO6OjVP!KRN4um&9gC@Y%;+6%E5d`TG9q=aJfpg}}$qAbB zr>c%h-H;_~lfHiS#eOB@s}UHiLYx=S1MM%*gp&I>|NMjj`1<9mRwcl6l7DOpcvb(` zUw>cZIiVJH#-TV(kbbzf^VO|vE0sG9DNpPXZcq&rGi}*MO?)g3u#J%J9GXU<+S=p( zkx#?v!fg7jV+j)j%j}Kz;u;f+2NEX%MbkL2+inoMT{Y2GFjJiL@}r+U&A*>79CV^2 z6_420S9tjka3!I%c6f1X=W_^c$yL1lJ;pctQS7~a9)0*iPcPo%o!0zocL{yUf2|?2 z&BB6DH}6ZF{khOkRzA-$>U;mhe`0DKNR#ACOdgs}|H3c-p>G#0i2UzME<1ea`yrEG z^`Fq^KQ1B9{*OZ|W43%z5P6W%aY}ADc3i#s58TgyH;ns2b}i5|>VhaUWL zj!b{3XOqA&wmk8$3)!XwqtuFv8ykYb@xHkFcd}i#X22xpA#7VJ;0z4F>QG8cLcIw9 zewFwaO0PuZARP@jtVNt|!VPd%+HRJs3GTUWf$*vOD8*V@L`0zMD9~b_UZ4{`_bpVy zWR#{rxwi-;`^Pj;|Po4V7qBa2Yc zxBGJ>5aFIzRUIUrW7o0sK>p4=9+Il}5bHNBQNh!G@b?COeHWUWi*Q?cG{YV>KO+`l z;Q%~`R)Hfo+k+-t6uvwY`7%1%NLdgI8pkrE=i6Ui$>5`eE{3JWoB7BV;wmm$xbQV{ z<8_O22(u5sH(HSSt{rc&e5;`*IG+amXq~}9fa`w0Bs z2a5-=x5Q>wwo_GAEP*-)N z88B0(Sq(B_Hq20P=V~y(vcv^VjXPlDkrUiaTY>nz@Fc7~SA-~BVI7A>N*edy1$nq^ 
z!h~s+?^$Wxf)aC?u&|q)Kyy;?%ye&(EkZEEHS+x_)~q@6)fn9GJi$GHPIOqzg4>6@ zY|{3x)EY2f*9dT!-YJ#e78K~x=QC+D1%dag(Zs3Nu8T)2|A{`AwI109h~(!5|f zU>PqwD|fz`ovOE6gzh25Q(qs6Ci;GU@6A1P<_wam(H*CK_;+yAEL*YScvBRlE0k4l z8b#QPQX5MO@Y-Zx#OCVb0_D&uR7L0@%ImSbxoaBrV%5H9MW^n@uPHXSkiQIuh1MM4 z!mxkv!k?gM-5hu5aR&GI&!W^WHadY#MTp-3|D&C^`Vv*=$UiPzsD;loWzK>n_ApqH zIE^sy@ZrO3eBJVG#)n9NNq4Fam$+MJ+a9EsDhEUdc(R3imW~Bq?7{H1xydj?ki)w% z&4S}O4lw8W@zsXiu5E96he=b;jP!TLuH0T0mLzMYt*gr@J*gcj8wZjLuR7J?PzC&4 z4<6B3sVU*GGK!4X_y|frun)xgP4T{`2^T^hy8xRN&zuIDkDU$5DG|5z%g9IrTpIVP ztw5825w-{M<*j%~EG%CiJ&O<(tQhqPhegcHkke@i0zW-n09x}95V9zHQ8`MN zCRCyZVBDUt>?&h!@UYw1mV>osC^mUe8UhIfoknwqcjzh zmagxe#I{mFOvXMO?PP#8lC;COTcwdG_v8vz+8pvpBOHii9fWs|gDdh59pY9f6c^ghsQn^-{i8m5Jr!~@L|B0 z&*<+wlcI8EjTp?Y2+ElVlN!B;;1&j_wBNm zrtdf%svf^)))g>8u+r%E|G0d491a#~dEQ(^R>f)ftvc5h zNTdd+2T);`HhQDpC+F$*!eIu&H4}=$vHm0UgnUcSX)Y{L4>Hs`i5F8Q5D8MG6dM zT-#ouP>N$l`D=i;%(_c?kqh_Up6CVy4dOv^T-l~%Q72&ou7**|EM0eyk zjSiK$^zg^FjJ+`Zgn&Cj)o7$o`)hDY_eRMHP4?oAqN2|ry-=>E;ErquQq>XugWRIV z`=d-rQ_c46G7lN0+M6ILY+y3XNX`QC@CAc9NC>a z52GWBSkGlx+tMBN$wu1|sO{lIz`13_Mkxe{%gdXm7mnWHmYah@Y5+g3MCD}eNYUh% zszlfHe?wk)DbgG5Z#mXtbCt?WTo_0AXULaPrw9<_-87ztLy5g$xSs38uaZYU*8%s( z0BVfO5V&tE_&R8@Qd5DgO^Z%F0eds}s0h+%?TVm{i2y~6*l-E677HV17bW+%r$x!Z zPwg)_+srJx8gXdCa60Duk0eE%`l1dWms>7NXTjXL_BdK%mNWe?;D1~G{rA>m>Bwxo zraZo9z2EKeHE)|N1GHdc)EqT^+nPRS55F=fd?QMsRNj9X8*2is;|ZfDb@wb`Dg}Qo zTj~G)kIkOY9f)BA6{82o1<+SI*pR&`>1x-0@>U}23`761yET*B_zxF>cgT>2Kw#`* zWUDQblGWZa9GDv%0rJE_^+x%=Unl_o=!#gqS`bX!HyNb^9S9A2kvy*B>Ym%)i?TgA zTinUQ1Zsi&tJXv;q^;R^k{B@br#&DDEH}F!#18H?(Te|Un6uz($2uXnh^kXiuZxM|p?UL^T?{CP52iaJgW zUB+d2>uIUJXwf3wLx&D!*8PnvHtEfIgs1#Oo*4XOg*D8I%_R7N_0)#H?Iv_)bgQ`B zT_-4*xV?B%7tA7~KYb<=He#qDq_l_f;f|P6i7}2wxHVVC+b^SKOPv&IRQ!>SGqdL{ zO2BRGW3%$KqZL!3ol+B<_Rhc4+b*N{sM!+iMPaDxUZK0f6QxpUFOHxr907_G{Zg>d zn7F#m80j867E8AT^6?g(D0%<>j0)#E;JFxd3nU|UkYLX)hH|XVK+P4g!G;Xa5S@Vl z_gXwTS!SI-(ro~TW3JwVRCFQfKRWVrSy`E_ngrd1)2!>gvyAh-R6in|Lvv}hzB7_A zfH>jYG-C>502ND+oLFe)a`CZhM@QKxx0MOBC~IMbVw6k(FKU}CsUL!^xQd$ 
zwc1Sj{(*tK>c4U=XPn49f$~KC3RCHzE+{sJg&qDc5qJcy$MjhS0MH=l<`ZZqX&iAc z9Yl(F@bsiVF2Y82cz|dxa$VU;fQG$gE3N7W*evRdDIBgnD5%S$hTaYs^K&Aj+&h|| zcrd}h>ECZv{ld@h?X{1dZ{4oYa_N&^3llT{4cPi8z2Lt4zgdecJ@UUIhQG1SR;vq! z(1w$%=+90s5c*GKM*fc~%!j{Kn5zFws_6eWB>qio_9vvGV;RQM9^EhWC%pd_4sKiW ziuCnPw*LRZHgUZdHOVO~x8+cS|cd-NbMVj;7Tq5P_OWQ#Dtx+YfDc3AovZN6`6!0$FbL9!LJIfb3i3UtsJm}{$)U8!G(rAY8d=pX-wF$v`jOY=Uzl8ex&;teJ(}=R)g#v?0*r4MausPS%65Le_lQy9>gt70DwIjKiq`2eK*Hj6Dr4 zy!d>3Yt?Lbpb6N7nEGL}5k)P?rA{4tOBKKF2Ukt92^y4dOQ%){dC!?WTep7fFnoHS>96sw?t7B6-5Ogc#YH_H)|)Lf?ZMtuOTxD-KPLj4j`5N2Mfoj z`b8|=ygH5dG$tZdX|uv(vGaWhDot{O1u+YM+_YD-1CBz{UJyk%`0{YkkpZ?DdQzc< z-7`$LOucn$mD$W+fBjlkUapbvbz>AP(`$Te0}5fl%vj?pIN=_GCUQUqB!7;mn5?WR zN=pN@V5Cw~rEo^g&7kV9izY#`yOeR496Y)wS@ldeU~IhPxGRG_hI*>=k|>z0@>#lBa0 zXix@Ck>gaHRSr@O_PoOBaZc_b*C`ov^s8sGB)@6@8}aC0pX8vCz~AjEwB?M zLgKARbHm2nxO(*nODw{ZV?PLA?Xjlq@Gc|9q209Zo@K|W9)UF8E1FboR6QMM=JPx5 ze=eaH6!_}31Ost;sH-&J{oQXcjEn%T*@Mp{y8wIb{p)%t!FD^pIYcT5$|vfx-^-PO zc1k8OfFtUB$0so13?tl+N6Q$2eE5V1OX1QWhzn_H1VD>S#EgEOrfF10%Wy+L>2TW*4I&795nsSu5i3B z8I77jcOn$PhGFFm5Jog}b#OsRR@=B~Qy&pJG99O4s23JDE}Ij9RwvTa3GIMtlJ@*~ z2+ZCq@3~(`SG%G_e?Gd3)Gq3Dyd&$Cgp)oD`Z)GU5G~~X@~pp=ss*nR4^YG|PPJee zqT;T);nfdO0C86Q6<5S>e*CE^Df)%jiU&Irt&aLDJ>nshH`VKk%p~PzPp*Fg|2(?} zIPlKi6Ave+rVMaqR~qn8m)g<@i#e0){m>EWg%a5y)zkp;FRxJ&Y8p6Y4GgGAll&Xt z&!78NIswIFd%&PVAfkBwiO!He|NK+>#QR@IxUEeMalDXOd!Fy!e1tXPO8@3n_};E_ z7x=FuRQ`LSpZ7fU^K>IQfAGKmb~9G!q5c&>zB{v|q(>Oc!K1WHg*>jR11Y5EL! 
zwAHTq9(6|ptn|mDMW$uIZu|E1?3KL3?|Y=pCsY<_<3nesh|ic=^CVai!W(!jNcic0 zkL>70C-w>1V7NTV6a%%^5#fOm3+ZetOnDn%gn;(KuBlF&)eh)^@@*p^At_PdJ&tA0 z0wCxjAppel4fy6L@IjZ>?BPjMe8;3Cgl84`etw7^6ZHE~VToAShdLUR_*si$3Ok^| z4#C+-N+Dt(hcZbZQEZ6l4%Ar!bDX$AlBhaCL6myg@S6g*#U>Y_j{rKG3KoRg%xEwM zpj{PAln(~s9c%!2gIuITISUCV9)!*o_=3@Lby!VJE&rbzzgKc;30n1XCz_$pMF)*1 zg7UIy3PSTTQ4pZ$b913~yqON}dcGVbsxjWJ9*wdo=-z6H7eLmeo^sHNBj`m*fZdPU z@0b7hqsY>wVaqrHJ&``1;Pj}QgnGzD69M6fDzGRf7tpztmSkflOM_Ve90BB2#?+3( z-drD9uu#+-)szc&+A=2|IUXBXHRltuSS+0CY77!ZC?(_I7@=)u7^d;&@f_#oTFJ?F zYB{6{CunTEPZA=mvEFr@`otLc5(uM@gLxx!FllgjMKUo^`;c=WAeJ5$+xeXl1AvN} zL4Z`68*?$>hD;Zv&SHW%HOLJz~hDlQb<-~iu@9fNRY28DZkj4>U)8oKx0O;y! z_s9WJ!4TR1igIJiQ~`j8x-Pn|?nzd3bSO2CZ%hQ@CVtMM6>YE(M~0U5{JmnAv0{+~ z#xR)$7}y@{7(5@>MVU=)rhoyO)Vhb#fxIEiud77hMyBV%3h4gv!$$JdV~)vwWL_3C zKUJpe4#2^eo|?fOY=6D<4{+Y>YrDFb-udc@0T$mjw*QN3o}7GBCq8X;oLkI?upr|K z8suKVdRd#jj&rRC`B$`cGu%two@3rDpTXRTebUnUwW;d^dc0oJNAWeW2G^YhY`G7{ z`!G62h(M{8w}MfY_)8d!pHa0`S{-n3BV1y1(O z2t=JIFOOoR{-5!>hvhFo?sPW-7&+`TPd*5>~CLlR*!{*8;ZYt#1@IUW4(2%pd;%*Qd4&e^`n0N{8?N#_B<(_#6zN& zwo)=3Sdq+S6ml=4PlBW{NNB_{DNOWddGq=ni0Fom#ead6VdnKqwk!I#}Y(t>W-#CKr z_J7gjF?#NQ&5HlqGyIo3Vg5b6!Q)*D(2}5Di`#5&zL1{(amenaCPS|a*YkB$AS6nv z69RWy%sa>{Hxm!q>rpLvXLZ^jr*8`avwuA;K#1Bo?tC$ zZTEUJnpT?YBavIodBn8{b%|C4jFiP^0q76m?c_96CqI#ONkiY>4pYvROuu%4+&f3JYN$^ihIvTlh<9_6-E2hloAR zEy8dVU0$cqlI8|UCo8skfdx`<`>7!<47>Z4^wOYjT`QSWH0Fv16af%YiJWHsW8QxBN$$ zp;ZuxE+HDQafMzP`Szh&IaExN)N(j*1M~zB+MWZ_C_TplO;L2sEAH%9aiF zQ@ymxT5T7)x*VD$D_vVf{2bV0b>rWj7e#wJut%%qk#=ZW4-ni@CpoxcN_}+qMl=rI zEa>SF-aBtxG0XkkFJK9^FpBF@4m|K2JX`}%Few)+4aYv6hXy;b^{(i049^10L+*`G zCK3gO*8fQMoy|;6(WmM*%VS zYQ`Mj(>P24{{HAdtbI@jt&8zt*keBf$bMjWnZsms_P~T;Vqj6`Ak)G>uh0N1Rx~~C zL@t+Ws_Ra)^k7$E0WNO(LDnV{ujtVo}udv=<-f#vo7t zBwZM@xh?4ytWCke!5Ceo2e#HQS~2juxq|m0l0k8dKpBTpEZcABcxyHQJ}nK^SZsM9 zEn<6H3F&jgehCz*pp?eaL#=8!>od}{1S?NjQCawnxRMWEYe~|MXWexXH^ufIL zA-9g2Jm==0@S{YHEtyKGJ29voB`>DFBzg{mN<(x6R6?%?6IqL5Ckw|{+f>o2c}%JU z6@@2kK4`6O?;Qq_vjx2gF#8$eM~88PkDoZVG+vH(V`kn8y`6Z0Itn8pXN7t@wPHF^ 
zR=#K*4Gls(jR&7o*Jy}8D}Oxi0wc5_*nkZ+l9VoXuqT{J-;iH zPJ_a*%i1{5w;nFxKpIfwP+Ng^x*uw8>m6^})g_7RcQ2u)wZ zpwrOgt0I~7^{IQ$m`3~1pzx`H;u`bfAPWSLC0rz!+}>xt1bo5m#URoo;rQo{%DM3k zqu(O|eI{f_#5+CH`flJCD4OiVP-vIn0#bSYZ@>%FjHBmVlynAiN;vB~(Sv4$7EvOP zxh+aCtg@n!5)+^Wf5g@`V(LQzO4wtzx0lA%k9s%>BV8!2&MZNjWVU02IPDAYsdpVL zd3IwSm@B9M;Fo#C(Bc+f3k%a(VL0xt#-Y&KO}>b5v|}Iy$o7v>4#%jrBTbDMl!bsz z?HlX)m?h1Bzx2J?fziOhhjF)*StEBu6pu-@!*@uGKCVZdw}7_>P3Cy={-5vOQfuGyCGYRx^m?RIGtOi=TyjSlz7ky8`NrU9CS7Yb9& zw1KBL+Wvlc;4e*#J#}WPwNU2t#DSm#LzU=O!xXi;(Gk2G<~d@-(-VBYJfJ}e z4D{eR8@w;*oJ^l*xj@hchCDO_nBFP=x(ovu8E?j<)Ep=L@Eb0wd}!#o4-(<|4*t-f zAEE`uj>7#$-5*Gxv7mC|{S+;i3f4697XA({*U9b|iR41{TQ@rOA#S*kF*Z4|04-;5 zeVUMO=iQa_=*h{}QY}YwYzPf*LC*RJ8>JDtIBTUl02!9U0a#K}qM7!}9M$=H$Wk;H z5xnS0AD=wd&TvucTmo7sShDLGPCIqI>_#K4))W}}_3Hf-g0XN_FXHF-d{eWr7uo+& zYYk~=2ygq^B5&1%1Z1HtmkKkGx%FtDxtTU0`^o>AZ#6H-)F5bNW26M}1iihvmDU|T zk~zF&qx4(p!DBUtX&|6F7^o8N*KL}`mK!8{n)R-fw4hIDX0Kbw2~1wj(I08 zjT}Bj2Qvzhoj157>-0VD1Pml~G(&Tj?eo2bx38PTbp^!zg(URb-6L!}Le;peJUb*f zr{ybFkYl+Q^x6`cxToyZ=qRF>M~C~bs1xg8+pv}?gh@aa<7ttEnPBmgj#V-kuI!yY z$Y|uyf{V5LLanebcz0a+FgO@6rZ5&lhAOrrB0W7FmTMc%7;%0`O299DuVX?9^wB_N zsBm3jO9kJe&!QfxVPw$fZ%9#aDwu&(p*FL8(COsi#1AH-!JU%AV)39)n!vGWt$f5z z9fAoZ%R*`lSCu$LvZ>QpWN3oqm#6DbE)|FpbkuTfiT@*&f&Ap470_L+LGz_)Z~|C| zSpUJzbVW#E*$?GoWWM*7IhdQef?uBAeZI)oaLP4C2N!KLz2&gnYQYHZt-F%H1|{2Gj%4f0-*k0Sa|UL_wtX?1a(CI^It!zAcWAIF+!Uvs4p%71<4Lu z0ApgrpaV51gIQAn`nUvAVBpkX^hH@K?W_yW%`gmtG!3Jb-H)z zlx>F&E4yfL5;rYGv=be28r~+zjv$Z{tmn?3FElVPpaP(E8OLcAj;GypYz{%Pv{kDj=0vL1a7$c!0NlQ{3sFrXP{;e?5nPkvVUi}%kn zg!i5N6&W5=H6rVJOGB47p}vgNM)5;ks0iN??`5eso4_yrWXx2OZ~fWt(Q&i15V{uT zJ-?a$UwTbZK6dqesU_Dx4JbP3^f=HgAIBQqQD9gH0+)ARAHrY=nw5>dGt>Cg7!moC+V5Y$I!98#Y|l~W?7B@p3FvC63kXx{E3m$G&7L{V;97323hebT9ufRucG=SO{%nB>jVNEwTZ&OH5hF(p#03&8j z)l|*qJGKTEpAu=9%I;H37djHCz8xPMOvglXYHF*j_JHiGnOFuL9BX#cBVdQpg@ZS? 
zJfI&(=M2qh$p-r?81euhbF&;ZRKO4WAv1&AddOKcTcy9Hv$J#ekrF=Sii0sKmvvFT zLUM?~``Cl+^~p&?e?0 zYvWi#tcK#5X+SK!Qf+)9m6yrba$+$oCII7xA?73__mGv&()ZmwV%DKT zg2wl}P$kW;Op}x`JA-VzW&zMsz+qm0FKooeXHo+N31$}Y#+U%mkr_Ftz5Z~jtr3JdK>!@;M@QY{!p=89l+ASru~uW?S|>qqu_tT@&5Au~a=4RAPXV73t_5AB zzfDN`fNg8XF&xYUJ3l7vtxmn&l-y7`R6i~a4~oHyg4x(;gF2KSzLf9T8Vri$+i@BDxOK;lLzp1g zNUm#yG$)8GbcijA<-&fD#9JdB>w~93=ZVvV5muGpCNzO?Q3xm+C=Q6s=W1&WpusmO z2cpaY94~#`TJG%oqAc5uo3}a^uwfQ{h}k7%6vv^bz-oypTA^JuTea-d);O1bLOCRh z0Wq1vW0iZE6gZkwM1lh1$VS!`1TtO^R@V-i$VeYTQ}@RzZj|kAPXlD5aRSH+G}syu2F3ZlVGr-W zMzkFSCqO>a(6HbsFLK{#PuzpL`Yue+Cx7aErU4ELv6`^H9mao%;1w|4NVVJ`H5(C8 zrhHl$mM)^;fcTR^izcRRl?X0@H(h`ikBPXTxd&y6RE|E_i}q~zu&BbQ^Mmdtl~T^S z*|1v?7%K@GB56ujZX7NhkxaUQFtywpeKIuO*=mwLR5TQF#E^JN96d6x5~!w*MDl#0 zW6B=IjCUXhW4cid*kkYub^HQtL`v4>^O}t0_hN_!{`(YsS1+KPrxSrX@Zio@=s`Z1 z?x-H}cwQ63=78DE6GjB2aKZ0J@1!qtgbS-Tu$t7m}?RTellSVnNpO z$TGxY7{OPdD@(qi?g+cBP1^SPGAap>;SDgmfY)gUl!d-n-mEM2ymhG}>hxa*LG{hV z2;N>aVOBDPqz;-^Oam{ALA|D)GHMwha~v42T4DmBRfAfUC>udDye5ha{Kp3(J`4`J z{g2wt=as<#jN|WIT;w2!*~*Vx9JL#pi}?}TtZ>nL@cXb7rYsIak~EXrjuKiakur?z zuw2Ye>qgF_xn80=lMS0jjZ~7om6*OMAA9T3b#Qez8qB(_(N;BiM*=SSj=(50}Of9+5=z>tG^1 z--sILG31+5)}^(8T|aY|o~hu-5#t!{ov}pvX+fo#&zh@CL$EqQZnx9t-FsA7--Wds zvxkr%i3LXOOkm3~fwWX7lavTDt!i)2oa^<&r_rbpDT<8ro{n!xC|ihuT5_BvWhn&t zxgZ{P1&pA^kanOi2da0YUqBCIz|RDcOtQNikUpG_rTti5kpul=Fjd6`k!3-$dLaR= z+$Fl}z1Ye*!xhs>#??Um=wevSoflT5wFyNjTkhJ_DRV9o+U+RXHJrPfFVNl-$MPlS zy3|gJEP}$k<@qZndwgn=K+0X4P8llrHE&N9l}Nw+*T(7JT{;iny8hhSl3_gW$W_{! 
LJDT Date: Wed, 6 May 2026 22:35:11 -0400 Subject: [PATCH 57/58] final reshaping --- .github/workflows/ansible-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml index 9ffd66bc2..3c269efdc 100644 --- a/.github/workflows/ansible-workflow.yml +++ b/.github/workflows/ansible-workflow.yml @@ -40,7 +40,7 @@ jobs: - name: Confirm model file was saved working-directory: ${{ env.PROJECT_DIR }} run: | - ls -lh ml_model/house_price_model.pkl + ls -lh ml/house_price_model.pkl echo "Model trained successfully." - name: Upload trained model as artifact From 21cde4ff053d32a56ce66edf6e198c3f4d711785 Mon Sep 17 00:00:00 2001 From: Likhon Leo Gomes Date: Wed, 6 May 2026 22:41:30 -0400 Subject: [PATCH 58/58] final reshaping --- .github/workflows/ansible-workflow.yml | 200 ------------------------- 1 file changed, 200 deletions(-) delete mode 100644 .github/workflows/ansible-workflow.yml diff --git a/.github/workflows/ansible-workflow.yml b/.github/workflows/ansible-workflow.yml deleted file mode 100644 index 3c269efdc..000000000 --- a/.github/workflows/ansible-workflow.yml +++ /dev/null @@ -1,200 +0,0 @@ -name: House Price Prediction CI/CD - -on: - push: - branches: ["**"] # run on every branch - pull_request: - branches: ["**"] # run on PRs targeting any branch - workflow_dispatch: # allow manual trigger from GitHub UI - -# All jobs cd into the project subfolder before running anything. 
-env: - PROJECT_DIR: class_project/data605/Spring2026/projects/UmdTask405_-DATA605_Spring2026_Ansible_Deployment - -jobs: - # ──────────────────────────────────────────────────────────── - # JOB 1 – Train the model and run the example script - # ──────────────────────────────────────────────────────────── - train: - name: Train Model - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - - - name: Install Python dependencies - working-directory: ${{ env.PROJECT_DIR }} - run: pip install -r requirements.txt - - - name: Train model via template.example.py - working-directory: ${{ env.PROJECT_DIR }} - run: python template.example.py - - - name: Confirm model file was saved - working-directory: ${{ env.PROJECT_DIR }} - run: | - ls -lh ml/house_price_model.pkl - echo "Model trained successfully." - - - name: Upload trained model as artifact - uses: actions/upload-artifact@v4 - with: - name: house-price-model - path: ${{ env.PROJECT_DIR }}/ml_model/house_price_model.pkl - retention-days: 7 - - - name: Upload results plots as artifact - uses: actions/upload-artifact@v4 - with: - name: results - path: ${{ env.PROJECT_DIR }}/results/ - retention-days: 7 - - # ──────────────────────────────────────────────────────────── - # JOB 2 – Build Docker image and run Flask API tests - # ──────────────────────────────────────────────────────────── - build-and-test: - name: Build Docker & Test API - runs-on: ubuntu-latest - needs: train - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download trained model artifact - uses: actions/download-artifact@v4 - with: - name: house-price-model - path: ${{ env.PROJECT_DIR }}/ml_model/ - - - name: Confirm model file is present - working-directory: ${{ env.PROJECT_DIR }} - run: ls -lh ml_model/house_price_model.pkl - - - name: Build Docker image - working-directory: 
${{ env.PROJECT_DIR }} - run: docker build -t house-price-project . - - - name: Start container - working-directory: ${{ env.PROJECT_DIR }} - run: | - docker run -d \ - --name house-price \ - -p 5001:5000 \ - -v ${{ github.workspace }}/${{ env.PROJECT_DIR }}:/project \ - -e PORT=5000 \ - house-price-project \ - bash -c "PORT=5000 python /project/app.py" - - - name: Wait for API to be healthy - run: | - echo "Waiting for API to start..." - for i in $(seq 1 20); do - STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5001/health || echo "000") - echo "Attempt $i: HTTP $STATUS" - if [ "$STATUS" = "200" ]; then - echo "API is up!" - break - fi - sleep 5 - done - curl -sf http://localhost:5001/health - - - name: "Test – GET /health" - run: | - RESPONSE=$(curl -sf http://localhost:5001/health) - echo "Response: $RESPONSE" - echo $RESPONSE | python3 -c " - import sys, json - data = json.load(sys.stdin) - assert data['status'] == 'ok', f'Expected ok, got {data}' - print('✅ /health passed') - " - - - name: "Test – POST /predict (single house)" - run: | - RESPONSE=$(curl -sf -X POST http://localhost:5001/predict \ - -H "Content-Type: application/json" \ - -d '{"OverallQual": 7, "GrLivArea": 1800, "GarageCars": 2}') - echo "Response: $RESPONSE" - echo $RESPONSE | python3 -c " - import sys, json - data = json.load(sys.stdin) - price = data['predicted_price'] - assert price > 0, f'Expected positive price, got {price}' - print(f'✅ /predict passed → \${price:,.0f}') - " - - - name: "Test – POST /predict/batch" - run: | - RESPONSE=$(curl -sf -X POST http://localhost:5001/predict/batch \ - -H "Content-Type: application/json" \ - -d '{"instances": [{"OverallQual": 3, "GrLivArea": 800}, {"OverallQual": 9, "GrLivArea": 3000}]}') - echo "Response: $RESPONSE" - echo $RESPONSE | python3 -c " - import sys, json - data = json.load(sys.stdin) - preds = data['predictions'] - assert data['count'] == 2, f'Expected 2 predictions, got {data[\"count\"]}' - assert preds[1] > 
preds[0], f'Expected higher quality = higher price: {preds}' - print(f'✅ /predict/batch passed') - " - - - name: "Test – POST /predict with invalid payload returns 400" - run: | - STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://localhost:5001/predict \ - -H "Content-Type: application/json" \ - -d '{"OverallQual": 99, "GrLivArea": -1}') - echo "HTTP status: $STATUS" - [ "$STATUS" = "400" ] && echo "✅ Validation test passed" || (echo "❌ Expected 400, got $STATUS" && exit 1) - - - name: Print container logs (always, for debugging) - if: always() - run: docker logs house-price - - # ──────────────────────────────────────────────────────────── - # JOB 3 – Run Ansible playbook (deploy + test) - # ──────────────────────────────────────────────────────────── - ansible: - name: Ansible Deploy & Test - runs-on: ubuntu-latest - needs: build-and-test - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download trained model artifact - uses: actions/download-artifact@v4 - with: - name: house-price-model - path: ${{ env.PROJECT_DIR }}/ml_model/ - - - name: Set up Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - - - name: Install Ansible - run: pip install ansible - - - name: Build Docker image for Ansible deploy - working-directory: ${{ env.PROJECT_DIR }} - run: docker build -t house-price-project . - - - name: Run Ansible playbook (deploy + test) - working-directory: ${{ env.PROJECT_DIR }} - run: ansible-playbook playbook.yaml - - - name: Print container logs - if: always() - run: docker logs house-price 2>/dev/null || true \ No newline at end of file